pax_global_header00006660000000000000000000000064137037224670014525gustar00rootroot0000000000000052 comment=48bfca38f6f175435052a59791922a1a453d9609 rocksdb-6.11.4/000077500000000000000000000000001370372246700132455ustar00rootroot00000000000000rocksdb-6.11.4/.circleci/000077500000000000000000000000001370372246700151005ustar00rootroot00000000000000rocksdb-6.11.4/.circleci/config.yml000066400000000000000000000102561370372246700170740ustar00rootroot00000000000000version: 2.1 orbs: win: circleci/windows@2.4.0 executors: windows-2xlarge: machine: image: 'windows-server-2019-vs2019:201908-06' resource_class: windows.2xlarge shell: bash.exe jobs: build-linux: machine: image: ubuntu-1604:201903-01 resource_class: 2xlarge steps: - checkout # check out the code in the project directory - run: pyenv global 3.5.2 - run: sudo apt-get update -y - run: sudo apt-get install -y libgflags-dev - run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 make J=32 all check -j32 build-linux-release: machine: image: ubuntu-1604:201903-01 resource_class: 2xlarge steps: - checkout # check out the code in the project directory - run: make release -j32 build-linux-lite: machine: image: ubuntu-1604:201903-01 resource_class: 2xlarge steps: - checkout # check out the code in the project directory - run: pyenv global 3.5.2 - run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 LITE=1 make J=32 all check -j32 build-linux-lite-release: machine: image: ubuntu-1604:201903-01 resource_class: large steps: - checkout # check out the code in the project directory - run: make release -j32 build-linux-clang-no-test: machine: image: ubuntu-1604:201903-01 resource_class: 2xlarge steps: - checkout # check out the code in the project directory - run: USE_CLANG=1 make all -j32 build-linux-cmake: machine: image: ubuntu-1604:201903-01 resource_class: 2xlarge steps: - checkout # check out the code in the project directory - run: mkdir build && cd build && cmake -DWITH_GFLAGS=0 .. 
&& make -j32 build-windows: executor: windows-2xlarge environment: THIRDPARTY_HOME: C:/Users/circleci/thirdparty CMAKE_HOME: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64 CMAKE_BIN: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64/bin/cmake.exe CMAKE_GENERATOR: Visual Studio 16 2019 SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.7 SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.7;C:/Users/circleci/thirdparty/snappy-1.1.7/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.7/build/Debug/snappy.lib steps: - checkout - run: name: "Install thirdparty dependencies" command: | mkdir ${THIRDPARTY_HOME} cd ${THIRDPARTY_HOME} echo "Installing CMake..." curl --fail --silent --show-error --output cmake-3.16.4-win64-x64.zip --location https://github.com/Kitware/CMake/releases/download/v3.16.4/cmake-3.16.4-win64-x64.zip unzip -q cmake-3.16.4-win64-x64.zip echo "Building Snappy dependency..." curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip unzip -q snappy-1.1.7.zip cd snappy-1.1.7 mkdir build cd build ${CMAKE_BIN} -G "${CMAKE_GENERATOR}" .. msbuild.exe Snappy.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 - run: name: "Build RocksDB" command: | mkdir build cd build ${CMAKE_BIN} -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 .. cd .. 
msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 - run: name: "Test RocksDB" shell: powershell.exe command: | build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,env_basic_test,env_test,db_merge_operand_test -Concurrency 16 workflows: build-linux: jobs: - build-linux build-linux-lite: jobs: - build-linux-lite build-linux-release: jobs: - build-linux-release build-linux-lite-release: jobs: - build-linux-lite-release build-linux-clang-no-test: jobs: - build-linux-clang-no-test build-linux-cmake: jobs: - build-linux-cmake build-windows: jobs: - build-windows rocksdb-6.11.4/.clang-format000066400000000000000000000002121370372246700156130ustar00rootroot00000000000000# Complete list of style options can be found at: # http://clang.llvm.org/docs/ClangFormatStyleOptions.html --- BasedOnStyle: Google ... rocksdb-6.11.4/.github/000077500000000000000000000000001370372246700146055ustar00rootroot00000000000000rocksdb-6.11.4/.github/workflows/000077500000000000000000000000001370372246700166425ustar00rootroot00000000000000rocksdb-6.11.4/.github/workflows/sanity_check.yml000066400000000000000000000021211370372246700220250ustar00rootroot00000000000000name: Check buck targets and code format on: [push, pull_request] jobs: check: name: Check TARGETS file and code format runs-on: ubuntu-latest steps: - name: Checkout feature branch uses: actions/checkout@v2 with: fetch-depth: 0 - name: Fetch from upstream run: | git remote add upstream https://github.com/facebook/rocksdb.git && git fetch upstream - name: Where am I run: | echo git status && git status echo "git remote -v" && git remote -v echo git branch && git branch - name: Setup Python uses: actions/setup-python@v1 - name: Install Dependencies run: python -m pip install --upgrade pip - name: Install argparse run: pip install argparse - name: Download clang-format-diff.py uses: wei/wget@v1 with: args: 
https://raw.githubusercontent.com/llvm-mirror/clang/master/tools/clang-format/clang-format-diff.py - name: Check format run: VERBOSE_CHECK=1 make check-format - name: Compare buckify output run: make check-buck-targets rocksdb-6.11.4/.gitignore000066400000000000000000000017061370372246700152410ustar00rootroot00000000000000make_config.mk *.a *.arc *.d *.dylib* *.gcda *.gcno *.o *.so *.so.* *_test *_bench *_stress *.out *.class *.jar *.*jnilib* *.d-e *.o-* *.swp *~ *.vcxproj *.vcxproj.filters *.sln *.cmake .watchmanconfig CMakeCache.txt CMakeFiles/ build/ ldb manifest_dump sst_dump blob_dump block_cache_trace_analyzer db_with_timestamp_basic_test tools/block_cache_analyzer/*.pyc column_aware_encoding_exp util/build_version.cc build_tools/VALGRIND_LOGS/ coverage/COVERAGE_REPORT .gdbhistory .gdb_history package/ unity.a tags etags rocksdb_dump rocksdb_undump db_test2 trace_analyzer trace_analyzer_test block_cache_trace_analyzer .DS_Store .vs .vscode java/out java/target java/test-libs java/*.log java/include/org_rocksdb_*.h .idea/ *.iml rocksdb.cc rocksdb.h unity.cc java/crossbuild/.vagrant .vagrant/ java/**/*.asc java/javadoc scan_build_report/ t LOG db_logs/ tp2/ fbcode/ fbcode buckifier/*.pyc buckifier/__pycache__ compile_commands.json clang-format-diff.py .py3/ rocksdb-6.11.4/.lgtm.yml000066400000000000000000000001031370372246700150030ustar00rootroot00000000000000extraction: cpp: index: build_command: make static_lib rocksdb-6.11.4/.travis.yml000066400000000000000000000215041370372246700153600ustar00rootroot00000000000000dist: xenial language: cpp os: - linux - osx arch: - amd64 - arm64 - ppc64le compiler: - clang - gcc osx_image: xcode9.4 cache: - ccache addons: apt: sources: - ubuntu-toolchain-r-test packages: - libgflags-dev - libbz2-dev - liblz4-dev - libsnappy-dev - liblzma-dev # xv - libzstd-dev - zlib1g-dev homebrew: update: true packages: - ccache - gflags - lz4 - snappy - xz - zstd env: - TEST_GROUP=platform_dependent # 16-18 minutes - TEST_GROUP=1 # 33-35 
minutes - TEST_GROUP=2 # 18-20 minutes - TEST_GROUP=3 # 20-22 minutes - TEST_GROUP=4 # 12-14 minutes # Run java tests - JOB_NAME=java_test # 4-11 minutes # Build ROCKSDB_LITE - JOB_NAME=lite_build # 3-4 minutes # Build examples - JOB_NAME=examples # 5-7 minutes - JOB_NAME=cmake # 3-5 minutes - JOB_NAME=cmake-gcc8 # 3-5 minutes - JOB_NAME=cmake-gcc9 # 3-5 minutes - JOB_NAME=cmake-gcc9-c++20 # 3-5 minutes - JOB_NAME=cmake-mingw # 3 minutes - JOB_NAME=make-gcc4.8 - JOB_NAME=status_checked matrix: exclude: - os: osx env: JOB_NAME=cmake-gcc8 - os: osx env: JOB_NAME=cmake-gcc9 - os: osx env: JOB_NAME=cmake-gcc9-c++20 - os: osx env: JOB_NAME=cmake-mingw - os: osx env: JOB_NAME=make-gcc4.8 - os: osx arch: ppc64le - os: osx compiler: gcc - os : linux arch: arm64 env: JOB_NAME=cmake-mingw - os : linux arch: arm64 env: JOB_NAME=make-gcc4.8 - os: linux arch: ppc64le env: JOB_NAME=cmake-mingw - os: linux arch: ppc64le env: JOB_NAME=make-gcc4.8 - os: linux compiler: clang # Exclude all but most unique cmake variants for pull requests, but build all in branches - if: type = pull_request os : linux arch: amd64 env: JOB_NAME=cmake - if: type = pull_request os : linux arch: amd64 env: JOB_NAME=cmake-gcc8 - if: type = pull_request os : linux arch: amd64 env: JOB_NAME=cmake-gcc9 # Exclude most osx, arm64 and ppc64le tests for pull requests, but build in branches # Temporarily disable ppc64le unit tests in PRs until Travis gets its act together (#6653) - if: type = pull_request os: linux arch: ppc64le env: TEST_GROUP=platform_dependent # NB: the cmake build is a partial java test - if: type = pull_request os: osx env: TEST_GROUP=1 - if: type = pull_request os : linux arch: arm64 env: TEST_GROUP=1 - if: type = pull_request os: linux arch: ppc64le env: TEST_GROUP=1 - if: type = pull_request os: osx env: TEST_GROUP=2 - if: type = pull_request os : linux arch: arm64 env: TEST_GROUP=2 - if: type = pull_request os: linux arch: ppc64le env: TEST_GROUP=2 - if: type = pull_request os: osx env: 
TEST_GROUP=3 - if: type = pull_request os : linux arch: arm64 env: TEST_GROUP=3 - if: type = pull_request os: linux arch: ppc64le env: TEST_GROUP=3 - if: type = pull_request os: osx env: TEST_GROUP=4 - if: type = pull_request os : linux arch: arm64 env: TEST_GROUP=4 - if: type = pull_request os: linux arch: ppc64le env: TEST_GROUP=4 - if: type = pull_request AND commit_message !~ /java/ os : osx env: JOB_NAME=java_test - if: type = pull_request AND commit_message !~ /java/ os : linux arch: arm64 env: JOB_NAME=java_test - if: type = pull_request AND commit_message !~ /java/ os: linux arch: ppc64le env: JOB_NAME=java_test - if: type = pull_request os : osx env: JOB_NAME=lite_build - if: type = pull_request os : linux arch: arm64 env: JOB_NAME=lite_build - if: type = pull_request os: linux arch: ppc64le env: JOB_NAME=lite_build - if: type = pull_request os : osx env: JOB_NAME=examples - if: type = pull_request os : linux arch: arm64 env: JOB_NAME=examples - if: type = pull_request os: linux arch: ppc64le env: JOB_NAME=examples - if: type = pull_request os : linux arch: arm64 env: JOB_NAME=cmake-gcc8 - if: type = pull_request os: linux arch: ppc64le env: JOB_NAME=cmake-gcc8 - if: type = pull_request os : linux arch: arm64 env: JOB_NAME=cmake-gcc9 - if: type = pull_request os: linux arch: ppc64le env: JOB_NAME=cmake-gcc9 - if: type = pull_request os : linux arch: arm64 env: JOB_NAME=cmake-gcc9-c++20 - if: type = pull_request os: linux arch: ppc64le env: JOB_NAME=cmake-gcc9-c++20 - if: type = pull_request os : osx env: JOB_NAME=status_checked - if: type = pull_request os : linux arch: arm64 env: JOB_NAME=status_checked - if: type = pull_request os: linux arch: ppc64le env: JOB_NAME=status_checked install: - if [ "${TRAVIS_OS_NAME}" == osx ]; then PATH=$PATH:/usr/local/opt/ccache/libexec; fi - if [ "${JOB_NAME}" == cmake-gcc8 ]; then sudo apt-get install -y g++-8; CC=gcc-8 && CXX=g++-8; fi - if [ "${JOB_NAME}" == cmake-gcc9 ] || [ "${JOB_NAME}" == cmake-gcc9-c++20 ]; then 
sudo apt-get install -y g++-9; CC=gcc-9 && CXX=g++-9; fi - if [ "${JOB_NAME}" == cmake-mingw ]; then sudo apt-get install -y mingw-w64 ; fi - if [ "${JOB_NAME}" == make-gcc4.8 ]; then sudo apt-get install -y g++-4.8 ; CC=gcc-4.8 && CXX=g++-4.8; fi - if [[ "${JOB_NAME}" == cmake* ]] && [ "${TRAVIS_OS_NAME}" == linux ]; then CMAKE_DIST_URL="https://rocksdb-deps.s3-us-west-2.amazonaws.com/cmake/cmake-3.14.5-Linux-$(uname -m).tar.bz2"; TAR_OPT="--strip-components=1 -xj"; if [ "aarch64" == "$(uname -m)" ]; then sudo apt-get install -y libuv1 librhash0; sudo apt-get upgrade -y libstdc++6; fi; mkdir cmake-dist && curl --silent --fail --show-error --location "${CMAKE_DIST_URL}" | tar -C cmake-dist ${TAR_OPT} && export PATH=$PWD/cmake-dist/bin:$PATH; fi - | if [[ "${JOB_NAME}" == java_test || "${JOB_NAME}" == cmake* ]]; then # Ensure JDK 8 if [ "${TRAVIS_OS_NAME}" == osx ]; then brew tap AdoptOpenJDK/openjdk brew cask install adoptopenjdk8 export JAVA_HOME=$(/usr/libexec/java_home) else sudo apt-get install -y openjdk-8-jdk export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture) fi echo "JAVA_HOME=${JAVA_HOME}" which java && java -version which javac && javac -version fi before_script: # Increase the maximum number of open file descriptors, since some tests use # more FDs than the default limit. 
- ulimit -n 8192 script: - date; ${CXX} --version - if [ `command -v ccache` ]; then ccache -C; fi - case $TEST_GROUP in platform_dependent) OPT=-DTRAVIS V=1 ROCKSDBTESTS_END=db_block_cache_test make -j4 all_but_some_tests check_some ;; 1) OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=db_block_cache_test ROCKSDBTESTS_END=db_iter_test make -j4 check_some ;; 2) OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" V=1 make -j4 tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" V=1 ROCKSDBTESTS_START=db_iter_test ROCKSDBTESTS_END=options_file_test make -j4 check_some ;; 3) OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=options_file_test ROCKSDBTESTS_END=write_prepared_transaction_test make -j4 check_some ;; 4) OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=write_prepared_transaction_test make -j4 check_some ;; esac - case $JOB_NAME in java_test) OPT=-DTRAVIS V=1 make rocksdbjava jtest ;; lite_build) OPT='-DTRAVIS -DROCKSDB_LITE' V=1 make -j4 all ;; examples) OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4 ;; cmake-mingw) sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni ;; cmake*) case $JOB_NAME in *-c++20) OPT=-DCMAKE_CXX_STANDARD=20 ;; esac mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j4 && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. 
-DCMAKE_BUILD_TYPE=Release $OPT && make -j4 rocksdb rocksdbjni ;; make-gcc4.8) OPT=-DTRAVIS V=1 SKIP_LINK=1 make -j4 all && [ "Linking broken because libgflags compiled with newer ABI" ] ;; status_checked) OPT=-DTRAVIS V=1 ASSERT_STATUS_CHECKED=1 make -j4 check_some ;; esac notifications: email: - leveldb@fb.com rocksdb-6.11.4/.watchmanconfig000066400000000000000000000002021370372246700162300ustar00rootroot00000000000000{ "content_hash_warming": true, "content_hash_max_items": 333333, "hint_num_files_per_dir": 8, "fsevents_latency": 0.05 } rocksdb-6.11.4/AUTHORS000066400000000000000000000005021370372246700143120ustar00rootroot00000000000000Facebook Inc. Facebook Engineering Team Google Inc. # Initial version authors: Jeffrey Dean Sanjay Ghemawat # Partial list of contributors: Kevin Regan Johan Bilien Matthew Von-Maszewski (Basho Technologies) rocksdb-6.11.4/CMakeLists.txt000066400000000000000000001252401370372246700160110ustar00rootroot00000000000000# Prerequisites for Windows: # This cmake build is for Windows 64-bit only. # # Prerequisites: # You must have at least Visual Studio 2015 Update 3. Start the Developer Command Prompt window that is a part of Visual Studio installation. # Run the build commands from within the Developer Command Prompt window to have paths to the compiler and runtime libraries set. # You must have git.exe in your %PATH% environment variable. # # To build Rocksdb for Windows is as easy as 1-2-3-4-5: # # 1. Update paths to third-party libraries in thirdparty.inc file # 2. Create a new directory for build artifacts # mkdir build # cd build # 3. Run cmake to generate project files for Windows, add more options to enable required third-party libraries. # See thirdparty.inc for more information. # sample command: cmake -G "Visual Studio 15 Win64" -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 -DWITH_SNAPPY=1 -DWITH_JEMALLOC=1 -DWITH_JNI=1 .. # 4. 
Then build the project in debug mode (you may want to add /m[:] flag to run msbuild in parallel threads # or simply /m to use all avail cores) # msbuild rocksdb.sln # # rocksdb.sln build features exclusions of test only code in Release. If you build ALL_BUILD then everything # will be attempted but test only code does not build in Release mode. # # 5. And release mode (/m[:] is also supported) # msbuild rocksdb.sln /p:Configuration=Release # # Linux: # # 1. Install a recent toolchain such as devtoolset-3 if you're on a older distro. C++11 required. # 2. mkdir build; cd build # 3. cmake .. # 4. make -j cmake_minimum_required(VERSION 3.5.1) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules/") include(ReadVersion) get_rocksdb_version(rocksdb_VERSION) project(rocksdb VERSION ${rocksdb_VERSION} LANGUAGES CXX C ASM) if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() if(NOT CMAKE_BUILD_TYPE) if(EXISTS "${CMAKE_SOURCE_DIR}/.git") set(default_build_type "Debug") else() set(default_build_type "RelWithDebInfo") endif() set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Default BUILD_TYPE is ${default_build_type}" FORCE) endif() find_program(CCACHE_FOUND ccache) if(CCACHE_FOUND) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) endif(CCACHE_FOUND) option(WITH_JEMALLOC "build with JeMalloc" OFF) option(WITH_SNAPPY "build with SNAPPY" OFF) option(WITH_LZ4 "build with lz4" OFF) option(WITH_ZLIB "build with zlib" OFF) option(WITH_ZSTD "build with zstd" OFF) option(WITH_WINDOWS_UTF8_FILENAMES "use UTF8 as characterset for opening files, regardles of the system code page" OFF) if (WITH_WINDOWS_UTF8_FILENAMES) add_definitions(-DROCKSDB_WINDOWS_UTF8_FILENAMES) endif() # third-party/folly is only validated to work on Linux and Windows for now. # So only turn it on there by default. 
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") if(MSVC AND MSVC_VERSION LESS 1910) # Folly does not compile with MSVC older than VS2017 option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) else() option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) endif() else() option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) endif() if( NOT DEFINED CMAKE_CXX_STANDARD ) set(CMAKE_CXX_STANDARD 11) endif() include(CMakeDependentOption) CMAKE_DEPENDENT_OPTION(WITH_GFLAGS "build with GFlags" ON "NOT MSVC;NOT MINGW" OFF) if(MSVC) option(WITH_XPRESS "build with windows built in compression" OFF) include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc) else() if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") # FreeBSD has jemalloc as default malloc # but it does not have all the jemalloc files in include/... set(WITH_JEMALLOC ON) else() if(WITH_JEMALLOC) find_package(JeMalloc REQUIRED) add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) list(APPEND THIRDPARTY_LIBS JeMalloc::JeMalloc) endif() endif() if(WITH_GFLAGS) # Config with namespace available since gflags 2.2.2 option(GFLAGS_USE_TARGET_NAMESPACE "Use gflags import target with namespace." 
ON) find_package(gflags CONFIG) if(gflags_FOUND) if(TARGET ${GFLAGS_TARGET}) # Config with GFLAGS_TARGET available since gflags 2.2.0 list(APPEND THIRDPARTY_LIBS ${GFLAGS_TARGET}) else() # Config with GFLAGS_LIBRARIES available since gflags 2.1.0 list(APPEND THIRDPARTY_LIBS ${GFLAGS_LIBRARIES}) endif() else() find_package(gflags REQUIRED) list(APPEND THIRDPARTY_LIBS gflags::gflags) endif() add_definitions(-DGFLAGS=1) endif() if(WITH_SNAPPY) find_package(Snappy CONFIG) if(NOT Snappy_FOUND) find_package(Snappy REQUIRED) endif() add_definitions(-DSNAPPY) list(APPEND THIRDPARTY_LIBS Snappy::snappy) endif() if(WITH_ZLIB) find_package(ZLIB REQUIRED) add_definitions(-DZLIB) list(APPEND THIRDPARTY_LIBS ZLIB::ZLIB) endif() option(WITH_BZ2 "build with bzip2" OFF) if(WITH_BZ2) find_package(BZip2 REQUIRED) add_definitions(-DBZIP2) if(BZIP2_INCLUDE_DIRS) include_directories(${BZIP2_INCLUDE_DIRS}) else() include_directories(${BZIP2_INCLUDE_DIR}) endif() list(APPEND THIRDPARTY_LIBS ${BZIP2_LIBRARIES}) endif() if(WITH_LZ4) find_package(lz4 REQUIRED) add_definitions(-DLZ4) list(APPEND THIRDPARTY_LIBS lz4::lz4) endif() if(WITH_ZSTD) find_package(zstd REQUIRED) add_definitions(-DZSTD) include_directories(${ZSTD_INCLUDE_DIR}) list(APPEND THIRDPARTY_LIBS zstd::zstd) endif() endif() string(TIMESTAMP TS "%Y/%m/%d %H:%M:%S" UTC) set(GIT_DATE_TIME "${TS}" CACHE STRING "the time we first built rocksdb") find_package(Git) if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") if(WIN32) execute_process(COMMAND $ENV{COMSPEC} /C ${GIT_EXECUTABLE} -C ${CMAKE_CURRENT_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA) else() execute_process(COMMAND ${GIT_EXECUTABLE} -C ${CMAKE_CURRENT_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA) endif() else() set(GIT_SHA 0) endif() string(REGEX REPLACE "[^0-9a-f]+" "" GIT_SHA "${GIT_SHA}") option(WITH_MD_LIBRARY "build with MD" ON) if(WIN32 AND MSVC) if(WITH_MD_LIBRARY) set(RUNTIME_LIBRARY "MD") else() set(RUNTIME_LIBRARY "MT") endif() endif() 
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc) configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY) add_library(build_version OBJECT ${BUILD_VERSION_CC}) target_include_directories(build_version PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/util) if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wextra -Wall") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wsign-compare -Wshadow -Wno-unused-parameter -Wno-unused-variable -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers -Wno-strict-aliasing") if(MINGW) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -fno-asynchronous-unwind-tables") add_definitions(-D_POSIX_C_SOURCE=1) endif() if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("-momit-leaf-frame-pointer" HAVE_OMIT_LEAF_FRAME_POINTER) if(HAVE_OMIT_LEAF_FRAME_POINTER) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -momit-leaf-frame-pointer") endif() endif() endif() include(CheckCCompilerFlag) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9) if(HAS_POWER9) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power9 -mtune=power9") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power9 -mtune=power9") else() CHECK_C_COMPILER_FLAG("-mcpu=power8" HAS_POWER8) if(HAS_POWER8) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8 -mtune=power8") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8 -mtune=power8") endif(HAS_POWER8) endif(HAS_POWER9) CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC) if(HAS_ALTIVEC) message(STATUS " HAS_ALTIVEC yes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec") 
endif(HAS_ALTIVEC) endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") CHECK_C_COMPILER_FLAG("-march=armv8-a+crc+crypto" HAS_ARMV8_CRC) if(HAS_ARMV8_CRC) message(STATUS " HAS_ARMV8_CRC yes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") endif(HAS_ARMV8_CRC) endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") option(PORTABLE "build a portable binary" OFF) option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF) if(PORTABLE) # MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h # is available, it is available by default. if(FORCE_SSE42 AND NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mpclmul") endif() else() if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") else() if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" AND NOT HAS_ARMV8_CRC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() endif() endif() include(CheckCXXSourceCompiles) if(NOT MSVC) set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul") endif() CHECK_CXX_SOURCE_COMPILES(" #include #include #include int main() { volatile uint32_t x = _mm_crc32_u32(0, 0); const auto a = _mm_set_epi64x(0, 0); const auto b = _mm_set_epi64x(0, 0); const auto c = _mm_clmulepi64_si128(a, b, 0x00); auto d = _mm_cvtsi128_si64(c); } " HAVE_SSE42) unset(CMAKE_REQUIRED_FLAGS) if(HAVE_SSE42) add_definitions(-DHAVE_SSE42) add_definitions(-DHAVE_PCLMUL) elseif(FORCE_SSE42) message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled") endif() CHECK_CXX_SOURCE_COMPILES(" #if defined(_MSC_VER) && !defined(__thread) #define __thread __declspec(thread) #endif int main() { static __thread int tls; } " HAVE_THREAD_LOCAL) if(HAVE_THREAD_LOCAL) add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) endif() option(FAIL_ON_WARNINGS "Treat compile warnings as errors" 
ON) if(FAIL_ON_WARNINGS) if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") else() # assume GCC set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() endif() option(WITH_ASAN "build with ASAN" OFF) if(WITH_ASAN) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address") if(WITH_JEMALLOC) message(FATAL "ASAN does not work well with JeMalloc") endif() endif() option(WITH_TSAN "build with TSAN" OFF) if(WITH_TSAN) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread -pie") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread -fPIC") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread -fPIC") if(WITH_JEMALLOC) message(FATAL "TSAN does not work well with JeMalloc") endif() endif() option(WITH_UBSAN "build with UBSAN" OFF) if(WITH_UBSAN) add_definitions(-DROCKSDB_UBSAN_RUN) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") if(WITH_JEMALLOC) message(FATAL "UBSAN does not work well with JeMalloc") endif() endif() option(WITH_NUMA "build with NUMA policy support" OFF) if(WITH_NUMA) find_package(NUMA REQUIRED) add_definitions(-DNUMA) include_directories(${NUMA_INCLUDE_DIR}) list(APPEND THIRDPARTY_LIBS NUMA::NUMA) endif() option(WITH_TBB "build with Threading Building Blocks (TBB)" OFF) if(WITH_TBB) find_package(TBB REQUIRED) add_definitions(-DTBB) list(APPEND THIRDPARTY_LIBS TBB::TBB) endif() # Stall notifications eat some performance from inserts option(DISABLE_STALL_NOTIF "Build with stall notifications" OFF) if(DISABLE_STALL_NOTIF) add_definitions(-DROCKSDB_DISABLE_STALL_NOTIFICATION) endif() option(WITH_DYNAMIC_EXTENSION "build with dynamic extension support" OFF) if(NOT WITH_DYNAMIC_EXTENSION) add_definitions(-DROCKSDB_NO_DYNAMIC_EXTENSION) endif() if(DEFINED 
USE_RTTI) if(USE_RTTI) message(STATUS "Enabling RTTI") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DROCKSDB_USE_RTTI") else() if(MSVC) message(STATUS "Disabling RTTI in Release builds. Always on in Debug.") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-") else() message(STATUS "Disabling RTTI in Release builds") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-rtti") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti") endif() endif() else() message(STATUS "Enabling RTTI in Debug builds only (default)") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI") if(MSVC) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-") else() set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti") endif() endif() # Used to run CI build and tests so we can run faster option(OPTDBG "Build optimized debug build with MSVC" OFF) option(WITH_RUNTIME_DEBUG "build with debug version of runtime library" ON) if(MSVC) if(OPTDBG) message(STATUS "Debug optimization is enabled") set(CMAKE_CXX_FLAGS_DEBUG "/Oxt") else() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /RTC1") # Minimal Build is deprecated after MSVC 2015 if( MSVC_VERSION GREATER 1900 ) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Gm-") else() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Gm") endif() endif() if(WITH_RUNTIME_DEBUG) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /${RUNTIME_LIBRARY}d") else() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /${RUNTIME_LIBRARY}") endif() set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oxt /Zp8 /Gm- /Gy /${RUNTIME_LIBRARY}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEBUG") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG") endif() if(CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -fno-builtin-memcmp") endif() option(ROCKSDB_LITE "Build RocksDBLite version" OFF) if(ROCKSDB_LITE) add_definitions(-DROCKSDB_LITE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -Os") endif() if(CMAKE_SYSTEM_NAME MATCHES "Cygwin") add_definitions(-fno-builtin-memcmp -DCYGWIN) elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) if(CMAKE_SYSTEM_PROCESSOR MATCHES arm) add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE) # no debug info for IOS, that will make our library big add_definitions(-DNDEBUG) endif() elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") add_definitions(-DOS_SOLARIS) elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_definitions(-DOS_FREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") add_definitions(-DOS_NETBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "OpenBSD") add_definitions(-DOS_OPENBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly") add_definitions(-DOS_DRAGONFLYBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "Android") add_definitions(-DOS_ANDROID) elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DNOMINMAX) if(MINGW) add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_VISTA) endif() endif() if(NOT WIN32) add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX) endif() option(WITH_FALLOCATE "build with fallocate" ON) if(WITH_FALLOCATE) CHECK_CXX_SOURCE_COMPILES(" #include #include int main() { int fd = open(\"/dev/null\", 0); fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024); } " HAVE_FALLOCATE) if(HAVE_FALLOCATE) add_definitions(-DROCKSDB_FALLOCATE_PRESENT) endif() endif() CHECK_CXX_SOURCE_COMPILES(" #include int main() { int fd = open(\"/dev/null\", 0); sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE); } " HAVE_SYNC_FILE_RANGE_WRITE) if(HAVE_SYNC_FILE_RANGE_WRITE) add_definitions(-DROCKSDB_RANGESYNC_PRESENT) endif() CHECK_CXX_SOURCE_COMPILES(" #include int main() { (void) PTHREAD_MUTEX_ADAPTIVE_NP; } " 
HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) if(HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) add_definitions(-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX) endif() include(CheckCXXSymbolExists) check_cxx_symbol_exists(malloc_usable_size malloc.h HAVE_MALLOC_USABLE_SIZE) if(HAVE_MALLOC_USABLE_SIZE) add_definitions(-DROCKSDB_MALLOC_USABLE_SIZE) endif() check_cxx_symbol_exists(sched_getcpu sched.h HAVE_SCHED_GETCPU) if(HAVE_SCHED_GETCPU) add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT) endif() check_cxx_symbol_exists(getauxval auvx.h HAVE_AUXV_GETAUXVAL) if(HAVE_AUXV_GETAUXVAL) add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT) endif() include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/include) if(WITH_FOLLY_DISTRIBUTED_MUTEX) include_directories(${PROJECT_SOURCE_DIR}/third-party/folly) endif() find_package(Threads REQUIRED) # Main library source code set(SOURCES cache/cache.cc cache/clock_cache.cc cache/lru_cache.cc cache/sharded_cache.cc db/arena_wrapped_db_iter.cc db/blob/blob_file_addition.cc db/blob/blob_file_garbage.cc db/blob/blob_file_meta.cc db/blob/blob_log_format.cc db/blob/blob_log_reader.cc db/blob/blob_log_writer.cc db/builder.cc db/c.cc db/column_family.cc db/compacted_db_impl.cc db/compaction/compaction.cc db/compaction/compaction_iterator.cc db/compaction/compaction_picker.cc db/compaction/compaction_job.cc db/compaction/compaction_picker_fifo.cc db/compaction/compaction_picker_level.cc db/compaction/compaction_picker_universal.cc db/convenience.cc db/db_filesnapshot.cc db/db_impl/db_impl.cc db/db_impl/db_impl_write.cc db/db_impl/db_impl_compaction_flush.cc db/db_impl/db_impl_files.cc db/db_impl/db_impl_open.cc db/db_impl/db_impl_debug.cc db/db_impl/db_impl_experimental.cc db/db_impl/db_impl_readonly.cc db/db_impl/db_impl_secondary.cc db/db_info_dumper.cc db/db_iter.cc db/dbformat.cc db/error_handler.cc db/event_helpers.cc db/experimental.cc db/external_sst_file_ingestion_job.cc db/file_indexer.cc db/flush_job.cc db/flush_scheduler.cc db/forward_iterator.cc 
# SOURCES list, continued: this block is the middle of a set(SOURCES ...)
# argument list. One entry per line; the original order is preserved.
  db/import_column_family_job.cc
  db/internal_stats.cc
  db/logs_with_prep_tracker.cc
  db/log_reader.cc
  db/log_writer.cc
  db/malloc_stats.cc
  db/memtable.cc
  db/memtable_list.cc
  db/merge_helper.cc
  db/merge_operator.cc
  db/range_del_aggregator.cc
  db/range_tombstone_fragmenter.cc
  db/repair.cc
  db/snapshot_impl.cc
  db/table_cache.cc
  db/table_properties_collector.cc
  db/transaction_log_impl.cc
  db/trim_history_scheduler.cc
  db/version_builder.cc
  db/version_edit.cc
  db/version_edit_handler.cc
  db/version_set.cc
  db/wal_manager.cc
  db/write_batch.cc
  db/write_batch_base.cc
  db/write_controller.cc
  db/write_thread.cc
  # env/
  env/env.cc
  env/env_chroot.cc
  env/env_encryption.cc
  env/env_hdfs.cc
  env/file_system.cc
  env/mock_env.cc
  # file/
  file/delete_scheduler.cc
  file/file_prefetch_buffer.cc
  file/file_util.cc
  file/filename.cc
  file/random_access_file_reader.cc
  file/read_write_util.cc
  file/readahead_raf.cc
  file/sequence_file_reader.cc
  file/sst_file_manager_impl.cc
  file/writable_file_writer.cc
  # logging/
  logging/auto_roll_logger.cc
  logging/event_logger.cc
  logging/log_buffer.cc
  # memory/
  memory/arena.cc
  memory/concurrent_arena.cc
  memory/jemalloc_nodump_allocator.cc
  memory/memkind_kmem_allocator.cc
  # memtable/
  memtable/alloc_tracker.cc
  memtable/hash_linklist_rep.cc
  memtable/hash_skiplist_rep.cc
  memtable/skiplistrep.cc
  memtable/vectorrep.cc
  memtable/write_buffer_manager.cc
  # monitoring/
  monitoring/histogram.cc
  monitoring/histogram_windowing.cc
  monitoring/in_memory_stats_history.cc
  monitoring/instrumented_mutex.cc
  monitoring/iostats_context.cc
  monitoring/perf_context.cc
  monitoring/perf_level.cc
  monitoring/persistent_stats_history.cc
  monitoring/statistics.cc
  monitoring/thread_status_impl.cc
  monitoring/thread_status_updater.cc
  monitoring/thread_status_util.cc
  monitoring/thread_status_util_debug.cc
  # options/
  options/cf_options.cc
  options/db_options.cc
  options/options.cc
  options/options_helper.cc
  options/options_parser.cc
  # port/
  port/stack_trace.cc
  # table/
  table/adaptive/adaptive_table_factory.cc
  table/block_based/binary_search_index_reader.cc
  table/block_based/block.cc
  table/block_based/block_based_filter_block.cc
  table/block_based/block_based_table_builder.cc
  table/block_based/block_based_table_factory.cc
  table/block_based/block_based_table_iterator.cc
  table/block_based/block_based_table_reader.cc
  table/block_based/block_builder.cc
  table/block_based/block_prefetcher.cc
  table/block_based/block_prefix_index.cc
  table/block_based/data_block_hash_index.cc
  table/block_based/data_block_footer.cc
  table/block_based/filter_block_reader_common.cc
  table/block_based/filter_policy.cc
  table/block_based/flush_block_policy.cc
  table/block_based/full_filter_block.cc
  table/block_based/hash_index_reader.cc
  table/block_based/index_builder.cc
  table/block_based/index_reader_common.cc
  table/block_based/parsed_full_filter_block.cc
  table/block_based/partitioned_filter_block.cc
  table/block_based/partitioned_index_iterator.cc
  table/block_based/partitioned_index_reader.cc
  table/block_based/reader_common.cc
  table/block_based/uncompression_dict_reader.cc
  table/block_fetcher.cc
  table/cuckoo/cuckoo_table_builder.cc
  table/cuckoo/cuckoo_table_factory.cc
  table/cuckoo/cuckoo_table_reader.cc
  table/format.cc
  table/get_context.cc
  table/iterator.cc
  table/merging_iterator.cc
  table/meta_blocks.cc
  table/persistent_cache_helper.cc
  table/plain/plain_table_bloom.cc
  table/plain/plain_table_builder.cc
  table/plain/plain_table_factory.cc
  table/plain/plain_table_index.cc
  table/plain/plain_table_key_coding.cc
  table/plain/plain_table_reader.cc
  table/sst_file_reader.cc
  table/sst_file_writer.cc
  table/table_properties.cc
  table/two_level_iterator.cc
  # test_util/
  test_util/sync_point.cc
  test_util/sync_point_impl.cc
  test_util/testutil.cc
  test_util/transaction_test_util.cc
  # tools/
  tools/block_cache_analyzer/block_cache_trace_analyzer.cc
  tools/dump/db_dump_tool.cc
  tools/ldb_cmd.cc
  tools/ldb_tool.cc
  tools/sst_dump_tool.cc
  tools/trace_analyzer_tool.cc
  # trace_replay/
  trace_replay/trace_replay.cc
  trace_replay/block_cache_tracer.cc
  # util/
  util/coding.cc
  util/compaction_job_stats_impl.cc
  util/comparator.cc
  util/compression_context_cache.cc
  util/concurrent_task_limiter_impl.cc
  util/crc32c.cc
  util/dynamic_bloom.cc
  util/hash.cc
  util/murmurhash.cc
  util/random.cc
  util/rate_limiter.cc
  util/slice.cc
  util/file_checksum_helper.cc
  util/status.cc
  util/string_util.cc
  util/thread_local.cc
  util/threadpool_imp.cc
  util/xxhash.cc
  # utilities/
  utilities/backupable/backupable_db.cc
  utilities/blob_db/blob_compaction_filter.cc
  utilities/blob_db/blob_db.cc
  utilities/blob_db/blob_db_impl.cc
  utilities/blob_db/blob_db_impl_filesnapshot.cc
  utilities/blob_db/blob_dump_tool.cc
  utilities/blob_db/blob_file.cc
  utilities/cassandra/cassandra_compaction_filter.cc
  utilities/cassandra/format.cc
  utilities/cassandra/merge_operator.cc
  utilities/checkpoint/checkpoint_impl.cc
  utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
  utilities/debug.cc
  utilities/env_mirror.cc
  utilities/env_timed.cc
  utilities/leveldb_options/leveldb_options.cc
  utilities/memory/memory_util.cc
  utilities/merge_operators/bytesxor.cc
  utilities/merge_operators/max.cc
  utilities/merge_operators/put.cc
  utilities/merge_operators/sortlist.cc
  utilities/merge_operators/string_append/stringappend.cc
  utilities/merge_operators/string_append/stringappend2.cc
  utilities/merge_operators/uint64add.cc
  utilities/object_registry.cc
  utilities/option_change_migration/option_change_migration.cc
  utilities/options/options_util.cc
  utilities/persistent_cache/block_cache_tier.cc
  utilities/persistent_cache/block_cache_tier_file.cc
  utilities/persistent_cache/block_cache_tier_metadata.cc
  utilities/persistent_cache/persistent_cache_tier.cc
  utilities/persistent_cache/volatile_tier_impl.cc
  utilities/simulator_cache/cache_simulator.cc
  utilities/simulator_cache/sim_cache.cc
  utilities/table_properties_collectors/compact_on_deletion_collector.cc
  utilities/trace/file_trace_reader_writer.cc
  utilities/transactions/optimistic_transaction_db_impl.cc
  utilities/transactions/optimistic_transaction.cc
  utilities/transactions/pessimistic_transaction.cc
# SOURCES list, final entries (the set(SOURCES ...) call is closed below).
  utilities/transactions/pessimistic_transaction_db.cc
  utilities/transactions/snapshot_checker.cc
  utilities/transactions/transaction_base.cc
  utilities/transactions/transaction_db_mutex_impl.cc
  utilities/transactions/transaction_lock_mgr.cc
  utilities/transactions/transaction_util.cc
  utilities/transactions/write_prepared_txn.cc
  utilities/transactions/write_prepared_txn_db.cc
  utilities/transactions/write_unprepared_txn.cc
  utilities/transactions/write_unprepared_txn_db.cc
  utilities/ttl/db_ttl_impl.cc
  utilities/write_batch_with_index/write_batch_with_index.cc
  utilities/write_batch_with_index/write_batch_with_index_internal.cc
  # The last entry had degraded to a bare "$", which CMake would treat as a
  # source file literally named "$". It is a generator expression whose
  # <...> payload was lost.
  # NOTE(review): restored as the objects of the `build_version` object
  # library, matching the upstream file; that target is defined outside this
  # block -- confirm it exists under this name.
  $<TARGET_OBJECTS:build_version>)

# crc32c.cc uses SSE4.2 / PCLMUL intrinsics; enable them for that file only.
# MSVC is excluded because it does not accept the GCC-style -m flags.
if(HAVE_SSE42 AND NOT MSVC)
  set_source_files_properties(
    util/crc32c.cc
    PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
endif()

# Architecture-specific CRC32C implementations.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
  list(APPEND SOURCES
    util/crc32c_ppc.c
    util/crc32c_ppc_asm.S)
endif()

if(HAS_ARMV8_CRC)
  list(APPEND SOURCES
    util/crc32c_arm64.cc)
endif()

# Platform port layer. This if(WIN32) block and the nested if(NOT MINGW)
# are both still open at the end of this block; they are closed further down.
if(WIN32)
  list(APPEND SOURCES
    port/win/io_win.cc
    port/win/env_win.cc
    port/win/env_default.cc
    port/win/port_win.cc
    port/win/win_logger.cc)
  if(NOT MINGW)
    # Mingw only supports std::thread when using
    # posix threads.
list(APPEND SOURCES port/win/win_thread.cc)
  endif()
  # Optional Windows-only sources.
  if(WITH_XPRESS)
    list(APPEND SOURCES port/win/xpress_win.cc)
  endif()
  if(WITH_JEMALLOC)
    list(APPEND SOURCES port/win/win_jemalloc.cc)
  endif()
else()
  # Every non-Windows platform uses the POSIX port.
  list(APPEND SOURCES
    port/port_posix.cc
    env/env_posix.cc
    env/fs_posix.cc
    env/io_posix.cc)
endif()

if(WITH_FOLLY_DISTRIBUTED_MUTEX)
  list(APPEND SOURCES
    third-party/folly/folly/detail/Futex.cpp
    third-party/folly/folly/synchronization/AtomicNotification.cpp
    third-party/folly/folly/synchronization/DistributedMutex.cpp
    third-party/folly/folly/synchronization/ParkingLot.cpp
    third-party/folly/folly/synchronization/WaitOptions.cpp)
endif()

# Target names for the static and shared flavours of the library.
set(ROCKSDB_STATIC_LIB rocksdb${ARTIFACT_SUFFIX})
set(ROCKSDB_SHARED_LIB rocksdb-shared${ARTIFACT_SUFFIX})

option(ROCKSDB_BUILD_SHARED "Build shared versions of the RocksDB libraries" ON)

option(WITH_LIBRADOS "Build with librados" OFF)
if(WITH_LIBRADOS)
  list(APPEND SOURCES utilities/env_librados.cc)
  list(APPEND THIRDPARTY_LIBS rados)
endif()

# System libraries: Windows appends to whatever is already collected, every
# other platform replaces the list with the thread library.
if(WIN32)
  list(APPEND SYSTEM_LIBS shlwapi.lib rpcrt4.lib)
else()
  set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT})
endif()

# The static library is always built.
add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES})
target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE
  ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})

# The shared library is optional and compiled from the same source list.
if(ROCKSDB_BUILD_SHARED)
  add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES})
  target_link_libraries(${ROCKSDB_SHARED_LIB} PRIVATE
    ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})

  if(WIN32)
    set_target_properties(${ROCKSDB_SHARED_LIB} PROPERTIES
      COMPILE_DEFINITIONS "ROCKSDB_DLL;ROCKSDB_LIBRARY_EXPORTS")
    if(MSVC)
      # Give each flavour its own .pdb file name.
      set_target_properties(${ROCKSDB_STATIC_LIB} PROPERTIES
        COMPILE_FLAGS "/Fd${CMAKE_CFG_INTDIR}/${ROCKSDB_STATIC_LIB}.pdb")
      set_target_properties(${ROCKSDB_SHARED_LIB} PROPERTIES
        COMPILE_FLAGS "/Fd${CMAKE_CFG_INTDIR}/${ROCKSDB_SHARED_LIB}.pdb")
    endif()
  else()
    # Versioned shared object, installed under the plain "rocksdb" name.
    set_target_properties(${ROCKSDB_SHARED_LIB} PROPERTIES
      LINKER_LANGUAGE CXX
      VERSION ${rocksdb_VERSION}
      SOVERSION ${rocksdb_VERSION_MAJOR}
      OUTPUT_NAME "rocksdb")
  endif()
endif()

# (the condition of this if() is completed right after this block)
if(ROCKSDB_BUILD_SHARED AND
NOT WIN32)
  # ROCKSDB_LIB is the library that in-tree executables link against.
  set(ROCKSDB_LIB ${ROCKSDB_SHARED_LIB})
else()
  set(ROCKSDB_LIB ${ROCKSDB_STATIC_LIB})
endif()

option(WITH_JNI "build with JNI" OFF)
# Tests are excluded from Release builds
cmake_dependent_option(WITH_TESTS "build with tests" ON
  "CMAKE_BUILD_TYPE STREQUAL Debug" OFF)
option(WITH_BENCHMARK_TOOLS "build with benchmarks" ON)
option(WITH_CORE_TOOLS "build with ldb and sst_dump" ON)
option(WITH_TOOLS "build with tools" ON)

# Anything that may compile test helpers needs the bundled gtest headers.
if(WITH_TESTS OR WITH_BENCHMARK_TOOLS OR WITH_TOOLS OR WITH_JNI OR JNI)
  include_directories(SYSTEM
    ${PROJECT_SOURCE_DIR}/third-party/gtest-1.8.1/fused-src)
endif()

# Either WITH_JNI or the shorter JNI switch pulls in the java/ subproject.
if(NOT (WITH_JNI OR JNI))
  message(STATUS "JNI library is disabled")
else()
  message(STATUS "JNI library is enabled")
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/java)
endif()

# Installation and packaging
if(WIN32)
  option(ROCKSDB_INSTALL_ON_WINDOWS "Enable install target on Windows" OFF)
endif()

# The install section and the nested if() blocks below are still open at the
# end of this block, as is the trailing set() call.
if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDOWS)
  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
    if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
      # Change default installation prefix on Linux to /usr
      set(CMAKE_INSTALL_PREFIX /usr
        CACHE PATH "Install path prefix, prepended onto install directories."
FORCE)
    endif()
  endif()

  include(GNUInstallDirs)
  include(CMakePackageConfigHelpers)

  # Where the CMake package files (config, version, targets) are installed.
  set(package_config_destination ${CMAKE_INSTALL_LIBDIR}/cmake/rocksdb)

  configure_package_config_file(
    ${CMAKE_CURRENT_LIST_DIR}/cmake/RocksDBConfig.cmake.in
    RocksDBConfig.cmake
    INSTALL_DESTINATION ${package_config_destination}
  )

  write_basic_package_version_file(
    RocksDBConfigVersion.cmake
    VERSION ${rocksdb_VERSION}
    COMPATIBILITY SameMajorVersion
  )

  # Public headers and the helper CMake modules.
  install(DIRECTORY include/rocksdb
    COMPONENT devel
    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")

  install(DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/modules"
    COMPONENT devel
    DESTINATION ${package_config_destination})

  # Static library: always installed, part of the "devel" component.
  install(
    TARGETS ${ROCKSDB_STATIC_LIB}
    EXPORT RocksDBTargets
    COMPONENT devel
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
  )

  # Shared library: only when built, part of the "runtime" component.
  if(ROCKSDB_BUILD_SHARED)
    install(
      TARGETS ${ROCKSDB_SHARED_LIB}
      EXPORT RocksDBTargets
      COMPONENT runtime
      ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
      LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
    )
  endif()

  # Exported targets are installed under the RocksDB:: namespace.
  install(
    EXPORT RocksDBTargets
    COMPONENT devel
    DESTINATION ${package_config_destination}
    NAMESPACE RocksDB::
  )

  install(
    FILES
    ${CMAKE_CURRENT_BINARY_DIR}/RocksDBConfig.cmake
    ${CMAKE_CURRENT_BINARY_DIR}/RocksDBConfigVersion.cmake
    COMPONENT devel
    DESTINATION ${package_config_destination}
  )
endif()

# Unit tests. This if(WITH_TESTS) block and the set(TESTS ...) argument list
# below are both still open at the end of this block.
if(WITH_TESTS)
  add_subdirectory(third-party/gtest-1.8.1/fused-src/gtest)
  add_library(testharness STATIC
    test_util/testharness.cc)
  target_link_libraries(testharness gtest)

  set(TESTS
    cache/cache_test.cc
    cache/lru_cache_test.cc
    db/blob/blob_file_addition_test.cc
    db/blob/blob_file_garbage_test.cc
    db/blob/db_blob_index_test.cc
    db/column_family_test.cc
    db/compact_files_test.cc
    db/compaction/compaction_job_stats_test.cc
    db/compaction/compaction_job_test.cc
    db/compaction/compaction_iterator_test.cc
    db/compaction/compaction_picker_test.cc
    db/comparator_db_test.cc
    db/corruption_test.cc
# TESTS list, continued: this block is the middle of a set(TESTS ...)
# argument list. One entry per line; the original order is preserved.
    db/cuckoo_table_db_test.cc
    db/db_basic_test.cc
    db/db_with_timestamp_basic_test.cc
    db/db_block_cache_test.cc
    db/db_bloom_filter_test.cc
    db/db_compaction_filter_test.cc
    db/db_compaction_test.cc
    db/db_dynamic_level_test.cc
    db/db_flush_test.cc
    db/db_inplace_update_test.cc
    db/db_io_failure_test.cc
    db/db_iter_test.cc
    db/db_iter_stress_test.cc
    db/db_iterator_test.cc
    db/db_log_iter_test.cc
    db/db_memtable_test.cc
    db/db_merge_operator_test.cc
    db/db_merge_operand_test.cc
    db/db_options_test.cc
    db/db_properties_test.cc
    db/db_range_del_test.cc
    db/db_impl/db_secondary_test.cc
    db/db_sst_test.cc
    db/db_statistics_test.cc
    db/db_table_properties_test.cc
    db/db_tailing_iter_test.cc
    db/db_test.cc
    db/db_test2.cc
    db/db_logical_block_size_cache_test.cc
    db/db_universal_compaction_test.cc
    db/db_wal_test.cc
    db/db_with_timestamp_compaction_test.cc
    db/db_write_test.cc
    db/dbformat_test.cc
    db/deletefile_test.cc
    db/error_handler_fs_test.cc
    db/obsolete_files_test.cc
    db/external_sst_file_basic_test.cc
    db/external_sst_file_test.cc
    db/fault_injection_test.cc
    db/file_indexer_test.cc
    db/filename_test.cc
    db/flush_job_test.cc
    db/listener_test.cc
    db/log_test.cc
    db/manual_compaction_test.cc
    db/memtable_list_test.cc
    db/merge_helper_test.cc
    db/merge_test.cc
    db/options_file_test.cc
    db/perf_context_test.cc
    db/plain_table_db_test.cc
    db/prefix_test.cc
    db/range_del_aggregator_test.cc
    db/range_tombstone_fragmenter_test.cc
    db/repair_test.cc
    db/table_properties_collector_test.cc
    db/version_builder_test.cc
    db/version_edit_test.cc
    db/version_set_test.cc
    db/wal_manager_test.cc
    db/write_batch_test.cc
    db/write_callback_test.cc
    db/write_controller_test.cc
    # env/
    env/env_basic_test.cc
    env/env_test.cc
    env/io_posix_test.cc
    env/mock_env_test.cc
    # file/
    file/delete_scheduler_test.cc
    file/random_access_file_reader_test.cc
    # logging/
    logging/auto_roll_logger_test.cc
    logging/env_logger_test.cc
    logging/event_logger_test.cc
    # memory/
    memory/arena_test.cc
    memory/memkind_kmem_allocator_test.cc
    # memtable/
    memtable/inlineskiplist_test.cc
    memtable/skiplist_test.cc
    memtable/write_buffer_manager_test.cc
    # monitoring/
    monitoring/histogram_test.cc
    monitoring/iostats_context_test.cc
    monitoring/statistics_test.cc
    monitoring/stats_history_test.cc
    # options/
    options/options_settable_test.cc
    options/options_test.cc
    # table/
    table/block_based/block_based_filter_block_test.cc
    table/block_based/block_based_table_reader_test.cc
    table/block_based/block_test.cc
    table/block_based/data_block_hash_index_test.cc
    table/block_based/full_filter_block_test.cc
    table/block_based/partitioned_filter_block_test.cc
    table/cleanable_test.cc
    table/cuckoo/cuckoo_table_builder_test.cc
    table/cuckoo/cuckoo_table_reader_test.cc
    table/merger_test.cc
    table/sst_file_reader_test.cc
    table/table_test.cc
    table/block_fetcher_test.cc
    # test_util/
    test_util/testutil_test.cc
    # tools/
    tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc
    tools/ldb_cmd_test.cc
    tools/reduce_levels_test.cc
    tools/sst_dump_test.cc
    tools/trace_analyzer_test.cc
    # util/
    util/autovector_test.cc
    util/bloom_test.cc
    util/coding_test.cc
    util/crc32c_test.cc
    util/defer_test.cc
    util/dynamic_bloom_test.cc
    util/file_reader_writer_test.cc
    util/filelock_test.cc
    util/hash_test.cc
    util/heap_test.cc
    util/random_test.cc
    util/rate_limiter_test.cc
    util/repeatable_thread_test.cc
    util/slice_test.cc
    util/slice_transform_test.cc
    util/timer_queue_test.cc
    util/timer_test.cc
    util/thread_list_test.cc
    util/thread_local_test.cc
    util/work_queue_test.cc
    # utilities/
    utilities/backupable/backupable_db_test.cc
    utilities/blob_db/blob_db_test.cc
    utilities/cassandra/cassandra_functional_test.cc
    utilities/cassandra/cassandra_format_test.cc
    utilities/cassandra/cassandra_row_merge_test.cc
    utilities/cassandra/cassandra_serialize_test.cc
    utilities/checkpoint/checkpoint_test.cc
    utilities/memory/memory_test.cc
    utilities/merge_operators/string_append/stringappend_test.cc
    utilities/object_registry_test.cc
    utilities/option_change_migration/option_change_migration_test.cc
    utilities/options/options_util_test.cc
    utilities/persistent_cache/hash_table_test.cc
    utilities/persistent_cache/persistent_cache_test.cc
# TESTS list, final entries (the set(TESTS ...) call is closed below).
    utilities/simulator_cache/cache_simulator_test.cc
    utilities/simulator_cache/sim_cache_test.cc
    utilities/table_properties_collectors/compact_on_deletion_collector_test.cc
    utilities/transactions/optimistic_transaction_test.cc
    utilities/transactions/transaction_test.cc
    utilities/transactions/transaction_lock_mgr_test.cc
    utilities/transactions/write_prepared_transaction_test.cc
    utilities/transactions/write_unprepared_transaction_test.cc
    utilities/ttl/ttl_test.cc
    utilities/write_batch_with_index/write_batch_with_index_test.cc
  )
  # Tests that only exist when the matching optional feature is enabled.
  if(WITH_LIBRADOS)
    list(APPEND TESTS utilities/env_librados_test.cc)
  endif()
  if(WITH_FOLLY_DISTRIBUTED_MUTEX)
    list(APPEND TESTS third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp)
  endif()

  # Helper sources shared by the test executables.
  set(TESTUTIL_SOURCE
    db/db_test_util.cc
    monitoring/thread_status_updater_debug.cc
    table/mock_table.cc
    test_util/fault_injection_test_env.cc
    test_util/fault_injection_test_fs.cc
    utilities/cassandra/test_utils.cc
  )

  # `check` runs ctest; each test executable is added as a dependency of it.
  enable_testing()
  add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND})

  set(TESTUTILLIB testutillib${ARTIFACT_SUFFIX})
  add_library(${TESTUTILLIB} STATIC ${TESTUTIL_SOURCE})
  target_link_libraries(${TESTUTILLIB} ${ROCKSDB_LIB})
  if(MSVC)
    set_target_properties(${TESTUTILLIB} PROPERTIES
      COMPILE_FLAGS "/Fd${CMAKE_CFG_INTDIR}/testutillib${ARTIFACT_SUFFIX}.pdb")
  endif()

  # Keep test code out of the default build for every non-Debug configuration.
  # The property is EXCLUDE_FROM_DEFAULT_BUILD_<CONFIG>; the size-optimised
  # configuration is MinSizeRel, so the suffix must be MINSIZEREL. The earlier
  # spelling "MINRELEASE" matched no standard configuration and had no effect.
  set_target_properties(${TESTUTILLIB}
    PROPERTIES
    EXCLUDE_FROM_DEFAULT_BUILD_RELEASE 1
    EXCLUDE_FROM_DEFAULT_BUILD_MINSIZEREL 1
    EXCLUDE_FROM_DEFAULT_BUILD_RELWITHDEBINFO 1
  )

  # One executable per test source. The target name is prefixed with the
  # project name; the binary keeps the plain test name via OUTPUT_NAME.
  # The loop and the trailing target_link_libraries() call are still open at
  # the end of this block.
  foreach(sourcefile ${TESTS})
    get_filename_component(exename ${sourcefile} NAME_WE)
    add_executable(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX} ${sourcefile})
    set_target_properties(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX}
      PROPERTIES
      EXCLUDE_FROM_DEFAULT_BUILD_RELEASE 1
      EXCLUDE_FROM_DEFAULT_BUILD_MINSIZEREL 1
      EXCLUDE_FROM_DEFAULT_BUILD_RELWITHDEBINFO 1
      OUTPUT_NAME ${exename}${ARTIFACT_SUFFIX}
    )
    target_link_libraries(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX}
      testutillib${ARTIFACT_SUFFIX}
testharness gtest ${ROCKSDB_LIB})
    # db_sanity_test is built but neither registered with ctest nor made a
    # dependency of `check`.
    if(NOT "${exename}" MATCHES "db_sanity_test")
      add_test(NAME ${exename} COMMAND ${exename}${ARTIFACT_SUFFIX})
      add_dependencies(check ${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX})
    endif()
    if("${exename}" MATCHES "env_librados_test")
      # env_librados_test.cc uses librados directly
      target_link_libraries(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX} rados)
    endif()
  endforeach()

  # Pick the library the C API test links against. On Windows only the shared
  # library is usable, so the test is skipped when it is not built.
  if(WIN32)
    # C executables must link to a shared object
    if(ROCKSDB_BUILD_SHARED)
      set(ROCKSDB_LIB_FOR_C ${ROCKSDB_SHARED_LIB})
    else()
      set(ROCKSDB_LIB_FOR_C OFF)
    endif()
  else()
    set(ROCKSDB_LIB_FOR_C ${ROCKSDB_LIB})
  endif()

  if(ROCKSDB_LIB_FOR_C)
    set(C_TESTS db/c_test.c)
    add_executable(c_test db/c_test.c)
    # Link the library selected above. This used to name ROCKSDB_SHARED_LIB
    # directly, which is not a target when ROCKSDB_BUILD_SHARED is OFF on
    # non-Windows platforms even though this branch is still taken there.
    target_link_libraries(c_test ${ROCKSDB_LIB_FOR_C} testharness)
    # The target is named plain `c_test` with no OUTPUT_NAME override, so the
    # command must name that target; appending ARTIFACT_SUFFIX pointed ctest
    # at a binary that is never produced when the suffix is non-empty.
    add_test(NAME c_test COMMAND c_test)
    add_dependencies(check c_test)
  endif()
endif()

# Benchmark executables.
if(WITH_BENCHMARK_TOOLS)
  add_executable(db_bench
    tools/db_bench.cc
    tools/db_bench_tool.cc)
  target_link_libraries(db_bench
    ${ROCKSDB_LIB})

  add_executable(cache_bench
    cache/cache_bench.cc)
  target_link_libraries(cache_bench
    ${ROCKSDB_LIB})

  add_executable(memtablerep_bench
    memtable/memtablerep_bench.cc)
  target_link_libraries(memtablerep_bench
    ${ROCKSDB_LIB})

  add_executable(range_del_aggregator_bench
    db/range_del_aggregator_bench.cc)
  target_link_libraries(range_del_aggregator_bench
    ${ROCKSDB_LIB})

  # NOTE(review): `testharness` is only created inside if(WITH_TESTS) above,
  # so this link line presumably fails when benchmarks are enabled without
  # tests -- confirm and guard or relocate the testharness target if so.
  add_executable(table_reader_bench
    table/table_reader_bench.cc)
  target_link_libraries(table_reader_bench
    ${ROCKSDB_LIB} testharness)

  add_executable(filter_bench
    util/filter_bench.cc)
  target_link_libraries(filter_bench
    ${ROCKSDB_LIB})

  add_executable(hash_table_bench
    utilities/persistent_cache/hash_table_bench.cc)
  target_link_libraries(hash_table_bench
    ${ROCKSDB_LIB})
endif()

# Command-line tools. core_tool_deps is not set in this block; it is
# presumably populated by the tools/ subdirectory.
if(WITH_CORE_TOOLS OR WITH_TOOLS)
  add_subdirectory(tools)
  add_custom_target(core_tools
    DEPENDS ${core_tool_deps})
endif()

# (this if() block is closed right after this block)
if(WITH_TOOLS)
  add_subdirectory(db_stress_tool)
add_custom_target(tools DEPENDS ${tool_deps}) endif() rocksdb-6.11.4/CODE_OF_CONDUCT.md000066400000000000000000000064341370372246700160530ustar00rootroot00000000000000# Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at <opensource-conduct@fb.com>. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq rocksdb-6.11.4/CONTRIBUTING.md000066400000000000000000000013021370372246700154720ustar00rootroot00000000000000# Contributing to RocksDB ## Code of Conduct The code of conduct is described in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md) ## Contributor License Agreement ("CLA") In order to accept your pull request, we need you to submit a CLA. You only need to do this once, so if you've done this for another Facebook open source project, you're good to go. If you are submitting a pull request for the first time, just let us know that you have completed the CLA and we can cross-check with your GitHub username. Complete your CLA here: <https://code.facebook.com/cla> If you prefer to sign a paper copy, we can send you a PDF. Send us an e-mail or create a new github issue to request the CLA in PDF format. rocksdb-6.11.4/COPYING000066400000000000000000000432541370372246700143100ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it.
(Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. 
The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. 
However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. 
rocksdb-6.11.4/DEFAULT_OPTIONS_HISTORY.md000066400000000000000000000027721370372246700173170ustar00rootroot00000000000000# RocksDB default options change log ## Unreleased * delayed_write_rate takes the rate given by rate_limiter if not specified. ## 5.2 * Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files. ## 5.0 (11/17/2016) * Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default * Options.level0_stop_writes_trigger default value changes from 24 to 32. ## 4.8.0 (5/2/2016) * options.max_open_files changes from 5000 to -1. It improves performance, but users need to set file descriptor limit to be large enough and watch memory usage for index and bloom filters. * options.base_background_compactions changes from max_background_compactions to 1. When users set higher max_background_compactions but the write throughput is not high, the writes are less spiky to disks. * options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. Avoid some false positive when file system or hardware reorder the writes for file data and metadata. ## 4.7.0 (4/8/2016) * options.write_buffer_size changes from 4MB to 64MB. * options.target_file_size_base changes from 2MB to 64MB. * options.max_bytes_for_level_base changes from 10MB to 256MB. * options.soft_pending_compaction_bytes_limit changes from 0 (disabled) to 64GB. * options.hard_pending_compaction_bytes_limit changes from 0 (disabled) to 256GB. * table_cache_numshardbits changes from 4 to 6. * max_file_opening_threads changes from 1 to 16. rocksdb-6.11.4/DUMP_FORMAT.md000066400000000000000000000013731370372246700154100ustar00rootroot00000000000000## RocksDB dump format The version 1 RocksDB dump format is fairly simple: 1) The dump starts with the magic 8 byte identifier "ROCKDUMP" 2) The magic is followed by an 8 byte big-endian version which is 0x00000001. 
3) Next are arbitrarily sized chunks of bytes, each prepended by a 4 byte little-endian number indicating how large the chunk is. 4) The first chunk is special and is a json string indicating some things about the creation of this dump. It contains the following keys: * database-path: The path of the database this dump was created from. * hostname: The hostname of the machine where the dump was created. * creation-time: Unix seconds since epoch when this dump was created. 5) Following the info chunk, the remaining chunks are paired up as key/value pairs. rocksdb-6.11.4/HISTORY.md000066400000000000000000003652231370372246700147430ustar00rootroot00000000000000# Rocksdb Change Log ## 6.11.4 (7/15/2020) ### Bug Fixes * Make compaction report InternalKey corruption while iterating over the input. ## 6.11.3 (7/9/2020) ### Bug Fixes * Fix a bug when index_type == kTwoLevelIndexSearch in PartitionedIndexBuilder to update FlushPolicy to point to internal key partitioner when it changes from user-key mode to internal-key mode in index partition. * Disable file deletion after MANIFEST write/sync failure until db re-open or Resume() so that subsequent re-open will not see MANIFEST referencing deleted SSTs. ## 6.11.1 (6/23/2020) ### Bug Fixes * Best-efforts recovery ignores CURRENT file completely. If CURRENT file is missing during recovery, best-efforts recovery still proceeds with MANIFEST file(s). * In best-efforts recovery, an error that is not Corruption or IOError::kNotFound or IOError::kPathNotFound will be overwritten silently. Fix this by checking all non-ok cases and return early. * Compressed block cache was automatically disabled with read-only DBs by mistake. Now it is fixed: compressed block cache will be in effect with read-only DB too. * Fail recovery and report once hitting a physical log record checksum mismatch, while reading MANIFEST. RocksDB should not continue processing the MANIFEST any further. 
* Fix a bug of wrong iterator result if another thread finishes an update and a DB flush between two statement. ### Public API Change * `DB::OpenForReadOnly()` now returns `Status::NotFound` when the specified DB directory does not exist. Previously the error returned depended on the underlying `Env`. ## 6.11 (6/12/2020) ### Bug Fixes * Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true. * Fix possible false NotFound status from batched MultiGet using index type kHashSearch. * Fix corruption caused by enabling delete triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode, along with parallel compactions. The bug can result in two parallel compactions picking the same input files, resulting in the DB resurrecting older and deleted versions of some keys. * Fix a use-after-free bug in best-efforts recovery. column_family_memtables_ needs to point to valid ColumnFamilySet. * Let best-efforts recovery ignore corrupted files during table loading. * Fix corrupt key read from ingested file when iterator direction switches from reverse to forward at a key that is a prefix of another key in the same file. It is only possible in files with a non-zero global seqno. * Fix abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size. * Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile(). * Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch). * Fix sst_dump to return non-zero exit code if the specified file is not a recognized SST file or fails requested checks. 
* Fix incorrect results from batched MultiGet for duplicate keys, when the duplicate key matches the largest key of an SST file and the value type for the key in the file is a merge value. ### Public API Change * Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request. * BlobDB now explicitly disallows using the default column family's storage directories as blob directory. * DeleteRange now returns `Status::InvalidArgument` if the range's end key comes before its start key according to the user comparator. Previously the behavior was undefined. * ldb now uses options.force_consistency_checks = true by default and "--disable_consistency_checks" is added to disable it. * DB::OpenForReadOnly no longer creates files or directories if the named DB does not exist, unless create_if_missing is set to true. * The consistency checks that validate LSM state changes (table file additions/deletions during flushes and compactions) are now stricter, more efficient, and no longer optional, i.e. they are performed even if `force_consistency_checks` is `false`. * Disable delete triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode and num_levels = 1 in order to avoid a corruption bug. * `pin_l0_filter_and_index_blocks_in_cache` no longer applies to L0 files larger than `1.5 * write_buffer_size` to give more predictable memory usage. Such L0 files may exist due to intra-L0 compaction, external file ingestion, or user dynamically changing `write_buffer_size` (note, however, that files that are already pinned will continue being pinned, even after such a dynamic change). * In point-in-time wal recovery mode, fail database recovery in case of IOError while reading the WAL to avoid data loss. ### New Features * sst_dump to add a new --readahead_size argument. Users can specify read size when scanning the data. 
Sst_dump also tries to prefetch tail part of the SST files so usually some number of I/Os are saved there too. * Generate file checksum in SstFileWriter if Options.file_checksum_gen_factory is set. The checksum and checksum function name are stored in ExternalSstFileInfo after the sst file write is finished. * Add a value_size_soft_limit in read options which limits the cumulative value size of keys read in batches in MultiGet. Once the cumulative value size of found keys exceeds read_options.value_size_soft_limit, all the remaining keys are returned with status Abort without further finding their values. By default the value_size_soft_limit is std::numeric_limits<uint64_t>::max(). * Enable SST file ingestion with file checksum information when calling IngestExternalFiles(const std::vector<IngestExternalFileArg>& args). Added files_checksums and files_checksum_func_names to IngestExternalFileArg such that user can ingest the sst files with their file checksum information. Added verify_file_checksum to IngestExternalFileOptions (default is True). To be backward compatible, if DB does not enable file checksum or user does not provide checksum information (vectors of files_checksums and files_checksum_func_names are both empty), verification of file checksum is always successful. If DB enables file checksum, DB will always generate the checksum for each ingested SST file during Prepare stage of ingestion and store the checksum in Manifest, unless verify_file_checksum is False and checksum information is provided by the application. In this case, we only verify the checksum function name and directly store the ingested checksum in Manifest. If verify_file_checksum is set to True, DB will verify the ingested checksum and function name with the generated ones. Any mismatch will fail the ingestion. Note that, if IngestExternalFileOptions::write_global_seqno is True, the seqno will be changed in the ingested file. Therefore, the checksum of the file will be changed. 
In this case, a new checksum will be generated after the seqno is updated and be stored in the Manifest. ### Performance Improvements * Eliminate redundant key comparisons during random access in block-based tables. ## 6.10 (5/2/2020) ### Bug Fixes * Fix wrong result being read from ingested file. May happen when a key in the file happen to be prefix of another key also in the file. The issue can further cause more data corruption. The issue exists with rocksdb >= 5.0.0 since DB::IngestExternalFile() was introduced. * Finish implementation of BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey. It's now ready for use. Significantly reduces read amplification in some setups, especially for iterator seeks. * Fix a bug by updating CURRENT file so that it points to the correct MANIFEST file after best-efforts recovery. * Fixed a bug where ColumnFamilyHandle objects were not cleaned up in case an error happened during BlobDB's open after the base DB had been opened. * Fix a potential undefined behavior caused by trying to dereference nullable pointer (timestamp argument) in DB::MultiGet. * Fix a bug caused by not including user timestamp in MultiGet LookupKey construction. This can lead to wrong query result since the trailing bytes of a user key, if not shorter than timestamp, will be mistaken for user timestamp. * Fix a bug caused by using wrong compare function when sorting the input keys of MultiGet with timestamps. * Upgraded version of bzip library (1.0.6 -> 1.0.8) used with RocksJava to address potential vulnerabilities if an attacker can manipulate compressed data saved and loaded by RocksDB (not normal). See issue #6703. ### Public API Change * Add a ConfigOptions argument to the APIs dealing with converting options to and from strings and files. 
The ConfigOptions is meant to replace some of the options (such as input_strings_escaped and ignore_unknown_options) and allow for more parameters to be passed in the future without changing the function signature. * Add NewFileChecksumGenCrc32cFactory to the file checksum public API, such that the builtin Crc32c based file checksum generator factory can be used by applications. * Add IsDirectory to Env and FS to indicate if a path is a directory. ### New Features * Added support for pipelined & parallel compression optimization for `BlockBasedTableBuilder`. This optimization makes block building, block compression and block appending a pipeline, and uses multiple threads to accelerate block compression. Users can set `CompressionOptions::parallel_threads` greater than 1 to enable compression parallelism. This feature is experimental for now. * Provide an allocator for memkind to be used with block cache. This is to work with memory technologies (Intel DCPMM is one such technology currently available) that require different libraries for allocation and management (such as PMDK and memkind). The high capacities available make it possible to provision large caches (up to several TBs in size) beyond what is achievable with DRAM. * Option `max_background_flushes` can be set dynamically using DB::SetDBOptions(). * Added functionality in sst_dump tool to check the compressed file size for different compression levels and print the time spent on compressing files with each compression type. Added arguments `--compression_level_from` and `--compression_level_to` to report size of all compression levels and one compression_type must be specified with it so that it will report compressed sizes of one compression type with different levels. * Added statistics for redundant insertions into block cache: rocksdb.block.cache.*add.redundant. 
(There is currently no coordination to ensure that only one thread loads a table block when many threads are trying to access that same table block.) ### Bug Fixes * Fix a bug when making options.bottommost_compression, options.compression_opts and options.bottommost_compression_opts dynamically changeable: the modified values are not written to option files or returned back to users when being queried. * Fix a bug where index key comparisons were unaccounted in `PerfContext::user_key_comparison_count` for lookups in files written with `format_version >= 3`. * Fix many bloom.filter statistics not being updated in batch MultiGet. ### Performance Improvements * Improve performance of batch MultiGet with partitioned filters, by sharing block cache lookups to applicable filter blocks. * Reduced memory copies when fetching and uncompressing compressed blocks from sst files. ## 6.9.0 (03/29/2020) ### Behavior changes * Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly. ### Public API Change * Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias. * Updated default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks. 
* A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`, you can decrease CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`. * Updated the public API of SST file checksum. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Finalize should be only called once after all data is processed to generate the final checksum. Temporary data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string. ### Bug Fixes * Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed. * Fix a data race that might cause crash when calling DB::GetCreationTimeOfOldestFile() by a small chance. The bug was introduced in 6.6 Release. * Fix a bug where a boolean value optimize_filters_for_hits was used for max threads when calling load table handles after a flush or compaction. The value is corrected to 1. The bug should not cause user visible problems. * Fix a bug which might crash the service when write buffer manager fails to insert the dummy handle to the block cache. ### Performance Improvements * In CompactRange, for levels starting from 0, if the level does not have any file with any key falling in the specified range, the level is skipped. So instead of always compacting from level 0, the compaction starts from the first level with keys in the specified range until the last such level. 
* Reduced memory copy when reading sst footer and blobdb in direct IO mode. * When restarting a database with large numbers of sst files, large amount of CPU time is spent on getting logical block size of the sst files, which slows down the starting progress, this inefficiency is optimized away with an internal cache for the logical block sizes. ### New Features * Basic support for user timestamp in iterator. Seek/SeekToFirst/Next and lower/upper bounds are supported. Reverse iteration is not supported. Merge is not considered. * When file lock failure when the lock is held by the current process, return acquiring time and thread ID in the error message. * Added a new option, best_efforts_recovery (default: false), to allow database to open in a db dir with missing table files. During best efforts recovery, missing table files are ignored, and database recovers to the most recent state without missing table file. Cross-column-family consistency is not guaranteed even if WAL is enabled. * options.bottommost_compression, options.compression_opts and options.bottommost_compression_opts are now dynamically changeable. ## 6.8.0 (02/24/2020) ### Java API Changes * Major breaking changes to Java comparators, toward standardizing on ByteBuffer for performant, locale-neutral operations on keys (#6252). * Added overloads of common API methods using direct ByteBuffers for keys and values (#2283). ### Bug Fixes * Fix incorrect results while block-based table uses kHashSearch, together with Prev()/SeekForPrev(). * Fix a bug that prevents opening a DB after two consecutive crash with TransactionDB, where the first crash recovers from a corrupted WAL with kPointInTimeRecovery but the second cannot. * Fixed issue #6316 that can cause a corruption of the MANIFEST file in the middle when writing to it fails due to no disk space. * Add DBOptions::skip_checking_sst_file_sizes_on_db_open. It disables potentially expensive checking of all sst file sizes in DB::Open(). 
* BlobDB now ignores trivially moved files when updating the mapping between blob files and SSTs. This should mitigate issue #6338 where out of order flush/compaction notifications could trigger an assertion with the earlier code. * Fix a bug where batched MultiGet() ignored IO errors while reading data blocks, causing it to potentially continue looking for a key and return stale results. * `WriteBatchWithIndex::DeleteRange` returns `Status::NotSupported`. Previously it returned success even though reads on the batch did not account for range tombstones. The corresponding language bindings now cannot be used. In C, that includes `rocksdb_writebatch_wi_delete_range`, `rocksdb_writebatch_wi_delete_range_cf`, `rocksdb_writebatch_wi_delete_rangev`, and `rocksdb_writebatch_wi_delete_rangev_cf`. In Java, that includes `WriteBatchWithIndex::deleteRange`. * Assign new MANIFEST file number when caller tries to create a new MANIFEST by calling LogAndApply(..., new_descriptor_log=true). This bug can cause MANIFEST being overwritten during recovery if options.write_dbid_to_manifest = true and there are WAL file(s). ### Performance Improvements * Perform readahead when reading from option files. Inside DB, options.log_readahead_size will be used as the readahead size. In other cases, a default 512KB is used. ### Public API Change * The BlobDB garbage collector now emits the statistics `BLOB_DB_GC_NUM_FILES` (number of blob files obsoleted during GC), `BLOB_DB_GC_NUM_NEW_FILES` (number of new blob files generated during GC), `BLOB_DB_GC_FAILURES` (number of failed GC passes), `BLOB_DB_GC_NUM_KEYS_RELOCATED` (number of blobs relocated during GC), and `BLOB_DB_GC_BYTES_RELOCATED` (total size of blobs relocated during GC). On the other hand, the following statistics, which are not relevant for the new GC implementation, are now deprecated: `BLOB_DB_GC_NUM_KEYS_OVERWRITTEN`, `BLOB_DB_GC_NUM_KEYS_EXPIRED`, `BLOB_DB_GC_BYTES_OVERWRITTEN`, `BLOB_DB_GC_BYTES_EXPIRED`, and `BLOB_DB_GC_MICROS`. 
* Disable recycle_log_file_num when inconsistent recovery modes are requested: kPointInTimeRecovery and kAbsoluteConsistency ### New Features * Added the checksum for each SST file generated by Flush or Compaction. Added sst_file_checksum_func to Options such that user can plugin their own SST file checksum function by overriding the FileChecksumFunc class. If user does not set the sst_file_checksum_func, SST file checksum calculation will not be enabled. The checksum information includes a uint32_t checksum value and a checksum function name (string). The checksum information is stored in FileMetadata in version store and also logged to MANIFEST. A new tool is added to LDB such that user can dump out a list of file checksum information from MANIFEST (stored in an unordered_map). * `db_bench` now supports `value_size_distribution_type`, `value_size_min`, `value_size_max` options for generating random variable sized value. Added `blob_db_compression_type` option for BlobDB to enable blob compression. * Replace RocksDB namespace "rocksdb" with flag "ROCKSDB_NAMESPACE" which, if not defined, is defined as "rocksdb" in header file rocksdb_namespace.h. ## 6.7.0 (01/21/2020) ### Public API Change * Added a rocksdb::FileSystem class in include/rocksdb/file_system.h to encapsulate file creation/read/write operations, and an option DBOptions::file_system to allow a user to pass in an instance of rocksdb::FileSystem. If it's a non-null value, this will take precedence over DBOptions::env for file operations. A new API rocksdb::FileSystem::Default() returns a platform default object. The DBOptions::env option and Env::Default() API will continue to be used for threading and other OS related functions, and where DBOptions::file_system is not specified, for file operations. For storage developers who are accustomed to rocksdb::Env, the interface in rocksdb::FileSystem is new and will probably undergo some changes as more storage systems are ported to it from rocksdb::Env. 
As of now, no env other than Posix has been ported to the new interface. * A new rocksdb::NewSstFileManager() API that allows the caller to pass in separate Env and FileSystem objects. * Changed Java API for RocksDB.keyMayExist functions to use Holder instead of StringBuilder, so that retrieved values need not decode to Strings. * A new `OptimisticTransactionDBOptions` Option that allows users to configure occ validation policy. The default policy changes from kValidateSerial to kValidateParallel to reduce mutex contention. ### Bug Fixes * Fix a bug that can cause unnecessary bg thread to be scheduled (#6104). * Fix crash caused by concurrent CF iterations and drops (#6147). * Fix a race condition for cfd->log_number_ between manifest switch and memtable switch (PR 6249) when number of column families is greater than 1. * Fix a bug on fractional cascading index when multiple files at the same level contain the same smallest user key, and those user keys are for merge operands. In this case, Get() the exact key may miss some merge operands. * Declare kHashSearch index type feature-incompatible with index_block_restart_interval larger than 1. * Fixed an issue where the thread pools were not resized upon setting `max_background_jobs` dynamically through the `SetDBOptions` interface. * Fix a bug that can cause write threads to hang when a slowdown/stall happens and there is a mix of writers with WriteOptions::no_slowdown set/unset. * Fixed an issue where an incorrect "number of input records" value was used to compute the "records dropped" statistics for compactions. * Fix a regression bug that causes segfault when hash is used, max_open_files != -1 and total order seek is used and switched back. ### New Features * It is now possible to enable periodic compactions for the base DB when using BlobDB. * BlobDB now garbage collects non-TTL blobs when `enable_garbage_collection` is set to `true` in `BlobDBOptions`. 
Garbage collection is performed during compaction: any valid blobs located in the oldest N files (where N is the number of non-TTL blob files multiplied by the value of `BlobDBOptions::garbage_collection_cutoff`) encountered during compaction get relocated to new blob files, and old blob files are dropped once they are no longer needed. Note: we recommend enabling periodic compactions for the base DB when using this feature to deal with the case when some old blob files are kept alive by SSTs that otherwise do not get picked for compaction. * `db_bench` now supports the `garbage_collection_cutoff` option for BlobDB. * Introduce ReadOptions.auto_prefix_mode. When set to true, iterator will return the same result as total order seek, but may choose to use prefix seek internally based on seek key and iterator upper bound. * MultiGet() can use IO Uring to parallelize read from the same SST file. This feature is by default disabled. It can be enabled with environment variable ROCKSDB_USE_IO_URING. ## 6.6.2 (01/13/2020) ### Bug Fixes * Fixed a bug where non-L0 compaction input files were not considered to compute the `creation_time` of new compaction outputs. ## 6.6.1 (01/02/2020) ### Bug Fixes * Fix a bug in WriteBatchWithIndex::MultiGetFromBatchAndDB, which is called by Transaction::MultiGet, that causes a crash due to stale pointer access when the number of keys is > 32 * Fixed two performance issues related to memtable history trimming. First, a new SuperVersion is now created only if some memtables were actually trimmed. Second, trimming is only scheduled if there is at least one flushed memtable that is kept in memory for the purposes of transaction conflict checking. * BlobDB no longer updates the SST to blob file mapping upon failed compactions. * Fix a bug in which a snapshot read through an iterator could be affected by a DeleteRange after the snapshot (#6062). 
* Fixed a bug where BlobDB was comparing the `ColumnFamilyHandle` pointers themselves instead of only the column family IDs when checking whether an API call uses the default column family or not. * Delete superversions in BackgroundCallPurge. * Fix use-after-free and double-deleting files in BackgroundCallPurge(). ## 6.6.0 (11/25/2019) ### Bug Fixes * Fix data corruption caused by output of intra-L0 compaction on ingested file not being placed in correct order in L0. * Fix a data race between Version::GetColumnFamilyMetaData() and Compaction::MarkFilesBeingCompacted() for access to being_compacted (#6056). The current fix acquires the db mutex during Version::GetColumnFamilyMetaData(), which may cause regression. * Fix a bug in DBIter where the is_blob_ state isn't updated when iterating backward using seek. * Fix a bug when format_version=3, partitioned filters, and prefix search are used in conjunction. The bug could result in Seek::(prefix) returning NotFound for an existing prefix. * Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound. * Fix a bug causing a crash during ingest external file when background compaction causes a severe error (file not found). * Fix a bug when partitioned filters and prefix search are used in conjunction, ::SeekForPrev could return invalid for an existing prefix. ::SeekForPrev might be called by the user, or internally on ::Prev, or within ::Seek if the return value involves Delete or a Merge operand. * Fix OnFlushCompleted fired before flush result persisted in MANIFEST when there's concurrent flush job. The bug exists since OnFlushCompleted was introduced in rocksdb 3.8. * Fixed an sst_dump crash on some plain table SST files. * Fixed a memory leak in some error cases of opening plain table SST files. 
* Fix a bug when a crash happens while calling WriteLevel0TableForRecovery for multiple column families, leading to a column family's log number greater than the first corrupted log number when the DB is being opened in PointInTime recovery mode during next recovery attempt (#5856). ### New Features * Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold. * `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` now expose the file number of SST files as well as the oldest blob file referenced by each SST. * A batched MultiGet API (DB::MultiGet()) that supports retrieving keys from multiple column families. * Full and partitioned filters in the block-based table use an improved Bloom filter implementation, enabled with format_version 5 (or above) because previous releases cannot read this filter. This replacement is faster and more accurate, especially for high bits per key or millions of keys in a single (full) filter. For example, the new Bloom filter has the same false positive rate at 9.55 bits per key as the old one at 10 bits per key, and a lower false positive rate at 16 bits per key than the old one at 100 bits per key. * Added AVX2 instructions to USE_SSE builds to accelerate the new Bloom filter and XXH3-based hash function on compatible x86_64 platforms (Haswell and later, ~2014). * Support options.ttl or options.periodic_compaction_seconds with options.max_open_files = -1. File's oldest ancestor time and file creation time will be written to manifest. If it is available, this information will be used instead of creation_time and file_creation_time in table properties. * Setting options.ttl for universal compaction now has the same meaning as setting periodic_compaction_seconds. * SstFileMetaData also returns file creation time and oldest ancestor time. 
* The `sst_dump` command line tool `recompress` command now displays how many blocks were compressed and how many were not, in particular how many were not compressed because the compression ratio was not met (12.5% threshold for GoodCompressionRatio), as seen in the `number.block.not_compressed` counter stat since version 6.0.0. * The block cache usage now takes into account the overhead of metadata for each entry. This results in more accurate management of memory. A side-effect of this feature is that fewer items fit into the block cache of the same size, which would result in higher cache miss rates. This can be remedied by increasing the block cache size or passing kDontChargeCacheMetadata to its constructor to restore the old behavior. * When using BlobDB, a mapping is maintained and persisted in the MANIFEST between each SST file and the oldest non-TTL blob file it references. * `db_bench` now supports and by default issues non-TTL Puts to BlobDB. TTL Puts can be enabled by specifying a non-zero value for the `blob_db_max_ttl_range` command line parameter explicitly. * `sst_dump` now supports printing BlobDB blob indexes in a human-readable format. This can be enabled by specifying the `decode_blob_index` flag on the command line. * A number of new information elements are now exposed through the EventListener interface. For flushes, the file numbers of the new SST file and the oldest blob file referenced by the SST are propagated. For compactions, the level, file number, and the oldest blob file referenced are passed to the client for each compaction input and output file. ### Public API Change * RocksDB release 4.1 or older will not be able to open DB generated by the new release. 4.2 was released on Feb 23, 2016. * TTL Compactions in Level compaction style now initiate successive cascading compactions on a key range so that it reaches the bottom level quickly on TTL expiry. 
`creation_time` table property for compaction output files is now set to the minimum of the creation times of all compaction inputs. * With FIFO compaction style, options.periodic_compaction_seconds will have the same meaning as options.ttl. Whichever stricter will be used. With the default options.periodic_compaction_seconds value with options.ttl's default of 0, RocksDB will give a default of 30 days. * Added an API GetCreationTimeOfOldestFile(uint64_t* creation_time) to get the file_creation_time of the oldest SST file in the DB. * FilterPolicy now exposes additional API to make it possible to choose filter configurations based on context, such as table level and compaction style. See `LevelAndStyleCustomFilterPolicy` in db_bloom_filter_test.cc. While most existing custom implementations of FilterPolicy should continue to work as before, those wrapping the return of NewBloomFilterPolicy will require overriding new function `GetBuilderWithContext()`, because calling `GetFilterBitsBuilder()` on the FilterPolicy returned by NewBloomFilterPolicy is no longer supported. * An unlikely usage of FilterPolicy is no longer supported. Calling GetFilterBitsBuilder() on the FilterPolicy returned by NewBloomFilterPolicy will now cause an assertion violation in debug builds, because RocksDB has internally migrated to a more elaborate interface that is expected to evolve further. Custom implementations of FilterPolicy should work as before, except those wrapping the return of NewBloomFilterPolicy, which will require a new override of a protected function in FilterPolicy. * NewBloomFilterPolicy now takes bits_per_key as a double instead of an int. This permits finer control over the memory vs. accuracy trade-off in the new Bloom filter implementation and should not change source code compatibility. * The option BackupableDBOptions::max_valid_backups_to_open is now only used when opening BackupEngineReadOnly. 
When opening a read/write BackupEngine, anything but the default value logs a warning and is treated as the default. This change ensures that backup deletion has proper accounting of shared files to ensure they are deleted when no longer referenced by a backup. * Deprecate `snap_refresh_nanos` option. * Added DisableManualCompaction/EnableManualCompaction to stop and resume manual compaction. * Add TryCatchUpWithPrimary() to StackableDB in non-LITE mode. * Add a new Env::LoadEnv() overloaded function to return a shared_ptr to Env. * Flush sets file name to "(nil)" for OnTableFileCreationCompleted() if the flush does not produce any L0. This can happen if the file is empty thus deleted by RocksDB. ### Default Option Changes * Changed the default value of periodic_compaction_seconds to `UINT64_MAX - 1` which allows RocksDB to auto-tune periodic compaction scheduling. When using the default value, periodic compactions are now auto-enabled if a compaction filter is used. A value of `0` will turn off the feature completely. * Changed the default value of ttl to `UINT64_MAX - 1` which allows RocksDB to auto-tune ttl value. When using the default value, TTL will be auto-enabled to 30 days, when the feature is supported. To revert the old behavior, you can explicitly set it to 0. ### Performance Improvements * For 64-bit hashing, RocksDB is standardizing on a slightly modified preview version of XXH3. This function is now used for many non-persisted hashes, along with fastrange64() in place of the modulus operator, and some benchmarks show a slight improvement. * Level iterator to invalidate the iterator more often in prefix seek and the level is filtered out by prefix bloom. ## 6.5.2 (11/15/2019) ### Bug Fixes * Fix an assertion failure in MultiGet() when BlockBasedTableOptions::no_block_cache is true and there is no compressed block cache * Fix a buffer overrun problem in BlockBasedTable::MultiGet() when compression is enabled and no compressed block cache is configured. 
* If a call to BackupEngine::PurgeOldBackups or BackupEngine::DeleteBackup suffered a crash, power failure, or I/O error, files could be left over from old backups that could only be purged with a call to GarbageCollect. Any call to PurgeOldBackups, DeleteBackup, or GarbageCollect should now suffice to purge such files. ## 6.5.1 (10/16/2019) ### Bug Fixes * Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound. * Fix a bug in BlockBasedTableIterator that might return incorrect results when reseek happens with a different iterator upper bound. * Fix a bug when partitioned filters and prefix search are used in conjunction, ::SeekForPrev could return invalid for an existing prefix. ::SeekForPrev might be called by the user, or internally on ::Prev, or within ::Seek if the return value involves Delete or a Merge operand. ## 6.5.0 (9/13/2019) ### Bug Fixes * Fixed a number of data races in BlobDB. * Fix a bug where the compaction snapshot refresh feature is not disabled as advertised when `snap_refresh_nanos` is set to 0. * Fix bloom filter lookups by the MultiGet batching API when BlockBasedTableOptions::whole_key_filtering is false, by checking that a key is in the prefix_extractor domain and extracting the prefix before looking up. * Fix a bug in file ingestion caused by incorrect file number allocation when the number of column families involved in the ingestion exceeds 2. ### New Features * Introduced DBOptions::max_write_batch_group_size_bytes to configure maximum limit on number of bytes that are written in a single batch of WAL or memtable write. It is followed when the leader write size is larger than 1/8 of this limit. * VerifyChecksum() by default will issue readahead. Allow ReadOptions to be passed in to those functions to override the readahead size. 
For checksum verifying before external SST file ingestion, a new option IngestExternalFileOptions.verify_checksums_readahead_size, is added for this readahead setting. * When user uses options.force_consistency_check in RocksDB, instead of crashing the process, we now pass the error back to the users without killing the process. * Add an option `memtable_insert_hint_per_batch` to WriteOptions. If it is true, each WriteBatch will maintain its own insert hints for each memtable in concurrent write. See include/rocksdb/options.h for more details. ### Public API Change * Added max_write_buffer_size_to_maintain option to better control memory usage of immutable memtables. * Added a lightweight API GetCurrentWalFile() to get last live WAL filename and size. Meant to be used as a helper for backup/restore tooling in a larger ecosystem such as MySQL with a MyRocks storage engine. * The MemTable Bloom filter, when enabled, now always uses cache locality. Options::bloom_locality now only affects the PlainTable SST format. ### Performance Improvements * Improve the speed of the MemTable Bloom filter, reducing the write overhead of enabling it by 1/3 to 1/2, with similar benefit to read performance. ## 6.4.0 (7/30/2019) ### Default Option Change * LRUCacheOptions.high_pri_pool_ratio is set to 0.5 (previously 0.0) by default, which means that by default midpoint insertion is enabled. The same change is made for the default value of high_pri_pool_ratio argument in NewLRUCache(). When block cache is not explicitly created, the small block cache created by BlockBasedTable will still have this option set to 0.0. * Change BlockBasedTableOptions.cache_index_and_filter_blocks_with_high_priority's default value from false to true. ### Public API Change * Filter and compression dictionary blocks are now handled similarly to data blocks with regards to the block cache: instead of storing objects in the cache, only the blocks themselves are cached. 
In addition, filter and compression dictionary blocks (as well as filter partitions) no longer get evicted from the cache when a table is closed. * Due to the above refactoring, block cache eviction statistics for filter and compression dictionary blocks are temporarily broken. We plan to reintroduce them in a later phase. * The semantics of the per-block-type block read counts in the performance context now match those of the generic block_read_count. * Errors related to the retrieval of the compression dictionary are now propagated to the user. * db_bench adds a "benchmark" stats_history, which prints out the whole stats history. * Overload GetAllKeyVersions() to support non-default column family. * Added new APIs ExportColumnFamily() and CreateColumnFamilyWithImport() to support export and import of a Column Family. https://github.com/facebook/rocksdb/issues/3469 * ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator. * Replaces old Registrar with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env. * Added new overload of GetApproximateSizes which gets SizeApproximationOptions object and returns a Status. The older overloads are redirecting their calls to this new method and no longer assert if the include_flags doesn't have either of INCLUDE_MEMTABLES or INCLUDE_FILES bits set. It's recommended to use the new method only, as it is more type safe and returns a meaningful status in case of errors. * LDBCommandRunner::RunCommand() to return the status code as an integer, rather than call exit() using the code. ### New Features * Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact. * Compression dictionary blocks are now prefetched and pinned in the cache (based on the customer's settings) the same way as index and filter blocks. 
* Added DBOptions::log_readahead_size which specifies the number of bytes to prefetch when reading the log. This is mostly useful for reading a remotely located log, as it can save the number of round-trips. If 0 (default), then the prefetching is disabled. * Added new option in SizeApproximationOptions used with DB::GetApproximateSizes. When approximating the files total size that is used to store a keys range, allow approximation with an error margin of up to total_files_size * files_size_error_margin. This allows to take some shortcuts in files size approximation, resulting in better performance, while guaranteeing the resulting error is within a reasonable margin. * Support loading custom objects in unit tests. In the affected unit tests, RocksDB will create custom Env objects based on environment variable TEST_ENV_URI. Users need to make sure custom object types are properly registered. For example, a static library should expose a `RegisterCustomObjects` function. By linking the unit test binary with the static library, the unit test can execute this function. ### Performance Improvements * Reduce iterator key comparison for upper/lower bound check. * Improve performance of row_cache: make reads with newer snapshots than data in an SST file share the same cache key, except in some transaction cases. * The compression dictionary is no longer copied to a new object upon retrieval. ### Bug Fixes * Fix ingested file and directory not being fsync. * Return TryAgain status in place of Corruption when new tail is not visible to TransactionLogIterator. * Fixed a regression where the fill_cache read option also affected index blocks. * Fixed an issue where using cache_index_and_filter_blocks==false affected partitions of partitioned indexes/filters as well. ## 6.3.2 (8/15/2019) ### Public API Change * The semantics of the per-block-type block read counts in the performance context now match those of the generic block_read_count. 
### Bug Fixes * Fixed a regression where the fill_cache read option also affected index blocks. * Fixed an issue where using cache_index_and_filter_blocks==false affected partitions of partitioned indexes as well. ## 6.3.1 (7/24/2019) ### Bug Fixes * Fix auto rolling bug introduced in 6.3.0, which causes segfault if log file creation fails. ## 6.3.0 (6/18/2019) ### Public API Change * Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released. * Index blocks are now handled similarly to data blocks with regards to the block cache: instead of storing objects in the cache, only the blocks themselves are cached. In addition, index blocks no longer get evicted from the cache when a table is closed, can now use the compressed block cache (if any), and can be shared among multiple table readers. * Partitions of partitioned indexes no longer affect the read amplification statistics. * Due to the above refactoring, block cache eviction statistics for indexes are temporarily broken. We plan to reintroduce them in a later phase. * options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take a significant amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set. * Add initial support for Get/Put with user timestamps. Users can specify timestamps via ReadOptions and WriteOptions when calling DB::Get and DB::Put. * Accessing a partition of a partitioned filter or index through a pinned reference is no longer considered a cache hit. * Add C bindings for secondary instance, i.e. DBImplSecondary. 
* Rate limited deletion of WALs is only enabled if DBOptions::wal_dir is not set, or explicitly set to db_name passed to DB::Open and DBOptions::db_paths is empty, or same as db_paths[0].path ### New Features * Add an option `snap_refresh_nanos` (default to 0) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. * Add an option `unordered_write` which trades snapshot guarantees with higher write throughput. When used with WRITE_PREPARED transactions with two_write_queues=true, it offers higher throughput with however no compromise on guarantees. * Allow DBImplSecondary to remove memtables with obsolete data after replaying MANIFEST and WAL. * Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error. * Add command `list_file_range_deletes` in ldb, which prints out tombstones in SST files. ### Performance Improvements * Reduce binary search when iterator reseek into the same data block. * DBIter::Next() can skip user key checking if previous entry's seqnum is 0. * Merging iterator to avoid child iterator reseek for some cases * Log Writer will flush after finishing the whole record, rather than a fragment. * Lower MultiGet batching API latency by reading data blocks from disk in parallel ### General Improvements * Added new status code kColumnFamilyDropped to distinguish between Column Family Dropped and DB Shutdown in progress. * Improve ColumnFamilyOptions validation when creating a new column family. ### Bug Fixes * Fix a bug in WAL replay of secondary instance by skipping write batches with older sequence numbers than the current last sequence number. * Fix flush's/compaction's merge processing logic which allowed `Put`s covered by range tombstones to reappear. 
Note `Put`s may exist even if the user only ever called `Merge()` due to an internal conversion during compaction to the bottommost level. * Fix/improve memtable earliest sequence assignment and WAL replay so that WAL entries of unflushed column families will not be skipped after replaying the MANIFEST and increasing db sequence due to another flushed/compacted column family. * Fix a bug caused by secondary not skipping the beginning of new MANIFEST. * On DB open, delete WAL trash files left behind in wal_dir ## 6.2.0 (4/30/2019) ### New Features * Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`. * Improve range scan performance by avoiding per-key upper bound check in BlockBasedTableIterator. * Introduce Periodic Compaction for Level style compaction. Files are re-compacted periodically and put in the same level. * Block-based table index now contains exact highest key in the file, rather than an upper bound. This may improve Get() and iterator Seek() performance in some situations, especially when direct IO is enabled and block cache is disabled. A setting BlockBasedTableOptions::index_shortening is introduced to control this behavior. Set it to kShortenSeparatorsAndSuccessor to get the old behavior. * When reading from option file/string/map, customized envs can be filled according to object registry. * Improve range scan performance when using explicit user readahead by not creating new table readers for every iterator. * Add index type BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey. It significantly reduces read amplification in some setups, especially for iterator seeks. It's not fully implemented yet: IO errors are not handled right. ### Public API Change * Change the behavior of OptimizeForPointLookup(): move away from hash-based block-based-table index, and use whole key memtable filtering. 
* Change the behavior of OptimizeForSmallDb(): use a 16MB block cache, put index and filter blocks into it, and cost the memtable size to it. DBOptions.OptimizeForSmallDb() and ColumnFamilyOptions.OptimizeForSmallDb() start to take an optional cache object. * Added BottommostLevelCompaction::kForceOptimized to avoid double compacting newly compacted files in the bottommost level compaction of manual compaction. Note this option may prohibit the manual compaction to produce a single file in the bottommost level. ### Bug Fixes * Adjust WriteBufferManager's dummy entry size to block cache from 1MB to 256KB. * Fix a race condition between WritePrepared::Get and ::Put with duplicate keys. * Fix crash when memtable prefix bloom is enabled and read/write a key out of domain of prefix extractor. * Close a WAL file before another thread deletes it. * Fix an assertion failure `IsFlushPending() == true` caused by one bg thread releasing the db mutex in ~ColumnFamilyData and another thread clearing `flush_requested_` flag. ## 6.1.1 (4/9/2019) ### New Features * When reading from option file/string/map, customized comparators and/or merge operators can be filled according to object registry. ### Public API Change ### Bug Fixes * Fix a bug in 2PC where a sequence of txn prepare, memtable flush, and crash could result in losing the prepared transaction. * Fix a bug in Encryption Env which could cause encrypted files to be read beyond file boundaries. ## 6.1.0 (3/27/2019) ### New Features * Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers. * Added a feature to perform data-block sampling for compressibility, and report stats to user. * Add support for trace filtering. * Add DBOptions.avoid_unnecessary_blocking_io. If true, we avoid file deletion when destroying ColumnFamilyHandle and Iterator. Instead, a job is scheduled to delete the files in background. ### Public API Change * Remove bundled fbson library. * statistics.stats_level_ becomes atomic. 
It is preferred to use statistics.set_stats_level() and statistics.get_stats_level() to access it. * Introduce a new IOError subcode, PathNotFound, to indicate trying to open a nonexistent file or directory for read. * Add initial support for multiple db instances sharing the same data in single-writer, multi-reader mode. * Removed some "using std::xxx" from public headers. ### Bug Fixes * Fix JEMALLOC_CXX_THROW macro missing from older Jemalloc versions, causing build failures on some platforms. * Fix SstFileReader not able to open file ingested with write_global_seqno=true. ## 6.0.0 (2/19/2019) ### New Features * Enabled checkpoint on readonly db (DBImplReadOnly). * Make DB ignore dropped column families while committing results of atomic flush. * RocksDB may choose to preopen some files even if options.max_open_files != -1. This may make DB open slightly longer. * For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds. * For all users of dictionary compression who set `cache_index_and_filter_blocks == true`, we now store dictionary data used for decompression in the block cache for better control over memory usage. For users of ZSTD v1.1.4+ who compile with -DZSTD_STATIC_LINKING_ONLY, this includes a digested dictionary, which is used to increase decompression speed. * Add support for block checksums verification for external SST files before ingestion. * Introduce stats history which periodically saves Statistics snapshots and added `GetStatsHistory` API to retrieve these snapshots. * Add a place holder in manifest which indicate a record from future that can be safely ignored. * Add support for trace sampling. * Enable properties block checksum verification for block-based tables. * For all users of dictionary compression, we now generate a separate dictionary for compressing each bottom-level SST file. 
Previously we reused a single dictionary for a whole compaction to bottom level. The new approach achieves better compression ratios; however, it uses more memory and CPU for buffering/sampling data blocks and training dictionaries. * Add whole key bloom filter support in memtable. * Files written by `SstFileWriter` will now use dictionary compression if it is configured in the file writer's `CompressionOptions`. ### Public API Change * Disallow CompactionFilter::IgnoreSnapshots() = false, because it is not very useful and the behavior is confusing. The filter will filter everything if there is no snapshot declared by the time the compaction starts. However, users can define a snapshot after the compaction starts and before it finishes and this new snapshot won't be repeatable, because after the compaction finishes, some keys may be dropped. * CompactionPri = kMinOverlappingRatio also uses compensated file size, which boosts file with lots of tombstones to be compacted first. * Transaction::GetForUpdate is extended with a do_validate parameter with default value of true. If false it skips validating the snapshot before doing the read. Similarly ::Merge, ::Put, ::Delete, and ::SingleDelete are extended with assume_tracked with default value of false. If true it indicates that call is assumed to be after a ::GetForUpdate. * `TableProperties::num_entries` and `TableProperties::num_deletions` now also account for number of range tombstones. * Remove geodb, spatial_db, document_db, json_document, date_tiered_db, and redis_lists. * With "ldb --try_load_options", when wal_dir specified by the option file doesn't exist, ignore it. * Change time resolution in FileOperationInfo. * Deleting Blob files also goes through SstFileManager. * Remove CuckooHash memtable. * The counter stat `number.block.not_compressed` now also counts blocks not compressed due to poor compression ratio. * Remove ttl option from `CompactionOptionsFIFO`. 
The option has been deprecated and ttl in `ColumnFamilyOptions` is used instead. * Support SST file ingestion across multiple column families via DB::IngestExternalFiles. See the function's comment about atomicity. * Remove Lua compaction filter. ### Bug Fixes * Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls. * Fix a memory leak when files with range tombstones are read in mmap mode and block cache is enabled * Fix handling of corrupt range tombstone blocks such that corruptions cannot cause deleted keys to reappear * Lock free MultiGet * Fix incorrect `NotFound` point lookup result when querying the endpoint of a file that has been extended by a range tombstone. * Fix a bug with pipelined write where a write leader's callback failure led to the whole write group failing. ### Change Default Options * Change options.compaction_pri's default to kMinOverlappingRatio ## 5.18.0 (11/30/2018) ### New Features * Introduced `JemallocNodumpAllocator` memory allocator. When in use, block cache will be excluded from core dump. * Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with `thread_local` keyword for perf_context. Added per-level perf context for bloom filter and `Get` query. * With level_compaction_dynamic_level_bytes = true, level multiplier may be adjusted automatically when Level 0 to 1 compaction lags behind. * Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled. * Added `num_deletions` and `num_merge_operands` members to `TableProperties`. * Added "rocksdb.min-obsolete-sst-number-to-keep" DB property that reports the lower bound on SST file numbers that are being kept from deletion, even if the SSTs are obsolete. 
* Add xxhash64 checksum support * Introduced `MemoryAllocator`, which lets the user specify custom memory allocator for block based table. * Improved `DeleteRange` to prevent read performance degradation. The feature is no longer marked as experimental. ### Public API Change * `DBOptions::use_direct_reads` now affects reads issued by `BackupEngine` on the database's SSTs. * `NO_ITERATORS` is divided into two counters `NO_ITERATOR_CREATED` and `NO_ITERATOR_DELETE`. Both of them are only increasing now, just as other counters. ### Bug Fixes * Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. * Fix in-memory range tombstone truncation to avoid erroneously covering newer keys at a lower level, and include range tombstones in compacted files whose largest key is the range tombstone's start key. * Properly set the stop key for a truncated manual CompactRange * Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds. * Fix the bug that WriteBatchWithIndex's SeekForPrev() doesn't see the entries with the same key. * Fix the bug where user comparator was sometimes fed with InternalKey instead of the user key. The bug manifests when during GenerateBottommostFiles. * Fix a bug in WritePrepared txns where if the number of old snapshots goes beyond the snapshot cache size (128 default) the rest will not be checked when evicting a commit entry from the commit cache. * Fixed Get correctness bug in the presence of range tombstones where merge operands covered by a range tombstone always result in NotFound. * Start populating `NO_FILE_CLOSES` ticker statistic, which was always zero previously. * The default value of NewBloomFilterPolicy()'s argument use_block_based_builder is changed to false. 
Note that this new default may cause large temp memory usage when building very large SST files. ## 5.17.0 (10/05/2018) ### Public API Change * `OnTableFileCreated` will now be called for empty files generated during compaction. In that case, `TableFileCreationInfo::file_path` will be "(nil)" and `TableFileCreationInfo::file_size` will be zero. * Add `FlushOptions::allow_write_stall`, which controls whether Flush calls start working immediately, even if it causes user writes to stall, or will wait until flush can be performed without causing write stall (similar to `CompactRangeOptions::allow_write_stall`). Note that the default value is false, meaning we add delay to Flush calls until stalling can be avoided when possible. This is behavior change compared to previous RocksDB versions, where Flush calls didn't check if they might cause stall or not. * Application using PessimisticTransactionDB is expected to rollback/commit recovered transactions before starting new ones. This assumption is used to skip concurrency control during recovery. * Expose column family id to `OnCompactionCompleted`. ### New Features * TransactionOptions::skip_concurrency_control allows pessimistic transactions to skip the overhead of concurrency control. Could be used for optimizing certain transactions or during recovery. ### Bug Fixes * Avoid creating empty SSTs and subsequently deleting them in certain cases during compaction. * Sync CURRENT file contents during checkpoint. ## 5.16.3 (10/1/2018) ### Bug Fixes * Fix crash caused when `CompactFiles` run with `CompactionOptions::compression == CompressionType::kDisableCompressionOption`. Now that setting causes the compression type to be chosen according to the column family-wide compression options. ## 5.16.2 (9/21/2018) ### Bug Fixes * Fix bug in partition filters with format_version=4. ## 5.16.1 (9/17/2018) ### Bug Fixes * Remove trace_analyzer_tool from rocksdb_lib target in TARGETS file. * Fix RocksDB Java build and tests. 
* Remove sync point in Block destructor. ## 5.16.0 (8/21/2018) ### Public API Change * The merge operands are passed to `MergeOperator::ShouldMerge` in the reversed order relative to how they were merged (passed to FullMerge or FullMergeV2) for performance reasons * GetAllKeyVersions() to take an extra argument of `max_num_ikeys`. * Using ZSTD dictionary trainer (i.e., setting `CompressionOptions::zstd_max_train_bytes` to a nonzero value) now requires ZSTD version 1.1.3 or later. ### New Features * Changes the format of index blocks by delta encoding the index values, which are the block handles. This saves the encoding of BlockHandle::offset of the non-head index entries in each restart interval. The feature is backward compatible but not forward compatible. It is disabled by default unless format_version 4 or above is used. * Add a new tool: trace_analyzer. Trace_analyzer analyzes the trace file generated by using trace_replay API. It can convert the binary format trace file to a human readable txt file, output the statistics of the analyzed query types such as access statistics and size statistics, combining the dumped whole key space file to analyze, support query correlation analyzing, and etc. Current supported query types are: Get, Put, Delete, SingleDelete, DeleteRange, Merge, Iterator (Seek, SeekForPrev only). * Add hash index support to data blocks, which helps reducing the cpu utilization of point-lookup operations. This feature is backward compatible with the data block created without the hash index. It is disabled by default unless BlockBasedTableOptions::data_block_index_type is set to data_block_index_type = kDataBlockBinaryAndHash. ### Bug Fixes * Fix a bug in misreporting the estimated partition index size in properties block. ## 5.15.0 (7/17/2018) ### Public API Change * Remove managed iterator. ReadOptions.managed is not effective anymore. * For bottommost_compression, a compatible CompressionOptions is added via `bottommost_compression_opts`. 
To keep backward compatible, a new boolean `enabled` is added to CompressionOptions. For compression_opts, it will be always used no matter what value of `enabled` is. For bottommost_compression_opts, it will only be used when user set `enabled=true`, otherwise, compression_opts will be used for bottommost_compression as default. * With LRUCache, when high_pri_pool_ratio > 0, midpoint insertion strategy will be enabled to put low-pri items to the tail of low-pri list (the midpoint) when they first inserted into the cache. This is to make cache entries never get hit age out faster, improving cache efficiency when large background scan presents. * For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed. * The "rocksdb.num.entries" table property no longer counts range deletion tombstones as entries. ### New Features * Changes the format of index blocks by storing the key in their raw form rather than converting them to InternalKey. This saves 8 bytes per index key. The feature is backward compatible but not forward compatible. It is disabled by default unless format_version 3 or above is used. * Avoid memcpy when reading mmap files with OpenReadOnly and max_open_files==-1. * Support dynamically changing `ColumnFamilyOptions::ttl` via `SetOptions()`. * Add a new table property, "rocksdb.num.range-deletions", which counts the number of range deletion tombstones in the table. * Improve the performance of iterators doing long range scans by using readahead, when using direct IO. * pin_top_level_index_and_filter (default true) in BlockBasedTableOptions can be used in combination with cache_index_and_filter_blocks to prefetch and pin the top-level index of partitioned index and filter blocks in cache. It has no impact when cache_index_and_filter_blocks is false. * Write properties meta-block at the end of block-based table to save read-ahead IO. 
### Bug Fixes * Fix deadlock with enable_pipelined_write=true and max_successive_merges > 0 * Check conflict at output level in CompactFiles. * Fix corruption in non-iterator reads when mmap is used for file reads * Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an existent key as missing. The bug could be triggered if prefix_extractor is set and partition filters are enabled. * Change default value of `bytes_max_delete_chunk` to 0 in NewSstFileManager() as it doesn't work well with checkpoints. * Fix a bug caused by not copying the block trailer with compressed SST file, direct IO, prefetcher and no compressed block cache. * Fix writes that can get stuck indefinitely if enable_pipelined_write=true. The issue has existed since pipelined write was introduced in 5.5.0. ## 5.14.0 (5/16/2018) ### Public API Change * Add a BlockBasedTableOption to align uncompressed data blocks on the smaller of block size or page size boundary, to reduce flash reads by avoiding reads spanning 4K pages. * The background thread naming convention changed (on supporting platforms) to "rocksdb:", e.g., "rocksdb:low0". * Add a new ticker stat rocksdb.number.multiget.keys.found to count number of keys successfully read in MultiGet calls * Touch-up to write-related counters in PerfContext. New counters added: write_scheduling_flushes_compactions_time, write_thread_wait_nanos. Counters whose behavior was fixed or modified: write_memtable_time, write_pre_and_post_process_time, write_delay_time. * Posix Env's NewRandomRWFile() will fail if the file doesn't exist. * Now, `DBOptions::use_direct_io_for_flush_and_compaction` only applies to background writes, and `DBOptions::use_direct_reads` applies to both user reads and background reads. This conforms with Linux's `open(2)` manpage, which advises against simultaneously reading a file in buffered and direct modes, due to possibly undefined behavior and degraded performance. 
* Iterator::Valid() always returns false if !status().ok(). So, now when doing a Seek() followed by some Next()s, there's no need to check status() after every operation. * Iterator::Seek()/SeekForPrev()/SeekToFirst()/SeekToLast() always resets status(). * Introduced `CompressionOptions::kDefaultCompressionLevel`, which is a generic way to tell RocksDB to use the compression library's default level. It is now the default value for `CompressionOptions::level`. Previously the level defaulted to -1, which gave poor compression ratios in ZSTD. ### New Features * Introduce TTL for level compaction so that all files older than ttl go through the compaction process to get rid of old data. * TransactionDBOptions::write_policy can be configured to enable WritePrepared 2PC transactions. Read more about them in the wiki. * Add DB properties "rocksdb.block-cache-capacity", "rocksdb.block-cache-usage", "rocksdb.block-cache-pinned-usage" to show block cache usage. * Add `Env::LowerThreadPoolCPUPriority(Priority)` method, which lowers the CPU priority of background (esp. compaction) threads to minimize interference with foreground tasks. * Fsync parent directory after deleting a file in delete scheduler. * In level-based compaction, if bottom-pri thread pool was setup via `Env::SetBackgroundThreads()`, compactions to the bottom level will be delegated to that thread pool. * `prefix_extractor` has been moved from ImmutableCFOptions to MutableCFOptions, meaning it can be dynamically changed without a DB restart. ### Bug Fixes * Fsync after writing global seq number to the ingestion file in ExternalSstFileIngestionJob. * Fix WAL corruption caused by race condition between user write thread and FlushWAL when two_write_queue is not set. * Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when refcount cannot be accurately determined. 
* Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters * Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be re-enabled by setting TransactionDBOptions::rollback_merge_operands to true. * Fix wrong results by ReverseBytewiseComparator::FindShortSuccessor() ### Java API Changes * Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances. * Added SstFileManager to the Java API to allow managing SST files across DB instances. ## 5.13.0 (3/20/2018) ### Public API Change * RocksDBOptionsParser::Parse()'s `ignore_unknown_options` argument will only be effective if the option file shows it is generated using a higher version of RocksDB than the current version. * Remove CompactionEventListener. ### New Features * SstFileManager now can cancel compactions if they will result in max space errors. SstFileManager users can also use SetCompactionBufferSize to specify how much space must be left over during a compaction for auxiliary file functions such as logging and flushing. * Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables. * If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions. * Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belonging to the latest LSM tree. * NewSstFileManager to add an argument bytes_max_delete_chunk with default 64MB. With this argument, a file larger than 64MB will be ftruncated multiple times based on this size. ### Bug Fixes * Fix a leak in prepared_section_completed_ where the zeroed entries would not be removed from the map. * Fix WAL corruption caused by race condition between user write thread and backup/checkpoint thread. ## 5.12.0 (2/14/2018) ### Public API Change * Iterator::SeekForPrev is now a pure virtual method. 
This is to prevent user who implement the Iterator interface fail to implement SeekForPrev by mistake. * Add `include_end` option to make the range end exclusive when `include_end == false` in `DeleteFilesInRange()`. * Add `CompactRangeOptions::allow_write_stall`, which makes `CompactRange` start working immediately, even if it causes user writes to stall. The default value is false, meaning we add delay to `CompactRange` calls until stalling can be avoided when possible. Note this delay is not present in previous RocksDB versions. * Creating checkpoint with empty directory now returns `Status::InvalidArgument`; previously, it returned `Status::IOError`. * Adds a BlockBasedTableOption to turn off index block compression. * Close() method now returns a status when closing a db. ### New Features * Improve the performance of iterators doing long range scans by using readahead. * Add new function `DeleteFilesInRanges()` to delete files in multiple ranges at once for better performance. * FreeBSD build support for RocksDB and RocksJava. * Improved performance of long range scans with readahead. * Updated to and now continuously tested in Visual Studio 2017. ### Bug Fixes * Fix `DisableFileDeletions()` followed by `GetSortedWalFiles()` to not return obsolete WAL files that `PurgeObsoleteFiles()` is going to delete. * Fix Handle error return from WriteBuffer() during WAL file close and DB close. * Fix advance reservation of arena block addresses. * Fix handling of empty string as checkpoint directory. ## 5.11.0 (01/08/2018) ### Public API Change * Add `autoTune` and `getBytesPerSecond()` to RocksJava RateLimiter ### New Features * Add a new histogram stat called rocksdb.db.flush.micros for memtable flush. * Add "--use_txn" option to use transactional API in db_stress. * Disable onboard cache for compaction output in Windows platform. * Improve the performance of iterators doing long range scans by using readahead. 
### Bug Fixes * Fix a stack-use-after-scope bug in ForwardIterator. * Fix builds on platforms including Linux, Windows, and PowerPC. * Fix buffer overrun in backup engine for DBs with huge number of files. * Fix a mislabel bug for bottom-pri compaction threads. * Fix DB::Flush() keep waiting after flush finish under certain condition. ## 5.10.0 (12/11/2017) ### Public API Change * When running `make` with environment variable `USE_SSE` set and `PORTABLE` unset, will use all machine features available locally. Previously this combination only compiled SSE-related features. ### New Features * Provide lifetime hints when writing files on Linux. This reduces hardware write-amp on storage devices supporting multiple streams. * Add a DB stat, `NUMBER_ITER_SKIP`, which returns how many internal keys were skipped during iterations (e.g., due to being tombstones or duplicate versions of a key). * Add PerfContext counters, `key_lock_wait_count` and `key_lock_wait_time`, which measure the number of times transactions wait on key locks and total amount of time waiting. ### Bug Fixes * Fix IOError on WAL write doesn't propagate to write group follower * Make iterator invalid on merge error. * Fix performance issue in `IngestExternalFile()` affecting databases with large number of SST files. * Fix possible corruption to LSM structure when `DeleteFilesInRange()` deletes a subset of files spanned by a `DeleteRange()` marker. ## 5.9.0 (11/1/2017) ### Public API Change * `BackupableDBOptions::max_valid_backups_to_open == 0` now means no backups will be opened during BackupEngine initialization. Previously this condition disabled limiting backups opened. * `DBOptions::preserve_deletes` is a new option that allows one to specify that DB should not drop tombstones for regular deletes if they have sequence number larger than what was set by the new API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)`. Disabled by default. 
* API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)` was added, users who wish to preserve deletes are expected to periodically call this function to advance the cutoff seqnum (all deletes made before this seqnum can be dropped by DB). It's user responsibility to figure out how to advance the seqnum in the way so the tombstones are kept for the desired period of time, yet are eventually processed in time and don't eat up too much space. * `ReadOptions::iter_start_seqnum` was added; if set to something > 0 user will see 2 changes in iterators behavior 1) only keys written with sequence larger than this parameter would be returned and 2) the `Slice` returned by iter->key() now points to the memory that keep User-oriented representation of the internal key, rather than user key. New struct `FullKey` was added to represent internal keys, along with a new helper function `ParseFullKey(const Slice& internal_key, FullKey* result);`. * Deprecate trash_dir param in NewSstFileManager, right now we will rename deleted files to .trash instead of moving them to trash directory * Allow setting a custom trash/DB size ratio limit in the SstFileManager, after which files that are to be scheduled for deletion are deleted immediately, regardless of any delete ratelimit. * Return an error on write if write_options.sync = true and write_options.disableWAL = true to warn user of inconsistent options. Previously we will not write to WAL and not respecting the sync options in this case. ### New Features * CRC32C is now using the 3-way pipelined SSE algorithm `crc32c_3way` on supported platforms to improve performance. The system will choose to use this algorithm on supported platforms automatically whenever possible. If PCLMULQDQ is not supported it will fall back to the old Fast_CRC32 algorithm. * `DBOptions::writable_file_max_buffer_size` can now be changed dynamically. 
* `DBOptions::bytes_per_sync`, `DBOptions::compaction_readahead_size`, and `DBOptions::wal_bytes_per_sync` can now be changed dynamically, `DBOptions::wal_bytes_per_sync` will flush all memtables and switch to a new WAL file. * Support dynamic adjustment of rate limit according to demand for background I/O. It can be enabled by passing `true` to the `auto_tuned` parameter in `NewGenericRateLimiter()`. The value passed as `rate_bytes_per_sec` will still be respected as an upper-bound. * Support dynamically changing `ColumnFamilyOptions::compaction_options_fifo`. * Introduce `EventListener::OnStallConditionsChanged()` callback. Users can implement it to be notified when user writes are stalled, stopped, or resumed. * Add a new db property "rocksdb.estimate-oldest-key-time" to return oldest data timestamp. The property is available only for FIFO compaction with compaction_options_fifo.allow_compaction = false. * Upon snapshot release, recompact bottommost files containing deleted/overwritten keys that previously could not be dropped due to the snapshot. This alleviates space-amp caused by long-held snapshots. * Support lower bound on iterators specified via `ReadOptions::iterate_lower_bound`. * Support for differential snapshots (via iterator emitting the sequence of key-values representing the difference between DB state at two different sequence numbers). Supports preserving and emitting puts and regular deletes, doesn't support SingleDeletes, MergeOperator, Blobs and Range Deletes. ### Bug Fixes * Fix a potential data inconsistency issue during point-in-time recovery. `DB:Open()` will abort if column family inconsistency is found during PIT recovery. * Fix possible metadata corruption in databases using `DeleteRange()`. ## 5.8.0 (08/30/2017) ### Public API Change * Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints. 
* `Slice::compare` and BytewiseComparator `Compare` no longer accept `Slice`s containing nullptr. * `Transaction::Get` and `Transaction::GetForUpdate` variants with `PinnableSlice` added. ### New Features * Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators. * Replace dynamic_cast<> (except unit test) so people can choose to build with RTTI off. With make, release mode is by default built with -fno-rtti and debug mode is built without it. Users can override it by setting USE_RTTI=0 or 1. * Universal compactions including the bottom level can be executed in a dedicated thread pool. This alleviates head-of-line blocking in the compaction queue, which cause write stalling, particularly in multi-instance use cases. Users can enable this feature via `Env::SetBackgroundThreads(N, Env::Priority::BOTTOM)`, where `N > 0`. * Allow merge operator to be called even with a single merge operand during compactions, by appropriately overriding `MergeOperator::AllowSingleOperand`. * Add `DB::VerifyChecksum()`, which verifies the checksums in all SST files in a running DB. * Block-based table support for disabling checksums by setting `BlockBasedTableOptions::checksum = kNoChecksum`. ### Bug Fixes * Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`. * Fix incorrect dropping of deletions during intra-L0 compaction. * Fix transient reappearance of keys covered by range deletions when memtable prefix bloom filter is enabled. * Fix potentially wrong file smallest key when range deletions separated by snapshot are written together. ## 5.7.0 (07/13/2017) ### Public API Change * DB property "rocksdb.sstables" now prints keys in hex form. ### New Features * Measure estimated number of reads per file. The information can be accessed through DB::GetColumnFamilyMetaData or "rocksdb.sstables" DB property. 
* RateLimiter support for throttling background reads, or throttling the sum of background reads and writes. This can give more predictable I/O usage when compaction reads more data than it writes, e.g., due to lots of deletions. * [Experimental] FIFO compaction with TTL support. It can be enabled by setting CompactionOptionsFIFO.ttl > 0. * Introduce `EventListener::OnBackgroundError()` callback. Users can implement it to be notified of errors causing the DB to enter read-only mode, and optionally override them. * Partitioned Index/Filters exiting the experimental mode. To enable partitioned indexes set index_type to kTwoLevelIndexSearch and to further enable partitioned filters set partition_filters to true. To configure the partition size set metadata_block_size. ### Bug Fixes * Fix discarding empty compaction output files when `DeleteRange()` is used together with subcompactions. ## 5.6.0 (06/06/2017) ### Public API Change * Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads. * Replace `Options::max_background_flushes`, `Options::max_background_compactions`, and `Options::base_background_compactions` all with `Options::max_background_jobs`, which automatically decides how many threads to allocate towards flush/compaction. * options.delayed_write_rate by default take the value of options.rate_limiter rate. * Replace global variable `IOStatsContext iostats_context` with `IOStatsContext* get_iostats_context()`; replace global variable `PerfContext perf_context` with `PerfContext* get_perf_context()`. ### New Features * Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads. 
* Users can pass a cache object to write buffer manager, so that they can cap memory usage for memtable and block cache using one single limit. * Flush will be triggered when 7/8 of the limit introduced by write_buffer_manager or db_write_buffer_size is reached, so that the hard threshold is hard to hit. * Introduce WriteOptions.low_pri. If it is true, low priority writes will be throttled if the compaction is behind. * `DB::IngestExternalFile()` now supports ingesting files into a database containing range deletions. ### Bug Fixes * Shouldn't ignore return value of fsync() in flush. ## 5.5.0 (05/17/2017) ### New Features * FIFO compaction to support Intra L0 compaction too with CompactionOptionsFIFO.allow_compaction=true. * DB::ResetStats() to reset internal stats. * Statistics::Reset() to reset user stats. * ldb add option --try_load_options, which will open DB with its own option file. * Introduce WriteBatch::PopSavePoint to pop the most recent save point explicitly. * Support dynamically changing `max_open_files` option via SetDBOptions() * Added DB::CreateColumnFamilies() and DB::DropColumnFamilies() to bulk create/drop column families. * Add debugging function `GetAllKeyVersions` to see internal versions of a range of keys. * Support file ingestion with universal compaction style * Support file ingestion behind with option `allow_ingest_behind` * New option enable_pipelined_write which may improve write throughput when writing from multiple threads with WAL enabled. ### Bug Fixes * Fix the bug that Direct I/O uses direct reads for non-SST file ## 5.4.0 (04/11/2017) ### Public API Change * random_access_max_buffer_size no longer has any effect * Removed Env::EnableReadAhead(), Env::ShouldForwardRawRequest() * Support dynamically changing `stats_dump_period_sec` option via SetDBOptions(). * Added ReadOptions::max_skippable_internal_keys to set a threshold to fail a request as incomplete when too many keys are being skipped when using iterators. 
* DB::Get in place of std::string accepts PinnableSlice, which avoids the extra memcpy of value to std::string in most of cases. * PinnableSlice releases the pinned resources that contain the value when it is destructed or when ::Reset() is called on it. * The old API that accepts std::string, although discouraged, is still supported. * Replace Options::use_direct_writes with Options::use_direct_io_for_flush_and_compaction. Read Direct IO wiki for details. * Added CompactionEventListener and EventListener::OnFlushBegin interfaces. ### New Features * Memtable flush can be avoided during checkpoint creation if total log file size is smaller than a threshold specified by the user. * Introduce level-based L0->L0 compactions to reduce file count, so write delays are incurred less often. * (Experimental) Partitioning filters which creates an index on the partitions. The feature can be enabled by setting partition_filters when using kFullFilter. Currently the feature also requires two-level indexing to be enabled. Number of partitions is the same as the number of partitions for indexes, which is controlled by metadata_block_size. ## 5.3.0 (03/08/2017) ### Public API Change * Remove disableDataSync option. * Remove timeout_hint_us option from WriteOptions. The option has been deprecated and has no effect since 3.13.0. * Remove option min_partial_merge_operands. Partial merge operands will always be merged in flush or compaction if there are more than one. * Remove option verify_checksums_in_compaction. Compaction will always verify checksum. ### Bug Fixes * Fix the bug that iterator may skip keys ## 5.2.0 (02/08/2017) ### Public API Change * NewLRUCache() will determine number of shard bits automatically based on capacity, if the user doesn't pass one. This also impacts the default block cache when the user doesn't explicit provide one. * Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files. 
* Options::use_direct_writes and Options::use_direct_reads are now ready to use. * (Experimental) Two-level indexing that partitions the index and creates a 2nd level index on the partitions. The feature can be enabled by setting kTwoLevelIndexSearch as IndexType and configuring index_per_partition. ### New Features * Added new overloaded function GetApproximateSizes that allows specifying if memtable stats should be computed only without computing SST files' stats approximations. * Added new function GetApproximateMemTableStats that approximates both number of records and size of memtables. * Add Direct I/O mode for SST file I/O ### Bug Fixes * RangeSync() should work if ROCKSDB_FALLOCATE_PRESENT is not set * Fix wrong results in a data race case in Get() * Some fixes related to 2PC. * Fix bugs of data corruption in direct I/O ## 5.1.0 (01/13/2017) * Support dynamically changing `delete_obsolete_files_period_micros` option via SetDBOptions(). * Added EventListener::OnExternalFileIngested which will be called when IngestExternalFile() adds a file successfully. * BackupEngine::Open and BackupEngineReadOnly::Open now always return error statuses matching those of the backup Env. ### Bug Fixes * Fix the bug that if 2PC is enabled, checkpoints may lose some recent transactions. * When file copying is needed when creating checkpoints or bulk loading files, fsync the file after the file copying. ## 5.0.0 (11/17/2016) ### Public API Change * Options::max_bytes_for_level_multiplier is now a double along with all getters and setters. * Support dynamically changing `delayed_write_rate` and `max_total_wal_size` options via SetDBOptions(). * Introduce DB::DeleteRange for optimized deletion of large ranges of contiguous keys. * Support dynamically changing `delayed_write_rate` option via SetDBOptions(). * Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default. 
* Remove Tickers::SEQUENCE_NUMBER to avoid confusion if statistics object is shared among RocksDB instances. Alternatively DB::GetLatestSequenceNumber() can be used to get the same value. * Options.level0_stop_writes_trigger default value changes from 24 to 32. * New compaction filter API: CompactionFilter::FilterV2(). Allows to drop ranges of keys. * Removed flashcache support. * DB::AddFile() is deprecated and is replaced with DB::IngestExternalFile(). DB::IngestExternalFile() removes all the restrictions that existed for DB::AddFile. ### New Features * Add avoid_flush_during_shutdown option, which speeds up DB shutdown by not flushing unpersisted data (i.e. with disableWAL = true). Unpersisted data will be lost. The option is dynamically changeable via SetDBOptions(). * Add memtable_insert_with_hint_prefix_extractor option. The option is meant to reduce CPU usage for inserting keys into memtable, if keys can be grouped by prefix and inserts for each prefix are sequential or almost sequential. See include/rocksdb/options.h for more details. * Add LuaCompactionFilter in utilities. This allows developers to write compaction filters in Lua. To use this feature, LUA_PATH needs to be set to the root directory of Lua. * No longer populate "LATEST_BACKUP" file in backup directory, which formerly contained the number of the latest backup. The latest backup can be determined by finding the highest numbered file in the "meta/" subdirectory. ## 4.13.0 (10/18/2016) ### Public API Change * DB::GetOptions() reflects dynamically changed options (i.e. through DB::SetOptions()) and returns a copy of options instead of reference. * Added Statistics::getAndResetTickerCount(). ### New Features * Add DB::SetDBOptions() to dynamically change base_background_compactions and max_background_compactions. * Added Iterator::SeekForPrev(). This new API will seek to the last key that is less than or equal to the target key.
## 4.12.0 (9/12/2016) ### Public API Change * CancelAllBackgroundWork() flushes all memtables for databases containing writes that have bypassed the WAL (writes issued with WriteOptions::disableWAL=true) before shutting down background threads. * Merge options source_compaction_factor, max_grandparent_overlap_bytes and expanded_compaction_factor into max_compaction_bytes. * Remove ImmutableCFOptions. * Add a compression type ZSTD, which can work with ZSTD 0.8.0 or up. Still keep ZSTDNotFinal for compatibility reasons. ### New Features * Introduce NewClockCache, which is based on CLOCK algorithm with better concurrent performance in some cases. It can be used to replace the default LRU-based block cache and table cache. To use it, RocksDB needs to be linked with TBB lib. * Change ticker/histogram statistics implementations to accumulate data in thread-local storage, which improves CPU performance by reducing cache coherency costs. Callers of CreateDBStatistics do not need to change anything to use this feature. * Block cache mid-point insertion, where index and filter blocks are inserted into LRU block cache with higher priority. The feature can be enabled by setting BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority to true and high_pri_pool_ratio > 0 when creating NewLRUCache. ## 4.11.0 (8/1/2016) ### Public API Change * options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge page for memtable too, rather than just memtable bloom filter. ### New Features * A tool to migrate DB after options change. See include/rocksdb/utilities/option_change_migration.h. * Add ReadOptions.background_purge_on_iterator_cleanup. If true, we avoid file deletion when destroying iterators.
## 4.10.0 (7/5/2016) ### Public API Change * options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes * enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one. * Deprecate options.filter_deletes. ### New Features * Add avoid_flush_during_recovery option. * Add a read option background_purge_on_iterator_cleanup to avoid deleting files in foreground when destroying iterators. Instead, a job is scheduled in high priority queue and would be executed in a separate background thread. * RepairDB support for column families. RepairDB now associates data with non-default column families using information embedded in the SST/WAL files (4.7 or later). For data written by 4.6 or earlier, RepairDB associates it with the default column family. * Add options.write_buffer_manager which allows users to control total memtable sizes across multiple DB instances. ## 4.9.0 (6/9/2016) ### Public API changes * Add bottommost_compression option. This option can be used to set a specific compression algorithm for the bottommost level (Last level containing files in the DB). * Introduce CompactionJobInfo::compression. This field states the compression algorithm used to generate the output files of the compaction. * Deprecate BlockBasedTableOptions.hash_index_allow_collision=false * Deprecate options builder (GetOptions()). ### New Features * Introduce NewSimCache() in rocksdb/utilities/sim_cache.h. This function creates a block cache that is able to give simulation results (mainly hit rate) of simulating block behavior with a configurable cache size. ## 4.8.0 (5/2/2016) ### Public API Change * Allow preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes.
* Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F * Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN". * Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status. ### New Features * Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size. ## 4.7.0 (4/8/2016) ### Public API Change * rename options compaction_measure_io_stats to report_bg_io_stats and include flush too. * Change some default options. Now default options will optimize for server-workloads. Also enable slowdown and full stop triggers for pending compaction bytes. These changes may cause sub-optimal performance or significant increase of resource usage. To avoid these risks, users can open existing RocksDB with options extracted from RocksDB option files. See https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File for how to use RocksDB option files. Or you can call Options.OldDefaults() to recover old defaults. DEFAULT_OPTIONS_HISTORY.md will track change history of default options. ## 4.6.0 (3/10/2016) ### Public API Changes * Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier. * Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, insert to cache will fail if not enough capacity can be freed. Signature of Cache::Insert() is updated accordingly.
* Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. They are updated when the Iterator is deleted. * Add monotonically increasing counter (DB property "rocksdb.current-super-version-number") that increments upon any change to the LSM tree. ### New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. * Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" ## 4.5.0 (2/5/2016) ### Public API Changes * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes. * Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. * DBOptions::delete_scheduler and NewDeleteScheduler() are removed, please use DBOptions::sst_file_manager and NewSstFileManager() instead ### New Features * ldb tool now supports operations to non-default column families. * Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persisted data and skip mem-tables if writes were done with disableWAL = true. * Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate. ## 4.4.0 (1/14/2016) ### Public API Changes * Change names in CompactionPri and add a new one. * Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit. * If options.max_write_buffer_number > 3, writes will be slowed down when writing to the last write buffer to delay a full stop. * Introduce CompactionJobInfo::compaction_reason, this field includes the reason to trigger the compaction.
* After slow down is triggered, if estimated pending compaction bytes keep increasing, slow down more. * Increase default options.delayed_write_rate to 2MB/s. * Added a new parameter --path to ldb tool. --path accepts the name of either MANIFEST, SST or a WAL file. Either --db or --path can be used when calling ldb. ## 4.3.0 (12/8/2015) ### New Features * CompactionFilter has new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key. * RocksDB will now persist options under the same directory as the RocksDB database on successful DB::Open, CreateColumnFamily, DropColumnFamily, and SetOptions. * Introduce LoadLatestOptions() in rocksdb/utilities/options_util.h. This function can construct the latest DBOptions / ColumnFamilyOptions used by the specified RocksDB instance. * Introduce CheckOptionsCompatibility() in rocksdb/utilities/options_util.h. This function checks whether the input set of options is able to open the specified DB successfully. ### Public API Changes * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. ## 4.2.0 (11/9/2015) ### New Features * Introduce CreateLoggerFromOptions(), this function creates a Logger for provided DBOptions. * Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families. * Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list of rocksdb instances. ### Public API Changes * CompactionFilter::Context includes information of Column Family ID * The need-compaction hint given by TablePropertiesCollector::NeedCompact() will be persistent and recoverable after DB recovery. This introduces a breaking format change.
If you use this experimental feature, including NewCompactOnDeletionCollectorFactory() in the new version, you may not be able to directly downgrade the DB back to version 4.0 or lower. * TablePropertiesCollectorFactory::CreateTablePropertiesCollector() now takes an option Context, containing the information of column family ID for the file being written. * Remove DefaultCompactionFilterFactory. ## 4.1.0 (10/8/2015) ### New Features * Added single delete operation as a more efficient way to delete keys that have not been overwritten. * Added experimental AddFile() to DB interface that allow users to add files created by SstFileWriter into an empty Database, see include/rocksdb/sst_file_writer.h and DB::AddFile() for more info. * Added support for opening SST files with .ldb suffix which enables opening LevelDB databases. * CompactionFilter now supports filtering of merge operands and merge results. ### Public API Changes * Added SingleDelete() to the DB interface. * Added AddFile() to DB interface. * Added SstFileWriter class. * CompactionFilter has a new method FilterMergeOperand() that RocksDB applies to every merge operand during compaction to decide whether to filter the operand. * We removed CompactionFilterV2 interfaces from include/rocksdb/compaction_filter.h. The functionality was deprecated already in version 3.13. ## 4.0.0 (9/9/2015) ### New Features * Added support for transactions. See include/rocksdb/utilities/transaction.h for more info. * DB::GetProperty() now accepts "rocksdb.aggregated-table-properties" and "rocksdb.aggregated-table-properties-at-levelN", in which case it returns aggregated table properties of the target column family, or the aggregated table properties of the specified level N if the "at-level" version is used. * Add compression option kZSTDNotFinalCompression for people to experiment ZSTD although its format is not finalized. * We removed the need for LATEST_BACKUP file in BackupEngine. 
We still keep writing it when we create new backups (because of backward compatibility), but we don't read it anymore. ### Public API Changes * Removed class Env::RandomRWFile and Env::NewRandomRWFile(). * Renamed DBOptions.num_subcompactions to DBOptions.max_subcompactions to make the name better match the actual functionality of the option. * Added Equal() method to the Comparator interface that can optionally be overwritten in cases where equality comparisons can be done more efficiently than three-way comparisons. * Previous 'experimental' OptimisticTransaction class has been replaced by Transaction class. ## 3.13.0 (8/6/2015) ### New Features * RollbackToSavePoint() in WriteBatch/WriteBatchWithIndex * Add NewCompactOnDeletionCollectorFactory() in utilities/table_properties_collectors, which allows rocksdb to mark a SST file as need-compaction when it observes at least D deletion entries in any N consecutive entries in that SST file. Note that this feature depends on an experimental NeedCompact() API --- the result of this API will not persist after DB restart. * Add DBOptions::delete_scheduler. Use NewDeleteScheduler() in include/rocksdb/delete_scheduler.h to create a DeleteScheduler that can be shared among multiple RocksDB instances to control the file deletion rate of SST files that exist in the first db_path. ### Public API Changes * Deprecated WriteOptions::timeout_hint_us. We no longer support write timeout. If you really need this option, talk to us and we might consider returning it. * Deprecated purge_redundant_kvs_while_flush option. * Removed BackupEngine::NewBackupEngine() and NewReadOnlyBackupEngine() that were deprecated in RocksDB 3.8. Please use BackupEngine::Open() instead. * Deprecated Compaction Filter V2. We are not aware of any existing use-cases. If you use this filter, your compile will break with RocksDB 3.13. Please let us know if you use it and we'll put it back in RocksDB 3.14. 
* Env::FileExists now returns a Status instead of a boolean * Add statistics::getHistogramString() to print detailed distribution of a histogram metric. * Add DBOptions::skip_stats_update_on_db_open. When it is on, DB::Open() will run faster as it skips the random reads required for loading necessary stats from SST files to optimize compaction. ## 3.12.0 (7/2/2015) ### New Features * Added experimental support for optimistic transactions. See include/rocksdb/utilities/optimistic_transaction.h for more info. * Added a new way to report QPS from db_bench (check out --report_file and --report_interval_seconds) * Added a cache for individual rows. See DBOptions::row_cache for more info. * Several new features on EventListener (see include/rocksdb/listener.h): - OnCompactionCompleted() now returns per-compaction job statistics, defined in include/rocksdb/compaction_job_stats.h. - Added OnTableFileCreated() and OnTableFileDeleted(). * Add compaction_options_universal.enable_trivial_move to true, to allow trivial move while performing universal compaction. Trivial move will happen only when all the input files are non overlapping. ### Public API changes * EventListener::OnFlushCompleted() now passes FlushJobInfo instead of a list of parameters. * DB::GetDbIdentity() is now a const function. If this function is overridden in your application, be sure to also make GetDbIdentity() const to avoid compile error. * Move listeners from ColumnFamilyOptions to DBOptions. * Add max_write_buffer_number_to_maintain option * DB::CompactRange()'s parameter reduce_level is changed to change_level, to allow users to move levels to lower levels if allowed. It can be used to migrate a DB from options.level_compaction_dynamic_level_bytes=false to options.level_compaction_dynamic_level_bytes=true. * Change default value for options.compaction_filter_factory and options.compaction_filter_factory_v2 to nullptr instead of DefaultCompactionFilterFactory and DefaultCompactionFilterFactoryV2.
* If CancelAllBackgroundWork is called without doing a flush after doing loads with WAL disabled, the changes which haven't been flushed before the call to CancelAllBackgroundWork will be lost. * WBWIIterator::Entry() now returns WriteEntry instead of `const WriteEntry&` * options.hard_rate_limit is deprecated. * When options.soft_rate_limit or options.level0_slowdown_writes_trigger is triggered, the way to slow down writes is changed to: write rate to DB is limited to options.delayed_write_rate. * DB::GetApproximateSizes() adds a parameter to allow the estimation to include data in mem table, with default to be not to include. It is now only supported in skip list mem table. * DB::CompactRange() now accepts CompactRangeOptions instead of multiple parameters. CompactRangeOptions is defined in include/rocksdb/options.h. * CompactRange() will now skip bottommost level compaction for level based compaction if there is no compaction filter, bottommost_level_compaction is introduced in CompactRangeOptions to control when it's possible to skip bottommost level compaction. This means that if you want the compaction to produce a single file you need to set bottommost_level_compaction to BottommostLevelCompaction::kForce. * Add Cache.GetPinnedUsage() to get the size of memory occupied by entries that are in use by the system. * DB::Open() will fail if the compression specified in Options is not linked with the binary. If you see this failure, recompile RocksDB with compression libraries present on your system. Also, previously our default compression was snappy. This behavior is now changed. Now, the default compression is snappy only if it's available on the system. If it isn't we change the default to kNoCompression. * We changed how we account for memory used in block cache. Previously, we only counted the sum of block sizes currently present in block cache. Now, we count the actual memory usage of the blocks.
For example, a block of size 4.5KB will use 8KB memory with jemalloc. This might decrease your memory usage and possibly decrease performance. Increase block cache size if you see this happening after an upgrade. * Add BackupEngineImpl.options_.max_background_operations to specify the maximum number of operations that may be performed in parallel. Add support for parallelized backup and restore. * Add DB::SyncWAL() that does a WAL sync without blocking writers. ## 3.11.0 (5/19/2015) ### New Features * Added a new API Cache::SetCapacity(size_t capacity) to dynamically change the maximum configured capacity of the cache. If the new capacity is less than the existing cache usage, the implementation will try to lower the usage by evicting the necessary number of elements following a strict LRU policy. * Added an experimental API for handling flashcache devices (blacklists background threads from caching their reads) -- NewFlashcacheAwareEnv * If universal compaction is used and options.num_levels > 1, compact files are tried to be stored in none-L0 with smaller files based on options.target_file_size_base. The limitation of DB size when using universal compaction is greatly mitigated by using more levels. You can set num_levels = 1 to make universal compaction behave as before. If you set num_levels > 1 and want to roll back to a previous version, you need to compact all files to a big file in level 0 (by setting target_file_size_base to be large and CompactRange(, nullptr, nullptr, true, 0) and reopen the DB with the same version to rewrite the manifest, and then you can open it using previous releases. * More information about rocksdb background threads are available in Env::GetThreadList(), including the number of bytes read / written by a compaction job, mem-table size and current number of bytes written by a flush job and many more. Check include/rocksdb/thread_status.h for more detail. 
### Public API changes * TablePropertiesCollector::AddUserKey() is added to replace TablePropertiesCollector::Add(). AddUserKey() exposes key type, sequence number and file size up to now to users. * DBOptions::bytes_per_sync used to apply to both WAL and table files. As of 3.11 it applies only to table files. If you want to use this option to sync WAL in the background, please use wal_bytes_per_sync ## 3.10.0 (3/24/2015) ### New Features * GetThreadStatus() is now able to report detailed thread status, including: - Thread Operation including flush and compaction. - The stage of the current thread operation. - The elapsed time in micros since the current thread operation started. More information can be found in include/rocksdb/thread_status.h. In addition, when running db_bench with --thread_status_per_interval, db_bench will also report thread status periodically. * Changed the LRU caching algorithm so that referenced blocks (by iterators) are never evicted. This change made parameter removeScanCountLimit obsolete. Because of that NewLRUCache doesn't take three arguments anymore. table_cache_remove_scan_limit option is also removed * By default we now optimize the compilation for the compilation platform (using -march=native). If you want to build portable binary, use 'PORTABLE=1' before the make command. * We now allow level-compaction to place files in different paths by specifying them in db_paths along with the target_size. Lower numbered levels will be placed earlier in the db_paths and higher numbered levels will be placed later in the db_paths vector. * Potentially big performance improvements if you're using RocksDB with lots of column families (100-1000) * Added BlockBasedTableOptions.format_version option, which allows user to specify which version of block based table he wants. As a general guideline, newer versions have more features, but might not be readable by older versions of RocksDB. 
* Added new block based table format (version 2), which you can enable by setting BlockBasedTableOptions.format_version = 2. This format changes how we encode size information in compressed blocks and should help with memory allocations if you're using Zlib or BZip2 compressions. * MemEnv (env that stores data in memory) is now available in default library build. You can create it by calling NewMemEnv(). * Add SliceTransform.SameResultWhenAppended() to help users determine it is safe to apply prefix bloom/hash. * Block based table now makes use of prefix bloom filter if it is a full filter. * Block based table remembers whether a whole key or prefix based bloom filter is supported in SST files. Do a sanity check when reading the file with users' configuration. * Fixed a bug in ReadOnlyBackupEngine that deleted corrupted backups in some cases, even though the engine was ReadOnly * options.level_compaction_dynamic_level_bytes, a feature to allow RocksDB to pick dynamic base of bytes for levels. With this feature turned on, we will automatically adjust max bytes for each level. The goal of this feature is to have lower bound on size amplification. For more details, see comments in options.h. * Added an abstract base class WriteBatchBase for write batches * Fixed a bug where we start deleting files of a dropped column family even if there are still live references to it ### Public API changes * Deprecated skip_log_error_on_recovery and table_cache_remove_scan_count_limit options. * Logger method logv with log level parameter is now virtual ### RocksJava * Added compression per level API. * MemEnv is now available in RocksJava via RocksMemEnv class. * lz4 compression is now included in rocksjava static library when running `make rocksdbjavastatic`. * Overflowing a size_t when setting rocksdb options now throws an IllegalArgumentException, which removes the necessity for a developer to catch these Exceptions explicitly.
## 3.9.0 (12/8/2014) ### New Features * Add rocksdb::GetThreadList(), which in the future will return the current status of all rocksdb-related threads. We will have more code instruments in the following RocksDB releases. * Change convert function in rocksdb/utilities/convenience.h to return Status instead of boolean. Also add support for nested options in convert function ### Public API changes * New API to create a checkpoint added. Given a directory name, creates a new database which is an image of the existing database. * New API LinkFile added to Env. If you implement your own Env class, an implementation of the API LinkFile will have to be provided. * MemTableRep takes MemTableAllocator instead of Arena ### Improvements * RocksDBLite library now becomes smaller and will be compiled with -fno-exceptions flag. ## 3.8.0 (11/14/2014) ### Public API changes * BackupEngine::NewBackupEngine() was deprecated; please use BackupEngine::Open() from now on. * BackupableDB/RestoreBackupableDB have new GarbageCollect() methods, which will clean up files from corrupt and obsolete backups. * BackupableDB/RestoreBackupableDB have new GetCorruptedBackups() methods which list corrupt backups. ### Cleanup * Bunch of code cleanup, some extra warnings turned on (-Wshadow, -Wshorten-64-to-32, -Wnon-virtual-dtor) ### New features * CompactFiles and EventListener, although they are still in experimental state * Full ColumnFamily support in RocksJava. ## 3.7.0 (11/6/2014) ### Public API changes * Introduce SetOptions() API to allow adjusting a subset of options dynamically online * Introduce 4 new convenient functions for converting Options from string: GetColumnFamilyOptionsFromMap(), GetColumnFamilyOptionsFromString(), GetDBOptionsFromMap(), GetDBOptionsFromString() * Remove WriteBatchWithIndex.Delete() overloads using SliceParts * When opening a DB, if options.max_background_compactions is larger than the existing low pri pool of options.env, it will enlarge it. 
Similarly, if options.max_background_flushes is larger than the existing high pri pool of options.env, it will enlarge it. ## 3.6.0 (10/7/2014) ### Disk format changes * If you're using RocksDB on ARM platforms and you're using default bloom filter, there is a disk format change you need to be aware of. There are three steps you need to do when you convert to new release: 1. turn off filter policy, 2. compact the whole database, 3. turn on filter policy ### Behavior changes * We have refactored our system of stalling writes. Any stall-related statistics' meanings are changed. Instead of per-write stall counts, we now count stalls per-epoch, where epochs are periods between flushes and compactions. You'll find more information in our Tuning Perf Guide once we release RocksDB 3.6. * When disableDataSync=true, we no longer sync the MANIFEST file. * Add identity_as_first_hash property to CuckooTable. SST file needs to be rebuilt to be opened by reader properly. ### Public API changes * Change target_file_size_base type to uint64_t from int. * Remove allow_thread_local. This feature was proved to be stable, so we are turning it always-on. ## 3.5.0 (9/3/2014) ### New Features * Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it. * Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include: no_block_cache, block_cache, block_cache_compressed, block_size, block_size_deviation, block_restart_interval, filter_policy, whole_key_filtering. filter_policy is changed to shared_ptr from a raw pointer. * Remove deprecated options: disable_seek_compaction and db_stats_log_interval * OptimizeForPointLookup() takes one parameter for block cache size. It now builds hash index, bloom filter, and block cache.
### Public API changes * The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key. ## 3.4.0 (8/18/2014) ### New Features * Support Multiple DB paths in universal style compactions * Add feature of storing plain table index and bloom filter in SST file. * CompactRange() will never output compacted files to level 0. This used to be the case when all the compaction input files were at level 0. * Added iterate_upper_bound to define the extent up to which the forward iterator will return entries. This will prevent iterating over delete markers and overwritten entries for edge cases where you want to break out the iterator anyways. This may improve performance in case there are a large number of delete markers or overwritten entries. ### Public API changes * DBOptions.db_paths now is a vector of a DBPath structure which indicates both of path and target size * NewPlainTableFactory instead of bunch of parameters now accepts PlainTableOptions, which is defined in include/rocksdb/table.h * Moved include/utilities/*.h to include/rocksdb/utilities/*.h * Statistics APIs now take uint32_t as type instead of Tickers. Also make two access functions getTickerCount and histogramData const * Add DB property rocksdb.estimate-num-keys, estimated number of live keys in DB. * Add DB::GetIntProperty(), which returns DB properties that are integer as uint64_t. * The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key. ## 3.3.0 (7/10/2014) ### New Features * Added JSON API prototype. * HashLinklist reduces performance outlier caused by skewed bucket by switching data in the bucket from linked list to skip list. Add parameter threshold_use_skiplist in NewHashLinkListRepFactory(). * RocksDB is now able to reclaim storage space more effectively during the compaction process.
This is done by compensating the size of each deletion entry by the 2X average value size, which makes compaction to be triggered by deletion entries more easily. * Add TimeOut API to write. Now WriteOptions have a variable called timeout_hint_us. With timeout_hint_us set to non-zero, any write associated with this timeout_hint_us may be aborted when it runs longer than the specified timeout_hint_us, and it is guaranteed that any write that completes earlier than the specified time-out will not be aborted due to the time-out condition. * Add a rate_limiter option, which controls total throughput of flush and compaction. The throughput is specified in bytes/sec. Flush always has precedence over compaction when available bandwidth is constrained. ### Public API changes * Removed NewTotalOrderPlainTableFactory because it is not used and implemented semantically incorrectly. ## 3.2.0 (06/20/2014) ### Public API changes * We removed seek compaction as a concept from RocksDB because: 1) It makes more sense for spinning disk workloads, while RocksDB is primarily designed for flash and memory, 2) It added some complexity to the important code-paths, 3) None of our internal customers were really using it. Because of that, Options::disable_seek_compaction is now obsolete. It is still a parameter in Options, so it does not break the build, but it does not have any effect. We plan to completely remove it at some point, so we ask users to please remove this option from your code base. * Add two parameters to NewHashLinkListRepFactory() for logging on too many entries in a hash bucket when flushing. * Added new option BlockBasedTableOptions::hash_index_allow_collision. When enabled, prefix hash index for block-based table will not store prefix and allow hash collision, reducing memory consumption. ### New Features * PlainTable now supports a new key encoding: for keys of the same prefix, the prefix is only written once.
It can be enabled through the encoding_type parameter of NewPlainTableFactory() * Add AdaptiveTableFactory, which is used to convert from a DB of PlainTable to BlockBasedTable, or vice versa. It can be created using NewAdaptiveTableFactory() ### Performance Improvements * Tailing Iterator re-implemented with ForwardIterator + Cascading Search Hint, see ~20% throughput improvement. ## 3.1.0 (05/21/2014) ### Public API changes * Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories ### New Features * Hash index for block-based table will be materialized and reconstructed more efficiently. Previously hash index was constructed by scanning the whole table during every table open. * FIFO compaction style ## 3.0.0 (05/05/2014) ### Public API changes * Added _LEVEL to all InfoLogLevel enums * Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes * MemTableRepFactory::CreateMemTableRep() takes info logger as an extra parameter. ### New Features * Column family support * Added an option to use different checksum functions in BlockBasedTableOptions * Added ApplyToAllCacheEntries() function to Cache ## 2.8.0 (04/04/2014) * Removed arena.h from public header files. * By default, checksums are verified on every read from database * Change default value of several options, including: paranoid_checks=true, max_open_files=5000, level0_slowdown_writes_trigger=20, level0_stop_writes_trigger=24, disable_seek_compaction=true, max_background_flushes=1 and allow_mmap_writes=false * Added is_manual_compaction to CompactionFilter::Context * Added "virtual void WaitForJoin()" in class Env. Default operation is no-op. 
* Removed BackupEngine::DeleteBackupsNewerThan() function * Added new option -- verify_checksums_in_compaction * Changed Options.prefix_extractor from raw pointer to shared_ptr (take ownership) Changed HashSkipListRepFactory and HashLinkListRepFactory constructor to not take SliceTransform object (use Options.prefix_extractor implicitly) * Added Env::GetThreadPoolQueueLen(), which returns the waiting queue length of thread pools * Added a command "checkconsistency" in ldb tool, which checks if file system state matches DB state (file existence and file sizes) * Separate options related to block based table to a new struct BlockBasedTableOptions. * WriteBatch has a new function Count() to return total size in the batch, and Data() now returns a reference instead of a copy * Add more counters to perf context. * Supports several more DB properties: compaction-pending, background-errors and cur-size-active-mem-table. ### New Features * If we find one truncated record at the end of the MANIFEST or WAL files, we will ignore it. We assume that writers of these records were interrupted and that we can safely ignore it. * A new SST format "PlainTable" is added, which is optimized for memory-only workloads. It can be created through NewPlainTableFactory() or NewTotalOrderPlainTableFactory(). * A new mem table implementation hash linked list optimizing for the case that there are only few keys for each prefix, which can be created through NewHashLinkListRepFactory(). * Merge operator supports a new function PartialMergeMulti() to allow users to do partial merges against multiple operands. * Now compaction filter has a V2 interface. It buffers the kv-pairs sharing the same key prefix, process them in batches, and return the batched results back to DB. The new interface uses a new structure CompactionFilterContext for the same purpose as CompactionFilter::Context in V1. * Geo-spatial support for locations and radial-search. 
## 2.7.0 (01/28/2014) ### Public API changes * Renamed `StackableDB::GetRawDB()` to `StackableDB::GetBaseDB()`. * Renamed `WriteBatch::Data()` `const std::string& Data() const`. * Renamed class `TableStats` to `TableProperties`. * Deleted class `PrefixHashRepFactory`. Please use `NewHashSkipListRepFactory()` instead. * Supported multi-threaded `EnableFileDeletions()` and `DisableFileDeletions()`. * Added `DB::GetOptions()`. * Added `DB::GetDbIdentity()`. ### New Features * Added [BackupableDB](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F) * Implemented [TailingIterator](https://github.com/facebook/rocksdb/wiki/Tailing-Iterator), a special type of iterator that doesn't create a snapshot (can be used to read newly inserted data) and is optimized for doing sequential reads. * Added property block for table, which allows (1) a table to store its metadata and (2) end user to collect and store properties they are interested in. * Enabled caching index and filter block in block cache (turned off by default). * Supported error report when doing manual compaction. * Supported additional Linux platform flavors and Mac OS. * Put with `SliceParts` - Variant of `Put()` that gathers output like `writev(2)` * Bug fixes and code refactor for compatibility with upcoming Column Family feature. ### Performance Improvements * Huge benchmark performance improvements by multiple efforts. 
For example, increase in readonly QPS from about 530k in 2.6 release to 1.1 million in 2.7 [1] * Speeding up a way RocksDB deleted obsolete files - no longer listing the whole directory under a lock -- decrease in p99 * Use raw pointer instead of shared pointer for statistics: [5b825d](https://github.com/facebook/rocksdb/commit/5b825d6964e26ec3b4bb6faa708ebb1787f1d7bd) -- huge increase in performance -- shared pointers are slow * Optimized locking for `Get()` -- [1fdb3f](https://github.com/facebook/rocksdb/commit/1fdb3f7dc60e96394e3e5b69a46ede5d67fb976c) -- 1.5x QPS increase for some workloads * Cache speedup - [e8d40c3](https://github.com/facebook/rocksdb/commit/e8d40c31b3cca0c3e1ae9abe9b9003b1288026a9) * Implemented autovector, which allocates first N elements on stack. Most of vectors in RocksDB are small. Also, we never want to allocate heap objects while holding a mutex. -- [c01676e4](https://github.com/facebook/rocksdb/commit/c01676e46d3be08c3c140361ef1f5884f47d3b3c) * Lots of efforts to move malloc, memcpy and IO outside of locks rocksdb-6.11.4/INSTALL.md000066400000000000000000000170211370372246700146760ustar00rootroot00000000000000## Compilation **Important**: If you plan to run RocksDB in production, don't compile using default `make` or `make all`. That will compile RocksDB in debug mode, which is much slower than release mode. RocksDB's library should be able to compile without any dependency installed, although we recommend installing some compression libraries (see below). We do depend on newer gcc/clang with C++11 support. There are few options when compiling RocksDB: * [recommended] `make static_lib` will compile librocksdb.a, RocksDB static library. Compiles static library in release mode. * `make shared_lib` will compile librocksdb.so, RocksDB shared library. Compiles shared library in release mode. * `make check` will compile and run all the unit tests. `make check` will compile RocksDB in debug mode. 
* `make all` will compile our static library, and all our tools and unit tests. Our tools depend on gflags. You will need to have gflags installed to run `make all`. This will compile RocksDB in debug mode. Don't use binaries compiled by `make all` in production. * By default the binary we produce is optimized for the platform you're compiling on (`-march=native` or the equivalent). SSE4.2 will thus be enabled automatically if your CPU supports it. To print a warning if your CPU does not support SSE4.2, build with `USE_SSE=1 make static_lib` or, if using CMake, `cmake -DFORCE_SSE42=ON`. If you want to build a portable binary, add `PORTABLE=1` before your make commands, like this: `PORTABLE=1 make static_lib`. ## Dependencies * You can link RocksDB with following compression libraries: - [zlib](http://www.zlib.net/) - a library for data compression. - [bzip2](http://www.bzip.org/) - a library for data compression. - [lz4](https://github.com/lz4/lz4) - a library for extremely fast data compression. - [snappy](http://google.github.io/snappy/) - a library for fast data compression. - [zstandard](http://www.zstd.net) - Fast real-time compression algorithm. * All our tools depend on: - [gflags](https://gflags.github.io/gflags/) - a library that handles command line flags processing. You can compile rocksdb library even if you don't have gflags installed. * If you wish to build the RocksJava static target, then cmake is required for building Snappy. ## Supported platforms * **Linux - Ubuntu** * Upgrade your gcc to version at least 4.8 to get C++11 support. * Install gflags. First, try: `sudo apt-get install libgflags-dev` If this doesn't work and you're using Ubuntu, here's a nice tutorial: (http://askubuntu.com/questions/312173/installing-gflags-12-04) * Install snappy. This is usually as easy as: `sudo apt-get install libsnappy-dev`. * Install zlib. Try: `sudo apt-get install zlib1g-dev`. * Install bzip2: `sudo apt-get install libbz2-dev`. 
* Install lz4: `sudo apt-get install liblz4-dev`. * Install zstandard: `sudo apt-get install libzstd-dev`. * **Linux - CentOS / RHEL** * Upgrade your gcc to version at least 4.8 to get C++11 support: `yum install gcc48-c++` * Install gflags: git clone https://github.com/gflags/gflags.git cd gflags git checkout v2.0 ./configure && make && sudo make install **Notice**: Once installed, please add the include path for gflags to your `CPATH` environment variable and the lib path to `LIBRARY_PATH`. If installed with default settings, the include path will be `/usr/local/include` and the lib path will be `/usr/local/lib`. * Install snappy: sudo yum install snappy snappy-devel * Install zlib: sudo yum install zlib zlib-devel * Install bzip2: sudo yum install bzip2 bzip2-devel * Install lz4: sudo yum install lz4-devel * Install ASAN (optional for debugging): sudo yum install libasan * Install zstandard: wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz mv v1.1.3.tar.gz zstd-1.1.3.tar.gz tar zxvf zstd-1.1.3.tar.gz cd zstd-1.1.3 make && sudo make install * **OS X**: * Install latest C++ compiler that supports C++ 11: * Update XCode: run `xcode-select --install` (or install it from XCode App's settings). * Install via [homebrew](http://brew.sh/). * If you're a first-time developer on macOS, you still need to run: `xcode-select --install` in your command line. * run `brew tap homebrew/versions; brew install gcc48 --use-llvm` to install gcc 4.8 (or higher). 
* run `brew install rocksdb` * **FreeBSD** (11.01): * You can either install RocksDB from the Ports system using `cd /usr/ports/databases/rocksdb && make install`, or you can follow the details below to install dependencies and compile from source code: * Install the dependencies for RocksDB: export BATCH=YES cd /usr/ports/devel/gmake && make install cd /usr/ports/devel/gflags && make install cd /usr/ports/archivers/snappy && make install cd /usr/ports/archivers/bzip2 && make install cd /usr/ports/archivers/liblz4 && make install cd /usr/ports/archivers/zstd && make install cd /usr/ports/devel/git && make install * Install the dependencies for RocksJava (optional): export BATCH=yes cd /usr/ports/java/openjdk7 && make install * Build RocksDB from source: cd ~ git clone https://github.com/facebook/rocksdb.git cd rocksdb gmake static_lib * Build RocksJava from source (optional): cd rocksdb export JAVA_HOME=/usr/local/openjdk7 gmake rocksdbjava * **OpenBSD** (6.3/-current): * As RocksDB is not available in the ports yet, you have to build it on your own: * Install the dependencies for RocksDB: pkg_add gmake gflags snappy bzip2 lz4 zstd git jdk bash findutils gnuwatch * Build RocksDB from source: cd ~ git clone https://github.com/facebook/rocksdb.git cd rocksdb gmake static_lib * Build RocksJava from source (optional): cd rocksdb export JAVA_HOME=/usr/local/jdk-1.8.0 export PATH=$PATH:/usr/local/jdk-1.8.0/bin gmake rocksdbjava * **iOS**: * Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define two important pre-processing macros: `ROCKSDB_LITE` and `IOS_CROSS_COMPILE`. * **Windows**: * For building with MS Visual Studio 13 you will need Update 4 installed. 
* Read and follow the instructions at CMakeLists.txt * Or install via [vcpkg](https://github.com/microsoft/vcpkg) * run `vcpkg install rocksdb:x64-windows` * **AIX 6.1** * Install AIX Toolbox rpms with gcc * Use these environment variables: export PORTABLE=1 export CC=gcc export AR="ar -X64" export EXTRA_ARFLAGS=-X64 export EXTRA_CFLAGS=-maix64 export EXTRA_CXXFLAGS=-maix64 export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc" export LIBPATH=/opt/freeware/lib export JAVA_HOME=/usr/java8_64 export PATH=/opt/freeware/bin:$PATH * **Solaris Sparc** * Install GCC 4.8.2 and higher. * Use these environment variables: export CC=gcc export EXTRA_CFLAGS=-m64 export EXTRA_CXXFLAGS=-m64 export EXTRA_LDFLAGS=-m64 export PORTABLE=1 export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc" rocksdb-6.11.4/LANGUAGE-BINDINGS.md000066400000000000000000000022251370372246700161460ustar00rootroot00000000000000This is the list of all known third-party language bindings for RocksDB. If something is missing, please open a pull request to add it. 
* Java - https://github.com/facebook/rocksdb/tree/master/java * Python * http://python-rocksdb.readthedocs.io/en/latest/ * http://pyrocksdb.readthedocs.org/en/latest/ (unmaintained) * Perl - https://metacpan.org/pod/RocksDB * Node.js - https://npmjs.org/package/rocksdb * Go - https://github.com/tecbot/gorocksdb * Ruby - http://rubygems.org/gems/rocksdb-ruby * Haskell - https://hackage.haskell.org/package/rocksdb-haskell * PHP - https://github.com/Photonios/rocksdb-php * C# - https://github.com/warrenfalk/rocksdb-sharp * Rust * https://github.com/pingcap/rust-rocksdb (used in production fork of https://github.com/spacejam/rust-rocksdb) * https://github.com/spacejam/rust-rocksdb * https://github.com/bh1xuw/rust-rocks * D programming language - https://github.com/b1naryth1ef/rocksdb * Erlang - https://gitlab.com/barrel-db/erlang-rocksdb * Elixir - https://github.com/urbint/rox * Nim - https://github.com/status-im/nim-rocksdb * Swift and Objective-C (iOS/OSX) - https://github.com/iabudiab/ObjectiveRocks rocksdb-6.11.4/LICENSE.Apache000066400000000000000000000261361370372246700154420ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. rocksdb-6.11.4/LICENSE.leveldb000066400000000000000000000030441370372246700156670ustar00rootroot00000000000000This contains code that is from LevelDB, and that code is under the following license: Copyright (c) 2011 The LevelDB Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. rocksdb-6.11.4/Makefile000066400000000000000000002370241370372246700147150ustar00rootroot00000000000000# Copyright (c) 2011 The LevelDB Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. See the AUTHORS file for names of contributors. # Inherit some settings from environment variables, if available #----------------------------------------------- BASH_EXISTS := $(shell which bash) SHELL := $(shell which bash) # Default to python3. Some distros like CentOS 8 do not have `python`. ifeq ($(origin PYTHON), undefined) PYTHON := $(shell which python3 || which python || echo python3) endif export PYTHON CLEAN_FILES = # deliberately empty, so we can append below. CFLAGS += ${EXTRA_CFLAGS} CXXFLAGS += ${EXTRA_CXXFLAGS} LDFLAGS += $(EXTRA_LDFLAGS) MACHINE ?= $(shell uname -m) ARFLAGS = ${EXTRA_ARFLAGS} rs STRIPFLAGS = -S -x # Transform parallel LOG output into something more readable. perl_command = perl -n \ -e '@a=split("\t",$$_,-1); $$t=$$a[8];' \ -e '$$t =~ /.*if\s\[\[\s"(.*?\.[\w\/]+)/ and $$t=$$1;' \ -e '$$t =~ s,^\./,,;' \ -e '$$t =~ s, >.*,,; chomp $$t;' \ -e '$$t =~ /.*--gtest_filter=(.*?\.[\w\/]+)/ and $$t=$$1;' \ -e 'printf "%7.3f %s %s\n", $$a[3], $$a[6] == 0 ? "PASS" : "FAIL", $$t' quoted_perl_command = $(subst ','\'',$(perl_command)) # DEBUG_LEVEL can have three values: # * DEBUG_LEVEL=2; this is the ultimate debug mode. It will compile rocksdb # without any optimizations. 
To compile with level 2, issue `make dbg` # * DEBUG_LEVEL=1; debug level 1 enables all assertions and debug code, but # compiles rocksdb with -O2 optimizations. this is the default debug level. # `make all` or `make ` compile RocksDB with debug level 1. # We use this debug level when developing RocksDB. # * DEBUG_LEVEL=0; this is the debug level we use for release. If you're # running rocksdb in production you most definitely want to compile RocksDB # with debug level 0. To compile with level 0, run `make shared_lib`, # `make install-shared`, `make static_lib`, `make install-static` or # `make install` # Set the default DEBUG_LEVEL to 1 DEBUG_LEVEL?=1 ifeq ($(MAKECMDGOALS),dbg) DEBUG_LEVEL=2 endif ifeq ($(MAKECMDGOALS),clean) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),release) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),shared_lib) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),install-shared) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),static_lib) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),install-static) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),install) DEBUG_LEVEL=0 endif ifeq ($(MAKECMDGOALS),rocksdbjavastatic) ifneq ($(DEBUG_LEVEL),2) DEBUG_LEVEL=0 endif endif ifeq ($(MAKECMDGOALS),rocksdbjavastaticrelease) ifneq ($(DEBUG_LEVEL),2) DEBUG_LEVEL=0 endif endif ifeq ($(MAKECMDGOALS),rocksdbjavastaticreleasedocker) ifneq ($(DEBUG_LEVEL),2) DEBUG_LEVEL=0 endif endif ifeq ($(MAKECMDGOALS),rocksdbjavastaticpublish) DEBUG_LEVEL=0 endif $(info $$DEBUG_LEVEL is ${DEBUG_LEVEL}) # Lite build flag. LITE ?= 0 ifeq ($(LITE), 0) ifneq ($(filter -DROCKSDB_LITE,$(OPT)),) # Be backward compatible and support older format where OPT=-DROCKSDB_LITE is # specified instead of LITE=1 on the command line. LITE=1 endif else ifeq ($(LITE), 1) ifeq ($(filter -DROCKSDB_LITE,$(OPT)),) OPT += -DROCKSDB_LITE endif endif # Figure out optimize level. 
ifneq ($(DEBUG_LEVEL), 2) ifeq ($(LITE), 0) OPT += -O2 else OPT += -Os endif endif # compile with -O2 if debug level is not 2 ifneq ($(DEBUG_LEVEL), 2) OPT += -fno-omit-frame-pointer # Skip for archs that don't support -momit-leaf-frame-pointer ifeq (,$(shell $(CXX) -fsyntax-only -momit-leaf-frame-pointer -xc /dev/null 2>&1)) OPT += -momit-leaf-frame-pointer endif endif ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1)) CXXFLAGS += -DHAS_ALTIVEC CFLAGS += -DHAS_ALTIVEC HAS_ALTIVEC=1 endif ifeq (,$(shell $(CXX) -fsyntax-only -mcpu=power8 -xc /dev/null 2>&1)) CXXFLAGS += -DHAVE_POWER8 CFLAGS += -DHAVE_POWER8 HAVE_POWER8=1 endif ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc+crypto -xc /dev/null 2>&1)) CXXFLAGS += -march=armv8-a+crc+crypto CFLAGS += -march=armv8-a+crc+crypto ARMCRC_SOURCE=1 endif # if we're compiling for release, compile without debug code (-DNDEBUG) ifeq ($(DEBUG_LEVEL),0) OPT += -DNDEBUG ifneq ($(USE_RTTI), 1) CXXFLAGS += -fno-rtti else CXXFLAGS += -DROCKSDB_USE_RTTI endif else ifneq ($(USE_RTTI), 0) CXXFLAGS += -DROCKSDB_USE_RTTI else CXXFLAGS += -fno-rtti endif ifdef ASSERT_STATUS_CHECKED ifeq ($(filter -DROCKSDB_ASSERT_STATUS_CHECKED,$(OPT)),) OPT += -DROCKSDB_ASSERT_STATUS_CHECKED endif endif $(warning Warning: Compiling in debug mode. 
Don't use the resulting binary in production) endif #----------------------------------------------- include src.mk AM_DEFAULT_VERBOSITY = 0 AM_V_GEN = $(am__v_GEN_$(V)) am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_$(V)) am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) am__v_at_0 = @ am__v_at_1 = AM_V_CC = $(am__v_CC_$(V)) am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = AM_V_CCLD = $(am__v_CCLD_$(V)) am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY)) ifneq ($(SKIP_LINK), 1) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = else am__v_CCLD_0 = @echo " !CCLD " $@; true skip am__v_CCLD_1 = true skip endif AM_V_AR = $(am__v_AR_$(V)) am__v_AR_ = $(am__v_AR_$(AM_DEFAULT_VERBOSITY)) am__v_AR_0 = @echo " AR " $@; am__v_AR_1 = ifdef ROCKSDB_USE_LIBRADOS LIB_SOURCES += utilities/env_librados.cc LDFLAGS += -lrados endif AM_LINK = $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) # Detect what platform we're building on. # Export some common variables that might have been passed as Make variables # instead of environment variables. 
# Run build_detect_platform with the listed variables exported; its output
# file make_config.mk is included immediately afterwards. The value of
# `dummy` itself is not used in this part of the file.
dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \
                  export COMPILE_WITH_ASAN="$(COMPILE_WITH_ASAN)"; \
                  export COMPILE_WITH_TSAN="$(COMPILE_WITH_TSAN)"; \
                  export COMPILE_WITH_UBSAN="$(COMPILE_WITH_UBSAN)"; \
                  export PORTABLE="$(PORTABLE)"; \
                  export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \
                  export USE_CLANG="$(USE_CLANG)"; \
                  "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk"))
# this file is generated by the previous line to set build flags and sources
include make_config.mk

export JAVAC_ARGS
CLEAN_FILES += make_config.mk

# Warn about every path-looking token in make_config.mk that does not exist.
missing_make_config_paths := $(shell \
  grep "\./\S*\|/\S*" -o $(CURDIR)/make_config.mk | \
  while read path; \
    do [ -e $$path ] || echo $$path; \
  done | sort | uniq)

$(foreach path, $(missing_make_config_paths), \
  $(warning Warning: $(path) does not exist))

ifeq ($(PLATFORM), OS_AIX)
# no debug info
else ifneq ($(PLATFORM), IOS)
CFLAGS += -g
CXXFLAGS += -g
else
# no debug info for IOS, that will make our library big
OPT += -DNDEBUG
endif

ifeq ($(PLATFORM), OS_AIX)
ARFLAGS = -X64 rs
STRIPFLAGS = -X64 -x
endif

ifeq ($(PLATFORM), OS_SOLARIS)
  PLATFORM_CXXFLAGS += -D _GLIBCXX_USE_C99
endif
ifneq ($(filter -DROCKSDB_LITE,$(OPT)),)
  # found
  CFLAGS += -fno-exceptions
  CXXFLAGS += -fno-exceptions
  # LUA is not supported under ROCKSDB_LITE
  LUA_PATH =
endif

# ASAN doesn't work well with jemalloc. If we're compiling with ASAN, we should use regular malloc.
ifdef COMPILE_WITH_ASAN
  DISABLE_JEMALLOC=1
  EXEC_LDFLAGS += -fsanitize=address
  PLATFORM_CCFLAGS += -fsanitize=address
  PLATFORM_CXXFLAGS += -fsanitize=address
endif

# TSAN doesn't work well with jemalloc. If we're compiling with TSAN, we should use regular malloc.
ifdef COMPILE_WITH_TSAN
  DISABLE_JEMALLOC=1
  EXEC_LDFLAGS += -fsanitize=thread
  PLATFORM_CCFLAGS += -fsanitize=thread -fPIC -DFOLLY_SANITIZE_THREAD
  PLATFORM_CXXFLAGS += -fsanitize=thread -fPIC -DFOLLY_SANITIZE_THREAD
  # Turn off -pg when enabling TSAN testing, because that induces
  # a link failure. TODO: find the root cause
  PROFILING_FLAGS =
  # LUA is not supported under TSAN
  LUA_PATH =
  # Limit keys for crash test under TSAN to avoid error:
  # "ThreadSanitizer: DenseSlabAllocator overflow. Dying."
  CRASH_TEST_EXT_ARGS += --max_key=1000000
endif

# AIX doesn't work with -pg
ifeq ($(PLATFORM), OS_AIX)
  PROFILING_FLAGS =
endif

# UBSAN doesn't work well with jemalloc. If we're compiling with UBSAN, we should use regular malloc.
ifdef COMPILE_WITH_UBSAN
  DISABLE_JEMALLOC=1
  # Suppress alignment warning because murmurhash relies on casting unaligned
  # memory to integer. Fixing it may cause performance regression. 3-way crc32
  # relies on it too, although it can be rewritten to eliminate with minimal
  # performance regression.
  EXEC_LDFLAGS += -fsanitize=undefined -fno-sanitize-recover=all
  PLATFORM_CCFLAGS += -fsanitize=undefined -fno-sanitize-recover=all -DROCKSDB_UBSAN_RUN
  PLATFORM_CXXFLAGS += -fsanitize=undefined -fno-sanitize-recover=all -DROCKSDB_UBSAN_RUN
endif

ifdef ROCKSDB_VALGRIND_RUN
  PLATFORM_CCFLAGS += -DROCKSDB_VALGRIND_RUN
  PLATFORM_CXXFLAGS += -DROCKSDB_VALGRIND_RUN
endif

# jemalloc flags are applied only when no sanitizer (or the caller) has set
# DISABLE_JEMALLOC above.
ifndef DISABLE_JEMALLOC
  ifdef JEMALLOC
    PLATFORM_CXXFLAGS += -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE
    PLATFORM_CCFLAGS += -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE
  endif
  ifdef WITH_JEMALLOC_FLAG
    PLATFORM_LDFLAGS += -ljemalloc
    JAVA_LDFLAGS += -ljemalloc
  endif
  EXEC_LDFLAGS := $(JEMALLOC_LIB) $(EXEC_LDFLAGS)
  PLATFORM_CXXFLAGS += $(JEMALLOC_INCLUDE)
  PLATFORM_CCFLAGS += $(JEMALLOC_INCLUDE)
endif

ifndef USE_FOLLY_DISTRIBUTED_MUTEX
  USE_FOLLY_DISTRIBUTED_MUTEX=0
endif

export GTEST_THROW_ON_FAILURE=1
export GTEST_HAS_EXCEPTIONS=1
GTEST_DIR = third-party/gtest-1.8.1/fused-src
# AIX: pre-defined system headers are surrounded by an extern "C" block
ifeq ($(PLATFORM), OS_AIX)
  PLATFORM_CCFLAGS += -I$(GTEST_DIR)
  PLATFORM_CXXFLAGS += -I$(GTEST_DIR)
else
  PLATFORM_CCFLAGS += -isystem $(GTEST_DIR)
  PLATFORM_CXXFLAGS += -isystem $(GTEST_DIR)
endif

ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
  FOLLY_DIR = ./third-party/folly
  # AIX: pre-defined system headers are surrounded by an extern "C" block
  ifeq ($(PLATFORM), OS_AIX)
    PLATFORM_CCFLAGS += -I$(FOLLY_DIR)
    PLATFORM_CXXFLAGS += -I$(FOLLY_DIR)
  else
    PLATFORM_CCFLAGS += -isystem $(FOLLY_DIR)
    PLATFORM_CXXFLAGS += -isystem $(FOLLY_DIR)
  endif
endif

ifdef TEST_CACHE_LINE_SIZE
  PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
  PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE)
endif

# This (the first rule) must depend on "all".
default: all

WARNING_FLAGS = -W -Wextra -Wall -Wsign-compare -Wshadow \
  -Wunused-parameter

ifeq ($(PLATFORM), OS_OPENBSD)
  WARNING_FLAGS += -Wno-unused-lambda-capture
endif

ifndef DISABLE_WARNING_AS_ERROR
  WARNING_FLAGS += -Werror
endif

# Optional Lua support: the build stops with an error when LUA_PATH is set
# but the header or the static library cannot be found.
ifdef LUA_PATH

ifndef LUA_INCLUDE
LUA_INCLUDE=$(LUA_PATH)/include
endif

LUA_INCLUDE_FILE=$(LUA_INCLUDE)/lualib.h

ifeq ("$(wildcard $(LUA_INCLUDE_FILE))", "")
# LUA_INCLUDE_FILE does not exist
$(error Cannot find lualib.h under $(LUA_INCLUDE). Try to specify both LUA_PATH and LUA_INCLUDE manually)
endif
LUA_FLAGS = -I$(LUA_INCLUDE) -DLUA -DLUA_COMPAT_ALL
CFLAGS += $(LUA_FLAGS)
CXXFLAGS += $(LUA_FLAGS)

ifndef LUA_LIB
LUA_LIB = $(LUA_PATH)/lib/liblua.a
endif
ifeq ("$(wildcard $(LUA_LIB))", "") # LUA_LIB does not exist
$(error $(LUA_LIB) does not exist. Try to specify both LUA_PATH and LUA_LIB manually)
endif
EXEC_LDFLAGS += $(LUA_LIB)

endif

ifeq ($(NO_THREEWAY_CRC32C), 1)
  CXXFLAGS += -DNO_THREEWAY_CRC32C
endif

CFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers

LDFLAGS += $(PLATFORM_LDFLAGS)

# If NO_UPDATE_BUILD_VERSION is set we don't update util/build_version.cc, but
# the file needs to already exist or else the build will fail
ifndef NO_UPDATE_BUILD_VERSION
date := $(shell date +%F)
ifdef FORCE_GIT_SHA
  git_sha := $(FORCE_GIT_SHA)
else
  git_sha := $(shell git rev-parse HEAD 2>/dev/null)
endif
gen_build_version = sed -e s/@@GIT_SHA@@/$(git_sha)/ -e s/@@GIT_DATE_TIME@@/$(date)/ util/build_version.cc.in

# Record the version of the source that we are compiling.
# We keep a record of the git revision in this file. It is then built
# as a regular source file as part of the compilation process.
# One can run "strings executable_filename | grep _build_" to find
# the version of the source that we used to build the executable file.
# The file is regenerated into $@-t on every run (FORCE) and only moved over
# $@ when the content differs, so an unchanged file keeps its timestamp.
FORCE:
util/build_version.cc: FORCE
	$(AM_V_GEN)rm -f $@-t
	$(AM_V_at)$(gen_build_version) > $@-t
	$(AM_V_at)if test -f $@; then \
	  cmp -s $@-t $@ && rm -f $@-t || mv -f $@-t $@; \
	else mv -f $@-t $@; fi
endif

LIBOBJECTS = $(LIB_SOURCES:.cc=.o)
ifeq ($(HAVE_POWER8),1)
LIB_CC_OBJECTS = $(LIB_SOURCES:.cc=.o)
LIBOBJECTS += $(LIB_SOURCES_C:.c=.o)
LIBOBJECTS += $(LIB_SOURCES_ASM:.S=.o)
else
LIB_CC_OBJECTS = $(LIB_SOURCES:.cc=.o)
endif

LIBOBJECTS += $(TOOL_LIB_SOURCES:.cc=.o)
MOCKOBJECTS = $(MOCK_LIB_SOURCES:.cc=.o)
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
  FOLLYOBJECTS = $(FOLLY_SOURCES:.cpp=.o)
endif

GTEST = $(GTEST_DIR)/gtest/gtest-all.o
TESTUTIL = ./test_util/testutil.o
TESTHARNESS = ./test_util/testharness.o $(TESTUTIL) $(MOCKOBJECTS) $(GTEST)
VALGRIND_ERROR = 2
# Any caller-supplied VALGRIND_VER value is used as a prefix of "valgrind".
VALGRIND_VER := $(join $(VALGRIND_VER),valgrind)

VALGRIND_OPTS = --error-exitcode=$(VALGRIND_ERROR) --leak-check=full

BENCHTOOLOBJECTS = $(BENCH_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)

ANALYZETOOLOBJECTS = $(ANALYZER_LIB_SOURCES:.cc=.o)

# Debug builds of the stress tool additionally link the test harness.
ifeq ($(DEBUG_LEVEL),0)
STRESSTOOLOBJECTS = $(STRESS_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)
else
STRESSTOOLOBJECTS = $(STRESS_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) \
  $(TESTHARNESS)
endif

EXPOBJECTS = $(LIBOBJECTS) $(TESTUTIL)

# All unit-test binaries. Each name must appear only once: a duplicate makes
# the test run twice and makes the greedy ROCKSDBTESTS_START match below
# start from the later copy.
TESTS = \
  db_basic_test \
  db_with_timestamp_basic_test \
  db_encryption_test \
  db_test2 \
  external_sst_file_basic_test \
  auto_roll_logger_test \
  bloom_test \
  dynamic_bloom_test \
  c_test \
  checkpoint_test \
  crc32c_test \
  coding_test \
  inlineskiplist_test \
  env_basic_test \
  env_test \
  env_logger_test \
  io_posix_test \
  hash_test \
  random_test \
  thread_local_test \
  work_queue_test \
  rate_limiter_test \
  perf_context_test \
  iostats_context_test \
  db_wal_test \
  db_block_cache_test \
  db_test \
  db_logical_block_size_cache_test \
  db_blob_index_test \
  db_iter_test \
  db_iter_stress_test \
  db_log_iter_test \
  db_bloom_filter_test \
  db_compaction_filter_test \
  db_compaction_test \
  db_dynamic_level_test \
  db_flush_test \
  db_inplace_update_test \
  db_iterator_test \
  db_memtable_test \
  db_merge_operator_test \
  db_merge_operand_test \
  db_options_test \
  db_range_del_test \
  db_secondary_test \
  db_sst_test \
  db_tailing_iter_test \
  db_io_failure_test \
  db_properties_test \
  db_table_properties_test \
  db_statistics_test \
  db_write_test \
  error_handler_fs_test \
  autovector_test \
  blob_db_test \
  cleanable_test \
  column_family_test \
  table_properties_collector_test \
  arena_test \
  memkind_kmem_allocator_test \
  block_test \
  data_block_hash_index_test \
  cache_test \
  corruption_test \
  slice_test \
  slice_transform_test \
  dbformat_test \
  fault_injection_test \
  filelock_test \
  filename_test \
  random_access_file_reader_test \
  file_reader_writer_test \
  block_based_filter_block_test \
  block_based_table_reader_test \
  full_filter_block_test \
  partitioned_filter_block_test \
  hash_table_test \
  histogram_test \
  log_test \
  manual_compaction_test \
  mock_env_test \
  memtable_list_test \
  merge_helper_test \
  memory_test \
  merge_test \
  merger_test \
  util_merge_operators_test \
  options_file_test \
  reduce_levels_test \
  plain_table_db_test \
  comparator_db_test \
  external_sst_file_test \
  import_column_family_test \
  prefix_test \
  skiplist_test \
  write_buffer_manager_test \
  stringappend_test \
  cassandra_format_test \
  cassandra_functional_test \
  cassandra_row_merge_test \
  cassandra_serialize_test \
  ttl_test \
  backupable_db_test \
  cache_simulator_test \
  sim_cache_test \
  version_edit_test \
  version_set_test \
  compaction_picker_test \
  version_builder_test \
  file_indexer_test \
  write_batch_test \
  write_batch_with_index_test \
  write_controller_test \
  deletefile_test \
  obsolete_files_test \
  table_test \
  block_fetcher_test \
  delete_scheduler_test \
  options_test \
  options_settable_test \
  options_util_test \
  event_logger_test \
  timer_queue_test \
  cuckoo_table_builder_test \
  cuckoo_table_reader_test \
  cuckoo_table_db_test \
  flush_job_test \
  wal_manager_test \
  listener_test \
  compaction_iterator_test \
  compaction_job_test \
  thread_list_test \
  sst_dump_test \
  compact_files_test \
  optimistic_transaction_test \
  write_callback_test \
  heap_test \
  compact_on_deletion_collector_test \
  compaction_job_stats_test \
  option_change_migration_test \
  transaction_test \
  transaction_lock_mgr_test \
  ldb_cmd_test \
  persistent_cache_test \
  statistics_test \
  stats_history_test \
  lru_cache_test \
  object_registry_test \
  repair_test \
  env_timed_test \
  write_prepared_transaction_test \
  write_unprepared_transaction_test \
  db_universal_compaction_test \
  trace_analyzer_test \
  repeatable_thread_test \
  range_tombstone_fragmenter_test \
  range_del_aggregator_test \
  sst_file_reader_test \
  block_cache_tracer_test \
  block_cache_trace_analyzer_test \
  defer_test \
  blob_file_addition_test \
  blob_file_garbage_test \
  timer_test \
  db_with_timestamp_compaction_test \
  testutil_test \

# Tests that are split into one generated t/run-* script per gtest case
# (see the parallel_% rules) instead of being run as a single binary.
PARALLEL_TEST = \
  backupable_db_test \
  db_bloom_filter_test \
  db_compaction_filter_test \
  db_compaction_test \
  db_merge_operator_test \
  db_sst_test \
  db_test \
  db_universal_compaction_test \
  db_wal_test \
  external_sst_file_test \
  import_column_family_test \
  fault_injection_test \
  file_reader_writer_test \
  inlineskiplist_test \
  manual_compaction_test \
  persistent_cache_test \
  table_test \
  transaction_test \
  transaction_lock_mgr_test \
  write_prepared_transaction_test \
  write_unprepared_transaction_test \

ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
  TESTS += folly_synchronization_distributed_mutex_test
  PARALLEL_TEST += folly_synchronization_distributed_mutex_test
endif

# options_settable_test doesn't pass with UBSAN as we use hack in the test
ifdef COMPILE_WITH_UBSAN
  TESTS := $(shell echo $(TESTS) | sed 's/\boptions_settable_test\b//g')
endif
ifdef ASSERT_STATUS_CHECKED
  # This is a new check for which we will add support incrementally. The
  # whitelist can be removed once support is fully added.
  TESTS_WHITELIST = \
    arena_test \
    autovector_test \
    blob_file_addition_test \
    blob_file_garbage_test \
    bloom_test \
    cassandra_format_test \
    cassandra_row_merge_test \
    cassandra_serialize_test \
    cleanable_test \
    coding_test \
    crc32c_test \
    dbformat_test \
    defer_test \
    dynamic_bloom_test \
    event_logger_test \
    file_indexer_test \
    folly_synchronization_distributed_mutex_test \
    hash_table_test \
    hash_test \
    heap_test \
    histogram_test \
    inlineskiplist_test \
    io_posix_test \
    iostats_context_test \
    memkind_kmem_allocator_test \
    merger_test \
    mock_env_test \
    object_registry_test \
    options_settable_test \
    options_test \
    random_test \
    range_del_aggregator_test \
    range_tombstone_fragmenter_test \
    repeatable_thread_test \
    skiplist_test \
    slice_test \
    statistics_test \
    thread_local_test \
    timer_queue_test \
    timer_test \
    util_merge_operators_test \
    version_edit_test \
    work_queue_test \
    write_controller_test \

  TESTS := $(filter $(TESTS_WHITELIST),$(TESTS))
  PARALLEL_TEST := $(filter $(TESTS_WHITELIST),$(PARALLEL_TEST))
endif

# SUBSET is TESTS, optionally trimmed to the range
# [ROCKSDBTESTS_START, ROCKSDBTESTS_END) by the two sed expressions below.
SUBSET := $(TESTS)
ifdef ROCKSDBTESTS_START
  SUBSET := $(shell echo $(SUBSET) | sed 's/^.*$(ROCKSDBTESTS_START)/$(ROCKSDBTESTS_START)/')
endif

ifdef ROCKSDBTESTS_END
  SUBSET := $(shell echo $(SUBSET) | sed 's/$(ROCKSDBTESTS_END).*//')
endif

TOOLS = \
  sst_dump \
  db_sanity_test \
  db_stress \
  write_stress \
  ldb \
  db_repl_stress \
  rocksdb_dump \
  rocksdb_undump \
  blob_dump \
  trace_analyzer \
  block_cache_trace_analyzer \

TEST_LIBS = \
  librocksdb_env_basic_test.a

# TODO: add back forward_iterator_bench, after making it build in all environments.
BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench filter_bench persistent_cache_bench range_del_aggregator_bench

# if user didn't config LIBNAME, set the default
ifeq ($(LIBNAME),)
# we should only run rocksdb in production with DEBUG_LEVEL 0
ifeq ($(DEBUG_LEVEL),0)
  LIBNAME=librocksdb
else
  LIBNAME=librocksdb_debug
endif
endif
LIBRARY = ${LIBNAME}.a
TOOLS_LIBRARY = ${LIBNAME}_tools.a
STRESS_LIBRARY = ${LIBNAME}_stress.a

# Version components are taken from the third space-separated field of the
# matching lines in include/rocksdb/version.h.
ROCKSDB_MAJOR = $(shell egrep "ROCKSDB_MAJOR.[0-9]" include/rocksdb/version.h | cut -d ' ' -f 3)
ROCKSDB_MINOR = $(shell egrep "ROCKSDB_MINOR.[0-9]" include/rocksdb/version.h | cut -d ' ' -f 3)
ROCKSDB_PATCH = $(shell egrep "ROCKSDB_PATCH.[0-9]" include/rocksdb/version.h | cut -d ' ' -f 3)

# Repeats the "default: all" rule declared earlier; kept as in the original.
default: all

#-----------------------------------------------
# Create platform independent shared libraries.
#-----------------------------------------------
# Shared-library names and rules. SHARED4 is the real file that gets linked;
# SHARED1..SHARED3 are either the same name (unversioned platforms) or
# symlinks to SHARED4 (versioned platforms).
ifneq ($(PLATFORM_SHARED_EXT),)

ifneq ($(PLATFORM_SHARED_VERSIONED),true)
SHARED1 = ${LIBNAME}.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1)
SHARED3 = $(SHARED1)
SHARED4 = $(SHARED1)
SHARED = $(SHARED1)
else
SHARED_MAJOR = $(ROCKSDB_MAJOR)
SHARED_MINOR = $(ROCKSDB_MINOR)
SHARED_PATCH = $(ROCKSDB_PATCH)
SHARED1 = ${LIBNAME}.$(PLATFORM_SHARED_EXT)
# On OS_MACOSX the version numbers go before the extension; elsewhere they
# are appended after it.
ifeq ($(PLATFORM), OS_MACOSX)
SHARED_OSX = $(LIBNAME).$(SHARED_MAJOR)
SHARED2 = $(SHARED_OSX).$(PLATFORM_SHARED_EXT)
SHARED3 = $(SHARED_OSX).$(SHARED_MINOR).$(PLATFORM_SHARED_EXT)
SHARED4 = $(SHARED_OSX).$(SHARED_MINOR).$(SHARED_PATCH).$(PLATFORM_SHARED_EXT)
else
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
SHARED4 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR).$(SHARED_PATCH)
endif
SHARED = $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4)
$(SHARED1): $(SHARED4)
	ln -fs $(SHARED4) $(SHARED1)
$(SHARED2): $(SHARED4)
	ln -fs $(SHARED4) $(SHARED2)
$(SHARED3): $(SHARED4)
	ln -fs $(SHARED4) $(SHARED3)
endif

# Objects for the shared library are built under shared-objects/ with
# $(PLATFORM_SHARED_CFLAGS) added (see the static pattern rule below).
ifeq ($(HAVE_POWER8),1)
SHARED_C_OBJECTS = $(LIB_SOURCES_C:.c=.o)
SHARED_ASM_OBJECTS = $(LIB_SOURCES_ASM:.S=.o)
SHARED_C_LIBOBJECTS = $(patsubst %.o,shared-objects/%.o,$(SHARED_C_OBJECTS))
SHARED_ASM_LIBOBJECTS = $(patsubst %.o,shared-objects/%.o,$(SHARED_ASM_OBJECTS))
shared_libobjects = $(patsubst %,shared-objects/%,$(LIB_CC_OBJECTS))
else
shared_libobjects = $(patsubst %,shared-objects/%,$(LIBOBJECTS))
endif

CLEAN_FILES += shared-objects
shared_all_libobjects = $(shared_libobjects)

ifeq ($(HAVE_POWER8),1)
shared-ppc-objects = $(SHARED_C_LIBOBJECTS) $(SHARED_ASM_LIBOBJECTS)

shared-objects/util/crc32c_ppc.o: util/crc32c_ppc.c
	$(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@

shared-objects/util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S
	$(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@
endif
$(shared_libobjects): shared-objects/%.o: %.cc
	$(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@

ifeq ($(HAVE_POWER8),1)
shared_all_libobjects = $(shared_libobjects) $(shared-ppc-objects)
endif
# $(SHARED3) is appended directly to $(PLATFORM_SHARED_LDFLAGS) with no space
# in between.
# NOTE(review): presumably PLATFORM_SHARED_LDFLAGS ends in a soname/install-name
# option that takes $(SHARED3) as its value -- confirm in build_detect_platform.
$(SHARED4): $(shared_all_libobjects)
	$(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(shared_all_libobjects) $(LDFLAGS) -o $@

endif # PLATFORM_SHARED_EXT

.PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests package \
	release tags tags0 valgrind_check whitebox_crash_test format static_lib shared_lib all \
	dbg rocksdbjavastatic rocksdbjava install install-static install-shared uninstall \
	analyze tools tools_lib \
	blackbox_crash_test_with_atomic_flush whitebox_crash_test_with_atomic_flush \
	blackbox_crash_test_with_txn whitebox_crash_test_with_txn

# Aggregate targets.
all: $(LIBRARY) $(BENCHMARKS) tools tools_lib test_libs $(TESTS)

all_but_some_tests: $(LIBRARY) $(BENCHMARKS) tools tools_lib test_libs $(SUBSET)

static_lib: $(LIBRARY)

shared_lib: $(SHARED)

stress_lib: $(STRESS_LIBRARY)

tools: $(TOOLS)

tools_lib: $(TOOLS_LIBRARY)

test_libs: $(TEST_LIBS)

dbg: $(LIBRARY) $(BENCHMARKS) tools $(TESTS)

# creates static library and programs
release:
	$(MAKE) clean
	DEBUG_LEVEL=0 $(MAKE) static_lib tools db_bench

coverage:
	$(MAKE) clean
	COVERAGEFLAGS="-fprofile-arcs -ftest-coverage" LDFLAGS+="-lgcov" $(MAKE) J=1 all check
	cd coverage && ./coverage_test.sh
# Delete intermediate files
	$(FIND) . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm {} \;

# NOTE(review): the trailing "," before the closing ")" is parsed as part of
# the second argument, so this condition looks like it is true for every
# goal, not just check/parallel_check. Other rules (valgrind_check,
# gen_parallel_tests, parloop) also read $(TMPD), so confirm before
# tightening it.
ifneq (,$(filter check parallel_check,$(MAKECMDGOALS)),)
# Use /dev/shm if it has the sticky bit set (otherwise, /tmp),
# and create a randomly-named rocksdb.XXXX directory therein.
# We'll use that directory in the "make check" rules.
ifeq ($(TMPD),)
TMPDIR := $(shell echo $${TMPDIR:-/tmp})
TMPD := $(shell f=/dev/shm; test -k $$f || f=$(TMPDIR); \
  perl -le 'use File::Temp "tempdir";' \
    -e 'print tempdir("'$$f'/rocksdb.XXXX", CLEANUP => 0)')
endif
endif

# Run all tests in parallel, accumulating per-test logs in t/log-*.
#
# Each t/run-* file is a tiny generated bourne shell script that invokes one of
# sub-tests. Why use a file for this?
# Using a generated file per sub-test makes the invocation of
# parallel below simpler, which in turn makes the parsing of parallel's
# LOG simpler (the latter is for live monitoring as parallel
# tests run).
#
# Test names are extracted by running tests with --gtest_list_tests.
# This filter removes the "#"-introduced comments, and expands to
# fully-qualified names by changing input like this:
#
#   DBTest.
#     Empty
#     WriteEmptyBatch
#   MultiThreaded/MultiThreadedDBTest.
#     MultiThreaded/0  # GetParam() = 0
#     MultiThreaded/1  # GetParam() = 1
#
# into this:
#
#   DBTest.Empty
#   DBTest.WriteEmptyBatch
#   MultiThreaded/MultiThreadedDBTest.MultiThreaded/0
#   MultiThreaded/MultiThreadedDBTest.MultiThreaded/1
#

# One phony target parallel_<binary> per entry of PARALLEL_TEST. Its recipe
# writes one script t/run-<binary>-<test name, "/" replaced by "-"> per
# listed gtest case; each script creates its own directory under $(TMPD),
# and runs the single case with TEST_TMPDIR pointing at it, prefixed by
# $(DRIVER) (e.g. valgrind) when that variable is set.
# NOTE: $${TEST_NAME//\//-} is a bash-style substitution -- this recipe
# presumably relies on SHELL being bash; confirm where SHELL is set.
parallel_tests = $(patsubst %,parallel_%,$(PARALLEL_TEST))
.PHONY: gen_parallel_tests $(parallel_tests)
$(parallel_tests): $(PARALLEL_TEST)
	$(AM_V_at)TEST_BINARY=$(patsubst parallel_%,%,$@); \
	TEST_NAMES=` \
	  ./$$TEST_BINARY --gtest_list_tests \
	  | perl -n \
	    -e 's/ *\#.*//;' \
	    -e '/^(\s*)(\S+)/; !$$1 and do {$$p=$$2; break};' \
	    -e 'print qq! $$p$$2!'`; \
	for TEST_NAME in $$TEST_NAMES; do \
	  TEST_SCRIPT=t/run-$$TEST_BINARY-$${TEST_NAME//\//-}; \
	  echo " GEN " $$TEST_SCRIPT; \
	  printf '%s\n' \
	    '#!/bin/sh' \
	    "d=\$(TMPD)$$TEST_SCRIPT" \
	    'mkdir -p $$d' \
	    "TEST_TMPDIR=\$$d $(DRIVER) ./$$TEST_BINARY --gtest_filter=$$TEST_NAME" \
	  > $$TEST_SCRIPT; \
	  chmod a=rx $$TEST_SCRIPT; \
	done

# Recreate the t/ directory content from scratch, then build every
# parallel_<binary> target in a sub-make.
gen_parallel_tests:
	$(AM_V_at)mkdir -p t
	$(AM_V_at)rm -f t/run-*
	$(MAKE) $(parallel_tests)

# Reorder input lines (which are one per test) so that the
# longest-running tests appear first in the output.
# Do this by prefixing each selected name with its duration,
# sort the resulting names, and remove the leading numbers.
# FIXME: the "100" we prepend is a fake time, for now.
# FIXME: squirrel away timings from each run and use them
# (when present) on subsequent runs to order these tests.
#
# Without this reordering, these two tests would happen to start only
# after almost all other tests had completed, thus adding 100 seconds
# to the duration of parallel "make check". That's the difference
# between 4 minutes (old) and 2m20s (new).
#
# 152.120 PASS t/DBTest.FileCreationRandomFailure
# 107.816 PASS t/DBTest.EncodeDecompressedBlockSizeTest
#
slow_test_regexp = \
	^.*SnapshotConcurrentAccessTest.*$$|^t/run-table_test-HarnessTest.Randomized$$|^t/run-db_test-.*(?:FileCreationRandomFailure|EncodeDecompressedBlockSizeTest)$$|^.*RecoverFromCorruptedWALWithoutFlush$$
# Lines matching slow_test_regexp get a "100 " prefix, the list is sorted
# on that first field in descending order, and the prefix is stripped again.
prioritize_long_running_tests = \
  perl -pe 's,($(slow_test_regexp)),100 $$1,' \
    | sort -k1,1gr \
    | sed 's/^[.0-9]* //'

# "make check" uses
# Run with "make J=1 check" to disable parallelism in "make check".
# Run with "make J=200% check" to run two parallel jobs per core.
# The default is to run one job per core (J=100%).
# See "man parallel" for its "-j ..." option.
J ?= 100%

# Use this regexp to select the subset of tests whose names match.
tests-regexp = .
# With PRINT_PARALLEL_OUTPUTS=1 each job's output is left on the terminal;
# otherwise it is redirected to t/log-<job name>.
ifeq ($(PRINT_PARALLEL_OUTPUTS), 1)
	parallel_com = '{}'
else
	parallel_com = '{} >& t/log-{/}'
endif

# check_0: feed the non-parallel test binaries plus every generated t/run-*
# script to build_tools/gnu_parallel. The awk program prints the header and
# every joblog line whose 7th or 8th field is non-zero and fails if there
# was any; a non-zero exit status from parallel itself also fails the rule.
.PHONY: check_0
check_0:
	$(AM_V_GEN)export TEST_TMPDIR=$(TMPD); \
	printf '%s\n' '' \
	  'To monitor subtest ,' \
	  ' run "make watch-log" in a separate window' ''; \
	test -t 1 && eta=--eta || eta=; \
	{ \
	  printf './%s\n' $(filter-out $(PARALLEL_TEST),$(TESTS)); \
	  find t -name 'run-*' -print; \
	} \
	  | $(prioritize_long_running_tests) \
	  | grep -E '$(tests-regexp)' \
	  | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG $$eta --gnu $(parallel_com) ; \
	parallel_retcode=$$? ; \
	awk '{ if ($$7 != 0 || $$8 != 0) { if ($$7 == "Exitval") { h = $$0; } else { if (!f) print h; print; f = 1 } } } END { if(f) exit 1; }' < LOG ; \
	if [ $$parallel_retcode -ne 0 ] ; then exit 1 ; fi

# Tests matching this regexp are filtered out of the valgrind run below.
valgrind-blacklist-regexp = InlineSkipTest.ConcurrentInsert|TransactionStressTest.DeadlockStress|DBCompactionTest.SuggestCompactRangeNoTwoLevel0Compactions|BackupableDBTest.RateLimiting|DBTest.CloseSpeedup|DBTest.ThreadStatusFlush|DBTest.RateLimitingTest|DBTest.EncodeDecompressedBlockSizeTest|FaultInjectionTest.UninstalledCompaction|HarnessTest.Randomized|ExternalSSTFileTest.CompactDuringAddFileRandom|ExternalSSTFileTest.IngestFileWithGlobalSeqnoRandomized|MySQLStyleTransactionTest.TransactionStressTest

# valgrind_check_0: like check_0, but plain test binaries (entries starting
# with "./") are run under $(DRIVER); generated t/run-* scripts are run
# as-is because they already embed $(DRIVER). Output goes to
# t/valgrind_log-<job name>.
.PHONY: valgrind_check_0
valgrind_check_0:
	$(AM_V_GEN)export TEST_TMPDIR=$(TMPD); \
	printf '%s\n' '' \
	  'To monitor subtest ,' \
	  ' run "make watch-log" in a separate window' ''; \
	test -t 1 && eta=--eta || eta=; \
	{ \
	  printf './%s\n' $(filter-out $(PARALLEL_TEST) %skiplist_test options_settable_test, $(TESTS)); \
	  find t -name 'run-*' -print; \
	} \
	  | $(prioritize_long_running_tests) \
	  | grep -E '$(tests-regexp)' \
	  | grep -E -v '$(valgrind-blacklist-regexp)' \
	  | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG $$eta --gnu \
	  '(if [[ "{}" == "./"* ]] ; then $(DRIVER) {}; else {}; fi) ' \
	  '>& t/valgrind_log-{/}'

CLEAN_FILES += t LOG $(TMPD)

# When running parallel "make check", you can monitor its progress
# from another window.
# Run "make watch-log" to show the duration,PASS/FAIL,name of parallel
# tests as they are being run. We sort them so that longer-running ones
# appear at the top of the list and any failing tests remain at the top
# regardless of their duration. As with any use of "watch", hit ^C to
# interrupt.
# NOTE(review): $(quoted_perl_command) is not defined in this part of the
# file; confirm it is set elsewhere, otherwise both rules run an empty
# command.
watch-log:
	$(WATCH) --interval=0 'sort -k7,7nr -k4,4gr LOG|$(quoted_perl_command)'

dump-log:
	bash -c '$(quoted_perl_command)' < LOG

# If J != 1 and GNU parallel is installed, run the tests in parallel,
# via the check_0 rule above.
# check: when J != 1 and GNU parallel is available the tests are run through
# the check_0 rule; otherwise they are run sequentially, stopping at the
# first failure. Afterwards $(TMPD) is removed and the Python, ldb, dump,
# format and buck checks run, subject to the conditionals below.
check: all
	$(MAKE) gen_parallel_tests
	$(AM_V_GEN)if test "$(J)" != 1 \
	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) | \
	        grep -q 'GNU Parallel'; \
	then \
	    $(MAKE) T="$$t" TMPD=$(TMPD) check_0; \
	else \
	    for t in $(TESTS); do \
	      echo "===== Running $$t (`date`)"; ./$$t || exit 1; done; \
	fi
	rm -rf $(TMPD)
ifneq ($(PLATFORM), OS_AIX)
	$(PYTHON) tools/check_all_python.py
ifeq ($(filter -DROCKSDB_LITE,$(OPT)),)
ifndef ASSERT_STATUS_CHECKED # not yet working with these tests
	$(PYTHON) tools/ldb_test.py
	sh tools/rocksdb_dump_test.sh
endif
endif
endif
ifndef SKIP_FORMAT_BUCK_CHECKS
	$(MAKE) check-format
	$(MAKE) check-buck-targets
endif

# TODO add ldb_tests
check_some: $(SUBSET)
	for t in $(SUBSET); do echo "===== Running $$t (`date`)"; ./$$t || exit 1; done

.PHONY: ldb_tests
ldb_tests: ldb
	$(PYTHON) tools/ldb_test.py

# Crash tests: every variant drives tools/db_crashtest.py against db_stress.
crash_test: whitebox_crash_test blackbox_crash_test

crash_test_with_atomic_flush: whitebox_crash_test_with_atomic_flush blackbox_crash_test_with_atomic_flush

crash_test_with_txn: whitebox_crash_test_with_txn blackbox_crash_test_with_txn

blackbox_crash_test: db_stress
	$(PYTHON) -u tools/db_crashtest.py --simple blackbox $(CRASH_TEST_EXT_ARGS)
	$(PYTHON) -u tools/db_crashtest.py blackbox $(CRASH_TEST_EXT_ARGS)

blackbox_crash_test_with_atomic_flush: db_stress
	$(PYTHON) -u tools/db_crashtest.py --cf_consistency blackbox $(CRASH_TEST_EXT_ARGS)

blackbox_crash_test_with_txn: db_stress
	$(PYTHON) -u tools/db_crashtest.py --txn blackbox $(CRASH_TEST_EXT_ARGS)

ifeq ($(CRASH_TEST_KILL_ODD),)
  CRASH_TEST_KILL_ODD=888887
endif

whitebox_crash_test: db_stress
	$(PYTHON) -u tools/db_crashtest.py --simple whitebox --random_kill_odd \
	  $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
	$(PYTHON) -u tools/db_crashtest.py whitebox --random_kill_odd \
	  $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

whitebox_crash_test_with_atomic_flush: db_stress
	$(PYTHON) -u tools/db_crashtest.py --cf_consistency whitebox --random_kill_odd \
	  $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

whitebox_crash_test_with_txn: db_stress
	$(PYTHON) -u tools/db_crashtest.py --txn whitebox --random_kill_odd \
	  $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

# Sanitizer wrappers: clean, rebuild and run with the sanitizer enabled,
# then clean again so instrumented objects are not reused.
asan_check:
	$(MAKE) clean
	COMPILE_WITH_ASAN=1 $(MAKE) check -j32
	$(MAKE) clean

asan_crash_test:
	$(MAKE) clean
	COMPILE_WITH_ASAN=1 $(MAKE) crash_test
	$(MAKE) clean

asan_crash_test_with_atomic_flush:
	$(MAKE) clean
	COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush
	$(MAKE) clean

asan_crash_test_with_txn:
	$(MAKE) clean
	COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_txn
	$(MAKE) clean

ubsan_check:
	$(MAKE) clean
	COMPILE_WITH_UBSAN=1 $(MAKE) check -j32
	$(MAKE) clean

ubsan_crash_test:
	$(MAKE) clean
	COMPILE_WITH_UBSAN=1 $(MAKE) crash_test
	$(MAKE) clean

ubsan_crash_test_with_atomic_flush:
	$(MAKE) clean
	COMPILE_WITH_UBSAN=1 $(MAKE) crash_test_with_atomic_flush
	$(MAKE) clean

ubsan_crash_test_with_txn:
	$(MAKE) clean
	COMPILE_WITH_UBSAN=1 $(MAKE) crash_test_with_txn
	$(MAKE) clean

valgrind_test:
	ROCKSDB_VALGRIND_RUN=1 DISABLE_JEMALLOC=1 $(MAKE) valgrind_check

# valgrind_check: same parallel/sequential split as "check", with every test
# run under $(VALGRIND_VER) $(VALGRIND_OPTS). The sequential branch stops at
# the first failing test and propagates its exit code.
valgrind_check: $(TESTS)
	$(MAKE) DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" gen_parallel_tests
	$(AM_V_GEN)if test "$(J)" != 1 \
	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) | \
	        grep -q 'GNU Parallel'; \
	then \
	    $(MAKE) TMPD=$(TMPD) \
	    DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" valgrind_check_0; \
	else \
	    for t in $(filter-out %skiplist_test options_settable_test,$(TESTS)); do \
	        $(VALGRIND_VER) $(VALGRIND_OPTS) ./$$t; \
	        ret_code=$$?; \
	        if [ $$ret_code -ne 0 ]; then \
	            exit $$ret_code; \
	        fi; \
	    done; \
	fi

# parloop: run every entry of PAR_TEST $(J) times concurrently, each copy in
# its own $(TMPD)/rdb-<n> directory. With db_test=1 the entries are gtest
# filters for ./db_test; otherwise they are test binaries. stdout and stderr
# are both appended to $$s/log-<n> (2>&1; the previous "2>1" sent stderr to
# a file literally named "1" in the working directory).
ifneq ($(PAR_TEST),)
parloop:
	ret_bad=0; \
	for t in $(PAR_TEST); do \
	    echo "===== Running $$t in parallel $(NUM_PAR) (`date`)";\
	    if [ $(db_test) -eq 1 ]; then \
	        seq $(J) | v="$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{}; export TEST_TMPDIR=$$s;' \
	            'timeout 2m ./db_test --gtest_filter=$$v >> $$s/log-{} 2>&1'; \
	    else\
	        seq $(J) | v="./$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{};' \
	            'export TEST_TMPDIR=$$s; timeout 10m $$v >> $$s/log-{} 2>&1'; \
	    fi; \
	    ret_code=$$?; \
	    if [ $$ret_code -ne 0 ]; then \
	        ret_bad=$$ret_code; \
	        echo $$t exited with $$ret_code; \
	    fi; \
	done; \
	exit $$ret_bad;
endif

# Same name-expansion pipeline as in the parallel_% rules, fixed to ./db_test.
test_names = \
  ./db_test --gtest_list_tests \
    | perl -n \
      -e 's/ *\#.*//;' \
      -e '/^(\s*)(\S+)/; !$$1 and do {$$p=$$2; break};' \
      -e 'print qq! $$p$$2!'

# parallel_check: requires GNU Parallel and a numeric J greater than 1
# (J is also passed to `seq`). The comparison uses -gt; the previous
# `test "$(J)" > 1` was a shell redirection that always succeeded and
# created a file named "1". A non-numeric J (such as the default 100%)
# makes `test` fail and takes the error branch.
parallel_check: $(TESTS)
	$(AM_V_GEN)if test "$(J)" -gt 1 2>/dev/null \
	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) | \
	        grep -q 'GNU Parallel'; \
	then \
	    echo Running in parallel $(J); \
	else \
	    echo "Need to have GNU Parallel and J > 1"; exit 1; \
	fi; \
	ret_bad=0; \
	echo $(J);\
	echo Test Dir: $(TMPD); \
	seq $(J) | build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \
	$(MAKE) PAR_TEST="$(shell $(test_names))" TMPD=$(TMPD) \
	    J=$(J) db_test=1 parloop; \
	$(MAKE) PAR_TEST="$(filter-out db_test, $(TESTS))" \
	    TMPD=$(TMPD) J=$(J) db_test=0 parloop;

analyze: clean
	USE_CLANG=1 $(MAKE) analyze_incremental

analyze_incremental:
	$(CLANG_SCAN_BUILD) --use-analyzer=$(CLANG_ANALYZER) \
	    --use-c++=$(CXX) --use-cc=$(CC) --status-bugs \
	    -o $(CURDIR)/scan_build_report \
	    $(MAKE) dbg

# unity.cc #includes every library source; it is written to a temporary
# file first and moved into place once complete.
CLEAN_FILES += unity.cc
unity.cc: Makefile
	rm -f $@ $@-t
	for source_file in $(LIB_SOURCES); do \
	    echo "#include \"$$source_file\"" >> $@-t; \
	done
	chmod a=r $@-t
	mv $@-t $@

unity.a: unity.o
	$(AM_V_AR)rm -f $@
	$(AM_V_at)$(AR) $(ARFLAGS) $@ unity.o

TOOLLIBOBJECTS = $(TOOL_LIB_SOURCES:.cc=.o)
# try compiling db_test with unity
unity_test: db/db_test.o db/db_test_util.o $(TESTHARNESS) $(TOOLLIBOBJECTS) unity.a
	$(AM_LINK)
	./unity_test

rocksdb.h rocksdb.cc: build_tools/amalgamate.py Makefile $(LIB_SOURCES) unity.cc
	build_tools/amalgamate.py -I. -i./include unity.cc -x include/rocksdb/c.h -H rocksdb.h -o rocksdb.cc

clean: clean-ext-libraries-all clean-rocks clean-rocksjava

clean-not-downloaded: clean-ext-libraries-bin clean-rocks clean-not-downloaded-rocksjava

clean-rocks:
	rm -f $(BENCHMARKS) $(TOOLS) $(TESTS) $(PARALLEL_TEST) $(LIBRARY) $(SHARED)
	rm -rf $(CLEAN_FILES) ios-x86 ios-arm scan_build_report
	$(FIND) . -name "*.[oda]" -exec rm -f {} \;
	$(FIND) . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm {} \;

clean-rocksjava:
	cd java && $(MAKE) clean

clean-not-downloaded-rocksjava:
	cd java && $(MAKE) clean-not-downloaded

clean-ext-libraries-all:
	rm -rf bzip2* snappy* zlib* lz4* zstd*

clean-ext-libraries-bin:
	find . -maxdepth 1 -type d \( -name bzip2\* -or -name snappy\* -or -name zlib\* -or -name lz4\* -or -name zstd\* \) -prune -exec rm -rf {} \;

tags:
	ctags -R .
	cscope -b `$(FIND) . -name '*.cc'` `$(FIND) . -name '*.h'` `$(FIND) . -name '*.c'`
	ctags -e -R -o etags *

# Same as "tags", but the cscope database skips *_test sources and headers.
tags0:
	ctags -R .
	cscope -b `$(FIND) . -name '*.cc' -and ! -name '*_test.cc'` \
	          `$(FIND) . -name '*.c' -and ! -name '*_test.c'` \
	          `$(FIND) . -name '*.h' -and ! -name '*_test.h'`
	ctags -e -R -o etags *

format:
	build_tools/format-diff.sh

check-format:
	build_tools/format-diff.sh -c

check-buck-targets:
	buckifier/check_buck_targets.sh

package:
	bash build_tools/make_package.sh $(SHARED_MAJOR).$(SHARED_MINOR)

# ---------------------------------------------------------------------------
# Unit tests and tools
# ---------------------------------------------------------------------------
# Static archives are rebuilt from scratch (rm -f first). Executables are
# linked with $(AM_LINK), which links all prerequisites of the rule.
$(LIBRARY): $(LIBOBJECTS)
	$(AM_V_AR)rm -f $@
	$(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIBOBJECTS)

$(TOOLS_LIBRARY): $(BENCH_LIB_SOURCES:.cc=.o) $(TOOL_LIB_SOURCES:.cc=.o) $(LIB_SOURCES:.cc=.o) $(TESTUTIL) $(ANALYZER_LIB_SOURCES:.cc=.o)
	$(AM_V_AR)rm -f $@
	$(AM_V_at)$(AR) $(ARFLAGS) $@ $^

$(STRESS_LIBRARY): $(LIB_SOURCES:.cc=.o) $(TESTUTIL) $(ANALYZER_LIB_SOURCES:.cc=.o) $(STRESS_LIB_SOURCES:.cc=.o)
	$(AM_V_AR)rm -f $@
	$(AM_V_at)$(AR) $(ARFLAGS) $@ $^

librocksdb_env_basic_test.a: env/env_basic_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_V_AR)rm -f $@
	$(AM_V_at)$(AR) $(ARFLAGS) $@ $^

db_bench: tools/db_bench.o $(BENCHTOOLOBJECTS)
	$(AM_LINK)

trace_analyzer: tools/trace_analyzer.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS)
	$(AM_LINK)

block_cache_trace_analyzer: tools/block_cache_analyzer/block_cache_trace_analyzer_tool.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS)
	$(AM_LINK)

ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
folly_synchronization_distributed_mutex_test: $(LIBOBJECTS) $(TESTHARNESS) $(FOLLYOBJECTS) third-party/folly/folly/synchronization/test/DistributedMutexTest.o
	$(AM_LINK)
endif

cache_bench: cache/cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

persistent_cache_bench: utilities/persistent_cache/persistent_cache_bench.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

memtablerep_bench: memtable/memtablerep_bench.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

filter_bench: util/filter_bench.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

db_stress: db_stress_tool/db_stress.o $(STRESSTOOLOBJECTS)
	$(AM_LINK)

write_stress: tools/write_stress.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

db_sanity_test: tools/db_sanity_test.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

db_repl_stress: tools/db_repl_stress.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

arena_test: memory/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

memkind_kmem_allocator_test: memory/memkind_kmem_allocator_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

autovector_test: util/autovector_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

column_family_test: db/column_family_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

table_properties_collector_test: db/table_properties_collector_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

dynamic_bloom_test: util/dynamic_bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cache_test: cache/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

random_test: util/random_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

option_change_migration_test: utilities/option_change_migration/option_change_migration_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

stringappend_test: utilities/merge_operators/string_append/stringappend_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cassandra_format_test: utilities/cassandra/cassandra_format_test.o utilities/cassandra/test_utils.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cassandra_functional_test: utilities/cassandra/cassandra_functional_test.o utilities/cassandra/test_utils.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cassandra_row_merge_test: utilities/cassandra/cassandra_row_merge_test.o utilities/cassandra/test_utils.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cassandra_serialize_test: utilities/cassandra/cassandra_serialize_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

hash_table_test: utilities/persistent_cache/hash_table_test.o $(LIBOBJECTS)
$(TESTHARNESS) $(AM_LINK) histogram_test: monitoring/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) thread_local_test: util/thread_local_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) work_queue_test: util/work_queue_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) corruption_test: db/corruption_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) slice_test: util/slice_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_basic_test: db/db_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_with_timestamp_basic_test: db/db_with_timestamp_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_with_timestamp_compaction_test: db/db_with_timestamp_compaction_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_encryption_test: db/db_encryption_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_logical_block_size_cache_test: db/db_logical_block_size_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_blob_index_test: db/blob/db_blob_index_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_block_cache_test: db/db_block_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_bloom_filter_test: db/db_bloom_filter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_compaction_filter_test: db/db_compaction_filter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_compaction_test: db/db_compaction_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_dynamic_level_test: 
db/db_dynamic_level_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_flush_test: db/db_flush_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_inplace_update_test: db/db_inplace_update_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_iterator_test: db/db_iterator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_memtable_test: db/db_memtable_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_merge_operator_test: db/db_merge_operator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_merge_operand_test: db/db_merge_operand_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_options_test: db/db_options_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_range_del_test: db/db_range_del_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_sst_test: db/db_sst_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_statistics_test: db/db_statistics_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_write_test: db/db_write_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) error_handler_fs_test: db/error_handler_fs_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) external_sst_file_basic_test: db/external_sst_file_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) external_sst_file_test: db/external_sst_file_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) import_column_family_test: db/import_column_family_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_tailing_iter_test: db/db_tailing_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_iter_test: db/db_iter_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_iter_stress_test: db/db_iter_stress_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_universal_compaction_test: db/db_universal_compaction_test.o db/db_test_util.o $(LIBOBJECTS) 
$(TESTHARNESS) $(AM_LINK) db_wal_test: db/db_wal_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_io_failure_test: db/db_io_failure_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_properties_test: db/db_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) db_table_properties_test: db/db_table_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) $(PROFILING_FLAGS) plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) comparator_db_test: db/comparator_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) table_reader_bench: table/table_reader_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) $(PROFILING_FLAGS) perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) prefix_test: db/prefix_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) cache_simulator_test: utilities/simulator_cache/cache_simulator_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) env_mirror_test: utilities/env_mirror_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) env_timed_test: utilities/env_timed_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) ifdef ROCKSDB_USE_LIBRADOS env_librados_test: utilities/env_librados_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) endif object_registry_test: utilities/object_registry_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) 
# Link rules (continued): one executable per rule, built from the listed
# prerequisites with $(AM_LINK) unless a rule spells out its own command.
write_batch_with_index_test: utilities/write_batch_with_index/write_batch_with_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

flush_job_test: db/flush_job_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

compaction_iterator_test: db/compaction/compaction_iterator_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

compaction_job_test: db/compaction/compaction_job_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

compaction_job_stats_test: db/compaction/compaction_job_stats_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

compact_on_deletion_collector_test: utilities/table_properties_collectors/compact_on_deletion_collector_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

wal_manager_test: db/wal_manager_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

env_basic_test: env/env_basic_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

env_test: env/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

io_posix_test: env/io_posix_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

fault_injection_test: db/fault_injection_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

rate_limiter_test: util/rate_limiter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

delete_scheduler_test: file/delete_scheduler_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

random_access_file_reader_test: file/random_access_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS) $(TESTUTIL)
	$(AM_LINK)

file_reader_writer_test: util/file_reader_writer_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

block_based_filter_block_test: table/block_based/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

block_based_table_reader_test: table/block_based/block_based_table_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

full_filter_block_test: table/block_based/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

partitioned_filter_block_test: table/block_based/partitioned_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cleanable_test: table/cleanable_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

block_fetcher_test: table/block_fetcher_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

block_test: table/block_based/block_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

data_block_hash_index_test: table/block_based/data_block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

inlineskiplist_test: memtable/inlineskiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

skiplist_test: memtable/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

write_buffer_manager_test: memtable/write_buffer_manager_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

compaction_picker_test: db/compaction/compaction_picker_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

version_builder_test: db/version_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

file_indexer_test: db/file_indexer_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

reduce_levels_test: tools/reduce_levels_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

write_controller_test: db/write_controller_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

merge_helper_test: db/merge_helper_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

memory_test: utilities/memory/memory_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

merge_test: db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

merger_test: table/merger_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

util_merge_operators_test: utilities/util_merge_operators_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

options_file_test: db/options_file_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

deletefile_test: db/deletefile_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

obsolete_files_test: db/obsolete_files_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS)
	$(AM_LINK)

rocksdb_undump: tools/dump/rocksdb_undump.o $(LIBOBJECTS)
	$(AM_LINK)

cuckoo_table_builder_test: table/cuckoo/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cuckoo_table_reader_test: table/cuckoo/cuckoo_table_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

cuckoo_table_db_test: db/cuckoo_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

listener_test: db/listener_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

thread_list_test: util/thread_list_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

compact_files_test: db/compact_files_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

options_test: options/options_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

options_settable_test: options/options_settable_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

options_util_test: utilities/options/options_util_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

db_bench_tool_test: tools/db_bench_tool_test.o $(BENCHTOOLOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

trace_analyzer_test: tools/trace_analyzer_test.o $(LIBOBJECTS) $(ANALYZETOOLOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

event_logger_test: logging/event_logger_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

timer_queue_test: util/timer_queue_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

sst_dump_test: tools/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

optimistic_transaction_test: utilities/transactions/optimistic_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

mock_env_test: env/mock_env_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

manual_compaction_test: db/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

filelock_test: util/filelock_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

auto_roll_logger_test: logging/auto_roll_logger_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

env_logger_test: logging/env_logger_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

memtable_list_test: db/memtable_list_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

write_callback_test: db/write_callback_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

# heap_test links against $(GTEST) only, not $(LIBOBJECTS).
heap_test: util/heap_test.o $(GTEST)
	$(AM_LINK)

transaction_lock_mgr_test: utilities/transactions/transaction_lock_mgr_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

transaction_test: utilities/transactions/transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

write_prepared_transaction_test: utilities/transactions/write_prepared_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

write_unprepared_transaction_test: utilities/transactions/write_unprepared_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

sst_dump: tools/sst_dump.o $(LIBOBJECTS)
	$(AM_LINK)

blob_dump: tools/blob_dump.o $(LIBOBJECTS)
	$(AM_LINK)

repair_test: db/repair_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

ldb_cmd_test: tools/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

ldb: tools/ldb.o $(LIBOBJECTS)
	$(AM_LINK)

# Linked with an explicit command instead of $(AM_LINK).
iostats_context_test: monitoring/iostats_context_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)

persistent_cache_test: utilities/persistent_cache/persistent_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

statistics_test: monitoring/statistics_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

stats_history_test: monitoring/stats_history_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

lru_cache_test: cache/lru_cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

range_del_aggregator_test: db/range_del_aggregator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

range_del_aggregator_bench: db/range_del_aggregator_bench.o $(LIBOBJECTS) $(TESTUTIL)
	$(AM_LINK)

blob_db_test: utilities/blob_db/blob_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

repeatable_thread_test: util/repeatable_thread_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

range_tombstone_fragmenter_test: db/range_tombstone_fragmenter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

sst_file_reader_test: table/sst_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

db_secondary_test: db/db_impl/db_secondary_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

block_cache_tracer_test: trace_replay/block_cache_tracer_test.o trace_replay/block_cache_tracer.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

block_cache_trace_analyzer_test: tools/block_cache_analyzer/block_cache_trace_analyzer_test.o tools/block_cache_analyzer/block_cache_trace_analyzer.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

defer_test: util/defer_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

blob_file_addition_test: db/blob/blob_file_addition_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

blob_file_garbage_test: db/blob/blob_file_garbage_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

timer_test: util/timer_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

testutil_test: test_util/testutil_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(AM_LINK)

#-------------------------------------------------
# make install related stuff
# Everything is installed below $(INSTALL_PATH): headers under
# include/rocksdb, libraries under lib.
INSTALL_PATH ?= /usr/local

uninstall:
	rm -rf $(INSTALL_PATH)/include/rocksdb \
	  $(INSTALL_PATH)/lib/$(LIBRARY) \
	  $(INSTALL_PATH)/lib/$(SHARED4) \
	  $(INSTALL_PATH)/lib/$(SHARED3) \
	  $(INSTALL_PATH)/lib/$(SHARED2) \
	  $(INSTALL_PATH)/lib/$(SHARED1)

# Recreates the include/rocksdb directory tree under $(INSTALL_PATH) and
# copies every header into it. The '*.h' pattern is quoted so that the shell
# hands it to find unchanged instead of expanding it against *.h files that
# happen to exist in the current directory.
install-headers:
	install -d $(INSTALL_PATH)/lib
	for header_dir in `$(FIND) "include/rocksdb" -type d`; do \
		install -d $(INSTALL_PATH)/$$header_dir; \
	done
	for header in `$(FIND) "include/rocksdb" -type f -name '*.h'`; do \
		install -C -m 644 $$header $(INSTALL_PATH)/$$header; \
	done

install-static: install-headers $(LIBRARY)
	install -C -m 755 $(LIBRARY) $(INSTALL_PATH)/lib

# Installs the fully versioned shared library and points the shorter
# names at it via symlinks.
install-shared: install-headers $(SHARED4)
	install -C -m 755 $(SHARED4) $(INSTALL_PATH)/lib && \
		ln -fs $(SHARED4) $(INSTALL_PATH)/lib/$(SHARED3) && \
		ln -fs $(SHARED4) $(INSTALL_PATH)/lib/$(SHARED2) && \
		ln -fs $(SHARED4) $(INSTALL_PATH)/lib/$(SHARED1)

# install static by default + install shared if it exists.
# A missing shared library is not an error, but a failing install-shared is:
# the if/fi form only skips the step, it does not mask its exit status.
install: install-static
	if [ -e $(SHARED4) ]; then $(MAKE) install-shared; fi

#-------------------------------------------------


# ---------------------------------------------------------------------------
# Jni stuff
# ---------------------------------------------------------------------------

# Defaults below describe Linux; the per-platform sections further down
# override them.
JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux

# ARCH is the suffix (e.g. 32/64) used in the JNI library and jar names.
ifeq ($(PLATFORM), OS_SOLARIS)
  ARCH := $(shell isainfo -b)
else ifeq ($(PLATFORM), OS_OPENBSD)
  ifneq (,$(filter amd64 ppc64 ppc64le arm64 aarch64 sparc64, $(MACHINE)))
    ARCH := 64
  else
    ARCH := 32
  endif
else
  ARCH := $(shell getconf LONG_BIT)
endif

# JNI_LIBC is only set when /usr/bin/env is linked against musl.
ifeq ($(shell ldd /usr/bin/env 2>/dev/null | grep -q musl; echo $$?),0)
  JNI_LIBC = musl
# GNU LibC (or glibc) is so pervasive we can assume it is the default
# else
#  JNI_LIBC = glibc
endif

ifneq ($(origin JNI_LIBC), undefined)
  JNI_LIBC_POSTFIX = -$(JNI_LIBC)
endif

ifneq (,$(filter ppc% arm64 aarch64 sparc64, $(MACHINE)))
  ROCKSDBJNILIB = librocksdbjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so
else
  ROCKSDBJNILIB = librocksdbjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so
endif
ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar
ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar
ROCKSDB_JAVADOCS_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-javadoc.jar
ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-sources.jar
SHA256_CMD = sha256sum

# Versions, expected SHA-256 digests and download locations of the
# compression libraries that are built statically for the JNI library.
# All of them can be overridden from the command line or the environment.
ZLIB_VER ?= 1.2.11
ZLIB_SHA256 ?= c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1
ZLIB_DOWNLOAD_BASE ?= http://zlib.net
BZIP2_VER ?= 1.0.8
BZIP2_SHA256 ?= ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269
BZIP2_DOWNLOAD_BASE ?= https://sourceware.org/pub/bzip2
SNAPPY_VER ?= 1.1.8
SNAPPY_SHA256 ?= 16b677f07832a612b0836178db7f374e414f94657c138e6993cbfc5dcc58651f
SNAPPY_DOWNLOAD_BASE ?= https://github.com/google/snappy/archive
LZ4_VER ?= 1.9.2
LZ4_SHA256 ?= 658ba6191fa44c92280d4aa2c271b0f4fbc0e34d249578dd05e50e76d0e5efcc
LZ4_DOWNLOAD_BASE ?= https://github.com/lz4/lz4/archive
ZSTD_VER ?= 1.4.4
ZSTD_SHA256 ?= a364f5162c7d1a455cc915e8e3cf5f4bd8b75d09bc0f53965b0c9ca1383c52c8
ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive
CURL_SSL_OPTS ?= --tlsv1

# Per-platform overrides of the JNI include path, artifact names and the
# checksum command.
ifeq ($(PLATFORM), OS_MACOSX)
  ROCKSDBJNILIB = librocksdbjni-osx.jnilib
  ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar
  SHA256_CMD = openssl sha256 -r
ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","")
  JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin
else
  JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/
endif
endif
ifeq ($(PLATFORM), OS_FREEBSD)
  JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/freebsd
  ROCKSDBJNILIB = librocksdbjni-freebsd$(ARCH).so
  ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-freebsd$(ARCH).jar
endif
ifeq ($(PLATFORM), OS_SOLARIS)
  ROCKSDBJNILIB = librocksdbjni-solaris$(ARCH).so
  ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar
  JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/solaris
  SHA256_CMD = digest -a sha256
endif
ifeq ($(PLATFORM), OS_AIX)
  JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/aix
  ROCKSDBJNILIB = librocksdbjni-aix.so
  EXTRACT_SOURCES = gunzip < TAR_GZ | tar xvf -
  SNAPPY_MAKE_TARGET = libsnappy.la
endif
ifeq ($(PLATFORM), OS_OPENBSD)
  JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/openbsd
  ROCKSDBJNILIB = librocksdbjni-openbsd$(ARCH).so
  ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-openbsd$(ARCH).jar
endif

# Third-party static libraries. Each rule: drop any stale source tree, download
# the archive if it was not present when the Makefile was parsed, verify its
# SHA-256, unpack, build, and copy the resulting .a to the source root.
libz.a:
	-rm -rf zlib-$(ZLIB_VER)
ifeq (,$(wildcard ./zlib-$(ZLIB_VER).tar.gz))
	curl --fail --output zlib-$(ZLIB_VER).tar.gz --location ${CURL_SSL_OPTS} ${ZLIB_DOWNLOAD_BASE}/zlib-$(ZLIB_VER).tar.gz
endif
	ZLIB_SHA256_ACTUAL=`$(SHA256_CMD) zlib-$(ZLIB_VER).tar.gz | cut -d ' ' -f 1`; \
	if [ "$(ZLIB_SHA256)" != "$$ZLIB_SHA256_ACTUAL" ]; then \
		echo zlib-$(ZLIB_VER).tar.gz checksum mismatch, expected=\"$(ZLIB_SHA256)\" actual=\"$$ZLIB_SHA256_ACTUAL\"; \
		exit 1; \
	fi
	tar xvzf zlib-$(ZLIB_VER).tar.gz
	cd zlib-$(ZLIB_VER) && CFLAGS='-fPIC ${EXTRA_CFLAGS}' LDFLAGS='${EXTRA_LDFLAGS}' ./configure --static && $(MAKE)
	cp zlib-$(ZLIB_VER)/libz.a .

libbz2.a:
	-rm -rf bzip2-$(BZIP2_VER)
ifeq (,$(wildcard ./bzip2-$(BZIP2_VER).tar.gz))
	curl --fail --output bzip2-$(BZIP2_VER).tar.gz --location ${CURL_SSL_OPTS} ${BZIP2_DOWNLOAD_BASE}/bzip2-$(BZIP2_VER).tar.gz
endif
	BZIP2_SHA256_ACTUAL=`$(SHA256_CMD) bzip2-$(BZIP2_VER).tar.gz | cut -d ' ' -f 1`; \
	if [ "$(BZIP2_SHA256)" != "$$BZIP2_SHA256_ACTUAL" ]; then \
		echo bzip2-$(BZIP2_VER).tar.gz checksum mismatch, expected=\"$(BZIP2_SHA256)\" actual=\"$$BZIP2_SHA256_ACTUAL\"; \
		exit 1; \
	fi
	tar xvzf bzip2-$(BZIP2_VER).tar.gz
	cd bzip2-$(BZIP2_VER) && $(MAKE) CFLAGS='-fPIC -O2 -g -D_FILE_OFFSET_BITS=64 ${EXTRA_CFLAGS}' AR='ar ${EXTRA_ARFLAGS}'
	cp bzip2-$(BZIP2_VER)/libbz2.a .

# Third-party static libraries (snappy, lz4, zstd). Each rule: drop any stale
# source tree, download the archive if it was not present when the Makefile
# was parsed, verify its SHA-256, unpack, build, and copy the .a to the root.
libsnappy.a:
	-rm -rf snappy-$(SNAPPY_VER)
ifeq (,$(wildcard ./snappy-$(SNAPPY_VER).tar.gz))
	curl --fail --output snappy-$(SNAPPY_VER).tar.gz --location ${CURL_SSL_OPTS} ${SNAPPY_DOWNLOAD_BASE}/$(SNAPPY_VER).tar.gz
endif
	SNAPPY_SHA256_ACTUAL=`$(SHA256_CMD) snappy-$(SNAPPY_VER).tar.gz | cut -d ' ' -f 1`; \
	if [ "$(SNAPPY_SHA256)" != "$$SNAPPY_SHA256_ACTUAL" ]; then \
		echo snappy-$(SNAPPY_VER).tar.gz checksum mismatch, expected=\"$(SNAPPY_SHA256)\" actual=\"$$SNAPPY_SHA256_ACTUAL\"; \
		exit 1; \
	fi
	tar xvzf snappy-$(SNAPPY_VER).tar.gz
	mkdir snappy-$(SNAPPY_VER)/build
	cd snappy-$(SNAPPY_VER)/build && CFLAGS='${EXTRA_CFLAGS}' CXXFLAGS='${EXTRA_CXXFLAGS}' LDFLAGS='${EXTRA_LDFLAGS}' cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON .. && $(MAKE) ${SNAPPY_MAKE_TARGET}
	cp snappy-$(SNAPPY_VER)/build/libsnappy.a .

liblz4.a:
	-rm -rf lz4-$(LZ4_VER)
ifeq (,$(wildcard ./lz4-$(LZ4_VER).tar.gz))
	curl --fail --output lz4-$(LZ4_VER).tar.gz --location ${CURL_SSL_OPTS} ${LZ4_DOWNLOAD_BASE}/v$(LZ4_VER).tar.gz
endif
	LZ4_SHA256_ACTUAL=`$(SHA256_CMD) lz4-$(LZ4_VER).tar.gz | cut -d ' ' -f 1`; \
	if [ "$(LZ4_SHA256)" != "$$LZ4_SHA256_ACTUAL" ]; then \
		echo lz4-$(LZ4_VER).tar.gz checksum mismatch, expected=\"$(LZ4_SHA256)\" actual=\"$$LZ4_SHA256_ACTUAL\"; \
		exit 1; \
	fi
	tar xvzf lz4-$(LZ4_VER).tar.gz
	cd lz4-$(LZ4_VER)/lib && $(MAKE) CFLAGS='-fPIC -O2 ${EXTRA_CFLAGS}' all
	cp lz4-$(LZ4_VER)/lib/liblz4.a .

libzstd.a:
	-rm -rf zstd-$(ZSTD_VER)
ifeq (,$(wildcard ./zstd-$(ZSTD_VER).tar.gz))
	curl --fail --output zstd-$(ZSTD_VER).tar.gz --location ${CURL_SSL_OPTS} ${ZSTD_DOWNLOAD_BASE}/v$(ZSTD_VER).tar.gz
endif
	ZSTD_SHA256_ACTUAL=`$(SHA256_CMD) zstd-$(ZSTD_VER).tar.gz | cut -d ' ' -f 1`; \
	if [ "$(ZSTD_SHA256)" != "$$ZSTD_SHA256_ACTUAL" ]; then \
		echo zstd-$(ZSTD_VER).tar.gz checksum mismatch, expected=\"$(ZSTD_SHA256)\" actual=\"$$ZSTD_SHA256_ACTUAL\"; \
		exit 1; \
	fi
	tar xvzf zstd-$(ZSTD_VER).tar.gz
	cd zstd-$(ZSTD_VER)/lib && DESTDIR=. PREFIX= $(MAKE) CFLAGS='-fPIC -O2 ${EXTRA_CFLAGS}' install
	cp zstd-$(ZSTD_VER)/lib/libzstd.a .

# A version of each $(LIBOBJECTS) compiled with -fPIC and a fixed set of static compression libraries
java_static_libobjects = $(patsubst %,jls/%,$(LIB_CC_OBJECTS))
CLEAN_FILES += jls
java_static_all_libobjects = $(java_static_libobjects)

# Setting ROCKSDB_JAVA_NO_COMPRESSION=1 leaves JAVA_COMPRESSIONS empty, so the
# compression archives are neither built as prerequisites nor linked in.
ifneq ($(ROCKSDB_JAVA_NO_COMPRESSION), 1)
JAVA_COMPRESSIONS = libz.a libbz2.a libsnappy.a liblz4.a libzstd.a
endif

JAVA_STATIC_FLAGS = -DZLIB -DBZIP2 -DSNAPPY -DLZ4 -DZSTD
JAVA_STATIC_INCLUDES = -I./zlib-$(ZLIB_VER) -I./bzip2-$(BZIP2_VER) -I./snappy-$(SNAPPY_VER) -I./lz4-$(LZ4_VER)/lib -I./zstd-$(ZSTD_VER)/lib/include

ifeq ($(HAVE_POWER8),1)
JAVA_STATIC_C_LIBOBJECTS = $(patsubst %.c.o,jls/%.c.o,$(LIB_SOURCES_C:.c=.o))
JAVA_STATIC_ASM_LIBOBJECTS = $(patsubst %.S.o,jls/%.S.o,$(LIB_SOURCES_ASM:.S=.o))

java_static_ppc_libobjects = $(JAVA_STATIC_C_LIBOBJECTS) $(JAVA_STATIC_ASM_LIBOBJECTS)

jls/util/crc32c_ppc.o: util/crc32c_ppc.c
	$(AM_V_CC)$(CC) $(CFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -c $< -o $@

jls/util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S
	$(AM_V_CC)$(CC) $(CFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -c $< -o $@

java_static_all_libobjects += $(java_static_ppc_libobjects)
endif

$(java_static_libobjects): jls/%.o: %.cc $(JAVA_COMPRESSIONS)
	$(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -fPIC -c $< -o $@ $(COVERAGEFLAGS)

# Builds the statically linked JNI library and packages the platform jar,
# the javadoc jar and the sources jar.
# Every `cd` is chained with && so that a failed directory change aborts the
# recipe line instead of running the following command in the wrong place;
# the DEBUG_LEVEL comparison uses the POSIX `=` operator of test.
rocksdbjavastatic: $(java_static_all_libobjects)
	cd java && $(MAKE) javalib
	rm -f ./java/target/$(ROCKSDBJNILIB)
	$(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC \
	  -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) \
	  $(java_static_all_libobjects) $(COVERAGEFLAGS) \
	  $(JAVA_COMPRESSIONS) $(JAVA_STATIC_LDFLAGS)
	cd java/target && if [ "$(DEBUG_LEVEL)" = "0" ]; then \
		strip $(STRIPFLAGS) $(ROCKSDBJNILIB); \
	fi
	cd java && jar -cf target/$(ROCKSDB_JAR) HISTORY*.md
	cd java/target && jar -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB)
	cd java/target/classes && jar -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class
	cd java/target/apidocs && jar -cf ../$(ROCKSDB_JAVADOCS_JAR) *
	cd java/src/main/java && jar -cf ../../../target/$(ROCKSDB_SOURCES_JAR) org

# Cross-builds the Linux variants in Vagrant boxes, then assembles the
# combined jar from every librocksdbjni-* library found in java/target.
rocksdbjavastaticrelease: rocksdbjavastatic
	cd java/crossbuild && (vagrant destroy -f || true) && vagrant up linux32 && vagrant halt linux32 && vagrant up linux64 && vagrant halt linux64 && vagrant up linux64-musl && vagrant halt linux64-musl
	cd java && jar -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md
	cd java/target && jar -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib
	cd java/target/classes && jar -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class

# Same as above, but the Linux variants are built in Docker containers.
rocksdbjavastaticreleasedocker: rocksdbjavastatic rocksdbjavastaticdockerx86 rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl
	cd java && jar -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md
	cd java/target && jar -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib
	cd java/target/classes && jar -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class

# Arguments shared by every Docker cross-build below: the source tree and the
# local Maven repository are mounted read-only, java/target is mounted
# writable to receive the build output, and DEBUG_LEVEL is forwarded.
ROCKSDB_DOCKER_RUN_ARGS = --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL)
# Script executed inside each container.
ROCKSDB_DOCKER_BUILD_SCRIPT = /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh

rocksdbjavastaticdockerx86:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_x86-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:centos6_x86-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerx86_64:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_x64-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:centos6_x64-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerppc64le:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_ppc64le-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:centos7_ppc64le-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerarm64v8:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_arm64v8-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:centos7_arm64v8-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerx86musl:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_x86-musl-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:alpine3_x86-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerx86_64musl:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_x64-musl-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:alpine3_x64-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerppc64lemusl:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_ppc64le-musl-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:alpine3_ppc64le-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticdockerarm64v8musl:
	mkdir -p java/target
	docker run --rm --name rocksdb_linux_arm64v8-musl-be $(ROCKSDB_DOCKER_RUN_ARGS) evolvedbinary/rocksjava:alpine3_arm64v8-be $(ROCKSDB_DOCKER_BUILD_SCRIPT)

rocksdbjavastaticpublish: rocksdbjavastaticrelease rocksdbjavastaticpublishcentral

rocksdbjavastaticpublishdocker: rocksdbjavastaticreleasedocker rocksdbjavastaticpublishcentral

# Command prefix and jar path prefix shared by every upload below.
ROCKSDB_MVN_DEPLOY = mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom
ROCKSDB_PUBLISH_JAR_BASE = java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)

# Signs and uploads each classified jar, then the combined (unclassified) jar.
rocksdbjavastaticpublishcentral:
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-javadoc.jar -Dclassifier=javadoc
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-sources.jar -Dclassifier=sources
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-linux64.jar -Dclassifier=linux64
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-linux32.jar -Dclassifier=linux32
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-linux64-musl.jar -Dclassifier=linux64-musl
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-linux32-musl.jar -Dclassifier=linux32-musl
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-osx.jar -Dclassifier=osx
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE)-win64.jar -Dclassifier=win64
	$(ROCKSDB_MVN_DEPLOY) -Dfile=$(ROCKSDB_PUBLISH_JAR_BASE).jar

# A version of each $(LIBOBJECTS) compiled with -fPIC
ifeq ($(HAVE_POWER8),1)
JAVA_CC_OBJECTS = $(SHARED_CC_OBJECTS)
JAVA_C_OBJECTS = $(SHARED_C_OBJECTS)
JAVA_ASM_OBJECTS = $(SHARED_ASM_OBJECTS)

JAVA_C_LIBOBJECTS = $(patsubst %.c.o,jl/%.c.o,$(JAVA_C_OBJECTS))
JAVA_ASM_LIBOBJECTS = $(patsubst %.S.o,jl/%.S.o,$(JAVA_ASM_OBJECTS))
endif

java_libobjects = $(patsubst %,jl/%,$(LIB_CC_OBJECTS))
CLEAN_FILES += jl
java_all_libobjects = $(java_libobjects)

ifeq ($(HAVE_POWER8),1)
java_ppc_libobjects = $(JAVA_C_LIBOBJECTS) $(JAVA_ASM_LIBOBJECTS)

# NOTE(review): the static-build counterparts of these two rules target
# jls/util/crc32c_ppc*.o, whereas these omit the util/ component - confirm
# that the targets match what $(java_ppc_libobjects) expands to.
jl/crc32c_ppc.o: util/crc32c_ppc.c
	$(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@

jl/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S
	$(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@

java_all_libobjects += $(java_ppc_libobjects)
endif

$(java_libobjects): jl/%.o: %.cc
	$(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS)

# Builds the dynamically linked JNI library and packages it into
# $(ROCKSDB_JAR). `cd` is chained with && so a failed directory change stops
# the line instead of running the next command in the wrong directory.
rocksdbjava: $(java_all_libobjects)
	$(AM_V_GEN)cd java && $(MAKE) javalib
	$(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB)
	$(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) $(java_all_libobjects) $(JAVA_LDFLAGS) $(COVERAGEFLAGS)
	$(AM_V_at)cd java && jar -cf target/$(ROCKSDB_JAR) HISTORY*.md
	$(AM_V_at)cd java/target && jar -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB)
	$(AM_V_at)cd java/target/classes && jar -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class

jclean:
	cd java && $(MAKE) clean

jtest_compile: rocksdbjava
	cd java && $(MAKE) java_test

jtest_run:
	cd java && $(MAKE) run_test

# `sample` and `test` are chained with && so that a failing `sample` build is
# reported instead of being overwritten by the exit status of `test`.
# TODO peterd: find a better place for the Python check in CI targets
jtest: rocksdbjava
	cd java && $(MAKE) sample && $(MAKE) test
	$(PYTHON) tools/check_all_python.py

jdb_bench:
	cd java && $(MAKE) db_bench

commit_prereq: build_tools/rocksdb-lego-determinator \
               build_tools/precommit_checker.py
	J=$(J) build_tools/precommit_checker.py unit unit_481 clang_unit release release_481 clang_release tsan asan ubsan lite unit_non_shm
	$(MAKE) clean && $(MAKE) jclean && $(MAKE) rocksdbjava;

# ---------------------------------------------------------------------------
# Platform-specific compilation
# ---------------------------------------------------------------------------

ifeq ($(PLATFORM), IOS)
# For iOS, create universal object files to be used on both the simulator and
# a device.
XCODEROOT=$(shell xcode-select -print-path) PLATFORMSROOT=$(XCODEROOT)/Platforms SIMULATORROOT=$(PLATFORMSROOT)/iPhoneSimulator.platform/Developer DEVICEROOT=$(PLATFORMSROOT)/iPhoneOS.platform/Developer IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBundleShortVersionString) .cc.o: mkdir -p ios-x86/$(dir $@) $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@ mkdir -p ios-arm/$(dir $@) xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -arch armv7s -arch arm64 -c $< -o ios-arm/$@ lipo ios-x86/$@ ios-arm/$@ -create -output $@ .c.o: mkdir -p ios-x86/$(dir $@) $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@ mkdir -p ios-arm/$(dir $@) xcrun -sdk iphoneos $(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -arch armv7s -arch arm64 -c $< -o ios-arm/$@ lipo ios-x86/$@ ios-arm/$@ -create -output $@ else ifeq ($(HAVE_POWER8),1) util/crc32c_ppc.o: util/crc32c_ppc.c $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ endif .cc.o: $(AM_V_CC)$(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) .cpp.o: $(AM_V_CC)$(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) .c.o: $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ endif # --------------------------------------------------------------------------- # Source files dependencies detection # --------------------------------------------------------------------------- # FIXME: nothing checks that entries in MAIN_SOURCES actually exist all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(ANALYZER_LIB_SOURCES) $(STRESS_LIB_SOURCES) DEPFILES = $(all_sources:.cc=.cc.d) ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1) DEPFILES += 
$(FOLLY_SOURCES:.cpp=.cpp.d) endif # Add proper dependency support so changing a .h file forces a .cc file to # rebuild. # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. %.cc.d: %.cc @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.cc=.o)' "$<" -o '$@' %.cpp.d: %.cpp @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.cpp=.o)' "$<" -o '$@' ifeq ($(HAVE_POWER8),1) DEPFILES_C = $(LIB_SOURCES_C:.c=.c.d) DEPFILES_ASM = $(LIB_SOURCES_ASM:.S=.S.d) %.c.d: %.c @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.c=.o)' "$<" -o '$@' %.S.d: %.S @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.S=.o)' "$<" -o '$@' $(DEPFILES_C): %.c.d $(DEPFILES_ASM): %.S.d depend: $(DEPFILES) $(DEPFILES_C) $(DEPFILES_ASM) else depend: $(DEPFILES) endif # if the make goal is either "clean" or "format", we shouldn't # try to import the *.d files. # TODO(kailiu) The unfamiliarity of Make's conditions leads to the ugly # working solution. 
# Import the generated *.d dependency files unless every requested goal is a
# housekeeping target that compiles nothing (clean, format, jclean, jtest,
# package, analyze). Including them makes make (re)generate every .d file
# first, which is wasted work for those goals.
#
# $(filter-out ...) drops the housekeeping goals from MAKECMDGOALS; if anything
# is left, a real build goal was requested and the .d files are needed. An
# empty MAKECMDGOALS means the default goal, which also needs them, so the
# $(if ...) substitutes a non-empty placeholder in that case.
#
# Unlike comparing the whole MAKECMDGOALS string against one word at a time,
# this also handles multi-goal invocations such as `make clean jclean`.
ifneq ($(if $(MAKECMDGOALS),$(filter-out clean format jclean jtest package analyze,$(MAKECMDGOALS)),default),)
-include $(DEPFILES)
endif
rocksdb-6.11.4/README.md000066400000000000000000000035561370372246700145350ustar00rootroot00000000000000## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage [![Linux/Mac Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb) [![Windows Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/master?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/master) [![PPC64le Build Status](http://140.211.168.68:8080/buildStatus/icon?job=Rocksdb)](http://140.211.168.68:8080/job/Rocksdb) RocksDB is developed and maintained by Facebook Database Engineering Team. It is built on earlier work on [LevelDB](https://github.com/google/leveldb) by Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) This code is a library that forms the core building block for a fast key-value server, especially suited for storing data on flash drives. It has a Log-Structured-Merge-Database (LSM) design with flexible tradeoffs between Write-Amplification-Factor (WAF), Read-Amplification-Factor (RAF) and Space-Amplification-Factor (SAF). It has multi-threaded compactions, making it especially suitable for storing multiple terabytes of data in a single database. Start with example usage here: https://github.com/facebook/rocksdb/tree/master/examples See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation. The public interface is in `include/`. Callers should not include or rely on the details of any other header files in this package. Those internal APIs may be changed without warning.
Design discussions are conducted in https://www.facebook.com/groups/rocksdb.dev/ and https://rocksdb.slack.com/ ## License RocksDB is dual-licensed under both the GPLv2 (found in the COPYING file in the root directory) and the Apache 2.0 License (found in the LICENSE.Apache file in the root directory). You may select, at your option, one of the above-listed licenses. rocksdb-6.11.4/ROCKSDB_LITE.md000066400000000000000000000020171370372246700154730ustar00rootroot00000000000000# RocksDBLite RocksDBLite is a project focused on mobile use cases, which don't need many of the features we've built for server workloads and are very sensitive to binary size. For that reason, we added a compile flag ROCKSDB_LITE that compiles out a lot of the nonessential code and keeps the binary lean. Some examples of the features disabled by ROCKSDB_LITE: * No compiled-in support for the LDB tool * No backupable DB * No support for replication (which we provide in the form of TransactionLogIterator) * No advanced monitoring tools * No special-purpose memtables that are highly optimized for specific use cases * No Transactions When adding a new big feature to RocksDB, please add a ROCKSDB_LITE compile guard if: * Nobody from mobile really needs your feature, * Your feature is adding a lot of weight to the binary. Don't add a ROCKSDB_LITE compile guard if: * It would introduce a lot of code complexity. Compile guards make code harder to read. It's a trade-off. * Your feature is not adding a lot of weight. If unsure, ask. :) rocksdb-6.11.4/TARGETS000066400000000000000000001144341370372246700143100ustar00rootroot00000000000000# This file @generated by `python3 buckifier/buckify_rocksdb.py` # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can # only be validated by Facebook employees.
#
load("@fbcode_macros//build_defs:auto_headers.bzl", "AutoHeaders")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load(":defs.bzl", "test_binary")

# Package path with a trailing slash; concatenated into the "-I" include
# flags at the end of ROCKSDB_PREPROCESSOR_FLAGS.
REPO_PATH = package_name() + "/"

# Compiler flags passed to the targets defined in this file. "-DNDEBUG" is
# appended further down when building in an opt mode.
ROCKSDB_COMPILER_FLAGS = [
    "-fno-builtin-memcmp",
    # Needed to compile in fbcode
    "-Wno-expansion-to-defined",
    # Added missing flags from output of build_detect_platform
    "-Wnarrowing",
    "-DROCKSDB_NO_DYNAMIC_EXTENSION",
]

# Third-party libraries handed to the targets as `external_deps`.
# NOTE(review): the tuples look like (project, version, library name), with
# the library name optional -- confirm against the fbcode macros.
ROCKSDB_EXTERNAL_DEPS = [
    ("bzip2", None, "bz2"),
    ("snappy", None, "snappy"),
    ("zlib", None, "z"),
    ("gflags", None, "gflags"),
    ("lz4", None, "lz4"),
    ("zstd", None),
    ("tbb", None),
]

# (OS name, [dependency targets]) pairs handed to the targets as `os_deps`.
ROCKSDB_OS_DEPS = [
    (
        "linux",
        ["third-party//numa:numa", "third-party//liburing:uring"],
    ),
]

# (OS name, [preprocessor flags]) pairs handed to the targets as
# `os_preprocessor_flags`.
ROCKSDB_OS_PREPROCESSOR_FLAGS = [
    (
        "linux",
        [
            "-DOS_LINUX",
            "-DROCKSDB_FALLOCATE_PRESENT",
            "-DROCKSDB_MALLOC_USABLE_SIZE",
            "-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX",
            "-DROCKSDB_RANGESYNC_PRESENT",
            "-DROCKSDB_SCHED_GETCPU_PRESENT",
            "-DROCKSDB_IOURING_PRESENT",
            "-DHAVE_SSE42",
            "-DLIBURING",
            "-DNUMA",
        ],
    ),
    (
        "macos",
        ["-DOS_MACOSX"],
    ),
]

# Preprocessor flags that do not depend on OS or architecture.
ROCKSDB_PREPROCESSOR_FLAGS = [
    "-DROCKSDB_PLATFORM_POSIX",
    "-DROCKSDB_LIB_IO_POSIX",
    "-DROCKSDB_SUPPORT_THREAD_LOCAL",

    # Flags to enable libs we include
    "-DSNAPPY",
    "-DZLIB",
    "-DBZIP2",
    "-DLZ4",
    "-DZSTD",
    "-DZSTD_STATIC_LINKING_ONLY",
    "-DGFLAGS=gflags",
    "-DTBB",

    # Added missing flags from output of build_detect_platform
    "-DROCKSDB_BACKTRACE",

    # Directories with files for #include
    "-I" + REPO_PATH + "include/",
    "-I" + REPO_PATH,
]

# Preprocessor flags keyed by CPU architecture, handed to the targets as
# `arch_preprocessor_flags`.
ROCKSDB_ARCH_PREPROCESSOR_FLAGS = {
    "x86_64": [
        "-DHAVE_PCLMUL",
    ],
}

# Build mode read from the "fbcode" config section.
build_mode = read_config("fbcode", "build_mode")

# True when the configured build mode name starts with "opt".
is_opt_mode = build_mode.startswith("opt")

# -DNDEBUG is added by default in opt mode in fbcode. Adding it a second time
# is harmless and guards against it ever being forgotten.
ROCKSDB_COMPILER_FLAGS += (["-DNDEBUG"] if is_opt_mode else [])

# Sanitizer setting read from the "fbcode" config section; the statements that
# follow compare it against "" to decide whether jemalloc is enabled.
sanitizer = read_config("fbcode", "sanitizer")

# Do not enable jemalloc if a sanitizer is present. RocksDB will further detect
# whether the binary is linked with jemalloc at runtime.
ROCKSDB_OS_PREPROCESSOR_FLAGS += ([( "linux", ["-DROCKSDB_JEMALLOC"], )] if sanitizer == "" else []) ROCKSDB_OS_DEPS += ([( "linux", ["third-party//jemalloc:headers"], )] if sanitizer == "" else []) ROCKSDB_LIB_DEPS = [ ":rocksdb_lib", ":rocksdb_test_lib", ] if not is_opt_mode else [":rocksdb_lib"] cpp_library( name = "rocksdb_lib", srcs = [ "cache/cache.cc", "cache/clock_cache.cc", "cache/lru_cache.cc", "cache/sharded_cache.cc", "db/arena_wrapped_db_iter.cc", "db/blob/blob_file_addition.cc", "db/blob/blob_file_garbage.cc", "db/blob/blob_file_meta.cc", "db/blob/blob_log_format.cc", "db/blob/blob_log_reader.cc", "db/blob/blob_log_writer.cc", "db/builder.cc", "db/c.cc", "db/column_family.cc", "db/compacted_db_impl.cc", "db/compaction/compaction.cc", "db/compaction/compaction_iterator.cc", "db/compaction/compaction_job.cc", "db/compaction/compaction_picker.cc", "db/compaction/compaction_picker_fifo.cc", "db/compaction/compaction_picker_level.cc", "db/compaction/compaction_picker_universal.cc", "db/convenience.cc", "db/db_filesnapshot.cc", "db/db_impl/db_impl.cc", "db/db_impl/db_impl_compaction_flush.cc", "db/db_impl/db_impl_debug.cc", "db/db_impl/db_impl_experimental.cc", "db/db_impl/db_impl_files.cc", "db/db_impl/db_impl_open.cc", "db/db_impl/db_impl_readonly.cc", "db/db_impl/db_impl_secondary.cc", "db/db_impl/db_impl_write.cc", "db/db_info_dumper.cc", "db/db_iter.cc", "db/dbformat.cc", "db/error_handler.cc", "db/event_helpers.cc", "db/experimental.cc", "db/external_sst_file_ingestion_job.cc", "db/file_indexer.cc", "db/flush_job.cc", "db/flush_scheduler.cc", "db/forward_iterator.cc", "db/import_column_family_job.cc", "db/internal_stats.cc", "db/log_reader.cc", "db/log_writer.cc", "db/logs_with_prep_tracker.cc", "db/malloc_stats.cc", "db/memtable.cc", "db/memtable_list.cc", "db/merge_helper.cc", "db/merge_operator.cc", "db/range_del_aggregator.cc", "db/range_tombstone_fragmenter.cc", "db/repair.cc", "db/snapshot_impl.cc", "db/table_cache.cc", 
"db/table_properties_collector.cc", "db/transaction_log_impl.cc", "db/trim_history_scheduler.cc", "db/version_builder.cc", "db/version_edit.cc", "db/version_edit_handler.cc", "db/version_set.cc", "db/wal_manager.cc", "db/write_batch.cc", "db/write_batch_base.cc", "db/write_controller.cc", "db/write_thread.cc", "env/env.cc", "env/env_chroot.cc", "env/env_encryption.cc", "env/env_hdfs.cc", "env/env_posix.cc", "env/file_system.cc", "env/fs_posix.cc", "env/io_posix.cc", "env/mock_env.cc", "file/delete_scheduler.cc", "file/file_prefetch_buffer.cc", "file/file_util.cc", "file/filename.cc", "file/random_access_file_reader.cc", "file/read_write_util.cc", "file/readahead_raf.cc", "file/sequence_file_reader.cc", "file/sst_file_manager_impl.cc", "file/writable_file_writer.cc", "logging/auto_roll_logger.cc", "logging/event_logger.cc", "logging/log_buffer.cc", "memory/arena.cc", "memory/concurrent_arena.cc", "memory/jemalloc_nodump_allocator.cc", "memory/memkind_kmem_allocator.cc", "memtable/alloc_tracker.cc", "memtable/hash_linklist_rep.cc", "memtable/hash_skiplist_rep.cc", "memtable/skiplistrep.cc", "memtable/vectorrep.cc", "memtable/write_buffer_manager.cc", "monitoring/histogram.cc", "monitoring/histogram_windowing.cc", "monitoring/in_memory_stats_history.cc", "monitoring/instrumented_mutex.cc", "monitoring/iostats_context.cc", "monitoring/perf_context.cc", "monitoring/perf_level.cc", "monitoring/persistent_stats_history.cc", "monitoring/statistics.cc", "monitoring/thread_status_impl.cc", "monitoring/thread_status_updater.cc", "monitoring/thread_status_updater_debug.cc", "monitoring/thread_status_util.cc", "monitoring/thread_status_util_debug.cc", "options/cf_options.cc", "options/db_options.cc", "options/options.cc", "options/options_helper.cc", "options/options_parser.cc", "port/port_posix.cc", "port/stack_trace.cc", "table/adaptive/adaptive_table_factory.cc", "table/block_based/binary_search_index_reader.cc", "table/block_based/block.cc", 
"table/block_based/block_based_filter_block.cc", "table/block_based/block_based_table_builder.cc", "table/block_based/block_based_table_factory.cc", "table/block_based/block_based_table_iterator.cc", "table/block_based/block_based_table_reader.cc", "table/block_based/block_builder.cc", "table/block_based/block_prefetcher.cc", "table/block_based/block_prefix_index.cc", "table/block_based/data_block_footer.cc", "table/block_based/data_block_hash_index.cc", "table/block_based/filter_block_reader_common.cc", "table/block_based/filter_policy.cc", "table/block_based/flush_block_policy.cc", "table/block_based/full_filter_block.cc", "table/block_based/hash_index_reader.cc", "table/block_based/index_builder.cc", "table/block_based/index_reader_common.cc", "table/block_based/parsed_full_filter_block.cc", "table/block_based/partitioned_filter_block.cc", "table/block_based/partitioned_index_iterator.cc", "table/block_based/partitioned_index_reader.cc", "table/block_based/reader_common.cc", "table/block_based/uncompression_dict_reader.cc", "table/block_fetcher.cc", "table/cuckoo/cuckoo_table_builder.cc", "table/cuckoo/cuckoo_table_factory.cc", "table/cuckoo/cuckoo_table_reader.cc", "table/format.cc", "table/get_context.cc", "table/iterator.cc", "table/merging_iterator.cc", "table/meta_blocks.cc", "table/persistent_cache_helper.cc", "table/plain/plain_table_bloom.cc", "table/plain/plain_table_builder.cc", "table/plain/plain_table_factory.cc", "table/plain/plain_table_index.cc", "table/plain/plain_table_key_coding.cc", "table/plain/plain_table_reader.cc", "table/sst_file_reader.cc", "table/sst_file_writer.cc", "table/table_properties.cc", "table/two_level_iterator.cc", "test_util/sync_point.cc", "test_util/sync_point_impl.cc", "test_util/transaction_test_util.cc", "tools/dump/db_dump_tool.cc", "tools/ldb_cmd.cc", "tools/ldb_tool.cc", "tools/sst_dump_tool.cc", "trace_replay/block_cache_tracer.cc", "trace_replay/trace_replay.cc", "util/build_version.cc", "util/coding.cc", 
"util/compaction_job_stats_impl.cc", "util/comparator.cc", "util/compression_context_cache.cc", "util/concurrent_task_limiter_impl.cc", "util/crc32c.cc", "util/dynamic_bloom.cc", "util/file_checksum_helper.cc", "util/hash.cc", "util/murmurhash.cc", "util/random.cc", "util/rate_limiter.cc", "util/slice.cc", "util/status.cc", "util/string_util.cc", "util/thread_local.cc", "util/threadpool_imp.cc", "util/xxhash.cc", "utilities/backupable/backupable_db.cc", "utilities/blob_db/blob_compaction_filter.cc", "utilities/blob_db/blob_db.cc", "utilities/blob_db/blob_db_impl.cc", "utilities/blob_db/blob_db_impl_filesnapshot.cc", "utilities/blob_db/blob_dump_tool.cc", "utilities/blob_db/blob_file.cc", "utilities/cassandra/cassandra_compaction_filter.cc", "utilities/cassandra/format.cc", "utilities/cassandra/merge_operator.cc", "utilities/checkpoint/checkpoint_impl.cc", "utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc", "utilities/convenience/info_log_finder.cc", "utilities/debug.cc", "utilities/env_mirror.cc", "utilities/env_timed.cc", "utilities/leveldb_options/leveldb_options.cc", "utilities/memory/memory_util.cc", "utilities/merge_operators/bytesxor.cc", "utilities/merge_operators/max.cc", "utilities/merge_operators/put.cc", "utilities/merge_operators/sortlist.cc", "utilities/merge_operators/string_append/stringappend.cc", "utilities/merge_operators/string_append/stringappend2.cc", "utilities/merge_operators/uint64add.cc", "utilities/object_registry.cc", "utilities/option_change_migration/option_change_migration.cc", "utilities/options/options_util.cc", "utilities/persistent_cache/block_cache_tier.cc", "utilities/persistent_cache/block_cache_tier_file.cc", "utilities/persistent_cache/block_cache_tier_metadata.cc", "utilities/persistent_cache/persistent_cache_tier.cc", "utilities/persistent_cache/volatile_tier_impl.cc", "utilities/simulator_cache/cache_simulator.cc", "utilities/simulator_cache/sim_cache.cc", 
"utilities/table_properties_collectors/compact_on_deletion_collector.cc", "utilities/trace/file_trace_reader_writer.cc", "utilities/transactions/optimistic_transaction.cc", "utilities/transactions/optimistic_transaction_db_impl.cc", "utilities/transactions/pessimistic_transaction.cc", "utilities/transactions/pessimistic_transaction_db.cc", "utilities/transactions/snapshot_checker.cc", "utilities/transactions/transaction_base.cc", "utilities/transactions/transaction_db_mutex_impl.cc", "utilities/transactions/transaction_lock_mgr.cc", "utilities/transactions/transaction_util.cc", "utilities/transactions/write_prepared_txn.cc", "utilities/transactions/write_prepared_txn_db.cc", "utilities/transactions/write_unprepared_txn.cc", "utilities/transactions/write_unprepared_txn_db.cc", "utilities/ttl/db_ttl_impl.cc", "utilities/write_batch_with_index/write_batch_with_index.cc", "utilities/write_batch_with_index/write_batch_with_index_internal.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [], external_deps = ROCKSDB_EXTERNAL_DEPS, ) cpp_library( name = "rocksdb_test_lib", srcs = [ "db/db_test_util.cc", "table/mock_table.cc", "test_util/fault_injection_test_env.cc", "test_util/fault_injection_test_fs.cc", "test_util/testharness.cc", "test_util/testutil.cc", "tools/block_cache_analyzer/block_cache_trace_analyzer.cc", "tools/trace_analyzer_tool.cc", "utilities/cassandra/test_utils.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [":rocksdb_lib"], external_deps = ROCKSDB_EXTERNAL_DEPS + [ ("googletest", 
None, "gtest"), ], ) cpp_library( name = "rocksdb_tools_lib", srcs = [ "test_util/testutil.cc", "tools/block_cache_analyzer/block_cache_trace_analyzer.cc", "tools/db_bench_tool.cc", "tools/trace_analyzer_tool.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [":rocksdb_lib"], external_deps = ROCKSDB_EXTERNAL_DEPS, ) cpp_library( name = "rocksdb_stress_lib", srcs = [ "db_stress_tool/batched_ops_stress.cc", "db_stress_tool/cf_consistency_stress.cc", "db_stress_tool/db_stress_common.cc", "db_stress_tool/db_stress_driver.cc", "db_stress_tool/db_stress_gflags.cc", "db_stress_tool/db_stress_shared_state.cc", "db_stress_tool/db_stress_test_base.cc", "db_stress_tool/db_stress_tool.cc", "db_stress_tool/no_batched_ops_stress.cc", "test_util/testutil.cc", "tools/block_cache_analyzer/block_cache_trace_analyzer.cc", "tools/trace_analyzer_tool.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = ROCKSDB_LIB_DEPS, external_deps = ROCKSDB_EXTERNAL_DEPS, ) if not is_opt_mode: cpp_binary( name = "c_test_bin", srcs = ["db/c_test.c"], arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [":rocksdb_test_lib"], ) if not is_opt_mode: custom_unittest( "c_test", command = [ native.package_name() + "/buckifier/rocks_test_runner.sh", "$(location :{})".format("c_test_bin"), ], type = "simple", ) cpp_library( name = "env_basic_test_lib", srcs = ["env/env_basic_test.cc"], 
auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [":rocksdb_test_lib"], external_deps = ROCKSDB_EXTERNAL_DEPS, ) # [test_name, test_src, test_type, extra_deps, extra_compiler_flags] ROCKS_TESTS = [ [ "arena_test", "memory/arena_test.cc", "serial", [], [], ], [ "auto_roll_logger_test", "logging/auto_roll_logger_test.cc", "serial", [], [], ], [ "autovector_test", "util/autovector_test.cc", "serial", [], [], ], [ "backupable_db_test", "utilities/backupable/backupable_db_test.cc", "parallel", [], [], ], [ "blob_db_test", "utilities/blob_db/blob_db_test.cc", "serial", [], [], ], [ "blob_file_addition_test", "db/blob/blob_file_addition_test.cc", "serial", [], [], ], [ "blob_file_garbage_test", "db/blob/blob_file_garbage_test.cc", "serial", [], [], ], [ "block_based_filter_block_test", "table/block_based/block_based_filter_block_test.cc", "serial", [], [], ], [ "block_based_table_reader_test", "table/block_based/block_based_table_reader_test.cc", "serial", [], [], ], [ "block_cache_trace_analyzer_test", "tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc", "serial", [], [], ], [ "block_cache_tracer_test", "trace_replay/block_cache_tracer_test.cc", "serial", [], [], ], [ "block_fetcher_test", "table/block_fetcher_test.cc", "serial", [], [], ], [ "block_test", "table/block_based/block_test.cc", "serial", [], [], ], [ "bloom_test", "util/bloom_test.cc", "serial", [], [], ], [ "cache_simulator_test", "utilities/simulator_cache/cache_simulator_test.cc", "serial", [], [], ], [ "cache_test", "cache/cache_test.cc", "serial", [], [], ], [ "cassandra_format_test", "utilities/cassandra/cassandra_format_test.cc", "serial", [], [], ], [ "cassandra_functional_test", "utilities/cassandra/cassandra_functional_test.cc", "serial", [], [], ], [ 
"cassandra_row_merge_test", "utilities/cassandra/cassandra_row_merge_test.cc", "serial", [], [], ], [ "cassandra_serialize_test", "utilities/cassandra/cassandra_serialize_test.cc", "serial", [], [], ], [ "checkpoint_test", "utilities/checkpoint/checkpoint_test.cc", "serial", [], [], ], [ "cleanable_test", "table/cleanable_test.cc", "serial", [], [], ], [ "coding_test", "util/coding_test.cc", "serial", [], [], ], [ "column_family_test", "db/column_family_test.cc", "serial", [], [], ], [ "compact_files_test", "db/compact_files_test.cc", "serial", [], [], ], [ "compact_on_deletion_collector_test", "utilities/table_properties_collectors/compact_on_deletion_collector_test.cc", "serial", [], [], ], [ "compaction_iterator_test", "db/compaction/compaction_iterator_test.cc", "serial", [], [], ], [ "compaction_job_stats_test", "db/compaction/compaction_job_stats_test.cc", "serial", [], [], ], [ "compaction_job_test", "db/compaction/compaction_job_test.cc", "serial", [], [], ], [ "compaction_picker_test", "db/compaction/compaction_picker_test.cc", "serial", [], [], ], [ "comparator_db_test", "db/comparator_db_test.cc", "serial", [], [], ], [ "corruption_test", "db/corruption_test.cc", "serial", [], [], ], [ "crc32c_test", "util/crc32c_test.cc", "serial", [], [], ], [ "cuckoo_table_builder_test", "table/cuckoo/cuckoo_table_builder_test.cc", "serial", [], [], ], [ "cuckoo_table_db_test", "db/cuckoo_table_db_test.cc", "serial", [], [], ], [ "cuckoo_table_reader_test", "table/cuckoo/cuckoo_table_reader_test.cc", "serial", [], [], ], [ "data_block_hash_index_test", "table/block_based/data_block_hash_index_test.cc", "serial", [], [], ], [ "db_basic_test", "db/db_basic_test.cc", "serial", [], [], ], [ "db_blob_index_test", "db/blob/db_blob_index_test.cc", "serial", [], [], ], [ "db_block_cache_test", "db/db_block_cache_test.cc", "serial", [], [], ], [ "db_bloom_filter_test", "db/db_bloom_filter_test.cc", "parallel", [], [], ], [ "db_compaction_filter_test", 
"db/db_compaction_filter_test.cc", "parallel", [], [], ], [ "db_compaction_test", "db/db_compaction_test.cc", "parallel", [], [], ], [ "db_dynamic_level_test", "db/db_dynamic_level_test.cc", "serial", [], [], ], [ "db_encryption_test", "db/db_encryption_test.cc", "serial", [], [], ], [ "db_flush_test", "db/db_flush_test.cc", "serial", [], [], ], [ "db_inplace_update_test", "db/db_inplace_update_test.cc", "serial", [], [], ], [ "db_io_failure_test", "db/db_io_failure_test.cc", "serial", [], [], ], [ "db_iter_stress_test", "db/db_iter_stress_test.cc", "serial", [], [], ], [ "db_iter_test", "db/db_iter_test.cc", "serial", [], [], ], [ "db_iterator_test", "db/db_iterator_test.cc", "serial", [], [], ], [ "db_log_iter_test", "db/db_log_iter_test.cc", "serial", [], [], ], [ "db_logical_block_size_cache_test", "db/db_logical_block_size_cache_test.cc", "serial", [], [], ], [ "db_memtable_test", "db/db_memtable_test.cc", "serial", [], [], ], [ "db_merge_operand_test", "db/db_merge_operand_test.cc", "serial", [], [], ], [ "db_merge_operator_test", "db/db_merge_operator_test.cc", "parallel", [], [], ], [ "db_options_test", "db/db_options_test.cc", "serial", [], [], ], [ "db_properties_test", "db/db_properties_test.cc", "serial", [], [], ], [ "db_range_del_test", "db/db_range_del_test.cc", "serial", [], [], ], [ "db_secondary_test", "db/db_impl/db_secondary_test.cc", "serial", [], [], ], [ "db_sst_test", "db/db_sst_test.cc", "parallel", [], [], ], [ "db_statistics_test", "db/db_statistics_test.cc", "serial", [], [], ], [ "db_table_properties_test", "db/db_table_properties_test.cc", "serial", [], [], ], [ "db_tailing_iter_test", "db/db_tailing_iter_test.cc", "serial", [], [], ], [ "db_test", "db/db_test.cc", "parallel", [], [], ], [ "db_test2", "db/db_test2.cc", "serial", [], [], ], [ "db_universal_compaction_test", "db/db_universal_compaction_test.cc", "parallel", [], [], ], [ "db_wal_test", "db/db_wal_test.cc", "parallel", [], [], ], [ "db_with_timestamp_basic_test", 
"db/db_with_timestamp_basic_test.cc", "serial", [], [], ], [ "db_with_timestamp_compaction_test", "db/db_with_timestamp_compaction_test.cc", "serial", [], [], ], [ "db_write_test", "db/db_write_test.cc", "serial", [], [], ], [ "dbformat_test", "db/dbformat_test.cc", "serial", [], [], ], [ "defer_test", "util/defer_test.cc", "serial", [], [], ], [ "delete_scheduler_test", "file/delete_scheduler_test.cc", "serial", [], [], ], [ "deletefile_test", "db/deletefile_test.cc", "serial", [], [], ], [ "dynamic_bloom_test", "util/dynamic_bloom_test.cc", "serial", [], [], ], [ "env_basic_test", "env/env_basic_test.cc", "serial", [], [], ], [ "env_logger_test", "logging/env_logger_test.cc", "serial", [], [], ], [ "env_test", "env/env_test.cc", "serial", [], [], ], [ "env_timed_test", "utilities/env_timed_test.cc", "serial", [], [], ], [ "error_handler_fs_test", "db/error_handler_fs_test.cc", "serial", [], [], ], [ "event_logger_test", "logging/event_logger_test.cc", "serial", [], [], ], [ "external_sst_file_basic_test", "db/external_sst_file_basic_test.cc", "serial", [], [], ], [ "external_sst_file_test", "db/external_sst_file_test.cc", "parallel", [], [], ], [ "fault_injection_test", "db/fault_injection_test.cc", "parallel", [], [], ], [ "file_indexer_test", "db/file_indexer_test.cc", "serial", [], [], ], [ "file_reader_writer_test", "util/file_reader_writer_test.cc", "parallel", [], [], ], [ "filelock_test", "util/filelock_test.cc", "serial", [], [], ], [ "filename_test", "db/filename_test.cc", "serial", [], [], ], [ "flush_job_test", "db/flush_job_test.cc", "serial", [], [], ], [ "full_filter_block_test", "table/block_based/full_filter_block_test.cc", "serial", [], [], ], [ "hash_table_test", "utilities/persistent_cache/hash_table_test.cc", "serial", [], [], ], [ "hash_test", "util/hash_test.cc", "serial", [], [], ], [ "heap_test", "util/heap_test.cc", "serial", [], [], ], [ "histogram_test", "monitoring/histogram_test.cc", "serial", [], [], ], [ "import_column_family_test", 
"db/import_column_family_test.cc", "parallel", [], [], ], [ "inlineskiplist_test", "memtable/inlineskiplist_test.cc", "parallel", [], [], ], [ "io_posix_test", "env/io_posix_test.cc", "serial", [], [], ], [ "iostats_context_test", "monitoring/iostats_context_test.cc", "serial", [], [], ], [ "ldb_cmd_test", "tools/ldb_cmd_test.cc", "serial", [], [], ], [ "listener_test", "db/listener_test.cc", "serial", [], [], ], [ "log_test", "db/log_test.cc", "serial", [], [], ], [ "lru_cache_test", "cache/lru_cache_test.cc", "serial", [], [], ], [ "manual_compaction_test", "db/manual_compaction_test.cc", "parallel", [], [], ], [ "memkind_kmem_allocator_test", "memory/memkind_kmem_allocator_test.cc", "serial", [], [], ], [ "memory_test", "utilities/memory/memory_test.cc", "serial", [], [], ], [ "memtable_list_test", "db/memtable_list_test.cc", "serial", [], [], ], [ "merge_helper_test", "db/merge_helper_test.cc", "serial", [], [], ], [ "merge_test", "db/merge_test.cc", "serial", [], [], ], [ "merger_test", "table/merger_test.cc", "serial", [], [], ], [ "mock_env_test", "env/mock_env_test.cc", "serial", [], [], ], [ "object_registry_test", "utilities/object_registry_test.cc", "serial", [], [], ], [ "obsolete_files_test", "db/obsolete_files_test.cc", "serial", [], [], ], [ "optimistic_transaction_test", "utilities/transactions/optimistic_transaction_test.cc", "serial", [], [], ], [ "option_change_migration_test", "utilities/option_change_migration/option_change_migration_test.cc", "serial", [], [], ], [ "options_file_test", "db/options_file_test.cc", "serial", [], [], ], [ "options_settable_test", "options/options_settable_test.cc", "serial", [], [], ], [ "options_test", "options/options_test.cc", "serial", [], [], ], [ "options_util_test", "utilities/options/options_util_test.cc", "serial", [], [], ], [ "partitioned_filter_block_test", "table/block_based/partitioned_filter_block_test.cc", "serial", [], [], ], [ "perf_context_test", "db/perf_context_test.cc", "serial", [], [], ], [ 
"persistent_cache_test", "utilities/persistent_cache/persistent_cache_test.cc", "parallel", [], [], ], [ "plain_table_db_test", "db/plain_table_db_test.cc", "serial", [], [], ], [ "prefix_test", "db/prefix_test.cc", "serial", [], [], ], [ "random_access_file_reader_test", "file/random_access_file_reader_test.cc", "serial", [], [], ], [ "random_test", "util/random_test.cc", "serial", [], [], ], [ "range_del_aggregator_test", "db/range_del_aggregator_test.cc", "serial", [], [], ], [ "range_tombstone_fragmenter_test", "db/range_tombstone_fragmenter_test.cc", "serial", [], [], ], [ "rate_limiter_test", "util/rate_limiter_test.cc", "serial", [], [], ], [ "reduce_levels_test", "tools/reduce_levels_test.cc", "serial", [], [], ], [ "repair_test", "db/repair_test.cc", "serial", [], [], ], [ "repeatable_thread_test", "util/repeatable_thread_test.cc", "serial", [], [], ], [ "sim_cache_test", "utilities/simulator_cache/sim_cache_test.cc", "serial", [], [], ], [ "skiplist_test", "memtable/skiplist_test.cc", "serial", [], [], ], [ "slice_test", "util/slice_test.cc", "serial", [], [], ], [ "slice_transform_test", "util/slice_transform_test.cc", "serial", [], [], ], [ "sst_dump_test", "tools/sst_dump_test.cc", "serial", [], [], ], [ "sst_file_reader_test", "table/sst_file_reader_test.cc", "serial", [], [], ], [ "statistics_test", "monitoring/statistics_test.cc", "serial", [], [], ], [ "stats_history_test", "monitoring/stats_history_test.cc", "serial", [], [], ], [ "stringappend_test", "utilities/merge_operators/string_append/stringappend_test.cc", "serial", [], [], ], [ "table_properties_collector_test", "db/table_properties_collector_test.cc", "serial", [], [], ], [ "table_test", "table/table_test.cc", "parallel", [], [], ], [ "testutil_test", "test_util/testutil_test.cc", "serial", [], [], ], [ "thread_list_test", "util/thread_list_test.cc", "serial", [], [], ], [ "thread_local_test", "util/thread_local_test.cc", "serial", [], [], ], [ "timer_queue_test", 
"util/timer_queue_test.cc", "serial", [], [], ], [ "timer_test", "util/timer_test.cc", "serial", [], [], ], [ "trace_analyzer_test", "tools/trace_analyzer_test.cc", "serial", [], [], ], [ "transaction_lock_mgr_test", "utilities/transactions/transaction_lock_mgr_test.cc", "parallel", [], [], ], [ "transaction_test", "utilities/transactions/transaction_test.cc", "parallel", [], [], ], [ "ttl_test", "utilities/ttl/ttl_test.cc", "serial", [], [], ], [ "util_merge_operators_test", "utilities/util_merge_operators_test.cc", "serial", [], [], ], [ "version_builder_test", "db/version_builder_test.cc", "serial", [], [], ], [ "version_edit_test", "db/version_edit_test.cc", "serial", [], [], ], [ "version_set_test", "db/version_set_test.cc", "serial", [], [], ], [ "wal_manager_test", "db/wal_manager_test.cc", "serial", [], [], ], [ "work_queue_test", "util/work_queue_test.cc", "serial", [], [], ], [ "write_batch_test", "db/write_batch_test.cc", "serial", [], [], ], [ "write_batch_with_index_test", "utilities/write_batch_with_index/write_batch_with_index_test.cc", "serial", [], [], ], [ "write_buffer_manager_test", "memtable/write_buffer_manager_test.cc", "serial", [], [], ], [ "write_callback_test", "db/write_callback_test.cc", "serial", [], [], ], [ "write_controller_test", "db/write_controller_test.cc", "serial", [], [], ], [ "write_prepared_transaction_test", "utilities/transactions/write_prepared_transaction_test.cc", "parallel", [], [], ], [ "write_unprepared_transaction_test", "utilities/transactions/write_unprepared_transaction_test.cc", "parallel", [], [], ], ] # Generate a test rule for each entry in ROCKS_TESTS # Do not build the tests in opt mode, since SyncPoint and other test code # will not be included. 
[
    cpp_unittest(
        name = test_name,
        srcs = [test_cc],
        arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
        os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
        # Apply the per-test extra compiler flags carried by each ROCKS_TESTS
        # entry; they were unpacked below but previously never used.
        # NOTE(review): assumes ROCKSDB_COMPILER_FLAGS is a list, like the
        # other *_FLAGS / *_DEPS values concatenated here -- TODO confirm.
        compiler_flags = ROCKSDB_COMPILER_FLAGS + extra_compiler_flags,
        preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
        deps = [":rocksdb_test_lib"] + extra_deps,
        external_deps = ROCKSDB_EXTERNAL_DEPS + [
            ("googletest", None, "gtest"),
        ],
    )
    # Each ROCKS_TESTS entry is:
    # [test name, source file, "serial" | "parallel", extra deps, extra compiler flags]
    for test_name, test_cc, parallelism, extra_deps, extra_compiler_flags in ROCKS_TESTS
    if not is_opt_mode
]
rocksdb-6.11.4/USERS.md000066400000000000000000000160501370372246700144720ustar00rootroot00000000000000This document lists users of RocksDB and their use cases. If you are using RocksDB, please open a pull request and add yourself to the list. ## Facebook At Facebook, we use RocksDB as storage engines in multiple data management services and a backend for many different stateful services, including: 1. MyRocks -- https://github.com/MySQLOnRocksDB/mysql-5.6 2. MongoRocks -- https://github.com/mongodb-partners/mongo-rocks 3. ZippyDB -- Facebook's distributed key-value store with Paxos-style replication, built on top of RocksDB.[1] https://www.youtube.com/watch?v=DfiN7pG0D0khtt 4. Laser -- Laser is a high query throughput, low (millisecond) latency, key-value storage service built on top of RocksDB.[1] 4. Dragon -- a distributed graph query engine. https://code.facebook.com/posts/1737605303120405/dragon-a-distributed-graph-query-engine/ 5. Stylus -- a low-level stream processing framework writtenin C++.[1] 6. LogDevice -- a distributed data store for logs [2] [1] https://research.facebook.com/publications/realtime-data-processing-at-facebook/ [2] https://code.facebook.com/posts/357056558062811/logdevice-a-distributed-data-store-for-logs/ ## LinkedIn Two different use cases at Linkedin are using RocksDB as a storage engine: 1. LinkedIn's follow feed for storing user's activities.
Check out the blog post: https://engineering.linkedin.com/blog/2016/03/followfeed--linkedin-s-feed-made-faster-and-smarter 2. Apache Samza, an open source framework for stream processing Learn more about those use cases in a Tech Talk by Ankit Gupta and Naveen Somasundaram: http://www.youtube.com/watch?v=plqVp_OnSzg ## Yahoo Yahoo is using RocksDB as a storage engine for their biggest distributed data store Sherpa. Learn more about it here: http://yahooeng.tumblr.com/post/120730204806/sherpa-scales-new-heights ## CockroachDB CockroachDB is an open-source geo-replicated transactional database. They are using RocksDB as their storage engine. Check out their GitHub: https://github.com/cockroachdb/cockroach ## DNANexus DNANexus is using RocksDB to speed up processing of genomics data. You can learn more from this great blog post by Mike Lin: http://devblog.dnanexus.com/faster-bam-sorting-with-samtools-and-rocksdb/ ## Iron.io Iron.io is using RocksDB as a storage engine for their distributed queueing system. Learn more from the Tech Talk by Reed Allman: http://www.youtube.com/watch?v=HTjt6oj-RL4 ## Tango Me Tango is using RocksDB as a graph storage to store all users' connection data and other social activity data. ## Turn Turn is using RocksDB as a storage layer for their key/value store, serving at peak 2.4MM QPS out of different datacenters. Check out our RocksDB Protobuf merge operator at: https://github.com/vladb38/rocksdb_protobuf ## Santander UK/Cloudera Professional Services Check out their blog post: http://blog.cloudera.com/blog/2015/08/inside-santanders-near-real-time-data-ingest-architecture/ ## Airbnb Airbnb is using RocksDB as a storage engine for their personalized search service. You can learn more about it here: https://www.youtube.com/watch?v=ASQ6XMtogMs ## Alluxio [Alluxio](https://www.alluxio.io) uses RocksDB to serve and scale file system metadata to beyond 1 Billion files.
The detailed design and implementation are described in this engineering blog: https://www.alluxio.io/blog/scalable-metadata-service-in-alluxio-storing-billions-of-files/ ## Pinterest Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtube.com/watch?v=MtFEVEs_2Vo ## Smyte [Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services. ## Rakuten Marketing [Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP. ## VWO, Wingify [VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed. ## quasardb [quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark. quasardb uses a heavily tuned RocksDB as its persistence layer. ## Netflix [Netflix](http://techblog.netflix.com/2016/05/application-data-caching-using-ssds.html) uses RocksDB on AWS EC2 instances with local SSD drives to cache application data. ## TiKV [TiKV](https://github.com/pingcap/tikv) is a GEO-replicated, high-performance, distributed, transactional key-value database. TiKV is powered by Rust and Raft. TiKV uses RocksDB as its persistence layer. ## Apache Flink [Apache Flink](https://flink.apache.org/news/2016/03/08/release-1.0.0.html) uses RocksDB to store state locally on a machine. ## Dgraph [Dgraph](https://github.com/dgraph-io/dgraph) is an open-source, scalable, distributed, low latency, high throughput graph database. They use RocksDB to store state locally on a machine. ## Uber [Uber](http://eng.uber.com/cherami/) uses RocksDB as a durable and scalable task queue. ## 360 Pika [360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a NoSQL database compatible with Redis.
With the huge amount of data stored, Redis may suffer from a capacity bottleneck, and Pika was born to solve it. It has been widely used in many companies. ## LzLabs LzLabs is using RocksDB as a storage engine in their multi-database distributed framework to store application configuration and user data. ## ProfaneDB [ProfaneDB](https://profanedb.gitlab.io/) is a database for Protocol Buffers, and uses RocksDB for storage. It is accessible via gRPC, and the schema is defined directly using `.proto` files. ## IOTA Foundation [IOTA Foundation](https://www.iota.org/) is using RocksDB in the [IOTA Reference Implementation (IRI)](https://github.com/iotaledger/iri) to store the local state of the Tangle. The Tangle is the first open-source distributed ledger powering the future of the Internet of Things. ## Avrio Project [Avrio Project](http://avrio-project.github.io/avrio.network/) is using RocksDB in [Avrio](https://github.com/avrio-project/avrio) to store blocks, account balances and data and other blockchain-related data. Avrio is a multiblockchain decentralized cryptocurrency empowering monetary transactions. ## Crux [Crux](https://github.com/juxt/crux) is a document database that uses RocksDB for local [EAV](https://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model) index storage to enable point-in-time bitemporal Datalog queries. The "unbundled" architecture uses Kafka to provide horizontal scalability. ## Nebula Graph [Nebula Graph](https://github.com/vesoft-inc/nebula) is a distributed, scalable, lightning-fast, open source graph database capable of hosting super large scale graphs with dozens of billions of vertices (nodes) and trillions of edges, with milliseconds of latency. ## YugabyteDB [YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/.
rocksdb-6.11.4/Vagrantfile000066400000000000000000000017711370372246700154400ustar00rootroot00000000000000# Vagrant file Vagrant.configure("2") do |config| config.vm.provider "virtualbox" do |v| v.memory = 4096 v.cpus = 2 end config.vm.define "ubuntu14" do |box| box.vm.box = "ubuntu/trusty64" end config.vm.define "centos65" do |box| box.vm.box = "chef/centos-6.5" end config.vm.define "centos7" do |box| box.vm.box = "centos/7" box.vm.provision "shell", path: "build_tools/setup_centos7.sh" end config.vm.define "FreeBSD10" do |box| box.vm.guest = :freebsd box.vm.box = "robin/freebsd-10" # FreeBSD does not support 'mount_virtualbox_shared_folder', use NFS box.vm.synced_folder ".", "/vagrant", :nfs => true, id: "vagrant-root" box.vm.network "private_network", ip: "10.0.1.10" # build everything after creating VM, skip using --no-provision box.vm.provision "shell", inline: <<-SCRIPT pkg install -y gmake clang35 export CXX=/usr/local/bin/clang++35 cd /vagrant gmake clean gmake all OPT=-g SCRIPT end end rocksdb-6.11.4/WINDOWS_PORT.md000066400000000000000000000310521370372246700155660ustar00rootroot00000000000000# Microsoft Contribution Notes ## Contributors * Alexander Zinoviev https://github.com/zinoale * Dmitri Smirnov https://github.com/yuslepukhin * Praveen Rao https://github.com/PraveenSinghRao * Sherlock Huang https://github.com/SherlockNoMad ## Introduction RocksDB is a well proven open source key-value persistent store, optimized for fast storage. It provides scalability with number of CPUs and storage IOPS, to support IO-bound, in-memory and write-once workloads, most importantly, to be flexible to allow for innovation. As Microsoft Bing team we have been continuously pushing hard to improve the scalability, efficiency of platform and eventually benefit Bing end-user satisfaction. We would like to explore the opportunity to embrace open source, RocksDB here, to use, enhance and customize for our usage, and also contribute back to the RocksDB community. 
Herein, we are pleased to offer this RocksDB port for the Windows platform. These notes describe some decisions and changes we had to make with regard to porting RocksDB on Windows. We hope this will help both reviewers and users of the Windows port. We are open for comments and improvements. ## OS specifics All of the porting, testing and benchmarking was done on Windows Server 2012 R2 Datacenter 64-bit but to the best of our knowledge there is not a specific API we used during porting that is unsupported on other Windows OS after Vista. ## Porting goals We strive to achieve the following goals: * make use of the existing porting interface of RocksDB * make minimum modifications within platform independent code. * make all unit tests pass both in debug and release builds. * Note: latest introduction of SyncPoint seems to disable running db_test in Release. * make performance on par with published benchmarks accounting for HW differences * we would like to keep the port code in line with the master branch with no forking ## Build system We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient. At the same time it generates Visual Studio projects that are both usable from a command line and IDE. The top-level CMakeLists.txt file contains description of all targets and build rules. It also provides brief instructions on how to build the software for Windows. One more build related file is thirdparty.inc that also resides on the top level. This file must be edited to point to actual third party libraries location. We think that it would be beneficial to merge the existing make-based build system and the new cmake-based build system into a single one to use on all platforms. All building and testing was done for 64-bit. We have not conducted any testing for 32-bit and early reports indicate that it will not run on 32-bit.
## C++ and STL notes We had to make some minimum changes within the portable files that either account for OS differences or the shortcomings of C++11 support in the current version of the MS compiler. Most or all of them are expected to be fixed in the upcoming compiler releases. We plan to use this port for our business purposes here at Bing and this provided business justification for this port. This also means, we do not have at present to choose the compiler version at will. * Certain headers that are not present and not necessary on Windows were simply `#ifndef OS_WIN` in a few places (`unistd.h`) * All posix specific headers were replaced to port/port.h which worked well * Replaced `dirent.h` for `port/port_dirent.h` (very few places) with the implementation of the relevant interfaces within `rocksdb::port` namespace * Replaced `sys/time.h` to `port/sys_time.h` (few places) implemented equivalents within `rocksdb::port` * `printf %z` specification is not supported on Windows. To imitate existing standards we came up with a string macro `ROCKSDB_PRIszt` which expands to `zu` on posix systems and to `Iu` on windows. * in class member initialization were moved to a __ctors in some cases * `constexpr` is not supported. We had to replace `std::numeric_limits<>::max/min()` to its C macros for constants. Sometimes we had to make class members `static const` and place a definition within a .cc file. * `constexpr` for functions was replaced to a template specialization (1 place) * Union members that have non-trivial constructors were replaced to `char[]` in one place along with bug fixes (spatial experimental feature) * Zero-sized arrays are deemed a non-standard extension which we converted to 1 size array and that should work well for the purposes of these classes. * `std::chrono` lacks nanoseconds support (fixed in the upcoming release of the STL) and we had to use `QueryPerfCounter()` within env_win.cc * Function local statics initialization is still not safe. 
Used `std::call_once` to mitigate within WinEnv. ## Windows Environments notes We endeavored to make it functionally on par with posix_env. This means we replicated the functionality of the thread pool and other things as precisely as possible, including: * Replicate posix logic using `std::thread` primitives. * Implement all posix_env disk access functionality. * Set `use_os_buffer=false` to disable OS disk buffering for WinWritableFile and WinRandomAccessFile. * Replace `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure. * Use `SetFileInformationByHandle` to compensate for the absence of `fallocate`. ### In detail Even though Windows provides its own efficient thread-pool implementation we chose to replicate posix logic using `std::thread` primitives. This allows anyone to quickly detect any changes within the posix source code and replicate them within windows env. This has proven to work very well. At the same time, anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments. For disk access we implemented all of the functionality present within the posix_env which includes memory mapped files, random access, rate-limiter support etc. The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It’s not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`.
The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows writes and reads to be performed in cases when un-buffered access does not make sense such as WAL and MANIFEST. We have replaced `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure so we can atomically seek to the position of the disk operation but still perform the operation synchronously. Thus we are able to emulate the functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position but that hardly matters given the random nature of access. We used `SetFileInformationByHandle` both to truncate files after writing a full final page to disk and to pre-allocate disk space for faster I/O thus compensating for the absence of `fallocate` although some differences remain. For example, the pre-allocated space is not filled with zeros like on Linux, however, on a positive note, the end of file position is also not modified after pre-allocation. RocksDB renames, copies and deletes files at will even though they may be opened with another handle at the same time. We had to relax and allow nearly all the concurrent access permissions possible. ## Thread-Local Storage Thread-Local storage plays a significant role for RocksDB performance. Rather than creating a separate implementation we chose to create inline wrappers that forward `pthread_specific` calls to Windows `Tls` interfaces within `rocksdb::port` namespace. This leaves the existing meat of the logic intact and unchanged and just as maintainable. To mitigate the lack of thread local storage cleanup on thread-exit we added a limited amount of windows specific code within the same thread_local.cc file that injects a cleanup callback into a `"__tls"` structure within `".CRT$XLB"` data segment.
This approach guarantees that the callback is invoked regardless of whether RocksDB is used within an executable, standalone DLL or within another DLL. ## Jemalloc usage When RocksDB is used with Jemalloc the latter needs to be initialized before any of the C++ globals or statics. To accomplish that we injected an initialization routine into `".CRT$XCT"` that is automatically invoked by the runtime before initializing static objects. je-uninit is queued to `atexit()`. The jemalloc redirecting `new/delete` global operators are used by the linker provided certain conditions are met. See the build section in these notes. ## Stack Trace and Unhandled Exception Handler We decided not to implement these two features because the hosting program as a rule has these two things in it. We experienced no inconveniences debugging issues in the debugger or analyzing process dumps if need be and thus we did not see this as a priority. ## Performance results ### Setup All of the benchmarks are run on the same set of machines. Here are the details of the test setup: * 2 Intel(R) Xeon(R) E5 2450 0 @ 2.10 GHz (total 16 cores) * 2 XK0480GDQPH SSD Device, total 894GB free disk * Machine has 128 GB of RAM * Operating System: Windows Server 2012 R2 Datacenter * 100 Million keys; each key is of size 10 bytes, each value is of size 800 bytes * total database size is ~76GB * The performance result is based on RocksDB 3.11. * The parameters used, unless specified, were exactly the same as published in the GitHub Wiki page. ### RocksDB on flash storage #### Test 1. Bulk Load of keys in Random Order Version 3.11 * Total Run Time: 17.6 min * Fillrandom: 5.480 micros/op 182465 ops/sec; 142.0 MB/s * Compact: 486056544.000 micros/op 0 ops/sec Version 3.10 * Total Run Time: 16.2 min * Fillrandom: 5.018 micros/op 199269 ops/sec; 155.1 MB/s * Compact: 441313173.000 micros/op 0 ops/sec; #### Test 2.
Bulk Load of keys in Sequential Order Version 3.11 * Fillseq: 4.944 micros/op 202k ops/sec; 157.4 MB/s Version 3.10 * Fillseq: 4.105 micros/op 243.6k ops/sec; 189.6 MB/s #### Test 3. Random Write Version 3.11 * Unbuffered I/O enabled * Overwrite: 52.661 micros/op 18.9k ops/sec; 14.8 MB/s Version 3.10 * Unbuffered I/O enabled * Overwrite: 52.661 micros/op 18.9k ops/sec; #### Test 4. Random Read Version 3.11 * Unbuffered I/O enabled * Readrandom: 15.716 micros/op 63.6k ops/sec; 49.5 MB/s Version 3.10 * Unbuffered I/O enabled * Readrandom: 15.548 micros/op 64.3k ops/sec; #### Test 5. Multi-threaded read and single-threaded write Version 3.11 * Unbuffered I/O enabled * Readwhilewriting: 25.128 micros/op 39.7k ops/sec; Version 3.10 * Unbuffered I/O enabled * Readwhilewriting: 24.854 micros/op 40.2k ops/sec; ### RocksDB In Memory #### Test 1. Point Lookup Version 3.11 80K writes/sec * Write Rate Achieved: 40.5k write/sec; * Readwhilewriting: 0.314 micros/op 3187455 ops/sec; 364.8 MB/s (715454999 of 715454999 found) Version 3.10 * Write Rate Achieved: 50.6k write/sec * Readwhilewriting: 0.316 micros/op 3162028 ops/sec; (719576999 of 719576999 found) *10K writes/sec* Version 3.11 * Write Rate Achieved: 5.8k/s write/sec * Readwhilewriting: 0.246 micros/op 4062669 ops/sec; 464.9 MB/s (915481999 of 915481999 found) Version 3.10 * Write Rate Achieved: 5.8k/s write/sec * Readwhilewriting: 0.244 micros/op 4106253 ops/sec; (927986999 of 927986999 found) #### Test 2. 
Prefix Range Query Version 3.11 80K writes/sec * Write Rate Achieved: 46.3k/s write/sec * Readwhilewriting: 0.362 micros/op 2765052 ops/sec; 316.4 MB/s (611549999 of 611549999 found) Version 3.10 * Write Rate Achieved: 45.8k/s write/sec * Readwhilewriting: 0.317 micros/op 3154941 ops/sec; (708158999 of 708158999 found) Version 3.11 10K writes/sec * Write Rate Achieved: 5.78k write/sec * Readwhilewriting: 0.269 micros/op 3716692 ops/sec; 425.3 MB/s (837401999 of 837401999 found) Version 3.10 * Write Rate Achieved: 5.7k write/sec * Readwhilewriting: 0.261 micros/op 3830152 ops/sec; (863482999 of 863482999 found) We think that there is still big room to improve the performance, which will be an ongoing effort for us. rocksdb-6.11.4/appveyor.yml000066400000000000000000000074551370372246700156500ustar00rootroot00000000000000version: 1.0.{build} image: Visual Studio 2019 environment: JAVA_HOME: C:\Program Files\Java\jdk1.8.0 THIRDPARTY_HOME: $(APPVEYOR_BUILD_FOLDER)\thirdparty SNAPPY_HOME: $(THIRDPARTY_HOME)\snappy-1.1.7 SNAPPY_INCLUDE: $(SNAPPY_HOME);$(SNAPPY_HOME)\build SNAPPY_LIB_DEBUG: $(SNAPPY_HOME)\build\Debug\snappy.lib SNAPPY_LIB_RELEASE: $(SNAPPY_HOME)\build\Release\snappy.lib LZ4_HOME: $(THIRDPARTY_HOME)\lz4-1.8.3 LZ4_INCLUDE: $(LZ4_HOME)\lib LZ4_LIB_DEBUG: $(LZ4_HOME)\visual\VS2010\bin\x64_Debug\liblz4_static.lib LZ4_LIB_RELEASE: $(LZ4_HOME)\visual\VS2010\bin\x64_Release\liblz4_static.lib ZSTD_HOME: $(THIRDPARTY_HOME)\zstd-1.4.0 ZSTD_INCLUDE: $(ZSTD_HOME)\lib;$(ZSTD_HOME)\lib\dictBuilder ZSTD_LIB_DEBUG: $(ZSTD_HOME)\build\VS2010\bin\x64_Debug\libzstd_static.lib ZSTD_LIB_RELEASE: $(ZSTD_HOME)\build\VS2010\bin\x64_Release\libzstd_static.lib matrix: - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 CMAKE_GENERATOR: Visual Studio 14 Win64 DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 CMAKE_GENERATOR: Visual Studio 15 Win64 DEV_ENV: C:\Program Files (x86)\Microsoft Visual 
Studio\2017\Community\Common7\IDE\devenv.com - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 CMAKE_GENERATOR: Visual Studio 16 CMAKE_PLATEFORM_NAME: x64 DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\devenv.com - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 CMAKE_GENERATOR: Visual Studio 16 CMAKE_PLATEFORM_NAME: x64 CMAKE_OPT: -DCMAKE_CXX_STANDARD=20 DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\devenv.com install: - md %THIRDPARTY_HOME% - echo "Building Snappy dependency..." - cd %THIRDPARTY_HOME% - curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip - unzip snappy-1.1.7.zip - cd snappy-1.1.7 - mkdir build - cd build - if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%") - cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT% - msbuild Snappy.sln /p:Configuration=Debug /p:Platform=x64 - msbuild Snappy.sln /p:Configuration=Release /p:Platform=x64 - echo "Building LZ4 dependency..." - cd %THIRDPARTY_HOME% - curl --fail --silent --show-error --output lz4-1.8.3.zip --location https://github.com/lz4/lz4/archive/v1.8.3.zip - unzip lz4-1.8.3.zip - cd lz4-1.8.3\visual\VS2010 - ps: $CMD="$Env:DEV_ENV"; & $CMD lz4.sln /upgrade - msbuild lz4.sln /p:Configuration=Debug /p:Platform=x64 - msbuild lz4.sln /p:Configuration=Release /p:Platform=x64 - echo "Building ZStd dependency..." 
- cd %THIRDPARTY_HOME% - curl --fail --silent --show-error --output zstd-1.4.0.zip --location https://github.com/facebook/zstd/archive/v1.4.0.zip - unzip zstd-1.4.0.zip - cd zstd-1.4.0\build\VS2010 - ps: $CMD="$Env:DEV_ENV"; & $CMD zstd.sln /upgrade - msbuild zstd.sln /p:Configuration=Debug /p:Platform=x64 - msbuild zstd.sln /p:Configuration=Release /p:Platform=x64 before_build: - md %APPVEYOR_BUILD_FOLDER%\build - cd %APPVEYOR_BUILD_FOLDER%\build - if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%") - cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT% %CMAKE_OPT% -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DLZ4=1 -DZSTD=1 -DXPRESS=1 -DJNI=1 - cd .. build: project: build\rocksdb.sln parallel: true verbosity: normal test: test_script: - ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_with_timestamp_basic_test,db_test2,db_test,env_basic_test,env_test,db_merge_operand_test -Concurrency 8 on_failure: - cmd: 7z a build-failed.zip %APPVEYOR_BUILD_FOLDER%\build\ && appveyor PushArtifact build-failed.zip rocksdb-6.11.4/buckifier/000077500000000000000000000000001370372246700152105ustar00rootroot00000000000000rocksdb-6.11.4/buckifier/buckify_rocksdb.py000066400000000000000000000172061370372246700207330ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals try: from builtins import str except ImportError: from __builtin__ import str from targets_builder import TARGETSBuilder import json import os import fnmatch import sys from util import ColorString # This script generates TARGETS file for Buck. # Buck is a build tool specifying dependencies among different build targets. # User can pass extra dependencies as a JSON object via command line, and this # script can include these dependencies in the generate TARGETS file. 
# Usage: # $python3 buckifier/buckify_rocksdb.py # (This generates a TARGET file without user-specified dependency for unit # tests.) # $python3 buckifier/buckify_rocksdb.py \ # '{"fake": { \ # "extra_deps": [":test_dep", "//fakes/module:mock1"], \ # "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"], \ # } \ # }' # (Generated TARGETS file has test_dep and mock1 as dependencies for RocksDB # unit tests, and will use the extra_compiler_flags to compile the unit test # source.) # tests to export as libraries for inclusion in other projects _EXPORTED_TEST_LIBS = ["env_basic_test"] # Parse src.mk files as a Dictionary of # VAR_NAME => list of files def parse_src_mk(repo_path): src_mk = repo_path + "/src.mk" src_files = {} for line in open(src_mk): line = line.strip() if len(line) == 0 or line[0] == '#': continue if '=' in line: current_src = line.split('=')[0].strip() src_files[current_src] = [] elif '.cc' in line: src_path = line.split('.cc')[0].strip() + '.cc' src_files[current_src].append(src_path) return src_files # get all .cc / .c files def get_cc_files(repo_path): cc_files = [] for root, dirnames, filenames in os.walk(repo_path): # noqa: B007 T25377293 Grandfathered in root = root[(len(repo_path) + 1):] if "java" in root: # Skip java continue for filename in fnmatch.filter(filenames, '*.cc'): cc_files.append(os.path.join(root, filename)) return cc_files # Get tests from Makefile def get_tests(repo_path): Makefile = repo_path + "/Makefile" # Dictionary TEST_NAME => IS_PARALLEL tests = {} found_tests = False for line in open(Makefile): line = line.strip() if line.startswith("TESTS ="): found_tests = True elif found_tests: if line.endswith("\\"): # remove the trailing \ line = line[:-1] line = line.strip() tests[line] = False else: # we consumed all the tests break found_parallel_tests = False for line in open(Makefile): line = line.strip() if line.startswith("PARALLEL_TEST ="): found_parallel_tests = True elif found_parallel_tests: if line.endswith("\\"): # remove 
the trailing \ line = line[:-1] line = line.strip() tests[line] = True else: # we consumed all the parallel tests break return tests # Parse extra dependencies passed by user from command line def get_dependencies(): deps_map = { '': { 'extra_deps': [], 'extra_compiler_flags': [] } } if len(sys.argv) < 2: return deps_map def encode_dict(data): rv = {} for k, v in data.items(): if isinstance(v, dict): v = encode_dict(v) rv[k] = v return rv extra_deps = json.loads(sys.argv[1], object_hook=encode_dict) for target_alias, deps in extra_deps.items(): deps_map[target_alias] = deps return deps_map # Prepare TARGETS file for buck def generate_targets(repo_path, deps_map): print(ColorString.info("Generating TARGETS")) # parsed src.mk file src_mk = parse_src_mk(repo_path) # get all .cc files cc_files = get_cc_files(repo_path) # get tests from Makefile tests = get_tests(repo_path) if src_mk is None or cc_files is None or tests is None: return False TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path) # rocksdb_lib TARGETS.add_library( "rocksdb_lib", src_mk["LIB_SOURCES"] + src_mk["TOOL_LIB_SOURCES"]) # rocksdb_test_lib TARGETS.add_library( "rocksdb_test_lib", src_mk.get("MOCK_LIB_SOURCES", []) + src_mk.get("TEST_LIB_SOURCES", []) + src_mk.get("EXP_LIB_SOURCES", []) + src_mk.get("ANALYZER_LIB_SOURCES", []), [":rocksdb_lib"], extra_external_deps=""" + [ ("googletest", None, "gtest"), ]""") # rocksdb_tools_lib TARGETS.add_library( "rocksdb_tools_lib", src_mk.get("BENCH_LIB_SOURCES", []) + src_mk.get("ANALYZER_LIB_SOURCES", []) + ["test_util/testutil.cc"], [":rocksdb_lib"]) # rocksdb_stress_lib TARGETS.add_rocksdb_library( "rocksdb_stress_lib", src_mk.get("ANALYZER_LIB_SOURCES", []) + src_mk.get('STRESS_LIB_SOURCES', []) + ["test_util/testutil.cc"]) print("Extra dependencies:\n{0}".format(json.dumps(deps_map))) # c_test.c is added through TARGETS.add_c_test(). If there # are more than one .c test file, we need to extend # TARGETS.add_c_test() to include other C tests too. 
TARGETS.add_c_test() # test for every .cc test we found in the Makefile for target_alias, deps in deps_map.items(): for test in sorted(tests): if test == 'c_test': continue match_src = [src for src in cc_files if ("/%s.cc" % test) in src] if len(match_src) == 0: print(ColorString.warning("Cannot find .cc file for %s" % test)) continue elif len(match_src) > 1: print(ColorString.warning("Found more than one .cc for %s" % test)) print(match_src) continue assert(len(match_src) == 1) is_parallel = tests[test] test_target_name = \ test if not target_alias else test + "_" + target_alias TARGETS.register_test( test_target_name, match_src[0], is_parallel, json.dumps(deps['extra_deps']), json.dumps(deps['extra_compiler_flags'])) if test in _EXPORTED_TEST_LIBS: test_library = "%s_lib" % test_target_name TARGETS.add_library(test_library, match_src, [":rocksdb_test_lib"]) TARGETS.flush_tests() print(ColorString.info("Generated TARGETS Summary:")) print(ColorString.info("- %d libs" % TARGETS.total_lib)) print(ColorString.info("- %d binarys" % TARGETS.total_bin)) print(ColorString.info("- %d tests" % TARGETS.total_test)) return True def get_rocksdb_path(): # rocksdb = {script_dir}/.. script_dir = os.path.dirname(sys.argv[0]) script_dir = os.path.abspath(script_dir) rocksdb_path = os.path.abspath( os.path.join(script_dir, "../")) return rocksdb_path def exit_with_error(msg): print(ColorString.error(msg)) sys.exit(1) def main(): deps_map = get_dependencies() # Generate TARGETS file for buck ok = generate_targets(get_rocksdb_path(), deps_map) if not ok: exit_with_error("Failed to generate TARGETS files") if __name__ == "__main__": main() rocksdb-6.11.4/buckifier/check_buck_targets.sh000077500000000000000000000014101370372246700213550ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # If clang_format_diff.py command is not specfied, we assume we are able to # access directly without any path. 
TGT_DIFF=`git diff TARGETS | head -n 1` if [ ! -z "$TGT_DIFF" ] then echo "TARGETS file has uncommitted changes. Skip this check." exit 0 fi echo Backup original TARGETS file. cp TARGETS TARGETS.bkp ${PYTHON:-python3} buckifier/buckify_rocksdb.py TGT_DIFF=`git diff TARGETS | head -n 1` if [ -z "$TGT_DIFF" ] then mv TARGETS.bkp TARGETS exit 0 else echo "Please run '${PYTHON:-python3} buckifier/buckify_rocksdb.py' to update TARGETS file." echo "Do not manually update TARGETS file." ${PYTHON:-python3} --version mv TARGETS.bkp TARGETS exit 1 fi rocksdb-6.11.4/buckifier/rocks_test_runner.sh000077500000000000000000000004141370372246700213170ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Create a tmp directory for the test to use TEST_DIR=$(mktemp -d /dev/shm/fbcode_rocksdb_XXXXXXX) # shellcheck disable=SC2068 TEST_TMPDIR="$TEST_DIR" $@ && rm -rf "$TEST_DIR" rocksdb-6.11.4/buckifier/targets_builder.py000066400000000000000000000075341370372246700207520ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals try: from builtins import object from builtins import str except ImportError: from __builtin__ import object from __builtin__ import str import targets_cfg def pretty_list(lst, indent=8): if lst is None or len(lst) == 0: return "" if len(lst) == 1: return "\"%s\"" % lst[0] separator = "\",\n%s\"" % (" " * indent) res = separator.join(sorted(lst)) res = "\n" + (" " * indent) + "\"" + res + "\",\n" + (" " * (indent - 4)) return res class TARGETSBuilder(object): def __init__(self, path): self.path = path self.targets_file = open(path, 'w') header = targets_cfg.rocksdb_target_header_template self.targets_file.write(header) self.total_lib = 0 self.total_bin = 0 self.total_test = 0 self.tests_cfg = "" def __del__(self): self.targets_file.close() def add_library(self, name, srcs, deps=None, headers=None, extra_external_deps=""): headers_attr_prefix = "" if headers is None: headers_attr_prefix = "auto_" headers = "AutoHeaders.RECURSIVE_GLOB" else: headers = "[" + pretty_list(headers) + "]" self.targets_file.write(targets_cfg.library_template.format( name=name, srcs=pretty_list(srcs), headers_attr_prefix=headers_attr_prefix, headers=headers, deps=pretty_list(deps), extra_external_deps=extra_external_deps)) self.total_lib = self.total_lib + 1 def add_rocksdb_library(self, name, srcs, headers=None): headers_attr_prefix = "" if headers is None: headers_attr_prefix = "auto_" headers = "AutoHeaders.RECURSIVE_GLOB" else: headers = "[" + pretty_list(headers) + "]" self.targets_file.write(targets_cfg.rocksdb_library_template.format( name=name, srcs=pretty_list(srcs), headers_attr_prefix=headers_attr_prefix, headers=headers)) self.total_lib = self.total_lib + 1 def add_binary(self, name, srcs, deps=None): self.targets_file.write(targets_cfg.binary_template % ( name, pretty_list(srcs), pretty_list(deps))) self.total_bin = self.total_bin 
+ 1 def add_c_test(self): self.targets_file.write(""" if not is_opt_mode: cpp_binary( name = "c_test_bin", srcs = ["db/c_test.c"], arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [":rocksdb_test_lib"], ) if not is_opt_mode: custom_unittest( "c_test", command = [ native.package_name() + "/buckifier/rocks_test_runner.sh", "$(location :{})".format("c_test_bin"), ], type = "simple", ) """) def register_test(self, test_name, src, is_parallel, extra_deps, extra_compiler_flags): exec_mode = "serial" if is_parallel: exec_mode = "parallel" self.tests_cfg += targets_cfg.test_cfg_template % ( test_name, str(src), str(exec_mode), extra_deps, extra_compiler_flags) self.total_test = self.total_test + 1 def flush_tests(self): self.targets_file.write(targets_cfg.unittests_template % self.tests_cfg) self.tests_cfg = "" rocksdb-6.11.4/buckifier/targets_cfg.py000066400000000000000000000123301370372246700200510ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals rocksdb_target_header_template = \ """# This file \100generated by `python3 buckifier/buckify_rocksdb.py` # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can # only be validated by Facebook employees. 
# load("@fbcode_macros//build_defs:auto_headers.bzl", "AutoHeaders") load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") load(":defs.bzl", "test_binary") REPO_PATH = package_name() + "/" ROCKSDB_COMPILER_FLAGS = [ "-fno-builtin-memcmp", # Needed to compile in fbcode "-Wno-expansion-to-defined", # Added missing flags from output of build_detect_platform "-Wnarrowing", "-DROCKSDB_NO_DYNAMIC_EXTENSION", ] ROCKSDB_EXTERNAL_DEPS = [ ("bzip2", None, "bz2"), ("snappy", None, "snappy"), ("zlib", None, "z"), ("gflags", None, "gflags"), ("lz4", None, "lz4"), ("zstd", None), ("tbb", None), ] ROCKSDB_OS_DEPS = [ ( "linux", ["third-party//numa:numa", "third-party//liburing:uring"], ), ] ROCKSDB_OS_PREPROCESSOR_FLAGS = [ ( "linux", [ "-DOS_LINUX", "-DROCKSDB_FALLOCATE_PRESENT", "-DROCKSDB_MALLOC_USABLE_SIZE", "-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX", "-DROCKSDB_RANGESYNC_PRESENT", "-DROCKSDB_SCHED_GETCPU_PRESENT", "-DROCKSDB_IOURING_PRESENT", "-DHAVE_SSE42", "-DLIBURING", "-DNUMA", ], ), ( "macos", ["-DOS_MACOSX"], ), ] ROCKSDB_PREPROCESSOR_FLAGS = [ "-DROCKSDB_PLATFORM_POSIX", "-DROCKSDB_LIB_IO_POSIX", "-DROCKSDB_SUPPORT_THREAD_LOCAL", # Flags to enable libs we include "-DSNAPPY", "-DZLIB", "-DBZIP2", "-DLZ4", "-DZSTD", "-DZSTD_STATIC_LINKING_ONLY", "-DGFLAGS=gflags", "-DTBB", # Added missing flags from output of build_detect_platform "-DROCKSDB_BACKTRACE", # Directories with files for #include "-I" + REPO_PATH + "include/", "-I" + REPO_PATH, ] ROCKSDB_ARCH_PREPROCESSOR_FLAGS = { "x86_64": [ "-DHAVE_PCLMUL", ], } build_mode = read_config("fbcode", "build_mode") is_opt_mode = build_mode.startswith("opt") # -DNDEBUG is added by default in opt mode in fbcode. But adding it twice # doesn't harm and avoid forgetting to add it. ROCKSDB_COMPILER_FLAGS += (["-DNDEBUG"] if is_opt_mode else []) sanitizer = read_config("fbcode", "sanitizer") # Do not enable jemalloc if sanitizer presents. RocksDB will further detect # whether the binary is linked with jemalloc at runtime. 
ROCKSDB_OS_PREPROCESSOR_FLAGS += ([( "linux", ["-DROCKSDB_JEMALLOC"], )] if sanitizer == "" else []) ROCKSDB_OS_DEPS += ([( "linux", ["third-party//jemalloc:headers"], )] if sanitizer == "" else []) ROCKSDB_LIB_DEPS = [ ":rocksdb_lib", ":rocksdb_test_lib", ] if not is_opt_mode else [":rocksdb_lib"] """ library_template = """ cpp_library( name = "{name}", srcs = [{srcs}], {headers_attr_prefix}headers = {headers}, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [{deps}], external_deps = ROCKSDB_EXTERNAL_DEPS{extra_external_deps}, ) """ rocksdb_library_template = """ cpp_library( name = "{name}", srcs = [{srcs}], {headers_attr_prefix}headers = {headers}, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = ROCKSDB_LIB_DEPS, external_deps = ROCKSDB_EXTERNAL_DEPS, ) """ binary_template = """ cpp_binary( name = "%s", srcs = [%s], arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [%s], external_deps = ROCKSDB_EXTERNAL_DEPS, ) """ test_cfg_template = """ [ "%s", "%s", "%s", %s, %s, ], """ unittests_template = """ # [test_name, test_src, test_type, extra_deps, extra_compiler_flags] ROCKS_TESTS = [ %s] # Generate a test rule for each entry in ROCKS_TESTS # Do not build the tests in opt mode, since SyncPoint and other test code # will not be included. 
[ cpp_unittest( name = test_name, srcs = [test_cc], arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, deps = [":rocksdb_test_lib"] + extra_deps, external_deps = ROCKSDB_EXTERNAL_DEPS + [ ("googletest", None, "gtest"), ], ) for test_name, test_cc, parallelism, extra_deps, extra_compiler_flags in ROCKS_TESTS if not is_opt_mode ] """ rocksdb-6.11.4/buckifier/util.py000066400000000000000000000070751370372246700165500ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. """ This module keeps commonly used components. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals try: from builtins import object except ImportError: from __builtin__ import object import subprocess import sys import os import time class ColorString(object): """ Generate colorful strings on terminal """ HEADER = '\033[95m' BLUE = '\033[94m' GREEN = '\033[92m' WARNING = '\033[93m' FAIL = '\033[91m' ENDC = '\033[0m' @staticmethod def _make_color_str(text, color): # In Python2, default encoding for unicode string is ASCII if sys.version_info.major <= 2: return "".join( [color, text.encode('utf-8'), ColorString.ENDC]) # From Python3, default encoding for unicode string is UTF-8 return "".join( [color, text, ColorString.ENDC]) @staticmethod def ok(text): if ColorString.is_disabled: return text return ColorString._make_color_str(text, ColorString.GREEN) @staticmethod def info(text): if ColorString.is_disabled: return text return ColorString._make_color_str(text, ColorString.BLUE) @staticmethod def header(text): if ColorString.is_disabled: return text return ColorString._make_color_str(text, ColorString.HEADER) @staticmethod def error(text): if ColorString.is_disabled: return text return 
ColorString._make_color_str(text, ColorString.FAIL) @staticmethod def warning(text): if ColorString.is_disabled: return text return ColorString._make_color_str(text, ColorString.WARNING) is_disabled = False def run_shell_command(shell_cmd, cmd_dir=None): """ Run a single shell command. @returns a tuple of shell command return code, stdout, stderr """ if cmd_dir is not None and not os.path.exists(cmd_dir): run_shell_command("mkdir -p %s" % cmd_dir) start = time.time() print("\t>>> Running: " + shell_cmd) p = subprocess.Popen(shell_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cmd_dir) stdout, stderr = p.communicate() end = time.time() # Report time if we spent more than 5 minutes executing a command execution_time = end - start if execution_time > (60 * 5): mins = (execution_time / 60) secs = (execution_time % 60) print("\t>time spent: %d minutes %d seconds" % (mins, secs)) return p.returncode, stdout, stderr def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False): """ Execute a sequence of shell commands, which is equivalent to running `cmd1 && cmd2 && cmd3` @returns boolean indication if all commands succeeds. """ if cmd_dir: print("\t=== Set current working directory => %s" % cmd_dir) for shell_cmd in shell_cmds: ret_code, stdout, stderr = run_shell_command(shell_cmd, cmd_dir) if stdout: if verbose or ret_code != 0: print(ColorString.info("stdout: \n"), stdout) if stderr: # contents in stderr is not necessarily to be error messages. if verbose or ret_code != 0: print(ColorString.error("stderr: \n"), stderr) if ret_code != 0: return False return True rocksdb-6.11.4/build_tools/000077500000000000000000000000001370372246700155645ustar00rootroot00000000000000rocksdb-6.11.4/build_tools/amalgamate.py000077500000000000000000000112441370372246700202340ustar00rootroot00000000000000#!/usr/bin/python # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # amalgamate.py creates an amalgamation from a unity build. 
# It can be run with either Python 2 or 3.
# An amalgamation consists of a header that includes the contents of all public
# headers and a source file that includes the contents of all source files and
# private headers.
#
# This script works by starting with the unity build file and recursively expanding
# #include directives. If the #include is found in a public include directory,
# that header is expanded into the amalgamation header.
#
# A particular header is only expanded once, so this script will
# break if there are multiple inclusions of the same header that are expected to
# expand differently. Similarly, this type of code causes issues:
#
# #ifdef FOO
#   #include "bar.h"
#   // code here
# #else
#   #include "bar.h"  // oops, doesn't get expanded
#   // different code here
# #endif
#
# The solution is to move the include out of the #ifdef.

from __future__ import print_function

import argparse
from os import path
import re
import sys

# Matches a quoted include directive (`#include "name"`); angle-bracket
# includes are deliberately not matched and are copied through verbatim.
include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$')
# Paths of headers that have already been expanded (each is expanded once).
included = set()
# Absolute paths of headers that must be left as #include lines (-x option).
excluded = set()


def find_header(name, abs_path, include_paths):
    """Resolve the target of an `#include "name"` directive.

    The directory of the including file (`abs_path`) is tried first, then
    each entry of `include_paths` in order.

    Returns the first candidate path that exists, or None if there is none.
    """
    samedir = path.join(path.dirname(abs_path), name)
    if path.exists(samedir):
        return samedir
    for include_dir in include_paths:
        candidate = path.join(include_dir, name)
        if path.exists(candidate):
            return candidate
    return None


def expand_include(include_path, f, abs_path, source_out, header_out,
                   include_paths, public_include_paths):
    """Expand the header at `include_path` into `source_out`, at most once.

    `f` and `abs_path` describe the including file; they are not used here
    and are kept only so the call signature stays unchanged.

    Returns True if the header was expanded by this call, False if it had
    already been expanded earlier (in which case nothing is written).
    """
    if include_path in included:
        return False

    included.add(include_path)
    # Use a distinct name for the opened header so the caller's `f`
    # parameter is not shadowed.
    with open(include_path) as header_file:
        print('#line 1 "{}"'.format(include_path), file=source_out)
        process_file(header_file, include_path, source_out, header_out,
                     include_paths, public_include_paths)
    return True


def process_file(f, abs_path, source_out, header_out, include_paths,
                 public_include_paths):
    """Copy the lines of `f` to `source_out`, expanding quoted includes.

    f                    -- iterable of text lines (an open file)
    abs_path             -- path of the file being read; used to resolve
                            same-directory includes and in #line markers
    source_out           -- stream receiving this file's text and any
                            expanded private headers
    header_out           -- stream receiving expanded public headers
    include_paths        -- directories searched for private headers
    public_include_paths -- directories searched for public headers

    Lines equal to "#pragma once\\n" are dropped. An include that cannot be
    resolved terminates the program through sys.exit() with a message that
    names the file and the 1-based line number of the directive.
    """
    for (line, text) in enumerate(f):
        m = include_re.match(text)
        if not m:
            if text != "#pragma once\n":
                source_out.write(text)
            continue

        filename = m.group(1)
        # first check private headers
        include_path = find_header(filename, abs_path, include_paths)
        if include_path:
            if include_path in excluded:
                source_out.write(text)
                expanded = False
            else:
                expanded = expand_include(include_path, f, abs_path,
                                          source_out, header_out,
                                          include_paths,
                                          public_include_paths)
        else:
            # now try public headers
            include_path = find_header(filename, abs_path,
                                       public_include_paths)
            if include_path:
                # found public header; it goes to the amalgamation header,
                # so nothing was expanded into source_out.
                expanded = False
                if include_path in excluded:
                    source_out.write(text)
                else:
                    expand_include(include_path, f, abs_path, header_out,
                                   None, public_include_paths, [])
            else:
                # `line` is the 0-based index from enumerate(); report the
                # 1-based line number that editors and compilers use.
                sys.exit("unable to find {}, included in {} on line {}".format(
                    filename, abs_path, line + 1))

        if expanded:
            # Switch the reported file name back to the including file
            # after the expanded header's text.
            print('#line {} "{}"'.format(line+1, abs_path), file=source_out)


def main():
    """Parse the command line and write the amalgamated source and header."""
    parser = argparse.ArgumentParser(
        description="Transform a unity build into an amalgamation")
    parser.add_argument("source", help="source file")
    parser.add_argument("-I", action="append", dest="include_paths",
                        help="include paths for private headers")
    parser.add_argument("-i", action="append", dest="public_include_paths",
                        help="include paths for public headers")
    parser.add_argument("-x", action="append", dest="excluded",
                        help="excluded header files")
    parser.add_argument("-o", dest="source_out", help="output C++ file",
                        required=True)
    parser.add_argument("-H", dest="header_out",
                        help="output C++ header file", required=True)
    args = parser.parse_args()

    include_paths = list(map(path.abspath, args.include_paths or []))
    public_include_paths = list(
        map(path.abspath, args.public_include_paths or []))
    excluded.update(map(path.abspath, args.excluded or []))
    filename = args.source
    abs_path = path.abspath(filename)
    with open(filename) as f, \
            open(args.source_out, 'w') as source_out, \
            open(args.header_out, 'w') as header_out:
        print('#line 1 "{}"'.format(filename), file=source_out)
        print('#include "{}"'.format(header_out.name), file=source_out)
        process_file(f, abs_path, source_out, header_out, include_paths,
                     public_include_paths)


if __name__ == "__main__":
    main()
rocksdb-6.11.4/build_tools/build_detect_platform000077500000000000000000000643301370372246700220530ustar00rootroot00000000000000#!/usr/bin/env bash # # Detects OS we're compiling on and outputs a file specified by the first # argument, which in turn gets read while processing Makefile. # # The output will set the following variables: # CC C Compiler path # CXX C++ Compiler path # PLATFORM_LDFLAGS Linker flags # JAVA_LDFLAGS Linker flags for RocksDBJava # JAVA_STATIC_LDFLAGS Linker flags for RocksDBJava static build # JAVAC_ARGS Arguments for javac # PLATFORM_SHARED_EXT Extension for shared libraries # PLATFORM_SHARED_LDFLAGS Flags for building shared library # PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library # PLATFORM_CCFLAGS C compiler flags # PLATFORM_CXXFLAGS C++ compiler flags. Will contain: # PLATFORM_SHARED_VERSIONED Set to 'true' if platform supports versioned # shared libraries, empty otherwise. # FIND Command for the find utility # WATCH Command for the watch utility # # The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following: # # -DROCKSDB_PLATFORM_POSIX if posix-platform based # -DSNAPPY if the Snappy library is present # -DLZ4 if the LZ4 library is present # -DZSTD if the ZSTD library is present # -DNUMA if the NUMA library is present # -DTBB if the TBB library is present # -DMEMKIND if the memkind library is present # # Using gflags in rocksdb: # Our project depends on gflags, which requires users to take some extra steps # before they can compile the whole repository: # 1. Install gflags. You may download it from here: # https://gflags.github.io/gflags/ (Mac users can `brew install gflags`) # 2. Once installed, add the include path for gflags to your CPATH env var and # the lib path to LIBRARY_PATH. 
If installed with default settings, the lib # will be /usr/local/lib and the include path will be /usr/local/include OUTPUT=$1 if test -z "$OUTPUT"; then echo "usage: $0 " >&2 exit 1 fi # we depend on C++11 PLATFORM_CXXFLAGS="-std=c++11" # we currently depend on POSIX platform COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX" # Default to fbcode gcc on internal fb machines if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then FBCODE_BUILD="true" # If we're compiling with TSAN we need pic build PIC_BUILD=$COMPILE_WITH_TSAN if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then # we need this to build with MySQL. Don't use for other purposes. source "$PWD/build_tools/fbcode_config4.8.1.sh" elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_5xx" ]; then source "$PWD/build_tools/fbcode_config.sh" else source "$PWD/build_tools/fbcode_config_platform007.sh" fi fi # Delete existing output, if it exists rm -f "$OUTPUT" touch "$OUTPUT" if test -z "$CC"; then if [ -x "$(command -v cc)" ]; then CC=cc elif [ -x "$(command -v clang)" ]; then CC=clang else CC=cc fi fi if test -z "$CXX"; then if [ -x "$(command -v g++)" ]; then CXX=g++ elif [ -x "$(command -v clang++)" ]; then CXX=clang++ else CXX=g++ fi fi # Detect OS if test -z "$TARGET_OS"; then TARGET_OS=`uname -s` fi if test -z "$TARGET_ARCHITECTURE"; then TARGET_ARCHITECTURE=`uname -m` fi if test -z "$CLANG_SCAN_BUILD"; then CLANG_SCAN_BUILD=scan-build fi if test -z "$CLANG_ANALYZER"; then CLANG_ANALYZER=$(command -v clang++ 2> /dev/null) fi if test -z "$FIND"; then FIND=find fi if test -z "$WATCH"; then WATCH=watch fi COMMON_FLAGS="$COMMON_FLAGS ${CFLAGS}" CROSS_COMPILE= PLATFORM_CCFLAGS= PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS" PLATFORM_SHARED_EXT="so" PLATFORM_SHARED_LDFLAGS="-Wl,--no-as-needed -shared -Wl,-soname -Wl," PLATFORM_SHARED_CFLAGS="-fPIC" PLATFORM_SHARED_VERSIONED=true # generic port files (working on all platform by #ifdef) go directly in /port GENERIC_PORT_FILES=`cd "$ROCKSDB_ROOT"; find port -name '*.cc' 
| tr "\n" " "` # On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp case "$TARGET_OS" in Darwin) PLATFORM=OS_MACOSX COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX" PLATFORM_SHARED_EXT=dylib PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " # PORT_FILES=port/darwin/darwin_specific.cc ;; IOS) PLATFORM=IOS COMMON_FLAGS="$COMMON_FLAGS -DOS_MACOSX -DIOS_CROSS_COMPILE -DROCKSDB_LITE" PLATFORM_SHARED_EXT=dylib PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " CROSS_COMPILE=true PLATFORM_SHARED_VERSIONED= ;; Linux) PLATFORM=OS_LINUX COMMON_FLAGS="$COMMON_FLAGS -DOS_LINUX" if [ -z "$USE_CLANG" ]; then COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" else PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" fi PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" if test $ROCKSDB_USE_IO_URING; then # check for liburing $CXX $CFLAGS -x c++ - -luring -o /dev/null 2>/dev/null < int main() { struct io_uring ring; io_uring_queue_init(1, &ring, 0); return 0; } EOF if [ "$?" = 0 ]; then PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -luring" COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_IOURING_PRESENT" fi fi if test -z "$USE_FOLLY_DISTRIBUTED_MUTEX"; then USE_FOLLY_DISTRIBUTED_MUTEX=1 fi # PORT_FILES=port/linux/linux_specific.cc ;; SunOS) PLATFORM=OS_SOLARIS COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS -m64" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -static-libstdc++ -static-libgcc -m64" # PORT_FILES=port/sunos/sunos_specific.cc ;; AIX) PLATFORM=OS_AIX CC=gcc COMMON_FLAGS="$COMMON_FLAGS -maix64 -pthread -fno-builtin-memcmp -D_REENTRANT -DOS_AIX -D__STDC_FORMAT_MACROS" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread -lpthread -lrt -maix64 -static-libstdc++ -static-libgcc" # PORT_FILES=port/aix/aix_specific.cc ;; FreeBSD) PLATFORM=OS_FREEBSD CXX=clang++ COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" # PORT_FILES=port/freebsd/freebsd_specific.cc ;; NetBSD) PLATFORM=OS_NETBSD 
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lgcc_s" # PORT_FILES=port/netbsd/netbsd_specific.cc ;; OpenBSD) PLATFORM=OS_OPENBSD CXX=clang++ COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -pthread" # PORT_FILES=port/openbsd/openbsd_specific.cc FIND=gfind WATCH=gnuwatch ;; DragonFly) PLATFORM=OS_DRAGONFLYBSD COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" # PORT_FILES=port/dragonfly/dragonfly_specific.cc ;; Cygwin) PLATFORM=CYGWIN PLATFORM_SHARED_CFLAGS="" PLATFORM_CXXFLAGS="-std=gnu++11" COMMON_FLAGS="$COMMON_FLAGS -DCYGWIN" if [ -z "$USE_CLANG" ]; then COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" else PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" fi PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" # PORT_FILES=port/linux/linux_specific.cc ;; OS_ANDROID_CROSSCOMPILE) PLATFORM=OS_ANDROID COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DROCKSDB_PLATFORM_POSIX" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS " # All pthread features are in the Android C library # PORT_FILES=port/android/android.cc CROSS_COMPILE=true ;; *) echo "Unknown platform!" >&2 exit 1 esac PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}" JAVA_LDFLAGS="$PLATFORM_LDFLAGS" JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS" JAVAC_ARGS="-source 7" if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then # Cross-compiling; do not try any compilation tests. # Also don't need any compilation tests if compiling on fbcode if [ "$FBCODE_BUILD" = "true" ]; then # Enable backtrace on fbcode since the necessary libraries are present COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" fi true else if ! 
test $ROCKSDB_DISABLE_FALLOCATE; then # Test whether fallocate is available $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < #include int main() { int fd = open("/dev/null", 0); fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024); } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT" fi fi if ! test $ROCKSDB_DISABLE_SNAPPY; then # Test whether Snappy library is installed # http://code.google.com/p/snappy/ $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lsnappy" JAVA_LDFLAGS="$JAVA_LDFLAGS -lsnappy" fi fi if ! test $ROCKSDB_DISABLE_GFLAGS; then # Test whether gflags library is installed # http://gflags.github.io/gflags/ # check if the namespace is gflags $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF #include int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" else # check if namespace is google $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF #include using namespace google; int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=google" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" fi fi fi if ! test $ROCKSDB_DISABLE_ZLIB; then # Test whether zlib library is installed $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DZLIB" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lz" JAVA_LDFLAGS="$JAVA_LDFLAGS -lz" fi fi if ! test $ROCKSDB_DISABLE_BZIP; then # Test whether bzip library is installed $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DBZIP2" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbz2" JAVA_LDFLAGS="$JAVA_LDFLAGS -lbz2" fi fi if ! 
test $ROCKSDB_DISABLE_LZ4; then # Test whether lz4 library is installed $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < #include int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DLZ4" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -llz4" JAVA_LDFLAGS="$JAVA_LDFLAGS -llz4" fi fi if ! test $ROCKSDB_DISABLE_ZSTD; then # Test whether zstd library is installed $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DZSTD" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lzstd" JAVA_LDFLAGS="$JAVA_LDFLAGS -lzstd" fi fi if ! test $ROCKSDB_DISABLE_NUMA; then # Test whether numa is available $CXX $CFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null < #include int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DNUMA" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lnuma" JAVA_LDFLAGS="$JAVA_LDFLAGS -lnuma" fi fi if ! test $ROCKSDB_DISABLE_TBB; then # Test whether tbb is available $CXX $CFLAGS $LDFLAGS -x c++ - -o /dev/null -ltbb 2>/dev/null < int main() {} EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DTBB" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltbb" JAVA_LDFLAGS="$JAVA_LDFLAGS -ltbb" fi fi if ! 
test $ROCKSDB_DISABLE_JEMALLOC; then # Test whether jemalloc is available if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null -ljemalloc \ 2>/dev/null; then # This will enable some preprocessor identifiers in the Makefile JEMALLOC=1 # JEMALLOC can be enabled either using the flag (like here) or by # providing direct link to the jemalloc library WITH_JEMALLOC_FLAG=1 # check for JEMALLOC installed with HomeBrew if [ "$PLATFORM" == "OS_MACOSX" ]; then if hash brew 2>/dev/null && brew ls --versions jemalloc > /dev/null; then JEMALLOC_VER=$(brew ls --versions jemalloc | tail -n 1 | cut -f 2 -d ' ') JEMALLOC_INCLUDE="-I/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/include" JEMALLOC_LIB="/usr/local/Cellar/jemalloc/${JEMALLOC_VER}/lib/libjemalloc_pic.a" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $JEMALLOC_LIB" JAVA_STATIC_LDFLAGS="$JAVA_STATIC_LDFLAGS $JEMALLOC_LIB" fi fi fi fi if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then # jemalloc is not available. Let's try tcmalloc if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null \ -ltcmalloc 2>/dev/null; then PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc" JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc" fi fi if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then # Test whether malloc_usable_size is available $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < int main() { size_t res = malloc_usable_size(0); (void)res; return 0; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_MALLOC_USABLE_SIZE" fi fi if ! test $ROCKSDB_DISABLE_MEMKIND; then # Test whether memkind library is installed $CXX $CFLAGS $COMMON_FLAGS -lmemkind -x c++ - -o /dev/null 2>/dev/null < int main() { memkind_malloc(MEMKIND_DAX_KMEM, 1024); return 0; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DMEMKIND" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lmemkind" JAVA_LDFLAGS="$JAVA_LDFLAGS -lmemkind" fi fi if ! 
test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then # Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < int main() { int x = PTHREAD_MUTEX_ADAPTIVE_NP; (void)x; return 0; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX" fi fi if ! test $ROCKSDB_DISABLE_BACKTRACE; then # Test whether backtrace is available $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < int main() { void* frames[1]; backtrace_symbols(frames, backtrace(frames, 1)); return 0; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" else # Test whether execinfo library is installed $CXX $CFLAGS -lexecinfo -x c++ - -o /dev/null 2>/dev/null < int main() { void* frames[1]; backtrace_symbols(frames, backtrace(frames, 1)); } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lexecinfo" JAVA_LDFLAGS="$JAVA_LDFLAGS -lexecinfo" fi fi fi if ! test $ROCKSDB_DISABLE_PG; then # Test if -pg is supported $CXX $CFLAGS -pg -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() { int fd = open("/dev/null", 0); sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE); } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_RANGESYNC_PRESENT" fi fi if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then # Test whether sched_getcpu is supported $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < int main() { int cpuid = sched_getcpu(); (void)cpuid; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SCHED_GETCPU_PRESENT" fi fi if ! test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then # Test whether getauxval is supported $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < int main() { uint64_t auxv = getauxval(AT_HWCAP); (void)auxv; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_AUXV_GETAUXVAL_PRESENT" fi fi if ! 
test $ROCKSDB_DISABLE_ALIGNED_NEW; then # Test whether c++17 aligned-new is supported $CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o /dev/null 2>/dev/null </dev/null <&2 exit 1 fi HDFS_CCFLAGS="$HDFS_CCFLAGS -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -DUSE_HDFS -I$HADOOP_HOME/include" HDFS_LDFLAGS="$HDFS_LDFLAGS -lhdfs -L$JAVA_HOME/jre/lib/amd64 -L$HADOOP_HOME/lib/native" HDFS_LDFLAGS="$HDFS_LDFLAGS -L$JAVA_HOME/jre/lib/amd64/server -L$GLIBC_RUNTIME_PATH/lib" HDFS_LDFLAGS="$HDFS_LDFLAGS -ldl -lverify -ljava -ljvm" COMMON_FLAGS="$COMMON_FLAGS $HDFS_CCFLAGS" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $HDFS_LDFLAGS" JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS" fi if test "0$PORTABLE" -eq 0; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then # Tune for this POWER processor, treating '+' models as base models POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+` COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER " elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then COMMON_FLAGS="$COMMON_FLAGS -march=z10 " elif test -n "`echo $TARGET_ARCHITECTURE | grep -e^arm -e^aarch64`"; then # TODO: Handle this with approprite options. COMMON_FLAGS="$COMMON_FLAGS" elif test -n "`echo $TARGET_ARCHITECTURE | grep ^aarch64`"; then COMMON_FLAGS="$COMMON_FLAGS" elif [ "$TARGET_OS" == "IOS" ]; then COMMON_FLAGS="$COMMON_FLAGS" elif [ "$TARGET_OS" == "AIX" ] || [ "$TARGET_OS" == "SunOS" ]; then # TODO: Not sure why we don't use -march=native on these OSes if test "$USE_SSE"; then TRY_SSE_ETC="1" fi else COMMON_FLAGS="$COMMON_FLAGS -march=native " fi else # PORTABLE=1 if test "$USE_SSE"; then TRY_SSE_ETC="1" fi fi if test "$TRY_SSE_ETC"; then # The USE_SSE flag now means "attempt to compile with widely-available # Intel architecture extensions utilized by specific optimizations in the # source code." It's a qualifier on PORTABLE=1 that means "mostly portable." # It doesn't even really check that your current CPU is compatible. 
# # SSE4.2 available since nehalem, ca. 2008-2010 TRY_SSE42="-msse4.2" # PCLMUL available since westmere, ca. 2010-2011 TRY_PCLMUL="-mpclmul" # AVX2 available since haswell, ca. 2013-2015 TRY_AVX2="-mavx2" fi $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null < #include int main() { volatile uint32_t x = _mm_crc32_u32(0, 0); (void)x; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS $TRY_SSE42 -DHAVE_SSE42" elif test "$USE_SSE"; then echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" >&2 fi $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o /dev/null 2>/dev/null < #include int main() { const auto a = _mm_set_epi64x(0, 0); const auto b = _mm_set_epi64x(0, 0); const auto c = _mm_clmulepi64_si128(a, b, 0x00); auto d = _mm_cvtsi128_si64(c); (void)d; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS $TRY_PCLMUL -DHAVE_PCLMUL" elif test "$USE_SSE"; then echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" >&2 fi $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o /dev/null 2>/dev/null < #include int main() { const auto a = _mm256_setr_epi32(0, 1, 2, 3, 4, 7, 6, 5); const auto b = _mm256_permutevar8x32_epi32(a, a); (void)b; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS $TRY_AVX2 -DHAVE_AVX2" elif test "$USE_SSE"; then echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2 fi $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < int main() { uint64_t a = 0xffffFFFFffffFFFF; __uint128_t b = __uint128_t(a) * a; a = static_cast(b >> 64); (void)a; } EOF if [ "$?" = 0 ]; then COMMON_FLAGS="$COMMON_FLAGS -DHAVE_UINT128_EXTENSION" fi # iOS doesn't support thread-local storage, but this check would erroneously # succeed because the cross-compiler flags are added by the Makefile, not this # script. 
if [ "$PLATFORM" != IOS ]; then $CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null </dev/null if [ "$?" = 0 ]; then EXEC_LDFLAGS+="-ldl" rm -f test_dl.o fi fi fi PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS" VALGRIND_VER="$VALGRIND_VER" ROCKSDB_MAJOR=`build_tools/version.sh major` ROCKSDB_MINOR=`build_tools/version.sh minor` ROCKSDB_PATCH=`build_tools/version.sh patch` echo "CC=$CC" >> "$OUTPUT" echo "CXX=$CXX" >> "$OUTPUT" echo "PLATFORM=$PLATFORM" >> "$OUTPUT" echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> "$OUTPUT" echo "JAVA_LDFLAGS=$JAVA_LDFLAGS" >> "$OUTPUT" echo "JAVA_STATIC_LDFLAGS=$JAVA_STATIC_LDFLAGS" >> "$OUTPUT" echo "JAVAC_ARGS=$JAVAC_ARGS" >> "$OUTPUT" echo "VALGRIND_VER=$VALGRIND_VER" >> "$OUTPUT" echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> "$OUTPUT" echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> "$OUTPUT" echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> "$OUTPUT" echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> "$OUTPUT" echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> "$OUTPUT" echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED" >> "$OUTPUT" echo "EXEC_LDFLAGS=$EXEC_LDFLAGS" >> "$OUTPUT" echo "JEMALLOC_INCLUDE=$JEMALLOC_INCLUDE" >> "$OUTPUT" echo "JEMALLOC_LIB=$JEMALLOC_LIB" >> "$OUTPUT" echo "ROCKSDB_MAJOR=$ROCKSDB_MAJOR" >> "$OUTPUT" echo "ROCKSDB_MINOR=$ROCKSDB_MINOR" >> "$OUTPUT" echo "ROCKSDB_PATCH=$ROCKSDB_PATCH" >> "$OUTPUT" echo "CLANG_SCAN_BUILD=$CLANG_SCAN_BUILD" >> "$OUTPUT" echo "CLANG_ANALYZER=$CLANG_ANALYZER" >> "$OUTPUT" echo "PROFILING_FLAGS=$PROFILING_FLAGS" >> "$OUTPUT" echo "FIND=$FIND" >> "$OUTPUT" echo "WATCH=$WATCH" >> "$OUTPUT" # This will enable some related identifiers for the preprocessor if test -n "$JEMALLOC"; then echo "JEMALLOC=1" >> "$OUTPUT" fi # Indicates that jemalloc should be enabled using -ljemalloc flag # The alternative is to porvide a direct link to the library via JEMALLOC_LIB # and JEMALLOC_INCLUDE if test -n 
"$WITH_JEMALLOC_FLAG"; then echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT" fi echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT" if test -n "$USE_FOLLY_DISTRIBUTED_MUTEX"; then echo "USE_FOLLY_DISTRIBUTED_MUTEX=$USE_FOLLY_DISTRIBUTED_MUTEX" >> "$OUTPUT" fi rocksdb-6.11.4/build_tools/dependencies.sh000066400000000000000000000041251370372246700205500ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/5.x/centos7-native/c447969 CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/1bd23f9917738974ad0ff305aa23eb5f93f18305/9.0.0/centos7-native/c9f9104 LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/5.x/gcc-5-glibc-2.23/339d858 GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.23/gcc-5-glibc-2.23/ca1d1c0 SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/gcc-5-glibc-2.23/9bc6787 ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/gcc-5-glibc-2.23/9bc6787 BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/gcc-5-glibc-2.23/9bc6787 LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/gcc-5-glibc-2.23/9bc6787 ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/gcc-5-glibc-2.23/03859b5 GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/gcc-5-glibc-2.23/9bc6787 JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/gcc-5-glibc-2.23/0c8f76d NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/gcc-5-glibc-2.23/9bc6787 LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/gcc-5-glibc-2.23/b443de1 
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/gcc-5-glibc-2.23/9bc6787 KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/4.0.9-36_fbk5_2933_gd092e3f/gcc-5-glibc-2.23/da39a3e BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/2e3cb7d119b3cea5f1e738cc13a1ac69f49eb875/2.29.1/centos7-native/da39a3e VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/gcc-5-glibc-2.23/9bc6787 LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.2.3/gcc-5-glibc-2.23/65372bd rocksdb-6.11.4/build_tools/dependencies_4.8.1.sh000066400000000000000000000042551370372246700213040ustar00rootroot00000000000000# shellcheck disable=SC2148 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. GCC_BASE=/mnt/gvfs/third-party2/gcc/cf7d14c625ce30bae1a4661c2319c5a283e4dd22/4.8.1/centos6-native/cc6c9dc CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/8598c375b0e94e1448182eb3df034704144a838d/stable/centos6-native/3f16ddd LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/d6e0a7da6faba45f5e5b1638f9edd7afc2f34e7d/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc GLIBC_BASE=/mnt/gvfs/third-party2/glibc/d282e6e8f3d20f4e40a516834847bdc038e07973/2.17/gcc-4.8.1-glibc-2.17/99df8fc SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfcb4/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-4.8.1-glibc-2.17/c3f970a ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-4.8.1-glibc-2.17/c3f970a GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a 
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-4.8.1-glibc-2.17/8d31e51 NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/ad576de2a1ea560c4d3434304f0fc4e079bede42/trunk/gcc-4.8.1-glibc-2.17/675d945 TBB_BASE=/mnt/gvfs/third-party2/tbb/9d9a554877d0c5bef330fe818ab7178806dd316a/4.0_update2/gcc-4.8.1-glibc-2.17/c3f970a KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/7c111ff27e0c466235163f00f280a9d617c3d2ec/4.0.9-36_fbk5_2933_gd092e3f/gcc-4.8.1-glibc-2.17/da39a3e BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/b7fd454c4b10c6a81015d4524ed06cdeab558490/2.26/centos6-native/da39a3e VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d7f4d4d86674a57668e3a96f76f0e17dd0eb8765/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a LUA_BASE=/mnt/gvfs/third-party2/lua/61e4abf5813bbc39bc4f548757ccfcadde175a48/5.2.3/centos6-native/730f94e rocksdb-6.11.4/build_tools/dependencies_platform007.sh000066400000000000000000000041401370372246700227000ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6 CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/ LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413 SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614 GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002 NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9 TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d LIBURING_BASE=/mnt/gvfs/third-party2/liburing/79427253fd0d42677255aacfe6d13bfe63f752eb/20190828/platform007/ca4da3d KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614 VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d 
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832 rocksdb-6.11.4/build_tools/dockerbuild.sh000077500000000000000000000002241370372246700204100ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. docker run -v $PWD:/rocks -w /rocks buildpack-deps make rocksdb-6.11.4/build_tools/error_filter.py000066400000000000000000000151451370372246700206420ustar00rootroot00000000000000# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. # This source code is licensed under both the GPLv2 (found in the # COPYING file in the root directory) and Apache 2.0 License # (found in the LICENSE.Apache file in the root directory). '''Filter for error messages in test output: - Receives merged stdout/stderr from test on stdin - Finds patterns of known error messages for test name (first argument) - Prints those error messages to stdout ''' from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import re import sys class ErrorParserBase(object): def parse_error(self, line): '''Parses a line of test output. If it contains an error, returns a formatted message describing the error; otherwise, returns None. Subclasses must override this method. ''' raise NotImplementedError class GTestErrorParser(ErrorParserBase): '''A parser that remembers the last test that began running so it can print that test's name upon detecting failure. 
''' _GTEST_NAME_PATTERN = re.compile(r'\[ RUN \] (\S+)$') # format: ':: Failure' _GTEST_FAIL_PATTERN = re.compile(r'(unknown file|\S+:\d+): Failure$') def __init__(self): self._last_gtest_name = 'Unknown test' def parse_error(self, line): gtest_name_match = self._GTEST_NAME_PATTERN.match(line) if gtest_name_match: self._last_gtest_name = gtest_name_match.group(1) return None gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line) if gtest_fail_match: return '%s failed: %s' % ( self._last_gtest_name, gtest_fail_match.group(1)) return None class MatchErrorParser(ErrorParserBase): '''A simple parser that returns the whole line if it matches the pattern. ''' def __init__(self, pattern): self._pattern = re.compile(pattern) def parse_error(self, line): if self._pattern.match(line): return line return None class CompilerErrorParser(MatchErrorParser): def __init__(self): # format (compile error): # '::: error: ' # format (link error): # ':: error: ' # The below regex catches both super(CompilerErrorParser, self).__init__(r'\S+:\d+: error:') class ScanBuildErrorParser(MatchErrorParser): def __init__(self): super(ScanBuildErrorParser, self).__init__( r'scan-build: \d+ bugs found.$') class DbCrashErrorParser(MatchErrorParser): def __init__(self): super(DbCrashErrorParser, self).__init__(r'\*\*\*.*\^$|TEST FAILED.') class WriteStressErrorParser(MatchErrorParser): def __init__(self): super(WriteStressErrorParser, self).__init__( r'ERROR: write_stress died with exitcode=\d+') class AsanErrorParser(MatchErrorParser): def __init__(self): super(AsanErrorParser, self).__init__( r'==\d+==ERROR: AddressSanitizer:') class UbsanErrorParser(MatchErrorParser): def __init__(self): # format: '::: runtime error: ' super(UbsanErrorParser, self).__init__(r'\S+:\d+:\d+: runtime error:') class ValgrindErrorParser(MatchErrorParser): def __init__(self): # just grab the summary, valgrind doesn't clearly distinguish errors # from other log messages. 
super(ValgrindErrorParser, self).__init__(r'==\d+== ERROR SUMMARY:') class CompatErrorParser(MatchErrorParser): def __init__(self): super(CompatErrorParser, self).__init__(r'==== .*[Ee]rror.* ====$') class TsanErrorParser(MatchErrorParser): def __init__(self): super(TsanErrorParser, self).__init__(r'WARNING: ThreadSanitizer:') _TEST_NAME_TO_PARSERS = { 'punit': [CompilerErrorParser, GTestErrorParser], 'unit': [CompilerErrorParser, GTestErrorParser], 'release': [CompilerErrorParser, GTestErrorParser], 'unit_481': [CompilerErrorParser, GTestErrorParser], 'release_481': [CompilerErrorParser, GTestErrorParser], 'clang_unit': [CompilerErrorParser, GTestErrorParser], 'clang_release': [CompilerErrorParser, GTestErrorParser], 'clang_analyze': [CompilerErrorParser, ScanBuildErrorParser], 'code_cov': [CompilerErrorParser, GTestErrorParser], 'unity': [CompilerErrorParser, GTestErrorParser], 'lite': [CompilerErrorParser], 'lite_test': [CompilerErrorParser, GTestErrorParser], 'stress_crash': [CompilerErrorParser, DbCrashErrorParser], 'stress_crash_with_atomic_flush': [CompilerErrorParser, DbCrashErrorParser], 'stress_crash_with_txn': [CompilerErrorParser, DbCrashErrorParser], 'write_stress': [CompilerErrorParser, WriteStressErrorParser], 'asan': [CompilerErrorParser, GTestErrorParser, AsanErrorParser], 'asan_crash': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], 'asan_crash_with_atomic_flush': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], 'asan_crash_with_txn': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser], 'ubsan': [CompilerErrorParser, GTestErrorParser, UbsanErrorParser], 'ubsan_crash': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], 'ubsan_crash_with_atomic_flush': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], 'ubsan_crash_with_txn': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser], 'valgrind': [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser], 'tsan': [CompilerErrorParser, 
GTestErrorParser, TsanErrorParser], 'format_compatible': [CompilerErrorParser, CompatErrorParser], 'run_format_compatible': [CompilerErrorParser, CompatErrorParser], 'no_compression': [CompilerErrorParser, GTestErrorParser], 'run_no_compression': [CompilerErrorParser, GTestErrorParser], 'regression': [CompilerErrorParser], 'run_regression': [CompilerErrorParser], } def main(): if len(sys.argv) != 2: return 'Usage: %s ' % sys.argv[0] test_name = sys.argv[1] if test_name not in _TEST_NAME_TO_PARSERS: return 'Unknown test name: %s' % test_name error_parsers = [] for parser_cls in _TEST_NAME_TO_PARSERS[test_name]: error_parsers.append(parser_cls()) for line in sys.stdin: line = line.strip() for error_parser in error_parsers: error_msg = error_parser.parse_error(line) if error_msg is not None: print(error_msg) if __name__ == '__main__': sys.exit(main()) rocksdb-6.11.4/build_tools/fb_compile_mongo.sh000077500000000000000000000027751370372246700214340ustar00rootroot00000000000000#!/bin/sh # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # fail early set -e if test -z $ROCKSDB_PATH; then ROCKSDB_PATH=~/rocksdb fi source $ROCKSDB_PATH/build_tools/fbcode_config4.8.1.sh EXTRA_LDFLAGS="" if test -z $ALLOC; then # default ALLOC=tcmalloc elif [[ $ALLOC == "jemalloc" ]]; then ALLOC=system EXTRA_LDFLAGS+=" -Wl,--whole-archive $JEMALLOC_LIB -Wl,--no-whole-archive" fi # we need to force mongo to use static library, not shared STATIC_LIB_DEP_DIR='build/static_library_dependencies' test -d $STATIC_LIB_DEP_DIR || mkdir $STATIC_LIB_DEP_DIR test -h $STATIC_LIB_DEP_DIR/`basename $SNAPPY_LIBS` || ln -s $SNAPPY_LIBS $STATIC_LIB_DEP_DIR test -h $STATIC_LIB_DEP_DIR/`basename $LZ4_LIBS` || ln -s $LZ4_LIBS $STATIC_LIB_DEP_DIR EXTRA_LDFLAGS+=" -L $STATIC_LIB_DEP_DIR" set -x EXTRA_CMD="" if ! 
test -e version.json; then # this is Mongo 3.0 EXTRA_CMD="--rocksdb \ --variant-dir=linux2/norm --cxx=${CXX} \ --cc=${CC} \ --use-system-zlib" # add this line back to normal code path # when https://jira.mongodb.org/browse/SERVER-19123 is resolved fi scons \ LINKFLAGS="$EXTRA_LDFLAGS $EXEC_LDFLAGS $PLATFORM_LDFLAGS" \ CCFLAGS="$CXXFLAGS -L $STATIC_LIB_DEP_DIR" \ LIBS="lz4 gcc stdc++" \ LIBPATH="$ROCKSDB_PATH" \ CPPPATH="$ROCKSDB_PATH/include" \ -j32 \ --allocator=$ALLOC \ --nostrip \ --opt=on \ --disable-minimum-compiler-version-enforcement \ --use-system-snappy \ --disable-warnings-as-errors \ $EXTRA_CMD $* rocksdb-6.11.4/build_tools/fbcode_config.sh000066400000000000000000000112731370372246700206730ustar00rootroot00000000000000#!/bin/sh # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # # Set environment variables so that we can compile rocksdb using # fbcode settings. It uses the latest g++ and clang compilers and also # uses jemalloc # Environment variables that change the behavior of this script: # PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. 
libraries that don't have pic variant will not be included BASEDIR=`dirname $BASH_SOURCE` source "$BASEDIR/dependencies.sh" CFLAGS="" # libgcc LIBGCC_INCLUDE="$LIBGCC_BASE/include" LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" # glibc GLIBC_INCLUDE="$GLIBC_BASE/include" GLIBC_LIBS=" -L $GLIBC_BASE/lib" # snappy SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" if test -z $PIC_BUILD; then SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" else SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a" fi CFLAGS+=" -DSNAPPY" if test -z $PIC_BUILD; then # location of zlib headers and libraries ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" CFLAGS+=" -DZLIB" # location of bzip headers and libraries BZIP_INCLUDE=" -I $BZIP2_BASE/include/" BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a" CFLAGS+=" -DBZIP2" LZ4_INCLUDE=" -I $LZ4_BASE/include/" LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" CFLAGS+=" -DLZ4" fi ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" if test -z $PIC_BUILD; then ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" else ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a" fi CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY" # location of gflags headers and libraries GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" if test -z $PIC_BUILD; then GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" else GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a" fi CFLAGS+=" -DGFLAGS=gflags" # location of jemalloc JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a" if test -z $PIC_BUILD; then # location of numa NUMA_INCLUDE=" -I $NUMA_BASE/include/" NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" CFLAGS+=" -DNUMA" # location of libunwind LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" fi # location of TBB TBB_INCLUDE=" -isystem $TBB_BASE/include/" if test -z $PIC_BUILD; then TBB_LIBS="$TBB_BASE/lib/libtbb.a" else TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a" fi CFLAGS+=" -DTBB" test "$USE_SSE" || USE_SSE=1 export USE_SSE test "$PORTABLE" || PORTABLE=1 export PORTABLE BINUTILS="$BINUTILS_BASE/bin" AR="$BINUTILS/ar" 
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE" STDLIBS="-L $GCC_BASE/lib64" CLANG_BIN="$CLANG_BASE/bin" CLANG_LIB="$CLANG_BASE/lib" CLANG_SRC="$CLANG_BASE/../../src" CLANG_ANALYZER="$CLANG_BIN/clang++" CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build" if [ -z "$USE_CLANG" ]; then # gcc CC="$GCC_BASE/bin/gcc" CXX="$GCC_BASE/bin/g++" CFLAGS+=" -B$BINUTILS/gold" CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" JEMALLOC=1 else # clang CLANG_INCLUDE="$CLANG_LIB/clang/stable/include" CC="$CLANG_BIN/clang" CXX="$CLANG_BIN/clang++" KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib" CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x " CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x/x86_64-facebook-linux " CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" CFLAGS+=" -isystem $CLANG_INCLUDE" CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " CFLAGS+=" -Wno-expansion-to-defined " CXXFLAGS="-nostdinc++" fi CFLAGS+=" $DEPS_INCLUDE" CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42" CXXFLAGS+=" $CFLAGS" EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS" EXEC_LDFLAGS+=" -B$BINUTILS/gold" EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-5-glibc-2.23/lib/ld.so" EXEC_LDFLAGS+=" $LIBUNWIND" EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-5-glibc-2.23/lib" # required by libtbb EXEC_LDFLAGS+=" -ldl" PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS" VALGRIND_VER="$VALGRIND_BASE/bin/" 
LUA_PATH="$LUA_BASE" if test -z $PIC_BUILD; then LUA_LIB=" $LUA_PATH/lib/liblua.a" else LUA_LIB=" $LUA_PATH/lib/liblua_pic.a" fi USE_FOLLY_DISTRIBUTED_MUTEX=1 export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB rocksdb-6.11.4/build_tools/fbcode_config4.8.1.sh000066400000000000000000000071671370372246700212730ustar00rootroot00000000000000#!/bin/sh # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # # Set environment variables so that we can compile rocksdb using # fbcode settings. It uses the latest g++ compiler and also # uses jemalloc BASEDIR=`dirname $BASH_SOURCE` source "$BASEDIR/dependencies_4.8.1.sh" # location of libgcc LIBGCC_INCLUDE="$LIBGCC_BASE/include" LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" # location of glibc GLIBC_INCLUDE="$GLIBC_BASE/include" GLIBC_LIBS=" -L $GLIBC_BASE/lib" # location of snappy headers and libraries SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include" SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" # location of zlib headers and libraries ZLIB_INCLUDE=" -I $ZLIB_BASE/include" ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" # location of bzip headers and libraries BZIP2_INCLUDE=" -I $BZIP2_BASE/include/" BZIP2_LIBS=" $BZIP2_BASE/lib/libbz2.a" LZ4_INCLUDE=" -I $LZ4_BASE/include" LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" ZSTD_INCLUDE=" -I $ZSTD_BASE/include" ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" # location of gflags headers and libraries GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" # location of jemalloc JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include" JEMALLOC_LIB="$JEMALLOC_BASE/lib/libjemalloc.a" # location of numa NUMA_INCLUDE=" -I $NUMA_BASE/include/" NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" # location of libunwind LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" # location of tbb TBB_INCLUDE=" -isystem $TBB_BASE/include/" TBB_LIBS="$TBB_BASE/lib/libtbb.a" test "$USE_SSE" || USE_SSE=1 export USE_SSE test 
"$PORTABLE" || PORTABLE=1 export PORTABLE BINUTILS="$BINUTILS_BASE/bin" AR="$BINUTILS/ar" DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP2_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE" STDLIBS="-L $GCC_BASE/lib64" if [ -z "$USE_CLANG" ]; then # gcc CC="$GCC_BASE/bin/gcc" CXX="$GCC_BASE/bin/g++" CFLAGS="-B$BINUTILS/gold -m64 -mtune=generic" CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" JEMALLOC=1 else # clang CLANG_BIN="$CLANG_BASE/bin" CLANG_LIB="$CLANG_BASE/lib" CLANG_INCLUDE="$CLANG_LIB/clang/*/include" CC="$CLANG_BIN/clang" CXX="$CLANG_BIN/clang++" KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include/" CFLAGS="-B$BINUTILS/gold -nostdinc -nostdlib" CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1 " CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1/x86_64-facebook-linux " CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" CFLAGS+=" -isystem $CLANG_INCLUDE" CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " CXXFLAGS="-nostdinc++" fi CFLAGS+=" $DEPS_INCLUDE" CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42" CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA -DTBB" CXXFLAGS+=" $CFLAGS" EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS" EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so" EXEC_LDFLAGS+=" $LIBUNWIND" EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib" # required by libtbb EXEC_LDFLAGS+=" -ldl" PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS" VALGRIND_VER="$VALGRIND_BASE/bin/" LUA_PATH="$LUA_BASE" export CC CXX AR CFLAGS CXXFLAGS 
EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE LUA_PATH rocksdb-6.11.4/build_tools/fbcode_config_platform007.sh000066400000000000000000000116611370372246700230270ustar00rootroot00000000000000#!/bin/sh # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # # Set environment variables so that we can compile rocksdb using # fbcode settings. It uses the latest g++ and clang compilers and also # uses jemalloc # Environment variables that change the behavior of this script: # PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included BASEDIR=`dirname $BASH_SOURCE` source "$BASEDIR/dependencies_platform007.sh" CFLAGS="" # libgcc LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0" LIBGCC_LIBS=" -L $LIBGCC_BASE/lib" # glibc GLIBC_INCLUDE="$GLIBC_BASE/include" GLIBC_LIBS=" -L $GLIBC_BASE/lib" # snappy SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/" if test -z $PIC_BUILD; then SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a" else SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a" fi CFLAGS+=" -DSNAPPY" if test -z $PIC_BUILD; then # location of zlib headers and libraries ZLIB_INCLUDE=" -I $ZLIB_BASE/include/" ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a" CFLAGS+=" -DZLIB" # location of bzip headers and libraries BZIP_INCLUDE=" -I $BZIP2_BASE/include/" BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a" CFLAGS+=" -DBZIP2" LZ4_INCLUDE=" -I $LZ4_BASE/include/" LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a" CFLAGS+=" -DLZ4" fi ZSTD_INCLUDE=" -I $ZSTD_BASE/include/" if test -z $PIC_BUILD; then ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a" else ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a" fi CFLAGS+=" -DZSTD" # location of gflags headers and libraries GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/" if test -z $PIC_BUILD; then GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a" else GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a" fi CFLAGS+=" -DGFLAGS=gflags" # location of jemalloc JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/" 
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a" if test -z $PIC_BUILD; then # location of numa NUMA_INCLUDE=" -I $NUMA_BASE/include/" NUMA_LIB=" $NUMA_BASE/lib/libnuma.a" CFLAGS+=" -DNUMA" # location of libunwind LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a" fi # location of TBB TBB_INCLUDE=" -isystem $TBB_BASE/include/" if test -z $PIC_BUILD; then TBB_LIBS="$TBB_BASE/lib/libtbb.a" else TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a" fi CFLAGS+=" -DTBB" # location of LIBURING LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/" if test -z $PIC_BUILD; then LIBURING_LIBS="$LIBURING_BASE/lib/liburing.a" else LIBURING_LIBS="$LIBURING_BASE/lib/liburing_pic.a" fi CFLAGS+=" -DLIBURING" test "$USE_SSE" || USE_SSE=1 export USE_SSE test "$PORTABLE" || PORTABLE=1 export PORTABLE BINUTILS="$BINUTILS_BASE/bin" AR="$BINUTILS/ar" DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE" STDLIBS="-L $GCC_BASE/lib64" CLANG_BIN="$CLANG_BASE/bin" CLANG_LIB="$CLANG_BASE/lib" CLANG_SRC="$CLANG_BASE/../../src" CLANG_ANALYZER="$CLANG_BIN/clang++" CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build" if [ -z "$USE_CLANG" ]; then # gcc CC="$GCC_BASE/bin/gcc" CXX="$GCC_BASE/bin/g++" CFLAGS+=" -B$BINUTILS/gold" CFLAGS+=" -isystem $LIBGCC_INCLUDE" CFLAGS+=" -isystem $GLIBC_INCLUDE" JEMALLOC=1 else # clang CLANG_INCLUDE="$CLANG_LIB/clang/stable/include" CC="$CLANG_BIN/clang" CXX="$CLANG_BIN/clang++" KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include" CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib" CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x " CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux " CFLAGS+=" -isystem $GLIBC_INCLUDE" CFLAGS+=" -isystem $LIBGCC_INCLUDE" CFLAGS+=" -isystem $CLANG_INCLUDE" CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux " CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE " CFLAGS+=" -Wno-expansion-to-defined " CXXFLAGS="-nostdinc++" fi CFLAGS+=" 
$DEPS_INCLUDE" CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT" CXXFLAGS+=" $CFLAGS" EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS" EXEC_LDFLAGS+=" -B$BINUTILS/gold" EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so" EXEC_LDFLAGS+=" $LIBUNWIND" EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib" # required by libtbb EXEC_LDFLAGS+=" -ldl" PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++" EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS" VALGRIND_VER="$VALGRIND_BASE/bin/" # lua not supported because it's on track for deprecation, I think LUA_PATH= LUA_LIB= USE_FOLLY_DISTRIBUTED_MUTEX=1 export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB rocksdb-6.11.4/build_tools/format-diff.sh000077500000000000000000000156121370372246700203260ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # If clang_format_diff.py command is not specfied, we assume we are able to # access directly without any path. print_usage () { echo "Usage:" echo "format-diff.sh [OPTIONS]" echo "-c: check only." echo "-h: print this message." } while getopts ':ch' OPTION; do case "$OPTION" in c) CHECK_ONLY=1 ;; h) print_usage exit 1 ;; ?) 
print_usage exit 1 ;; esac done REPO_ROOT="$(git rev-parse --show-toplevel)" if [ "$CLANG_FORMAT_DIFF" ]; then echo "Note: CLANG_FORMAT_DIFF='$CLANG_FORMAT_DIFF'" # Dry run to confirm dependencies like argparse if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then true #Good else exit 128 fi else # First try directly executing the possibilities if clang-format-diff.py --help &> /dev/null < /dev/null; then CLANG_FORMAT_DIFF=clang-format-diff.py elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py else # This probably means we need to directly invoke the interpreter. # But first find clang-format-diff.py if [ -f "$REPO_ROOT/clang-format-diff.py" ]; then CFD_PATH="$REPO_ROOT/clang-format-diff.py" elif which clang-format-diff.py &> /dev/null; then CFD_PATH="$(which clang-format-diff.py)" else echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!" echo "You can download clang-format-diff.py by running: " echo " curl --location http://goo.gl/iUW1u2 -o ${CLANG_FORMAT_DIFF}" echo "You can download clang-format by running:" echo " brew install clang-format" echo " Or" echo " apt install clang-format" echo " This might work too:" echo " yum install git-clang-format" echo "Then, move both files (i.e. ${CLANG_FORMAT_DIFF} and clang-format) to some directory within PATH=${PATH}" echo "and make sure ${CLANG_FORMAT_DIFF} is executable." exit 128 fi # Check argparse pre-req on interpreter, or it will fail if echo import argparse | ${PYTHON:-python3}; then true # Good else echo "To run clang-format-diff.py, we'll need the library "argparse" to be" echo "installed. You can try either of the follow ways to install it:" echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse" echo " 2. easy_install argparse (if you have easy_install)" echo " 3. 
pip install argparse (if you have pip)" exit 129 fi # Unfortunately, some machines have a Python2 clang-format-diff.py # installed but only a Python3 interpreter installed. Rather than trying # different Python versions that might be installed, we can try migrating # the code to Python3 if it looks like Python2 if grep -q "print '" "$CFD_PATH" && \ ${PYTHON:-python3} --version | grep -q 'ython 3'; then if [ ! -f "$REPO_ROOT/.py3/clang-format-diff.py" ]; then echo "Migrating $CFD_PATH to Python3 in a hidden file" mkdir -p "$REPO_ROOT/.py3" ${PYTHON:-python3} -m lib2to3 -w -n -o "$REPO_ROOT/.py3" "$CFD_PATH" > /dev/null || exit 128 fi CFD_PATH="$REPO_ROOT/.py3/clang-format-diff.py" fi CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH" # This had better work after all those checks if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then true #Good else exit 128 fi fi fi # TODO(kailiu) following work is not complete since we still need to figure # out how to add the modified files done pre-commit hook to git's commit index. # # Check if this script has already been added to pre-commit hook. # Will suggest user to add this script to pre-commit hook if their pre-commit # is empty. # PRE_COMMIT_SCRIPT_PATH="`git rev-parse --show-toplevel`/.git/hooks/pre-commit" # if ! ls $PRE_COMMIT_SCRIPT_PATH &> /dev/null # then # echo "Would you like to add this script to pre-commit hook, which will do " # echo -n "the format check for all the affected lines before you check in (y/n):" # read add_to_hook # if [ "$add_to_hook" == "y" ] # then # ln -s `git rev-parse --show-toplevel`/build_tools/format-diff.sh $PRE_COMMIT_SCRIPT_PATH # fi # fi set -e uncommitted_code=`git diff HEAD` # If there's no uncommitted changes, we assume user are doing post-commit # format check, in which case we'll try to check the modified lines vs. the # facebook/rocksdb.git master branch. Otherwise, we'll check format of the # uncommitted code only. 
# ---------------------------------------------------------------------------
# Main flow of format-diff.sh:
#   1. Compute the clang-format diff for the relevant changes.
#   2. In check-only mode, report and exit.
#   3. Otherwise show a colorized diff, ask for confirmation and reformat
#      the affected lines in place.
#
# Reads:  uncommitted_code, CLANG_FORMAT_DIFF, CHECK_ONLY, VERBOSE_CHECK, OPT,
#         FORMAT_REMOTE, FORMAT_UPSTREAM (the last two may be preset by caller)
# Writes: FORMAT_REMOTE, FORMAT_UPSTREAM, FORMAT_UPSTREAM_MERGE_BASE, diffs,
#         to_fix
#
# NOTE: $CLANG_FORMAT_DIFF is intentionally left unquoted everywhere below.
# It may hold an interpreter plus a script path (e.g. "python3 /path/to/
# clang-format-diff.py"), so it has to be word-split into command + argument.
# ---------------------------------------------------------------------------
if [ -z "$uncommitted_code" ]
then
  # No uncommitted changes: compare the committed work against upstream.

  # Attempt to get name of facebook/rocksdb.git remote.
  [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
  # Fall back on 'origin' if that fails
  [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin
  # Use master branch from that remote
  [ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/master"
  # Get the common ancestor with that remote branch. Everything after that
  # common ancestor would be considered the contents of a pull request, so
  # should be relevant for formatting fixes.
  FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)"
  # Get the differences
  diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1)
else
  # Check the format of uncommitted lines,
  diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1)
fi

if [ -z "$diffs" ]
then
  echo "Nothing needs to be reformatted!"
  exit 0
elif [ -n "$CHECK_ONLY" ]
then
  # Check-only mode: never modify files, just report and fail.
  echo "Your change has unformatted code. Please run make format!"
  if [ -n "$VERBOSE_CHECK" ]; then
    clang-format --version
    echo "$diffs"
  fi
  exit 1
fi

# Highlight the insertion/deletion from the clang-format-diff.py's output
COLOR_END="\033[0m"
COLOR_RED="\033[0;31m"
COLOR_GREEN="\033[0;32m"

echo -e "Detect lines that doesn't follow the format rules:\r"
# Add the color to the diff. lines added will be green; lines removed will be red.
# The inner `echo -e` expands the escape sequences before sed sees them.
echo "$diffs" |
  sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" |
  sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/"

# Never prompt when OPT carries -DTRAVIS; just fail.
if [[ "$OPT" == *"-DTRAVIS"* ]]
then
  exit 1
fi

echo -e "Would you like to fix the format automatically (y/n): \c"

# Make sure under any mode, we can read user input.
exec < /dev/tty
# -r: take the answer literally (no backslash processing).
read -r to_fix

if [ "$to_fix" != "y" ]
then
  exit 1
fi

# Do in-place format adjustment.
if [ -z "$uncommitted_code" ]
then
  git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -i -p 1
else
  git diff -U0 HEAD | $CLANG_FORMAT_DIFF -i -p 1
fi
echo "Files reformatted!"
# Amend to last commit if user do the post-commit format check if [ -z "$uncommitted_code" ]; then echo -e "Would you like to amend the changes to last commit (`git log HEAD --oneline | head -1`)? (y/n): \c" read to_amend if [ "$to_amend" == "y" ] then git commit -a --amend --reuse-message HEAD echo "Amended to last commit" fi fi rocksdb-6.11.4/build_tools/gnu_parallel000077500000000000000000007351141370372246700201720ustar00rootroot00000000000000#!/usr/bin/env perl # Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and # Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see # or write to the Free Software Foundation, Inc., 51 Franklin St, # Fifth Floor, Boston, MA 02110-1301 USA # open3 used in Job::start use IPC::Open3; # &WNOHANG used in reaper use POSIX qw(:sys_wait_h setsid ceil :errno_h); # gensym used in Job::start use Symbol qw(gensym); # tempfile used in Job::start use File::Temp qw(tempfile tempdir); # mkpath used in openresultsfile use File::Path; # GetOptions used in get_options_from_array use Getopt::Long; # Used to ensure code quality use strict; use File::Basename; if(not $ENV{HOME}) { # $ENV{HOME} is sometimes not set if called from PHP ::warning("\$HOME not set. 
Using /tmp\n"); $ENV{HOME} = "/tmp"; } save_stdin_stdout_stderr(); save_original_signal_handler(); parse_options(); ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); my $number_of_args; if($Global::max_number_of_args) { $number_of_args=$Global::max_number_of_args; } elsif ($opt::X or $opt::m or $opt::xargs) { $number_of_args = undef; } else { $number_of_args = 1; } my @command; @command = @ARGV; my @fhlist; if($opt::pipepart) { @fhlist = map { open_or_exit($_) } "/dev/null"; } else { @fhlist = map { open_or_exit($_) } @opt::a; if(not @fhlist and not $opt::pipe) { @fhlist = (*STDIN); } } if($opt::skip_first_line) { # Skip the first line for the first file handle my $fh = $fhlist[0]; <$fh>; } if($opt::header and not $opt::pipe) { my $fh = $fhlist[0]; # split with colsep or \t # $header force $colsep = \t if undef? my $delimiter = $opt::colsep; $delimiter ||= "\$"; my $id = 1; for my $fh (@fhlist) { my $line = <$fh>; chomp($line); ::debug("init", "Delimiter: '$delimiter'"); for my $s (split /$delimiter/o, $line) { ::debug("init", "Colname: '$s'"); # Replace {colname} with {2} # TODO accept configurable short hands # TODO how to deal with headers in {=...=} for(@command) { s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; } $Global::input_source_header{$id} = $s; $id++; } } } else { my $id = 1; for my $fh (@fhlist) { $Global::input_source_header{$id} = $id; $id++; } } if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { # Parallel check all hosts are up. 
Remove hosts that are down filter_hosts(); } if($opt::nonall or $opt::onall) { onall(@command); wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); } # TODO --transfer foo/./bar --cleanup # multiple --transfer and --basefile with different /./ $Global::JobQueue = JobQueue->new( \@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); if($opt::eta or $opt::bar) { # Count the number of jobs before starting any $Global::JobQueue->total_jobs(); } if($opt::pipepart) { @Global::cat_partials = map { pipe_part_files($_) } @opt::a; # Unget the command as many times as there are parts $Global::JobQueue->{'commandlinequeue'}->unget( map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials ); } for my $sshlogin (values %Global::host) { $sshlogin->max_jobs_running(); } init_run_jobs(); my $sem; if($Global::semaphore) { $sem = acquire_semaphore(); } $SIG{TERM} = \&start_no_new_jobs; start_more_jobs(); if(not $opt::pipepart) { if($opt::pipe) { spreadstdin(); } } ::debug("init", "Start draining\n"); drain_job_queue(); ::debug("init", "Done draining\n"); reaper(); ::debug("init", "Done reaping\n"); if($opt::pipe and @opt::a) { for my $job (@Global::tee_jobs) { unlink $job->fh(2,"name"); $job->set_fh(2,"name",""); $job->print(); unlink $job->fh(1,"name"); } } ::debug("init", "Cleaning\n"); cleanup(); if($Global::semaphore) { $sem->release(); } for(keys %Global::sshmaster) { kill "TERM", $_; } ::debug("init", "Halt\n"); if($opt::halt_on_error) { wait_and_exit($Global::halt_on_error_exitstatus); } else { wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); } sub __PIPE_MODE__ {} sub pipe_part_files { # Input: # $file = the file to read # Returns: # @commands that will cat_partial each part my ($file) = @_; my $buf = ""; my $header = find_header(\$buf,open_or_exit($file)); # find positions my @pos = find_split_positions($file,$opt::blocksize,length $header); # Make @cat_partials my @cat_partials = (); for(my $i=0; $i<$#pos; 
$i++) { push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); } # Remote exec should look like: # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) # ssh -tt not allowed. Remote will die due to broken pipe anyway. # TODO test remote with --fifo / --cat return @cat_partials; } sub find_header { # Input: # $buf_ref = reference to read-in buffer # $fh = filehandle to read from # Uses: # $opt::header # $opt::blocksize # Returns: # $header string my ($buf_ref, $fh) = @_; my $header = ""; if($opt::header) { if($opt::header eq ":") { $opt::header = "(.*\n)"; } # Number = number of lines $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { if($$buf_ref=~s/^($opt::header)//) { $header = $1; last; } } } return $header; } sub find_split_positions { # Input: # $file = the file to read # $block = (minimal) --block-size of each chunk # $headerlen = length of header to be skipped # Uses: # $opt::recstart # $opt::recend # Returns: # @positions of block start/end my($file, $block, $headerlen) = @_; my $size = -s $file; $block = int $block; # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 # The optimal dd blocksize for freebsd = 2^15..2^17 my $dd_block_size = 131072; # 2^17 my @pos; my ($recstart,$recend) = recstartrecend(); my $recendrecstart = $recend.$recstart; my $fh = ::open_or_exit($file); push(@pos,$headerlen); for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { my $buf; seek($fh, $pos, 0) || die; while(read($fh,substr($buf,length 
$buf,0),$dd_block_size)) { if($opt::regexp) { # If match /$recend$recstart/ => Record position if($buf =~ /(.*$recend)$recstart/os) { my $i = length($1); push(@pos,$pos+$i); # Start looking for next record _after_ this match $pos += $i; last; } } else { # If match $recend$recstart => Record position my $i = index($buf,$recendrecstart); if($i != -1) { push(@pos,$pos+$i); # Start looking for next record _after_ this match $pos += $i; last; } } } } push(@pos,$size); close $fh; return @pos; } sub cat_partial { # Input: # $file = the file to read # ($start, $end, [$start2, $end2, ...]) = start byte, end byte # Returns: # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout my($file, @start_end) = @_; my($start, $i); # Convert start_end to start_len my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; return "<". shell_quote_scalar($file) . q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . " @start_len"; } sub spreadstdin { # read a record # Spawn a job and print the record to it. 
# Uses: # $opt::blocksize # STDIN # $opr::r # $Global::max_lines # $Global::max_number_of_args # $opt::regexp # $Global::start_no_new_jobs # $opt::roundrobin # %Global::running my $buf = ""; my ($recstart,$recend) = recstartrecend(); my $recendrecstart = $recend.$recstart; my $chunk_number = 1; my $one_time_through; my $blocksize = $opt::blocksize; my $in = *STDIN; my $header = find_header(\$buf,$in); while(1) { my $anything_written = 0; if(not read($in,substr($buf,length $buf,0),$blocksize)) { # End-of-file $chunk_number != 1 and last; # Force the while-loop once if everything was read by header reading $one_time_through++ and last; } if($opt::r) { # Remove empty lines $buf =~ s/^\s*\n//gm; if(length $buf == 0) { next; } } if($Global::max_lines and not $Global::max_number_of_args) { # Read n-line records my $n_lines = $buf =~ tr/\n/\n/; my $last_newline_pos = rindex($buf,"\n"); while($n_lines % $Global::max_lines) { $n_lines--; $last_newline_pos = rindex($buf,"\n",$last_newline_pos-1); } # Chop at $last_newline_pos as that is where n-line record ends $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,$last_newline_pos+1); substr($buf,0,$last_newline_pos+1) = ""; } elsif($opt::regexp) { if($Global::max_number_of_args) { # -N => (start..*?end){n} # -L -N => (start..*?end){n*l} my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { # Copy to modifiable variable my $b = $1; $anything_written += write_record_to_pipe($chunk_number++,\$header,\$b, $recstart,$recend,length $1); } } else { # Find the last recend-recstart in $buf if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { # Copy to modifiable variable my $b = $1; $anything_written += write_record_to_pipe($chunk_number++,\$header,\$b, $recstart,$recend,length $1); } } } else { if($Global::max_number_of_args) { # -N => (start..*?end){n} my $i = 0; my $read_n_lines = 
$Global::max_number_of_args * ($Global::max_lines || 1); while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { $i += length $recend; # find the actual splitting location $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,$i); substr($buf,0,$i) = ""; } } else { # Find the last recend-recstart in $buf my $i = rindex($buf,$recendrecstart); if($i != -1) { $i += length $recend; # find the actual splitting location $anything_written += write_record_to_pipe($chunk_number++,\$header,\$buf, $recstart,$recend,$i); substr($buf,0,$i) = ""; } } } if(not $anything_written and not eof($in)) { # Nothing was written - maybe the block size < record size? # Increase blocksize exponentially my $old_blocksize = $blocksize; $blocksize = ceil($blocksize * 1.3 + 1); ::warning("A record was longer than $old_blocksize. " . "Increasing to --blocksize $blocksize\n"); } } ::debug("init", "Done reading input\n"); # If there is anything left in the buffer write it substr($buf,0,0) = ""; write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); $Global::start_no_new_jobs ||= 1; if($opt::roundrobin) { for my $job (values %Global::running) { close $job->fh(0,"w"); } my %incomplete_jobs = %Global::running; my $sleep = 1; while(keys %incomplete_jobs) { my $something_written = 0; for my $pid (keys %incomplete_jobs) { my $job = $incomplete_jobs{$pid}; if($job->stdin_buffer_length()) { $something_written += $job->non_block_write(); } else { delete $incomplete_jobs{$pid} } } if($something_written) { $sleep = $sleep/2+0.001; } $sleep = ::reap_usleep($sleep); } } } sub recstartrecend { # Uses: # $opt::recstart # $opt::recend # Returns: # $recstart,$recend with default values and regexp conversion my($recstart,$recend); if(defined($opt::recstart) and defined($opt::recend)) { # If both --recstart and --recend is given then both must match $recstart = $opt::recstart; $recend = $opt::recend; } elsif(defined($opt::recstart)) { # If 
--recstart is given it must match start of record $recstart = $opt::recstart; $recend = ""; } elsif(defined($opt::recend)) { # If --recend is given then it must match end of record $recstart = ""; $recend = $opt::recend; } if($opt::regexp) { # If $recstart/$recend contains '|' this should only apply to the regexp $recstart = "(?:".$recstart.")"; $recend = "(?:".$recend.")"; } else { # $recstart/$recend = printf strings (\n) $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; } return ($recstart,$recend); } sub nindex { # See if string is in buffer N times # Returns: # the position where the Nth copy is found my ($buf_ref, $str, $n) = @_; my $i = 0; for(1..$n) { $i = index($$buf_ref,$str,$i+1); if($i == -1) { last } } return $i; } { my @robin_queue; sub round_robin_write { # Input: # $header_ref = ref to $header string # $block_ref = ref to $block to be written # $recstart = record start string # $recend = record end string # $endpos = end position of $block # Uses: # %Global::running my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; my $something_written = 0; my $block_passed = 0; my $sleep = 1; while(not $block_passed) { # Continue flushing existing buffers # until one is empty and a new block is passed # Make a queue to spread the blocks evenly if(not @robin_queue) { push @robin_queue, values %Global::running; } while(my $job = shift @robin_queue) { if($job->stdin_buffer_length() > 0) { $something_written += $job->non_block_write(); } else { $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); $block_passed = 1; $job->set_virgin(0); $something_written += $job->non_block_write(); last; } } $sleep = ::reap_usleep($sleep); } return $something_written; } } sub write_record_to_pipe { # Fork then # Write record from pos 0 .. 
$endpos to pipe # Input: # $chunk_number = sequence number - to see if already run # $header_ref = reference to header string to prepend # $record_ref = reference to record to write # $recstart = start string of record # $recend = end string of record # $endpos = position in $record_ref where record ends # Uses: # $Global::job_already_run # $opt::roundrobin # @Global::virgin_jobs # Returns: # Number of chunks written (0 or 1) my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; if($endpos == 0) { return 0; } if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } if($opt::roundrobin) { return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); } # If no virgin found, backoff my $sleep = 0.0001; # 0.01 ms - better performance on highend while(not @Global::virgin_jobs) { ::debug("pipe", "No virgin jobs"); $sleep = ::reap_usleep($sleep); # Jobs may not be started because of loadavg # or too little time between each ssh login. start_more_jobs(); } my $job = shift @Global::virgin_jobs; # Job is no longer virgin $job->set_virgin(0); if(fork()) { # Skip } else { # Chop of at $endpos as we do not know how many rec_sep will # be removed. substr($$record_ref,$endpos,length $$record_ref) = ""; # Remove rec_sep if($opt::remove_rec_sep) { Job::remove_rec_sep($record_ref,$recstart,$recend); } $job->write($header_ref); $job->write($record_ref); close $job->fh(0,"w"); exit(0); } close $job->fh(0,"w"); return 1; } sub __SEM_MODE__ {} sub acquire_semaphore { # Acquires semaphore. 
If needed: spawns to the background # Uses: # @Global::host # Returns: # The semaphore to be released when jobs is complete $Global::host{':'} = SSHLogin->new(":"); my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); $sem->acquire(); if($Semaphore::fg) { # skip } else { # If run in the background, the PID will change # therefore release and re-acquire the semaphore $sem->release(); if(fork()) { exit(0); } else { # child # Get a semaphore for this pid ::die_bug("Can't start a new session: $!") if setsid() == -1; $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); $sem->acquire(); } } return $sem; } sub __PARSE_OPTIONS__ {} sub options_hash { # Returns: # %hash = the GetOptions config return ("debug|D=s" => \$opt::D, "xargs" => \$opt::xargs, "m" => \$opt::m, "X" => \$opt::X, "v" => \@opt::v, "joblog=s" => \$opt::joblog, "results|result|res=s" => \$opt::results, "resume" => \$opt::resume, "resume-failed|resumefailed" => \$opt::resume_failed, "silent" => \$opt::silent, #"silent-error|silenterror" => \$opt::silent_error, "keep-order|keeporder|k" => \$opt::keeporder, "group" => \$opt::group, "g" => \$opt::retired, "ungroup|u" => \$opt::ungroup, "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, "tmux" => \$opt::tmux, "null|0" => \$opt::0, "quote|q" => \$opt::q, # Replacement strings "parens=s" => \$opt::parens, "rpl=s" => \@opt::rpl, "plus" => \$opt::plus, "I=s" => \$opt::I, "extensionreplace|er=s" => \$opt::U, "U=s" => \$opt::retired, "basenamereplace|bnr=s" => \$opt::basenamereplace, "dirnamereplace|dnr=s" => \$opt::dirnamereplace, "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, "seqreplace=s" => \$opt::seqreplace, "slotreplace=s" => \$opt::slotreplace, "jobs|j=s" => \$opt::jobs, "delay=f" => \$opt::delay, "sshdelay=f" => \$opt::sshdelay, "load=s" => \$opt::load, "noswap" => \$opt::noswap, "max-line-length-allowed" => \$opt::max_line_length_allowed, "number-of-cpus" 
=> \$opt::number_of_cpus, "number-of-cores" => \$opt::number_of_cores, "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, "shellquote|shell_quote|shell-quote" => \$opt::shellquote, "nice=i" => \$opt::nice, "timeout=s" => \$opt::timeout, "tag" => \$opt::tag, "tagstring|tag-string=s" => \$opt::tagstring, "onall" => \$opt::onall, "nonall" => \$opt::nonall, "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, "sshlogin|S=s" => \@opt::sshlogin, "sshloginfile|slf=s" => \@opt::sshloginfile, "controlmaster|M" => \$opt::controlmaster, "return=s" => \@opt::return, "trc=s" => \@opt::trc, "transfer" => \$opt::transfer, "cleanup" => \$opt::cleanup, "basefile|bf=s" => \@opt::basefile, "B=s" => \$opt::retired, "ctrlc|ctrl-c" => \$opt::ctrlc, "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, "workdir|work-dir|wd=s" => \$opt::workdir, "W=s" => \$opt::retired, "tmpdir=s" => \$opt::tmpdir, "tempdir=s" => \$opt::tmpdir, "use-compress-program|compress-program=s" => \$opt::compress_program, "use-decompress-program|decompress-program=s" => \$opt::decompress_program, "compress" => \$opt::compress, "tty" => \$opt::tty, "T" => \$opt::retired, "halt-on-error|halt=s" => \$opt::halt_on_error, "H=i" => \$opt::retired, "retries=i" => \$opt::retries, "dry-run|dryrun" => \$opt::dryrun, "progress" => \$opt::progress, "eta" => \$opt::eta, "bar" => \$opt::bar, "arg-sep|argsep=s" => \$opt::arg_sep, "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, "trim=s" => \$opt::trim, "env=s" => \@opt::env, "recordenv|record-env" => \$opt::record_env, "plain" => \$opt::plain, "profile|J=s" => \@opt::profile, "pipe|spreadstdin" => \$opt::pipe, "robin|round-robin|roundrobin" => \$opt::roundrobin, "recstart=s" => \$opt::recstart, "recend=s" => \$opt::recend, "regexp|regex" => \$opt::regexp, "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, "files|output-as-files|outputasfiles" => \$opt::files, "block|block-size|blocksize=s" => \$opt::blocksize, "tollef" => \$opt::retired, "gnu" => 
\$opt::gnu, "xapply" => \$opt::xapply, "bibtex" => \$opt::bibtex, "nn|nonotice|no-notice" => \$opt::no_notice, # xargs-compatibility - implemented, man, testsuite "max-procs|P=s" => \$opt::jobs, "delimiter|d=s" => \$opt::d, "max-chars|s=i" => \$opt::max_chars, "arg-file|a=s" => \@opt::a, "no-run-if-empty|r" => \$opt::r, "replace|i:s" => \$opt::i, "E=s" => \$opt::eof, "eof|e:s" => \$opt::eof, "max-args|n=i" => \$opt::max_args, "max-replace-args|N=i" => \$opt::max_replace_args, "colsep|col-sep|C=s" => \$opt::colsep, "help|h" => \$opt::help, "L=f" => \$opt::L, "max-lines|l:f" => \$opt::max_lines, "interactive|p" => \$opt::p, "verbose|t" => \$opt::verbose, "version|V" => \$opt::version, "minversion|min-version=i" => \$opt::minversion, "show-limits|showlimits" => \$opt::show_limits, "exit|x" => \$opt::x, # Semaphore "semaphore" => \$opt::semaphore, "semaphoretimeout=i" => \$opt::semaphoretimeout, "semaphorename|id=s" => \$opt::semaphorename, "fg" => \$opt::fg, "bg" => \$opt::bg, "wait" => \$opt::wait, # Shebang #!/usr/bin/parallel --shebang "shebang|hashbang" => \$opt::shebang, "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, "Y" => \$opt::retired, "skip-first-line" => \$opt::skip_first_line, "header=s" => \$opt::header, "cat" => \$opt::cat, "fifo" => \$opt::fifo, "pipepart|pipe-part" => \$opt::pipepart, "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, ); } sub get_options_from_array { # Run GetOptions on @array # Input: # $array_ref = ref to @ARGV to parse # @keep_only = Keep only these options # Uses: # @ARGV # Returns: # true if parsing worked # false if parsing failed # @$array_ref is changed my ($array_ref, @keep_only) = @_; if(not @$array_ref) { # Empty array: No need to look more at that return 1; } # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not # supported everywhere my @save_argv; my $this_is_ARGV = (\@::ARGV == $array_ref); if(not $this_is_ARGV) { @save_argv = @::ARGV; @::ARGV = @{$array_ref}; } # If @keep_only set: 
sub parse_options {
    # Set the default values of the globals, read the options (command
    # line, profiles, $PARALLEL) and convert them to global settings.
    # Several options print something and exit from here.
    # Uses:
    #   @ARGV (replaced by the command to run)
    #   %ENV (TMPDIR, PARALLEL_SHELL, SHELL)
    #   most $opt::* and $Global::* variables
    # Returns: N/A

    # Defaults:
    $Global::version = 20141122;
    $Global::progname = 'parallel';
    $Global::infinity = 2**31;
    $Global::debug = 0;
    $Global::verbose = 0;
    $Global::quoting = 0;
    # Read only table with default --rpl values
    %Global::replace =
        (
         '{}'   => '',
         '{#}'  => '1 $_=$job->seq()',
         '{%}'  => '1 $_=$job->slot()',
         '{/}'  => 's:.*/::',
         '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);',
         '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
         '{.}'  => 's:\.[^/.]+$::',
        );
    %Global::plus =
        (
         # {} = {+/}/{/}
         #    = {.}.{+.}     = {+/}/{/.}.{+.}
         #    = {..}.{+..}   = {+/}/{/..}.{+..}
         #    = {...}.{+...} = {+/}/{/...}.{+...}
         '{+/}' => 's:/[^/]*$::',
         '{+.}' => 's:.*\.::',
         '{+..}' => 's:.*\.([^.]*\.):$1:',
         '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
         '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
         '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
         '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
         '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
        );
    # Modifiable copy of %Global::replace
    %Global::rpl = %Global::replace;
    $Global::parens = "{==}";
    $/="\n";
    $Global::ignore_empty = 0;
    $Global::interactive = 0;
    $Global::stderr_verbose = 0;
    $Global::default_simultaneous_sshlogins = 9;
    $Global::exitstatus = 0;
    $Global::halt_on_error_exitstatus = 0;
    $Global::arg_sep = ":::";
    $Global::arg_file_sep = "::::";
    $Global::trim = 'n';
    $Global::max_jobs_running = 0;
    $Global::job_already_run = '';
    $ENV{'TMPDIR'} ||= "/tmp";

    @ARGV=read_options();

    if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
    $Global::debug = $opt::D;
    $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh";
    if(defined $opt::X) { $Global::ContextReplace = 1; }
    if(defined $opt::silent) { $Global::verbose = 0; }
    if(defined $opt::0) { $/ = "\0"; }
    if(defined $opt::d) {
        # NOTE: the -d value is eval'ed as a double quoted string so that
        # escapes such as \t work. The value comes from the command line,
        # profiles or $PARALLEL; do not feed it untrusted input.
        my $e="sprintf \"$opt::d\"";
        $/ = eval $e;
    }
    if(defined $opt::p) { $Global::interactive = $opt::p; }
    if(defined $opt::q) { $Global::quoting = 1; }
    if(defined $opt::r) { $Global::ignore_empty = 1; }
    if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
    # Deal with --rpl
    sub rpl {
        # Modify %Global::rpl
        # Replace $old with $new
        my ($old,$new) =  @_;
        if($old ne $new) {
            $Global::rpl{$new} = $Global::rpl{$old};
            delete $Global::rpl{$old};
        }
    }
    if(defined $opt::parens) { $Global::parens = $opt::parens; }
    # First half of --parens is the left paren, the rest is the right paren
    my $parenslen = 0.5*length $Global::parens;
    $Global::parensleft = substr($Global::parens,0,$parenslen);
    $Global::parensright = substr($Global::parens,$parenslen);
    if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
    if(defined $opt::I) { rpl('{}',$opt::I); }
    if(defined $opt::U) { rpl('{.}',$opt::U); }
    if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
    if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
    if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
    if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
    if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
    if(defined $opt::basenameextensionreplace) {
        rpl('{/.}',$opt::basenameextensionreplace);
    }
    for(@opt::rpl) {
        # Create $Global::rpl entries for --rpl options
        # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
        my ($shorthand,$long) = split/ /,$_,2;
        $Global::rpl{$shorthand} = $long;
    }
    if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
    if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; }
    if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); }
    if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
    if(defined $opt::help) { die_usage(); }
    if(defined $opt::colsep) { $Global::trim = 'lr'; }
    if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; }
    if(defined $opt::trim) { $Global::trim = $opt::trim; }
    if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
    if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; }
    # Options that print a value and exit
    if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); }
    if(defined $opt::number_of_cores) {
        print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
    }
    if(defined $opt::max_line_length_allowed) {
        print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
    }
    if(defined $opt::version) { version(); wait_and_exit(0); }
    if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); }
    if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
    if(defined $opt::show_limits) { show_limits(); }
    if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
    if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
    if(@opt::return) { push @Global::ret_files, @opt::return; }
    if(not defined $opt::recstart and
       not defined $opt::recend) { $opt::recend = "\n"; }
    if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
    $opt::blocksize = multiply_binary_prefix($opt::blocksize);
    if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
    # Any semaphore related option turns on semaphore mode
    if(defined $opt::semaphore) { $Global::semaphore = 1; }
    if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
    if(defined $opt::semaphorename) { $Global::semaphore = 1; }
    if(defined $opt::fg) { $Global::semaphore = 1; }
    if(defined $opt::bg) { $Global::semaphore = 1; }
    if(defined $opt::wait) { $Global::semaphore = 1; }
    if(defined $opt::halt_on_error and
       $opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; }
    if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
        ::error("--timeout must be seconds or percentage\n");
        wait_and_exit(255);
    }
    if(defined $opt::minversion) {
        # Exit value tells if this version is at least --minversion
        print $Global::version,"\n";
        if($Global::version < $opt::minversion) {
            wait_and_exit(255);
        } else {
            wait_and_exit(0);
        }
    }
    if(not defined $opt::delay) {
        # Set --delay to --sshdelay if not set
        $opt::delay = $opt::sshdelay;
    }
    if($opt::compress_program) {
        $opt::compress = 1;
        $opt::decompress_program ||= $opt::compress_program." -dc";
    }
    if($opt::compress) {
        my ($compress, $decompress) = find_compression_program();
        $opt::compress_program ||= $compress;
        $opt::decompress_program ||= $decompress;
    }
    if(defined $opt::nonall) {
        # Append a dummy empty argument
        push @ARGV, $Global::arg_sep, "";
    }
    if(defined $opt::tty) {
        # Defaults for --tty: -j1 -u
        # Can be overridden with -jXXX -g
        if(not defined $opt::jobs) {
            $opt::jobs = 1;
        }
        if(not defined $opt::group) {
            # NOTE(review): the comment above says the default is -u, but
            # this sets ungroup to 0 - confirm which one is intended.
            $opt::ungroup = 0;
        }
    }
    if(@opt::trc) {
        # --trc implies --return, --transfer and --cleanup
        push @Global::ret_files, @opt::trc;
        $opt::transfer = 1;
        $opt::cleanup = 1;
    }
    if(defined $opt::max_lines) {
        if($opt::max_lines eq "-0") {
            # -l -0 (swallowed -0)
            $opt::max_lines = 1;
            $opt::0 = 1;
            $/ = "\0";
        } elsif ($opt::max_lines == 0) {
            # If not given (or if 0 is given) => 1
            $opt::max_lines = 1;
        }
        $Global::max_lines = $opt::max_lines;
        if(not $opt::pipe) {
            # --pipe -L means length of record - not max_number_of_args
            $Global::max_number_of_args ||= $Global::max_lines;
        }
    }

    # Read more than one arg at a time (-L, -N)
    if(defined $opt::L) {
        $Global::max_lines = $opt::L;
        if(not $opt::pipe) {
            # --pipe -L means length of record - not max_number_of_args
            $Global::max_number_of_args ||= $Global::max_lines;
        }
    }
    if(defined $opt::max_replace_args) {
        $Global::max_number_of_args = $opt::max_replace_args;
        $Global::ContextReplace = 1;
    }
    if((defined $opt::L or defined $opt::max_replace_args)
       and
       not ($opt::xargs or $opt::m)) {
        $Global::ContextReplace = 1;
    }
    if(defined $opt::tag and not defined $opt::tagstring) {
        $opt::tagstring = "\257<\257>"; # Default = {}
    }
    if(defined $opt::pipepart and
       (defined $opt::L or defined $opt::max_lines
        or defined $opt::max_replace_args)) {
        ::error("--pipepart is incompatible with --max-replace-args, ",
                "--max-lines, and -L.\n");
        wait_and_exit(255);
    }
    # The separators can be set by the user (--arg-sep/--arg-file-sep), so
    # compare them as strings: interpolating them into a regexp breaks on
    # values containing regexp metacharacters (e.g. '++' or '(').
    # read_args_from_command_line() compares with 'eq', too.
    if(grep { $_ eq $Global::arg_sep or $_ eq $Global::arg_file_sep } @ARGV) {
        # Deal with ::: and ::::
        @ARGV=read_args_from_command_line();
    }

    # Semaphore defaults
    # Must be done before computing number of processes and max_line_length
    # because when running as a semaphore GNU Parallel does not read args
    $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
    if($Global::semaphore) {
        # A semaphore does not take input from neither stdin nor file
        @opt::a = ("/dev/null");
        push(@Global::unget_argv, [Arg->new("")]);
        $Semaphore::timeout = $opt::semaphoretimeout || 0;
        if(defined $opt::semaphorename) {
            $Semaphore::name = $opt::semaphorename;
        } else {
            # Default semaphore name: the output of the 'tty' command
            $Semaphore::name = `tty`;
            chomp $Semaphore::name;
        }
        $Semaphore::fg = $opt::fg;
        $Semaphore::wait = $opt::wait;
        $Global::default_simultaneous_sshlogins = 1;
        if(not defined $opt::jobs) {
            $opt::jobs = 1;
        }
        if($Global::interactive and $opt::bg) {
            ::error("Jobs running in the ".
                    "background cannot be interactive.\n");
            ::wait_and_exit(255);
        }
    }
    if(defined $opt::eta) {
        $opt::progress = $opt::eta;
    }
    if(defined $opt::bar) {
        $opt::progress = $opt::bar;
    }
    if(defined $opt::retired) {
        # All retired options share $opt::retired: list all of them
        ::error("-g has been retired. Use --group.\n");
        ::error("-B has been retired. Use --bf.\n");
        ::error("-T has been retired. Use --tty.\n");
        ::error("-U has been retired. Use --er.\n");
        ::error("-W has been retired. Use --wd.\n");
        ::error("-Y has been retired. Use --shebang.\n");
        ::error("-H has been retired. Use --halt.\n");
        ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n");
        ::wait_and_exit(255);
    }
    citation_notice();

    parse_sshlogin();
    parse_env_var();

    if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
        # As we do not know the max line length on the remote machine
        # long commands generated by xargs may fail
        # If opt_N is set, it is probably safe
        ::warning("Using -X or -m with --sshlogin may fail.\n");
    }

    if(not defined $opt::jobs) {
        $opt::jobs = "100%";
    }
    open_joblog();
}
sub env_quote {
    # Quote a value so it can be used on the right hand side of
    # 'setenv VAR ' / 'export VAR=' (see parse_env_var)
    # Input:
    #   $v = value to quote
    # Returns:
    #   $v = value quoted as environment variable
    my $v = $_[0];
    # Backslashes first, so the backslashes added below are not doubled
    $v =~ s/([\\])/\\$1/g;
    $v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g;
    # A newline is put inside double quotes
    $v =~ s/\n/"\n"/g;
    return $v;
}

sub record_env {
    # Record current %ENV-keys in ~/.parallel/ignored_vars
    # Exits with 255 if the file cannot be opened or completely written
    # Uses:
    #   %ENV
    # Returns: N/A
    my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars";
    my $vars_fh;
    my $written = open($vars_fh, ">", $ignore_filename);
    if($written) {
        $written = print $vars_fh map { $_,"\n" } keys %ENV;
        # Errors on buffered output (e.g. disk full) may only show at close
        $written = close($vars_fh) && $written;
    }
    if(not $written) {
        ::error("Cannot write to $ignore_filename\n");
        ::wait_and_exit(255);
    }
}

sub parse_env_var {
    # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen
    #
    # Bash functions must be parsed to export them remotely
    #   Pre-shellshock style bash function:
    #     myfunc=() {...
    #   Post-shellshock style bash function:
    #     BASH_FUNC_myfunc()=() {...
    #
    # Uses:
    #   $Global::envvar = eval string that will set variables in both bash and csh
    #   $Global::envwarn = If functions are used: Give warning in csh
    #   $Global::envvarlen = length of $Global::envvar
    #   @opt::env
    #   $Global::shell
    #   %ENV
    # Returns: N/A
    $Global::envvar = "";
    $Global::envwarn = "";
    my @vars = ('parallel_bash_environment');
    for my $varstring (@opt::env) {
        # Split up --env VAR1,VAR2
        push @vars, split /,/, $varstring;
    }
    if(grep { /^_$/ } @vars) {
        # --env _
        # Include all vars that are not in a clean environment
        if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) {
            my @ignore = <$vars_fh>;
            chomp @ignore;
            my %ignore;
            @ignore{@ignore} = @ignore;
            close $vars_fh;
            push @vars, grep { not defined $ignore{$_} } keys %ENV;
            @vars = grep { not /^_$/ } @vars;
        } else {
            ::error("Run '$Global::progname --record-env' in a clean environment first.\n");
            ::wait_and_exit(255);
        }
    }
    # Duplicate vars as BASH functions to include post-shellshock functions.
    # So --env myfunc should also look for BASH_FUNC_myfunc()
    @vars = map { $_, "BASH_FUNC_$_()" } @vars;
    # Keep only defined variables
    @vars = grep { defined($ENV{$_}) } @vars;
    # Pre-shellshock style bash function:
    #   myfunc=() {  echo myfunc
    #   }
    # Post-shellshock style bash function:
    #   BASH_FUNC_myfunc()=() {  echo myfunc
    #   }
    my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
    my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars;
    if(@bash_functions) {
        # Functions are not supported for all shells
        if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) {
            ::warning("Shell functions may not be supported in $Global::shell\n");
        }
    }

    # Pre-shellshock names are without ()
    my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions;
    # Post-shellshock names are with ()
    my @bash_post_shellshock = grep { /\(\)/ } @bash_functions;

    # csh version: setenv VAR value
    my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a})  }
                grep { not /^parallel_bash_environment$/ } @non_functions);
    # bash version: export VAR=value
    my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) }
                 @non_functions, @bash_pre_shellshock);

    # Pre-shellshock functions are defined by: eval myfunc"$myfunc"
    push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock;
    # Post-shellshock functions are defined by: myfunc () {...
    push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock;

    # Check if any variables contain \n
    # The name is copied before BASH_FUNC_...() is stripped: s/// on $_
    # would change the elements of @vars through the map/grep aliases.
    if(my @v = map { (my $name = $_) =~ s/BASH_FUNC_(.*)\(\)/$1/; $name }
       grep { $ENV{$_}=~/\n/ } @vars) {
        # \n is bad for csh and will cause it to fail.
        $Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | egrep "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn;
    }

    if(not @qcsh) { push @qcsh, "true"; }
    if(not @qbash) { push @qbash, "true"; }
    # Create lines like:
    # echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2"
    if(@vars) {
        $Global::envvar .=
            join"",
            (q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && }
             . join(" && ", @qcsh)
             . q{ || }
             . join(" && ", @qbash)
             .q{;});
        if($ENV{'parallel_bash_environment'}) {
            $Global::envvar .= 'eval "$parallel_bash_environment";'."\n";
        }
    }
    $Global::envvarlen = length $Global::envvar;
}
sub open_joblog {
    # Open joblog as specified by --joblog
    # With --resume/--resume-failed an existing joblog is read first and
    # the jobs found in it are marked in $Global::job_already_run.
    # Uses:
    #   $opt::resume
    #   $opt::resume_failed
    #   $opt::joblog
    #   $opt::results
    #   $Global::job_already_run
    #   $Global::joblog (set to the opened filehandle)
    #   %Global::fd
    # Returns: N/A
    my $append = 0;
    if(($opt::resume or $opt::resume_failed)
       and
       not ($opt::joblog or $opt::results)) {
        ::error("--resume and --resume-failed require --joblog or --results.\n");
        ::wait_and_exit(255);
    }
    if($opt::joblog) {
        if($opt::resume || $opt::resume_failed) {
            if(open(my $joblog_fh, "<", $opt::joblog)) {
                # Read the joblog
                $append = <$joblog_fh>; # If there is a header: Open as append later
                my $joblog_regexp;
                if($opt::resume_failed) {
                    # Make a regexp that only matches commands with exit+signal=0
                    # 4 host 1360490623.067 3.445 1023 1222 0 0 command
                    $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
                } else {
                    # Just match the job number
                    $joblog_regexp='^(\d+)';
                }
                while(<$joblog_fh>) {
                    if(/$joblog_regexp/o) {
                        # This is 30% faster than set_job_already_run($1);
                        vec($Global::job_already_run,($1||0),1) = 1;
                    } elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) {
                        ::error("Format of '$opt::joblog' is wrong: $_");
                        ::wait_and_exit(255);
                    }
                }
                close $joblog_fh;
            }
        }
        if($append) {
            # Append to joblog
            if(not open($Global::joblog, ">>", $opt::joblog)) {
                ::error("Cannot append to --joblog $opt::joblog.\n");
                ::wait_and_exit(255);
            }
        } else {
            if($opt::joblog eq "-") {
                # Use STDOUT as joblog
                $Global::joblog = $Global::fd{1};
            } elsif(not open($Global::joblog, ">", $opt::joblog)) {
                # Overwrite the joblog
                ::error("Cannot write to --joblog $opt::joblog.\n");
                ::wait_and_exit(255);
            }
            # A new joblog starts with the header
            print $Global::joblog
                join("\t", "Seq", "Host", "Starttime", "JobRuntime",
                     "Send", "Receive", "Exitval", "Signal", "Command"
                ). "\n";
        }
    }
}

sub find_compression_program {
    # Find a fast compression program
    # Returns:
    #   $compress_program = compress program with options
    #   $decompress_program = decompress program with options

    # Search for these. Sorted by speed
    my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2);
    for my $p (@prg) {
        if(which($p)) {
            return ("$p -c -1","$p -dc");
        }
    }
    # Fall back to cat
    return ("cat","cat");
}

sub read_options {
    # Read options from command line, profile and $PARALLEL
    # Uses:
    #   $opt::shebang_wrap
    #   $opt::shebang
    #   @ARGV
    #   $opt::plain
    #   @opt::profile
    #   $ENV{'HOME'}
    #   $ENV{'PARALLEL'}
    # Returns:
    #   @ARGV_no_opt = @ARGV without --options

    # This must be done first as this may exec myself
    if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
                             $ARGV[0] =~ /^--shebang-?wrap/ or
                             $ARGV[0] =~ /^--hashbang/)) {
        # Program is called from #! line in script
        # remove --shebang-wrap if it is set
        $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
        # remove --shebang if it is set
        $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
        # remove --hashbang if it is set
        $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
        if($opt::shebang) {
            my $argfile = shell_quote_scalar(pop @ARGV);
            # exec myself to split $ARGV[0] into separate fields
            exec "$0 --skip-first-line -a $argfile @ARGV";
        }
        if($opt::shebang_wrap) {
            my @options;
            my @parser;
            if ($^O eq 'freebsd') {
                # FreeBSD's #! puts different values in @ARGV than Linux' does.
                my @nooptions = @ARGV;
                get_options_from_array(\@nooptions);
                while($#ARGV > $#nooptions) {
                    push @options, shift @ARGV;
                }
                while(@ARGV and $ARGV[0] ne ":::") {
                    push @parser, shift @ARGV;
                }
                if(@ARGV and $ARGV[0] eq ":::") {
                    shift @ARGV;
                }
            } else {
                @options = shift @ARGV;
            }
            my $script = shell_quote_scalar(shift @ARGV);
            # exec myself to split $ARGV[0] into separate fields
            exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV";
        }
    }

    Getopt::Long::Configure("bundling","require_order");
    my @ARGV_copy = @ARGV;
    # Check if there is a --profile to set @opt::profile
    get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
    my @ARGV_profile = ();
    my @ARGV_env = ();
    if(not $opt::plain) {
        # Add options from .parallel/config and other profiles
        my @config_profiles = (
            "/etc/parallel/config",
            $ENV{'HOME'}."/.parallel/config",
            $ENV{'HOME'}."/.parallelrc");
        my @profiles = @config_profiles;
        if(@opt::profile) {
            # --profile overrides default profiles
            @profiles = ();
            for my $profile (@opt::profile) {
                if(-r $profile) {
                    push @profiles, $profile;
                } else {
                    push @profiles, $ENV{'HOME'}."/.parallel/".$profile;
                }
            }
        }
        for my $profile (@profiles) {
            if(-r $profile) {
                open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile");
                while(<$in_fh>) {
                    # Skip comment lines
                    /^\s*\#/ and next;
                    chomp;
                    push @ARGV_profile, shellwords($_);
                }
                close $in_fh;
            } else {
                # Compare the file names as strings: used as a regexp a
                # path with metacharacters (e.g. '+' in $HOME) would not
                # match itself and a missing default config became fatal.
                if(grep { $_ eq $profile } @config_profiles) {
                    # config file is not required to exist
                } else {
                    ::error("$profile not readable.\n");
                    wait_and_exit(255);
                }
            }
        }
        # Add options from shell variable $PARALLEL
        if($ENV{'PARALLEL'}) {
            @ARGV_env = shellwords($ENV{'PARALLEL'});
        }
    }
    Getopt::Long::Configure("bundling","require_order");
    # Parse in this order: profiles, $PARALLEL, command line
    get_options_from_array(\@ARGV_profile) || die_usage();
    get_options_from_array(\@ARGV_env) || die_usage();
    get_options_from_array(\@ARGV) || die_usage();

    # Prepend non-options to @ARGV (such as commands like 'nice')
    unshift @ARGV, @ARGV_profile, @ARGV_env;
    return @ARGV;
}
sub read_args_from_command_line {
    # Arguments given on the command line after:
    #   ::: ($Global::arg_sep)
    #   :::: ($Global::arg_file_sep)
    # Removes the arguments from @ARGV and:
    # - puts filenames into -a
    # - puts arguments into files and add the files to -a
    # Input:
    #   @::ARGV = command option ::: arg arg arg :::: argfiles
    # Uses:
    #   $Global::arg_sep
    #   $Global::arg_file_sep
    #   $opt::internal_pipe_means_argfiles
    #   $opt::pipe
    #   @opt::a
    # Returns:
    #   @argv_no_argsep = @::ARGV without ::: and :::: and following args
    my @command = ();
    my $word = shift @ARGV;
    # The test is done after $word has been taken off @ARGV
    while(@ARGV) {
        if($word ne $Global::arg_sep
           and
           $word ne $Global::arg_file_sep) {
            # Normal word: part of the command to run
            push @command, $word;
            $word = shift @ARGV;
            next;
        }
        # $word is ::: or ::::
        my $separator = $word;
        # Collect the words up to the next separator (or end of @ARGV)
        my @members;
        while(defined ($word = shift @ARGV)) {
            last if($word eq $Global::arg_sep
                    or
                    $word eq $Global::arg_file_sep);
            push @members, $word;
        }

        my $are_files = ($separator eq $Global::arg_file_sep
                         or ($opt::internal_pipe_means_argfiles and $opt::pipe));
        if($are_files) {
            # File names on the command line: append them to -a
            push @opt::a, @members;
        } elsif($separator eq $Global::arg_sep) {
            # Arguments on the command line: put them into an argfile
            my ($argfile_fh,$argfile_name) = ::tmpfile(SUFFIX => ".arg");
            unlink($argfile_name);
            print $argfile_fh map { $_,$/ } @members;
            seek $argfile_fh, 0, 0;
            # The open filehandle (not the name) is appended to -a
            push @opt::a, $argfile_fh;
        } else {
            ::die_bug("Unknown command line group: $separator");
        }
        # undef => @ARGV is empty
        last unless defined($word);
        # $word is the next ::: or ::::
        # Process it without testing @ARGV again
        redo;
    }
    # Output: @ARGV = command to run with options
    return @command;
}
sub cleanup {
    # Remove the --basefile files if any were given
    # Uses:
    #   @opt::basefile
    # Returns: N/A
    cleanup_basefile() if @opt::basefile;
}

sub __QUOTING_ARGUMENTS_FOR_SHELL__ {}

sub shell_quote {
    # Input:
    #   @strings = strings to be quoted
    # Output:
    #   @shell_quoted_strings = string quoted with \ as needed by the shell
    #   In scalar context: the quoted strings joined with space
    my @quoted = map {
        my $word = $_;
        # Put \ in front of the chars that are special to the shell
        $word =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
        # filenames with '\n' is quoted using \'
        $word =~ s/[\n]/'\n'/g;
        $word;
    } @_;
    return wantarray ? @quoted : "@quoted";
}

sub shell_quote_empty {
    # Inputs:
    #   @strings = strings to be quoted
    # Returns:
    #   @quoted_strings = empty strings quoted as ''.
    #   In scalar context: the quoted strings joined with space
    my @quoted = map { $_ eq "" ? "''" : $_ } shell_quote(@_);
    return wantarray ? @quoted : "@quoted";
}
sub shell_quote_scalar {
    # Quote the string so shell will not expand any special chars
    # Inputs:
    #   $string = string to be quoted
    # Returns:
    #   $shell_quoted = string quoted with \ as needed by the shell
    #   (undef if $string is undef)
    my $quoted = $_[0];
    return $quoted unless defined $quoted;
    # Put \ in front of the chars that are special to the shell
    # ($& is used instead of a capture group: the original notes it is
    # 1% faster)
    $quoted =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/g;
    # filenames with '\n' is quoted using \'
    $quoted =~ s/[\n]/'\n'/g;
    return $quoted;
}

sub shell_quote_file {
    # Quote the string so shell will not expand any special chars and prepend ./ if needed
    # Input:
    #   $filename = filename to be shell quoted
    # Returns:
    #   $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed
    #   (undef if $filename is undef)
    my $quoted = shell_quote_scalar(shift);
    return $quoted unless defined $quoted;
    # /abs/path and ./rel/path are kept
    # rel/path => ./rel/path
    $quoted = "./".$quoted unless $quoted =~ m:^\.?/:;
    return $quoted;
}

sub shellwords {
    # Input:
    #   $string = shell line
    # Returns:
    #   @shell_words = $string split into words as shell would do

    # Text::ParseWords is only loaded the first time it is needed
    if(not $Global::use{"Text::ParseWords"}) {
        $Global::use{"Text::ParseWords"} = eval "use Text::ParseWords; 1;";
    }
    return Text::ParseWords::shellwords(@_);
}

sub __FILEHANDLES__ {}

sub save_stdin_stdout_stderr {
    # Remember the original STDIN, STDOUT and STDERR
    # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
    # Uses:
    #   %Global::fd
    #   $Global::original_stderr
    #   $Global::original_stdin
    # Returns: N/A

    # Find file descriptors that are already opened (by the shell)
    # /dev/fd/62 and above are used by bash for <(cmd)
    for my $fdno (1..61) {
        # 2-argument-open is used to be compatible with old perl 5.8.0
        # bug #43570: Perl 5.8.0 creates 61 files
        my $fh;
        open($fh,">&=$fdno") or next;
        $Global::fd{$fdno}=$fh;
    }
    # Keep duplicates of the original STDERR and STDIN
    open $Global::original_stderr, ">&", "STDERR" or
        ::die_bug("Can't dup STDERR: $!");
    open $Global::original_stdin, "<&", "STDIN" or
        ::die_bug("Can't dup STDIN: $!");
}
sub enough_file_handles {
    # Check that we have enough filehandles available for starting
    # another job
    # Uses:
    #   $opt::ungroup
    #   %Global::fd
    # Returns:
    #   1 if ungrouped (thus not needing extra filehandles)
    #   0 if too few filehandles
    #   1 if enough filehandles

    # Ungrouped does not need extra file handles
    return 1 if $opt::ungroup;

    # perl uses 7 filehandles for something?
    # open3 uses 2 extra filehandles temporarily
    # We need a filehandle for each redirected file descriptor
    # (normally just STDOUT and STDERR)
    my $needed = 7+2+keys %Global::fd;
    my %probe;
    my $all_opened = 1;
    for my $n (1..$needed) {
        # After the first failure no more files are opened
        $all_opened &&= open($probe{$n}, "<", "/dev/null");
    }
    # The probes were only opened to see if it was possible
    close $_ for values %probe;
    return $all_opened;
}

sub open_or_exit {
    # Open a file name or exit if the file cannot be opened
    # Inputs:
    #   $file = filehandle or filename to open
    #           "-" means STDIN
    # Uses:
    #   $Global::stdin_in_opt_a
    #   $Global::original_stdin
    # Returns:
    #   $fh = file handle to read-opened file
    my $file = shift;
    if($file eq "-") {
        # Remember that STDIN is used as an input source
        $Global::stdin_in_opt_a = 1;
        return ($Global::original_stdin || *STDIN);
    }
    # This is an open filehandle: use it as it is
    return $file if ref $file eq "GLOB";

    my $fh = gensym;
    unless(open($fh, "<", $file)) {
        ::error("Cannot open input file `$file': No such file or directory.\n");
        wait_and_exit(255);
    }
    return $fh;
}

sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
sub init_run_jobs {
    # Reset the job counters and install the signal handlers
    # USR1 => list_running_jobs, USR2 => toggle_progress
    # Uses:
    #   $Global::total_running
    #   $Global::total_started
    #   $Global::tty_taken
    #   %SIG
    #   @opt::basefile
    # Returns: N/A
    ($Global::total_running, $Global::total_started, $Global::tty_taken) =
        (0, 0, 0);
    $SIG{USR1} = \&list_running_jobs;
    $SIG{USR2} = \&toggle_progress;
    setup_basefile() if @opt::basefile;
}

{
    # Time of the last check for changed files
    my $last_time;
    # mtime of each --sshloginfile when it was last (re)loaded
    my %last_mtime;

    sub start_more_jobs {
        # Run start_another_job() but only if:
        #   * not $Global::start_no_new_jobs set
        #   * not JobQueue is empty
        #   * not load on server is too high
        #   * not server swapping
        #   * not too short time since last remote login
        # Uses:
        #   $Global::max_procs_file
        #   $Global::max_procs_file_last_mod
        #   %Global::host
        #   @opt::sshloginfile
        #   $Global::start_no_new_jobs
        #   $opt::filter_hosts
        #   $Global::JobQueue
        #   $opt::pipe
        #   $opt::load
        #   $opt::noswap
        #   $opt::delay
        #   $Global::newest_starttime
        # Returns:
        #   $jobs_started = number of jobs started
        my $started_total = 0;
        return $started_total if $Global::start_no_new_jobs;

        if(time - ($last_time||0) > 1) {
            # At most do this every second
            $last_time = time;
            if($Global::max_procs_file) {
                # --jobs filename
                my $mtime = (stat($Global::max_procs_file))[9];
                if($mtime > $Global::max_procs_file_last_mod) {
                    # file changed: Force re-computing max_jobs_running
                    $Global::max_procs_file_last_mod = $mtime;
                    $_->set_max_jobs_running(undef) for values %Global::host;
                }
            }
            # Is --sshloginfile changed?
            for my $slf (@opt::sshloginfile) {
                my $actual_file = expand_slf_shorthand($slf);
                my $mtime = (stat($actual_file))[9];
                $last_mtime{$actual_file} ||= $mtime;
                next unless $mtime - $last_mtime{$actual_file} > 1;
                ::debug("run","--sshloginfile $actual_file changed. reload\n");
                $last_mtime{$actual_file} = $mtime;
                # Reload $slf
                # Empty sshlogins
                @Global::sshlogin = ();
                # Don't start new jobs on any host
                # except the ones added back later
                $_->set_max_jobs_running(0) for values %Global::host;
                # This will set max_jobs_running on the SSHlogins
                read_sshloginfile($actual_file);
                parse_sshlogin();
                $opt::filter_hosts and filter_hosts();
                setup_basefile();
            }
        }

        my $started_this_round;
        do {
            $started_this_round = 0;
            # This will start 1 job on each --sshlogin (if possible)
            # thus distribute the jobs on the --sshlogins round robin
            for my $host (values %Global::host) {
                # No more jobs in the queue
                last if $Global::JobQueue->empty() and not $opt::pipe;
                debug("run", "Running jobs before on ", $host->string(), ": ",
                      $host->jobs_running(), "\n");
                if ($host->jobs_running() < $host->max_jobs_running()) {
                    # The load is too high or unknown
                    next if $opt::load and $host->loadavg_too_high();
                    # The server is swapping
                    next if $opt::noswap and $host->swapping();
                    # It has been too short since last login
                    next if $host->too_fast_remote_login();
                    # It has been too short since last start
                    next if $opt::delay
                        and $opt::delay > ::now() - $Global::newest_starttime;
                    debug("run", $host->string(), " has ", $host->jobs_running(),
                          " out of ", $host->max_jobs_running(),
                          " jobs running. Start another.\n");
                    if(start_another_job($host) == 0) {
                        # No more jobs to start on this host
                        debug("run","No jobs started on ", $host->string(), "\n");
                        next;
                    }
                    $host->inc_jobs_running();
                    $host->set_last_login_at(::now());
                    $started_total++;
                    $started_this_round++;
                }
                debug("run","Running jobs after on ", $host->string(), ": ",
                      $host->jobs_running(), " of ",
                      $host->max_jobs_running(), "\n");
            }
            # Take another round if any job was started in this round
        } while($started_this_round);

        return $started_total;
    }
}
{
    # Set when the 'No more file handles' warning has been given
    my $no_more_file_handles_warned;

    sub start_another_job {
        # If there are enough filehandles
        #   and JobQueue not empty
        #   and not $job is in joblog
        # Then grab a job from Global::JobQueue,
        #   start it at sshlogin
        #   mark it as virgin_job
        # Inputs:
        #   $sshlogin = the SSHLogin to start the job on
        # Uses:
        #   $Global::JobQueue
        #   $opt::pipe
        #   $opt::results
        #   $opt::resume
        #   @Global::virgin_jobs
        # Returns:
        #   1 if another jobs was started
        #   0 otherwise
        my $sshlogin = shift;

        # Do we have enough file handles to start another job?
        unless(enough_file_handles()) {
            # No more file handles: only warn the first time
            $no_more_file_handles_warned++ or
                ::warning("No more file handles. ",
                          "Raising ulimit -n or /etc/security/limits.conf may help.\n");
            return 0;
        }
        if($Global::JobQueue->empty() and not $opt::pipe) {
            # No more commands to run
            debug("start", "Not starting: JobQueue empty\n");
            return 0;
        }

        my $job;
        while(1) {
            $job = get_job_with_sshlogin($sshlogin);
            unless(defined $job) {
                # No command available for that sshlogin
                debug("start", "Not starting: no jobs available for ",
                      $sshlogin->string(), "\n");
                return 0;
            }
            # Skip jobs already in job log
            next if $job->is_already_in_joblog();
            # Skip jobs already in results
            next if($opt::results and $opt::resume
                    and $job->is_already_in_results());
            last;
        }
        debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
              $job->replaced(),"'\n");

        if($job->start()) {
            push(@Global::virgin_jobs,$job) if $opt::pipe;
            debug("start", "Started as seq ", $job->seq(),
                  " pid:", $job->pid(), "\n");
            return 1;
        }

        # Not enough processes to run the job.
        # Put it back on the queue.
        $Global::JobQueue->unget($job);
        # Count down the number of jobs to run for this SSHLogin.
        my $max = $sshlogin->max_jobs_running();
        if($max > 1) {
            $max--;
        } else {
            ::error("No more processes: cannot run a single job. Something is wrong.\n");
            ::wait_and_exit(255);
        }
        $sshlogin->set_max_jobs_running($max);
        # Sleep up to 300 ms to give other processes time to die
        ::usleep(rand()*300);
        ::warning("No more processes: ",
                  "Decreasing number of running jobs to $max. ",
                  "Raising ulimit -u or /etc/security/limits.conf may help.\n");
        return 0;
    }
}
sub init_progress {
    # Turn on autoflush and compute the heading for the progress output
    # Uses:
    #   $opt::bar
    # Returns:
    #   list of computers for progress output
    #   (two empty strings if --bar is used)
    $|=1;
    if($opt::bar) {
        return("","");
    }
    my %progress = progress();
    return ("\nComputers / CPU cores / Max jobs to run\n",
            $progress{'workerlist'});
}

sub drain_job_queue {
    # Keep starting and reaping jobs until no jobs are running and
    # the JobQueue is empty (or no new jobs may be started)
    # Uses:
    #   $opt::progress
    #   $opt::pipe
    #   $Global::original_stderr
    #   $Global::total_running
    #   $Global::max_jobs_running
    #   %Global::running
    #   $Global::JobQueue
    #   %Global::host
    #   $Global::start_no_new_jobs
    # Returns: N/A
    if($opt::progress) {
        print $Global::original_stderr init_progress();
    }
    my $last_header="";
    my $sleep = 0.2;
    do {
        while($Global::total_running > 0) {
            # The first argument of debug() is the category (as in all other
            # calls of debug()); without it the number of running jobs was
            # taken as the category and never printed.
            debug("run", $Global::total_running, "==", scalar
                  keys %Global::running," slots: ", $Global::max_jobs_running,
                  "\n");
            if($opt::pipe) {
                # When using --pipe sometimes file handles are not closed properly
                for my $job (values %Global::running) {
                    close $job->fh(0,"w");
                }
            }
            if($opt::progress) {
                my %progress = progress();
                # Only print the header when it has changed
                if($last_header ne $progress{'header'}) {
                    print $Global::original_stderr "\n", $progress{'header'}, "\n";
                    $last_header = $progress{'header'};
                }
                print $Global::original_stderr "\r",$progress{'status'};
                flush $Global::original_stderr;
            }
            if($Global::total_running < $Global::max_jobs_running
               and not $Global::JobQueue->empty()) {
                # These jobs may not be started because of loadavg
                # or too little time between each ssh login.
                if(start_more_jobs() > 0) {
                    # Jobs were started: shorten the sleep
                    $sleep = $sleep/2+0.001;
                }
            }
            # Sometimes SIGCHLD is not registered, so force reaper
            $sleep = ::reap_usleep($sleep);
        }
        if(not $Global::JobQueue->empty()) {
            # These jobs may not be started:
            # * because the --filter-hosts has removed all hosts
            if(not %Global::host) {
                ::error("There are no hosts left to run on.\n");
                ::wait_and_exit(255);
            }
            # * because of loadavg
            # * because of too little time between each ssh login.
            start_more_jobs();
            $sleep = ::reap_usleep($sleep);
            if($Global::max_jobs_running == 0) {
                ::warning("There are no job slots available. Increase --jobs.\n");
            }
        }
    } while ($Global::total_running > 0
             or
             (not $Global::start_no_new_jobs and not $Global::JobQueue->empty()));
    if($opt::progress) {
        # Final status line
        my %progress = progress();
        print $Global::original_stderr "\r", $progress{'status'}, "\n";
        flush $Global::original_stderr;
    }
}
if(start_more_jobs() > 0) { # Exponential back-on if jobs were started $sleep = $sleep/2+0.001; } } # Sometimes SIGCHLD is not registered, so force reaper $sleep = ::reap_usleep($sleep); } if(not $Global::JobQueue->empty()) { # These jobs may not be started: # * because there the --filter-hosts has removed all if(not %Global::host) { ::error("There are no hosts left to run on.\n"); ::wait_and_exit(255); } # * because of loadavg # * because of too little time between each ssh login. start_more_jobs(); $sleep = ::reap_usleep($sleep); if($Global::max_jobs_running == 0) { ::warning("There are no job slots available. Increase --jobs.\n"); } } } while ($Global::total_running > 0 or not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); if($opt::progress) { my %progress = progress(); print $Global::original_stderr "\r", $progress{'status'}, "\n"; flush $Global::original_stderr; } } sub toggle_progress { # Turn on/off progress view # Uses: # $opt::progress # $Global::original_stderr # Returns: N/A $opt::progress = not $opt::progress; if($opt::progress) { print $Global::original_stderr init_progress(); } } sub progress { # Uses: # $opt::bar # $opt::eta # %Global::host # $Global::total_started # Returns: # $workerlist = list of workers # $header = that will fit on the screen # $status = message that will fit on the screen if($opt::bar) { return ("workerlist" => "", "header" => "", "status" => bar()); } my $eta = ""; my ($status,$header)=("",""); if($opt::eta) { my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = compute_eta(); $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", $this_eta, $left, $avgtime); } my $termcols = terminal_columns(); my @workers = sort keys %Global::host; my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; my $workerno = 1; my %workerno = map { ($_=>$workerno++) } @workers; my $workerlist = ""; for my $w (@workers) { $workerlist .= $workerno{$w}.":".$sshlogin{$w} ." / ". ($Global::host{$w}->ncpus() || "-")." 
/ ". $Global::host{$w}->max_jobs_running()."\n"; } $status = "x"x($termcols+1); if(length $status > $termcols) { # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; $status = $eta . join(" ",map { if($Global::total_started) { my $completed = ($Global::host{$_}->jobs_completed()||0); my $running = $Global::host{$_}->jobs_running(); my $time = $completed ? (time-$^T)/($completed) : "0"; sprintf("%s:%d/%d/%d%%/%.1fs ", $sshlogin{$_}, $running, $completed, ($running+$completed)*100 / $Global::total_started, $time); } } @workers); } if(length $status > $termcols) { # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . join(" ",map { my $completed = ($Global::host{$_}->jobs_completed()||0); my $running = $Global::host{$_}->jobs_running(); my $time = $completed ? (time-$^T)/($completed) : "0"; sprintf("%s:%d/%d/%d%%/%.1fs ", $workerno{$_}, $running, $completed, ($running+$completed)*100 / $Global::total_started, $time); } @workers); } if(length $status > $termcols) { # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . join(" ",map { sprintf("%s:%d/%d/%d%%", $sshlogin{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0), ($Global::host{$_}->jobs_running()+ ($Global::host{$_}->jobs_completed()||0))*100 / $Global::total_started) } @workers); } if(length $status > $termcols) { # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% $header = "Computer:jobs running/jobs completed/%of started jobs"; $status = $eta . 
join(" ",map { sprintf("%s:%d/%d/%d%%", $workerno{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0), ($Global::host{$_}->jobs_running()+ ($Global::host{$_}->jobs_completed()||0))*100 / $Global::total_started) } @workers); } if(length $status > $termcols) { # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX $header = "Computer:jobs running/jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d/%d", $sshlogin{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX $header = "Computer:jobs running/jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d/%d", $sshlogin{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX $header = "Computer:jobs running/jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d/%d", $workerno{$_}, $Global::host{$_}->jobs_running(), ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX $header = "Computer:jobs completed"; $status = $eta . join(" ",map { sprintf("%s:%d", $sshlogin{$_}, ($Global::host{$_}->jobs_completed()||0)) } @workers); } if(length $status > $termcols) { # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX $header = "Computer:jobs completed"; $status = $eta . 
join(" ",map { sprintf("%s:%d", $workerno{$_}, ($Global::host{$_}->jobs_completed()||0)) } @workers); } return ("workerlist" => $workerlist, "header" => $header, "status" => $status); } { my ($total, $first_completed, $smoothed_avg_time); sub compute_eta { # Calculate important numbers for ETA # Returns: # $total = number of jobs in total # $completed = number of jobs completed # $left = number of jobs left # $pctcomplete = percent of jobs completed # $avgtime = averaged time # $eta = smoothed eta $total ||= $Global::JobQueue->total_jobs(); my $completed = 0; for(values %Global::host) { $completed += $_->jobs_completed() } my $left = $total - $completed; if(not $completed) { return($total, $completed, $left, 0, 0, 0); } my $pctcomplete = $completed / $total; $first_completed ||= time; my $timepassed = (time - $first_completed); my $avgtime = $timepassed / $completed; $smoothed_avg_time ||= $avgtime; # Smooth the eta so it does not jump wildly $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + $pctcomplete * $avgtime; my $eta = int($left * $smoothed_avg_time); return($total, $completed, $left, $pctcomplete, $avgtime, $eta); } } { my ($rev,$reset); sub bar { # Return: # $status = bar with eta, completed jobs, arg and pct $rev ||= "\033[7m"; $reset ||= "\033[0m"; my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = compute_eta(); my $arg = $Global::newest_job ? $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; # These chars mess up display in the terminal $arg =~ tr/[\011-\016\033\302-\365]//d; my $bar_text = sprintf("%d%% %d:%d=%ds %s", $pctcomplete*100, $completed, $left, $eta, $arg); my $terminal_width = terminal_columns(); my $s = sprintf("%-${terminal_width}s", substr($bar_text." "x$terminal_width, 0,$terminal_width)); my $width = int($terminal_width * $pctcomplete); substr($s,$width,0) = $reset; my $zenity = sprintf("%-${terminal_width}s", substr("# $eta sec $arg", 0,$terminal_width)); $s = "\r" . $zenity . 
"\r" . $pctcomplete*100 . # Prefix with zenity header "\r" . $rev . $s . $reset; return $s; } } { my ($columns,$last_column_time); sub terminal_columns { # Get the number of columns of the display # Returns: # number of columns of the screen if(not $columns or $last_column_time < time) { $last_column_time = time; $columns = $ENV{'COLUMNS'}; if(not $columns) { my $resize = qx{ resize 2>/dev/null }; $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; } $columns ||= 80; } return $columns; } } sub get_job_with_sshlogin { # Returns: # next job object for $sshlogin if any available my $sshlogin = shift; my $job = undef; if ($opt::hostgroups) { my @other_hostgroup_jobs = (); while($job = $Global::JobQueue->get()) { if($sshlogin->in_hostgroups($job->hostgroups())) { # Found a job for this hostgroup last; } else { # This job was not in the hostgroups of $sshlogin push @other_hostgroup_jobs, $job; } } $Global::JobQueue->unget(@other_hostgroup_jobs); if(not defined $job) { # No more jobs return undef; } } else { $job = $Global::JobQueue->get(); if(not defined $job) { # No more jobs ::debug("start", "No more jobs: JobQueue empty\n"); return undef; } } my $clean_command = $job->replaced(); if($clean_command =~ /^\s*$/) { # Do not run empty lines if(not $Global::JobQueue->empty()) { return get_job_with_sshlogin($sshlogin); } else { return undef; } } $job->set_sshlogin($sshlogin); if($opt::retries and $clean_command and $job->failed_here()) { # This command with these args failed for this sshlogin my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); # Only look at the Global::host that have > 0 jobslots if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host and $job->failed_here() == $min_failures) { # It failed the same or more times on another host: # run it on this host } else { # If it failed fewer times on another host: # Find another job to run my $nextjob; if(not $Global::JobQueue->empty()) { # This can potentially recurse for 
all args no warnings 'recursion'; $nextjob = get_job_with_sshlogin($sshlogin); } # Push the command back on the queue $Global::JobQueue->unget($job); return $nextjob; } } return $job; } sub __REMOTE_SSH__ {} sub read_sshloginfiles { # Returns: N/A for my $s (@_) { read_sshloginfile(expand_slf_shorthand($s)); } } sub expand_slf_shorthand { my $file = shift; if($file eq "-") { # skip: It is stdin } elsif($file eq "..") { $file = $ENV{'HOME'}."/.parallel/sshloginfile"; } elsif($file eq ".") { $file = "/etc/parallel/sshloginfile"; } elsif(not -r $file) { if(not -r $ENV{'HOME'}."/.parallel/".$file) { # Try prepending ~/.parallel ::error("Cannot open $file.\n"); ::wait_and_exit(255); } else { $file = $ENV{'HOME'}."/.parallel/".$file; } } return $file; } sub read_sshloginfile { # Returns: N/A my $file = shift; my $close = 1; my $in_fh; ::debug("init","--slf ",$file); if($file eq "-") { $in_fh = *STDIN; $close = 0; } else { if(not open($in_fh, "<", $file)) { # Try the filename ::error("Cannot open $file.\n"); ::wait_and_exit(255); } } while(<$in_fh>) { chomp; /^\s*#/ and next; /^\s*$/ and next; push @Global::sshlogin, $_; } if($close) { close $in_fh; } } sub parse_sshlogin { # Returns: N/A my @login; if(not @Global::sshlogin) { @Global::sshlogin = (":"); } for my $sshlogin (@Global::sshlogin) { # Split up -S sshlogin,sshlogin for my $s (split /,/, $sshlogin) { if ($s eq ".." 
or $s eq "-") { # This may add to @Global::sshlogin - possibly bug read_sshloginfile(expand_slf_shorthand($s)); } else { push (@login, $s); } } } $Global::minimal_command_line_length = 8_000_000; my @allowed_hostgroups; for my $ncpu_sshlogin_string (::uniq(@login)) { my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); my $sshlogin_string = $sshlogin->string(); if($sshlogin_string eq "") { # This is an ssh group: -S @webservers push @allowed_hostgroups, $sshlogin->hostgroups(); next; } if($Global::host{$sshlogin_string}) { # This sshlogin has already been added: # It is probably a host that has come back # Set the max_jobs_running back to the original debug("run","Already seen $sshlogin_string\n"); if($sshlogin->{'ncpus'}) { # If ncpus set by '#/' of the sshlogin, overwrite it: $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); } $Global::host{$sshlogin_string}->set_max_jobs_running(undef); next; } if($sshlogin_string eq ":") { $sshlogin->set_maxlength(Limits::Command::max_length()); } else { # If all chars needs to be quoted, every other character will be \ $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); } $Global::minimal_command_line_length = ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); $Global::host{$sshlogin_string} = $sshlogin; } if(@allowed_hostgroups) { # Remove hosts that are not in these groups while (my ($string, $sshlogin) = each %Global::host) { if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { delete $Global::host{$string}; } } } # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { if(not remote_hosts()) { # There are no remote hosts if(@opt::trc) { ::warning("--trc ignored as there are no remote --sshlogin.\n"); } elsif (defined $opt::transfer) { ::warning("--transfer ignored as there are no remote --sshlogin.\n"); } elsif (@opt::return) { ::warning("--return ignored as there are no remote --sshlogin.\n"); } elsif 
(defined $opt::cleanup) { ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); } elsif (@opt::basefile) { ::warning("--basefile ignored as there are no remote --sshlogin.\n"); } } } } sub remote_hosts { # Return sshlogins that are not ':' # Returns: # list of sshlogins with ':' removed return grep !/^:$/, keys %Global::host; } sub setup_basefile { # Transfer basefiles to each $sshlogin # This needs to be done before first jobs on $sshlogin is run # Returns: N/A my $cmd = ""; my $rsync_destdir; my $workdir; for my $sshlogin (values %Global::host) { if($sshlogin->string() eq ":") { next } for my $file (@opt::basefile) { if($file !~ m:^/: and $opt::workdir eq "...") { ::error("Work dir '...' will not work with relative basefiles\n"); ::wait_and_exit(255); } $workdir ||= Job->new("")->workdir(); $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&"; } } $cmd .= "wait;"; debug("init", "basesetup: $cmd\n"); print `$cmd`; } sub cleanup_basefile { # Remove the basefiles transferred # Returns: N/A my $cmd=""; my $workdir = Job->new("")->workdir(); for my $sshlogin (values %Global::host) { if($sshlogin->string() eq ":") { next } for my $file (@opt::basefile) { $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; } } $cmd .= "wait;"; debug("init", "basecleanup: $cmd\n"); print `$cmd`; } sub filter_hosts { my(@cores, @cpus, @maxline, @echo); my $envvar = ::shell_quote_scalar($Global::envvar); while (my ($host, $sshlogin) = each %Global::host) { if($host eq ":") { next } # The 'true' is used to get the $host out later my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); # 'echo' is used to get the best possible value for an ssh login time push(@echo, $host."\t".$sshcmd." 
echo\n\0"); } my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); print $fh @cores, @cpus, @maxline, @echo; close $fh; # --timeout 5: Setting up an SSH connection and running a simple # command should never take > 5 sec. # --delay 0.1: If multiple sshlogins use the same proxy the delay # will make it less likely to overload the ssh daemon. # --retries 3: If the ssh daemon it overloaded, try 3 times # -s 16000: Half of the max line on UnixWare my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; ::debug("init", $cmd, "\n"); open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); my $prepend = ""; while(<$host_fh>) { if(/\'$/) { # if last char = ' then append next line # This may be due to quoting of $Global::envvar $prepend .= $_; next; } $_ = $prepend . $_; $prepend = ""; chomp; my @col = split /\t/, $_; if(defined $col[6]) { # This is a line from --joblog # seq host time spent sent received exit signal command # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores if($col[0] eq "Seq" and $col[1] eq "Host" and $col[2] eq "Starttime") { # Header => skip next; } # Get server from: eval true server\; $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); my $host = $1; $host =~ tr/\\//d; $Global::host{$host} or next; if($col[6] eq "255" or $col[7] eq "15") { # exit == 255 or signal == 15: ssh failed # Remove sshlogin ::debug("init", "--filtered $host\n"); push(@down_hosts, $host); @down_hosts = uniq(@down_hosts); } elsif($col[6] eq "127") { # signal == 127: parallel not installed remote # Set ncpus and ncores = 1 ::warning("Could not figure out ", "number of cpus on $host. 
Using 1.\n"); $ncores{$host} = 1; $ncpus{$host} = 1; $maxlen{$host} = Limits::Command::max_length(); } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { # Remember how log it took to log in # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); } else { ::die_bug("host check unmatched long jobline: $_"); } } elsif($Global::host{$col[0]}) { # This output from --number-of-cores, --number-of-cpus, # --max-line-length-allowed # ncores: server 8 # ncpus: server 2 # maxlen: server 131071 if(not $ncores{$col[0]}) { $ncores{$col[0]} = $col[1]; } elsif(not $ncpus{$col[0]}) { $ncpus{$col[0]} = $col[1]; } elsif(not $maxlen{$col[0]}) { $maxlen{$col[0]} = $col[1]; } elsif(not $echo{$col[0]}) { $echo{$col[0]} = $col[1]; } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { # Skip these: # perl: warning: Setting locale failed. # perl: warning: Please check that your locale settings: # LANGUAGE = (unset), # LC_ALL = (unset), # LANG = "en_US.UTF-8" # are supported and installed on your system. # perl: warning: Falling back to the standard locale ("C"). 
} else { ::die_bug("host check too many col0: $_"); } } else { ::die_bug("host check unmatched short jobline ($col[0]): $_"); } } close $host_fh; $Global::debug or unlink $tmpfile; delete @Global::host{@down_hosts}; @down_hosts and ::warning("Removed @down_hosts\n"); $Global::minimal_command_line_length = 8_000_000; while (my ($sshlogin, $obj) = each %Global::host) { if($sshlogin eq ":") { next } $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); if($opt::use_cpus_instead_of_cores) { $obj->set_ncpus($ncpus{$sshlogin}); } else { $obj->set_ncpus($ncores{$sshlogin}); } $obj->set_time_to_login($time_to_login{$sshlogin}); $obj->set_maxlength($maxlen{$sshlogin}); $Global::minimal_command_line_length = ::min($Global::minimal_command_line_length, int($maxlen{$sshlogin}/2)); ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin}, " ncores:", $ncores{$sshlogin}, " time_to_login:", $time_to_login{$sshlogin}, " maxlen:", $maxlen{$sshlogin}, " min_max_len:", $Global::minimal_command_line_length,"\n"); } } sub onall { sub tmp_joblog { my $joblog = shift; if(not defined $joblog) { return undef; } my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); close $fh; return $tmpfile; } my @command = @_; if($Global::quoting) { @command = shell_quote_empty(@command); } # Copy all @fhlist into tempfiles my @argfiles = (); for my $fh (@fhlist) { my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); print $outfh (<$fh>); close $outfh; push @argfiles, $name; } if(@opt::basefile) { setup_basefile(); } # for each sshlogin do: # parallel -S $sshlogin $command :::: @argfiles # # Pass some of the options to the sub-parallels, not all of them as # -P should only go to the first, and -S should not be copied at all. 
my $options = join(" ", ((defined $opt::jobs) ? "-P $opt::jobs" : ""), ((defined $opt::linebuffer) ? "--linebuffer" : ""), ((defined $opt::ungroup) ? "-u" : ""), ((defined $opt::group) ? "-g" : ""), ((defined $opt::keeporder) ? "--keeporder" : ""), ((defined $opt::D) ? "-D $opt::D" : ""), ((defined $opt::plain) ? "--plain" : ""), ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), ); my $suboptions = join(" ", ((defined $opt::ungroup) ? "-u" : ""), ((defined $opt::linebuffer) ? "--linebuffer" : ""), ((defined $opt::group) ? "-g" : ""), ((defined $opt::files) ? "--files" : ""), ((defined $opt::keeporder) ? "--keeporder" : ""), ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), ((@opt::v) ? "-vv" : ""), ((defined $opt::D) ? "-D $opt::D" : ""), ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), ((defined $opt::plain) ? "--plain" : ""), ((defined $opt::retries) ? "--retries ".$opt::retries : ""), ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), ); ::debug("init", "| $0 $options\n"); open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || ::die_bug("This does not run GNU Parallel: $0 $options"); my @joblogs; for my $host (sort keys %Global::host) { my $sshlogin = $Global::host{$host}; my $joblog = tmp_joblog($opt::joblog); if($joblog) { push @joblogs, $joblog; $joblog = "--joblog $joblog"; } my $quad = $opt::arg_file_sep || "::::"; ::debug("init", "$0 $suboptions -j1 $joblog ", ((defined $opt::tag) ? "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), " -S ", shell_quote_scalar($sshlogin->string())," ", join(" ",shell_quote(@command))," $quad @argfiles\n"); print $parallel_fh "$0 $suboptions -j1 $joblog ", ((defined $opt::tag) ? 
"--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), " -S ", shell_quote_scalar($sshlogin->string())," ", join(" ",shell_quote(@command))," $quad @argfiles\n"; } close $parallel_fh; $Global::exitstatus = $? >> 8; debug("init", "--onall exitvalue ", $?); if(@opt::basefile) { cleanup_basefile(); } $Global::debug or unlink(@argfiles); my %seen; for my $joblog (@joblogs) { # Append to $joblog open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); # Skip first line (header); <$fh>; print $Global::joblog (<$fh>); close $fh; unlink($joblog); } } sub __SIGNAL_HANDLING__ {} sub save_original_signal_handler { # Remember the original signal handler # Returns: N/A $SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X $SIG{INT} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } unlink keys %Global::unlink; exit -1 }; $SIG{TERM} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } unlink keys %Global::unlink; exit -1 }; %Global::original_sig = %SIG; $SIG{TERM} = sub {}; # Dummy until jobs really start } sub list_running_jobs { # Returns: N/A for my $v (values %Global::running) { print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n"; } } sub start_no_new_jobs { # Returns: N/A $SIG{TERM} = $Global::original_sig{TERM}; print $Global::original_stderr ("$Global::progname: SIGTERM received. No new jobs will be started.\n", "$Global::progname: Waiting for these ", scalar(keys %Global::running), " jobs to finish. Send SIGTERM again to stop now.\n"); list_running_jobs(); $Global::start_no_new_jobs ||= 1; } sub reaper { # A job finished. # Print the output. 
# Start another job # Returns: N/A my $stiff; my $children_reaped = 0; debug("run", "Reaper "); while (($stiff = waitpid(-1, &WNOHANG)) > 0) { $children_reaped++; if($Global::sshmaster{$stiff}) { # This is one of the ssh -M: ignore next; } my $job = $Global::running{$stiff}; # '-a <(seq 10)' will give us a pid not in %Global::running $job or next; $job->set_exitstatus($? >> 8); $job->set_exitsignal($? & 127); debug("run", "died (", $job->exitstatus(), "): ", $job->seq()); $job->set_endtime(::now()); if($stiff == $Global::tty_taken) { # The process that died had the tty => release it $Global::tty_taken = 0; } if(not $job->should_be_retried()) { # The job is done # Free the jobslot push @Global::slots, $job->slot(); if($opt::timeout) { # Update average runtime for timeout $Global::timeoutq->update_delta_time($job->runtime()); } # Force printing now if the job failed and we are going to exit my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2 and $job->exitstatus()); if($opt::keeporder and not $print_now) { print_earlier_jobs($job); } else { $job->print(); } if($job->exitstatus()) { process_failed_job($job); } } my $sshlogin = $job->sshlogin(); $sshlogin->dec_jobs_running(); $sshlogin->inc_jobs_completed(); $Global::total_running--; delete $Global::running{$stiff}; start_more_jobs(); } debug("run", "done "); return $children_reaped; } sub process_failed_job { # The jobs had a exit status <> 0, so error # Returns: N/A my $job = shift; $Global::exitstatus++; $Global::total_failed++; if($opt::halt_on_error) { if($opt::halt_on_error == 1 or ($opt::halt_on_error < 1 and $Global::total_failed > 3 and $Global::total_failed / $Global::total_started > $opt::halt_on_error)) { # If halt on error == 1 or --halt 10% # we should gracefully exit print $Global::original_stderr ("$Global::progname: Starting no more jobs. ", "Waiting for ", scalar(keys %Global::running), " jobs to finish. 
This job failed:\n", $job->replaced(),"\n"); $Global::start_no_new_jobs ||= 1; $Global::halt_on_error_exitstatus = $job->exitstatus(); } elsif($opt::halt_on_error == 2) { # If halt on error == 2 we should exit immediately print $Global::original_stderr ("$Global::progname: This job failed:\n", $job->replaced(),"\n"); exit ($job->exitstatus()); } } } { my (%print_later,$job_end_sequence); sub print_earlier_jobs { # Print jobs completed earlier # Returns: N/A my $job = shift; $print_later{$job->seq()} = $job; $job_end_sequence ||= 1; debug("run", "Looking for: $job_end_sequence ", "Current: ", $job->seq(), "\n"); for(my $j = $print_later{$job_end_sequence}; $j or vec($Global::job_already_run,$job_end_sequence,1); $job_end_sequence++, $j = $print_later{$job_end_sequence}) { debug("run", "Found job end $job_end_sequence"); if($j) { $j->print(); delete $print_later{$job_end_sequence}; } } } } sub __USAGE__ {} sub wait_and_exit { # If we do not wait, we sometimes get segfault # Returns: N/A my $error = shift; if($error) { # Kill all without printing for my $job (values %Global::running) { $job->kill("TERM"); $job->kill("TERM"); } } for (keys %Global::unkilled_children) { kill 9, $_; waitpid($_,0); delete $Global::unkilled_children{$_}; } wait(); exit($error); } sub die_usage { # Returns: N/A usage(); wait_and_exit(255); } sub usage { # Returns: N/A print join ("\n", "Usage:", "", "$Global::progname [options] [command [arguments]] < list_of_arguments", "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", "cat ... 
| $Global::progname --pipe [options] [command [arguments]]", "", "-j n Run n jobs in parallel", "-k Keep same order", "-X Multiple arguments with context replace", "--colsep regexp Split input on regexp for positional replacements", "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", "", "-S sshlogin Example: foo\@server.example.com", "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", "--onall Run the given command with argument on all sshlogins", "--nonall Run the given command with no arguments on all sshlogins", "", "--pipe Split stdin (standard input) to multiple jobs.", "--recend str Record end separator for --pipe.", "--recstart str Record start separator for --pipe.", "", "See 'man $Global::progname' for details", "", "When using programs that use GNU Parallel to process data for publication please cite:", "", "O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", ";login: The USENIX Magazine, February 2011:42-47.", "", "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", ""); } sub citation_notice { # if --no-notice or --plain: do nothing # if stderr redirected: do nothing # if ~/.parallel/will-cite: do nothing # else: print citation notice to stderr if($opt::no_notice or $opt::plain or not -t $Global::original_stderr or -e $ENV{'HOME'}."/.parallel/will-cite") { # skip } else { print $Global::original_stderr ("When using programs that use GNU Parallel to process data for publication please cite:\n", "\n", " O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", " ;login: The USENIX Magazine, February 2011:42-47.\n", "\n", "This helps funding further development; and it won't cost you a cent.\n", "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", "\n", "To silence this citation notice run 'parallel --bibtex' once or use '--no-notice'.\n\n", ); flush $Global::original_stderr; } } sub warning { my @w = @_; my $fh = $Global::original_stderr || *STDERR; my $prog = $Global::progname || "parallel"; print $fh $prog, ": Warning: ", @w; } sub error { my @w = @_; my $fh = $Global::original_stderr || *STDERR; my $prog = $Global::progname || "parallel"; print $fh $prog, ": Error: ", @w; } sub die_bug { my $bugid = shift; print STDERR ("$Global::progname: This should not happen. You have found a bug.\n", "Please contact and include:\n", "* The version number: $Global::version\n", "* The bugid: $bugid\n", "* The command line being run\n", "* The files being read (put the files on a webserver if they are big)\n", "\n", "If you get the error on smaller/fewer files, please include those instead.\n"); ::wait_and_exit(255); } sub version { # Returns: N/A if($opt::tollef and not $opt::gnu) { print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; } print join("\n", "GNU $Global::progname $Global::version", "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and Free Software Foundation, Inc.", "License GPLv3+: GNU GPL version 3 or later ", "This is free software: you are free to change and redistribute it.", "GNU $Global::progname comes with no warranty.", "", "Web site: http://www.gnu.org/software/${Global::progname}\n", "When using programs that use GNU Parallel to process data for publication please cite:\n", "O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool, ", ";login: The USENIX Magazine, February 2011:42-47.\n", "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", ); } sub bibtex { # Returns: N/A if($opt::tollef and not $opt::gnu) { print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; } print join("\n", "When using programs that use GNU Parallel to process data for publication please cite:", "", "\@article{Tange2011a,", " title = {GNU Parallel - The Command-Line Power Tool},", " author = {O. Tange},", " address = {Frederiksberg, Denmark},", " journal = {;login: The USENIX Magazine},", " month = {Feb},", " number = {1},", " volume = {36},", " url = {http://www.gnu.org/s/parallel},", " year = {2011},", " pages = {42-47}", "}", "", "(Feel free to use \\nocite{Tange2011a})", "", "This helps funding further development.", "", "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", "" ); while(not -e $ENV{'HOME'}."/.parallel/will-cite") { print "\nType: 'will cite' and press enter.\n> "; my $input = ; if($input =~ /will cite/i) { mkdir $ENV{'HOME'}."/.parallel"; open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite") || ::die_bug("Cannot write: ".$ENV{'HOME'}."/.parallel/will-cite"); close $fh; print "\nThank you for your support. It is much appreciated. 
The citation\n", "notice is now silenced.\n"; } } } sub show_limits { # Returns: N/A print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", "Maximal used size of command: ",Limits::Command::max_length(),"\n", "\n", "Execution of will continue now, and it will try to read its input\n", "and run commands; if this is not what you wanted to happen, please\n", "press CTRL-D or CTRL-C\n"); } sub __GENERIC_COMMON_FUNCTION__ {} sub uniq { # Remove duplicates and return unique values return keys %{{ map { $_ => 1 } @_ }}; } sub min { # Returns: # Minimum value of array my $min; for (@_) { # Skip undefs defined $_ or next; defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef $min = ($min < $_) ? $min : $_; } return $min; } sub max { # Returns: # Maximum value of array my $max; for (@_) { # Skip undefs defined $_ or next; defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef $max = ($max > $_) ? $max : $_; } return $max; } sub sum { # Returns: # Sum of values of array my @args = @_; my $sum = 0; for (@args) { # Skip undefs $_ and do { $sum += $_; } } return $sum; } sub undef_as_zero { my $a = shift; return $a ? $a : 0; } sub undef_as_empty { my $a = shift; return $a ? $a : ""; } { my $hostname; sub hostname { if(not $hostname) { $hostname = `hostname`; chomp($hostname); $hostname ||= "nohostname"; } return $hostname; } } sub which { # Input: # @programs = programs to find the path to # Returns: # @full_path = full paths to @programs. 
Nothing if not found my @which; for my $prg (@_) { push @which, map { $_."/".$prg } grep { -x $_."/".$prg } split(":",$ENV{'PATH'}); } return @which; } { my ($regexp,%fakename); sub parent_shell { # Input: # $pid = pid to see if (grand)*parent is a shell # Returns: # $shellpath = path to shell - undef if no shell found my $pid = shift; if(not $regexp) { # All shells known to mankind # # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh # posh rbash rush rzsh sash sh static-sh tcsh yash zsh my @shells = qw(ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh posh rbash rush rzsh sash sh static-sh tcsh yash zsh -sh -csh); # Can be formatted as: # [sh] -sh sh busybox sh # /bin/sh /sbin/sh /opt/csw/sh # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh my $shell = "(?:".join("|",@shells).")"; $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| )'; %fakename = ( # csh and tcsh disguise themselves as -sh/-csh "-sh" => ["csh", "tcsh"], "-csh" => ["tcsh", "csh"], ); } my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); my $shellpath; my $testpid = $pid; while($testpid) { ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); if($name_of_ref->{$testpid} =~ /$regexp/o) { ::debug("init", "which ".($3||$6)." => "); $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; ::debug("init", "shell path $shellpath\n"); $shellpath and last; } $testpid = $parent_of_ref->{$testpid}; } return $shellpath; } } { my %pid_parentpid_cmd; sub pid_table { # Returns: # %children_of = { pid -> children of pid } # %parent_of = { pid -> pid of parent } # %name_of = { pid -> commandname } if(not %pid_parentpid_cmd) { # Filter for SysV-style `ps` my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). 
q(s/^.{$s}//; print "@F[1,2] $_"' ); # BSD-style `ps` my $bsd = q(ps -o pid,ppid,command -ax); %pid_parentpid_cmd = ( 'aix' => $sysv, 'cygwin' => $sysv, 'msys' => $sysv, 'dec_osf' => $sysv, 'darwin' => $bsd, 'dragonfly' => $bsd, 'freebsd' => $bsd, 'gnu' => $sysv, 'hpux' => $sysv, 'linux' => $sysv, 'mirbsd' => $bsd, 'netbsd' => $bsd, 'nto' => $sysv, 'openbsd' => $bsd, 'solaris' => $sysv, 'svr5' => $sysv, ); } $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); my (@pidtable,%parent_of,%children_of,%name_of); # Table with pid -> children of pid @pidtable = `$pid_parentpid_cmd{$^O}`; my $p=$$; for (@pidtable) { # must match: 24436 21224 busybox ash /(\S+)\s+(\S+)\s+(\S+.*)/ or ::die_bug("pidtable format: $_"); $parent_of{$1} = $2; push @{$children_of{$2}}, $1; $name_of{$1} = $3; } return(\%children_of, \%parent_of, \%name_of); } } sub reap_usleep { # Reap dead children. # If no dead children: Sleep specified amount with exponential backoff # Input: # $ms = milliseconds to sleep # Returns: # $ms/2+0.001 if children reaped # $ms*1.1 if no children reaped my $ms = shift; if(reaper()) { # Sleep exponentially shorter (1/2^n) if a job finished return $ms/2+0.001; } else { if($opt::timeout) { $Global::timeoutq->process_timeouts(); } usleep($ms); Job::exit_if_disk_full(); if($opt::linebuffer) { for my $job (values %Global::running) { $job->print(); } } # Sleep exponentially longer (1.1^n) if a job did not finish # though at most 1000 ms. return (($ms < 1000) ? ($ms * 1.1) : ($ms)); } } sub usleep { # Sleep this many milliseconds. 
# Input:
#   $ms = milliseconds to sleep
    my $ms = shift;
    ::debug(int($ms),"ms ");
    # 4-argument select with no file handles is a sub-second sleep
    select(undef, undef, undef, $ms/1000);
}

sub now {
    # Returns time since epoch as in seconds with 3 decimals
    # Uses:
    #   %Global::use
    # Returns:
    #   $time = time now with millisecond accuracy
    if(not $Global::use{"Time::HiRes"}) {
        # A bare `use` gives the eval no true value to return, so the
        # test needs an explicit "; 1" to tell a successful load from
        # a failed one.
        if(eval "use Time::HiRes qw ( time ); 1") {
            eval "sub TimeHiRestime { return Time::HiRes::time };";
        } else {
            # Time::HiRes not available: whole seconds only
            eval "sub TimeHiRestime { return time() };";
        }
        $Global::use{"Time::HiRes"} = 1;
    }
    # Truncate to 3 decimals
    return (int(TimeHiRestime()*1000))/1000;
}

sub multiply_binary_prefix {
    # Evaluate numbers with binary prefix
    # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^60, Zi=2^70, Yi=2^80, Xi=2^90
    # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^60, zi=2^70, yi=2^80, xi=2^90
    # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^60, Z =2^70, Y =2^80, X =2^90
    # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24, x=10^27
    # 13G = 13*1024*1024*1024 = 13958643712
    # Input:
    #   $s = string with prefixes
    # Returns:
    #   $value = int with prefixes multiplied
    my $s = shift;
    # Order matters: two-letter prefixes (any case) first, then upper
    # case single letters, then lower case single letters.
    $s =~ s/ki/*1024/gi;
    $s =~ s/mi/*1024*1024/gi;
    $s =~ s/gi/*1024*1024*1024/gi;
    $s =~ s/ti/*1024*1024*1024*1024/gi;
    $s =~ s/pi/*1024*1024*1024*1024*1024/gi;
    $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi;
    $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
    $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
    $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
    $s =~ s/K/*1024/g;
    $s =~ s/M/*1024*1024/g;
    $s =~ s/G/*1024*1024*1024/g;
    $s =~ s/T/*1024*1024*1024*1024/g;
    $s =~ s/P/*1024*1024*1024*1024*1024/g;
    $s =~ s/E/*1024*1024*1024*1024*1024*1024/g;
    $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
    $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
    $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
    $s =~ s/k/*1000/g;
    $s =~ s/m/*1000*1000/g;
    $s =~ s/g/*1000*1000*1000/g;
    $s =~ s/t/*1000*1000*1000*1000/g;
    $s =~ s/p/*1000*1000*1000*1000*1000/g;
    $s =~ s/e/*1000*1000*1000*1000*1000*1000/g;
    $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g;
$s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
    $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
    # NOTE(review): string eval of $s. Presumably $s is an option value
    # given by the user running the program - confirm that no caller
    # passes data from an untrusted source.
    $s = eval $s;
    ::debug($s);
    return $s;
}

sub tmpfile {
    # Create tempfile as $TMPDIR/parXXXXX
    # Input:
    #   @_ = extra arguments handed on to ::tempfile (e.g. SUFFIX => ".swp")
    # Returns:
    #   whatever ::tempfile returns; callers in this file unpack it as
    #   ($filehandle, $filename)
    return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
}

sub __DEBUGGING__ {}

sub debug {
    # Print debug output if debugging is enabled for the given tag.
    # Input:
    #   $_[0] = tag; output is printed if $Global::debug is "all" or
    #           equal to the tag
    #   @_[1..] = values to print
    # Uses:
    #   $Global::debug
    #   %Global::fd
    # Returns: N/A
    $Global::debug or return;
    # Turn undefs into "" (map, not grep: grep would drop every false
    # argument, so a value of 0 was never printed)
    @_ = map { defined $_ ? $_ : "" } @_;
    if($Global::debug eq "all" or $Global::debug eq $_[0]) {
        if($Global::fd{1}) {
            # Original stdout was saved
            my $stdout = $Global::fd{1};
            print $stdout @_[1..$#_];
        } else {
            print @_[1..$#_];
        }
    }
}

sub my_memory_usage {
    # Returns:
    #   memory usage if found (field 22, counted from 0, of the first
    #   line of /proc/<pid>/stat)
    #   0 otherwise
    use strict;
    use FileHandle;
    my $pid = $$;
    if(-e "/proc/$pid/stat") {
        my $fh = FileHandle->new("</proc/$pid/stat");
        # The file can exist and still be unreadable
        $fh or return 0;
        my $data = <$fh>;
        $fh->close;
        defined $data or return 0;
        chomp $data;
        my @procinfo = split(/\s+/,$data);
        return undef_as_zero($procinfo[22]);
    } else {
        return 0;
    }
}

sub my_size {
    # Returns:
    #   $size = size of object if Devel::Size is installed
    #   -1 otherwise
    my @size_this = (@_);
    eval "use Devel::Size qw(size total_size)";
    if ($@) {
        return -1;
    } else {
        return total_size(@size_this);
    }
}

sub my_dump {
    # Returns:
    #   ascii expression of object if Data::Dump(er) is installed
    #   error code otherwise
    my @dump_this = (@_);
    eval "use Data::Dump qw(dump);";
    if ($@) {
        # Data::Dump not installed
        eval "use Data::Dumper;";
        if ($@) {
            my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
                "Not dumping output\n";
            print $Global::original_stderr $err;
            return $err;
        } else {
            return Dumper(@dump_this);
        }
    } else {
        # Create a dummy Data::Dump::dump as Hans Schou sometimes has
        # it undefined. Only do so when it really is undefined: an
        # unconditional dummy would replace the real dump().
        defined &Data::Dump::dump or eval "sub Data::Dump::dump {}";
        eval "use Data::Dump qw(dump);";
        return (Data::Dump::dump(@dump_this));
    }
}

sub my_croak {
    # Die with a full stack trace
    eval "use Carp; 1";
    $Carp::Verbose = 1;
    croak(@_);
}

sub my_carp {
    # Warn with a full stack trace
    eval "use Carp; 1";
    $Carp::Verbose = 1;
    carp(@_);
}

sub __OBJECT_ORIENTED_PARTS__ {}

package SSHLogin;

sub new {
    # Input:
    #   $sshlogin_string = sshlogin in one of the formats below
    # Uses:
    #   %Global::hostgroups (all hostgroups seen are added)
    # Returns:
    #   new SSHLogin object
    my $class = shift;
    my $sshlogin_string = shift;
    my $ncpus;
    my %hostgroups;
    # SSHLogins can have these formats:
    #   @grp+grp/ncpu//usr/bin/ssh user@server
    #   ncpu//usr/bin/ssh user@server
    #   /usr/bin/ssh user@server
    #   user@server
    #   ncpu/user@server
    #   @grp+grp/user@server
    if($sshlogin_string =~ s:^\@([^/]+)/?::) {
        # Look for SSHLogin hostgroups
        %hostgroups = map { $_ => 1 } split(/\+/, $1);
    }
    if ($sshlogin_string =~ s:^(\d+)/::) {
        # Override default autodetected ncpus unless missing
        $ncpus = $1;
    }
    my $string = $sshlogin_string;
    # An SSHLogin is always in the hostgroup of its $string-name
    $hostgroups{$string} = 1;
    @Global::hostgroups{keys %hostgroups} = values %hostgroups;
    # Name that is safe to use as part of a file name
    my $no_slash_string = $string;
    $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
    return bless {
        'string' => $string,
        'jobs_running' => 0,
        'jobs_completed' => 0,
        'maxlength' => undef,
        'max_jobs_running' => undef,
        'orig_max_jobs_running' => undef,
        'ncpus' => $ncpus,
        'hostgroups' => \%hostgroups,
        'sshcommand' => undef,
        'serverlogin' => undef,
        'control_path_dir' => undef,
        'control_path' => undef,
        'time_to_login' => undef,
        'last_login_at' => undef,
        'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" .
            $no_slash_string,
        'loadavg' => undef,
        'last_loadavg_update' => 0,
        'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" .
            $no_slash_string,
        'swap_activity' => undef,
        # swap_activity() subtracts this from time: start it at 0 like
        # last_loadavg_update instead of leaving it undefined
        'last_swap_activity_update' => 0,
    }, ref($class) || $class;
}

sub DESTROY {
    my $self = shift;
    # Remove temporary files if they are created.
unlink $self->{'loadavg_file'}; unlink $self->{'swap_activity_file'}; } sub string { my $self = shift; return $self->{'string'}; } sub jobs_running { my $self = shift; return ($self->{'jobs_running'} || "0"); } sub inc_jobs_running { my $self = shift; $self->{'jobs_running'}++; } sub dec_jobs_running { my $self = shift; $self->{'jobs_running'}--; } sub set_maxlength { my $self = shift; $self->{'maxlength'} = shift; } sub maxlength { my $self = shift; return $self->{'maxlength'}; } sub jobs_completed { my $self = shift; return $self->{'jobs_completed'}; } sub in_hostgroups { # Input: # @hostgroups = the hostgroups to look for # Returns: # true if intersection of @hostgroups and the hostgroups of this # SSHLogin is non-empty my $self = shift; return grep { defined $self->{'hostgroups'}{$_} } @_; } sub hostgroups { my $self = shift; return keys %{$self->{'hostgroups'}}; } sub inc_jobs_completed { my $self = shift; $self->{'jobs_completed'}++; } sub set_max_jobs_running { my $self = shift; if(defined $self->{'max_jobs_running'}) { $Global::max_jobs_running -= $self->{'max_jobs_running'}; } $self->{'max_jobs_running'} = shift; if(defined $self->{'max_jobs_running'}) { # max_jobs_running could be resat if -j is a changed file $Global::max_jobs_running += $self->{'max_jobs_running'}; } # Initialize orig to the first non-zero value that comes around $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; } sub swapping { my $self = shift; my $swapping = $self->swap_activity(); return (not defined $swapping or $swapping) } sub swap_activity { # If the currently known swap activity is too old: # Recompute a new one in the background # Returns: # last swap activity computed my $self = shift; # Should we update the swap_activity file? 
my $update_swap_activity_file = 0; if(-r $self->{'swap_activity_file'}) { open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); my $swap_out = <$swap_fh>; close $swap_fh; if($swap_out =~ /^(\d+)$/) { $self->{'swap_activity'} = $1; ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); } ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); if(time - $self->{'last_swap_activity_update'} > 10) { # last swap activity update was started 10 seconds ago ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); $update_swap_activity_file = 1; } } else { ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); $self->{'swap_activity'} = undef; $update_swap_activity_file = 1; } if($update_swap_activity_file) { ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); $self->{'last_swap_activity_update'} = time; -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; my $swap_activity; $swap_activity = swapactivityscript(); if($self->{'string'} ne ":") { $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . ::shell_quote_scalar($swap_activity); } # Run swap_activity measuring. 
# As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file my $file = $self->{'swap_activity_file'}; my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); ::debug("swap", "\n", $swap_activity, "\n"); qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; } return $self->{'swap_activity'}; } { my $script; sub swapactivityscript { # Returns: # shellscript for detecting swap activity # # arguments for vmstat are OS dependant # swap_in and swap_out are in different columns depending on OS # if(not $script) { my %vmstat = ( # linux: $7*$8 # $ vmstat 1 2 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- # r b swpd free buff cache si so bi bo in cs us sy id wa # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], # solaris: $6*$7 # $ vmstat -S 1 2 # kthr memory page disk faults cpu # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], # darwin (macosx): $21*$22 # $ vm_stat -c 2 1 # Mach Virtual Memory Statistics: (page size of 4096 bytes) # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], # ultrix: $12*$13 # $ vmstat -S 1 2 # procs faults cpu memory page disk # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 # 
1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], # aix: $6*$7 # $ vmstat 1 2 # System configuration: lcpu=1 mem=2048MB # # kthr memory page faults cpu # ----- ----------- ------------------------ ------------ ----------- # r b avm fre re pi po fr sr cy in sy cs us sy id wa # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], # freebsd: $8*$9 # $ vmstat -H 1 2 # procs memory page disks faults cpu # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], # mirbsd: $8*$9 # $ vmstat 1 2 # procs memory page disks traps cpu # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], # netbsd: $7*$8 # $ vmstat 1 2 # procs memory page disks faults cpu # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], # openbsd: $8*$9 # $ vmstat 1 2 # procs memory page disks traps cpu # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], # hpux: $8*$9 # $ vmstat 1 2 # procs memory page faults cpu # r b w avm free re at pi po fr de sr in sy cs us sy id # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], # dec_osf (tru64): $11*$12 # $ vmstat 1 2 # Virtual Memory 
Statistics: (pagesize = 8192) # procs memory pages intr cpu # r w u act free wire fault cow zero react pin pout in sy cs us sy id # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], # gnu (hurd): $7*$8 # $ vmstat -k 1 2 # (pagesize: 4, size: 512288, swap size: 894972) # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], # -nto (qnx has no swap) #-irix #-svr5 (scosysv) ); my $perlscript = ""; for my $os (keys %vmstat) { #q[ { vmstat 1 2 2> /dev/null || vmstat -c 1 2; } | ]. # q[ awk 'NR!=4{next} NF==17||NF==16{print $7*$8} NF==22{print $21*$22} {exit}' ]; $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . $vmstat{$os}[1] . '}"` }'; } $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); $script = $Global::envvar. " " .$perlscript; } return $script; } } sub too_fast_remote_login { my $self = shift; if($self->{'last_login_at'} and $self->{'time_to_login'}) { # sshd normally allows 10 simultaneous logins # A login takes time_to_login # So time_to_login/5 should be safe # If now <= last_login + time_to_login/5: Then it is too soon. my $too_fast = (::now() <= $self->{'last_login_at'} + $self->{'time_to_login'}/5); ::debug("run", "Too fast? 
$too_fast "); return $too_fast; } else { # No logins so far (or time_to_login not computed): it is not too fast return 0; } } sub last_login_at { my $self = shift; return $self->{'last_login_at'}; } sub set_last_login_at { my $self = shift; $self->{'last_login_at'} = shift; } sub loadavg_too_high { my $self = shift; my $loadavg = $self->loadavg(); return (not defined $loadavg or $loadavg > $self->max_loadavg()); } sub loadavg { # If the currently know loadavg is too old: # Recompute a new one in the background # The load average is computed as the number of processes waiting for disk # or CPU right now. So it is the server load this instant and not averaged over # several minutes. This is needed so GNU Parallel will at most start one job # that will push the load over the limit. # # Returns: # $last_loadavg = last load average computed (undef if none) my $self = shift; # Should we update the loadavg file? my $update_loadavg_file = 0; if(open(my $load_fh, "<", $self->{'loadavg_file'})) { local $/ = undef; my $load_out = <$load_fh>; close $load_fh; my $load =()= ($load_out=~/(^[DR]....[^\[])/gm); if($load > 0) { # load is overestimated by 1 $self->{'loadavg'} = $load - 1; ::debug("load", "New loadavg: ", $self->{'loadavg'}); } else { ::die_bug("loadavg_invalid_content: $load_out"); } ::debug("load", "Last update: ", $self->{'last_loadavg_update'}); if(time - $self->{'last_loadavg_update'} > 10) { # last loadavg was started 10 seconds ago ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ", $self->{'loadavg_file'}); $update_loadavg_file = 1; } } else { ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); $self->{'loadavg'} = undef; $update_loadavg_file = 1; } if($update_loadavg_file) { ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); $self->{'last_loadavg_update'} = time; -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; my $cmd = 
""; if($self->{'string'} ne ":") { $cmd = $self->sshcommand() . " " . $self->serverlogin() . " "; } # TODO Is is called 'ps ax -o state,command' on other platforms? $cmd .= "ps ax -o state,command"; # As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file my $file = $self->{'loadavg_file'}; my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; } return $self->{'loadavg'}; } sub max_loadavg { my $self = shift; # If --load is a file it might be changed if($Global::max_load_file) { my $mtime = (stat($Global::max_load_file))[9]; if($mtime > $Global::max_load_file_last_mod) { $Global::max_load_file_last_mod = $mtime; for my $sshlogin (values %Global::host) { $sshlogin->set_max_loadavg(undef); } } } if(not defined $self->{'max_loadavg'}) { $self->{'max_loadavg'} = $self->compute_max_loadavg($opt::load); } ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); return $self->{'max_loadavg'}; } sub set_max_loadavg { my $self = shift; $self->{'max_loadavg'} = shift; } sub compute_max_loadavg { # Parse the max loadaverage that the user asked for using --load # Returns: # max loadaverage my $self = shift; my $loadspec = shift; my $load; if(defined $loadspec) { if($loadspec =~ /^\+(\d+)$/) { # E.g. --load +2 my $j = $1; $load = $self->ncpus() + $j; } elsif ($loadspec =~ /^-(\d+)$/) { # E.g. 
--load -2 my $j = $1; $load = $self->ncpus() - $j; } elsif ($loadspec =~ /^(\d+)\%$/) { my $j = $1; $load = $self->ncpus() * $j / 100; } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { $load = $1; } elsif (-f $loadspec) { $Global::max_load_file = $loadspec; $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; if(open(my $in_fh, "<", $Global::max_load_file)) { my $opt_load_file = join("",<$in_fh>); close $in_fh; $load = $self->compute_max_loadavg($opt_load_file); } else { print $Global::original_stderr "Cannot open $loadspec\n"; ::wait_and_exit(255); } } else { print $Global::original_stderr "Parsing of --load failed\n"; ::die_usage(); } if($load < 0.01) { $load = 0.01; } } return $load; } sub time_to_login { my $self = shift; return $self->{'time_to_login'}; } sub set_time_to_login { my $self = shift; $self->{'time_to_login'} = shift; } sub max_jobs_running { my $self = shift; if(not defined $self->{'max_jobs_running'}) { my $nproc = $self->compute_number_of_processes($opt::jobs); $self->set_max_jobs_running($nproc); } return $self->{'max_jobs_running'}; } sub orig_max_jobs_running { my $self = shift; return $self->{'orig_max_jobs_running'}; } sub compute_number_of_processes { # Number of processes wanted and limited by system resources # Returns: # Number of processes my $self = shift; my $opt_P = shift; my $wanted_processes = $self->user_requested_processes($opt_P); if(not defined $wanted_processes) { $wanted_processes = $Global::default_simultaneous_sshlogins; } ::debug("load", "Wanted procs: $wanted_processes\n"); my $system_limit = $self->processes_available_by_system_limit($wanted_processes); ::debug("load", "Limited to procs: $system_limit\n"); return $system_limit; } sub processes_available_by_system_limit { # If the wanted number of processes is bigger than the system limits: # Limit them to the system limits # Limits are: File handles, number of input lines, processes, # and taking > 1 second to spawn 10 extra processes # Returns: # Number of 
processes my $self = shift; my $wanted_processes = shift; my $system_limit = 0; my @jobs = (); my $job; my @args = (); my $arg; my $more_filehandles = 1; my $max_system_proc_reached = 0; my $slow_spawining_warning_printed = 0; my $time = time; my %fh; my @children; # Reserve filehandles # perl uses 7 filehandles for something? # parallel uses 1 for memory_usage # parallel uses 4 for ? for my $i (1..12) { open($fh{"init-$i"}, "<", "/dev/null"); } for(1..2) { # System process limit my $child; if($child = fork()) { push (@children,$child); $Global::unkilled_children{$child} = 1; } elsif(defined $child) { # The child takes one process slot # It will be killed later $SIG{TERM} = $Global::original_sig{TERM}; sleep 10000000; exit(0); } else { $max_system_proc_reached = 1; } } my $count_jobs_already_read = $Global::JobQueue->next_seq(); my $wait_time_for_getting_args = 0; my $start_time = time; while(1) { $system_limit >= $wanted_processes and last; not $more_filehandles and last; $max_system_proc_reached and last; my $before_getting_arg = time; if($Global::semaphore or $opt::pipe) { # Skip: No need to get args } elsif(defined $opt::retries and $count_jobs_already_read) { # For retries we may need to run all jobs on this sshlogin # so include the already read jobs for this sshlogin $count_jobs_already_read--; } else { if($opt::X or $opt::m) { # The arguments may have to be re-spread over several jobslots # So pessimistically only read one arg per jobslot # instead of a full commandline if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { if($Global::JobQueue->empty()) { last; } else { ($job) = $Global::JobQueue->get(); push(@jobs, $job); } } else { ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); push(@args, $arg); } } else { # If there are no more command lines, then we have a process # per command line, so no need to go further $Global::JobQueue->empty() and last; ($job) = $Global::JobQueue->get(); push(@jobs, $job); } } 
$wait_time_for_getting_args += time - $before_getting_arg; $system_limit++; # Every simultaneous process uses 2 filehandles when grouping # Every simultaneous process uses 2 filehandles when compressing $more_filehandles = open($fh{$system_limit*10}, "<", "/dev/null") && open($fh{$system_limit*10+2}, "<", "/dev/null") && open($fh{$system_limit*10+3}, "<", "/dev/null") && open($fh{$system_limit*10+4}, "<", "/dev/null"); # System process limit my $child; if($child = fork()) { push (@children,$child); $Global::unkilled_children{$child} = 1; } elsif(defined $child) { # The child takes one process slot # It will be killed later $SIG{TERM} = $Global::original_sig{TERM}; sleep 10000000; exit(0); } else { $max_system_proc_reached = 1; } my $forktime = time - $time - $wait_time_for_getting_args; ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", $forktime, " (processes so far: ", $system_limit,")\n"); if($system_limit > 10 and $forktime > 1 and $forktime > $system_limit * 0.01 and not $slow_spawining_warning_printed) { # It took more than 0.01 second to fork a processes on avg. # Give the user a warning. He can press Ctrl-C if this # sucks. print $Global::original_stderr ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n", "Consider adjusting -j. Press CTRL-C to stop.\n"); $slow_spawining_warning_printed = 1; } } # Cleanup: Close the files for (values %fh) { close $_ } # Cleanup: Kill the children for my $pid (@children) { kill 9, $pid; waitpid($pid,0); delete $Global::unkilled_children{$pid}; } # Cleanup: Unget the command_lines or the @args $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); $Global::JobQueue->unget(@jobs); if($system_limit < $wanted_processes) { # The system_limit is less than the wanted_processes if($system_limit < 1 and not $Global::JobQueue->empty()) { ::warning("Cannot spawn any jobs. 
Raising ulimit -u or /etc/security/limits.conf\n", "or /proc/sys/kernel/pid_max may help.\n"); ::wait_and_exit(255); } if(not $more_filehandles) { ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n", "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ", "raising ulimit -n or /etc/security/limits.conf may help.\n"); } if($max_system_proc_reached) { ::warning("Only enough available processes to run ", $system_limit, " jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n", "or /proc/sys/kernel/pid_max may help.\n"); } } if($] == 5.008008 and $system_limit > 1000) { # https://savannah.gnu.org/bugs/?36942 $system_limit = 1000; } if($Global::JobQueue->empty()) { $system_limit ||= 1; } if($self->string() ne ":" and $system_limit > $Global::default_simultaneous_sshlogins) { $system_limit = $self->simultaneous_sshlogin_limit($system_limit); } return $system_limit; } sub simultaneous_sshlogin_limit { # Test by logging in wanted number of times simultaneously # Returns: # min($wanted_processes,$working_simultaneous_ssh_logins-1) my $self = shift; my $wanted_processes = shift; if($self->{'time_to_login'}) { return $wanted_processes; } # Try twice because it guesses wrong sometimes # Choose the minimal my $ssh_limit = ::min($self->simultaneous_sshlogin($wanted_processes), $self->simultaneous_sshlogin($wanted_processes)); if($ssh_limit < $wanted_processes) { my $serverlogin = $self->serverlogin(); ::warning("ssh to $serverlogin only allows ", "for $ssh_limit simultaneous logins.\n", "You may raise this by changing ", "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n", "Using only ",$ssh_limit-1," connections ", "to avoid race conditions.\n"); } # Race condition can cause problem if using all sshs. 
if($ssh_limit > 1) { $ssh_limit -= 1; } return $ssh_limit; } sub simultaneous_sshlogin { # Using $sshlogin try to see if we can do $wanted_processes # simultaneous logins # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l # Returns: # Number of succesful logins my $self = shift; my $wanted_processes = shift; my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes; ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or ::die_bug("simultaneouslogin"); my $ssh_limit = <$simul_fh>; close $simul_fh; chomp $ssh_limit; return $ssh_limit; } sub set_ncpus { my $self = shift; $self->{'ncpus'} = shift; } sub user_requested_processes { # Parse the number of processes that the user asked for using -j # Returns: # the number of processes to run on this sshlogin my $self = shift; my $opt_P = shift; my $processes; if(defined $opt_P) { if($opt_P =~ /^\+(\d+)$/) { # E.g. -P +2 my $j = $1; $processes = $self->ncpus() + $j; } elsif ($opt_P =~ /^-(\d+)$/) { # E.g. -P -2 my $j = $1; $processes = $self->ncpus() - $j; } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { # E.g. 
-P 10.5% my $j = $1; $processes = $self->ncpus() * $j / 100; } elsif ($opt_P =~ /^(\d+)$/) { $processes = $1; if($processes == 0) { # -P 0 = infinity (or at least close) $processes = $Global::infinity; } } elsif (-f $opt_P) { $Global::max_procs_file = $opt_P; $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9]; if(open(my $in_fh, "<", $Global::max_procs_file)) { my $opt_P_file = join("",<$in_fh>); close $in_fh; $processes = $self->user_requested_processes($opt_P_file); } else { ::error("Cannot open $opt_P.\n"); ::wait_and_exit(255); } } else { ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n"); ::die_usage(); } $processes = ::ceil($processes); } return $processes; } sub ncpus { my $self = shift; if(not defined $self->{'ncpus'}) { my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); if($serverlogin eq ":") { if($opt::use_cpus_instead_of_cores) { $self->{'ncpus'} = no_of_cpus(); } else { $self->{'ncpus'} = no_of_cores(); } } else { my $ncpu; my $sqe = ::shell_quote_scalar($Global::envvar); if($opt::use_cpus_instead_of_cores) { $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus); } else { ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n)); $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores); } chomp $ncpu; if($ncpu =~ /^\s*[0-9]+\s*$/s) { $self->{'ncpus'} = $ncpu; } else { ::warning("Could not figure out ", "number of cpus on $serverlogin ($ncpu). 
Using 1.\n"); $self->{'ncpus'} = 1; } } } return $self->{'ncpus'}; } sub no_of_cpus { # Returns: # Number of physical CPUs local $/="\n"; # If delimiter is set, then $/ will be wrong my $no_of_cpus; if ($^O eq 'linux') { $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); } elsif ($^O eq 'freebsd') { $no_of_cpus = no_of_cpus_freebsd(); } elsif ($^O eq 'netbsd') { $no_of_cpus = no_of_cpus_netbsd(); } elsif ($^O eq 'openbsd') { $no_of_cpus = no_of_cpus_openbsd(); } elsif ($^O eq 'gnu') { $no_of_cpus = no_of_cpus_hurd(); } elsif ($^O eq 'darwin') { $no_of_cpus = no_of_cpus_darwin(); } elsif ($^O eq 'solaris') { $no_of_cpus = no_of_cpus_solaris(); } elsif ($^O eq 'aix') { $no_of_cpus = no_of_cpus_aix(); } elsif ($^O eq 'hpux') { $no_of_cpus = no_of_cpus_hpux(); } elsif ($^O eq 'nto') { $no_of_cpus = no_of_cpus_qnx(); } elsif ($^O eq 'svr5') { $no_of_cpus = no_of_cpus_openserver(); } elsif ($^O eq 'irix') { $no_of_cpus = no_of_cpus_irix(); } elsif ($^O eq 'dec_osf') { $no_of_cpus = no_of_cpus_tru64(); } else { $no_of_cpus = (no_of_cpus_gnu_linux() || no_of_cpus_freebsd() || no_of_cpus_netbsd() || no_of_cpus_openbsd() || no_of_cpus_hurd() || no_of_cpus_darwin() || no_of_cpus_solaris() || no_of_cpus_aix() || no_of_cpus_hpux() || no_of_cpus_qnx() || no_of_cpus_openserver() || no_of_cpus_irix() || no_of_cpus_tru64() # Number of cores is better than no guess for #CPUs || nproc() ); } if($no_of_cpus) { chomp $no_of_cpus; return $no_of_cpus; } else { ::warning("Cannot figure out number of cpus. 
Using 1.\n"); return 1; } } sub no_of_cores { # Returns: # Number of CPU cores local $/="\n"; # If delimiter is set, then $/ will be wrong my $no_of_cores; if ($^O eq 'linux') { $no_of_cores = no_of_cores_gnu_linux(); } elsif ($^O eq 'freebsd') { $no_of_cores = no_of_cores_freebsd(); } elsif ($^O eq 'netbsd') { $no_of_cores = no_of_cores_netbsd(); } elsif ($^O eq 'openbsd') { $no_of_cores = no_of_cores_openbsd(); } elsif ($^O eq 'gnu') { $no_of_cores = no_of_cores_hurd(); } elsif ($^O eq 'darwin') { $no_of_cores = no_of_cores_darwin(); } elsif ($^O eq 'solaris') { $no_of_cores = no_of_cores_solaris(); } elsif ($^O eq 'aix') { $no_of_cores = no_of_cores_aix(); } elsif ($^O eq 'hpux') { $no_of_cores = no_of_cores_hpux(); } elsif ($^O eq 'nto') { $no_of_cores = no_of_cores_qnx(); } elsif ($^O eq 'svr5') { $no_of_cores = no_of_cores_openserver(); } elsif ($^O eq 'irix') { $no_of_cores = no_of_cores_irix(); } elsif ($^O eq 'dec_osf') { $no_of_cores = no_of_cores_tru64(); } else { $no_of_cores = (no_of_cores_gnu_linux() || no_of_cores_freebsd() || no_of_cores_netbsd() || no_of_cores_openbsd() || no_of_cores_hurd() || no_of_cores_darwin() || no_of_cores_solaris() || no_of_cores_aix() || no_of_cores_hpux() || no_of_cores_qnx() || no_of_cores_openserver() || no_of_cores_irix() || no_of_cores_tru64() || nproc() ); } if($no_of_cores) { chomp $no_of_cores; return $no_of_cores; } else { ::warning("Cannot figure out number of CPU cores. 
Using 1.\n"); return 1; } } sub nproc { # Returns: # Number of cores using `nproc` my $no_of_cores = `nproc 2>/dev/null`; return $no_of_cores; } sub no_of_cpus_gnu_linux { # Returns: # Number of physical CPUs on GNU/Linux # undef if not GNU/Linux my $no_of_cpus; my $no_of_cores; if(-e "/proc/cpuinfo") { $no_of_cpus = 0; $no_of_cores = 0; my %seen; open(my $in_fh, "<", "/proc/cpuinfo") || return undef; while(<$in_fh>) { if(/^physical id.*[:](.*)/ and not $seen{$1}++) { $no_of_cpus++; } /^processor.*[:]/i and $no_of_cores++; } close $in_fh; } return ($no_of_cpus||$no_of_cores); } sub no_of_cores_gnu_linux { # Returns: # Number of CPU cores on GNU/Linux # undef if not GNU/Linux my $no_of_cores; if(-e "/proc/cpuinfo") { $no_of_cores = 0; open(my $in_fh, "<", "/proc/cpuinfo") || return undef; while(<$in_fh>) { /^processor.*[:]/i and $no_of_cores++; } close $in_fh; } return $no_of_cores; } sub no_of_cpus_freebsd { # Returns: # Number of physical CPUs on FreeBSD # undef if not FreeBSD my $no_of_cpus = (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'` or `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`); chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_freebsd { # Returns: # Number of CPU cores on FreeBSD # undef if not FreeBSD my $no_of_cores = (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'` or `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_netbsd { # Returns: # Number of physical CPUs on NetBSD # undef if not NetBSD my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_netbsd { # Returns: # Number of CPU cores on NetBSD # undef if not NetBSD my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_openbsd { # Returns: # Number of physical CPUs on OpenBSD # undef if not OpenBSD my $no_of_cpus = `sysctl -n 
hw.ncpu 2>/dev/null`; chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_openbsd { # Returns: # Number of CPU cores on OpenBSD # undef if not OpenBSD my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_hurd { # Returns: # Number of physical CPUs on HURD # undef if not HURD my $no_of_cpus = `nproc`; chomp $no_of_cpus; return $no_of_cpus; } sub no_of_cores_hurd { # Returns: # Number of physical CPUs on HURD # undef if not HURD my $no_of_cores = `nproc`; chomp $no_of_cores; return $no_of_cores; } sub no_of_cpus_darwin { # Returns: # Number of physical CPUs on Mac Darwin # undef if not Mac Darwin my $no_of_cpus = (`sysctl -n hw.physicalcpu 2>/dev/null` or `sysctl -a hw 2>/dev/null | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }'`); return $no_of_cpus; } sub no_of_cores_darwin { # Returns: # Number of CPU cores on Mac Darwin # undef if not Mac Darwin my $no_of_cores = (`sysctl -n hw.logicalcpu 2>/dev/null` or `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); return $no_of_cores; } sub no_of_cpus_solaris { # Returns: # Number of physical CPUs on Solaris # undef if not Solaris if(-x "/usr/sbin/psrinfo") { my @psrinfo = `/usr/sbin/psrinfo`; if($#psrinfo >= 0) { return $#psrinfo +1; } } if(-x "/usr/sbin/prtconf") { my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; if($#prtconf >= 0) { return $#prtconf +1; } } return undef; } sub no_of_cores_solaris { # Returns: # Number of CPU cores on Solaris # undef if not Solaris if(-x "/usr/sbin/psrinfo") { my @psrinfo = `/usr/sbin/psrinfo`; if($#psrinfo >= 0) { return $#psrinfo +1; } } if(-x "/usr/sbin/prtconf") { my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; if($#prtconf >= 0) { return $#prtconf +1; } } return undef; } sub no_of_cpus_aix { # Returns: # Number of physical CPUs on AIX # undef if not AIX my $no_of_cpus = 0; if(-x "/usr/sbin/lscfg") { open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") || 
return undef; $no_of_cpus = <$in_fh>; chomp ($no_of_cpus); close $in_fh; } return $no_of_cpus; } sub no_of_cores_aix { # Returns: # Number of CPU cores on AIX # undef if not AIX my $no_of_cores; if(-x "/usr/bin/vmstat") { open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef; while(<$in_fh>) { /lcpu=([0-9]*) / and $no_of_cores = $1; } close $in_fh; } return $no_of_cores; } sub no_of_cpus_hpux { # Returns: # Number of physical CPUs on HP-UX # undef if not HP-UX my $no_of_cpus = (`/usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'`); return $no_of_cpus; } sub no_of_cores_hpux { # Returns: # Number of CPU cores on HP-UX # undef if not HP-UX my $no_of_cores = (`/usr/bin/mpsched -s 2>&1 | grep 'Processor Count' | awk '{ print \$3 }'`); return $no_of_cores; } sub no_of_cpus_qnx { # Returns: # Number of physical CPUs on QNX # undef if not QNX # BUG: It is now known how to calculate this. my $no_of_cpus = 0; return $no_of_cpus; } sub no_of_cores_qnx { # Returns: # Number of CPU cores on QNX # undef if not QNX # BUG: It is now known how to calculate this. 
my $no_of_cores = 0; return $no_of_cores; } sub no_of_cpus_openserver { # Returns: # Number of physical CPUs on SCO OpenServer # undef if not SCO OpenServer my $no_of_cpus = 0; if(-x "/usr/sbin/psrinfo") { my @psrinfo = `/usr/sbin/psrinfo`; if($#psrinfo >= 0) { return $#psrinfo +1; } } return $no_of_cpus; } sub no_of_cores_openserver { # Returns: # Number of CPU cores on SCO OpenServer # undef if not SCO OpenServer my $no_of_cores = 0; if(-x "/usr/sbin/psrinfo") { my @psrinfo = `/usr/sbin/psrinfo`; if($#psrinfo >= 0) { return $#psrinfo +1; } } return $no_of_cores; } sub no_of_cpus_irix { # Returns: # Number of physical CPUs on IRIX # undef if not IRIX my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; return $no_of_cpus; } sub no_of_cores_irix { # Returns: # Number of CPU cores on IRIX # undef if not IRIX my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; return $no_of_cores; } sub no_of_cpus_tru64 { # Returns: # Number of physical CPUs on Tru64 # undef if not Tru64 my $no_of_cpus = `sizer -pr`; return $no_of_cpus; } sub no_of_cores_tru64 { # Returns: # Number of CPU cores on Tru64 # undef if not Tru64 my $no_of_cores = `sizer -pr`; return $no_of_cores; } sub sshcommand { my $self = shift; if (not defined $self->{'sshcommand'}) { $self->sshcommand_of_sshlogin(); } return $self->{'sshcommand'}; } sub serverlogin { my $self = shift; if (not defined $self->{'serverlogin'}) { $self->sshcommand_of_sshlogin(); } return $self->{'serverlogin'}; } sub sshcommand_of_sshlogin { # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') # 'user@server' -> ('ssh','user@server') # 'myssh user@server' -> ('myssh','user@server') # 'myssh -l user server' -> ('myssh -l user','server') # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') # Returns: # sshcommand - defaults to 'ssh' # login@host my $self = shift; my ($sshcmd, $serverlogin); if($self->{'string'} =~ /(.+) (\S+)$/) { # Own ssh command $sshcmd = $1; 
$serverlogin = $2; } else { # Normal ssh if($opt::controlmaster) { # Use control_path to make ssh faster my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; $sshcmd = "ssh -S ".$control_path; $serverlogin = $self->{'string'}; if(not $self->{'control_path'}{$control_path}++) { # Master is not running for this control_path # Start it my $pid = fork(); if($pid) { $Global::sshmaster{$pid} ||= 1; } else { $SIG{'TERM'} = undef; # Ignore the 'foo' being printed open(STDOUT,">","/dev/null"); # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument" open(STDERR,">","/dev/null"); open(STDIN,"<","/dev/null"); # Run a sleep that outputs data, so it will discover if the ssh connection closes. my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}'); my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep); exec(@master); } } } else { $sshcmd = "ssh"; $serverlogin = $self->{'string'}; } } $self->{'sshcommand'} = $sshcmd; $self->{'serverlogin'} = $serverlogin; } sub control_path_dir { # Returns: # path to directory my $self = shift; if(not defined $self->{'control_path_dir'}) { -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; $self->{'control_path_dir'} = File::Temp::tempdir($ENV{'HOME'} . 
"/.parallel/tmp/control_path_dir-XXXX", CLEANUP => 1); } return $self->{'control_path_dir'}; } sub rsync_transfer_cmd { # Command to run to transfer a file # Input: # $file = filename of file to transfer # $workdir = destination dir # Returns: # $cmd = rsync command to run to transfer $file ("" if unreadable) my $self = shift; my $file = shift; my $workdir = shift; if(not -r $file) { ::warning($file, " is not readable and will not be transferred.\n"); return "true"; } my $rsync_destdir; if($file =~ m:^/:) { # rsync /foo/bar / $rsync_destdir = "/"; } else { $rsync_destdir = ::shell_quote_file($workdir); } $file = ::shell_quote_file($file); my $sshcmd = $self->sshcommand(); my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); my $serverlogin = $self->serverlogin(); # Make dir if it does not exist return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" . rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; } sub cleanup_cmd { # Command to run to remove the remote file # Input: # $file = filename to remove # $workdir = destination dir # Returns: # $cmd = ssh command to run to remove $file and empty parent dirs my $self = shift; my $file = shift; my $workdir = shift; my $f = $file; if($f =~ m:/\./:) { # foo/bar/./baz/quux => workdir/baz/quux # /foo/bar/./baz/quux => workdir/baz/quux $f =~ s:.*/\./:$workdir/:; } elsif($f =~ m:^[^/]:) { # foo/bar => workdir/foo/bar $f = $workdir."/".$f; } my @subdirs = split m:/:, ::dirname($f); my @rmdir; my $dir = ""; for(@subdirs) { $dir .= $_."/"; unshift @rmdir, ::shell_quote_file($dir); } my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; if(defined $opt::workdir and $opt::workdir eq "...") { $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; } $f = ::shell_quote_file($f); my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)"); } { my $rsync; sub rsync { # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. 
# If the version >= 3.1.0: downgrade to protocol 30 if(not $rsync) { my @out = `rsync --version`; for (@out) { if(/version (\d+.\d+)(.\d+)?/) { if($1 >= 3.1) { # Version 3.1.0 or later: Downgrade to protocol 30 $rsync = "rsync --protocol 30"; } else { $rsync = "rsync"; } } } $rsync or ::die_bug("Cannot figure out version of rsync: @out"); } return $rsync; } } package JobQueue; sub new { my $class = shift; my $commandref = shift; my $read_from = shift; my $context_replace = shift; my $max_number_of_args = shift; my $return_files = shift; my $commandlinequeue = CommandLineQueue->new ($commandref, $read_from, $context_replace, $max_number_of_args, $return_files); my @unget = (); return bless { 'unget' => \@unget, 'commandlinequeue' => $commandlinequeue, 'total_jobs' => undef, }, ref($class) || $class; } sub get { my $self = shift; if(@{$self->{'unget'}}) { my $job = shift @{$self->{'unget'}}; return ($job); } else { my $commandline = $self->{'commandlinequeue'}->get(); if(defined $commandline) { my $job = Job->new($commandline); return $job; } else { return undef; } } } sub unget { my $self = shift; unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}}) && $self->{'commandlinequeue'}->empty(); ::debug("run", "JobQueue->empty $empty "); return $empty; } sub total_jobs { my $self = shift; if(not defined $self->{'total_jobs'}) { my $job; my @queue; my $start = time; while($job = $self->get()) { if(time - $start > 10) { ::warning("Reading all arguments takes longer than 10 seconds.\n"); $opt::eta && ::warning("Consider removing --eta.\n"); $opt::bar && ::warning("Consider removing --bar.\n"); last; } push @queue, $job; } while($job = $self->get()) { push @queue, $job; } $self->unget(@queue); $self->{'total_jobs'} = $#queue+1; } return $self->{'total_jobs'}; } sub next_seq { my $self = shift; return $self->{'commandlinequeue'}->seq(); } sub quote_args { my $self = shift; return $self->{'commandlinequeue'}->quote_args(); } 
package Job;

sub new {
    # Create a Job wrapping one CommandLine object.
    # Input:
    #   $commandlineref = CommandLine object to run
    # Returns:
    #   $job = blessed hash with the per-job state initialized
    my $class = shift;
    my $commandlineref = shift;
    return bless {
        'commandline' => $commandlineref, # CommandLine object
        'workdir' => undef, # --workdir
        'stdin' => undef, # filehandle for stdin (used for --pipe)
        'remaining' => "", # remaining data not sent to stdin (used for --pipe)
        'datawritten' => 0, # amount of data sent via stdin (used for --pipe)
        'transfersize' => 0, # size of files using --transfer
        'returnsize' => 0, # size of files using --return
        'pid' => undef,
        # hash of { SSHLogins => number of times the command failed there }
        'failed' => undef,
        'sshlogin' => undef,
        # The commandline wrapped with rsync and ssh
        'sshlogin_wrap' => undef,
        'exitstatus' => undef,
        'exitsignal' => undef,
        # Timestamp for timeout if any
        'timeout' => undef,
        'virgin' => 1,
    }, ref($class) || $class;
}

sub replaced {
    # Returns:
    #   whatever the CommandLine object's replaced() returns
    # Dies (die_bug) if the job has no CommandLine object
    my $self = shift;
    $self->{'commandline'} or ::die_bug("commandline empty");
    return $self->{'commandline'}->replaced();
}

sub seq {
    # Returns:
    #   seq() of the CommandLine object
    my $self = shift;
    return $self->{'commandline'}->seq();
}

sub slot {
    # Returns:
    #   slot() of the CommandLine object
    my $self = shift;
    return $self->{'commandline'}->slot();
}

{
    my($cattail);

    sub cattail {
        # Returns:
        #   $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
        # The program text is built once and cached in $cattail.
        if(not $cattail) {
            $cattail = q{
                # cat followed by tail.
                # If $writerpid dead: finish after this round
                use Fcntl;

                $|=1;

                my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
                if($read_file) {
                    open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
                } else {
                    *IN = *STDIN;
                }

                my $flags;
                fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
                $flags |= O_NONBLOCK; # Add non-blocking to the flags
                fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
                open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");

                while(1) {
                    # clear EOF
                    seek(IN,0,1);
                    my $writer_running = kill 0, $writerpid;
                    $read = sysread(IN,$buf,32768);
                    if($read) {
                        # We can unlink the file now: The writer has written something
                        -e $unlink_file and unlink $unlink_file;
                        # Blocking print
                        while($buf) {
                            my $bytes_written = syswrite(OUT,$buf);
                            # syswrite may be interrupted by SIGHUP
                            substr($buf,0,$bytes_written) = "";
                        }
                        # Something printed: Wait less next time
                        $sleep /= 2;
                    } else {
                        if(eof(IN) and not $writer_running) {
                            # Writer dead: There will never be more to read => exit
                            exit;
                        }
                        # TODO This could probably be done more efficiently using select(2)
                        # Nothing read: Wait longer before next read
                        # Up to 30 milliseconds
                        $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
                        usleep($sleep);
                    }
                }

                sub usleep {
                    # Sleep this many milliseconds.
                    my $secs = shift;
                    select(undef, undef, undef, $secs/1000);
                }
            };
            # Strip the comments and squeeze whitespace, so the program
            # becomes a single line that can be passed to perl -e
            $cattail =~ s/#.*//mg;
            $cattail =~ s/\s+/ /g;
        }
        return $cattail;
    }
}

sub openoutputfiles {
    # Open files for STDOUT and STDERR
    # Set file handles in $self->fh
    # Uses:
    #   $opt::results, $opt::ungroup, $opt::files, $opt::compress,
    #   $opt::linebuffer, @Global::tee_jobs, %Global::fd
    my $self = shift;
    my ($outfhw, $errfhw, $outname, $errname);
    if($opt::results) {
        my $args_as_dirname = $self->{'commandline'}->args_as_dirname();
        # Output in: prefix/name1/val1/name2/val2/stdout
        my $dir = $opt::results."/".$args_as_dirname;
        if(eval{ File::Path::mkpath($dir); }) {
            # OK
        } else {
            # mkpath failed: Argument probably too long.
            # Set $Global::max_file_length, which will keep the individual
            # dir names shorter than the max length
            max_file_name_length($opt::results);
            $args_as_dirname = $self->{'commandline'}->args_as_dirname();
            # prefix/name1/val1/name2/val2/
            $dir = $opt::results."/".$args_as_dirname;
            File::Path::mkpath($dir);
        }
        # prefix/name1/val1/name2/val2/stdout
        $outname = "$dir/stdout";
        if(not open($outfhw, "+>", $outname)) {
            ::error("Cannot write to `$outname'.\n");
            ::wait_and_exit(255);
        }
        # prefix/name1/val1/name2/val2/stderr
        $errname = "$dir/stderr";
        if(not open($errfhw, "+>", $errname)) {
            ::error("Cannot write to `$errname'.\n");
            ::wait_and_exit(255);
        }
        $self->set_fh(1,"unlink","");
        $self->set_fh(2,"unlink","");
    } elsif(not $opt::ungroup) {
        # To group we create temporary files for STDOUT and STDERR
        # To avoid the cleanup unlink the files immediately (but keep them open)
        if(@Global::tee_jobs) {
            # files must be removed when the tee is done
        } elsif($opt::files) {
            ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
            ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
            # --files => only remove stderr
            $self->set_fh(1,"unlink","");
            $self->set_fh(2,"unlink",$errname);
        } else {
            ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
            ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
            $self->set_fh(1,"unlink",$outname);
            $self->set_fh(2,"unlink",$errname);
        }
    } else {
        # --ungroup
        open($outfhw,">&",$Global::fd{1}) || die;
        open($errfhw,">&",$Global::fd{2}) || die;
        # File name must be empty as it will otherwise be printed
        $outname = "";
        $errname = "";
        $self->set_fh(1,"unlink",$outname);
        $self->set_fh(2,"unlink",$errname);
    }
    # Set writing FD
    $self->set_fh(1,'w',$outfhw);
    $self->set_fh(2,'w',$errfhw);
    $self->set_fh(1,'name',$outname);
    $self->set_fh(2,'name',$errname);
    if($opt::compress) {
        # Send stdout to stdin for $opt::compress_program(1)
        # Send stderr to stdin for $opt::compress_program(2)
        # cattail get pid: $pid = $self->fh($fdno,'rpid');
        my $cattail = cattail();
        for my $fdno (1,2) {
            my $wpid = open(my $fdw,"|-","$opt::compress_program >>".
                            $self->fh($fdno,'name')) || die $?;
            $self->set_fh($fdno,'w',$fdw);
            $self->set_fh($fdno,'wpid',$wpid);
            my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail,
                            $opt::decompress_program, $wpid,
                            $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?;
            $self->set_fh($fdno,'r',$fdr);
            $self->set_fh($fdno,'rpid',$rpid);
        }
    } elsif(not $opt::ungroup) {
        # Set reading FD if using --group (--ungroup does not need)
        for my $fdno (1,2) {
            # Re-open the file for reading
            # so fdw can be closed separately
            # and fdr can be seeked separately (for --line-buffer)
            open(my $fdr,"<", $self->fh($fdno,'name')) ||
                ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
            $self->set_fh($fdno,'r',$fdr);
            # Unlink if required
            $Global::debug or unlink $self->fh($fdno,"unlink");
        }
    }
    if($opt::linebuffer) {
        # Set non-blocking when using --linebuffer
        # Fcntl is loaded at run time, so its constants must be called as &NAME
        $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
        for my $fdno (1,2) {
            my $fdr = $self->fh($fdno,'r');
            # Get the current flags on the filehandle.
            # F_GETFL hands the flags back as the return value of fcntl
            # ("0 but true" if no flag is set) - not through the 3rd argument.
            my $flags = fcntl($fdr, &F_GETFL, 0) || die $!;
            $flags |= &O_NONBLOCK; # Add non-blocking to the flags
            fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
        }
    }
}

sub max_file_name_length {
    # Figure out the max length of a subdir
    # TODO and the max total length
    # Ext4 = 255,130816
    # Input:
    #   $testdir = existing dir in which test dirs are created and removed
    # Uses:
    #   $Global::max_file_length (is set to the result)
    # Returns:
    #   $min = a dir name length that could be created
    #          (at most 5 chars below the shortest length found to fail)
    my $testdir = shift;
    my $len = 8;
    my $dir="x"x$len;
    # Grow the name by a factor of 16 until mkdir fails
    do {
        rmdir($testdir."/".$dir);
        $len *= 16;
        $dir="x"x$len;
    } while (mkdir $testdir."/".$dir);
    # Then search for the actual max length between $len/16 and $len
    my $min = $len/16;
    my $max = $len;
    while($max-$min > 5) {
        # If we are within 5 chars of the exact value:
        # it is not worth the extra time to find the exact value
        my $test = int(($min+$max)/2);
        $dir="x"x$test;
        if(mkdir $testdir."/".$dir) {
            rmdir($testdir."/".$dir);
            $min = $test;
        } else {
            $max = $test;
        }
    }
    $Global::max_file_length = $min;
    return $min;
}

sub set_fh {
    # Set file handle
    # Input:
    #   $fd_no = file descriptor number (0, 1 or 2 in the callers in this package)
    #   $key = which value to store (e.g. 'w', 'r', 'name', 'unlink')
    #   $fh = value to store
    my ($self, $fd_no, $key, $fh) = @_;
    # $hash{$x,$y} joins the two subscripts with $; into a single hash key
    $self->{'fd'}{$fd_no,$key} = $fh;
}

sub fh {
    # Get file handle
    # Input:
    #   $fd_no, $key = same as given to set_fh
    # Returns:
    #   the value stored by set_fh (undef if none)
    my ($self, $fd_no, $key) = @_;
    return $self->{'fd'}{$fd_no,$key};
}

sub write {
    # Write (syswrite) a block to the job's stdin handle: fh(0,"w")
    # Input:
    #   $remaining_ref = ref to the data to write
    # Returns:
    #   return value of syswrite
    my $self = shift;
    my $remaining_ref = shift;
    my $stdin_fh = $self->fh(0,"w");
    syswrite($stdin_fh,$$remaining_ref);
}

sub set_stdin_buffer {
    # Copy stdin buffer from $block_ref up to $endpos
    # Prepend with $header_ref (only if the job is still virgin)
    # Remove $recstart and $recend if needed
    # Input:
    #   $header_ref = ref to $header to prepend
    #   $block_ref = ref to $block to pass on
    #   $endpos = length of $block to pass on
    #   $recstart = --recstart regexp
    #   $recend = --recend regexp
    # Returns:
    #   N/A
    my $self = shift;
    my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_;
    $self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos);
    if($opt::remove_rec_sep) {
        remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend);
    }
    $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'};
    $self->{'stdin_buffer_pos'} = 0;
}

sub stdin_buffer_length {
    # Returns:
    #   length of the buffer set by set_stdin_buffer
    my $self = shift;
    return $self->{'stdin_buffer_length'};
}

sub remove_rec_sep {
    # Remove record separators from a block (in place)
    # Input:
    #   $block_ref = ref to the block to change
    #   $recstart = --recstart regexp
    #   $recend = --recend regexp
    my ($block_ref,$recstart,$recend) = @_;
    # Remove record separator
    # /o: the regexps are compiled only the first time they are used
    $$block_ref =~ s/$recend$recstart//gos;
    $$block_ref =~ s/^$recstart//os;
    $$block_ref =~ s/$recend$//os;
}

sub non_block_write {
    # Write the unsent part of the stdin buffer to fh(0,"w") with one
    # syswrite and record how far we got.
    # Returns:
    #   $something_written = syswrite's return value
    #                        (0 if the write failed with EAGAIN)
    my $self = shift;
    my $something_written = 0;
    use POSIX qw(:errno_h);
#    use Fcntl;
#    my $flags = '';
    for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) {
        my $in = $self->fh(0,"w");
#        fcntl($in, F_GETFL, $flags)
#            or die "Couldn't get flags for HANDLE : $!\n";
#        $flags |= O_NONBLOCK;
#        fcntl($in, F_SETFL, $flags)
#            or die "Couldn't set flags for HANDLE: $!\n";
        my $rv = syswrite($in, $buf);
        if (!defined($rv) && $! == EAGAIN) {
            # would block
            $something_written = 0;
        } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) {
            # incomplete write
            # Remove the written part
            $self->{'stdin_buffer_pos'} += $rv;
            $something_written = $rv;
        } else {
            # successfully wrote everything
            # Reset to an empty buffer
            my $empty="";
            $self->set_stdin_buffer(\$empty,\$empty,"","");
            $something_written = $rv;
        }
    }

    ::debug("pipe", "Non-block: ", $something_written);
    return $something_written;
}

sub virgin {
    # Returns:
    #   the 'virgin' flag (set to 1 by new; changed with set_virgin)
    my $self = shift;
    return $self->{'virgin'};
}

sub set_virgin {
    # Input:
    #   new value of the 'virgin' flag
    my $self = shift;
    $self->{'virgin'} = shift;
}

sub pid {
    # Returns:
    #   the pid stored with set_pid
    my $self = shift;
    return $self->{'pid'};
}

sub set_pid {
    # Input:
    #   pid to store
    my $self = shift;
    $self->{'pid'} = shift;
}

sub starttime {
    # Returns:
    #   UNIX-timestamp this job started
    #   (formatted with 3 decimals)
    my $self = shift;
    return sprintf("%.3f",$self->{'starttime'});
}

sub set_starttime {
    # Input:
    #   $starttime = timestamp to store (default: ::now())
    my $self = shift;
    my $starttime = shift || ::now();
    $self->{'starttime'} = $starttime;
}

sub runtime {
    # Returns:
    #   Run time in seconds
    #   (endtime - starttime, truncated and formatted to 3 decimals)
    my $self = shift;
    return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000);
}

sub endtime {
    # Returns:
    #   UNIX-timestamp this job ended
    #   0 if not ended yet
    my $self = shift;
    return ($self->{'endtime'} || 0);
}

sub set_endtime {
    # Input:
    #   $endtime = timestamp to store
    my $self = shift;
    my $endtime = shift;
    $self->{'endtime'} = $endtime;
}

sub timedout {
    # Is the job timedout?
    # Input:
    #   $delta_time = time that the job may run
    # Returns:
    #   True or false
    my $self = shift;
    my $delta_time = shift;
    return time > $self->{'starttime'} + $delta_time;
}

sub kill {
    # Kill the job.
    # Send the signals to (grand)*children and pid.
    # If no signals: TERM TERM KILL
    # Wait 200 ms after each TERM.
    # Input:
    #   @signals = signals to send
    my $self = shift;
    my @signals = @_;
    my @family_pids = $self->family_pids();
    # Record this jobs as failed
    $self->set_exitstatus(-1);
    # Send two TERMs to give time to clean up
    ::debug("run", "Kill seq ", $self->seq(), "\n");
    # Do not write '@signals || (...)': || evaluates its left operand in
    # scalar context, which would replace the requested signals with
    # the number of signals.
    my @send_signals = @signals ? @signals : ("TERM", "TERM", "KILL");
    for my $signal (@send_signals) {
        my $alive = 0;
        for my $pid (@family_pids) {
            # CORE::kill = the builtin, not this method
            if(CORE::kill(0, $pid)) {
                # The job still running
                CORE::kill($signal, $pid);
                $alive = 1;
            }
        }
        # If a signal was given as input, do not do the sleep below
        @signals and next;

        if($signal eq "TERM" and $alive) {
            # Wait up to 200 ms between TERMs - but only if any pids are alive
            my $sleep = 1;
            for (my $sleepsum = 0;
                 CORE::kill(0, $family_pids[0]) and $sleepsum < 200;
                 $sleepsum += $sleep) {
                $sleep = ::reap_usleep($sleep);
            }
        }
    }
}

sub family_pids {
    # Find the pids with this->pid as (grand)*parent
    # Returns:
    #   @pids = this->pid followed by the pids of its (grand)*children
    my $self = shift;
    my $pid = $self->pid();
    my @pids;

    my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();

    my @more = ($pid);
    # While more (grand)*children
    while(@more) {
        my @m;
        push @pids, @more;
        for my $parent (@more) {
            if($children_of_ref->{$parent}) {
                # add the children of this parent
                push @m, @{$children_of_ref->{$parent}};
            }
        }
        @more = @m;
    }
    return (@pids);
}

sub failed {
    # return number of times failed for this $sshlogin
    # Input:
    #   $sshlogin
    # Returns:
    #   Number of times failed for $sshlogin
    my $self = shift;
    my $sshlogin = shift;
    return $self->{'failed'}{$sshlogin};
}

sub failed_here {
    # return number of times failed for the current $sshlogin
    # Returns:
    #   Number of times failed for this sshlogin
    my $self = shift;
    return $self->{'failed'}{$self->sshlogin()};
}

sub add_failed {
    # increase the number of times failed for this $sshlogin
    my $self = shift;
    my $sshlogin = shift;
    $self->{'failed'}{$sshlogin}++;
}

sub add_failed_here {
    # increase the number of times failed for the current $sshlogin
    my $self = shift;
    $self->{'failed'}{$self->sshlogin()}++;
}

sub reset_failed { # increase the number of times failed for this $sshlogin my $self = shift; my $sshlogin = shift; delete $self->{'failed'}{$sshlogin}; } sub reset_failed_here { # increase the number of times failed for this $sshlogin my $self = shift; delete $self->{'failed'}{$self->sshlogin()}; } sub min_failed { # Returns: # the number of sshlogins this command has failed on # the minimal number of times this command has failed my $self = shift; my $min_failures = ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; return ($number_of_sshlogins_failed_on,$min_failures); } sub total_failed { # Returns: # $total_failures = the number of times this command has failed my $self = shift; my $total_failures = 0; for (values %{$self->{'failed'}}) { $total_failures += $_; } return $total_failures; } sub wrapped { # Wrap command with: # * --shellquote # * --nice # * --cat # * --fifo # * --sshlogin # * --pipepart (@Global::cat_partials) # * --pipe # * --tmux # The ordering of the wrapping is important: # * --nice/--cat/--fifo should be done on the remote machine # * --pipepart/--pipe should be done on the local machine inside --tmux # Uses: # $Global::envvar # $opt::shellquote # $opt::nice # $Global::shell # $opt::cat # $opt::fifo # @Global::cat_partials # $opt::pipe # $opt::tmux # Returns: # $self->{'wrapped'} = the command wrapped with the above my $self = shift; if(not defined $self->{'wrapped'}) { my $command = $Global::envvar.$self->replaced(); if($opt::shellquote) { # Prepend echo # and quote twice $command = "echo " . ::shell_quote_scalar(::shell_quote_scalar($command)); } if($opt::nice) { # Prepend \nice -n19 $SHELL -c # and quote. # The '\' before nice is needed to avoid tcsh's built-in $command = '\nice'. " -n". $opt::nice. " ". $Global::shell. " -c ". 
::shell_quote_scalar($command); } if($opt::cat) { # Prepend 'cat > {};' # Append '_EXIT=$?;(rm {};exit $_EXIT)' $command = $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0). $command. $self->{'commandline'}->replace_placeholders( ["; _EXIT=\$?; rm \257<\257>; exit \$_EXIT"], 0, 0); } elsif($opt::fifo) { # Prepend 'mkfifo {}; (' # Append ') & _PID=$!; cat > {}; wait $_PID; _EXIT=$?;(rm {};exit $_EXIT)' $command = $self->{'commandline'}->replace_placeholders(["mkfifo \257<\257>; ("], 0, 0). $command. $self->{'commandline'}->replace_placeholders([") & _PID=\$!; cat > \257<\257>; ", "wait \$_PID; _EXIT=\$?; ", "rm \257<\257>; exit \$_EXIT"], 0,0); } # Wrap with ssh + tranferring of files $command = $self->sshlogin_wrap($command); if(@Global::cat_partials) { # Prepend: # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 | $command = (shift @Global::cat_partials). "|". "(". $command. ")"; } elsif($opt::pipe) { # Prepend EOF-detector to avoid starting $command if EOF. # The $tmpfile might exist if run on a remote system - we accept that risk my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); # Unlink to avoid leaving files if --dry-run or --sshlogin unlink $tmpfile; $command = # Exit value: # empty input = true # some input = exit val from command qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }. qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }. qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }. 
"($command);"; } if($opt::tmux) { # Wrap command with 'tmux' $command = $self->tmux_wrap($command); } $self->{'wrapped'} = $command; } return $self->{'wrapped'}; } sub set_sshlogin { my $self = shift; my $sshlogin = shift; $self->{'sshlogin'} = $sshlogin; delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong delete $self->{'wrapped'}; } sub sshlogin { my $self = shift; return $self->{'sshlogin'}; } sub sshlogin_wrap { # Wrap the command with the commands needed to run remotely # Returns: # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands my $self = shift; my $command = shift; if(not defined $self->{'sshlogin_wrap'}) { my $sshlogin = $self->sshlogin(); my $sshcmd = $sshlogin->sshcommand(); my $serverlogin = $sshlogin->serverlogin(); my ($pre,$post,$cleanup)=("","",""); if($serverlogin eq ":") { # No transfer neeeded $self->{'sshlogin_wrap'} = $command; } else { # --transfer $pre .= $self->sshtransfer(); # --return $post .= $self->sshreturn(); # --cleanup $post .= $self->sshcleanup(); if($post) { # We need to save the exit status of the job $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; } # If the remote login shell is (t)csh then use 'setenv' # otherwise use 'export' # We cannot use parse_env_var(), as PARALLEL_SEQ changes # for each command my $parallel_env = ($Global::envwarn . q{ 'eval `echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null } . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; } . q{ setenv PARALLEL_PID '$PARALLEL_PID' } . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; } . q{ PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' }); my $remote_pre = ""; my $ssh_options = ""; if(($opt::pipe or $opt::pipepart) and $opt::ctrlc or not ($opt::pipe or $opt::pipepart) and not $opt::noctrlc) { # TODO Determine if this is needed # Propagating CTRL-C to kill remote jobs requires # remote jobs to be run with a terminal. 
$ssh_options = "-tt -oLogLevel=quiet"; # $ssh_options = ""; # tty - check if we have a tty. # stty: # -onlcr - make output 8-bit clean # isig - pass CTRL-C as signal # -echo - do not echo input $remote_pre .= ::shell_quote_scalar('tty >/dev/null && stty isig -onlcr -echo;'); } if($opt::workdir) { my $wd = ::shell_quote_file($self->workdir()); $remote_pre .= ::shell_quote_scalar("mkdir -p ") . $wd . ::shell_quote_scalar("; cd ") . $wd . # exit 255 (instead of exec false) would be the correct thing, # but that fails on tcsh ::shell_quote_scalar(qq{ || exec false;}); } # This script is to solve the problem of # * not mixing STDERR and STDOUT # * terminating with ctrl-c # It works on Linux but not Solaris # Finishes on Solaris, but wrong exit code: # $SIG{CHLD} = sub {exit ($?&127 ? 128+($?&127) : 1+$?>>8)}; # Hangs on Solaris, but correct exit code on Linux: # $SIG{CHLD} = sub { $done = 1 }; # $p->poll; my $signal_script = "perl -e '". q{ use IO::Poll; $SIG{CHLD} = sub { $done = 1 }; $p = IO::Poll->new; $p->mask(STDOUT, POLLHUP); $pid=fork; unless($pid) {setpgrp; exec $ENV{SHELL}, "-c", @ARGV; die "exec: $!\n"} $p->poll; kill SIGHUP, -${pid} unless $done; wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8) } . "' "; $signal_script =~ s/\s+/ /g; $self->{'sshlogin_wrap'} = ($pre . "$sshcmd $ssh_options $serverlogin $parallel_env " . $remote_pre # . ::shell_quote_scalar($signal_script . ::shell_quote_scalar($command)) . ::shell_quote_scalar($command) . ";" . 
$post);
        }
    }
    return $self->{'sshlogin_wrap'};
}

sub transfer {
    # Files to transfer: the unmodified arguments of this job.
    # Side effect:
    #   resets $self->{'transfersize'} and sums into it the size of every
    #   argument that exists as a local file
    # Returns:
    #   @transfer - File names of files to transfer (empty unless $opt::transfer)
    my $self = shift;
    my @transfer = ();
    $self->{'transfersize'} = 0;
    if($opt::transfer) {
        for my $record (@{$self->{'commandline'}{'arg_list'}}) {
            # Merge arguments from records into args
            for my $arg (@$record) {
                CORE::push @transfer, $arg->orig();
                # filesize
                if(-e $arg->orig()) {
                    $self->{'transfersize'} += (stat($arg->orig()))[7];
                }
            }
        }
    }
    return @transfer;
}

sub transfersize {
    # Returns:
    #   the byte count last computed by transfer()
    my $self = shift;
    return $self->{'transfersize'};
}

sub sshtransfer {
    # Returns for each transfer file:
    #   rsync $file remote:$workdir
    # The commands are terminated with ';' and concatenated into one string.
    my $self = shift;
    my @pre;
    my $sshlogin = $self->sshlogin();
    my $workdir = $self->workdir();
    for my $file ($self->transfer()) {
        push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
    }
    return join("",@pre);
}

sub return {
    # Files to return
    # Non-quoted and with {...} substituted
    # Returns:
    #   @non_quoted_filenames
    my $self = shift;
    return $self->{'commandline'}->
        replace_placeholders($self->{'commandline'}{'return_files'},0,0);
}

sub returnsize {
    # This is called after the job has finished
    # Adds the size of every return file that exists locally to
    # $self->{'returnsize'} (the value accumulates over calls).
    # Returns:
    #   $number_of_bytes transferred in return
    my $self = shift;
    for my $file ($self->return()) {
        if(-e $file) {
            $self->{'returnsize'} += (stat($file))[7];
        }
    }
    return $self->{'returnsize'};
}

sub sshreturn {
    # Returns for each return-file:
    #   rsync remote:$workdir/$file .
    # The result is one string of 'mkdir -p ...; rsync ...;' commands,
    # or "" if there are no return files.
    my $self = shift;
    my $sshlogin = $self->sshlogin();
    my $sshcmd = $sshlogin->sshcommand();
    my $serverlogin = $sshlogin->serverlogin();
    my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd);
    my $pre = "";
    for my $file ($self->return()) {
        $file =~ s:^\./::g; # Remove ./ if any
        my $relpath = ($file !~ m:^/:); # Is the path relative?
        my $cd = "";
        my $wd = "";
        if($relpath) {
            #   rsync -avR /foo/./bar/baz.c remote:/tmp/
            # == (on old systems)
            #   rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
            $wd = ::shell_quote_file($self->workdir()."/");
        }
        # Only load File::Basename if actually needed
        $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
        # dir/./file means relative to dir, so remove dir on remote
        # Only trust $1 when this very match succeeded: after a failed
        # match $1 still holds the capture of some earlier successful
        # match, which would be used as a bogus base directory.
        my $basedir = "";
        if($file =~ m:(.*)/\./: and $1) {
            $basedir = ::shell_quote_file($1."/");
        }
        my $nobasedir = $file;
        $nobasedir =~ s:.*/\./::;
        $cd = ::shell_quote_file(::dirname($nobasedir));
        my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync");
        my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file)));
        # --return
        #   mkdir -p /home/tange/dir/subdir/;
        #   rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync"
        #   server:file.gz /home/tange/dir/subdir/
        $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:".
            $basename . " ".$basedir.$cd.";";
    }
    return $pre;
}

sub sshcleanup {
    # Return the sshcommand needed to remove the file
    # Returns:
    #   ssh command needed to remove files from sshlogin
    my $self = shift;
    my $sshlogin = $self->sshlogin();
    my $sshcmd = $sshlogin->sshcommand();
    my $serverlogin = $sshlogin->serverlogin();
    my $workdir = $self->workdir();
    my $cleancmd = "";

    for my $file ($self->cleanup()) {
        my @subworkdirs = parentdirs_of($file);
        $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
    }
    if(defined $opt::workdir and $opt::workdir eq "...") {
        $cleancmd .= "$sshcmd $serverlogin rm -rf " .
::shell_quote_scalar($workdir).';';
    }
    return $cleancmd;
}

sub cleanup {
    # Returns:
    #   Files to remove at cleanup: the transfer files followed by the
    #   return files if $opt::cleanup is set, otherwise the empty list
    my $self = shift;
    if($opt::cleanup) {
        my @transfer = $self->transfer();
        my @return = $self->return();
        return (@transfer,@return);
    } else {
        return ();
    }
}

sub workdir {
    # Compute (once) and cache the working dir in $self->{'workdir'}.
    # $opt::workdir:
    #   undefined => "."
    #   "."       => current dir, made relative to $HOME if it is below $HOME
    #   "..."     => .parallel/tmp/<hostname>-<pid>-<seq>
    #   other     => the given dir with /./, trailing / and leading ./ removed
    # Returns:
    #   the workdir on a remote machine (passed through ::shell_quote_scalar)
    my $self = shift;
    if(not defined $self->{'workdir'}) {
        my $workdir;
        if(defined $opt::workdir) {
            if($opt::workdir eq ".") {
                # . means current dir
                my $home = $ENV{'HOME'};
                eval 'use Cwd';
                my $cwd = cwd();
                $workdir = $cwd;
                if($home) {
                    # If homedir exists: remove the homedir from
                    # workdir if cwd starts with homedir
                    # E.g. /home/foo/my/dir => my/dir
                    # E.g. /tmp/my/dir => /tmp/my/dir
                    my ($home_dev, $home_ino) = (stat($home))[0,1];
                    my $parent = "";
                    my @dir_parts = split(m:/:,$cwd);
                    my $part;
                    while(defined ($part = shift @dir_parts)) {
                        $part eq "" and next;
                        $parent .= "/".$part;
                        my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
                        if($parent_dev == $home_dev and $parent_ino == $home_ino) {
                            # dev and ino is the same: We found the homedir.
                            $workdir = join("/",@dir_parts);
                            last;
                        }
                    }
                }
                if($workdir eq "") {
                    $workdir = ".";
                }
            } elsif($opt::workdir eq "...") {
                $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
                    . "-" . $self->seq();
            } else {
                $workdir = $opt::workdir;
                # Rsync treats /./ special. We don't want that
                $workdir =~ s:/\./:/:g; # Remove /./
                $workdir =~ s:/+$::; # Remove ending / if any
                $workdir =~ s:^\./::g; # Remove starting ./ if any
            }
        } else {
            $workdir = ".";
        }
        $self->{'workdir'} = ::shell_quote_scalar($workdir);
    }
    return $self->{'workdir'};
}

sub parentdirs_of {
    # Input:
    #   $d = dir or file name
    # Return:
    #   all parentdirs except . of this dir or file - sorted desc by length
    my $d = shift;
    my @parents = ();
    # Strip one trailing /component per iteration
    while($d =~ s:/[^/]+$::) {
        if($d ne ".") {
            push @parents, $d;
        }
    }
    return @parents;
}

sub start {
    # Setup STDOUT and STDERR for a job and start it.
    # The command is run as: $Global::shell -c $command (through ::open3).
    # On success the job is registered in %Global::running and the
    # global counters are updated.
    # Returns:
    #   job-object or undef if job not to run
    my $job = shift;
    # Get the shell command to be executed (possibly with ssh infront).
    my $command = $job->wrapped();

    if($Global::interactive or $Global::stderr_verbose) {
        if($Global::interactive) {
            print $Global::original_stderr "$command ?...";
            open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
            my $answer = <$tty_fh>;
            close $tty_fh;
            my $run_yes = ($answer =~ /^\s*y/i);
            if (not $run_yes) {
                $command = "true"; # Run the command 'true'
            }
        } else {
            print $Global::original_stderr "$command\n";
        }
    }

    my $pid;
    $job->openoutputfiles();
    my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
    local (*IN,*OUT,*ERR);
    open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!");
    # This handle is the job's STDERR: say so if duplicating it fails
    open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!");

    if(($opt::dryrun or $Global::verbose) and $opt::ungroup) {
        if($Global::verbose <= 1) {
            print $stdout_fh $job->replaced(),"\n";
        } else {
            # Verbose level > 1: Print the rsync and stuff
            print $stdout_fh $command,"\n";
        }
    }
    if($opt::dryrun) {
        $command = "true";
    }
    $ENV{'PARALLEL_SEQ'} = $job->seq();
    $ENV{'PARALLEL_PID'} = $$;
    ::debug("run", $Global::total_running, " processes . Starting (",
            $job->seq(), "): $command\n");
    if($opt::pipe) {
        my ($stdin_fh);
        # The eval is needed to catch exception from open3
        eval {
            $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
                ::die_bug("open3-pipe");
            1;
        };
        $job->set_fh(0,"w",$stdin_fh);
    } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1
            and $job->sshlogin()->string() eq ":") {
        # Give STDIN to the first job if using -a (but only if running
        # locally - otherwise CTRL-C does not work for other jobs Bug#36585)
        *IN = *STDIN;
        # The eval is needed to catch exception from open3
        eval {
            $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
                ::die_bug("open3-a");
            1;
        };
        # Re-open to avoid complaining
        open(STDIN, "<&", $Global::original_stdin)
            or ::die_bug("dup-\$Global::original_stdin: $!");
    } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
             open(my $devtty_fh, "<", "/dev/tty")) {
        # Give /dev/tty to the command if no one else is using it
        *IN = $devtty_fh;
        # The eval is needed to catch exception from open3
        eval {
            $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
                ::die_bug("open3-/dev/tty");
            $Global::tty_taken = $pid;
            close $devtty_fh;
            1;
        };
    } else {
        # The eval is needed to catch exception from open3
        eval {
            $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
                ::die_bug("open3-gensym");
            1;
        };
    }
    if($pid) {
        # A job was started
        $Global::total_running++;
        $Global::total_started++;
        $job->set_pid($pid);
        $job->set_starttime();
        $Global::running{$job->pid()} = $job;
        if($opt::timeout) {
            $Global::timeoutq->insert($job);
        }
        $Global::newest_job = $job;
        $Global::newest_starttime = ::now();
        return $job;
    } else {
        # No more processes
        ::debug("run", "Cannot spawn more jobs.\n");
        return undef;
    }
}

sub tmux_wrap {
    # Wrap command with tmux for session pPID
    # Input:
    #   $actual_command = the actual command being run (incl ssh wrap)
    my $self = shift;
    my $actual_command = shift;
    # Temporary file name.
# Used for fifo to communicate exit val
    # Only the name is needed: the file is removed here and the name is
    # re-created as a fifo (mkfifo) by the command built below.
    my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx");
    $Global::unlink{$tmpfile}=1;
    close $fh;
    unlink $tmpfile;
    my $visual_command = $self->replaced();
    my $title = $visual_command;
    # ; causes problems
    # ascii 194-245 annoys tmux
    # (tr has no character classes: the literal [ and ] are deleted, too)
    $title =~ tr/[\011-\016;\302-\365]//d;

    my $tmux;
    if($Global::total_running == 0) {
        # No job is running: start a new detached session named p<pid>
        $tmux = "tmux new-session -s p$$ -d -n ".
            ::shell_quote_scalar($title);
        print $Global::original_stderr "See output with: tmux attach -t p$$\n";
    } else {
        # Add a window to the existing session p<pid>
        $tmux = "tmux new-window -t p$$ -n ".::shell_quote_scalar($title);
    }
    return "mkfifo $tmpfile; $tmux ".
        # Run in tmux
        ::shell_quote_scalar(
            "(".$actual_command.');(echo $?$status;echo 255) >'.$tmpfile."&".
            "echo ".::shell_quote_scalar($visual_command).";".
            "echo \007Job finished at: `date`;sleep 10").
        # Run outside tmux
        # Read the first line from the fifo and use that as status code
        "; exit `perl -ne 'unlink \$ARGV; 1..1 and print' $tmpfile` ";
}

sub is_already_in_results {
    # Do we already have results for this job?
    # Tests for the file $opt::results/<args as dirname>/stdout
    # Returns:
    #   $job_already_run = bool whether there is output for this or not
    my $job = $_[0];
    my $args_as_dirname = $job->{'commandline'}->args_as_dirname();
    # prefix/name1/val1/name2/val2/
    my $dir = $opt::results."/".$args_as_dirname;
    ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n");
    return -e "$dir/stdout";
}

sub is_already_in_joblog {
    # Returns:
    #   the bit for this job's seq in the bit vector $Global::job_already_run
    my $job = shift;
    return vec($Global::job_already_run,$job->seq(),1);
}

sub set_job_in_joblog {
    # Set the bit for this job's seq in $Global::job_already_run
    # Returns: N/A
    my $job = shift;
    vec($Global::job_already_run,$job->seq(),1) = 1;
}

sub should_be_retried {
    # Should this job be retried?
    # A failed job is put back on $Global::JobQueue unless the number of
    # failures equals $opt::retries.
    # Returns
    #   0 - do not retry
    #   1 - job queued for retry
    my $self = shift;
    if (not $opt::retries) {
        return 0;
    }
    if(not $self->exitstatus()) {
        # Completed with success. If there is a recorded failure: forget it
        $self->reset_failed_here();
        return 0
    } else {
        # The job failed. Should it be retried?
        $self->add_failed_here();
        if($self->total_failed() == $opt::retries) {
            # This has been retried enough
            return 0;
        } else {
            # This command should be retried
            $self->set_endtime(undef);
            $Global::JobQueue->unget($self);
            ::debug("run", "Retry ", $self->seq(), "\n");
            return 1;
        }
    }
}

sub print {
    # Print the output of the jobs
    # Returns: N/A

    my $self = shift;
    ::debug("print", ">>joboutput ", $self->replaced(), "\n");
    if($opt::dryrun) {
        # Nothing was printed to this job:
        # cleanup tmp files if --files was set
        unlink $self->fh(1,"name");
    }
    if($opt::pipe and $self->virgin()) {
        # Skip --joblog, --dryrun, --verbose
    } else {
        if($Global::joblog and defined $self->{'exitstatus'}) {
            # Add to joblog when finished
            $self->print_joblog();
        }

        # Printing is only relevant for grouped/--line-buffer output.
        $opt::ungroup and return;
        # Check for disk full
        exit_if_disk_full();

        if(($opt::dryrun or $Global::verbose)
           and
           not $self->{'verbose_printed'}) {
            # Print the command at most once per job
            $self->{'verbose_printed'}++;
            if($Global::verbose <= 1) {
                print STDOUT $self->replaced(),"\n";
            } else {
                # Verbose level > 1: Print the rsync and stuff
                print STDOUT $self->wrapped(),"\n";
            }
            # If STDOUT and STDERR are merged,
            # we want the command to be printed first
            # so flush to avoid STDOUT being buffered
            flush STDOUT;
        }
    }
    for my $fdno (sort { $a <=> $b } keys %Global::fd) {
        # Sort by file descriptor numerically: 1,2,3,..,9,10,11
        $fdno == 0 and next;
        my $out_fd = $Global::fd{$fdno};
        my $in_fh = $self->fh($fdno,"r");
        if(not $in_fh) {
            if(not $Job::file_descriptor_warning_printed{$fdno}++) {
                # ::warning("File descriptor $fdno not defined\n");
            }
            next;
        }
        ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):");
        if($opt::files) {
            # If --compress: $in_fh must be closed first.
close $self->fh($fdno,"w"); close $in_fh; if($opt::pipe and $self->virgin()) { # Nothing was printed to this job: # cleanup unused tmp files if --files was set for my $fdno (1,2) { unlink $self->fh($fdno,"name"); unlink $self->fh($fdno,"unlink"); } } elsif($fdno == 1 and $self->fh($fdno,"name")) { print $out_fd $self->fh($fdno,"name"),"\n"; } } elsif($opt::linebuffer) { # Line buffered print out $self->linebuffer_print($fdno,$in_fh,$out_fd); } else { my $buf; close $self->fh($fdno,"w"); seek $in_fh, 0, 0; # $in_fh is now ready for reading at position 0 if($opt::tag or defined $opt::tagstring) { my $tag = $self->tag(); if($fdno == 2) { # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt # This is a crappy way of ignoring it. while(<$in_fh>) { if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { # Skip } else { print $out_fd $tag,$_; } # At most run the loop once last; } } while(<$in_fh>) { print $out_fd $tag,$_; } } else { my $buf; if($fdno == 2) { # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt # This is a crappy way of ignoring it. sysread($in_fh,$buf,1_000); $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; print $out_fd $buf; } while(sysread($in_fh,$buf,32768)) { print $out_fd $buf; } } close $in_fh; } flush $out_fd; } ::debug("print", "<{'partial_line',$fdno}; if(defined $self->{'exitstatus'}) { # If the job is dead: close printing fh. 
Needed for --compress close $self->fh($fdno,"w"); if($opt::compress) { # Blocked reading in final round $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; for my $fdno (1,2) { my $fdr = $self->fh($fdno,'r'); my $flags; fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle } } } # This seek will clear EOF seek $in_fh, tell($in_fh), 0; # The read is non-blocking: The $in_fh is set to non-blocking. # 32768 --tag = 5.1s # 327680 --tag = 4.4s # 1024000 --tag = 4.4s # 3276800 --tag = 4.3s # 32768000 --tag = 4.7s # 10240000 --tag = 4.3s while(read($in_fh,substr($$partial,length $$partial),3276800)) { # Append to $$partial # Find the last \n my $i = rindex($$partial,"\n"); if($i != -1) { # One or more complete lines were found if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt # This is a crappy way of ignoring it. 
$$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
                # Length of partial line has changed: Find the last \n again
                $i = rindex($$partial,"\n");
            }
            if($opt::tag or defined $opt::tagstring) {
                # Replace ^ with $tag within the full line
                my $tag = $self->tag();
                substr($$partial,0,$i+1) =~ s/^/$tag/gm;
                # Length of partial line has changed: Find the last \n again
                $i = rindex($$partial,"\n");
            }
            # Print up to and including the last \n
            print $out_fd substr($$partial,0,$i+1);
            # Remove the printed part
            substr($$partial,0,$i+1)="";
        }
    }
    if(defined $self->{'exitstatus'}) {
        # If the job is dead: print the remaining partial line
        # read remaining
        if($$partial and ($opt::tag or defined $opt::tagstring)) {
            my $tag = $self->tag();
            $$partial =~ s/^/$tag/gm;
        }
        print $out_fd $$partial;
        # Release the memory
        $$partial = undef;
        if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) {
            # decompress still running
        } else {
            # decompress done: close fh
            close $in_fh;
        }
    }
}

sub print_joblog {
    # Append one tab separated line for this job to $Global::joblog:
    #   seq, sshlogin, starttime, runtime, transfersize, returnsize,
    #   exitstatus, exitsignal, command
    # and mark the job as logged.
    # Returns: N/A
    my $self = shift;
    my $cmd;
    if($Global::verbose <= 1) {
        $cmd = $self->replaced();
    } else {
        # Verbose level > 1: Print the rsync and stuff
        # (same command as print() shows at this verbosity; "@command"
        # was interpolated here before, but it is never set in this method)
        $cmd = $self->wrapped();
    }
    print $Global::joblog
        join("\t", $self->seq(), $self->sshlogin()->string(),
             $self->starttime(), sprintf("%10.3f",$self->runtime()),
             $self->transfersize(), $self->returnsize(),
             $self->exitstatus(), $self->exitsignal(), $cmd
        ). "\n";
    flush $Global::joblog;
    $self->set_job_in_joblog();
}

sub tag {
    # Compute (once) and cache the tag: $opt::tagstring with the
    # placeholders replaced, followed by a tab.
    # Returns:
    #   $self->{'tag'}
    my $self = shift;
    if(not defined $self->{'tag'}) {
        $self->{'tag'} = $self->{'commandline'}->
            replace_placeholders([$opt::tagstring],0,0)."\t";
    }
    return $self->{'tag'};
}

sub hostgroups {
    # Returns:
    #   the list stored in 'hostgroups' of the first argument of the
    #   first record (cached in $self->{'hostgroups'})
    my $self = shift;
    if(not defined $self->{'hostgroups'}) {
        $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
    }
    return @{$self->{'hostgroups'}};
}

sub exitstatus {
    # Returns:
    #   the recorded exit status (undef if none is recorded)
    my $self = shift;
    return $self->{'exitstatus'};
}

sub set_exitstatus {
    # Record the exit status. A non-zero status always wins; a zero status
    # never replaces a non-zero status that is already recorded.
    # Returns: N/A
    my $self = shift;
    my $exitstatus = shift;
    if($exitstatus) {
        # Overwrite status if non-zero
        $self->{'exitstatus'} = $exitstatus;
    } else {
        # Set status but do not overwrite
        # Status may have been set by --timeout
        $self->{'exitstatus'} ||= $exitstatus;
    }
}

sub exitsignal {
    # Returns:
    #   the recorded exit signal
    my $self = shift;
    return $self->{'exitsignal'};
}

sub set_exitsignal {
    # Record the exit signal
    # Returns: N/A
    my $self = shift;
    my $exitsignal = shift;
    $self->{'exitsignal'} = $exitsignal;
}

{
    # State shared between calls: the test file handle, the test data
    # and the (already unlinked) file name
    my ($disk_full_fh, $b8193, $name);

    sub exit_if_disk_full {
        # Checks if $TMPDIR is full by writing 8kb to a tmpfile
        # If the disk is full: Exit immediately.
        # Returns:
        #   N/A
        if(not $disk_full_fh) {
            ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
            unlink $name;
            $b8193 = "x"x8193;
        }
        # Linux does not discover if a disk is full if writing <= 8192
        # Tested on:
        # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
        # ntfs reiserfs tmpfs ubifs vfat xfs
        # TODO this should be tested on different OS similar to this:
        #
        # doit() {
        #   sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
        #   seq 100000 | parallel --tmpdir /mnt/loop/ true &
        #   seq 6900000 > /mnt/loop/i && echo seq OK
        #   seq 6980868 > /mnt/loop/i
        #   seq 10000 > /mnt/loop/ii
        #   sleep 3
        #   sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
        #   echo >&2
        # }
        print $disk_full_fh $b8193;
        if(not $disk_full_fh
           or
           tell $disk_full_fh == 0) {
            ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}.
Is the disk full?\n");
            ::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
            ::wait_and_exit(255);
        }
        # Empty the test file again so it is ready for the next check
        truncate $disk_full_fh, 0;
        seek($disk_full_fh, 0, 0) || die;
    }
}


package CommandLine;

sub new {
    # Input (positional):
    #   $seq, $commandref, $arg_queue, $context_replace,
    #   $max_number_of_args, $return_files, $replacecount_ref, $len_ref
    # The two hashes are copied, so each object has its own counters.
    # Dies if $commandref is false.
    # Returns:
    #   new CommandLine object with an empty arg_list
    my $class = shift;
    my $seq = shift;
    my $commandref = shift;
    $commandref || die;
    my $arg_queue = shift;
    my $context_replace = shift;
    my $max_number_of_args = shift; # for -N and normal (-n1)
    my $return_files = shift;
    my $replacecount_ref = shift;
    my $len_ref = shift;
    my %replacecount = %$replacecount_ref;
    my %len = %$len_ref;
    for (keys %$replacecount_ref) {
        # Total length of this replacement string {} replaced with all args
        $len{$_} = 0;
    }
    return bless {
        'command' => $commandref,
        'seq' => $seq,
        'len' => \%len,
        'arg_list' => [],
        'arg_queue' => $arg_queue,
        'max_number_of_args' => $max_number_of_args,
        'replacecount' => \%replacecount,
        'context_replace' => $context_replace,
        'return_files' => $return_files,
        'replaced' => undef,
    }, ref($class) || $class;
}

sub seq {
    # Returns:
    #   the sequence number given to new()
    my $self = shift;
    return $self->{'seq'};
}

{
    # Highest slot number handed out so far (shared by all objects)
    my $max_slot_number;

    sub slot {
        # Find the number of a free job slot and return it
        # A free slot is taken from @Global::slots; if there is none a new
        # number is generated. The slot is cached in $self->{'slot'}.
        # Uses:
        #   @Global::slots
        # Returns:
        #   $jobslot = number of jobslot
        my $self = shift;
        if(not $self->{'slot'}) {
            if(not @Global::slots) {
                # $Global::max_slot_number will typically be $Global::max_jobs_running
                push @Global::slots, ++$max_slot_number;
            }
            $self->{'slot'} = shift @Global::slots;
        }
        return $self->{'slot'};
    }
}

sub populate {
    # Add arguments from arg_queue until the number of arguments or
    # max line length is reached
    # Uses:
    #   $Global::minimal_command_line_length
    #   $opt::cat
    #   $opt::fifo
    #   $Global::JobQueue
    #   $opt::m
    #   $opt::X
    #   $CommandLine::already_spread
    #   $Global::max_jobs_running
    # Returns: N/A
    my $self = shift;
    my $next_arg;
    my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length();

    if($opt::cat or $opt::fifo) {
        # Generate a tempfile name that will be used as {}
        my($outfh,$name) = ::tmpfile(SUFFIX => ".pip");
        close $outfh;
        # Unlink is needed if: ssh otheruser@localhost
        unlink $name;
        $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget([Arg->new($name)]);
    }

    while (not $self->{'arg_queue'}->empty()) {
        $next_arg = $self->{'arg_queue'}->get();
        if(not defined $next_arg) {
            next;
        }
        $self->push($next_arg);
        if($self->len() >= $max_len) {
            # Command length is now > max_length
            # If there are arguments: remove the last
            # If there are no arguments: Error
            # TODO stuff about -x opt_x
            if($self->number_of_args() > 1) {
                # There is something to work on
                $self->{'arg_queue'}->unget($self->pop());
                last;
            } else {
                # A single record is already too long: give up
                my $args = join(" ", map { $_->orig() } @$next_arg);
                ::error("Command line too long (",
                        $self->len(), " >= ",
                        $max_len,
                        ") at number ",
                        $self->{'arg_queue'}->arg_number(),
                        ": ".
                        (substr($args,0,50))."...\n");
                $self->{'arg_queue'}->unget($self->pop());
                ::wait_and_exit(255);
            }
        }

        if(defined $self->{'max_number_of_args'}) {
            if($self->number_of_args() >= $self->{'max_number_of_args'}) {
                last;
            }
        }
    }
    if(($opt::m or $opt::X) and not $CommandLine::already_spread
       and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
        # -m or -X and EOF => Spread the arguments over all jobslots
        # (unless they are already spread)
        $CommandLine::already_spread ||= 1;
        if($self->number_of_args() > 1) {
            $self->{'max_number_of_args'} =
                ::ceil($self->number_of_args()/$Global::max_jobs_running);
            $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
                $self->{'max_number_of_args'};
            # Put everything back and take only this command line's share
            $self->{'arg_queue'}->unget($self->pop_all());
            while($self->number_of_args() < $self->{'max_number_of_args'}) {
                $self->push($self->{'arg_queue'}->get());
            }
        }
    }
}

sub push {
    # Add one or more records as arguments
    # For each replacement string the length of the replaced argument is
    # added to $self->{'len'}.
    # Returns: N/A
    my $self = shift;
    my $record = shift;
    push @{$self->{'arg_list'}}, $record;

    my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
    my $rep;
    for my $arg (@$record) {
        if(defined $arg) {
            for my $perlexpr (keys %{$self->{'replacecount'}}) {
                # 50% faster than below
                $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self);
                # $rep = $arg->replace($perlexpr,$quote_arg,$self);
                # $self->{'len'}{$perlexpr} += length $rep;
                # ::debug("length", "Length: ", length $rep,
                # "(", $perlexpr, "=>", $rep, ")\n");
            }
        }
    }
}

sub pop {
    # Remove last argument
    # The lengths added by push() for that record are subtracted again.
    # Returns:
    #   the last record
    my $self = shift;
    my $record = pop @{$self->{'arg_list'}};
    my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
    for my $arg (@$record) {
        if(defined $arg) {
            for my $perlexpr (keys %{$self->{'replacecount'}}) {
                $self->{'len'}{$perlexpr} -=
                    length $arg->replace($perlexpr,$quote_arg,$self);
            }
        }
    }
    return $record;
}

sub pop_all {
    # Remove all arguments and zeros the length of replacement strings
    # Returns:
    #   all records
    my $self = shift;
    my @popped = @{$self->{'arg_list'}};
    for my $replacement_string (keys %{$self->{'replacecount'}}) {
        $self->{'len'}{$replacement_string} = 0;
    }
    $self->{'arg_list'} = [];
    return @popped;
}

sub number_of_args {
    # The number of records
    # Returns:
    #   number of records
    my $self = shift;
    # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd
    # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az
    # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq
    # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue.
    #
    # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG
    # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq
    # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq
    # eagdoq oapq.
    #
    # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz
    # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq
    # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq
    # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd.
# Fa ymwq egdq ftq
    # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq
    # eagdoq oapq U daf13'qp ftq eagdoq oapq
    # tffb://qz.iuwubqpum.ads/iuwu/DAF13
    #
    # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk
    # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq
    # tmp fa nq daf13'qp.
    #
    # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita
    # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq
    # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz.
    #
    # This is really the number of records
    return $#{$self->{'arg_list'}}+1;
}

sub number_of_recargs {
    # The number of args in records
    # Computed as: number of records * number of args in the first record
    # Returns:
    #   number of args records
    my $self = shift;
    my $sum = 0;
    my $nrec = scalar @{$self->{'arg_list'}};
    if($nrec) {
        $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
    }
    return $sum;
}

sub args_as_string {
    # Returns:
    #  all unmodified arguments joined with ' ' (similar to {})
    my $self = shift;
    return (join " ", map { $_->orig() }
            map { @$_ } @{$self->{'arg_list'}});
}

sub args_as_dirname {
    # Each argument contributes two path elements: its header and its
    # quoted value.
    # Returns:
    #  all unmodified arguments joined with '/' (similar to {})
    #  \t \0 \\ and / are quoted as: \t \0 \\ \_
    # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
    my $self = shift;
    my @res = ();

    for my $rec_ref (@{$self->{'arg_list'}}) {
        # If headers are used, sort by them.
        # Otherwise keep the order from the command line.
        my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
        for my $n (@header_indexes_sorted) {
            CORE::push(@res,
                 $Global::input_source_header{$n},
                 map { my $s = $_;
                       #  \t \0 \\ and / are quoted as: \t \0 \\ \_
                       $s =~ s/\\/\\\\/g;
                       $s =~ s/\t/\\t/g;
                       $s =~ s/\0/\\0/g;
                       $s =~ s:/:\\_:g;
                       if($Global::max_file_length) {
                           # Keep each subdir shorter than the longest
                           # allowed file name
                           $s = substr($s,0,$Global::max_file_length);
                       }
                       $s; }
                 $rec_ref->[$n-1]->orig());
        }
    }
    return join "/", @res;
}

sub header_indexes_sorted {
    # Sort headers first by number then by name.
    # E.g.: 1a 1b 11a 11b
    # Input:
    #  $max_col = number of columns
    # Side effect:
    #  undefined entries of %Global::input_source_header are set to the
    #  column number
    # Returns:
    #  Indexes of %Global::input_source_header sorted
    my $max_col = shift;

    # The headers are compared with <=> even if they are not numbers
    no warnings 'numeric';
    for my $col (1 .. $max_col) {
        # Make sure the header is defined. If it is not: use column number
        if(not defined $Global::input_source_header{$col}) {
            $Global::input_source_header{$col} = $col;
        }
    }
    my @header_indexes_sorted = sort {
        # Sort headers numerically then asciibetically
        $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
        or
            $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
    } 1 .. $max_col;
    return @header_indexes_sorted;
}

sub len {
    # Uses:
    #   $opt::shellquote
    # Also reads $opt::nice, $Global::quoting and $Global::envvarlen
    # Returns:
    # The length of the command line with args substituted
    # (a pessimistic estimate when quoting options are set)
    my $self = shift;
    my $len = 0;
    # Add length of the original command with no args
    # Length of command w/ all replacement args removed
    $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
    ::debug("length", "noncontext + command: $len\n");
    my $recargs = $self->number_of_recargs();
    if($self->{'context_replace'}) {
        # Context is duplicated for each arg
        $len += $recargs * $self->{'len'}{'context'};
        for my $replstring (keys %{$self->{'replacecount'}}) {
            # If the replacements string is more than once: mulitply its length
            $len += $self->{'len'}{$replstring} *
                $self->{'replacecount'}{$replstring};
            ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
                    $self->{'replacecount'}{$replstring}, "\n");
        }
        # echo 11 22 33 44 55 66 77 88 99 1010
        # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
        # 5 +  ctxgrp*arg
        ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
                " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
        # Add space between context groups
        $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
    } else {
        # Each replacement string may occur several times
        # Add the length for each time
        $len += 1*$self->{'len'}{'context'};
        ::debug("length", "context+noncontext + command: $len\n");
        for my $replstring (keys %{$self->{'replacecount'}}) {
            # (space between regargs + length of replacement)
            # * number this replacement is used
            $len += ($recargs -1 + $self->{'len'}{$replstring}) *
                $self->{'replacecount'}{$replstring};
        }
    }
    if($opt::nice) {
        # Pessimistic length if --nice is set
        # Worse than worst case: every char needs to be quoted with \
        $len *= 2;
    }
    if($Global::quoting) {
        # Pessimistic length if -q is set
        # Worse than worst case: every char needs to be quoted with \
        $len *= 2;
    }
    if($opt::shellquote) {
        # Pessimistic length if --shellquote is set
        # Worse than worst case: every char needs to be quoted with \ twice
        $len *= 4;
    }
    # If we are using --env, add the prefix for that, too.
    $len += $Global::envvarlen;

    return $len;
}

sub replaced {
    # Compute (once) and cache the command with the arguments inserted.
    # The real length is compared to len() for debugging only.
    # Uses:
    #   $Global::noquote
    #   $Global::quoting
    # Returns:
    #   $replaced = command with place holders replaced and prepended
    my $self = shift;
    if(not defined $self->{'replaced'}) {
        # Don't quote arguments if the input is the full command line
        my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
        $self->{'replaced'} = $self->replace_placeholders($self->{'command'},$Global::quoting,$quote_arg);
        my $len = length $self->{'replaced'};
        if ($len != $self->len()) {
            ::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n");
        } else {
            ::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n");
        }
    }
    return $self->{'replaced'};
}

sub replace_placeholders {
    # Replace foo{}bar with fooargbar
    # Input:
    #   $targetref = command as shell words
    #   $quote = should everything be quoted?
    #   $quote_arg = should replaced arguments be quoted?
# Returns: # @target with placeholders replaced my $self = shift; my $targetref = shift; my $quote = shift; my $quote_arg = shift; my $context_replace = $self->{'context_replace'}; my @target = @$targetref; ::debug("replace", "Replace @target\n"); # -X = context replace # maybe multiple input sources # maybe --xapply if(not @target) { # @target is empty: Return empty array return @target; } # Fish out the words that have replacement strings in them my %word; for (@target) { my $tt = $_; ::debug("replace", "Target: $tt"); # a{1}b{}c{}d # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d # A B C => aAbA B CcA B Cd # -X A B C => aAbAcAd aAbBcBd aAbCcCd if($context_replace) { while($tt =~ s/([^\s\257]* # before {= (?: \257< # {= [^\257]*? # The perl expression \257> # =} [^\s\257]* # after =} )+)/ /x) { # $1 = pre \257 perlexpr \257 post $word{"$1"} ||= 1; } } else { while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { # $f = \257 perlexpr \257 $word{$1} ||= 1; } } } my @word = keys %word; my %replace; my @arg; for my $record (@{$self->{'arg_list'}}) { # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] # Merge arg-objects from records into @arg for easy access CORE::push @arg, @$record; } # Add one arg if empty to allow {#} and {%} to be computed only once if(not @arg) { @arg = (Arg->new("")); } # Number of arguments - used for positional arguments my $n = $#_+1; # This is actually a CommandLine-object, # but it looks nice to be able to say {= $job->slot() =} my $job = $self; for my $word (@word) { # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF my $w = $word; ::debug("replace", "Replacing in $w\n"); # Replace positional arguments $w =~ s< ([^\s\257]*) # before {= \257< # {= (-?\d+) # Position (eg. -2 or 3) ([^\257]*?) # The perl expression \257> # =} ([^\s\257]*) # after =} > { $1. # Context (pre) ( $arg[$2 > 0 ? $2-1 : $n+$2] ? # If defined: replace $arg[$2 > 0 ? 
$2-1 : $n+$2]->replace($3,$quote_arg,$self) : "") .$4 }egx;# Context (post)
        ::debug("replace", "Positional replaced $word with: $w\n");

        if($w !~ /\257/) {
            # No more replacement strings in $w: No need to do more
            if($quote) {
                CORE::push(@{$replace{::shell_quote($word)}}, $w);
            } else {
                CORE::push(@{$replace{$word}}, $w);
            }
            next;
        }
        # for each arg:
        #   compute replacement for each string
        #   replace replacement strings with replacement in the word value
        #   push to replace word value
        ::debug("replace", "Positional done: $w\n");
        for my $arg (@arg) {
            my $val = $w;
            my $number_of_replacements = 0;
            for my $perlexpr (keys %{$self->{'replacecount'}}) {
                # Replace {= perl expr =} with value for each arg
                $number_of_replacements +=
                    $val =~ s{\257<\Q$perlexpr\E\257>}
                             {$arg ? $arg->replace($perlexpr,$quote_arg,$self) : ""}eg;
            }
            my $ww = $word;
            if($quote) {
                $ww = ::shell_quote_scalar($word);
                $val = ::shell_quote_scalar($val);
            }
            if($number_of_replacements) {
                CORE::push(@{$replace{$ww}}, $val);
            }
        }
    }

    if($quote) {
        @target = ::shell_quote(@target);
    }
    # ::debug("replace", "%replace=",::my_dump(%replace),"\n");
    if(%replace) {
        # Substitute the replace strings with the replacement values
        # Must be sorted by length if a short word is a substring of a long word
        my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s }
                          sort { length $b <=> length $a } keys %replace);
        for(@target) {
            s/($regexp)/join(" ",@{$replace{$1}})/ge;
        }
    }
    ::debug("replace", "Return @target\n");
    return wantarray ? @target : "@target";
}


package CommandLineQueue;

sub new {
    # Input (positional):
    #   $commandref = ref to the command as a list of words
    #   $read_from = passed on to RecordQueue->new()
    #   $context_replace, $max_number_of_args, $return_files =
    #     stored and passed on to every CommandLine
    # The replacement strings in the command, in @Global::ret_files and in
    # $opt::tagstring are rewritten to \257<perl expr\257>, and it is
    # counted how often each expression is used. If the command has no
    # replacement string, {} is appended (except for --pipe/--pipepart
    # without --fifo/--cat).
    # Side effects:
    #   modifies @Global::ret_files and $opt::tagstring
    #   may set $Global::noquote
    #   exits (255) if the command starts with an unknown '-...' word or
    #   contains the character \257
    # Returns:
    #   new CommandLineQueue object
    my $class = shift;
    my $commandref = shift;
    my $read_from = shift;
    my $context_replace = shift;
    my $max_number_of_args = shift;
    my $return_files = shift;
    my @unget = ();
    my (%replacecount,$posrpl,%len);
    my @command = @$commandref;
    # If the first command start with '-' it is probably an option
    # (@command may be empty: the default command {} is added below)
    if(@command and $command[0] =~ /^\s*(-\S+)/) {
        # Is this really a command in $PATH starting with '-'?
        my $cmd = $1;
        if(not ::which($cmd)) {
            ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n");
            ::wait_and_exit(255);
        }
    }
    # Replace replacement strings with {= perl expr =}
    # Protect matching inside {= perl expr =}
    # by replacing {= and =} with \257< and \257>
    for(@command) {
        if(/\257/) {
            ::error("Command cannot contain the character \257. Use a function for that.\n");
            ::wait_and_exit(255);
        }
        s/\Q$Global::parensleft\E(.*?)\Q$Global::parensright\E/\257<$1\257>/gx;
    }
    for my $rpl (keys %Global::rpl) {
        # Replace the short hand string with the {= perl expr =} in $command and $opt::tagstring
        # Avoid replacing inside existing {= perl expr =}
        for(@command,@Global::ret_files) {
            while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
                  \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/xg) {
            }
        }
        if(defined $opt::tagstring) {
            for($opt::tagstring) {
                while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
                      \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/x) {}
            }
        }
        # Do the same for the positional replacement strings
        # A bit harder as we have to put in the position number
        $posrpl = $rpl;
        if($posrpl =~ s/^\{//) {
            # Only do this if the shorthand start with {
            for(@command,@Global::ret_files) {
                s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g;
            }
            if(defined $opt::tagstring) {
                # Insert the same perl expression as for @command above.
                # (An unset variable was interpolated here before, so the
                # expression of e.g. {1.} was lost in the tagstring.)
                $opt::tagstring =~ s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g;
            }
        }
    }
    my $sum = 0;
    while($sum == 0) {
        # Count how many times each replacement string is used
        my @cmd = @command;
        my $contextlen = 0;
        my $noncontextlen = 0;
        my $contextgroups = 0;
        for my $c (@cmd) {
            while($c =~ s/ \257<([^\257]*?)\257> /\000/x) {
                # %replacecount = { "perlexpr" => number of times seen }
                # e.g { "$_++" => 2 }
                $replacecount{$1} ++;
                $sum++;
            }
            # Measure the length of the context around the {= perl expr =}
            # Use that {=...=} has been replaced with \000 above
            # So there is no need to deal with \257<
            while($c =~ s/ (\S*\000\S*) //x) {
                my $w = $1;
                $w =~ tr/\000//d; # Remove all \000's
                $contextlen += length($w);
                $contextgroups++;
            }
            # All {= perl expr =} have been removed: The rest is non-context
            $noncontextlen += length $c;
        }
        if($opt::tagstring) {
            my $t = $opt::tagstring;
            while($t =~ s/ \257<([^\257]*)\257> //x) {
                # %replacecount = { "perlexpr" => number of times seen }
                # e.g { "$_++" => 2 }
                # But for tagstring we just need to mark it as seen
                $replacecount{$1}||=1;
            }
        }

        $len{'context'} = 0+$contextlen;
        $len{'noncontext'} = $noncontextlen;
        $len{'contextgroups'} = $contextgroups;
        $len{'noncontextgroups'} = @cmd-$contextgroups;
        ::debug("length", "@command Context: ", $len{'context'},
                " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
                " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
        if($sum == 0) {
            # Default command = {}
            # If not replacement string: append {}
            if(not @command) {
                @command = ("\257<\257>");
                $Global::noquote = 1;
            } elsif(($opt::pipe or $opt::pipepart)
                    and not $opt::fifo and not $opt::cat) {
                # With --pipe / --pipe-part you can have no replacement
                last;
            } else {
                # Append {} to the command if there are no {...}'s and no {=...=}
                push @command, ("\257<\257>");
            }
        }
    }

    return bless {
        'unget' => \@unget,
        'command' => \@command,
        'replacecount' => \%replacecount,
        'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
        'context_replace' => $context_replace,
        'len' => \%len,
        'max_number_of_args' => $max_number_of_args,
        'size' => undef,
        'return_files' => $return_files,
        'seq' => 1,
    }, ref($class) || $class;
}

sub get {
    my $self = shift;
    if(@{$self->{'unget'}}) {
        my $cmd_line = shift @{$self->{'unget'}};
        return ($cmd_line);
    } else {
        my $cmd_line;
        $cmd_line = CommandLine->new($self->seq(),
                                     $self->{'command'},
                                     $self->{'arg_queue'},
                                     $self->{'context_replace'},
                                     $self->{'max_number_of_args'},
                                     $self->{'return_files'},
                                     $self->{'replacecount'},
                                     $self->{'len'},
            );
        $cmd_line->populate();
        ::debug("init","cmd_line->number_of_args ",
                $cmd_line->number_of_args(), "\n");
        if($opt::pipe or $opt::pipepart) {
            if($cmd_line->replaced() eq "") {
                # Empty command - pipe requires a command
::error("--pipe must have a command to pipe into (e.g. 'cat').\n"); ::wait_and_exit(255); } } else { if($cmd_line->number_of_args() == 0) { # We did not get more args - maybe at EOF string? return undef; } elsif($cmd_line->replaced() eq "") { # Empty command - get the next instead return $self->get(); } } $self->set_seq($self->seq()+1); return $cmd_line; } } sub unget { my $self = shift; unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); ::debug("run", "CommandLineQueue->empty $empty"); return $empty; } sub seq { my $self = shift; return $self->{'seq'}; } sub set_seq { my $self = shift; $self->{'seq'} = shift; } sub quote_args { my $self = shift; # If there is not command emulate |bash return $self->{'command'}; } sub size { my $self = shift; if(not $self->{'size'}) { my @all_lines = (); while(not $self->{'arg_queue'}->empty()) { push @all_lines, CommandLine->new($self->{'command'}, $self->{'arg_queue'}, $self->{'context_replace'}, $self->{'max_number_of_args'}); } $self->{'size'} = @all_lines; $self->unget(@all_lines); } return $self->{'size'}; } package Limits::Command; # Maximal command line length (for -m and -X) sub max_length { # Find the max_length of a command line and cache it # Returns: # number of chars on the longest command line allowed if(not $Limits::Command::line_max_len) { # Disk cache of max command line length my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); my $cached_limit; if(-e $len_cache) { open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); $cached_limit = <$fh>; close $fh; } else { $cached_limit = real_max_length(); # If $HOME is write protected: Do not fail mkdir($ENV{'HOME'} . "/.parallel"); mkdir($ENV{'HOME'} . 
"/.parallel/tmp"); open(my $fh, ">", $len_cache); print $fh $cached_limit; close $fh; } $Limits::Command::line_max_len = $cached_limit; if($opt::max_chars) { if($opt::max_chars <= $cached_limit) { $Limits::Command::line_max_len = $opt::max_chars; } else { ::warning("Value for -s option ", "should be < $cached_limit.\n"); } } } return $Limits::Command::line_max_len; } sub real_max_length { # Find the max_length of a command line # Returns: # The maximal command line length # Use an upper bound of 8 MB if the shell allows for for infinite long lengths my $upper = 8_000_000; my $len = 8; do { if($len > $upper) { return $len }; $len *= 16; } while (is_acceptable_command_line_length($len)); # Then search for the actual max length between 0 and upper bound return binary_find_max_length(int($len/16),$len); } sub binary_find_max_length { # Given a lower and upper bound find the max_length of a command line # Returns: # number of chars on the longest command line allowed my ($lower, $upper) = (@_); if($lower == $upper or $lower == $upper-1) { return $lower; } my $middle = int (($upper-$lower)/2 + $lower); ::debug("init", "Maxlen: $lower,$upper,$middle : "); if (is_acceptable_command_line_length($middle)) { return binary_find_max_length($middle,$upper); } else { return binary_find_max_length($lower,$middle); } } sub is_acceptable_command_line_length { # Test if a command line of this length can run # Returns: # 0 if the command line length is too long # 1 otherwise my $len = shift; local *STDERR; open (STDERR, ">", "/dev/null"); system "true "."x"x$len; close STDERR; ::debug("init", "$len=$? 
"); return not $?; } package RecordQueue; sub new { my $class = shift; my $fhs = shift; my $colsep = shift; my @unget = (); my $arg_sub_queue; if($colsep) { # Open one file with colsep $arg_sub_queue = RecordColQueue->new($fhs); } else { # Open one or more files if multiple -a $arg_sub_queue = MultifileQueue->new($fhs); } return bless { 'unget' => \@unget, 'arg_number' => 0, 'arg_sub_queue' => $arg_sub_queue, }, ref($class) || $class; } sub get { # Returns: # reference to array of Arg-objects my $self = shift; if(@{$self->{'unget'}}) { $self->{'arg_number'}++; return shift @{$self->{'unget'}}; } my $ret = $self->{'arg_sub_queue'}->get(); if(defined $Global::max_number_of_args and $Global::max_number_of_args == 0) { ::debug("run", "Read 1 but return 0 args\n"); return [Arg->new("")]; } else { return $ret; } } sub unget { my $self = shift; ::debug("run", "RecordQueue-unget '@_'\n"); $self->{'arg_number'} -= @_; unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = not @{$self->{'unget'}}; $empty &&= $self->{'arg_sub_queue'}->empty(); ::debug("run", "RecordQueue->empty $empty"); return $empty; } sub arg_number { my $self = shift; return $self->{'arg_number'}; } package RecordColQueue; sub new { my $class = shift; my $fhs = shift; my @unget = (); my $arg_sub_queue = MultifileQueue->new($fhs); return bless { 'unget' => \@unget, 'arg_sub_queue' => $arg_sub_queue, }, ref($class) || $class; } sub get { # Returns: # reference to array of Arg-objects my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } my $unget_ref=$self->{'unget'}; if($self->{'arg_sub_queue'}->empty()) { return undef; } my $in_record = $self->{'arg_sub_queue'}->get(); if(defined $in_record) { my @out_record = (); for my $arg (@$in_record) { ::debug("run", "RecordColQueue::arg $arg\n"); my $line = $arg->orig(); ::debug("run", "line='$line'\n"); if($line ne "") { for my $s (split /$opt::colsep/o, $line, -1) { push @out_record, Arg->new($s); } } else { push 
@out_record, Arg->new(""); } } return \@out_record; } else { return undef; } } sub unget { my $self = shift; ::debug("run", "RecordColQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); ::debug("run", "RecordColQueue->empty $empty"); return $empty; } package MultifileQueue; @Global::unget_argv=(); sub new { my $class = shift; my $fhs = shift; for my $fh (@$fhs) { if(-t $fh) { ::warning("Input is read from the terminal. ". "Only experts do this on purpose. ". "Press CTRL-D to exit.\n"); } } return bless { 'unget' => \@Global::unget_argv, 'fhs' => $fhs, 'arg_matrix' => undef, }, ref($class) || $class; } sub get { my $self = shift; if($opt::xapply) { return $self->xapply_get(); } else { return $self->nest_get(); } } sub unget { my $self = shift; ::debug("run", "MultifileQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @Global::unget_argv and not @{$self->{'unget'}}); for my $fh (@{$self->{'fhs'}}) { $empty &&= eof($fh); } ::debug("run", "MultifileQueue->empty $empty "); return $empty; } sub xapply_get { my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } my @record = (); my $prepend = undef; my $empty = 1; for my $fh (@{$self->{'fhs'}}) { my $arg = read_arg_from_fh($fh); if(defined $arg) { # Record $arg for recycling at end of file push @{$self->{'arg_matrix'}{$fh}}, $arg; push @record, $arg; $empty = 0; } else { ::debug("run", "EOA "); # End of file: Recycle arguments push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; # return last @{$args->{'args'}{$fh}}; push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; } } if($empty) { return undef; } else { return \@record; } } sub nest_get { my $self = shift; if(@{$self->{'unget'}}) { return shift @{$self->{'unget'}}; } my @record = (); my $prepend = undef; my $empty = 1; my $no_of_inputsources = $#{$self->{'fhs'}} + 
1; if(not $self->{'arg_matrix'}) { # Initialize @arg_matrix with one arg from each file # read one line from each file my @first_arg_set; my $all_empty = 1; for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); if(defined $arg) { $all_empty = 0; } $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; } if($all_empty) { # All filehandles were at eof or eof-string return undef; } return [@first_arg_set]; } # Treat the case with one input source special. For multiple # input sources we need to remember all previously read values to # generate all combinations. But for one input source we can # forget the value after first use. if($no_of_inputsources == 1) { my $arg = read_arg_from_fh($self->{'fhs'}[0]); if(defined($arg)) { return [$arg]; } return undef; } for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { if(eof($self->{'fhs'}[$fhno])) { next; } else { # read one my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); defined($arg) || next; # If we just read an EOF string: Treat this as EOF my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; $self->{'arg_matrix'}[$fhno][$len] = $arg; # make all new combinations my @combarg = (); for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}]; } $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry # map combinations # [ 1, 3, 7 ], [ 2, 4, 1 ] # => # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ] my @mapped; for my $c (expand_combinations(@combarg)) { my @a; for my $n (0 .. 
$no_of_inputsources - 1 ) { push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; } push @mapped, \@a; } # append the mapped to the ungotten arguments push @{$self->{'unget'}}, @mapped; # get the first return shift @{$self->{'unget'}}; } } # all are eof or at EOF string; return from the unget queue return shift @{$self->{'unget'}}; } sub read_arg_from_fh { # Read one Arg from filehandle # Returns: # Arg-object with one read line # undef if end of file my $fh = shift; my $prepend = undef; my $arg; do {{ # This makes 10% faster if(not ($arg = <$fh>)) { if(defined $prepend) { return Arg->new($prepend); } else { return undef; } } # ::debug("run", "read $arg\n"); # Remove delimiter $arg =~ s:$/$::; if($Global::end_of_file_string and $arg eq $Global::end_of_file_string) { # Ignore the rest of input file close $fh; ::debug("run", "EOF-string ($arg) met\n"); if(defined $prepend) { return Arg->new($prepend); } else { return undef; } } if(defined $prepend) { $arg = $prepend.$arg; # For line continuation $prepend = undef; #undef; } if($Global::ignore_empty) { if($arg =~ /^\s*$/) { redo; # Try the next line } } if($Global::max_lines) { if($arg =~ /\s$/) { # Trailing space => continued on next line $prepend = $arg; redo; } } }} while (1 == 0); # Dummy loop {{}} for redo if(defined $arg) { return Arg->new($arg); } else { ::die_bug("multiread arg undefined"); } } sub expand_combinations { # Input: # ([xmin,xmax], [ymin,ymax], ...) 
# Returns: ([x,y,...],[x,y,...]) # where xmin <= x <= xmax and ymin <= y <= ymax my $minmax_ref = shift; my $xmin = $$minmax_ref[0]; my $xmax = $$minmax_ref[1]; my @p; if(@_) { # If there are more columns: Compute those recursively my @rest = expand_combinations(@_); for(my $x = $xmin; $x <= $xmax; $x++) { push @p, map { [$x, @$_] } @rest; } } else { for(my $x = $xmin; $x <= $xmax; $x++) { push @p, [$x]; } } return @p; } package Arg; sub new { my $class = shift; my $orig = shift; my @hostgroups; if($opt::hostgroups) { if($orig =~ s:@(.+)::) { # We found hostgroups on the arg @hostgroups = split(/\+/, $1); if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { ::warning("No such hostgroup (@hostgroups)\n"); @hostgroups = (keys %Global::hostgroups); } } else { @hostgroups = (keys %Global::hostgroups); } } return bless { 'orig' => $orig, 'hostgroups' => \@hostgroups, }, ref($class) || $class; } sub replace { # Calculates the corresponding value for a given perl expression # Returns: # The calculated string (quoted if asked for) my $self = shift; my $perlexpr = shift; # E.g. $_=$_ or s/.gz// my $quote = (shift) ? 1 : 0; # should the string be quoted? # This is actually a CommandLine-object, # but it looks nice to be able to say {= $job->slot() =} my $job = shift; $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace if(not defined $self->{"rpl",0,$perlexpr}) { local $_; if($Global::trim eq "n") { $_ = $self->{'orig'}; } else { $_ = trim_of($self->{'orig'}); } ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); if(not $Global::perleval{$perlexpr}) { # Make an anonymous function of the $perlexpr # And more importantly: Compile it only once if($Global::perleval{$perlexpr} = eval('sub { no strict; no warnings; my $job = shift; '. $perlexpr.' }')) { # All is good } else { # The eval failed. Maybe $perlexpr is invalid perl? 
::error("Cannot use $perlexpr: $@\n"); ::wait_and_exit(255); } } # Execute the function $Global::perleval{$perlexpr}->($job); $self->{"rpl",0,$perlexpr} = $_; } if(not defined $self->{"rpl",$quote,$perlexpr}) { $self->{"rpl",1,$perlexpr} = ::shell_quote_scalar($self->{"rpl",0,$perlexpr}); } return $self->{"rpl",$quote,$perlexpr}; } sub orig { my $self = shift; return $self->{'orig'}; } sub trim_of { # Removes white space as specifed by --trim: # n = nothing # l = start # r = end # lr|rl = both # Returns: # string with white space removed as needed my @strings = map { defined $_ ? $_ : "" } (@_); my $arg; if($Global::trim eq "n") { # skip } elsif($Global::trim eq "l") { for my $arg (@strings) { $arg =~ s/^\s+//; } } elsif($Global::trim eq "r") { for my $arg (@strings) { $arg =~ s/\s+$//; } } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } } else { ::error("--trim must be one of: r l rl lr.\n"); ::wait_and_exit(255); } return wantarray ? @strings : "@strings"; } package TimeoutQueue; sub new { my $class = shift; my $delta_time = shift; my ($pct); if($delta_time =~ /(\d+(\.\d+)?)%/) { # Timeout in percent $pct = $1/100; $delta_time = 1_000_000; } return bless { 'queue' => [], 'delta_time' => $delta_time, 'pct' => $pct, 'remedian_idx' => 0, 'remedian_arr' => [], 'remedian' => undef, }, ref($class) || $class; } sub delta_time { my $self = shift; return $self->{'delta_time'}; } sub set_delta_time { my $self = shift; $self->{'delta_time'} = shift; } sub remedian { my $self = shift; return $self->{'remedian'}; } sub set_remedian { # Set median of the last 999^3 (=997002999) values using Remedian # # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A # robust averaging method for large data sets." Journal of the # American Statistical Association 85.409 (1990): 97-104. 
my $self = shift; my $val = shift; my $i = $self->{'remedian_idx'}++; my $rref = $self->{'remedian_arr'}; $rref->[0][$i%999] = $val; $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; } sub update_delta_time { # Update delta_time based on runtime of finished job if timeout is # a percentage my $self = shift; my $runtime = shift; if($self->{'pct'}) { $self->set_remedian($runtime); $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); ::debug("run", "Timeout: $self->{'delta_time'}s "); } } sub process_timeouts { # Check if there was a timeout my $self = shift; # $self->{'queue'} is sorted by start time while (@{$self->{'queue'}}) { my $job = $self->{'queue'}[0]; if($job->endtime()) { # Job already finished. No need to timeout the job # This could be because of --keep-order shift @{$self->{'queue'}}; } elsif($job->timedout($self->{'delta_time'})) { # Need to shift off queue before kill # because kill calls usleep that calls process_timeouts shift @{$self->{'queue'}}; $job->kill(); } else { # Because they are sorted by start time the rest are later last; } } } sub insert { my $self = shift; my $in = shift; push @{$self->{'queue'}}, $in; } package Semaphore; # This package provides a counting semaphore # # If a process dies without releasing the semaphore the next process # that needs that entry will clean up dead semaphores # # The semaphores are stored in ~/.parallel/semaphores/id- Each # file in ~/.parallel/semaphores/id-/ is the process ID of the # process holding the entry. If the process dies, the entry can be # taken by another process. 
sub new { my $class = shift; my $id = shift; my $count = shift; $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex $id="id-".$id; # To distinguish it from a process id my $parallel_dir = $ENV{'HOME'}."/.parallel"; -d $parallel_dir or mkdir_or_die($parallel_dir); my $parallel_locks = $parallel_dir."/semaphores"; -d $parallel_locks or mkdir_or_die($parallel_locks); my $lockdir = "$parallel_locks/$id"; my $lockfile = $lockdir.".lock"; if($count < 1) { ::die_bug("semaphore-count: $count"); } return bless { 'lockfile' => $lockfile, 'lockfh' => Symbol::gensym(), 'lockdir' => $lockdir, 'id' => $id, 'idfile' => $lockdir."/".$id, 'pid' => $$, 'pidfile' => $lockdir."/".$$.'@'.::hostname(), 'count' => $count + 1 # nlinks returns a link for the 'id-' as well }, ref($class) || $class; } sub acquire { my $self = shift; my $sleep = 1; # 1 ms my $start_time = time; while(1) { $self->atomic_link_if_count_less_than() and last; ::debug("sem", "Remove dead locks"); my $lockdir = $self->{'lockdir'}; for my $d (glob "$lockdir/*") { ::debug("sem", "Lock $d $lockdir\n"); $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; my ($pid, $host) = ($1, $2); if($host eq ::hostname()) { if(not kill 0, $1) { ::debug("sem", "Dead: $d"); unlink $d; } else { ::debug("sem", "Alive: $d"); } } } # try again $self->atomic_link_if_count_less_than() and last; # Retry slower and slower up to 1 second $sleep = ($sleep < 1000) ? 
($sleep * 1.1) : ($sleep); # Random to avoid every sleeping job waking up at the same time ::usleep(rand()*$sleep); if(defined($opt::timeout) and $start_time + $opt::timeout > time) { # Acquire the lock anyway if(not -e $self->{'idfile'}) { open (my $fh, ">", $self->{'idfile'}) or ::die_bug("timeout_write_idfile: $self->{'idfile'}"); close $fh; } link $self->{'idfile'}, $self->{'pidfile'}; last; } } ::debug("sem", "acquired $self->{'pid'}\n"); } sub release { my $self = shift; unlink $self->{'pidfile'}; if($self->nlinks() == 1) { # This is the last link, so atomic cleanup $self->lock(); if($self->nlinks() == 1) { unlink $self->{'idfile'}; rmdir $self->{'lockdir'}; } $self->unlock(); } ::debug("run", "released $self->{'pid'}\n"); } sub _release { my $self = shift; unlink $self->{'pidfile'}; $self->lock(); my $nlinks = $self->nlinks(); ::debug("sem", $nlinks, "<", $self->{'count'}); if($nlinks-- > 1) { unlink $self->{'idfile'}; open (my $fh, ">", $self->{'idfile'}) or ::die_bug("write_idfile: $self->{'idfile'}"); print $fh "#"x$nlinks; close $fh; } else { unlink $self->{'idfile'}; rmdir $self->{'lockdir'}; } $self->unlock(); ::debug("sem", "released $self->{'pid'}\n"); } sub atomic_link_if_count_less_than { # Link $file1 to $file2 if nlinks to $file1 < $count my $self = shift; my $retval = 0; $self->lock(); ::debug($self->nlinks(), "<", $self->{'count'}); if($self->nlinks() < $self->{'count'}) { -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); if(not -e $self->{'idfile'}) { open (my $fh, ">", $self->{'idfile'}) or ::die_bug("write_idfile: $self->{'idfile'}"); close $fh; } $retval = link $self->{'idfile'}, $self->{'pidfile'}; } $self->unlock(); ::debug("run", "atomic $retval"); return $retval; } sub _atomic_link_if_count_less_than { # Link $file1 to $file2 if nlinks to $file1 < $count my $self = shift; my $retval = 0; $self->lock(); my $nlinks = $self->nlinks(); ::debug("sem", $nlinks, "<", $self->{'count'}); if($nlinks++ < $self->{'count'}) { -d 
$self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); if(not -e $self->{'idfile'}) { open (my $fh, ">", $self->{'idfile'}) or ::die_bug("write_idfile: $self->{'idfile'}"); close $fh; } open (my $fh, ">", $self->{'idfile'}) or ::die_bug("write_idfile: $self->{'idfile'}"); print $fh "#"x$nlinks; close $fh; $retval = link $self->{'idfile'}, $self->{'pidfile'}; } $self->unlock(); ::debug("sem", "atomic $retval"); return $retval; } sub nlinks { my $self = shift; if(-e $self->{'idfile'}) { ::debug("sem", "nlinks", (stat(_))[3], "size", (stat(_))[7], "\n"); return (stat(_))[3]; } else { return 0; } } sub lock { my $self = shift; my $sleep = 100; # 100 ms my $total_sleep = 0; $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; my $locked = 0; while(not $locked) { if(tell($self->{'lockfh'}) == -1) { # File not open open($self->{'lockfh'}, ">", $self->{'lockfile'}) or ::debug("run", "Cannot open $self->{'lockfile'}"); } if($self->{'lockfh'}) { # File is open chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { # The file is locked: No need to retry $locked = 1; last; } else { if ($! =~ m/Function not implemented/) { ::warning("flock: $!"); ::warning("Will wait for a random while\n"); ::usleep(rand(5000)); # File cannot be locked: No need to retry $locked = 2; last; } } } # Locking failed in first round # Sleep and try again $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); # Random to avoid every sleeping job waking up at the same time ::usleep(rand()*$sleep); $total_sleep += $sleep; if($opt::semaphoretimeout) { if($total_sleep/1000 > $opt::semaphoretimeout) { # Timeout: bail out ::warning("Semaphore timed out. Ignoring timeout."); $locked = 3; last; } } else { if($total_sleep/1000 > 30) { ::warning("Semaphore stuck for 30 seconds. 
Consider using --semaphoretimeout."); } } } ::debug("run", "locked $self->{'lockfile'}"); } sub unlock { my $self = shift; unlink $self->{'lockfile'}; close $self->{'lockfh'}; ::debug("run", "unlocked\n"); } sub mkdir_or_die { # If dir is not writable: die my $dir = shift; my @dir_parts = split(m:/:,$dir); my ($ddir,$part); while(defined ($part = shift @dir_parts)) { $part eq "" and next; $ddir .= "/".$part; -d $ddir and next; mkdir $ddir; } if(not -w $dir) { ::error("Cannot write to $dir: $!\n"); ::wait_and_exit(255); } } # Keep perl -w happy $opt::x = $Semaphore::timeout = $Semaphore::wait = $Job::file_descriptor_warning_printed = 0; rocksdb-6.11.4/build_tools/make_package.sh000077500000000000000000000061131370372246700205140ustar00rootroot00000000000000# shellcheck disable=SC1113 #/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. set -e function log() { echo "[+] $1" } function fatal() { echo "[!] $1" exit 1 } function platform() { local __resultvar=$1 if [[ -f "/etc/yum.conf" ]]; then eval $__resultvar="centos" elif [[ -f "/etc/dpkg/dpkg.cfg" ]]; then eval $__resultvar="ubuntu" else fatal "Unknwon operating system" fi } platform OS function package() { if [[ $OS = "ubuntu" ]]; then if dpkg --get-selections | grep --quiet $1; then log "$1 is already installed. skipping." else # shellcheck disable=SC2068 apt-get install $@ -y fi elif [[ $OS = "centos" ]]; then if rpm -qa | grep --quiet $1; then log "$1 is already installed. skipping." else # shellcheck disable=SC2068 yum install $@ -y fi fi } function detect_fpm_output() { if [[ $OS = "ubuntu" ]]; then export FPM_OUTPUT=deb elif [[ $OS = "centos" ]]; then export FPM_OUTPUT=rpm fi } detect_fpm_output function gem_install() { if gem list | grep --quiet $1; then log "$1 is already installed. skipping." 
else # shellcheck disable=SC2068 gem install $@ fi } function main() { if [[ $# -ne 1 ]]; then fatal "Usage: $0 " else log "using rocksdb version: $1" fi if [[ -d /vagrant ]]; then if [[ $OS = "ubuntu" ]]; then package g++-4.8 export CXX=g++-4.8 # the deb would depend on libgflags2, but the static lib is the only thing # installed by make install package libgflags-dev package ruby-all-dev elif [[ $OS = "centos" ]]; then pushd /etc/yum.repos.d if [[ ! -f /etc/yum.repos.d/devtools-1.1.repo ]]; then wget http://people.centos.org/tru/devtools-1.1/devtools-1.1.repo fi package devtoolset-1.1-gcc --enablerepo=testing-1.1-devtools-6 package devtoolset-1.1-gcc-c++ --enablerepo=testing-1.1-devtools-6 export CC=/opt/centos/devtoolset-1.1/root/usr/bin/gcc export CPP=/opt/centos/devtoolset-1.1/root/usr/bin/cpp export CXX=/opt/centos/devtoolset-1.1/root/usr/bin/c++ export PATH=$PATH:/opt/centos/devtoolset-1.1/root/usr/bin popd if ! rpm -qa | grep --quiet gflags; then rpm -i https://github.com/schuhschuh/gflags/releases/download/v2.1.0/gflags-devel-2.1.0-1.amd64.rpm fi package ruby package ruby-devel package rubygems package rpm-build fi fi gem_install fpm make static_lib make install INSTALL_PATH=package cd package LIB_DIR=lib if [[ -z "$ARCH" ]]; then ARCH=$(getconf LONG_BIT) fi if [[ ("$FPM_OUTPUT" = "rpm") && ($ARCH -eq 64) ]]; then mv lib lib64 LIB_DIR=lib64 fi fpm \ -s dir \ -t $FPM_OUTPUT \ -n rocksdb \ -v $1 \ --prefix /usr \ --url http://rocksdb.org/ \ -m rocksdb@fb.com \ --license BSD \ --vendor Facebook \ --description "RocksDB is an embeddable persistent key-value store for fast storage." \ include $LIB_DIR } # shellcheck disable=SC2068 main $@ rocksdb-6.11.4/build_tools/precommit_checker.py000077500000000000000000000130651370372246700216310ustar00rootroot00000000000000#!/usr/bin/env python2.7 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import argparse import commands import subprocess import sys import re import os import time # # Simple logger # class Log: def __init__(self, filename): self.filename = filename self.f = open(self.filename, 'w+', 0) def caption(self, str): line = "\n##### %s #####\n" % str if self.f: self.f.write("%s \n" % line) else: print(line) def error(self, str): data = "\n\n##### ERROR ##### %s" % str if self.f: self.f.write("%s \n" % data) else: print(data) def log(self, str): if self.f: self.f.write("%s \n" % str) else: print(str) # # Shell Environment # class Env(object): def __init__(self, logfile, tests): self.tests = tests self.log = Log(logfile) def shell(self, cmd, path=os.getcwd()): if path: os.chdir(path) self.log.log("==== shell session ===========================") self.log.log("%s> %s" % (path, cmd)) status = subprocess.call("cd %s; %s" % (path, cmd), shell=True, stdout=self.log.f, stderr=self.log.f) self.log.log("status = %s" % status) self.log.log("============================================== \n\n") return status def GetOutput(self, cmd, path=os.getcwd()): if path: os.chdir(path) self.log.log("==== shell session ===========================") self.log.log("%s> %s" % (path, cmd)) status, out = commands.getstatusoutput(cmd) self.log.log("status = %s" % status) self.log.log("out = %s" % out) self.log.log("============================================== \n\n") return status, out # # Pre-commit checker # class PreCommitChecker(Env): def __init__(self, args): Env.__init__(self, args.logfile, args.tests) self.ignore_failure = args.ignore_failure # # Get commands for a given job from the determinator file # def get_commands(self, test): status, out = self.GetOutput( "RATIO=1 build_tools/rocksdb-lego-determinator %s" % test, ".") return status, out # # Run a specific CI job # def run_test(self, test): 
self.log.caption("Running test %s locally" % test) # get commands for the CI job determinator status, cmds = self.get_commands(test) if status != 0: self.log.error("Error getting commands for test %s" % test) return False # Parse the JSON to extract the commands to run cmds = re.findall("'shell':'([^\']*)'", cmds) if len(cmds) == 0: self.log.log("No commands found") return False # Run commands for cmd in cmds: # Replace J=<..> with the local environment variable if "J" in os.environ: cmd = cmd.replace("J=1", "J=%s" % os.environ["J"]) cmd = cmd.replace("make ", "make -j%s " % os.environ["J"]) # Run the command status = self.shell(cmd, ".") if status != 0: self.log.error("Error running command %s for test %s" % (cmd, test)) return False return True # # Run specified CI jobs # def run_tests(self): if not self.tests: self.log.error("Invalid args. Please provide tests") return False self.print_separator() self.print_row("TEST", "RESULT") self.print_separator() result = True for test in self.tests: start_time = time.time() self.print_test(test) result = self.run_test(test) elapsed_min = (time.time() - start_time) / 60 if not result: self.log.error("Error running test %s" % test) self.print_result("FAIL (%dm)" % elapsed_min) if not self.ignore_failure: return False result = False else: self.print_result("PASS (%dm)" % elapsed_min) self.print_separator() return result # # Print a line # def print_separator(self): print("".ljust(60, "-")) # # Print two colums # def print_row(self, c0, c1): print("%s%s" % (c0.ljust(40), c1.ljust(20))) def print_test(self, test): print(test.ljust(40), end="") sys.stdout.flush() def print_result(self, result): print(result.ljust(20)) # # Main # parser = argparse.ArgumentParser(description='RocksDB pre-commit checker.') # --log parser.add_argument('--logfile', default='/tmp/precommit-check.log', help='Log file. 
Default is /tmp/precommit-check.log') # --ignore_failure parser.add_argument('--ignore_failure', action='store_true', default=False, help='Stop when an error occurs') # parser.add_argument('tests', nargs='+', help='CI test(s) to run. e.g: unit punit asan tsan ubsan') args = parser.parse_args() checker = PreCommitChecker(args) print("Please follow log %s" % checker.log.filename) if not checker.run_tests(): print("Error running tests. Please check log file %s" % checker.log.filename) sys.exit(1) sys.exit(0) rocksdb-6.11.4/build_tools/regression_build_test.sh000077500000000000000000000265221370372246700225300ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. set -e NUM=10000000 if [ $# -eq 1 ];then DATA_DIR=$1 elif [ $# -eq 2 ];then DATA_DIR=$1 STAT_FILE=$2 fi # On the production build servers, set data and stat # files/directories not in /tmp or else the tempdir cleaning # scripts will make you very unhappy. DATA_DIR=${DATA_DIR:-$(mktemp -t -d rocksdb_XXXX)} STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)} function cleanup { rm -rf $DATA_DIR rm -f $STAT_FILE.fillseq rm -f $STAT_FILE.readrandom rm -f $STAT_FILE.overwrite rm -f $STAT_FILE.memtablefillreadrandom } trap cleanup EXIT if [ -z $GIT_BRANCH ]; then git_br=`git rev-parse --abbrev-ref HEAD` else git_br=$(basename $GIT_BRANCH) fi if [ $git_br == "master" ]; then git_br="" else git_br="."$git_br fi make release # measure fillseq + fill up the DB for overwrite benchmark ./db_bench \ --benchmarks=fillseq \ --db=$DATA_DIR \ --use_existing_db=0 \ --bloom_bits=10 \ --num=$NUM \ --writes=$NUM \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 > ${STAT_FILE}.fillseq # measure overwrite performance ./db_bench \ --benchmarks=overwrite \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$NUM \ --writes=$((NUM / 
10)) \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=8 > ${STAT_FILE}.overwrite # fill up the db for readrandom benchmark (1GB total size) ./db_bench \ --benchmarks=fillseq \ --db=$DATA_DIR \ --use_existing_db=0 \ --bloom_bits=10 \ --num=$NUM \ --writes=$NUM \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=1 > /dev/null # measure readrandom with 6GB block cache ./db_bench \ --benchmarks=readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$NUM \ --reads=$((NUM / 5)) \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > ${STAT_FILE}.readrandom # measure readrandom with 6GB block cache and tailing iterator ./db_bench \ --benchmarks=readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$NUM \ --reads=$((NUM / 5)) \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --use_tailing_iterator=1 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > ${STAT_FILE}.readrandomtailing # measure readrandom with 100MB block cache ./db_bench \ --benchmarks=readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$NUM \ --reads=$((NUM / 5)) \ --cache_size=104857600 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > ${STAT_FILE}.readrandomsmallblockcache # measure readrandom with 8k data in memtable ./db_bench \ --benchmarks=overwrite,readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$NUM \ --reads=$((NUM / 5)) \ --writes=512 \ 
--cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --write_buffer_size=1000000000 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > ${STAT_FILE}.readrandom_mem_sst # fill up the db for readrandom benchmark with filluniquerandom (1GB total size) ./db_bench \ --benchmarks=filluniquerandom \ --db=$DATA_DIR \ --use_existing_db=0 \ --bloom_bits=10 \ --num=$((NUM / 4)) \ --writes=$((NUM / 4)) \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=1 > /dev/null # dummy test just to compact the data ./db_bench \ --benchmarks=readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$((NUM / 1000)) \ --reads=$((NUM / 1000)) \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > /dev/null # measure readrandom after load with filluniquerandom with 6GB block cache ./db_bench \ --benchmarks=readrandom \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$((NUM / 4)) \ --reads=$((NUM / 4)) \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --disable_auto_compactions=1 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > ${STAT_FILE}.readrandom_filluniquerandom # measure readwhilewriting after load with filluniquerandom with 6GB block cache ./db_bench \ --benchmarks=readwhilewriting \ --db=$DATA_DIR \ --use_existing_db=1 \ --bloom_bits=10 \ --num=$((NUM / 4)) \ --reads=$((NUM / 4)) \ --benchmark_write_rate_limit=$(( 110 * 1024 )) \ --write_buffer_size=100000000 \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=16 > 
${STAT_FILE}.readwhilewriting # measure memtable performance -- none of the data gets flushed to disk ./db_bench \ --benchmarks=fillrandom,readrandom, \ --db=$DATA_DIR \ --use_existing_db=0 \ --num=$((NUM / 10)) \ --reads=$NUM \ --cache_size=6442450944 \ --cache_numshardbits=6 \ --table_cache_numshardbits=4 \ --write_buffer_size=1000000000 \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --value_size=10 \ --threads=16 > ${STAT_FILE}.memtablefillreadrandom common_in_mem_args="--db=/dev/shm/rocksdb \ --num_levels=6 \ --key_size=20 \ --prefix_size=12 \ --keys_per_prefix=10 \ --value_size=100 \ --compression_type=none \ --compression_ratio=1 \ --hard_rate_limit=2 \ --write_buffer_size=134217728 \ --max_write_buffer_number=4 \ --level0_file_num_compaction_trigger=8 \ --level0_slowdown_writes_trigger=16 \ --level0_stop_writes_trigger=24 \ --target_file_size_base=134217728 \ --max_bytes_for_level_base=1073741824 \ --disable_wal=0 \ --wal_dir=/dev/shm/rocksdb \ --sync=0 \ --verify_checksum=1 \ --delete_obsolete_files_period_micros=314572800 \ --max_grandparent_overlap_factor=10 \ --use_plain_table=1 \ --open_files=-1 \ --mmap_read=1 \ --mmap_write=0 \ --memtablerep=prefix_hash \ --bloom_bits=10 \ --bloom_locality=1 \ --perf_level=0" # prepare a in-memory DB with 50M keys, total DB size is ~6G ./db_bench \ $common_in_mem_args \ --statistics=0 \ --max_background_compactions=16 \ --max_background_flushes=16 \ --benchmarks=filluniquerandom \ --use_existing_db=0 \ --num=52428800 \ --threads=1 > /dev/null # Readwhilewriting ./db_bench \ $common_in_mem_args \ --statistics=1 \ --max_background_compactions=4 \ --max_background_flushes=0 \ --benchmarks=readwhilewriting\ --use_existing_db=1 \ --duration=600 \ --threads=32 \ --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram # Seekrandomwhilewriting ./db_bench \ $common_in_mem_args \ --statistics=1 \ --max_background_compactions=4 \ --max_background_flushes=0 \ 
--benchmarks=seekrandomwhilewriting \ --use_existing_db=1 \ --use_tailing_iterator=1 \ --duration=600 \ --threads=32 \ --benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram # measure fillseq with bunch of column families ./db_bench \ --benchmarks=fillseq \ --num_column_families=500 \ --write_buffer_size=1048576 \ --db=$DATA_DIR \ --use_existing_db=0 \ --num=$NUM \ --writes=$NUM \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 > ${STAT_FILE}.fillseq_lots_column_families # measure overwrite performance with bunch of column families ./db_bench \ --benchmarks=overwrite \ --num_column_families=500 \ --write_buffer_size=1048576 \ --db=$DATA_DIR \ --use_existing_db=1 \ --num=$NUM \ --writes=$((NUM / 10)) \ --open_files=55000 \ --statistics=1 \ --histogram=1 \ --disable_wal=1 \ --sync=0 \ --threads=8 > ${STAT_FILE}.overwrite_lots_column_families # send data to ods function send_to_ods { key="$1" value="$2" if [ -z $JENKINS_HOME ]; then # running on devbox, just print out the values echo $1 $2 return fi if [ -z "$value" ];then echo >&2 "ERROR: Key $key doesn't have a value." 
return fi curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \ --connect-timeout 60 } function send_benchmark_to_ods { bench="$1" bench_key="$2" file="$3" QPS=$(grep $bench $file | awk '{print $5}') P50_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $3}' ) P75_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $5}' ) P99_MICROS=$(grep $bench $file -A 6 | grep "Percentiles" | awk '{print $7}' ) send_to_ods rocksdb.build.$bench_key.qps $QPS send_to_ods rocksdb.build.$bench_key.p50_micros $P50_MICROS send_to_ods rocksdb.build.$bench_key.p75_micros $P75_MICROS send_to_ods rocksdb.build.$bench_key.p99_micros $P99_MICROS } send_benchmark_to_ods overwrite overwrite $STAT_FILE.overwrite send_benchmark_to_ods fillseq fillseq $STAT_FILE.fillseq send_benchmark_to_ods readrandom readrandom $STAT_FILE.readrandom send_benchmark_to_ods readrandom readrandom_tailing $STAT_FILE.readrandomtailing send_benchmark_to_ods readrandom readrandom_smallblockcache $STAT_FILE.readrandomsmallblockcache send_benchmark_to_ods readrandom readrandom_memtable_sst $STAT_FILE.readrandom_mem_sst send_benchmark_to_ods readrandom readrandom_fillunique_random $STAT_FILE.readrandom_filluniquerandom send_benchmark_to_ods fillrandom memtablefillrandom $STAT_FILE.memtablefillreadrandom send_benchmark_to_ods readrandom memtablereadrandom $STAT_FILE.memtablefillreadrandom send_benchmark_to_ods readwhilewriting readwhilewriting $STAT_FILE.readwhilewriting send_benchmark_to_ods readwhilewriting readwhilewriting_in_ram ${STAT_FILE}.readwhilewriting_in_ram send_benchmark_to_ods seekrandomwhilewriting seekwhilewriting_in_ram ${STAT_FILE}.seekwhilewriting_in_ram send_benchmark_to_ods fillseq fillseq_lots_column_families ${STAT_FILE}.fillseq_lots_column_families send_benchmark_to_ods overwrite overwrite_lots_column_families ${STAT_FILE}.overwrite_lots_column_families 
rocksdb-6.11.4/build_tools/rocksdb-lego-determinator000077500000000000000000000675161370372246700225770ustar00rootroot00000000000000#!/usr/bin/env bash # This script is executed by Sandcastle # to determine next steps to run # Usage: # EMAIL= ONCALL= TRIGGER= SUBSCRIBER= rocks_ci.py # # Input Value # ------------------------------------------------------------------------- # EMAIL Email address to report on trigger conditions # ONCALL Email address to raise a task on failure # TRIGGER Trigger conditions for email. Valid values are fail, warn, all # SUBSCRIBER Email addresss to add as subscriber for task # # # Report configuration # REPORT_EMAIL= if [ ! -z $EMAIL ]; then if [ -z $TRIGGER ]; then TRIGGER="fail" fi REPORT_EMAIL=" { 'type':'email', 'triggers': [ '$TRIGGER' ], 'emails':['$EMAIL'] }," fi CREATE_TASK= if [ ! -z $ONCALL ]; then CREATE_TASK=" { 'type':'task', 'triggers':[ 'fail' ], 'priority':0, 'subscribers':[ '$SUBSCRIBER' ], 'tags':[ 'rocksdb', 'ci' ], }," fi # For now, create the tasks using only the dedicated task creation tool. CREATE_TASK= REPORT= if [[ ! -z $REPORT_EMAIL || ! -z $CREATE_TASK ]]; then REPORT="'report': [ $REPORT_EMAIL $CREATE_TASK ]" fi # # Helper variables # CLEANUP_ENV=" { 'name':'Cleanup environment', 'shell':'rm -rf /dev/shm/rocksdb && mkdir /dev/shm/rocksdb && (chmod +t /dev/shm || true) && make clean', 'user':'root' }" UPLOAD_DB_DIR=" { 'name':'Upload database directory', 'shell':'tar -cvzf rocksdb_db.tar.gz /dev/shm/rocksdb/', 'user':'root', 'cleanup':true, 'provide_artifacts': [ { 'name':'rocksdb_db_dir', 'paths': ['rocksdb_db.tar.gz'], 'bundle': false, }, ], }" # We will eventually set the RATIO to 1, but we want do this # in steps. 
RATIO=$(nproc) will make it work as J=1 if [ -z $RATIO ]; then RATIO=$(nproc) fi if [ -z $PARALLEL_J ]; then PARALLEL_J="J=$(expr $(nproc) / ${RATIO})" fi if [ -z $PARALLEL_j ]; then PARALLEL_j="-j$(expr $(nproc) / ${RATIO})" fi PARALLELISM="$PARALLEL_J $PARALLEL_j" DEBUG="OPT=-g" SHM="TEST_TMPDIR=/dev/shm/rocksdb" NON_SHM="TMPD=/tmp/rocksdb_test_tmp" GCC_481="ROCKSDB_FBCODE_BUILD_WITH_481=1" ASAN="COMPILE_WITH_ASAN=1" CLANG="USE_CLANG=1" # in gcc-5 there are known problems with TSAN like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71090. # using platform007 gives us gcc-8 or higher which has that bug fixed. TSAN="ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007=1 COMPILE_WITH_TSAN=1" UBSAN="COMPILE_WITH_UBSAN=1" TSAN_CRASH='CRASH_TEST_EXT_ARGS="--compression_type=zstd --log2_keys_per_lock=22"' NON_TSAN_CRASH="CRASH_TEST_EXT_ARGS=--compression_type=zstd" DISABLE_JEMALLOC="DISABLE_JEMALLOC=1" HTTP_PROXY="https_proxy=http://fwdproxy.29.prn1:8080 http_proxy=http://fwdproxy.29.prn1:8080 ftp_proxy=http://fwdproxy.29.prn1:8080" SETUP_JAVA_ENV="export $HTTP_PROXY; export JAVA_HOME=/usr/local/jdk-8u60-64/; export PATH=\$JAVA_HOME/bin:\$PATH" PARSER="'parser':'python build_tools/error_filter.py $1'" CONTRUN_NAME="ROCKSDB_CONTRUN_NAME" # This code is getting called under various scenarios. What we care about is to # understand when it's called from nightly contruns because in that case we'll # create tasks for any failures. To follow the existing pattern, we'll check # the value of $ONCALL. If it's a diff then just call `false` to make sure # that errors will be properly propagated to the caller. if [ ! -z $ONCALL ]; then TASK_CREATION_TOOL="/usr/local/bin/mysql_mtr_filter --rocksdb --oncall $ONCALL" else TASK_CREATION_TOOL="false" fi # # A mechanism to disable tests temporarily # DISABLE_COMMANDS="[ { 'name':'Disable test', 'oncall':'$ONCALL', 'steps': [ { 'name':'Job disabled. 
Please contact test owner', 'shell':'exit 1', 'user':'root' }, ], } ]" # # RocksDB unit test # UNIT_TEST_COMMANDS="[ { 'name':'Rocksdb Unit Test', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build and test RocksDB debug version', 'shell':'$SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=check $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB unit test not under /dev/shm # UNIT_TEST_NON_SHM_COMMANDS="[ { 'name':'Rocksdb Unit Test', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and test RocksDB debug version', 'timeout': 86400, 'shell':'$NON_SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=non_shm_check $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB release build and unit tests # RELEASE_BUILD_COMMANDS="[ { 'name':'Rocksdb Release Build', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build RocksDB release', 'shell':'make $PARALLEL_j release || $CONTRUN_NAME=release $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB unit test on gcc-4.8.1 # UNIT_TEST_COMMANDS_481="[ { 'name':'Rocksdb Unit Test on GCC 4.8.1', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build and test RocksDB debug version', 'shell':'$SHM $GCC_481 $DEBUG make $PARALLELISM check || $CONTRUN_NAME=unit_gcc_481_check $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB release build and unit tests # RELEASE_BUILD_COMMANDS_481="[ { 'name':'Rocksdb Release on GCC 4.8.1', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build RocksDB release on GCC 4.8.1', 'shell':'$GCC_481 make $PARALLEL_j release || $CONTRUN_NAME=release_gcc481 $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB unit test with CLANG # CLANG_UNIT_TEST_COMMANDS="[ { 'name':'Rocksdb Unit Test', 'oncall':'$ONCALL', 
'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build and test RocksDB debug', 'shell':'$CLANG $SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=clang_check $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB release build with CLANG # CLANG_RELEASE_BUILD_COMMANDS="[ { 'name':'Rocksdb CLANG Release Build', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build RocksDB release', 'shell':'$CLANG make $PARALLEL_j release|| $CONTRUN_NAME=clang_release $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB analyze # CLANG_ANALYZE_COMMANDS="[ { 'name':'Rocksdb analyze', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'RocksDB build and analyze', 'shell':'$CLANG $SHM $DEBUG make $PARALLEL_j analyze || $CONTRUN_NAME=clang_analyze $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB code coverage # CODE_COV_COMMANDS="[ { 'name':'Rocksdb Unit Test Code Coverage', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build, test and collect code coverage info', 'shell':'$SHM $DEBUG make $PARALLELISM coverage || $CONTRUN_NAME=coverage $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB unity # UNITY_COMMANDS="[ { 'name':'Rocksdb Unity', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build, test unity test', 'shell':'$SHM $DEBUG V=1 make J=1 unity_test || $CONTRUN_NAME=unity_test $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # Build RocksDB lite # LITE_BUILD_COMMANDS="[ { 'name':'Rocksdb Lite build', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build RocksDB debug version', 'shell':'make J=1 LITE=1 all check || $CONTRUN_NAME=lite $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # Report RocksDB lite binary size to scuba REPORT_LITE_BINARY_SIZE_COMMANDS="[ { 'name':'Rocksdb Lite Binary 
Size', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Report RocksDB Lite binary size to scuba', 'shell':'tools/report_lite_binary_size.sh', 'user':'root', }, ], ]" # # RocksDB stress/crash test # STRESS_CRASH_TEST_COMMANDS="[ { 'name':'Rocksdb Stress and Crash Test', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug stress tests', 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', 'user':'root', $PARSER }, { 'name':'Build and run RocksDB debug crash tests', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test || $CONTRUN_NAME=crash_test $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB stress/crash test with atomic flush # STRESS_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { 'name':'Rocksdb Stress and Crash Test with atomic flush', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug stress tests', 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', 'user':'root', $PARSER }, { 'name':'Build and run RocksDB debug crash tests with atomic flush', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test_with_atomic_flush || $CONTRUN_NAME=crash_test_with_atomic_flush $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB stress/crash test with txn # STRESS_CRASH_TEST_WITH_TXN_COMMANDS="[ { 'name':'Rocksdb Stress and Crash Test with txn', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug stress tests', 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', 'user':'root', $PARSER }, { 'name':'Build and run RocksDB debug crash tests with txn', 'timeout': 
86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test_with_txn || $CONTRUN_NAME=crash_test_with_txn $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # RocksDB write stress test. # We run on disk device on purpose (i.e. no $SHM) # because we want to add some randomness to fsync commands WRITE_STRESS_COMMANDS="[ { 'name':'Rocksdb Write Stress Test', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB write stress tests', 'shell':'make write_stress && python tools/write_stress_runner.py --runtime_sec=3600 --db=/tmp/rocksdb_write_stress || $CONTRUN_NAME=write_stress $TASK_CREATION_TOOL', 'user':'root', $PARSER } ], 'artifacts': [{'name': 'database', 'paths': ['/tmp/rocksdb_write_stress']}], $REPORT } ]" # # RocksDB test under address sanitizer # ASAN_TEST_COMMANDS="[ { 'name':'Rocksdb Unit Test under ASAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Test RocksDB debug under ASAN', 'shell':'set -o pipefail && ($SHM $ASAN $DEBUG make $PARALLELISM asan_check || $CONTRUN_NAME=asan_check $TASK_CREATION_TOOL) |& /usr/facebook/ops/scripts/asan_symbolize.py -d', 'user':'root', $PARSER } ], $REPORT } ]" # # RocksDB crash testing under address sanitizer # ASAN_CRASH_TEST_COMMANDS="[ { 'name':'Rocksdb crash test under ASAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug asan_crash_test', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test || $CONTRUN_NAME=asan_crash_test $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB crash testing with atomic flush under address sanitizer # ASAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { 'name':'Rocksdb crash test with atomic flush under ASAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug 
asan_crash_test_with_atomic_flush', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test_with_atomic_flush || $CONTRUN_NAME=asan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB crash testing with txn under address sanitizer # ASAN_CRASH_TEST_WITH_TXN_COMMANDS="[ { 'name':'Rocksdb crash test with txn under ASAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug asan_crash_test_with_txn', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test_with_txn || $CONTRUN_NAME=asan_crash_test_with_txn $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB test under undefined behavior sanitizer # UBSAN_TEST_COMMANDS="[ { 'name':'Rocksdb Unit Test under UBSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Test RocksDB debug under UBSAN', 'shell':'set -o pipefail && $SHM $UBSAN $CLANG $DEBUG make $PARALLELISM ubsan_check || $CONTRUN_NAME=ubsan_check $TASK_CREATION_TOOL', 'user':'root', $PARSER } ], $REPORT } ]" # # RocksDB crash testing under udnefined behavior sanitizer # UBSAN_CRASH_TEST_COMMANDS="[ { 'name':'Rocksdb crash test under UBSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug ubsan_crash_test', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH $CLANG make J=1 ubsan_crash_test || $CONTRUN_NAME=ubsan_crash_test $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB crash testing with atomic flush under undefined behavior sanitizer # UBSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { 'name':'Rocksdb crash test with atomic flush under UBSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug 
ubsan_crash_test_with_atomic_flush', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH $CLANG make J=1 ubsan_crash_test_with_atomic_flush || $CONTRUN_NAME=ubsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB crash testing with txn under undefined behavior sanitizer # UBSAN_CRASH_TEST_WITH_TXN_COMMANDS="[ { 'name':'Rocksdb crash test with txn under UBSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Build and run RocksDB debug ubsan_crash_test_with_txn', 'timeout': 86400, 'shell':'$SHM $DEBUG $NON_TSAN_CRASH $CLANG make J=1 ubsan_crash_test_with_txn || $CONTRUN_NAME=ubsan_crash_test_with_txn $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB unit test under valgrind # VALGRIND_TEST_COMMANDS="[ { 'name':'Rocksdb Unit Test under valgrind', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Run RocksDB debug unit tests', 'timeout': 86400, 'shell':'$SHM $DEBUG make $PARALLELISM valgrind_test || $CONTRUN_NAME=valgrind_check $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB test under TSAN # TSAN_UNIT_TEST_COMMANDS="[ { 'name':'Rocksdb Unit Test under TSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Run RocksDB debug unit test', 'timeout': 86400, 'shell':'set -o pipefail && $SHM $DEBUG $TSAN make $PARALLELISM check || $CONTRUN_NAME=tsan_check $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB crash test under TSAN # TSAN_CRASH_TEST_COMMANDS="[ { 'name':'Rocksdb Crash Test under TSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Compile and run', 'timeout': 86400, 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test || $CONTRUN_NAME=tsan_crash_test 
$TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB crash test with atomic flush under TSAN # TSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { 'name':'Rocksdb Crash Test with atomic flush under TSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Compile and run', 'timeout': 86400, 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test_with_atomic_flush || $CONTRUN_NAME=tsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB crash test with txn under TSAN # TSAN_CRASH_TEST_WITH_TXN_COMMANDS="[ { 'name':'Rocksdb Crash Test with txn under TSAN', 'oncall':'$ONCALL', 'executeLocal': 'true', 'timeout': 86400, 'steps': [ $CLEANUP_ENV, { 'name':'Compile and run', 'timeout': 86400, 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test_with_txn || $CONTRUN_NAME=tsan_crash_test_with_txn $TASK_CREATION_TOOL', 'user':'root', $PARSER }, $UPLOAD_DB_DIR, ], $REPORT } ]" # # RocksDB format compatible # run_format_compatible() { export TEST_TMPDIR=/dev/shm/rocksdb rm -rf /dev/shm/rocksdb mkdir /dev/shm/rocksdb export https_proxy="fwdproxy:8080" tools/check_format_compatible.sh } FORMAT_COMPATIBLE_COMMANDS="[ { 'name':'Rocksdb Format Compatible tests', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Run RocksDB debug unit test', 'shell':'build_tools/rocksdb-lego-determinator run_format_compatible || $CONTRUN_NAME=run_format_compatible $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB no compression # run_no_compression() { export TEST_TMPDIR=/dev/shm/rocksdb rm -rf /dev/shm/rocksdb mkdir /dev/shm/rocksdb make clean cat build_tools/fbcode_config.sh | grep -iv dzstd | grep -iv dzlib | grep -iv dlz4 | grep -iv dsnappy | grep -iv dbzip2 > .tmp.fbcode_config.sh mv 
.tmp.fbcode_config.sh build_tools/fbcode_config.sh cat Makefile | grep -v tools/ldb_test.py > .tmp.Makefile mv .tmp.Makefile Makefile make $DEBUG J=1 check } NO_COMPRESSION_COMMANDS="[ { 'name':'Rocksdb No Compression tests', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Run RocksDB debug unit test', 'shell':'build_tools/rocksdb-lego-determinator run_no_compression || $CONTRUN_NAME=run_no_compression $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB regression # run_regression() { time -v bash -vx ./build_tools/regression_build_test.sh $(mktemp -d $WORKSPACE/leveldb.XXXX) $(mktemp leveldb_test_stats.XXXX) # ======= report size to ODS ======== # parameters: $1 -- key, $2 -- value function send_size_to_ods { curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=rocksdb.build_size.$1&value=$2" \ --connect-timeout 60 } # === normal build === make clean make -j$(nproc) static_lib send_size_to_ods static_lib $(stat --printf="%s" librocksdb.a) strip librocksdb.a send_size_to_ods static_lib_stripped $(stat --printf="%s" librocksdb.a) make -j$(nproc) shared_lib send_size_to_ods shared_lib $(stat --printf="%s" `readlink -f librocksdb.so`) strip `readlink -f librocksdb.so` send_size_to_ods shared_lib_stripped $(stat --printf="%s" `readlink -f librocksdb.so`) # === lite build === make clean make LITE=1 -j$(nproc) static_lib send_size_to_ods static_lib_lite $(stat --printf="%s" librocksdb.a) strip librocksdb.a send_size_to_ods static_lib_lite_stripped $(stat --printf="%s" librocksdb.a) make LITE=1 -j$(nproc) shared_lib send_size_to_ods shared_lib_lite $(stat --printf="%s" `readlink -f librocksdb.so`) strip `readlink -f librocksdb.so` send_size_to_ods shared_lib_lite_stripped $(stat --printf="%s" `readlink -f librocksdb.so`) } REGRESSION_COMMANDS="[ { 'name':'Rocksdb regression commands', 'oncall':'$ONCALL', 'steps': [ $CLEANUP_ENV, { 'name':'Make and run script', 
'shell':'build_tools/rocksdb-lego-determinator run_regression || $CONTRUN_NAME=run_regression $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" # # RocksDB Java build # JAVA_BUILD_TEST_COMMANDS="[ { 'name':'Rocksdb Java Build', 'oncall':'$ONCALL', 'executeLocal': 'true', 'steps': [ $CLEANUP_ENV, { 'name':'Build RocksDB for Java', 'shell':'$SETUP_JAVA_ENV; $SHM make rocksdbjava || $CONTRUN_NAME=rocksdbjava $TASK_CREATION_TOOL', 'user':'root', $PARSER }, ], $REPORT } ]" case $1 in unit) echo $UNIT_TEST_COMMANDS ;; unit_non_shm) echo $UNIT_TEST_NON_SHM_COMMANDS ;; release) echo $RELEASE_BUILD_COMMANDS ;; unit_481) echo $UNIT_TEST_COMMANDS_481 ;; release_481) echo $RELEASE_BUILD_COMMANDS_481 ;; clang_unit) echo $CLANG_UNIT_TEST_COMMANDS ;; clang_release) echo $CLANG_RELEASE_BUILD_COMMANDS ;; clang_analyze) echo $CLANG_ANALYZE_COMMANDS ;; code_cov) echo $CODE_COV_COMMANDS ;; unity) echo $UNITY_COMMANDS ;; lite) echo $LITE_BUILD_COMMANDS ;; report_lite_binary_size) echo $REPORT_LITE_BINARY_SIZE_COMMANDS ;; stress_crash) echo $STRESS_CRASH_TEST_COMMANDS ;; stress_crash_with_atomic_flush) echo $STRESS_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; stress_crash_with_txn) echo $STRESS_CRASH_TEST_WITH_TXN_COMMANDS ;; write_stress) echo $WRITE_STRESS_COMMANDS ;; asan) echo $ASAN_TEST_COMMANDS ;; asan_crash) echo $ASAN_CRASH_TEST_COMMANDS ;; asan_crash_with_atomic_flush) echo $ASAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; asan_crash_with_txn) echo $ASAN_CRASH_TEST_WITH_TXN_COMMANDS ;; ubsan) echo $UBSAN_TEST_COMMANDS ;; ubsan_crash) echo $UBSAN_CRASH_TEST_COMMANDS ;; ubsan_crash_with_atomic_flush) echo $UBSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; ubsan_crash_with_txn) echo $UBSAN_CRASH_TEST_WITH_TXN_COMMANDS ;; valgrind) echo $VALGRIND_TEST_COMMANDS ;; tsan) echo $TSAN_UNIT_TEST_COMMANDS ;; tsan_crash) echo $TSAN_CRASH_TEST_COMMANDS ;; tsan_crash_with_atomic_flush) echo $TSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; tsan_crash_with_txn) echo 
$TSAN_CRASH_TEST_WITH_TXN_COMMANDS ;; format_compatible) echo $FORMAT_COMPATIBLE_COMMANDS ;; run_format_compatible) run_format_compatible ;; no_compression) echo $NO_COMPRESSION_COMMANDS ;; run_no_compression) run_no_compression ;; regression) echo $REGRESSION_COMMANDS ;; run_regression) run_regression ;; java_build) echo $JAVA_BUILD_TEST_COMMANDS ;; *) echo "Invalid determinator command" exit 1 ;; esac rocksdb-6.11.4/build_tools/run_ci_db_test.ps1000066400000000000000000000350041370372246700211760ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # This script enables you running RocksDB tests by running # All the tests concurrently and utilizing all the cores Param( [switch]$EnableJE = $false, # Look for and use test executable, append _je to listed exclusions [switch]$RunAll = $false, # Will attempt discover all *_test[_je].exe binaries and run all # of them as Google suites. I.e. It will run test cases concurrently # except those mentioned as $Run, those will run as individual test cases # And any execlued with $ExcludeExes or $ExcludeCases # It will also not run any individual test cases # excluded but $ExcludeCasese [switch]$RunAllExe = $false, # Look for and use test exdcutables, append _je to exclusions automatically # It will attempt to run them in parallel w/o breaking them up on individual # test cases. Those listed with $ExcludeExes will be excluded [string]$SuiteRun = "", # Split test suites in test cases and run in parallel, not compatible with $RunAll [string]$Run = "", # Run specified executables in parallel but do not split to test cases [string]$ExcludeCases = "", # Exclude test cases, expects a comma separated list, no spaces # Takes effect when $RunAll or $SuiteRun is specified. Must have full # Test cases name including a group and a parameter if any [string]$ExcludeExes = "", # Exclude exes from consideration, expects a comma separated list, # no spaces. 
Takes effect only when $RunAll is specified [string]$WorkFolder = "", # Direct tests to use that folder. SSD or Ram drive are better options. # Number of async tasks that would run concurrently. Recommend a number below 64. # However, CPU utlization really depends on the storage media. Recommend ram based disk. # a value of 1 will run everything serially [int]$Concurrency = 8, [int]$Limit = -1 # -1 means do not limit for test purposes ) # Folders and commands must be fullpath to run assuming # the current folder is at the root of the git enlistment $StartDate = (Get-Date) $StartDate $DebugPreference = "Continue" # These tests are not google test suites and we should guard # Against running them as suites $RunOnly = New-Object System.Collections.Generic.HashSet[string] $RunOnly.Add("c_test") | Out-Null $RunOnly.Add("compact_on_deletion_collector_test") | Out-Null $RunOnly.Add("merge_test") | Out-Null $RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written $RunOnly.Add("backupable_db_test") | Out-Null # Disabled $RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest if($RunAll -and $SuiteRun -ne "") { Write-Error "$RunAll and $SuiteRun are not compatible" exit 1 } if($RunAllExe -and $Run -ne "") { Write-Error "$RunAllExe and $Run are not compatible" exit 1 } # If running under Appveyor assume that root [string]$Appveyor = $Env:APPVEYOR_BUILD_FOLDER if($Appveyor -ne "") { $RootFolder = $Appveyor } else { $RootFolder = $PSScriptRoot -replace '\\build_tools', '' } $LogFolder = -Join($RootFolder, "\db_logs\") $BinariesFolder = -Join($RootFolder, "\build\Debug\") if($WorkFolder -eq "") { # If TEST_TMPDIR is set use it [string]$var = $Env:TEST_TMPDIR if($var -eq "") { $WorkFolder = -Join($RootFolder, "\db_tests\") $Env:TEST_TMPDIR = $WorkFolder } else { $WorkFolder = $var } } else { # Override from a command line $Env:TEST_TMPDIR = $WorkFolder } Write-Output "Root: $RootFolder, WorkFolder: $WorkFolder" Write-Output "BinariesFolder: $BinariesFolder, 
LogFolder: $LogFolder" # Create test directories in the current folder md -Path $WorkFolder -ErrorAction Ignore | Out-Null md -Path $LogFolder -ErrorAction Ignore | Out-Null $ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string] if($ExcludeCases -ne "") { Write-Host "ExcludeCases: $ExcludeCases" $l = $ExcludeCases -split ' ' ForEach($t in $l) { $ExcludeCasesSet.Add($t) | Out-Null } } $ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string] if($ExcludeExes -ne "") { Write-Host "ExcludeExe: $ExcludeExes" $l = $ExcludeExes -split ' ' ForEach($t in $l) { $ExcludeExesSet.Add($t) | Out-Null } } # Extract the names of its tests by running db_test with --gtest_list_tests. # This filter removes the "#"-introduced comments, and expands to # fully-qualified names by changing input like this: # # DBTest. # Empty # WriteEmptyBatch # MultiThreaded/MultiThreadedDBTest. # MultiThreaded/0 # GetParam() = 0 # MultiThreaded/1 # GetParam() = 1 # # into this: # # DBTest.Empty # DBTest.WriteEmptyBatch # MultiThreaded/MultiThreadedDBTest.MultiThreaded/0 # MultiThreaded/MultiThreadedDBTest.MultiThreaded/1 # # Output into the parameter in a form TestName -> Log File Name function ExtractTestCases([string]$GTestExe, $HashTable) { $Tests = @() # Run db_test to get a list of tests and store it into $a array &$GTestExe --gtest_list_tests | tee -Variable Tests | Out-Null # Current group $Group="" ForEach( $l in $Tests) { # Leading whitespace is fine $l = $l -replace '^\s+','' # Trailing dot is a test group but no whitespace if ($l -match "\.$" -and $l -notmatch "\s+") { $Group = $l } else { # Otherwise it is a test name, remove leading space $test = $l # remove trailing comment if any and create a log name $test = $test -replace '\s+\#.*','' $test = "$Group$test" if($ExcludeCasesSet.Contains($test)) { Write-Warning "$test case is excluded" continue } $test_log = $test -replace '[\./]','_' $test_log += ".log" $log_path = -join ($LogFolder, $test_log) # Add to a 
hashtable $HashTable.Add($test, $log_path); } } } # The function removes trailing .exe siffix if any, # creates a name for the log file # Then adds the test name if it was not excluded into # a HashTable in a form of test_name -> log_path function MakeAndAdd([string]$token, $HashTable) { $test_name = $token -replace '.exe$', '' $log_name = -join ($test_name, ".log") $log_path = -join ($LogFolder, $log_name) $HashTable.Add($test_name, $log_path) } # This function takes a list of Suites to run # Lists all the test cases in each of the suite # and populates HashOfHashes # Ordered by suite(exe) @{ Exe = @{ TestCase = LogName }} function ProcessSuites($ListOfSuites, $HashOfHashes) { $suite_list = $ListOfSuites # Problem: if you run --gtest_list_tests on # a non Google Test executable then it will start executing # and we will get nowhere ForEach($suite in $suite_list) { if($RunOnly.Contains($suite)) { Write-Warning "$suite is excluded from running as Google test suite" continue } if($EnableJE) { $suite += "_je" } $Cases = [ordered]@{} $Cases.Clear() $suite_exe = -Join ($BinariesFolder, $suite) ExtractTestCases -GTestExe $suite_exe -HashTable $Cases if($Cases.Count -gt 0) { $HashOfHashes.Add($suite, $Cases); } } # Make logs and run if($CasesToRun.Count -lt 1) { Write-Error "Failed to extract tests from $SuiteRun" exit 1 } } # This will contain all test executables to run # Hash table that contains all non suite # Test executable to run $TestExes = [ordered]@{} # Check for test exe that are not # Google Test Suites # Since this is explicitely mentioned it is not subject # for exclusions if($Run -ne "") { $test_list = $Run -split ' ' ForEach($t in $test_list) { if($EnableJE) { $t += "_je" } MakeAndAdd -token $t -HashTable $TestExes } if($TestExes.Count -lt 1) { Write-Error "Failed to extract tests from $Run" exit 1 } } elseif($RunAllExe) { # Discover all the test binaries if($EnableJE) { $pattern = "*_test_je.exe" } else { $pattern = "*_test.exe" } $search_path = -join 
($BinariesFolder, $pattern) Write-Host "Binaries Search Path: $search_path" $DiscoveredExe = @() dir -Path $search_path | ForEach-Object { $DiscoveredExe += ($_.Name) } # Remove exclusions ForEach($e in $DiscoveredExe) { $e = $e -replace '.exe$', '' $bare_name = $e -replace '_je$', '' if($ExcludeExesSet.Contains($bare_name)) { Write-Warning "Test $e is excluded" continue } MakeAndAdd -token $e -HashTable $TestExes } if($TestExes.Count -lt 1) { Write-Error "Failed to discover test executables" exit 1 } } # Ordered by exe @{ Exe = @{ TestCase = LogName }} $CasesToRun = [ordered]@{} if($SuiteRun -ne "") { $suite_list = $SuiteRun -split ' ' ProcessSuites -ListOfSuites $suite_list -HashOfHashes $CasesToRun } elseif ($RunAll) { # Discover all the test binaries if($EnableJE) { $pattern = "*_test_je.exe" } else { $pattern = "*_test.exe" } $search_path = -join ($BinariesFolder, $pattern) Write-Host "Binaries Search Path: $search_path" $ListOfExe = @() dir -Path $search_path | ForEach-Object { $ListOfExe += ($_.Name) } # Exclude those in RunOnly from running as suites $ListOfSuites = @() ForEach($e in $ListOfExe) { $e = $e -replace '.exe$', '' $bare_name = $e -replace '_je$', '' if($ExcludeExesSet.Contains($bare_name)) { Write-Warning "Test $e is excluded" continue } if($RunOnly.Contains($bare_name)) { MakeAndAdd -token $e -HashTable $TestExes } else { $ListOfSuites += $bare_name } } ProcessSuites -ListOfSuites $ListOfSuites -HashOfHashes $CasesToRun } # Invoke a test with a filter and redirect all output $InvokeTestCase = { param($exe, $test, $log); &$exe --gtest_filter=$test > $log 2>&1 } # Invoke all tests and redirect output $InvokeTestAsync = { param($exe, $log) &$exe > $log 2>&1 } # Hash that contains tests to rerun if any failed # Those tests will be rerun sequentially # $Rerun = [ordered]@{} # Test limiting factor here [int]$count = 0 # Overall status [bool]$script:success = $true; function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal) { # Array to wait for any 
of the running jobs $jobs = @() # Hash JobToLog $JobToLog = @{} # Wait for all to finish and get the results while(($JobToLog.Count -gt 0) -or ($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) { # Make sure we have maximum concurrent jobs running if anything # and the $Limit either not set or allows to proceed while(($JobToLog.Count -lt $ConcurrencyVal) -and ((($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) -and (($Limit -lt 0) -or ($count -lt $Limit)))) { # We always favore suites to run if available [string]$exe_name = "" [string]$log_path = "" $Cases = @{} if($Suites.Count -gt 0) { # Will the first one ForEach($e in $Suites.Keys) { $exe_name = $e $Cases = $Suites[$e] break } [string]$test_case = "" [string]$log_path = "" ForEach($c in $Cases.Keys) { $test_case = $c $log_path = $Cases[$c] break } Write-Host "Starting $exe_name::$test_case" [string]$Exe = -Join ($BinariesFolder, $exe_name) $job = Start-Job -Name "$exe_name::$test_case" -ArgumentList @($Exe,$test_case,$log_path) -ScriptBlock $InvokeTestCase $JobToLog.Add($job, $log_path) $Cases.Remove($test_case) if($Cases.Count -lt 1) { $Suites.Remove($exe_name) } } elseif ($TestCmds.Count -gt 0) { ForEach($e in $TestCmds.Keys) { $exe_name = $e $log_path = $TestCmds[$e] break } Write-Host "Starting $exe_name" [string]$Exe = -Join ($BinariesFolder, $exe_name) $job = Start-Job -Name $exe_name -ScriptBlock $InvokeTestAsync -ArgumentList @($Exe,$log_path) $JobToLog.Add($job, $log_path) $TestCmds.Remove($exe_name) } else { Write-Error "In the job loop but nothing to run" exit 1 } ++$count } # End of Job starting loop if($JobToLog.Count -lt 1) { break } $jobs = @() foreach($k in $JobToLog.Keys) { $jobs += $k } $completed = Wait-Job -Job $jobs -Any $log = $JobToLog[$completed] $JobToLog.Remove($completed) $message = -join @($completed.Name, " State: ", ($completed.State)) $log_content = @(Get-Content $log) if($completed.State -ne "Completed") { $script:success = $false Write-Warning $message $log_content | 
Write-Warning } else { # Scan the log. If we find PASSED and no occurrence of FAILED # then it is a success [bool]$pass_found = $false ForEach($l in $log_content) { if(($l -match "^\[\s+FAILED") -or ($l -match "Assertion failed:")) { $pass_found = $false break } if(($l -match "^\[\s+PASSED") -or ($l -match " : PASSED$") -or ($l -match "^PASS$") -or # Special c_test case ($l -match "Passed all tests!") ) { $pass_found = $true } } if(!$pass_found) { $script:success = $false; Write-Warning $message $log_content | Write-Warning } else { Write-Host $message } } # Remove cached job info from the system # Should be no output Receive-Job -Job $completed | Out-Null } } RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency $EndDate = (Get-Date) New-TimeSpan -Start $StartDate -End $EndDate | ForEach-Object { "Elapsed time: {0:g}" -f $_ } if(!$script:success) { # This does not succeed killing off jobs quick # So we simply exit # Remove-Job -Job $jobs -Force # indicate failure using this exit code exit 1 } exit 0 rocksdb-6.11.4/build_tools/setup_centos7.sh000077500000000000000000000022621370372246700207270ustar00rootroot00000000000000#!/bin/bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
set -ex ROCKSDB_VERSION="6.7.3" ZSTD_VERSION="1.4.4" echo "This script configures CentOS with everything needed to build and run RocksDB" yum update -y && yum install epel-release -y yum install -y \ wget \ gcc-c++ \ snappy snappy-devel \ zlib zlib-devel \ bzip2 bzip2-devel \ lz4-devel \ libasan \ gflags mkdir -pv /usr/local/rocksdb-${ROCKSDB_VERSION} ln -sfT /usr/local/rocksdb-${ROCKSDB_VERSION} /usr/local/rocksdb wget -qO /tmp/zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz wget -qO /tmp/rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz cd /tmp tar xzvf zstd-${ZSTD_VERSION}.tar.gz tar xzvf rocksdb-${ROCKSDB_VERSION}.tar.gz -C /usr/local/ echo "Installing ZSTD..." pushd zstd-${ZSTD_VERSION} make && make install popd echo "Compiling RocksDB..." cd /usr/local/rocksdb chown -R vagrant:vagrant /usr/local/rocksdb/ sudo -u vagrant make static_lib cd examples/ sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ make all sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ ./c_simple_example rocksdb-6.11.4/build_tools/update_dependencies.sh000077500000000000000000000126061370372246700221200ustar00rootroot00000000000000#!/bin/sh # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # # Update dependencies.sh file with the latest avaliable versions BASEDIR=$(dirname $0) OUTPUT="" function log_header() { echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved." 
>> "$OUTPUT" } function log_variable() { echo "$1=${!1}" >> "$OUTPUT" } TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2" ## $1 => lib name ## $2 => lib version (if not provided, will try to pick latest) ## $3 => platform (if not provided, will try to pick latest gcc) ## ## get_lib_base will set a variable named ${LIB_NAME}_BASE to the lib location function get_lib_base() { local lib_name=$1 local lib_version=$2 local lib_platform=$3 local result="$TP2_LATEST/$lib_name/" # Lib Version if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then # version is not provided, use latest result=`ls -dr1v $result/*/ | head -n1` else result="$result/$lib_version/" fi # Lib Platform if [ -z "$lib_platform" ]; then # platform is not provided, use latest gcc result=`ls -dr1v $result/gcc-*[^fb]/ | head -n1` else echo $lib_platform result="$result/$lib_platform/" fi result=`ls -1d $result/*/ | head -n1` # lib_name => LIB_NAME_BASE local __res_var=${lib_name^^}"_BASE" __res_var=`echo $__res_var | tr - _` # LIB_NAME_BASE=$result eval $__res_var=`readlink -f $result` log_variable $__res_var } ########################################################### # platform007 dependencies # ########################################################### OUTPUT="$BASEDIR/dependencies_platform007.sh" rm -f "$OUTPUT" touch "$OUTPUT" echo "Writing dependencies to $OUTPUT" # Compilers locations GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/` CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/` log_header log_variable GCC_BASE log_variable CLANG_BASE # Libraries locations get_lib_base libgcc 7.x platform007 get_lib_base glibc 2.26 platform007 get_lib_base snappy LATEST platform007 get_lib_base zlib LATEST platform007 get_lib_base bzip2 LATEST platform007 get_lib_base lz4 LATEST platform007 get_lib_base zstd LATEST platform007 get_lib_base gflags LATEST platform007 get_lib_base jemalloc LATEST platform007 get_lib_base numa LATEST platform007 get_lib_base libunwind 
LATEST platform007 get_lib_base tbb LATEST platform007 get_lib_base liburing LATEST platform007 get_lib_base kernel-headers fb platform007 get_lib_base binutils LATEST centos7-native get_lib_base valgrind LATEST platform007 get_lib_base lua 5.3.4 platform007 git diff $OUTPUT ########################################################### # 5.x dependencies # ########################################################### OUTPUT="$BASEDIR/dependencies.sh" rm -f "$OUTPUT" touch "$OUTPUT" echo "Writing dependencies to $OUTPUT" # Compilers locations GCC_BASE=`readlink -f $TP2_LATEST/gcc/5.x/centos7-native/*/` CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/` log_header log_variable GCC_BASE log_variable CLANG_BASE # Libraries locations get_lib_base libgcc 5.x gcc-5-glibc-2.23 get_lib_base glibc 2.23 gcc-5-glibc-2.23 get_lib_base snappy LATEST gcc-5-glibc-2.23 get_lib_base zlib LATEST gcc-5-glibc-2.23 get_lib_base bzip2 LATEST gcc-5-glibc-2.23 get_lib_base lz4 LATEST gcc-5-glibc-2.23 get_lib_base zstd LATEST gcc-5-glibc-2.23 get_lib_base gflags LATEST gcc-5-glibc-2.23 get_lib_base jemalloc LATEST gcc-5-glibc-2.23 get_lib_base numa LATEST gcc-5-glibc-2.23 get_lib_base libunwind LATEST gcc-5-glibc-2.23 get_lib_base tbb LATEST gcc-5-glibc-2.23 get_lib_base kernel-headers 4.0.9-36_fbk5_2933_gd092e3f gcc-5-glibc-2.23 get_lib_base binutils LATEST centos7-native get_lib_base valgrind LATEST gcc-5-glibc-2.23 get_lib_base lua 5.2.3 gcc-5-glibc-2.23 git diff $OUTPUT ########################################################### # 4.8.1 dependencies # ########################################################### OUTPUT="$BASEDIR/dependencies_4.8.1.sh" rm -f "$OUTPUT" touch "$OUTPUT" echo "Writing 4.8.1 dependencies to $OUTPUT" # Compilers locations GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/` CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/` log_header log_variable GCC_BASE log_variable CLANG_BASE # Libraries locations get_lib_base 
libgcc 4.8.1 gcc-4.8.1-glibc-2.17 get_lib_base glibc 2.17 gcc-4.8.1-glibc-2.17 get_lib_base snappy LATEST gcc-4.8.1-glibc-2.17 get_lib_base zlib LATEST gcc-4.8.1-glibc-2.17 get_lib_base bzip2 LATEST gcc-4.8.1-glibc-2.17 get_lib_base lz4 LATEST gcc-4.8.1-glibc-2.17 get_lib_base zstd LATEST gcc-4.8.1-glibc-2.17 get_lib_base gflags LATEST gcc-4.8.1-glibc-2.17 get_lib_base jemalloc LATEST gcc-4.8.1-glibc-2.17 get_lib_base numa LATEST gcc-4.8.1-glibc-2.17 get_lib_base libunwind LATEST gcc-4.8.1-glibc-2.17 get_lib_base tbb 4.0_update2 gcc-4.8.1-glibc-2.17 get_lib_base kernel-headers LATEST gcc-4.8.1-glibc-2.17 get_lib_base binutils LATEST centos6-native get_lib_base valgrind 3.8.1 gcc-4.8.1-glibc-2.17 get_lib_base lua 5.2.3 centos6-native git diff $OUTPUT rocksdb-6.11.4/build_tools/version.sh000077500000000000000000000014011370372246700176040ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. if [ "$#" = "0" ]; then echo "Usage: $0 major|minor|patch|full" exit 1 fi if [ "$1" = "major" ]; then cat include/rocksdb/version.h | grep MAJOR | head -n1 | awk '{print $3}' fi if [ "$1" = "minor" ]; then cat include/rocksdb/version.h | grep MINOR | head -n1 | awk '{print $3}' fi if [ "$1" = "patch" ]; then cat include/rocksdb/version.h | grep PATCH | head -n1 | awk '{print $3}' fi if [ "$1" = "full" ]; then awk '/#define ROCKSDB/ { env[$2] = $3 } END { printf "%s.%s.%s\n", env["ROCKSDB_MAJOR"], env["ROCKSDB_MINOR"], env["ROCKSDB_PATCH"] }' \ include/rocksdb/version.h fi rocksdb-6.11.4/cache/000077500000000000000000000000001370372246700143105ustar00rootroot00000000000000rocksdb-6.11.4/cache/cache.cc000066400000000000000000000051051370372246700156630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "rocksdb/cache.h" #include "cache/lru_cache.h" #include "options/options_helper.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE static std::unordered_map lru_cache_options_type_info = { {"capacity", {offsetof(struct LRUCacheOptions, capacity), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct LRUCacheOptions, capacity)}}, {"num_shard_bits", {offsetof(struct LRUCacheOptions, num_shard_bits), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct LRUCacheOptions, num_shard_bits)}}, {"strict_capacity_limit", {offsetof(struct LRUCacheOptions, strict_capacity_limit), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct LRUCacheOptions, strict_capacity_limit)}}, {"high_pri_pool_ratio", {offsetof(struct LRUCacheOptions, high_pri_pool_ratio), OptionType::kDouble, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct LRUCacheOptions, high_pri_pool_ratio)}}}; #endif // ROCKSDB_LITE Status Cache::CreateFromString(const ConfigOptions& config_options, const std::string& value, std::shared_ptr* result) { Status status; std::shared_ptr cache; if (value.find('=') == std::string::npos) { cache = NewLRUCache(ParseSizeT(value)); } else { #ifndef ROCKSDB_LITE LRUCacheOptions cache_opts; status = OptionTypeInfo::ParseStruct( config_options, "", &lru_cache_options_type_info, "", value, reinterpret_cast(&cache_opts)); if (status.ok()) { cache = NewLRUCache(cache_opts); } #else 
(void)config_options; status = Status::NotSupported("Cannot load cache in LITE mode ", value); #endif //! ROCKSDB_LITE } if (status.ok()) { result->swap(cache); } return status; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/cache/cache_bench.cc000066400000000000000000000257441370372246700170350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef GFLAGS #include int main() { fprintf(stderr, "Please install gflags to run rocksdb tools\n"); return 1; } #else #include #include #include #include #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "util/coding.h" #include "util/gflags_compat.h" #include "util/hash.h" #include "util/mutexlock.h" #include "util/random.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; static constexpr uint32_t KiB = uint32_t{1} << 10; static constexpr uint32_t MiB = KiB << 10; static constexpr uint64_t GiB = MiB << 10; DEFINE_uint32(threads, 16, "Number of concurrent threads to run."); DEFINE_uint64(cache_size, 1 * GiB, "Number of bytes to use as a cache of uncompressed data."); DEFINE_uint32(num_shard_bits, 6, "shard_bits."); DEFINE_double(resident_ratio, 0.25, "Ratio of keys fitting in cache to keyspace."); DEFINE_uint64(ops_per_thread, 0, "Number of operations per thread. 
(Default: 5 * keyspace size)"); DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added."); DEFINE_uint32(skew, 5, "Degree of skew in key selection"); DEFINE_bool(populate_cache, true, "Populate cache before operations"); DEFINE_uint32(lookup_insert_percent, 87, "Ratio of lookup (+ insert on not found) to total workload " "(expressed as a percentage)"); DEFINE_uint32(insert_percent, 2, "Ratio of insert to total workload (expressed as a percentage)"); DEFINE_uint32(lookup_percent, 10, "Ratio of lookup to total workload (expressed as a percentage)"); DEFINE_uint32(erase_percent, 1, "Ratio of erase to total workload (expressed as a percentage)"); DEFINE_bool(use_clock_cache, false, ""); namespace ROCKSDB_NAMESPACE { class CacheBench; namespace { // State shared by all concurrent executions of the same benchmark. class SharedState { public: explicit SharedState(CacheBench* cache_bench) : cv_(&mu_), num_initialized_(0), start_(false), num_done_(0), cache_bench_(cache_bench) {} ~SharedState() {} port::Mutex* GetMutex() { return &mu_; } port::CondVar* GetCondVar() { return &cv_; } CacheBench* GetCacheBench() const { return cache_bench_; } void IncInitialized() { num_initialized_++; } void IncDone() { num_done_++; } bool AllInitialized() const { return num_initialized_ >= FLAGS_threads; } bool AllDone() const { return num_done_ >= FLAGS_threads; } void SetStart() { start_ = true; } bool Started() const { return start_; } private: port::Mutex mu_; port::CondVar cv_; uint64_t num_initialized_; bool start_; uint64_t num_done_; CacheBench* cache_bench_; }; // Per-thread state for concurrent executions of the same benchmark. 
struct ThreadState { uint32_t tid; Random64 rnd; SharedState* shared; ThreadState(uint32_t index, SharedState* _shared) : tid(index), rnd(1000 + index), shared(_shared) {} }; struct KeyGen { char key_data[27]; Slice GetRand(Random64& rnd, uint64_t max_key) { uint64_t raw = rnd.Next(); // Skew according to setting for (uint32_t i = 0; i < FLAGS_skew; ++i) { raw = std::min(raw, rnd.Next()); } uint64_t key = fastrange64(raw, max_key); // Variable size and alignment size_t off = key % 8; key_data[0] = char{42}; EncodeFixed64(key_data + 1, key); key_data[9] = char{11}; EncodeFixed64(key_data + 10, key); key_data[18] = char{4}; EncodeFixed64(key_data + 19, key); return Slice(&key_data[off], sizeof(key_data) - off); } }; char* createValue(Random64& rnd) { char* rv = new char[FLAGS_value_bytes]; // Fill with some filler data, and take some CPU time for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) { EncodeFixed64(rv + i, rnd.Next()); } return rv; } void deleter(const Slice& /*key*/, void* value) { delete[] static_cast(value); } } // namespace class CacheBench { static constexpr uint64_t kHundredthUint64 = std::numeric_limits::max() / 100U; public: CacheBench() : max_key_(static_cast(FLAGS_cache_size / FLAGS_resident_ratio / FLAGS_value_bytes)), lookup_insert_threshold_(kHundredthUint64 * FLAGS_lookup_insert_percent), insert_threshold_(lookup_insert_threshold_ + kHundredthUint64 * FLAGS_insert_percent), lookup_threshold_(insert_threshold_ + kHundredthUint64 * FLAGS_lookup_percent), erase_threshold_(lookup_threshold_ + kHundredthUint64 * FLAGS_erase_percent) { if (erase_threshold_ != 100U * kHundredthUint64) { fprintf(stderr, "Percentages must add to 100.\n"); exit(1); } if (FLAGS_use_clock_cache) { cache_ = NewClockCache(FLAGS_cache_size, FLAGS_num_shard_bits); if (!cache_) { fprintf(stderr, "Clock cache not supported.\n"); exit(1); } } else { cache_ = NewLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits); } if (FLAGS_ops_per_thread == 0) { FLAGS_ops_per_thread = 5 * 
max_key_; } } ~CacheBench() {} void PopulateCache() { Random64 rnd(1); KeyGen keygen; for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) { cache_->Insert(keygen.GetRand(rnd, max_key_), createValue(rnd), FLAGS_value_bytes, &deleter); } } bool Run() { ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default(); PrintEnv(); SharedState shared(this); std::vector > threads(FLAGS_threads); for (uint32_t i = 0; i < FLAGS_threads; i++) { threads[i].reset(new ThreadState(i, &shared)); env->StartThread(ThreadBody, threads[i].get()); } { MutexLock l(shared.GetMutex()); while (!shared.AllInitialized()) { shared.GetCondVar()->Wait(); } // Record start time uint64_t start_time = env->NowMicros(); // Start all threads shared.SetStart(); shared.GetCondVar()->SignalAll(); // Wait threads to complete while (!shared.AllDone()) { shared.GetCondVar()->Wait(); } // Record end time uint64_t end_time = env->NowMicros(); double elapsed = static_cast(end_time - start_time) * 1e-6; uint32_t qps = static_cast( static_cast(FLAGS_threads * FLAGS_ops_per_thread) / elapsed); fprintf(stdout, "Complete in %.3f s; QPS = %u\n", elapsed, qps); } return true; } private: std::shared_ptr cache_; const uint64_t max_key_; // Cumulative thresholds in the space of a random uint64_t const uint64_t lookup_insert_threshold_; const uint64_t insert_threshold_; const uint64_t lookup_threshold_; const uint64_t erase_threshold_; static void ThreadBody(void* v) { ThreadState* thread = static_cast(v); SharedState* shared = thread->shared; { MutexLock l(shared->GetMutex()); shared->IncInitialized(); if (shared->AllInitialized()) { shared->GetCondVar()->SignalAll(); } while (!shared->Started()) { shared->GetCondVar()->Wait(); } } thread->shared->GetCacheBench()->OperateCache(thread); { MutexLock l(shared->GetMutex()); shared->IncDone(); if (shared->AllDone()) { shared->GetCondVar()->SignalAll(); } } } void OperateCache(ThreadState* thread) { // To use looked-up values uint64_t result = 0; // To 
hold handles for a non-trivial amount of time Cache::Handle* handle = nullptr; KeyGen gen; for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { Slice key = gen.GetRand(thread->rnd, max_key_); uint64_t random_op = thread->rnd.Next(); if (random_op < lookup_insert_threshold_) { if (handle) { cache_->Release(handle); handle = nullptr; } // do lookup handle = cache_->Lookup(key); if (handle) { // do something with the data result += NPHash64(static_cast(cache_->Value(handle)), FLAGS_value_bytes); } else { // do insert cache_->Insert(key, createValue(thread->rnd), FLAGS_value_bytes, &deleter, &handle); } } else if (random_op < insert_threshold_) { if (handle) { cache_->Release(handle); handle = nullptr; } // do insert cache_->Insert(key, createValue(thread->rnd), FLAGS_value_bytes, &deleter, &handle); } else if (random_op < lookup_threshold_) { if (handle) { cache_->Release(handle); handle = nullptr; } // do lookup handle = cache_->Lookup(key); if (handle) { // do something with the data result += NPHash64(static_cast(cache_->Value(handle)), FLAGS_value_bytes); } } else if (random_op < erase_threshold_) { // do erase cache_->Erase(key); } else { // Should be extremely unlikely (noop) assert(random_op >= kHundredthUint64 * 100U); } } if (handle) { cache_->Release(handle); handle = nullptr; } } void PrintEnv() const { printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion); printf("Number of threads : %u\n", FLAGS_threads); printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread); printf("Cache size : %" PRIu64 "\n", FLAGS_cache_size); printf("Num shard bits : %u\n", FLAGS_num_shard_bits); printf("Max key : %" PRIu64 "\n", max_key_); printf("Resident ratio : %g\n", FLAGS_resident_ratio); printf("Skew degree : %u\n", FLAGS_skew); printf("Populate cache : %d\n", int{FLAGS_populate_cache}); printf("Lookup+Insert pct : %u%%\n", FLAGS_lookup_insert_percent); printf("Insert percentage : %u%%\n", FLAGS_insert_percent); printf("Lookup percentage : %u%%\n", 
FLAGS_lookup_percent); printf("Erase percentage : %u%%\n", FLAGS_erase_percent); printf("----------------------------\n"); } }; } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ParseCommandLineFlags(&argc, &argv, true); if (FLAGS_threads <= 0) { fprintf(stderr, "threads number <= 0\n"); exit(1); } ROCKSDB_NAMESPACE::CacheBench bench; if (FLAGS_populate_cache) { bench.PopulateCache(); printf("Population complete\n"); printf("----------------------------\n"); } if (bench.Run()) { return 0; } else { return 1; } } #endif // GFLAGS rocksdb-6.11.4/cache/cache_test.cc000066400000000000000000000561251370372246700167320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "rocksdb/cache.h" #include #include #include #include #include #include "cache/clock_cache.h" #include "cache/lru_cache.h" #include "test_util/testharness.h" #include "util/coding.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // Conversions between numeric keys/values and the types expected by Cache. 
static std::string EncodeKey(int k) { std::string result; PutFixed32(&result, k); return result; } static int DecodeKey(const Slice& k) { assert(k.size() == 4); return DecodeFixed32(k.data()); } static void* EncodeValue(uintptr_t v) { return reinterpret_cast(v); } static int DecodeValue(void* v) { return static_cast(reinterpret_cast(v)); } const std::string kLRU = "lru"; const std::string kClock = "clock"; void dumbDeleter(const Slice& /*key*/, void* /*value*/) {} void eraseDeleter(const Slice& /*key*/, void* value) { Cache* cache = reinterpret_cast(value); cache->Erase("foo"); } class CacheTest : public testing::TestWithParam { public: static CacheTest* current_; static void Deleter(const Slice& key, void* v) { current_->deleted_keys_.push_back(DecodeKey(key)); current_->deleted_values_.push_back(DecodeValue(v)); } static const int kCacheSize = 1000; static const int kNumShardBits = 4; static const int kCacheSize2 = 100; static const int kNumShardBits2 = 2; std::vector deleted_keys_; std::vector deleted_values_; std::shared_ptr cache_; std::shared_ptr cache2_; CacheTest() : cache_(NewCache(kCacheSize, kNumShardBits, false)), cache2_(NewCache(kCacheSize2, kNumShardBits2, false)) { current_ = this; } ~CacheTest() override {} std::shared_ptr NewCache(size_t capacity) { auto type = GetParam(); if (type == kLRU) { return NewLRUCache(capacity); } if (type == kClock) { return NewClockCache(capacity); } return nullptr; } std::shared_ptr NewCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, CacheMetadataChargePolicy charge_policy = kDontChargeCacheMetadata) { auto type = GetParam(); if (type == kLRU) { LRUCacheOptions co; co.capacity = capacity; co.num_shard_bits = num_shard_bits; co.strict_capacity_limit = strict_capacity_limit; co.high_pri_pool_ratio = 0; co.metadata_charge_policy = charge_policy; return NewLRUCache(co); } if (type == kClock) { return NewClockCache(capacity, num_shard_bits, strict_capacity_limit, charge_policy); } return nullptr; } 
int Lookup(std::shared_ptr cache, int key) { Cache::Handle* handle = cache->Lookup(EncodeKey(key)); const int r = (handle == nullptr) ? -1 : DecodeValue(cache->Value(handle)); if (handle != nullptr) { cache->Release(handle); } return r; } void Insert(std::shared_ptr cache, int key, int value, int charge = 1) { cache->Insert(EncodeKey(key), EncodeValue(value), charge, &CacheTest::Deleter); } void Erase(std::shared_ptr cache, int key) { cache->Erase(EncodeKey(key)); } int Lookup(int key) { return Lookup(cache_, key); } void Insert(int key, int value, int charge = 1) { Insert(cache_, key, value, charge); } void Erase(int key) { Erase(cache_, key); } int Lookup2(int key) { return Lookup(cache2_, key); } void Insert2(int key, int value, int charge = 1) { Insert(cache2_, key, value, charge); } void Erase2(int key) { Erase(cache2_, key); } }; CacheTest* CacheTest::current_; class LRUCacheTest : public CacheTest {}; TEST_P(CacheTest, UsageTest) { // cache is std::shared_ptr and will be automatically cleaned up. 
const uint64_t kCapacity = 100000; auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata); ASSERT_EQ(0, cache->GetUsage()); ASSERT_EQ(0, precise_cache->GetUsage()); size_t usage = 0; char value[10] = "abcdef"; // make sure everything will be cached for (int i = 1; i < 100; ++i) { std::string key(i, 'a'); auto kv_size = key.size() + 5; cache->Insert(key, reinterpret_cast(value), kv_size, dumbDeleter); precise_cache->Insert(key, reinterpret_cast(value), kv_size, dumbDeleter); usage += kv_size; ASSERT_EQ(usage, cache->GetUsage()); ASSERT_LT(usage, precise_cache->GetUsage()); } cache->EraseUnRefEntries(); precise_cache->EraseUnRefEntries(); ASSERT_EQ(0, cache->GetUsage()); ASSERT_EQ(0, precise_cache->GetUsage()); // make sure the cache will be overloaded for (uint64_t i = 1; i < kCapacity; ++i) { auto key = ToString(i); cache->Insert(key, reinterpret_cast(value), key.size() + 5, dumbDeleter); precise_cache->Insert(key, reinterpret_cast(value), key.size() + 5, dumbDeleter); } // the usage should be close to the capacity ASSERT_GT(kCapacity, cache->GetUsage()); ASSERT_GT(kCapacity, precise_cache->GetUsage()); ASSERT_LT(kCapacity * 0.95, cache->GetUsage()); ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage()); } TEST_P(CacheTest, PinnedUsageTest) { // cache is std::shared_ptr and will be automatically cleaned up. const uint64_t kCapacity = 200000; auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata); size_t pinned_usage = 0; char value[10] = "abcdef"; std::forward_list unreleased_handles; std::forward_list unreleased_handles_in_precise_cache; // Add entries. Unpin some of them after insertion. Then, pin some of them // again. Check GetPinnedUsage(). 
for (int i = 1; i < 100; ++i) { std::string key(i, 'a'); auto kv_size = key.size() + 5; Cache::Handle* handle; Cache::Handle* handle_in_precise_cache; cache->Insert(key, reinterpret_cast(value), kv_size, dumbDeleter, &handle); assert(handle); precise_cache->Insert(key, reinterpret_cast(value), kv_size, dumbDeleter, &handle_in_precise_cache); assert(handle_in_precise_cache); pinned_usage += kv_size; ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage()); if (i % 2 == 0) { cache->Release(handle); precise_cache->Release(handle_in_precise_cache); pinned_usage -= kv_size; ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage()); } else { unreleased_handles.push_front(handle); unreleased_handles_in_precise_cache.push_front(handle_in_precise_cache); } if (i % 3 == 0) { unreleased_handles.push_front(cache->Lookup(key)); auto x = precise_cache->Lookup(key); assert(x); unreleased_handles_in_precise_cache.push_front(x); // If i % 2 == 0, then the entry was unpinned before Lookup, so pinned // usage increased if (i % 2 == 0) { pinned_usage += kv_size; } ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage()); } } auto precise_cache_pinned_usage = precise_cache->GetPinnedUsage(); ASSERT_LT(pinned_usage, precise_cache_pinned_usage); // check that overloading the cache does not change the pinned usage for (uint64_t i = 1; i < 2 * kCapacity; ++i) { auto key = ToString(i); cache->Insert(key, reinterpret_cast(value), key.size() + 5, dumbDeleter); precise_cache->Insert(key, reinterpret_cast(value), key.size() + 5, dumbDeleter); } ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage()); cache->EraseUnRefEntries(); precise_cache->EraseUnRefEntries(); ASSERT_EQ(pinned_usage, cache->GetPinnedUsage()); ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage()); 
// release handles for pinned entries to prevent memory leaks for (auto handle : unreleased_handles) { cache->Release(handle); } for (auto handle : unreleased_handles_in_precise_cache) { precise_cache->Release(handle); } ASSERT_EQ(0, cache->GetPinnedUsage()); ASSERT_EQ(0, precise_cache->GetPinnedUsage()); cache->EraseUnRefEntries(); precise_cache->EraseUnRefEntries(); ASSERT_EQ(0, cache->GetUsage()); ASSERT_EQ(0, precise_cache->GetUsage()); } TEST_P(CacheTest, HitAndMiss) { ASSERT_EQ(-1, Lookup(100)); Insert(100, 101); ASSERT_EQ(101, Lookup(100)); ASSERT_EQ(-1, Lookup(200)); ASSERT_EQ(-1, Lookup(300)); Insert(200, 201); ASSERT_EQ(101, Lookup(100)); ASSERT_EQ(201, Lookup(200)); ASSERT_EQ(-1, Lookup(300)); Insert(100, 102); ASSERT_EQ(102, Lookup(100)); ASSERT_EQ(201, Lookup(200)); ASSERT_EQ(-1, Lookup(300)); ASSERT_EQ(1U, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[0]); ASSERT_EQ(101, deleted_values_[0]); } TEST_P(CacheTest, InsertSameKey) { Insert(1, 1); Insert(1, 2); ASSERT_EQ(2, Lookup(1)); } TEST_P(CacheTest, Erase) { Erase(200); ASSERT_EQ(0U, deleted_keys_.size()); Insert(100, 101); Insert(200, 201); Erase(100); ASSERT_EQ(-1, Lookup(100)); ASSERT_EQ(201, Lookup(200)); ASSERT_EQ(1U, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[0]); ASSERT_EQ(101, deleted_values_[0]); Erase(100); ASSERT_EQ(-1, Lookup(100)); ASSERT_EQ(201, Lookup(200)); ASSERT_EQ(1U, deleted_keys_.size()); } TEST_P(CacheTest, EntriesArePinned) { Insert(100, 101); Cache::Handle* h1 = cache_->Lookup(EncodeKey(100)); ASSERT_EQ(101, DecodeValue(cache_->Value(h1))); ASSERT_EQ(1U, cache_->GetUsage()); Insert(100, 102); Cache::Handle* h2 = cache_->Lookup(EncodeKey(100)); ASSERT_EQ(102, DecodeValue(cache_->Value(h2))); ASSERT_EQ(0U, deleted_keys_.size()); ASSERT_EQ(2U, cache_->GetUsage()); cache_->Release(h1); ASSERT_EQ(1U, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[0]); ASSERT_EQ(101, deleted_values_[0]); ASSERT_EQ(1U, cache_->GetUsage()); Erase(100); ASSERT_EQ(-1, Lookup(100)); 
ASSERT_EQ(1U, deleted_keys_.size()); ASSERT_EQ(1U, cache_->GetUsage()); cache_->Release(h2); ASSERT_EQ(2U, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[1]); ASSERT_EQ(102, deleted_values_[1]); ASSERT_EQ(0U, cache_->GetUsage()); } TEST_P(CacheTest, EvictionPolicy) { Insert(100, 101); Insert(200, 201); // Frequently used entry must be kept around for (int i = 0; i < kCacheSize * 2; i++) { Insert(1000+i, 2000+i); ASSERT_EQ(101, Lookup(100)); } ASSERT_EQ(101, Lookup(100)); ASSERT_EQ(-1, Lookup(200)); } TEST_P(CacheTest, ExternalRefPinsEntries) { Insert(100, 101); Cache::Handle* h = cache_->Lookup(EncodeKey(100)); ASSERT_TRUE(cache_->Ref(h)); ASSERT_EQ(101, DecodeValue(cache_->Value(h))); ASSERT_EQ(1U, cache_->GetUsage()); for (int i = 0; i < 3; ++i) { if (i > 0) { // First release (i == 1) corresponds to Ref(), second release (i == 2) // corresponds to Lookup(). Then, since all external refs are released, // the below insertions should push out the cache entry. cache_->Release(h); } // double cache size because the usage bit in block cache prevents 100 from // being evicted in the first kCacheSize iterations for (int j = 0; j < 2 * kCacheSize + 100; j++) { Insert(1000 + j, 2000 + j); } if (i < 2) { ASSERT_EQ(101, Lookup(100)); } } ASSERT_EQ(-1, Lookup(100)); } TEST_P(CacheTest, EvictionPolicyRef) { Insert(100, 101); Insert(101, 102); Insert(102, 103); Insert(103, 104); Insert(200, 101); Insert(201, 102); Insert(202, 103); Insert(203, 104); Cache::Handle* h201 = cache_->Lookup(EncodeKey(200)); Cache::Handle* h202 = cache_->Lookup(EncodeKey(201)); Cache::Handle* h203 = cache_->Lookup(EncodeKey(202)); Cache::Handle* h204 = cache_->Lookup(EncodeKey(203)); Insert(300, 101); Insert(301, 102); Insert(302, 103); Insert(303, 104); // Insert entries much more than Cache capacity for (int i = 0; i < kCacheSize * 2; i++) { Insert(1000 + i, 2000 + i); } // Check whether the entries inserted in the beginning // are evicted. 
Ones without extra ref are evicted and // those with are not. ASSERT_EQ(-1, Lookup(100)); ASSERT_EQ(-1, Lookup(101)); ASSERT_EQ(-1, Lookup(102)); ASSERT_EQ(-1, Lookup(103)); ASSERT_EQ(-1, Lookup(300)); ASSERT_EQ(-1, Lookup(301)); ASSERT_EQ(-1, Lookup(302)); ASSERT_EQ(-1, Lookup(303)); ASSERT_EQ(101, Lookup(200)); ASSERT_EQ(102, Lookup(201)); ASSERT_EQ(103, Lookup(202)); ASSERT_EQ(104, Lookup(203)); // Cleaning up all the handles cache_->Release(h201); cache_->Release(h202); cache_->Release(h203); cache_->Release(h204); } TEST_P(CacheTest, EvictEmptyCache) { // Insert item large than capacity to trigger eviction on empty cache. auto cache = NewCache(1, 0, false); ASSERT_OK(cache->Insert("foo", nullptr, 10, dumbDeleter)); } TEST_P(CacheTest, EraseFromDeleter) { // Have deleter which will erase item from cache, which will re-enter // the cache at that point. std::shared_ptr cache = NewCache(10, 0, false); ASSERT_OK(cache->Insert("foo", nullptr, 1, dumbDeleter)); ASSERT_OK(cache->Insert("bar", cache.get(), 1, eraseDeleter)); cache->Erase("bar"); ASSERT_EQ(nullptr, cache->Lookup("foo")); ASSERT_EQ(nullptr, cache->Lookup("bar")); } TEST_P(CacheTest, ErasedHandleState) { // insert a key and get two handles Insert(100, 1000); Cache::Handle* h1 = cache_->Lookup(EncodeKey(100)); Cache::Handle* h2 = cache_->Lookup(EncodeKey(100)); ASSERT_EQ(h1, h2); ASSERT_EQ(DecodeValue(cache_->Value(h1)), 1000); ASSERT_EQ(DecodeValue(cache_->Value(h2)), 1000); // delete the key from the cache Erase(100); // can no longer find in the cache ASSERT_EQ(-1, Lookup(100)); // release one handle cache_->Release(h1); // still can't find in cache ASSERT_EQ(-1, Lookup(100)); cache_->Release(h2); } TEST_P(CacheTest, HeavyEntries) { // Add a bunch of light and heavy entries and then count the combined // size of items still in the cache, which must be approximately the // same as the total capacity. 
const int kLight = 1; const int kHeavy = 10; int added = 0; int index = 0; while (added < 2*kCacheSize) { const int weight = (index & 1) ? kLight : kHeavy; Insert(index, 1000+index, weight); added += weight; index++; } int cached_weight = 0; for (int i = 0; i < index; i++) { const int weight = (i & 1 ? kLight : kHeavy); int r = Lookup(i); if (r >= 0) { cached_weight += weight; ASSERT_EQ(1000+i, r); } } ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10); } TEST_P(CacheTest, NewId) { uint64_t a = cache_->NewId(); uint64_t b = cache_->NewId(); ASSERT_NE(a, b); } class Value { public: explicit Value(size_t v) : v_(v) { } size_t v_; }; namespace { void deleter(const Slice& /*key*/, void* value) { delete static_cast(value); } } // namespace TEST_P(CacheTest, ReleaseAndErase) { std::shared_ptr cache = NewCache(5, 0, false); Cache::Handle* handle; Status s = cache->Insert(EncodeKey(100), EncodeValue(100), 1, &CacheTest::Deleter, &handle); ASSERT_TRUE(s.ok()); ASSERT_EQ(5U, cache->GetCapacity()); ASSERT_EQ(1U, cache->GetUsage()); ASSERT_EQ(0U, deleted_keys_.size()); auto erased = cache->Release(handle, true); ASSERT_TRUE(erased); // This tests that deleter has been called ASSERT_EQ(1U, deleted_keys_.size()); } TEST_P(CacheTest, ReleaseWithoutErase) { std::shared_ptr cache = NewCache(5, 0, false); Cache::Handle* handle; Status s = cache->Insert(EncodeKey(100), EncodeValue(100), 1, &CacheTest::Deleter, &handle); ASSERT_TRUE(s.ok()); ASSERT_EQ(5U, cache->GetCapacity()); ASSERT_EQ(1U, cache->GetUsage()); ASSERT_EQ(0U, deleted_keys_.size()); auto erased = cache->Release(handle); ASSERT_FALSE(erased); // This tests that deleter is not called. When cache has free capacity it is // not expected to immediately erase the released items. 
ASSERT_EQ(0U, deleted_keys_.size()); } TEST_P(CacheTest, SetCapacity) { // test1: increase capacity // lets create a cache with capacity 5, // then, insert 5 elements, then increase capacity // to 10, returned capacity should be 10, usage=5 std::shared_ptr cache = NewCache(5, 0, false); std::vector handles(10); // Insert 5 entries, but not releasing. for (size_t i = 0; i < 5; i++) { std::string key = ToString(i+1); Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); ASSERT_TRUE(s.ok()); } ASSERT_EQ(5U, cache->GetCapacity()); ASSERT_EQ(5U, cache->GetUsage()); cache->SetCapacity(10); ASSERT_EQ(10U, cache->GetCapacity()); ASSERT_EQ(5U, cache->GetUsage()); // test2: decrease capacity // insert 5 more elements to cache, then release 5, // then decrease capacity to 7, final capacity should be 7 // and usage should be 7 for (size_t i = 5; i < 10; i++) { std::string key = ToString(i+1); Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); ASSERT_TRUE(s.ok()); } ASSERT_EQ(10U, cache->GetCapacity()); ASSERT_EQ(10U, cache->GetUsage()); for (size_t i = 0; i < 5; i++) { cache->Release(handles[i]); } ASSERT_EQ(10U, cache->GetCapacity()); ASSERT_EQ(10U, cache->GetUsage()); cache->SetCapacity(7); ASSERT_EQ(7, cache->GetCapacity()); ASSERT_EQ(7, cache->GetUsage()); // release remaining 5 to keep valgrind happy for (size_t i = 5; i < 10; i++) { cache->Release(handles[i]); } } TEST_P(LRUCacheTest, SetStrictCapacityLimit) { // test1: set the flag to false. Insert more keys than capacity. See if they // all go through. std::shared_ptr cache = NewCache(5, 0, false); std::vector handles(10); Status s; for (size_t i = 0; i < 10; i++) { std::string key = ToString(i + 1); s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); ASSERT_OK(s); ASSERT_NE(nullptr, handles[i]); } ASSERT_EQ(10, cache->GetUsage()); // test2: set the flag to true. Insert and check if it fails. 
std::string extra_key = "extra"; Value* extra_value = new Value(0); cache->SetStrictCapacityLimit(true); Cache::Handle* handle; s = cache->Insert(extra_key, extra_value, 1, &deleter, &handle); ASSERT_TRUE(s.IsIncomplete()); ASSERT_EQ(nullptr, handle); ASSERT_EQ(10, cache->GetUsage()); for (size_t i = 0; i < 10; i++) { cache->Release(handles[i]); } // test3: init with flag being true. std::shared_ptr cache2 = NewCache(5, 0, true); for (size_t i = 0; i < 5; i++) { std::string key = ToString(i + 1); s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); ASSERT_OK(s); ASSERT_NE(nullptr, handles[i]); } s = cache2->Insert(extra_key, extra_value, 1, &deleter, &handle); ASSERT_TRUE(s.IsIncomplete()); ASSERT_EQ(nullptr, handle); // test insert without handle s = cache2->Insert(extra_key, extra_value, 1, &deleter); // AS if the key have been inserted into cache but get evicted immediately. ASSERT_OK(s); ASSERT_EQ(5, cache2->GetUsage()); ASSERT_EQ(nullptr, cache2->Lookup(extra_key)); for (size_t i = 0; i < 5; i++) { cache2->Release(handles[i]); } } TEST_P(CacheTest, OverCapacity) { size_t n = 10; // a LRUCache with n entries and one shard only std::shared_ptr cache = NewCache(n, 0, false); std::vector handles(n+1); // Insert n+1 entries, but not releasing. for (size_t i = 0; i < n + 1; i++) { std::string key = ToString(i+1); Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]); ASSERT_TRUE(s.ok()); } // Guess what's in the cache now? for (size_t i = 0; i < n + 1; i++) { std::string key = ToString(i+1); auto h = cache->Lookup(key); ASSERT_TRUE(h != nullptr); if (h) cache->Release(h); } // the cache is over capacity since nothing could be evicted ASSERT_EQ(n + 1U, cache->GetUsage()); for (size_t i = 0; i < n + 1; i++) { cache->Release(handles[i]); } // Make sure eviction is triggered. 
cache->SetCapacity(n); // cache is under capacity now since elements were released ASSERT_EQ(n, cache->GetUsage()); // element 0 is evicted and the rest is there // This is consistent with the LRU policy since the element 0 // was released first for (size_t i = 0; i < n + 1; i++) { std::string key = ToString(i+1); auto h = cache->Lookup(key); if (h) { ASSERT_NE(i, 0U); cache->Release(h); } else { ASSERT_EQ(i, 0U); } } } namespace { std::vector> callback_state; void callback(void* entry, size_t charge) { callback_state.push_back({DecodeValue(entry), static_cast(charge)}); } }; TEST_P(CacheTest, ApplyToAllCacheEntiresTest) { std::vector> inserted; callback_state.clear(); for (int i = 0; i < 10; ++i) { Insert(i, i * 2, i + 1); inserted.push_back({i * 2, i + 1}); } cache_->ApplyToAllCacheEntries(callback, true); std::sort(inserted.begin(), inserted.end()); std::sort(callback_state.begin(), callback_state.end()); ASSERT_TRUE(inserted == callback_state); } TEST_P(CacheTest, DefaultShardBits) { // test1: set the flag to false. Insert more keys than capacity. See if they // all go through. 
std::shared_ptr cache = NewCache(16 * 1024L * 1024L); ShardedCache* sc = dynamic_cast(cache.get()); ASSERT_EQ(5, sc->GetNumShardBits()); cache = NewLRUCache(511 * 1024L, -1, true); sc = dynamic_cast(cache.get()); ASSERT_EQ(0, sc->GetNumShardBits()); cache = NewLRUCache(1024L * 1024L * 1024L, -1, true); sc = dynamic_cast(cache.get()); ASSERT_EQ(6, sc->GetNumShardBits()); } TEST_P(CacheTest, GetCharge) { Insert(1, 2); Cache::Handle* h1 = cache_->Lookup(EncodeKey(1)); ASSERT_EQ(2, DecodeValue(cache_->Value(h1))); ASSERT_EQ(1, cache_->GetCharge(h1)); cache_->Release(h1); } #ifdef SUPPORT_CLOCK_CACHE std::shared_ptr (*new_clock_cache_func)( size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache; INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU, kClock)); #else INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU)); #endif // SUPPORT_CLOCK_CACHE INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU)); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/cache/clock_cache.cc000066400000000000000000000666031370372246700170500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "cache/clock_cache.h" #ifndef SUPPORT_CLOCK_CACHE namespace ROCKSDB_NAMESPACE { std::shared_ptr NewClockCache( size_t /*capacity*/, int /*num_shard_bits*/, bool /*strict_capacity_limit*/, CacheMetadataChargePolicy /*metadata_charge_policy*/) { // Clock cache not supported. 
return nullptr; } } // namespace ROCKSDB_NAMESPACE #else #include #include #include // "tbb/concurrent_hash_map.h" requires RTTI if exception is enabled. // Disable it so users can chooose to disable RTTI. #ifndef ROCKSDB_USE_RTTI #define TBB_USE_EXCEPTIONS 0 #endif #include "tbb/concurrent_hash_map.h" #include "cache/sharded_cache.h" #include "port/malloc.h" #include "port/port.h" #include "util/autovector.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { namespace { // An implementation of the Cache interface based on CLOCK algorithm, with // better concurrent performance than LRUCache. The idea of CLOCK algorithm // is to maintain all cache entries in a circular list, and an iterator // (the "head") pointing to the last examined entry. Eviction starts from the // current head. Each entry is given a second chance before eviction, if it // has been access since last examine. In contrast to LRU, no modification // to the internal data-structure (except for flipping the usage bit) needs // to be done upon lookup. This gives us oppertunity to implement a cache // with better concurrency. // // Each cache entry is represented by a cache handle, and all the handles // are arranged in a circular list, as describe above. Upon erase of an entry, // we never remove the handle. Instead, the handle is put into a recycle bin // to be re-use. This is to avoid memory dealocation, which is hard to deal // with in concurrent environment. // // The cache also maintains a concurrent hash map for lookup. Any concurrent // hash map implementation should do the work. We currently use // tbb::concurrent_hash_map because it supports concurrent erase. // // Each cache handle has the following flags and counters, which are squeeze // in an atomic interger, to make sure the handle always be in a consistent // state: // // * In-cache bit: whether the entry is reference by the cache itself. If // an entry is in cache, its key would also be available in the hash map. 
// * Usage bit: whether the entry has been access by user since last // examine for eviction. Can be reset by eviction. // * Reference count: reference count by user. // // An entry can be reference only when it's in cache. An entry can be evicted // only when it is in cache, has no usage since last examine, and reference // count is zero. // // The follow figure shows a possible layout of the cache. Boxes represents // cache handles and numbers in each box being in-cache bit, usage bit and // reference count respectively. // // hash map: // +-------+--------+ // | key | handle | // +-------+--------+ // | "foo" | 5 |-------------------------------------+ // +-------+--------+ | // | "bar" | 2 |--+ | // +-------+--------+ | | // | | // head | | // | | | // circular list: | | | // +-------+ +-------+ +-------+ +-------+ +-------+ +------- // |(0,0,0)|---|(1,1,0)|---|(0,0,0)|---|(0,1,3)|---|(1,0,0)|---| ... // +-------+ +-------+ +-------+ +-------+ +-------+ +------- // | | // +-------+ +-----------+ // | | // +---+---+ // recycle bin: | 1 | 3 | // +---+---+ // // Suppose we try to insert "baz" into the cache at this point and the cache is // full. The cache will first look for entries to evict, starting from where // head points to (the second entry). It resets usage bit of the second entry, // skips the third and fourth entry since they are not in cache, and finally // evict the fifth entry ("foo"). It looks at recycle bin for available handle, // grabs handle 3, and insert the key into the handle. The following figure // shows the resulting layout. // // hash map: // +-------+--------+ // | key | handle | // +-------+--------+ // | "baz" | 3 |-------------+ // +-------+--------+ | // | "bar" | 2 |--+ | // +-------+--------+ | | // | | // | | head // | | | // circular list: | | | // +-------+ +-------+ +-------+ +-------+ +-------+ +------- // |(0,0,0)|---|(1,0,0)|---|(1,0,0)|---|(0,1,3)|---|(0,0,0)|---| ... 
// +-------+ +-------+ +-------+ +-------+ +-------+ +------- // | | // +-------+ +-----------------------------------+ // | | // +---+---+ // recycle bin: | 1 | 5 | // +---+---+ // // A global mutex guards the circular list, the head, and the recycle bin. // We additionally require that the mutex be held while modifying the hash map. // As such, modifying the cache (such as Insert() and Erase()) requires // holding the mutex. Lookup() only accesses the hash map and the flags // associated with each handle, and doesn't require explicit locking. Release() // has to acquire the mutex only when it releases the last reference to the // entry and the entry has been erased from cache explicitly. A future // improvement could be to remove the mutex completely. // // Benchmark: // We run readrandom db_bench on a test DB of size 13GB, with size of each // level: // // Level Files Size(MB) // ------------------------- // L0 1 0.01 // L1 18 17.32 // L2 230 182.94 // L3 1186 1833.63 // L4 4602 8140.30 // // We test with both 32 and 16 read threads, with 2GB cache size (the whole DB // doesn't fit in) and 64GB cache size (the whole DB can fit in cache), and // whether to put index and filter blocks in block cache. The benchmark runs // with RocksDB 4.10. We got the following result: // // Threads Cache Cache ClockCache LRUCache // Size Index/Filter Throughput(MB/s) Hit Throughput(MB/s) Hit // 32 2GB yes 466.7 85.9% 433.7 86.5% // 32 2GB no 529.9 72.7% 532.7 73.9% // 32 64GB yes 649.9 99.9% 507.9 99.9% // 32 64GB no 740.4 99.9% 662.8 99.9% // 16 2GB yes 278.4 85.9% 283.4 86.5% // 16 2GB no 318.6 72.7% 335.8 73.9% // 16 64GB yes 391.9 99.9% 353.3 99.9% // 16 64GB no 433.8 99.8% 419.4 99.8% // Cache entry meta data.
struct CacheHandle { Slice key; uint32_t hash; void* value; size_t charge; void (*deleter)(const Slice&, void* value); // Flags and counters associated with the cache handle: // lowest bit: in-cache bit // second lowest bit: usage bit // the rest bits: reference count // The handle is unused when flags equals to 0. The thread decreases the count // to 0 is responsible to put the handle back to recycle_ and cleanup memory. std::atomic flags; CacheHandle() = default; CacheHandle(const CacheHandle& a) { *this = a; } CacheHandle(const Slice& k, void* v, void (*del)(const Slice& key, void* value)) : key(k), value(v), deleter(del) {} CacheHandle& operator=(const CacheHandle& a) { // Only copy members needed for deletion. key = a.key; value = a.value; deleter = a.deleter; return *this; } inline static size_t CalcTotalCharge( Slice key, size_t charge, CacheMetadataChargePolicy metadata_charge_policy) { size_t meta_charge = 0; if (metadata_charge_policy == kFullChargeCacheMetadata) { meta_charge += sizeof(CacheHandle); #ifdef ROCKSDB_MALLOC_USABLE_SIZE meta_charge += malloc_usable_size(static_cast(const_cast(key.data()))); #else meta_charge += key.size(); #endif } return charge + meta_charge; } inline size_t CalcTotalCharge( CacheMetadataChargePolicy metadata_charge_policy) { return CalcTotalCharge(key, charge, metadata_charge_policy); } }; // Key of hash map. We store hash value with the key for convenience. struct CacheKey { Slice key; uint32_t hash_value; CacheKey() = default; CacheKey(const Slice& k, uint32_t h) { key = k; hash_value = h; } static bool equal(const CacheKey& a, const CacheKey& b) { return a.hash_value == b.hash_value && a.key == b.key; } static size_t hash(const CacheKey& a) { return static_cast(a.hash_value); } }; struct CleanupContext { // List of values to be deleted, along with the key and deleter. autovector to_delete_value; // List of keys to be deleted. autovector to_delete_key; }; // A cache shard which maintains its own CLOCK cache. 
class ClockCacheShard final : public CacheShard { public: // Hash map type. typedef tbb::concurrent_hash_map HashTable; ClockCacheShard(); ~ClockCacheShard() override; // Interfaces void SetCapacity(size_t capacity) override; void SetStrictCapacityLimit(bool strict_capacity_limit) override; Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Cache::Handle** handle, Cache::Priority priority) override; Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; // If the entry in in cache, increase reference count and return true. // Return false otherwise. // // Not necessary to hold mutex_ before being called. bool Ref(Cache::Handle* handle) override; bool Release(Cache::Handle* handle, bool force_erase = false) override; void Erase(const Slice& key, uint32_t hash) override; bool EraseAndConfirm(const Slice& key, uint32_t hash, CleanupContext* context); size_t GetUsage() const override; size_t GetPinnedUsage() const override; void EraseUnRefEntries() override; void ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) override; private: static const uint32_t kInCacheBit = 1; static const uint32_t kUsageBit = 2; static const uint32_t kRefsOffset = 2; static const uint32_t kOneRef = 1 << kRefsOffset; // Helper functions to extract cache handle flags and counters. static bool InCache(uint32_t flags) { return flags & kInCacheBit; } static bool HasUsage(uint32_t flags) { return flags & kUsageBit; } static uint32_t CountRefs(uint32_t flags) { return flags >> kRefsOffset; } // Decrease reference count of the entry. If this decreases the count to 0, // recycle the entry. If set_usage is true, also set the usage bit. // // returns true if a value is erased. // // Not necessary to hold mutex_ before being called. bool Unref(CacheHandle* handle, bool set_usage, CleanupContext* context); // Unset in-cache bit of the entry. Recycle the handle if necessary. 
// // returns true if a value is erased. // // Has to hold mutex_ before being called. bool UnsetInCache(CacheHandle* handle, CleanupContext* context); // Put the handle back to recycle_ list, and put the value associated with // it into to-be-deleted list. It doesn't cleanup the key as it might be // reused by another handle. // // Has to hold mutex_ before being called. void RecycleHandle(CacheHandle* handle, CleanupContext* context); // Delete keys and values in to-be-deleted list. Call the method without // holding mutex, as destructors can be expensive. void Cleanup(const CleanupContext& context); // Examine the handle for eviction. If the handle is in cache, usage bit is // not set, and referece count is 0, evict it from cache. Otherwise unset // the usage bit. // // Has to hold mutex_ before being called. bool TryEvict(CacheHandle* value, CleanupContext* context); // Scan through the circular list, evict entries until we get enough capacity // for new cache entry of specific size. Return true if success, false // otherwise. // // Has to hold mutex_ before being called. bool EvictFromCache(size_t charge, CleanupContext* context); CacheHandle* Insert(const Slice& key, uint32_t hash, void* value, size_t change, void (*deleter)(const Slice& key, void* value), bool hold_reference, CleanupContext* context, bool* overwritten); // Guards list_, head_, and recycle_. In addition, updating table_ also has // to hold the mutex, to avoid the cache being in inconsistent state. mutable port::Mutex mutex_; // The circular list of cache handles. Initially the list is empty. Once a // handle is needed by insertion, and no more handles are available in // recycle bin, one more handle is appended to the end. // // We use std::deque for the circular list because we want to make sure // pointers to handles are valid through out the life-cycle of the cache // (in contrast to std::vector), and be able to grow the list (in contrast // to statically allocated arrays). 
std::deque list_; // Pointer to the next handle in the circular list to be examine for // eviction. size_t head_; // Recycle bin of cache handles. autovector recycle_; // Maximum cache size. std::atomic capacity_; // Current total size of the cache. std::atomic usage_; // Total un-released cache size. std::atomic pinned_usage_; // Whether allow insert into cache if cache is full. std::atomic strict_capacity_limit_; // Hash table (tbb::concurrent_hash_map) for lookup. HashTable table_; }; ClockCacheShard::ClockCacheShard() : head_(0), usage_(0), pinned_usage_(0), strict_capacity_limit_(false) {} ClockCacheShard::~ClockCacheShard() { for (auto& handle : list_) { uint32_t flags = handle.flags.load(std::memory_order_relaxed); if (InCache(flags) || CountRefs(flags) > 0) { if (handle.deleter != nullptr) { (*handle.deleter)(handle.key, handle.value); } delete[] handle.key.data(); } } } size_t ClockCacheShard::GetUsage() const { return usage_.load(std::memory_order_relaxed); } size_t ClockCacheShard::GetPinnedUsage() const { return pinned_usage_.load(std::memory_order_relaxed); } void ClockCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) { if (thread_safe) { mutex_.Lock(); } for (auto& handle : list_) { // Use relaxed semantics instead of acquire semantics since we are either // holding mutex, or don't have thread safe requirement. 
uint32_t flags = handle.flags.load(std::memory_order_relaxed); if (InCache(flags)) { callback(handle.value, handle.charge); } } if (thread_safe) { mutex_.Unlock(); } } void ClockCacheShard::RecycleHandle(CacheHandle* handle, CleanupContext* context) { mutex_.AssertHeld(); assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0); context->to_delete_key.push_back(handle->key.data()); context->to_delete_value.emplace_back(*handle); size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_); handle->key.clear(); handle->value = nullptr; handle->deleter = nullptr; recycle_.push_back(handle); usage_.fetch_sub(total_charge, std::memory_order_relaxed); } void ClockCacheShard::Cleanup(const CleanupContext& context) { for (const CacheHandle& handle : context.to_delete_value) { if (handle.deleter) { (*handle.deleter)(handle.key, handle.value); } } for (const char* key : context.to_delete_key) { delete[] key; } } bool ClockCacheShard::Ref(Cache::Handle* h) { auto handle = reinterpret_cast(h); // CAS loop to increase reference count. uint32_t flags = handle->flags.load(std::memory_order_relaxed); while (InCache(flags)) { // Use acquire semantics on success, as further operations on the cache // entry has to be order after reference count is increased. if (handle->flags.compare_exchange_weak(flags, flags + kOneRef, std::memory_order_acquire, std::memory_order_relaxed)) { if (CountRefs(flags) == 0) { // No reference count before the operation. 
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_); pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed); } return true; } } return false; } bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage, CleanupContext* context) { if (set_usage) { handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed); } // Use acquire-release semantics as previous operations on the cache entry // has to be order before reference count is decreased, and potential cleanup // of the entry has to be order after. uint32_t flags = handle->flags.fetch_sub(kOneRef, std::memory_order_acq_rel); assert(CountRefs(flags) > 0); if (CountRefs(flags) == 1) { // this is the last reference. size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_); pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed); // Cleanup if it is the last reference. if (!InCache(flags)) { MutexLock l(&mutex_); RecycleHandle(handle, context); } } return context->to_delete_value.size(); } bool ClockCacheShard::UnsetInCache(CacheHandle* handle, CleanupContext* context) { mutex_.AssertHeld(); // Use acquire-release semantics as previous operations on the cache entry // has to be order before reference count is decreased, and potential cleanup // of the entry has to be order after. uint32_t flags = handle->flags.fetch_and(~kInCacheBit, std::memory_order_acq_rel); // Cleanup if it is the last reference. 
if (InCache(flags) && CountRefs(flags) == 0) { RecycleHandle(handle, context); } return context->to_delete_value.size(); } bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) { mutex_.AssertHeld(); uint32_t flags = kInCacheBit; if (handle->flags.compare_exchange_strong(flags, 0, std::memory_order_acquire, std::memory_order_relaxed)) { bool erased __attribute__((__unused__)) = table_.erase(CacheKey(handle->key, handle->hash)); assert(erased); RecycleHandle(handle, context); return true; } handle->flags.fetch_and(~kUsageBit, std::memory_order_relaxed); return false; } bool ClockCacheShard::EvictFromCache(size_t charge, CleanupContext* context) { size_t usage = usage_.load(std::memory_order_relaxed); size_t capacity = capacity_.load(std::memory_order_relaxed); if (usage == 0) { return charge <= capacity; } size_t new_head = head_; bool second_iteration = false; while (usage + charge > capacity) { assert(new_head < list_.size()); if (TryEvict(&list_[new_head], context)) { usage = usage_.load(std::memory_order_relaxed); } new_head = (new_head + 1 >= list_.size()) ? 
0 : new_head + 1; if (new_head == head_) { if (second_iteration) { return false; } else { second_iteration = true; } } } head_ = new_head; return true; } void ClockCacheShard::SetCapacity(size_t capacity) { CleanupContext context; { MutexLock l(&mutex_); capacity_.store(capacity, std::memory_order_relaxed); EvictFromCache(0, &context); } Cleanup(context); } void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) { strict_capacity_limit_.store(strict_capacity_limit, std::memory_order_relaxed); } CacheHandle* ClockCacheShard::Insert( const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), bool hold_reference, CleanupContext* context, bool* overwritten) { assert(overwritten != nullptr && *overwritten == false); size_t total_charge = CacheHandle::CalcTotalCharge(key, charge, metadata_charge_policy_); MutexLock l(&mutex_); bool success = EvictFromCache(total_charge, context); bool strict = strict_capacity_limit_.load(std::memory_order_relaxed); if (!success && (strict || !hold_reference)) { context->to_delete_key.push_back(key.data()); if (!hold_reference) { context->to_delete_value.emplace_back(key, value, deleter); } return nullptr; } // Grab available handle from recycle bin. If recycle bin is empty, create // and append new handle to end of circular list. CacheHandle* handle = nullptr; if (!recycle_.empty()) { handle = recycle_.back(); recycle_.pop_back(); } else { list_.emplace_back(); handle = &list_.back(); } // Fill handle. handle->key = key; handle->hash = hash; handle->value = value; handle->charge = charge; handle->deleter = deleter; uint32_t flags = hold_reference ? 
kInCacheBit + kOneRef : kInCacheBit; handle->flags.store(flags, std::memory_order_relaxed); HashTable::accessor accessor; if (table_.find(accessor, CacheKey(key, hash))) { *overwritten = true; CacheHandle* existing_handle = accessor->second; table_.erase(accessor); UnsetInCache(existing_handle, context); } table_.insert(HashTable::value_type(CacheKey(key, hash), handle)); if (hold_reference) { pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed); } usage_.fetch_add(total_charge, std::memory_order_relaxed); return handle; } Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Cache::Handle** out_handle, Cache::Priority /*priority*/) { CleanupContext context; HashTable::accessor accessor; char* key_data = new char[key.size()]; memcpy(key_data, key.data(), key.size()); Slice key_copy(key_data, key.size()); bool overwritten = false; CacheHandle* handle = Insert(key_copy, hash, value, charge, deleter, out_handle != nullptr, &context, &overwritten); Status s; if (out_handle != nullptr) { if (handle == nullptr) { s = Status::Incomplete("Insert failed due to LRU cache being full."); } else { *out_handle = reinterpret_cast(handle); } } if (overwritten) { assert(s.ok()); s = Status::OkOverwritten(); } Cleanup(context); return s; } Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) { HashTable::const_accessor accessor; if (!table_.find(accessor, CacheKey(key, hash))) { return nullptr; } CacheHandle* handle = accessor->second; accessor.release(); // Ref() could fail if another thread sneak in and evict/erase the cache // entry before we are able to hold reference. if (!Ref(reinterpret_cast(handle))) { return nullptr; } // Double check the key since the handle may now representing another key // if other threads sneak in, evict/erase the entry and re-used the handle // for another cache entry. 
if (hash != handle->hash || key != handle->key) { CleanupContext context; Unref(handle, false, &context); // It is possible Unref() delete the entry, so we need to cleanup. Cleanup(context); return nullptr; } return reinterpret_cast(handle); } bool ClockCacheShard::Release(Cache::Handle* h, bool force_erase) { CleanupContext context; CacheHandle* handle = reinterpret_cast(h); bool erased = Unref(handle, true, &context); if (force_erase && !erased) { erased = EraseAndConfirm(handle->key, handle->hash, &context); } Cleanup(context); return erased; } void ClockCacheShard::Erase(const Slice& key, uint32_t hash) { CleanupContext context; EraseAndConfirm(key, hash, &context); Cleanup(context); } bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash, CleanupContext* context) { MutexLock l(&mutex_); HashTable::accessor accessor; bool erased = false; if (table_.find(accessor, CacheKey(key, hash))) { CacheHandle* handle = accessor->second; table_.erase(accessor); erased = UnsetInCache(handle, context); } return erased; } void ClockCacheShard::EraseUnRefEntries() { CleanupContext context; { MutexLock l(&mutex_); table_.clear(); for (auto& handle : list_) { UnsetInCache(&handle, &context); } } Cleanup(context); } class ClockCache final : public ShardedCache { public: ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy) : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { int num_shards = 1 << num_shard_bits; shards_ = new ClockCacheShard[num_shards]; for (int i = 0; i < num_shards; i++) { shards_[i].set_metadata_charge_policy(metadata_charge_policy); } SetCapacity(capacity); SetStrictCapacityLimit(strict_capacity_limit); } ~ClockCache() override { delete[] shards_; } const char* Name() const override { return "ClockCache"; } CacheShard* GetShard(int shard) override { return reinterpret_cast(&shards_[shard]); } const CacheShard* GetShard(int shard) const override { return 
reinterpret_cast(&shards_[shard]); } void* Value(Handle* handle) override { return reinterpret_cast(handle)->value; } size_t GetCharge(Handle* handle) const override { return reinterpret_cast(handle)->charge; } uint32_t GetHash(Handle* handle) const override { return reinterpret_cast(handle)->hash; } void DisownData() override { shards_ = nullptr; } private: ClockCacheShard* shards_; }; } // end anonymous namespace std::shared_ptr NewClockCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy) { if (num_shard_bits < 0) { num_shard_bits = GetDefaultCacheShardBits(capacity); } return std::make_shared( capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy); } } // namespace ROCKSDB_NAMESPACE #endif // SUPPORT_CLOCK_CACHE rocksdb-6.11.4/cache/clock_cache.h000066400000000000000000000011311370372246700166730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "rocksdb/cache.h" #if defined(TBB) && !defined(ROCKSDB_LITE) #define SUPPORT_CLOCK_CACHE #endif rocksdb-6.11.4/cache/lru_cache.cc000066400000000000000000000416631370372246700165560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "cache/lru_cache.h" #include #include #include #include #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) { Resize(); } LRUHandleTable::~LRUHandleTable() { ApplyToAllCacheEntries([](LRUHandle* h) { if (!h->HasRefs()) { h->Free(); } }); delete[] list_; } LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) { return *FindPointer(key, hash); } LRUHandle* LRUHandleTable::Insert(LRUHandle* h) { LRUHandle** ptr = FindPointer(h->key(), h->hash); LRUHandle* old = *ptr; h->next_hash = (old == nullptr ? nullptr : old->next_hash); *ptr = h; if (old == nullptr) { ++elems_; if (elems_ > length_) { // Since each cache entry is fairly large, we aim for a small // average linked list length (<= 1). Resize(); } } return old; } LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) { LRUHandle** ptr = FindPointer(key, hash); LRUHandle* result = *ptr; if (result != nullptr) { *ptr = result->next_hash; --elems_; } return result; } LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) { LRUHandle** ptr = &list_[hash & (length_ - 1)]; while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) { ptr = &(*ptr)->next_hash; } return ptr; } void LRUHandleTable::Resize() { uint32_t new_length = 16; while (new_length < elems_ * 1.5) { new_length *= 2; } LRUHandle** new_list = new LRUHandle*[new_length]; memset(new_list, 0, sizeof(new_list[0]) * new_length); uint32_t count = 0; for (uint32_t i = 0; i < length_; i++) { LRUHandle* h = list_[i]; while (h != nullptr) { LRUHandle* next = h->next_hash; uint32_t hash = h->hash; LRUHandle** ptr = &new_list[hash & (new_length - 1)]; h->next_hash = *ptr; *ptr = h; h = next; count++; } } assert(elems_ == count); delete[] list_; list_ = new_list; length_ = 
new_length; } LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio, bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy) : capacity_(0), high_pri_pool_usage_(0), strict_capacity_limit_(strict_capacity_limit), high_pri_pool_ratio_(high_pri_pool_ratio), high_pri_pool_capacity_(0), usage_(0), lru_usage_(0), mutex_(use_adaptive_mutex) { set_metadata_charge_policy(metadata_charge_policy); // Make empty circular linked list lru_.next = &lru_; lru_.prev = &lru_; lru_low_pri_ = &lru_; SetCapacity(capacity); } void LRUCacheShard::EraseUnRefEntries() { autovector last_reference_list; { MutexLock l(&mutex_); while (lru_.next != &lru_) { LRUHandle* old = lru_.next; // LRU list contains only elements which can be evicted assert(old->InCache() && !old->HasRefs()); LRU_Remove(old); table_.Remove(old->key(), old->hash); old->SetInCache(false); size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_); assert(usage_ >= total_charge); usage_ -= total_charge; last_reference_list.push_back(old); } } for (auto entry : last_reference_list) { entry->Free(); } } void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) { const auto applyCallback = [&]() { table_.ApplyToAllCacheEntries( [callback](LRUHandle* h) { callback(h->value, h->charge); }); }; if (thread_safe) { MutexLock l(&mutex_); applyCallback(); } else { applyCallback(); } } void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) { MutexLock l(&mutex_); *lru = &lru_; *lru_low_pri = lru_low_pri_; } size_t LRUCacheShard::TEST_GetLRUSize() { MutexLock l(&mutex_); LRUHandle* lru_handle = lru_.next; size_t lru_size = 0; while (lru_handle != &lru_) { lru_size++; lru_handle = lru_handle->next; } return lru_size; } double LRUCacheShard::GetHighPriPoolRatio() { MutexLock l(&mutex_); return high_pri_pool_ratio_; } void LRUCacheShard::LRU_Remove(LRUHandle* e) { assert(e->next != nullptr); assert(e->prev 
!= nullptr); if (lru_low_pri_ == e) { lru_low_pri_ = e->prev; } e->next->prev = e->prev; e->prev->next = e->next; e->prev = e->next = nullptr; size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); assert(lru_usage_ >= total_charge); lru_usage_ -= total_charge; if (e->InHighPriPool()) { assert(high_pri_pool_usage_ >= total_charge); high_pri_pool_usage_ -= total_charge; } } void LRUCacheShard::LRU_Insert(LRUHandle* e) { assert(e->next == nullptr); assert(e->prev == nullptr); size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) { // Inset "e" to head of LRU list. e->next = &lru_; e->prev = lru_.prev; e->prev->next = e; e->next->prev = e; e->SetInHighPriPool(true); high_pri_pool_usage_ += total_charge; MaintainPoolSize(); } else { // Insert "e" to the head of low-pri pool. Note that when // high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list. e->next = lru_low_pri_->next; e->prev = lru_low_pri_; e->prev->next = e; e->next->prev = e; e->SetInHighPriPool(false); lru_low_pri_ = e; } lru_usage_ += total_charge; } void LRUCacheShard::MaintainPoolSize() { while (high_pri_pool_usage_ > high_pri_pool_capacity_) { // Overflow last entry in high-pri pool to low-pri pool. 
lru_low_pri_ = lru_low_pri_->next; assert(lru_low_pri_ != &lru_); lru_low_pri_->SetInHighPriPool(false); size_t total_charge = lru_low_pri_->CalcTotalCharge(metadata_charge_policy_); assert(high_pri_pool_usage_ >= total_charge); high_pri_pool_usage_ -= total_charge; } } void LRUCacheShard::EvictFromLRU(size_t charge, autovector* deleted) { while ((usage_ + charge) > capacity_ && lru_.next != &lru_) { LRUHandle* old = lru_.next; // LRU list contains only elements which can be evicted assert(old->InCache() && !old->HasRefs()); LRU_Remove(old); table_.Remove(old->key(), old->hash); old->SetInCache(false); size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_); assert(usage_ >= old_total_charge); usage_ -= old_total_charge; deleted->push_back(old); } } void LRUCacheShard::SetCapacity(size_t capacity) { autovector last_reference_list; { MutexLock l(&mutex_); capacity_ = capacity; high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_; EvictFromLRU(0, &last_reference_list); } // Free the entries outside of mutex for performance reasons for (auto entry : last_reference_list) { entry->Free(); } } void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) { MutexLock l(&mutex_); strict_capacity_limit_ = strict_capacity_limit; } Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) { MutexLock l(&mutex_); LRUHandle* e = table_.Lookup(key, hash); if (e != nullptr) { assert(e->InCache()); if (!e->HasRefs()) { // The entry is in LRU since it's in hash and has no external references LRU_Remove(e); } e->Ref(); e->SetHit(); } return reinterpret_cast(e); } bool LRUCacheShard::Ref(Cache::Handle* h) { LRUHandle* e = reinterpret_cast(h); MutexLock l(&mutex_); // To create another reference - entry must be already externally referenced assert(e->HasRefs()); e->Ref(); return true; } void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) { MutexLock l(&mutex_); high_pri_pool_ratio_ = high_pri_pool_ratio; 
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_; MaintainPoolSize(); } bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) { if (handle == nullptr) { return false; } LRUHandle* e = reinterpret_cast(handle); bool last_reference = false; { MutexLock l(&mutex_); last_reference = e->Unref(); if (last_reference && e->InCache()) { // The item is still in cache, and nobody else holds a reference to it if (usage_ > capacity_ || force_erase) { // The LRU list must be empty since the cache is full assert(lru_.next == &lru_ || force_erase); // Take this opportunity and remove the item table_.Remove(e->key(), e->hash); e->SetInCache(false); } else { // Put the item back on the LRU list, and don't free it LRU_Insert(e); last_reference = false; } } if (last_reference) { size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); assert(usage_ >= total_charge); usage_ -= total_charge; } } // Free the entry here outside of mutex for performance reasons if (last_reference) { e->Free(); } return last_reference; } Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Cache::Handle** handle, Cache::Priority priority) { // Allocate the memory here outside of the mutex // If the cache is full, we'll have to release it // It shouldn't happen very often though. 
LRUHandle* e = reinterpret_cast( new char[sizeof(LRUHandle) - 1 + key.size()]); Status s = Status::OK(); autovector last_reference_list; e->value = value; e->deleter = deleter; e->charge = charge; e->key_length = key.size(); e->flags = 0; e->hash = hash; e->refs = 0; e->next = e->prev = nullptr; e->SetInCache(true); e->SetPriority(priority); memcpy(e->key_data, key.data(), key.size()); size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); { MutexLock l(&mutex_); // Free the space following strict LRU policy until enough space // is freed or the lru list is empty EvictFromLRU(total_charge, &last_reference_list); if ((usage_ + total_charge) > capacity_ && (strict_capacity_limit_ || handle == nullptr)) { if (handle == nullptr) { // Don't insert the entry but still return ok, as if the entry inserted // into cache and get evicted immediately. e->SetInCache(false); last_reference_list.push_back(e); } else { delete[] reinterpret_cast(e); *handle = nullptr; s = Status::Incomplete("Insert failed due to LRU cache being full."); } } else { // Insert into the cache. Note that the cache might get larger than its // capacity if not enough space was freed up. 
LRUHandle* old = table_.Insert(e); usage_ += total_charge; if (old != nullptr) { s = Status::OkOverwritten(); assert(old->InCache()); old->SetInCache(false); if (!old->HasRefs()) { // old is on LRU because it's in cache and its reference count is 0 LRU_Remove(old); size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_); assert(usage_ >= old_total_charge); usage_ -= old_total_charge; last_reference_list.push_back(old); } } if (handle == nullptr) { LRU_Insert(e); } else { e->Ref(); *handle = reinterpret_cast(e); } } } // Free the entries here outside of mutex for performance reasons for (auto entry : last_reference_list) { entry->Free(); } return s; } void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { LRUHandle* e; bool last_reference = false; { MutexLock l(&mutex_); e = table_.Remove(key, hash); if (e != nullptr) { assert(e->InCache()); e->SetInCache(false); if (!e->HasRefs()) { // The entry is in LRU since it's in hash and has no external references LRU_Remove(e); size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); assert(usage_ >= total_charge); usage_ -= total_charge; last_reference = true; } } } // Free the entry here outside of mutex for performance reasons // last_reference will only be true if e != nullptr if (last_reference) { e->Free(); } } size_t LRUCacheShard::GetUsage() const { MutexLock l(&mutex_); return usage_; } size_t LRUCacheShard::GetPinnedUsage() const { MutexLock l(&mutex_); assert(usage_ >= lru_usage_); return usage_ - lru_usage_; } std::string LRUCacheShard::GetPrintableOptions() const { const int kBufferSize = 200; char buffer[kBufferSize]; { MutexLock l(&mutex_); snprintf(buffer, kBufferSize, " high_pri_pool_ratio: %.3lf\n", high_pri_pool_ratio_); } return std::string(buffer); } LRUCache::LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio, std::shared_ptr allocator, bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy) : 
ShardedCache(capacity, num_shard_bits, strict_capacity_limit, std::move(allocator)) { num_shards_ = 1 << num_shard_bits; shards_ = reinterpret_cast( port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_)); size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_; for (int i = 0; i < num_shards_; i++) { new (&shards_[i]) LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio, use_adaptive_mutex, metadata_charge_policy); } } LRUCache::~LRUCache() { if (shards_ != nullptr) { assert(num_shards_ > 0); for (int i = 0; i < num_shards_; i++) { shards_[i].~LRUCacheShard(); } port::cacheline_aligned_free(shards_); } } CacheShard* LRUCache::GetShard(int shard) { return reinterpret_cast(&shards_[shard]); } const CacheShard* LRUCache::GetShard(int shard) const { return reinterpret_cast(&shards_[shard]); } void* LRUCache::Value(Handle* handle) { return reinterpret_cast(handle)->value; } size_t LRUCache::GetCharge(Handle* handle) const { return reinterpret_cast(handle)->charge; } uint32_t LRUCache::GetHash(Handle* handle) const { return reinterpret_cast(handle)->hash; } void LRUCache::DisownData() { // Do not drop data if compile with ASAN to suppress leak warning. 
#if defined(__clang__) #if !defined(__has_feature) || !__has_feature(address_sanitizer) shards_ = nullptr; num_shards_ = 0; #endif #else // __clang__ #ifndef __SANITIZE_ADDRESS__ shards_ = nullptr; num_shards_ = 0; #endif // !__SANITIZE_ADDRESS__ #endif // __clang__ } size_t LRUCache::TEST_GetLRUSize() { size_t lru_size_of_all_shards = 0; for (int i = 0; i < num_shards_; i++) { lru_size_of_all_shards += shards_[i].TEST_GetLRUSize(); } return lru_size_of_all_shards; } double LRUCache::GetHighPriPoolRatio() { double result = 0.0; if (num_shards_ > 0) { result = shards_[0].GetHighPriPoolRatio(); } return result; } std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts) { return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits, cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio, cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, cache_opts.metadata_charge_policy); } std::shared_ptr NewLRUCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio, std::shared_ptr memory_allocator, bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy) { if (num_shard_bits >= 20) { return nullptr; // the cache cannot be sharded into too many fine pieces } if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) { // invalid high_pri_pool_ratio return nullptr; } if (num_shard_bits < 0) { num_shard_bits = GetDefaultCacheShardBits(capacity); } return std::make_shared( capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio, std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/cache/lru_cache.h000066400000000000000000000264621370372246700164200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "cache/sharded_cache.h" #include "port/malloc.h" #include "port/port.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { // LRU cache implementation. This class is not thread-safe. // An entry is a variable length heap-allocated structure. // Entries are referenced by cache and/or by any external entity. // The cache keeps all its entries in a hash table. Some elements // are also stored on LRU list. // // LRUHandle can be in these states: // 1. Referenced externally AND in hash table. // In that case the entry is *not* in the LRU list // (refs >= 1 && in_cache == true) // 2. Not referenced externally AND in hash table. // In that case the entry is in the LRU list and can be freed. // (refs == 0 && in_cache == true) // 3. Referenced externally AND not in hash table. // In that case the entry is not in the LRU list and not in hash table. // The entry can be freed when refs becomes 0. // (refs >= 1 && in_cache == false) // // All newly created LRUHandles are in state 1. If you call // LRUCacheShard::Release on entry in state 1, it will go into state 2. // To move from state 1 to state 3, either call LRUCacheShard::Erase or // LRUCacheShard::Insert with the same key (but possibly different value). // To move from state 2 to state 1, use LRUCacheShard::Lookup. // Before destruction, make sure that no handles are in state 1. This means // that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a // matching LRUCache::Release (to move into state 2) or LRUCacheShard::Erase // (to move into state 3). 
struct LRUHandle { void* value; void (*deleter)(const Slice&, void* value); LRUHandle* next_hash; LRUHandle* next; LRUHandle* prev; size_t charge; // TODO(opt): Only allow uint32_t? size_t key_length; // The hash of key(). Used for fast sharding and comparisons. uint32_t hash; // The number of external refs to this entry. The cache itself is not counted. uint32_t refs; enum Flags : uint8_t { // Whether this entry is referenced by the hash table. IN_CACHE = (1 << 0), // Whether this entry is high priority entry. IS_HIGH_PRI = (1 << 1), // Whether this entry is in high-pri pool. IN_HIGH_PRI_POOL = (1 << 2), // Wwhether this entry has had any lookups (hits). HAS_HIT = (1 << 3), }; uint8_t flags; // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!) char key_data[1]; Slice key() const { return Slice(key_data, key_length); } // Increase the reference count by 1. void Ref() { refs++; } // Just reduce the reference count by 1. Return true if it was last reference. bool Unref() { assert(refs > 0); refs--; return refs == 0; } // Return true if there are external refs, false otherwise. 
bool HasRefs() const { return refs > 0; } bool InCache() const { return flags & IN_CACHE; } bool IsHighPri() const { return flags & IS_HIGH_PRI; } bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; } bool HasHit() const { return flags & HAS_HIT; } void SetInCache(bool in_cache) { if (in_cache) { flags |= IN_CACHE; } else { flags &= ~IN_CACHE; } } void SetPriority(Cache::Priority priority) { if (priority == Cache::Priority::HIGH) { flags |= IS_HIGH_PRI; } else { flags &= ~IS_HIGH_PRI; } } void SetInHighPriPool(bool in_high_pri_pool) { if (in_high_pri_pool) { flags |= IN_HIGH_PRI_POOL; } else { flags &= ~IN_HIGH_PRI_POOL; } } void SetHit() { flags |= HAS_HIT; } void Free() { assert(refs == 0); if (deleter) { (*deleter)(key(), value); } delete[] reinterpret_cast(this); } // Caclculate the memory usage by metadata inline size_t CalcTotalCharge( CacheMetadataChargePolicy metadata_charge_policy) { size_t meta_charge = 0; if (metadata_charge_policy == kFullChargeCacheMetadata) { #ifdef ROCKSDB_MALLOC_USABLE_SIZE meta_charge += malloc_usable_size(static_cast(this)); #else // This is the size that is used when a new handle is created meta_charge += sizeof(LRUHandle) - 1 + key_length; #endif } return charge + meta_charge; } }; // We provide our own simple hash table since it removes a whole bunch // of porting hacks and is also faster than some of the built-in hash // table implementations in some of the compiler/runtime combinations // we have tested. E.g., readrandom speeds up by ~5% over the g++ // 4.4.3's builtin hashtable. 
class LRUHandleTable { public: LRUHandleTable(); ~LRUHandleTable(); LRUHandle* Lookup(const Slice& key, uint32_t hash); LRUHandle* Insert(LRUHandle* h); LRUHandle* Remove(const Slice& key, uint32_t hash); template void ApplyToAllCacheEntries(T func) { for (uint32_t i = 0; i < length_; i++) { LRUHandle* h = list_[i]; while (h != nullptr) { auto n = h->next_hash; assert(h->InCache()); func(h); h = n; } } } private: // Return a pointer to slot that points to a cache entry that // matches key/hash. If there is no such cache entry, return a // pointer to the trailing slot in the corresponding linked list. LRUHandle** FindPointer(const Slice& key, uint32_t hash); void Resize(); // The table consists of an array of buckets where each bucket is // a linked list of cache entries that hash into the bucket. LRUHandle** list_; uint32_t length_; uint32_t elems_; }; // A single shard of sharded cache. class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { public: LRUCacheShard(size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio, bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy); virtual ~LRUCacheShard() override = default; // Separate from constructor so caller can easily make an array of LRUCache // if current usage is more than new capacity, the function will attempt to // free the needed space virtual void SetCapacity(size_t capacity) override; // Set the flag to reject insertion if cache if full. virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override; // Set percentage of capacity reserved for high-pri cache entries. void SetHighPriorityPoolRatio(double high_pri_pool_ratio); // Like Cache methods, but with an extra "hash" parameter. 
virtual Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Cache::Handle** handle, Cache::Priority priority) override; virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; virtual bool Ref(Cache::Handle* handle) override; virtual bool Release(Cache::Handle* handle, bool force_erase = false) override; virtual void Erase(const Slice& key, uint32_t hash) override; // Although in some platforms the update of size_t is atomic, to make sure // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll // protect them with mutex_. virtual size_t GetUsage() const override; virtual size_t GetPinnedUsage() const override; virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) override; virtual void EraseUnRefEntries() override; virtual std::string GetPrintableOptions() const override; void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri); // Retrieves number of elements in LRU, for unit test purpose only // not threadsafe size_t TEST_GetLRUSize(); // Retrives high pri pool ratio double GetHighPriPoolRatio(); private: void LRU_Remove(LRUHandle* e); void LRU_Insert(LRUHandle* e); // Overflow the last entry in high-pri pool to low-pri pool until size of // high-pri pool is no larger than the size specify by high_pri_pool_pct. void MaintainPoolSize(); // Free some space following strict LRU policy until enough space // to hold (usage_ + charge) is freed or the lru list is empty // This function is not thread safe - it needs to be executed while // holding the mutex_ void EvictFromLRU(size_t charge, autovector* deleted); // Initialized before use. size_t capacity_; // Memory size for entries in high-pri pool. size_t high_pri_pool_usage_; // Whether to reject insertion if cache reaches its full capacity. bool strict_capacity_limit_; // Ratio of capacity reserved for high priority cache entries. 
double high_pri_pool_ratio_; // High-pri pool size, equals to capacity * high_pri_pool_ratio. // Remember the value to avoid recomputing each time. double high_pri_pool_capacity_; // Dummy head of LRU list. // lru.prev is newest entry, lru.next is oldest entry. // LRU contains items which can be evicted, ie reference only by cache LRUHandle lru_; // Pointer to head of low-pri pool in LRU list. LRUHandle* lru_low_pri_; // ------------^^^^^^^^^^^^^----------- // Not frequently modified data members // ------------------------------------ // // We separate data members that are updated frequently from the ones that // are not frequently updated so that they don't share the same cache line // which will lead into false cache sharing // // ------------------------------------ // Frequently modified data members // ------------vvvvvvvvvvvvv----------- LRUHandleTable table_; // Memory size for entries residing in the cache size_t usage_; // Memory size for entries residing only in the LRU list size_t lru_usage_; // mutex_ protects the following state. // We don't count mutex_ as the cache's internal state so semantically we // don't mind mutex_ invoking the non-const actions. 
mutable port::Mutex mutex_; }; class LRUCache #ifdef NDEBUG final #endif : public ShardedCache { public: LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio, std::shared_ptr memory_allocator = nullptr, bool use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy metadata_charge_policy = kDontChargeCacheMetadata); virtual ~LRUCache(); virtual const char* Name() const override { return "LRUCache"; } virtual CacheShard* GetShard(int shard) override; virtual const CacheShard* GetShard(int shard) const override; virtual void* Value(Handle* handle) override; virtual size_t GetCharge(Handle* handle) const override; virtual uint32_t GetHash(Handle* handle) const override; virtual void DisownData() override; // Retrieves number of elements in LRU, for unit test purpose only size_t TEST_GetLRUSize(); // Retrives high pri pool ratio double GetHighPriPoolRatio(); private: LRUCacheShard* shards_ = nullptr; int num_shards_ = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/cache/lru_cache_test.cc000066400000000000000000000140451370372246700176070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "cache/lru_cache.h" #include #include #include "port/port.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { class LRUCacheTest : public testing::Test { public: LRUCacheTest() {} ~LRUCacheTest() override { DeleteCache(); } void DeleteCache() { if (cache_ != nullptr) { cache_->~LRUCacheShard(); port::cacheline_aligned_free(cache_); cache_ = nullptr; } } void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0, bool use_adaptive_mutex = kDefaultToAdaptiveMutex) { DeleteCache(); cache_ = reinterpret_cast( port::cacheline_aligned_alloc(sizeof(LRUCacheShard))); new (cache_) LRUCacheShard(capacity, false /*strict_capcity_limit*/, high_pri_pool_ratio, use_adaptive_mutex, kDontChargeCacheMetadata); } void Insert(const std::string& key, Cache::Priority priority = Cache::Priority::LOW) { cache_->Insert(key, 0 /*hash*/, nullptr /*value*/, 1 /*charge*/, nullptr /*deleter*/, nullptr /*handle*/, priority); } void Insert(char key, Cache::Priority priority = Cache::Priority::LOW) { Insert(std::string(1, key), priority); } bool Lookup(const std::string& key) { auto handle = cache_->Lookup(key, 0 /*hash*/); if (handle) { cache_->Release(handle); return true; } return false; } bool Lookup(char key) { return Lookup(std::string(1, key)); } void Erase(const std::string& key) { cache_->Erase(key, 0 /*hash*/); } void ValidateLRUList(std::vector keys, size_t num_high_pri_pool_keys = 0) { LRUHandle* lru; LRUHandle* lru_low_pri; cache_->TEST_GetLRUList(&lru, &lru_low_pri); LRUHandle* iter = lru; bool in_high_pri_pool = false; size_t high_pri_pool_keys = 0; if (iter == lru_low_pri) { in_high_pri_pool = true; } for (const auto& key : keys) { iter = iter->next; ASSERT_NE(lru, iter); ASSERT_EQ(key, iter->key().ToString()); ASSERT_EQ(in_high_pri_pool, iter->InHighPriPool()); if (in_high_pri_pool) { high_pri_pool_keys++; } if (iter == lru_low_pri) { ASSERT_FALSE(in_high_pri_pool); in_high_pri_pool = true; } } ASSERT_EQ(lru, iter->next); 
ASSERT_TRUE(in_high_pri_pool); ASSERT_EQ(num_high_pri_pool_keys, high_pri_pool_keys); } private: LRUCacheShard* cache_ = nullptr; }; TEST_F(LRUCacheTest, BasicLRU) { NewCache(5); for (char ch = 'a'; ch <= 'e'; ch++) { Insert(ch); } ValidateLRUList({"a", "b", "c", "d", "e"}); for (char ch = 'x'; ch <= 'z'; ch++) { Insert(ch); } ValidateLRUList({"d", "e", "x", "y", "z"}); ASSERT_FALSE(Lookup("b")); ValidateLRUList({"d", "e", "x", "y", "z"}); ASSERT_TRUE(Lookup("e")); ValidateLRUList({"d", "x", "y", "z", "e"}); ASSERT_TRUE(Lookup("z")); ValidateLRUList({"d", "x", "y", "e", "z"}); Erase("x"); ValidateLRUList({"d", "y", "e", "z"}); ASSERT_TRUE(Lookup("d")); ValidateLRUList({"y", "e", "z", "d"}); Insert("u"); ValidateLRUList({"y", "e", "z", "d", "u"}); Insert("v"); ValidateLRUList({"e", "z", "d", "u", "v"}); } TEST_F(LRUCacheTest, MidpointInsertion) { // Allocate 2 cache entries to high-pri pool. NewCache(5, 0.45); Insert("a", Cache::Priority::LOW); Insert("b", Cache::Priority::LOW); Insert("c", Cache::Priority::LOW); Insert("x", Cache::Priority::HIGH); Insert("y", Cache::Priority::HIGH); ValidateLRUList({"a", "b", "c", "x", "y"}, 2); // Low-pri entries inserted to the tail of low-pri list (the midpoint). // After lookup, it will move to the tail of the full list. Insert("d", Cache::Priority::LOW); ValidateLRUList({"b", "c", "d", "x", "y"}, 2); ASSERT_TRUE(Lookup("d")); ValidateLRUList({"b", "c", "x", "y", "d"}, 2); // High-pri entries will be inserted to the tail of full list. Insert("z", Cache::Priority::HIGH); ValidateLRUList({"c", "x", "y", "d", "z"}, 2); } TEST_F(LRUCacheTest, EntriesWithPriority) { // Allocate 2 cache entries to high-pri pool. 
NewCache(5, 0.45); Insert("a", Cache::Priority::LOW); Insert("b", Cache::Priority::LOW); Insert("c", Cache::Priority::LOW); ValidateLRUList({"a", "b", "c"}, 0); // Low-pri entries can take high-pri pool capacity if available Insert("u", Cache::Priority::LOW); Insert("v", Cache::Priority::LOW); ValidateLRUList({"a", "b", "c", "u", "v"}, 0); Insert("X", Cache::Priority::HIGH); Insert("Y", Cache::Priority::HIGH); ValidateLRUList({"c", "u", "v", "X", "Y"}, 2); // High-pri entries can overflow to low-pri pool. Insert("Z", Cache::Priority::HIGH); ValidateLRUList({"u", "v", "X", "Y", "Z"}, 2); // Low-pri entries will be inserted to head of low-pri pool. Insert("a", Cache::Priority::LOW); ValidateLRUList({"v", "X", "a", "Y", "Z"}, 2); // Low-pri entries will be inserted to head of high-pri pool after lookup. ASSERT_TRUE(Lookup("v")); ValidateLRUList({"X", "a", "Y", "Z", "v"}, 2); // High-pri entries will be inserted to the head of the list after lookup. ASSERT_TRUE(Lookup("X")); ValidateLRUList({"a", "Y", "Z", "v", "X"}, 2); ASSERT_TRUE(Lookup("Z")); ValidateLRUList({"a", "Y", "v", "X", "Z"}, 2); Erase("Y"); ValidateLRUList({"a", "v", "X", "Z"}, 2); Erase("X"); ValidateLRUList({"a", "v", "Z"}, 1); Insert("d", Cache::Priority::LOW); Insert("e", Cache::Priority::LOW); ValidateLRUList({"a", "v", "d", "e", "Z"}, 1); Insert("f", Cache::Priority::LOW); Insert("g", Cache::Priority::LOW); ValidateLRUList({"d", "e", "f", "g", "Z"}, 1); ASSERT_TRUE(Lookup("d")); ValidateLRUList({"e", "f", "g", "Z", "d"}, 2); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/cache/sharded_cache.cc000066400000000000000000000117371370372246700173650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "cache/sharded_cache.h" #include #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { ShardedCache::ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, std::shared_ptr allocator) : Cache(std::move(allocator)), num_shard_bits_(num_shard_bits), capacity_(capacity), strict_capacity_limit_(strict_capacity_limit), last_id_(1) {} void ShardedCache::SetCapacity(size_t capacity) { int num_shards = 1 << num_shard_bits_; const size_t per_shard = (capacity + (num_shards - 1)) / num_shards; MutexLock l(&capacity_mutex_); for (int s = 0; s < num_shards; s++) { GetShard(s)->SetCapacity(per_shard); } capacity_ = capacity; } void ShardedCache::SetStrictCapacityLimit(bool strict_capacity_limit) { int num_shards = 1 << num_shard_bits_; MutexLock l(&capacity_mutex_); for (int s = 0; s < num_shards; s++) { GetShard(s)->SetStrictCapacityLimit(strict_capacity_limit); } strict_capacity_limit_ = strict_capacity_limit; } Status ShardedCache::Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Handle** handle, Priority priority) { uint32_t hash = HashSlice(key); return GetShard(Shard(hash)) ->Insert(key, hash, value, charge, deleter, handle, priority); } Cache::Handle* ShardedCache::Lookup(const Slice& key, Statistics* /*stats*/) { uint32_t hash = HashSlice(key); return GetShard(Shard(hash))->Lookup(key, hash); } bool ShardedCache::Ref(Handle* handle) { uint32_t hash = GetHash(handle); return GetShard(Shard(hash))->Ref(handle); } bool ShardedCache::Release(Handle* handle, bool force_erase) { uint32_t 
hash = GetHash(handle); return GetShard(Shard(hash))->Release(handle, force_erase); } void ShardedCache::Erase(const Slice& key) { uint32_t hash = HashSlice(key); GetShard(Shard(hash))->Erase(key, hash); } uint64_t ShardedCache::NewId() { return last_id_.fetch_add(1, std::memory_order_relaxed); } size_t ShardedCache::GetCapacity() const { MutexLock l(&capacity_mutex_); return capacity_; } bool ShardedCache::HasStrictCapacityLimit() const { MutexLock l(&capacity_mutex_); return strict_capacity_limit_; } size_t ShardedCache::GetUsage() const { // We will not lock the cache when getting the usage from shards. int num_shards = 1 << num_shard_bits_; size_t usage = 0; for (int s = 0; s < num_shards; s++) { usage += GetShard(s)->GetUsage(); } return usage; } size_t ShardedCache::GetUsage(Handle* handle) const { return GetCharge(handle); } size_t ShardedCache::GetPinnedUsage() const { // We will not lock the cache when getting the usage from shards. int num_shards = 1 << num_shard_bits_; size_t usage = 0; for (int s = 0; s < num_shards; s++) { usage += GetShard(s)->GetPinnedUsage(); } return usage; } void ShardedCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) { int num_shards = 1 << num_shard_bits_; for (int s = 0; s < num_shards; s++) { GetShard(s)->ApplyToAllCacheEntries(callback, thread_safe); } } void ShardedCache::EraseUnRefEntries() { int num_shards = 1 << num_shard_bits_; for (int s = 0; s < num_shards; s++) { GetShard(s)->EraseUnRefEntries(); } } std::string ShardedCache::GetPrintableOptions() const { std::string ret; ret.reserve(20000); const int kBufferSize = 200; char buffer[kBufferSize]; { MutexLock l(&capacity_mutex_); snprintf(buffer, kBufferSize, " capacity : %" ROCKSDB_PRIszt "\n", capacity_); ret.append(buffer); snprintf(buffer, kBufferSize, " num_shard_bits : %d\n", num_shard_bits_); ret.append(buffer); snprintf(buffer, kBufferSize, " strict_capacity_limit : %d\n", strict_capacity_limit_); ret.append(buffer); } 
snprintf(buffer, kBufferSize, " memory_allocator : %s\n", memory_allocator() ? memory_allocator()->Name() : "None"); ret.append(buffer); ret.append(GetShard(0)->GetPrintableOptions()); return ret; } int GetDefaultCacheShardBits(size_t capacity) { int num_shard_bits = 0; size_t min_shard_size = 512L * 1024L; // Every shard is at least 512KB. size_t num_shards = capacity / min_shard_size; while (num_shards >>= 1) { if (++num_shard_bits >= 6) { // No more than 6. return num_shard_bits; } } return num_shard_bits; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/cache/sharded_cache.h000066400000000000000000000106461370372246700172250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "port/port.h" #include "rocksdb/cache.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { // Single cache shard interface. 
// Abstract interface for one shard of a ShardedCache. ShardedCache computes
// the hash of a key (or reads it back from a handle), selects a shard from it
// and forwards the operation to that shard, passing the hash along so the
// shard does not have to recompute it.
class CacheShard {
 public:
  CacheShard() = default;
  virtual ~CacheShard() = default;

  // Like Cache::Insert, with the precomputed `hash` of `key` supplied by the
  // caller.
  virtual Status Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value),
                        Cache::Handle** handle, Cache::Priority priority) = 0;
  // Like Cache::Lookup, with the precomputed `hash` of `key`.
  virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;
  // Ref/Release operate on a handle; ShardedCache routes them to the shard
  // identified by the hash stored in the handle.
  virtual bool Ref(Cache::Handle* handle) = 0;
  virtual bool Release(Cache::Handle* handle, bool force_erase = false) = 0;
  // Like Cache::Erase, with the precomputed `hash` of `key`.
  virtual void Erase(const Slice& key, uint32_t hash) = 0;
  // Sets the capacity of this shard only. ShardedCache::SetCapacity passes
  // each shard the total capacity divided by the shard count, rounded up.
  virtual void SetCapacity(size_t capacity) = 0;
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
  // Usage of this shard; ShardedCache sums these values over all shards.
  virtual size_t GetUsage() const = 0;
  // Pinned usage of this shard (LRUCacheShard computes it as total usage
  // minus the usage of entries on its LRU list); also summed by ShardedCache.
  virtual size_t GetPinnedUsage() const = 0;
  // Invokes `callback` for the entries of this shard.
  // NOTE(review): the (void*, size_t) arguments are presumably the entry's
  // value and charge -- confirm against the implementations.
  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                      bool thread_safe) = 0;
  virtual void EraseUnRefEntries() = 0;
  // Shard-specific options as printable text. The default is the empty
  // string; ShardedCache::GetPrintableOptions appends shard 0's output.
  virtual std::string GetPrintableOptions() const { return ""; }
  // Stores the policy in metadata_charge_policy_ for use by the subclass.
  void set_metadata_charge_policy(
      CacheMetadataChargePolicy metadata_charge_policy) {
    metadata_charge_policy_ = metadata_charge_policy;
  }

 protected:
  // Defaults to not charging handle metadata against the cache.
  CacheMetadataChargePolicy metadata_charge_policy_ = kDontChargeCacheMetadata;
};

// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
// shards will be created, with capacity split evenly to each of the shards.
// Keys are sharded by the highest num_shard_bits bits of hash value.
class ShardedCache : public Cache { public: ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, std::shared_ptr memory_allocator = nullptr); virtual ~ShardedCache() = default; virtual const char* Name() const override = 0; virtual CacheShard* GetShard(int shard) = 0; virtual const CacheShard* GetShard(int shard) const = 0; virtual void* Value(Handle* handle) override = 0; virtual size_t GetCharge(Handle* handle) const override = 0; virtual uint32_t GetHash(Handle* handle) const = 0; virtual void DisownData() override = 0; virtual void SetCapacity(size_t capacity) override; virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override; virtual Status Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Handle** handle, Priority priority) override; virtual Handle* Lookup(const Slice& key, Statistics* stats) override; virtual bool Ref(Handle* handle) override; virtual bool Release(Handle* handle, bool force_erase = false) override; virtual void Erase(const Slice& key) override; virtual uint64_t NewId() override; virtual size_t GetCapacity() const override; virtual bool HasStrictCapacityLimit() const override; virtual size_t GetUsage() const override; virtual size_t GetUsage(Handle* handle) const override; virtual size_t GetPinnedUsage() const override; virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) override; virtual void EraseUnRefEntries() override; virtual std::string GetPrintableOptions() const override; int GetNumShardBits() const { return num_shard_bits_; } private: static inline uint32_t HashSlice(const Slice& s) { return static_cast(GetSliceNPHash64(s)); } uint32_t Shard(uint32_t hash) { // Note, hash >> 32 yields hash in gcc, not the zero we expect! return (num_shard_bits_ > 0) ? 
(hash >> (32 - num_shard_bits_)) : 0; } int num_shard_bits_; mutable port::Mutex capacity_mutex_; size_t capacity_; bool strict_capacity_limit_; std::atomic last_id_; }; extern int GetDefaultCacheShardBits(size_t capacity); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/cmake/000077500000000000000000000000001370372246700143255ustar00rootroot00000000000000rocksdb-6.11.4/cmake/RocksDBConfig.cmake.in000066400000000000000000000015731370372246700203570ustar00rootroot00000000000000@PACKAGE_INIT@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/modules") include(CMakeFindDependencyMacro) set(GFLAGS_USE_TARGET_NAMESPACE @GFLAGS_USE_TARGET_NAMESPACE@) if(@WITH_JEMALLOC@) find_dependency(JeMalloc) endif() if(@WITH_GFLAGS@) find_dependency(gflags CONFIG) if(NOT gflags_FOUND) find_dependency(gflags) endif() endif() if(@WITH_SNAPPY@) find_dependency(Snappy CONFIG) if(NOT Snappy_FOUND) find_dependency(Snappy) endif() endif() if(@WITH_ZLIB@) find_dependency(ZLIB) endif() if(@WITH_BZ2@) find_dependency(BZip2) endif() if(@WITH_LZ4@) find_dependency(lz4) endif() if(@WITH_ZSTD@) find_dependency(zstd) endif() if(@WITH_NUMA@) find_dependency(NUMA) endif() if(@WITH_TBB@) find_dependency(TBB) endif() find_dependency(Threads) include("${CMAKE_CURRENT_LIST_DIR}/RocksDBTargets.cmake") check_required_components(RocksDB) rocksdb-6.11.4/cmake/modules/000077500000000000000000000000001370372246700157755ustar00rootroot00000000000000rocksdb-6.11.4/cmake/modules/CxxFlags.cmake000066400000000000000000000004251370372246700205170ustar00rootroot00000000000000macro(get_cxx_std_flags FLAGS_VARIABLE) if( CMAKE_CXX_STANDARD_REQUIRED ) set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_STANDARD_COMPILE_OPTION}) else() set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_EXTENSION_COMPILE_OPTION}) endif() endmacro() rocksdb-6.11.4/cmake/modules/FindJeMalloc.cmake000066400000000000000000000016111370372246700212650ustar00rootroot00000000000000# - Find JeMalloc library # Find the native JeMalloc 
# includes and library
#
# JeMalloc_INCLUDE_DIRS - where to find jemalloc.h, etc.
# JeMalloc_LIBRARIES - List of libraries when using jemalloc.
# JeMalloc_FOUND - True if jemalloc found.

find_path(JeMalloc_INCLUDE_DIRS
  NAMES jemalloc/jemalloc.h
  HINTS ${JEMALLOC_ROOT_DIR}/include)

find_library(JeMalloc_LIBRARIES
  NAMES jemalloc
  HINTS ${JEMALLOC_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(JeMalloc DEFAULT_MSG JeMalloc_LIBRARIES JeMalloc_INCLUDE_DIRS)

mark_as_advanced(
  JeMalloc_LIBRARIES
  JeMalloc_INCLUDE_DIRS)

# Publish an imported target unless one with this name already exists.
if(JeMalloc_FOUND AND NOT (TARGET JeMalloc::JeMalloc))
  add_library (JeMalloc::JeMalloc UNKNOWN IMPORTED)
  set_target_properties(JeMalloc::JeMalloc
    PROPERTIES
      IMPORTED_LOCATION ${JeMalloc_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${JeMalloc_INCLUDE_DIRS})
endif()
rocksdb-6.11.4/cmake/modules/FindNUMA.cmake000066400000000000000000000014121370372246700203360ustar00rootroot00000000000000
# - Find NUMA
# Find the NUMA library and includes
#
# NUMA_INCLUDE_DIRS - where to find numa.h, etc.
# NUMA_LIBRARIES - List of libraries when using NUMA.
# NUMA_FOUND - True if NUMA found.

find_path(NUMA_INCLUDE_DIRS
  NAMES numa.h numaif.h
  HINTS ${NUMA_ROOT_DIR}/include)

find_library(NUMA_LIBRARIES
  NAMES numa
  HINTS ${NUMA_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_LIBRARIES NUMA_INCLUDE_DIRS)

mark_as_advanced(
  NUMA_LIBRARIES
  NUMA_INCLUDE_DIRS)

# Publish an imported target unless one with this name already exists.
if(NUMA_FOUND AND NOT (TARGET NUMA::NUMA))
  add_library (NUMA::NUMA UNKNOWN IMPORTED)
  set_target_properties(NUMA::NUMA
    PROPERTIES
      IMPORTED_LOCATION ${NUMA_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${NUMA_INCLUDE_DIRS})
endif()
rocksdb-6.11.4/cmake/modules/FindSnappy.cmake000066400000000000000000000015051370372246700210530ustar00rootroot00000000000000
# - Find Snappy
# Find the snappy compression library and includes
#
# Snappy_INCLUDE_DIRS - where to find snappy.h, etc.
# Snappy_LIBRARIES - List of libraries when using snappy.
# Snappy_FOUND - True if snappy found.

find_path(Snappy_INCLUDE_DIRS
  NAMES snappy.h
  HINTS ${snappy_ROOT_DIR}/include)

find_library(Snappy_LIBRARIES
  NAMES snappy
  HINTS ${snappy_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Snappy DEFAULT_MSG Snappy_LIBRARIES Snappy_INCLUDE_DIRS)

mark_as_advanced(
  Snappy_LIBRARIES
  Snappy_INCLUDE_DIRS)

# Publish an imported target unless one with this name already exists.
if(Snappy_FOUND AND NOT (TARGET Snappy::snappy))
  add_library (Snappy::snappy UNKNOWN IMPORTED)
  set_target_properties(Snappy::snappy
    PROPERTIES
      IMPORTED_LOCATION ${Snappy_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${Snappy_INCLUDE_DIRS})
endif()
rocksdb-6.11.4/cmake/modules/FindTBB.cmake000066400000000000000000000015271370372246700202140ustar00rootroot00000000000000
# - Find TBB
# Find the Thread Building Blocks library and includes
#
# TBB_INCLUDE_DIRS - where to find tbb.h, etc.
# TBB_LIBRARIES - List of libraries when using TBB.
# TBB_FOUND - True if TBB found.

# Fall back to the TBBROOT environment variable when no root was given.
if(NOT DEFINED TBB_ROOT_DIR)
  set(TBB_ROOT_DIR "$ENV{TBBROOT}")
endif()

find_path(TBB_INCLUDE_DIRS
  NAMES tbb/tbb.h
  HINTS ${TBB_ROOT_DIR}/include)

find_library(TBB_LIBRARIES
  NAMES tbb
  HINTS ${TBB_ROOT_DIR}/lib ENV LIBRARY_PATH)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(TBB DEFAULT_MSG TBB_LIBRARIES TBB_INCLUDE_DIRS)

mark_as_advanced(
  TBB_LIBRARIES
  TBB_INCLUDE_DIRS)

# Publish an imported target unless one with this name already exists.
if(TBB_FOUND AND NOT (TARGET TBB::TBB))
  add_library (TBB::TBB UNKNOWN IMPORTED)
  set_target_properties(TBB::TBB
    PROPERTIES
      IMPORTED_LOCATION ${TBB_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${TBB_INCLUDE_DIRS})
endif()
rocksdb-6.11.4/cmake/modules/Findgflags.cmake000066400000000000000000000014531370372246700210460ustar00rootroot00000000000000
# - Find gflags library
# Find the gflags includes and library
#
# GFLAGS_INCLUDE_DIR - where to find gflags.h.
# GFLAGS_LIBRARIES - List of libraries when using gflags.
# gflags_FOUND - True if gflags found.
find_path(GFLAGS_INCLUDE_DIR
  NAMES gflags/gflags.h)

find_library(GFLAGS_LIBRARIES
  NAMES gflags)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(gflags DEFAULT_MSG GFLAGS_LIBRARIES GFLAGS_INCLUDE_DIR)

mark_as_advanced(
  GFLAGS_LIBRARIES
  GFLAGS_INCLUDE_DIR)

# Publish an imported target unless one with this name already exists.
if(gflags_FOUND AND NOT (TARGET gflags::gflags))
  add_library(gflags::gflags UNKNOWN IMPORTED)
  set_target_properties(gflags::gflags
    PROPERTIES
      IMPORTED_LOCATION ${GFLAGS_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${GFLAGS_INCLUDE_DIR}
      IMPORTED_LINK_INTERFACE_LANGUAGES "CXX")
endif()
rocksdb-6.11.4/cmake/modules/Findlz4.cmake000066400000000000000000000013601370372246700203110ustar00rootroot00000000000000
# - Find Lz4
# Find the lz4 compression library and includes
#
# lz4_INCLUDE_DIRS - where to find lz4.h, etc.
# lz4_LIBRARIES - List of libraries when using lz4.
# lz4_FOUND - True if lz4 found.

find_path(lz4_INCLUDE_DIRS
  NAMES lz4.h
  HINTS ${lz4_ROOT_DIR}/include)

find_library(lz4_LIBRARIES
  NAMES lz4
  HINTS ${lz4_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(lz4 DEFAULT_MSG lz4_LIBRARIES lz4_INCLUDE_DIRS)

mark_as_advanced(
  lz4_LIBRARIES
  lz4_INCLUDE_DIRS)

# Publish an imported target unless one with this name already exists.
if(lz4_FOUND AND NOT (TARGET lz4::lz4))
  add_library(lz4::lz4 UNKNOWN IMPORTED)
  set_target_properties(lz4::lz4
    PROPERTIES
      IMPORTED_LOCATION ${lz4_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${lz4_INCLUDE_DIRS})
endif()
rocksdb-6.11.4/cmake/modules/Findzstd.cmake000066400000000000000000000014151370372246700205650ustar00rootroot00000000000000
# - Find zstd
# Find the zstd compression library and includes
#
# zstd_INCLUDE_DIRS - where to find zstd.h, etc.
# zstd_LIBRARIES - List of libraries when using zstd.
# zstd_FOUND - True if zstd found.
find_path(zstd_INCLUDE_DIRS
  NAMES zstd.h
  HINTS ${zstd_ROOT_DIR}/include)

find_library(zstd_LIBRARIES
  NAMES zstd
  HINTS ${zstd_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(zstd DEFAULT_MSG zstd_LIBRARIES zstd_INCLUDE_DIRS)

mark_as_advanced(
  zstd_LIBRARIES
  zstd_INCLUDE_DIRS)

# Publish an imported target unless one with this name already exists.
if(zstd_FOUND AND NOT (TARGET zstd::zstd))
  add_library (zstd::zstd UNKNOWN IMPORTED)
  set_target_properties(zstd::zstd
    PROPERTIES
      IMPORTED_LOCATION ${zstd_LIBRARIES}
      INTERFACE_INCLUDE_DIRECTORIES ${zstd_INCLUDE_DIRS})
endif()
rocksdb-6.11.4/cmake/modules/ReadVersion.cmake000066400000000000000000000007731370372246700212270ustar00rootroot00000000000000
# Read rocksdb version from version.h header file.

# Sets `version_var` in the caller's scope to "MAJOR.MINOR.PATCH", taken from
# the ROCKSDB_MAJOR / ROCKSDB_MINOR / ROCKSDB_PATCH #defines in version.h.
function(get_rocksdb_version version_var)
  file(READ "${CMAKE_CURRENT_SOURCE_DIR}/include/rocksdb/version.h" version_header_file)
  foreach(component MAJOR MINOR PATCH)
    string(REGEX MATCH "#define ROCKSDB_${component} ([0-9]+)" _ ${version_header_file})
    set(ROCKSDB_VERSION_${component} ${CMAKE_MATCH_1})
  endforeach()
  set(${version_var} "${ROCKSDB_VERSION_MAJOR}.${ROCKSDB_VERSION_MINOR}.${ROCKSDB_VERSION_PATCH}" PARENT_SCOPE)
endfunction()
rocksdb-6.11.4/coverage/000077500000000000000000000000001370372246700150405ustar00rootroot00000000000000rocksdb-6.11.4/coverage/coverage_test.sh000077500000000000000000000045351370372246700202400ustar00rootroot00000000000000
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

# Exit on error.
set -e

if [ -n "$USE_CLANG" ]; then
  echo "Error: Coverage test is supported only for gcc."
  exit 1
fi

ROOT=".."
# Fetch right version of gcov if [ -d /mnt/gvfs/third-party -a -z "$CXX" ]; then source $ROOT/build_tools/fbcode_config_platform007.sh GCOV=$GCC_BASE/bin/gcov else GCOV=$(which gcov) fi echo -e "Using $GCOV" COVERAGE_DIR="$PWD/COVERAGE_REPORT" mkdir -p $COVERAGE_DIR # Find all gcno files to generate the coverage report PYTHON=${1:-`which python`} echo -e "Using $PYTHON" GCNO_FILES=`find $ROOT -name "*.gcno"` $GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null | # Parse the raw gcov report to more human readable form. $PYTHON $ROOT/coverage/parse_gcov_output.py | # Write the output to both stdout and report file. tee $COVERAGE_DIR/coverage_report_all.txt && echo -e "Generated coverage report for all files: $COVERAGE_DIR/coverage_report_all.txt\n" # TODO: we also need to get the files of the latest commits. # Get the most recently committed files. LATEST_FILES=` git show --pretty="format:" --name-only HEAD | grep -v "^$" | paste -s -d,` RECENT_REPORT=$COVERAGE_DIR/coverage_report_recent.txt echo -e "Recently updated files: $LATEST_FILES\n" > $RECENT_REPORT $GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null | $PYTHON $ROOT/coverage/parse_gcov_output.py -interested-files $LATEST_FILES | tee -a $RECENT_REPORT && echo -e "Generated coverage report for recently updated files: $RECENT_REPORT\n" # Unless otherwise specified, we'll not generate html report by default if [ -z "$HTML" ]; then exit 0 fi # Generate the html report. If we cannot find lcov in this machine, we'll simply # skip this step. echo "Generating the html coverage report..." LCOV=$(which lcov || true 2>/dev/null) if [ -z $LCOV ] then echo "Skip: Cannot find lcov to generate the html report." exit 0 fi LCOV_VERSION=$(lcov -v | grep 1.1 || true) if [ $LCOV_VERSION ] then echo "Not supported lcov version. Expect lcov 1.1." 
exit 0 fi (cd $ROOT; lcov --no-external \ --capture \ --directory $PWD \ --gcov-tool $GCOV \ --output-file $COVERAGE_DIR/coverage.info) genhtml $COVERAGE_DIR/coverage.info -o $COVERAGE_DIR echo "HTML Coverage report is generated in $COVERAGE_DIR" rocksdb-6.11.4/coverage/parse_gcov_output.py000066400000000000000000000104151370372246700211630ustar00rootroot00000000000000#!/usr/bin/env python # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. from __future__ import print_function import optparse import re import sys # the gcov report follows certain pattern. Each file will have two lines # of report, from which we can extract the file name, total lines and coverage # percentage. def parse_gcov_report(gcov_input): per_file_coverage = {} total_coverage = None for line in sys.stdin: line = line.strip() # --First line of the coverage report (with file name in it)? match_obj = re.match("^File '(.*)'$", line) if match_obj: # fetch the file name from the first line of the report. current_file = match_obj.group(1) continue # -- Second line of the file report (with coverage percentage) match_obj = re.match("^Lines executed:(.*)% of (.*)", line) if match_obj: coverage = float(match_obj.group(1)) lines = int(match_obj.group(2)) if current_file is not None: per_file_coverage[current_file] = (coverage, lines) current_file = None else: # If current_file is not set, we reach the last line of report, # which contains the summarized coverage percentage. total_coverage = (coverage, lines) continue # If the line's pattern doesn't fall into the above categories. We # can simply ignore them since they're either empty line or doesn't # find executable lines of the given file. current_file = None return per_file_coverage, total_coverage def get_option_parser(): usage = "Parse the gcov output and generate more human-readable code " +\ "coverage report." 
parser = optparse.OptionParser(usage) parser.add_option( "--interested-files", "-i", dest="filenames", help="Comma separated files names. if specified, we will display " + "the coverage report only for interested source files. " + "Otherwise we will display the coverage report for all " + "source files." ) return parser def display_file_coverage(per_file_coverage, total_coverage): # To print out auto-adjustable column, we need to know the longest # length of file names. max_file_name_length = max( len(fname) for fname in per_file_coverage.keys() ) # -- Print header # size of separator is determined by 3 column sizes: # file name, coverage percentage and lines. header_template = \ "%" + str(max_file_name_length) + "s\t%s\t%s" separator = "-" * (max_file_name_length + 10 + 20) print(header_template % ("Filename", "Coverage", "Lines")) # noqa: E999 T25377293 Grandfathered in print(separator) # -- Print body # template for printing coverage report for each file. record_template = "%" + str(max_file_name_length) + "s\t%5.2f%%\t%10d" for fname, coverage_info in per_file_coverage.items(): coverage, lines = coverage_info print(record_template % (fname, coverage, lines)) # -- Print footer if total_coverage: print(separator) print(record_template % ("Total", total_coverage[0], total_coverage[1])) def report_coverage(): parser = get_option_parser() (options, args) = parser.parse_args() interested_files = set() if options.filenames is not None: interested_files = set(f.strip() for f in options.filenames.split(',')) # To make things simple, right now we only read gcov report from the input per_file_coverage, total_coverage = parse_gcov_report(sys.stdin) # Check if we need to display coverage info for interested files. 
if len(interested_files): per_file_coverage = dict( (fname, per_file_coverage[fname]) for fname in interested_files if fname in per_file_coverage ) # If we only interested in several files, it makes no sense to report # the total_coverage total_coverage = None if not len(per_file_coverage): print("Cannot find coverage info for the given files.", file=sys.stderr) return display_file_coverage(per_file_coverage, total_coverage) if __name__ == "__main__": report_coverage() rocksdb-6.11.4/db/000077500000000000000000000000001370372246700136325ustar00rootroot00000000000000rocksdb-6.11.4/db/arena_wrapped_db_iter.cc000066400000000000000000000111171370372246700204420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/arena_wrapped_db_iter.h" #include "memory/arena.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "table/internal_iterator.h" #include "table/iterator_wrapper.h" #include "util/user_comparator_wrapper.h" namespace ROCKSDB_NAMESPACE { Status ArenaWrappedDBIter::GetProperty(std::string prop_name, std::string* prop) { if (prop_name == "rocksdb.iterator.super-version-number") { // First try to pass the value returned from inner iterator. 
if (!db_iter_->GetProperty(prop_name, prop).ok()) { *prop = ToString(sv_number_); } return Status::OK(); } return db_iter_->GetProperty(prop_name, prop); } void ArenaWrappedDBIter::Init(Env* env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iteration, uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, bool allow_blob, bool allow_refresh) { auto mem = arena_.AllocateAligned(sizeof(DBIter)); db_iter_ = new (mem) DBIter(env, read_options, cf_options, mutable_cf_options, cf_options.user_comparator, nullptr, sequence, true, max_sequential_skip_in_iteration, read_callback, db_impl, cfd, allow_blob); sv_number_ = version_number; allow_refresh_ = allow_refresh; } Status ArenaWrappedDBIter::Refresh() { if (cfd_ == nullptr || db_impl_ == nullptr || !allow_refresh_) { return Status::NotSupported("Creating renew iterator is not allowed."); } assert(db_iter_ != nullptr); // TODO(yiwu): For last_seq_same_as_publish_seq_==false, this is not the // correct behavior. Will be corrected automatically when we take a snapshot // here for the case of WritePreparedTxnDB. 
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); if (sv_number_ != cur_sv_number) { Env* env = db_iter_->env(); db_iter_->~DBIter(); arena_.~Arena(); new (&arena_) Arena(); SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); if (read_callback_) { read_callback_->Refresh(latest_seq); } Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, latest_seq, sv->mutable_cf_options.max_sequential_skip_in_iterations, cur_sv_number, read_callback_, db_impl_, cfd_, allow_blob_, allow_refresh_); InternalIterator* internal_iter = db_impl_->NewInternalIterator( read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(), latest_seq, /* allow_unprepared_value */ true); SetIterUnderDBIter(internal_iter); } else { db_iter_->set_sequence(db_impl_->GetLatestSequenceNumber()); db_iter_->set_valid(false); } return Status::OK(); } ArenaWrappedDBIter* NewArenaWrappedDbIterator( Env* env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, bool allow_blob, bool allow_refresh) { ArenaWrappedDBIter* iter = new ArenaWrappedDBIter(); iter->Init(env, read_options, cf_options, mutable_cf_options, sequence, max_sequential_skip_in_iterations, version_number, read_callback, db_impl, cfd, allow_blob, allow_refresh); if (db_impl != nullptr && cfd != nullptr && allow_refresh) { iter->StoreRefreshInfo(read_options, db_impl, cfd, read_callback, allow_blob); } return iter; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/arena_wrapped_db_iter.h000066400000000000000000000106441370372246700203100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "db/db_impl/db_impl.h" #include "db/db_iter.h" #include "db/dbformat.h" #include "db/range_del_aggregator.h" #include "memory/arena.h" #include "options/cf_options.h" #include "rocksdb/db.h" #include "rocksdb/iterator.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class Arena; // A wrapper iterator which wraps DB Iterator and the arena, with which the DB // iterator is supposed to be allocated. This class is used as an entry point of // a iterator hierarchy whose memory can be allocated inline. In that way, // accessing the iterator tree can be more cache friendly. It is also faster // to allocate. // When using the class's Iterator interface, the behavior is exactly // the same as the inner DBIter. class ArenaWrappedDBIter : public Iterator { public: virtual ~ArenaWrappedDBIter() { db_iter_->~DBIter(); } // Get the arena to be used to allocate memory for DBIter to be wrapped, // as well as child iterators in it. virtual Arena* GetArena() { return &arena_; } virtual ReadRangeDelAggregator* GetRangeDelAggregator() { return db_iter_->GetRangeDelAggregator(); } // Set the internal iterator wrapped inside the DB Iterator. Usually it is // a merging iterator. virtual void SetIterUnderDBIter(InternalIterator* iter) { db_iter_->SetIter(iter); } bool Valid() const override { return db_iter_->Valid(); } void SeekToFirst() override { db_iter_->SeekToFirst(); } void SeekToLast() override { db_iter_->SeekToLast(); } // 'target' does not contain timestamp, even if user timestamp feature is // enabled. 
void Seek(const Slice& target) override { db_iter_->Seek(target); } void SeekForPrev(const Slice& target) override { db_iter_->SeekForPrev(target); } void Next() override { db_iter_->Next(); } void Prev() override { db_iter_->Prev(); } Slice key() const override { return db_iter_->key(); } Slice value() const override { return db_iter_->value(); } Status status() const override { return db_iter_->status(); } Slice timestamp() const override { return db_iter_->timestamp(); } bool IsBlob() const { return db_iter_->IsBlob(); } Status GetProperty(std::string prop_name, std::string* prop) override; Status Refresh() override; void Init(Env* env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, bool allow_blob, bool allow_refresh); // Store some parameters so we can refresh the iterator at a later point // with these same params void StoreRefreshInfo(const ReadOptions& read_options, DBImpl* db_impl, ColumnFamilyData* cfd, ReadCallback* read_callback, bool allow_blob) { read_options_ = read_options; db_impl_ = db_impl; cfd_ = cfd; read_callback_ = read_callback; allow_blob_ = allow_blob; } private: DBIter* db_iter_; Arena arena_; uint64_t sv_number_; ColumnFamilyData* cfd_ = nullptr; DBImpl* db_impl_ = nullptr; ReadOptions read_options_; ReadCallback* read_callback_; bool allow_blob_ = false; bool allow_refresh_ = true; }; // Generate the arena wrapped iterator class. // `db_impl` and `cfd` are used for reneweal. If left null, renewal will not // be supported. 
extern ArenaWrappedDBIter* NewArenaWrappedDbIterator( Env* env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl = nullptr, ColumnFamilyData* cfd = nullptr, bool allow_blob = false, bool allow_refresh = true); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/000077500000000000000000000000001370372246700145505ustar00rootroot00000000000000rocksdb-6.11.4/db/blob/blob_constants.h000066400000000000000000000007021370372246700177320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { constexpr uint64_t kInvalidBlobFileNumber = 0; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/blob_file_addition.cc000066400000000000000000000114141370372246700206500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/blob/blob_file_addition.h" #include #include #include "logging/event_logger.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "test_util/sync_point.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { // Tags for custom fields. Note that these get persisted in the manifest, // so existing tags should not be modified. 
enum BlobFileAddition::CustomFieldTags : uint32_t { kEndMarker, // Add forward compatible fields here ///////////////////////////////////////////////////////////////////// kForwardIncompatibleMask = 1 << 6, // Add forward incompatible fields here }; void BlobFileAddition::EncodeTo(std::string* output) const { PutVarint64(output, blob_file_number_); PutVarint64(output, total_blob_count_); PutVarint64(output, total_blob_bytes_); PutLengthPrefixedSlice(output, checksum_method_); PutLengthPrefixedSlice(output, checksum_value_); // Encode any custom fields here. The format to use is a Varint32 tag (see // CustomFieldTags above) followed by a length prefixed slice. Unknown custom // fields will be ignored during decoding unless they're in the forward // incompatible range. TEST_SYNC_POINT_CALLBACK("BlobFileAddition::EncodeTo::CustomFields", output); PutVarint32(output, kEndMarker); } Status BlobFileAddition::DecodeFrom(Slice* input) { constexpr char class_name[] = "BlobFileAddition"; if (!GetVarint64(input, &blob_file_number_)) { return Status::Corruption(class_name, "Error decoding blob file number"); } if (!GetVarint64(input, &total_blob_count_)) { return Status::Corruption(class_name, "Error decoding total blob count"); } if (!GetVarint64(input, &total_blob_bytes_)) { return Status::Corruption(class_name, "Error decoding total blob bytes"); } Slice checksum_method; if (!GetLengthPrefixedSlice(input, &checksum_method)) { return Status::Corruption(class_name, "Error decoding checksum method"); } checksum_method_ = checksum_method.ToString(); Slice checksum_value; if (!GetLengthPrefixedSlice(input, &checksum_value)) { return Status::Corruption(class_name, "Error decoding checksum value"); } checksum_value_ = checksum_value.ToString(); while (true) { uint32_t custom_field_tag = 0; if (!GetVarint32(input, &custom_field_tag)) { return Status::Corruption(class_name, "Error decoding custom field tag"); } if (custom_field_tag == kEndMarker) { break; } if (custom_field_tag & 
kForwardIncompatibleMask) { return Status::Corruption( class_name, "Forward incompatible custom field encountered"); } Slice custom_field_value; if (!GetLengthPrefixedSlice(input, &custom_field_value)) { return Status::Corruption(class_name, "Error decoding custom field value"); } } return Status::OK(); } std::string BlobFileAddition::DebugString() const { std::ostringstream oss; oss << *this; return oss.str(); } std::string BlobFileAddition::DebugJSON() const { JSONWriter jw; jw << *this; jw.EndObject(); return jw.Get(); } bool operator==(const BlobFileAddition& lhs, const BlobFileAddition& rhs) { return lhs.GetBlobFileNumber() == rhs.GetBlobFileNumber() && lhs.GetTotalBlobCount() == rhs.GetTotalBlobCount() && lhs.GetTotalBlobBytes() == rhs.GetTotalBlobBytes() && lhs.GetChecksumMethod() == rhs.GetChecksumMethod() && lhs.GetChecksumValue() == rhs.GetChecksumValue(); } bool operator!=(const BlobFileAddition& lhs, const BlobFileAddition& rhs) { return !(lhs == rhs); } std::ostream& operator<<(std::ostream& os, const BlobFileAddition& blob_file_addition) { os << "blob_file_number: " << blob_file_addition.GetBlobFileNumber() << " total_blob_count: " << blob_file_addition.GetTotalBlobCount() << " total_blob_bytes: " << blob_file_addition.GetTotalBlobBytes() << " checksum_method: " << blob_file_addition.GetChecksumMethod() << " checksum_value: " << blob_file_addition.GetChecksumValue(); return os; } JSONWriter& operator<<(JSONWriter& jw, const BlobFileAddition& blob_file_addition) { jw << "BlobFileNumber" << blob_file_addition.GetBlobFileNumber() << "TotalBlobCount" << blob_file_addition.GetTotalBlobCount() << "TotalBlobBytes" << blob_file_addition.GetTotalBlobBytes() << "ChecksumMethod" << blob_file_addition.GetChecksumMethod() << "ChecksumValue" << blob_file_addition.GetChecksumValue(); return jw; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/blob_file_addition.h000066400000000000000000000042561370372246700205200ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include "db/blob/blob_constants.h" #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class JSONWriter; class Slice; class Status; class BlobFileAddition { public: BlobFileAddition() = default; BlobFileAddition(uint64_t blob_file_number, uint64_t total_blob_count, uint64_t total_blob_bytes, std::string checksum_method, std::string checksum_value) : blob_file_number_(blob_file_number), total_blob_count_(total_blob_count), total_blob_bytes_(total_blob_bytes), checksum_method_(std::move(checksum_method)), checksum_value_(std::move(checksum_value)) { assert(checksum_method_.empty() == checksum_value_.empty()); } uint64_t GetBlobFileNumber() const { return blob_file_number_; } uint64_t GetTotalBlobCount() const { return total_blob_count_; } uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; } const std::string& GetChecksumMethod() const { return checksum_method_; } const std::string& GetChecksumValue() const { return checksum_value_; } void EncodeTo(std::string* output) const; Status DecodeFrom(Slice* input); std::string DebugString() const; std::string DebugJSON() const; private: enum CustomFieldTags : uint32_t; uint64_t blob_file_number_ = kInvalidBlobFileNumber; uint64_t total_blob_count_ = 0; uint64_t total_blob_bytes_ = 0; std::string checksum_method_; std::string checksum_value_; }; bool operator==(const BlobFileAddition& lhs, const BlobFileAddition& rhs); bool operator!=(const BlobFileAddition& lhs, const BlobFileAddition& rhs); std::ostream& operator<<(std::ostream& os, const BlobFileAddition& blob_file_addition); JSONWriter& operator<<(JSONWriter& jw, const BlobFileAddition& blob_file_addition); } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/db/blob/blob_file_addition_test.cc000066400000000000000000000147251370372246700217170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/blob/blob_file_addition.h" #include #include #include #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { class BlobFileAdditionTest : public testing::Test { public: static void TestEncodeDecode(const BlobFileAddition& blob_file_addition) { std::string encoded; blob_file_addition.EncodeTo(&encoded); BlobFileAddition decoded; Slice input(encoded); ASSERT_OK(decoded.DecodeFrom(&input)); ASSERT_EQ(blob_file_addition, decoded); } }; TEST_F(BlobFileAdditionTest, Empty) { BlobFileAddition blob_file_addition; ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), kInvalidBlobFileNumber); ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 0); ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(), 0); ASSERT_TRUE(blob_file_addition.GetChecksumMethod().empty()); ASSERT_TRUE(blob_file_addition.GetChecksumValue().empty()); TestEncodeDecode(blob_file_addition); } TEST_F(BlobFileAdditionTest, NonEmpty) { constexpr uint64_t blob_file_number = 123; constexpr uint64_t total_blob_count = 2; constexpr uint64_t total_blob_bytes = 123456; const std::string checksum_method("SHA1"); const std::string checksum_value("bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"); BlobFileAddition blob_file_addition(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number); ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), total_blob_count); ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(), total_blob_bytes); ASSERT_EQ(blob_file_addition.GetChecksumMethod(), 
checksum_method); ASSERT_EQ(blob_file_addition.GetChecksumValue(), checksum_value); TestEncodeDecode(blob_file_addition); } TEST_F(BlobFileAdditionTest, DecodeErrors) { std::string str; Slice slice(str); BlobFileAddition blob_file_addition; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "blob file number")); } constexpr uint64_t blob_file_number = 123; PutVarint64(&str, blob_file_number); slice = str; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "total blob count")); } constexpr uint64_t total_blob_count = 4567; PutVarint64(&str, total_blob_count); slice = str; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "total blob bytes")); } constexpr uint64_t total_blob_bytes = 12345678; PutVarint64(&str, total_blob_bytes); slice = str; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "checksum method")); } constexpr char checksum_method[] = "SHA1"; PutLengthPrefixedSlice(&str, checksum_method); slice = str; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "checksum value")); } constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; PutLengthPrefixedSlice(&str, checksum_value); slice = str; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "custom field tag")); } constexpr uint32_t custom_tag = 2; PutVarint32(&str, custom_tag); slice = str; { const Status s = blob_file_addition.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "custom field value")); } } TEST_F(BlobFileAdditionTest, ForwardCompatibleCustomField) { 
SyncPoint::GetInstance()->SetCallBack( "BlobFileAddition::EncodeTo::CustomFields", [&](void* arg) { std::string* output = static_cast(arg); constexpr uint32_t forward_compatible_tag = 2; PutVarint32(output, forward_compatible_tag); PutLengthPrefixedSlice(output, "deadbeef"); }); SyncPoint::GetInstance()->EnableProcessing(); constexpr uint64_t blob_file_number = 678; constexpr uint64_t total_blob_count = 9999; constexpr uint64_t total_blob_bytes = 100000000; const std::string checksum_method("CRC32"); const std::string checksum_value("3d87ff57"); BlobFileAddition blob_file_addition(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); TestEncodeDecode(blob_file_addition); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_F(BlobFileAdditionTest, ForwardIncompatibleCustomField) { SyncPoint::GetInstance()->SetCallBack( "BlobFileAddition::EncodeTo::CustomFields", [&](void* arg) { std::string* output = static_cast(arg); constexpr uint32_t forward_incompatible_tag = (1 << 6) + 1; PutVarint32(output, forward_incompatible_tag); PutLengthPrefixedSlice(output, "foobar"); }); SyncPoint::GetInstance()->EnableProcessing(); constexpr uint64_t blob_file_number = 456; constexpr uint64_t total_blob_count = 100; constexpr uint64_t total_blob_bytes = 2000000; const std::string checksum_method("CRC32B"); const std::string checksum_value("6dbdf23a"); BlobFileAddition blob_file_addition(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); std::string encoded; blob_file_addition.EncodeTo(&encoded); BlobFileAddition decoded_blob_file_addition; Slice input(encoded); const Status s = decoded_blob_file_addition.DecodeFrom(&input); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Forward incompatible")); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } } // namespace ROCKSDB_NAMESPACE int main(int argc, 
char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/blob/blob_file_garbage.cc000066400000000000000000000076001370372246700204470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/blob/blob_file_garbage.h" #include #include #include "logging/event_logger.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "test_util/sync_point.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { // Tags for custom fields. Note that these get persisted in the manifest, // so existing tags should not be modified. enum BlobFileGarbage::CustomFieldTags : uint32_t { kEndMarker, // Add forward compatible fields here ///////////////////////////////////////////////////////////////////// kForwardIncompatibleMask = 1 << 6, // Add forward incompatible fields here }; void BlobFileGarbage::EncodeTo(std::string* output) const { PutVarint64(output, blob_file_number_); PutVarint64(output, garbage_blob_count_); PutVarint64(output, garbage_blob_bytes_); // Encode any custom fields here. The format to use is a Varint32 tag (see // CustomFieldTags above) followed by a length prefixed slice. Unknown custom // fields will be ignored during decoding unless they're in the forward // incompatible range. 
TEST_SYNC_POINT_CALLBACK("BlobFileGarbage::EncodeTo::CustomFields", output); PutVarint32(output, kEndMarker); } Status BlobFileGarbage::DecodeFrom(Slice* input) { constexpr char class_name[] = "BlobFileGarbage"; if (!GetVarint64(input, &blob_file_number_)) { return Status::Corruption(class_name, "Error decoding blob file number"); } if (!GetVarint64(input, &garbage_blob_count_)) { return Status::Corruption(class_name, "Error decoding garbage blob count"); } if (!GetVarint64(input, &garbage_blob_bytes_)) { return Status::Corruption(class_name, "Error decoding garbage blob bytes"); } while (true) { uint32_t custom_field_tag = 0; if (!GetVarint32(input, &custom_field_tag)) { return Status::Corruption(class_name, "Error decoding custom field tag"); } if (custom_field_tag == kEndMarker) { break; } if (custom_field_tag & kForwardIncompatibleMask) { return Status::Corruption( class_name, "Forward incompatible custom field encountered"); } Slice custom_field_value; if (!GetLengthPrefixedSlice(input, &custom_field_value)) { return Status::Corruption(class_name, "Error decoding custom field value"); } } return Status::OK(); } std::string BlobFileGarbage::DebugString() const { std::ostringstream oss; oss << *this; return oss.str(); } std::string BlobFileGarbage::DebugJSON() const { JSONWriter jw; jw << *this; jw.EndObject(); return jw.Get(); } bool operator==(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs) { return lhs.GetBlobFileNumber() == rhs.GetBlobFileNumber() && lhs.GetGarbageBlobCount() == rhs.GetGarbageBlobCount() && lhs.GetGarbageBlobBytes() == rhs.GetGarbageBlobBytes(); } bool operator!=(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs) { return !(lhs == rhs); } std::ostream& operator<<(std::ostream& os, const BlobFileGarbage& blob_file_garbage) { os << "blob_file_number: " << blob_file_garbage.GetBlobFileNumber() << " garbage_blob_count: " << blob_file_garbage.GetGarbageBlobCount() << " garbage_blob_bytes: " << blob_file_garbage.GetGarbageBlobBytes(); 
return os; } JSONWriter& operator<<(JSONWriter& jw, const BlobFileGarbage& blob_file_garbage) { jw << "BlobFileNumber" << blob_file_garbage.GetBlobFileNumber() << "GarbageBlobCount" << blob_file_garbage.GetGarbageBlobCount() << "GarbageBlobBytes" << blob_file_garbage.GetGarbageBlobBytes(); return jw; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/blob_file_garbage.h000066400000000000000000000033301370372246700203050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "db/blob/blob_constants.h" #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class JSONWriter; class Slice; class Status; class BlobFileGarbage { public: BlobFileGarbage() = default; BlobFileGarbage(uint64_t blob_file_number, uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) : blob_file_number_(blob_file_number), garbage_blob_count_(garbage_blob_count), garbage_blob_bytes_(garbage_blob_bytes) {} uint64_t GetBlobFileNumber() const { return blob_file_number_; } uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; } uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; } void EncodeTo(std::string* output) const; Status DecodeFrom(Slice* input); std::string DebugString() const; std::string DebugJSON() const; private: enum CustomFieldTags : uint32_t; uint64_t blob_file_number_ = kInvalidBlobFileNumber; uint64_t garbage_blob_count_ = 0; uint64_t garbage_blob_bytes_ = 0; }; bool operator==(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs); bool operator!=(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs); std::ostream& operator<<(std::ostream& os, const BlobFileGarbage& blob_file_garbage); JSONWriter& operator<<(JSONWriter& jw, const BlobFileGarbage& 
blob_file_garbage); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/blob_file_garbage_test.cc000066400000000000000000000121641370372246700215070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/blob/blob_file_garbage.h" #include #include #include #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { class BlobFileGarbageTest : public testing::Test { public: static void TestEncodeDecode(const BlobFileGarbage& blob_file_garbage) { std::string encoded; blob_file_garbage.EncodeTo(&encoded); BlobFileGarbage decoded; Slice input(encoded); ASSERT_OK(decoded.DecodeFrom(&input)); ASSERT_EQ(blob_file_garbage, decoded); } }; TEST_F(BlobFileGarbageTest, Empty) { BlobFileGarbage blob_file_garbage; ASSERT_EQ(blob_file_garbage.GetBlobFileNumber(), kInvalidBlobFileNumber); ASSERT_EQ(blob_file_garbage.GetGarbageBlobCount(), 0); ASSERT_EQ(blob_file_garbage.GetGarbageBlobBytes(), 0); TestEncodeDecode(blob_file_garbage); } TEST_F(BlobFileGarbageTest, NonEmpty) { constexpr uint64_t blob_file_number = 123; constexpr uint64_t garbage_blob_count = 1; constexpr uint64_t garbage_blob_bytes = 9876; BlobFileGarbage blob_file_garbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); ASSERT_EQ(blob_file_garbage.GetBlobFileNumber(), blob_file_number); ASSERT_EQ(blob_file_garbage.GetGarbageBlobCount(), garbage_blob_count); ASSERT_EQ(blob_file_garbage.GetGarbageBlobBytes(), garbage_blob_bytes); TestEncodeDecode(blob_file_garbage); } TEST_F(BlobFileGarbageTest, DecodeErrors) { std::string str; Slice slice(str); BlobFileGarbage blob_file_garbage; { const Status s = blob_file_garbage.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), 
"blob file number")); } constexpr uint64_t blob_file_number = 123; PutVarint64(&str, blob_file_number); slice = str; { const Status s = blob_file_garbage.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "garbage blob count")); } constexpr uint64_t garbage_blob_count = 4567; PutVarint64(&str, garbage_blob_count); slice = str; { const Status s = blob_file_garbage.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "garbage blob bytes")); } constexpr uint64_t garbage_blob_bytes = 12345678; PutVarint64(&str, garbage_blob_bytes); slice = str; { const Status s = blob_file_garbage.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "custom field tag")); } constexpr uint32_t custom_tag = 2; PutVarint32(&str, custom_tag); slice = str; { const Status s = blob_file_garbage.DecodeFrom(&slice); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "custom field value")); } } TEST_F(BlobFileGarbageTest, ForwardCompatibleCustomField) { SyncPoint::GetInstance()->SetCallBack( "BlobFileGarbage::EncodeTo::CustomFields", [&](void* arg) { std::string* output = static_cast(arg); constexpr uint32_t forward_compatible_tag = 2; PutVarint32(output, forward_compatible_tag); PutLengthPrefixedSlice(output, "deadbeef"); }); SyncPoint::GetInstance()->EnableProcessing(); constexpr uint64_t blob_file_number = 678; constexpr uint64_t garbage_blob_count = 9999; constexpr uint64_t garbage_blob_bytes = 100000000; BlobFileGarbage blob_file_garbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); TestEncodeDecode(blob_file_garbage); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_F(BlobFileGarbageTest, ForwardIncompatibleCustomField) { SyncPoint::GetInstance()->SetCallBack( "BlobFileGarbage::EncodeTo::CustomFields", [&](void* arg) { std::string* output = static_cast(arg); constexpr uint32_t forward_incompatible_tag 
= (1 << 6) + 1; PutVarint32(output, forward_incompatible_tag); PutLengthPrefixedSlice(output, "foobar"); }); SyncPoint::GetInstance()->EnableProcessing(); constexpr uint64_t blob_file_number = 456; constexpr uint64_t garbage_blob_count = 100; constexpr uint64_t garbage_blob_bytes = 2000000; BlobFileGarbage blob_file_garbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); std::string encoded; blob_file_garbage.EncodeTo(&encoded); BlobFileGarbage decoded_blob_file_addition; Slice input(encoded); const Status s = decoded_blob_file_addition.DecodeFrom(&input); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Forward incompatible")); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/blob/blob_file_meta.cc000066400000000000000000000030121370372246700177760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/blob/blob_file_meta.h" #include #include namespace ROCKSDB_NAMESPACE { std::string SharedBlobFileMetaData::DebugString() const { std::ostringstream oss; oss << (*this); return oss.str(); } std::ostream& operator<<(std::ostream& os, const SharedBlobFileMetaData& shared_meta) { os << "blob_file_number: " << shared_meta.GetBlobFileNumber() << " total_blob_count: " << shared_meta.GetTotalBlobCount() << " total_blob_bytes: " << shared_meta.GetTotalBlobBytes() << " checksum_method: " << shared_meta.GetChecksumMethod() << " checksum_value: " << shared_meta.GetChecksumValue(); return os; } std::string BlobFileMetaData::DebugString() const { std::ostringstream oss; oss << (*this); return oss.str(); } std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta) { const auto& shared_meta = meta.GetSharedMeta(); assert(shared_meta); os << (*shared_meta); os << " linked_ssts: {"; for (uint64_t file_number : meta.GetLinkedSsts()) { os << ' ' << file_number; } os << " }"; os << " garbage_blob_count: " << meta.GetGarbageBlobCount() << " garbage_blob_bytes: " << meta.GetGarbageBlobBytes(); return os; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/blob_file_meta.h000066400000000000000000000144021370372246700176450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { // SharedBlobFileMetaData represents the immutable part of blob files' metadata, // like the blob file number, total number and size of blobs, or checksum // method and value. 
There is supposed to be one object of this class per blob // file (shared across all versions that include the blob file in question); // hence, the type is neither copyable nor movable. A blob file can be marked // obsolete when the corresponding SharedBlobFileMetaData object is destroyed. class SharedBlobFileMetaData { public: static std::shared_ptr Create( uint64_t blob_file_number, uint64_t total_blob_count, uint64_t total_blob_bytes, std::string checksum_method, std::string checksum_value) { return std::shared_ptr(new SharedBlobFileMetaData( blob_file_number, total_blob_count, total_blob_bytes, std::move(checksum_method), std::move(checksum_value))); } template static std::shared_ptr Create( uint64_t blob_file_number, uint64_t total_blob_count, uint64_t total_blob_bytes, std::string checksum_method, std::string checksum_value, Deleter deleter) { return std::shared_ptr( new SharedBlobFileMetaData(blob_file_number, total_blob_count, total_blob_bytes, std::move(checksum_method), std::move(checksum_value)), deleter); } SharedBlobFileMetaData(const SharedBlobFileMetaData&) = delete; SharedBlobFileMetaData& operator=(const SharedBlobFileMetaData&) = delete; SharedBlobFileMetaData(SharedBlobFileMetaData&&) = delete; SharedBlobFileMetaData& operator=(SharedBlobFileMetaData&&) = delete; uint64_t GetBlobFileNumber() const { return blob_file_number_; } uint64_t GetTotalBlobCount() const { return total_blob_count_; } uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; } const std::string& GetChecksumMethod() const { return checksum_method_; } const std::string& GetChecksumValue() const { return checksum_value_; } std::string DebugString() const; private: SharedBlobFileMetaData(uint64_t blob_file_number, uint64_t total_blob_count, uint64_t total_blob_bytes, std::string checksum_method, std::string checksum_value) : blob_file_number_(blob_file_number), total_blob_count_(total_blob_count), total_blob_bytes_(total_blob_bytes), 
checksum_method_(std::move(checksum_method)), checksum_value_(std::move(checksum_value)) { assert(checksum_method_.empty() == checksum_value_.empty()); } uint64_t blob_file_number_; uint64_t total_blob_count_; uint64_t total_blob_bytes_; std::string checksum_method_; std::string checksum_value_; }; std::ostream& operator<<(std::ostream& os, const SharedBlobFileMetaData& shared_meta); // BlobFileMetaData contains the part of the metadata for blob files that can // vary across versions, like the amount of garbage in the blob file. In // addition, BlobFileMetaData objects point to and share the ownership of the // SharedBlobFileMetaData object for the corresponding blob file. Similarly to // SharedBlobFileMetaData, BlobFileMetaData are not copyable or movable. They // are meant to be jointly owned by the versions in which the blob file has the // same (immutable *and* mutable) state. class BlobFileMetaData { public: using LinkedSsts = std::unordered_set; static std::shared_ptr Create( std::shared_ptr shared_meta, LinkedSsts linked_ssts, uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) { return std::shared_ptr( new BlobFileMetaData(std::move(shared_meta), std::move(linked_ssts), garbage_blob_count, garbage_blob_bytes)); } BlobFileMetaData(const BlobFileMetaData&) = delete; BlobFileMetaData& operator=(const BlobFileMetaData&) = delete; BlobFileMetaData(BlobFileMetaData&&) = delete; BlobFileMetaData& operator=(BlobFileMetaData&&) = delete; const std::shared_ptr& GetSharedMeta() const { return shared_meta_; } uint64_t GetBlobFileNumber() const { assert(shared_meta_); return shared_meta_->GetBlobFileNumber(); } uint64_t GetTotalBlobCount() const { assert(shared_meta_); return shared_meta_->GetTotalBlobCount(); } uint64_t GetTotalBlobBytes() const { assert(shared_meta_); return shared_meta_->GetTotalBlobBytes(); } const std::string& GetChecksumMethod() const { assert(shared_meta_); return shared_meta_->GetChecksumMethod(); } const std::string& GetChecksumValue() 
const { assert(shared_meta_); return shared_meta_->GetChecksumValue(); } const LinkedSsts& GetLinkedSsts() const { return linked_ssts_; } uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; } uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; } std::string DebugString() const; private: BlobFileMetaData(std::shared_ptr shared_meta, LinkedSsts linked_ssts, uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) : shared_meta_(std::move(shared_meta)), linked_ssts_(std::move(linked_ssts)), garbage_blob_count_(garbage_blob_count), garbage_blob_bytes_(garbage_blob_bytes) { assert(shared_meta_); assert(garbage_blob_count_ <= shared_meta_->GetTotalBlobCount()); assert(garbage_blob_bytes_ <= shared_meta_->GetTotalBlobBytes()); } std::shared_ptr shared_meta_; LinkedSsts linked_ssts_; uint64_t garbage_blob_count_; uint64_t garbage_blob_bytes_; }; std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/blob/blob_index.h000066400000000000000000000126311370372246700170310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/options.h" #include "util/coding.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // BlobIndex is a pointer to the blob and metadata of the blob. The index is // stored in base DB as ValueType::kTypeBlobIndex. 
// There are three types of blob index: // // kInlinedTTL: // +------+------------+---------------+ // | type | expiration | value | // +------+------------+---------------+ // | char | varint64 | variable size | // +------+------------+---------------+ // // kBlob: // +------+-------------+----------+----------+-------------+ // | type | file number | offset | size | compression | // +------+-------------+----------+----------+-------------+ // | char | varint64 | varint64 | varint64 | char | // +------+-------------+----------+----------+-------------+ // // kBlobTTL: // +------+------------+-------------+----------+----------+-------------+ // | type | expiration | file number | offset | size | compression | // +------+------------+-------------+----------+----------+-------------+ // | char | varint64 | varint64 | varint64 | varint64 | char | // +------+------------+-------------+----------+----------+-------------+ // // There isn't a kInlined (without TTL) type since we can store it as a plain // value (i.e. ValueType::kTypeValue). 
class BlobIndex { public: enum class Type : unsigned char { kInlinedTTL = 0, kBlob = 1, kBlobTTL = 2, kUnknown = 3, }; BlobIndex() : type_(Type::kUnknown) {} bool IsInlined() const { return type_ == Type::kInlinedTTL; } bool HasTTL() const { return type_ == Type::kInlinedTTL || type_ == Type::kBlobTTL; } uint64_t expiration() const { assert(HasTTL()); return expiration_; } const Slice& value() const { assert(IsInlined()); return value_; } uint64_t file_number() const { assert(!IsInlined()); return file_number_; } uint64_t offset() const { assert(!IsInlined()); return offset_; } uint64_t size() const { assert(!IsInlined()); return size_; } Status DecodeFrom(Slice slice) { static const std::string kErrorMessage = "Error while decoding blob index"; assert(slice.size() > 0); type_ = static_cast(*slice.data()); if (type_ >= Type::kUnknown) { return Status::Corruption( kErrorMessage, "Unknown blob index type: " + ToString(static_cast(type_))); } slice = Slice(slice.data() + 1, slice.size() - 1); if (HasTTL()) { if (!GetVarint64(&slice, &expiration_)) { return Status::Corruption(kErrorMessage, "Corrupted expiration"); } } if (IsInlined()) { value_ = slice; } else { if (GetVarint64(&slice, &file_number_) && GetVarint64(&slice, &offset_) && GetVarint64(&slice, &size_) && slice.size() == 1) { compression_ = static_cast(*slice.data()); } else { return Status::Corruption(kErrorMessage, "Corrupted blob offset"); } } return Status::OK(); } std::string DebugString(bool output_hex) const { std::ostringstream oss; if (IsInlined()) { oss << "[inlined blob] value:" << value_.ToString(output_hex); } else { oss << "[blob ref] file:" << file_number_ << " offset:" << offset_ << " size:" << size_; } if (HasTTL()) { oss << " exp:" << expiration_; } return oss.str(); } static void EncodeInlinedTTL(std::string* dst, uint64_t expiration, const Slice& value) { assert(dst != nullptr); dst->clear(); dst->reserve(1 + kMaxVarint64Length + value.size()); 
dst->push_back(static_cast(Type::kInlinedTTL)); PutVarint64(dst, expiration); dst->append(value.data(), value.size()); } static void EncodeBlob(std::string* dst, uint64_t file_number, uint64_t offset, uint64_t size, CompressionType compression) { assert(dst != nullptr); dst->clear(); dst->reserve(kMaxVarint64Length * 3 + 2); dst->push_back(static_cast(Type::kBlob)); PutVarint64(dst, file_number); PutVarint64(dst, offset); PutVarint64(dst, size); dst->push_back(static_cast(compression)); } static void EncodeBlobTTL(std::string* dst, uint64_t expiration, uint64_t file_number, uint64_t offset, uint64_t size, CompressionType compression) { assert(dst != nullptr); dst->clear(); dst->reserve(kMaxVarint64Length * 4 + 2); dst->push_back(static_cast(Type::kBlobTTL)); PutVarint64(dst, expiration); PutVarint64(dst, file_number); PutVarint64(dst, offset); PutVarint64(dst, size); dst->push_back(static_cast(compression)); } private: Type type_ = Type::kUnknown; uint64_t expiration_ = 0; Slice value_; uint64_t file_number_ = 0; uint64_t offset_ = 0; uint64_t size_ = 0; CompressionType compression_ = kNoCompression; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/blob_log_format.cc000066400000000000000000000121401370372246700202040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #ifndef ROCKSDB_LITE #include "db/blob/blob_log_format.h" #include "util/coding.h" #include "util/crc32c.h" namespace ROCKSDB_NAMESPACE { namespace blob_db { void BlobLogHeader::EncodeTo(std::string* dst) { assert(dst != nullptr); dst->clear(); dst->reserve(BlobLogHeader::kSize); PutFixed32(dst, kMagicNumber); PutFixed32(dst, version); PutFixed32(dst, column_family_id); unsigned char flags = (has_ttl ? 
1 : 0); dst->push_back(flags); dst->push_back(compression); PutFixed64(dst, expiration_range.first); PutFixed64(dst, expiration_range.second); } Status BlobLogHeader::DecodeFrom(Slice src) { static const std::string kErrorMessage = "Error while decoding blob log header"; if (src.size() != BlobLogHeader::kSize) { return Status::Corruption(kErrorMessage, "Unexpected blob file header size"); } uint32_t magic_number; unsigned char flags; if (!GetFixed32(&src, &magic_number) || !GetFixed32(&src, &version) || !GetFixed32(&src, &column_family_id)) { return Status::Corruption( kErrorMessage, "Error decoding magic number, version and column family id"); } if (magic_number != kMagicNumber) { return Status::Corruption(kErrorMessage, "Magic number mismatch"); } if (version != kVersion1) { return Status::Corruption(kErrorMessage, "Unknown header version"); } flags = src.data()[0]; compression = static_cast(src.data()[1]); has_ttl = (flags & 1) == 1; src.remove_prefix(2); if (!GetFixed64(&src, &expiration_range.first) || !GetFixed64(&src, &expiration_range.second)) { return Status::Corruption(kErrorMessage, "Error decoding expiration range"); } return Status::OK(); } void BlobLogFooter::EncodeTo(std::string* dst) { assert(dst != nullptr); dst->clear(); dst->reserve(BlobLogFooter::kSize); PutFixed32(dst, kMagicNumber); PutFixed64(dst, blob_count); PutFixed64(dst, expiration_range.first); PutFixed64(dst, expiration_range.second); crc = crc32c::Value(dst->c_str(), dst->size()); crc = crc32c::Mask(crc); PutFixed32(dst, crc); } Status BlobLogFooter::DecodeFrom(Slice src) { static const std::string kErrorMessage = "Error while decoding blob log footer"; if (src.size() != BlobLogFooter::kSize) { return Status::Corruption(kErrorMessage, "Unexpected blob file footer size"); } uint32_t src_crc = 0; src_crc = crc32c::Value(src.data(), BlobLogFooter::kSize - sizeof(uint32_t)); src_crc = crc32c::Mask(src_crc); uint32_t magic_number = 0; if (!GetFixed32(&src, &magic_number) || 
!GetFixed64(&src, &blob_count) || !GetFixed64(&src, &expiration_range.first) || !GetFixed64(&src, &expiration_range.second) || !GetFixed32(&src, &crc)) { return Status::Corruption(kErrorMessage, "Error decoding content"); } if (magic_number != kMagicNumber) { return Status::Corruption(kErrorMessage, "Magic number mismatch"); } if (src_crc != crc) { return Status::Corruption(kErrorMessage, "CRC mismatch"); } return Status::OK(); } void BlobLogRecord::EncodeHeaderTo(std::string* dst) { assert(dst != nullptr); dst->clear(); dst->reserve(BlobLogRecord::kHeaderSize + key.size() + value.size()); PutFixed64(dst, key.size()); PutFixed64(dst, value.size()); PutFixed64(dst, expiration); header_crc = crc32c::Value(dst->c_str(), dst->size()); header_crc = crc32c::Mask(header_crc); PutFixed32(dst, header_crc); blob_crc = crc32c::Value(key.data(), key.size()); blob_crc = crc32c::Extend(blob_crc, value.data(), value.size()); blob_crc = crc32c::Mask(blob_crc); PutFixed32(dst, blob_crc); } Status BlobLogRecord::DecodeHeaderFrom(Slice src) { static const std::string kErrorMessage = "Error while decoding blob record"; if (src.size() != BlobLogRecord::kHeaderSize) { return Status::Corruption(kErrorMessage, "Unexpected blob record header size"); } uint32_t src_crc = 0; src_crc = crc32c::Value(src.data(), BlobLogRecord::kHeaderSize - 8); src_crc = crc32c::Mask(src_crc); if (!GetFixed64(&src, &key_size) || !GetFixed64(&src, &value_size) || !GetFixed64(&src, &expiration) || !GetFixed32(&src, &header_crc) || !GetFixed32(&src, &blob_crc)) { return Status::Corruption(kErrorMessage, "Error decoding content"); } if (src_crc != header_crc) { return Status::Corruption(kErrorMessage, "Header CRC mismatch"); } return Status::OK(); } Status BlobLogRecord::CheckBlobCRC() const { uint32_t expected_crc = 0; expected_crc = crc32c::Value(key.data(), key.size()); expected_crc = crc32c::Extend(expected_crc, value.data(), value.size()); expected_crc = crc32c::Mask(expected_crc); if (expected_crc != 
blob_crc) { return Status::Corruption("Blob CRC mismatch"); } return Status::OK(); } } // namespace blob_db } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/blob_log_format.h000066400000000000000000000105701370372246700200530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Log format information shared by reader and writer. #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { namespace blob_db { constexpr uint32_t kMagicNumber = 2395959; // 0x00248f37 constexpr uint32_t kVersion1 = 1; using ExpirationRange = std::pair; // Format of blob log file header (30 bytes): // // +--------------+---------+---------+-------+-------------+-------------------+ // | magic number | version | cf id | flags | compression | expiration range | // +--------------+---------+---------+-------+-------------+-------------------+ // | Fixed32 | Fixed32 | Fixed32 | char | char | Fixed64 Fixed64 | // +--------------+---------+---------+-------+-------------+-------------------+ // // List of flags: // has_ttl: Whether the file contain TTL data. // // Expiration range in the header is a rough range based on // blob_db_options.ttl_range_secs. 
struct BlobLogHeader { static constexpr size_t kSize = 30; BlobLogHeader() = default; BlobLogHeader(uint32_t _column_family_id, CompressionType _compression, bool _has_ttl, const ExpirationRange& _expiration_range) : column_family_id(_column_family_id), compression(_compression), has_ttl(_has_ttl), expiration_range(_expiration_range) {} uint32_t version = kVersion1; uint32_t column_family_id = 0; CompressionType compression = kNoCompression; bool has_ttl = false; ExpirationRange expiration_range; void EncodeTo(std::string* dst); Status DecodeFrom(Slice slice); }; // Format of blob log file footer (32 bytes): // // +--------------+------------+-------------------+------------+ // | magic number | blob count | expiration range | footer CRC | // +--------------+------------+-------------------+------------+ // | Fixed32 | Fixed64 | Fixed64 + Fixed64 | Fixed32 | // +--------------+------------+-------------------+------------+ // // The footer will be presented only when the blob file is properly closed. // // Unlike the same field in file header, expiration range in the footer is the // range of smallest and largest expiration of the data in this file. struct BlobLogFooter { static constexpr size_t kSize = 32; uint64_t blob_count = 0; ExpirationRange expiration_range = std::make_pair(0, 0); uint32_t crc = 0; void EncodeTo(std::string* dst); Status DecodeFrom(Slice slice); }; // Blob record format (32 bytes header + key + value): // // +------------+--------------+------------+------------+----------+---------+-----------+ // | key length | value length | expiration | header CRC | blob CRC | key | value | // +------------+--------------+------------+------------+----------+---------+-----------+ // | Fixed64 | Fixed64 | Fixed64 | Fixed32 | Fixed32 | key len | value len | // +------------+--------------+------------+------------+----------+---------+-----------+ // // If file has has_ttl = false, expiration field is always 0, and the blob // doesn't has expiration. 
// // Also note that if compression is used, value is compressed value and value // length is compressed value length. // // Header CRC is the checksum of (key_len + val_len + expiration), while // blob CRC is the checksum of (key + value). // // We could use variable length encoding (Varint64) to save more space, but it // make reader more complicated. struct BlobLogRecord { // header include fields up to blob CRC static constexpr size_t kHeaderSize = 32; uint64_t key_size = 0; uint64_t value_size = 0; uint64_t expiration = 0; uint32_t header_crc = 0; uint32_t blob_crc = 0; Slice key; Slice value; std::unique_ptr key_buf; std::unique_ptr value_buf; uint64_t record_size() const { return kHeaderSize + key_size + value_size; } void EncodeHeaderTo(std::string* dst); Status DecodeHeaderFrom(Slice src); Status CheckBlobCRC() const; }; } // namespace blob_db } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/blob_log_reader.cc000066400000000000000000000057461370372246700201740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #ifndef ROCKSDB_LITE #include "db/blob/blob_log_reader.h" #include #include "file/random_access_file_reader.h" #include "monitoring/statistics.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { namespace blob_db { Reader::Reader(std::unique_ptr&& file_reader, Env* env, Statistics* statistics) : file_(std::move(file_reader)), env_(env), statistics_(statistics), buffer_(), next_byte_(0) {} Status Reader::ReadSlice(uint64_t size, Slice* slice, char* buf) { StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); Status s = file_->Read(IOOptions(), next_byte_, static_cast(size), slice, buf, nullptr); next_byte_ += size; if (!s.ok()) { return s; } RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, slice->size()); if (slice->size() != size) { return Status::Corruption("EOF reached while reading record"); } return s; } Status Reader::ReadHeader(BlobLogHeader* header) { assert(file_.get() != nullptr); assert(next_byte_ == 0); Status s = ReadSlice(BlobLogHeader::kSize, &buffer_, header_buf_); if (!s.ok()) { return s; } if (buffer_.size() != BlobLogHeader::kSize) { return Status::Corruption("EOF reached before file header"); } return header->DecodeFrom(buffer_); } Status Reader::ReadRecord(BlobLogRecord* record, ReadLevel level, uint64_t* blob_offset) { Status s = ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, header_buf_); if (!s.ok()) { return s; } if (buffer_.size() != BlobLogRecord::kHeaderSize) { return Status::Corruption("EOF reached before record header"); } s = record->DecodeHeaderFrom(buffer_); if (!s.ok()) { return s; } uint64_t kb_size = record->key_size + record->value_size; if (blob_offset != nullptr) { *blob_offset = next_byte_ + record->key_size; } switch (level) { case kReadHeader: next_byte_ += kb_size; break; case kReadHeaderKey: record->key_buf.reset(new char[record->key_size]); s = ReadSlice(record->key_size, &record->key, record->key_buf.get()); next_byte_ += record->value_size; break; case kReadHeaderKeyBlob: 
record->key_buf.reset(new char[record->key_size]); s = ReadSlice(record->key_size, &record->key, record->key_buf.get()); if (s.ok()) { record->value_buf.reset(new char[record->value_size]); s = ReadSlice(record->value_size, &record->value, record->value_buf.get()); } if (s.ok()) { s = record->CheckBlobCRC(); } break; } return s; } } // namespace blob_db } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/blob_log_reader.h000066400000000000000000000045121370372246700200240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #ifndef ROCKSDB_LITE #include #include #include "db/blob/blob_log_format.h" #include "file/random_access_file_reader.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class SequentialFileReader; class Logger; namespace blob_db { /** * Reader is a general purpose log stream reader implementation. The actual job * of reading from the device is implemented by the SequentialFile interface. * * Please see Writer for details on the file and record layout. */ class Reader { public: enum ReadLevel { kReadHeader, kReadHeaderKey, kReadHeaderKeyBlob, }; // Create a reader that will return log records from "*file". // "*file" must remain live while this Reader is in use. Reader(std::unique_ptr&& file_reader, Env* env, Statistics* statistics); // No copying allowed Reader(const Reader&) = delete; Reader& operator=(const Reader&) = delete; ~Reader() = default; Status ReadHeader(BlobLogHeader* header); // Read the next record into *record. Returns true if read // successfully, false if we hit end of the input. May use // "*scratch" as temporary storage. 
The contents filled in *record // will only be valid until the next mutating operation on this // reader or the next mutation to *scratch. // If blob_offset is non-null, return offset of the blob through it. Status ReadRecord(BlobLogRecord* record, ReadLevel level = kReadHeader, uint64_t* blob_offset = nullptr); void ResetNextByte() { next_byte_ = 0; } uint64_t GetNextByte() const { return next_byte_; } private: Status ReadSlice(uint64_t size, Slice* slice, char* buf); const std::unique_ptr file_; Env* env_; Statistics* statistics_; Slice buffer_; char header_buf_[BlobLogRecord::kHeaderSize]; // which byte to read next. For asserting proper usage uint64_t next_byte_; }; } // namespace blob_db } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/blob_log_writer.cc000066400000000000000000000100611370372246700202300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "db/blob/blob_log_writer.h" #include #include #include "db/blob/blob_log_format.h" #include "file/writable_file_writer.h" #include "monitoring/statistics.h" #include "rocksdb/env.h" #include "util/coding.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { namespace blob_db { Writer::Writer(std::unique_ptr&& dest, Env* env, Statistics* statistics, uint64_t log_number, uint64_t bpsync, bool use_fs, uint64_t boffset) : dest_(std::move(dest)), env_(env), statistics_(statistics), log_number_(log_number), block_offset_(boffset), bytes_per_sync_(bpsync), next_sync_offset_(0), use_fsync_(use_fs), last_elem_type_(kEtNone) {} Status Writer::Sync() { StopWatch sync_sw(env_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS); Status s = dest_->Sync(use_fsync_); RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED); return s; } Status Writer::WriteHeader(BlobLogHeader& header) { assert(block_offset_ == 0); assert(last_elem_type_ == kEtNone); std::string str; header.EncodeTo(&str); Status s = dest_->Append(Slice(str)); if (s.ok()) { block_offset_ += str.size(); s = dest_->Flush(); } last_elem_type_ = kEtFileHdr; RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, BlobLogHeader::kSize); return s; } Status Writer::AppendFooter(BlobLogFooter& footer) { assert(block_offset_ != 0); assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); std::string str; footer.EncodeTo(&str); Status s = dest_->Append(Slice(str)); if (s.ok()) { block_offset_ += str.size(); s = dest_->Close(); dest_.reset(); } last_elem_type_ = kEtFileFooter; RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, BlobLogFooter::kSize); return s; } Status Writer::AddRecord(const Slice& key, const Slice& val, uint64_t expiration, uint64_t* key_offset, uint64_t* blob_offset) { assert(block_offset_ != 0); assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); std::string buf; ConstructBlobHeader(&buf, key, val, expiration); Status s = 
EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); return s; } Status Writer::AddRecord(const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset) { assert(block_offset_ != 0); assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); std::string buf; ConstructBlobHeader(&buf, key, val, 0); Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); return s; } void Writer::ConstructBlobHeader(std::string* buf, const Slice& key, const Slice& val, uint64_t expiration) { BlobLogRecord record; record.key = key; record.value = val; record.expiration = expiration; record.EncodeHeaderTo(buf); } Status Writer::EmitPhysicalRecord(const std::string& headerbuf, const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset) { StopWatch write_sw(env_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS); Status s = dest_->Append(Slice(headerbuf)); if (s.ok()) { s = dest_->Append(key); } if (s.ok()) { s = dest_->Append(val); } if (s.ok()) { s = dest_->Flush(); } *key_offset = block_offset_ + BlobLogRecord::kHeaderSize; *blob_offset = *key_offset + key.size(); block_offset_ = *blob_offset + val.size(); last_elem_type_ = kEtRecord; RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, BlobLogRecord::kHeaderSize + key.size() + val.size()); return s; } } // namespace blob_db } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/blob_log_writer.h000066400000000000000000000053151370372246700201000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include #include #include "db/blob/blob_log_format.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { class WritableFileWriter; namespace blob_db { /** * Writer is the blob log stream writer. It provides an append-only * abstraction for writing blob data. * * * Look at blob_db_format.h to see the details of the record formats. */ class Writer { public: // Create a writer that will append data to "*dest". // "*dest" must be initially empty. // "*dest" must remain live while this Writer is in use. Writer(std::unique_ptr&& dest, Env* env, Statistics* statistics, uint64_t log_number, uint64_t bpsync, bool use_fsync, uint64_t boffset = 0); // No copying allowed Writer(const Writer&) = delete; Writer& operator=(const Writer&) = delete; ~Writer() = default; static void ConstructBlobHeader(std::string* buf, const Slice& key, const Slice& val, uint64_t expiration); Status AddRecord(const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset); Status AddRecord(const Slice& key, const Slice& val, uint64_t expiration, uint64_t* key_offset, uint64_t* blob_offset); Status EmitPhysicalRecord(const std::string& headerbuf, const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset); Status AppendFooter(BlobLogFooter& footer); Status WriteHeader(BlobLogHeader& header); WritableFileWriter* file() { return dest_.get(); } const WritableFileWriter* file() const { return dest_.get(); } uint64_t get_log_number() const { return log_number_; } bool ShouldSync() const { return block_offset_ > next_sync_offset_; } Status Sync(); void ResetSyncPointer() { next_sync_offset_ += bytes_per_sync_; } private: std::unique_ptr dest_; Env* env_; Statistics* statistics_; uint64_t log_number_; uint64_t block_offset_; // Current offset in block uint64_t bytes_per_sync_; uint64_t next_sync_offset_; bool 
use_fsync_; public: enum ElemType { kEtNone, kEtFileHdr, kEtRecord, kEtFileFooter }; ElemType last_elem_type_; }; } // namespace blob_db } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/blob/db_blob_index_test.cc000066400000000000000000000355101370372246700206740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include #include #include "db/arena_wrapped_db_iter.h" #include "db/column_family.h" #include "db/db_iter.h" #include "db/db_test_util.h" #include "db/dbformat.h" #include "db/write_batch_internal.h" #include "port/port.h" #include "port/stack_trace.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { // kTypeBlobIndex is a value type used by BlobDB only. The base rocksdb // should accept the value type on write, and report not supported value // for reads, unless caller request for it explicitly. The base rocksdb // doesn't understand format of actual blob index (the value). 
class DBBlobIndexTest : public DBTestBase { public: enum Tier { kMemtable = 0, kImmutableMemtables = 1, kL0SstFile = 2, kLnSstFile = 3, }; const std::vector kAllTiers = {Tier::kMemtable, Tier::kImmutableMemtables, Tier::kL0SstFile, Tier::kLnSstFile}; DBBlobIndexTest() : DBTestBase("/db_blob_index_test") {} ColumnFamilyHandle* cfh() { return dbfull()->DefaultColumnFamily(); } ColumnFamilyData* cfd() { return reinterpret_cast(cfh())->cfd(); } Status PutBlobIndex(WriteBatch* batch, const Slice& key, const Slice& blob_index) { return WriteBatchInternal::PutBlobIndex(batch, cfd()->GetID(), key, blob_index); } Status Write(WriteBatch* batch) { return dbfull()->Write(WriteOptions(), batch); } std::string GetImpl(const Slice& key, bool* is_blob_index = nullptr, const Snapshot* snapshot = nullptr) { ReadOptions read_options; read_options.snapshot = snapshot; PinnableSlice value; DBImpl::GetImplOptions get_impl_options; get_impl_options.column_family = cfh(); get_impl_options.value = &value; get_impl_options.is_blob_index = is_blob_index; auto s = dbfull()->GetImpl(read_options, key, get_impl_options); if (s.IsNotFound()) { return "NOT_FOUND"; } if (s.IsNotSupported()) { return "NOT_SUPPORTED"; } if (!s.ok()) { return s.ToString(); } return value.ToString(); } std::string GetBlobIndex(const Slice& key, const Snapshot* snapshot = nullptr) { bool is_blob_index = false; std::string value = GetImpl(key, &is_blob_index, snapshot); if (!is_blob_index) { return "NOT_BLOB"; } return value; } ArenaWrappedDBIter* GetBlobIterator() { return dbfull()->NewIteratorImpl( ReadOptions(), cfd(), dbfull()->GetLatestSequenceNumber(), nullptr /*read_callback*/, true /*allow_blob*/); } Options GetTestOptions() { Options options; options.create_if_missing = true; options.num_levels = 2; options.disable_auto_compactions = true; // Disable auto flushes. 
options.max_write_buffer_number = 10; options.min_write_buffer_number_to_merge = 10; options.merge_operator = MergeOperators::CreateStringAppendOperator(); return options; } void MoveDataTo(Tier tier) { switch (tier) { case Tier::kMemtable: break; case Tier::kImmutableMemtables: ASSERT_OK(dbfull()->TEST_SwitchMemtable()); break; case Tier::kL0SstFile: ASSERT_OK(Flush()); break; case Tier::kLnSstFile: ASSERT_OK(Flush()); ASSERT_OK(Put("a", "dummy")); ASSERT_OK(Put("z", "dummy")); ASSERT_OK(Flush()); ASSERT_OK( dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); #ifndef ROCKSDB_LITE ASSERT_EQ("0,1", FilesPerLevel()); #endif // !ROCKSDB_LITE break; } } }; // Should be able to write kTypeBlobIndex to memtables and SST files. TEST_F(DBBlobIndexTest, Write) { for (auto tier : kAllTiers) { DestroyAndReopen(GetTestOptions()); for (int i = 1; i <= 5; i++) { std::string index = ToString(i); WriteBatch batch; ASSERT_OK(PutBlobIndex(&batch, "key" + index, "blob" + index)); ASSERT_OK(Write(&batch)); } MoveDataTo(tier); for (int i = 1; i <= 5; i++) { std::string index = ToString(i); ASSERT_EQ("blob" + index, GetBlobIndex("key" + index)); } } } // Get should be able to return blob index if is_blob_index is provided, // otherwise return Status::NotSupported status. 
TEST_F(DBBlobIndexTest, Get) {
  for (const auto tier : kAllTiers) {
    DestroyAndReopen(GetTestOptions());

    // One plain value and one blob index, written in a single batch.
    WriteBatch batch;
    ASSERT_OK(batch.Put("key", "value"));
    ASSERT_OK(PutBlobIndex(&batch, "blob_key", "blob_index"));
    ASSERT_OK(Write(&batch));
    MoveDataTo(tier);

    bool is_blob_index = false;
    PinnableSlice value;

    // The plain value is readable through every path and is not flagged as
    // a blob index.
    ASSERT_EQ("value", Get("key"));
    ASSERT_EQ("value", GetImpl("key"));
    ASSERT_EQ("value", GetImpl("key", &is_blob_index));
    ASSERT_FALSE(is_blob_index);

    // The blob index is only readable when is_blob_index is supplied.
    ASSERT_TRUE(Get("blob_key", &value).IsNotSupported());
    ASSERT_EQ("NOT_SUPPORTED", GetImpl("blob_key"));
    ASSERT_EQ("blob_index", GetImpl("blob_key", &is_blob_index));
    ASSERT_TRUE(is_blob_index);
  }
}

// Get should NOT return Status::NotSupported if blob index is updated with
// a normal value.
TEST_F(DBBlobIndexTest, Updated) {
  const auto key_of = [](int n) { return "key" + ToString(n); };

  for (const auto tier : kAllTiers) {
    DestroyAndReopen(GetTestOptions());

    // Start with key0..key9 all holding a blob index.
    WriteBatch batch;
    for (int n = 0; n < 10; ++n) {
      ASSERT_OK(PutBlobIndex(&batch, key_of(n), "blob_index"));
    }
    ASSERT_OK(Write(&batch));

    // Avoid blob values from being purged.
    const Snapshot* snapshot = dbfull()->GetSnapshot();

    // Overwrite the blob indexes with other kinds of entries.
    ASSERT_OK(Put("key1", "new_value"));
    ASSERT_OK(Merge("key2", "a"));
    ASSERT_OK(Merge("key2", "b"));
    ASSERT_OK(Merge("key2", "c"));
    ASSERT_OK(Delete("key3"));
    ASSERT_OK(SingleDelete("key4"));
    ASSERT_OK(Delete("key5"));
    ASSERT_OK(Merge("key5", "a"));
    ASSERT_OK(Merge("key5", "b"));
    ASSERT_OK(Merge("key5", "c"));
    ASSERT_OK(dbfull()->DeleteRange(WriteOptions(), cfh(), "key6", "key9"));
    MoveDataTo(tier);

    // Through the snapshot every key still resolves to its blob index.
    for (int n = 0; n < 10; ++n) {
      ASSERT_EQ("blob_index", GetBlobIndex(key_of(n), snapshot));
    }

    // At the latest version each key reflects its update.
    ASSERT_EQ("new_value", Get("key1"));
    ASSERT_EQ("NOT_SUPPORTED", GetImpl("key2"));
    ASSERT_EQ("NOT_FOUND", Get("key3"));
    ASSERT_EQ("NOT_FOUND", Get("key4"));
    ASSERT_EQ("a,b,c", GetImpl("key5"));
    for (int n = 6; n < 9; ++n) {
      ASSERT_EQ("NOT_FOUND", Get(key_of(n)));
    }
    // key9 is outside the deleted range [key6, key9).
    ASSERT_EQ("blob_index", GetBlobIndex("key9"));

    dbfull()->ReleaseSnapshot(snapshot);
  }
}

// Iterator should get blob value if allow_blob flag is set,
// otherwise return Status::NotSupported status.
TEST_F(DBBlobIndexTest, Iterate) { const std::vector> data = { /*00*/ {kTypeValue}, /*01*/ {kTypeBlobIndex}, /*02*/ {kTypeValue}, /*03*/ {kTypeBlobIndex, kTypeValue}, /*04*/ {kTypeValue}, /*05*/ {kTypeValue, kTypeBlobIndex}, /*06*/ {kTypeValue}, /*07*/ {kTypeDeletion, kTypeBlobIndex}, /*08*/ {kTypeValue}, /*09*/ {kTypeSingleDeletion, kTypeBlobIndex}, /*10*/ {kTypeValue}, /*11*/ {kTypeMerge, kTypeMerge, kTypeMerge, kTypeBlobIndex}, /*12*/ {kTypeValue}, /*13*/ {kTypeMerge, kTypeMerge, kTypeMerge, kTypeDeletion, kTypeBlobIndex}, /*14*/ {kTypeValue}, /*15*/ {kTypeBlobIndex}, /*16*/ {kTypeValue}, }; auto get_key = [](int index) { char buf[20]; snprintf(buf, sizeof(buf), "%02d", index); return "key" + std::string(buf); }; auto get_value = [&](int index, int version) { return get_key(index) + "_value" + ToString(version); }; auto check_iterator = [&](Iterator* iterator, Status::Code expected_status, const Slice& expected_value) { ASSERT_EQ(expected_status, iterator->status().code()); if (expected_status == Status::kOk) { ASSERT_TRUE(iterator->Valid()); ASSERT_EQ(expected_value, iterator->value()); } else { ASSERT_FALSE(iterator->Valid()); } }; auto create_normal_iterator = [&]() -> Iterator* { return dbfull()->NewIterator(ReadOptions()); }; auto create_blob_iterator = [&]() -> Iterator* { return GetBlobIterator(); }; auto check_is_blob = [&](bool is_blob) { return [is_blob](Iterator* iterator) { ASSERT_EQ(is_blob, reinterpret_cast(iterator)->IsBlob()); }; }; auto verify = [&](int index, Status::Code expected_status, const Slice& forward_value, const Slice& backward_value, std::function create_iterator, std::function extra_check = nullptr) { // Seek auto* iterator = create_iterator(); ASSERT_OK(iterator->Refresh()); iterator->Seek(get_key(index)); check_iterator(iterator, expected_status, forward_value); if (extra_check) { extra_check(iterator); } delete iterator; // Next iterator = create_iterator(); ASSERT_OK(iterator->Refresh()); iterator->Seek(get_key(index - 1)); 
ASSERT_TRUE(iterator->Valid()); iterator->Next(); check_iterator(iterator, expected_status, forward_value); if (extra_check) { extra_check(iterator); } delete iterator; // SeekForPrev iterator = create_iterator(); ASSERT_OK(iterator->Refresh()); iterator->SeekForPrev(get_key(index)); check_iterator(iterator, expected_status, backward_value); if (extra_check) { extra_check(iterator); } delete iterator; // Prev iterator = create_iterator(); iterator->Seek(get_key(index + 1)); ASSERT_TRUE(iterator->Valid()); iterator->Prev(); check_iterator(iterator, expected_status, backward_value); if (extra_check) { extra_check(iterator); } delete iterator; }; for (auto tier : {Tier::kMemtable} /*kAllTiers*/) { // Avoid values from being purged. std::vector snapshots; DestroyAndReopen(GetTestOptions()); // fill data for (int i = 0; i < static_cast(data.size()); i++) { for (int j = static_cast(data[i].size()) - 1; j >= 0; j--) { std::string key = get_key(i); std::string value = get_value(i, j); WriteBatch batch; switch (data[i][j]) { case kTypeValue: ASSERT_OK(Put(key, value)); break; case kTypeDeletion: ASSERT_OK(Delete(key)); break; case kTypeSingleDeletion: ASSERT_OK(SingleDelete(key)); break; case kTypeMerge: ASSERT_OK(Merge(key, value)); break; case kTypeBlobIndex: ASSERT_OK(PutBlobIndex(&batch, key, value)); ASSERT_OK(Write(&batch)); break; default: assert(false); }; } snapshots.push_back(dbfull()->GetSnapshot()); } ASSERT_OK( dbfull()->DeleteRange(WriteOptions(), cfh(), get_key(15), get_key(16))); snapshots.push_back(dbfull()->GetSnapshot()); MoveDataTo(tier); // Normal iterator verify(1, Status::kNotSupported, "", "", create_normal_iterator); verify(3, Status::kNotSupported, "", "", create_normal_iterator); verify(5, Status::kOk, get_value(5, 0), get_value(5, 0), create_normal_iterator); verify(7, Status::kOk, get_value(8, 0), get_value(6, 0), create_normal_iterator); verify(9, Status::kOk, get_value(10, 0), get_value(8, 0), create_normal_iterator); verify(11, 
Status::kNotSupported, "", "", create_normal_iterator); verify(13, Status::kOk, get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), create_normal_iterator); verify(15, Status::kOk, get_value(16, 0), get_value(14, 0), create_normal_iterator); // Iterator with blob support verify(1, Status::kOk, get_value(1, 0), get_value(1, 0), create_blob_iterator, check_is_blob(true)); verify(3, Status::kOk, get_value(3, 0), get_value(3, 0), create_blob_iterator, check_is_blob(true)); verify(5, Status::kOk, get_value(5, 0), get_value(5, 0), create_blob_iterator, check_is_blob(false)); verify(7, Status::kOk, get_value(8, 0), get_value(6, 0), create_blob_iterator, check_is_blob(false)); verify(9, Status::kOk, get_value(10, 0), get_value(8, 0), create_blob_iterator, check_is_blob(false)); verify(11, Status::kNotSupported, "", "", create_blob_iterator); verify(13, Status::kOk, get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), create_blob_iterator, check_is_blob(false)); verify(15, Status::kOk, get_value(16, 0), get_value(14, 0), create_blob_iterator, check_is_blob(false)); #ifndef ROCKSDB_LITE // Iterator with blob support and using seek. 
ASSERT_OK(dbfull()->SetOptions( cfh(), {{"max_sequential_skip_in_iterations", "0"}})); verify(1, Status::kOk, get_value(1, 0), get_value(1, 0), create_blob_iterator, check_is_blob(true)); verify(3, Status::kOk, get_value(3, 0), get_value(3, 0), create_blob_iterator, check_is_blob(true)); verify(5, Status::kOk, get_value(5, 0), get_value(5, 0), create_blob_iterator, check_is_blob(false)); verify(7, Status::kOk, get_value(8, 0), get_value(6, 0), create_blob_iterator, check_is_blob(false)); verify(9, Status::kOk, get_value(10, 0), get_value(8, 0), create_blob_iterator, check_is_blob(false)); verify(11, Status::kNotSupported, "", "", create_blob_iterator); verify(13, Status::kOk, get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), create_blob_iterator, check_is_blob(false)); verify(15, Status::kOk, get_value(16, 0), get_value(14, 0), create_blob_iterator, check_is_blob(false)); #endif // !ROCKSDB_LITE for (auto* snapshot : snapshots) { dbfull()->ReleaseSnapshot(snapshot); } } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/builder.cc000066400000000000000000000260671370372246700156020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/builder.h"

// NOTE(review): the next two directives lost their <header> operands when
// this text was extracted; restore them from the upstream file.
#include
#include
#include "db/compaction/compaction_iterator.h"
#include "db/dbformat.h"
#include "db/event_helpers.h"
#include "db/internal_stats.h"
#include "db/merge_helper.h"
#include "db/range_del_aggregator.h"
#include "db/table_cache.h"
#include "db/version_edit.h"
#include "file/filename.h"
#include "file/read_write_util.h"
#include "file/writable_file_writer.h"
#include "monitoring/iostats_context_imp.h"
#include "monitoring/thread_status_util.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/format.h"
#include "table/internal_iterator.h"
#include "test_util/sync_point.h"
#include "util/stop_watch.h"

namespace ROCKSDB_NAMESPACE {

class TableFactory;

// Packs the arguments into a TableBuilderOptions and asks
// ioptions.table_factory for a builder that writes to `file`.
// column_family_name must be empty exactly when column_family_id is
// kUnknownColumnFamily (asserted).
TableBuilder* NewTableBuilder(
    const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions,
    const InternalKeyComparator& internal_comparator,
    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
        int_tbl_prop_collector_factories,
    uint32_t column_family_id, const std::string& column_family_name,
    WritableFileWriter* file, const CompressionType compression_type,
    uint64_t sample_for_compression, const CompressionOptions& compression_opts,
    int level, const bool skip_filters, const uint64_t creation_time,
    const uint64_t oldest_key_time, const uint64_t target_file_size,
    const uint64_t file_creation_time) {
  assert((column_family_id ==
          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
         column_family_name.empty());
  return ioptions.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, internal_comparator,
                          int_tbl_prop_collector_factories, compression_type,
                          sample_for_compression, compression_opts,
                          skip_filters, column_family_name, level,
                          creation_time, oldest_key_time, target_file_size,
                          file_creation_time),
      column_family_id, file);
}

// Writes the entries of *iter and the range tombstones in range_del_iters to
// the table file identified by meta->fd, then reopens the file through
// table_cache to check that it is readable.
//
// meta->fd.file_size is reset to 0 on entry and only set when a non-empty
// table was finished successfully. On error, or when nothing was written,
// the file is deleted. *io_status receives the builder / file-writer I/O
// status. The creation-finished event is logged on every return path.
Status BuildTable(
    const std::string& dbname, Env* env, FileSystem* fs,
    const ImmutableCFOptions& ioptions,
    const MutableCFOptions& mutable_cf_options, const FileOptions& file_options,
    TableCache* table_cache, InternalIterator* iter,
    std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
        range_del_iters,
    FileMetaData* meta, const InternalKeyComparator& internal_comparator,
    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
        int_tbl_prop_collector_factories,
    uint32_t column_family_id, const std::string& column_family_name,
    std::vector<SequenceNumber> snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
    SnapshotChecker* snapshot_checker, const CompressionType compression,
    uint64_t sample_for_compression, const CompressionOptions& compression_opts,
    bool paranoid_file_checks, InternalStats* internal_stats,
    TableFileCreationReason reason, IOStatus* io_status,
    EventLogger* event_logger, int job_id, const Env::IOPriority io_priority,
    TableProperties* table_properties, int level, const uint64_t creation_time,
    const uint64_t oldest_key_time, Env::WriteLifeTimeHint write_hint,
    const uint64_t file_creation_time) {
  assert((column_family_id ==
          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
         column_family_name.empty());
  // Reports the IOStats for flush for every following bytes.
  const size_t kReportFlushIOStatsEvery = 1048576;
  Status s;
  meta->fd.file_size = 0;
  iter->SeekToFirst();
  std::unique_ptr<CompactionRangeDelAggregator> range_del_agg(
      new CompactionRangeDelAggregator(&internal_comparator, snapshots));
  for (auto& range_del_iter : range_del_iters) {
    range_del_agg->AddTombstones(std::move(range_del_iter));
  }

  std::string fname = TableFileName(ioptions.cf_paths, meta->fd.GetNumber(),
                                    meta->fd.GetPathId());
#ifndef ROCKSDB_LITE
  EventHelpers::NotifyTableFileCreationStarted(
      ioptions.listeners, dbname, column_family_name, fname, job_id, reason);
#endif  // !ROCKSDB_LITE
  TableProperties tp;

  if (iter->Valid() || !range_del_agg->IsEmpty()) {
    TableBuilder* builder;
    std::unique_ptr<WritableFileWriter> file_writer;
    // Currently we only enable dictionary compression during compaction to the
    // bottommost level.
    CompressionOptions compression_opts_for_flush(compression_opts);
    compression_opts_for_flush.max_dict_bytes = 0;
    compression_opts_for_flush.zstd_max_train_bytes = 0;
    {
      std::unique_ptr<FSWritableFile> file;
#ifndef NDEBUG
      bool use_direct_writes = file_options.use_direct_writes;
      TEST_SYNC_POINT_CALLBACK("BuildTable:create_file", &use_direct_writes);
#endif  // !NDEBUG
      s = NewWritableFile(fs, fname, &file, file_options);
      if (!s.ok()) {
        // Nothing was created: report the failure and bail out before a
        // builder exists.
        EventHelpers::LogAndNotifyTableFileCreationFinished(
            event_logger, ioptions.listeners, dbname, column_family_name, fname,
            job_id, meta->fd, kInvalidBlobFileNumber, tp, reason, s);
        return s;
      }
      file->SetIOPriority(io_priority);
      file->SetWriteLifeTimeHint(write_hint);

      file_writer.reset(new WritableFileWriter(
          std::move(file), fname, file_options, env, ioptions.statistics,
          ioptions.listeners, ioptions.file_checksum_gen_factory));

      builder = NewTableBuilder(
          ioptions, mutable_cf_options, internal_comparator,
          int_tbl_prop_collector_factories, column_family_id,
          column_family_name, file_writer.get(), compression,
          sample_for_compression, compression_opts_for_flush, level,
          false /* skip_filters */, creation_time, oldest_key_time,
          0 /*target_file_size*/, file_creation_time);
    }

    MergeHelper merge(env, internal_comparator.user_comparator(),
                      ioptions.merge_operator, nullptr, ioptions.info_log,
                      true /* internal key corruption is not ok */,
                      snapshots.empty() ? 0 : snapshots.back(),
                      snapshot_checker);

    CompactionIterator c_iter(
        iter, internal_comparator.user_comparator(), &merge, kMaxSequenceNumber,
        &snapshots, earliest_write_conflict_snapshot, snapshot_checker, env,
        ShouldReportDetailedTime(env, ioptions.statistics),
        true /* internal key corruption is not ok */, range_del_agg.get());
    c_iter.SeekToFirst();
    for (; c_iter.Valid(); c_iter.Next()) {
      const Slice& key = c_iter.key();
      const Slice& value = c_iter.value();
      const ParsedInternalKey& ikey = c_iter.ikey();
      builder->Add(key, value);
      meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);

      // TODO(noetzli): Update stats after flush, too.
      if (io_priority == Env::IO_HIGH &&
          IOSTATS(bytes_written) >= kReportFlushIOStatsEvery) {
        ThreadStatusUtil::SetThreadOperationProperty(
            ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written));
      }
    }

    // Append the range tombstones after the point entries.
    auto range_del_it = range_del_agg->NewIterator();
    for (range_del_it->SeekToFirst(); range_del_it->Valid();
         range_del_it->Next()) {
      auto tombstone = range_del_it->Tombstone();
      auto kv = tombstone.Serialize();
      builder->Add(kv.first.Encode(), kv.second);
      meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
                                     tombstone.seq_, internal_comparator);
    }

    // Finish and check for builder errors
    bool empty = builder->IsEmpty();
    s = c_iter.status();
    TEST_SYNC_POINT("BuildTable:BeforeFinishBuildTable");
    if (!s.ok() || empty) {
      builder->Abandon();
    } else {
      s = builder->Finish();
    }
    *io_status = builder->io_status();

    if (s.ok() && !empty) {
      uint64_t file_size = builder->FileSize();
      meta->fd.file_size = file_size;
      meta->marked_for_compaction = builder->NeedCompact();
      assert(meta->fd.GetFileSize() > 0);
      tp = builder->GetTableProperties();  // refresh now that builder is
                                           // finished
      if (table_properties) {
        *table_properties = tp;
      }
    }
    delete builder;

    // Finish and check for file errors
    if (s.ok() && !empty) {
      StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
      *io_status = file_writer->Sync(ioptions.use_fsync);
    }
    if (io_status->ok() && !empty) {
      *io_status = file_writer->Close();
    }
    if (io_status->ok() && !empty) {
      // Add the checksum information to file metadata.
      meta->file_checksum = file_writer->GetFileChecksum();
      meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName();
    }

    // An I/O failure overrides whatever status the builder reported.
    if (!io_status->ok()) {
      s = *io_status;
    }

    // TODO Also check the IO status when create the Iterator.
    if (s.ok() && !empty) {
      // Verify that the table is usable
      // We set for_compaction to false and don't OptimizeForCompactionTableRead
      // here because this is a special case after we finish the table building
      // No matter whether use_direct_io_for_flush_and_compaction is true,
      // we will regard this verification as user reads since the goal is
      // to cache it here for further user reads
      std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
          ReadOptions(), file_options, internal_comparator, *meta,
          nullptr /* range_del_agg */,
          mutable_cf_options.prefix_extractor.get(), nullptr,
          (internal_stats == nullptr) ? nullptr
                                      : internal_stats->GetFileReadHist(0),
          TableReaderCaller::kFlush, /*arena=*/nullptr,
          /*skip_filter=*/false, level,
          MaxFileSizeForL0MetaPin(mutable_cf_options),
          /*smallest_compaction_key=*/nullptr,
          /*largest_compaction_key*/ nullptr,
          /*allow_unprepared_value*/ false));
      s = it->status();
      if (s.ok() && paranoid_file_checks) {
        // Walk the whole table so that any read error surfaces in status().
        for (it->SeekToFirst(); it->Valid(); it->Next()) {
        }
        s = it->status();
      }
    }
  }

  // Check for input iterator errors
  if (!iter->status().ok()) {
    s = iter->status();
  }

  // Best-effort cleanup; the DeleteFile result is intentionally not checked.
  if (!s.ok() || meta->fd.GetFileSize() == 0) {
    fs->DeleteFile(fname, IOOptions(), nullptr);
  }

  if (meta->fd.GetFileSize() == 0) {
    fname = "(nil)";
  }
  // Output to event logger and fire events.
  EventHelpers::LogAndNotifyTableFileCreationFinished(
      event_logger, ioptions.listeners, dbname, column_family_name, fname,
      job_id, meta->fd, meta->oldest_blob_file_number, tp, reason, s);

  return s;
}

}  // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/builder.h000066400000000000000000000075241370372246700154410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
// NOTE(review): the next two directives lost their <header> operands when
// this text was extracted; restore them from the upstream file.
#include
#include
#include "db/range_tombstone_fragmenter.h"
#include "db/table_properties_collector.h"
#include "logging/event_logger.h"
#include "options/cf_options.h"
#include "rocksdb/comparator.h"
#include "rocksdb/env.h"
#include "rocksdb/listener.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/types.h"
#include "table/scoped_arena_iterator.h"

namespace ROCKSDB_NAMESPACE {

struct Options;
struct FileMetaData;

class Env;
struct EnvOptions;
class Iterator;
class SnapshotChecker;
class TableCache;
class VersionEdit;
class TableBuilder;
class WritableFileWriter;
class InternalStats;

// @param column_family_name Name of the column family that is also identified
//    by column_family_id, or empty string if unknown. It must outlive the
//    TableBuilder returned by this function.
// Parameters from skip_filters onward are optional and default to false / 0.
TableBuilder* NewTableBuilder(
    const ImmutableCFOptions& options, const MutableCFOptions& moptions,
    const InternalKeyComparator& internal_comparator,
    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
        int_tbl_prop_collector_factories,
    uint32_t column_family_id, const std::string& column_family_name,
    WritableFileWriter* file, const CompressionType compression_type,
    const uint64_t sample_for_compression,
    const CompressionOptions& compression_opts, int level,
    const bool skip_filters = false, const uint64_t creation_time = 0,
    const uint64_t oldest_key_time = 0, const uint64_t target_file_size = 0,
    const uint64_t file_creation_time = 0);

// Build a Table file from the contents of *iter. The generated file
// will be named according to number specified in meta. On success, the rest of
// *meta will be filled with metadata about the generated table.
// If no data is present in *iter, meta->file_size will be set to
// zero, and no Table file will be produced.
//
// @param column_family_name Name of the column family that is also identified
//    by column_family_id, or empty string if unknown.
// Parameters from event_logger onward are optional; their defaults are given
// in the declaration below.
extern Status BuildTable(
    const std::string& dbname, Env* env, FileSystem* fs,
    const ImmutableCFOptions& options,
    const MutableCFOptions& mutable_cf_options, const FileOptions& file_options,
    TableCache* table_cache, InternalIterator* iter,
    std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
        range_del_iters,
    FileMetaData* meta, const InternalKeyComparator& internal_comparator,
    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
        int_tbl_prop_collector_factories,
    uint32_t column_family_id, const std::string& column_family_name,
    std::vector<SequenceNumber> snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
    SnapshotChecker* snapshot_checker, const CompressionType compression,
    const uint64_t sample_for_compression,
    const CompressionOptions& compression_opts, bool paranoid_file_checks,
    InternalStats* internal_stats, TableFileCreationReason reason,
    IOStatus* io_status, EventLogger* event_logger = nullptr, int job_id = 0,
    const Env::IOPriority io_priority = Env::IO_HIGH,
    TableProperties* table_properties = nullptr, int level = -1,
    const uint64_t creation_time = 0, const uint64_t oldest_key_time = 0,
    Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
    const uint64_t file_creation_time = 0);

}  // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/c.cc000066400000000000000000004521331370372246700143730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE #include "rocksdb/c.h" #include #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/comparator.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/iterator.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/universal_compaction.h" #include "rocksdb/utilities/backupable_db.h" #include "rocksdb/utilities/checkpoint.h" #include "rocksdb/utilities/db_ttl.h" #include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "rocksdb/write_batch.h" #include "rocksdb/perf_context.h" #include "utilities/merge_operators.h" #include #include #include using ROCKSDB_NAMESPACE::BackupableDBOptions; using ROCKSDB_NAMESPACE::BackupEngine; using ROCKSDB_NAMESPACE::BackupID; using ROCKSDB_NAMESPACE::BackupInfo; using ROCKSDB_NAMESPACE::BatchResult; using ROCKSDB_NAMESPACE::BlockBasedTableOptions; using ROCKSDB_NAMESPACE::BottommostLevelCompaction; using ROCKSDB_NAMESPACE::BytewiseComparator; using ROCKSDB_NAMESPACE::Cache; using ROCKSDB_NAMESPACE::Checkpoint; using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor; using ROCKSDB_NAMESPACE::ColumnFamilyHandle; using ROCKSDB_NAMESPACE::ColumnFamilyOptions; using ROCKSDB_NAMESPACE::CompactionFilter; using ROCKSDB_NAMESPACE::CompactionFilterContext; using ROCKSDB_NAMESPACE::CompactionFilterFactory; using ROCKSDB_NAMESPACE::CompactionOptionsFIFO; using ROCKSDB_NAMESPACE::CompactRangeOptions; using ROCKSDB_NAMESPACE::Comparator; using ROCKSDB_NAMESPACE::CompressionType; using 
ROCKSDB_NAMESPACE::CuckooTableOptions; using ROCKSDB_NAMESPACE::DB; using ROCKSDB_NAMESPACE::DBOptions; using ROCKSDB_NAMESPACE::DbPath; using ROCKSDB_NAMESPACE::Env; using ROCKSDB_NAMESPACE::EnvOptions; using ROCKSDB_NAMESPACE::FileLock; using ROCKSDB_NAMESPACE::FilterPolicy; using ROCKSDB_NAMESPACE::FlushOptions; using ROCKSDB_NAMESPACE::InfoLogLevel; using ROCKSDB_NAMESPACE::IngestExternalFileOptions; using ROCKSDB_NAMESPACE::Iterator; using ROCKSDB_NAMESPACE::LiveFileMetaData; using ROCKSDB_NAMESPACE::Logger; using ROCKSDB_NAMESPACE::MemoryUtil; using ROCKSDB_NAMESPACE::MergeOperator; using ROCKSDB_NAMESPACE::MergeOperators; using ROCKSDB_NAMESPACE::NewBloomFilterPolicy; using ROCKSDB_NAMESPACE::NewGenericRateLimiter; using ROCKSDB_NAMESPACE::NewLRUCache; using ROCKSDB_NAMESPACE::OptimisticTransactionDB; using ROCKSDB_NAMESPACE::OptimisticTransactionOptions; using ROCKSDB_NAMESPACE::Options; using ROCKSDB_NAMESPACE::PerfContext; using ROCKSDB_NAMESPACE::PerfLevel; using ROCKSDB_NAMESPACE::PinnableSlice; using ROCKSDB_NAMESPACE::RandomAccessFile; using ROCKSDB_NAMESPACE::Range; using ROCKSDB_NAMESPACE::RateLimiter; using ROCKSDB_NAMESPACE::ReadOptions; using ROCKSDB_NAMESPACE::RestoreOptions; using ROCKSDB_NAMESPACE::SequentialFile; using ROCKSDB_NAMESPACE::Slice; using ROCKSDB_NAMESPACE::SliceParts; using ROCKSDB_NAMESPACE::SliceTransform; using ROCKSDB_NAMESPACE::Snapshot; using ROCKSDB_NAMESPACE::SstFileWriter; using ROCKSDB_NAMESPACE::Status; using ROCKSDB_NAMESPACE::Transaction; using ROCKSDB_NAMESPACE::TransactionDB; using ROCKSDB_NAMESPACE::TransactionDBOptions; using ROCKSDB_NAMESPACE::TransactionLogIterator; using ROCKSDB_NAMESPACE::TransactionOptions; using ROCKSDB_NAMESPACE::WALRecoveryMode; using ROCKSDB_NAMESPACE::WritableFile; using ROCKSDB_NAMESPACE::WriteBatch; using ROCKSDB_NAMESPACE::WriteBatchWithIndex; using ROCKSDB_NAMESPACE::WriteOptions; using std::shared_ptr; using std::vector; using std::unordered_set; using std::map; extern "C" { struct 
rocksdb_t { DB* rep; }; struct rocksdb_backup_engine_t { BackupEngine* rep; }; struct rocksdb_backup_engine_info_t { std::vector rep; }; struct rocksdb_restore_options_t { RestoreOptions rep; }; struct rocksdb_iterator_t { Iterator* rep; }; struct rocksdb_writebatch_t { WriteBatch rep; }; struct rocksdb_writebatch_wi_t { WriteBatchWithIndex* rep; }; struct rocksdb_snapshot_t { const Snapshot* rep; }; struct rocksdb_flushoptions_t { FlushOptions rep; }; struct rocksdb_fifo_compaction_options_t { CompactionOptionsFIFO rep; }; struct rocksdb_readoptions_t { ReadOptions rep; // stack variables to set pointers to in ReadOptions Slice upper_bound; Slice lower_bound; }; struct rocksdb_writeoptions_t { WriteOptions rep; }; struct rocksdb_options_t { Options rep; }; struct rocksdb_compactoptions_t { CompactRangeOptions rep; }; struct rocksdb_block_based_table_options_t { BlockBasedTableOptions rep; }; struct rocksdb_cuckoo_table_options_t { CuckooTableOptions rep; }; struct rocksdb_seqfile_t { SequentialFile* rep; }; struct rocksdb_randomfile_t { RandomAccessFile* rep; }; struct rocksdb_writablefile_t { WritableFile* rep; }; struct rocksdb_wal_iterator_t { TransactionLogIterator* rep; }; struct rocksdb_wal_readoptions_t { TransactionLogIterator::ReadOptions rep; }; struct rocksdb_filelock_t { FileLock* rep; }; struct rocksdb_logger_t { std::shared_ptr rep; }; struct rocksdb_cache_t { std::shared_ptr rep; }; struct rocksdb_livefiles_t { std::vector rep; }; struct rocksdb_column_family_handle_t { ColumnFamilyHandle* rep; }; struct rocksdb_envoptions_t { EnvOptions rep; }; struct rocksdb_ingestexternalfileoptions_t { IngestExternalFileOptions rep; }; struct rocksdb_sstfilewriter_t { SstFileWriter* rep; }; struct rocksdb_ratelimiter_t { std::shared_ptr rep; }; struct rocksdb_perfcontext_t { PerfContext* rep; }; struct rocksdb_pinnableslice_t { PinnableSlice rep; }; struct rocksdb_transactiondb_options_t { TransactionDBOptions rep; }; struct rocksdb_transactiondb_t { 
TransactionDB* rep; }; struct rocksdb_transaction_options_t { TransactionOptions rep; }; struct rocksdb_transaction_t { Transaction* rep; }; struct rocksdb_checkpoint_t { Checkpoint* rep; }; struct rocksdb_optimistictransactiondb_t { OptimisticTransactionDB* rep; }; struct rocksdb_optimistictransaction_options_t { OptimisticTransactionOptions rep; }; struct rocksdb_compactionfiltercontext_t { CompactionFilter::Context rep; }; struct rocksdb_compactionfilter_t : public CompactionFilter { void* state_; void (*destructor_)(void*); unsigned char (*filter_)( void*, int level, const char* key, size_t key_length, const char* existing_value, size_t value_length, char** new_value, size_t *new_value_length, unsigned char* value_changed); const char* (*name_)(void*); unsigned char ignore_snapshots_; ~rocksdb_compactionfilter_t() override { (*destructor_)(state_); } bool Filter(int level, const Slice& key, const Slice& existing_value, std::string* new_value, bool* value_changed) const override { char* c_new_value = nullptr; size_t new_value_length = 0; unsigned char c_value_changed = 0; unsigned char result = (*filter_)( state_, level, key.data(), key.size(), existing_value.data(), existing_value.size(), &c_new_value, &new_value_length, &c_value_changed); if (c_value_changed) { new_value->assign(c_new_value, new_value_length); *value_changed = true; } return result; } const char* Name() const override { return (*name_)(state_); } bool IgnoreSnapshots() const override { return ignore_snapshots_; } }; struct rocksdb_compactionfilterfactory_t : public CompactionFilterFactory { void* state_; void (*destructor_)(void*); rocksdb_compactionfilter_t* (*create_compaction_filter_)( void*, rocksdb_compactionfiltercontext_t* context); const char* (*name_)(void*); ~rocksdb_compactionfilterfactory_t() override { (*destructor_)(state_); } std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context) override { rocksdb_compactionfiltercontext_t ccontext; ccontext.rep = 
context; CompactionFilter* cf = (*create_compaction_filter_)(state_, &ccontext); return std::unique_ptr(cf); } const char* Name() const override { return (*name_)(state_); } }; struct rocksdb_comparator_t : public Comparator { void* state_; void (*destructor_)(void*); int (*compare_)( void*, const char* a, size_t alen, const char* b, size_t blen); const char* (*name_)(void*); ~rocksdb_comparator_t() override { (*destructor_)(state_); } int Compare(const Slice& a, const Slice& b) const override { return (*compare_)(state_, a.data(), a.size(), b.data(), b.size()); } const char* Name() const override { return (*name_)(state_); } // No-ops since the C binding does not support key shortening methods. void FindShortestSeparator(std::string*, const Slice&) const override {} void FindShortSuccessor(std::string* /*key*/) const override {} }; struct rocksdb_filterpolicy_t : public FilterPolicy { void* state_; void (*destructor_)(void*); const char* (*name_)(void*); char* (*create_)( void*, const char* const* key_array, const size_t* key_length_array, int num_keys, size_t* filter_length); unsigned char (*key_match_)( void*, const char* key, size_t length, const char* filter, size_t filter_length); void (*delete_filter_)( void*, const char* filter, size_t filter_length); ~rocksdb_filterpolicy_t() override { (*destructor_)(state_); } const char* Name() const override { return (*name_)(state_); } void CreateFilter(const Slice* keys, int n, std::string* dst) const override { std::vector key_pointers(n); std::vector key_sizes(n); for (int i = 0; i < n; i++) { key_pointers[i] = keys[i].data(); key_sizes[i] = keys[i].size(); } size_t len; char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); dst->append(filter, len); if (delete_filter_ != nullptr) { (*delete_filter_)(state_, filter, len); } else { free(filter); } } bool KeyMayMatch(const Slice& key, const Slice& filter) const override { return (*key_match_)(state_, key.data(), key.size(), filter.data(), 
filter.size()); } }; struct rocksdb_mergeoperator_t : public MergeOperator { void* state_; void (*destructor_)(void*); const char* (*name_)(void*); char* (*full_merge_)( void*, const char* key, size_t key_length, const char* existing_value, size_t existing_value_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length); char* (*partial_merge_)(void*, const char* key, size_t key_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length); void (*delete_value_)( void*, const char* value, size_t value_length); ~rocksdb_mergeoperator_t() override { (*destructor_)(state_); } const char* Name() const override { return (*name_)(state_); } bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { size_t n = merge_in.operand_list.size(); std::vector operand_pointers(n); std::vector operand_sizes(n); for (size_t i = 0; i < n; i++) { Slice operand(merge_in.operand_list[i]); operand_pointers[i] = operand.data(); operand_sizes[i] = operand.size(); } const char* existing_value_data = nullptr; size_t existing_value_len = 0; if (merge_in.existing_value != nullptr) { existing_value_data = merge_in.existing_value->data(); existing_value_len = merge_in.existing_value->size(); } unsigned char success; size_t new_value_len; char* tmp_new_value = (*full_merge_)( state_, merge_in.key.data(), merge_in.key.size(), existing_value_data, existing_value_len, &operand_pointers[0], &operand_sizes[0], static_cast(n), &success, &new_value_len); merge_out->new_value.assign(tmp_new_value, new_value_len); if (delete_value_ != nullptr) { (*delete_value_)(state_, tmp_new_value, new_value_len); } else { free(tmp_new_value); } return success; } bool PartialMergeMulti(const Slice& key, const std::deque& operand_list, std::string* new_value, Logger* /*logger*/) const override { size_t 
operand_count = operand_list.size(); std::vector operand_pointers(operand_count); std::vector operand_sizes(operand_count); for (size_t i = 0; i < operand_count; ++i) { Slice operand(operand_list[i]); operand_pointers[i] = operand.data(); operand_sizes[i] = operand.size(); } unsigned char success; size_t new_value_len; char* tmp_new_value = (*partial_merge_)( state_, key.data(), key.size(), &operand_pointers[0], &operand_sizes[0], static_cast(operand_count), &success, &new_value_len); new_value->assign(tmp_new_value, new_value_len); if (delete_value_ != nullptr) { (*delete_value_)(state_, tmp_new_value, new_value_len); } else { free(tmp_new_value); } return success; } }; struct rocksdb_dbpath_t { DbPath rep; }; struct rocksdb_env_t { Env* rep; bool is_default; }; struct rocksdb_slicetransform_t : public SliceTransform { void* state_; void (*destructor_)(void*); const char* (*name_)(void*); char* (*transform_)( void*, const char* key, size_t length, size_t* dst_length); unsigned char (*in_domain_)( void*, const char* key, size_t length); unsigned char (*in_range_)( void*, const char* key, size_t length); ~rocksdb_slicetransform_t() override { (*destructor_)(state_); } const char* Name() const override { return (*name_)(state_); } Slice Transform(const Slice& src) const override { size_t len; char* dst = (*transform_)(state_, src.data(), src.size(), &len); return Slice(dst, len); } bool InDomain(const Slice& src) const override { return (*in_domain_)(state_, src.data(), src.size()); } bool InRange(const Slice& src) const override { return (*in_range_)(state_, src.data(), src.size()); } }; struct rocksdb_universal_compaction_options_t { ROCKSDB_NAMESPACE::CompactionOptionsUniversal* rep; }; static bool SaveError(char** errptr, const Status& s) { assert(errptr != nullptr); if (s.ok()) { return false; } else if (*errptr == nullptr) { *errptr = strdup(s.ToString().c_str()); } else { // TODO(sanjay): Merge with existing error? 
// This is a bug if *errptr is not created by malloc() free(*errptr); *errptr = strdup(s.ToString().c_str()); } return true; } static char* CopyString(const std::string& str) { char* result = reinterpret_cast(malloc(sizeof(char) * str.size())); memcpy(result, str.data(), sizeof(char) * str.size()); return result; } rocksdb_t* rocksdb_open( const rocksdb_options_t* options, const char* name, char** errptr) { DB* db; if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) { return nullptr; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } rocksdb_t* rocksdb_open_with_ttl( const rocksdb_options_t* options, const char* name, int ttl, char** errptr) { ROCKSDB_NAMESPACE::DBWithTTL* db; if (SaveError(errptr, ROCKSDB_NAMESPACE::DBWithTTL::Open( options->rep, std::string(name), &db, ttl))) { return nullptr; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } rocksdb_t* rocksdb_open_for_read_only( const rocksdb_options_t* options, const char* name, unsigned char error_if_log_file_exist, char** errptr) { DB* db; if (SaveError(errptr, DB::OpenForReadOnly(options->rep, std::string(name), &db, error_if_log_file_exist))) { return nullptr; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } rocksdb_t* rocksdb_open_as_secondary(const rocksdb_options_t* options, const char* name, const char* secondary_path, char** errptr) { DB* db; if (SaveError(errptr, DB::OpenAsSecondary(options->rep, std::string(name), std::string(secondary_path), &db))) { return nullptr; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } rocksdb_backup_engine_t* rocksdb_backup_engine_open( const rocksdb_options_t* options, const char* path, char** errptr) { BackupEngine* be; if (SaveError(errptr, BackupEngine::Open(options->rep.env, BackupableDBOptions(path, nullptr, true, options->rep.info_log.get()), &be))) { return nullptr; } rocksdb_backup_engine_t* result = new rocksdb_backup_engine_t; result->rep = be; return 
result; } void rocksdb_backup_engine_create_new_backup(rocksdb_backup_engine_t* be, rocksdb_t* db, char** errptr) { SaveError(errptr, be->rep->CreateNewBackup(db->rep)); } void rocksdb_backup_engine_create_new_backup_flush(rocksdb_backup_engine_t* be, rocksdb_t* db, unsigned char flush_before_backup, char** errptr) { SaveError(errptr, be->rep->CreateNewBackup(db->rep, flush_before_backup)); } void rocksdb_backup_engine_purge_old_backups(rocksdb_backup_engine_t* be, uint32_t num_backups_to_keep, char** errptr) { SaveError(errptr, be->rep->PurgeOldBackups(num_backups_to_keep)); } rocksdb_restore_options_t* rocksdb_restore_options_create() { return new rocksdb_restore_options_t; } void rocksdb_restore_options_destroy(rocksdb_restore_options_t* opt) { delete opt; } void rocksdb_restore_options_set_keep_log_files(rocksdb_restore_options_t* opt, int v) { opt->rep.keep_log_files = v; } void rocksdb_backup_engine_verify_backup(rocksdb_backup_engine_t* be, uint32_t backup_id, char** errptr) { SaveError(errptr, be->rep->VerifyBackup(static_cast(backup_id))); } void rocksdb_backup_engine_restore_db_from_latest_backup( rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, const rocksdb_restore_options_t* restore_options, char** errptr) { SaveError(errptr, be->rep->RestoreDBFromLatestBackup(std::string(db_dir), std::string(wal_dir), restore_options->rep)); } const rocksdb_backup_engine_info_t* rocksdb_backup_engine_get_backup_info( rocksdb_backup_engine_t* be) { rocksdb_backup_engine_info_t* result = new rocksdb_backup_engine_info_t; be->rep->GetBackupInfo(&result->rep); return result; } int rocksdb_backup_engine_info_count(const rocksdb_backup_engine_info_t* info) { return static_cast(info->rep.size()); } int64_t rocksdb_backup_engine_info_timestamp( const rocksdb_backup_engine_info_t* info, int index) { return info->rep[index].timestamp; } uint32_t rocksdb_backup_engine_info_backup_id( const rocksdb_backup_engine_info_t* info, int index) { return 
info->rep[index].backup_id; } uint64_t rocksdb_backup_engine_info_size( const rocksdb_backup_engine_info_t* info, int index) { return info->rep[index].size; } uint32_t rocksdb_backup_engine_info_number_files( const rocksdb_backup_engine_info_t* info, int index) { return info->rep[index].number_files; } void rocksdb_backup_engine_info_destroy( const rocksdb_backup_engine_info_t* info) { delete info; } void rocksdb_backup_engine_close(rocksdb_backup_engine_t* be) { delete be->rep; delete be; } rocksdb_checkpoint_t* rocksdb_checkpoint_object_create(rocksdb_t* db, char** errptr) { Checkpoint* checkpoint; if (SaveError(errptr, Checkpoint::Create(db->rep, &checkpoint))) { return nullptr; } rocksdb_checkpoint_t* result = new rocksdb_checkpoint_t; result->rep = checkpoint; return result; } void rocksdb_checkpoint_create(rocksdb_checkpoint_t* checkpoint, const char* checkpoint_dir, uint64_t log_size_for_flush, char** errptr) { SaveError(errptr, checkpoint->rep->CreateCheckpoint( std::string(checkpoint_dir), log_size_for_flush)); } void rocksdb_checkpoint_object_destroy(rocksdb_checkpoint_t* checkpoint) { delete checkpoint->rep; delete checkpoint; } void rocksdb_close(rocksdb_t* db) { delete db->rep; delete db; } void rocksdb_options_set_uint64add_merge_operator(rocksdb_options_t* opt) { opt->rep.merge_operator = ROCKSDB_NAMESPACE::MergeOperators::CreateUInt64AddOperator(); } rocksdb_t* rocksdb_open_column_families( const rocksdb_options_t* db_options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr) { std::vector column_families; for (int i = 0; i < num_column_families; i++) { column_families.push_back(ColumnFamilyDescriptor( std::string(column_family_names[i]), ColumnFamilyOptions(column_family_options[i]->rep))); } DB* db; std::vector handles; if (SaveError(errptr, DB::Open(DBOptions(db_options->rep), 
std::string(name), column_families, &handles, &db))) { return nullptr; } for (size_t i = 0; i < handles.size(); i++) { rocksdb_column_family_handle_t* c_handle = new rocksdb_column_family_handle_t; c_handle->rep = handles[i]; column_family_handles[i] = c_handle; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } rocksdb_t* rocksdb_open_for_read_only_column_families( const rocksdb_options_t* db_options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, unsigned char error_if_log_file_exist, char** errptr) { std::vector column_families; for (int i = 0; i < num_column_families; i++) { column_families.push_back(ColumnFamilyDescriptor( std::string(column_family_names[i]), ColumnFamilyOptions(column_family_options[i]->rep))); } DB* db; std::vector handles; if (SaveError(errptr, DB::OpenForReadOnly(DBOptions(db_options->rep), std::string(name), column_families, &handles, &db, error_if_log_file_exist))) { return nullptr; } for (size_t i = 0; i < handles.size(); i++) { rocksdb_column_family_handle_t* c_handle = new rocksdb_column_family_handle_t; c_handle->rep = handles[i]; column_family_handles[i] = c_handle; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } rocksdb_t* rocksdb_open_as_secondary_column_families( const rocksdb_options_t* db_options, const char* name, const char* secondary_path, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr) { std::vector column_families; for (int i = 0; i != num_column_families; ++i) { column_families.emplace_back( std::string(column_family_names[i]), ColumnFamilyOptions(column_family_options[i]->rep)); } DB* db; std::vector handles; if (SaveError(errptr, DB::OpenAsSecondary(DBOptions(db_options->rep), 
std::string(name), std::string(secondary_path), column_families, &handles, &db))) { return nullptr; } for (size_t i = 0; i != handles.size(); ++i) { rocksdb_column_family_handle_t* c_handle = new rocksdb_column_family_handle_t; c_handle->rep = handles[i]; column_family_handles[i] = c_handle; } rocksdb_t* result = new rocksdb_t; result->rep = db; return result; } char** rocksdb_list_column_families( const rocksdb_options_t* options, const char* name, size_t* lencfs, char** errptr) { std::vector fams; SaveError(errptr, DB::ListColumnFamilies(DBOptions(options->rep), std::string(name), &fams)); *lencfs = fams.size(); char** column_families = static_cast(malloc(sizeof(char*) * fams.size())); for (size_t i = 0; i < fams.size(); i++) { column_families[i] = strdup(fams[i].c_str()); } return column_families; } void rocksdb_list_column_families_destroy(char** list, size_t len) { for (size_t i = 0; i < len; ++i) { free(list[i]); } free(list); } rocksdb_column_family_handle_t* rocksdb_create_column_family( rocksdb_t* db, const rocksdb_options_t* column_family_options, const char* column_family_name, char** errptr) { rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t; SaveError(errptr, db->rep->CreateColumnFamily(ColumnFamilyOptions(column_family_options->rep), std::string(column_family_name), &(handle->rep))); return handle; } void rocksdb_drop_column_family( rocksdb_t* db, rocksdb_column_family_handle_t* handle, char** errptr) { SaveError(errptr, db->rep->DropColumnFamily(handle->rep)); } void rocksdb_column_family_handle_destroy(rocksdb_column_family_handle_t* handle) { delete handle->rep; delete handle; } void rocksdb_put( rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen))); } void rocksdb_put_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* 
column_family, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, db->rep->Put(options->rep, column_family->rep, Slice(key, keylen), Slice(val, vallen))); } void rocksdb_delete( rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, size_t keylen, char** errptr) { SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); } void rocksdb_delete_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, char** errptr) { SaveError(errptr, db->rep->Delete(options->rep, column_family->rep, Slice(key, keylen))); } void rocksdb_delete_range_cf(rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len, char** errptr) { SaveError(errptr, db->rep->DeleteRange(options->rep, column_family->rep, Slice(start_key, start_key_len), Slice(end_key, end_key_len))); } void rocksdb_merge( rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, db->rep->Merge(options->rep, Slice(key, keylen), Slice(val, vallen))); } void rocksdb_merge_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, db->rep->Merge(options->rep, column_family->rep, Slice(key, keylen), Slice(val, vallen))); } void rocksdb_write( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch, char** errptr) { SaveError(errptr, db->rep->Write(options->rep, &batch->rep)); } char* rocksdb_get( rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = 
db->rep->Get(options->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_get_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = db->rep->Get(options->rep, column_family->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } void rocksdb_multi_get( rocksdb_t* db, const rocksdb_readoptions_t* options, size_t num_keys, const char* const* keys_list, const size_t* keys_list_sizes, char** values_list, size_t* values_list_sizes, char** errs) { std::vector keys(num_keys); for (size_t i = 0; i < num_keys; i++) { keys[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector values(num_keys); std::vector statuses = db->rep->MultiGet(options->rep, keys, &values); for (size_t i = 0; i < num_keys; i++) { if (statuses[i].ok()) { values_list[i] = CopyString(values[i]); values_list_sizes[i] = values[i].size(); errs[i] = nullptr; } else { values_list[i] = nullptr; values_list_sizes[i] = 0; if (!statuses[i].IsNotFound()) { errs[i] = strdup(statuses[i].ToString().c_str()); } else { errs[i] = nullptr; } } } } void rocksdb_multi_get_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, const rocksdb_column_family_handle_t* const* column_families, size_t num_keys, const char* const* keys_list, const size_t* keys_list_sizes, char** values_list, size_t* values_list_sizes, char** errs) { std::vector keys(num_keys); std::vector cfs(num_keys); for (size_t i = 0; i < num_keys; i++) { keys[i] = Slice(keys_list[i], keys_list_sizes[i]); cfs[i] = column_families[i]->rep; } std::vector values(num_keys); std::vector statuses = db->rep->MultiGet(options->rep, 
cfs, keys, &values); for (size_t i = 0; i < num_keys; i++) { if (statuses[i].ok()) { values_list[i] = CopyString(values[i]); values_list_sizes[i] = values[i].size(); errs[i] = nullptr; } else { values_list[i] = nullptr; values_list_sizes[i] = 0; if (!statuses[i].IsNotFound()) { errs[i] = strdup(statuses[i].ToString().c_str()); } else { errs[i] = nullptr; } } } } rocksdb_iterator_t* rocksdb_create_iterator( rocksdb_t* db, const rocksdb_readoptions_t* options) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = db->rep->NewIterator(options->rep); return result; } rocksdb_wal_iterator_t* rocksdb_get_updates_since( rocksdb_t* db, uint64_t seq_number, const rocksdb_wal_readoptions_t* options, char** errptr) { std::unique_ptr iter; TransactionLogIterator::ReadOptions ro; if (options!=nullptr) { ro = options->rep; } if (SaveError(errptr, db->rep->GetUpdatesSince(seq_number, &iter, ro))) { return nullptr; } rocksdb_wal_iterator_t* result = new rocksdb_wal_iterator_t; result->rep = iter.release(); return result; } void rocksdb_wal_iter_next(rocksdb_wal_iterator_t* iter) { iter->rep->Next(); } unsigned char rocksdb_wal_iter_valid(const rocksdb_wal_iterator_t* iter) { return iter->rep->Valid(); } void rocksdb_wal_iter_status (const rocksdb_wal_iterator_t* iter, char** errptr) { SaveError(errptr, iter->rep->status()); } void rocksdb_wal_iter_destroy (const rocksdb_wal_iterator_t* iter) { delete iter->rep; delete iter; } rocksdb_writebatch_t* rocksdb_wal_iter_get_batch (const rocksdb_wal_iterator_t* iter, uint64_t* seq) { rocksdb_writebatch_t* result = rocksdb_writebatch_create(); BatchResult wal_batch = iter->rep->GetBatch(); result->rep = std::move(*wal_batch.writeBatchPtr); if (seq != nullptr) { *seq = wal_batch.sequence; } return result; } uint64_t rocksdb_get_latest_sequence_number (rocksdb_t *db) { return db->rep->GetLatestSequenceNumber(); } rocksdb_iterator_t* rocksdb_create_iterator_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, 
rocksdb_column_family_handle_t* column_family) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = db->rep->NewIterator(options->rep, column_family->rep); return result; } void rocksdb_create_iterators( rocksdb_t *db, rocksdb_readoptions_t* opts, rocksdb_column_family_handle_t** column_families, rocksdb_iterator_t** iterators, size_t size, char** errptr) { std::vector column_families_vec; for (size_t i = 0; i < size; i++) { column_families_vec.push_back(column_families[i]->rep); } std::vector res; Status status = db->rep->NewIterators(opts->rep, column_families_vec, &res); assert(res.size() == size); if (SaveError(errptr, status)) { return; } for (size_t i = 0; i < size; i++) { iterators[i] = new rocksdb_iterator_t; iterators[i]->rep = res[i]; } } const rocksdb_snapshot_t* rocksdb_create_snapshot( rocksdb_t* db) { rocksdb_snapshot_t* result = new rocksdb_snapshot_t; result->rep = db->rep->GetSnapshot(); return result; } void rocksdb_release_snapshot( rocksdb_t* db, const rocksdb_snapshot_t* snapshot) { db->rep->ReleaseSnapshot(snapshot->rep); delete snapshot; } char* rocksdb_property_value( rocksdb_t* db, const char* propname) { std::string tmp; if (db->rep->GetProperty(Slice(propname), &tmp)) { // We use strdup() since we expect human readable output. 
return strdup(tmp.c_str()); } else { return nullptr; } } int rocksdb_property_int( rocksdb_t* db, const char* propname, uint64_t *out_val) { if (db->rep->GetIntProperty(Slice(propname), out_val)) { return 0; } else { return -1; } } int rocksdb_property_int_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* propname, uint64_t *out_val) { if (db->rep->GetIntProperty(column_family->rep, Slice(propname), out_val)) { return 0; } else { return -1; } } char* rocksdb_property_value_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* propname) { std::string tmp; if (db->rep->GetProperty(column_family->rep, Slice(propname), &tmp)) { // We use strdup() since we expect human readable output. return strdup(tmp.c_str()); } else { return nullptr; } } void rocksdb_approximate_sizes( rocksdb_t* db, int num_ranges, const char* const* range_start_key, const size_t* range_start_key_len, const char* const* range_limit_key, const size_t* range_limit_key_len, uint64_t* sizes) { Range* ranges = new Range[num_ranges]; for (int i = 0; i < num_ranges; i++) { ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); } db->rep->GetApproximateSizes(ranges, num_ranges, sizes); delete[] ranges; } void rocksdb_approximate_sizes_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, int num_ranges, const char* const* range_start_key, const size_t* range_start_key_len, const char* const* range_limit_key, const size_t* range_limit_key_len, uint64_t* sizes) { Range* ranges = new Range[num_ranges]; for (int i = 0; i < num_ranges; i++) { ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); } db->rep->GetApproximateSizes(column_family->rep, ranges, num_ranges, sizes); delete[] ranges; } void rocksdb_delete_file( rocksdb_t* db, const char* name) { db->rep->DeleteFile(name); } 
const rocksdb_livefiles_t* rocksdb_livefiles( rocksdb_t* db) { rocksdb_livefiles_t* result = new rocksdb_livefiles_t; db->rep->GetLiveFilesMetaData(&result->rep); return result; } void rocksdb_compact_range( rocksdb_t* db, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len) { Slice a, b; db->rep->CompactRange( CompactRangeOptions(), // Pass nullptr Slice if corresponding "const char*" is nullptr (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); } void rocksdb_compact_range_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len) { Slice a, b; db->rep->CompactRange( CompactRangeOptions(), column_family->rep, // Pass nullptr Slice if corresponding "const char*" is nullptr (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); } void rocksdb_compact_range_opt(rocksdb_t* db, rocksdb_compactoptions_t* opt, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len) { Slice a, b; db->rep->CompactRange( opt->rep, // Pass nullptr Slice if corresponding "const char*" is nullptr (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)); } void rocksdb_compact_range_cf_opt(rocksdb_t* db, rocksdb_column_family_handle_t* column_family, rocksdb_compactoptions_t* opt, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len) { Slice a, b; db->rep->CompactRange( opt->rep, column_family->rep, // Pass nullptr Slice if corresponding "const char*" is nullptr (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), (limit_key ? 
(b = Slice(limit_key, limit_key_len), &b) : nullptr)); } void rocksdb_flush( rocksdb_t* db, const rocksdb_flushoptions_t* options, char** errptr) { SaveError(errptr, db->rep->Flush(options->rep)); } void rocksdb_flush_cf( rocksdb_t* db, const rocksdb_flushoptions_t* options, rocksdb_column_family_handle_t* column_family, char** errptr) { SaveError(errptr, db->rep->Flush(options->rep, column_family->rep)); } void rocksdb_disable_file_deletions( rocksdb_t* db, char** errptr) { SaveError(errptr, db->rep->DisableFileDeletions()); } void rocksdb_enable_file_deletions( rocksdb_t* db, unsigned char force, char** errptr) { SaveError(errptr, db->rep->EnableFileDeletions(force)); } void rocksdb_destroy_db( const rocksdb_options_t* options, const char* name, char** errptr) { SaveError(errptr, DestroyDB(name, options->rep)); } void rocksdb_repair_db( const rocksdb_options_t* options, const char* name, char** errptr) { SaveError(errptr, RepairDB(name, options->rep)); } void rocksdb_iter_destroy(rocksdb_iterator_t* iter) { delete iter->rep; delete iter; } unsigned char rocksdb_iter_valid(const rocksdb_iterator_t* iter) { return iter->rep->Valid(); } void rocksdb_iter_seek_to_first(rocksdb_iterator_t* iter) { iter->rep->SeekToFirst(); } void rocksdb_iter_seek_to_last(rocksdb_iterator_t* iter) { iter->rep->SeekToLast(); } void rocksdb_iter_seek(rocksdb_iterator_t* iter, const char* k, size_t klen) { iter->rep->Seek(Slice(k, klen)); } void rocksdb_iter_seek_for_prev(rocksdb_iterator_t* iter, const char* k, size_t klen) { iter->rep->SeekForPrev(Slice(k, klen)); } void rocksdb_iter_next(rocksdb_iterator_t* iter) { iter->rep->Next(); } void rocksdb_iter_prev(rocksdb_iterator_t* iter) { iter->rep->Prev(); } const char* rocksdb_iter_key(const rocksdb_iterator_t* iter, size_t* klen) { Slice s = iter->rep->key(); *klen = s.size(); return s.data(); } const char* rocksdb_iter_value(const rocksdb_iterator_t* iter, size_t* vlen) { Slice s = iter->rep->value(); *vlen = s.size(); return 
s.data(); } void rocksdb_iter_get_error(const rocksdb_iterator_t* iter, char** errptr) { SaveError(errptr, iter->rep->status()); } rocksdb_writebatch_t* rocksdb_writebatch_create() { return new rocksdb_writebatch_t; } rocksdb_writebatch_t* rocksdb_writebatch_create_from(const char* rep, size_t size) { rocksdb_writebatch_t* b = new rocksdb_writebatch_t; b->rep = WriteBatch(std::string(rep, size)); return b; } void rocksdb_writebatch_destroy(rocksdb_writebatch_t* b) { delete b; } void rocksdb_writebatch_clear(rocksdb_writebatch_t* b) { b->rep.Clear(); } int rocksdb_writebatch_count(rocksdb_writebatch_t* b) { return b->rep.Count(); } void rocksdb_writebatch_put( rocksdb_writebatch_t* b, const char* key, size_t klen, const char* val, size_t vlen) { b->rep.Put(Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_put_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen) { b->rep.Put(column_family->rep, Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_putv( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep.Put(SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_putv_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], 
keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep.Put(column_family->rep, SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_merge( rocksdb_writebatch_t* b, const char* key, size_t klen, const char* val, size_t vlen) { b->rep.Merge(Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_merge_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen) { b->rep.Merge(column_family->rep, Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_mergev( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep.Merge(SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_mergev_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep.Merge(column_family->rep, SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_delete( rocksdb_writebatch_t* b, 
const char* key, size_t klen) { b->rep.Delete(Slice(key, klen)); } void rocksdb_writebatch_singledelete(rocksdb_writebatch_t* b, const char* key, size_t klen) { b->rep.SingleDelete(Slice(key, klen)); } void rocksdb_writebatch_delete_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen) { b->rep.Delete(column_family->rep, Slice(key, klen)); } void rocksdb_writebatch_singledelete_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen) { b->rep.SingleDelete(column_family->rep, Slice(key, klen)); } void rocksdb_writebatch_deletev( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } b->rep.Delete(SliceParts(key_slices.data(), num_keys)); } void rocksdb_writebatch_deletev_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } b->rep.Delete(column_family->rep, SliceParts(key_slices.data(), num_keys)); } void rocksdb_writebatch_delete_range(rocksdb_writebatch_t* b, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len) { b->rep.DeleteRange(Slice(start_key, start_key_len), Slice(end_key, end_key_len)); } void rocksdb_writebatch_delete_range_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len) { b->rep.DeleteRange(column_family->rep, Slice(start_key, start_key_len), Slice(end_key, end_key_len)); } void rocksdb_writebatch_delete_rangev(rocksdb_writebatch_t* b, int num_keys, const char* const* start_keys_list, const size_t* 
start_keys_list_sizes, const char* const* end_keys_list, const size_t* end_keys_list_sizes) { std::vector start_key_slices(num_keys); std::vector end_key_slices(num_keys); for (int i = 0; i < num_keys; i++) { start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]); end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]); } b->rep.DeleteRange(SliceParts(start_key_slices.data(), num_keys), SliceParts(end_key_slices.data(), num_keys)); } void rocksdb_writebatch_delete_rangev_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* start_keys_list, const size_t* start_keys_list_sizes, const char* const* end_keys_list, const size_t* end_keys_list_sizes) { std::vector start_key_slices(num_keys); std::vector end_key_slices(num_keys); for (int i = 0; i < num_keys; i++) { start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]); end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]); } b->rep.DeleteRange(column_family->rep, SliceParts(start_key_slices.data(), num_keys), SliceParts(end_key_slices.data(), num_keys)); } void rocksdb_writebatch_put_log_data( rocksdb_writebatch_t* b, const char* blob, size_t len) { b->rep.PutLogData(Slice(blob, len)); } class H : public WriteBatch::Handler { public: void* state_; void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen); void (*deleted_)(void*, const char* k, size_t klen); void Put(const Slice& key, const Slice& value) override { (*put_)(state_, key.data(), key.size(), value.data(), value.size()); } void Delete(const Slice& key) override { (*deleted_)(state_, key.data(), key.size()); } }; void rocksdb_writebatch_iterate( rocksdb_writebatch_t* b, void* state, void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), void (*deleted)(void*, const char* k, size_t klen)) { H handler; handler.state_ = state; handler.put_ = put; handler.deleted_ = deleted; b->rep.Iterate(&handler); } const 
char* rocksdb_writebatch_data(rocksdb_writebatch_t* b, size_t* size) { *size = b->rep.GetDataSize(); return b->rep.Data().c_str(); } void rocksdb_writebatch_set_save_point(rocksdb_writebatch_t* b) { b->rep.SetSavePoint(); } void rocksdb_writebatch_rollback_to_save_point(rocksdb_writebatch_t* b, char** errptr) { SaveError(errptr, b->rep.RollbackToSavePoint()); } void rocksdb_writebatch_pop_save_point(rocksdb_writebatch_t* b, char** errptr) { SaveError(errptr, b->rep.PopSavePoint()); } rocksdb_writebatch_wi_t* rocksdb_writebatch_wi_create(size_t reserved_bytes, unsigned char overwrite_key) { rocksdb_writebatch_wi_t* b = new rocksdb_writebatch_wi_t; b->rep = new WriteBatchWithIndex(BytewiseComparator(), reserved_bytes, overwrite_key); return b; } void rocksdb_writebatch_wi_destroy(rocksdb_writebatch_wi_t* b) { if (b->rep) { delete b->rep; } delete b; } void rocksdb_writebatch_wi_clear(rocksdb_writebatch_wi_t* b) { b->rep->Clear(); } int rocksdb_writebatch_wi_count(rocksdb_writebatch_wi_t* b) { return b->rep->GetWriteBatch()->Count(); } void rocksdb_writebatch_wi_put( rocksdb_writebatch_wi_t* b, const char* key, size_t klen, const char* val, size_t vlen) { b->rep->Put(Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_wi_put_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen) { b->rep->Put(column_family->rep, Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_wi_putv( rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep->Put(SliceParts(key_slices.data(), 
num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_wi_putv_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep->Put(column_family->rep, SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_wi_merge( rocksdb_writebatch_wi_t* b, const char* key, size_t klen, const char* val, size_t vlen) { b->rep->Merge(Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_wi_merge_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen) { b->rep->Merge(column_family->rep, Slice(key, klen), Slice(val, vlen)); } void rocksdb_writebatch_wi_mergev( rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep->Merge(SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_wi_mergev_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* 
values_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } std::vector value_slices(num_values); for (int i = 0; i < num_values; i++) { value_slices[i] = Slice(values_list[i], values_list_sizes[i]); } b->rep->Merge(column_family->rep, SliceParts(key_slices.data(), num_keys), SliceParts(value_slices.data(), num_values)); } void rocksdb_writebatch_wi_delete( rocksdb_writebatch_wi_t* b, const char* key, size_t klen) { b->rep->Delete(Slice(key, klen)); } void rocksdb_writebatch_wi_singledelete(rocksdb_writebatch_wi_t* b, const char* key, size_t klen) { b->rep->SingleDelete(Slice(key, klen)); } void rocksdb_writebatch_wi_delete_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen) { b->rep->Delete(column_family->rep, Slice(key, klen)); } void rocksdb_writebatch_wi_singledelete_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen) { b->rep->SingleDelete(column_family->rep, Slice(key, klen)); } void rocksdb_writebatch_wi_deletev( rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } b->rep->Delete(SliceParts(key_slices.data(), num_keys)); } void rocksdb_writebatch_wi_deletev_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes) { std::vector key_slices(num_keys); for (int i = 0; i < num_keys; i++) { key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]); } b->rep->Delete(column_family->rep, SliceParts(key_slices.data(), num_keys)); } void rocksdb_writebatch_wi_delete_range(rocksdb_writebatch_wi_t* b, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len) { 
b->rep->DeleteRange(Slice(start_key, start_key_len), Slice(end_key, end_key_len)); } void rocksdb_writebatch_wi_delete_range_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len) { b->rep->DeleteRange(column_family->rep, Slice(start_key, start_key_len), Slice(end_key, end_key_len)); } void rocksdb_writebatch_wi_delete_rangev(rocksdb_writebatch_wi_t* b, int num_keys, const char* const* start_keys_list, const size_t* start_keys_list_sizes, const char* const* end_keys_list, const size_t* end_keys_list_sizes) { std::vector start_key_slices(num_keys); std::vector end_key_slices(num_keys); for (int i = 0; i < num_keys; i++) { start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]); end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]); } b->rep->DeleteRange(SliceParts(start_key_slices.data(), num_keys), SliceParts(end_key_slices.data(), num_keys)); } void rocksdb_writebatch_wi_delete_rangev_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* start_keys_list, const size_t* start_keys_list_sizes, const char* const* end_keys_list, const size_t* end_keys_list_sizes) { std::vector start_key_slices(num_keys); std::vector end_key_slices(num_keys); for (int i = 0; i < num_keys; i++) { start_key_slices[i] = Slice(start_keys_list[i], start_keys_list_sizes[i]); end_key_slices[i] = Slice(end_keys_list[i], end_keys_list_sizes[i]); } b->rep->DeleteRange(column_family->rep, SliceParts(start_key_slices.data(), num_keys), SliceParts(end_key_slices.data(), num_keys)); } void rocksdb_writebatch_wi_put_log_data( rocksdb_writebatch_wi_t* b, const char* blob, size_t len) { b->rep->PutLogData(Slice(blob, len)); } void rocksdb_writebatch_wi_iterate( rocksdb_writebatch_wi_t* b, void* state, void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), void (*deleted)(void*, const 
char* k, size_t klen)) { H handler; handler.state_ = state; handler.put_ = put; handler.deleted_ = deleted; b->rep->GetWriteBatch()->Iterate(&handler); } const char* rocksdb_writebatch_wi_data(rocksdb_writebatch_wi_t* b, size_t* size) { WriteBatch* wb = b->rep->GetWriteBatch(); *size = wb->GetDataSize(); return wb->Data().c_str(); } void rocksdb_writebatch_wi_set_save_point(rocksdb_writebatch_wi_t* b) { b->rep->SetSavePoint(); } void rocksdb_writebatch_wi_rollback_to_save_point(rocksdb_writebatch_wi_t* b, char** errptr) { SaveError(errptr, b->rep->RollbackToSavePoint()); } rocksdb_iterator_t* rocksdb_writebatch_wi_create_iterator_with_base( rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = wbwi->rep->NewIteratorWithBase(base_iterator->rep); delete base_iterator; return result; } rocksdb_iterator_t* rocksdb_writebatch_wi_create_iterator_with_base_cf( rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator, rocksdb_column_family_handle_t* column_family) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = wbwi->rep->NewIteratorWithBase(column_family->rep, base_iterator->rep); delete base_iterator; return result; } char* rocksdb_writebatch_wi_get_from_batch( rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = wbwi->rep->GetFromBatch(options->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_writebatch_wi_get_from_batch_cf( rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = 
wbwi->rep->GetFromBatch(column_family->rep, options->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_writebatch_wi_get_from_batch_and_db( rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = wbwi->rep->GetFromBatchAndDB(db->rep, options->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_writebatch_wi_get_from_batch_and_db_cf( rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = wbwi->rep->GetFromBatchAndDB(db->rep, options->rep, column_family->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } void rocksdb_write_writebatch_wi( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_writebatch_wi_t* wbwi, char** errptr) { WriteBatch* wb = wbwi->rep->GetWriteBatch(); SaveError(errptr, db->rep->Write(options->rep, wb)); } rocksdb_block_based_table_options_t* rocksdb_block_based_options_create() { return new rocksdb_block_based_table_options_t; } void rocksdb_block_based_options_destroy( rocksdb_block_based_table_options_t* options) { delete options; } void rocksdb_block_based_options_set_block_size( rocksdb_block_based_table_options_t* options, size_t block_size) { options->rep.block_size = block_size; } void rocksdb_block_based_options_set_block_size_deviation( rocksdb_block_based_table_options_t* options, 
int block_size_deviation) { options->rep.block_size_deviation = block_size_deviation; } void rocksdb_block_based_options_set_block_restart_interval( rocksdb_block_based_table_options_t* options, int block_restart_interval) { options->rep.block_restart_interval = block_restart_interval; } void rocksdb_block_based_options_set_index_block_restart_interval( rocksdb_block_based_table_options_t* options, int index_block_restart_interval) { options->rep.index_block_restart_interval = index_block_restart_interval; } void rocksdb_block_based_options_set_metadata_block_size( rocksdb_block_based_table_options_t* options, uint64_t metadata_block_size) { options->rep.metadata_block_size = metadata_block_size; } void rocksdb_block_based_options_set_partition_filters( rocksdb_block_based_table_options_t* options, unsigned char partition_filters) { options->rep.partition_filters = partition_filters; } void rocksdb_block_based_options_set_use_delta_encoding( rocksdb_block_based_table_options_t* options, unsigned char use_delta_encoding) { options->rep.use_delta_encoding = use_delta_encoding; } void rocksdb_block_based_options_set_filter_policy( rocksdb_block_based_table_options_t* options, rocksdb_filterpolicy_t* filter_policy) { options->rep.filter_policy.reset(filter_policy); } void rocksdb_block_based_options_set_no_block_cache( rocksdb_block_based_table_options_t* options, unsigned char no_block_cache) { options->rep.no_block_cache = no_block_cache; } void rocksdb_block_based_options_set_block_cache( rocksdb_block_based_table_options_t* options, rocksdb_cache_t* block_cache) { if (block_cache) { options->rep.block_cache = block_cache->rep; } } void rocksdb_block_based_options_set_block_cache_compressed( rocksdb_block_based_table_options_t* options, rocksdb_cache_t* block_cache_compressed) { if (block_cache_compressed) { options->rep.block_cache_compressed = block_cache_compressed->rep; } } void rocksdb_block_based_options_set_whole_key_filtering( 
rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.whole_key_filtering = v; } void rocksdb_block_based_options_set_format_version( rocksdb_block_based_table_options_t* options, int v) { options->rep.format_version = v; } void rocksdb_block_based_options_set_index_type( rocksdb_block_based_table_options_t* options, int v) { options->rep.index_type = static_cast(v); } void rocksdb_block_based_options_set_data_block_index_type( rocksdb_block_based_table_options_t* options, int v) { options->rep.data_block_index_type = static_cast(v); } void rocksdb_block_based_options_set_data_block_hash_ratio( rocksdb_block_based_table_options_t* options, double v) { options->rep.data_block_hash_table_util_ratio = v; } void rocksdb_block_based_options_set_hash_index_allow_collision( rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.hash_index_allow_collision = v; } void rocksdb_block_based_options_set_cache_index_and_filter_blocks( rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.cache_index_and_filter_blocks = v; } void rocksdb_block_based_options_set_cache_index_and_filter_blocks_with_high_priority( rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.cache_index_and_filter_blocks_with_high_priority = v; } void rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache( rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.pin_l0_filter_and_index_blocks_in_cache = v; } void rocksdb_block_based_options_set_pin_top_level_index_and_filter( rocksdb_block_based_table_options_t* options, unsigned char v) { options->rep.pin_top_level_index_and_filter = v; } void rocksdb_options_set_block_based_table_factory( rocksdb_options_t *opt, rocksdb_block_based_table_options_t* table_options) { if (table_options) { opt->rep.table_factory.reset( ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(table_options->rep)); } } rocksdb_cuckoo_table_options_t* 
rocksdb_cuckoo_options_create() { return new rocksdb_cuckoo_table_options_t; } void rocksdb_cuckoo_options_destroy( rocksdb_cuckoo_table_options_t* options) { delete options; } void rocksdb_cuckoo_options_set_hash_ratio( rocksdb_cuckoo_table_options_t* options, double v) { options->rep.hash_table_ratio = v; } void rocksdb_cuckoo_options_set_max_search_depth( rocksdb_cuckoo_table_options_t* options, uint32_t v) { options->rep.max_search_depth = v; } void rocksdb_cuckoo_options_set_cuckoo_block_size( rocksdb_cuckoo_table_options_t* options, uint32_t v) { options->rep.cuckoo_block_size = v; } void rocksdb_cuckoo_options_set_identity_as_first_hash( rocksdb_cuckoo_table_options_t* options, unsigned char v) { options->rep.identity_as_first_hash = v; } void rocksdb_cuckoo_options_set_use_module_hash( rocksdb_cuckoo_table_options_t* options, unsigned char v) { options->rep.use_module_hash = v; } void rocksdb_options_set_cuckoo_table_factory( rocksdb_options_t *opt, rocksdb_cuckoo_table_options_t* table_options) { if (table_options) { opt->rep.table_factory.reset( ROCKSDB_NAMESPACE::NewCuckooTableFactory(table_options->rep)); } } void rocksdb_set_options( rocksdb_t* db, int count, const char* const keys[], const char* const values[], char** errptr) { std::unordered_map options_map; for (int i=0; irep->SetOptions(options_map)); } void rocksdb_set_options_cf( rocksdb_t* db, rocksdb_column_family_handle_t* handle, int count, const char* const keys[], const char* const values[], char** errptr) { std::unordered_map options_map; for (int i=0; irep->SetOptions(handle->rep, options_map)); } rocksdb_options_t* rocksdb_options_create() { return new rocksdb_options_t; } void rocksdb_options_destroy(rocksdb_options_t* options) { delete options; } rocksdb_options_t* rocksdb_options_create_copy(rocksdb_options_t* options) { return new rocksdb_options_t(*options); } void rocksdb_options_increase_parallelism( rocksdb_options_t* opt, int total_threads) { 
opt->rep.IncreaseParallelism(total_threads); } void rocksdb_options_optimize_for_point_lookup( rocksdb_options_t* opt, uint64_t block_cache_size_mb) { opt->rep.OptimizeForPointLookup(block_cache_size_mb); } void rocksdb_options_optimize_level_style_compaction( rocksdb_options_t* opt, uint64_t memtable_memory_budget) { opt->rep.OptimizeLevelStyleCompaction(memtable_memory_budget); } void rocksdb_options_optimize_universal_style_compaction( rocksdb_options_t* opt, uint64_t memtable_memory_budget) { opt->rep.OptimizeUniversalStyleCompaction(memtable_memory_budget); } void rocksdb_options_set_allow_ingest_behind( rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_ingest_behind = v; } unsigned char rocksdb_options_get_allow_ingest_behind(rocksdb_options_t* opt) { return opt->rep.allow_ingest_behind; } void rocksdb_options_set_compaction_filter( rocksdb_options_t* opt, rocksdb_compactionfilter_t* filter) { opt->rep.compaction_filter = filter; } void rocksdb_options_set_compaction_filter_factory( rocksdb_options_t* opt, rocksdb_compactionfilterfactory_t* factory) { opt->rep.compaction_filter_factory = std::shared_ptr(factory); } void rocksdb_options_compaction_readahead_size( rocksdb_options_t* opt, size_t s) { opt->rep.compaction_readahead_size = s; } size_t rocksdb_options_get_compaction_readahead_size(rocksdb_options_t* opt) { return opt->rep.compaction_readahead_size; } void rocksdb_options_set_comparator( rocksdb_options_t* opt, rocksdb_comparator_t* cmp) { opt->rep.comparator = cmp; } void rocksdb_options_set_merge_operator( rocksdb_options_t* opt, rocksdb_mergeoperator_t* merge_operator) { opt->rep.merge_operator = std::shared_ptr(merge_operator); } void rocksdb_options_set_create_if_missing( rocksdb_options_t* opt, unsigned char v) { opt->rep.create_if_missing = v; } unsigned char rocksdb_options_get_create_if_missing(rocksdb_options_t* opt) { return opt->rep.create_if_missing; } void rocksdb_options_set_create_missing_column_families( rocksdb_options_t* 
opt, unsigned char v) { opt->rep.create_missing_column_families = v; } unsigned char rocksdb_options_get_create_missing_column_families( rocksdb_options_t* opt) { return opt->rep.create_missing_column_families; } void rocksdb_options_set_error_if_exists( rocksdb_options_t* opt, unsigned char v) { opt->rep.error_if_exists = v; } unsigned char rocksdb_options_get_error_if_exists(rocksdb_options_t* opt) { return opt->rep.error_if_exists; } void rocksdb_options_set_paranoid_checks( rocksdb_options_t* opt, unsigned char v) { opt->rep.paranoid_checks = v; } unsigned char rocksdb_options_get_paranoid_checks(rocksdb_options_t* opt) { return opt->rep.paranoid_checks; } void rocksdb_options_set_db_paths(rocksdb_options_t* opt, const rocksdb_dbpath_t** dbpath_values, size_t num_paths) { std::vector db_paths(num_paths); for (size_t i = 0; i < num_paths; ++i) { db_paths[i] = dbpath_values[i]->rep; } opt->rep.db_paths = db_paths; } void rocksdb_options_set_env(rocksdb_options_t* opt, rocksdb_env_t* env) { opt->rep.env = (env ? 
env->rep : nullptr); } void rocksdb_options_set_info_log(rocksdb_options_t* opt, rocksdb_logger_t* l) { if (l) { opt->rep.info_log = l->rep; } } void rocksdb_options_set_info_log_level( rocksdb_options_t* opt, int v) { opt->rep.info_log_level = static_cast(v); } int rocksdb_options_get_info_log_level(rocksdb_options_t* opt) { return static_cast(opt->rep.info_log_level); } void rocksdb_options_set_db_write_buffer_size(rocksdb_options_t* opt, size_t s) { opt->rep.db_write_buffer_size = s; } size_t rocksdb_options_get_db_write_buffer_size(rocksdb_options_t* opt) { return opt->rep.db_write_buffer_size; } void rocksdb_options_set_write_buffer_size(rocksdb_options_t* opt, size_t s) { opt->rep.write_buffer_size = s; } size_t rocksdb_options_get_write_buffer_size(rocksdb_options_t* opt) { return opt->rep.write_buffer_size; } void rocksdb_options_set_max_open_files(rocksdb_options_t* opt, int n) { opt->rep.max_open_files = n; } int rocksdb_options_get_max_open_files(rocksdb_options_t* opt) { return opt->rep.max_open_files; } void rocksdb_options_set_max_file_opening_threads(rocksdb_options_t* opt, int n) { opt->rep.max_file_opening_threads = n; } int rocksdb_options_get_max_file_opening_threads(rocksdb_options_t* opt) { return opt->rep.max_file_opening_threads; } void rocksdb_options_set_max_total_wal_size(rocksdb_options_t* opt, uint64_t n) { opt->rep.max_total_wal_size = n; } uint64_t rocksdb_options_get_max_total_wal_size(rocksdb_options_t* opt) { return opt->rep.max_total_wal_size; } void rocksdb_options_set_target_file_size_base( rocksdb_options_t* opt, uint64_t n) { opt->rep.target_file_size_base = n; } uint64_t rocksdb_options_get_target_file_size_base(rocksdb_options_t* opt) { return opt->rep.target_file_size_base; } void rocksdb_options_set_target_file_size_multiplier( rocksdb_options_t* opt, int n) { opt->rep.target_file_size_multiplier = n; } int rocksdb_options_get_target_file_size_multiplier(rocksdb_options_t* opt) { return opt->rep.target_file_size_multiplier; 
} void rocksdb_options_set_max_bytes_for_level_base( rocksdb_options_t* opt, uint64_t n) { opt->rep.max_bytes_for_level_base = n; } uint64_t rocksdb_options_get_max_bytes_for_level_base(rocksdb_options_t* opt) { return opt->rep.max_bytes_for_level_base; } void rocksdb_options_set_level_compaction_dynamic_level_bytes( rocksdb_options_t* opt, unsigned char v) { opt->rep.level_compaction_dynamic_level_bytes = v; } unsigned char rocksdb_options_get_level_compaction_dynamic_level_bytes( rocksdb_options_t* opt) { return opt->rep.level_compaction_dynamic_level_bytes; } void rocksdb_options_set_max_bytes_for_level_multiplier(rocksdb_options_t* opt, double n) { opt->rep.max_bytes_for_level_multiplier = n; } double rocksdb_options_get_max_bytes_for_level_multiplier( rocksdb_options_t* opt) { return opt->rep.max_bytes_for_level_multiplier; } void rocksdb_options_set_max_compaction_bytes(rocksdb_options_t* opt, uint64_t n) { opt->rep.max_compaction_bytes = n; } void rocksdb_options_set_max_bytes_for_level_multiplier_additional( rocksdb_options_t* opt, int* level_values, size_t num_levels) { opt->rep.max_bytes_for_level_multiplier_additional.resize(num_levels); for (size_t i = 0; i < num_levels; ++i) { opt->rep.max_bytes_for_level_multiplier_additional[i] = level_values[i]; } } void rocksdb_options_enable_statistics(rocksdb_options_t* opt) { opt->rep.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); } void rocksdb_options_set_skip_stats_update_on_db_open(rocksdb_options_t* opt, unsigned char val) { opt->rep.skip_stats_update_on_db_open = val; } unsigned char rocksdb_options_get_skip_stats_update_on_db_open( rocksdb_options_t* opt) { return opt->rep.skip_stats_update_on_db_open; } void rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open( rocksdb_options_t* opt, unsigned char val) { opt->rep.skip_checking_sst_file_sizes_on_db_open = val; } unsigned char rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open( rocksdb_options_t* opt) { return 
opt->rep.skip_checking_sst_file_sizes_on_db_open; } void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) { opt->rep.num_levels = n; } int rocksdb_options_get_num_levels(rocksdb_options_t* opt) { return opt->rep.num_levels; } void rocksdb_options_set_level0_file_num_compaction_trigger( rocksdb_options_t* opt, int n) { opt->rep.level0_file_num_compaction_trigger = n; } int rocksdb_options_get_level0_file_num_compaction_trigger( rocksdb_options_t* opt) { return opt->rep.level0_file_num_compaction_trigger; } void rocksdb_options_set_level0_slowdown_writes_trigger( rocksdb_options_t* opt, int n) { opt->rep.level0_slowdown_writes_trigger = n; } int rocksdb_options_get_level0_slowdown_writes_trigger(rocksdb_options_t* opt) { return opt->rep.level0_slowdown_writes_trigger; } void rocksdb_options_set_level0_stop_writes_trigger( rocksdb_options_t* opt, int n) { opt->rep.level0_stop_writes_trigger = n; } int rocksdb_options_get_level0_stop_writes_trigger(rocksdb_options_t* opt) { return opt->rep.level0_stop_writes_trigger; } void rocksdb_options_set_max_mem_compaction_level(rocksdb_options_t* /*opt*/, int /*n*/) {} void rocksdb_options_set_wal_recovery_mode(rocksdb_options_t* opt,int mode) { opt->rep.wal_recovery_mode = static_cast(mode); } void rocksdb_options_set_compression(rocksdb_options_t* opt, int t) { opt->rep.compression = static_cast(t); } void rocksdb_options_set_bottommost_compression(rocksdb_options_t* opt, int t) { opt->rep.bottommost_compression = static_cast(t); } void rocksdb_options_set_compression_per_level(rocksdb_options_t* opt, int* level_values, size_t num_levels) { opt->rep.compression_per_level.resize(num_levels); for (size_t i = 0; i < num_levels; ++i) { opt->rep.compression_per_level[i] = static_cast(level_values[i]); } } void rocksdb_options_set_bottommost_compression_options(rocksdb_options_t* opt, int w_bits, int level, int strategy, int max_dict_bytes, unsigned char enabled) { opt->rep.bottommost_compression_opts.window_bits = 
w_bits; opt->rep.bottommost_compression_opts.level = level; opt->rep.bottommost_compression_opts.strategy = strategy; opt->rep.bottommost_compression_opts.max_dict_bytes = max_dict_bytes; opt->rep.bottommost_compression_opts.enabled = enabled; } void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( rocksdb_options_t* opt, int zstd_max_train_bytes, unsigned char enabled) { opt->rep.bottommost_compression_opts.zstd_max_train_bytes = zstd_max_train_bytes; opt->rep.bottommost_compression_opts.enabled = enabled; } void rocksdb_options_set_compression_options(rocksdb_options_t* opt, int w_bits, int level, int strategy, int max_dict_bytes) { opt->rep.compression_opts.window_bits = w_bits; opt->rep.compression_opts.level = level; opt->rep.compression_opts.strategy = strategy; opt->rep.compression_opts.max_dict_bytes = max_dict_bytes; } void rocksdb_options_set_compression_options_zstd_max_train_bytes( rocksdb_options_t* opt, int zstd_max_train_bytes) { opt->rep.compression_opts.zstd_max_train_bytes = zstd_max_train_bytes; } void rocksdb_options_set_prefix_extractor( rocksdb_options_t* opt, rocksdb_slicetransform_t* prefix_extractor) { opt->rep.prefix_extractor.reset(prefix_extractor); } void rocksdb_options_set_use_fsync( rocksdb_options_t* opt, int use_fsync) { opt->rep.use_fsync = use_fsync; } void rocksdb_options_set_db_log_dir( rocksdb_options_t* opt, const char* db_log_dir) { opt->rep.db_log_dir = db_log_dir; } void rocksdb_options_set_wal_dir( rocksdb_options_t* opt, const char* v) { opt->rep.wal_dir = v; } void rocksdb_options_set_WAL_ttl_seconds(rocksdb_options_t* opt, uint64_t ttl) { opt->rep.WAL_ttl_seconds = ttl; } void rocksdb_options_set_WAL_size_limit_MB( rocksdb_options_t* opt, uint64_t limit) { opt->rep.WAL_size_limit_MB = limit; } void rocksdb_options_set_manifest_preallocation_size( rocksdb_options_t* opt, size_t v) { opt->rep.manifest_preallocation_size = v; } // noop void rocksdb_options_set_purge_redundant_kvs_while_flush( 
rocksdb_options_t* /*opt*/, unsigned char /*v*/) {} void rocksdb_options_set_use_direct_reads(rocksdb_options_t* opt, unsigned char v) { opt->rep.use_direct_reads = v; } void rocksdb_options_set_use_direct_io_for_flush_and_compaction( rocksdb_options_t* opt, unsigned char v) { opt->rep.use_direct_io_for_flush_and_compaction = v; } void rocksdb_options_set_allow_mmap_reads( rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_mmap_reads = v; } void rocksdb_options_set_allow_mmap_writes( rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_mmap_writes = v; } void rocksdb_options_set_is_fd_close_on_exec( rocksdb_options_t* opt, unsigned char v) { opt->rep.is_fd_close_on_exec = v; } void rocksdb_options_set_skip_log_error_on_recovery( rocksdb_options_t* opt, unsigned char v) { opt->rep.skip_log_error_on_recovery = v; } void rocksdb_options_set_stats_dump_period_sec( rocksdb_options_t* opt, unsigned int v) { opt->rep.stats_dump_period_sec = v; } void rocksdb_options_set_advise_random_on_open( rocksdb_options_t* opt, unsigned char v) { opt->rep.advise_random_on_open = v; } void rocksdb_options_set_access_hint_on_compaction_start( rocksdb_options_t* opt, int v) { switch(v) { case 0: opt->rep.access_hint_on_compaction_start = ROCKSDB_NAMESPACE::Options::NONE; break; case 1: opt->rep.access_hint_on_compaction_start = ROCKSDB_NAMESPACE::Options::NORMAL; break; case 2: opt->rep.access_hint_on_compaction_start = ROCKSDB_NAMESPACE::Options::SEQUENTIAL; break; case 3: opt->rep.access_hint_on_compaction_start = ROCKSDB_NAMESPACE::Options::WILLNEED; break; } } void rocksdb_options_set_use_adaptive_mutex( rocksdb_options_t* opt, unsigned char v) { opt->rep.use_adaptive_mutex = v; } void rocksdb_options_set_wal_bytes_per_sync( rocksdb_options_t* opt, uint64_t v) { opt->rep.wal_bytes_per_sync = v; } void rocksdb_options_set_bytes_per_sync( rocksdb_options_t* opt, uint64_t v) { opt->rep.bytes_per_sync = v; } void 
rocksdb_options_set_writable_file_max_buffer_size(rocksdb_options_t* opt, uint64_t v) { opt->rep.writable_file_max_buffer_size = static_cast(v); } void rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_concurrent_memtable_write = v; } void rocksdb_options_set_enable_write_thread_adaptive_yield( rocksdb_options_t* opt, unsigned char v) { opt->rep.enable_write_thread_adaptive_yield = v; } void rocksdb_options_set_max_sequential_skip_in_iterations( rocksdb_options_t* opt, uint64_t v) { opt->rep.max_sequential_skip_in_iterations = v; } void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t* opt, int n) { opt->rep.max_write_buffer_number = n; } int rocksdb_options_get_max_write_buffer_number(rocksdb_options_t* opt) { return opt->rep.max_write_buffer_number; } void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t* opt, int n) { opt->rep.min_write_buffer_number_to_merge = n; } int rocksdb_options_get_min_write_buffer_number_to_merge( rocksdb_options_t* opt) { return opt->rep.min_write_buffer_number_to_merge; } void rocksdb_options_set_max_write_buffer_number_to_maintain( rocksdb_options_t* opt, int n) { opt->rep.max_write_buffer_number_to_maintain = n; } int rocksdb_options_get_max_write_buffer_number_to_maintain( rocksdb_options_t* opt) { return opt->rep.max_write_buffer_number_to_maintain; } void rocksdb_options_set_max_write_buffer_size_to_maintain( rocksdb_options_t* opt, int64_t n) { opt->rep.max_write_buffer_size_to_maintain = n; } int64_t rocksdb_options_get_max_write_buffer_size_to_maintain( rocksdb_options_t* opt) { return opt->rep.max_write_buffer_size_to_maintain; } void rocksdb_options_set_enable_pipelined_write(rocksdb_options_t* opt, unsigned char v) { opt->rep.enable_pipelined_write = v; } unsigned char rocksdb_options_get_enable_pipelined_write( rocksdb_options_t* opt) { return opt->rep.enable_pipelined_write; } void 
rocksdb_options_set_unordered_write(rocksdb_options_t* opt, unsigned char v) { opt->rep.unordered_write = v; } unsigned char rocksdb_options_get_unordered_write(rocksdb_options_t* opt) { return opt->rep.unordered_write; } void rocksdb_options_set_max_subcompactions(rocksdb_options_t* opt, uint32_t n) { opt->rep.max_subcompactions = n; } uint32_t rocksdb_options_get_max_subcompactions(rocksdb_options_t* opt) { return opt->rep.max_subcompactions; } void rocksdb_options_set_max_background_jobs(rocksdb_options_t* opt, int n) { opt->rep.max_background_jobs = n; } void rocksdb_options_set_max_background_compactions(rocksdb_options_t* opt, int n) { opt->rep.max_background_compactions = n; } void rocksdb_options_set_base_background_compactions(rocksdb_options_t* opt, int n) { opt->rep.base_background_compactions = n; } void rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt, int n) { opt->rep.max_background_flushes = n; } void rocksdb_options_set_max_log_file_size(rocksdb_options_t* opt, size_t v) { opt->rep.max_log_file_size = v; } void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t* opt, size_t v) { opt->rep.log_file_time_to_roll = v; } void rocksdb_options_set_keep_log_file_num(rocksdb_options_t* opt, size_t v) { opt->rep.keep_log_file_num = v; } void rocksdb_options_set_recycle_log_file_num(rocksdb_options_t* opt, size_t v) { opt->rep.recycle_log_file_num = v; } void rocksdb_options_set_soft_rate_limit(rocksdb_options_t* opt, double v) { opt->rep.soft_rate_limit = v; } void rocksdb_options_set_hard_rate_limit(rocksdb_options_t* opt, double v) { opt->rep.hard_rate_limit = v; } void rocksdb_options_set_soft_pending_compaction_bytes_limit(rocksdb_options_t* opt, size_t v) { opt->rep.soft_pending_compaction_bytes_limit = v; } void rocksdb_options_set_hard_pending_compaction_bytes_limit(rocksdb_options_t* opt, size_t v) { opt->rep.hard_pending_compaction_bytes_limit = v; } void rocksdb_options_set_rate_limit_delay_max_milliseconds( 
rocksdb_options_t* opt, unsigned int v) { opt->rep.rate_limit_delay_max_milliseconds = v; } void rocksdb_options_set_max_manifest_file_size( rocksdb_options_t* opt, size_t v) { opt->rep.max_manifest_file_size = v; } void rocksdb_options_set_table_cache_numshardbits( rocksdb_options_t* opt, int v) { opt->rep.table_cache_numshardbits = v; } void rocksdb_options_set_table_cache_remove_scan_count_limit( rocksdb_options_t* /*opt*/, int /*v*/) { // this option is deprecated } void rocksdb_options_set_arena_block_size( rocksdb_options_t* opt, size_t v) { opt->rep.arena_block_size = v; } void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t* opt, int disable) { opt->rep.disable_auto_compactions = disable; } void rocksdb_options_set_optimize_filters_for_hits(rocksdb_options_t* opt, int v) { opt->rep.optimize_filters_for_hits = v; } void rocksdb_options_set_delete_obsolete_files_period_micros( rocksdb_options_t* opt, uint64_t v) { opt->rep.delete_obsolete_files_period_micros = v; } void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t* opt) { opt->rep.PrepareForBulkLoad(); } void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t *opt) { opt->rep.memtable_factory.reset(new ROCKSDB_NAMESPACE::VectorRepFactory); } void rocksdb_options_set_memtable_prefix_bloom_size_ratio( rocksdb_options_t* opt, double v) { opt->rep.memtable_prefix_bloom_size_ratio = v; } void rocksdb_options_set_memtable_huge_page_size(rocksdb_options_t* opt, size_t v) { opt->rep.memtable_huge_page_size = v; } void rocksdb_options_set_hash_skip_list_rep( rocksdb_options_t *opt, size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) { ROCKSDB_NAMESPACE::MemTableRepFactory* factory = ROCKSDB_NAMESPACE::NewHashSkipListRepFactory( bucket_count, skiplist_height, skiplist_branching_factor); opt->rep.memtable_factory.reset(factory); } void rocksdb_options_set_hash_link_list_rep( rocksdb_options_t *opt, size_t bucket_count) { opt->rep.memtable_factory.reset( 
ROCKSDB_NAMESPACE::NewHashLinkListRepFactory(bucket_count)); } void rocksdb_options_set_plain_table_factory( rocksdb_options_t *opt, uint32_t user_key_len, int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness) { ROCKSDB_NAMESPACE::PlainTableOptions options; options.user_key_len = user_key_len; options.bloom_bits_per_key = bloom_bits_per_key; options.hash_table_ratio = hash_table_ratio; options.index_sparseness = index_sparseness; ROCKSDB_NAMESPACE::TableFactory* factory = ROCKSDB_NAMESPACE::NewPlainTableFactory(options); opt->rep.table_factory.reset(factory); } void rocksdb_options_set_max_successive_merges( rocksdb_options_t* opt, size_t v) { opt->rep.max_successive_merges = v; } void rocksdb_options_set_bloom_locality( rocksdb_options_t* opt, uint32_t v) { opt->rep.bloom_locality = v; } void rocksdb_options_set_inplace_update_support( rocksdb_options_t* opt, unsigned char v) { opt->rep.inplace_update_support = v; } void rocksdb_options_set_inplace_update_num_locks( rocksdb_options_t* opt, size_t v) { opt->rep.inplace_update_num_locks = v; } void rocksdb_options_set_report_bg_io_stats( rocksdb_options_t* opt, int v) { opt->rep.report_bg_io_stats = v; } void rocksdb_options_set_compaction_style(rocksdb_options_t *opt, int style) { opt->rep.compaction_style = static_cast(style); } void rocksdb_options_set_universal_compaction_options(rocksdb_options_t *opt, rocksdb_universal_compaction_options_t *uco) { opt->rep.compaction_options_universal = *(uco->rep); } void rocksdb_options_set_fifo_compaction_options( rocksdb_options_t* opt, rocksdb_fifo_compaction_options_t* fifo) { opt->rep.compaction_options_fifo = fifo->rep; } char *rocksdb_options_statistics_get_string(rocksdb_options_t *opt) { ROCKSDB_NAMESPACE::Statistics* statistics = opt->rep.statistics.get(); if (statistics) { return strdup(statistics->ToString().c_str()); } return nullptr; } void rocksdb_options_set_ratelimiter(rocksdb_options_t *opt, rocksdb_ratelimiter_t *limiter) { if (limiter) 
{ opt->rep.rate_limiter = limiter->rep; } } void rocksdb_options_set_atomic_flush(rocksdb_options_t* opt, unsigned char atomic_flush) { opt->rep.atomic_flush = atomic_flush; } rocksdb_ratelimiter_t* rocksdb_ratelimiter_create( int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness) { rocksdb_ratelimiter_t* rate_limiter = new rocksdb_ratelimiter_t; rate_limiter->rep.reset( NewGenericRateLimiter(rate_bytes_per_sec, refill_period_us, fairness)); return rate_limiter; } void rocksdb_ratelimiter_destroy(rocksdb_ratelimiter_t *limiter) { delete limiter; } void rocksdb_options_set_row_cache(rocksdb_options_t* opt, rocksdb_cache_t* cache) { if(cache) { opt->rep.row_cache = cache->rep; } } void rocksdb_set_perf_level(int v) { PerfLevel level = static_cast(v); SetPerfLevel(level); } rocksdb_perfcontext_t* rocksdb_perfcontext_create() { rocksdb_perfcontext_t* context = new rocksdb_perfcontext_t; context->rep = ROCKSDB_NAMESPACE::get_perf_context(); return context; } void rocksdb_perfcontext_reset(rocksdb_perfcontext_t* context) { context->rep->Reset(); } char* rocksdb_perfcontext_report(rocksdb_perfcontext_t* context, unsigned char exclude_zero_counters) { return strdup(context->rep->ToString(exclude_zero_counters).c_str()); } uint64_t rocksdb_perfcontext_metric(rocksdb_perfcontext_t* context, int metric) { PerfContext* rep = context->rep; switch (metric) { case rocksdb_user_key_comparison_count: return rep->user_key_comparison_count; case rocksdb_block_cache_hit_count: return rep->block_cache_hit_count; case rocksdb_block_read_count: return rep->block_read_count; case rocksdb_block_read_byte: return rep->block_read_byte; case rocksdb_block_read_time: return rep->block_read_time; case rocksdb_block_checksum_time: return rep->block_checksum_time; case rocksdb_block_decompress_time: return rep->block_decompress_time; case rocksdb_get_read_bytes: return rep->get_read_bytes; case rocksdb_multiget_read_bytes: return rep->multiget_read_bytes; case 
rocksdb_iter_read_bytes: return rep->iter_read_bytes; case rocksdb_internal_key_skipped_count: return rep->internal_key_skipped_count; case rocksdb_internal_delete_skipped_count: return rep->internal_delete_skipped_count; case rocksdb_internal_recent_skipped_count: return rep->internal_recent_skipped_count; case rocksdb_internal_merge_count: return rep->internal_merge_count; case rocksdb_get_snapshot_time: return rep->get_snapshot_time; case rocksdb_get_from_memtable_time: return rep->get_from_memtable_time; case rocksdb_get_from_memtable_count: return rep->get_from_memtable_count; case rocksdb_get_post_process_time: return rep->get_post_process_time; case rocksdb_get_from_output_files_time: return rep->get_from_output_files_time; case rocksdb_seek_on_memtable_time: return rep->seek_on_memtable_time; case rocksdb_seek_on_memtable_count: return rep->seek_on_memtable_count; case rocksdb_next_on_memtable_count: return rep->next_on_memtable_count; case rocksdb_prev_on_memtable_count: return rep->prev_on_memtable_count; case rocksdb_seek_child_seek_time: return rep->seek_child_seek_time; case rocksdb_seek_child_seek_count: return rep->seek_child_seek_count; case rocksdb_seek_min_heap_time: return rep->seek_min_heap_time; case rocksdb_seek_max_heap_time: return rep->seek_max_heap_time; case rocksdb_seek_internal_seek_time: return rep->seek_internal_seek_time; case rocksdb_find_next_user_entry_time: return rep->find_next_user_entry_time; case rocksdb_write_wal_time: return rep->write_wal_time; case rocksdb_write_memtable_time: return rep->write_memtable_time; case rocksdb_write_delay_time: return rep->write_delay_time; case rocksdb_write_pre_and_post_process_time: return rep->write_pre_and_post_process_time; case rocksdb_db_mutex_lock_nanos: return rep->db_mutex_lock_nanos; case rocksdb_db_condition_wait_nanos: return rep->db_condition_wait_nanos; case rocksdb_merge_operator_time_nanos: return rep->merge_operator_time_nanos; case rocksdb_read_index_block_nanos: return 
rep->read_index_block_nanos; case rocksdb_read_filter_block_nanos: return rep->read_filter_block_nanos; case rocksdb_new_table_block_iter_nanos: return rep->new_table_block_iter_nanos; case rocksdb_new_table_iterator_nanos: return rep->new_table_iterator_nanos; case rocksdb_block_seek_nanos: return rep->block_seek_nanos; case rocksdb_find_table_nanos: return rep->find_table_nanos; case rocksdb_bloom_memtable_hit_count: return rep->bloom_memtable_hit_count; case rocksdb_bloom_memtable_miss_count: return rep->bloom_memtable_miss_count; case rocksdb_bloom_sst_hit_count: return rep->bloom_sst_hit_count; case rocksdb_bloom_sst_miss_count: return rep->bloom_sst_miss_count; case rocksdb_key_lock_wait_time: return rep->key_lock_wait_time; case rocksdb_key_lock_wait_count: return rep->key_lock_wait_count; case rocksdb_env_new_sequential_file_nanos: return rep->env_new_sequential_file_nanos; case rocksdb_env_new_random_access_file_nanos: return rep->env_new_random_access_file_nanos; case rocksdb_env_new_writable_file_nanos: return rep->env_new_writable_file_nanos; case rocksdb_env_reuse_writable_file_nanos: return rep->env_reuse_writable_file_nanos; case rocksdb_env_new_random_rw_file_nanos: return rep->env_new_random_rw_file_nanos; case rocksdb_env_new_directory_nanos: return rep->env_new_directory_nanos; case rocksdb_env_file_exists_nanos: return rep->env_file_exists_nanos; case rocksdb_env_get_children_nanos: return rep->env_get_children_nanos; case rocksdb_env_get_children_file_attributes_nanos: return rep->env_get_children_file_attributes_nanos; case rocksdb_env_delete_file_nanos: return rep->env_delete_file_nanos; case rocksdb_env_create_dir_nanos: return rep->env_create_dir_nanos; case rocksdb_env_create_dir_if_missing_nanos: return rep->env_create_dir_if_missing_nanos; case rocksdb_env_delete_dir_nanos: return rep->env_delete_dir_nanos; case rocksdb_env_get_file_size_nanos: return rep->env_get_file_size_nanos; case rocksdb_env_get_file_modification_time_nanos: return 
rep->env_get_file_modification_time_nanos; case rocksdb_env_rename_file_nanos: return rep->env_rename_file_nanos; case rocksdb_env_link_file_nanos: return rep->env_link_file_nanos; case rocksdb_env_lock_file_nanos: return rep->env_lock_file_nanos; case rocksdb_env_unlock_file_nanos: return rep->env_unlock_file_nanos; case rocksdb_env_new_logger_nanos: return rep->env_new_logger_nanos; default: break; } return 0; } void rocksdb_perfcontext_destroy(rocksdb_perfcontext_t* context) { delete context; } /* TODO: DB::OpenForReadOnly DB::KeyMayExist DB::GetOptions DB::GetSortedWalFiles DB::GetLatestSequenceNumber DB::GetUpdatesSince DB::GetDbIdentity DB::RunManualCompaction custom cache table_properties_collectors */ rocksdb_compactionfilter_t* rocksdb_compactionfilter_create( void* state, void (*destructor)(void*), unsigned char (*filter)( void*, int level, const char* key, size_t key_length, const char* existing_value, size_t value_length, char** new_value, size_t *new_value_length, unsigned char* value_changed), const char* (*name)(void*)) { rocksdb_compactionfilter_t* result = new rocksdb_compactionfilter_t; result->state_ = state; result->destructor_ = destructor; result->filter_ = filter; result->ignore_snapshots_ = true; result->name_ = name; return result; } void rocksdb_compactionfilter_set_ignore_snapshots( rocksdb_compactionfilter_t* filter, unsigned char whether_ignore) { filter->ignore_snapshots_ = whether_ignore; } void rocksdb_compactionfilter_destroy(rocksdb_compactionfilter_t* filter) { delete filter; } unsigned char rocksdb_compactionfiltercontext_is_full_compaction( rocksdb_compactionfiltercontext_t* context) { return context->rep.is_full_compaction; } unsigned char rocksdb_compactionfiltercontext_is_manual_compaction( rocksdb_compactionfiltercontext_t* context) { return context->rep.is_manual_compaction; } rocksdb_compactionfilterfactory_t* rocksdb_compactionfilterfactory_create( void* state, void (*destructor)(void*), rocksdb_compactionfilter_t* 
(*create_compaction_filter)( void*, rocksdb_compactionfiltercontext_t* context), const char* (*name)(void*)) { rocksdb_compactionfilterfactory_t* result = new rocksdb_compactionfilterfactory_t; result->state_ = state; result->destructor_ = destructor; result->create_compaction_filter_ = create_compaction_filter; result->name_ = name; return result; } void rocksdb_compactionfilterfactory_destroy( rocksdb_compactionfilterfactory_t* factory) { delete factory; } rocksdb_comparator_t* rocksdb_comparator_create( void* state, void (*destructor)(void*), int (*compare)( void*, const char* a, size_t alen, const char* b, size_t blen), const char* (*name)(void*)) { rocksdb_comparator_t* result = new rocksdb_comparator_t; result->state_ = state; result->destructor_ = destructor; result->compare_ = compare; result->name_ = name; return result; } void rocksdb_comparator_destroy(rocksdb_comparator_t* cmp) { delete cmp; } rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( void* state, void (*destructor)(void*), char* (*create_filter)( void*, const char* const* key_array, const size_t* key_length_array, int num_keys, size_t* filter_length), unsigned char (*key_may_match)( void*, const char* key, size_t length, const char* filter, size_t filter_length), void (*delete_filter)( void*, const char* filter, size_t filter_length), const char* (*name)(void*)) { rocksdb_filterpolicy_t* result = new rocksdb_filterpolicy_t; result->state_ = state; result->destructor_ = destructor; result->create_ = create_filter; result->key_match_ = key_may_match; result->delete_filter_ = delete_filter; result->name_ = name; return result; } void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t* filter) { delete filter; } rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(int bits_per_key, bool original_format) { // Make a rocksdb_filterpolicy_t, but override all of its methods so // they delegate to a NewBloomFilterPolicy() instead of user // supplied C functions. 
struct Wrapper : public rocksdb_filterpolicy_t { const FilterPolicy* rep_; ~Wrapper() override { delete rep_; } const char* Name() const override { return rep_->Name(); } void CreateFilter(const Slice* keys, int n, std::string* dst) const override { return rep_->CreateFilter(keys, n, dst); } bool KeyMayMatch(const Slice& key, const Slice& filter) const override { return rep_->KeyMayMatch(key, filter); } // No need to override GetFilterBitsBuilder if this one is overridden ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( const ROCKSDB_NAMESPACE::FilterBuildingContext& context) const override { return rep_->GetBuilderWithContext(context); } ROCKSDB_NAMESPACE::FilterBitsReader* GetFilterBitsReader( const Slice& contents) const override { return rep_->GetFilterBitsReader(contents); } static void DoNothing(void*) {} }; Wrapper* wrapper = new Wrapper; wrapper->rep_ = NewBloomFilterPolicy(bits_per_key, original_format); wrapper->state_ = nullptr; wrapper->delete_filter_ = nullptr; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full(int bits_per_key) { return rocksdb_filterpolicy_create_bloom_format(bits_per_key, false); } rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) { return rocksdb_filterpolicy_create_bloom_format(bits_per_key, true); } rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( void* state, void (*destructor)(void*), char* (*full_merge)(void*, const char* key, size_t key_length, const char* existing_value, size_t existing_value_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length), char* (*partial_merge)(void*, const char* key, size_t key_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length), void (*delete_value)(void*, const char* value, size_t value_length), 
const char* (*name)(void*)) { rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t; result->state_ = state; result->destructor_ = destructor; result->full_merge_ = full_merge; result->partial_merge_ = partial_merge; result->delete_value_ = delete_value; result->name_ = name; return result; } void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t* merge_operator) { delete merge_operator; } rocksdb_readoptions_t* rocksdb_readoptions_create() { return new rocksdb_readoptions_t; } void rocksdb_readoptions_destroy(rocksdb_readoptions_t* opt) { delete opt; } void rocksdb_readoptions_set_verify_checksums( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.verify_checksums = v; } void rocksdb_readoptions_set_fill_cache( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.fill_cache = v; } void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t* opt, const rocksdb_snapshot_t* snap) { opt->rep.snapshot = (snap ? snap->rep : nullptr); } void rocksdb_readoptions_set_iterate_upper_bound( rocksdb_readoptions_t* opt, const char* key, size_t keylen) { if (key == nullptr) { opt->upper_bound = Slice(); opt->rep.iterate_upper_bound = nullptr; } else { opt->upper_bound = Slice(key, keylen); opt->rep.iterate_upper_bound = &opt->upper_bound; } } void rocksdb_readoptions_set_iterate_lower_bound( rocksdb_readoptions_t *opt, const char* key, size_t keylen) { if (key == nullptr) { opt->lower_bound = Slice(); opt->rep.iterate_lower_bound = nullptr; } else { opt->lower_bound = Slice(key, keylen); opt->rep.iterate_lower_bound = &opt->lower_bound; } } void rocksdb_readoptions_set_read_tier( rocksdb_readoptions_t* opt, int v) { opt->rep.read_tier = static_cast(v); } void rocksdb_readoptions_set_tailing( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.tailing = v; } void rocksdb_readoptions_set_managed( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.managed = v; } void rocksdb_readoptions_set_readahead_size( rocksdb_readoptions_t* opt, size_t v) { 
opt->rep.readahead_size = v; } void rocksdb_readoptions_set_prefix_same_as_start( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.prefix_same_as_start = v; } void rocksdb_readoptions_set_pin_data(rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.pin_data = v; } void rocksdb_readoptions_set_total_order_seek(rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.total_order_seek = v; } void rocksdb_readoptions_set_max_skippable_internal_keys( rocksdb_readoptions_t* opt, uint64_t v) { opt->rep.max_skippable_internal_keys = v; } void rocksdb_readoptions_set_background_purge_on_iterator_cleanup( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.background_purge_on_iterator_cleanup = v; } void rocksdb_readoptions_set_ignore_range_deletions( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.ignore_range_deletions = v; } rocksdb_writeoptions_t* rocksdb_writeoptions_create() { return new rocksdb_writeoptions_t; } void rocksdb_writeoptions_destroy(rocksdb_writeoptions_t* opt) { delete opt; } void rocksdb_writeoptions_set_sync( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.sync = v; } void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable) { opt->rep.disableWAL = disable; } void rocksdb_writeoptions_set_ignore_missing_column_families( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.ignore_missing_column_families = v; } void rocksdb_writeoptions_set_no_slowdown( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.no_slowdown = v; } void rocksdb_writeoptions_set_low_pri( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.low_pri = v; } void rocksdb_writeoptions_set_memtable_insert_hint_per_batch( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.memtable_insert_hint_per_batch = v; } rocksdb_compactoptions_t* rocksdb_compactoptions_create() { return new rocksdb_compactoptions_t; } void rocksdb_compactoptions_destroy(rocksdb_compactoptions_t* opt) { delete opt; } void 
rocksdb_compactoptions_set_bottommost_level_compaction( rocksdb_compactoptions_t* opt, unsigned char v) { opt->rep.bottommost_level_compaction = static_cast(v); } void rocksdb_compactoptions_set_exclusive_manual_compaction( rocksdb_compactoptions_t* opt, unsigned char v) { opt->rep.exclusive_manual_compaction = v; } void rocksdb_compactoptions_set_change_level(rocksdb_compactoptions_t* opt, unsigned char v) { opt->rep.change_level = v; } void rocksdb_compactoptions_set_target_level(rocksdb_compactoptions_t* opt, int n) { opt->rep.target_level = n; } rocksdb_flushoptions_t* rocksdb_flushoptions_create() { return new rocksdb_flushoptions_t; } void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t* opt) { delete opt; } void rocksdb_flushoptions_set_wait( rocksdb_flushoptions_t* opt, unsigned char v) { opt->rep.wait = v; } rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) { rocksdb_cache_t* c = new rocksdb_cache_t; c->rep = NewLRUCache(capacity); return c; } void rocksdb_cache_destroy(rocksdb_cache_t* cache) { delete cache; } void rocksdb_cache_set_capacity(rocksdb_cache_t* cache, size_t capacity) { cache->rep->SetCapacity(capacity); } size_t rocksdb_cache_get_usage(rocksdb_cache_t* cache) { return cache->rep->GetUsage(); } size_t rocksdb_cache_get_pinned_usage(rocksdb_cache_t* cache) { return cache->rep->GetPinnedUsage(); } rocksdb_dbpath_t* rocksdb_dbpath_create(const char* path, uint64_t target_size) { rocksdb_dbpath_t* result = new rocksdb_dbpath_t; result->rep.path = std::string(path); result->rep.target_size = target_size; return result; } void rocksdb_dbpath_destroy(rocksdb_dbpath_t* dbpath) { delete dbpath; } rocksdb_env_t* rocksdb_create_default_env() { rocksdb_env_t* result = new rocksdb_env_t; result->rep = Env::Default(); result->is_default = true; return result; } rocksdb_env_t* rocksdb_create_mem_env() { rocksdb_env_t* result = new rocksdb_env_t; result->rep = ROCKSDB_NAMESPACE::NewMemEnv(Env::Default()); result->is_default = false; return 
result; } void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n) { env->rep->SetBackgroundThreads(n); } void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n) { env->rep->SetBackgroundThreads(n, Env::HIGH); } void rocksdb_env_join_all_threads(rocksdb_env_t* env) { env->rep->WaitForJoin(); } void rocksdb_env_lower_thread_pool_io_priority(rocksdb_env_t* env) { env->rep->LowerThreadPoolIOPriority(); } void rocksdb_env_lower_high_priority_thread_pool_io_priority(rocksdb_env_t* env) { env->rep->LowerThreadPoolIOPriority(Env::HIGH); } void rocksdb_env_lower_thread_pool_cpu_priority(rocksdb_env_t* env) { env->rep->LowerThreadPoolCPUPriority(); } void rocksdb_env_lower_high_priority_thread_pool_cpu_priority(rocksdb_env_t* env) { env->rep->LowerThreadPoolCPUPriority(Env::HIGH); } void rocksdb_env_destroy(rocksdb_env_t* env) { if (!env->is_default) delete env->rep; delete env; } rocksdb_envoptions_t* rocksdb_envoptions_create() { rocksdb_envoptions_t* opt = new rocksdb_envoptions_t; return opt; } void rocksdb_envoptions_destroy(rocksdb_envoptions_t* opt) { delete opt; } rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create( const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options) { rocksdb_sstfilewriter_t* writer = new rocksdb_sstfilewriter_t; writer->rep = new SstFileWriter(env->rep, io_options->rep); return writer; } rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create_with_comparator( const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options, const rocksdb_comparator_t* /*comparator*/) { rocksdb_sstfilewriter_t* writer = new rocksdb_sstfilewriter_t; writer->rep = new SstFileWriter(env->rep, io_options->rep); return writer; } void rocksdb_sstfilewriter_open(rocksdb_sstfilewriter_t* writer, const char* name, char** errptr) { SaveError(errptr, writer->rep->Open(std::string(name))); } void rocksdb_sstfilewriter_add(rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, const char* val, size_t vallen, 
char** errptr) { SaveError(errptr, writer->rep->Put(Slice(key, keylen), Slice(val, vallen))); } void rocksdb_sstfilewriter_put(rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, writer->rep->Put(Slice(key, keylen), Slice(val, vallen))); } void rocksdb_sstfilewriter_merge(rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, writer->rep->Merge(Slice(key, keylen), Slice(val, vallen))); } void rocksdb_sstfilewriter_delete(rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, char** errptr) { SaveError(errptr, writer->rep->Delete(Slice(key, keylen))); } void rocksdb_sstfilewriter_finish(rocksdb_sstfilewriter_t* writer, char** errptr) { SaveError(errptr, writer->rep->Finish(nullptr)); } void rocksdb_sstfilewriter_file_size(rocksdb_sstfilewriter_t* writer, uint64_t* file_size) { *file_size = writer->rep->FileSize(); } void rocksdb_sstfilewriter_destroy(rocksdb_sstfilewriter_t* writer) { delete writer->rep; delete writer; } rocksdb_ingestexternalfileoptions_t* rocksdb_ingestexternalfileoptions_create() { rocksdb_ingestexternalfileoptions_t* opt = new rocksdb_ingestexternalfileoptions_t; return opt; } void rocksdb_ingestexternalfileoptions_set_move_files( rocksdb_ingestexternalfileoptions_t* opt, unsigned char move_files) { opt->rep.move_files = move_files; } void rocksdb_ingestexternalfileoptions_set_snapshot_consistency( rocksdb_ingestexternalfileoptions_t* opt, unsigned char snapshot_consistency) { opt->rep.snapshot_consistency = snapshot_consistency; } void rocksdb_ingestexternalfileoptions_set_allow_global_seqno( rocksdb_ingestexternalfileoptions_t* opt, unsigned char allow_global_seqno) { opt->rep.allow_global_seqno = allow_global_seqno; } void rocksdb_ingestexternalfileoptions_set_allow_blocking_flush( rocksdb_ingestexternalfileoptions_t* opt, unsigned char allow_blocking_flush) { 
opt->rep.allow_blocking_flush = allow_blocking_flush; } void rocksdb_ingestexternalfileoptions_set_ingest_behind( rocksdb_ingestexternalfileoptions_t* opt, unsigned char ingest_behind) { opt->rep.ingest_behind = ingest_behind; } void rocksdb_ingestexternalfileoptions_destroy( rocksdb_ingestexternalfileoptions_t* opt) { delete opt; } void rocksdb_ingest_external_file( rocksdb_t* db, const char* const* file_list, const size_t list_len, const rocksdb_ingestexternalfileoptions_t* opt, char** errptr) { std::vector files(list_len); for (size_t i = 0; i < list_len; ++i) { files[i] = std::string(file_list[i]); } SaveError(errptr, db->rep->IngestExternalFile(files, opt->rep)); } void rocksdb_ingest_external_file_cf( rocksdb_t* db, rocksdb_column_family_handle_t* handle, const char* const* file_list, const size_t list_len, const rocksdb_ingestexternalfileoptions_t* opt, char** errptr) { std::vector files(list_len); for (size_t i = 0; i < list_len; ++i) { files[i] = std::string(file_list[i]); } SaveError(errptr, db->rep->IngestExternalFile(handle->rep, files, opt->rep)); } void rocksdb_try_catch_up_with_primary(rocksdb_t* db, char** errptr) { SaveError(errptr, db->rep->TryCatchUpWithPrimary()); } rocksdb_slicetransform_t* rocksdb_slicetransform_create( void* state, void (*destructor)(void*), char* (*transform)( void*, const char* key, size_t length, size_t* dst_length), unsigned char (*in_domain)( void*, const char* key, size_t length), unsigned char (*in_range)( void*, const char* key, size_t length), const char* (*name)(void*)) { rocksdb_slicetransform_t* result = new rocksdb_slicetransform_t; result->state_ = state; result->destructor_ = destructor; result->transform_ = transform; result->in_domain_ = in_domain; result->in_range_ = in_range; result->name_ = name; return result; } void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t* st) { delete st; } struct Wrapper : public rocksdb_slicetransform_t { const SliceTransform* rep_; ~Wrapper() override { delete rep_; } 
const char* Name() const override { return rep_->Name(); } Slice Transform(const Slice& src) const override { return rep_->Transform(src); } bool InDomain(const Slice& src) const override { return rep_->InDomain(src); } bool InRange(const Slice& src) const override { return rep_->InRange(src); } static void DoNothing(void*) { } }; rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t prefixLen) { Wrapper* wrapper = new Wrapper; wrapper->rep_ = ROCKSDB_NAMESPACE::NewFixedPrefixTransform(prefixLen); wrapper->state_ = nullptr; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } rocksdb_slicetransform_t* rocksdb_slicetransform_create_noop() { Wrapper* wrapper = new Wrapper; wrapper->rep_ = ROCKSDB_NAMESPACE::NewNoopTransform(); wrapper->state_ = nullptr; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create() { rocksdb_universal_compaction_options_t* result = new rocksdb_universal_compaction_options_t; result->rep = new ROCKSDB_NAMESPACE::CompactionOptionsUniversal; return result; } void rocksdb_universal_compaction_options_set_size_ratio( rocksdb_universal_compaction_options_t* uco, int ratio) { uco->rep->size_ratio = ratio; } void rocksdb_universal_compaction_options_set_min_merge_width( rocksdb_universal_compaction_options_t* uco, int w) { uco->rep->min_merge_width = w; } void rocksdb_universal_compaction_options_set_max_merge_width( rocksdb_universal_compaction_options_t* uco, int w) { uco->rep->max_merge_width = w; } void rocksdb_universal_compaction_options_set_max_size_amplification_percent( rocksdb_universal_compaction_options_t* uco, int p) { uco->rep->max_size_amplification_percent = p; } void rocksdb_universal_compaction_options_set_compression_size_percent( rocksdb_universal_compaction_options_t* uco, int p) { uco->rep->compression_size_percent = p; } void rocksdb_universal_compaction_options_set_stop_style( 
rocksdb_universal_compaction_options_t* uco, int style) { uco->rep->stop_style = static_cast(style); } void rocksdb_universal_compaction_options_destroy( rocksdb_universal_compaction_options_t* uco) { delete uco->rep; delete uco; } rocksdb_fifo_compaction_options_t* rocksdb_fifo_compaction_options_create() { rocksdb_fifo_compaction_options_t* result = new rocksdb_fifo_compaction_options_t; result->rep = CompactionOptionsFIFO(); return result; } void rocksdb_fifo_compaction_options_set_max_table_files_size( rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size) { fifo_opts->rep.max_table_files_size = size; } void rocksdb_fifo_compaction_options_destroy( rocksdb_fifo_compaction_options_t* fifo_opts) { delete fifo_opts; } void rocksdb_options_set_min_level_to_compress(rocksdb_options_t* opt, int level) { if (level >= 0) { assert(level <= opt->rep.num_levels); opt->rep.compression_per_level.resize(opt->rep.num_levels); for (int i = 0; i < level; i++) { opt->rep.compression_per_level[i] = ROCKSDB_NAMESPACE::kNoCompression; } for (int i = level; i < opt->rep.num_levels; i++) { opt->rep.compression_per_level[i] = opt->rep.compression; } } } int rocksdb_livefiles_count( const rocksdb_livefiles_t* lf) { return static_cast(lf->rep.size()); } const char* rocksdb_livefiles_name( const rocksdb_livefiles_t* lf, int index) { return lf->rep[index].name.c_str(); } int rocksdb_livefiles_level( const rocksdb_livefiles_t* lf, int index) { return lf->rep[index].level; } size_t rocksdb_livefiles_size( const rocksdb_livefiles_t* lf, int index) { return lf->rep[index].size; } const char* rocksdb_livefiles_smallestkey( const rocksdb_livefiles_t* lf, int index, size_t* size) { *size = lf->rep[index].smallestkey.size(); return lf->rep[index].smallestkey.data(); } const char* rocksdb_livefiles_largestkey( const rocksdb_livefiles_t* lf, int index, size_t* size) { *size = lf->rep[index].largestkey.size(); return lf->rep[index].largestkey.data(); } uint64_t rocksdb_livefiles_entries( const 
rocksdb_livefiles_t* lf, int index) { return lf->rep[index].num_entries; } uint64_t rocksdb_livefiles_deletions( const rocksdb_livefiles_t* lf, int index) { return lf->rep[index].num_deletions; } extern void rocksdb_livefiles_destroy( const rocksdb_livefiles_t* lf) { delete lf; } void rocksdb_get_options_from_string(const rocksdb_options_t* base_options, const char* opts_str, rocksdb_options_t* new_options, char** errptr) { SaveError(errptr, GetOptionsFromString(base_options->rep, std::string(opts_str), &new_options->rep)); } void rocksdb_delete_file_in_range(rocksdb_t* db, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len, char** errptr) { Slice a, b; SaveError( errptr, DeleteFilesInRange( db->rep, db->rep->DefaultColumnFamily(), (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr))); } void rocksdb_delete_file_in_range_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len, char** errptr) { Slice a, b; SaveError( errptr, DeleteFilesInRange( db->rep, column_family->rep, (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr), (limit_key ? 
(b = Slice(limit_key, limit_key_len), &b) : nullptr))); } rocksdb_transactiondb_options_t* rocksdb_transactiondb_options_create() { return new rocksdb_transactiondb_options_t; } void rocksdb_transactiondb_options_destroy(rocksdb_transactiondb_options_t* opt){ delete opt; } void rocksdb_transactiondb_options_set_max_num_locks( rocksdb_transactiondb_options_t* opt, int64_t max_num_locks) { opt->rep.max_num_locks = max_num_locks; } void rocksdb_transactiondb_options_set_num_stripes( rocksdb_transactiondb_options_t* opt, size_t num_stripes) { opt->rep.num_stripes = num_stripes; } void rocksdb_transactiondb_options_set_transaction_lock_timeout( rocksdb_transactiondb_options_t* opt, int64_t txn_lock_timeout) { opt->rep.transaction_lock_timeout = txn_lock_timeout; } void rocksdb_transactiondb_options_set_default_lock_timeout( rocksdb_transactiondb_options_t* opt, int64_t default_lock_timeout) { opt->rep.default_lock_timeout = default_lock_timeout; } rocksdb_transaction_options_t* rocksdb_transaction_options_create() { return new rocksdb_transaction_options_t; } void rocksdb_transaction_options_destroy(rocksdb_transaction_options_t* opt) { delete opt; } void rocksdb_transaction_options_set_set_snapshot( rocksdb_transaction_options_t* opt, unsigned char v) { opt->rep.set_snapshot = v; } void rocksdb_transaction_options_set_deadlock_detect( rocksdb_transaction_options_t* opt, unsigned char v) { opt->rep.deadlock_detect = v; } void rocksdb_transaction_options_set_lock_timeout( rocksdb_transaction_options_t* opt, int64_t lock_timeout) { opt->rep.lock_timeout = lock_timeout; } void rocksdb_transaction_options_set_expiration( rocksdb_transaction_options_t* opt, int64_t expiration) { opt->rep.expiration = expiration; } void rocksdb_transaction_options_set_deadlock_detect_depth( rocksdb_transaction_options_t* opt, int64_t depth) { opt->rep.deadlock_detect_depth = depth; } void rocksdb_transaction_options_set_max_write_batch_size( rocksdb_transaction_options_t* opt, size_t size) { 
opt->rep.max_write_batch_size = size; } rocksdb_optimistictransaction_options_t* rocksdb_optimistictransaction_options_create() { return new rocksdb_optimistictransaction_options_t; } void rocksdb_optimistictransaction_options_destroy( rocksdb_optimistictransaction_options_t* opt) { delete opt; } void rocksdb_optimistictransaction_options_set_set_snapshot( rocksdb_optimistictransaction_options_t* opt, unsigned char v) { opt->rep.set_snapshot = v; } rocksdb_column_family_handle_t* rocksdb_transactiondb_create_column_family( rocksdb_transactiondb_t* txn_db, const rocksdb_options_t* column_family_options, const char* column_family_name, char** errptr) { rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t; SaveError(errptr, txn_db->rep->CreateColumnFamily( ColumnFamilyOptions(column_family_options->rep), std::string(column_family_name), &(handle->rep))); return handle; } rocksdb_transactiondb_t* rocksdb_transactiondb_open( const rocksdb_options_t* options, const rocksdb_transactiondb_options_t* txn_db_options, const char* name, char** errptr) { TransactionDB* txn_db; if (SaveError(errptr, TransactionDB::Open(options->rep, txn_db_options->rep, std::string(name), &txn_db))) { return nullptr; } rocksdb_transactiondb_t* result = new rocksdb_transactiondb_t; result->rep = txn_db; return result; } rocksdb_transactiondb_t* rocksdb_transactiondb_open_column_families( const rocksdb_options_t* options, const rocksdb_transactiondb_options_t* txn_db_options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr) { std::vector column_families; for (int i = 0; i < num_column_families; i++) { column_families.push_back(ColumnFamilyDescriptor( std::string(column_family_names[i]), ColumnFamilyOptions(column_family_options[i]->rep))); } TransactionDB* txn_db; std::vector handles; if (SaveError(errptr, 
TransactionDB::Open(options->rep, txn_db_options->rep, std::string(name), column_families, &handles, &txn_db))) { return nullptr; } for (size_t i = 0; i < handles.size(); i++) { rocksdb_column_family_handle_t* c_handle = new rocksdb_column_family_handle_t; c_handle->rep = handles[i]; column_family_handles[i] = c_handle; } rocksdb_transactiondb_t* result = new rocksdb_transactiondb_t; result->rep = txn_db; return result; } const rocksdb_snapshot_t* rocksdb_transactiondb_create_snapshot( rocksdb_transactiondb_t* txn_db) { rocksdb_snapshot_t* result = new rocksdb_snapshot_t; result->rep = txn_db->rep->GetSnapshot(); return result; } void rocksdb_transactiondb_release_snapshot( rocksdb_transactiondb_t* txn_db, const rocksdb_snapshot_t* snapshot) { txn_db->rep->ReleaseSnapshot(snapshot->rep); delete snapshot; } rocksdb_transaction_t* rocksdb_transaction_begin( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* write_options, const rocksdb_transaction_options_t* txn_options, rocksdb_transaction_t* old_txn) { if (old_txn == nullptr) { rocksdb_transaction_t* result = new rocksdb_transaction_t; result->rep = txn_db->rep->BeginTransaction(write_options->rep, txn_options->rep, nullptr); return result; } old_txn->rep = txn_db->rep->BeginTransaction(write_options->rep, txn_options->rep, old_txn->rep); return old_txn; } void rocksdb_transaction_commit(rocksdb_transaction_t* txn, char** errptr) { SaveError(errptr, txn->rep->Commit()); } void rocksdb_transaction_rollback(rocksdb_transaction_t* txn, char** errptr) { SaveError(errptr, txn->rep->Rollback()); } void rocksdb_transaction_set_savepoint(rocksdb_transaction_t* txn) { txn->rep->SetSavePoint(); } void rocksdb_transaction_rollback_to_savepoint(rocksdb_transaction_t* txn, char** errptr) { SaveError(errptr, txn->rep->RollbackToSavePoint()); } void rocksdb_transaction_destroy(rocksdb_transaction_t* txn) { delete txn->rep; delete txn; } const rocksdb_snapshot_t* rocksdb_transaction_get_snapshot( rocksdb_transaction_t* 
txn) { rocksdb_snapshot_t* result = new rocksdb_snapshot_t; result->rep = txn->rep->GetSnapshot(); return result; } // Read a key inside a transaction char* rocksdb_transaction_get(rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, const char* key, size_t klen, size_t* vlen, char** errptr) { char* result = nullptr; std::string tmp; Status s = txn->rep->Get(options->rep, Slice(key, klen), &tmp); if (s.ok()) { *vlen = tmp.size(); result = CopyString(tmp); } else { *vlen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_transaction_get_cf(rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, size_t* vlen, char** errptr) { char* result = nullptr; std::string tmp; Status s = txn->rep->Get(options->rep, column_family->rep, Slice(key, klen), &tmp); if (s.ok()) { *vlen = tmp.size(); result = CopyString(tmp); } else { *vlen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } // Read a key inside a transaction char* rocksdb_transaction_get_for_update(rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, const char* key, size_t klen, size_t* vlen, unsigned char exclusive, char** errptr) { char* result = nullptr; std::string tmp; Status s = txn->rep->GetForUpdate(options->rep, Slice(key, klen), &tmp, exclusive); if (s.ok()) { *vlen = tmp.size(); result = CopyString(tmp); } else { *vlen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_transaction_get_for_update_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, size_t* vlen, unsigned char exclusive, char** errptr) { char* result = nullptr; std::string tmp; Status s = txn->rep->GetForUpdate(options->rep, column_family->rep, Slice(key, klen), &tmp, exclusive); if (s.ok()) { *vlen = tmp.size(); result = CopyString(tmp); } else { *vlen 
= 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } // Read a key outside a transaction char* rocksdb_transactiondb_get( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, const char* key, size_t klen, size_t* vlen, char** errptr){ char* result = nullptr; std::string tmp; Status s = txn_db->rep->Get(options->rep, Slice(key, klen), &tmp); if (s.ok()) { *vlen = tmp.size(); result = CopyString(tmp); } else { *vlen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } char* rocksdb_transactiondb_get_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, size_t* vallen, char** errptr) { char* result = nullptr; std::string tmp; Status s = txn_db->rep->Get(options->rep, column_family->rep, Slice(key, keylen), &tmp); if (s.ok()) { *vallen = tmp.size(); result = CopyString(tmp); } else { *vallen = 0; if (!s.IsNotFound()) { SaveError(errptr, s); } } return result; } // Put a key inside a transaction void rocksdb_transaction_put(rocksdb_transaction_t* txn, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn->rep->Put(Slice(key, klen), Slice(val, vlen))); } void rocksdb_transaction_put_cf(rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn->rep->Put(column_family->rep, Slice(key, klen), Slice(val, vlen))); } // Put a key outside a transaction void rocksdb_transactiondb_put(rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn_db->rep->Put(options->rep, Slice(key, klen), Slice(val, vlen))); } void rocksdb_transactiondb_put_cf(rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, 
const char* key, size_t keylen, const char* val, size_t vallen, char** errptr) { SaveError(errptr, txn_db->rep->Put(options->rep, column_family->rep, Slice(key, keylen), Slice(val, vallen))); } // Write batch into transaction db void rocksdb_transactiondb_write( rocksdb_transactiondb_t* db, const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch, char** errptr) { SaveError(errptr, db->rep->Write(options->rep, &batch->rep)); } // Merge a key inside a transaction void rocksdb_transaction_merge(rocksdb_transaction_t* txn, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn->rep->Merge(Slice(key, klen), Slice(val, vlen))); } void rocksdb_transaction_merge_cf(rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn->rep->Merge(column_family->rep, Slice(key, klen), Slice(val, vlen))); } // Merge a key outside a transaction void rocksdb_transactiondb_merge(rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn_db->rep->Merge(options->rep, Slice(key, klen), Slice(val, vlen))); } void rocksdb_transactiondb_merge_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen, char** errptr) { SaveError(errptr, txn_db->rep->Merge(options->rep, column_family->rep, Slice(key, klen), Slice(val, vlen))); } // Delete a key inside a transaction void rocksdb_transaction_delete(rocksdb_transaction_t* txn, const char* key, size_t klen, char** errptr) { SaveError(errptr, txn->rep->Delete(Slice(key, klen))); } void rocksdb_transaction_delete_cf( rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, char** errptr) { SaveError(errptr, 
txn->rep->Delete(column_family->rep, Slice(key, klen))); } // Delete a key outside a transaction void rocksdb_transactiondb_delete(rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, const char* key, size_t klen, char** errptr) { SaveError(errptr, txn_db->rep->Delete(options->rep, Slice(key, klen))); } void rocksdb_transactiondb_delete_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, char** errptr) { SaveError(errptr, txn_db->rep->Delete(options->rep, column_family->rep, Slice(key, keylen))); } // Create an iterator inside a transaction rocksdb_iterator_t* rocksdb_transaction_create_iterator( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = txn->rep->GetIterator(options->rep); return result; } // Create an iterator inside a transaction with column family rocksdb_iterator_t* rocksdb_transaction_create_iterator_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = txn->rep->GetIterator(options->rep, column_family->rep); return result; } // Create an iterator outside a transaction rocksdb_iterator_t* rocksdb_transactiondb_create_iterator( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = txn_db->rep->NewIterator(options->rep); return result; } rocksdb_iterator_t* rocksdb_transactiondb_create_iterator_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family) { rocksdb_iterator_t* result = new rocksdb_iterator_t; result->rep = txn_db->rep->NewIterator(options->rep, column_family->rep); return result; } void rocksdb_transactiondb_close(rocksdb_transactiondb_t* txn_db) { delete 
txn_db->rep; delete txn_db; } rocksdb_checkpoint_t* rocksdb_transactiondb_checkpoint_object_create( rocksdb_transactiondb_t* txn_db, char** errptr) { Checkpoint* checkpoint; if (SaveError(errptr, Checkpoint::Create(txn_db->rep, &checkpoint))) { return nullptr; } rocksdb_checkpoint_t* result = new rocksdb_checkpoint_t; result->rep = checkpoint; return result; } rocksdb_optimistictransactiondb_t* rocksdb_optimistictransactiondb_open( const rocksdb_options_t* options, const char* name, char** errptr) { OptimisticTransactionDB* otxn_db; if (SaveError(errptr, OptimisticTransactionDB::Open( options->rep, std::string(name), &otxn_db))) { return nullptr; } rocksdb_optimistictransactiondb_t* result = new rocksdb_optimistictransactiondb_t; result->rep = otxn_db; return result; } rocksdb_optimistictransactiondb_t* rocksdb_optimistictransactiondb_open_column_families( const rocksdb_options_t* db_options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr) { std::vector column_families; for (int i = 0; i < num_column_families; i++) { column_families.push_back(ColumnFamilyDescriptor( std::string(column_family_names[i]), ColumnFamilyOptions(column_family_options[i]->rep))); } OptimisticTransactionDB* otxn_db; std::vector handles; if (SaveError(errptr, OptimisticTransactionDB::Open( DBOptions(db_options->rep), std::string(name), column_families, &handles, &otxn_db))) { return nullptr; } for (size_t i = 0; i < handles.size(); i++) { rocksdb_column_family_handle_t* c_handle = new rocksdb_column_family_handle_t; c_handle->rep = handles[i]; column_family_handles[i] = c_handle; } rocksdb_optimistictransactiondb_t* result = new rocksdb_optimistictransactiondb_t; result->rep = otxn_db; return result; } rocksdb_t* rocksdb_optimistictransactiondb_get_base_db( rocksdb_optimistictransactiondb_t* otxn_db) { DB* base_db = 
otxn_db->rep->GetBaseDB(); if (base_db != nullptr) { rocksdb_t* result = new rocksdb_t; result->rep = base_db; return result; } return nullptr; } void rocksdb_optimistictransactiondb_close_base_db(rocksdb_t* base_db) { delete base_db; } rocksdb_transaction_t* rocksdb_optimistictransaction_begin( rocksdb_optimistictransactiondb_t* otxn_db, const rocksdb_writeoptions_t* write_options, const rocksdb_optimistictransaction_options_t* otxn_options, rocksdb_transaction_t* old_txn) { if (old_txn == nullptr) { rocksdb_transaction_t* result = new rocksdb_transaction_t; result->rep = otxn_db->rep->BeginTransaction(write_options->rep, otxn_options->rep, nullptr); return result; } old_txn->rep = otxn_db->rep->BeginTransaction( write_options->rep, otxn_options->rep, old_txn->rep); return old_txn; } void rocksdb_optimistictransactiondb_close( rocksdb_optimistictransactiondb_t* otxn_db) { delete otxn_db->rep; delete otxn_db; } void rocksdb_free(void* ptr) { free(ptr); } rocksdb_pinnableslice_t* rocksdb_get_pinned( rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, size_t keylen, char** errptr) { rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); Status s = db->rep->Get(options->rep, db->rep->DefaultColumnFamily(), Slice(key, keylen), &v->rep); if (!s.ok()) { delete (v); if (!s.IsNotFound()) { SaveError(errptr, s); } return nullptr; } return v; } rocksdb_pinnableslice_t* rocksdb_get_pinned_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, char** errptr) { rocksdb_pinnableslice_t* v = new (rocksdb_pinnableslice_t); Status s = db->rep->Get(options->rep, column_family->rep, Slice(key, keylen), &v->rep); if (!s.ok()) { delete v; if (!s.IsNotFound()) { SaveError(errptr, s); } return nullptr; } return v; } void rocksdb_pinnableslice_destroy(rocksdb_pinnableslice_t* v) { delete v; } const char* rocksdb_pinnableslice_value(const rocksdb_pinnableslice_t* v, size_t* vlen) { if 
(!v) { *vlen = 0; return nullptr; } *vlen = v->rep.size(); return v->rep.data(); } // container to keep databases and caches in order to use // ROCKSDB_NAMESPACE::MemoryUtil struct rocksdb_memory_consumers_t { std::vector dbs; std::unordered_set caches; }; // initializes new container of memory consumers rocksdb_memory_consumers_t* rocksdb_memory_consumers_create() { return new rocksdb_memory_consumers_t; } // adds datatabase to the container of memory consumers void rocksdb_memory_consumers_add_db(rocksdb_memory_consumers_t* consumers, rocksdb_t* db) { consumers->dbs.push_back(db); } // adds cache to the container of memory consumers void rocksdb_memory_consumers_add_cache(rocksdb_memory_consumers_t* consumers, rocksdb_cache_t* cache) { consumers->caches.insert(cache); } // deletes container with memory consumers void rocksdb_memory_consumers_destroy(rocksdb_memory_consumers_t* consumers) { delete consumers; } // contains memory usage statistics provided by ROCKSDB_NAMESPACE::MemoryUtil struct rocksdb_memory_usage_t { uint64_t mem_table_total; uint64_t mem_table_unflushed; uint64_t mem_table_readers_total; uint64_t cache_total; }; // estimates amount of memory occupied by consumers (dbs and caches) rocksdb_memory_usage_t* rocksdb_approximate_memory_usage_create( rocksdb_memory_consumers_t* consumers, char** errptr) { vector dbs; for (auto db : consumers->dbs) { dbs.push_back(db->rep); } unordered_set cache_set; for (auto cache : consumers->caches) { cache_set.insert(const_cast(cache->rep.get())); } std::map usage_by_type; auto status = MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set, &usage_by_type); if (SaveError(errptr, status)) { return nullptr; } auto result = new rocksdb_memory_usage_t; result->mem_table_total = usage_by_type[MemoryUtil::kMemTableTotal]; result->mem_table_unflushed = usage_by_type[MemoryUtil::kMemTableUnFlushed]; result->mem_table_readers_total = usage_by_type[MemoryUtil::kTableReadersTotal]; result->cache_total = 
usage_by_type[MemoryUtil::kCacheTotal]; return result; } uint64_t rocksdb_approximate_memory_usage_get_mem_table_total( rocksdb_memory_usage_t* memory_usage) { return memory_usage->mem_table_total; } uint64_t rocksdb_approximate_memory_usage_get_mem_table_unflushed( rocksdb_memory_usage_t* memory_usage) { return memory_usage->mem_table_unflushed; } uint64_t rocksdb_approximate_memory_usage_get_mem_table_readers_total( rocksdb_memory_usage_t* memory_usage) { return memory_usage->mem_table_readers_total; } uint64_t rocksdb_approximate_memory_usage_get_cache_total( rocksdb_memory_usage_t* memory_usage) { return memory_usage->cache_total; } void rocksdb_options_set_dump_malloc_stats(rocksdb_options_t* opt, unsigned char val) { opt->rep.dump_malloc_stats = val; } void rocksdb_options_set_memtable_whole_key_filtering(rocksdb_options_t* opt, unsigned char val) { opt->rep.memtable_whole_key_filtering = val; } // deletes container with memory usage estimates void rocksdb_approximate_memory_usage_destroy(rocksdb_memory_usage_t* usage) { delete usage; } void rocksdb_cancel_all_background_work(rocksdb_t* db, unsigned char wait) { CancelAllBackgroundWork(db->rep, wait); } } // end extern "C" #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/c_test.c000066400000000000000000002301341370372246700152620ustar00rootroot00000000000000/* Copyright (c) 2011 The LevelDB Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. See the AUTHORS file for names of contributors. */ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#include #ifndef ROCKSDB_LITE // Lite does not support C API #include "rocksdb/c.h" #include #include #include #include #ifndef OS_WIN #include #endif #include // Can not use port/port.h macros as this is a c file #ifdef OS_WIN #include // Ok for uniqueness int geteuid() { int result = 0; result = ((int)GetCurrentProcessId() << 16); result |= (int)GetCurrentThreadId(); return result; } // VS < 2015 #if defined(_MSC_VER) && (_MSC_VER < 1900) #define snprintf _snprintf #endif #endif const char* phase = ""; static char dbname[200]; static char sstfilename[200]; static char dbbackupname[200]; static char dbcheckpointname[200]; static char dbpathname[200]; static char secondary_path[200]; static void StartPhase(const char* name) { fprintf(stderr, "=== Test %s\n", name); phase = name; } #ifdef _MSC_VER #pragma warning(push) #pragma warning (disable: 4996) // getenv security warning #endif static const char* GetTempDir(void) { const char* ret = getenv("TEST_TMPDIR"); if (ret == NULL || ret[0] == '\0') ret = "/tmp"; return ret; } #ifdef _MSC_VER #pragma warning(pop) #endif #define CheckNoError(err) \ if ((err) != NULL) { \ fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \ abort(); \ } #define CheckCondition(cond) \ if (!(cond)) { \ fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, #cond); \ abort(); \ } static void CheckEqual(const char* expected, const char* v, size_t n) { if (expected == NULL && v == NULL) { // ok } else if (expected != NULL && v != NULL && n == strlen(expected) && memcmp(expected, v, n) == 0) { // ok return; } else { fprintf(stderr, "%s: expected '%s', got '%s'\n", phase, (expected ? expected : "(null)"), (v ? 
v : "(null")); abort(); } } static void Free(char** ptr) { if (*ptr) { free(*ptr); *ptr = NULL; } } static void CheckValue( char* err, const char* expected, char** actual, size_t actual_length) { CheckNoError(err); CheckEqual(expected, *actual, actual_length); Free(actual); } static void CheckGet( rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, const char* expected) { char* err = NULL; size_t val_len; char* val; val = rocksdb_get(db, options, key, strlen(key), &val_len, &err); CheckNoError(err); CheckEqual(expected, val, val_len); Free(&val); } static void CheckGetCF( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* handle, const char* key, const char* expected) { char* err = NULL; size_t val_len; char* val; val = rocksdb_get_cf(db, options, handle, key, strlen(key), &val_len, &err); CheckNoError(err); CheckEqual(expected, val, val_len); Free(&val); } static void CheckPinGet(rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, const char* expected) { char* err = NULL; size_t val_len; const char* val; rocksdb_pinnableslice_t* p; p = rocksdb_get_pinned(db, options, key, strlen(key), &err); CheckNoError(err); val = rocksdb_pinnableslice_value(p, &val_len); CheckEqual(expected, val, val_len); rocksdb_pinnableslice_destroy(p); } static void CheckPinGetCF(rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* handle, const char* key, const char* expected) { char* err = NULL; size_t val_len; const char* val; rocksdb_pinnableslice_t* p; p = rocksdb_get_pinned_cf(db, options, handle, key, strlen(key), &err); CheckNoError(err); val = rocksdb_pinnableslice_value(p, &val_len); CheckEqual(expected, val, val_len); rocksdb_pinnableslice_destroy(p); } static void CheckIter(rocksdb_iterator_t* iter, const char* key, const char* val) { size_t len; const char* str; str = rocksdb_iter_key(iter, &len); CheckEqual(key, str, len); str = rocksdb_iter_value(iter, &len); CheckEqual(val, 
str, len); } // Callback from rocksdb_writebatch_iterate() static void CheckPut(void* ptr, const char* k, size_t klen, const char* v, size_t vlen) { int* state = (int*) ptr; CheckCondition(*state < 2); switch (*state) { case 0: CheckEqual("bar", k, klen); CheckEqual("b", v, vlen); break; case 1: CheckEqual("box", k, klen); CheckEqual("c", v, vlen); break; } (*state)++; } // Callback from rocksdb_writebatch_iterate() static void CheckDel(void* ptr, const char* k, size_t klen) { int* state = (int*) ptr; CheckCondition(*state == 2); CheckEqual("bar", k, klen); (*state)++; } static void CmpDestroy(void* arg) { (void)arg; } static int CmpCompare(void* arg, const char* a, size_t alen, const char* b, size_t blen) { (void)arg; size_t n = (alen < blen) ? alen : blen; int r = memcmp(a, b, n); if (r == 0) { if (alen < blen) r = -1; else if (alen > blen) r = +1; } return r; } static const char* CmpName(void* arg) { (void)arg; return "foo"; } // Custom filter policy static unsigned char fake_filter_result = 1; static void FilterDestroy(void* arg) { (void)arg; } static const char* FilterName(void* arg) { (void)arg; return "TestFilter"; } static char* FilterCreate( void* arg, const char* const* key_array, const size_t* key_length_array, int num_keys, size_t* filter_length) { (void)arg; (void)key_array; (void)key_length_array; (void)num_keys; *filter_length = 4; char* result = malloc(4); memcpy(result, "fake", 4); return result; } static unsigned char FilterKeyMatch( void* arg, const char* key, size_t length, const char* filter, size_t filter_length) { (void)arg; (void)key; (void)length; CheckCondition(filter_length == 4); CheckCondition(memcmp(filter, "fake", 4) == 0); return fake_filter_result; } // Custom compaction filter static void CFilterDestroy(void* arg) { (void)arg; } static const char* CFilterName(void* arg) { (void)arg; return "foo"; } static unsigned char CFilterFilter(void* arg, int level, const char* key, size_t key_length, const char* existing_value, size_t 
value_length, char** new_value, size_t* new_value_length, unsigned char* value_changed) { (void)arg; (void)level; (void)existing_value; (void)value_length; if (key_length == 3) { if (memcmp(key, "bar", key_length) == 0) { return 1; } else if (memcmp(key, "baz", key_length) == 0) { *value_changed = 1; *new_value = "newbazvalue"; *new_value_length = 11; return 0; } } return 0; } static void CFilterFactoryDestroy(void* arg) { (void)arg; } static const char* CFilterFactoryName(void* arg) { (void)arg; return "foo"; } static rocksdb_compactionfilter_t* CFilterCreate( void* arg, rocksdb_compactionfiltercontext_t* context) { (void)arg; (void)context; return rocksdb_compactionfilter_create(NULL, CFilterDestroy, CFilterFilter, CFilterName); } static rocksdb_t* CheckCompaction(rocksdb_t* db, rocksdb_options_t* options, rocksdb_readoptions_t* roptions, rocksdb_writeoptions_t* woptions) { char* err = NULL; db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "foovalue"); rocksdb_put(db, woptions, "bar", 3, "barvalue", 8, &err); CheckNoError(err); CheckGet(db, roptions, "bar", "barvalue"); rocksdb_put(db, woptions, "baz", 3, "bazvalue", 8, &err); CheckNoError(err); CheckGet(db, roptions, "baz", "bazvalue"); // Force compaction rocksdb_compact_range(db, NULL, 0, NULL, 0); // should have filtered bar, but not foo CheckGet(db, roptions, "foo", "foovalue"); CheckGet(db, roptions, "bar", NULL); CheckGet(db, roptions, "baz", "newbazvalue"); return db; } // Custom merge operator static void MergeOperatorDestroy(void* arg) { (void)arg; } static const char* MergeOperatorName(void* arg) { (void)arg; return "TestMergeOperator"; } static char* MergeOperatorFullMerge( void* arg, const char* key, size_t key_length, const char* existing_value, size_t existing_value_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* 
success, size_t* new_value_length) { (void)arg; (void)key; (void)key_length; (void)existing_value; (void)existing_value_length; (void)operands_list; (void)operands_list_length; (void)num_operands; *new_value_length = 4; *success = 1; char* result = malloc(4); memcpy(result, "fake", 4); return result; } static char* MergeOperatorPartialMerge( void* arg, const char* key, size_t key_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length) { (void)arg; (void)key; (void)key_length; (void)operands_list; (void)operands_list_length; (void)num_operands; *new_value_length = 4; *success = 1; char* result = malloc(4); memcpy(result, "fake", 4); return result; } static void CheckTxnGet( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, const char* key, const char* expected) { char* err = NULL; size_t val_len; char* val; val = rocksdb_transaction_get(txn, options, key, strlen(key), &val_len, &err); CheckNoError(err); CheckEqual(expected, val, val_len); Free(&val); } static void CheckTxnGetCF(rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, const char* expected) { char* err = NULL; size_t val_len; char* val; val = rocksdb_transaction_get_cf(txn, options, column_family, key, strlen(key), &val_len, &err); CheckNoError(err); CheckEqual(expected, val, val_len); Free(&val); } static void CheckTxnDBGet( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, const char* key, const char* expected) { char* err = NULL; size_t val_len; char* val; val = rocksdb_transactiondb_get(txn_db, options, key, strlen(key), &val_len, &err); CheckNoError(err); CheckEqual(expected, val, val_len); Free(&val); } static void CheckTxnDBGetCF(rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, const char* expected) { char* err = 
NULL; size_t val_len; char* val; val = rocksdb_transactiondb_get_cf(txn_db, options, column_family, key, strlen(key), &val_len, &err); CheckNoError(err); CheckEqual(expected, val, val_len); Free(&val); } int main(int argc, char** argv) { (void)argc; (void)argv; rocksdb_t* db; rocksdb_comparator_t* cmp; rocksdb_cache_t* cache; rocksdb_dbpath_t *dbpath; rocksdb_env_t* env; rocksdb_options_t* options; rocksdb_compactoptions_t* coptions; rocksdb_block_based_table_options_t* table_options; rocksdb_readoptions_t* roptions; rocksdb_writeoptions_t* woptions; rocksdb_ratelimiter_t* rate_limiter; rocksdb_transactiondb_t* txn_db; rocksdb_transactiondb_options_t* txn_db_options; rocksdb_transaction_t* txn; rocksdb_transaction_options_t* txn_options; rocksdb_optimistictransactiondb_t* otxn_db; rocksdb_optimistictransaction_options_t* otxn_options; char* err = NULL; int run = -1; snprintf(dbname, sizeof(dbname), "%s/rocksdb_c_test-%d", GetTempDir(), ((int) geteuid())); snprintf(dbbackupname, sizeof(dbbackupname), "%s/rocksdb_c_test-%d-backup", GetTempDir(), ((int) geteuid())); snprintf(dbcheckpointname, sizeof(dbcheckpointname), "%s/rocksdb_c_test-%d-checkpoint", GetTempDir(), ((int) geteuid())); snprintf(sstfilename, sizeof(sstfilename), "%s/rocksdb_c_test-%d-sst", GetTempDir(), ((int)geteuid())); snprintf(dbpathname, sizeof(dbpathname), "%s/rocksdb_c_test-%d-dbpath", GetTempDir(), ((int) geteuid())); StartPhase("create_objects"); cmp = rocksdb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName); dbpath = rocksdb_dbpath_create(dbpathname, 1024 * 1024); env = rocksdb_create_default_env(); cache = rocksdb_cache_create_lru(100000); options = rocksdb_options_create(); rocksdb_options_set_comparator(options, cmp); rocksdb_options_set_error_if_exists(options, 1); rocksdb_options_set_env(options, env); rocksdb_options_set_info_log(options, NULL); rocksdb_options_set_write_buffer_size(options, 100000); rocksdb_options_set_paranoid_checks(options, 1); 
rocksdb_options_set_max_open_files(options, 10); rocksdb_options_set_base_background_compactions(options, 1); table_options = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(table_options, cache); rocksdb_block_based_options_set_data_block_index_type(table_options, 1); rocksdb_block_based_options_set_data_block_hash_ratio(table_options, 0.75); rocksdb_options_set_block_based_table_factory(options, table_options); rocksdb_options_set_compression(options, rocksdb_no_compression); rocksdb_options_set_compression_options(options, -14, -1, 0, 0); int compression_levels[] = {rocksdb_no_compression, rocksdb_no_compression, rocksdb_no_compression, rocksdb_no_compression}; rocksdb_options_set_compression_per_level(options, compression_levels, 4); rate_limiter = rocksdb_ratelimiter_create(1000 * 1024 * 1024, 100 * 1000, 10); rocksdb_options_set_ratelimiter(options, rate_limiter); rocksdb_ratelimiter_destroy(rate_limiter); roptions = rocksdb_readoptions_create(); rocksdb_readoptions_set_verify_checksums(roptions, 1); rocksdb_readoptions_set_fill_cache(roptions, 1); woptions = rocksdb_writeoptions_create(); rocksdb_writeoptions_set_sync(woptions, 1); coptions = rocksdb_compactoptions_create(); rocksdb_compactoptions_set_exclusive_manual_compaction(coptions, 1); StartPhase("destroy"); rocksdb_destroy_db(options, dbname, &err); Free(&err); StartPhase("open_error"); rocksdb_open(options, dbname, &err); CheckCondition(err != NULL); Free(&err); StartPhase("open"); rocksdb_options_set_create_if_missing(options, 1); db = rocksdb_open(options, dbname, &err); CheckNoError(err); CheckGet(db, roptions, "foo", NULL); StartPhase("put"); rocksdb_put(db, woptions, "foo", 3, "hello", 5, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "hello"); StartPhase("backup_and_restore"); { rocksdb_destroy_db(options, dbbackupname, &err); CheckNoError(err); rocksdb_backup_engine_t *be = rocksdb_backup_engine_open(options, dbbackupname, &err); CheckNoError(err); 
rocksdb_backup_engine_create_new_backup(be, db, &err); CheckNoError(err); // need a change to trigger a new backup rocksdb_delete(db, woptions, "does-not-exist", 14, &err); CheckNoError(err); rocksdb_backup_engine_create_new_backup(be, db, &err); CheckNoError(err); const rocksdb_backup_engine_info_t* bei = rocksdb_backup_engine_get_backup_info(be); CheckCondition(rocksdb_backup_engine_info_count(bei) > 1); rocksdb_backup_engine_info_destroy(bei); rocksdb_backup_engine_purge_old_backups(be, 1, &err); CheckNoError(err); bei = rocksdb_backup_engine_get_backup_info(be); CheckCondition(rocksdb_backup_engine_info_count(bei) == 1); rocksdb_backup_engine_info_destroy(bei); rocksdb_delete(db, woptions, "foo", 3, &err); CheckNoError(err); rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); rocksdb_restore_options_t *restore_options = rocksdb_restore_options_create(); rocksdb_restore_options_set_keep_log_files(restore_options, 0); rocksdb_backup_engine_restore_db_from_latest_backup(be, dbname, dbname, restore_options, &err); CheckNoError(err); rocksdb_restore_options_destroy(restore_options); rocksdb_options_set_error_if_exists(options, 0); db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_options_set_error_if_exists(options, 1); CheckGet(db, roptions, "foo", "hello"); rocksdb_backup_engine_close(be); } StartPhase("checkpoint"); { rocksdb_destroy_db(options, dbcheckpointname, &err); CheckNoError(err); rocksdb_checkpoint_t* checkpoint = rocksdb_checkpoint_object_create(db, &err); CheckNoError(err); rocksdb_checkpoint_create(checkpoint, dbcheckpointname, 0, &err); CheckNoError(err); // start a new database from the checkpoint rocksdb_close(db); rocksdb_options_set_error_if_exists(options, 0); db = rocksdb_open(options, dbcheckpointname, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "hello"); rocksdb_checkpoint_object_destroy(checkpoint); rocksdb_close(db); rocksdb_destroy_db(options, dbcheckpointname, &err); 
CheckNoError(err); db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_options_set_error_if_exists(options, 1); } StartPhase("compactall"); rocksdb_compact_range(db, NULL, 0, NULL, 0); CheckGet(db, roptions, "foo", "hello"); StartPhase("compactrange"); rocksdb_compact_range(db, "a", 1, "z", 1); CheckGet(db, roptions, "foo", "hello"); StartPhase("compactallopt"); rocksdb_compact_range_opt(db, coptions, NULL, 0, NULL, 0); CheckGet(db, roptions, "foo", "hello"); StartPhase("compactrangeopt"); rocksdb_compact_range_opt(db, coptions, "a", 1, "z", 1); CheckGet(db, roptions, "foo", "hello"); // Simple check cache usage StartPhase("cache_usage"); { rocksdb_readoptions_set_pin_data(roptions, 1); rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); rocksdb_iter_seek(iter, "foo", 3); size_t usage = rocksdb_cache_get_usage(cache); CheckCondition(usage > 0); size_t pin_usage = rocksdb_cache_get_pinned_usage(cache); CheckCondition(pin_usage > 0); rocksdb_iter_next(iter); rocksdb_iter_destroy(iter); rocksdb_readoptions_set_pin_data(roptions, 0); } StartPhase("addfile"); { rocksdb_envoptions_t* env_opt = rocksdb_envoptions_create(); rocksdb_options_t* io_options = rocksdb_options_create(); rocksdb_sstfilewriter_t* writer = rocksdb_sstfilewriter_create(env_opt, io_options); remove(sstfilename); rocksdb_sstfilewriter_open(writer, sstfilename, &err); CheckNoError(err); rocksdb_sstfilewriter_put(writer, "sstk1", 5, "v1", 2, &err); CheckNoError(err); rocksdb_sstfilewriter_put(writer, "sstk2", 5, "v2", 2, &err); CheckNoError(err); rocksdb_sstfilewriter_put(writer, "sstk3", 5, "v3", 2, &err); CheckNoError(err); rocksdb_sstfilewriter_finish(writer, &err); CheckNoError(err); rocksdb_ingestexternalfileoptions_t* ing_opt = rocksdb_ingestexternalfileoptions_create(); const char* file_list[1] = {sstfilename}; rocksdb_ingest_external_file(db, file_list, 1, ing_opt, &err); CheckNoError(err); CheckGet(db, roptions, "sstk1", "v1"); CheckGet(db, roptions, "sstk2", 
"v2"); CheckGet(db, roptions, "sstk3", "v3"); remove(sstfilename); rocksdb_sstfilewriter_open(writer, sstfilename, &err); CheckNoError(err); rocksdb_sstfilewriter_put(writer, "sstk2", 5, "v4", 2, &err); CheckNoError(err); rocksdb_sstfilewriter_put(writer, "sstk22", 6, "v5", 2, &err); CheckNoError(err); rocksdb_sstfilewriter_put(writer, "sstk3", 5, "v6", 2, &err); CheckNoError(err); rocksdb_sstfilewriter_finish(writer, &err); CheckNoError(err); rocksdb_ingest_external_file(db, file_list, 1, ing_opt, &err); CheckNoError(err); CheckGet(db, roptions, "sstk1", "v1"); CheckGet(db, roptions, "sstk2", "v4"); CheckGet(db, roptions, "sstk22", "v5"); CheckGet(db, roptions, "sstk3", "v6"); rocksdb_ingestexternalfileoptions_destroy(ing_opt); rocksdb_sstfilewriter_destroy(writer); rocksdb_options_destroy(io_options); rocksdb_envoptions_destroy(env_opt); // Delete all keys we just ingested rocksdb_delete(db, woptions, "sstk1", 5, &err); CheckNoError(err); rocksdb_delete(db, woptions, "sstk2", 5, &err); CheckNoError(err); rocksdb_delete(db, woptions, "sstk22", 6, &err); CheckNoError(err); rocksdb_delete(db, woptions, "sstk3", 5, &err); CheckNoError(err); } StartPhase("writebatch"); { rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); rocksdb_writebatch_put(wb, "foo", 3, "a", 1); rocksdb_writebatch_clear(wb); rocksdb_writebatch_put(wb, "bar", 3, "b", 1); rocksdb_writebatch_put(wb, "box", 3, "c", 1); rocksdb_writebatch_delete(wb, "bar", 3); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "hello"); CheckGet(db, roptions, "bar", NULL); CheckGet(db, roptions, "box", "c"); int pos = 0; rocksdb_writebatch_iterate(wb, &pos, CheckPut, CheckDel); CheckCondition(pos == 3); rocksdb_writebatch_clear(wb); rocksdb_writebatch_put(wb, "bar", 3, "b", 1); rocksdb_writebatch_put(wb, "bay", 3, "d", 1); rocksdb_writebatch_delete_range(wb, "bar", 3, "bay", 3); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "bar", NULL); 
CheckGet(db, roptions, "bay", "d"); rocksdb_writebatch_clear(wb); const char* start_list[1] = {"bay"}; const size_t start_sizes[1] = {3}; const char* end_list[1] = {"baz"}; const size_t end_sizes[1] = {3}; rocksdb_writebatch_delete_rangev(wb, 1, start_list, start_sizes, end_list, end_sizes); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "bay", NULL); rocksdb_writebatch_destroy(wb); } StartPhase("writebatch_vectors"); { rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); const char* k_list[2] = { "z", "ap" }; const size_t k_sizes[2] = { 1, 2 }; const char* v_list[3] = { "x", "y", "z" }; const size_t v_sizes[3] = { 1, 1, 1 }; rocksdb_writebatch_putv(wb, 2, k_list, k_sizes, 3, v_list, v_sizes); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "zap", "xyz"); rocksdb_writebatch_delete(wb, "zap", 3); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "zap", NULL); rocksdb_writebatch_destroy(wb); } StartPhase("writebatch_savepoint"); { rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); rocksdb_writebatch_set_save_point(wb); rocksdb_writebatch_set_save_point(wb); const char* k_list[2] = {"z", "ap"}; const size_t k_sizes[2] = {1, 2}; const char* v_list[3] = {"x", "y", "z"}; const size_t v_sizes[3] = {1, 1, 1}; rocksdb_writebatch_pop_save_point(wb, &err); CheckNoError(err); rocksdb_writebatch_putv(wb, 2, k_list, k_sizes, 3, v_list, v_sizes); rocksdb_writebatch_rollback_to_save_point(wb, &err); CheckNoError(err); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "zap", NULL); rocksdb_writebatch_destroy(wb); } StartPhase("writebatch_rep"); { rocksdb_writebatch_t* wb1 = rocksdb_writebatch_create(); rocksdb_writebatch_put(wb1, "baz", 3, "d", 1); rocksdb_writebatch_put(wb1, "quux", 4, "e", 1); rocksdb_writebatch_delete(wb1, "quux", 4); size_t repsize1 = 0; const char* rep = rocksdb_writebatch_data(wb1, &repsize1); rocksdb_writebatch_t* wb2 = 
rocksdb_writebatch_create_from(rep, repsize1); CheckCondition(rocksdb_writebatch_count(wb1) == rocksdb_writebatch_count(wb2)); size_t repsize2 = 0; CheckCondition( memcmp(rep, rocksdb_writebatch_data(wb2, &repsize2), repsize1) == 0); rocksdb_writebatch_destroy(wb1); rocksdb_writebatch_destroy(wb2); } StartPhase("writebatch_wi"); { rocksdb_writebatch_wi_t* wbi = rocksdb_writebatch_wi_create(0, 1); rocksdb_writebatch_wi_put(wbi, "foo", 3, "a", 1); rocksdb_writebatch_wi_clear(wbi); rocksdb_writebatch_wi_put(wbi, "bar", 3, "b", 1); rocksdb_writebatch_wi_put(wbi, "box", 3, "c", 1); rocksdb_writebatch_wi_delete(wbi, "bar", 3); int count = rocksdb_writebatch_wi_count(wbi); CheckCondition(count == 3); size_t size; char* value; value = rocksdb_writebatch_wi_get_from_batch(wbi, options, "box", 3, &size, &err); CheckValue(err, "c", &value, size); value = rocksdb_writebatch_wi_get_from_batch(wbi, options, "bar", 3, &size, &err); CheckValue(err, NULL, &value, size); value = rocksdb_writebatch_wi_get_from_batch_and_db(wbi, db, roptions, "foo", 3, &size, &err); CheckValue(err, "hello", &value, size); value = rocksdb_writebatch_wi_get_from_batch_and_db(wbi, db, roptions, "box", 3, &size, &err); CheckValue(err, "c", &value, size); rocksdb_write_writebatch_wi(db, woptions, wbi, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "hello"); CheckGet(db, roptions, "bar", NULL); CheckGet(db, roptions, "box", "c"); int pos = 0; rocksdb_writebatch_wi_iterate(wbi, &pos, CheckPut, CheckDel); CheckCondition(pos == 3); rocksdb_writebatch_wi_clear(wbi); rocksdb_writebatch_wi_destroy(wbi); } StartPhase("writebatch_wi_vectors"); { rocksdb_writebatch_wi_t* wb = rocksdb_writebatch_wi_create(0, 1); const char* k_list[2] = { "z", "ap" }; const size_t k_sizes[2] = { 1, 2 }; const char* v_list[3] = { "x", "y", "z" }; const size_t v_sizes[3] = { 1, 1, 1 }; rocksdb_writebatch_wi_putv(wb, 2, k_list, k_sizes, 3, v_list, v_sizes); rocksdb_write_writebatch_wi(db, woptions, wb, &err); CheckNoError(err); 
CheckGet(db, roptions, "zap", "xyz"); rocksdb_writebatch_wi_delete(wb, "zap", 3); rocksdb_write_writebatch_wi(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "zap", NULL); rocksdb_writebatch_wi_destroy(wb); } StartPhase("writebatch_wi_savepoint"); { rocksdb_writebatch_wi_t* wb = rocksdb_writebatch_wi_create(0, 1); rocksdb_writebatch_wi_set_save_point(wb); const char* k_list[2] = {"z", "ap"}; const size_t k_sizes[2] = {1, 2}; const char* v_list[3] = {"x", "y", "z"}; const size_t v_sizes[3] = {1, 1, 1}; rocksdb_writebatch_wi_putv(wb, 2, k_list, k_sizes, 3, v_list, v_sizes); rocksdb_writebatch_wi_rollback_to_save_point(wb, &err); CheckNoError(err); rocksdb_write_writebatch_wi(db, woptions, wb, &err); CheckNoError(err); CheckGet(db, roptions, "zap", NULL); rocksdb_writebatch_wi_destroy(wb); } StartPhase("iter"); { rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "box", "c"); rocksdb_iter_next(iter); CheckIter(iter, "foo", "hello"); rocksdb_iter_prev(iter); CheckIter(iter, "box", "c"); rocksdb_iter_prev(iter); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_last(iter); CheckIter(iter, "foo", "hello"); rocksdb_iter_seek(iter, "b", 1); CheckIter(iter, "box", "c"); rocksdb_iter_seek_for_prev(iter, "g", 1); CheckIter(iter, "foo", "hello"); rocksdb_iter_seek_for_prev(iter, "box", 3); CheckIter(iter, "box", "c"); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); } StartPhase("wbwi_iter"); { rocksdb_iterator_t* base_iter = rocksdb_create_iterator(db, roptions); rocksdb_writebatch_wi_t* wbi = rocksdb_writebatch_wi_create(0, 1); rocksdb_writebatch_wi_put(wbi, "bar", 3, "b", 1); rocksdb_writebatch_wi_delete(wbi, "foo", 3); rocksdb_iterator_t* iter = rocksdb_writebatch_wi_create_iterator_with_base(wbi, base_iter); CheckCondition(!rocksdb_iter_valid(iter)); 
rocksdb_iter_seek_to_first(iter); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "bar", "b"); rocksdb_iter_next(iter); CheckIter(iter, "box", "c"); rocksdb_iter_prev(iter); CheckIter(iter, "bar", "b"); rocksdb_iter_prev(iter); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_last(iter); CheckIter(iter, "box", "c"); rocksdb_iter_seek(iter, "b", 1); CheckIter(iter, "bar", "b"); rocksdb_iter_seek_for_prev(iter, "c", 1); CheckIter(iter, "box", "c"); rocksdb_iter_seek_for_prev(iter, "box", 3); CheckIter(iter, "box", "c"); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); rocksdb_writebatch_wi_destroy(wbi); } StartPhase("multiget"); { const char* keys[3] = { "box", "foo", "notfound" }; const size_t keys_sizes[3] = { 3, 3, 8 }; char* vals[3]; size_t vals_sizes[3]; char* errs[3]; rocksdb_multi_get(db, roptions, 3, keys, keys_sizes, vals, vals_sizes, errs); int i; for (i = 0; i < 3; i++) { CheckEqual(NULL, errs[i], 0); switch (i) { case 0: CheckEqual("c", vals[i], vals_sizes[i]); break; case 1: CheckEqual("hello", vals[i], vals_sizes[i]); break; case 2: CheckEqual(NULL, vals[i], vals_sizes[i]); break; } Free(&vals[i]); } } StartPhase("pin_get"); { CheckPinGet(db, roptions, "box", "c"); CheckPinGet(db, roptions, "foo", "hello"); CheckPinGet(db, roptions, "notfound", NULL); } StartPhase("approximate_sizes"); { int i; int n = 20000; char keybuf[100]; char valbuf[100]; uint64_t sizes[2]; const char* start[2] = { "a", "k00000000000000010000" }; size_t start_len[2] = { 1, 21 }; const char* limit[2] = { "k00000000000000010000", "z" }; size_t limit_len[2] = { 21, 1 }; rocksdb_writeoptions_set_sync(woptions, 0); for (i = 0; i < n; i++) { snprintf(keybuf, sizeof(keybuf), "k%020d", i); snprintf(valbuf, sizeof(valbuf), "v%020d", i); rocksdb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf), &err); CheckNoError(err); } rocksdb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes); 
CheckCondition(sizes[0] > 0); CheckCondition(sizes[1] > 0); } StartPhase("property"); { char* prop = rocksdb_property_value(db, "nosuchprop"); CheckCondition(prop == NULL); prop = rocksdb_property_value(db, "rocksdb.stats"); CheckCondition(prop != NULL); Free(&prop); } StartPhase("snapshot"); { const rocksdb_snapshot_t* snap; snap = rocksdb_create_snapshot(db); rocksdb_delete(db, woptions, "foo", 3, &err); CheckNoError(err); rocksdb_readoptions_set_snapshot(roptions, snap); CheckGet(db, roptions, "foo", "hello"); rocksdb_readoptions_set_snapshot(roptions, NULL); CheckGet(db, roptions, "foo", NULL); rocksdb_release_snapshot(db, snap); } StartPhase("repair"); { // If we do not compact here, then the lazy deletion of // files (https://reviews.facebook.net/D6123) would leave // around deleted files and the repair process will find // those files and put them back into the database. rocksdb_compact_range(db, NULL, 0, NULL, 0); rocksdb_close(db); rocksdb_options_set_create_if_missing(options, 0); rocksdb_options_set_error_if_exists(options, 0); rocksdb_options_set_wal_recovery_mode(options, 2); rocksdb_repair_db(options, dbname, &err); CheckNoError(err); db = rocksdb_open(options, dbname, &err); CheckNoError(err); CheckGet(db, roptions, "foo", NULL); CheckGet(db, roptions, "bar", NULL); CheckGet(db, roptions, "box", "c"); rocksdb_options_set_create_if_missing(options, 1); rocksdb_options_set_error_if_exists(options, 1); } StartPhase("filter"); for (run = 0; run <= 2; run++) { // First run uses custom filter // Second run uses old block-based bloom filter // Third run uses full bloom filter CheckNoError(err); rocksdb_filterpolicy_t* policy; if (run == 0) { policy = rocksdb_filterpolicy_create(NULL, FilterDestroy, FilterCreate, FilterKeyMatch, NULL, FilterName); } else if (run == 1) { policy = rocksdb_filterpolicy_create_bloom(8); } else { policy = rocksdb_filterpolicy_create_bloom_full(8); } rocksdb_block_based_options_set_filter_policy(table_options, policy); // Create 
new database rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); rocksdb_options_set_block_based_table_factory(options, table_options); db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err); CheckNoError(err); rocksdb_put(db, woptions, "bar", 3, "barvalue", 8, &err); CheckNoError(err); { // Add enough keys to get just one reasonably populated Bloom filter const int keys_to_add = 1500; int i; char keybuf[100]; for (i = 0; i < keys_to_add; i++) { snprintf(keybuf, sizeof(keybuf), "yes%020d", i); rocksdb_put(db, woptions, keybuf, strlen(keybuf), "val", 3, &err); CheckNoError(err); } } rocksdb_compact_range(db, NULL, 0, NULL, 0); fake_filter_result = 1; CheckGet(db, roptions, "foo", "foovalue"); CheckGet(db, roptions, "bar", "barvalue"); if (run == 0) { // Must not find value when custom filter returns false fake_filter_result = 0; CheckGet(db, roptions, "foo", NULL); CheckGet(db, roptions, "bar", NULL); fake_filter_result = 1; CheckGet(db, roptions, "foo", "foovalue"); CheckGet(db, roptions, "bar", "barvalue"); } { // Query some keys not added to identify Bloom filter implementation // from false positive queries, using perfcontext to detect Bloom // filter behavior rocksdb_perfcontext_t* perf = rocksdb_perfcontext_create(); rocksdb_perfcontext_reset(perf); const int keys_to_query = 10000; int i; char keybuf[100]; for (i = 0; i < keys_to_query; i++) { fake_filter_result = i % 2; snprintf(keybuf, sizeof(keybuf), "no%020d", i); CheckGet(db, roptions, keybuf, NULL); } const int hits = (int)rocksdb_perfcontext_metric(perf, rocksdb_bloom_sst_hit_count); if (run == 0) { // Due to half true, half false with fake filter result CheckCondition(hits == keys_to_query / 2); } else if (run == 1) { // Essentially a fingerprint of the block-based Bloom schema CheckCondition(hits == 241); } else { // Essentially a fingerprint of the full Bloom schema(s), // format_version < 5, which vary for three different 
CACHE_LINE_SIZEs CheckCondition(hits == 224 || hits == 180 || hits == 125); } CheckCondition( (keys_to_query - hits) == (int)rocksdb_perfcontext_metric(perf, rocksdb_bloom_sst_miss_count)); rocksdb_perfcontext_destroy(perf); } // Reset the policy rocksdb_block_based_options_set_filter_policy(table_options, NULL); rocksdb_options_set_block_based_table_factory(options, table_options); } StartPhase("compaction_filter"); { rocksdb_options_t* options_with_filter = rocksdb_options_create(); rocksdb_options_set_create_if_missing(options_with_filter, 1); rocksdb_compactionfilter_t* cfilter; cfilter = rocksdb_compactionfilter_create(NULL, CFilterDestroy, CFilterFilter, CFilterName); // Create new database rocksdb_close(db); rocksdb_destroy_db(options_with_filter, dbname, &err); rocksdb_options_set_compaction_filter(options_with_filter, cfilter); db = CheckCompaction(db, options_with_filter, roptions, woptions); rocksdb_options_set_compaction_filter(options_with_filter, NULL); rocksdb_compactionfilter_destroy(cfilter); rocksdb_options_destroy(options_with_filter); } StartPhase("compaction_filter_factory"); { rocksdb_options_t* options_with_filter_factory = rocksdb_options_create(); rocksdb_options_set_create_if_missing(options_with_filter_factory, 1); rocksdb_compactionfilterfactory_t* factory; factory = rocksdb_compactionfilterfactory_create( NULL, CFilterFactoryDestroy, CFilterCreate, CFilterFactoryName); // Create new database rocksdb_close(db); rocksdb_destroy_db(options_with_filter_factory, dbname, &err); rocksdb_options_set_compaction_filter_factory(options_with_filter_factory, factory); db = CheckCompaction(db, options_with_filter_factory, roptions, woptions); rocksdb_options_set_compaction_filter_factory( options_with_filter_factory, NULL); rocksdb_options_destroy(options_with_filter_factory); } StartPhase("merge_operator"); { rocksdb_mergeoperator_t* merge_operator; merge_operator = rocksdb_mergeoperator_create( NULL, MergeOperatorDestroy, MergeOperatorFullMerge, 
MergeOperatorPartialMerge, NULL, MergeOperatorName); // Create new database rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); rocksdb_options_set_merge_operator(options, merge_operator); db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "foovalue"); rocksdb_merge(db, woptions, "foo", 3, "barvalue", 8, &err); CheckNoError(err); CheckGet(db, roptions, "foo", "fake"); // Merge of a non-existing value rocksdb_merge(db, woptions, "bar", 3, "barvalue", 8, &err); CheckNoError(err); CheckGet(db, roptions, "bar", "fake"); } StartPhase("columnfamilies"); { rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); rocksdb_options_t* db_options = rocksdb_options_create(); rocksdb_options_set_create_if_missing(db_options, 1); db = rocksdb_open(db_options, dbname, &err); CheckNoError(err) rocksdb_column_family_handle_t* cfh; cfh = rocksdb_create_column_family(db, db_options, "cf1", &err); rocksdb_column_family_handle_destroy(cfh); CheckNoError(err); rocksdb_close(db); size_t cflen; char** column_fams = rocksdb_list_column_families(db_options, dbname, &cflen, &err); CheckNoError(err); CheckEqual("default", column_fams[0], 7); CheckEqual("cf1", column_fams[1], 3); CheckCondition(cflen == 2); rocksdb_list_column_families_destroy(column_fams, cflen); rocksdb_options_t* cf_options = rocksdb_options_create(); const char* cf_names[2] = {"default", "cf1"}; const rocksdb_options_t* cf_opts[2] = {cf_options, cf_options}; rocksdb_column_family_handle_t* handles[2]; db = rocksdb_open_column_families(db_options, dbname, 2, cf_names, cf_opts, handles, &err); CheckNoError(err); rocksdb_put_cf(db, woptions, handles[1], "foo", 3, "hello", 5, &err); CheckNoError(err); rocksdb_put_cf(db, woptions, handles[1], "foobar1", 7, "hello1", 6, &err); CheckNoError(err); rocksdb_put_cf(db, woptions, handles[1], "foobar2", 7, "hello2", 6, &err); 
CheckNoError(err); rocksdb_put_cf(db, woptions, handles[1], "foobar3", 7, "hello3", 6, &err); CheckNoError(err); rocksdb_put_cf(db, woptions, handles[1], "foobar4", 7, "hello4", 6, &err); CheckNoError(err); rocksdb_flushoptions_t *flush_options = rocksdb_flushoptions_create(); rocksdb_flushoptions_set_wait(flush_options, 1); rocksdb_flush_cf(db, flush_options, handles[1], &err); CheckNoError(err) rocksdb_flushoptions_destroy(flush_options); CheckGetCF(db, roptions, handles[1], "foo", "hello"); CheckPinGetCF(db, roptions, handles[1], "foo", "hello"); rocksdb_delete_cf(db, woptions, handles[1], "foo", 3, &err); CheckNoError(err); rocksdb_delete_range_cf(db, woptions, handles[1], "foobar2", 7, "foobar4", 7, &err); CheckNoError(err); CheckGetCF(db, roptions, handles[1], "foo", NULL); CheckPinGetCF(db, roptions, handles[1], "foo", NULL); rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); rocksdb_writebatch_put_cf(wb, handles[1], "baz", 3, "a", 1); rocksdb_writebatch_clear(wb); rocksdb_writebatch_put_cf(wb, handles[1], "bar", 3, "b", 1); rocksdb_writebatch_put_cf(wb, handles[1], "box", 3, "c", 1); rocksdb_writebatch_delete_cf(wb, handles[1], "bar", 3); rocksdb_write(db, woptions, wb, &err); CheckNoError(err); CheckGetCF(db, roptions, handles[1], "baz", NULL); CheckGetCF(db, roptions, handles[1], "bar", NULL); CheckGetCF(db, roptions, handles[1], "box", "c"); CheckPinGetCF(db, roptions, handles[1], "baz", NULL); CheckPinGetCF(db, roptions, handles[1], "bar", NULL); CheckPinGetCF(db, roptions, handles[1], "box", "c"); rocksdb_writebatch_destroy(wb); const char* keys[3] = { "box", "box", "barfooxx" }; const rocksdb_column_family_handle_t* get_handles[3] = { handles[0], handles[1], handles[1] }; const size_t keys_sizes[3] = { 3, 3, 8 }; char* vals[3]; size_t vals_sizes[3]; char* errs[3]; rocksdb_multi_get_cf(db, roptions, get_handles, 3, keys, keys_sizes, vals, vals_sizes, errs); int i; for (i = 0; i < 3; i++) { CheckEqual(NULL, errs[i], 0); switch (i) { case 0: 
CheckEqual(NULL, vals[i], vals_sizes[i]); // wrong cf break; case 1: CheckEqual("c", vals[i], vals_sizes[i]); // bingo break; case 2: CheckEqual(NULL, vals[i], vals_sizes[i]); // normal not found break; } Free(&vals[i]); } rocksdb_iterator_t* iter = rocksdb_create_iterator_cf(db, roptions, handles[1]); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); CheckCondition(rocksdb_iter_valid(iter)); for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) { i++; } CheckCondition(i == 3); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); rocksdb_column_family_handle_t* iters_cf_handles[2] = { handles[0], handles[1] }; rocksdb_iterator_t* iters_handles[2]; rocksdb_create_iterators(db, roptions, iters_cf_handles, iters_handles, 2, &err); CheckNoError(err); iter = iters_handles[0]; CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_destroy(iter); iter = iters_handles[1]; CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); CheckCondition(rocksdb_iter_valid(iter)); for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) { i++; } CheckCondition(i == 3); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); rocksdb_drop_column_family(db, handles[1], &err); CheckNoError(err); for (i = 0; i < 2; i++) { rocksdb_column_family_handle_destroy(handles[i]); } rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); rocksdb_options_destroy(db_options); rocksdb_options_destroy(cf_options); } StartPhase("prefix"); { // Create new database rocksdb_options_set_allow_mmap_reads(options, 1); rocksdb_options_set_prefix_extractor(options, rocksdb_slicetransform_create_fixed_prefix(3)); rocksdb_options_set_hash_skip_list_rep(options, 5000, 4, 4); rocksdb_options_set_plain_table_factory(options, 4, 10, 0.75, 16); rocksdb_options_set_allow_concurrent_memtable_write(options, 0); 
db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo1", 4, "foo", 3, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo2", 4, "foo", 3, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo3", 4, "foo", 3, &err); CheckNoError(err); rocksdb_put(db, woptions, "bar1", 4, "bar", 3, &err); CheckNoError(err); rocksdb_put(db, woptions, "bar2", 4, "bar", 3, &err); CheckNoError(err); rocksdb_put(db, woptions, "bar3", 4, "bar", 3, &err); CheckNoError(err); rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek(iter, "bar", 3); rocksdb_iter_get_error(iter, &err); CheckNoError(err); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "bar1", "bar"); rocksdb_iter_next(iter); CheckIter(iter, "bar2", "bar"); rocksdb_iter_next(iter); CheckIter(iter, "bar3", "bar"); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); rocksdb_readoptions_set_total_order_seek(roptions, 1); iter = rocksdb_create_iterator(db, roptions); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek(iter, "ba", 2); rocksdb_iter_get_error(iter, &err); CheckNoError(err); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "bar1", "bar"); rocksdb_iter_destroy(iter); rocksdb_readoptions_set_total_order_seek(roptions, 0); rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); } // Check memory usage stats StartPhase("approximate_memory_usage"); { // Create database db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_memory_consumers_t* consumers; consumers = rocksdb_memory_consumers_create(); rocksdb_memory_consumers_add_db(consumers, db); rocksdb_memory_consumers_add_cache(consumers, cache); // take memory usage report before write-read operation rocksdb_memory_usage_t* mu1; mu1 = rocksdb_approximate_memory_usage_create(consumers, &err); CheckNoError(err); // Put data (this should affect memtables) rocksdb_put(db, woptions, 
"memory", 6, "test", 4, &err); CheckNoError(err); CheckGet(db, roptions, "memory", "test"); // take memory usage report after write-read operation rocksdb_memory_usage_t* mu2; mu2 = rocksdb_approximate_memory_usage_create(consumers, &err); CheckNoError(err); // amount of memory used within memtables should grow CheckCondition(rocksdb_approximate_memory_usage_get_mem_table_total(mu2) >= rocksdb_approximate_memory_usage_get_mem_table_total(mu1)); CheckCondition(rocksdb_approximate_memory_usage_get_mem_table_unflushed(mu2) >= rocksdb_approximate_memory_usage_get_mem_table_unflushed(mu1)); rocksdb_memory_consumers_destroy(consumers); rocksdb_approximate_memory_usage_destroy(mu1); rocksdb_approximate_memory_usage_destroy(mu2); rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); } StartPhase("cuckoo_options"); { rocksdb_cuckoo_table_options_t* cuckoo_options; cuckoo_options = rocksdb_cuckoo_options_create(); rocksdb_cuckoo_options_set_hash_ratio(cuckoo_options, 0.5); rocksdb_cuckoo_options_set_max_search_depth(cuckoo_options, 200); rocksdb_cuckoo_options_set_cuckoo_block_size(cuckoo_options, 10); rocksdb_cuckoo_options_set_identity_as_first_hash(cuckoo_options, 1); rocksdb_cuckoo_options_set_use_module_hash(cuckoo_options, 0); rocksdb_options_set_cuckoo_table_factory(options, cuckoo_options); db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_cuckoo_options_destroy(cuckoo_options); } StartPhase("options"); { rocksdb_options_t* o; o = rocksdb_options_create(); // Set and check options. 
rocksdb_options_set_allow_ingest_behind(o, 1); CheckCondition(1 == rocksdb_options_get_allow_ingest_behind(o)); rocksdb_options_compaction_readahead_size(o, 10); CheckCondition(10 == rocksdb_options_get_compaction_readahead_size(o)); rocksdb_options_set_create_if_missing(o, 1); CheckCondition(1 == rocksdb_options_get_create_if_missing(o)); rocksdb_options_set_create_missing_column_families(o, 1); CheckCondition(1 == rocksdb_options_get_create_missing_column_families(o)); rocksdb_options_set_error_if_exists(o, 1); CheckCondition(1 == rocksdb_options_get_error_if_exists(o)); rocksdb_options_set_paranoid_checks(o, 1); CheckCondition(1 == rocksdb_options_get_paranoid_checks(o)); rocksdb_options_set_info_log_level(o, 3); CheckCondition(3 == rocksdb_options_get_info_log_level(o)); rocksdb_options_set_write_buffer_size(o, 100); CheckCondition(100 == rocksdb_options_get_write_buffer_size(o)); rocksdb_options_set_db_write_buffer_size(o, 1000); CheckCondition(1000 == rocksdb_options_get_db_write_buffer_size(o)); rocksdb_options_set_max_open_files(o, 21); CheckCondition(21 == rocksdb_options_get_max_open_files(o)); rocksdb_options_set_max_file_opening_threads(o, 5); CheckCondition(5 == rocksdb_options_get_max_file_opening_threads(o)); rocksdb_options_set_max_total_wal_size(o, 400); CheckCondition(400 == rocksdb_options_get_max_total_wal_size(o)); rocksdb_options_set_num_levels(o, 7); CheckCondition(7 == rocksdb_options_get_num_levels(o)); rocksdb_options_set_level0_file_num_compaction_trigger(o, 4); CheckCondition(4 == rocksdb_options_get_level0_file_num_compaction_trigger(o)); rocksdb_options_set_level0_slowdown_writes_trigger(o, 6); CheckCondition(6 == rocksdb_options_get_level0_slowdown_writes_trigger(o)); rocksdb_options_set_level0_stop_writes_trigger(o, 8); CheckCondition(8 == rocksdb_options_get_level0_stop_writes_trigger(o)); rocksdb_options_set_target_file_size_base(o, 256); CheckCondition(256 == rocksdb_options_get_target_file_size_base(o)); 
rocksdb_options_set_target_file_size_multiplier(o, 3); CheckCondition(3 == rocksdb_options_get_target_file_size_multiplier(o)); rocksdb_options_set_max_bytes_for_level_base(o, 1024); CheckCondition(1024 == rocksdb_options_get_max_bytes_for_level_base(o)); rocksdb_options_set_level_compaction_dynamic_level_bytes(o, 1); CheckCondition(1 == rocksdb_options_get_level_compaction_dynamic_level_bytes(o)); rocksdb_options_set_max_bytes_for_level_multiplier(o, 2.0); CheckCondition(2.0 == rocksdb_options_get_max_bytes_for_level_multiplier(o)); rocksdb_options_set_skip_stats_update_on_db_open(o, 1); CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o)); rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(o, 1); CheckCondition( 1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(o)); rocksdb_options_set_max_write_buffer_number(o, 97); CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(o)); rocksdb_options_set_min_write_buffer_number_to_merge(o, 23); CheckCondition(23 == rocksdb_options_get_min_write_buffer_number_to_merge(o)); rocksdb_options_set_max_write_buffer_number_to_maintain(o, 64); CheckCondition(64 == rocksdb_options_get_max_write_buffer_number_to_maintain(o)); rocksdb_options_set_max_write_buffer_size_to_maintain(o, 50000); CheckCondition(50000 == rocksdb_options_get_max_write_buffer_size_to_maintain(o)); rocksdb_options_set_enable_pipelined_write(o, 1); CheckCondition(1 == rocksdb_options_get_enable_pipelined_write(o)); rocksdb_options_set_unordered_write(o, 1); CheckCondition(1 == rocksdb_options_get_unordered_write(o)); rocksdb_options_set_max_subcompactions(o, 123456); CheckCondition(123456 == rocksdb_options_get_max_subcompactions(o)); // Create a copy that should be equal to the original. 
rocksdb_options_t* copy; copy = rocksdb_options_create_copy(o); CheckCondition(1 == rocksdb_options_get_allow_ingest_behind(copy)); CheckCondition(10 == rocksdb_options_get_compaction_readahead_size(copy)); CheckCondition(1 == rocksdb_options_get_create_if_missing(copy)); CheckCondition(1 == rocksdb_options_get_create_missing_column_families(copy)); CheckCondition(1 == rocksdb_options_get_error_if_exists(copy)); CheckCondition(1 == rocksdb_options_get_paranoid_checks(copy)); CheckCondition(3 == rocksdb_options_get_info_log_level(copy)); CheckCondition(100 == rocksdb_options_get_write_buffer_size(copy)); CheckCondition(1000 == rocksdb_options_get_db_write_buffer_size(copy)); CheckCondition(21 == rocksdb_options_get_max_open_files(copy)); CheckCondition(5 == rocksdb_options_get_max_file_opening_threads(copy)); CheckCondition(400 == rocksdb_options_get_max_total_wal_size(copy)); CheckCondition(7 == rocksdb_options_get_num_levels(copy)); CheckCondition( 4 == rocksdb_options_get_level0_file_num_compaction_trigger(copy)); CheckCondition(6 == rocksdb_options_get_level0_slowdown_writes_trigger(copy)); CheckCondition(8 == rocksdb_options_get_level0_stop_writes_trigger(copy)); CheckCondition(256 == rocksdb_options_get_target_file_size_base(copy)); CheckCondition(3 == rocksdb_options_get_target_file_size_multiplier(copy)); CheckCondition(1024 == rocksdb_options_get_max_bytes_for_level_base(copy)); CheckCondition( 1 == rocksdb_options_get_level_compaction_dynamic_level_bytes(copy)); CheckCondition(2.0 == rocksdb_options_get_max_bytes_for_level_multiplier(copy)); CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(copy)); CheckCondition( 1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(copy)); CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(copy)); CheckCondition(23 == rocksdb_options_get_min_write_buffer_number_to_merge(copy)); CheckCondition( 64 == rocksdb_options_get_max_write_buffer_number_to_maintain(copy)); 
CheckCondition(50000 == rocksdb_options_get_max_write_buffer_size_to_maintain(copy)); CheckCondition(1 == rocksdb_options_get_enable_pipelined_write(copy)); CheckCondition(1 == rocksdb_options_get_unordered_write(copy)); CheckCondition(123456 == rocksdb_options_get_max_subcompactions(copy)); // Copies should be independent. rocksdb_options_set_allow_ingest_behind(copy, 0); CheckCondition(0 == rocksdb_options_get_allow_ingest_behind(copy)); CheckCondition(1 == rocksdb_options_get_allow_ingest_behind(o)); rocksdb_options_compaction_readahead_size(copy, 20); CheckCondition(20 == rocksdb_options_get_compaction_readahead_size(copy)); CheckCondition(10 == rocksdb_options_get_compaction_readahead_size(o)); rocksdb_options_set_create_if_missing(copy, 0); CheckCondition(0 == rocksdb_options_get_create_if_missing(copy)); CheckCondition(1 == rocksdb_options_get_create_if_missing(o)); rocksdb_options_set_create_missing_column_families(copy, 0); CheckCondition(0 == rocksdb_options_get_create_missing_column_families(copy)); CheckCondition(1 == rocksdb_options_get_create_missing_column_families(o)); rocksdb_options_set_error_if_exists(copy, 0); CheckCondition(0 == rocksdb_options_get_error_if_exists(copy)); CheckCondition(1 == rocksdb_options_get_error_if_exists(o)); rocksdb_options_set_paranoid_checks(copy, 0); CheckCondition(0 == rocksdb_options_get_paranoid_checks(copy)); CheckCondition(1 == rocksdb_options_get_paranoid_checks(o)); rocksdb_options_set_info_log_level(copy, 2); CheckCondition(2 == rocksdb_options_get_info_log_level(copy)); CheckCondition(3 == rocksdb_options_get_info_log_level(o)); rocksdb_options_set_write_buffer_size(copy, 200); CheckCondition(200 == rocksdb_options_get_write_buffer_size(copy)); CheckCondition(100 == rocksdb_options_get_write_buffer_size(o)); rocksdb_options_set_db_write_buffer_size(copy, 2000); CheckCondition(2000 == rocksdb_options_get_db_write_buffer_size(copy)); CheckCondition(1000 == rocksdb_options_get_db_write_buffer_size(o)); 
rocksdb_options_set_max_open_files(copy, 42); CheckCondition(42 == rocksdb_options_get_max_open_files(copy)); CheckCondition(21 == rocksdb_options_get_max_open_files(o)); rocksdb_options_set_max_file_opening_threads(copy, 3); CheckCondition(3 == rocksdb_options_get_max_file_opening_threads(copy)); CheckCondition(5 == rocksdb_options_get_max_file_opening_threads(o)); rocksdb_options_set_max_total_wal_size(copy, 4000); CheckCondition(4000 == rocksdb_options_get_max_total_wal_size(copy)); CheckCondition(400 == rocksdb_options_get_max_total_wal_size(o)); rocksdb_options_set_num_levels(copy, 6); CheckCondition(6 == rocksdb_options_get_num_levels(copy)); CheckCondition(7 == rocksdb_options_get_num_levels(o)); rocksdb_options_set_level0_file_num_compaction_trigger(copy, 14); CheckCondition( 14 == rocksdb_options_get_level0_file_num_compaction_trigger(copy)); CheckCondition(4 == rocksdb_options_get_level0_file_num_compaction_trigger(o)); rocksdb_options_set_level0_slowdown_writes_trigger(copy, 61); CheckCondition(61 == rocksdb_options_get_level0_slowdown_writes_trigger(copy)); CheckCondition(6 == rocksdb_options_get_level0_slowdown_writes_trigger(o)); rocksdb_options_set_level0_stop_writes_trigger(copy, 17); CheckCondition(17 == rocksdb_options_get_level0_stop_writes_trigger(copy)); CheckCondition(8 == rocksdb_options_get_level0_stop_writes_trigger(o)); rocksdb_options_set_target_file_size_base(copy, 128); CheckCondition(128 == rocksdb_options_get_target_file_size_base(copy)); CheckCondition(256 == rocksdb_options_get_target_file_size_base(o)); rocksdb_options_set_target_file_size_multiplier(copy, 13); CheckCondition(13 == rocksdb_options_get_target_file_size_multiplier(copy)); CheckCondition(3 == rocksdb_options_get_target_file_size_multiplier(o)); rocksdb_options_set_max_bytes_for_level_base(copy, 900); CheckCondition(900 == rocksdb_options_get_max_bytes_for_level_base(copy)); CheckCondition(1024 == rocksdb_options_get_max_bytes_for_level_base(o)); 
rocksdb_options_set_level_compaction_dynamic_level_bytes(copy, 0); CheckCondition( 0 == rocksdb_options_get_level_compaction_dynamic_level_bytes(copy)); CheckCondition(1 == rocksdb_options_get_level_compaction_dynamic_level_bytes(o)); rocksdb_options_set_max_bytes_for_level_multiplier(copy, 8.0); CheckCondition(8.0 == rocksdb_options_get_max_bytes_for_level_multiplier(copy)); CheckCondition(2.0 == rocksdb_options_get_max_bytes_for_level_multiplier(o)); rocksdb_options_set_skip_stats_update_on_db_open(copy, 0); CheckCondition(0 == rocksdb_options_get_skip_stats_update_on_db_open(copy)); CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o)); rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(copy, 0); CheckCondition( 0 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(copy)); CheckCondition( 1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(o)); rocksdb_options_set_max_write_buffer_number(copy, 2000); CheckCondition(2000 == rocksdb_options_get_max_write_buffer_number(copy)); CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(o)); rocksdb_options_set_min_write_buffer_number_to_merge(copy, 146); CheckCondition(146 == rocksdb_options_get_min_write_buffer_number_to_merge(copy)); CheckCondition(23 == rocksdb_options_get_min_write_buffer_number_to_merge(o)); rocksdb_options_set_max_write_buffer_number_to_maintain(copy, 128); CheckCondition( 128 == rocksdb_options_get_max_write_buffer_number_to_maintain(copy)); CheckCondition(64 == rocksdb_options_get_max_write_buffer_number_to_maintain(o)); rocksdb_options_set_max_write_buffer_size_to_maintain(copy, 9000); CheckCondition(9000 == rocksdb_options_get_max_write_buffer_size_to_maintain(copy)); CheckCondition(50000 == rocksdb_options_get_max_write_buffer_size_to_maintain(o)); rocksdb_options_set_enable_pipelined_write(copy, 0); CheckCondition(0 == rocksdb_options_get_enable_pipelined_write(copy)); CheckCondition(1 == 
rocksdb_options_get_enable_pipelined_write(o)); rocksdb_options_set_unordered_write(copy, 0); CheckCondition(0 == rocksdb_options_get_unordered_write(copy)); CheckCondition(1 == rocksdb_options_get_unordered_write(o)); rocksdb_options_set_max_subcompactions(copy, 90001); CheckCondition(90001 == rocksdb_options_get_max_subcompactions(copy)); CheckCondition(123456 == rocksdb_options_get_max_subcompactions(o)); rocksdb_options_destroy(copy); rocksdb_options_destroy(o); } StartPhase("iterate_upper_bound"); { // Create new empty database rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); rocksdb_options_set_prefix_extractor(options, NULL); db = rocksdb_open(options, dbname, &err); CheckNoError(err); rocksdb_put(db, woptions, "a", 1, "0", 1, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo", 3, "bar", 3, &err); CheckNoError(err); rocksdb_put(db, woptions, "foo1", 4, "bar1", 4, &err); CheckNoError(err); rocksdb_put(db, woptions, "g1", 2, "0", 1, &err); CheckNoError(err); // testing basic case with no iterate_upper_bound and no prefix_extractor { rocksdb_readoptions_set_iterate_upper_bound(roptions, NULL, 0); rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); rocksdb_iter_seek(iter, "foo", 3); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "foo", "bar"); rocksdb_iter_next(iter); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "foo1", "bar1"); rocksdb_iter_next(iter); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "g1", "0"); rocksdb_iter_destroy(iter); } // testing iterate_upper_bound and forward iterator // to make sure it stops at bound { // iterate_upper_bound points beyond the last expected entry rocksdb_readoptions_set_iterate_upper_bound(roptions, "foo2", 4); rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions); rocksdb_iter_seek(iter, "foo", 3); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "foo", "bar"); rocksdb_iter_next(iter); 
CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "foo1", "bar1"); rocksdb_iter_next(iter); // should stop here... CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_destroy(iter); rocksdb_readoptions_set_iterate_upper_bound(roptions, NULL, 0); } } StartPhase("transactions"); { rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); // open a TransactionDB txn_db_options = rocksdb_transactiondb_options_create(); txn_options = rocksdb_transaction_options_create(); rocksdb_options_set_create_if_missing(options, 1); txn_db = rocksdb_transactiondb_open(options, txn_db_options, dbname, &err); CheckNoError(err); // put outside a transaction rocksdb_transactiondb_put(txn_db, woptions, "foo", 3, "hello", 5, &err); CheckNoError(err); CheckTxnDBGet(txn_db, roptions, "foo", "hello"); // delete from outside transaction rocksdb_transactiondb_delete(txn_db, woptions, "foo", 3, &err); CheckNoError(err); CheckTxnDBGet(txn_db, roptions, "foo", NULL); // write batch into TransactionDB rocksdb_writebatch_t* wb = rocksdb_writebatch_create(); rocksdb_writebatch_put(wb, "foo", 3, "a", 1); rocksdb_writebatch_clear(wb); rocksdb_writebatch_put(wb, "bar", 3, "b", 1); rocksdb_writebatch_put(wb, "box", 3, "c", 1); rocksdb_writebatch_delete(wb, "bar", 3); rocksdb_transactiondb_write(txn_db, woptions, wb, &err); rocksdb_writebatch_destroy(wb); CheckTxnDBGet(txn_db, roptions, "box", "c"); CheckNoError(err); // begin a transaction txn = rocksdb_transaction_begin(txn_db, woptions, txn_options, NULL); // put rocksdb_transaction_put(txn, "foo", 3, "hello", 5, &err); CheckNoError(err); CheckTxnGet(txn, roptions, "foo", "hello"); // delete rocksdb_transaction_delete(txn, "foo", 3, &err); CheckNoError(err); CheckTxnGet(txn, roptions, "foo", NULL); rocksdb_transaction_put(txn, "foo", 3, "hello", 5, &err); CheckNoError(err); // read from outside transaction, before commit CheckTxnDBGet(txn_db, roptions, "foo", NULL); // commit rocksdb_transaction_commit(txn, &err); 
CheckNoError(err); // read from outside transaction, after commit CheckTxnDBGet(txn_db, roptions, "foo", "hello"); // reuse old transaction txn = rocksdb_transaction_begin(txn_db, woptions, txn_options, txn); // snapshot const rocksdb_snapshot_t* snapshot; snapshot = rocksdb_transactiondb_create_snapshot(txn_db); rocksdb_readoptions_set_snapshot(roptions, snapshot); rocksdb_transactiondb_put(txn_db, woptions, "foo", 3, "hey", 3, &err); CheckNoError(err); CheckTxnDBGet(txn_db, roptions, "foo", "hello"); rocksdb_readoptions_set_snapshot(roptions, NULL); rocksdb_transactiondb_release_snapshot(txn_db, snapshot); CheckTxnDBGet(txn_db, roptions, "foo", "hey"); // iterate rocksdb_transaction_put(txn, "bar", 3, "hi", 2, &err); rocksdb_iterator_t* iter = rocksdb_transaction_create_iterator(txn, roptions); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "bar", "hi"); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); // rollback rocksdb_transaction_rollback(txn, &err); CheckNoError(err); CheckTxnDBGet(txn_db, roptions, "bar", NULL); // save point rocksdb_transaction_put(txn, "foo1", 4, "hi1", 3, &err); rocksdb_transaction_set_savepoint(txn); CheckTxnGet(txn, roptions, "foo1", "hi1"); rocksdb_transaction_put(txn, "foo2", 4, "hi2", 3, &err); CheckTxnGet(txn, roptions, "foo2", "hi2"); // rollback to savepoint rocksdb_transaction_rollback_to_savepoint(txn, &err); CheckNoError(err); CheckTxnGet(txn, roptions, "foo2", NULL); CheckTxnGet(txn, roptions, "foo1", "hi1"); CheckTxnDBGet(txn_db, roptions, "foo1", NULL); CheckTxnDBGet(txn_db, roptions, "foo2", NULL); rocksdb_transaction_commit(txn, &err); CheckNoError(err); CheckTxnDBGet(txn_db, roptions, "foo1", "hi1"); CheckTxnDBGet(txn_db, roptions, "foo2", NULL); // Column families. 
rocksdb_column_family_handle_t* cfh; cfh = rocksdb_transactiondb_create_column_family(txn_db, options, "txn_db_cf", &err); CheckNoError(err); rocksdb_transactiondb_put_cf(txn_db, woptions, cfh, "cf_foo", 6, "cf_hello", 8, &err); CheckNoError(err); CheckTxnDBGetCF(txn_db, roptions, cfh, "cf_foo", "cf_hello"); rocksdb_transactiondb_delete_cf(txn_db, woptions, cfh, "cf_foo", 6, &err); CheckNoError(err); CheckTxnDBGetCF(txn_db, roptions, cfh, "cf_foo", NULL); rocksdb_column_family_handle_destroy(cfh); // close and destroy rocksdb_transaction_destroy(txn); rocksdb_transactiondb_close(txn_db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); rocksdb_transaction_options_destroy(txn_options); rocksdb_transactiondb_options_destroy(txn_db_options); } StartPhase("optimistic_transactions"); { rocksdb_options_t* db_options = rocksdb_options_create(); rocksdb_options_set_create_if_missing(db_options, 1); rocksdb_options_set_allow_concurrent_memtable_write(db_options, 1); otxn_db = rocksdb_optimistictransactiondb_open(db_options, dbname, &err); otxn_options = rocksdb_optimistictransaction_options_create(); rocksdb_transaction_t* txn1 = rocksdb_optimistictransaction_begin( otxn_db, woptions, otxn_options, NULL); rocksdb_transaction_t* txn2 = rocksdb_optimistictransaction_begin( otxn_db, woptions, otxn_options, NULL); rocksdb_transaction_put(txn1, "key", 3, "value", 5, &err); CheckNoError(err); rocksdb_transaction_put(txn2, "key1", 4, "value1", 6, &err); CheckNoError(err); CheckTxnGet(txn1, roptions, "key", "value"); rocksdb_transaction_commit(txn1, &err); CheckNoError(err); rocksdb_transaction_commit(txn2, &err); CheckNoError(err); rocksdb_transaction_destroy(txn1); rocksdb_transaction_destroy(txn2); // Check column family db = rocksdb_optimistictransactiondb_get_base_db(otxn_db); rocksdb_put(db, woptions, "key", 3, "value", 5, &err); CheckNoError(err); rocksdb_column_family_handle_t *cfh1, *cfh2; cfh1 = rocksdb_create_column_family(db, db_options, "txn_db_cf1", 
&err); cfh2 = rocksdb_create_column_family(db, db_options, "txn_db_cf2", &err); txn = rocksdb_optimistictransaction_begin(otxn_db, woptions, otxn_options, NULL); rocksdb_transaction_put_cf(txn, cfh1, "key_cf1", 7, "val_cf1", 7, &err); CheckNoError(err); rocksdb_transaction_put_cf(txn, cfh2, "key_cf2", 7, "val_cf2", 7, &err); CheckNoError(err); rocksdb_transaction_commit(txn, &err); CheckNoError(err); txn = rocksdb_optimistictransaction_begin(otxn_db, woptions, otxn_options, txn); CheckGetCF(db, roptions, cfh1, "key_cf1", "val_cf1"); CheckTxnGetCF(txn, roptions, cfh1, "key_cf1", "val_cf1"); // Check iterator with column family rocksdb_transaction_put_cf(txn, cfh1, "key1_cf", 7, "val1_cf", 7, &err); CheckNoError(err); rocksdb_iterator_t* iter = rocksdb_transaction_create_iterator_cf(txn, roptions, cfh1); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); CheckCondition(rocksdb_iter_valid(iter)); CheckIter(iter, "key1_cf", "val1_cf"); rocksdb_iter_get_error(iter, &err); CheckNoError(err); rocksdb_iter_destroy(iter); rocksdb_transaction_destroy(txn); rocksdb_column_family_handle_destroy(cfh1); rocksdb_column_family_handle_destroy(cfh2); rocksdb_optimistictransactiondb_close_base_db(db); rocksdb_optimistictransactiondb_close(otxn_db); // Check open optimistic transaction db with column families size_t cf_len; char** column_fams = rocksdb_list_column_families(db_options, dbname, &cf_len, &err); CheckNoError(err); CheckEqual("default", column_fams[0], 7); CheckEqual("txn_db_cf1", column_fams[1], 10); CheckEqual("txn_db_cf2", column_fams[2], 10); CheckCondition(cf_len == 3); rocksdb_list_column_families_destroy(column_fams, cf_len); const char* cf_names[3] = {"default", "txn_db_cf1", "txn_db_cf2"}; rocksdb_options_t* cf_options = rocksdb_options_create(); const rocksdb_options_t* cf_opts[3] = {cf_options, cf_options, cf_options}; rocksdb_options_set_error_if_exists(cf_options, 0); rocksdb_column_family_handle_t* cf_handles[3]; otxn_db = 
rocksdb_optimistictransactiondb_open_column_families( db_options, dbname, 3, cf_names, cf_opts, cf_handles, &err); CheckNoError(err); rocksdb_transaction_t* txn_cf = rocksdb_optimistictransaction_begin( otxn_db, woptions, otxn_options, NULL); CheckTxnGetCF(txn_cf, roptions, cf_handles[0], "key", "value"); CheckTxnGetCF(txn_cf, roptions, cf_handles[1], "key_cf1", "val_cf1"); CheckTxnGetCF(txn_cf, roptions, cf_handles[2], "key_cf2", "val_cf2"); rocksdb_transaction_destroy(txn_cf); rocksdb_options_destroy(cf_options); rocksdb_column_family_handle_destroy(cf_handles[0]); rocksdb_column_family_handle_destroy(cf_handles[1]); rocksdb_column_family_handle_destroy(cf_handles[2]); rocksdb_optimistictransactiondb_close(otxn_db); rocksdb_destroy_db(db_options, dbname, &err); rocksdb_options_destroy(db_options); rocksdb_optimistictransaction_options_destroy(otxn_options); CheckNoError(err); } // Simple sanity check that setting memtable rep works. StartPhase("memtable_reps"); { // Create database with vector memtable. rocksdb_options_set_memtable_vector_rep(options); db = rocksdb_open(options, dbname, &err); CheckNoError(err); // Create database with hash skiplist memtable. rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); CheckNoError(err); rocksdb_options_set_hash_skip_list_rep(options, 5000, 4, 4); db = rocksdb_open(options, dbname, &err); CheckNoError(err); } // Check that secondary instance works. 
StartPhase("open_as_secondary"); { rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); rocksdb_options_t* db_options = rocksdb_options_create(); rocksdb_options_set_create_if_missing(db_options, 1); db = rocksdb_open(db_options, dbname, &err); CheckNoError(err); rocksdb_t* db1; rocksdb_options_t* opts = rocksdb_options_create(); rocksdb_options_set_max_open_files(opts, -1); rocksdb_options_set_create_if_missing(opts, 1); snprintf(secondary_path, sizeof(secondary_path), "%s/rocksdb_c_test_secondary-%d", GetTempDir(), ((int)geteuid())); db1 = rocksdb_open_as_secondary(opts, dbname, secondary_path, &err); CheckNoError(err); rocksdb_writeoptions_set_sync(woptions, 0); rocksdb_writeoptions_disable_WAL(woptions, 1); rocksdb_put(db, woptions, "key0", 4, "value0", 6, &err); CheckNoError(err); rocksdb_flushoptions_t* flush_opts = rocksdb_flushoptions_create(); rocksdb_flushoptions_set_wait(flush_opts, 1); rocksdb_flush(db, flush_opts, &err); CheckNoError(err); rocksdb_try_catch_up_with_primary(db1, &err); CheckNoError(err); rocksdb_readoptions_t* ropts = rocksdb_readoptions_create(); rocksdb_readoptions_set_verify_checksums(ropts, 1); rocksdb_readoptions_set_snapshot(ropts, NULL); CheckGet(db, ropts, "key0", "value0"); CheckGet(db1, ropts, "key0", "value0"); rocksdb_writeoptions_disable_WAL(woptions, 0); rocksdb_put(db, woptions, "key1", 4, "value1", 6, &err); CheckNoError(err); rocksdb_try_catch_up_with_primary(db1, &err); CheckNoError(err); CheckGet(db1, ropts, "key0", "value0"); CheckGet(db1, ropts, "key1", "value1"); rocksdb_close(db1); rocksdb_destroy_db(opts, secondary_path, &err); CheckNoError(err); rocksdb_options_destroy(db_options); rocksdb_options_destroy(opts); rocksdb_readoptions_destroy(ropts); rocksdb_flushoptions_destroy(flush_opts); } // Simple sanity check that options setting db_paths work. 
StartPhase("open_db_paths"); { rocksdb_close(db); rocksdb_destroy_db(options, dbname, &err); const rocksdb_dbpath_t* paths[1] = {dbpath}; rocksdb_options_set_db_paths(options, paths, 1); db = rocksdb_open(options, dbname, &err); CheckNoError(err); } StartPhase("cancel_all_background_work"); rocksdb_cancel_all_background_work(db, 1); StartPhase("cleanup"); rocksdb_close(db); rocksdb_options_destroy(options); rocksdb_block_based_options_destroy(table_options); rocksdb_readoptions_destroy(roptions); rocksdb_writeoptions_destroy(woptions); rocksdb_compactoptions_destroy(coptions); rocksdb_cache_destroy(cache); rocksdb_comparator_destroy(cmp); rocksdb_dbpath_destroy(dbpath); rocksdb_env_destroy(env); fprintf(stderr, "PASS\n"); return 0; } #else int main() { fprintf(stderr, "SKIPPED\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/column_family.cc000066400000000000000000001655151370372246700170140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/column_family.h" #include #include #include #include #include #include "db/compaction/compaction_picker.h" #include "db/compaction/compaction_picker_fifo.h" #include "db/compaction/compaction_picker_level.h" #include "db/compaction/compaction_picker_universal.h" #include "db/db_impl/db_impl.h" #include "db/internal_stats.h" #include "db/job_context.h" #include "db/range_del_aggregator.h" #include "db/table_properties_collector.h" #include "db/version_set.h" #include "db/write_controller.h" #include "file/sst_file_manager_impl.h" #include "memtable/hash_skiplist_rep.h" #include "monitoring/thread_status_util.h" #include "options/options_helper.h" #include "port/port.h" #include "table/block_based/block_based_table_factory.h" #include "table/merging_iterator.h" #include "util/autovector.h" #include "util/compression.h" namespace ROCKSDB_NAMESPACE { ColumnFamilyHandleImpl::ColumnFamilyHandleImpl( ColumnFamilyData* column_family_data, DBImpl* db, InstrumentedMutex* mutex) : cfd_(column_family_data), db_(db), mutex_(mutex) { if (cfd_ != nullptr) { cfd_->Ref(); } } ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() { if (cfd_ != nullptr) { #ifndef ROCKSDB_LITE for (auto& listener : cfd_->ioptions()->listeners) { listener->OnColumnFamilyHandleDeletionStarted(this); } #endif // ROCKSDB_LITE // Job id == 0 means that this is not our background process, but rather // user thread // Need to hold some shared pointers owned by the initial_cf_options // before final cleaning up finishes. 
ColumnFamilyOptions initial_cf_options_copy = cfd_->initial_cf_options(); JobContext job_context(0); mutex_->Lock(); bool dropped = cfd_->IsDropped(); if (cfd_->UnrefAndTryDelete()) { if (dropped) { db_->FindObsoleteFiles(&job_context, false, true); } } mutex_->Unlock(); if (job_context.HaveSomethingToDelete()) { bool defer_purge = db_->immutable_db_options().avoid_unnecessary_blocking_io; db_->PurgeObsoleteFiles(job_context, defer_purge); if (defer_purge) { mutex_->Lock(); db_->SchedulePurge(); mutex_->Unlock(); } } job_context.Clean(); } } uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd()->GetID(); } const std::string& ColumnFamilyHandleImpl::GetName() const { return cfd()->GetName(); } Status ColumnFamilyHandleImpl::GetDescriptor(ColumnFamilyDescriptor* desc) { #ifndef ROCKSDB_LITE // accessing mutable cf-options requires db mutex. InstrumentedMutexLock l(mutex_); *desc = ColumnFamilyDescriptor(cfd()->GetName(), cfd()->GetLatestCFOptions()); return Status::OK(); #else (void)desc; return Status::NotSupported(); #endif // !ROCKSDB_LITE } const Comparator* ColumnFamilyHandleImpl::GetComparator() const { return cfd()->user_comparator(); } void GetIntTblPropCollectorFactory( const ImmutableCFOptions& ioptions, std::vector>* int_tbl_prop_collector_factories) { auto& collector_factories = ioptions.table_properties_collector_factories; for (size_t i = 0; i < ioptions.table_properties_collector_factories.size(); ++i) { assert(collector_factories[i]); int_tbl_prop_collector_factories->emplace_back( new UserKeyTablePropertiesCollectorFactory(collector_factories[i])); } } Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options) { if (!cf_options.compression_per_level.empty()) { for (size_t level = 0; level < cf_options.compression_per_level.size(); ++level) { if (!CompressionTypeSupported(cf_options.compression_per_level[level])) { return Status::InvalidArgument( "Compression type " + 
CompressionTypeToString(cf_options.compression_per_level[level]) + " is not linked with the binary."); } } } else { if (!CompressionTypeSupported(cf_options.compression)) { return Status::InvalidArgument( "Compression type " + CompressionTypeToString(cf_options.compression) + " is not linked with the binary."); } } if (cf_options.compression_opts.zstd_max_train_bytes > 0) { if (!ZSTD_TrainDictionarySupported()) { return Status::InvalidArgument( "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ " "is not linked with the binary."); } if (cf_options.compression_opts.max_dict_bytes == 0) { return Status::InvalidArgument( "The dictionary size limit (`CompressionOptions::max_dict_bytes`) " "should be nonzero if we're using zstd's dictionary generator."); } } return Status::OK(); } Status CheckConcurrentWritesSupported(const ColumnFamilyOptions& cf_options) { if (cf_options.inplace_update_support) { return Status::InvalidArgument( "In-place memtable updates (inplace_update_support) is not compatible " "with concurrent writes (allow_concurrent_memtable_write)"); } if (!cf_options.memtable_factory->IsInsertConcurrentlySupported()) { return Status::InvalidArgument( "Memtable doesn't concurrent writes (allow_concurrent_memtable_write)"); } return Status::OK(); } Status CheckCFPathsSupported(const DBOptions& db_options, const ColumnFamilyOptions& cf_options) { // More than one cf_paths are supported only in universal // and level compaction styles. This function also checks the case // in which cf_paths is not specified, which results in db_paths // being used. if ((cf_options.compaction_style != kCompactionStyleUniversal) && (cf_options.compaction_style != kCompactionStyleLevel)) { if (cf_options.cf_paths.size() > 1) { return Status::NotSupported( "More than one CF paths are only supported in " "universal and level compaction styles. 
"); } else if (cf_options.cf_paths.empty() && db_options.db_paths.size() > 1) { return Status::NotSupported( "More than one DB paths are only supported in " "universal and level compaction styles. "); } } return Status::OK(); } namespace { const uint64_t kDefaultTtl = 0xfffffffffffffffe; const uint64_t kDefaultPeriodicCompSecs = 0xfffffffffffffffe; }; // namespace ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& src) { ColumnFamilyOptions result = src; size_t clamp_max = std::conditional< sizeof(size_t) == 4, std::integral_constant, std::integral_constant>::type::value; ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max); // if user sets arena_block_size, we trust user to use this value. Otherwise, // calculate a proper value from writer_buffer_size; if (result.arena_block_size <= 0) { result.arena_block_size = result.write_buffer_size / 8; // Align up to 4k const size_t align = 4 * 1024; result.arena_block_size = ((result.arena_block_size + align - 1) / align) * align; } result.min_write_buffer_number_to_merge = std::min(result.min_write_buffer_number_to_merge, result.max_write_buffer_number - 1); if (result.min_write_buffer_number_to_merge < 1) { result.min_write_buffer_number_to_merge = 1; } if (result.num_levels < 1) { result.num_levels = 1; } if (result.compaction_style == kCompactionStyleLevel && result.num_levels < 2) { result.num_levels = 2; } if (result.compaction_style == kCompactionStyleUniversal && db_options.allow_ingest_behind && result.num_levels < 3) { result.num_levels = 3; } if (result.max_write_buffer_number < 2) { result.max_write_buffer_number = 2; } // fall back max_write_buffer_number_to_maintain if // max_write_buffer_size_to_maintain is not set if (result.max_write_buffer_size_to_maintain < 0) { result.max_write_buffer_size_to_maintain = result.max_write_buffer_number * static_cast(result.write_buffer_size); } else if (result.max_write_buffer_size_to_maintain == 0 && 
result.max_write_buffer_number_to_maintain < 0) { result.max_write_buffer_number_to_maintain = result.max_write_buffer_number; } // bloom filter size shouldn't exceed 1/4 of memtable size. if (result.memtable_prefix_bloom_size_ratio > 0.25) { result.memtable_prefix_bloom_size_ratio = 0.25; } else if (result.memtable_prefix_bloom_size_ratio < 0) { result.memtable_prefix_bloom_size_ratio = 0; } if (!result.prefix_extractor) { assert(result.memtable_factory); Slice name = result.memtable_factory->Name(); if (name.compare("HashSkipListRepFactory") == 0 || name.compare("HashLinkListRepFactory") == 0) { result.memtable_factory = std::make_shared(); } } if (result.compaction_style == kCompactionStyleFIFO) { result.num_levels = 1; // since we delete level0 files in FIFO compaction when there are too many // of them, these options don't really mean anything result.level0_slowdown_writes_trigger = std::numeric_limits::max(); result.level0_stop_writes_trigger = std::numeric_limits::max(); } if (result.max_bytes_for_level_multiplier <= 0) { result.max_bytes_for_level_multiplier = 1; } if (result.level0_file_num_compaction_trigger == 0) { ROCKS_LOG_WARN(db_options.info_log.get(), "level0_file_num_compaction_trigger cannot be 0"); result.level0_file_num_compaction_trigger = 1; } if (result.level0_stop_writes_trigger < result.level0_slowdown_writes_trigger || result.level0_slowdown_writes_trigger < result.level0_file_num_compaction_trigger) { ROCKS_LOG_WARN(db_options.info_log.get(), "This condition must be satisfied: " "level0_stop_writes_trigger(%d) >= " "level0_slowdown_writes_trigger(%d) >= " "level0_file_num_compaction_trigger(%d)", result.level0_stop_writes_trigger, result.level0_slowdown_writes_trigger, result.level0_file_num_compaction_trigger); if (result.level0_slowdown_writes_trigger < result.level0_file_num_compaction_trigger) { result.level0_slowdown_writes_trigger = result.level0_file_num_compaction_trigger; } if (result.level0_stop_writes_trigger < 
result.level0_slowdown_writes_trigger) { result.level0_stop_writes_trigger = result.level0_slowdown_writes_trigger; } ROCKS_LOG_WARN(db_options.info_log.get(), "Adjust the value to " "level0_stop_writes_trigger(%d)" "level0_slowdown_writes_trigger(%d)" "level0_file_num_compaction_trigger(%d)", result.level0_stop_writes_trigger, result.level0_slowdown_writes_trigger, result.level0_file_num_compaction_trigger); } if (result.soft_pending_compaction_bytes_limit == 0) { result.soft_pending_compaction_bytes_limit = result.hard_pending_compaction_bytes_limit; } else if (result.hard_pending_compaction_bytes_limit > 0 && result.soft_pending_compaction_bytes_limit > result.hard_pending_compaction_bytes_limit) { result.soft_pending_compaction_bytes_limit = result.hard_pending_compaction_bytes_limit; } #ifndef ROCKSDB_LITE // When the DB is stopped, it's possible that there are some .trash files that // were not deleted yet, when we open the DB we will find these .trash files // and schedule them to be deleted (or delete immediately if SstFileManager // was not used) auto sfm = static_cast(db_options.sst_file_manager.get()); for (size_t i = 0; i < result.cf_paths.size(); i++) { DeleteScheduler::CleanupDirectory(db_options.env, sfm, result.cf_paths[i].path); } #endif if (result.cf_paths.empty()) { result.cf_paths = db_options.db_paths; } if (result.level_compaction_dynamic_level_bytes) { if (result.compaction_style != kCompactionStyleLevel || result.cf_paths.size() > 1U) { // 1. level_compaction_dynamic_level_bytes only makes sense for // level-based compaction. // 2. we don't yet know how to make both of this feature and multiple // DB path work. 
result.level_compaction_dynamic_level_bytes = false; } } if (result.max_compaction_bytes == 0) { result.max_compaction_bytes = result.target_file_size_base * 25; } bool is_block_based_table = (result.table_factory->Name() == BlockBasedTableFactory().Name()); const uint64_t kAdjustedTtl = 30 * 24 * 60 * 60; if (result.ttl == kDefaultTtl) { if (is_block_based_table && result.compaction_style != kCompactionStyleFIFO) { result.ttl = kAdjustedTtl; } else { result.ttl = 0; } } const uint64_t kAdjustedPeriodicCompSecs = 30 * 24 * 60 * 60; // Turn on periodic compactions and set them to occur once every 30 days if // compaction filters are used and periodic_compaction_seconds is set to the // default value. if (result.compaction_style != kCompactionStyleFIFO) { if ((result.compaction_filter != nullptr || result.compaction_filter_factory != nullptr) && result.periodic_compaction_seconds == kDefaultPeriodicCompSecs && is_block_based_table) { result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; } } else { // result.compaction_style == kCompactionStyleFIFO if (result.ttl == 0) { if (is_block_based_table) { if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; } result.ttl = result.periodic_compaction_seconds; } } else if (result.periodic_compaction_seconds != 0) { result.ttl = std::min(result.ttl, result.periodic_compaction_seconds); } } // TTL compactions would work similar to Periodic Compactions in Universal in // most of the cases. So, if ttl is set, execute the periodic compaction // codepath. 
if (result.compaction_style == kCompactionStyleUniversal && result.ttl != 0) { if (result.periodic_compaction_seconds != 0) { result.periodic_compaction_seconds = std::min(result.ttl, result.periodic_compaction_seconds); } else { result.periodic_compaction_seconds = result.ttl; } } if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs) { result.periodic_compaction_seconds = 0; } return result; } int SuperVersion::dummy = 0; void* const SuperVersion::kSVInUse = &SuperVersion::dummy; void* const SuperVersion::kSVObsolete = nullptr; SuperVersion::~SuperVersion() { for (auto td : to_delete) { delete td; } } SuperVersion* SuperVersion::Ref() { refs.fetch_add(1, std::memory_order_relaxed); return this; } bool SuperVersion::Unref() { // fetch_sub returns the previous value of ref uint32_t previous_refs = refs.fetch_sub(1); assert(previous_refs > 0); return previous_refs == 1; } void SuperVersion::Cleanup() { assert(refs.load(std::memory_order_relaxed) == 0); imm->Unref(&to_delete); MemTable* m = mem->Unref(); if (m != nullptr) { auto* memory_usage = current->cfd()->imm()->current_memory_usage(); assert(*memory_usage >= m->ApproximateMemoryUsage()); *memory_usage -= m->ApproximateMemoryUsage(); to_delete.push_back(m); } current->Unref(); if (cfd->Unref()) { delete cfd; } } void SuperVersion::Init(ColumnFamilyData* new_cfd, MemTable* new_mem, MemTableListVersion* new_imm, Version* new_current) { cfd = new_cfd; mem = new_mem; imm = new_imm; current = new_current; cfd->Ref(); mem->Ref(); imm->Ref(); current->Ref(); refs.store(1, std::memory_order_relaxed); } namespace { void SuperVersionUnrefHandle(void* ptr) { // UnrefHandle is called when a thread exists or a ThreadLocalPtr gets // destroyed. When former happens, the thread shouldn't see kSVInUse. // When latter happens, we are in ~ColumnFamilyData(), no get should happen as // well. 
SuperVersion* sv = static_cast(ptr); bool was_last_ref __attribute__((__unused__)); was_last_ref = sv->Unref(); // Thread-local SuperVersions can't outlive ColumnFamilyData::super_version_. // This is important because we can't do SuperVersion cleanup here. // That would require locking DB mutex, which would deadlock because // SuperVersionUnrefHandle is called with locked ThreadLocalPtr mutex. assert(!was_last_ref); } } // anonymous namespace std::vector ColumnFamilyData::GetDbPaths() const { std::vector paths; paths.reserve(ioptions_.cf_paths.size()); for (const DbPath& db_path : ioptions_.cf_paths) { paths.emplace_back(db_path.path); } return paths; } const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId = port::kMaxUint32; ColumnFamilyData::ColumnFamilyData( uint32_t id, const std::string& name, Version* _dummy_versions, Cache* _table_cache, WriteBufferManager* write_buffer_manager, const ColumnFamilyOptions& cf_options, const ImmutableDBOptions& db_options, const FileOptions& file_options, ColumnFamilySet* column_family_set, BlockCacheTracer* const block_cache_tracer) : id_(id), name_(name), dummy_versions_(_dummy_versions), current_(nullptr), refs_(0), initialized_(false), dropped_(false), internal_comparator_(cf_options.comparator), initial_cf_options_(SanitizeOptions(db_options, cf_options)), ioptions_(db_options, initial_cf_options_), mutable_cf_options_(initial_cf_options_), is_delete_range_supported_( cf_options.table_factory->IsDeleteRangeSupported()), write_buffer_manager_(write_buffer_manager), mem_(nullptr), imm_(ioptions_.min_write_buffer_number_to_merge, ioptions_.max_write_buffer_number_to_maintain, ioptions_.max_write_buffer_size_to_maintain), super_version_(nullptr), super_version_number_(0), local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)), next_(nullptr), prev_(nullptr), log_number_(0), flush_reason_(FlushReason::kOthers), column_family_set_(column_family_set), queued_for_flush_(false), queued_for_compaction_(false), 
prev_compaction_needed_bytes_(0), allow_2pc_(db_options.allow_2pc), last_memtable_id_(0), db_paths_registered_(false) { if (id_ != kDummyColumnFamilyDataId) { // TODO(cc): RegisterDbPaths can be expensive, considering moving it // outside of this constructor which might be called with db mutex held. // TODO(cc): considering using ioptions_.fs, currently some tests rely on // EnvWrapper, that's the main reason why we use env here. Status s = ioptions_.env->RegisterDbPaths(GetDbPaths()); if (s.ok()) { db_paths_registered_ = true; } else { ROCKS_LOG_ERROR( ioptions_.info_log, "Failed to register data paths of column family (id: %d, name: %s)", id_, name_.c_str()); } } Ref(); // Convert user defined table properties collector factories to internal ones. GetIntTblPropCollectorFactory(ioptions_, &int_tbl_prop_collector_factories_); // if _dummy_versions is nullptr, then this is a dummy column family. if (_dummy_versions != nullptr) { internal_stats_.reset( new InternalStats(ioptions_.num_levels, db_options.env, this)); table_cache_.reset(new TableCache(ioptions_, file_options, _table_cache, block_cache_tracer)); if (ioptions_.compaction_style == kCompactionStyleLevel) { compaction_picker_.reset( new LevelCompactionPicker(ioptions_, &internal_comparator_)); #ifndef ROCKSDB_LITE } else if (ioptions_.compaction_style == kCompactionStyleUniversal) { compaction_picker_.reset( new UniversalCompactionPicker(ioptions_, &internal_comparator_)); } else if (ioptions_.compaction_style == kCompactionStyleFIFO) { compaction_picker_.reset( new FIFOCompactionPicker(ioptions_, &internal_comparator_)); } else if (ioptions_.compaction_style == kCompactionStyleNone) { compaction_picker_.reset(new NullCompactionPicker( ioptions_, &internal_comparator_)); ROCKS_LOG_WARN(ioptions_.info_log, "Column family %s does not use any background compaction. 
" "Compactions can only be done via CompactFiles\n", GetName().c_str()); #endif // !ROCKSDB_LITE } else { ROCKS_LOG_ERROR(ioptions_.info_log, "Unable to recognize the specified compaction style %d. " "Column family %s will use kCompactionStyleLevel.\n", ioptions_.compaction_style, GetName().c_str()); compaction_picker_.reset( new LevelCompactionPicker(ioptions_, &internal_comparator_)); } if (column_family_set_->NumberOfColumnFamilies() < 10) { ROCKS_LOG_INFO(ioptions_.info_log, "--------------- Options for column family [%s]:\n", name.c_str()); initial_cf_options_.Dump(ioptions_.info_log); } else { ROCKS_LOG_INFO(ioptions_.info_log, "\t(skipping printing options)\n"); } } RecalculateWriteStallConditions(mutable_cf_options_); } // DB mutex held ColumnFamilyData::~ColumnFamilyData() { assert(refs_.load(std::memory_order_relaxed) == 0); // remove from linked list auto prev = prev_; auto next = next_; prev->next_ = next; next->prev_ = prev; if (!dropped_ && column_family_set_ != nullptr) { // If it's dropped, it's already removed from column family set // If column_family_set_ == nullptr, this is dummy CFD and not in // ColumnFamilySet column_family_set_->RemoveColumnFamily(this); } if (current_ != nullptr) { current_->Unref(); } // It would be wrong if this ColumnFamilyData is in flush_queue_ or // compaction_queue_ and we destroyed it assert(!queued_for_flush_); assert(!queued_for_compaction_); assert(super_version_ == nullptr); if (dummy_versions_ != nullptr) { // List must be empty assert(dummy_versions_->TEST_Next() == dummy_versions_); bool deleted __attribute__((__unused__)); deleted = dummy_versions_->Unref(); assert(deleted); } if (mem_ != nullptr) { delete mem_->Unref(); } autovector to_delete; imm_.current()->Unref(&to_delete); for (MemTable* m : to_delete) { delete m; } if (db_paths_registered_) { // TODO(cc): considering using ioptions_.fs, currently some tests rely on // EnvWrapper, that's the main reason why we use env here. 
Status s = ioptions_.env->UnregisterDbPaths(GetDbPaths()); if (!s.ok()) { ROCKS_LOG_ERROR( ioptions_.info_log, "Failed to unregister data paths of column family (id: %d, name: %s)", id_, name_.c_str()); } } } bool ColumnFamilyData::UnrefAndTryDelete() { int old_refs = refs_.fetch_sub(1); assert(old_refs > 0); if (old_refs == 1) { assert(super_version_ == nullptr); delete this; return true; } if (old_refs == 2 && super_version_ != nullptr) { // Only the super_version_ holds me SuperVersion* sv = super_version_; super_version_ = nullptr; // Release SuperVersion reference kept in ThreadLocalPtr. // This must be done outside of mutex_ since unref handler can lock mutex. sv->db_mutex->Unlock(); local_sv_.reset(); sv->db_mutex->Lock(); if (sv->Unref()) { // May delete this ColumnFamilyData after calling Cleanup() sv->Cleanup(); delete sv; return true; } } return false; } void ColumnFamilyData::SetDropped() { // can't drop default CF assert(id_ != 0); dropped_ = true; write_controller_token_.reset(); // remove from column_family_set column_family_set_->RemoveColumnFamily(this); } ColumnFamilyOptions ColumnFamilyData::GetLatestCFOptions() const { return BuildColumnFamilyOptions(initial_cf_options_, mutable_cf_options_); } uint64_t ColumnFamilyData::OldestLogToKeep() { auto current_log = GetLogNumber(); if (allow_2pc_) { autovector empty_list; auto imm_prep_log = imm()->PrecomputeMinLogContainingPrepSection(empty_list); auto mem_prep_log = mem()->GetMinLogContainingPrepSection(); if (imm_prep_log > 0 && imm_prep_log < current_log) { current_log = imm_prep_log; } if (mem_prep_log > 0 && mem_prep_log < current_log) { current_log = mem_prep_log; } } return current_log; } const double kIncSlowdownRatio = 0.8; const double kDecSlowdownRatio = 1 / kIncSlowdownRatio; const double kNearStopSlowdownRatio = 0.6; const double kDelayRecoverSlowdownRatio = 1.4; namespace { // If penalize_stop is true, we further reduce slowdown rate. 
std::unique_ptr SetupDelay( WriteController* write_controller, uint64_t compaction_needed_bytes, uint64_t prev_compaction_need_bytes, bool penalize_stop, bool auto_comapctions_disabled) { const uint64_t kMinWriteRate = 16 * 1024u; // Minimum write rate 16KB/s. uint64_t max_write_rate = write_controller->max_delayed_write_rate(); uint64_t write_rate = write_controller->delayed_write_rate(); if (auto_comapctions_disabled) { // When auto compaction is disabled, always use the value user gave. write_rate = max_write_rate; } else if (write_controller->NeedsDelay() && max_write_rate > kMinWriteRate) { // If user gives rate less than kMinWriteRate, don't adjust it. // // If already delayed, need to adjust based on previous compaction debt. // When there are two or more column families require delay, we always // increase or reduce write rate based on information for one single // column family. It is likely to be OK but we can improve if there is a // problem. // Ignore compaction_needed_bytes = 0 case because compaction_needed_bytes // is only available in level-based compaction // // If the compaction debt stays the same as previously, we also further slow // down. It usually means a mem table is full. It's mainly for the case // where both of flush and compaction are much slower than the speed we // insert to mem tables, so we need to actively slow down before we get // feedback signal from compaction and flushes to avoid the full stop // because of hitting the max write buffer number. // // If DB just falled into the stop condition, we need to further reduce // the write rate to avoid the stop condition. if (penalize_stop) { // Penalize the near stop or stop condition by more aggressive slowdown. // This is to provide the long term slowdown increase signal. // The penalty is more than the reward of recovering to the normal // condition. 
write_rate = static_cast(static_cast(write_rate) * kNearStopSlowdownRatio); if (write_rate < kMinWriteRate) { write_rate = kMinWriteRate; } } else if (prev_compaction_need_bytes > 0 && prev_compaction_need_bytes <= compaction_needed_bytes) { write_rate = static_cast(static_cast(write_rate) * kIncSlowdownRatio); if (write_rate < kMinWriteRate) { write_rate = kMinWriteRate; } } else if (prev_compaction_need_bytes > compaction_needed_bytes) { // We are speeding up by ratio of kSlowdownRatio when we have paid // compaction debt. But we'll never speed up to faster than the write rate // given by users. write_rate = static_cast(static_cast(write_rate) * kDecSlowdownRatio); if (write_rate > max_write_rate) { write_rate = max_write_rate; } } } return write_controller->GetDelayToken(write_rate); } int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger, int level0_slowdown_writes_trigger) { // SanitizeOptions() ensures it. assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger); if (level0_file_num_compaction_trigger < 0) { return std::numeric_limits::max(); } const int64_t twice_level0_trigger = static_cast(level0_file_num_compaction_trigger) * 2; const int64_t one_fourth_trigger_slowdown = static_cast(level0_file_num_compaction_trigger) + ((level0_slowdown_writes_trigger - level0_file_num_compaction_trigger) / 4); assert(twice_level0_trigger >= 0); assert(one_fourth_trigger_slowdown >= 0); // 1/4 of the way between L0 compaction trigger threshold and slowdown // condition. // Or twice as compaction trigger, if it is smaller. 
int64_t res = std::min(twice_level0_trigger, one_fourth_trigger_slowdown); if (res >= port::kMaxInt32) { return port::kMaxInt32; } else { // res fits in int return static_cast(res); } } } // namespace std::pair ColumnFamilyData::GetWriteStallConditionAndCause( int num_unflushed_memtables, int num_l0_files, uint64_t num_compaction_needed_bytes, const MutableCFOptions& mutable_cf_options) { if (num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number) { return {WriteStallCondition::kStopped, WriteStallCause::kMemtableLimit}; } else if (!mutable_cf_options.disable_auto_compactions && num_l0_files >= mutable_cf_options.level0_stop_writes_trigger) { return {WriteStallCondition::kStopped, WriteStallCause::kL0FileCountLimit}; } else if (!mutable_cf_options.disable_auto_compactions && mutable_cf_options.hard_pending_compaction_bytes_limit > 0 && num_compaction_needed_bytes >= mutable_cf_options.hard_pending_compaction_bytes_limit) { return {WriteStallCondition::kStopped, WriteStallCause::kPendingCompactionBytes}; } else if (mutable_cf_options.max_write_buffer_number > 3 && num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number - 1) { return {WriteStallCondition::kDelayed, WriteStallCause::kMemtableLimit}; } else if (!mutable_cf_options.disable_auto_compactions && mutable_cf_options.level0_slowdown_writes_trigger >= 0 && num_l0_files >= mutable_cf_options.level0_slowdown_writes_trigger) { return {WriteStallCondition::kDelayed, WriteStallCause::kL0FileCountLimit}; } else if (!mutable_cf_options.disable_auto_compactions && mutable_cf_options.soft_pending_compaction_bytes_limit > 0 && num_compaction_needed_bytes >= mutable_cf_options.soft_pending_compaction_bytes_limit) { return {WriteStallCondition::kDelayed, WriteStallCause::kPendingCompactionBytes}; } return {WriteStallCondition::kNormal, WriteStallCause::kNone}; } WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions( const MutableCFOptions& mutable_cf_options) { auto 
write_stall_condition = WriteStallCondition::kNormal; if (current_ != nullptr) { auto* vstorage = current_->storage_info(); auto write_controller = column_family_set_->write_controller_; uint64_t compaction_needed_bytes = vstorage->estimated_compaction_needed_bytes(); auto write_stall_condition_and_cause = GetWriteStallConditionAndCause( imm()->NumNotFlushed(), vstorage->l0_delay_trigger_count(), vstorage->estimated_compaction_needed_bytes(), mutable_cf_options); write_stall_condition = write_stall_condition_and_cause.first; auto write_stall_cause = write_stall_condition_and_cause.second; bool was_stopped = write_controller->IsStopped(); bool needed_delay = write_controller->NeedsDelay(); if (write_stall_condition == WriteStallCondition::kStopped && write_stall_cause == WriteStallCause::kMemtableLimit) { write_controller_token_ = write_controller->GetStopToken(); internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_STOPS, 1); ROCKS_LOG_WARN( ioptions_.info_log, "[%s] Stopping writes because we have %d immutable memtables " "(waiting for flush), max_write_buffer_number is set to %d", name_.c_str(), imm()->NumNotFlushed(), mutable_cf_options.max_write_buffer_number); } else if (write_stall_condition == WriteStallCondition::kStopped && write_stall_cause == WriteStallCause::kL0FileCountLimit) { write_controller_token_ = write_controller->GetStopToken(); internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_STOPS, 1); if (compaction_picker_->IsLevel0CompactionInProgress()) { internal_stats_->AddCFStats( InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_STOPS, 1); } ROCKS_LOG_WARN(ioptions_.info_log, "[%s] Stopping writes because we have %d level-0 files", name_.c_str(), vstorage->l0_delay_trigger_count()); } else if (write_stall_condition == WriteStallCondition::kStopped && write_stall_cause == WriteStallCause::kPendingCompactionBytes) { write_controller_token_ = write_controller->GetStopToken(); internal_stats_->AddCFStats( 
InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS, 1); ROCKS_LOG_WARN( ioptions_.info_log, "[%s] Stopping writes because of estimated pending compaction " "bytes %" PRIu64, name_.c_str(), compaction_needed_bytes); } else if (write_stall_condition == WriteStallCondition::kDelayed && write_stall_cause == WriteStallCause::kMemtableLimit) { write_controller_token_ = SetupDelay(write_controller, compaction_needed_bytes, prev_compaction_needed_bytes_, was_stopped, mutable_cf_options.disable_auto_compactions); internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_SLOWDOWNS, 1); ROCKS_LOG_WARN( ioptions_.info_log, "[%s] Stalling writes because we have %d immutable memtables " "(waiting for flush), max_write_buffer_number is set to %d " "rate %" PRIu64, name_.c_str(), imm()->NumNotFlushed(), mutable_cf_options.max_write_buffer_number, write_controller->delayed_write_rate()); } else if (write_stall_condition == WriteStallCondition::kDelayed && write_stall_cause == WriteStallCause::kL0FileCountLimit) { // L0 is the last two files from stopping. 
bool near_stop = vstorage->l0_delay_trigger_count() >= mutable_cf_options.level0_stop_writes_trigger - 2; write_controller_token_ = SetupDelay(write_controller, compaction_needed_bytes, prev_compaction_needed_bytes_, was_stopped || near_stop, mutable_cf_options.disable_auto_compactions); internal_stats_->AddCFStats(InternalStats::L0_FILE_COUNT_LIMIT_SLOWDOWNS, 1); if (compaction_picker_->IsLevel0CompactionInProgress()) { internal_stats_->AddCFStats( InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS, 1); } ROCKS_LOG_WARN(ioptions_.info_log, "[%s] Stalling writes because we have %d level-0 files " "rate %" PRIu64, name_.c_str(), vstorage->l0_delay_trigger_count(), write_controller->delayed_write_rate()); } else if (write_stall_condition == WriteStallCondition::kDelayed && write_stall_cause == WriteStallCause::kPendingCompactionBytes) { // If the distance to hard limit is less than 1/4 of the gap between soft // and // hard bytes limit, we think it is near stop and speed up the slowdown. 
bool near_stop = mutable_cf_options.hard_pending_compaction_bytes_limit > 0 && (compaction_needed_bytes - mutable_cf_options.soft_pending_compaction_bytes_limit) > 3 * (mutable_cf_options.hard_pending_compaction_bytes_limit - mutable_cf_options.soft_pending_compaction_bytes_limit) / 4; write_controller_token_ = SetupDelay(write_controller, compaction_needed_bytes, prev_compaction_needed_bytes_, was_stopped || near_stop, mutable_cf_options.disable_auto_compactions); internal_stats_->AddCFStats( InternalStats::PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS, 1); ROCKS_LOG_WARN( ioptions_.info_log, "[%s] Stalling writes because of estimated pending compaction " "bytes %" PRIu64 " rate %" PRIu64, name_.c_str(), vstorage->estimated_compaction_needed_bytes(), write_controller->delayed_write_rate()); } else { assert(write_stall_condition == WriteStallCondition::kNormal); if (vstorage->l0_delay_trigger_count() >= GetL0ThresholdSpeedupCompaction( mutable_cf_options.level0_file_num_compaction_trigger, mutable_cf_options.level0_slowdown_writes_trigger)) { write_controller_token_ = write_controller->GetCompactionPressureToken(); ROCKS_LOG_INFO( ioptions_.info_log, "[%s] Increasing compaction threads because we have %d level-0 " "files ", name_.c_str(), vstorage->l0_delay_trigger_count()); } else if (vstorage->estimated_compaction_needed_bytes() >= mutable_cf_options.soft_pending_compaction_bytes_limit / 4) { // Increase compaction threads if bytes needed for compaction exceeds // 1/4 of threshold for slowing down. // If soft pending compaction byte limit is not set, always speed up // compaction. 
write_controller_token_ = write_controller->GetCompactionPressureToken(); if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) { ROCKS_LOG_INFO( ioptions_.info_log, "[%s] Increasing compaction threads because of estimated pending " "compaction " "bytes %" PRIu64, name_.c_str(), vstorage->estimated_compaction_needed_bytes()); } } else { write_controller_token_.reset(); } // If the DB recovers from delay conditions, we reward with reducing // double the slowdown ratio. This is to balance the long term slowdown // increase signal. if (needed_delay) { uint64_t write_rate = write_controller->delayed_write_rate(); write_controller->set_delayed_write_rate(static_cast( static_cast(write_rate) * kDelayRecoverSlowdownRatio)); // Set the low pri limit to be 1/4 the delayed write rate. // Note we don't reset this value even after delay condition is relased. // Low-pri rate will continue to apply if there is a compaction // pressure. write_controller->low_pri_rate_limiter()->SetBytesPerSecond(write_rate / 4); } } prev_compaction_needed_bytes_ = compaction_needed_bytes; } return write_stall_condition; } const FileOptions* ColumnFamilyData::soptions() const { return &(column_family_set_->file_options_); } void ColumnFamilyData::SetCurrent(Version* current_version) { current_ = current_version; } uint64_t ColumnFamilyData::GetNumLiveVersions() const { return VersionSet::GetNumLiveVersions(dummy_versions_); } uint64_t ColumnFamilyData::GetTotalSstFilesSize() const { return VersionSet::GetTotalSstFilesSize(dummy_versions_); } uint64_t ColumnFamilyData::GetLiveSstFilesSize() const { return current_->GetSstFilesSize(); } MemTable* ColumnFamilyData::ConstructNewMemtable( const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) { return new MemTable(internal_comparator_, ioptions_, mutable_cf_options, write_buffer_manager_, earliest_seq, id_); } void ColumnFamilyData::CreateNewMemtable( const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) { 
if (mem_ != nullptr) { delete mem_->Unref(); } SetMemtable(ConstructNewMemtable(mutable_cf_options, earliest_seq)); mem_->Ref(); } bool ColumnFamilyData::NeedsCompaction() const { return compaction_picker_->NeedsCompaction(current_->storage_info()); } Compaction* ColumnFamilyData::PickCompaction( const MutableCFOptions& mutable_options, LogBuffer* log_buffer) { SequenceNumber earliest_mem_seqno = std::min(mem_->GetEarliestSequenceNumber(), imm_.current()->GetEarliestSequenceNumber(false)); auto* result = compaction_picker_->PickCompaction( GetName(), mutable_options, current_->storage_info(), log_buffer, earliest_mem_seqno); if (result != nullptr) { result->SetInputVersion(current_); } return result; } bool ColumnFamilyData::RangeOverlapWithCompaction( const Slice& smallest_user_key, const Slice& largest_user_key, int level) const { return compaction_picker_->RangeOverlapWithCompaction( smallest_user_key, largest_user_key, level); } Status ColumnFamilyData::RangesOverlapWithMemtables( const autovector& ranges, SuperVersion* super_version, bool* overlap) { assert(overlap != nullptr); *overlap = false; // Create an InternalIterator over all unflushed memtables Arena arena; ReadOptions read_opts; read_opts.total_order_seek = true; MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena); merge_iter_builder.AddIterator( super_version->mem->NewIterator(read_opts, &arena)); super_version->imm->AddIterators(read_opts, &merge_iter_builder); ScopedArenaIterator memtable_iter(merge_iter_builder.Finish()); auto read_seq = super_version->current->version_set()->LastSequence(); ReadRangeDelAggregator range_del_agg(&internal_comparator_, read_seq); auto* active_range_del_iter = super_version->mem->NewRangeTombstoneIterator(read_opts, read_seq); range_del_agg.AddTombstones( std::unique_ptr(active_range_del_iter)); super_version->imm->AddRangeTombstoneIterators(read_opts, nullptr /* arena */, &range_del_agg); Status status; for (size_t i = 0; i < ranges.size() && 
status.ok() && !*overlap; ++i) { auto* vstorage = super_version->current->storage_info(); auto* ucmp = vstorage->InternalComparator()->user_comparator(); InternalKey range_start(ranges[i].start, kMaxSequenceNumber, kValueTypeForSeek); memtable_iter->Seek(range_start.Encode()); status = memtable_iter->status(); ParsedInternalKey seek_result; if (status.ok()) { if (memtable_iter->Valid() && !ParseInternalKey(memtable_iter->key(), &seek_result)) { status = Status::Corruption("DB have corrupted keys"); } } if (status.ok()) { if (memtable_iter->Valid() && ucmp->Compare(seek_result.user_key, ranges[i].limit) <= 0) { *overlap = true; } else if (range_del_agg.IsRangeOverlapped(ranges[i].start, ranges[i].limit)) { *overlap = true; } } } return status; } const int ColumnFamilyData::kCompactAllLevels = -1; const int ColumnFamilyData::kCompactToBaseLevel = -2; Compaction* ColumnFamilyData::CompactRange( const MutableCFOptions& mutable_cf_options, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* conflict, uint64_t max_file_num_to_ignore) { auto* result = compaction_picker_->CompactRange( GetName(), mutable_cf_options, current_->storage_info(), input_level, output_level, compact_range_options, begin, end, compaction_end, conflict, max_file_num_to_ignore); if (result != nullptr) { result->SetInputVersion(current_); } return result; } SuperVersion* ColumnFamilyData::GetReferencedSuperVersion(DBImpl* db) { SuperVersion* sv = GetThreadLocalSuperVersion(db); sv->Ref(); if (!ReturnThreadLocalSuperVersion(sv)) { // This Unref() corresponds to the Ref() in GetThreadLocalSuperVersion() // when the thread-local pointer was populated. So, the Ref() earlier in // this function still prevents the returned SuperVersion* from being // deleted out from under the caller. 
sv->Unref(); } return sv; } SuperVersion* ColumnFamilyData::GetThreadLocalSuperVersion(DBImpl* db) { // The SuperVersion is cached in thread local storage to avoid acquiring // mutex when SuperVersion does not change since the last use. When a new // SuperVersion is installed, the compaction or flush thread cleans up // cached SuperVersion in all existing thread local storage. To avoid // acquiring mutex for this operation, we use atomic Swap() on the thread // local pointer to guarantee exclusive access. If the thread local pointer // is being used while a new SuperVersion is installed, the cached // SuperVersion can become stale. In that case, the background thread would // have swapped in kSVObsolete. We re-check the value at when returning // SuperVersion back to thread local, with an atomic compare and swap. // The superversion will need to be released if detected to be stale. void* ptr = local_sv_->Swap(SuperVersion::kSVInUse); // Invariant: // (1) Scrape (always) installs kSVObsolete in ThreadLocal storage // (2) the Swap above (always) installs kSVInUse, ThreadLocal storage // should only keep kSVInUse before ReturnThreadLocalSuperVersion call // (if no Scrape happens). assert(ptr != SuperVersion::kSVInUse); SuperVersion* sv = static_cast(ptr); if (sv == SuperVersion::kSVObsolete || sv->version_number != super_version_number_.load()) { RecordTick(ioptions_.statistics, NUMBER_SUPERVERSION_ACQUIRES); SuperVersion* sv_to_delete = nullptr; if (sv && sv->Unref()) { RecordTick(ioptions_.statistics, NUMBER_SUPERVERSION_CLEANUPS); db->mutex()->Lock(); // NOTE: underlying resources held by superversion (sst files) might // not be released until the next background job. 
sv->Cleanup(); if (db->immutable_db_options().avoid_unnecessary_blocking_io) { db->AddSuperVersionsToFreeQueue(sv); db->SchedulePurge(); } else { sv_to_delete = sv; } } else { db->mutex()->Lock(); } sv = super_version_->Ref(); db->mutex()->Unlock(); delete sv_to_delete; } assert(sv != nullptr); return sv; } bool ColumnFamilyData::ReturnThreadLocalSuperVersion(SuperVersion* sv) { assert(sv != nullptr); // Put the SuperVersion back void* expected = SuperVersion::kSVInUse; if (local_sv_->CompareAndSwap(static_cast(sv), expected)) { // When we see kSVInUse in the ThreadLocal, we are sure ThreadLocal // storage has not been altered and no Scrape has happened. The // SuperVersion is still current. return true; } else { // ThreadLocal scrape happened in the process of this GetImpl call (after // thread local Swap() at the beginning and before CompareAndSwap()). // This means the SuperVersion it holds is obsolete. assert(expected == SuperVersion::kSVObsolete); } return false; } void ColumnFamilyData::InstallSuperVersion( SuperVersionContext* sv_context, InstrumentedMutex* db_mutex) { db_mutex->AssertHeld(); return InstallSuperVersion(sv_context, db_mutex, mutable_cf_options_); } void ColumnFamilyData::InstallSuperVersion( SuperVersionContext* sv_context, InstrumentedMutex* db_mutex, const MutableCFOptions& mutable_cf_options) { SuperVersion* new_superversion = sv_context->new_superversion.release(); new_superversion->db_mutex = db_mutex; new_superversion->mutable_cf_options = mutable_cf_options; new_superversion->Init(this, mem_, imm_.current(), current_); SuperVersion* old_superversion = super_version_; super_version_ = new_superversion; ++super_version_number_; super_version_->version_number = super_version_number_; super_version_->write_stall_condition = RecalculateWriteStallConditions(mutable_cf_options); if (old_superversion != nullptr) { // Reset SuperVersions cached in thread local storage. // This should be done before old_superversion->Unref(). 
That's to ensure // that local_sv_ never holds the last reference to SuperVersion, since // it has no means to safely do SuperVersion cleanup. ResetThreadLocalSuperVersions(); if (old_superversion->mutable_cf_options.write_buffer_size != mutable_cf_options.write_buffer_size) { mem_->UpdateWriteBufferSize(mutable_cf_options.write_buffer_size); } if (old_superversion->write_stall_condition != new_superversion->write_stall_condition) { sv_context->PushWriteStallNotification( old_superversion->write_stall_condition, new_superversion->write_stall_condition, GetName(), ioptions()); } if (old_superversion->Unref()) { old_superversion->Cleanup(); sv_context->superversions_to_free.push_back(old_superversion); } } } void ColumnFamilyData::ResetThreadLocalSuperVersions() { autovector sv_ptrs; local_sv_->Scrape(&sv_ptrs, SuperVersion::kSVObsolete); for (auto ptr : sv_ptrs) { assert(ptr); if (ptr == SuperVersion::kSVInUse) { continue; } auto sv = static_cast(ptr); bool was_last_ref __attribute__((__unused__)); was_last_ref = sv->Unref(); // sv couldn't have been the last reference because // ResetThreadLocalSuperVersions() is called before // unref'ing super_version_. assert(!was_last_ref); } } Status ColumnFamilyData::ValidateOptions( const DBOptions& db_options, const ColumnFamilyOptions& cf_options) { Status s; s = CheckCompressionSupported(cf_options); if (s.ok() && db_options.allow_concurrent_memtable_write) { s = CheckConcurrentWritesSupported(cf_options); } if (s.ok() && db_options.unordered_write && cf_options.max_successive_merges != 0) { s = Status::InvalidArgument( "max_successive_merges > 0 is incompatible with unordered_write"); } if (s.ok()) { s = CheckCFPathsSupported(db_options, cf_options); } if (!s.ok()) { return s; } if (cf_options.ttl > 0 && cf_options.ttl != kDefaultTtl) { if (cf_options.table_factory->Name() != BlockBasedTableFactory().Name()) { return Status::NotSupported( "TTL is only supported in Block-Based Table format. 
"); } } if (cf_options.periodic_compaction_seconds > 0 && cf_options.periodic_compaction_seconds != kDefaultPeriodicCompSecs) { if (cf_options.table_factory->Name() != BlockBasedTableFactory().Name()) { return Status::NotSupported( "Periodic Compaction is only supported in " "Block-Based Table format. "); } } return s; } #ifndef ROCKSDB_LITE Status ColumnFamilyData::SetOptions( const DBOptions& db_options, const std::unordered_map& options_map) { MutableCFOptions new_mutable_cf_options; Status s = GetMutableOptionsFromStrings(mutable_cf_options_, options_map, ioptions_.info_log, &new_mutable_cf_options); if (s.ok()) { ColumnFamilyOptions cf_options = BuildColumnFamilyOptions(initial_cf_options_, new_mutable_cf_options); s = ValidateOptions(db_options, cf_options); } if (s.ok()) { mutable_cf_options_ = new_mutable_cf_options; mutable_cf_options_.RefreshDerivedOptions(ioptions_); } return s; } #endif // ROCKSDB_LITE // REQUIRES: DB mutex held Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) { if (initial_cf_options_.compaction_style != kCompactionStyleLevel) { return Env::WLTH_NOT_SET; } if (level == 0) { return Env::WLTH_MEDIUM; } int base_level = current_->storage_info()->base_level(); // L1: medium, L2: long, ... if (level - base_level >= 2) { return Env::WLTH_EXTREME; } else if (level < base_level) { // There is no restriction which prevents level passed in to be smaller // than base_level. 
return Env::WLTH_MEDIUM; } return static_cast(level - base_level + static_cast(Env::WLTH_MEDIUM)); } Status ColumnFamilyData::AddDirectories( std::map>* created_dirs) { Status s; assert(created_dirs != nullptr); assert(data_dirs_.empty()); for (auto& p : ioptions_.cf_paths) { auto existing_dir = created_dirs->find(p.path); if (existing_dir == created_dirs->end()) { std::unique_ptr path_directory; s = DBImpl::CreateAndNewDirectory(ioptions_.fs, p.path, &path_directory); if (!s.ok()) { return s; } assert(path_directory != nullptr); data_dirs_.emplace_back(path_directory.release()); (*created_dirs)[p.path] = data_dirs_.back(); } else { data_dirs_.emplace_back(existing_dir->second); } } assert(data_dirs_.size() == ioptions_.cf_paths.size()); return s; } FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const { if (data_dirs_.empty()) { return nullptr; } assert(path_id < data_dirs_.size()); return data_dirs_[path_id].get(); } ColumnFamilySet::ColumnFamilySet(const std::string& dbname, const ImmutableDBOptions* db_options, const FileOptions& file_options, Cache* table_cache, WriteBufferManager* _write_buffer_manager, WriteController* _write_controller, BlockCacheTracer* const block_cache_tracer) : max_column_family_(0), dummy_cfd_(new ColumnFamilyData( ColumnFamilyData::kDummyColumnFamilyDataId, "", nullptr, nullptr, nullptr, ColumnFamilyOptions(), *db_options, file_options, nullptr, block_cache_tracer)), default_cfd_cache_(nullptr), db_name_(dbname), db_options_(db_options), file_options_(file_options), table_cache_(table_cache), write_buffer_manager_(_write_buffer_manager), write_controller_(_write_controller), block_cache_tracer_(block_cache_tracer) { // initialize linked list dummy_cfd_->prev_ = dummy_cfd_; dummy_cfd_->next_ = dummy_cfd_; } ColumnFamilySet::~ColumnFamilySet() { while (column_family_data_.size() > 0) { // cfd destructor will delete itself from column_family_data_ auto cfd = column_family_data_.begin()->second; bool last_ref 
__attribute__((__unused__)); last_ref = cfd->UnrefAndTryDelete(); assert(last_ref); } bool dummy_last_ref __attribute__((__unused__)); dummy_last_ref = dummy_cfd_->UnrefAndTryDelete(); assert(dummy_last_ref); } ColumnFamilyData* ColumnFamilySet::GetDefault() const { assert(default_cfd_cache_ != nullptr); return default_cfd_cache_; } ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const { auto cfd_iter = column_family_data_.find(id); if (cfd_iter != column_family_data_.end()) { return cfd_iter->second; } else { return nullptr; } } ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name) const { auto cfd_iter = column_families_.find(name); if (cfd_iter != column_families_.end()) { auto cfd = GetColumnFamily(cfd_iter->second); assert(cfd != nullptr); return cfd; } else { return nullptr; } } uint32_t ColumnFamilySet::GetNextColumnFamilyID() { return ++max_column_family_; } uint32_t ColumnFamilySet::GetMaxColumnFamily() { return max_column_family_; } void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) { max_column_family_ = std::max(new_max_column_family, max_column_family_); } size_t ColumnFamilySet::NumberOfColumnFamilies() const { return column_families_.size(); } // under a DB mutex AND write thread ColumnFamilyData* ColumnFamilySet::CreateColumnFamily( const std::string& name, uint32_t id, Version* dummy_versions, const ColumnFamilyOptions& options) { assert(column_families_.find(name) == column_families_.end()); ColumnFamilyData* new_cfd = new ColumnFamilyData( id, name, dummy_versions, table_cache_, write_buffer_manager_, options, *db_options_, file_options_, this, block_cache_tracer_); column_families_.insert({name, id}); column_family_data_.insert({id, new_cfd}); max_column_family_ = std::max(max_column_family_, id); // add to linked list new_cfd->next_ = dummy_cfd_; auto prev = dummy_cfd_->prev_; new_cfd->prev_ = prev; prev->next_ = new_cfd; dummy_cfd_->prev_ = new_cfd; if (id == 0) { 
default_cfd_cache_ = new_cfd; } return new_cfd; } // REQUIRES: DB mutex held void ColumnFamilySet::FreeDeadColumnFamilies() { autovector to_delete; for (auto cfd = dummy_cfd_->next_; cfd != dummy_cfd_; cfd = cfd->next_) { if (cfd->refs_.load(std::memory_order_relaxed) == 0) { to_delete.push_back(cfd); } } for (auto cfd : to_delete) { // this is very rare, so it's not a problem that we do it under a mutex delete cfd; } } // under a DB mutex AND from a write thread void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) { auto cfd_iter = column_family_data_.find(cfd->GetID()); assert(cfd_iter != column_family_data_.end()); column_family_data_.erase(cfd_iter); column_families_.erase(cfd->GetName()); } // under a DB mutex OR from a write thread bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) { if (column_family_id == 0) { // optimization for common case current_ = column_family_set_->GetDefault(); } else { current_ = column_family_set_->GetColumnFamily(column_family_id); } handle_.SetCFD(current_); return current_ != nullptr; } uint64_t ColumnFamilyMemTablesImpl::GetLogNumber() const { assert(current_ != nullptr); return current_->GetLogNumber(); } MemTable* ColumnFamilyMemTablesImpl::GetMemTable() const { assert(current_ != nullptr); return current_->mem(); } ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() { assert(current_ != nullptr); return &handle_; } uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family) { uint32_t column_family_id = 0; if (column_family != nullptr) { auto cfh = reinterpret_cast(column_family); column_family_id = cfh->GetID(); } return column_family_id; } const Comparator* GetColumnFamilyUserComparator( ColumnFamilyHandle* column_family) { if (column_family != nullptr) { return column_family->GetComparator(); } return nullptr; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/column_family.h000066400000000000000000001000361370372246700166410ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include "db/memtable_list.h" #include "db/table_cache.h" #include "db/table_properties_collector.h" #include "db/write_batch_internal.h" #include "db/write_controller.h" #include "options/cf_options.h" #include "rocksdb/compaction_job_stats.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "trace_replay/block_cache_tracer.h" #include "util/thread_local.h" namespace ROCKSDB_NAMESPACE { class Version; class VersionSet; class VersionStorageInfo; class MemTable; class MemTableListVersion; class CompactionPicker; class Compaction; class InternalKey; class InternalStats; class ColumnFamilyData; class DBImpl; class LogBuffer; class InstrumentedMutex; class InstrumentedMutexLock; struct SuperVersionContext; extern const double kIncSlowdownRatio; // This file contains a list of data structures for managing column family // level metadata. 
// // The basic relationships among classes declared here are illustrated as // following: // // +----------------------+ +----------------------+ +--------+ // +---+ ColumnFamilyHandle 1 | +--+ ColumnFamilyHandle 2 | | DBImpl | // | +----------------------+ | +----------------------+ +----+---+ // | +--------------------------+ | // | | +-----------------------------+ // | | | // | | +-----------------------------v-------------------------------+ // | | | | // | | | ColumnFamilySet | // | | | | // | | +-------------+--------------------------+----------------+---+ // | | | | | // | +-------------------------------------+ | | // | | | | v // | +-------------v-------------+ +-----v----v---------+ // | | | | | // | | ColumnFamilyData 1 | | ColumnFamilyData 2 | ...... // | | | | | // +---> | | | // | +---------+ | | // | | MemTable| | | // | | List | | | // +--------+---+--+-+----+----+ +--------------------++ // | | | | // | | | | // | | | +-----------------------+ // | | +-----------+ | // v +--------+ | | // +--------+--------+ | | | // | | | | +----------v----------+ // +---> |SuperVersion 1.a +-----------------> | // | +------+ | | MemTableListVersion | // +---+-------------+ | | | | | // | | | | +----+------------+---+ // | current | | | | | // | +-------------+ | |mem | | // | | | | | | // +-v---v-------+ +---v--v---+ +-----v----+ +----v-----+ // | | | | | | | | // | Version 1.a | | memtable | | memtable | | memtable | // | | | 1.a | | 1.b | | 1.c | // +-------------+ | | | | | | // +----------+ +----------+ +----------+ // // DBImpl keeps a ColumnFamilySet, which references to all column families by // pointing to respective ColumnFamilyData object of each column family. // This is how DBImpl can list and operate on all the column families. 
// ColumnFamilyHandle also points to ColumnFamilyData directly, so that // when a user executes a query, it can directly find memtables and Version // as well as SuperVersion to the column family, without going through // ColumnFamilySet. // // ColumnFamilySet points to the latest view of the LSM-tree (list of memtables // and SST files) indirectly, while ongoing operations may hold references // to a current or an out-of-date SuperVersion, which in turn points to a // point-in-time view of the LSM-tree. This guarantees the memtables and SST // files being operated on will not go away, until the SuperVersion is // unreferenced to 0 and destroyed. // // The following graph illustrates possible referencing relationships: // // Column +--------------+ current +-----------+ // Family +---->+ +------------------->+ | // Data | SuperVersion +----------+ | Version A | // | 3 | imm | | | // Iter2 +----->+ | +-------v------+ +-----------+ // +-----+--------+ | MemtableList +----------------> Empty // | | Version r | +-----------+ // | +--------------+ | | // +------------------+ current| Version B | // +--------------+ | +----->+ | // | | | | +-----+-----+ // Compaction +>+ SuperVersion +-------------+ ^ // Job | 2 +------+ | |current // | +----+ | | mem | +------------+ // +--------------+ | | +---------------------> | // | +------------------------> MemTable a | // | mem | | | // +--------------+ | | +------------+ // | +--------------------------+ // Iter1 +-----> SuperVersion | | +------------+ // | 1 +------------------------------>+ | // | +-+ | mem | MemTable b | // +--------------+ | | | | // | | +--------------+ +-----^------+ // | |imm | MemtableList | | // | +--->+ Version s +------------+ // | +--------------+ // | +--------------+ // | | MemtableList | // +------>+ Version t +--------> Empty // imm +--------------+ // // In this example, even if the current LSM-tree consists of Version A and // memtable a, which is also referenced by SuperVersion, two older
SuperVersion // SuperVersion2 and Superversion1 still exist, and are referenced by a // compaction job and an old iterator Iter1, respectively. SuperVersion2 // contains Version B, memtable a and memtable b; SuperVersion1 contains // Version B and memtable b (mutable). As a result, Version B and memtable b // are prevented from being destroyed or deleted. // ColumnFamilyHandleImpl is the class that clients use to access different // column families. It has non-trivial destructor, which gets called when client // is done using the column family class ColumnFamilyHandleImpl : public ColumnFamilyHandle { public: // create while holding the mutex ColumnFamilyHandleImpl( ColumnFamilyData* cfd, DBImpl* db, InstrumentedMutex* mutex); // destroy without mutex virtual ~ColumnFamilyHandleImpl(); virtual ColumnFamilyData* cfd() const { return cfd_; } virtual uint32_t GetID() const override; virtual const std::string& GetName() const override; virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) override; virtual const Comparator* GetComparator() const override; private: ColumnFamilyData* cfd_; DBImpl* db_; InstrumentedMutex* mutex_; }; // Does not ref-count ColumnFamilyData // We use this dummy ColumnFamilyHandleImpl because sometimes MemTableInserter // calls DBImpl methods. When this happens, MemTableInserter need access to // ColumnFamilyHandle (same as the client would need). 
In that case, we feed // MemTableInserter dummy ColumnFamilyHandle and enable it to call DBImpl // methods class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl { public: ColumnFamilyHandleInternal() : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), internal_cfd_(nullptr) {} void SetCFD(ColumnFamilyData* _cfd) { internal_cfd_ = _cfd; } virtual ColumnFamilyData* cfd() const override { return internal_cfd_; } private: ColumnFamilyData* internal_cfd_; }; // holds references to memtable, all immutable memtables and version struct SuperVersion { // Accessing members of this class is not thread-safe and requires external // synchronization (ie db mutex held or on write thread). ColumnFamilyData* cfd; MemTable* mem; MemTableListVersion* imm; Version* current; MutableCFOptions mutable_cf_options; // Version number of the current SuperVersion uint64_t version_number; WriteStallCondition write_stall_condition; InstrumentedMutex* db_mutex; // should be called outside the mutex SuperVersion() = default; ~SuperVersion(); SuperVersion* Ref(); // If Unref() returns true, Cleanup() should be called with mutex held // before deleting this SuperVersion. bool Unref(); // call these two methods with db mutex held // Cleanup unrefs mem, imm and current. Also, it stores all memtables // that needs to be deleted in to_delete vector. Unrefing those // objects needs to be done in the mutex void Cleanup(); void Init(ColumnFamilyData* new_cfd, MemTable* new_mem, MemTableListVersion* new_imm, Version* new_current); // The value of dummy is not actually used. kSVInUse takes its address as a // mark in the thread local storage to indicate the SuperVersion is in use // by thread. This way, the value of kSVInUse is guaranteed to have no // conflict with SuperVersion object address and portable on different // platform. 
static int dummy; static void* const kSVInUse; static void* const kSVObsolete; private: std::atomic refs; // We need to_delete because during Cleanup(), imm->Unref() returns // all memtables that we need to free through this vector. We then // delete all those memtables outside of mutex, during destruction autovector to_delete; }; extern Status CheckCompressionSupported(const ColumnFamilyOptions& cf_options); extern Status CheckConcurrentWritesSupported( const ColumnFamilyOptions& cf_options); extern Status CheckCFPathsSupported(const DBOptions& db_options, const ColumnFamilyOptions& cf_options); extern ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& src); // Wrap user defined table proproties collector factories `from cf_options` // into internal ones in int_tbl_prop_collector_factories. Add a system internal // one too. extern void GetIntTblPropCollectorFactory( const ImmutableCFOptions& ioptions, std::vector>* int_tbl_prop_collector_factories); class ColumnFamilySet; // This class keeps all the data that a column family needs. // Most methods require DB mutex held, unless otherwise noted class ColumnFamilyData { public: ~ColumnFamilyData(); // thread-safe uint32_t GetID() const { return id_; } // thread-safe const std::string& GetName() const { return name_; } // Ref() can only be called from a context where the caller can guarantee // that ColumnFamilyData is alive (while holding a non-zero ref already, // holding a DB mutex, or as the leader in a write batch group). void Ref() { refs_.fetch_add(1); } // Unref decreases the reference count, but does not handle deletion // when the count goes to 0. If this method returns true then the // caller should delete the instance immediately, or later, by calling // FreeDeadColumnFamilies(). Unref() can only be called while holding // a DB mutex, or during single-threaded recovery. 
bool Unref() { int old_refs = refs_.fetch_sub(1); assert(old_refs > 0); return old_refs == 1; } // UnrefAndTryDelete() decreases the reference count and do free if needed, // return true if this is freed else false, UnrefAndTryDelete() can only // be called while holding a DB mutex, or during single-threaded recovery. bool UnrefAndTryDelete(); // SetDropped() can only be called under following conditions: // 1) Holding a DB mutex, // 2) from single-threaded write thread, AND // 3) from single-threaded VersionSet::LogAndApply() // After dropping column family no other operation on that column family // will be executed. All the files and memory will be, however, kept around // until client drops the column family handle. That way, client can still // access data from dropped column family. // Column family can be dropped and still alive. In that state: // *) Compaction and flush is not executed on the dropped column family. // *) Client can continue reading from column family. Writes will fail unless // WriteOptions::ignore_missing_column_families is true // When the dropped column family is unreferenced, then we: // *) Remove column family from the linked list maintained by ColumnFamilySet // *) delete all memory associated with that column family // *) delete all the files associated with that column family void SetDropped(); bool IsDropped() const { return dropped_.load(std::memory_order_relaxed); } // thread-safe int NumberLevels() const { return ioptions_.num_levels; } void SetLogNumber(uint64_t log_number) { log_number_ = log_number; } uint64_t GetLogNumber() const { return log_number_; } void SetFlushReason(FlushReason flush_reason) { flush_reason_ = flush_reason; } FlushReason GetFlushReason() const { return flush_reason_; } // thread-safe const FileOptions* soptions() const; const ImmutableCFOptions* ioptions() const { return &ioptions_; } // REQUIRES: DB mutex held // This returns the MutableCFOptions used by current SuperVersion // You should use this API 
to reference MutableCFOptions most of the time. const MutableCFOptions* GetCurrentMutableCFOptions() const { return &(super_version_->mutable_cf_options); } // REQUIRES: DB mutex held // This returns the latest MutableCFOptions, which may be not in effect yet. const MutableCFOptions* GetLatestMutableCFOptions() const { return &mutable_cf_options_; } // REQUIRES: DB mutex held // Build ColumnFamiliesOptions with immutable options and latest mutable // options. ColumnFamilyOptions GetLatestCFOptions() const; bool is_delete_range_supported() { return is_delete_range_supported_; } // Validate CF options against DB options static Status ValidateOptions(const DBOptions& db_options, const ColumnFamilyOptions& cf_options); #ifndef ROCKSDB_LITE // REQUIRES: DB mutex held Status SetOptions( const DBOptions& db_options, const std::unordered_map& options_map); #endif // ROCKSDB_LITE InternalStats* internal_stats() { return internal_stats_.get(); } MemTableList* imm() { return &imm_; } MemTable* mem() { return mem_; } Version* current() { return current_; } Version* dummy_versions() { return dummy_versions_; } void SetCurrent(Version* _current); uint64_t GetNumLiveVersions() const; // REQUIRE: DB mutex held uint64_t GetTotalSstFilesSize() const; // REQUIRE: DB mutex held uint64_t GetLiveSstFilesSize() const; // REQUIRE: DB mutex held void SetMemtable(MemTable* new_mem) { uint64_t memtable_id = last_memtable_id_.fetch_add(1) + 1; new_mem->SetID(memtable_id); mem_ = new_mem; } // calculate the oldest log needed for the durability of this column family uint64_t OldestLogToKeep(); // See Memtable constructor for explanation of earliest_seq param. 
MemTable* ConstructNewMemtable(const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq); void CreateNewMemtable(const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq); TableCache* table_cache() const { return table_cache_.get(); } // See documentation in compaction_picker.h // REQUIRES: DB mutex held bool NeedsCompaction() const; // REQUIRES: DB mutex held Compaction* PickCompaction(const MutableCFOptions& mutable_options, LogBuffer* log_buffer); // Check if the passed range overlap with any running compactions. // REQUIRES: DB mutex held bool RangeOverlapWithCompaction(const Slice& smallest_user_key, const Slice& largest_user_key, int level) const; // Check if the passed ranges overlap with any unflushed memtables // (immutable or mutable). // // @param super_version A referenced SuperVersion that will be held for the // duration of this function. // // Thread-safe Status RangesOverlapWithMemtables(const autovector& ranges, SuperVersion* super_version, bool* overlap); // A flag to tell a manual compaction is to compact all levels together // instead of a specific level. static const int kCompactAllLevels; // A flag to tell a manual compaction's output is base level. 
static const int kCompactToBaseLevel; // REQUIRES: DB mutex held Compaction* CompactRange(const MutableCFOptions& mutable_cf_options, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, uint64_t max_file_num_to_ignore); CompactionPicker* compaction_picker() { return compaction_picker_.get(); } // thread-safe const Comparator* user_comparator() const { return internal_comparator_.user_comparator(); } // thread-safe const InternalKeyComparator& internal_comparator() const { return internal_comparator_; } const std::vector>* int_tbl_prop_collector_factories() const { return &int_tbl_prop_collector_factories_; } SuperVersion* GetSuperVersion() { return super_version_; } // thread-safe // Return a already referenced SuperVersion to be used safely. SuperVersion* GetReferencedSuperVersion(DBImpl* db); // thread-safe // Get SuperVersion stored in thread local storage. If it does not exist, // get a reference from a current SuperVersion. SuperVersion* GetThreadLocalSuperVersion(DBImpl* db); // Try to return SuperVersion back to thread local storage. Retrun true on // success and false on failure. It fails when the thread local storage // contains anything other than SuperVersion::kSVInUse flag. bool ReturnThreadLocalSuperVersion(SuperVersion* sv); // thread-safe uint64_t GetSuperVersionNumber() const { return super_version_number_.load(); } // will return a pointer to SuperVersion* if previous SuperVersion // if its reference count is zero and needs deletion or nullptr if not // As argument takes a pointer to allocated SuperVersion to enable // the clients to allocate SuperVersion outside of mutex. 
// IMPORTANT: Only call this from DBImpl::InstallSuperVersion() void InstallSuperVersion(SuperVersionContext* sv_context, InstrumentedMutex* db_mutex, const MutableCFOptions& mutable_cf_options); void InstallSuperVersion(SuperVersionContext* sv_context, InstrumentedMutex* db_mutex); void ResetThreadLocalSuperVersions(); // Protected by DB mutex void set_queued_for_flush(bool value) { queued_for_flush_ = value; } void set_queued_for_compaction(bool value) { queued_for_compaction_ = value; } bool queued_for_flush() { return queued_for_flush_; } bool queued_for_compaction() { return queued_for_compaction_; } enum class WriteStallCause { kNone, kMemtableLimit, kL0FileCountLimit, kPendingCompactionBytes, }; static std::pair GetWriteStallConditionAndCause(int num_unflushed_memtables, int num_l0_files, uint64_t num_compaction_needed_bytes, const MutableCFOptions& mutable_cf_options); // Recalculate some small conditions, which are changed only during // compaction, adding new memtable and/or // recalculation of compaction score. These values are used in // DBImpl::MakeRoomForWrite function to decide, if it need to make // a write stall WriteStallCondition RecalculateWriteStallConditions( const MutableCFOptions& mutable_cf_options); void set_initialized() { initialized_.store(true); } bool initialized() const { return initialized_.load(); } const ColumnFamilyOptions& initial_cf_options() { return initial_cf_options_; } Env::WriteLifeTimeHint CalculateSSTWriteHint(int level); // created_dirs remembers directory created, so that we don't need to call // the same data creation operation again. 
Status AddDirectories( std::map>* created_dirs); FSDirectory* GetDataDir(size_t path_id) const; ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); } private: friend class ColumnFamilySet; static const uint32_t kDummyColumnFamilyDataId; ColumnFamilyData(uint32_t id, const std::string& name, Version* dummy_versions, Cache* table_cache, WriteBufferManager* write_buffer_manager, const ColumnFamilyOptions& options, const ImmutableDBOptions& db_options, const FileOptions& file_options, ColumnFamilySet* column_family_set, BlockCacheTracer* const block_cache_tracer); std::vector GetDbPaths() const; uint32_t id_; const std::string name_; Version* dummy_versions_; // Head of circular doubly-linked list of versions. Version* current_; // == dummy_versions->prev_ std::atomic refs_; // outstanding references to ColumnFamilyData std::atomic initialized_; std::atomic dropped_; // true if client dropped it const InternalKeyComparator internal_comparator_; std::vector> int_tbl_prop_collector_factories_; const ColumnFamilyOptions initial_cf_options_; const ImmutableCFOptions ioptions_; MutableCFOptions mutable_cf_options_; const bool is_delete_range_supported_; std::unique_ptr table_cache_; std::unique_ptr internal_stats_; WriteBufferManager* write_buffer_manager_; MemTable* mem_; MemTableList imm_; SuperVersion* super_version_; // An ordinal representing the current SuperVersion. Updated by // InstallSuperVersion(), i.e. incremented every time super_version_ // changes. std::atomic super_version_number_; // Thread's local copy of SuperVersion pointer // This needs to be destructed before mutex_ std::unique_ptr local_sv_; // pointers for a circular linked list. we use it to support iterations over // all column families that are alive (note: dropped column families can also // be alive as long as client holds a reference) ColumnFamilyData* next_; ColumnFamilyData* prev_; // This is the earliest log file number that contains data from this // Column Family. 
All earlier log files must be ignored and not // recovered from uint64_t log_number_; std::atomic flush_reason_; // An object that keeps all the compaction stats // and picks the next compaction std::unique_ptr compaction_picker_; ColumnFamilySet* column_family_set_; std::unique_ptr write_controller_token_; // If true --> this ColumnFamily is currently present in DBImpl::flush_queue_ bool queued_for_flush_; // If true --> this ColumnFamily is currently present in // DBImpl::compaction_queue_ bool queued_for_compaction_; uint64_t prev_compaction_needed_bytes_; // if the database was opened with 2pc enabled bool allow_2pc_; // Memtable id to track flush. std::atomic last_memtable_id_; // Directories corresponding to cf_paths. std::vector> data_dirs_; bool db_paths_registered_; }; // ColumnFamilySet has interesting thread-safety requirements // * CreateColumnFamily() or RemoveColumnFamily() -- need to be protected by DB // mutex AND executed in the write thread. // CreateColumnFamily() should ONLY be called from VersionSet::LogAndApply() AND // single-threaded write thread. It is also called during Recovery and in // DumpManifest(). // RemoveColumnFamily() is only called from SetDropped(). DB mutex needs to be // held and it needs to be executed from the write thread. SetDropped() also // guarantees that it will be called only from single-threaded LogAndApply(), // but this condition is not that important. // * Iteration -- hold DB mutex, but you can release it in the body of // iteration. 
If you release DB mutex in body, reference the column // family before the mutex and unreference after you unlock, since the column // family might get dropped when the DB mutex is released // * GetDefault() -- thread safe // * GetColumnFamily() -- either inside of DB mutex or from a write thread // * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily(), // NumberOfColumnFamilies -- inside of DB mutex class ColumnFamilySet { public: // ColumnFamilySet supports iteration class iterator { public: explicit iterator(ColumnFamilyData* cfd) : current_(cfd) {} iterator& operator++() { // dropped column families might still be included in this iteration // (we're only removing them when client drops the last reference to the // column family). // dummy is never dead, so this will never be infinite do { current_ = current_->next_; } while (current_->refs_.load(std::memory_order_relaxed) == 0); return *this; } bool operator!=(const iterator& other) { return this->current_ != other.current_; } ColumnFamilyData* operator*() { return current_; } private: ColumnFamilyData* current_; }; ColumnFamilySet(const std::string& dbname, const ImmutableDBOptions* db_options, const FileOptions& file_options, Cache* table_cache, WriteBufferManager* _write_buffer_manager, WriteController* _write_controller, BlockCacheTracer* const block_cache_tracer); ~ColumnFamilySet(); ColumnFamilyData* GetDefault() const; // GetColumnFamily() calls return nullptr if column family is not found ColumnFamilyData* GetColumnFamily(uint32_t id) const; ColumnFamilyData* GetColumnFamily(const std::string& name) const; // this call will return the next available column family ID. it guarantees // that there is no column family with id greater than or equal to the // returned value in the current running instance or anytime in RocksDB // instance history. 
uint32_t GetNextColumnFamilyID(); uint32_t GetMaxColumnFamily(); void UpdateMaxColumnFamily(uint32_t new_max_column_family); size_t NumberOfColumnFamilies() const; ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id, Version* dummy_version, const ColumnFamilyOptions& options); iterator begin() { return iterator(dummy_cfd_->next_); } iterator end() { return iterator(dummy_cfd_); } // REQUIRES: DB mutex held // Don't call while iterating over ColumnFamilySet void FreeDeadColumnFamilies(); Cache* get_table_cache() { return table_cache_; } WriteBufferManager* write_buffer_manager() { return write_buffer_manager_; } WriteController* write_controller() { return write_controller_; } private: friend class ColumnFamilyData; // helper function that gets called from cfd destructor // REQUIRES: DB mutex held void RemoveColumnFamily(ColumnFamilyData* cfd); // column_families_ and column_family_data_ need to be protected: // * when mutating both conditions have to be satisfied: // 1. DB mutex locked // 2. thread currently in single-threaded write thread // * when reading, at least one condition needs to be satisfied: // 1. DB mutex locked // 2. accessed from a single-threaded write thread std::unordered_map column_families_; std::unordered_map column_family_data_; uint32_t max_column_family_; ColumnFamilyData* dummy_cfd_; // We don't hold the refcount here, since default column family always exists // We are also not responsible for cleaning up default_cfd_cache_. 
This is // just a cache that makes common case (accessing default column family) // faster ColumnFamilyData* default_cfd_cache_; const std::string db_name_; const ImmutableDBOptions* const db_options_; const FileOptions file_options_; Cache* table_cache_; WriteBufferManager* write_buffer_manager_; WriteController* write_controller_; BlockCacheTracer* const block_cache_tracer_; }; // We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access // memtables of different column families (specified by ID in the write batch) class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables { public: explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set) : column_family_set_(column_family_set), current_(nullptr) {} // Constructs a ColumnFamilyMemTablesImpl equivalent to one constructed // with the arguments used to construct *orig. explicit ColumnFamilyMemTablesImpl(ColumnFamilyMemTablesImpl* orig) : column_family_set_(orig->column_family_set_), current_(nullptr) {} // sets current_ to ColumnFamilyData with column_family_id // returns false if column family doesn't exist // REQUIRES: use this function of DBImpl::column_family_memtables_ should be // under a DB mutex OR from a write thread bool Seek(uint32_t column_family_id) override; // Returns log number of the selected column family // REQUIRES: under a DB mutex OR from a write thread uint64_t GetLogNumber() const override; // REQUIRES: Seek() called first // REQUIRES: use this function of DBImpl::column_family_memtables_ should be // under a DB mutex OR from a write thread virtual MemTable* GetMemTable() const override; // Returns column family handle for the selected column family // REQUIRES: use this function of DBImpl::column_family_memtables_ should be // under a DB mutex OR from a write thread virtual ColumnFamilyHandle* GetColumnFamilyHandle() override; // Cannot be called while another thread is calling Seek(). 
// REQUIRES: use this function of DBImpl::column_family_memtables_ should be // under a DB mutex OR from a write thread virtual ColumnFamilyData* current() override { return current_; } private: ColumnFamilySet* column_family_set_; ColumnFamilyData* current_; ColumnFamilyHandleInternal handle_; }; extern uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family); extern const Comparator* GetColumnFamilyUserComparator( ColumnFamilyHandle* column_family); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/column_family_test.cc000066400000000000000000003434051370372246700200470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include #include #include #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "options/options_parser.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/utilities/object_registry.h" #include "test_util/fault_injection_test_env.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { static const int kValueSize = 1000; namespace { std::string RandomString(Random* rnd, int len) { std::string r; test::RandomString(rnd, len, &r); return r; } } // anonymous namespace // counts how many operations were performed class EnvCounter : public EnvWrapper { public: explicit EnvCounter(Env* base) : EnvWrapper(base), num_new_writable_file_(0) {} int GetNumberOfNewWritableFileCalls() { return num_new_writable_file_; } Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& soptions) override { ++num_new_writable_file_; return EnvWrapper::NewWritableFile(f, r, soptions); } private: std::atomic num_new_writable_file_; }; class ColumnFamilyTestBase : public testing::Test { public: explicit ColumnFamilyTestBase(uint32_t format) : rnd_(139), format_(format) { Env* base_env = Env::Default(); #ifndef ROCKSDB_LITE const char* test_env_uri = getenv("TEST_ENV_URI"); if (test_env_uri) { Env* test_env = nullptr; Status s = Env::LoadEnv(test_env_uri, &test_env, &env_guard_); base_env = test_env; EXPECT_OK(s); EXPECT_NE(Env::Default(), base_env); } #endif // !ROCKSDB_LITE EXPECT_NE(nullptr, base_env); env_ = new EnvCounter(base_env); dbname_ = test::PerThreadDBPath("column_family_test"); db_options_.create_if_missing = true; db_options_.fail_if_options_file_error = true; db_options_.env = env_; DestroyDB(dbname_, Options(db_options_, 
column_family_options_)); } ~ColumnFamilyTestBase() override { std::vector column_families; for (auto h : handles_) { ColumnFamilyDescriptor cfdescriptor; h->GetDescriptor(&cfdescriptor); column_families.push_back(cfdescriptor); } Close(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Destroy(column_families); delete env_; } BlockBasedTableOptions GetBlockBasedTableOptions() { BlockBasedTableOptions options; options.format_version = format_; return options; } // Return the value to associate with the specified key Slice Value(int k, std::string* storage) { if (k == 0) { // Ugh. Random seed of 0 used to produce no entropy. This code // preserves the implementation that was in place when all of the // magic values in this file were picked. *storage = std::string(kValueSize, ' '); return Slice(*storage); } else { Random r(k); return test::RandomString(&r, kValueSize, storage); } } void Build(int base, int n, int flush_every = 0) { std::string key_space, value_space; WriteBatch batch; for (int i = 0; i < n; i++) { if (flush_every != 0 && i != 0 && i % flush_every == 0) { DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); } int keyi = base + i; Slice key(DBTestBase::Key(keyi)); batch.Clear(); batch.Put(handles_[0], key, Value(keyi, &value_space)); batch.Put(handles_[1], key, Value(keyi, &value_space)); batch.Put(handles_[2], key, Value(keyi, &value_space)); ASSERT_OK(db_->Write(WriteOptions(), &batch)); } } void CheckMissed() { uint64_t next_expected = 0; uint64_t missed = 0; int bad_keys = 0; int bad_values = 0; int correct = 0; std::string value_space; for (int cf = 0; cf < 3; cf++) { next_expected = 0; Iterator* iter = db_->NewIterator(ReadOptions(false, true), handles_[cf]); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { uint64_t key; Slice in(iter->key()); in.remove_prefix(3); if (!ConsumeDecimalNumber(&in, &key) || !in.empty() || key < next_expected) { bad_keys++; continue; } missed += (key - next_expected); next_expected 
= key + 1; if (iter->value() != Value(static_cast(key), &value_space)) { bad_values++; } else { correct++; } } delete iter; } ASSERT_EQ(0, bad_keys); ASSERT_EQ(0, bad_values); ASSERT_EQ(0, missed); (void)correct; } void Close() { for (auto h : handles_) { if (h) { db_->DestroyColumnFamilyHandle(h); } } handles_.clear(); names_.clear(); delete db_; db_ = nullptr; } Status TryOpen(std::vector cf, std::vector options = {}) { std::vector column_families; names_.clear(); for (size_t i = 0; i < cf.size(); ++i) { column_families.push_back(ColumnFamilyDescriptor( cf[i], options.size() == 0 ? column_family_options_ : options[i])); names_.push_back(cf[i]); } return DB::Open(db_options_, dbname_, column_families, &handles_, &db_); } Status OpenReadOnly(std::vector cf, std::vector options = {}) { std::vector column_families; names_.clear(); for (size_t i = 0; i < cf.size(); ++i) { column_families.push_back(ColumnFamilyDescriptor( cf[i], options.size() == 0 ? column_family_options_ : options[i])); names_.push_back(cf[i]); } return DB::OpenForReadOnly(db_options_, dbname_, column_families, &handles_, &db_); } #ifndef ROCKSDB_LITE // ReadOnlyDB is not supported void AssertOpenReadOnly(std::vector cf, std::vector options = {}) { ASSERT_OK(OpenReadOnly(cf, options)); } #endif // !ROCKSDB_LITE void Open(std::vector cf, std::vector options = {}) { ASSERT_OK(TryOpen(cf, options)); } void Open() { Open({"default"}); } DBImpl* dbfull() { return reinterpret_cast(db_); } int GetProperty(int cf, std::string property) { std::string value; EXPECT_TRUE(dbfull()->GetProperty(handles_[cf], property, &value)); #ifndef CYGWIN return std::stoi(value); #else return std::strtol(value.c_str(), 0 /* off */, 10 /* base */); #endif } bool IsDbWriteStopped() { #ifndef ROCKSDB_LITE uint64_t v; EXPECT_TRUE(dbfull()->GetIntProperty("rocksdb.is-write-stopped", &v)); return (v == 1); #else return dbfull()->TEST_write_controler().IsStopped(); #endif // !ROCKSDB_LITE } uint64_t GetDbDelayedWriteRate() { #ifndef 
ROCKSDB_LITE uint64_t v; EXPECT_TRUE( dbfull()->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)); return v; #else if (!dbfull()->TEST_write_controler().NeedsDelay()) { return 0; } return dbfull()->TEST_write_controler().delayed_write_rate(); #endif // !ROCKSDB_LITE } void Destroy(const std::vector& column_families = std::vector()) { Close(); ASSERT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_), column_families)); } void CreateColumnFamilies( const std::vector& cfs, const std::vector options = {}) { int cfi = static_cast(handles_.size()); handles_.resize(cfi + cfs.size()); names_.resize(cfi + cfs.size()); for (size_t i = 0; i < cfs.size(); ++i) { const auto& current_cf_opt = options.size() == 0 ? column_family_options_ : options[i]; ASSERT_OK( db_->CreateColumnFamily(current_cf_opt, cfs[i], &handles_[cfi])); names_[cfi] = cfs[i]; #ifndef ROCKSDB_LITE // RocksDBLite does not support GetDescriptor // Verify the CF options of the returned CF handle. ColumnFamilyDescriptor desc; ASSERT_OK(handles_[cfi]->GetDescriptor(&desc)); RocksDBOptionsParser::VerifyCFOptions(ConfigOptions(), desc.options, current_cf_opt); #endif // !ROCKSDB_LITE cfi++; } } void Reopen(const std::vector options = {}) { std::vector names; for (auto name : names_) { if (name != "") { names.push_back(name); } } Close(); assert(options.size() == 0 || names.size() == options.size()); Open(names, options); } void CreateColumnFamiliesAndReopen(const std::vector& cfs) { CreateColumnFamilies(cfs); Reopen(); } void DropColumnFamilies(const std::vector& cfs) { for (auto cf : cfs) { ASSERT_OK(db_->DropColumnFamily(handles_[cf])); db_->DestroyColumnFamilyHandle(handles_[cf]); handles_[cf] = nullptr; names_[cf] = ""; } } void PutRandomData(int cf, int num, int key_value_size, bool save = false) { if (cf >= static_cast(keys_.size())) { keys_.resize(cf + 1); } for (int i = 0; i < num; ++i) { // 10 bytes for key, rest is value if (!save) { ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 11), 
RandomString(&rnd_, key_value_size - 10))); } else { std::string key = test::RandomKey(&rnd_, 11); keys_[cf].insert(key); ASSERT_OK(Put(cf, key, RandomString(&rnd_, key_value_size - 10))); } } db_->FlushWAL(false); } #ifndef ROCKSDB_LITE // TEST functions in DB are not supported in lite void WaitForFlush(int cf) { ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); } void WaitForCompaction() { ASSERT_OK(dbfull()->TEST_WaitForCompact()); } uint64_t MaxTotalInMemoryState() { return dbfull()->TEST_MaxTotalInMemoryState(); } void AssertMaxTotalInMemoryState(uint64_t value) { ASSERT_EQ(value, MaxTotalInMemoryState()); } #endif // !ROCKSDB_LITE Status Put(int cf, const std::string& key, const std::string& value) { return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(value)); } Status Merge(int cf, const std::string& key, const std::string& value) { return db_->Merge(WriteOptions(), handles_[cf], Slice(key), Slice(value)); } Status Flush(int cf) { return db_->Flush(FlushOptions(), handles_[cf]); } std::string Get(int cf, const std::string& key) { ReadOptions options; options.verify_checksums = true; std::string result; Status s = db_->Get(options, handles_[cf], Slice(key), &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } void CompactAll(int cf) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[cf], nullptr, nullptr)); } void Compact(int cf, const Slice& start, const Slice& limit) { ASSERT_OK( db_->CompactRange(CompactRangeOptions(), handles_[cf], &start, &limit)); } int NumTableFilesAtLevel(int level, int cf) { return GetProperty(cf, "rocksdb.num-files-at-level" + ToString(level)); } #ifndef ROCKSDB_LITE // Return spread of files per level std::string FilesPerLevel(int cf) { std::string result; int last_non_zero_offset = 0; for (int level = 0; level < dbfull()->NumberLevels(handles_[cf]); level++) { int f = NumTableFilesAtLevel(level, cf); char buf[100]; snprintf(buf, 
sizeof(buf), "%s%d", (level ? "," : ""), f); result += buf; if (f > 0) { last_non_zero_offset = static_cast(result.size()); } } result.resize(last_non_zero_offset); return result; } #endif void AssertFilesPerLevel(const std::string& value, int cf) { #ifndef ROCKSDB_LITE ASSERT_EQ(value, FilesPerLevel(cf)); #else (void) value; (void) cf; #endif } #ifndef ROCKSDB_LITE // GetLiveFilesMetaData is not supported int CountLiveFiles() { std::vector metadata; db_->GetLiveFilesMetaData(&metadata); return static_cast(metadata.size()); } #endif // !ROCKSDB_LITE void AssertCountLiveFiles(int expected_value) { #ifndef ROCKSDB_LITE ASSERT_EQ(expected_value, CountLiveFiles()); #else (void) expected_value; #endif } // Do n memtable flushes, each of which produces an sstable // covering the range [small,large]. void MakeTables(int cf, int n, const std::string& small, const std::string& large) { for (int i = 0; i < n; i++) { ASSERT_OK(Put(cf, small, "begin")); ASSERT_OK(Put(cf, large, "end")); ASSERT_OK(db_->Flush(FlushOptions(), handles_[cf])); } } #ifndef ROCKSDB_LITE // GetSortedWalFiles is not supported int CountLiveLogFiles() { int micros_wait_for_log_deletion = 20000; env_->SleepForMicroseconds(micros_wait_for_log_deletion); int ret = 0; VectorLogPtr wal_files; Status s; // GetSortedWalFiles is a flakey function -- it gets all the wal_dir // children files and then later checks for their existence. if some of the // log files doesn't exist anymore, it reports an error. it does all of this // without DB mutex held, so if a background process deletes the log file // while the function is being executed, it returns an error. 
We retry the // function 10 times to avoid the error failing the test for (int retries = 0; retries < 10; ++retries) { wal_files.clear(); s = db_->GetSortedWalFiles(wal_files); if (s.ok()) { break; } } EXPECT_OK(s); for (const auto& wal : wal_files) { if (wal->Type() == kAliveLogFile) { ++ret; } } return ret; return 0; } #endif // !ROCKSDB_LITE void AssertCountLiveLogFiles(int value) { #ifndef ROCKSDB_LITE // GetSortedWalFiles is not supported ASSERT_EQ(value, CountLiveLogFiles()); #else (void) value; #endif // !ROCKSDB_LITE } void AssertNumberOfImmutableMemtables(std::vector num_per_cf) { assert(num_per_cf.size() == handles_.size()); #ifndef ROCKSDB_LITE // GetProperty is not supported in lite for (size_t i = 0; i < num_per_cf.size(); ++i) { ASSERT_EQ(num_per_cf[i], GetProperty(static_cast(i), "rocksdb.num-immutable-mem-table")); } #endif // !ROCKSDB_LITE } void CopyFile(const std::string& source, const std::string& destination, uint64_t size = 0) { const EnvOptions soptions; std::unique_ptr srcfile; ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions)); std::unique_ptr destfile; ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions)); if (size == 0) { // default argument means copy everything ASSERT_OK(env_->GetFileSize(source, &size)); } char buffer[4096]; Slice slice; while (size > 0) { uint64_t one = std::min(uint64_t(sizeof(buffer)), size); ASSERT_OK(srcfile->Read(one, &slice, buffer)); ASSERT_OK(destfile->Append(slice)); size -= slice.size(); } ASSERT_OK(destfile->Close()); } int GetSstFileCount(std::string path) { std::vector files; DBTestBase::GetSstFiles(env_, path, &files); return static_cast(files.size()); } void RecalculateWriteStallConditions(ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options) { // add lock to avoid race condition between // `RecalculateWriteStallConditions` which writes to CFStats and // background `DBImpl::DumpStats()` threads which read CFStats dbfull()->TEST_LockMutex(); 
cfd->RecalculateWriteStallConditions(mutable_cf_options); dbfull()-> TEST_UnlockMutex(); } std::vector handles_; std::vector names_; std::vector> keys_; ColumnFamilyOptions column_family_options_; DBOptions db_options_; std::string dbname_; DB* db_ = nullptr; EnvCounter* env_; std::shared_ptr env_guard_; Random rnd_; uint32_t format_; }; class ColumnFamilyTest : public ColumnFamilyTestBase, virtual public ::testing::WithParamInterface { public: ColumnFamilyTest() : ColumnFamilyTestBase(GetParam()) {} }; INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest, testing::Values(test::kLatestFormatVersion)); TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) { for (int iter = 0; iter < 3; ++iter) { Open(); CreateColumnFamilies({"one", "two", "three"}); for (size_t i = 0; i < handles_.size(); ++i) { auto cfh = reinterpret_cast(handles_[i]); ASSERT_EQ(i, cfh->GetID()); } if (iter == 1) { Reopen(); } DropColumnFamilies({3}); Reopen(); if (iter == 2) { // this tests if max_column_family is correctly persisted with // WriteSnapshot() Reopen(); } CreateColumnFamilies({"three2"}); // ID 3 that was used for dropped column family "three" should not be // reused auto cfh3 = reinterpret_cast(handles_[3]); ASSERT_EQ(4U, cfh3->GetID()); Close(); Destroy(); } } #ifndef ROCKSDB_LITE TEST_P(ColumnFamilyTest, CreateCFRaceWithGetAggProperty) { Open(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::WriteOptionsFile:1", "ColumnFamilyTest.CreateCFRaceWithGetAggProperty:1"}, {"ColumnFamilyTest.CreateCFRaceWithGetAggProperty:2", "DBImpl::WriteOptionsFile:2"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread thread( [&] { CreateColumnFamilies({"one"}); }); TEST_SYNC_POINT("ColumnFamilyTest.CreateCFRaceWithGetAggProperty:1"); uint64_t pv; db_->GetAggregatedIntProperty(DB::Properties::kEstimateTableReadersMem, &pv); 
TEST_SYNC_POINT("ColumnFamilyTest.CreateCFRaceWithGetAggProperty:2"); thread.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif // !ROCKSDB_LITE class FlushEmptyCFTestWithParam : public ColumnFamilyTestBase, virtual public testing::WithParamInterface> { public: FlushEmptyCFTestWithParam() : ColumnFamilyTestBase(std::get<0>(GetParam())), allow_2pc_(std::get<1>(GetParam())) {} // Required if inheriting from testing::WithParamInterface<> static void SetUpTestCase() {} static void TearDownTestCase() {} bool allow_2pc_; }; TEST_P(FlushEmptyCFTestWithParam, FlushEmptyCFTest) { std::unique_ptr fault_env( new FaultInjectionTestEnv(env_)); db_options_.env = fault_env.get(); db_options_.allow_2pc = allow_2pc_; Open(); CreateColumnFamilies({"one", "two"}); // Generate log file A. ASSERT_OK(Put(1, "foo", "v1")); // seqID 1 Reopen(); // Log file A is not dropped after reopening because default column family's // min log number is 0. // It flushes to SST file X ASSERT_OK(Put(1, "foo", "v1")); // seqID 2 ASSERT_OK(Put(1, "bar", "v2")); // seqID 3 // Current log file is file B now. While flushing, a new log file C is created // and is set to current. Boths' min log number is set to file C in memory, so // after flushing file B is deleted. At the same time, the min log number of // default CF is not written to manifest. Log file A still remains. // Flushed to SST file Y. Flush(1); Flush(0); ASSERT_OK(Put(1, "bar", "v3")); // seqID 4 ASSERT_OK(Put(1, "foo", "v4")); // seqID 5 db_->FlushWAL(false); // Preserve file system state up to here to simulate a crash condition. 
fault_env->SetFilesystemActive(false); std::vector names; for (auto name : names_) { if (name != "") { names.push_back(name); } } Close(); fault_env->ResetState(); // Before opening, there are four files: // Log file A contains seqID 1 // Log file C contains seqID 4, 5 // SST file X contains seqID 1 // SST file Y contains seqID 2, 3 // Min log number: // default CF: 0 // CF one, two: C // When opening the DB, all the seqID should be preserved. Open(names, {}); ASSERT_EQ("v4", Get(1, "foo")); ASSERT_EQ("v3", Get(1, "bar")); Close(); db_options_.env = env_; } TEST_P(FlushEmptyCFTestWithParam, FlushEmptyCFTest2) { std::unique_ptr fault_env( new FaultInjectionTestEnv(env_)); db_options_.env = fault_env.get(); db_options_.allow_2pc = allow_2pc_; Open(); CreateColumnFamilies({"one", "two"}); // Generate log file A. ASSERT_OK(Put(1, "foo", "v1")); // seqID 1 Reopen(); // Log file A is not dropped after reopening because default column family's // min log number is 0. // It flushes to SST file X ASSERT_OK(Put(1, "foo", "v1")); // seqID 2 ASSERT_OK(Put(1, "bar", "v2")); // seqID 3 // Current log file is file B now. While flushing, a new log file C is created // and is set to current. Both CFs' min log number is set to file C so after // flushing file B is deleted. Log file A still remains. // Flushed to SST file Y. Flush(1); ASSERT_OK(Put(0, "bar", "v2")); // seqID 4 ASSERT_OK(Put(2, "bar", "v2")); // seqID 5 ASSERT_OK(Put(1, "bar", "v3")); // seqID 6 // Flushing all column families. This forces all CFs' min log to current. This // is written to the manifest file. Log file C is cleared. Flush(0); Flush(1); Flush(2); // Write to log file D ASSERT_OK(Put(1, "bar", "v4")); // seqID 7 ASSERT_OK(Put(1, "bar", "v5")); // seqID 8 db_->FlushWAL(false); // Preserve file system state up to here to simulate a crash condition. 
fault_env->SetFilesystemActive(false); std::vector names; for (auto name : names_) { if (name != "") { names.push_back(name); } } Close(); fault_env->ResetState(); // Before opening, there are two logfiles: // Log file A contains seqID 1 // Log file D contains seqID 7, 8 // Min log number: // default CF: D // CF one, two: D // When opening the DB, log file D should be replayed using the seqID // specified in the file. Open(names, {}); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v5", Get(1, "bar")); Close(); db_options_.env = env_; } INSTANTIATE_TEST_CASE_P( FormatDef, FlushEmptyCFTestWithParam, testing::Values(std::make_tuple(test::kDefaultFormatVersion, true), std::make_tuple(test::kDefaultFormatVersion, false))); INSTANTIATE_TEST_CASE_P( FormatLatest, FlushEmptyCFTestWithParam, testing::Values(std::make_tuple(test::kLatestFormatVersion, true), std::make_tuple(test::kLatestFormatVersion, false))); TEST_P(ColumnFamilyTest, AddDrop) { Open(); CreateColumnFamilies({"one", "two", "three"}); ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); ASSERT_EQ("NOT_FOUND", Get(2, "fodor")); DropColumnFamilies({2}); ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); CreateColumnFamilies({"four"}); ASSERT_EQ("NOT_FOUND", Get(3, "fodor")); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_EQ("mirko", Get(1, "fodor")); ASSERT_EQ("NOT_FOUND", Get(3, "fodor")); Close(); ASSERT_TRUE(TryOpen({"default"}).IsInvalidArgument()); Open({"default", "one", "three", "four"}); DropColumnFamilies({1}); Reopen(); Close(); std::vector families; ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &families)); std::sort(families.begin(), families.end()); ASSERT_TRUE(families == std::vector({"default", "four", "three"})); } TEST_P(ColumnFamilyTest, BulkAddDrop) { constexpr int kNumCF = 1000; ColumnFamilyOptions cf_options; WriteOptions write_options; Open(); std::vector cf_names; std::vector cf_handles; for (int i = 1; i <= kNumCF; i++) { cf_names.push_back("cf1-" + ToString(i)); } 
ASSERT_OK(db_->CreateColumnFamilies(cf_options, cf_names, &cf_handles)); for (int i = 1; i <= kNumCF; i++) { ASSERT_OK(db_->Put(write_options, cf_handles[i - 1], "foo", "bar")); } ASSERT_OK(db_->DropColumnFamilies(cf_handles)); std::vector cf_descriptors; for (auto* handle : cf_handles) { delete handle; } cf_handles.clear(); for (int i = 1; i <= kNumCF; i++) { cf_descriptors.emplace_back("cf2-" + ToString(i), ColumnFamilyOptions()); } ASSERT_OK(db_->CreateColumnFamilies(cf_descriptors, &cf_handles)); for (int i = 1; i <= kNumCF; i++) { ASSERT_OK(db_->Put(write_options, cf_handles[i - 1], "foo", "bar")); } ASSERT_OK(db_->DropColumnFamilies(cf_handles)); for (auto* handle : cf_handles) { delete handle; } Close(); std::vector families; ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &families)); std::sort(families.begin(), families.end()); ASSERT_TRUE(families == std::vector({"default"})); } TEST_P(ColumnFamilyTest, DropTest) { // first iteration - don't reopen DB before dropping // second iteration - reopen DB before dropping for (int iter = 0; iter < 2; ++iter) { Open({"default"}); CreateColumnFamiliesAndReopen({"pikachu"}); for (int i = 0; i < 100; ++i) { ASSERT_OK(Put(1, ToString(i), "bar" + ToString(i))); } ASSERT_OK(Flush(1)); if (iter == 1) { Reopen(); } ASSERT_EQ("bar1", Get(1, "1")); AssertCountLiveFiles(1); DropColumnFamilies({1}); // make sure that all files are deleted when we drop the column family AssertCountLiveFiles(0); Destroy(); } } TEST_P(ColumnFamilyTest, WriteBatchFailure) { Open(); CreateColumnFamiliesAndReopen({"one", "two"}); WriteBatch batch; batch.Put(handles_[0], Slice("existing"), Slice("column-family")); batch.Put(handles_[1], Slice("non-existing"), Slice("column-family")); ASSERT_OK(db_->Write(WriteOptions(), &batch)); DropColumnFamilies({1}); WriteOptions woptions_ignore_missing_cf; woptions_ignore_missing_cf.ignore_missing_column_families = true; batch.Put(handles_[0], Slice("still here"), Slice("column-family")); 
ASSERT_OK(db_->Write(woptions_ignore_missing_cf, &batch)); ASSERT_EQ("column-family", Get(0, "still here")); Status s = db_->Write(WriteOptions(), &batch); ASSERT_TRUE(s.IsInvalidArgument()); Close(); } TEST_P(ColumnFamilyTest, ReadWrite) { Open(); CreateColumnFamiliesAndReopen({"one", "two"}); ASSERT_OK(Put(0, "foo", "v1")); ASSERT_OK(Put(0, "bar", "v2")); ASSERT_OK(Put(1, "mirko", "v3")); ASSERT_OK(Put(0, "foo", "v2")); ASSERT_OK(Put(2, "fodor", "v5")); for (int iter = 0; iter <= 3; ++iter) { ASSERT_EQ("v2", Get(0, "foo")); ASSERT_EQ("v2", Get(0, "bar")); ASSERT_EQ("v3", Get(1, "mirko")); ASSERT_EQ("v5", Get(2, "fodor")); ASSERT_EQ("NOT_FOUND", Get(0, "fodor")); ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); ASSERT_EQ("NOT_FOUND", Get(2, "foo")); if (iter <= 1) { Reopen(); } } Close(); } TEST_P(ColumnFamilyTest, IgnoreRecoveredLog) { std::string backup_logs = dbname_ + "/backup_logs"; // delete old files in backup_logs directory ASSERT_OK(env_->CreateDirIfMissing(dbname_)); ASSERT_OK(env_->CreateDirIfMissing(backup_logs)); std::vector old_files; env_->GetChildren(backup_logs, &old_files); for (auto& file : old_files) { if (file != "." && file != "..") { env_->DeleteFile(backup_logs + "/" + file); } } column_family_options_.merge_operator = MergeOperators::CreateUInt64AddOperator(); db_options_.wal_dir = dbname_ + "/logs"; Destroy(); Open(); CreateColumnFamilies({"cf1", "cf2"}); // fill up the DB std::string one, two, three; PutFixed64(&one, 1); PutFixed64(&two, 2); PutFixed64(&three, 3); ASSERT_OK(Merge(0, "foo", one)); ASSERT_OK(Merge(1, "mirko", one)); ASSERT_OK(Merge(0, "foo", one)); ASSERT_OK(Merge(2, "bla", one)); ASSERT_OK(Merge(2, "fodor", one)); ASSERT_OK(Merge(0, "bar", one)); ASSERT_OK(Merge(2, "bla", one)); ASSERT_OK(Merge(1, "mirko", two)); ASSERT_OK(Merge(1, "franjo", one)); // copy the logs to backup std::vector logs; env_->GetChildren(db_options_.wal_dir, &logs); for (auto& log : logs) { if (log != ".." 
&& log != ".") { CopyFile(db_options_.wal_dir + "/" + log, backup_logs + "/" + log); } } // recover the DB Close(); // 1. check consistency // 2. copy the logs from backup back to WAL dir. if the recovery happens // again on the same log files, this should lead to incorrect results // due to applying merge operator twice // 3. check consistency for (int iter = 0; iter < 2; ++iter) { // assert consistency Open({"default", "cf1", "cf2"}); ASSERT_EQ(two, Get(0, "foo")); ASSERT_EQ(one, Get(0, "bar")); ASSERT_EQ(three, Get(1, "mirko")); ASSERT_EQ(one, Get(1, "franjo")); ASSERT_EQ(one, Get(2, "fodor")); ASSERT_EQ(two, Get(2, "bla")); Close(); if (iter == 0) { // copy the logs from backup back to wal dir for (auto& log : logs) { if (log != ".." && log != ".") { CopyFile(backup_logs + "/" + log, db_options_.wal_dir + "/" + log); } } } } } #ifndef ROCKSDB_LITE // TEST functions used are not supported TEST_P(ColumnFamilyTest, FlushTest) { Open(); CreateColumnFamiliesAndReopen({"one", "two"}); ASSERT_OK(Put(0, "foo", "v1")); ASSERT_OK(Put(0, "bar", "v2")); ASSERT_OK(Put(1, "mirko", "v3")); ASSERT_OK(Put(0, "foo", "v2")); ASSERT_OK(Put(2, "fodor", "v5")); for (int j = 0; j < 2; j++) { ReadOptions ro; std::vector iterators; // Hold super version. 
if (j == 0) { ASSERT_OK(db_->NewIterators(ro, handles_, &iterators)); } for (int i = 0; i < 3; ++i) { uint64_t max_total_in_memory_state = MaxTotalInMemoryState(); Flush(i); AssertMaxTotalInMemoryState(max_total_in_memory_state); } ASSERT_OK(Put(1, "foofoo", "bar")); ASSERT_OK(Put(0, "foofoo", "bar")); for (auto* it : iterators) { delete it; } } Reopen(); for (int iter = 0; iter <= 2; ++iter) { ASSERT_EQ("v2", Get(0, "foo")); ASSERT_EQ("v2", Get(0, "bar")); ASSERT_EQ("v3", Get(1, "mirko")); ASSERT_EQ("v5", Get(2, "fodor")); ASSERT_EQ("NOT_FOUND", Get(0, "fodor")); ASSERT_EQ("NOT_FOUND", Get(1, "fodor")); ASSERT_EQ("NOT_FOUND", Get(2, "foo")); if (iter <= 1) { Reopen(); } } Close(); } // Makes sure that obsolete log files get deleted TEST_P(ColumnFamilyTest, LogDeletionTest) { db_options_.max_total_wal_size = std::numeric_limits::max(); column_family_options_.arena_block_size = 4 * 1024; column_family_options_.write_buffer_size = 128000; // 128KB Open(); CreateColumnFamilies({"one", "two", "three", "four"}); // Each bracket is one log file. 
if number is in (), it means // we don't need it anymore (it's been flushed) // [] AssertCountLiveLogFiles(0); PutRandomData(0, 1, 128); // [0] PutRandomData(1, 1, 128); // [0, 1] PutRandomData(1, 1000, 128); WaitForFlush(1); // [0, (1)] [1] AssertCountLiveLogFiles(2); PutRandomData(0, 1, 128); // [0, (1)] [0, 1] AssertCountLiveLogFiles(2); PutRandomData(2, 1, 128); // [0, (1)] [0, 1, 2] PutRandomData(2, 1000, 128); WaitForFlush(2); // [0, (1)] [0, 1, (2)] [2] AssertCountLiveLogFiles(3); PutRandomData(2, 1000, 128); WaitForFlush(2); // [0, (1)] [0, 1, (2)] [(2)] [2] AssertCountLiveLogFiles(4); PutRandomData(3, 1, 128); // [0, (1)] [0, 1, (2)] [(2)] [2, 3] PutRandomData(1, 1, 128); // [0, (1)] [0, 1, (2)] [(2)] [1, 2, 3] AssertCountLiveLogFiles(4); PutRandomData(1, 1000, 128); WaitForFlush(1); // [0, (1)] [0, (1), (2)] [(2)] [(1), 2, 3] [1] AssertCountLiveLogFiles(5); PutRandomData(0, 1000, 128); WaitForFlush(0); // [(0), (1)] [(0), (1), (2)] [(2)] [(1), 2, 3] [1, (0)] [0] // delete obsolete logs --> // [(1), 2, 3] [1, (0)] [0] AssertCountLiveLogFiles(3); PutRandomData(0, 1000, 128); WaitForFlush(0); // [(1), 2, 3] [1, (0)], [(0)] [0] AssertCountLiveLogFiles(4); PutRandomData(1, 1000, 128); WaitForFlush(1); // [(1), 2, 3] [(1), (0)] [(0)] [0, (1)] [1] AssertCountLiveLogFiles(5); PutRandomData(2, 1000, 128); WaitForFlush(2); // [(1), (2), 3] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2] AssertCountLiveLogFiles(6); PutRandomData(3, 1000, 128); WaitForFlush(3); // [(1), (2), (3)] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2, (3)] [3] // delete obsolete logs --> // [0, (1)] [1, (2)], [2, (3)] [3] AssertCountLiveLogFiles(4); Close(); } #endif // !ROCKSDB_LITE TEST_P(ColumnFamilyTest, CrashAfterFlush) { std::unique_ptr fault_env( new FaultInjectionTestEnv(env_)); db_options_.env = fault_env.get(); Open(); CreateColumnFamilies({"one"}); WriteBatch batch; batch.Put(handles_[0], Slice("foo"), Slice("bar")); batch.Put(handles_[1], Slice("foo"), Slice("bar")); 
ASSERT_OK(db_->Write(WriteOptions(), &batch)); Flush(0); fault_env->SetFilesystemActive(false); std::vector names; for (auto name : names_) { if (name != "") { names.push_back(name); } } Close(); fault_env->DropUnsyncedFileData(); fault_env->ResetState(); Open(names, {}); // Write batch should be atomic. ASSERT_EQ(Get(0, "foo"), Get(1, "foo")); Close(); db_options_.env = env_; } TEST_P(ColumnFamilyTest, OpenNonexistentColumnFamily) { ASSERT_OK(TryOpen({"default"})); Close(); ASSERT_TRUE(TryOpen({"default", "dne"}).IsInvalidArgument()); } #ifndef ROCKSDB_LITE // WaitForFlush() is not supported // Makes sure that obsolete log files get deleted TEST_P(ColumnFamilyTest, DifferentWriteBufferSizes) { // disable flushing stale column families db_options_.max_total_wal_size = std::numeric_limits::max(); Open(); CreateColumnFamilies({"one", "two", "three"}); ColumnFamilyOptions default_cf, one, two, three; // setup options. all column families have max_write_buffer_number setup to 10 // "default" -> 100KB memtable, start flushing immediatelly // "one" -> 200KB memtable, start flushing with two immutable memtables // "two" -> 1MB memtable, start flushing with three immutable memtables // "three" -> 90KB memtable, start flushing with four immutable memtables default_cf.write_buffer_size = 100000; default_cf.arena_block_size = 4 * 4096; default_cf.max_write_buffer_number = 10; default_cf.min_write_buffer_number_to_merge = 1; default_cf.max_write_buffer_size_to_maintain = 0; one.write_buffer_size = 200000; one.arena_block_size = 4 * 4096; one.max_write_buffer_number = 10; one.min_write_buffer_number_to_merge = 2; one.max_write_buffer_size_to_maintain = static_cast(one.write_buffer_size); two.write_buffer_size = 1000000; two.arena_block_size = 4 * 4096; two.max_write_buffer_number = 10; two.min_write_buffer_number_to_merge = 3; two.max_write_buffer_size_to_maintain = static_cast(two.write_buffer_size); three.write_buffer_size = 4096 * 22; three.arena_block_size = 4096; 
three.max_write_buffer_number = 10; three.min_write_buffer_number_to_merge = 4; three.max_write_buffer_size_to_maintain = static_cast(three.write_buffer_size); Reopen({default_cf, one, two, three}); int micros_wait_for_flush = 10000; PutRandomData(0, 100, 1000); WaitForFlush(0); AssertNumberOfImmutableMemtables({0, 0, 0, 0}); AssertCountLiveLogFiles(1); PutRandomData(1, 200, 1000); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 0, 0}); AssertCountLiveLogFiles(2); PutRandomData(2, 1000, 1000); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 1, 0}); AssertCountLiveLogFiles(3); PutRandomData(2, 1000, 1000); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 2, 0}); AssertCountLiveLogFiles(4); PutRandomData(3, 93, 990); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 2, 1}); AssertCountLiveLogFiles(5); PutRandomData(3, 88, 990); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 2, 2}); AssertCountLiveLogFiles(6); PutRandomData(3, 88, 990); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 2, 3}); AssertCountLiveLogFiles(7); PutRandomData(0, 100, 1000); WaitForFlush(0); AssertNumberOfImmutableMemtables({0, 1, 2, 3}); AssertCountLiveLogFiles(8); PutRandomData(2, 100, 10000); WaitForFlush(2); AssertNumberOfImmutableMemtables({0, 1, 0, 3}); AssertCountLiveLogFiles(9); PutRandomData(3, 88, 990); WaitForFlush(3); AssertNumberOfImmutableMemtables({0, 1, 0, 0}); AssertCountLiveLogFiles(10); PutRandomData(3, 88, 990); env_->SleepForMicroseconds(micros_wait_for_flush); AssertNumberOfImmutableMemtables({0, 1, 0, 1}); AssertCountLiveLogFiles(11); PutRandomData(1, 200, 1000); WaitForFlush(1); AssertNumberOfImmutableMemtables({0, 0, 0, 1}); AssertCountLiveLogFiles(5); PutRandomData(3, 88 * 3, 990); WaitForFlush(3); PutRandomData(3, 88 
* 4, 990); WaitForFlush(3); AssertNumberOfImmutableMemtables({0, 0, 0, 0}); AssertCountLiveLogFiles(12); PutRandomData(0, 100, 1000); WaitForFlush(0); AssertNumberOfImmutableMemtables({0, 0, 0, 0}); AssertCountLiveLogFiles(12); PutRandomData(2, 3 * 1000, 1000); WaitForFlush(2); AssertNumberOfImmutableMemtables({0, 0, 0, 0}); AssertCountLiveLogFiles(12); PutRandomData(1, 2*200, 1000); WaitForFlush(1); AssertNumberOfImmutableMemtables({0, 0, 0, 0}); AssertCountLiveLogFiles(7); Close(); } #endif // !ROCKSDB_LITE // The test is commented out because we want to test that snapshot is // not created for memtables not supported it, but There isn't a memtable // that doesn't support snapshot right now. If we have one later, we can // re-enable the test. // // #ifndef ROCKSDB_LITE // Cuckoo is not supported in lite // TEST_P(ColumnFamilyTest, MemtableNotSupportSnapshot) { // db_options_.allow_concurrent_memtable_write = false; // Open(); // auto* s1 = dbfull()->GetSnapshot(); // ASSERT_TRUE(s1 != nullptr); // dbfull()->ReleaseSnapshot(s1); // // Add a column family that doesn't support snapshot // ColumnFamilyOptions first; // first.memtable_factory.reset(new DummyMemtableNotSupportingSnapshot()); // CreateColumnFamilies({"first"}, {first}); // auto* s2 = dbfull()->GetSnapshot(); // ASSERT_TRUE(s2 == nullptr); // // Add a column family that supports snapshot. Snapshot stays not // supported. 
ColumnFamilyOptions second; CreateColumnFamilies({"second"}, // {second}); auto* s3 = dbfull()->GetSnapshot(); ASSERT_TRUE(s3 == nullptr); // Close(); // } // #endif // !ROCKSDB_LITE class TestComparator : public Comparator { int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/, const ROCKSDB_NAMESPACE::Slice& /*b*/) const override { return 0; } const char* Name() const override { return "Test"; } void FindShortestSeparator( std::string* /*start*/, const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {} void FindShortSuccessor(std::string* /*key*/) const override {} }; static TestComparator third_comparator; static TestComparator fourth_comparator; // Test that we can retrieve the comparator from a created CF TEST_P(ColumnFamilyTest, GetComparator) { Open(); // Add a column family with no comparator specified CreateColumnFamilies({"first"}); const Comparator* comp = handles_[0]->GetComparator(); ASSERT_EQ(comp, BytewiseComparator()); // Add three column families - one with no comparator and two // with comparators specified ColumnFamilyOptions second, third, fourth; second.comparator = &third_comparator; third.comparator = &fourth_comparator; CreateColumnFamilies({"second", "third", "fourth"}, {second, third, fourth}); ASSERT_EQ(handles_[1]->GetComparator(), BytewiseComparator()); ASSERT_EQ(handles_[2]->GetComparator(), &third_comparator); ASSERT_EQ(handles_[3]->GetComparator(), &fourth_comparator); Close(); } TEST_P(ColumnFamilyTest, DifferentMergeOperators) { Open(); CreateColumnFamilies({"first", "second"}); ColumnFamilyOptions default_cf, first, second; first.merge_operator = MergeOperators::CreateUInt64AddOperator(); second.merge_operator = MergeOperators::CreateStringAppendOperator(); Reopen({default_cf, first, second}); std::string one, two, three; PutFixed64(&one, 1); PutFixed64(&two, 2); PutFixed64(&three, 3); ASSERT_OK(Put(0, "foo", two)); ASSERT_OK(Put(0, "foo", one)); ASSERT_TRUE(Merge(0, "foo", two).IsNotSupported()); ASSERT_EQ(Get(0, "foo"), one); 
ASSERT_OK(Put(1, "foo", two)); ASSERT_OK(Put(1, "foo", one)); ASSERT_OK(Merge(1, "foo", two)); ASSERT_EQ(Get(1, "foo"), three); ASSERT_OK(Put(2, "foo", two)); ASSERT_OK(Put(2, "foo", one)); ASSERT_OK(Merge(2, "foo", two)); ASSERT_EQ(Get(2, "foo"), one + "," + two); Close(); } #ifndef ROCKSDB_LITE // WaitForFlush() is not supported TEST_P(ColumnFamilyTest, DifferentCompactionStyles) { Open(); CreateColumnFamilies({"one", "two"}); ColumnFamilyOptions default_cf, one, two; db_options_.max_open_files = 20; // only 10 files in file cache default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = static_cast(1) << 60; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; two.compaction_style = kCompactionStyleLevel; two.num_levels = 4; two.level0_file_num_compaction_trigger = 3; two.write_buffer_size = 100000; Reopen({default_cf, one, two}); // SETUP column family "one" -- universal style for (int i = 0; i < one.level0_file_num_compaction_trigger - 1; ++i) { PutRandomData(1, 10, 12000); PutRandomData(1, 1, 10); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } // SETUP column family "two" -- level style with 4 levels for (int i = 0; i < two.level0_file_num_compaction_trigger - 1; ++i) { PutRandomData(2, 10, 12000); PutRandomData(2, 1, 10); WaitForFlush(2); AssertFilesPerLevel(ToString(i + 1), 2); } // TRIGGER compaction "one" PutRandomData(1, 10, 12000); PutRandomData(1, 1, 10); // TRIGGER compaction "two" PutRandomData(2, 10, 12000); PutRandomData(2, 1, 10); // WAIT for compactions WaitForCompaction(); 
// VERIFY compaction "one" AssertFilesPerLevel("1", 1); // VERIFY compaction "two" AssertFilesPerLevel("0,1", 2); CompactAll(2); AssertFilesPerLevel("0,1", 2); Close(); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // Sync points not supported in RocksDB Lite TEST_P(ColumnFamilyTest, MultipleManualCompactions) { Open(); CreateColumnFamilies({"one", "two"}); ColumnFamilyOptions default_cf, one, two; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; two.compaction_style = kCompactionStyleLevel; two.num_levels = 4; two.level0_file_num_compaction_trigger = 3; two.write_buffer_size = 100000; Reopen({default_cf, one, two}); // SETUP column family "one" -- universal style for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } bool cf_1_1 = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::MultiManual:4", "ColumnFamilyTest::MultiManual:1"}, {"ColumnFamilyTest::MultiManual:2", "ColumnFamilyTest::MultiManual:5"}, {"ColumnFamilyTest::MultiManual:2", "ColumnFamilyTest::MultiManual:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { 
TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:4"); cf_1_1 = false; TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:3"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::vector threads; threads.emplace_back([&] { CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); }); // SETUP column family "two" -- level style with 4 levels for (int i = 0; i < two.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(2, 10, 12000); PutRandomData(2, 1, 10); WaitForFlush(2); AssertFilesPerLevel(ToString(i + 1), 2); } threads.emplace_back([&] { TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:1"); CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[2], nullptr, nullptr)); TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:2"); }); TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:5"); for (auto& t : threads) { t.join(); } // VERIFY compaction "one" AssertFilesPerLevel("1", 1); // VERIFY compaction "two" AssertFilesPerLevel("0,1", 2); CompactAll(2); AssertFilesPerLevel("0,1", 2); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); Close(); } TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) { Open(); CreateColumnFamilies({"one", "two"}); ColumnFamilyOptions default_cf, one, two; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = 
default_cf.target_file_size_base * 1100; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); ; table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; two.compaction_style = kCompactionStyleLevel; two.num_levels = 4; two.level0_file_num_compaction_trigger = 3; two.write_buffer_size = 100000; Reopen({default_cf, one, two}); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); bool cf_1_1 = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:1"}, {"ColumnFamilyTest::AutoManual:2", "ColumnFamilyTest::AutoManual:5"}, {"ColumnFamilyTest::AutoManual:2", "ColumnFamilyTest::AutoManual:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { cf_1_1 = false; TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:4"); TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:3"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // SETUP column family "one" -- universal style for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1"); // SETUP column family "two" -- level style with 4 levels for (int i = 0; i < two.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(2, 10, 12000); PutRandomData(2, 1, 10); WaitForFlush(2); AssertFilesPerLevel(ToString(i + 1), 2); } ROCKSDB_NAMESPACE::port::Thread threads([&] { CompactRangeOptions compact_options; 
compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[2], nullptr, nullptr)); TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:2"); }); TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5"); threads.join(); // WAIT for compactions WaitForCompaction(); // VERIFY compaction "one" AssertFilesPerLevel("1", 1); // VERIFY compaction "two" AssertFilesPerLevel("0,1", 2); CompactAll(2); AssertFilesPerLevel("0,1", 2); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) { Open(); CreateColumnFamilies({"one", "two"}); ColumnFamilyOptions default_cf, one, two; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); ; table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; two.compaction_style = kCompactionStyleLevel; two.num_levels = 4; two.level0_file_num_compaction_trigger = 3; two.write_buffer_size = 100000; Reopen({default_cf, one, two}); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); // SETUP column family "one" -- universal 
style for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } bool cf_1_1 = true; bool cf_1_2 = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:1"}, {"ColumnFamilyTest::ManualAuto:5", "ColumnFamilyTest::ManualAuto:2"}, {"ColumnFamilyTest::ManualAuto:2", "ColumnFamilyTest::ManualAuto:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4"); cf_1_1 = false; TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3"); } else if (cf_1_2) { TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2"); cf_1_2 = false; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread threads([&] { CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); }); TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1"); // SETUP column family "two" -- level style with 4 levels for (int i = 0; i < two.level0_file_num_compaction_trigger; ++i) { PutRandomData(2, 10, 12000); PutRandomData(2, 1, 10); WaitForFlush(2); AssertFilesPerLevel(ToString(i + 1), 2); } TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5"); threads.join(); // WAIT for compactions WaitForCompaction(); // VERIFY compaction "one" AssertFilesPerLevel("1", 1); // VERIFY compaction "two" AssertFilesPerLevel("0,1", 2); CompactAll(2); AssertFilesPerLevel("0,1", 2); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(ColumnFamilyTest, SameCFManualManualCompactions) { Open(); CreateColumnFamilies({"one"}); ColumnFamilyOptions default_cf, one; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); ; table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; Reopen({default_cf, one}); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); // SETUP column family "one" -- universal style for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } bool cf_1_1 = true; bool cf_1_2 = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::ManualManual:4", "ColumnFamilyTest::ManualManual:2"}, {"ColumnFamilyTest::ManualManual:4", "ColumnFamilyTest::ManualManual:5"}, {"ColumnFamilyTest::ManualManual:1", "ColumnFamilyTest::ManualManual:2"}, {"ColumnFamilyTest::ManualManual:1", "ColumnFamilyTest::ManualManual:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:4"); cf_1_1 = false; 
TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:3"); } else if (cf_1_2) { TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:2"); cf_1_2 = false; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread threads([&] { CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = true; ASSERT_OK( db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); }); TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:5"); WaitForFlush(1); // Add more L0 files and force another manual compaction for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i), 1); } ROCKSDB_NAMESPACE::port::Thread threads1([&] { CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); }); TEST_SYNC_POINT("ColumnFamilyTest::ManualManual:1"); threads.join(); threads1.join(); WaitForCompaction(); // VERIFY compaction "one" ASSERT_LE(NumTableFilesAtLevel(0, 1), 2); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactions) { Open(); CreateColumnFamilies({"one"}); ColumnFamilyOptions default_cf, one; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; 
BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); ; table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; Reopen({default_cf, one}); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); // SETUP column family "one" -- universal style for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } bool cf_1_1 = true; bool cf_1_2 = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:2"}, {"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:5"}, {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:2"}, {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4"); cf_1_1 = false; TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3"); } else if (cf_1_2) { TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2"); cf_1_2 = false; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread threads([&] { CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); }); TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5"); WaitForFlush(1); // Add more L0 files and force automatic compaction for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { 
PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i), 1); } TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1"); threads.join(); WaitForCompaction(); // VERIFY compaction "one" ASSERT_LE(NumTableFilesAtLevel(0, 1), 2); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactionsLevel) { Open(); CreateColumnFamilies({"one"}); ColumnFamilyOptions default_cf, one; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); ; table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleLevel; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 3; one.write_buffer_size = 120000; Reopen({default_cf, one}); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); // SETUP column family "one" -- level style for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } bool cf_1_1 = true; bool cf_1_2 = true; 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:2"}, {"ColumnFamilyTest::ManualAuto:4", "ColumnFamilyTest::ManualAuto:5"}, {"ColumnFamilyTest::ManualAuto:3", "ColumnFamilyTest::ManualAuto:2"}, {"LevelCompactionPicker::PickCompactionBySize:0", "ColumnFamilyTest::ManualAuto:3"}, {"ColumnFamilyTest::ManualAuto:1", "ColumnFamilyTest::ManualAuto:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:4"); cf_1_1 = false; TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:3"); } else if (cf_1_2) { TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:2"); cf_1_2 = false; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread threads([&] { CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK( db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); }); TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5"); // Add more L0 files and force automatic compaction for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i), 1); } TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1"); threads.join(); WaitForCompaction(); // VERIFY compaction "one" AssertFilesPerLevel("0,1", 1); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } // In this test, we generate enough files to trigger automatic compactions. 
// The automatic compaction waits in NonTrivial:AfterRun // We generate more files and then trigger an automatic compaction // This will wait because the automatic compaction has files it needs. // Once the conflict is hit, the automatic compaction starts and ends // Then the manual will run and end. TEST_P(ColumnFamilyTest, SameCFAutomaticManualCompactions) { Open(); CreateColumnFamilies({"one"}); ColumnFamilyOptions default_cf, one; db_options_.max_open_files = 20; // only 10 files in file cache db_options_.max_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; default_cf.write_buffer_size = 64 << 10; // 64KB default_cf.target_file_size_base = 30 << 10; default_cf.max_compaction_bytes = default_cf.target_file_size_base * 1100; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); ; table_options.no_block_cache = true; default_cf.table_factory.reset(NewBlockBasedTableFactory(table_options)); one.compaction_style = kCompactionStyleUniversal; one.num_levels = 1; // trigger compaction if there are >= 4 files one.level0_file_num_compaction_trigger = 4; one.write_buffer_size = 120000; Reopen({default_cf, one}); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); bool cf_1_1 = true; bool cf_1_2 = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:2"}, {"ColumnFamilyTest::AutoManual:4", "ColumnFamilyTest::AutoManual:5"}, {"CompactionPicker::CompactRange:Conflict", "ColumnFamilyTest::AutoManual:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (cf_1_1) { TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:4"); cf_1_1 = false; TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:3"); } else if (cf_1_2) { TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:2"); 
cf_1_2 = false; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // SETUP column family "one" -- universal style for (int i = 0; i < one.level0_file_num_compaction_trigger; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); AssertFilesPerLevel(ToString(i + 1), 1); } TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5"); // Add another L0 file and force automatic compaction for (int i = 0; i < one.level0_file_num_compaction_trigger - 2; ++i) { PutRandomData(1, 10, 12000, true); PutRandomData(1, 1, 10, true); WaitForFlush(1); } CompactRangeOptions compact_options; compact_options.exclusive_manual_compaction = false; ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1"); WaitForCompaction(); // VERIFY compaction "one" AssertFilesPerLevel("1", 1); // Compare against saved keys std::set::iterator key_iter = keys_[1].begin(); while (key_iter != keys_[1].end()) { ASSERT_NE("NOT_FOUND", Get(1, *key_iter)); key_iter++; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // Tailing iterator not supported namespace { std::string IterStatus(Iterator* iter) { std::string result; if (iter->Valid()) { result = iter->key().ToString() + "->" + iter->value().ToString(); } else { result = "(invalid)"; } return result; } } // anonymous namespace TEST_P(ColumnFamilyTest, NewIteratorsTest) { // iter == 0 -- no tailing // iter == 2 -- tailing for (int iter = 0; iter < 2; ++iter) { Open(); CreateColumnFamiliesAndReopen({"one", "two"}); ASSERT_OK(Put(0, "a", "b")); ASSERT_OK(Put(1, "b", "a")); ASSERT_OK(Put(2, "c", "m")); ASSERT_OK(Put(2, "v", "t")); std::vector iterators; ReadOptions options; options.tailing = (iter == 1); ASSERT_OK(db_->NewIterators(options, handles_, &iterators)); for (auto it : iterators) { it->SeekToFirst(); } 
ASSERT_EQ(IterStatus(iterators[0]), "a->b"); ASSERT_EQ(IterStatus(iterators[1]), "b->a"); ASSERT_EQ(IterStatus(iterators[2]), "c->m"); ASSERT_OK(Put(1, "x", "x")); for (auto it : iterators) { it->Next(); } ASSERT_EQ(IterStatus(iterators[0]), "(invalid)"); if (iter == 0) { // no tailing ASSERT_EQ(IterStatus(iterators[1]), "(invalid)"); } else { // tailing ASSERT_EQ(IterStatus(iterators[1]), "x->x"); } ASSERT_EQ(IterStatus(iterators[2]), "v->t"); for (auto it : iterators) { delete it; } Destroy(); } } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // ReadOnlyDB is not supported TEST_P(ColumnFamilyTest, ReadOnlyDBTest) { Open(); CreateColumnFamiliesAndReopen({"one", "two", "three", "four"}); ASSERT_OK(Put(0, "a", "b")); ASSERT_OK(Put(1, "foo", "bla")); ASSERT_OK(Put(2, "foo", "blabla")); ASSERT_OK(Put(3, "foo", "blablabla")); ASSERT_OK(Put(4, "foo", "blablablabla")); DropColumnFamilies({2}); Close(); // open only a subset of column families AssertOpenReadOnly({"default", "one", "four"}); ASSERT_EQ("NOT_FOUND", Get(0, "foo")); ASSERT_EQ("bla", Get(1, "foo")); ASSERT_EQ("blablablabla", Get(2, "foo")); // test newiterators { std::vector iterators; ASSERT_OK(db_->NewIterators(ReadOptions(), handles_, &iterators)); for (auto it : iterators) { it->SeekToFirst(); } ASSERT_EQ(IterStatus(iterators[0]), "a->b"); ASSERT_EQ(IterStatus(iterators[1]), "foo->bla"); ASSERT_EQ(IterStatus(iterators[2]), "foo->blablablabla"); for (auto it : iterators) { it->Next(); } ASSERT_EQ(IterStatus(iterators[0]), "(invalid)"); ASSERT_EQ(IterStatus(iterators[1]), "(invalid)"); ASSERT_EQ(IterStatus(iterators[2]), "(invalid)"); for (auto it : iterators) { delete it; } } Close(); // can't open dropped column family Status s = OpenReadOnly({"default", "one", "two"}); ASSERT_TRUE(!s.ok()); // Can't open without specifying default column family s = OpenReadOnly({"one", "four"}); ASSERT_TRUE(!s.ok()); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // WaitForFlush() is not supported in lite 
TEST_P(ColumnFamilyTest, DontRollEmptyLogs) { Open(); CreateColumnFamiliesAndReopen({"one", "two", "three", "four"}); for (size_t i = 0; i < handles_.size(); ++i) { PutRandomData(static_cast(i), 10, 100); } int num_writable_file_start = env_->GetNumberOfNewWritableFileCalls(); // this will trigger the flushes for (int i = 0; i <= 4; ++i) { ASSERT_OK(Flush(i)); } for (int i = 0; i < 4; ++i) { WaitForFlush(i); } int total_new_writable_files = env_->GetNumberOfNewWritableFileCalls() - num_writable_file_start; ASSERT_EQ(static_cast(total_new_writable_files), handles_.size() + 1); Close(); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // WaitForCompaction() is not supported in lite TEST_P(ColumnFamilyTest, FlushStaleColumnFamilies) { Open(); CreateColumnFamilies({"one", "two"}); ColumnFamilyOptions default_cf, one, two; default_cf.write_buffer_size = 100000; // small write buffer size default_cf.arena_block_size = 4096; default_cf.disable_auto_compactions = true; one.disable_auto_compactions = true; two.disable_auto_compactions = true; db_options_.max_total_wal_size = 210000; Reopen({default_cf, one, two}); PutRandomData(2, 1, 10); // 10 bytes for (int i = 0; i < 2; ++i) { PutRandomData(0, 100, 1000); // flush WaitForFlush(0); AssertCountLiveFiles(i + 1); } // third flush. 
now, CF [two] should be detected as stale and flushed // column family 1 should not be flushed since it's empty PutRandomData(0, 100, 1000); // flush WaitForFlush(0); WaitForFlush(2); // 3 files for default column families, 1 file for column family [two], zero // files for column family [one], because it's empty AssertCountLiveFiles(4); Flush(0); ASSERT_EQ(0, dbfull()->TEST_total_log_size()); Close(); } #endif // !ROCKSDB_LITE TEST_P(ColumnFamilyTest, CreateMissingColumnFamilies) { Status s = TryOpen({"one", "two"}); ASSERT_TRUE(!s.ok()); db_options_.create_missing_column_families = true; s = TryOpen({"default", "one", "two"}); ASSERT_TRUE(s.ok()); Close(); } TEST_P(ColumnFamilyTest, SanitizeOptions) { DBOptions db_options; for (int s = kCompactionStyleLevel; s <= kCompactionStyleUniversal; ++s) { for (int l = 0; l <= 2; l++) { for (int i = 1; i <= 3; i++) { for (int j = 1; j <= 3; j++) { for (int k = 1; k <= 3; k++) { ColumnFamilyOptions original; original.compaction_style = static_cast(s); original.num_levels = l; original.level0_stop_writes_trigger = i; original.level0_slowdown_writes_trigger = j; original.level0_file_num_compaction_trigger = k; original.write_buffer_size = l * 4 * 1024 * 1024 + i * 1024 * 1024 + j * 1024 + k; ColumnFamilyOptions result = SanitizeOptions(ImmutableDBOptions(db_options), original); ASSERT_TRUE(result.level0_stop_writes_trigger >= result.level0_slowdown_writes_trigger); ASSERT_TRUE(result.level0_slowdown_writes_trigger >= result.level0_file_num_compaction_trigger); ASSERT_TRUE(result.level0_file_num_compaction_trigger == original.level0_file_num_compaction_trigger); if (s == kCompactionStyleLevel) { ASSERT_GE(result.num_levels, 2); } else { ASSERT_GE(result.num_levels, 1); if (original.num_levels >= 1) { ASSERT_EQ(result.num_levels, original.num_levels); } } // Make sure Sanitize options sets arena_block_size to 1/8 of // the write_buffer_size, rounded up to a multiple of 4k. 
size_t expected_arena_block_size = l * 4 * 1024 * 1024 / 8 + i * 1024 * 1024 / 8; if (j + k != 0) { // not a multiple of 4k, round up 4k expected_arena_block_size += 4 * 1024; } ASSERT_EQ(expected_arena_block_size, result.arena_block_size); } } } } } } TEST_P(ColumnFamilyTest, ReadDroppedColumnFamily) { // iter 0 -- drop CF, don't reopen // iter 1 -- delete CF, reopen for (int iter = 0; iter < 2; ++iter) { db_options_.create_missing_column_families = true; db_options_.max_open_files = 20; // delete obsolete files always db_options_.delete_obsolete_files_period_micros = 0; Open({"default", "one", "two"}); ColumnFamilyOptions options; options.level0_file_num_compaction_trigger = 100; options.level0_slowdown_writes_trigger = 200; options.level0_stop_writes_trigger = 200; options.write_buffer_size = 100000; // small write buffer size Reopen({options, options, options}); // 1MB should create ~10 files for each CF int kKeysNum = 10000; PutRandomData(0, kKeysNum, 100); PutRandomData(1, kKeysNum, 100); PutRandomData(2, kKeysNum, 100); { std::unique_ptr iterator( db_->NewIterator(ReadOptions(), handles_[2])); iterator->SeekToFirst(); if (iter == 0) { // Drop CF two ASSERT_OK(db_->DropColumnFamily(handles_[2])); } else { // delete CF two db_->DestroyColumnFamilyHandle(handles_[2]); handles_[2] = nullptr; } // Make sure iterator created can still be used. 
int count = 0; for (; iterator->Valid(); iterator->Next()) { ASSERT_OK(iterator->status()); ++count; } ASSERT_OK(iterator->status()); ASSERT_EQ(count, kKeysNum); } // Add bunch more data to other CFs PutRandomData(0, kKeysNum, 100); PutRandomData(1, kKeysNum, 100); if (iter == 1) { Reopen(); } // Since we didn't delete CF handle, RocksDB's contract guarantees that // we're still able to read dropped CF for (int i = 0; i < 3; ++i) { std::unique_ptr iterator( db_->NewIterator(ReadOptions(), handles_[i])); int count = 0; for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { ASSERT_OK(iterator->status()); ++count; } ASSERT_OK(iterator->status()); ASSERT_EQ(count, kKeysNum * ((i == 2) ? 1 : 2)); } Close(); Destroy(); } } TEST_P(ColumnFamilyTest, LiveIteratorWithDroppedColumnFamily) { db_options_.create_missing_column_families = true; db_options_.max_open_files = 20; // delete obsolete files always db_options_.delete_obsolete_files_period_micros = 0; Open({"default", "one", "two"}); ColumnFamilyOptions options; options.level0_file_num_compaction_trigger = 100; options.level0_slowdown_writes_trigger = 200; options.level0_stop_writes_trigger = 200; options.write_buffer_size = 100000; // small write buffer size Reopen({options, options, options}); // 1MB should create ~10 files for each CF int kKeysNum = 10000; PutRandomData(1, kKeysNum, 100); { std::unique_ptr iterator( db_->NewIterator(ReadOptions(), handles_[1])); iterator->SeekToFirst(); DropColumnFamilies({1}); // Make sure iterator created can still be used. 
int count = 0; for (; iterator->Valid(); iterator->Next()) { ASSERT_OK(iterator->status()); ++count; } ASSERT_OK(iterator->status()); ASSERT_EQ(count, kKeysNum); } Reopen(); Close(); Destroy(); } TEST_P(ColumnFamilyTest, FlushAndDropRaceCondition) { db_options_.create_missing_column_families = true; Open({"default", "one"}); ColumnFamilyOptions options; options.level0_file_num_compaction_trigger = 100; options.level0_slowdown_writes_trigger = 200; options.level0_stop_writes_trigger = 200; options.max_write_buffer_number = 20; options.write_buffer_size = 100000; // small write buffer size Reopen({options, options}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"VersionSet::LogAndApply::ColumnFamilyDrop:0", "FlushJob::WriteLevel0Table"}, {"VersionSet::LogAndApply::ColumnFamilyDrop:1", "FlushJob::InstallResults"}, {"FlushJob::InstallResults", "VersionSet::LogAndApply::ColumnFamilyDrop:2"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); test::SleepingBackgroundTask sleeping_task; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); // 1MB should create ~10 files for each CF int kKeysNum = 10000; PutRandomData(1, kKeysNum, 100); std::vector threads; threads.emplace_back([&] { ASSERT_OK(db_->DropColumnFamily(handles_[1])); }); sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); sleeping_task.Reset(); // now we sleep again. 
this is just so we're certain that flush job finished env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); { // Since we didn't delete CF handle, RocksDB's contract guarantees that // we're still able to read dropped CF std::unique_ptr iterator( db_->NewIterator(ReadOptions(), handles_[1])); int count = 0; for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { ASSERT_OK(iterator->status()); ++count; } ASSERT_OK(iterator->status()); ASSERT_EQ(count, kKeysNum); } for (auto& t : threads) { t.join(); } Close(); Destroy(); } #ifndef ROCKSDB_LITE // skipped as persisting options is not supported in ROCKSDB_LITE namespace { std::atomic test_stage(0); std::atomic ordered_by_writethread(false); const int kMainThreadStartPersistingOptionsFile = 1; const int kChildThreadFinishDroppingColumnFamily = 2; void DropSingleColumnFamily(ColumnFamilyTest* cf_test, int cf_id, std::vector* comparators) { while (test_stage < kMainThreadStartPersistingOptionsFile && !ordered_by_writethread) { Env::Default()->SleepForMicroseconds(100); } cf_test->DropColumnFamilies({cf_id}); if ((*comparators)[cf_id]) { delete (*comparators)[cf_id]; (*comparators)[cf_id] = nullptr; } test_stage = kChildThreadFinishDroppingColumnFamily; } } // namespace TEST_P(ColumnFamilyTest, CreateAndDropRace) { const int kCfCount = 5; std::vector cf_opts; std::vector comparators; for (int i = 0; i < kCfCount; ++i) { cf_opts.emplace_back(); comparators.push_back(new test::SimpleSuffixReverseComparator()); cf_opts.back().comparator = comparators.back(); } db_options_.create_if_missing = true; db_options_.create_missing_column_families = true; auto main_thread_id = std::this_thread::get_id(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "PersistRocksDBOptions:start", [&](void* /*arg*/) { auto current_thread_id = std::this_thread::get_id(); // If it's the main thread hitting this sync-point, then it 
// will be blocked until some other thread update the test_stage. if (main_thread_id == current_thread_id) { test_stage = kMainThreadStartPersistingOptionsFile; while (test_stage < kChildThreadFinishDroppingColumnFamily && !ordered_by_writethread) { Env::Default()->SleepForMicroseconds(100); } } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WriteThread::EnterUnbatched:Wait", [&](void* /*arg*/) { // This means a thread doing DropColumnFamily() is waiting for // other thread to finish persisting options. // In such case, we update the test_stage to unblock the main thread. ordered_by_writethread = true; }); // Create a database with four column families Open({"default", "one", "two", "three"}, {cf_opts[0], cf_opts[1], cf_opts[2], cf_opts[3]}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Start a thread that will drop the first column family // and its comparator ROCKSDB_NAMESPACE::port::Thread drop_cf_thread(DropSingleColumnFamily, this, 1, &comparators); DropColumnFamilies({2}); drop_cf_thread.join(); Close(); Destroy(); for (auto* comparator : comparators) { if (comparator) { delete comparator; } } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } #endif // !ROCKSDB_LITE TEST_P(ColumnFamilyTest, WriteStallSingleColumnFamily) { const uint64_t kBaseRate = 800000u; db_options_.delayed_write_rate = kBaseRate; db_options_.max_background_compactions = 6; Open({"default"}); ColumnFamilyData* cfd = static_cast(db_->DefaultColumnFamily())->cfd(); VersionStorageInfo* vstorage = cfd->current()->storage_info(); MutableCFOptions mutable_cf_options(column_family_options_); mutable_cf_options.level0_slowdown_writes_trigger = 20; mutable_cf_options.level0_stop_writes_trigger = 10000; mutable_cf_options.soft_pending_compaction_bytes_limit = 200; mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; mutable_cf_options.disable_auto_compactions = false; 
vstorage->TEST_set_estimated_compaction_needed_bytes(50);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  // Pending bytes above the soft limit configured earlier in this test:
  // writes are delayed, starting at the base rate.
  vstorage->TEST_set_estimated_compaction_needed_bytes(201);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
  ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());

  // The expected rate is divided by 1.25 whenever the estimate grows between
  // two recalculations, and multiplied back when it shrinks.
  vstorage->TEST_set_estimated_compaction_needed_bytes(400);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());
  ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());

  vstorage->TEST_set_estimated_compaction_needed_bytes(500);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(450);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(205);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(202);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(201);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());

  // Back under the soft limit: the delay is lifted.
  vstorage->TEST_set_estimated_compaction_needed_bytes(198);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  vstorage->TEST_set_estimated_compaction_needed_bytes(399);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(599);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());

  // Pending bytes above the hard limit: writes are stopped, not delayed.
  vstorage->TEST_set_estimated_compaction_needed_bytes(2001);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());

  vstorage->TEST_set_estimated_compaction_needed_bytes(3001);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  vstorage->TEST_set_estimated_compaction_needed_bytes(390);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(100);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  // Same exercise driven by the L0 delay-trigger count instead of bytes.
  vstorage->set_l0_delay_trigger_count(100);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate());
  ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed());

  vstorage->set_l0_delay_trigger_count(101);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate());

  vstorage->set_l0_delay_trigger_count(0);
  vstorage->TEST_set_estimated_compaction_needed_bytes(300);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());

  vstorage->set_l0_delay_trigger_count(101);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25 / 1.25 / 1.25, GetDbDelayedWriteRate());

  vstorage->TEST_set_estimated_compaction_needed_bytes(200);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate());

  vstorage->set_l0_delay_trigger_count(0);
  vstorage->TEST_set_estimated_compaction_needed_bytes(0);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  // With auto compactions disabled the assertions below expect no delay at
  // all, while the controller keeps the rate that is set explicitly here.
  mutable_cf_options.disable_auto_compactions = true;
  dbfull()->TEST_write_controler().set_delayed_write_rate(kBaseRate);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  vstorage->set_l0_delay_trigger_count(50);
  RecalculateWriteStallConditions(cfd, mutable_cf_options);
  ASSERT_TRUE(!IsDbWriteStopped());
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  ASSERT_EQ(0,
GetDbDelayedWriteRate()); ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate()); vstorage->set_l0_delay_trigger_count(60); vstorage->TEST_set_estimated_compaction_needed_bytes(300); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(0, GetDbDelayedWriteRate()); ASSERT_EQ(kBaseRate, dbfull()->TEST_write_controler().delayed_write_rate()); mutable_cf_options.disable_auto_compactions = false; vstorage->set_l0_delay_trigger_count(70); vstorage->TEST_set_estimated_compaction_needed_bytes(500); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); vstorage->set_l0_delay_trigger_count(71); vstorage->TEST_set_estimated_compaction_needed_bytes(501); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); } TEST_P(ColumnFamilyTest, CompactionSpeedupSingleColumnFamily) { db_options_.max_background_compactions = 6; Open({"default"}); ColumnFamilyData* cfd = static_cast(db_->DefaultColumnFamily())->cfd(); VersionStorageInfo* vstorage = cfd->current()->storage_info(); MutableCFOptions mutable_cf_options(column_family_options_); // Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8 mutable_cf_options.level0_file_num_compaction_trigger = 4; mutable_cf_options.level0_slowdown_writes_trigger = 36; mutable_cf_options.level0_stop_writes_trigger = 50; // Speedup threshold = 200 / 4 = 50 mutable_cf_options.soft_pending_compaction_bytes_limit = 200; mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; vstorage->TEST_set_estimated_compaction_needed_bytes(40); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, 
dbfull()->TEST_BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(50); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(300); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(45); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(7); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(9); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(6); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); // Speed up threshold = min(4 * 2, 4 + (12 - 4)/4) = 6 mutable_cf_options.level0_file_num_compaction_trigger = 4; mutable_cf_options.level0_slowdown_writes_trigger = 16; mutable_cf_options.level0_stop_writes_trigger = 30; vstorage->set_l0_delay_trigger_count(5); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(7); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(3); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); } TEST_P(ColumnFamilyTest, WriteStallTwoColumnFamilies) { const uint64_t kBaseRate = 810000u; db_options_.delayed_write_rate = kBaseRate; Open(); CreateColumnFamilies({"one"}); ColumnFamilyData* cfd = static_cast(db_->DefaultColumnFamily())->cfd(); VersionStorageInfo* vstorage = cfd->current()->storage_info(); 
ColumnFamilyData* cfd1 = static_cast(handles_[1])->cfd(); VersionStorageInfo* vstorage1 = cfd1->current()->storage_info(); MutableCFOptions mutable_cf_options(column_family_options_); mutable_cf_options.level0_slowdown_writes_trigger = 20; mutable_cf_options.level0_stop_writes_trigger = 10000; mutable_cf_options.soft_pending_compaction_bytes_limit = 200; mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; MutableCFOptions mutable_cf_options1 = mutable_cf_options; mutable_cf_options1.soft_pending_compaction_bytes_limit = 500; vstorage->TEST_set_estimated_compaction_needed_bytes(50); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); vstorage1->TEST_set_estimated_compaction_needed_bytes(201); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); vstorage1->TEST_set_estimated_compaction_needed_bytes(600); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); vstorage->TEST_set_estimated_compaction_needed_bytes(70); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate, GetDbDelayedWriteRate()); vstorage1->TEST_set_estimated_compaction_needed_bytes(800); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); vstorage->TEST_set_estimated_compaction_needed_bytes(300); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.25 / 1.25, 
GetDbDelayedWriteRate()); vstorage1->TEST_set_estimated_compaction_needed_bytes(700); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); vstorage->TEST_set_estimated_compaction_needed_bytes(500); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.25 / 1.25, GetDbDelayedWriteRate()); vstorage1->TEST_set_estimated_compaction_needed_bytes(600); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_TRUE(!IsDbWriteStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_EQ(kBaseRate / 1.25, GetDbDelayedWriteRate()); } TEST_P(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) { db_options_.max_background_compactions = 6; column_family_options_.soft_pending_compaction_bytes_limit = 200; column_family_options_.hard_pending_compaction_bytes_limit = 2000; Open(); CreateColumnFamilies({"one"}); ColumnFamilyData* cfd = static_cast(db_->DefaultColumnFamily())->cfd(); VersionStorageInfo* vstorage = cfd->current()->storage_info(); ColumnFamilyData* cfd1 = static_cast(handles_[1])->cfd(); VersionStorageInfo* vstorage1 = cfd1->current()->storage_info(); MutableCFOptions mutable_cf_options(column_family_options_); // Speed up threshold = min(4 * 2, 4 + (36 - 4)/4) = 8 mutable_cf_options.level0_file_num_compaction_trigger = 4; mutable_cf_options.level0_slowdown_writes_trigger = 36; mutable_cf_options.level0_stop_writes_trigger = 30; // Speedup threshold = 200 / 4 = 50 mutable_cf_options.soft_pending_compaction_bytes_limit = 200; mutable_cf_options.hard_pending_compaction_bytes_limit = 2000; MutableCFOptions mutable_cf_options1 = mutable_cf_options; mutable_cf_options1.level0_slowdown_writes_trigger = 16; vstorage->TEST_set_estimated_compaction_needed_bytes(40); 
RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(60); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage1->TEST_set_estimated_compaction_needed_bytes(30); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage1->TEST_set_estimated_compaction_needed_bytes(70); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage->TEST_set_estimated_compaction_needed_bytes(20); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage1->TEST_set_estimated_compaction_needed_bytes(3); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(9); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage1->set_l0_delay_trigger_count(2); RecalculateWriteStallConditions(cfd1, mutable_cf_options); ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); vstorage->set_l0_delay_trigger_count(0); RecalculateWriteStallConditions(cfd, mutable_cf_options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); } TEST_P(ColumnFamilyTest, CreateAndDestoryOptions) { std::unique_ptr cfo(new ColumnFamilyOptions()); ColumnFamilyHandle* cfh; Open(); ASSERT_OK(db_->CreateColumnFamily(*(cfo.get()), "yoyo", &cfh)); cfo.reset(); ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar")); ASSERT_OK(db_->Flush(FlushOptions(), cfh)); ASSERT_OK(db_->DropColumnFamily(cfh)); ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); } TEST_P(ColumnFamilyTest, CreateDropAndDestroy) { 
ColumnFamilyHandle* cfh; Open(); ASSERT_OK(db_->CreateColumnFamily(ColumnFamilyOptions(), "yoyo", &cfh)); ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar")); ASSERT_OK(db_->Flush(FlushOptions(), cfh)); ASSERT_OK(db_->DropColumnFamily(cfh)); ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); } #ifndef ROCKSDB_LITE TEST_P(ColumnFamilyTest, CreateDropAndDestroyWithoutFileDeletion) { ColumnFamilyHandle* cfh; Open(); ASSERT_OK(db_->CreateColumnFamily(ColumnFamilyOptions(), "yoyo", &cfh)); ASSERT_OK(db_->Put(WriteOptions(), cfh, "foo", "bar")); ASSERT_OK(db_->Flush(FlushOptions(), cfh)); ASSERT_OK(db_->DisableFileDeletions()); ASSERT_OK(db_->DropColumnFamily(cfh)); ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); } TEST_P(ColumnFamilyTest, FlushCloseWALFiles) { SpecialEnv env(Env::Default()); db_options_.env = &env; db_options_.max_background_flushes = 1; column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2)); Open(); CreateColumnFamilies({"one"}); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(0, "fodor", "mirko")); ASSERT_OK(Put(1, "fodor", "mirko")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::BGWorkFlush:done", "FlushCloseWALFiles:0"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Block flush jobs from running test::SleepingBackgroundTask sleeping_task; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); WriteOptions wo; wo.sync = true; ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); ASSERT_EQ(2, env.num_open_wal_file_.load()); sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); TEST_SYNC_POINT("FlushCloseWALFiles:0"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(1, env.num_open_wal_file_.load()); Reopen(); ASSERT_EQ("mirko", Get(0, "fodor")); ASSERT_EQ("mirko", Get(1, "fodor")); db_options_.env = env_; Close(); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // WaitForFlush() is not supported 
// Checks that deleting an iterator which holds the last reference to an old
// super version closes the obsolete WAL file while flush jobs are blocked:
// the open-WAL count goes from 2 to 1 right after `delete it`.
TEST_P(ColumnFamilyTest, IteratorCloseWALFile1) {
  SpecialEnv env(Env::Default());
  db_options_.env = &env;
  db_options_.max_background_flushes = 1;
  column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2));
  Open();
  CreateColumnFamilies({"one"});
  ASSERT_OK(Put(1, "fodor", "mirko"));
  // Create an iterator holding the current super version.
  Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);
  // A flush will make `it` hold the last reference of its super version.
  Flush(1);

  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_OK(Put(0, "fodor", "mirko"));
  ASSERT_OK(Put(1, "fodor", "mirko"));

  // Flush jobs will close previous WAL files after finishing. By
  // blocking flush jobs from running, we trigger a condition where
  // the iterator destructor should close the WAL files.
  test::SleepingBackgroundTask sleeping_task;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
                 Env::Priority::HIGH);

  WriteOptions wo;
  wo.sync = true;
  ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko"));

  ASSERT_EQ(2, env.num_open_wal_file_.load());
  // Deleting the iterator will clear its super version, triggering
  // closing all files
  delete it;
  ASSERT_EQ(1, env.num_open_wal_file_.load());

  sleeping_task.WakeUp();
  sleeping_task.WaitUntilDone();
  WaitForFlush(1);

  Reopen();
  ASSERT_EQ("mirko", Get(0, "fodor"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  // Restore the fixture's env before `env` goes out of scope.
  db_options_.env = env_;
  Close();
}

// Variant of IteratorCloseWALFile1 in which the iterator is created with
// background_purge_on_iterator_cleanup, and sync points order the purge job
// and the flush job relative to the assertions.
TEST_P(ColumnFamilyTest, IteratorCloseWALFile2) {
  SpecialEnv env(Env::Default());
  // Allow both of flush and purge job to schedule.
  env.SetBackgroundThreads(2, Env::HIGH);
  db_options_.env = &env;
  db_options_.max_background_flushes = 1;
  column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2));
  Open();
  CreateColumnFamilies({"one"});
  ASSERT_OK(Put(1, "fodor", "mirko"));
  // Create an iterator holding the current super version.
ReadOptions ro; ro.background_purge_on_iterator_cleanup = true; Iterator* it = db_->NewIterator(ro, handles_[1]); // A flush will make `it` hold the last reference of its super version. Flush(1); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(0, "fodor", "mirko")); ASSERT_OK(Put(1, "fodor", "mirko")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"ColumnFamilyTest::IteratorCloseWALFile2:0", "DBImpl::BGWorkPurge:start"}, {"ColumnFamilyTest::IteratorCloseWALFile2:2", "DBImpl::BackgroundCallFlush:start"}, {"DBImpl::BGWorkPurge:end", "ColumnFamilyTest::IteratorCloseWALFile2:1"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); WriteOptions wo; wo.sync = true; ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); ASSERT_EQ(2, env.num_open_wal_file_.load()); // Deleting the iterator will clear its super version, triggering // closing all files delete it; ASSERT_EQ(2, env.num_open_wal_file_.load()); TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:0"); TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:1"); ASSERT_EQ(1, env.num_open_wal_file_.load()); TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:2"); WaitForFlush(1); ASSERT_EQ(1, env.num_open_wal_file_.load()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Reopen(); ASSERT_EQ("mirko", Get(0, "fodor")); ASSERT_EQ("mirko", Get(1, "fodor")); db_options_.env = env_; Close(); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // TEST functions are not supported in lite TEST_P(ColumnFamilyTest, ForwardIteratorCloseWALFile) { SpecialEnv env(Env::Default()); // Allow both of flush and purge job to schedule. 
env.SetBackgroundThreads(2, Env::HIGH); db_options_.env = &env; db_options_.max_background_flushes = 1; column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(3)); column_family_options_.level0_file_num_compaction_trigger = 2; Open(); CreateColumnFamilies({"one"}); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(1, "fodar2", "mirko")); Flush(1); // Create an iterator holding the current super version, as well as // the SST file just flushed. ReadOptions ro; ro.tailing = true; ro.background_purge_on_iterator_cleanup = true; Iterator* it = db_->NewIterator(ro, handles_[1]); // A flush will make `it` hold the last reference of its super version. ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(1, "fodar2", "mirko")); Flush(1); WaitForCompaction(); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(0, "fodor", "mirko")); ASSERT_OK(Put(1, "fodor", "mirko")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"ColumnFamilyTest::IteratorCloseWALFile2:0", "DBImpl::BGWorkPurge:start"}, {"ColumnFamilyTest::IteratorCloseWALFile2:2", "DBImpl::BackgroundCallFlush:start"}, {"DBImpl::BGWorkPurge:end", "ColumnFamilyTest::IteratorCloseWALFile2:1"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); WriteOptions wo; wo.sync = true; ASSERT_OK(db_->Put(wo, handles_[1], "fodor", "mirko")); env.delete_count_.store(0); ASSERT_EQ(2, env.num_open_wal_file_.load()); // Deleting the iterator will clear its super version, triggering // closing all files it->Seek(""); ASSERT_EQ(2, env.num_open_wal_file_.load()); ASSERT_EQ(0, env.delete_count_.load()); TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:0"); TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:1"); ASSERT_EQ(1, env.num_open_wal_file_.load()); ASSERT_EQ(1, env.delete_count_.load()); TEST_SYNC_POINT("ColumnFamilyTest::IteratorCloseWALFile2:2"); WaitForFlush(1); ASSERT_EQ(1, env.num_open_wal_file_.load()); ASSERT_EQ(1, 
env.delete_count_.load()); delete it; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Reopen(); ASSERT_EQ("mirko", Get(0, "fodor")); ASSERT_EQ("mirko", Get(1, "fodor")); db_options_.env = env_; Close(); } #endif // !ROCKSDB_LITE // Disable on windows because SyncWAL requires env->IsSyncThreadSafe() // to return true which is not so in unbuffered mode. #ifndef OS_WIN TEST_P(ColumnFamilyTest, LogSyncConflictFlush) { Open(); CreateColumnFamiliesAndReopen({"one", "two"}); Put(0, "", ""); Put(1, "foo", "bar"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::SyncWAL:BeforeMarkLogsSynced:1", "ColumnFamilyTest::LogSyncConflictFlush:1"}, {"ColumnFamilyTest::LogSyncConflictFlush:2", "DBImpl::SyncWAL:BeforeMarkLogsSynced:2"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread thread([&] { db_->SyncWAL(); }); TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:1"); Flush(1); Put(1, "foo", "bar"); Flush(1); TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:2"); thread.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Close(); } #endif // this test is placed here, because the infrastructure for Column Family // test is being used to ensure a roll of wal files. 
// Basic idea is to test that WAL truncation is being detected and not // ignored TEST_P(ColumnFamilyTest, DISABLED_LogTruncationTest) { Open(); CreateColumnFamiliesAndReopen({"one", "two"}); Build(0, 100); // Flush the 0th column family to force a roll of the wal log Flush(0); // Add some more entries Build(100, 100); std::vector filenames; ASSERT_OK(env_->GetChildren(dbname_, &filenames)); // collect wal files std::vector logfs; for (size_t i = 0; i < filenames.size(); i++) { uint64_t number; FileType type; if (!(ParseFileName(filenames[i], &number, &type))) continue; if (type != kLogFile) continue; logfs.push_back(filenames[i]); } std::sort(logfs.begin(), logfs.end()); ASSERT_GE(logfs.size(), 2); // Take the last but one file, and truncate it std::string fpath = dbname_ + "/" + logfs[logfs.size() - 2]; std::vector names_save = names_; uint64_t fsize; ASSERT_OK(env_->GetFileSize(fpath, &fsize)); ASSERT_GT(fsize, 0); Close(); std::string backup_logs = dbname_ + "/backup_logs"; std::string t_fpath = backup_logs + "/" + logfs[logfs.size() - 2]; ASSERT_OK(env_->CreateDirIfMissing(backup_logs)); // Not sure how easy it is to make this data driven. // need to read back the WAL file and truncate last 10 // entries CopyFile(fpath, t_fpath, fsize - 9180); ASSERT_OK(env_->DeleteFile(fpath)); ASSERT_OK(env_->RenameFile(t_fpath, fpath)); db_options_.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; OpenReadOnly(names_save); CheckMissed(); Close(); Open(names_save); CheckMissed(); Close(); // cleanup env_->DeleteDir(backup_logs); } TEST_P(ColumnFamilyTest, DefaultCfPathsTest) { Open(); // Leave cf_paths for one column families to be empty. // Files should be generated according to db_paths for that // column family. ColumnFamilyOptions cf_opt1, cf_opt2; cf_opt1.cf_paths.emplace_back(dbname_ + "_one_1", std::numeric_limits::max()); CreateColumnFamilies({"one", "two"}, {cf_opt1, cf_opt2}); Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); // Fill Column family 1. 
PutRandomData(1, 100, 100); Flush(1); ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Fill column family 2 PutRandomData(2, 100, 100); Flush(2); // SST from Column family 2 should be generated in // db_paths which is dbname_ in this case. ASSERT_EQ(1, GetSstFileCount(dbname_)); } TEST_P(ColumnFamilyTest, MultipleCFPathsTest) { Open(); // Configure Column family specific paths. ColumnFamilyOptions cf_opt1, cf_opt2; cf_opt1.cf_paths.emplace_back(dbname_ + "_one_1", std::numeric_limits::max()); cf_opt2.cf_paths.emplace_back(dbname_ + "_two_1", std::numeric_limits::max()); CreateColumnFamilies({"one", "two"}, {cf_opt1, cf_opt2}); Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); PutRandomData(1, 100, 100, true /* save */); Flush(1); // Check that files are generated in appropriate paths. ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); PutRandomData(2, 100, 100, true /* save */); Flush(2); ASSERT_EQ(1, GetSstFileCount(cf_opt2.cf_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Re-open and verify the keys. 
Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); DBImpl* dbi = reinterpret_cast(db_); for (int cf = 1; cf != 3; ++cf) { ReadOptions read_options; read_options.readahead_size = 0; auto it = dbi->NewIterator(read_options, handles_[cf]); for (it->SeekToFirst(); it->Valid(); it->Next()) { Slice key(it->key()); ASSERT_NE(keys_[cf].end(), keys_[cf].find(key.ToString())); } delete it; for (const auto& key : keys_[cf]) { ASSERT_NE("NOT_FOUND", Get(cf, key)); } } } } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/compact_files_test.cc000066400000000000000000000322341370372246700200140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include #include #include #include #include "db/db_impl/db_impl.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class CompactFilesTest : public testing::Test { public: CompactFilesTest() { env_ = Env::Default(); db_name_ = test::PerThreadDBPath("compact_files_test"); } std::string db_name_; Env* env_; }; // A class which remembers the name of each flushed file. 
class FlushedFileCollector : public EventListener { public: FlushedFileCollector() {} ~FlushedFileCollector() override {} void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { std::lock_guard lock(mutex_); flushed_files_.push_back(info.file_path); } std::vector GetFlushedFiles() { std::lock_guard lock(mutex_); std::vector result; for (auto fname : flushed_files_) { result.push_back(fname); } return result; } void ClearFlushedFiles() { std::lock_guard lock(mutex_); flushed_files_.clear(); } private: std::vector flushed_files_; std::mutex mutex_; }; TEST_F(CompactFilesTest, L0ConflictsFiles) { Options options; // to trigger compaction more easily const int kWriteBufferSize = 10000; const int kLevel0Trigger = 2; options.create_if_missing = true; options.compaction_style = kCompactionStyleLevel; // Small slowdown and stop trigger for experimental purpose. options.level0_slowdown_writes_trigger = 20; options.level0_stop_writes_trigger = 20; options.level0_stop_writes_trigger = 20; options.write_buffer_size = kWriteBufferSize; options.level0_file_num_compaction_trigger = kLevel0Trigger; options.compression = kNoCompression; DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); assert(s.ok()); assert(db); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"CompactFilesImpl:0", "BackgroundCallCompaction:0"}, {"BackgroundCallCompaction:1", "CompactFilesImpl:1"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // create couple files // Background compaction starts and waits in BackgroundCallCompaction:0 for (int i = 0; i < kLevel0Trigger * 4; ++i) { db->Put(WriteOptions(), ToString(i), ""); db->Put(WriteOptions(), ToString(100 - i), ""); db->Flush(FlushOptions()); } ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta; db->GetColumnFamilyMetaData(&meta); std::string file1; for (auto& file : meta.levels[0].files) { ASSERT_EQ(0, meta.levels[0].level); if (file1 == "") { file1 = file.db_path + "/" 
+ file.name; } else { std::string file2 = file.db_path + "/" + file.name; // Another thread starts a compact files and creates an L0 compaction // The background compaction then notices that there is an L0 compaction // already in progress and doesn't do an L0 compaction // Once the background compaction finishes, the compact files finishes ASSERT_OK(db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), {file1, file2}, 0)); break; } } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); delete db; } TEST_F(CompactFilesTest, ObsoleteFiles) { Options options; // to trigger compaction more easily const int kWriteBufferSize = 65536; options.create_if_missing = true; // Disable RocksDB background compaction. options.compaction_style = kCompactionStyleNone; options.level0_slowdown_writes_trigger = (1 << 30); options.level0_stop_writes_trigger = (1 << 30); options.write_buffer_size = kWriteBufferSize; options.max_write_buffer_number = 2; options.compression = kNoCompression; // Add listener FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); assert(s.ok()); assert(db); // create couple files for (int i = 1000; i < 2000; ++i) { db->Put(WriteOptions(), ToString(i), std::string(kWriteBufferSize / 10, 'a' + (i % 26))); } auto l0_files = collector->GetFlushedFiles(); ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); reinterpret_cast(db)->TEST_WaitForCompact(); // verify all compaction input files are deleted for (auto fname : l0_files) { ASSERT_EQ(Status::NotFound(), env_->FileExists(fname)); } delete db; } TEST_F(CompactFilesTest, NotCutOutputOnLevel0) { Options options; options.create_if_missing = true; // Disable RocksDB background compaction. 
options.compaction_style = kCompactionStyleNone; options.level0_slowdown_writes_trigger = 1000; options.level0_stop_writes_trigger = 1000; options.write_buffer_size = 65536; options.max_write_buffer_number = 2; options.compression = kNoCompression; options.max_compaction_bytes = 5000; // Add listener FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); assert(s.ok()); assert(db); // create couple files for (int i = 0; i < 500; ++i) { db->Put(WriteOptions(), ToString(i), std::string(1000, 'a' + (i % 26))); } reinterpret_cast(db)->TEST_WaitForFlushMemTable(); auto l0_files_1 = collector->GetFlushedFiles(); collector->ClearFlushedFiles(); for (int i = 0; i < 500; ++i) { db->Put(WriteOptions(), ToString(i), std::string(1000, 'a' + (i % 26))); } reinterpret_cast(db)->TEST_WaitForFlushMemTable(); auto l0_files_2 = collector->GetFlushedFiles(); ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_1, 0)); ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_2, 0)); // no assertion failure delete db; } TEST_F(CompactFilesTest, CapturingPendingFiles) { Options options; options.create_if_missing = true; // Disable RocksDB background compaction. options.compaction_style = kCompactionStyleNone; // Always do full scans for obsolete files (needed to reproduce the issue). options.delete_obsolete_files_period_micros = 0; // Add listener. FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); assert(s.ok()); assert(db); // Create 5 files. 
for (int i = 0; i < 5; ++i) { db->Put(WriteOptions(), "key" + ToString(i), "value"); db->Flush(FlushOptions()); } auto l0_files = collector->GetFlushedFiles(); EXPECT_EQ(5, l0_files.size()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"CompactFilesImpl:2", "CompactFilesTest.CapturingPendingFiles:0"}, {"CompactFilesTest.CapturingPendingFiles:1", "CompactFilesImpl:3"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Start compacting files. ROCKSDB_NAMESPACE::port::Thread compaction_thread( [&] { EXPECT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); }); // In the meantime flush another file. TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0"); db->Put(WriteOptions(), "key5", "value"); db->Flush(FlushOptions()); TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1"); compaction_thread.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); delete db; // Make sure we can reopen the DB. s = DB::Open(options, db_name_, &db); ASSERT_TRUE(s.ok()); assert(db); delete db; } TEST_F(CompactFilesTest, CompactionFilterWithGetSv) { class FilterWithGet : public CompactionFilter { public: bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { if (db_ == nullptr) { return true; } std::string res; db_->Get(ReadOptions(), "", &res); return true; } void SetDB(DB* db) { db_ = db; } const char* Name() const override { return "FilterWithGet"; } private: DB* db_; }; std::shared_ptr cf(new FilterWithGet()); Options options; options.create_if_missing = true; options.compaction_filter = cf.get(); DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); ASSERT_OK(s); cf->SetDB(db); // Write one L0 file db->Put(WriteOptions(), "K1", "V1"); db->Flush(FlushOptions()); // Compact all L0 files using CompactFiles ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta; db->GetColumnFamilyMetaData(&meta); for 
(auto& file : meta.levels[0].files) { std::string fname = file.db_path + "/" + file.name; ASSERT_OK( db->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), {fname}, 0)); } delete db; } TEST_F(CompactFilesTest, SentinelCompressionType) { if (!Zlib_Supported()) { fprintf(stderr, "zlib compression not supported, skip this test\n"); return; } if (!Snappy_Supported()) { fprintf(stderr, "snappy compression not supported, skip this test\n"); return; } // Check that passing `CompressionType::kDisableCompressionOption` to // `CompactFiles` causes it to use the column family compression options. for (auto compaction_style : {CompactionStyle::kCompactionStyleLevel, CompactionStyle::kCompactionStyleUniversal, CompactionStyle::kCompactionStyleNone}) { DestroyDB(db_name_, Options()); Options options; options.compaction_style = compaction_style; // L0: Snappy, L1: ZSTD, L2: Snappy options.compression_per_level = {CompressionType::kSnappyCompression, CompressionType::kZlibCompression, CompressionType::kSnappyCompression}; options.create_if_missing = true; FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); DB* db = nullptr; ASSERT_OK(DB::Open(options, db_name_, &db)); db->Put(WriteOptions(), "key", "val"); db->Flush(FlushOptions()); auto l0_files = collector->GetFlushedFiles(); ASSERT_EQ(1, l0_files.size()); // L0->L1 compaction, so output should be ZSTD-compressed CompactionOptions compaction_opts; compaction_opts.compression = CompressionType::kDisableCompressionOption; ASSERT_OK(db->CompactFiles(compaction_opts, l0_files, 1)); ROCKSDB_NAMESPACE::TablePropertiesCollection all_tables_props; ASSERT_OK(db->GetPropertiesOfAllTables(&all_tables_props)); for (const auto& name_and_table_props : all_tables_props) { ASSERT_EQ(CompressionTypeToString(CompressionType::kZlibCompression), name_and_table_props.second->compression_name); } delete db; } } TEST_F(CompactFilesTest, GetCompactionJobInfo) { Options options; 
options.create_if_missing = true; // Disable RocksDB background compaction. options.compaction_style = kCompactionStyleNone; options.level0_slowdown_writes_trigger = 1000; options.level0_stop_writes_trigger = 1000; options.write_buffer_size = 65536; options.max_write_buffer_number = 2; options.compression = kNoCompression; options.max_compaction_bytes = 5000; // Add listener FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); assert(s.ok()); assert(db); // create couple files for (int i = 0; i < 500; ++i) { db->Put(WriteOptions(), ToString(i), std::string(1000, 'a' + (i % 26))); } reinterpret_cast(db)->TEST_WaitForFlushMemTable(); auto l0_files_1 = collector->GetFlushedFiles(); CompactionOptions co; co.compression = CompressionType::kLZ4Compression; CompactionJobInfo compaction_job_info{}; ASSERT_OK( db->CompactFiles(co, l0_files_1, 0, -1, nullptr, &compaction_job_info)); ASSERT_EQ(compaction_job_info.base_input_level, 0); ASSERT_EQ(compaction_job_info.cf_id, db->DefaultColumnFamily()->GetID()); ASSERT_EQ(compaction_job_info.cf_name, db->DefaultColumnFamily()->GetName()); ASSERT_EQ(compaction_job_info.compaction_reason, CompactionReason::kManualCompaction); ASSERT_EQ(compaction_job_info.compression, CompressionType::kLZ4Compression); ASSERT_EQ(compaction_job_info.output_level, 0); ASSERT_OK(compaction_job_info.status); // no assertion failure delete db; } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as DBImpl::CompactFiles is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/compacted_db_impl.cc000066400000000000000000000132471370372246700175750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, 
Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "db/compacted_db_impl.h" #include "db/db_impl/db_impl.h" #include "db/version_set.h" #include "table/get_context.h" namespace ROCKSDB_NAMESPACE { extern void MarkKeyMayExist(void* arg); extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key, const Slice& v, bool hit_and_return); CompactedDBImpl::CompactedDBImpl( const DBOptions& options, const std::string& dbname) : DBImpl(options, dbname), cfd_(nullptr), version_(nullptr), user_comparator_(nullptr) { } CompactedDBImpl::~CompactedDBImpl() { } size_t CompactedDBImpl::FindFile(const Slice& key) { size_t right = files_.num_files - 1; auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool { return user_comparator_->Compare(ExtractUserKey(f.largest_key), k) < 0; }; return static_cast(std::lower_bound(files_.files, files_.files + right, key, cmp) - files_.files); } Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*, const Slice& key, PinnableSlice* value) { GetContext get_context(user_comparator_, nullptr, nullptr, nullptr, GetContext::kNotFound, key, value, nullptr, nullptr, nullptr, true, nullptr, nullptr); LookupKey lkey(key, kMaxSequenceNumber); files_.files[FindFile(key)].fd.table_reader->Get(options, lkey.internal_key(), &get_context, nullptr); if (get_context.State() == GetContext::kFound) { return Status::OK(); } return Status::NotFound(); } std::vector CompactedDBImpl::MultiGet(const ReadOptions& options, const std::vector&, const std::vector& keys, std::vector* values) { autovector reader_list; for (const auto& key : keys) { const FdWithKeyRange& f = files_.files[FindFile(key)]; if (user_comparator_->Compare(key, ExtractUserKey(f.smallest_key)) < 0) { reader_list.push_back(nullptr); } else { LookupKey lkey(key, 
kMaxSequenceNumber); f.fd.table_reader->Prepare(lkey.internal_key()); reader_list.push_back(f.fd.table_reader); } } std::vector statuses(keys.size(), Status::NotFound()); values->resize(keys.size()); int idx = 0; for (auto* r : reader_list) { if (r != nullptr) { PinnableSlice pinnable_val; std::string& value = (*values)[idx]; GetContext get_context(user_comparator_, nullptr, nullptr, nullptr, GetContext::kNotFound, keys[idx], &pinnable_val, nullptr, nullptr, nullptr, true, nullptr, nullptr); LookupKey lkey(keys[idx], kMaxSequenceNumber); r->Get(options, lkey.internal_key(), &get_context, nullptr); value.assign(pinnable_val.data(), pinnable_val.size()); if (get_context.State() == GetContext::kFound) { statuses[idx] = Status::OK(); } } ++idx; } return statuses; } Status CompactedDBImpl::Init(const Options& options) { SuperVersionContext sv_context(/* create_superversion */ true); mutex_.Lock(); ColumnFamilyDescriptor cf(kDefaultColumnFamilyName, ColumnFamilyOptions(options)); Status s = Recover({cf}, true /* read only */, false, true); if (s.ok()) { cfd_ = reinterpret_cast( DefaultColumnFamily())->cfd(); cfd_->InstallSuperVersion(&sv_context, &mutex_); } mutex_.Unlock(); sv_context.Clean(); if (!s.ok()) { return s; } NewThreadStatusCfInfo(cfd_); version_ = cfd_->GetSuperVersion()->current; user_comparator_ = cfd_->user_comparator(); auto* vstorage = version_->storage_info(); if (vstorage->num_non_empty_levels() == 0) { return Status::NotSupported("no file exists"); } const LevelFilesBrief& l0 = vstorage->LevelFilesBrief(0); // L0 should not have files if (l0.num_files > 1) { return Status::NotSupported("L0 contain more than 1 file"); } if (l0.num_files == 1) { if (vstorage->num_non_empty_levels() > 1) { return Status::NotSupported("Both L0 and other level contain files"); } files_ = l0; return Status::OK(); } for (int i = 1; i < vstorage->num_non_empty_levels() - 1; ++i) { if (vstorage->LevelFilesBrief(i).num_files > 0) { return Status::NotSupported("Other levels 
also contain files"); } } int level = vstorage->num_non_empty_levels() - 1; if (vstorage->LevelFilesBrief(level).num_files > 0) { files_ = vstorage->LevelFilesBrief(level); return Status::OK(); } return Status::NotSupported("no file exists"); } Status CompactedDBImpl::Open(const Options& options, const std::string& dbname, DB** dbptr) { *dbptr = nullptr; if (options.max_open_files != -1) { return Status::InvalidArgument("require max_open_files = -1"); } if (options.merge_operator.get() != nullptr) { return Status::InvalidArgument("merge operator is not supported"); } DBOptions db_options(options); std::unique_ptr db(new CompactedDBImpl(db_options, dbname)); Status s = db->Init(options); if (s.ok()) { db->StartTimedTasks(); ROCKS_LOG_INFO(db->immutable_db_options_.info_log, "Opened the db as fully compacted mode"); LogFlush(db->immutable_db_options_.info_log); *dbptr = db.release(); } return s; } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/compacted_db_impl.h000066400000000000000000000106221370372246700174310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include #include "db/db_impl/db_impl.h" namespace ROCKSDB_NAMESPACE { class CompactedDBImpl : public DBImpl { public: CompactedDBImpl(const DBOptions& options, const std::string& dbname); // No copying allowed CompactedDBImpl(const CompactedDBImpl&) = delete; void operator=(const CompactedDBImpl&) = delete; virtual ~CompactedDBImpl(); static Status Open(const Options& options, const std::string& dbname, DB** dbptr); // Implementations of the DB interface using DB::Get; virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; using DB::MultiGet; virtual std::vector MultiGet( const ReadOptions& options, const std::vector&, const std::vector& keys, std::vector* values) override; using DBImpl::Put; virtual Status Put(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*value*/) override { return Status::NotSupported("Not supported in compacted db mode."); } using DBImpl::Merge; virtual Status Merge(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*value*/) override { return Status::NotSupported("Not supported in compacted db mode."); } using DBImpl::Delete; virtual Status Delete(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/) override { return Status::NotSupported("Not supported in compacted db mode."); } virtual Status Write(const WriteOptions& /*options*/, WriteBatch* /*updates*/) override { return Status::NotSupported("Not supported in compacted db mode."); } using DBImpl::CompactRange; virtual Status CompactRange(const CompactRangeOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice* /*begin*/, const Slice* /*end*/) override { return Status::NotSupported("Not supported in compacted db mode."); } virtual Status DisableFileDeletions() override { return 
Status::NotSupported("Not supported in compacted db mode."); } virtual Status EnableFileDeletions(bool /*force*/) override { return Status::NotSupported("Not supported in compacted db mode."); } virtual Status GetLiveFiles(std::vector& ret, uint64_t* manifest_file_size, bool /*flush_memtable*/) override { return DBImpl::GetLiveFiles(ret, manifest_file_size, false /* flush_memtable */); } using DBImpl::Flush; virtual Status Flush(const FlushOptions& /*options*/, ColumnFamilyHandle* /*column_family*/) override { return Status::NotSupported("Not supported in compacted db mode."); } using DB::IngestExternalFile; virtual Status IngestExternalFile( ColumnFamilyHandle* /*column_family*/, const std::vector& /*external_files*/, const IngestExternalFileOptions& /*ingestion_options*/) override { return Status::NotSupported("Not supported in compacted db mode."); } using DB::CreateColumnFamilyWithImport; virtual Status CreateColumnFamilyWithImport( const ColumnFamilyOptions& /*options*/, const std::string& /*column_family_name*/, const ImportColumnFamilyOptions& /*import_options*/, const ExportImportFilesMetaData& /*metadata*/, ColumnFamilyHandle** /*handle*/) override { return Status::NotSupported("Not supported in compacted db mode."); } private: friend class DB; inline size_t FindFile(const Slice& key); Status Init(const Options& options); ColumnFamilyData* cfd_; Version* version_; const Comparator* user_comparator_; LevelFilesBrief files_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/compaction/000077500000000000000000000000001370372246700157665ustar00rootroot00000000000000rocksdb-6.11.4/db/compaction/compaction.cc000066400000000000000000000462451370372246700204440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include "db/column_family.h" #include "db/compaction/compaction.h" #include "rocksdb/compaction_filter.h" #include "test_util/sync_point.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { const uint64_t kRangeTombstoneSentinel = PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion); int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, const InternalKey& b) { auto c = user_cmp->CompareWithoutTimestamp(a.user_key(), b.user_key()); if (c != 0) { return c; } auto a_footer = ExtractInternalKeyFooter(a.Encode()); auto b_footer = ExtractInternalKeyFooter(b.Encode()); if (a_footer == kRangeTombstoneSentinel) { if (b_footer != kRangeTombstoneSentinel) { return -1; } } else if (b_footer == kRangeTombstoneSentinel) { return 1; } return 0; } int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a, const InternalKey& b) { if (a == nullptr) { return -1; } return sstableKeyCompare(user_cmp, *a, b); } int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, const InternalKey* b) { if (b == nullptr) { return -1; } return sstableKeyCompare(user_cmp, a, *b); } uint64_t TotalFileSize(const std::vector& files) { uint64_t sum = 0; for (size_t i = 0; i < files.size() && files[i]; i++) { sum += files[i]->fd.GetFileSize(); } return sum; } void Compaction::SetInputVersion(Version* _input_version) { input_version_ = _input_version; cfd_ = input_version_->cfd(); cfd_->Ref(); input_version_->Ref(); edit_.SetColumnFamily(cfd_->GetID()); } void Compaction::GetBoundaryKeys( VersionStorageInfo* vstorage, const 
std::vector& inputs, Slice* smallest_user_key, Slice* largest_user_key) { bool initialized = false; const Comparator* ucmp = vstorage->InternalComparator()->user_comparator(); for (size_t i = 0; i < inputs.size(); ++i) { if (inputs[i].files.empty()) { continue; } if (inputs[i].level == 0) { // we need to consider all files on level 0 for (const auto* f : inputs[i].files) { const Slice& start_user_key = f->smallest.user_key(); if (!initialized || ucmp->Compare(start_user_key, *smallest_user_key) < 0) { *smallest_user_key = start_user_key; } const Slice& end_user_key = f->largest.user_key(); if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) { *largest_user_key = end_user_key; } initialized = true; } } else { // we only need to consider the first and last file const Slice& start_user_key = inputs[i].files[0]->smallest.user_key(); if (!initialized || ucmp->Compare(start_user_key, *smallest_user_key) < 0) { *smallest_user_key = start_user_key; } const Slice& end_user_key = inputs[i].files.back()->largest.user_key(); if (!initialized || ucmp->Compare(end_user_key, *largest_user_key) > 0) { *largest_user_key = end_user_key; } initialized = true; } } } std::vector Compaction::PopulateWithAtomicBoundaries( VersionStorageInfo* vstorage, std::vector inputs) { const Comparator* ucmp = vstorage->InternalComparator()->user_comparator(); for (size_t i = 0; i < inputs.size(); i++) { if (inputs[i].level == 0 || inputs[i].files.empty()) { continue; } inputs[i].atomic_compaction_unit_boundaries.reserve(inputs[i].files.size()); AtomicCompactionUnitBoundary cur_boundary; size_t first_atomic_idx = 0; auto add_unit_boundary = [&](size_t to) { if (first_atomic_idx == to) return; for (size_t k = first_atomic_idx; k < to; k++) { inputs[i].atomic_compaction_unit_boundaries.push_back(cur_boundary); } first_atomic_idx = to; }; for (size_t j = 0; j < inputs[i].files.size(); j++) { const auto* f = inputs[i].files[j]; if (j == 0) { // First file in a level. 
cur_boundary.smallest = &f->smallest; cur_boundary.largest = &f->largest; } else if (sstableKeyCompare(ucmp, *cur_boundary.largest, f->smallest) == 0) { // SSTs overlap but the end key of the previous file was not // artificially extended by a range tombstone. Extend the current // boundary. cur_boundary.largest = &f->largest; } else { // Atomic compaction unit has ended. add_unit_boundary(j); cur_boundary.smallest = &f->smallest; cur_boundary.largest = &f->largest; } } add_unit_boundary(inputs[i].files.size()); assert(inputs[i].files.size() == inputs[i].atomic_compaction_unit_boundaries.size()); } return inputs; } // helper function to determine if compaction is creating files at the // bottommost level bool Compaction::IsBottommostLevel( int output_level, VersionStorageInfo* vstorage, const std::vector& inputs) { int output_l0_idx; if (output_level == 0) { output_l0_idx = 0; for (const auto* file : vstorage->LevelFiles(0)) { if (inputs[0].files.back() == file) { break; } ++output_l0_idx; } assert(static_cast(output_l0_idx) < vstorage->LevelFiles(0).size()); } else { output_l0_idx = -1; } Slice smallest_key, largest_key; GetBoundaryKeys(vstorage, inputs, &smallest_key, &largest_key); return !vstorage->RangeMightExistAfterSortedRun(smallest_key, largest_key, output_level, output_l0_idx); } // test function to validate the functionality of IsBottommostLevel() // function -- determines if compaction with inputs and storage is bottommost bool Compaction::TEST_IsBottommostLevel( int output_level, VersionStorageInfo* vstorage, const std::vector& inputs) { return IsBottommostLevel(output_level, vstorage, inputs); } bool Compaction::IsFullCompaction( VersionStorageInfo* vstorage, const std::vector& inputs) { size_t num_files_in_compaction = 0; size_t total_num_files = 0; for (int l = 0; l < vstorage->num_levels(); l++) { total_num_files += vstorage->NumLevelFiles(l); } for (size_t i = 0; i < inputs.size(); i++) { num_files_in_compaction += inputs[i].size(); } return 
num_files_in_compaction == total_num_files; } Compaction::Compaction(VersionStorageInfo* vstorage, const ImmutableCFOptions& _immutable_cf_options, const MutableCFOptions& _mutable_cf_options, std::vector _inputs, int _output_level, uint64_t _target_file_size, uint64_t _max_compaction_bytes, uint32_t _output_path_id, CompressionType _compression, CompressionOptions _compression_opts, uint32_t _max_subcompactions, std::vector _grandparents, bool _manual_compaction, double _score, bool _deletion_compaction, CompactionReason _compaction_reason) : input_vstorage_(vstorage), start_level_(_inputs[0].level), output_level_(_output_level), max_output_file_size_(_target_file_size), max_compaction_bytes_(_max_compaction_bytes), max_subcompactions_(_max_subcompactions), immutable_cf_options_(_immutable_cf_options), mutable_cf_options_(_mutable_cf_options), input_version_(nullptr), number_levels_(vstorage->num_levels()), cfd_(nullptr), output_path_id_(_output_path_id), output_compression_(_compression), output_compression_opts_(_compression_opts), deletion_compaction_(_deletion_compaction), inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))), grandparents_(std::move(_grandparents)), score_(_score), bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)), is_full_compaction_(IsFullCompaction(vstorage, inputs_)), is_manual_compaction_(_manual_compaction), is_trivial_move_(false), compaction_reason_(_compaction_reason) { MarkFilesBeingCompacted(true); if (is_manual_compaction_) { compaction_reason_ = CompactionReason::kManualCompaction; } if (max_subcompactions_ == 0) { max_subcompactions_ = immutable_cf_options_.max_subcompactions; } if (!bottommost_level_) { // Currently we only enable dictionary compression during compaction to the // bottommost level. 
output_compression_opts_.max_dict_bytes = 0; output_compression_opts_.zstd_max_train_bytes = 0; } #ifndef NDEBUG for (size_t i = 1; i < inputs_.size(); ++i) { assert(inputs_[i].level > inputs_[i - 1].level); } #endif // setup input_levels_ { input_levels_.resize(num_input_levels()); for (size_t which = 0; which < num_input_levels(); which++) { DoGenerateLevelFilesBrief(&input_levels_[which], inputs_[which].files, &arena_); } } GetBoundaryKeys(vstorage, inputs_, &smallest_user_key_, &largest_user_key_); } Compaction::~Compaction() { if (input_version_ != nullptr) { input_version_->Unref(); } if (cfd_ != nullptr) { cfd_->UnrefAndTryDelete(); } } bool Compaction::InputCompressionMatchesOutput() const { int base_level = input_vstorage_->base_level(); bool matches = (GetCompressionType(immutable_cf_options_, input_vstorage_, mutable_cf_options_, start_level_, base_level) == output_compression_); if (matches) { TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches"); return true; } TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:DidntMatch"); return matches; } bool Compaction::IsTrivialMove() const { // Avoid a move if there is lots of overlapping grandparent data. // Otherwise, the move could create a parent file that will require // a very expensive merge later on. // If start_level_== output_level_, the purpose is to force compaction // filter to be applied to that level, and thus cannot be a trivial move. 
// Check if start level have files with overlapping ranges if (start_level_ == 0 && input_vstorage_->level0_non_overlapping() == false) { // We cannot move files from L0 to L1 if the files are overlapping return false; } if (is_manual_compaction_ && (immutable_cf_options_.compaction_filter != nullptr || immutable_cf_options_.compaction_filter_factory != nullptr)) { // This is a manual compaction and we have a compaction filter that should // be executed, we cannot do a trivial move return false; } // Used in universal compaction, where trivial move can be done if the // input files are non overlapping if ((mutable_cf_options_.compaction_options_universal.allow_trivial_move) && (output_level_ != 0)) { return is_trivial_move_; } if (!(start_level_ != output_level_ && num_input_levels() == 1 && input(0, 0)->fd.GetPathId() == output_path_id() && InputCompressionMatchesOutput())) { return false; } // assert inputs_.size() == 1 for (const auto& file : inputs_.front().files) { std::vector file_grand_parents; if (output_level_ + 1 >= number_levels_) { continue; } input_vstorage_->GetOverlappingInputs(output_level_ + 1, &file->smallest, &file->largest, &file_grand_parents); const auto compaction_size = file->fd.GetFileSize() + TotalFileSize(file_grand_parents); if (compaction_size > max_compaction_bytes_) { return false; } } return true; } void Compaction::AddInputDeletions(VersionEdit* out_edit) { for (size_t which = 0; which < num_input_levels(); which++) { for (size_t i = 0; i < inputs_[which].size(); i++) { out_edit->DeleteFile(level(which), inputs_[which][i]->fd.GetNumber()); } } } bool Compaction::KeyNotExistsBeyondOutputLevel( const Slice& user_key, std::vector* level_ptrs) const { assert(input_version_ != nullptr); assert(level_ptrs != nullptr); assert(level_ptrs->size() == static_cast(number_levels_)); if (bottommost_level_) { return true; } else if (output_level_ != 0 && cfd_->ioptions()->compaction_style == kCompactionStyleLevel) { // Maybe use binary search to 
find right entry instead of linear search? const Comparator* user_cmp = cfd_->user_comparator(); for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) { const std::vector& files = input_vstorage_->LevelFiles(lvl); for (; level_ptrs->at(lvl) < files.size(); level_ptrs->at(lvl)++) { auto* f = files[level_ptrs->at(lvl)]; if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) { // We've advanced far enough if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) { // Key falls in this file's range, so it may // exist beyond output level return false; } break; } } } return true; } return false; } // Mark (or clear) each file that is being compacted void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) { for (size_t i = 0; i < num_input_levels(); i++) { for (size_t j = 0; j < inputs_[i].size(); j++) { assert(mark_as_compacted ? !inputs_[i][j]->being_compacted : inputs_[i][j]->being_compacted); inputs_[i][j]->being_compacted = mark_as_compacted; } } } // Sample output: // If compacting 3 L0 files, 2 L3 files and 1 L4 file, and outputting to L5, // print: "3@0 + 2@3 + 1@4 files to L5" const char* Compaction::InputLevelSummary( InputLevelSummaryBuffer* scratch) const { int len = 0; bool is_first = true; for (auto& input_level : inputs_) { if (input_level.empty()) { continue; } if (!is_first) { len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " + "); len = std::min(len, static_cast(sizeof(scratch->buffer))); } else { is_first = false; } len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "%" ROCKSDB_PRIszt "@%d", input_level.size(), input_level.level); len = std::min(len, static_cast(sizeof(scratch->buffer))); } snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " files to L%d", output_level()); return scratch->buffer; } uint64_t Compaction::CalculateTotalInputSize() const { uint64_t size = 0; for (auto& input_level : inputs_) { for (auto f : input_level.files) { size += f->fd.GetFileSize(); } 
} return size; } void Compaction::ReleaseCompactionFiles(Status status) { MarkFilesBeingCompacted(false); cfd_->compaction_picker()->ReleaseCompactionFiles(this, status); } void Compaction::ResetNextCompactionIndex() { assert(input_version_ != nullptr); input_vstorage_->ResetNextCompactionIndex(start_level_); } namespace { int InputSummary(const std::vector& files, char* output, int len) { *output = '\0'; int write = 0; for (size_t i = 0; i < files.size(); i++) { int sz = len - write; int ret; char sztxt[16]; AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16); ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ", files.at(i)->fd.GetNumber(), sztxt); if (ret < 0 || ret >= sz) break; write += ret; } // if files.size() is non-zero, overwrite the last space return write - !!files.size(); } } // namespace void Compaction::Summary(char* output, int len) { int write = snprintf(output, len, "Base version %" PRIu64 " Base level %d, inputs: [", input_version_->GetVersionNumber(), start_level_); if (write < 0 || write >= len) { return; } for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) { if (level_iter > 0) { write += snprintf(output + write, len - write, "], ["); if (write < 0 || write >= len) { return; } } write += InputSummary(inputs_[level_iter].files, output + write, len - write); if (write < 0 || write >= len) { return; } } snprintf(output + write, len - write, "]"); } uint64_t Compaction::OutputFilePreallocationSize() const { uint64_t preallocation_size = 0; for (const auto& level_files : inputs_) { for (const auto& file : level_files.files) { preallocation_size += file->fd.GetFileSize(); } } if (max_output_file_size_ != port::kMaxUint64 && (immutable_cf_options_.compaction_style == kCompactionStyleLevel || output_level() > 0)) { preallocation_size = std::min(max_output_file_size_, preallocation_size); } // Over-estimate slightly so we don't end up just barely crossing // the threshold // No point to prellocate more than 1GB. 
return std::min(uint64_t{1073741824}, preallocation_size + (preallocation_size / 10)); } std::unique_ptr Compaction::CreateCompactionFilter() const { if (!cfd_->ioptions()->compaction_filter_factory) { return nullptr; } CompactionFilter::Context context; context.is_full_compaction = is_full_compaction_; context.is_manual_compaction = is_manual_compaction_; context.column_family_id = cfd_->GetID(); return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter( context); } bool Compaction::IsOutputLevelEmpty() const { return inputs_.back().level != output_level_ || inputs_.back().empty(); } bool Compaction::ShouldFormSubcompactions() const { if (max_subcompactions_ <= 1 || cfd_ == nullptr) { return false; } if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) { return (start_level_ == 0 || is_manual_compaction_) && output_level_ > 0 && !IsOutputLevelEmpty(); } else if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) { return number_levels_ > 1 && output_level_ > 0; } else { return false; } } uint64_t Compaction::MinInputFileOldestAncesterTime() const { uint64_t min_oldest_ancester_time = port::kMaxUint64; for (const auto& level_files : inputs_) { for (const auto& file : level_files.files) { uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime(); if (oldest_ancester_time != 0) { min_oldest_ancester_time = std::min(min_oldest_ancester_time, oldest_ancester_time); } } } return min_oldest_ancester_time; } int Compaction::GetInputBaseLevel() const { return input_vstorage_->base_level(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction.h000066400000000000000000000356051370372246700203040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "db/version_set.h" #include "memory/arena.h" #include "options/cf_options.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { // The file contains class Compaction, as well as some helper functions // and data structures used by the class. // Utility for comparing sstable boundary keys. Returns -1 if either a or b is // null which provides the property that a==null indicates a key that is less // than any key and b==null indicates a key that is greater than any key. Note // that the comparison is performed primarily on the user-key portion of the // key. If the user-keys compare equal, an additional test is made to sort // range tombstone sentinel keys before other keys with the same user-key. The // result is that 2 user-keys will compare equal if they differ purely on // their sequence number and value, but the range tombstone sentinel for that // user-key will compare not equal. This is necessary because the range // tombstone sentinel key is set as the largest key for an sstable even though // that key never appears in the database. We don't want adjacent sstables to // be considered overlapping if they are separated by the range tombstone // sentinel. int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, const InternalKey& b); int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a, const InternalKey& b); int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, const InternalKey* b); // An AtomicCompactionUnitBoundary represents a range of keys [smallest, // largest] that exactly spans one ore more neighbouring SSTs on the same // level. Every pair of SSTs in this range "overlap" (i.e., the largest // user key of one file is the smallest user key of the next file). 
These // boundaries are propagated down to RangeDelAggregator during compaction // to provide safe truncation boundaries for range tombstones. struct AtomicCompactionUnitBoundary { const InternalKey* smallest = nullptr; const InternalKey* largest = nullptr; }; // The structure that manages compaction input files associated // with the same physical level. struct CompactionInputFiles { int level; std::vector files; std::vector atomic_compaction_unit_boundaries; inline bool empty() const { return files.empty(); } inline size_t size() const { return files.size(); } inline void clear() { files.clear(); } inline FileMetaData* operator[](size_t i) const { return files[i]; } }; class Version; class ColumnFamilyData; class VersionStorageInfo; class CompactionFilter; // A Compaction encapsulates metadata about a compaction. class Compaction { public: Compaction(VersionStorageInfo* input_version, const ImmutableCFOptions& immutable_cf_options, const MutableCFOptions& mutable_cf_options, std::vector inputs, int output_level, uint64_t target_file_size, uint64_t max_compaction_bytes, uint32_t output_path_id, CompressionType compression, CompressionOptions compression_opts, uint32_t max_subcompactions, std::vector grandparents, bool manual_compaction = false, double score = -1, bool deletion_compaction = false, CompactionReason compaction_reason = CompactionReason::kUnknown); // No copying allowed Compaction(const Compaction&) = delete; void operator=(const Compaction&) = delete; ~Compaction(); // Returns the level associated to the specified compaction input level. // If compaction_input_level is not specified, then input_level is set to 0. int level(size_t compaction_input_level = 0) const { return inputs_[compaction_input_level].level; } int start_level() const { return start_level_; } // Outputs will go to this level int output_level() const { return output_level_; } // Returns the number of input levels in this compaction. 
size_t num_input_levels() const { return inputs_.size(); } // Return the object that holds the edits to the descriptor done // by this compaction. VersionEdit* edit() { return &edit_; } // Returns the number of input files associated to the specified // compaction input level. // The function will return 0 if when "compaction_input_level" < 0 // or "compaction_input_level" >= "num_input_levels()". size_t num_input_files(size_t compaction_input_level) const { if (compaction_input_level < inputs_.size()) { return inputs_[compaction_input_level].size(); } return 0; } // Returns input version of the compaction Version* input_version() const { return input_version_; } // Returns the ColumnFamilyData associated with the compaction. ColumnFamilyData* column_family_data() const { return cfd_; } // Returns the file meta data of the 'i'th input file at the // specified compaction input level. // REQUIREMENT: "compaction_input_level" must be >= 0 and // < "input_levels()" FileMetaData* input(size_t compaction_input_level, size_t i) const { assert(compaction_input_level < inputs_.size()); return inputs_[compaction_input_level][i]; } const std::vector* boundaries( size_t compaction_input_level) const { assert(compaction_input_level < inputs_.size()); return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries; } // Returns the list of file meta data of the specified compaction // input level. // REQUIREMENT: "compaction_input_level" must be >= 0 and // < "input_levels()" const std::vector* inputs( size_t compaction_input_level) const { assert(compaction_input_level < inputs_.size()); return &inputs_[compaction_input_level].files; } const std::vector* inputs() { return &inputs_; } // Returns the LevelFilesBrief of the specified compaction input level. const LevelFilesBrief* input_levels(size_t compaction_input_level) const { return &input_levels_[compaction_input_level]; } // Maximum size of files to build during this compaction. 
uint64_t max_output_file_size() const { return max_output_file_size_; } // What compression for output CompressionType output_compression() const { return output_compression_; } // What compression options for output CompressionOptions output_compression_opts() const { return output_compression_opts_; } // Whether need to write output file to second DB path. uint32_t output_path_id() const { return output_path_id_; } // Is this a trivial compaction that can be implemented by just // moving a single input file to the next level (no merging or splitting) bool IsTrivialMove() const; // If true, then the compaction can be done by simply deleting input files. bool deletion_compaction() const { return deletion_compaction_; } // Add all inputs to this compaction as delete operations to *edit. void AddInputDeletions(VersionEdit* edit); // Returns true if the available information we have guarantees that // the input "user_key" does not exist in any level beyond "output_level()". bool KeyNotExistsBeyondOutputLevel(const Slice& user_key, std::vector* level_ptrs) const; // Clear all files to indicate that they are not being compacted // Delete this compaction from the list of running compactions. // // Requirement: DB mutex held void ReleaseCompactionFiles(Status status); // Returns the summary of the compaction in "output" with maximum "len" // in bytes. The caller is responsible for the memory management of // "output". void Summary(char* output, int len); // Return the score that was used to pick this compaction run. double score() const { return score_; } // Is this compaction creating a file in the bottom most level? bool bottommost_level() const { return bottommost_level_; } // Does this compaction include all sst files? bool is_full_compaction() const { return is_full_compaction_; } // Was this compaction triggered manually by the client? 
bool is_manual_compaction() const { return is_manual_compaction_; } // Used when allow_trivial_move option is set in // Universal compaction. If all the input files are // non overlapping, then is_trivial_move_ variable // will be set true, else false void set_is_trivial_move(bool trivial_move) { is_trivial_move_ = trivial_move; } // Used when allow_trivial_move option is set in // Universal compaction. Returns true, if the input files // are non-overlapping and can be trivially moved. bool is_trivial_move() const { return is_trivial_move_; } // How many total levels are there? int number_levels() const { return number_levels_; } // Return the ImmutableCFOptions that should be used throughout the compaction // procedure const ImmutableCFOptions* immutable_cf_options() const { return &immutable_cf_options_; } // Return the MutableCFOptions that should be used throughout the compaction // procedure const MutableCFOptions* mutable_cf_options() const { return &mutable_cf_options_; } // Returns the size in bytes that the output file should be preallocated to. // In level compaction, that is max_file_size_. In universal compaction, that // is the sum of all input file sizes. uint64_t OutputFilePreallocationSize() const; void SetInputVersion(Version* input_version); struct InputLevelSummaryBuffer { char buffer[128]; }; const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const; uint64_t CalculateTotalInputSize() const; // In case of compaction error, reset the nextIndex that is used // to pick up the next file to be compacted from files_by_size_ void ResetNextCompactionIndex(); // Create a CompactionFilter from compaction_filter_factory std::unique_ptr CreateCompactionFilter() const; // Is the input level corresponding to output_level_ empty? bool IsOutputLevelEmpty() const; // Should this compaction be broken up into smaller ones run in parallel? 
bool ShouldFormSubcompactions() const; // test function to validate the functionality of IsBottommostLevel() // function -- determines if compaction with inputs and storage is bottommost static bool TEST_IsBottommostLevel( int output_level, VersionStorageInfo* vstorage, const std::vector& inputs); TablePropertiesCollection GetOutputTableProperties() const { return output_table_properties_; } void SetOutputTableProperties(TablePropertiesCollection tp) { output_table_properties_ = std::move(tp); } Slice GetSmallestUserKey() const { return smallest_user_key_; } Slice GetLargestUserKey() const { return largest_user_key_; } int GetInputBaseLevel() const; CompactionReason compaction_reason() { return compaction_reason_; } const std::vector& grandparents() const { return grandparents_; } uint64_t max_compaction_bytes() const { return max_compaction_bytes_; } uint32_t max_subcompactions() const { return max_subcompactions_; } uint64_t MinInputFileOldestAncesterTime() const; private: // mark (or clear) all files that are being compacted void MarkFilesBeingCompacted(bool mark_as_compacted); // get the smallest and largest key present in files to be compacted static void GetBoundaryKeys(VersionStorageInfo* vstorage, const std::vector& inputs, Slice* smallest_key, Slice* largest_key); // Get the atomic file boundaries for all files in the compaction. Necessary // in order to avoid the scenario described in // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb // down appropriate key boundaries to RangeDelAggregator during compaction. 
static std::vector PopulateWithAtomicBoundaries( VersionStorageInfo* vstorage, std::vector inputs); // helper function to determine if compaction with inputs and storage is // bottommost static bool IsBottommostLevel( int output_level, VersionStorageInfo* vstorage, const std::vector& inputs); static bool IsFullCompaction(VersionStorageInfo* vstorage, const std::vector& inputs); VersionStorageInfo* input_vstorage_; const int start_level_; // the lowest level to be compacted const int output_level_; // levels to which output files are stored uint64_t max_output_file_size_; uint64_t max_compaction_bytes_; uint32_t max_subcompactions_; const ImmutableCFOptions immutable_cf_options_; const MutableCFOptions mutable_cf_options_; Version* input_version_; VersionEdit edit_; const int number_levels_; ColumnFamilyData* cfd_; Arena arena_; // Arena used to allocate space for file_levels_ const uint32_t output_path_id_; CompressionType output_compression_; CompressionOptions output_compression_opts_; // If true, then the comaction can be done by simply deleting input files. const bool deletion_compaction_; // Compaction input files organized by level. Constant after construction const std::vector inputs_; // A copy of inputs_, organized more closely in memory autovector input_levels_; // State used to check for number of overlapping grandparent files // (grandparent == "output_level_ + 1") std::vector grandparents_; const double score_; // score that was used to pick this compaction. // Is this compaction creating a file in the bottom most level? const bool bottommost_level_; // Does this compaction include all sst files? const bool is_full_compaction_; // Is this compaction requested by the client? const bool is_manual_compaction_; // True if we can do trivial move in Universal multi level // compaction bool is_trivial_move_; // Does input compression match the output compression? 
bool InputCompressionMatchesOutput() const; // table properties of output files TablePropertiesCollection output_table_properties_; // smallest user keys in compaction Slice smallest_user_key_; // largest user keys in compaction Slice largest_user_key_; // Reason for compaction CompactionReason compaction_reason_; }; // Return sum of sizes of all files in `files`. extern uint64_t TotalFileSize(const std::vector& files); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_iteration_stats.h000066400000000000000000000024361370372246700235740ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/rocksdb_namespace.h" struct CompactionIterationStats { // Compaction statistics // Doesn't include records skipped because of // CompactionFilter::Decision::kRemoveAndSkipUntil. int64_t num_record_drop_user = 0; int64_t num_record_drop_hidden = 0; int64_t num_record_drop_obsolete = 0; int64_t num_record_drop_range_del = 0; int64_t num_range_del_drop_obsolete = 0; // Deletions obsoleted before bottom level due to file gap optimization. int64_t num_optimized_del_drop_obsolete = 0; uint64_t total_filter_time = 0; // Input statistics // TODO(noetzli): The stats are incomplete. They are lacking everything // consumed by MergeHelper. 
uint64_t num_input_records = 0;
  uint64_t num_input_deletion_records = 0;
  uint64_t num_input_corrupt_records = 0;
  uint64_t total_input_raw_key_bytes = 0;
  uint64_t total_input_raw_value_bytes = 0;

  // Single-Delete diagnostics for exceptional situations
  uint64_t num_single_del_fallthru = 0;
  uint64_t num_single_del_mismatch = 0;
};
rocksdb-6.11.4/db/compaction/compaction_iterator.cc000066400000000000000000001007321370372246700223450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

// NOTE(review): the header name of the first include below, and every
// template argument list in this file (std::vector, std::atomic,
// std::shared_ptr, std::unique_ptr), appear to have been lost in extraction.
// Restore them from the upstream file before building.
#include
#include "db/compaction/compaction_iterator.h"
#include "db/snapshot_checker.h"
#include "port/likely.h"
#include "rocksdb/listener.h"
#include "table/internal_iterator.h"
#include "test_util/sync_point.h"

// True when `seq` is at or below `snapshot` and, if a snapshot checker is
// installed, the checker reports kInSnapshot.
#define DEFINITELY_IN_SNAPSHOT(seq, snapshot)                       \
  ((seq) <= (snapshot) &&                                           \
   (snapshot_checker_ == nullptr ||                                 \
    LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == \
           SnapshotCheckerResult::kInSnapshot)))

// True when `seq` is above `snapshot`, or a snapshot checker is installed and
// reports kNotInSnapshot.
#define DEFINITELY_NOT_IN_SNAPSHOT(seq, snapshot)                     \
  ((seq) > (snapshot) ||                                              \
   (snapshot_checker_ != nullptr &&                                   \
    UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == \
             SnapshotCheckerResult::kNotInSnapshot)))

// True when `seq` is at or below earliest_snapshot_ and, if a snapshot checker
// is installed, IsInEarliestSnapshot(seq) holds.
#define IN_EARLIEST_SNAPSHOT(seq) \
  ((seq) <= earliest_snapshot_ && \
   (snapshot_checker_ == nullptr || LIKELY(IsInEarliestSnapshot(seq))))

namespace ROCKSDB_NAMESPACE {

// Convenience constructor taking a raw Compaction pointer. It wraps the
// pointer in a CompactionProxy (or passes null when `compaction` is null) and
// forwards every other argument unchanged to the constructor below.
CompactionIterator::CompactionIterator(
    InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
    SequenceNumber last_sequence, std::vector* snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
    const SnapshotChecker* snapshot_checker, Env* env,
    bool report_detailed_time, bool expect_valid_internal_key,
    CompactionRangeDelAggregator* range_del_agg, const Compaction* compaction,
    const CompactionFilter* compaction_filter,
    const std::atomic* shutting_down,
    const SequenceNumber preserve_deletes_seqnum,
    const std::atomic* manual_compaction_paused,
    const std::shared_ptr info_log)
    : CompactionIterator(
          input, cmp, merge_helper, last_sequence, snapshots,
          earliest_write_conflict_snapshot, snapshot_checker, env,
          report_detailed_time, expect_valid_internal_key, range_del_agg,
          std::unique_ptr(
              compaction ? new CompactionProxy(compaction) : nullptr),
          compaction_filter, shutting_down, preserve_deletes_seqnum,
          manual_compaction_paused, info_log) {}

// Main constructor. `last_sequence` is accepted but unused (its name is
// commented out). `snapshots` must be non-null, and a compaction filter may
// only be supplied together with a compaction (both are asserted below).
CompactionIterator::CompactionIterator(
    InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
    SequenceNumber /*last_sequence*/, std::vector* snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
    const SnapshotChecker* snapshot_checker, Env* env,
    bool report_detailed_time, bool expect_valid_internal_key,
    CompactionRangeDelAggregator* range_del_agg,
    std::unique_ptr compaction,
    const CompactionFilter* compaction_filter,
    const std::atomic* shutting_down,
    const SequenceNumber preserve_deletes_seqnum,
    const std::atomic* manual_compaction_paused,
    const std::shared_ptr info_log)
    : input_(input),
      cmp_(cmp),
      merge_helper_(merge_helper),
      snapshots_(snapshots),
      earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
      snapshot_checker_(snapshot_checker),
      env_(env),
      report_detailed_time_(report_detailed_time),
      expect_valid_internal_key_(expect_valid_internal_key),
      range_del_agg_(range_del_agg),
      compaction_(std::move(compaction)),
      compaction_filter_(compaction_filter),
      shutting_down_(shutting_down),
      manual_compaction_paused_(manual_compaction_paused),
      preserve_deletes_seqnum_(preserve_deletes_seqnum),
      current_user_key_sequence_(0),
      current_user_key_snapshot_(0),
      merge_out_iter_(merge_helper_),
      current_key_committed_(false),
      info_log_(info_log) {
  assert(compaction_filter_ == nullptr || compaction_ != nullptr);
  assert(snapshots_ != nullptr);
  bottommost_level_ = compaction_ == nullptr ?
false : compaction_->bottommost_level();
  if (compaction_ != nullptr) {
    // One cursor per level, all starting at 0.
    // NOTE(review): the vector's element type appears to have been lost in
    // extraction — restore it from the upstream file.
    level_ptrs_ = std::vector(compaction_->number_levels(), 0);
  }
  if (snapshots_->size() == 0) {
    // optimize for fast path if there are no snapshots
    visible_at_tip_ = true;
    earliest_snapshot_iter_ = snapshots_->end();
    earliest_snapshot_ = kMaxSequenceNumber;
    latest_snapshot_ = 0;
  } else {
    visible_at_tip_ = false;
    earliest_snapshot_iter_ = snapshots_->begin();
    earliest_snapshot_ = snapshots_->at(0);
    latest_snapshot_ = snapshots_->back();
  }
#ifndef NDEBUG
  // findEarliestVisibleSnapshot assumes this ordering.
  for (size_t i = 1; i < snapshots_->size(); ++i) {
    assert(snapshots_->at(i - 1) < snapshots_->at(i));
  }
#endif
  input_->SetPinnedItersMgr(&pinned_iters_mgr_);
  TEST_SYNC_POINT_CALLBACK("CompactionIterator:AfterInit", compaction_.get());
}

CompactionIterator::~CompactionIterator() {
  // input_ Iterator lifetime is longer than pinned_iters_mgr_ lifetime
  input_->SetPinnedItersMgr(nullptr);
}

// Zeroes the record-drop counters in iter_stats_. The input counters,
// total_filter_time and the single-delete diagnostics are left untouched.
void CompactionIterator::ResetRecordCounts() {
  iter_stats_.num_record_drop_user = 0;
  iter_stats_.num_record_drop_hidden = 0;
  iter_stats_.num_record_drop_obsolete = 0;
  iter_stats_.num_record_drop_range_del = 0;
  iter_stats_.num_range_del_drop_obsolete = 0;
  iter_stats_.num_optimized_del_drop_obsolete = 0;
}

// Processes the input from its current position (the input iterator is not
// advanced first) and prepares the first output record.
void CompactionIterator::SeekToFirst() {
  NextFromInput();
  PrepareOutput();
}

// Advances to the next output record. Pending merge output is drained before
// any further input is consumed.
void CompactionIterator::Next() {
  // If there is a merge output, return it before continuing to process the
  // input.
  if (merge_out_iter_.Valid()) {
    merge_out_iter_.Next();

    // Check if we returned all records of the merge output.
    if (merge_out_iter_.Valid()) {
      key_ = merge_out_iter_.key();
      value_ = merge_out_iter_.value();
      bool valid_key __attribute__((__unused__));
      valid_key = ParseInternalKey(key_, &ikey_);
      // MergeUntil stops when it encounters a corrupt key and does not
      // include them in the result, so we expect the keys here to be valid.
      assert(valid_key);
      if (!valid_key) {
        // Reached only in builds where the assert above is compiled out.
        ROCKS_LOG_FATAL(info_log_, "Invalid key (%s) in compaction",
                        key_.ToString(true).c_str());
      }
      // Keep current_key_ in sync.
      current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
      key_ = current_key_.GetInternalKey();
      ikey_.user_key = current_key_.GetUserKey();
      valid_ = true;
    } else {
      // We consumed all pinned merge operands, release pinned iterators
      pinned_iters_mgr_.ReleasePinnedData();
      // MergeHelper moves the iterator to the first record after the merged
      // records, so even though we reached the end of the merge output, we do
      // not want to advance the iterator.
      NextFromInput();
    }
  } else {
    // Only advance the input iterator if there is no merge output and the
    // iterator is not already at the next record.
    if (!at_next_) {
      input_->Next();
    }
    NextFromInput();
  }

  if (valid_) {
    // Record that we've outputted a record for the current key.
    has_outputted_key_ = true;
  }

  PrepareOutput();
}

// Runs the compaction filter, if one is set, on the current entry when its
// type is kTypeValue or kTypeBlobIndex. *need_skip and *skip_until are written
// only when the filter decides kRemoveAndSkipUntil with a valid target key.
void CompactionIterator::InvokeFilterIfNeeded(bool* need_skip,
                                              Slice* skip_until) {
  if (compaction_filter_ != nullptr &&
      (ikey_.type == kTypeValue || ikey_.type == kTypeBlobIndex)) {
    // If the user has specified a compaction filter and the sequence
    // number is greater than any external snapshot, then invoke the
    // filter. If the return value of the compaction filter is true,
    // replace the entry with a deletion marker.
    CompactionFilter::Decision filter;
    compaction_filter_value_.clear();
    compaction_filter_skip_until_.Clear();
    CompactionFilter::ValueType value_type =
        ikey_.type == kTypeValue ? CompactionFilter::ValueType::kValue
                                 : CompactionFilter::ValueType::kBlobIndex;
    // Hack: pass internal key to BlobIndexCompactionFilter since it needs
    // to get sequence number.
    Slice& filter_key = ikey_.type == kTypeValue ?
ikey_.user_key : key_;
    {
      // The filter call is timed; the elapsed time is added to the stats only
      // when env_ is set and detailed timing was requested.
      StopWatchNano timer(env_, report_detailed_time_);
      filter = compaction_filter_->FilterV2(
          compaction_->level(), filter_key, value_type, value_,
          &compaction_filter_value_, compaction_filter_skip_until_.rep());
      iter_stats_.total_filter_time +=
          env_ != nullptr && report_detailed_time_ ? timer.ElapsedNanos() : 0;
    }

    if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil &&
        cmp_->Compare(*compaction_filter_skip_until_.rep(), ikey_.user_key) <=
            0) {
      // Can't skip to a key smaller than the current one.
      // Keep the key as per FilterV2 documentation.
      filter = CompactionFilter::Decision::kKeep;
    }

    if (filter == CompactionFilter::Decision::kRemove) {
      // convert the current key to a delete; key_ is pointing into
      // current_key_ at this point, so updating current_key_ updates key()
      ikey_.type = kTypeDeletion;
      current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion);
      // no value associated with delete
      value_.clear();
      iter_stats_.num_record_drop_user++;
    } else if (filter == CompactionFilter::Decision::kChangeValue) {
      value_ = compaction_filter_value_;
    } else if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil) {
      *need_skip = true;
      // Turn the user key returned by the filter into an internal seek key.
      compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber,
                                                       kValueTypeForSeek);
      *skip_until = compaction_filter_skip_until_.Encode();
    }
  }
}

void CompactionIterator::NextFromInput() {
  at_next_ = false;
  valid_ = false;

  // Loop until an output record is produced, the input is exhausted, or a
  // manual-compaction pause or shutdown is requested.
  while (!valid_ && input_->Valid() && !IsPausingManualCompaction() &&
         !IsShuttingDown()) {
    key_ = input_->key();
    value_ = input_->value();
    iter_stats_.num_input_records++;

    if (!ParseInternalKey(key_, &ikey_)) {
      iter_stats_.num_input_corrupt_records++;

      // If `expect_valid_internal_key_` is false, return the corrupted key
      // and let the caller decide what to do with it.
      // TODO(noetzli): We should have a more elegant solution for this.
if (expect_valid_internal_key_) { assert(!"Corrupted internal key not expected."); status_ = Status::Corruption("Corrupted internal key not expected."); break; } key_ = current_key_.SetInternalKey(key_); has_current_user_key_ = false; current_user_key_sequence_ = kMaxSequenceNumber; current_user_key_snapshot_ = 0; valid_ = true; break; } TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_); // Update input statistics if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion) { iter_stats_.num_input_deletion_records++; } iter_stats_.total_input_raw_key_bytes += key_.size(); iter_stats_.total_input_raw_value_bytes += value_.size(); // If need_skip is true, we should seek the input iterator // to internal key skip_until and continue from there. bool need_skip = false; // Points either into compaction_filter_skip_until_ or into // merge_helper_->compaction_filter_skip_until_. Slice skip_until; // Check whether the user key changed. After this if statement current_key_ // is a copy of the current input key (maybe converted to a delete by the // compaction filter). ikey_.user_key is pointing to the copy. if (!has_current_user_key_ || !cmp_->Equal(ikey_.user_key, current_user_key_)) { // First occurrence of this user key // Copy key for output key_ = current_key_.SetInternalKey(key_, &ikey_); current_user_key_ = ikey_.user_key; has_current_user_key_ = true; has_outputted_key_ = false; current_user_key_sequence_ = kMaxSequenceNumber; current_user_key_snapshot_ = 0; current_key_committed_ = KeyCommitted(ikey_.sequence); // Apply the compaction filter to the first committed version of the user // key. if (current_key_committed_) { InvokeFilterIfNeeded(&need_skip, &skip_until); } } else { // Update the current key to reflect the new sequence number/type without // copying the user key. 
// TODO(rven): Compaction filter does not process keys in this path // Need to have the compaction filter process multiple versions // if we have versions on both sides of a snapshot current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); key_ = current_key_.GetInternalKey(); ikey_.user_key = current_key_.GetUserKey(); // Note that newer version of a key is ordered before older versions. If a // newer version of a key is committed, so as the older version. No need // to query snapshot_checker_ in that case. if (UNLIKELY(!current_key_committed_)) { assert(snapshot_checker_ != nullptr); current_key_committed_ = KeyCommitted(ikey_.sequence); // Apply the compaction filter to the first committed version of the // user key. if (current_key_committed_) { InvokeFilterIfNeeded(&need_skip, &skip_until); } } } if (UNLIKELY(!current_key_committed_)) { assert(snapshot_checker_ != nullptr); valid_ = true; break; } // If there are no snapshots, then this kv affect visibility at tip. // Otherwise, search though all existing snapshots to find the earliest // snapshot that is affected by this kv. SequenceNumber last_sequence __attribute__((__unused__)); last_sequence = current_user_key_sequence_; current_user_key_sequence_ = ikey_.sequence; SequenceNumber last_snapshot = current_user_key_snapshot_; SequenceNumber prev_snapshot = 0; // 0 means no previous snapshot current_user_key_snapshot_ = visible_at_tip_ ? earliest_snapshot_ : findEarliestVisibleSnapshot(ikey_.sequence, &prev_snapshot); if (need_skip) { // This case is handled below. } else if (clear_and_output_next_key_) { // In the previous iteration we encountered a single delete that we could // not compact out. We will keep this Put, but can drop it's data. // (See Optimization 3, below.) 
assert(ikey_.type == kTypeValue); if (ikey_.type != kTypeValue) { ROCKS_LOG_FATAL(info_log_, "Unexpected key type %d for compaction output", ikey_.type); } assert(current_user_key_snapshot_ == last_snapshot); if (current_user_key_snapshot_ != last_snapshot) { ROCKS_LOG_FATAL(info_log_, "current_user_key_snapshot_ (%" PRIu64 ") != last_snapshot (%" PRIu64 ")", current_user_key_snapshot_, last_snapshot); } value_.clear(); valid_ = true; clear_and_output_next_key_ = false; } else if (ikey_.type == kTypeSingleDeletion) { // We can compact out a SingleDelete if: // 1) We encounter the corresponding PUT -OR- we know that this key // doesn't appear past this output level // =AND= // 2) We've already returned a record in this snapshot -OR- // there are no earlier earliest_write_conflict_snapshot. // // Rule 1 is needed for SingleDelete correctness. Rule 2 is needed to // allow Transactions to do write-conflict checking (if we compacted away // all keys, then we wouldn't know that a write happened in this // snapshot). If there is no earlier snapshot, then we know that there // are no active transactions that need to know about any writes. // // Optimization 3: // If we encounter a SingleDelete followed by a PUT and Rule 2 is NOT // true, then we must output a SingleDelete. In this case, we will decide // to also output the PUT. While we are compacting less by outputting the // PUT now, hopefully this will lead to better compaction in the future // when Rule 2 is later true (Ie, We are hoping we can later compact out // both the SingleDelete and the Put, while we couldn't if we only // outputted the SingleDelete now). // In this case, we can save space by removing the PUT's value as it will // never be read. // // Deletes and Merges are not supported on the same key that has a // SingleDelete as it is not possible to correctly do any partial // compaction of such a combination of operations. 
The result of mixing // those operations for a given key is documented as being undefined. So // we can choose how to handle such a combinations of operations. We will // try to compact out as much as we can in these cases. // We will report counts on these anomalous cases. // The easiest way to process a SingleDelete during iteration is to peek // ahead at the next key. ParsedInternalKey next_ikey; input_->Next(); // Check whether the next key exists, is not corrupt, and is the same key // as the single delete. if (input_->Valid() && ParseInternalKey(input_->key(), &next_ikey) && cmp_->Equal(ikey_.user_key, next_ikey.user_key)) { // Check whether the next key belongs to the same snapshot as the // SingleDelete. if (prev_snapshot == 0 || DEFINITELY_NOT_IN_SNAPSHOT(next_ikey.sequence, prev_snapshot)) { if (next_ikey.type == kTypeSingleDeletion) { // We encountered two SingleDeletes in a row. This could be due to // unexpected user input. // Skip the first SingleDelete and let the next iteration decide how // to handle the second SingleDelete // First SingleDelete has been skipped since we already called // input_->Next(). ++iter_stats_.num_record_drop_obsolete; ++iter_stats_.num_single_del_mismatch; } else if (has_outputted_key_ || DEFINITELY_IN_SNAPSHOT( ikey_.sequence, earliest_write_conflict_snapshot_)) { // Found a matching value, we can drop the single delete and the // value. It is safe to drop both records since we've already // outputted a key in this snapshot, or there is no earlier // snapshot (Rule 2 above). // Note: it doesn't matter whether the second key is a Put or if it // is an unexpected Merge or Delete. We will compact it out // either way. We will maintain counts of how many mismatches // happened if (next_ikey.type != kTypeValue && next_ikey.type != kTypeBlobIndex) { ++iter_stats_.num_single_del_mismatch; } ++iter_stats_.num_record_drop_hidden; ++iter_stats_.num_record_drop_obsolete; // Already called input_->Next() once. 
Call it a second time to // skip past the second key. input_->Next(); } else { // Found a matching value, but we cannot drop both keys since // there is an earlier snapshot and we need to leave behind a record // to know that a write happened in this snapshot (Rule 2 above). // Clear the value and output the SingleDelete. (The value will be // outputted on the next iteration.) // Setting valid_ to true will output the current SingleDelete valid_ = true; // Set up the Put to be outputted in the next iteration. // (Optimization 3). clear_and_output_next_key_ = true; } } else { // We hit the next snapshot without hitting a put, so the iterator // returns the single delete. valid_ = true; } } else { // We are at the end of the input, could not parse the next key, or hit // a different key. The iterator returns the single delete if the key // possibly exists beyond the current output level. We set // has_current_user_key to false so that if the iterator is at the next // key, we do not compare it again against the previous key at the next // iteration. If the next key is corrupt, we return before the // comparison, so the value of has_current_user_key does not matter. has_current_user_key_ = false; if (compaction_ != nullptr && IN_EARLIEST_SNAPSHOT(ikey_.sequence) && compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key, &level_ptrs_)) { // Key doesn't exist outside of this range. // Can compact out this SingleDelete. ++iter_stats_.num_record_drop_obsolete; ++iter_stats_.num_single_del_fallthru; if (!bottommost_level_) { ++iter_stats_.num_optimized_del_drop_obsolete; } } else { // Output SingleDelete valid_ = true; } } if (valid_) { at_next_ = true; } } else if (last_snapshot == current_user_key_snapshot_ || (last_snapshot > 0 && last_snapshot < current_user_key_snapshot_)) { // If the earliest snapshot is which this key is visible in // is the same as the visibility of a previous instance of the // same key, then this kv is not visible in any snapshot. 
// Hidden by an newer entry for same user key // // Note: Dropping this key will not affect TransactionDB write-conflict // checking since there has already been a record returned for this key // in this snapshot. assert(last_sequence >= current_user_key_sequence_); if (last_sequence < current_user_key_sequence_) { ROCKS_LOG_FATAL(info_log_, "last_sequence (%" PRIu64 ") < current_user_key_sequence_ (%" PRIu64 ")", last_sequence, current_user_key_sequence_); } ++iter_stats_.num_record_drop_hidden; // (A) input_->Next(); } else if (compaction_ != nullptr && ikey_.type == kTypeDeletion && IN_EARLIEST_SNAPSHOT(ikey_.sequence) && ikeyNotNeededForIncrementalSnapshot() && compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key, &level_ptrs_)) { // TODO(noetzli): This is the only place where we use compaction_ // (besides the constructor). We should probably get rid of this // dependency and find a way to do similar filtering during flushes. // // For this user key: // (1) there is no data in higher levels // (2) data in lower levels will have larger sequence numbers // (3) data in layers that are being compacted here and have // smaller sequence numbers will be dropped in the next // few iterations of this loop (by rule (A) above). // Therefore this deletion marker is obsolete and can be dropped. // // Note: Dropping this Delete will not affect TransactionDB // write-conflict checking since it is earlier than any snapshot. // // It seems that we can also drop deletion later than earliest snapshot // given that: // (1) The deletion is earlier than earliest_write_conflict_snapshot, and // (2) No value exist earlier than the deletion. 
++iter_stats_.num_record_drop_obsolete; if (!bottommost_level_) { ++iter_stats_.num_optimized_del_drop_obsolete; } input_->Next(); } else if ((ikey_.type == kTypeDeletion) && bottommost_level_ && ikeyNotNeededForIncrementalSnapshot()) { // Handle the case where we have a delete key at the bottom most level // We can skip outputting the key iff there are no subsequent puts for this // key ParsedInternalKey next_ikey; input_->Next(); // Skip over all versions of this key that happen to occur in the same snapshot // range as the delete while (input_->Valid() && ParseInternalKey(input_->key(), &next_ikey) && cmp_->Equal(ikey_.user_key, next_ikey.user_key) && (prev_snapshot == 0 || DEFINITELY_NOT_IN_SNAPSHOT(next_ikey.sequence, prev_snapshot))) { input_->Next(); } // If you find you still need to output a row with this key, we need to output the // delete too if (input_->Valid() && ParseInternalKey(input_->key(), &next_ikey) && cmp_->Equal(ikey_.user_key, next_ikey.user_key)) { valid_ = true; at_next_ = true; } } else if (ikey_.type == kTypeMerge) { if (!merge_helper_->HasOperator()) { status_ = Status::InvalidArgument( "merge_operator is not properly initialized."); return; } pinned_iters_mgr_.StartPinning(); // We know the merge type entry is not hidden, otherwise we would // have hit (A) // We encapsulate the merge related state machine in a different // object to minimize change to the existing flow. Status s = merge_helper_->MergeUntil(input_, range_del_agg_, prev_snapshot, bottommost_level_); merge_out_iter_.SeekToFirst(); if (!s.ok() && !s.IsMergeInProgress()) { status_ = s; return; } else if (merge_out_iter_.Valid()) { // NOTE: key, value, and ikey_ refer to old entries. // These will be correctly set below. 
key_ = merge_out_iter_.key(); value_ = merge_out_iter_.value(); bool valid_key __attribute__((__unused__)); valid_key = ParseInternalKey(key_, &ikey_); // MergeUntil stops when it encounters a corrupt key and does not // include them in the result, so we expect the keys here to valid. assert(valid_key); if (!valid_key) { ROCKS_LOG_FATAL(info_log_, "Invalid key (%s) in compaction", key_.ToString(true).c_str()); } // Keep current_key_ in sync. current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); key_ = current_key_.GetInternalKey(); ikey_.user_key = current_key_.GetUserKey(); valid_ = true; } else { // all merge operands were filtered out. reset the user key, since the // batch consumed by the merge operator should not shadow any keys // coming after the merges has_current_user_key_ = false; pinned_iters_mgr_.ReleasePinnedData(); if (merge_helper_->FilteredUntil(&skip_until)) { need_skip = true; } } } else { // 1. new user key -OR- // 2. different snapshot stripe bool should_delete = range_del_agg_->ShouldDelete( key_, RangeDelPositioningMode::kForwardTraversal); if (should_delete) { ++iter_stats_.num_record_drop_hidden; ++iter_stats_.num_record_drop_range_del; input_->Next(); } else { valid_ = true; } } if (need_skip) { input_->Seek(skip_until); } } if (!valid_ && IsShuttingDown()) { status_ = Status::ShutdownInProgress(); } if (IsPausingManualCompaction()) { status_ = Status::Incomplete(Status::SubCode::kManualCompactionPaused); } } void CompactionIterator::PrepareOutput() { if (valid_) { if (compaction_filter_ && ikey_.type == kTypeBlobIndex) { const auto blob_decision = compaction_filter_->PrepareBlobOutput( user_key(), value_, &compaction_filter_value_); if (blob_decision == CompactionFilter::BlobDecision::kCorruption) { status_ = Status::Corruption( "Corrupted blob reference encountered during GC"); valid_ = false; } else if (blob_decision == CompactionFilter::BlobDecision::kIOError) { status_ = Status::IOError("Could not relocate blob during GC"); 
valid_ = false; } else if (blob_decision == CompactionFilter::BlobDecision::kChangeValue) { value_ = compaction_filter_value_; } } // Zeroing out the sequence number leads to better compression. // If this is the bottommost level (no files in lower levels) // and the earliest snapshot is larger than this seqno // and the userkey differs from the last userkey in compaction // then we can squash the seqno to zero. // // This is safe for TransactionDB write-conflict checking since transactions // only care about sequence number larger than any active snapshots. // // Can we do the same for levels above bottom level as long as // KeyNotExistsBeyondOutputLevel() return true? if (valid_ && compaction_ != nullptr && !compaction_->allow_ingest_behind() && ikeyNotNeededForIncrementalSnapshot() && bottommost_level_ && IN_EARLIEST_SNAPSHOT(ikey_.sequence) && ikey_.type != kTypeMerge) { assert(ikey_.type != kTypeDeletion && ikey_.type != kTypeSingleDeletion); if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion) { ROCKS_LOG_FATAL(info_log_, "Unexpected key type %d for seq-zero optimization", ikey_.type); } ikey_.sequence = 0; current_key_.UpdateInternalKey(0, ikey_.type); } } } inline SequenceNumber CompactionIterator::findEarliestVisibleSnapshot( SequenceNumber in, SequenceNumber* prev_snapshot) { assert(snapshots_->size()); if (snapshots_->size() == 0) { ROCKS_LOG_FATAL(info_log_, "No snapshot left in findEarliestVisibleSnapshot"); } auto snapshots_iter = std::lower_bound( snapshots_->begin(), snapshots_->end(), in); if (snapshots_iter == snapshots_->begin()) { *prev_snapshot = 0; } else { *prev_snapshot = *std::prev(snapshots_iter); assert(*prev_snapshot < in); if (*prev_snapshot >= in) { ROCKS_LOG_FATAL(info_log_, "*prev_snapshot >= in in findEarliestVisibleSnapshot"); } } if (snapshot_checker_ == nullptr) { return snapshots_iter != snapshots_->end() ? 
*snapshots_iter : kMaxSequenceNumber; } bool has_released_snapshot = !released_snapshots_.empty(); for (; snapshots_iter != snapshots_->end(); ++snapshots_iter) { auto cur = *snapshots_iter; assert(in <= cur); if (in > cur) { ROCKS_LOG_FATAL(info_log_, "in > cur in findEarliestVisibleSnapshot"); } // Skip if cur is in released_snapshots. if (has_released_snapshot && released_snapshots_.count(cur) > 0) { continue; } auto res = snapshot_checker_->CheckInSnapshot(in, cur); if (res == SnapshotCheckerResult::kInSnapshot) { return cur; } else if (res == SnapshotCheckerResult::kSnapshotReleased) { released_snapshots_.insert(cur); } *prev_snapshot = cur; } return kMaxSequenceNumber; } // used in 2 places - prevents deletion markers to be dropped if they may be // needed and disables seqnum zero-out in PrepareOutput for recent keys. inline bool CompactionIterator::ikeyNotNeededForIncrementalSnapshot() { return (!compaction_->preserve_deletes()) || (ikey_.sequence < preserve_deletes_seqnum_); } bool CompactionIterator::IsInEarliestSnapshot(SequenceNumber sequence) { assert(snapshot_checker_ != nullptr); bool pre_condition = (earliest_snapshot_ == kMaxSequenceNumber || (earliest_snapshot_iter_ != snapshots_->end() && *earliest_snapshot_iter_ == earliest_snapshot_)); assert(pre_condition); if (!pre_condition) { ROCKS_LOG_FATAL(info_log_, "Pre-Condition is not hold in IsInEarliestSnapshot"); } auto in_snapshot = snapshot_checker_->CheckInSnapshot(sequence, earliest_snapshot_); while (UNLIKELY(in_snapshot == SnapshotCheckerResult::kSnapshotReleased)) { // Avoid the the current earliest_snapshot_ being return as // earliest visible snapshot for the next value. So if a value's sequence // is zero-ed out by PrepareOutput(), the next value will be compact out. 
released_snapshots_.insert(earliest_snapshot_); earliest_snapshot_iter_++; if (earliest_snapshot_iter_ == snapshots_->end()) { earliest_snapshot_ = kMaxSequenceNumber; } else { earliest_snapshot_ = *earliest_snapshot_iter_; } in_snapshot = snapshot_checker_->CheckInSnapshot(sequence, earliest_snapshot_); } assert(in_snapshot != SnapshotCheckerResult::kSnapshotReleased); if (in_snapshot == SnapshotCheckerResult::kSnapshotReleased) { ROCKS_LOG_FATAL(info_log_, "Unexpected released snapshot in IsInEarliestSnapshot"); } return in_snapshot == SnapshotCheckerResult::kInSnapshot; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_iterator.h000066400000000000000000000227161370372246700222140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include "db/compaction/compaction.h" #include "db/compaction/compaction_iteration_stats.h" #include "db/merge_helper.h" #include "db/pinned_iterators_manager.h" #include "db/range_del_aggregator.h" #include "db/snapshot_checker.h" #include "options/cf_options.h" #include "rocksdb/compaction_filter.h" namespace ROCKSDB_NAMESPACE { class CompactionIterator { public: // A wrapper around Compaction. Has a much smaller interface, only what // CompactionIterator uses. Tests can override it. 
class CompactionProxy {
 public:
  // Wraps `compaction`; every accessor below forwards to it. The pointer is
  // stored as-is and dereferenced on each call.
  explicit CompactionProxy(const Compaction* compaction)
      : compaction_(compaction) {}

  virtual ~CompactionProxy() = default;
  // The argument is ignored; always returns compaction_->level().
  virtual int level(size_t /*compaction_input_level*/ = 0) const {
    return compaction_->level();
  }
  // NOTE(review): `std::vector*` has no template argument list in this copy
  // (the `<...>` appears to have been lost in extraction); restore it from
  // upstream before compiling.
  virtual bool KeyNotExistsBeyondOutputLevel(
      const Slice& user_key, std::vector* level_ptrs) const {
    return compaction_->KeyNotExistsBeyondOutputLevel(user_key, level_ptrs);
  }
  virtual bool bottommost_level() const {
    return compaction_->bottommost_level();
  }
  virtual int number_levels() const { return compaction_->number_levels(); }
  virtual Slice GetLargestUserKey() const {
    return compaction_->GetLargestUserKey();
  }
  // The two options below are read from the compaction's immutable column
  // family options.
  virtual bool allow_ingest_behind() const {
    return compaction_->immutable_cf_options()->allow_ingest_behind;
  }
  virtual bool preserve_deletes() const {
    return compaction_->immutable_cf_options()->preserve_deletes;
  }

 protected:
  // For subclasses that supply their own answers instead of wrapping a
  // Compaction.
  // NOTE(review): this leaves compaction_ uninitialized, so a subclass must
  // override every accessor above; any inherited accessor would dereference
  // an indeterminate pointer.
  CompactionProxy() = default;

 private:
  const Compaction* compaction_;
};

// Constructor taking a raw Compaction pointer.
// NOTE(review): the `std::vector*`, `std::atomic*` and `std::shared_ptr`
// parameters below have no template argument lists in this copy (apparently
// lost in extraction); restore them from upstream before compiling.
CompactionIterator(
    InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
    SequenceNumber last_sequence, std::vector* snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
    const SnapshotChecker* snapshot_checker, Env* env,
    bool report_detailed_time, bool expect_valid_internal_key,
    CompactionRangeDelAggregator* range_del_agg,
    const Compaction* compaction = nullptr,
    const CompactionFilter* compaction_filter = nullptr,
    const std::atomic* shutting_down = nullptr,
    const SequenceNumber preserve_deletes_seqnum = 0,
    const std::atomic* manual_compaction_paused = nullptr,
    const std::shared_ptr info_log = nullptr);

// Constructor with custom CompactionProxy, used for tests.
// NOTE(review): the `std::vector*`, `std::unique_ptr`, `std::atomic*` and
// `std::shared_ptr` parameters below have no template argument lists in this
// copy (apparently lost in extraction); restore them from upstream before
// compiling.
CompactionIterator(
    InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
    SequenceNumber last_sequence, std::vector* snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
    const SnapshotChecker* snapshot_checker, Env* env,
    bool report_detailed_time, bool expect_valid_internal_key,
    CompactionRangeDelAggregator* range_del_agg,
    std::unique_ptr compaction,
    const CompactionFilter* compaction_filter = nullptr,
    const std::atomic* shutting_down = nullptr,
    const SequenceNumber preserve_deletes_seqnum = 0,
    const std::atomic* manual_compaction_paused = nullptr,
    const std::shared_ptr info_log = nullptr);

~CompactionIterator();

void ResetRecordCounts();

// Seek to the beginning of the compaction iterator output.
//
// REQUIRED: Call only once.
void SeekToFirst();

// Produces the next record in the compaction.
//
// REQUIRED: SeekToFirst() has been called.
void Next();

// Getters
const Slice& key() const { return key_; }
const Slice& value() const { return value_; }
const Status& status() const { return status_; }
const ParsedInternalKey& ikey() const { return ikey_; }
bool Valid() const { return valid_; }
// Returns current_user_key_, i.e. the user key recorded when the current
// user key was first seen.
const Slice& user_key() const { return current_user_key_; }
const CompactionIterationStats& iter_stats() const { return iter_stats_; }

private:
// Processes the input stream to find the next output
void NextFromInput();

// Do last preparations before presenting the output to the callee. For blob
// index records the compaction filter gets a chance to validate or rewrite
// the value (which may fail and invalidate the output); afterwards the
// sequence number is zeroed out if possible for better compression.
void PrepareOutput();

// Invoke compaction filter if needed.
void InvokeFilterIfNeeded(bool* need_skip, Slice* skip_until);

// Given a sequence number, return the sequence number of the
// earliest snapshot that this sequence number is visible in, or
// kMaxSequenceNumber if there is none. *prev_snapshot receives the snapshot
// preceding the returned one (0 if none).
// The snapshots themselves are arranged in ascending order of
// sequence numbers, so the first candidate is located with std::lower_bound.
// When a snapshot checker is present, the candidates from there on are
// checked one by one.
inline SequenceNumber findEarliestVisibleSnapshot(
    SequenceNumber in, SequenceNumber* prev_snapshot);

// Checks whether the currently seen ikey_ is needed for
// incremental (differential) snapshot and hence can't be dropped
// or seqnum be zero-ed out even if all other conditions for it are met.
inline bool ikeyNotNeededForIncrementalSnapshot();

// Returns true when no snapshot checker is configured, or when the checker
// reports `sequence` as visible at kMaxSequenceNumber.
inline bool KeyCommitted(SequenceNumber sequence) {
  return snapshot_checker_ == nullptr ||
         snapshot_checker_->CheckInSnapshot(sequence, kMaxSequenceNumber) ==
             SnapshotCheckerResult::kInSnapshot;
}

bool IsInEarliestSnapshot(SequenceNumber sequence);

// NOTE(review): several members below (`std::vector*`, `std::unordered_set`,
// `std::vector::const_iterator`, `std::unique_ptr`, `std::atomic*`) have no
// template argument lists in this copy (apparently lost in extraction);
// restore them from upstream before compiling.
InternalIterator* input_;
const Comparator* cmp_;
MergeHelper* merge_helper_;
const std::vector* snapshots_;
// List of snapshots released during compaction.
// findEarliestVisibleSnapshot() and IsInEarliestSnapshot() find them out
// from the return value of snapshot_checker, and make sure they will not be
// returned as earliest visible snapshot of an older value.
// See WritePreparedTransactionTest::ReleaseSnapshotDuringCompaction3.
std::unordered_set released_snapshots_;
// Position of earliest_snapshot_ within *snapshots_.
std::vector::const_iterator earliest_snapshot_iter_;
const SequenceNumber earliest_write_conflict_snapshot_;
const SnapshotChecker* const snapshot_checker_;
Env* env_;
bool report_detailed_time_;
bool expect_valid_internal_key_;
CompactionRangeDelAggregator* range_del_agg_;
std::unique_ptr compaction_;
const CompactionFilter* compaction_filter_;
const std::atomic* shutting_down_;
const std::atomic* manual_compaction_paused_;
const SequenceNumber preserve_deletes_seqnum_;
bool bottommost_level_;
bool valid_ = false;
bool visible_at_tip_;
SequenceNumber earliest_snapshot_;
SequenceNumber latest_snapshot_;

// State
//
// Points to a copy of the current compaction iterator output (current_key_)
// if valid_.
Slice key_;
// Points to the value in the underlying iterator that corresponds to the
// current output.
Slice value_;
// OK unless the compaction iterator hit an error or was interrupted:
// a merge operand without a merge operator (InvalidArgument), a failed
// merge, an unexpected corrupt internal key or blob reference (Corruption),
// a blob relocation failure (IOError), shutdown (ShutdownInProgress) or a
// manual compaction pause (Incomplete).
Status status_;
// Stores the user key, sequence number and type of the current compaction
// iterator output (or current key in the underlying iterator during
// NextFromInput()).
ParsedInternalKey ikey_;
// Stores whether ikey_.user_key is valid. If set to false, the user key is
// not compared against the current key in the underlying iterator.
bool has_current_user_key_ = false;
// Set by NextFromInput() when it has already advanced the underlying
// iterator past the record being output (SingleDelete and bottommost-Delete
// look-ahead).
// NOTE(review): presumably tells Next() not to advance the input again --
// confirm against Next(), which is not part of this excerpt.
bool at_next_ = false;
// Holds a copy of the current compaction iterator output (or current key in
// the underlying iterator during NextFromInput()).
IterKey current_key_;
Slice current_user_key_;
SequenceNumber current_user_key_sequence_;
SequenceNumber current_user_key_snapshot_;

// True if the iterator has already returned a record for the current key.
bool has_outputted_key_ = false;

// If true, NextFromInput() clears the value of the next key and outputs it
// without applying any compaction rules.  This is used for outputting a put
// after a single delete.
bool clear_and_output_next_key_ = false;

MergeOutputIterator merge_out_iter_;
// PinnedIteratorsManager used to pin input_ Iterator blocks while reading
// merge operands and then releasing them after consuming them.
PinnedIteratorsManager pinned_iters_mgr_;
// Output buffers handed to the compaction filter: the replacement value and
// the key to skip to.
std::string compaction_filter_value_;
InternalKey compaction_filter_skip_until_;
// "level_ptrs" holds indices that remember which file of an associated
// level we were last checking during the last call to compaction->
// KeyNotExistsBeyondOutputLevel(). This allows future calls to the function
// to pick off where it left off since each subcompaction's key range is
// increasing so a later call to the function must be looking for a key that
// is in or beyond the last file checked during the previous call
// NOTE(review): `std::vector` has no template argument list in this copy
// (apparently lost in extraction); restore it from upstream before compiling.
std::vector level_ptrs_;
CompactionIterationStats iter_stats_;
// Used to avoid purging uncommitted values. The application can specify
// uncommitted values by providing a SnapshotChecker object.
bool current_key_committed_;
// NOTE(review): `std::shared_ptr` has no template argument list in this copy
// (apparently lost in extraction); restore it from upstream before compiling.
std::shared_ptr info_log_;

// True if a shutdown flag was supplied and is currently set.
bool IsShuttingDown() {
  // This is a best-effort facility, so memory_order_relaxed is sufficient.
  return shutting_down_ && shutting_down_->load(std::memory_order_relaxed);
}

// True if a manual-compaction pause flag was supplied and is currently set.
bool IsPausingManualCompaction() {
  // This is a best-effort facility, so memory_order_relaxed is sufficient.
  return manual_compaction_paused_ &&
         manual_compaction_paused_->load(std::memory_order_relaxed);
}
};
}  // namespace ROCKSDB_NAMESPACE
// NOTE(review): the next line is an archive member header (start of
// db/compaction/compaction_iterator_test.cc), not C++ source.
rocksdb-6.11.4/db/compaction/compaction_iterator_test.cc000066400000000000000000001105341370372246700234050ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

// NOTE(review): the two directives below have no header name in this copy;
// the `<...>` part appears to have been lost in extraction. Restore them from
// the upstream file before compiling.
#include
#include

#include "db/compaction/compaction_iterator.h"
#include "port/port.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"

namespace ROCKSDB_NAMESPACE {

// Expects no merging attempts.
// Both merge entry points record a test failure and report the merge as
// unsuccessful.
class NoMergingMergeOp : public MergeOperator {
 public:
  bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
                   MergeOperationOutput* /*merge_out*/) const override {
    ADD_FAILURE();
    return false;
  }
  // NOTE(review): `std::deque&` has no template argument list in this copy
  // (apparently lost in extraction); restore it from upstream.
  bool PartialMergeMulti(const Slice& /*key*/,
                         const std::deque& /*operand_list*/,
                         std::string* /*new_value*/,
                         Logger* /*logger*/) const override {
    ADD_FAILURE();
    return false;
  }
  const char* Name() const override {
    return "CompactionIteratorTest NoMergingMergeOp";
  }
};

// Compaction filter that gets stuck when it sees a particular key,
// then gets unstuck when told to.
// Always returns Decision::kRemove.
class StallingFilter : public CompactionFilter { public: Decision FilterV2(int /*level*/, const Slice& key, ValueType /*type*/, const Slice& /*existing_value*/, std::string* /*new_value*/, std::string* /*skip_until*/) const override { int k = std::atoi(key.ToString().c_str()); last_seen.store(k); while (k >= stall_at.load()) { std::this_thread::yield(); } return Decision::kRemove; } const char* Name() const override { return "CompactionIteratorTest StallingFilter"; } // Wait until the filter sees a key >= k and stalls at that key. // If `exact`, asserts that the seen key is equal to k. void WaitForStall(int k, bool exact = true) { stall_at.store(k); while (last_seen.load() < k) { std::this_thread::yield(); } if (exact) { EXPECT_EQ(k, last_seen.load()); } } // Filter will stall on key >= stall_at. Advance stall_at to unstall. mutable std::atomic stall_at{0}; // Last key the filter was called with. mutable std::atomic last_seen{0}; }; // Compaction filter that filter out all keys. class FilterAllKeysCompactionFilter : public CompactionFilter { public: Decision FilterV2(int /*level*/, const Slice& /*key*/, ValueType /*type*/, const Slice& /*existing_value*/, std::string* /*new_value*/, std::string* /*skip_until*/) const override { return Decision::kRemove; } const char* Name() const override { return "AllKeysCompactionFilter"; } }; class LoggingForwardVectorIterator : public InternalIterator { public: struct Action { enum class Type { SEEK_TO_FIRST, SEEK, NEXT, }; Type type; std::string arg; explicit Action(Type _type, std::string _arg = "") : type(_type), arg(_arg) {} bool operator==(const Action& rhs) const { return std::tie(type, arg) == std::tie(rhs.type, rhs.arg); } }; LoggingForwardVectorIterator(const std::vector& keys, const std::vector& values) : keys_(keys), values_(values), current_(keys.size()) { assert(keys_.size() == values_.size()); } bool Valid() const override { return current_ < keys_.size(); } void SeekToFirst() override { 
log.emplace_back(Action::Type::SEEK_TO_FIRST); current_ = 0; } void SeekToLast() override { assert(false); } void Seek(const Slice& target) override { log.emplace_back(Action::Type::SEEK, target.ToString()); current_ = std::lower_bound(keys_.begin(), keys_.end(), target.ToString()) - keys_.begin(); } void SeekForPrev(const Slice& /*target*/) override { assert(false); } void Next() override { assert(Valid()); log.emplace_back(Action::Type::NEXT); current_++; } void Prev() override { assert(false); } Slice key() const override { assert(Valid()); return Slice(keys_[current_]); } Slice value() const override { assert(Valid()); return Slice(values_[current_]); } Status status() const override { return Status::OK(); } std::vector log; private: std::vector keys_; std::vector values_; size_t current_; }; class FakeCompaction : public CompactionIterator::CompactionProxy { public: FakeCompaction() = default; int level(size_t /*compaction_input_level*/) const override { return 0; } bool KeyNotExistsBeyondOutputLevel( const Slice& /*user_key*/, std::vector* /*level_ptrs*/) const override { return is_bottommost_level || key_not_exists_beyond_output_level; } bool bottommost_level() const override { return is_bottommost_level; } int number_levels() const override { return 1; } Slice GetLargestUserKey() const override { return "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; } bool allow_ingest_behind() const override { return false; } bool preserve_deletes() const override { return false; } bool key_not_exists_beyond_output_level = false; bool is_bottommost_level = false; }; // A simplifed snapshot checker which assumes each snapshot has a global // last visible sequence. 
class TestSnapshotChecker : public SnapshotChecker { public: explicit TestSnapshotChecker( SequenceNumber last_committed_sequence, const std::unordered_map& snapshots = {{}}) : last_committed_sequence_(last_committed_sequence), snapshots_(snapshots) {} SnapshotCheckerResult CheckInSnapshot( SequenceNumber seq, SequenceNumber snapshot_seq) const override { if (snapshot_seq == kMaxSequenceNumber) { return seq <= last_committed_sequence_ ? SnapshotCheckerResult::kInSnapshot : SnapshotCheckerResult::kNotInSnapshot; } assert(snapshots_.count(snapshot_seq) > 0); return seq <= snapshots_.at(snapshot_seq) ? SnapshotCheckerResult::kInSnapshot : SnapshotCheckerResult::kNotInSnapshot; } private: SequenceNumber last_committed_sequence_; // A map of valid snapshot to last visible sequence to the snapshot. std::unordered_map snapshots_; }; // Test param: // bool: whether to pass snapshot_checker to compaction iterator. class CompactionIteratorTest : public testing::TestWithParam { public: CompactionIteratorTest() : cmp_(BytewiseComparator()), icmp_(cmp_), snapshots_({}) {} void InitIterators( const std::vector& ks, const std::vector& vs, const std::vector& range_del_ks, const std::vector& range_del_vs, SequenceNumber last_sequence, SequenceNumber last_committed_sequence = kMaxSequenceNumber, MergeOperator* merge_op = nullptr, CompactionFilter* filter = nullptr, bool bottommost_level = false, SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) { std::unique_ptr unfragmented_range_del_iter( new test::VectorIterator(range_del_ks, range_del_vs)); auto tombstone_list = std::make_shared( std::move(unfragmented_range_del_iter), icmp_); std::unique_ptr range_del_iter( new FragmentedRangeTombstoneIterator(tombstone_list, icmp_, kMaxSequenceNumber)); range_del_agg_.reset(new CompactionRangeDelAggregator(&icmp_, snapshots_)); range_del_agg_->AddTombstones(std::move(range_del_iter)); std::unique_ptr compaction; if (filter || bottommost_level) { compaction_proxy_ = new 
FakeCompaction(); compaction_proxy_->is_bottommost_level = bottommost_level; compaction.reset(compaction_proxy_); } bool use_snapshot_checker = UseSnapshotChecker() || GetParam(); if (use_snapshot_checker || last_committed_sequence < kMaxSequenceNumber) { snapshot_checker_.reset( new TestSnapshotChecker(last_committed_sequence, snapshot_map_)); } merge_helper_.reset( new MergeHelper(Env::Default(), cmp_, merge_op, filter, nullptr, false, 0 /*latest_snapshot*/, snapshot_checker_.get(), 0 /*level*/, nullptr /*statistics*/, &shutting_down_)); iter_.reset(new LoggingForwardVectorIterator(ks, vs)); iter_->SeekToFirst(); c_iter_.reset(new CompactionIterator( iter_.get(), cmp_, merge_helper_.get(), last_sequence, &snapshots_, earliest_write_conflict_snapshot, snapshot_checker_.get(), Env::Default(), false /* report_detailed_time */, false, range_del_agg_.get(), std::move(compaction), filter, &shutting_down_)); } void AddSnapshot(SequenceNumber snapshot, SequenceNumber last_visible_seq = kMaxSequenceNumber) { snapshots_.push_back(snapshot); snapshot_map_[snapshot] = last_visible_seq; } virtual bool UseSnapshotChecker() const { return false; } void RunTest( const std::vector& input_keys, const std::vector& input_values, const std::vector& expected_keys, const std::vector& expected_values, SequenceNumber last_committed_seq = kMaxSequenceNumber, MergeOperator* merge_operator = nullptr, CompactionFilter* compaction_filter = nullptr, bool bottommost_level = false, SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) { InitIterators(input_keys, input_values, {}, {}, kMaxSequenceNumber, last_committed_seq, merge_operator, compaction_filter, bottommost_level, earliest_write_conflict_snapshot); c_iter_->SeekToFirst(); for (size_t i = 0; i < expected_keys.size(); i++) { std::string info = "i = " + ToString(i); ASSERT_TRUE(c_iter_->Valid()) << info; ASSERT_OK(c_iter_->status()) << info; ASSERT_EQ(expected_keys[i], c_iter_->key().ToString()) << info; 
ASSERT_EQ(expected_values[i], c_iter_->value().ToString()) << info; c_iter_->Next(); } ASSERT_FALSE(c_iter_->Valid()); } const Comparator* cmp_; const InternalKeyComparator icmp_; std::vector snapshots_; // A map of valid snapshot to last visible sequence to the snapshot. std::unordered_map snapshot_map_; std::unique_ptr merge_helper_; std::unique_ptr iter_; std::unique_ptr c_iter_; std::unique_ptr range_del_agg_; std::unique_ptr snapshot_checker_; std::atomic shutting_down_{false}; FakeCompaction* compaction_proxy_; }; // It is possible that the output of the compaction iterator is empty even if // the input is not. TEST_P(CompactionIteratorTest, EmptyResult) { InitIterators({test::KeyStr("a", 5, kTypeSingleDeletion), test::KeyStr("a", 3, kTypeValue)}, {"", "val"}, {}, {}, 5); c_iter_->SeekToFirst(); ASSERT_FALSE(c_iter_->Valid()); } // If there is a corruption after a single deletion, the corrupted key should // be preserved. TEST_P(CompactionIteratorTest, CorruptionAfterSingleDeletion) { InitIterators({test::KeyStr("a", 5, kTypeSingleDeletion), test::KeyStr("a", 3, kTypeValue, true), test::KeyStr("b", 10, kTypeValue)}, {"", "val", "val2"}, {}, {}, 10); c_iter_->SeekToFirst(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 5, kTypeSingleDeletion), c_iter_->key().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 3, kTypeValue, true), c_iter_->key().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("b", 10, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); ASSERT_FALSE(c_iter_->Valid()); } TEST_P(CompactionIteratorTest, SimpleRangeDeletion) { InitIterators({test::KeyStr("morning", 5, kTypeValue), test::KeyStr("morning", 2, kTypeValue), test::KeyStr("night", 3, kTypeValue)}, {"zao", "zao", "wan"}, {test::KeyStr("ma", 4, kTypeRangeDeletion)}, {"mz"}, 5); c_iter_->SeekToFirst(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("morning", 5, kTypeValue), 
c_iter_->key().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("night", 3, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); ASSERT_FALSE(c_iter_->Valid()); } TEST_P(CompactionIteratorTest, RangeDeletionWithSnapshots) { AddSnapshot(10); std::vector ks1; ks1.push_back(test::KeyStr("ma", 28, kTypeRangeDeletion)); std::vector vs1{"mz"}; std::vector ks2{test::KeyStr("morning", 15, kTypeValue), test::KeyStr("morning", 5, kTypeValue), test::KeyStr("night", 40, kTypeValue), test::KeyStr("night", 20, kTypeValue)}; std::vector vs2{"zao 15", "zao 5", "wan 40", "wan 20"}; InitIterators(ks2, vs2, ks1, vs1, 40); c_iter_->SeekToFirst(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("morning", 5, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("night", 40, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); ASSERT_FALSE(c_iter_->Valid()); } TEST_P(CompactionIteratorTest, CompactionFilterSkipUntil) { class Filter : public CompactionFilter { Decision FilterV2(int /*level*/, const Slice& key, ValueType t, const Slice& existing_value, std::string* /*new_value*/, std::string* skip_until) const override { std::string k = key.ToString(); std::string v = existing_value.ToString(); // See InitIterators() call below for the sequence of keys and their // filtering decisions. Here we closely assert that compaction filter is // called with the expected keys and only them, and with the right values. 
if (k == "a") { EXPECT_EQ(ValueType::kValue, t); EXPECT_EQ("av50", v); return Decision::kKeep; } if (k == "b") { EXPECT_EQ(ValueType::kValue, t); EXPECT_EQ("bv60", v); *skip_until = "d+"; return Decision::kRemoveAndSkipUntil; } if (k == "e") { EXPECT_EQ(ValueType::kMergeOperand, t); EXPECT_EQ("em71", v); return Decision::kKeep; } if (k == "f") { if (v == "fm65") { EXPECT_EQ(ValueType::kMergeOperand, t); *skip_until = "f"; } else { EXPECT_EQ("fm30", v); EXPECT_EQ(ValueType::kMergeOperand, t); *skip_until = "g+"; } return Decision::kRemoveAndSkipUntil; } if (k == "h") { EXPECT_EQ(ValueType::kValue, t); EXPECT_EQ("hv91", v); return Decision::kKeep; } if (k == "i") { EXPECT_EQ(ValueType::kMergeOperand, t); EXPECT_EQ("im95", v); *skip_until = "z"; return Decision::kRemoveAndSkipUntil; } ADD_FAILURE(); return Decision::kKeep; } const char* Name() const override { return "CompactionIteratorTest.CompactionFilterSkipUntil::Filter"; } }; NoMergingMergeOp merge_op; Filter filter; InitIterators( {test::KeyStr("a", 50, kTypeValue), // keep test::KeyStr("a", 45, kTypeMerge), test::KeyStr("b", 60, kTypeValue), // skip to "d+" test::KeyStr("b", 40, kTypeValue), test::KeyStr("c", 35, kTypeValue), test::KeyStr("d", 70, kTypeMerge), test::KeyStr("e", 71, kTypeMerge), // keep test::KeyStr("f", 65, kTypeMerge), // skip to "f", aka keep test::KeyStr("f", 30, kTypeMerge), // skip to "g+" test::KeyStr("f", 25, kTypeValue), test::KeyStr("g", 90, kTypeValue), test::KeyStr("h", 91, kTypeValue), // keep test::KeyStr("i", 95, kTypeMerge), // skip to "z" test::KeyStr("j", 99, kTypeValue)}, {"av50", "am45", "bv60", "bv40", "cv35", "dm70", "em71", "fm65", "fm30", "fv25", "gv90", "hv91", "im95", "jv99"}, {}, {}, kMaxSequenceNumber, kMaxSequenceNumber, &merge_op, &filter); // Compaction should output just "a", "e" and "h" keys. 
c_iter_->SeekToFirst(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 50, kTypeValue), c_iter_->key().ToString()); ASSERT_EQ("av50", c_iter_->value().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("e", 71, kTypeMerge), c_iter_->key().ToString()); ASSERT_EQ("em71", c_iter_->value().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("h", 91, kTypeValue), c_iter_->key().ToString()); ASSERT_EQ("hv91", c_iter_->value().ToString()); c_iter_->Next(); ASSERT_FALSE(c_iter_->Valid()); // Check that the compaction iterator did the correct sequence of calls on // the underlying iterator. using A = LoggingForwardVectorIterator::Action; using T = A::Type; std::vector expected_actions = { A(T::SEEK_TO_FIRST), A(T::NEXT), A(T::NEXT), A(T::SEEK, test::KeyStr("d+", kMaxSequenceNumber, kValueTypeForSeek)), A(T::NEXT), A(T::NEXT), A(T::SEEK, test::KeyStr("g+", kMaxSequenceNumber, kValueTypeForSeek)), A(T::NEXT), A(T::SEEK, test::KeyStr("z", kMaxSequenceNumber, kValueTypeForSeek))}; ASSERT_EQ(expected_actions, iter_->log); } TEST_P(CompactionIteratorTest, ShuttingDownInFilter) { NoMergingMergeOp merge_op; StallingFilter filter; InitIterators( {test::KeyStr("1", 1, kTypeValue), test::KeyStr("2", 2, kTypeValue), test::KeyStr("3", 3, kTypeValue), test::KeyStr("4", 4, kTypeValue)}, {"v1", "v2", "v3", "v4"}, {}, {}, kMaxSequenceNumber, kMaxSequenceNumber, &merge_op, &filter); // Don't leave tombstones (kTypeDeletion) for filtered keys. compaction_proxy_->key_not_exists_beyond_output_level = true; std::atomic seek_done{false}; ROCKSDB_NAMESPACE::port::Thread compaction_thread([&] { c_iter_->SeekToFirst(); EXPECT_FALSE(c_iter_->Valid()); EXPECT_TRUE(c_iter_->status().IsShutdownInProgress()); seek_done.store(true); }); // Let key 1 through. filter.WaitForStall(1); // Shutdown during compaction filter call for key 2. 
filter.WaitForStall(2); shutting_down_.store(true); EXPECT_FALSE(seek_done.load()); // Unstall filter and wait for SeekToFirst() to return. filter.stall_at.store(3); compaction_thread.join(); assert(seek_done.load()); // Check that filter was never called again. EXPECT_EQ(2, filter.last_seen.load()); } // Same as ShuttingDownInFilter, but shutdown happens during filter call for // a merge operand, not for a value. TEST_P(CompactionIteratorTest, ShuttingDownInMerge) { NoMergingMergeOp merge_op; StallingFilter filter; InitIterators( {test::KeyStr("1", 1, kTypeValue), test::KeyStr("2", 2, kTypeMerge), test::KeyStr("3", 3, kTypeMerge), test::KeyStr("4", 4, kTypeValue)}, {"v1", "v2", "v3", "v4"}, {}, {}, kMaxSequenceNumber, kMaxSequenceNumber, &merge_op, &filter); compaction_proxy_->key_not_exists_beyond_output_level = true; std::atomic seek_done{false}; ROCKSDB_NAMESPACE::port::Thread compaction_thread([&] { c_iter_->SeekToFirst(); ASSERT_FALSE(c_iter_->Valid()); ASSERT_TRUE(c_iter_->status().IsShutdownInProgress()); seek_done.store(true); }); // Let key 1 through. filter.WaitForStall(1); // Shutdown during compaction filter call for key 2. filter.WaitForStall(2); shutting_down_.store(true); EXPECT_FALSE(seek_done.load()); // Unstall filter and wait for SeekToFirst() to return. filter.stall_at.store(3); compaction_thread.join(); assert(seek_done.load()); // Check that filter was never called again. EXPECT_EQ(2, filter.last_seen.load()); } TEST_P(CompactionIteratorTest, SingleMergeOperand) { class Filter : public CompactionFilter { Decision FilterV2(int /*level*/, const Slice& key, ValueType t, const Slice& existing_value, std::string* /*new_value*/, std::string* /*skip_until*/) const override { std::string k = key.ToString(); std::string v = existing_value.ToString(); // See InitIterators() call below for the sequence of keys and their // filtering decisions. 
Here we closely assert that compaction filter is // called with the expected keys and only them, and with the right values. if (k == "a") { EXPECT_EQ(ValueType::kMergeOperand, t); EXPECT_EQ("av1", v); return Decision::kKeep; } else if (k == "b") { EXPECT_EQ(ValueType::kMergeOperand, t); return Decision::kKeep; } else if (k == "c") { return Decision::kKeep; } ADD_FAILURE(); return Decision::kKeep; } const char* Name() const override { return "CompactionIteratorTest.SingleMergeOperand::Filter"; } }; class SingleMergeOp : public MergeOperator { public: bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { // See InitIterators() call below for why "c" is the only key for which // FullMergeV2 should be called. EXPECT_EQ("c", merge_in.key.ToString()); std::string temp_value; if (merge_in.existing_value != nullptr) { temp_value = merge_in.existing_value->ToString(); } for (auto& operand : merge_in.operand_list) { temp_value.append(operand.ToString()); } merge_out->new_value = temp_value; return true; } bool PartialMergeMulti(const Slice& key, const std::deque& operand_list, std::string* new_value, Logger* /*logger*/) const override { std::string string_key = key.ToString(); EXPECT_TRUE(string_key == "a" || string_key == "b"); if (string_key == "a") { EXPECT_EQ(1, operand_list.size()); } else if (string_key == "b") { EXPECT_EQ(2, operand_list.size()); } std::string temp_value; for (auto& operand : operand_list) { temp_value.append(operand.ToString()); } swap(temp_value, *new_value); return true; } const char* Name() const override { return "CompactionIteratorTest SingleMergeOp"; } bool AllowSingleOperand() const override { return true; } }; SingleMergeOp merge_op; Filter filter; InitIterators( // a should invoke PartialMergeMulti with a single merge operand. {test::KeyStr("a", 50, kTypeMerge), // b should invoke PartialMergeMulti with two operands. 
test::KeyStr("b", 70, kTypeMerge), test::KeyStr("b", 60, kTypeMerge), // c should invoke FullMerge due to kTypeValue at the beginning. test::KeyStr("c", 90, kTypeMerge), test::KeyStr("c", 80, kTypeValue)}, {"av1", "bv2", "bv1", "cv2", "cv1"}, {}, {}, kMaxSequenceNumber, kMaxSequenceNumber, &merge_op, &filter); c_iter_->SeekToFirst(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), c_iter_->key().ToString()); ASSERT_EQ("av1", c_iter_->value().ToString()); c_iter_->Next(); ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ("bv1bv2", c_iter_->value().ToString()); c_iter_->Next(); ASSERT_EQ("cv1cv2", c_iter_->value().ToString()); } // In bottommost level, values earlier than earliest snapshot can be output // with sequence = 0. TEST_P(CompactionIteratorTest, ZeroOutSequenceAtBottomLevel) { AddSnapshot(1); RunTest({test::KeyStr("a", 1, kTypeValue), test::KeyStr("b", 2, kTypeValue)}, {"v1", "v2"}, {test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 2, kTypeValue)}, {"v1", "v2"}, kMaxSequenceNumber /*last_commited_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } // In bottommost level, deletions earlier than earliest snapshot can be removed // permanently. TEST_P(CompactionIteratorTest, RemoveDeletionAtBottomLevel) { AddSnapshot(1); RunTest({test::KeyStr("a", 1, kTypeDeletion), test::KeyStr("b", 3, kTypeDeletion), test::KeyStr("b", 1, kTypeValue)}, {"", "", ""}, {test::KeyStr("b", 3, kTypeDeletion), test::KeyStr("b", 0, kTypeValue)}, {"", ""}, kMaxSequenceNumber /*last_commited_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } // In bottommost level, single deletions earlier than earliest snapshot can be // removed permanently. 
TEST_P(CompactionIteratorTest, RemoveSingleDeletionAtBottomLevel) { AddSnapshot(1); RunTest({test::KeyStr("a", 1, kTypeSingleDeletion), test::KeyStr("b", 2, kTypeSingleDeletion)}, {"", ""}, {test::KeyStr("b", 2, kTypeSingleDeletion)}, {""}, kMaxSequenceNumber /*last_commited_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } INSTANTIATE_TEST_CASE_P(CompactionIteratorTestInstance, CompactionIteratorTest, testing::Values(true, false)); // Tests how CompactionIterator work together with SnapshotChecker. class CompactionIteratorWithSnapshotCheckerTest : public CompactionIteratorTest { public: bool UseSnapshotChecker() const override { return true; } }; // Uncommitted keys (keys with seq > last_committed_seq) should be output as-is // while committed version of these keys should get compacted as usual. TEST_F(CompactionIteratorWithSnapshotCheckerTest, PreserveUncommittedKeys_Value) { RunTest( {test::KeyStr("foo", 3, kTypeValue), test::KeyStr("foo", 2, kTypeValue), test::KeyStr("foo", 1, kTypeValue)}, {"v3", "v2", "v1"}, {test::KeyStr("foo", 3, kTypeValue), test::KeyStr("foo", 2, kTypeValue)}, {"v3", "v2"}, 2 /*last_committed_seq*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, PreserveUncommittedKeys_Deletion) { RunTest({test::KeyStr("foo", 2, kTypeDeletion), test::KeyStr("foo", 1, kTypeValue)}, {"", "v1"}, {test::KeyStr("foo", 2, kTypeDeletion), test::KeyStr("foo", 1, kTypeValue)}, {"", "v1"}, 1 /*last_committed_seq*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, PreserveUncommittedKeys_Merge) { auto merge_op = MergeOperators::CreateStringAppendOperator(); RunTest( {test::KeyStr("foo", 3, kTypeMerge), test::KeyStr("foo", 2, kTypeMerge), test::KeyStr("foo", 1, kTypeValue)}, {"v3", "v2", "v1"}, {test::KeyStr("foo", 3, kTypeMerge), test::KeyStr("foo", 2, kTypeValue)}, {"v3", "v1,v2"}, 2 /*last_committed_seq*/, merge_op.get()); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, 
PreserveUncommittedKeys_SingleDelete) { RunTest({test::KeyStr("foo", 2, kTypeSingleDeletion), test::KeyStr("foo", 1, kTypeValue)}, {"", "v1"}, {test::KeyStr("foo", 2, kTypeSingleDeletion), test::KeyStr("foo", 1, kTypeValue)}, {"", "v1"}, 1 /*last_committed_seq*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, PreserveUncommittedKeys_BlobIndex) { RunTest({test::KeyStr("foo", 3, kTypeBlobIndex), test::KeyStr("foo", 2, kTypeBlobIndex), test::KeyStr("foo", 1, kTypeBlobIndex)}, {"v3", "v2", "v1"}, {test::KeyStr("foo", 3, kTypeBlobIndex), test::KeyStr("foo", 2, kTypeBlobIndex)}, {"v3", "v2"}, 2 /*last_committed_seq*/); } // Test compaction iterator dedup keys visible to the same snapshot. TEST_F(CompactionIteratorWithSnapshotCheckerTest, DedupSameSnapshot_Value) { AddSnapshot(2, 1); RunTest( {test::KeyStr("foo", 4, kTypeValue), test::KeyStr("foo", 3, kTypeValue), test::KeyStr("foo", 2, kTypeValue), test::KeyStr("foo", 1, kTypeValue)}, {"v4", "v3", "v2", "v1"}, {test::KeyStr("foo", 4, kTypeValue), test::KeyStr("foo", 3, kTypeValue), test::KeyStr("foo", 1, kTypeValue)}, {"v4", "v3", "v1"}, 3 /*last_committed_seq*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, DedupSameSnapshot_Deletion) { AddSnapshot(2, 1); RunTest( {test::KeyStr("foo", 4, kTypeValue), test::KeyStr("foo", 3, kTypeDeletion), test::KeyStr("foo", 2, kTypeValue), test::KeyStr("foo", 1, kTypeValue)}, {"v4", "", "v2", "v1"}, {test::KeyStr("foo", 4, kTypeValue), test::KeyStr("foo", 3, kTypeDeletion), test::KeyStr("foo", 1, kTypeValue)}, {"v4", "", "v1"}, 3 /*last_committed_seq*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, DedupSameSnapshot_Merge) { AddSnapshot(2, 1); AddSnapshot(4, 3); auto merge_op = MergeOperators::CreateStringAppendOperator(); RunTest( {test::KeyStr("foo", 5, kTypeMerge), test::KeyStr("foo", 4, kTypeMerge), test::KeyStr("foo", 3, kTypeMerge), test::KeyStr("foo", 2, kTypeMerge), test::KeyStr("foo", 1, kTypeValue)}, {"v5", "v4", "v3", "v2", "v1"}, {test::KeyStr("foo", 5, 
kTypeMerge), test::KeyStr("foo", 4, kTypeMerge), test::KeyStr("foo", 3, kTypeMerge), test::KeyStr("foo", 1, kTypeValue)}, {"v5", "v4", "v2,v3", "v1"}, 4 /*last_committed_seq*/, merge_op.get()); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, DedupSameSnapshot_SingleDeletion) { AddSnapshot(2, 1); RunTest( {test::KeyStr("foo", 4, kTypeValue), test::KeyStr("foo", 3, kTypeSingleDeletion), test::KeyStr("foo", 2, kTypeValue), test::KeyStr("foo", 1, kTypeValue)}, {"v4", "", "v2", "v1"}, {test::KeyStr("foo", 4, kTypeValue), test::KeyStr("foo", 1, kTypeValue)}, {"v4", "v1"}, 3 /*last_committed_seq*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, DedupSameSnapshot_BlobIndex) { AddSnapshot(2, 1); RunTest({test::KeyStr("foo", 4, kTypeBlobIndex), test::KeyStr("foo", 3, kTypeBlobIndex), test::KeyStr("foo", 2, kTypeBlobIndex), test::KeyStr("foo", 1, kTypeBlobIndex)}, {"v4", "v3", "v2", "v1"}, {test::KeyStr("foo", 4, kTypeBlobIndex), test::KeyStr("foo", 3, kTypeBlobIndex), test::KeyStr("foo", 1, kTypeBlobIndex)}, {"v4", "v3", "v1"}, 3 /*last_committed_seq*/); } // At bottom level, sequence numbers can be zero out, and deletions can be // removed, but only when they are visible to earliest snapshot. 
TEST_F(CompactionIteratorWithSnapshotCheckerTest,
       NotZeroOutSequenceIfNotVisibleToEarliestSnapshot) {
  AddSnapshot(2, 1);
  RunTest({test::KeyStr("a", 1, kTypeValue), test::KeyStr("b", 2, kTypeValue),
           test::KeyStr("c", 3, kTypeValue)},
          {"v1", "v2", "v3"},
          {test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 2, kTypeValue),
           test::KeyStr("c", 3, kTypeValue)},
          {"v1", "v2", "v3"}, kMaxSequenceNumber /*last_committed_seq*/,
          nullptr /*merge_operator*/, nullptr /*compaction_filter*/,
          true /*bottommost_level*/);
}

TEST_F(CompactionIteratorWithSnapshotCheckerTest,
       NotRemoveDeletionIfNotVisibleToEarliestSnapshot) {
  AddSnapshot(2, 1);
  // The expected output is empty. The expected-value list is kept the same
  // length as the expected-key list; it previously held two entries that were
  // never compared against anything.
  RunTest(
      {test::KeyStr("a", 1, kTypeDeletion), test::KeyStr("b", 2, kTypeDeletion),
       test::KeyStr("c", 3, kTypeDeletion)},
      {"", "", ""}, {} /*expected_keys*/, {} /*expected_values*/,
      kMaxSequenceNumber /*last_committed_seq*/, nullptr /*merge_operator*/,
      nullptr /*compaction_filter*/, true /*bottommost_level*/);
}

TEST_F(CompactionIteratorWithSnapshotCheckerTest,
       NotRemoveDeletionIfValuePresentToEarlierSnapshot) {
  AddSnapshot(2, 1);
  RunTest(
      {test::KeyStr("a", 4, kTypeDeletion), test::KeyStr("a", 1, kTypeValue),
       test::KeyStr("b", 3, kTypeValue)},
      {"", "", ""},
      {test::KeyStr("a", 4, kTypeDeletion), test::KeyStr("a", 0, kTypeValue),
       test::KeyStr("b", 3, kTypeValue)},
      {"", "", ""}, kMaxSequenceNumber /*last_committed_seq*/,
      nullptr /*merge_operator*/, nullptr /*compaction_filter*/,
      true /*bottommost_level*/);
}

TEST_F(CompactionIteratorWithSnapshotCheckerTest,
       NotRemoveSingleDeletionIfNotVisibleToEarliestSnapshot) {
  AddSnapshot(2, 1);
  RunTest({test::KeyStr("a", 1, kTypeSingleDeletion),
           test::KeyStr("b", 2, kTypeSingleDeletion),
           test::KeyStr("c", 3, kTypeSingleDeletion)},
          {"", "", ""},
          {test::KeyStr("b", 2, kTypeSingleDeletion),
           test::KeyStr("c", 3, kTypeSingleDeletion)},
          {"", ""}, kMaxSequenceNumber /*last_committed_seq*/,
          nullptr /*merge_operator*/, nullptr /*compaction_filter*/,
          true /*bottommost_level*/);
}

// Single delete should not cancel out values that are not visible to the
// same set of snapshots.
TEST_F(CompactionIteratorWithSnapshotCheckerTest,
       SingleDeleteAcrossSnapshotBoundary) {
  AddSnapshot(2, 1);
  RunTest({test::KeyStr("a", 2, kTypeSingleDeletion),
           test::KeyStr("a", 1, kTypeValue)},
          {"", "v1"},
          {test::KeyStr("a", 2, kTypeSingleDeletion),
           test::KeyStr("a", 1, kTypeValue)},
          {"", "v1"}, 2 /*last_committed_seq*/);
}

// Single delete should be kept in case it is not visible to the
// earliest write conflict snapshot. If a single delete is kept for this reason,
// corresponding value can be trimmed to save space.
TEST_F(CompactionIteratorWithSnapshotCheckerTest,
       KeepSingleDeletionForWriteConflictChecking) {
  AddSnapshot(2, 0);
  RunTest({test::KeyStr("a", 2, kTypeSingleDeletion),
           test::KeyStr("a", 1, kTypeValue)},
          {"", "v1"},
          {test::KeyStr("a", 2, kTypeSingleDeletion),
           test::KeyStr("a", 1, kTypeValue)},
          {"", ""}, 2 /*last_committed_seq*/, nullptr /*merge_operator*/,
          nullptr /*compaction_filter*/, false /*bottommost_level*/,
          2 /*earliest_write_conflict_snapshot*/);
}

// Compaction filter should keep uncommitted key as-is, and
//   * Convert the latest value to deletion, and/or
//   * if latest value is a merge, apply filter to all subsequent merges.
TEST_F(CompactionIteratorWithSnapshotCheckerTest, CompactionFilter_Value) { std::unique_ptr compaction_filter( new FilterAllKeysCompactionFilter()); RunTest( {test::KeyStr("a", 2, kTypeValue), test::KeyStr("a", 1, kTypeValue), test::KeyStr("b", 3, kTypeValue), test::KeyStr("c", 1, kTypeValue)}, {"v2", "v1", "v3", "v4"}, {test::KeyStr("a", 2, kTypeValue), test::KeyStr("a", 1, kTypeDeletion), test::KeyStr("b", 3, kTypeValue), test::KeyStr("c", 1, kTypeDeletion)}, {"v2", "", "v3", ""}, 1 /*last_committed_seq*/, nullptr /*merge_operator*/, compaction_filter.get()); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, CompactionFilter_Deletion) { std::unique_ptr compaction_filter( new FilterAllKeysCompactionFilter()); RunTest( {test::KeyStr("a", 2, kTypeDeletion), test::KeyStr("a", 1, kTypeValue)}, {"", "v1"}, {test::KeyStr("a", 2, kTypeDeletion), test::KeyStr("a", 1, kTypeDeletion)}, {"", ""}, 1 /*last_committed_seq*/, nullptr /*merge_operator*/, compaction_filter.get()); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, CompactionFilter_PartialMerge) { std::shared_ptr merge_op = MergeOperators::CreateStringAppendOperator(); std::unique_ptr compaction_filter( new FilterAllKeysCompactionFilter()); RunTest({test::KeyStr("a", 3, kTypeMerge), test::KeyStr("a", 2, kTypeMerge), test::KeyStr("a", 1, kTypeMerge)}, {"v3", "v2", "v1"}, {test::KeyStr("a", 3, kTypeMerge)}, {"v3"}, 2 /*last_committed_seq*/, merge_op.get(), compaction_filter.get()); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, CompactionFilter_FullMerge) { std::shared_ptr merge_op = MergeOperators::CreateStringAppendOperator(); std::unique_ptr compaction_filter( new FilterAllKeysCompactionFilter()); RunTest( {test::KeyStr("a", 3, kTypeMerge), test::KeyStr("a", 2, kTypeMerge), test::KeyStr("a", 1, kTypeValue)}, {"v3", "v2", "v1"}, {test::KeyStr("a", 3, kTypeMerge), test::KeyStr("a", 1, kTypeDeletion)}, {"v3", ""}, 2 /*last_committed_seq*/, merge_op.get(), compaction_filter.get()); } } // namespace 
ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/compaction/compaction_job.cc000066400000000000000000002047641370372246700213000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include #include #include #include #include #include #include #include #include "db/builder.h" #include "db/compaction/compaction_job.h" #include "db/db_impl/db_impl.h" #include "db/db_iter.h" #include "db/dbformat.h" #include "db/error_handler.h" #include "db/event_helpers.h" #include "db/log_reader.h" #include "db/log_writer.h" #include "db/memtable.h" #include "db/memtable_list.h" #include "db/merge_context.h" #include "db/merge_helper.h" #include "db/range_del_aggregator.h" #include "db/version_set.h" #include "file/filename.h" #include "file/read_write_util.h" #include "file/sst_file_manager_impl.h" #include "file/writable_file_writer.h" #include "logging/log_buffer.h" #include "logging/logging.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_util.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_factory.h" #include "table/merging_iterator.h" #include "table/table_builder.h" #include "test_util/sync_point.h" #include "util/coding.h" #include "util/mutexlock.h" #include 
"util/random.h" #include "util/stop_watch.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { const char* GetCompactionReasonString(CompactionReason compaction_reason) { switch (compaction_reason) { case CompactionReason::kUnknown: return "Unknown"; case CompactionReason::kLevelL0FilesNum: return "LevelL0FilesNum"; case CompactionReason::kLevelMaxLevelSize: return "LevelMaxLevelSize"; case CompactionReason::kUniversalSizeAmplification: return "UniversalSizeAmplification"; case CompactionReason::kUniversalSizeRatio: return "UniversalSizeRatio"; case CompactionReason::kUniversalSortedRunNum: return "UniversalSortedRunNum"; case CompactionReason::kFIFOMaxSize: return "FIFOMaxSize"; case CompactionReason::kFIFOReduceNumFiles: return "FIFOReduceNumFiles"; case CompactionReason::kFIFOTtl: return "FIFOTtl"; case CompactionReason::kManualCompaction: return "ManualCompaction"; case CompactionReason::kFilesMarkedForCompaction: return "FilesMarkedForCompaction"; case CompactionReason::kBottommostFiles: return "BottommostFiles"; case CompactionReason::kTtl: return "Ttl"; case CompactionReason::kFlush: return "Flush"; case CompactionReason::kExternalSstIngestion: return "ExternalSstIngestion"; case CompactionReason::kPeriodicCompaction: return "PeriodicCompaction"; case CompactionReason::kNumOfReasons: // fall through default: assert(false); return "Invalid"; } } // Maintains state for each sub-compaction struct CompactionJob::SubcompactionState { const Compaction* compaction; std::unique_ptr c_iter; // The boundaries of the key-range this compaction is interested in. No two // subcompactions may have overlapping key-ranges. 
// 'start' is inclusive, 'end' is exclusive, and nullptr means unbounded Slice *start, *end; // The return status of this subcompaction Status status; // Files produced by this subcompaction struct Output { FileMetaData meta; bool finished; std::shared_ptr table_properties; }; // State kept for output being generated std::vector outputs; std::unique_ptr outfile; std::unique_ptr builder; Output* current_output() { if (outputs.empty()) { // This subcompaction's outptut could be empty if compaction was aborted // before this subcompaction had a chance to generate any output files. // When subcompactions are executed sequentially this is more likely and // will be particulalry likely for the later subcompactions to be empty. // Once they are run in parallel however it should be much rarer. return nullptr; } else { return &outputs.back(); } } uint64_t current_output_file_size; // State during the subcompaction uint64_t total_bytes; uint64_t num_output_records; CompactionJobStats compaction_job_stats; uint64_t approx_size; // An index that used to speed up ShouldStopBefore(). size_t grandparent_index = 0; // The number of bytes overlapping between the current output and // grandparent files used in ShouldStopBefore(). 
uint64_t overlapped_bytes = 0; // A flag determine whether the key has been seen in ShouldStopBefore() bool seen_key = false; SubcompactionState(Compaction* c, Slice* _start, Slice* _end, uint64_t size = 0) : compaction(c), start(_start), end(_end), outfile(nullptr), builder(nullptr), current_output_file_size(0), total_bytes(0), num_output_records(0), approx_size(size), grandparent_index(0), overlapped_bytes(0), seen_key(false) { assert(compaction != nullptr); } SubcompactionState(SubcompactionState&& o) { *this = std::move(o); } SubcompactionState& operator=(SubcompactionState&& o) { compaction = std::move(o.compaction); start = std::move(o.start); end = std::move(o.end); status = std::move(o.status); outputs = std::move(o.outputs); outfile = std::move(o.outfile); builder = std::move(o.builder); current_output_file_size = std::move(o.current_output_file_size); total_bytes = std::move(o.total_bytes); num_output_records = std::move(o.num_output_records); compaction_job_stats = std::move(o.compaction_job_stats); approx_size = std::move(o.approx_size); grandparent_index = std::move(o.grandparent_index); overlapped_bytes = std::move(o.overlapped_bytes); seen_key = std::move(o.seen_key); return *this; } // Because member std::unique_ptrs do not have these. SubcompactionState(const SubcompactionState&) = delete; SubcompactionState& operator=(const SubcompactionState&) = delete; // Returns true iff we should stop building the current output // before processing "internal_key". bool ShouldStopBefore(const Slice& internal_key, uint64_t curr_file_size) { const InternalKeyComparator* icmp = &compaction->column_family_data()->internal_comparator(); const std::vector& grandparents = compaction->grandparents(); // Scan to find earliest grandparent file that contains key. 
while (grandparent_index < grandparents.size() && icmp->Compare(internal_key, grandparents[grandparent_index]->largest.Encode()) > 0) { if (seen_key) { overlapped_bytes += grandparents[grandparent_index]->fd.GetFileSize(); } assert(grandparent_index + 1 >= grandparents.size() || icmp->Compare( grandparents[grandparent_index]->largest.Encode(), grandparents[grandparent_index + 1]->smallest.Encode()) <= 0); grandparent_index++; } seen_key = true; if (overlapped_bytes + curr_file_size > compaction->max_compaction_bytes()) { // Too much overlap for current output; start new output overlapped_bytes = 0; return true; } return false; } }; // Maintains state for the entire compaction struct CompactionJob::CompactionState { Compaction* const compaction; // REQUIRED: subcompaction states are stored in order of increasing // key-range std::vector sub_compact_states; Status status; uint64_t total_bytes; uint64_t num_output_records; explicit CompactionState(Compaction* c) : compaction(c), total_bytes(0), num_output_records(0) {} size_t NumOutputFiles() { size_t total = 0; for (auto& s : sub_compact_states) { total += s.outputs.size(); } return total; } Slice SmallestUserKey() { for (const auto& sub_compact_state : sub_compact_states) { if (!sub_compact_state.outputs.empty() && sub_compact_state.outputs[0].finished) { return sub_compact_state.outputs[0].meta.smallest.user_key(); } } // If there is no finished output, return an empty slice. return Slice(nullptr, 0); } Slice LargestUserKey() { for (auto it = sub_compact_states.rbegin(); it < sub_compact_states.rend(); ++it) { if (!it->outputs.empty() && it->current_output()->finished) { assert(it->current_output() != nullptr); return it->current_output()->meta.largest.user_key(); } } // If there is no finished output, return an empty slice. 
return Slice(nullptr, 0); } }; void CompactionJob::AggregateStatistics() { for (SubcompactionState& sc : compact_->sub_compact_states) { compact_->total_bytes += sc.total_bytes; compact_->num_output_records += sc.num_output_records; } if (compaction_job_stats_) { for (SubcompactionState& sc : compact_->sub_compact_states) { compaction_job_stats_->Add(sc.compaction_job_stats); } } } CompactionJob::CompactionJob( int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, const FileOptions& file_options, VersionSet* versions, const std::atomic* shutting_down, const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer, FSDirectory* db_directory, FSDirectory* output_directory, Statistics* stats, InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler, std::vector existing_snapshots, SequenceNumber earliest_write_conflict_snapshot, const SnapshotChecker* snapshot_checker, std::shared_ptr table_cache, EventLogger* event_logger, bool paranoid_file_checks, bool measure_io_stats, const std::string& dbname, CompactionJobStats* compaction_job_stats, Env::Priority thread_pri, const std::atomic* manual_compaction_paused) : job_id_(job_id), compact_(new CompactionState(compaction)), compaction_job_stats_(compaction_job_stats), compaction_stats_(compaction->compaction_reason(), 1), dbname_(dbname), db_options_(db_options), file_options_(file_options), env_(db_options.env), fs_(db_options.fs.get()), file_options_for_read_( fs_->OptimizeForCompactionTableRead(file_options, db_options_)), versions_(versions), shutting_down_(shutting_down), manual_compaction_paused_(manual_compaction_paused), preserve_deletes_seqnum_(preserve_deletes_seqnum), log_buffer_(log_buffer), db_directory_(db_directory), output_directory_(output_directory), stats_(stats), db_mutex_(db_mutex), db_error_handler_(db_error_handler), existing_snapshots_(std::move(existing_snapshots)), earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot), 
snapshot_checker_(snapshot_checker),
      table_cache_(std::move(table_cache)),
      event_logger_(event_logger),
      bottommost_level_(false),
      paranoid_file_checks_(paranoid_file_checks),
      measure_io_stats_(measure_io_stats),
      write_hint_(Env::WLTH_NOT_SET),
      thread_pri_(thread_pri) {
  assert(log_buffer_ != nullptr);
  const auto* cfd = compact_->compaction->column_family_data();
  ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env,
                                    db_options_.enable_thread_tracking);
  ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION);
  ReportStartedCompaction(compaction);
}

// Destroys the job. Asserts that compact_ is already null by the time the
// job goes away, then resets the current thread's status through
// ThreadStatusUtil.
// NOTE(review): compact_ is presumably released by CleanupCompaction(), which
// Install() calls before returning; its definition is not visible here --
// confirm.
CompactionJob::~CompactionJob() {
  assert(compact_ == nullptr);
  ThreadStatusUtil::ResetThreadStatus();
}

void CompactionJob::ReportStartedCompaction(Compaction* compaction) {
  const auto* cfd = compact_->compaction->column_family_data();
  ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env,
                                    db_options_.enable_thread_tracking);

  ThreadStatusUtil::SetThreadOperationProperty(ThreadStatus::COMPACTION_JOB_ID,
                                               job_id_);

  ThreadStatusUtil::SetThreadOperationProperty(
      ThreadStatus::COMPACTION_INPUT_OUTPUT_LEVEL,
      (static_cast(compact_->compaction->start_level()) << 32) +
          compact_->compaction->output_level());

  // In the current design, a CompactionJob is always created
  // for non-trivial compaction.
assert(compaction->IsTrivialMove() == false || compaction->is_manual_compaction() == true); ThreadStatusUtil::SetThreadOperationProperty( ThreadStatus::COMPACTION_PROP_FLAGS, compaction->is_manual_compaction() + (compaction->deletion_compaction() << 1)); ThreadStatusUtil::SetThreadOperationProperty( ThreadStatus::COMPACTION_TOTAL_INPUT_BYTES, compaction->CalculateTotalInputSize()); IOSTATS_RESET(bytes_written); IOSTATS_RESET(bytes_read); ThreadStatusUtil::SetThreadOperationProperty( ThreadStatus::COMPACTION_BYTES_WRITTEN, 0); ThreadStatusUtil::SetThreadOperationProperty( ThreadStatus::COMPACTION_BYTES_READ, 0); // Set the thread operation after operation properties // to ensure GetThreadList() can always show them all together. ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); if (compaction_job_stats_) { compaction_job_stats_->is_manual_compaction = compaction->is_manual_compaction(); } } void CompactionJob::Prepare() { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_PREPARE); // Generate file_levels_ for compaction berfore making Iterator auto* c = compact_->compaction; assert(c->column_family_data() != nullptr); assert(c->column_family_data()->current()->storage_info()->NumLevelFiles( compact_->compaction->level()) > 0); write_hint_ = c->column_family_data()->CalculateSSTWriteHint(c->output_level()); bottommost_level_ = c->bottommost_level(); if (c->ShouldFormSubcompactions()) { { StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME); GenSubcompactionBoundaries(); } assert(sizes_.size() == boundaries_.size() + 1); for (size_t i = 0; i <= boundaries_.size(); i++) { Slice* start = i == 0 ? nullptr : &boundaries_[i - 1]; Slice* end = i == boundaries_.size() ? 
nullptr : &boundaries_[i]; compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i]); } RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED, compact_->sub_compact_states.size()); } else { compact_->sub_compact_states.emplace_back(c, nullptr, nullptr); } } struct RangeWithSize { Range range; uint64_t size; RangeWithSize(const Slice& a, const Slice& b, uint64_t s = 0) : range(a, b), size(s) {} }; void CompactionJob::GenSubcompactionBoundaries() { auto* c = compact_->compaction; auto* cfd = c->column_family_data(); const Comparator* cfd_comparator = cfd->user_comparator(); std::vector bounds; int start_lvl = c->start_level(); int out_lvl = c->output_level(); // Add the starting and/or ending key of certain input files as a potential // boundary for (size_t lvl_idx = 0; lvl_idx < c->num_input_levels(); lvl_idx++) { int lvl = c->level(lvl_idx); if (lvl >= start_lvl && lvl <= out_lvl) { const LevelFilesBrief* flevel = c->input_levels(lvl_idx); size_t num_files = flevel->num_files; if (num_files == 0) { continue; } if (lvl == 0) { // For level 0 add the starting and ending key of each file since the // files may have greatly differing key ranges (not range-partitioned) for (size_t i = 0; i < num_files; i++) { bounds.emplace_back(flevel->files[i].smallest_key); bounds.emplace_back(flevel->files[i].largest_key); } } else { // For all other levels add the smallest/largest key in the level to // encompass the range covered by that level bounds.emplace_back(flevel->files[0].smallest_key); bounds.emplace_back(flevel->files[num_files - 1].largest_key); if (lvl == out_lvl) { // For the last level include the starting keys of all files since // the last level is the largest and probably has the widest key // range. Since it's range partitioned, the ending key of one file // and the starting key of the next are very close (or identical). 
for (size_t i = 1; i < num_files; i++) { bounds.emplace_back(flevel->files[i].smallest_key); } } } } } std::sort(bounds.begin(), bounds.end(), [cfd_comparator](const Slice& a, const Slice& b) -> bool { return cfd_comparator->Compare(ExtractUserKey(a), ExtractUserKey(b)) < 0; }); // Remove duplicated entries from bounds bounds.erase( std::unique(bounds.begin(), bounds.end(), [cfd_comparator](const Slice& a, const Slice& b) -> bool { return cfd_comparator->Compare(ExtractUserKey(a), ExtractUserKey(b)) == 0; }), bounds.end()); // Combine consecutive pairs of boundaries into ranges with an approximate // size of data covered by keys in that range uint64_t sum = 0; std::vector ranges; // Get input version from CompactionState since it's already referenced // earlier in SetInputVersioCompaction::SetInputVersion and will not change // when db_mutex_ is released below auto* v = compact_->compaction->input_version(); for (auto it = bounds.begin();;) { const Slice a = *it; ++it; if (it == bounds.end()) { break; } const Slice b = *it; // ApproximateSize could potentially create table reader iterator to seek // to the index block and may incur I/O cost in the process. 
Unlock db // mutex to reduce contention db_mutex_->Unlock(); uint64_t size = versions_->ApproximateSize(SizeApproximationOptions(), v, a, b, start_lvl, out_lvl + 1, TableReaderCaller::kCompaction); db_mutex_->Lock(); ranges.emplace_back(a, b, size); sum += size; } // Group the ranges into subcompactions const double min_file_fill_percent = 4.0 / 5; int base_level = v->storage_info()->base_level(); uint64_t max_output_files = static_cast(std::ceil( sum / min_file_fill_percent / MaxFileSizeForLevel(*(c->mutable_cf_options()), out_lvl, c->immutable_cf_options()->compaction_style, base_level, c->immutable_cf_options()->level_compaction_dynamic_level_bytes))); uint64_t subcompactions = std::min({static_cast(ranges.size()), static_cast(c->max_subcompactions()), max_output_files}); if (subcompactions > 1) { double mean = sum * 1.0 / subcompactions; // Greedily add ranges to the subcompaction until the sum of the ranges' // sizes becomes >= the expected mean size of a subcompaction sum = 0; for (size_t i = 0; i + 1 < ranges.size(); i++) { sum += ranges[i].size; if (subcompactions == 1) { // If there's only one left to schedule then it goes to the end so no // need to put an end boundary continue; } if (sum >= mean) { boundaries_.emplace_back(ExtractUserKey(ranges[i].range.limit)); sizes_.emplace_back(sum); subcompactions--; sum = 0; } } sizes_.emplace_back(sum + ranges.back().size); } else { // Only one range so its size is the total sum of sizes computed above sizes_.emplace_back(sum); } } Status CompactionJob::Run() { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_RUN); TEST_SYNC_POINT("CompactionJob::Run():Start"); log_buffer_->FlushBufferToLog(); LogCompaction(); const size_t num_threads = compact_->sub_compact_states.size(); assert(num_threads > 0); const uint64_t start_micros = env_->NowMicros(); // Launch a thread for each of subcompactions 1...num_threads-1 std::vector thread_pool; thread_pool.reserve(num_threads - 1); for (size_t i = 
1; i < compact_->sub_compact_states.size(); i++) { thread_pool.emplace_back(&CompactionJob::ProcessKeyValueCompaction, this, &compact_->sub_compact_states[i]); } // Always schedule the first subcompaction (whether or not there are also // others) in the current thread to be efficient with resources ProcessKeyValueCompaction(&compact_->sub_compact_states[0]); // Wait for all other threads (if there are any) to finish execution for (auto& thread : thread_pool) { thread.join(); } compaction_stats_.micros = env_->NowMicros() - start_micros; compaction_stats_.cpu_micros = 0; for (size_t i = 0; i < compact_->sub_compact_states.size(); i++) { compaction_stats_.cpu_micros += compact_->sub_compact_states[i].compaction_job_stats.cpu_micros; } RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros); RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME, compaction_stats_.cpu_micros); TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify"); // Check if any thread encountered an error during execution Status status; for (const auto& state : compact_->sub_compact_states) { if (!state.status.ok()) { status = state.status; break; } } IOStatus io_s; if (status.ok() && output_directory_) { io_s = output_directory_->Fsync(IOOptions(), nullptr); } if (!io_s.ok()) { io_status_ = io_s; status = io_s; } if (status.ok()) { thread_pool.clear(); std::vector files_meta; for (const auto& state : compact_->sub_compact_states) { for (const auto& output : state.outputs) { files_meta.emplace_back(&output.meta); } } ColumnFamilyData* cfd = compact_->compaction->column_family_data(); auto prefix_extractor = compact_->compaction->mutable_cf_options()->prefix_extractor.get(); std::atomic next_file_meta_idx(0); auto verify_table = [&](Status& output_status) { while (true) { size_t file_idx = next_file_meta_idx.fetch_add(1); if (file_idx >= files_meta.size()) { break; } // Verify that the table is usable // We set for_compaction to false and don't OptimizeForCompactionTableRead // here because 
this is a special case after we finish the table building // No matter whether use_direct_io_for_flush_and_compaction is true, // we will regard this verification as user reads since the goal is // to cache it here for further user reads InternalIterator* iter = cfd->table_cache()->NewIterator( ReadOptions(), file_options_, cfd->internal_comparator(), *files_meta[file_idx], /*range_del_agg=*/nullptr, prefix_extractor, /*table_reader_ptr=*/nullptr, cfd->internal_stats()->GetFileReadHist( compact_->compaction->output_level()), TableReaderCaller::kCompactionRefill, /*arena=*/nullptr, /*skip_filters=*/false, compact_->compaction->output_level(), MaxFileSizeForL0MetaPin( *compact_->compaction->mutable_cf_options()), /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, /*allow_unprepared_value=*/false); auto s = iter->status(); if (s.ok() && paranoid_file_checks_) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {} s = iter->status(); } delete iter; if (!s.ok()) { output_status = s; break; } } }; for (size_t i = 1; i < compact_->sub_compact_states.size(); i++) { thread_pool.emplace_back(verify_table, std::ref(compact_->sub_compact_states[i].status)); } verify_table(compact_->sub_compact_states[0].status); for (auto& thread : thread_pool) { thread.join(); } for (const auto& state : compact_->sub_compact_states) { if (!state.status.ok()) { status = state.status; break; } } } TablePropertiesCollection tp; for (const auto& state : compact_->sub_compact_states) { for (const auto& output : state.outputs) { auto fn = TableFileName(state.compaction->immutable_cf_options()->cf_paths, output.meta.fd.GetNumber(), output.meta.fd.GetPathId()); tp[fn] = output.table_properties; } } compact_->compaction->SetOutputTableProperties(std::move(tp)); // Finish up all book-keeping to unify the subcompaction results AggregateStatistics(); UpdateCompactionStats(); RecordCompactionIOStats(); LogFlush(db_options_.info_log); TEST_SYNC_POINT("CompactionJob::Run():End"); 
compact_->status = status; return status; } Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_INSTALL); db_mutex_->AssertHeld(); Status status = compact_->status; ColumnFamilyData* cfd = compact_->compaction->column_family_data(); cfd->internal_stats()->AddCompactionStats( compact_->compaction->output_level(), thread_pri_, compaction_stats_); if (status.ok()) { status = InstallCompactionResults(mutable_cf_options); } if (!versions_->io_status().ok()) { io_status_ = versions_->io_status(); } VersionStorageInfo::LevelSummaryStorage tmp; auto vstorage = cfd->current()->storage_info(); const auto& stats = compaction_stats_; double read_write_amp = 0.0; double write_amp = 0.0; double bytes_read_per_sec = 0; double bytes_written_per_sec = 0; if (stats.bytes_read_non_output_levels > 0) { read_write_amp = (stats.bytes_written + stats.bytes_read_output_level + stats.bytes_read_non_output_levels) / static_cast(stats.bytes_read_non_output_levels); write_amp = stats.bytes_written / static_cast(stats.bytes_read_non_output_levels); } if (stats.micros > 0) { bytes_read_per_sec = (stats.bytes_read_non_output_levels + stats.bytes_read_output_level) / static_cast(stats.micros); bytes_written_per_sec = stats.bytes_written / static_cast(stats.micros); } ROCKS_LOG_BUFFER( log_buffer_, "[%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, " "files in(%d, %d) out(%d) " "MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) " "write-amplify(%.1f) %s, records in: %" PRIu64 ", records dropped: %" PRIu64 " output_compression: %s\n", cfd->GetName().c_str(), vstorage->LevelSummary(&tmp), bytes_read_per_sec, bytes_written_per_sec, compact_->compaction->output_level(), stats.num_input_files_in_non_output_levels, stats.num_input_files_in_output_level, stats.num_output_files, stats.bytes_read_non_output_levels / 1048576.0, stats.bytes_read_output_level / 1048576.0, stats.bytes_written / 
1048576.0, read_write_amp, write_amp, status.ToString().c_str(), stats.num_input_records, stats.num_dropped_records, CompressionTypeToString(compact_->compaction->output_compression()) .c_str()); UpdateCompactionJobStats(stats); auto stream = event_logger_->LogToBuffer(log_buffer_); stream << "job" << job_id_ << "event" << "compaction_finished" << "compaction_time_micros" << stats.micros << "compaction_time_cpu_micros" << stats.cpu_micros << "output_level" << compact_->compaction->output_level() << "num_output_files" << compact_->NumOutputFiles() << "total_output_size" << compact_->total_bytes << "num_input_records" << stats.num_input_records << "num_output_records" << compact_->num_output_records << "num_subcompactions" << compact_->sub_compact_states.size() << "output_compression" << CompressionTypeToString(compact_->compaction->output_compression()); if (compaction_job_stats_ != nullptr) { stream << "num_single_delete_mismatches" << compaction_job_stats_->num_single_del_mismatch; stream << "num_single_delete_fallthrough" << compaction_job_stats_->num_single_del_fallthru; } if (measure_io_stats_ && compaction_job_stats_ != nullptr) { stream << "file_write_nanos" << compaction_job_stats_->file_write_nanos; stream << "file_range_sync_nanos" << compaction_job_stats_->file_range_sync_nanos; stream << "file_fsync_nanos" << compaction_job_stats_->file_fsync_nanos; stream << "file_prepare_write_nanos" << compaction_job_stats_->file_prepare_write_nanos; } stream << "lsm_state"; stream.StartArray(); for (int level = 0; level < vstorage->num_levels(); ++level) { stream << vstorage->NumLevelFiles(level); } stream.EndArray(); CleanupCompaction(); return status; } void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { assert(sub_compact != nullptr); uint64_t prev_cpu_micros = env_->NowCPUNanos() / 1000; ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); // Create compaction filter and fail the compaction if // IgnoreSnapshots() = 
false because it is not supported anymore const CompactionFilter* compaction_filter = cfd->ioptions()->compaction_filter; std::unique_ptr compaction_filter_from_factory = nullptr; if (compaction_filter == nullptr) { compaction_filter_from_factory = sub_compact->compaction->CreateCompactionFilter(); compaction_filter = compaction_filter_from_factory.get(); } if (compaction_filter != nullptr && !compaction_filter->IgnoreSnapshots()) { sub_compact->status = Status::NotSupported( "CompactionFilter::IgnoreSnapshots() = false is not supported " "anymore."); return; } CompactionRangeDelAggregator range_del_agg(&cfd->internal_comparator(), existing_snapshots_); // Although the v2 aggregator is what the level iterator(s) know about, // the AddTombstones calls will be propagated down to the v1 aggregator. std::unique_ptr input(versions_->MakeInputIterator( sub_compact->compaction, &range_del_agg, file_options_for_read_)); AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_PROCESS_KV); // I/O measurement variables PerfLevel prev_perf_level = PerfLevel::kEnableTime; const uint64_t kRecordStatsEvery = 1000; uint64_t prev_write_nanos = 0; uint64_t prev_fsync_nanos = 0; uint64_t prev_range_sync_nanos = 0; uint64_t prev_prepare_write_nanos = 0; uint64_t prev_cpu_write_nanos = 0; uint64_t prev_cpu_read_nanos = 0; if (measure_io_stats_) { prev_perf_level = GetPerfLevel(); SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); prev_write_nanos = IOSTATS(write_nanos); prev_fsync_nanos = IOSTATS(fsync_nanos); prev_range_sync_nanos = IOSTATS(range_sync_nanos); prev_prepare_write_nanos = IOSTATS(prepare_write_nanos); prev_cpu_write_nanos = IOSTATS(cpu_write_nanos); prev_cpu_read_nanos = IOSTATS(cpu_read_nanos); } MergeHelper merge( env_, cfd->user_comparator(), cfd->ioptions()->merge_operator, compaction_filter, db_options_.info_log.get(), false /* internal key corruption is expected */, existing_snapshots_.empty() ? 
0 : existing_snapshots_.back(), snapshot_checker_, compact_->compaction->level(), db_options_.statistics.get()); TEST_SYNC_POINT("CompactionJob::Run():Inprogress"); TEST_SYNC_POINT_CALLBACK( "CompactionJob::Run():PausingManualCompaction:1", reinterpret_cast( const_cast*>(manual_compaction_paused_))); Slice* start = sub_compact->start; Slice* end = sub_compact->end; if (start != nullptr) { IterKey start_iter; start_iter.SetInternalKey(*start, kMaxSequenceNumber, kValueTypeForSeek); input->Seek(start_iter.GetInternalKey()); } else { input->SeekToFirst(); } Status status; sub_compact->c_iter.reset(new CompactionIterator( input.get(), cfd->user_comparator(), &merge, versions_->LastSequence(), &existing_snapshots_, earliest_write_conflict_snapshot_, snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_), /*expect_valid_internal_key=*/true, &range_del_agg, sub_compact->compaction, compaction_filter, shutting_down_, preserve_deletes_seqnum_, manual_compaction_paused_, db_options_.info_log)); auto c_iter = sub_compact->c_iter.get(); c_iter->SeekToFirst(); if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) { // ShouldStopBefore() maintains state based on keys processed so far. The // compaction loop always calls it on the "next" key, thus won't tell it the // first key. So we do that here. sub_compact->ShouldStopBefore(c_iter->key(), sub_compact->current_output_file_size); } const auto& c_iter_stats = c_iter->iter_stats(); while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) { // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid() // returns true. 
const Slice& key = c_iter->key(); const Slice& value = c_iter->value(); // If an end key (exclusive) is specified, check if the current key is // >= than it and exit if it is because the iterator is out of its range if (end != nullptr && cfd->user_comparator()->Compare(c_iter->user_key(), *end) >= 0) { break; } if (c_iter_stats.num_input_records % kRecordStatsEvery == kRecordStatsEvery - 1) { RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); c_iter->ResetRecordCounts(); RecordCompactionIOStats(); } // Open output file if necessary if (sub_compact->builder == nullptr) { status = OpenCompactionOutputFile(sub_compact); if (!status.ok()) { break; } } assert(sub_compact->builder != nullptr); assert(sub_compact->current_output() != nullptr); sub_compact->builder->Add(key, value); sub_compact->current_output_file_size = sub_compact->builder->EstimatedFileSize(); const ParsedInternalKey& ikey = c_iter->ikey(); sub_compact->current_output()->meta.UpdateBoundaries( key, value, ikey.sequence, ikey.type); sub_compact->num_output_records++; // Close output file if it is big enough. Two possibilities determine it's // time to close it: (1) the current key should be this file's last key, (2) // the next key should not be in this file. // // TODO(aekmekji): determine if file should be closed earlier than this // during subcompactions (i.e. if output size, estimated by input size, is // going to be 1.2MB and max_output_file_size = 1MB, prefer to have 0.6MB // and 0.6MB instead of 1MB and 0.2MB) bool output_file_ended = false; Status input_status; if (sub_compact->compaction->output_level() != 0 && sub_compact->current_output_file_size >= sub_compact->compaction->max_output_file_size()) { // (1) this key terminates the file. For historical reasons, the iterator // status before advancing will be given to FinishCompactionOutputFile(). 
input_status = input->status(); output_file_ended = true; } TEST_SYNC_POINT_CALLBACK( "CompactionJob::Run():PausingManualCompaction:2", reinterpret_cast( const_cast*>(manual_compaction_paused_))); c_iter->Next(); if (c_iter->status().IsManualCompactionPaused()) { break; } if (!output_file_ended && c_iter->Valid() && sub_compact->compaction->output_level() != 0 && sub_compact->ShouldStopBefore(c_iter->key(), sub_compact->current_output_file_size) && sub_compact->builder != nullptr) { // (2) this key belongs to the next file. For historical reasons, the // iterator status after advancing will be given to // FinishCompactionOutputFile(). input_status = input->status(); output_file_ended = true; } if (output_file_ended) { const Slice* next_key = nullptr; if (c_iter->Valid()) { next_key = &c_iter->key(); } CompactionIterationStats range_del_out_stats; status = FinishCompactionOutputFile(input_status, sub_compact, &range_del_agg, &range_del_out_stats, next_key); RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats); } } sub_compact->compaction_job_stats.num_input_deletion_records = c_iter_stats.num_input_deletion_records; sub_compact->compaction_job_stats.num_corrupt_keys = c_iter_stats.num_input_corrupt_records; sub_compact->compaction_job_stats.num_single_del_fallthru = c_iter_stats.num_single_del_fallthru; sub_compact->compaction_job_stats.num_single_del_mismatch = c_iter_stats.num_single_del_mismatch; sub_compact->compaction_job_stats.total_input_raw_key_bytes += c_iter_stats.total_input_raw_key_bytes; sub_compact->compaction_job_stats.total_input_raw_value_bytes += c_iter_stats.total_input_raw_value_bytes; RecordTick(stats_, FILTER_OPERATION_TOTAL_TIME, c_iter_stats.total_filter_time); RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); RecordCompactionIOStats(); if (status.ok() && cfd->IsDropped()) { status = Status::ColumnFamilyDropped("Column family dropped during compaction"); } if ((status.ok() || 
status.IsColumnFamilyDropped()) && shutting_down_->load(std::memory_order_relaxed)) { status = Status::ShutdownInProgress("Database shutdown"); } if ((status.ok() || status.IsColumnFamilyDropped()) && (manual_compaction_paused_ && manual_compaction_paused_->load(std::memory_order_relaxed))) { status = Status::Incomplete(Status::SubCode::kManualCompactionPaused); } if (status.ok()) { status = input->status(); } if (status.ok()) { status = c_iter->status(); } if (status.ok() && sub_compact->builder == nullptr && sub_compact->outputs.size() == 0 && !range_del_agg.IsEmpty()) { // handle subcompaction containing only range deletions status = OpenCompactionOutputFile(sub_compact); } // Call FinishCompactionOutputFile() even if status is not ok: it needs to // close the output file. if (sub_compact->builder != nullptr) { CompactionIterationStats range_del_out_stats; Status s = FinishCompactionOutputFile(status, sub_compact, &range_del_agg, &range_del_out_stats); if (status.ok()) { status = s; } RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats); } sub_compact->compaction_job_stats.cpu_micros = env_->NowCPUNanos() / 1000 - prev_cpu_micros; if (measure_io_stats_) { sub_compact->compaction_job_stats.file_write_nanos += IOSTATS(write_nanos) - prev_write_nanos; sub_compact->compaction_job_stats.file_fsync_nanos += IOSTATS(fsync_nanos) - prev_fsync_nanos; sub_compact->compaction_job_stats.file_range_sync_nanos += IOSTATS(range_sync_nanos) - prev_range_sync_nanos; sub_compact->compaction_job_stats.file_prepare_write_nanos += IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos; sub_compact->compaction_job_stats.cpu_micros -= (IOSTATS(cpu_write_nanos) - prev_cpu_write_nanos + IOSTATS(cpu_read_nanos) - prev_cpu_read_nanos) / 1000; if (prev_perf_level != PerfLevel::kEnableTimeAndCPUTimeExceptForMutex) { SetPerfLevel(prev_perf_level); } } sub_compact->c_iter.reset(); input.reset(); sub_compact->status = status; } void CompactionJob::RecordDroppedKeys( 
// Finalizes the output table currently being built by `sub_compact`.
//
// Steps, in order:
//   1. If `input_status` is OK, append the range tombstones that fall inside
//      this file's key range and extend the file's smallest/largest
//      boundaries accordingly.
//   2. Finish() the table builder (or Abandon() it if an error was seen).
//   3. Sync and close the output file and record its checksum.
//   4. If the file ended up empty, delete it and drop it from
//      `sub_compact->outputs`.
//   5. Fire the "table file creation finished" event and, in non-LITE
//      builds, report the file to the SstFileManager.
//
// Parameters:
//   input_status         Status of the input iterator at the point where the
//                        output file was cut; a non-OK value causes the
//                        builder to be abandoned.
//   sub_compact          Subcompaction whose current output is finalized.
//                        Its `builder` and `outfile` must be non-null.
//   range_del_agg        Source of the range tombstones to emit.
//   range_del_out_stats  Receives counts of obsolete range tombstones that
//                        were dropped instead of written.
//   next_table_min_key   Internal key that will start the next output file,
//                        or nullptr if this is the last file of the
//                        subcompaction.
//
// Returns the first error encountered, or OK. On return
// `sub_compact->builder` and `sub_compact->outfile` are reset.
Status CompactionJob::FinishCompactionOutputFile(
    const Status& input_status, SubcompactionState* sub_compact,
    CompactionRangeDelAggregator* range_del_agg,
    CompactionIterationStats* range_del_out_stats,
    const Slice* next_table_min_key /* = nullptr */) {
  AutoThreadOperationStageUpdater stage_updater(
      ThreadStatus::STAGE_COMPACTION_SYNC_FILE);
  assert(sub_compact != nullptr);
  assert(sub_compact->outfile);
  assert(sub_compact->builder != nullptr);
  assert(sub_compact->current_output() != nullptr);

  uint64_t output_number = sub_compact->current_output()->meta.fd.GetNumber();
  assert(output_number != 0);

  ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
  const Comparator* ucmp = cfd->user_comparator();

  // Check for iterator errors
  Status s = input_status;
  auto meta = &sub_compact->current_output()->meta;
  assert(meta != nullptr);
  if (s.ok()) {
    Slice lower_bound_guard, upper_bound_guard;
    std::string smallest_user_key;
    const Slice *lower_bound, *upper_bound;
    bool lower_bound_from_sub_compact = false;
    if (sub_compact->outputs.size() == 1) {
      // For the first output table, include range tombstones before the min
      // key but after the subcompaction boundary.
      lower_bound = sub_compact->start;
      lower_bound_from_sub_compact = true;
    } else if (meta->smallest.size() > 0) {
      // For subsequent output tables, only include range tombstones from min
      // key onwards since the previous file was extended to contain range
      // tombstones falling before min key.
      smallest_user_key = meta->smallest.user_key().ToString(false /*hex*/);
      lower_bound_guard = Slice(smallest_user_key);
      lower_bound = &lower_bound_guard;
    } else {
      lower_bound = nullptr;
    }
    if (next_table_min_key != nullptr) {
      // This may be the last file in the subcompaction in some cases, so we
      // need to compare the end key of subcompaction with the next file start
      // key. When the end key is chosen by the subcompaction, we know that
      // it must be the biggest key in output file. Therefore, it is safe to
      // use the smaller key as the upper bound of the output file, to ensure
      // that there is no overlapping between different output files.
      upper_bound_guard = ExtractUserKey(*next_table_min_key);
      if (sub_compact->end != nullptr &&
          ucmp->Compare(upper_bound_guard, *sub_compact->end) >= 0) {
        upper_bound = sub_compact->end;
      } else {
        upper_bound = &upper_bound_guard;
      }
    } else {
      // This is the last file in the subcompaction, so extend until the
      // subcompaction ends.
      upper_bound = sub_compact->end;
    }
    auto earliest_snapshot = kMaxSequenceNumber;
    if (existing_snapshots_.size() > 0) {
      earliest_snapshot = existing_snapshots_[0];
    }
    bool has_overlapping_endpoints;
    if (upper_bound != nullptr && meta->largest.size() > 0) {
      has_overlapping_endpoints =
          ucmp->Compare(meta->largest.user_key(), *upper_bound) == 0;
    } else {
      has_overlapping_endpoints = false;
    }

    // The end key of the subcompaction must be bigger or equal to the upper
    // bound. If the end of subcompaction is null or the upper bound is null,
    // it means that this file is the last file in the compaction. So there
    // will be no overlapping between this file and others.
    assert(sub_compact->end == nullptr || upper_bound == nullptr ||
           ucmp->Compare(*upper_bound, *sub_compact->end) <= 0);
    auto it = range_del_agg->NewIterator(lower_bound, upper_bound,
                                         has_overlapping_endpoints);
    // Position the range tombstone output iterator. There may be tombstone
    // fragments that are entirely out of range, so make sure that we do not
    // include those.
    if (lower_bound != nullptr) {
      it->Seek(*lower_bound);
    } else {
      it->SeekToFirst();
    }
    for (; it->Valid(); it->Next()) {
      auto tombstone = it->Tombstone();
      if (upper_bound != nullptr) {
        int cmp = ucmp->Compare(*upper_bound, tombstone.start_key_);
        if ((has_overlapping_endpoints && cmp < 0) ||
            (!has_overlapping_endpoints && cmp <= 0)) {
          // Tombstones starting after upper_bound only need to be included in
          // the next table. If the current SST ends before upper_bound, i.e.,
          // `has_overlapping_endpoints == false`, we can also skip over range
          // tombstones that start exactly at upper_bound. Such range
          // tombstones will be included in the next file and are not relevant
          // to the point keys or endpoints of the current file.
          break;
        }
      }

      if (bottommost_level_ && tombstone.seq_ <= earliest_snapshot) {
        // TODO(andrewkr): tombstones that span multiple output files are
        // counted for each compaction output file, so lots of double
        // counting.
        range_del_out_stats->num_range_del_drop_obsolete++;
        range_del_out_stats->num_record_drop_obsolete++;
        continue;
      }

      auto kv = tombstone.Serialize();
      assert(lower_bound == nullptr ||
             ucmp->Compare(*lower_bound, kv.second) < 0);
      sub_compact->builder->Add(kv.first.Encode(), kv.second);
      InternalKey smallest_candidate = std::move(kv.first);
      if (lower_bound != nullptr &&
          ucmp->Compare(smallest_candidate.user_key(), *lower_bound) <= 0) {
        // Pretend the smallest key has the same user key as lower_bound
        // (the max key in the previous table or subcompaction) in order for
        // files to appear key-space partitioned.
        //
        // When lower_bound is chosen by a subcompaction, we know that
        // subcompactions over smaller keys cannot contain any keys at
        // lower_bound. We also know that smaller subcompactions exist,
        // because otherwise the subcompaction would be unbounded on the left.
        // As a result, we know that no other files on the output level will
        // contain actual keys at lower_bound (an output file may have a
        // largest key of lower_bound@kMaxSequenceNumber, but this only
        // indicates a large range tombstone was truncated). Therefore, it is
        // safe to use the tombstone's sequence number, to ensure that keys at
        // lower_bound at lower levels are covered by truncated tombstones.
        //
        // If lower_bound was chosen by the smallest data key in the file,
        // choose lowest seqnum so this file's smallest internal key comes
        // after the previous file's largest. The fake seqnum is OK because
        // the read path's file-picking code only considers user key.
        smallest_candidate = InternalKey(
            *lower_bound, lower_bound_from_sub_compact ? tombstone.seq_ : 0,
            kTypeRangeDeletion);
      }
      InternalKey largest_candidate = tombstone.SerializeEndKey();
      if (upper_bound != nullptr &&
          ucmp->Compare(*upper_bound, largest_candidate.user_key()) <= 0) {
        // Pretend the largest key has the same user key as upper_bound (the
        // min key in the following table or subcompaction) in order for files
        // to appear key-space partitioned.
        //
        // Choose highest seqnum so this file's largest internal key comes
        // before the next file's/subcompaction's smallest. The fake seqnum is
        // OK because the read path's file-picking code only considers the
        // user key portion.
        //
        // Note Seek() also creates InternalKey with (user_key,
        // kMaxSequenceNumber), but with kTypeDeletion (0x7) instead of
        // kTypeRangeDeletion (0xF), so the range tombstone comes before the
        // Seek() key in InternalKey's ordering. So Seek() will look in the
        // next file for the user key.
        largest_candidate =
            InternalKey(*upper_bound, kMaxSequenceNumber, kTypeRangeDeletion);
      }
#ifndef NDEBUG
      SequenceNumber smallest_ikey_seqnum = kMaxSequenceNumber;
      if (meta->smallest.size() > 0) {
        smallest_ikey_seqnum = GetInternalKeySeqno(meta->smallest.Encode());
      }
#endif
      meta->UpdateBoundariesForRange(smallest_candidate, largest_candidate,
                                     tombstone.seq_,
                                     cfd->internal_comparator());

      // The smallest key in a file is used for range tombstone truncation, so
      // it cannot have a seqnum of 0 (unless the smallest data key in a file
      // has a seqnum of 0). Otherwise, the truncated tombstone may expose
      // deleted keys at lower levels.
      assert(smallest_ikey_seqnum == 0 ||
             ExtractInternalKeyFooter(meta->smallest.Encode()) !=
                 PackSequenceAndType(0, kTypeRangeDeletion));
    }
    meta->marked_for_compaction = sub_compact->builder->NeedCompact();
  }
  const uint64_t current_entries = sub_compact->builder->NumEntries();
  if (s.ok()) {
    s = sub_compact->builder->Finish();
  } else {
    sub_compact->builder->Abandon();
  }
  // A builder-level I/O error overrides whatever status we had so far.
  if (!sub_compact->builder->io_status().ok()) {
    io_status_ = sub_compact->builder->io_status();
    s = io_status_;
  }
  const uint64_t current_bytes = sub_compact->builder->FileSize();
  if (s.ok()) {
    meta->fd.file_size = current_bytes;
  }
  sub_compact->current_output()->finished = true;
  sub_compact->total_bytes += current_bytes;

  // Finish and check for file errors
  IOStatus io_s;
  if (s.ok()) {
    StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
    io_s = sub_compact->outfile->Sync(db_options_.use_fsync);
  }
  // Close() runs even when Sync() was skipped because of an earlier error:
  // io_s is still OK in that case.
  if (io_s.ok()) {
    io_s = sub_compact->outfile->Close();
  }
  if (io_s.ok()) {
    // Add the checksum information to file metadata.
    meta->file_checksum = sub_compact->outfile->GetFileChecksum();
    meta->file_checksum_func_name =
        sub_compact->outfile->GetFileChecksumFuncName();
  }
  if (!io_s.ok()) {
    io_status_ = io_s;
    s = io_s;
  }
  sub_compact->outfile.reset();

  TableProperties tp;
  if (s.ok()) {
    tp = sub_compact->builder->GetTableProperties();
  }

  if (s.ok() && current_entries == 0 && tp.num_range_deletions == 0) {
    // If there is nothing to output, there is no need to generate an SST
    // file. This happens when the output level is the bottom level and, at
    // the same time, the sub_compact produced no output.
    std::string fname =
        TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths,
                      meta->fd.GetNumber(), meta->fd.GetPathId());
    // Best effort: a leftover empty file does not make the compaction result
    // wrong, so a failed delete is logged rather than propagated.
    Status delete_status = env_->DeleteFile(fname);
    if (!delete_status.ok()) {
      ROCKS_LOG_WARN(db_options_.info_log,
                     "[%s] [JOB %d] Unable to remove empty SST file for "
                     "table #%" PRIu64 ": %s",
                     cfd->GetName().c_str(), job_id_, output_number,
                     delete_status.ToString().c_str());
    }

    // Also need to remove the file from outputs, or it will be added to the
    // VersionEdit.
    assert(!sub_compact->outputs.empty());
    sub_compact->outputs.pop_back();
    meta = nullptr;
  }

  if (s.ok() && (current_entries > 0 || tp.num_range_deletions > 0)) {
    // Output to event logger and fire events.
    sub_compact->current_output()->table_properties =
        std::make_shared<TableProperties>(tp);
    ROCKS_LOG_INFO(db_options_.info_log,
                   "[%s] [JOB %d] Generated table #%" PRIu64 ": %" PRIu64
                   " keys, %" PRIu64 " bytes%s",
                   cfd->GetName().c_str(), job_id_, output_number,
                   current_entries, current_bytes,
                   meta->marked_for_compaction ? " (need compaction)" : "");
  }
  std::string fname;
  FileDescriptor output_fd;
  uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
  if (meta != nullptr) {
    fname =
        TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths,
                      meta->fd.GetNumber(), meta->fd.GetPathId());
    output_fd = meta->fd;
    oldest_blob_file_number = meta->oldest_blob_file_number;
  } else {
    // The empty output was removed above; there is no file to report.
    fname = "(nil)";
  }
  EventHelpers::LogAndNotifyTableFileCreationFinished(
      event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname,
      job_id_, output_fd, oldest_blob_file_number, tp,
      TableFileCreationReason::kCompaction, s);

#ifndef ROCKSDB_LITE
  // Report new file to SstFileManagerImpl
  auto sfm =
      static_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
  if (sfm && meta != nullptr && meta->fd.GetPathId() == 0) {
    sfm->OnAddFile(fname);
    if (sfm->IsMaxAllowedSpaceReached()) {
      // TODO(ajkr): should we return OK() if max space was reached by the
      // final compaction output file (similarly to how flush works when
      // full)?
      s = Status::SpaceLimit("Max allowed space was reached");
      TEST_SYNC_POINT(
          "CompactionJob::FinishCompactionOutputFile:"
          "MaxAllowedSpaceReached");
      InstrumentedMutexLock l(db_mutex_);
      db_error_handler_->SetBGError(s, BackgroundErrorReason::kCompaction);
    }
  }
#endif

  sub_compact->builder.reset();
  sub_compact->current_output_file_size = 0;
  return s;
}
IOSTATS(bytes_read)); IOSTATS_RESET(bytes_read); RecordTick(stats_, COMPACT_WRITE_BYTES, IOSTATS(bytes_written)); ThreadStatusUtil::IncreaseThreadOperationProperty( ThreadStatus::COMPACTION_BYTES_WRITTEN, IOSTATS(bytes_written)); IOSTATS_RESET(bytes_written); } Status CompactionJob::OpenCompactionOutputFile( SubcompactionState* sub_compact) { assert(sub_compact != nullptr); assert(sub_compact->builder == nullptr); // no need to lock because VersionSet::next_file_number_ is atomic uint64_t file_number = versions_->NewFileNumber(); std::string fname = TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths, file_number, sub_compact->compaction->output_path_id()); // Fire events. ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); #ifndef ROCKSDB_LITE EventHelpers::NotifyTableFileCreationStarted( cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, job_id_, TableFileCreationReason::kCompaction); #endif // !ROCKSDB_LITE // Make the output file std::unique_ptr writable_file; #ifndef NDEBUG bool syncpoint_arg = file_options_.use_direct_writes; TEST_SYNC_POINT_CALLBACK("CompactionJob::OpenCompactionOutputFile", &syncpoint_arg); #endif Status s = NewWritableFile(fs_, fname, &writable_file, file_options_); if (!s.ok()) { ROCKS_LOG_ERROR( db_options_.info_log, "[%s] [JOB %d] OpenCompactionOutputFiles for table #%" PRIu64 " fails at NewWritableFile with status %s", sub_compact->compaction->column_family_data()->GetName().c_str(), job_id_, file_number, s.ToString().c_str()); LogFlush(db_options_.info_log); EventHelpers::LogAndNotifyTableFileCreationFinished( event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, job_id_, FileDescriptor(), kInvalidBlobFileNumber, TableProperties(), TableFileCreationReason::kCompaction, s); return s; } // Try to figure out the output file's oldest ancester time. 
int64_t temp_current_time = 0; auto get_time_status = env_->GetCurrentTime(&temp_current_time); // Safe to proceed even if GetCurrentTime fails. So, log and proceed. if (!get_time_status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Failed to get current time. Status: %s", get_time_status.ToString().c_str()); } uint64_t current_time = static_cast(temp_current_time); uint64_t oldest_ancester_time = sub_compact->compaction->MinInputFileOldestAncesterTime(); if (oldest_ancester_time == port::kMaxUint64) { oldest_ancester_time = current_time; } // Initialize a SubcompactionState::Output and add it to sub_compact->outputs { SubcompactionState::Output out; out.meta.fd = FileDescriptor(file_number, sub_compact->compaction->output_path_id(), 0); out.meta.oldest_ancester_time = oldest_ancester_time; out.meta.file_creation_time = current_time; out.finished = false; sub_compact->outputs.push_back(out); } writable_file->SetIOPriority(Env::IOPriority::IO_LOW); writable_file->SetWriteLifeTimeHint(write_hint_); writable_file->SetPreallocationBlockSize(static_cast( sub_compact->compaction->OutputFilePreallocationSize())); const auto& listeners = sub_compact->compaction->immutable_cf_options()->listeners; sub_compact->outfile.reset( new WritableFileWriter(std::move(writable_file), fname, file_options_, env_, db_options_.statistics.get(), listeners, db_options_.file_checksum_gen_factory.get())); // If the Column family flag is to only optimize filters for hits, // we can skip creating filters if this is the bottommost_level where // data is going to be found bool skip_filters = cfd->ioptions()->optimize_filters_for_hits && bottommost_level_; sub_compact->builder.reset(NewTableBuilder( *cfd->ioptions(), *(sub_compact->compaction->mutable_cf_options()), cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(), sub_compact->outfile.get(), sub_compact->compaction->output_compression(), 0 /*sample_for_compression */, 
sub_compact->compaction->output_compression_opts(), sub_compact->compaction->output_level(), skip_filters, oldest_ancester_time, 0 /* oldest_key_time */, sub_compact->compaction->max_output_file_size(), current_time)); LogFlush(db_options_.info_log); return s; } void CompactionJob::CleanupCompaction() { for (SubcompactionState& sub_compact : compact_->sub_compact_states) { const auto& sub_status = sub_compact.status; if (sub_compact.builder != nullptr) { // May happen if we get a shutdown call in the middle of compaction sub_compact.builder->Abandon(); sub_compact.builder.reset(); } else { assert(!sub_status.ok() || sub_compact.outfile == nullptr); } for (const auto& out : sub_compact.outputs) { // If this file was inserted into the table cache then remove // them here because this compaction was not committed. if (!sub_status.ok()) { TableCache::Evict(table_cache_.get(), out.meta.fd.GetNumber()); } } } delete compact_; compact_ = nullptr; } #ifndef ROCKSDB_LITE namespace { void CopyPrefix(const Slice& src, size_t prefix_length, std::string* dst) { assert(prefix_length > 0); size_t length = src.size() > prefix_length ? 
prefix_length : src.size(); dst->assign(src.data(), length); } } // namespace #endif // !ROCKSDB_LITE void CompactionJob::UpdateCompactionStats() { Compaction* compaction = compact_->compaction; compaction_stats_.num_input_files_in_non_output_levels = 0; compaction_stats_.num_input_files_in_output_level = 0; for (int input_level = 0; input_level < static_cast(compaction->num_input_levels()); ++input_level) { if (compaction->level(input_level) != compaction->output_level()) { UpdateCompactionInputStatsHelper( &compaction_stats_.num_input_files_in_non_output_levels, &compaction_stats_.bytes_read_non_output_levels, input_level); } else { UpdateCompactionInputStatsHelper( &compaction_stats_.num_input_files_in_output_level, &compaction_stats_.bytes_read_output_level, input_level); } } uint64_t num_output_records = 0; for (const auto& sub_compact : compact_->sub_compact_states) { size_t num_output_files = sub_compact.outputs.size(); if (sub_compact.builder != nullptr) { // An error occurred so ignore the last output. 
assert(num_output_files > 0); --num_output_files; } compaction_stats_.num_output_files += static_cast(num_output_files); num_output_records += sub_compact.num_output_records; for (const auto& out : sub_compact.outputs) { compaction_stats_.bytes_written += out.meta.fd.file_size; } } if (compaction_stats_.num_input_records > num_output_records) { compaction_stats_.num_dropped_records = compaction_stats_.num_input_records - num_output_records; } } void CompactionJob::UpdateCompactionInputStatsHelper(int* num_files, uint64_t* bytes_read, int input_level) { const Compaction* compaction = compact_->compaction; auto num_input_files = compaction->num_input_files(input_level); *num_files += static_cast(num_input_files); for (size_t i = 0; i < num_input_files; ++i) { const auto* file_meta = compaction->input(input_level, i); *bytes_read += file_meta->fd.GetFileSize(); compaction_stats_.num_input_records += static_cast(file_meta->num_entries); } } void CompactionJob::UpdateCompactionJobStats( const InternalStats::CompactionStats& stats) const { #ifndef ROCKSDB_LITE if (compaction_job_stats_) { compaction_job_stats_->elapsed_micros = stats.micros; // input information compaction_job_stats_->total_input_bytes = stats.bytes_read_non_output_levels + stats.bytes_read_output_level; compaction_job_stats_->num_input_records = stats.num_input_records; compaction_job_stats_->num_input_files = stats.num_input_files_in_non_output_levels + stats.num_input_files_in_output_level; compaction_job_stats_->num_input_files_at_output_level = stats.num_input_files_in_output_level; // output information compaction_job_stats_->total_output_bytes = stats.bytes_written; compaction_job_stats_->num_output_records = compact_->num_output_records; compaction_job_stats_->num_output_files = stats.num_output_files; if (compact_->NumOutputFiles() > 0U) { CopyPrefix(compact_->SmallestUserKey(), CompactionJobStats::kMaxPrefixLength, &compaction_job_stats_->smallest_output_key_prefix); 
CopyPrefix(compact_->LargestUserKey(), CompactionJobStats::kMaxPrefixLength, &compaction_job_stats_->largest_output_key_prefix); } } #else (void)stats; #endif // !ROCKSDB_LITE } void CompactionJob::LogCompaction() { Compaction* compaction = compact_->compaction; ColumnFamilyData* cfd = compaction->column_family_data(); // Let's check if anything will get logged. Don't prepare all the info if // we're not logging if (db_options_.info_log_level <= InfoLogLevel::INFO_LEVEL) { Compaction::InputLevelSummaryBuffer inputs_summary; ROCKS_LOG_INFO( db_options_.info_log, "[%s] [JOB %d] Compacting %s, score %.2f", cfd->GetName().c_str(), job_id_, compaction->InputLevelSummary(&inputs_summary), compaction->score()); char scratch[2345]; compaction->Summary(scratch, sizeof(scratch)); ROCKS_LOG_INFO(db_options_.info_log, "[%s] Compaction start summary: %s\n", cfd->GetName().c_str(), scratch); // build event logger report auto stream = event_logger_->Log(); stream << "job" << job_id_ << "event" << "compaction_started" << "compaction_reason" << GetCompactionReasonString(compaction->compaction_reason()); for (size_t i = 0; i < compaction->num_input_levels(); ++i) { stream << ("files_L" + ToString(compaction->level(i))); stream.StartArray(); for (auto f : *compaction->inputs(i)) { stream << f->fd.GetNumber(); } stream.EndArray(); } stream << "score" << compaction->score() << "input_data_size" << compaction->CalculateTotalInputSize(); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_job.h000066400000000000000000000165151370372246700211350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include #include #include #include "db/column_family.h" #include "db/compaction/compaction_iterator.h" #include "db/dbformat.h" #include "db/flush_scheduler.h" #include "db/internal_stats.h" #include "db/job_context.h" #include "db/log_writer.h" #include "db/memtable_list.h" #include "db/range_del_aggregator.h" #include "db/version_edit.h" #include "db/write_controller.h" #include "db/write_thread.h" #include "logging/event_logger.h" #include "options/cf_options.h" #include "options/db_options.h" #include "port/port.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/compaction_job_stats.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/transaction_log.h" #include "table/scoped_arena_iterator.h" #include "util/autovector.h" #include "util/stop_watch.h" #include "util/thread_local.h" namespace ROCKSDB_NAMESPACE { class Arena; class ErrorHandler; class MemTable; class SnapshotChecker; class TableCache; class Version; class VersionEdit; class VersionSet; // CompactionJob is responsible for executing the compaction. Each (manual or // automated) compaction corresponds to a CompactionJob object, and usually // goes through the stages of `Prepare()`->`Run()`->`Install()`. CompactionJob // will divide the compaction into subcompactions and execute them in parallel // if needed. 
class CompactionJob { public: CompactionJob(int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, const FileOptions& file_options, VersionSet* versions, const std::atomic* shutting_down, const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer, FSDirectory* db_directory, FSDirectory* output_directory, Statistics* stats, InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler, std::vector existing_snapshots, SequenceNumber earliest_write_conflict_snapshot, const SnapshotChecker* snapshot_checker, std::shared_ptr table_cache, EventLogger* event_logger, bool paranoid_file_checks, bool measure_io_stats, const std::string& dbname, CompactionJobStats* compaction_job_stats, Env::Priority thread_pri, const std::atomic* manual_compaction_paused = nullptr); ~CompactionJob(); // no copy/move CompactionJob(CompactionJob&& job) = delete; CompactionJob(const CompactionJob& job) = delete; CompactionJob& operator=(const CompactionJob& job) = delete; // REQUIRED: mutex held // Prepare for the compaction by setting up boundaries for each subcompaction void Prepare(); // REQUIRED mutex not held // Launch threads for each subcompaction and wait for them to finish. After // that, verify table is usable and finally do bookkeeping to unify // subcompaction results Status Run(); // REQUIRED: mutex held // Add compaction input/output to the current version Status Install(const MutableCFOptions& mutable_cf_options); // Return the IO status IOStatus io_status() const { return io_status_; } private: struct SubcompactionState; void AggregateStatistics(); // Generates a histogram representing potential divisions of key ranges from // the input. It adds the starting and/or ending keys of certain input files // to the working set and then finds the approximate size of data in between // each consecutive pair of slices. Then it divides these ranges into // consecutive groups such that each group has a similar size. 
void GenSubcompactionBoundaries(); // update the thread status for starting a compaction. void ReportStartedCompaction(Compaction* compaction); void AllocateCompactionOutputFileNumbers(); // Call compaction filter. Then iterate through input and compact the // kv-pairs void ProcessKeyValueCompaction(SubcompactionState* sub_compact); Status FinishCompactionOutputFile( const Status& input_status, SubcompactionState* sub_compact, CompactionRangeDelAggregator* range_del_agg, CompactionIterationStats* range_del_out_stats, const Slice* next_table_min_key = nullptr); Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options); void RecordCompactionIOStats(); Status OpenCompactionOutputFile(SubcompactionState* sub_compact); void CleanupCompaction(); void UpdateCompactionJobStats( const InternalStats::CompactionStats& stats) const; void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats, CompactionJobStats* compaction_job_stats = nullptr); void UpdateCompactionStats(); void UpdateCompactionInputStatsHelper( int* num_files, uint64_t* bytes_read, int input_level); void LogCompaction(); int job_id_; // CompactionJob state struct CompactionState; CompactionState* compact_; CompactionJobStats* compaction_job_stats_; InternalStats::CompactionStats compaction_stats_; // DBImpl state const std::string& dbname_; const ImmutableDBOptions& db_options_; const FileOptions file_options_; Env* env_; FileSystem* fs_; // env_option optimized for compaction table reads FileOptions file_options_for_read_; VersionSet* versions_; const std::atomic* shutting_down_; const std::atomic* manual_compaction_paused_; const SequenceNumber preserve_deletes_seqnum_; LogBuffer* log_buffer_; FSDirectory* db_directory_; FSDirectory* output_directory_; Statistics* stats_; InstrumentedMutex* db_mutex_; ErrorHandler* db_error_handler_; // If there were two snapshots with seq numbers s1 and // s2 and s1 < s2, and if we find two instances of a key k1 then lies // entirely within s1 and 
s2, then the earlier version of k1 can be safely // deleted because that version is not visible in any snapshot. std::vector existing_snapshots_; // This is the earliest snapshot that could be used for write-conflict // checking by a transaction. For any user-key newer than this snapshot, we // should make sure not to remove evidence that a write occurred. SequenceNumber earliest_write_conflict_snapshot_; const SnapshotChecker* const snapshot_checker_; std::shared_ptr table_cache_; EventLogger* event_logger_; // Is this compaction creating a file in the bottom most level? bool bottommost_level_; bool paranoid_file_checks_; bool measure_io_stats_; // Stores the Slices that designate the boundaries for each subcompaction std::vector boundaries_; // Stores the approx size of keys covered in the range of each subcompaction std::vector sizes_; Env::WriteLifeTimeHint write_hint_; Env::Priority thread_pri_; IOStatus io_status_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_job_stats_test.cc000066400000000000000000001072071370372246700235470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
// Formats `key` as a decimal number, left-padded with '0' to a width of
// `length` characters, and returns it as a string.
//
// key:    numeric value to render.
// length: requested minimum width. Widths larger than the internal buffer
//         can hold are clamped to kBufSize - 1 characters.
//
// Returns the zero-padded decimal representation of `key`. If `key` has more
// digits than `length`, it is not truncated.
std::string Key(uint64_t key, int length) {
  const int kBufSize = 1000;
  char buf[kBufSize];
  // One byte of the buffer is needed for the terminating NUL. Clamping to
  // kBufSize itself would make snprintf() truncate the output and silently
  // drop the least-significant digit of the key.
  if (length > kBufSize - 1) {
    length = kBufSize - 1;
  }
  snprintf(buf, kBufSize, "%0*" PRIu64, length, key);
  return std::string(buf);
}
std::string dbname_; std::string alternative_wal_dir_; Env* env_; DB* db_; std::vector handles_; uint32_t max_subcompactions_; Options last_options_; CompactionJobStatsTest() : env_(Env::Default()) { env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); dbname_ = test::PerThreadDBPath("compaction_job_stats_test"); alternative_wal_dir_ = dbname_ + "/wal"; Options options; options.create_if_missing = true; max_subcompactions_ = GetParam(); options.max_subcompactions = max_subcompactions_; auto delete_options = options; delete_options.wal_dir = alternative_wal_dir_; EXPECT_OK(DestroyDB(dbname_, delete_options)); // Destroy it for not alternative WAL dir is used. EXPECT_OK(DestroyDB(dbname_, options)); db_ = nullptr; Reopen(options); } ~CompactionJobStatsTest() override { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); Close(); Options options; options.db_paths.emplace_back(dbname_, 0); options.db_paths.emplace_back(dbname_ + "_2", 0); options.db_paths.emplace_back(dbname_ + "_3", 0); options.db_paths.emplace_back(dbname_ + "_4", 0); EXPECT_OK(DestroyDB(dbname_, options)); } // Required if inheriting from testing::WithParamInterface<> static void SetUpTestCase() {} static void TearDownTestCase() {} DBImpl* dbfull() { return reinterpret_cast(db_); } void CreateColumnFamilies(const std::vector& cfs, const Options& options) { ColumnFamilyOptions cf_opts(options); size_t cfi = handles_.size(); handles_.resize(cfi + cfs.size()); for (auto cf : cfs) { ASSERT_OK(db_->CreateColumnFamily(cf_opts, cf, &handles_[cfi++])); } } void CreateAndReopenWithCF(const std::vector& cfs, const Options& options) { CreateColumnFamilies(cfs, options); std::vector cfs_plus_default = cfs; cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName); ReopenWithColumnFamilies(cfs_plus_default, options); } 
void ReopenWithColumnFamilies(const std::vector& cfs, const std::vector& options) { ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); } void ReopenWithColumnFamilies(const std::vector& cfs, const Options& options) { ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); } Status TryReopenWithColumnFamilies( const std::vector& cfs, const std::vector& options) { Close(); EXPECT_EQ(cfs.size(), options.size()); std::vector column_families; for (size_t i = 0; i < cfs.size(); ++i) { column_families.push_back(ColumnFamilyDescriptor(cfs[i], options[i])); } DBOptions db_opts = DBOptions(options[0]); return DB::Open(db_opts, dbname_, column_families, &handles_, &db_); } Status TryReopenWithColumnFamilies(const std::vector& cfs, const Options& options) { Close(); std::vector v_opts(cfs.size(), options); return TryReopenWithColumnFamilies(cfs, v_opts); } void Reopen(const Options& options) { ASSERT_OK(TryReopen(options)); } void Close() { for (auto h : handles_) { delete h; } handles_.clear(); delete db_; db_ = nullptr; } void DestroyAndReopen(const Options& options) { // Destroy using last options Destroy(last_options_); ASSERT_OK(TryReopen(options)); } void Destroy(const Options& options) { Close(); ASSERT_OK(DestroyDB(dbname_, options)); } Status ReadOnlyReopen(const Options& options) { return DB::OpenForReadOnly(options, dbname_, &db_); } Status TryReopen(const Options& options) { Close(); last_options_ = options; return DB::Open(options, dbname_, &db_); } Status Flush(int cf = 0) { if (cf == 0) { return db_->Flush(FlushOptions()); } else { return db_->Flush(FlushOptions(), handles_[cf]); } } Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()) { return db_->Put(wo, k, v); } Status Put(int cf, const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()) { return db_->Put(wo, handles_[cf], k, v); } Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } Status Delete(int cf, const std::string& k) { return 
db_->Delete(WriteOptions(), handles_[cf], k); } std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; std::string result; Status s = db_->Get(options, k, &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } std::string Get(int cf, const std::string& k, const Snapshot* snapshot = nullptr) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; std::string result; Status s = db_->Get(options, handles_[cf], k, &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } int NumTableFilesAtLevel(int level, int cf = 0) { std::string property; if (cf == 0) { // default cfd EXPECT_TRUE(db_->GetProperty( "rocksdb.num-files-at-level" + NumberToString(level), &property)); } else { EXPECT_TRUE(db_->GetProperty( handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level), &property)); } return atoi(property.c_str()); } // Return spread of files per level std::string FilesPerLevel(int cf = 0) { int num_levels = (cf == 0) ? db_->NumberLevels() : db_->NumberLevels(handles_[1]); std::string result; size_t last_non_zero_offset = 0; for (int level = 0; level < num_levels; level++) { int f = NumTableFilesAtLevel(level, cf); char buf[100]; snprintf(buf, sizeof(buf), "%s%d", (level ? 
"," : ""), f); result += buf; if (f > 0) { last_non_zero_offset = result.size(); } } result.resize(last_non_zero_offset); return result; } uint64_t Size(const Slice& start, const Slice& limit, int cf = 0) { Range r(start, limit); uint64_t size; if (cf == 0) { db_->GetApproximateSizes(&r, 1, &size); } else { db_->GetApproximateSizes(handles_[1], &r, 1, &size); } return size; } void Compact(int cf, const Slice& start, const Slice& limit, uint32_t target_path_id) { CompactRangeOptions compact_options; compact_options.target_path_id = target_path_id; ASSERT_OK(db_->CompactRange(compact_options, handles_[cf], &start, &limit)); } void Compact(int cf, const Slice& start, const Slice& limit) { ASSERT_OK( db_->CompactRange(CompactRangeOptions(), handles_[cf], &start, &limit)); } void Compact(const Slice& start, const Slice& limit) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &limit)); } void TEST_Compact(int level, int cf, const Slice& start, const Slice& limit) { ASSERT_OK(dbfull()->TEST_CompactRange(level, &start, &limit, handles_[cf], true /* disallow trivial move */)); } // Do n memtable compactions, each of which produces an sstable // covering the range [small,large]. 
void MakeTables(int n, const std::string& small, const std::string& large, int cf = 0) { for (int i = 0; i < n; i++) { ASSERT_OK(Put(cf, small, "begin")); ASSERT_OK(Put(cf, large, "end")); ASSERT_OK(Flush(cf)); } } static void SetDeletionCompactionStats( CompactionJobStats *stats, uint64_t input_deletions, uint64_t expired_deletions, uint64_t records_replaced) { stats->num_input_deletion_records = input_deletions; stats->num_expired_deletion_records = expired_deletions; stats->num_records_replaced = records_replaced; } void MakeTableWithKeyValues( Random* rnd, uint64_t smallest, uint64_t largest, int key_size, int value_size, uint64_t interval, double ratio, int cf = 0) { for (auto key = smallest; key < largest; key += interval) { ASSERT_OK(Put(cf, Slice(Key(key, key_size)), Slice(RandomString(rnd, value_size, ratio)))); } ASSERT_OK(Flush(cf)); } // This function behaves with the implicit understanding that two // rounds of keys are inserted into the database, as per the behavior // of the DeletionStatsTest. 
void SelectivelyDeleteKeys(uint64_t smallest, uint64_t largest, uint64_t interval, int deletion_interval, int key_size, uint64_t cutoff_key_num, CompactionJobStats* stats, int cf = 0) { // interval needs to be >= 2 so that deletion entries can be inserted // that are intended to not result in an actual key deletion by using // an offset of 1 from another existing key ASSERT_GE(interval, 2); uint64_t ctr = 1; uint32_t deletions_made = 0; uint32_t num_deleted = 0; uint32_t num_expired = 0; for (auto key = smallest; key <= largest; key += interval, ctr++) { if (ctr % deletion_interval == 0) { ASSERT_OK(Delete(cf, Key(key, key_size))); deletions_made++; num_deleted++; if (key > cutoff_key_num) { num_expired++; } } } // Insert some deletions for keys that don't exist that // are both in and out of the key range ASSERT_OK(Delete(cf, Key(smallest+1, key_size))); deletions_made++; ASSERT_OK(Delete(cf, Key(smallest-1, key_size))); deletions_made++; num_expired++; ASSERT_OK(Delete(cf, Key(smallest-9, key_size))); deletions_made++; num_expired++; ASSERT_OK(Flush(cf)); SetDeletionCompactionStats(stats, deletions_made, num_expired, num_deleted); } }; // An EventListener which helps verify the compaction results in // test CompactionJobStatsTest. class CompactionJobStatsChecker : public EventListener { public: CompactionJobStatsChecker() : compression_enabled_(false), verify_next_comp_io_stats_(false) {} size_t NumberOfUnverifiedStats() { return expected_stats_.size(); } void set_verify_next_comp_io_stats(bool v) { verify_next_comp_io_stats_ = v; } // Once a compaction completed, this function will verify the returned // CompactionJobInfo with the oldest CompactionJobInfo added earlier // in "expected_stats_" which has not yet being used for verification. 
void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { if (verify_next_comp_io_stats_) { ASSERT_GT(ci.stats.file_write_nanos, 0); ASSERT_GT(ci.stats.file_range_sync_nanos, 0); ASSERT_GT(ci.stats.file_fsync_nanos, 0); ASSERT_GT(ci.stats.file_prepare_write_nanos, 0); verify_next_comp_io_stats_ = false; } std::lock_guard lock(mutex_); if (expected_stats_.size()) { Verify(ci.stats, expected_stats_.front()); expected_stats_.pop(); } } // A helper function which verifies whether two CompactionJobStats // match. The verification of all compaction stats are done by // ASSERT_EQ except for the total input / output bytes, which we // use ASSERT_GE and ASSERT_LE with a reasonable bias --- // 10% in uncompressed case and 20% when compression is used. virtual void Verify(const CompactionJobStats& current_stats, const CompactionJobStats& stats) { // time ASSERT_GT(current_stats.elapsed_micros, 0U); ASSERT_EQ(current_stats.num_input_records, stats.num_input_records); ASSERT_EQ(current_stats.num_input_files, stats.num_input_files); ASSERT_EQ(current_stats.num_input_files_at_output_level, stats.num_input_files_at_output_level); ASSERT_EQ(current_stats.num_output_records, stats.num_output_records); ASSERT_EQ(current_stats.num_output_files, stats.num_output_files); ASSERT_EQ(current_stats.is_manual_compaction, stats.is_manual_compaction); // file size double kFileSizeBias = compression_enabled_ ? 
0.20 : 0.10; ASSERT_GE(current_stats.total_input_bytes * (1.00 + kFileSizeBias), stats.total_input_bytes); ASSERT_LE(current_stats.total_input_bytes, stats.total_input_bytes * (1.00 + kFileSizeBias)); ASSERT_GE(current_stats.total_output_bytes * (1.00 + kFileSizeBias), stats.total_output_bytes); ASSERT_LE(current_stats.total_output_bytes, stats.total_output_bytes * (1.00 + kFileSizeBias)); ASSERT_EQ(current_stats.total_input_raw_key_bytes, stats.total_input_raw_key_bytes); ASSERT_EQ(current_stats.total_input_raw_value_bytes, stats.total_input_raw_value_bytes); ASSERT_EQ(current_stats.num_records_replaced, stats.num_records_replaced); ASSERT_EQ(current_stats.num_corrupt_keys, stats.num_corrupt_keys); ASSERT_EQ( std::string(current_stats.smallest_output_key_prefix), std::string(stats.smallest_output_key_prefix)); ASSERT_EQ( std::string(current_stats.largest_output_key_prefix), std::string(stats.largest_output_key_prefix)); } // Add an expected compaction stats, which will be used to // verify the CompactionJobStats returned by the OnCompactionCompleted() // callback. void AddExpectedStats(const CompactionJobStats& stats) { std::lock_guard lock(mutex_); expected_stats_.push(stats); } void EnableCompression(bool flag) { compression_enabled_ = flag; } bool verify_next_comp_io_stats() const { return verify_next_comp_io_stats_; } private: std::mutex mutex_; std::queue expected_stats_; bool compression_enabled_; bool verify_next_comp_io_stats_; }; // An EventListener which helps verify the compaction statistics in // the test DeletionStatsTest. class CompactionJobDeletionStatsChecker : public CompactionJobStatsChecker { public: // Verifies whether two CompactionJobStats match. 
// Rough estimate of the size in bytes of a table file holding `num_records`
// entries. The estimate is the sum of four terms:
//   data   = num_records * (key_size + value_size * compression_ratio + 8)
//   footer = 512
//   filter = num_records * bloom_bits_per_key / 8
//   index  = data * (key_size + 8) / block_size
// The data term is computed in floating point and truncated; the filter and
// index terms use integer division.
uint64_t EstimatedFileSize(
    uint64_t num_records, size_t key_size, size_t value_size,
    double compression_ratio = 1.0,
    size_t block_size = 4096,
    int bloom_bits_per_key = 10) {
  const size_t kPerKeyOverhead = 8;
  const size_t kFooterSize = 512;

  const double bytes_per_record =
      key_size + value_size * compression_ratio + kPerKeyOverhead;
  const uint64_t data_bytes =
      static_cast<uint64_t>(num_records * bytes_per_record);
  const uint64_t filter_bytes = num_records * bloom_bits_per_key / 8;
  const uint64_t index_bytes = data_bytes * (key_size + 8) / block_size;

  return data_bytes + kFooterSize + filter_bytes + index_bytes;
}
prefix_length : src.size(); dst->assign(src.data(), length); } } // namespace CompactionJobStats NewManualCompactionJobStats( const std::string& smallest_key, const std::string& largest_key, size_t num_input_files, size_t num_input_files_at_output_level, uint64_t num_input_records, size_t key_size, size_t value_size, size_t num_output_files, uint64_t num_output_records, double compression_ratio, uint64_t num_records_replaced, bool is_manual = true) { CompactionJobStats stats; stats.Reset(); stats.num_input_records = num_input_records; stats.num_input_files = num_input_files; stats.num_input_files_at_output_level = num_input_files_at_output_level; stats.num_output_records = num_output_records; stats.num_output_files = num_output_files; stats.total_input_bytes = EstimatedFileSize( num_input_records / num_input_files, key_size, value_size, compression_ratio) * num_input_files; stats.total_output_bytes = EstimatedFileSize( num_output_records / num_output_files, key_size, value_size, compression_ratio) * num_output_files; stats.total_input_raw_key_bytes = num_input_records * (key_size + 8); stats.total_input_raw_value_bytes = num_input_records * value_size; stats.is_manual_compaction = is_manual; stats.num_records_replaced = num_records_replaced; CopyPrefix(smallest_key, CompactionJobStats::kMaxPrefixLength, &stats.smallest_output_key_prefix); CopyPrefix(largest_key, CompactionJobStats::kMaxPrefixLength, &stats.largest_output_key_prefix); return stats; } CompressionType GetAnyCompression() { if (Snappy_Supported()) { return kSnappyCompression; } else if (Zlib_Supported()) { return kZlibCompression; } else if (BZip2_Supported()) { return kBZip2Compression; } else if (LZ4_Supported()) { return kLZ4Compression; } else if (XPRESS_Supported()) { return kXpressCompression; } return kNoCompression; } } // namespace TEST_P(CompactionJobStatsTest, CompactionJobStatsTest) { Random rnd(301); const int kBufSize = 100; char buf[kBufSize]; uint64_t key_base = 100000000l; // Note: 
key_base must be multiple of num_keys_per_L0_file int num_keys_per_L0_file = 100; const int kTestScale = 8; const int kKeySize = 10; const int kValueSize = 1000; const double kCompressionRatio = 0.5; double compression_ratio = 1.0; uint64_t key_interval = key_base / num_keys_per_L0_file; // Whenever a compaction completes, this listener will try to // verify whether the returned CompactionJobStats matches // what we expect. The expected CompactionJobStats is added // via AddExpectedStats(). auto* stats_checker = new CompactionJobStatsChecker(); Options options; options.listeners.emplace_back(stats_checker); options.create_if_missing = true; // just enough setting to hold off auto-compaction. options.level0_file_num_compaction_trigger = kTestScale + 1; options.num_levels = 3; options.compression = kNoCompression; options.max_subcompactions = max_subcompactions_; options.bytes_per_sync = 512 * 1024; options.report_bg_io_stats = true; for (int test = 0; test < 2; ++test) { DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // 1st Phase: generate "num_L0_files" L0 files. int num_L0_files = 0; for (uint64_t start_key = key_base; start_key <= key_base * kTestScale; start_key += key_base) { MakeTableWithKeyValues( &rnd, start_key, start_key + key_base - 1, kKeySize, kValueSize, key_interval, compression_ratio, 1); snprintf(buf, kBufSize, "%d", ++num_L0_files); ASSERT_EQ(std::string(buf), FilesPerLevel(1)); } ASSERT_EQ(ToString(num_L0_files), FilesPerLevel(1)); // 2nd Phase: perform L0 -> L1 compaction. 
int L0_compaction_count = 6; int count = 1; std::string smallest_key; std::string largest_key; for (uint64_t start_key = key_base; start_key <= key_base * L0_compaction_count; start_key += key_base, count++) { smallest_key = Key(start_key, 10); largest_key = Key(start_key + key_base - key_interval, 10); stats_checker->AddExpectedStats( NewManualCompactionJobStats( smallest_key, largest_key, 1, 0, num_keys_per_L0_file, kKeySize, kValueSize, 1, num_keys_per_L0_file, compression_ratio, 0)); ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U); TEST_Compact(0, 1, smallest_key, largest_key); snprintf(buf, kBufSize, "%d,%d", num_L0_files - count, count); ASSERT_EQ(std::string(buf), FilesPerLevel(1)); } // compact two files into one in the last L0 -> L1 compaction int num_remaining_L0 = num_L0_files - L0_compaction_count; smallest_key = Key(key_base * (L0_compaction_count + 1), 10); largest_key = Key(key_base * (kTestScale + 1) - key_interval, 10); stats_checker->AddExpectedStats( NewManualCompactionJobStats( smallest_key, largest_key, num_remaining_L0, 0, num_keys_per_L0_file * num_remaining_L0, kKeySize, kValueSize, 1, num_keys_per_L0_file * num_remaining_L0, compression_ratio, 0)); ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U); TEST_Compact(0, 1, smallest_key, largest_key); int num_L1_files = num_L0_files - num_remaining_L0 + 1; num_L0_files = 0; snprintf(buf, kBufSize, "%d,%d", num_L0_files, num_L1_files); ASSERT_EQ(std::string(buf), FilesPerLevel(1)); // 3rd Phase: generate sparse L0 files (wider key-range, same num of keys) int sparseness = 2; for (uint64_t start_key = key_base; start_key <= key_base * kTestScale; start_key += key_base * sparseness) { MakeTableWithKeyValues( &rnd, start_key, start_key + key_base * sparseness - 1, kKeySize, kValueSize, key_base * sparseness / num_keys_per_L0_file, compression_ratio, 1); snprintf(buf, kBufSize, "%d,%d", ++num_L0_files, num_L1_files); ASSERT_EQ(std::string(buf), FilesPerLevel(1)); } // 4th Phase: perform 
L0 -> L1 compaction again, expect higher write amp // When subcompactions are enabled, the number of output files increases // by 1 because multiple threads are consuming the input and generating // output files without coordinating to see if the output could fit into // a smaller number of files like it does when it runs sequentially int num_output_files = options.max_subcompactions > 1 ? 2 : 1; for (uint64_t start_key = key_base; num_L0_files > 1; start_key += key_base * sparseness) { smallest_key = Key(start_key, 10); largest_key = Key(start_key + key_base * sparseness - key_interval, 10); stats_checker->AddExpectedStats( NewManualCompactionJobStats( smallest_key, largest_key, 3, 2, num_keys_per_L0_file * 3, kKeySize, kValueSize, num_output_files, num_keys_per_L0_file * 2, // 1/3 of the data will be updated. compression_ratio, num_keys_per_L0_file)); ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U); Compact(1, smallest_key, largest_key); if (options.max_subcompactions == 1) { --num_L1_files; } snprintf(buf, kBufSize, "%d,%d", --num_L0_files, num_L1_files); ASSERT_EQ(std::string(buf), FilesPerLevel(1)); } // 5th Phase: Do a full compaction, which involves in two sub-compactions. // Here we expect to have 1 L0 files and 4 L1 files // In the first sub-compaction, we expect L0 compaction. smallest_key = Key(key_base, 10); largest_key = Key(key_base * (kTestScale + 1) - key_interval, 10); stats_checker->AddExpectedStats( NewManualCompactionJobStats( Key(key_base * (kTestScale + 1 - sparseness), 10), largest_key, 2, 1, num_keys_per_L0_file * 3, kKeySize, kValueSize, 1, num_keys_per_L0_file * 2, compression_ratio, num_keys_per_L0_file)); ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U); Compact(1, smallest_key, largest_key); num_L1_files = options.max_subcompactions > 1 ? 
7 : 4; char L1_buf[4]; snprintf(L1_buf, sizeof(L1_buf), "0,%d", num_L1_files); std::string L1_files(L1_buf); ASSERT_EQ(L1_files, FilesPerLevel(1)); options.compression = GetAnyCompression(); if (options.compression == kNoCompression) { break; } stats_checker->EnableCompression(true); compression_ratio = kCompressionRatio; for (int i = 0; i < 5; i++) { ASSERT_OK(Put(1, Slice(Key(key_base + i, 10)), Slice(RandomString(&rnd, 512 * 1024, 1)))); } ASSERT_OK(Flush(1)); reinterpret_cast(db_)->TEST_WaitForCompact(); stats_checker->set_verify_next_comp_io_stats(true); std::atomic first_prepare_write(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::Append:BeforePrepareWrite", [&](void* /*arg*/) { if (first_prepare_write.load()) { options.env->SleepForMicroseconds(3); first_prepare_write.store(false); } }); std::atomic first_flush(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::Flush:BeforeAppend", [&](void* /*arg*/) { if (first_flush.load()) { options.env->SleepForMicroseconds(3); first_flush.store(false); } }); std::atomic first_sync(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::SyncInternal:0", [&](void* /*arg*/) { if (first_sync.load()) { options.env->SleepForMicroseconds(3); first_sync.store(false); } }); std::atomic first_range_sync(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::RangeSync:0", [&](void* /*arg*/) { if (first_range_sync.load()) { options.env->SleepForMicroseconds(3); first_range_sync.store(false); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Compact(1, smallest_key, largest_key); ASSERT_TRUE(!stats_checker->verify_next_comp_io_stats()); ASSERT_TRUE(!first_prepare_write.load()); ASSERT_TRUE(!first_flush.load()); ASSERT_TRUE(!first_sync.load()); ASSERT_TRUE(!first_range_sync.load()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } 
// Verifies the deletion-related compaction stats (input deletions, expired
// deletions, replaced records) reported for an L0 -> L1 compaction when the
// same key range also has files in L1 and L2.
TEST_P(CompactionJobStatsTest, DeletionStatsTest) {
  Random rnd(301);
  uint64_t key_base = 100000l;
  // Note: key_base must be multiple of num_keys_per_L0_file
  int num_keys_per_L0_file = 20;
  const int kTestScale = 8;  // make sure this is even
  const int kKeySize = 10;
  const int kValueSize = 100;
  double compression_ratio = 1.0;
  uint64_t key_interval = key_base / num_keys_per_L0_file;
  uint64_t largest_key_num = key_base * (kTestScale + 1) - key_interval;
  uint64_t cutoff_key_num = key_base * (kTestScale / 2 + 1) - key_interval;
  const std::string smallest_key = Key(key_base - 10, kKeySize);
  const std::string largest_key = Key(largest_key_num + 10, kKeySize);

  // Whenever a compaction completes, this listener will try to
  // verify whether the returned CompactionJobStats matches
  // what we expect.
  auto* stats_checker = new CompactionJobDeletionStatsChecker();
  Options options;
  options.listeners.emplace_back(stats_checker);
  options.create_if_missing = true;
  options.level0_file_num_compaction_trigger = kTestScale+1;
  options.num_levels = 3;
  options.compression = kNoCompression;
  options.max_bytes_for_level_multiplier = 2;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Stage 1: Generate several L0 files and then send them to L2 by
  // using CompactRangeOptions and CompactRange(). These files will
  // have a strict subset of the keys from the full key-range
  for (uint64_t start_key = key_base;
                start_key <= key_base * kTestScale / 2;
                start_key += key_base) {
    MakeTableWithKeyValues(
        &rnd, start_key, start_key + key_base - 1,
        kKeySize, kValueSize, key_interval,
        compression_ratio, 1);
  }

  CompactRangeOptions cr_options;
  cr_options.change_level = true;
  cr_options.target_level = 2;
  // Fail right here if the compaction itself reports an error, rather than
  // only through the file-count check below.
  ASSERT_OK(db_->CompactRange(cr_options, handles_[1], nullptr, nullptr));
  ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);

  // Stage 2: Generate files including keys from the entire key range
  for (uint64_t start_key = key_base;
                start_key <= key_base * kTestScale;
                start_key += key_base) {
    MakeTableWithKeyValues(
        &rnd, start_key, start_key + key_base - 1,
        kKeySize, kValueSize, key_interval,
        compression_ratio, 1);
  }

  // Send these L0 files to L1
  TEST_Compact(0, 1, smallest_key, largest_key);
  ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);

  // Add a new record and flush so now there is a L0 file
  // with a value too (not just deletions from the next step)
  ASSERT_OK(Put(1, Key(key_base-6, kKeySize), "test"));
  ASSERT_OK(Flush(1));

  // Stage 3: Generate L0 files with some deletions so now
  // there are files with the same key range in L0, L1, and L2
  int deletion_interval = 3;
  CompactionJobStats first_compaction_stats;
  SelectivelyDeleteKeys(key_base, largest_key_num,
      key_interval, deletion_interval, kKeySize, cutoff_key_num,
      &first_compaction_stats, 1);

  stats_checker->AddExpectedStats(first_compaction_stats);

  // Stage 4: Trigger compaction and verify the stats
  TEST_Compact(0, 1, smallest_key, largest_key);
}
compaction_input_units : 0; } } return 0; } } // namespace TEST_P(CompactionJobStatsTest, UniversalCompactionTest) { Random rnd(301); uint64_t key_base = 100000000l; // Note: key_base must be multiple of num_keys_per_L0_file int num_keys_per_table = 100; const uint32_t kTestScale = 6; const int kKeySize = 10; const int kValueSize = 900; double compression_ratio = 1.0; uint64_t key_interval = key_base / num_keys_per_table; auto* stats_checker = new CompactionJobStatsChecker(); Options options; options.listeners.emplace_back(stats_checker); options.create_if_missing = true; options.num_levels = 3; options.compression = kNoCompression; options.level0_file_num_compaction_trigger = 2; options.target_file_size_base = num_keys_per_table * 1000; options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.size_ratio = 1; options.compaction_options_universal.max_size_amplification_percent = 1000; options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Generates the expected CompactionJobStats for each compaction for (uint32_t num_flushes = 2; num_flushes <= kTestScale; num_flushes++) { // Here we treat one newly flushed file as an unit. // // For example, if a newly flushed file is 100k, and a compaction has // 4 input units, then this compaction inputs 400k. uint32_t num_input_units = GetUniversalCompactionInputUnits(num_flushes); if (num_input_units == 0) { continue; } // The following statement determines the expected smallest key // based on whether it is a full compaction. A full compaction only // happens when the number of flushes equals to the number of compaction // input runs. uint64_t smallest_key = (num_flushes == num_input_units) ? key_base : key_base * (num_flushes - 1); stats_checker->AddExpectedStats( NewManualCompactionJobStats( Key(smallest_key, 10), Key(smallest_key + key_base * num_input_units - key_interval, 10), num_input_units, num_input_units > 2 ? 
num_input_units / 2 : 0, num_keys_per_table * num_input_units, kKeySize, kValueSize, num_input_units, num_keys_per_table * num_input_units, 1.0, 0, false)); dbfull()->TEST_WaitForCompact(); } ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 3U); for (uint64_t start_key = key_base; start_key <= key_base * kTestScale; start_key += key_base) { MakeTableWithKeyValues( &rnd, start_key, start_key + key_base - 1, kKeySize, kValueSize, key_interval, compression_ratio, 1); reinterpret_cast(db_)->TEST_WaitForCompact(); } ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 0U); } INSTANTIATE_TEST_CASE_P(CompactionJobStatsTest, CompactionJobStatsTest, ::testing::Values(1, 4)); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED, not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE #else int main(int /*argc*/, char** /*argv*/) { return 0; } #endif // !defined(IOS_CROSS_COMPILE) rocksdb-6.11.4/db/compaction/compaction_job_test.cc000066400000000000000000001211771370372246700223330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include #include #include #include #include #include #include "db/blob/blob_index.h" #include "db/column_family.h" #include "db/compaction/compaction_job.h" #include "db/db_impl/db_impl.h" #include "db/error_handler.h" #include "db/version_set.h" #include "file/writable_file_writer.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/write_buffer_manager.h" #include "table/mock_table.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { namespace { void VerifyInitializationOfCompactionJobStats( const CompactionJobStats& compaction_job_stats) { #if !defined(IOS_CROSS_COMPILE) ASSERT_EQ(compaction_job_stats.elapsed_micros, 0U); ASSERT_EQ(compaction_job_stats.num_input_records, 0U); ASSERT_EQ(compaction_job_stats.num_input_files, 0U); ASSERT_EQ(compaction_job_stats.num_input_files_at_output_level, 0U); ASSERT_EQ(compaction_job_stats.num_output_records, 0U); ASSERT_EQ(compaction_job_stats.num_output_files, 0U); ASSERT_EQ(compaction_job_stats.is_manual_compaction, true); ASSERT_EQ(compaction_job_stats.total_input_bytes, 0U); ASSERT_EQ(compaction_job_stats.total_output_bytes, 0U); ASSERT_EQ(compaction_job_stats.total_input_raw_key_bytes, 0U); ASSERT_EQ(compaction_job_stats.total_input_raw_value_bytes, 0U); ASSERT_EQ(compaction_job_stats.smallest_output_key_prefix[0], 0); ASSERT_EQ(compaction_job_stats.largest_output_key_prefix[0], 0); ASSERT_EQ(compaction_job_stats.num_records_replaced, 0U); ASSERT_EQ(compaction_job_stats.num_input_deletion_records, 0U); ASSERT_EQ(compaction_job_stats.num_expired_deletion_records, 0U); ASSERT_EQ(compaction_job_stats.num_corrupt_keys, 0U); #endif // !defined(IOS_CROSS_COMPILE) } } // namespace // TODO(icanadi) Make it simpler once we mock out VersionSet class CompactionJobTest : public testing::Test { public: CompactionJobTest() : env_(Env::Default()), 
fs_(std::make_shared(env_)), dbname_(test::PerThreadDBPath("compaction_job_test")), db_options_(), mutable_cf_options_(cf_options_), table_cache_(NewLRUCache(50000, 16)), write_buffer_manager_(db_options_.db_write_buffer_size), versions_(new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr)), shutting_down_(false), preserve_deletes_seqnum_(0), mock_table_factory_(new mock::MockTableFactory()), error_handler_(nullptr, db_options_, &mutex_) { EXPECT_OK(env_->CreateDirIfMissing(dbname_)); db_options_.env = env_; db_options_.fs = fs_; db_options_.db_paths.emplace_back(dbname_, std::numeric_limits::max()); } std::string GenerateFileName(uint64_t file_number) { FileMetaData meta; std::vector db_paths; db_paths.emplace_back(dbname_, std::numeric_limits::max()); meta.fd = FileDescriptor(file_number, 0, 0); return TableFileName(db_paths, meta.fd.GetNumber(), meta.fd.GetPathId()); } static std::string KeyStr(const std::string& user_key, const SequenceNumber seq_num, const ValueType t) { return InternalKey(user_key, seq_num, t).Encode().ToString(); } static std::string BlobStr(uint64_t blob_file_number, uint64_t offset, uint64_t size) { std::string blob_index; BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size, kNoCompression); return blob_index; } static std::string BlobStrTTL(uint64_t blob_file_number, uint64_t offset, uint64_t size, uint64_t expiration) { std::string blob_index; BlobIndex::EncodeBlobTTL(&blob_index, expiration, blob_file_number, offset, size, kNoCompression); return blob_index; } static std::string BlobStrInlinedTTL(const Slice& value, uint64_t expiration) { std::string blob_index; BlobIndex::EncodeInlinedTTL(&blob_index, expiration, value); return blob_index; } void AddMockFile(const stl_wrappers::KVMap& contents, int level = 0) { assert(contents.size() > 0); bool first_key = true; std::string smallest, largest; InternalKey smallest_key, 
largest_key; SequenceNumber smallest_seqno = kMaxSequenceNumber; SequenceNumber largest_seqno = 0; uint64_t oldest_blob_file_number = kInvalidBlobFileNumber; for (auto kv : contents) { ParsedInternalKey key; std::string skey; std::string value; std::tie(skey, value) = kv; bool parsed = ParseInternalKey(skey, &key); smallest_seqno = std::min(smallest_seqno, key.sequence); largest_seqno = std::max(largest_seqno, key.sequence); if (first_key || cfd_->user_comparator()->Compare(key.user_key, smallest) < 0) { smallest.assign(key.user_key.data(), key.user_key.size()); smallest_key.DecodeFrom(skey); } if (first_key || cfd_->user_comparator()->Compare(key.user_key, largest) > 0) { largest.assign(key.user_key.data(), key.user_key.size()); largest_key.DecodeFrom(skey); } first_key = false; if (parsed && key.type == kTypeBlobIndex) { BlobIndex blob_index; const Status s = blob_index.DecodeFrom(value); if (!s.ok()) { continue; } if (blob_index.IsInlined() || blob_index.HasTTL() || blob_index.file_number() == kInvalidBlobFileNumber) { continue; } if (oldest_blob_file_number == kInvalidBlobFileNumber || oldest_blob_file_number > blob_index.file_number()) { oldest_blob_file_number = blob_index.file_number(); } } } uint64_t file_number = versions_->NewFileNumber(); EXPECT_OK(mock_table_factory_->CreateMockTable( env_, GenerateFileName(file_number), std::move(contents))); VersionEdit edit; edit.AddFile(level, file_number, 0, 10, smallest_key, largest_key, smallest_seqno, largest_seqno, false, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); mutex_.Lock(); versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, &edit, &mutex_); mutex_.Unlock(); } void SetLastSequence(const SequenceNumber sequence_number) { versions_->SetLastAllocatedSequence(sequence_number + 1); versions_->SetLastPublishedSequence(sequence_number + 1); versions_->SetLastSequence(sequence_number + 
1); } // returns expected result after compaction stl_wrappers::KVMap CreateTwoFiles(bool gen_corrupted_keys) { auto expected_results = mock::MakeMockFile(); const int kKeysPerFile = 10000; const int kCorruptKeysPerFile = 200; const int kMatchingKeys = kKeysPerFile / 2; SequenceNumber sequence_number = 0; auto corrupt_id = [&](int id) { return gen_corrupted_keys && id > 0 && id <= kCorruptKeysPerFile; }; for (int i = 0; i < 2; ++i) { auto contents = mock::MakeMockFile(); for (int k = 0; k < kKeysPerFile; ++k) { auto key = ToString(i * kMatchingKeys + k); auto value = ToString(i * kKeysPerFile + k); InternalKey internal_key(key, ++sequence_number, kTypeValue); // This is how the key will look like once it's written in bottommost // file InternalKey bottommost_internal_key( key, 0, kTypeValue); if (corrupt_id(k)) { test::CorruptKeyType(&internal_key); test::CorruptKeyType(&bottommost_internal_key); } contents.insert({ internal_key.Encode().ToString(), value }); if (i == 1 || k < kMatchingKeys || corrupt_id(k - kMatchingKeys)) { expected_results.insert( { bottommost_internal_key.Encode().ToString(), value }); } } AddMockFile(contents); } SetLastSequence(sequence_number); return expected_results; } void NewDB() { DestroyDB(dbname_, Options()); EXPECT_OK(env_->CreateDirIfMissing(dbname_)); versions_.reset(new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr)); compaction_job_stats_.Reset(); SetIdentityFile(env_, dbname_); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { DBImpl* impl = new DBImpl(DBOptions(), dbname_); std::string db_id; impl->GetDbIdentityFromIdentityFile(&db_id); new_db.SetDBId(db_id); } new_db.SetLogNumber(0); new_db.SetNextFile(2); new_db.SetLastSequence(0); const std::string manifest = DescriptorFileName(dbname_, 1); std::unique_ptr file; Status s = env_->NewWritableFile( manifest, &file, env_->OptimizeForManifestWrite(env_options_)); 
ASSERT_OK(s); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), manifest, env_options_)); { log::Writer log(std::move(file_writer), 0, false); std::string record; new_db.EncodeTo(&record); s = log.AddRecord(record); } ASSERT_OK(s); // Make "CURRENT" file that points to the new manifest file. s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); std::vector column_families; cf_options_.table_factory = mock_table_factory_; cf_options_.merge_operator = merge_op_; cf_options_.compaction_filter = compaction_filter_.get(); column_families.emplace_back(kDefaultColumnFamilyName, cf_options_); EXPECT_OK(versions_->Recover(column_families, false)); cfd_ = versions_->GetColumnFamilySet()->GetDefault(); } void RunCompaction( const std::vector>& input_files, const stl_wrappers::KVMap& expected_results, const std::vector& snapshots = {}, SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber, int output_level = 1, bool verify = true, uint64_t expected_oldest_blob_file_number = kInvalidBlobFileNumber) { auto cfd = versions_->GetColumnFamilySet()->GetDefault(); size_t num_input_files = 0; std::vector compaction_input_files; for (size_t level = 0; level < input_files.size(); level++) { auto level_files = input_files[level]; CompactionInputFiles compaction_level; compaction_level.level = static_cast(level); compaction_level.files.insert(compaction_level.files.end(), level_files.begin(), level_files.end()); compaction_input_files.push_back(compaction_level); num_input_files += level_files.size(); } Compaction compaction( cfd->current()->storage_info(), *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), compaction_input_files, output_level, 1024 * 1024, 10 * 1024 * 1024, 0, kNoCompression, cfd->GetLatestMutableCFOptions()->compression_opts, 0, {}, true); compaction.SetInputVersion(cfd->current()); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get()); mutex_.Lock(); EventLogger 
event_logger(db_options_.info_log.get()); // TODO(yiwu) add a mock snapshot checker and add test for it. SnapshotChecker* snapshot_checker = nullptr; CompactionJob compaction_job( 0, &compaction, db_options_, env_options_, versions_.get(), &shutting_down_, preserve_deletes_seqnum_, &log_buffer, nullptr, nullptr, nullptr, &mutex_, &error_handler_, snapshots, earliest_write_conflict_snapshot, snapshot_checker, table_cache_, &event_logger, false, false, dbname_, &compaction_job_stats_, Env::Priority::USER); VerifyInitializationOfCompactionJobStats(compaction_job_stats_); compaction_job.Prepare(); mutex_.Unlock(); Status s; s = compaction_job.Run(); ASSERT_OK(s); mutex_.Lock(); ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions())); mutex_.Unlock(); if (verify) { ASSERT_GE(compaction_job_stats_.elapsed_micros, 0U); ASSERT_EQ(compaction_job_stats_.num_input_files, num_input_files); if (expected_results.empty()) { ASSERT_EQ(compaction_job_stats_.num_output_files, 0U); } else { ASSERT_EQ(compaction_job_stats_.num_output_files, 1U); mock_table_factory_->AssertLatestFile(expected_results); auto output_files = cfd->current()->storage_info()->LevelFiles(output_level); ASSERT_EQ(output_files.size(), 1); ASSERT_EQ(output_files[0]->oldest_blob_file_number, expected_oldest_blob_file_number); } } } Env* env_; std::shared_ptr fs_; std::string dbname_; EnvOptions env_options_; ImmutableDBOptions db_options_; ColumnFamilyOptions cf_options_; MutableCFOptions mutable_cf_options_; std::shared_ptr table_cache_; WriteController write_controller_; WriteBufferManager write_buffer_manager_; std::unique_ptr versions_; InstrumentedMutex mutex_; std::atomic shutting_down_; SequenceNumber preserve_deletes_seqnum_; std::shared_ptr mock_table_factory_; CompactionJobStats compaction_job_stats_; ColumnFamilyData* cfd_; std::unique_ptr compaction_filter_; std::shared_ptr merge_op_; ErrorHandler error_handler_; }; TEST_F(CompactionJobTest, Simple) { NewDB(); auto expected_results = 
CreateTwoFiles(false); auto cfd = versions_->GetColumnFamilySet()->GetDefault(); auto files = cfd->current()->storage_info()->LevelFiles(0); ASSERT_EQ(2U, files.size()); RunCompaction({ files }, expected_results); } TEST_F(CompactionJobTest, DISABLED_SimpleCorrupted) { NewDB(); auto expected_results = CreateTwoFiles(true); auto cfd = versions_->GetColumnFamilySet()->GetDefault(); auto files = cfd->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); ASSERT_EQ(compaction_job_stats_.num_corrupt_keys, 400U); } TEST_F(CompactionJobTest, SimpleDeletion) { NewDB(); auto file1 = mock::MakeMockFile({{KeyStr("c", 4U, kTypeDeletion), ""}, {KeyStr("c", 3U, kTypeValue), "val"}}); AddMockFile(file1); auto file2 = mock::MakeMockFile({{KeyStr("b", 2U, kTypeValue), "val"}, {KeyStr("b", 1U, kTypeValue), "val"}}); AddMockFile(file2); auto expected_results = mock::MakeMockFile({{KeyStr("b", 0U, kTypeValue), "val"}}); SetLastSequence(4U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, OutputNothing) { NewDB(); auto file1 = mock::MakeMockFile({{KeyStr("a", 1U, kTypeValue), "val"}}); AddMockFile(file1); auto file2 = mock::MakeMockFile({{KeyStr("a", 2U, kTypeDeletion), ""}}); AddMockFile(file2); auto expected_results = mock::MakeMockFile(); SetLastSequence(4U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, SimpleOverwrite) { NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("a", 3U, kTypeValue), "val2"}, {KeyStr("b", 4U, kTypeValue), "val3"}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile({{KeyStr("a", 1U, kTypeValue), "val"}, {KeyStr("b", 2U, kTypeValue), "val"}}); AddMockFile(file2); auto expected_results = mock::MakeMockFile({{KeyStr("a", 0U, kTypeValue), "val2"}, {KeyStr("b", 0U, kTypeValue), "val3"}}); SetLastSequence(4U); auto files = 
cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, SimpleNonLastLevel) { NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("a", 5U, kTypeValue), "val2"}, {KeyStr("b", 6U, kTypeValue), "val3"}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile({{KeyStr("a", 3U, kTypeValue), "val"}, {KeyStr("b", 4U, kTypeValue), "val"}}); AddMockFile(file2, 1); auto file3 = mock::MakeMockFile({{KeyStr("a", 1U, kTypeValue), "val"}, {KeyStr("b", 2U, kTypeValue), "val"}}); AddMockFile(file3, 2); // Because level 1 is not the last level, the sequence numbers of a and b // cannot be set to 0 auto expected_results = mock::MakeMockFile({{KeyStr("a", 5U, kTypeValue), "val2"}, {KeyStr("b", 6U, kTypeValue), "val3"}}); SetLastSequence(6U); auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0); auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1); RunCompaction({lvl0_files, lvl1_files}, expected_results); } TEST_F(CompactionJobTest, SimpleMerge) { merge_op_ = MergeOperators::CreateStringAppendOperator(); NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("a", 5U, kTypeMerge), "5"}, {KeyStr("a", 4U, kTypeMerge), "4"}, {KeyStr("a", 3U, kTypeValue), "3"}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile( {{KeyStr("b", 2U, kTypeMerge), "2"}, {KeyStr("b", 1U, kTypeValue), "1"}}); AddMockFile(file2); auto expected_results = mock::MakeMockFile({{KeyStr("a", 0U, kTypeValue), "3,4,5"}, {KeyStr("b", 0U, kTypeValue), "1,2"}}); SetLastSequence(5U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, NonAssocMerge) { merge_op_ = MergeOperators::CreateStringAppendTESTOperator(); NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("a", 5U, kTypeMerge), "5"}, {KeyStr("a", 4U, kTypeMerge), "4"}, {KeyStr("a", 3U, kTypeMerge), "3"}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile( {{KeyStr("b", 2U, kTypeMerge), "2"}, 
{KeyStr("b", 1U, kTypeMerge), "1"}}); AddMockFile(file2); auto expected_results = mock::MakeMockFile({{KeyStr("a", 0U, kTypeValue), "3,4,5"}, {KeyStr("b", 0U, kTypeValue), "1,2"}}); SetLastSequence(5U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } // Filters merge operands with value 10. TEST_F(CompactionJobTest, MergeOperandFilter) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); compaction_filter_.reset(new test::FilterNumber(10U)); NewDB(); auto file1 = mock::MakeMockFile( {{KeyStr("a", 5U, kTypeMerge), test::EncodeInt(5U)}, {KeyStr("a", 4U, kTypeMerge), test::EncodeInt(10U)}, // Filtered {KeyStr("a", 3U, kTypeMerge), test::EncodeInt(3U)}}); AddMockFile(file1); auto file2 = mock::MakeMockFile({ {KeyStr("b", 2U, kTypeMerge), test::EncodeInt(2U)}, {KeyStr("b", 1U, kTypeMerge), test::EncodeInt(10U)} // Filtered }); AddMockFile(file2); auto expected_results = mock::MakeMockFile({{KeyStr("a", 0U, kTypeValue), test::EncodeInt(8U)}, {KeyStr("b", 0U, kTypeValue), test::EncodeInt(2U)}}); SetLastSequence(5U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, FilterSomeMergeOperands) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); compaction_filter_.reset(new test::FilterNumber(10U)); NewDB(); auto file1 = mock::MakeMockFile( {{KeyStr("a", 5U, kTypeMerge), test::EncodeInt(5U)}, {KeyStr("a", 4U, kTypeMerge), test::EncodeInt(10U)}, // Filtered {KeyStr("a", 3U, kTypeValue), test::EncodeInt(5U)}, {KeyStr("d", 8U, kTypeMerge), test::EncodeInt(10U)}}); AddMockFile(file1); auto file2 = mock::MakeMockFile({{KeyStr("b", 2U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 1U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("c", 2U, kTypeMerge), test::EncodeInt(3U)}, {KeyStr("c", 1U, kTypeValue), test::EncodeInt(7U)}, {KeyStr("d", 1U, kTypeValue), test::EncodeInt(6U)}}); AddMockFile(file2); auto file3 = 
mock::MakeMockFile({{KeyStr("a", 1U, kTypeMerge), test::EncodeInt(3U)}}); AddMockFile(file3, 2); auto expected_results = mock::MakeMockFile({ {KeyStr("a", 5U, kTypeValue), test::EncodeInt(10U)}, {KeyStr("c", 2U, kTypeValue), test::EncodeInt(10U)}, {KeyStr("d", 1U, kTypeValue), test::EncodeInt(6U)} // b does not appear because the operands are filtered }); SetLastSequence(5U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } // Test where all operands/merge results are filtered out. TEST_F(CompactionJobTest, FilterAllMergeOperands) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); compaction_filter_.reset(new test::FilterNumber(10U)); NewDB(); auto file1 = mock::MakeMockFile({{KeyStr("a", 11U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("a", 10U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("a", 9U, kTypeMerge), test::EncodeInt(10U)}}); AddMockFile(file1); auto file2 = mock::MakeMockFile({{KeyStr("b", 8U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 7U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 6U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 5U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 4U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 3U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 2U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("c", 2U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("c", 1U, kTypeMerge), test::EncodeInt(10U)}}); AddMockFile(file2); auto file3 = mock::MakeMockFile({{KeyStr("a", 2U, kTypeMerge), test::EncodeInt(10U)}, {KeyStr("b", 1U, kTypeMerge), test::EncodeInt(10U)}}); AddMockFile(file3, 2); SetLastSequence(11U); auto files = cfd_->current()->storage_info()->LevelFiles(0); stl_wrappers::KVMap empty_map; RunCompaction({files}, empty_map); } TEST_F(CompactionJobTest, SimpleSingleDelete) { NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("a", 5U, kTypeDeletion), ""}, {KeyStr("b", 6U, kTypeSingleDeletion), ""}, }); AddMockFile(file1); auto file2 = 
mock::MakeMockFile({{KeyStr("a", 3U, kTypeValue), "val"}, {KeyStr("b", 4U, kTypeValue), "val"}}); AddMockFile(file2); auto file3 = mock::MakeMockFile({ {KeyStr("a", 1U, kTypeValue), "val"}, }); AddMockFile(file3, 2); auto expected_results = mock::MakeMockFile({{KeyStr("a", 5U, kTypeDeletion), ""}}); SetLastSequence(6U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, SingleDeleteSnapshots) { NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("A", 12U, kTypeSingleDeletion), ""}, {KeyStr("a", 12U, kTypeSingleDeletion), ""}, {KeyStr("b", 21U, kTypeSingleDeletion), ""}, {KeyStr("c", 22U, kTypeSingleDeletion), ""}, {KeyStr("d", 9U, kTypeSingleDeletion), ""}, {KeyStr("f", 21U, kTypeSingleDeletion), ""}, {KeyStr("j", 11U, kTypeSingleDeletion), ""}, {KeyStr("j", 9U, kTypeSingleDeletion), ""}, {KeyStr("k", 12U, kTypeSingleDeletion), ""}, {KeyStr("k", 11U, kTypeSingleDeletion), ""}, {KeyStr("l", 3U, kTypeSingleDeletion), ""}, {KeyStr("l", 2U, kTypeSingleDeletion), ""}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile({ {KeyStr("0", 2U, kTypeSingleDeletion), ""}, {KeyStr("a", 11U, kTypeValue), "val1"}, {KeyStr("b", 11U, kTypeValue), "val2"}, {KeyStr("c", 21U, kTypeValue), "val3"}, {KeyStr("d", 8U, kTypeValue), "val4"}, {KeyStr("e", 2U, kTypeSingleDeletion), ""}, {KeyStr("f", 1U, kTypeValue), "val1"}, {KeyStr("g", 11U, kTypeSingleDeletion), ""}, {KeyStr("h", 2U, kTypeSingleDeletion), ""}, {KeyStr("m", 12U, kTypeValue), "val1"}, {KeyStr("m", 11U, kTypeSingleDeletion), ""}, {KeyStr("m", 8U, kTypeValue), "val2"}, }); AddMockFile(file2); auto file3 = mock::MakeMockFile({ {KeyStr("A", 1U, kTypeValue), "val"}, {KeyStr("e", 1U, kTypeValue), "val"}, }); AddMockFile(file3, 2); auto expected_results = mock::MakeMockFile({ {KeyStr("A", 12U, kTypeSingleDeletion), ""}, {KeyStr("a", 12U, kTypeSingleDeletion), ""}, {KeyStr("a", 11U, kTypeValue), ""}, {KeyStr("b", 21U, kTypeSingleDeletion), ""}, 
{KeyStr("b", 11U, kTypeValue), "val2"}, {KeyStr("c", 22U, kTypeSingleDeletion), ""}, {KeyStr("c", 21U, kTypeValue), ""}, {KeyStr("e", 2U, kTypeSingleDeletion), ""}, {KeyStr("f", 21U, kTypeSingleDeletion), ""}, {KeyStr("f", 1U, kTypeValue), "val1"}, {KeyStr("g", 11U, kTypeSingleDeletion), ""}, {KeyStr("j", 11U, kTypeSingleDeletion), ""}, {KeyStr("k", 11U, kTypeSingleDeletion), ""}, {KeyStr("m", 12U, kTypeValue), "val1"}, {KeyStr("m", 11U, kTypeSingleDeletion), ""}, {KeyStr("m", 8U, kTypeValue), "val2"}, }); SetLastSequence(22U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results, {10U, 20U}, 10U); } TEST_F(CompactionJobTest, EarliestWriteConflictSnapshot) { NewDB(); // Test multiple snapshots where the earliest snapshot is not a // write-conflic-snapshot. auto file1 = mock::MakeMockFile({ {KeyStr("A", 24U, kTypeSingleDeletion), ""}, {KeyStr("A", 23U, kTypeValue), "val"}, {KeyStr("B", 24U, kTypeSingleDeletion), ""}, {KeyStr("B", 23U, kTypeValue), "val"}, {KeyStr("D", 24U, kTypeSingleDeletion), ""}, {KeyStr("G", 32U, kTypeSingleDeletion), ""}, {KeyStr("G", 31U, kTypeValue), "val"}, {KeyStr("G", 24U, kTypeSingleDeletion), ""}, {KeyStr("G", 23U, kTypeValue), "val2"}, {KeyStr("H", 31U, kTypeValue), "val"}, {KeyStr("H", 24U, kTypeSingleDeletion), ""}, {KeyStr("H", 23U, kTypeValue), "val"}, {KeyStr("I", 35U, kTypeSingleDeletion), ""}, {KeyStr("I", 34U, kTypeValue), "val2"}, {KeyStr("I", 33U, kTypeSingleDeletion), ""}, {KeyStr("I", 32U, kTypeValue), "val3"}, {KeyStr("I", 31U, kTypeSingleDeletion), ""}, {KeyStr("J", 34U, kTypeValue), "val"}, {KeyStr("J", 33U, kTypeSingleDeletion), ""}, {KeyStr("J", 25U, kTypeValue), "val2"}, {KeyStr("J", 24U, kTypeSingleDeletion), ""}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile({ {KeyStr("A", 14U, kTypeSingleDeletion), ""}, {KeyStr("A", 13U, kTypeValue), "val2"}, {KeyStr("C", 14U, kTypeSingleDeletion), ""}, {KeyStr("C", 13U, kTypeValue), "val"}, {KeyStr("E", 12U, 
kTypeSingleDeletion), ""}, {KeyStr("F", 4U, kTypeSingleDeletion), ""}, {KeyStr("F", 3U, kTypeValue), "val"}, {KeyStr("G", 14U, kTypeSingleDeletion), ""}, {KeyStr("G", 13U, kTypeValue), "val3"}, {KeyStr("H", 14U, kTypeSingleDeletion), ""}, {KeyStr("H", 13U, kTypeValue), "val2"}, {KeyStr("I", 13U, kTypeValue), "val4"}, {KeyStr("I", 12U, kTypeSingleDeletion), ""}, {KeyStr("I", 11U, kTypeValue), "val5"}, {KeyStr("J", 15U, kTypeValue), "val3"}, {KeyStr("J", 14U, kTypeSingleDeletion), ""}, }); AddMockFile(file2); auto expected_results = mock::MakeMockFile({ {KeyStr("A", 24U, kTypeSingleDeletion), ""}, {KeyStr("A", 23U, kTypeValue), ""}, {KeyStr("B", 24U, kTypeSingleDeletion), ""}, {KeyStr("B", 23U, kTypeValue), ""}, {KeyStr("D", 24U, kTypeSingleDeletion), ""}, {KeyStr("E", 12U, kTypeSingleDeletion), ""}, {KeyStr("G", 32U, kTypeSingleDeletion), ""}, {KeyStr("G", 31U, kTypeValue), ""}, {KeyStr("H", 31U, kTypeValue), "val"}, {KeyStr("I", 35U, kTypeSingleDeletion), ""}, {KeyStr("I", 34U, kTypeValue), ""}, {KeyStr("I", 31U, kTypeSingleDeletion), ""}, {KeyStr("I", 13U, kTypeValue), "val4"}, {KeyStr("J", 34U, kTypeValue), "val"}, {KeyStr("J", 33U, kTypeSingleDeletion), ""}, {KeyStr("J", 25U, kTypeValue), "val2"}, {KeyStr("J", 24U, kTypeSingleDeletion), ""}, {KeyStr("J", 15U, kTypeValue), "val3"}, {KeyStr("J", 14U, kTypeSingleDeletion), ""}, }); SetLastSequence(24U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results, {10U, 20U, 30U}, 20U); } TEST_F(CompactionJobTest, SingleDeleteZeroSeq) { NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("A", 10U, kTypeSingleDeletion), ""}, {KeyStr("dummy", 5U, kTypeValue), "val2"}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile({ {KeyStr("A", 0U, kTypeValue), "val"}, }); AddMockFile(file2); auto expected_results = mock::MakeMockFile({ {KeyStr("dummy", 0U, kTypeValue), "val2"}, }); SetLastSequence(22U); auto files = cfd_->current()->storage_info()->LevelFiles(0); 
RunCompaction({files}, expected_results, {}); } TEST_F(CompactionJobTest, MultiSingleDelete) { // Tests three scenarios involving multiple single delete/put pairs: // // A: Put Snapshot SDel Put SDel -> Put Snapshot SDel // B: Snapshot Put SDel Put SDel Snapshot -> Snapshot SDel Snapshot // C: SDel Put SDel Snapshot Put -> Snapshot Put // D: (Put) SDel Snapshot Put SDel -> (Put) SDel Snapshot SDel // E: Put SDel Snapshot Put SDel -> Snapshot SDel // F: Put SDel Put Sdel Snapshot -> removed // G: Snapshot SDel Put SDel Put -> Snapshot Put SDel // H: (Put) Put SDel Put Sdel Snapshot -> Removed // I: (Put) Snapshot Put SDel Put SDel -> SDel // J: Put Put SDel Put SDel SDel Snapshot Put Put SDel SDel Put // -> Snapshot Put // K: SDel SDel Put SDel Put Put Snapshot SDel Put SDel SDel Put SDel // -> Snapshot Put Snapshot SDel // L: SDel Put Del Put SDel Snapshot Del Put Del SDel Put SDel // -> Snapshot SDel // M: (Put) SDel Put Del Put SDel Snapshot Put Del SDel Put SDel Del // -> SDel Snapshot Del NewDB(); auto file1 = mock::MakeMockFile({ {KeyStr("A", 14U, kTypeSingleDeletion), ""}, {KeyStr("A", 13U, kTypeValue), "val5"}, {KeyStr("A", 12U, kTypeSingleDeletion), ""}, {KeyStr("B", 14U, kTypeSingleDeletion), ""}, {KeyStr("B", 13U, kTypeValue), "val2"}, {KeyStr("C", 14U, kTypeValue), "val3"}, {KeyStr("D", 12U, kTypeSingleDeletion), ""}, {KeyStr("D", 11U, kTypeValue), "val4"}, {KeyStr("G", 15U, kTypeValue), "val"}, {KeyStr("G", 14U, kTypeSingleDeletion), ""}, {KeyStr("G", 13U, kTypeValue), "val"}, {KeyStr("I", 14U, kTypeSingleDeletion), ""}, {KeyStr("I", 13U, kTypeValue), "val"}, {KeyStr("J", 15U, kTypeValue), "val"}, {KeyStr("J", 14U, kTypeSingleDeletion), ""}, {KeyStr("J", 13U, kTypeSingleDeletion), ""}, {KeyStr("J", 12U, kTypeValue), "val"}, {KeyStr("J", 11U, kTypeValue), "val"}, {KeyStr("K", 16U, kTypeSingleDeletion), ""}, {KeyStr("K", 15U, kTypeValue), "val1"}, {KeyStr("K", 14U, kTypeSingleDeletion), ""}, {KeyStr("K", 13U, kTypeSingleDeletion), ""}, {KeyStr("K", 12U, 
kTypeValue), "val2"}, {KeyStr("K", 11U, kTypeSingleDeletion), ""}, {KeyStr("L", 16U, kTypeSingleDeletion), ""}, {KeyStr("L", 15U, kTypeValue), "val"}, {KeyStr("L", 14U, kTypeSingleDeletion), ""}, {KeyStr("L", 13U, kTypeDeletion), ""}, {KeyStr("L", 12U, kTypeValue), "val"}, {KeyStr("L", 11U, kTypeDeletion), ""}, {KeyStr("M", 16U, kTypeDeletion), ""}, {KeyStr("M", 15U, kTypeSingleDeletion), ""}, {KeyStr("M", 14U, kTypeValue), "val"}, {KeyStr("M", 13U, kTypeSingleDeletion), ""}, {KeyStr("M", 12U, kTypeDeletion), ""}, {KeyStr("M", 11U, kTypeValue), "val"}, }); AddMockFile(file1); auto file2 = mock::MakeMockFile({ {KeyStr("A", 10U, kTypeValue), "val"}, {KeyStr("B", 12U, kTypeSingleDeletion), ""}, {KeyStr("B", 11U, kTypeValue), "val2"}, {KeyStr("C", 10U, kTypeSingleDeletion), ""}, {KeyStr("C", 9U, kTypeValue), "val6"}, {KeyStr("C", 8U, kTypeSingleDeletion), ""}, {KeyStr("D", 10U, kTypeSingleDeletion), ""}, {KeyStr("E", 12U, kTypeSingleDeletion), ""}, {KeyStr("E", 11U, kTypeValue), "val"}, {KeyStr("E", 5U, kTypeSingleDeletion), ""}, {KeyStr("E", 4U, kTypeValue), "val"}, {KeyStr("F", 6U, kTypeSingleDeletion), ""}, {KeyStr("F", 5U, kTypeValue), "val"}, {KeyStr("F", 4U, kTypeSingleDeletion), ""}, {KeyStr("F", 3U, kTypeValue), "val"}, {KeyStr("G", 12U, kTypeSingleDeletion), ""}, {KeyStr("H", 6U, kTypeSingleDeletion), ""}, {KeyStr("H", 5U, kTypeValue), "val"}, {KeyStr("H", 4U, kTypeSingleDeletion), ""}, {KeyStr("H", 3U, kTypeValue), "val"}, {KeyStr("I", 12U, kTypeSingleDeletion), ""}, {KeyStr("I", 11U, kTypeValue), "val"}, {KeyStr("J", 6U, kTypeSingleDeletion), ""}, {KeyStr("J", 5U, kTypeSingleDeletion), ""}, {KeyStr("J", 4U, kTypeValue), "val"}, {KeyStr("J", 3U, kTypeSingleDeletion), ""}, {KeyStr("J", 2U, kTypeValue), "val"}, {KeyStr("K", 8U, kTypeValue), "val3"}, {KeyStr("K", 7U, kTypeValue), "val4"}, {KeyStr("K", 6U, kTypeSingleDeletion), ""}, {KeyStr("K", 5U, kTypeValue), "val5"}, {KeyStr("K", 2U, kTypeSingleDeletion), ""}, {KeyStr("K", 1U, kTypeSingleDeletion), ""}, 
{KeyStr("L", 5U, kTypeSingleDeletion), ""}, {KeyStr("L", 4U, kTypeValue), "val"}, {KeyStr("L", 3U, kTypeDeletion), ""}, {KeyStr("L", 2U, kTypeValue), "val"}, {KeyStr("L", 1U, kTypeSingleDeletion), ""}, {KeyStr("M", 10U, kTypeSingleDeletion), ""}, {KeyStr("M", 7U, kTypeValue), "val"}, {KeyStr("M", 5U, kTypeDeletion), ""}, {KeyStr("M", 4U, kTypeValue), "val"}, {KeyStr("M", 3U, kTypeSingleDeletion), ""}, }); AddMockFile(file2); auto file3 = mock::MakeMockFile({ {KeyStr("D", 1U, kTypeValue), "val"}, {KeyStr("H", 1U, kTypeValue), "val"}, {KeyStr("I", 2U, kTypeValue), "val"}, }); AddMockFile(file3, 2); auto file4 = mock::MakeMockFile({ {KeyStr("M", 1U, kTypeValue), "val"}, }); AddMockFile(file4, 2); auto expected_results = mock::MakeMockFile({{KeyStr("A", 14U, kTypeSingleDeletion), ""}, {KeyStr("A", 13U, kTypeValue), ""}, {KeyStr("A", 12U, kTypeSingleDeletion), ""}, {KeyStr("A", 10U, kTypeValue), "val"}, {KeyStr("B", 14U, kTypeSingleDeletion), ""}, {KeyStr("B", 13U, kTypeValue), ""}, {KeyStr("C", 14U, kTypeValue), "val3"}, {KeyStr("D", 12U, kTypeSingleDeletion), ""}, {KeyStr("D", 11U, kTypeValue), ""}, {KeyStr("D", 10U, kTypeSingleDeletion), ""}, {KeyStr("E", 12U, kTypeSingleDeletion), ""}, {KeyStr("E", 11U, kTypeValue), ""}, {KeyStr("G", 15U, kTypeValue), "val"}, {KeyStr("G", 12U, kTypeSingleDeletion), ""}, {KeyStr("I", 14U, kTypeSingleDeletion), ""}, {KeyStr("I", 13U, kTypeValue), ""}, {KeyStr("J", 15U, kTypeValue), "val"}, {KeyStr("K", 16U, kTypeSingleDeletion), ""}, {KeyStr("K", 15U, kTypeValue), ""}, {KeyStr("K", 11U, kTypeSingleDeletion), ""}, {KeyStr("K", 8U, kTypeValue), "val3"}, {KeyStr("L", 16U, kTypeSingleDeletion), ""}, {KeyStr("L", 15U, kTypeValue), ""}, {KeyStr("M", 16U, kTypeDeletion), ""}, {KeyStr("M", 3U, kTypeSingleDeletion), ""}}); SetLastSequence(22U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results, {10U}, 10U); } // This test documents the behavior where a corrupt key follows a deletion or a // 
single deletion and the (single) deletion gets removed while the corrupt key // gets written out. TODO(noetzli): We probably want a better way to treat // corrupt keys. TEST_F(CompactionJobTest, DISABLED_CorruptionAfterDeletion) { NewDB(); auto file1 = mock::MakeMockFile({{test::KeyStr("A", 6U, kTypeValue), "val3"}, {test::KeyStr("a", 5U, kTypeDeletion), ""}, {test::KeyStr("a", 4U, kTypeValue, true), "val"}}); AddMockFile(file1); auto file2 = mock::MakeMockFile({{test::KeyStr("b", 3U, kTypeSingleDeletion), ""}, {test::KeyStr("b", 2U, kTypeValue, true), "val"}, {test::KeyStr("c", 1U, kTypeValue), "val2"}}); AddMockFile(file2); auto expected_results = mock::MakeMockFile({{test::KeyStr("A", 0U, kTypeValue), "val3"}, {test::KeyStr("a", 0U, kTypeValue, true), "val"}, {test::KeyStr("b", 0U, kTypeValue, true), "val"}, {test::KeyStr("c", 0U, kTypeValue), "val2"}}); SetLastSequence(6U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results); } TEST_F(CompactionJobTest, OldestBlobFileNumber) { NewDB(); // Note: blob1 is inlined TTL, so it will not be considered for the purposes // of identifying the oldest referenced blob file. Similarly, blob6 will be // ignored because it has TTL and hence refers to a TTL blob file. 
const stl_wrappers::KVMap::value_type blob1( KeyStr("a", 1U, kTypeBlobIndex), BlobStrInlinedTTL("foo", 1234567890ULL)); const stl_wrappers::KVMap::value_type blob2(KeyStr("b", 2U, kTypeBlobIndex), BlobStr(59, 123456, 999)); const stl_wrappers::KVMap::value_type blob3(KeyStr("c", 3U, kTypeBlobIndex), BlobStr(138, 1000, 1 << 8)); auto file1 = mock::MakeMockFile({blob1, blob2, blob3}); AddMockFile(file1); const stl_wrappers::KVMap::value_type blob4(KeyStr("d", 4U, kTypeBlobIndex), BlobStr(199, 3 << 10, 1 << 20)); const stl_wrappers::KVMap::value_type blob5(KeyStr("e", 5U, kTypeBlobIndex), BlobStr(19, 6789, 333)); const stl_wrappers::KVMap::value_type blob6( KeyStr("f", 6U, kTypeBlobIndex), BlobStrTTL(5, 2048, 1 << 7, 1234567890ULL)); auto file2 = mock::MakeMockFile({blob4, blob5, blob6}); AddMockFile(file2); const stl_wrappers::KVMap::value_type expected_blob1( KeyStr("a", 0U, kTypeBlobIndex), blob1.second); const stl_wrappers::KVMap::value_type expected_blob2( KeyStr("b", 0U, kTypeBlobIndex), blob2.second); const stl_wrappers::KVMap::value_type expected_blob3( KeyStr("c", 0U, kTypeBlobIndex), blob3.second); const stl_wrappers::KVMap::value_type expected_blob4( KeyStr("d", 0U, kTypeBlobIndex), blob4.second); const stl_wrappers::KVMap::value_type expected_blob5( KeyStr("e", 0U, kTypeBlobIndex), blob5.second); const stl_wrappers::KVMap::value_type expected_blob6( KeyStr("f", 0U, kTypeBlobIndex), blob6.second); auto expected_results = mock::MakeMockFile({expected_blob1, expected_blob2, expected_blob3, expected_blob4, expected_blob5, expected_blob6}); SetLastSequence(6U); auto files = cfd_->current()->storage_info()->LevelFiles(0); RunCompaction({files}, expected_results, std::vector(), kMaxSequenceNumber, /* output_level */ 1, /* verify */ true, /* expected_oldest_blob_file_number */ 19); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** 
/*argv*/) { fprintf(stderr, "SKIPPED as CompactionJobStats is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/db/compaction/compaction_picker.cc000066400000000000000000001250101370372246700217650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/compaction/compaction_picker.h" #include #include #include #include #include #include #include "db/column_family.h" #include "file/filename.h" #include "logging/log_buffer.h" #include "monitoring/statistics.h" #include "test_util/sync_point.h" #include "util/random.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { namespace { uint64_t TotalCompensatedFileSize(const std::vector& files) { uint64_t sum = 0; for (size_t i = 0; i < files.size() && files[i]; i++) { sum += files[i]->compensated_file_size; } return sum; } } // anonymous namespace bool FindIntraL0Compaction(const std::vector& level_files, size_t min_files_to_compact, uint64_t max_compact_bytes_per_del_file, uint64_t max_compaction_bytes, CompactionInputFiles* comp_inputs, SequenceNumber earliest_mem_seqno) { // Do not pick ingested file when there is at least one memtable not flushed // which of seqno is overlap with the sst. TEST_SYNC_POINT("FindIntraL0Compaction"); size_t start = 0; for (; start < level_files.size(); start++) { if (level_files[start]->being_compacted) { return false; } // If there is no data in memtable, the earliest sequence number would the // largest sequence number in last memtable. 
// Because all files are sorted in descending order by largest_seqno, so we // only need to check the first one. if (level_files[start]->fd.largest_seqno <= earliest_mem_seqno) { break; } } if (start >= level_files.size()) { return false; } size_t compact_bytes = static_cast(level_files[start]->fd.file_size); uint64_t compensated_compact_bytes = level_files[start]->compensated_file_size; size_t compact_bytes_per_del_file = port::kMaxSizet; // Compaction range will be [start, limit). size_t limit; // Pull in files until the amount of compaction work per deleted file begins // increasing or maximum total compaction size is reached. size_t new_compact_bytes_per_del_file = 0; for (limit = start + 1; limit < level_files.size(); ++limit) { compact_bytes += static_cast(level_files[limit]->fd.file_size); compensated_compact_bytes += level_files[limit]->compensated_file_size; new_compact_bytes_per_del_file = compact_bytes / (limit - start); if (level_files[limit]->being_compacted || new_compact_bytes_per_del_file > compact_bytes_per_del_file || compensated_compact_bytes > max_compaction_bytes) { break; } compact_bytes_per_del_file = new_compact_bytes_per_del_file; } if ((limit - start) >= min_files_to_compact && compact_bytes_per_del_file < max_compact_bytes_per_del_file) { assert(comp_inputs != nullptr); comp_inputs->level = 0; for (size_t i = start; i < limit; ++i) { comp_inputs->files.push_back(level_files[i]); } return true; } return false; } // Determine compression type, based on user options, level of the output // file and whether compression is disabled. // If enable_compression is false, then compression is always disabled no // matter what the values of the other two parameters are. // Otherwise, the compression type is determined based on options and level. 
CompressionType GetCompressionType(const ImmutableCFOptions& ioptions, const VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, int level, int base_level, const bool enable_compression) { if (!enable_compression) { // disable compression return kNoCompression; } // If bottommost_compression is set and we are compacting to the // bottommost level then we should use it. if (mutable_cf_options.bottommost_compression != kDisableCompressionOption && level >= (vstorage->num_non_empty_levels() - 1)) { return mutable_cf_options.bottommost_compression; } // If the user has specified a different compression level for each level, // then pick the compression for that level. if (!ioptions.compression_per_level.empty()) { assert(level == 0 || level >= base_level); int idx = (level == 0) ? 0 : level - base_level + 1; const int n = static_cast(ioptions.compression_per_level.size()) - 1; // It is possible for level_ to be -1; in that case, we use level // 0's compression. This occurs mostly in backwards compatibility // situations when the builder doesn't know what level the file // belongs to. Likewise, if level is beyond the end of the // specified compression levels, use the last value. return ioptions.compression_per_level[std::max(0, std::min(idx, n))]; } else { return mutable_cf_options.compression; } } CompressionOptions GetCompressionOptions(const MutableCFOptions& cf_options, const VersionStorageInfo* vstorage, int level, const bool enable_compression) { if (!enable_compression) { return cf_options.compression_opts; } // If bottommost_compression is set and we are compacting to the // bottommost level then we should use the specified compression options // for the bottmomost_compression. 
if (cf_options.bottommost_compression != kDisableCompressionOption && level >= (vstorage->num_non_empty_levels() - 1) && cf_options.bottommost_compression_opts.enabled) { return cf_options.bottommost_compression_opts; } return cf_options.compression_opts; } CompactionPicker::CompactionPicker(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp) : ioptions_(ioptions), icmp_(icmp) {} CompactionPicker::~CompactionPicker() {} // Delete this compaction from the list of running compactions. void CompactionPicker::ReleaseCompactionFiles(Compaction* c, Status status) { UnregisterCompaction(c); if (!status.ok()) { c->ResetNextCompactionIndex(); } } void CompactionPicker::GetRange(const CompactionInputFiles& inputs, InternalKey* smallest, InternalKey* largest) const { const int level = inputs.level; assert(!inputs.empty()); smallest->Clear(); largest->Clear(); if (level == 0) { for (size_t i = 0; i < inputs.size(); i++) { FileMetaData* f = inputs[i]; if (i == 0) { *smallest = f->smallest; *largest = f->largest; } else { if (icmp_->Compare(f->smallest, *smallest) < 0) { *smallest = f->smallest; } if (icmp_->Compare(f->largest, *largest) > 0) { *largest = f->largest; } } } } else { *smallest = inputs[0]->smallest; *largest = inputs[inputs.size() - 1]->largest; } } void CompactionPicker::GetRange(const CompactionInputFiles& inputs1, const CompactionInputFiles& inputs2, InternalKey* smallest, InternalKey* largest) const { assert(!inputs1.empty() || !inputs2.empty()); if (inputs1.empty()) { GetRange(inputs2, smallest, largest); } else if (inputs2.empty()) { GetRange(inputs1, smallest, largest); } else { InternalKey smallest1, smallest2, largest1, largest2; GetRange(inputs1, &smallest1, &largest1); GetRange(inputs2, &smallest2, &largest2); *smallest = icmp_->Compare(smallest1, smallest2) < 0 ? smallest1 : smallest2; *largest = icmp_->Compare(largest1, largest2) < 0 ? 
largest2 : largest1; } } void CompactionPicker::GetRange(const std::vector& inputs, InternalKey* smallest, InternalKey* largest) const { InternalKey current_smallest; InternalKey current_largest; bool initialized = false; for (const auto& in : inputs) { if (in.empty()) { continue; } GetRange(in, ¤t_smallest, ¤t_largest); if (!initialized) { *smallest = current_smallest; *largest = current_largest; initialized = true; } else { if (icmp_->Compare(current_smallest, *smallest) < 0) { *smallest = current_smallest; } if (icmp_->Compare(current_largest, *largest) > 0) { *largest = current_largest; } } } assert(initialized); } bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/, VersionStorageInfo* vstorage, CompactionInputFiles* inputs, InternalKey** next_smallest) { // This isn't good compaction assert(!inputs->empty()); const int level = inputs->level; // GetOverlappingInputs will always do the right thing for level-0. // So we don't need to do any expansion if level == 0. if (level == 0) { return true; } InternalKey smallest, largest; // Keep expanding inputs until we are sure that there is a "clean cut" // boundary between the files in input and the surrounding files. // This will ensure that no parts of a key are lost during compaction. int hint_index = -1; size_t old_size; do { old_size = inputs->size(); GetRange(*inputs, &smallest, &largest); inputs->clear(); vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files, hint_index, &hint_index, true, next_smallest); } while (inputs->size() > old_size); // we started off with inputs non-empty and the previous loop only grew // inputs. thus, inputs should be non-empty here assert(!inputs->empty()); // If, after the expansion, there are files that are already under // compaction, then we must drop/cancel this compaction. 
if (AreFilesInCompaction(inputs->files)) { return false; } return true; } bool CompactionPicker::RangeOverlapWithCompaction( const Slice& smallest_user_key, const Slice& largest_user_key, int level) const { const Comparator* ucmp = icmp_->user_comparator(); for (Compaction* c : compactions_in_progress_) { if (c->output_level() == level && ucmp->Compare(smallest_user_key, c->GetLargestUserKey()) <= 0 && ucmp->Compare(largest_user_key, c->GetSmallestUserKey()) >= 0) { // Overlap return true; } } // Did not overlap with any running compaction in level `level` return false; } bool CompactionPicker::FilesRangeOverlapWithCompaction( const std::vector& inputs, int level) const { bool is_empty = true; for (auto& in : inputs) { if (!in.empty()) { is_empty = false; break; } } if (is_empty) { // No files in inputs return false; } InternalKey smallest, largest; GetRange(inputs, &smallest, &largest); return RangeOverlapWithCompaction(smallest.user_key(), largest.user_key(), level); } // Returns true if any one of specified files are being compacted bool CompactionPicker::AreFilesInCompaction( const std::vector& files) { for (size_t i = 0; i < files.size(); i++) { if (files[i]->being_compacted) { return true; } } return false; } Compaction* CompactionPicker::CompactFiles( const CompactionOptions& compact_options, const std::vector& input_files, int output_level, VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, uint32_t output_path_id) { assert(input_files.size()); // This compaction output should not overlap with a running compaction as // `SanitizeCompactionInputFiles` should've checked earlier and db mutex // shouldn't have been released since. 
assert(!FilesRangeOverlapWithCompaction(input_files, output_level)); CompressionType compression_type; if (compact_options.compression == kDisableCompressionOption) { int base_level; if (ioptions_.compaction_style == kCompactionStyleLevel) { base_level = vstorage->base_level(); } else { base_level = 1; } compression_type = GetCompressionType(ioptions_, vstorage, mutable_cf_options, output_level, base_level); } else { // TODO(ajkr): `CompactionOptions` offers configurable `CompressionType` // without configurable `CompressionOptions`, which is inconsistent. compression_type = compact_options.compression; } auto c = new Compaction( vstorage, ioptions_, mutable_cf_options, input_files, output_level, compact_options.output_file_size_limit, mutable_cf_options.max_compaction_bytes, output_path_id, compression_type, GetCompressionOptions(mutable_cf_options, vstorage, output_level), compact_options.max_subcompactions, /* grandparents */ {}, true); RegisterCompaction(c); return c; } Status CompactionPicker::GetCompactionInputsFromFileNumbers( std::vector* input_files, std::unordered_set* input_set, const VersionStorageInfo* vstorage, const CompactionOptions& /*compact_options*/) const { if (input_set->size() == 0U) { return Status::InvalidArgument( "Compaction must include at least one file."); } assert(input_files); std::vector matched_input_files; matched_input_files.resize(vstorage->num_levels()); int first_non_empty_level = -1; int last_non_empty_level = -1; // TODO(yhchiang): use a lazy-initialized mapping from // file_number to FileMetaData in Version. 
for (int level = 0; level < vstorage->num_levels(); ++level) { for (auto file : vstorage->LevelFiles(level)) { auto iter = input_set->find(file->fd.GetNumber()); if (iter != input_set->end()) { matched_input_files[level].files.push_back(file); input_set->erase(iter); last_non_empty_level = level; if (first_non_empty_level == -1) { first_non_empty_level = level; } } } } if (!input_set->empty()) { std::string message( "Cannot find matched SST files for the following file numbers:"); for (auto fn : *input_set) { message += " "; message += ToString(fn); } return Status::InvalidArgument(message); } for (int level = first_non_empty_level; level <= last_non_empty_level; ++level) { matched_input_files[level].level = level; input_files->emplace_back(std::move(matched_input_files[level])); } return Status::OK(); } // Returns true if any one of the parent files are being compacted bool CompactionPicker::IsRangeInCompaction(VersionStorageInfo* vstorage, const InternalKey* smallest, const InternalKey* largest, int level, int* level_index) { std::vector inputs; assert(level < NumberLevels()); vstorage->GetOverlappingInputs(level, smallest, largest, &inputs, level_index ? *level_index : 0, level_index); return AreFilesInCompaction(inputs); } // Populates the set of inputs of all other levels that overlap with the // start level. // Now we assume all levels except start level and output level are empty. // Will also attempt to expand "start level" if that doesn't expand // "output level" or cause "level" to include a file for compaction that has an // overlapping user-key with another file. 
// REQUIRES: input_level and output_level are different // REQUIRES: inputs->empty() == false // Returns false if files on parent level are currently in compaction, which // means that we can't compact them bool CompactionPicker::SetupOtherInputs( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, CompactionInputFiles* inputs, CompactionInputFiles* output_level_inputs, int* parent_index, int base_index) { assert(!inputs->empty()); assert(output_level_inputs->empty()); const int input_level = inputs->level; const int output_level = output_level_inputs->level; if (input_level == output_level) { // no possibility of conflict return true; } // For now, we only support merging two levels, start level and output level. // We need to assert other levels are empty. for (int l = input_level + 1; l < output_level; l++) { assert(vstorage->NumLevelFiles(l) == 0); } InternalKey smallest, largest; // Get the range one last time. GetRange(*inputs, &smallest, &largest); // Populate the set of next-level files (inputs_GetOutputLevelInputs()) to // include in compaction vstorage->GetOverlappingInputs(output_level, &smallest, &largest, &output_level_inputs->files, *parent_index, parent_index); if (AreFilesInCompaction(output_level_inputs->files)) { return false; } if (!output_level_inputs->empty()) { if (!ExpandInputsToCleanCut(cf_name, vstorage, output_level_inputs)) { return false; } } // See if we can further grow the number of inputs in "level" without // changing the number of "level+1" files we pick up. We also choose NOT // to expand if this would cause "level" to include some entries for some // user key, while excluding other entries for the same user key. This // can happen when one user key spans multiple files. 
if (!output_level_inputs->empty()) { const uint64_t limit = mutable_cf_options.max_compaction_bytes; const uint64_t output_level_inputs_size = TotalCompensatedFileSize(output_level_inputs->files); const uint64_t inputs_size = TotalCompensatedFileSize(inputs->files); bool expand_inputs = false; CompactionInputFiles expanded_inputs; expanded_inputs.level = input_level; // Get closed interval of output level InternalKey all_start, all_limit; GetRange(*inputs, *output_level_inputs, &all_start, &all_limit); bool try_overlapping_inputs = true; vstorage->GetOverlappingInputs(input_level, &all_start, &all_limit, &expanded_inputs.files, base_index, nullptr); uint64_t expanded_inputs_size = TotalCompensatedFileSize(expanded_inputs.files); if (!ExpandInputsToCleanCut(cf_name, vstorage, &expanded_inputs)) { try_overlapping_inputs = false; } if (try_overlapping_inputs && expanded_inputs.size() > inputs->size() && output_level_inputs_size + expanded_inputs_size < limit && !AreFilesInCompaction(expanded_inputs.files)) { InternalKey new_start, new_limit; GetRange(expanded_inputs, &new_start, &new_limit); CompactionInputFiles expanded_output_level_inputs; expanded_output_level_inputs.level = output_level; vstorage->GetOverlappingInputs(output_level, &new_start, &new_limit, &expanded_output_level_inputs.files, *parent_index, parent_index); assert(!expanded_output_level_inputs.empty()); if (!AreFilesInCompaction(expanded_output_level_inputs.files) && ExpandInputsToCleanCut(cf_name, vstorage, &expanded_output_level_inputs) && expanded_output_level_inputs.size() == output_level_inputs->size()) { expand_inputs = true; } } if (!expand_inputs) { vstorage->GetCleanInputsWithinInterval(input_level, &all_start, &all_limit, &expanded_inputs.files, base_index, nullptr); expanded_inputs_size = TotalCompensatedFileSize(expanded_inputs.files); if (expanded_inputs.size() > inputs->size() && output_level_inputs_size + expanded_inputs_size < limit && !AreFilesInCompaction(expanded_inputs.files)) { 
expand_inputs = true; } } if (expand_inputs) { ROCKS_LOG_INFO(ioptions_.info_log, "[%s] Expanding@%d %" ROCKSDB_PRIszt "+%" ROCKSDB_PRIszt "(%" PRIu64 "+%" PRIu64 " bytes) to %" ROCKSDB_PRIszt "+%" ROCKSDB_PRIszt " (%" PRIu64 "+%" PRIu64 " bytes)\n", cf_name.c_str(), input_level, inputs->size(), output_level_inputs->size(), inputs_size, output_level_inputs_size, expanded_inputs.size(), output_level_inputs->size(), expanded_inputs_size, output_level_inputs_size); inputs->files = expanded_inputs.files; } } return true; } void CompactionPicker::GetGrandparents( VersionStorageInfo* vstorage, const CompactionInputFiles& inputs, const CompactionInputFiles& output_level_inputs, std::vector* grandparents) { InternalKey start, limit; GetRange(inputs, output_level_inputs, &start, &limit); // Compute the set of grandparent files that overlap this compaction // (parent == level+1; grandparent == level+2) if (output_level_inputs.level + 1 < NumberLevels()) { vstorage->GetOverlappingInputs(output_level_inputs.level + 1, &start, &limit, grandparents); } } Compaction* CompactionPicker::CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, uint64_t max_file_num_to_ignore) { // CompactionPickerFIFO has its own implementation of compact range assert(ioptions_.compaction_style != kCompactionStyleFIFO); if (input_level == ColumnFamilyData::kCompactAllLevels) { assert(ioptions_.compaction_style == kCompactionStyleUniversal); // Universal compaction with more than one level always compacts all the // files together to the last level. 
assert(vstorage->num_levels() > 1); // DBImpl::CompactRange() set output level to be the last level if (ioptions_.allow_ingest_behind) { assert(output_level == vstorage->num_levels() - 2); } else { assert(output_level == vstorage->num_levels() - 1); } // DBImpl::RunManualCompaction will make full range for universal compaction assert(begin == nullptr); assert(end == nullptr); *compaction_end = nullptr; int start_level = 0; for (; start_level < vstorage->num_levels() && vstorage->NumLevelFiles(start_level) == 0; start_level++) { } if (start_level == vstorage->num_levels()) { return nullptr; } if ((start_level == 0) && (!level0_compactions_in_progress_.empty())) { *manual_conflict = true; // Only one level 0 compaction allowed return nullptr; } std::vector inputs(vstorage->num_levels() - start_level); for (int level = start_level; level < vstorage->num_levels(); level++) { inputs[level - start_level].level = level; auto& files = inputs[level - start_level].files; for (FileMetaData* f : vstorage->LevelFiles(level)) { files.push_back(f); } if (AreFilesInCompaction(files)) { *manual_conflict = true; return nullptr; } } // 2 non-exclusive manual compactions could run at the same time producing // overlaping outputs in the same level. if (FilesRangeOverlapWithCompaction(inputs, output_level)) { // This compaction output could potentially conflict with the output // of a currently running compaction, we cannot run it. 
*manual_conflict = true; return nullptr; } Compaction* c = new Compaction( vstorage, ioptions_, mutable_cf_options, std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options, output_level, ioptions_.compaction_style), /* max_compaction_bytes */ LLONG_MAX, compact_range_options.target_path_id, GetCompressionType(ioptions_, vstorage, mutable_cf_options, output_level, 1), GetCompressionOptions(mutable_cf_options, vstorage, output_level), compact_range_options.max_subcompactions, /* grandparents */ {}, /* is manual */ true); RegisterCompaction(c); return c; } CompactionInputFiles inputs; inputs.level = input_level; bool covering_the_whole_range = true; // All files are 'overlapping' in universal style compaction. // We have to compact the entire range in one shot. if (ioptions_.compaction_style == kCompactionStyleUniversal) { begin = nullptr; end = nullptr; } vstorage->GetOverlappingInputs(input_level, begin, end, &inputs.files); if (inputs.empty()) { return nullptr; } if ((input_level == 0) && (!level0_compactions_in_progress_.empty())) { // Only one level 0 compaction allowed TEST_SYNC_POINT("CompactionPicker::CompactRange:Conflict"); *manual_conflict = true; return nullptr; } // Avoid compacting too much in one shot in case the range is large. // But we cannot do this for level-0 since level-0 files can overlap // and we must not pick one file and drop another older file if the // two files overlap. if (input_level > 0) { const uint64_t limit = mutable_cf_options.max_compaction_bytes; uint64_t total = 0; for (size_t i = 0; i + 1 < inputs.size(); ++i) { uint64_t s = inputs[i]->compensated_file_size; total += s; if (total >= limit) { covering_the_whole_range = false; inputs.files.resize(i + 1); break; } } } assert(compact_range_options.target_path_id < static_cast(ioptions_.cf_paths.size())); // for BOTTOM LEVEL compaction only, use max_file_num_to_ignore to filter out // files that are created during the current compaction. 
if (compact_range_options.bottommost_level_compaction == BottommostLevelCompaction::kForceOptimized && max_file_num_to_ignore != port::kMaxUint64) { assert(input_level == output_level); // inputs_shrunk holds a continuous subset of input files which were all // created before the current manual compaction std::vector inputs_shrunk; size_t skip_input_index = inputs.size(); for (size_t i = 0; i < inputs.size(); ++i) { if (inputs[i]->fd.GetNumber() < max_file_num_to_ignore) { inputs_shrunk.push_back(inputs[i]); } else if (!inputs_shrunk.empty()) { // inputs[i] was created during the current manual compaction and // need to be skipped skip_input_index = i; break; } } if (inputs_shrunk.empty()) { return nullptr; } if (inputs.size() != inputs_shrunk.size()) { inputs.files.swap(inputs_shrunk); } // set covering_the_whole_range to false if there is any file that need to // be compacted in the range of inputs[skip_input_index+1, inputs.size()) for (size_t i = skip_input_index + 1; i < inputs.size(); ++i) { if (inputs[i]->fd.GetNumber() < max_file_num_to_ignore) { covering_the_whole_range = false; } } } InternalKey key_storage; InternalKey* next_smallest = &key_storage; if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) == false) { // manual compaction is now multi-threaded, so it can // happen that ExpandWhileOverlapping fails // we handle it higher in RunManualCompaction *manual_conflict = true; return nullptr; } if (covering_the_whole_range || !next_smallest) { *compaction_end = nullptr; } else { **compaction_end = *next_smallest; } CompactionInputFiles output_level_inputs; if (output_level == ColumnFamilyData::kCompactToBaseLevel) { assert(input_level == 0); output_level = vstorage->base_level(); assert(output_level > 0); } output_level_inputs.level = output_level; if (input_level != output_level) { int parent_index = -1; if (!SetupOtherInputs(cf_name, mutable_cf_options, vstorage, &inputs, &output_level_inputs, &parent_index, -1)) { // manual 
compaction is now multi-threaded, so it can // happen that SetupOtherInputs fails // we handle it higher in RunManualCompaction *manual_conflict = true; return nullptr; } } std::vector compaction_inputs({inputs}); if (!output_level_inputs.empty()) { compaction_inputs.push_back(output_level_inputs); } for (size_t i = 0; i < compaction_inputs.size(); i++) { if (AreFilesInCompaction(compaction_inputs[i].files)) { *manual_conflict = true; return nullptr; } } // 2 non-exclusive manual compactions could run at the same time producing // overlaping outputs in the same level. if (FilesRangeOverlapWithCompaction(compaction_inputs, output_level)) { // This compaction output could potentially conflict with the output // of a currently running compaction, we cannot run it. *manual_conflict = true; return nullptr; } std::vector grandparents; GetGrandparents(vstorage, inputs, output_level_inputs, &grandparents); Compaction* compaction = new Compaction( vstorage, ioptions_, mutable_cf_options, std::move(compaction_inputs), output_level, MaxFileSizeForLevel(mutable_cf_options, output_level, ioptions_.compaction_style, vstorage->base_level(), ioptions_.level_compaction_dynamic_level_bytes), mutable_cf_options.max_compaction_bytes, compact_range_options.target_path_id, GetCompressionType(ioptions_, vstorage, mutable_cf_options, output_level, vstorage->base_level()), GetCompressionOptions(mutable_cf_options, vstorage, output_level), compact_range_options.max_subcompactions, std::move(grandparents), /* is manual compaction */ true); TEST_SYNC_POINT_CALLBACK("CompactionPicker::CompactRange:Return", compaction); RegisterCompaction(compaction); // Creating a compaction influences the compaction score because the score // takes running compactions into account (by skipping files that are already // being compacted). 
Since we just changed compaction score, we recalculate it // here vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options); return compaction; } #ifndef ROCKSDB_LITE namespace { // Test whether two files have overlapping key-ranges. bool HaveOverlappingKeyRanges(const Comparator* c, const SstFileMetaData& a, const SstFileMetaData& b) { if (c->Compare(a.smallestkey, b.smallestkey) >= 0) { if (c->Compare(a.smallestkey, b.largestkey) <= 0) { // b.smallestkey <= a.smallestkey <= b.largestkey return true; } } else if (c->Compare(a.largestkey, b.smallestkey) >= 0) { // a.smallestkey < b.smallestkey <= a.largestkey return true; } if (c->Compare(a.largestkey, b.largestkey) <= 0) { if (c->Compare(a.largestkey, b.smallestkey) >= 0) { // b.smallestkey <= a.largestkey <= b.largestkey return true; } } else if (c->Compare(a.smallestkey, b.largestkey) <= 0) { // a.smallestkey <= b.largestkey < a.largestkey return true; } return false; } } // namespace Status CompactionPicker::SanitizeCompactionInputFilesForAllLevels( std::unordered_set* input_files, const ColumnFamilyMetaData& cf_meta, const int output_level) const { auto& levels = cf_meta.levels; auto comparator = icmp_->user_comparator(); // TODO(yhchiang): add is_adjustable to CompactionOptions // the smallest and largest key of the current compaction input std::string smallestkey; std::string largestkey; // a flag for initializing smallest and largest key bool is_first = false; const int kNotFound = -1; // For each level, it does the following things: // 1. Find the first and the last compaction input files // in the current level. // 2. Include all files between the first and the last // compaction input files. // 3. Update the compaction key-range. // 4. For all remaining levels, include files that have // overlapping key-range with the compaction key-range. 
for (int l = 0; l <= output_level; ++l) { auto& current_files = levels[l].files; int first_included = static_cast(current_files.size()); int last_included = kNotFound; // identify the first and the last compaction input files // in the current level. for (size_t f = 0; f < current_files.size(); ++f) { if (input_files->find(TableFileNameToNumber(current_files[f].name)) != input_files->end()) { first_included = std::min(first_included, static_cast(f)); last_included = std::max(last_included, static_cast(f)); if (is_first == false) { smallestkey = current_files[f].smallestkey; largestkey = current_files[f].largestkey; is_first = true; } } } if (last_included == kNotFound) { continue; } if (l != 0) { // expend the compaction input of the current level if it // has overlapping key-range with other non-compaction input // files in the same level. while (first_included > 0) { if (comparator->Compare(current_files[first_included - 1].largestkey, current_files[first_included].smallestkey) < 0) { break; } first_included--; } while (last_included < static_cast(current_files.size()) - 1) { if (comparator->Compare(current_files[last_included + 1].smallestkey, current_files[last_included].largestkey) > 0) { break; } last_included++; } } else if (output_level > 0) { last_included = static_cast(current_files.size() - 1); } // include all files between the first and the last compaction input files. 
for (int f = first_included; f <= last_included; ++f) { if (current_files[f].being_compacted) { return Status::Aborted("Necessary compaction input file " + current_files[f].name + " is currently being compacted."); } input_files->insert(TableFileNameToNumber(current_files[f].name)); } // update smallest and largest key if (l == 0) { for (int f = first_included; f <= last_included; ++f) { if (comparator->Compare(smallestkey, current_files[f].smallestkey) > 0) { smallestkey = current_files[f].smallestkey; } if (comparator->Compare(largestkey, current_files[f].largestkey) < 0) { largestkey = current_files[f].largestkey; } } } else { if (comparator->Compare(smallestkey, current_files[first_included].smallestkey) > 0) { smallestkey = current_files[first_included].smallestkey; } if (comparator->Compare(largestkey, current_files[last_included].largestkey) < 0) { largestkey = current_files[last_included].largestkey; } } SstFileMetaData aggregated_file_meta; aggregated_file_meta.smallestkey = smallestkey; aggregated_file_meta.largestkey = largestkey; // For all lower levels, include all overlapping files. 
// We need to add overlapping files from the current level too because even // if there no input_files in level l, we would still need to add files // which overlap with the range containing the input_files in levels 0 to l // Level 0 doesn't need to be handled this way because files are sorted by // time and not by key for (int m = std::max(l, 1); m <= output_level; ++m) { for (auto& next_lv_file : levels[m].files) { if (HaveOverlappingKeyRanges(comparator, aggregated_file_meta, next_lv_file)) { if (next_lv_file.being_compacted) { return Status::Aborted( "File " + next_lv_file.name + " that has overlapping key range with one of the compaction " " input file is currently being compacted."); } input_files->insert(TableFileNameToNumber(next_lv_file.name)); } } } } if (RangeOverlapWithCompaction(smallestkey, largestkey, output_level)) { return Status::Aborted( "A running compaction is writing to the same output level in an " "overlapping key range"); } return Status::OK(); } Status CompactionPicker::SanitizeCompactionInputFiles( std::unordered_set* input_files, const ColumnFamilyMetaData& cf_meta, const int output_level) const { assert(static_cast(cf_meta.levels.size()) - 1 == cf_meta.levels[cf_meta.levels.size() - 1].level); if (output_level >= static_cast(cf_meta.levels.size())) { return Status::InvalidArgument( "Output level for column family " + cf_meta.name + " must between [0, " + ToString(cf_meta.levels[cf_meta.levels.size() - 1].level) + "]."); } if (output_level > MaxOutputLevel()) { return Status::InvalidArgument( "Exceed the maximum output level defined by " "the current compaction algorithm --- " + ToString(MaxOutputLevel())); } if (output_level < 0) { return Status::InvalidArgument("Output level cannot be negative."); } if (input_files->size() == 0) { return Status::InvalidArgument( "A compaction must contain at least one file."); } Status s = SanitizeCompactionInputFilesForAllLevels(input_files, cf_meta, output_level); if (!s.ok()) { return s; } // for 
all input files, check whether the file number matches // any currently-existing files. for (auto file_num : *input_files) { bool found = false; for (const auto& level_meta : cf_meta.levels) { for (const auto& file_meta : level_meta.files) { if (file_num == TableFileNameToNumber(file_meta.name)) { if (file_meta.being_compacted) { return Status::Aborted("Specified compaction input file " + MakeTableFileName("", file_num) + " is already being compacted."); } found = true; break; } } if (found) { break; } } if (!found) { return Status::InvalidArgument( "Specified compaction input file " + MakeTableFileName("", file_num) + " does not exist in column family " + cf_meta.name + "."); } } return Status::OK(); } #endif // !ROCKSDB_LITE void CompactionPicker::RegisterCompaction(Compaction* c) { if (c == nullptr) { return; } assert(ioptions_.compaction_style != kCompactionStyleLevel || c->output_level() == 0 || !FilesRangeOverlapWithCompaction(*c->inputs(), c->output_level())); if (c->start_level() == 0 || ioptions_.compaction_style == kCompactionStyleUniversal) { level0_compactions_in_progress_.insert(c); } compactions_in_progress_.insert(c); } void CompactionPicker::UnregisterCompaction(Compaction* c) { if (c == nullptr) { return; } if (c->start_level() == 0 || ioptions_.compaction_style == kCompactionStyleUniversal) { level0_compactions_in_progress_.erase(c); } compactions_in_progress_.erase(c); } void CompactionPicker::PickFilesMarkedForCompaction( const std::string& cf_name, VersionStorageInfo* vstorage, int* start_level, int* output_level, CompactionInputFiles* start_level_inputs) { if (vstorage->FilesMarkedForCompaction().empty()) { return; } auto continuation = [&, cf_name](std::pair level_file) { // If it's being compacted it has nothing to do here. 
// If this assert() fails that means that some function marked some // files as being_compacted, but didn't call ComputeCompactionScore() assert(!level_file.second->being_compacted); *start_level = level_file.first; *output_level = (*start_level == 0) ? vstorage->base_level() : *start_level + 1; if (*start_level == 0 && !level0_compactions_in_progress()->empty()) { return false; } start_level_inputs->files = {level_file.second}; start_level_inputs->level = *start_level; return ExpandInputsToCleanCut(cf_name, vstorage, start_level_inputs); }; // take a chance on a random file first Random64 rnd(/* seed */ reinterpret_cast(vstorage)); size_t random_file_index = static_cast(rnd.Uniform( static_cast(vstorage->FilesMarkedForCompaction().size()))); TEST_SYNC_POINT_CALLBACK("CompactionPicker::PickFilesMarkedForCompaction", &random_file_index); if (continuation(vstorage->FilesMarkedForCompaction()[random_file_index])) { // found the compaction! return; } for (auto& level_file : vstorage->FilesMarkedForCompaction()) { if (continuation(level_file)) { // found the compaction! return; } } start_level_inputs->files.clear(); } bool CompactionPicker::GetOverlappingL0Files( VersionStorageInfo* vstorage, CompactionInputFiles* start_level_inputs, int output_level, int* parent_index) { // Two level 0 compaction won't run at the same time, so don't need to worry // about files on level 0 being compacted. assert(level0_compactions_in_progress()->empty()); InternalKey smallest, largest; GetRange(*start_level_inputs, &smallest, &largest); // Note that the next call will discard the file we placed in // c->inputs_[0] earlier and replace it with an overlapping set // which will include the picked file. start_level_inputs->files.clear(); vstorage->GetOverlappingInputs(0, &smallest, &largest, &(start_level_inputs->files)); // If we include more L0 files in the same compaction run it can // cause the 'smallest' and 'largest' key to get extended to a // larger range. 
So, re-invoke GetRange to get the new key range GetRange(*start_level_inputs, &smallest, &largest); if (IsRangeInCompaction(vstorage, &smallest, &largest, output_level, parent_index)) { return false; } assert(!start_level_inputs->files.empty()); return true; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_picker.h000066400000000000000000000330301370372246700216270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include "db/compaction/compaction.h" #include "db/version_set.h" #include "options/cf_options.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { // The file contains an abstract class CompactionPicker, and its two // sub-classes LevelCompactionPicker and NullCompactionPicker, as // well as some helper functions used by them. class LogBuffer; class Compaction; class VersionStorageInfo; struct CompactionInputFiles; // An abstract class to pick compactions from an existing LSM-tree. // // Each compaction style inherits the class and implement the // interface to form automatic compactions. If NeedCompaction() is true, // then call PickCompaction() to find what files need to be compacted // and where to put the output files. // // Non-virtual functions CompactRange() and CompactFiles() are used to // pick files to compact based on users' DB::CompactRange() and // DB::CompactFiles() requests, respectively. There is little // compaction style specific logic for them. 
class CompactionPicker { public: CompactionPicker(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp); virtual ~CompactionPicker(); // Pick level and inputs for a new compaction. // Returns nullptr if there is no compaction to be done. // Otherwise returns a pointer to a heap-allocated object that // describes the compaction. Caller should delete the result. virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) = 0; // Return a compaction object for compacting the range [begin,end] in // the specified level. Returns nullptr if there is nothing in that // level that overlaps the specified range. Caller should delete // the result. // // The returned Compaction might not include the whole requested range. // In that case, compaction_end will be set to the next key that needs // compacting. In case the compaction will compact the whole range, // compaction_end will be set to nullptr. // Client is responsible for compaction_end storage -- when called, // *compaction_end should point to valid InternalKey! virtual Compaction* CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, uint64_t max_file_num_to_ignore); // The maximum allowed output level. Default value is NumberLevels() - 1. virtual int MaxOutputLevel() const { return NumberLevels() - 1; } virtual bool NeedsCompaction(const VersionStorageInfo* vstorage) const = 0; // Sanitize the input set of compaction input files. // When the input parameters do not describe a valid compaction, the // function will try to fix the input_files by adding necessary // files. 
If it's not possible to conver an invalid input_files // into a valid one by adding more files, the function will return a // non-ok status with specific reason. #ifndef ROCKSDB_LITE Status SanitizeCompactionInputFiles(std::unordered_set* input_files, const ColumnFamilyMetaData& cf_meta, const int output_level) const; #endif // ROCKSDB_LITE // Free up the files that participated in a compaction // // Requirement: DB mutex held void ReleaseCompactionFiles(Compaction* c, Status status); // Returns true if any one of the specified files are being compacted bool AreFilesInCompaction(const std::vector& files); // Takes a list of CompactionInputFiles and returns a (manual) Compaction // object. // // Caller must provide a set of input files that has been passed through // `SanitizeCompactionInputFiles` earlier. The lock should not be released // between that call and this one. Compaction* CompactFiles(const CompactionOptions& compact_options, const std::vector& input_files, int output_level, VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, uint32_t output_path_id); // Converts a set of compaction input file numbers into // a list of CompactionInputFiles. Status GetCompactionInputsFromFileNumbers( std::vector* input_files, std::unordered_set* input_set, const VersionStorageInfo* vstorage, const CompactionOptions& compact_options) const; // Is there currently a compaction involving level 0 taking place bool IsLevel0CompactionInProgress() const { return !level0_compactions_in_progress_.empty(); } // Return true if the passed key range overlap with a compaction output // that is currently running. bool RangeOverlapWithCompaction(const Slice& smallest_user_key, const Slice& largest_user_key, int level) const; // Stores the minimal range that covers all entries in inputs in // *smallest, *largest. 
// REQUIRES: inputs is not empty void GetRange(const CompactionInputFiles& inputs, InternalKey* smallest, InternalKey* largest) const; // Stores the minimal range that covers all entries in inputs1 and inputs2 // in *smallest, *largest. // REQUIRES: inputs is not empty void GetRange(const CompactionInputFiles& inputs1, const CompactionInputFiles& inputs2, InternalKey* smallest, InternalKey* largest) const; // Stores the minimal range that covers all entries in inputs // in *smallest, *largest. // REQUIRES: inputs is not empty (at least on entry have one file) void GetRange(const std::vector& inputs, InternalKey* smallest, InternalKey* largest) const; int NumberLevels() const { return ioptions_.num_levels; } // Add more files to the inputs on "level" to make sure that // no newer version of a key is compacted to "level+1" while leaving an older // version in a "level". Otherwise, any Get() will search "level" first, // and will likely return an old/stale value for the key, since it always // searches in increasing order of level to find the value. This could // also scramble the order of merge operands. This function should be // called any time a new Compaction is created, and its inputs_[0] are // populated. // // Will return false if it is impossible to apply this compaction. bool ExpandInputsToCleanCut(const std::string& cf_name, VersionStorageInfo* vstorage, CompactionInputFiles* inputs, InternalKey** next_smallest = nullptr); // Returns true if any one of the parent files are being compacted bool IsRangeInCompaction(VersionStorageInfo* vstorage, const InternalKey* smallest, const InternalKey* largest, int level, int* index); // Returns true if the key range that `inputs` files cover overlap with the // key range of a currently running compaction. 
bool FilesRangeOverlapWithCompaction( const std::vector& inputs, int level) const; bool SetupOtherInputs(const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, CompactionInputFiles* inputs, CompactionInputFiles* output_level_inputs, int* parent_index, int base_index); void GetGrandparents(VersionStorageInfo* vstorage, const CompactionInputFiles& inputs, const CompactionInputFiles& output_level_inputs, std::vector* grandparents); void PickFilesMarkedForCompaction(const std::string& cf_name, VersionStorageInfo* vstorage, int* start_level, int* output_level, CompactionInputFiles* start_level_inputs); bool GetOverlappingL0Files(VersionStorageInfo* vstorage, CompactionInputFiles* start_level_inputs, int output_level, int* parent_index); // Register this compaction in the set of running compactions void RegisterCompaction(Compaction* c); // Remove this compaction from the set of running compactions void UnregisterCompaction(Compaction* c); std::set* level0_compactions_in_progress() { return &level0_compactions_in_progress_; } std::unordered_set* compactions_in_progress() { return &compactions_in_progress_; } protected: const ImmutableCFOptions& ioptions_; // A helper function to SanitizeCompactionInputFiles() that // sanitizes "input_files" by adding necessary files. #ifndef ROCKSDB_LITE virtual Status SanitizeCompactionInputFilesForAllLevels( std::unordered_set* input_files, const ColumnFamilyMetaData& cf_meta, const int output_level) const; #endif // ROCKSDB_LITE // Keeps track of all compactions that are running on Level0. // Protected by DB mutex std::set level0_compactions_in_progress_; // Keeps track of all compactions that are running. // Protected by DB mutex std::unordered_set compactions_in_progress_; const InternalKeyComparator* const icmp_; }; #ifndef ROCKSDB_LITE // A dummy compaction that never triggers any automatic // compaction. 
class NullCompactionPicker : public CompactionPicker { public: NullCompactionPicker(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual ~NullCompactionPicker() {} // Always return "nullptr" Compaction* PickCompaction( const std::string& /*cf_name*/, const MutableCFOptions& /*mutable_cf_options*/, VersionStorageInfo* /*vstorage*/, LogBuffer* /* log_buffer */, SequenceNumber /* earliest_memtable_seqno */) override { return nullptr; } // Always return "nullptr" Compaction* CompactRange(const std::string& /*cf_name*/, const MutableCFOptions& /*mutable_cf_options*/, VersionStorageInfo* /*vstorage*/, int /*input_level*/, int /*output_level*/, const CompactRangeOptions& /*compact_range_options*/, const InternalKey* /*begin*/, const InternalKey* /*end*/, InternalKey** /*compaction_end*/, bool* /*manual_conflict*/, uint64_t /*max_file_num_to_ignore*/) override { return nullptr; } // Always returns false. virtual bool NeedsCompaction( const VersionStorageInfo* /*vstorage*/) const override { return false; } }; #endif // !ROCKSDB_LITE // Attempts to find an intra L0 compaction conforming to the given parameters. // // @param level_files Metadata for L0 files. // @param min_files_to_compact Minimum number of files required to // do the compaction. // @param max_compact_bytes_per_del_file Maximum average size in bytes per // file that is going to get deleted by // the compaction. // @param max_compaction_bytes Maximum total size in bytes (in terms // of compensated file size) for files // to be compacted. // @param [out] comp_inputs If a compaction was found, will be // initialized with corresponding input // files. Cannot be nullptr. // // @return true iff compaction was found. 
bool FindIntraL0Compaction( const std::vector& level_files, size_t min_files_to_compact, uint64_t max_compact_bytes_per_del_file, uint64_t max_compaction_bytes, CompactionInputFiles* comp_inputs, SequenceNumber earliest_mem_seqno = kMaxSequenceNumber); CompressionType GetCompressionType(const ImmutableCFOptions& ioptions, const VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, int level, int base_level, const bool enable_compression = true); CompressionOptions GetCompressionOptions( const MutableCFOptions& mutable_cf_options, const VersionStorageInfo* vstorage, int level, const bool enable_compression = true); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_picker_fifo.cc000066400000000000000000000222041370372246700227710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/compaction/compaction_picker_fifo.h"
#ifndef ROCKSDB_LITE

#include <cinttypes>
#include <string>
#include <vector>

#include "db/column_family.h"
#include "logging/log_buffer.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {
namespace {
// Returns the sum of fd.file_size over every file in `files`.
uint64_t GetTotalFilesSize(const std::vector<FileMetaData*>& files) {
  uint64_t total_size = 0;
  for (const auto& f : files) {
    total_size += f->fd.file_size;
  }
  return total_size;
}
}  // anonymous namespace

// A FIFO compaction is needed once the level-0 compaction score reaches 1.
bool FIFOCompactionPicker::NeedsCompaction(
    const VersionStorageInfo* vstorage) const {
  const int kLevel0 = 0;
  return vstorage->CompactionScore(kLevel0) >= 1;
}

// Builds a deletion compaction out of the level-0 files whose creation time
// is older than `mutable_cf_options.ttl`.
//
// Returns nullptr (so that the caller can fall back to size-based picking)
// when the current time cannot be read, when another level-0 compaction is
// already registered, when no file has expired, or when dropping the expired
// files would still leave the total above max_table_files_size.
// Otherwise returns a heap-allocated Compaction owned by the caller.
Compaction* FIFOCompactionPicker::PickTTLCompaction(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    VersionStorageInfo* vstorage, LogBuffer* log_buffer) {
  assert(mutable_cf_options.ttl > 0);

  const int kLevel0 = 0;
  const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0);
  uint64_t total_size = GetTotalFilesSize(level_files);

  int64_t _current_time;
  auto status = ioptions_.env->GetCurrentTime(&_current_time);
  if (!status.ok()) {
    ROCKS_LOG_BUFFER(log_buffer,
                     "[%s] FIFO compaction: Couldn't get current time: %s. "
                     "Not doing compactions based on TTL. ",
                     cf_name.c_str(), status.ToString().c_str());
    return nullptr;
  }
  const uint64_t current_time = static_cast<uint64_t>(_current_time);

  if (!level0_compactions_in_progress_.empty()) {
    ROCKS_LOG_BUFFER(
        log_buffer,
        "[%s] FIFO compaction: Already executing compaction. No need "
        "to run parallel compactions since compactions are very fast",
        cf_name.c_str());
    return nullptr;
  }

  std::vector<CompactionInputFiles> inputs;
  inputs.emplace_back();
  inputs[0].level = 0;

  // avoid underflow
  if (current_time > mutable_cf_options.ttl) {
    // Walk the level-0 file list from its back and stop at the first file
    // that is not expired (or whose creation time is unknown, i.e. 0).
    for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
      FileMetaData* f = *ritr;
      assert(f);
      if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
        uint64_t creation_time =
            f->fd.table_reader->GetTableProperties()->creation_time;
        if (creation_time == 0 ||
            creation_time >= (current_time - mutable_cf_options.ttl)) {
          break;
        }
      }
      // Subtract the same per-file quantity that GetTotalFilesSize() summed.
      // Subtracting a different size measure could take the unsigned total
      // below zero and wrap it to a huge value, which would make the
      // threshold test below fail spuriously.
      total_size -= f->fd.file_size;
      inputs[0].files.push_back(f);
    }
  }

  // Return a nullptr and proceed to size-based FIFO compaction if:
  // 1. there are no files older than ttl OR
  // 2. there are a few files older than ttl, but deleting them will not bring
  //    the total size to be less than max_table_files_size threshold.
  if (inputs[0].files.empty() ||
      total_size >
          mutable_cf_options.compaction_options_fifo.max_table_files_size) {
    return nullptr;
  }

  for (const auto& f : inputs[0].files) {
    uint64_t creation_time = 0;
    assert(f);
    if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
      creation_time = f->fd.table_reader->GetTableProperties()->creation_time;
    }
    ROCKS_LOG_BUFFER(log_buffer,
                     "[%s] FIFO compaction: picking file %" PRIu64
                     " with creation time %" PRIu64 " for deletion",
                     cf_name.c_str(), f->fd.GetNumber(), creation_time);
  }

  Compaction* c = new Compaction(
      vstorage, ioptions_, mutable_cf_options, std::move(inputs), 0, 0, 0, 0,
      kNoCompression, mutable_cf_options.compression_opts,
      /* max_subcompactions */ 0, {}, /* is manual */ false,
      vstorage->CompactionScore(0),
      /* is deletion compaction */ true, CompactionReason::kFIFOTtl);
  return c;
}

// Size-based FIFO picking.
//
// While the total level-0 size is within max_table_files_size (or level 0 is
// empty) nothing is deleted; if allow_compaction is set an intra-L0
// compaction is attempted instead. Once the limit is exceeded, files are
// taken from the back of the level-0 list and queued for deletion until the
// remaining total fits the limit.
//
// Returns a heap-allocated Compaction owned by the caller, or nullptr when
// there is nothing to do or another level-0 compaction is already registered.
Compaction* FIFOCompactionPicker::PickSizeCompaction(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    VersionStorageInfo* vstorage, LogBuffer* log_buffer) {
  const int kLevel0 = 0;
  const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0);
  uint64_t total_size = GetTotalFilesSize(level_files);

  if (total_size <=
          mutable_cf_options.compaction_options_fifo.max_table_files_size ||
      level_files.empty()) {
    // total size not exceeded
    if (mutable_cf_options.compaction_options_fifo.allow_compaction &&
        !level_files.empty()) {
      CompactionInputFiles comp_inputs;
      // try to prevent same files from being compacted multiple times, which
      // could produce large files that may never TTL-expire. Achieve this by
      // disallowing compactions with files larger than memtable (inflate its
      // size by 10% to account for uncompressed L0 files that may have size
      // slightly greater than memtable size limit).
      size_t max_compact_bytes_per_del_file =
          static_cast<size_t>(MultiplyCheckOverflow(
              static_cast<uint64_t>(mutable_cf_options.write_buffer_size),
              1.1));
      if (FindIntraL0Compaction(
              level_files,
              mutable_cf_options
                  .level0_file_num_compaction_trigger /* min_files_to_compact */
              ,
              max_compact_bytes_per_del_file,
              mutable_cf_options.max_compaction_bytes, &comp_inputs)) {
        Compaction* c = new Compaction(
            vstorage, ioptions_, mutable_cf_options, {comp_inputs}, 0,
            16 * 1024 * 1024 /* output file size limit */,
            0 /* max compaction bytes, not applicable */,
            0 /* output path ID */, mutable_cf_options.compression,
            mutable_cf_options.compression_opts, 0 /* max_subcompactions */, {},
            /* is manual */ false, vstorage->CompactionScore(0),
            /* is deletion compaction */ false,
            CompactionReason::kFIFOReduceNumFiles);
        return c;
      }
    }

    ROCKS_LOG_BUFFER(
        log_buffer,
        "[%s] FIFO compaction: nothing to do. Total size %" PRIu64
        ", max size %" PRIu64 "\n",
        cf_name.c_str(), total_size,
        mutable_cf_options.compaction_options_fifo.max_table_files_size);
    return nullptr;
  }

  if (!level0_compactions_in_progress_.empty()) {
    ROCKS_LOG_BUFFER(
        log_buffer,
        "[%s] FIFO compaction: Already executing compaction. No need "
        "to run parallel compactions since compactions are very fast",
        cf_name.c_str());
    return nullptr;
  }

  std::vector<CompactionInputFiles> inputs;
  inputs.emplace_back();
  inputs[0].level = 0;

  for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
    auto f = *ritr;
    // Subtract exactly what GetTotalFilesSize() added for this file. Each
    // file is visited once, so the unsigned total cannot drop below zero;
    // a wrapped total would never satisfy the stop condition below and
    // every remaining file would be queued for deletion.
    total_size -= f->fd.file_size;
    inputs[0].files.push_back(f);
    char tmp_fsize[16];
    AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize));
    ROCKS_LOG_BUFFER(log_buffer,
                     "[%s] FIFO compaction: picking file %" PRIu64
                     " with size %s for deletion",
                     cf_name.c_str(), f->fd.GetNumber(), tmp_fsize);
    if (total_size <=
        mutable_cf_options.compaction_options_fifo.max_table_files_size) {
      break;
    }
  }

  Compaction* c = new Compaction(
      vstorage, ioptions_, mutable_cf_options, std::move(inputs), 0, 0, 0, 0,
      kNoCompression, mutable_cf_options.compression_opts,
      /* max_subcompactions */ 0, {}, /* is manual */ false,
      vstorage->CompactionScore(0),
      /* is deletion compaction */ true, CompactionReason::kFIFOMaxSize);
  return c;
}

// Tries TTL-based picking first (only when ttl > 0) and falls back to
// size-based picking. The chosen compaction, possibly nullptr, is passed to
// RegisterCompaction() before being returned.
Compaction* FIFOCompactionPicker::PickCompaction(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    VersionStorageInfo* vstorage, LogBuffer* log_buffer,
    SequenceNumber /*earliest_memtable_seqno*/) {
  assert(vstorage->num_levels() == 1);

  Compaction* c = nullptr;
  if (mutable_cf_options.ttl > 0) {
    c = PickTTLCompaction(cf_name, mutable_cf_options, vstorage, log_buffer);
  }
  if (c == nullptr) {
    c = PickSizeCompaction(cf_name, mutable_cf_options, vstorage, log_buffer);
  }
  RegisterCompaction(c);
  return c;
}

Compaction* FIFOCompactionPicker::CompactRange(
    const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
    VersionStorageInfo* vstorage, int input_level, int output_level,
    const CompactRangeOptions& /*compact_range_options*/,
    const InternalKey* /*begin*/, const InternalKey* /*end*/,
    InternalKey** compaction_end, bool* /*manual_conflict*/,
    uint64_t /*max_file_num_to_ignore*/) {
#ifdef NDEBUG
  (void)input_level;
  (void)output_level;
#endif
  assert(input_level == 0);
assert(output_level == 0); *compaction_end = nullptr; LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, ioptions_.info_log); Compaction* c = PickCompaction(cf_name, mutable_cf_options, vstorage, &log_buffer); log_buffer.FlushBufferToLog(); return c; } } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/compaction/compaction_picker_fifo.h000066400000000000000000000043361370372246700226410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include "db/compaction/compaction_picker.h" namespace ROCKSDB_NAMESPACE { class FIFOCompactionPicker : public CompactionPicker { public: FIFOCompactionPicker(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* version, LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) override; virtual Compaction* CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, uint64_t max_file_num_to_ignore) override; // The maximum allowed output level. Always returns 0. 
virtual int MaxOutputLevel() const override { return 0; } virtual bool NeedsCompaction( const VersionStorageInfo* vstorage) const override; private: Compaction* PickTTLCompaction(const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* version, LogBuffer* log_buffer); Compaction* PickSizeCompaction(const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* version, LogBuffer* log_buffer); }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/compaction/compaction_picker_level.cc000066400000000000000000000451121370372246700231600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include #include "db/compaction/compaction_picker_level.h" #include "logging/log_buffer.h" #include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { bool LevelCompactionPicker::NeedsCompaction( const VersionStorageInfo* vstorage) const { if (!vstorage->ExpiredTtlFiles().empty()) { return true; } if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) { return true; } if (!vstorage->BottommostFilesMarkedForCompaction().empty()) { return true; } if (!vstorage->FilesMarkedForCompaction().empty()) { return true; } for (int i = 0; i <= vstorage->MaxInputLevel(); i++) { if (vstorage->CompactionScore(i) >= 1) { return true; } } return false; } namespace { // A class to build a leveled compaction step-by-step. 
class LevelCompactionBuilder { public: LevelCompactionBuilder(const std::string& cf_name, VersionStorageInfo* vstorage, SequenceNumber earliest_mem_seqno, CompactionPicker* compaction_picker, LogBuffer* log_buffer, const MutableCFOptions& mutable_cf_options, const ImmutableCFOptions& ioptions) : cf_name_(cf_name), vstorage_(vstorage), earliest_mem_seqno_(earliest_mem_seqno), compaction_picker_(compaction_picker), log_buffer_(log_buffer), mutable_cf_options_(mutable_cf_options), ioptions_(ioptions) {} // Pick and return a compaction. Compaction* PickCompaction(); // Pick the initial files to compact to the next level. (or together // in Intra-L0 compactions) void SetupInitialFiles(); // If the initial files are from L0 level, pick other L0 // files if needed. bool SetupOtherL0FilesIfNeeded(); // Based on initial files, setup other files need to be compacted // in this compaction, accordingly. bool SetupOtherInputsIfNeeded(); Compaction* GetCompaction(); // For the specfied level, pick a file that we want to compact. // Returns false if there is no file to compact. // If it returns true, inputs->files.size() will be exactly one. // If level is 0 and there is already a compaction on that level, this // function will return false. bool PickFileToCompact(); // For L0->L0, picks the longest span of files that aren't currently // undergoing compaction for which work-per-deleted-file decreases. The span // always starts from the newest L0 file. // // Intra-L0 compaction is independent of all other files, so it can be // performed even when L0->base_level compactions are blocked. // // Returns true if `inputs` is populated with a span of files to be compacted; // otherwise, returns false. bool PickIntraL0Compaction(); // Picks a file from level_files to compact. // level_files is a vector of (level, file metadata) in ascending order of // level. If compact_to_next_level is true, compact the file to the next // level, otherwise, compact to the same level as the input file. 
void PickFileToCompact( const autovector>& level_files, bool compact_to_next_level); const std::string& cf_name_; VersionStorageInfo* vstorage_; SequenceNumber earliest_mem_seqno_; CompactionPicker* compaction_picker_; LogBuffer* log_buffer_; int start_level_ = -1; int output_level_ = -1; int parent_index_ = -1; int base_index_ = -1; double start_level_score_ = 0; bool is_manual_ = false; CompactionInputFiles start_level_inputs_; std::vector compaction_inputs_; CompactionInputFiles output_level_inputs_; std::vector grandparents_; CompactionReason compaction_reason_ = CompactionReason::kUnknown; const MutableCFOptions& mutable_cf_options_; const ImmutableCFOptions& ioptions_; // Pick a path ID to place a newly generated file, with its level static uint32_t GetPathId(const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, int level); static const int kMinFilesForIntraL0Compaction = 4; }; void LevelCompactionBuilder::PickFileToCompact( const autovector>& level_files, bool compact_to_next_level) { for (auto& level_file : level_files) { // If it's being compacted it has nothing to do here. // If this assert() fails that means that some function marked some // files as being_compacted, but didn't call ComputeCompactionScore() assert(!level_file.second->being_compacted); start_level_ = level_file.first; if ((compact_to_next_level && start_level_ == vstorage_->num_non_empty_levels() - 1) || (start_level_ == 0 && !compaction_picker_->level0_compactions_in_progress()->empty())) { continue; } if (compact_to_next_level) { output_level_ = (start_level_ == 0) ? 
vstorage_->base_level() : start_level_ + 1; } else { output_level_ = start_level_; } start_level_inputs_.files = {level_file.second}; start_level_inputs_.level = start_level_; if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, &start_level_inputs_)) { return; } } start_level_inputs_.files.clear(); } void LevelCompactionBuilder::SetupInitialFiles() { // Find the compactions by size on all levels. bool skipped_l0_to_base = false; for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) { start_level_score_ = vstorage_->CompactionScore(i); start_level_ = vstorage_->CompactionScoreLevel(i); assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1)); if (start_level_score_ >= 1) { if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) { // If L0->base_level compaction is pending, don't schedule further // compaction from base level. Otherwise L0->base_level compaction // may starve. continue; } output_level_ = (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; if (PickFileToCompact()) { // found the compaction! if (start_level_ == 0) { // L0 score = `num L0 files` / `level0_file_num_compaction_trigger` compaction_reason_ = CompactionReason::kLevelL0FilesNum; } else { // L1+ score = `Level files size` / `MaxBytesForLevel` compaction_reason_ = CompactionReason::kLevelMaxLevelSize; } break; } else { // didn't find the compaction, clear the inputs start_level_inputs_.clear(); if (start_level_ == 0) { skipped_l0_to_base = true; // L0->base_level may be blocked due to ongoing L0->base_level // compactions. It may also be blocked by an ongoing compaction from // base_level downwards. // // In these cases, to reduce L0 file count and thus reduce likelihood // of write stalls, we can attempt compacting a span of files within // L0. 
if (PickIntraL0Compaction()) { output_level_ = 0; compaction_reason_ = CompactionReason::kLevelL0FilesNum; break; } } } } else { // Compaction scores are sorted in descending order, no further scores // will be >= 1. break; } } if (!start_level_inputs_.empty()) { return; } // if we didn't find a compaction, check if there are any files marked for // compaction parent_index_ = base_index_ = -1; compaction_picker_->PickFilesMarkedForCompaction( cf_name_, vstorage_, &start_level_, &output_level_, &start_level_inputs_); if (!start_level_inputs_.empty()) { compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; return; } // Bottommost Files Compaction on deleting tombstones PickFileToCompact(vstorage_->BottommostFilesMarkedForCompaction(), false); if (!start_level_inputs_.empty()) { compaction_reason_ = CompactionReason::kBottommostFiles; return; } // TTL Compaction PickFileToCompact(vstorage_->ExpiredTtlFiles(), true); if (!start_level_inputs_.empty()) { compaction_reason_ = CompactionReason::kTtl; return; } // Periodic Compaction PickFileToCompact(vstorage_->FilesMarkedForPeriodicCompaction(), false); if (!start_level_inputs_.empty()) { compaction_reason_ = CompactionReason::kPeriodicCompaction; return; } } bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() { if (start_level_ == 0 && output_level_ != 0) { return compaction_picker_->GetOverlappingL0Files( vstorage_, &start_level_inputs_, output_level_, &parent_index_); } return true; } bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() { // Setup input files from output level. For output to L0, we only compact // spans of files that do not interact with any pending compactions, so don't // need to consider other levels. 
if (output_level_ != 0) { output_level_inputs_.level = output_level_; if (!compaction_picker_->SetupOtherInputs( cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_, &output_level_inputs_, &parent_index_, base_index_)) { return false; } compaction_inputs_.push_back(start_level_inputs_); if (!output_level_inputs_.empty()) { compaction_inputs_.push_back(output_level_inputs_); } // In some edge cases we could pick a compaction that will be compacting // a key range that overlap with another running compaction, and both // of them have the same output level. This could happen if // (1) we are running a non-exclusive manual compaction // (2) AddFile ingest a new file into the LSM tree // We need to disallow this from happening. if (compaction_picker_->FilesRangeOverlapWithCompaction(compaction_inputs_, output_level_)) { // This compaction output could potentially conflict with the output // of a currently running compaction, we cannot run it. return false; } compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_, output_level_inputs_, &grandparents_); } else { compaction_inputs_.push_back(start_level_inputs_); } return true; } Compaction* LevelCompactionBuilder::PickCompaction() { // Pick up the first file to start compaction. It may have been extended // to a clean cut. SetupInitialFiles(); if (start_level_inputs_.empty()) { return nullptr; } assert(start_level_ >= 0 && output_level_ >= 0); // If it is a L0 -> base level compaction, we need to set up other L0 // files if needed. if (!SetupOtherL0FilesIfNeeded()) { return nullptr; } // Pick files in the output level and expand more files in the start level // if needed. if (!SetupOtherInputsIfNeeded()) { return nullptr; } // Form a compaction object containing the files we picked. 
Compaction* c = GetCompaction(); TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c); return c; } Compaction* LevelCompactionBuilder::GetCompaction() { auto c = new Compaction( vstorage_, ioptions_, mutable_cf_options_, std::move(compaction_inputs_), output_level_, MaxFileSizeForLevel(mutable_cf_options_, output_level_, ioptions_.compaction_style, vstorage_->base_level(), ioptions_.level_compaction_dynamic_level_bytes), mutable_cf_options_.max_compaction_bytes, GetPathId(ioptions_, mutable_cf_options_, output_level_), GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, output_level_, vstorage_->base_level()), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level_), /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, start_level_score_, false /* deletion_compaction */, compaction_reason_); // If it's level 0 compaction, make sure we don't execute any other level 0 // compactions in parallel compaction_picker_->RegisterCompaction(c); // Creating a compaction influences the compaction score because the score // takes running compactions into account (by skipping files that are already // being compacted). Since we just changed compaction score, we recalculate it // here vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); return c; } /* * Find the optimal path to place a file * Given a level, finds the path where levels up to it will fit in levels * up to and including this path */ uint32_t LevelCompactionBuilder::GetPathId( const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, int level) { uint32_t p = 0; assert(!ioptions.cf_paths.empty()); // size remaining in the most recent path uint64_t current_path_size = ioptions.cf_paths[0].target_size; uint64_t level_size; int cur_level = 0; // max_bytes_for_level_base denotes L1 size. // We estimate L0 size to be the same as L1. 
level_size = mutable_cf_options.max_bytes_for_level_base; // Last path is the fallback while (p < ioptions.cf_paths.size() - 1) { if (level_size <= current_path_size) { if (cur_level == level) { // Does desired level fit in this path? return p; } else { current_path_size -= level_size; if (cur_level > 0) { if (ioptions.level_compaction_dynamic_level_bytes) { // Currently, level_compaction_dynamic_level_bytes is ignored when // multiple db paths are specified. https://github.com/facebook/ // rocksdb/blob/master/db/column_family.cc. // Still, adding this check to avoid accidentally using // max_bytes_for_level_multiplier_additional level_size = static_cast( level_size * mutable_cf_options.max_bytes_for_level_multiplier); } else { level_size = static_cast( level_size * mutable_cf_options.max_bytes_for_level_multiplier * mutable_cf_options.MaxBytesMultiplerAdditional(cur_level)); } } cur_level++; continue; } } p++; current_path_size = ioptions.cf_paths[p].target_size; } return p; } bool LevelCompactionBuilder::PickFileToCompact() { // level 0 files are overlapping. So we cannot pick more // than one concurrent compactions at this level. This // could be made better by looking at key-ranges that are // being compacted at level 0. if (start_level_ == 0 && !compaction_picker_->level0_compactions_in_progress()->empty()) { TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0"); return false; } start_level_inputs_.clear(); assert(start_level_ >= 0); // Pick the largest file in this level that is not already // being compacted const std::vector& file_size = vstorage_->FilesByCompactionPri(start_level_); const std::vector& level_files = vstorage_->LevelFiles(start_level_); unsigned int cmp_idx; for (cmp_idx = vstorage_->NextCompactionIndex(start_level_); cmp_idx < file_size.size(); cmp_idx++) { int index = file_size[cmp_idx]; auto* f = level_files[index]; // do not pick a file to compact if it is being compacted // from n-1 level. 
if (f->being_compacted) { continue; } start_level_inputs_.files.push_back(f); start_level_inputs_.level = start_level_; if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, &start_level_inputs_) || compaction_picker_->FilesRangeOverlapWithCompaction( {start_level_inputs_}, output_level_)) { // A locked (pending compaction) input-level file was pulled in due to // user-key overlap. start_level_inputs_.clear(); continue; } // Now that input level is fully expanded, we check whether any output files // are locked due to pending compaction. // // Note we rely on ExpandInputsToCleanCut() to tell us whether any output- // level files are locked, not just the extra ones pulled in for user-key // overlap. InternalKey smallest, largest; compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest); CompactionInputFiles output_level_inputs; output_level_inputs.level = output_level_; vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, &output_level_inputs.files); if (!output_level_inputs.empty() && !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, &output_level_inputs)) { start_level_inputs_.clear(); continue; } base_index_ = index; break; } // store where to start the iteration in the next call to PickCompaction vstorage_->SetNextCompactionIndex(start_level_, cmp_idx); return start_level_inputs_.size() > 0; } bool LevelCompactionBuilder::PickIntraL0Compaction() { start_level_inputs_.clear(); const std::vector& level_files = vstorage_->LevelFiles(0 /* level */); if (level_files.size() < static_cast( mutable_cf_options_.level0_file_num_compaction_trigger + 2) || level_files[0]->being_compacted) { // If L0 isn't accumulating much files beyond the regular trigger, don't // resort to L0->L0 compaction yet. 
return false; } return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction, port::kMaxUint64, mutable_cf_options_.max_compaction_bytes, &start_level_inputs_, earliest_mem_seqno_); } } // namespace Compaction* LevelCompactionPicker::PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, LogBuffer* log_buffer, SequenceNumber earliest_mem_seqno) { LevelCompactionBuilder builder(cf_name, vstorage, earliest_mem_seqno, this, log_buffer, mutable_cf_options, ioptions_); return builder.PickCompaction(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_picker_level.h000066400000000000000000000025031370372246700230170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "db/compaction/compaction_picker.h" namespace ROCKSDB_NAMESPACE { // Picking compactions for leveled compaction. See wiki page // https://github.com/facebook/rocksdb/wiki/Leveled-Compaction // for description of Leveled compaction. 
class LevelCompactionPicker : public CompactionPicker { public: LevelCompactionPicker(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) override; virtual bool NeedsCompaction( const VersionStorageInfo* vstorage) const override; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/compaction/compaction_picker_test.cc000066400000000000000000002317011370372246700230310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include "db/compaction/compaction.h" #include "db/compaction/compaction_picker_fifo.h" #include "db/compaction/compaction_picker_level.h" #include "db/compaction/compaction_picker_universal.h" #include "logging/logging.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class CountingLogger : public Logger { public: using Logger::Logv; void Logv(const char* /*format*/, va_list /*ap*/) override { log_count++; } size_t log_count; }; class CompactionPickerTest : public testing::Test { public: const Comparator* ucmp_; InternalKeyComparator icmp_; Options options_; ImmutableCFOptions ioptions_; MutableCFOptions mutable_cf_options_; LevelCompactionPicker level_compaction_picker; std::string cf_name_; CountingLogger logger_; LogBuffer log_buffer_; uint32_t file_num_; CompactionOptionsFIFO fifo_options_; std::unique_ptr vstorage_; std::vector> files_; // does not own FileMetaData std::unordered_map> file_map_; // input files to compaction process. 
std::vector input_files_; int compaction_level_start_; CompactionPickerTest() : ucmp_(BytewiseComparator()), icmp_(ucmp_), ioptions_(options_), mutable_cf_options_(options_), level_compaction_picker(ioptions_, &icmp_), cf_name_("dummy"), log_buffer_(InfoLogLevel::INFO_LEVEL, &logger_), file_num_(1), vstorage_(nullptr) { mutable_cf_options_.ttl = 0; mutable_cf_options_.periodic_compaction_seconds = 0; // ioptions_.compaction_pri = kMinOverlappingRatio has its own set of // tests to cover. ioptions_.compaction_pri = kByCompensatedSize; fifo_options_.max_table_files_size = 1; mutable_cf_options_.RefreshDerivedOptions(ioptions_); ioptions_.cf_paths.emplace_back("dummy", std::numeric_limits::max()); } ~CompactionPickerTest() override {} void NewVersionStorage(int num_levels, CompactionStyle style) { DeleteVersionStorage(); options_.num_levels = num_levels; vstorage_.reset(new VersionStorageInfo(&icmp_, ucmp_, options_.num_levels, style, nullptr, false)); vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_); } // Create a new VersionStorageInfo object so we can add mode files and then // merge it with the existing VersionStorageInfo void AddVersionStorage() { temp_vstorage_.reset(new VersionStorageInfo( &icmp_, ucmp_, options_.num_levels, ioptions_.compaction_style, vstorage_.get(), false)); } void DeleteVersionStorage() { vstorage_.reset(); temp_vstorage_.reset(); files_.clear(); file_map_.clear(); input_files_.clear(); } void Add(int level, uint32_t file_number, const char* smallest, const char* largest, uint64_t file_size = 1, uint32_t path_id = 0, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100, size_t compensated_file_size = 0, bool marked_for_compact = false) { VersionStorageInfo* vstorage; if (temp_vstorage_) { vstorage = temp_vstorage_.get(); } else { vstorage = vstorage_.get(); } assert(level < vstorage->num_levels()); FileMetaData* f = new FileMetaData( file_number, path_id, file_size, InternalKey(smallest, smallest_seq, 
kTypeValue), InternalKey(largest, largest_seq, kTypeValue), smallest_seq, largest_seq, marked_for_compact, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); f->compensated_file_size = (compensated_file_size != 0) ? compensated_file_size : file_size; vstorage->AddFile(level, f); files_.emplace_back(f); file_map_.insert({file_number, {f, level}}); } void SetCompactionInputFilesLevels(int level_count, int start_level) { input_files_.resize(level_count); for (int i = 0; i < level_count; ++i) { input_files_[i].level = start_level + i; } compaction_level_start_ = start_level; } void AddToCompactionFiles(uint32_t file_number) { auto iter = file_map_.find(file_number); assert(iter != file_map_.end()); int level = iter->second.second; assert(level < vstorage_->num_levels()); input_files_[level - compaction_level_start_].files.emplace_back( iter->second.first); } void UpdateVersionStorageInfo() { if (temp_vstorage_) { VersionBuilder builder(FileOptions(), &ioptions_, nullptr, vstorage_.get(), nullptr); builder.SaveTo(temp_vstorage_.get()); vstorage_ = std::move(temp_vstorage_); } vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_); vstorage_->UpdateFilesByCompactionPri(ioptions_.compaction_pri); vstorage_->UpdateNumNonEmptyLevels(); vstorage_->GenerateFileIndexer(); vstorage_->GenerateLevelFilesBrief(); vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); vstorage_->GenerateLevel0NonOverlapping(); vstorage_->ComputeFilesMarkedForCompaction(); vstorage_->SetFinalized(); } void AddFileToVersionStorage(int level, uint32_t file_number, const char* smallest, const char* largest, uint64_t file_size = 1, uint32_t path_id = 0, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100, size_t compensated_file_size = 0, bool marked_for_compact = false) { VersionStorageInfo* base_vstorage = vstorage_.release(); vstorage_.reset(new VersionStorageInfo(&icmp_, ucmp_, 
options_.num_levels, kCompactionStyleUniversal, base_vstorage, false)); Add(level, file_number, smallest, largest, file_size, path_id, smallest_seq, largest_seq, compensated_file_size, marked_for_compact); VersionBuilder builder(FileOptions(), &ioptions_, nullptr, base_vstorage, nullptr); builder.SaveTo(vstorage_.get()); UpdateVersionStorageInfo(); } private: std::unique_ptr temp_vstorage_; }; TEST_F(CompactionPickerTest, Empty) { NewVersionStorage(6, kCompactionStyleLevel); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); } TEST_F(CompactionPickerTest, Single) { NewVersionStorage(6, kCompactionStyleLevel); mutable_cf_options_.level0_file_num_compaction_trigger = 2; Add(0, 1U, "p", "q"); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); } TEST_F(CompactionPickerTest, Level0Trigger) { NewVersionStorage(6, kCompactionStyleLevel); mutable_cf_options_.level0_file_num_compaction_trigger = 2; Add(0, 1U, "150", "200"); Add(0, 2U, "200", "250"); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); } TEST_F(CompactionPickerTest, Level1Trigger) { NewVersionStorage(6, kCompactionStyleLevel); Add(1, 66U, "150", "200", 1000000000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); 
ASSERT_EQ(66U, compaction->input(0, 0)->fd.GetNumber());
}

// The larger L1 file (66) is picked together with the two L2 files it
// overlaps (6 and 7).
TEST_F(CompactionPickerTest, Level1Trigger2) {
  mutable_cf_options_.target_file_size_base = 10000000000;
  mutable_cf_options_.RefreshDerivedOptions(ioptions_);
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(1, 66U, "150", "200", 1000000001U);
  Add(1, 88U, "201", "300", 1000000000U);
  Add(2, 6U, "150", "179", 1000000000U);
  Add(2, 7U, "180", "220", 1000000000U);
  Add(2, 8U, "221", "300", 1000000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(2U, compaction->num_input_files(1));
  ASSERT_EQ(66U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(6U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 1)->fd.GetNumber());
  ASSERT_EQ(uint64_t{1073741824}, compaction->OutputFilePreallocationSize());
}

// With several levels over their target, the level with the highest score
// (L2) is compacted first.
TEST_F(CompactionPickerTest, LevelMaxScore) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.max_bytes_for_level_base = 10 * 1024 * 1024;
  mutable_cf_options_.RefreshDerivedOptions(ioptions_);
  Add(0, 1U, "150", "200", 1000000U);
  // Level 1 score 1.2
  Add(1, 66U, "150", "200", 6000000U);
  Add(1, 88U, "201", "300", 6000000U);
  // Level 2 score 1.8. File 7 is the largest. Should be picked
  Add(2, 6U, "150", "179", 60000000U);
  Add(2, 7U, "180", "220", 60000001U);
  Add(2, 8U, "221", "300", 60000000U);
  // Level 3 score slightly larger than 1
  Add(3, 26U, "150", "170", 260000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 260000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(mutable_cf_options_.target_file_size_base +
                mutable_cf_options_.target_file_size_base / 10,
            compaction->OutputFilePreallocationSize());
}

// For each level and each file count, NeedsCompaction() must agree with
// "top compaction score >= 1".
TEST_F(CompactionPickerTest, NeedsCompactionLevel) {
  const int kLevels = 6;
  const int kFileCount = 20;

  for (int level = 0; level < kLevels - 1; ++level) {
    NewVersionStorage(kLevels, kCompactionStyleLevel);
    uint64_t file_size = vstorage_->MaxBytesForLevel(level) * 2 / kFileCount;
    for (int file_count = 1; file_count <= kFileCount; ++file_count) {
      // start a brand new version in each test.
NewVersionStorage(kLevels, kCompactionStyleLevel); for (int i = 0; i < file_count; ++i) { Add(level, i, ToString((i + 100) * 1000).c_str(), ToString((i + 100) * 1000 + 999).c_str(), file_size, 0, i * 100, i * 100 + 99); } UpdateVersionStorageInfo(); ASSERT_EQ(vstorage_->CompactionScoreLevel(0), level); ASSERT_EQ(level_compaction_picker.NeedsCompaction(vstorage_.get()), vstorage_->CompactionScore(0) >= 1); // release the version storage DeleteVersionStorage(); } } } TEST_F(CompactionPickerTest, Level0TriggerDynamic) { int num_levels = ioptions_.num_levels; ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(0, 2U, "200", "250"); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); ASSERT_EQ(1, static_cast(compaction->num_input_levels())); ASSERT_EQ(num_levels - 1, compaction->output_level()); } TEST_F(CompactionPickerTest, Level0TriggerDynamic2) { int num_levels = ioptions_.num_levels; ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(0, 2U, "200", "250"); Add(num_levels - 1, 3U, "200", "250", 300U); UpdateVersionStorageInfo(); ASSERT_EQ(vstorage_->base_level(), num_levels - 2); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, 
mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); ASSERT_EQ(1, static_cast(compaction->num_input_levels())); ASSERT_EQ(num_levels - 2, compaction->output_level()); } TEST_F(CompactionPickerTest, Level0TriggerDynamic3) { int num_levels = ioptions_.num_levels; ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(0, 2U, "200", "250"); Add(num_levels - 1, 3U, "200", "250", 300U); Add(num_levels - 1, 4U, "300", "350", 3000U); UpdateVersionStorageInfo(); ASSERT_EQ(vstorage_->base_level(), num_levels - 3); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); ASSERT_EQ(1, static_cast(compaction->num_input_levels())); ASSERT_EQ(num_levels - 3, compaction->output_level()); } TEST_F(CompactionPickerTest, Level0TriggerDynamic4) { int num_levels = ioptions_.num_levels; ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(0, 2U, "200", "250"); Add(num_levels - 1, 3U, "200", "250", 300U); Add(num_levels - 1, 4U, "300", "350", 3000U); Add(num_levels - 3, 5U, "150", "180", 3U); Add(num_levels - 3, 6U, 
"181", "300", 3U); Add(num_levels - 3, 7U, "400", "450", 3U); UpdateVersionStorageInfo(); ASSERT_EQ(vstorage_->base_level(), num_levels - 3); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); ASSERT_EQ(2U, compaction->num_input_files(1)); ASSERT_EQ(num_levels - 3, compaction->level(1)); ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber()); ASSERT_EQ(6U, compaction->input(1, 1)->fd.GetNumber()); ASSERT_EQ(2, static_cast(compaction->num_input_levels())); ASSERT_EQ(num_levels - 3, compaction->output_level()); } TEST_F(CompactionPickerTest, LevelTriggerDynamic4) { int num_levels = ioptions_.num_levels; ioptions_.level_compaction_dynamic_level_bytes = true; ioptions_.compaction_pri = kMinOverlappingRatio; mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(num_levels - 1, 2U, "200", "250", 300U); Add(num_levels - 1, 3U, "300", "350", 3000U); Add(num_levels - 1, 4U, "400", "450", 3U); Add(num_levels - 2, 5U, "150", "180", 300U); Add(num_levels - 2, 6U, "181", "350", 500U); Add(num_levels - 2, 7U, "400", "450", 200U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(5U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(0, compaction->num_input_files(1)); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(num_levels - 1, compaction->output_level()); } // Universal and FIFO Compactions 
are not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(CompactionPickerTest, NeedsCompactionUniversal) { NewVersionStorage(1, kCompactionStyleUniversal); UniversalCompactionPicker universal_compaction_picker( ioptions_, &icmp_); UpdateVersionStorageInfo(); // must return false when there's no files. ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()), false); // verify the trigger given different number of L0 files. for (int i = 1; i <= mutable_cf_options_.level0_file_num_compaction_trigger * 2; ++i) { NewVersionStorage(1, kCompactionStyleUniversal); Add(0, i, ToString((i + 100) * 1000).c_str(), ToString((i + 100) * 1000 + 999).c_str(), 1000000, 0, i * 100, i * 100 + 99); UpdateVersionStorageInfo(); ASSERT_EQ(level_compaction_picker.NeedsCompaction(vstorage_.get()), vstorage_->CompactionScore(0) >= 1); } } TEST_F(CompactionPickerTest, CompactionUniversalIngestBehindReservedLevel) { const uint64_t kFileSize = 100000; NewVersionStorage(1, kCompactionStyleUniversal); ioptions_.allow_ingest_behind = true; ioptions_.num_levels = 3; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); UpdateVersionStorageInfo(); // must return false when there's no files. 
ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()), false); NewVersionStorage(3, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); Add(0, 4U, "260", "300", kFileSize, 0, 260, 300); Add(1, 5U, "100", "151", kFileSize, 0, 200, 251); Add(1, 3U, "301", "350", kFileSize, 0, 101, 150); Add(2, 6U, "120", "200", kFileSize, 0, 20, 100); UpdateVersionStorageInfo(); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); // output level should be the one above the bottom-most ASSERT_EQ(1, compaction->output_level()); } // Tests if the files can be trivially moved in multi level // universal compaction when allow_trivial_move option is set // In this test as the input files overlaps, they cannot // be trivially moved. TEST_F(CompactionPickerTest, CannotTrivialMoveUniversal) { const uint64_t kFileSize = 100000; mutable_cf_options_.compaction_options_universal.allow_trivial_move = true; NewVersionStorage(1, kCompactionStyleUniversal); UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); UpdateVersionStorageInfo(); // must return false when there's no files. 
ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()), false); NewVersionStorage(3, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); Add(0, 4U, "260", "300", kFileSize, 0, 260, 300); Add(1, 5U, "100", "151", kFileSize, 0, 200, 251); Add(1, 3U, "301", "350", kFileSize, 0, 101, 150); Add(2, 6U, "120", "200", kFileSize, 0, 20, 100); UpdateVersionStorageInfo(); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(!compaction->is_trivial_move()); } // Tests if the files can be trivially moved in multi level // universal compaction when allow_trivial_move option is set // In this test as the input files doesn't overlaps, they should // be trivially moved. TEST_F(CompactionPickerTest, AllowsTrivialMoveUniversal) { const uint64_t kFileSize = 100000; mutable_cf_options_.compaction_options_universal.allow_trivial_move = true; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(3, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); Add(0, 4U, "260", "300", kFileSize, 0, 260, 300); Add(1, 5U, "010", "080", kFileSize, 0, 200, 251); Add(2, 3U, "301", "350", kFileSize, 0, 101, 150); UpdateVersionStorageInfo(); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction->is_trivial_move()); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction1) { // The case where universal periodic compaction can be picked // with some newer files being compacted. 
const uint64_t kFileSize = 100000; mutable_cf_options_.periodic_compaction_seconds = 1000; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); Add(0, 4U, "260", "300", kFileSize, 0, 260, 300); Add(3, 5U, "010", "080", kFileSize, 0, 200, 251); Add(4, 3U, "301", "350", kFileSize, 0, 101, 150); Add(4, 6U, "501", "750", kFileSize, 0, 101, 150); file_map_[2].first->being_compacted = true; UpdateVersionStorageInfo(); vstorage_->TEST_AddFileMarkedForPeriodicCompaction(4, file_map_[3].first); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction); ASSERT_EQ(4, compaction->output_level()); ASSERT_EQ(0, compaction->start_level()); ASSERT_EQ(1U, compaction->num_input_files(0)); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction2) { // The case where universal periodic compaction does not // pick up only level to compact if it doesn't cover // any file marked as periodic compaction. 
const uint64_t kFileSize = 100000; mutable_cf_options_.periodic_compaction_seconds = 1000; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(3, 5U, "010", "080", kFileSize, 0, 200, 251); Add(4, 3U, "301", "350", kFileSize, 0, 101, 150); Add(4, 6U, "501", "750", kFileSize, 0, 101, 150); file_map_[5].first->being_compacted = true; UpdateVersionStorageInfo(); vstorage_->TEST_AddFileMarkedForPeriodicCompaction(0, file_map_[1].first); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_FALSE(compaction); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction3) { // The case where universal periodic compaction does not // pick up only the last sorted run which is an L0 file if it isn't // marked as periodic compaction. const uint64_t kFileSize = 100000; mutable_cf_options_.periodic_compaction_seconds = 1000; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 5U, "010", "080", kFileSize, 0, 200, 251); Add(0, 6U, "501", "750", kFileSize, 0, 101, 150); file_map_[5].first->being_compacted = true; UpdateVersionStorageInfo(); vstorage_->TEST_AddFileMarkedForPeriodicCompaction(0, file_map_[1].first); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_FALSE(compaction); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction4) { // The case where universal periodic compaction couldn't form // a compaction that inlcudes any file marked for periodic compaction. // Right now we form the compaction anyway if it is more than one // sorted run. Just put the case here to validate that it doesn't // crash. 
const uint64_t kFileSize = 100000; mutable_cf_options_.periodic_compaction_seconds = 1000; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(2, 2U, "010", "080", kFileSize, 0, 200, 251); Add(3, 5U, "010", "080", kFileSize, 0, 200, 251); Add(4, 3U, "301", "350", kFileSize, 0, 101, 150); Add(4, 6U, "501", "750", kFileSize, 0, 101, 150); file_map_[2].first->being_compacted = true; UpdateVersionStorageInfo(); vstorage_->TEST_AddFileMarkedForPeriodicCompaction(0, file_map_[2].first); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(!compaction || compaction->start_level() != compaction->output_level()); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction5) { // Test single L0 file periodic compaction triggering. const uint64_t kFileSize = 100000; mutable_cf_options_.periodic_compaction_seconds = 1000; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 6U, "150", "200", kFileSize, 0, 500, 550); UpdateVersionStorageInfo(); vstorage_->TEST_AddFileMarkedForPeriodicCompaction(0, file_map_[6].first); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction); ASSERT_EQ(0, compaction->start_level()); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(6U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(4, compaction->output_level()); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction6) { // Test single sorted run non-L0 periodic compaction const uint64_t kFileSize = 100000; mutable_cf_options_.periodic_compaction_seconds = 1000; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); 
Add(4, 5U, "150", "200", kFileSize, 0, 500, 550); Add(4, 6U, "350", "400", kFileSize, 0, 500, 550); UpdateVersionStorageInfo(); vstorage_->TEST_AddFileMarkedForPeriodicCompaction(4, file_map_[6].first); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction); ASSERT_EQ(4, compaction->start_level()); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(5U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(6U, compaction->input(0, 1)->fd.GetNumber()); ASSERT_EQ(4, compaction->output_level()); } TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { NewVersionStorage(1, kCompactionStyleFIFO); const int kFileCount = mutable_cf_options_.level0_file_num_compaction_trigger * 3; const uint64_t kFileSize = 100000; const uint64_t kMaxSize = kFileSize * kFileCount / 2; fifo_options_.max_table_files_size = kMaxSize; mutable_cf_options_.compaction_options_fifo = fifo_options_; FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); UpdateVersionStorageInfo(); // must return false when there's no files. ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), false); // verify whether compaction is needed based on the current // size of L0 files. 
uint64_t current_size = 0; for (int i = 1; i <= kFileCount; ++i) { NewVersionStorage(1, kCompactionStyleFIFO); Add(0, i, ToString((i + 100) * 1000).c_str(), ToString((i + 100) * 1000 + 999).c_str(), kFileSize, 0, i * 100, i * 100 + 99); current_size += kFileSize; UpdateVersionStorageInfo(); ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), vstorage_->CompactionScore(0) >= 1); } } #endif // ROCKSDB_LITE TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) { NewVersionStorage(6, kCompactionStyleLevel); ioptions_.compaction_pri = kMinOverlappingRatio; mutable_cf_options_.target_file_size_base = 100000000000; mutable_cf_options_.target_file_size_multiplier = 10; mutable_cf_options_.max_bytes_for_level_base = 10 * 1024 * 1024; mutable_cf_options_.RefreshDerivedOptions(ioptions_); Add(2, 6U, "150", "179", 50000000U); Add(2, 7U, "180", "220", 50000000U); Add(2, 8U, "321", "400", 50000000U); // File not overlapping Add(2, 9U, "721", "800", 50000000U); Add(3, 26U, "150", "170", 260000000U); Add(3, 27U, "171", "179", 260000000U); Add(3, 28U, "191", "220", 260000000U); Add(3, 29U, "221", "300", 260000000U); Add(3, 30U, "750", "900", 260000000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Pick file 8 because it overlaps with 0 files on level 3. 
ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber()); // Compaction input size * 1.1 ASSERT_GE(uint64_t{55000000}, compaction->OutputFilePreallocationSize()); } TEST_F(CompactionPickerTest, CompactionPriMinOverlapping2) { NewVersionStorage(6, kCompactionStyleLevel); ioptions_.compaction_pri = kMinOverlappingRatio; mutable_cf_options_.target_file_size_base = 10000000; mutable_cf_options_.target_file_size_multiplier = 10; mutable_cf_options_.max_bytes_for_level_base = 10 * 1024 * 1024; Add(2, 6U, "150", "175", 60000000U); // Overlaps with file 26, 27, total size 521M Add(2, 7U, "176", "200", 60000000U); // Overlaps with file 27, 28, total size // 520M, the smalelst overlapping Add(2, 8U, "201", "300", 60000000U); // Overlaps with file 28, 29, total size 521M Add(3, 25U, "100", "110", 261000000U); Add(3, 26U, "150", "170", 261000000U); Add(3, 27U, "171", "179", 260000000U); Add(3, 28U, "191", "220", 260000000U); Add(3, 29U, "221", "300", 261000000U); Add(3, 30U, "321", "400", 261000000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Picking file 7 because overlapping ratio is the biggest. ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber()); } TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) { NewVersionStorage(6, kCompactionStyleLevel); ioptions_.compaction_pri = kMinOverlappingRatio; mutable_cf_options_.max_bytes_for_level_base = 10000000; mutable_cf_options_.max_bytes_for_level_multiplier = 10; // file 7 and 8 over lap with the same file, but file 8 is smaller so // it will be picked. Add(2, 6U, "150", "167", 60000000U); // Overlaps with file 26, 27 Add(2, 7U, "168", "169", 60000000U); // Overlaps with file 27 Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 28, but the file // itself is larger. Should be picked. 
Add(3, 26U, "160", "165", 260000000U); Add(3, 27U, "166", "170", 260000000U); Add(3, 28U, "180", "400", 260000000U); Add(3, 29U, "401", "500", 260000000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Picking file 8 because overlapping ratio is the biggest. ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber()); } TEST_F(CompactionPickerTest, CompactionPriMinOverlapping4) { NewVersionStorage(6, kCompactionStyleLevel); ioptions_.compaction_pri = kMinOverlappingRatio; mutable_cf_options_.max_bytes_for_level_base = 10000000; mutable_cf_options_.max_bytes_for_level_multiplier = 10; // file 7 and 8 over lap with the same file, but file 8 is smaller so // it will be picked. // Overlaps with file 26, 27. And the file is compensated so will be // picked up. Add(2, 6U, "150", "167", 60000000U, 0, 100, 100, 180000000U); Add(2, 7U, "168", "169", 60000000U); // Overlaps with file 27 Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 28 Add(3, 26U, "160", "165", 60000000U); // Boosted file size in output level is not considered. Add(3, 27U, "166", "170", 60000000U, 0, 100, 100, 260000000U); Add(3, 28U, "180", "400", 60000000U); Add(3, 29U, "401", "500", 60000000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Picking file 8 because overlapping ratio is the biggest. 
ASSERT_EQ(6U, compaction->input(0, 0)->fd.GetNumber()); } // This test exhibits the bug where we don't properly reset parent_index in // PickCompaction() TEST_F(CompactionPickerTest, ParentIndexResetBug) { int num_levels = ioptions_.num_levels; mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); // <- marked for compaction Add(1, 3U, "400", "500", 600); // <- this one needs compacting Add(2, 4U, "150", "200"); Add(2, 5U, "201", "210"); Add(2, 6U, "300", "310"); Add(2, 7U, "400", "500"); // <- being compacted vstorage_->LevelFiles(2)[3]->being_compacted = true; vstorage_->LevelFiles(0)[0]->marked_for_compaction = true; UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); } // This test checks ExpandWhileOverlapping() by having overlapping user keys // ranges (with different sequence numbers) in the input files. 
TEST_F(CompactionPickerTest, OverlappingUserKeys) {
  // Files 2 and 3 share the boundary user key "400"; picking the large file 3
  // is expected to pull in file 2 as well.
  NewVersionStorage(6, kCompactionStyleLevel);
  ioptions_.compaction_pri = kByCompensatedSize;

  Add(1, 1U, "100", "150", 1U);
  // Overlapping user keys
  Add(1, 2U, "200", "400", 1U);
  Add(1, 3U, "400", "500", 1000000000U, 0, 0);
  Add(2, 4U, "600", "700", 1U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_levels());
  ASSERT_EQ(2U, compaction->num_input_files(0));
  ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber());
}

// Boundary user keys are shared both within the input level and within the
// output level; all five files end up in the compaction.
TEST_F(CompactionPickerTest, OverlappingUserKeys2) {
  NewVersionStorage(6, kCompactionStyleLevel);
  // Overlapping user keys on same level and output level
  Add(1, 1U, "200", "400", 1000000000U);
  Add(1, 2U, "400", "500", 1U, 0, 0);
  Add(2, 3U, "000", "100", 1U);
  Add(2, 4U, "100", "600", 1U, 0, 0);
  Add(2, 5U, "600", "700", 1U, 0, 0);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(2U, compaction->num_input_files(0));
  ASSERT_EQ(3U, compaction->num_input_files(1));
  ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(4U, compaction->input(1, 1)->fd.GetNumber());
  ASSERT_EQ(5U, compaction->input(1, 2)->fd.GetNumber());
}

// A chain of files sharing boundary user keys is pulled in as a whole.
TEST_F(CompactionPickerTest, OverlappingUserKeys3) {
  NewVersionStorage(6, kCompactionStyleLevel);
  // Chain of overlapping user key ranges (forces ExpandWhileOverlapping() to
  // expand multiple times)
  Add(1, 1U, "100", "150", 1U);
  Add(1, 2U, "150", "200", 1U, 0, 0);
  Add(1, 3U, "200", "250", 1000000000U, 0, 0);
  Add(1, 4U, "250", "300", 1U, 0, 0);
  Add(1, 5U, "300", "350", 1U, 0, 0);
  // Output level overlaps with the beginning and the end of the chain
  Add(2, 6U, "050", "100", 1U);
  Add(2, 7U, "350", "400", 1U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(5U, compaction->num_input_files(0));
  ASSERT_EQ(2U, compaction->num_input_files(1));
  ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(0, 2)->fd.GetNumber());
  ASSERT_EQ(4U, compaction->input(0, 3)->fd.GetNumber());
  ASSERT_EQ(5U, compaction->input(0, 4)->fd.GetNumber());
  ASSERT_EQ(6U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 1)->fd.GetNumber());
}

// The oversized file 3 shares no boundary key with its neighbours, so only it
// and the single overlapping output-level file 7 are expected.
TEST_F(CompactionPickerTest, OverlappingUserKeys4) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.max_bytes_for_level_base = 1000000;

  Add(1, 1U, "100", "150", 1U);
  Add(1, 2U, "150", "199", 1U, 0, 0);
  Add(1, 3U, "200", "250", 1100000U, 0, 0);
  Add(1, 4U, "251", "300", 1U, 0, 0);
  Add(1, 5U, "300", "350", 1U, 0, 0);

  Add(2, 6U, "100", "115", 1U);
  Add(2, 7U, "125", "325", 1U);
  Add(2, 8U, "350", "400", 1U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(1U, compaction->num_input_files(1));
  ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 0)->fd.GetNumber());
}

// Same layout as OverlappingUserKeys2, but the last output-level file is
// already being compacted, so no compaction may be picked.
TEST_F(CompactionPickerTest, OverlappingUserKeys5) {
  NewVersionStorage(6, kCompactionStyleLevel);
  // Overlapping user keys on same level and output level
  Add(1, 1U, "200", "400", 1000000000U);
  Add(1, 2U, "400", "500", 1U, 0, 0);
  Add(2, 3U, "000", "100", 1U);
  Add(2, 4U, "100", "600", 1U, 0, 0);
  Add(2, 5U, "600", "700", 1U, 0, 0);

  vstorage_->LevelFiles(2)[2]->being_compacted = true;

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() == nullptr);
}

// Both level-1 files are marked for compaction; the pick is expected to hold
// one input-level file and three output-level files.
TEST_F(CompactionPickerTest, OverlappingUserKeys6) {
  NewVersionStorage(6, kCompactionStyleLevel);
  // Overlapping user keys on same level and output level
  Add(1, 1U, "200", "400", 1U, 0, 0);
  Add(1, 2U, "401", "500", 1U, 0, 0);
  Add(2, 3U, "000", "100", 1U);
  Add(2, 4U, "100", "300", 1U, 0, 0);
  Add(2, 5U, "305", "450", 1U, 0, 0);
  Add(2, 6U, "460", "600", 1U, 0, 0);
  Add(2, 7U, "600", "700", 1U, 0, 0);

  vstorage_->LevelFiles(1)[0]->marked_for_compaction = true;
  vstorage_->LevelFiles(1)[1]->marked_for_compaction = true;

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(3U, compaction->num_input_files(1));
}

// Output-level files 4 and 5 share the boundary user key "600"; file 5 must
// be part of the compaction.
TEST_F(CompactionPickerTest, OverlappingUserKeys7) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.max_compaction_bytes = 100000000000u;
  // Overlapping user keys on same level and output level
  Add(1, 1U, "200", "400", 1U, 0, 0);
  Add(1, 2U, "401", "500", 1000000000U, 0, 0);
  Add(2, 3U, "100", "250", 1U);
  Add(2, 4U, "300", "600", 1U, 0, 0);
  Add(2, 5U, "600", "800", 1U, 0, 0);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_GE(1U, compaction->num_input_files(0));
  ASSERT_GE(2U, compaction->num_input_files(1));
  // File 5 has to be included in the compaction
  ASSERT_EQ(5U, compaction->inputs(1)->back()->fd.GetNumber());
}

TEST_F(CompactionPickerTest, OverlappingUserKeys8) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.max_compaction_bytes = 100000000000u;
  // grow the number of inputs in "level" without
  // changing the number of "level+1" files we pick up
  // Expand input level as much as possible
  // no overlapping case
  Add(1, 1U, "101", "150", 1U);
  Add(1, 2U, "151", "200", 1U);
  Add(1, 3U, "201", "300", 1000000000U);
  Add(1, 4U, "301", "400", 1U);
  Add(1, 5U, "401", "500", 1U);
  Add(2, 6U, "150", "200", 1U);
  Add(2, 7U, "200", "450", 1U, 0, 0);
  Add(2, 8U, "500", "600", 1U);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(3U, compaction->num_input_files(0));
  ASSERT_EQ(2U, compaction->num_input_files(1));
  ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber());
  ASSERT_EQ(4U, compaction->input(0, 2)->fd.GetNumber());
  ASSERT_EQ(6U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 1)->fd.GetNumber());
}

TEST_F(CompactionPickerTest, OverlappingUserKeys9) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.max_compaction_bytes = 100000000000u;
  // grow the number of inputs in "level" without
  // changing the number of "level+1" files we pick up
  // Expand input level as much as possible
  // overlapping case
  Add(1, 1U, "121", "150", 1U);
  Add(1, 2U, "151", "200", 1U);
  Add(1, 3U, "201", "300", 1000000000U);
  Add(1, 4U, "301", "400", 1U);
  Add(1, 5U, "401", "500", 1U);
  Add(2, 6U, "100", "120", 1U);
  Add(2, 7U, "150", "200", 1U);
  Add(2, 8U, "200", "450", 1U, 0, 0);
  Add(2, 9U, "501", "600", 1U);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(5U, compaction->num_input_files(0));
  ASSERT_EQ(2U, compaction->num_input_files(1));
  ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(0, 2)->fd.GetNumber());
  ASSERT_EQ(4U, compaction->input(0, 3)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(8U, compaction->input(1, 1)->fd.GetNumber());
}

TEST_F(CompactionPickerTest, OverlappingUserKeys10) {
  // Locked file encountered when pulling in extra input-level files with same
  // user keys. Verify we pick the next-best file from the same input level.
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.max_compaction_bytes = 100000000000u;

  // file_number 2U is largest and thus first choice. But it overlaps with
  // file_number 1U which is being compacted. So instead we pick the next-
  // biggest file, 3U, which is eligible for compaction.
  Add(1 /* level */, 1U /* file_number */, "100" /* smallest */,
      "150" /* largest */, 1U /* file_size */);
  file_map_[1U].first->being_compacted = true;
  Add(1 /* level */, 2U /* file_number */, "150" /* smallest */,
      "200" /* largest */, 1000000000U /* file_size */, 0 /* smallest_seq */,
      0 /* largest_seq */);
  Add(1 /* level */, 3U /* file_number */, "201" /* smallest */,
      "250" /* largest */, 900000000U /* file_size */);

  Add(2 /* level */, 4U /* file_number */, "100" /* smallest */,
      "150" /* largest */, 1U /* file_size */);
  Add(2 /* level */, 5U /* file_number */, "151" /* smallest */,
      "200" /* largest */, 1U /* file_size */);
  Add(2 /* level */, 6U /* file_number */, "201" /* smallest */,
      "250" /* largest */, 1U /* file_size */);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(1U, compaction->num_input_files(1));
  ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(6U, compaction->input(1, 0)->fd.GetNumber());
}

TEST_F(CompactionPickerTest, OverlappingUserKeys11) {
  // Locked file encountered when pulling in extra output-level files with same
  // user keys. Expected to skip that compaction and pick the next-best choice.
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.max_compaction_bytes = 100000000000u;

  // score(L1) = 3.7
  // score(L2) = 1.85
  // There is no eligible file in L1 to compact since both candidates pull in
  // file_number 5U, which overlaps with a file pending compaction (6U). The
  // first eligible compaction is from L2->L3.
  Add(1 /* level */, 2U /* file_number */, "151" /* smallest */,
      "200" /* largest */, 1000000000U /* file_size */);
  Add(1 /* level */, 3U /* file_number */, "201" /* smallest */,
      "250" /* largest */, 1U /* file_size */);

  Add(2 /* level */, 4U /* file_number */, "100" /* smallest */,
      "149" /* largest */, 5000000000U /* file_size */);
  Add(2 /* level */, 5U /* file_number */, "150" /* smallest */,
      "201" /* largest */, 1U /* file_size */);
  Add(2 /* level */, 6U /* file_number */, "201" /* smallest */,
      "249" /* largest */, 1U /* file_size */, 0 /* smallest_seq */,
      0 /* largest_seq */);
  file_map_[6U].first->being_compacted = true;

  Add(3 /* level */, 7U /* file_number */, "100" /* smallest */,
      "149" /* largest */, 1U /* file_size */);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(1U, compaction->num_input_files(1));
  ASSERT_EQ(4U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 0)->fd.GetNumber());
}

// L0 has the highest score but L0->L1 is blocked by an L1 file that is being
// compacted; no compaction may be scheduled.
TEST_F(CompactionPickerTest, NotScheduleL1IfL0WithHigherPri1) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;
  mutable_cf_options_.max_bytes_for_level_base = 900000000U;

  // 6 L0 files, score 3.
  Add(0, 1U, "000", "400", 1U);
  Add(0, 2U, "001", "400", 1U, 0, 0);
  Add(0, 3U, "001", "400", 1000000000U, 0, 0);
  Add(0, 31U, "001", "400", 1000000000U, 0, 0);
  Add(0, 32U, "001", "400", 1000000000U, 0, 0);
  Add(0, 33U, "001", "400", 1000000000U, 0, 0);

  // L1 total size 2GB, score 2.2. If one file being compacted, score 1.1.
  Add(1, 4U, "050", "300", 1000000000U, 0, 0);
  file_map_[4u].first->being_compacted = true;
  Add(1, 5U, "301", "350", 1000000000U, 0, 0);

  // Output level overlaps with the beginning and the end of the chain
  Add(2, 6U, "050", "100", 1U);
  Add(2, 7U, "300", "400", 1U);

  // No compaction should be scheduled, if L0 has higher priority than L1
  // but L0->L1 compaction is blocked by a file in L1 being compacted.
  UpdateVersionStorageInfo();
  ASSERT_EQ(0, vstorage_->CompactionScoreLevel(0));
  ASSERT_EQ(1, vstorage_->CompactionScoreLevel(1));
  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() == nullptr);
}

// Same layout as NotScheduleL1IfL0WithHigherPri1 but with no L1 file being
// compacted; a compaction is expected to be scheduled.
TEST_F(CompactionPickerTest, NotScheduleL1IfL0WithHigherPri2) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;
  mutable_cf_options_.max_bytes_for_level_base = 900000000U;

  // 6 L0 files, score 3.
  Add(0, 1U, "000", "400", 1U);
  Add(0, 2U, "001", "400", 1U, 0, 0);
  Add(0, 3U, "001", "400", 1000000000U, 0, 0);
  Add(0, 31U, "001", "400", 1000000000U, 0, 0);
  Add(0, 32U, "001", "400", 1000000000U, 0, 0);
  Add(0, 33U, "001", "400", 1000000000U, 0, 0);

  // L1 total size 2GB, score 2.2. If one file being compacted, score 1.1.
  Add(1, 4U, "050", "300", 1000000000U, 0, 0);
  Add(1, 5U, "301", "350", 1000000000U, 0, 0);

  // Output level overlaps with the beginning and the end of the chain
  Add(2, 6U, "050", "100", 1U);
  Add(2, 7U, "300", "400", 1U);

  // If no file in L1 being compacted, L0->L1 compaction will be scheduled.
  UpdateVersionStorageInfo();  // being_compacted flag is cleared here.
  ASSERT_EQ(0, vstorage_->CompactionScoreLevel(0));
  ASSERT_EQ(1, vstorage_->CompactionScoreLevel(1));
  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
}

// L1 scores higher than L0, so a compaction is picked even though one L1 file
// is already being compacted.
TEST_F(CompactionPickerTest, NotScheduleL1IfL0WithHigherPri3) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;
  mutable_cf_options_.max_bytes_for_level_base = 900000000U;

  // 6 L0 files, score 3.
  Add(0, 1U, "000", "400", 1U);
  Add(0, 2U, "001", "400", 1U, 0, 0);
  Add(0, 3U, "001", "400", 1000000000U, 0, 0);
  Add(0, 31U, "001", "400", 1000000000U, 0, 0);
  Add(0, 32U, "001", "400", 1000000000U, 0, 0);
  Add(0, 33U, "001", "400", 1000000000U, 0, 0);

  // L1 score more than 6.
  Add(1, 4U, "050", "300", 1000000000U, 0, 0);
  file_map_[4u].first->being_compacted = true;
  Add(1, 5U, "301", "350", 1000000000U, 0, 0);
  Add(1, 51U, "351", "400", 6000000000U, 0, 0);

  // Output level overlaps with the beginning and the end of the chain
  Add(2, 6U, "050", "100", 1U);
  Add(2, 7U, "300", "400", 1U);

  // If score in L1 is larger than L0, L1 compaction goes through despite
  // there is pending L0 compaction.
  UpdateVersionStorageInfo();
  ASSERT_EQ(1, vstorage_->CompactionScoreLevel(0));
  ASSERT_EQ(0, vstorage_->CompactionScoreLevel(1));
  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
}

// Checks estimated_compaction_needed_bytes() when L0 is below its trigger and
// several deeper levels are over target.
TEST_F(CompactionPickerTest, EstimateCompactionBytesNeeded1) {
  int num_levels = ioptions_.num_levels;
  ioptions_.level_compaction_dynamic_level_bytes = false;
  mutable_cf_options_.level0_file_num_compaction_trigger = 4;
  mutable_cf_options_.max_bytes_for_level_base = 1000;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  NewVersionStorage(num_levels, kCompactionStyleLevel);
  Add(0, 1U, "150", "200", 200);
  Add(0, 2U, "150", "200", 200);
  Add(0, 3U, "150", "200", 200);
  // Level 1 is over target by 200
  Add(1, 4U, "400", "500", 600);
  Add(1, 5U, "600", "700", 600);
  // Level 2 is less than target 10000 even added size of level 1
  // Size ratio of L2/L1 is 9600 / 1200 = 8
  Add(2, 6U, "150", "200", 2500);
  Add(2, 7U, "201", "210", 2000);
  Add(2, 8U, "300", "310", 2600);
  Add(2, 9U, "400", "500", 2500);
  // Level 3 exceeds target 100,000 by 1000
  Add(3, 10U, "400", "500", 101000);
  // Level 4 exceeds target 1,000,000 by 900 after adding size from level 3
  // Size ratio L4/L3 is 9.9
  // After merge from L3, L4 size is 1000900
  Add(4, 11U, "400", "500", 999900);
  Add(5, 12U, "400", "500", 8007200);

  UpdateVersionStorageInfo();

  ASSERT_EQ(200u * 9u + 10900u + 900u * 9,
            vstorage_->estimated_compaction_needed_bytes());
}

// Checks estimated_compaction_needed_bytes() when L0 exceeds its trigger and
// the merged L1 goes over target.
TEST_F(CompactionPickerTest, EstimateCompactionBytesNeeded2) {
  int num_levels = ioptions_.num_levels;
  ioptions_.level_compaction_dynamic_level_bytes = false;
  mutable_cf_options_.level0_file_num_compaction_trigger = 3;
  mutable_cf_options_.max_bytes_for_level_base = 1000;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  NewVersionStorage(num_levels, kCompactionStyleLevel);
  Add(0, 1U, "150", "200", 200);
  Add(0, 2U, "150", "200", 200);
  Add(0, 4U, "150", "200", 200);
  Add(0, 5U, "150", "200", 200);
  Add(0, 6U, "150", "200", 200);
  // Level 1 size will be 1400 after merging with L0
  Add(1, 7U, "400", "500", 200);
  Add(1, 8U, "600", "700", 200);
  // Level 2 is less than target 10000 even added size of level 1
  Add(2, 9U, "150", "200", 9100);
  // Level 3 over the target, but since level 4 is empty, we assume it will be
  // a trivial move.
  Add(3, 10U, "400", "500", 101000);

  UpdateVersionStorageInfo();

  // estimated L1->L2 merge: 400 * (9100.0 / 1400.0 + 1.0)
  ASSERT_EQ(1400u + 3000u, vstorage_->estimated_compaction_needed_bytes());
}

// Checks estimated_compaction_needed_bytes() with larger L0 files that push
// L1 far over target.
TEST_F(CompactionPickerTest, EstimateCompactionBytesNeeded3) {
  int num_levels = ioptions_.num_levels;
  ioptions_.level_compaction_dynamic_level_bytes = false;
  mutable_cf_options_.level0_file_num_compaction_trigger = 3;
  mutable_cf_options_.max_bytes_for_level_base = 1000;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  NewVersionStorage(num_levels, kCompactionStyleLevel);
  Add(0, 1U, "150", "200", 2000);
  Add(0, 2U, "150", "200", 2000);
  Add(0, 4U, "150", "200", 2000);
  Add(0, 5U, "150", "200", 2000);
  Add(0, 6U, "150", "200", 1000);
  // Level 1 size will be 10000 after merging with L0
  Add(1, 7U, "400", "500", 500);
  Add(1, 8U, "600", "700", 500);

  Add(2, 9U, "150", "200", 10000);

  UpdateVersionStorageInfo();

  ASSERT_EQ(10000u + 18000u, vstorage_->estimated_compaction_needed_bytes());
}

// Checks estimated_compaction_needed_bytes() with
// level_compaction_dynamic_level_bytes enabled.
TEST_F(CompactionPickerTest, EstimateCompactionBytesNeededDynamicLevel) {
  int num_levels = ioptions_.num_levels;
  ioptions_.level_compaction_dynamic_level_bytes = true;
  mutable_cf_options_.level0_file_num_compaction_trigger = 3;
  mutable_cf_options_.max_bytes_for_level_base = 1000;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  NewVersionStorage(num_levels, kCompactionStyleLevel);

  // Set Last level size 50000
  // num_levels - 1 target 5000
  // num_levels - 2 is base level with target 1000 (rounded up to
  // max_bytes_for_level_base).
  Add(num_levels - 1, 10U, "400", "500", 50000);

  Add(0, 1U, "150", "200", 200);
  Add(0, 2U, "150", "200", 200);
  Add(0, 4U, "150", "200", 200);
  Add(0, 5U, "150", "200", 200);
  Add(0, 6U, "150", "200", 200);
  // num_levels - 3 is over target by 100 + 1000
  Add(num_levels - 3, 7U, "400", "500", 550);
  Add(num_levels - 3, 8U, "600", "700", 550);
  // num_levels - 2 is over target by 1100 + 200
  Add(num_levels - 2, 9U, "150", "200", 5200);

  UpdateVersionStorageInfo();

  // Merging to the second last level: (5200 / 2100 + 1) * 1100
  // Merging to the last level: (50000 / 6300 + 1) * 1300
  ASSERT_EQ(2100u + 3823u + 11617u,
            vstorage_->estimated_compaction_needed_bytes());
}

// Exercises Compaction::TEST_IsBottommostLevel() over several file layouts.
// Every case asks about output level 2.
TEST_F(CompactionPickerTest, IsBottommostLevelTest) {
  // case 1: Higher levels are empty
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "a", "m");
  Add(0, 2U, "c", "z");
  Add(1, 3U, "d", "e");
  Add(1, 4U, "l", "p");
  Add(2, 5U, "g", "i");
  Add(2, 6U, "x", "z");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(2, 1);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(5U);
  bool result =
      Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_TRUE(result);

  // case 2: Higher levels have no overlap
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "a", "m");
  Add(0, 2U, "c", "z");
  Add(1, 3U, "d", "e");
  Add(1, 4U, "l", "p");
  Add(2, 5U, "g", "i");
  Add(2, 6U, "x", "z");
  Add(3, 7U, "k", "p");
  Add(3, 8U, "t", "w");
  Add(4, 9U, "a", "b");
  Add(5, 10U, "c", "cc");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(2, 1);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(5U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_TRUE(result);

  // case 3.1: Higher levels (level 3) have overlap
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "a", "m");
  Add(0, 2U, "c", "z");
  Add(1, 3U, "d", "e");
  Add(1, 4U, "l", "p");
  Add(2, 5U, "g", "i");
  Add(2, 6U, "x", "z");
  Add(3, 7U, "e", "g");
  Add(3, 8U, "h", "k");
  Add(4, 9U, "a", "b");
  Add(5, 10U, "c", "cc");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(2, 1);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(5U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_FALSE(result);

  // case 3.2: Higher levels (level 5) have overlap
  DeleteVersionStorage();
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "a", "m");
  Add(0, 2U, "c", "z");
  Add(1, 3U, "d", "e");
  Add(1, 4U, "l", "p");
  Add(2, 5U, "g", "i");
  Add(2, 6U, "x", "z");
  Add(3, 7U, "j", "k");
  Add(3, 8U, "l", "m");
  Add(4, 9U, "a", "b");
  Add(5, 10U, "c", "cc");
  Add(5, 11U, "h", "k");
  Add(5, 12U, "y", "yy");
  Add(5, 13U, "z", "zz");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(2, 1);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(5U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_FALSE(result);

  // case 3.3: Higher levels (level 5) have overlap, but it's only overlapping
  // one key ("d")
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "a", "m");
  Add(0, 2U, "c", "z");
  Add(1, 3U, "d", "e");
  Add(1, 4U, "l", "p");
  Add(2, 5U, "g", "i");
  Add(2, 6U, "x", "z");
  Add(3, 7U, "j", "k");
  Add(3, 8U, "l", "m");
  Add(4, 9U, "a", "b");
  Add(5, 10U, "c", "cc");
  Add(5, 11U, "ccc", "d");
  Add(5, 12U, "y", "yy");
  Add(5, 13U, "z", "zz");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(2, 1);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(5U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_FALSE(result);

  // Level 0 files overlap
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "s", "t");
  Add(0, 2U, "a", "m");
  Add(0, 3U, "b", "z");
  Add(0, 4U, "e", "f");
  Add(5, 10U, "y", "z");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(1, 0);
  AddToCompactionFiles(1U);
  AddToCompactionFiles(2U);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(4U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_FALSE(result);

  // Level 0 files don't overlap
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "s", "t");
  Add(0, 2U, "a", "m");
  Add(0, 3U, "b", "k");
  Add(0, 4U, "e", "f");
  Add(5, 10U, "y", "z");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(1, 0);
  AddToCompactionFiles(1U);
  AddToCompactionFiles(2U);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(4U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_TRUE(result);

  // Level 1 files overlap
  NewVersionStorage(6, kCompactionStyleLevel);
  Add(0, 1U, "s", "t");
  Add(0, 2U, "a", "m");
  Add(0, 3U, "b", "k");
  Add(0, 4U, "e", "f");
  Add(1, 5U, "a", "m");
  Add(1, 6U, "n", "o");
  Add(1, 7U, "w", "y");
  Add(5, 10U, "y", "z");
  UpdateVersionStorageInfo();
  SetCompactionInputFilesLevels(2, 0);
  AddToCompactionFiles(1U);
  AddToCompactionFiles(2U);
  AddToCompactionFiles(3U);
  AddToCompactionFiles(4U);
  AddToCompactionFiles(5U);
  AddToCompactionFiles(6U);
  AddToCompactionFiles(7U);
  result = Compaction::TEST_IsBottommostLevel(2, vstorage_.get(), input_files_);
  ASSERT_FALSE(result);

  DeleteVersionStorage();
}

TEST_F(CompactionPickerTest, MaxCompactionBytesHit) {
  mutable_cf_options_.max_bytes_for_level_base = 1000000u;
  mutable_cf_options_.max_compaction_bytes = 800000u;
  ioptions_.level_compaction_dynamic_level_bytes = false;
  NewVersionStorage(6, kCompactionStyleLevel);
  // A compaction should be triggered and pick file 2 and 5.
  // It can expand because adding file 1 and 3, the compaction size will
  // exceed mutable_cf_options_.max_bytes_for_level_base.
Add(1, 1U, "100", "150", 300000U); Add(1, 2U, "151", "200", 300001U, 0, 0); Add(1, 3U, "201", "250", 300000U, 0, 0); Add(1, 4U, "251", "300", 300000U, 0, 0); Add(2, 5U, "100", "256", 1U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->num_input_files(1)); ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber()); } TEST_F(CompactionPickerTest, MaxCompactionBytesNotHit) { mutable_cf_options_.max_bytes_for_level_base = 800000u; mutable_cf_options_.max_compaction_bytes = 1000000u; ioptions_.level_compaction_dynamic_level_bytes = false; NewVersionStorage(6, kCompactionStyleLevel); // A compaction should be triggered and pick file 2 and 5. // and it expands to file 1 and 3 too. 
Add(1, 1U, "100", "150", 300000U); Add(1, 2U, "151", "200", 300001U, 0, 0); Add(1, 3U, "201", "250", 300000U, 0, 0); Add(1, 4U, "251", "300", 300000U, 0, 0); Add(2, 5U, "000", "251", 1U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(3U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->num_input_files(1)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); ASSERT_EQ(3U, compaction->input(0, 2)->fd.GetNumber()); ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber()); } TEST_F(CompactionPickerTest, IsTrivialMoveOn) { mutable_cf_options_.max_bytes_for_level_base = 10000u; mutable_cf_options_.max_compaction_bytes = 10001u; ioptions_.level_compaction_dynamic_level_bytes = false; NewVersionStorage(6, kCompactionStyleLevel); // A compaction should be triggered and pick file 2 Add(1, 1U, "100", "150", 3000U); Add(1, 2U, "151", "200", 3001U); Add(1, 3U, "201", "250", 3000U); Add(1, 4U, "251", "300", 3000U); Add(3, 5U, "120", "130", 7000U); Add(3, 6U, "170", "180", 7000U); Add(3, 7U, "220", "230", 7000U); Add(3, 8U, "270", "280", 7000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_TRUE(compaction->IsTrivialMove()); } TEST_F(CompactionPickerTest, IsTrivialMoveOff) { mutable_cf_options_.max_bytes_for_level_base = 1000000u; mutable_cf_options_.max_compaction_bytes = 10000u; ioptions_.level_compaction_dynamic_level_bytes = false; NewVersionStorage(6, kCompactionStyleLevel); // A compaction should be triggered and pick all files from level 1 Add(1, 1U, "100", "150", 300000U, 0, 0); Add(1, 2U, "150", "200", 300000U, 0, 0); Add(1, 3U, "200", 
"250", 300000U, 0, 0); Add(1, 4U, "250", "300", 300000U, 0, 0); Add(3, 5U, "120", "130", 6000U); Add(3, 6U, "140", "150", 6000U); UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_FALSE(compaction->IsTrivialMove()); } TEST_F(CompactionPickerTest, CacheNextCompactionIndex) { NewVersionStorage(6, kCompactionStyleLevel); mutable_cf_options_.max_compaction_bytes = 100000000000u; Add(1 /* level */, 1U /* file_number */, "100" /* smallest */, "149" /* largest */, 1000000000U /* file_size */); file_map_[1U].first->being_compacted = true; Add(1 /* level */, 2U /* file_number */, "150" /* smallest */, "199" /* largest */, 900000000U /* file_size */); Add(1 /* level */, 3U /* file_number */, "200" /* smallest */, "249" /* largest */, 800000000U /* file_size */); Add(1 /* level */, 4U /* file_number */, "250" /* smallest */, "299" /* largest */, 700000000U /* file_size */); Add(2 /* level */, 5U /* file_number */, "150" /* smallest */, "199" /* largest */, 1U /* file_size */); file_map_[5U].first->being_compacted = true; UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(0U, compaction->num_input_files(1)); ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(2, vstorage_->NextCompactionIndex(1 /* level */)); compaction.reset(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(0U, compaction->num_input_files(1)); ASSERT_EQ(4U, compaction->input(0, 
0)->fd.GetNumber()); ASSERT_EQ(3, vstorage_->NextCompactionIndex(1 /* level */)); compaction.reset(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); ASSERT_EQ(4, vstorage_->NextCompactionIndex(1 /* level */)); } TEST_F(CompactionPickerTest, IntraL0MaxCompactionBytesNotHit) { // Intra L0 compaction triggers only if there are at least // level0_file_num_compaction_trigger + 2 L0 files. mutable_cf_options_.level0_file_num_compaction_trigger = 3; mutable_cf_options_.max_compaction_bytes = 1000000u; NewVersionStorage(6, kCompactionStyleLevel); // All 5 L0 files will be picked for intra L0 compaction. The one L1 file // spans entire L0 key range and is marked as being compacted to avoid // L0->L1 compaction. Add(0, 1U, "100", "150", 200000U, 0, 100, 101); Add(0, 2U, "151", "200", 200000U, 0, 102, 103); Add(0, 3U, "201", "250", 200000U, 0, 104, 105); Add(0, 4U, "251", "300", 200000U, 0, 106, 107); Add(0, 5U, "301", "350", 200000U, 0, 108, 109); Add(1, 6U, "100", "350", 200000U, 0, 110, 111); vstorage_->LevelFiles(1)[0]->being_compacted = true; UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(5U, compaction->num_input_files(0)); ASSERT_EQ(CompactionReason::kLevelL0FilesNum, compaction->compaction_reason()); ASSERT_EQ(0, compaction->output_level()); } TEST_F(CompactionPickerTest, IntraL0MaxCompactionBytesHit) { // Intra L0 compaction triggers only if there are at least // level0_file_num_compaction_trigger + 2 L0 files. 
mutable_cf_options_.level0_file_num_compaction_trigger = 3; mutable_cf_options_.max_compaction_bytes = 999999u; NewVersionStorage(6, kCompactionStyleLevel); // 4 out of 5 L0 files will be picked for intra L0 compaction due to // max_compaction_bytes limit (the minimum number of files for triggering // intra L0 compaction is 4). The one L1 file spans entire L0 key range and // is marked as being compacted to avoid L0->L1 compaction. Add(0, 1U, "100", "150", 200000U, 0, 100, 101); Add(0, 2U, "151", "200", 200000U, 0, 102, 103); Add(0, 3U, "201", "250", 200000U, 0, 104, 105); Add(0, 4U, "251", "300", 200000U, 0, 106, 107); Add(0, 5U, "301", "350", 200000U, 0, 108, 109); Add(1, 6U, "100", "350", 200000U, 0, 109, 110); vstorage_->LevelFiles(1)[0]->being_compacted = true; UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(4U, compaction->num_input_files(0)); ASSERT_EQ(CompactionReason::kLevelL0FilesNum, compaction->compaction_reason()); ASSERT_EQ(0, compaction->output_level()); } TEST_F(CompactionPickerTest, IntraL0ForEarliestSeqno) { // Intra L0 compaction triggers only if there are at least // level0_file_num_compaction_trigger + 2 L0 files. mutable_cf_options_.level0_file_num_compaction_trigger = 3; mutable_cf_options_.max_compaction_bytes = 999999u; NewVersionStorage(6, kCompactionStyleLevel); // 4 out of 6 L0 files will be picked for intra L0 compaction due to // being_compact limit. And the latest one L0 will be skipped due to earliest // seqno. The one L1 file spans entire L0 key range and is marked as being // compacted to avoid L0->L1 compaction. 
Add(1, 1U, "100", "350", 200000U, 0, 110, 111); Add(0, 2U, "301", "350", 1U, 0, 108, 109); Add(0, 3U, "251", "300", 1U, 0, 106, 107); Add(0, 4U, "201", "250", 1U, 0, 104, 105); Add(0, 5U, "151", "200", 1U, 0, 102, 103); Add(0, 6U, "100", "150", 1U, 0, 100, 101); Add(0, 7U, "100", "100", 1U, 0, 99, 100); vstorage_->LevelFiles(0)[5]->being_compacted = true; vstorage_->LevelFiles(1)[0]->being_compacted = true; UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_, 107)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(4U, compaction->num_input_files(0)); ASSERT_EQ(CompactionReason::kLevelL0FilesNum, compaction->compaction_reason()); ASSERT_EQ(0, compaction->output_level()); } #ifndef ROCKSDB_LITE TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { const uint64_t kFileSize = 100000; ioptions_.compaction_style = kCompactionStyleUniversal; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); // This test covers the case where a "regular" universal compaction is // scheduled first, followed by a delete triggered compaction. 
The latter // should fail NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450); Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300); Add(3, 5U, "010", "080", 8 * kFileSize, 0, 200, 251); Add(4, 3U, "301", "350", 8 * kFileSize, 0, 101, 150); Add(4, 6U, "501", "750", 8 * kFileSize, 0, 101, 150); UpdateVersionStorageInfo(); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction); // Validate that its a compaction to reduce sorted runs ASSERT_EQ(CompactionReason::kUniversalSortedRunNum, compaction->compaction_reason()); ASSERT_EQ(0, compaction->output_level()); ASSERT_EQ(0, compaction->start_level()); ASSERT_EQ(2U, compaction->num_input_files(0)); AddVersionStorage(); // Simulate a flush and mark the file for compaction Add(0, 7U, "150", "200", kFileSize, 0, 551, 600, 0, true); UpdateVersionStorageInfo(); std::unique_ptr compaction2( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_FALSE(compaction2); } TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap2) { const uint64_t kFileSize = 100000; ioptions_.compaction_style = kCompactionStyleUniversal; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); // This test covers the case where a delete triggered compaction is // scheduled first, followed by a "regular" compaction. 
The latter // should fail NewVersionStorage(5, kCompactionStyleUniversal); // Mark file number 4 for compaction Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300, 0, true); Add(3, 5U, "240", "290", 8 * kFileSize, 0, 201, 250); Add(4, 3U, "301", "350", 8 * kFileSize, 0, 101, 150); Add(4, 6U, "501", "750", 8 * kFileSize, 0, 101, 150); UpdateVersionStorageInfo(); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction); // Validate that its a delete triggered compaction ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, compaction->compaction_reason()); ASSERT_EQ(3, compaction->output_level()); ASSERT_EQ(0, compaction->start_level()); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->num_input_files(1)); AddVersionStorage(); Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450); UpdateVersionStorageInfo(); std::unique_ptr compaction2( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_FALSE(compaction2); } TEST_F(CompactionPickerTest, UniversalMarkedCompactionStartOutputOverlap) { // The case where universal periodic compaction can be picked // with some newer files being compacted. const uint64_t kFileSize = 100000; ioptions_.compaction_style = kCompactionStyleUniversal; bool input_level_overlap = false; bool output_level_overlap = false; // Let's mark 2 files in 2 different levels for compaction. The // compaction picker will randomly pick one, so use the sync point to // ensure a deterministic order. 
Loop until both cases are covered size_t random_index = 0; SyncPoint::GetInstance()->SetCallBack( "CompactionPicker::PickFilesMarkedForCompaction", [&](void* arg) { size_t* index = static_cast(arg); *index = random_index; }); SyncPoint::GetInstance()->EnableProcessing(); while (!input_level_overlap || !output_level_overlap) { // Ensure that the L0 file gets picked first random_index = !input_level_overlap ? 0 : 1; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(5, kCompactionStyleUniversal); Add(0, 1U, "260", "300", 4 * kFileSize, 0, 260, 300, 0, true); Add(3, 2U, "010", "020", 2 * kFileSize, 0, 201, 248); Add(3, 3U, "250", "270", 2 * kFileSize, 0, 202, 249); Add(3, 4U, "290", "310", 2 * kFileSize, 0, 203, 250); Add(3, 5U, "310", "320", 2 * kFileSize, 0, 204, 251, 0, true); Add(4, 6U, "301", "350", 8 * kFileSize, 0, 101, 150); Add(4, 7U, "501", "750", 8 * kFileSize, 0, 101, 150); UpdateVersionStorageInfo(); std::unique_ptr compaction( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction); // Validate that its a delete triggered compaction ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, compaction->compaction_reason()); ASSERT_TRUE(compaction->start_level() == 0 || compaction->start_level() == 3); if (compaction->start_level() == 0) { // The L0 file was picked. The next compaction will detect an // overlap on its input level input_level_overlap = true; ASSERT_EQ(3, compaction->output_level()); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(3U, compaction->num_input_files(1)); } else { // The level 3 file was picked. 
The next compaction will pick // the L0 file and will detect overlap when adding output // level inputs output_level_overlap = true; ASSERT_EQ(4, compaction->output_level()); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->num_input_files(1)); } vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); // After recomputing the compaction score, only one marked file will remain random_index = 0; std::unique_ptr compaction2( universal_compaction_picker.PickCompaction( cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_FALSE(compaction2); DeleteVersionStorage(); } } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/compaction/compaction_picker_universal.cc000066400000000000000000001206041370372246700240610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/compaction/compaction_picker_universal.h" #ifndef ROCKSDB_LITE #include #include #include #include #include #include "db/column_family.h" #include "file/filename.h" #include "logging/log_buffer.h" #include "monitoring/statistics.h" #include "test_util/sync_point.h" #include "util/random.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { namespace { // A helper class that form universal compactions. The class is used by // UniversalCompactionPicker::PickCompaction(). // The usage is to create the class, and get the compaction object by calling // PickCompaction(). 
class UniversalCompactionBuilder { public: UniversalCompactionBuilder(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp, const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, UniversalCompactionPicker* picker, LogBuffer* log_buffer) : ioptions_(ioptions), icmp_(icmp), cf_name_(cf_name), mutable_cf_options_(mutable_cf_options), vstorage_(vstorage), picker_(picker), log_buffer_(log_buffer) {} // Form and return the compaction object. The caller owns return object. Compaction* PickCompaction(); private: struct SortedRun { SortedRun(int _level, FileMetaData* _file, uint64_t _size, uint64_t _compensated_file_size, bool _being_compacted) : level(_level), file(_file), size(_size), compensated_file_size(_compensated_file_size), being_compacted(_being_compacted) { assert(compensated_file_size > 0); assert(level != 0 || file != nullptr); } void Dump(char* out_buf, size_t out_buf_size, bool print_path = false) const; // sorted_run_count is added into the string to print void DumpSizeInfo(char* out_buf, size_t out_buf_size, size_t sorted_run_count) const; int level; // `file` Will be null for level > 0. For level = 0, the sorted run is // for this file. FileMetaData* file; // For level > 0, `size` and `compensated_file_size` are sum of sizes all // files in the level. `being_compacted` should be the same for all files // in a non-zero level. Use the value here. uint64_t size; uint64_t compensated_file_size; bool being_compacted; }; // Pick Universal compaction to limit read amplification Compaction* PickCompactionToReduceSortedRuns( unsigned int ratio, unsigned int max_number_of_files_to_compact); // Pick Universal compaction to limit space amplification. Compaction* PickCompactionToReduceSizeAmp(); Compaction* PickDeleteTriggeredCompaction(); // Form a compaction from the sorted run indicated by start_index to the // oldest sorted run. 
// The caller is responsible for making sure that those files are not in // compaction. Compaction* PickCompactionToOldest(size_t start_index, CompactionReason compaction_reason); // Try to pick periodic compaction. The caller should only call it // if there is at least one file marked for periodic compaction. // null will be returned if no such a compaction can be formed // because some files are being compacted. Compaction* PickPeriodicCompaction(); // Used in universal compaction when the enabled_trivial_move // option is set. Checks whether there are any overlapping files // in the input. Returns true if the input files are non // overlapping. bool IsInputFilesNonOverlapping(Compaction* c); const ImmutableCFOptions& ioptions_; const InternalKeyComparator* icmp_; double score_; std::vector sorted_runs_; const std::string& cf_name_; const MutableCFOptions& mutable_cf_options_; VersionStorageInfo* vstorage_; UniversalCompactionPicker* picker_; LogBuffer* log_buffer_; static std::vector CalculateSortedRuns( const VersionStorageInfo& vstorage); // Pick a path ID to place a newly generated file, with its estimated file // size. static uint32_t GetPathId(const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, uint64_t file_size); }; // Used in universal compaction when trivial move is enabled. // This structure is used for the construction of min heap // that contains the file meta data, the level of the file // and the index of the file in that level struct InputFileInfo { InputFileInfo() : f(nullptr), level(0), index(0) {} FileMetaData* f; size_t level; size_t index; }; // Used in universal compaction when trivial move is enabled. // This comparator is used for the construction of min heap // based on the smallest key of the file. 
struct SmallestKeyHeapComparator { explicit SmallestKeyHeapComparator(const Comparator* ucmp) { ucmp_ = ucmp; } bool operator()(InputFileInfo i1, InputFileInfo i2) const { return (ucmp_->Compare(i1.f->smallest.user_key(), i2.f->smallest.user_key()) > 0); } private: const Comparator* ucmp_; }; typedef std::priority_queue, SmallestKeyHeapComparator> SmallestKeyHeap; // This function creates the heap that is used to find if the files are // overlapping during universal compaction when the allow_trivial_move // is set. SmallestKeyHeap create_level_heap(Compaction* c, const Comparator* ucmp) { SmallestKeyHeap smallest_key_priority_q = SmallestKeyHeap(SmallestKeyHeapComparator(ucmp)); InputFileInfo input_file; for (size_t l = 0; l < c->num_input_levels(); l++) { if (c->num_input_files(l) != 0) { if (l == 0 && c->start_level() == 0) { for (size_t i = 0; i < c->num_input_files(0); i++) { input_file.f = c->input(0, i); input_file.level = 0; input_file.index = i; smallest_key_priority_q.push(std::move(input_file)); } } else { input_file.f = c->input(l, 0); input_file.level = l; input_file.index = 0; smallest_key_priority_q.push(std::move(input_file)); } } } return smallest_key_priority_q; } #ifndef NDEBUG // smallest_seqno and largest_seqno are set iff. `files` is not empty. 
void GetSmallestLargestSeqno(const std::vector& files, SequenceNumber* smallest_seqno, SequenceNumber* largest_seqno) { bool is_first = true; for (FileMetaData* f : files) { assert(f->fd.smallest_seqno <= f->fd.largest_seqno); if (is_first) { is_first = false; *smallest_seqno = f->fd.smallest_seqno; *largest_seqno = f->fd.largest_seqno; } else { if (f->fd.smallest_seqno < *smallest_seqno) { *smallest_seqno = f->fd.smallest_seqno; } if (f->fd.largest_seqno > *largest_seqno) { *largest_seqno = f->fd.largest_seqno; } } } } #endif } // namespace // Algorithm that checks to see if there are any overlapping // files in the input bool UniversalCompactionBuilder::IsInputFilesNonOverlapping(Compaction* c) { auto comparator = icmp_->user_comparator(); int first_iter = 1; InputFileInfo prev, curr, next; SmallestKeyHeap smallest_key_priority_q = create_level_heap(c, icmp_->user_comparator()); while (!smallest_key_priority_q.empty()) { curr = smallest_key_priority_q.top(); smallest_key_priority_q.pop(); if (first_iter) { prev = curr; first_iter = 0; } else { if (comparator->Compare(prev.f->largest.user_key(), curr.f->smallest.user_key()) >= 0) { // found overlapping files, return false return false; } assert(comparator->Compare(curr.f->largest.user_key(), prev.f->largest.user_key()) > 0); prev = curr; } next.f = nullptr; if (c->level(curr.level) != 0 && curr.index < c->num_input_files(curr.level) - 1) { next.f = c->input(curr.level, curr.index + 1); next.level = curr.level; next.index = curr.index + 1; } if (next.f) { smallest_key_priority_q.push(std::move(next)); } } return true; } bool UniversalCompactionPicker::NeedsCompaction( const VersionStorageInfo* vstorage) const { const int kLevel0 = 0; if (vstorage->CompactionScore(kLevel0) >= 1) { return true; } if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) { return true; } if (!vstorage->FilesMarkedForCompaction().empty()) { return true; } return false; } Compaction* UniversalCompactionPicker::PickCompaction( const 
std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, LogBuffer* log_buffer, SequenceNumber /* earliest_memtable_seqno */) { UniversalCompactionBuilder builder(ioptions_, icmp_, cf_name, mutable_cf_options, vstorage, this, log_buffer); return builder.PickCompaction(); } void UniversalCompactionBuilder::SortedRun::Dump(char* out_buf, size_t out_buf_size, bool print_path) const { if (level == 0) { assert(file != nullptr); if (file->fd.GetPathId() == 0 || !print_path) { snprintf(out_buf, out_buf_size, "file %" PRIu64, file->fd.GetNumber()); } else { snprintf(out_buf, out_buf_size, "file %" PRIu64 "(path " "%" PRIu32 ")", file->fd.GetNumber(), file->fd.GetPathId()); } } else { snprintf(out_buf, out_buf_size, "level %d", level); } } void UniversalCompactionBuilder::SortedRun::DumpSizeInfo( char* out_buf, size_t out_buf_size, size_t sorted_run_count) const { if (level == 0) { assert(file != nullptr); snprintf(out_buf, out_buf_size, "file %" PRIu64 "[%" ROCKSDB_PRIszt "] " "with size %" PRIu64 " (compensated size %" PRIu64 ")", file->fd.GetNumber(), sorted_run_count, file->fd.GetFileSize(), file->compensated_file_size); } else { snprintf(out_buf, out_buf_size, "level %d[%" ROCKSDB_PRIszt "] " "with size %" PRIu64 " (compensated size %" PRIu64 ")", level, sorted_run_count, size, compensated_file_size); } } std::vector UniversalCompactionBuilder::CalculateSortedRuns( const VersionStorageInfo& vstorage) { std::vector ret; for (FileMetaData* f : vstorage.LevelFiles(0)) { ret.emplace_back(0, f, f->fd.GetFileSize(), f->compensated_file_size, f->being_compacted); } for (int level = 1; level < vstorage.num_levels(); level++) { uint64_t total_compensated_size = 0U; uint64_t total_size = 0U; bool being_compacted = false; for (FileMetaData* f : vstorage.LevelFiles(level)) { total_compensated_size += f->compensated_file_size; total_size += f->fd.GetFileSize(); // Size amp, read amp and periodic compactions always include all files // for a 
non-zero level. However, a delete triggered compaction and // a trivial move might pick a subset of files in a sorted run. So // always check all files in a sorted run and mark the entire run as // being compacted if one or more files are being compacted if (f->being_compacted) { being_compacted = f->being_compacted; } } if (total_compensated_size > 0) { ret.emplace_back(level, nullptr, total_size, total_compensated_size, being_compacted); } } return ret; } // Universal style of compaction. Pick files that are contiguous in // time-range to compact. Compaction* UniversalCompactionBuilder::PickCompaction() { const int kLevel0 = 0; score_ = vstorage_->CompactionScore(kLevel0); sorted_runs_ = CalculateSortedRuns(*vstorage_); if (sorted_runs_.size() == 0 || (vstorage_->FilesMarkedForPeriodicCompaction().empty() && vstorage_->FilesMarkedForCompaction().empty() && sorted_runs_.size() < (unsigned int)mutable_cf_options_ .level0_file_num_compaction_trigger)) { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: nothing to do\n", cf_name_.c_str()); TEST_SYNC_POINT_CALLBACK( "UniversalCompactionBuilder::PickCompaction:Return", nullptr); return nullptr; } VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_BUFFER_MAX_SZ( log_buffer_, 3072, "[%s] Universal: sorted runs: %" ROCKSDB_PRIszt " files: %s\n", cf_name_.c_str(), sorted_runs_.size(), vstorage_->LevelSummary(&tmp)); Compaction* c = nullptr; // Periodic compaction has higher priority than other type of compaction // because it's a hard requirement. if (!vstorage_->FilesMarkedForPeriodicCompaction().empty()) { // Always need to do a full compaction for periodic compaction. c = PickPeriodicCompaction(); } // Check for size amplification. 
if (c == nullptr && sorted_runs_.size() >= static_cast( mutable_cf_options_.level0_file_num_compaction_trigger)) { if ((c = PickCompactionToReduceSizeAmp()) != nullptr) { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: compacting for size amp\n", cf_name_.c_str()); } else { // Size amplification is within limits. Try reducing read // amplification while maintaining file size ratios. unsigned int ratio = mutable_cf_options_.compaction_options_universal.size_ratio; if ((c = PickCompactionToReduceSortedRuns(ratio, UINT_MAX)) != nullptr) { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: compacting for size ratio\n", cf_name_.c_str()); } else { // Size amplification and file size ratios are within configured limits. // If max read amplification is exceeding configured limits, then force // compaction without looking at filesize ratios and try to reduce // the number of files to fewer than level0_file_num_compaction_trigger. // This is guaranteed by NeedsCompaction() assert(sorted_runs_.size() >= static_cast( mutable_cf_options_.level0_file_num_compaction_trigger)); // Get the total number of sorted runs that are not being compacted int num_sr_not_compacted = 0; for (size_t i = 0; i < sorted_runs_.size(); i++) { if (sorted_runs_[i].being_compacted == false) { num_sr_not_compacted++; } } // The number of sorted runs that are not being compacted is greater // than the maximum allowed number of sorted runs if (num_sr_not_compacted > mutable_cf_options_.level0_file_num_compaction_trigger) { unsigned int num_files = num_sr_not_compacted - mutable_cf_options_.level0_file_num_compaction_trigger + 1; if ((c = PickCompactionToReduceSortedRuns(UINT_MAX, num_files)) != nullptr) { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: compacting for file num -- %u\n", cf_name_.c_str(), num_files); } } } } } if (c == nullptr) { if ((c = PickDeleteTriggeredCompaction()) != nullptr) { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: delete triggered compaction\n", cf_name_.c_str()); } } if (c == 
nullptr) { TEST_SYNC_POINT_CALLBACK( "UniversalCompactionBuilder::PickCompaction:Return", nullptr); return nullptr; } if (mutable_cf_options_.compaction_options_universal.allow_trivial_move == true && c->compaction_reason() != CompactionReason::kPeriodicCompaction) { c->set_is_trivial_move(IsInputFilesNonOverlapping(c)); } // validate that all the chosen files of L0 are non overlapping in time #ifndef NDEBUG SequenceNumber prev_smallest_seqno = 0U; bool is_first = true; size_t level_index = 0U; if (c->start_level() == 0) { for (auto f : *c->inputs(0)) { assert(f->fd.smallest_seqno <= f->fd.largest_seqno); if (is_first) { is_first = false; } prev_smallest_seqno = f->fd.smallest_seqno; } level_index = 1U; } for (; level_index < c->num_input_levels(); level_index++) { if (c->num_input_files(level_index) != 0) { SequenceNumber smallest_seqno = 0U; SequenceNumber largest_seqno = 0U; GetSmallestLargestSeqno(*(c->inputs(level_index)), &smallest_seqno, &largest_seqno); if (is_first) { is_first = false; } else if (prev_smallest_seqno > 0) { // A level is considered as the bottommost level if there are // no files in higher levels or if files in higher levels do // not overlap with the files being compacted. Sequence numbers // of files in bottommost level can be set to 0 to help // compression. As a result, the following assert may not hold // if the prev_smallest_seqno is 0. 
assert(prev_smallest_seqno > largest_seqno); } prev_smallest_seqno = smallest_seqno; } } #endif // update statistics RecordInHistogram(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION, c->inputs(0)->size()); picker_->RegisterCompaction(c); vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); TEST_SYNC_POINT_CALLBACK("UniversalCompactionBuilder::PickCompaction:Return", c); return c; } uint32_t UniversalCompactionBuilder::GetPathId( const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, uint64_t file_size) { // Two conditions need to be satisfied: // (1) the target path needs to be able to hold the file's size // (2) Total size left in this and previous paths need to be not // smaller than expected future file size before this new file is // compacted, which is estimated based on size_ratio. // For example, if now we are compacting files of size (1, 1, 2, 4, 8), // we will make sure the target file, probably with size of 16, will be // placed in a path so that eventually when new files are generated and // compacted to (1, 1, 2, 4, 8, 16), all those files can be stored in or // before the path we chose. // // TODO(sdong): now the case of multiple column families is not // considered in this algorithm. So the target size can be violated in // that case. We need to improve it. uint64_t accumulated_size = 0; uint64_t future_size = file_size * (100 - mutable_cf_options.compaction_options_universal.size_ratio) / 100; uint32_t p = 0; assert(!ioptions.cf_paths.empty()); for (; p < ioptions.cf_paths.size() - 1; p++) { uint64_t target_size = ioptions.cf_paths[p].target_size; if (target_size > file_size && accumulated_size + (target_size - file_size) > future_size) { return p; } accumulated_size += target_size; } return p; } // // Consider compaction files based on their size differences with // the next file in time order. 
// Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns( unsigned int ratio, unsigned int max_number_of_files_to_compact) { unsigned int min_merge_width = mutable_cf_options_.compaction_options_universal.min_merge_width; unsigned int max_merge_width = mutable_cf_options_.compaction_options_universal.max_merge_width; const SortedRun* sr = nullptr; bool done = false; size_t start_index = 0; unsigned int candidate_count = 0; unsigned int max_files_to_compact = std::min(max_merge_width, max_number_of_files_to_compact); min_merge_width = std::max(min_merge_width, 2U); // Caller checks the size before executing this function. This invariant is // important because otherwise we may have a possible integer underflow when // dealing with unsigned types. assert(sorted_runs_.size() > 0); // Considers a candidate file only if it is smaller than the // total size accumulated so far. for (size_t loop = 0; loop < sorted_runs_.size(); loop++) { candidate_count = 0; // Skip files that are already being compacted for (sr = nullptr; loop < sorted_runs_.size(); loop++) { sr = &sorted_runs_[loop]; if (!sr->being_compacted) { candidate_count = 1; break; } char file_num_buf[kFormatFileNumberBufSize]; sr->Dump(file_num_buf, sizeof(file_num_buf)); ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: %s" "[%d] being compacted, skipping", cf_name_.c_str(), file_num_buf, loop); sr = nullptr; } // This file is not being compacted. Consider it as the // first candidate to be compacted. uint64_t candidate_size = sr != nullptr ? sr->compensated_file_size : 0; if (sr != nullptr) { char file_num_buf[kFormatFileNumberBufSize]; sr->Dump(file_num_buf, sizeof(file_num_buf), true); ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Possible candidate %s[%d].", cf_name_.c_str(), file_num_buf, loop); } // Check if the succeeding files need compaction. 
for (size_t i = loop + 1; candidate_count < max_files_to_compact && i < sorted_runs_.size(); i++) { const SortedRun* succeeding_sr = &sorted_runs_[i]; if (succeeding_sr->being_compacted) { break; } // Pick files if the total/last candidate file size (increased by the // specified ratio) is still larger than the next candidate file. // candidate_size is the total size of files picked so far with the // default kCompactionStopStyleTotalSize; with // kCompactionStopStyleSimilarSize, it's simply the size of the last // picked file. double sz = candidate_size * (100.0 + ratio) / 100.0; if (sz < static_cast(succeeding_sr->size)) { break; } if (mutable_cf_options_.compaction_options_universal.stop_style == kCompactionStopStyleSimilarSize) { // Similar-size stopping rule: also check the last picked file isn't // far larger than the next candidate file. sz = (succeeding_sr->size * (100.0 + ratio)) / 100.0; if (sz < static_cast(candidate_size)) { // If the small file we've encountered begins a run of similar-size // files, we'll pick them up on a future iteration of the outer // loop. If it's some lonely straggler, it'll eventually get picked // by the last-resort read amp strategy which disregards size ratios. break; } candidate_size = succeeding_sr->compensated_file_size; } else { // default kCompactionStopStyleTotalSize candidate_size += succeeding_sr->compensated_file_size; } candidate_count++; } // Found a series of consecutive files that need compaction. 
if (candidate_count >= (unsigned int)min_merge_width) { start_index = loop; done = true; break; } else { for (size_t i = loop; i < loop + candidate_count && i < sorted_runs_.size(); i++) { const SortedRun* skipping_sr = &sorted_runs_[i]; char file_num_buf[256]; skipping_sr->DumpSizeInfo(file_num_buf, sizeof(file_num_buf), loop); ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Skipping %s", cf_name_.c_str(), file_num_buf); } } } if (!done || candidate_count <= 1) { return nullptr; } size_t first_index_after = start_index + candidate_count; // Compression is enabled if files compacted earlier already reached // size ratio of compression. bool enable_compression = true; int ratio_to_compress = mutable_cf_options_.compaction_options_universal.compression_size_percent; if (ratio_to_compress >= 0) { uint64_t total_size = 0; for (auto& sorted_run : sorted_runs_) { total_size += sorted_run.compensated_file_size; } uint64_t older_file_size = 0; for (size_t i = sorted_runs_.size() - 1; i >= first_index_after; i--) { older_file_size += sorted_runs_[i].size; if (older_file_size * 100L >= total_size * (long)ratio_to_compress) { enable_compression = false; break; } } } uint64_t estimated_total_size = 0; for (unsigned int i = 0; i < first_index_after; i++) { estimated_total_size += sorted_runs_[i].size; } uint32_t path_id = GetPathId(ioptions_, mutable_cf_options_, estimated_total_size); int start_level = sorted_runs_[start_index].level; int output_level; if (first_index_after == sorted_runs_.size()) { output_level = vstorage_->num_levels() - 1; } else if (sorted_runs_[first_index_after].level == 0) { output_level = 0; } else { output_level = sorted_runs_[first_index_after].level - 1; } // last level is reserved for the files ingested behind if (ioptions_.allow_ingest_behind && (output_level == vstorage_->num_levels() - 1)) { assert(output_level > 1); output_level--; } std::vector inputs(vstorage_->num_levels()); for (size_t i = 0; i < inputs.size(); ++i) { inputs[i].level = 
start_level + static_cast(i); } for (size_t i = start_index; i < first_index_after; i++) { auto& picking_sr = sorted_runs_[i]; if (picking_sr.level == 0) { FileMetaData* picking_file = picking_sr.file; inputs[0].files.push_back(picking_file); } else { auto& files = inputs[picking_sr.level - start_level].files; for (auto* f : vstorage_->LevelFiles(picking_sr.level)) { files.push_back(f); } } char file_num_buf[256]; picking_sr.DumpSizeInfo(file_num_buf, sizeof(file_num_buf), i); ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Picking %s", cf_name_.c_str(), file_num_buf); } CompactionReason compaction_reason; if (max_number_of_files_to_compact == UINT_MAX) { compaction_reason = CompactionReason::kUniversalSizeRatio; } else { compaction_reason = CompactionReason::kUniversalSortedRunNum; } return new Compaction( vstorage_, ioptions_, mutable_cf_options_, std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options_, output_level, kCompactionStyleUniversal), LLONG_MAX, path_id, GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, start_level, 1, enable_compression), GetCompressionOptions(mutable_cf_options_, vstorage_, start_level, enable_compression), /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, score_, false /* deletion_compaction */, compaction_reason); } // Look at overall size amplification. If size amplification // exceeeds the configured value, then do a compaction // of the candidate files all the way upto the earliest // base file (overrides configured values of file-size ratios, // min_merge_width and max_merge_width). 
// Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { // percentage flexibility while reducing size amplification uint64_t ratio = mutable_cf_options_.compaction_options_universal .max_size_amplification_percent; unsigned int candidate_count = 0; uint64_t candidate_size = 0; size_t start_index = 0; const SortedRun* sr = nullptr; assert(!sorted_runs_.empty()); if (sorted_runs_.back().being_compacted) { return nullptr; } // Skip files that are already being compacted for (size_t loop = 0; loop + 1 < sorted_runs_.size(); loop++) { sr = &sorted_runs_[loop]; if (!sr->being_compacted) { start_index = loop; // Consider this as the first candidate. break; } char file_num_buf[kFormatFileNumberBufSize]; sr->Dump(file_num_buf, sizeof(file_num_buf), true); ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: skipping %s[%d] compacted %s", cf_name_.c_str(), file_num_buf, loop, " cannot be a candidate to reduce size amp.\n"); sr = nullptr; } if (sr == nullptr) { return nullptr; // no candidate files } { char file_num_buf[kFormatFileNumberBufSize]; sr->Dump(file_num_buf, sizeof(file_num_buf), true); ROCKS_LOG_BUFFER( log_buffer_, "[%s] Universal: First candidate %s[%" ROCKSDB_PRIszt "] %s", cf_name_.c_str(), file_num_buf, start_index, " to reduce size amp.\n"); } // keep adding up all the remaining files for (size_t loop = start_index; loop + 1 < sorted_runs_.size(); loop++) { sr = &sorted_runs_[loop]; if (sr->being_compacted) { char file_num_buf[kFormatFileNumberBufSize]; sr->Dump(file_num_buf, sizeof(file_num_buf), true); ROCKS_LOG_BUFFER( log_buffer_, "[%s] Universal: Possible candidate %s[%d] %s", cf_name_.c_str(), file_num_buf, start_index, " is already being compacted. 
No size amp reduction possible.\n"); return nullptr; } candidate_size += sr->compensated_file_size; candidate_count++; } if (candidate_count == 0) { return nullptr; } // size of earliest file uint64_t earliest_file_size = sorted_runs_.back().size; // size amplification = percentage of additional size if (candidate_size * 100 < ratio * earliest_file_size) { ROCKS_LOG_BUFFER( log_buffer_, "[%s] Universal: size amp not needed. newer-files-total-size %" PRIu64 " earliest-file-size %" PRIu64, cf_name_.c_str(), candidate_size, earliest_file_size); return nullptr; } else { ROCKS_LOG_BUFFER( log_buffer_, "[%s] Universal: size amp needed. newer-files-total-size %" PRIu64 " earliest-file-size %" PRIu64, cf_name_.c_str(), candidate_size, earliest_file_size); } return PickCompactionToOldest(start_index, CompactionReason::kUniversalSizeAmplification); } // Pick files marked for compaction. Typically, files are marked by // CompactOnDeleteCollector due to the presence of tombstones. Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() { CompactionInputFiles start_level_inputs; int output_level; std::vector inputs; if (vstorage_->num_levels() == 1) { #if defined(ENABLE_SINGLE_LEVEL_DTC) // This is single level universal. Since we're basically trying to reclaim // space by processing files marked for compaction due to high tombstone // density, let's do the same thing as compaction to reduce size amp which // has the same goals. bool compact = false; start_level_inputs.level = 0; start_level_inputs.files.clear(); output_level = 0; for (FileMetaData* f : vstorage_->LevelFiles(0)) { if (f->marked_for_compaction) { compact = true; } if (compact) { start_level_inputs.files.push_back(f); } } if (start_level_inputs.size() <= 1) { // If only the last file in L0 is marked for compaction, ignore it return nullptr; } inputs.push_back(start_level_inputs); #else // Disable due to a known race condition. 
// TODO: Reenable once the race condition is fixed return nullptr; #endif // ENABLE_SINGLE_LEVEL_DTC } else { int start_level; // For multi-level universal, the strategy is to make this look more like // leveled. We pick one of the files marked for compaction and compact with // overlapping files in the adjacent level. picker_->PickFilesMarkedForCompaction(cf_name_, vstorage_, &start_level, &output_level, &start_level_inputs); if (start_level_inputs.empty()) { return nullptr; } // Pick the first non-empty level after the start_level for (output_level = start_level + 1; output_level < vstorage_->num_levels(); output_level++) { if (vstorage_->NumLevelFiles(output_level) != 0) { break; } } // If all higher levels are empty, pick the highest level as output level if (output_level == vstorage_->num_levels()) { if (start_level == 0) { output_level = vstorage_->num_levels() - 1; } else { // If start level is non-zero and all higher levels are empty, this // compaction will translate into a trivial move. 
Since the idea is // to reclaim space and trivial move doesn't help with that, we // skip compaction in this case and return nullptr return nullptr; } } if (ioptions_.allow_ingest_behind && output_level == vstorage_->num_levels() - 1) { assert(output_level > 1); output_level--; } if (output_level != 0) { if (start_level == 0) { if (!picker_->GetOverlappingL0Files(vstorage_, &start_level_inputs, output_level, nullptr)) { return nullptr; } } CompactionInputFiles output_level_inputs; int parent_index = -1; output_level_inputs.level = output_level; if (!picker_->SetupOtherInputs(cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs, &output_level_inputs, &parent_index, -1)) { return nullptr; } inputs.push_back(start_level_inputs); if (!output_level_inputs.empty()) { inputs.push_back(output_level_inputs); } if (picker_->FilesRangeOverlapWithCompaction(inputs, output_level)) { return nullptr; } } else { inputs.push_back(start_level_inputs); } } uint64_t estimated_total_size = 0; // Use size of the output level as estimated file size for (FileMetaData* f : vstorage_->LevelFiles(output_level)) { estimated_total_size += f->fd.GetFileSize(); } uint32_t path_id = GetPathId(ioptions_, mutable_cf_options_, estimated_total_size); return new Compaction( vstorage_, ioptions_, mutable_cf_options_, std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options_, output_level, kCompactionStyleUniversal), /* max_grandparent_overlap_bytes */ LLONG_MAX, path_id, GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, output_level, 1), GetCompressionOptions(mutable_cf_options_, vstorage_, output_level), /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, score_, false /* deletion_compaction */, CompactionReason::kFilesMarkedForCompaction); } Compaction* UniversalCompactionBuilder::PickCompactionToOldest( size_t start_index, CompactionReason compaction_reason) { assert(start_index < sorted_runs_.size()); // Estimate total file size uint64_t 
estimated_total_size = 0; for (size_t loop = start_index; loop < sorted_runs_.size(); loop++) { estimated_total_size += sorted_runs_[loop].size; } uint32_t path_id = GetPathId(ioptions_, mutable_cf_options_, estimated_total_size); int start_level = sorted_runs_[start_index].level; std::vector inputs(vstorage_->num_levels()); for (size_t i = 0; i < inputs.size(); ++i) { inputs[i].level = start_level + static_cast(i); } for (size_t loop = start_index; loop < sorted_runs_.size(); loop++) { auto& picking_sr = sorted_runs_[loop]; if (picking_sr.level == 0) { FileMetaData* f = picking_sr.file; inputs[0].files.push_back(f); } else { auto& files = inputs[picking_sr.level - start_level].files; for (auto* f : vstorage_->LevelFiles(picking_sr.level)) { files.push_back(f); } } std::string comp_reason_print_string; if (compaction_reason == CompactionReason::kPeriodicCompaction) { comp_reason_print_string = "periodic compaction"; } else if (compaction_reason == CompactionReason::kUniversalSizeAmplification) { comp_reason_print_string = "size amp"; } else { assert(false); } char file_num_buf[256]; picking_sr.DumpSizeInfo(file_num_buf, sizeof(file_num_buf), loop); ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: %s picking %s", cf_name_.c_str(), comp_reason_print_string.c_str(), file_num_buf); } // output files at the bottom most level, unless it's reserved int output_level = vstorage_->num_levels() - 1; // last level is reserved for the files ingested behind if (ioptions_.allow_ingest_behind) { assert(output_level > 1); output_level--; } // We never check size for // compaction_options_universal.compression_size_percent, // because we always compact all the files, so always compress. 
return new Compaction( vstorage_, ioptions_, mutable_cf_options_, std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options_, output_level, kCompactionStyleUniversal), LLONG_MAX, path_id, GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, start_level, 1, true /* enable_compression */), GetCompressionOptions(mutable_cf_options_, vstorage_, start_level, true /* enable_compression */), /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, score_, false /* deletion_compaction */, compaction_reason); } Compaction* UniversalCompactionBuilder::PickPeriodicCompaction() { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Periodic Compaction", cf_name_.c_str()); // In universal compaction, sorted runs contain older data are almost always // generated earlier too. To simplify the problem, we just try to trigger // a full compaction. We start from the oldest sorted run and include // all sorted runs, until we hit a sorted already being compacted. // Since usually the largest (which is usually the oldest) sorted run is // included anyway, doing a full compaction won't increase write // amplification much. // Get some information from marked files to check whether a file is // included in the compaction. size_t start_index = sorted_runs_.size(); while (start_index > 0 && !sorted_runs_[start_index - 1].being_compacted) { start_index--; } if (start_index == sorted_runs_.size()) { return nullptr; } // There is a rare corner case where we can't pick up all the files // because some files are being compacted and we end up with picking files // but none of them need periodic compaction. Unless we simply recompact // the last sorted run (either the last level or last L0 file), we would just // execute the compaction, in order to simplify the logic. 
if (start_index == sorted_runs_.size() - 1) { bool included_file_marked = false; int start_level = sorted_runs_[start_index].level; FileMetaData* start_file = sorted_runs_[start_index].file; for (const std::pair& level_file_pair : vstorage_->FilesMarkedForPeriodicCompaction()) { if (start_level != 0) { // Last sorted run is a level if (start_level == level_file_pair.first) { included_file_marked = true; break; } } else { // Last sorted run is a L0 file. if (start_file == level_file_pair.second) { included_file_marked = true; break; } } } if (!included_file_marked) { ROCKS_LOG_BUFFER(log_buffer_, "[%s] Universal: Cannot form a compaction covering file " "marked for periodic compaction", cf_name_.c_str()); return nullptr; } } Compaction* c = PickCompactionToOldest(start_index, CompactionReason::kPeriodicCompaction); TEST_SYNC_POINT_CALLBACK( "UniversalCompactionPicker::PickPeriodicCompaction:Return", c); return c; } } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/compaction/compaction_picker_universal.h000066400000000000000000000024431370372246700237230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #ifndef ROCKSDB_LITE #include "db/compaction/compaction_picker.h" namespace ROCKSDB_NAMESPACE { class UniversalCompactionPicker : public CompactionPicker { public: UniversalCompactionPicker(const ImmutableCFOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, VersionStorageInfo* vstorage, LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) override; virtual int MaxOutputLevel() const override { return NumberLevels() - 1; } virtual bool NeedsCompaction( const VersionStorageInfo* vstorage) const override; }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/comparator_db_test.cc000066400000000000000000000450521370372246700200220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include "memtable/stl_wrappers.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/hash.h" #include "util/kv_map.h" #include "util/string_util.h" #include "utilities/merge_operators.h" using std::unique_ptr; namespace ROCKSDB_NAMESPACE { namespace { static const Comparator* kTestComparator = nullptr; class KVIter : public Iterator { public: explicit KVIter(const stl_wrappers::KVMap* map) : map_(map), iter_(map_->end()) {} bool Valid() const override { return iter_ != map_->end(); } void SeekToFirst() override { iter_ = map_->begin(); } void SeekToLast() override { if (map_->empty()) { iter_ = map_->end(); } else { iter_ = map_->find(map_->rbegin()->first); } } void Seek(const Slice& k) override { iter_ = map_->lower_bound(k.ToString()); } void SeekForPrev(const Slice& k) override { iter_ = map_->upper_bound(k.ToString()); Prev(); } void Next() override { ++iter_; } void Prev() override { if (iter_ == map_->begin()) { iter_ = map_->end(); return; } --iter_; } Slice key() const override { return iter_->first; } Slice value() const override { return iter_->second; } Status status() const override { return Status::OK(); } private: const stl_wrappers::KVMap* const map_; stl_wrappers::KVMap::const_iterator iter_; }; void AssertItersEqual(Iterator* iter1, Iterator* iter2) { ASSERT_EQ(iter1->Valid(), iter2->Valid()); if (iter1->Valid()) { ASSERT_EQ(iter1->key().ToString(), iter2->key().ToString()); ASSERT_EQ(iter1->value().ToString(), iter2->value().ToString()); } } // Measuring operations on DB (expect to be empty). 
// source_strings are candidate keys void DoRandomIteraratorTest(DB* db, std::vector source_strings, Random* rnd, int num_writes, int num_iter_ops, int num_trigger_flush) { stl_wrappers::KVMap map((stl_wrappers::LessOfComparator(kTestComparator))); for (int i = 0; i < num_writes; i++) { if (num_trigger_flush > 0 && i != 0 && i % num_trigger_flush == 0) { db->Flush(FlushOptions()); } int type = rnd->Uniform(2); int index = rnd->Uniform(static_cast(source_strings.size())); auto& key = source_strings[index]; switch (type) { case 0: // put map[key] = key; ASSERT_OK(db->Put(WriteOptions(), key, key)); break; case 1: // delete if (map.find(key) != map.end()) { map.erase(key); } ASSERT_OK(db->Delete(WriteOptions(), key)); break; default: assert(false); } } std::unique_ptr iter(db->NewIterator(ReadOptions())); std::unique_ptr result_iter(new KVIter(&map)); bool is_valid = false; for (int i = 0; i < num_iter_ops; i++) { // Random walk and make sure iter and result_iter returns the // same key and value int type = rnd->Uniform(6); ASSERT_OK(iter->status()); switch (type) { case 0: // Seek to First iter->SeekToFirst(); result_iter->SeekToFirst(); break; case 1: // Seek to last iter->SeekToLast(); result_iter->SeekToLast(); break; case 2: { // Seek to random key auto key_idx = rnd->Uniform(static_cast(source_strings.size())); auto key = source_strings[key_idx]; iter->Seek(key); result_iter->Seek(key); break; } case 3: // Next if (is_valid) { iter->Next(); result_iter->Next(); } else { continue; } break; case 4: // Prev if (is_valid) { iter->Prev(); result_iter->Prev(); } else { continue; } break; default: { assert(type == 5); auto key_idx = rnd->Uniform(static_cast(source_strings.size())); auto key = source_strings[key_idx]; std::string result; auto status = db->Get(ReadOptions(), key, &result); if (map.find(key) == map.end()) { ASSERT_TRUE(status.IsNotFound()); } else { ASSERT_EQ(map[key], result); } break; } } AssertItersEqual(iter.get(), result_iter.get()); is_valid = 
iter->Valid(); } } class DoubleComparator : public Comparator { public: DoubleComparator() {} const char* Name() const override { return "DoubleComparator"; } int Compare(const Slice& a, const Slice& b) const override { #ifndef CYGWIN double da = std::stod(a.ToString()); double db = std::stod(b.ToString()); #else double da = std::strtod(a.ToString().c_str(), 0 /* endptr */); double db = std::strtod(a.ToString().c_str(), 0 /* endptr */); #endif if (da == db) { return a.compare(b); } else if (da > db) { return 1; } else { return -1; } } void FindShortestSeparator(std::string* /*start*/, const Slice& /*limit*/) const override {} void FindShortSuccessor(std::string* /*key*/) const override {} }; class HashComparator : public Comparator { public: HashComparator() {} const char* Name() const override { return "HashComparator"; } int Compare(const Slice& a, const Slice& b) const override { uint32_t ha = Hash(a.data(), a.size(), 66); uint32_t hb = Hash(b.data(), b.size(), 66); if (ha == hb) { return a.compare(b); } else if (ha > hb) { return 1; } else { return -1; } } void FindShortestSeparator(std::string* /*start*/, const Slice& /*limit*/) const override {} void FindShortSuccessor(std::string* /*key*/) const override {} }; class TwoStrComparator : public Comparator { public: TwoStrComparator() {} const char* Name() const override { return "TwoStrComparator"; } int Compare(const Slice& a, const Slice& b) const override { assert(a.size() >= 2); assert(b.size() >= 2); size_t size_a1 = static_cast(a[0]); size_t size_b1 = static_cast(b[0]); size_t size_a2 = static_cast(a[1]); size_t size_b2 = static_cast(b[1]); assert(size_a1 + size_a2 + 2 == a.size()); assert(size_b1 + size_b2 + 2 == b.size()); Slice a1 = Slice(a.data() + 2, size_a1); Slice b1 = Slice(b.data() + 2, size_b1); Slice a2 = Slice(a.data() + 2 + size_a1, size_a2); Slice b2 = Slice(b.data() + 2 + size_b1, size_b2); if (a1 != b1) { return a1.compare(b1); } return a2.compare(b2); } void 
FindShortestSeparator(std::string* /*start*/, const Slice& /*limit*/) const override {} void FindShortSuccessor(std::string* /*key*/) const override {} }; } // namespace class ComparatorDBTest : public testing::Test, virtual public ::testing::WithParamInterface { private: std::string dbname_; Env* env_; DB* db_; Options last_options_; std::unique_ptr comparator_guard; public: ComparatorDBTest() : env_(Env::Default()), db_(nullptr) { kTestComparator = BytewiseComparator(); dbname_ = test::PerThreadDBPath("comparator_db_test"); BlockBasedTableOptions toptions; toptions.format_version = GetParam(); last_options_.table_factory.reset( ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(toptions)); EXPECT_OK(DestroyDB(dbname_, last_options_)); } ~ComparatorDBTest() override { delete db_; EXPECT_OK(DestroyDB(dbname_, last_options_)); kTestComparator = BytewiseComparator(); } DB* GetDB() { return db_; } void SetOwnedComparator(const Comparator* cmp, bool owner = true) { if (owner) { comparator_guard.reset(cmp); } else { comparator_guard.reset(); } kTestComparator = cmp; last_options_.comparator = cmp; } // Return the current option configuration. 
Options* GetOptions() { return &last_options_; } void DestroyAndReopen() { // Destroy using last options Destroy(); ASSERT_OK(TryReopen()); } void Destroy() { delete db_; db_ = nullptr; ASSERT_OK(DestroyDB(dbname_, last_options_)); } Status TryReopen() { delete db_; db_ = nullptr; last_options_.create_if_missing = true; return DB::Open(last_options_, dbname_, &db_); } }; INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest, testing::Values(test::kLatestFormatVersion)); TEST_P(ComparatorDBTest, Bytewise) { for (int rand_seed = 301; rand_seed < 306; rand_seed++) { DestroyAndReopen(); Random rnd(rand_seed); DoRandomIteraratorTest(GetDB(), {"a", "b", "c", "d", "e", "f", "g", "h", "i"}, &rnd, 8, 100, 3); } } TEST_P(ComparatorDBTest, SimpleSuffixReverseComparator) { SetOwnedComparator(new test::SimpleSuffixReverseComparator()); for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { Options* opt = GetOptions(); opt->comparator = kTestComparator; DestroyAndReopen(); Random rnd(rnd_seed); std::vector source_strings; std::vector source_prefixes; // Randomly generate 5 prefixes for (int i = 0; i < 5; i++) { source_prefixes.push_back(test::RandomHumanReadableString(&rnd, 8)); } for (int j = 0; j < 20; j++) { int prefix_index = rnd.Uniform(static_cast(source_prefixes.size())); std::string key = source_prefixes[prefix_index] + test::RandomHumanReadableString(&rnd, rnd.Uniform(8)); source_strings.push_back(key); } DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 30, 600, 66); } } TEST_P(ComparatorDBTest, Uint64Comparator) { SetOwnedComparator(test::Uint64Comparator(), false /* owner */); for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { Options* opt = GetOptions(); opt->comparator = kTestComparator; DestroyAndReopen(); Random rnd(rnd_seed); Random64 rnd64(rnd_seed); std::vector source_strings; // Randomly generate source keys for (int i = 0; i < 100; i++) { uint64_t r = 
rnd64.Next(); std::string str; str.resize(8); memcpy(&str[0], static_cast(&r), 8); source_strings.push_back(str); } DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); } } TEST_P(ComparatorDBTest, DoubleComparator) { SetOwnedComparator(new DoubleComparator()); for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { Options* opt = GetOptions(); opt->comparator = kTestComparator; DestroyAndReopen(); Random rnd(rnd_seed); std::vector source_strings; // Randomly generate source keys for (int i = 0; i < 100; i++) { uint32_t r = rnd.Next(); uint32_t divide_order = rnd.Uniform(8); double to_divide = 1.0; for (uint32_t j = 0; j < divide_order; j++) { to_divide *= 10.0; } source_strings.push_back(ToString(r / to_divide)); } DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); } } TEST_P(ComparatorDBTest, HashComparator) { SetOwnedComparator(new HashComparator()); for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { Options* opt = GetOptions(); opt->comparator = kTestComparator; DestroyAndReopen(); Random rnd(rnd_seed); std::vector source_strings; // Randomly generate source keys for (int i = 0; i < 100; i++) { source_strings.push_back(test::RandomKey(&rnd, 8)); } DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); } } TEST_P(ComparatorDBTest, TwoStrComparator) { SetOwnedComparator(new TwoStrComparator()); for (int rnd_seed = 301; rnd_seed < 316; rnd_seed++) { Options* opt = GetOptions(); opt->comparator = kTestComparator; DestroyAndReopen(); Random rnd(rnd_seed); std::vector source_strings; // Randomly generate source keys for (int i = 0; i < 100; i++) { std::string str; uint32_t size1 = rnd.Uniform(8); uint32_t size2 = rnd.Uniform(8); str.append(1, static_cast(size1)); str.append(1, static_cast(size2)); str.append(test::RandomKey(&rnd, size1)); str.append(test::RandomKey(&rnd, size2)); source_strings.push_back(str); } DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66); } } TEST_P(ComparatorDBTest, 
IsSameLengthImmediateSuccessor) { { // different length Slice s("abcxy"); Slice t("abcxyz"); ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { Slice s("abcxyz"); Slice t("abcxy"); ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { // not last byte different Slice s("abc1xyz"); Slice t("abc2xyz"); ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { // same string Slice s("abcxyz"); Slice t("abcxyz"); ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { Slice s("abcxy"); Slice t("abcxz"); ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { Slice s("abcxz"); Slice t("abcxy"); ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { const char s_array[] = "\x50\x8a\xac"; const char t_array[] = "\x50\x8a\xad"; Slice s(s_array); Slice t(t_array); ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { const char s_array[] = "\x50\x8a\xff"; const char t_array[] = "\x50\x8b\x00"; Slice s(s_array, 3); Slice t(t_array, 3); ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { const char s_array[] = "\x50\x8a\xff\xff"; const char t_array[] = "\x50\x8b\x00\x00"; Slice s(s_array, 4); Slice t(t_array, 4); ASSERT_TRUE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } { const char s_array[] = "\x50\x8a\xff\xff"; const char t_array[] = "\x50\x8b\x00\x01"; Slice s(s_array, 4); Slice t(t_array, 4); ASSERT_FALSE(BytewiseComparator()->IsSameLengthImmediateSuccessor(s, t)); } } TEST_P(ComparatorDBTest, FindShortestSeparator) { std::string s1 = "abc1xyz"; std::string s2 = "abc3xy"; BytewiseComparator()->FindShortestSeparator(&s1, s2); ASSERT_EQ("abc2", s1); s1 = "abc5xyztt"; ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); ASSERT_EQ("abc5", s1); s1 = "abc3"; s2 = "abc2xy"; ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); ASSERT_EQ("abc3", s1); s1 = "abc3xyz"; 
s2 = "abc2xy"; ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); ASSERT_EQ("abc3", s1); s1 = "abc3xyz"; s2 = "abc2"; ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); ASSERT_EQ("abc3", s1); std::string old_s1 = s1 = "abc2xy"; s2 = "abc2"; ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2); ASSERT_TRUE(old_s1 >= s1); ASSERT_TRUE(s1 > s2); } TEST_P(ComparatorDBTest, SeparatorSuccessorRandomizeTest) { // Char list for boundary cases. std::array char_list{{0, 1, 2, 253, 254, 255}}; Random rnd(301); for (int attempts = 0; attempts < 1000; attempts++) { uint32_t size1 = rnd.Skewed(4); uint32_t size2; if (rnd.OneIn(2)) { // size2 to be random size size2 = rnd.Skewed(4); } else { // size1 is within [-2, +2] of size1 int diff = static_cast(rnd.Uniform(5)) - 2; int tmp_size2 = static_cast(size1) + diff; if (tmp_size2 < 0) { tmp_size2 = 0; } size2 = static_cast(tmp_size2); } std::string s1; std::string s2; for (uint32_t i = 0; i < size1; i++) { if (rnd.OneIn(2)) { // Use random byte s1 += static_cast(rnd.Uniform(256)); } else { // Use one byte in char_list char c = static_cast(char_list[rnd.Uniform(sizeof(char_list))]); s1 += c; } } // First set s2 to be the same as s1, and then modify s2. s2 = s1; s2.resize(size2); // We start from the back of the string if (size2 > 0) { uint32_t pos = size2 - 1; do { if (pos >= size1 || rnd.OneIn(4)) { // For 1/4 chance, use random byte s2[pos] = static_cast(rnd.Uniform(256)); } else if (rnd.OneIn(4)) { // In 1/4 chance, stop here. break; } else { // Create a char within [-2, +2] of the matching char of s1. int diff = static_cast(rnd.Uniform(5)) - 2; // char may be signed or unsigned based on platform. 
int s1_char = static_cast(static_cast(s1[pos])); int s2_char = s1_char + diff; if (s2_char < 0) { s2_char = 0; } if (s2_char > 255) { s2_char = 255; } s2[pos] = static_cast(s2_char); } } while (pos-- != 0); } // Test separators for (int rev = 0; rev < 2; rev++) { if (rev == 1) { // switch s1 and s2 std::string t = s1; s1 = s2; s2 = t; } std::string separator = s1; BytewiseComparator()->FindShortestSeparator(&separator, s2); std::string rev_separator = s1; ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2); if (s1 == s2) { ASSERT_EQ(s1, separator); ASSERT_EQ(s2, rev_separator); } else if (s1 < s2) { ASSERT_TRUE(s1 <= separator); ASSERT_TRUE(s2 > separator); ASSERT_LE(separator.size(), std::max(s1.size(), s2.size())); ASSERT_EQ(s1, rev_separator); } else { ASSERT_TRUE(s1 >= rev_separator); ASSERT_TRUE(s2 < rev_separator); ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size())); ASSERT_EQ(s1, separator); } } // Test successors std::string succ = s1; BytewiseComparator()->FindShortSuccessor(&succ); ASSERT_TRUE(succ >= s1); succ = s1; ReverseBytewiseComparator()->FindShortSuccessor(&succ); ASSERT_TRUE(succ <= s1); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/convenience.cc000066400000000000000000000056461370372246700164500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #ifndef ROCKSDB_LITE #include "rocksdb/convenience.h" #include "db/db_impl/db_impl.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { void CancelAllBackgroundWork(DB* db, bool wait) { (static_cast_with_check(db->GetRootDB())) ->CancelAllBackgroundWork(wait); } Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end, bool include_end) { RangePtr range(begin, end); return DeleteFilesInRanges(db, column_family, &range, 1, include_end); } Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end) { return (static_cast_with_check(db->GetRootDB())) ->DeleteFilesInRanges(column_family, ranges, n, include_end); } Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, const std::string& file_path) { return VerifySstFileChecksum(options, env_options, ReadOptions(), file_path); } Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, const ReadOptions& read_options, const std::string& file_path) { std::unique_ptr file; uint64_t file_size; InternalKeyComparator internal_comparator(options.comparator); ImmutableCFOptions ioptions(options); Status s = ioptions.fs->NewRandomAccessFile(file_path, FileOptions(env_options), &file, nullptr); if (s.ok()) { s = ioptions.fs->GetFileSize(file_path, IOOptions(), &file_size, nullptr); } else { return s; } std::unique_ptr table_reader; std::unique_ptr file_reader( new RandomAccessFileReader(std::move(file), file_path)); const bool kImmortal = true; s = ioptions.table_factory->NewTableReader( TableReaderOptions(ioptions, options.prefix_extractor.get(), env_options, internal_comparator, false /* skip_filters */, !kImmortal, false /* force_direct_prefetch */, -1 /* level */), std::move(file_reader), file_size, &table_reader, false /* prefetch_index_and_filter_in_cache */); if (!s.ok()) { return s; } s = table_reader->VerifyChecksum(read_options, 
TableReaderCaller::kUserVerifyChecksum); return s; } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/corruption_test.cc000066400000000000000000000430351370372246700174110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef ROCKSDB_LITE #include "rocksdb/db.h" #include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/log_format.h" #include "db/version_set.h" #include "env/composite_env_wrapper.h" #include "file/filename.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/env.h" #include "rocksdb/table.h" #include "rocksdb/write_batch.h" #include "table/block_based/block_based_table_builder.h" #include "table/meta_blocks.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { static const int kValueSize = 1000; class CorruptionTest : public testing::Test { public: test::ErrorEnv env_; std::string dbname_; std::shared_ptr tiny_cache_; Options options_; DB* db_; CorruptionTest() { // If LRU cache shard bit is smaller than 2 (or -1 which will automatically // set it to 0), test SequenceNumberRecovery will fail, likely because of a // bug in recovery code. Keep it 4 for now to make the test passes. 
tiny_cache_ = NewLRUCache(100, 4); options_.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; options_.env = &env_; dbname_ = test::PerThreadDBPath("corruption_test"); DestroyDB(dbname_, options_); db_ = nullptr; options_.create_if_missing = true; BlockBasedTableOptions table_options; table_options.block_size_deviation = 0; // make unit test pass for now options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(); options_.create_if_missing = false; } ~CorruptionTest() override { delete db_; DestroyDB(dbname_, Options()); } void CloseDb() { delete db_; db_ = nullptr; } Status TryReopen(Options* options = nullptr) { delete db_; db_ = nullptr; Options opt = (options ? *options : options_); if (opt.env == Options().env) { // If env is not overridden, replace it with ErrorEnv. // Otherwise, the test already uses a non-default Env. opt.env = &env_; } opt.arena_block_size = 4096; BlockBasedTableOptions table_options; table_options.block_cache = tiny_cache_; table_options.block_size_deviation = 0; opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); return DB::Open(opt, dbname_, &db_); } void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); } void RepairDB() { delete db_; db_ = nullptr; ASSERT_OK(::ROCKSDB_NAMESPACE::RepairDB(dbname_, options_)); } void Build(int n, int flush_every = 0) { std::string key_space, value_space; WriteBatch batch; for (int i = 0; i < n; i++) { if (flush_every != 0 && i != 0 && i % flush_every == 0) { DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); } //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); Slice key = Key(i, &key_space); batch.Clear(); batch.Put(key, Value(i, &value_space)); ASSERT_OK(db_->Write(WriteOptions(), &batch)); } } void Check(int min_expected, int max_expected) { uint64_t next_expected = 0; uint64_t missed = 0; int bad_keys = 0; int bad_values = 0; int correct = 0; std::string value_space; // Do not verify checksums. 
If we verify checksums then the // db itself will raise errors because data is corrupted. // Instead, we want the reads to be successful and this test // will detect whether the appropriate corruptions have // occurred. Iterator* iter = db_->NewIterator(ReadOptions(false, true)); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { uint64_t key; Slice in(iter->key()); if (!ConsumeDecimalNumber(&in, &key) || !in.empty() || key < next_expected) { bad_keys++; continue; } missed += (key - next_expected); next_expected = key + 1; if (iter->value() != Value(static_cast(key), &value_space)) { bad_values++; } else { correct++; } } delete iter; fprintf(stderr, "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%llu\n", min_expected, max_expected, correct, bad_keys, bad_values, static_cast(missed)); ASSERT_LE(min_expected, correct); ASSERT_GE(max_expected, correct); } void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { // Pick file to corrupt std::vector filenames; ASSERT_OK(env_.GetChildren(dbname_, &filenames)); uint64_t number; FileType type; std::string fname; int picked_number = -1; for (size_t i = 0; i < filenames.size(); i++) { if (ParseFileName(filenames[i], &number, &type) && type == filetype && static_cast(number) > picked_number) { // Pick latest file fname = dbname_ + "/" + filenames[i]; picked_number = static_cast(number); } } ASSERT_TRUE(!fname.empty()) << filetype; test::CorruptFile(fname, offset, bytes_to_corrupt); } // corrupts exactly one file at level `level`. 
if no file found at level, // asserts void CorruptTableFileAtLevel(int level, int offset, int bytes_to_corrupt) { std::vector metadata; db_->GetLiveFilesMetaData(&metadata); for (const auto& m : metadata) { if (m.level == level) { test::CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt); return; } } FAIL() << "no file found at level"; } int Property(const std::string& name) { std::string property; int result; if (db_->GetProperty(name, &property) && sscanf(property.c_str(), "%d", &result) == 1) { return result; } else { return -1; } } // Return the ith key Slice Key(int i, std::string* storage) { char buf[100]; snprintf(buf, sizeof(buf), "%016d", i); storage->assign(buf, strlen(buf)); return Slice(*storage); } // Return the value to associate with the specified key Slice Value(int k, std::string* storage) { if (k == 0) { // Ugh. Random seed of 0 used to produce no entropy. This code // preserves the implementation that was in place when all of the // magic values in this file were picked. *storage = std::string(kValueSize, ' '); return Slice(*storage); } else { Random r(k); return test::RandomString(&r, kValueSize, storage); } } }; TEST_F(CorruptionTest, Recovery) { Build(100); Check(100, 100); #ifdef OS_WIN // On Wndows OS Disk cache does not behave properly // We do not call FlushBuffers on every Flush. If we do not close // the log file prior to the corruption we end up with the first // block not corrupted but only the second. However, under the debugger // things work just fine but never pass when running normally // For that reason people may want to run with unbuffered I/O. That option // is not available for WAL though. CloseDb(); #endif Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block ASSERT_TRUE(!TryReopen().ok()); options_.paranoid_checks = false; Reopen(&options_); // The 64 records in the first two log blocks are completely lost. 
Check(36, 36); } TEST_F(CorruptionTest, RecoverWriteError) { env_.writable_file_error_ = true; Status s = TryReopen(); ASSERT_TRUE(!s.ok()); } TEST_F(CorruptionTest, NewFileErrorDuringWrite) { // Do enough writing to force minor compaction env_.writable_file_error_ = true; const int num = static_cast(3 + (Options().write_buffer_size / kValueSize)); std::string value_storage; Status s; bool failed = false; for (int i = 0; i < num; i++) { WriteBatch batch; batch.Put("a", Value(100, &value_storage)); s = db_->Write(WriteOptions(), &batch); if (!s.ok()) { failed = true; } ASSERT_TRUE(!failed || !s.ok()); } ASSERT_TRUE(!s.ok()); ASSERT_GE(env_.num_writable_file_errors_, 1); env_.writable_file_error_ = false; Reopen(); } TEST_F(CorruptionTest, TableFile) { Build(100); DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); dbi->TEST_CompactRange(0, nullptr, nullptr); dbi->TEST_CompactRange(1, nullptr, nullptr); Corrupt(kTableFile, 100, 1); Check(99, 99); ASSERT_NOK(dbi->VerifyChecksum()); } TEST_F(CorruptionTest, VerifyChecksumReadahead) { Options options; SpecialEnv senv(Env::Default()); options.env = &senv; // Disable block cache as we are going to check checksum for // the same file twice and measure number of reads. BlockBasedTableOptions table_options_no_bc; table_options_no_bc.no_block_cache = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options_no_bc)); Reopen(&options); Build(10000); DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); dbi->TEST_CompactRange(0, nullptr, nullptr); dbi->TEST_CompactRange(1, nullptr, nullptr); senv.count_random_reads_ = true; senv.random_read_counter_.Reset(); ASSERT_OK(dbi->VerifyChecksum()); // Make sure the counter is enabled. ASSERT_GT(senv.random_read_counter_.Read(), 0); // The SST file is about 10MB. Default readahead size is 256KB. // Give a conservative 20 reads for metadata blocks, The number // of random reads should be within 10 MB / 256KB + 20 = 60. 
ASSERT_LT(senv.random_read_counter_.Read(), 60); senv.random_read_bytes_counter_ = 0; ReadOptions ro; ro.readahead_size = size_t{32 * 1024}; ASSERT_OK(dbi->VerifyChecksum(ro)); // The SST file is about 10MB. We set readahead size to 32KB. // Give 0 to 20 reads for metadata blocks, and allow real read // to range from 24KB to 48KB. The lower bound would be: // 10MB / 48KB + 0 = 213 // The higher bound is // 10MB / 24KB + 20 = 447. ASSERT_GE(senv.random_read_counter_.Read(), 213); ASSERT_LE(senv.random_read_counter_.Read(), 447); // Test readahead shouldn't break mmap mode (where it should be // disabled). options.allow_mmap_reads = true; Reopen(&options); dbi = static_cast(db_); ASSERT_OK(dbi->VerifyChecksum(ro)); CloseDb(); } TEST_F(CorruptionTest, TableFileIndexData) { Options options; // very big, we'll trigger flushes manually options.write_buffer_size = 100 * 1024 * 1024; Reopen(&options); // build 2 tables, flush at 5000 Build(10000, 5000); DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); // corrupt an index block of an entire file Corrupt(kTableFile, -2000, 500); options.paranoid_checks = false; Reopen(&options); dbi = reinterpret_cast(db_); // one full file may be readable, since only one was corrupted // the other file should be fully non-readable, since index was corrupted Check(0, 5000); ASSERT_NOK(dbi->VerifyChecksum()); // In paranoid mode, the db cannot be opened due to the corrupted file. 
ASSERT_TRUE(TryReopen().IsCorruption()); } TEST_F(CorruptionTest, MissingDescriptor) { Build(1000); RepairDB(); Reopen(); Check(1000, 1000); } TEST_F(CorruptionTest, SequenceNumberRecovery) { ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2")); ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3")); ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4")); ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5")); RepairDB(); Reopen(); std::string v; ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); ASSERT_EQ("v5", v); // Write something. If sequence number was not recovered properly, // it will be hidden by an earlier write. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6")); ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); ASSERT_EQ("v6", v); Reopen(); ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); ASSERT_EQ("v6", v); } TEST_F(CorruptionTest, CorruptedDescriptor) { ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); dbi->TEST_CompactRange(0, nullptr, nullptr); Corrupt(kDescriptorFile, 0, 1000); Status s = TryReopen(); ASSERT_TRUE(!s.ok()); RepairDB(); Reopen(); std::string v; ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); ASSERT_EQ("hello", v); } TEST_F(CorruptionTest, CompactionInputError) { Options options; Reopen(&options); Build(10); DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); dbi->TEST_CompactRange(0, nullptr, nullptr); dbi->TEST_CompactRange(1, nullptr, nullptr); ASSERT_EQ(1, Property("rocksdb.num-files-at-level2")); Corrupt(kTableFile, 100, 1); Check(9, 9); ASSERT_NOK(dbi->VerifyChecksum()); // Force compactions by writing lots of values Build(10000); Check(10000, 10000); ASSERT_NOK(dbi->VerifyChecksum()); } TEST_F(CorruptionTest, CompactionInputErrorParanoid) { Options options; options.paranoid_checks = true; options.write_buffer_size = 131072; options.max_write_buffer_number = 2; Reopen(&options); DBImpl* dbi = reinterpret_cast(db_); // Fill levels >= 1 
for (int level = 1; level < dbi->NumberLevels(); level++) { dbi->Put(WriteOptions(), "", "begin"); dbi->Put(WriteOptions(), "~", "end"); dbi->TEST_FlushMemTable(); for (int comp_level = 0; comp_level < dbi->NumberLevels() - level; ++comp_level) { dbi->TEST_CompactRange(comp_level, nullptr, nullptr); } } Reopen(&options); dbi = reinterpret_cast(db_); Build(10); dbi->TEST_FlushMemTable(); dbi->TEST_WaitForCompact(); ASSERT_EQ(1, Property("rocksdb.num-files-at-level0")); CorruptTableFileAtLevel(0, 100, 1); Check(9, 9); ASSERT_NOK(dbi->VerifyChecksum()); // Write must eventually fail because of corrupted table Status s; std::string tmp1, tmp2; bool failed = false; for (int i = 0; i < 10000; i++) { s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2)); if (!s.ok()) { failed = true; } // if one write failed, every subsequent write must fail, too ASSERT_TRUE(!failed || !s.ok()) << "write did not fail in a corrupted db"; } ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db"; } TEST_F(CorruptionTest, UnrelatedKeys) { Build(10); DBImpl* dbi = reinterpret_cast(db_); dbi->TEST_FlushMemTable(); Corrupt(kTableFile, 100, 1); ASSERT_NOK(dbi->VerifyChecksum()); std::string tmp1, tmp2; ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2))); std::string v; ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); ASSERT_EQ(Value(1000, &tmp2).ToString(), v); dbi->TEST_FlushMemTable(); ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); ASSERT_EQ(Value(1000, &tmp2).ToString(), v); } TEST_F(CorruptionTest, RangeDeletionCorrupted) { ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b")); ASSERT_OK(db_->Flush(FlushOptions())); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(static_cast(1), metadata.size()); std::string filename = dbname_ + metadata[0].name; std::unique_ptr file; ASSERT_OK(options_.env->NewRandomAccessFile(filename, &file, EnvOptions())); std::unique_ptr file_reader( new 
RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file), filename)); uint64_t file_size; ASSERT_OK(options_.env->GetFileSize(filename, &file_size)); BlockHandle range_del_handle; ASSERT_OK(FindMetaBlock( file_reader.get(), file_size, kBlockBasedTableMagicNumber, ImmutableCFOptions(options_), kRangeDelBlock, &range_del_handle)); ASSERT_OK(TryReopen()); test::CorruptFile(filename, static_cast(range_del_handle.offset()), 1); ASSERT_TRUE(TryReopen().IsCorruption()); } TEST_F(CorruptionTest, FileSystemStateCorrupted) { for (int iter = 0; iter < 2; ++iter) { Options options; options.paranoid_checks = true; options.create_if_missing = true; Reopen(&options); Build(10); ASSERT_OK(db_->Flush(FlushOptions())); DBImpl* dbi = reinterpret_cast(db_); std::vector metadata; dbi->GetLiveFilesMetaData(&metadata); ASSERT_GT(metadata.size(), size_t(0)); std::string filename = dbname_ + metadata[0].name; delete db_; db_ = nullptr; if (iter == 0) { // corrupt file size std::unique_ptr file; env_.NewWritableFile(filename, &file, EnvOptions()); file->Append(Slice("corrupted sst")); file.reset(); Status x = TryReopen(&options); ASSERT_TRUE(x.IsCorruption()); } else { // delete the file env_.DeleteFile(filename); Status x = TryReopen(&options); ASSERT_TRUE(x.IsPathNotFound()); } DestroyDB(dbname_, options_); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as RepairDB() is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/cuckoo_table_db_test.cc000066400000000000000000000244451370372246700203100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "db/db_impl/db_impl.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "table/cuckoo/cuckoo_table_factory.h" #include "table/cuckoo/cuckoo_table_reader.h" #include "table/meta_blocks.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class CuckooTableDBTest : public testing::Test { private: std::string dbname_; Env* env_; DB* db_; public: CuckooTableDBTest() : env_(Env::Default()) { dbname_ = test::PerThreadDBPath("cuckoo_table_db_test"); EXPECT_OK(DestroyDB(dbname_, Options())); db_ = nullptr; Reopen(); } ~CuckooTableDBTest() override { delete db_; EXPECT_OK(DestroyDB(dbname_, Options())); } Options CurrentOptions() { Options options; options.table_factory.reset(NewCuckooTableFactory()); options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); options.allow_mmap_reads = true; options.create_if_missing = true; options.allow_concurrent_memtable_write = false; return options; } DBImpl* dbfull() { return reinterpret_cast(db_); } // The following util methods are copied from plain_table_db_test. 
void Reopen(Options* options = nullptr) { delete db_; db_ = nullptr; Options opts; if (options != nullptr) { opts = *options; } else { opts = CurrentOptions(); opts.create_if_missing = true; } ASSERT_OK(DB::Open(opts, dbname_, &db_)); } Status Put(const Slice& k, const Slice& v) { return db_->Put(WriteOptions(), k, v); } Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } std::string Get(const std::string& k) { ReadOptions options; std::string result; Status s = db_->Get(options, k, &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } int NumTableFilesAtLevel(int level) { std::string property; EXPECT_TRUE(db_->GetProperty( "rocksdb.num-files-at-level" + NumberToString(level), &property)); return atoi(property.c_str()); } // Return spread of files per level std::string FilesPerLevel() { std::string result; size_t last_non_zero_offset = 0; for (int level = 0; level < db_->NumberLevels(); level++) { int f = NumTableFilesAtLevel(level); char buf[100]; snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); result += buf; if (f > 0) { last_non_zero_offset = result.size(); } } result.resize(last_non_zero_offset); return result; } }; TEST_F(CuckooTableDBTest, Flush) { // Try with empty DB first. ASSERT_TRUE(dbfull() != nullptr); ASSERT_EQ("NOT_FOUND", Get("key2")); // Add some values to db. Options options = CurrentOptions(); Reopen(&options); ASSERT_OK(Put("key1", "v1")); ASSERT_OK(Put("key2", "v2")); ASSERT_OK(Put("key3", "v3")); dbfull()->TEST_FlushMemTable(); TablePropertiesCollection ptc; reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); ASSERT_EQ(1U, ptc.size()); ASSERT_EQ(3U, ptc.begin()->second->num_entries); ASSERT_EQ("1", FilesPerLevel()); ASSERT_EQ("v1", Get("key1")); ASSERT_EQ("v2", Get("key2")); ASSERT_EQ("v3", Get("key3")); ASSERT_EQ("NOT_FOUND", Get("key4")); // Now add more keys and flush. 
ASSERT_OK(Put("key4", "v4")); ASSERT_OK(Put("key5", "v5")); ASSERT_OK(Put("key6", "v6")); dbfull()->TEST_FlushMemTable(); reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); ASSERT_EQ(2U, ptc.size()); auto row = ptc.begin(); ASSERT_EQ(3U, row->second->num_entries); ASSERT_EQ(3U, (++row)->second->num_entries); ASSERT_EQ("2", FilesPerLevel()); ASSERT_EQ("v1", Get("key1")); ASSERT_EQ("v2", Get("key2")); ASSERT_EQ("v3", Get("key3")); ASSERT_EQ("v4", Get("key4")); ASSERT_EQ("v5", Get("key5")); ASSERT_EQ("v6", Get("key6")); ASSERT_OK(Delete("key6")); ASSERT_OK(Delete("key5")); ASSERT_OK(Delete("key4")); dbfull()->TEST_FlushMemTable(); reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); ASSERT_EQ(3U, ptc.size()); row = ptc.begin(); ASSERT_EQ(3U, row->second->num_entries); ASSERT_EQ(3U, (++row)->second->num_entries); ASSERT_EQ(3U, (++row)->second->num_entries); ASSERT_EQ("3", FilesPerLevel()); ASSERT_EQ("v1", Get("key1")); ASSERT_EQ("v2", Get("key2")); ASSERT_EQ("v3", Get("key3")); ASSERT_EQ("NOT_FOUND", Get("key4")); ASSERT_EQ("NOT_FOUND", Get("key5")); ASSERT_EQ("NOT_FOUND", Get("key6")); } TEST_F(CuckooTableDBTest, FlushWithDuplicateKeys) { Options options = CurrentOptions(); Reopen(&options); ASSERT_OK(Put("key1", "v1")); ASSERT_OK(Put("key2", "v2")); ASSERT_OK(Put("key1", "v3")); // Duplicate dbfull()->TEST_FlushMemTable(); TablePropertiesCollection ptc; reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); ASSERT_EQ(1U, ptc.size()); ASSERT_EQ(2U, ptc.begin()->second->num_entries); ASSERT_EQ("1", FilesPerLevel()); ASSERT_EQ("v3", Get("key1")); ASSERT_EQ("v2", Get("key2")); } namespace { static std::string Key(int i) { char buf[100]; snprintf(buf, sizeof(buf), "key_______%06d", i); return std::string(buf); } static std::string Uint64Key(uint64_t i) { std::string str; str.resize(8); memcpy(&str[0], static_cast(&i), 8); return str; } } // namespace. 
TEST_F(CuckooTableDBTest, Uint64Comparator) { Options options = CurrentOptions(); options.comparator = test::Uint64Comparator(); Reopen(&options); ASSERT_OK(Put(Uint64Key(1), "v1")); ASSERT_OK(Put(Uint64Key(2), "v2")); ASSERT_OK(Put(Uint64Key(3), "v3")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get(Uint64Key(1))); ASSERT_EQ("v2", Get(Uint64Key(2))); ASSERT_EQ("v3", Get(Uint64Key(3))); ASSERT_EQ("NOT_FOUND", Get(Uint64Key(4))); // Add more keys. ASSERT_OK(Delete(Uint64Key(2))); // Delete. dbfull()->TEST_FlushMemTable(); ASSERT_OK(Put(Uint64Key(3), "v0")); // Update. ASSERT_OK(Put(Uint64Key(4), "v4")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get(Uint64Key(1))); ASSERT_EQ("NOT_FOUND", Get(Uint64Key(2))); ASSERT_EQ("v0", Get(Uint64Key(3))); ASSERT_EQ("v4", Get(Uint64Key(4))); } TEST_F(CuckooTableDBTest, CompactionIntoMultipleFiles) { // Create a big L0 file and check it compacts into multiple files in L1. Options options = CurrentOptions(); options.write_buffer_size = 270 << 10; // Two SST files should be created, each containing 14 keys. // Number of buckets will be 16. Total size ~156 KB. options.target_file_size_base = 160 << 10; Reopen(&options); // Write 28 values, each 10016 B ~ 10KB for (int idx = 0; idx < 28; ++idx) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + char(idx)))); } dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ("1", FilesPerLevel()); dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true /* disallow trivial move */); ASSERT_EQ("0,2", FilesPerLevel()); for (int idx = 0; idx < 28; ++idx) { ASSERT_EQ(std::string(10000, 'a' + char(idx)), Get(Key(idx))); } } TEST_F(CuckooTableDBTest, SameKeyInsertedInTwoDifferentFilesAndCompacted) { // Insert same key twice so that they go to different SST files. Then wait for // compaction and check if the latest value is stored and old value removed. 
Options options = CurrentOptions(); options.write_buffer_size = 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 2; Reopen(&options); // Write 11 values, each 10016 B for (int idx = 0; idx < 11; ++idx) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a'))); } dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ("1", FilesPerLevel()); // Generate one more file in level-0, and should trigger level-0 compaction for (int idx = 0; idx < 11; ++idx) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + char(idx)))); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_EQ("0,1", FilesPerLevel()); for (int idx = 0; idx < 11; ++idx) { ASSERT_EQ(std::string(10000, 'a' + char(idx)), Get(Key(idx))); } } TEST_F(CuckooTableDBTest, AdaptiveTable) { Options options = CurrentOptions(); // Ensure options compatible with PlainTable options.prefix_extractor.reset(NewCappedPrefixTransform(8)); // Write some keys using cuckoo table. options.table_factory.reset(NewCuckooTableFactory()); Reopen(&options); ASSERT_OK(Put("key1", "v1")); ASSERT_OK(Put("key2", "v2")); ASSERT_OK(Put("key3", "v3")); dbfull()->TEST_FlushMemTable(); // Write some keys using plain table. std::shared_ptr block_based_factory( NewBlockBasedTableFactory()); std::shared_ptr plain_table_factory( NewPlainTableFactory()); std::shared_ptr cuckoo_table_factory( NewCuckooTableFactory()); options.create_if_missing = false; options.table_factory.reset(NewAdaptiveTableFactory( plain_table_factory, block_based_factory, plain_table_factory, cuckoo_table_factory)); Reopen(&options); ASSERT_OK(Put("key4", "v4")); ASSERT_OK(Put("key1", "v5")); dbfull()->TEST_FlushMemTable(); // Write some keys using block based table. 
options.table_factory.reset(NewAdaptiveTableFactory( block_based_factory, block_based_factory, plain_table_factory, cuckoo_table_factory)); Reopen(&options); ASSERT_OK(Put("key5", "v6")); ASSERT_OK(Put("key2", "v7")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v5", Get("key1")); ASSERT_EQ("v7", Get("key2")); ASSERT_EQ("v3", Get("key3")); ASSERT_EQ("v4", Get("key4")); ASSERT_EQ("v6", Get("key5")); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { if (ROCKSDB_NAMESPACE::port::kLittleEndian) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } else { fprintf(stderr, "SKIPPED as Cuckoo table doesn't support Big Endian\n"); return 0; } } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/db/db_basic_test.cc000066400000000000000000003117661370372246700167440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/merge_operator.h" #include "rocksdb/perf_context.h" #include "rocksdb/utilities/debug.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "test_util/fault_injection_test_env.h" #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif #include "utilities/merge_operators.h" #include "utilities/merge_operators/string_append/stringappend.h" namespace ROCKSDB_NAMESPACE { class DBBasicTest : public DBTestBase { public: DBBasicTest() : DBTestBase("/db_basic_test") {} }; TEST_F(DBBasicTest, OpenWhenOpen) { Options options = CurrentOptions(); options.env = env_; ROCKSDB_NAMESPACE::DB* db2 = nullptr; ROCKSDB_NAMESPACE::Status s = DB::Open(options, dbname_, &db2); ASSERT_EQ(Status::Code::kIOError, s.code()); ASSERT_EQ(Status::SubCode::kNone, s.subcode()); ASSERT_TRUE(strstr(s.getState(), "lock ") != nullptr); delete db2; } #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, ReadOnlyDB) { ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put("bar", "v2")); ASSERT_OK(Put("foo", "v3")); Close(); auto options = CurrentOptions(); assert(options.env == env_); ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); Iterator* iter = db_->NewIterator(ReadOptions()); int count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_OK(iter->status()); ++count; } ASSERT_EQ(count, 2); delete iter; Close(); // Reopen and flush memtable. Reopen(options); Flush(); Close(); // Now check keys in read only mode. 
ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); ASSERT_TRUE(db_->SyncWAL().IsNotSupported()); } TEST_F(DBBasicTest, ReadOnlyDBWithWriteDBIdToManifestSet) { ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put("bar", "v2")); ASSERT_OK(Put("foo", "v3")); Close(); auto options = CurrentOptions(); options.write_dbid_to_manifest = true; assert(options.env == env_); ASSERT_OK(ReadOnlyReopen(options)); std::string db_id1; db_->GetDbIdentity(db_id1); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); Iterator* iter = db_->NewIterator(ReadOptions()); int count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_OK(iter->status()); ++count; } ASSERT_EQ(count, 2); delete iter; Close(); // Reopen and flush memtable. Reopen(options); Flush(); Close(); // Now check keys in read only mode. ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); ASSERT_TRUE(db_->SyncWAL().IsNotSupported()); std::string db_id2; db_->GetDbIdentity(db_id2); ASSERT_EQ(db_id1, db_id2); } TEST_F(DBBasicTest, CompactedDB) { const uint64_t kFileSize = 1 << 20; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.write_buffer_size = kFileSize; options.target_file_size_base = kFileSize; options.max_bytes_for_level_base = 1 << 30; options.compression = kNoCompression; Reopen(options); // 1 L0 file, use CompactedDB if max_open_files = -1 ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, '1'))); Flush(); Close(); ASSERT_OK(ReadOnlyReopen(options)); Status s = Put("new", "value"); ASSERT_EQ(s.ToString(), "Not implemented: Not supported operation in read only mode."); ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa")); Close(); options.max_open_files = -1; ASSERT_OK(ReadOnlyReopen(options)); s = Put("new", "value"); ASSERT_EQ(s.ToString(), "Not implemented: Not supported in compacted db mode."); ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa")); Close(); Reopen(options); // Add 
more L0 files ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, '2'))); Flush(); ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, 'a'))); Flush(); ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b'))); ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e'))); Flush(); Close(); ASSERT_OK(ReadOnlyReopen(options)); // Fallback to read-only DB s = Put("new", "value"); ASSERT_EQ(s.ToString(), "Not implemented: Not supported operation in read only mode."); Close(); // Full compaction Reopen(options); // Add more keys ASSERT_OK(Put("fff", DummyString(kFileSize / 2, 'f'))); ASSERT_OK(Put("hhh", DummyString(kFileSize / 2, 'h'))); ASSERT_OK(Put("iii", DummyString(kFileSize / 2, 'i'))); ASSERT_OK(Put("jjj", DummyString(kFileSize / 2, 'j'))); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(3, NumTableFilesAtLevel(1)); Close(); // CompactedDB ASSERT_OK(ReadOnlyReopen(options)); s = Put("new", "value"); ASSERT_EQ(s.ToString(), "Not implemented: Not supported in compacted db mode."); ASSERT_EQ("NOT_FOUND", Get("abc")); ASSERT_EQ(DummyString(kFileSize / 2, 'a'), Get("aaa")); ASSERT_EQ(DummyString(kFileSize / 2, 'b'), Get("bbb")); ASSERT_EQ("NOT_FOUND", Get("ccc")); ASSERT_EQ(DummyString(kFileSize / 2, 'e'), Get("eee")); ASSERT_EQ(DummyString(kFileSize / 2, 'f'), Get("fff")); ASSERT_EQ("NOT_FOUND", Get("ggg")); ASSERT_EQ(DummyString(kFileSize / 2, 'h'), Get("hhh")); ASSERT_EQ(DummyString(kFileSize / 2, 'i'), Get("iii")); ASSERT_EQ(DummyString(kFileSize / 2, 'j'), Get("jjj")); ASSERT_EQ("NOT_FOUND", Get("kkk")); // MultiGet std::vector values; std::vector status_list = dbfull()->MultiGet( ReadOptions(), std::vector({Slice("aaa"), Slice("ccc"), Slice("eee"), Slice("ggg"), Slice("iii"), Slice("kkk")}), &values); ASSERT_EQ(status_list.size(), static_cast(6)); ASSERT_EQ(values.size(), static_cast(6)); ASSERT_OK(status_list[0]); ASSERT_EQ(DummyString(kFileSize / 2, 'a'), values[0]); ASSERT_TRUE(status_list[1].IsNotFound()); ASSERT_OK(status_list[2]); 
ASSERT_EQ(DummyString(kFileSize / 2, 'e'), values[2]); ASSERT_TRUE(status_list[3].IsNotFound()); ASSERT_OK(status_list[4]); ASSERT_EQ(DummyString(kFileSize / 2, 'i'), values[4]); ASSERT_TRUE(status_list[5].IsNotFound()); Reopen(options); // Add a key ASSERT_OK(Put("fff", DummyString(kFileSize / 2, 'f'))); Close(); ASSERT_OK(ReadOnlyReopen(options)); s = Put("new", "value"); ASSERT_EQ(s.ToString(), "Not implemented: Not supported operation in read only mode."); } TEST_F(DBBasicTest, LevelLimitReopen) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); const std::string value(1024 * 1024, ' '); int i = 0; while (NumTableFilesAtLevel(2, 1) == 0) { ASSERT_OK(Put(1, Key(i++), value)); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } options.num_levels = 1; options.max_bytes_for_level_multiplier_additional.resize(1, 1); Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ(s.IsInvalidArgument(), true); ASSERT_EQ(s.ToString(), "Invalid argument: db has more levels than options.num_levels"); options.num_levels = 10; options.max_bytes_for_level_multiplier_additional.resize(10, 1); ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); } #endif // ROCKSDB_LITE TEST_F(DBBasicTest, PutDeleteGet) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_OK(Put(1, "foo", "v2")); ASSERT_EQ("v2", Get(1, "foo")); ASSERT_OK(Delete(1, "foo")); ASSERT_EQ("NOT_FOUND", Get(1, "foo")); } while (ChangeOptions()); } TEST_F(DBBasicTest, PutSingleDeleteGet) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_OK(Put(1, "foo2", "v2")); ASSERT_EQ("v2", Get(1, "foo2")); ASSERT_OK(SingleDelete(1, "foo")); ASSERT_EQ("NOT_FOUND", Get(1, "foo")); // Skip FIFO and universal compaction because they do not apply to the test // case.
Skip MergePut because single delete does not get removed when it // encounters a merge. } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut)); } TEST_F(DBBasicTest, EmptyFlush) { // It is possible to produce empty flushes when using single deletes. Tests // whether empty flushes cause issues. do { Random rnd(301); Options options = CurrentOptions(); options.disable_auto_compactions = true; CreateAndReopenWithCF({"pikachu"}, options); Put(1, "a", Slice()); SingleDelete(1, "a"); ASSERT_OK(Flush(1)); ASSERT_EQ("[ ]", AllEntriesFor("a", 1)); // Skip FIFO and universal compaction as they do not apply to the test // case. Skip MergePut because merges cannot be combined with single // deletions. } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut)); } TEST_F(DBBasicTest, GetFromVersions) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Flush(1)); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("NOT_FOUND", Get(0, "foo")); } while (ChangeOptions()); } #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, GetSnapshot) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); // Try with both a short key and a long key for (int i = 0; i < 2; i++) { std::string key = (i == 0) ? 
std::string("foo") : std::string(200, 'x'); ASSERT_OK(Put(1, key, "v1")); const Snapshot* s1 = db_->GetSnapshot(); ASSERT_OK(Put(1, key, "v2")); ASSERT_EQ("v2", Get(1, key)); ASSERT_EQ("v1", Get(1, key, s1)); ASSERT_OK(Flush(1)); ASSERT_EQ("v2", Get(1, key)); ASSERT_EQ("v1", Get(1, key, s1)); db_->ReleaseSnapshot(s1); } } while (ChangeOptions()); } #endif // ROCKSDB_LITE TEST_F(DBBasicTest, CheckLock) { do { DB* localdb; Options options = CurrentOptions(); ASSERT_OK(TryReopen(options)); // second open should fail Status s = DB::Open(options, dbname_, &localdb); ASSERT_NOK(s); #ifdef OS_LINUX ASSERT_TRUE(s.ToString().find("lock hold by current process") != std::string::npos); #endif // OS_LINUX } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, FlushMultipleMemtable) { do { Options options = CurrentOptions(); WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 3; options.max_write_buffer_size_to_maintain = -1; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); ASSERT_OK(Flush(1)); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v1", Get(1, "bar")); ASSERT_OK(Flush(1)); } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, FlushEmptyColumnFamily) { // Block flush thread and disable compaction thread env_->SetBackgroundThreads(1, Env::HIGH); env_->SetBackgroundThreads(1, Env::LOW); test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); test::SleepingBackgroundTask sleeping_task_high; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_high, Env::Priority::HIGH); Options options = CurrentOptions(); // disable compaction options.disable_auto_compactions = true; WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; 
options.max_write_buffer_number = 2; options.min_write_buffer_number_to_merge = 1; options.max_write_buffer_size_to_maintain = static_cast(options.write_buffer_size); CreateAndReopenWithCF({"pikachu"}, options); // Compaction can still go through even if no thread can flush the // mem table. ASSERT_OK(Flush(0)); ASSERT_OK(Flush(1)); // Insert can go through ASSERT_OK(dbfull()->Put(writeOpt, handles_[0], "foo", "v1")); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); ASSERT_EQ("v1", Get(0, "foo")); ASSERT_EQ("v1", Get(1, "bar")); sleeping_task_high.WakeUp(); sleeping_task_high.WaitUntilDone(); // Flush can still go through. ASSERT_OK(Flush(0)); ASSERT_OK(Flush(1)); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); } TEST_F(DBBasicTest, Flush) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; SetPerfLevel(kEnableTime); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); // this will now also flush the last 2 writes ASSERT_OK(Flush(1)); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); get_perf_context()->Reset(); Get(1, "foo"); ASSERT_TRUE((int)get_perf_context()->get_from_output_files_time > 0); ASSERT_EQ(2, (int)get_perf_context()->get_read_bytes); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v1", Get(1, "bar")); writeOpt.disableWAL = true; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2")); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2")); ASSERT_OK(Flush(1)); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v2", Get(1, "bar")); get_perf_context()->Reset(); ASSERT_EQ("v2", Get(1, "foo")); ASSERT_TRUE((int)get_perf_context()->get_from_output_files_time > 0); writeOpt.disableWAL = false; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3")); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3")); 
ASSERT_OK(Flush(1)); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); // 'foo' should be there because its put // has WAL enabled. ASSERT_EQ("v3", Get(1, "foo")); ASSERT_EQ("v3", Get(1, "bar")); SetPerfLevel(kDisable); } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, ManifestRollOver) { do { Options options; options.max_manifest_file_size = 10; // 10 bytes options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); { ASSERT_OK(Put(1, "manifest_key1", std::string(1000, '1'))); ASSERT_OK(Put(1, "manifest_key2", std::string(1000, '2'))); ASSERT_OK(Put(1, "manifest_key3", std::string(1000, '3'))); uint64_t manifest_before_flush = dbfull()->TEST_Current_Manifest_FileNo(); ASSERT_OK(Flush(1)); // This should trigger LogAndApply. uint64_t manifest_after_flush = dbfull()->TEST_Current_Manifest_FileNo(); ASSERT_GT(manifest_after_flush, manifest_before_flush); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush); // check if a new manifest file got inserted or not. 
ASSERT_EQ(std::string(1000, '1'), Get(1, "manifest_key1")); ASSERT_EQ(std::string(1000, '2'), Get(1, "manifest_key2")); ASSERT_EQ(std::string(1000, '3'), Get(1, "manifest_key3")); } } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, IdentityAcrossRestarts1) { do { std::string id1; ASSERT_OK(db_->GetDbIdentity(id1)); Options options = CurrentOptions(); Reopen(options); std::string id2; ASSERT_OK(db_->GetDbIdentity(id2)); // id1 should match id2 because identity was not regenerated ASSERT_EQ(id1.compare(id2), 0); std::string idfilename = IdentityFileName(dbname_); ASSERT_OK(env_->DeleteFile(idfilename)); Reopen(options); std::string id3; ASSERT_OK(db_->GetDbIdentity(id3)); if (options.write_dbid_to_manifest) { ASSERT_EQ(id1.compare(id3), 0); } else { // id1 should NOT match id3 because identity was regenerated ASSERT_NE(id1.compare(id3), 0); } } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, IdentityAcrossRestarts2) { do { std::string id1; ASSERT_OK(db_->GetDbIdentity(id1)); Options options = CurrentOptions(); options.write_dbid_to_manifest = true; Reopen(options); std::string id2; ASSERT_OK(db_->GetDbIdentity(id2)); // id1 should match id2 because identity was not regenerated ASSERT_EQ(id1.compare(id2), 0); std::string idfilename = IdentityFileName(dbname_); ASSERT_OK(env_->DeleteFile(idfilename)); Reopen(options); std::string id3; ASSERT_OK(db_->GetDbIdentity(id3)); // id1 should match id3: with write_dbid_to_manifest set, the identity // is expected to survive deletion of the identity file ASSERT_EQ(id1, id3); } while (ChangeCompactOptions()); } #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, Snapshot) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); Put(0, "foo", "0v1"); Put(1, "foo", "1v1"); const Snapshot* s1 = db_->GetSnapshot(); ASSERT_EQ(1U, GetNumSnapshots()); uint64_t time_snap1 = GetTimeOldestSnapshots(); ASSERT_GT(time_snap1, 0U); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber());
Put(0, "foo", "0v2"); Put(1, "foo", "1v2"); env_->addon_time_.fetch_add(1); const Snapshot* s2 = db_->GetSnapshot(); ASSERT_EQ(2U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); Put(0, "foo", "0v3"); Put(1, "foo", "1v3"); { ManagedSnapshot s3(db_); ASSERT_EQ(3U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); Put(0, "foo", "0v4"); Put(1, "foo", "1v4"); ASSERT_EQ("0v1", Get(0, "foo", s1)); ASSERT_EQ("1v1", Get(1, "foo", s1)); ASSERT_EQ("0v2", Get(0, "foo", s2)); ASSERT_EQ("1v2", Get(1, "foo", s2)); ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot())); ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot())); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); } ASSERT_EQ(2U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); ASSERT_EQ("0v1", Get(0, "foo", s1)); ASSERT_EQ("1v1", Get(1, "foo", s1)); ASSERT_EQ("0v2", Get(0, "foo", s2)); ASSERT_EQ("1v2", Get(1, "foo", s2)); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); db_->ReleaseSnapshot(s1); ASSERT_EQ("0v2", Get(0, "foo", s2)); ASSERT_EQ("1v2", Get(1, "foo", s2)); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); ASSERT_EQ(1U, GetNumSnapshots()); ASSERT_LT(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber()); db_->ReleaseSnapshot(s2); ASSERT_EQ(0U, GetNumSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), 0); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); } while (ChangeOptions()); } #endif // ROCKSDB_LITE TEST_F(DBBasicTest, CompactBetweenSnapshots) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { Options options = CurrentOptions(options_override); options.disable_auto_compactions = true; 
CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); FillLevels("a", "z", 1); Put(1, "foo", "first"); const Snapshot* snapshot1 = db_->GetSnapshot(); Put(1, "foo", "second"); Put(1, "foo", "third"); Put(1, "foo", "fourth"); const Snapshot* snapshot2 = db_->GetSnapshot(); Put(1, "foo", "fifth"); Put(1, "foo", "sixth"); // All entries (including duplicates) exist // before any compaction or flush is triggered. ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fifth, fourth, third, second, first ]"); ASSERT_EQ("sixth", Get(1, "foo")); ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); ASSERT_EQ("first", Get(1, "foo", snapshot1)); // After a flush, "second", "third" and "fifth" should // be removed ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]"); // after we release the snapshot1, only two values left db_->ReleaseSnapshot(snapshot1); FillLevels("a", "z", 1); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); // We have only one valid snapshot snapshot2. Since snapshot1 is // not valid anymore, "first" should be removed by a compaction. 
ASSERT_EQ("sixth", Get(1, "foo")); ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]"); // after we release the snapshot2, only one value should be left db_->ReleaseSnapshot(snapshot2); FillLevels("a", "z", 1); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ("sixth", Get(1, "foo")); ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]"); } while (ChangeOptions(kSkipFIFOCompaction)); } TEST_F(DBBasicTest, DBOpen_Options) { Options options = CurrentOptions(); Close(); Destroy(options); // Does not exist, and create_if_missing == false: error DB* db = nullptr; options.create_if_missing = false; Status s = DB::Open(options, dbname_, &db); ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr); ASSERT_TRUE(db == nullptr); // Does not exist, and create_if_missing == true: OK options.create_if_missing = true; s = DB::Open(options, dbname_, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); delete db; db = nullptr; // Does exist, and error_if_exists == true: error options.create_if_missing = false; options.error_if_exists = true; s = DB::Open(options, dbname_, &db); ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr); ASSERT_TRUE(db == nullptr); // Does exist, and error_if_exists == false: OK options.create_if_missing = true; options.error_if_exists = false; s = DB::Open(options, dbname_, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); delete db; db = nullptr; } TEST_F(DBBasicTest, CompactOnFlush) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { Options options = CurrentOptions(options_override); options.disable_auto_compactions = true; CreateAndReopenWithCF({"pikachu"}, options); Put(1, "foo", "v1"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]"); // Write two new keys Put(1, "a", "begin"); Put(1, "z", "end"); Flush(1); // Case1: Delete followed by a put Delete(1, "foo"); Put(1, "foo", "v2"); 
ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]"); // After the current memtable is flushed, the DEL should // have been removed ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]"); // Case 2: Delete followed by another delete Delete(1, "foo"); Delete(1, "foo"); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, DEL, v2 ]"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]"); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 3: Put followed by a delete Put(1, "foo", "v3"); Delete(1, "foo"); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]"); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 4: Put followed by another Put Put(1, "foo", "v4"); Put(1, "foo", "v5"); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]"); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]"); // clear database Delete(1, "foo"); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 5: Put followed by snapshot followed by another Put // Both puts should remain. Put(1, "foo", "v6"); const Snapshot* snapshot = db_->GetSnapshot(); Put(1, "foo", "v7"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]"); db_->ReleaseSnapshot(snapshot); // clear database Delete(1, "foo"); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 6: snapshot followed by a put followed by another Put // Only the last put should remain.
const Snapshot* snapshot1 = db_->GetSnapshot(); Put(1, "foo", "v8"); Put(1, "foo", "v9"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]"); db_->ReleaseSnapshot(snapshot1); } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, FlushOneColumnFamily) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", "alyosha", "popovich"}, options); ASSERT_OK(Put(0, "Default", "Default")); ASSERT_OK(Put(1, "pikachu", "pikachu")); ASSERT_OK(Put(2, "ilya", "ilya")); ASSERT_OK(Put(3, "muromec", "muromec")); ASSERT_OK(Put(4, "dobrynia", "dobrynia")); ASSERT_OK(Put(5, "nikitich", "nikitich")); ASSERT_OK(Put(6, "alyosha", "alyosha")); ASSERT_OK(Put(7, "popovich", "popovich")); for (int i = 0; i < 8; ++i) { Flush(i); auto tables = ListTableFiles(env_, dbname_); ASSERT_EQ(tables.size(), i + 1U); } } TEST_F(DBBasicTest, MultiGetSimple) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); SetPerfLevel(kEnableCount); ASSERT_OK(Put(1, "k1", "v1")); ASSERT_OK(Put(1, "k2", "v2")); ASSERT_OK(Put(1, "k3", "v3")); ASSERT_OK(Put(1, "k4", "v4")); ASSERT_OK(Delete(1, "k4")); ASSERT_OK(Put(1, "k5", "v5")); ASSERT_OK(Delete(1, "no_key")); std::vector keys({"k1", "k2", "k3", "k4", "k5", "no_key"}); std::vector values(20, "Temporary data to be overwritten"); std::vector cfs(keys.size(), handles_[1]); get_perf_context()->Reset(); std::vector s = db_->MultiGet(ReadOptions(), cfs, keys, &values); ASSERT_EQ(values.size(), keys.size()); ASSERT_EQ(values[0], "v1"); ASSERT_EQ(values[1], "v2"); ASSERT_EQ(values[2], "v3"); ASSERT_EQ(values[4], "v5"); // four kv pairs * two bytes per value ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes); ASSERT_OK(s[0]); ASSERT_OK(s[1]); ASSERT_OK(s[2]); ASSERT_TRUE(s[3].IsNotFound()); ASSERT_OK(s[4]); ASSERT_TRUE(s[5].IsNotFound()); SetPerfLevel(kDisable); } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, MultiGetEmpty) { do { CreateAndReopenWithCF({"pikachu"}, 
CurrentOptions()); // Empty Key Set std::vector keys; std::vector values; std::vector cfs; std::vector s = db_->MultiGet(ReadOptions(), cfs, keys, &values); ASSERT_EQ(s.size(), 0U); // Empty Database, Empty Key Set Options options = CurrentOptions(); options.create_if_missing = true; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); s = db_->MultiGet(ReadOptions(), cfs, keys, &values); ASSERT_EQ(s.size(), 0U); // Empty Database, Search for Keys keys.resize(2); keys[0] = "a"; keys[1] = "b"; cfs.push_back(handles_[0]); cfs.push_back(handles_[1]); s = db_->MultiGet(ReadOptions(), cfs, keys, &values); ASSERT_EQ(static_cast(s.size()), 2); ASSERT_TRUE(s[0].IsNotFound() && s[1].IsNotFound()); } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, ChecksumTest) { BlockBasedTableOptions table_options; Options options = CurrentOptions(); // change when new checksum type added int max_checksum = static_cast(kxxHash64); const int kNumPerFile = 2; // generate one table with each type of checksum for (int i = 0; i <= max_checksum; ++i) { table_options.checksum = static_cast(i); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); for (int j = 0; j < kNumPerFile; ++j) { ASSERT_OK(Put(Key(i * kNumPerFile + j), Key(i * kNumPerFile + j))); } ASSERT_OK(Flush()); } // with each valid checksum type setting... for (int i = 0; i <= max_checksum; ++i) { table_options.checksum = static_cast(i); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); // verify every type of checksum (should be regardless of that setting) for (int j = 0; j < (max_checksum + 1) * kNumPerFile; ++j) { ASSERT_EQ(Key(j), Get(Key(j))); } } } // On Windows you can have either memory mapped file or a file // with unbuffered access. 
So this asserts and does not make // sense to run #ifndef OS_WIN TEST_F(DBBasicTest, MmapAndBufferOptions) { if (!IsMemoryMappedAccessSupported()) { return; } Options options = CurrentOptions(); options.use_direct_reads = true; options.allow_mmap_reads = true; ASSERT_NOK(TryReopen(options)); // All other combinations are acceptable options.use_direct_reads = false; ASSERT_OK(TryReopen(options)); if (IsDirectIOSupported()) { options.use_direct_reads = true; options.allow_mmap_reads = false; ASSERT_OK(TryReopen(options)); } options.use_direct_reads = false; ASSERT_OK(TryReopen(options)); } #endif class TestEnv : public EnvWrapper { public: explicit TestEnv(Env* base_env) : EnvWrapper(base_env), close_count(0) {} class TestLogger : public Logger { public: using Logger::Logv; explicit TestLogger(TestEnv* env_ptr) : Logger() { env = env_ptr; } ~TestLogger() override { if (!closed_) { CloseHelper(); } } void Logv(const char* /*format*/, va_list /*ap*/) override {} protected: Status CloseImpl() override { return CloseHelper(); } private: Status CloseHelper() { env->CloseCountInc(); ; return Status::IOError(); } TestEnv* env; }; void CloseCountInc() { close_count++; } int GetCloseCount() { return close_count; } Status NewLogger(const std::string& /*fname*/, std::shared_ptr* result) override { result->reset(new TestLogger(this)); return Status::OK(); } private: int close_count; }; TEST_F(DBBasicTest, DBClose) { Options options = GetDefaultOptions(); std::string dbname = test::PerThreadDBPath("db_close_test"); ASSERT_OK(DestroyDB(dbname, options)); DB* db = nullptr; TestEnv* env = new TestEnv(env_); std::unique_ptr local_env_guard(env); options.create_if_missing = true; options.env = env; Status s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); s = db->Close(); ASSERT_EQ(env->GetCloseCount(), 1); ASSERT_EQ(s, Status::IOError()); delete db; ASSERT_EQ(env->GetCloseCount(), 1); // Do not call DB::Close() and ensure our logger Close() still gets 
called s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); delete db; ASSERT_EQ(env->GetCloseCount(), 2); // Provide our own logger and ensure DB::Close() does not close it options.info_log.reset(new TestEnv::TestLogger(env)); options.create_if_missing = false; s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); s = db->Close(); ASSERT_EQ(s, Status::OK()); delete db; ASSERT_EQ(env->GetCloseCount(), 2); options.info_log.reset(); ASSERT_EQ(env->GetCloseCount(), 3); } TEST_F(DBBasicTest, DBCloseFlushError) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); Options options = GetDefaultOptions(); options.create_if_missing = true; options.manual_wal_flush = true; options.write_buffer_size = 100; options.env = fault_injection_env.get(); Reopen(options); ASSERT_OK(Put("key1", "value1")); ASSERT_OK(Put("key2", "value2")); ASSERT_OK(dbfull()->TEST_SwitchMemtable()); ASSERT_OK(Put("key3", "value3")); fault_injection_env->SetFilesystemActive(false); Status s = dbfull()->Close(); fault_injection_env->SetFilesystemActive(true); ASSERT_NE(s, Status::OK()); Destroy(options); } class DBMultiGetTestWithParam : public DBBasicTest, public testing::WithParamInterface {}; TEST_P(DBMultiGetTestWithParam, MultiGetMultiCF) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich", "alyosha", "popovich"}, options); // tuples std::vector> cf_kv_vec; static const int num_keys = 24; cf_kv_vec.reserve(num_keys); for (int i = 0; i < num_keys; ++i) { int cf = i / 3; /* Key index within the column family: i % 3 yields three distinct keys per CF, so all num_keys entries are distinct. */ int cf_key = i % 3; cf_kv_vec.emplace_back(std::make_tuple( cf, "cf" + std::to_string(cf) + "_key_" + std::to_string(cf_key), "cf" + std::to_string(cf) + "_val_" + std::to_string(cf_key))); ASSERT_OK(Put(std::get<0>(cf_kv_vec[i]), std::get<1>(cf_kv_vec[i]), std::get<2>(cf_kv_vec[i]))); } int get_sv_count = 0; ROCKSDB_NAMESPACE::DBImpl* db = reinterpret_cast(db_);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { if (++get_sv_count == 2) { // After MultiGet refs a couple of CFs, flush all CFs so MultiGet // is forced to repeat the process for (int i = 0; i < num_keys; ++i) { int cf = i / 3; int cf_key = i % 8; if (cf_key == 0) { ASSERT_OK(Flush(cf)); } ASSERT_OK(Put(std::get<0>(cf_kv_vec[i]), std::get<1>(cf_kv_vec[i]), std::get<2>(cf_kv_vec[i]) + "_2")); } } if (get_sv_count == 11) { for (int i = 0; i < 8; ++i) { auto* cfd = reinterpret_cast( db->GetColumnFamilyHandle(i)) ->cfd(); ASSERT_EQ(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); } } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::vector cfs; std::vector keys; std::vector values; for (int i = 0; i < num_keys; ++i) { cfs.push_back(std::get<0>(cf_kv_vec[i])); keys.push_back(std::get<1>(cf_kv_vec[i])); } values = MultiGet(cfs, keys, nullptr, GetParam()); ASSERT_EQ(values.size(), num_keys); for (unsigned int j = 0; j < values.size(); ++j) { ASSERT_EQ(values[j], std::get<2>(cf_kv_vec[j]) + "_2"); } keys.clear(); cfs.clear(); cfs.push_back(std::get<0>(cf_kv_vec[0])); keys.push_back(std::get<1>(cf_kv_vec[0])); cfs.push_back(std::get<0>(cf_kv_vec[3])); keys.push_back(std::get<1>(cf_kv_vec[3])); cfs.push_back(std::get<0>(cf_kv_vec[4])); keys.push_back(std::get<1>(cf_kv_vec[4])); values = MultiGet(cfs, keys, nullptr, GetParam()); ASSERT_EQ(values[0], std::get<2>(cf_kv_vec[0]) + "_2"); ASSERT_EQ(values[1], std::get<2>(cf_kv_vec[3]) + "_2"); ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[4]) + "_2"); keys.clear(); cfs.clear(); cfs.push_back(std::get<0>(cf_kv_vec[7])); keys.push_back(std::get<1>(cf_kv_vec[7])); cfs.push_back(std::get<0>(cf_kv_vec[6])); keys.push_back(std::get<1>(cf_kv_vec[6])); cfs.push_back(std::get<0>(cf_kv_vec[1])); keys.push_back(std::get<1>(cf_kv_vec[1])); values = MultiGet(cfs, keys, nullptr, GetParam()); ASSERT_EQ(values[0], std::get<2>(cf_kv_vec[7]) + "_2"); 
ASSERT_EQ(values[1], std::get<2>(cf_kv_vec[6]) + "_2");
  ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[1]) + "_2");

  // Once the reads are done, TEST_GetLocalSV()->Get() must report neither
  // kSVInUse nor kSVObsolete for any of the 8 column families.
  for (int cf = 0; cf < 8; ++cf) {
    auto* cfd = reinterpret_cast(
                    reinterpret_cast(db_)->GetColumnFamilyHandle(cf))
                    ->cfd();
    ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse);
    ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVObsolete);
  }
}

// Keeps flushing and rewriting all 8 column families every time the
// AfterRefSV sync point has fired twice, until the "LastTry" sync point is
// reached. The test then checks that MultiGet returned the values written by
// the most recent rewrite.
TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
                         "alyosha", "popovich"},
                        options);

  for (int i = 0; i < 8; ++i) {
    ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key",
                  "cf" + std::to_string(i) + "_val"));
  }

  int get_sv_count = 0;
  // Number of rewrite rounds performed by the callback; also used as the
  // value suffix, so it identifies the latest written values.
  int retries = 0;
  bool last_try = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::MultiGet::LastTry", [&](void* /*arg*/) {
        last_try = true;
        ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) {
        // Stop interfering once the last attempt has started.
        if (last_try) {
          return;
        }
        if (++get_sv_count == 2) {
          ++retries;
          get_sv_count = 0;
          for (int i = 0; i < 8; ++i) {
            ASSERT_OK(Flush(i));
            ASSERT_OK(Put(
                i, "cf" + std::to_string(i) + "_key",
                "cf" + std::to_string(i) + "_val" + std::to_string(retries)));
          }
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  std::vector cfs;
  std::vector keys;
  std::vector values;

  for (int i = 0; i < 8; ++i) {
    cfs.push_back(i);
    keys.push_back("cf" + std::to_string(i) + "_key");
  }

  values = MultiGet(cfs, keys, nullptr, GetParam());
  ASSERT_TRUE(last_try);
  ASSERT_EQ(values.size(), 8);
  for (unsigned int j = 0; j < values.size(); ++j) {
    ASSERT_EQ(values[j],
              "cf" + std::to_string(j) + "_val" + std::to_string(retries));
  }
  for (int i = 0; i < 8; ++i) {
    auto* cfd = reinterpret_cast(
                    reinterpret_cast(db_)->GetColumnFamilyHandle(i))
                    ->cfd();
    ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse);
  }
}
// Takes a snapshot, then rewrites every key from a sync point callback during
// MultiGet, and expects the read to still return the original "_val" values.
TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
                         "alyosha", "popovich"},
                        options);

  for (int i = 0; i < 8; ++i) {
    ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key",
                  "cf" + std::to_string(i) + "_val"));
  }

  int get_sv_count = 0;
  ROCKSDB_NAMESPACE::DBImpl* db = reinterpret_cast(db_);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) {
        // On the second invocation, flush and overwrite every CF.
        if (++get_sv_count == 2) {
          for (int i = 0; i < 8; ++i) {
            ASSERT_OK(Flush(i));
            ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key",
                          "cf" + std::to_string(i) + "_val2"));
          }
        }
        // On the eighth invocation each CF must report either kSVInUse or
        // kSVObsolete.
        if (get_sv_count == 8) {
          for (int i = 0; i < 8; ++i) {
            auto* cfd = reinterpret_cast(
                            db->GetColumnFamilyHandle(i))
                            ->cfd();
            ASSERT_TRUE(
                (cfd->TEST_GetLocalSV()->Get() == SuperVersion::kSVInUse) ||
                (cfd->TEST_GetLocalSV()->Get() == SuperVersion::kSVObsolete));
          }
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  std::vector cfs;
  std::vector keys;
  std::vector values;

  for (int i = 0; i < 8; ++i) {
    cfs.push_back(i);
    keys.push_back("cf" + std::to_string(i) + "_key");
  }

  const Snapshot* snapshot = db_->GetSnapshot();
  values = MultiGet(cfs, keys, snapshot, GetParam());
  db_->ReleaseSnapshot(snapshot);
  ASSERT_EQ(values.size(), 8);
  // The "_val2" writes made by the callback must not be visible.
  for (unsigned int j = 0; j < values.size(); ++j) {
    ASSERT_EQ(values[j], "cf" + std::to_string(j) + "_val");
  }
  for (int i = 0; i < 8; ++i) {
    auto* cfd = reinterpret_cast(
                    reinterpret_cast(db_)->GetColumnFamilyHandle(i))
                    ->cfd();
    ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse);
  }
}

// Run the DBMultiGetTestWithParam tests with both boolean parameter values.
INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
                        testing::Bool());

// Batched MultiGet on a single column family with keys passed in unsorted
// order (last argument `false`), including a deleted key and a missing key.
TEST_F(DBBasicTest, MultiGetBatchedSimpleUnsorted) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    SetPerfLevel(kEnableCount);
    ASSERT_OK(Put(1, "k1", "v1"));
    ASSERT_OK(Put(1, "k2", "v2"));
    ASSERT_OK(Put(1, "k3", "v3"));
    ASSERT_OK(Put(1, "k4", "v4"));
ASSERT_OK(Delete(1, "k4"));
    ASSERT_OK(Put(1, "k5", "v5"));
    ASSERT_OK(Delete(1, "no_key"));

    get_perf_context()->Reset();

    std::vector keys({"no_key", "k5", "k4", "k3", "k2", "k1"});
    std::vector values(keys.size());
    std::vector cfs(keys.size(), handles_[1]);
    std::vector s(keys.size());

    db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
                  values.data(), s.data(), false);

    // Results are positional: values[n] / s[n] correspond to keys[n].
    ASSERT_EQ(values.size(), keys.size());
    ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v1");
    ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v2");
    ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v3");
    ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v5");
    // four kv pairs * two bytes per value
    ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes);

    ASSERT_TRUE(s[0].IsNotFound());
    ASSERT_OK(s[1]);
    ASSERT_TRUE(s[2].IsNotFound());
    ASSERT_OK(s[3]);
    ASSERT_OK(s[4]);
    ASSERT_OK(s[5]);

    SetPerfLevel(kDisable);
  } while (ChangeCompactOptions());
}

// Batched MultiGet with keys passed in sorted order (last argument `true`),
// with data split across two flushed files and later unflushed writes.
TEST_F(DBBasicTest, MultiGetBatchedSortedMultiFile) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    SetPerfLevel(kEnableCount);
    // To expand the power of this test, generate > 1 table file and
    // mix with memtable
    ASSERT_OK(Put(1, "k1", "v1"));
    ASSERT_OK(Put(1, "k2", "v2"));
    Flush(1);
    ASSERT_OK(Put(1, "k3", "v3"));
    ASSERT_OK(Put(1, "k4", "v4"));
    Flush(1);
    ASSERT_OK(Delete(1, "k4"));
    ASSERT_OK(Put(1, "k5", "v5"));
    ASSERT_OK(Delete(1, "no_key"));

    get_perf_context()->Reset();

    std::vector keys({"k1", "k2", "k3", "k4", "k5", "no_key"});
    std::vector values(keys.size());
    std::vector cfs(keys.size(), handles_[1]);
    std::vector s(keys.size());

    db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
                  values.data(), s.data(), true);

    ASSERT_EQ(values.size(), keys.size());
    ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v1");
    ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v2");
    ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v3");
ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v5");
    // four kv pairs * two bytes per value
    ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes);

    ASSERT_OK(s[0]);
    ASSERT_OK(s[1]);
    ASSERT_OK(s[2]);
    ASSERT_TRUE(s[3].IsNotFound());
    ASSERT_OK(s[4]);
    ASSERT_TRUE(s[5].IsNotFound());

    SetPerfLevel(kDisable);
  } while (ChangeOptions());
}

// Batched MultiGet where the same key is requested several times; every
// occurrence must get the full (merged) value and an OK status.
TEST_F(DBBasicTest, MultiGetBatchedDuplicateKeys) {
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateStringAppendOperator();
  CreateAndReopenWithCF({"pikachu"}, opts);
  SetPerfLevel(kEnableCount);
  // To expand the power of this test, generate > 1 table file and
  // mix with memtable
  ASSERT_OK(Merge(1, "k1", "v1"));
  ASSERT_OK(Merge(1, "k2", "v2"));
  Flush(1);
  MoveFilesToLevel(2, 1);
  ASSERT_OK(Merge(1, "k3", "v3"));
  ASSERT_OK(Merge(1, "k4", "v4"));
  Flush(1);
  MoveFilesToLevel(2, 1);
  // Second operand for k4; the expected read result below is "v4,v4_2".
  ASSERT_OK(Merge(1, "k4", "v4_2"));
  ASSERT_OK(Merge(1, "k6", "v6"));
  Flush(1);
  MoveFilesToLevel(2, 1);
  ASSERT_OK(Merge(1, "k7", "v7"));
  ASSERT_OK(Merge(1, "k8", "v8"));
  Flush(1);
  MoveFilesToLevel(2, 1);

  get_perf_context()->Reset();

  std::vector keys({"k8", "k8", "k8", "k4", "k4", "k1", "k3"});
  std::vector values(keys.size());
  std::vector cfs(keys.size(), handles_[1]);
  std::vector s(keys.size());

  db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
                values.data(), s.data(), false);

  ASSERT_EQ(values.size(), keys.size());
  ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v8");
  ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v8");
  ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v8");
  ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v4,v4_2");
  ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v4,v4_2");
  ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v1");
  ASSERT_EQ(std::string(values[6].data(), values[6].size()), "v3");
  // 3 * 2 ("v8") + 2 * 7 ("v4,v4_2") + 2 ("v1") + 2 ("v3") = 24 bytes
  ASSERT_EQ(24, (int)get_perf_context()->multiget_read_bytes);

  for (Status& status : s) {
    ASSERT_OK(status);
  }
SetPerfLevel(kDisable);
}

// Layers four generations of values for overlapping keys (moved to level 2,
// moved to level 1, flushed but not moved, and unflushed) and checks that
// MultiGet returns the most recently written value for each key.
TEST_F(DBBasicTest, MultiGetBatchedMultiLevel) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  Reopen(options);
  int num_keys = 0;

  // Every key gets a "val_l2_" value; flush after every 8 keys.
  for (int i = 0; i < 128; ++i) {
    ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i)));
    num_keys++;
    if (num_keys == 8) {
      Flush();
      num_keys = 0;
    }
  }
  if (num_keys > 0) {
    Flush();
    num_keys = 0;
  }
  MoveFilesToLevel(2);

  // Every 3rd key is overwritten with a "val_l1_" value.
  for (int i = 0; i < 128; i += 3) {
    ASSERT_OK(Put("key_" + std::to_string(i), "val_l1_" + std::to_string(i)));
    num_keys++;
    if (num_keys == 8) {
      Flush();
      num_keys = 0;
    }
  }
  if (num_keys > 0) {
    Flush();
    num_keys = 0;
  }
  MoveFilesToLevel(1);

  // Every 5th key is overwritten with a "val_l0_" value (flushed, not moved).
  for (int i = 0; i < 128; i += 5) {
    ASSERT_OK(Put("key_" + std::to_string(i), "val_l0_" + std::to_string(i)));
    num_keys++;
    if (num_keys == 8) {
      Flush();
      num_keys = 0;
    }
  }
  if (num_keys > 0) {
    Flush();
    num_keys = 0;
  }
  ASSERT_EQ(0, num_keys);

  // Every 9th key is overwritten with a "val_mem_" value and left unflushed.
  for (int i = 0; i < 128; i += 9) {
    ASSERT_OK(Put("key_" + std::to_string(i), "val_mem_" + std::to_string(i)));
  }

  std::vector keys;
  std::vector values;

  for (int i = 64; i < 80; ++i) {
    keys.push_back("key_" + std::to_string(i));
  }

  values = MultiGet(keys, nullptr);
  ASSERT_EQ(values.size(), 16);
  // The checks go from the newest write to the oldest, so the first matching
  // branch names the value that should win.
  for (unsigned int j = 0; j < values.size(); ++j) {
    int key = j + 64;
    if (key % 9 == 0) {
      ASSERT_EQ(values[j], "val_mem_" + std::to_string(key));
    } else if (key % 5 == 0) {
      ASSERT_EQ(values[j], "val_l0_" + std::to_string(key));
    } else if (key % 3 == 0) {
      ASSERT_EQ(values[j], "val_l1_" + std::to_string(key));
    } else {
      ASSERT_EQ(values[j], "val_l2_" + std::to_string(key));
    }
  }
}

// Same layering as MultiGetBatchedMultiLevel, but the later generations are
// written with Merge() and a string-append merge operator, so the expected
// result is the comma-joined list of all operands for a key.
TEST_F(DBBasicTest, MultiGetBatchedMultiLevelMerge) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);
  int num_keys = 0;

  for (int i = 0; i < 128; ++i) {
ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i)));
    num_keys++;
    if (num_keys == 8) {
      Flush();
      num_keys = 0;
    }
  }
  if (num_keys > 0) {
    Flush();
    num_keys = 0;
  }
  MoveFilesToLevel(2);

  // Merge operand for every 3rd key.
  for (int i = 0; i < 128; i += 3) {
    ASSERT_OK(Merge("key_" + std::to_string(i), "val_l1_" + std::to_string(i)));
    num_keys++;
    if (num_keys == 8) {
      Flush();
      num_keys = 0;
    }
  }
  if (num_keys > 0) {
    Flush();
    num_keys = 0;
  }
  MoveFilesToLevel(1);

  // Merge operand for every 5th key (flushed, not moved).
  for (int i = 0; i < 128; i += 5) {
    ASSERT_OK(Merge("key_" + std::to_string(i), "val_l0_" + std::to_string(i)));
    num_keys++;
    if (num_keys == 8) {
      Flush();
      num_keys = 0;
    }
  }
  if (num_keys > 0) {
    Flush();
    num_keys = 0;
  }
  ASSERT_EQ(0, num_keys);

  // Merge operand for every 9th key, left unflushed.
  for (int i = 0; i < 128; i += 9) {
    ASSERT_OK(
        Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i)));
  }

  std::vector keys;
  std::vector values;

  for (int i = 32; i < 80; ++i) {
    keys.push_back("key_" + std::to_string(i));
  }

  values = MultiGet(keys, nullptr);
  ASSERT_EQ(values.size(), keys.size());
  // Rebuild the expected value: base Put value followed by each applicable
  // operand, joined with ",", in the order the operands were written.
  for (unsigned int j = 0; j < 48; ++j) {
    int key = j + 32;
    std::string value;
    value.append("val_l2_" + std::to_string(key));
    if (key % 3 == 0) {
      value.append(",");
      value.append("val_l1_" + std::to_string(key));
    }
    if (key % 5 == 0) {
      value.append(",");
      value.append("val_l0_" + std::to_string(key));
    }
    if (key % 9 == 0) {
      value.append(",");
      value.append("val_mem_" + std::to_string(key));
    }
    ASSERT_EQ(values[j], value);
  }
}

// Batched MultiGet over unflushed data with ReadOptions::value_size_soft_limit
// set: six 3-byte values are written and the limit is 11.
TEST_F(DBBasicTest, MultiGetBatchedValueSizeInMemory) {
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  SetPerfLevel(kEnableCount);
  ASSERT_OK(Put(1, "k1", "v_1"));
  ASSERT_OK(Put(1, "k2", "v_2"));
  ASSERT_OK(Put(1, "k3", "v_3"));
  ASSERT_OK(Put(1, "k4", "v_4"));
  ASSERT_OK(Put(1, "k5", "v_5"));
  ASSERT_OK(Put(1, "k6", "v_6"));
  std::vector keys = {"k1", "k2", "k3", "k4", "k5", "k6"};
  std::vector values(keys.size());
  std::vector s(keys.size());
  std::vector cfs(keys.size(), handles_[1]);

  get_perf_context()->Reset();
  ReadOptions ro;
  ro.value_size_soft_limit = 11;
  db_->MultiGet(ro,
handles_[1], keys.size(), keys.data(), values.data(),
                s.data(), false);

  ASSERT_EQ(values.size(), keys.size());
  // The first four keys are returned (4 * 3 = 12 bytes, checked below).
  for (unsigned int i = 0; i < 4; i++) {
    ASSERT_EQ(std::string(values[i].data(), values[i].size()),
              "v_" + std::to_string(i + 1));
  }

  // The remaining two keys are expected to be reported as Aborted.
  for (unsigned int i = 4; i < 6; i++) {
    ASSERT_TRUE(s[i].IsAborted());
  }

  ASSERT_EQ(12, (int)get_perf_context()->multiget_read_bytes);
  SetPerfLevel(kDisable);
}

// Batched MultiGet with value_size_soft_limit = 20 over data spread across
// two flushed files and unflushed writes, including deleted and missing keys.
TEST_F(DBBasicTest, MultiGetBatchedValueSize) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    SetPerfLevel(kEnableCount);

    ASSERT_OK(Put(1, "k6", "v6"));
    ASSERT_OK(Put(1, "k7", "v7_"));
    ASSERT_OK(Put(1, "k3", "v3_"));
    ASSERT_OK(Put(1, "k4", "v4"));
    Flush(1);
    ASSERT_OK(Delete(1, "k4"));
    ASSERT_OK(Put(1, "k11", "v11"));
    ASSERT_OK(Delete(1, "no_key"));
    ASSERT_OK(Put(1, "k8", "v8_"));
    ASSERT_OK(Put(1, "k13", "v13"));
    ASSERT_OK(Put(1, "k14", "v14"));
    ASSERT_OK(Put(1, "k15", "v15"));
    ASSERT_OK(Put(1, "k16", "v16"));
    ASSERT_OK(Put(1, "k17", "v17"));
    Flush(1);

    // Everything from here on is not flushed before the read.
    ASSERT_OK(Put(1, "k1", "v1_"));
    ASSERT_OK(Put(1, "k2", "v2_"));
    ASSERT_OK(Put(1, "k5", "v5_"));
    ASSERT_OK(Put(1, "k9", "v9_"));
    ASSERT_OK(Put(1, "k10", "v10"));
    ASSERT_OK(Delete(1, "k2"));
    ASSERT_OK(Delete(1, "k6"));

    get_perf_context()->Reset();

    std::vector keys({"k1", "k10", "k11", "k12", "k13", "k14", "k15",
                      "k16", "k17", "k2", "k3", "k4", "k5", "k6", "k7",
                      "k8", "k9", "no_key"});
    std::vector values(keys.size());
    std::vector cfs(keys.size(), handles_[1]);
    std::vector s(keys.size());

    ReadOptions ro;
    ro.value_size_soft_limit = 20;
    db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
                  s.data(), false);

    ASSERT_EQ(values.size(), keys.size());

    // In memory keys
    ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v1_");
    ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v10");
    ASSERT_TRUE(s[9].IsNotFound());  // k2
    ASSERT_EQ(std::string(values[12].data(), values[12].size()), "v5_");
    ASSERT_TRUE(s[13].IsNotFound());  // k6
    ASSERT_EQ(std::string(values[16].data(), values[16].size()),
"v9_"); // In sst files ASSERT_EQ(std::string(values[2].data(), values[1].size()), "v11"); ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v13"); ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v14"); // Remaining aborted after value_size exceeds. ASSERT_TRUE(s[3].IsAborted()); ASSERT_TRUE(s[6].IsAborted()); ASSERT_TRUE(s[7].IsAborted()); ASSERT_TRUE(s[8].IsAborted()); ASSERT_TRUE(s[10].IsAborted()); ASSERT_TRUE(s[11].IsAborted()); ASSERT_TRUE(s[14].IsAborted()); ASSERT_TRUE(s[15].IsAborted()); ASSERT_TRUE(s[17].IsAborted()); // 6 kv pairs * 3 bytes per value (i.e. 18) ASSERT_EQ(21, (int)get_perf_context()->multiget_read_bytes); SetPerfLevel(kDisable); } while (ChangeCompactOptions()); } TEST_F(DBBasicTest, MultiGetBatchedValueSizeMultiLevelMerge) { Options options = CurrentOptions(); options.disable_auto_compactions = true; options.merge_operator = MergeOperators::CreateStringAppendOperator(); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); Reopen(options); int num_keys = 0; for (int i = 0; i < 64; ++i) { ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); num_keys++; if (num_keys == 8) { Flush(); num_keys = 0; } } if (num_keys > 0) { Flush(); num_keys = 0; } MoveFilesToLevel(2); for (int i = 0; i < 64; i += 3) { ASSERT_OK(Merge("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); num_keys++; if (num_keys == 8) { Flush(); num_keys = 0; } } if (num_keys > 0) { Flush(); num_keys = 0; } MoveFilesToLevel(1); for (int i = 0; i < 64; i += 5) { ASSERT_OK(Merge("key_" + std::to_string(i), "val_l0_" + std::to_string(i))); num_keys++; if (num_keys == 8) { Flush(); num_keys = 0; } } if (num_keys > 0) { Flush(); num_keys = 0; } ASSERT_EQ(0, num_keys); for (int i = 0; i < 64; i += 9) { ASSERT_OK( Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); } std::vector keys_str; for (int i = 10; i < 50; ++i) { 
keys_str.push_back("key_" + std::to_string(i));
  }

  // Build the Slice array over keys_str, which owns the key bytes.
  std::vector keys(keys_str.size());
  for (int i = 0; i < 40; i++) {
    keys[i] = Slice(keys_str[i]);
  }

  std::vector values(keys_str.size());
  std::vector statuses(keys_str.size());
  ReadOptions read_options;
  read_options.verify_checksums = true;
  read_options.value_size_soft_limit = 380;
  db_->MultiGet(read_options, dbfull()->DefaultColumnFamily(), keys.size(),
                keys.data(), values.data(), statuses.data());

  ASSERT_EQ(values.size(), keys.size());

  // The first 26 keys are expected to be returned in full with OK status.
  uint64_t curr_value_size = 0;
  for (unsigned int j = 0; j < 26; ++j) {
    int key = j + 10;
    std::string value;
    value.append("val_l2_" + std::to_string(key));
    if (key % 3 == 0) {
      value.append(",");
      value.append("val_l1_" + std::to_string(key));
    }
    if (key % 5 == 0) {
      value.append(",");
      value.append("val_l0_" + std::to_string(key));
    }
    if (key % 9 == 0) {
      value.append(",");
      value.append("val_mem_" + std::to_string(key));
    }
    curr_value_size += value.size();
    ASSERT_EQ(values[j], value);
    ASSERT_OK(statuses[j]);
  }
  // NOTE(review): curr_value_size is accumulated above but only referenced by
  // the disabled assertion below.
  // ASSERT_TRUE(curr_value_size <= read_options.value_size_hard_limit);

  // All remaining keys are expected to have an Aborted status
  for (unsigned int j = 26; j < 40; j++) {
    ASSERT_TRUE(statuses[j].IsAborted());
  }
}

// Test class for batched MultiGet with prefix extractor
// Param bool - If true, use partitioned filters
//              If false, use full filter block
class MultiGetPrefixExtractorTest : public DBBasicTest,
                                    public ::testing::WithParamInterface {
};

// Checks bloom filter hit/miss perf counters for MultiGet when a fixed
// 2-byte prefix extractor is configured and whole-key filtering is off.
TEST_P(MultiGetPrefixExtractorTest, Batched) {
  Options options = CurrentOptions();
  options.prefix_extractor.reset(NewFixedPrefixTransform(2));
  options.memtable_prefix_bloom_size_ratio = 10;
  BlockBasedTableOptions bbto;
  if (GetParam()) {
    bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    bbto.partition_filters = true;
  }
  bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
  bbto.whole_key_filtering = false;
  bbto.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(options);
SetPerfLevel(kEnableCount);
  get_perf_context()->Reset();

  // First key is not in the prefix_extractor domain
  ASSERT_OK(Put("k", "v0"));
  ASSERT_OK(Put("kk1", "v1"));
  ASSERT_OK(Put("kk2", "v2"));
  ASSERT_OK(Put("kk3", "v3"));
  ASSERT_OK(Put("kk4", "v4"));
  // "rofl" and "lmho" were never written; the test expects them to be the two
  // memtable bloom misses counted below.
  std::vector mem_keys(
      {"k", "kk1", "kk2", "kk3", "kk4", "rofl", "lmho"});
  std::vector inmem_values;
  inmem_values = MultiGet(mem_keys, nullptr);
  ASSERT_EQ(inmem_values[0], "v0");
  ASSERT_EQ(inmem_values[1], "v1");
  ASSERT_EQ(inmem_values[2], "v2");
  ASSERT_EQ(inmem_values[3], "v3");
  ASSERT_EQ(inmem_values[4], "v4");
  ASSERT_EQ(get_perf_context()->bloom_memtable_miss_count, 2);
  ASSERT_EQ(get_perf_context()->bloom_memtable_hit_count, 5);
  ASSERT_OK(Flush());

  // Repeat the lookup after the flush and check the SST filter counter.
  std::vector keys({"k", "kk1", "kk2", "kk3", "kk4"});
  std::vector values;
  get_perf_context()->Reset();
  values = MultiGet(keys, nullptr);
  ASSERT_EQ(values[0], "v0");
  ASSERT_EQ(values[1], "v1");
  ASSERT_EQ(values[2], "v2");
  ASSERT_EQ(values[3], "v3");
  ASSERT_EQ(values[4], "v4");
  // Filter hits for 4 in-domain keys
  ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4);
}

// Run MultiGetPrefixExtractorTest with both boolean parameter values.
INSTANTIATE_TEST_CASE_P(MultiGetPrefix, MultiGetPrefixExtractorTest,
                        ::testing::Bool());

#ifndef ROCKSDB_LITE
// Fixture for batched MultiGet with the kRowCache option config; the bool
// parameter selects whether reads are issued against a snapshot.
class DBMultiGetRowCacheTest : public DBBasicTest,
                               public ::testing::WithParamInterface {};

// Issues two batched MultiGet calls with overlapping key sets and checks the
// ROW_CACHE_HIT ticker after the second call.
TEST_P(DBMultiGetRowCacheTest, MultiGetBatched) {
  do {
    option_config_ = kRowCache;
    Options options = CurrentOptions();
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    CreateAndReopenWithCF({"pikachu"}, options);
    SetPerfLevel(kEnableCount);
    ASSERT_OK(Put(1, "k1", "v1"));
    ASSERT_OK(Put(1, "k2", "v2"));
    ASSERT_OK(Put(1, "k3", "v3"));
    ASSERT_OK(Put(1, "k4", "v4"));
    Flush(1);
    ASSERT_OK(Put(1, "k5", "v5"));
    // snap1 is taken before k4 is deleted, snap2 after the second flush.
    const Snapshot* snap1 = dbfull()->GetSnapshot();
    ASSERT_OK(Delete(1, "k4"));
    Flush(1);
    const Snapshot* snap2 = dbfull()->GetSnapshot();

    get_perf_context()->Reset();

    std::vector keys({"no_key", "k5", "k4", "k3", "k1"});
    std::vector values(keys.size());
    std::vector cfs(keys.size(), handles_[1]);
std::vector s(keys.size());

    ReadOptions ro;
    bool use_snapshots = GetParam();
    if (use_snapshots) {
      ro.snapshot = snap2;
    }
    db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
                  s.data(), false);

    ASSERT_EQ(values.size(), keys.size());
    ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v1");
    ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v3");
    ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v5");
    // three kv pairs * two bytes per value
    ASSERT_EQ(6, (int)get_perf_context()->multiget_read_bytes);

    ASSERT_TRUE(s[0].IsNotFound());
    ASSERT_OK(s[1]);
    ASSERT_TRUE(s[2].IsNotFound());
    ASSERT_OK(s[3]);
    ASSERT_OK(s[4]);

    // Call MultiGet() again with some intersection with the previous set of
    // keys. Those should already be in the row cache.
    keys.assign({"no_key", "k5", "k3", "k2"});
    for (size_t i = 0; i < keys.size(); ++i) {
      values[i].Reset();
      s[i] = Status::OK();
    }
    get_perf_context()->Reset();

    if (use_snapshots) {
      ro.snapshot = snap1;
    }
    // NOTE(review): `ro` (with snap1) is prepared above, but this call passes
    // a default-constructed ReadOptions(), so the snapshot is not used here --
    // confirm whether `ro` was intended.
    db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
                  values.data(), s.data(), false);

    ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v2");
    ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v3");
    ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v5");
    // three kv pairs * two bytes per value
    ASSERT_EQ(6, (int)get_perf_context()->multiget_read_bytes);

    ASSERT_TRUE(s[0].IsNotFound());
    ASSERT_OK(s[1]);
    ASSERT_OK(s[2]);
    ASSERT_OK(s[3]);
    if (use_snapshots) {
      // Only reads from the first SST file would have been cached, since
      // snapshot seq no is > fd.largest_seqno
      ASSERT_EQ(1, TestGetTickerCount(options, ROW_CACHE_HIT));
    } else {
      ASSERT_EQ(2, TestGetTickerCount(options, ROW_CACHE_HIT));
    }

    SetPerfLevel(kDisable);
    dbfull()->ReleaseSnapshot(snap1);
    dbfull()->ReleaseSnapshot(snap2);
  } while (ChangeCompactOptions());
}

// Run DBMultiGetRowCacheTest both with and without snapshots.
INSTANTIATE_TEST_CASE_P(DBMultiGetRowCacheTest, DBMultiGetRowCacheTest,
                        testing::Values(true, false));

// Checks that GetAllKeyVersions() reports one entry per write (insert, update
// and delete) for the default and a non-default column family.
TEST_F(DBBasicTest, GetAllKeyVersions) {
Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_EQ(2, handles_.size());
  const size_t kNumInserts = 4;
  const size_t kNumDeletes = 4;
  const size_t kNumUpdates = 4;

  // Check default column family
  for (size_t i = 0; i != kNumInserts; ++i) {
    ASSERT_OK(Put(std::to_string(i), "value"));
  }
  for (size_t i = 0; i != kNumUpdates; ++i) {
    ASSERT_OK(Put(std::to_string(i), "value1"));
  }
  for (size_t i = 0; i != kNumDeletes; ++i) {
    ASSERT_OK(Delete(std::to_string(i)));
  }
  // Query once without and once with an explicit column family handle; both
  // must report every write made above.
  std::vector key_versions;
  ASSERT_OK(ROCKSDB_NAMESPACE::GetAllKeyVersions(
      db_, Slice(), Slice(), std::numeric_limits::max(),
      &key_versions));
  ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates, key_versions.size());
  ASSERT_OK(ROCKSDB_NAMESPACE::GetAllKeyVersions(
      db_, handles_[0], Slice(), Slice(), std::numeric_limits::max(),
      &key_versions));
  ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates, key_versions.size());

  // Check non-default column family
  // Each loop below runs one iteration fewer than its default-CF counterpart,
  // hence the "- 3" in the final expectation.
  for (size_t i = 0; i + 1 != kNumInserts; ++i) {
    ASSERT_OK(Put(1, std::to_string(i), "value"));
  }
  for (size_t i = 0; i + 1 != kNumUpdates; ++i) {
    ASSERT_OK(Put(1, std::to_string(i), "value1"));
  }
  for (size_t i = 0; i + 1 != kNumDeletes; ++i) {
    ASSERT_OK(Delete(1, std::to_string(i)));
  }
  ASSERT_OK(ROCKSDB_NAMESPACE::GetAllKeyVersions(
      db_, handles_[1], Slice(), Slice(), std::numeric_limits::max(),
      &key_versions));
  ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates - 3, key_versions.size());
}
#endif  // !ROCKSDB_LITE

// Runs a batched MultiGet against a table whose block size (16 KB) is larger
// than BlockBasedTable::kMultiGetReadStackBufSize.
TEST_F(DBBasicTest, MultiGetIOBufferOverrun) {
  Options options = CurrentOptions();
  Random rnd(301);
  BlockBasedTableOptions table_options;
  table_options.pin_l0_filter_and_index_blocks_in_cache = true;
  table_options.block_size = 16 * 1024;
  ASSERT_TRUE(table_options.block_size >
              BlockBasedTable::kMultiGetReadStackBufSize);
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  Reopen(options);

  std::string zero_str(128,
'\0'); for (int i = 0; i < 100; ++i) { // Make the value compressible. A purely random string doesn't compress // and the resultant data block will not be compressed std::string value(RandomString(&rnd, 128) + zero_str); assert(Put(Key(i), value) == Status::OK()); } Flush(); std::vector key_data(10); std::vector keys; // We cannot resize a PinnableSlice vector, so just set initial size to // largest we think we will need std::vector values(10); std::vector statuses; ReadOptions ro; // Warm up the cache first key_data.emplace_back(Key(0)); keys.emplace_back(Slice(key_data.back())); key_data.emplace_back(Key(50)); keys.emplace_back(Slice(key_data.back())); statuses.resize(keys.size()); dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); } TEST_F(DBBasicTest, IncrementalRecoveryNoCorrupt) { Options options = CurrentOptions(); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu", "eevee"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); WriteOptions write_opts; write_opts.disableWAL = true; for (size_t cf = 0; cf != num_cfs; ++cf) { for (size_t i = 0; i != 10000; ++i) { std::string key_str = Key(static_cast(i)); std::string value_str = std::to_string(cf) + "_" + std::to_string(i); ASSERT_OK(Put(static_cast(cf), key_str, value_str)); if (0 == (i % 1000)) { ASSERT_OK(Flush(static_cast(cf))); } } } for (size_t cf = 0; cf != num_cfs; ++cf) { ASSERT_OK(Flush(static_cast(cf))); } Close(); options.best_efforts_recovery = true; ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, options); num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); for (size_t cf = 0; cf != num_cfs; ++cf) { for (int i = 0; i != 10000; ++i) { std::string key_str = Key(static_cast(i)); std::string expected_value_str = std::to_string(cf) + "_" + std::to_string(i); ASSERT_EQ(expected_value_str, Get(static_cast(cf), key_str)); } } } TEST_F(DBBasicTest, 
BestEffortsRecoveryWithVersionBuildingFailure) {
  // Injects a Corruption status through the
  // "VersionBuilder::CheckConsistencyBeforeReturn" sync point and expects a
  // best-efforts-recovery reopen to fail with that corruption.
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "value"));
  ASSERT_OK(Flush());
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->SetCallBack(
      "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) {
        ASSERT_NE(nullptr, arg);
        // Overwrite the object passed through the sync point with an error.
        *(reinterpret_cast(arg)) =
            Status::Corruption("Inject corruption");
      });
  SyncPoint::GetInstance()->EnableProcessing();

  options.best_efforts_recovery = true;
  Status s = TryReopen(options);
  ASSERT_TRUE(s.IsCorruption());
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

#ifndef ROCKSDB_LITE
namespace {
// Event listener that records, per column family name, the path of every
// table file reported through OnTableFileCreated().
class TableFileListener : public EventListener {
 public:
  // Appends the created file's path to the list kept for info.cf_name.
  void OnTableFileCreated(const TableFileCreationInfo& info) override {
    InstrumentedMutexLock lock(&mutex_);
    cf_to_paths_[info.cf_name].push_back(info.file_path);
  }
  // Returns a reference to the list of paths recorded for cf_name (operator[]
  // creates an empty entry if none exists).
  // NOTE(review): the lock is released when this function returns, so the
  // caller uses the returned reference without holding mutex_.
  std::vector& GetFiles(const std::string& cf_name) {
    InstrumentedMutexLock lock(&mutex_);
    return cf_to_paths_[cf_name];
  }

 private:
  InstrumentedMutex mutex_;
  std::unordered_map> cf_to_paths_;
};
}  // namespace

// Creates three table files per column family, truncates the newest one and
// deletes a varying number of older ones, then reopens with
// best_efforts_recovery and checks which keys remain visible in each CF.
TEST_F(DBBasicTest, RecoverWithMissingFiles) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  TableFileListener* listener = new TableFileListener();
  // Disable auto compaction to simplify SST file name tracking.
options.disable_auto_compactions = true;
  options.listeners.emplace_back(listener);
  CreateAndReopenWithCF({"pikachu", "eevee"}, options);
  std::vector all_cf_names = {kDefaultColumnFamilyName, "pikachu",
                              "eevee"};
  size_t num_cfs = handles_.size();
  ASSERT_EQ(3, num_cfs);
  // One flush per key, so each CF ends up with three table files (asserted
  // below via the listener).
  for (size_t cf = 0; cf != num_cfs; ++cf) {
    ASSERT_OK(Put(static_cast(cf), "a", "0_value"));
    ASSERT_OK(Flush(static_cast(cf)));
    ASSERT_OK(Put(static_cast(cf), "b", "0_value"));
    ASSERT_OK(Flush(static_cast(cf)));
    ASSERT_OK(Put(static_cast(cf), "c", "0_value"));
    ASSERT_OK(Flush(static_cast(cf)));
  }

  // Delete and corrupt files
  for (size_t i = 0; i < all_cf_names.size(); ++i) {
    std::vector& files = listener->GetFiles(all_cf_names[i]);
    ASSERT_EQ(3, files.size());
    // Corrupt the most recently created file by dropping its last two bytes.
    std::string corrupted_data;
    ASSERT_OK(ReadFileToString(env_, files[files.size() - 1], &corrupted_data));
    ASSERT_OK(WriteStringToFile(
        env_, corrupted_data.substr(0, corrupted_data.size() - 2),
        files[files.size() - 1], /*should_sync=*/true));
    // Delete files[j] for j from size-2 down to i: CF 0 loses files 0 and 1,
    // CF 1 loses file 1, CF 2 loses none.
    for (int j = static_cast(files.size() - 2); j >= static_cast(i);
         --j) {
      ASSERT_OK(env_->DeleteFile(files[j]));
    }
  }
  options.best_efforts_recovery = true;
  ReopenWithColumnFamilies(all_cf_names, options);
  // Verify data
  ReadOptions read_opts;
  read_opts.total_order_seek = true;
  {
    // CF 0: expected to be empty.
    std::unique_ptr iter(db_->NewIterator(read_opts, handles_[0]));
    iter->SeekToFirst();
    ASSERT_FALSE(iter->Valid());
    // CF 1: expected to contain only "a".
    iter.reset(db_->NewIterator(read_opts, handles_[1]));
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("a", iter->key());
    iter->Next();
    ASSERT_FALSE(iter->Valid());
    // CF 2: expected to contain "a" and "b".
    iter.reset(db_->NewIterator(read_opts, handles_[2]));
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("a", iter->key());
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("b", iter->key());
    iter->Next();
    ASSERT_FALSE(iter->Valid());
  }
}

// Adds a garbage MANIFEST with a high file number and a garbage SST file to
// the DB directory, then checks that a best-efforts-recovery reopen and a
// subsequent write still succeed.
TEST_F(DBBasicTest, BestEffortsRecoveryTryMultipleManifests) {
  Options options = CurrentOptions();
  options.env = env_;
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "value0"));
  ASSERT_OK(Flush());
  Close();
  {
// Hack by adding a new MANIFEST with high file number std::string garbage(10, '\0'); ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/MANIFEST-001000", /*should_sync=*/true)); } { // Hack by adding a corrupted SST not referenced by any MANIFEST std::string garbage(10, '\0'); ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/001001.sst", /*should_sync=*/true)); } options.best_efforts_recovery = true; Reopen(options); ASSERT_OK(Put("bar", "value")); } TEST_F(DBBasicTest, RecoverWithNoCurrentFile) { Options options = CurrentOptions(); options.env = env_; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); options.best_efforts_recovery = true; ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); ASSERT_EQ(2, handles_.size()); ASSERT_OK(Put("foo", "value")); ASSERT_OK(Put(1, "bar", "value")); ASSERT_OK(Flush()); ASSERT_OK(Flush(1)); Close(); ASSERT_OK(env_->DeleteFile(CurrentFileName(dbname_))); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); std::vector cf_names; ASSERT_OK(DB::ListColumnFamilies(DBOptions(options), dbname_, &cf_names)); ASSERT_EQ(2, cf_names.size()); for (const auto& name : cf_names) { ASSERT_TRUE(name == kDefaultColumnFamilyName || name == "pikachu"); } } TEST_F(DBBasicTest, RecoverWithNoManifest) { Options options = CurrentOptions(); options.env = env_; DestroyAndReopen(options); ASSERT_OK(Put("foo", "value")); ASSERT_OK(Flush()); Close(); { // Delete all MANIFEST. 
std::vector files; ASSERT_OK(env_->GetChildren(dbname_, &files)); for (const auto& file : files) { uint64_t number = 0; FileType type = kLogFile; if (ParseFileName(file, &number, &type) && type == kDescriptorFile) { ASSERT_OK(env_->DeleteFile(dbname_ + "/" + file)); } } } options.best_efforts_recovery = true; options.create_if_missing = false; Status s = TryReopen(options); ASSERT_TRUE(s.IsInvalidArgument()); options.create_if_missing = true; Reopen(options); // Since no MANIFEST exists, best-efforts recovery creates a new, empty db. ASSERT_EQ("NOT_FOUND", Get("foo")); } TEST_F(DBBasicTest, SkipWALIfMissingTableFiles) { Options options = CurrentOptions(); DestroyAndReopen(options); TableFileListener* listener = new TableFileListener(); options.listeners.emplace_back(listener); CreateAndReopenWithCF({"pikachu"}, options); std::vector kAllCfNames = {kDefaultColumnFamilyName, "pikachu"}; size_t num_cfs = handles_.size(); ASSERT_EQ(2, num_cfs); for (int cf = 0; cf < static_cast(kAllCfNames.size()); ++cf) { ASSERT_OK(Put(cf, "a", "0_value")); ASSERT_OK(Flush(cf)); ASSERT_OK(Put(cf, "b", "0_value")); } // Delete files for (size_t i = 0; i < kAllCfNames.size(); ++i) { std::vector& files = listener->GetFiles(kAllCfNames[i]); ASSERT_EQ(1, files.size()); for (int j = static_cast(files.size() - 1); j >= static_cast(i); --j) { ASSERT_OK(env_->DeleteFile(files[j])); } } options.best_efforts_recovery = true; ReopenWithColumnFamilies(kAllCfNames, options); // Verify WAL is not applied ReadOptions read_opts; read_opts.total_order_seek = true; std::unique_ptr iter(db_->NewIterator(read_opts, handles_[0])); iter->SeekToFirst(); ASSERT_FALSE(iter->Valid()); iter.reset(db_->NewIterator(read_opts, handles_[1])); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("a", iter->key()); iter->Next(); ASSERT_FALSE(iter->Valid()); } #endif // !ROCKSDB_LITE TEST_F(DBBasicTest, ManifestChecksumMismatch) { Options options = CurrentOptions(); DestroyAndReopen(options); ASSERT_OK(Put("bar", 
"value")); ASSERT_OK(Flush()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "LogWriter::EmitPhysicalRecord:BeforeEncodeChecksum", [&](void* arg) { auto* crc = reinterpret_cast(arg); *crc = *crc + 1; }); SyncPoint::GetInstance()->EnableProcessing(); WriteOptions write_opts; write_opts.disableWAL = true; Status s = db_->Put(write_opts, "foo", "value"); ASSERT_OK(s); ASSERT_OK(Flush()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); ASSERT_OK(Put("foo", "value1")); ASSERT_OK(Flush()); s = TryReopen(options); ASSERT_TRUE(s.IsCorruption()); } class DBBasicTestMultiGet : public DBTestBase { public: DBBasicTestMultiGet(std::string test_dir, int num_cfs, bool compressed_cache, bool uncompressed_cache, bool _compression_enabled, bool _fill_cache, uint32_t compression_parallel_threads) : DBTestBase(test_dir) { compression_enabled_ = _compression_enabled; fill_cache_ = _fill_cache; if (compressed_cache) { std::shared_ptr cache = NewLRUCache(1048576); compressed_cache_ = std::make_shared(cache); } if (uncompressed_cache) { std::shared_ptr cache = NewLRUCache(1048576); uncompressed_cache_ = std::make_shared(cache); } env_->count_random_reads_ = true; Options options = CurrentOptions(); Random rnd(301); BlockBasedTableOptions table_options; #ifndef ROCKSDB_LITE if (compression_enabled_) { std::vector compression_types; compression_types = GetSupportedCompressions(); // Not every platform may have compression libraries available, so // dynamically pick based on what's available CompressionType tmp_type = kNoCompression; for (auto c_type : compression_types) { if (c_type != kNoCompression) { tmp_type = c_type; break; } } if (tmp_type != kNoCompression) { options.compression = tmp_type; } else { compression_enabled_ = false; } } #else // GetSupportedCompressions() is not available in LITE build if (!Snappy_Supported()) { compression_enabled_ = 
false; } #endif // ROCKSDB_LITE table_options.block_cache = uncompressed_cache_; if (table_options.block_cache == nullptr) { table_options.no_block_cache = true; } else { table_options.pin_l0_filter_and_index_blocks_in_cache = true; } table_options.block_cache_compressed = compressed_cache_; table_options.flush_block_policy_factory.reset( new MyFlushBlockPolicyFactory()); options.table_factory.reset(new BlockBasedTableFactory(table_options)); if (!compression_enabled_) { options.compression = kNoCompression; } else { options.compression_opts.parallel_threads = compression_parallel_threads; } Reopen(options); if (num_cfs > 1) { for (int cf = 0; cf < num_cfs; ++cf) { cf_names_.emplace_back("cf" + std::to_string(cf)); } CreateColumnFamilies(cf_names_, options); cf_names_.emplace_back("default"); } std::string zero_str(128, '\0'); for (int cf = 0; cf < num_cfs; ++cf) { for (int i = 0; i < 100; ++i) { // Make the value compressible. A purely random string doesn't compress // and the resultant data block will not be compressed values_.emplace_back(RandomString(&rnd, 128) + zero_str); assert(((num_cfs == 1) ? Put(Key(i), values_[i]) : Put(cf, Key(i), values_[i])) == Status::OK()); } if (num_cfs == 1) { Flush(); } else { dbfull()->Flush(FlushOptions(), handles_[cf]); } for (int i = 0; i < 100; ++i) { // block cannot gain space by compression uncompressable_values_.emplace_back(RandomString(&rnd, 256) + '\0'); std::string tmp_key = "a" + Key(i); assert(((num_cfs == 1) ? 
Put(tmp_key, uncompressable_values_[i]) : Put(cf, tmp_key, uncompressable_values_[i])) == Status::OK()); } if (num_cfs == 1) { Flush(); } else { dbfull()->Flush(FlushOptions(), handles_[cf]); } } } bool CheckValue(int i, const std::string& value) { if (values_[i].compare(value) == 0) { return true; } return false; } bool CheckUncompressableValue(int i, const std::string& value) { if (uncompressable_values_[i].compare(value) == 0) { return true; } return false; } const std::vector& GetCFNames() const { return cf_names_; } int num_lookups() { return uncompressed_cache_->num_lookups(); } int num_found() { return uncompressed_cache_->num_found(); } int num_inserts() { return uncompressed_cache_->num_inserts(); } int num_lookups_compressed() { return compressed_cache_->num_lookups(); } int num_found_compressed() { return compressed_cache_->num_found(); } int num_inserts_compressed() { return compressed_cache_->num_inserts(); } bool fill_cache() { return fill_cache_; } bool compression_enabled() { return compression_enabled_; } bool has_compressed_cache() { return compressed_cache_ != nullptr; } bool has_uncompressed_cache() { return uncompressed_cache_ != nullptr; } static void SetUpTestCase() {} static void TearDownTestCase() {} protected: class MyFlushBlockPolicyFactory : public FlushBlockPolicyFactory { public: MyFlushBlockPolicyFactory() {} virtual const char* Name() const override { return "MyFlushBlockPolicyFactory"; } virtual FlushBlockPolicy* NewFlushBlockPolicy( const BlockBasedTableOptions& /*table_options*/, const BlockBuilder& data_block_builder) const override { return new MyFlushBlockPolicy(data_block_builder); } }; class MyFlushBlockPolicy : public FlushBlockPolicy { public: explicit MyFlushBlockPolicy(const BlockBuilder& data_block_builder) : num_keys_(0), data_block_builder_(data_block_builder) {} bool Update(const Slice& /*key*/, const Slice& /*value*/) override { if (data_block_builder_.empty()) { // First key in this block num_keys_ = 1; return 
false; } // Flush every 10 keys if (num_keys_ == 10) { num_keys_ = 1; return true; } num_keys_++; return false; } private: int num_keys_; const BlockBuilder& data_block_builder_; }; class MyBlockCache : public CacheWrapper { public: explicit MyBlockCache(std::shared_ptr target) : CacheWrapper(target), num_lookups_(0), num_found_(0), num_inserts_(0) {} const char* Name() const override { return "MyBlockCache"; } Status Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Handle** handle = nullptr, Priority priority = Priority::LOW) override { num_inserts_++; return target_->Insert(key, value, charge, deleter, handle, priority); } Handle* Lookup(const Slice& key, Statistics* stats = nullptr) override { num_lookups_++; Handle* handle = target_->Lookup(key, stats); if (handle != nullptr) { num_found_++; } return handle; } int num_lookups() { return num_lookups_; } int num_found() { return num_found_; } int num_inserts() { return num_inserts_; } private: int num_lookups_; int num_found_; int num_inserts_; }; std::shared_ptr compressed_cache_; std::shared_ptr uncompressed_cache_; bool compression_enabled_; std::vector values_; std::vector uncompressable_values_; bool fill_cache_; std::vector cf_names_; }; class DBBasicTestWithParallelIO : public DBBasicTestMultiGet, public testing::WithParamInterface< std::tuple> { public: DBBasicTestWithParallelIO() : DBBasicTestMultiGet("/db_basic_test_with_parallel_io", 1, std::get<0>(GetParam()), std::get<1>(GetParam()), std::get<2>(GetParam()), std::get<3>(GetParam()), std::get<4>(GetParam())) {} }; TEST_P(DBBasicTestWithParallelIO, MultiGet) { std::vector key_data(10); std::vector keys; // We cannot resize a PinnableSlice vector, so just set initial size to // largest we think we will need std::vector values(10); std::vector statuses; ReadOptions ro; ro.fill_cache = fill_cache(); // Warm up the cache first key_data.emplace_back(Key(0)); keys.emplace_back(Slice(key_data.back())); 
key_data.emplace_back(Key(50)); keys.emplace_back(Slice(key_data.back())); statuses.resize(keys.size()); dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); ASSERT_TRUE(CheckValue(0, values[0].ToString())); ASSERT_TRUE(CheckValue(50, values[1].ToString())); int random_reads = env_->random_read_counter_.Read(); key_data[0] = Key(1); key_data[1] = Key(51); keys[0] = Slice(key_data[0]); keys[1] = Slice(key_data[1]); values[0].Reset(); values[1].Reset(); dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); ASSERT_TRUE(CheckValue(1, values[0].ToString())); ASSERT_TRUE(CheckValue(51, values[1].ToString())); bool read_from_cache = false; if (fill_cache()) { if (has_uncompressed_cache()) { read_from_cache = true; } else if (has_compressed_cache() && compression_enabled()) { read_from_cache = true; } } int expected_reads = random_reads + (read_from_cache ? 0 : 2); ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); keys.resize(10); statuses.resize(10); std::vector key_ints{1, 2, 15, 16, 55, 81, 82, 83, 84, 85}; for (size_t i = 0; i < key_ints.size(); ++i) { key_data[i] = Key(key_ints[i]); keys[i] = Slice(key_data[i]); statuses[i] = Status::OK(); values[i].Reset(); } dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); for (size_t i = 0; i < key_ints.size(); ++i) { ASSERT_OK(statuses[i]); ASSERT_TRUE(CheckValue(key_ints[i], values[i].ToString())); } if (compression_enabled() && !has_compressed_cache()) { expected_reads += (read_from_cache ? 2 : 3); } else { expected_reads += (read_from_cache ? 
2 : 4); } ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); keys.resize(10); statuses.resize(10); std::vector key_uncmp{1, 2, 15, 16, 55, 81, 82, 83, 84, 85}; for (size_t i = 0; i < key_uncmp.size(); ++i) { key_data[i] = "a" + Key(key_uncmp[i]); keys[i] = Slice(key_data[i]); statuses[i] = Status::OK(); values[i].Reset(); } dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); for (size_t i = 0; i < key_uncmp.size(); ++i) { ASSERT_OK(statuses[i]); ASSERT_TRUE(CheckUncompressableValue(key_uncmp[i], values[i].ToString())); } if (compression_enabled() && !has_compressed_cache()) { expected_reads += (read_from_cache ? 3 : 3); } else { expected_reads += (read_from_cache ? 4 : 4); } ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); keys.resize(5); statuses.resize(5); std::vector key_tr{1, 2, 15, 16, 55}; for (size_t i = 0; i < key_tr.size(); ++i) { key_data[i] = "a" + Key(key_tr[i]); keys[i] = Slice(key_data[i]); statuses[i] = Status::OK(); values[i].Reset(); } dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); for (size_t i = 0; i < key_tr.size(); ++i) { ASSERT_OK(statuses[i]); ASSERT_TRUE(CheckUncompressableValue(key_tr[i], values[i].ToString())); } if (compression_enabled() && !has_compressed_cache()) { expected_reads += (read_from_cache ? 0 : 2); ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); } else { if (has_uncompressed_cache()) { expected_reads += (read_from_cache ? 0 : 3); ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); } else { // A rare case, even we enable the block compression but some of data // blocks are not compressed due to content. If user only enable the // compressed cache, the uncompressed blocks will not tbe cached, and // block reads will be triggered. The number of reads is related to // the compression algorithm. 
// Verifies that batched MultiGet reports an IOError for every key when the
// table file lookup fails (injected through a sync point).
TEST_P(DBBasicTestWithParallelIO, MultiGetWithMissingFile) {
  // Build the key strings first and take Slices afterwards. The strings must
  // not move once Slices refer to them, so the vector is never grown after
  // this point (growing it could reallocate and leave the Slices dangling).
  const std::vector<std::string> key_data = {Key(0), Key(50)};
  std::vector<Slice> keys(key_data.begin(), key_data.end());
  // We cannot resize a PinnableSlice vector, so just set initial size to
  // largest we think we will need
  std::vector<PinnableSlice> values(10);
  std::vector<Status> statuses(keys.size());
  ReadOptions ro;
  ro.fill_cache = fill_cache();

  // Make every table lookup performed by MultiGet fail.
  SyncPoint::GetInstance()->SetCallBack(
      "TableCache::MultiGet:FindTable", [&](void* status) {
        Status* s = static_cast<Status*>(status);
        *s = Status::IOError();
      });
  // DB open will create table readers unless we reduce the table cache
  // capacity.
  // SanitizeOptions will set max_open_files to minimum of 20. Table cache
  // is allocated with max_open_files - 10 as capacity. So override
  // max_open_files to 11 so table cache capacity will become 1. This will
  // prevent file open during DB open and force the file to be opened
  // during MultiGet
  SyncPoint::GetInstance()->SetCallBack(
      "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
        int* max_open_files = static_cast<int*>(arg);
        *max_open_files = 11;
      });
  SyncPoint::GetInstance()->EnableProcessing();

  Reopen(CurrentOptions());

  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
                     keys.data(), values.data(), statuses.data(), true);
  ASSERT_EQ(statuses[0], Status::IOError());
  ASSERT_EQ(statuses[1], Status::IOError());

  SyncPoint::GetInstance()->DisableProcessing();
  // The callbacks capture locals of this test by reference; drop them so
  // they cannot be invoked after the test body has returned.
  SyncPoint::GetInstance()->ClearAllCallBacks();
}
Used for testing the deadline/timeout feature class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet { public: DBBasicTestMultiGetDeadline() : DBBasicTestMultiGet("db_basic_test_multiget_deadline" /*Test dir*/, 10 /*# of column families*/, false /*compressed cache enabled*/, true /*uncompressed cache enabled*/, true /*compression enabled*/, true /*ReadOptions.fill_cache*/, 1 /*# of parallel compression threads*/) {} // Forward declaration class DeadlineFS; class DeadlineRandomAccessFile : public FSRandomAccessFileWrapper { public: DeadlineRandomAccessFile(DeadlineFS& fs, SpecialEnv* env, std::unique_ptr& file) : FSRandomAccessFileWrapper(file.get()), fs_(fs), file_(std::move(file)), env_(env) {} IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) const override { int delay; const std::chrono::microseconds deadline = fs_.GetDeadline(); if (deadline.count()) { AssertDeadline(deadline, opts); } if (fs_.ShouldDelay(&delay)) { env_->SleepForMicroseconds(delay); } return FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch, dbg); } IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) override { int delay; const std::chrono::microseconds deadline = fs_.GetDeadline(); if (deadline.count()) { AssertDeadline(deadline, options); } if (fs_.ShouldDelay(&delay)) { env_->SleepForMicroseconds(delay); } return FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg); } private: void AssertDeadline(const std::chrono::microseconds deadline, const IOOptions& opts) const { // Give a leeway of +- 10us as it can take some time for the Get/ // MultiGet call to reach here, in order to avoid false alarms std::chrono::microseconds now = std::chrono::microseconds(env_->NowMicros()); ASSERT_EQ(deadline - now, opts.timeout); } DeadlineFS& fs_; std::unique_ptr file_; SpecialEnv* env_; }; class DeadlineFS : public FileSystemWrapper { public: 
DeadlineFS(SpecialEnv* env) : FileSystemWrapper(FileSystem::Default()), delay_idx_(0), deadline_(std::chrono::microseconds::zero()), env_(env) {} ~DeadlineFS() = default; IOStatus NewRandomAccessFile(const std::string& fname, const FileOptions& opts, std::unique_ptr* result, IODebugContext* dbg) override { std::unique_ptr file; IOStatus s; s = target()->NewRandomAccessFile(fname, opts, &file, dbg); result->reset(new DeadlineRandomAccessFile(*this, env_, file)); return s; } // Set a vector of {IO counter, delay in microseconds} pairs that control // when to inject a delay and duration of the delay void SetDelaySequence(const std::chrono::microseconds deadline, const std::vector>&& seq) { int total_delay = 0; for (auto& seq_iter : seq) { // Ensure no individual delay is > 500ms ASSERT_LT(seq_iter.second, 500000); total_delay += seq_iter.second; } // ASSERT total delay is < 1s. This is mainly to keep the test from // timing out in CI test frameworks ASSERT_LT(total_delay, 1000000); delay_seq_ = seq; delay_idx_ = 0; io_count_ = 0; deadline_ = deadline; } // Increment the IO counter and return a delay in microseconds bool ShouldDelay(int* delay) { if (delay_idx_ < delay_seq_.size() && delay_seq_[delay_idx_].first == io_count_++) { *delay = delay_seq_[delay_idx_].second; delay_idx_++; return true; } return false; } const std::chrono::microseconds GetDeadline() { return deadline_; } private: std::vector> delay_seq_; size_t delay_idx_; int io_count_; std::chrono::microseconds deadline_; SpecialEnv* env_; }; inline void CheckStatus(std::vector& statuses, size_t num_ok) { for (size_t i = 0; i < statuses.size(); ++i) { if (i < num_ok) { EXPECT_OK(statuses[i]); } else { EXPECT_EQ(statuses[i], Status::TimedOut()); } } } }; TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) { std::shared_ptr fs( new DBBasicTestMultiGetDeadline::DeadlineFS(env_)); std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); Options options = CurrentOptions(); 
env_->SetTimeElapseOnlySleep(&options); std::shared_ptr cache = NewLRUCache(1048576); BlockBasedTableOptions table_options; table_options.block_cache = cache; options.table_factory.reset(new BlockBasedTableFactory(table_options)); options.env = env.get(); ReopenWithColumnFamilies(GetCFNames(), options); // Test the non-batched version of MultiGet with multiple column // families std::vector key_str; size_t i; for (i = 0; i < 5; ++i) { key_str.emplace_back(Key(static_cast(i))); } std::vector cfs(key_str.size()); ; std::vector keys(key_str.size()); std::vector values(key_str.size()); for (i = 0; i < key_str.size(); ++i) { cfs[i] = handles_[i]; keys[i] = Slice(key_str[i].data(), key_str[i].size()); } ReadOptions ro; ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; // Delay the first IO by 200ms fs->SetDelaySequence(ro.deadline, {{0, 20000}}); std::vector statuses = dbfull()->MultiGet(ro, cfs, keys, &values); // The first key is successful because we check after the lookup, but // subsequent keys fail due to deadline exceeded CheckStatus(statuses, 1); // Clear the cache cache->SetCapacity(0); cache->SetCapacity(1048576); // Test non-batched Multiget with multiple column families and // introducing an IO delay in one of the middle CFs key_str.clear(); for (i = 0; i < 10; ++i) { key_str.emplace_back(Key(static_cast(i))); } cfs.resize(key_str.size()); keys.resize(key_str.size()); values.resize(key_str.size()); for (i = 0; i < key_str.size(); ++i) { // 2 keys per CF cfs[i] = handles_[i / 2]; keys[i] = Slice(key_str[i].data(), key_str[i].size()); } ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; fs->SetDelaySequence(ro.deadline, {{1, 20000}}); statuses = dbfull()->MultiGet(ro, cfs, keys, &values); CheckStatus(statuses, 3); // Test batched MultiGet with an IO delay in the first data block read. 
// Both keys in the first CF should succeed as they're in the same data // block and would form one batch, and we check for deadline between // batches. std::vector pin_values(keys.size()); cache->SetCapacity(0); cache->SetCapacity(1048576); statuses.clear(); statuses.resize(keys.size()); ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; fs->SetDelaySequence(ro.deadline, {{0, 20000}}); dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), pin_values.data(), statuses.data()); CheckStatus(statuses, 2); // Similar to the previous one, but an IO delay in the third CF data block // read for (PinnableSlice& value : pin_values) { value.Reset(); } cache->SetCapacity(0); cache->SetCapacity(1048576); statuses.clear(); statuses.resize(keys.size()); ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; fs->SetDelaySequence(ro.deadline, {{2, 20000}}); dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), pin_values.data(), statuses.data()); CheckStatus(statuses, 6); // Similar to the previous one, but an IO delay in the last but one CF for (PinnableSlice& value : pin_values) { value.Reset(); } cache->SetCapacity(0); cache->SetCapacity(1048576); statuses.clear(); statuses.resize(keys.size()); ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; fs->SetDelaySequence(ro.deadline, {{3, 20000}}); dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), pin_values.data(), statuses.data()); CheckStatus(statuses, 8); // Test batched MultiGet with single CF and lots of keys. Inject delay // into the second batch of keys. 
As each batch is 32, the first 64 keys, // i.e first two batches, should succeed and the rest should time out for (PinnableSlice& value : pin_values) { value.Reset(); } cache->SetCapacity(0); cache->SetCapacity(1048576); key_str.clear(); for (i = 0; i < 100; ++i) { key_str.emplace_back(Key(static_cast(i))); } keys.resize(key_str.size()); pin_values.clear(); pin_values.resize(key_str.size()); for (i = 0; i < key_str.size(); ++i) { keys[i] = Slice(key_str[i].data(), key_str[i].size()); } statuses.clear(); statuses.resize(keys.size()); ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; fs->SetDelaySequence(ro.deadline, {{1, 20000}}); dbfull()->MultiGet(ro, handles_[0], keys.size(), keys.data(), pin_values.data(), statuses.data()); CheckStatus(statuses, 64); Close(); } TEST_F(DBBasicTest, ManifestWriteFailure) { Options options = GetDefaultOptions(); options.create_if_missing = true; options.disable_auto_compactions = true; options.env = env_; DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "VersionSet::ProcessManifestWrites:AfterSyncManifest", [&](void* arg) { ASSERT_NE(nullptr, arg); auto* s = reinterpret_cast(arg); ASSERT_OK(*s); // Manually overwrite return status *s = Status::IOError(); }); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put("key", "value")); ASSERT_NOK(Flush()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); } } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { 
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_block_cache_test.cc000066400000000000000000001040431370372246700200640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include "cache/lru_cache.h" #include "db/db_test_util.h" #include "port/stack_trace.h" #include "util/compression.h" namespace ROCKSDB_NAMESPACE { class DBBlockCacheTest : public DBTestBase { private: size_t miss_count_ = 0; size_t hit_count_ = 0; size_t insert_count_ = 0; size_t failure_count_ = 0; size_t compression_dict_miss_count_ = 0; size_t compression_dict_hit_count_ = 0; size_t compression_dict_insert_count_ = 0; size_t compressed_miss_count_ = 0; size_t compressed_hit_count_ = 0; size_t compressed_insert_count_ = 0; size_t compressed_failure_count_ = 0; public: const size_t kNumBlocks = 10; const size_t kValueSize = 100; DBBlockCacheTest() : DBTestBase("/db_block_cache_test") {} BlockBasedTableOptions GetTableOptions() { BlockBasedTableOptions table_options; // Set a small enough block size so that each key-value get its own block. 
table_options.block_size = 1; return table_options; } Options GetOptions(const BlockBasedTableOptions& table_options) { Options options = CurrentOptions(); options.create_if_missing = true; options.avoid_flush_during_recovery = false; // options.compression = kNoCompression; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.table_factory.reset(new BlockBasedTableFactory(table_options)); return options; } void InitTable(const Options& /*options*/) { std::string value(kValueSize, 'a'); for (size_t i = 0; i < kNumBlocks; i++) { ASSERT_OK(Put(ToString(i), value.c_str())); } } void RecordCacheCounters(const Options& options) { miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS); hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT); insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD); failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES); compressed_miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS); compressed_hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT); compressed_insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD); compressed_failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); } void RecordCacheCountersForCompressionDict(const Options& options) { compression_dict_miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS); compression_dict_hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT); compression_dict_insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD); } void CheckCacheCounters(const Options& options, size_t expected_misses, size_t expected_hits, size_t expected_inserts, size_t expected_failures) { size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS); size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT); size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD); size_t new_failure_count = 
TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES); ASSERT_EQ(miss_count_ + expected_misses, new_miss_count); ASSERT_EQ(hit_count_ + expected_hits, new_hit_count); ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count); ASSERT_EQ(failure_count_ + expected_failures, new_failure_count); miss_count_ = new_miss_count; hit_count_ = new_hit_count; insert_count_ = new_insert_count; failure_count_ = new_failure_count; } void CheckCacheCountersForCompressionDict( const Options& options, size_t expected_compression_dict_misses, size_t expected_compression_dict_hits, size_t expected_compression_dict_inserts) { size_t new_compression_dict_miss_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS); size_t new_compression_dict_hit_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT); size_t new_compression_dict_insert_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD); ASSERT_EQ(compression_dict_miss_count_ + expected_compression_dict_misses, new_compression_dict_miss_count); ASSERT_EQ(compression_dict_hit_count_ + expected_compression_dict_hits, new_compression_dict_hit_count); ASSERT_EQ( compression_dict_insert_count_ + expected_compression_dict_inserts, new_compression_dict_insert_count); compression_dict_miss_count_ = new_compression_dict_miss_count; compression_dict_hit_count_ = new_compression_dict_hit_count; compression_dict_insert_count_ = new_compression_dict_insert_count; } void CheckCompressedCacheCounters(const Options& options, size_t expected_misses, size_t expected_hits, size_t expected_inserts, size_t expected_failures) { size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS); size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT); size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD); size_t new_failure_count = TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); ASSERT_EQ(compressed_miss_count_ + 
expected_misses, new_miss_count); ASSERT_EQ(compressed_hit_count_ + expected_hits, new_hit_count); ASSERT_EQ(compressed_insert_count_ + expected_inserts, new_insert_count); ASSERT_EQ(compressed_failure_count_ + expected_failures, new_failure_count); compressed_miss_count_ = new_miss_count; compressed_hit_count_ = new_hit_count; compressed_insert_count_ = new_insert_count; compressed_failure_count_ = new_failure_count; } }; TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) { ReadOptions read_options; read_options.fill_cache = false; auto table_options = GetTableOptions(); auto options = GetOptions(table_options); InitTable(options); std::shared_ptr cache = NewLRUCache(0, 0, false); table_options.block_cache = cache; options.table_factory.reset(new BlockBasedTableFactory(table_options)); Reopen(options); RecordCacheCounters(options); std::vector> iterators(kNumBlocks - 1); Iterator* iter = nullptr; ASSERT_EQ(0, cache->GetUsage()); iter = db_->NewIterator(read_options); iter->Seek(ToString(0)); ASSERT_LT(0, cache->GetUsage()); delete iter; iter = nullptr; ASSERT_EQ(0, cache->GetUsage()); } TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) { ReadOptions read_options; auto table_options = GetTableOptions(); auto options = GetOptions(table_options); InitTable(options); std::shared_ptr cache = NewLRUCache(0, 0, false); table_options.block_cache = cache; options.table_factory.reset(new BlockBasedTableFactory(table_options)); Reopen(options); RecordCacheCounters(options); std::vector> iterators(kNumBlocks - 1); Iterator* iter = nullptr; // Load blocks into cache. for (size_t i = 0; i + 1 < kNumBlocks; i++) { iter = db_->NewIterator(read_options); iter->Seek(ToString(i)); ASSERT_OK(iter->status()); CheckCacheCounters(options, 1, 0, 1, 0); iterators[i].reset(iter); } size_t usage = cache->GetUsage(); ASSERT_LT(0, usage); cache->SetCapacity(usage); ASSERT_EQ(usage, cache->GetPinnedUsage()); // Test with strict capacity limit. 
cache->SetStrictCapacityLimit(true); iter = db_->NewIterator(read_options); iter->Seek(ToString(kNumBlocks - 1)); ASSERT_TRUE(iter->status().IsIncomplete()); CheckCacheCounters(options, 1, 0, 0, 1); delete iter; iter = nullptr; // Release iterators and access cache again. for (size_t i = 0; i + 1 < kNumBlocks; i++) { iterators[i].reset(); CheckCacheCounters(options, 0, 0, 0, 0); } ASSERT_EQ(0, cache->GetPinnedUsage()); for (size_t i = 0; i + 1 < kNumBlocks; i++) { iter = db_->NewIterator(read_options); iter->Seek(ToString(i)); ASSERT_OK(iter->status()); CheckCacheCounters(options, 0, 1, 0, 0); iterators[i].reset(iter); } } #ifdef SNAPPY TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { ReadOptions read_options; auto table_options = GetTableOptions(); auto options = GetOptions(table_options); options.compression = CompressionType::kSnappyCompression; InitTable(options); std::shared_ptr cache = NewLRUCache(0, 0, false); std::shared_ptr compressed_cache = NewLRUCache(1 << 25, 0, false); table_options.block_cache = cache; table_options.block_cache_compressed = compressed_cache; options.table_factory.reset(new BlockBasedTableFactory(table_options)); Reopen(options); RecordCacheCounters(options); std::vector> iterators(kNumBlocks - 1); Iterator* iter = nullptr; // Load blocks into cache. for (size_t i = 0; i + 1 < kNumBlocks; i++) { iter = db_->NewIterator(read_options); iter->Seek(ToString(i)); ASSERT_OK(iter->status()); CheckCacheCounters(options, 1, 0, 1, 0); CheckCompressedCacheCounters(options, 1, 0, 1, 0); iterators[i].reset(iter); } size_t usage = cache->GetUsage(); ASSERT_LT(0, usage); ASSERT_EQ(usage, cache->GetPinnedUsage()); size_t compressed_usage = compressed_cache->GetUsage(); ASSERT_LT(0, compressed_usage); // Compressed block cache cannot be pinned. ASSERT_EQ(0, compressed_cache->GetPinnedUsage()); // Set strict capacity limit flag. Now block will only load into compressed // block cache. 
cache->SetCapacity(usage); cache->SetStrictCapacityLimit(true); ASSERT_EQ(usage, cache->GetPinnedUsage()); iter = db_->NewIterator(read_options); iter->Seek(ToString(kNumBlocks - 1)); ASSERT_TRUE(iter->status().IsIncomplete()); CheckCacheCounters(options, 1, 0, 0, 1); CheckCompressedCacheCounters(options, 1, 0, 1, 0); delete iter; iter = nullptr; // Clear strict capacity limit flag. This time we shall hit compressed block // cache. cache->SetStrictCapacityLimit(false); iter = db_->NewIterator(read_options); iter->Seek(ToString(kNumBlocks - 1)); ASSERT_OK(iter->status()); CheckCacheCounters(options, 1, 0, 1, 0); CheckCompressedCacheCounters(options, 0, 1, 0, 0); delete iter; iter = nullptr; } #endif // SNAPPY #ifndef ROCKSDB_LITE // Make sure that when options.block_cache is set, after a new table is // created its index/filter blocks are added to block cache. TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "key", "val")); // Create a new table. ASSERT_OK(Flush(1)); // index/filter blocks added to block cache right after table creation. ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(2, /* only index/filter were added */ TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); uint64_t int_num; ASSERT_TRUE( dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num)); ASSERT_EQ(int_num, 0U); // Make sure filter block is in cache. 
std::string value; ReadOptions ropt; db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value); // Miss count should remain the same. ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); // Make sure index block is in cache. auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT); value = Get(1, "key"); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(index_block_hit + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); value = Get(1, "key"); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(index_block_hit + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } // With fill_cache = false, fills up the cache, then iterates over the entire // db, verify dummy entries inserted in `BlockBasedTable::NewDataBlockIterator` // does not cause heap-use-after-free errors in COMPILE_WITH_ASAN=1 runs TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) { ReadOptions read_options; read_options.fill_cache = false; auto table_options = GetTableOptions(); auto options = GetOptions(table_options); InitTable(options); std::shared_ptr cache = NewLRUCache(10, 0, true); table_options.block_cache = cache; options.table_factory.reset(new BlockBasedTableFactory(table_options)); Reopen(options); ASSERT_OK(Put("key1", "val1")); ASSERT_OK(Put("key2", "val2")); ASSERT_OK(Flush()); ASSERT_OK(Put("key3", "val3")); ASSERT_OK(Put("key4", "val4")); ASSERT_OK(Flush()); ASSERT_OK(Put("key5", "val5")); ASSERT_OK(Put("key6", "val6")); ASSERT_OK(Flush()); Iterator* iter = nullptr; iter = db_->NewIterator(read_options); iter->Seek(ToString(0)); while (iter->Valid()) { iter->Next(); } delete iter; iter = nullptr; } TEST_F(DBBlockCacheTest, 
IndexAndFilterBlocksStats) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; LRUCacheOptions co; // 500 bytes are enough to hold the first two blocks co.capacity = 500; co.num_shard_bits = 0; co.strict_capacity_limit = false; co.metadata_charge_policy = kDontChargeCacheMetadata; std::shared_ptr cache = NewLRUCache(co); table_options.block_cache = cache; table_options.filter_policy.reset(NewBloomFilterPolicy(20, true)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "longer_key", "val")); // Create a new table ASSERT_OK(Flush(1)); size_t index_bytes_insert = TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT); size_t filter_bytes_insert = TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT); ASSERT_GT(index_bytes_insert, 0); ASSERT_GT(filter_bytes_insert, 0); ASSERT_EQ(cache->GetUsage(), index_bytes_insert + filter_bytes_insert); // set the cache capacity to the current usage cache->SetCapacity(index_bytes_insert + filter_bytes_insert); // The index and filter eviction statistics were broken by the refactoring // that moved the readers out of the block cache. Disabling these until we can // bring the stats back. // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT), 0); // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT), 0); // Note that the second key needs to be no longer than the first one. // Otherwise the second index block may not fit in cache. 
ASSERT_OK(Put(1, "key", "val")); // Create a new table ASSERT_OK(Flush(1)); // cache evicted old index and block entries ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT), index_bytes_insert); ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT), filter_bytes_insert); // The index and filter eviction statistics were broken by the refactoring // that moved the readers out of the block cache. Disabling these until we can // bring the stats back. // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT), // index_bytes_insert); // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT), // filter_bytes_insert); } namespace { // A mock cache wraps LRUCache, and record how many entries have been // inserted for each priority. class MockCache : public LRUCache { public: static uint32_t high_pri_insert_count; static uint32_t low_pri_insert_count; MockCache() : LRUCache((size_t)1 << 25 /*capacity*/, 0 /*num_shard_bits*/, false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/) { } Status Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Handle** handle, Priority priority) override { if (priority == Priority::LOW) { low_pri_insert_count++; } else { high_pri_insert_count++; } return LRUCache::Insert(key, value, charge, deleter, handle, priority); } }; uint32_t MockCache::high_pri_insert_count = 0; uint32_t MockCache::low_pri_insert_count = 0; } // anonymous namespace TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) { for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.block_cache.reset(new MockCache()); table_options.filter_policy.reset(NewBloomFilterPolicy(20)); 
table_options.cache_index_and_filter_blocks_with_high_priority = priority == Cache::Priority::HIGH ? true : false; options.table_factory.reset(new BlockBasedTableFactory(table_options)); DestroyAndReopen(options); MockCache::high_pri_insert_count = 0; MockCache::low_pri_insert_count = 0; // Create a new table. ASSERT_OK(Put("foo", "value")); ASSERT_OK(Put("bar", "value")); ASSERT_OK(Flush()); ASSERT_EQ(1, NumTableFilesAtLevel(0)); // index/filter blocks added to block cache right after table creation. ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(2, /* only index/filter were added */ TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); if (priority == Cache::Priority::LOW) { ASSERT_EQ(0u, MockCache::high_pri_insert_count); ASSERT_EQ(2u, MockCache::low_pri_insert_count); } else { ASSERT_EQ(2u, MockCache::high_pri_insert_count); ASSERT_EQ(0u, MockCache::low_pri_insert_count); } // Access data block. ASSERT_EQ("value", Get("foo")); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(3, /*adding data block*/ TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); // Data block should be inserted with low priority. if (priority == Cache::Priority::LOW) { ASSERT_EQ(0u, MockCache::high_pri_insert_count); ASSERT_EQ(3u, MockCache::low_pri_insert_count); } else { ASSERT_EQ(2u, MockCache::high_pri_insert_count); ASSERT_EQ(1u, MockCache::low_pri_insert_count); } } } namespace { // An LRUCache wrapper that can falsely report "not found" on Lookup. // This allows us to manipulate BlockBasedTableReader into thinking // another thread inserted the data in between Lookup and Insert, // while mostly preserving the LRUCache interface/behavior. 
class LookupLiarCache : public CacheWrapper { int nth_lookup_not_found_ = 0; public: explicit LookupLiarCache(std::shared_ptr target) : CacheWrapper(std::move(target)) {} Handle* Lookup(const Slice& key, Statistics* stats) override { if (nth_lookup_not_found_ == 1) { nth_lookup_not_found_ = 0; return nullptr; } if (nth_lookup_not_found_ > 1) { --nth_lookup_not_found_; } return CacheWrapper::Lookup(key, stats); } // 1 == next lookup, 2 == after next, etc. void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; } }; } // anonymous namespace TEST_F(DBBlockCacheTest, AddRedundantStats) { const size_t capacity = size_t{1} << 25; const int num_shard_bits = 0; // 1 shard int iterations_tested = 0; for (std::shared_ptr base_cache : {NewLRUCache(capacity, num_shard_bits), NewClockCache(capacity, num_shard_bits)}) { if (!base_cache) { // Skip clock cache when not supported continue; } ++iterations_tested; Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); std::shared_ptr cache = std::make_shared(base_cache); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.block_cache = cache; table_options.filter_policy.reset(NewBloomFilterPolicy(50)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); DestroyAndReopen(options); // Create a new table. ASSERT_OK(Put("foo", "value")); ASSERT_OK(Put("bar", "value")); ASSERT_OK(Flush()); ASSERT_EQ(1, NumTableFilesAtLevel(0)); // Normal access filter+index+data. 
ASSERT_EQ("value", Get("foo")); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); // -------- ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); // -------- ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); // Againt access filter+index+data, but force redundant load+insert on index cache->SetNthLookupNotFound(2); ASSERT_EQ("value", Get("bar")); ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); // -------- ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); // -------- ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); // Access just filter (with high probability), and force redundant // load+insert cache->SetNthLookupNotFound(1); ASSERT_EQ("NOT_FOUND", Get("this key was not added")); EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); // -------- EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD)); EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); 
// -------- EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); // Access just data, forcing redundant load+insert ReadOptions read_options; std::unique_ptr iter{db_->NewIterator(read_options)}; cache->SetNthLookupNotFound(1); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key(), "bar"); EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); // -------- EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD)); EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); // -------- EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); } EXPECT_GE(iterations_tested, 1); } TEST_F(DBBlockCacheTest, ParanoidFileChecks) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.level0_file_num_compaction_trigger = 2; options.paranoid_file_checks = true; BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = false; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "1_key", "val")); ASSERT_OK(Put(1, "9_key", "val")); // Create a new table. ASSERT_OK(Flush(1)); ASSERT_EQ(1, /* read and cache data block */ TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_OK(Put(1, "1_key2", "val2")); ASSERT_OK(Put(1, "9_key2", "val2")); // Create a new SST file. This will further trigger a compaction // and generate another file. 
ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(3, /* Totally 3 files created up to now */ TestGetTickerCount(options, BLOCK_CACHE_ADD)); // After disabling options.paranoid_file_checks. NO further block // is added after generating a new file. ASSERT_OK( dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}})); ASSERT_OK(Put(1, "1_key3", "val3")); ASSERT_OK(Put(1, "9_key3", "val3")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "1_key4", "val4")); ASSERT_OK(Put(1, "9_key4", "val4")); ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(3, /* Totally 3 files created up to now */ TestGetTickerCount(options, BLOCK_CACHE_ADD)); } TEST_F(DBBlockCacheTest, CompressedCache) { if (!Snappy_Supported()) { return; } int num_iter = 80; // Run this test three iterations. // Iteration 1: only a uncompressed block cache // Iteration 2: only a compressed block cache // Iteration 3: both block cache and compressed cache // Iteration 4: both block cache and compressed cache, but DB is not // compressed for (int iter = 0; iter < 4; iter++) { Options options = CurrentOptions(); options.write_buffer_size = 64 * 1024; // small write buffer options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; switch (iter) { case 0: // only uncompressed block cache table_options.block_cache = NewLRUCache(8 * 1024); table_options.block_cache_compressed = nullptr; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); break; case 1: // no block cache, only compressed cache table_options.no_block_cache = true; table_options.block_cache = nullptr; table_options.block_cache_compressed = NewLRUCache(8 * 1024); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); break; case 2: // both compressed and uncompressed block cache table_options.block_cache = NewLRUCache(1024); table_options.block_cache_compressed = NewLRUCache(8 * 1024); 
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); break; case 3: // both block cache and compressed cache, but DB is not compressed // also, make block cache sizes bigger, to trigger block cache hits table_options.block_cache = NewLRUCache(1024 * 1024); table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.compression = kNoCompression; break; default: FAIL(); } CreateAndReopenWithCF({"pikachu"}, options); // default column family doesn't have block cache Options no_block_cache_opts; no_block_cache_opts.statistics = options.statistics; no_block_cache_opts = CurrentOptions(no_block_cache_opts); BlockBasedTableOptions table_options_no_bc; table_options_no_bc.no_block_cache = true; no_block_cache_opts.table_factory.reset( NewBlockBasedTableFactory(table_options_no_bc)); ReopenWithColumnFamilies( {"default", "pikachu"}, std::vector({no_block_cache_opts, options})); Random rnd(301); // Write 8MB (80 values, each 100K) ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); std::vector values; std::string str; for (int i = 0; i < num_iter; i++) { if (i % 4 == 0) { // high compression ratio str = RandomString(&rnd, 1000); } values.push_back(str); ASSERT_OK(Put(1, Key(i), values[i])); } // flush all data from memtable so that reads are from block cache ASSERT_OK(Flush(1)); for (int i = 0; i < num_iter; i++) { ASSERT_EQ(Get(1, Key(i)), values[i]); } // check that we triggered the appropriate code paths in the cache switch (iter) { case 0: // only uncompressed block cache ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); break; case 1: // no block cache, only compressed cache ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); break; case 2: // both compressed and uncompressed block cache 
ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); break; case 3: // both compressed and uncompressed block cache ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0); ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0); ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0); // compressed doesn't have any hits since blocks are not compressed on // storage ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0); break; default: FAIL(); } options.create_if_missing = true; DestroyAndReopen(options); } } TEST_F(DBBlockCacheTest, CacheCompressionDict) { const int kNumFiles = 4; const int kNumEntriesPerFile = 128; const int kNumBytesPerEntry = 1024; // Try all the available libraries that support dictionary compression std::vector compression_types; if (Zlib_Supported()) { compression_types.push_back(kZlibCompression); } if (LZ4_Supported()) { compression_types.push_back(kLZ4Compression); compression_types.push_back(kLZ4HCCompression); } if (ZSTD_Supported()) { compression_types.push_back(kZSTD); } else if (ZSTDNotFinal_Supported()) { compression_types.push_back(kZSTDNotFinalCompression); } Random rnd(301); for (auto compression_type : compression_types) { Options options = CurrentOptions(); options.compression = compression_type; options.compression_opts.max_dict_bytes = 4096; options.create_if_missing = true; options.num_levels = 2; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry; BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.block_cache.reset(new MockCache()); options.table_factory.reset(new BlockBasedTableFactory(table_options)); DestroyAndReopen(options); RecordCacheCountersForCompressionDict(options); for (int i = 0; i < kNumFiles; ++i) { ASSERT_EQ(i, NumTableFilesAtLevel(0, 0)); for (int j = 0; j < 
kNumEntriesPerFile; ++j) { std::string value = RandomString(&rnd, kNumBytesPerEntry); ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str())); } ASSERT_OK(Flush()); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1)); // Compression dictionary blocks are preloaded. CheckCacheCountersForCompressionDict( options, kNumFiles /* expected_compression_dict_misses */, 0 /* expected_compression_dict_hits */, kNumFiles /* expected_compression_dict_inserts */); // Seek to a key in a file. It should cause the SST's dictionary meta-block // to be read. RecordCacheCounters(options); RecordCacheCountersForCompressionDict(options); ReadOptions read_options; ASSERT_NE("NOT_FOUND", Get(Key(kNumFiles * kNumEntriesPerFile - 1))); // Two block hits: index and dictionary since they are prefetched // One block missed/added: data block CheckCacheCounters(options, 1 /* expected_misses */, 2 /* expected_hits */, 1 /* expected_inserts */, 0 /* expected_failures */); CheckCacheCountersForCompressionDict( options, 0 /* expected_compression_dict_misses */, 1 /* expected_compression_dict_hits */, 0 /* expected_compression_dict_inserts */); } } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_bloom_filter_test.cc000066400000000000000000002373211370372246700203320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #include "db/db_test_util.h" #include "options/options_helper.h" #include "port/stack_trace.h" #include "rocksdb/perf_context.h" #include "table/block_based/filter_policy_internal.h" namespace ROCKSDB_NAMESPACE { namespace { using BFP = BloomFilterPolicy; } // namespace // DB tests related to bloom filter. class DBBloomFilterTest : public DBTestBase { public: DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {} }; class DBBloomFilterTestWithParam : public DBTestBase, public testing::WithParamInterface< std::tuple> { // public testing::WithParamInterface { protected: BFP::Mode bfp_impl_; bool partition_filters_; uint32_t format_version_; public: DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {} ~DBBloomFilterTestWithParam() override {} void SetUp() override { bfp_impl_ = std::get<0>(GetParam()); partition_filters_ = std::get<1>(GetParam()); format_version_ = std::get<2>(GetParam()); } }; class DBBloomFilterTestDefFormatVersion : public DBBloomFilterTestWithParam {}; class SliceTransformLimitedDomainGeneric : public SliceTransform { const char* Name() const override { return "SliceTransformLimitedDomainGeneric"; } Slice Transform(const Slice& src) const override { return Slice(src.data(), 5); } bool InDomain(const Slice& src) const override { // prefix will be x???? return src.size() >= 5; } bool InRange(const Slice& dst) const override { // prefix will be x???? return dst.size() == 5; } }; // KeyMayExist can lead to a few false positives, but not false negatives. 
// To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated
//
// Walks one key through memtable -> flushed SST -> deleted -> compacted and
// checks at every step that KeyMayExist() answers without opening files or
// adding blocks to the block cache.
TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
  do {
    ReadOptions ropts;
    std::string value;
    anon::OptionsOverride options_override;
    options_override.filter_policy.reset(new BFP(20, bfp_impl_));
    options_override.partition_filters = partition_filters_;
    options_override.metadata_block_size = 32;
    Options options = CurrentOptions(options_override);
    if (partition_filters_ &&
        static_cast<BlockBasedTableOptions*>(
            options.table_factory->GetOptions())
                ->index_type != BlockBasedTableOptions::kTwoLevelIndexSearch) {
      // In the current implementation partitioned filters depend on partitioned
      // indexes
      continue;
    }
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));

    ASSERT_OK(Put(1, "a", "b"));
    bool value_found = false;
    ASSERT_TRUE(
        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
    ASSERT_TRUE(value_found);
    ASSERT_EQ("b", value);

    ASSERT_OK(Flush(1));
    value.clear();

    uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    ASSERT_TRUE(
        db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
    ASSERT_TRUE(!value_found);
    // assert that no new files were opened and no new blocks were
    // read into block cache.
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_OK(Delete(1, "a"));

    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_OK(Flush(1));
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
                                true /* disallow trivial move */);

    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_OK(Delete(1, "c"));

    numopen = TestGetTickerCount(options, NO_FILE_OPENS);
    cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
    ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
    ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    // KeyMayExist function only checks data in block caches, which is not used
    // by plain table format.
  } while (
      ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction));
}

// Prefix-only bloom (whole_key_filtering = false) with the custom 5-byte
// prefix extractor. Checks both the BLOOM_FILTER_USEFUL ticker and the level-0
// perf-context counter after each Get().
TEST_F(DBBloomFilterTest, GetFilterByPrefixBloomCustomPrefixExtractor) {
  for (bool partition_filters : {true, false}) {
    Options options = last_options_;
    options.prefix_extractor =
        std::make_shared<SliceTransformLimitedDomainGeneric>();
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    get_perf_context()->EnablePerLevelPerfContext();
    BlockBasedTableOptions bbto;
    bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
    if (partition_filters) {
      bbto.partition_filters = true;
      bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    }
    bbto.whole_key_filtering = false;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    DestroyAndReopen(options);

    WriteOptions wo;
    ReadOptions ro;
    FlushOptions fo;
    fo.wait = true;
    std::string value;

    ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
    ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
    ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));

    // A failed flush would invalidate every counter check below.
    ASSERT_OK(dbfull()->Flush(fo));

    ASSERT_EQ("foo", Get("barbarbar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
    ASSERT_EQ(
        0,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
    ASSERT_EQ("foo2", Get("barbarbar2"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
    ASSERT_EQ(
        0,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
    ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
    ASSERT_EQ(
        0,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);

    ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ(
        1,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);

    ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
    ASSERT_EQ(
        2,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);

    // With total_order_seek set, the counters are expected not to move.
    ro.total_order_seek = true;
    ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
    ASSERT_EQ(
        2,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
    get_perf_context()->Reset();
  }
}

// Same scenario as above, using the built-in fixed 8-byte prefix transform.
TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
  for (bool partition_filters : {true, false}) {
    Options options = last_options_;
    options.prefix_extractor.reset(NewFixedPrefixTransform(8));
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    get_perf_context()->EnablePerLevelPerfContext();
    BlockBasedTableOptions bbto;
    bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
    if (partition_filters) {
      bbto.partition_filters = true;
      bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    }
    bbto.whole_key_filtering = false;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    DestroyAndReopen(options);

    WriteOptions wo;
    ReadOptions ro;
    FlushOptions fo;
    fo.wait = true;
    std::string value;

    ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo"));
    ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
    ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));

    // A failed flush would invalidate every counter check below.
    ASSERT_OK(dbfull()->Flush(fo));

    ASSERT_EQ("foo", Get("barbarbar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
    ASSERT_EQ("foo2", Get("barbarbar2"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
    ASSERT_EQ("NOT_FOUND", Get("barbarbar3"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);

    ASSERT_EQ("NOT_FOUND", Get("barfoofoo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);

    ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);

    ro.total_order_seek = true;
    ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
    ASSERT_EQ(
        2,
        (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful);
    get_perf_context()->Reset();
  }
}

TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {
  for (bool
partition_filters : {true, false}) {
    // Start with prefix bloom only: 3-byte fixed prefix, whole-key filtering
    // off.
    Options options = last_options_;
    options.prefix_extractor.reset(NewFixedPrefixTransform(3));
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    get_perf_context()->EnablePerLevelPerfContext();

    BlockBasedTableOptions bbto;
    bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
    bbto.whole_key_filtering = false;
    if (partition_filters) {
      bbto.partition_filters = true;
      bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    }
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    DestroyAndReopen(options);

    WriteOptions wo;
    ReadOptions ro;
    FlushOptions fo;
    fo.wait = true;
    std::string value;

    ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
    // Need to insert some keys to make sure files are not filtered out by key
    // ranges.
    ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
    ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
    dbfull()->Flush(fo);

    Reopen(options);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);

    // Reopen with whole key filtering enabled and prefix extractor
    // NULL. Bloom filter should be off for both of whole key and
    // prefix bloom.
    bbto.whole_key_filtering = true;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    options.prefix_extractor.reset();
    Reopen(options);

    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    // Write DB with only full key filtering.
    ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
    // Need to insert some keys to make sure files are not filtered out by key
    // ranges.
    ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
    ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
    db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);

    // Reopen with both of whole key off and prefix extractor enabled.
    // Still no bloom filter should be used.
    options.prefix_extractor.reset(NewFixedPrefixTransform(3));
    bbto.whole_key_filtering = false;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    Reopen(options);

    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);

    // Try to create a DB with mixed files:
    ASSERT_OK(dbfull()->Put(wo, "foobar", "foo"));
    // Need to insert some keys to make sure files are not filtered out by key
    // ranges.
    ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
    ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
    db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);

    options.prefix_extractor.reset();
    bbto.whole_key_filtering = true;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    Reopen(options);

    // Try to create a DB with mixed files.
    ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar"));
    // In this case we need to insert some keys to make sure files are
    // not filtered out by key ranges.
    ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
    ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
    Flush();

    // Now we have two files:
    // File 1: An older file with prefix bloom.
    // File 2: A newer file with whole bloom filter.
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
    ASSERT_EQ("bar", Get("barfoo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);

    // Reopen with the same setting: only whole key is used
    Reopen(options);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
    ASSERT_EQ("bar", Get("barfoo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);

    // Restart with both filters allowed
    options.prefix_extractor.reset(NewFixedPrefixTransform(3));
    bbto.whole_key_filtering = true;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    Reopen(options);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7);
    // File 1 will have it filtered out.
    // File 2 will not, as prefix `foo` exists in the file.
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
    ASSERT_EQ("bar", Get("barfoo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);

    // Restart with only prefix bloom allowed.
    options.prefix_extractor.reset(NewFixedPrefixTransform(3));
    bbto.whole_key_filtering = false;
    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
    Reopen(options);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
    ASSERT_EQ("NOT_FOUND", Get("foo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11);
    ASSERT_EQ("NOT_FOUND", Get("bar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
    ASSERT_EQ("foo", Get("foobar"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);
    ASSERT_EQ("bar", Get("barfoo"));
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12);

    // The per-level perf-context counters, summed over all levels, must agree
    // with the final ticker value checked above.
    uint64_t bloom_filter_useful_all_levels = 0;
    for (auto& kv : (*(get_perf_context()->level_to_perf_context))) {
      if (kv.second.bloom_filter_useful > 0) {
        bloom_filter_useful_all_levels += kv.second.bloom_filter_useful;
      }
    }
    ASSERT_EQ(12, bloom_filter_useful_all_levels);
    get_perf_context()->Reset();
  }
}

// Counts random reads issued through env_ while looking up present and
// missing keys, and bounds them to check that the filter avoids most reads.
TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
  do {
    Options options = CurrentOptions();
    env_->count_random_reads_ = true;
    options.env = env_;
    // ChangeCompactOptions() only changes compaction style, which does not
    // trigger reset of table_factory
    BlockBasedTableOptions table_options;
    table_options.no_block_cache = true;
    table_options.filter_policy.reset(new BFP(10, bfp_impl_));
    table_options.partition_filters = partition_filters_;
    if (partition_filters_) {
      table_options.index_type =
          BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    }
    table_options.format_version = format_version_;
    if (format_version_ >= 4) {
      // value delta encoding challenged more with index interval > 1
      table_options.index_block_restart_interval = 8;
    }
    table_options.metadata_block_size = 32;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));

    CreateAndReopenWithCF({"pikachu"}, options);

    // Populate multiple layers
    const int N = 10000;
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(1, Key(i), Key(i)));
    }
    Compact(1, "a", "z");
    for (int i = 0; i < N; i +=
100) { ASSERT_OK(Put(1, Key(i), Key(i))); } Flush(1); // Prevent auto compactions triggered by seeks env_->delay_sstable_sync_.store(true, std::memory_order_release); // Lookup present keys. Should rarely read from small sstable. env_->random_read_counter_.Reset(); for (int i = 0; i < N; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } int reads = env_->random_read_counter_.Read(); fprintf(stderr, "%d present => %d reads\n", N, reads); ASSERT_GE(reads, N); if (partition_filters_) { // Without block cache, we read an extra partition filter per each // level*read and a partition index per each read ASSERT_LE(reads, 4 * N + 2 * N / 100); } else { ASSERT_LE(reads, N + 2 * N / 100); } // Lookup present keys. Should rarely read from either sstable. env_->random_read_counter_.Reset(); for (int i = 0; i < N; i++) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing")); } reads = env_->random_read_counter_.Read(); fprintf(stderr, "%d missing => %d reads\n", N, reads); if (partition_filters_) { // With partitioned filter we read one extra filter per level per each // missed read. 
ASSERT_LE(reads, 2 * N + 3 * N / 100); } else { ASSERT_LE(reads, 3 * N / 100); } env_->delay_sstable_sync_.store(false, std::memory_order_release); Close(); } while (ChangeCompactOptions()); } #ifndef ROCKSDB_VALGRIND_RUN INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestDefFormatVersion, ::testing::Values( std::make_tuple(BFP::kDeprecatedBlock, false, test::kDefaultFormatVersion), std::make_tuple(BFP::kAuto, true, test::kDefaultFormatVersion), std::make_tuple(BFP::kAuto, false, test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, ::testing::Values( std::make_tuple(BFP::kDeprecatedBlock, false, test::kDefaultFormatVersion), std::make_tuple(BFP::kAuto, true, test::kDefaultFormatVersion), std::make_tuple(BFP::kAuto, false, test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values( std::make_tuple(BFP::kDeprecatedBlock, false, test::kLatestFormatVersion), std::make_tuple(BFP::kAuto, true, test::kLatestFormatVersion), std::make_tuple(BFP::kAuto, false, test::kLatestFormatVersion))); #endif // ROCKSDB_VALGRIND_RUN TEST_F(DBBloomFilterTest, BloomFilterRate) { while (ChangeFilterOptions()) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->EnablePerLevelPerfContext(); CreateAndReopenWithCF({"pikachu"}, options); const int maxKey = 10000; for (int i = 0; i < maxKey; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } // Add a large key to make the file contain wide range ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); Flush(1); // Check if they can be found for (int i = 0; i < maxKey; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); // Check if filter is useful for (int i = 0; i < maxKey; i++) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); ASSERT_GE( 
(*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful, maxKey * 0.98); get_perf_context()->Reset(); } } TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); // Create with block based filter CreateAndReopenWithCF({"pikachu"}, options); const int maxKey = 10000; for (int i = 0; i < maxKey; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); Flush(1); // Check db with full filter table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ReopenWithColumnFamilies({"default", "pikachu"}, options); // Check if they can be found for (int i = 0; i < maxKey; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); // Check db with partitioned full filter table_options.partition_filters = true; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ReopenWithColumnFamilies({"default", "pikachu"}, options); // Check if they can be found for (int i = 0; i < maxKey; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); } TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) { for (bool partition_filters : {true, false}) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; if (partition_filters) { table_options.partition_filters = true; table_options.index_type = 
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; } table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); // Create with full filter CreateAndReopenWithCF({"pikachu"}, options); const int maxKey = 10000; for (int i = 0; i < maxKey; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); Flush(1); // Check db with block_based filter table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ReopenWithColumnFamilies({"default", "pikachu"}, options); // Check if they can be found for (int i = 0; i < maxKey; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); } } namespace { // A wrapped bloom over block-based FilterPolicy class TestingWrappedBlockBasedFilterPolicy : public FilterPolicy { public: explicit TestingWrappedBlockBasedFilterPolicy(int bits_per_key) : filter_(NewBloomFilterPolicy(bits_per_key, true)), counter_(0) {} ~TestingWrappedBlockBasedFilterPolicy() override { delete filter_; } const char* Name() const override { return "TestingWrappedBlockBasedFilterPolicy"; } void CreateFilter(const ROCKSDB_NAMESPACE::Slice* keys, int n, std::string* dst) const override { std::unique_ptr user_keys( new ROCKSDB_NAMESPACE::Slice[n]); for (int i = 0; i < n; ++i) { user_keys[i] = convertKey(keys[i]); } return filter_->CreateFilter(user_keys.get(), n, dst); } bool KeyMayMatch(const ROCKSDB_NAMESPACE::Slice& key, const ROCKSDB_NAMESPACE::Slice& filter) const override { counter_++; return filter_->KeyMayMatch(convertKey(key), filter); } uint32_t GetCounter() { return counter_; } private: const FilterPolicy* filter_; mutable uint32_t counter_; ROCKSDB_NAMESPACE::Slice convertKey( const ROCKSDB_NAMESPACE::Slice& key) const { return key; } }; } // namespace 
TEST_F(DBBloomFilterTest, WrappedBlockBasedFilterPolicy) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; TestingWrappedBlockBasedFilterPolicy* policy = new TestingWrappedBlockBasedFilterPolicy(10); table_options.filter_policy.reset(policy); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); const int maxKey = 10000; for (int i = 0; i < maxKey; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } // Add a large key to make the file contain wide range ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); ASSERT_EQ(0U, policy->GetCounter()); Flush(1); // Check if they can be found for (int i = 0; i < maxKey; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); ASSERT_EQ(1U * maxKey, policy->GetCounter()); // Check if filter is useful for (int i = 0; i < maxKey; i++) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); ASSERT_EQ(2U * maxKey, policy->GetCounter()); } namespace { // NOTE: This class is referenced by HISTORY.md as a model for a wrapper // FilterPolicy selecting among configurations based on context. class LevelAndStyleCustomFilterPolicy : public FilterPolicy { public: explicit LevelAndStyleCustomFilterPolicy(int bpk_fifo, int bpk_l0_other, int bpk_otherwise) : policy_fifo_(NewBloomFilterPolicy(bpk_fifo)), policy_l0_other_(NewBloomFilterPolicy(bpk_l0_other)), policy_otherwise_(NewBloomFilterPolicy(bpk_otherwise)) {} // OK to use built-in policy name because we are deferring to a // built-in builder. We aren't changing the serialized format. 
const char* Name() const override { return policy_fifo_->Name(); } FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext& context) const override { if (context.compaction_style == kCompactionStyleFIFO) { return policy_fifo_->GetBuilderWithContext(context); } else if (context.level_at_creation == 0) { return policy_l0_other_->GetBuilderWithContext(context); } else { return policy_otherwise_->GetBuilderWithContext(context); } } FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override { // OK to defer to any of them; they all can parse built-in filters // from any settings. return policy_fifo_->GetFilterBitsReader(contents); } // Defer just in case configuration uses block-based filter void CreateFilter(const Slice* keys, int n, std::string* dst) const override { policy_otherwise_->CreateFilter(keys, n, dst); } bool KeyMayMatch(const Slice& key, const Slice& filter) const override { return policy_otherwise_->KeyMayMatch(key, filter); } private: const std::unique_ptr policy_fifo_; const std::unique_ptr policy_l0_other_; const std::unique_ptr policy_otherwise_; }; class TestingContextCustomFilterPolicy : public LevelAndStyleCustomFilterPolicy { public: explicit TestingContextCustomFilterPolicy(int bpk_fifo, int bpk_l0_other, int bpk_otherwise) : LevelAndStyleCustomFilterPolicy(bpk_fifo, bpk_l0_other, bpk_otherwise) { } FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext& context) const override { test_report_ += "cf="; test_report_ += context.column_family_name; test_report_ += ",cs="; test_report_ += OptionsHelper::compaction_style_to_string[context.compaction_style]; test_report_ += ",lv="; test_report_ += std::to_string(context.level_at_creation); test_report_ += "\n"; return LevelAndStyleCustomFilterPolicy::GetBuilderWithContext(context); } std::string DumpTestReport() { std::string rv; std::swap(rv, test_report_); return rv; } private: mutable std::string test_report_; }; } // namespace TEST_F(DBBloomFilterTest, 
ContextCustomFilterPolicy) { for (bool fifo : {true, false}) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.compaction_style = fifo ? kCompactionStyleFIFO : kCompactionStyleLevel; BlockBasedTableOptions table_options; auto policy = std::make_shared(15, 8, 5); table_options.filter_policy = policy; table_options.format_version = 5; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({fifo ? "abe" : "bob"}, options); const int maxKey = 10000; for (int i = 0; i < maxKey / 2; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } // Add a large key to make the file contain wide range ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); Flush(1); EXPECT_EQ(policy->DumpTestReport(), fifo ? "cf=abe,cs=kCompactionStyleFIFO,lv=0\n" : "cf=bob,cs=kCompactionStyleLevel,lv=0\n"); for (int i = maxKey / 2; i < maxKey; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } Flush(1); EXPECT_EQ(policy->DumpTestReport(), fifo ? "cf=abe,cs=kCompactionStyleFIFO,lv=0\n" : "cf=bob,cs=kCompactionStyleLevel,lv=0\n"); // Check that they can be found for (int i = 0; i < maxKey; i++) { ASSERT_EQ(Key(i), Get(1, Key(i))); } // Since we have two tables / two filters, we might have Bloom checks on // our queries, but no more than one "useful" per query on a found key. EXPECT_LE(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey); // Check that we have two filters, each about // fifo: 0.12% FP rate (15 bits per key) // level: 2.3% FP rate (8 bits per key) for (int i = 0; i < maxKey; i++) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } { auto useful_count = TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); EXPECT_GE(useful_count, maxKey * 2 * (fifo ? 0.9980 : 0.975)); EXPECT_LE(useful_count, maxKey * 2 * (fifo ? 
0.9995 : 0.98)); } if (!fifo) { // FIFO only has L0 // Full compaction ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); EXPECT_EQ(policy->DumpTestReport(), "cf=bob,cs=kCompactionStyleLevel,lv=1\n"); // Check that we now have one filter, about 9.2% FP rate (5 bits per key) for (int i = 0; i < maxKey; i++) { ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); } { auto useful_count = TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); EXPECT_GE(useful_count, maxKey * 0.90); EXPECT_LE(useful_count, maxKey * 0.91); } } // Destroy ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); dbfull()->DestroyColumnFamilyHandle(handles_[1]); handles_[1] = nullptr; } } class SliceTransformLimitedDomain : public SliceTransform { const char* Name() const override { return "SliceTransformLimitedDomain"; } Slice Transform(const Slice& src) const override { return Slice(src.data(), 5); } bool InDomain(const Slice& src) const override { // prefix will be x???? return src.size() >= 5 && src[0] == 'x'; } bool InRange(const Slice& dst) const override { // prefix will be x???? 
return dst.size() == 5 && dst[0] == 'x'; } }; TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) { BlockBasedTableOptions bbto; // Full Filter Block bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; Options options = CurrentOptions(); options.prefix_extractor = std::make_shared(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); ASSERT_OK(Put("x1111_AAAA", "val1")); ASSERT_OK(Put("x1112_AAAA", "val2")); ASSERT_OK(Put("x1113_AAAA", "val3")); ASSERT_OK(Put("x1114_AAAA", "val4")); // Not in domain, wont be added to filter ASSERT_OK(Put("zzzzz_AAAA", "val5")); ASSERT_OK(Flush()); ASSERT_EQ(Get("x1111_AAAA"), "val1"); ASSERT_EQ(Get("x1112_AAAA"), "val2"); ASSERT_EQ(Get("x1113_AAAA"), "val3"); ASSERT_EQ(Get("x1114_AAAA"), "val4"); // Was not added to filter but rocksdb will try to read it from the filter ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); } TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) { BlockBasedTableOptions bbto; // Block Filter Block bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, true)); Options options = CurrentOptions(); options.prefix_extractor = std::make_shared(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); ASSERT_OK(Put("x1113_AAAA", "val3")); ASSERT_OK(Put("x1114_AAAA", "val4")); // Not in domain, wont be added to filter ASSERT_OK(Put("zzzzz_AAAA", "val1")); ASSERT_OK(Put("zzzzz_AAAB", "val2")); ASSERT_OK(Put("zzzzz_AAAC", "val3")); ASSERT_OK(Put("zzzzz_AAAD", "val4")); ASSERT_OK(Flush()); std::vector iter_res; auto iter = db_->NewIterator(ReadOptions()); // Seek to a key that was not in Domain for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) { iter_res.emplace_back(iter->value().ToString()); } std::vector expected_res = {"val1", "val2", "val3", "val4"}; ASSERT_EQ(iter_res, expected_res); delete iter; } TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilter) { // 
regression test for #2743. the range delete tombstones in memtable should // be added even when Get() skips searching due to its prefix bloom filter const int kMemtableSize = 1 << 20; // 1MB const int kMemtablePrefixFilterSize = 1 << 13; // 8KB const int kPrefixLen = 4; Options options = CurrentOptions(); options.memtable_prefix_bloom_size_ratio = static_cast(kMemtablePrefixFilterSize) / kMemtableSize; options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen)); options.write_buffer_size = kMemtableSize; options.memtable_whole_key_filtering = false; Reopen(options); std::string key1("AAAABBBB"); std::string key2("AAAACCCC"); // not in DB std::string key3("AAAADDDD"); std::string key4("AAAAEEEE"); std::string value1("Value1"); std::string value3("Value3"); std::string value4("Value4"); ASSERT_OK(Put(key1, value1, WriteOptions())); // check memtable bloom stats ASSERT_EQ("NOT_FOUND", Get(key2)); ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); // same prefix, bloom filter false positive ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); // enable whole key bloom filter options.memtable_whole_key_filtering = true; Reopen(options); // check memtable bloom stats ASSERT_OK(Put(key3, value3, WriteOptions())); ASSERT_EQ("NOT_FOUND", Get(key2)); // whole key bloom filter kicks in and determines it's a miss ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); // verify whole key filtering does not depend on prefix_extractor options.prefix_extractor.reset(); Reopen(options); // check memtable bloom stats ASSERT_OK(Put(key4, value4, WriteOptions())); ASSERT_EQ("NOT_FOUND", Get(key2)); // whole key bloom filter kicks in and determines it's a miss ASSERT_EQ(2, get_perf_context()->bloom_memtable_miss_count); ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); } TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) { constexpr size_t kPrefixSize = 8; 
const std::string kKey = "key"; assert(kKey.size() < kPrefixSize); Options options = CurrentOptions(); options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixSize)); options.memtable_prefix_bloom_size_ratio = 0.25; Reopen(options); ASSERT_OK(Put(kKey, "v")); ASSERT_EQ("v", Get(kKey)); std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); iter->Seek(kKey); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(kKey, iter->key()); iter->SeekForPrev(kKey); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(kKey, iter->key()); } class DBBloomFilterTestVaryPrefixAndFormatVer : public DBTestBase, public testing::WithParamInterface> { protected: bool use_prefix_; uint32_t format_version_; public: DBBloomFilterTestVaryPrefixAndFormatVer() : DBTestBase("/db_bloom_filter_tests") {} ~DBBloomFilterTestVaryPrefixAndFormatVer() override {} void SetUp() override { use_prefix_ = std::get<0>(GetParam()); format_version_ = std::get<1>(GetParam()); } static std::string UKey(uint32_t i) { return Key(static_cast(i)); } }; TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { Options options = CurrentOptions(); if (use_prefix_) { // Entire key from UKey() options.prefix_extractor.reset(NewCappedPrefixTransform(9)); } options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(20)); bbto.partition_filters = true; bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; bbto.whole_key_filtering = !use_prefix_; if (use_prefix_) { // (not related to prefix, just alternating between) // Make sure code appropriately deals with metadata block size setting // that is "too small" (smaller than minimum size for filter builder) bbto.metadata_block_size = 63; } else { // Make sure the test will work even on platforms with large minimum // filter size, due to large cache line size. // (Largest cache line size + 10+% overhead.) 
bbto.metadata_block_size = 290; } options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); ReadOptions ropts; constexpr uint32_t N = 12000; // Add N/2 evens for (uint32_t i = 0; i < N; i += 2) { ASSERT_OK(Put(UKey(i), UKey(i))); } ASSERT_OK(Flush()); #ifndef ROCKSDB_LITE ASSERT_EQ(TotalTableFiles(), 1); #endif constexpr uint32_t Q = 29; // MultiGet In std::array keys; std::array key_slices; std::array column_families; // MultiGet Out std::array statuses; std::array values; TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT); TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL); TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL); TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED); TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE); TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE); // Check that initial clump of keys only loads one partition filter from // block cache. // And that spread out keys load many partition filters. // In both cases, mix present vs. not present keys. for (uint32_t stride : {uint32_t{1}, (N / Q) | 1}) { for (uint32_t i = 0; i < Q; ++i) { keys[i] = UKey(i * stride); key_slices[i] = Slice(keys[i]); column_families[i] = db_->DefaultColumnFamily(); statuses[i] = Status(); values[i] = PinnableSlice(); } db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0], /*timestamps=*/nullptr, &statuses[0], true); // Confirm correct status results uint32_t number_not_found = 0; for (uint32_t i = 0; i < Q; ++i) { if ((i * stride % 2) == 0) { ASSERT_OK(statuses[i]); } else { ASSERT_TRUE(statuses[i].IsNotFound()); ++number_not_found; } } // Confirm correct Bloom stats (no FPs) uint64_t filter_useful = TestGetAndResetTickerCount( options, use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); uint64_t filter_checked = TestGetAndResetTickerCount(options, use_prefix_ ? 
BLOOM_FILTER_PREFIX_CHECKED : BLOOM_FILTER_FULL_POSITIVE) + (use_prefix_ ? 0 : filter_useful); EXPECT_EQ(filter_useful, number_not_found); EXPECT_EQ(filter_checked, Q); if (!use_prefix_) { EXPECT_EQ( TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), Q - number_not_found); } // Confirm no duplicate loading same filter partition uint64_t filter_accesses = TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); if (stride == 1) { EXPECT_EQ(filter_accesses, 1); } else { // for large stride EXPECT_GE(filter_accesses, Q / 2 + 1); } } // Check that a clump of keys (present and not) works when spanning // two partitions int found_spanning = 0; for (uint32_t start = 0; start < N / 2;) { for (uint32_t i = 0; i < Q; ++i) { keys[i] = UKey(start + i); key_slices[i] = Slice(keys[i]); column_families[i] = db_->DefaultColumnFamily(); statuses[i] = Status(); values[i] = PinnableSlice(); } db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0], /*timestamps=*/nullptr, &statuses[0], true); // Confirm correct status results uint32_t number_not_found = 0; for (uint32_t i = 0; i < Q; ++i) { if (((start + i) % 2) == 0) { ASSERT_OK(statuses[i]); } else { ASSERT_TRUE(statuses[i].IsNotFound()); ++number_not_found; } } // Confirm correct Bloom stats (might see some FPs) uint64_t filter_useful = TestGetAndResetTickerCount( options, use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); uint64_t filter_checked = TestGetAndResetTickerCount(options, use_prefix_ ? BLOOM_FILTER_PREFIX_CHECKED : BLOOM_FILTER_FULL_POSITIVE) + (use_prefix_ ? 
0 : filter_useful); EXPECT_GE(filter_useful, number_not_found - 2); // possible FP EXPECT_EQ(filter_checked, Q); if (!use_prefix_) { EXPECT_EQ( TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), Q - number_not_found); } // Confirm no duplicate loading of same filter partition uint64_t filter_accesses = TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); if (filter_accesses == 2) { // Spanned across partitions. ++found_spanning; if (found_spanning >= 2) { break; } else { // Ensure that at least once we have at least one present and // one non-present key on both sides of partition boundary. start += 2; } } else { EXPECT_EQ(filter_accesses, 1); // See explanation at "start += 2" start += Q - 4; } } EXPECT_TRUE(found_spanning >= 2); } INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer, DBBloomFilterTestVaryPrefixAndFormatVer, ::testing::Values( // (use_prefix, format_version) std::make_tuple(false, 2), std::make_tuple(false, 3), std::make_tuple(false, 4), std::make_tuple(false, 5), std::make_tuple(true, 2), std::make_tuple(true, 3), std::make_tuple(true, 4), std::make_tuple(true, 5))); #ifndef ROCKSDB_LITE namespace { namespace BFP2 { // Extends BFP::Mode with option to use Plain table using PseudoMode = int; static constexpr PseudoMode kPlainTable = -1; } // namespace BFP2 } // namespace class BloomStatsTestWithParam : public DBBloomFilterTest, public testing::WithParamInterface> { public: BloomStatsTestWithParam() { bfp_impl_ = std::get<0>(GetParam()); partition_filters_ = std::get<1>(GetParam()); options_.create_if_missing = true; options_.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(4)); options_.memtable_prefix_bloom_size_ratio = 8.0 * 1024.0 / static_cast(options_.write_buffer_size); if (bfp_impl_ == BFP2::kPlainTable) { assert(!partition_filters_); // not supported in plain table PlainTableOptions table_options; 
options_.table_factory.reset(NewPlainTableFactory(table_options)); } else { BlockBasedTableOptions table_options; table_options.hash_index_allow_collision = false; if (partition_filters_) { assert(bfp_impl_ != BFP::kDeprecatedBlock); table_options.partition_filters = partition_filters_; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; } table_options.filter_policy.reset( new BFP(10, static_cast(bfp_impl_))); options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); } options_.env = env_; get_perf_context()->Reset(); DestroyAndReopen(options_); } ~BloomStatsTestWithParam() override { get_perf_context()->Reset(); Destroy(options_); } // Required if inheriting from testing::WithParamInterface<> static void SetUpTestCase() {} static void TearDownTestCase() {} BFP2::PseudoMode bfp_impl_; bool partition_filters_; Options options_; }; // 1 Insert 2 K-V pairs into DB // 2 Call Get() for both keys - expext memtable bloom hit stat to be 2 // 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1 // 4 Call Flush() to create SST // 5 Call Get() for both keys - expext SST bloom hit stat to be 2 // 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1 // Test both: block and plain SST TEST_P(BloomStatsTestWithParam, BloomStatsTest) { std::string key1("AAAA"); std::string key2("RXDB"); // not in DB std::string key3("ZBRA"); std::string value1("Value1"); std::string value3("Value3"); ASSERT_OK(Put(key1, value1, WriteOptions())); ASSERT_OK(Put(key3, value3, WriteOptions())); // check memtable bloom stats ASSERT_EQ(value1, Get(key1)); ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); ASSERT_EQ(value3, Get(key3)); ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); ASSERT_EQ("NOT_FOUND", Get(key2)); ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); // 
sanity checks ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); Flush(); // sanity checks ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); // check SST bloom stats ASSERT_EQ(value1, Get(key1)); ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(value3, Get(key3)); ASSERT_EQ(2, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ("NOT_FOUND", Get(key2)); ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); } // Same scenario as in BloomStatsTest but using an iterator TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { std::string key1("AAAA"); std::string key2("RXDB"); // not in DB std::string key3("ZBRA"); std::string value1("Value1"); std::string value3("Value3"); ASSERT_OK(Put(key1, value1, WriteOptions())); ASSERT_OK(Put(key3, value3, WriteOptions())); std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); // check memtable bloom stats iter->Seek(key1); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(value1, iter->value().ToString()); ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count); ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); iter->Seek(key3); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(value3, iter->value().ToString()); ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); ASSERT_EQ(0, get_perf_context()->bloom_memtable_miss_count); iter->Seek(key2); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); Flush(); iter.reset(dbfull()->NewIterator(ReadOptions())); // Check SST bloom stats iter->Seek(key1); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(value1, iter->value().ToString()); ASSERT_EQ(1, get_perf_context()->bloom_sst_hit_count); iter->Seek(key3); ASSERT_OK(iter->status()); 
ASSERT_TRUE(iter->Valid()); ASSERT_EQ(value3, iter->value().ToString()); // The seek doesn't check block-based bloom filter because last index key // starts with the same prefix we're seeking to. uint64_t expected_hits = bfp_impl_ == BFP::kDeprecatedBlock ? 1 : 2; ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); iter->Seek(key2); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); } INSTANTIATE_TEST_CASE_P( BloomStatsTestWithParam, BloomStatsTestWithParam, ::testing::Values(std::make_tuple(BFP::kDeprecatedBlock, false), std::make_tuple(BFP::kLegacyBloom, false), std::make_tuple(BFP::kLegacyBloom, true), std::make_tuple(BFP::kFastLocalBloom, false), std::make_tuple(BFP::kFastLocalBloom, true), std::make_tuple(BFP2::kPlainTable, false))); namespace { void PrefixScanInit(DBBloomFilterTest* dbtest) { char buf[100]; std::string keystr; const int small_range_sstfiles = 5; const int big_range_sstfiles = 5; // Generate 11 sst files with the following prefix ranges. // GROUP 0: [0,10] (level 1) // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0) // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0) // // A seek with the previous API would do 11 random I/Os (to all the // files). With the new API and a prefix filter enabled, we should // only do 2 random I/O, to the 2 files containing the key. 
// GROUP 0 snprintf(buf, sizeof(buf), "%02d______:start", 0); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); snprintf(buf, sizeof(buf), "%02d______:end", 10); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); dbtest->Flush(); dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); // move to level 1 // GROUP 1 for (int i = 1; i <= small_range_sstfiles; i++) { snprintf(buf, sizeof(buf), "%02d______:start", i); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); snprintf(buf, sizeof(buf), "%02d______:end", i + 1); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); dbtest->Flush(); } // GROUP 2 for (int i = 1; i <= big_range_sstfiles; i++) { snprintf(buf, sizeof(buf), "%02d______:start", 0); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); dbtest->Flush(); } } } // namespace TEST_F(DBBloomFilterTest, PrefixScan) { while (ChangeFilterOptions()) { int count; Slice prefix; Slice key; char buf[100]; Iterator* iter; snprintf(buf, sizeof(buf), "03______:"); prefix = Slice(buf, 8); key = Slice(buf, 9); ASSERT_EQ(key.difference_offset(prefix), 8); ASSERT_EQ(prefix.difference_offset(key), 8); // db configs env_->count_random_reads_ = true; Options options = CurrentOptions(); options.env = env_; options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.disable_auto_compactions = true; options.max_background_compactions = 2; options.create_if_missing = true; options.memtable_factory.reset(NewHashSkipListRepFactory(16)); assert(!options.unordered_write); // It is incompatible with allow_concurrent_memtable_write=false options.allow_concurrent_memtable_write = false; BlockBasedTableOptions table_options; table_options.no_block_cache = true; table_options.filter_policy.reset(NewBloomFilterPolicy(10)); 
table_options.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); // 11 RAND I/Os DestroyAndReopen(options); PrefixScanInit(this); count = 0; env_->random_read_counter_.Reset(); iter = db_->NewIterator(ReadOptions()); for (iter->Seek(prefix); iter->Valid(); iter->Next()) { if (!iter->key().starts_with(prefix)) { break; } count++; } ASSERT_OK(iter->status()); delete iter; ASSERT_EQ(count, 2); ASSERT_EQ(env_->random_read_counter_.Read(), 2); Close(); } // end of while } TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { Options options = CurrentOptions(); options.write_buffer_size = 64 * 1024; options.arena_block_size = 4 * 1024; options.target_file_size_base = 64 * 1024; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 4; options.max_bytes_for_level_base = 256 * 1024; options.max_write_buffer_number = 2; options.max_background_compactions = 8; options.max_background_flushes = 8; options.compression = kNoCompression; options.compaction_style = kCompactionStyleLevel; options.level_compaction_dynamic_level_bytes = true; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.optimize_filters_for_hits = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); get_perf_context()->Reset(); get_perf_context()->EnablePerLevelPerfContext(); CreateAndReopenWithCF({"mypikachu"}, options); int numkeys = 200000; // Generate randomly shuffled keys, so the updates are almost // random. 
std::vector keys; keys.reserve(numkeys); for (int i = 0; i < numkeys; i += 2) { keys.push_back(i); } RandomShuffle(std::begin(keys), std::end(keys)); int num_inserted = 0; for (int key : keys) { ASSERT_OK(Put(1, Key(key), "val")); if (++num_inserted % 1000 == 0) { dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } } ASSERT_OK(Put(1, Key(0), "val")); ASSERT_OK(Put(1, Key(numkeys), "val")); ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); if (NumTableFilesAtLevel(0, 1) == 0) { // No Level 0 file. Create one. ASSERT_OK(Put(1, Key(0), "val")); ASSERT_OK(Put(1, Key(numkeys), "val")); ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); } for (int i = 1; i < numkeys; i += 2) { ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND"); } ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); // Now we have three sorted run, L0, L5 and L6 with most files in L6 have // no bloom filter. Most keys be checked bloom filters twice. 
ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); uint64_t bloom_filter_useful_all_levels = 0; for (auto& kv : (*(get_perf_context()->level_to_perf_context))) { if (kv.second.bloom_filter_useful > 0) { bloom_filter_useful_all_levels += kv.second.bloom_filter_useful; } } ASSERT_GT(bloom_filter_useful_all_levels, 65000 * 2); ASSERT_LT(bloom_filter_useful_all_levels, 120000 * 2); for (int i = 0; i < numkeys; i += 2) { ASSERT_EQ(Get(1, Key(i)), "val"); } // Part 2 (read path): rewrite last level with blooms, then verify they get // cached only if !optimize_filters_for_hits options.disable_auto_compactions = true; options.num_levels = 9; options.optimize_filters_for_hits = false; options.statistics = CreateDBStatistics(); bbto.block_cache.reset(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); ReopenWithColumnFamilies({"default", "mypikachu"}, options); MoveFilesToLevel(7 /* level */, 1 /* column family index */); std::string value = Get(1, Key(0)); uint64_t prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); value = Get(1, Key(0)); ASSERT_EQ(prev_cache_filter_hits + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); // Now that we know the filter blocks exist in the last level files, see if // filter caching is skipped for this optimization options.optimize_filters_for_hits = true; options.statistics = CreateDBStatistics(); bbto.block_cache.reset(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); ReopenWithColumnFamilies({"default", "mypikachu"}, options); value = Get(1, Key(0)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(2 /* index and data block */, TestGetTickerCount(options, BLOCK_CACHE_ADD)); // Check filter block ignored for files preloaded during DB::Open() options.max_open_files = -1; options.statistics = 
CreateDBStatistics(); bbto.block_cache.reset(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); ReopenWithColumnFamilies({"default", "mypikachu"}, options); uint64_t prev_cache_filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); Get(1, Key(0)); ASSERT_EQ(prev_cache_filter_misses, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(prev_cache_filter_hits, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); // Check filter block ignored for file trivially-moved to bottom level bbto.block_cache.reset(); options.max_open_files = 100; // setting > -1 makes it not preload all files options.statistics = CreateDBStatistics(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); ReopenWithColumnFamilies({"default", "mypikachu"}, options); ASSERT_OK(Put(1, Key(numkeys + 1), "val")); ASSERT_OK(Flush(1)); int32_t trivial_move = 0; int32_t non_trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); CompactRangeOptions compact_options; compact_options.bottommost_level_compaction = BottommostLevelCompaction::kSkip; compact_options.change_level = true; compact_options.target_level = 7; db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); prev_cache_filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); value = Get(1, Key(numkeys + 1)); ASSERT_EQ(prev_cache_filter_hits, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(prev_cache_filter_misses, 
TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); // Check filter block not cached for iterator bbto.block_cache.reset(); options.statistics = CreateDBStatistics(); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); ReopenWithColumnFamilies({"default", "mypikachu"}, options); std::unique_ptr iter(db_->NewIterator(ReadOptions(), handles_[1])); iter->SeekToFirst(); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(2 /* index and data block */, TestGetTickerCount(options, BLOCK_CACHE_ADD)); get_perf_context()->Reset(); } int CountIter(std::unique_ptr& iter, const Slice& key) { int count = 0; for (iter->Seek(key); iter->Valid() && iter->status() == Status::OK(); iter->Next()) { count++; } return count; } // use iterate_upper_bound to hint compatiability of existing bloom filters. // The BF is considered compatible if 1) upper bound and seek key transform // into the same string, or 2) the transformed seek key is of the same length // as the upper bound and two keys are adjacent according to the comparator. 
TEST_F(DBBloomFilterTest, DynamicBloomFilterUpperBound) {
  // Runs the same scenario for every fixed Bloom implementation. One SST is
  // written with a capped:4 prefix extractor; the extractor is then changed
  // at runtime and the PREFIX_CHECKED / PREFIX_USEFUL tickers show whether
  // each (seek key, upper bound) pair caused the SST's filter to be consulted.
  for (auto bfp_impl : BFP::kAllFixedImpls) {
    // 1 for every implementation except the deprecated block-based one; used
    // below as a multiplier on the expected PREFIX_CHECKED counts.
    int using_full_builder = bfp_impl != BFP::kDeprecatedBlock;
    Options options;
    options.create_if_missing = true;
    options.prefix_extractor.reset(NewCappedPrefixTransform(4));
    options.disable_auto_compactions = true;
    options.statistics = CreateDBStatistics();
    // Enable prefix bloom for SST files
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.filter_policy.reset(new BFP(10, bfp_impl));
    table_options.index_shortening = BlockBasedTableOptions::
        IndexShorteningMode::kShortenSeparatorsAndSuccessor;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    ASSERT_OK(Put("abcdxxx0", "val1"));
    ASSERT_OK(Put("abcdxxx1", "val2"));
    ASSERT_OK(Put("abcdxxx2", "val3"));
    ASSERT_OK(Put("abcdxxx3", "val4"));
    // The ticker expectations below assume the SST exists, so a failed flush
    // must stop the test here.
    ASSERT_OK(dbfull()->Flush(FlushOptions()));
    {
      // prefix_extractor has not changed, BF will always be read
      Slice upper_bound("abce");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abcd0000"), 4);
    }
    {
      Slice upper_bound("abcdzzzz");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abcd0000"), 4);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
    }
    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}}));
    ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
                        "rocksdb.FixedPrefix.5"));
    {
      // BF changed, [abcdxx00, abce) is a valid bound, will trigger BF read
      Slice upper_bound("abce");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abcdxx00"), 4);
      // should check bloom filter since upper bound meets requirement
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                2 + using_full_builder);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
    }
    {
      // [abcdxx01, abcey) is not valid bound since upper bound is too long for
      // the BF in SST (capped:4)
      Slice upper_bound("abcey");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abcdxx01"), 4);
      // should skip bloom filter since upper bound is too long
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                2 + using_full_builder);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
    }
    {
      // [abcdxx02, abcdy) is a valid bound since the prefix is the same
      Slice upper_bound("abcdy");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abcdxx02"), 4);
      // should check bloom filter since upper bound matches transformed seek
      // key
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                2 + using_full_builder * 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
    }
    {
      // [aaaaaaaa, abce) is not a valid bound since 1) they don't share the
      // same prefix, 2) the prefixes are not consecutive
      Slice upper_bound("abce");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "aaaaaaaa"), 0);
      // should skip bloom filter since mismatch is found
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                2 + using_full_builder * 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
    }
    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:3"}}));
    {
      // [abc, abd) is not a valid bound since the upper bound is too short
      // for BF (capped:4)
      Slice upper_bound("abd");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abc"), 4);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                2 + using_full_builder * 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
    }
    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:4"}}));
    {
      // set back to capped:4 and verify BF is always read
      Slice upper_bound("abd");
      ReadOptions read_options;
      read_options.prefix_same_as_start = true;
      read_options.iterate_upper_bound = &upper_bound;
      std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter, "abc"), 0);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                3 + using_full_builder * 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
    }
  }
}

// Create multiple SST files each with a different prefix_extractor config,
// verify iterators can read all SST files using the latest config.
TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) {
  // For every fixed Bloom implementation: write three SST files, each under a
  // different prefix_extractor (fixed:1, capped:3, fixed:2), and check via the
  // PREFIX_CHECKED / PREFIX_USEFUL tickers which filters each iterator
  // consults as the extractor changes.
  for (auto bfp_impl : BFP::kAllFixedImpls) {
    // 1 for every implementation except the deprecated block-based one; used
    // below as a multiplier on the expected PREFIX_CHECKED counts.
    int using_full_builder = bfp_impl != BFP::kDeprecatedBlock;
    Options options;
    options.create_if_missing = true;
    options.prefix_extractor.reset(NewFixedPrefixTransform(1));
    options.disable_auto_compactions = true;
    options.statistics = CreateDBStatistics();
    // Enable prefix bloom for SST files
    BlockBasedTableOptions table_options;
    table_options.filter_policy.reset(new BFP(10, bfp_impl));
    table_options.cache_index_and_filter_blocks = true;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    Slice upper_bound("foz90000");
    ReadOptions read_options;
    read_options.prefix_same_as_start = true;

    // first SST with fixed:1 BF
    ASSERT_OK(Put("foo2", "bar2"));
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_OK(Put("foq1", "bar1"));
    ASSERT_OK(Put("fpa", "0"));
    // Ticker expectations depend on the SST existing, so check the flush.
    ASSERT_OK(dbfull()->Flush(FlushOptions()));
    std::unique_ptr<Iterator> iter_old(db_->NewIterator(read_options));
    ASSERT_EQ(CountIter(iter_old, "foo"), 4);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1);

    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
    ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
                        "rocksdb.CappedPrefix.3"));
    read_options.iterate_upper_bound = &upper_bound;
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_EQ(CountIter(iter, "foo"), 2);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
              1 + using_full_builder);
    ASSERT_EQ(CountIter(iter, "gpk"), 0);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
              1 + using_full_builder);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);

    // second SST with capped:3 BF
    ASSERT_OK(Put("foo3", "bar3"));
    ASSERT_OK(Put("foo4", "bar4"));
    ASSERT_OK(Put("foq5", "bar5"));
    ASSERT_OK(Put("fpb", "1"));
    ASSERT_OK(dbfull()->Flush(FlushOptions()));
    {
      // BF is capped:3 now
      std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter_tmp, "foo"), 4);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                2 + using_full_builder * 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0);
      ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
      // both counters are incremented because BF is "not changed" for 1 of the
      // 2 SST files, so filter is checked once and found no match.
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                3 + using_full_builder * 2);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
    }

    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}}));
    ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
                        "rocksdb.FixedPrefix.2"));
    // third SST with fixed:2 BF
    ASSERT_OK(Put("foo6", "bar6"));
    ASSERT_OK(Put("foo7", "bar7"));
    ASSERT_OK(Put("foq8", "bar8"));
    ASSERT_OK(Put("fpc", "2"));
    ASSERT_OK(dbfull()->Flush(FlushOptions()));
    {
      // BF is fixed:2 now
      std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter_tmp, "foo"), 9);
      // the first and last BF are checked
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                4 + using_full_builder * 3);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 1);
      ASSERT_EQ(CountIter(iter_tmp, "gpk"), 0);
      // only last BF is checked and not found
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                5 + using_full_builder * 3);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
    }

    // iter_old can only see the first SST, so checked plus 1
    ASSERT_EQ(CountIter(iter_old, "foo"), 4);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
              6 + using_full_builder * 3);
    // iter was created after the first setoptions call so only full filter
    // will check the filter
    ASSERT_EQ(CountIter(iter, "foo"), 2);
    ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
              6 + using_full_builder * 4);

    {
      // keys in all three SSTs are visible to iterator
      // The range of [foo, foz90000] is compatible with (fixed:1) and (fixed:2)
      // so +2 for checked counter
      std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter_all, "foo"), 9);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                7 + using_full_builder * 5);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 2);
      ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                8 + using_full_builder * 5);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
    }
    ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}}));
    ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(),
                        "rocksdb.CappedPrefix.3"));
    {
      std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options));
      ASSERT_EQ(CountIter(iter_all, "foo"), 6);
      // all three SST are checked because the current options has the same as
      // the remaining SST (capped:3)
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                9 + using_full_builder * 7);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3);
      ASSERT_EQ(CountIter(iter_all, "gpk"), 0);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED),
                10 + using_full_builder * 7);
      ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 4);
    }
    // TODO(Zhongyi): Maybe also need to add Get calls to test point look up?
  }
}

// Create a new column family in a running DB, change prefix_extractor // dynamically, verify the iterator created on the new column family behaves // as expected TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { int iteration = 0; for (auto bfp_impl : BFP::kAllFixedImpls) { Options options = CurrentOptions(); options.create_if_missing = true; options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; options.statistics = CreateDBStatistics(); // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(new BFP(10, bfp_impl)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu" + std::to_string(iteration)}, options); ReadOptions read_options; read_options.prefix_same_as_start = true; // create a new CF and set prefix_extractor dynamically options.prefix_extractor.reset(NewCappedPrefixTransform(3)); CreateColumnFamilies({"ramen_dojo_" + std::to_string(iteration)}, options); ASSERT_EQ(0, strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(), "rocksdb.CappedPrefix.3")); ASSERT_OK(Put(2, "foo3", "bar3")); ASSERT_OK(Put(2, "foo4", "bar4")); ASSERT_OK(Put(2, "foo5", "bar5")); ASSERT_OK(Put(2, "foq6", "bar6")); ASSERT_OK(Put(2, "fpq7", "bar7")); dbfull()->Flush(FlushOptions()); { std::unique_ptr iter( db_->NewIterator(read_options, handles_[2])); ASSERT_EQ(CountIter(iter, "foo"), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK( dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}})); ASSERT_EQ(0, strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(), "rocksdb.FixedPrefix.2")); { std::unique_ptr iter( db_->NewIterator(read_options, handles_[2])); ASSERT_EQ(CountIter(iter, "foo"), 4);
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->DropColumnFamily(handles_[2])); dbfull()->DestroyColumnFamilyHandle(handles_[2]); handles_[2] = nullptr; ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); dbfull()->DestroyColumnFamilyHandle(handles_[1]); handles_[1] = nullptr; iteration++; } } // Verify it's possible to change prefix_extractor at runtime and iterators // behaves as expected TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { for (auto bfp_impl : BFP::kAllFixedImpls) { Options options; options.create_if_missing = true; options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; options.statistics = CreateDBStatistics(); // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(new BFP(10, bfp_impl)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(Put("foo2", "bar2")); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("fpa", "0")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("foo3", "bar3")); ASSERT_OK(Put("foo4", "bar4")); ASSERT_OK(Put("foo5", "bar5")); ASSERT_OK(Put("fpb", "1")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("foo6", "bar6")); ASSERT_OK(Put("foo7", "bar7")); ASSERT_OK(Put("foo8", "bar8")); ASSERT_OK(Put("fpc", "2")); dbfull()->Flush(FlushOptions()); ReadOptions read_options; read_options.prefix_same_as_start = true; { std::unique_ptr iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 3); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } std::unique_ptr iter_old(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_old, "foo"), 12); 
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(), "rocksdb.CappedPrefix.3")); { std::unique_ptr iter(db_->NewIterator(read_options)); // "fp*" should be skipped ASSERT_EQ(CountIter(iter, "foo"), 9); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 6); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } // iterator created before should not be affected and see all keys ASSERT_EQ(CountIter(iter_old, "foo"), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 9); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_EQ(CountIter(iter_old, "abc"), 0); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 12); ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); } } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_compaction_filter_test.cc000066400000000000000000000676541370372246700213700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h"
#include "port/stack_trace.h"

namespace ROCKSDB_NAMESPACE {

// Global counters bumped by the test filters below; tests reset them before
// each compaction they want to measure.
static int cfilter_count = 0;
static int cfilter_skips = 0;

// Replacement value written by ChangeFilter during compaction.
static std::string NEW_VALUE = "NewValue";

// Fixture for compaction-filter tests.
class DBTestCompactionFilter : public DBTestBase {
 public:
  DBTestCompactionFilter() : DBTestBase("/db_compaction_filter_test") {}
};

// Param variant of DBTestBase::ChangeCompactOptions: the test parameter
// selects the option configuration used to (re)open the DB.
class DBTestCompactionFilterWithCompactParam
    : public DBTestCompactionFilter,
      public ::testing::WithParamInterface<int> {
 public:
  DBTestCompactionFilterWithCompactParam() : DBTestCompactionFilter() {
    option_config_ = GetParam();
    Destroy(last_options_);
    auto options = CurrentOptions();
    if (option_config_ == kDefault || option_config_ == kUniversalCompaction ||
        option_config_ == kUniversalCompactionMultiLevel) {
      options.create_if_missing = true;
    }
    if (option_config_ == kLevelSubcompactions ||
        option_config_ == kUniversalSubcompactions) {
      assert(options.max_subcompactions > 1);
    }
    TryReopen(options);
  }
};

#ifndef ROCKSDB_VALGRIND_RUN
INSTANTIATE_TEST_CASE_P(
    CompactionFilterWithOption, DBTestCompactionFilterWithCompactParam,
    ::testing::Values(DBTestBase::OptionConfig::kDefault,
                      DBTestBase::OptionConfig::kUniversalCompaction,
                      DBTestBase::OptionConfig::kUniversalCompactionMultiLevel,
                      DBTestBase::OptionConfig::kLevelSubcompactions,
                      DBTestBase::OptionConfig::kUniversalSubcompactions));
#else
// Run fewer cases in valgrind
INSTANTIATE_TEST_CASE_P(CompactionFilterWithOption,
                        DBTestCompactionFilterWithCompactParam,
                        ::testing::Values(DBTestBase::OptionConfig::kDefault));
#endif  // ROCKSDB_VALGRIND_RUN

// Keeps every entry; only counts invocations.
class KeepFilter : public CompactionFilter {
 public:
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    cfilter_count++;
    return false;
  }

  const char* Name() const override { return "KeepFilter"; }
};

// Removes every entry and counts invocations.
class DeleteFilter : public CompactionFilter {
 public:
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    cfilter_count++;
    return true;
  }

  const char* Name() const override { return "DeleteFilter"; }
};

// Removes keys whose integer value lies in (5, 105]; reports
// IgnoreSnapshots() == true. Keys must parse with std::stoi.
class DeleteISFilter : public CompactionFilter {
 public:
  bool Filter(int /*level*/, const Slice& key, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    cfilter_count++;
    const int i = std::stoi(key.ToString());
    return i > 5 && i <= 105;
  }

  bool IgnoreSnapshots() const override { return true; }

  // Previously returned "DeleteFilter" (copy-paste from the class above).
  const char* Name() const override { return "DeleteISFilter"; }
};

// Skip x if floor(x/10) is even, use range skips. Requires that keys are
// zero-padded to length 10.
class SkipEvenFilter : public CompactionFilter {
 public:
  Decision FilterV2(int /*level*/, const Slice& key, ValueType /*value_type*/,
                    const Slice& /*existing_value*/,
                    std::string* /*new_value*/,
                    std::string* skip_until) const override {
    cfilter_count++;
    int i = std::stoi(key.ToString());
    if (i / 10 % 2 == 0) {
      // Skip to the first key of the next decade.
      char key_str[100];
      snprintf(key_str, sizeof(key_str), "%010d", i / 10 * 10 + 10);
      *skip_until = key_str;
      ++cfilter_skips;
      return Decision::kRemoveAndSkipUntil;
    }
    return Decision::kKeep;
  }

  bool IgnoreSnapshots() const override { return true; }

  // Previously returned "DeleteFilter" (copy-paste).
  const char* Name() const override { return "SkipEvenFilter"; }
};

// Removes every entry and advances the test env's addon_time_ by 1000 per
// invocation.
class DelayFilter : public CompactionFilter {
 public:
  explicit DelayFilter(DBTestBase* d) : db_test(d) {}
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    db_test->env_->addon_time_.fetch_add(1000);
    return true;
  }

  const char* Name() const override { return "DelayFilter"; }

 private:
  DBTestBase* db_test;
};

// Removes entries whose value equals *filtered_value. The pointed-to string
// is not owned and must outlive the filter.
class ConditionalFilter : public CompactionFilter {
 public:
  explicit ConditionalFilter(const std::string* filtered_value)
      : filtered_value_(filtered_value) {}
  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& value,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    return value.ToString() == *filtered_value_;
  }

  const char* Name() const override { return "ConditionalFilter"; }

 private:
  const std::string* filtered_value_;
};

// Keeps every entry but rewrites its value to NEW_VALUE.
class ChangeFilter : public CompactionFilter {
 public:
  explicit ChangeFilter() {}

  bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/,
              std::string* new_value, bool* value_changed) const override {
    assert(new_value != nullptr);
    *new_value = NEW_VALUE;
    *value_changed = true;
    return false;
  }

  const char* Name() const override { return "ChangeFilter"; }
};

// Produces KeepFilter instances. Optionally checks the compaction context
// handed to CreateCompactionFilter against the expect_* members, and records
// whether a filter was ever requested.
class KeepFilterFactory : public CompactionFilterFactory {
 public:
  explicit KeepFilterFactory(bool check_context = false,
                             bool check_context_cf_id = false)
      : check_context_(check_context),
        check_context_cf_id_(check_context_cf_id),
        // Give the expectations a defined value so an early compaction never
        // compares against indeterminate data.
        expect_full_compaction_(false),
        expect_manual_compaction_(false),
        expect_cf_id_(0),
        compaction_filter_created_(false) {}

  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    if (check_context_) {
      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
    }
    if (check_context_cf_id_) {
      EXPECT_EQ(expect_cf_id_.load(), context.column_family_id);
    }
    compaction_filter_created_ = true;
    return std::unique_ptr<CompactionFilter>(new KeepFilter());
  }

  bool compaction_filter_created() const { return compaction_filter_created_; }

  const char* Name() const override { return "KeepFilterFactory"; }
  bool check_context_;
  bool check_context_cf_id_;
  std::atomic_bool expect_full_compaction_;
  std::atomic_bool expect_manual_compaction_;
  std::atomic<uint32_t> expect_cf_id_;
  // Atomic like the expect_* members: written in CreateCompactionFilter and
  // read by the test body.
  std::atomic_bool compaction_filter_created_;
};

// Produces a DeleteFilter for manual compactions only; otherwise no filter.
class DeleteFilterFactory : public CompactionFilterFactory {
 public:
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    if (context.is_manual_compaction) {
      return std::unique_ptr<CompactionFilter>(new DeleteFilter());
    } else {
      return std::unique_ptr<CompactionFilter>(nullptr);
    }
  }

  const char* Name() const override { return "DeleteFilterFactory"; }
};

// Delete Filter Factory which ignores snapshots
class DeleteISFilterFactory : public CompactionFilterFactory {
 public:
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    if (context.is_manual_compaction) {
      return std::unique_ptr<CompactionFilter>(new DeleteISFilter());
    } else {
      return std::unique_ptr<CompactionFilter>(nullptr);
    }
  }

  // Previously returned "DeleteFilterFactory" (copy-paste).
  const char* Name() const override { return "DeleteISFilterFactory"; }
};

// Produces a SkipEvenFilter for manual compactions only.
class SkipEvenFilterFactory : public CompactionFilterFactory {
 public:
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& context) override {
    if (context.is_manual_compaction) {
      return std::unique_ptr<CompactionFilter>(new SkipEvenFilter());
    } else {
      return std::unique_ptr<CompactionFilter>(nullptr);
    }
  }

  const char* Name() const override { return "SkipEvenFilterFactory"; }
};

// Produces DelayFilter instances bound to the given test fixture.
class DelayFilterFactory : public CompactionFilterFactory {
 public:
  explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& /*context*/) override {
    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
  }

  const char* Name() const override { return "DelayFilterFactory"; }

 private:
  DBTestBase* db_test;
};

// Produces ConditionalFilter instances that point at this factory's own copy
// of the filtered value.
class ConditionalFilterFactory : public CompactionFilterFactory {
 public:
  explicit ConditionalFilterFactory(const Slice& filtered_value)
      : filtered_value_(filtered_value.ToString()) {}

  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& /*context*/) override {
    return std::unique_ptr<CompactionFilter>(
        new ConditionalFilter(&filtered_value_));
  }

  const char* Name() const override { return "ConditionalFilterFactory"; }

 private:
  std::string filtered_value_;
};

// Produces ChangeFilter instances.
class ChangeFilterFactory : public CompactionFilterFactory {
 public:
  explicit ChangeFilterFactory() {}

  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
      const CompactionFilter::Context& /*context*/) override {
    return std::unique_ptr<CompactionFilter>(new ChangeFilter());
  }

  const char* Name() const override { return "ChangeFilterFactory"; }
};

#ifndef ROCKSDB_LITE
// Runs level-by-level compactions first with a keep-everything filter and
// then with a delete-everything filter, checking the filter invocation count
// and the resulting file layout after each step.
TEST_F(DBTestCompactionFilter, CompactionFilter) {
  Options options = CurrentOptions();
  options.max_open_files = -1;
  options.num_levels = 3;
  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write 100K keys, these are written to a few files in L0.
  const std::string value(10, 'x');
  for (int i = 0; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
    ASSERT_OK(Put(1, key, value));
  }
  ASSERT_OK(Flush(1));

  // Push all files to the highest level L2. Verify that
  // the compaction at each level invokes the filter for
  // all the keys in that level.
  cfilter_count = 0;
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
  ASSERT_EQ(cfilter_count, 100000);
  cfilter_count = 0;
  ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
  ASSERT_EQ(cfilter_count, 100000);

  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  ASSERT_NE(NumTableFilesAtLevel(2, 1), 0);
  cfilter_count = 0;

  // All the files are in the lowest level. Walk the internal keys and
  // verify that all 100K records are present and that every one of them
  // has had its sequence number zeroed out.
  int count = 0;
  int total = 0;
  Arena arena;
  {
    InternalKeyComparator icmp(options.comparator);
    ReadRangeDelAggregator range_del_agg(&icmp,
                                         kMaxSequenceNumber /* upper_bound */);
    ScopedArenaIterator iter(dbfull()->NewInternalIterator(
        &arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
    iter->SeekToFirst();
    ASSERT_OK(iter->status());
    while (iter->Valid()) {
      ParsedInternalKey ikey(Slice(), 0, kTypeValue);
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
      total++;
      if (ikey.sequence != 0) {
        count++;
      }
      iter->Next();
    }
  }
  ASSERT_EQ(total, 100000);
  ASSERT_EQ(count, 0);

  // overwrite all the 100K keys once again.
  for (int i = 0; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
    ASSERT_OK(Put(1, key, value));
  }
  ASSERT_OK(Flush(1));

  // push all files to the highest level L2. This
  // means that all keys should pass at least once
  // via the compaction filter
  cfilter_count = 0;
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
  ASSERT_EQ(cfilter_count, 100000);
  cfilter_count = 0;
  ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
  ASSERT_EQ(cfilter_count, 100000);
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  ASSERT_NE(NumTableFilesAtLevel(2, 1), 0);

  // create a new database with the compaction
  // filter in such a way that it deletes all keys
  options.compaction_filter_factory = std::make_shared<DeleteFilterFactory>();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // write all the keys once again.
  for (int i = 0; i < 100000; i++) {
    char key[100];
    snprintf(key, sizeof(key), "B%010d", i);
    ASSERT_OK(Put(1, key, value));
  }
  ASSERT_OK(Flush(1));
  ASSERT_NE(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2, 1), 0);

  // Push all files to the highest level L2. This
  // triggers the compaction filter to delete all keys,
  // verify that at the end of the compaction process,
  // nothing is left.
  cfilter_count = 0;
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
  ASSERT_EQ(cfilter_count, 100000);
  cfilter_count = 0;
  ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
  ASSERT_EQ(cfilter_count, 0);
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);

  {
    // Scan the entire database to ensure that nothing is left
    std::unique_ptr<Iterator> iter(
        db_->NewIterator(ReadOptions(), handles_[1]));
    iter->SeekToFirst();
    count = 0;
    while (iter->Valid()) {
      count++;
      iter->Next();
    }
    ASSERT_EQ(count, 0);
  }

  // Repeat the scan over internal keys: no record may remain, and any
  // record that did would be required to carry a non-zero sequence number.
  count = 0;
  {
    InternalKeyComparator icmp(options.comparator);
    ReadRangeDelAggregator range_del_agg(&icmp,
                                         kMaxSequenceNumber /* upper_bound */);
    ScopedArenaIterator iter(dbfull()->NewInternalIterator(
        &arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
    iter->SeekToFirst();
    ASSERT_OK(iter->status());
    while (iter->Valid()) {
      ParsedInternalKey ikey(Slice(), 0, kTypeValue);
      ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true);
      ASSERT_NE(ikey.sequence, (unsigned)0);
      count++;
      iter->Next();
    }
    ASSERT_EQ(count, 0);
  }
}

// Tests the edge case where compaction does not produce any output -- all
// entries are deleted. The compaction should create bunch of 'DeleteFile'
// entries in VersionEdit, but none of the 'AddFile's.
TEST_F(DBTestCompactionFilter, CompactionFilterDeletesAll) { Options options = CurrentOptions(); options.compaction_filter_factory = std::make_shared(); options.disable_auto_compactions = true; options.create_if_missing = true; DestroyAndReopen(options); // put some data for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10 + table; ++i) { Put(ToString(table * 100 + i), "val"); } Flush(); } // this will produce empty file (delete compaction filter) ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(0U, CountLiveFiles()); Reopen(options); Iterator* itr = db_->NewIterator(ReadOptions()); itr->SeekToFirst(); // empty db ASSERT_TRUE(!itr->Valid()); delete itr; } #endif // ROCKSDB_LITE TEST_P(DBTestCompactionFilterWithCompactParam, CompactionFilterWithValueChange) { Options options = CurrentOptions(); options.num_levels = 3; options.compaction_filter_factory = std::make_shared(); CreateAndReopenWithCF({"pikachu"}, options); // Write 100K+1 keys, these are written to a few files // in L0. We do this so that the current snapshot points // to the 100001 key.The compaction filter is not invoked // on keys that are visible via a snapshot because we // anyways cannot delete it. const std::string value(10, 'x'); for (int i = 0; i < 100001; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); Put(1, key, value); } // push all files to lower levels ASSERT_OK(Flush(1)); if (option_config_ != kUniversalCompactionMultiLevel && option_config_ != kUniversalSubcompactions) { dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); } else { dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); } // re-write all data again for (int i = 0; i < 100001; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); Put(1, key, value); } // push all files to lower levels. This should // invoke the compaction filter for all 100000 keys. 
ASSERT_OK(Flush(1)); if (option_config_ != kUniversalCompactionMultiLevel && option_config_ != kUniversalSubcompactions) { dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); } else { dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); } // verify that all keys now have the new value that // was set by the compaction process. for (int i = 0; i < 100001; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); std::string newvalue = Get(1, key); ASSERT_EQ(newvalue.compare(NEW_VALUE), 0); } } TEST_F(DBTestCompactionFilter, CompactionFilterWithMergeOperator) { std::string one, two, three, four; PutFixed64(&one, 1); PutFixed64(&two, 2); PutFixed64(&three, 3); PutFixed64(&four, 4); Options options = CurrentOptions(); options.create_if_missing = true; options.merge_operator = MergeOperators::CreateUInt64AddOperator(); options.num_levels = 3; // Filter out keys with value is 2. options.compaction_filter_factory = std::make_shared(two); DestroyAndReopen(options); // In the same compaction, a value type needs to be deleted based on // compaction filter, and there is a merge type for the key. compaction // filter result is ignored. ASSERT_OK(db_->Put(WriteOptions(), "foo", two)); ASSERT_OK(Flush()); ASSERT_OK(db_->Merge(WriteOptions(), "foo", one)); ASSERT_OK(Flush()); std::string newvalue = Get("foo"); ASSERT_EQ(newvalue, three); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); newvalue = Get("foo"); ASSERT_EQ(newvalue, three); // value key can be deleted based on compaction filter, leaving only // merge keys. 
ASSERT_OK(db_->Put(WriteOptions(), "bar", two)); ASSERT_OK(Flush()); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); newvalue = Get("bar"); ASSERT_EQ("NOT_FOUND", newvalue); ASSERT_OK(db_->Merge(WriteOptions(), "bar", two)); ASSERT_OK(Flush()); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); newvalue = Get("bar"); ASSERT_EQ(two, two); // Compaction filter never applies to merge keys. ASSERT_OK(db_->Put(WriteOptions(), "foobar", one)); ASSERT_OK(Flush()); ASSERT_OK(db_->Merge(WriteOptions(), "foobar", two)); ASSERT_OK(Flush()); newvalue = Get("foobar"); ASSERT_EQ(newvalue, three); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); newvalue = Get("foobar"); ASSERT_EQ(newvalue, three); // In the same compaction, both of value type and merge type keys need to be // deleted based on compaction filter, and there is a merge type for the key. // For both keys, compaction filter results are ignored. ASSERT_OK(db_->Put(WriteOptions(), "barfoo", two)); ASSERT_OK(Flush()); ASSERT_OK(db_->Merge(WriteOptions(), "barfoo", two)); ASSERT_OK(Flush()); newvalue = Get("barfoo"); ASSERT_EQ(newvalue, four); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); newvalue = Get("barfoo"); ASSERT_EQ(newvalue, four); } #ifndef ROCKSDB_LITE TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { KeepFilterFactory* filter = new KeepFilterFactory(true, true); Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.compaction_filter_factory.reset(filter); options.compression = kNoCompression; options.level0_file_num_compaction_trigger = 8; Reopen(options); int num_keys_per_file = 400; for (int j = 0; j < 3; j++) { // Write several keys. 
const std::string value(10, 'x'); for (int i = 0; i < num_keys_per_file; i++) { char key[100]; snprintf(key, sizeof(key), "B%08d%02d", i, j); Put(key, value); } dbfull()->TEST_FlushMemTable(); // Make sure next file is much smaller so automatic compaction will not // be triggered. num_keys_per_file /= 2; } dbfull()->TEST_WaitForCompact(); // Force a manual compaction cfilter_count = 0; filter->expect_manual_compaction_.store(true); filter->expect_full_compaction_.store(true); filter->expect_cf_id_.store(0); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(cfilter_count, 700); ASSERT_EQ(NumSortedRuns(0), 1); ASSERT_TRUE(filter->compaction_filter_created()); // Verify total number of keys is correct after manual compaction. { int count = 0; int total = 0; Arena arena; InternalKeyComparator icmp(options.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* snapshots */); ScopedArenaIterator iter(dbfull()->NewInternalIterator( &arena, &range_del_agg, kMaxSequenceNumber)); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { ParsedInternalKey ikey(Slice(), 0, kTypeValue); ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true); total++; if (ikey.sequence != 0) { count++; } iter->Next(); } ASSERT_EQ(total, 700); ASSERT_EQ(count, 0); } } #endif // ROCKSDB_LITE TEST_F(DBTestCompactionFilter, CompactionFilterContextCfId) { KeepFilterFactory* filter = new KeepFilterFactory(false, true); filter->expect_cf_id_.store(1); Options options = CurrentOptions(); options.compaction_filter_factory.reset(filter); options.compression = kNoCompression; options.level0_file_num_compaction_trigger = 2; CreateAndReopenWithCF({"pikachu"}, options); int num_keys_per_file = 400; for (int j = 0; j < 3; j++) { // Write several keys. 
const std::string value(10, 'x'); for (int i = 0; i < num_keys_per_file; i++) { char key[100]; snprintf(key, sizeof(key), "B%08d%02d", i, j); Put(1, key, value); } Flush(1); // Make sure next file is much smaller so automatic compaction will not // be triggered. num_keys_per_file /= 2; } dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(filter->compaction_filter_created()); } #ifndef ROCKSDB_LITE // Compaction filters aplies to all records, regardless snapshots. TEST_F(DBTestCompactionFilter, CompactionFilterIgnoreSnapshot) { std::string five = ToString(5); Options options = CurrentOptions(); options.compaction_filter_factory = std::make_shared(); options.disable_auto_compactions = true; options.create_if_missing = true; DestroyAndReopen(options); // Put some data. const Snapshot* snapshot = nullptr; for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10; ++i) { Put(ToString(table * 100 + i), "val"); } Flush(); if (table == 0) { snapshot = db_->GetSnapshot(); } } assert(snapshot != nullptr); cfilter_count = 0; ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // The filter should delete 40 records. ASSERT_EQ(40, cfilter_count); { // Scan the entire database as of the snapshot to ensure // that nothing is left ReadOptions read_options; read_options.snapshot = snapshot; std::unique_ptr iter(db_->NewIterator(read_options)); iter->SeekToFirst(); int count = 0; while (iter->Valid()) { count++; iter->Next(); } ASSERT_EQ(count, 6); read_options.snapshot = nullptr; std::unique_ptr iter1(db_->NewIterator(read_options)); iter1->SeekToFirst(); count = 0; while (iter1->Valid()) { count++; iter1->Next(); } // We have deleted 10 keys from 40 using the compaction filter // Keys 6-9 before the snapshot and 100-105 after the snapshot ASSERT_EQ(count, 30); } // Release the snapshot and compact again -> now all records should be // removed. 
db_->ReleaseSnapshot(snapshot); } #endif // ROCKSDB_LITE TEST_F(DBTestCompactionFilter, SkipUntil) { Options options = CurrentOptions(); options.compaction_filter_factory = std::make_shared(); options.disable_auto_compactions = true; options.create_if_missing = true; DestroyAndReopen(options); // Write 100K keys, these are written to a few files in L0. for (int table = 0; table < 4; ++table) { // Key ranges in tables are [0, 38], [106, 149], [212, 260], [318, 371]. for (int i = table * 6; i < 39 + table * 11; ++i) { char key[100]; snprintf(key, sizeof(key), "%010d", table * 100 + i); Put(key, std::to_string(table * 1000 + i)); } Flush(); } cfilter_skips = 0; ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // Number of skips in tables: 2, 3, 3, 3. ASSERT_EQ(11, cfilter_skips); for (int table = 0; table < 4; ++table) { for (int i = table * 6; i < 39 + table * 11; ++i) { int k = table * 100 + i; char key[100]; snprintf(key, sizeof(key), "%010d", table * 100 + i); auto expected = std::to_string(table * 1000 + i); std::string val; Status s = db_->Get(ReadOptions(), key, &val); if (k / 10 % 2 == 0) { ASSERT_TRUE(s.IsNotFound()); } else { ASSERT_OK(s); ASSERT_EQ(expected, val); } } } } TEST_F(DBTestCompactionFilter, SkipUntilWithBloomFilter) { BlockBasedTableOptions table_options; table_options.whole_key_filtering = false; table_options.filter_policy.reset(NewBloomFilterPolicy(100, false)); Options options = CurrentOptions(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewCappedPrefixTransform(9)); options.compaction_filter_factory = std::make_shared(); options.disable_auto_compactions = true; options.create_if_missing = true; DestroyAndReopen(options); Put("0000000010", "v10"); Put("0000000020", "v20"); // skipped Put("0000000050", "v50"); Flush(); cfilter_skips = 0; EXPECT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); EXPECT_EQ(1, cfilter_skips); Status s; std::string val; 
s = db_->Get(ReadOptions(), "0000000010", &val); ASSERT_OK(s); EXPECT_EQ("v10", val); s = db_->Get(ReadOptions(), "0000000020", &val); EXPECT_TRUE(s.IsNotFound()); s = db_->Get(ReadOptions(), "0000000050", &val); ASSERT_OK(s); EXPECT_EQ("v50", val); } class TestNotSupportedFilter : public CompactionFilter { public: bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { return true; } const char* Name() const override { return "NotSupported"; } bool IgnoreSnapshots() const override { return false; } }; TEST_F(DBTestCompactionFilter, IgnoreSnapshotsFalse) { Options options = CurrentOptions(); options.compaction_filter = new TestNotSupportedFilter(); DestroyAndReopen(options); Put("a", "v10"); Put("z", "v20"); Flush(); Put("a", "v10"); Put("z", "v20"); Flush(); // Comapction should fail because IgnoreSnapshots() = false EXPECT_TRUE(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr) .IsNotSupported()); delete options.compaction_filter; } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_compaction_test.cc000066400000000000000000005530061370372246700200120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/concurrent_task_limiter.h"
#include "rocksdb/experimental.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/utilities/convenience.h"
#include "test_util/fault_injection_test_env.h"
#include "test_util/sync_point.h"
#include "util/concurrent_task_limiter_impl.h"

namespace ROCKSDB_NAMESPACE {

// SYNC_POINT is not supported in released Windows mode.
#if !defined(ROCKSDB_LITE)

// Plain fixture: a DBTestBase rooted at "/db_compaction_test".
class DBCompactionTest : public DBTestBase {
 public:
  DBCompactionTest() : DBTestBase("/db_compaction_test") {}
};

// Value-parameterized fixture. The parameter tuple supplies
// (max_subcompactions, exclusive_manual_compaction).
class DBCompactionTestWithParam
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
 public:
  DBCompactionTestWithParam() : DBTestBase("/db_compaction_test") {
    max_subcompactions_ = std::get<0>(GetParam());
    exclusive_manual_compaction_ = std::get<1>(GetParam());
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  uint32_t max_subcompactions_;
  bool exclusive_manual_compaction_;
};

// NOTE(review): the parameter type was lost in extraction; `bool` is restored
// here -- confirm against the INSTANTIATE_TEST_CASE_P for this fixture.
class DBCompactionDirectIOTest : public DBCompactionTest,
                                 public ::testing::WithParamInterface<bool> {
 public:
  DBCompactionDirectIOTest() : DBCompactionTest() {}
};

namespace {

// Event listener that records the path of every flushed file. All accesses to
// the path list are serialized through `mutex_`.
class FlushedFileCollector : public EventListener {
 public:
  FlushedFileCollector() {}
  ~FlushedFileCollector() override {}

  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
    std::lock_guard<std::mutex> lock(mutex_);
    flushed_files_.push_back(info.file_path);
  }

  // Returns a snapshot (copy) of the paths recorded so far.
  std::vector<std::string> GetFlushedFiles() {
    std::lock_guard<std::mutex> lock(mutex_);
    return flushed_files_;
  }

  // Takes the lock as well: OnFlushCompleted may append from another thread
  // while the test clears the list.
  void ClearFlushedFiles() {
    std::lock_guard<std::mutex> lock(mutex_);
    flushed_files_.clear();
  }

 private:
  std::vector<std::string> flushed_files_;
  std::mutex mutex_;
};

// Event listener that keeps one atomic counter per CompactionReason and bumps
// it on compaction completion, external file ingestion and flush completion.
class CompactionStatsCollector : public EventListener {
 public:
  CompactionStatsCollector()
      : compaction_completed_(
            static_cast<int>(CompactionReason::kNumOfReasons)) {
    for (auto& v : compaction_completed_) {
      v.store(0);
    }
  }

  ~CompactionStatsCollector() override {}

  void OnCompactionCompleted(DB* /* db */,
                             const CompactionJobInfo& info) override {
    int k = static_cast<int>(info.compaction_reason);
    int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
    (void)num_of_reasons;  // only read by the assert below
    assert(k >= 0 && k < num_of_reasons);
    compaction_completed_[k]++;
  }

  void OnExternalFileIngested(
      DB* /* db */, const ExternalFileIngestionInfo& /* info */) override {
    int k = static_cast<int>(CompactionReason::kExternalSstIngestion);
    compaction_completed_[k]++;
  }

  void OnFlushCompleted(DB* /* db */, const FlushJobInfo& /* info */) override {
    int k = static_cast<int>(CompactionReason::kFlush);
    compaction_completed_[k]++;
  }

  // Returns the current counter value for `reason`.
  int NumberOfCompactions(CompactionReason reason) const {
    int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
    (void)num_of_reasons;  // only read by the assert below
    int k = static_cast<int>(reason);
    assert(k >= 0 && k < num_of_reasons);
    return compaction_completed_.at(k).load();
  }

 private:
  std::vector<std::atomic<int>> compaction_completed_;
};

// Event listener counting OnTableFileCreationStarted notifications.
class SstStatsCollector : public EventListener {
 public:
  SstStatsCollector() : num_ssts_creation_started_(0) {}

  void OnTableFileCreationStarted(
      const TableFileCreationBriefInfo& /* info */) override {
    ++num_ssts_creation_started_;
  }

  int num_ssts_creation_started() { return num_ssts_creation_started_; }

 private:
  std::atomic<int> num_ssts_creation_started_;
};

static const int kCDTValueSize = 1000;
static const int kCDTKeysPerBuffer = 4;
static const int kCDTNumLevels = 8;

// Returns a copy of `options` tuned for the deletion-trigger tests: no
// compression, a write buffer sized for kCDTKeysPerBuffer values, an L0
// compaction trigger of one file, and auto compactions enabled.
Options DeletionTriggerOptions(Options options) {
  options.compression = kNoCompression;
  options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24);
  options.min_write_buffer_number_to_merge = 1;
  options.max_write_buffer_size_to_maintain = 0;
  options.num_levels = kCDTNumLevels;
  options.level0_file_num_compaction_trigger = 1;
  options.target_file_size_base = options.write_buffer_size * 2;
  options.target_file_size_multiplier = 2;
  options.max_bytes_for_level_base =
      options.target_file_size_base * options.target_file_size_multiplier;
  options.max_bytes_for_level_multiplier = 2;
  options.disable_auto_compactions = false;
  return options;
}

// Returns true when the key ranges [smallestkey, largestkey] of `a` and `b`
// intersect under comparator `c` (timestamps ignored).
bool HaveOverlappingKeyRanges(const Comparator* c, const SstFileMetaData& a,
                              const SstFileMetaData& b) {
  if (c->CompareWithoutTimestamp(a.smallestkey, b.smallestkey) >= 0) {
    if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) {
      // b.smallestkey <= a.smallestkey <= b.largestkey
      return true;
    }
  } else if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) {
    // a.smallestkey < b.smallestkey <= a.largestkey
    return true;
  }
  if (c->CompareWithoutTimestamp(a.largestkey, b.largestkey) <= 0) {
    if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) {
      // b.smallestkey <= a.largestkey <= b.largestkey
      return true;
    }
  } else if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) {
    // a.smallestkey <= b.largestkey < a.largestkey
    return true;
  }
  return false;
}

// Identifies all files between level "min_level" and "max_level"
// which has overlapping key range with "input_file_meta".
// A file that overlaps any already-collected file is collected too, and its
// name is added to `overlapping_file_names`.
void GetOverlappingFileNumbersForLevelCompaction(
    const ColumnFamilyMetaData& cf_meta, const Comparator* comparator,
    int min_level, int max_level, const SstFileMetaData* input_file_meta,
    std::set<std::string>* overlapping_file_names) {
  std::set<const SstFileMetaData*> overlapping_files;
  overlapping_files.insert(input_file_meta);
  for (int m = min_level; m <= max_level; ++m) {
    for (auto& file : cf_meta.levels[m].files) {
      for (auto* included_file : overlapping_files) {
        if (HaveOverlappingKeyRanges(comparator, *included_file, file)) {
          overlapping_files.insert(&file);
          overlapping_file_names->insert(file.name);
          // Stop iterating the set right after inserting into it.
          break;
        }
      }
    }
  }
}

// Debug builds only: asserts that none of the files named in
// `overlapping_file_numbers` is still present in `cf_meta`.
void VerifyCompactionResult(
    const ColumnFamilyMetaData& cf_meta,
    const std::set<std::string>& overlapping_file_numbers) {
#ifndef NDEBUG
  for (auto& level : cf_meta.levels) {
    for (auto& file : level.files) {
      assert(overlapping_file_numbers.find(file.name) ==
             overlapping_file_numbers.end());
    }
  }
#endif
}

/*
 * Verifies compaction stats of cfd are valid.
 *
 * For each level of cfd, its compaction stats are valid if
 * 1) sum(stat.counts) == stat.count, and
 * 2) stat.counts[i] == collector.NumberOfCompactions(i)
 */
void VerifyCompactionStats(ColumnFamilyData& cfd,
                           const CompactionStatsCollector& collector) {
#ifndef NDEBUG
  InternalStats* internal_stats_ptr = cfd.internal_stats();
  ASSERT_TRUE(internal_stats_ptr != nullptr);
  // NOTE(review): element type restored after extraction stripped it; confirm
  // against the return type of TEST_GetCompactionStats().
  const std::vector<InternalStats::CompactionStats>& comp_stats =
      internal_stats_ptr->TEST_GetCompactionStats();
  const int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  std::vector<int> counts(num_of_reasons, 0);
  // Count the number of compactions caused by each CompactionReason across
  // all levels.
  for (const auto& stat : comp_stats) {
    int sum = 0;
    for (int i = 0; i < num_of_reasons; i++) {
      counts[i] += stat.counts[i];
      sum += stat.counts[i];
    }
    ASSERT_EQ(sum, stat.count);
  }
  // Verify InternalStats bookkeeping matches that of CompactionStatsCollector,
  // assuming that all compactions complete.
  for (int i = 0; i < num_of_reasons; i++) {
    ASSERT_EQ(collector.NumberOfCompactions(static_cast<CompactionReason>(i)),
              counts[i]);
  }
#endif /* NDEBUG */
}

// Picks one file out of `cf_meta` using `rand` and returns a pointer to its
// metadata; when `level` is non-null it receives the level of the chosen file.
// NOTE(review): presumably requires cf_meta.file_count > 0 -- confirm callers.
const SstFileMetaData* PickFileRandomly(const ColumnFamilyMetaData& cf_meta,
                                        Random* rand, int* level = nullptr) {
  auto file_id = rand->Uniform(static_cast<int>(cf_meta.file_count)) + 1;
  for (auto& level_meta : cf_meta.levels) {
    if (file_id <= level_meta.files.size()) {
      if (level != nullptr) {
        *level = level_meta.level;
      }
      auto result = rand->Uniform(file_id);
      return &(level_meta.files[result]);
    }
    file_id -= static_cast<uint32_t>(level_meta.files.size());
  }
  assert(false);
  return nullptr;
}
}  // anonymous namespace

#ifndef ROCKSDB_VALGRIND_RUN
// All the TEST_P tests run once with sub_compactions disabled (i.e.
// options.max_subcompactions = 1) and once with it enabled TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) { for (int tid = 0; tid < 3; ++tid) { uint64_t db_size[2]; Options options = DeletionTriggerOptions(CurrentOptions()); options.max_subcompactions = max_subcompactions_; if (tid == 1) { // the following only disable stats update in DB::Open() // and should not affect the result of this test. options.skip_stats_update_on_db_open = true; } else if (tid == 2) { // third pass with universal compaction options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; } DestroyAndReopen(options); Random rnd(301); const int kTestSize = kCDTKeysPerBuffer * 1024; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[0] = Size(Key(0), Key(kTestSize - 1)); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[1] = Size(Key(0), Key(kTestSize - 1)); // must have much smaller db size. 
ASSERT_GT(db_size[0] / 3, db_size[1]); } } #endif // ROCKSDB_VALGRIND_RUN TEST_P(DBCompactionTestWithParam, CompactionsPreserveDeletes) { // For each options type we test following // - Enable preserve_deletes // - write bunch of keys and deletes // - Set start_seqnum to the beginning; compact; check that keys are present // - rewind start_seqnum way forward; compact; check that keys are gone for (int tid = 0; tid < 3; ++tid) { Options options = DeletionTriggerOptions(CurrentOptions()); options.max_subcompactions = max_subcompactions_; options.preserve_deletes=true; options.num_levels = 2; if (tid == 1) { options.skip_stats_update_on_db_open = true; } else if (tid == 2) { // third pass with universal compaction options.compaction_style = kCompactionStyleUniversal; } DestroyAndReopen(options); Random rnd(301); // highlight the default; all deletes should be preserved SetPreserveDeletesSequenceNumber(0); const int kTestSize = kCDTKeysPerBuffer; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } // to ensure we tackle all tombstones CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; dbfull()->TEST_WaitForFlushMemTable(); dbfull()->CompactRange(cro, nullptr, nullptr); // check that normal user iterator doesn't see anything Iterator* db_iter = dbfull()->NewIterator(ReadOptions()); int i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { i++; } ASSERT_EQ(i, 0); delete db_iter; // check that iterator that sees internal keys sees tombstones ReadOptions ro; ro.iter_start_seqnum=1; db_iter = dbfull()->NewIterator(ro); i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { i++; } ASSERT_EQ(i, 4); delete db_iter; // now all deletes should be gone 
SetPreserveDeletesSequenceNumber(100000000); dbfull()->CompactRange(cro, nullptr, nullptr); db_iter = dbfull()->NewIterator(ro); i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { i++; } ASSERT_EQ(i, 0); delete db_iter; } } TEST_F(DBCompactionTest, SkipStatsUpdateTest) { // This test verify UpdateAccumulatedStats is not on // if options.skip_stats_update_on_db_open = true // The test will need to be updated if the internal behavior changes. Options options = DeletionTriggerOptions(CurrentOptions()); options.disable_auto_compactions = true; options.env = env_; DestroyAndReopen(options); Random rnd(301); const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } ASSERT_OK(Flush()); Close(); int update_acc_stats_called = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionStorageInfo::UpdateAccumulatedStats", [&](void* /* arg */) { ++update_acc_stats_called; }); SyncPoint::GetInstance()->EnableProcessing(); // Reopen the DB with stats-update disabled options.skip_stats_update_on_db_open = true; options.max_open_files = 20; Reopen(options); ASSERT_EQ(update_acc_stats_called, 0); // Repeat the reopen process, but this time we enable // stats-update. 
options.skip_stats_update_on_db_open = false; Reopen(options); ASSERT_GT(update_acc_stats_called, 0); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, TestTableReaderForCompaction) { Options options = CurrentOptions(); options.env = env_; options.new_table_reader_for_compaction_inputs = true; options.max_open_files = 20; options.level0_file_num_compaction_trigger = 3; DestroyAndReopen(options); Random rnd(301); int num_table_cache_lookup = 0; int num_new_table_reader = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "TableCache::FindTable:0", [&](void* arg) { assert(arg != nullptr); bool no_io = *(reinterpret_cast(arg)); if (!no_io) { // filter out cases for table properties queries. num_table_cache_lookup++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "TableCache::GetTableReader:0", [&](void* /*arg*/) { num_new_table_reader++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); for (int k = 0; k < options.level0_file_num_compaction_trigger; ++k) { ASSERT_OK(Put(Key(k), Key(k))); ASSERT_OK(Put(Key(10 - k), "bar")); if (k < options.level0_file_num_compaction_trigger - 1) { num_table_cache_lookup = 0; Flush(); dbfull()->TEST_WaitForCompact(); // preloading iterator issues one table cache lookup and create // a new table reader, if not preloaded. int old_num_table_cache_lookup = num_table_cache_lookup; ASSERT_GE(num_table_cache_lookup, 1); ASSERT_EQ(num_new_table_reader, 1); num_table_cache_lookup = 0; num_new_table_reader = 0; ASSERT_EQ(Key(k), Get(Key(k))); // lookup iterator from table cache and no need to create a new one. ASSERT_EQ(old_num_table_cache_lookup + num_table_cache_lookup, 2); ASSERT_EQ(num_new_table_reader, 0); } } num_table_cache_lookup = 0; num_new_table_reader = 0; Flush(); dbfull()->TEST_WaitForCompact(); // Preloading iterator issues one table cache lookup and creates // a new table reader. 
One file is created for flush and one for compaction. // Compaction inputs make no table cache look-up for data/range deletion // iterators // May preload table cache too. ASSERT_GE(num_table_cache_lookup, 2); int old_num_table_cache_lookup2 = num_table_cache_lookup; // Create new iterator for: // (1) 1 for verifying flush results // (2) 1 for verifying compaction results. // (3) New TableReaders will not be created for compaction inputs ASSERT_EQ(num_new_table_reader, 2); num_table_cache_lookup = 0; num_new_table_reader = 0; ASSERT_EQ(Key(1), Get(Key(1))); ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 5); ASSERT_EQ(num_new_table_reader, 0); num_table_cache_lookup = 0; num_new_table_reader = 0; CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; db_->CompactRange(cro, nullptr, nullptr); // Only verifying compaction outputs issues one table cache lookup // for both data block and range deletion block). // May preload table cache too. ASSERT_GE(num_table_cache_lookup, 1); old_num_table_cache_lookup2 = num_table_cache_lookup; // One for verifying compaction results. // No new iterator created for compaction. ASSERT_EQ(num_new_table_reader, 1); num_table_cache_lookup = 0; num_new_table_reader = 0; ASSERT_EQ(Key(1), Get(Key(1))); ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3); ASSERT_EQ(num_new_table_reader, 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) { for (int tid = 0; tid < 2; ++tid) { uint64_t db_size[3]; Options options = DeletionTriggerOptions(CurrentOptions()); options.max_subcompactions = max_subcompactions_; if (tid == 1) { // second pass with universal compaction options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; } DestroyAndReopen(options); Random rnd(301); // round 1 --- insert key/value pairs. 
const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[0] = Size(Key(0), Key(kTestSize - 1)); Close(); // round 2 --- disable auto-compactions and issue deletions. options.create_if_missing = false; options.disable_auto_compactions = true; Reopen(options); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } db_size[1] = Size(Key(0), Key(kTestSize - 1)); Close(); // as auto_compaction is off, we shouldn't see too much reduce // in db size. ASSERT_LT(db_size[0] / 3, db_size[1]); // round 3 --- reopen db with auto_compaction on and see if // deletion compensation still work. options.disable_auto_compactions = false; Reopen(options); // insert relatively small amount of data to trigger auto compaction. for (int k = 0; k < kTestSize / 10; ++k) { ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[2] = Size(Key(0), Key(kTestSize - 1)); // this time we're expecting significant drop in size. ASSERT_GT(db_size[0] / 3, db_size[2]); } } TEST_F(DBCompactionTest, CompactRangeBottomPri) { ASSERT_OK(Put(Key(50), "")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(100), "")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(200), "")); ASSERT_OK(Flush()); { CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; dbfull()->CompactRange(cro, nullptr, nullptr); } ASSERT_EQ("0,0,3", FilesPerLevel(0)); ASSERT_OK(Put(Key(1), "")); ASSERT_OK(Put(Key(199), "")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(2), "")); ASSERT_OK(Put(Key(199), "")); ASSERT_OK(Flush()); ASSERT_EQ("2,0,3", FilesPerLevel(0)); // Now we have 2 L0 files, and 3 L2 files, and a manual compaction will // be triggered. // Two compaction jobs will run. 
One compacts 2 L0 files in Low Pri Pool // and one compact to L2 in bottom pri pool. int low_pri_count = 0; int bottom_pri_count = 0; SyncPoint::GetInstance()->SetCallBack( "ThreadPoolImpl::Impl::BGThread:BeforeRun", [&](void* arg) { Env::Priority* pri = reinterpret_cast(arg); // First time is low pri pool in the test case. if (low_pri_count == 0 && bottom_pri_count == 0) { ASSERT_EQ(Env::Priority::LOW, *pri); } if (*pri == Env::Priority::LOW) { low_pri_count++; } else { bottom_pri_count++; } }); SyncPoint::GetInstance()->EnableProcessing(); env_->SetBackgroundThreads(1, Env::Priority::BOTTOM); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(1, low_pri_count); ASSERT_EQ(1, bottom_pri_count); ASSERT_EQ("0,0,2", FilesPerLevel(0)); // Recompact bottom most level uses bottom pool CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; dbfull()->CompactRange(cro, nullptr, nullptr); ASSERT_EQ(1, low_pri_count); ASSERT_EQ(2, bottom_pri_count); env_->SetBackgroundThreads(0, Env::Priority::BOTTOM); dbfull()->CompactRange(cro, nullptr, nullptr); // Low pri pool is used if bottom pool has size 0. ASSERT_EQ(2, low_pri_count); ASSERT_EQ(2, bottom_pri_count); SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, DisableStatsUpdateReopen) { uint64_t db_size[3]; for (int test = 0; test < 2; ++test) { Options options = DeletionTriggerOptions(CurrentOptions()); options.skip_stats_update_on_db_open = (test == 0); env_->random_read_counter_.Reset(); DestroyAndReopen(options); Random rnd(301); // round 1 --- insert key/value pairs. 
const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { values.push_back(RandomString(&rnd, kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[0] = Size(Key(0), Key(kTestSize - 1)); Close(); // round 2 --- disable auto-compactions and issue deletions. options.create_if_missing = false; options.disable_auto_compactions = true; env_->random_read_counter_.Reset(); Reopen(options); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } db_size[1] = Size(Key(0), Key(kTestSize - 1)); Close(); // as auto_compaction is off, we shouldn't see too much reduce // in db size. ASSERT_LT(db_size[0] / 3, db_size[1]); // round 3 --- reopen db with auto_compaction on and see if // deletion compensation still work. options.disable_auto_compactions = false; Reopen(options); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); db_size[2] = Size(Key(0), Key(kTestSize - 1)); if (options.skip_stats_update_on_db_open) { // If update stats on DB::Open is disable, we don't expect // deletion entries taking effect. ASSERT_LT(db_size[0] / 3, db_size[2]); } else { // Otherwise, we should see a significant drop in db size. 
ASSERT_GT(db_size[0] / 3, db_size[2]); } } } TEST_P(DBCompactionTestWithParam, CompactionTrigger) { const int kNumKeysPerFile = 100; Options options = CurrentOptions(); options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.num_levels = 3; options.level0_file_num_compaction_trigger = 3; options.max_subcompactions = max_subcompactions_; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { std::vector values; // Write 100KB (100 values, each 1K) for (int i = 0; i < kNumKeysPerFile; i++) { values.push_back(RandomString(&rnd, 990)); ASSERT_OK(Put(1, Key(i), values[i])); } // put extra key to trigger flush ASSERT_OK(Put(1, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1); } // generate one more file in level-0, and should trigger level-0 compaction std::vector values; for (int i = 0; i < kNumKeysPerFile; i++) { values.push_back(RandomString(&rnd, 990)); ASSERT_OK(Put(1, Key(i), values[i])); } // put extra key to trigger flush ASSERT_OK(Put(1, "", "")); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1); } TEST_F(DBCompactionTest, BGCompactionsAllowed) { // Create several column families. Make compaction triggers in all of them // and see number of compactions scheduled to be less than allowed. const int kNumKeysPerFile = 100; Options options = CurrentOptions(); options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.num_levels = 3; // Should speed up compaction when there are 4 files. 
options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 20; options.soft_pending_compaction_bytes_limit = 1 << 30; // Infinitely large options.max_background_compactions = 3; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); // Block all threads in thread pool. const size_t kTotalTasks = 4; env_->SetBackgroundThreads(4, Env::LOW); test::SleepingBackgroundTask sleeping_tasks[kTotalTasks]; for (size_t i = 0; i < kTotalTasks; i++) { env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_tasks[i], Env::Priority::LOW); sleeping_tasks[i].WaitUntilSleeping(); } CreateAndReopenWithCF({"one", "two", "three"}, options); Random rnd(301); for (int cf = 0; cf < 4; cf++) { for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { for (int i = 0; i < kNumKeysPerFile; i++) { ASSERT_OK(Put(cf, Key(i), "")); } // put extra key to trigger flush ASSERT_OK(Put(cf, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); } } // Now all column families qualify compaction but only one should be // scheduled, because no column family hits speed up condition. ASSERT_EQ(1u, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); // Create two more files for one column family, which triggers speed up // condition, three compactions will be scheduled. for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { for (int i = 0; i < kNumKeysPerFile; i++) { ASSERT_OK(Put(2, Key(i), "")); } // put extra key to trigger flush ASSERT_OK(Put(2, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[2]); ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, NumTableFilesAtLevel(0, 2)); } ASSERT_EQ(3U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); // Unblock all threads to unblock all compactions. 
for (size_t i = 0; i < kTotalTasks; i++) { sleeping_tasks[i].WakeUp(); sleeping_tasks[i].WaitUntilDone(); } dbfull()->TEST_WaitForCompact(); // Verify number of compactions allowed will come back to 1. for (size_t i = 0; i < kTotalTasks; i++) { sleeping_tasks[i].Reset(); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_tasks[i], Env::Priority::LOW); sleeping_tasks[i].WaitUntilSleeping(); } for (int cf = 0; cf < 4; cf++) { for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { for (int i = 0; i < kNumKeysPerFile; i++) { ASSERT_OK(Put(cf, Key(i), "")); } // put extra key to trigger flush ASSERT_OK(Put(cf, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); } } // Now all column families qualify compaction but only one should be // scheduled, because no column family hits speed up condition. ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); for (size_t i = 0; i < kTotalTasks; i++) { sleeping_tasks[i].WakeUp(); sleeping_tasks[i].WaitUntilDone(); } } TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) { Options options = CurrentOptions(); options.write_buffer_size = 100000000; // Large write buffer options.max_subcompactions = max_subcompactions_; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); // Write 8MB (80 values, each 100K) ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); std::vector values; for (int i = 0; i < 80; i++) { values.push_back(RandomString(&rnd, 100000)); ASSERT_OK(Put(1, Key(i), values[i])); } // Reopening moves updates to level-0 ReopenWithColumnFamilies({"default", "pikachu"}, options); dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], true /* disallow trivial move */); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_GT(NumTableFilesAtLevel(1, 1), 1); for (int i = 0; i < 80; i++) { ASSERT_EQ(Get(1, Key(i)), values[i]); } } TEST_F(DBCompactionTest, MinorCompactionsHappen) { do { Options options = 
CurrentOptions(); options.write_buffer_size = 10000; CreateAndReopenWithCF({"pikachu"}, options); const int N = 500; int starting_num_tables = TotalTableFiles(1); for (int i = 0; i < N; i++) { ASSERT_OK(Put(1, Key(i), Key(i) + std::string(1000, 'v'))); } int ending_num_tables = TotalTableFiles(1); ASSERT_GT(ending_num_tables, starting_num_tables); for (int i = 0; i < N; i++) { ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i))); } ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int i = 0; i < N; i++) { ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i))); } } while (ChangeCompactOptions()); } TEST_F(DBCompactionTest, UserKeyCrossFile1) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; options.level0_file_num_compaction_trigger = 3; DestroyAndReopen(options); // create first file and flush to l0 Put("4", "A"); Put("3", "A"); Flush(); dbfull()->TEST_WaitForFlushMemTable(); Put("2", "A"); Delete("3"); Flush(); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ("NOT_FOUND", Get("3")); // move both files down to l1 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ("NOT_FOUND", Get("3")); for (int i = 0; i < 3; i++) { Put("2", "B"); Flush(); dbfull()->TEST_WaitForFlushMemTable(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("NOT_FOUND", Get("3")); } TEST_F(DBCompactionTest, UserKeyCrossFile2) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; options.level0_file_num_compaction_trigger = 3; DestroyAndReopen(options); // create first file and flush to l0 Put("4", "A"); Put("3", "A"); Flush(); dbfull()->TEST_WaitForFlushMemTable(); Put("2", "A"); SingleDelete("3"); Flush(); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ("NOT_FOUND", Get("3")); // move both files down to l1 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ("NOT_FOUND", Get("3")); for (int i = 0; i < 3; i++) { Put("2", "B"); Flush(); 
dbfull()->TEST_WaitForFlushMemTable();
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ("NOT_FOUND", Get("3"));
}

// Writes two overlapping files while holding one snapshot per write, moves
// both files to L1 with CompactFiles(), releases the snapshots, and then
// triggers an automatic L0 compaction. The test passes if the database still
// accepts a write afterwards.
TEST_F(DBCompactionTest, ZeroSeqIdCompaction) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;

  // The listener records the names of flushed files so they can be handed to
  // CompactFiles() below.
  FlushedFileCollector* collector = new FlushedFileCollector();
  options.listeners.emplace_back(collector);

  // compaction options
  CompactionOptions compact_opt;
  compact_opt.compression = kNoCompression;
  compact_opt.output_file_size_limit = 4096;
  const size_t key_len =
      static_cast<size_t>(compact_opt.output_file_size_limit) / 5;

  DestroyAndReopen(options);

  std::vector<const Snapshot*> snaps;

  // create first file and flush to l0
  for (auto& key : {"1", "2", "3", "3", "3", "3"}) {
    ASSERT_OK(Put(key, std::string(key_len, 'A')));
    snaps.push_back(dbfull()->GetSnapshot());
  }
  ASSERT_OK(Flush());
  dbfull()->TEST_WaitForFlushMemTable();

  // create second file and flush to l0
  for (auto& key : {"3", "4", "5", "6", "7", "8"}) {
    ASSERT_OK(Put(key, std::string(key_len, 'A')));
    snaps.push_back(dbfull()->GetSnapshot());
  }
  ASSERT_OK(Flush());
  dbfull()->TEST_WaitForFlushMemTable();

  // move both files down to l1
  ASSERT_OK(
      dbfull()->CompactFiles(compact_opt, collector->GetFlushedFiles(), 1));

  // release snap so that first instance of key(3) can have seqId=0
  for (auto snap : snaps) {
    dbfull()->ReleaseSnapshot(snap);
  }

  // create 3 files in l0 so to trigger compaction
  for (int i = 0; i < options.level0_file_num_compaction_trigger; i++) {
    ASSERT_OK(Put("2", std::string(1, 'A')));
    ASSERT_OK(Flush());
    dbfull()->TEST_WaitForFlushMemTable();
  }

  dbfull()->TEST_WaitForCompact();
  ASSERT_OK(Put("", ""));
}

// Builds two L1 files and compacts them with CompactFiles() while
// CompactionOptions::output_file_size_limit is left at its default. The
// returned status is asserted so that a failing manual compaction fails the
// test instead of passing silently.
TEST_F(DBCompactionTest, ManualCompactionUnknownOutputSize) {
  // github issue #2249
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;
  DestroyAndReopen(options);

  // create two files in l1 that we can compact
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) {
      // make l0 files' ranges overlap to avoid trivial move
      ASSERT_OK(Put(std::to_string(2 * i), std::string(1, 'A')));
      ASSERT_OK(Put(std::to_string(2 * i + 1), std::string(1, 'A')));
      ASSERT_OK(Flush());
      dbfull()->TEST_WaitForFlushMemTable();
    }
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
    ASSERT_EQ(NumTableFilesAtLevel(1, 0), i + 1);
  }

  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
  ASSERT_EQ(2, cf_meta.levels[1].files.size());
  std::vector<std::string> input_filenames;
  for (const auto& sst_file : cf_meta.levels[1].files) {
    input_filenames.push_back(sst_file.name);
  }

  // note CompactionOptions::output_file_size_limit is unset.
  CompactionOptions compact_opt;
  compact_opt.compression = kNoCompression;
  ASSERT_OK(dbfull()->CompactFiles(compact_opt, input_filenames, 1));
}

// Check that writes done during a memtable compaction are recovered
// if the database is shutdown during the memtable compaction.
// The scenario is repeated for every configuration ChangeOptions() yields.
TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    CreateAndReopenWithCF({"pikachu"}, options);

    // Trigger a long memtable compaction and reopen the database during it
    ASSERT_OK(Put(1, "foo", "v1"));                         // Goes to 1st log file
    ASSERT_OK(Put(1, "big1", std::string(10000000, 'x')));  // Fills memtable
    ASSERT_OK(Put(1, "big2", std::string(1000, 'y')));      // Triggers compaction
    ASSERT_OK(Put(1, "bar", "v2"));                         // Goes to new log file

    ReopenWithColumnFamilies({"default", "pikachu"}, options);
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v2", Get(1, "bar"));
    ASSERT_EQ(std::string(10000000, 'x'), Get(1, "big1"));
    ASSERT_EQ(std::string(1000, 'y'), Get(1, "big2"));
  } while (ChangeOptions());
}

// Writes a single L0 file and runs a full-range manual compaction. Checks
// that the file shows up in L1 with the same name and size, that every key is
// still readable, and that the trivial-move sync point fired exactly once.
TEST_P(DBCompactionTestWithParam, TrivialMoveOneFile) {
  int32_t trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  int32_t num_keys = 80;
  int32_t value_size = 100 * 1024;  // 100 KB

  Random rnd(301);
  std::vector<std::string> values;
  for (int i = 0; i < num_keys; i++) {
    values.push_back(RandomString(&rnd, value_size));
    ASSERT_OK(Put(Key(i), values[i]));
  }

  // Reopening moves updates to L0
  Reopen(options);
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 1);  // 1 file in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // 0 files in L1

  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  LiveFileMetaData level0_file = metadata[0];  // L0 file meta

  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = exclusive_manual_compaction_;

  // Compaction will initiate a trivial move from L0 to L1
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));

  // File moved From L0 to L1
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);  // 0 files in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);  // 1 file in L1

  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  ASSERT_EQ(metadata[0].name /* level1_file.name */, level0_file.name);
  ASSERT_EQ(metadata[0].size /* level1_file.size */, level0_file.size);

  for (int i = 0; i < num_keys; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }

  ASSERT_EQ(trivial_move, 1);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, TrivialMoveNonOverlappingFiles) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); // non overlapping ranges std::vector> ranges = { {100, 199}, {300, 399}, {0, 99}, {200, 299}, {600, 699}, {400, 499}, {500, 550}, {551, 599}, }; int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::map values; for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); } int32_t level0_files = NumTableFilesAtLevel(0, 0); ASSERT_EQ(level0_files, ranges.size()); // Multiple files in L0 ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1 CompactRangeOptions cro; cro.exclusive_manual_compaction = exclusive_manual_compaction_; // Since data is non-overlapping we expect compaction to initiate // a trivial move db_->CompactRange(cro, nullptr, nullptr); // We expect that all the files were trivially moved from L0 to L1 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files); for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { ASSERT_EQ(Get(Key(j)), values[j]); } } ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); trivial_move = 0; non_trivial_move = 0; values.clear(); DestroyAndReopen(options); // Same ranges as above but overlapping ranges = { {100, 199}, {300, 399}, {0, 99}, {200, 299}, {600, 699}, {400, 499}, {500, 560}, // this range overlap with the next one {551, 599}, }; for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); } db_->CompactRange(cro, nullptr, nullptr); for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { ASSERT_EQ(Get(Key(j)), values[j]); } } ASSERT_EQ(trivial_move, 0); 
ASSERT_EQ(non_trivial_move, 1);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Flushes two non-overlapping files to L0 and runs a manual compaction with
// change_level=true and target_level=6. Expects both files to end up in L6
// with exactly one trivial-move callback, no non-trivial compaction, and all
// keys still readable.
TEST_P(DBCompactionTestWithParam, TrivialMoveTargetLevel) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  options.num_levels = 7;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);
  int32_t value_size = 10 * 1024;  // 10 KB

  // Add 2 non-overlapping files
  Random rnd(301);
  std::map<int32_t, std::string> values;

  // file 1 [0 => 300]
  for (int32_t i = 0; i <= 300; i++) {
    values[i] = RandomString(&rnd, value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 2 [600 => 700]
  for (int32_t i = 600; i <= 700; i++) {
    values[i] = RandomString(&rnd, value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 2 files in L0
  ASSERT_EQ("2", FilesPerLevel(0));

  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 6;
  compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  // 2 files in L6
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0));

  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  for (int32_t i = 0; i <= 300; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
  for (int32_t i = 600; i <= 700; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }

  // The callbacks above capture this test's locals by reference, so stop
  // sync-point processing before they go out of scope (as the sibling
  // trivial-move tests do).
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, ManualCompactionPartial) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial_move++; }); bool first = true; // Purpose of dependencies: // 4 -> 1: ensure the order of two non-trivial compactions // 5 -> 2 and 5 -> 3: ensure we do a check before two non-trivial compactions // are installed ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBCompaction::ManualPartial:4", "DBCompaction::ManualPartial:1"}, {"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:2"}, {"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (first) { first = false; TEST_SYNC_POINT("DBCompaction::ManualPartial:4"); TEST_SYNC_POINT("DBCompaction::ManualPartial:3"); } else { // second non-trivial compaction TEST_SYNC_POINT("DBCompaction::ManualPartial:2"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.write_buffer_size = 10 * 1024 * 1024; options.num_levels = 7; options.max_subcompactions = max_subcompactions_; options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 3; options.target_file_size_base = 1 << 23; // 8 MB DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB // Add 2 non-overlapping files Random rnd(301); std::map values; // file 1 [0 => 100] for (int32_t i = 0; i < 100; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [100 => 300] for (int32_t i = 100; i < 300; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // 2 files in L0 ASSERT_EQ("2", FilesPerLevel(0)); CompactRangeOptions compact_options; compact_options.change_level = true; 
compact_options.target_level = 6; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; // Trivial move the two non-overlapping files to level 6 ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); // 2 files in L6 ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); // file 3 [ 0 => 200] for (int32_t i = 0; i < 200; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // 1 files in L0 ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel(0)); ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false)); ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, false)); ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, nullptr, false)); ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, false)); ASSERT_OK(dbfull()->TEST_CompactRange(4, nullptr, nullptr, nullptr, false)); // 2 files in L6, 1 file in L5 ASSERT_EQ("0,0,0,0,0,1,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 6); ASSERT_EQ(non_trivial_move, 0); ROCKSDB_NAMESPACE::port::Thread threads([&] { compact_options.change_level = false; compact_options.exclusive_manual_compaction = false; std::string begin_string = Key(0); std::string end_string = Key(199); Slice begin(begin_string); Slice end(end_string); // First non-trivial compaction is triggered ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); }); TEST_SYNC_POINT("DBCompaction::ManualPartial:1"); // file 4 [300 => 400) for (int32_t i = 300; i <= 400; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 5 [400 => 500) for (int32_t i = 400; i <= 500; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 6 [500 => 600) for (int32_t i = 500; i <= 600; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } // Second 
non-trivial compaction is triggered ASSERT_OK(Flush()); // Before two non-trivial compactions are installed, there are 3 files in L0 ASSERT_EQ("3,0,0,0,0,1,2", FilesPerLevel(0)); TEST_SYNC_POINT("DBCompaction::ManualPartial:5"); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); // After two non-trivial compactions are installed, there is 1 file in L6, and // 1 file in L1 ASSERT_EQ("0,1,0,0,0,0,1", FilesPerLevel(0)); threads.join(); for (int32_t i = 0; i < 600; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } } // Disable as the test is flaky. TEST_F(DBCompactionTest, DISABLED_ManualPartialFill) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial_move++; }); bool first = true; bool second = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBCompaction::PartialFill:4", "DBCompaction::PartialFill:1"}, {"DBCompaction::PartialFill:2", "DBCompaction::PartialFill:3"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) { if (first) { TEST_SYNC_POINT("DBCompaction::PartialFill:4"); first = false; TEST_SYNC_POINT("DBCompaction::PartialFill:3"); } else if (second) { } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.write_buffer_size = 10 * 1024 * 1024; options.max_bytes_for_level_multiplier = 2; options.num_levels = 4; options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 3; DestroyAndReopen(options); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); int32_t value_size = 10 * 1024; // 10 KB // Add 
2 non-overlapping files Random rnd(301); std::map values; // file 1 [0 => 100] for (int32_t i = 0; i < 100; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [100 => 300] for (int32_t i = 100; i < 300; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // 2 files in L0 ASSERT_EQ("2", FilesPerLevel(0)); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); // 2 files in L2 ASSERT_EQ("0,0,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); // file 3 [ 0 => 200] for (int32_t i = 0; i < 200; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // 2 files in L2, 1 in L0 ASSERT_EQ("1,0,2", FilesPerLevel(0)); ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false)); // 2 files in L2, 1 in L1 ASSERT_EQ("0,1,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 2); ASSERT_EQ(non_trivial_move, 0); ROCKSDB_NAMESPACE::port::Thread threads([&] { compact_options.change_level = false; compact_options.exclusive_manual_compaction = false; std::string begin_string = Key(0); std::string end_string = Key(199); Slice begin(begin_string); Slice end(end_string); ASSERT_OK(db_->CompactRange(compact_options, &begin, &end)); }); TEST_SYNC_POINT("DBCompaction::PartialFill:1"); // Many files 4 [300 => 4300) for (int32_t i = 0; i <= 5; i++) { for (int32_t j = 300; j < 4300; j++) { if (j == 2300) { ASSERT_OK(Flush()); dbfull()->TEST_WaitForFlushMemTable(); } values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } } // Verify level sizes uint64_t target_size = 4 * options.max_bytes_for_level_base; for (int32_t i = 1; i < options.num_levels; i++) { ASSERT_LE(SizeAtLevel(i), target_size); target_size = static_cast(target_size * 
options.max_bytes_for_level_multiplier); } TEST_SYNC_POINT("DBCompaction::PartialFill:2"); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); threads.join(); for (int32_t i = 0; i < 4300; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } } TEST_F(DBCompactionTest, ManualCompactionWithUnorderedWrite) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL", "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL"}, {"DBImpl::WaitForPendingWrites:BeforeBlock", "DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"}}); Options options = CurrentOptions(); options.unordered_write = true; DestroyAndReopen(options); Put("foo", "v1"); ASSERT_OK(Flush()); Put("bar", "v1"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); port::Thread writer([&]() { Put("foo", "v2"); }); TEST_SYNC_POINT( "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL"); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); writer.join(); ASSERT_EQ(Get("foo"), "v2"); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); Reopen(options); ASSERT_EQ(Get("foo"), "v2"); } TEST_F(DBCompactionTest, DeleteFileRange) { Options options = CurrentOptions(); options.write_buffer_size = 10 * 1024 * 1024; options.max_bytes_for_level_multiplier = 2; options.num_levels = 4; options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 3; DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB // Add 2 non-overlapping files Random rnd(301); std::map values; // file 1 [0 => 100] for (int32_t i = 0; i < 100; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [100 => 300] for (int32_t i = 100; i < 300; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // 2 files in L0 ASSERT_EQ("2", FilesPerLevel(0)); 
CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); // 2 files in L2 ASSERT_EQ("0,0,2", FilesPerLevel(0)); // file 3 [ 0 => 200] for (int32_t i = 0; i < 200; i++) { values[i] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // Many files 4 [300 => 4300) for (int32_t i = 0; i <= 5; i++) { for (int32_t j = 300; j < 4300; j++) { if (j == 2300) { ASSERT_OK(Flush()); dbfull()->TEST_WaitForFlushMemTable(); } values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } } ASSERT_OK(Flush()); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); // Verify level sizes uint64_t target_size = 4 * options.max_bytes_for_level_base; for (int32_t i = 1; i < options.num_levels; i++) { ASSERT_LE(SizeAtLevel(i), target_size); target_size = static_cast(target_size * options.max_bytes_for_level_multiplier); } size_t old_num_files = CountFiles(); std::string begin_string = Key(1000); std::string end_string = Key(2000); Slice begin(begin_string); Slice end(end_string); ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end)); int32_t deleted_count = 0; for (int32_t i = 0; i < 4300; i++) { if (i < 1000 || i > 2000) { ASSERT_EQ(Get(Key(i)), values[i]); } else { ReadOptions roptions; std::string result; Status s = db_->Get(roptions, Key(i), &result); ASSERT_TRUE(s.IsNotFound() || s.ok()); if (s.IsNotFound()) { deleted_count++; } } } ASSERT_GT(deleted_count, 0); begin_string = Key(5000); end_string = Key(6000); Slice begin1(begin_string); Slice end1(end_string); // Try deleting files in range which contain no keys ASSERT_OK( DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin1, &end1)); // Push data from level 0 to level 1 to force all data to be deleted // Note that we don't delete level 0 files compact_options.change_level = true; compact_options.target_level = 1; 
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); dbfull()->TEST_WaitForCompact(); ASSERT_OK( DeleteFilesInRange(db_, db_->DefaultColumnFamily(), nullptr, nullptr)); int32_t deleted_count2 = 0; for (int32_t i = 0; i < 4300; i++) { ReadOptions roptions; std::string result; Status s = db_->Get(roptions, Key(i), &result); ASSERT_TRUE(s.IsNotFound()); deleted_count2++; } ASSERT_GT(deleted_count2, deleted_count); size_t new_num_files = CountFiles(); ASSERT_GT(old_num_files, new_num_files); } TEST_F(DBCompactionTest, DeleteFilesInRanges) { Options options = CurrentOptions(); options.write_buffer_size = 10 * 1024 * 1024; options.max_bytes_for_level_multiplier = 2; options.num_levels = 4; options.max_background_compactions = 3; options.disable_auto_compactions = true; DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::map values; // file [0 => 100), [100 => 200), ... [900, 1000) for (auto i = 0; i < 10; i++) { for (auto j = 0; j < 100; j++) { auto k = i * 100 + j; values[k] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(k), values[k])); } ASSERT_OK(Flush()); } ASSERT_EQ("10", FilesPerLevel(0)); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,10", FilesPerLevel(0)); // file [0 => 100), [200 => 300), ... 
[800, 900) for (auto i = 0; i < 10; i+=2) { for (auto j = 0; j < 100; j++) { auto k = i * 100 + j; ASSERT_OK(Put(Key(k), values[k])); } ASSERT_OK(Flush()); } ASSERT_EQ("5,0,10", FilesPerLevel(0)); ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); ASSERT_EQ("0,5,10", FilesPerLevel(0)); // Delete files in range [0, 299] (inclusive) { auto begin_str1 = Key(0), end_str1 = Key(100); auto begin_str2 = Key(100), end_str2 = Key(200); auto begin_str3 = Key(200), end_str3 = Key(299); Slice begin1(begin_str1), end1(end_str1); Slice begin2(begin_str2), end2(end_str2); Slice begin3(begin_str3), end3(end_str3); std::vector ranges; ranges.push_back(RangePtr(&begin1, &end1)); ranges.push_back(RangePtr(&begin2, &end2)); ranges.push_back(RangePtr(&begin3, &end3)); ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), ranges.data(), ranges.size())); ASSERT_EQ("0,3,7", FilesPerLevel(0)); // Keys [0, 300) should not exist. for (auto i = 0; i < 300; i++) { ReadOptions ropts; std::string result; auto s = db_->Get(ropts, Key(i), &result); ASSERT_TRUE(s.IsNotFound()); } for (auto i = 300; i < 1000; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } } // Delete files in range [600, 999) (exclusive) { auto begin_str1 = Key(600), end_str1 = Key(800); auto begin_str2 = Key(700), end_str2 = Key(900); auto begin_str3 = Key(800), end_str3 = Key(999); Slice begin1(begin_str1), end1(end_str1); Slice begin2(begin_str2), end2(end_str2); Slice begin3(begin_str3), end3(end_str3); std::vector ranges; ranges.push_back(RangePtr(&begin1, &end1)); ranges.push_back(RangePtr(&begin2, &end2)); ranges.push_back(RangePtr(&begin3, &end3)); ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), ranges.data(), ranges.size(), false)); ASSERT_EQ("0,1,4", FilesPerLevel(0)); // Keys [600, 900) should not exist. 
for (auto i = 600; i < 900; i++) { ReadOptions ropts; std::string result; auto s = db_->Get(ropts, Key(i), &result); ASSERT_TRUE(s.IsNotFound()); } for (auto i = 300; i < 600; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } for (auto i = 900; i < 1000; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } } // Delete all files. { RangePtr range; ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), &range, 1)); ASSERT_EQ("", FilesPerLevel(0)); for (auto i = 0; i < 1000; i++) { ReadOptions ropts; std::string result; auto s = db_->Get(ropts, Key(i), &result); ASSERT_TRUE(s.IsNotFound()); } } } TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) { // regression test for #2833: groups of files whose user-keys overlap at the // endpoints could be split by `DeleteFilesInRange`. This caused old data to // reappear, either because a new version of the key was removed, or a range // deletion was partially dropped. It could also cause non-overlapping // invariant to be violated if the files dropped by DeleteFilesInRange were // a subset of files that a range deletion spans. const int kNumL0Files = 2; const int kValSize = 8 << 10; // 8KB Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = kNumL0Files; options.target_file_size_base = 1 << 10; // 1KB DestroyAndReopen(options); // The snapshot prevents key 1 from having its old version dropped. The low // `target_file_size_base` ensures two keys will be in each output file. const Snapshot* snapshot = nullptr; Random rnd(301); // The value indicates which flush the key belonged to, which is enough // for us to determine the keys' relative ages. 
After L0 flushes finish, // files look like: // // File 0: 0 -> vals[0], 1 -> vals[0] // File 1: 1 -> vals[1], 2 -> vals[1] // // Then L0->L1 compaction happens, which outputs keys as follows: // // File 0: 0 -> vals[0], 1 -> vals[1] // File 1: 1 -> vals[0], 2 -> vals[1] // // DeleteFilesInRange shouldn't be allowed to drop just file 0, as that // would cause `1 -> vals[0]` (an older key) to reappear. std::string vals[kNumL0Files]; for (int i = 0; i < kNumL0Files; ++i) { vals[i] = RandomString(&rnd, kValSize); Put(Key(i), vals[i]); Put(Key(i + 1), vals[i]); Flush(); if (i == 0) { snapshot = db_->GetSnapshot(); } } dbfull()->TEST_WaitForCompact(); // Verify `DeleteFilesInRange` can't drop only file 0 which would cause // "1 -> vals[0]" to reappear. std::string begin_str = Key(0), end_str = Key(1); Slice begin = begin_str, end = end_str; ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end)); ASSERT_EQ(vals[1], Get(Key(1))); db_->ReleaseSnapshot(snapshot); } TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.write_buffer_size = 100000000; options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::vector values; // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1", FilesPerLevel(0)); // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 CompactRangeOptions 
compact_options; compact_options.change_level = true; compact_options.target_level = 3; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); CompactRangeOptions cro; cro.exclusive_manual_compaction = exclusive_manual_compaction_; // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_EQ("0,0,0,2", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 4); ASSERT_EQ(non_trivial_move, 0); for (int i = 0; i < 200; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBCompactionTestWithParam, LevelCompactionThirdPath) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; // options = CurrentOptions(options); std::vector filenames; env_->GetChildren(options.db_paths[1].path, &filenames); // Delete archival files. 
for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); } env_->DeleteDir(options.db_paths[1].path); Reopen(options); Random rnd(301); int key_idx = 0; // First three 110KB files are not going to second path. // After that, (100K, 200K) for (int num = 0; num < 3; num++) { GenerateNewFile(&rnd, &key_idx); } // Another 110KB triggers a compaction to 400K file to fill up first path GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(3, GetSstFileCount(options.db_paths[1].path)); // (1, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4", FilesPerLevel(0)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 1) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,1", FilesPerLevel(0)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 2) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,2", FilesPerLevel(0)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 3) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,3", FilesPerLevel(0)); ASSERT_EQ(3, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,4", FilesPerLevel(0)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 5) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,5", FilesPerLevel(0)); ASSERT_EQ(5, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 6) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,6", FilesPerLevel(0)); ASSERT_EQ(6, 
GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 7) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,7", FilesPerLevel(0)); ASSERT_EQ(7, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 4, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,8", FilesPerLevel(0)); ASSERT_EQ(8, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } TEST_P(DBCompactionTestWithParam, LevelCompactionPathUse) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; // options = CurrentOptions(options); std::vector filenames; env_->GetChildren(options.db_paths[1].path, &filenames); // Delete archival files. 
for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); } env_->DeleteDir(options.db_paths[1].path); Reopen(options); Random rnd(301); int key_idx = 0; // Always gets compacted into 1 Level1 file, // 0/1 Level 0 file for (int num = 0; num < 3; num++) { key_idx = 0; GenerateNewFile(&rnd, &key_idx); } key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, 
GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("0,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); key_idx = 0; GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,1", FilesPerLevel(0)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; std::vector option_vector; option_vector.emplace_back(options); ColumnFamilyOptions cf_opt1(options), cf_opt2(options); // Configure CF1 specific paths. 
cf_opt1.cf_paths.emplace_back(dbname_ + "cf1", 500 * 1024); cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_2", 4 * 1024 * 1024); cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_3", 1024 * 1024 * 1024); option_vector.emplace_back(DBOptions(options), cf_opt1); CreateColumnFamilies({"one"},option_vector[1]); // Configura CF2 specific paths. cf_opt2.cf_paths.emplace_back(dbname_ + "cf2", 500 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_2", 4 * 1024 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_3", 1024 * 1024 * 1024); option_vector.emplace_back(DBOptions(options), cf_opt2); CreateColumnFamilies({"two"},option_vector[2]); ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); Random rnd(301); int key_idx = 0; int key_idx1 = 0; int key_idx2 = 0; auto generate_file = [&]() { GenerateNewFile(0, &rnd, &key_idx); GenerateNewFile(1, &rnd, &key_idx1); GenerateNewFile(2, &rnd, &key_idx2); }; auto check_sstfilecount = [&](int path_id, int expected) { ASSERT_EQ(expected, GetSstFileCount(options.db_paths[path_id].path)); ASSERT_EQ(expected, GetSstFileCount(cf_opt1.cf_paths[path_id].path)); ASSERT_EQ(expected, GetSstFileCount(cf_opt2.cf_paths[path_id].path)); }; auto check_filesperlevel = [&](const std::string& expected) { ASSERT_EQ(expected, FilesPerLevel(0)); ASSERT_EQ(expected, FilesPerLevel(1)); ASSERT_EQ(expected, FilesPerLevel(2)); }; auto check_getvalues = [&]() { for (int i = 0; i < key_idx; i++) { auto v = Get(0, Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } for (int i = 0; i < key_idx1; i++) { auto v = Get(1, Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } for (int i = 0; i < key_idx2; i++) { auto v = Get(2, Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } }; // Check that default column family uses db_paths. // And Column family "one" uses cf_paths. // First three 110KB files are not going to second path. 
// After that, (100K, 200K) for (int num = 0; num < 3; num++) { generate_file(); } // Another 110KB triggers a compaction to 400K file to fill up first path generate_file(); check_sstfilecount(1, 3); // (1, 4) generate_file(); check_filesperlevel("1,4"); check_sstfilecount(1, 4); check_sstfilecount(0, 1); // (1, 4, 1) generate_file(); check_filesperlevel("1,4,1"); check_sstfilecount(2, 1); check_sstfilecount(1, 4); check_sstfilecount(0, 1); // (1, 4, 2) generate_file(); check_filesperlevel("1,4,2"); check_sstfilecount(2, 2); check_sstfilecount(1, 4); check_sstfilecount(0, 1); check_getvalues(); ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); check_getvalues(); Destroy(options, true); } TEST_P(DBCompactionTestWithParam, ConvertCompactionStyle) { Random rnd(301); int max_key_level_insert = 200; int max_key_universal_insert = 600; // Stage 1: generate a db with level compaction Options options = CurrentOptions(); options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.num_levels = 4; options.level0_file_num_compaction_trigger = 3; options.max_bytes_for_level_base = 500 << 10; // 500KB options.max_bytes_for_level_multiplier = 1; options.target_file_size_base = 200 << 10; // 200KB options.target_file_size_multiplier = 1; options.max_subcompactions = max_subcompactions_; CreateAndReopenWithCF({"pikachu"}, options); for (int i = 0; i <= max_key_level_insert; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_GT(TotalTableFiles(1, 4), 1); int non_level0_num_files = 0; for (int i = 1; i < options.num_levels; i++) { non_level0_num_files += NumTableFilesAtLevel(i, 1); } ASSERT_GT(non_level0_num_files, 0); // Stage 2: reopen with universal compaction - should fail options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options = CurrentOptions(options); Status s = 
TryReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(s.IsInvalidArgument()); // Stage 3: compact into a single file and move the file to level 0 options = CurrentOptions(); options.disable_auto_compactions = true; options.target_file_size_base = INT_MAX; options.target_file_size_multiplier = 1; options.max_bytes_for_level_base = INT_MAX; options.max_bytes_for_level_multiplier = 1; options.num_levels = 4; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 0; // cannot use kForceOptimized here because the compaction here is expected // to generate one output file compact_options.bottommost_level_compaction = BottommostLevelCompaction::kForce; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr); // Only 1 file in L0 ASSERT_EQ("1", FilesPerLevel(1)); // Stage 4: re-open in universal compaction style and do some db operations options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 4; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 3; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); options.num_levels = 1; ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) { ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); } dbfull()->Flush(FlushOptions()); ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); for (int i = 1; i < options.num_levels; i++) { ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0); } // verify keys inserted in both level compaction style and universal // compaction style std::string keys_in_db; Iterator* iter = dbfull()->NewIterator(ReadOptions(), 
handles_[1]); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { keys_in_db.append(iter->key().ToString()); keys_in_db.push_back(','); } delete iter; std::string expected_keys; for (int i = 0; i <= max_key_universal_insert; i++) { expected_keys.append(Key(i)); expected_keys.push_back(','); } ASSERT_EQ(keys_in_db, expected_keys); } TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_a) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "b", "v")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Delete(1, "b")); ASSERT_OK(Delete(1, "a")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Delete(1, "a")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "a", "v")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("(a->v)", Contents(1)); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ASSERT_EQ("(a->v)", Contents(1)); } while (ChangeCompactOptions()); } TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_b) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Delete(1, "e"); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "c", "cv"); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "", ""); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "d", "dv"); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Put(1, "", ""); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); Delete(1, "d"); Delete(1, "b"); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); 
ASSERT_EQ("(->)(c->cv)", Contents(1)); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ASSERT_EQ("(->)(c->cv)", Contents(1)); } while (ChangeCompactOptions()); } TEST_F(DBCompactionTest, ManualAutoRace) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BGWorkCompaction", "DBCompactionTest::ManualAutoRace:1"}, {"DBImpl::RunManualCompaction:WaitScheduled", "BackgroundCallCompaction:0"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put(1, "foo", ""); Put(1, "bar", ""); Flush(1); Put(1, "foo", ""); Put(1, "bar", ""); // Generate four files in CF 0, which should trigger an auto compaction Put("foo", ""); Put("bar", ""); Flush(); Put("foo", ""); Put("bar", ""); Flush(); Put("foo", ""); Put("bar", ""); Flush(); Put("foo", ""); Put("bar", ""); Flush(); // The auto compaction is scheduled but waited until here TEST_SYNC_POINT("DBCompactionTest::ManualAutoRace:1"); // The auto compaction will wait until the manual compaction is registerd // before processing so that it will be cancelled. dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ("0,1", FilesPerLevel(1)); // Eventually the cancelled compaction will be rescheduled and executed. 
ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Manual compaction over ranges before, after and overlapping the existing
// files, followed by a full-range compaction that must not add to the block
// cache. Runs twice; the second pass reopens with num_levels = 3.
TEST_P(DBCompactionTestWithParam, ManualCompaction) {
  Options options = CurrentOptions();
  options.max_subcompactions = max_subcompactions_;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  CreateAndReopenWithCF({"pikachu"}, options);

  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range overlaps files
    Compact(1, "p", "q");
    ASSERT_EQ("0,0,1", FilesPerLevel(1));

    // Populate a different range
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("1,1,2", FilesPerLevel(1));

    // Compact just the new range
    Compact(1, "b", "f");
    ASSERT_EQ("0,0,2", FilesPerLevel(1));

    // Compact all
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("1,0,2", FilesPerLevel(1));

    uint64_t prev_block_cache_add =
        options.statistics->getTickerCount(BLOCK_CACHE_ADD);
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = exclusive_manual_compaction_;
    // Check the status: if the compaction failed, the block-cache assertion
    // below would pass without having tested anything.
    ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr));
    // Verify manual compaction doesn't fill block cache
    ASSERT_EQ(prev_block_cache_add,
              options.statistics->getTickerCount(BLOCK_CACHE_ADD));

    ASSERT_EQ("0,0,1", FilesPerLevel(1));

    if (iter == 0) {
      options = CurrentOptions();
      options.num_levels = 3;
      options.create_if_missing = true;
      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
      DestroyAndReopen(options);
      CreateAndReopenWithCF({"pikachu"}, options);
    }
  }
}

// Manual level compaction with an explicit output path id; SST counts per
// db_path are asserted after each step.
TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
  options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760); options.max_subcompactions = max_subcompactions_; CreateAndReopenWithCF({"pikachu"}, options); // iter - 0 with 7 levels // iter - 1 with 3 levels for (int iter = 0; iter < 2; ++iter) { for (int i = 0; i < 3; ++i) { ASSERT_OK(Put(1, "p", "begin")); ASSERT_OK(Put(1, "q", "end")); ASSERT_OK(Flush(1)); } ASSERT_EQ("3", FilesPerLevel(1)); ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Compaction range falls before files Compact(1, "", "c"); ASSERT_EQ("3", FilesPerLevel(1)); // Compaction range falls after files Compact(1, "r", "z"); ASSERT_EQ("3", FilesPerLevel(1)); // Compaction range overlaps files Compact(1, "p", "q", 1); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,1", FilesPerLevel(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Populate a different range for (int i = 0; i < 3; ++i) { ASSERT_OK(Put(1, "c", "begin")); ASSERT_OK(Put(1, "e", "end")); ASSERT_OK(Flush(1)); } ASSERT_EQ("3,1", FilesPerLevel(1)); // Compact just the new range Compact(1, "b", "f", 1); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,2", FilesPerLevel(1)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Compact all ASSERT_OK(Put(1, "a", "begin")); ASSERT_OK(Put(1, "z", "end")); ASSERT_OK(Flush(1)); ASSERT_EQ("1,2", FilesPerLevel(1)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); CompactRangeOptions compact_options; compact_options.target_path_id = 1; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,1", FilesPerLevel(1)); 
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); if (iter == 0) { DestroyAndReopen(options); options = CurrentOptions(); options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760); options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760); options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760); options.max_background_flushes = 1; options.num_levels = 3; options.create_if_missing = true; CreateAndReopenWithCF({"pikachu"}, options); } } } TEST_F(DBCompactionTest, FilesDeletedAfterCompaction) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v2")); Compact(1, "a", "z"); const size_t num_files = CountLiveFiles(); for (int i = 0; i < 10; i++) { ASSERT_OK(Put(1, "foo", "v2")); Compact(1, "a", "z"); } ASSERT_EQ(CountLiveFiles(), num_files); } while (ChangeCompactOptions()); } // Check level comapction with compact files TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) { const int kTestKeySize = 16; const int kTestValueSize = 984; const int kEntrySize = kTestKeySize + kTestValueSize; const int kEntriesPerBuffer = 100; Options options; options.create_if_missing = true; options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.compaction_style = kCompactionStyleLevel; options.target_file_size_base = options.write_buffer_size; options.max_bytes_for_level_base = options.target_file_size_base * 2; options.level0_stop_writes_trigger = 2; options.max_bytes_for_level_multiplier = 2; options.compression = kNoCompression; options.max_subcompactions = max_subcompactions_; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize))); } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForCompact(); ColumnFamilyMetaData 
cf_meta; dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); int output_level = static_cast(cf_meta.levels.size()) - 1; for (int file_picked = 5; file_picked > 0; --file_picked) { std::set overlapping_file_names; std::vector compaction_input_file_names; for (int f = 0; f < file_picked; ++f) { int level = 0; auto file_meta = PickFileRandomly(cf_meta, &rnd, &level); compaction_input_file_names.push_back(file_meta->name); GetOverlappingFileNumbersForLevelCompaction( cf_meta, options.comparator, level, output_level, file_meta, &overlapping_file_names); } ASSERT_OK(dbfull()->CompactFiles( CompactionOptions(), handles_[1], compaction_input_file_names, output_level)); // Make sure all overlapping files do not exist after compaction dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); VerifyCompactionResult(cf_meta, overlapping_file_names); } // make sure all key-values are still there. for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { ASSERT_NE(Get(1, ToString(key)), "NOT_FOUND"); } } TEST_P(DBCompactionTestWithParam, PartialCompactionFailure) { Options options; const int kKeySize = 16; const int kKvSize = 1000; const int kKeysPerBuffer = 100; const int kNumL1Files = 5; options.create_if_missing = true; options.write_buffer_size = kKeysPerBuffer * kKvSize; options.max_write_buffer_number = 2; options.target_file_size_base = options.write_buffer_size * (options.max_write_buffer_number - 1); options.level0_file_num_compaction_trigger = kNumL1Files; options.max_bytes_for_level_base = options.level0_file_num_compaction_trigger * options.target_file_size_base; options.max_bytes_for_level_multiplier = 2; options.compression = kNoCompression; options.max_subcompactions = max_subcompactions_; env_->SetBackgroundThreads(1, Env::HIGH); env_->SetBackgroundThreads(1, Env::LOW); // stop the compaction thread until we simulate the file creation failure. 
test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); options.env = env_; DestroyAndReopen(options); const int kNumInsertedKeys = options.level0_file_num_compaction_trigger * (options.max_write_buffer_number - 1) * kKeysPerBuffer; Random rnd(301); std::vector keys; std::vector values; for (int k = 0; k < kNumInsertedKeys; ++k) { keys.emplace_back(RandomString(&rnd, kKeySize)); values.emplace_back(RandomString(&rnd, kKvSize - kKeySize)); ASSERT_OK(Put(Slice(keys[k]), Slice(values[k]))); dbfull()->TEST_WaitForFlushMemTable(); } dbfull()->TEST_FlushMemTable(true); // Make sure the number of L0 files can trigger compaction. ASSERT_GE(NumTableFilesAtLevel(0), options.level0_file_num_compaction_trigger); auto previous_num_level0_files = NumTableFilesAtLevel(0); // Fail the first file creation. env_->non_writable_count_ = 1; sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); // Expect compaction to fail here as one file will fail its // creation. ASSERT_TRUE(!dbfull()->TEST_WaitForCompact().ok()); // Verify L0 -> L1 compaction does fail. ASSERT_EQ(NumTableFilesAtLevel(1), 0); // Verify all L0 files are still there. ASSERT_EQ(NumTableFilesAtLevel(0), previous_num_level0_files); // All key-values must exist after compaction fails. for (int k = 0; k < kNumInsertedKeys; ++k) { ASSERT_EQ(values[k], Get(keys[k])); } env_->non_writable_count_ = 0; // Make sure RocksDB will not get into corrupted state. Reopen(options); // Verify again after reopen. 
for (int k = 0; k < kNumInsertedKeys; ++k) { ASSERT_EQ(values[k], Get(keys[k])); } } TEST_P(DBCompactionTestWithParam, DeleteMovedFileAfterCompaction) { // iter 1 -- delete_obsolete_files_period_micros == 0 for (int iter = 0; iter < 2; ++iter) { // This test triggers move compaction and verifies that the file is not // deleted when it's part of move compaction Options options = CurrentOptions(); options.env = env_; if (iter == 1) { options.delete_obsolete_files_period_micros = 0; } options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; // trigger compaction when we have 2 files OnFileDeletionListener* listener = new OnFileDeletionListener(); options.listeners.emplace_back(listener); options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); Random rnd(301); // Create two 1MB sst files for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); } ASSERT_OK(Flush()); } // this should execute L0->L1 dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,1", FilesPerLevel(0)); // block compactions test::SleepingBackgroundTask sleeping_task; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::LOW); options.max_bytes_for_level_base = 1024 * 1024; // 1 MB Reopen(options); std::unique_ptr iterator(db_->NewIterator(ReadOptions())); ASSERT_EQ("0,1", FilesPerLevel(0)); // let compactions go sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); // this should execute L1->L2 (move) dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,1", FilesPerLevel(0)); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(metadata.size(), 1U); auto moved_file_name = metadata[0].name; // Create two more 1MB sst files for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { ASSERT_OK(Put(Key(i * 50 + j + 100), RandomString(&rnd, 10 * 1024))); } ASSERT_OK(Flush()); } // this should execute both 
L0->L1 and L1->L2 (merge with previous file) dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,2", FilesPerLevel(0)); // iterator is holding the file ASSERT_OK(env_->FileExists(dbname_ + moved_file_name)); listener->SetExpectedFileName(dbname_ + moved_file_name); iterator.reset(); // this file should have been compacted away ASSERT_NOK(env_->FileExists(dbname_ + moved_file_name)); listener->VerifyMatchedCount(1); } } TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) { if (!Zlib_Supported()) { return; } Options options = CurrentOptions(); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; // First two levels have no compression, so that a trivial move between // them will be allowed. 
Level 2 has Zlib compression so that a trivial // move to level 3 will not be allowed options.compression_per_level = {kNoCompression, kNoCompression, kZlibCompression}; int matches = 0, didnt_match = 0, trivial_move = 0, non_trivial = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Compaction::InputCompressionMatchesOutput:Matches", [&](void* /*arg*/) { matches++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Compaction::InputCompressionMatchesOutput:DidntMatch", [&](void* /*arg*/) { didnt_match++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); Random rnd(301); int key_idx = 0; // First three 110KB files are going to level 0 // After that, (100K, 200K) for (int num = 0; num < 3; num++) { GenerateNewFile(&rnd, &key_idx); } // Another 110KB triggers a compaction to 400K file to fill up level 0 GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(4, GetSstFileCount(dbname_)); // (1, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4", FilesPerLevel(0)); // (1, 4, 1) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,1", FilesPerLevel(0)); // (1, 4, 2) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,2", FilesPerLevel(0)); // (1, 4, 3) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,3", FilesPerLevel(0)); // (1, 4, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,4", FilesPerLevel(0)); // (1, 4, 5) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,5", FilesPerLevel(0)); // (1, 4, 6) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,6", FilesPerLevel(0)); // (1, 4, 7) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,7", FilesPerLevel(0)); // (1, 4, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ("1,4,8", FilesPerLevel(0)); 
ASSERT_EQ(matches, 12);
  // Currently, the test relies on the number of calls to
  // InputCompressionMatchesOutput() per compaction.
  const int kCallsToInputCompressionMatch = 2;
  ASSERT_EQ(didnt_match, 8 * kCallsToInputCompressionMatch);
  ASSERT_EQ(trivial_move, 12);
  ASSERT_EQ(non_trivial, 8);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  // Every generated key must be readable, both before and after a reopen.
  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Destroy(options);
}

// Opens the DB with two soft/hard pending-compaction-bytes combinations and
// checks the soft limit reported by GetOptions() afterwards.
TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) {
  Options options = CurrentOptions();
  options.max_background_compactions = 5;
  // Soft limit 0 with hard limit 100: the reported soft limit is expected
  // to be 100.
  options.soft_pending_compaction_bytes_limit = 0;
  options.hard_pending_compaction_bytes_limit = 100;
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit);

  options.max_background_compactions = 3;
  // Soft limit 200 above hard limit 150: the reported soft limit is expected
  // to be 150.
  options.soft_pending_compaction_bytes_limit = 200;
  options.hard_pending_compaction_bytes_limit = 150;
  DestroyAndReopen(options);
  ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit);
}

// This tests for a bug that could cause two level0 compactions running
// concurrently
// TODO(aekmekji): Make sure that the reason this fails when run with
// max_subcompactions > 1 is not a correctness issue but just inherent to
// running parallel L0-L1 compactions
TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 98 << 10;
options.max_write_buffer_number = 2; options.max_background_compactions = 2; DestroyAndReopen(options); // fill up the DB Random rnd(301); for (int num = 0; num < 10; num++) { GenerateNewRandomFile(&rnd); } db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"CompactionJob::Run():Start", "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"}, {"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2", "CompactionJob::Run():End"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // trigger L0 compaction for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; num++) { GenerateNewRandomFile(&rnd, /* nowait */ true); ASSERT_OK(Flush()); } TEST_SYNC_POINT( "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"); GenerateNewRandomFile(&rnd, /* nowait */ true); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr)); for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; num++) { GenerateNewRandomFile(&rnd, /* nowait */ true); ASSERT_OK(Flush()); } TEST_SYNC_POINT( "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2"); dbfull()->TEST_WaitForCompact(); } static std::string ShortKey(int i) { assert(i < 10000); char buf[100]; snprintf(buf, sizeof(buf), "key%04d", i); return std::string(buf); } TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // The key size is guaranteed to be <= 8 class ShortKeyComparator : public Comparator { int Compare(const 
ROCKSDB_NAMESPACE::Slice& a, const ROCKSDB_NAMESPACE::Slice& b) const override { assert(a.size() <= 8); assert(b.size() <= 8); return BytewiseComparator()->Compare(a, b); } const char* Name() const override { return "ShortKeyComparator"; } void FindShortestSeparator( std::string* start, const ROCKSDB_NAMESPACE::Slice& limit) const override { return BytewiseComparator()->FindShortestSeparator(start, limit); } void FindShortSuccessor(std::string* key) const override { return BytewiseComparator()->FindShortSuccessor(key); } } short_key_cmp; Options options = CurrentOptions(); options.target_file_size_base = 100000000; options.write_buffer_size = 100000000; options.max_subcompactions = max_subcompactions_; options.comparator = &short_key_cmp; DestroyAndReopen(options); int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::vector values; // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1", FilesPerLevel(0)); // Compaction will do L0=>L1 (trivial move) then move L1 files to L3 CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 3; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { values.push_back(RandomString(&rnd, value_size)); ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); ASSERT_EQ("1,0,0,1", FilesPerLevel(0)); // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves) // then compacte the bottommost level L3=>L3 (non trivial move) compact_options = CompactRangeOptions(); compact_options.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); 
ASSERT_EQ(trivial_move, 4);
  ASSERT_EQ(non_trivial_move, 1);

  // File with keys [ 200 => 299 ]
  for (int i = 200; i < 300; i++) {
    values.push_back(RandomString(&rnd, value_size));
    ASSERT_OK(Put(ShortKey(i), values[i]));
  }
  ASSERT_OK(Flush());

  ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
  // Reset the counters so the assertions below only cover the kSkip run.
  trivial_move = 0;
  non_trivial_move = 0;
  compact_options = CompactRangeOptions();
  compact_options.bottommost_level_compaction =
      BottommostLevelCompaction::kSkip;
  // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
  // and will skip bottommost level compaction
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,0,0,2", FilesPerLevel(0));
  ASSERT_EQ(trivial_move, 3);
  ASSERT_EQ(non_trivial_move, 0);

  // All 300 values written above must still be readable.
  for (int i = 0; i < 300; i++) {
    ASSERT_EQ(Get(ShortKey(i)), values[i]);
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Builds ten L0 files while a sync-point dependency orders the compaction
// picker against the start of the compaction job, then inspects the
// resulting L0/L1 files and the block cache index miss counter.
TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 5;
  options.max_background_compactions = 2;
  options.max_subcompactions = max_subcompactions_;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.write_buffer_size = 2 << 20;  // 2MB

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(64 << 20);  // 64MB
  table_options.cache_index_and_filter_blocks = true;
  table_options.pin_l0_filter_and_index_blocks_in_cache = true;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  DestroyAndReopen(options);

  const size_t kValueSize = 1 << 20;
  Random rnd(301);
  std::string value(RandomString(&rnd, kValueSize));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"LevelCompactionPicker::PickCompactionBySize:0",
        "CompactionJob::Run():Start"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // index:   0   1   2   3   4   5   6   7   8   9
  // size:  1MB 1MB 1MB 1MB 1MB 2MB 1MB 1MB 1MB 1MB
  // score:                     1.5 1.3 1.5 2.0 inf
  //
  // Files 0-4 will be included in an
L0->L1 compaction. // // L0->L0 will be triggered since the sync points guarantee compaction to base // level is still blocked when files 5-9 trigger another compaction. // // Files 6-9 are the longest span of available files for which // work-per-deleted-file decreases (see "score" row above). for (int i = 0; i < 10; ++i) { ASSERT_OK(Put(Key(0), "")); // prevents trivial move if (i == 5) { ASSERT_OK(Put(Key(i + 1), value + value)); } else { ASSERT_OK(Put(Key(i + 1), value)); } ASSERT_OK(Flush()); ASSERT_EQ(i + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); } dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); std::vector> level_to_files; dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); ASSERT_GE(level_to_files.size(), 2); // at least L0 and L1 // L0 has the 2MB file (not compacted) and 4MB file (output of L0->L0) ASSERT_EQ(2, level_to_files[0].size()); ASSERT_GT(level_to_files[1].size(), 0); for (int i = 0; i < 2; ++i) { ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21); } // The index/filter in the file produced by intra-L0 should not be pinned. // That means clearing unref'd entries in block cache and re-accessing the // file produced by intra-L0 should bump the index block miss count. uint64_t prev_index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); table_options.block_cache->EraseUnRefEntries(); ASSERT_EQ("", Get(Key(0))); ASSERT_EQ(prev_index_misses + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); } TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) { // regression test for issue #2722: L0->L0 compaction can resurrect deleted // keys from older L0 files if L1+ files' key-ranges do not include the key. 
Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 5;
  options.max_background_compactions = 2;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  // Every flushed file carries one value of this size (1 << 20 bytes).
  const size_t kValueSize = 1 << 20;
  Random rnd(301);
  std::string value(RandomString(&rnd, kValueSize));

  // Sync-point dependency between the compaction picker and the start of
  // the compaction job; the comment block below explains the intended effect.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"LevelCompactionPicker::PickCompactionBySize:0",
        "CompactionJob::Run():Start"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // index:   0   1   2   3   4    5    6   7   8   9
  // size:  1MB 1MB 1MB 1MB 1MB  1MB  1MB 1MB 1MB 1MB
  // score:                     1.25 1.33 1.5 2.0 inf
  //
  // Files 0-4 will be included in an L0->L1 compaction.
  //
  // L0->L0 will be triggered since the sync points guarantee compaction to base
  // level is still blocked when files 5-9 trigger another compaction. All files
  // 5-9 are included in the L0->L0 due to work-per-deleted file decreasing.
  //
  // Put a key-value in files 0-4. Delete that key in files 5-9. Verify the
  // L0->L0 preserves the deletion such that the key remains deleted.
  for (int i = 0; i < 10; ++i) {
    // key 0 serves both to prevent trivial move and as the key we want to
    // verify is not resurrected by L0->L0 compaction.
if (i < 5) { ASSERT_OK(Put(Key(0), "")); } else { ASSERT_OK(Delete(Key(0))); } ASSERT_OK(Put(Key(i + 1), value)); ASSERT_OK(Flush()); } dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); std::vector> level_to_files; dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); ASSERT_GE(level_to_files.size(), 2); // at least L0 and L1 // L0 has a single output file from L0->L0 ASSERT_EQ(1, level_to_files[0].size()); ASSERT_GT(level_to_files[1].size(), 0); ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 22); ReadOptions roptions; std::string result; ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound()); } TEST_P(DBCompactionTestWithParam, FullCompactionInBottomPriThreadPool) { const int kNumFilesTrigger = 3; Env::Default()->SetBackgroundThreads(1, Env::Priority::BOTTOM); for (bool use_universal_compaction : {false, true}) { Options options = CurrentOptions(); if (use_universal_compaction) { options.compaction_style = kCompactionStyleUniversal; } else { options.compaction_style = kCompactionStyleLevel; options.level_compaction_dynamic_level_bytes = true; } options.num_levels = 4; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = kNumFilesTrigger; // Trigger compaction if size amplification exceeds 110% options.compaction_options_universal.max_size_amplification_percent = 110; DestroyAndReopen(options); int num_bottom_pri_compactions = 0; SyncPoint::GetInstance()->SetCallBack( "DBImpl::BGWorkBottomCompaction", [&](void* /*arg*/) { ++num_bottom_pri_compactions; }); SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int num = 0; num < kNumFilesTrigger; num++) { ASSERT_EQ(NumSortedRuns(), num); int key_idx = 0; GenerateNewFile(&rnd, &key_idx); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(1, num_bottom_pri_compactions); // Verify that size amplification did occur 
ASSERT_EQ(NumSortedRuns(), 1);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
  // Restore the BOTTOM-priority pool size that was raised at the start of the
  // test.
  Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM);
}

TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) {
  // Deletions can be dropped when compacted to non-last level if they fall
  // outside the lower-level files' key-ranges.
  const int kNumL0Files = 4;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(options);

  // put key 1 and 3 in separate L1, L2 files.
  // So key 0, 2, and 4+ fall outside these levels' key-ranges.
  for (int level = 2; level >= 1; --level) {
    for (int i = 0; i < 2; ++i) {
      ASSERT_OK(Put(Key(2 * i + 1), "val"));
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(level);
    ASSERT_EQ(2, NumTableFilesAtLevel(level));
  }

  // Delete keys in range [1, 4]. These L0 files will be compacted with L1:
  // - Tombstones for keys 2 and 4 can be dropped early.
  // - Tombstones for keys 1 and 3 must be kept due to L2 files' key-ranges.
  for (int i = 0; i < kNumL0Files; ++i) {
    ASSERT_OK(Put(Key(0), "val"));  // sentinel to prevent trivial move
    ASSERT_OK(Delete(Key(i + 1)));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Every deleted key must stay deleted whether or not its tombstone was
  // dropped.
  for (int i = 0; i < kNumL0Files; ++i) {
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), Key(i + 1), &value).IsNotFound());
  }
  ASSERT_EQ(2, options.statistics->getTickerCount(
                   COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE));
  ASSERT_EQ(2,
            options.statistics->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE));
}

TEST_F(DBCompactionTest, CompactFilesPendingL0Bug) {
  // https://www.facebook.com/groups/rocksdb.dev/permalink/1389452781153232/
  // CompactFiles() had a bug where it failed to pick a compaction when an L0
  // compaction existed, but marked it as scheduled anyways. It'd never be
  // unmarked as scheduled, so future compactions or DB close could hang.
const int kNumL0Files = 5; Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = kNumL0Files - 1; options.max_background_compactions = 2; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"LevelCompactionPicker::PickCompaction:Return", "DBCompactionTest::CompactFilesPendingL0Bug:Picked"}, {"DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted", "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); auto schedule_multi_compaction_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); // Files 0-3 will be included in an L0->L1 compaction. // // File 4 will be included in a call to CompactFiles() while the first // compaction is running. for (int i = 0; i < kNumL0Files - 1; ++i) { ASSERT_OK(Put(Key(0), "val")); // sentinel to prevent trivial move ASSERT_OK(Put(Key(i + 1), "val")); ASSERT_OK(Flush()); } TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:Picked"); // file 4 flushed after 0-3 picked ASSERT_OK(Put(Key(kNumL0Files), "val")); ASSERT_OK(Flush()); // previously DB close would hang forever as this situation caused scheduled // compactions count to never decrement to zero. ColumnFamilyMetaData cf_meta; dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); ASSERT_EQ(kNumL0Files, cf_meta.levels[0].files.size()); std::vector input_filenames; input_filenames.push_back(cf_meta.levels[0].files.front().name); ASSERT_OK(dbfull() ->CompactFiles(CompactionOptions(), input_filenames, 0 /* output_level */)); TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, CompactFilesOverlapInL0Bug) { // Regression test for bug of not pulling in L0 files that overlap the user- // specified input files in time- and key-ranges. 
Put(Key(0), "old_val"); Flush(); Put(Key(0), "new_val"); Flush(); ColumnFamilyMetaData cf_meta; dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); ASSERT_GE(cf_meta.levels.size(), 2); ASSERT_EQ(2, cf_meta.levels[0].files.size()); // Compacting {new L0 file, L1 file} should pull in the old L0 file since it // overlaps in key-range and time-range. std::vector input_filenames; input_filenames.push_back(cf_meta.levels[0].files.front().name); ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), input_filenames, 1 /* output_level */)); ASSERT_EQ("new_val", Get(Key(0))); } TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { // bottom-level files may contain deletions due to snapshots protecting the // deleted keys. Once the snapshot is released, we should see files with many // such deletions undergo single-file compactions. const int kNumKeysPerFile = 1024; const int kNumLevelFiles = 4; const int kValueSize = 128; Options options = CurrentOptions(); options.compression = kNoCompression; options.level0_file_num_compaction_trigger = kNumLevelFiles; // inflate it a bit to account for key/metadata overhead options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100; CreateAndReopenWithCF({"one"}, options); Random rnd(301); const Snapshot* snapshot = nullptr; for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } if (i == kNumLevelFiles - 1) { snapshot = db_->GetSnapshot(); // delete every other key after grabbing a snapshot, so these deletions // and the keys they cover can't be dropped until after the snapshot is // released. 
for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) { ASSERT_OK(Delete(Key(j))); } } Flush(); if (i < kNumLevelFiles - 1) { ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); } } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1)); std::vector pre_release_metadata, post_release_metadata; db_->GetLiveFilesMetaData(&pre_release_metadata); // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST // files does not need to be preserved in case of a future snapshot. ASSERT_OK(Put(Key(0), "val")); ASSERT_NE(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); // release snapshot and wait for compactions to finish. Single-file // compactions should be triggered, which reduce the size of each bottom-level // file without changing file count. db_->ReleaseSnapshot(snapshot); ASSERT_EQ(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kBottommostFiles); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->TEST_WaitForCompact(); db_->GetLiveFilesMetaData(&post_release_metadata); ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); for (size_t i = 0; i < pre_release_metadata.size(); ++i) { const auto& pre_file = pre_release_metadata[i]; const auto& post_file = post_release_metadata[i]; ASSERT_EQ(1, pre_file.level); ASSERT_EQ(1, post_file.level); // each file is smaller than it was before as it was rewritten without // deletion markers/deleted keys. 
ASSERT_LT(post_file.size, pre_file.size); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) { const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2; const int kValueSize = 1024; Options options = CurrentOptions(); options.compression = kNoCompression; options.ttl = 24 * 60 * 60; // 24 hours options.max_open_files = -1; env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); } dbfull()->TEST_WaitForCompact(); MoveFilesToLevel(3); ASSERT_EQ("0,0,0,2", FilesPerLevel()); // Delete previously written keys. for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j))); } Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("2,0,0,2", FilesPerLevel()); MoveFilesToLevel(1); ASSERT_EQ("0,2,0,2", FilesPerLevel()); env_->addon_time_.fetch_add(36 * 60 * 60); // 36 hours ASSERT_EQ("0,2,0,2", FilesPerLevel()); // Just do a simple write + flush so that the Ttl expired files get // compacted. ASSERT_OK(Put("a", "1")); Flush(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->TEST_WaitForCompact(); // All non-L0 files are deleted, as they contained only deleted data. ASSERT_EQ("1", FilesPerLevel()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); // Test dynamically changing ttl. 
env_->addon_time_.store(0); DestroyAndReopen(options); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); } dbfull()->TEST_WaitForCompact(); MoveFilesToLevel(3); ASSERT_EQ("0,0,0,2", FilesPerLevel()); // Delete previously written keys. for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j))); } Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("2,0,0,2", FilesPerLevel()); MoveFilesToLevel(1); ASSERT_EQ("0,2,0,2", FilesPerLevel()); // Move time forward by 12 hours, and make sure that compaction still doesn't // trigger as ttl is set to 24 hours. env_->addon_time_.fetch_add(12 * 60 * 60); ASSERT_OK(Put("a", "1")); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ("1,2,0,2", FilesPerLevel()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Dynamically change ttl to 10 hours. // This should trigger a ttl compaction, as 12 hours have already passed. ASSERT_OK(dbfull()->SetOptions({{"ttl", "36000"}})); dbfull()->TEST_WaitForCompact(); // All non-L0 files are deleted, as they contained only deleted data. 
ASSERT_EQ("1", FilesPerLevel()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, LevelTtlCascadingCompactions) { const int kValueSize = 100; for (bool if_restart : {false, true}) { for (bool if_open_all_files : {false, true}) { Options options = CurrentOptions(); options.compression = kNoCompression; options.ttl = 24 * 60 * 60; // 24 hours if (if_open_all_files) { options.max_open_files = -1; } else { options.max_open_files = 20; } // RocksDB sanitize max open files to at least 20. Modify it back. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { int* max_open_files = static_cast(arg); *max_open_files = 2; }); // In the case where all files are opened and doing DB restart // forcing the oldest ancester time in manifest file to be 0 to // simulate the case of reading from an old version. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionEdit::EncodeTo:VarintOldestAncesterTime", [&](void* arg) { if (if_restart && if_open_all_files) { std::string* encoded_fieled = static_cast(arg); *encoded_fieled = ""; PutVarint64(encoded_fieled, 0); } }); env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); int ttl_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); auto compaction_reason = compaction->compaction_reason(); if (compaction_reason == CompactionReason::kTtl) { ttl_compactions++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Add two L6 files with key ranges: [1 .. 100], [101 .. 200]. Random rnd(301); for (int i = 1; i <= 100; ++i) { ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); } Flush(); // Get the first file's creation time. This will be the oldest file in the // DB. 
Compactions inolving this file's descendents should keep getting // this time. std::vector> level_to_files; dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); uint64_t oldest_time = level_to_files[0][0].oldest_ancester_time; // Add 1 hour and do another flush. env_->addon_time_.fetch_add(1 * 60 * 60); for (int i = 101; i <= 200; ++i) { ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); } Flush(); MoveFilesToLevel(6); ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); env_->addon_time_.fetch_add(1 * 60 * 60); // Add two L4 files with key ranges: [1 .. 50], [51 .. 150]. for (int i = 1; i <= 50; ++i) { ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); } Flush(); env_->addon_time_.fetch_add(1 * 60 * 60); for (int i = 51; i <= 150; ++i) { ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); } Flush(); MoveFilesToLevel(4); ASSERT_EQ("0,0,0,0,2,0,2", FilesPerLevel()); env_->addon_time_.fetch_add(1 * 60 * 60); // Add one L1 file with key range: [26, 75]. for (int i = 26; i <= 75; ++i) { ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); } Flush(); dbfull()->TEST_WaitForCompact(); MoveFilesToLevel(1); ASSERT_EQ("0,1,0,0,2,0,2", FilesPerLevel()); // LSM tree: // L1: [26 .. 75] // L4: [1 .. 50][51 ..... 150] // L6: [1 ........ 100][101 .... 200] // // On TTL expiry, TTL compaction should be initiated on L1 file, and the // compactions should keep going on until the key range hits bottom level. // In other words: the compaction on this data range "cascasdes" until // reaching the bottom level. // // Order of events on TTL expiry: // 1. L1 file falls to L3 via 2 trivial moves which are initiated by the // ttl // compaction. // 2. A TTL compaction happens between L3 and L4 files. Output file in L4. // 3. The new output file from L4 falls to L5 via 1 trival move initiated // by the ttl compaction. // 4. A TTL compaction happens between L5 and L6 files. Ouptut in L6. 
// Add 25 hours and do a write env_->addon_time_.fetch_add(25 * 60 * 60); ASSERT_OK(Put(Key(1), "1")); if (if_restart) { Reopen(options); } else { Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel()); ASSERT_EQ(5, ttl_compactions); dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); ASSERT_EQ(oldest_time, level_to_files[6][0].oldest_ancester_time); env_->addon_time_.fetch_add(25 * 60 * 60); ASSERT_OK(Put(Key(2), "1")); if (if_restart) { Reopen(options); } else { Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel()); ASSERT_GE(ttl_compactions, 6); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } } TEST_F(DBCompactionTest, LevelPeriodicCompaction) { const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2; const int kValueSize = 100; for (bool if_restart : {false, true}) { for (bool if_open_all_files : {false, true}) { Options options = CurrentOptions(); options.periodic_compaction_seconds = 48 * 60 * 60; // 2 days if (if_open_all_files) { options.max_open_files = -1; // needed for ttl compaction } else { options.max_open_files = 20; } // RocksDB sanitize max open files to at least 20. Modify it back. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { int* max_open_files = static_cast(arg); *max_open_files = 0; }); // In the case where all files are opened and doing DB restart // forcing the file creation time in manifest file to be 0 to // simulate the case of reading from an old version. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionEdit::EncodeTo:VarintFileCreationTime", [&](void* arg) { if (if_restart && if_open_all_files) { std::string* encoded_fieled = static_cast(arg); *encoded_fieled = ""; PutVarint64(encoded_fieled, 0); } }); env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); int periodic_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); auto compaction_reason = compaction->compaction_reason(); if (compaction_reason == CompactionReason::kPeriodicCompaction) { periodic_compactions++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("2", FilesPerLevel()); ASSERT_EQ(0, periodic_compactions); // Add 50 hours and do a write env_->addon_time_.fetch_add(50 * 60 * 60); ASSERT_OK(Put("a", "1")); Flush(); dbfull()->TEST_WaitForCompact(); // Assert that the files stay in the same level ASSERT_EQ("3", FilesPerLevel()); // The two old files go through the periodic compaction process ASSERT_EQ(2, periodic_compactions); MoveFilesToLevel(1); ASSERT_EQ("0,3", FilesPerLevel()); // Add another 50 hours and do another write env_->addon_time_.fetch_add(50 * 60 * 60); ASSERT_OK(Put("b", "2")); if (if_restart) { Reopen(options); } else { Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ("1,3", FilesPerLevel()); // The three old files now go through the periodic compaction process. 2 // + 3. 
ASSERT_EQ(5, periodic_compactions); // Add another 50 hours and do another write env_->addon_time_.fetch_add(50 * 60 * 60); ASSERT_OK(Put("c", "3")); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ("2,3", FilesPerLevel()); // The four old files now go through the periodic compaction process. 5 // + 4. ASSERT_EQ(9, periodic_compactions); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } } TEST_F(DBCompactionTest, LevelPeriodicCompactionWithOldDB) { // This test makes sure that periodic compactions are working with a DB // where file_creation_time of some files is 0. // After compactions the new files are created with a valid file_creation_time const int kNumKeysPerFile = 32; const int kNumFiles = 4; const int kValueSize = 100; Options options = CurrentOptions(); env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); int periodic_compactions = 0; bool set_file_creation_time_to_zero = true; bool set_creation_time_to_zero = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); auto compaction_reason = compaction->compaction_reason(); if (compaction_reason == CompactionReason::kPeriodicCompaction) { periodic_compactions++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) { TableProperties* props = reinterpret_cast(arg); if (set_file_creation_time_to_zero) { props->file_creation_time = 0; } if (set_creation_time_to_zero) { props->creation_time = 0; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); // Move the first two files to L2. 
if (i == 1) { MoveFilesToLevel(2); set_creation_time_to_zero = false; } } ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2,0,2", FilesPerLevel()); ASSERT_EQ(0, periodic_compactions); Close(); set_file_creation_time_to_zero = false; // Forward the clock by 2 days. env_->addon_time_.fetch_add(2 * 24 * 60 * 60); options.periodic_compaction_seconds = 1 * 24 * 60 * 60; // 1 day Reopen(options); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2,0,2", FilesPerLevel()); // Make sure that all files go through periodic compaction. ASSERT_EQ(kNumFiles, periodic_compactions); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, LevelPeriodicAndTtlCompaction) { const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2; const int kValueSize = 100; Options options = CurrentOptions(); options.ttl = 10 * 60 * 60; // 10 hours options.periodic_compaction_seconds = 48 * 60 * 60; // 2 days options.max_open_files = -1; // needed for both periodic and ttl compactions env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); int periodic_compactions = 0; int ttl_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); auto compaction_reason = compaction->compaction_reason(); if (compaction_reason == CompactionReason::kPeriodicCompaction) { periodic_compactions++; } else if (compaction_reason == CompactionReason::kTtl) { ttl_compactions++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); } dbfull()->TEST_WaitForCompact(); MoveFilesToLevel(3); ASSERT_EQ("0,0,0,2", FilesPerLevel()); ASSERT_EQ(0, periodic_compactions); ASSERT_EQ(0, 
ttl_compactions); // Add some time greater than periodic_compaction_time. env_->addon_time_.fetch_add(50 * 60 * 60); ASSERT_OK(Put("a", "1")); Flush(); dbfull()->TEST_WaitForCompact(); // Files in the bottom level go through periodic compactions. ASSERT_EQ("1,0,0,2", FilesPerLevel()); ASSERT_EQ(2, periodic_compactions); ASSERT_EQ(0, ttl_compactions); // Add a little more time than ttl env_->addon_time_.fetch_add(11 * 60 * 60); ASSERT_OK(Put("b", "1")); Flush(); dbfull()->TEST_WaitForCompact(); // Notice that the previous file in level 1 falls down to the bottom level // due to ttl compactions, one level at a time. // And bottom level files don't get picked up for ttl compactions. ASSERT_EQ("1,0,0,3", FilesPerLevel()); ASSERT_EQ(2, periodic_compactions); ASSERT_EQ(3, ttl_compactions); // Add some time greater than periodic_compaction_time. env_->addon_time_.fetch_add(50 * 60 * 60); ASSERT_OK(Put("c", "1")); Flush(); dbfull()->TEST_WaitForCompact(); // Previous L0 file falls one level at a time to bottom level due to ttl. // And all 4 bottom files go through periodic compactions. 
ASSERT_EQ("1,0,0,4", FilesPerLevel());
  ASSERT_EQ(6, periodic_compactions);
  ASSERT_EQ(6, ttl_compactions);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Checks that, when a compaction filter or a compaction filter factory is
// configured and `periodic_compaction_seconds` is left at its initial value,
// the DB reports a 30-day period and runs periodic compactions once the mock
// clock has advanced 31 days.
TEST_F(DBCompactionTest, LevelPeriodicCompactionWithCompactionFilters) {
  // Filter that only supplies a name.
  class TestCompactionFilter : public CompactionFilter {
    const char* Name() const override { return "TestCompactionFilter"; }
  };
  // Factory that hands out a fresh TestCompactionFilter on every request.
  class TestCompactionFilterFactory : public CompactionFilterFactory {
    const char* Name() const override { return "TestCompactionFilterFactory"; }
    std::unique_ptr<CompactionFilter> CreateCompactionFilter(
        const CompactionFilter::Context& /*context*/) override {
      return std::unique_ptr<CompactionFilter>(new TestCompactionFilter());
    }
  };

  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  Random rnd(301);

  Options options = CurrentOptions();
  TestCompactionFilter test_compaction_filter;
  env_->time_elapse_only_sleep_ = false;
  options.env = env_;
  env_->addon_time_.store(0);

  enum CompactionFilterType {
    kUseCompactionFilter,
    kUseCompactionFilterFactory
  };

  for (CompactionFilterType comp_filter_type :
       {kUseCompactionFilter, kUseCompactionFilterFactory}) {
    // Assert that periodic compactions are not enabled.
    ASSERT_EQ(port::kMaxUint64 - 1, options.periodic_compaction_seconds);

    if (comp_filter_type == kUseCompactionFilter) {
      options.compaction_filter = &test_compaction_filter;
      options.compaction_filter_factory.reset();
    } else if (comp_filter_type == kUseCompactionFilterFactory) {
      options.compaction_filter = nullptr;
      options.compaction_filter_factory.reset(
          new TestCompactionFilterFactory());
    }
    DestroyAndReopen(options);

    // periodic_compaction_seconds should be set to the sanitized value when
    // a compaction filter or a compaction filter factory is used.
    ASSERT_EQ(30 * 24 * 60 * 60,
              dbfull()->GetOptions().periodic_compaction_seconds);

    int periodic_compactions = 0;
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
          Compaction* compaction = reinterpret_cast<Compaction*>(arg);
          auto compaction_reason = compaction->compaction_reason();
          if (compaction_reason == CompactionReason::kPeriodicCompaction) {
            periodic_compactions++;
          }
        });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    for (int i = 0; i < kNumLevelFiles; ++i) {
      for (int j = 0; j < kNumKeysPerFile; ++j) {
        ASSERT_OK(
            Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize)));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ("2", FilesPerLevel());
    ASSERT_EQ(0, periodic_compactions);

    // Add 31 days and do a write
    env_->addon_time_.fetch_add(31 * 24 * 60 * 60);
    ASSERT_OK(Put("a", "1"));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    // Assert that the files stay in the same level
    ASSERT_EQ("3", FilesPerLevel());
    // The two old files go through the periodic compaction process
    ASSERT_EQ(2, periodic_compactions);

    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_F(DBCompactionTest, CompactRangeDelayedByL0FileCount) {
  // Verify that, when `CompactRangeOptions::allow_write_stall == false`, manual
  // compaction only triggers flush after it's sure stall won't be triggered for
  // L0 file count going too high.
const int kNumL0FilesTrigger = 4; const int kNumL0FilesLimit = 8; // i == 0: verifies normal case where stall is avoided by delay // i == 1: verifies no delay in edge case where stall trigger is same as // compaction trigger, so stall can't be avoided for (int i = 0; i < 2; ++i) { Options options = CurrentOptions(); options.level0_slowdown_writes_trigger = kNumL0FilesLimit; if (i == 0) { options.level0_file_num_compaction_trigger = kNumL0FilesTrigger; } else { options.level0_file_num_compaction_trigger = kNumL0FilesLimit; } Reopen(options); if (i == 0) { // ensure the auto compaction doesn't finish until manual compaction has // had a chance to be delayed. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", "CompactionJob::Run():End"}}); } else { // ensure the auto-compaction doesn't finish until manual compaction has // continued without delay. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::FlushMemTable:StallWaitDone", "CompactionJob::Run():End"}}); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int j = 0; j < kNumL0FilesLimit - 1; ++j) { for (int k = 0; k < 2; ++k) { ASSERT_OK(Put(Key(k), RandomString(&rnd, 1024))); } Flush(); } auto manual_compaction_thread = port::Thread([this]() { CompactRangeOptions cro; cro.allow_write_stall = false; db_->CompactRange(cro, nullptr, nullptr); }); manual_compaction_thread.join(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(1), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(DBCompactionTest, CompactRangeDelayedByImmMemTableCount) { // Verify that, when `CompactRangeOptions::allow_write_stall == false`, manual // compaction only triggers flush after it's sure stall won't be triggered for // immutable memtable count going too high. 
const int kNumImmMemTableLimit = 8; // i == 0: verifies normal case where stall is avoided by delay // i == 1: verifies no delay in edge case where stall trigger is same as flush // trigger, so stall can't be avoided for (int i = 0; i < 2; ++i) { Options options = CurrentOptions(); options.disable_auto_compactions = true; // the delay limit is one less than the stop limit. This test focuses on // avoiding delay limit, but this option sets stop limit, so add one. options.max_write_buffer_number = kNumImmMemTableLimit + 1; if (i == 1) { options.min_write_buffer_number_to_merge = kNumImmMemTableLimit; } Reopen(options); if (i == 0) { // ensure the flush doesn't finish until manual compaction has had a // chance to be delayed. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", "FlushJob::WriteLevel0Table"}}); } else { // ensure the flush doesn't finish until manual compaction has continued // without delay. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::FlushMemTable:StallWaitDone", "FlushJob::WriteLevel0Table"}}); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int j = 0; j < kNumImmMemTableLimit - 1; ++j) { ASSERT_OK(Put(Key(0), RandomString(&rnd, 1024))); FlushOptions flush_opts; flush_opts.wait = false; flush_opts.allow_write_stall = true; dbfull()->Flush(flush_opts); } auto manual_compaction_thread = port::Thread([this]() { CompactRangeOptions cro; cro.allow_write_stall = false; db_->CompactRange(cro, nullptr, nullptr); }); manual_compaction_thread.join(); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(1), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(DBCompactionTest, CompactRangeShutdownWhileDelayed) { // Verify that, when `CompactRangeOptions::allow_write_stall == false`, delay // does not hang if CF is dropped or DB is closed const 
int kNumL0FilesTrigger = 4; const int kNumL0FilesLimit = 8; Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = kNumL0FilesTrigger; options.level0_slowdown_writes_trigger = kNumL0FilesLimit; // i == 0: DB::DropColumnFamily() on CompactRange's target CF unblocks it // i == 1: DB::CancelAllBackgroundWork() unblocks CompactRange. This is to // simulate what happens during Close as we can't call Close (it // blocks on the auto-compaction, making a cycle). for (int i = 0; i < 2; ++i) { CreateAndReopenWithCF({"one"}, options); // The calls to close CF/DB wait until the manual compaction stalls. // The auto-compaction waits until the manual compaction finishes to ensure // the signal comes from closing CF/DB, not from compaction making progress. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", "DBCompactionTest::CompactRangeShutdownWhileDelayed:PreShutdown"}, {"DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual", "CompactionJob::Run():End"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int j = 0; j < kNumL0FilesLimit - 1; ++j) { for (int k = 0; k < 2; ++k) { ASSERT_OK(Put(1, Key(k), RandomString(&rnd, 1024))); } Flush(1); } auto manual_compaction_thread = port::Thread([this, i]() { CompactRangeOptions cro; cro.allow_write_stall = false; Status s = db_->CompactRange(cro, handles_[1], nullptr, nullptr); if (i == 0) { ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr) .IsColumnFamilyDropped()); } else { ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr) .IsShutdownInProgress()); } }); TEST_SYNC_POINT( "DBCompactionTest::CompactRangeShutdownWhileDelayed:PreShutdown"); if (i == 0) { ASSERT_OK(db_->DropColumnFamily(handles_[1])); } else { dbfull()->CancelAllBackgroundWork(false /* wait */); } manual_compaction_thread.join(); TEST_SYNC_POINT( 
"DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual"); dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(DBCompactionTest, CompactRangeSkipFlushAfterDelay) { // Verify that, when `CompactRangeOptions::allow_write_stall == false`, // CompactRange skips its flush if the delay is long enough that the memtables // existing at the beginning of the call have already been flushed. const int kNumL0FilesTrigger = 4; const int kNumL0FilesLimit = 8; Options options = CurrentOptions(); options.level0_slowdown_writes_trigger = kNumL0FilesLimit; options.level0_file_num_compaction_trigger = kNumL0FilesTrigger; Reopen(options); Random rnd(301); // The manual flush includes the memtable that was active when CompactRange // began. So it unblocks CompactRange and precludes its flush. Throughout the // test, stall conditions are upheld via high L0 file count. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait", "DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush"}, {"DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush", "DBImpl::FlushMemTable:StallWaitDone"}, {"DBImpl::FlushMemTable:StallWaitDone", "CompactionJob::Run():End"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); //used for the delayable flushes FlushOptions flush_opts; flush_opts.allow_write_stall = true; for (int i = 0; i < kNumL0FilesLimit - 1; ++i) { for (int j = 0; j < 2; ++j) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024))); } dbfull()->Flush(flush_opts); } auto manual_compaction_thread = port::Thread([this]() { CompactRangeOptions cro; cro.allow_write_stall = false; db_->CompactRange(cro, nullptr, nullptr); }); TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush"); Put(ToString(0), RandomString(&rnd, 1024)); dbfull()->Flush(flush_opts); Put(ToString(0), RandomString(&rnd, 1024)); 
TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush"); manual_compaction_thread.join(); // If CompactRange's flush was skipped, the final Put above will still be // in the active memtable. std::string num_keys_in_memtable; db_->GetProperty(DB::Properties::kNumEntriesActiveMemTable, &num_keys_in_memtable); ASSERT_EQ(ToString(1), num_keys_in_memtable); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, CompactRangeFlushOverlappingMemtable) { // Verify memtable only gets flushed if it contains data overlapping the range // provided to `CompactRange`. Tests all kinds of overlap/non-overlap. const int kNumEndpointKeys = 5; std::string keys[kNumEndpointKeys] = {"a", "b", "c", "d", "e"}; Options options = CurrentOptions(); options.disable_auto_compactions = true; Reopen(options); // One extra iteration for nullptr, which means left side of interval is // unbounded. for (int i = 0; i <= kNumEndpointKeys; ++i) { Slice begin; Slice* begin_ptr; if (i == 0) { begin_ptr = nullptr; } else { begin = keys[i - 1]; begin_ptr = &begin; } // Start at `i` so right endpoint comes after left endpoint. One extra // iteration for nullptr, which means right side of interval is unbounded. 
for (int j = std::max(0, i - 1); j <= kNumEndpointKeys; ++j) { Slice end; Slice* end_ptr; if (j == kNumEndpointKeys) { end_ptr = nullptr; } else { end = keys[j]; end_ptr = &end; } ASSERT_OK(Put("b", "val")); ASSERT_OK(Put("d", "val")); CompactRangeOptions compact_range_opts; ASSERT_OK(db_->CompactRange(compact_range_opts, begin_ptr, end_ptr)); uint64_t get_prop_tmp, num_memtable_entries = 0; ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesImmMemTables, &get_prop_tmp)); num_memtable_entries += get_prop_tmp; ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable, &get_prop_tmp)); num_memtable_entries += get_prop_tmp; if (begin_ptr == nullptr || end_ptr == nullptr || (i <= 4 && j >= 1 && (begin != "c" || end != "c"))) { // In this case `CompactRange`'s range overlapped in some way with the // memtable's range, so flush should've happened. Then "b" and "d" won't // be in the memtable. ASSERT_EQ(0, num_memtable_entries); } else { ASSERT_EQ(2, num_memtable_entries); // flush anyways to prepare for next iteration db_->Flush(FlushOptions()); } } } } TEST_F(DBCompactionTest, CompactionStatsTest) { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 2; CompactionStatsCollector* collector = new CompactionStatsCollector(); options.listeners.emplace_back(collector); DestroyAndReopen(options); for (int i = 0; i < 32; i++) { for (int j = 0; j < 5000; j++) { Put(std::to_string(j), std::string(1, 'A')); } ASSERT_OK(Flush()); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } dbfull()->TEST_WaitForCompact(); ColumnFamilyHandleImpl* cfh = static_cast(dbfull()->DefaultColumnFamily()); ColumnFamilyData* cfd = cfh->cfd(); VerifyCompactionStats(*cfd, *collector); } TEST_F(DBCompactionTest, CompactFilesOutputRangeConflict) { // LSM setup: // L1: [ba bz] // L2: [a b] [c d] // L3: [a b] [c d] // // Thread 1: Thread 2: // Begin compacting all L2->L3 // Compact [ba bz] L1->L3 // End compacting all L2->L3 // // The compaction 
operation in thread 2 should be disallowed because the range // overlaps with the compaction in thread 1, which also covers that range in // L3. Options options = CurrentOptions(); FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); Reopen(options); for (int level = 3; level >= 2; --level) { ASSERT_OK(Put("a", "val")); ASSERT_OK(Put("b", "val")); ASSERT_OK(Flush()); ASSERT_OK(Put("c", "val")); ASSERT_OK(Put("d", "val")); ASSERT_OK(Flush()); MoveFilesToLevel(level); } ASSERT_OK(Put("ba", "val")); ASSERT_OK(Put("bz", "val")); ASSERT_OK(Flush()); MoveFilesToLevel(1); SyncPoint::GetInstance()->LoadDependency({ {"CompactFilesImpl:0", "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2Begin"}, {"DBCompactionTest::CompactFilesOutputRangeConflict:Thread2End", "CompactFilesImpl:1"}, }); SyncPoint::GetInstance()->EnableProcessing(); auto bg_thread = port::Thread([&]() { // Thread 1 std::vector filenames = collector->GetFlushedFiles(); filenames.pop_back(); ASSERT_OK(db_->CompactFiles(CompactionOptions(), filenames, 3 /* output_level */)); }); // Thread 2 TEST_SYNC_POINT( "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2Begin"); std::string filename = collector->GetFlushedFiles().back(); ASSERT_FALSE( db_->CompactFiles(CompactionOptions(), {filename}, 3 /* output_level */) .ok()); TEST_SYNC_POINT( "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2End"); bg_thread.join(); } TEST_F(DBCompactionTest, CompactionHasEmptyOutput) { Options options = CurrentOptions(); SstStatsCollector* collector = new SstStatsCollector(); options.level0_file_num_compaction_trigger = 2; options.listeners.emplace_back(collector); Reopen(options); // Make sure the L0 files overlap to prevent trivial move. 
ASSERT_OK(Put("a", "val")); ASSERT_OK(Put("b", "val")); ASSERT_OK(Flush()); ASSERT_OK(Delete("a")); ASSERT_OK(Delete("b")); ASSERT_OK(Flush()); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(1), 0); // Expect one file creation to start for each flush, and zero for compaction // since no keys are written. ASSERT_EQ(2, collector->num_ssts_creation_started()); } TEST_F(DBCompactionTest, CompactionLimiter) { const int kNumKeysPerFile = 10; const int kMaxBackgroundThreads = 64; struct CompactionLimiter { std::string name; int limit_tasks; int max_tasks; int tasks; std::shared_ptr limiter; }; std::vector limiter_settings; limiter_settings.push_back({"limiter_1", 1, 0, 0, nullptr}); limiter_settings.push_back({"limiter_2", 2, 0, 0, nullptr}); limiter_settings.push_back({"limiter_3", 3, 0, 0, nullptr}); for (auto& ls : limiter_settings) { ls.limiter.reset(NewConcurrentTaskLimiter(ls.name, ls.limit_tasks)); } std::shared_ptr unique_limiter( NewConcurrentTaskLimiter("unique_limiter", -1)); const char* cf_names[] = {"default", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f" }; const unsigned int cf_count = sizeof cf_names / sizeof cf_names[0]; std::unordered_map cf_to_limiter; Options options = CurrentOptions(); options.write_buffer_size = 110 * 1024; // 110KB options.arena_block_size = 4096; options.num_levels = 3; options.level0_file_num_compaction_trigger = 4; options.level0_slowdown_writes_trigger = 64; options.level0_stop_writes_trigger = 64; options.max_background_jobs = kMaxBackgroundThreads; // Enough threads options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); options.max_write_buffer_number = 10; // Enough memtables DestroyAndReopen(options); std::vector option_vector; option_vector.reserve(cf_count); for (unsigned int cf = 0; cf < cf_count; cf++) { ColumnFamilyOptions cf_opt(options); if (cf == 0) { // "Default" CF does't use compaction limiter 
cf_opt.compaction_thread_limiter = nullptr; } else if (cf == 1) { // "1" CF uses bypass compaction limiter unique_limiter->SetMaxOutstandingTask(-1); cf_opt.compaction_thread_limiter = unique_limiter; } else { // Assign limiter by mod auto& ls = limiter_settings[cf % 3]; cf_opt.compaction_thread_limiter = ls.limiter; cf_to_limiter[cf_names[cf]] = &ls; } option_vector.emplace_back(DBOptions(options), cf_opt); } for (unsigned int cf = 1; cf < cf_count; cf++) { CreateColumnFamilies({cf_names[cf]}, option_vector[cf]); } ReopenWithColumnFamilies(std::vector(cf_names, cf_names + cf_count), option_vector); port::Mutex mutex; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:BeforeCompaction", [&](void* arg) { const auto& cf_name = static_cast(arg)->GetName(); auto iter = cf_to_limiter.find(cf_name); if (iter != cf_to_limiter.end()) { MutexLock l(&mutex); ASSERT_GE(iter->second->limit_tasks, ++iter->second->tasks); iter->second->max_tasks = std::max(iter->second->max_tasks, iter->second->limit_tasks); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:AfterCompaction", [&](void* arg) { const auto& cf_name = static_cast(arg)->GetName(); auto iter = cf_to_limiter.find(cf_name); if (iter != cf_to_limiter.end()) { MutexLock l(&mutex); ASSERT_GE(--iter->second->tasks, 0); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Block all compact threads in thread pool. const size_t kTotalFlushTasks = kMaxBackgroundThreads / 4; const size_t kTotalCompactTasks = kMaxBackgroundThreads - kTotalFlushTasks; env_->SetBackgroundThreads((int)kTotalFlushTasks, Env::HIGH); env_->SetBackgroundThreads((int)kTotalCompactTasks, Env::LOW); test::SleepingBackgroundTask sleeping_compact_tasks[kTotalCompactTasks]; // Block all compaction threads in thread pool. 
for (size_t i = 0; i < kTotalCompactTasks; i++) { env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_compact_tasks[i], Env::LOW); sleeping_compact_tasks[i].WaitUntilSleeping(); } int keyIndex = 0; for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) { for (unsigned int cf = 0; cf < cf_count; cf++) { for (int i = 0; i < kNumKeysPerFile; i++) { ASSERT_OK(Put(cf, Key(keyIndex++), "")); } // put extra key to trigger flush ASSERT_OK(Put(cf, "", "")); } for (unsigned int cf = 0; cf < cf_count; cf++) { dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); } } // Enough L0 files to trigger compaction for (unsigned int cf = 0; cf < cf_count; cf++) { ASSERT_EQ(NumTableFilesAtLevel(0, cf), options.level0_file_num_compaction_trigger); } // Create more files for one column family, which triggers speed up // condition, all compactions will be scheduled. for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { for (int i = 0; i < kNumKeysPerFile; i++) { ASSERT_OK(Put(0, Key(i), "")); } // put extra key to trigger flush ASSERT_OK(Put(0, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[0]); ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, NumTableFilesAtLevel(0, 0)); } // All CFs are pending compaction ASSERT_EQ(cf_count, env_->GetThreadPoolQueueLen(Env::LOW)); // Unblock all compaction threads for (size_t i = 0; i < kTotalCompactTasks; i++) { sleeping_compact_tasks[i].WakeUp(); sleeping_compact_tasks[i].WaitUntilDone(); } for (unsigned int cf = 0; cf < cf_count; cf++) { dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Max outstanding compact tasks reached limit for (auto& ls : limiter_settings) { ASSERT_EQ(ls.limit_tasks, ls.max_tasks); ASSERT_EQ(0, ls.limiter->GetOutstandingTask()); } // test manual compaction under a fully throttled limiter int cf_test = 1; unique_limiter->SetMaxOutstandingTask(0); // flush one more file to cf 1 for (int i = 0; i < 
kNumKeysPerFile; i++) { ASSERT_OK(Put(cf_test, Key(keyIndex++), "")); } // put extra key to trigger flush ASSERT_OK(Put(cf_test, "", "")); dbfull()->TEST_WaitForFlushMemTable(handles_[cf_test]); ASSERT_EQ(1, NumTableFilesAtLevel(0, cf_test)); Compact(cf_test, Key(0), Key(keyIndex)); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, ::testing::Values(std::make_tuple(1, true), std::make_tuple(1, false), std::make_tuple(4, true), std::make_tuple(4, false))); TEST_P(DBCompactionDirectIOTest, DirectIO) { Options options = CurrentOptions(); Destroy(options); options.create_if_missing = true; options.disable_auto_compactions = true; options.use_direct_io_for_flush_and_compaction = GetParam(); options.env = new MockEnv(Env::Default()); Reopen(options); bool readahead = false; SyncPoint::GetInstance()->SetCallBack( "CompactionJob::OpenCompactionOutputFile", [&](void* arg) { bool* use_direct_writes = static_cast(arg); ASSERT_EQ(*use_direct_writes, options.use_direct_io_for_flush_and_compaction); }); if (options.use_direct_io_for_flush_and_compaction) { SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions:direct_io", [&](void* /*arg*/) { readahead = true; }); } SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu"}, options); MakeTables(3, "p", "q", 1); ASSERT_EQ("1,1,1", FilesPerLevel(1)); Compact(1, "p", "q"); ASSERT_EQ(readahead, options.use_direct_reads); ASSERT_EQ("0,0,1", FilesPerLevel(1)); Destroy(options); delete options.env; } INSTANTIATE_TEST_CASE_P(DBCompactionDirectIOTest, DBCompactionDirectIOTest, testing::Bool()); class CompactionPriTest : public DBTestBase, public testing::WithParamInterface { public: CompactionPriTest() : DBTestBase("/compaction_pri_test") { compaction_pri_ = GetParam(); } // Required if inheriting from testing::WithParamInterface<> static void SetUpTestCase() {} static void TearDownTestCase() {} uint32_t compaction_pri_; }; 
TEST_P(CompactionPriTest, Test) { Options options = CurrentOptions(); options.write_buffer_size = 16 * 1024; options.compaction_pri = static_cast(compaction_pri_); options.hard_pending_compaction_bytes_limit = 256 * 1024; options.max_bytes_for_level_base = 64 * 1024; options.max_bytes_for_level_multiplier = 4; options.compression = kNoCompression; DestroyAndReopen(options); Random rnd(301); const int kNKeys = 5000; int keys[kNKeys]; for (int i = 0; i < kNKeys; i++) { keys[i] = i; } RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); for (int i = 0; i < kNKeys; i++) { ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 102))); } dbfull()->TEST_WaitForCompact(); for (int i = 0; i < kNKeys; i++) { ASSERT_NE("NOT_FOUND", Get(Key(i))); } } INSTANTIATE_TEST_CASE_P( CompactionPriTest, CompactionPriTest, ::testing::Values(CompactionPri::kByCompensatedSize, CompactionPri::kOldestLargestSeqFirst, CompactionPri::kOldestSmallestSeqFirst, CompactionPri::kMinOverlappingRatio)); class NoopMergeOperator : public MergeOperator { public: NoopMergeOperator() {} bool FullMergeV2(const MergeOperationInput& /*merge_in*/, MergeOperationOutput* merge_out) const override { std::string val("bar"); merge_out->new_value = val; return true; } const char* Name() const override { return "Noop"; } }; TEST_F(DBCompactionTest, PartialManualCompaction) { Options opts = CurrentOptions(); opts.num_levels = 3; opts.level0_file_num_compaction_trigger = 10; opts.compression = kNoCompression; opts.merge_operator.reset(new NoopMergeOperator()); opts.target_file_size_base = 10240; DestroyAndReopen(opts); Random rnd(301); for (auto i = 0; i < 8; ++i) { for (auto j = 0; j < 10; ++j) { Merge("foo", RandomString(&rnd, 1024)); } Flush(); } MoveFilesToLevel(2); std::string prop; EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop)); uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2; ASSERT_OK(dbfull()->SetOptions( {{"max_compaction_bytes", std::to_string(max_compaction_bytes)}})); 
CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
}

TEST_F(DBCompactionTest, ManualCompactionFailsInReadOnlyMode) {
  // Regression test for bug where manual compaction hangs forever when the DB
  // is in read-only mode. Verify it now at least returns, despite failing.
  const int kNumL0Files = 4;
  std::unique_ptr<FaultInjectionTestEnv> mock_env(
      new FaultInjectionTestEnv(Env::Default()));
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.env = mock_env.get();
  DestroyAndReopen(opts);

  Random rnd(301);
  for (int i = 0; i < kNumL0Files; ++i) {
    // Make sure files are overlapping in key-range to prevent trivial move.
    // The filesystem is still active here, so these writes must succeed.
    ASSERT_OK(Put("key1", RandomString(&rnd, 1024)));
    ASSERT_OK(Put("key2", RandomString(&rnd, 1024)));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(kNumL0Files, NumTableFilesAtLevel(0));

  // Enter read-only mode by failing a write.
  mock_env->SetFilesystemActive(false);
  // Make sure this is outside `CompactRange`'s range so that it doesn't fail
  // early trying to flush memtable.
  ASSERT_NOK(Put("key3", RandomString(&rnd, 1024)));

  // In the bug scenario, the first manual compaction would fail and forget to
  // unregister itself, causing the second one to hang forever due to conflict
  // with a non-running compaction.
  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = false;
  Slice begin_key("key1");
  Slice end_key("key2");
  ASSERT_NOK(dbfull()->CompactRange(cro, &begin_key, &end_key));
  ASSERT_NOK(dbfull()->CompactRange(cro, &begin_key, &end_key));

  // Close before mock_env destruct.
Close(); } // ManualCompactionBottomLevelOptimization tests the bottom level manual // compaction optimization to skip recompacting files created by Ln-1 to Ln // compaction TEST_F(DBCompactionTest, ManualCompactionBottomLevelOptimized) { Options opts = CurrentOptions(); opts.num_levels = 3; opts.level0_file_num_compaction_trigger = 5; opts.compression = kNoCompression; opts.merge_operator.reset(new NoopMergeOperator()); opts.target_file_size_base = 1024; opts.max_bytes_for_level_multiplier = 2; opts.disable_auto_compactions = true; DestroyAndReopen(opts); ColumnFamilyHandleImpl* cfh = static_cast(dbfull()->DefaultColumnFamily()); ColumnFamilyData* cfd = cfh->cfd(); InternalStats* internal_stats_ptr = cfd->internal_stats(); ASSERT_NE(internal_stats_ptr, nullptr); Random rnd(301); for (auto i = 0; i < 8; ++i) { for (auto j = 0; j < 10; ++j) { ASSERT_OK( Put("foo" + std::to_string(i * 10 + j), RandomString(&rnd, 1024))); } Flush(); } MoveFilesToLevel(2); for (auto i = 0; i < 8; ++i) { for (auto j = 0; j < 10; ++j) { ASSERT_OK( Put("bar" + std::to_string(i * 10 + j), RandomString(&rnd, 1024))); } Flush(); } const std::vector& comp_stats = internal_stats_ptr->TEST_GetCompactionStats(); int num = comp_stats[2].num_input_files_in_output_level; ASSERT_EQ(num, 0); CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; dbfull()->CompactRange(cro, nullptr, nullptr); const std::vector& comp_stats2 = internal_stats_ptr->TEST_GetCompactionStats(); num = comp_stats2[2].num_input_files_in_output_level; ASSERT_EQ(num, 0); } TEST_F(DBCompactionTest, CompactionDuringShutdown) { Options opts = CurrentOptions(); opts.level0_file_num_compaction_trigger = 2; opts.disable_auto_compactions = true; DestroyAndReopen(opts); ColumnFamilyHandleImpl* cfh = static_cast(dbfull()->DefaultColumnFamily()); ColumnFamilyData* cfd = cfh->cfd(); InternalStats* internal_stats_ptr = cfd->internal_stats(); ASSERT_NE(internal_stats_ptr, nullptr); Random 
rnd(301); for (auto i = 0; i < 2; ++i) { for (auto j = 0; j < 10; ++j) { ASSERT_OK( Put("foo" + std::to_string(i * 10 + j), RandomString(&rnd, 1024))); } Flush(); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", [&](void* /*arg*/) { dbfull()->shutting_down_.store(true); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_OK(dbfull()->error_handler_.GetBGError()); } // FixFileIngestionCompactionDeadlock tests and verifies that compaction and // file ingestion do not cause deadlock in the event of write stall triggered // by number of L0 files reaching level0_stop_writes_trigger. TEST_P(DBCompactionTestWithParam, FixFileIngestionCompactionDeadlock) { const int kNumKeysPerFile = 100; // Generate SST files. Options options = CurrentOptions(); // Generate an external SST file containing a single key, i.e. 99 std::string sst_files_dir = dbname_ + "/sst_files/"; test::DestroyDir(env_, sst_files_dir); ASSERT_OK(env_->CreateDir(sst_files_dir)); SstFileWriter sst_writer(EnvOptions(), options); const std::string sst_file_path = sst_files_dir + "test.sst"; ASSERT_OK(sst_writer.Open(sst_file_path)); ASSERT_OK(sst_writer.Put(Key(kNumKeysPerFile - 1), "value")); ASSERT_OK(sst_writer.Finish()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::IngestExternalFile:AfterIncIngestFileCounter", "BackgroundCallCompaction:0"}, }); SyncPoint::GetInstance()->EnableProcessing(); options.write_buffer_size = 110 << 10; // 110KB options.level0_file_num_compaction_trigger = options.level0_stop_writes_trigger; options.max_subcompactions = max_subcompactions_; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); DestroyAndReopen(options); Random rnd(301); // Generate level0_stop_writes_trigger L0 files to trigger write stop for 
(int i = 0; i != options.level0_file_num_compaction_trigger; ++i) { for (int j = 0; j != kNumKeysPerFile; ++j) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 990))); } if (0 == i) { // When we reach here, the memtables have kNumKeysPerFile keys. Note that // flush is not yet triggered. We need to write an extra key so that the // write path will call PreprocessWrite and flush the previous key-value // pairs to e flushed. After that, there will be the newest key in the // memtable, and a bunch of L0 files. Since there is already one key in // the memtable, then for i = 1, 2, ..., we do not have to write this // extra key to trigger flush. ASSERT_OK(Put("", "")); } dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(NumTableFilesAtLevel(0 /*level*/, 0 /*cf*/), i + 1); } // When we reach this point, there will be level0_stop_writes_trigger L0 // files and one extra key (99) in memory, which overlaps with the external // SST file. Write stall triggers, and can be cleared only after compaction // reduces the number of L0 files. // Compaction will also be triggered since we have reached the threshold for // auto compaction. Note that compaction may begin after the following file // ingestion thread and waits for ingestion to finish. // Thread to ingest file with overlapping key range with the current // memtable. Consequently ingestion will trigger a flush. The flush MUST // proceed without waiting for the write stall condition to clear, otherwise // deadlock can happen. 
port::Thread ingestion_thr([&]() { IngestExternalFileOptions ifo; Status s = db_->IngestExternalFile({sst_file_path}, ifo); ASSERT_OK(s); }); // More write to trigger write stop ingestion_thr.join(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); Close(); } TEST_F(DBCompactionTest, ConsistencyFailTest) { Options options = CurrentOptions(); options.force_consistency_checks = true; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionBuilder::CheckConsistency0", [&](void* arg) { auto p = reinterpret_cast*>(arg); // just swap the two FileMetaData so that we hit error // in CheckConsistency funcion FileMetaData* temp = *(p->first); *(p->first) = *(p->second); *(p->second) = temp; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); for (int k = 0; k < 2; ++k) { ASSERT_OK(Put("foo", "bar")); Flush(); } ASSERT_NOK(Put("foo", "bar")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_F(DBCompactionTest, ConsistencyFailTest2) { Options options = CurrentOptions(); options.force_consistency_checks = true; options.target_file_size_base = 1000; options.level0_file_num_compaction_trigger = 2; BlockBasedTableOptions bbto; bbto.block_size = 400; // small block size options.table_factory.reset(new BlockBasedTableFactory(bbto)); DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionBuilder::CheckConsistency1", [&](void* arg) { auto p = reinterpret_cast*>(arg); // just swap the two FileMetaData so that we hit error // in CheckConsistency funcion FileMetaData* temp = *(p->first); *(p->first) = *(p->second); *(p->second) = temp; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); std::string value = RandomString(&rnd, 1000); ASSERT_OK(Put("foo1", value)); ASSERT_OK(Put("z", "")); Flush(); ASSERT_OK(Put("foo2", value)); ASSERT_OK(Put("z", "")); Flush(); // This probably returns non-OK, but we 
rely on the next Put() // to determine the DB is frozen. dbfull()->TEST_WaitForCompact(); ASSERT_NOK(Put("foo", "bar")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } void IngestOneKeyValue(DBImpl* db, const std::string& key, const std::string& value, const Options& options) { ExternalSstFileInfo info; std::string f = test::PerThreadDBPath("sst_file" + key); EnvOptions env; ROCKSDB_NAMESPACE::SstFileWriter writer(env, options); auto s = writer.Open(f); ASSERT_OK(s); // ASSERT_OK(writer.Put(Key(), "")); ASSERT_OK(writer.Put(key, value)); ASSERT_OK(writer.Finish(&info)); IngestExternalFileOptions ingest_opt; ASSERT_OK(db->IngestExternalFile({info.file_path}, ingest_opt)); } TEST_P(DBCompactionTestWithParam, FlushAfterIntraL0CompactionCheckConsistencyFail) { Options options = CurrentOptions(); options.force_consistency_checks = true; options.compression = kNoCompression; options.level0_file_num_compaction_trigger = 5; options.max_background_compactions = 2; options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); const size_t kValueSize = 1 << 20; Random rnd(301); std::atomic pick_intra_l0_count(0); std::string value(RandomString(&rnd, kValueSize)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBCompactionTestWithParam::FlushAfterIntraL0:1", "CompactionJob::Run():Start"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FindIntraL0Compaction", [&](void* /*arg*/) { pick_intra_l0_count.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // prevents trivial move for (int i = 0; i < 10; ++i) { ASSERT_OK(Put(Key(i), "")); // prevents trivial move } ASSERT_OK(Flush()); Compact("", Key(99)); ASSERT_EQ(0, NumTableFilesAtLevel(0)); // Flush 5 L0 sst. 
for (int i = 0; i < 5; ++i) { ASSERT_OK(Put(Key(i + 1), value)); ASSERT_OK(Flush()); } ASSERT_EQ(5, NumTableFilesAtLevel(0)); // Put one key, to make smallest log sequence number in this memtable is less // than sst which would be ingested in next step. ASSERT_OK(Put(Key(0), "a")); ASSERT_EQ(5, NumTableFilesAtLevel(0)); // Ingest 5 L0 sst. And this files would trigger PickIntraL0Compaction. for (int i = 5; i < 10; i++) { IngestOneKeyValue(dbfull(), Key(i), value, options); ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); } TEST_SYNC_POINT("DBCompactionTestWithParam::FlushAfterIntraL0:1"); // Put one key, to make biggest log sequence number in this memtable is bigger // than sst which would be ingested in next step. ASSERT_OK(Put(Key(2), "b")); ASSERT_EQ(10, NumTableFilesAtLevel(0)); dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); std::vector> level_to_files; dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); ASSERT_GT(level_to_files[0].size(), 0); ASSERT_GT(pick_intra_l0_count.load(), 0); ASSERT_OK(Flush()); } TEST_P(DBCompactionTestWithParam, IntraL0CompactionAfterFlushCheckConsistencyFail) { Options options = CurrentOptions(); options.force_consistency_checks = true; options.compression = kNoCompression; options.level0_file_num_compaction_trigger = 5; options.max_background_compactions = 2; options.max_subcompactions = max_subcompactions_; options.write_buffer_size = 2 << 20; options.max_write_buffer_number = 6; DestroyAndReopen(options); const size_t kValueSize = 1 << 20; Random rnd(301); std::string value(RandomString(&rnd, kValueSize)); std::string value2(RandomString(&rnd, kValueSize)); std::string bigvalue = value + value; // prevents trivial move for (int i = 0; i < 10; ++i) { ASSERT_OK(Put(Key(i), "")); // prevents trivial move } ASSERT_OK(Flush()); Compact("", Key(99)); ASSERT_EQ(0, NumTableFilesAtLevel(0)); std::atomic pick_intra_l0_count(0); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBCompactionTestWithParam::IntraL0CompactionAfterFlush:1", "CompactionJob::Run():Start"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FindIntraL0Compaction", [&](void* /*arg*/) { pick_intra_l0_count.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Make 6 L0 sst. for (int i = 0; i < 6; ++i) { if (i % 2 == 0) { IngestOneKeyValue(dbfull(), Key(i), value, options); } else { ASSERT_OK(Put(Key(i), value)); ASSERT_OK(Flush()); } } ASSERT_EQ(6, NumTableFilesAtLevel(0)); // Stop run flush job env_->SetBackgroundThreads(1, Env::HIGH); test::SleepingBackgroundTask sleeping_tasks; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_tasks, Env::Priority::HIGH); sleeping_tasks.WaitUntilSleeping(); // Put many keys to make memtable request to flush for (int i = 0; i < 6; ++i) { ASSERT_OK(Put(Key(i), bigvalue)); } ASSERT_EQ(6, NumTableFilesAtLevel(0)); // ingest file to trigger IntraL0Compaction for (int i = 6; i < 10; ++i) { ASSERT_EQ(i, NumTableFilesAtLevel(0)); IngestOneKeyValue(dbfull(), Key(i), value2, options); } ASSERT_EQ(10, NumTableFilesAtLevel(0)); // Wake up flush job sleeping_tasks.WakeUp(); sleeping_tasks.WaitUntilDone(); TEST_SYNC_POINT("DBCompactionTestWithParam::IntraL0CompactionAfterFlush:1"); dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); uint64_t error_count = 0; db_->GetIntProperty("rocksdb.background-errors", &error_count); ASSERT_EQ(error_count, 0); ASSERT_GT(pick_intra_l0_count.load(), 0); for (int i = 0; i < 6; ++i) { ASSERT_EQ(bigvalue, Get(Key(i))); } for (int i = 6; i < 10; ++i) { ASSERT_EQ(value2, Get(Key(i))); } } #endif // !defined(ROCKSDB_LITE) } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { #if !defined(ROCKSDB_LITE) ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); #else (void) 
argc; (void) argv; return 0; #endif } rocksdb-6.11.4/db/db_dynamic_level_test.cc000066400000000000000000000415601370372246700204660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // Introduction of SyncPoint effectively disabled building and running this test // in Release build. // which is a pity, it is a good test #if !defined(ROCKSDB_LITE) #include "db/db_test_util.h" #include "port/port.h" #include "port/stack_trace.h" namespace ROCKSDB_NAMESPACE { class DBTestDynamicLevel : public DBTestBase { public: DBTestDynamicLevel() : DBTestBase("/db_dynamic_level_test") {} }; TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase) { if (!Snappy_Supported() || !LZ4_Supported()) { return; } // Use InMemoryEnv, or it would be too slow. 
std::unique_ptr env(new MockEnv(env_)); const int kNKeys = 1000; int keys[kNKeys]; auto verify_func = [&]() { for (int i = 0; i < kNKeys; i++) { ASSERT_NE("NOT_FOUND", Get(Key(i))); ASSERT_NE("NOT_FOUND", Get(Key(kNKeys * 2 + i))); if (i < kNKeys / 10) { ASSERT_EQ("NOT_FOUND", Get(Key(kNKeys + keys[i]))); } else { ASSERT_NE("NOT_FOUND", Get(Key(kNKeys + keys[i]))); } } }; Random rnd(301); for (int ordered_insert = 0; ordered_insert <= 1; ordered_insert++) { for (int i = 0; i < kNKeys; i++) { keys[i] = i; } if (ordered_insert == 0) { RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); } for (int max_background_compactions = 1; max_background_compactions < 4; max_background_compactions += 2) { Options options; options.env = env.get(); options.create_if_missing = true; options.write_buffer_size = 2048; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 2; options.target_file_size_base = 2048; options.level_compaction_dynamic_level_bytes = true; options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_multiplier = 4; options.soft_rate_limit = 1.1; options.max_background_compactions = max_background_compactions; options.num_levels = 5; options.compression_per_level.resize(3); options.compression_per_level[0] = kNoCompression; options.compression_per_level[1] = kLZ4Compression; options.compression_per_level[2] = kSnappyCompression; options.env = env_; DestroyAndReopen(options); for (int i = 0; i < kNKeys; i++) { int key = keys[i]; ASSERT_OK(Put(Key(kNKeys + key), RandomString(&rnd, 102))); ASSERT_OK(Put(Key(key), RandomString(&rnd, 102))); ASSERT_OK(Put(Key(kNKeys * 2 + key), RandomString(&rnd, 102))); ASSERT_OK(Delete(Key(kNKeys + keys[i / 10]))); env_->SleepForMicroseconds(5000); } uint64_t int_prop; ASSERT_TRUE(db_->GetIntProperty("rocksdb.background-errors", &int_prop)); ASSERT_EQ(0U, int_prop); // Verify DB for (int j = 0; j < 2; 
j++) { verify_func(); if (j == 0) { Reopen(options); } } // Test compact range works dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); // All data should be in the last level. ColumnFamilyMetaData cf_meta; db_->GetColumnFamilyMetaData(&cf_meta); ASSERT_EQ(5U, cf_meta.levels.size()); for (int i = 0; i < 4; i++) { ASSERT_EQ(0U, cf_meta.levels[i].files.size()); } ASSERT_GT(cf_meta.levels[4U].files.size(), 0U); verify_func(); Close(); } } env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); } // Test specific cases in dynamic max bytes TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase2) { Random rnd(301); int kMaxKey = 1000000; Options options = CurrentOptions(); options.compression = kNoCompression; options.create_if_missing = true; options.write_buffer_size = 20480; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 9999; options.level0_stop_writes_trigger = 9999; options.target_file_size_base = 9102; options.level_compaction_dynamic_level_bytes = true; options.max_bytes_for_level_base = 40960; options.max_bytes_for_level_multiplier = 4; options.max_background_compactions = 2; options.num_levels = 5; options.max_compaction_bytes = 0; // Force not expanding in compactions BlockBasedTableOptions table_options; table_options.block_size = 1024; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "true"}, })); uint64_t int_prop; std::string str_prop; // Initial base level is the last level ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(4U, int_prop); // Put about 28K to L0 for (int i = 0; i < 70; i++) { ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 380))); } ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); Flush(); dbfull()->TEST_WaitForCompact(); 
ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(4U, int_prop); // Insert extra about 28K to L0. After they are compacted to L4, the base // level should be changed to L3. ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "true"}, })); for (int i = 0; i < 70; i++) { ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 380))); } ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(3U, int_prop); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop)); ASSERT_EQ("0", str_prop); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop)); ASSERT_EQ("0", str_prop); // Write even more data while leaving the base level at L3. ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "true"}, })); // Write about 40K more for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 380))); } ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(3U, int_prop); // Fill up L0, and then run an (auto) L0->Lmax compaction to raise the base // level to 2. ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "true"}, })); // Write about 650K more. // Each file is about 11KB, with 9KB of data. for (int i = 0; i < 1300; i++) { ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 380))); } // Make sure that the compaction starts before the last bit of data is // flushed, so that the base level isn't raised to L1. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"CompactionJob::Run():Start", "DynamicLevelMaxBytesBase2:0"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:0"); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(2U, int_prop); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); // Write more data until the base level changes to L1. There will be // a manual compaction going on at the same time. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"CompactionJob::Run():Start", "DynamicLevelMaxBytesBase2:1"}, {"DynamicLevelMaxBytesBase2:2", "CompactionJob::Run():End"}, {"DynamicLevelMaxBytesBase2:compact_range_finish", "FlushJob::WriteLevel0Table"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread thread([this] { TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:compact_range_start"); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:compact_range_finish"); }); TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:1"); for (int i = 0; i < 2; i++) { ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 380))); } TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:2"); Flush(); thread.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(1U, int_prop); } // Test specific cases in dynamic max bytes TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesCompactRange) { Random rnd(301); int kMaxKey = 1000000; Options options = CurrentOptions(); options.create_if_missing = true; options.write_buffer_size 
= 2048; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 9999; options.level0_stop_writes_trigger = 9999; options.target_file_size_base = 2; options.level_compaction_dynamic_level_bytes = true; options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_multiplier = 4; options.max_background_compactions = 1; const int kNumLevels = 5; options.num_levels = kNumLevels; options.max_compaction_bytes = 1; // Force not expanding in compactions BlockBasedTableOptions table_options; table_options.block_size = 1024; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); // Compact against empty DB dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); uint64_t int_prop; std::string str_prop; // Initial base level is the last level ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(4U, int_prop); // Put about 7K to L0 for (int i = 0; i < 140; i++) { ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 80))); } Flush(); dbfull()->TEST_WaitForCompact(); if (NumTableFilesAtLevel(0) == 0) { // Make sure level 0 is not empty ASSERT_OK(Put(Key(static_cast(rnd.Uniform(kMaxKey))), RandomString(&rnd, 80))); Flush(); } ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(3U, int_prop); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop)); ASSERT_EQ("0", str_prop); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level2", &str_prop)); ASSERT_EQ("0", str_prop); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); std::set output_levels; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionPicker::CompactRange:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); output_levels.insert(compaction->output_level()); }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(output_levels.size(), 2); ASSERT_TRUE(output_levels.find(3) != output_levels.end()); ASSERT_TRUE(output_levels.find(4) != output_levels.end()); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &str_prop)); ASSERT_EQ("0", str_prop); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level3", &str_prop)); ASSERT_EQ("0", str_prop); // Base level is still level 3. ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(3U, int_prop); } TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBaseInc) { Options options = CurrentOptions(); options.create_if_missing = true; options.write_buffer_size = 2048; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 2; options.target_file_size_base = 2048; options.level_compaction_dynamic_level_bytes = true; options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_multiplier = 4; options.soft_rate_limit = 1.1; options.max_background_compactions = 2; options.num_levels = 5; options.max_compaction_bytes = 100000000; DestroyAndReopen(options); int non_trivial = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* /*arg*/) { non_trivial++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); const int total_keys = 3000; const int random_part_size = 100; for (int i = 0; i < total_keys; i++) { std::string value = RandomString(&rnd, random_part_size); PutFixed32(&value, static_cast(i)); ASSERT_OK(Put(Key(i), value)); } Flush(); dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(non_trivial, 0); for (int i = 0; i < total_keys; i++) { std::string value = Get(Key(i)); ASSERT_EQ(DecodeFixed32(value.c_str() + 
random_part_size), static_cast(i)); } env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); } TEST_F(DBTestDynamicLevel, DISABLED_MigrateToDynamicLevelMaxBytesBase) { Random rnd(301); const int kMaxKey = 2000; Options options; options.create_if_missing = true; options.write_buffer_size = 2048; options.max_write_buffer_number = 8; options.level0_file_num_compaction_trigger = 4; options.level0_slowdown_writes_trigger = 4; options.level0_stop_writes_trigger = 8; options.target_file_size_base = 2048; options.level_compaction_dynamic_level_bytes = false; options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_multiplier = 4; options.soft_rate_limit = 1.1; options.num_levels = 8; DestroyAndReopen(options); auto verify_func = [&](int num_keys, bool if_sleep) { for (int i = 0; i < num_keys; i++) { ASSERT_NE("NOT_FOUND", Get(Key(kMaxKey + i))); if (i < num_keys / 10) { ASSERT_EQ("NOT_FOUND", Get(Key(i))); } else { ASSERT_NE("NOT_FOUND", Get(Key(i))); } if (if_sleep && i % 1000 == 0) { // Without it, valgrind may choose not to give another // thread a chance to run before finishing the function, // causing the test to be extremely slow. env_->SleepForMicroseconds(1); } } }; int total_keys = 1000; for (int i = 0; i < total_keys; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); ASSERT_OK(Delete(Key(i / 10))); } verify_func(total_keys, false); dbfull()->TEST_WaitForCompact(); options.level_compaction_dynamic_level_bytes = true; options.disable_auto_compactions = true; Reopen(options); verify_func(total_keys, false); std::atomic_bool compaction_finished; compaction_finished = false; // Issue manual compaction in one thread and still verify DB state // in main thread. 
ROCKSDB_NAMESPACE::port::Thread t([&]() { CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = options.num_levels - 1; dbfull()->CompactRange(compact_options, nullptr, nullptr); compaction_finished.store(true); }); do { verify_func(total_keys, true); } while (!compaction_finished.load()); t.join(); ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); int total_keys2 = 2000; for (int i = total_keys; i < total_keys2; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); ASSERT_OK(Delete(Key(i / 10))); } verify_func(total_keys2, false); dbfull()->TEST_WaitForCompact(); verify_func(total_keys2, false); // Base level is not level 1 ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_EQ(NumTableFilesAtLevel(2), 0); } } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) int main(int argc, char** argv) { #if !defined(ROCKSDB_LITE) ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); #else (void) argc; (void) argv; return 0; #endif } rocksdb-6.11.4/db/db_encryption_test.cc000066400000000000000000000067401370372246700200460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/perf_context.h" #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif #include #include namespace ROCKSDB_NAMESPACE { class DBEncryptionTest : public DBTestBase { public: DBEncryptionTest() : DBTestBase("/db_encryption_test") {} }; #ifndef ROCKSDB_LITE TEST_F(DBEncryptionTest, CheckEncrypted) { ASSERT_OK(Put("foo567", "v1.fetdq")); ASSERT_OK(Put("bar123", "v2.dfgkjdfghsd")); Close(); // Open all files and look for the values we've put in there. // They should not be found if encrypted, otherwise // they should be found. std::vector fileNames; auto status = env_->GetChildren(dbname_, &fileNames); ASSERT_OK(status); auto defaultEnv = Env::Default(); int hits = 0; for (auto it = fileNames.begin() ; it != fileNames.end(); ++it) { if ((*it == "..") || (*it == ".")) { continue; } auto filePath = dbname_ + "/" + *it; std::unique_ptr seqFile; auto envOptions = EnvOptions(CurrentOptions()); status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions); ASSERT_OK(status); uint64_t fileSize; status = defaultEnv->GetFileSize(filePath, &fileSize); ASSERT_OK(status); std::string scratch; scratch.reserve(fileSize); Slice data; status = seqFile->Read(fileSize, &data, (char*)scratch.data()); ASSERT_OK(status); if (data.ToString().find("foo567") != std::string::npos) { hits++; //std::cout << "Hit in " << filePath << "\n"; } if (data.ToString().find("v1.fetdq") != std::string::npos) { hits++; //std::cout << "Hit in " << filePath << "\n"; } if (data.ToString().find("bar123") != std::string::npos) { hits++; //std::cout << "Hit in " << filePath << "\n"; } if (data.ToString().find("v2.dfgkjdfghsd") != std::string::npos) { hits++; //std::cout << "Hit in " << filePath << "\n"; } if (data.ToString().find("dfgk") != std::string::npos) { hits++; //std::cout << "Hit in " << filePath << "\n"; } } if (encrypted_env_) { ASSERT_EQ(hits, 0); } else { ASSERT_GE(hits, 4); } } 
TEST_F(DBEncryptionTest, ReadEmptyFile) { auto defaultEnv = Env::Default(); // create empty file for reading it back in later auto envOptions = EnvOptions(CurrentOptions()); auto filePath = dbname_ + "/empty.empty"; Status status; { std::unique_ptr writableFile; status = defaultEnv->NewWritableFile(filePath, &writableFile, envOptions); ASSERT_OK(status); } std::unique_ptr seqFile; status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions); ASSERT_OK(status); std::string scratch; Slice data; // reading back 16 bytes from the empty file shouldn't trigger an assertion. // it should just work and return an empty string status = seqFile->Read(16, &data, (char*)scratch.data()); ASSERT_OK(status); ASSERT_TRUE(data.empty()); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_filesnapshot.cc000066400000000000000000000105161370372246700173100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #ifndef ROCKSDB_LITE #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/job_context.h" #include "db/version_set.h" #include "file/file_util.h" #include "file/filename.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { Status DBImpl::GetLiveFiles(std::vector& ret, uint64_t* manifest_file_size, bool flush_memtable) { *manifest_file_size = 0; mutex_.Lock(); if (flush_memtable) { // flush all dirty data to disk. 
Status status; if (immutable_db_options_.atomic_flush) { autovector cfds; SelectColumnFamiliesForAtomicFlush(&cfds); mutex_.Unlock(); status = AtomicFlushMemTables(cfds, FlushOptions(), FlushReason::kGetLiveFiles); if (status.IsColumnFamilyDropped()) { status = Status::OK(); } mutex_.Lock(); } else { for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } cfd->Ref(); mutex_.Unlock(); status = FlushMemTable(cfd, FlushOptions(), FlushReason::kGetLiveFiles); TEST_SYNC_POINT("DBImpl::GetLiveFiles:1"); TEST_SYNC_POINT("DBImpl::GetLiveFiles:2"); mutex_.Lock(); cfd->UnrefAndTryDelete(); if (!status.ok() && !status.IsColumnFamilyDropped()) { break; } else if (status.IsColumnFamilyDropped()) { status = Status::OK(); } } } versions_->GetColumnFamilySet()->FreeDeadColumnFamilies(); if (!status.ok()) { mutex_.Unlock(); ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Cannot Flush data %s\n", status.ToString().c_str()); return status; } } // Make a set of all of the live table and blob files std::vector live_table_files; std::vector live_blob_files; for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } cfd->current()->AddLiveFiles(&live_table_files, &live_blob_files); } ret.clear(); ret.reserve(live_table_files.size() + live_blob_files.size() + 3); // for CURRENT + MANIFEST + OPTIONS // create names of the live files. 
The names are not absolute // paths, instead they are relative to dbname_; for (const auto& table_file_number : live_table_files) { ret.emplace_back(MakeTableFileName("", table_file_number)); } for (const auto& blob_file_number : live_blob_files) { ret.emplace_back(BlobFileName("", blob_file_number)); } ret.emplace_back(CurrentFileName("")); ret.emplace_back(DescriptorFileName("", versions_->manifest_file_number())); ret.emplace_back(OptionsFileName("", versions_->options_file_number())); // find length of manifest file while holding the mutex lock *manifest_file_size = versions_->manifest_file_size(); mutex_.Unlock(); return Status::OK(); } Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { { // If caller disabled deletions, this function should return files that are // guaranteed not to be deleted until deletions are re-enabled. We need to // wait for pending purges to finish since WalManager doesn't know which // files are going to be purged. Additional purges won't be scheduled as // long as deletions are disabled (so the below loop must terminate). InstrumentedMutexLock l(&mutex_); while (disable_delete_obsolete_files_ > 0 && pending_purge_obsolete_files_ > 0) { bg_cv_.Wait(); } } return wal_manager_.GetSortedWalFiles(files); } Status DBImpl::GetCurrentWalFile(std::unique_ptr* current_log_file) { uint64_t current_logfile_number; { InstrumentedMutexLock l(&mutex_); current_logfile_number = logfile_number_; } return wal_manager_.GetLiveWalFile(current_logfile_number, current_log_file); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/db_flush_test.cc000066400000000000000000000675521370372246700170050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. 
All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "port/port.h" #include "port/stack_trace.h" #include "test_util/fault_injection_test_env.h" #include "test_util/sync_point.h" #include "util/cast_util.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { class DBFlushTest : public DBTestBase { public: DBFlushTest() : DBTestBase("/db_flush_test") {} }; class DBFlushDirectIOTest : public DBFlushTest, public ::testing::WithParamInterface { public: DBFlushDirectIOTest() : DBFlushTest() {} }; class DBAtomicFlushTest : public DBFlushTest, public ::testing::WithParamInterface { public: DBAtomicFlushTest() : DBFlushTest() {} }; // We had issue when two background threads trying to flush at the same time, // only one of them get committed. The test verifies the issue is fixed. TEST_F(DBFlushTest, FlushWhileWritingManifest) { Options options; options.disable_auto_compactions = true; options.max_background_flushes = 2; options.env = env_; Reopen(options); FlushOptions no_wait; no_wait.wait = false; no_wait.allow_write_stall=true; SyncPoint::GetInstance()->LoadDependency( {{"VersionSet::LogAndApply:WriteManifest", "DBFlushTest::FlushWhileWritingManifest:1"}, {"MemTableList::TryInstallMemtableFlushResults:InProgress", "VersionSet::LogAndApply:WriteManifestDone"}}); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put("foo", "v")); ASSERT_OK(dbfull()->Flush(no_wait)); TEST_SYNC_POINT("DBFlushTest::FlushWhileWritingManifest:1"); ASSERT_OK(Put("bar", "v")); ASSERT_OK(dbfull()->Flush(no_wait)); // If the issue is hit we will wait here forever. dbfull()->TEST_WaitForFlushMemTable(); #ifndef ROCKSDB_LITE ASSERT_EQ(2, TotalTableFiles()); #endif // ROCKSDB_LITE } // Disable this test temporarily on Travis as it fails intermittently. 
// Github issue: #4151
//
// Makes the filesystem inactive while closed logs are being synced during a
// flush, and checks that the failed flush produces no table file and releases
// its reference on the current version.
TEST_F(DBFlushTest, SyncFail) {
  std::unique_ptr<FaultInjectionTestEnv> fault_injection_env(
      new FaultInjectionTestEnv(env_));
  Options options;
  options.disable_auto_compactions = true;
  options.env = fault_injection_env.get();

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBFlushTest::SyncFail:GetVersionRefCount:1",
        "DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"},
       {"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables",
        "DBFlushTest::SyncFail:GetVersionRefCount:2"},
       {"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"},
       {"DBImpl::SyncClosedLogs:Failed", "DBFlushTest::SyncFail:2"}});
  SyncPoint::GetInstance()->EnableProcessing();

  CreateAndReopenWithCF({"pikachu"}, options);
  // The filesystem is still active here, so the write must succeed.
  ASSERT_OK(Put("key", "value"));
  auto* cfd =
      reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
          ->cfd();
  FlushOptions flush_options;
  flush_options.wait = false;
  ASSERT_OK(dbfull()->Flush(flush_options));
  // Flush installs a new super-version. Get the ref count after that.
  auto current_before = cfd->current();
  int refs_before = cfd->current()->TEST_refs();
  TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:1");
  TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:2");
  int refs_after_picking_memtables = cfd->current()->TEST_refs();
  ASSERT_EQ(refs_before + 1, refs_after_picking_memtables);
  fault_injection_env->SetFilesystemActive(false);
  TEST_SYNC_POINT("DBFlushTest::SyncFail:1");
  TEST_SYNC_POINT("DBFlushTest::SyncFail:2");
  fault_injection_env->SetFilesystemActive(true);
  // Now the background job will do the flush; wait for it.
  dbfull()->TEST_WaitForFlushMemTable();
#ifndef ROCKSDB_LITE
  ASSERT_EQ("", FilesPerLevel());  // flush failed.
#endif  // ROCKSDB_LITE
  // Background flush job should release ref count to current version.
  ASSERT_EQ(current_before, cfd->current());
  ASSERT_EQ(refs_before, cfd->current()->TEST_refs());
  Destroy(options);
}

// Orders the test around the "DBImpl::SyncClosedLogs:Skip" sync point and
// waits for the non-blocking flush to finish.
TEST_F(DBFlushTest, SyncSkip) {
  Options options = CurrentOptions();

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBFlushTest::SyncSkip:1", "DBImpl::SyncClosedLogs:Skip"},
       {"DBImpl::SyncClosedLogs:Skip", "DBFlushTest::SyncSkip:2"}});
  SyncPoint::GetInstance()->EnableProcessing();

  Reopen(options);
  ASSERT_OK(Put("key", "value"));

  FlushOptions flush_options;
  flush_options.wait = false;
  ASSERT_OK(dbfull()->Flush(flush_options));

  TEST_SYNC_POINT("DBFlushTest::SyncSkip:1");
  TEST_SYNC_POINT("DBFlushTest::SyncSkip:2");

  // Now the background job will do the flush; wait for it.
  dbfull()->TEST_WaitForFlushMemTable();

  Destroy(options);
}

TEST_F(DBFlushTest, FlushInLowPriThreadPool) {
  // Verify setting an empty high-pri (flush) thread pool causes flushes to be
  // scheduled in the low-pri (compaction) thread pool.
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(1));
  Reopen(options);
  env_->SetBackgroundThreads(0, Env::HIGH);

  // Id of the first thread seen running a flush; every later flush and
  // compaction is expected to run on that same thread.
  std::thread::id tid;
  int num_flushes = 0, num_compactions = 0;
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BGWorkFlush", [&](void* /*arg*/) {
        if (tid == std::thread::id()) {
          tid = std::this_thread::get_id();
        } else {
          ASSERT_EQ(tid, std::this_thread::get_id());
        }
        ++num_flushes;
      });
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BGWorkCompaction", [&](void* /*arg*/) {
        ASSERT_EQ(tid, std::this_thread::get_id());
        ++num_compactions;
      });
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put("key", "val"));
  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put("key", "val"));
    dbfull()->TEST_WaitForFlushMemTable();
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(4, num_flushes);
  ASSERT_EQ(1, num_compactions);
}

TEST_F(DBFlushTest, ManualFlushWithMinWriteBufferNumberToMerge) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100;
  options.max_write_buffer_number = 4;
  options.min_write_buffer_number_to_merge = 3;
  Reopen(options);

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkFlush",
        "DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:1"},
       {"DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:2",
        "FlushJob::WriteLevel0Table"}});
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put("key1", "value1"));

  port::Thread t([&]() {
    // The call waits for the flush to finish, i.e. with
    // flush_options.wait = true.
    ASSERT_OK(Flush());
  });

  // Wait for flush start.
  TEST_SYNC_POINT("DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:1");
  // Insert a second memtable before the manual flush finishes.
  // At the end of the manual flush job, it will check if further flush
  // is needed, but it will not trigger flush of the second memtable because
  // min_write_buffer_number_to_merge is not reached.
  ASSERT_OK(Put("key2", "value2"));
  ASSERT_OK(dbfull()->TEST_SwitchMemtable());
  TEST_SYNC_POINT("DBFlushTest::ManualFlushWithMinWriteBufferNumberToMerge:2");

  // Manual flush should return, without waiting for flush indefinitely.
t.join(); } TEST_F(DBFlushTest, ScheduleOnlyOneBgThread) { Options options = CurrentOptions(); Reopen(options); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); int called = 0; SyncPoint::GetInstance()->SetCallBack( "DBImpl::MaybeScheduleFlushOrCompaction:AfterSchedule:0", [&](void* arg) { ASSERT_NE(nullptr, arg); auto unscheduled_flushes = *reinterpret_cast(arg); ASSERT_EQ(0, unscheduled_flushes); ++called; }); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put("a", "foo")); FlushOptions flush_opts; ASSERT_OK(dbfull()->Flush(flush_opts)); ASSERT_EQ(1, called); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(DBFlushDirectIOTest, DirectIO) { Options options; options.create_if_missing = true; options.disable_auto_compactions = true; options.max_background_flushes = 2; options.use_direct_io_for_flush_and_compaction = GetParam(); options.env = new MockEnv(Env::Default()); SyncPoint::GetInstance()->SetCallBack( "BuildTable:create_file", [&](void* arg) { bool* use_direct_writes = static_cast(arg); ASSERT_EQ(*use_direct_writes, options.use_direct_io_for_flush_and_compaction); }); SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); ASSERT_OK(Put("foo", "v")); FlushOptions flush_options; flush_options.wait = true; ASSERT_OK(dbfull()->Flush(flush_options)); Destroy(options); delete options.env; } TEST_F(DBFlushTest, FlushError) { Options options; std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); options.write_buffer_size = 100; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 3; options.disable_auto_compactions = true; options.env = fault_injection_env.get(); Reopen(options); ASSERT_OK(Put("key1", "value1")); ASSERT_OK(Put("key2", "value2")); fault_injection_env->SetFilesystemActive(false); Status s = dbfull()->TEST_SwitchMemtable(); fault_injection_env->SetFilesystemActive(true); Destroy(options); 
ASSERT_NE(s, Status::OK()); } TEST_F(DBFlushTest, ManualFlushFailsInReadOnlyMode) { // Regression test for bug where manual flush hangs forever when the DB // is in read-only mode. Verify it now at least returns, despite failing. Options options; std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); options.env = fault_injection_env.get(); options.max_write_buffer_number = 2; Reopen(options); // Trigger a first flush but don't let it run ASSERT_OK(db_->PauseBackgroundWork()); ASSERT_OK(Put("key1", "value1")); FlushOptions flush_opts; flush_opts.wait = false; ASSERT_OK(db_->Flush(flush_opts)); // Write a key to the second memtable so we have something to flush later // after the DB is in read-only mode. ASSERT_OK(Put("key2", "value2")); // Let the first flush continue, hit an error, and put the DB in read-only // mode. fault_injection_env->SetFilesystemActive(false); ASSERT_OK(db_->ContinueBackgroundWork()); dbfull()->TEST_WaitForFlushMemTable(); #ifndef ROCKSDB_LITE uint64_t num_bg_errors; ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBackgroundErrors, &num_bg_errors)); ASSERT_GT(num_bg_errors, 0); #endif // ROCKSDB_LITE // In the bug scenario, triggering another flush would cause the second flush // to hang forever. After the fix we expect it to return an error. 
ASSERT_NOK(db_->Flush(FlushOptions())); Close(); } TEST_F(DBFlushTest, CFDropRaceWithWaitForFlushMemTables) { Options options = CurrentOptions(); options.create_if_missing = true; CreateAndReopenWithCF({"pikachu"}, options); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::FlushMemTable:AfterScheduleFlush", "DBFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"}, {"DBFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree", "DBImpl::BackgroundCallFlush:start"}, {"DBImpl::BackgroundCallFlush:start", "DBImpl::FlushMemTable:BeforeWaitForBgFlush"}}); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_EQ(2, handles_.size()); ASSERT_OK(Put(1, "key", "value")); auto* cfd = static_cast(handles_[1])->cfd(); port::Thread drop_cf_thr([&]() { TEST_SYNC_POINT( "DBFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"); ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); handles_.resize(1); TEST_SYNC_POINT( "DBFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree"); }); FlushOptions flush_opts; flush_opts.allow_write_stall = true; ASSERT_NOK(dbfull()->TEST_FlushMemTable(cfd, flush_opts)); drop_cf_thr.join(); Close(); SyncPoint::GetInstance()->DisableProcessing(); } #ifndef ROCKSDB_LITE TEST_F(DBFlushTest, FireOnFlushCompletedAfterCommittedResult) { class TestListener : public EventListener { public: void OnFlushCompleted(DB* db, const FlushJobInfo& info) override { // There's only one key in each flush. 
ASSERT_EQ(info.smallest_seqno, info.largest_seqno); ASSERT_NE(0, info.smallest_seqno); if (info.smallest_seqno == seq1) { // First flush completed ASSERT_FALSE(completed1); completed1 = true; CheckFlushResultCommitted(db, seq1); } else { // Second flush completed ASSERT_FALSE(completed2); completed2 = true; ASSERT_EQ(info.smallest_seqno, seq2); CheckFlushResultCommitted(db, seq2); } } void CheckFlushResultCommitted(DB* db, SequenceNumber seq) { DBImpl* db_impl = static_cast_with_check(db); InstrumentedMutex* mutex = db_impl->mutex(); mutex->Lock(); auto* cfd = reinterpret_cast(db->DefaultColumnFamily()) ->cfd(); ASSERT_LT(seq, cfd->imm()->current()->GetEarliestSequenceNumber()); mutex->Unlock(); } std::atomic seq1{0}; std::atomic seq2{0}; std::atomic completed1{false}; std::atomic completed2{false}; }; std::shared_ptr listener = std::make_shared(); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BackgroundCallFlush:start", "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitFirst"}, {"DBImpl::FlushMemTableToOutputFile:Finish", "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitSecond"}}); SyncPoint::GetInstance()->SetCallBack( "FlushJob::WriteLevel0Table", [&listener](void* arg) { // Wait for the second flush finished, out of mutex. auto* mems = reinterpret_cast*>(arg); if (mems->front()->GetEarliestSequenceNumber() == listener->seq1 - 1) { TEST_SYNC_POINT( "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:" "WaitSecond"); } }); Options options = CurrentOptions(); options.create_if_missing = true; options.listeners.push_back(listener); // Setting max_flush_jobs = max_background_jobs / 4 = 2. options.max_background_jobs = 8; // Allow 2 immutable memtables. options.max_write_buffer_number = 3; Reopen(options); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put("foo", "v")); listener->seq1 = db_->GetLatestSequenceNumber(); // t1 will wait for the second flush complete before committing flush result. 
auto t1 = port::Thread([&]() { // flush_opts.wait = true ASSERT_OK(db_->Flush(FlushOptions())); }); // Wait for first flush started. TEST_SYNC_POINT( "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitFirst"); // The second flush will exit early without commit its result. The work // is delegated to the first flush. ASSERT_OK(Put("bar", "v")); listener->seq2 = db_->GetLatestSequenceNumber(); FlushOptions flush_opts; flush_opts.wait = false; ASSERT_OK(db_->Flush(flush_opts)); t1.join(); ASSERT_TRUE(listener->completed1); ASSERT_TRUE(listener->completed2); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } #endif // !ROCKSDB_LITE TEST_P(DBAtomicFlushTest, ManualAtomicFlush) { Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = GetParam(); options.write_buffer_size = (static_cast(64) << 20); CreateAndReopenWithCF({"pikachu", "eevee"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); WriteOptions wopts; wopts.disableWAL = true; for (size_t i = 0; i != num_cfs; ++i) { ASSERT_OK(Put(static_cast(i) /*cf*/, "key", "value", wopts)); } std::vector cf_ids; for (size_t i = 0; i != num_cfs; ++i) { cf_ids.emplace_back(static_cast(i)); } ASSERT_OK(Flush(cf_ids)); for (size_t i = 0; i != num_cfs; ++i) { auto cfh = static_cast(handles_[i]); ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); } } TEST_P(DBAtomicFlushTest, AtomicFlushTriggeredByMemTableFull) { Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = GetParam(); // 4KB so that we can easily trigger auto flush. 
options.write_buffer_size = 4096; SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BackgroundCallFlush:FlushFinish:0", "DBAtomicFlushTest::AtomicFlushTriggeredByMemTableFull:BeforeCheck"}}); SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); WriteOptions wopts; wopts.disableWAL = true; for (size_t i = 0; i != num_cfs; ++i) { ASSERT_OK(Put(static_cast(i) /*cf*/, "key", "value", wopts)); } // Keep writing to one of them column families to trigger auto flush. for (int i = 0; i != 4000; ++i) { ASSERT_OK(Put(static_cast(num_cfs) - 1 /*cf*/, "key" + std::to_string(i), "value" + std::to_string(i), wopts)); } TEST_SYNC_POINT( "DBAtomicFlushTest::AtomicFlushTriggeredByMemTableFull:BeforeCheck"); if (options.atomic_flush) { for (size_t i = 0; i + 1 != num_cfs; ++i) { auto cfh = static_cast(handles_[i]); ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); } } else { for (size_t i = 0; i + 1 != num_cfs; ++i) { auto cfh = static_cast(handles_[i]); ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed()); ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty()); } } SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBAtomicFlushTest, AtomicFlushRollbackSomeJobs) { bool atomic_flush = GetParam(); if (!atomic_flush) { return; } std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = atomic_flush; options.env = fault_injection_env.get(); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:1", "DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:1"}, {"DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:2", "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2"}}); SyncPoint::GetInstance()->EnableProcessing(); 
CreateAndReopenWithCF({"pikachu", "eevee"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); WriteOptions wopts; wopts.disableWAL = true; for (size_t i = 0; i != num_cfs; ++i) { int cf_id = static_cast(i); ASSERT_OK(Put(cf_id, "key", "value", wopts)); } FlushOptions flush_opts; flush_opts.wait = false; ASSERT_OK(dbfull()->Flush(flush_opts, handles_)); TEST_SYNC_POINT("DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:1"); fault_injection_env->SetFilesystemActive(false); TEST_SYNC_POINT("DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:2"); for (auto* cfh : handles_) { dbfull()->TEST_WaitForFlushMemTable(cfh); } for (size_t i = 0; i != num_cfs; ++i) { auto cfh = static_cast(handles_[i]); ASSERT_EQ(1, cfh->cfd()->imm()->NumNotFlushed()); ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty()); } fault_injection_env->SetFilesystemActive(true); Destroy(options); } TEST_P(DBAtomicFlushTest, FlushMultipleCFs_DropSomeBeforeRequestFlush) { bool atomic_flush = GetParam(); if (!atomic_flush) { return; } Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = atomic_flush; SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); WriteOptions wopts; wopts.disableWAL = true; std::vector cf_ids; for (size_t i = 0; i != num_cfs; ++i) { int cf_id = static_cast(i); ASSERT_OK(Put(cf_id, "key", "value", wopts)); cf_ids.push_back(cf_id); } ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); ASSERT_TRUE(Flush(cf_ids).IsColumnFamilyDropped()); Destroy(options); } TEST_P(DBAtomicFlushTest, FlushMultipleCFs_DropSomeAfterScheduleFlushBeforeFlushJobRun) { bool atomic_flush = GetParam(); if (!atomic_flush) { return; } Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = atomic_flush; CreateAndReopenWithCF({"pikachu", "eevee"}, 
options); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::AtomicFlushMemTables:AfterScheduleFlush", "DBAtomicFlushTest::BeforeDropCF"}, {"DBAtomicFlushTest::AfterDropCF", "DBImpl::BackgroundCallFlush:start"}}); SyncPoint::GetInstance()->EnableProcessing(); size_t num_cfs = handles_.size(); ASSERT_EQ(3, num_cfs); WriteOptions wopts; wopts.disableWAL = true; for (size_t i = 0; i != num_cfs; ++i) { int cf_id = static_cast(i); ASSERT_OK(Put(cf_id, "key", "value", wopts)); } port::Thread user_thread([&]() { TEST_SYNC_POINT("DBAtomicFlushTest::BeforeDropCF"); ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); TEST_SYNC_POINT("DBAtomicFlushTest::AfterDropCF"); }); FlushOptions flush_opts; flush_opts.wait = true; ASSERT_OK(dbfull()->Flush(flush_opts, handles_)); user_thread.join(); for (size_t i = 0; i != num_cfs; ++i) { int cf_id = static_cast(i); ASSERT_EQ("value", Get(cf_id, "key")); } ReopenWithColumnFamilies({kDefaultColumnFamilyName, "eevee"}, options); num_cfs = handles_.size(); ASSERT_EQ(2, num_cfs); for (size_t i = 0; i != num_cfs; ++i) { int cf_id = static_cast(i); ASSERT_EQ("value", Get(cf_id, "key")); } Destroy(options); } TEST_P(DBAtomicFlushTest, TriggerFlushAndClose) { bool atomic_flush = GetParam(); if (!atomic_flush) { return; } const int kNumKeysTriggerFlush = 4; Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = atomic_flush; options.memtable_factory.reset( new SpecialSkipListFactory(kNumKeysTriggerFlush)); CreateAndReopenWithCF({"pikachu"}, options); for (int i = 0; i != kNumKeysTriggerFlush; ++i) { ASSERT_OK(Put(0, "key" + std::to_string(i), "value" + std::to_string(i))); } SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put(0, "key", "value")); Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, 
"pikachu"}, options); ASSERT_EQ("value", Get(0, "key")); } TEST_P(DBAtomicFlushTest, PickMemtablesRaceWithBackgroundFlush) { bool atomic_flush = GetParam(); Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = atomic_flush; options.max_write_buffer_number = 4; // Set min_write_buffer_number_to_merge to be greater than 1, so that // a column family with one memtable in the imm will not cause IsFlushPending // to return true when flush_requested_ is false. options.min_write_buffer_number_to_merge = 2; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_EQ(2, handles_.size()); ASSERT_OK(dbfull()->PauseBackgroundWork()); ASSERT_OK(Put(0, "key00", "value00")); ASSERT_OK(Put(1, "key10", "value10")); FlushOptions flush_opts; flush_opts.wait = false; ASSERT_OK(dbfull()->Flush(flush_opts, handles_)); ASSERT_OK(Put(0, "key01", "value01")); // Since max_write_buffer_number is 4, the following flush won't cause write // stall. ASSERT_OK(dbfull()->Flush(flush_opts)); ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); handles_[1] = nullptr; ASSERT_OK(dbfull()->ContinueBackgroundWork()); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0])); delete handles_[0]; handles_.clear(); } TEST_P(DBAtomicFlushTest, CFDropRaceWithWaitForFlushMemTables) { bool atomic_flush = GetParam(); if (!atomic_flush) { return; } Options options = CurrentOptions(); options.create_if_missing = true; options.atomic_flush = atomic_flush; CreateAndReopenWithCF({"pikachu"}, options); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::AtomicFlushMemTables:AfterScheduleFlush", "DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"}, {"DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree", "DBImpl::BackgroundCallFlush:start"}, {"DBImpl::BackgroundCallFlush:start", "DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush"}}); 
SyncPoint::GetInstance()->EnableProcessing(); ASSERT_EQ(2, handles_.size()); ASSERT_OK(Put(0, "key", "value")); ASSERT_OK(Put(1, "key", "value")); auto* cfd_default = static_cast(dbfull()->DefaultColumnFamily()) ->cfd(); auto* cfd_pikachu = static_cast(handles_[1])->cfd(); port::Thread drop_cf_thr([&]() { TEST_SYNC_POINT( "DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:BeforeDrop"); ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); delete handles_[1]; handles_.resize(1); TEST_SYNC_POINT( "DBAtomicFlushTest::CFDropRaceWithWaitForFlushMemTables:AfterFree"); }); FlushOptions flush_opts; flush_opts.allow_write_stall = true; ASSERT_OK(dbfull()->TEST_AtomicFlushMemTables({cfd_default, cfd_pikachu}, flush_opts)); drop_cf_thr.join(); Close(); SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBAtomicFlushTest, RollbackAfterFailToInstallResults) { bool atomic_flush = GetParam(); if (!atomic_flush) { return; } auto fault_injection_env = std::make_shared(env_); Options options = CurrentOptions(); options.env = fault_injection_env.get(); options.create_if_missing = true; options.atomic_flush = atomic_flush; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_EQ(2, handles_.size()); for (size_t cf = 0; cf < handles_.size(); ++cf) { ASSERT_OK(Put(static_cast(cf), "a", "value")); } SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0", [&](void* /*arg*/) { fault_injection_env->SetFilesystemActive(false); }); SyncPoint::GetInstance()->EnableProcessing(); FlushOptions flush_opts; Status s = db_->Flush(flush_opts, handles_); ASSERT_NOK(s); fault_injection_env->SetFilesystemActive(true); Close(); SyncPoint::GetInstance()->ClearAllCallBacks(); } INSTANTIATE_TEST_CASE_P(DBFlushDirectIOTest, DBFlushDirectIOTest, testing::Bool()); INSTANTIATE_TEST_CASE_P(DBAtomicFlushTest, DBAtomicFlushTest, testing::Bool()); } // namespace 
ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_impl/000077500000000000000000000000001370372246700152405ustar00rootroot00000000000000rocksdb-6.11.4/db/db_impl/db_impl.cc000066400000000000000000005222461370372246700171700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/db_impl/db_impl.h" #include #ifdef OS_SOLARIS #include #endif #include #include #include #include #include #include #include #include #include #include #include "db/arena_wrapped_db_iter.h" #include "db/builder.h" #include "db/compaction/compaction_job.h" #include "db/db_info_dumper.h" #include "db/db_iter.h" #include "db/dbformat.h" #include "db/error_handler.h" #include "db/event_helpers.h" #include "db/external_sst_file_ingestion_job.h" #include "db/flush_job.h" #include "db/forward_iterator.h" #include "db/import_column_family_job.h" #include "db/job_context.h" #include "db/log_reader.h" #include "db/log_writer.h" #include "db/malloc_stats.h" #include "db/memtable.h" #include "db/memtable_list.h" #include "db/merge_context.h" #include "db/merge_helper.h" #include "db/range_tombstone_fragmenter.h" #include "db/table_cache.h" #include "db/table_properties_collector.h" #include "db/transaction_log_impl.h" #include "db/version_set.h" #include "db/write_batch_internal.h" #include "db/write_callback.h" #include "env/composite_env_wrapper.h" #include "file/file_util.h" #include "file/filename.h" 
#include "file/random_access_file_reader.h" #include "file/sst_file_manager_impl.h" #include "logging/auto_roll_logger.h" #include "logging/log_buffer.h" #include "logging/logging.h" #include "memtable/hash_linklist_rep.h" #include "memtable/hash_skiplist_rep.h" #include "monitoring/in_memory_stats_history.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/perf_context_imp.h" #include "monitoring/persistent_stats_history.h" #include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_util.h" #include "options/cf_options.h" #include "options/options_helper.h" #include "options/options_parser.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" #include "rocksdb/statistics.h" #include "rocksdb/stats_history.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/write_buffer_manager.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_factory.h" #include "table/get_context.h" #include "table/merging_iterator.h" #include "table/multiget_context.h" #include "table/table_builder.h" #include "table/two_level_iterator.h" #include "test_util/sync_point.h" #include "tools/sst_dump_tool_imp.h" #include "util/autovector.h" #include "util/build_version.h" #include "util/cast_util.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" #include "util/mutexlock.h" #include "util/stop_watch.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { const std::string kDefaultColumnFamilyName("default"); const std::string kPersistentStatsColumnFamilyName( "___rocksdb_stats_history___"); void DumpRocksDBBuildVersion(Logger* log); CompressionType GetCompressionFlush( const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options) { // Compressing memtable flushes might not help unless the 
sequential load // optimization is used for leveled compaction. Otherwise the CPU and // latency overhead is not offset by saving much space. if (ioptions.compaction_style == kCompactionStyleUniversal) { if (mutable_cf_options.compaction_options_universal .compression_size_percent < 0) { return mutable_cf_options.compression; } else { return kNoCompression; } } else if (!ioptions.compression_per_level.empty()) { // For leveled compress when min_level_to_compress != 0. return ioptions.compression_per_level[0]; } else { return mutable_cf_options.compression; } } namespace { void DumpSupportInfo(Logger* logger) { ROCKS_LOG_HEADER(logger, "Compression algorithms supported:"); for (auto& compression : OptionsHelper::compression_type_string_map) { if (compression.second != kNoCompression && compression.second != kDisableCompressionOption) { ROCKS_LOG_HEADER(logger, "\t%s supported: %d", compression.first.c_str(), CompressionTypeSupported(compression.second)); } } ROCKS_LOG_HEADER(logger, "Fast CRC32 supported: %s", crc32c::IsFastCrc32Supported().c_str()); } } // namespace DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, const bool seq_per_batch, const bool batch_per_txn) : dbname_(dbname), own_info_log_(options.info_log == nullptr), initial_db_options_(SanitizeOptions(dbname, options)), env_(initial_db_options_.env), fs_(initial_db_options_.env->GetFileSystem()), immutable_db_options_(initial_db_options_), mutable_db_options_(initial_db_options_), stats_(immutable_db_options_.statistics.get()), mutex_(stats_, env_, DB_MUTEX_WAIT_MICROS, immutable_db_options_.use_adaptive_mutex), default_cf_handle_(nullptr), max_total_in_memory_state_(0), file_options_(BuildDBOptions(immutable_db_options_, mutable_db_options_)), file_options_for_compaction_(fs_->OptimizeForCompactionTableWrite( file_options_, immutable_db_options_)), seq_per_batch_(seq_per_batch), batch_per_txn_(batch_per_txn), db_lock_(nullptr), shutting_down_(false), manual_compaction_paused_(false), 
bg_cv_(&mutex_), logfile_number_(0), log_dir_synced_(false), log_empty_(true), persist_stats_cf_handle_(nullptr), log_sync_cv_(&mutex_), total_log_size_(0), is_snapshot_supported_(true), write_buffer_manager_(immutable_db_options_.write_buffer_manager.get()), write_thread_(immutable_db_options_), nonmem_write_thread_(immutable_db_options_), write_controller_(mutable_db_options_.delayed_write_rate), last_batch_group_size_(0), unscheduled_flushes_(0), unscheduled_compactions_(0), bg_bottom_compaction_scheduled_(0), bg_compaction_scheduled_(0), num_running_compactions_(0), bg_flush_scheduled_(0), num_running_flushes_(0), bg_purge_scheduled_(0), disable_delete_obsolete_files_(0), pending_purge_obsolete_files_(0), delete_obsolete_files_last_run_(env_->NowMicros()), last_stats_dump_time_microsec_(0), next_job_id_(1), has_unpersisted_data_(false), unable_to_release_oldest_log_(false), num_running_ingest_file_(0), #ifndef ROCKSDB_LITE wal_manager_(immutable_db_options_, file_options_, seq_per_batch), #endif // ROCKSDB_LITE event_logger_(immutable_db_options_.info_log.get()), bg_work_paused_(0), bg_compaction_paused_(0), refitting_level_(false), opened_successfully_(false), two_write_queues_(options.two_write_queues), manual_wal_flush_(options.manual_wal_flush), // last_sequencee_ is always maintained by the main queue that also writes // to the memtable. When two_write_queues_ is disabled last seq in // memtable is the same as last seq published to the readers. When it is // enabled but seq_per_batch_ is disabled, last seq in memtable still // indicates last published seq since wal-only writes that go to the 2nd // queue do not consume a sequence number. Otherwise writes performed by // the 2nd queue could change what is visible to the readers. In this // cases, last_seq_same_as_publish_seq_==false, the 2nd queue maintains a // separate variable to indicate the last published sequence. 
last_seq_same_as_publish_seq_( !(seq_per_batch && options.two_write_queues)), // Since seq_per_batch_ is currently set only by WritePreparedTxn which // requires a custom gc for compaction, we use that to set use_custom_gc_ // as well. use_custom_gc_(seq_per_batch), shutdown_initiated_(false), own_sfm_(options.sst_file_manager == nullptr), preserve_deletes_(options.preserve_deletes), closed_(false), error_handler_(this, immutable_db_options_, &mutex_), atomic_flush_install_cv_(&mutex_) { // !batch_per_trx_ implies seq_per_batch_ because it is only unset for // WriteUnprepared, which should use seq_per_batch_. assert(batch_per_txn_ || seq_per_batch_); env_->GetAbsolutePath(dbname, &db_absolute_path_); // Reserve ten files or so for other uses and give the rest to TableCache. // Give a large number for setting of "infinite" open files. const int table_cache_size = (mutable_db_options_.max_open_files == -1) ? TableCache::kInfiniteCapacity : mutable_db_options_.max_open_files - 10; LRUCacheOptions co; co.capacity = table_cache_size; co.num_shard_bits = immutable_db_options_.table_cache_numshardbits; co.metadata_charge_policy = kDontChargeCacheMetadata; table_cache_ = NewLRUCache(co); versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_, table_cache_.get(), write_buffer_manager_, &write_controller_, &block_cache_tracer_)); column_family_memtables_.reset( new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet())); DumpRocksDBBuildVersion(immutable_db_options_.info_log.get()); DumpDBFileSummary(immutable_db_options_, dbname_); immutable_db_options_.Dump(immutable_db_options_.info_log.get()); mutable_db_options_.Dump(immutable_db_options_.info_log.get()); DumpSupportInfo(immutable_db_options_.info_log.get()); // always open the DB with 0 here, which means if preserve_deletes_==true // we won't drop any deletion markers until SetPreserveDeletesSequenceNumber() // is called by client and this seqnum is advanced. 
preserve_deletes_seqnum_.store(0); } Status DBImpl::Resume() { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Resuming DB"); InstrumentedMutexLock db_mutex(&mutex_); if (!error_handler_.IsDBStopped() && !error_handler_.IsBGWorkStopped()) { // Nothing to do return Status::OK(); } if (error_handler_.IsRecoveryInProgress()) { // Don't allow a mix of manual and automatic recovery return Status::Busy(); } mutex_.Unlock(); Status s = error_handler_.RecoverFromBGError(true); mutex_.Lock(); return s; } // This function implements the guts of recovery from a background error. It // is eventually called for both manual as well as automatic recovery. It does // the following - // 1. Wait for currently scheduled background flush/compaction to exit, in // order to inadvertently causing an error and thinking recovery failed // 2. Flush memtables if there's any data for all the CFs. This may result // another error, which will be saved by error_handler_ and reported later // as the recovery status // 3. Find and delete any obsolete files // 4. Schedule compactions if needed for all the CFs. This is needed as the // flush in the prior step might have been a no-op for some CFs, which // means a new super version wouldn't have been installed Status DBImpl::ResumeImpl() { mutex_.AssertHeld(); WaitForBackgroundWork(); Status bg_error = error_handler_.GetBGError(); Status s; if (shutdown_initiated_) { // Returning shutdown status to SFM during auto recovery will cause it // to abort the recovery and allow the shutdown to progress s = Status::ShutdownInProgress(); } if (s.ok() && bg_error.severity() > Status::Severity::kHardError) { ROCKS_LOG_INFO( immutable_db_options_.info_log, "DB resume requested but failed due to Fatal/Unrecoverable error"); s = bg_error; } // Make sure the IO Status stored in version set is set to OK. 
bool file_deletion_disabled = !IsFileDeletionsEnabled(); if (s.ok()) { IOStatus io_s = versions_->io_status(); if (io_s.IsIOError()) { // If resuming from IOError resulted from MANIFEST write, then assert // that we must have already set the MANIFEST writer to nullptr during // clean-up phase MANIFEST writing. We must have also disabled file // deletions. assert(!versions_->descriptor_log_); assert(file_deletion_disabled); // Since we are trying to recover from MANIFEST write error, we need to // switch to a new MANIFEST anyway. The old MANIFEST can be corrupted. // Therefore, force writing a dummy version edit because we do not know // whether there are flush jobs with non-empty data to flush, triggering // appends to MANIFEST. VersionEdit edit; auto cfh = reinterpret_cast(default_cf_handle_); assert(cfh); ColumnFamilyData* cfd = cfh->cfd(); const MutableCFOptions& cf_opts = *cfd->GetLatestMutableCFOptions(); s = versions_->LogAndApply(cfd, cf_opts, &edit, &mutex_, directories_.GetDbDir()); if (!s.ok()) { io_s = versions_->io_status(); if (!io_s.ok()) { s = error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite); } } } } // We cannot guarantee consistency of the WAL. So force flush Memtables of // all the column families if (s.ok()) { FlushOptions flush_opts; // We allow flush to stall write since we are trying to resume from error. 
flush_opts.allow_write_stall = true; if (immutable_db_options_.atomic_flush) { autovector cfds; SelectColumnFamiliesForAtomicFlush(&cfds); mutex_.Unlock(); s = AtomicFlushMemTables(cfds, flush_opts, FlushReason::kErrorRecovery); mutex_.Lock(); } else { for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } cfd->Ref(); mutex_.Unlock(); s = FlushMemTable(cfd, flush_opts, FlushReason::kErrorRecovery); mutex_.Lock(); cfd->UnrefAndTryDelete(); if (!s.ok()) { break; } } } if (!s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB resume requested but failed due to Flush failure [%s]", s.ToString().c_str()); } } JobContext job_context(0); FindObsoleteFiles(&job_context, true); if (s.ok()) { s = error_handler_.ClearBGError(); } mutex_.Unlock(); job_context.manifest_file_number = 1; if (job_context.HaveSomethingToDelete()) { PurgeObsoleteFiles(job_context); } job_context.Clean(); if (s.ok()) { assert(versions_->io_status().ok()); // If we reach here, we should re-enable file deletions if it was disabled // during previous error handling. if (file_deletion_disabled) { // Always return ok EnableFileDeletions(/*force=*/true); } ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB"); } mutex_.Lock(); // Check for shutdown again before scheduling further compactions, // since we released and re-acquired the lock above if (shutdown_initiated_) { s = Status::ShutdownInProgress(); } if (s.ok()) { for (auto cfd : *versions_->GetColumnFamilySet()) { SchedulePendingCompaction(cfd); } MaybeScheduleFlushOrCompaction(); } // Wake up any waiters - in this case, it could be the shutdown thread bg_cv_.SignalAll(); // No need to check BGError again. 
If something happened, event listener would // be notified and the operation causing it would have failed return s; } void DBImpl::WaitForBackgroundWork() { // Wait for background work to finish while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || bg_flush_scheduled_) { bg_cv_.Wait(); } } // Will lock the mutex_, will wait for completion if wait is true void DBImpl::CancelAllBackgroundWork(bool wait) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Shutdown: canceling all background work"); if (thread_dump_stats_ != nullptr) { thread_dump_stats_->cancel(); thread_dump_stats_.reset(); } if (thread_persist_stats_ != nullptr) { thread_persist_stats_->cancel(); thread_persist_stats_.reset(); } InstrumentedMutexLock l(&mutex_); if (!shutting_down_.load(std::memory_order_acquire) && has_unpersisted_data_.load(std::memory_order_relaxed) && !mutable_db_options_.avoid_flush_during_shutdown) { if (immutable_db_options_.atomic_flush) { autovector cfds; SelectColumnFamiliesForAtomicFlush(&cfds); mutex_.Unlock(); AtomicFlushMemTables(cfds, FlushOptions(), FlushReason::kShutDown); mutex_.Lock(); } else { for (auto cfd : *versions_->GetColumnFamilySet()) { if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) { cfd->Ref(); mutex_.Unlock(); FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown); mutex_.Lock(); cfd->UnrefAndTryDelete(); } } } versions_->GetColumnFamilySet()->FreeDeadColumnFamilies(); } shutting_down_.store(true, std::memory_order_release); bg_cv_.SignalAll(); if (!wait) { return; } WaitForBackgroundWork(); } Status DBImpl::CloseHelper() { // Guarantee that there is no background error recovery in progress before // continuing with the shutdown mutex_.Lock(); shutdown_initiated_ = true; error_handler_.CancelErrorRecovery(); while (error_handler_.IsRecoveryInProgress()) { bg_cv_.Wait(); } mutex_.Unlock(); // CancelAllBackgroundWork called with false means we just set the shutdown // marker. 
After this we do a variant of the waiting and unschedule work // (to consider: moving all the waiting into CancelAllBackgroundWork(true)) CancelAllBackgroundWork(false); int bottom_compactions_unscheduled = env_->UnSchedule(this, Env::Priority::BOTTOM); int compactions_unscheduled = env_->UnSchedule(this, Env::Priority::LOW); int flushes_unscheduled = env_->UnSchedule(this, Env::Priority::HIGH); Status ret; mutex_.Lock(); bg_bottom_compaction_scheduled_ -= bottom_compactions_unscheduled; bg_compaction_scheduled_ -= compactions_unscheduled; bg_flush_scheduled_ -= flushes_unscheduled; // Wait for background work to finish while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || bg_flush_scheduled_ || bg_purge_scheduled_ || pending_purge_obsolete_files_ || error_handler_.IsRecoveryInProgress()) { TEST_SYNC_POINT("DBImpl::~DBImpl:WaitJob"); bg_cv_.Wait(); } TEST_SYNC_POINT_CALLBACK("DBImpl::CloseHelper:PendingPurgeFinished", &files_grabbed_for_purge_); EraseThreadStatusDbInfo(); flush_scheduler_.Clear(); trim_history_scheduler_.Clear(); while (!flush_queue_.empty()) { const FlushRequest& flush_req = PopFirstFromFlushQueue(); for (const auto& iter : flush_req) { iter.first->UnrefAndTryDelete(); } } while (!compaction_queue_.empty()) { auto cfd = PopFirstFromCompactionQueue(); cfd->UnrefAndTryDelete(); } if (default_cf_handle_ != nullptr || persist_stats_cf_handle_ != nullptr) { // we need to delete handle outside of lock because it does its own locking mutex_.Unlock(); if (default_cf_handle_) { delete default_cf_handle_; default_cf_handle_ = nullptr; } if (persist_stats_cf_handle_) { delete persist_stats_cf_handle_; persist_stats_cf_handle_ = nullptr; } mutex_.Lock(); } // Clean up obsolete files due to SuperVersion release. // (1) Need to delete to obsolete files before closing because RepairDB() // scans all existing files in the file system and builds manifest file. // Keeping obsolete files confuses the repair process. 
// (2) Need to check if we Open()/Recover() the DB successfully before // deleting because if VersionSet recover fails (may be due to corrupted // manifest file), it is not able to identify live files correctly. As a // result, all "live" files can get deleted by accident. However, corrupted // manifest is recoverable by RepairDB(). if (opened_successfully_) { JobContext job_context(next_job_id_.fetch_add(1)); FindObsoleteFiles(&job_context, true); /* the purge itself runs with mutex_ released */ mutex_.Unlock(); // manifest number starting from 2 job_context.manifest_file_number = 1; if (job_context.HaveSomethingToDelete()) { PurgeObsoleteFiles(job_context); } job_context.Clean(); mutex_.Lock(); } /* Free the writers collected in logs_to_free_. */ for (auto l : logs_to_free_) { delete l; } /* Clear the writer of every remaining log; a failure is logged and the first one becomes the return status. */ for (auto& log : logs_) { uint64_t log_number = log.writer->get_log_number(); Status s = log.ClearWriter(); if (!s.ok()) { ROCKS_LOG_WARN( immutable_db_options_.info_log, "Unable to Sync WAL file %s with error -- %s", LogFileName(immutable_db_options_.wal_dir, log_number).c_str(), s.ToString().c_str()); // Retain the first error if (ret.ok()) { ret = s; } } } logs_.clear(); // Table cache may have table handles holding blocks from the block cache. // We need to release them before the block cache is destroyed. The block // cache may be destroyed inside versions_.reset(), when column family data // list is destroyed, so leaving handles in table cache after // versions_.reset() may cause issues. // Here we clean all unreferenced handles in table cache. // Now we assume all user queries have finished, so only version set itself // can possibly hold the blocks from block cache. After releasing unreferenced // handles here, only handles held by version set left and inside // versions_.reset(), we will release them. There, we need to make sure every // time a handle is released, we erase it from the cache too. By doing that, // we can guarantee that after versions_.reset(), table cache is empty // so the cache can be safely destroyed.
table_cache_->EraseUnRefEntries(); /* Free the transaction objects still listed in recovered_transactions_. */ for (auto& txn_entry : recovered_transactions_) { delete txn_entry.second; } // versions need to be destroyed before table_cache since it can hold // references to table_cache. versions_.reset(); mutex_.Unlock(); /* Release the DB lock file if one is held. */ if (db_lock_ != nullptr) { env_->UnlockFile(db_lock_); } ROCKS_LOG_INFO(immutable_db_options_.info_log, "Shutdown complete"); LogFlush(immutable_db_options_.info_log); #ifndef ROCKSDB_LITE // If the sst_file_manager was allocated by us during DB::Open(), call // Close() on it before closing the info_log. Otherwise, background thread // in SstFileManagerImpl might try to log something if (immutable_db_options_.sst_file_manager && own_sfm_) { auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); sfm->Close(); } #endif // ROCKSDB_LITE /* The logger is closed only when own_info_log_ is set; its Close() status is used only if no earlier error was recorded. */ if (immutable_db_options_.info_log && own_info_log_) { Status s = immutable_db_options_.info_log->Close(); if (ret.ok()) { ret = s; } } if (ret.IsAborted()) { // Reserve IsAborted() error for those where users didn't release // certain resource and they can release them and come back and // retry. In this case, we wrap this exception to something else.
return Status::Incomplete(ret.ToString()); } return ret; } /* Close entry point; forwards to CloseHelper(). */ Status DBImpl::CloseImpl() { return CloseHelper(); } /* Runs CloseHelper() unless closed_ is already set; its status is discarded here. */ DBImpl::~DBImpl() { if (!closed_) { closed_ = true; CloseHelper(); } } /* Leaves *s untouched when it is OK or paranoid_checks is on; otherwise logs the error and resets *s to OK. */ void DBImpl::MaybeIgnoreError(Status* s) const { if (s->ok() || immutable_db_options_.paranoid_checks) { // No change needed } else { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Ignoring error %s", s->ToString().c_str()); *s = Status::OK(); } } /* Creates the WAL archival directory when wal_ttl_seconds or wal_size_limit_mb is positive; otherwise returns OK without touching the file system. */ const Status DBImpl::CreateArchivalDirectory() { if (immutable_db_options_.wal_ttl_seconds > 0 || immutable_db_options_.wal_size_limit_mb > 0) { std::string archivalPath = ArchivalDirectory(immutable_db_options_.wal_dir); return env_->CreateDirIfMissing(archivalPath); } return Status::OK(); } /* Logs the configured Statistics object as a string, if one is set. */ void DBImpl::PrintStatistics() { auto dbstats = immutable_db_options_.statistics.get(); if (dbstats) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "STATISTICS:\n %s", dbstats->ToString().c_str()); } } /* Under mutex_, creates the DumpStats and PersistStats repeatable threads for any period that is positive and whose thread does not exist yet. Periods are converted from seconds via kMicrosInSecond. */ void DBImpl::StartTimedTasks() { unsigned int stats_dump_period_sec = 0; unsigned int stats_persist_period_sec = 0; { InstrumentedMutexLock l(&mutex_); stats_dump_period_sec = mutable_db_options_.stats_dump_period_sec; if (stats_dump_period_sec > 0) { if (!thread_dump_stats_) { thread_dump_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread( [this]() { DBImpl::DumpStats(); }, "dump_st", env_, static_cast(stats_dump_period_sec) * kMicrosInSecond)); } } stats_persist_period_sec = mutable_db_options_.stats_persist_period_sec; if (stats_persist_period_sec > 0) { if (!thread_persist_stats_) { thread_persist_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread( [this]() { DBImpl::PersistStats(); }, "pst_st", env_, static_cast(stats_persist_period_sec) * kMicrosInSecond)); } } } } // estimate the total size of stats_history_ /* The estimate is the per-slice cost of the first slice multiplied by the number of slices; an empty history reports only the size of the map object. */ size_t DBImpl::EstimateInMemoryStatsHistorySize() const { size_t size_total = sizeof(std::map>); if (stats_history_.size() == 0) return size_total; size_t size_per_slice = sizeof(uint64_t) + sizeof(std::map); // non-empty map,
stats_history_.begin() guaranteed to exist std::map sample_slice(stats_history_.begin()->second); /* NOTE(review): sample_slice is a copy of the first slice and is only read below. */ for (const auto& pairs : sample_slice) { size_per_slice += pairs.first.capacity() + sizeof(pairs.first) + sizeof(pairs.second); } size_total = size_per_slice * stats_history_.size(); return size_total; } /* Takes a ticker snapshot and records the delta against the previous snapshot (stats_slice_), either into the persistent stats column family or into stats_history_. The first call only seeds stats_slice_. Returns early once shutdown_initiated_ is set, when no Statistics object is configured, or when getTickerMap fails. */ void DBImpl::PersistStats() { TEST_SYNC_POINT("DBImpl::PersistStats:Entry"); #ifndef ROCKSDB_LITE if (shutdown_initiated_) { return; } uint64_t now_seconds = env_->NowMicros() / kMicrosInSecond; Statistics* statistics = immutable_db_options_.statistics.get(); if (!statistics) { return; } size_t stats_history_size_limit = 0; { InstrumentedMutexLock l(&mutex_); stats_history_size_limit = mutable_db_options_.stats_history_buffer_size; } std::map stats_map; if (!statistics->getTickerMap(&stats_map)) { return; } ROCKS_LOG_INFO(immutable_db_options_.info_log, "------- PERSISTING STATS -------"); if (immutable_db_options_.persist_stats_to_disk) { WriteBatch batch; if (stats_slice_initialized_) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Reading %" ROCKSDB_PRIszt " stats from statistics\n", stats_slice_.size()); for (const auto& stat : stats_map) { /* key buffer is 100 bytes; the slice length is capped at 100 below */ char key[100]; int length = EncodePersistentStatsKey(now_seconds, stat.first, 100, key); // calculate the delta from last time if (stats_slice_.find(stat.first) != stats_slice_.end()) { uint64_t delta = stat.second - stats_slice_[stat.first]; batch.Put(persist_stats_cf_handle_, Slice(key, std::min(100, length)), ToString(delta)); } } } stats_slice_initialized_ = true; std::swap(stats_slice_, stats_map); /* low-priority, no-slowdown, unsynced write; a failure is only logged */ WriteOptions wo; wo.low_pri = true; wo.no_slowdown = true; wo.sync = false; Status s = Write(wo, &batch); if (!s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Writing to persistent stats CF failed -- %s", s.ToString().c_str()); } else { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Writing %" ROCKSDB_PRIszt " stats with timestamp %" PRIu64 " to persistent stats CF succeeded", stats_slice_.size(), now_seconds); } //
TODO(Zhongyi): add purging for persisted data } else { /* In-memory path: stats_history_ is guarded by stats_history_mutex_. */ InstrumentedMutexLock l(&stats_history_mutex_); // calculate the delta from last time if (stats_slice_initialized_) { std::map stats_delta; for (const auto& stat : stats_map) { if (stats_slice_.find(stat.first) != stats_slice_.end()) { stats_delta[stat.first] = stat.second - stats_slice_[stat.first]; } } ROCKS_LOG_INFO(immutable_db_options_.info_log, "Storing %" ROCKSDB_PRIszt " stats with timestamp %" PRIu64 " to in-memory stats history", stats_slice_.size(), now_seconds); stats_history_[now_seconds] = stats_delta; } stats_slice_initialized_ = true; std::swap(stats_slice_, stats_map); TEST_SYNC_POINT("DBImpl::PersistStats:StatsCopied"); // delete older stats snapshots to control memory consumption size_t stats_history_size = EstimateInMemoryStatsHistorySize(); bool purge_needed = stats_history_size > stats_history_size_limit; ROCKS_LOG_INFO(immutable_db_options_.info_log, "[Pre-GC] In-memory stats history size: %" ROCKSDB_PRIszt " bytes, slice count: %" ROCKSDB_PRIszt, stats_history_size, stats_history_.size()); /* Drop the oldest slices until the estimate fits the limit. stats_history_size is refreshed on every pass so the Post-GC message below reports the size after purging, not the Pre-GC value. */ while (purge_needed && !stats_history_.empty()) { stats_history_.erase(stats_history_.begin()); stats_history_size = EstimateInMemoryStatsHistorySize(); purge_needed = stats_history_size > stats_history_size_limit; } ROCKS_LOG_INFO(immutable_db_options_.info_log, "[Post-GC] In-memory stats history size: %" ROCKSDB_PRIszt " bytes, slice count: %" ROCKSDB_PRIszt, stats_history_size, stats_history_.size()); } #endif // !ROCKSDB_LITE } /* Looks up the first in-memory stats slice with timestamp >= start_time. When that slice exists and its timestamp is < end_time, copies the timestamp to *new_time and the slice to *stats_map and returns true; otherwise returns false. Null output pointers return false (and assert in debug builds). */ bool DBImpl::FindStatsByTime(uint64_t start_time, uint64_t end_time, uint64_t* new_time, std::map* stats_map) { assert(new_time); assert(stats_map); if (!new_time || !stats_map) return false; // lock when search for start_time { InstrumentedMutexLock l(&stats_history_mutex_); auto it = stats_history_.lower_bound(start_time); if (it != stats_history_.end() && it->first < end_time) { // make a copy for timestamp and stats_map *new_time = it->first; *stats_map = it->second; return true; } else { return false; } } }
/* Resets *stats_iterator to a persistent or in-memory stats-history iterator, chosen by persist_stats_to_disk, and returns that iterator's status. A null stats_iterator yields InvalidArgument. */ Status DBImpl::GetStatsHistory( uint64_t start_time, uint64_t end_time, std::unique_ptr* stats_iterator) { if (!stats_iterator) { return Status::InvalidArgument("stats_iterator not preallocated."); } if (immutable_db_options_.persist_stats_to_disk) { stats_iterator->reset( new PersistentStatsHistoryIterator(start_time, end_time, this)); } else { stats_iterator->reset( new InMemoryStatsHistoryIterator(start_time, end_time, this)); } return (*stats_iterator)->status(); } /* Collects the DB stats string and the per-column-family stats strings under mutex_, then writes them (and malloc stats when dump_malloc_stats is set) to the info log. In non-LITE builds it returns early, before PrintStatistics(), once shutdown_initiated_ is set. */ void DBImpl::DumpStats() { TEST_SYNC_POINT("DBImpl::DumpStats:1"); #ifndef ROCKSDB_LITE const DBPropertyInfo* cf_property_info = GetPropertyInfo(DB::Properties::kCFStats); assert(cf_property_info != nullptr); const DBPropertyInfo* db_property_info = GetPropertyInfo(DB::Properties::kDBStats); assert(db_property_info != nullptr); std::string stats; if (shutdown_initiated_) { return; } { InstrumentedMutexLock l(&mutex_); default_cf_internal_stats_->GetStringProperty( *db_property_info, DB::Properties::kDBStats, &stats); /* two passes over the column families: kCFStatsNoFileHistogram for all of them first, then kCFFileHistogram */ for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->initialized()) { cfd->internal_stats()->GetStringProperty( *cf_property_info, DB::Properties::kCFStatsNoFileHistogram, &stats); } } for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->initialized()) { cfd->internal_stats()->GetStringProperty( *cf_property_info, DB::Properties::kCFFileHistogram, &stats); } } } TEST_SYNC_POINT("DBImpl::DumpStats:2"); ROCKS_LOG_INFO(immutable_db_options_.info_log, "------- DUMPING STATS -------"); ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", stats.c_str()); if (immutable_db_options_.dump_malloc_stats) { stats.clear(); DumpMallocStats(&stats); if (!stats.empty()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "------- Malloc STATS -------"); ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", stats.c_str()); } } #endif // !ROCKSDB_LITE PrintStatistics(); } Status DBImpl::TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family, int max_entries_to_print, std::string*
out_str) { /* Takes a referenced SuperVersion of the column family, asks its current Version for the summary, then releases the SuperVersion. */ auto* cfh = static_cast_with_check(column_family); ColumnFamilyData* cfd = cfh->cfd(); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); Version* version = super_version->current; Status s = version->TablesRangeTombstoneSummary(max_entries_to_print, out_str); CleanupSuperVersion(super_version); return s; } /* Moves every writer in job_context->logs_to_free to the logs-to-free queue and empties the list. */ void DBImpl::ScheduleBgLogWriterClose(JobContext* job_context) { if (!job_context->logs_to_free.empty()) { for (auto l : job_context->logs_to_free) { AddToLogsToFreeQueue(l); } job_context->logs_to_free.clear(); } } /* Returns the column family's data directory for path_id, falling back to the DB-level directory when the column family has none. */ FSDirectory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const { assert(cfd); FSDirectory* ret_dir = cfd->GetDataDir(path_id); if (ret_dir == nullptr) { return directories_.GetDataDir(path_id); } return ret_dir; } /* Applies options_map to the column family's mutable options. An empty map is rejected with InvalidArgument. On success a dummy VersionEdit is applied so compaction scores are recomputed, a new SuperVersion is installed and the options file is rewritten. The status of LogAndApply is now kept in s, so a failed apply is reported to the caller instead of being logged as success. */ Status DBImpl::SetOptions( ColumnFamilyHandle* column_family, const std::unordered_map& options_map) { #ifdef ROCKSDB_LITE (void)column_family; (void)options_map; return Status::NotSupported("Not supported in ROCKSDB LITE"); #else auto* cfd = reinterpret_cast(column_family)->cfd(); if (options_map.empty()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "SetOptions() on column family [%s], empty input", cfd->GetName().c_str()); return Status::InvalidArgument("empty input"); } MutableCFOptions new_options; Status s; Status persist_options_status; SuperVersionContext sv_context(/* create_superversion */ true); { auto db_options = GetDBOptions(); InstrumentedMutexLock l(&mutex_); s = cfd->SetOptions(db_options, options_map); if (s.ok()) { new_options = *cfd->GetLatestMutableCFOptions(); // Append new version to recompute compaction score. VersionEdit dummy_edit; s = versions_->LogAndApply(cfd, new_options, &dummy_edit, &mutex_, directories_.GetDbDir()); // Trigger possible flush/compactions. This has to be before we persist // options to file, otherwise there will be a deadlock with writer // thread.
InstallSuperVersionAndScheduleWork(cfd, &sv_context, new_options); persist_options_status = WriteOptionsFile( false /*need_mutex_lock*/, true /*need_enter_write_thread*/); bg_cv_.SignalAll(); } } sv_context.Clean(); /* Logging below happens after mutex_ has been released. */ ROCKS_LOG_INFO( immutable_db_options_.info_log, "SetOptions() on column family [%s], inputs:", cfd->GetName().c_str()); for (const auto& o : options_map) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s: %s\n", o.first.c_str(), o.second.c_str()); } if (s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] SetOptions() succeeded", cfd->GetName().c_str()); new_options.Dump(immutable_db_options_.info_log.get()); /* a failure to persist the options file replaces an otherwise OK status */ if (!persist_options_status.ok()) { s = persist_options_status; } } else { ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] SetOptions() failed", cfd->GetName().c_str()); } LogFlush(immutable_db_options_.info_log); return s; #endif // ROCKSDB_LITE } /* Parses options_map on top of the current mutable DB options, validates the result against the DB and every live column family, and, if valid, applies it under mutex_. An empty map is rejected with InvalidArgument. */ Status DBImpl::SetDBOptions( const std::unordered_map& options_map) { #ifdef ROCKSDB_LITE (void)options_map; return Status::NotSupported("Not supported in ROCKSDB LITE"); #else if (options_map.empty()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "SetDBOptions(), empty input."); return Status::InvalidArgument("empty input"); } MutableDBOptions new_options; Status s; Status persist_options_status; bool wal_changed = false; WriteContext write_context; { InstrumentedMutexLock l(&mutex_); s = GetMutableDBOptionsFromStrings(mutable_db_options_, options_map, &new_options); /* bytes_per_sync of 0 is replaced by 1 MiB */ if (new_options.bytes_per_sync == 0) { new_options.bytes_per_sync = 1024 * 1024; } DBOptions new_db_options = BuildDBOptions(immutable_db_options_, new_options); if (s.ok()) { s = ValidateOptions(new_db_options); } if (s.ok()) { /* stop at the first column family whose options do not validate against the new DB options */ for (auto c : *versions_->GetColumnFamilySet()) { if (!c->IsDropped()) { auto cf_options = c->GetLatestCFOptions(); s = ColumnFamilyData::ValidateOptions(new_db_options, cf_options); if (!s.ok()) { break; } } } } if (s.ok()) { const BGJobLimits current_bg_job_limits =
GetBGJobLimits(mutable_db_options_.max_background_flushes, mutable_db_options_.max_background_compactions, mutable_db_options_.max_background_jobs, /* parallelize_compactions */ true); const BGJobLimits new_bg_job_limits = GetBGJobLimits( new_options.max_background_flushes, new_options.max_background_compactions, new_options.max_background_jobs, /* parallelize_compactions */ true); /* Thread pools are only grown here: nothing is done when a limit decreases. */ const bool max_flushes_increased = new_bg_job_limits.max_flushes > current_bg_job_limits.max_flushes; const bool max_compactions_increased = new_bg_job_limits.max_compactions > current_bg_job_limits.max_compactions; if (max_flushes_increased || max_compactions_increased) { if (max_flushes_increased) { env_->IncBackgroundThreadsIfNeeded(new_bg_job_limits.max_flushes, Env::Priority::HIGH); } if (max_compactions_increased) { env_->IncBackgroundThreadsIfNeeded(new_bg_job_limits.max_compactions, Env::Priority::LOW); } MaybeScheduleFlushOrCompaction(); } /* When a stats period changes, the existing thread is cancelled with mutex_ released, then replaced by a thread with the new period or removed when the period is 0. */ if (new_options.stats_dump_period_sec != mutable_db_options_.stats_dump_period_sec) { if (thread_dump_stats_) { mutex_.Unlock(); thread_dump_stats_->cancel(); mutex_.Lock(); } if (new_options.stats_dump_period_sec > 0) { thread_dump_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread( [this]() { DBImpl::DumpStats(); }, "dump_st", env_, static_cast(new_options.stats_dump_period_sec) * kMicrosInSecond)); } else { thread_dump_stats_.reset(); } } if (new_options.stats_persist_period_sec != mutable_db_options_.stats_persist_period_sec) { if (thread_persist_stats_) { mutex_.Unlock(); thread_persist_stats_->cancel(); mutex_.Lock(); } if (new_options.stats_persist_period_sec > 0) { thread_persist_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread( [this]() { DBImpl::PersistStats(); }, "pst_st", env_, static_cast(new_options.stats_persist_period_sec) * kMicrosInSecond)); } else { thread_persist_stats_.reset(); } } write_controller_.set_max_delayed_write_rate( new_options.delayed_write_rate); table_cache_.get()->SetCapacity(new_options.max_open_files
== -1 ? TableCache::kInfiniteCapacity : new_options.max_open_files - 10); /* wal_changed must be computed before mutable_db_options_ is overwritten on the next statement */ wal_changed = mutable_db_options_.wal_bytes_per_sync != new_options.wal_bytes_per_sync; mutable_db_options_ = new_options; file_options_for_compaction_ = FileOptions(new_db_options); file_options_for_compaction_ = fs_->OptimizeForCompactionTableWrite( file_options_for_compaction_, immutable_db_options_); versions_->ChangeFileOptions(mutable_db_options_); //TODO(xiez): clarify why apply optimize for read to write options file_options_for_compaction_ = fs_->OptimizeForCompactionTableRead( file_options_for_compaction_, immutable_db_options_); file_options_for_compaction_.compaction_readahead_size = mutable_db_options_.compaction_readahead_size; /* The WAL switch and the options-file write both run between EnterUnbatched and ExitUnbatched. */ WriteThread::Writer w; write_thread_.EnterUnbatched(&w, &mutex_); if (total_log_size_ > GetMaxTotalWalSize() || wal_changed) { /* a SwitchWAL failure is logged only; it does not change the returned status */ Status purge_wal_status = SwitchWAL(&write_context); if (!purge_wal_status.ok()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Unable to purge WAL files in SetDBOptions() -- %s", purge_wal_status.ToString().c_str()); } } persist_options_status = WriteOptionsFile( false /*need_mutex_lock*/, false /*need_enter_write_thread*/); write_thread_.ExitUnbatched(&w); } } ROCKS_LOG_INFO(immutable_db_options_.info_log, "SetDBOptions(), inputs:"); for (const auto& o : options_map) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s: %s\n", o.first.c_str(), o.second.c_str()); } if (s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "SetDBOptions() succeeded"); new_options.Dump(immutable_db_options_.info_log.get()); /* a failed options-file write becomes an IOError only when fail_if_options_file_error is set; it is always logged */ if (!persist_options_status.ok()) { if (immutable_db_options_.fail_if_options_file_error) { s = Status::IOError( "SetDBOptions() succeeded, but unable to persist options", persist_options_status.ToString()); } ROCKS_LOG_WARN(immutable_db_options_.info_log, "Unable to persist options in SetDBOptions() -- %s", persist_options_status.ToString().c_str()); } } else { ROCKS_LOG_WARN(immutable_db_options_.info_log, "SetDBOptions
failed"); } LogFlush(immutable_db_options_.info_log); return s; #endif // ROCKSDB_LITE } // return the same level if it cannot be moved /* Walks from level - 1 down to level 1 and returns the lowest-numbered level reached before hitting a non-empty level or one whose MaxBytesForLevel is smaller than the bytes in `level`. Level 0 is never considered. Requires mutex_ to be held. */ int DBImpl::FindMinimumEmptyLevelFitting( ColumnFamilyData* cfd, const MutableCFOptions& /*mutable_cf_options*/, int level) { mutex_.AssertHeld(); const auto* vstorage = cfd->current()->storage_info(); int minimum_level = level; for (int i = level - 1; i > 0; --i) { // stop if level i is not empty if (vstorage->NumLevelFiles(i) > 0) break; // stop if level i is too small (cannot fit the level files) if (vstorage->MaxBytesForLevel(i) < vstorage->NumLevelBytes(level)) { break; } minimum_level = i; } return minimum_level; } /* With manual_wal_flush_ set, writes the current log writer's buffer; then, if sync is true, continues into SyncWAL(). A buffer-write error is reported via IOStatusCheck and returned without syncing. */ Status DBImpl::FlushWAL(bool sync) { if (manual_wal_flush_) { IOStatus io_s; { // We need to lock log_write_mutex_ since logs_ might change concurrently InstrumentedMutexLock wl(&log_write_mutex_); log::Writer* cur_log_writer = logs_.back().writer; io_s = cur_log_writer->WriteBuffer(); } if (!io_s.ok()) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s", io_s.ToString().c_str()); // In case there is a fs error we should set it globally to prevent the // future writes IOStatusCheck(io_s); // whether sync or not, we should abort the rest of function upon error return std::move(io_s); } if (!sync) { ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=false"); return std::move(io_s); } } if (!sync) { return Status::OK(); } // sync = true ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=true"); return SyncWAL(); } /* Syncs every log whose number is <= the log file number observed at entry. The logs are selected and marked getting_synced under mutex_, synced with the mutex released, and the outcome is recorded through MarkLogsSynced(). */ Status DBImpl::SyncWAL() { autovector logs_to_sync; bool need_log_dir_sync; uint64_t current_log_number; { InstrumentedMutexLock l(&mutex_); assert(!logs_.empty()); // This SyncWAL() call only cares about logs up to this number. current_log_number = logfile_number_; /* wait while the front log in range is marked getting_synced */ while (logs_.front().number <= current_log_number && logs_.front().getting_synced) { log_sync_cv_.Wait(); } // First check that logs are safe to sync in background.
for (auto it = logs_.begin(); it != logs_.end() && it->number <= current_log_number; ++it) { if (!it->writer->file()->writable_file()->IsSyncThreadSafe()) { return Status::NotSupported( "SyncWAL() is not supported for this implementation of WAL file", immutable_db_options_.allow_mmap_writes ? "try setting Options::allow_mmap_writes to false" : Slice()); } } /* Second pass: mark each log in range as getting_synced and remember its writer. */ for (auto it = logs_.begin(); it != logs_.end() && it->number <= current_log_number; ++it) { auto& log = *it; assert(!log.getting_synced); log.getting_synced = true; logs_to_sync.push_back(log.writer); } need_log_dir_sync = !log_dir_synced_; } TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:1"); RecordTick(stats_, WAL_FILE_SYNCED); Status status; IOStatus io_s; /* sync the selected writers outside mutex_, stopping at the first failure */ for (log::Writer* log : logs_to_sync) { io_s = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync); if (!io_s.ok()) { status = io_s; break; } } if (!io_s.ok()) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL Sync error %s", io_s.ToString().c_str()); // In case there is a fs error we should set it globally to prevent the // future writes IOStatusCheck(io_s); } if (status.ok() && need_log_dir_sync) { status = directories_.GetWalDir()->Fsync(IOOptions(), nullptr); } TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:2"); TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:1"); { InstrumentedMutexLock l(&mutex_); MarkLogsSynced(current_log_number, need_log_dir_sync, status); } TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2"); return status; } /* Acquires log_write_mutex_ and writes the current log writer's buffer. The mutex is still held when this function returns, on success and on error alike; UnlockWAL() releases it. */ Status DBImpl::LockWAL() { log_write_mutex_.Lock(); auto cur_log_writer = logs_.back().writer; auto status = cur_log_writer->WriteBuffer(); if (!status.ok()) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s", status.ToString().c_str()); // In case there is a fs error we should set it globally to prevent the // future writes WriteStatusCheck(status); } return std::move(status); } /* Releases log_write_mutex_; always returns OK. */ Status DBImpl::UnlockWAL() { log_write_mutex_.Unlock(); return Status::OK(); } void
DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir, const Status& status) { /* Finishes a sync round for all logs numbered <= up_to. On success every such log is removed and its writer queued in logs_to_free_, except that the last remaining log is kept and only has getting_synced cleared. On failure only the flags are cleared. Waiters on log_sync_cv_ are woken in both cases. */ mutex_.AssertHeld(); if (synced_dir && logfile_number_ == up_to && status.ok()) { log_dir_synced_ = true; } for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) { auto& log = *it; assert(log.getting_synced); if (status.ok() && logs_.size() > 1) { logs_to_free_.push_back(log.ReleaseWriter()); // To modify logs_ both mutex_ and log_write_mutex_ must be held InstrumentedMutexLock l(&log_write_mutex_); it = logs_.erase(it); } else { log.getting_synced = false; ++it; } } assert(!status.ok() || logs_.empty() || logs_[0].number > up_to || (logs_.size() == 1 && !logs_[0].getting_synced)); log_sync_cv_.SignalAll(); } /* Returns versions_->LastSequence(). */ SequenceNumber DBImpl::GetLatestSequenceNumber() const { return versions_->LastSequence(); } /* Forwards seq to versions_->SetLastPublishedSequence(). */ void DBImpl::SetLastPublishedSequence(SequenceNumber seq) { versions_->SetLastPublishedSequence(seq); } /* Stores seqnum and returns true only when it is greater than the current value. NOTE(review): the load and the store are two separate operations, not one compare-exchange - confirm callers do not race. */ bool DBImpl::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) { if (seqnum > preserve_deletes_seqnum_.load()) { preserve_deletes_seqnum_.store(seqnum); return true; } else { return false; } } /* Convenience overload: resolves the column family (the default one when column_family is null), takes a reference on its SuperVersion under mutex_, and delegates to the ReadOptions overload with default ReadOptions. */ InternalIterator* DBImpl::NewInternalIterator( Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence, ColumnFamilyHandle* column_family, bool allow_unprepared_value) { ColumnFamilyData* cfd; if (column_family == nullptr) { cfd = default_cf_handle_->cfd(); } else { auto cfh = reinterpret_cast(column_family); cfd = cfh->cfd(); } mutex_.Lock(); SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); mutex_.Unlock(); ReadOptions roptions; return NewInternalIterator(roptions, cfd, super_version, arena, range_del_agg, sequence, allow_unprepared_value); } /* Counts one more scheduled purge and queues BGWorkPurge on the HIGH priority pool. Requires mutex_ and a successfully opened DB. */ void DBImpl::SchedulePurge() { mutex_.AssertHeld(); assert(opened_successfully_); // Purge operations are put into High priority queue bg_purge_scheduled_++; env_->Schedule(&DBImpl::BGWorkPurge, this, Env::Priority::HIGH, nullptr); } /* Drains the three purge queues. Each item is popped under mutex_ and then deleted with the mutex released. */ void DBImpl::BackgroundCallPurge() { mutex_.Lock(); while
(!logs_to_free_queue_.empty()) { assert(!logs_to_free_queue_.empty()); log::Writer* log_writer = *(logs_to_free_queue_.begin()); logs_to_free_queue_.pop_front(); /* delete with mutex_ released */ mutex_.Unlock(); delete log_writer; mutex_.Lock(); } while (!superversions_to_free_queue_.empty()) { assert(!superversions_to_free_queue_.empty()); SuperVersion* sv = superversions_to_free_queue_.front(); superversions_to_free_queue_.pop_front(); mutex_.Unlock(); delete sv; mutex_.Lock(); } // Can't use iterator to go over purge_files_ because inside the loop we're // unlocking the mutex that protects purge_files_. while (!purge_files_.empty()) { auto it = purge_files_.begin(); // Need to make a copy of the PurgeFilesInfo before unlocking the mutex. PurgeFileInfo purge_file = it->second; const std::string& fname = purge_file.fname; const std::string& dir_to_sync = purge_file.dir_to_sync; FileType type = purge_file.type; uint64_t number = purge_file.number; int job_id = purge_file.job_id; purge_files_.erase(it); mutex_.Unlock(); DeleteObsoleteFileImpl(job_id, fname, dir_to_sync, type, number); mutex_.Lock(); } bg_purge_scheduled_--; bg_cv_.SignalAll(); // IMPORTANT:there should be no code after calling SignalAll. This call may // signal the DB destructor that it's OK to proceed with destruction. In // that case, all DB variables will be deallocated and referencing them // will cause trouble.
mutex_.Unlock(); } namespace { /* State handed to CleanupIteratorState: the DB, its mutex, the SuperVersion the iterator holds a reference on, and whether cleanup is deferred to the background purge queue. */ struct IterState { IterState(DBImpl* _db, InstrumentedMutex* _mu, SuperVersion* _super_version, bool _background_purge) : db(_db), mu(_mu), super_version(_super_version), background_purge(_background_purge) {} DBImpl* db; InstrumentedMutex* mu; SuperVersion* super_version; bool background_purge; }; /* Iterator cleanup callback. Drops the iterator's SuperVersion reference; if that was the last one, cleans the SuperVersion up under the mutex and either deletes it and purges obsolete files inline, or (background_purge) queues both for the purge thread. Always deletes the IterState itself. */ static void CleanupIteratorState(void* arg1, void* /*arg2*/) { IterState* state = reinterpret_cast(arg1); if (state->super_version->Unref()) { // Job id == 0 means that this is not our background process, but rather // user thread JobContext job_context(0); state->mu->Lock(); state->super_version->Cleanup(); state->db->FindObsoleteFiles(&job_context, false, true); if (state->background_purge) { state->db->ScheduleBgLogWriterClose(&job_context); state->db->AddSuperVersionsToFreeQueue(state->super_version); state->db->SchedulePurge(); } state->mu->Unlock(); if (!state->background_purge) { delete state->super_version; } if (job_context.HaveSomethingToDelete()) { if (state->background_purge) { // PurgeObsoleteFiles here does not delete files. Instead, it adds the // files to be deleted to a job queue, and deletes it in a separate // background thread. state->db->PurgeObsoleteFiles(job_context, true /* schedule only */); state->mu->Lock(); state->db->SchedulePurge(); state->mu->Unlock(); } else { state->db->PurgeObsoleteFiles(job_context); } } job_context.Clean(); } delete state; } } // namespace /* Builds a merging iterator over the mutable memtable, the immutable memtables and (unless read_tier is kMemtableTier) the files of super_version->current, adding range tombstones to range_del_agg along the way. The caller passes in a SuperVersion reference: on success it is released by the registered cleanup, on error it is released here and an error iterator is returned. */ InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options, ColumnFamilyData* cfd, SuperVersion* super_version, Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence, bool allow_unprepared_value) { InternalIterator* internal_iter; assert(arena != nullptr); assert(range_del_agg != nullptr); // Need to create internal iterator from the arena.
MergeIteratorBuilder merge_iter_builder( &cfd->internal_comparator(), arena, !read_options.total_order_seek && super_version->mutable_cf_options.prefix_extractor != nullptr); // Collect iterator for mutable mem merge_iter_builder.AddIterator( super_version->mem->NewIterator(read_options, arena)); std::unique_ptr range_del_iter; Status s; if (!read_options.ignore_range_deletions) { range_del_iter.reset( super_version->mem->NewRangeTombstoneIterator(read_options, sequence)); range_del_agg->AddTombstones(std::move(range_del_iter)); } // Collect all needed child iterators for immutable memtables if (s.ok()) { super_version->imm->AddIterators(read_options, &merge_iter_builder); if (!read_options.ignore_range_deletions) { s = super_version->imm->AddRangeTombstoneIterators(read_options, arena, range_del_agg); } } TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s); if (s.ok()) { // Collect iterators for files in L0 - Ln if (read_options.read_tier != kMemtableTier) { super_version->current->AddIterators(read_options, file_options_, &merge_iter_builder, range_del_agg, allow_unprepared_value); } internal_iter = merge_iter_builder.Finish(); /* background purge is used when either the read option or the DB-wide avoid_unnecessary_blocking_io option asks for it */ IterState* cleanup = new IterState(this, &mutex_, super_version, read_options.background_purge_on_iterator_cleanup || immutable_db_options_.avoid_unnecessary_blocking_io); internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr); return internal_iter; } else { CleanupSuperVersion(super_version); } return NewErrorInternalIterator(s, arena); } /* Returns the default column family handle. */ ColumnFamilyHandle* DBImpl::DefaultColumnFamily() const { return default_cf_handle_; } /* Returns the persistent-stats column family handle. */ ColumnFamilyHandle* DBImpl::PersistentStatsColumnFamily() const { return persist_stats_cf_handle_; } /* Get without timestamp output; forwards to the overload below with a null timestamp. */ Status DBImpl::Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { return Get(read_options, column_family, key, value, /*timestamp=*/nullptr); } Status DBImpl::Get(const ReadOptions& read_options, ColumnFamilyHandle*
column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) { /* Packs the arguments into GetImplOptions and calls GetImpl(). */ GetImplOptions get_impl_options; get_impl_options.column_family = column_family; get_impl_options.value = value; get_impl_options.timestamp = timestamp; Status s = GetImpl(read_options, key, get_impl_options); return s; } /* Common point-lookup path. Either value or merge_operands must be set in get_impl_options. In debug builds it also checks that read_options.timestamp is present exactly when the column family comparator has a non-zero timestamp size. */ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, GetImplOptions& get_impl_options) { assert(get_impl_options.value != nullptr || get_impl_options.merge_operands != nullptr); #ifndef NDEBUG assert(get_impl_options.column_family); ColumnFamilyHandle* cf = get_impl_options.column_family; const Comparator* const ucmp = cf->GetComparator(); assert(ucmp); if (ucmp->timestamp_size() > 0) { assert(read_options.timestamp); assert(read_options.timestamp->size() == ucmp->timestamp_size()); } else { assert(!read_options.timestamp); } #endif // NDEBUG PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_); StopWatch sw(env_, stats_, DB_GET); PERF_TIMER_GUARD(get_snapshot_time); auto cfh = reinterpret_cast(get_impl_options.column_family); auto cfd = cfh->cfd(); /* tracer_ is tested once without the lock and again after taking trace_mutex_ */ if (tracer_) { // TODO: This mutex should be removed later, to improve performance when // tracing is enabled.
InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { tracer_->Get(get_impl_options.column_family, key); } } // Acquire SuperVersion SuperVersion* sv = GetAndRefSuperVersion(cfd); TEST_SYNC_POINT("DBImpl::GetImpl:1"); TEST_SYNC_POINT("DBImpl::GetImpl:2"); /* Choose the read sequence number: from the explicit snapshot (or the callback's max_visible_seq when a callback is supplied), otherwise from the latest sequence in versions_. */ SequenceNumber snapshot; if (read_options.snapshot != nullptr) { if (get_impl_options.callback) { // Already calculated based on read_options.snapshot snapshot = get_impl_options.callback->max_visible_seq(); } else { snapshot = reinterpret_cast(read_options.snapshot)->number_; } } else { // Note that the snapshot is assigned AFTER referencing the super // version because otherwise a flush happening in between may compact away // data for the snapshot, so the reader would see neither data that was // visible to the snapshot before compaction nor the newer data inserted // afterwards. snapshot = last_seq_same_as_publish_seq_ ? versions_->LastSequence() : versions_->LastPublishedSequence(); if (get_impl_options.callback) { // The unprep_seqs are not published for write unprepared, so it could be // that max_visible_seq is larger. Seek to the std::max of the two. // However, we still want our callback to contain the actual snapshot so // that it can do the correct visibility filtering. get_impl_options.callback->Refresh(snapshot); // Internally, WriteUnpreparedTxnReadCallback::Refresh would set // max_visible_seq = max(max_visible_seq, snapshot) // // Currently, the commented out assert is broken by // InvalidSnapshotReadCallback, but if write unprepared recovery followed // the regular transaction flow, then this special read callback would not // be needed. // // assert(callback->max_visible_seq() >= snapshot); snapshot = get_impl_options.callback->max_visible_seq(); } } TEST_SYNC_POINT("DBImpl::GetImpl:3"); TEST_SYNC_POINT("DBImpl::GetImpl:4"); // Prepare to store a list of merge operations if merge occurs.
MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; Status s; // First look in the memtable, then in the immutable memtable (if any). // s is both in/out. When in, s could either be OK or MergeInProgress. // merge_operands will contain the sequence of merges in the latter case. LookupKey lkey(key, snapshot, read_options.timestamp); PERF_TIMER_STOP(get_snapshot_time); bool skip_memtable = (read_options.read_tier == kPersistedTier && has_unpersisted_data_.load(std::memory_order_relaxed)); bool done = false; const Comparator* comparator = get_impl_options.column_family->GetComparator(); size_t ts_sz = comparator->timestamp_size(); std::string* timestamp = ts_sz > 0 ? get_impl_options.timestamp : nullptr; if (!skip_memtable) { // Get value associated with key if (get_impl_options.get_value) { if (sv->mem->Get(lkey, get_impl_options.value->GetSelf(), timestamp, &s, &merge_context, &max_covering_tombstone_seq, read_options, get_impl_options.callback, get_impl_options.is_blob_index)) { done = true; get_impl_options.value->PinSelf(); RecordTick(stats_, MEMTABLE_HIT); } else if ((s.ok() || s.IsMergeInProgress()) && sv->imm->Get(lkey, get_impl_options.value->GetSelf(), timestamp, &s, &merge_context, &max_covering_tombstone_seq, read_options, get_impl_options.callback, get_impl_options.is_blob_index)) { done = true; get_impl_options.value->PinSelf(); RecordTick(stats_, MEMTABLE_HIT); } } else { // Get Merge Operands associated with key, Merge Operands should not be // merged and raw values should be returned to the user. 
if (sv->mem->Get(lkey, /*value*/ nullptr, /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, read_options, nullptr, nullptr, false)) { done = true; RecordTick(stats_, MEMTABLE_HIT); } else if ((s.ok() || s.IsMergeInProgress()) && sv->imm->GetMergeOperands(lkey, &s, &merge_context, &max_covering_tombstone_seq, read_options)) { done = true; RecordTick(stats_, MEMTABLE_HIT); } } if (!done && !s.ok() && !s.IsMergeInProgress()) { ReturnAndCleanupSuperVersion(cfd, sv); return s; } } if (!done) { PERF_TIMER_GUARD(get_from_output_files_time); sv->current->Get( read_options, lkey, get_impl_options.value, timestamp, &s, &merge_context, &max_covering_tombstone_seq, get_impl_options.get_value ? get_impl_options.value_found : nullptr, nullptr, nullptr, get_impl_options.get_value ? get_impl_options.callback : nullptr, get_impl_options.get_value ? get_impl_options.is_blob_index : nullptr, get_impl_options.get_value); RecordTick(stats_, MEMTABLE_MISS); } { PERF_TIMER_GUARD(get_post_process_time); ReturnAndCleanupSuperVersion(cfd, sv); RecordTick(stats_, NUMBER_KEYS_READ); size_t size = 0; if (s.ok()) { if (get_impl_options.get_value) { size = get_impl_options.value->size(); } else { // Return all merge operands for get_impl_options.key *get_impl_options.number_of_operands = static_cast(merge_context.GetNumOperands()); if (*get_impl_options.number_of_operands > get_impl_options.get_merge_operands_options ->expected_max_number_of_operands) { s = Status::Incomplete( Status::SubCode::KMergeOperandsInsufficientCapacity); } else { for (const Slice& sl : merge_context.GetOperands()) { size += sl.size(); get_impl_options.merge_operands->PinSelf(sl); get_impl_options.merge_operands++; } } } RecordTick(stats_, BYTES_READ, size); PERF_COUNTER_ADD(get_read_bytes, size); } RecordInHistogram(stats_, BYTES_PER_READ, size); } return s; } std::vector DBImpl::MultiGet( const ReadOptions& read_options, const std::vector& column_family, const std::vector& keys, std::vector* 
values) { return MultiGet(read_options, column_family, keys, values, /*timestamps=*/nullptr); } std::vector DBImpl::MultiGet( const ReadOptions& read_options, const std::vector& column_family, const std::vector& keys, std::vector* values, std::vector* timestamps) { PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_); StopWatch sw(env_, stats_, DB_MULTIGET); PERF_TIMER_GUARD(get_snapshot_time); #ifndef NDEBUG for (const auto* cfh : column_family) { assert(cfh); const Comparator* const ucmp = cfh->GetComparator(); assert(ucmp); if (ucmp->timestamp_size() > 0) { assert(read_options.timestamp); assert(ucmp->timestamp_size() == read_options.timestamp->size()); } else { assert(!read_options.timestamp); } } #endif // NDEBUG SequenceNumber consistent_seqnum; std::unordered_map multiget_cf_data( column_family.size()); for (auto cf : column_family) { auto cfh = reinterpret_cast(cf); auto cfd = cfh->cfd(); if (multiget_cf_data.find(cfd->GetID()) == multiget_cf_data.end()) { multiget_cf_data.emplace(cfd->GetID(), MultiGetColumnFamilyData(cfh, nullptr)); } } std::function::iterator&)> iter_deref_lambda = [](std::unordered_map::iterator& cf_iter) { return &cf_iter->second; }; bool unref_only = MultiCFSnapshot>( read_options, nullptr, iter_deref_lambda, &multiget_cf_data, &consistent_seqnum); // Contain a list of merge operations if merge occurs. MergeContext merge_context; // Note: this always resizes the values array size_t num_keys = keys.size(); std::vector stat_list(num_keys); values->resize(num_keys); if (timestamps) { timestamps->resize(num_keys); } // Keep track of bytes that we read for statistics-recording later uint64_t bytes_read = 0; PERF_TIMER_STOP(get_snapshot_time); // For each of the given keys, apply the entire "get" process as follows: // First look in the memtable, then in the immutable memtable (if any). // s is both in/out. When in, s could either be OK or MergeInProgress. // merge_operands will contain the sequence of merges in the latter case. 
size_t num_found = 0; size_t keys_read; uint64_t curr_value_size = 0; for (keys_read = 0; keys_read < num_keys; ++keys_read) { merge_context.Clear(); Status& s = stat_list[keys_read]; std::string* value = &(*values)[keys_read]; std::string* timestamp = timestamps ? &(*timestamps)[keys_read] : nullptr; LookupKey lkey(keys[keys_read], consistent_seqnum, read_options.timestamp); auto cfh = static_cast_with_check(column_family[keys_read]); SequenceNumber max_covering_tombstone_seq = 0; auto mgd_iter = multiget_cf_data.find(cfh->cfd()->GetID()); assert(mgd_iter != multiget_cf_data.end()); auto mgd = mgd_iter->second; auto super_version = mgd.super_version; bool skip_memtable = (read_options.read_tier == kPersistedTier && has_unpersisted_data_.load(std::memory_order_relaxed)); bool done = false; if (!skip_memtable) { if (super_version->mem->Get(lkey, value, timestamp, &s, &merge_context, &max_covering_tombstone_seq, read_options)) { done = true; RecordTick(stats_, MEMTABLE_HIT); } else if (super_version->imm->Get( lkey, value, timestamp, &s, &merge_context, &max_covering_tombstone_seq, read_options)) { done = true; RecordTick(stats_, MEMTABLE_HIT); } } if (!done) { PinnableSlice pinnable_val; PERF_TIMER_GUARD(get_from_output_files_time); super_version->current->Get(read_options, lkey, &pinnable_val, timestamp, &s, &merge_context, &max_covering_tombstone_seq); value->assign(pinnable_val.data(), pinnable_val.size()); RecordTick(stats_, MEMTABLE_MISS); } if (s.ok()) { bytes_read += value->size(); num_found++; curr_value_size += value->size(); if (curr_value_size > read_options.value_size_soft_limit) { while (++keys_read < num_keys) { stat_list[keys_read] = Status::Aborted(); } break; } } if (read_options.deadline.count() && env_->NowMicros() > static_cast(read_options.deadline.count())) { break; } } if (keys_read < num_keys) { // The only reason to break out of the loop is when the deadline is // exceeded assert(env_->NowMicros() > 
static_cast(read_options.deadline.count())); for (++keys_read; keys_read < num_keys; ++keys_read) { stat_list[keys_read] = Status::TimedOut(); } } // Post processing (decrement reference counts and record statistics) PERF_TIMER_GUARD(get_post_process_time); autovector superversions_to_delete; for (auto mgd_iter : multiget_cf_data) { auto mgd = mgd_iter.second; if (!unref_only) { ReturnAndCleanupSuperVersion(mgd.cfd, mgd.super_version); } else { mgd.cfd->GetSuperVersion()->Unref(); } } RecordTick(stats_, NUMBER_MULTIGET_CALLS); RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found); RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read); RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read); PERF_COUNTER_ADD(multiget_read_bytes, bytes_read); PERF_TIMER_STOP(get_post_process_time); return stat_list; } template bool DBImpl::MultiCFSnapshot( const ReadOptions& read_options, ReadCallback* callback, std::function& iter_deref_func, T* cf_list, SequenceNumber* snapshot) { PERF_TIMER_GUARD(get_snapshot_time); bool last_try = false; if (cf_list->size() == 1) { // Fast path for a single column family. We can simply get the thread loca // super version auto cf_iter = cf_list->begin(); auto node = iter_deref_func(cf_iter); node->super_version = GetAndRefSuperVersion(node->cfd); if (read_options.snapshot != nullptr) { // Note: In WritePrepared txns this is not necessary but not harmful // either. Because prep_seq > snapshot => commit_seq > snapshot so if // a snapshot is specified we should be fine with skipping seq numbers // that are greater than that. // // In WriteUnprepared, we cannot set snapshot in the lookup key because we // may skip uncommitted data that should be visible to the transaction for // reading own writes. 
*snapshot = static_cast(read_options.snapshot)->number_; if (callback) { *snapshot = std::max(*snapshot, callback->max_visible_seq()); } } else { // Since we get and reference the super version before getting // the snapshot number, without a mutex protection, it is possible // that a memtable switch happened in the middle and not all the // data for this snapshot is available. But it will contain all // the data available in the super version we have, which is also // a valid snapshot to read from. // We shouldn't get snapshot before finding and referencing the super // version because a flush happening in between may compact away data for // the snapshot, but the snapshot is earlier than the data overwriting it, // so users may see wrong results. *snapshot = last_seq_same_as_publish_seq_ ? versions_->LastSequence() : versions_->LastPublishedSequence(); } } else { // If we end up with the same issue of memtable geting sealed during 2 // consecutive retries, it means the write rate is very high. In that case // its probably ok to take the mutex on the 3rd try so we can succeed for // sure static const int num_retries = 3; for (int i = 0; i < num_retries; ++i) { last_try = (i == num_retries - 1); bool retry = false; if (i > 0) { for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end(); ++cf_iter) { auto node = iter_deref_func(cf_iter); SuperVersion* super_version = node->super_version; ColumnFamilyData* cfd = node->cfd; if (super_version != nullptr) { ReturnAndCleanupSuperVersion(cfd, super_version); } node->super_version = nullptr; } } if (read_options.snapshot == nullptr) { if (last_try) { TEST_SYNC_POINT("DBImpl::MultiGet::LastTry"); // We're close to max number of retries. For the last retry, // acquire the lock so we're sure to succeed mutex_.Lock(); } *snapshot = last_seq_same_as_publish_seq_ ? 
versions_->LastSequence() : versions_->LastPublishedSequence(); } else { *snapshot = reinterpret_cast(read_options.snapshot) ->number_; } for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end(); ++cf_iter) { auto node = iter_deref_func(cf_iter); if (!last_try) { node->super_version = GetAndRefSuperVersion(node->cfd); } else { node->super_version = node->cfd->GetSuperVersion()->Ref(); } TEST_SYNC_POINT("DBImpl::MultiGet::AfterRefSV"); if (read_options.snapshot != nullptr || last_try) { // If user passed a snapshot, then we don't care if a memtable is // sealed or compaction happens because the snapshot would ensure // that older key versions are kept around. If this is the last // retry, then we have the lock so nothing bad can happen continue; } // We could get the earliest sequence number for the whole list of // memtables, which will include immutable memtables as well, but that // might be tricky to maintain in case we decide, in future, to do // memtable compaction. if (!last_try) { SequenceNumber seq = node->super_version->mem->GetEarliestSequenceNumber(); if (seq > *snapshot) { retry = true; break; } } } if (!retry) { if (last_try) { mutex_.Unlock(); } break; } } } // Keep track of bytes that we read for statistics-recording later PERF_TIMER_STOP(get_snapshot_time); return last_try; } void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input) { return MultiGet(read_options, num_keys, column_families, keys, values, /*timestamps=*/nullptr, statuses, sorted_input); } void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input) { if (num_keys == 0) { return; } #ifndef NDEBUG for (size_t i = 0; i < num_keys; ++i) { ColumnFamilyHandle* cfh = 
column_families[i]; assert(cfh); const Comparator* const ucmp = cfh->GetComparator(); assert(ucmp); if (ucmp->timestamp_size() > 0) { assert(read_options.timestamp); assert(read_options.timestamp->size() == ucmp->timestamp_size()); } else { assert(!read_options.timestamp); } } #endif // NDEBUG autovector key_context; autovector sorted_keys; sorted_keys.resize(num_keys); for (size_t i = 0; i < num_keys; ++i) { key_context.emplace_back(column_families[i], keys[i], &values[i], timestamps ? ×tamps[i] : nullptr, &statuses[i]); } for (size_t i = 0; i < num_keys; ++i) { sorted_keys[i] = &key_context[i]; } PrepareMultiGetKeys(num_keys, sorted_input, &sorted_keys); autovector multiget_cf_data; size_t cf_start = 0; ColumnFamilyHandle* cf = sorted_keys[0]->column_family; for (size_t i = 0; i < num_keys; ++i) { KeyContext* key_ctx = sorted_keys[i]; if (key_ctx->column_family != cf) { multiget_cf_data.emplace_back( MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr)); cf_start = i; cf = key_ctx->column_family; } } { // multiget_cf_data.emplace_back( // MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr)); multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr); } std::function::iterator&)> iter_deref_lambda = [](autovector::iterator& cf_iter) { return &(*cf_iter); }; SequenceNumber consistent_seqnum; bool unref_only = MultiCFSnapshot< autovector>( read_options, nullptr, iter_deref_lambda, &multiget_cf_data, &consistent_seqnum); Status s; auto cf_iter = multiget_cf_data.begin(); for (; cf_iter != multiget_cf_data.end(); ++cf_iter) { s = MultiGetImpl(read_options, cf_iter->start, cf_iter->num_keys, &sorted_keys, cf_iter->super_version, consistent_seqnum, nullptr, nullptr); if (!s.ok()) { break; } } if (!s.ok()) { assert(s.IsTimedOut() || s.IsAborted()); for (++cf_iter; cf_iter != multiget_cf_data.end(); ++cf_iter) { for (size_t i = cf_iter->start; i < cf_iter->start + cf_iter->num_keys; ++i) { *sorted_keys[i]->s = s; } } } for (const 
auto& iter : multiget_cf_data) { if (!unref_only) { ReturnAndCleanupSuperVersion(iter.cfd, iter.super_version); } else { iter.cfd->GetSuperVersion()->Unref(); } } } namespace { // Order keys by CF ID, followed by key contents struct CompareKeyContext { inline bool operator()(const KeyContext* lhs, const KeyContext* rhs) { ColumnFamilyHandleImpl* cfh = static_cast(lhs->column_family); uint32_t cfd_id1 = cfh->cfd()->GetID(); const Comparator* comparator = cfh->cfd()->user_comparator(); cfh = static_cast(lhs->column_family); uint32_t cfd_id2 = cfh->cfd()->GetID(); if (cfd_id1 < cfd_id2) { return true; } else if (cfd_id1 > cfd_id2) { return false; } // Both keys are from the same column family int cmp = comparator->CompareWithoutTimestamp( *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); if (cmp < 0) { return true; } return false; } }; } // anonymous namespace void DBImpl::PrepareMultiGetKeys( size_t num_keys, bool sorted_input, autovector* sorted_keys) { #ifndef NDEBUG if (sorted_input) { for (size_t index = 0; index < sorted_keys->size(); ++index) { if (index > 0) { KeyContext* lhs = (*sorted_keys)[index - 1]; KeyContext* rhs = (*sorted_keys)[index]; ColumnFamilyHandleImpl* cfh = reinterpret_cast(lhs->column_family); uint32_t cfd_id1 = cfh->cfd()->GetID(); const Comparator* comparator = cfh->cfd()->user_comparator(); cfh = reinterpret_cast(lhs->column_family); uint32_t cfd_id2 = cfh->cfd()->GetID(); assert(cfd_id1 <= cfd_id2); if (cfd_id1 < cfd_id2) { continue; } // Both keys are from the same column family int cmp = comparator->CompareWithoutTimestamp( *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); assert(cmp <= 0); } index++; } } #endif if (!sorted_input) { CompareKeyContext sort_comparator; std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys, sort_comparator); } } void DBImpl::MultiGet(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* 
values, Status* statuses, const bool sorted_input) { return MultiGet(read_options, column_family, num_keys, keys, values, /*timestamp=*/nullptr, statuses, sorted_input); } void DBImpl::MultiGet(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input) { autovector key_context; autovector sorted_keys; sorted_keys.resize(num_keys); for (size_t i = 0; i < num_keys; ++i) { key_context.emplace_back(column_family, keys[i], &values[i], timestamps ? ×tamps[i] : nullptr, &statuses[i]); } for (size_t i = 0; i < num_keys; ++i) { sorted_keys[i] = &key_context[i]; } PrepareMultiGetKeys(num_keys, sorted_input, &sorted_keys); MultiGetWithCallback(read_options, column_family, nullptr, &sorted_keys); } void DBImpl::MultiGetWithCallback( const ReadOptions& read_options, ColumnFamilyHandle* column_family, ReadCallback* callback, autovector* sorted_keys) { std::array multiget_cf_data; multiget_cf_data[0] = MultiGetColumnFamilyData(column_family, nullptr); std::function::iterator&)> iter_deref_lambda = [](std::array::iterator& cf_iter) { return &(*cf_iter); }; size_t num_keys = sorted_keys->size(); SequenceNumber consistent_seqnum; bool unref_only = MultiCFSnapshot>( read_options, callback, iter_deref_lambda, &multiget_cf_data, &consistent_seqnum); #ifndef NDEBUG assert(!unref_only); #else // Silence unused variable warning (void)unref_only; #endif // NDEBUG if (callback && read_options.snapshot == nullptr) { // The unprep_seqs are not published for write unprepared, so it could be // that max_visible_seq is larger. Seek to the std::max of the two. // However, we still want our callback to contain the actual snapshot so // that it can do the correct visibility filtering. 
callback->Refresh(consistent_seqnum); // Internally, WriteUnpreparedTxnReadCallback::Refresh would set // max_visible_seq = max(max_visible_seq, snapshot) // // Currently, the commented out assert is broken by // InvalidSnapshotReadCallback, but if write unprepared recovery followed // the regular transaction flow, then this special read callback would not // be needed. // // assert(callback->max_visible_seq() >= snapshot); consistent_seqnum = callback->max_visible_seq(); } Status s = MultiGetImpl(read_options, 0, num_keys, sorted_keys, multiget_cf_data[0].super_version, consistent_seqnum, nullptr, nullptr); assert(s.ok() || s.IsTimedOut() || s.IsAborted()); ReturnAndCleanupSuperVersion(multiget_cf_data[0].cfd, multiget_cf_data[0].super_version); } // The actual implementation of batched MultiGet. Parameters - // start_key - Index in the sorted_keys vector to start processing from // num_keys - Number of keys to lookup, starting with sorted_keys[start_key] // sorted_keys - The entire batch of sorted keys for this CF // // The per key status is returned in the KeyContext structures pointed to by // sorted_keys. An overall Status is also returned, with the only possible // values being Status::OK() and Status::TimedOut(). The latter indicates // that the call exceeded read_options.deadline Status DBImpl::MultiGetImpl( const ReadOptions& read_options, size_t start_key, size_t num_keys, autovector* sorted_keys, SuperVersion* super_version, SequenceNumber snapshot, ReadCallback* callback, bool* is_blob_index) { PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_); StopWatch sw(env_, stats_, DB_MULTIGET); // For each of the given keys, apply the entire "get" process as follows: // First look in the memtable, then in the immutable memtable (if any). // s is both in/out. When in, s could either be OK or MergeInProgress. // merge_operands will contain the sequence of merges in the latter case. 
size_t keys_left = num_keys; Status s; uint64_t curr_value_size = 0; while (keys_left) { if (read_options.deadline.count() && env_->NowMicros() > static_cast(read_options.deadline.count())) { s = Status::TimedOut(); break; } size_t batch_size = (keys_left > MultiGetContext::MAX_BATCH_SIZE) ? MultiGetContext::MAX_BATCH_SIZE : keys_left; MultiGetContext ctx(sorted_keys, start_key + num_keys - keys_left, batch_size, snapshot, read_options); MultiGetRange range = ctx.GetMultiGetRange(); range.AddValueSize(curr_value_size); bool lookup_current = false; keys_left -= batch_size; for (auto mget_iter = range.begin(); mget_iter != range.end(); ++mget_iter) { mget_iter->merge_context.Clear(); *mget_iter->s = Status::OK(); } bool skip_memtable = (read_options.read_tier == kPersistedTier && has_unpersisted_data_.load(std::memory_order_relaxed)); if (!skip_memtable) { super_version->mem->MultiGet(read_options, &range, callback, is_blob_index); if (!range.empty()) { super_version->imm->MultiGet(read_options, &range, callback, is_blob_index); } if (!range.empty()) { lookup_current = true; uint64_t left = range.KeysLeft(); RecordTick(stats_, MEMTABLE_MISS, left); } } if (lookup_current) { PERF_TIMER_GUARD(get_from_output_files_time); super_version->current->MultiGet(read_options, &range, callback, is_blob_index); } curr_value_size = range.GetValueSize(); if (curr_value_size > read_options.value_size_soft_limit) { s = Status::Aborted(); break; } } // Post processing (decrement reference counts and record statistics) PERF_TIMER_GUARD(get_post_process_time); size_t num_found = 0; uint64_t bytes_read = 0; for (size_t i = start_key; i < start_key + num_keys - keys_left; ++i) { KeyContext* key = (*sorted_keys)[i]; if (key->s->ok()) { bytes_read += key->value->size(); num_found++; } } if (keys_left) { assert(s.IsTimedOut() || s.IsAborted()); for (size_t i = start_key + num_keys - keys_left; i < start_key + num_keys; ++i) { KeyContext* key = (*sorted_keys)[i]; *key->s = s; } } 
RecordTick(stats_, NUMBER_MULTIGET_CALLS); RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found); RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read); RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read); PERF_COUNTER_ADD(multiget_read_bytes, bytes_read); PERF_TIMER_STOP(get_post_process_time); return s; } Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, const std::string& column_family, ColumnFamilyHandle** handle) { assert(handle != nullptr); Status s = CreateColumnFamilyImpl(cf_options, column_family, handle); if (s.ok()) { s = WriteOptionsFile(true /*need_mutex_lock*/, true /*need_enter_write_thread*/); } return s; } Status DBImpl::CreateColumnFamilies( const ColumnFamilyOptions& cf_options, const std::vector& column_family_names, std::vector* handles) { assert(handles != nullptr); handles->clear(); size_t num_cf = column_family_names.size(); Status s; bool success_once = false; for (size_t i = 0; i < num_cf; i++) { ColumnFamilyHandle* handle; s = CreateColumnFamilyImpl(cf_options, column_family_names[i], &handle); if (!s.ok()) { break; } handles->push_back(handle); success_once = true; } if (success_once) { Status persist_options_status = WriteOptionsFile( true /*need_mutex_lock*/, true /*need_enter_write_thread*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } } return s; } Status DBImpl::CreateColumnFamilies( const std::vector& column_families, std::vector* handles) { assert(handles != nullptr); handles->clear(); size_t num_cf = column_families.size(); Status s; bool success_once = false; for (size_t i = 0; i < num_cf; i++) { ColumnFamilyHandle* handle; s = CreateColumnFamilyImpl(column_families[i].options, column_families[i].name, &handle); if (!s.ok()) { break; } handles->push_back(handle); success_once = true; } if (success_once) { Status persist_options_status = WriteOptionsFile( true /*need_mutex_lock*/, true 
/*need_enter_write_thread*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } } return s; } Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, const std::string& column_family_name, ColumnFamilyHandle** handle) { Status s; Status persist_options_status; *handle = nullptr; DBOptions db_options = BuildDBOptions(immutable_db_options_, mutable_db_options_); s = ColumnFamilyData::ValidateOptions(db_options, cf_options); if (s.ok()) { for (auto& cf_path : cf_options.cf_paths) { s = env_->CreateDirIfMissing(cf_path.path); if (!s.ok()) { break; } } } if (!s.ok()) { return s; } SuperVersionContext sv_context(/* create_superversion */ true); { InstrumentedMutexLock l(&mutex_); if (versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name) != nullptr) { return Status::InvalidArgument("Column family already exists"); } VersionEdit edit; edit.AddColumnFamily(column_family_name); uint32_t new_id = versions_->GetColumnFamilySet()->GetNextColumnFamilyID(); edit.SetColumnFamily(new_id); edit.SetLogNumber(logfile_number_); edit.SetComparatorName(cf_options.comparator->Name()); // LogAndApply will both write the creation in MANIFEST and create // ColumnFamilyData object { // write thread WriteThread::Writer w; write_thread_.EnterUnbatched(&w, &mutex_); // LogAndApply will both write the creation in MANIFEST and create // ColumnFamilyData object s = versions_->LogAndApply(nullptr, MutableCFOptions(cf_options), &edit, &mutex_, directories_.GetDbDir(), false, &cf_options); write_thread_.ExitUnbatched(&w); } if (s.ok()) { auto* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); assert(cfd != nullptr); std::map> dummy_created_dirs; s = cfd->AddDirectories(&dummy_created_dirs); } if (s.ok()) { single_column_family_mode_ = false; auto* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); assert(cfd != nullptr); InstallSuperVersionAndScheduleWork(cfd, &sv_context, 
*cfd->GetLatestMutableCFOptions()); if (!cfd->mem()->IsSnapshotSupported()) { is_snapshot_supported_ = false; } cfd->set_initialized(); *handle = new ColumnFamilyHandleImpl(cfd, this, &mutex_); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Created column family [%s] (ID %u)", column_family_name.c_str(), (unsigned)cfd->GetID()); } else { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Creating column family [%s] FAILED -- %s", column_family_name.c_str(), s.ToString().c_str()); } } // InstrumentedMutexLock l(&mutex_) sv_context.Clean(); // this is outside the mutex if (s.ok()) { NewThreadStatusCfInfo( reinterpret_cast(*handle)->cfd()); } return s; } Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { assert(column_family != nullptr); Status s = DropColumnFamilyImpl(column_family); if (s.ok()) { s = WriteOptionsFile(true /*need_mutex_lock*/, true /*need_enter_write_thread*/); } return s; } Status DBImpl::DropColumnFamilies( const std::vector& column_families) { Status s; bool success_once = false; for (auto* handle : column_families) { s = DropColumnFamilyImpl(handle); if (!s.ok()) { break; } success_once = true; } if (success_once) { Status persist_options_status = WriteOptionsFile( true /*need_mutex_lock*/, true /*need_enter_write_thread*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } } return s; } Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); if (cfd->GetID() == 0) { return Status::InvalidArgument("Can't drop default column family"); } bool cf_support_snapshot = cfd->mem()->IsSnapshotSupported(); VersionEdit edit; edit.DropColumnFamily(); edit.SetColumnFamily(cfd->GetID()); Status s; { InstrumentedMutexLock l(&mutex_); if (cfd->IsDropped()) { s = Status::InvalidArgument("Column family already dropped!\n"); } if (s.ok()) { // we drop column family from a single write thread WriteThread::Writer w; 
write_thread_.EnterUnbatched(&w, &mutex_); s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_); write_thread_.ExitUnbatched(&w); } if (s.ok()) { auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); max_total_in_memory_state_ -= mutable_cf_options->write_buffer_size * mutable_cf_options->max_write_buffer_number; } if (!cf_support_snapshot) { // Dropped Column Family doesn't support snapshot. Need to recalculate // is_snapshot_supported_. bool new_is_snapshot_supported = true; for (auto c : *versions_->GetColumnFamilySet()) { if (!c->IsDropped() && !c->mem()->IsSnapshotSupported()) { new_is_snapshot_supported = false; break; } } is_snapshot_supported_ = new_is_snapshot_supported; } bg_cv_.SignalAll(); } if (s.ok()) { // Note that here we erase the associated cf_info of the to-be-dropped // cfd before its ref-count goes to zero to avoid having to erase cf_info // later inside db_mutex. EraseThreadStatusCfInfo(cfd); assert(cfd->IsDropped()); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Dropped column family with id %u\n", cfd->GetID()); } else { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Dropping column family with id %u FAILED -- %s\n", cfd->GetID(), s.ToString().c_str()); } return s; } bool DBImpl::KeyMayExist(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value, std::string* timestamp, bool* value_found) { assert(value != nullptr); if (value_found != nullptr) { // falsify later if key-may-exist but can't fetch value *value_found = true; } ReadOptions roptions = read_options; roptions.read_tier = kBlockCacheTier; // read from block cache only PinnableSlice pinnable_val; GetImplOptions get_impl_options; get_impl_options.column_family = column_family; get_impl_options.value = &pinnable_val; get_impl_options.value_found = value_found; get_impl_options.timestamp = timestamp; auto s = GetImpl(roptions, key, get_impl_options); value->assign(pinnable_val.data(), 
pinnable_val.size()); // If block_cache is enabled and the index block of the table didn't // not present in block_cache, the return value will be Status::Incomplete. // In this case, key may still exist in the table. return s.ok() || s.IsIncomplete(); } Iterator* DBImpl::NewIterator(const ReadOptions& read_options, ColumnFamilyHandle* column_family) { if (read_options.managed) { return NewErrorIterator( Status::NotSupported("Managed iterator is not supported anymore.")); } // We will eventually support deadline for iterators too, but safeguard // for now if (read_options.deadline != std::chrono::microseconds::zero()) { return NewErrorIterator( Status::NotSupported("ReadOptions deadline is not supported")); } Iterator* result = nullptr; if (read_options.read_tier == kPersistedTier) { return NewErrorIterator(Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators.")); } // if iterator wants internal keys, we can only proceed if // we can guarantee the deletes haven't been processed yet if (immutable_db_options_.preserve_deletes && read_options.iter_start_seqnum > 0 && read_options.iter_start_seqnum < preserve_deletes_seqnum_.load()) { return NewErrorIterator(Status::InvalidArgument( "Iterator requested internal keys which are too old and are not" " guaranteed to be preserved, try larger iter_start_seqnum opt.")); } auto cfh = reinterpret_cast(column_family); ColumnFamilyData* cfd = cfh->cfd(); assert(cfd != nullptr); ReadCallback* read_callback = nullptr; // No read callback provided. 
if (read_options.tailing) { #ifdef ROCKSDB_LITE // not supported in lite version result = nullptr; #else SuperVersion* sv = cfd->GetReferencedSuperVersion(this); auto iter = new ForwardIterator(this, read_options, cfd, sv, /* allow_unprepared_value */ true); result = NewDBIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, cfd->user_comparator(), iter, kMaxSequenceNumber, sv->mutable_cf_options.max_sequential_skip_in_iterations, read_callback, this, cfd); #endif } else { // Note: no need to consider the special case of // last_seq_same_as_publish_seq_==false since NewIterator is overridden in // WritePreparedTxnDB result = NewIteratorImpl(read_options, cfd, (read_options.snapshot != nullptr) ? read_options.snapshot->GetSequenceNumber() : kMaxSequenceNumber, read_callback); } return result; } ArenaWrappedDBIter* DBImpl::NewIteratorImpl(const ReadOptions& read_options, ColumnFamilyData* cfd, SequenceNumber snapshot, ReadCallback* read_callback, bool allow_blob, bool allow_refresh) { SuperVersion* sv = cfd->GetReferencedSuperVersion(this); TEST_SYNC_POINT("DBImpl::NewIterator:1"); TEST_SYNC_POINT("DBImpl::NewIterator:2"); if (snapshot == kMaxSequenceNumber) { // Note that the snapshot is assigned AFTER referencing the super // version because otherwise a flush happening in between may compact away // data for the snapshot, so the reader would see neither data that was be // visible to the snapshot before compaction nor the newer data inserted // afterwards. // Note that the super version might not contain all the data available // to this snapshot, but in that case it can see all the data in the // super version, which is a valid consistent state after the user // calls NewIterator(). snapshot = versions_->LastSequence(); TEST_SYNC_POINT("DBImpl::NewIterator:3"); TEST_SYNC_POINT("DBImpl::NewIterator:4"); } // Try to generate a DB iterator tree in continuous memory area to be // cache friendly. 
Here is an example of result: // +-------------------------------+ // | | // | ArenaWrappedDBIter | // | + | // | +---> Inner Iterator ------------+ // | | | | // | | +-- -- -- -- -- -- -- --+ | // | +--- | Arena | | // | | | | // | Allocated Memory: | | // | | +-------------------+ | // | | | DBIter | <---+ // | | + | // | | | +-> iter_ ------------+ // | | | | | // | | +-------------------+ | // | | | MergingIterator | <---+ // | | + | // | | | +->child iter1 ------------+ // | | | | | | // | | +->child iter2 ----------+ | // | | | | | | | // | | | +->child iter3 --------+ | | // | | | | | | // | | +-------------------+ | | | // | | | Iterator1 | <--------+ // | | +-------------------+ | | // | | | Iterator2 | <------+ // | | +-------------------+ | // | | | Iterator3 | <----+ // | | +-------------------+ // | | | // +-------+-----------------------+ // // ArenaWrappedDBIter inlines an arena area where all the iterators in // the iterator tree are allocated in the order of being accessed when // querying. // Laying out the iterators in the order of being accessed makes it more // likely that any iterator pointer is close to the iterator it points to so // that they are likely to be in the same cache line and/or page. ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->version_number, read_callback, this, cfd, allow_blob, read_options.snapshot != nullptr ? 
false : allow_refresh); InternalIterator* internal_iter = NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(), db_iter->GetRangeDelAggregator(), snapshot, /* allow_unprepared_value */ true); db_iter->SetIterUnderDBIter(internal_iter); return db_iter; } Status DBImpl::NewIterators( const ReadOptions& read_options, const std::vector& column_families, std::vector* iterators) { if (read_options.managed) { return Status::NotSupported("Managed iterator is not supported anymore."); } if (read_options.read_tier == kPersistedTier) { return Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators."); } ReadCallback* read_callback = nullptr; // No read callback provided. iterators->clear(); iterators->reserve(column_families.size()); if (read_options.tailing) { #ifdef ROCKSDB_LITE return Status::InvalidArgument( "Tailing iterator not supported in RocksDB lite"); #else for (auto cfh : column_families) { auto cfd = reinterpret_cast(cfh)->cfd(); SuperVersion* sv = cfd->GetReferencedSuperVersion(this); auto iter = new ForwardIterator(this, read_options, cfd, sv, /* allow_unprepared_value */ true); iterators->push_back(NewDBIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, cfd->user_comparator(), iter, kMaxSequenceNumber, sv->mutable_cf_options.max_sequential_skip_in_iterations, read_callback, this, cfd)); } #endif } else { // Note: no need to consider the special case of // last_seq_same_as_publish_seq_==false since NewIterators is overridden in // WritePreparedTxnDB auto snapshot = read_options.snapshot != nullptr ? 
read_options.snapshot->GetSequenceNumber() : versions_->LastSequence(); for (size_t i = 0; i < column_families.size(); ++i) { auto* cfd = reinterpret_cast(column_families[i])->cfd(); iterators->push_back( NewIteratorImpl(read_options, cfd, snapshot, read_callback)); } } return Status::OK(); } const Snapshot* DBImpl::GetSnapshot() { return GetSnapshotImpl(false); } #ifndef ROCKSDB_LITE const Snapshot* DBImpl::GetSnapshotForWriteConflictBoundary() { return GetSnapshotImpl(true); } #endif // ROCKSDB_LITE SnapshotImpl* DBImpl::GetSnapshotImpl(bool is_write_conflict_boundary, bool lock) { int64_t unix_time = 0; env_->GetCurrentTime(&unix_time); // Ignore error SnapshotImpl* s = new SnapshotImpl; if (lock) { mutex_.Lock(); } // returns null if the underlying memtable does not support snapshot. if (!is_snapshot_supported_) { if (lock) { mutex_.Unlock(); } delete s; return nullptr; } auto snapshot_seq = last_seq_same_as_publish_seq_ ? versions_->LastSequence() : versions_->LastPublishedSequence(); SnapshotImpl* snapshot = snapshots_.New(s, snapshot_seq, unix_time, is_write_conflict_boundary); if (lock) { mutex_.Unlock(); } return snapshot; } namespace { typedef autovector CfdList; bool CfdListContains(const CfdList& list, ColumnFamilyData* cfd) { for (const ColumnFamilyData* t : list) { if (t == cfd) { return true; } } return false; } } // namespace void DBImpl::ReleaseSnapshot(const Snapshot* s) { const SnapshotImpl* casted_s = reinterpret_cast(s); { InstrumentedMutexLock l(&mutex_); snapshots_.Delete(casted_s); uint64_t oldest_snapshot; if (snapshots_.empty()) { oldest_snapshot = last_seq_same_as_publish_seq_ ? versions_->LastSequence() : versions_->LastPublishedSequence(); } else { oldest_snapshot = snapshots_.oldest()->number_; } // Avoid to go through every column family by checking a global threshold // first. 
if (oldest_snapshot > bottommost_files_mark_threshold_) { CfdList cf_scheduled; for (auto* cfd : *versions_->GetColumnFamilySet()) { cfd->current()->storage_info()->UpdateOldestSnapshot(oldest_snapshot); if (!cfd->current() ->storage_info() ->BottommostFilesMarkedForCompaction() .empty()) { SchedulePendingCompaction(cfd); MaybeScheduleFlushOrCompaction(); cf_scheduled.push_back(cfd); } } // Calculate a new threshold, skipping those CFs where compactions are // scheduled. We do not do the same pass as the previous loop because // mutex might be unlocked during the loop, making the result inaccurate. SequenceNumber new_bottommost_files_mark_threshold = kMaxSequenceNumber; for (auto* cfd : *versions_->GetColumnFamilySet()) { if (CfdListContains(cf_scheduled, cfd)) { continue; } new_bottommost_files_mark_threshold = std::min( new_bottommost_files_mark_threshold, cfd->current()->storage_info()->bottommost_files_mark_threshold()); } bottommost_files_mark_threshold_ = new_bottommost_files_mark_threshold; } } delete casted_s; } #ifndef ROCKSDB_LITE Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, TablePropertiesCollection* props) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); // Increment the ref count mutex_.Lock(); auto version = cfd->current(); version->Ref(); mutex_.Unlock(); auto s = version->GetPropertiesOfAllTables(props); // Decrement the ref count mutex_.Lock(); version->Unref(); mutex_.Unlock(); return s; } Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family, const Range* range, std::size_t n, TablePropertiesCollection* props) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); // Increment the ref count mutex_.Lock(); auto version = cfd->current(); version->Ref(); mutex_.Unlock(); auto s = version->GetPropertiesOfTablesInRange(range, n, props); // Decrement the ref count mutex_.Lock(); version->Unref(); mutex_.Unlock(); return s; } #endif // ROCKSDB_LITE const std::string& 
DBImpl::GetName() const { return dbname_; } Env* DBImpl::GetEnv() const { return env_; } FileSystem* DB::GetFileSystem() const { static LegacyFileSystemWrapper fs_wrap(GetEnv()); return &fs_wrap; } FileSystem* DBImpl::GetFileSystem() const { return immutable_db_options_.fs.get(); } Options DBImpl::GetOptions(ColumnFamilyHandle* column_family) const { InstrumentedMutexLock l(&mutex_); auto cfh = reinterpret_cast(column_family); return Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), cfh->cfd()->GetLatestCFOptions()); } DBOptions DBImpl::GetDBOptions() const { InstrumentedMutexLock l(&mutex_); return BuildDBOptions(immutable_db_options_, mutable_db_options_); } bool DBImpl::GetProperty(ColumnFamilyHandle* column_family, const Slice& property, std::string* value) { const DBPropertyInfo* property_info = GetPropertyInfo(property); value->clear(); auto cfd = reinterpret_cast(column_family)->cfd(); if (property_info == nullptr) { return false; } else if (property_info->handle_int) { uint64_t int_value; bool ret_value = GetIntPropertyInternal(cfd, *property_info, false, &int_value); if (ret_value) { *value = ToString(int_value); } return ret_value; } else if (property_info->handle_string) { InstrumentedMutexLock l(&mutex_); return cfd->internal_stats()->GetStringProperty(*property_info, property, value); } else if (property_info->handle_string_dbimpl) { std::string tmp_value; bool ret_value = (this->*(property_info->handle_string_dbimpl))(&tmp_value); if (ret_value) { *value = tmp_value; } return ret_value; } // Shouldn't reach here since exactly one of handle_string and handle_int // should be non-nullptr. 
assert(false); return false; } bool DBImpl::GetMapProperty(ColumnFamilyHandle* column_family, const Slice& property, std::map* value) { const DBPropertyInfo* property_info = GetPropertyInfo(property); value->clear(); auto cfd = reinterpret_cast(column_family)->cfd(); if (property_info == nullptr) { return false; } else if (property_info->handle_map) { InstrumentedMutexLock l(&mutex_); return cfd->internal_stats()->GetMapProperty(*property_info, property, value); } // If we reach this point it means that handle_map is not provided for the // requested property return false; } bool DBImpl::GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) { const DBPropertyInfo* property_info = GetPropertyInfo(property); if (property_info == nullptr || property_info->handle_int == nullptr) { return false; } auto cfd = reinterpret_cast(column_family)->cfd(); return GetIntPropertyInternal(cfd, *property_info, false, value); } bool DBImpl::GetIntPropertyInternal(ColumnFamilyData* cfd, const DBPropertyInfo& property_info, bool is_locked, uint64_t* value) { assert(property_info.handle_int != nullptr); if (!property_info.need_out_of_mutex) { if (is_locked) { mutex_.AssertHeld(); return cfd->internal_stats()->GetIntProperty(property_info, value, this); } else { InstrumentedMutexLock l(&mutex_); return cfd->internal_stats()->GetIntProperty(property_info, value, this); } } else { SuperVersion* sv = nullptr; if (!is_locked) { sv = GetAndRefSuperVersion(cfd); } else { sv = cfd->GetSuperVersion(); } bool ret = cfd->internal_stats()->GetIntPropertyOutOfMutex( property_info, sv->current, value); if (!is_locked) { ReturnAndCleanupSuperVersion(cfd, sv); } return ret; } } bool DBImpl::GetPropertyHandleOptionsStatistics(std::string* value) { assert(value != nullptr); Statistics* statistics = immutable_db_options_.statistics.get(); if (!statistics) { return false; } *value = statistics->ToString(); return true; } #ifndef ROCKSDB_LITE Status DBImpl::ResetStats() { 
InstrumentedMutexLock l(&mutex_); for (auto* cfd : *versions_->GetColumnFamilySet()) { if (cfd->initialized()) { cfd->internal_stats()->Clear(); } } return Status::OK(); } #endif // ROCKSDB_LITE bool DBImpl::GetAggregatedIntProperty(const Slice& property, uint64_t* aggregated_value) { const DBPropertyInfo* property_info = GetPropertyInfo(property); if (property_info == nullptr || property_info->handle_int == nullptr) { return false; } uint64_t sum = 0; { // Needs mutex to protect the list of column families. InstrumentedMutexLock l(&mutex_); uint64_t value; for (auto* cfd : *versions_->GetColumnFamilySet()) { if (!cfd->initialized()) { continue; } if (GetIntPropertyInternal(cfd, *property_info, true, &value)) { sum += value; } else { return false; } } } *aggregated_value = sum; return true; } SuperVersion* DBImpl::GetAndRefSuperVersion(ColumnFamilyData* cfd) { // TODO(ljin): consider using GetReferencedSuperVersion() directly return cfd->GetThreadLocalSuperVersion(this); } // REQUIRED: this function should only be called on the write thread or if the // mutex is held. 
SuperVersion* DBImpl::GetAndRefSuperVersion(uint32_t column_family_id) { auto column_family_set = versions_->GetColumnFamilySet(); auto cfd = column_family_set->GetColumnFamily(column_family_id); if (!cfd) { return nullptr; } return GetAndRefSuperVersion(cfd); } void DBImpl::CleanupSuperVersion(SuperVersion* sv) { // Release SuperVersion if (sv->Unref()) { bool defer_purge = immutable_db_options().avoid_unnecessary_blocking_io; { InstrumentedMutexLock l(&mutex_); sv->Cleanup(); if (defer_purge) { AddSuperVersionsToFreeQueue(sv); SchedulePurge(); } } if (!defer_purge) { delete sv; } RecordTick(stats_, NUMBER_SUPERVERSION_CLEANUPS); } RecordTick(stats_, NUMBER_SUPERVERSION_RELEASES); } void DBImpl::ReturnAndCleanupSuperVersion(ColumnFamilyData* cfd, SuperVersion* sv) { if (!cfd->ReturnThreadLocalSuperVersion(sv)) { CleanupSuperVersion(sv); } } // REQUIRED: this function should only be called on the write thread. void DBImpl::ReturnAndCleanupSuperVersion(uint32_t column_family_id, SuperVersion* sv) { auto column_family_set = versions_->GetColumnFamilySet(); auto cfd = column_family_set->GetColumnFamily(column_family_id); // If SuperVersion is held, and we successfully fetched a cfd using // GetAndRefSuperVersion(), it must still exist. assert(cfd != nullptr); ReturnAndCleanupSuperVersion(cfd, sv); } // REQUIRED: this function should only be called on the write thread or if the // mutex is held. ColumnFamilyHandle* DBImpl::GetColumnFamilyHandle(uint32_t column_family_id) { ColumnFamilyMemTables* cf_memtables = column_family_memtables_.get(); if (!cf_memtables->Seek(column_family_id)) { return nullptr; } return cf_memtables->GetColumnFamilyHandle(); } // REQUIRED: mutex is NOT held. 
std::unique_ptr DBImpl::GetColumnFamilyHandleUnlocked( uint32_t column_family_id) { InstrumentedMutexLock l(&mutex_); auto* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family_id); if (cfd == nullptr) { return nullptr; } return std::unique_ptr( new ColumnFamilyHandleImpl(cfd, this, &mutex_)); } void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family, const Range& range, uint64_t* const count, uint64_t* const size) { ColumnFamilyHandleImpl* cfh = reinterpret_cast(column_family); ColumnFamilyData* cfd = cfh->cfd(); SuperVersion* sv = GetAndRefSuperVersion(cfd); // Convert user_key into a corresponding internal key. InternalKey k1(range.start, kMaxSequenceNumber, kValueTypeForSeek); InternalKey k2(range.limit, kMaxSequenceNumber, kValueTypeForSeek); MemTable::MemTableStats memStats = sv->mem->ApproximateStats(k1.Encode(), k2.Encode()); MemTable::MemTableStats immStats = sv->imm->ApproximateStats(k1.Encode(), k2.Encode()); *count = memStats.count + immStats.count; *size = memStats.size + immStats.size; ReturnAndCleanupSuperVersion(cfd, sv); } Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options, ColumnFamilyHandle* column_family, const Range* range, int n, uint64_t* sizes) { if (!options.include_memtabtles && !options.include_files) { return Status::InvalidArgument("Invalid options"); } Version* v; auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); SuperVersion* sv = GetAndRefSuperVersion(cfd); v = sv->current; for (int i = 0; i < n; i++) { // Convert user_key into a corresponding internal key. 
InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); sizes[i] = 0; if (options.include_files) { sizes[i] += versions_->ApproximateSize( options, v, k1.Encode(), k2.Encode(), /*start_level=*/0, /*end_level=*/-1, TableReaderCaller::kUserApproximateSize); } if (options.include_memtabtles) { sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size; sizes[i] += sv->imm->ApproximateStats(k1.Encode(), k2.Encode()).size; } } ReturnAndCleanupSuperVersion(cfd, sv); return Status::OK(); } std::list::iterator DBImpl::CaptureCurrentFileNumberInPendingOutputs() { // We need to remember the iterator of our insert, because after the // background job is done, we need to remove that element from // pending_outputs_. pending_outputs_.push_back(versions_->current_next_file_number()); auto pending_outputs_inserted_elem = pending_outputs_.end(); --pending_outputs_inserted_elem; return pending_outputs_inserted_elem; } void DBImpl::ReleaseFileNumberFromPendingOutputs( std::unique_ptr::iterator>& v) { if (v.get() != nullptr) { pending_outputs_.erase(*v.get()); v.reset(); } } #ifndef ROCKSDB_LITE Status DBImpl::GetUpdatesSince( SequenceNumber seq, std::unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options) { RecordTick(stats_, GET_UPDATES_SINCE_CALLS); if (seq > versions_->LastSequence()) { return Status::NotFound("Requested sequence not yet written in the db"); } return wal_manager_.GetUpdatesSince(seq, iter, read_options, versions_.get()); } Status DBImpl::DeleteFile(std::string name) { uint64_t number; FileType type; WalFileType log_type; if (!ParseFileName(name, &number, &type, &log_type) || (type != kTableFile && type != kLogFile)) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "DeleteFile %s failed.\n", name.c_str()); return Status::InvalidArgument("Invalid file name"); } Status status; if (type == kLogFile) { // Only allow deleting archived log files if 
(log_type != kArchivedLogFile) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "DeleteFile %s failed - not archived log.\n", name.c_str()); return Status::NotSupported("Delete only supported for archived logs"); } status = wal_manager_.DeleteFile(name, number); if (!status.ok()) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "DeleteFile %s failed -- %s.\n", name.c_str(), status.ToString().c_str()); } return status; } int level; FileMetaData* metadata; ColumnFamilyData* cfd; VersionEdit edit; JobContext job_context(next_job_id_.fetch_add(1), true); { InstrumentedMutexLock l(&mutex_); status = versions_->GetMetadataForFile(number, &level, &metadata, &cfd); if (!status.ok()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "DeleteFile %s failed. File not found\n", name.c_str()); job_context.Clean(); return Status::InvalidArgument("File not found"); } assert(level < cfd->NumberLevels()); // If the file is being compacted no need to delete. if (metadata->being_compacted) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "DeleteFile %s Skipped. File about to be compacted\n", name.c_str()); job_context.Clean(); return Status::OK(); } // Only the files in the last level can be deleted externally. // This is to make sure that any deletion tombstones are not // lost. Check that the level passed is the last level. auto* vstoreage = cfd->current()->storage_info(); for (int i = level + 1; i < cfd->NumberLevels(); i++) { if (vstoreage->NumLevelFiles(i) != 0) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "DeleteFile %s FAILED. 
File not in last level\n", name.c_str()); job_context.Clean(); return Status::InvalidArgument("File not in last level"); } } // if level == 0, it has to be the oldest file if (level == 0 && vstoreage->LevelFiles(0).back()->fd.GetNumber() != number) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "DeleteFile %s failed ---" " target file in level 0 must be the oldest.", name.c_str()); job_context.Clean(); return Status::InvalidArgument("File in level 0, but not oldest"); } edit.SetColumnFamily(cfd->GetID()); edit.DeleteFile(level, number); status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, &job_context.superversion_contexts[0], *cfd->GetLatestMutableCFOptions()); } FindObsoleteFiles(&job_context, false); } // lock released here LogFlush(immutable_db_options_.info_log); // remove files outside the db-lock if (job_context.HaveSomethingToDelete()) { // Call PurgeObsoleteFiles() without holding mutex. 
PurgeObsoleteFiles(job_context); } job_context.Clean(); return status; } Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end) { Status status; auto cfh = reinterpret_cast(column_family); ColumnFamilyData* cfd = cfh->cfd(); VersionEdit edit; std::set deleted_files; JobContext job_context(next_job_id_.fetch_add(1), true); { InstrumentedMutexLock l(&mutex_); Version* input_version = cfd->current(); auto* vstorage = input_version->storage_info(); for (size_t r = 0; r < n; r++) { auto begin = ranges[r].start, end = ranges[r].limit; for (int i = 1; i < cfd->NumberLevels(); i++) { if (vstorage->LevelFiles(i).empty() || !vstorage->OverlapInLevel(i, begin, end)) { continue; } std::vector level_files; InternalKey begin_storage, end_storage, *begin_key, *end_key; if (begin == nullptr) { begin_key = nullptr; } else { begin_storage.SetMinPossibleForUserKey(*begin); begin_key = &begin_storage; } if (end == nullptr) { end_key = nullptr; } else { end_storage.SetMaxPossibleForUserKey(*end); end_key = &end_storage; } vstorage->GetCleanInputsWithinInterval( i, begin_key, end_key, &level_files, -1 /* hint_index */, nullptr /* file_index */); FileMetaData* level_file; for (uint32_t j = 0; j < level_files.size(); j++) { level_file = level_files[j]; if (level_file->being_compacted) { continue; } if (deleted_files.find(level_file) != deleted_files.end()) { continue; } if (!include_end && end != nullptr && cfd->user_comparator()->Compare(level_file->largest.user_key(), *end) == 0) { continue; } edit.SetColumnFamily(cfd->GetID()); edit.DeleteFile(i, level_file->fd.GetNumber()); deleted_files.insert(level_file); level_file->being_compacted = true; } } } if (edit.GetDeletedFiles().empty()) { job_context.Clean(); return Status::OK(); } input_version->Ref(); status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { 
InstallSuperVersionAndScheduleWork(cfd, &job_context.superversion_contexts[0], *cfd->GetLatestMutableCFOptions()); } for (auto* deleted_file : deleted_files) { deleted_file->being_compacted = false; } input_version->Unref(); FindObsoleteFiles(&job_context, false); } // lock released here LogFlush(immutable_db_options_.info_log); // remove files outside the db-lock if (job_context.HaveSomethingToDelete()) { // Call PurgeObsoleteFiles() without holding mutex. PurgeObsoleteFiles(job_context); } job_context.Clean(); return status; } void DBImpl::GetLiveFilesMetaData(std::vector* metadata) { InstrumentedMutexLock l(&mutex_); versions_->GetLiveFilesMetaData(metadata); } void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* cf_meta) { assert(column_family); auto* cfd = reinterpret_cast(column_family)->cfd(); auto* sv = GetAndRefSuperVersion(cfd); { // Without mutex, Version::GetColumnFamilyMetaData will have data race with // Compaction::MarkFilesBeingCompacted. One solution is to use mutex, but // this may cause regression. An alternative is to make // FileMetaData::being_compacted atomic, but it will make FileMetaData // non-copy-able. Another option is to separate these variables from // original FileMetaData struct, and this requires re-organization of data // structures. For now, we take the easy approach. If // DB::GetColumnFamilyMetaData is not called frequently, the regression // should not be big. We still need to keep an eye on it. 
InstrumentedMutexLock l(&mutex_); sv->current->GetColumnFamilyMetaData(cf_meta); } ReturnAndCleanupSuperVersion(cfd, sv); } #endif // ROCKSDB_LITE Status DBImpl::CheckConsistency() { mutex_.AssertHeld(); std::vector metadata; versions_->GetLiveFilesMetaData(&metadata); TEST_SYNC_POINT("DBImpl::CheckConsistency:AfterGetLiveFilesMetaData"); std::string corruption_messages; if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) { // Instead of calling GetFileSize() for each expected file, call // GetChildren() for the DB directory and check that all expected files // are listed, without checking their sizes. // Since sst files might be in different directories, do it for each // directory separately. std::map> files_by_directory; for (const auto& md : metadata) { // md.name has a leading "/". Remove it. std::string fname = md.name; if (!fname.empty() && fname[0] == '/') { fname = fname.substr(1); } files_by_directory[md.db_path].push_back(fname); } for (const auto& dir_files : files_by_directory) { std::string directory = dir_files.first; std::vector existing_files; Status s = env_->GetChildren(directory, &existing_files); if (!s.ok()) { corruption_messages += "Can't list files in " + directory + ": " + s.ToString() + "\n"; continue; } std::sort(existing_files.begin(), existing_files.end()); for (const std::string& fname : dir_files.second) { if (!std::binary_search(existing_files.begin(), existing_files.end(), fname) && !std::binary_search(existing_files.begin(), existing_files.end(), Rocks2LevelTableFileName(fname))) { corruption_messages += "Missing sst file " + fname + " in " + directory + "\n"; } } } } else { for (const auto& md : metadata) { // md.name has a leading "/". 
std::string file_path = md.db_path + md.name; uint64_t fsize = 0; TEST_SYNC_POINT("DBImpl::CheckConsistency:BeforeGetFileSize"); Status s = env_->GetFileSize(file_path, &fsize); if (!s.ok() && env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok()) { s = Status::OK(); } if (!s.ok()) { corruption_messages += "Can't access " + md.name + ": " + s.ToString() + "\n"; } else if (fsize != md.size) { corruption_messages += "Sst file size mismatch: " + file_path + ". Size recorded in manifest " + ToString(md.size) + ", actual size " + ToString(fsize) + "\n"; } } } if (corruption_messages.size() == 0) { return Status::OK(); } else { return Status::Corruption(corruption_messages); } } Status DBImpl::GetDbIdentity(std::string& identity) const { identity.assign(db_id_); return Status::OK(); } Status DBImpl::GetDbIdentityFromIdentityFile(std::string* identity) const { std::string idfilename = IdentityFileName(dbname_); const FileOptions soptions; Status s = ReadFileToString(fs_.get(), idfilename, identity); if (!s.ok()) { return s; } // If last character is '\n' remove it from identity if (identity->size() > 0 && identity->back() == '\n') { identity->pop_back(); } return s; } // Default implementation -- returns not supported status Status DB::CreateColumnFamily(const ColumnFamilyOptions& /*cf_options*/, const std::string& /*column_family_name*/, ColumnFamilyHandle** /*handle*/) { return Status::NotSupported(""); } Status DB::CreateColumnFamilies( const ColumnFamilyOptions& /*cf_options*/, const std::vector& /*column_family_names*/, std::vector* /*handles*/) { return Status::NotSupported(""); } Status DB::CreateColumnFamilies( const std::vector& /*column_families*/, std::vector* /*handles*/) { return Status::NotSupported(""); } Status DB::DropColumnFamily(ColumnFamilyHandle* /*column_family*/) { return Status::NotSupported(""); } Status DB::DropColumnFamilies( const std::vector& /*column_families*/) { return Status::NotSupported(""); } Status 
DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) { delete column_family; return Status::OK(); } DB::~DB() {} Status DBImpl::Close() { if (!closed_) { { InstrumentedMutexLock l(&mutex_); // If there is unreleased snapshot, fail the close call if (!snapshots_.empty()) { return Status::Aborted("Cannot close DB with unreleased snapshot."); } } closed_ = true; return CloseImpl(); } return Status::OK(); } Status DB::ListColumnFamilies(const DBOptions& db_options, const std::string& name, std::vector* column_families) { const std::shared_ptr& fs = db_options.env->GetFileSystem(); return VersionSet::ListColumnFamilies(column_families, name, fs.get()); } Snapshot::~Snapshot() {} Status DestroyDB(const std::string& dbname, const Options& options, const std::vector& column_families) { ImmutableDBOptions soptions(SanitizeOptions(dbname, options)); Env* env = soptions.env; std::vector filenames; bool wal_in_db_path = IsWalDirSameAsDBPath(&soptions); // Reset the logger because it holds a handle to the // log file and prevents cleanup and directory removal soptions.info_log.reset(); // Ignore error in case directory does not exist env->GetChildren(dbname, &filenames); FileLock* lock; const std::string lockname = LockFileName(dbname); Status result = env->LockFile(lockname, &lock); if (result.ok()) { uint64_t number; FileType type; InfoLogPrefix info_log_prefix(!soptions.db_log_dir.empty(), dbname); for (const auto& fname : filenames) { if (ParseFileName(fname, &number, info_log_prefix.prefix, &type) && type != kDBLockFile) { // Lock file will be deleted at end Status del; std::string path_to_delete = dbname + "/" + fname; if (type == kMetaDatabase) { del = DestroyDB(path_to_delete, options); } else if (type == kTableFile || type == kLogFile) { del = DeleteDBFile(&soptions, path_to_delete, dbname, /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); } else { del = env->DeleteFile(path_to_delete); } if (result.ok() && !del.ok()) { result = del; } } } std::set paths; 
for (const DbPath& db_path : options.db_paths) { paths.insert(db_path.path); } for (const ColumnFamilyDescriptor& cf : column_families) { for (const DbPath& cf_path : cf.options.cf_paths) { paths.insert(cf_path.path); } } for (const auto& path : paths) { if (env->GetChildren(path, &filenames).ok()) { for (const auto& fname : filenames) { if (ParseFileName(fname, &number, &type) && type == kTableFile) { // Lock file will be deleted at end std::string table_path = path + "/" + fname; Status del = DeleteDBFile(&soptions, table_path, dbname, /*force_bg=*/false, /*force_fg=*/false); if (result.ok() && !del.ok()) { result = del; } } } env->DeleteDir(path); } } std::vector walDirFiles; std::string archivedir = ArchivalDirectory(dbname); bool wal_dir_exists = false; if (dbname != soptions.wal_dir) { wal_dir_exists = env->GetChildren(soptions.wal_dir, &walDirFiles).ok(); archivedir = ArchivalDirectory(soptions.wal_dir); } // Archive dir may be inside wal dir or dbname and should be // processed and removed before those otherwise we have issues // removing them std::vector archiveFiles; if (env->GetChildren(archivedir, &archiveFiles).ok()) { // Delete archival files. 
for (const auto& file : archiveFiles) { if (ParseFileName(file, &number, &type) && type == kLogFile) { Status del = DeleteDBFile(&soptions, archivedir + "/" + file, archivedir, /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); if (result.ok() && !del.ok()) { result = del; } } } env->DeleteDir(archivedir); } // Delete log files in the WAL dir if (wal_dir_exists) { for (const auto& file : walDirFiles) { if (ParseFileName(file, &number, &type) && type == kLogFile) { Status del = DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number), soptions.wal_dir, /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); if (result.ok() && !del.ok()) { result = del; } } } env->DeleteDir(soptions.wal_dir); } env->UnlockFile(lock); // Ignore error since state is already gone env->DeleteFile(lockname); // sst_file_manager holds a ref to the logger. Make sure the logger is // gone before trying to remove the directory. soptions.sst_file_manager.reset(); env->DeleteDir(dbname); // Ignore error in case dir contains other files } return result; } Status DBImpl::WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread) { #ifndef ROCKSDB_LITE WriteThread::Writer w; if (need_mutex_lock) { mutex_.Lock(); } else { mutex_.AssertHeld(); } if (need_enter_write_thread) { write_thread_.EnterUnbatched(&w, &mutex_); } std::vector cf_names; std::vector cf_opts; // This part requires mutex to protect the column family options for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } cf_names.push_back(cfd->GetName()); cf_opts.push_back(cfd->GetLatestCFOptions()); } // Unlock during expensive operations. New writes cannot get here // because the single write thread ensures all new writes get queued. 
DBOptions db_options = BuildDBOptions(immutable_db_options_, mutable_db_options_); mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::WriteOptionsFile:1"); TEST_SYNC_POINT("DBImpl::WriteOptionsFile:2"); std::string file_name = TempOptionsFileName(GetName(), versions_->NewFileNumber()); Status s = PersistRocksDBOptions(db_options, cf_names, cf_opts, file_name, GetFileSystem()); if (s.ok()) { s = RenameTempFileToOptionsFile(file_name); } // restore lock if (!need_mutex_lock) { mutex_.Lock(); } if (need_enter_write_thread) { write_thread_.ExitUnbatched(&w); } if (!s.ok()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Unnable to persist options -- %s", s.ToString().c_str()); if (immutable_db_options_.fail_if_options_file_error) { return Status::IOError("Unable to persist options.", s.ToString().c_str()); } } #else (void)need_mutex_lock; (void)need_enter_write_thread; #endif // !ROCKSDB_LITE return Status::OK(); } #ifndef ROCKSDB_LITE namespace { void DeleteOptionsFilesHelper(const std::map& filenames, const size_t num_files_to_keep, const std::shared_ptr& info_log, Env* env) { if (filenames.size() <= num_files_to_keep) { return; } for (auto iter = std::next(filenames.begin(), num_files_to_keep); iter != filenames.end(); ++iter) { if (!env->DeleteFile(iter->second).ok()) { ROCKS_LOG_WARN(info_log, "Unable to delete options file %s", iter->second.c_str()); } } } } // namespace #endif // !ROCKSDB_LITE Status DBImpl::DeleteObsoleteOptionsFiles() { #ifndef ROCKSDB_LITE std::vector filenames; // use ordered map to store keep the filenames sorted from the newest // to the oldest. 
std::map options_filenames; Status s; s = GetEnv()->GetChildren(GetName(), &filenames); if (!s.ok()) { return s; } for (auto& filename : filenames) { uint64_t file_number; FileType type; if (ParseFileName(filename, &file_number, &type) && type == kOptionsFile) { options_filenames.insert( {std::numeric_limits::max() - file_number, GetName() + "/" + filename}); } } // Keeps the latest 2 Options file const size_t kNumOptionsFilesKept = 2; DeleteOptionsFilesHelper(options_filenames, kNumOptionsFilesKept, immutable_db_options_.info_log, GetEnv()); return Status::OK(); #else return Status::OK(); #endif // !ROCKSDB_LITE } Status DBImpl::RenameTempFileToOptionsFile(const std::string& file_name) { #ifndef ROCKSDB_LITE Status s; uint64_t options_file_number = versions_->NewFileNumber(); std::string options_file_name = OptionsFileName(GetName(), options_file_number); // Retry if the file name happen to conflict with an existing one. s = GetEnv()->RenameFile(file_name, options_file_name); if (s.ok()) { InstrumentedMutexLock l(&mutex_); versions_->options_file_number_ = options_file_number; } if (0 == disable_delete_obsolete_files_) { DeleteObsoleteOptionsFiles(); } return s; #else (void)file_name; return Status::OK(); #endif // !ROCKSDB_LITE } #ifdef ROCKSDB_USING_THREAD_STATUS void DBImpl::NewThreadStatusCfInfo(ColumnFamilyData* cfd) const { if (immutable_db_options_.enable_thread_tracking) { ThreadStatusUtil::NewColumnFamilyInfo(this, cfd, cfd->GetName(), cfd->ioptions()->env); } } void DBImpl::EraseThreadStatusCfInfo(ColumnFamilyData* cfd) const { if (immutable_db_options_.enable_thread_tracking) { ThreadStatusUtil::EraseColumnFamilyInfo(cfd); } } void DBImpl::EraseThreadStatusDbInfo() const { if (immutable_db_options_.enable_thread_tracking) { ThreadStatusUtil::EraseDatabaseInfo(this); } } #else void DBImpl::NewThreadStatusCfInfo(ColumnFamilyData* /*cfd*/) const {} void DBImpl::EraseThreadStatusCfInfo(ColumnFamilyData* /*cfd*/) const {} void 
DBImpl::EraseThreadStatusDbInfo() const {} #endif // ROCKSDB_USING_THREAD_STATUS // // A global method that can dump out the build version void DumpRocksDBBuildVersion(Logger* log) { #if !defined(IOS_CROSS_COMPILE) // if we compile with Xcode, we don't run build_detect_version, so we don't // generate util/build_version.cc ROCKS_LOG_HEADER(log, "RocksDB version: %d.%d.%d\n", ROCKSDB_MAJOR, ROCKSDB_MINOR, ROCKSDB_PATCH); ROCKS_LOG_HEADER(log, "Git sha %s", rocksdb_build_git_sha); ROCKS_LOG_HEADER(log, "Compile date %s", rocksdb_build_compile_date); #else (void)log; // ignore "-Wunused-parameter" #endif } #ifndef ROCKSDB_LITE SequenceNumber DBImpl::GetEarliestMemTableSequenceNumber(SuperVersion* sv, bool include_history) { // Find the earliest sequence number that we know we can rely on reading // from the memtable without needing to check sst files. SequenceNumber earliest_seq = sv->imm->GetEarliestSequenceNumber(include_history); if (earliest_seq == kMaxSequenceNumber) { earliest_seq = sv->mem->GetEarliestSequenceNumber(); } assert(sv->mem->GetEarliestSequenceNumber() >= earliest_seq); return earliest_seq; } #endif // ROCKSDB_LITE #ifndef ROCKSDB_LITE Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key, bool cache_only, SequenceNumber lower_bound_seq, SequenceNumber* seq, bool* found_record_for_key, bool* is_blob_index) { Status s; MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; ReadOptions read_options; SequenceNumber current_seq = versions_->LastSequence(); LookupKey lkey(key, current_seq); *seq = kMaxSequenceNumber; *found_record_for_key = false; // Check if there is a record for this key in the latest memtable sv->mem->Get(lkey, nullptr, nullptr, &s, &merge_context, &max_covering_tombstone_seq, seq, read_options, nullptr /*read_callback*/, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading memtable. 
ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Unexpected status returned from MemTable::Get: %s\n", s.ToString().c_str()); return s; } if (*seq != kMaxSequenceNumber) { // Found a sequence number, no need to check immutable memtables *found_record_for_key = true; return Status::OK(); } SequenceNumber lower_bound_in_mem = sv->mem->GetEarliestSequenceNumber(); if (lower_bound_in_mem != kMaxSequenceNumber && lower_bound_in_mem < lower_bound_seq) { *found_record_for_key = false; return Status::OK(); } // Check if there is a record for this key in the immutable memtables sv->imm->Get(lkey, nullptr, nullptr, &s, &merge_context, &max_covering_tombstone_seq, seq, read_options, nullptr /*read_callback*/, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading memtable. ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Unexpected status returned from MemTableList::Get: %s\n", s.ToString().c_str()); return s; } if (*seq != kMaxSequenceNumber) { // Found a sequence number, no need to check memtable history *found_record_for_key = true; return Status::OK(); } SequenceNumber lower_bound_in_imm = sv->imm->GetEarliestSequenceNumber(); if (lower_bound_in_imm != kMaxSequenceNumber && lower_bound_in_imm < lower_bound_seq) { *found_record_for_key = false; return Status::OK(); } // Check if there is a record for this key in the immutable memtables sv->imm->GetFromHistory(lkey, nullptr, nullptr, &s, &merge_context, &max_covering_tombstone_seq, seq, read_options, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading memtable. 
ROCKS_LOG_ERROR( immutable_db_options_.info_log, "Unexpected status returned from MemTableList::GetFromHistory: %s\n", s.ToString().c_str()); return s; } if (*seq != kMaxSequenceNumber) { // Found a sequence number, no need to check SST files *found_record_for_key = true; return Status::OK(); } // We could do a sv->imm->GetEarliestSequenceNumber(/*include_history*/ true) // check here to skip the history if possible. But currently the caller // already does that. Maybe we should move the logic here later. // TODO(agiardullo): possible optimization: consider checking cached // SST files if cache_only=true? if (!cache_only) { // Check tables sv->current->Get(read_options, lkey, nullptr, nullptr, &s, &merge_context, &max_covering_tombstone_seq, nullptr /* value_found */, found_record_for_key, seq, nullptr /*read_callback*/, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading SST files ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Unexpected status returned from Version::Get: %s\n", s.ToString().c_str()); } } return s; } Status DBImpl::IngestExternalFile( ColumnFamilyHandle* column_family, const std::vector& external_files, const IngestExternalFileOptions& ingestion_options) { IngestExternalFileArg arg; arg.column_family = column_family; arg.external_files = external_files; arg.options = ingestion_options; return IngestExternalFiles({arg}); } Status DBImpl::IngestExternalFiles( const std::vector& args) { if (args.empty()) { return Status::InvalidArgument("ingestion arg list is empty"); } { std::unordered_set unique_cfhs; for (const auto& arg : args) { if (arg.column_family == nullptr) { return Status::InvalidArgument("column family handle is null"); } else if (unique_cfhs.count(arg.column_family) > 0) { return Status::InvalidArgument( "ingestion args have duplicate column families"); } unique_cfhs.insert(arg.column_family); } } // Ingest multiple external SST files atomically. 
size_t num_cfs = args.size(); for (size_t i = 0; i != num_cfs; ++i) { if (args[i].external_files.empty()) { char err_msg[128] = {0}; snprintf(err_msg, 128, "external_files[%zu] is empty", i); return Status::InvalidArgument(err_msg); } } for (const auto& arg : args) { const IngestExternalFileOptions& ingest_opts = arg.options; if (ingest_opts.ingest_behind && !immutable_db_options_.allow_ingest_behind) { return Status::InvalidArgument( "can't ingest_behind file in DB with allow_ingest_behind=false"); } } // TODO (yanqin) maybe handle the case in which column_families have // duplicates std::unique_ptr::iterator> pending_output_elem; size_t total = 0; for (const auto& arg : args) { total += arg.external_files.size(); } uint64_t next_file_number = 0; Status status = ReserveFileNumbersBeforeIngestion( static_cast(args[0].column_family)->cfd(), total, pending_output_elem, &next_file_number); if (!status.ok()) { InstrumentedMutexLock l(&mutex_); ReleaseFileNumberFromPendingOutputs(pending_output_elem); return status; } std::vector ingestion_jobs; for (const auto& arg : args) { auto* cfd = static_cast(arg.column_family)->cfd(); ingestion_jobs.emplace_back( env_, versions_.get(), cfd, immutable_db_options_, file_options_, &snapshots_, arg.options, &directories_, &event_logger_); } std::vector> exec_results; for (size_t i = 0; i != num_cfs; ++i) { exec_results.emplace_back(false, Status::OK()); } // TODO(yanqin) maybe make jobs run in parallel uint64_t start_file_number = next_file_number; for (size_t i = 1; i != num_cfs; ++i) { start_file_number += args[i - 1].external_files.size(); auto* cfd = static_cast(args[i].column_family)->cfd(); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); exec_results[i].second = ingestion_jobs[i].Prepare( args[i].external_files, args[i].files_checksums, args[i].files_checksum_func_names, start_file_number, super_version); exec_results[i].first = true; CleanupSuperVersion(super_version); } 
TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeLastJobPrepare:0"); TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeLastJobPrepare:1"); { auto* cfd = static_cast(args[0].column_family)->cfd(); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); exec_results[0].second = ingestion_jobs[0].Prepare( args[0].external_files, args[0].files_checksums, args[0].files_checksum_func_names, next_file_number, super_version); exec_results[0].first = true; CleanupSuperVersion(super_version); } for (const auto& exec_result : exec_results) { if (!exec_result.second.ok()) { status = exec_result.second; break; } } if (!status.ok()) { for (size_t i = 0; i != num_cfs; ++i) { if (exec_results[i].first) { ingestion_jobs[i].Cleanup(status); } } InstrumentedMutexLock l(&mutex_); ReleaseFileNumberFromPendingOutputs(pending_output_elem); return status; } std::vector sv_ctxs; for (size_t i = 0; i != num_cfs; ++i) { sv_ctxs.emplace_back(true /* create_superversion */); } TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeJobsRun:0"); TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeJobsRun:1"); TEST_SYNC_POINT("DBImpl::AddFile:Start"); { InstrumentedMutexLock l(&mutex_); TEST_SYNC_POINT("DBImpl::AddFile:MutexLock"); // Stop writes to the DB by entering both write threads WriteThread::Writer w; write_thread_.EnterUnbatched(&w, &mutex_); WriteThread::Writer nonmem_w; if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } // When unordered_write is enabled, the keys are writing to memtable in an // unordered way. If the ingestion job checks memtable key range before the // key landing in memtable, the ingestion job may skip the necessary // memtable flush. // So wait here to ensure there is no pending write to memtable. 
WaitForPendingWrites(); num_running_ingest_file_ += static_cast(num_cfs); TEST_SYNC_POINT("DBImpl::IngestExternalFile:AfterIncIngestFileCounter"); bool at_least_one_cf_need_flush = false; std::vector need_flush(num_cfs, false); for (size_t i = 0; i != num_cfs; ++i) { auto* cfd = static_cast(args[i].column_family)->cfd(); if (cfd->IsDropped()) { // TODO (yanqin) investigate whether we should abort ingestion or // proceed with other non-dropped column families. status = Status::InvalidArgument( "cannot ingest an external file into a dropped CF"); break; } bool tmp = false; status = ingestion_jobs[i].NeedsFlush(&tmp, cfd->GetSuperVersion()); need_flush[i] = tmp; at_least_one_cf_need_flush = (at_least_one_cf_need_flush || tmp); if (!status.ok()) { break; } } TEST_SYNC_POINT_CALLBACK("DBImpl::IngestExternalFile:NeedFlush", &at_least_one_cf_need_flush); if (status.ok() && at_least_one_cf_need_flush) { FlushOptions flush_opts; flush_opts.allow_write_stall = true; if (immutable_db_options_.atomic_flush) { autovector cfds_to_flush; SelectColumnFamiliesForAtomicFlush(&cfds_to_flush); mutex_.Unlock(); status = AtomicFlushMemTables(cfds_to_flush, flush_opts, FlushReason::kExternalFileIngestion, true /* writes_stopped */); mutex_.Lock(); } else { for (size_t i = 0; i != num_cfs; ++i) { if (need_flush[i]) { mutex_.Unlock(); auto* cfd = static_cast(args[i].column_family) ->cfd(); status = FlushMemTable(cfd, flush_opts, FlushReason::kExternalFileIngestion, true /* writes_stopped */); mutex_.Lock(); if (!status.ok()) { break; } } } } } // Run ingestion jobs. 
if (status.ok()) { for (size_t i = 0; i != num_cfs; ++i) { status = ingestion_jobs[i].Run(); if (!status.ok()) { break; } } } if (status.ok()) { int consumed_seqno_count = ingestion_jobs[0].ConsumedSequenceNumbersCount(); #ifndef NDEBUG for (size_t i = 1; i != num_cfs; ++i) { assert(!!consumed_seqno_count == !!ingestion_jobs[i].ConsumedSequenceNumbersCount()); consumed_seqno_count += ingestion_jobs[i].ConsumedSequenceNumbersCount(); } #endif if (consumed_seqno_count > 0) { const SequenceNumber last_seqno = versions_->LastSequence(); versions_->SetLastAllocatedSequence(last_seqno + consumed_seqno_count); versions_->SetLastPublishedSequence(last_seqno + consumed_seqno_count); versions_->SetLastSequence(last_seqno + consumed_seqno_count); } autovector cfds_to_commit; autovector mutable_cf_options_list; autovector> edit_lists; uint32_t num_entries = 0; for (size_t i = 0; i != num_cfs; ++i) { auto* cfd = static_cast(args[i].column_family)->cfd(); if (cfd->IsDropped()) { continue; } cfds_to_commit.push_back(cfd); mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions()); autovector edit_list; edit_list.push_back(ingestion_jobs[i].edit()); edit_lists.push_back(edit_list); ++num_entries; } // Mark the version edits as an atomic group if the number of version // edits exceeds 1. 
if (cfds_to_commit.size() > 1) { for (auto& edits : edit_lists) { assert(edits.size() == 1); edits[0]->MarkAtomicGroup(--num_entries); } assert(0 == num_entries); } status = versions_->LogAndApply(cfds_to_commit, mutable_cf_options_list, edit_lists, &mutex_, directories_.GetDbDir()); } if (status.ok()) { for (size_t i = 0; i != num_cfs; ++i) { auto* cfd = static_cast(args[i].column_family)->cfd(); if (!cfd->IsDropped()) { InstallSuperVersionAndScheduleWork(cfd, &sv_ctxs[i], *cfd->GetLatestMutableCFOptions()); #ifndef NDEBUG if (0 == i && num_cfs > 1) { TEST_SYNC_POINT( "DBImpl::IngestExternalFiles:InstallSVForFirstCF:0"); TEST_SYNC_POINT( "DBImpl::IngestExternalFiles:InstallSVForFirstCF:1"); } #endif // !NDEBUG } } } else if (versions_->io_status().IsIOError()) { // Error while writing to MANIFEST. // In fact, versions_->io_status() can also be the result of renaming // CURRENT file. With current code, it's just difficult to tell. So just // be pessimistic and try write to a new MANIFEST. // TODO: distinguish between MANIFEST write and CURRENT renaming const IOStatus& io_s = versions_->io_status(); error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite); } // Resume writes to the DB if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } write_thread_.ExitUnbatched(&w); if (status.ok()) { for (auto& job : ingestion_jobs) { job.UpdateStats(); } } ReleaseFileNumberFromPendingOutputs(pending_output_elem); num_running_ingest_file_ -= static_cast(num_cfs); if (0 == num_running_ingest_file_) { bg_cv_.SignalAll(); } TEST_SYNC_POINT("DBImpl::AddFile:MutexUnlock"); } // mutex_ is unlocked here // Cleanup for (size_t i = 0; i != num_cfs; ++i) { sv_ctxs[i].Clean(); // This may rollback jobs that have completed successfully. This is // intended for atomicity. 
ingestion_jobs[i].Cleanup(status); } if (status.ok()) { for (size_t i = 0; i != num_cfs; ++i) { auto* cfd = static_cast(args[i].column_family)->cfd(); if (!cfd->IsDropped()) { NotifyOnExternalFileIngested(cfd, ingestion_jobs[i]); } } } return status; } Status DBImpl::CreateColumnFamilyWithImport( const ColumnFamilyOptions& options, const std::string& column_family_name, const ImportColumnFamilyOptions& import_options, const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) { assert(handle != nullptr); assert(*handle == nullptr); std::string cf_comparator_name = options.comparator->Name(); if (cf_comparator_name != metadata.db_comparator_name) { return Status::InvalidArgument("Comparator name mismatch"); } // Create column family. auto status = CreateColumnFamily(options, column_family_name, handle); if (!status.ok()) { return status; } // Import sst files from metadata. auto cfh = reinterpret_cast(*handle); auto cfd = cfh->cfd(); ImportColumnFamilyJob import_job(env_, versions_.get(), cfd, immutable_db_options_, file_options_, import_options, metadata.files); SuperVersionContext dummy_sv_ctx(/* create_superversion */ true); VersionEdit dummy_edit; uint64_t next_file_number = 0; std::unique_ptr::iterator> pending_output_elem; { // Lock db mutex InstrumentedMutexLock l(&mutex_); if (error_handler_.IsDBStopped()) { // Don't import files when there is a bg_error status = error_handler_.GetBGError(); } // Make sure that bg cleanup wont delete the files that we are importing pending_output_elem.reset(new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); if (status.ok()) { // If crash happen after a hard link established, Recover function may // reuse the file number that has already assigned to the internal file, // and this will overwrite the external file. To protect the external // file, we have to make sure the file number will never being reused. 
next_file_number = versions_->FetchAddFileNumber(metadata.files.size()); auto cf_options = cfd->GetLatestMutableCFOptions(); status = versions_->LogAndApply(cfd, *cf_options, &dummy_edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options); } } } dummy_sv_ctx.Clean(); if (status.ok()) { SuperVersion* sv = cfd->GetReferencedSuperVersion(this); status = import_job.Prepare(next_file_number, sv); CleanupSuperVersion(sv); } if (status.ok()) { SuperVersionContext sv_context(true /*create_superversion*/); { // Lock db mutex InstrumentedMutexLock l(&mutex_); // Stop writes to the DB by entering both write threads WriteThread::Writer w; write_thread_.EnterUnbatched(&w, &mutex_); WriteThread::Writer nonmem_w; if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } num_running_ingest_file_++; assert(!cfd->IsDropped()); status = import_job.Run(); // Install job edit [Mutex will be unlocked here] if (status.ok()) { auto cf_options = cfd->GetLatestMutableCFOptions(); status = versions_->LogAndApply(cfd, *cf_options, import_job.edit(), &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, &sv_context, *cf_options); } } // Resume writes to the DB if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } write_thread_.ExitUnbatched(&w); num_running_ingest_file_--; if (num_running_ingest_file_ == 0) { bg_cv_.SignalAll(); } } // mutex_ is unlocked here sv_context.Clean(); } { InstrumentedMutexLock l(&mutex_); ReleaseFileNumberFromPendingOutputs(pending_output_elem); } import_job.Cleanup(status); if (!status.ok()) { DropColumnFamily(*handle); DestroyColumnFamilyHandle(*handle); *handle = nullptr; } return status; } Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { Status s; std::vector cfd_list; { InstrumentedMutexLock l(&mutex_); for (auto cfd : *versions_->GetColumnFamilySet()) { if (!cfd->IsDropped() && 
cfd->initialized()) { cfd->Ref(); cfd_list.push_back(cfd); } } } std::vector sv_list; for (auto cfd : cfd_list) { sv_list.push_back(cfd->GetReferencedSuperVersion(this)); } for (auto& sv : sv_list) { VersionStorageInfo* vstorage = sv->current->storage_info(); ColumnFamilyData* cfd = sv->current->cfd(); Options opts; { InstrumentedMutexLock l(&mutex_); opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), cfd->GetLatestCFOptions()); } for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) { for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok(); j++) { const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd; std::string fname = TableFileName(cfd->ioptions()->cf_paths, fd.GetNumber(), fd.GetPathId()); s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, read_options, fname); } } if (!s.ok()) { break; } } bool defer_purge = immutable_db_options().avoid_unnecessary_blocking_io; { InstrumentedMutexLock l(&mutex_); for (auto sv : sv_list) { if (sv && sv->Unref()) { sv->Cleanup(); if (defer_purge) { AddSuperVersionsToFreeQueue(sv); } else { delete sv; } } } if (defer_purge) { SchedulePurge(); } for (auto cfd : cfd_list) { cfd->UnrefAndTryDelete(); } } return s; } void DBImpl::NotifyOnExternalFileIngested( ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job) { if (immutable_db_options_.listeners.empty()) { return; } for (const IngestedFileInfo& f : ingestion_job.files_to_ingest()) { ExternalFileIngestionInfo info; info.cf_name = cfd->GetName(); info.external_file_path = f.external_file_path; info.internal_file_path = f.internal_file_path; info.global_seqno = f.assigned_seqno; info.table_properties = f.table_properties; for (auto listener : immutable_db_options_.listeners) { listener->OnExternalFileIngested(this, info); } } } void DBImpl::WaitForIngestFile() { mutex_.AssertHeld(); while (num_running_ingest_file_ > 0) { bg_cv_.Wait(); } } Status DBImpl::StartTrace(const TraceOptions& 
trace_options, std::unique_ptr&& trace_writer) { InstrumentedMutexLock lock(&trace_mutex_); tracer_.reset(new Tracer(env_, trace_options, std::move(trace_writer))); return Status::OK(); } Status DBImpl::EndTrace() { InstrumentedMutexLock lock(&trace_mutex_); Status s; if (tracer_ != nullptr) { s = tracer_->Close(); tracer_.reset(); } else { return Status::IOError("No trace file to close"); } return s; } Status DBImpl::StartBlockCacheTrace( const TraceOptions& trace_options, std::unique_ptr&& trace_writer) { return block_cache_tracer_.StartTrace(env_, trace_options, std::move(trace_writer)); } Status DBImpl::EndBlockCacheTrace() { block_cache_tracer_.EndTrace(); return Status::OK(); } Status DBImpl::TraceIteratorSeek(const uint32_t& cf_id, const Slice& key) { Status s; if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { s = tracer_->IteratorSeek(cf_id, key); } } return s; } Status DBImpl::TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key) { Status s; if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { s = tracer_->IteratorSeekForPrev(cf_id, key); } } return s; } Status DBImpl::ReserveFileNumbersBeforeIngestion( ColumnFamilyData* cfd, uint64_t num, std::unique_ptr::iterator>& pending_output_elem, uint64_t* next_file_number) { Status s; SuperVersionContext dummy_sv_ctx(true /* create_superversion */); assert(nullptr != next_file_number); InstrumentedMutexLock l(&mutex_); if (error_handler_.IsDBStopped()) { // Do not ingest files when there is a bg_error return error_handler_.GetBGError(); } pending_output_elem.reset(new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); *next_file_number = versions_->FetchAddFileNumber(static_cast(num)); auto cf_options = cfd->GetLatestMutableCFOptions(); VersionEdit dummy_edit; // If crash happen after a hard link established, Recover function may // reuse the file number that has already assigned to the internal file, // and this will overwrite the external 
file. To protect the external // file, we have to make sure the file number will never being reused. s = versions_->LogAndApply(cfd, *cf_options, &dummy_edit, &mutex_, directories_.GetDbDir()); if (s.ok()) { InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options); } dummy_sv_ctx.Clean(); return s; } Status DBImpl::GetCreationTimeOfOldestFile(uint64_t* creation_time) { if (mutable_db_options_.max_open_files == -1) { uint64_t oldest_time = port::kMaxUint64; for (auto cfd : *versions_->GetColumnFamilySet()) { if (!cfd->IsDropped()) { uint64_t ctime; { SuperVersion* sv = GetAndRefSuperVersion(cfd); Version* version = sv->current; version->GetCreationTimeOfOldestFile(&ctime); ReturnAndCleanupSuperVersion(cfd, sv); } if (ctime < oldest_time) { oldest_time = ctime; } if (oldest_time == 0) { break; } } } *creation_time = oldest_time; return Status::OK(); } else { return Status::NotSupported("This API only works if max_open_files = -1"); } } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl.h000066400000000000000000002654621370372246700170360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include #include #include #include #include #include #include "db/column_family.h" #include "db/compaction/compaction_job.h" #include "db/dbformat.h" #include "db/error_handler.h" #include "db/event_helpers.h" #include "db/external_sst_file_ingestion_job.h" #include "db/flush_job.h" #include "db/flush_scheduler.h" #include "db/import_column_family_job.h" #include "db/internal_stats.h" #include "db/log_writer.h" #include "db/logs_with_prep_tracker.h" #include "db/memtable_list.h" #include "db/pre_release_callback.h" #include "db/range_del_aggregator.h" #include "db/read_callback.h" #include "db/snapshot_checker.h" #include "db/snapshot_impl.h" #include "db/trim_history_scheduler.h" #include "db/version_edit.h" #include "db/wal_manager.h" #include "db/write_controller.h" #include "db/write_thread.h" #include "logging/event_logger.h" #include "monitoring/instrumented_mutex.h" #include "options/db_options.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/status.h" #include "rocksdb/trace_reader_writer.h" #include "rocksdb/transaction_log.h" #include "rocksdb/write_buffer_manager.h" #include "table/scoped_arena_iterator.h" #include "trace_replay/block_cache_tracer.h" #include "trace_replay/trace_replay.h" #include "util/autovector.h" #include "util/hash.h" #include "util/repeatable_thread.h" #include "util/stop_watch.h" #include "util/thread_local.h" namespace ROCKSDB_NAMESPACE { class Arena; class ArenaWrappedDBIter; class InMemoryStatsHistoryIterator; class MemTable; class PersistentStatsHistoryIterator; class TableCache; class TaskLimiterToken; class Version; class VersionEdit; class VersionSet; class WriteCallback; struct JobContext; struct ExternalSstFileInfo; struct MemTableInfo; // Class to maintain directories for all database paths other than main one. 
class Directories { public: IOStatus SetDirectories(FileSystem* fs, const std::string& dbname, const std::string& wal_dir, const std::vector& data_paths); FSDirectory* GetDataDir(size_t path_id) const { assert(path_id < data_dirs_.size()); FSDirectory* ret_dir = data_dirs_[path_id].get(); if (ret_dir == nullptr) { // Should use db_dir_ return db_dir_.get(); } return ret_dir; } FSDirectory* GetWalDir() { if (wal_dir_) { return wal_dir_.get(); } return db_dir_.get(); } FSDirectory* GetDbDir() { return db_dir_.get(); } private: std::unique_ptr db_dir_; std::vector> data_dirs_; std::unique_ptr wal_dir_; }; // While DB is the public interface of RocksDB, and DBImpl is the actual // class implementing it. It's the entrance of the core RocksdB engine. // All other DB implementations, e.g. TransactionDB, BlobDB, etc, wrap a // DBImpl internally. // Other than functions implementing the DB interface, some public // functions are there for other internal components to call. For // example, TransactionDB directly calls DBImpl::WriteImpl() and // BlobDB directly calls DBImpl::GetImpl(). Some other functions // are for sub-components to call. For example, ColumnFamilyHandleImpl // calls DBImpl::FindObsoleteFiles(). // // Since it's a very large class, the definition of the functions is // divided in several db_impl_*.cc files, besides db_impl.cc. 
class DBImpl : public DB { public: DBImpl(const DBOptions& options, const std::string& dbname, const bool seq_per_batch = false, const bool batch_per_txn = true); // No copying allowed DBImpl(const DBImpl&) = delete; void operator=(const DBImpl&) = delete; virtual ~DBImpl(); // ---- Implementations of the DB interface ---- using DB::Resume; virtual Status Resume() override; using DB::Put; virtual Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) override; using DB::Merge; virtual Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) override; using DB::Delete; virtual Status Delete(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key) override; using DB::SingleDelete; virtual Status SingleDelete(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key) override; using DB::Write; virtual Status Write(const WriteOptions& options, WriteBatch* updates) override; using DB::Get; virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) override; using DB::GetMergeOperands; Status GetMergeOperands(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* merge_operands, GetMergeOperandsOptions* get_merge_operands_options, int* number_of_operands) override { GetImplOptions get_impl_options; get_impl_options.column_family = column_family; get_impl_options.merge_operands = merge_operands; get_impl_options.get_merge_operands_options = get_merge_operands_options; get_impl_options.number_of_operands = number_of_operands; get_impl_options.get_value = false; return GetImpl(options, key, get_impl_options); } using DB::MultiGet; virtual std::vector 
MultiGet( const ReadOptions& options, const std::vector& column_family, const std::vector& keys, std::vector* values) override; virtual std::vector MultiGet( const ReadOptions& options, const std::vector& column_family, const std::vector& keys, std::vector* values, std::vector* timestamps) override; // This MultiGet is a batched version, which may be faster than calling Get // multiple times, especially if the keys have some spatial locality that // enables them to be queried in the same SST files/set of files. The larger // the batch size, the more scope for batching and performance improvement // The values and statuses parameters are arrays with number of elements // equal to keys.size(). This allows the storage for those to be alloacted // by the caller on the stack for small batches virtual void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override; virtual void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input = false) override; virtual void MultiGet(const ReadOptions& options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override; virtual void MultiGet(const ReadOptions& options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input = false) override; virtual void MultiGetWithCallback( const ReadOptions& options, ColumnFamilyHandle* column_family, ReadCallback* callback, autovector* sorted_keys); virtual Status CreateColumnFamily(const ColumnFamilyOptions& cf_options, const std::string& column_family, ColumnFamilyHandle** handle) override; 
virtual Status CreateColumnFamilies( const ColumnFamilyOptions& cf_options, const std::vector& column_family_names, std::vector* handles) override; virtual Status CreateColumnFamilies( const std::vector& column_families, std::vector* handles) override; virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override; virtual Status DropColumnFamilies( const std::vector& column_families) override; // Returns false if key doesn't exist in the database and true if it may. // If value_found is not passed in as null, then return the value if found in // memory. On return, if value was found, then value_found will be set to true // , otherwise false. using DB::KeyMayExist; virtual bool KeyMayExist(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value, std::string* timestamp, bool* value_found = nullptr) override; using DB::NewIterator; virtual Iterator* NewIterator(const ReadOptions& options, ColumnFamilyHandle* column_family) override; virtual Status NewIterators( const ReadOptions& options, const std::vector& column_families, std::vector* iterators) override; virtual const Snapshot* GetSnapshot() override; virtual void ReleaseSnapshot(const Snapshot* snapshot) override; using DB::GetProperty; virtual bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property, std::string* value) override; using DB::GetMapProperty; virtual bool GetMapProperty( ColumnFamilyHandle* column_family, const Slice& property, std::map* value) override; using DB::GetIntProperty; virtual bool GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) override; using DB::GetAggregatedIntProperty; virtual bool GetAggregatedIntProperty(const Slice& property, uint64_t* aggregated_value) override; using DB::GetApproximateSizes; virtual Status GetApproximateSizes(const SizeApproximationOptions& options, ColumnFamilyHandle* column_family, const Range* range, int n, uint64_t* sizes) override; using 
DB::GetApproximateMemTableStats; virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, const Range& range, uint64_t* const count, uint64_t* const size) override; using DB::CompactRange; virtual Status CompactRange(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) override; using DB::CompactFiles; virtual Status CompactFiles( const CompactionOptions& compact_options, ColumnFamilyHandle* column_family, const std::vector& input_file_names, const int output_level, const int output_path_id = -1, std::vector* const output_file_names = nullptr, CompactionJobInfo* compaction_job_info = nullptr) override; virtual Status PauseBackgroundWork() override; virtual Status ContinueBackgroundWork() override; virtual Status EnableAutoCompaction( const std::vector& column_family_handles) override; virtual void EnableManualCompaction() override; virtual void DisableManualCompaction() override; using DB::SetOptions; Status SetOptions( ColumnFamilyHandle* column_family, const std::unordered_map& options_map) override; virtual Status SetDBOptions( const std::unordered_map& options_map) override; using DB::NumberLevels; virtual int NumberLevels(ColumnFamilyHandle* column_family) override; using DB::MaxMemCompactionLevel; virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) override; using DB::Level0StopWriteTrigger; virtual int Level0StopWriteTrigger( ColumnFamilyHandle* column_family) override; virtual const std::string& GetName() const override; virtual Env* GetEnv() const override; virtual FileSystem* GetFileSystem() const override; using DB::GetOptions; virtual Options GetOptions(ColumnFamilyHandle* column_family) const override; using DB::GetDBOptions; virtual DBOptions GetDBOptions() const override; using DB::Flush; virtual Status Flush(const FlushOptions& options, ColumnFamilyHandle* column_family) override; virtual Status Flush( const FlushOptions& options, const std::vector& 
column_families) override; virtual Status FlushWAL(bool sync) override; bool TEST_WALBufferIsEmpty(bool lock = true); virtual Status SyncWAL() override; virtual Status LockWAL() override; virtual Status UnlockWAL() override; virtual SequenceNumber GetLatestSequenceNumber() const override; virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) override; virtual Status GetDbIdentity(std::string& identity) const override; virtual Status GetDbIdentityFromIdentityFile(std::string* identity) const; ColumnFamilyHandle* DefaultColumnFamily() const override; ColumnFamilyHandle* PersistentStatsColumnFamily() const; virtual Status Close() override; virtual Status DisableFileDeletions() override; virtual Status EnableFileDeletions(bool force) override; virtual bool IsFileDeletionsEnabled() const; Status GetStatsHistory( uint64_t start_time, uint64_t end_time, std::unique_ptr* stats_iterator) override; #ifndef ROCKSDB_LITE using DB::ResetStats; virtual Status ResetStats() override; // All the returned filenames start with "/" virtual Status GetLiveFiles(std::vector&, uint64_t* manifest_file_size, bool flush_memtable = true) override; virtual Status GetSortedWalFiles(VectorLogPtr& files) override; virtual Status GetCurrentWalFile( std::unique_ptr* current_log_file) override; virtual Status GetCreationTimeOfOldestFile( uint64_t* creation_time) override; virtual Status GetUpdatesSince( SequenceNumber seq_number, std::unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options = TransactionLogIterator::ReadOptions()) override; virtual Status DeleteFile(std::string name) override; Status DeleteFilesInRanges(ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end = true); virtual void GetLiveFilesMetaData( std::vector* metadata) override; // Obtains the meta data of the specified column family of the DB. // Status::NotFound() will be returned if the current DB does not have // any column family match the specified name. 
// TODO(yhchiang): output parameter is placed in the end in this codebase. virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) override; Status SuggestCompactRange(ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) override; Status PromoteL0(ColumnFamilyHandle* column_family, int target_level) override; using DB::IngestExternalFile; virtual Status IngestExternalFile( ColumnFamilyHandle* column_family, const std::vector& external_files, const IngestExternalFileOptions& ingestion_options) override; using DB::IngestExternalFiles; virtual Status IngestExternalFiles( const std::vector& args) override; using DB::CreateColumnFamilyWithImport; virtual Status CreateColumnFamilyWithImport( const ColumnFamilyOptions& options, const std::string& column_family_name, const ImportColumnFamilyOptions& import_options, const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) override; using DB::VerifyChecksum; virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override; using DB::StartTrace; virtual Status StartTrace( const TraceOptions& options, std::unique_ptr&& trace_writer) override; using DB::EndTrace; virtual Status EndTrace() override; using DB::StartBlockCacheTrace; Status StartBlockCacheTrace( const TraceOptions& options, std::unique_ptr&& trace_writer) override; using DB::EndBlockCacheTrace; Status EndBlockCacheTrace() override; using DB::GetPropertiesOfAllTables; virtual Status GetPropertiesOfAllTables( ColumnFamilyHandle* column_family, TablePropertiesCollection* props) override; virtual Status GetPropertiesOfTablesInRange( ColumnFamilyHandle* column_family, const Range* range, std::size_t n, TablePropertiesCollection* props) override; #endif // ROCKSDB_LITE // ---- End of implementations of the DB interface ---- struct GetImplOptions { ColumnFamilyHandle* column_family = nullptr; PinnableSlice* value = nullptr; std::string* timestamp = nullptr; bool* value_found = 
nullptr; ReadCallback* callback = nullptr; bool* is_blob_index = nullptr; // If true return value associated with key via value pointer else return // all merge operands for key via merge_operands pointer bool get_value = true; // Pointer to an array of size // get_merge_operands_options.expected_max_number_of_operands allocated by // user PinnableSlice* merge_operands = nullptr; GetMergeOperandsOptions* get_merge_operands_options = nullptr; int* number_of_operands = nullptr; }; // Function that Get and KeyMayExist call with no_io true or false // Note: 'value_found' from KeyMayExist propagates here // This function is also called by GetMergeOperands // If get_impl_options.get_value = true get value associated with // get_impl_options.key via get_impl_options.value // If get_impl_options.get_value = false get merge operands associated with // get_impl_options.key via get_impl_options.merge_operands Status GetImpl(const ReadOptions& options, const Slice& key, GetImplOptions& get_impl_options); // If `snapshot` == kMaxSequenceNumber, set a recent one inside the file. ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& options, ColumnFamilyData* cfd, SequenceNumber snapshot, ReadCallback* read_callback, bool allow_blob = false, bool allow_refresh = true); virtual SequenceNumber GetLastPublishedSequence() const { if (last_seq_same_as_publish_seq_) { return versions_->LastSequence(); } else { return versions_->LastPublishedSequence(); } } // REQUIRES: joined the main write queue if two_write_queues is disabled, and // the second write queue otherwise. virtual void SetLastPublishedSequence(SequenceNumber seq); // Returns LastSequence in last_seq_same_as_publish_seq_ // mode and LastAllocatedSequence otherwise. This is useful when visiblility // depends also on data written to the WAL but not to the memtable. 
SequenceNumber TEST_GetLastVisibleSequence() const; #ifndef ROCKSDB_LITE // Similar to Write() but will call the callback once on the single write // thread to determine whether it is safe to perform the write. virtual Status WriteWithCallback(const WriteOptions& write_options, WriteBatch* my_batch, WriteCallback* callback); // Returns the sequence number that is guaranteed to be smaller than or equal // to the sequence number of any key that could be inserted into the current // memtables. It can then be assumed that any write with a larger(or equal) // sequence number will be present in this memtable or a later memtable. // // If the earliest sequence number could not be determined, // kMaxSequenceNumber will be returned. // // If include_history=true, will also search Memtables in MemTableList // History. SequenceNumber GetEarliestMemTableSequenceNumber(SuperVersion* sv, bool include_history); // For a given key, check to see if there are any records for this key // in the memtables, including memtable history. If cache_only is false, // SST files will also be checked. // // If a key is found, *found_record_for_key will be set to true and // *seq will be set to the stored sequence number for the latest // operation on this key or kMaxSequenceNumber if unknown. // If no key is found, *found_record_for_key will be set to false. // // Note: If cache_only=false, it is possible for *seq to be set to 0 if // the sequence number has been cleared from the record. If the caller is // holding an active db snapshot, we know the missing sequence must be less // than the snapshot's sequence number (sequence numbers are only cleared // when there are no earlier active snapshots). // // If NotFound is returned and found_record_for_key is set to false, then no // record for this key was found. If the caller is holding an active db // snapshot, we know that no key could have existing after this snapshot // (since we do not compact keys that have an earlier snapshot). 
// // Only records newer than or at `lower_bound_seq` are guaranteed to be // returned. Memtables and files may not be checked if it only contains data // older than `lower_bound_seq`. // // Returns OK or NotFound on success, // other status on unexpected error. // TODO(andrewkr): this API need to be aware of range deletion operations Status GetLatestSequenceForKey(SuperVersion* sv, const Slice& key, bool cache_only, SequenceNumber lower_bound_seq, SequenceNumber* seq, bool* found_record_for_key, bool* is_blob_index = nullptr); Status TraceIteratorSeek(const uint32_t& cf_id, const Slice& key); Status TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key); #endif // ROCKSDB_LITE // Similar to GetSnapshot(), but also lets the db know that this snapshot // will be used for transaction write-conflict checking. The DB can then // make sure not to compact any keys that would prevent a write-conflict from // being detected. const Snapshot* GetSnapshotForWriteConflictBoundary(); // checks if all live files exist on file system and that their file sizes // match to our in-memory records virtual Status CheckConsistency(); // max_file_num_to_ignore allows bottom level compaction to filter out newly // compacted SST files. Setting max_file_num_to_ignore to kMaxUint64 will // disable the filtering Status RunManualCompaction(ColumnFamilyData* cfd, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const Slice* begin, const Slice* end, bool exclusive, bool disallow_trivial_move, uint64_t max_file_num_to_ignore); // Return an internal iterator over the current state of the database. // The keys of this iterator are internal keys (see format.h). // The returned iterator should be deleted when no longer needed. 
// If allow_unprepared_value is true, the returned iterator may defer reading // the value and so will require PrepareValue() to be called before value(); // allow_unprepared_value = false is convenient when this optimization is not // useful, e.g. when reading the whole column family. InternalIterator* NewInternalIterator( Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence, ColumnFamilyHandle* column_family = nullptr, bool allow_unprepared_value = false); LogsWithPrepTracker* logs_with_prep_tracker() { return &logs_with_prep_tracker_; } struct BGJobLimits { int max_flushes; int max_compactions; }; // Returns maximum background flushes and compactions allowed to be scheduled BGJobLimits GetBGJobLimits() const; // Need a static version that can be called during SanitizeOptions(). static BGJobLimits GetBGJobLimits(int max_background_flushes, int max_background_compactions, int max_background_jobs, bool parallelize_compactions); // move logs pending closing from job_context to the DB queue and // schedule a purge void ScheduleBgLogWriterClose(JobContext* job_context); uint64_t MinLogNumberToKeep(); // Returns the lower bound file number for SSTs that won't be deleted, even if // they're obsolete. This lower bound is used internally to prevent newly // created flush/compaction output files from being deleted before they're // installed. This technique avoids the need for tracking the exact numbers of // files pending creation, although it prevents more files than necessary from // being deleted. uint64_t MinObsoleteSstNumberToKeep(); // Returns the list of live files in 'live' and the list // of all files in the filesystem in 'candidate_files'. 
// If force == false and the last call was less than // db_options_.delete_obsolete_files_period_micros microseconds ago, // it will not fill up the job_context void FindObsoleteFiles(JobContext* job_context, bool force, bool no_full_scan = false); // Diffs the files listed in filenames and those that do not // belong to live files are possibly removed. Also, removes all the // files in sst_delete_files and log_delete_files. // It is not necessary to hold the mutex when invoking this method. // If FindObsoleteFiles() was run, we need to also run // PurgeObsoleteFiles(), even if disable_delete_obsolete_files_ is true void PurgeObsoleteFiles(JobContext& background_contet, bool schedule_only = false); // Schedule a background job to actually delete obsolete files. void SchedulePurge(); const SnapshotList& snapshots() const { return snapshots_; } // load list of snapshots to `snap_vector` that is no newer than `max_seq` // in ascending order. // `oldest_write_conflict_snapshot` is filled with the oldest snapshot // which satisfies SnapshotImpl.is_write_conflict_boundary_ = true. void LoadSnapshots(std::vector* snap_vector, SequenceNumber* oldest_write_conflict_snapshot, const SequenceNumber& max_seq) const { InstrumentedMutexLock l(mutex()); snapshots().GetAll(snap_vector, oldest_write_conflict_snapshot, max_seq); } const ImmutableDBOptions& immutable_db_options() const { return immutable_db_options_; } // Cancel all background jobs, including flush, compaction, background // purging, stats dumping threads, etc. If `wait` = true, wait for the // running jobs to abort or finish before returning. Otherwise, only // sends the signals. void CancelAllBackgroundWork(bool wait); // Find Super version and reference it. Based on options, it might return // the thread local cached one. // Call ReturnAndCleanupSuperVersion() when it is no longer needed. 
SuperVersion* GetAndRefSuperVersion(ColumnFamilyData* cfd); // Similar to the previous function but looks up based on a column family id. // nullptr will be returned if this column family no longer exists. // REQUIRED: this function should only be called on the write thread or if the // mutex is held. SuperVersion* GetAndRefSuperVersion(uint32_t column_family_id); // Un-reference the super version and clean it up if it is the last reference. void CleanupSuperVersion(SuperVersion* sv); // Un-reference the super version and return it to thread local cache if // needed. If it is the last reference of the super version. Clean it up // after un-referencing it. void ReturnAndCleanupSuperVersion(ColumnFamilyData* cfd, SuperVersion* sv); // Similar to the previous function but looks up based on a column family id. // nullptr will be returned if this column family no longer exists. // REQUIRED: this function should only be called on the write thread. void ReturnAndCleanupSuperVersion(uint32_t colun_family_id, SuperVersion* sv); // REQUIRED: this function should only be called on the write thread or if the // mutex is held. Return value only valid until next call to this function or // mutex is released. ColumnFamilyHandle* GetColumnFamilyHandle(uint32_t column_family_id); // Same as above, should called without mutex held and not on write thread. std::unique_ptr GetColumnFamilyHandleUnlocked( uint32_t column_family_id); // Returns the number of currently running flushes. // REQUIREMENT: mutex_ must be held when calling this function. int num_running_flushes() { mutex_.AssertHeld(); return num_running_flushes_; } // Returns the number of currently running compactions. // REQUIREMENT: mutex_ must be held when calling this function. 
int num_running_compactions() { mutex_.AssertHeld(); return num_running_compactions_; } const WriteController& write_controller() { return write_controller_; } InternalIterator* NewInternalIterator( const ReadOptions&, ColumnFamilyData* cfd, SuperVersion* super_version, Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence, bool allow_unprepared_value); // hollow transactions shell used for recovery. // these will then be passed to TransactionDB so that // locks can be reacquired before writing can resume. struct RecoveredTransaction { std::string name_; bool unprepared_; struct BatchInfo { uint64_t log_number_; // TODO(lth): For unprepared, the memory usage here can be big for // unprepared transactions. This is only useful for rollbacks, and we // can in theory just keep keyset for that. WriteBatch* batch_; // Number of sub-batches. A new sub-batch is created if txn attempts to // insert a duplicate key,seq to memtable. This is currently used in // WritePreparedTxn/WriteUnpreparedTxn. size_t batch_cnt_; }; // This maps the seq of the first key in the batch to BatchInfo, which // contains WriteBatch and other information relevant to the batch. // // For WriteUnprepared, batches_ can have size greater than 1, but for // other write policies, it must be of size 1. std::map batches_; explicit RecoveredTransaction(const uint64_t log, const std::string& name, WriteBatch* batch, SequenceNumber seq, size_t batch_cnt, bool unprepared) : name_(name), unprepared_(unprepared) { batches_[seq] = {log, batch, batch_cnt}; } ~RecoveredTransaction() { for (auto& it : batches_) { delete it.second.batch_; } } void AddBatch(SequenceNumber seq, uint64_t log_number, WriteBatch* batch, size_t batch_cnt, bool unprepared) { assert(batches_.count(seq) == 0); batches_[seq] = {log_number, batch, batch_cnt}; // Prior state must be unprepared, since the prepare batch must be the // last batch. 
assert(unprepared_); unprepared_ = unprepared; } }; bool allow_2pc() const { return immutable_db_options_.allow_2pc; } std::unordered_map recovered_transactions() { return recovered_transactions_; } RecoveredTransaction* GetRecoveredTransaction(const std::string& name) { auto it = recovered_transactions_.find(name); if (it == recovered_transactions_.end()) { return nullptr; } else { return it->second; } } void InsertRecoveredTransaction(const uint64_t log, const std::string& name, WriteBatch* batch, SequenceNumber seq, size_t batch_cnt, bool unprepared_batch) { // For WriteUnpreparedTxn, InsertRecoveredTransaction is called multiple // times for every unprepared batch encountered during recovery. // // If the transaction is prepared, then the last call to // InsertRecoveredTransaction will have unprepared_batch = false. auto rtxn = recovered_transactions_.find(name); if (rtxn == recovered_transactions_.end()) { recovered_transactions_[name] = new RecoveredTransaction( log, name, batch, seq, batch_cnt, unprepared_batch); } else { rtxn->second->AddBatch(seq, log, batch, batch_cnt, unprepared_batch); } logs_with_prep_tracker_.MarkLogAsContainingPrepSection(log); } void DeleteRecoveredTransaction(const std::string& name) { auto it = recovered_transactions_.find(name); assert(it != recovered_transactions_.end()); auto* trx = it->second; recovered_transactions_.erase(it); for (const auto& info : trx->batches_) { logs_with_prep_tracker_.MarkLogAsHavingPrepSectionFlushed( info.second.log_number_); } delete trx; } void DeleteAllRecoveredTransactions() { for (auto it = recovered_transactions_.begin(); it != recovered_transactions_.end(); ++it) { delete it->second; } recovered_transactions_.clear(); } void AddToLogsToFreeQueue(log::Writer* log_writer) { logs_to_free_queue_.push_back(log_writer); } void AddSuperVersionsToFreeQueue(SuperVersion* sv) { superversions_to_free_queue_.push_back(sv); } void SetSnapshotChecker(SnapshotChecker* snapshot_checker); // Fill JobContext 
with snapshot information needed by flush and compaction. void GetSnapshotContext(JobContext* job_context, std::vector* snapshot_seqs, SequenceNumber* earliest_write_conflict_snapshot, SnapshotChecker** snapshot_checker); // Not thread-safe. void SetRecoverableStatePreReleaseCallback(PreReleaseCallback* callback); InstrumentedMutex* mutex() const { return &mutex_; } // Initialize a brand new DB. The DB directory is expected to be empty before // calling it. Status NewDB(); // This is to be used only by internal rocksdb classes. static Status Open(const DBOptions& db_options, const std::string& name, const std::vector& column_families, std::vector* handles, DB** dbptr, const bool seq_per_batch, const bool batch_per_txn); static IOStatus CreateAndNewDirectory( FileSystem* fs, const std::string& dirname, std::unique_ptr* directory); // find stats map from stats_history_ with smallest timestamp in // the range of [start_time, end_time) bool FindStatsByTime(uint64_t start_time, uint64_t end_time, uint64_t* new_time, std::map* stats_map); // Print information of all tombstones of all iterators to the std::string // This is only used by ldb. The output might be capped. Tombstones // printed out are not guaranteed to be in any order. Status TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family, int max_entries_to_print, std::string* out_str); #ifndef NDEBUG // Compact any files in the named level that overlap [*begin, *end] Status TEST_CompactRange(int level, const Slice* begin, const Slice* end, ColumnFamilyHandle* column_family = nullptr, bool disallow_trivial_move = false); void TEST_SwitchWAL(); bool TEST_UnableToReleaseOldestLog() { return unable_to_release_oldest_log_; } bool TEST_IsLogGettingFlushed() { return alive_log_files_.begin()->getting_flushed; } Status TEST_SwitchMemtable(ColumnFamilyData* cfd = nullptr); // Force current memtable contents to be flushed. 
Status TEST_FlushMemTable(bool wait = true, bool allow_write_stall = false, ColumnFamilyHandle* cfh = nullptr); Status TEST_FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& flush_opts); // Flush (multiple) ColumnFamilyData without using ColumnFamilyHandle. This // is because in certain cases, we can flush column families, wait for the // flush to complete, but delete the column family handle before the wait // finishes. For example in CompactRange. Status TEST_AtomicFlushMemTables(const autovector& cfds, const FlushOptions& flush_opts); // Wait for memtable compaction Status TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family = nullptr); // Wait for any compaction // We add a bool parameter to wait for unscheduledCompactions_ == 0, but this // is only for the special test of CancelledCompactions Status TEST_WaitForCompact(bool waitUnscheduled = false); // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. int64_t TEST_MaxNextLevelOverlappingBytes( ColumnFamilyHandle* column_family = nullptr); // Return the current manifest file no. uint64_t TEST_Current_Manifest_FileNo(); // Returns the number that'll be assigned to the next file that's created. uint64_t TEST_Current_Next_FileNo(); // get total level0 file size. Only for testing. uint64_t TEST_GetLevel0TotalSize(); void TEST_GetFilesMetaData(ColumnFamilyHandle* column_family, std::vector>* metadata); void TEST_LockMutex(); void TEST_UnlockMutex(); // REQUIRES: mutex locked void* TEST_BeginWrite(); // REQUIRES: mutex locked // pass the pointer that you got from TEST_BeginWrite() void TEST_EndWrite(void* w); uint64_t TEST_MaxTotalInMemoryState() const { return max_total_in_memory_state_; } size_t TEST_LogsToFreeSize(); uint64_t TEST_LogfileNumber(); uint64_t TEST_total_log_size() const { return total_log_size_; } // Returns column family name to ImmutableCFOptions map. 
Status TEST_GetAllImmutableCFOptions( std::unordered_map* iopts_map); // Return the lastest MutableCFOptions of a column family Status TEST_GetLatestMutableCFOptions(ColumnFamilyHandle* column_family, MutableCFOptions* mutable_cf_options); Cache* TEST_table_cache() { return table_cache_.get(); } WriteController& TEST_write_controler() { return write_controller_; } uint64_t TEST_FindMinLogContainingOutstandingPrep(); uint64_t TEST_FindMinPrepLogReferencedByMemTable(); size_t TEST_PreparedSectionCompletedSize(); size_t TEST_LogsWithPrepSize(); int TEST_BGCompactionsAllowed() const; int TEST_BGFlushesAllowed() const; size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; void TEST_WaitForDumpStatsRun(std::function callback) const; void TEST_WaitForPersistStatsRun(std::function callback) const; bool TEST_IsPersistentStatsEnabled() const; size_t TEST_EstimateInMemoryStatsHistorySize() const; VersionSet* TEST_GetVersionSet() const { return versions_.get(); } const std::unordered_set& TEST_GetFilesGrabbedForPurge() const { return files_grabbed_for_purge_; } #endif // NDEBUG protected: const std::string dbname_; std::string db_id_; std::unique_ptr versions_; // Flag to check whether we allocated and own the info log file bool own_info_log_; const DBOptions initial_db_options_; Env* const env_; std::shared_ptr fs_; const ImmutableDBOptions immutable_db_options_; MutableDBOptions mutable_db_options_; Statistics* stats_; std::unordered_map recovered_transactions_; std::unique_ptr tracer_; InstrumentedMutex trace_mutex_; BlockCacheTracer block_cache_tracer_; // State below is protected by mutex_ // With two_write_queues enabled, some of the variables that accessed during // WriteToWAL need different synchronization: log_empty_, alive_log_files_, // logs_, logfile_number_. Refer to the definition of each variable below for // more description. 
// The DB mutex. Declared mutable so that const member functions can lock it.
mutable InstrumentedMutex mutex_;

ColumnFamilyHandleImpl* default_cf_handle_;
InternalStats* default_cf_internal_stats_;

// only used for dynamically adjusting max_total_wal_size. it is a sum of
// [write_buffer_size * max_write_buffer_number] over all column families
uint64_t max_total_in_memory_state_;
// If true, we have only one (default) column family. We use this to optimize
// some code-paths
bool single_column_family_mode_;

// The options to access storage files
const FileOptions file_options_;

// Additional options for compaction and flush
FileOptions file_options_for_compaction_;

std::unique_ptr column_family_memtables_;

// Increase the sequence number after writing each batch, whether memtable is
// disabled for that or not. Otherwise the sequence number is increased after
// writing each key into memtable. This implies that when disable_memtable is
// set, the seq is not increased at all.
//
// Default: false
const bool seq_per_batch_;
// This determines during recovery whether we expect one writebatch per
// recovered transaction, or potentially multiple writebatches per
// transaction. For WriteUnprepared, this is set to false, since multiple
// batches can exist per transaction.
//
// Default: true
const bool batch_per_txn_;

// Except in DB::Open(), WriteOptionsFile can only be called when:
// Persist options to options file.
// If need_mutex_lock = false, the method will lock DB mutex.
// If need_enter_write_thread = false, the method will enter write thread.
// NOTE(review): the first sentence above lists no conditions, and the two
// "= false" sentences read inverted relative to the parameter names (one
// would expect the method to lock / enter the write thread when the flag is
// true) -- confirm against the definition before relying on this comment.
Status WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread);

// The following two functions can only be called when:
// 1. WriteThread::Writer::EnterUnbatched() is used.
// 2. db_mutex is NOT held
Status RenameTempFileToOptionsFile(const std::string& file_name);
Status DeleteObsoleteOptionsFiles();

// Listener-notification helpers, one per event kind.
// NOTE(review): presumably each forwards the event to the registered
// listeners -- confirm against the definitions.
void NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
                        const MutableCFOptions& mutable_cf_options,
                        int job_id);

void NotifyOnFlushCompleted(
    ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
    std::list>* flush_jobs_info);

void NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
                             const Status& st,
                             const CompactionJobStats& job_stats, int job_id);

void NotifyOnCompactionCompleted(ColumnFamilyData* cfd, Compaction* c,
                                 const Status& st,
                                 const CompactionJobStats& job_stats,
                                 int job_id);
void NotifyOnMemTableSealed(ColumnFamilyData* cfd,
                            const MemTableInfo& mem_table_info);

#ifndef ROCKSDB_LITE
void NotifyOnExternalFileIngested(
    ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job);
#endif  // !ROCKSDB_LITE

void NewThreadStatusCfInfo(ColumnFamilyData* cfd) const;

void EraseThreadStatusCfInfo(ColumnFamilyData* cfd) const;

void EraseThreadStatusDbInfo() const;

// If disable_memtable is set the application logic must guarantee that the
// batch will still be skipped from memtable during the recovery. An exception
// to this is seq_per_batch_ mode, in which since each batch already takes one
// seq, it is ok for the batch to write to memtable during recovery as long as
// it only takes one sequence number: i.e., no duplicate keys.
// In WriteCommitted it is guaranteed since disable_memtable is used for
// prepare batch which will be written to memtable later during the commit,
// and in WritePrepared it is guaranteed since it will be used only for WAL
// markers which will never be written to memtable. If the commit marker is
// accompanied with CommitTimeWriteBatch that is not written to memtable as
// long as it has no duplicate keys, it does not violate the one-seq-per-batch
// policy.
// batch_cnt is expected to be non-zero in seq_per_batch mode and
// indicates the number of sub-batches. A sub-batch is a subset of the write
// batch that does not have duplicate keys.
Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
                 WriteCallback* callback = nullptr,
                 uint64_t* log_used = nullptr, uint64_t log_ref = 0,
                 bool disable_memtable = false, uint64_t* seq_used = nullptr,
                 size_t batch_cnt = 0,
                 PreReleaseCallback* pre_release_callback = nullptr);

// Same leading parameters as WriteImpl(), without batch_cnt and
// pre_release_callback.
Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
                          WriteCallback* callback = nullptr,
                          uint64_t* log_used = nullptr, uint64_t log_ref = 0,
                          bool disable_memtable = false,
                          uint64_t* seq_used = nullptr);

// Write only to memtables without joining any write queue
Status UnorderedWriteMemtable(const WriteOptions& write_options,
                              WriteBatch* my_batch, WriteCallback* callback,
                              uint64_t log_ref, SequenceNumber seq,
                              const size_t sub_batch_cnt);

// Whether the batch requires to be assigned with an order
// (bool-backed: kDontAssignOrder == false, kDoAssignOrder == true)
enum AssignOrder : bool { kDontAssignOrder, kDoAssignOrder };
// Whether it requires publishing last sequence or not
// (bool-backed: kDontPublishLastSeq == false, kDoPublishLastSeq == true)
enum PublishLastSeq : bool { kDontPublishLastSeq, kDoPublishLastSeq };

// Join the write_thread to write the batch only to the WAL. It is the
// responsibility of the caller to also write the write batch to the memtable
// if it is required.
//
// sub_batch_cnt is expected to be non-zero when assign_order = kDoAssignOrder
// indicating the number of sub-batches in my_batch. A sub-batch is a subset
// of the write batch that does not have duplicate keys. When seq_per_batch is
// not set, each key is a separate sub_batch. Otherwise each duplicate key
// marks start of a new sub-batch.
// WAL-only write entry point. `write_thread` is the write queue to join;
// `assign_order` and `publish_last_seq` take the bool-backed enums declared
// just before this function.
Status WriteImplWALOnly(
    WriteThread* write_thread, const WriteOptions& options,
    WriteBatch* updates, WriteCallback* callback, uint64_t* log_used,
    const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
    PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
    const PublishLastSeq publish_last_seq, const bool disable_memtable);

// write cached_recoverable_state_ to memtable if it is not empty
// The writer must be the leader in write_thread_ and holding mutex_
Status WriteRecoverableState();

// Actual implementation of Close()
Status CloseImpl();

// Recover the descriptor from persistent storage. May do a significant
// amount of work to recover recently logged updates. Any changes to
// be made to the descriptor are added to *edit.
// recovered_seq is set to less than kMaxSequenceNumber if the log's tail is
// skipped.
// NOTE(review): this signature has no parameter named `edit`; the sentence
// about *edit looks stale -- confirm.
virtual Status Recover(
    const std::vector& column_families,
    bool read_only = false, bool error_if_log_file_exist = false,
    bool error_if_data_exists_in_logs = false,
    uint64_t* recovered_seq = nullptr);

// Returns true in this class; virtual so that a subclass can answer
// differently.
virtual bool OwnTablesAndLogs() const { return true; }

// REQUIRES: db mutex held when calling this function, but the db mutex can
// be released and re-acquired. Db mutex will be held when the function
// returns.
// After best-efforts recovery, there may be SST files in db/cf paths that are
// not referenced in the MANIFEST. We delete these SST files. In the
// meantime, we find out the largest file number present in the paths, and
// bump up the version set's next_file_number_ to be 1 + largest_file_number.
Status FinishBestEffortsRecovery(); private: friend class DB; friend class ErrorHandler; friend class InternalStats; friend class PessimisticTransaction; friend class TransactionBaseImpl; friend class WriteCommittedTxn; friend class WritePreparedTxn; friend class WritePreparedTxnDB; friend class WriteBatchWithIndex; friend class WriteUnpreparedTxnDB; friend class WriteUnpreparedTxn; #ifndef ROCKSDB_LITE friend class ForwardIterator; #endif friend struct SuperVersion; friend class CompactedDBImpl; friend class DBTest_ConcurrentFlushWAL_Test; friend class DBTest_MixedSlowdownOptionsStop_Test; friend class DBCompactionTest_CompactBottomLevelFilesWithDeletions_Test; friend class DBCompactionTest_CompactionDuringShutdown_Test; friend class StatsHistoryTest_PersistentStatsCreateColumnFamilies_Test; #ifndef NDEBUG friend class DBTest2_ReadCallbackTest_Test; friend class WriteCallbackTest_WriteWithCallbackTest_Test; friend class XFTransactionWriteHandler; friend class DBBlobIndexTest; friend class WriteUnpreparedTransactionTest_RecoveryTest_Test; #endif struct CompactionState; struct PrepickedCompaction; struct PurgeFileInfo; struct WriteContext { SuperVersionContext superversion_context; autovector memtables_to_free_; explicit WriteContext(bool create_superversion = false) : superversion_context(create_superversion) {} ~WriteContext() { superversion_context.Clean(); for (auto& m : memtables_to_free_) { delete m; } } }; struct LogFileNumberSize { explicit LogFileNumberSize(uint64_t _number) : number(_number) {} void AddSize(uint64_t new_size) { size += new_size; } uint64_t number; uint64_t size = 0; bool getting_flushed = false; }; struct LogWriterNumber { // pass ownership of _writer LogWriterNumber(uint64_t _number, log::Writer* _writer) : number(_number), writer(_writer) {} log::Writer* ReleaseWriter() { auto* w = writer; writer = nullptr; return w; } Status ClearWriter() { Status s = writer->WriteBuffer(); delete writer; writer = nullptr; return s; } uint64_t number; 
// Visual Studio doesn't support deque's member to be noncopyable because // of a std::unique_ptr as a member. log::Writer* writer; // own // true for some prefix of logs_ bool getting_synced = false; }; // PurgeFileInfo is a structure to hold information of files to be deleted in // purge_files_ struct PurgeFileInfo { std::string fname; std::string dir_to_sync; FileType type; uint64_t number; int job_id; PurgeFileInfo(std::string fn, std::string d, FileType t, uint64_t num, int jid) : fname(fn), dir_to_sync(d), type(t), number(num), job_id(jid) {} }; // Argument required by background flush thread. struct BGFlushArg { BGFlushArg() : cfd_(nullptr), max_memtable_id_(0), superversion_context_(nullptr) {} BGFlushArg(ColumnFamilyData* cfd, uint64_t max_memtable_id, SuperVersionContext* superversion_context) : cfd_(cfd), max_memtable_id_(max_memtable_id), superversion_context_(superversion_context) {} // Column family to flush. ColumnFamilyData* cfd_; // Maximum ID of memtable to flush. In this column family, memtables with // IDs smaller than this value must be flushed before this flush completes. uint64_t max_memtable_id_; // Pointer to a SuperVersionContext object. After flush completes, RocksDB // installs a new superversion for the column family. This operation // requires a SuperVersionContext object (currently embedded in JobContext). SuperVersionContext* superversion_context_; }; // Argument passed to flush thread. struct FlushThreadArg { DBImpl* db_; Env::Priority thread_pri_; }; // Information for a manual compaction struct ManualCompactionState { ColumnFamilyData* cfd; int input_level; int output_level; uint32_t output_path_id; Status status; bool done; bool in_progress; // compaction request being processed? 
bool incomplete; // only part of requested range compacted bool exclusive; // current behavior of only one manual bool disallow_trivial_move; // Force actual compaction to run const InternalKey* begin; // nullptr means beginning of key range const InternalKey* end; // nullptr means end of key range InternalKey* manual_end; // how far we are compacting InternalKey tmp_storage; // Used to keep track of compaction progress InternalKey tmp_storage1; // Used to keep track of compaction progress }; struct PrepickedCompaction { // background compaction takes ownership of `compaction`. Compaction* compaction; // caller retains ownership of `manual_compaction_state` as it is reused // across background compactions. ManualCompactionState* manual_compaction_state; // nullptr if non-manual // task limiter token is requested during compaction picking. std::unique_ptr task_token; }; struct CompactionArg { // caller retains ownership of `db`. DBImpl* db; // background compaction takes ownership of `prepicked_compaction`. PrepickedCompaction* prepicked_compaction; }; // Initialize the built-in column family for persistent stats. Depending on // whether on-disk persistent stats have been enabled before, it may either // create a new column family and column family handle or just a column family // handle. // Required: DB mutex held Status InitPersistStatsColumnFamily(); // Persistent Stats column family has two format version key which are used // for compatibility check. Write format version if it's created for the // first time, read format version and check compatibility if recovering // from disk. This function requires DB mutex held at entrance but may // release and re-acquire DB mutex in the process. 
// Required: DB mutex held Status PersistentStatsProcessFormatVersion(); Status ResumeImpl(); void MaybeIgnoreError(Status* s) const; const Status CreateArchivalDirectory(); Status CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, const std::string& cf_name, ColumnFamilyHandle** handle); Status DropColumnFamilyImpl(ColumnFamilyHandle* column_family); // Delete any unneeded files and stale in-memory entries. void DeleteObsoleteFiles(); // Delete obsolete files and log status and information of file deletion void DeleteObsoleteFileImpl(int job_id, const std::string& fname, const std::string& path_to_sync, FileType type, uint64_t number); // Background process needs to call // auto x = CaptureCurrentFileNumberInPendingOutputs() // auto file_num = versions_->NewFileNumber(); // // ReleaseFileNumberFromPendingOutputs(x) // This will protect any file with number `file_num` or greater from being // deleted while is running. // ----------- // This function will capture current file number and append it to // pending_outputs_. This will prevent any background process to delete any // file created after this point. std::list::iterator CaptureCurrentFileNumberInPendingOutputs(); // This function should be called with the result of // CaptureCurrentFileNumberInPendingOutputs(). It then marks that any file // created between the calls CaptureCurrentFileNumberInPendingOutputs() and // ReleaseFileNumberFromPendingOutputs() can now be deleted (if it's not live // and blocked by any other pending_outputs_ calls) void ReleaseFileNumberFromPendingOutputs( std::unique_ptr::iterator>& v); IOStatus SyncClosedLogs(JobContext* job_context); // Flush the in-memory write buffer to storage. Switches to a new // log-file/memtable and writes a new descriptor iff successful. Then // installs a new super version for the column family. 
// Single-column-family flush. `madeProgress` is an out-parameter.
// NOTE(review): presumably set to true when a memtable was actually flushed
// -- confirm against the definition.
Status FlushMemTableToOutputFile(
    ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
    bool* madeProgress, JobContext* job_context,
    SuperVersionContext* superversion_context,
    std::vector& snapshot_seqs,
    SequenceNumber earliest_write_conflict_snapshot,
    SnapshotChecker* snapshot_checker, LogBuffer* log_buffer,
    Env::Priority thread_pri);

// Flush the memtables of (multiple) column families to multiple files on
// persistent storage.
Status FlushMemTablesToOutputFiles(
    const autovector& bg_flush_args, bool* made_progress,
    JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri);

// Takes the same arguments as FlushMemTablesToOutputFiles().
Status AtomicFlushMemTablesToOutputFiles(
    const autovector& bg_flush_args, bool* made_progress,
    JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri);

// REQUIRES: log_numbers are sorted in ascending order
// corrupted_log_found is set to true if we recover from a corrupted log file.
Status RecoverLogFiles(const std::vector& log_numbers,
                       SequenceNumber* next_sequence, bool read_only,
                       bool* corrupted_log_found);

// The following two methods are used to flush a memtable to
// storage. The first one is used at database RecoveryTime (when the
// database is opened) and is heavyweight because it holds the mutex
// for the entire period. The second method WriteLevel0Table supports
// concurrent flush memtables to storage.
Status WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
                                   MemTable* mem, VersionEdit* edit);

// Restore alive_log_files_ and total_log_size_ after recovery.
// It needs to run only when there's no flush during recovery
// (e.g. avoid_flush_during_recovery=true). May also trigger flush
// in case total_log_size > max_total_wal_size.
Status RestoreAliveLogFiles(const std::vector& log_numbers);

// num_bytes: for slowdown case, delay time is calculated based on
// `num_bytes` going through.
Status DelayWrite(uint64_t num_bytes, const WriteOptions& write_options);

Status ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options,
                                    WriteBatch* my_batch);

// REQUIRES: mutex locked and in write thread.
Status ScheduleFlushes(WriteContext* context);

void MaybeFlushStatsCF(autovector* cfds);

Status TrimMemtableHistory(WriteContext* context);

Status SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context);

// The selection is delivered through the out-parameter `cfds`.
void SelectColumnFamiliesForAtomicFlush(autovector* cfds);

// Force current memtable contents to be flushed.
Status FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& options,
                     FlushReason flush_reason, bool writes_stopped = false);

// Multi-column-family counterpart of FlushMemTable(); takes the same trailing
// parameters.
Status AtomicFlushMemTables(
    const autovector& column_family_datas,
    const FlushOptions& options, FlushReason flush_reason,
    bool writes_stopped = false);

// Wait until flushing this column family won't stall writes
Status WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
                                         bool* flush_needed);

// Wait for memtable flushed.
// If flush_memtable_id is non-null, wait until the memtable with the ID
// gets flushed. Otherwise, wait until the column family doesn't have any
// memtable pending flush.
// resuming_from_bg_err indicates whether the caller is attempting to resume
// from background error.
// Implemented as a one-element call to WaitForFlushMemTables().
Status WaitForFlushMemTable(ColumnFamilyData* cfd,
                            const uint64_t* flush_memtable_id = nullptr,
                            bool resuming_from_bg_err = false) {
  return WaitForFlushMemTables({cfd}, {flush_memtable_id},
                               resuming_from_bg_err);
}
// Wait for memtables to be flushed for multiple column families.
Status WaitForFlushMemTables(
    const autovector& cfds,
    const autovector& flush_memtable_ids,
    bool resuming_from_bg_err);

// Blocks until writes that are still in flight have finished.
// REQUIRES: mutex_ held (asserted on entry). mutex_ is held again on return,
// but it is released and re-acquired in between when pipelined write is
// enabled, so state guarded by mutex_ may have changed across the call.
inline void WaitForPendingWrites() {
  mutex_.AssertHeld();
  TEST_SYNC_POINT("DBImpl::WaitForPendingWrites:BeforeBlock");
  // In case of pipelined write is enabled, wait for all pending memtable
  // writers.
  if (immutable_db_options_.enable_pipelined_write) {
    // Memtable writers may call DB::Get in case max_successive_merges > 0,
    // which may lock mutex. Unlocking mutex here to avoid deadlock.
    mutex_.Unlock();
    write_thread_.WaitForMemTableWriters();
    mutex_.Lock();
  }

  if (!immutable_db_options_.unordered_write) {
    // Then the writes are finished before the next write group starts
    return;
  }

  // Wait for the ones who already wrote to the WAL to finish their
  // memtable write.
  // The counter is checked once without switch_mutex_ as a fast path; the
  // wait predicate re-checks it under the lock.
  if (pending_memtable_writes_.load() != 0) {
    std::unique_lock guard(switch_mutex_);
    switch_cv_.wait(guard,
                    [&] { return pending_memtable_writes_.load() == 0; });
  }
}

// REQUIRES: mutex locked and in write thread.
void AssignAtomicFlushSeq(const autovector& cfds);

// REQUIRES: mutex locked and in write thread.
Status SwitchWAL(WriteContext* write_context);

// REQUIRES: mutex locked and in write thread.
Status HandleWriteBufferFull(WriteContext* write_context);

// REQUIRES: mutex locked
Status PreprocessWrite(const WriteOptions& write_options, bool* need_log_sync,
                       WriteContext* write_context);

// NOTE(review): presumably combines the batches of `write_group` into one
// batch, using `tmp_batch` as scratch space -- confirm against the definition.
WriteBatch* MergeBatch(const WriteThread::WriteGroup& write_group,
                       WriteBatch* tmp_batch, size_t* write_with_wal,
                       WriteBatch** to_be_cached_state);

// Overload that writes one already-merged batch.
IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer,
                    uint64_t* log_used, uint64_t* log_size);

// Overload that takes a whole write group plus the sync flags.
IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group,
                    log::Writer* log_writer, uint64_t* log_used,
                    bool need_log_sync, bool need_log_dir_sync,
                    SequenceNumber sequence);

IOStatus ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
                              uint64_t* log_used,
                              SequenceNumber* last_sequence, size_t seq_inc);

// Used by WriteImpl to update bg_error_ if paranoid check is enabled.
void WriteStatusCheck(const Status& status);

// Used by WriteImpl to update bg_error_ when IO error happens, e.g., write
// WAL, sync WAL fails, if paranoid check is enabled.
void IOStatusCheck(const IOStatus& status);

// Used by WriteImpl to update bg_error_ in case of memtable insert error.
void MemTableInsertStatusCheck(const Status& memtable_insert_status);

#ifndef ROCKSDB_LITE

// Names of the files produced are delivered through `output_file_names`.
// NOTE(review): inferred from the parameter name -- confirm.
Status CompactFilesImpl(const CompactionOptions& compact_options,
                        ColumnFamilyData* cfd, Version* version,
                        const std::vector& input_file_names,
                        std::vector* const output_file_names,
                        const int output_level, int output_path_id,
                        JobContext* job_context, LogBuffer* log_buffer,
                        CompactionJobInfo* compaction_job_info);

// Wait for current IngestExternalFile() calls to finish.
// REQUIRES: mutex_ held
void WaitForIngestFile();

#else
// IngestExternalFile is not supported in ROCKSDB_LITE so this function
// will be no-op
void WaitForIngestFile() {}
#endif  // ROCKSDB_LITE

ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);

void MaybeScheduleFlushOrCompaction();

// A flush request specifies the column families to flush as well as the
// largest memtable id to persist for each column family. Once all the
// memtables whose IDs are smaller than or equal to this per-column-family
// specified value have been flushed, this flush request is considered to have
// completed its work of flushing this column family. After completing the
// work for all column families in this request, this flush is considered
// complete.
typedef std::vector> FlushRequest;

// The generated request is written to `req`.
void GenerateFlushRequest(const autovector& cfds,
                          FlushRequest* req);

void SchedulePendingFlush(const FlushRequest& req, FlushReason flush_reason);

void SchedulePendingCompaction(ColumnFamilyData* cfd);
// Takes both strings by value; the remaining parameters mirror the fields of
// PurgeFileInfo.
void SchedulePendingPurge(std::string fname, std::string dir_to_sync,
                          FileType type, uint64_t number, int job_id);
// Static entry point that takes an opaque `void*` argument.
static void BGWorkCompaction(void* arg);
// Runs a pre-chosen universal compaction involving bottom level in a
// separate, bottom-pri thread pool.
// Static entry points below all take an opaque `void*` argument.
// NOTE(review): presumably they are handed to the thread pool as callbacks
// and cast `arg` back to the matching argument struct -- confirm against the
// definitions.
static void BGWorkBottomCompaction(void* arg);
static void BGWorkFlush(void* arg);
static void BGWorkPurge(void* arg);
static void UnscheduleCompactionCallback(void* arg);
static void UnscheduleFlushCallback(void* arg);

void BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
                              Env::Priority thread_pri);
void BackgroundCallFlush(Env::Priority thread_pri);
void BackgroundCallPurge();

// `madeProgress` is an out-parameter in both functions below.
Status BackgroundCompaction(bool* madeProgress, JobContext* job_context,
                            LogBuffer* log_buffer,
                            PrepickedCompaction* prepicked_compaction,
                            Env::Priority thread_pri);
Status BackgroundFlush(bool* madeProgress, JobContext* job_context,
                       LogBuffer* log_buffer, FlushReason* reason,
                       Env::Priority thread_pri);

bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
                             const std::vector& inputs,
                             bool* sfm_bookkeeping, LogBuffer* log_buffer);

// Request compaction tasks token from compaction thread limiter.
// It always succeeds if force = true or limiter is disabled.
bool RequestCompactionToken(ColumnFamilyData* cfd, bool force,
                            std::unique_ptr* token,
                            LogBuffer* log_buffer);

// Schedule background tasks
void StartTimedTasks();

void PrintStatistics();

size_t EstimateInMemoryStatsHistorySize() const;

// persist stats to column family "_persistent_stats"
void PersistStats();

// dump rocksdb.stats to LOG
void DumpStats();

// Return the minimum empty level that could hold the total data in the
// input level. Return the input level, if such level could not be found.
int FindMinimumEmptyLevelFitting(ColumnFamilyData* cfd,
                                 const MutableCFOptions& mutable_cf_options,
                                 int level);

// Move the files in the input level to the target level.
// If target_level < 0, automatically calculate the minimum level that could
// hold the data set.
// `target_level` defaults to -1, i.e. a negative value.
Status ReFitLevel(ColumnFamilyData* cfd, int level, int target_level = -1);

// helper functions for adding and removing from flush & compaction queues
void AddToCompactionQueue(ColumnFamilyData* cfd);
ColumnFamilyData* PopFirstFromCompactionQueue();
FlushRequest PopFirstFromFlushQueue();

// Pick the first unthrottled compaction with task token from queue.
// The token is delivered through the out-parameter `token`.
ColumnFamilyData* PickCompactionFromQueue(
    std::unique_ptr* token, LogBuffer* log_buffer);

// helper function to call after some of the logs_ were synced
void MarkLogsSynced(uint64_t up_to, bool synced_dir, const Status& status);

// NOTE(review): `lock` presumably controls whether this function acquires the
// DB mutex itself -- confirm against the definition.
SnapshotImpl* GetSnapshotImpl(bool is_write_conflict_boundary,
                              bool lock = true);

uint64_t GetMaxTotalWalSize() const;

FSDirectory* GetDataDir(ColumnFamilyData* cfd, size_t path_id) const;

Status CloseHelper();

void WaitForBackgroundWork();

// Background threads call this function, which is just a wrapper around
// the InstallSuperVersion() function. Background threads carry
// sv_context which can have new_superversion already
// allocated.
// All ColumnFamily state changes go through this function. Here we analyze
// the new state and we schedule background work if we detect that the new
// state needs flush or compaction.
void InstallSuperVersionAndScheduleWork( ColumnFamilyData* cfd, SuperVersionContext* sv_context, const MutableCFOptions& mutable_cf_options); bool GetIntPropertyInternal(ColumnFamilyData* cfd, const DBPropertyInfo& property_info, bool is_locked, uint64_t* value); bool GetPropertyHandleOptionsStatistics(std::string* value); bool HasPendingManualCompaction(); bool HasExclusiveManualCompaction(); void AddManualCompaction(ManualCompactionState* m); void RemoveManualCompaction(ManualCompactionState* m); bool ShouldntRunManualCompaction(ManualCompactionState* m); bool HaveManualCompaction(ColumnFamilyData* cfd); bool MCOverlap(ManualCompactionState* m, ManualCompactionState* m1); #ifndef ROCKSDB_LITE void BuildCompactionJobInfo(const ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, const int job_id, const Version* current, CompactionJobInfo* compaction_job_info) const; // Reserve the next 'num' file numbers for to-be-ingested external SST files, // and return the current file_number in 'next_file_number'. // Write a version edit to the MANIFEST. Status ReserveFileNumbersBeforeIngestion( ColumnFamilyData* cfd, uint64_t num, std::unique_ptr::iterator>& pending_output_elem, uint64_t* next_file_number); #endif //! 
ROCKSDB_LITE bool ShouldPurge(uint64_t file_number) const; void MarkAsGrabbedForPurge(uint64_t file_number); size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; } Status CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, size_t preallocate_block_size, log::Writer** new_log); // Validate self-consistency of DB options static Status ValidateOptions(const DBOptions& db_options); // Validate self-consistency of DB options and its consistency with cf options static Status ValidateOptions( const DBOptions& db_options, const std::vector& column_families); // Utility function to do some debug validation and sort the given vector // of MultiGet keys void PrepareMultiGetKeys( const size_t num_keys, bool sorted, autovector* key_ptrs); // A structure to hold the information required to process MultiGet of keys // belonging to one column family. For a multi column family MultiGet, there // will be a container of these objects. struct MultiGetColumnFamilyData { ColumnFamilyHandle* cf; ColumnFamilyData* cfd; // For the batched MultiGet which relies on sorted keys, start specifies // the index of first key belonging to this column family in the sorted // list. 
size_t start; // For the batched MultiGet case, num_keys specifies the number of keys // belonging to this column family in the sorted list size_t num_keys; // SuperVersion for the column family obtained in a manner that ensures a // consistent view across all column families in the DB SuperVersion* super_version; MultiGetColumnFamilyData(ColumnFamilyHandle* column_family, SuperVersion* sv) : cf(column_family), cfd(static_cast(cf)->cfd()), start(0), num_keys(0), super_version(sv) {} MultiGetColumnFamilyData(ColumnFamilyHandle* column_family, size_t first, size_t count, SuperVersion* sv) : cf(column_family), cfd(static_cast(cf)->cfd()), start(first), num_keys(count), super_version(sv) {} MultiGetColumnFamilyData() = default; }; // A common function to obtain a consistent snapshot, which can be implicit // if the user doesn't specify a snapshot in read_options, across // multiple column families for MultiGet. It will attempt to get an implicit // snapshot without acquiring the db_mutes, but will give up after a few // tries and acquire the mutex if a memtable flush happens. The template // allows both the batched and non-batched MultiGet to call this with // either an std::unordered_map or autovector of column families. // // If callback is non-null, the callback is refreshed with the snapshot // sequence number // // A return value of true indicates that the SuperVersions were obtained // from the ColumnFamilyData, whereas false indicates they are thread // local template bool MultiCFSnapshot( const ReadOptions& read_options, ReadCallback* callback, std::function& iter_deref_func, T* cf_list, SequenceNumber* snapshot); // The actual implementation of the batching MultiGet. The caller is expected // to have acquired the SuperVersion and pass in a snapshot sequence number // in order to construct the LookupKeys. The start_key and num_keys specify // the range of keys in the sorted_keys vector for a single column family. 
Status MultiGetImpl( const ReadOptions& read_options, size_t start_key, size_t num_keys, autovector* sorted_keys, SuperVersion* sv, SequenceNumber snap_seqnum, ReadCallback* callback, bool* is_blob_index); Status DisableFileDeletionsWithLock(); // table_cache_ provides its own synchronization std::shared_ptr table_cache_; // Lock over the persistent DB state. Non-nullptr iff successfully acquired. FileLock* db_lock_; // In addition to mutex_, log_write_mutex_ protected writes to stats_history_ InstrumentedMutex stats_history_mutex_; // In addition to mutex_, log_write_mutex_ protected writes to logs_ and // logfile_number_. With two_write_queues it also protects alive_log_files_, // and log_empty_. Refer to the definition of each variable below for more // details. // Note: to avoid dealock, if needed to acquire both log_write_mutex_ and // mutex_, the order should be first mutex_ and then log_write_mutex_. InstrumentedMutex log_write_mutex_; std::atomic shutting_down_; std::atomic manual_compaction_paused_; // This condition variable is signaled on these conditions: // * whenever bg_compaction_scheduled_ goes down to 0 // * if AnyManualCompaction, whenever a compaction finishes, even if it hasn't // made any progress // * whenever a compaction made any progress // * whenever bg_flush_scheduled_ or bg_purge_scheduled_ value decreases // (i.e. whenever a flush is done, even if it didn't make any progress) // * whenever there is an error in background purge, flush or compaction // * whenever num_running_ingest_file_ goes to 0. // * whenever pending_purge_obsolete_files_ goes to 0. // * whenever disable_delete_obsolete_files_ goes to 0. // * whenever SetOptions successfully updates options. // * whenever a column family is dropped. InstrumentedCondVar bg_cv_; // Writes are protected by locking both mutex_ and log_write_mutex_, and reads // must be under either mutex_ or log_write_mutex_. 
Since after ::Open, // logfile_number_ is currently updated only in write_thread_, it can be read // from the same write_thread_ without any locks. uint64_t logfile_number_; std::deque log_recycle_files_; // a list of log files that we can recycle bool log_dir_synced_; // Without two_write_queues, read and writes to log_empty_ are protected by // mutex_. Since it is currently updated/read only in write_thread_, it can be // accessed from the same write_thread_ without any locks. With // two_write_queues writes, where it can be updated in different threads, // read and writes are protected by log_write_mutex_ instead. This is to avoid // expesnive mutex_ lock during WAL write, which update log_empty_. bool log_empty_; ColumnFamilyHandleImpl* persist_stats_cf_handle_; bool persistent_stats_cfd_exists_ = true; // Without two_write_queues, read and writes to alive_log_files_ are // protected by mutex_. However since back() is never popped, and push_back() // is done only from write_thread_, the same thread can access the item // reffered by back() without mutex_. With two_write_queues_, writes // are protected by locking both mutex_ and log_write_mutex_, and reads must // be under either mutex_ or log_write_mutex_. std::deque alive_log_files_; // Log files that aren't fully synced, and the current log file. // Synchronization: // - push_back() is done from write_thread_ with locked mutex_ and // log_write_mutex_ // - pop_front() is done from any thread with locked mutex_ and // log_write_mutex_ // - reads are done with either locked mutex_ or log_write_mutex_ // - back() and items with getting_synced=true are not popped, // - The same thread that sets getting_synced=true will reset it. 
// - it follows that the object referred to by back() can be safely read from // the write_thread_ without using mutex // - it follows that the items with getting_synced=true can be safely read // from the same thread that has set getting_synced=true std::deque logs_; // Signaled when getting_synced becomes false for some of the logs_. InstrumentedCondVar log_sync_cv_; // This is the app-level state that is written to the WAL but will be used // only during recovery. Using this feature enables not writing the state to // memtable on normal writes and hence improving the throughput. Each new // write of the state will replace the previous state entirely even if the // keys in the two consecutive states do not overlap. // It is protected by log_write_mutex_ when two_write_queues_ is enabled. // Otherwise only the head of write_thread_ can access it. WriteBatch cached_recoverable_state_; std::atomic cached_recoverable_state_empty_ = {true}; std::atomic total_log_size_; // If this is non-empty, we need to delete these log files in background // threads. Protected by db mutex. autovector logs_to_free_; bool is_snapshot_supported_; std::map> stats_history_; std::map stats_slice_; bool stats_slice_initialized_ = false; Directories directories_; WriteBufferManager* write_buffer_manager_; WriteThread write_thread_; WriteBatch tmp_batch_; // The write thread when the writers have no memtable write. This will be used // in 2PC to batch the prepares separately from the serial commit. WriteThread nonmem_write_thread_; WriteController write_controller_; // Size of the last batch group. In slowdown mode, next write needs to // sleep if it uses up the quota. // Note: This is to protect memtable and compaction. If the batch only writes // to the WAL its size need not be included in this. 
uint64_t last_batch_group_size_; FlushScheduler flush_scheduler_; TrimHistoryScheduler trim_history_scheduler_; SnapshotList snapshots_; // For each background job, pending_outputs_ keeps the current file number at // the time that background job started. // FindObsoleteFiles()/PurgeObsoleteFiles() never deletes any file that has // number bigger than any of the file number in pending_outputs_. Since file // numbers grow monotonically, this also means that pending_outputs_ is always // sorted. After a background job is done executing, its file number is // deleted from pending_outputs_, which allows PurgeObsoleteFiles() to clean // it up. // State is protected with db mutex. std::list pending_outputs_; // flush_queue_ and compaction_queue_ hold column families that we need to // flush and compact, respectively. // A column family is inserted into flush_queue_ when it satisfies condition // cfd->imm()->IsFlushPending() // A column family is inserted into compaction_queue_ when it satisfied // condition cfd->NeedsCompaction() // Column families in this list are all Ref()-erenced // TODO(icanadi) Provide some kind of ReferencedColumnFamily class that will // do RAII on ColumnFamilyData // Column families are in this queue when they need to be flushed or // compacted. Consumers of these queues are flush and compaction threads. When // column family is put on this queue, we increase unscheduled_flushes_ and // unscheduled_compactions_. When these variables are bigger than zero, that // means we need to schedule background threads for flush and compaction. // Once the background threads are scheduled, we decrease unscheduled_flushes_ // and unscheduled_compactions_. That way we keep track of number of // compaction and flush threads we need to schedule. 
This scheduling is done // in MaybeScheduleFlushOrCompaction() // invariant(column family present in flush_queue_ <==> // ColumnFamilyData::pending_flush_ == true) std::deque flush_queue_; // invariant(column family present in compaction_queue_ <==> // ColumnFamilyData::pending_compaction_ == true) std::deque compaction_queue_; // A map to store file numbers and filenames of the files to be purged std::unordered_map purge_files_; // A vector to store the file numbers that have been assigned to certain // JobContext. Current implementation tracks table and blob files only. std::unordered_set files_grabbed_for_purge_; // A queue to store log writers to close std::deque logs_to_free_queue_; std::deque superversions_to_free_queue_; int unscheduled_flushes_; int unscheduled_compactions_; // count how many background compactions are running or have been scheduled in // the BOTTOM pool int bg_bottom_compaction_scheduled_; // count how many background compactions are running or have been scheduled int bg_compaction_scheduled_; // stores the number of compactions are currently running int num_running_compactions_; // number of background memtable flush jobs, submitted to the HIGH pool int bg_flush_scheduled_; // stores the number of flushes are currently running int num_running_flushes_; // number of background obsolete file purge jobs, submitted to the HIGH pool int bg_purge_scheduled_; std::deque manual_compaction_dequeue_; // shall we disable deletion of obsolete files // if 0 the deletion is enabled. // if non-zero, files will not be getting deleted // This enables two different threads to call // EnableFileDeletions() and DisableFileDeletions() // without any synchronization int disable_delete_obsolete_files_; // Number of times FindObsoleteFiles has found deletable files and the // corresponding call to PurgeObsoleteFiles has not yet finished. int pending_purge_obsolete_files_; // last time when DeleteObsoleteFiles with full scan was executed. 
Originally // initialized with startup time. uint64_t delete_obsolete_files_last_run_; // last time stats were dumped to LOG std::atomic last_stats_dump_time_microsec_; // The thread that wants to switch memtable, can wait on this cv until the // pending writes to memtable finish. std::condition_variable switch_cv_; // The mutex used by switch_cv_. mutex_ should be acquired beforehand. std::mutex switch_mutex_; // Number of threads intending to write to memtable std::atomic pending_memtable_writes_ = {}; // Each flush or compaction gets its own job id. this counter makes sure // they're unique std::atomic next_job_id_; // A flag indicating whether the current rocksdb database has any // data that is not yet persisted into either WAL or SST file. // Used when disableWAL is true. std::atomic has_unpersisted_data_; // if an attempt was made to flush all column families that // the oldest log depends on but uncommitted data in the oldest // log prevents the log from being released. // We must attempt to free the dependent memtables again // at a later time after the transaction in the oldest // log is fully committed. bool unable_to_release_oldest_log_; static const int KEEP_LOG_FILE_NUM = 1000; // MSVC version 1800 still does not have constexpr for ::max() static const uint64_t kNoTimeOut = port::kMaxUint64; std::string db_absolute_path_; // Number of running IngestExternalFile() or CreateColumnFamilyWithImport() // calls. 
// REQUIRES: mutex held int num_running_ingest_file_; #ifndef ROCKSDB_LITE WalManager wal_manager_; #endif // ROCKSDB_LITE // Unified interface for logging events EventLogger event_logger_; // A value of > 0 temporarily disables scheduling of background work int bg_work_paused_; // A value of > 0 temporarily disables scheduling of background compaction int bg_compaction_paused_; // Guard against multiple concurrent refitting bool refitting_level_; // Indicate DB was opened successfully bool opened_successfully_; // The min threshold to trigger bottommost compaction for removing // garbage, among all column families. SequenceNumber bottommost_files_mark_threshold_ = kMaxSequenceNumber; LogsWithPrepTracker logs_with_prep_tracker_; // Callback for compaction to check if a key is visible to a snapshot. // REQUIRES: mutex held std::unique_ptr snapshot_checker_; // Callback for when the cached_recoverable_state_ is written to memtable // Only to be set during initialization std::unique_ptr recoverable_state_pre_release_callback_; // handle for scheduling stats dumping at fixed intervals // REQUIRES: mutex locked std::unique_ptr thread_dump_stats_; // handle for scheduling stats snapshotting at fixed intervals // REQUIRES: mutex locked std::unique_ptr thread_persist_stats_; // When set, we use a separate queue for writes that don't write to memtable. // In 2PC these are the writes at Prepare phase. const bool two_write_queues_; const bool manual_wal_flush_; // LastSequence also indicates last published sequence visible to the // readers. Otherwise LastPublishedSequence should be used. const bool last_seq_same_as_publish_seq_; // It indicates that a customized gc algorithm must be used for // flush/compaction and if it is not provided via SnapshotChecker, we should // disable gc to be safe. const bool use_custom_gc_; // Flag to indicate that the DB instance shutdown has been initiated. 
This // is different from shutting_down_ atomic in that it is set at the beginning // of shutdown sequence, specifically in order to prevent any background // error recovery from going on in parallel. The latter, shutting_down_, // is set a little later during the shutdown after scheduling memtable // flushes std::atomic shutdown_initiated_; // Flag to indicate whether sst_file_manager object was allocated in // DB::Open() or passed to us bool own_sfm_; // Clients must periodically call SetPreserveDeletesSequenceNumber() // to advance this seqnum. Default value is 0 which means ALL deletes are // preserved. Note that this has no effect if DBOptions.preserve_deletes // is set to false. std::atomic preserve_deletes_seqnum_; const bool preserve_deletes_; // Flag to check whether Close() has been called on this DB bool closed_; ErrorHandler error_handler_; // Condition variable to coordinate installation of atomic flush results. // With atomic flush, each bg thread installs the result of flushing multiple // column families, and different threads can flush different column // families. It's difficult to rely on one thread to perform batch // installation for all threads. This is different from the non-atomic flush // case. // atomic_flush_install_cv_ makes sure that threads install atomic flush // results sequentially. Flush results of memtables with lower IDs get // installed to MANIFEST first. InstrumentedCondVar atomic_flush_install_cv_; bool wal_in_db_path_; }; extern Options SanitizeOptions(const std::string& db, const Options& src); extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src); extern CompressionType GetCompressionFlush( const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options); // Return the earliest log file to keep after the memtable flush is // finalized. // `cfd_to_flush` is the column family whose memtable (specified in // `memtables_to_flush`) will be flushed and thus will not depend on any WAL // file. 
// The function is only applicable to 2pc mode. extern uint64_t PrecomputeMinLogNumberToKeep( VersionSet* vset, const ColumnFamilyData& cfd_to_flush, autovector edit_list, const autovector& memtables_to_flush, LogsWithPrepTracker* prep_tracker); // `cfd_to_flush` is the column family whose memtable will be flushed and thus // will not depend on any WAL file. nullptr means no memtable is being flushed. // The function is only applicable to 2pc mode. extern uint64_t FindMinPrepLogReferencedByMemTable( VersionSet* vset, const ColumnFamilyData* cfd_to_flush, const autovector& memtables_to_flush); // Fix user-supplied options to be reasonable template static void ClipToRange(T* ptr, V minvalue, V maxvalue) { if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; if (static_cast(*ptr) < minvalue) *ptr = minvalue; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_compaction_flush.cc000066400000000000000000003625651370372246700226130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_impl/db_impl.h" #include #include "db/builder.h" #include "db/error_handler.h" #include "db/event_helpers.h" #include "file/sst_file_manager_impl.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_util.h" #include "test_util/sync_point.h" #include "util/cast_util.h" #include "util/concurrent_task_limiter_impl.h" namespace ROCKSDB_NAMESPACE { /* Decides whether a compaction of `inputs` for `cfd` may proceed with respect to disk space. In non-LITE builds with an SstFileManager configured, the check is delegated to it (together with the current background error) and, when it reports enough room, *sfm_reserved_compact_space is set to true. With no SstFileManager, or under ROCKSDB_LITE, the result is true and *sfm_reserved_compact_space is not written. On a negative result a message is appended to log_buffer and COMPACTION_CANCELLED is ticked. Returns the final value of enough_room, which a test sync-point callback may have overridden. */ bool DBImpl::EnoughRoomForCompaction( ColumnFamilyData* cfd, const std::vector& inputs, bool* sfm_reserved_compact_space, LogBuffer* log_buffer) { // Check if we have enough room to do the compaction bool enough_room = true; #ifndef ROCKSDB_LITE auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); if (sfm) { // Pass the current bg_error_ to SFM so it can decide what checks to // perform. If this DB instance hasn't seen any error yet, the SFM can be // optimistic and not do disk space checks enough_room = sfm->EnoughRoomForCompaction(cfd, inputs, error_handler_.GetBGError()); if (enough_room) { *sfm_reserved_compact_space = true; } } #else (void)cfd; (void)inputs; (void)sfm_reserved_compact_space; #endif // ROCKSDB_LITE if (!enough_room) { // Just in case tests want to change the value of enough_room TEST_SYNC_POINT_CALLBACK( "DBImpl::BackgroundCompaction():CancelledCompaction", &enough_room); ROCKS_LOG_BUFFER(log_buffer, "Cancelled compaction because not enough room"); RecordTick(stats_, COMPACTION_CANCELLED, 1); } return enough_room; } /* Attempts to take a compaction token from the column family's compaction_thread_limiter. *token must be null on entry. Returns true if no limiter is configured (*token stays null) or if GetToken(force) handed out a token (the grant is logged to log_buffer); returns false if no token was granted. */ bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force, std::unique_ptr* token, LogBuffer* log_buffer) { assert(*token == nullptr); auto limiter = static_cast( cfd->ioptions()->compaction_thread_limiter.get()); if (limiter == nullptr) { return true; } *token = limiter->GetToken(force); if (*token != nullptr) { ROCKS_LOG_BUFFER(log_buffer, "Thread limiter [%s] increase [%s] compaction task, " "force: %s, tasks after: %d", 
limiter->GetName().c_str(), cfd->GetName().c_str(), force ? "true" : "false", limiter->GetOutstandingTask()); return true; } return false; } /* Syncs every entry of logs_ whose number is below the current logfile_number_. Must be called with mutex_ held. First waits on log_sync_cv_ while the front log is older than the current one and is marked getting_synced, then marks each older log as getting_synced and collects its writer. mutex_ is released while the writers are synced (and closed when recycle_log_file_num > 0) and the WAL directory is fsynced, and is re-acquired before MarkLogsSynced is called. On failure a background error with reason kFlush is set. Returns the resulting IOStatus. */ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start"); mutex_.AssertHeld(); autovector logs_to_sync; uint64_t current_log_number = logfile_number_; while (logs_.front().number < current_log_number && logs_.front().getting_synced) { log_sync_cv_.Wait(); } for (auto it = logs_.begin(); it != logs_.end() && it->number < current_log_number; ++it) { auto& log = *it; assert(!log.getting_synced); log.getting_synced = true; logs_to_sync.push_back(log.writer); } IOStatus io_s; if (!logs_to_sync.empty()) { mutex_.Unlock(); for (log::Writer* log : logs_to_sync) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "[JOB %d] Syncing log #%" PRIu64, job_context->job_id, log->get_log_number()); io_s = log->file()->Sync(immutable_db_options_.use_fsync); if (!io_s.ok()) { break; } if (immutable_db_options_.recycle_log_file_num > 0) { io_s = log->Close(); if (!io_s.ok()) { break; } } } if (io_s.ok()) { io_s = directories_.GetWalDir()->Fsync(IOOptions(), nullptr); } mutex_.Lock(); // "number <= current_log_number - 1" is equivalent to // "number < current_log_number". 
MarkLogsSynced(current_log_number - 1, true, io_s); if (!io_s.ok()) { error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush); TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed"); return io_s; } } return io_s; } Status DBImpl::FlushMemTableToOutputFile( ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, bool* made_progress, JobContext* job_context, SuperVersionContext* superversion_context, std::vector& snapshot_seqs, SequenceNumber earliest_write_conflict_snapshot, SnapshotChecker* snapshot_checker, LogBuffer* log_buffer, Env::Priority thread_pri) { mutex_.AssertHeld(); assert(cfd->imm()->NumNotFlushed() != 0); assert(cfd->imm()->IsFlushPending()); FlushJob flush_job( dbname_, cfd, immutable_db_options_, mutable_cf_options, nullptr /* memtable_id */, file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, job_context, log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U), GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_, &event_logger_, mutable_cf_options.report_bg_io_stats, true /* sync_output_directory */, true /* write_manifest */, thread_pri); FileMetaData file_meta; TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"); flush_job.PickMemTable(); TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables"); #ifndef ROCKSDB_LITE // may temporarily unlock and lock the mutex. NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id); #endif // ROCKSDB_LITE Status s; IOStatus io_s; if (logfile_number_ > 0 && versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1) { // If there are more than one column families, we need to make sure that // all the log files except the most recent one are synced. Otherwise if // the host crashes after flushing and before WAL is persistent, the // flushed SST may contain data from write batches whose updates to // other column families are missing. 
// SyncClosedLogs() may unlock and re-lock the db_mutex. io_s = SyncClosedLogs(job_context); s = io_s; } else { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip"); } // Within flush_job.Run, rocksdb may call event listener to notify // file creation and deletion. // // Note that flush_job.Run will unlock and lock the db_mutex, // and EventListener callback will be called when the db_mutex // is unlocked by the current thread. if (s.ok()) { s = flush_job.Run(&logs_with_prep_tracker_, &file_meta); } else { flush_job.Cancel(); } io_s = flush_job.io_status(); if (s.ok()) { InstallSuperVersionAndScheduleWork(cfd, superversion_context, mutable_cf_options); if (made_progress) { *made_progress = true; } VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n", cfd->GetName().c_str(), cfd->current()->storage_info()->LevelSummary(&tmp)); } if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) { if (!io_s.ok() && !io_s.IsShutdownInProgress() && !io_s.IsColumnFamilyDropped()) { // Error while writing to MANIFEST. // In fact, versions_->io_status() can also be the result of renaming // CURRENT file. With current code, it's just difficult to tell. So just // be pessimistic and try write to a new MANIFEST. // TODO: distinguish between MANIFEST write and CURRENT renaming auto err_reason = versions_->io_status().ok() ? BackgroundErrorReason::kFlush : BackgroundErrorReason::kManifestWrite; error_handler_.SetBGError(io_s, err_reason); } else { Status new_bg_error = s; error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } if (s.ok()) { #ifndef ROCKSDB_LITE // may temporarily unlock and lock the mutex. 
NotifyOnFlushCompleted(cfd, mutable_cf_options, flush_job.GetCommittedFlushJobsInfo()); auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); if (sfm) { // Notify sst_file_manager that a new file was added std::string file_path = MakeTableFileName( cfd->ioptions()->cf_paths[0].path, file_meta.fd.GetNumber()); sfm->OnAddFile(file_path); if (sfm->IsMaxAllowedSpaceReached()) { Status new_bg_error = Status::SpaceLimit("Max allowed space was reached"); TEST_SYNC_POINT_CALLBACK( "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached", &new_bg_error); error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } #endif // ROCKSDB_LITE } TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:Finish"); return s; } Status DBImpl::FlushMemTablesToOutputFiles( const autovector& bg_flush_args, bool* made_progress, JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) { if (immutable_db_options_.atomic_flush) { return AtomicFlushMemTablesToOutputFiles( bg_flush_args, made_progress, job_context, log_buffer, thread_pri); } std::vector snapshot_seqs; SequenceNumber earliest_write_conflict_snapshot; SnapshotChecker* snapshot_checker; GetSnapshotContext(job_context, &snapshot_seqs, &earliest_write_conflict_snapshot, &snapshot_checker); Status status; for (auto& arg : bg_flush_args) { ColumnFamilyData* cfd = arg.cfd_; MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); SuperVersionContext* superversion_context = arg.superversion_context_; Status s = FlushMemTableToOutputFile( cfd, mutable_cf_options, made_progress, job_context, superversion_context, snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, log_buffer, thread_pri); if (!s.ok()) { status = s; if (!s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) { // At this point, DB is not shutting down, nor is cfd dropped. // Something is wrong, thus we break out of the loop. 
break; } } } return status; } /* * Atomically flushes multiple column families. * * For each column family, all memtables with ID smaller than or equal to the * ID specified in bg_flush_args will be flushed. Only after all column * families finish flush will this function commit to MANIFEST. If any of the * column families are not flushed successfully, this function does not have * any side-effect on the state of the database. */ Status DBImpl::AtomicFlushMemTablesToOutputFiles( const autovector& bg_flush_args, bool* made_progress, JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) { mutex_.AssertHeld(); autovector cfds; for (const auto& arg : bg_flush_args) { cfds.emplace_back(arg.cfd_); } #ifndef NDEBUG for (const auto cfd : cfds) { assert(cfd->imm()->NumNotFlushed() != 0); assert(cfd->imm()->IsFlushPending()); } #endif /* !NDEBUG */ std::vector snapshot_seqs; SequenceNumber earliest_write_conflict_snapshot; SnapshotChecker* snapshot_checker; GetSnapshotContext(job_context, &snapshot_seqs, &earliest_write_conflict_snapshot, &snapshot_checker); autovector distinct_output_dirs; autovector distinct_output_dir_paths; std::vector> jobs; std::vector all_mutable_cf_options; int num_cfs = static_cast(cfds.size()); all_mutable_cf_options.reserve(num_cfs); for (int i = 0; i < num_cfs; ++i) { auto cfd = cfds[i]; FSDirectory* data_dir = GetDataDir(cfd, 0U); const std::string& curr_path = cfd->ioptions()->cf_paths[0].path; // Add to distinct output directories if eligible. Use linear search. Since // the number of elements in the vector is not large, performance should be // tolerable. 
bool found = false; for (const auto& path : distinct_output_dir_paths) { if (path == curr_path) { found = true; break; } } if (!found) { distinct_output_dir_paths.emplace_back(curr_path); distinct_output_dirs.emplace_back(data_dir); } all_mutable_cf_options.emplace_back(*cfd->GetLatestMutableCFOptions()); const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back(); const uint64_t* max_memtable_id = &(bg_flush_args[i].max_memtable_id_); jobs.emplace_back(new FlushJob( dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id, file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, job_context, log_buffer, directories_.GetDbDir(), data_dir, GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_, &event_logger_, mutable_cf_options.report_bg_io_stats, false /* sync_output_directory */, false /* write_manifest */, thread_pri)); jobs.back()->PickMemTable(); } std::vector file_meta(num_cfs); Status s; IOStatus io_s; assert(num_cfs == static_cast(jobs.size())); #ifndef ROCKSDB_LITE for (int i = 0; i != num_cfs; ++i) { const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.at(i); // may temporarily unlock and lock the mutex. NotifyOnFlushBegin(cfds[i], &file_meta[i], mutable_cf_options, job_context->job_id); } #endif /* !ROCKSDB_LITE */ if (logfile_number_ > 0) { // TODO (yanqin) investigate whether we should sync the closed logs for // single column family case. io_s = SyncClosedLogs(job_context); s = io_s; } // exec_status stores the execution status of flush_jobs as // autovector> exec_status; autovector io_status; for (int i = 0; i != num_cfs; ++i) { // Initially all jobs are not executed, with status OK. exec_status.emplace_back(false, Status::OK()); io_status.emplace_back(IOStatus::OK()); } if (s.ok()) { // TODO (yanqin): parallelize jobs with threads. 
for (int i = 1; i != num_cfs; ++i) { exec_status[i].second = jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i]); exec_status[i].first = true; io_status[i] = jobs[i]->io_status(); } if (num_cfs > 1) { TEST_SYNC_POINT( "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:1"); TEST_SYNC_POINT( "DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2"); } assert(exec_status.size() > 0); assert(!file_meta.empty()); exec_status[0].second = jobs[0]->Run(&logs_with_prep_tracker_, &file_meta[0]); exec_status[0].first = true; io_status[0] = jobs[0]->io_status(); Status error_status; for (const auto& e : exec_status) { if (!e.second.ok()) { s = e.second; if (!e.second.IsShutdownInProgress() && !e.second.IsColumnFamilyDropped()) { // If a flush job did not return OK, and the CF is not dropped, and // the DB is not shutting down, then we have to return this result to // caller later. error_status = e.second; } } } s = error_status.ok() ? s : error_status; } if (io_s.ok()) { IOStatus io_error = IOStatus::OK(); for (int i = 0; i != static_cast(io_status.size()); i++) { if (!io_status[i].ok() && !io_status[i].IsShutdownInProgress() && !io_status[i].IsColumnFamilyDropped()) { io_error = io_status[i]; } } io_s = io_error; if (s.ok() && !io_s.ok()) { s = io_s; } } if (s.IsColumnFamilyDropped()) { s = Status::OK(); } if (s.ok() || s.IsShutdownInProgress()) { // Sync on all distinct output directories. for (auto dir : distinct_output_dirs) { if (dir != nullptr) { Status error_status = dir->Fsync(IOOptions(), nullptr); if (!error_status.ok()) { s = error_status; break; } } } } else { // Need to undo atomic flush if something went wrong, i.e. s is not OK and // it is not because of CF drop. // Have to cancel the flush jobs that have NOT executed because we need to // unref the versions. 
for (int i = 0; i != num_cfs; ++i) { if (!exec_status[i].first) { jobs[i]->Cancel(); } } for (int i = 0; i != num_cfs; ++i) { if (exec_status[i].first && exec_status[i].second.ok()) { auto& mems = jobs[i]->GetMemTables(); cfds[i]->imm()->RollbackMemtableFlush(mems, file_meta[i].fd.GetNumber()); } } } if (s.ok()) { auto wait_to_install_func = [&]() { bool ready = true; for (size_t i = 0; i != cfds.size(); ++i) { const auto& mems = jobs[i]->GetMemTables(); if (cfds[i]->IsDropped()) { // If the column family is dropped, then do not wait. continue; } else if (!mems.empty() && cfds[i]->imm()->GetEarliestMemTableID() < mems[0]->GetID()) { // If a flush job needs to install the flush result for mems and // mems[0] is not the earliest memtable, it means another thread must // be installing flush results for the same column family, then the // current thread needs to wait. ready = false; break; } else if (mems.empty() && cfds[i]->imm()->GetEarliestMemTableID() <= bg_flush_args[i].max_memtable_id_) { // If a flush job does not need to install flush results, then it has // to wait until all memtables up to max_memtable_id_ (inclusive) are // installed. ready = false; break; } } return ready; }; bool resuming_from_bg_err = error_handler_.IsDBStopped(); while ((!error_handler_.IsDBStopped() || error_handler_.GetRecoveryError().ok()) && !wait_to_install_func()) { atomic_flush_install_cv_.Wait(); } s = resuming_from_bg_err ? 
error_handler_.GetRecoveryError() : error_handler_.GetBGError(); } if (s.ok()) { autovector tmp_cfds; autovector*> mems_list; autovector mutable_cf_options_list; autovector tmp_file_meta; for (int i = 0; i != num_cfs; ++i) { const auto& mems = jobs[i]->GetMemTables(); if (!cfds[i]->IsDropped() && !mems.empty()) { tmp_cfds.emplace_back(cfds[i]); mems_list.emplace_back(&mems); mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]); tmp_file_meta.emplace_back(&file_meta[i]); } } s = InstallMemtableAtomicFlushResults( nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list, versions_.get(), &mutex_, tmp_file_meta, &job_context->memtables_to_free, directories_.GetDbDir(), log_buffer); } if (s.ok()) { assert(num_cfs == static_cast(job_context->superversion_contexts.size())); for (int i = 0; i != num_cfs; ++i) { if (cfds[i]->IsDropped()) { continue; } InstallSuperVersionAndScheduleWork(cfds[i], &job_context->superversion_contexts[i], all_mutable_cf_options[i]); VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n", cfds[i]->GetName().c_str(), cfds[i]->current()->storage_info()->LevelSummary(&tmp)); } if (made_progress) { *made_progress = true; } #ifndef ROCKSDB_LITE auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); assert(all_mutable_cf_options.size() == static_cast(num_cfs)); for (int i = 0; i != num_cfs; ++i) { if (cfds[i]->IsDropped()) { continue; } NotifyOnFlushCompleted(cfds[i], all_mutable_cf_options[i], jobs[i]->GetCommittedFlushJobsInfo()); if (sfm) { std::string file_path = MakeTableFileName( cfds[i]->ioptions()->cf_paths[0].path, file_meta[i].fd.GetNumber()); sfm->OnAddFile(file_path); if (sfm->IsMaxAllowedSpaceReached() && error_handler_.GetBGError().ok()) { Status new_bg_error = Status::SpaceLimit("Max allowed space was reached"); error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } } #endif // ROCKSDB_LITE } // Need to undo atomic flush if 
something went wrong, i.e. s is not OK and // it is not because of CF drop. if (!s.ok() && !s.IsColumnFamilyDropped()) { if (!io_s.ok() && !io_s.IsColumnFamilyDropped()) { // Error while writing to MANIFEST. // In fact, versions_->io_status() can also be the result of renaming // CURRENT file. With current code, it's just difficult to tell. So just // be pessimistic and try write to a new MANIFEST. // TODO: distinguish between MANIFEST write and CURRENT renaming auto err_reason = versions_->io_status().ok() ? BackgroundErrorReason::kFlush : BackgroundErrorReason::kManifestWrite; error_handler_.SetBGError(io_s, err_reason); } else { Status new_bg_error = s; error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); } } return s; } void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta, const MutableCFOptions& mutable_cf_options, int job_id) { #ifndef ROCKSDB_LITE if (immutable_db_options_.listeners.size() == 0U) { return; } mutex_.AssertHeld(); if (shutting_down_.load(std::memory_order_acquire)) { return; } bool triggered_writes_slowdown = (cfd->current()->storage_info()->NumLevelFiles(0) >= mutable_cf_options.level0_slowdown_writes_trigger); bool triggered_writes_stop = (cfd->current()->storage_info()->NumLevelFiles(0) >= mutable_cf_options.level0_stop_writes_trigger); // release lock while notifying events mutex_.Unlock(); { FlushJobInfo info{}; info.cf_id = cfd->GetID(); info.cf_name = cfd->GetName(); // TODO(yhchiang): make db_paths dynamic in case flush does not // go to L0 in the future. 
const uint64_t file_number = file_meta->fd.GetNumber(); info.file_path = MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_number); info.file_number = file_number; info.thread_id = env_->GetThreadID(); info.job_id = job_id; info.triggered_writes_slowdown = triggered_writes_slowdown; info.triggered_writes_stop = triggered_writes_stop; info.smallest_seqno = file_meta->fd.smallest_seqno; info.largest_seqno = file_meta->fd.largest_seqno; info.flush_reason = cfd->GetFlushReason(); for (auto listener : immutable_db_options_.listeners) { listener->OnFlushBegin(this, info); } } mutex_.Lock(); // no need to signal bg_cv_ as it will be signaled at the end of the // flush process. #else (void)cfd; (void)file_meta; (void)mutable_cf_options; (void)job_id; #endif // ROCKSDB_LITE } void DBImpl::NotifyOnFlushCompleted( ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, std::list>* flush_jobs_info) { #ifndef ROCKSDB_LITE assert(flush_jobs_info != nullptr); if (immutable_db_options_.listeners.size() == 0U) { return; } mutex_.AssertHeld(); if (shutting_down_.load(std::memory_order_acquire)) { return; } bool triggered_writes_slowdown = (cfd->current()->storage_info()->NumLevelFiles(0) >= mutable_cf_options.level0_slowdown_writes_trigger); bool triggered_writes_stop = (cfd->current()->storage_info()->NumLevelFiles(0) >= mutable_cf_options.level0_stop_writes_trigger); // release lock while notifying events mutex_.Unlock(); { for (auto& info : *flush_jobs_info) { info->triggered_writes_slowdown = triggered_writes_slowdown; info->triggered_writes_stop = triggered_writes_stop; for (auto listener : immutable_db_options_.listeners) { listener->OnFlushCompleted(this, *info); } } flush_jobs_info->clear(); } mutex_.Lock(); // no need to signal bg_cv_ as it will be signaled at the end of the // flush process. 
#else (void)cfd; (void)mutable_cf_options; (void)flush_jobs_info; #endif // ROCKSDB_LITE } Status DBImpl::CompactRange(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); if (options.target_path_id >= cfd->ioptions()->cf_paths.size()) { return Status::InvalidArgument("Invalid target path ID"); } bool flush_needed = true; if (begin != nullptr && end != nullptr) { // TODO(ajkr): We could also optimize away the flush in certain cases where // one/both sides of the interval are unbounded. But it requires more // changes to RangesOverlapWithMemtables. Range range(*begin, *end); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); cfd->RangesOverlapWithMemtables({range}, super_version, &flush_needed); CleanupSuperVersion(super_version); } Status s; if (flush_needed) { FlushOptions fo; fo.allow_write_stall = options.allow_write_stall; if (immutable_db_options_.atomic_flush) { autovector cfds; mutex_.Lock(); SelectColumnFamiliesForAtomicFlush(&cfds); mutex_.Unlock(); s = AtomicFlushMemTables(cfds, fo, FlushReason::kManualCompaction, false /* writes_stopped */); } else { s = FlushMemTable(cfd, fo, FlushReason::kManualCompaction, false /* writes_stopped*/); } if (!s.ok()) { LogFlush(immutable_db_options_.info_log); return s; } } constexpr int kInvalidLevel = -1; int final_output_level = kInvalidLevel; bool exclusive = options.exclusive_manual_compaction; if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal && cfd->NumberLevels() > 1) { // Always compact all files together. 
final_output_level = cfd->NumberLevels() - 1; // if bottom most level is reserved if (immutable_db_options_.allow_ingest_behind) { final_output_level--; } s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels, final_output_level, options, begin, end, exclusive, false, port::kMaxUint64); } else { int first_overlapped_level = kInvalidLevel; int max_overlapped_level = kInvalidLevel; { SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); Version* current_version = super_version->current; ReadOptions ro; ro.total_order_seek = true; bool overlap; for (int level = 0; level < current_version->storage_info()->num_non_empty_levels(); level++) { overlap = true; if (begin != nullptr && end != nullptr) { Status status = current_version->OverlapWithLevelIterator( ro, file_options_, *begin, *end, level, &overlap); if (!status.ok()) { overlap = current_version->storage_info()->OverlapInLevel( level, begin, end); } } else { overlap = current_version->storage_info()->OverlapInLevel(level, begin, end); } if (overlap) { if (first_overlapped_level == kInvalidLevel) { first_overlapped_level = level; } max_overlapped_level = level; } } CleanupSuperVersion(super_version); } if (s.ok() && first_overlapped_level != kInvalidLevel) { // max_file_num_to_ignore can be used to filter out newly created SST // files, useful for bottom level compaction in a manual compaction uint64_t max_file_num_to_ignore = port::kMaxUint64; uint64_t next_file_number = versions_->current_next_file_number(); final_output_level = max_overlapped_level; int output_level; for (int level = first_overlapped_level; level <= max_overlapped_level; level++) { // in case the compaction is universal or if we're compacting the // bottom-most level, the output level will be the same as input one. // level 0 can never be the bottommost level (i.e. 
if all files are in // level 0, we will compact to level 1) if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal || cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { output_level = level; } else if (level == max_overlapped_level && level > 0) { if (options.bottommost_level_compaction == BottommostLevelCompaction::kSkip) { // Skip bottommost level compaction continue; } else if (options.bottommost_level_compaction == BottommostLevelCompaction::kIfHaveCompactionFilter && cfd->ioptions()->compaction_filter == nullptr && cfd->ioptions()->compaction_filter_factory == nullptr) { // Skip bottommost level compaction since we don't have a compaction // filter continue; } output_level = level; // update max_file_num_to_ignore only for bottom level compaction // because data in newly compacted files in middle levels may still // need to be pushed down max_file_num_to_ignore = next_file_number; } else { output_level = level + 1; if (cfd->ioptions()->compaction_style == kCompactionStyleLevel && cfd->ioptions()->level_compaction_dynamic_level_bytes && level == 0) { output_level = ColumnFamilyData::kCompactToBaseLevel; } } s = RunManualCompaction(cfd, level, output_level, options, begin, end, exclusive, false, max_file_num_to_ignore); if (!s.ok()) { break; } if (output_level == ColumnFamilyData::kCompactToBaseLevel) { final_output_level = cfd->NumberLevels() - 1; } else if (output_level > final_output_level) { final_output_level = output_level; } TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1"); TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2"); } } } if (!s.ok() || final_output_level == kInvalidLevel) { LogFlush(immutable_db_options_.info_log); return s; } if (options.change_level) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "[RefitLevel] waiting for background threads to stop"); s = PauseBackgroundWork(); if (s.ok()) { s = ReFitLevel(cfd, final_output_level, options.target_level); } ContinueBackgroundWork(); } 
LogFlush(immutable_db_options_.info_log); { InstrumentedMutexLock l(&mutex_); // an automatic compaction that has been scheduled might have been // preempted by the manual compactions. Need to schedule it back. MaybeScheduleFlushOrCompaction(); } return s; } Status DBImpl::CompactFiles(const CompactionOptions& compact_options, ColumnFamilyHandle* column_family, const std::vector& input_file_names, const int output_level, const int output_path_id, std::vector* const output_file_names, CompactionJobInfo* compaction_job_info) { #ifdef ROCKSDB_LITE (void)compact_options; (void)column_family; (void)input_file_names; (void)output_level; (void)output_path_id; (void)output_file_names; (void)compaction_job_info; // not supported in lite version return Status::NotSupported("Not supported in ROCKSDB LITE"); #else if (column_family == nullptr) { return Status::InvalidArgument("ColumnFamilyHandle must be non-null."); } auto cfd = reinterpret_cast(column_family)->cfd(); assert(cfd); Status s; JobContext job_context(0, true); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, immutable_db_options_.info_log.get()); // Perform CompactFiles TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile2"); { InstrumentedMutexLock l(&mutex_); // This call will unlock/lock the mutex to wait for current running // IngestExternalFile() calls to finish. WaitForIngestFile(); // We need to get current after `WaitForIngestFile`, because // `IngestExternalFile` may add files that overlap with `input_file_names` auto* current = cfd->current(); current->Ref(); s = CompactFilesImpl(compact_options, cfd, current, input_file_names, output_file_names, output_level, output_path_id, &job_context, &log_buffer, compaction_job_info); current->Unref(); } // Find and delete obsolete files { InstrumentedMutexLock l(&mutex_); // If !s.ok(), this means that Compaction failed. In that case, we want // to delete all obsolete files we might have created and we force // FindObsoleteFiles(). 
This is because job_context does not // catch all created files if compaction failed. FindObsoleteFiles(&job_context, !s.ok()); } // release the mutex // delete unnecessary files if any, this is done outside the mutex if (job_context.HaveSomethingToClean() || job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { // Have to flush the info logs before bg_compaction_scheduled_-- // because if bg_flush_scheduled_ becomes 0 and the lock is // released, the deconstructor of DB can kick in and destroy all the // states of DB so info_log might not be available after that point. // It also applies to access other states that DB owns. log_buffer.FlushBufferToLog(); if (job_context.HaveSomethingToDelete()) { // no mutex is locked here. No need to Unlock() and Lock() here. PurgeObsoleteFiles(job_context); } job_context.Clean(); } return s; #endif // ROCKSDB_LITE } #ifndef ROCKSDB_LITE Status DBImpl::CompactFilesImpl( const CompactionOptions& compact_options, ColumnFamilyData* cfd, Version* version, const std::vector& input_file_names, std::vector* const output_file_names, const int output_level, int output_path_id, JobContext* job_context, LogBuffer* log_buffer, CompactionJobInfo* compaction_job_info) { mutex_.AssertHeld(); if (shutting_down_.load(std::memory_order_acquire)) { return Status::ShutdownInProgress(); } if (manual_compaction_paused_.load(std::memory_order_acquire)) { return Status::Incomplete(Status::SubCode::kManualCompactionPaused); } std::unordered_set input_set; for (const auto& file_name : input_file_names) { input_set.insert(TableFileNameToNumber(file_name)); } ColumnFamilyMetaData cf_meta; // TODO(yhchiang): can directly use version here if none of the // following functions call is pluggable to external developers. 
version->GetColumnFamilyMetaData(&cf_meta); if (output_path_id < 0) { if (cfd->ioptions()->cf_paths.size() == 1U) { output_path_id = 0; } else { return Status::NotSupported( "Automatic output path selection is not " "yet supported in CompactFiles()"); } } Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles( &input_set, cf_meta, output_level); if (!s.ok()) { return s; } std::vector input_files; s = cfd->compaction_picker()->GetCompactionInputsFromFileNumbers( &input_files, &input_set, version->storage_info(), compact_options); if (!s.ok()) { return s; } for (const auto& inputs : input_files) { if (cfd->compaction_picker()->AreFilesInCompaction(inputs.files)) { return Status::Aborted( "Some of the necessary compaction input " "files are already being compacted"); } } bool sfm_reserved_compact_space = false; // First check if we have enough room to do the compaction bool enough_room = EnoughRoomForCompaction( cfd, input_files, &sfm_reserved_compact_space, log_buffer); if (!enough_room) { // m's vars will get set properly at the end of this function, // as long as status == CompactionTooLarge return Status::CompactionTooLarge(); } // At this point, CompactFiles will be run. bg_compaction_scheduled_++; std::unique_ptr c; assert(cfd->compaction_picker()); c.reset(cfd->compaction_picker()->CompactFiles( compact_options, input_files, output_level, version->storage_info(), *cfd->GetLatestMutableCFOptions(), output_path_id)); // we already sanitized the set of input files and checked for conflicts // without releasing the lock, so we're guaranteed a compaction can be formed. assert(c != nullptr); c->SetInputVersion(version); // deletion compaction currently not allowed in CompactFiles. 
assert(!c->deletion_compaction()); std::vector snapshot_seqs; SequenceNumber earliest_write_conflict_snapshot; SnapshotChecker* snapshot_checker; GetSnapshotContext(job_context, &snapshot_seqs, &earliest_write_conflict_snapshot, &snapshot_checker); std::unique_ptr::iterator> pending_outputs_inserted_elem( new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); assert(is_snapshot_supported_ || snapshots_.empty()); CompactionJobStats compaction_job_stats; CompactionJob compaction_job( job_context->job_id, c.get(), immutable_db_options_, file_options_for_compaction_, versions_.get(), &shutting_down_, preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(), GetDataDir(c->column_family_data(), c->output_path_id()), stats_, &mutex_, &error_handler_, snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, table_cache_, &event_logger_, c->mutable_cf_options()->paranoid_file_checks, c->mutable_cf_options()->report_bg_io_stats, dbname_, &compaction_job_stats, Env::Priority::USER, &manual_compaction_paused_); // Creating a compaction influences the compaction score because the score // takes running compactions into account (by skipping files that are already // being compacted). Since we just changed compaction score, we recalculate it // here. 
version->storage_info()->ComputeCompactionScore(*cfd->ioptions(), *c->mutable_cf_options()); compaction_job.Prepare(); mutex_.Unlock(); TEST_SYNC_POINT("CompactFilesImpl:0"); TEST_SYNC_POINT("CompactFilesImpl:1"); compaction_job.Run(); TEST_SYNC_POINT("CompactFilesImpl:2"); TEST_SYNC_POINT("CompactFilesImpl:3"); mutex_.Lock(); Status status = compaction_job.Install(*c->mutable_cf_options()); if (status.ok()) { InstallSuperVersionAndScheduleWork(c->column_family_data(), &job_context->superversion_contexts[0], *c->mutable_cf_options()); } c->ReleaseCompactionFiles(s); #ifndef ROCKSDB_LITE // Need to make sure SstFileManager does its bookkeeping auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); if (sfm && sfm_reserved_compact_space) { sfm->OnCompactionCompletion(c.get()); } #endif // ROCKSDB_LITE ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); if (compaction_job_info != nullptr) { BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats, job_context->job_id, version, compaction_job_info); } if (status.ok()) { // Done } else if (status.IsColumnFamilyDropped() || status.IsShutdownInProgress()) { // Ignore compaction errors found during shutting down } else if (status.IsManualCompactionPaused()) { // Don't report stopping manual compaction as error ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] [JOB %d] Stopping manual compaction", c->column_family_data()->GetName().c_str(), job_context->job_id); } else { ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] [JOB %d] Compaction error: %s", c->column_family_data()->GetName().c_str(), job_context->job_id, status.ToString().c_str()); error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction); } if (output_file_names != nullptr) { for (const auto& newf : c->edit()->GetNewFiles()) { (*output_file_names) .push_back(TableFileName(c->immutable_cf_options()->cf_paths, newf.second.fd.GetNumber(), newf.second.fd.GetPathId())); } } c.reset(); bg_compaction_scheduled_--; if 
(bg_compaction_scheduled_ == 0) { bg_cv_.SignalAll(); } MaybeScheduleFlushOrCompaction(); TEST_SYNC_POINT("CompactFilesImpl:End"); return status; } #endif // ROCKSDB_LITE Status DBImpl::PauseBackgroundWork() { InstrumentedMutexLock guard_lock(&mutex_); bg_compaction_paused_++; while (bg_bottom_compaction_scheduled_ > 0 || bg_compaction_scheduled_ > 0 || bg_flush_scheduled_ > 0) { bg_cv_.Wait(); } bg_work_paused_++; return Status::OK(); } Status DBImpl::ContinueBackgroundWork() { InstrumentedMutexLock guard_lock(&mutex_); if (bg_work_paused_ == 0) { return Status::InvalidArgument(); } assert(bg_work_paused_ > 0); assert(bg_compaction_paused_ > 0); bg_compaction_paused_--; bg_work_paused_--; // It's sufficient to check just bg_work_paused_ here since // bg_work_paused_ is always no greater than bg_compaction_paused_ if (bg_work_paused_ == 0) { MaybeScheduleFlushOrCompaction(); } return Status::OK(); } void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& job_stats, int job_id) { #ifndef ROCKSDB_LITE if (immutable_db_options_.listeners.empty()) { return; } mutex_.AssertHeld(); if (shutting_down_.load(std::memory_order_acquire)) { return; } if (c->is_manual_compaction() && manual_compaction_paused_.load(std::memory_order_acquire)) { return; } Version* current = cfd->current(); current->Ref(); // release lock while notifying events mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex"); { CompactionJobInfo info{}; info.cf_name = cfd->GetName(); info.status = st; info.thread_id = env_->GetThreadID(); info.job_id = job_id; info.base_input_level = c->start_level(); info.output_level = c->output_level(); info.stats = job_stats; info.table_properties = c->GetOutputTableProperties(); info.compaction_reason = c->compaction_reason(); info.compression = c->output_compression(); for (size_t i = 0; i < c->num_input_levels(); ++i) { for (const auto fmd : *c->inputs(i)) { const FileDescriptor& 
desc = fmd->fd; const uint64_t file_number = desc.GetNumber(); auto fn = TableFileName(c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId()); info.input_files.push_back(fn); info.input_file_infos.push_back(CompactionFileInfo{ static_cast(i), file_number, fmd->oldest_blob_file_number}); if (info.table_properties.count(fn) == 0) { std::shared_ptr tp; auto s = current->GetTableProperties(&tp, fmd, &fn); if (s.ok()) { info.table_properties[fn] = tp; } } } } for (const auto& newf : c->edit()->GetNewFiles()) { const FileMetaData& meta = newf.second; const FileDescriptor& desc = meta.fd; const uint64_t file_number = desc.GetNumber(); info.output_files.push_back(TableFileName( c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId())); info.output_file_infos.push_back(CompactionFileInfo{ newf.first, file_number, meta.oldest_blob_file_number}); } for (auto listener : immutable_db_options_.listeners) { listener->OnCompactionBegin(this, info); } } mutex_.Lock(); current->Unref(); #else (void)cfd; (void)c; (void)st; (void)job_stats; (void)job_id; #endif // ROCKSDB_LITE } void DBImpl::NotifyOnCompactionCompleted( ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, const int job_id) { #ifndef ROCKSDB_LITE if (immutable_db_options_.listeners.size() == 0U) { return; } mutex_.AssertHeld(); if (shutting_down_.load(std::memory_order_acquire)) { return; } if (c->is_manual_compaction() && manual_compaction_paused_.load(std::memory_order_acquire)) { return; } Version* current = cfd->current(); current->Ref(); // release lock while notifying events mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::NotifyOnCompactionCompleted::UnlockMutex"); { CompactionJobInfo info{}; BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, current, &info); for (auto listener : immutable_db_options_.listeners) { listener->OnCompactionCompleted(this, info); } } mutex_.Lock(); current->Unref(); // no need to signal bg_cv_ as it 
will be signaled at the end of the // flush process. #else (void)cfd; (void)c; (void)st; (void)compaction_job_stats; (void)job_id; #endif // ROCKSDB_LITE } // REQUIREMENT: block all background work by calling PauseBackgroundWork() // before calling this function Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { assert(level < cfd->NumberLevels()); if (target_level >= cfd->NumberLevels()) { return Status::InvalidArgument("Target level exceeds number of levels"); } SuperVersionContext sv_context(/* create_superversion */ true); Status status; InstrumentedMutexLock guard_lock(&mutex_); // only allow one thread refitting if (refitting_level_) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "[ReFitLevel] another thread is refitting"); return Status::NotSupported("another thread is refitting"); } refitting_level_ = true; const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); // move to a smaller level int to_level = target_level; if (target_level < 0) { to_level = FindMinimumEmptyLevelFitting(cfd, mutable_cf_options, level); } auto* vstorage = cfd->current()->storage_info(); if (to_level > level) { if (level == 0) { return Status::NotSupported( "Cannot change from level 0 to other levels."); } // Check levels are empty for a trivial move for (int l = level + 1; l <= to_level; l++) { if (vstorage->NumLevelFiles(l) > 0) { return Status::NotSupported( "Levels between source and target are not empty for a move."); } } } if (to_level != level) { ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Before refitting:\n%s", cfd->GetName().c_str(), cfd->current()->DebugString().data()); VersionEdit edit; edit.SetColumnFamily(cfd->GetID()); for (const auto& f : vstorage->LevelFiles(level)) { edit.DeleteFile(level, f->fd.GetNumber()); edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno, f->marked_for_compaction, 
f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->file_checksum, f->file_checksum_func_name); } ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), edit.DebugString().data()); status = versions_->LogAndApply(cfd, mutable_cf_options, &edit, &mutex_, directories_.GetDbDir()); InstallSuperVersionAndScheduleWork(cfd, &sv_context, mutable_cf_options); ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] LogAndApply: %s\n", cfd->GetName().c_str(), status.ToString().data()); if (status.ok()) { ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] After refitting:\n%s", cfd->GetName().c_str(), cfd->current()->DebugString().data()); } } sv_context.Clean(); refitting_level_ = false; return status; } int DBImpl::NumberLevels(ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); return cfh->cfd()->NumberLevels(); } int DBImpl::MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) { return 0; } int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); InstrumentedMutexLock l(&mutex_); return cfh->cfd() ->GetSuperVersion() ->mutable_cf_options.level0_stop_writes_trigger; } Status DBImpl::Flush(const FlushOptions& flush_options, ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush start.", cfh->GetName().c_str()); Status s; if (immutable_db_options_.atomic_flush) { s = AtomicFlushMemTables({cfh->cfd()}, flush_options, FlushReason::kManualFlush); } else { s = FlushMemTable(cfh->cfd(), flush_options, FlushReason::kManualFlush); } ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush finished, status: %s\n", cfh->GetName().c_str(), s.ToString().c_str()); return s; } Status DBImpl::Flush(const FlushOptions& flush_options, const std::vector& column_families) { Status s; if (!immutable_db_options_.atomic_flush) 
{ for (auto cfh : column_families) { s = Flush(flush_options, cfh); if (!s.ok()) { break; } } } else { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Manual atomic flush start.\n" "=====Column families:====="); for (auto cfh : column_families) { auto cfhi = static_cast(cfh); ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", cfhi->GetName().c_str()); } ROCKS_LOG_INFO(immutable_db_options_.info_log, "=====End of column families list====="); autovector cfds; std::for_each(column_families.begin(), column_families.end(), [&cfds](ColumnFamilyHandle* elem) { auto cfh = static_cast(elem); cfds.emplace_back(cfh->cfd()); }); s = AtomicFlushMemTables(cfds, flush_options, FlushReason::kManualFlush); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Manual atomic flush finished, status: %s\n" "=====Column families:=====", s.ToString().c_str()); for (auto cfh : column_families) { auto cfhi = static_cast(cfh); ROCKS_LOG_INFO(immutable_db_options_.info_log, "%s", cfhi->GetName().c_str()); } ROCKS_LOG_INFO(immutable_db_options_.info_log, "=====End of column families list====="); } return s; } Status DBImpl::RunManualCompaction( ColumnFamilyData* cfd, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const Slice* begin, const Slice* end, bool exclusive, bool disallow_trivial_move, uint64_t max_file_num_to_ignore) { assert(input_level == ColumnFamilyData::kCompactAllLevels || input_level >= 0); InternalKey begin_storage, end_storage; CompactionArg* ca; bool scheduled = false; bool manual_conflict = false; ManualCompactionState manual; manual.cfd = cfd; manual.input_level = input_level; manual.output_level = output_level; manual.output_path_id = compact_range_options.target_path_id; manual.done = false; manual.in_progress = false; manual.incomplete = false; manual.exclusive = exclusive; manual.disallow_trivial_move = disallow_trivial_move; // For universal compaction, we enforce every manual compaction to compact // all files. 
if (begin == nullptr || cfd->ioptions()->compaction_style == kCompactionStyleUniversal || cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { manual.begin = nullptr; } else { begin_storage.SetMinPossibleForUserKey(*begin); manual.begin = &begin_storage; } if (end == nullptr || cfd->ioptions()->compaction_style == kCompactionStyleUniversal || cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { manual.end = nullptr; } else { end_storage.SetMaxPossibleForUserKey(*end); manual.end = &end_storage; } TEST_SYNC_POINT("DBImpl::RunManualCompaction:0"); TEST_SYNC_POINT("DBImpl::RunManualCompaction:1"); InstrumentedMutexLock l(&mutex_); // When a manual compaction arrives, temporarily disable scheduling of // non-manual compactions and wait until the number of scheduled compaction // jobs drops to zero. This is needed to ensure that this manual compaction // can compact any range of keys/files. // // HasPendingManualCompaction() is true when at least one thread is inside // RunManualCompaction(), i.e. during that time no other compaction will // get scheduled (see MaybeScheduleFlushOrCompaction). // // Note that the following loop doesn't stop more that one thread calling // RunManualCompaction() from getting to the second while loop below. // However, only one of them will actually schedule compaction, while // others will wait on a condition variable until it completes. 
AddManualCompaction(&manual); TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_); if (exclusive) { while (bg_bottom_compaction_scheduled_ > 0 || bg_compaction_scheduled_ > 0) { TEST_SYNC_POINT("DBImpl::RunManualCompaction:WaitScheduled"); ROCKS_LOG_INFO( immutable_db_options_.info_log, "[%s] Manual compaction waiting for all other scheduled background " "compactions to finish", cfd->GetName().c_str()); bg_cv_.Wait(); } } ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual compaction starting", cfd->GetName().c_str()); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, immutable_db_options_.info_log.get()); // We don't check bg_error_ here, because if we get the error in compaction, // the compaction will set manual.status to bg_error_ and set manual.done to // true. while (!manual.done) { assert(HasPendingManualCompaction()); manual_conflict = false; Compaction* compaction = nullptr; if (ShouldntRunManualCompaction(&manual) || (manual.in_progress == true) || scheduled || (((manual.manual_end = &manual.tmp_storage1) != nullptr) && ((compaction = manual.cfd->CompactRange( *manual.cfd->GetLatestMutableCFOptions(), manual.input_level, manual.output_level, compact_range_options, manual.begin, manual.end, &manual.manual_end, &manual_conflict, max_file_num_to_ignore)) == nullptr && manual_conflict))) { // exclusive manual compactions should not see a conflict during // CompactRange assert(!exclusive || !manual_conflict); // Running either this or some other manual compaction bg_cv_.Wait(); if (scheduled && manual.incomplete == true) { assert(!manual.in_progress); scheduled = false; manual.incomplete = false; } } else if (!scheduled) { if (compaction == nullptr) { manual.done = true; bg_cv_.SignalAll(); continue; } ca = new CompactionArg; ca->db = this; ca->prepicked_compaction = new PrepickedCompaction; ca->prepicked_compaction->manual_compaction_state = &manual; ca->prepicked_compaction->compaction = compaction; if (!RequestCompactionToken( 
cfd, true, &ca->prepicked_compaction->task_token, &log_buffer)) { // Don't throttle manual compaction, only count outstanding tasks. assert(false); } manual.incomplete = false; bg_compaction_scheduled_++; Env::Priority thread_pool_pri = Env::Priority::LOW; if (compaction->bottommost_level() && env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) { thread_pool_pri = Env::Priority::BOTTOM; } env_->Schedule(&DBImpl::BGWorkCompaction, ca, thread_pool_pri, this, &DBImpl::UnscheduleCompactionCallback); scheduled = true; } } log_buffer.FlushBufferToLog(); assert(!manual.in_progress); assert(HasPendingManualCompaction()); RemoveManualCompaction(&manual); bg_cv_.SignalAll(); return manual.status; } void DBImpl::GenerateFlushRequest(const autovector& cfds, FlushRequest* req) { assert(req != nullptr); req->reserve(cfds.size()); for (const auto cfd : cfds) { if (nullptr == cfd) { // cfd may be null, see DBImpl::ScheduleFlushes continue; } uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID(); req->emplace_back(cfd, max_memtable_id); } } Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& flush_options, FlushReason flush_reason, bool writes_stopped) { Status s; uint64_t flush_memtable_id = 0; if (!flush_options.allow_write_stall) { bool flush_needed = true; s = WaitUntilFlushWouldNotStallWrites(cfd, &flush_needed); TEST_SYNC_POINT("DBImpl::FlushMemTable:StallWaitDone"); if (!s.ok() || !flush_needed) { return s; } } FlushRequest flush_req; { WriteContext context; InstrumentedMutexLock guard_lock(&mutex_); WriteThread::Writer w; WriteThread::Writer nonmem_w; if (!writes_stopped) { write_thread_.EnterUnbatched(&w, &mutex_); if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } } WaitForPendingWrites(); if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) { s = SwitchMemtable(cfd, &context); } if (s.ok()) { if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() || 
!cached_recoverable_state_empty_.load()) { flush_memtable_id = cfd->imm()->GetLatestMemTableID(); flush_req.emplace_back(cfd, flush_memtable_id); } if (immutable_db_options_.persist_stats_to_disk) { ColumnFamilyData* cfd_stats = versions_->GetColumnFamilySet()->GetColumnFamily( kPersistentStatsColumnFamilyName); if (cfd_stats != nullptr && cfd_stats != cfd && !cfd_stats->mem()->IsEmpty()) { // only force flush stats CF when it will be the only CF lagging // behind after the current flush bool stats_cf_flush_needed = true; for (auto* loop_cfd : *versions_->GetColumnFamilySet()) { if (loop_cfd == cfd_stats || loop_cfd == cfd) { continue; } if (loop_cfd->GetLogNumber() <= cfd_stats->GetLogNumber()) { stats_cf_flush_needed = false; } } if (stats_cf_flush_needed) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Force flushing stats CF with manual flush of %s " "to avoid holding old logs", cfd->GetName().c_str()); s = SwitchMemtable(cfd_stats, &context); flush_memtable_id = cfd_stats->imm()->GetLatestMemTableID(); flush_req.emplace_back(cfd_stats, flush_memtable_id); } } } } if (s.ok() && !flush_req.empty()) { for (auto& elem : flush_req) { ColumnFamilyData* loop_cfd = elem.first; loop_cfd->imm()->FlushRequested(); } // If the caller wants to wait for this flush to complete, it indicates // that the caller expects the ColumnFamilyData not to be free'ed by // other threads which may drop the column family concurrently. // Therefore, we increase the cfd's ref count. 
if (flush_options.wait) { for (auto& elem : flush_req) { ColumnFamilyData* loop_cfd = elem.first; loop_cfd->Ref(); } } SchedulePendingFlush(flush_req, flush_reason); MaybeScheduleFlushOrCompaction(); } if (!writes_stopped) { write_thread_.ExitUnbatched(&w); if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } } } TEST_SYNC_POINT("DBImpl::FlushMemTable:AfterScheduleFlush"); TEST_SYNC_POINT("DBImpl::FlushMemTable:BeforeWaitForBgFlush"); if (s.ok() && flush_options.wait) { autovector cfds; autovector flush_memtable_ids; for (auto& iter : flush_req) { cfds.push_back(iter.first); flush_memtable_ids.push_back(&(iter.second)); } s = WaitForFlushMemTables(cfds, flush_memtable_ids, (flush_reason == FlushReason::kErrorRecovery)); InstrumentedMutexLock lock_guard(&mutex_); for (auto* tmp_cfd : cfds) { tmp_cfd->UnrefAndTryDelete(); } } TEST_SYNC_POINT("DBImpl::FlushMemTable:FlushMemTableFinished"); return s; } // Flush all elements in 'column_family_datas' // and atomically record the result to the MANIFEST. 
Status DBImpl::AtomicFlushMemTables( const autovector& column_family_datas, const FlushOptions& flush_options, FlushReason flush_reason, bool writes_stopped) { Status s; if (!flush_options.allow_write_stall) { int num_cfs_to_flush = 0; for (auto cfd : column_family_datas) { bool flush_needed = true; s = WaitUntilFlushWouldNotStallWrites(cfd, &flush_needed); if (!s.ok()) { return s; } else if (flush_needed) { ++num_cfs_to_flush; } } if (0 == num_cfs_to_flush) { return s; } } FlushRequest flush_req; autovector cfds; { WriteContext context; InstrumentedMutexLock guard_lock(&mutex_); WriteThread::Writer w; WriteThread::Writer nonmem_w; if (!writes_stopped) { write_thread_.EnterUnbatched(&w, &mutex_); if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } } WaitForPendingWrites(); for (auto cfd : column_family_datas) { if (cfd->IsDropped()) { continue; } if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) { cfds.emplace_back(cfd); } } for (auto cfd : cfds) { if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) { continue; } cfd->Ref(); s = SwitchMemtable(cfd, &context); cfd->UnrefAndTryDelete(); if (!s.ok()) { break; } } if (s.ok()) { AssignAtomicFlushSeq(cfds); for (auto cfd : cfds) { cfd->imm()->FlushRequested(); } // If the caller wants to wait for this flush to complete, it indicates // that the caller expects the ColumnFamilyData not to be free'ed by // other threads which may drop the column family concurrently. // Therefore, we increase the cfd's ref count. 
if (flush_options.wait) { for (auto cfd : cfds) { cfd->Ref(); } } GenerateFlushRequest(cfds, &flush_req); SchedulePendingFlush(flush_req, flush_reason); MaybeScheduleFlushOrCompaction(); } if (!writes_stopped) { write_thread_.ExitUnbatched(&w); if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } } } TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:AfterScheduleFlush"); TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush"); if (s.ok() && flush_options.wait) { autovector flush_memtable_ids; for (auto& iter : flush_req) { flush_memtable_ids.push_back(&(iter.second)); } s = WaitForFlushMemTables(cfds, flush_memtable_ids, (flush_reason == FlushReason::kErrorRecovery)); InstrumentedMutexLock lock_guard(&mutex_); for (auto* cfd : cfds) { cfd->UnrefAndTryDelete(); } } return s; } // Calling FlushMemTable(), whether from DB::Flush() or from Backup Engine, can // cause write stall, for example if one memtable is being flushed already. // This method tries to avoid write stall (similar to CompactRange() behavior) // it emulates how the SuperVersion / LSM would change if flush happens, checks // it against various constrains and delays flush if it'd cause write stall. // Called should check status and flush_needed to see if flush already happened. Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd, bool* flush_needed) { { *flush_needed = true; InstrumentedMutexLock l(&mutex_); uint64_t orig_active_memtable_id = cfd->mem()->GetID(); WriteStallCondition write_stall_condition = WriteStallCondition::kNormal; do { if (write_stall_condition != WriteStallCondition::kNormal) { // Same error handling as user writes: Don't wait if there's a // background error, even if it's a soft error. 
We might wait here // indefinitely as the pending flushes/compactions may never finish // successfully, resulting in the stall condition lasting indefinitely if (error_handler_.IsBGWorkStopped()) { return error_handler_.GetBGError(); } TEST_SYNC_POINT("DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait"); ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] WaitUntilFlushWouldNotStallWrites" " waiting on stall conditions to clear", cfd->GetName().c_str()); bg_cv_.Wait(); } if (cfd->IsDropped()) { return Status::ColumnFamilyDropped(); } if (shutting_down_.load(std::memory_order_acquire)) { return Status::ShutdownInProgress(); } uint64_t earliest_memtable_id = std::min(cfd->mem()->GetID(), cfd->imm()->GetEarliestMemTableID()); if (earliest_memtable_id > orig_active_memtable_id) { // We waited so long that the memtable we were originally waiting on was // flushed. *flush_needed = false; return Status::OK(); } const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions(); const auto* vstorage = cfd->current()->storage_info(); // Skip stalling check if we're below auto-flush and auto-compaction // triggers. If it stalled in these conditions, that'd mean the stall // triggers are so low that stalling is needed for any background work. In // that case we shouldn't wait since background work won't be scheduled. if (cfd->imm()->NumNotFlushed() < cfd->ioptions()->min_write_buffer_number_to_merge && vstorage->l0_delay_trigger_count() < mutable_cf_options.level0_file_num_compaction_trigger) { break; } // check whether one extra immutable memtable or an extra L0 file would // cause write stalling mode to be entered. 
It could still enter stall // mode due to pending compaction bytes, but that's less common write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause( cfd->imm()->NumNotFlushed() + 1, vstorage->l0_delay_trigger_count() + 1, vstorage->estimated_compaction_needed_bytes(), mutable_cf_options) .first; } while (write_stall_condition != WriteStallCondition::kNormal); } return Status::OK(); } // Wait for memtables to be flushed for multiple column families. // let N = cfds.size() // for i in [0, N), // 1) if flush_memtable_ids[i] is not null, then the memtables with lower IDs // have to be flushed for THIS column family; // 2) if flush_memtable_ids[i] is null, then all memtables in THIS column // family have to be flushed. // Finish waiting when ALL column families finish flushing memtables. // resuming_from_bg_err indicates whether the caller is trying to resume from // background error or in normal processing. Status DBImpl::WaitForFlushMemTables( const autovector& cfds, const autovector& flush_memtable_ids, bool resuming_from_bg_err) { int num = static_cast(cfds.size()); // Wait until the compaction completes InstrumentedMutexLock l(&mutex_); // If the caller is trying to resume from bg error, then // error_handler_.IsDBStopped() is true. while (resuming_from_bg_err || !error_handler_.IsDBStopped()) { if (shutting_down_.load(std::memory_order_acquire)) { return Status::ShutdownInProgress(); } // If an error has occurred during resumption, then no need to wait. if (!error_handler_.GetRecoveryError().ok()) { break; } // Number of column families that have been dropped. int num_dropped = 0; // Number of column families that have finished flush. 
int num_finished = 0; for (int i = 0; i < num; ++i) { if (cfds[i]->IsDropped()) { ++num_dropped; } else if (cfds[i]->imm()->NumNotFlushed() == 0 || (flush_memtable_ids[i] != nullptr && cfds[i]->imm()->GetEarliestMemTableID() > *flush_memtable_ids[i])) { ++num_finished; } } if (1 == num_dropped && 1 == num) { return Status::ColumnFamilyDropped(); } // Column families involved in this flush request have either been dropped // or finished flush. Then it's time to finish waiting. if (num_dropped + num_finished == num) { break; } bg_cv_.Wait(); } Status s; // If not resuming from bg error, and an error has caused the DB to stop, // then report the bg error to caller. if (!resuming_from_bg_err && error_handler_.IsDBStopped()) { s = error_handler_.GetBGError(); } return s; } Status DBImpl::EnableAutoCompaction( const std::vector& column_family_handles) { Status s; for (auto cf_ptr : column_family_handles) { Status status = this->SetOptions(cf_ptr, {{"disable_auto_compactions", "false"}}); if (!status.ok()) { s = status; } } return s; } void DBImpl::DisableManualCompaction() { manual_compaction_paused_.store(true, std::memory_order_release); } void DBImpl::EnableManualCompaction() { manual_compaction_paused_.store(false, std::memory_order_release); } void DBImpl::MaybeScheduleFlushOrCompaction() { mutex_.AssertHeld(); if (!opened_successfully_) { // Compaction may introduce data race to DB open return; } if (bg_work_paused_ > 0) { // we paused the background work return; } else if (error_handler_.IsBGWorkStopped() && !error_handler_.IsRecoveryInProgress()) { // There has been a hard error and this call is not part of the recovery // sequence. 
Bail out here so we don't get into an endless loop of // scheduling BG work which will again call this function return; } else if (shutting_down_.load(std::memory_order_acquire)) { // DB is being deleted; no more background compactions return; } auto bg_job_limits = GetBGJobLimits(); bool is_flush_pool_empty = env_->GetBackgroundThreads(Env::Priority::HIGH) == 0; while (!is_flush_pool_empty && unscheduled_flushes_ > 0 && bg_flush_scheduled_ < bg_job_limits.max_flushes) { bg_flush_scheduled_++; FlushThreadArg* fta = new FlushThreadArg; fta->db_ = this; fta->thread_pri_ = Env::Priority::HIGH; env_->Schedule(&DBImpl::BGWorkFlush, fta, Env::Priority::HIGH, this, &DBImpl::UnscheduleFlushCallback); --unscheduled_flushes_; TEST_SYNC_POINT_CALLBACK( "DBImpl::MaybeScheduleFlushOrCompaction:AfterSchedule:0", &unscheduled_flushes_); } // special case -- if high-pri (flush) thread pool is empty, then schedule // flushes in low-pri (compaction) thread pool. if (is_flush_pool_empty) { while (unscheduled_flushes_ > 0 && bg_flush_scheduled_ + bg_compaction_scheduled_ < bg_job_limits.max_flushes) { bg_flush_scheduled_++; FlushThreadArg* fta = new FlushThreadArg; fta->db_ = this; fta->thread_pri_ = Env::Priority::LOW; env_->Schedule(&DBImpl::BGWorkFlush, fta, Env::Priority::LOW, this, &DBImpl::UnscheduleFlushCallback); --unscheduled_flushes_; } } if (bg_compaction_paused_ > 0) { // we paused the background compaction return; } else if (error_handler_.IsBGWorkStopped()) { // Compaction is not part of the recovery sequence from a hard error. We // might get here because recovery might do a flush and install a new // super version, which will try to schedule pending compactions. Bail // out here and let the higher level recovery handle compactions return; } if (HasExclusiveManualCompaction()) { // only manual compactions are allowed to run. 
don't schedule automatic // compactions TEST_SYNC_POINT("DBImpl::MaybeScheduleFlushOrCompaction:Conflict"); return; } while (bg_compaction_scheduled_ < bg_job_limits.max_compactions && unscheduled_compactions_ > 0) { CompactionArg* ca = new CompactionArg; ca->db = this; ca->prepicked_compaction = nullptr; bg_compaction_scheduled_++; unscheduled_compactions_--; env_->Schedule(&DBImpl::BGWorkCompaction, ca, Env::Priority::LOW, this, &DBImpl::UnscheduleCompactionCallback); } } DBImpl::BGJobLimits DBImpl::GetBGJobLimits() const { mutex_.AssertHeld(); return GetBGJobLimits(mutable_db_options_.max_background_flushes, mutable_db_options_.max_background_compactions, mutable_db_options_.max_background_jobs, write_controller_.NeedSpeedupCompaction()); } DBImpl::BGJobLimits DBImpl::GetBGJobLimits(int max_background_flushes, int max_background_compactions, int max_background_jobs, bool parallelize_compactions) { BGJobLimits res; if (max_background_flushes == -1 && max_background_compactions == -1) { // for our first stab implementing max_background_jobs, simply allocate a // quarter of the threads to flushes. 
res.max_flushes = std::max(1, max_background_jobs / 4); res.max_compactions = std::max(1, max_background_jobs - res.max_flushes); } else { // compatibility code in case users haven't migrated to max_background_jobs, // which automatically computes flush/compaction limits res.max_flushes = std::max(1, max_background_flushes); res.max_compactions = std::max(1, max_background_compactions); } if (!parallelize_compactions) { // throttle background compactions until we deem necessary res.max_compactions = 1; } return res; } void DBImpl::AddToCompactionQueue(ColumnFamilyData* cfd) { assert(!cfd->queued_for_compaction()); cfd->Ref(); compaction_queue_.push_back(cfd); cfd->set_queued_for_compaction(true); } ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() { assert(!compaction_queue_.empty()); auto cfd = *compaction_queue_.begin(); compaction_queue_.pop_front(); assert(cfd->queued_for_compaction()); cfd->set_queued_for_compaction(false); return cfd; } DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() { assert(!flush_queue_.empty()); FlushRequest flush_req = flush_queue_.front(); flush_queue_.pop_front(); // TODO: need to unset flush reason? return flush_req; } ColumnFamilyData* DBImpl::PickCompactionFromQueue( std::unique_ptr* token, LogBuffer* log_buffer) { assert(!compaction_queue_.empty()); assert(*token == nullptr); autovector throttled_candidates; ColumnFamilyData* cfd = nullptr; while (!compaction_queue_.empty()) { auto first_cfd = *compaction_queue_.begin(); compaction_queue_.pop_front(); assert(first_cfd->queued_for_compaction()); if (!RequestCompactionToken(first_cfd, false, token, log_buffer)) { throttled_candidates.push_back(first_cfd); continue; } cfd = first_cfd; cfd->set_queued_for_compaction(false); break; } // Add throttled compaction candidates back to queue in the original order. 
for (auto iter = throttled_candidates.rbegin(); iter != throttled_candidates.rend(); ++iter) { compaction_queue_.push_front(*iter); } return cfd; } void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req, FlushReason flush_reason) { if (flush_req.empty()) { return; } for (auto& iter : flush_req) { ColumnFamilyData* cfd = iter.first; cfd->Ref(); cfd->SetFlushReason(flush_reason); } ++unscheduled_flushes_; flush_queue_.push_back(flush_req); } void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) { if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) { AddToCompactionQueue(cfd); ++unscheduled_compactions_; } } void DBImpl::SchedulePendingPurge(std::string fname, std::string dir_to_sync, FileType type, uint64_t number, int job_id) { mutex_.AssertHeld(); PurgeFileInfo file_info(fname, dir_to_sync, type, number, job_id); purge_files_.insert({{number, std::move(file_info)}}); } void DBImpl::BGWorkFlush(void* arg) { FlushThreadArg fta = *(reinterpret_cast(arg)); delete reinterpret_cast(arg); IOSTATS_SET_THREAD_POOL_ID(fta.thread_pri_); TEST_SYNC_POINT("DBImpl::BGWorkFlush"); static_cast_with_check(fta.db_)->BackgroundCallFlush(fta.thread_pri_); TEST_SYNC_POINT("DBImpl::BGWorkFlush:done"); } void DBImpl::BGWorkCompaction(void* arg) { CompactionArg ca = *(reinterpret_cast(arg)); delete reinterpret_cast(arg); IOSTATS_SET_THREAD_POOL_ID(Env::Priority::LOW); TEST_SYNC_POINT("DBImpl::BGWorkCompaction"); auto prepicked_compaction = static_cast(ca.prepicked_compaction); static_cast_with_check(ca.db)->BackgroundCallCompaction( prepicked_compaction, Env::Priority::LOW); delete prepicked_compaction; } void DBImpl::BGWorkBottomCompaction(void* arg) { CompactionArg ca = *(static_cast(arg)); delete static_cast(arg); IOSTATS_SET_THREAD_POOL_ID(Env::Priority::BOTTOM); TEST_SYNC_POINT("DBImpl::BGWorkBottomCompaction"); auto* prepicked_compaction = ca.prepicked_compaction; assert(prepicked_compaction && prepicked_compaction->compaction && 
!prepicked_compaction->manual_compaction_state); ca.db->BackgroundCallCompaction(prepicked_compaction, Env::Priority::BOTTOM); delete prepicked_compaction; } void DBImpl::BGWorkPurge(void* db) { IOSTATS_SET_THREAD_POOL_ID(Env::Priority::HIGH); TEST_SYNC_POINT("DBImpl::BGWorkPurge:start"); reinterpret_cast(db)->BackgroundCallPurge(); TEST_SYNC_POINT("DBImpl::BGWorkPurge:end"); } void DBImpl::UnscheduleCompactionCallback(void* arg) { CompactionArg ca = *(reinterpret_cast(arg)); delete reinterpret_cast(arg); if (ca.prepicked_compaction != nullptr) { if (ca.prepicked_compaction->compaction != nullptr) { delete ca.prepicked_compaction->compaction; } delete ca.prepicked_compaction; } TEST_SYNC_POINT("DBImpl::UnscheduleCompactionCallback"); } void DBImpl::UnscheduleFlushCallback(void* arg) { delete reinterpret_cast(arg); TEST_SYNC_POINT("DBImpl::UnscheduleFlushCallback"); } Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, LogBuffer* log_buffer, FlushReason* reason, Env::Priority thread_pri) { mutex_.AssertHeld(); Status status; *reason = FlushReason::kOthers; // If BG work is stopped due to an error, but a recovery is in progress, // that means this flush is part of the recovery. 
So allow it to go through if (!error_handler_.IsBGWorkStopped()) { if (shutting_down_.load(std::memory_order_acquire)) { status = Status::ShutdownInProgress(); } } else if (!error_handler_.IsRecoveryInProgress()) { status = error_handler_.GetBGError(); } if (!status.ok()) { return status; } autovector bg_flush_args; std::vector& superversion_contexts = job_context->superversion_contexts; autovector column_families_not_to_flush; while (!flush_queue_.empty()) { // This cfd is already referenced const FlushRequest& flush_req = PopFirstFromFlushQueue(); superversion_contexts.clear(); superversion_contexts.reserve(flush_req.size()); for (const auto& iter : flush_req) { ColumnFamilyData* cfd = iter.first; if (cfd->IsDropped() || !cfd->imm()->IsFlushPending()) { // can't flush this CF, try next one column_families_not_to_flush.push_back(cfd); continue; } superversion_contexts.emplace_back(SuperVersionContext(true)); bg_flush_args.emplace_back(cfd, iter.second, &(superversion_contexts.back())); } if (!bg_flush_args.empty()) { break; } } if (!bg_flush_args.empty()) { auto bg_job_limits = GetBGJobLimits(); for (const auto& arg : bg_flush_args) { ColumnFamilyData* cfd = arg.cfd_; ROCKS_LOG_BUFFER( log_buffer, "Calling FlushMemTableToOutputFile with column " "family [%s], flush slots available %d, compaction slots available " "%d, " "flush slots scheduled %d, compaction slots scheduled %d", cfd->GetName().c_str(), bg_job_limits.max_flushes, bg_job_limits.max_compactions, bg_flush_scheduled_, bg_compaction_scheduled_); } status = FlushMemTablesToOutputFiles(bg_flush_args, made_progress, job_context, log_buffer, thread_pri); TEST_SYNC_POINT("DBImpl::BackgroundFlush:BeforeFlush"); // All the CFDs in the FlushReq must have the same flush reason, so just // grab the first one *reason = bg_flush_args[0].cfd_->GetFlushReason(); for (auto& arg : bg_flush_args) { ColumnFamilyData* cfd = arg.cfd_; if (cfd->UnrefAndTryDelete()) { arg.cfd_ = nullptr; } } } for (auto cfd : 
column_families_not_to_flush) { cfd->UnrefAndTryDelete(); } return status; } void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { bool made_progress = false; JobContext job_context(next_job_id_.fetch_add(1), true); TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:start"); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, immutable_db_options_.info_log.get()); { InstrumentedMutexLock l(&mutex_); assert(bg_flush_scheduled_); num_running_flushes_++; std::unique_ptr::iterator> pending_outputs_inserted_elem(new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); FlushReason reason; Status s = BackgroundFlush(&made_progress, &job_context, &log_buffer, &reason, thread_pri); if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() && reason != FlushReason::kErrorRecovery) { // Wait a little bit before retrying background flush in // case this is an environmental problem and we do not want to // chew up resources for failed flushes for the duration of // the problem. uint64_t error_cnt = default_cf_internal_stats_->BumpAndGetBackgroundErrorCount(); bg_cv_.SignalAll(); // In case a waiter can proceed despite the error mutex_.Unlock(); ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Waiting after background flush error: %s" "Accumulated background error counts: %" PRIu64, s.ToString().c_str(), error_cnt); log_buffer.FlushBufferToLog(); LogFlush(immutable_db_options_.info_log); env_->SleepForMicroseconds(1000000); mutex_.Lock(); } TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0"); ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); // If flush failed, we want to delete all temporary files that we might have // created. 
Thus, we force full scan in FindObsoleteFiles() FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()); // delete unnecessary files if any, this is done outside the mutex if (job_context.HaveSomethingToClean() || job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound"); // Have to flush the info logs before bg_flush_scheduled_-- // because if bg_flush_scheduled_ becomes 0 and the lock is // released, the deconstructor of DB can kick in and destroy all the // states of DB so info_log might not be available after that point. // It also applies to access other states that DB owns. log_buffer.FlushBufferToLog(); if (job_context.HaveSomethingToDelete()) { PurgeObsoleteFiles(job_context); } job_context.Clean(); mutex_.Lock(); } TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp"); assert(num_running_flushes_ > 0); num_running_flushes_--; bg_flush_scheduled_--; // See if there's more work to be done MaybeScheduleFlushOrCompaction(); atomic_flush_install_cv_.SignalAll(); bg_cv_.SignalAll(); // IMPORTANT: there should be no code after calling SignalAll. This call may // signal the DB destructor that it's OK to proceed with destruction. In // that case, all DB variables will be dealloacated and referencing them // will cause trouble. } } void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction, Env::Priority bg_thread_pri) { bool made_progress = false; JobContext job_context(next_job_id_.fetch_add(1), true); TEST_SYNC_POINT("BackgroundCallCompaction:0"); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, immutable_db_options_.info_log.get()); { InstrumentedMutexLock l(&mutex_); // This call will unlock/lock the mutex to wait for current running // IngestExternalFile() calls to finish. 
WaitForIngestFile(); num_running_compactions_++; std::unique_ptr::iterator> pending_outputs_inserted_elem(new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); assert((bg_thread_pri == Env::Priority::BOTTOM && bg_bottom_compaction_scheduled_) || (bg_thread_pri == Env::Priority::LOW && bg_compaction_scheduled_)); Status s = BackgroundCompaction(&made_progress, &job_context, &log_buffer, prepicked_compaction, bg_thread_pri); TEST_SYNC_POINT("BackgroundCallCompaction:1"); if (s.IsBusy()) { bg_cv_.SignalAll(); // In case a waiter can proceed despite the error mutex_.Unlock(); env_->SleepForMicroseconds(10000); // prevent hot loop mutex_.Lock(); } else if (!s.ok() && !s.IsShutdownInProgress() && !s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) { // Wait a little bit before retrying background compaction in // case this is an environmental problem and we do not want to // chew up resources for failed compactions for the duration of // the problem. uint64_t error_cnt = default_cf_internal_stats_->BumpAndGetBackgroundErrorCount(); bg_cv_.SignalAll(); // In case a waiter can proceed despite the error mutex_.Unlock(); log_buffer.FlushBufferToLog(); ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Waiting after background compaction error: %s, " "Accumulated background error counts: %" PRIu64, s.ToString().c_str(), error_cnt); LogFlush(immutable_db_options_.info_log); env_->SleepForMicroseconds(1000000); mutex_.Lock(); } else if (s.IsManualCompactionPaused()) { ManualCompactionState* m = prepicked_compaction->manual_compaction_state; assert(m); ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused", m->cfd->GetName().c_str(), job_context.job_id); } ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); // If compaction failed, we want to delete all temporary files that we might // have created (they might not be all recorded in job_context in case of a // failure). 
Thus, we force full scan in FindObsoleteFiles() FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() && !s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()); TEST_SYNC_POINT("DBImpl::BackgroundCallCompaction:FoundObsoleteFiles"); // delete unnecessary files if any, this is done outside the mutex if (job_context.HaveSomethingToClean() || job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { mutex_.Unlock(); // Have to flush the info logs before bg_compaction_scheduled_-- // because if bg_flush_scheduled_ becomes 0 and the lock is // released, the deconstructor of DB can kick in and destroy all the // states of DB so info_log might not be available after that point. // It also applies to access other states that DB owns. log_buffer.FlushBufferToLog(); if (job_context.HaveSomethingToDelete()) { PurgeObsoleteFiles(job_context); TEST_SYNC_POINT("DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles"); } job_context.Clean(); mutex_.Lock(); } assert(num_running_compactions_ > 0); num_running_compactions_--; if (bg_thread_pri == Env::Priority::LOW) { bg_compaction_scheduled_--; } else { assert(bg_thread_pri == Env::Priority::BOTTOM); bg_bottom_compaction_scheduled_--; } versions_->GetColumnFamilySet()->FreeDeadColumnFamilies(); // See if there's more work to be done MaybeScheduleFlushOrCompaction(); if (made_progress || (bg_compaction_scheduled_ == 0 && bg_bottom_compaction_scheduled_ == 0) || HasPendingManualCompaction() || unscheduled_compactions_ == 0) { // signal if // * made_progress -- need to wakeup DelayWrite // * bg_{bottom,}_compaction_scheduled_ == 0 -- need to wakeup ~DBImpl // * HasPendingManualCompaction -- need to wakeup RunManualCompaction // If none of this is true, there is no need to signal since nobody is // waiting for it bg_cv_.SignalAll(); } // IMPORTANT: there should be no code after calling SignalAll. This call may // signal the DB destructor that it's OK to proceed with destruction. 
In // that case, all DB variables will be dealloacated and referencing them // will cause trouble. } } Status DBImpl::BackgroundCompaction(bool* made_progress, JobContext* job_context, LogBuffer* log_buffer, PrepickedCompaction* prepicked_compaction, Env::Priority thread_pri) { ManualCompactionState* manual_compaction = prepicked_compaction == nullptr ? nullptr : prepicked_compaction->manual_compaction_state; *made_progress = false; mutex_.AssertHeld(); TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Start"); bool is_manual = (manual_compaction != nullptr); std::unique_ptr c; if (prepicked_compaction != nullptr && prepicked_compaction->compaction != nullptr) { c.reset(prepicked_compaction->compaction); } bool is_prepicked = is_manual || c; // (manual_compaction->in_progress == false); bool trivial_move_disallowed = is_manual && manual_compaction->disallow_trivial_move; CompactionJobStats compaction_job_stats; Status status; if (!error_handler_.IsBGWorkStopped()) { if (shutting_down_.load(std::memory_order_acquire)) { status = Status::ShutdownInProgress(); } else if (is_manual && manual_compaction_paused_.load(std::memory_order_acquire)) { status = Status::Incomplete(Status::SubCode::kManualCompactionPaused); } } else { status = error_handler_.GetBGError(); // If we get here, it means a hard error happened after this compaction // was scheduled by MaybeScheduleFlushOrCompaction(), but before it got // a chance to execute. 
Since we didn't pop a cfd from the compaction // queue, increment unscheduled_compactions_ unscheduled_compactions_++; } if (!status.ok()) { if (is_manual) { manual_compaction->status = status; manual_compaction->done = true; manual_compaction->in_progress = false; manual_compaction = nullptr; } if (c) { c->ReleaseCompactionFiles(status); c.reset(); } return status; } if (is_manual) { // another thread cannot pick up the same work manual_compaction->in_progress = true; } std::unique_ptr task_token; // InternalKey manual_end_storage; // InternalKey* manual_end = &manual_end_storage; bool sfm_reserved_compact_space = false; if (is_manual) { ManualCompactionState* m = manual_compaction; assert(m->in_progress); if (!c) { m->done = true; m->manual_end = nullptr; ROCKS_LOG_BUFFER(log_buffer, "[%s] Manual compaction from level-%d from %s .. " "%s; nothing to do\n", m->cfd->GetName().c_str(), m->input_level, (m->begin ? m->begin->DebugString(true).c_str() : "(begin)"), (m->end ? m->end->DebugString(true).c_str() : "(end)")); } else { // First check if we have enough room to do the compaction bool enough_room = EnoughRoomForCompaction( m->cfd, *(c->inputs()), &sfm_reserved_compact_space, log_buffer); if (!enough_room) { // Then don't do the compaction c->ReleaseCompactionFiles(status); c.reset(); // m's vars will get set properly at the end of this function, // as long as status == CompactionTooLarge status = Status::CompactionTooLarge(); } else { ROCKS_LOG_BUFFER( log_buffer, "[%s] Manual compaction from level-%d to level-%d from %s .. " "%s; will stop at %s\n", m->cfd->GetName().c_str(), m->input_level, c->output_level(), (m->begin ? m->begin->DebugString(true).c_str() : "(begin)"), (m->end ? m->end->DebugString(true).c_str() : "(end)"), ((m->done || m->manual_end == nullptr) ? 
"(end)" : m->manual_end->DebugString(true).c_str())); } } } else if (!is_prepicked && !compaction_queue_.empty()) { if (HasExclusiveManualCompaction()) { // Can't compact right now, but try again later TEST_SYNC_POINT("DBImpl::BackgroundCompaction()::Conflict"); // Stay in the compaction queue. unscheduled_compactions_++; return Status::OK(); } auto cfd = PickCompactionFromQueue(&task_token, log_buffer); if (cfd == nullptr) { // Can't find any executable task from the compaction queue. // All tasks have been throttled by compaction thread limiter. ++unscheduled_compactions_; return Status::Busy(); } // We unreference here because the following code will take a Ref() on // this cfd if it is going to use it (Compaction class holds a // reference). // This will all happen under a mutex so we don't have to be afraid of // somebody else deleting it. if (cfd->UnrefAndTryDelete()) { // This was the last reference of the column family, so no need to // compact. return Status::OK(); } // Pick up latest mutable CF Options and use it throughout the // compaction job // Compaction makes a copy of the latest MutableCFOptions. It should be used // throughout the compaction procedure to make sure consistency. It will // eventually be installed into SuperVersion auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); if (!mutable_cf_options->disable_auto_compactions && !cfd->IsDropped()) { // NOTE: try to avoid unnecessary copy of MutableCFOptions if // compaction is not necessary. 
Need to make sure mutex is held // until we make a copy in the following code TEST_SYNC_POINT("DBImpl::BackgroundCompaction():BeforePickCompaction"); c.reset(cfd->PickCompaction(*mutable_cf_options, log_buffer)); TEST_SYNC_POINT("DBImpl::BackgroundCompaction():AfterPickCompaction"); if (c != nullptr) { bool enough_room = EnoughRoomForCompaction( cfd, *(c->inputs()), &sfm_reserved_compact_space, log_buffer); if (!enough_room) { // Then don't do the compaction c->ReleaseCompactionFiles(status); c->column_family_data() ->current() ->storage_info() ->ComputeCompactionScore(*(c->immutable_cf_options()), *(c->mutable_cf_options())); AddToCompactionQueue(cfd); ++unscheduled_compactions_; c.reset(); // Don't need to sleep here, because BackgroundCallCompaction // will sleep if !s.ok() status = Status::CompactionTooLarge(); } else { // update statistics RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION, c->inputs(0)->size()); // There are three things that can change compaction score: // 1) When flush or compaction finish. This case is covered by // InstallSuperVersionAndScheduleWork // 2) When MutableCFOptions changes. This case is also covered by // InstallSuperVersionAndScheduleWork, because this is when the new // options take effect. // 3) When we Pick a new compaction, we "remove" those files being // compacted from the calculation, which then influences compaction // score. Here we check if we need the new compaction even without the // files that are currently being compacted. If we need another // compaction, we might be able to execute it in parallel, so we add // it to the queue and schedule a new thread. if (cfd->NeedsCompaction()) { // Yes, we need more compactions! AddToCompactionQueue(cfd); ++unscheduled_compactions_; MaybeScheduleFlushOrCompaction(); } } } } } IOStatus io_s; if (!c) { // Nothing to do ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do"); } else if (c->deletion_compaction()) { // TODO(icanadi) Do we want to honor snapshots here? 
i.e. not delete old // file if there is alive snapshot pointing to it TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction", c->column_family_data()); assert(c->num_input_files(1) == 0); assert(c->level() == 0); assert(c->column_family_data()->ioptions()->compaction_style == kCompactionStyleFIFO); compaction_job_stats.num_input_files = c->num_input_files(0); NotifyOnCompactionBegin(c->column_family_data(), c.get(), status, compaction_job_stats, job_context->job_id); for (const auto& f : *c->inputs(0)) { c->edit()->DeleteFile(c->level(), f->fd.GetNumber()); } status = versions_->LogAndApply(c->column_family_data(), *c->mutable_cf_options(), c->edit(), &mutex_, directories_.GetDbDir()); io_s = versions_->io_status(); InstallSuperVersionAndScheduleWork(c->column_family_data(), &job_context->superversion_contexts[0], *c->mutable_cf_options()); ROCKS_LOG_BUFFER(log_buffer, "[%s] Deleted %d files\n", c->column_family_data()->GetName().c_str(), c->num_input_files(0)); *made_progress = true; TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction", c->column_family_data()); } else if (!trivial_move_disallowed && c->IsTrivialMove()) { TEST_SYNC_POINT("DBImpl::BackgroundCompaction:TrivialMove"); TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction", c->column_family_data()); // Instrument for event update // TODO(yhchiang): add op details for showing trivial-move. 
ThreadStatusUtil::SetColumnFamily( c->column_family_data(), c->column_family_data()->ioptions()->env, immutable_db_options_.enable_thread_tracking); ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); compaction_job_stats.num_input_files = c->num_input_files(0); NotifyOnCompactionBegin(c->column_family_data(), c.get(), status, compaction_job_stats, job_context->job_id); // Move files to next level int32_t moved_files = 0; int64_t moved_bytes = 0; for (unsigned int l = 0; l < c->num_input_levels(); l++) { if (c->level(l) == c->output_level()) { continue; } for (size_t i = 0; i < c->num_input_files(l); i++) { FileMetaData* f = c->input(l, i); c->edit()->DeleteFile(c->level(l), f->fd.GetNumber()); c->edit()->AddFile(c->output_level(), f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno, f->marked_for_compaction, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->file_checksum, f->file_checksum_func_name); ROCKS_LOG_BUFFER( log_buffer, "[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n", c->column_family_data()->GetName().c_str(), f->fd.GetNumber(), c->output_level(), f->fd.GetFileSize()); ++moved_files; moved_bytes += f->fd.GetFileSize(); } } status = versions_->LogAndApply(c->column_family_data(), *c->mutable_cf_options(), c->edit(), &mutex_, directories_.GetDbDir()); io_s = versions_->io_status(); // Use latest MutableCFOptions InstallSuperVersionAndScheduleWork(c->column_family_data(), &job_context->superversion_contexts[0], *c->mutable_cf_options()); VersionStorageInfo::LevelSummaryStorage tmp; c->column_family_data()->internal_stats()->IncBytesMoved(c->output_level(), moved_bytes); { event_logger_.LogToBuffer(log_buffer) << "job" << job_context->job_id << "event" << "trivial_move" << "destination_level" << c->output_level() << "files" << moved_files << "total_files_size" << moved_bytes; } ROCKS_LOG_BUFFER( log_buffer, "[%s] Moved #%d files 
to level-%d %" PRIu64 " bytes %s: %s\n", c->column_family_data()->GetName().c_str(), moved_files, c->output_level(), moved_bytes, status.ToString().c_str(), c->column_family_data()->current()->storage_info()->LevelSummary(&tmp)); *made_progress = true; // Clear Instrument ThreadStatusUtil::ResetThreadStatus(); TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction", c->column_family_data()); } else if (!is_prepicked && c->output_level() > 0 && c->output_level() == c->column_family_data() ->current() ->storage_info() ->MaxOutputLevel( immutable_db_options_.allow_ingest_behind) && env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) { // Forward compactions involving last level to the bottom pool if it exists, // such that compactions unlikely to contribute to write stalls can be // delayed or deprioritized. TEST_SYNC_POINT("DBImpl::BackgroundCompaction:ForwardToBottomPriPool"); CompactionArg* ca = new CompactionArg; ca->db = this; ca->prepicked_compaction = new PrepickedCompaction; ca->prepicked_compaction->compaction = c.release(); ca->prepicked_compaction->manual_compaction_state = nullptr; // Transfer requested token, so it doesn't need to do it again. 
ca->prepicked_compaction->task_token = std::move(task_token); ++bg_bottom_compaction_scheduled_; env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, Env::Priority::BOTTOM, this, &DBImpl::UnscheduleCompactionCallback); } else { TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:BeforeCompaction", c->column_family_data()); int output_level __attribute__((__unused__)); output_level = c->output_level(); TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:NonTrivial", &output_level); std::vector snapshot_seqs; SequenceNumber earliest_write_conflict_snapshot; SnapshotChecker* snapshot_checker; GetSnapshotContext(job_context, &snapshot_seqs, &earliest_write_conflict_snapshot, &snapshot_checker); assert(is_snapshot_supported_ || snapshots_.empty()); CompactionJob compaction_job( job_context->job_id, c.get(), immutable_db_options_, file_options_for_compaction_, versions_.get(), &shutting_down_, preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(), GetDataDir(c->column_family_data(), c->output_path_id()), stats_, &mutex_, &error_handler_, snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, table_cache_, &event_logger_, c->mutable_cf_options()->paranoid_file_checks, c->mutable_cf_options()->report_bg_io_stats, dbname_, &compaction_job_stats, thread_pri, is_manual ? 
&manual_compaction_paused_ : nullptr); compaction_job.Prepare(); NotifyOnCompactionBegin(c->column_family_data(), c.get(), status, compaction_job_stats, job_context->job_id); mutex_.Unlock(); TEST_SYNC_POINT_CALLBACK( "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", nullptr); compaction_job.Run(); TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun"); mutex_.Lock(); status = compaction_job.Install(*c->mutable_cf_options()); io_s = compaction_job.io_status(); if (status.ok()) { InstallSuperVersionAndScheduleWork(c->column_family_data(), &job_context->superversion_contexts[0], *c->mutable_cf_options()); } *made_progress = true; TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction", c->column_family_data()); } if (status.ok() && !io_s.ok()) { status = io_s; } if (c != nullptr) { c->ReleaseCompactionFiles(status); *made_progress = true; #ifndef ROCKSDB_LITE // Need to make sure SstFileManager does its bookkeeping auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); if (sfm && sfm_reserved_compact_space) { sfm->OnCompactionCompletion(c.get()); } #endif // ROCKSDB_LITE NotifyOnCompactionCompleted(c->column_family_data(), c.get(), status, compaction_job_stats, job_context->job_id); } if (status.ok() || status.IsCompactionTooLarge() || status.IsManualCompactionPaused()) { // Done } else if (status.IsColumnFamilyDropped() || status.IsShutdownInProgress()) { // Ignore compaction errors found during shutting down } else { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Compaction error: %s", status.ToString().c_str()); if (!io_s.ok()) { // Error while writing to MANIFEST. // In fact, versions_->io_status() can also be the result of renaming // CURRENT file. With current code, it's just difficult to tell. So just // be pessimistic and try write to a new MANIFEST. // TODO: distinguish between MANIFEST write and CURRENT renaming auto err_reason = versions_->io_status().ok() ? 
BackgroundErrorReason::kCompaction : BackgroundErrorReason::kManifestWrite; error_handler_.SetBGError(io_s, err_reason); } else { error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction); } if (c != nullptr && !is_manual && !error_handler_.IsBGWorkStopped()) { // Put this cfd back in the compaction queue so we can retry after some // time auto cfd = c->column_family_data(); assert(cfd != nullptr); // Since this compaction failed, we need to recompute the score so it // takes the original input files into account c->column_family_data() ->current() ->storage_info() ->ComputeCompactionScore(*(c->immutable_cf_options()), *(c->mutable_cf_options())); if (!cfd->queued_for_compaction()) { AddToCompactionQueue(cfd); ++unscheduled_compactions_; } } } // this will unref its input_version and column_family_data c.reset(); if (is_manual) { ManualCompactionState* m = manual_compaction; if (!status.ok()) { m->status = status; m->done = true; } // For universal compaction: // Because universal compaction always happens at level 0, so one // compaction will pick up all overlapped files. No files will be // filtered out due to size limit and left for a successive compaction. // So we can safely conclude the current compaction. // // Also note that, if we don't stop here, then the current compaction // writes a new file back to level 0, which will be used in successive // compaction. Hence the manual compaction will never finish. // // Stop the compaction if manual_end points to nullptr -- this means // that we compacted the whole range. manual_end should always point // to nullptr in case of universal compaction if (m->manual_end == nullptr) { m->done = true; } if (!m->done) { // We only compacted part of the requested range. Update *m // to the range that is left to be compacted. 
// Universal and FIFO compactions should always compact the whole range assert(m->cfd->ioptions()->compaction_style != kCompactionStyleUniversal || m->cfd->ioptions()->num_levels > 1); assert(m->cfd->ioptions()->compaction_style != kCompactionStyleFIFO); m->tmp_storage = *m->manual_end; m->begin = &m->tmp_storage; m->incomplete = true; } m->in_progress = false; // not being processed anymore } TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Finish"); return status; } bool DBImpl::HasPendingManualCompaction() { return (!manual_compaction_dequeue_.empty()); } void DBImpl::AddManualCompaction(DBImpl::ManualCompactionState* m) { manual_compaction_dequeue_.push_back(m); } void DBImpl::RemoveManualCompaction(DBImpl::ManualCompactionState* m) { // Remove from queue std::deque::iterator it = manual_compaction_dequeue_.begin(); while (it != manual_compaction_dequeue_.end()) { if (m == (*it)) { it = manual_compaction_dequeue_.erase(it); return; } ++it; } assert(false); return; } bool DBImpl::ShouldntRunManualCompaction(ManualCompactionState* m) { if (num_running_ingest_file_ > 0) { // We need to wait for other IngestExternalFile() calls to finish // before running a manual compaction. 
return true; } if (m->exclusive) { return (bg_bottom_compaction_scheduled_ > 0 || bg_compaction_scheduled_ > 0); } std::deque::iterator it = manual_compaction_dequeue_.begin(); bool seen = false; while (it != manual_compaction_dequeue_.end()) { if (m == (*it)) { ++it; seen = true; continue; } else if (MCOverlap(m, (*it)) && (!seen && !(*it)->in_progress)) { // Consider the other manual compaction *it, conflicts if: // overlaps with m // and (*it) is ahead in the queue and is not yet in progress return true; } ++it; } return false; } bool DBImpl::HaveManualCompaction(ColumnFamilyData* cfd) { // Remove from priority queue std::deque::iterator it = manual_compaction_dequeue_.begin(); while (it != manual_compaction_dequeue_.end()) { if ((*it)->exclusive) { return true; } if ((cfd == (*it)->cfd) && (!((*it)->in_progress || (*it)->done))) { // Allow automatic compaction if manual compaction is // in progress return true; } ++it; } return false; } bool DBImpl::HasExclusiveManualCompaction() { // Remove from priority queue std::deque::iterator it = manual_compaction_dequeue_.begin(); while (it != manual_compaction_dequeue_.end()) { if ((*it)->exclusive) { return true; } ++it; } return false; } bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) { if ((m->exclusive) || (m1->exclusive)) { return true; } if (m->cfd != m1->cfd) { return false; } return true; } #ifndef ROCKSDB_LITE void DBImpl::BuildCompactionJobInfo( const ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, const int job_id, const Version* current, CompactionJobInfo* compaction_job_info) const { assert(compaction_job_info != nullptr); compaction_job_info->cf_id = cfd->GetID(); compaction_job_info->cf_name = cfd->GetName(); compaction_job_info->status = st; compaction_job_info->thread_id = env_->GetThreadID(); compaction_job_info->job_id = job_id; compaction_job_info->base_input_level = c->start_level(); 
compaction_job_info->output_level = c->output_level(); compaction_job_info->stats = compaction_job_stats; compaction_job_info->table_properties = c->GetOutputTableProperties(); compaction_job_info->compaction_reason = c->compaction_reason(); compaction_job_info->compression = c->output_compression(); for (size_t i = 0; i < c->num_input_levels(); ++i) { for (const auto fmd : *c->inputs(i)) { const FileDescriptor& desc = fmd->fd; const uint64_t file_number = desc.GetNumber(); auto fn = TableFileName(c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId()); compaction_job_info->input_files.push_back(fn); compaction_job_info->input_file_infos.push_back(CompactionFileInfo{ static_cast(i), file_number, fmd->oldest_blob_file_number}); if (compaction_job_info->table_properties.count(fn) == 0) { std::shared_ptr tp; auto s = current->GetTableProperties(&tp, fmd, &fn); if (s.ok()) { compaction_job_info->table_properties[fn] = tp; } } } } for (const auto& newf : c->edit()->GetNewFiles()) { const FileMetaData& meta = newf.second; const FileDescriptor& desc = meta.fd; const uint64_t file_number = desc.GetNumber(); compaction_job_info->output_files.push_back(TableFileName( c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId())); compaction_job_info->output_file_infos.push_back(CompactionFileInfo{ newf.first, file_number, meta.oldest_blob_file_number}); } } #endif // SuperVersionContext gets created and destructed outside of the lock -- // we use this conveniently to: // * malloc one SuperVersion() outside of the lock -- new_superversion // * delete SuperVersion()s outside of the lock -- superversions_to_free // // However, if InstallSuperVersionAndScheduleWork() gets called twice with the // same sv_context, we can't reuse the SuperVersion() that got // malloced because // first call already used it. In that rare case, we take a hit and create a // new SuperVersion() inside of the mutex. 
We do similar thing // for superversion_to_free void DBImpl::InstallSuperVersionAndScheduleWork( ColumnFamilyData* cfd, SuperVersionContext* sv_context, const MutableCFOptions& mutable_cf_options) { mutex_.AssertHeld(); // Update max_total_in_memory_state_ size_t old_memtable_size = 0; auto* old_sv = cfd->GetSuperVersion(); if (old_sv) { old_memtable_size = old_sv->mutable_cf_options.write_buffer_size * old_sv->mutable_cf_options.max_write_buffer_number; } // this branch is unlikely to step in if (UNLIKELY(sv_context->new_superversion == nullptr)) { sv_context->NewSuperVersion(); } cfd->InstallSuperVersion(sv_context, &mutex_, mutable_cf_options); // There may be a small data race here. The snapshot tricking bottommost // compaction may already be released here. But assuming there will always be // newer snapshot created and released frequently, the compaction will be // triggered soon anyway. bottommost_files_mark_threshold_ = kMaxSequenceNumber; for (auto* my_cfd : *versions_->GetColumnFamilySet()) { bottommost_files_mark_threshold_ = std::min( bottommost_files_mark_threshold_, my_cfd->current()->storage_info()->bottommost_files_mark_threshold()); } // Whenever we install new SuperVersion, we might need to issue new flushes or // compactions. SchedulePendingCompaction(cfd); MaybeScheduleFlushOrCompaction(); // Update max_total_in_memory_state_ max_total_in_memory_state_ = max_total_in_memory_state_ - old_memtable_size + mutable_cf_options.write_buffer_size * mutable_cf_options.max_write_buffer_number; } // ShouldPurge is called by FindObsoleteFiles when doing a full scan, // and db mutex (mutex_) should already be held. // Actually, the current implementation of FindObsoleteFiles with // full_scan=true can issue I/O requests to obtain list of files in // directories, e.g. env_->getChildren while holding db mutex. 
bool DBImpl::ShouldPurge(uint64_t file_number) const { return files_grabbed_for_purge_.find(file_number) == files_grabbed_for_purge_.end() && purge_files_.find(file_number) == purge_files_.end(); } // MarkAsGrabbedForPurge is called by FindObsoleteFiles, and db mutex // (mutex_) should already be held. void DBImpl::MarkAsGrabbedForPurge(uint64_t file_number) { files_grabbed_for_purge_.insert(file_number); } void DBImpl::SetSnapshotChecker(SnapshotChecker* snapshot_checker) { InstrumentedMutexLock l(&mutex_); // snapshot_checker_ should only set once. If we need to set it multiple // times, we need to make sure the old one is not deleted while it is still // using by a compaction job. assert(!snapshot_checker_); snapshot_checker_.reset(snapshot_checker); } void DBImpl::GetSnapshotContext( JobContext* job_context, std::vector* snapshot_seqs, SequenceNumber* earliest_write_conflict_snapshot, SnapshotChecker** snapshot_checker_ptr) { mutex_.AssertHeld(); assert(job_context != nullptr); assert(snapshot_seqs != nullptr); assert(earliest_write_conflict_snapshot != nullptr); assert(snapshot_checker_ptr != nullptr); *snapshot_checker_ptr = snapshot_checker_.get(); if (use_custom_gc_ && *snapshot_checker_ptr == nullptr) { *snapshot_checker_ptr = DisableGCSnapshotChecker::Instance(); } if (*snapshot_checker_ptr != nullptr) { // If snapshot_checker is used, that means the flush/compaction may // contain values not visible to snapshot taken after // flush/compaction job starts. Take a snapshot and it will appear // in snapshot_seqs and force compaction iterator to consider such // snapshots. 
const Snapshot* job_snapshot = GetSnapshotImpl(false /*write_conflict_boundary*/, false /*lock*/); job_context->job_snapshot.reset(new ManagedSnapshot(this, job_snapshot)); } *snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_debug.cc000066400000000000000000000215731370372246700203330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef NDEBUG #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/error_handler.h" #include "monitoring/thread_status_updater.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { uint64_t DBImpl::TEST_GetLevel0TotalSize() { InstrumentedMutexLock l(&mutex_); return default_cf_handle_->cfd()->current()->storage_info()->NumLevelBytes(0); } void DBImpl::TEST_SwitchWAL() { WriteContext write_context; InstrumentedMutexLock l(&mutex_); void* writer = TEST_BeginWrite(); SwitchWAL(&write_context); TEST_EndWrite(writer); } bool DBImpl::TEST_WALBufferIsEmpty(bool lock) { if (lock) { log_write_mutex_.Lock(); } log::Writer* cur_log_writer = logs_.back().writer; auto res = cur_log_writer->TEST_BufferIsEmpty(); if (lock) { log_write_mutex_.Unlock(); } return res; } int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes( ColumnFamilyHandle* column_family) { ColumnFamilyData* cfd; if (column_family == nullptr) { cfd = default_cf_handle_->cfd(); } else { auto cfh = reinterpret_cast(column_family); cfd = cfh->cfd(); } InstrumentedMutexLock l(&mutex_); return 
cfd->current()->storage_info()->MaxNextLevelOverlappingBytes(); } void DBImpl::TEST_GetFilesMetaData( ColumnFamilyHandle* column_family, std::vector>* metadata) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); InstrumentedMutexLock l(&mutex_); metadata->resize(NumberLevels()); for (int level = 0; level < NumberLevels(); level++) { const std::vector& files = cfd->current()->storage_info()->LevelFiles(level); (*metadata)[level].clear(); for (const auto& f : files) { (*metadata)[level].push_back(*f); } } } uint64_t DBImpl::TEST_Current_Manifest_FileNo() { return versions_->manifest_file_number(); } uint64_t DBImpl::TEST_Current_Next_FileNo() { return versions_->current_next_file_number(); } Status DBImpl::TEST_CompactRange(int level, const Slice* begin, const Slice* end, ColumnFamilyHandle* column_family, bool disallow_trivial_move) { ColumnFamilyData* cfd; if (column_family == nullptr) { cfd = default_cf_handle_->cfd(); } else { auto cfh = reinterpret_cast(column_family); cfd = cfh->cfd(); } int output_level = (cfd->ioptions()->compaction_style == kCompactionStyleUniversal || cfd->ioptions()->compaction_style == kCompactionStyleFIFO) ? 
level : level + 1; return RunManualCompaction(cfd, level, output_level, CompactRangeOptions(), begin, end, true, disallow_trivial_move, port::kMaxUint64 /*max_file_num_to_ignore*/); } Status DBImpl::TEST_SwitchMemtable(ColumnFamilyData* cfd) { WriteContext write_context; InstrumentedMutexLock l(&mutex_); if (cfd == nullptr) { cfd = default_cf_handle_->cfd(); } Status s; void* writer = TEST_BeginWrite(); if (two_write_queues_) { WriteThread::Writer nonmem_w; nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); s = SwitchMemtable(cfd, &write_context); nonmem_write_thread_.ExitUnbatched(&nonmem_w); } else { s = SwitchMemtable(cfd, &write_context); } TEST_EndWrite(writer); return s; } Status DBImpl::TEST_FlushMemTable(bool wait, bool allow_write_stall, ColumnFamilyHandle* cfh) { FlushOptions fo; fo.wait = wait; fo.allow_write_stall = allow_write_stall; ColumnFamilyData* cfd; if (cfh == nullptr) { cfd = default_cf_handle_->cfd(); } else { auto cfhi = reinterpret_cast(cfh); cfd = cfhi->cfd(); } return FlushMemTable(cfd, fo, FlushReason::kTest); } Status DBImpl::TEST_FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& flush_opts) { return FlushMemTable(cfd, flush_opts, FlushReason::kTest); } Status DBImpl::TEST_AtomicFlushMemTables( const autovector& cfds, const FlushOptions& flush_opts) { return AtomicFlushMemTables(cfds, flush_opts, FlushReason::kTest); } Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) { ColumnFamilyData* cfd; if (column_family == nullptr) { cfd = default_cf_handle_->cfd(); } else { auto cfh = reinterpret_cast(column_family); cfd = cfh->cfd(); } return WaitForFlushMemTable(cfd, nullptr, false); } Status DBImpl::TEST_WaitForCompact(bool wait_unscheduled) { // Wait until the compaction completes // TODO: a bug here. This function actually does not necessarily // wait for compact. It actually waits for scheduled compaction // OR flush to finish. 
InstrumentedMutexLock l(&mutex_); while ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || bg_flush_scheduled_ || (wait_unscheduled && unscheduled_compactions_)) && (error_handler_.GetBGError() == Status::OK())) { bg_cv_.Wait(); } return error_handler_.GetBGError(); } void DBImpl::TEST_LockMutex() { mutex_.Lock(); } void DBImpl::TEST_UnlockMutex() { mutex_.Unlock(); } void* DBImpl::TEST_BeginWrite() { auto w = new WriteThread::Writer(); write_thread_.EnterUnbatched(w, &mutex_); return reinterpret_cast(w); } void DBImpl::TEST_EndWrite(void* w) { auto writer = reinterpret_cast(w); write_thread_.ExitUnbatched(writer); delete writer; } size_t DBImpl::TEST_LogsToFreeSize() { InstrumentedMutexLock l(&mutex_); return logs_to_free_.size(); } uint64_t DBImpl::TEST_LogfileNumber() { InstrumentedMutexLock l(&mutex_); return logfile_number_; } Status DBImpl::TEST_GetAllImmutableCFOptions( std::unordered_map* iopts_map) { std::vector cf_names; std::vector iopts; { InstrumentedMutexLock l(&mutex_); for (auto cfd : *versions_->GetColumnFamilySet()) { cf_names.push_back(cfd->GetName()); iopts.push_back(cfd->ioptions()); } } iopts_map->clear(); for (size_t i = 0; i < cf_names.size(); ++i) { iopts_map->insert({cf_names[i], iopts[i]}); } return Status::OK(); } uint64_t DBImpl::TEST_FindMinLogContainingOutstandingPrep() { return logs_with_prep_tracker_.FindMinLogContainingOutstandingPrep(); } size_t DBImpl::TEST_PreparedSectionCompletedSize() { return logs_with_prep_tracker_.TEST_PreparedSectionCompletedSize(); } size_t DBImpl::TEST_LogsWithPrepSize() { return logs_with_prep_tracker_.TEST_LogsWithPrepSize(); } uint64_t DBImpl::TEST_FindMinPrepLogReferencedByMemTable() { autovector empty_list; return FindMinPrepLogReferencedByMemTable(versions_.get(), nullptr, empty_list); } Status DBImpl::TEST_GetLatestMutableCFOptions( ColumnFamilyHandle* column_family, MutableCFOptions* mutable_cf_options) { InstrumentedMutexLock l(&mutex_); auto cfh = reinterpret_cast(column_family); 
*mutable_cf_options = *cfh->cfd()->GetLatestMutableCFOptions(); return Status::OK(); } int DBImpl::TEST_BGCompactionsAllowed() const { InstrumentedMutexLock l(&mutex_); return GetBGJobLimits().max_compactions; } int DBImpl::TEST_BGFlushesAllowed() const { InstrumentedMutexLock l(&mutex_); return GetBGJobLimits().max_flushes; } SequenceNumber DBImpl::TEST_GetLastVisibleSequence() const { if (last_seq_same_as_publish_seq_) { return versions_->LastSequence(); } else { return versions_->LastAllocatedSequence(); } } size_t DBImpl::TEST_GetWalPreallocateBlockSize( uint64_t write_buffer_size) const { InstrumentedMutexLock l(&mutex_); return GetWalPreallocateBlockSize(write_buffer_size); } void DBImpl::TEST_WaitForDumpStatsRun(std::function callback) const { if (thread_dump_stats_ != nullptr) { thread_dump_stats_->TEST_WaitForRun(callback); } } void DBImpl::TEST_WaitForPersistStatsRun(std::function callback) const { if (thread_persist_stats_ != nullptr) { thread_persist_stats_->TEST_WaitForRun(callback); } } bool DBImpl::TEST_IsPersistentStatsEnabled() const { return thread_persist_stats_ && thread_persist_stats_->IsRunning(); } size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const { return EstimateInMemoryStatsHistorySize(); } } // namespace ROCKSDB_NAMESPACE #endif // NDEBUG rocksdb-6.11.4/db/db_impl/db_impl_experimental.cc000066400000000000000000000131371370372246700217370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_impl/db_impl.h" #include #include #include "db/column_family.h" #include "db/job_context.h" #include "db/version_set.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE Status DBImpl::SuggestCompactRange(ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); InternalKey start_key, end_key; if (begin != nullptr) { start_key.SetMinPossibleForUserKey(*begin); } if (end != nullptr) { end_key.SetMaxPossibleForUserKey(*end); } { InstrumentedMutexLock l(&mutex_); auto vstorage = cfd->current()->storage_info(); for (int level = 0; level < vstorage->num_non_empty_levels() - 1; ++level) { std::vector inputs; vstorage->GetOverlappingInputs( level, begin == nullptr ? nullptr : &start_key, end == nullptr ? nullptr : &end_key, &inputs); for (auto f : inputs) { f->marked_for_compaction = true; } } // Since we have some more files to compact, we should also recompute // compaction score vstorage->ComputeCompactionScore(*cfd->ioptions(), *cfd->GetLatestMutableCFOptions()); SchedulePendingCompaction(cfd); MaybeScheduleFlushOrCompaction(); } return Status::OK(); } Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { assert(column_family); if (target_level < 1) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "PromoteL0 FAILED. Invalid target level %d\n", target_level); return Status::InvalidArgument("Invalid target level"); } Status status; VersionEdit edit; JobContext job_context(next_job_id_.fetch_add(1), true); { InstrumentedMutexLock l(&mutex_); auto* cfd = static_cast(column_family)->cfd(); const auto* vstorage = cfd->current()->storage_info(); if (target_level >= vstorage->num_levels()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "PromoteL0 FAILED. Target level %d does not exist\n", target_level); job_context.Clean(); return Status::InvalidArgument("Target level does not exist"); } // Sort L0 files by range. 
const InternalKeyComparator* icmp = &cfd->internal_comparator(); auto l0_files = vstorage->LevelFiles(0); std::sort(l0_files.begin(), l0_files.end(), [icmp](FileMetaData* f1, FileMetaData* f2) { return icmp->Compare(f1->largest, f2->largest) < 0; }); // Check that no L0 file is being compacted and that they have // non-overlapping ranges. for (size_t i = 0; i < l0_files.size(); ++i) { auto f = l0_files[i]; if (f->being_compacted) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "PromoteL0 FAILED. File %" PRIu64 " being compacted\n", f->fd.GetNumber()); job_context.Clean(); return Status::InvalidArgument("PromoteL0 called during L0 compaction"); } if (i == 0) continue; auto prev_f = l0_files[i - 1]; if (icmp->Compare(prev_f->largest, f->smallest) >= 0) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "PromoteL0 FAILED. Files %" PRIu64 " and %" PRIu64 " have overlapping ranges\n", prev_f->fd.GetNumber(), f->fd.GetNumber()); job_context.Clean(); return Status::InvalidArgument("L0 has overlapping files"); } } // Check that all levels up to target_level are empty. for (int level = 1; level <= target_level; ++level) { if (vstorage->NumLevelFiles(level) > 0) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "PromoteL0 FAILED. 
Level %d not empty\n", level); job_context.Clean(); return Status::InvalidArgument( "All levels up to target_level " "must be empty"); } } edit.SetColumnFamily(cfd->GetID()); for (const auto& f : l0_files) { edit.DeleteFile(0, f->fd.GetNumber()); edit.AddFile(target_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno, f->marked_for_compaction, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->file_checksum, f->file_checksum_func_name); } status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, &job_context.superversion_contexts[0], *cfd->GetLatestMutableCFOptions()); } } // lock released here LogFlush(immutable_db_options_.info_log); job_context.Clean(); return status; } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_files.cc000066400000000000000000000726621370372246700203540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_impl/db_impl.h"

#include <cinttypes>
#include <set>
#include <unordered_set>
#include "db/event_helpers.h"
#include "db/memtable_list.h"
#include "file/file_util.h"
#include "file/filename.h"
#include "file/sst_file_manager_impl.h"
#include "util/autovector.h"

namespace ROCKSDB_NAMESPACE {

// Returns the smallest WAL number that must be kept. In 2PC mode the value
// comes from VersionSet::min_log_number_to_keep_2pc(); otherwise it is the
// smallest log number that still holds unflushed data.
uint64_t DBImpl::MinLogNumberToKeep() {
  if (allow_2pc()) {
    return versions_->min_log_number_to_keep_2pc();
  } else {
    return versions_->MinLogNumberWithUnflushedData();
  }
}

// Returns the first entry of pending_outputs_, or the maximum uint64_t value
// when pending_outputs_ is empty.
// REQUIRES: mutex_ held.
uint64_t DBImpl::MinObsoleteSstNumberToKeep() {
  mutex_.AssertHeld();
  if (!pending_outputs_.empty()) {
    return *pending_outputs_.begin();
  }
  return std::numeric_limits<uint64_t>::max();
}

// Acquires mutex_ and increments the "file deletions disabled" counter.
Status DBImpl::DisableFileDeletions() {
  InstrumentedMutexLock l(&mutex_);
  return DisableFileDeletionsWithLock();
}

// Increments the "file deletions disabled" counter and logs whether this call
// was the one that disabled deletions or whether they were already disabled.
// REQUIRES: mutex_ held.
Status DBImpl::DisableFileDeletionsWithLock() {
  mutex_.AssertHeld();
  ++disable_delete_obsolete_files_;
  if (disable_delete_obsolete_files_ == 1) {
    ROCKS_LOG_INFO(immutable_db_options_.info_log, "File Deletions Disabled");
  } else {
    ROCKS_LOG_WARN(immutable_db_options_.info_log,
                   "File Deletions Disabled, but already disabled. Counter: %d",
                   disable_delete_obsolete_files_);
  }
  return Status::OK();
}

// Decrements the "file deletions disabled" counter (or resets it to zero when
// `force` is true). When the counter reaches zero, obsolete files are
// collected under the mutex and purged after the mutex has been released.
// Always returns Status::OK().
Status DBImpl::EnableFileDeletions(bool force) {
  // Job id == 0 means that this is not our background process, but rather
  // user thread
  JobContext job_context(0);
  bool file_deletion_enabled = false;
  {
    InstrumentedMutexLock l(&mutex_);
    if (force) {
      // if force, we need to enable file deletions right away
      disable_delete_obsolete_files_ = 0;
    } else if (disable_delete_obsolete_files_ > 0) {
      --disable_delete_obsolete_files_;
    }
    if (disable_delete_obsolete_files_ == 0) {
      file_deletion_enabled = true;
      FindObsoleteFiles(&job_context, true);
      bg_cv_.SignalAll();
    }
  }
  if (file_deletion_enabled) {
    ROCKS_LOG_INFO(immutable_db_options_.info_log, "File Deletions Enabled");
    if (job_context.HaveSomethingToDelete()) {
      PurgeObsoleteFiles(job_context);
    }
  } else {
    ROCKS_LOG_WARN(immutable_db_options_.info_log,
                   "File Deletions Enable, but not really enabled. Counter: %d",
                   disable_delete_obsolete_files_);
  }
  job_context.Clean();
  LogFlush(immutable_db_options_.info_log);
  return Status::OK();
}

// Returns true when the "file deletions disabled" counter is zero.
bool DBImpl::IsFileDeletionsEnabled() const {
  return 0 == disable_delete_obsolete_files_;
}

// * Returns the list of live files in 'sst_live' and 'blob_live'.
// If it's doing full scan:
// * Returns the list of all files in the filesystem in
// 'full_scan_candidate_files'.
// Otherwise, gets obsolete files from VersionSet.
// no_full_scan = true -- never do the full scan using GetChildren()
// force = false -- don't force the full scan, except every
//  mutable_db_options_.delete_obsolete_files_period_micros
// force = true -- force the full scan
// REQUIRES: mutex_ held.
void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
                               bool no_full_scan) {
  mutex_.AssertHeld();

  // if deletion is disabled, do nothing
  if (disable_delete_obsolete_files_ > 0) {
    return;
  }

  bool doing_the_full_scan = false;

  // logic for figuring out if we're doing the full scan
  if (no_full_scan) {
    doing_the_full_scan = false;
  } else if (force ||
             mutable_db_options_.delete_obsolete_files_period_micros == 0) {
    doing_the_full_scan = true;
  } else {
    const uint64_t now_micros = env_->NowMicros();
    if ((delete_obsolete_files_last_run_ +
         mutable_db_options_.delete_obsolete_files_period_micros) <
        now_micros) {
      doing_the_full_scan = true;
      delete_obsolete_files_last_run_ = now_micros;
    }
  }

  // don't delete files that might be currently written to from compaction
  // threads
  // Since job_context->min_pending_output is set, until file scan finishes,
  // mutex_ cannot be released. Otherwise, we might see no min_pending_output
  // here but later find newer generated unfinalized files while scanning.
  job_context->min_pending_output = MinObsoleteSstNumberToKeep();

  // Get obsolete files.  This function will also update the list of
  // pending files in VersionSet().
  versions_->GetObsoleteFiles(
      &job_context->sst_delete_files, &job_context->blob_delete_files,
      &job_context->manifest_delete_files, job_context->min_pending_output);

  // Mark the elements in job_context->sst_delete_files and
  // job_context->blob_delete_files as "grabbed for purge" so that other threads
  // calling FindObsoleteFiles with full_scan=true will not add these files to
  // candidate list for purge.
  for (const auto& sst_to_del : job_context->sst_delete_files) {
    MarkAsGrabbedForPurge(sst_to_del.metadata->fd.GetNumber());
  }

  for (const auto& blob_file : job_context->blob_delete_files) {
    MarkAsGrabbedForPurge(blob_file.GetBlobFileNumber());
  }

  // store the current filenum, lognum, etc
  job_context->manifest_file_number = versions_->manifest_file_number();
  job_context->pending_manifest_file_number =
      versions_->pending_manifest_file_number();
  job_context->log_number = MinLogNumberToKeep();
  job_context->prev_log_number = versions_->prev_log_number();

  versions_->AddLiveFiles(&job_context->sst_live, &job_context->blob_live);
  if (doing_the_full_scan) {
    InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
                                  dbname_);
    std::set<std::string> paths;
    for (size_t path_id = 0; path_id < immutable_db_options_.db_paths.size();
         path_id++) {
      paths.insert(immutable_db_options_.db_paths[path_id].path);
    }

    // Note that if cf_paths is not specified in the ColumnFamilyOptions
    // of a particular column family, we use db_paths as the cf_paths
    // setting. Hence, there can be multiple duplicates of files from db_paths
    // in the following code. The duplicates are removed while identifying
    // unique files in PurgeObsoleteFiles.
    for (auto cfd : *versions_->GetColumnFamilySet()) {
      for (size_t path_id = 0; path_id < cfd->ioptions()->cf_paths.size();
           path_id++) {
        // std::set::insert is a no-op for a path that is already present.
        paths.insert(cfd->ioptions()->cf_paths[path_id].path);
      }
    }

    for (auto& path : paths) {
      // set of all files in the directory. We'll exclude files that are still
      // alive in the subsequent processings.
      std::vector<std::string> files;
      env_->GetChildren(path, &files);  // Ignore errors
      for (const std::string& file : files) {
        uint64_t number;
        FileType type;
        // 1. If we cannot parse the file name, we skip;
        // 2. If the file with file_number equals number has already been
        // grabbed for purge by another compaction job, or it has already been
        // scheduled for purge, we also skip it if we
        // are doing full scan in order to avoid double deletion of the same
        // file under race conditions. See
        // https://github.com/facebook/rocksdb/issues/3573
        if (!ParseFileName(file, &number, info_log_prefix.prefix, &type) ||
            !ShouldPurge(number)) {
          continue;
        }

        // TODO(icanadi) clean up this mess to avoid having one-off "/" prefixes
        job_context->full_scan_candidate_files.emplace_back("/" + file, path);
      }
    }

    // Add log files in wal_dir
    if (immutable_db_options_.wal_dir != dbname_) {
      std::vector<std::string> log_files;
      env_->GetChildren(immutable_db_options_.wal_dir,
                        &log_files);  // Ignore errors
      for (const std::string& log_file : log_files) {
        job_context->full_scan_candidate_files.emplace_back(
            log_file, immutable_db_options_.wal_dir);
      }
    }
    // Add info log files in db_log_dir
    if (!immutable_db_options_.db_log_dir.empty() &&
        immutable_db_options_.db_log_dir != dbname_) {
      std::vector<std::string> info_log_files;
      // Ignore errors
      env_->GetChildren(immutable_db_options_.db_log_dir, &info_log_files);
      for (std::string& log_file : info_log_files) {
        job_context->full_scan_candidate_files.emplace_back(
            log_file, immutable_db_options_.db_log_dir);
      }
    }
  }

  // logs_ is empty when called during recovery, in which case there can't yet
  // be any tracked obsolete logs
  if (!alive_log_files_.empty() && !logs_.empty()) {
    uint64_t min_log_number = job_context->log_number;
    size_t num_alive_log_files = alive_log_files_.size();
    // find newly obsoleted log files
    while (alive_log_files_.begin()->number < min_log_number) {
      auto& earliest = *alive_log_files_.begin();
      if (immutable_db_options_.recycle_log_file_num >
          log_recycle_files_.size()) {
        ROCKS_LOG_INFO(immutable_db_options_.info_log,
                       "adding log %" PRIu64 " to recycle list\n",
                       earliest.number);
        log_recycle_files_.push_back(earliest.number);
      } else {
        job_context->log_delete_files.push_back(earliest.number);
      }
      if (job_context->size_log_to_delete == 0) {
        job_context->prev_total_log_size = total_log_size_;
        job_context->num_alive_log_files = num_alive_log_files;
      }
      job_context->size_log_to_delete += earliest.size;
      total_log_size_ -= earliest.size;
      if (two_write_queues_) {
        log_write_mutex_.Lock();
      }
      alive_log_files_.pop_front();
      if (two_write_queues_) {
        log_write_mutex_.Unlock();
      }
      // Current log should always stay alive since it can't have
      // number < MinLogNumber().
      assert(alive_log_files_.size());
    }
    while (!logs_.empty() && logs_.front().number < min_log_number) {
      auto& log = logs_.front();
      if (log.getting_synced) {
        log_sync_cv_.Wait();
        // logs_ could have changed while we were waiting.
        continue;
      }
      logs_to_free_.push_back(log.ReleaseWriter());
      {
        InstrumentedMutexLock wl(&log_write_mutex_);
        logs_.pop_front();
      }
    }
    // Current log cannot be obsolete.
    assert(!logs_.empty());
  }

  // We're just cleaning up for DB::Write().
  assert(job_context->logs_to_free.empty());
  job_context->logs_to_free = logs_to_free_;
  job_context->log_recycle_files.assign(log_recycle_files_.begin(),
                                        log_recycle_files_.end());
  if (job_context->HaveSomethingToDelete()) {
    ++pending_purge_obsolete_files_;
  }
  logs_to_free_.clear();
}

namespace {
// Orders candidate files in descending order of file_name, breaking ties by
// descending file_path. Used to sort candidates before de-duplication.
bool CompareCandidateFile(const JobContext::CandidateFileInfo& first,
                          const JobContext::CandidateFileInfo& second) {
  if (first.file_name > second.file_name) {
    return true;
  } else if (first.file_name < second.file_name) {
    return false;
  } else {
    return (first.file_path > second.file_path);
  }
}
}  // namespace

// Delete obsolete files and log status and information of file deletion
void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
                                    const std::string& path_to_sync,
                                    FileType type, uint64_t number) {
  TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl::BeforeDeletion",
                           const_cast<std::string*>(&fname));

  Status file_deletion_status;
  if (type == kTableFile || type == kBlobFile || type == kLogFile) {
    // Table, blob and WAL files go through DeleteDBFile; every other file
    // type is removed directly through the Env.
    file_deletion_status =
        DeleteDBFile(&immutable_db_options_, fname, path_to_sync,
                     /*force_bg=*/false, /*force_fg=*/!wal_in_db_path_);
  } else {
    file_deletion_status = env_->DeleteFile(fname);
  }
  TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion",
                           &file_deletion_status);
  if (file_deletion_status.ok()) {
    ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                    "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", job_id,
                    fname.c_str(), type, number,
                    file_deletion_status.ToString().c_str());
  } else if (env_->FileExists(fname).IsNotFound()) {
    ROCKS_LOG_INFO(
        immutable_db_options_.info_log,
        "[JOB %d] Tried to delete a non-existing file %s type=%d #%" PRIu64
        " -- %s\n",
        job_id, fname.c_str(), type, number,
        file_deletion_status.ToString().c_str());
  } else {
    ROCKS_LOG_ERROR(immutable_db_options_.info_log,
                    "[JOB %d] Failed to delete %s type=%d #%" PRIu64 " -- %s\n",
                    job_id, fname.c_str(), type, number,
                    file_deletion_status.ToString().c_str());
  }
  if (type == kTableFile) {
    EventHelpers::LogAndNotifyTableFileDeletion(
        &event_logger_, job_id, number, fname, file_deletion_status, GetName(),
        immutable_db_options_.listeners);
  }
}

// Diffs the files listed in filenames and those that do not
// belong to live files are possibly removed. Also, removes all the
// files in sst_delete_files and log_delete_files.
// It is not necessary to hold the mutex when invoking this method.
// When schedule_only is true, files are handed to SchedulePendingPurge()
// instead of being deleted immediately.
void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
  TEST_SYNC_POINT("DBImpl::PurgeObsoleteFiles:Begin");
  // we'd better have sth to delete
  assert(state.HaveSomethingToDelete());

  // FindObsoleteFiles() should've populated this so nonzero
  assert(state.manifest_file_number != 0);

  // Now, convert lists to unordered sets, WITHOUT mutex held; set is slow.
  std::unordered_set<uint64_t> sst_live_set(state.sst_live.begin(),
                                            state.sst_live.end());
  std::unordered_set<uint64_t> blob_live_set(state.blob_live.begin(),
                                             state.blob_live.end());
  std::unordered_set<uint64_t> log_recycle_files_set(
      state.log_recycle_files.begin(), state.log_recycle_files.end());

  auto candidate_files = state.full_scan_candidate_files;
  candidate_files.reserve(
      candidate_files.size() + state.sst_delete_files.size() +
      state.blob_delete_files.size() + state.log_delete_files.size() +
      state.manifest_delete_files.size());
  // We may ignore the dbname when generating the file names.
  for (auto& file : state.sst_delete_files) {
    candidate_files.emplace_back(
        MakeTableFileName(file.metadata->fd.GetNumber()), file.path);
    if (file.metadata->table_reader_handle) {
      table_cache_->Release(file.metadata->table_reader_handle);
    }
    file.DeleteMetadata();
  }

  for (const auto& blob_file : state.blob_delete_files) {
    candidate_files.emplace_back(BlobFileName(blob_file.GetBlobFileNumber()),
                                 blob_file.GetPath());
  }

  for (auto file_num : state.log_delete_files) {
    if (file_num > 0) {
      candidate_files.emplace_back(LogFileName(file_num),
                                   immutable_db_options_.wal_dir);
    }
  }
  for (const auto& filename : state.manifest_delete_files) {
    candidate_files.emplace_back(filename, dbname_);
  }

  // dedup state.candidate_files so we don't try to delete the same
  // file twice
  std::sort(candidate_files.begin(), candidate_files.end(),
            CompareCandidateFile);
  candidate_files.erase(
      std::unique(candidate_files.begin(), candidate_files.end()),
      candidate_files.end());

  if (state.prev_total_log_size > 0) {
    ROCKS_LOG_INFO(immutable_db_options_.info_log,
                   "[JOB %d] Try to delete WAL files size %" PRIu64
                   ", prev total WAL file size %" PRIu64
                   ", number of live WAL files %" ROCKSDB_PRIszt ".\n",
                   state.job_id, state.size_log_to_delete,
                   state.prev_total_log_size, state.num_alive_log_files);
  }

  std::vector<std::string> old_info_log_files;
  InfoLogPrefix info_log_prefix(!immutable_db_options_.db_log_dir.empty(),
                                dbname_);

  // File numbers of most recent two OPTIONS file in candidate_files (found in
  // previous FindObsoleteFiles(full_scan=true))
  // At this point, there must not be any duplicate file numbers in
  // candidate_files.
  uint64_t optsfile_num1 = std::numeric_limits<uint64_t>::min();
  uint64_t optsfile_num2 = std::numeric_limits<uint64_t>::min();
  for (const auto& candidate_file : candidate_files) {
    const std::string& fname = candidate_file.file_name;
    uint64_t number;
    FileType type;
    if (!ParseFileName(fname, &number, info_log_prefix.prefix, &type) ||
        type != kOptionsFile) {
      continue;
    }
    if (number > optsfile_num1) {
      optsfile_num2 = optsfile_num1;
      optsfile_num1 = number;
    } else if (number > optsfile_num2) {
      optsfile_num2 = number;
    }
  }

  // Close WALs before trying to delete them.
  for (const auto w : state.logs_to_free) {
    // TODO: maybe check the return value of Close.
    w->Close();
  }

  bool own_files = OwnTablesAndLogs();
  std::unordered_set<uint64_t> files_to_del;
  for (const auto& candidate_file : candidate_files) {
    const std::string& to_delete = candidate_file.file_name;
    uint64_t number;
    FileType type;
    // Ignore file if we cannot recognize it.
    if (!ParseFileName(to_delete, &number, info_log_prefix.prefix, &type)) {
      continue;
    }

    bool keep = true;
    switch (type) {
      case kLogFile:
        keep = ((number >= state.log_number) ||
                (number == state.prev_log_number) ||
                (log_recycle_files_set.find(number) !=
                 log_recycle_files_set.end()));
        break;
      case kDescriptorFile:
        // Keep my manifest file, and any newer incarnations'
        // (can happen during manifest roll)
        keep = (number >= state.manifest_file_number);
        break;
      case kTableFile:
        // If the second condition is not there, this makes
        // DontDeletePendingOutputs fail
        keep = (sst_live_set.find(number) != sst_live_set.end()) ||
               number >= state.min_pending_output;
        if (!keep) {
          files_to_del.insert(number);
        }
        break;
      case kBlobFile:
        keep = number >= state.min_pending_output ||
               (blob_live_set.find(number) != blob_live_set.end());
        if (!keep) {
          files_to_del.insert(number);
        }
        break;
      case kTempFile:
        // Any temp files that are currently being written to must
        // be recorded in pending_outputs_, which is inserted into "live".
        // Also, SetCurrentFile creates a temp file when writing out new
        // manifest, which is equal to state.pending_manifest_file_number. We
        // should not delete that file
        //
        // TODO(yhchiang): carefully modify the third condition to safely
        //                 remove the temp options files.
        keep = (sst_live_set.find(number) != sst_live_set.end()) ||
               (blob_live_set.find(number) != blob_live_set.end()) ||
               (number == state.pending_manifest_file_number) ||
               (to_delete.find(kOptionsFileNamePrefix) != std::string::npos);
        break;
      case kInfoLogFile:
        keep = true;
        if (number != 0) {
          old_info_log_files.push_back(to_delete);
        }
        break;
      case kOptionsFile:
        keep = (number >= optsfile_num2);
        TEST_SYNC_POINT_CALLBACK(
            "DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:1",
            reinterpret_cast<void*>(&number));
        TEST_SYNC_POINT_CALLBACK(
            "DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:2",
            reinterpret_cast<void*>(&keep));
        break;
      case kCurrentFile:
      case kDBLockFile:
      case kIdentityFile:
      case kMetaDatabase:
        keep = true;
        break;
    }

    if (keep) {
      continue;
    }

    std::string fname;
    std::string dir_to_sync;
    if (type == kTableFile) {
      // evict from cache
      TableCache::Evict(table_cache_.get(), number);
      fname = MakeTableFileName(candidate_file.file_path, number);
      dir_to_sync = candidate_file.file_path;
    } else if (type == kBlobFile) {
      fname = BlobFileName(candidate_file.file_path, number);
      dir_to_sync = candidate_file.file_path;
    } else {
      dir_to_sync =
          (type == kLogFile) ? immutable_db_options_.wal_dir : dbname_;
      // Insert a "/" between directory and file name only when neither side
      // already supplies one.
      fname = dir_to_sync +
              ((!dir_to_sync.empty() && dir_to_sync.back() == '/') ||
                       (!to_delete.empty() && to_delete.front() == '/')
                   ? ""
                   : "/") +
              to_delete;
    }

#ifndef ROCKSDB_LITE
    if (type == kLogFile && (immutable_db_options_.wal_ttl_seconds > 0 ||
                             immutable_db_options_.wal_size_limit_mb > 0)) {
      wal_manager_.ArchiveWALFile(fname, number);
      continue;
    }
#endif  // !ROCKSDB_LITE

    // If I do not own these files, e.g. secondary instance with max_open_files
    // = -1, then no need to delete or schedule delete these files since they
    // will be removed by their owner, e.g. the primary instance.
    if (!own_files) {
      continue;
    }
    if (schedule_only) {
      InstrumentedMutexLock guard_lock(&mutex_);
      SchedulePendingPurge(fname, dir_to_sync, type, number, state.job_id);
    } else {
      DeleteObsoleteFileImpl(state.job_id, fname, dir_to_sync, type, number);
    }
  }

  {
    // After purging obsolete files, remove them from files_grabbed_for_purge_.
    InstrumentedMutexLock guard_lock(&mutex_);
    autovector<uint64_t> to_be_removed;
    for (auto fn : files_grabbed_for_purge_) {
      if (files_to_del.count(fn) != 0) {
        to_be_removed.emplace_back(fn);
      }
    }
    for (auto fn : to_be_removed) {
      files_grabbed_for_purge_.erase(fn);
    }
  }

  // Delete old info log files.
  size_t old_info_log_file_count = old_info_log_files.size();
  if (old_info_log_file_count != 0 &&
      old_info_log_file_count >= immutable_db_options_.keep_log_file_num) {
    std::sort(old_info_log_files.begin(), old_info_log_files.end());
    size_t end =
        old_info_log_file_count - immutable_db_options_.keep_log_file_num;
    for (unsigned int i = 0; i <= end; i++) {
      std::string& to_delete = old_info_log_files.at(i);
      std::string full_path_to_delete =
          (immutable_db_options_.db_log_dir.empty()
               ? dbname_
               : immutable_db_options_.db_log_dir) +
          "/" + to_delete;
      ROCKS_LOG_INFO(immutable_db_options_.info_log,
                     "[JOB %d] Delete info log file %s\n", state.job_id,
                     full_path_to_delete.c_str());
      Status s = env_->DeleteFile(full_path_to_delete);
      if (!s.ok()) {
        if (env_->FileExists(full_path_to_delete).IsNotFound()) {
          ROCKS_LOG_INFO(
              immutable_db_options_.info_log,
              "[JOB %d] Tried to delete non-existing info log file %s FAILED "
              "-- %s\n",
              state.job_id, to_delete.c_str(), s.ToString().c_str());
        } else {
          ROCKS_LOG_ERROR(immutable_db_options_.info_log,
                          "[JOB %d] Delete info log file %s FAILED -- %s\n",
                          state.job_id, to_delete.c_str(),
                          s.ToString().c_str());
        }
      }
    }
  }
#ifndef ROCKSDB_LITE
  wal_manager_.PurgeObsoleteWALFiles();
#endif  // ROCKSDB_LITE
  LogFlush(immutable_db_options_.info_log);
  InstrumentedMutexLock l(&mutex_);
  --pending_purge_obsolete_files_;
  assert(pending_purge_obsolete_files_ >= 0);
  if (pending_purge_obsolete_files_ == 0) {
    bg_cv_.SignalAll();
  }
  TEST_SYNC_POINT("DBImpl::PurgeObsoleteFiles:End");
}

// Forces a full scan for obsolete files and purges them. The mutex is
// released while the purge runs and re-acquired before returning.
// REQUIRES: mutex_ held.
void DBImpl::DeleteObsoleteFiles() {
  mutex_.AssertHeld();
  JobContext job_context(next_job_id_.fetch_add(1));
  FindObsoleteFiles(&job_context, true);

  mutex_.Unlock();
  if (job_context.HaveSomethingToDelete()) {
    PurgeObsoleteFiles(job_context);
  }
  job_context.Clean();
  mutex_.Lock();
}

// Returns the smallest non-zero log number holding a prepared section that is
// referenced by the immutable or mutable memtables of any column family other
// than `cfd_to_flush` (dropped column families are skipped). Returns 0 when
// no such log exists.
uint64_t FindMinPrepLogReferencedByMemTable(
    VersionSet* vset, const ColumnFamilyData* cfd_to_flush,
    const autovector<MemTable*>& memtables_to_flush) {
  uint64_t min_log = 0;

  // we must look through the memtables for two phase transactions
  // that have been committed but not yet flushed
  for (auto loop_cfd : *vset->GetColumnFamilySet()) {
    if (loop_cfd->IsDropped() || loop_cfd == cfd_to_flush) {
      continue;
    }

    auto log = loop_cfd->imm()->PrecomputeMinLogContainingPrepSection(
        memtables_to_flush);

    if (log > 0 && (min_log == 0 || log < min_log)) {
      min_log = log;
    }

    log = loop_cfd->mem()->GetMinLogContainingPrepSection();

    if (log > 0 && (min_log == 0 || log < min_log)) {
      min_log = log;
    }
  }

  return min_log;
}

uint64_t PrecomputeMinLogNumberToKeep( VersionSet* vset, const ColumnFamilyData& cfd_to_flush, autovector edit_list, const autovector& memtables_to_flush, LogsWithPrepTracker* prep_tracker) { assert(vset != nullptr); assert(prep_tracker != nullptr); // Calculate updated min_log_number_to_keep // Since the function should only be called in 2pc mode, log number in // the version edit should be sufficient. // Precompute the min log number containing unflushed data for the column // family being flushed (`cfd_to_flush`). uint64_t cf_min_log_number_to_keep = 0; for (auto& e : edit_list) { if (e->HasLogNumber()) { cf_min_log_number_to_keep = std::max(cf_min_log_number_to_keep, e->GetLogNumber()); } } if (cf_min_log_number_to_keep == 0) { // No version edit contains information on log number. The log number // for this column family should stay the same as it is. cf_min_log_number_to_keep = cfd_to_flush.GetLogNumber(); } // Get min log number containing unflushed data for other column families. uint64_t min_log_number_to_keep = vset->PreComputeMinLogNumberWithUnflushedData(&cfd_to_flush); if (cf_min_log_number_to_keep != 0) { min_log_number_to_keep = std::min(cf_min_log_number_to_keep, min_log_number_to_keep); } // if are 2pc we must consider logs containing prepared // sections of outstanding transactions. // // We must check min logs with outstanding prep before we check // logs references by memtables because a log referenced by the // first data structure could transition to the second under us. // // TODO: iterating over all column families under db mutex. 
// should find more optimal solution auto min_log_in_prep_heap = prep_tracker->FindMinLogContainingOutstandingPrep(); if (min_log_in_prep_heap != 0 && min_log_in_prep_heap < min_log_number_to_keep) { min_log_number_to_keep = min_log_in_prep_heap; } uint64_t min_log_refed_by_mem = FindMinPrepLogReferencedByMemTable( vset, &cfd_to_flush, memtables_to_flush); if (min_log_refed_by_mem != 0 && min_log_refed_by_mem < min_log_number_to_keep) { min_log_number_to_keep = min_log_refed_by_mem; } return min_log_number_to_keep; } Status DBImpl::FinishBestEffortsRecovery() { mutex_.AssertHeld(); std::vector paths; paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator))); for (const auto& db_path : immutable_db_options_.db_paths) { paths.push_back( NormalizePath(db_path.path + std::string(1, kFilePathSeparator))); } for (const auto* cfd : *versions_->GetColumnFamilySet()) { for (const auto& cf_path : cfd->ioptions()->cf_paths) { paths.push_back( NormalizePath(cf_path.path + std::string(1, kFilePathSeparator))); } } // Dedup paths std::sort(paths.begin(), paths.end()); paths.erase(std::unique(paths.begin(), paths.end()), paths.end()); uint64_t next_file_number = versions_->current_next_file_number(); uint64_t largest_file_number = next_file_number; std::set files_to_delete; for (const auto& path : paths) { std::vector files; env_->GetChildren(path, &files); for (const auto& fname : files) { uint64_t number = 0; FileType type; if (!ParseFileName(fname, &number, &type)) { continue; } // path ends with '/' or '\\' const std::string normalized_fpath = path + fname; largest_file_number = std::max(largest_file_number, number); if (type == kTableFile && number >= next_file_number && files_to_delete.find(normalized_fpath) == files_to_delete.end()) { files_to_delete.insert(normalized_fpath); } } } if (largest_file_number > next_file_number) { versions_->next_file_number_.store(largest_file_number + 1); } VersionEdit edit; 
edit.SetNextFile(versions_->next_file_number_.load()); assert(versions_->GetColumnFamilySet()); ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault(); assert(default_cfd); // Even if new_descriptor_log is false, we will still switch to a new // MANIFEST and update CURRENT file, since this is in recovery. Status s = versions_->LogAndApply( default_cfd, *default_cfd->GetLatestMutableCFOptions(), &edit, &mutex_, directories_.GetDbDir(), /*new_descriptor_log*/ false); if (!s.ok()) { return s; } mutex_.Unlock(); for (const auto& fname : files_to_delete) { s = env_->DeleteFile(fname); if (!s.ok()) { break; } } mutex_.Lock(); return s; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_open.cc000066400000000000000000002002371370372246700202020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_impl/db_impl.h" #include #include "db/builder.h" #include "db/error_handler.h" #include "env/composite_env_wrapper.h" #include "file/read_write_util.h" #include "file/sst_file_manager_impl.h" #include "file/writable_file_writer.h" #include "monitoring/persistent_stats_history.h" #include "options/options_helper.h" #include "rocksdb/wal_filter.h" #include "table/block_based/block_based_table_factory.h" #include "test_util/sync_point.h" #include "util/rate_limiter.h" namespace ROCKSDB_NAMESPACE { Options SanitizeOptions(const std::string& dbname, const Options& src) { auto db_options = SanitizeOptions(dbname, DBOptions(src)); ImmutableDBOptions immutable_db_options(db_options); auto cf_options = SanitizeOptions(immutable_db_options, ColumnFamilyOptions(src)); return Options(db_options, cf_options); } DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { DBOptions result(src); if (result.env == nullptr) { result.env = Env::Default(); } // result.max_open_files means an "infinite" open files. 
if (result.max_open_files != -1) { int max_max_open_files = port::GetMaxOpenFiles(); if (max_max_open_files == -1) { max_max_open_files = 0x400000; } ClipToRange(&result.max_open_files, 20, max_max_open_files); TEST_SYNC_POINT_CALLBACK("SanitizeOptions::AfterChangeMaxOpenFiles", &result.max_open_files); } if (result.info_log == nullptr) { Status s = CreateLoggerFromOptions(dbname, result, &result.info_log); if (!s.ok()) { // No place suitable for logging result.info_log = nullptr; } } if (!result.write_buffer_manager) { result.write_buffer_manager.reset( new WriteBufferManager(result.db_write_buffer_size)); } auto bg_job_limits = DBImpl::GetBGJobLimits( result.max_background_flushes, result.max_background_compactions, result.max_background_jobs, true /* parallelize_compactions */); result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_compactions, Env::Priority::LOW); result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_flushes, Env::Priority::HIGH); if (result.rate_limiter.get() != nullptr) { if (result.bytes_per_sync == 0) { result.bytes_per_sync = 1024 * 1024; } } if (result.delayed_write_rate == 0) { if (result.rate_limiter.get() != nullptr) { result.delayed_write_rate = result.rate_limiter->GetBytesPerSecond(); } if (result.delayed_write_rate == 0) { result.delayed_write_rate = 16 * 1024 * 1024; } } if (result.WAL_ttl_seconds > 0 || result.WAL_size_limit_MB > 0) { result.recycle_log_file_num = false; } if (result.recycle_log_file_num && (result.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery || result.wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency)) { // kPointInTimeRecovery is inconsistent with recycle log file feature since // we define the "end" of the log as the first corrupt record we encounter. // kAbsoluteConsistency doesn't make sense because even a clean // shutdown leaves old junk at the end of the log file. 
result.recycle_log_file_num = 0; } if (result.wal_dir.empty()) { // Use dbname as default result.wal_dir = dbname; } if (result.wal_dir.back() == '/') { result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1); } if (result.db_paths.size() == 0) { result.db_paths.emplace_back(dbname, std::numeric_limits::max()); } if (result.use_direct_reads && result.compaction_readahead_size == 0) { TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr); result.compaction_readahead_size = 1024 * 1024 * 2; } if (result.compaction_readahead_size > 0 || result.use_direct_reads) { result.new_table_reader_for_compaction_inputs = true; } // Force flush on DB open if 2PC is enabled, since with 2PC we have no // guarantee that consecutive log files have consecutive sequence id, which // make recovery complicated. if (result.allow_2pc) { result.avoid_flush_during_recovery = false; } #ifndef ROCKSDB_LITE ImmutableDBOptions immutable_db_options(result); if (!IsWalDirSameAsDBPath(&immutable_db_options)) { // Either the WAL dir and db_paths[0]/db_name are not the same, or we // cannot tell for sure. 
In either case, assume they're different and // explicitly cleanup the trash log files (bypass DeleteScheduler) // Do this first so even if we end up calling // DeleteScheduler::CleanupDirectory on the same dir later, it will be // safe std::vector filenames; result.env->GetChildren(result.wal_dir, &filenames); for (std::string& filename : filenames) { if (filename.find(".log.trash", filename.length() - std::string(".log.trash").length()) != std::string::npos) { std::string trash_file = result.wal_dir + "/" + filename; result.env->DeleteFile(trash_file); } } } // When the DB is stopped, it's possible that there are some .trash files that // were not deleted yet, when we open the DB we will find these .trash files // and schedule them to be deleted (or delete immediately if SstFileManager // was not used) auto sfm = static_cast(result.sst_file_manager.get()); for (size_t i = 0; i < result.db_paths.size(); i++) { DeleteScheduler::CleanupDirectory(result.env, sfm, result.db_paths[i].path); } // Create a default SstFileManager for purposes of tracking compaction size // and facilitating recovery from out of space errors. 
if (result.sst_file_manager.get() == nullptr) { std::shared_ptr sst_file_manager( NewSstFileManager(result.env, result.info_log)); result.sst_file_manager = sst_file_manager; } #endif if (!result.paranoid_checks) { result.skip_checking_sst_file_sizes_on_db_open = true; ROCKS_LOG_INFO(result.info_log, "file size check will be skipped during open."); } return result; } namespace { Status SanitizeOptionsByTable( const DBOptions& db_opts, const std::vector& column_families) { Status s; for (auto cf : column_families) { s = cf.options.table_factory->SanitizeOptions(db_opts, cf.options); if (!s.ok()) { return s; } } return Status::OK(); } } // namespace Status DBImpl::ValidateOptions( const DBOptions& db_options, const std::vector& column_families) { Status s; for (auto& cfd : column_families) { s = ColumnFamilyData::ValidateOptions(db_options, cfd.options); if (!s.ok()) { return s; } } s = ValidateOptions(db_options); return s; } Status DBImpl::ValidateOptions(const DBOptions& db_options) { if (db_options.db_paths.size() > 4) { return Status::NotSupported( "More than four DB paths are not supported yet. "); } if (db_options.allow_mmap_reads && db_options.use_direct_reads) { // Protect against assert in PosixMMapReadableFile constructor return Status::NotSupported( "If memory mapped reads (allow_mmap_reads) are enabled " "then direct I/O reads (use_direct_reads) must be disabled. "); } if (db_options.allow_mmap_writes && db_options.use_direct_io_for_flush_and_compaction) { return Status::NotSupported( "If memory mapped writes (allow_mmap_writes) are enabled " "then direct I/O writes (use_direct_io_for_flush_and_compaction) must " "be disabled. 
"); } if (db_options.keep_log_file_num == 0) { return Status::InvalidArgument("keep_log_file_num must be greater than 0"); } if (db_options.unordered_write && !db_options.allow_concurrent_memtable_write) { return Status::InvalidArgument( "unordered_write is incompatible with !allow_concurrent_memtable_write"); } if (db_options.unordered_write && db_options.enable_pipelined_write) { return Status::InvalidArgument( "unordered_write is incompatible with enable_pipelined_write"); } if (db_options.atomic_flush && db_options.enable_pipelined_write) { return Status::InvalidArgument( "atomic_flush is incompatible with enable_pipelined_write"); } // TODO remove this restriction if (db_options.atomic_flush && db_options.best_efforts_recovery) { return Status::InvalidArgument( "atomic_flush is currently incompatible with best-efforts recovery"); } return Status::OK(); } Status DBImpl::NewDB() { VersionEdit new_db; Status s = SetIdentityFile(env_, dbname_); if (!s.ok()) { return s; } if (immutable_db_options_.write_dbid_to_manifest) { std::string temp_db_id; GetDbIdentityFromIdentityFile(&temp_db_id); new_db.SetDBId(temp_db_id); } new_db.SetLogNumber(0); new_db.SetNextFile(2); new_db.SetLastSequence(0); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Creating manifest 1 \n"); const std::string manifest = DescriptorFileName(dbname_, 1); { std::unique_ptr file; FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_); s = NewWritableFile(fs_.get(), manifest, &file, file_options); if (!s.ok()) { return s; } file->SetPreallocationBlockSize( immutable_db_options_.manifest_preallocation_size); std::unique_ptr file_writer(new WritableFileWriter( std::move(file), manifest, file_options, env_, nullptr /* stats */, immutable_db_options_.listeners)); log::Writer log(std::move(file_writer), 0, false); std::string record; new_db.EncodeTo(&record); s = log.AddRecord(record); if (s.ok()) { s = SyncManifest(env_, &immutable_db_options_, log.file()); } } if (s.ok()) { // Make 
"CURRENT" file that points to the new manifest file. s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir()); } else { fs_->DeleteFile(manifest, IOOptions(), nullptr); } return s; } IOStatus DBImpl::CreateAndNewDirectory( FileSystem* fs, const std::string& dirname, std::unique_ptr* directory) { // We call CreateDirIfMissing() as the directory may already exist (if we // are reopening a DB), when this happens we don't want creating the // directory to cause an error. However, we need to check if creating the // directory fails or else we may get an obscure message about the lock // file not existing. One real-world example of this occurring is if // env->CreateDirIfMissing() doesn't create intermediate directories, e.g. // when dbname_ is "dir/db" but when "dir" doesn't exist. IOStatus io_s = fs->CreateDirIfMissing(dirname, IOOptions(), nullptr); if (!io_s.ok()) { return io_s; } return fs->NewDirectory(dirname, IOOptions(), directory, nullptr); } IOStatus Directories::SetDirectories(FileSystem* fs, const std::string& dbname, const std::string& wal_dir, const std::vector& data_paths) { IOStatus io_s = DBImpl::CreateAndNewDirectory(fs, dbname, &db_dir_); if (!io_s.ok()) { return io_s; } if (!wal_dir.empty() && dbname != wal_dir) { io_s = DBImpl::CreateAndNewDirectory(fs, wal_dir, &wal_dir_); if (!io_s.ok()) { return io_s; } } data_dirs_.clear(); for (auto& p : data_paths) { const std::string db_path = p.path; if (db_path == dbname) { data_dirs_.emplace_back(nullptr); } else { std::unique_ptr path_directory; io_s = DBImpl::CreateAndNewDirectory(fs, db_path, &path_directory); if (!io_s.ok()) { return io_s; } data_dirs_.emplace_back(path_directory.release()); } } assert(data_dirs_.size() == data_paths.size()); return IOStatus::OK(); } Status DBImpl::Recover( const std::vector& column_families, bool read_only, bool error_if_log_file_exist, bool error_if_data_exists_in_logs, uint64_t* recovered_seq) { mutex_.AssertHeld(); bool is_new_db = false; assert(db_lock_ 
== nullptr); if (!read_only) { Status s = directories_.SetDirectories(fs_.get(), dbname_, immutable_db_options_.wal_dir, immutable_db_options_.db_paths); if (!s.ok()) { return s; } s = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!s.ok()) { return s; } std::string current_fname = CurrentFileName(dbname_); // Path to any MANIFEST file in the db dir. It does not matter which one. // Since best-efforts recovery ignores CURRENT file, existence of a // MANIFEST indicates the recovery to recover existing db. If no MANIFEST // can be found, a new db will be created. std::string manifest_path; if (!immutable_db_options_.best_efforts_recovery) { s = env_->FileExists(current_fname); } else { s = Status::NotFound(); std::vector files; // No need to check return value env_->GetChildren(dbname_, &files); for (const std::string& file : files) { uint64_t number = 0; FileType type = kLogFile; // initialize if (ParseFileName(file, &number, &type) && type == kDescriptorFile) { // Found MANIFEST (descriptor log), thus best-efforts recovery does // not have to treat the db as empty. s = Status::OK(); manifest_path = dbname_ + "/" + file; break; } } } if (s.IsNotFound()) { if (immutable_db_options_.create_if_missing) { s = NewDB(); is_new_db = true; if (!s.ok()) { return s; } } else { return Status::InvalidArgument( current_fname, "does not exist (create_if_missing is false)"); } } else if (s.ok()) { if (immutable_db_options_.error_if_exists) { return Status::InvalidArgument(dbname_, "exists (error_if_exists is true)"); } } else { // Unexpected error reading file assert(s.IsIOError()); return s; } // Verify compatibility of file_options_ and filesystem { std::unique_ptr idfile; FileOptions customized_fs(file_options_); customized_fs.use_direct_reads |= immutable_db_options_.use_direct_io_for_flush_and_compaction; const std::string& fname = manifest_path.empty() ? 
current_fname : manifest_path; s = fs_->NewRandomAccessFile(fname, customized_fs, &idfile, nullptr); if (!s.ok()) { std::string error_str = s.ToString(); // Check if unsupported Direct I/O is the root cause customized_fs.use_direct_reads = false; s = fs_->NewRandomAccessFile(fname, customized_fs, &idfile, nullptr); if (s.ok()) { return Status::InvalidArgument( "Direct I/O is not supported by the specified DB."); } else { return Status::InvalidArgument( "Found options incompatible with filesystem", error_str.c_str()); } } } } assert(db_id_.empty()); Status s; bool missing_table_file = false; if (!immutable_db_options_.best_efforts_recovery) { s = versions_->Recover(column_families, read_only, &db_id_); } else { s = versions_->TryRecover(column_families, read_only, &db_id_, &missing_table_file); if (s.ok()) { // TryRecover may delete previous column_family_set_. column_family_memtables_.reset( new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet())); s = FinishBestEffortsRecovery(); } } if (!s.ok()) { return s; } // Happens when immutable_db_options_.write_dbid_to_manifest is set to true // the very first time. if (db_id_.empty()) { // Check for the IDENTITY file and create it if not there. s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr); // Typically Identity file is created in NewDB() and for some reason if // it is no longer available then at this point DB ID is not in Identity // file or Manifest. 
if (s.IsNotFound()) { s = SetIdentityFile(env_, dbname_); if (!s.ok()) { return s; } } else if (!s.ok()) { assert(s.IsIOError()); return s; } s = GetDbIdentityFromIdentityFile(&db_id_); if (immutable_db_options_.write_dbid_to_manifest && s.ok()) { VersionEdit edit; edit.SetDBId(db_id_); Options options; MutableCFOptions mutable_cf_options(options); versions_->db_id_ = db_id_; s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options, &edit, &mutex_, nullptr, false); } } else { s = SetIdentityFile(env_, dbname_, db_id_); } if (immutable_db_options_.paranoid_checks && s.ok()) { s = CheckConsistency(); } if (s.ok() && !read_only) { std::map> created_dirs; for (auto cfd : *versions_->GetColumnFamilySet()) { s = cfd->AddDirectories(&created_dirs); if (!s.ok()) { return s; } } } // DB mutex is already held if (s.ok() && immutable_db_options_.persist_stats_to_disk) { s = InitPersistStatsColumnFamily(); } if (s.ok()) { // Initial max_total_in_memory_state_ before recovery logs. Log recovery // may check this value to decide whether to flush. max_total_in_memory_state_ = 0; for (auto cfd : *versions_->GetColumnFamilySet()) { auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); max_total_in_memory_state_ += mutable_cf_options->write_buffer_size * mutable_cf_options->max_write_buffer_number; } SequenceNumber next_sequence(kMaxSequenceNumber); default_cf_handle_ = new ColumnFamilyHandleImpl( versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_); default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats(); // TODO(Zhongyi): handle single_column_family_mode_ when // persistent_stats is enabled single_column_family_mode_ = versions_->GetColumnFamilySet()->NumberOfColumnFamilies() == 1; // Recover from all newer log files than the ones named in the // descriptor (new log files may have been added by the previous // incarnation without registering them in the descriptor). 
// // Note that prev_log_number() is no longer used, but we pay // attention to it in case we are recovering a database // produced by an older version of rocksdb. std::vector filenames; if (!immutable_db_options_.best_efforts_recovery) { s = env_->GetChildren(immutable_db_options_.wal_dir, &filenames); } if (s.IsNotFound()) { return Status::InvalidArgument("wal_dir not found", immutable_db_options_.wal_dir); } else if (!s.ok()) { return s; } std::vector logs; for (size_t i = 0; i < filenames.size(); i++) { uint64_t number; FileType type; if (ParseFileName(filenames[i], &number, &type) && type == kLogFile) { if (is_new_db) { return Status::Corruption( "While creating a new Db, wal_dir contains " "existing log file: ", filenames[i]); } else { logs.push_back(number); } } } if (logs.size() > 0) { if (error_if_log_file_exist) { return Status::Corruption( "The db was opened in readonly mode with error_if_log_file_exist" "flag but a log file already exists"); } else if (error_if_data_exists_in_logs) { for (auto& log : logs) { std::string fname = LogFileName(immutable_db_options_.wal_dir, log); uint64_t bytes; s = env_->GetFileSize(fname, &bytes); if (s.ok()) { if (bytes > 0) { return Status::Corruption( "error_if_data_exists_in_logs is set but there are data " " in log files."); } } } } } if (!logs.empty()) { // Recover in the order in which the logs were generated std::sort(logs.begin(), logs.end()); bool corrupted_log_found = false; s = RecoverLogFiles(logs, &next_sequence, read_only, &corrupted_log_found); if (corrupted_log_found && recovered_seq != nullptr) { *recovered_seq = next_sequence; } if (!s.ok()) { // Clear memtables if recovery failed for (auto cfd : *versions_->GetColumnFamilySet()) { cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); } } } } if (read_only) { // If we are opening as read-only, we need to update options_file_number_ // to reflect the most recent OPTIONS file. 
It does not matter for regular // read-write db instance because options_file_number_ will later be // updated to versions_->NewFileNumber() in RenameTempFileToOptionsFile. std::vector file_names; if (s.ok()) { s = env_->GetChildren(GetName(), &file_names); } if (s.ok()) { uint64_t number = 0; uint64_t options_file_number = 0; FileType type; for (const auto& fname : file_names) { if (ParseFileName(fname, &number, &type) && type == kOptionsFile) { options_file_number = std::max(number, options_file_number); } } versions_->options_file_number_ = options_file_number; } } return s; } Status DBImpl::PersistentStatsProcessFormatVersion() { mutex_.AssertHeld(); Status s; // persist version when stats CF doesn't exist bool should_persist_format_version = !persistent_stats_cfd_exists_; mutex_.Unlock(); if (persistent_stats_cfd_exists_) { // Check persistent stats format version compatibility. Drop and recreate // persistent stats CF if format version is incompatible uint64_t format_version_recovered = 0; Status s_format = DecodePersistentStatsVersionNumber( this, StatsVersionKeyType::kFormatVersion, &format_version_recovered); uint64_t compatible_version_recovered = 0; Status s_compatible = DecodePersistentStatsVersionNumber( this, StatsVersionKeyType::kCompatibleVersion, &compatible_version_recovered); // abort reading from existing stats CF if any of following is true: // 1. failed to read format version or compatible version from disk // 2. sst's format version is greater than current format version, meaning // this sst is encoded with a newer RocksDB release, and current compatible // version is below the sst's compatible version if (!s_format.ok() || !s_compatible.ok() || (kStatsCFCurrentFormatVersion < format_version_recovered && kStatsCFCompatibleFormatVersion < compatible_version_recovered)) { if (!s_format.ok() || !s_compatible.ok()) { ROCKS_LOG_INFO( immutable_db_options_.info_log, "Reading persistent stats version key failed. 
Format key: %s, " "compatible key: %s", s_format.ToString().c_str(), s_compatible.ToString().c_str()); } else { ROCKS_LOG_INFO( immutable_db_options_.info_log, "Disable persistent stats due to corrupted or incompatible format " "version\n"); } DropColumnFamily(persist_stats_cf_handle_); DestroyColumnFamilyHandle(persist_stats_cf_handle_); ColumnFamilyHandle* handle = nullptr; ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle); persist_stats_cf_handle_ = static_cast(handle); // should also persist version here because old stats CF is discarded should_persist_format_version = true; } } if (s.ok() && should_persist_format_version) { // Persistent stats CF being created for the first time, need to write // format version key WriteBatch batch; batch.Put(persist_stats_cf_handle_, kFormatVersionKeyString, ToString(kStatsCFCurrentFormatVersion)); batch.Put(persist_stats_cf_handle_, kCompatibleVersionKeyString, ToString(kStatsCFCompatibleFormatVersion)); WriteOptions wo; wo.low_pri = true; wo.no_slowdown = true; wo.sync = false; s = Write(wo, &batch); } mutex_.Lock(); return s; } Status DBImpl::InitPersistStatsColumnFamily() { mutex_.AssertHeld(); assert(!persist_stats_cf_handle_); ColumnFamilyData* persistent_stats_cfd = versions_->GetColumnFamilySet()->GetColumnFamily( kPersistentStatsColumnFamilyName); persistent_stats_cfd_exists_ = persistent_stats_cfd != nullptr; Status s; if (persistent_stats_cfd != nullptr) { // We are recovering from a DB which already contains persistent stats CF, // the CF is already created in VersionSet::ApplyOneVersionEdit, but // column family handle was not. Need to explicitly create handle here. 
persist_stats_cf_handle_ =
        new ColumnFamilyHandleImpl(persistent_stats_cfd, this, &mutex_);
  } else {
    // CreateColumnFamily() is called with the DB mutex released; it is
    // re-acquired before returning.
    mutex_.Unlock();
    ColumnFamilyHandle* handle = nullptr;
    ColumnFamilyOptions cfo;
    OptimizeForPersistentStats(&cfo);
    s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle);
    persist_stats_cf_handle_ = static_cast(handle);
    mutex_.Lock();
  }
  return s;
}

// Replays the given WAL files into the memtables, flushing to level-0 tables
// where needed, and (unless read_only) records the result in the MANIFEST.
//
// log_numbers:         WAL numbers to replay.
//                      REQUIRES: sorted in ascending order.
// next_sequence:       in/out; passed to WriteBatchInternal::InsertInto() and
//                      used afterwards to advance the VersionSet's sequence
//                      numbers.
// read_only:           when true, no memtable is flushed and no MANIFEST
//                      update is written.
// corrupted_log_found: optional out; set to true when point-in-time recovery
//                      stops replay because of a corrupted record.
//
// Must be called with mutex_ held.
// NOTE(review): throughout this text the contents of every <...> template
// argument list appear to have been stripped (e.g. "const std::vector&",
// "static_cast(bytes)"). They are left exactly as found; restore them from
// the upstream file before compiling.
Status DBImpl::RecoverLogFiles(const std::vector& log_numbers,
                               SequenceNumber* next_sequence, bool read_only,
                               bool* corrupted_log_found) {
  // Receives corruption reports from log::Reader. Every report is logged; the
  // first one is also stored in *status when status is non-null.
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    Logger* info_log;
    const char* fname;
    Status* status;  // nullptr if immutable_db_options_.paranoid_checks==false
    void Corruption(size_t bytes, const Status& s) override {
      ROCKS_LOG_WARN(info_log, "%s%s: dropping %d bytes; %s",
                     (status == nullptr ? "(ignoring error) " : ""), fname,
                     static_cast(bytes), s.ToString().c_str());
      if (status != nullptr && status->ok()) {
        *status = s;
      }
    }
  };

  mutex_.AssertHeld();
  Status status;
  // One VersionEdit per column family, keyed by column family ID.
  std::unordered_map version_edits;
  // no need to refcount because iteration is under mutex
  for (auto cfd : *versions_->GetColumnFamilySet()) {
    VersionEdit edit;
    edit.SetColumnFamily(cfd->GetID());
    version_edits.insert({cfd->GetID(), edit});
  }
  int job_id = next_job_id_.fetch_add(1);
  {
    auto stream = event_logger_.Log();
    stream << "job" << job_id << "event"
           << "recovery_started";
    stream << "log_files";
    stream.StartArray();
    for (auto log_number : log_numbers) {
      stream << log_number;
    }
    stream.EndArray();
  }

#ifndef ROCKSDB_LITE
  if (immutable_db_options_.wal_filter != nullptr) {
    // Tell the WAL filter each column family's name -> ID and ID -> log
    // number before any record is replayed.
    std::map cf_name_id_map;
    std::map cf_lognumber_map;
    for (auto cfd : *versions_->GetColumnFamilySet()) {
      cf_name_id_map.insert(std::make_pair(cfd->GetName(), cfd->GetID()));
      cf_lognumber_map.insert(
          std::make_pair(cfd->GetID(), cfd->GetLogNumber()));
    }

    immutable_db_options_.wal_filter->ColumnFamilyLogNumberMap(cf_lognumber_map,
                                                               cf_name_id_map);
  }
#endif

  bool stop_replay_by_wal_filter = false;
  bool stop_replay_for_corruption = false;
  bool flushed = false;
  // Initialized to kMaxSequenceNumber so that, until a corrupted log is
  // recorded, no column family log number compares greater than it below.
  uint64_t corrupted_log_number = kMaxSequenceNumber;
  uint64_t min_log_number = MinLogNumberToKeep();
  for (auto log_number : log_numbers) {
    if (log_number < min_log_number) {
      ROCKS_LOG_INFO(immutable_db_options_.info_log,
                     "Skipping log #%" PRIu64
                     " since it is older than min log to keep #%" PRIu64,
                     log_number, min_log_number);
      continue;
    }
    // The previous incarnation may not have written any MANIFEST
    // records after allocating this log number.  So we manually
    // update the file number allocation counter in VersionSet.
    versions_->MarkFileNumberUsed(log_number);
    // Open the log file
    std::string fname = LogFileName(immutable_db_options_.wal_dir, log_number);

    ROCKS_LOG_INFO(immutable_db_options_.info_log,
                   "Recovering log #%" PRIu64 " mode %d", log_number,
                   static_cast(immutable_db_options_.wal_recovery_mode));
    // Logs that the current log file is being skipped, including its size
    // when that can be determined.
    auto logFileDropped = [this, &fname]() {
      uint64_t bytes;
      if (env_->GetFileSize(fname, &bytes).ok()) {
        auto info_log = immutable_db_options_.info_log.get();
        ROCKS_LOG_WARN(info_log, "%s: dropping %d bytes", fname.c_str(),
                       static_cast(bytes));
      }
    };
    if (stop_replay_by_wal_filter) {
      logFileDropped();
      continue;
    }

    std::unique_ptr file_reader;
    {
      std::unique_ptr file;
      status = fs_->NewSequentialFile(fname,
                                      fs_->OptimizeForLogRead(file_options_),
                                      &file, nullptr);
      if (!status.ok()) {
        MaybeIgnoreError(&status);
        if (!status.ok()) {
          return status;
        } else {
          // Fail with one log file, but that's ok.
          // Try next one.
          continue;
        }
      }
      file_reader.reset(new SequentialFileReader(
          std::move(file), fname, immutable_db_options_.log_readahead_size));
    }

    // Create the log reader.
    LogReporter reporter;
    reporter.env = env_;
    reporter.info_log = immutable_db_options_.info_log.get();
    reporter.fname = fname.c_str();
    if (!immutable_db_options_.paranoid_checks ||
        immutable_db_options_.wal_recovery_mode ==
            WALRecoveryMode::kSkipAnyCorruptedRecords) {
      reporter.status = nullptr;
    } else {
      reporter.status = &status;
    }
    // We intentionally make log::Reader do checksumming even if
    // paranoid_checks==false so that corruptions cause entire commits
    // to be skipped instead of propagating bad information (like overly
    // large sequence numbers).
    log::Reader reader(immutable_db_options_.info_log, std::move(file_reader),
                       &reporter, true /*checksum*/, log_number);

    // NOTE(review): the original comment here is truncated ("Determine if we
    // should tolerate incomplete records at the tail end of the").
    // Read all the records and add them to a memtable.
    std::string scratch;
    Slice record;
    WriteBatch batch;

    TEST_SYNC_POINT_CALLBACK("DBImpl::RecoverLogFiles:BeforeReadWal",
                             /*arg=*/nullptr);
    while (!stop_replay_by_wal_filter &&
           reader.ReadRecord(&record, &scratch,
                             immutable_db_options_.wal_recovery_mode) &&
           status.ok()) {
      if (record.size() < WriteBatchInternal::kHeader) {
        reporter.Corruption(record.size(),
                            Status::Corruption("log record too small"));
        continue;
      }
      WriteBatchInternal::SetContents(&batch, record);
      SequenceNumber sequence = WriteBatchInternal::Sequence(&batch);

      if (immutable_db_options_.wal_recovery_mode ==
          WALRecoveryMode::kPointInTimeRecovery) {
        // In point-in-time recovery mode, if sequence id of log files are
        // consecutive, we continue recovery despite corruption. This could
        // happen when we open and write to a corrupted DB, where sequence id
        // will start from the last sequence id we recovered.
        if (sequence == *next_sequence) {
          stop_replay_for_corruption = false;
        }
        if (stop_replay_for_corruption) {
          logFileDropped();
          break;
        }
      }

#ifndef ROCKSDB_LITE
      if (immutable_db_options_.wal_filter != nullptr) {
        WriteBatch new_batch;
        bool batch_changed = false;

        WalFilter::WalProcessingOption wal_processing_option =
            immutable_db_options_.wal_filter->LogRecordFound(
                log_number, fname, batch, &new_batch, &batch_changed);

        switch (wal_processing_option) {
          case WalFilter::WalProcessingOption::kContinueProcessing:
            // do nothing, proceed normally
            break;
          case WalFilter::WalProcessingOption::kIgnoreCurrentRecord:
            // skip current record
            continue;
          case WalFilter::WalProcessingOption::kStopReplay:
            // skip current record and stop replay
            stop_replay_by_wal_filter = true;
            continue;
          case WalFilter::WalProcessingOption::kCorruptedRecord: {
            status =
                Status::Corruption("Corruption reported by Wal Filter ",
                                   immutable_db_options_.wal_filter->Name());
            MaybeIgnoreError(&status);
            if (!status.ok()) {
              reporter.Corruption(record.size(), status);
              continue;
            }
            break;
          }
          default: {
            assert(false);  // unhandled case
            status = Status::NotSupported(
                "Unknown WalProcessingOption returned"
                " by Wal Filter ",
                immutable_db_options_.wal_filter->Name());
            MaybeIgnoreError(&status);
            if (!status.ok()) {
              return status;
            } else {
              // Ignore the error with current record processing.
              continue;
            }
          }
        }

        if (batch_changed) {
          // Make sure that the count in the new batch is
          // within the original count.
          int new_count = WriteBatchInternal::Count(&new_batch);
          int original_count = WriteBatchInternal::Count(&batch);
          if (new_count > original_count) {
            ROCKS_LOG_FATAL(
                immutable_db_options_.info_log,
                "Recovering log #%" PRIu64
                " mode %d log filter %s returned "
                "more records (%d) than original (%d) which is not allowed. "
                "Aborting recovery.",
                log_number,
                static_cast(immutable_db_options_.wal_recovery_mode),
                immutable_db_options_.wal_filter->Name(), new_count,
                original_count);
            status = Status::NotSupported(
                "More than original # of records "
                "returned by Wal Filter ",
                immutable_db_options_.wal_filter->Name());
            return status;
          }
          // Set the same sequence number in the new_batch
          // as the original batch.
          WriteBatchInternal::SetSequence(&new_batch,
                                          WriteBatchInternal::Sequence(&batch));
          batch = new_batch;
        }
      }
#endif  // ROCKSDB_LITE

      // If column family was not found, it might mean that the WAL write
      // batch references to the column family that was dropped after the
      // insert. We don't want to fail the whole write batch in that case --
      // we just ignore the update.
      // That's why we set ignore missing column families to true
      bool has_valid_writes = false;
      status = WriteBatchInternal::InsertInto(
          &batch, column_family_memtables_.get(), &flush_scheduler_,
          &trim_history_scheduler_, true, log_number, this,
          false /* concurrent_memtable_writes */, next_sequence,
          &has_valid_writes, seq_per_batch_, batch_per_txn_);
      MaybeIgnoreError(&status);
      if (!status.ok()) {
        // We are treating this as a failure while reading since we read valid
        // blocks that do not form coherent data
        reporter.Corruption(record.size(), status);
        continue;
      }

      if (has_valid_writes && !read_only) {
        // we can do this because this is called before client has access to the
        // DB and there is only a single thread operating on DB
        ColumnFamilyData* cfd;

        while ((cfd = flush_scheduler_.TakeNextColumnFamily()) != nullptr) {
          cfd->UnrefAndTryDelete();
          // If this asserts, it means that InsertInto failed in
          // filtering updates to already-flushed column families
          assert(cfd->GetLogNumber() <= log_number);
          auto iter = version_edits.find(cfd->GetID());
          assert(iter != version_edits.end());
          VersionEdit* edit = &iter->second;
          status = WriteLevel0TableForRecovery(job_id, cfd, cfd->mem(), edit);
          if (!status.ok()) {
            // Reflect errors immediately so that conditions like full
            // file-systems cause the DB::Open() to fail.
            return status;
          }
          flushed = true;

          cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
                                 *next_sequence);
        }
      }
    }

    if (!status.ok()) {
      if (status.IsNotSupported()) {
        // We should not treat NotSupported as corruption. It is rather a clear
        // sign that we are processing a WAL that is produced by an incompatible
        // version of the code.
        return status;
      }
      if (immutable_db_options_.wal_recovery_mode ==
          WALRecoveryMode::kSkipAnyCorruptedRecords) {
        // We should ignore all errors unconditionally
        status = Status::OK();
      } else if (immutable_db_options_.wal_recovery_mode ==
                 WALRecoveryMode::kPointInTimeRecovery) {
        if (status.IsIOError()) {
          ROCKS_LOG_ERROR(immutable_db_options_.info_log,
                          "IOError during point-in-time reading log #%" PRIu64
                          " seq #%" PRIu64
                          ". %s. This likely mean loss of synced WAL, "
                          "thus recovery fails.",
                          log_number, *next_sequence,
                          status.ToString().c_str());
          return status;
        }
        // We should ignore the error but not continue replaying
        status = Status::OK();
        stop_replay_for_corruption = true;
        corrupted_log_number = log_number;
        if (corrupted_log_found != nullptr) {
          *corrupted_log_found = true;
        }
        ROCKS_LOG_INFO(immutable_db_options_.info_log,
                       "Point in time recovered to log #%" PRIu64
                       " seq #%" PRIu64,
                       log_number, *next_sequence);
      } else {
        assert(immutable_db_options_.wal_recovery_mode ==
                   WALRecoveryMode::kTolerateCorruptedTailRecords ||
               immutable_db_options_.wal_recovery_mode ==
                   WALRecoveryMode::kAbsoluteConsistency);
        return status;
      }
    }

    flush_scheduler_.Clear();
    trim_history_scheduler_.Clear();
    // Advance the VersionSet's sequence numbers, but only if replay actually
    // moved *next_sequence away from its kMaxSequenceNumber sentinel and the
    // result would not move the last sequence backwards.
    auto last_sequence = *next_sequence - 1;
    if ((*next_sequence != kMaxSequenceNumber) &&
        (versions_->LastSequence() <= last_sequence)) {
      versions_->SetLastAllocatedSequence(last_sequence);
      versions_->SetLastPublishedSequence(last_sequence);
      versions_->SetLastSequence(last_sequence);
    }
  }
  // Compare the corrupted log number to all columnfamily's current log number.
  // Abort Open() if any column family's log number is greater than
  // the corrupted log number, which means CF contains data beyond the point of
  // corruption. This could happen during PIT recovery when the WAL is
  // corrupted and some (but not all) CFs are flushed.
  // Exclude the PIT case where no log is dropped after the corruption point.
  // This is to cover the case for empty logs after corrupted log, in which we
  // don't reset stop_replay_for_corruption.
  if (stop_replay_for_corruption == true &&
      (immutable_db_options_.wal_recovery_mode ==
           WALRecoveryMode::kPointInTimeRecovery ||
       immutable_db_options_.wal_recovery_mode ==
           WALRecoveryMode::kTolerateCorruptedTailRecords)) {
    for (auto cfd : *versions_->GetColumnFamilySet()) {
      if (cfd->GetLogNumber() > corrupted_log_number) {
        ROCKS_LOG_ERROR(immutable_db_options_.info_log,
                        "Column family inconsistency: SST file contains data"
                        " beyond the point of corruption.");
        return Status::Corruption("SST file is ahead of WALs");
      }
    }
  }

  // True if there's any data in the WALs; if not, we can skip re-processing
  // them later
  bool data_seen = false;
  if (!read_only) {
    // no need to refcount since client still doesn't have access
    // to the DB and can not drop column families while we iterate
    // NOTE(review): back() requires log_numbers to be non-empty; the caller
    // visible in Recover() only calls this when its log list is non-empty.
    auto max_log_number = log_numbers.back();
    for (auto cfd : *versions_->GetColumnFamilySet()) {
      auto iter = version_edits.find(cfd->GetID());
      assert(iter != version_edits.end());
      VersionEdit* edit = &iter->second;

      if (cfd->GetLogNumber() > max_log_number) {
        // Column family cfd has already flushed the data
        // from all logs. Memtable has to be empty because
        // we filter the updates based on log_number
        // (in WriteBatch::InsertInto)
        assert(cfd->mem()->GetFirstSequenceNumber() == 0);
        assert(edit->NumEntries() == 0);
        continue;
      }

      TEST_SYNC_POINT_CALLBACK(
          "DBImpl::RecoverLogFiles:BeforeFlushFinalMemtable", /*arg=*/nullptr);

      // flush the final memtable (if non-empty)
      if (cfd->mem()->GetFirstSequenceNumber() != 0) {
        // If flush happened in the middle of recovery (e.g. due to memtable
        // being full), we flush at the end. Otherwise we'll need to record
        // where we were on last flush, which make the logic complicated.
        if (flushed || !immutable_db_options_.avoid_flush_during_recovery) {
          status = WriteLevel0TableForRecovery(job_id, cfd, cfd->mem(), edit);
          if (!status.ok()) {
            // Recovery failed
            break;
          }
          flushed = true;

          cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(),
                                 versions_->LastSequence());
        }
        data_seen = true;
      }

      // Update the log number info in the version edit corresponding to this
      // column family. Note that the version edits will be written to MANIFEST
      // together later.
      // writing log_number in the manifest means that any log file
      // with number strongly less than (log_number + 1) is already
      // recovered and should be ignored on next reincarnation.
      // Since we already recovered max_log_number, we want all logs
      // with numbers `<= max_log_number` (includes this one) to be ignored
      if (flushed || cfd->mem()->GetFirstSequenceNumber() == 0) {
        edit->SetLogNumber(max_log_number + 1);
      }
    }
    if (status.ok()) {
      // we must mark the next log number as used, even though it's
      // not actually used. that is because VersionSet assumes
      // VersionSet::next_file_number_ always to be strictly greater than any
      // log number
      versions_->MarkFileNumberUsed(max_log_number + 1);

      autovector cfds;
      autovector cf_opts;
      autovector> edit_lists;
      for (auto* cfd : *versions_->GetColumnFamilySet()) {
        cfds.push_back(cfd);
        cf_opts.push_back(cfd->GetLatestMutableCFOptions());
        auto iter = version_edits.find(cfd->GetID());
        assert(iter != version_edits.end());
        edit_lists.push_back({&iter->second});
      }
      // write MANIFEST with update
      status = versions_->LogAndApply(cfds, cf_opts, edit_lists, &mutex_,
                                      directories_.GetDbDir(),
                                      /*new_descriptor_log=*/true);
    }
  }

  // Data was replayed into memtables but nothing was flushed: the WAL files
  // are handed to RestoreAliveLogFiles().
  if (status.ok() && data_seen && !flushed) {
    status = RestoreAliveLogFiles(log_numbers);
  }

  event_logger_.Log() << "job" << job_id << "event"
                      << "recovery_finished";

  return status;
}

// Re-registers the given WAL files in alive_log_files_ and recomputes
// total_log_size_ from their current sizes. Stops at, and returns, the first
// GetFileSize() failure. Must be called with mutex_ held.
Status DBImpl::RestoreAliveLogFiles(const std::vector& log_numbers) {
  if (log_numbers.empty()) {
    return Status::OK();
  }
  Status s;
  mutex_.AssertHeld();
  assert(immutable_db_options_.avoid_flush_during_recovery);
  if (two_write_queues_) {
    log_write_mutex_.Lock();
  }
  // Mark these as alive so they'll be considered for deletion later by
  // FindObsoleteFiles()
  total_log_size_ = 0;
  log_empty_ = false;
  for (auto log_number : log_numbers) {
    LogFileNumberSize log(log_number);
    std::string fname = LogFileName(immutable_db_options_.wal_dir, log_number);
    // This gets the apparent size of the logs, not including preallocated
    // space.
    s = env_->GetFileSize(fname, &log.size);
    if (!s.ok()) {
      break;
    }
    total_log_size_ += log.size;
    alive_log_files_.push_back(log);
    // We preallocate space for logs, but then after a crash and restart, that
    // preallocated space is not needed anymore. It is likely only the last
    // log has such preallocated space, so we only truncate for the last log.
if (log_number == log_numbers.back()) { std::unique_ptr last_log; Status truncate_status = fs_->ReopenWritableFile( fname, fs_->OptimizeForLogWrite( file_options_, BuildDBOptions(immutable_db_options_, mutable_db_options_)), &last_log, nullptr); if (truncate_status.ok()) { truncate_status = last_log->Truncate(log.size, IOOptions(), nullptr); } if (truncate_status.ok()) { truncate_status = last_log->Close(IOOptions(), nullptr); } // Not a critical error if fail to truncate. if (!truncate_status.ok()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Failed to truncate log #%" PRIu64 ": %s", log_number, truncate_status.ToString().c_str()); } } } if (two_write_queues_) { log_write_mutex_.Unlock(); } return s; } Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, MemTable* mem, VersionEdit* edit) { mutex_.AssertHeld(); const uint64_t start_micros = env_->NowMicros(); FileMetaData meta; std::unique_ptr::iterator> pending_outputs_inserted_elem( new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0); ReadOptions ro; ro.total_order_seek = true; Arena arena; Status s; TableProperties table_properties; { ScopedArenaIterator iter(mem->NewIterator(ro, &arena)); ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] [WriteLevel0TableForRecovery]" " Level-0 table #%" PRIu64 ": started", cfd->GetName().c_str(), meta.fd.GetNumber()); // Get the latest mutable cf options while the mutex is still locked const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); bool paranoid_file_checks = cfd->GetLatestMutableCFOptions()->paranoid_file_checks; int64_t _current_time = 0; env_->GetCurrentTime(&_current_time); // ignore error const uint64_t current_time = static_cast(_current_time); meta.oldest_ancester_time = current_time; { auto write_hint = cfd->CalculateSSTWriteHint(0); mutex_.Unlock(); SequenceNumber earliest_write_conflict_snapshot; std::vector snapshot_seqs = 
snapshots_.GetAll(&earliest_write_conflict_snapshot); auto snapshot_checker = snapshot_checker_.get(); if (use_custom_gc_ && snapshot_checker == nullptr) { snapshot_checker = DisableGCSnapshotChecker::Instance(); } std::vector> range_del_iters; auto range_del_iter = mem->NewRangeTombstoneIterator(ro, kMaxSequenceNumber); if (range_del_iter != nullptr) { range_del_iters.emplace_back(range_del_iter); } IOStatus io_s; s = BuildTable( dbname_, env_, fs_.get(), *cfd->ioptions(), mutable_cf_options, file_options_for_compaction_, cfd->table_cache(), iter.get(), std::move(range_del_iters), &meta, cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(), snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), mutable_cf_options.sample_for_compression, mutable_cf_options.compression_opts, paranoid_file_checks, cfd->internal_stats(), TableFileCreationReason::kRecovery, &io_s, &event_logger_, job_id, Env::IO_HIGH, nullptr /* table_properties */, -1 /* level */, current_time, write_hint); LogFlush(immutable_db_options_.info_log); ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] [WriteLevel0TableForRecovery]" " Level-0 table #%" PRIu64 ": %" PRIu64 " bytes %s", cfd->GetName().c_str(), meta.fd.GetNumber(), meta.fd.GetFileSize(), s.ToString().c_str()); mutex_.Lock(); } } ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); // Note that if file_size is zero, the file has been deleted and // should not be added to the manifest. 
int level = 0; if (s.ok() && meta.fd.GetFileSize() > 0) { edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(), meta.smallest, meta.largest, meta.fd.smallest_seqno, meta.fd.largest_seqno, meta.marked_for_compaction, meta.oldest_blob_file_number, meta.oldest_ancester_time, meta.file_creation_time, meta.file_checksum, meta.file_checksum_func_name); } InternalStats::CompactionStats stats(CompactionReason::kFlush, 1); stats.micros = env_->NowMicros() - start_micros; stats.bytes_written = meta.fd.GetFileSize(); stats.num_output_files = 1; cfd->internal_stats()->AddCompactionStats(level, Env::Priority::USER, stats); cfd->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED, meta.fd.GetFileSize()); RecordTick(stats_, COMPACT_WRITE_BYTES, meta.fd.GetFileSize()); return s; } Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) { DBOptions db_options(options); ColumnFamilyOptions cf_options(options); std::vector column_families; column_families.push_back( ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); if (db_options.persist_stats_to_disk) { column_families.push_back( ColumnFamilyDescriptor(kPersistentStatsColumnFamilyName, cf_options)); } std::vector handles; Status s = DB::Open(db_options, dbname, column_families, &handles, dbptr); if (s.ok()) { if (db_options.persist_stats_to_disk) { assert(handles.size() == 2); } else { assert(handles.size() == 1); } // i can delete the handle since DBImpl is always holding a reference to // default column family if (db_options.persist_stats_to_disk && handles[1] != nullptr) { delete handles[1]; } delete handles[0]; } return s; } Status DB::Open(const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr) { const bool kSeqPerBatch = true; const bool kBatchPerTxn = true; return DBImpl::Open(db_options, dbname, column_families, handles, dbptr, !kSeqPerBatch, kBatchPerTxn); } Status 
DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, size_t preallocate_block_size, log::Writer** new_log) { Status s; std::unique_ptr lfile; DBOptions db_options = BuildDBOptions(immutable_db_options_, mutable_db_options_); FileOptions opt_file_options = fs_->OptimizeForLogWrite(file_options_, db_options); std::string log_fname = LogFileName(immutable_db_options_.wal_dir, log_file_num); if (recycle_log_number) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "reusing log %" PRIu64 " from recycle list\n", recycle_log_number); std::string old_log_fname = LogFileName(immutable_db_options_.wal_dir, recycle_log_number); TEST_SYNC_POINT("DBImpl::CreateWAL:BeforeReuseWritableFile1"); TEST_SYNC_POINT("DBImpl::CreateWAL:BeforeReuseWritableFile2"); s = fs_->ReuseWritableFile(log_fname, old_log_fname, opt_file_options, &lfile, /*dbg=*/nullptr); } else { s = NewWritableFile(fs_.get(), log_fname, &lfile, opt_file_options); } if (s.ok()) { lfile->SetWriteLifeTimeHint(CalculateWALWriteHint()); lfile->SetPreallocationBlockSize(preallocate_block_size); const auto& listeners = immutable_db_options_.listeners; std::unique_ptr file_writer( new WritableFileWriter(std::move(lfile), log_fname, opt_file_options, env_, nullptr /* stats */, listeners)); *new_log = new log::Writer(std::move(file_writer), log_file_num, immutable_db_options_.recycle_log_file_num > 0, immutable_db_options_.manual_wal_flush); } return s; } Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr, const bool seq_per_batch, const bool batch_per_txn) { Status s = SanitizeOptionsByTable(db_options, column_families); if (!s.ok()) { return s; } s = ValidateOptions(db_options, column_families); if (!s.ok()) { return s; } *dbptr = nullptr; handles->clear(); size_t max_write_buffer_size = 0; for (auto cf : column_families) { max_write_buffer_size = std::max(max_write_buffer_size, cf.options.write_buffer_size); } 
DBImpl* impl = new DBImpl(db_options, dbname, seq_per_batch, batch_per_txn); s = impl->env_->CreateDirIfMissing(impl->immutable_db_options_.wal_dir); if (s.ok()) { std::vector paths; for (auto& db_path : impl->immutable_db_options_.db_paths) { paths.emplace_back(db_path.path); } for (auto& cf : column_families) { for (auto& cf_path : cf.options.cf_paths) { paths.emplace_back(cf_path.path); } } for (auto& path : paths) { s = impl->env_->CreateDirIfMissing(path); if (!s.ok()) { break; } } // For recovery from NoSpace() error, we can only handle // the case where the database is stored in a single path if (paths.size() <= 1) { impl->error_handler_.EnableAutoRecovery(); } } if (s.ok()) { s = impl->CreateArchivalDirectory(); } if (!s.ok()) { delete impl; return s; } impl->wal_in_db_path_ = IsWalDirSameAsDBPath(&impl->immutable_db_options_); impl->mutex_.Lock(); // Handles create_if_missing, error_if_exists uint64_t recovered_seq(kMaxSequenceNumber); s = impl->Recover(column_families, false, false, false, &recovered_seq); if (s.ok()) { uint64_t new_log_number = impl->versions_->NewFileNumber(); log::Writer* new_log = nullptr; const size_t preallocate_block_size = impl->GetWalPreallocateBlockSize(max_write_buffer_size); s = impl->CreateWAL(new_log_number, 0 /*recycle_log_number*/, preallocate_block_size, &new_log); if (s.ok()) { InstrumentedMutexLock wl(&impl->log_write_mutex_); impl->logfile_number_ = new_log_number; assert(new_log != nullptr); impl->logs_.emplace_back(new_log_number, new_log); } if (s.ok()) { // set column family handles for (auto cf : column_families) { auto cfd = impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name); if (cfd != nullptr) { handles->push_back( new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); impl->NewThreadStatusCfInfo(cfd); } else { if (db_options.create_missing_column_families) { // missing column family, create it ColumnFamilyHandle* handle; impl->mutex_.Unlock(); s = impl->CreateColumnFamily(cf.options, cf.name, 
&handle); impl->mutex_.Lock(); if (s.ok()) { handles->push_back(handle); } else { break; } } else { s = Status::InvalidArgument("Column family not found: ", cf.name); break; } } } } if (s.ok()) { SuperVersionContext sv_context(/* create_superversion */ true); for (auto cfd : *impl->versions_->GetColumnFamilySet()) { impl->InstallSuperVersionAndScheduleWork( cfd, &sv_context, *cfd->GetLatestMutableCFOptions()); } sv_context.Clean(); if (impl->two_write_queues_) { impl->log_write_mutex_.Lock(); } impl->alive_log_files_.push_back( DBImpl::LogFileNumberSize(impl->logfile_number_)); if (impl->two_write_queues_) { impl->log_write_mutex_.Unlock(); } impl->DeleteObsoleteFiles(); s = impl->directories_.GetDbDir()->Fsync(IOOptions(), nullptr); } if (s.ok()) { // In WritePrepared there could be gap in sequence numbers. This breaks // the trick we use in kPointInTimeRecovery which assumes the first seq in // the log right after the corrupted log is one larger than the last seq // we read from the logs. To let this trick keep working, we add a dummy // entry with the expected sequence to the first log right after recovery. // In non-WritePrepared case also the new log after recovery could be // empty, and thus missing the consecutive seq hint to distinguish // middle-log corruption to corrupted-log-remained-after-recovery. This // case also will be addressed by a dummy write. if (recovered_seq != kMaxSequenceNumber) { WriteBatch empty_batch; WriteBatchInternal::SetSequence(&empty_batch, recovered_seq); WriteOptions write_options; uint64_t log_used, log_size; log::Writer* log_writer = impl->logs_.back().writer; s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size); if (s.ok()) { // Need to fsync, otherwise it might get lost after a power reset. 
s = impl->FlushWAL(false); if (s.ok()) { s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync); } } } } } if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) { // try to read format version but no need to fail Open() even if it fails s = impl->PersistentStatsProcessFormatVersion(); } if (s.ok()) { for (auto cfd : *impl->versions_->GetColumnFamilySet()) { if (cfd->ioptions()->compaction_style == kCompactionStyleFIFO) { auto* vstorage = cfd->current()->storage_info(); for (int i = 1; i < vstorage->num_levels(); ++i) { int num_files = vstorage->NumLevelFiles(i); if (num_files > 0) { s = Status::InvalidArgument( "Not all files are at level 0. Cannot " "open with FIFO compaction style."); break; } } } if (!cfd->mem()->IsSnapshotSupported()) { impl->is_snapshot_supported_ = false; } if (cfd->ioptions()->merge_operator != nullptr && !cfd->mem()->IsMergeOperatorSupported()) { s = Status::InvalidArgument( "The memtable of column family %s does not support merge operator " "its options.merge_operator is non-null", cfd->GetName().c_str()); } if (!s.ok()) { break; } } } TEST_SYNC_POINT("DBImpl::Open:Opened"); Status persist_options_status; if (s.ok()) { // Persist RocksDB Options before scheduling the compaction. // The WriteOptionsFile() will release and lock the mutex internally. persist_options_status = impl->WriteOptionsFile( false /*need_mutex_lock*/, false /*need_enter_write_thread*/); *dbptr = impl; impl->opened_successfully_ = true; impl->MaybeScheduleFlushOrCompaction(); } impl->mutex_.Unlock(); #ifndef ROCKSDB_LITE auto sfm = static_cast( impl->immutable_db_options_.sst_file_manager.get()); if (s.ok() && sfm) { // Set Statistics ptr for SstFileManager to dump the stats of // DeleteScheduler. 
sfm->SetStatisticsPtr(impl->immutable_db_options_.statistics); ROCKS_LOG_INFO(impl->immutable_db_options_.info_log, "SstFileManager instance %p", sfm); // Notify SstFileManager about all sst files that already exist in // db_paths[0] and cf_paths[0] when the DB is opened. // SstFileManagerImpl needs to know sizes of the files. For files whose size // we already know (sst files that appear in manifest - typically that's the // vast majority of all files), we'll pass the size to SstFileManager. // For all other files SstFileManager will query the size from filesystem. std::vector metadata; impl->mutex_.Lock(); impl->versions_->GetLiveFilesMetaData(&metadata); impl->mutex_.Unlock(); std::unordered_map known_file_sizes; for (const auto& md : metadata) { std::string name = md.name; if (!name.empty() && name[0] == '/') { name = name.substr(1); } known_file_sizes[name] = md.size; } std::vector paths; paths.emplace_back(impl->immutable_db_options_.db_paths[0].path); for (auto& cf : column_families) { if (!cf.options.cf_paths.empty()) { paths.emplace_back(cf.options.cf_paths[0].path); } } // Remove duplicate paths. std::sort(paths.begin(), paths.end()); paths.erase(std::unique(paths.begin(), paths.end()), paths.end()); for (auto& path : paths) { std::vector existing_files; impl->immutable_db_options_.env->GetChildren(path, &existing_files); for (auto& file_name : existing_files) { uint64_t file_number; FileType file_type; std::string file_path = path + "/" + file_name; if (ParseFileName(file_name, &file_number, &file_type) && file_type == kTableFile) { if (known_file_sizes.count(file_name)) { // We're assuming that each sst file name exists in at most one of // the paths. sfm->OnAddFile(file_path, known_file_sizes.at(file_name), /* compaction */ false); } else { sfm->OnAddFile(file_path); } } } } // Reserve some disk buffer space. 
This is a heuristic - when we run out // of disk space, this ensures that there is atleast write_buffer_size // amount of free space before we resume DB writes. In low disk space // conditions, we want to avoid a lot of small L0 files due to frequent // WAL write failures and resultant forced flushes sfm->ReserveDiskBuffer(max_write_buffer_size, impl->immutable_db_options_.db_paths[0].path); } #endif // !ROCKSDB_LITE if (s.ok()) { ROCKS_LOG_HEADER(impl->immutable_db_options_.info_log, "DB pointer %p", impl); LogFlush(impl->immutable_db_options_.info_log); assert(impl->TEST_WALBufferIsEmpty()); // If the assert above fails then we need to FlushWAL before returning // control back to the user. if (!persist_options_status.ok()) { s = Status::IOError( "DB::Open() failed --- Unable to persist Options file", persist_options_status.ToString()); } } if (s.ok()) { impl->StartTimedTasks(); } if (!s.ok()) { for (auto* h : *handles) { delete h; } handles->clear(); delete impl; *dbptr = nullptr; } return s; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_readonly.cc000066400000000000000000000230361370372246700210560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/db_impl/db_impl_readonly.h"

#include "db/arena_wrapped_db_iter.h"
#include "db/compacted_db_impl.h"
#include "db/db_impl/db_impl.h"
#include "db/db_iter.h"
#include "db/merge_context.h"
#include "monitoring/perf_context_imp.h"

namespace ROCKSDB_NAMESPACE {

#ifndef ROCKSDB_LITE

// Constructs the underlying DBImpl and logs that the DB is being opened in
// read-only mode.
DBImplReadOnly::DBImplReadOnly(const DBOptions& db_options,
                               const std::string& dbname)
    : DBImpl(db_options, dbname) {
  ROCKS_LOG_INFO(immutable_db_options_.info_log,
                 "Opening the db in read only mode");
  LogFlush(immutable_db_options_.info_log);
}

DBImplReadOnly::~DBImplReadOnly() {}

// Implementations of the DB interface

// Looks up `key` in `column_family` at the latest sequence number: first in
// the super version's memtable, then, if that lookup does not report a hit,
// in the super version's current Version. The value is stored in
// `pinnable_val`, which must not be null. Returns the status filled in by the
// lookup that was performed.
Status DBImplReadOnly::Get(const ReadOptions& read_options,
                           ColumnFamilyHandle* column_family, const Slice& key,
                           PinnableSlice* pinnable_val) {
  assert(pinnable_val != nullptr);
  // TODO: stopwatch DB_GET needed?, perf timer needed?
  PERF_TIMER_GUARD(get_snapshot_time);
  Status s;
  SequenceNumber snapshot = versions_->LastSequence();
  auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
  auto cfd = cfh->cfd();
  if (tracer_) {
    // tracer_ is checked again once trace_mutex_ is held.
    InstrumentedMutexLock lock(&trace_mutex_);
    if (tracer_) {
      tracer_->Get(column_family, key);
    }
  }
  SuperVersion* super_version = cfd->GetSuperVersion();
  MergeContext merge_context;
  SequenceNumber max_covering_tombstone_seq = 0;
  LookupKey lkey(key, snapshot);
  PERF_TIMER_STOP(get_snapshot_time);
  if (super_version->mem->Get(lkey, pinnable_val->GetSelf(),
                              /*timestamp=*/nullptr, &s, &merge_context,
                              &max_covering_tombstone_seq, read_options)) {
    pinnable_val->PinSelf();
    RecordTick(stats_, MEMTABLE_HIT);
  } else {
    PERF_TIMER_GUARD(get_from_output_files_time);
    super_version->current->Get(read_options, lkey, pinnable_val,
                                /*timestamp=*/nullptr, &s, &merge_context,
                                &max_covering_tombstone_seq);
    RecordTick(stats_, MEMTABLE_MISS);
  }
  RecordTick(stats_, NUMBER_KEYS_READ);
  size_t size = pinnable_val->size();
  RecordTick(stats_, BYTES_READ, size);
  RecordInHistogram(stats_, BYTES_PER_READ, size);
  PERF_COUNTER_ADD(get_read_bytes, size);
  return s;
}

Iterator*
DBImplReadOnly::NewIterator(const ReadOptions& read_options, ColumnFamilyHandle* column_family) { auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); SequenceNumber latest_snapshot = versions_->LastSequence(); SequenceNumber read_seq = read_options.snapshot != nullptr ? reinterpret_cast(read_options.snapshot) ->number_ : latest_snapshot; ReadCallback* read_callback = nullptr; // No read callback provided. auto db_iter = NewArenaWrappedDbIterator( env_, read_options, *cfd->ioptions(), super_version->mutable_cf_options, read_seq, super_version->mutable_cf_options.max_sequential_skip_in_iterations, super_version->version_number, read_callback); auto internal_iter = NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(), db_iter->GetRangeDelAggregator(), read_seq, /* allow_unprepared_value */ true); db_iter->SetIterUnderDBIter(internal_iter); return db_iter; } Status DBImplReadOnly::NewIterators( const ReadOptions& read_options, const std::vector& column_families, std::vector* iterators) { ReadCallback* read_callback = nullptr; // No read callback provided. if (iterators == nullptr) { return Status::InvalidArgument("iterators not allowed to be nullptr"); } iterators->clear(); iterators->reserve(column_families.size()); SequenceNumber latest_snapshot = versions_->LastSequence(); SequenceNumber read_seq = read_options.snapshot != nullptr ? 
reinterpret_cast(read_options.snapshot) ->number_ : latest_snapshot; for (auto cfh : column_families) { auto* cfd = reinterpret_cast(cfh)->cfd(); auto* sv = cfd->GetSuperVersion()->Ref(); auto* db_iter = NewArenaWrappedDbIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, read_seq, sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->version_number, read_callback); auto* internal_iter = NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(), db_iter->GetRangeDelAggregator(), read_seq, /* allow_unprepared_value */ true); db_iter->SetIterUnderDBIter(internal_iter); iterators->push_back(db_iter); } return Status::OK(); } namespace { // Return OK if dbname exists in the file system // or create_if_missing is false Status OpenForReadOnlyCheckExistence(const DBOptions& db_options, const std::string& dbname) { Status s; if (!db_options.create_if_missing) { // Attempt to read "CURRENT" file const std::shared_ptr& fs = db_options.env->GetFileSystem(); std::string manifest_path; uint64_t manifest_file_number; s = VersionSet::GetCurrentManifestPath(dbname, fs.get(), &manifest_path, &manifest_file_number); if (!s.ok()) { return Status::NotFound(CurrentFileName(dbname), "does not exist"); } } return s; } } // namespace Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, DB** dbptr, bool /*error_if_log_file_exist*/) { // If dbname does not exist in the file system, should not do anything Status s = OpenForReadOnlyCheckExistence(options, dbname); if (!s.ok()) { return s; } *dbptr = nullptr; // Try to first open DB as fully compacted DB s = CompactedDBImpl::Open(options, dbname, dbptr); if (s.ok()) { return s; } DBOptions db_options(options); ColumnFamilyOptions cf_options(options); std::vector column_families; column_families.push_back( ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); std::vector handles; s = DBImplReadOnly::OpenForReadOnlyWithoutCheck( db_options, dbname, column_families, &handles, 
dbptr); if (s.ok()) { assert(handles.size() == 1); // i can delete the handle since DBImpl is always holding a // reference to default column family delete handles[0]; } return s; } Status DB::OpenForReadOnly( const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr, bool error_if_log_file_exist) { // If dbname does not exist in the file system, should not do anything Status s = OpenForReadOnlyCheckExistence(db_options, dbname); if (!s.ok()) { return s; } return DBImplReadOnly::OpenForReadOnlyWithoutCheck( db_options, dbname, column_families, handles, dbptr, error_if_log_file_exist); } Status DBImplReadOnly::OpenForReadOnlyWithoutCheck( const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr, bool error_if_log_file_exist) { *dbptr = nullptr; handles->clear(); SuperVersionContext sv_context(/* create_superversion */ true); DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname); impl->mutex_.Lock(); Status s = impl->Recover(column_families, true /* read only */, error_if_log_file_exist); if (s.ok()) { // set column family handles for (auto cf : column_families) { auto cfd = impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name); if (cfd == nullptr) { s = Status::InvalidArgument("Column family not found: ", cf.name); break; } handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); } } if (s.ok()) { for (auto cfd : *impl->versions_->GetColumnFamilySet()) { sv_context.NewSuperVersion(); cfd->InstallSuperVersion(&sv_context, &impl->mutex_); } } impl->mutex_.Unlock(); sv_context.Clean(); if (s.ok()) { *dbptr = impl; for (auto* h : *handles) { impl->NewThreadStatusCfInfo( reinterpret_cast(h)->cfd()); } } else { for (auto h : *handles) { delete h; } handles->clear(); delete impl; } return s; } #else // !ROCKSDB_LITE Status DB::OpenForReadOnly(const Options& /*options*/, const std::string& /*dbname*/, DB** 
/*dbptr*/, bool /*error_if_log_file_exist*/) { return Status::NotSupported("Not supported in ROCKSDB_LITE."); } Status DB::OpenForReadOnly( const DBOptions& /*db_options*/, const std::string& /*dbname*/, const std::vector& /*column_families*/, std::vector* /*handles*/, DB** /*dbptr*/, bool /*error_if_log_file_exist*/) { return Status::NotSupported("Not supported in ROCKSDB_LITE."); } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_readonly.h000066400000000000000000000134341370372246700207210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "db/db_impl/db_impl.h" namespace ROCKSDB_NAMESPACE { class DBImplReadOnly : public DBImpl { public: DBImplReadOnly(const DBOptions& options, const std::string& dbname); // No copying allowed DBImplReadOnly(const DBImplReadOnly&) = delete; void operator=(const DBImplReadOnly&) = delete; virtual ~DBImplReadOnly(); // Implementations of the DB interface using DB::Get; virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; // TODO: Implement ReadOnly MultiGet? 
using DBImpl::NewIterator; virtual Iterator* NewIterator(const ReadOptions&, ColumnFamilyHandle* column_family) override; virtual Status NewIterators( const ReadOptions& options, const std::vector& column_families, std::vector* iterators) override; using DBImpl::Put; virtual Status Put(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*value*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DBImpl::Merge; virtual Status Merge(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*value*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DBImpl::Delete; virtual Status Delete(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DBImpl::SingleDelete; virtual Status SingleDelete(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/) override { return Status::NotSupported("Not supported operation in read only mode."); } virtual Status Write(const WriteOptions& /*options*/, WriteBatch* /*updates*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DBImpl::CompactRange; virtual Status CompactRange(const CompactRangeOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice* /*begin*/, const Slice* /*end*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DBImpl::CompactFiles; virtual Status CompactFiles( const CompactionOptions& /*compact_options*/, ColumnFamilyHandle* /*column_family*/, const std::vector& /*input_file_names*/, const int /*output_level*/, const int /*output_path_id*/ = -1, std::vector* const /*output_file_names*/ = nullptr, CompactionJobInfo* /*compaction_job_info*/ = nullptr) 
override { return Status::NotSupported("Not supported operation in read only mode."); } virtual Status DisableFileDeletions() override { return Status::NotSupported("Not supported operation in read only mode."); } virtual Status EnableFileDeletions(bool /*force*/) override { return Status::NotSupported("Not supported operation in read only mode."); } virtual Status GetLiveFiles(std::vector& ret, uint64_t* manifest_file_size, bool /*flush_memtable*/) override { return DBImpl::GetLiveFiles(ret, manifest_file_size, false /* flush_memtable */); } using DBImpl::Flush; virtual Status Flush(const FlushOptions& /*options*/, ColumnFamilyHandle* /*column_family*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DBImpl::SyncWAL; virtual Status SyncWAL() override { return Status::NotSupported("Not supported operation in read only mode."); } using DB::IngestExternalFile; virtual Status IngestExternalFile( ColumnFamilyHandle* /*column_family*/, const std::vector& /*external_files*/, const IngestExternalFileOptions& /*ingestion_options*/) override { return Status::NotSupported("Not supported operation in read only mode."); } using DB::CreateColumnFamilyWithImport; virtual Status CreateColumnFamilyWithImport( const ColumnFamilyOptions& /*options*/, const std::string& /*column_family_name*/, const ImportColumnFamilyOptions& /*import_options*/, const ExportImportFilesMetaData& /*metadata*/, ColumnFamilyHandle** /*handle*/) override { return Status::NotSupported("Not supported operation in read only mode."); } private: // A "helper" function for DB::OpenForReadOnly without column families // to reduce unnecessary I/O // It has the same functionality as DB::OpenForReadOnly with column families // but does not check the existence of dbname in the file system static Status OpenForReadOnlyWithoutCheck( const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr, bool 
error_if_log_file_exist = false); friend class DB; }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/db_impl/db_impl_secondary.cc000066400000000000000000000620771370372246700212400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/db_impl/db_impl_secondary.h" #include #include "db/arena_wrapped_db_iter.h" #include "db/merge_context.h" #include "logging/auto_roll_logger.h" #include "monitoring/perf_context_imp.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE DBImplSecondary::DBImplSecondary(const DBOptions& db_options, const std::string& dbname) : DBImpl(db_options, dbname) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Opening the db in secondary mode"); LogFlush(immutable_db_options_.info_log); } DBImplSecondary::~DBImplSecondary() {} Status DBImplSecondary::Recover( const std::vector& column_families, bool /*readonly*/, bool /*error_if_log_file_exist*/, bool /*error_if_data_exists_in_logs*/, uint64_t*) { mutex_.AssertHeld(); JobContext job_context(0); Status s; s = static_cast(versions_.get()) ->Recover(column_families, &manifest_reader_, &manifest_reporter_, &manifest_reader_status_); if (!s.ok()) { return s; } if (immutable_db_options_.paranoid_checks && s.ok()) { s = CheckConsistency(); } // Initial max_total_in_memory_state_ before recovery logs. 
max_total_in_memory_state_ = 0; for (auto cfd : *versions_->GetColumnFamilySet()) { auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); max_total_in_memory_state_ += mutable_cf_options->write_buffer_size * mutable_cf_options->max_write_buffer_number; } if (s.ok()) { default_cf_handle_ = new ColumnFamilyHandleImpl( versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_); default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats(); single_column_family_mode_ = versions_->GetColumnFamilySet()->NumberOfColumnFamilies() == 1; std::unordered_set cfds_changed; s = FindAndRecoverLogFiles(&cfds_changed, &job_context); } if (s.IsPathNotFound()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Secondary tries to read WAL, but WAL file(s) have already " "been purged by primary."); s = Status::OK(); } // TODO: update options_file_number_ needed? job_context.Clean(); return s; } // find new WAL and apply them in order to the secondary instance Status DBImplSecondary::FindAndRecoverLogFiles( std::unordered_set* cfds_changed, JobContext* job_context) { assert(nullptr != cfds_changed); assert(nullptr != job_context); Status s; std::vector logs; s = FindNewLogNumbers(&logs); if (s.ok() && !logs.empty()) { SequenceNumber next_sequence(kMaxSequenceNumber); s = RecoverLogFiles(logs, &next_sequence, cfds_changed, job_context); } return s; } // List wal_dir and find all new WALs, return these log numbers Status DBImplSecondary::FindNewLogNumbers(std::vector* logs) { assert(logs != nullptr); std::vector filenames; Status s; s = env_->GetChildren(immutable_db_options_.wal_dir, &filenames); if (s.IsNotFound()) { return Status::InvalidArgument("Failed to open wal_dir", immutable_db_options_.wal_dir); } else if (!s.ok()) { return s; } // if log_readers_ is non-empty, it means we have applied all logs with log // numbers smaller than the smallest log in log_readers_, so there is no // need to pass these logs to RecoverLogFiles uint64_t log_number_min = 0; if 
(!log_readers_.empty()) { log_number_min = log_readers_.begin()->first; } for (size_t i = 0; i < filenames.size(); i++) { uint64_t number; FileType type; if (ParseFileName(filenames[i], &number, &type) && type == kLogFile && number >= log_number_min) { logs->push_back(number); } } // Recover logs in the order that they were generated if (!logs->empty()) { std::sort(logs->begin(), logs->end()); } return s; } Status DBImplSecondary::MaybeInitLogReader( uint64_t log_number, log::FragmentBufferedReader** log_reader) { auto iter = log_readers_.find(log_number); // make sure the log file is still present if (iter == log_readers_.end() || iter->second->reader_->GetLogNumber() != log_number) { // delete the obsolete log reader if log number mismatch if (iter != log_readers_.end()) { log_readers_.erase(iter); } // initialize log reader from log_number // TODO: min_log_number_to_keep_2pc check needed? // Open the log file std::string fname = LogFileName(immutable_db_options_.wal_dir, log_number); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Recovering log #%" PRIu64 " mode %d", log_number, static_cast(immutable_db_options_.wal_recovery_mode)); std::unique_ptr file_reader; { std::unique_ptr file; Status status = fs_->NewSequentialFile( fname, fs_->OptimizeForLogRead(file_options_), &file, nullptr); if (!status.ok()) { *log_reader = nullptr; return status; } file_reader.reset(new SequentialFileReader( std::move(file), fname, immutable_db_options_.log_readahead_size)); } // Create the log reader. 
LogReaderContainer* log_reader_container = new LogReaderContainer( env_, immutable_db_options_.info_log, std::move(fname), std::move(file_reader), log_number); log_readers_.insert(std::make_pair( log_number, std::unique_ptr(log_reader_container))); } iter = log_readers_.find(log_number); assert(iter != log_readers_.end()); *log_reader = iter->second->reader_; return Status::OK(); } // After manifest recovery, replay WALs and refresh log_readers_ if necessary // REQUIRES: log_numbers are sorted in ascending order Status DBImplSecondary::RecoverLogFiles( const std::vector& log_numbers, SequenceNumber* next_sequence, std::unordered_set* cfds_changed, JobContext* job_context) { assert(nullptr != cfds_changed); assert(nullptr != job_context); mutex_.AssertHeld(); Status status; for (auto log_number : log_numbers) { log::FragmentBufferedReader* reader = nullptr; status = MaybeInitLogReader(log_number, &reader); if (!status.ok()) { return status; } assert(reader != nullptr); } for (auto log_number : log_numbers) { auto it = log_readers_.find(log_number); assert(it != log_readers_.end()); log::FragmentBufferedReader* reader = it->second->reader_; // Manually update the file number allocation counter in VersionSet. 
versions_->MarkFileNumberUsed(log_number); // Determine if we should tolerate incomplete records at the tail end of the // Read all the records and add to a memtable std::string scratch; Slice record; WriteBatch batch; while (reader->ReadRecord(&record, &scratch, immutable_db_options_.wal_recovery_mode) && status.ok()) { if (record.size() < WriteBatchInternal::kHeader) { reader->GetReporter()->Corruption( record.size(), Status::Corruption("log record too small")); continue; } WriteBatchInternal::SetContents(&batch, record); SequenceNumber seq_of_batch = WriteBatchInternal::Sequence(&batch); std::vector column_family_ids; status = CollectColumnFamilyIdsFromWriteBatch(batch, &column_family_ids); if (status.ok()) { for (const auto id : column_family_ids) { ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(id); if (cfd == nullptr) { continue; } if (cfds_changed->count(cfd) == 0) { cfds_changed->insert(cfd); } const std::vector& l0_files = cfd->current()->storage_info()->LevelFiles(0); SequenceNumber seq = l0_files.empty() ? 0 : l0_files.back()->fd.largest_seqno; // If the write batch's sequence number is smaller than the last // sequence number of the largest sequence persisted for this column // family, then its data must reside in an SST that has already been // added in the prior MANIFEST replay. if (seq_of_batch <= seq) { continue; } auto curr_log_num = port::kMaxUint64; if (cfd_to_current_log_.count(cfd) > 0) { curr_log_num = cfd_to_current_log_[cfd]; } // If the active memtable contains records added by replaying an // earlier WAL, then we need to seal the memtable, add it to the // immutable memtable list and create a new active memtable. 
if (!cfd->mem()->IsEmpty() && (curr_log_num == port::kMaxUint64 || curr_log_num != log_number)) { const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); MemTable* new_mem = cfd->ConstructNewMemtable(mutable_cf_options, seq_of_batch); cfd->mem()->SetNextLogNumber(log_number); cfd->imm()->Add(cfd->mem(), &job_context->memtables_to_free); new_mem->Ref(); cfd->SetMemtable(new_mem); } } bool has_valid_writes = false; status = WriteBatchInternal::InsertInto( &batch, column_family_memtables_.get(), nullptr /* flush_scheduler */, nullptr /* trim_history_scheduler*/, true, log_number, this, false /* concurrent_memtable_writes */, next_sequence, &has_valid_writes, seq_per_batch_, batch_per_txn_); } // If column family was not found, it might mean that the WAL write // batch references to the column family that was dropped after the // insert. We don't want to fail the whole write batch in that case -- // we just ignore the update. // That's why we set ignore missing column families to true // passing null flush_scheduler will disable memtable flushing which is // needed for secondary instances if (status.ok()) { for (const auto id : column_family_ids) { ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetColumnFamily(id); if (cfd == nullptr) { continue; } std::unordered_map::iterator iter = cfd_to_current_log_.find(cfd); if (iter == cfd_to_current_log_.end()) { cfd_to_current_log_.insert({cfd, log_number}); } else if (log_number > iter->second) { iter->second = log_number; } } auto last_sequence = *next_sequence - 1; if ((*next_sequence != kMaxSequenceNumber) && (versions_->LastSequence() <= last_sequence)) { versions_->SetLastAllocatedSequence(last_sequence); versions_->SetLastPublishedSequence(last_sequence); versions_->SetLastSequence(last_sequence); } } else { // We are treating this as a failure while reading since we read valid // blocks that do not form coherent data reader->GetReporter()->Corruption(record.size(), status); } } if 
(!status.ok()) { return status; } } // remove logreaders from map after successfully recovering the WAL if (log_readers_.size() > 1) { auto erase_iter = log_readers_.begin(); std::advance(erase_iter, log_readers_.size() - 1); log_readers_.erase(log_readers_.begin(), erase_iter); } return status; } // Implementation of the DB interface Status DBImplSecondary::Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { return GetImpl(read_options, column_family, key, value); } Status DBImplSecondary::GetImpl(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val) { assert(pinnable_val != nullptr); PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_); StopWatch sw(env_, stats_, DB_GET); PERF_TIMER_GUARD(get_snapshot_time); auto cfh = static_cast(column_family); ColumnFamilyData* cfd = cfh->cfd(); if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { tracer_->Get(column_family, key); } } // Acquire SuperVersion SuperVersion* super_version = GetAndRefSuperVersion(cfd); SequenceNumber snapshot = versions_->LastSequence(); MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; Status s; LookupKey lkey(key, snapshot); PERF_TIMER_STOP(get_snapshot_time); bool done = false; if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, read_options)) { done = true; pinnable_val->PinSelf(); RecordTick(stats_, MEMTABLE_HIT); } else if ((s.ok() || s.IsMergeInProgress()) && super_version->imm->Get( lkey, pinnable_val->GetSelf(), /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, read_options)) { done = true; pinnable_val->PinSelf(); RecordTick(stats_, MEMTABLE_HIT); } if (!done && !s.ok() && !s.IsMergeInProgress()) { ReturnAndCleanupSuperVersion(cfd, super_version); return s; } if (!done) { PERF_TIMER_GUARD(get_from_output_files_time); 
super_version->current->Get(read_options, lkey, pinnable_val, /*timestamp=*/nullptr, &s, &merge_context, &max_covering_tombstone_seq); RecordTick(stats_, MEMTABLE_MISS); } { PERF_TIMER_GUARD(get_post_process_time); ReturnAndCleanupSuperVersion(cfd, super_version); RecordTick(stats_, NUMBER_KEYS_READ); size_t size = pinnable_val->size(); RecordTick(stats_, BYTES_READ, size); RecordTimeToHistogram(stats_, BYTES_PER_READ, size); PERF_COUNTER_ADD(get_read_bytes, size); } return s; } Iterator* DBImplSecondary::NewIterator(const ReadOptions& read_options, ColumnFamilyHandle* column_family) { if (read_options.managed) { return NewErrorIterator( Status::NotSupported("Managed iterator is not supported anymore.")); } if (read_options.read_tier == kPersistedTier) { return NewErrorIterator(Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators.")); } Iterator* result = nullptr; auto cfh = reinterpret_cast(column_family); auto cfd = cfh->cfd(); ReadCallback* read_callback = nullptr; // No read callback provided. if (read_options.tailing) { return NewErrorIterator(Status::NotSupported( "tailing iterator not supported in secondary mode")); } else if (read_options.snapshot != nullptr) { // TODO (yanqin) support snapshot. 
return NewErrorIterator( Status::NotSupported("snapshot not supported in secondary mode")); } else { auto snapshot = versions_->LastSequence(); result = NewIteratorImpl(read_options, cfd, snapshot, read_callback); } return result; } ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl( const ReadOptions& read_options, ColumnFamilyData* cfd, SequenceNumber snapshot, ReadCallback* read_callback) { assert(nullptr != cfd); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); auto db_iter = NewArenaWrappedDbIterator( env_, read_options, *cfd->ioptions(), super_version->mutable_cf_options, snapshot, super_version->mutable_cf_options.max_sequential_skip_in_iterations, super_version->version_number, read_callback); auto internal_iter = NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(), db_iter->GetRangeDelAggregator(), snapshot, /* allow_unprepared_value */ true); db_iter->SetIterUnderDBIter(internal_iter); return db_iter; } Status DBImplSecondary::NewIterators( const ReadOptions& read_options, const std::vector& column_families, std::vector* iterators) { if (read_options.managed) { return Status::NotSupported("Managed iterator is not supported anymore."); } if (read_options.read_tier == kPersistedTier) { return Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators."); } ReadCallback* read_callback = nullptr; // No read callback provided. if (iterators == nullptr) { return Status::InvalidArgument("iterators not allowed to be nullptr"); } iterators->clear(); iterators->reserve(column_families.size()); if (read_options.tailing) { return Status::NotSupported( "tailing iterator not supported in secondary mode"); } else if (read_options.snapshot != nullptr) { // TODO (yanqin) support snapshot. 
return Status::NotSupported("snapshot not supported in secondary mode"); } else { SequenceNumber read_seq = versions_->LastSequence(); for (auto cfh : column_families) { ColumnFamilyData* cfd = static_cast(cfh)->cfd(); iterators->push_back( NewIteratorImpl(read_options, cfd, read_seq, read_callback)); } } return Status::OK(); } Status DBImplSecondary::CheckConsistency() { mutex_.AssertHeld(); Status s = DBImpl::CheckConsistency(); // If DBImpl::CheckConsistency() which is stricter returns success, then we // do not need to give a second chance. if (s.ok()) { return s; } // It's possible that DBImpl::CheckConssitency() can fail because the primary // may have removed certain files, causing the GetFileSize(name) call to // fail and returning a PathNotFound. In this case, we take a best-effort // approach and just proceed. TEST_SYNC_POINT_CALLBACK( "DBImplSecondary::CheckConsistency:AfterFirstAttempt", &s); if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) { return Status::OK(); } std::vector metadata; versions_->GetLiveFilesMetaData(&metadata); std::string corruption_messages; for (const auto& md : metadata) { // md.name has a leading "/". std::string file_path = md.db_path + md.name; uint64_t fsize = 0; s = env_->GetFileSize(file_path, &fsize); if (!s.ok() && (env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok() || s.IsPathNotFound())) { s = Status::OK(); } if (!s.ok()) { corruption_messages += "Can't access " + md.name + ": " + s.ToString() + "\n"; } } return corruption_messages.empty() ? 
Status::OK() : Status::Corruption(corruption_messages); } Status DBImplSecondary::TryCatchUpWithPrimary() { assert(versions_.get() != nullptr); assert(manifest_reader_.get() != nullptr); Status s; // read the manifest and apply new changes to the secondary instance std::unordered_set cfds_changed; JobContext job_context(0, true /*create_superversion*/); { InstrumentedMutexLock lock_guard(&mutex_); s = static_cast_with_check(versions_.get()) ->ReadAndApply(&mutex_, &manifest_reader_, &cfds_changed); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Last sequence is %" PRIu64, static_cast(versions_->LastSequence())); for (ColumnFamilyData* cfd : cfds_changed) { if (cfd->IsDropped()) { ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] is dropped\n", cfd->GetName().c_str()); continue; } VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Level summary: %s\n", cfd->GetName().c_str(), cfd->current()->storage_info()->LevelSummary(&tmp)); } // list wal_dir to discover new WALs and apply new changes to the secondary // instance if (s.ok()) { s = FindAndRecoverLogFiles(&cfds_changed, &job_context); } if (s.IsPathNotFound()) { ROCKS_LOG_INFO( immutable_db_options_.info_log, "Secondary tries to read WAL, but WAL file(s) have already " "been purged by primary."); s = Status::OK(); } if (s.ok()) { for (auto cfd : cfds_changed) { cfd->imm()->RemoveOldMemTables(cfd->GetLogNumber(), &job_context.memtables_to_free); auto& sv_context = job_context.superversion_contexts.back(); cfd->InstallSuperVersion(&sv_context, &mutex_); sv_context.NewSuperVersion(); } } } job_context.Clean(); // Cleanup unused, obsolete files. JobContext purge_files_job_context(0); { InstrumentedMutexLock lock_guard(&mutex_); // Currently, secondary instance does not own the database files, thus it // is unnecessary for the secondary to force full scan. 
FindObsoleteFiles(&purge_files_job_context, /*force=*/false); } if (purge_files_job_context.HaveSomethingToDelete()) { PurgeObsoleteFiles(purge_files_job_context); } purge_files_job_context.Clean(); return s; } Status DB::OpenAsSecondary(const Options& options, const std::string& dbname, const std::string& secondary_path, DB** dbptr) { *dbptr = nullptr; DBOptions db_options(options); ColumnFamilyOptions cf_options(options); std::vector column_families; column_families.emplace_back(kDefaultColumnFamilyName, cf_options); std::vector handles; Status s = DB::OpenAsSecondary(db_options, dbname, secondary_path, column_families, &handles, dbptr); if (s.ok()) { assert(handles.size() == 1); delete handles[0]; } return s; } Status DB::OpenAsSecondary( const DBOptions& db_options, const std::string& dbname, const std::string& secondary_path, const std::vector& column_families, std::vector* handles, DB** dbptr) { *dbptr = nullptr; if (db_options.max_open_files != -1) { // TODO (yanqin) maybe support max_open_files != -1 by creating hard links // on SST files so that db secondary can still have access to old SSTs // while primary instance may delete original. 
return Status::InvalidArgument("require max_open_files to be -1"); } DBOptions tmp_opts(db_options); Status s; if (nullptr == tmp_opts.info_log) { s = CreateLoggerFromOptions(secondary_path, tmp_opts, &tmp_opts.info_log); if (!s.ok()) { tmp_opts.info_log = nullptr; } } handles->clear(); DBImplSecondary* impl = new DBImplSecondary(tmp_opts, dbname); impl->versions_.reset(new ReactiveVersionSet( dbname, &impl->immutable_db_options_, impl->file_options_, impl->table_cache_.get(), impl->write_buffer_manager_, &impl->write_controller_)); impl->column_family_memtables_.reset( new ColumnFamilyMemTablesImpl(impl->versions_->GetColumnFamilySet())); impl->wal_in_db_path_ = IsWalDirSameAsDBPath(&impl->immutable_db_options_); impl->mutex_.Lock(); s = impl->Recover(column_families, true, false, false); if (s.ok()) { for (auto cf : column_families) { auto cfd = impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name); if (nullptr == cfd) { s = Status::InvalidArgument("Column family not found: ", cf.name); break; } handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); } } SuperVersionContext sv_context(true /* create_superversion */); if (s.ok()) { for (auto cfd : *impl->versions_->GetColumnFamilySet()) { sv_context.NewSuperVersion(); cfd->InstallSuperVersion(&sv_context, &impl->mutex_); } } impl->mutex_.Unlock(); sv_context.Clean(); if (s.ok()) { *dbptr = impl; for (auto h : *handles) { impl->NewThreadStatusCfInfo( reinterpret_cast(h)->cfd()); } } else { for (auto h : *handles) { delete h; } handles->clear(); delete impl; } return s; } #else // !ROCKSDB_LITE Status DB::OpenAsSecondary(const Options& /*options*/, const std::string& /*name*/, const std::string& /*secondary_path*/, DB** /*dbptr*/) { return Status::NotSupported("Not supported in ROCKSDB_LITE."); } Status DB::OpenAsSecondary( const DBOptions& /*db_options*/, const std::string& /*dbname*/, const std::string& /*secondary_path*/, const std::vector& /*column_families*/, std::vector* 
/*handles*/, DB** /*dbptr*/) { return Status::NotSupported("Not supported in ROCKSDB_LITE."); } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_impl_secondary.h000066400000000000000000000307761370372246700211030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "db/db_impl/db_impl.h" namespace ROCKSDB_NAMESPACE { // A wrapper class to hold log reader, log reporter, log status. class LogReaderContainer { public: LogReaderContainer() : reader_(nullptr), reporter_(nullptr), status_(nullptr) {} LogReaderContainer(Env* env, std::shared_ptr info_log, std::string fname, std::unique_ptr&& file_reader, uint64_t log_number) { LogReporter* reporter = new LogReporter(); status_ = new Status(); reporter->env = env; reporter->info_log = info_log.get(); reporter->fname = std::move(fname); reporter->status = status_; reporter_ = reporter; // We intentially make log::Reader do checksumming even if // paranoid_checks==false so that corruptions cause entire commits // to be skipped instead of propagating bad information (like overly // large sequence numbers). 
reader_ = new log::FragmentBufferedReader(info_log, std::move(file_reader), reporter, true /*checksum*/, log_number); } log::FragmentBufferedReader* reader_; log::Reader::Reporter* reporter_; Status* status_; ~LogReaderContainer() { delete reader_; delete reporter_; delete status_; } private: struct LogReporter : public log::Reader::Reporter { Env* env; Logger* info_log; std::string fname; Status* status; // nullptr if immutable_db_options_.paranoid_checks==false void Corruption(size_t bytes, const Status& s) override { ROCKS_LOG_WARN(info_log, "%s%s: dropping %d bytes; %s", (this->status == nullptr ? "(ignoring error) " : ""), fname.c_str(), static_cast(bytes), s.ToString().c_str()); if (this->status != nullptr && this->status->ok()) { *this->status = s; } } }; }; // The secondary instance shares access to the storage as the primary. // The secondary is able to read and replay changes described in both the // MANIFEST and the WAL files without coordination with the primary. // The secondary instance can be opened using `DB::OpenAsSecondary`. After // that, it can call `DBImplSecondary::TryCatchUpWithPrimary` to make best // effort attempts to catch up with the primary. class DBImplSecondary : public DBImpl { public: DBImplSecondary(const DBOptions& options, const std::string& dbname); ~DBImplSecondary() override; // Recover by replaying MANIFEST and WAL. Also initialize manifest_reader_ // and log_readers_ to facilitate future operations. 
Status Recover(const std::vector& column_families, bool read_only, bool error_if_log_file_exist, bool error_if_data_exists_in_logs, uint64_t* = nullptr) override; // Implementations of the DB interface using DB::Get; Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value); using DBImpl::NewIterator; Iterator* NewIterator(const ReadOptions&, ColumnFamilyHandle* column_family) override; ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& read_options, ColumnFamilyData* cfd, SequenceNumber snapshot, ReadCallback* read_callback); Status NewIterators(const ReadOptions& options, const std::vector& column_families, std::vector* iterators) override; using DBImpl::Put; Status Put(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*value*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::Merge; Status Merge(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, const Slice& /*value*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::Delete; Status Delete(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::SingleDelete; Status SingleDelete(const WriteOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } Status Write(const WriteOptions& /*options*/, WriteBatch* /*updates*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::CompactRange; Status CompactRange(const CompactRangeOptions& 
/*options*/, ColumnFamilyHandle* /*column_family*/, const Slice* /*begin*/, const Slice* /*end*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::CompactFiles; Status CompactFiles( const CompactionOptions& /*compact_options*/, ColumnFamilyHandle* /*column_family*/, const std::vector& /*input_file_names*/, const int /*output_level*/, const int /*output_path_id*/ = -1, std::vector* const /*output_file_names*/ = nullptr, CompactionJobInfo* /*compaction_job_info*/ = nullptr) override { return Status::NotSupported("Not supported operation in secondary mode."); } Status DisableFileDeletions() override { return Status::NotSupported("Not supported operation in secondary mode."); } Status EnableFileDeletions(bool /*force*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } Status GetLiveFiles(std::vector&, uint64_t* /*manifest_file_size*/, bool /*flush_memtable*/ = true) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::Flush; Status Flush(const FlushOptions& /*options*/, ColumnFamilyHandle* /*column_family*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::SetDBOptions; Status SetDBOptions(const std::unordered_map& /*options_map*/) override { // Currently not supported because changing certain options may cause // flush/compaction. return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::SetOptions; Status SetOptions( ColumnFamilyHandle* /*cfd*/, const std::unordered_map& /*options_map*/) override { // Currently not supported because changing certain options may cause // flush/compaction and/or write to MANIFEST. 
return Status::NotSupported("Not supported operation in secondary mode."); } using DBImpl::SyncWAL; Status SyncWAL() override { return Status::NotSupported("Not supported operation in secondary mode."); } using DB::IngestExternalFile; Status IngestExternalFile( ColumnFamilyHandle* /*column_family*/, const std::vector& /*external_files*/, const IngestExternalFileOptions& /*ingestion_options*/) override { return Status::NotSupported("Not supported operation in secondary mode."); } // Try to catch up with the primary by reading as much as possible from the // log files until there is nothing more to read or encounters an error. If // the amount of information in the log files to process is huge, this // method can take long time due to all the I/O and CPU costs. Status TryCatchUpWithPrimary() override; // Try to find log reader using log_number from log_readers_ map, initialize // if it doesn't exist Status MaybeInitLogReader(uint64_t log_number, log::FragmentBufferedReader** log_reader); // Check if all live files exist on file system and that their file sizes // matche to the in-memory records. It is possible that some live files may // have been deleted by the primary. In this case, CheckConsistency() does // not flag the missing file as inconsistency. 
Status CheckConsistency() override; protected: // ColumnFamilyCollector is a write batch handler which does nothing // except recording unique column family IDs class ColumnFamilyCollector : public WriteBatch::Handler { std::unordered_set column_family_ids_; Status AddColumnFamilyId(uint32_t column_family_id) { if (column_family_ids_.find(column_family_id) == column_family_ids_.end()) { column_family_ids_.insert(column_family_id); } return Status::OK(); } public: explicit ColumnFamilyCollector() {} ~ColumnFamilyCollector() override {} Status PutCF(uint32_t column_family_id, const Slice&, const Slice&) override { return AddColumnFamilyId(column_family_id); } Status DeleteCF(uint32_t column_family_id, const Slice&) override { return AddColumnFamilyId(column_family_id); } Status SingleDeleteCF(uint32_t column_family_id, const Slice&) override { return AddColumnFamilyId(column_family_id); } Status DeleteRangeCF(uint32_t column_family_id, const Slice&, const Slice&) override { return AddColumnFamilyId(column_family_id); } Status MergeCF(uint32_t column_family_id, const Slice&, const Slice&) override { return AddColumnFamilyId(column_family_id); } Status PutBlobIndexCF(uint32_t column_family_id, const Slice&, const Slice&) override { return AddColumnFamilyId(column_family_id); } const std::unordered_set& column_families() const { return column_family_ids_; } }; Status CollectColumnFamilyIdsFromWriteBatch( const WriteBatch& batch, std::vector* column_family_ids) { assert(column_family_ids != nullptr); column_family_ids->clear(); ColumnFamilyCollector handler; Status s = batch.Iterate(&handler); if (s.ok()) { for (const auto& cf : handler.column_families()) { column_family_ids->push_back(cf); } } return s; } bool OwnTablesAndLogs() const override { // Currently, the secondary instance does not own the database files. It // simply opens the files of the primary instance and tracks their file // descriptors until they become obsolete. 
In the future, the secondary may // create links to database files. OwnTablesAndLogs will return true then. return false; } private: friend class DB; // No copying allowed DBImplSecondary(const DBImplSecondary&); void operator=(const DBImplSecondary&); using DBImpl::Recover; Status FindAndRecoverLogFiles( std::unordered_set* cfds_changed, JobContext* job_context); Status FindNewLogNumbers(std::vector* logs); // After manifest recovery, replay WALs and refresh log_readers_ if necessary // REQUIRES: log_numbers are sorted in ascending order Status RecoverLogFiles(const std::vector& log_numbers, SequenceNumber* next_sequence, std::unordered_set* cfds_changed, JobContext* job_context); std::unique_ptr manifest_reader_; std::unique_ptr manifest_reporter_; std::unique_ptr manifest_reader_status_; // Cache log readers for each log number, used for continue WAL replay // after recovery std::map> log_readers_; // Current WAL number replayed for each column family. std::unordered_map cfd_to_current_log_; }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/db_impl/db_impl_write.cc000066400000000000000000002123171370372246700203750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_impl/db_impl.h"

#include <cinttypes>
#include "db/error_handler.h"
#include "db/event_helpers.h"
#include "monitoring/perf_context_imp.h"
#include "options/options_helper.h"
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {
// Convenience methods

// Forwards to DB::Put().
Status DBImpl::Put(const WriteOptions& o, ColumnFamilyHandle* column_family,
                   const Slice& key, const Slice& val) {
  return DB::Put(o, column_family, key, val);
}

// Forwards to DB::Merge(), but first rejects the call with NotSupported when
// the column family was opened without a merge_operator.
Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family,
                     const Slice& key, const Slice& val) {
  auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
  if (!cfh->cfd()->ioptions()->merge_operator) {
    return Status::NotSupported("Provide a merge_operator when opening DB");
  }
  return DB::Merge(o, column_family, key, val);
}

// Forwards to DB::Delete().
Status DBImpl::Delete(const WriteOptions& write_options,
                      ColumnFamilyHandle* column_family, const Slice& key) {
  return DB::Delete(write_options, column_family, key);
}

// Forwards to DB::SingleDelete().
Status DBImpl::SingleDelete(const WriteOptions& write_options,
                            ColumnFamilyHandle* column_family,
                            const Slice& key) {
  return DB::SingleDelete(write_options, column_family, key);
}

// Stores `callback` in recoverable_state_pre_release_callback_ via reset(),
// replacing whatever callback was held before.
void DBImpl::SetRecoverableStatePreReleaseCallback(
    PreReleaseCallback* callback) {
  recoverable_state_pre_release_callback_.reset(callback);
}

// Writes `my_batch` through WriteImpl() with no callback and no log_used
// output.
Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) {
  return WriteImpl(write_options, my_batch, nullptr, nullptr);
}

#ifndef ROCKSDB_LITE
// Same as Write(), but passes `callback` on to WriteImpl().
Status DBImpl::WriteWithCallback(const WriteOptions& write_options,
                                 WriteBatch* my_batch,
                                 WriteCallback* callback) {
  return WriteImpl(write_options, my_batch, callback, nullptr);
}
#endif  // ROCKSDB_LITE

// The main write queue. This is the only write queue that updates LastSequence.
// When using one write queue, the same sequence also indicates the last
// published sequence.
Status DBImpl::WriteImpl(const WriteOptions& write_options, WriteBatch* my_batch, WriteCallback* callback, uint64_t* log_used, uint64_t log_ref, bool disable_memtable, uint64_t* seq_used, size_t batch_cnt, PreReleaseCallback* pre_release_callback) { assert(!seq_per_batch_ || batch_cnt != 0); if (my_batch == nullptr) { return Status::Corruption("Batch is nullptr!"); } if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { tracer_->Write(my_batch); } } if (write_options.sync && write_options.disableWAL) { return Status::InvalidArgument("Sync writes has to enable WAL."); } if (two_write_queues_ && immutable_db_options_.enable_pipelined_write) { return Status::NotSupported( "pipelined_writes is not compatible with concurrent prepares"); } if (seq_per_batch_ && immutable_db_options_.enable_pipelined_write) { // TODO(yiwu): update pipeline write with seq_per_batch and batch_cnt return Status::NotSupported( "pipelined_writes is not compatible with seq_per_batch"); } if (immutable_db_options_.unordered_write && immutable_db_options_.enable_pipelined_write) { return Status::NotSupported( "pipelined_writes is not compatible with unordered_write"); } // Otherwise IsLatestPersistentState optimization does not make sense assert(!WriteBatchInternal::IsLatestPersistentState(my_batch) || disable_memtable); Status status; IOStatus io_s; if (write_options.low_pri) { status = ThrottleLowPriWritesIfNeeded(write_options, my_batch); if (!status.ok()) { return status; } } if (two_write_queues_ && disable_memtable) { AssignOrder assign_order = seq_per_batch_ ? kDoAssignOrder : kDontAssignOrder; // Otherwise it is WAL-only Prepare batches in WriteCommitted policy and // they don't consume sequence. 
return WriteImplWALOnly(&nonmem_write_thread_, write_options, my_batch, callback, log_used, log_ref, seq_used, batch_cnt, pre_release_callback, assign_order, kDontPublishLastSeq, disable_memtable); } if (immutable_db_options_.unordered_write) { const size_t sub_batch_cnt = batch_cnt != 0 ? batch_cnt // every key is a sub-batch consuming a seq : WriteBatchInternal::Count(my_batch); uint64_t seq; // Use a write thread to i) optimize for WAL write, ii) publish last // sequence in in increasing order, iii) call pre_release_callback serially status = WriteImplWALOnly(&write_thread_, write_options, my_batch, callback, log_used, log_ref, &seq, sub_batch_cnt, pre_release_callback, kDoAssignOrder, kDoPublishLastSeq, disable_memtable); TEST_SYNC_POINT("DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL"); if (!status.ok()) { return status; } if (seq_used) { *seq_used = seq; } if (!disable_memtable) { TEST_SYNC_POINT("DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"); status = UnorderedWriteMemtable(write_options, my_batch, callback, log_ref, seq, sub_batch_cnt); } return status; } if (immutable_db_options_.enable_pipelined_write) { return PipelinedWriteImpl(write_options, my_batch, callback, log_used, log_ref, disable_memtable, seq_used); } PERF_TIMER_GUARD(write_pre_and_post_process_time); WriteThread::Writer w(write_options, my_batch, callback, log_ref, disable_memtable, batch_cnt, pre_release_callback); if (!write_options.disableWAL) { RecordTick(stats_, WRITE_WITH_WAL); } StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE); write_thread_.JoinBatchGroup(&w); if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) { // we are a non-leader in a parallel group if (w.ShouldWriteToMemtable()) { PERF_TIMER_STOP(write_pre_and_post_process_time); PERF_TIMER_GUARD(write_memtable_time); ColumnFamilyMemTablesImpl column_family_memtables( versions_->GetColumnFamilySet()); w.status = WriteBatchInternal::InsertInto( &w, w.sequence, &column_family_memtables, 
&flush_scheduler_, &trim_history_scheduler_, write_options.ignore_missing_column_families, 0 /*log_number*/, this, true /*concurrent_memtable_writes*/, seq_per_batch_, w.batch_cnt, batch_per_txn_, write_options.memtable_insert_hint_per_batch); PERF_TIMER_START(write_pre_and_post_process_time); } if (write_thread_.CompleteParallelMemTableWriter(&w)) { // we're responsible for exit batch group // TODO(myabandeh): propagate status to write_group auto last_sequence = w.write_group->last_sequence; versions_->SetLastSequence(last_sequence); MemTableInsertStatusCheck(w.status); write_thread_.ExitAsBatchGroupFollower(&w); } assert(w.state == WriteThread::STATE_COMPLETED); // STATE_COMPLETED conditional below handles exit status = w.FinalStatus(); } if (w.state == WriteThread::STATE_COMPLETED) { if (log_used != nullptr) { *log_used = w.log_used; } if (seq_used != nullptr) { *seq_used = w.sequence; } // write is complete and leader has updated sequence return w.FinalStatus(); } // else we are the leader of the write batch group assert(w.state == WriteThread::STATE_GROUP_LEADER); // Once reaches this point, the current writer "w" will try to do its write // job. It may also pick up some of the remaining writers in the "writers_" // when it finds suitable, and finish them in the same write batch. // This is how a write job could be done by the other writer. WriteContext write_context; WriteThread::WriteGroup write_group; bool in_parallel_group = false; uint64_t last_sequence = kMaxSequenceNumber; mutex_.Lock(); bool need_log_sync = write_options.sync; bool need_log_dir_sync = need_log_sync && !log_dir_synced_; if (!two_write_queues_ || !disable_memtable) { // With concurrent writes we do preprocess only in the write thread that // also does write to memtable to avoid sync issue on shared data structure // with the other thread // PreprocessWrite does its own perf timing. 
PERF_TIMER_STOP(write_pre_and_post_process_time); status = PreprocessWrite(write_options, &need_log_sync, &write_context); if (!two_write_queues_) { // Assign it after ::PreprocessWrite since the sequence might advance // inside it by WriteRecoverableState last_sequence = versions_->LastSequence(); } PERF_TIMER_START(write_pre_and_post_process_time); } log::Writer* log_writer = logs_.back().writer; mutex_.Unlock(); // Add to log and apply to memtable. We can release the lock // during this phase since &w is currently responsible for logging // and protects against concurrent loggers and concurrent writes // into memtables TEST_SYNC_POINT("DBImpl::WriteImpl:BeforeLeaderEnters"); last_batch_group_size_ = write_thread_.EnterAsBatchGroupLeader(&w, &write_group); if (status.ok()) { // Rules for when we can update the memtable concurrently // 1. supported by memtable // 2. Puts are not okay if inplace_update_support // 3. Merges are not okay // // Rules 1..2 are enforced by checking the options // during startup (CheckConcurrentWritesSupported), so if // options.allow_concurrent_memtable_write is true then they can be // assumed to be true. Rule 3 is checked for each batch. We could // relax rules 2 if we could prevent write batches from referring // more than once to a particular key. 
bool parallel = immutable_db_options_.allow_concurrent_memtable_write && write_group.size > 1; size_t total_count = 0; size_t valid_batches = 0; size_t total_byte_size = 0; size_t pre_release_callback_cnt = 0; for (auto* writer : write_group) { if (writer->CheckCallback(this)) { valid_batches += writer->batch_cnt; if (writer->ShouldWriteToMemtable()) { total_count += WriteBatchInternal::Count(writer->batch); parallel = parallel && !writer->batch->HasMerge(); } total_byte_size = WriteBatchInternal::AppendedByteSize( total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); if (writer->pre_release_callback) { pre_release_callback_cnt++; } } } // Note about seq_per_batch_: either disableWAL is set for the entire write // group or not. In either case we inc seq for each write batch with no // failed callback. This means that there could be a batch with // disalbe_memtable in between; although we do not write this batch to // memtable it still consumes a seq. Otherwise, if !seq_per_batch_, we inc // the seq per valid written key to mem. size_t seq_inc = seq_per_batch_ ? valid_batches : total_count; const bool concurrent_update = two_write_queues_; // Update stats while we are an exclusive group leader, so we know // that nobody else can be writing to these particular stats. // We're optimistic, updating the stats before we successfully // commit. That lets us release our leader status early. 
auto stats = default_cf_internal_stats_; stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count, concurrent_update); RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count); stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size, concurrent_update); RecordTick(stats_, BYTES_WRITTEN, total_byte_size); stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1, concurrent_update); RecordTick(stats_, WRITE_DONE_BY_SELF); auto write_done_by_other = write_group.size - 1; if (write_done_by_other > 0) { stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther, write_done_by_other, concurrent_update); RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other); } RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size); if (write_options.disableWAL) { has_unpersisted_data_.store(true, std::memory_order_relaxed); } PERF_TIMER_STOP(write_pre_and_post_process_time); if (!two_write_queues_) { if (status.ok() && !write_options.disableWAL) { PERF_TIMER_GUARD(write_wal_time); io_s = WriteToWAL(write_group, log_writer, log_used, need_log_sync, need_log_dir_sync, last_sequence + 1); } } else { if (status.ok() && !write_options.disableWAL) { PERF_TIMER_GUARD(write_wal_time); // LastAllocatedSequence is increased inside WriteToWAL under // wal_write_mutex_ to ensure ordered events in WAL io_s = ConcurrentWriteToWAL(write_group, log_used, &last_sequence, seq_inc); } else { // Otherwise we inc seq number for memtable writes last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc); } } status = io_s; assert(last_sequence != kMaxSequenceNumber); const SequenceNumber current_sequence = last_sequence + 1; last_sequence += seq_inc; // PreReleaseCallback is called after WAL write and before memtable write if (status.ok()) { SequenceNumber next_sequence = current_sequence; size_t index = 0; // Note: the logic for advancing seq here must be consistent with the // logic in WriteBatchInternal::InsertInto(write_group...) 
as well as // with WriteBatchInternal::InsertInto(write_batch...) that is called on // the merged batch during recovery from the WAL. for (auto* writer : write_group) { if (writer->CallbackFailed()) { continue; } writer->sequence = next_sequence; if (writer->pre_release_callback) { Status ws = writer->pre_release_callback->Callback( writer->sequence, disable_memtable, writer->log_used, index++, pre_release_callback_cnt); if (!ws.ok()) { status = ws; break; } } if (seq_per_batch_) { assert(writer->batch_cnt); next_sequence += writer->batch_cnt; } else if (writer->ShouldWriteToMemtable()) { next_sequence += WriteBatchInternal::Count(writer->batch); } } } if (status.ok()) { PERF_TIMER_GUARD(write_memtable_time); if (!parallel) { // w.sequence will be set inside InsertInto w.status = WriteBatchInternal::InsertInto( write_group, current_sequence, column_family_memtables_.get(), &flush_scheduler_, &trim_history_scheduler_, write_options.ignore_missing_column_families, 0 /*recovery_log_number*/, this, parallel, seq_per_batch_, batch_per_txn_); } else { write_group.last_sequence = last_sequence; write_thread_.LaunchParallelMemTableWriters(&write_group); in_parallel_group = true; // Each parallel follower is doing each own writes. The leader should // also do its own. 
if (w.ShouldWriteToMemtable()) { ColumnFamilyMemTablesImpl column_family_memtables( versions_->GetColumnFamilySet()); assert(w.sequence == current_sequence); w.status = WriteBatchInternal::InsertInto( &w, w.sequence, &column_family_memtables, &flush_scheduler_, &trim_history_scheduler_, write_options.ignore_missing_column_families, 0 /*log_number*/, this, true /*concurrent_memtable_writes*/, seq_per_batch_, w.batch_cnt, batch_per_txn_, write_options.memtable_insert_hint_per_batch); } } if (seq_used != nullptr) { *seq_used = w.sequence; } } } PERF_TIMER_START(write_pre_and_post_process_time); if (!w.CallbackFailed()) { if (!io_s.ok()) { IOStatusCheck(io_s); } else { WriteStatusCheck(status); } } if (need_log_sync) { mutex_.Lock(); MarkLogsSynced(logfile_number_, need_log_dir_sync, status); mutex_.Unlock(); // Requesting sync with two_write_queues_ is expected to be very rare. We // hence provide a simple implementation that is not necessarily efficient. if (two_write_queues_) { if (manual_wal_flush_) { status = FlushWAL(true); } else { status = SyncWAL(); } } } bool should_exit_batch_group = true; if (in_parallel_group) { // CompleteParallelWorker returns true if this thread should // handle exit, false means somebody else did should_exit_batch_group = write_thread_.CompleteParallelMemTableWriter(&w); } if (should_exit_batch_group) { if (status.ok()) { // Note: if we are to resume after non-OK statuses we need to revisit how // we reacts to non-OK statuses here. 
versions_->SetLastSequence(last_sequence); } MemTableInsertStatusCheck(w.status); write_thread_.ExitAsBatchGroupLeader(write_group, status); } if (status.ok()) { status = w.FinalStatus(); } return status; }

// Pipelined write path: the WAL append and the memtable insert are done as two
// separate stages, each with its own write group.
//
// Stage 1 runs only if this writer comes out of JoinBatchGroup() as
// STATE_GROUP_LEADER: it calls PreprocessWrite() under mutex_, forms the WAL
// write group, assigns a sequence number to every writer whose callback passed
// and that writes to the memtable, updates the write stats, appends the group
// to the WAL (unless write_options.disableWAL) and leaves the group through
// ExitAsBatchGroupLeader().
//
// Stage 2 depends on the state the writer is in afterwards: as
// STATE_MEMTABLE_WRITER_LEADER it forms a memtable write group and either
// launches parallel writers or inserts the whole group itself; as
// STATE_PARALLEL_MEMTABLE_WRITER it inserts only its own batch.
//
// Parameters:
//   write_options    - options of this write; disableWAL, sync,
//                      ignore_missing_column_families and
//                      memtable_insert_hint_per_batch are read here.
//   my_batch         - the batch to write; handed to the Writer.
//   callback         - optional write callback; handed to the Writer.
//   log_used         - forwarded to WriteToWAL() by the WAL group leader only.
//   log_ref          - handed to the Writer.
//   disable_memtable - handed to the Writer.
//   seq_used         - if non-null, receives w.sequence before returning.
//
// Returns w.FinalStatus(). The writer is asserted to be in STATE_COMPLETED at
// that point.
Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
                                  WriteBatch* my_batch, WriteCallback* callback,
                                  uint64_t* log_used, uint64_t log_ref,
                                  bool disable_memtable, uint64_t* seq_used) {
  PERF_TIMER_GUARD(write_pre_and_post_process_time);
  StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);

  WriteContext write_context;

  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
                        disable_memtable);
  write_thread_.JoinBatchGroup(&w);
  if (w.state == WriteThread::STATE_GROUP_LEADER) {
    WriteThread::WriteGroup wal_write_group;
    // A leader whose callback does not allow batching first waits for the
    // memtable writers before it proceeds.
    if (w.callback && !w.callback->AllowWriteBatching()) {
      write_thread_.WaitForMemTableWriters();
    }
    mutex_.Lock();
    // Unlike the non-pipelined path, a sync is only requested when the WAL is
    // actually written.
    bool need_log_sync = !write_options.disableWAL && write_options.sync;
    bool need_log_dir_sync = need_log_sync && !log_dir_synced_;
    // PreprocessWrite does its own perf timing.
    PERF_TIMER_STOP(write_pre_and_post_process_time);
    w.status = PreprocessWrite(write_options, &need_log_sync, &write_context);
    PERF_TIMER_START(write_pre_and_post_process_time);
    // Capture the current log writer while mutex_ is still held.
    log::Writer* log_writer = logs_.back().writer;
    mutex_.Unlock();

    // This can set non-OK status if callback fails.
    last_batch_group_size_ =
        write_thread_.EnterAsBatchGroupLeader(&w, &wal_write_group);
    // First sequence number of this group.
    const SequenceNumber current_sequence =
        write_thread_.UpdateLastSequence(versions_->LastSequence()) + 1;
    size_t total_count = 0;
    size_t total_byte_size = 0;

    if (w.status.ok()) {
      SequenceNumber next_sequence = current_sequence;
      for (auto writer : wal_write_group) {
        if (writer->CheckCallback(this)) {
          // Only batches that go to the memtable consume sequence numbers;
          // every batch with a passing callback counts towards the byte size.
          if (writer->ShouldWriteToMemtable()) {
            writer->sequence = next_sequence;
            size_t count = WriteBatchInternal::Count(writer->batch);
            next_sequence += count;
            total_count += count;
          }
          total_byte_size = WriteBatchInternal::AppendedByteSize(
              total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
        }
      }
      if (w.disable_wal) {
        has_unpersisted_data_.store(true, std::memory_order_relaxed);
      }
      // Presumably records the last sequence consumed by this group so that
      // the next WAL leader starts after it -- TODO confirm against
      // WriteThread::UpdateLastSequence.
      write_thread_.UpdateLastSequence(current_sequence + total_count - 1);
    }

    // Stats are updated even when w.status is not OK; in that case both
    // totals are still zero.
    auto stats = default_cf_internal_stats_;
    stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count);
    RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
    stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size);
    RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
    RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);

    PERF_TIMER_STOP(write_pre_and_post_process_time);

    IOStatus io_s;
    if (w.status.ok() && !write_options.disableWAL) {
      PERF_TIMER_GUARD(write_wal_time);
      // Note: the done-by-self / done-by-other counters are only updated on
      // this WAL-writing path.
      stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1);
      RecordTick(stats_, WRITE_DONE_BY_SELF, 1);
      if (wal_write_group.size > 1) {
        stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
                          wal_write_group.size - 1);
        RecordTick(stats_, WRITE_DONE_BY_OTHER, wal_write_group.size - 1);
      }
      io_s = WriteToWAL(wal_write_group, log_writer, log_used, need_log_sync,
                        need_log_dir_sync, current_sequence);
      w.status = io_s;
    }

    // Report failures to the error handler, unless the leader's own callback
    // failed. An I/O failure takes precedence over w.status.
    if (!w.CallbackFailed()) {
      if (!io_s.ok()) {
        IOStatusCheck(io_s);
      } else {
        WriteStatusCheck(w.status);
      }
    }

    if (need_log_sync) {
      mutex_.Lock();
      MarkLogsSynced(logfile_number_, need_log_dir_sync, w.status);
      mutex_.Unlock();
    }

    write_thread_.ExitAsBatchGroupLeader(wal_write_group, w.status);
  }

  WriteThread::WriteGroup memtable_write_group;
  if (w.state == WriteThread::STATE_MEMTABLE_WRITER_LEADER) {
    PERF_TIMER_GUARD(write_memtable_time);
    assert(w.ShouldWriteToMemtable());
    write_thread_.EnterAsMemTableWriter(&w, &memtable_write_group);
    if (memtable_write_group.size > 1 &&
        immutable_db_options_.allow_concurrent_memtable_write) {
      // Each writer of the group, this one included, inserts its own batch in
      // the STATE_PARALLEL_MEMTABLE_WRITER branch below.
      write_thread_.LaunchParallelMemTableWriters(&memtable_write_group);
    } else {
      // Serial case: the leader inserts the whole group, publishes the last
      // sequence and exits on behalf of the group.
      memtable_write_group.status = WriteBatchInternal::InsertInto(
          memtable_write_group, w.sequence, column_family_memtables_.get(),
          &flush_scheduler_, &trim_history_scheduler_,
          write_options.ignore_missing_column_families, 0 /*log_number*/, this,
          false /*concurrent_memtable_writes*/, seq_per_batch_, batch_per_txn_);
      versions_->SetLastSequence(memtable_write_group.last_sequence);
      write_thread_.ExitAsMemTableWriter(&w, memtable_write_group);
    }
  }

  if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) {
    assert(w.ShouldWriteToMemtable());
    // A local ColumnFamilyMemTablesImpl is used here instead of the shared
    // column_family_memtables_ member.
    ColumnFamilyMemTablesImpl column_family_memtables(
        versions_->GetColumnFamilySet());
    w.status = WriteBatchInternal::InsertInto(
        &w, w.sequence, &column_family_memtables, &flush_scheduler_,
        &trim_history_scheduler_, write_options.ignore_missing_column_families,
        0 /*log_number*/, this, true /*concurrent_memtable_writes*/,
        false /*seq_per_batch*/, 0 /*batch_cnt*/, true /*batch_per_txn*/,
        write_options.memtable_insert_hint_per_batch);
    // A true return value means this writer is the one that must publish the
    // group's last sequence and exit the group.
    if (write_thread_.CompleteParallelMemTableWriter(&w)) {
      MemTableInsertStatusCheck(w.status);
      versions_->SetLastSequence(w.write_group->last_sequence);
      write_thread_.ExitAsMemTableWriter(&w, *w.write_group);
    }
  }
  if (seq_used != nullptr) {
    *seq_used = w.sequence;
  }

  assert(w.state == WriteThread::STATE_COMPLETED);
  return w.FinalStatus();
}

// Inserts my_batch into the memtable at the caller-supplied sequence number
// `seq`. No write group is joined here: the Writer is only used to evaluate
// the callback and to carry the status.
//
// The insert happens only if the callback check passes and the writer should
// write to the memtable. Regardless of that outcome, the call always
// decrements pending_memtable_writes_ by one and, when the counter reaches
// zero, notifies switch_cv_ under switch_mutex_.
//
// Parameters:
//   write_options - options of this write.
//   my_batch      - the batch to insert.
//   callback      - optional write callback; handed to the Writer.
//   log_ref       - handed to the Writer.
//   seq           - sequence number assigned to the batch.
//   sub_batch_cnt - passed to InsertInto() as the batch count.
//
// Returns w.FinalStatus() if it is not OK, otherwise Status::OK().
Status DBImpl::UnorderedWriteMemtable(const WriteOptions& write_options,
                                      WriteBatch* my_batch,
                                      WriteCallback* callback, uint64_t log_ref,
                                      SequenceNumber seq,
                                      const size_t sub_batch_cnt) {
  PERF_TIMER_GUARD(write_pre_and_post_process_time);
  StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);

  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
                        false /*disable_memtable*/);

  if (w.CheckCallback(this) && w.ShouldWriteToMemtable()) {
    w.sequence = seq;
    size_t total_count = WriteBatchInternal::Count(my_batch);
    InternalStats* stats = default_cf_internal_stats_;
    stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count);
    RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);

    ColumnFamilyMemTablesImpl column_family_memtables(
        versions_->GetColumnFamilySet());
    w.status = WriteBatchInternal::InsertInto(
        &w, w.sequence, &column_family_memtables, &flush_scheduler_,
        &trim_history_scheduler_, write_options.ignore_missing_column_families,
        0 /*log_number*/, this, true /*concurrent_memtable_writes*/,
        seq_per_batch_, sub_batch_cnt, true /*batch_per_txn*/,
        write_options.memtable_insert_hint_per_batch);

    WriteStatusCheck(w.status);
    if (write_options.disableWAL) {
      has_unpersisted_data_.store(true, std::memory_order_relaxed);
    }
  }

  // fetch_sub returns the value before the decrement, hence the "- 1".
  size_t pending_cnt = pending_memtable_writes_.fetch_sub(1) - 1;
  if (pending_cnt == 0) {
    // switch_cv_ waits until pending_memtable_writes_ = 0. Locking its mutex
    // before notify ensures that cv is in waiting state when it is notified
    // thus not missing the update to pending_memtable_writes_ even though it is
    // not modified under the mutex.
    // NOTE(review): std::lock_guard is written without a template argument
    // list here, which only compiles with C++17 class template argument
    // deduction -- confirm the build standard, or that an argument list was
    // not lost from this line.
    std::lock_guard lck(switch_mutex_);
    switch_cv_.notify_all();
  }

  if (!w.FinalStatus().ok()) {
    return w.FinalStatus();
  }
  return Status::OK();
}

// The 2nd write queue. If enabled it will be used only for WAL-only writes.
// This is the only queue that updates LastPublishedSequence which is only
// applicable in a two-queue setting.
// Writes a batch group to the WAL without inserting it into the memtable in
// this function.
//
// The caller joins the batch group of `write_thread`. If another leader
// already completed the write, its results are returned. Otherwise this
// writer is the leader: it forms the group, updates stats, allocates sequence
// numbers, writes to the WAL (unless write_options.disableWAL), optionally
// syncs, runs the pre-release callbacks and exits the group.
//
// Parameters:
//   write_thread         - the write queue whose batch group is joined.
//   write_options        - options of this write; disableWAL and sync are
//                          read here.
//   my_batch, callback, log_ref, sub_batch_cnt, pre_release_callback
//                        - handed to the Writer.
//   log_used             - if non-null, receives the log number used.
//   seq_used             - if non-null, receives w.sequence.
//   assign_order         - with kDoAssignOrder the group consumes the sum of
//                          batch_cnt over the writers whose callback did not
//                          fail, and each such writer gets its own sequence
//                          range. Otherwise seq_inc is 0 and every writer is
//                          assigned last_sequence + 1.
//   publish_last_seq     - with kDoPublishLastSeq, PreprocessWrite() is run
//                          under mutex_ first and the last sequence is
//                          published via SetLastSequence() at the end.
//   disable_memtable     - handed to the Writer, and passed to the
//                          pre-release callback of every writer in the group.
//
// Returns the first non-OK status encountered, otherwise w.FinalStatus().
Status DBImpl::WriteImplWALOnly(
    WriteThread* write_thread, const WriteOptions& write_options,
    WriteBatch* my_batch, WriteCallback* callback, uint64_t* log_used,
    const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
    PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
    const PublishLastSeq publish_last_seq, const bool disable_memtable) {
  Status status;
  PERF_TIMER_GUARD(write_pre_and_post_process_time);
  WriteThread::Writer w(write_options, my_batch, callback, log_ref,
                        disable_memtable, sub_batch_cnt, pre_release_callback);
  RecordTick(stats_, WRITE_WITH_WAL);
  StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);

  write_thread->JoinBatchGroup(&w);
  assert(w.state != WriteThread::STATE_PARALLEL_MEMTABLE_WRITER);
  if (w.state == WriteThread::STATE_COMPLETED) {
    // Another writer led the group and already did this write.
    if (log_used != nullptr) {
      *log_used = w.log_used;
    }
    if (seq_used != nullptr) {
      *seq_used = w.sequence;
    }
    return w.FinalStatus();
  }
  // else we are the leader of the write batch group
  assert(w.state == WriteThread::STATE_GROUP_LEADER);

  if (publish_last_seq == kDoPublishLastSeq) {
    // Currently we only use kDoPublishLastSeq in unordered_write
    assert(immutable_db_options_.unordered_write);
    WriteContext write_context;
    if (error_handler_.IsDBStopped()) {
      status = error_handler_.GetBGError();
    }
    // TODO(myabandeh): Make preliminary checks thread-safe so we could do them
    // without paying the cost of obtaining the mutex.
    if (status.ok()) {
      InstrumentedMutexLock l(&mutex_);
      bool need_log_sync = false;
      status = PreprocessWrite(write_options, &need_log_sync, &write_context);
      WriteStatusCheck(status);
    }
    if (!status.ok()) {
      // Still enter and exit as leader so that the group is formed and then
      // released with the failure status.
      WriteThread::WriteGroup write_group;
      write_thread->EnterAsBatchGroupLeader(&w, &write_group);
      write_thread->ExitAsBatchGroupLeader(write_group, status);
      return status;
    }
  }

  WriteThread::WriteGroup write_group;
  // Assigned on both branches of the disableWAL check below.
  uint64_t last_sequence;
  write_thread->EnterAsBatchGroupLeader(&w, &write_group);
  // Note: no need to update last_batch_group_size_ here since the batch writes
  // to WAL only

  size_t pre_release_callback_cnt = 0;
  size_t total_byte_size = 0;
  for (auto* writer : write_group) {
    if (writer->CheckCallback(this)) {
      total_byte_size = WriteBatchInternal::AppendedByteSize(
          total_byte_size, WriteBatchInternal::ByteSize(writer->batch));
      if (writer->pre_release_callback) {
        pre_release_callback_cnt++;
      }
    }
  }

  const bool concurrent_update = true;
  // Update stats while we are an exclusive group leader, so we know
  // that nobody else can be writing to these particular stats.
  // We're optimistic, updating the stats before we successfully
  // commit.  That lets us release our leader status early.
  auto stats = default_cf_internal_stats_;
  stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size,
                    concurrent_update);
  RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
  stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1,
                    concurrent_update);
  RecordTick(stats_, WRITE_DONE_BY_SELF);
  auto write_done_by_other = write_group.size - 1;
  if (write_done_by_other > 0) {
    stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
                      write_done_by_other, concurrent_update);
    RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
  }
  RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);

  PERF_TIMER_STOP(write_pre_and_post_process_time);

  PERF_TIMER_GUARD(write_wal_time);
  // LastAllocatedSequence is increased inside ConcurrentWriteToWAL under
  // log_write_mutex_ to ensure ordered events in WAL
  size_t seq_inc = 0 /* total_count */;
  if (assign_order == kDoAssignOrder) {
    size_t total_batch_cnt = 0;
    for (auto* writer : write_group) {
      assert(writer->batch_cnt || !seq_per_batch_);
      if (!writer->CallbackFailed()) {
        total_batch_cnt += writer->batch_cnt;
      }
    }
    seq_inc = total_batch_cnt;
  }
  IOStatus io_s;
  if (!write_options.disableWAL) {
    io_s =
        ConcurrentWriteToWAL(write_group, log_used, &last_sequence, seq_inc);
    status = io_s;
  } else {
    // Otherwise we inc seq number to do solely the seq allocation
    last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc);
  }

  // Hand out sequence numbers to the writers whose callback did not fail and
  // count how many of them still have a memtable write to do.
  size_t memtable_write_cnt = 0;
  auto curr_seq = last_sequence + 1;
  for (auto* writer : write_group) {
    if (writer->CallbackFailed()) {
      continue;
    }
    writer->sequence = curr_seq;
    if (assign_order == kDoAssignOrder) {
      assert(writer->batch_cnt || !seq_per_batch_);
      curr_seq += writer->batch_cnt;
    }
    if (!writer->disable_memtable) {
      memtable_write_cnt++;
    }
    // else seq advances only by memtable writes
  }
  if (status.ok() && write_options.sync) {
    assert(!write_options.disableWAL);
    // Requesting sync with two_write_queues_ is expected to be very rare. We
    // hence provide a simple implementation that is not necessarily efficient.
    if (manual_wal_flush_) {
      status = FlushWAL(true);
    } else {
      status = SyncWAL();
    }
  }
  PERF_TIMER_START(write_pre_and_post_process_time);

  // Report failures to the error handler, unless the leader's own callback
  // failed. An I/O failure takes precedence over `status`.
  if (!w.CallbackFailed()) {
    if (!io_s.ok()) {
      IOStatusCheck(io_s);
    } else {
      WriteStatusCheck(status);
    }
  }
  if (status.ok()) {
    // `index` numbers the callbacks that are actually invoked, out of
    // pre_release_callback_cnt in total. The loop stops at the first failure.
    size_t index = 0;
    for (auto* writer : write_group) {
      if (!writer->CallbackFailed() && writer->pre_release_callback) {
        assert(writer->sequence != kMaxSequenceNumber);
        Status ws = writer->pre_release_callback->Callback(
            writer->sequence, disable_memtable, writer->log_used, index++,
            pre_release_callback_cnt);
        if (!ws.ok()) {
          status = ws;
          break;
        }
      }
    }
  }
  if (publish_last_seq == kDoPublishLastSeq) {
    // Note: this runs whether or not `status` is OK.
    versions_->SetLastSequence(last_sequence + seq_inc);
    // Currently we only use kDoPublishLastSeq in unordered_write
    assert(immutable_db_options_.unordered_write);
  }
  if (immutable_db_options_.unordered_write && status.ok()) {
    // UnorderedWriteMemtable() decrements this counter once per write.
    pending_memtable_writes_ += memtable_write_cnt;
  }
  write_thread->ExitAsBatchGroupLeader(write_group, status);
  if (status.ok()) {
    status = w.FinalStatus();
  }
  if (seq_used != nullptr) {
    *seq_used = w.sequence;
  }
  return status;
}

// Records a failed write status as a background error, tagged
// BackgroundErrorReason::kWriteCallback.
//
// Nothing is recorded unless paranoid_checks is enabled; Busy and Incomplete
// statuses are ignored. mutex_ is taken and released inside this function.
void DBImpl::WriteStatusCheck(const Status& status) {
  // Is setting bg_error_ enough here?  This will at least stop
  // compaction and fail any further writes.
  if (immutable_db_options_.paranoid_checks && !status.ok() &&
      !status.IsBusy() && !status.IsIncomplete()) {
    mutex_.Lock();
    error_handler_.SetBGError(status, BackgroundErrorReason::kWriteCallback);
    mutex_.Unlock();
  }
}

// IOStatus counterpart of WriteStatusCheck(): same conditions and the same
// BackgroundErrorReason::kWriteCallback tag, applied to an IOStatus.
// mutex_ is taken and released inside this function.
void DBImpl::IOStatusCheck(const IOStatus& io_status) {
  // Is setting bg_error_ enough here?  This will at least stop
  // compaction and fail any further writes.
  if (immutable_db_options_.paranoid_checks && !io_status.ok() &&
      !io_status.IsBusy() && !io_status.IsIncomplete()) {
    mutex_.Lock();
    error_handler_.SetBGError(io_status,
                              BackgroundErrorReason::kWriteCallback);
    mutex_.Unlock();
  }
}

// Records a failed memtable insert as a background error, tagged
// BackgroundErrorReason::kMemTable.
//
// Unlike WriteStatusCheck(), this does not consult paranoid_checks and makes
// no exception for particular status codes: any non-OK status is recorded.
// mutex_ is taken and released inside this function.
void DBImpl::MemTableInsertStatusCheck(const Status& status) {
  // A non-OK status here indicates that the state implied by the
  // WAL has diverged from the in-memory state.  This could be
  // because of a corrupt write_batch (very bad), or because the
  // client specified an invalid column family and didn't specify
  // ignore_missing_column_families.
  if (!status.ok()) {
    mutex_.Lock();
    assert(!error_handler_.IsBGWorkStopped());
    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable);
    mutex_.Unlock();
  }
}

// Housekeeping done before a write group is formed. Must be called with
// mutex_ held.
//
// In order, and each step only while `status` is still OK:
//   1. fail with the background error if the DB is stopped;
//   2. SwitchWAL() if the total WAL size exceeds GetMaxTotalWalSize() and
//      the DB is not in single-column-family mode;
//   3. HandleWriteBufferFull() if the write buffer manager asks for a flush;
//   4. TrimMemtableHistory() if the trim-history scheduler is not empty;
//   5. ScheduleFlushes() if the flush scheduler is not empty;
//   6. DelayWrite() if the write controller is stopped or needs a delay;
//   7. if *need_log_sync is set, wait for in-progress syncs and mark every
//      log in logs_ as getting_synced.
//
// Parameters:
//   write_options - forwarded to DelayWrite().
//   need_log_sync - in/out; reset to false on return unless the status is OK
//                   and it was true on entry.
//   write_context - forwarded to the switch/flush helpers.
//
// Returns the first non-OK status, or OK.
Status DBImpl::PreprocessWrite(const WriteOptions& write_options,
                               bool* need_log_sync,
                               WriteContext* write_context) {
  mutex_.AssertHeld();
  assert(write_context != nullptr && need_log_sync != nullptr);
  Status status;

  if (error_handler_.IsDBStopped()) {
    status = error_handler_.GetBGError();
  }

  PERF_TIMER_GUARD(write_scheduling_flushes_compactions_time);

  assert(!single_column_family_mode_ ||
         versions_->GetColumnFamilySet()->NumberOfColumnFamilies() == 1);
  if (UNLIKELY(status.ok() && !single_column_family_mode_ &&
               total_log_size_ > GetMaxTotalWalSize())) {
    WaitForPendingWrites();
    status = SwitchWAL(write_context);
  }

  if (UNLIKELY(status.ok() && write_buffer_manager_->ShouldFlush())) {
    // Before a new memtable is added in SwitchMemtable(),
    // write_buffer_manager_->ShouldFlush() will keep returning true. If another
    // thread is writing to another DB with the same write buffer, they may also
    // be flushed. We may end up with flushing much more DBs than needed. It's
    // suboptimal but still correct.
    WaitForPendingWrites();
    status = HandleWriteBufferFull(write_context);
  }

  if (UNLIKELY(status.ok() && !trim_history_scheduler_.Empty())) {
    status = TrimMemtableHistory(write_context);
  }

  if (UNLIKELY(status.ok() && !flush_scheduler_.Empty())) {
    WaitForPendingWrites();
    status = ScheduleFlushes(write_context);
  }

  PERF_TIMER_STOP(write_scheduling_flushes_compactions_time);
  PERF_TIMER_GUARD(write_pre_and_post_process_time);

  if (UNLIKELY(status.ok() && (write_controller_.IsStopped() ||
                               write_controller_.NeedsDelay()))) {
    PERF_TIMER_STOP(write_pre_and_post_process_time);
    PERF_TIMER_GUARD(write_delay_time);
    // We don't know the size of the current batch, so we always use the size
    // of the previous one. It might create a fairness issue in that
    // expiration might happen for smaller writes while larger writes can go
    // through. Can optimize it if it is an issue.
    status = DelayWrite(last_batch_group_size_, write_options);
    PERF_TIMER_START(write_pre_and_post_process_time);
  }

  if (status.ok() && *need_log_sync) {
    // Wait until the parallel syncs are finished. Any sync process has to sync
    // the front log too so it is enough to check the status of front()
    // We do a while loop since log_sync_cv_ is signalled when any sync is
    // finished
    // Note: there does not seem to be a reason to wait for parallel sync at
    // this early step but it is not important since parallel sync (SyncWAL) and
    // need_log_sync are usually not used together.
    while (logs_.front().getting_synced) {
      log_sync_cv_.Wait();
    }
    for (auto& log : logs_) {
      assert(!log.getting_synced);
      // This is just to prevent the logs from being synced by a parallel
      // SyncWAL call. We will do the actual syncing later, after we have
      // written to the WAL.
      // Note: there does not seem to be a reason to set this early before we
      // actually write to the WAL
      log.getting_synced = true;
    }
  } else {
    *need_log_sync = false;
  }

  return status;
}

// Produces the single WriteBatch that is to be appended to the WAL for
// `write_group`.
//
// Parameters:
//   write_group        - the group to merge.
//   tmp_batch          - scratch batch that receives the merged contents when
//                        merging is needed; must not be null.
//   write_with_wal     - out; number of batches included in the result.
//   to_be_cached_state - out; must point to nullptr on entry. Set to the
//                        last included batch for which
//                        WriteBatchInternal::IsLatestPersistentState() holds,
//                        if any.
//
// Returns the leader's own batch when the group has exactly one writer, its
// callback did not fail and its WAL termination point is cleared; otherwise
// returns tmp_batch with every batch whose callback did not fail appended.
WriteBatch* DBImpl::MergeBatch(const WriteThread::WriteGroup& write_group,
                               WriteBatch* tmp_batch, size_t* write_with_wal,
                               WriteBatch** to_be_cached_state) {
  assert(write_with_wal != nullptr);
  assert(tmp_batch != nullptr);
  assert(*to_be_cached_state == nullptr);
  WriteBatch* merged_batch = nullptr;
  *write_with_wal = 0;
  auto* leader = write_group.leader;
  assert(!leader->disable_wal);  // Same holds for all in the batch group
  if (write_group.size == 1 && !leader->CallbackFailed() &&
      leader->batch->GetWalTerminationPoint().is_cleared()) {
    // we simply write the first WriteBatch to WAL if the group only
    // contains one batch, that batch should be written to the WAL,
    // and the batch is not wanting to be truncated
    merged_batch = leader->batch;
    if (WriteBatchInternal::IsLatestPersistentState(merged_batch)) {
      *to_be_cached_state = merged_batch;
    }
    *write_with_wal = 1;
  } else {
    // WAL needs all of the batches flattened into a single batch.
    // We could avoid copying here with an iov-like AddRecord
    // interface
    merged_batch = tmp_batch;
    for (auto writer : write_group) {
      if (!writer->CallbackFailed()) {
        WriteBatchInternal::Append(merged_batch, writer->batch,
                                   /*WAL_only*/ true);
        if (WriteBatchInternal::IsLatestPersistentState(writer->batch)) {
          // We only need to cache the last of such write batch
          *to_be_cached_state = writer->batch;
        }
        (*write_with_wal)++;
      }
    }
  }
  return merged_batch;
}

// When two_write_queues_ is disabled, this function is called from the only
// write thread. Otherwise this must be called holding log_write_mutex_.
IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer, uint64_t* log_used, uint64_t* log_size) { assert(log_size != nullptr); Slice log_entry = WriteBatchInternal::Contents(&merged_batch); *log_size = log_entry.size(); // When two_write_queues_ WriteToWAL has to be protected from concurretn calls // from the two queues anyway and log_write_mutex_ is already held. Otherwise // if manual_wal_flush_ is enabled we need to protect log_writer->AddRecord // from possible concurrent calls via the FlushWAL by the application. const bool needs_locking = manual_wal_flush_ && !two_write_queues_; // Due to performance cocerns of missed branch prediction penalize the new // manual_wal_flush_ feature (by UNLIKELY) instead of the more common case // when we do not need any locking. if (UNLIKELY(needs_locking)) { log_write_mutex_.Lock(); } IOStatus io_s = log_writer->AddRecord(log_entry); if (UNLIKELY(needs_locking)) { log_write_mutex_.Unlock(); } if (log_used != nullptr) { *log_used = logfile_number_; } total_log_size_ += log_entry.size(); // TODO(myabandeh): it might be unsafe to access alive_log_files_.back() here // since alive_log_files_ might be modified concurrently alive_log_files_.back().AddSize(log_entry.size()); log_empty_ = false; return io_s; } IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, log::Writer* log_writer, uint64_t* log_used, bool need_log_sync, bool need_log_dir_sync, SequenceNumber sequence) { IOStatus io_s; assert(!write_group.leader->disable_wal); // Same holds for all in the batch group size_t write_with_wal = 0; WriteBatch* to_be_cached_state = nullptr; WriteBatch* merged_batch = MergeBatch(write_group, &tmp_batch_, &write_with_wal, &to_be_cached_state); if (merged_batch == write_group.leader->batch) { write_group.leader->log_used = logfile_number_; } else if (write_with_wal > 1) { for (auto writer : write_group) { writer->log_used = logfile_number_; } } WriteBatchInternal::SetSequence(merged_batch, 
sequence); uint64_t log_size; io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size); if (to_be_cached_state) { cached_recoverable_state_ = *to_be_cached_state; cached_recoverable_state_empty_ = false; } if (io_s.ok() && need_log_sync) { StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS); // It's safe to access logs_ with unlocked mutex_ here because: // - we've set getting_synced=true for all logs, // so other threads won't pop from logs_ while we're here, // - only writer thread can push to logs_, and we're in // writer thread, so no one will push to logs_, // - as long as other threads don't modify it, it's safe to read // from std::deque from multiple threads concurrently. for (auto& log : logs_) { io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync); if (!io_s.ok()) { break; } } if (io_s.ok() && need_log_dir_sync) { // We only sync WAL directory the first time WAL syncing is // requested, so that in case users never turn on WAL sync, // we can avoid the disk I/O in the write code path. 
io_s = directories_.GetWalDir()->Fsync(IOOptions(), nullptr); } } if (merged_batch == &tmp_batch_) { tmp_batch_.Clear(); } if (io_s.ok()) { auto stats = default_cf_internal_stats_; if (need_log_sync) { stats->AddDBStats(InternalStats::kIntStatsWalFileSynced, 1); RecordTick(stats_, WAL_FILE_SYNCED); } stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size); RecordTick(stats_, WAL_FILE_BYTES, log_size); stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal); RecordTick(stats_, WRITE_WITH_WAL, write_with_wal); } return io_s; } IOStatus DBImpl::ConcurrentWriteToWAL( const WriteThread::WriteGroup& write_group, uint64_t* log_used, SequenceNumber* last_sequence, size_t seq_inc) { IOStatus io_s; assert(!write_group.leader->disable_wal); // Same holds for all in the batch group WriteBatch tmp_batch; size_t write_with_wal = 0; WriteBatch* to_be_cached_state = nullptr; WriteBatch* merged_batch = MergeBatch(write_group, &tmp_batch, &write_with_wal, &to_be_cached_state); // We need to lock log_write_mutex_ since logs_ and alive_log_files might be // pushed back concurrently log_write_mutex_.Lock(); if (merged_batch == write_group.leader->batch) { write_group.leader->log_used = logfile_number_; } else if (write_with_wal > 1) { for (auto writer : write_group) { writer->log_used = logfile_number_; } } *last_sequence = versions_->FetchAddLastAllocatedSequence(seq_inc); auto sequence = *last_sequence + 1; WriteBatchInternal::SetSequence(merged_batch, sequence); log::Writer* log_writer = logs_.back().writer; uint64_t log_size; io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size); if (to_be_cached_state) { cached_recoverable_state_ = *to_be_cached_state; cached_recoverable_state_empty_ = false; } log_write_mutex_.Unlock(); if (io_s.ok()) { const bool concurrent = true; auto stats = default_cf_internal_stats_; stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size, concurrent); RecordTick(stats_, WAL_FILE_BYTES, log_size); 
stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal, concurrent); RecordTick(stats_, WRITE_WITH_WAL, write_with_wal); } return io_s; } Status DBImpl::WriteRecoverableState() { mutex_.AssertHeld(); if (!cached_recoverable_state_empty_) { bool dont_care_bool; SequenceNumber next_seq; if (two_write_queues_) { log_write_mutex_.Lock(); } SequenceNumber seq; if (two_write_queues_) { seq = versions_->FetchAddLastAllocatedSequence(0); } else { seq = versions_->LastSequence(); } WriteBatchInternal::SetSequence(&cached_recoverable_state_, seq + 1); auto status = WriteBatchInternal::InsertInto( &cached_recoverable_state_, column_family_memtables_.get(), &flush_scheduler_, &trim_history_scheduler_, true, 0 /*recovery_log_number*/, this, false /* concurrent_memtable_writes */, &next_seq, &dont_care_bool, seq_per_batch_); auto last_seq = next_seq - 1; if (two_write_queues_) { versions_->FetchAddLastAllocatedSequence(last_seq - seq); versions_->SetLastPublishedSequence(last_seq); } versions_->SetLastSequence(last_seq); if (two_write_queues_) { log_write_mutex_.Unlock(); } if (status.ok() && recoverable_state_pre_release_callback_) { const bool DISABLE_MEMTABLE = true; for (uint64_t sub_batch_seq = seq + 1; sub_batch_seq < next_seq && status.ok(); sub_batch_seq++) { uint64_t const no_log_num = 0; // Unlock it since the callback might end up locking mutex. 
e.g., // AddCommitted -> AdvanceMaxEvictedSeq -> GetSnapshotListFromDB mutex_.Unlock(); status = recoverable_state_pre_release_callback_->Callback( sub_batch_seq, !DISABLE_MEMTABLE, no_log_num, 0, 1); mutex_.Lock(); } } if (status.ok()) { cached_recoverable_state_.Clear(); cached_recoverable_state_empty_ = true; } return status; } return Status::OK(); } void DBImpl::SelectColumnFamiliesForAtomicFlush( autovector* cfds) { for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) { cfds->push_back(cfd); } } } // Assign sequence number for atomic flush. void DBImpl::AssignAtomicFlushSeq(const autovector& cfds) { assert(immutable_db_options_.atomic_flush); auto seq = versions_->LastSequence(); for (auto cfd : cfds) { cfd->imm()->AssignAtomicFlushSeq(seq); } } Status DBImpl::SwitchWAL(WriteContext* write_context) { mutex_.AssertHeld(); assert(write_context != nullptr); Status status; if (alive_log_files_.begin()->getting_flushed) { return status; } auto oldest_alive_log = alive_log_files_.begin()->number; bool flush_wont_release_oldest_log = false; if (allow_2pc()) { auto oldest_log_with_uncommitted_prep = logs_with_prep_tracker_.FindMinLogContainingOutstandingPrep(); assert(oldest_log_with_uncommitted_prep == 0 || oldest_log_with_uncommitted_prep >= oldest_alive_log); if (oldest_log_with_uncommitted_prep > 0 && oldest_log_with_uncommitted_prep == oldest_alive_log) { if (unable_to_release_oldest_log_) { // we already attempted to flush all column families dependent on // the oldest alive log but the log still contained uncommitted // transactions so there is still nothing that we can do. 
return status; } else { ROCKS_LOG_WARN( immutable_db_options_.info_log, "Unable to release oldest log due to uncommitted transaction"); unable_to_release_oldest_log_ = true; flush_wont_release_oldest_log = true; } } } if (!flush_wont_release_oldest_log) { // we only mark this log as getting flushed if we have successfully // flushed all data in this log. If this log contains outstanding prepared // transactions then we cannot flush this log until those transactions are // commited. unable_to_release_oldest_log_ = false; alive_log_files_.begin()->getting_flushed = true; } ROCKS_LOG_INFO( immutable_db_options_.info_log, "Flushing all column families with data in WAL number %" PRIu64 ". Total log size is %" PRIu64 " while max_total_wal_size is %" PRIu64, oldest_alive_log, total_log_size_.load(), GetMaxTotalWalSize()); // no need to refcount because drop is happening in write thread, so can't // happen while we're in the write thread autovector cfds; if (immutable_db_options_.atomic_flush) { SelectColumnFamiliesForAtomicFlush(&cfds); } else { for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } if (cfd->OldestLogToKeep() <= oldest_alive_log) { cfds.push_back(cfd); } } MaybeFlushStatsCF(&cfds); } WriteThread::Writer nonmem_w; if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } for (const auto cfd : cfds) { cfd->Ref(); status = SwitchMemtable(cfd, write_context); cfd->UnrefAndTryDelete(); if (!status.ok()) { break; } } if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } if (status.ok()) { if (immutable_db_options_.atomic_flush) { AssignAtomicFlushSeq(cfds); } for (auto cfd : cfds) { cfd->imm()->FlushRequested(); } FlushRequest flush_req; GenerateFlushRequest(cfds, &flush_req); SchedulePendingFlush(flush_req, FlushReason::kWriteBufferManager); MaybeScheduleFlushOrCompaction(); } return status; } Status DBImpl::HandleWriteBufferFull(WriteContext* write_context) { mutex_.AssertHeld(); 
assert(write_context != nullptr); Status status; // Before a new memtable is added in SwitchMemtable(), // write_buffer_manager_->ShouldFlush() will keep returning true. If another // thread is writing to another DB with the same write buffer, they may also // be flushed. We may end up with flushing much more DBs than needed. It's // suboptimal but still correct. ROCKS_LOG_INFO( immutable_db_options_.info_log, "Flushing column family with oldest memtable entry. Write buffer is " "using %" ROCKSDB_PRIszt " bytes out of a total of %" ROCKSDB_PRIszt ".", write_buffer_manager_->memory_usage(), write_buffer_manager_->buffer_size()); // no need to refcount because drop is happening in write thread, so can't // happen while we're in the write thread autovector cfds; if (immutable_db_options_.atomic_flush) { SelectColumnFamiliesForAtomicFlush(&cfds); } else { ColumnFamilyData* cfd_picked = nullptr; SequenceNumber seq_num_for_cf_picked = kMaxSequenceNumber; for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } if (!cfd->mem()->IsEmpty()) { // We only consider active mem table, hoping immutable memtable is // already in the process of flushing. 
uint64_t seq = cfd->mem()->GetCreationSeq(); if (cfd_picked == nullptr || seq < seq_num_for_cf_picked) { cfd_picked = cfd; seq_num_for_cf_picked = seq; } } } if (cfd_picked != nullptr) { cfds.push_back(cfd_picked); } MaybeFlushStatsCF(&cfds); } WriteThread::Writer nonmem_w; if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } for (const auto cfd : cfds) { if (cfd->mem()->IsEmpty()) { continue; } cfd->Ref(); status = SwitchMemtable(cfd, write_context); cfd->UnrefAndTryDelete(); if (!status.ok()) { break; } } if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } if (status.ok()) { if (immutable_db_options_.atomic_flush) { AssignAtomicFlushSeq(cfds); } for (const auto cfd : cfds) { cfd->imm()->FlushRequested(); } FlushRequest flush_req; GenerateFlushRequest(cfds, &flush_req); SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull); MaybeScheduleFlushOrCompaction(); } return status; } uint64_t DBImpl::GetMaxTotalWalSize() const { mutex_.AssertHeld(); return mutable_db_options_.max_total_wal_size == 0 ? 
4 * max_total_in_memory_state_ : mutable_db_options_.max_total_wal_size; } // REQUIRES: mutex_ is held // REQUIRES: this thread is currently at the front of the writer queue Status DBImpl::DelayWrite(uint64_t num_bytes, const WriteOptions& write_options) { uint64_t time_delayed = 0; bool delayed = false; { StopWatch sw(env_, stats_, WRITE_STALL, &time_delayed); uint64_t delay = write_controller_.GetDelay(env_, num_bytes); if (delay > 0) { if (write_options.no_slowdown) { return Status::Incomplete("Write stall"); } TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep"); // Notify write_thread_ about the stall so it can setup a barrier and // fail any pending writers with no_slowdown write_thread_.BeginWriteStall(); TEST_SYNC_POINT("DBImpl::DelayWrite:BeginWriteStallDone"); mutex_.Unlock(); // We will delay the write until we have slept for delay ms or // we don't need a delay anymore const uint64_t kDelayInterval = 1000; uint64_t stall_end = sw.start_time() + delay; while (write_controller_.NeedsDelay()) { if (env_->NowMicros() >= stall_end) { // We already delayed this write `delay` microseconds break; } delayed = true; // Sleep for 0.001 seconds env_->SleepForMicroseconds(kDelayInterval); } mutex_.Lock(); write_thread_.EndWriteStall(); } // Don't wait if there's a background error, even if its a soft error. 
We // might wait here indefinitely as the background compaction may never // finish successfully, resulting in the stall condition lasting // indefinitely while (error_handler_.GetBGError().ok() && write_controller_.IsStopped()) { if (write_options.no_slowdown) { return Status::Incomplete("Write stall"); } delayed = true; // Notify write_thread_ about the stall so it can setup a barrier and // fail any pending writers with no_slowdown write_thread_.BeginWriteStall(); TEST_SYNC_POINT("DBImpl::DelayWrite:Wait"); bg_cv_.Wait(); write_thread_.EndWriteStall(); } } assert(!delayed || !write_options.no_slowdown); if (delayed) { default_cf_internal_stats_->AddDBStats( InternalStats::kIntStatsWriteStallMicros, time_delayed); RecordTick(stats_, STALL_MICROS, time_delayed); } // If DB is not in read-only mode and write_controller is not stopping // writes, we can ignore any background errors and allow the write to // proceed Status s; if (write_controller_.IsStopped()) { // If writes are still stopped, it means we bailed due to a background // error s = Status::Incomplete(error_handler_.GetBGError().ToString()); } if (error_handler_.IsDBStopped()) { s = error_handler_.GetBGError(); } return s; } Status DBImpl::ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options, WriteBatch* my_batch) { assert(write_options.low_pri); // This is called outside the DB mutex. Although it is safe to make the call, // the consistency condition is not guaranteed to hold. It's OK to live with // it in this case. // If we need to speed compaction, it means the compaction is left behind // and we start to limit low pri writes to a limit. if (write_controller_.NeedSpeedupCompaction()) { if (allow_2pc() && (my_batch->HasCommit() || my_batch->HasRollback())) { // For 2PC, we only rate limit prepare, not commit. return Status::OK(); } if (write_options.no_slowdown) { return Status::Incomplete("Low priority write stall"); } else { assert(my_batch != nullptr); // Rate limit those writes. 
The reason that we don't completely wait // is that in case the write is heavy, low pri writes may never have // a chance to run. Now we guarantee we are still slowly making // progress. PERF_TIMER_GUARD(write_delay_time); write_controller_.low_pri_rate_limiter()->Request( my_batch->GetDataSize(), Env::IO_HIGH, nullptr /* stats */, RateLimiter::OpType::kWrite); } } return Status::OK(); } void DBImpl::MaybeFlushStatsCF(autovector* cfds) { assert(cfds != nullptr); if (!cfds->empty() && immutable_db_options_.persist_stats_to_disk) { ColumnFamilyData* cfd_stats = versions_->GetColumnFamilySet()->GetColumnFamily( kPersistentStatsColumnFamilyName); if (cfd_stats != nullptr && !cfd_stats->mem()->IsEmpty()) { for (ColumnFamilyData* cfd : *cfds) { if (cfd == cfd_stats) { // stats CF already included in cfds return; } } // force flush stats CF when its log number is less than all other CF's // log numbers bool force_flush_stats_cf = true; for (auto* loop_cfd : *versions_->GetColumnFamilySet()) { if (loop_cfd == cfd_stats) { continue; } if (loop_cfd->GetLogNumber() <= cfd_stats->GetLogNumber()) { force_flush_stats_cf = false; } } if (force_flush_stats_cf) { cfds->push_back(cfd_stats); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Force flushing stats CF with automated flush " "to avoid holding old logs"); } } } } Status DBImpl::TrimMemtableHistory(WriteContext* context) { autovector cfds; ColumnFamilyData* tmp_cfd; while ((tmp_cfd = trim_history_scheduler_.TakeNextColumnFamily()) != nullptr) { cfds.push_back(tmp_cfd); } for (auto& cfd : cfds) { autovector to_delete; cfd->imm()->TrimHistory(&to_delete, cfd->mem()->ApproximateMemoryUsage()); if (!to_delete.empty()) { for (auto m : to_delete) { delete m; } context->superversion_context.NewSuperVersion(); assert(context->superversion_context.new_superversion.get() != nullptr); cfd->InstallSuperVersion(&context->superversion_context, &mutex_); } if (cfd->UnrefAndTryDelete()) { cfd = nullptr; } } return Status::OK(); } Status 
DBImpl::ScheduleFlushes(WriteContext* context) { autovector cfds; if (immutable_db_options_.atomic_flush) { SelectColumnFamiliesForAtomicFlush(&cfds); for (auto cfd : cfds) { cfd->Ref(); } flush_scheduler_.Clear(); } else { ColumnFamilyData* tmp_cfd; while ((tmp_cfd = flush_scheduler_.TakeNextColumnFamily()) != nullptr) { cfds.push_back(tmp_cfd); } MaybeFlushStatsCF(&cfds); } Status status; WriteThread::Writer nonmem_w; if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } for (auto& cfd : cfds) { if (!cfd->mem()->IsEmpty()) { status = SwitchMemtable(cfd, context); } if (cfd->UnrefAndTryDelete()) { cfd = nullptr; } if (!status.ok()) { break; } } if (two_write_queues_) { nonmem_write_thread_.ExitUnbatched(&nonmem_w); } if (status.ok()) { if (immutable_db_options_.atomic_flush) { AssignAtomicFlushSeq(cfds); } FlushRequest flush_req; GenerateFlushRequest(cfds, &flush_req); SchedulePendingFlush(flush_req, FlushReason::kWriteBufferFull); MaybeScheduleFlushOrCompaction(); } return status; } #ifndef ROCKSDB_LITE void DBImpl::NotifyOnMemTableSealed(ColumnFamilyData* /*cfd*/, const MemTableInfo& mem_table_info) { if (immutable_db_options_.listeners.size() == 0U) { return; } if (shutting_down_.load(std::memory_order_acquire)) { return; } for (auto listener : immutable_db_options_.listeners) { listener->OnMemTableSealed(mem_table_info); } } #endif // ROCKSDB_LITE // REQUIRES: mutex_ is held // REQUIRES: this thread is currently at the front of the writer queue // REQUIRES: this thread is currently at the front of the 2nd writer queue if // two_write_queues_ is true (This is to simplify the reasoning.) Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { mutex_.AssertHeld(); WriteThread::Writer nonmem_w; std::unique_ptr lfile; log::Writer* new_log = nullptr; MemTable* new_mem = nullptr; // Recoverable state is persisted in WAL. 
After memtable switch, WAL might // be deleted, so we write the state to memtable to be persisted as well. Status s = WriteRecoverableState(); if (!s.ok()) { return s; } // Attempt to switch to a new memtable and trigger flush of old. // Do this without holding the dbmutex lock. assert(versions_->prev_log_number() == 0); if (two_write_queues_) { log_write_mutex_.Lock(); } bool creating_new_log = !log_empty_; if (two_write_queues_) { log_write_mutex_.Unlock(); } uint64_t recycle_log_number = 0; if (creating_new_log && immutable_db_options_.recycle_log_file_num && !log_recycle_files_.empty()) { recycle_log_number = log_recycle_files_.front(); } uint64_t new_log_number = creating_new_log ? versions_->NewFileNumber() : logfile_number_; const MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); // Set memtable_info for memtable sealed callback #ifndef ROCKSDB_LITE MemTableInfo memtable_info; memtable_info.cf_name = cfd->GetName(); memtable_info.first_seqno = cfd->mem()->GetFirstSequenceNumber(); memtable_info.earliest_seqno = cfd->mem()->GetEarliestSequenceNumber(); memtable_info.num_entries = cfd->mem()->num_entries(); memtable_info.num_deletes = cfd->mem()->num_deletes(); #endif // ROCKSDB_LITE // Log this later after lock release. It may be outdated, e.g., if background // flush happens before logging, but that should be ok. int num_imm_unflushed = cfd->imm()->NumNotFlushed(); const auto preallocate_block_size = GetWalPreallocateBlockSize(mutable_cf_options.write_buffer_size); mutex_.Unlock(); if (creating_new_log) { // TODO: Write buffer size passed in should be max of all CF's instead // of mutable_cf_options.write_buffer_size. 
s = CreateWAL(new_log_number, recycle_log_number, preallocate_block_size, &new_log); } if (s.ok()) { SequenceNumber seq = versions_->LastSequence(); new_mem = cfd->ConstructNewMemtable(mutable_cf_options, seq); context->superversion_context.NewSuperVersion(); } ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] New memtable created with log file: #%" PRIu64 ". Immutable memtables: %d.\n", cfd->GetName().c_str(), new_log_number, num_imm_unflushed); mutex_.Lock(); if (recycle_log_number != 0) { // Since renaming the file is done outside DB mutex, we need to ensure // concurrent full purges don't delete the file while we're recycling it. // To achieve that we hold the old log number in the recyclable list until // after it has been renamed. assert(log_recycle_files_.front() == recycle_log_number); log_recycle_files_.pop_front(); } if (s.ok() && creating_new_log) { log_write_mutex_.Lock(); assert(new_log != nullptr); if (!logs_.empty()) { // Alway flush the buffer of the last log before switching to a new one log::Writer* cur_log_writer = logs_.back().writer; s = cur_log_writer->WriteBuffer(); if (!s.ok()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "[%s] Failed to switch from #%" PRIu64 " to #%" PRIu64 " WAL file\n", cfd->GetName().c_str(), cur_log_writer->get_log_number(), new_log_number); } } if (s.ok()) { logfile_number_ = new_log_number; log_empty_ = true; log_dir_synced_ = false; logs_.emplace_back(logfile_number_, new_log); alive_log_files_.push_back(LogFileNumberSize(logfile_number_)); } log_write_mutex_.Unlock(); } if (!s.ok()) { // how do we fail if we're not creating new log? 
assert(creating_new_log); if (new_mem) { delete new_mem; } if (new_log) { delete new_log; } SuperVersion* new_superversion = context->superversion_context.new_superversion.release(); if (new_superversion != nullptr) { delete new_superversion; } // We may have lost data from the WritableFileBuffer in-memory buffer for // the current log, so treat it as a fatal error and set bg_error error_handler_.SetBGError(s, BackgroundErrorReason::kMemTable); // Read back bg_error in order to get the right severity s = error_handler_.GetBGError(); return s; } for (auto loop_cfd : *versions_->GetColumnFamilySet()) { // all this is just optimization to delete logs that // are no longer needed -- if CF is empty, that means it // doesn't need that particular log to stay alive, so we just // advance the log number. no need to persist this in the manifest if (loop_cfd->mem()->GetFirstSequenceNumber() == 0 && loop_cfd->imm()->NumNotFlushed() == 0) { if (creating_new_log) { loop_cfd->SetLogNumber(logfile_number_); } loop_cfd->mem()->SetCreationSeq(versions_->LastSequence()); } } cfd->mem()->SetNextLogNumber(logfile_number_); cfd->imm()->Add(cfd->mem(), &context->memtables_to_free_); new_mem->Ref(); cfd->SetMemtable(new_mem); InstallSuperVersionAndScheduleWork(cfd, &context->superversion_context, mutable_cf_options); #ifndef ROCKSDB_LITE mutex_.Unlock(); // Notify client that memtable is sealed, now that we have successfully // installed a new memtable NotifyOnMemTableSealed(cfd, memtable_info); mutex_.Lock(); #endif // ROCKSDB_LITE return s; } size_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const { mutex_.AssertHeld(); size_t bsize = static_cast(write_buffer_size / 10 + write_buffer_size); // Some users might set very high write_buffer_size and rely on // max_total_wal_size or other parameters to control the WAL size. 
if (mutable_db_options_.max_total_wal_size > 0) { bsize = std::min( bsize, static_cast(mutable_db_options_.max_total_wal_size)); } if (immutable_db_options_.db_write_buffer_size > 0) { bsize = std::min(bsize, immutable_db_options_.db_write_buffer_size); } if (immutable_db_options_.write_buffer_manager && immutable_db_options_.write_buffer_manager->enabled()) { bsize = std::min( bsize, immutable_db_options_.write_buffer_manager->buffer_size()); } return bsize; } // Default implementations of convenience methods that subclasses of DB // can call if they wish Status DB::Put(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { if (nullptr == opt.timestamp) { // Pre-allocate size of write batch conservatively. // 8 bytes are taken by header, 4 bytes for count, 1 byte for type, // and we allocate 11 extra bytes for key length, as well as value length. WriteBatch batch(key.size() + value.size() + 24); Status s = batch.Put(column_family, key, value); if (!s.ok()) { return s; } return Write(opt, &batch); } const Slice* ts = opt.timestamp; assert(nullptr != ts); size_t ts_sz = ts->size(); assert(column_family->GetComparator()); assert(ts_sz == column_family->GetComparator()->timestamp_size()); WriteBatch batch(key.size() + ts_sz + value.size() + 24, /*max_bytes=*/0, ts_sz); Status s = batch.Put(column_family, key, value); if (!s.ok()) { return s; } s = batch.AssignTimestamp(*ts); if (!s.ok()) { return s; } return Write(opt, &batch); } Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key) { if (nullptr == opt.timestamp) { WriteBatch batch; Status s = batch.Delete(column_family, key); if (!s.ok()) { return s; } return Write(opt, &batch); } const Slice* ts = opt.timestamp; assert(ts != nullptr); const size_t ts_sz = ts->size(); constexpr size_t kKeyAndValueLenSize = 11; constexpr size_t kWriteBatchOverhead = WriteBatchInternal::kHeader + sizeof(ValueType) + kKeyAndValueLenSize; WriteBatch 
batch(key.size() + ts_sz + kWriteBatchOverhead, /*max_bytes=*/0, ts_sz); Status s = batch.Delete(column_family, key); if (!s.ok()) { return s; } s = batch.AssignTimestamp(*ts); if (!s.ok()) { return s; } return Write(opt, &batch); } Status DB::SingleDelete(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key) { WriteBatch batch; batch.SingleDelete(column_family, key); return Write(opt, &batch); } Status DB::DeleteRange(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key) { WriteBatch batch; batch.DeleteRange(column_family, begin_key, end_key); return Write(opt, &batch); } Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { WriteBatch batch; Status s = batch.Merge(column_family, key, value); if (!s.ok()) { return s; } return Write(opt, &batch); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_impl/db_secondary_test.cc000066400000000000000000000733501370372246700212520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_impl/db_impl_secondary.h"
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "test_util/fault_injection_test_env.h"
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {

#ifndef ROCKSDB_LITE
// Fixture that manages a secondary DB instance (db_secondary_) next to the
// primary DB provided by DBTestBase. The secondary uses its own directory,
// secondary_path_, which is destroyed on teardown unless KEEP_DB is set.
class DBSecondaryTest : public DBTestBase {
 public:
  DBSecondaryTest()
      : DBTestBase("/db_secondary_test"),
        secondary_path_(),
        handles_secondary_(),
        db_secondary_(nullptr) {
    secondary_path_ =
        test::PerThreadDBPath(env_, "/db_secondary_test_secondary");
  }

  ~DBSecondaryTest() override {
    CloseSecondary();
    if (getenv("KEEP_DB") != nullptr) {
      fprintf(stdout, "Secondary DB is still at %s\n", secondary_path_.c_str());
    } else {
      Options options;
      options.env = env_;
      EXPECT_OK(DestroyDB(secondary_path_, options));
    }
  }

 protected:
  // Opens dbname_ as a secondary instance into db_ (the fixture's main handle).
  Status ReopenAsSecondary(const Options& options) {
    return DB::OpenAsSecondary(options, dbname_, secondary_path_, &db_);
  }

  // Opens the secondary into db_secondary_ and asserts success.
  void OpenSecondary(const Options& options);

  // Opens the secondary into db_secondary_ and returns the resulting status.
  Status TryOpenSecondary(const Options& options);

  // Opens the secondary with the default column family plus `column_families`;
  // the resulting handles are stored in handles_secondary_.
  void OpenSecondaryWithColumnFamilies(
      const std::vector<std::string>& column_families, const Options& options);

  // Destroys all secondary column family handles and deletes the secondary DB.
  void CloseSecondary() {
    for (auto h : handles_secondary_) {
      db_secondary_->DestroyColumnFamilyHandle(h);
    }
    handles_secondary_.clear();
    delete db_secondary_;
    db_secondary_ = nullptr;
  }

  DBImplSecondary* db_secondary_full() {
    return static_cast<DBImplSecondary*>(db_secondary_);
  }

  // Asserts the number of WAL, table and MANIFEST files found in `dir`.
  void CheckFileTypeCounts(const std::string& dir, int expected_log,
                           int expected_sst, int expected_manifest) const;

  std::string secondary_path_;
  std::vector<ColumnFamilyHandle*> handles_secondary_;
  DB* db_secondary_;
};

void DBSecondaryTest::OpenSecondary(const Options& options) {
  ASSERT_OK(TryOpenSecondary(options));
}

Status DBSecondaryTest::TryOpenSecondary(const Options& options) {
  Status s =
      DB::OpenAsSecondary(options, dbname_, secondary_path_, &db_secondary_);
  return s;
}

void DBSecondaryTest::OpenSecondaryWithColumnFamilies(
    const std::vector<std::string>& column_families, const Options& options) {
  std::vector<ColumnFamilyDescriptor> cf_descs;
  cf_descs.emplace_back(kDefaultColumnFamilyName, options);
  for (const auto& cf_name : column_families) {
    cf_descs.emplace_back(cf_name, options);
  }
  Status s = DB::OpenAsSecondary(options, dbname_, secondary_path_, cf_descs,
                                 &handles_secondary_, &db_secondary_);
  ASSERT_OK(s);
}

void DBSecondaryTest::CheckFileTypeCounts(const std::string& dir,
                                          int expected_log, int expected_sst,
                                          int expected_manifest) const {
  std::vector<std::string> filenames;
  // Fail loudly if the directory cannot be listed instead of silently
  // comparing against zero counts.
  ASSERT_OK(env_->GetChildren(dir, &filenames));

  int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0;
  for (const auto& file : filenames) {
    uint64_t number;
    FileType type;
    // Names that do not parse as DB files are ignored.
    if (ParseFileName(file, &number, &type)) {
      log_cnt += (type == kLogFile);
      sst_cnt += (type == kTableFile);
      manifest_cnt += (type == kDescriptorFile);
    }
  }
  ASSERT_EQ(expected_log, log_cnt);
  ASSERT_EQ(expected_sst, sst_cnt);
  ASSERT_EQ(expected_manifest, manifest_cnt);
}

// Data flushed by the primary is readable after closing it and reopening the
// same path as a secondary.
TEST_F(DBSecondaryTest, ReopenAsSecondary) {
  Options options;
  options.env = env_;
  Reopen(options);
  ASSERT_OK(Put("foo", "foo_value"));
  ASSERT_OK(Put("bar", "bar_value"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  Close();

  ASSERT_OK(ReopenAsSecondary(options));
  ASSERT_EQ("foo_value", Get("foo"));
  ASSERT_EQ("bar_value", Get("bar"));
  ReadOptions ropts;
  ropts.verify_checksums = true;
  auto db1 = static_cast<DBImplSecondary*>(db_);
  ASSERT_NE(nullptr, db1);
  Iterator* iter = db1->NewIterator(ropts);
  ASSERT_NE(nullptr, iter);
  size_t count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    if (0 == count) {
      ASSERT_EQ("bar", iter->key().ToString());
      ASSERT_EQ("bar_value", iter->value().ToString());
    } else if (1 == count) {
      ASSERT_EQ("foo", iter->key().ToString());
      ASSERT_EQ("foo_value", iter->value().ToString());
    }
    ++count;
  }
  delete iter;
  ASSERT_EQ(2, count);
}

// A secondary opened next to a live primary sees the flushed data and picks
// up later flushes after TryCatchUpWithPrimary().
TEST_F(DBSecondaryTest, OpenAsSecondary) {
  Options options;
  options.env = env_;
  options.level0_file_num_compaction_trigger = 4;
  Reopen(options);
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(Put("foo", "foo_value" + std::to_string(i)));
    ASSERT_OK(Put("bar", "bar_value" + std::to_string(i)));
    ASSERT_OK(Flush());
  }
  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  OpenSecondary(options1);
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ReadOptions ropts;
  ropts.verify_checksums = true;
  // Checks "foo"/"bar" via Get, Seek and a full scan on the secondary.
  const auto verify_db_func = [&](const std::string& foo_val,
                                  const std::string& bar_val) {
    std::string value;
    ASSERT_OK(db_secondary_->Get(ropts, "foo", &value));
    ASSERT_EQ(foo_val, value);
    ASSERT_OK(db_secondary_->Get(ropts, "bar", &value));
    ASSERT_EQ(bar_val, value);
    Iterator* iter = db_secondary_->NewIterator(ropts);
    ASSERT_NE(nullptr, iter);
    iter->Seek("foo");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("foo", iter->key().ToString());
    ASSERT_EQ(foo_val, iter->value().ToString());
    iter->Seek("bar");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bar", iter->key().ToString());
    ASSERT_EQ(bar_val, iter->value().ToString());
    size_t count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ++count;
    }
    ASSERT_EQ(2, count);
    delete iter;
  };

  verify_db_func("foo_value2", "bar_value2");

  ASSERT_OK(Put("foo", "new_foo_value"));
  ASSERT_OK(Put("bar", "new_bar_value"));
  ASSERT_OK(Flush());

  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  verify_db_func("new_foo_value", "new_bar_value");
}

namespace {
// Env wrapper that counts how many random-access files it handed out have
// been destroyed.
class TraceFileEnv : public EnvWrapper {
 public:
  explicit TraceFileEnv(Env* _target) : EnvWrapper(_target) {}
  Status NewRandomAccessFile(const std::string& f,
                             std::unique_ptr<RandomAccessFile>* r,
                             const EnvOptions& env_options) override {
    // Forwards reads to the wrapped file and bumps the shared counter when
    // destroyed.
    class TracedRandomAccessFile : public RandomAccessFile {
     public:
      TracedRandomAccessFile(std::unique_ptr<RandomAccessFile>&& target,
                             std::atomic<int>& counter)
          : target_(std::move(target)), files_closed_(counter) {}
      ~TracedRandomAccessFile() override {
        files_closed_.fetch_add(1, std::memory_order_relaxed);
      }

      Status Read(uint64_t offset, size_t n, Slice* result,
                  char* scratch) const override {
        return target_->Read(offset, n, result, scratch);
      }

     private:
      std::unique_ptr<RandomAccessFile> target_;
      std::atomic<int>& files_closed_;
    };
    Status s = target()->NewRandomAccessFile(f, r, env_options);
    if (s.ok()) {
      r->reset(new TracedRandomAccessFile(std::move(*r), files_closed_));
    }
    return s;
  }

  int files_closed() const {
    return files_closed_.load(std::memory_order_relaxed);
  }

 private:
  std::atomic<int> files_closed_{0};
};
}  // namespace

// After the primary compacts away two table files and the secondary catches
// up, the secondary has closed both of them.
TEST_F(DBSecondaryTest, SecondaryCloseFiles) {
  Options options;
  options.env = env_;
  options.max_open_files = 1;
  options.disable_auto_compactions = true;
  Reopen(options);
  Options options1;
  std::unique_ptr<Env> traced_env(new TraceFileEnv(env_));
  options1.env = traced_env.get();
  OpenSecondary(options1);

  // Not `static`: the lambda captures this fixture by reference, so a static
  // instance would keep dangling references if the test body runs again
  // (e.g. with --gtest_repeat).
  const auto verify_db = [&]() {
    std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));
    std::unique_ptr<Iterator> iter2(db_secondary_->NewIterator(ReadOptions()));
    for (iter1->SeekToFirst(), iter2->SeekToFirst();
         iter1->Valid() && iter2->Valid(); iter1->Next(), iter2->Next()) {
      ASSERT_EQ(iter1->key(), iter2->key());
      ASSERT_EQ(iter1->value(), iter2->value());
    }
    ASSERT_FALSE(iter1->Valid());
    ASSERT_FALSE(iter2->Valid());
  };

  ASSERT_OK(Put("a", "value"));
  ASSERT_OK(Put("c", "value"));
  ASSERT_OK(Flush());
  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  verify_db();

  ASSERT_OK(Put("b", "value"));
  ASSERT_OK(Put("d", "value"));
  ASSERT_OK(Flush());
  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  verify_db();

  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  ASSERT_EQ(2, static_cast<TraceFileEnv*>(traced_env.get())->files_closed());

  Status s = db_secondary_->SetDBOptions({{"max_open_files", "-1"}});
  ASSERT_TRUE(s.IsNotSupported());
  // Close the secondary while traced_env is still alive.
  CloseSecondary();
}

// The secondary sees unflushed writes of the primary (data that is only in
// the WAL) after TryCatchUpWithPrimary().
TEST_F(DBSecondaryTest, OpenAsSecondaryWALTailing) {
  Options options;
  options.env = env_;
  options.level0_file_num_compaction_trigger = 4;
  Reopen(options);
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(Put("foo", "foo_value" + std::to_string(i)));
    ASSERT_OK(Put("bar", "bar_value" + std::to_string(i)));
  }
  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  OpenSecondary(options1);

  ReadOptions ropts;
  ropts.verify_checksums = true;
  // Checks "foo"/"bar" via Get, Seek and a full scan on the secondary.
  const auto verify_db_func = [&](const std::string& foo_val,
                                  const std::string& bar_val) {
    std::string value;
    ASSERT_OK(db_secondary_->Get(ropts, "foo", &value));
    ASSERT_EQ(foo_val, value);
    ASSERT_OK(db_secondary_->Get(ropts, "bar", &value));
    ASSERT_EQ(bar_val, value);
    Iterator* iter = db_secondary_->NewIterator(ropts);
    ASSERT_NE(nullptr, iter);
    iter->Seek("foo");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("foo", iter->key().ToString());
    ASSERT_EQ(foo_val, iter->value().ToString());
    iter->Seek("bar");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bar", iter->key().ToString());
    ASSERT_EQ(bar_val, iter->value().ToString());
    size_t count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ++count;
    }
    ASSERT_EQ(2, count);
    delete iter;
  };

  verify_db_func("foo_value2", "bar_value2");

  ASSERT_OK(Put("foo", "new_foo_value"));
  ASSERT_OK(Put("bar", "new_bar_value"));

  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  verify_db_func("new_foo_value", "new_bar_value");

  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", "new_foo_value_1"));
  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  verify_db_func("new_foo_value_1", "new_bar_value");
}

// Opening a secondary with a column family the primary does not have fails.
TEST_F(DBSecondaryTest, OpenWithNonExistColumnFamily) {
  Options options;
  options.env = env_;
  CreateAndReopenWithCF({"pikachu"}, options);

  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  std::vector<ColumnFamilyDescriptor> cf_descs;
  cf_descs.emplace_back(kDefaultColumnFamilyName, options1);
  cf_descs.emplace_back("pikachu", options1);
  cf_descs.emplace_back("eevee", options1);
  Status s = DB::OpenAsSecondary(options1, dbname_, secondary_path_, cf_descs,
                                 &handles_secondary_, &db_secondary_);
  ASSERT_NOK(s);
}

// A secondary opened with only the default column family still works when the
// primary has additional column families.
TEST_F(DBSecondaryTest, OpenWithSubsetOfColumnFamilies) {
  Options options;
  options.env = env_;
  CreateAndReopenWithCF({"pikachu"}, options);
  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  OpenSecondary(options1);
  ASSERT_EQ(0, handles_secondary_.size());
  ASSERT_NE(nullptr, db_secondary_);

  ASSERT_OK(Put(0 /*cf*/, "foo", "foo_value"));
  ASSERT_OK(Put(1 /*cf*/, "foo", "foo_value"));
  ASSERT_OK(Flush(0 /*cf*/));
  ASSERT_OK(Flush(1 /*cf*/));
  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  ReadOptions ropts;
  ropts.verify_checksums = true;
  std::string value;
  ASSERT_OK(db_secondary_->Get(ropts, "foo", &value));
  ASSERT_EQ("foo_value", value);
}

// Uses sync points to order the primary's MANIFEST switch between the
// secondary's two reads of the current MANIFEST path while it is opening.
TEST_F(DBSecondaryTest, SwitchToNewManifestDuringOpen) {
  Options options;
  options.env = env_;
  Reopen(options);
  Close();

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->LoadDependency(
      {{"ReactiveVersionSet::MaybeSwitchManifest:AfterGetCurrentManifestPath:0",
        "VersionSet::ProcessManifestWrites:BeforeNewManifest"},
       {"VersionSet::ProcessManifestWrites:AfterNewManifest",
        "ReactiveVersionSet::MaybeSwitchManifest:AfterGetCurrentManifestPath:"
        "1"}});
  SyncPoint::GetInstance()->EnableProcessing();

  // Make sure db calls RecoverLogFiles so as to trigger a manifest write,
  // which causes the db to switch to a new MANIFEST upon start.
  port::Thread ro_db_thread([&]() {
    Options options1;
    options1.env = env_;
    options1.max_open_files = -1;
    OpenSecondary(options1);
    CloseSecondary();
  });
  Reopen(options);
  ro_db_thread.join();
}

// A secondary opened after the primary has flushed and compacted reads the
// latest values.
TEST_F(DBSecondaryTest, MissingTableFileDuringOpen) {
  Options options;
  options.env = env_;
  options.level0_file_num_compaction_trigger = 4;
  Reopen(options);
  for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) {
    ASSERT_OK(Put("foo", "foo_value" + std::to_string(i)));
    ASSERT_OK(Put("bar", "bar_value" + std::to_string(i)));
    ASSERT_OK(dbfull()->Flush(FlushOptions()));
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  OpenSecondary(options1);
  ReadOptions ropts;
  ropts.verify_checksums = true;
  std::string value;
  ASSERT_OK(db_secondary_->Get(ropts, "foo", &value));
  ASSERT_EQ("foo_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            value);
  ASSERT_OK(db_secondary_->Get(ropts, "bar", &value));
  ASSERT_EQ("bar_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            value);
  Iterator* iter = db_secondary_->NewIterator(ropts);
  ASSERT_NE(nullptr, iter);
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key().ToString());
  ASSERT_EQ("bar_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            iter->value().ToString());
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key().ToString());
  ASSERT_EQ("foo_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            iter->value().ToString());
  size_t count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ++count;
  }
  ASSERT_EQ(2, count);
  delete iter;
}

// While catching up, the secondary encounters table files that no longer
// exist (counted via a sync-point callback) and still ends up with the latest
// values.
TEST_F(DBSecondaryTest, MissingTableFile) {
  int table_files_not_exist = 0;
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->SetCallBack(
      "ReactiveVersionSet::ApplyOneVersionEditToBuilder:AfterLoadTableHandlers",
      [&](void* arg) {
        Status s = *reinterpret_cast<Status*>(arg);
        if (s.IsPathNotFound()) {
          ++table_files_not_exist;
        } else if (!s.ok()) {
          assert(false);  // Should not reach here
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();
  Options options;
  options.env = env_;
  options.level0_file_num_compaction_trigger = 4;
  Reopen(options);

  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  OpenSecondary(options1);

  for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) {
    ASSERT_OK(Put("foo", "foo_value" + std::to_string(i)));
    ASSERT_OK(Put("bar", "bar_value" + std::to_string(i)));
    ASSERT_OK(dbfull()->Flush(FlushOptions()));
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_NE(nullptr, db_secondary_full());
  ReadOptions ropts;
  ropts.verify_checksums = true;
  std::string value;
  // Nothing is visible on the secondary before it catches up.
  ASSERT_NOK(db_secondary_->Get(ropts, "foo", &value));
  ASSERT_NOK(db_secondary_->Get(ropts, "bar", &value));

  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  ASSERT_EQ(options.level0_file_num_compaction_trigger, table_files_not_exist);
  ASSERT_OK(db_secondary_->Get(ropts, "foo", &value));
  ASSERT_EQ("foo_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            value);
  ASSERT_OK(db_secondary_->Get(ropts, "bar", &value));
  ASSERT_EQ("bar_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            value);
  Iterator* iter = db_secondary_->NewIterator(ropts);
  ASSERT_NE(nullptr, iter);
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key().ToString());
  ASSERT_EQ("bar_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            iter->value().ToString());
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key().ToString());
  ASSERT_EQ("foo_value" +
                std::to_string(options.level0_file_num_compaction_trigger - 1),
            iter->value().ToString());
  size_t count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ++count;
  }
  ASSERT_EQ(2, count);
  delete iter;
}

// After the primary drops a column family and closes, the secondary can still
// read that column family through the handle it already holds.
TEST_F(DBSecondaryTest, PrimaryDropColumnFamily) {
  Options options;
  options.env = env_;
  const std::string kCfName1 = "pikachu";
  CreateAndReopenWithCF({kCfName1}, options);

  Options options1;
  options1.env = env_;
  options1.max_open_files = -1;
  OpenSecondaryWithColumnFamilies({kCfName1}, options1);
  ASSERT_EQ(2, handles_secondary_.size());

  ASSERT_OK(Put(1 /*cf*/, "foo", "foo_val_1"));
  ASSERT_OK(Flush(1 /*cf*/));

  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  ReadOptions ropts;
  ropts.verify_checksums = true;
  std::string value;
  ASSERT_OK(db_secondary_->Get(ropts, handles_secondary_[1], "foo", &value));
  ASSERT_EQ("foo_val_1", value);

  ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
  Close();
  CheckFileTypeCounts(dbname_, 1, 0, 1);
  ASSERT_OK(db_secondary_->TryCatchUpWithPrimary());
  value.clear();
  ASSERT_OK(db_secondary_->Get(ropts, handles_secondary_[1], "foo", &value));
  ASSERT_EQ("foo_val_1", value);
}

TEST_F(DBSecondaryTest, SwitchManifest) { Options options; options.env = env_; options.level0_file_num_compaction_trigger = 4; Reopen(options); Options options1; options1.env = env_; options1.max_open_files = -1; OpenSecondary(options1); const int kNumFiles = options.level0_file_num_compaction_trigger - 1; // Keep it smaller than 10 so that key0, key1, ..., key9 are sorted as 0, 1, // ..., 9.
const int kNumKeys = 10; // Create two sst for (int i = 0; i != kNumFiles; ++i) { for (int j = 0; j != kNumKeys; ++j) { ASSERT_OK(Put("key" + std::to_string(j), "value_" + std::to_string(i))); } ASSERT_OK(Flush()); } ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); const auto& range_scan_db = [&]() { ReadOptions tmp_ropts; tmp_ropts.total_order_seek = true; tmp_ropts.verify_checksums = true; std::unique_ptr iter(db_secondary_->NewIterator(tmp_ropts)); int cnt = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++cnt) { ASSERT_EQ("key" + std::to_string(cnt), iter->key().ToString()); ASSERT_EQ("value_" + std::to_string(kNumFiles - 1), iter->value().ToString()); } }; range_scan_db(); // While secondary instance still keeps old MANIFEST open, we close primary, // restart primary, performs full compaction, close again, restart again so // that next time secondary tries to catch up with primary, the secondary // will skip the MANIFEST in middle. Reopen(options); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_OK(dbfull()->TEST_WaitForCompact()); Reopen(options); ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); range_scan_db(); } // Here, "Snapshot" refers to the version edits written by // VersionSet::WriteSnapshot() at the beginning of the new MANIFEST after // switching from the old one. 
TEST_F(DBSecondaryTest, SkipSnapshotAfterManifestSwitch) { Options options; options.env = env_; options.disable_auto_compactions = true; Reopen(options); Options options1; options1.env = env_; options1.max_open_files = -1; OpenSecondary(options1); ASSERT_OK(Put("0", "value0")); ASSERT_OK(Flush()); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); std::string value; ReadOptions ropts; ropts.verify_checksums = true; ASSERT_OK(db_secondary_->Get(ropts, "0", &value)); ASSERT_EQ("value0", value); Reopen(options); ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); } TEST_F(DBSecondaryTest, SwitchWAL) { const int kNumKeysPerMemtable = 1; Options options; options.env = env_; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 2; options.memtable_factory.reset( new SpecialSkipListFactory(kNumKeysPerMemtable)); Reopen(options); Options options1; options1.env = env_; options1.max_open_files = -1; OpenSecondary(options1); const auto& verify_db = [](DB* db1, DB* db2) { ASSERT_NE(nullptr, db1); ASSERT_NE(nullptr, db2); ReadOptions read_opts; read_opts.verify_checksums = true; std::unique_ptr it1(db1->NewIterator(read_opts)); std::unique_ptr it2(db2->NewIterator(read_opts)); it1->SeekToFirst(); it2->SeekToFirst(); for (; it1->Valid() && it2->Valid(); it1->Next(), it2->Next()) { ASSERT_EQ(it1->key(), it2->key()); ASSERT_EQ(it1->value(), it2->value()); } ASSERT_FALSE(it1->Valid()); ASSERT_FALSE(it2->Valid()); for (it1->SeekToFirst(); it1->Valid(); it1->Next()) { std::string value; ASSERT_OK(db2->Get(read_opts, it1->key(), &value)); ASSERT_EQ(it1->value(), value); } for (it2->SeekToFirst(); it2->Valid(); it2->Next()) { std::string value; ASSERT_OK(db1->Get(read_opts, it2->key(), &value)); ASSERT_EQ(it2->value(), value); } }; for (int k = 0; k != 16; ++k) { ASSERT_OK(Put("key" + std::to_string(k), "value" + std::to_string(k))); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); 
verify_db(dbfull(), db_secondary_); } } TEST_F(DBSecondaryTest, SwitchWALMultiColumnFamilies) { const int kNumKeysPerMemtable = 1; SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BackgroundCallFlush:ContextCleanedUp", "DBSecondaryTest::SwitchWALMultipleColumnFamilies:BeforeCatchUp"}}); SyncPoint::GetInstance()->EnableProcessing(); const std::string kCFName1 = "pikachu"; Options options; options.env = env_; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 2; options.memtable_factory.reset( new SpecialSkipListFactory(kNumKeysPerMemtable)); CreateAndReopenWithCF({kCFName1}, options); Options options1; options1.env = env_; options1.max_open_files = -1; OpenSecondaryWithColumnFamilies({kCFName1}, options1); ASSERT_EQ(2, handles_secondary_.size()); const auto& verify_db = [](DB* db1, const std::vector& handles1, DB* db2, const std::vector& handles2) { ASSERT_NE(nullptr, db1); ASSERT_NE(nullptr, db2); ReadOptions read_opts; read_opts.verify_checksums = true; ASSERT_EQ(handles1.size(), handles2.size()); for (size_t i = 0; i != handles1.size(); ++i) { std::unique_ptr it1(db1->NewIterator(read_opts, handles1[i])); std::unique_ptr it2(db2->NewIterator(read_opts, handles2[i])); it1->SeekToFirst(); it2->SeekToFirst(); for (; it1->Valid() && it2->Valid(); it1->Next(), it2->Next()) { ASSERT_EQ(it1->key(), it2->key()); ASSERT_EQ(it1->value(), it2->value()); } ASSERT_FALSE(it1->Valid()); ASSERT_FALSE(it2->Valid()); for (it1->SeekToFirst(); it1->Valid(); it1->Next()) { std::string value; ASSERT_OK(db2->Get(read_opts, handles2[i], it1->key(), &value)); ASSERT_EQ(it1->value(), value); } for (it2->SeekToFirst(); it2->Valid(); it2->Next()) { std::string value; ASSERT_OK(db1->Get(read_opts, handles1[i], it2->key(), &value)); ASSERT_EQ(it2->value(), value); } } }; for (int k = 0; k != 8; ++k) { ASSERT_OK( Put(0 /*cf*/, "key" + std::to_string(k), "value" + std::to_string(k))); ASSERT_OK( Put(1 /*cf*/, "key" 
+ std::to_string(k), "value" + std::to_string(k))); TEST_SYNC_POINT( "DBSecondaryTest::SwitchWALMultipleColumnFamilies:BeforeCatchUp"); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); verify_db(dbfull(), handles_, db_secondary_, handles_secondary_); SyncPoint::GetInstance()->ClearTrace(); } } TEST_F(DBSecondaryTest, CatchUpAfterFlush) { const int kNumKeysPerMemtable = 16; Options options; options.env = env_; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 2; options.memtable_factory.reset( new SpecialSkipListFactory(kNumKeysPerMemtable)); Reopen(options); Options options1; options1.env = env_; options1.max_open_files = -1; OpenSecondary(options1); WriteOptions write_opts; WriteBatch wb; wb.Put("key0", "value0"); wb.Put("key1", "value1"); ASSERT_OK(dbfull()->Write(write_opts, &wb)); ReadOptions read_opts; std::unique_ptr iter1(db_secondary_->NewIterator(read_opts)); iter1->Seek("key0"); ASSERT_FALSE(iter1->Valid()); iter1->Seek("key1"); ASSERT_FALSE(iter1->Valid()); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); iter1->Seek("key0"); ASSERT_FALSE(iter1->Valid()); iter1->Seek("key1"); ASSERT_FALSE(iter1->Valid()); std::unique_ptr iter2(db_secondary_->NewIterator(read_opts)); iter2->Seek("key0"); ASSERT_TRUE(iter2->Valid()); ASSERT_EQ("value0", iter2->value()); iter2->Seek("key1"); ASSERT_TRUE(iter2->Valid()); ASSERT_EQ("value1", iter2->value()); { WriteBatch wb1; wb1.Put("key0", "value01"); wb1.Put("key1", "value11"); ASSERT_OK(dbfull()->Write(write_opts, &wb1)); } { WriteBatch wb2; wb2.Put("key0", "new_value0"); wb2.Delete("key1"); ASSERT_OK(dbfull()->Write(write_opts, &wb2)); } ASSERT_OK(Flush()); ASSERT_OK(db_secondary_->TryCatchUpWithPrimary()); std::unique_ptr iter3(db_secondary_->NewIterator(read_opts)); // iter3 should not see value01 and value11 at all. 
iter3->Seek("key0"); ASSERT_TRUE(iter3->Valid()); ASSERT_EQ("new_value0", iter3->value()); iter3->Seek("key1"); ASSERT_FALSE(iter3->Valid()); } TEST_F(DBSecondaryTest, CheckConsistencyWhenOpen) { bool called = false; Options options; options.env = env_; options.disable_auto_compactions = true; Reopen(options); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "DBImplSecondary::CheckConsistency:AfterFirstAttempt", [&](void* arg) { ASSERT_NE(nullptr, arg); called = true; auto* s = reinterpret_cast(arg); ASSERT_NOK(*s); }); SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::CheckConsistency:AfterGetLiveFilesMetaData", "BackgroundCallCompaction:0"}, {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles", "DBImpl::CheckConsistency:BeforeGetFileSize"}}); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put("a", "value0")); ASSERT_OK(Put("c", "value0")); ASSERT_OK(Flush()); ASSERT_OK(Put("b", "value1")); ASSERT_OK(Put("d", "value1")); ASSERT_OK(Flush()); port::Thread thread([this]() { Options opts; opts.env = env_; opts.max_open_files = -1; OpenSecondary(opts); }); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_OK(dbfull()->TEST_WaitForCompact()); thread.join(); ASSERT_TRUE(called); } TEST_F(DBSecondaryTest, StartFromInconsistent) { Options options = CurrentOptions(); DestroyAndReopen(options); ASSERT_OK(Put("foo", "value")); ASSERT_OK(Flush()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { ASSERT_NE(nullptr, arg); *(reinterpret_cast(arg)) = Status::Corruption("Inject corruption"); }); SyncPoint::GetInstance()->EnableProcessing(); Options options1; Status s = TryOpenSecondary(options1); ASSERT_TRUE(s.IsCorruption()); } TEST_F(DBSecondaryTest, InconsistencyDuringCatchUp) { Options options = 
CurrentOptions(); DestroyAndReopen(options); ASSERT_OK(Put("foo", "value")); ASSERT_OK(Flush()); Options options1; OpenSecondary(options1); { std::string value; ASSERT_OK(db_secondary_->Get(ReadOptions(), "foo", &value)); ASSERT_EQ("value", value); } ASSERT_OK(Put("bar", "value1")); ASSERT_OK(Flush()); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { ASSERT_NE(nullptr, arg); *(reinterpret_cast(arg)) = Status::Corruption("Inject corruption"); }); SyncPoint::GetInstance()->EnableProcessing(); Status s = db_secondary_->TryCatchUpWithPrimary(); ASSERT_TRUE(s.IsCorruption()); } #endif //! ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_info_dumper.cc000066400000000000000000000072401370372246700171200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/db_info_dumper.h"

#include <stdio.h>
#include <algorithm>
#include <cinttypes>
#include <string>
#include <vector>

#include "file/filename.h"
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// A table file name is added to the printed list only while the running
// per-directory count (after incrementing) is still below this value; the
// count itself keeps growing so the reported total stays exact.
const uint64_t kSstNameListLimit = 10;

// Returns the size of `path` in bytes. If the size cannot be obtained, the
// failure is written to the info log and 0 is returned, so that no
// indeterminate value ever reaches the summary.
uint64_t FileSizeOrZero(const ImmutableDBOptions& options,
                        const std::string& path) {
  uint64_t size = 0;
  if (!options.env->GetFileSize(path, &size).ok()) {
    Error(options.info_log, "Error when reading size of %s\n", path.c_str());
    size = 0;
  }
  return size;
}

// Appends "<file> size: <bytes> ; " for the WAL file `file` found in `dir`.
// std::to_string is used so that sizes of any magnitude are printed in full.
void AppendWalFileInfo(const ImmutableDBOptions& options,
                       const std::string& dir, const std::string& file,
                       std::string* wal_info) {
  const uint64_t size = FileSizeOrZero(options, dir + "/" + file);
  wal_info->append(file)
      .append(" size: ")
      .append(std::to_string(size))
      .append(" ; ");
}

// Counts one table file and lists its name while under the list limit.
void RecordSstFile(const std::string& file, uint64_t* count,
                   std::string* names) {
  if (++(*count) < kSstNameListLimit) {
    names->append(file).append(" ");
  }
}

}  // namespace

// Writes a summary of the files that make up the DB to `options.info_log`:
// the CURRENT, IDENTITY and MANIFEST files found in `dbname`, the table files
// of every entry in `options.db_paths`, and the write-ahead log files in
// `options.wal_dir`.
//
// Does nothing when no info log is configured. Directory listing failures are
// logged; a failure to list the WAL directory ends the summary early.
void DumpDBFileSummary(const ImmutableDBOptions& options,
                       const std::string& dbname) {
  if (options.info_log == nullptr) {
    return;
  }

  auto* env = options.env;
  uint64_t number = 0;
  FileType type = kInfoLogFile;

  std::vector<std::string> files;
  // Table file statistics for `dbname` itself. They are kept separate from the
  // per-db_path statistics so they can never be attributed to another path.
  uint64_t dbname_sst_count = 0;
  std::string dbname_sst_names;
  std::string wal_info;

  Header(options.info_log, "DB SUMMARY\n");
  // Get files in dbname dir
  if (!env->GetChildren(dbname, &files).ok()) {
    Error(options.info_log, "Error when reading %s dir\n", dbname.c_str());
  }
  std::sort(files.begin(), files.end());
  for (const std::string& file : files) {
    if (!ParseFileName(file, &number, &type)) {
      continue;
    }
    switch (type) {
      case kCurrentFile:
        Header(options.info_log, "CURRENT file: %s\n", file.c_str());
        break;
      case kIdentityFile:
        Header(options.info_log, "IDENTITY file: %s\n", file.c_str());
        break;
      case kDescriptorFile: {
        const uint64_t manifest_size =
            FileSizeOrZero(options, dbname + "/" + file);
        Header(options.info_log, "MANIFEST file: %s size: %" PRIu64 " Bytes\n",
               file.c_str(), manifest_size);
        break;
      }
      case kLogFile:
        AppendWalFileInfo(options, dbname, file, &wal_info);
        break;
      case kTableFile:
        RecordSstFile(file, &dbname_sst_count, &dbname_sst_names);
        break;
      default:
        break;
    }
  }

  // Get sst files in db_path dir
  for (auto& db_path : options.db_paths) {
    uint64_t sst_count = 0;
    std::string sst_names;
    if (dbname.compare(db_path.path) == 0) {
      // Already scanned above; reuse the result.
      sst_count = dbname_sst_count;
      sst_names = dbname_sst_names;
    } else {
      if (!env->GetChildren(db_path.path, &files).ok()) {
        Error(options.info_log, "Error when reading %s dir\n",
              db_path.path.c_str());
        continue;
      }
      std::sort(files.begin(), files.end());
      for (const std::string& file : files) {
        if (ParseFileName(file, &number, &type) && type == kTableFile) {
          RecordSstFile(file, &sst_count, &sst_names);
        }
      }
    }
    Header(options.info_log,
           "SST files in %s dir, Total Num: %" PRIu64 ", files: %s\n",
           db_path.path.c_str(), sst_count, sst_names.c_str());
  }

  // Get wal file in wal_dir
  if (dbname.compare(options.wal_dir) != 0) {
    if (!env->GetChildren(options.wal_dir, &files).ok()) {
      Error(options.info_log, "Error when reading %s dir\n",
            options.wal_dir.c_str());
      return;
    }
    // Sorted like every other directory listing, for a stable summary.
    std::sort(files.begin(), files.end());
    // WAL files live in a dedicated directory: discard anything collected
    // from the dbname directory.
    wal_info.clear();
    for (const std::string& file : files) {
      if (ParseFileName(file, &number, &type) && type == kLogFile) {
        AppendWalFileInfo(options, options.wal_dir, file, &wal_info);
      }
    }
  }
  Header(options.info_log, "Write Ahead Log file in %s: %s\n",
         options.wal_dir.c_str(), wal_info.c_str());
}
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/db_info_dumper.h000066400000000000000000000007651370372246700167670ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
#pragma once #include #include "options/db_options.h" namespace ROCKSDB_NAMESPACE { void DumpDBFileSummary(const ImmutableDBOptions& options, const std::string& dbname); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_inplace_update_test.cc000066400000000000000000000133271370372246700206300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/db_test_util.h" #include "port/stack_trace.h" namespace ROCKSDB_NAMESPACE { class DBTestInPlaceUpdate : public DBTestBase { public: DBTestInPlaceUpdate() : DBTestBase("/db_inplace_update_test") {} }; TEST_F(DBTestInPlaceUpdate, InPlaceUpdate) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; options.allow_concurrent_memtable_write = false; Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of smaller size int numValues = 10; for (int i = numValues; i > 0; i--) { std::string value = DummyString(i, 'a'); ASSERT_OK(Put(1, "key", value)); ASSERT_EQ(value, Get(1, "key")); } // Only 1 instance for that key. 
validateNumberOfEntries(1, 1); } while (ChangeCompactOptions()); } TEST_F(DBTestInPlaceUpdate, InPlaceUpdateLargeNewValue) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; options.allow_concurrent_memtable_write = false; Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of larger size int numValues = 10; for (int i = 0; i < numValues; i++) { std::string value = DummyString(i, 'a'); ASSERT_OK(Put(1, "key", value)); ASSERT_EQ(value, Get(1, "key")); } // All 10 updates exist in the internal iterator validateNumberOfEntries(numValues, 1); } while (ChangeCompactOptions()); } TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerSize) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; options.inplace_callback = ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceSmallerSize; options.allow_concurrent_memtable_write = false; Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of smaller size int numValues = 10; ASSERT_OK(Put(1, "key", DummyString(numValues, 'a'))); ASSERT_EQ(DummyString(numValues, 'c'), Get(1, "key")); for (int i = numValues; i > 0; i--) { ASSERT_OK(Put(1, "key", DummyString(i, 'a'))); ASSERT_EQ(DummyString(i - 1, 'b'), Get(1, "key")); } // Only 1 instance for that key. 
validateNumberOfEntries(1, 1); } while (ChangeCompactOptions()); } TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackSmallerVarintSize) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; options.inplace_callback = ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceSmallerVarintSize; options.allow_concurrent_memtable_write = false; Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of smaller varint size int numValues = 265; ASSERT_OK(Put(1, "key", DummyString(numValues, 'a'))); ASSERT_EQ(DummyString(numValues, 'c'), Get(1, "key")); for (int i = numValues; i > 0; i--) { ASSERT_OK(Put(1, "key", DummyString(i, 'a'))); ASSERT_EQ(DummyString(1, 'b'), Get(1, "key")); } // Only 1 instance for that key. validateNumberOfEntries(1, 1); } while (ChangeCompactOptions()); } TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackLargeNewValue) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; options.inplace_callback = ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceLargerSize; options.allow_concurrent_memtable_write = false; Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Update key with values of larger size int numValues = 10; for (int i = 0; i < numValues; i++) { ASSERT_OK(Put(1, "key", DummyString(i, 'a'))); ASSERT_EQ(DummyString(i, 'c'), Get(1, "key")); } // No inplace updates. 
All updates are puts with new seq number // All 10 updates exist in the internal iterator validateNumberOfEntries(numValues, 1); } while (ChangeCompactOptions()); } TEST_F(DBTestInPlaceUpdate, InPlaceUpdateCallbackNoAction) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.inplace_update_support = true; options.env = env_; options.write_buffer_size = 100000; options.inplace_callback = ROCKSDB_NAMESPACE::DBTestInPlaceUpdate::updateInPlaceNoAction; options.allow_concurrent_memtable_write = false; Reopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Callback function requests no actions from db ASSERT_OK(Put(1, "key", DummyString(1, 'a'))); ASSERT_EQ(Get(1, "key"), "NOT_FOUND"); } while (ChangeCompactOptions()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_io_failure_test.cc000066400000000000000000000454431370372246700177750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h" #include "port/stack_trace.h" namespace ROCKSDB_NAMESPACE { class DBIOFailureTest : public DBTestBase { public: DBIOFailureTest() : DBTestBase("/db_io_failure_test") {} }; #ifndef ROCKSDB_LITE // Check that number of files does not grow when writes are dropped TEST_F(DBIOFailureTest, DropWrites) { do { Options options = CurrentOptions(); options.env = env_; options.paranoid_checks = false; Reopen(options); ASSERT_OK(Put("foo", "v1")); ASSERT_EQ("v1", Get("foo")); Compact("a", "z"); const size_t num_files = CountFiles(); // Force out-of-space errors env_->drop_writes_.store(true, std::memory_order_release); env_->sleep_counter_.Reset(); env_->no_slowdown_ = true; for (int i = 0; i < 5; i++) { if (option_config_ != kUniversalCompactionMultiLevel && option_config_ != kUniversalSubcompactions) { for (int level = 0; level < dbfull()->NumberLevels(); level++) { if (level > 0 && level == dbfull()->NumberLevels() - 1) { break; } dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr, true /* disallow trivial move */); } } else { dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); } } std::string property_value; ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); ASSERT_EQ("5", property_value); env_->drop_writes_.store(false, std::memory_order_release); ASSERT_LT(CountFiles(), num_files + 3); // Check that compaction attempts slept after errors // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler // versions ASSERT_GE(env_->sleep_counter_.Read(), 4); } while (ChangeCompactOptions()); } // Check background error counter bumped on flush failures. TEST_F(DBIOFailureTest, DropWritesFlush) { do { Options options = CurrentOptions(); options.env = env_; options.max_background_flushes = 1; Reopen(options); ASSERT_OK(Put("foo", "v1")); // Force out-of-space errors env_->drop_writes_.store(true, std::memory_order_release); std::string property_value; // Background error count is 0 now. 
ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); ASSERT_EQ("0", property_value); dbfull()->TEST_FlushMemTable(true); ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); ASSERT_EQ("1", property_value); env_->drop_writes_.store(false, std::memory_order_release); } while (ChangeCompactOptions()); } // Check that CompactRange() returns failure if there is not enough space left // on device TEST_F(DBIOFailureTest, NoSpaceCompactRange) { do { Options options = CurrentOptions(); options.env = env_; options.disable_auto_compactions = true; Reopen(options); // generate 5 tables for (int i = 0; i < 5; ++i) { ASSERT_OK(Put(Key(i), Key(i) + "v")); ASSERT_OK(Flush()); } // Force out-of-space errors env_->no_space_.store(true, std::memory_order_release); Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true /* disallow trivial move */); ASSERT_TRUE(s.IsIOError()); ASSERT_TRUE(s.IsNoSpace()); env_->no_space_.store(false, std::memory_order_release); } while (ChangeCompactOptions()); } #endif // ROCKSDB_LITE TEST_F(DBIOFailureTest, NonWritableFileSystem) { do { Options options = CurrentOptions(); options.write_buffer_size = 4096; options.arena_block_size = 4096; options.env = env_; Reopen(options); ASSERT_OK(Put("foo", "v1")); env_->non_writeable_rate_.store(100); std::string big(100000, 'x'); int errors = 0; for (int i = 0; i < 20; i++) { if (!Put("foo", big).ok()) { errors++; env_->SleepForMicroseconds(100000); } } ASSERT_GT(errors, 0); env_->non_writeable_rate_.store(0); } while (ChangeCompactOptions()); } #ifndef ROCKSDB_LITE TEST_F(DBIOFailureTest, ManifestWriteError) { // Test for the following problem: // (a) Compaction produces file F // (b) Log record containing F is written to MANIFEST file, but Sync() fails // (c) GC deletes F // (d) After reopening DB, reads fail since deleted F is named in log record // We iterate twice. 
In the second iteration, everything is the // same except the log record never makes it to the MANIFEST file. for (int iter = 0; iter < 2; iter++) { std::atomic* error_type = (iter == 0) ? &env_->manifest_sync_error_ : &env_->manifest_write_error_; // Insert foo=>bar mapping Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_EQ("bar", Get("foo")); // Memtable compaction (will succeed) Flush(); ASSERT_EQ("bar", Get("foo")); const int last = 2; MoveFilesToLevel(2); ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level // Merging compaction (will fail) error_type->store(true, std::memory_order_release); dbfull()->TEST_CompactRange(last, nullptr, nullptr); // Should fail ASSERT_EQ("bar", Get("foo")); error_type->store(false, std::memory_order_release); // Since paranoid_checks=true, writes should fail ASSERT_NOK(Put("foo2", "bar2")); // Recovery: should not lose data ASSERT_EQ("bar", Get("foo")); // Try again with paranoid_checks=false Close(); options.paranoid_checks = false; Reopen(options); // Merging compaction (will fail) error_type->store(true, std::memory_order_release); dbfull()->TEST_CompactRange(last, nullptr, nullptr); // Should fail ASSERT_EQ("bar", Get("foo")); // Recovery: should not lose data error_type->store(false, std::memory_order_release); Reopen(options); ASSERT_EQ("bar", Get("foo")); // Since paranoid_checks=false, writes should succeed ASSERT_OK(Put("foo2", "bar2")); ASSERT_EQ("bar", Get("foo")); ASSERT_EQ("bar2", Get("foo2")); } } TEST_F(DBIOFailureTest, PutFailsParanoid) { // Test the following: // (a) A random put fails in paranoid mode (simulate by sync fail) // (b) All other puts have to fail, even if writes would succeed // (c) All of that should happen ONLY if paranoid_checks = true Options options = CurrentOptions(); options.env = env_; 
options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo1", "bar1")); // simulate error env_->log_write_error_.store(true, std::memory_order_release); s = Put(1, "foo2", "bar2"); ASSERT_TRUE(!s.ok()); env_->log_write_error_.store(false, std::memory_order_release); s = Put(1, "foo3", "bar3"); // the next put should fail, too ASSERT_TRUE(!s.ok()); // but we're still able to read ASSERT_EQ("bar", Get(1, "foo")); // do the same thing with paranoid checks off options.paranoid_checks = false; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo1", "bar1")); // simulate error env_->log_write_error_.store(true, std::memory_order_release); s = Put(1, "foo2", "bar2"); ASSERT_TRUE(!s.ok()); env_->log_write_error_.store(false, std::memory_order_release); s = Put(1, "foo3", "bar3"); // the next put should NOT fail ASSERT_TRUE(s.ok()); } #if !(defined NDEBUG) || !defined(OS_WIN) TEST_F(DBIOFailureTest, FlushSstRangeSyncError) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; options.write_buffer_size = 256 * 1024 * 1024; options.writable_file_max_buffer_size = 128 * 1024; options.bytes_per_sync = 128 * 1024; options.level0_file_num_compaction_trigger = 4; options.memtable_factory.reset(new SpecialSkipListFactory(10)); BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(10)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; std::atomic range_sync_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SpecialEnv::SStableFile::RangeSync", [&](void* arg) { if 
(range_sync_called.fetch_add(1) == 0) { Status* st = static_cast(arg); *st = Status::IOError("range sync dummy error"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); std::string rnd_str = RandomString(&rnd, static_cast(options.bytes_per_sync / 2)); std::string rnd_str_512kb = RandomString(&rnd, 512 * 1024); ASSERT_OK(Put(1, "foo", "bar")); // First 1MB doesn't get range synced ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb)); ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb)); ASSERT_OK(Put(1, "foo1_1", rnd_str)); ASSERT_OK(Put(1, "foo1_2", rnd_str)); ASSERT_OK(Put(1, "foo1_3", rnd_str)); ASSERT_OK(Put(1, "foo2", "bar")); ASSERT_OK(Put(1, "foo3_1", rnd_str)); ASSERT_OK(Put(1, "foo3_2", rnd_str)); ASSERT_OK(Put(1, "foo3_3", rnd_str)); ASSERT_OK(Put(1, "foo4", "bar")); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); // Following writes should fail as flush failed. ASSERT_NOK(Put(1, "foo2", "bar3")); ASSERT_EQ("bar", Get(1, "foo")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_GE(1, range_sync_called.load()); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("bar", Get(1, "foo")); } TEST_F(DBIOFailureTest, CompactSstRangeSyncError) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; options.write_buffer_size = 256 * 1024 * 1024; options.writable_file_max_buffer_size = 128 * 1024; options.bytes_per_sync = 128 * 1024; options.level0_file_num_compaction_trigger = 2; options.target_file_size_base = 256 * 1024 * 1024; options.disable_auto_compactions = true; BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(10)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; Random rnd(301); std::string rnd_str = RandomString(&rnd, 
static_cast(options.bytes_per_sync / 2)); std::string rnd_str_512kb = RandomString(&rnd, 512 * 1024); ASSERT_OK(Put(1, "foo", "bar")); // First 1MB doesn't get range synced ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb)); ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb)); ASSERT_OK(Put(1, "foo1_1", rnd_str)); ASSERT_OK(Put(1, "foo1_2", rnd_str)); ASSERT_OK(Put(1, "foo1_3", rnd_str)); Flush(1); ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo3_1", rnd_str)); ASSERT_OK(Put(1, "foo3_2", rnd_str)); ASSERT_OK(Put(1, "foo3_3", rnd_str)); ASSERT_OK(Put(1, "foo4", "bar")); Flush(1); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); std::atomic range_sync_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SpecialEnv::SStableFile::RangeSync", [&](void* arg) { if (range_sync_called.fetch_add(1) == 0) { Status* st = static_cast(arg); *st = Status::IOError("range sync dummy error"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(dbfull()->SetOptions(handles_[1], { {"disable_auto_compactions", "false"}, })); dbfull()->TEST_WaitForCompact(); // Following writes should fail as flush failed. 
ASSERT_NOK(Put(1, "foo2", "bar3")); ASSERT_EQ("bar", Get(1, "foo")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_GE(1, range_sync_called.load()); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("bar", Get(1, "foo")); } TEST_F(DBIOFailureTest, FlushSstCloseError) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; options.level0_file_num_compaction_trigger = 4; options.memtable_factory.reset(new SpecialSkipListFactory(2)); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; std::atomic close_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SpecialEnv::SStableFile::Close", [&](void* arg) { if (close_called.fetch_add(1) == 0) { Status* st = static_cast(arg); *st = Status::IOError("close dummy error"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo1", "bar1")); ASSERT_OK(Put(1, "foo", "bar2")); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); // Following writes should fail as flush failed. 
ASSERT_NOK(Put(1, "foo2", "bar3")); ASSERT_EQ("bar2", Get(1, "foo")); ASSERT_EQ("bar1", Get(1, "foo1")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("bar2", Get(1, "foo")); ASSERT_EQ("bar1", Get(1, "foo1")); } TEST_F(DBIOFailureTest, CompactionSstCloseError) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; options.level0_file_num_compaction_trigger = 2; options.disable_auto_compactions = true; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo2", "bar")); Flush(1); ASSERT_OK(Put(1, "foo", "bar2")); ASSERT_OK(Put(1, "foo2", "bar")); Flush(1); ASSERT_OK(Put(1, "foo", "bar3")); ASSERT_OK(Put(1, "foo2", "bar")); Flush(1); dbfull()->TEST_WaitForCompact(); std::atomic close_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SpecialEnv::SStableFile::Close", [&](void* arg) { if (close_called.fetch_add(1) == 0) { Status* st = static_cast(arg); *st = Status::IOError("close dummy error"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(dbfull()->SetOptions(handles_[1], { {"disable_auto_compactions", "false"}, })); dbfull()->TEST_WaitForCompact(); // Following writes should fail as compaction failed. 
ASSERT_NOK(Put(1, "foo2", "bar3")); ASSERT_EQ("bar3", Get(1, "foo")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("bar3", Get(1, "foo")); } TEST_F(DBIOFailureTest, FlushSstSyncError) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; options.use_fsync = false; options.level0_file_num_compaction_trigger = 4; options.memtable_factory.reset(new SpecialSkipListFactory(2)); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; std::atomic sync_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SpecialEnv::SStableFile::Sync", [&](void* arg) { if (sync_called.fetch_add(1) == 0) { Status* st = static_cast(arg); *st = Status::IOError("sync dummy error"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo1", "bar1")); ASSERT_OK(Put(1, "foo", "bar2")); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); // Following writes should fail as flush failed. 
ASSERT_NOK(Put(1, "foo2", "bar3")); ASSERT_EQ("bar2", Get(1, "foo")); ASSERT_EQ("bar1", Get(1, "foo1")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("bar2", Get(1, "foo")); ASSERT_EQ("bar1", Get(1, "foo1")); } TEST_F(DBIOFailureTest, CompactionSstSyncError) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.error_if_exists = false; options.paranoid_checks = true; options.level0_file_num_compaction_trigger = 2; options.disable_auto_compactions = true; options.use_fsync = false; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Status s; ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "foo2", "bar")); Flush(1); ASSERT_OK(Put(1, "foo", "bar2")); ASSERT_OK(Put(1, "foo2", "bar")); Flush(1); ASSERT_OK(Put(1, "foo", "bar3")); ASSERT_OK(Put(1, "foo2", "bar")); Flush(1); dbfull()->TEST_WaitForCompact(); std::atomic sync_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SpecialEnv::SStableFile::Sync", [&](void* arg) { if (sync_called.fetch_add(1) == 0) { Status* st = static_cast(arg); *st = Status::IOError("close dummy error"); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(dbfull()->SetOptions(handles_[1], { {"disable_auto_compactions", "false"}, })); dbfull()->TEST_WaitForCompact(); // Following writes should fail as compaction failed. 
ASSERT_NOK(Put(1, "foo2", "bar3")); ASSERT_EQ("bar3", Get(1, "foo")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ("bar3", Get(1, "foo")); } #endif // !(defined NDEBUG) || !defined(OS_WIN) #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_iter.cc000066400000000000000000001410411370372246700155520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_iter.h" #include #include #include #include "db/dbformat.h" #include "db/merge_context.h" #include "db/merge_helper.h" #include "db/pinned_iterators_manager.h" #include "file/filename.h" #include "logging/logging.h" #include "memory/arena.h" #include "monitoring/perf_context_imp.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "table/internal_iterator.h" #include "table/iterator_wrapper.h" #include "trace_replay/trace_replay.h" #include "util/mutexlock.h" #include "util/string_util.h" #include "util/user_comparator_wrapper.h" namespace ROCKSDB_NAMESPACE { DBIter::DBIter(Env* _env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const Comparator* cmp, InternalIterator* iter, SequenceNumber s, bool arena_mode, uint64_t max_sequential_skip_in_iterations, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, bool allow_blob) : prefix_extractor_(mutable_cf_options.prefix_extractor.get()), env_(_env), logger_(cf_options.info_log), user_comparator_(cmp), merge_operator_(cf_options.merge_operator), iter_(iter), read_callback_(read_callback), sequence_(s), statistics_(cf_options.statistics), max_skip_(max_sequential_skip_in_iterations), max_skippable_internal_keys_(read_options.max_skippable_internal_keys), num_internal_keys_skipped_(0), iterate_lower_bound_(read_options.iterate_lower_bound), iterate_upper_bound_(read_options.iterate_upper_bound), direction_(kForward), valid_(false), current_entry_is_merged_(false), is_key_seqnum_zero_(false), prefix_same_as_start_(mutable_cf_options.prefix_extractor ? 
read_options.prefix_same_as_start : false), pin_thru_lifetime_(read_options.pin_data), expect_total_order_inner_iter_(prefix_extractor_ == nullptr || read_options.total_order_seek || read_options.auto_prefix_mode), allow_blob_(allow_blob), is_blob_(false), arena_mode_(arena_mode), range_del_agg_(&cf_options.internal_comparator, s), db_impl_(db_impl), cfd_(cfd), start_seqnum_(read_options.iter_start_seqnum), timestamp_ub_(read_options.timestamp), timestamp_lb_(read_options.iter_start_ts), timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0) { RecordTick(statistics_, NO_ITERATOR_CREATED); if (pin_thru_lifetime_) { pinned_iters_mgr_.StartPinning(); } if (iter_.iter()) { iter_.iter()->SetPinnedItersMgr(&pinned_iters_mgr_); } assert(timestamp_size_ == user_comparator_.timestamp_size()); } Status DBIter::GetProperty(std::string prop_name, std::string* prop) { if (prop == nullptr) { return Status::InvalidArgument("prop is nullptr"); } if (prop_name == "rocksdb.iterator.super-version-number") { // First try to pass the value returned from inner iterator. return iter_.iter()->GetProperty(prop_name, prop); } else if (prop_name == "rocksdb.iterator.is-key-pinned") { if (valid_) { *prop = (pin_thru_lifetime_ && saved_key_.IsKeyPinned()) ? 
"1" : "0"; } else { *prop = "Iterator is not valid."; } return Status::OK(); } else if (prop_name == "rocksdb.iterator.internal-key") { *prop = saved_key_.GetUserKey().ToString(); return Status::OK(); } return Status::InvalidArgument("Unidentified property."); } bool DBIter::ParseKey(ParsedInternalKey* ikey) { if (!ParseInternalKey(iter_.key(), ikey)) { status_ = Status::Corruption("corrupted internal key in DBIter"); valid_ = false; ROCKS_LOG_ERROR(logger_, "corrupted internal key in DBIter: %s", iter_.key().ToString(true).c_str()); return false; } else { return true; } } void DBIter::Next() { assert(valid_); assert(status_.ok()); PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, env_); // Release temporarily pinned blocks from last operation ReleaseTempPinnedData(); local_stats_.skip_count_ += num_internal_keys_skipped_; local_stats_.skip_count_--; num_internal_keys_skipped_ = 0; bool ok = true; if (direction_ == kReverse) { is_key_seqnum_zero_ = false; if (!ReverseToForward()) { ok = false; } } else if (!current_entry_is_merged_) { // If the current value is not a merge, the iter position is the // current key, which is already returned. We can safely issue a // Next() without checking the current key. // If the current key is a merge, very likely iter already points // to the next internal position. assert(iter_.Valid()); iter_.Next(); PERF_COUNTER_ADD(internal_key_skipped_count, 1); } local_stats_.next_count_++; if (ok && iter_.Valid()) { Slice prefix; if (prefix_same_as_start_) { assert(prefix_extractor_ != nullptr); prefix = prefix_.GetUserKey(); } FindNextUserEntry(true /* skipping the current user key */, prefix_same_as_start_ ? 
&prefix : nullptr); } else { is_key_seqnum_zero_ = false; valid_ = false; } if (statistics_ != nullptr && valid_) { local_stats_.next_found_count_++; local_stats_.bytes_read_ += (key().size() + value().size()); } } // PRE: saved_key_ has the current user key if skipping_saved_key // POST: saved_key_ should have the next user key if valid_, // if the current entry is a result of merge // current_entry_is_merged_ => true // saved_value_ => the merged value // // NOTE: In between, saved_key_ can point to a user key that has // a delete marker or a sequence number higher than sequence_ // saved_key_ MUST have a proper user_key before calling this function // // The prefix parameter, if not null, indicates that we need to iterate // within the prefix, and the iterator needs to be made invalid, if no // more entry for the prefix can be found. bool DBIter::FindNextUserEntry(bool skipping_saved_key, const Slice* prefix) { PERF_TIMER_GUARD(find_next_user_entry_time); return FindNextUserEntryInternal(skipping_saved_key, prefix); } // Actual implementation of DBIter::FindNextUserEntry() bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key, const Slice* prefix) { // Loop until we hit an acceptable entry to yield assert(iter_.Valid()); assert(status_.ok()); assert(direction_ == kForward); current_entry_is_merged_ = false; // How many times in a row we have skipped an entry with user key less than // or equal to saved_key_. We could skip these entries either because // sequence numbers were too high or because skipping_saved_key = true. // What saved_key_ contains throughout this method: // - if skipping_saved_key : saved_key_ contains the key that we need // to skip, and we haven't seen any keys greater // than that, // - if num_skipped > 0 : saved_key_ contains the key that we have skipped // num_skipped times, and we haven't seen any keys // greater than that, // - none of the above : saved_key_ can contain anything, it doesn't // matter. 
uint64_t num_skipped = 0; // For write unprepared, the target sequence number in reseek could be larger // than the snapshot, and thus needs to be skipped again. This could result in // an infinite loop of reseeks. To avoid that, we limit the number of reseeks // to one. bool reseek_done = false; is_blob_ = false; do { // Will update is_key_seqnum_zero_ as soon as we parsed the current key // but we need to save the previous value to be used in the loop. bool is_prev_key_seqnum_zero = is_key_seqnum_zero_; if (!ParseKey(&ikey_)) { is_key_seqnum_zero_ = false; return false; } is_key_seqnum_zero_ = (ikey_.sequence == 0); assert(iterate_upper_bound_ == nullptr || iter_.MayBeOutOfUpperBound() || user_comparator_.CompareWithoutTimestamp( ikey_.user_key, /*a_has_ts=*/true, *iterate_upper_bound_, /*b_has_ts=*/false) < 0); if (iterate_upper_bound_ != nullptr && iter_.MayBeOutOfUpperBound() && user_comparator_.CompareWithoutTimestamp( ikey_.user_key, /*a_has_ts=*/true, *iterate_upper_bound_, /*b_has_ts=*/false) >= 0) { break; } assert(prefix == nullptr || prefix_extractor_ != nullptr); if (prefix != nullptr && prefix_extractor_->Transform(ikey_.user_key).compare(*prefix) != 0) { assert(prefix_same_as_start_); break; } if (TooManyInternalKeysSkipped()) { return false; } assert(ikey_.user_key.size() >= timestamp_size_); Slice ts; bool more_recent = false; if (timestamp_size_ > 0) { ts = ExtractTimestampFromUserKey(ikey_.user_key, timestamp_size_); } if (IsVisible(ikey_.sequence, ts, &more_recent)) { // If the previous entry is of seqnum 0, the current entry will not // possibly be skipped. This condition can potentially be relaxed to // prev_key.seq <= ikey_.sequence. We are cautious because it will be more // prone to bugs causing the same user key with the same sequence number. // Note that with current timestamp implementation, the same user key can // have different timestamps and zero sequence number on the bottommost // level. This may change in the future. 
if ((!is_prev_key_seqnum_zero || timestamp_size_ > 0) && skipping_saved_key && CompareKeyForSkip(ikey_.user_key, saved_key_.GetUserKey()) <= 0) { num_skipped++; // skip this entry PERF_COUNTER_ADD(internal_key_skipped_count, 1); } else { assert(!skipping_saved_key || CompareKeyForSkip(ikey_.user_key, saved_key_.GetUserKey()) > 0); if (!iter_.PrepareValue()) { assert(!iter_.status().ok()); valid_ = false; return false; } num_skipped = 0; reseek_done = false; switch (ikey_.type) { case kTypeDeletion: case kTypeDeletionWithTimestamp: case kTypeSingleDeletion: // Arrange to skip all upcoming entries for this key since // they are hidden by this deletion. // if iterartor specified start_seqnum we // 1) return internal key, including the type // 2) return ikey only if ikey.seqnum >= start_seqnum_ // note that if deletion seqnum is < start_seqnum_ we // just skip it like in normal iterator. if (start_seqnum_ > 0 && ikey_.sequence >= start_seqnum_) { saved_key_.SetInternalKey(ikey_); valid_ = true; return true; } else { saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); skipping_saved_key = true; PERF_COUNTER_ADD(internal_delete_skipped_count, 1); } break; case kTypeValue: case kTypeBlobIndex: if (start_seqnum_ > 0) { // we are taking incremental snapshot here // incremental snapshots aren't supported on DB with range deletes assert(ikey_.type != kTypeBlobIndex); if (ikey_.sequence >= start_seqnum_) { saved_key_.SetInternalKey(ikey_); valid_ = true; return true; } else { // this key and all previous versions shouldn't be included, // skipping_saved_key saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); skipping_saved_key = true; } } else { saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); if (range_del_agg_.ShouldDelete( ikey_, RangeDelPositioningMode::kForwardTraversal)) { // Arrange to skip all upcoming entries 
for this key since // they are hidden by this deletion. skipping_saved_key = true; num_skipped = 0; reseek_done = false; PERF_COUNTER_ADD(internal_delete_skipped_count, 1); } else if (ikey_.type == kTypeBlobIndex) { if (!allow_blob_) { ROCKS_LOG_ERROR(logger_, "Encounter unexpected blob index."); status_ = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); valid_ = false; return false; } is_blob_ = true; valid_ = true; return true; } else { valid_ = true; return true; } } break; case kTypeMerge: saved_key_.SetUserKey( ikey_.user_key, !pin_thru_lifetime_ || !iter_.iter()->IsKeyPinned() /* copy */); if (range_del_agg_.ShouldDelete( ikey_, RangeDelPositioningMode::kForwardTraversal)) { // Arrange to skip all upcoming entries for this key since // they are hidden by this deletion. skipping_saved_key = true; num_skipped = 0; reseek_done = false; PERF_COUNTER_ADD(internal_delete_skipped_count, 1); } else { // By now, we are sure the current ikey is going to yield a // value current_entry_is_merged_ = true; valid_ = true; return MergeValuesNewToOld(); // Go to a different state machine } break; default: assert(false); break; } } } else { if (more_recent) { PERF_COUNTER_ADD(internal_recent_skipped_count, 1); } // This key was inserted after our snapshot was taken or skipped by // timestamp range. If this happens too many times in a row for the same // user key, we want to seek to the target sequence number. int cmp = user_comparator_.CompareWithoutTimestamp( ikey_.user_key, saved_key_.GetUserKey()); if (cmp == 0 || (skipping_saved_key && cmp < 0)) { num_skipped++; } else { saved_key_.SetUserKey( ikey_.user_key, !iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */); skipping_saved_key = false; num_skipped = 0; reseek_done = false; } } // If we have sequentially iterated via numerous equal keys, then it's // better to seek so that we can avoid too many key comparisons. 
// // To avoid infinite loops, do not reseek if we have already attempted to // reseek previously. // // TODO(lth): If we reseek to sequence number greater than ikey_.sequence, // then it does not make sense to reseek as we would actually land further // away from the desired key. There is opportunity for optimization here. if (num_skipped > max_skip_ && !reseek_done) { is_key_seqnum_zero_ = false; num_skipped = 0; reseek_done = true; std::string last_key; if (skipping_saved_key) { // We're looking for the next user-key but all we see are the same // user-key with decreasing sequence numbers. Fast forward to // sequence number 0 and type deletion (the smallest type). if (timestamp_size_ == 0) { AppendInternalKey( &last_key, ParsedInternalKey(saved_key_.GetUserKey(), 0, kTypeDeletion)); } else { std::string min_ts(timestamp_size_, static_cast(0)); AppendInternalKeyWithDifferentTimestamp( &last_key, ParsedInternalKey(saved_key_.GetUserKey(), 0, kTypeDeletion), min_ts); } // Don't set skipping_saved_key = false because we may still see more // user-keys equal to saved_key_. } else { // We saw multiple entries with this user key and sequence numbers // higher than sequence_. Fast forward to sequence_. // Note that this only covers a case when a higher key was overwritten // many times since our snapshot was taken, not the case when a lot of // different keys were inserted after our snapshot was taken. 
if (timestamp_size_ == 0) { AppendInternalKey( &last_key, ParsedInternalKey(saved_key_.GetUserKey(), sequence_, kValueTypeForSeek)); } else { AppendInternalKeyWithDifferentTimestamp( &last_key, ParsedInternalKey(saved_key_.GetUserKey(), sequence_, kValueTypeForSeek), *timestamp_ub_); } } iter_.Seek(last_key); RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); } else { iter_.Next(); } } while (iter_.Valid()); valid_ = false; return iter_.status().ok(); } // Merge values of the same user key starting from the current iter_ position // Scan from the newer entries to older entries. // PRE: iter_.key() points to the first merge type entry // saved_key_ stores the user key // iter_.PrepareValue() has been called // POST: saved_value_ has the merged value for the user key // iter_ points to the next entry (or invalid) bool DBIter::MergeValuesNewToOld() { if (!merge_operator_) { ROCKS_LOG_ERROR(logger_, "Options::merge_operator is null."); status_ = Status::InvalidArgument("merge_operator_ must be set."); valid_ = false; return false; } // Temporarily pin the blocks that hold merge operands TempPinData(); merge_context_.Clear(); // Start the merge process by pushing the first operand merge_context_.PushOperand( iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */); TEST_SYNC_POINT("DBIter::MergeValuesNewToOld:PushedFirstOperand"); ParsedInternalKey ikey; Status s; for (iter_.Next(); iter_.Valid(); iter_.Next()) { TEST_SYNC_POINT("DBIter::MergeValuesNewToOld:SteppedToNextOperand"); if (!ParseKey(&ikey)) { return false; } if (!user_comparator_.Equal(ikey.user_key, saved_key_.GetUserKey())) { // hit the next user key, stop right here break; } if (kTypeDeletion == ikey.type || kTypeSingleDeletion == ikey.type || range_del_agg_.ShouldDelete( ikey, RangeDelPositioningMode::kForwardTraversal)) { // hit a delete with the same user key, stop right here // iter_ is positioned after delete iter_.Next(); break; } if (!iter_.PrepareValue()) { valid_ = false; return 
false; } if (kTypeValue == ikey.type) { // hit a put, merge the put value with operands and store the // final result in saved_value_. We are done! const Slice val = iter_.value(); s = MergeHelper::TimedFullMerge( merge_operator_, ikey.user_key, &val, merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_, &pinned_value_, true); if (!s.ok()) { valid_ = false; status_ = s; return false; } // iter_ is positioned after put iter_.Next(); if (!iter_.status().ok()) { valid_ = false; return false; } return true; } else if (kTypeMerge == ikey.type) { // hit a merge, add the value as an operand and run associative merge. // when complete, add result to operands and continue. merge_context_.PushOperand( iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */); PERF_COUNTER_ADD(internal_merge_count, 1); } else if (kTypeBlobIndex == ikey.type) { if (!allow_blob_) { ROCKS_LOG_ERROR(logger_, "Encounter unexpected blob index."); status_ = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); } else { status_ = Status::NotSupported("Blob DB does not support merge operator."); } valid_ = false; return false; } else { assert(false); } } if (!iter_.status().ok()) { valid_ = false; return false; } // we either exhausted all internal keys under this user key, or hit // a deletion marker. // feed null as the existing value to the merge operator, such that // client can differentiate this scenario and do things accordingly. 
s = MergeHelper::TimedFullMerge(merge_operator_, saved_key_.GetUserKey(), nullptr, merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_, &pinned_value_, true); if (!s.ok()) { valid_ = false; status_ = s; return false; } assert(status_.ok()); return true; } void DBIter::Prev() { if (timestamp_size_ > 0) { valid_ = false; status_ = Status::NotSupported( "SeekToLast/SeekForPrev/Prev currently not supported with timestamp."); return; } assert(valid_); assert(status_.ok()); PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, env_); ReleaseTempPinnedData(); ResetInternalKeysSkippedCounter(); bool ok = true; if (direction_ == kForward) { if (!ReverseToBackward()) { ok = false; } } if (ok) { Slice prefix; if (prefix_same_as_start_) { assert(prefix_extractor_ != nullptr); prefix = prefix_.GetUserKey(); } PrevInternal(prefix_same_as_start_ ? &prefix : nullptr); } if (statistics_ != nullptr) { local_stats_.prev_count_++; if (valid_) { local_stats_.prev_found_count_++; local_stats_.bytes_read_ += (key().size() + value().size()); } } } bool DBIter::ReverseToForward() { assert(iter_.status().ok()); // When moving backwards, iter_ is positioned on _previous_ key, which may // not exist or may have different prefix than the current key(). // If that's the case, seek iter_ to current key. if (!expect_total_order_inner_iter() || !iter_.Valid()) { IterKey last_key; last_key.SetInternalKey(ParsedInternalKey( saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek)); iter_.Seek(last_key.GetInternalKey()); } direction_ = kForward; // Skip keys less than the current key() (a.k.a. saved_key_). while (iter_.Valid()) { ParsedInternalKey ikey; if (!ParseKey(&ikey)) { return false; } if (user_comparator_.Compare(ikey.user_key, saved_key_.GetUserKey()) >= 0) { return true; } iter_.Next(); } if (!iter_.status().ok()) { valid_ = false; return false; } return true; } // Move iter_ to the key before saved_key_. 
bool DBIter::ReverseToBackward() { assert(iter_.status().ok()); // When current_entry_is_merged_ is true, iter_ may be positioned on the next // key, which may not exist or may have prefix different from current. // If that's the case, seek to saved_key_. if (current_entry_is_merged_ && (!expect_total_order_inner_iter() || !iter_.Valid())) { IterKey last_key; // Using kMaxSequenceNumber and kValueTypeForSeek // (not kValueTypeForSeekForPrev) to seek to a key strictly smaller // than saved_key_. last_key.SetInternalKey(ParsedInternalKey( saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek)); if (!expect_total_order_inner_iter()) { iter_.SeekForPrev(last_key.GetInternalKey()); } else { // Some iterators may not support SeekForPrev(), so we avoid using it // when prefix seek mode is disabled. This is somewhat expensive // (an extra Prev(), as well as an extra change of direction of iter_), // so we may need to reconsider it later. iter_.Seek(last_key.GetInternalKey()); if (!iter_.Valid() && iter_.status().ok()) { iter_.SeekToLast(); } } } direction_ = kReverse; return FindUserKeyBeforeSavedKey(); } void DBIter::PrevInternal(const Slice* prefix) { while (iter_.Valid()) { saved_key_.SetUserKey( ExtractUserKey(iter_.key()), !iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */); assert(prefix == nullptr || prefix_extractor_ != nullptr); if (prefix != nullptr && prefix_extractor_->Transform(saved_key_.GetUserKey()) .compare(*prefix) != 0) { assert(prefix_same_as_start_); // Current key does not have the same prefix as start valid_ = false; return; } assert(iterate_lower_bound_ == nullptr || iter_.MayBeOutOfLowerBound() || user_comparator_.Compare(saved_key_.GetUserKey(), *iterate_lower_bound_) >= 0); if (iterate_lower_bound_ != nullptr && iter_.MayBeOutOfLowerBound() && user_comparator_.Compare(saved_key_.GetUserKey(), *iterate_lower_bound_) < 0) { // We've iterated earlier than the user-specified lower bound. 
valid_ = false; return; } if (!FindValueForCurrentKey()) { // assigns valid_ return; } // Whether or not we found a value for current key, we need iter_ to end up // on a smaller key. if (!FindUserKeyBeforeSavedKey()) { return; } if (valid_) { // Found the value. return; } if (TooManyInternalKeysSkipped(false)) { return; } } // We haven't found any key - iterator is not valid valid_ = false; } // Used for backwards iteration. // Looks at the entries with user key saved_key_ and finds the most up-to-date // value for it, or executes a merge, or determines that the value was deleted. // Sets valid_ to true if the value is found and is ready to be presented to // the user through value(). // Sets valid_ to false if the value was deleted, and we should try another key. // Returns false if an error occurred, and !status().ok() and !valid_. // // PRE: iter_ is positioned on the last entry with user key equal to saved_key_. // POST: iter_ is positioned on one of the entries equal to saved_key_, or on // the entry just before them, or on the entry just after them. bool DBIter::FindValueForCurrentKey() { assert(iter_.Valid()); merge_context_.Clear(); current_entry_is_merged_ = false; // last entry before merge (could be kTypeDeletion, kTypeSingleDeletion or // kTypeValue) ValueType last_not_merge_type = kTypeDeletion; ValueType last_key_entry_type = kTypeDeletion; // Temporarily pin blocks that hold (merge operands / the value) ReleaseTempPinnedData(); TempPinData(); size_t num_skipped = 0; while (iter_.Valid()) { ParsedInternalKey ikey; if (!ParseKey(&ikey)) { return false; } assert(ikey.user_key.size() >= timestamp_size_); Slice ts; if (timestamp_size_ > 0) { ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_, timestamp_size_); } if (!IsVisible(ikey.sequence, ts) || !user_comparator_.Equal(ikey.user_key, saved_key_.GetUserKey())) { break; } if (TooManyInternalKeysSkipped()) { return false; } // This user key has lots of entries. 
// We're going from old to new, and it's taking too long. Let's do a Seek() // and go from new to old. This helps when a key was overwritten many times. if (num_skipped >= max_skip_) { return FindValueForCurrentKeyUsingSeek(); } if (!iter_.PrepareValue()) { valid_ = false; return false; } last_key_entry_type = ikey.type; switch (last_key_entry_type) { case kTypeValue: case kTypeBlobIndex: if (range_del_agg_.ShouldDelete( ikey, RangeDelPositioningMode::kBackwardTraversal)) { last_key_entry_type = kTypeRangeDeletion; PERF_COUNTER_ADD(internal_delete_skipped_count, 1); } else { assert(iter_.iter()->IsValuePinned()); pinned_value_ = iter_.value(); } merge_context_.Clear(); last_not_merge_type = last_key_entry_type; break; case kTypeDeletion: case kTypeSingleDeletion: merge_context_.Clear(); last_not_merge_type = last_key_entry_type; PERF_COUNTER_ADD(internal_delete_skipped_count, 1); break; case kTypeMerge: if (range_del_agg_.ShouldDelete( ikey, RangeDelPositioningMode::kBackwardTraversal)) { merge_context_.Clear(); last_key_entry_type = kTypeRangeDeletion; last_not_merge_type = last_key_entry_type; PERF_COUNTER_ADD(internal_delete_skipped_count, 1); } else { assert(merge_operator_ != nullptr); merge_context_.PushOperandBack( iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */); PERF_COUNTER_ADD(internal_merge_count, 1); } break; default: assert(false); } PERF_COUNTER_ADD(internal_key_skipped_count, 1); iter_.Prev(); ++num_skipped; } if (!iter_.status().ok()) { valid_ = false; return false; } Status s; is_blob_ = false; switch (last_key_entry_type) { case kTypeDeletion: case kTypeSingleDeletion: case kTypeRangeDeletion: valid_ = false; return true; case kTypeMerge: current_entry_is_merged_ = true; if (last_not_merge_type == kTypeDeletion || last_not_merge_type == kTypeSingleDeletion || last_not_merge_type == kTypeRangeDeletion) { s = MergeHelper::TimedFullMerge( merge_operator_, saved_key_.GetUserKey(), nullptr, merge_context_.GetOperands(), 
&saved_value_, logger_, statistics_, env_, &pinned_value_, true); } else if (last_not_merge_type == kTypeBlobIndex) { if (!allow_blob_) { ROCKS_LOG_ERROR(logger_, "Encounter unexpected blob index."); status_ = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); } else { status_ = Status::NotSupported("Blob DB does not support merge operator."); } valid_ = false; return false; } else { assert(last_not_merge_type == kTypeValue); s = MergeHelper::TimedFullMerge( merge_operator_, saved_key_.GetUserKey(), &pinned_value_, merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_, &pinned_value_, true); } break; case kTypeValue: // do nothing - we've already has value in pinned_value_ break; case kTypeBlobIndex: if (!allow_blob_) { ROCKS_LOG_ERROR(logger_, "Encounter unexpected blob index."); status_ = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); valid_ = false; return false; } is_blob_ = true; break; default: assert(false); break; } if (!s.ok()) { valid_ = false; status_ = s; return false; } valid_ = true; return true; } // This function is used in FindValueForCurrentKey. // We use Seek() function instead of Prev() to find necessary value // TODO: This is very similar to FindNextUserEntry() and MergeValuesNewToOld(). // Would be nice to reuse some code. bool DBIter::FindValueForCurrentKeyUsingSeek() { // FindValueForCurrentKey will enable pinning before calling // FindValueForCurrentKeyUsingSeek() assert(pinned_iters_mgr_.PinningEnabled()); std::string last_key; AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetUserKey(), sequence_, kValueTypeForSeek)); iter_.Seek(last_key); RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); // In case read_callback presents, the value we seek to may not be visible. // Find the next value that's visible. 
ParsedInternalKey ikey; is_blob_ = false; while (true) { if (!iter_.Valid()) { valid_ = false; return iter_.status().ok(); } if (!ParseKey(&ikey)) { return false; } assert(ikey.user_key.size() >= timestamp_size_); Slice ts; if (timestamp_size_ > 0) { ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_, timestamp_size_); } if (!user_comparator_.Equal(ikey.user_key, saved_key_.GetUserKey())) { // No visible values for this key, even though FindValueForCurrentKey() // has seen some. This is possible if we're using a tailing iterator, and // the entries were discarded in a compaction. valid_ = false; return true; } if (IsVisible(ikey.sequence, ts)) { break; } iter_.Next(); } if (ikey.type == kTypeDeletion || ikey.type == kTypeSingleDeletion || range_del_agg_.ShouldDelete( ikey, RangeDelPositioningMode::kBackwardTraversal)) { valid_ = false; return true; } if (ikey.type == kTypeBlobIndex && !allow_blob_) { ROCKS_LOG_ERROR(logger_, "Encounter unexpected blob index."); status_ = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); valid_ = false; return false; } if (!iter_.PrepareValue()) { valid_ = false; return false; } if (ikey.type == kTypeValue || ikey.type == kTypeBlobIndex) { assert(iter_.iter()->IsValuePinned()); pinned_value_ = iter_.value(); is_blob_ = (ikey.type == kTypeBlobIndex); valid_ = true; return true; } // kTypeMerge. 
We need to collect all kTypeMerge values and save them // in operands assert(ikey.type == kTypeMerge); current_entry_is_merged_ = true; merge_context_.Clear(); merge_context_.PushOperand( iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */); while (true) { iter_.Next(); if (!iter_.Valid()) { if (!iter_.status().ok()) { valid_ = false; return false; } break; } if (!ParseKey(&ikey)) { return false; } if (!user_comparator_.Equal(ikey.user_key, saved_key_.GetUserKey())) { break; } if (ikey.type == kTypeDeletion || ikey.type == kTypeSingleDeletion || range_del_agg_.ShouldDelete( ikey, RangeDelPositioningMode::kForwardTraversal)) { break; } if (!iter_.PrepareValue()) { valid_ = false; return false; } if (ikey.type == kTypeValue) { const Slice val = iter_.value(); Status s = MergeHelper::TimedFullMerge( merge_operator_, saved_key_.GetUserKey(), &val, merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_, &pinned_value_, true); if (!s.ok()) { valid_ = false; status_ = s; return false; } valid_ = true; return true; } else if (ikey.type == kTypeMerge) { merge_context_.PushOperand( iter_.value(), iter_.iter()->IsValuePinned() /* operand_pinned */); PERF_COUNTER_ADD(internal_merge_count, 1); } else if (ikey.type == kTypeBlobIndex) { if (!allow_blob_) { ROCKS_LOG_ERROR(logger_, "Encounter unexpected blob index."); status_ = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); } else { status_ = Status::NotSupported("Blob DB does not support merge operator."); } valid_ = false; return false; } else { assert(false); } } Status s = MergeHelper::TimedFullMerge( merge_operator_, saved_key_.GetUserKey(), nullptr, merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_, &pinned_value_, true); if (!s.ok()) { valid_ = false; status_ = s; return false; } // Make sure we leave iter_ in a good state. 
If it's valid and we don't care // about prefixes, that's already good enough. Otherwise it needs to be // seeked to the current key. if (!expect_total_order_inner_iter() || !iter_.Valid()) { if (!expect_total_order_inner_iter()) { iter_.SeekForPrev(last_key); } else { iter_.Seek(last_key); if (!iter_.Valid() && iter_.status().ok()) { iter_.SeekToLast(); } } RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); } valid_ = true; return true; } // Move backwards until the key smaller than saved_key_. // Changes valid_ only if return value is false. bool DBIter::FindUserKeyBeforeSavedKey() { assert(status_.ok()); size_t num_skipped = 0; while (iter_.Valid()) { ParsedInternalKey ikey; if (!ParseKey(&ikey)) { return false; } if (user_comparator_.Compare(ikey.user_key, saved_key_.GetUserKey()) < 0) { return true; } if (TooManyInternalKeysSkipped()) { return false; } assert(ikey.sequence != kMaxSequenceNumber); assert(ikey.user_key.size() >= timestamp_size_); Slice ts; if (timestamp_size_ > 0) { ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_, timestamp_size_); } if (!IsVisible(ikey.sequence, ts)) { PERF_COUNTER_ADD(internal_recent_skipped_count, 1); } else { PERF_COUNTER_ADD(internal_key_skipped_count, 1); } if (num_skipped >= max_skip_) { num_skipped = 0; IterKey last_key; last_key.SetInternalKey(ParsedInternalKey( saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek)); // It would be more efficient to use SeekForPrev() here, but some // iterators may not support it. 
iter_.Seek(last_key.GetInternalKey()); RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION); if (!iter_.Valid()) { break; } } else { ++num_skipped; } iter_.Prev(); } if (!iter_.status().ok()) { valid_ = false; return false; } return true; } bool DBIter::TooManyInternalKeysSkipped(bool increment) { if ((max_skippable_internal_keys_ > 0) && (num_internal_keys_skipped_ > max_skippable_internal_keys_)) { valid_ = false; status_ = Status::Incomplete("Too many internal keys skipped."); return true; } else if (increment) { num_internal_keys_skipped_++; } return false; } bool DBIter::IsVisible(SequenceNumber sequence, const Slice& ts, bool* more_recent) { // Remember that comparator orders preceding timestamp as larger. // TODO(yanqin): support timestamp in read_callback_. bool visible_by_seq = (read_callback_ == nullptr) ? sequence <= sequence_ : read_callback_->IsVisible(sequence); bool visible_by_ts = (timestamp_ub_ == nullptr || user_comparator_.CompareTimestamp(ts, *timestamp_ub_) <= 0) && (timestamp_lb_ == nullptr || user_comparator_.CompareTimestamp(ts, *timestamp_lb_) >= 0); if (more_recent) { *more_recent = !visible_by_seq; } return visible_by_seq && visible_by_ts; } void DBIter::SetSavedKeyToSeekTarget(const Slice& target) { is_key_seqnum_zero_ = false; SequenceNumber seq = sequence_; saved_key_.Clear(); saved_key_.SetInternalKey(target, seq, kValueTypeForSeek, timestamp_ub_); if (iterate_lower_bound_ != nullptr && user_comparator_.CompareWithoutTimestamp( saved_key_.GetUserKey(), /*a_has_ts=*/true, *iterate_lower_bound_, /*b_has_ts=*/false) < 0) { // Seek key is smaller than the lower bound. saved_key_.Clear(); saved_key_.SetInternalKey(*iterate_lower_bound_, seq, kValueTypeForSeek, timestamp_ub_); } } void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) { is_key_seqnum_zero_ = false; saved_key_.Clear(); // now saved_key is used to store internal key. 
saved_key_.SetInternalKey(target, 0 /* sequence_number */, kValueTypeForSeekForPrev); if (iterate_upper_bound_ != nullptr && user_comparator_.Compare(saved_key_.GetUserKey(), *iterate_upper_bound_) >= 0) { saved_key_.Clear(); saved_key_.SetInternalKey(*iterate_upper_bound_, kMaxSequenceNumber); } } void DBIter::Seek(const Slice& target) { PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_); StopWatch sw(env_, statistics_, DB_SEEK); #ifndef ROCKSDB_LITE if (db_impl_ != nullptr && cfd_ != nullptr) { db_impl_->TraceIteratorSeek(cfd_->GetID(), target); } #endif // ROCKSDB_LITE status_ = Status::OK(); ReleaseTempPinnedData(); ResetInternalKeysSkippedCounter(); // Seek the inner iterator based on the target key. { PERF_TIMER_GUARD(seek_internal_seek_time); SetSavedKeyToSeekTarget(target); iter_.Seek(saved_key_.GetInternalKey()); range_del_agg_.InvalidateRangeDelMapPositions(); RecordTick(statistics_, NUMBER_DB_SEEK); } if (!iter_.Valid()) { valid_ = false; return; } direction_ = kForward; // Now the inner iterator is placed to the target position. From there, // we need to find out the next key that is visible to the user. ClearSavedValue(); if (prefix_same_as_start_) { // The case where the iterator needs to be invalidated if it has exausted // keys within the same prefix of the seek key. assert(prefix_extractor_ != nullptr); Slice target_prefix = prefix_extractor_->Transform(target); FindNextUserEntry(false /* not skipping saved_key */, &target_prefix /* prefix */); if (valid_) { // Remember the prefix of the seek key for the future Next() call to // check. prefix_.SetUserKey(target_prefix); } } else { FindNextUserEntry(false /* not skipping saved_key */, nullptr); } if (!valid_) { return; } // Updating stats and perf context counters. 
if (statistics_ != nullptr) { // Decrement since we don't want to count this key as skipped RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); } PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); } void DBIter::SeekForPrev(const Slice& target) { PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_); StopWatch sw(env_, statistics_, DB_SEEK); #ifndef ROCKSDB_LITE if (db_impl_ != nullptr && cfd_ != nullptr) { db_impl_->TraceIteratorSeekForPrev(cfd_->GetID(), target); } #endif // ROCKSDB_LITE if (timestamp_size_ > 0) { valid_ = false; status_ = Status::NotSupported( "SeekToLast/SeekForPrev/Prev currently not supported with timestamp."); return; } status_ = Status::OK(); ReleaseTempPinnedData(); ResetInternalKeysSkippedCounter(); // Seek the inner iterator based on the target key. { PERF_TIMER_GUARD(seek_internal_seek_time); SetSavedKeyToSeekForPrevTarget(target); iter_.SeekForPrev(saved_key_.GetInternalKey()); range_del_agg_.InvalidateRangeDelMapPositions(); RecordTick(statistics_, NUMBER_DB_SEEK); } if (!iter_.Valid()) { valid_ = false; return; } direction_ = kReverse; // Now the inner iterator is placed to the target position. From there, // we need to find out the first key that is visible to the user in the // backward direction. ClearSavedValue(); if (prefix_same_as_start_) { // The case where the iterator needs to be invalidated if it has exausted // keys within the same prefix of the seek key. assert(prefix_extractor_ != nullptr); Slice target_prefix = prefix_extractor_->Transform(target); PrevInternal(&target_prefix); if (valid_) { // Remember the prefix of the seek key for the future Prev() call to // check. prefix_.SetUserKey(target_prefix); } } else { PrevInternal(nullptr); } // Report stats and perf context. 
if (statistics_ != nullptr && valid_) { RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); } } void DBIter::SeekToFirst() { if (iterate_lower_bound_ != nullptr) { Seek(*iterate_lower_bound_); return; } PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_); // Don't use iter_::Seek() if we set a prefix extractor // because prefix seek will be used. if (!expect_total_order_inner_iter()) { max_skip_ = std::numeric_limits::max(); } status_ = Status::OK(); direction_ = kForward; ReleaseTempPinnedData(); ResetInternalKeysSkippedCounter(); ClearSavedValue(); is_key_seqnum_zero_ = false; { PERF_TIMER_GUARD(seek_internal_seek_time); iter_.SeekToFirst(); range_del_agg_.InvalidateRangeDelMapPositions(); } RecordTick(statistics_, NUMBER_DB_SEEK); if (iter_.Valid()) { saved_key_.SetUserKey( ExtractUserKey(iter_.key()), !iter_.iter()->IsKeyPinned() || !pin_thru_lifetime_ /* copy */); FindNextUserEntry(false /* not skipping saved_key */, nullptr /* no prefix check */); if (statistics_ != nullptr) { if (valid_) { RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); } } } else { valid_ = false; } if (valid_ && prefix_same_as_start_) { assert(prefix_extractor_ != nullptr); prefix_.SetUserKey(prefix_extractor_->Transform(saved_key_.GetUserKey())); } } void DBIter::SeekToLast() { if (timestamp_size_ > 0) { valid_ = false; status_ = Status::NotSupported( "SeekToLast/SeekForPrev/Prev currently not supported with timestamp."); return; } if (iterate_upper_bound_ != nullptr) { // Seek to last key strictly less than ReadOptions.iterate_upper_bound. 
SeekForPrev(*iterate_upper_bound_); if (Valid() && user_comparator_.Equal(*iterate_upper_bound_, key())) { ReleaseTempPinnedData(); PrevInternal(nullptr); } return; } PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_); // Don't use iter_::Seek() if we set a prefix extractor // because prefix seek will be used. if (!expect_total_order_inner_iter()) { max_skip_ = std::numeric_limits::max(); } status_ = Status::OK(); direction_ = kReverse; ReleaseTempPinnedData(); ResetInternalKeysSkippedCounter(); ClearSavedValue(); is_key_seqnum_zero_ = false; { PERF_TIMER_GUARD(seek_internal_seek_time); iter_.SeekToLast(); range_del_agg_.InvalidateRangeDelMapPositions(); } PrevInternal(nullptr); if (statistics_ != nullptr) { RecordTick(statistics_, NUMBER_DB_SEEK); if (valid_) { RecordTick(statistics_, NUMBER_DB_SEEK_FOUND); RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size()); PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size()); } } if (valid_ && prefix_same_as_start_) { assert(prefix_extractor_ != nullptr); prefix_.SetUserKey(prefix_extractor_->Transform(saved_key_.GetUserKey())); } } Iterator* NewDBIterator(Env* env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const Comparator* user_key_comparator, InternalIterator* internal_iter, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, bool allow_blob) { DBIter* db_iter = new DBIter( env, read_options, cf_options, mutable_cf_options, user_key_comparator, internal_iter, sequence, false, max_sequential_skip_in_iterations, read_callback, db_impl, cfd, allow_blob); return db_iter; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_iter.h000066400000000000000000000335101370372246700154150ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/range_del_aggregator.h" #include "memory/arena.h" #include "options/cf_options.h" #include "rocksdb/db.h" #include "rocksdb/iterator.h" #include "table/iterator_wrapper.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { // This file declares the factory functions of DBIter, in its original form // or a wrapped form with class ArenaWrappedDBIter, which is defined here. // Class DBIter, which is declared and implemented inside db_iter.cc, is // an iterator that converts internal keys (yielded by an InternalIterator) // that were live at the specified sequence number into appropriate user // keys. // Each internal key consists of a user key, a sequence number, and a value // type. DBIter deals with multiple key versions, tombstones, merge operands, // etc, and exposes an Iterator. 
// For example, DBIter may wrap following InternalIterator: // user key: AAA value: v3 seqno: 100 type: Put // user key: AAA value: v2 seqno: 97 type: Put // user key: AAA value: v1 seqno: 95 type: Put // user key: BBB value: v1 seqno: 90 type: Put // user key: BBC value: N/A seqno: 98 type: Delete // user key: BBC value: v1 seqno: 95 type: Put // If the snapshot passed in is 102, then the DBIter is expected to // expose the following iterator: // key: AAA value: v3 // key: BBB value: v1 // If the snapshot passed in is 96, then it should expose: // key: AAA value: v1 // key: BBB value: v1 // key: BBC value: v1 // // Memtables and sstables that make the DB representation contain // (userkey,seq,type) => uservalue entries. DBIter // combines multiple entries for the same userkey found in the DB // representation into a single entry while accounting for sequence // numbers, deletion markers, overwrites, etc. class DBIter final : public Iterator { public: // The following is grossly complicated. TODO: clean it up // Which direction is the iterator currently moving? // (1) When moving forward: // (1a) if current_entry_is_merged_ = false, the internal iterator is // positioned at the exact entry that yields this->key(), this->value() // (1b) if current_entry_is_merged_ = true, the internal iterator is // positioned immediately after the last entry that contributed to the // current this->value(). That entry may or may not have key equal to // this->key(). // (2) When moving backwards, the internal iterator is positioned // just before all entries whose user key == this->key(). enum Direction { kForward, kReverse }; // LocalStatistics contain Statistics counters that will be aggregated per // each iterator instance and then will be sent to the global statistics when // the iterator is destroyed. // // The purpose of this approach is to avoid perf regression happening // when multiple threads bump the atomic counters from a DBIter::Next(). 
struct LocalStatistics { explicit LocalStatistics() { ResetCounters(); } void ResetCounters() { next_count_ = 0; next_found_count_ = 0; prev_count_ = 0; prev_found_count_ = 0; bytes_read_ = 0; skip_count_ = 0; } void BumpGlobalStatistics(Statistics* global_statistics) { RecordTick(global_statistics, NUMBER_DB_NEXT, next_count_); RecordTick(global_statistics, NUMBER_DB_NEXT_FOUND, next_found_count_); RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_); RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_); RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_); RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_); PERF_COUNTER_ADD(iter_read_bytes, bytes_read_); ResetCounters(); } // Map to Tickers::NUMBER_DB_NEXT uint64_t next_count_; // Map to Tickers::NUMBER_DB_NEXT_FOUND uint64_t next_found_count_; // Map to Tickers::NUMBER_DB_PREV uint64_t prev_count_; // Map to Tickers::NUMBER_DB_PREV_FOUND uint64_t prev_found_count_; // Map to Tickers::ITER_BYTES_READ uint64_t bytes_read_; // Map to Tickers::NUMBER_ITER_SKIP uint64_t skip_count_; }; DBIter(Env* _env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const Comparator* cmp, InternalIterator* iter, SequenceNumber s, bool arena_mode, uint64_t max_sequential_skip_in_iterations, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, bool allow_blob); // No copying allowed DBIter(const DBIter&) = delete; void operator=(const DBIter&) = delete; ~DBIter() override { // Release pinned data if any if (pinned_iters_mgr_.PinningEnabled()) { pinned_iters_mgr_.ReleasePinnedData(); } RecordTick(statistics_, NO_ITERATOR_DELETED); ResetInternalKeysSkippedCounter(); local_stats_.BumpGlobalStatistics(statistics_); iter_.DeleteIter(arena_mode_); } void SetIter(InternalIterator* iter) { assert(iter_.iter() == nullptr); iter_.Set(iter); iter_.iter()->SetPinnedItersMgr(&pinned_iters_mgr_); } ReadRangeDelAggregator* 
GetRangeDelAggregator() { return &range_del_agg_; } bool Valid() const override { return valid_; } Slice key() const override { assert(valid_); if (start_seqnum_ > 0) { return saved_key_.GetInternalKey(); } else { const Slice ukey_and_ts = saved_key_.GetUserKey(); return Slice(ukey_and_ts.data(), ukey_and_ts.size() - timestamp_size_); } } Slice value() const override { assert(valid_); if (current_entry_is_merged_) { // If pinned_value_ is set then the result of merge operator is one of // the merge operands and we should return it. return pinned_value_.data() ? pinned_value_ : saved_value_; } else if (direction_ == kReverse) { return pinned_value_; } else { return iter_.value(); } } Status status() const override { if (status_.ok()) { return iter_.status(); } else { assert(!valid_); return status_; } } Slice timestamp() const override { assert(valid_); assert(timestamp_size_ > 0); const Slice ukey_and_ts = saved_key_.GetUserKey(); assert(timestamp_size_ < ukey_and_ts.size()); return ExtractTimestampFromUserKey(ukey_and_ts, timestamp_size_); } bool IsBlob() const { assert(valid_ && (allow_blob_ || !is_blob_)); return is_blob_; } Status GetProperty(std::string prop_name, std::string* prop) override; void Next() final override; void Prev() final override; // 'target' does not contain timestamp, even if user timestamp feature is // enabled. void Seek(const Slice& target) final override; void SeekForPrev(const Slice& target) final override; void SeekToFirst() final override; void SeekToLast() final override; Env* env() const { return env_; } void set_sequence(uint64_t s) { sequence_ = s; if (read_callback_) { read_callback_->Refresh(s); } } void set_valid(bool v) { valid_ = v; } private: // For all methods in this block: // PRE: iter_->Valid() && status_.ok() // Return false if there was an error, and status() is non-ok, valid_ = false; // in this case callers would usually stop what they were doing and return. 
bool ReverseToForward(); bool ReverseToBackward(); // Set saved_key_ to the seek key to target, with proper sequence number set. // It might get adjusted if the seek key is smaller than iterator lower bound. void SetSavedKeyToSeekTarget(const Slice& target); // Set saved_key_ to the seek key to target, with proper sequence number set. // It might get adjusted if the seek key is larger than iterator upper bound. void SetSavedKeyToSeekForPrevTarget(const Slice& target); bool FindValueForCurrentKey(); bool FindValueForCurrentKeyUsingSeek(); bool FindUserKeyBeforeSavedKey(); // If `skipping_saved_key` is true, the function will keep iterating until it // finds a user key that is larger than `saved_key_`. // If `prefix` is not null, the iterator needs to stop when all keys for the // prefix are exhausted and the interator is set to invalid. bool FindNextUserEntry(bool skipping_saved_key, const Slice* prefix); // Internal implementation of FindNextUserEntry(). bool FindNextUserEntryInternal(bool skipping_saved_key, const Slice* prefix); bool ParseKey(ParsedInternalKey* key); bool MergeValuesNewToOld(); // If prefix is not null, we need to set the iterator to invalid if no more // entry can be found within the prefix. 
void PrevInternal(const Slice* prefix); bool TooManyInternalKeysSkipped(bool increment = true); bool IsVisible(SequenceNumber sequence, const Slice& ts, bool* more_recent = nullptr); // Temporarily pin the blocks that we encounter until ReleaseTempPinnedData() // is called void TempPinData() { if (!pin_thru_lifetime_) { pinned_iters_mgr_.StartPinning(); } } // Release blocks pinned by TempPinData() void ReleaseTempPinnedData() { if (!pin_thru_lifetime_ && pinned_iters_mgr_.PinningEnabled()) { pinned_iters_mgr_.ReleasePinnedData(); } } inline void ClearSavedValue() { if (saved_value_.capacity() > 1048576) { std::string empty; swap(empty, saved_value_); } else { saved_value_.clear(); } } inline void ResetInternalKeysSkippedCounter() { local_stats_.skip_count_ += num_internal_keys_skipped_; if (valid_) { local_stats_.skip_count_--; } num_internal_keys_skipped_ = 0; } bool expect_total_order_inner_iter() { assert(expect_total_order_inner_iter_ || prefix_extractor_ != nullptr); return expect_total_order_inner_iter_; } // If lower bound of timestamp is given by ReadOptions.iter_start_ts, we need // to return versions of the same key. We cannot just skip if the key value // is the same but timestamps are different but fall in timestamp range. inline int CompareKeyForSkip(const Slice& a, const Slice& b) { return timestamp_lb_ != nullptr ? user_comparator_.Compare(a, b) : user_comparator_.CompareWithoutTimestamp(a, b); } const SliceTransform* prefix_extractor_; Env* const env_; Logger* logger_; UserComparatorWrapper user_comparator_; const MergeOperator* const merge_operator_; IteratorWrapper iter_; ReadCallback* read_callback_; // Max visible sequence number. It is normally the snapshot seq unless we have // uncommitted data in db as in WriteUnCommitted. SequenceNumber sequence_; IterKey saved_key_; // Reusable internal key data structure. This is only used inside one function // and should not be used across functions. 
Reusing this object can reduce // overhead of calling construction of the function if creating it each time. ParsedInternalKey ikey_; std::string saved_value_; Slice pinned_value_; // for prefix seek mode to support prev() Statistics* statistics_; uint64_t max_skip_; uint64_t max_skippable_internal_keys_; uint64_t num_internal_keys_skipped_; const Slice* iterate_lower_bound_; const Slice* iterate_upper_bound_; // The prefix of the seek key. It is only used when prefix_same_as_start_ // is true and prefix extractor is not null. In Next() or Prev(), current keys // will be checked against this prefix, so that the iterator can be // invalidated if the keys in this prefix has been exhausted. Set it using // SetUserKey() and use it using GetUserKey(). IterKey prefix_; Status status_; Direction direction_; bool valid_; bool current_entry_is_merged_; // True if we know that the current entry's seqnum is 0. // This information is used as that the next entry will be for another // user key. bool is_key_seqnum_zero_; const bool prefix_same_as_start_; // Means that we will pin all data blocks we read as long the Iterator // is not deleted, will be true if ReadOptions::pin_data is true const bool pin_thru_lifetime_; // Expect the inner iterator to maintain a total order. // prefix_extractor_ must be non-NULL if the value is false. const bool expect_total_order_inner_iter_; bool allow_blob_; bool is_blob_; bool arena_mode_; // List of operands for merge operator. 
MergeContext merge_context_; ReadRangeDelAggregator range_del_agg_; LocalStatistics local_stats_; PinnedIteratorsManager pinned_iters_mgr_; #ifdef ROCKSDB_LITE ROCKSDB_FIELD_UNUSED #endif DBImpl* db_impl_; #ifdef ROCKSDB_LITE ROCKSDB_FIELD_UNUSED #endif ColumnFamilyData* cfd_; // for diff snapshots we want the lower bound on the seqnum; // if this value > 0 iterator will return internal keys SequenceNumber start_seqnum_; const Slice* const timestamp_ub_; const Slice* const timestamp_lb_; const size_t timestamp_size_; }; // Return a new iterator that converts internal keys (yielded by // "*internal_iter") that were live at the specified `sequence` number // into appropriate user keys. extern Iterator* NewDBIterator( Env* env, const ReadOptions& read_options, const ImmutableCFOptions& cf_options, const MutableCFOptions& mutable_cf_options, const Comparator* user_key_comparator, InternalIterator* internal_iter, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, ReadCallback* read_callback, DBImpl* db_impl = nullptr, ColumnFamilyData* cfd = nullptr, bool allow_blob = false); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_iter_stress_test.cc000066400000000000000000000516621370372246700202250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/db_iter.h" #include "db/dbformat.h" #include "rocksdb/comparator.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "test_util/testharness.h" #include "util/random.h" #include "util/string_util.h" #include "utilities/merge_operators.h" #ifdef GFLAGS #include "util/gflags_compat.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; DEFINE_bool(verbose, false, "Print huge, detailed trace. 
Intended for debugging failures."); #else void ParseCommandLineFlags(int*, char***, bool) {} bool FLAGS_verbose = false; #endif namespace ROCKSDB_NAMESPACE { class DBIteratorStressTest : public testing::Test { public: Env* env_; DBIteratorStressTest() : env_(Env::Default()) {} }; namespace { struct Entry { std::string key; ValueType type; // kTypeValue, kTypeDeletion, kTypeMerge uint64_t sequence; std::string ikey; // internal key, made from `key`, `sequence` and `type` std::string value; // If false, we'll pretend that this entry doesn't exist. bool visible = true; bool operator<(const Entry& e) const { if (key != e.key) return key < e.key; return std::tie(sequence, type) > std::tie(e.sequence, e.type); } }; struct Data { std::vector entries; // Indices in `entries` with `visible` = false. std::vector hidden; // Keys of entries whose `visible` changed since the last seek of iterators. std::set recently_touched_keys; }; struct StressTestIterator : public InternalIterator { Data* data; Random64* rnd; InternalKeyComparator cmp; // Each operation will return error with this probability... double error_probability = 0; // ... and add/remove entries with this probability. double mutation_probability = 0; // The probability of adding vs removing entries will be chosen so that the // amount of removed entries stays somewhat close to this number. double target_hidden_fraction = 0; // If true, print all mutations to stdout for debugging. 
bool trace = false; int iter = -1; Status status_; StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp) : data(_data), rnd(_rnd), cmp(_cmp) {} bool Valid() const override { if (iter >= 0 && iter < (int)data->entries.size()) { assert(status_.ok()); return true; } return false; } Status status() const override { return status_; } bool MaybeFail() { if (rnd->Next() >= std::numeric_limits::max() * error_probability) { return false; } if (rnd->Next() % 2) { status_ = Status::Incomplete("test"); } else { status_ = Status::IOError("test"); } if (trace) { std::cout << "injecting " << status_.ToString() << std::endl; } iter = -1; return true; } void MaybeMutate() { if (rnd->Next() >= std::numeric_limits::max() * mutation_probability) { return; } do { // If too many entries are hidden, hide less, otherwise hide more. double hide_probability = data->hidden.size() > data->entries.size() * target_hidden_fraction ? 1. / 3 : 2. / 3; if (data->hidden.empty()) { hide_probability = 1; } bool do_hide = rnd->Next() < std::numeric_limits::max() * hide_probability; if (do_hide) { // Hide a random entry. size_t idx = rnd->Next() % data->entries.size(); Entry& e = data->entries[idx]; if (e.visible) { if (trace) { std::cout << "hiding idx " << idx << std::endl; } e.visible = false; data->hidden.push_back(idx); data->recently_touched_keys.insert(e.key); } else { // Already hidden. Let's go unhide something instead, just because // it's easy and it doesn't really matter what we do. do_hide = false; } } if (!do_hide) { // Unhide a random entry. 
size_t hi = rnd->Next() % data->hidden.size(); size_t idx = data->hidden[hi]; if (trace) { std::cout << "unhiding idx " << idx << std::endl; } Entry& e = data->entries[idx]; assert(!e.visible); e.visible = true; data->hidden[hi] = data->hidden.back(); data->hidden.pop_back(); data->recently_touched_keys.insert(e.key); } } while (rnd->Next() % 3 != 0); // do 3 mutations on average } void SkipForward() { while (iter < (int)data->entries.size() && !data->entries[iter].visible) { ++iter; } } void SkipBackward() { while (iter >= 0 && !data->entries[iter].visible) { --iter; } } void SeekToFirst() override { if (MaybeFail()) return; MaybeMutate(); status_ = Status::OK(); iter = 0; SkipForward(); } void SeekToLast() override { if (MaybeFail()) return; MaybeMutate(); status_ = Status::OK(); iter = (int)data->entries.size() - 1; SkipBackward(); } void Seek(const Slice& target) override { if (MaybeFail()) return; MaybeMutate(); status_ = Status::OK(); // Binary search. auto it = std::partition_point( data->entries.begin(), data->entries.end(), [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; }); iter = (int)(it - data->entries.begin()); SkipForward(); } void SeekForPrev(const Slice& target) override { if (MaybeFail()) return; MaybeMutate(); status_ = Status::OK(); // Binary search. 
auto it = std::partition_point( data->entries.begin(), data->entries.end(), [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; }); iter = (int)(it - data->entries.begin()); --iter; SkipBackward(); } void Next() override { assert(Valid()); if (MaybeFail()) return; MaybeMutate(); ++iter; SkipForward(); } void Prev() override { assert(Valid()); if (MaybeFail()) return; MaybeMutate(); --iter; SkipBackward(); } Slice key() const override { assert(Valid()); return data->entries[iter].ikey; } Slice value() const override { assert(Valid()); return data->entries[iter].value; } bool IsKeyPinned() const override { return true; } bool IsValuePinned() const override { return true; } }; // A small reimplementation of DBIter, supporting only some of the features, // and doing everything in O(log n). // Skips all keys that are in recently_touched_keys. struct ReferenceIterator { Data* data; uint64_t sequence; // ignore entries with sequence number below this bool valid = false; std::string key; std::string value; ReferenceIterator(Data* _data, uint64_t _sequence) : data(_data), sequence(_sequence) {} bool Valid() const { return valid; } // Finds the first entry with key // greater/less/greater-or-equal/less-or-equal than `key`, depending on // arguments: if `skip`, inequality is strict; if `forward`, it's // greater/greater-or-equal, otherwise less/less-or-equal. // Sets `key` to the result. // If no such key exists, returns false. Doesn't check `visible`. 
bool FindNextKey(bool skip, bool forward) { valid = false; auto it = std::partition_point(data->entries.begin(), data->entries.end(), [&](const Entry& e) { if (forward != skip) { return e.key < key; } else { return e.key <= key; } }); if (forward) { if (it != data->entries.end()) { key = it->key; return true; } } else { if (it != data->entries.begin()) { --it; key = it->key; return true; } } return false; } bool FindValueForCurrentKey() { if (data->recently_touched_keys.count(key)) { return false; } // Find the first entry for the key. The caller promises that it exists. auto it = std::partition_point(data->entries.begin(), data->entries.end(), [&](const Entry& e) { if (e.key != key) { return e.key < key; } return e.sequence > sequence; }); // Find the first visible entry. for (;; ++it) { if (it == data->entries.end()) { return false; } Entry& e = *it; if (e.key != key) { return false; } assert(e.sequence <= sequence); if (!e.visible) continue; if (e.type == kTypeDeletion) { return false; } if (e.type == kTypeValue) { value = e.value; valid = true; return true; } assert(e.type == kTypeMerge); break; } // Collect merge operands. std::vector operands; for (; it != data->entries.end(); ++it) { Entry& e = *it; if (e.key != key) { break; } assert(e.sequence <= sequence); if (!e.visible) continue; if (e.type == kTypeDeletion) { break; } operands.push_back(e.value); if (e.type == kTypeValue) { break; } } // Do a merge. value = operands.back().ToString(); for (int i = (int)operands.size() - 2; i >= 0; --i) { value.append(","); value.append(operands[i].data(), operands[i].size()); } valid = true; return true; } // Start at `key` and move until we encounter a valid value. // `forward` defines the direction of movement. // If `skip` is true, we're looking for key not equal to `key`. 
void DoTheThing(bool skip, bool forward) { while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) { skip = true; } } void Seek(const Slice& target) { key = target.ToString(); DoTheThing(false, true); } void SeekForPrev(const Slice& target) { key = target.ToString(); DoTheThing(false, false); } void SeekToFirst() { Seek(""); } void SeekToLast() { key = data->entries.back().key; DoTheThing(false, false); } void Next() { assert(Valid()); DoTheThing(true, true); } void Prev() { assert(Valid()); DoTheThing(true, false); } }; } // namespace // Use an internal iterator that sometimes returns errors and sometimes // adds/removes entries on the fly. Do random operations on a DBIter and // check results. // TODO: can be improved for more coverage: // * Override IsKeyPinned() and IsValuePinned() to actually use // PinnedIteratorManager and check that there's no use-after free. // * Try different combinations of prefix_extractor, total_order_seek, // prefix_same_as_start, iterate_lower_bound, iterate_upper_bound. TEST_F(DBIteratorStressTest, StressTest) { // We use a deterministic RNG, and everything happens in a single thread. Random64 rnd(826909345792864532ll); auto gen_key = [&](int max_key) { assert(max_key > 0); int len = 0; int a = max_key; while (a) { a /= 10; ++len; } std::string s = ToString(rnd.Next() % static_cast(max_key)); s.insert(0, len - (int)s.size(), '0'); return s; }; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); ReadOptions ropt; size_t num_matching = 0; size_t num_at_end = 0; size_t num_not_ok = 0; size_t num_recently_removed = 0; // Number of iterations for each combination of parameters // (there are ~250 of those). // Tweak this to change the test run time. // As of the time of writing, the test takes ~4 seconds for value of 5000. const int num_iterations = 5000; // Enable this to print all the operations for debugging. 
bool trace = FLAGS_verbose; for (int num_entries : {5, 10, 100}) { for (double key_space : {0.1, 1.0, 3.0}) { for (ValueType prevalent_entry_type : {kTypeValue, kTypeDeletion, kTypeMerge}) { for (double error_probability : {0.01, 0.1}) { for (double mutation_probability : {0.01, 0.5}) { for (double target_hidden_fraction : {0.1, 0.5}) { std::string trace_str = "entries: " + ToString(num_entries) + ", key_space: " + ToString(key_space) + ", error_probability: " + ToString(error_probability) + ", mutation_probability: " + ToString(mutation_probability) + ", target_hidden_fraction: " + ToString(target_hidden_fraction); SCOPED_TRACE(trace_str); if (trace) { std::cout << trace_str << std::endl; } // Generate data. Data data; int max_key = (int)(num_entries * key_space) + 1; for (int i = 0; i < num_entries; ++i) { Entry e; e.key = gen_key(max_key); if (rnd.Next() % 10 != 0) { e.type = prevalent_entry_type; } else { const ValueType types[] = {kTypeValue, kTypeDeletion, kTypeMerge}; e.type = types[rnd.Next() % (sizeof(types) / sizeof(types[0]))]; } e.sequence = i; e.value = "v" + ToString(i); ParsedInternalKey internal_key(e.key, e.sequence, e.type); AppendInternalKey(&e.ikey, internal_key); data.entries.push_back(e); } std::sort(data.entries.begin(), data.entries.end()); if (trace) { std::cout << "entries:"; for (size_t i = 0; i < data.entries.size(); ++i) { Entry& e = data.entries[i]; std::cout << "\n idx " << i << ": \"" << e.key << "\": \"" << e.value << "\" seq: " << e.sequence << " type: " << (e.type == kTypeValue ? "val" : e.type == kTypeDeletion ? "del" : "merge"); } std::cout << std::endl; } std::unique_ptr db_iter; std::unique_ptr ref_iter; for (int iteration = 0; iteration < num_iterations; ++iteration) { SCOPED_TRACE(iteration); // Create a new iterator every ~30 operations. 
if (db_iter == nullptr || rnd.Next() % 30 == 0) { uint64_t sequence = rnd.Next() % (data.entries.size() + 2); ref_iter.reset(new ReferenceIterator(&data, sequence)); if (trace) { std::cout << "new iterator, seq: " << sequence << std::endl; } auto internal_iter = new StressTestIterator(&data, &rnd, BytewiseComparator()); internal_iter->error_probability = error_probability; internal_iter->mutation_probability = mutation_probability; internal_iter->target_hidden_fraction = target_hidden_fraction; internal_iter->trace = trace; db_iter.reset(NewDBIterator( env_, ropt, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, sequence, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); } // Do a random operation. It's important to do it on ref_it // later than on db_iter to make sure ref_it sees the correct // recently_touched_keys. std::string old_key; bool forward = rnd.Next() % 2 > 0; // Do Next()/Prev() ~90% of the time. bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0; if (trace) { std::cout << iteration << ": "; } if (!seek) { assert(db_iter->Valid()); old_key = ref_iter->key; if (trace) { std::cout << (forward ? "Next" : "Prev") << std::endl; } if (forward) { db_iter->Next(); ref_iter->Next(); } else { db_iter->Prev(); ref_iter->Prev(); } } else { data.recently_touched_keys.clear(); // Do SeekToFirst less often than Seek. if (rnd.Next() % 4 == 0) { if (trace) { std::cout << (forward ? "SeekToFirst" : "SeekToLast") << std::endl; } if (forward) { old_key = ""; db_iter->SeekToFirst(); ref_iter->SeekToFirst(); } else { old_key = data.entries.back().key; db_iter->SeekToLast(); ref_iter->SeekToLast(); } } else { old_key = gen_key(max_key); if (trace) { std::cout << (forward ? "Seek" : "SeekForPrev") << " \"" << old_key << '"' << std::endl; } if (forward) { db_iter->Seek(old_key); ref_iter->Seek(old_key); } else { db_iter->SeekForPrev(old_key); ref_iter->SeekForPrev(old_key); } } } // Check the result. 
if (db_iter->Valid()) { ASSERT_TRUE(db_iter->status().ok()); if (data.recently_touched_keys.count( db_iter->key().ToString())) { // Ended on a key that may have been mutated during the // operation. Reference iterator skips such keys, so we // can't check the exact result. // Check that the key moved in the right direction. if (forward) { if (seek) ASSERT_GE(db_iter->key().ToString(), old_key); else ASSERT_GT(db_iter->key().ToString(), old_key); } else { if (seek) ASSERT_LE(db_iter->key().ToString(), old_key); else ASSERT_LT(db_iter->key().ToString(), old_key); } if (ref_iter->Valid()) { // Check that DBIter didn't miss any non-mutated key. if (forward) { ASSERT_LT(db_iter->key().ToString(), ref_iter->key); } else { ASSERT_GT(db_iter->key().ToString(), ref_iter->key); } } // Tell the next iteration of the loop to reseek the // iterators. ref_iter->valid = false; ++num_recently_removed; } else { ASSERT_TRUE(ref_iter->Valid()); ASSERT_EQ(ref_iter->key, db_iter->key().ToString()); ASSERT_EQ(ref_iter->value, db_iter->value()); ++num_matching; } } else if (db_iter->status().ok()) { ASSERT_FALSE(ref_iter->Valid()); ++num_at_end; } else { // Non-ok status. Nothing to check here. // Tell the next iteration of the loop to reseek the // iterators. ref_iter->valid = false; ++num_not_ok; } } } } } } } } // Check that all cases were hit many times. 
EXPECT_GT(num_matching, 10000); EXPECT_GT(num_at_end, 10000); EXPECT_GT(num_not_ok, 10000); EXPECT_GT(num_recently_removed, 10000); std::cout << "stats:\n exact matches: " << num_matching << "\n end reached: " << num_at_end << "\n non-ok status: " << num_not_ok << "\n mutated on the fly: " << num_recently_removed << std::endl; } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_iter_test.cc000066400000000000000000003270341370372246700166210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include #include "db/db_iter.h" #include "db/dbformat.h" #include "rocksdb/comparator.h" #include "rocksdb/options.h" #include "rocksdb/perf_context.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "table/iterator_wrapper.h" #include "table/merging_iterator.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { static uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) { return options.statistics->getTickerCount(ticker_type); } class TestIterator : public InternalIterator { public: explicit TestIterator(const Comparator* comparator) : initialized_(false), valid_(false), sequence_number_(0), iter_(0), cmp(comparator) { data_.reserve(16); } void AddPut(std::string argkey, std::string argvalue) { Add(argkey, kTypeValue, argvalue); } void AddDeletion(std::string argkey) { Add(argkey, kTypeDeletion, std::string()); } void AddSingleDeletion(std::string argkey) { Add(argkey, kTypeSingleDeletion, std::string()); } void 
AddMerge(std::string argkey, std::string argvalue) { Add(argkey, kTypeMerge, argvalue); } void Add(std::string argkey, ValueType type, std::string argvalue) { Add(argkey, type, argvalue, sequence_number_++); } void Add(std::string argkey, ValueType type, std::string argvalue, size_t seq_num, bool update_iter = false) { valid_ = true; ParsedInternalKey internal_key(argkey, seq_num, type); data_.push_back( std::pair(std::string(), argvalue)); AppendInternalKey(&data_.back().first, internal_key); if (update_iter && valid_ && cmp.Compare(data_.back().first, key()) < 0) { // insert a key smaller than current key Finish(); // data_[iter_] is not anymore the current element of the iterator. // Increment it to reposition it to the right position. iter_++; } } // should be called before operations with iterator void Finish() { initialized_ = true; std::sort(data_.begin(), data_.end(), [this](std::pair a, std::pair b) { return (cmp.Compare(a.first, b.first) < 0); }); } // Removes the key from the set of keys over which this iterator iterates. // Not to be confused with AddDeletion(). // If the iterator is currently positioned on this key, the deletion will // apply next time the iterator moves. // Used for simulating ForwardIterator updating to a new version that doesn't // have some of the keys (e.g. after compaction with a filter). void Vanish(std::string _key) { if (valid_ && data_[iter_].first == _key) { delete_current_ = true; return; } for (auto it = data_.begin(); it != data_.end(); ++it) { ParsedInternalKey ikey; bool ok __attribute__((__unused__)) = ParseInternalKey(it->first, &ikey); assert(ok); if (ikey.user_key != _key) { continue; } if (valid_ && data_.begin() + iter_ > it) { --iter_; } data_.erase(it); return; } assert(false); } // Number of operations done on this iterator since construction. 
size_t steps() const { return steps_; } bool Valid() const override { assert(initialized_); return valid_; } void SeekToFirst() override { assert(initialized_); ++steps_; DeleteCurrentIfNeeded(); valid_ = (data_.size() > 0); iter_ = 0; } void SeekToLast() override { assert(initialized_); ++steps_; DeleteCurrentIfNeeded(); valid_ = (data_.size() > 0); iter_ = data_.size() - 1; } void Seek(const Slice& target) override { assert(initialized_); SeekToFirst(); ++steps_; if (!valid_) { return; } while (iter_ < data_.size() && (cmp.Compare(data_[iter_].first, target) < 0)) { ++iter_; } if (iter_ == data_.size()) { valid_ = false; } } void SeekForPrev(const Slice& target) override { assert(initialized_); DeleteCurrentIfNeeded(); SeekForPrevImpl(target, &cmp); } void Next() override { assert(initialized_); assert(valid_); assert(iter_ < data_.size()); ++steps_; if (delete_current_) { DeleteCurrentIfNeeded(); } else { ++iter_; } valid_ = iter_ < data_.size(); } void Prev() override { assert(initialized_); assert(valid_); assert(iter_ < data_.size()); ++steps_; DeleteCurrentIfNeeded(); if (iter_ == 0) { valid_ = false; } else { --iter_; } } Slice key() const override { assert(initialized_); return data_[iter_].first; } Slice value() const override { assert(initialized_); return data_[iter_].second; } Status status() const override { assert(initialized_); return Status::OK(); } bool IsKeyPinned() const override { return true; } bool IsValuePinned() const override { return true; } private: bool initialized_; bool valid_; size_t sequence_number_; size_t iter_; size_t steps_ = 0; InternalKeyComparator cmp; std::vector> data_; bool delete_current_ = false; void DeleteCurrentIfNeeded() { if (!delete_current_) { return; } data_.erase(data_.begin() + iter_); delete_current_ = false; } }; class DBIteratorTest : public testing::Test { public: Env* env_; DBIteratorTest() : env_(Env::Default()) {} }; TEST_F(DBIteratorTest, DBIteratorPrevNext) { Options options; ImmutableCFOptions 
cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddDeletion("a"); internal_iter->AddDeletion("a"); internal_iter->AddDeletion("a"); internal_iter->AddDeletion("a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); ReadOptions ro; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); } // Test to check the SeekToLast() with iterate_upper_bound not set { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); ReadOptions ro; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); } // Test to check the SeekToLast() with iterate_upper_bound set { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("d", 
"val_d"); internal_iter->AddPut("e", "val_e"); internal_iter->AddPut("f", "val_f"); internal_iter->Finish(); Slice prefix("d"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); } // Test to check the SeekToLast() iterate_upper_bound set to a key that // is not Put yet { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("d", "val_d"); internal_iter->Finish(); Slice prefix("z"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "d"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "d"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); } // Test to check the SeekToLast() with iterate_upper_bound set to the // first key { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); Slice prefix("a"); ReadOptions ro; 
ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); } // Test case to check SeekToLast with iterate_upper_bound set // (same key put may times - SeekToLast should start with the // maximum sequence id of the upper bound) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); Slice prefix("c"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 7, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); SetPerfLevel(kEnableCount); ASSERT_TRUE(GetPerfLevel() == kEnableCount); get_perf_context()->Reset(); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(static_cast(get_perf_context()->internal_key_skipped_count), 1); ASSERT_EQ(db_iter->key().ToString(), "b"); SetPerfLevel(kDisable); } // Test to check the SeekToLast() with the iterate_upper_bound set // (Checking the value of the key which has sequence ids greater than // and less that the iterator's sequence id) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a1"); internal_iter->AddPut("a", "val_a2"); internal_iter->AddPut("b", "val_b1"); internal_iter->AddPut("c", "val_c1"); internal_iter->AddPut("c", "val_c2"); internal_iter->AddPut("c", "val_c3"); internal_iter->AddPut("b", "val_b2"); internal_iter->AddPut("d", 
"val_d1"); internal_iter->Finish(); Slice prefix("c"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 4, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b1"); } // Test to check the SeekToLast() with the iterate_upper_bound set to the // key that is deleted { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddDeletion("a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); Slice prefix("a"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); } // Test to check the SeekToLast() with the iterate_upper_bound set // (Deletion cases) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddDeletion("b"); internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); Slice prefix("c"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); } // Test to check the SeekToLast() with iterate_upper_bound 
set // (Deletion cases - Lot of internal keys after the upper_bound // is deleted) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddDeletion("c"); internal_iter->AddDeletion("d"); internal_iter->AddDeletion("e"); internal_iter->AddDeletion("f"); internal_iter->AddDeletion("g"); internal_iter->AddDeletion("h"); internal_iter->Finish(); Slice prefix("c"); ReadOptions ro; ro.iterate_upper_bound = &prefix; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 7, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); SetPerfLevel(kEnableCount); ASSERT_TRUE(GetPerfLevel() == kEnableCount); get_perf_context()->Reset(); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(static_cast(get_perf_context()->internal_delete_skipped_count), 0); ASSERT_EQ(db_iter->key().ToString(), "b"); SetPerfLevel(kDisable); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddDeletion("a"); internal_iter->AddDeletion("a"); internal_iter->AddDeletion("a"); internal_iter->AddDeletion("a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); ReadOptions ro; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Prev(); 
ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); ReadOptions ro; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b"); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); ReadOptions ro; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val_b"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); 
ASSERT_EQ(db_iter->value().ToString(), "val_c"); } } TEST_F(DBIteratorTest, DBIteratorEmpty) { Options options; ImmutableCFOptions cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); ReadOptions ro; { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 0, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 0, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(!db_iter->Valid()); } } TEST_F(DBIteratorTest, DBIteratorUseSkipCountSkips) { ReadOptions ro; Options options; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); for (size_t i = 0; i < 200; ++i) { internal_iter->AddPut("a", "a"); internal_iter->AddPut("b", "b"); internal_iter->AddPut("c", "c"); } internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "c"); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1u); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "b"); 
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2u); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "a"); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3u); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3u); } TEST_F(DBIteratorTest, DBIteratorUseSkip) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); ImmutableCFOptions cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); { for (size_t i = 0; i < 200; ++i) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("b", "merge_1"); internal_iter->AddMerge("a", "merge_2"); for (size_t k = 0; k < 200; ++k) { internal_iter->AddPut("c", ToString(k)); } internal_iter->Finish(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, i + 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), ToString(i)); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } } { for (size_t i = 0; i < 200; ++i) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("b", "merge_1"); internal_iter->AddMerge("a", "merge_2"); for (size_t k = 0; k < 200; ++k) { internal_iter->AddDeletion("c"); } 
internal_iter->AddPut("c", "200"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, i + 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("b", "merge_1"); internal_iter->AddMerge("a", "merge_2"); for (size_t i = 0; i < 200; ++i) { internal_iter->AddDeletion("c"); } internal_iter->AddPut("c", "200"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 202, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "200"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } } { for (size_t i = 0; i < 200; ++i) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); for (size_t k = 0; k < 200; ++k) { internal_iter->AddDeletion("c"); } internal_iter->AddPut("c", "200"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, i, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); 
db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); db_iter->SeekToFirst(); ASSERT_TRUE(!db_iter->Valid()); } TestIterator* internal_iter = new TestIterator(BytewiseComparator()); for (size_t i = 0; i < 200; ++i) { internal_iter->AddDeletion("c"); } internal_iter->AddPut("c", "200"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 200, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "200"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "200"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); } { for (size_t i = 0; i < 200; ++i) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("b", "merge_1"); internal_iter->AddMerge("a", "merge_2"); for (size_t k = 0; k < 200; ++k) { internal_iter->AddPut("d", ToString(k)); } for (size_t k = 0; k < 200; ++k) { internal_iter->AddPut("c", ToString(k)); } internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, i + 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "d"); ASSERT_EQ(db_iter->value().ToString(), ToString(i)); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } } { for (size_t i = 0; i < 200; ++i) { 
TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("b", "b"); internal_iter->AddMerge("a", "a"); for (size_t k = 0; k < 200; ++k) { internal_iter->AddMerge("c", ToString(k)); } internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, i + 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); std::string merge_result = "0"; for (size_t j = 1; j <= i; ++j) { merge_result += "," + ToString(j); } ASSERT_EQ(db_iter->value().ToString(), merge_result); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "b"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "a"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } } } TEST_F(DBIteratorTest, DBIteratorSkipInternalKeys) { Options options; ImmutableCFOptions cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); ReadOptions ro; // Basic test case ... Make sure explicityly passing the default value works. // Skipping internal keys is disabled by default, when the value is 0. 
{ TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddDeletion("b"); internal_iter->AddDeletion("b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddPut("c", "val_c"); internal_iter->AddDeletion("c"); internal_iter->AddPut("d", "val_d"); internal_iter->Finish(); ro.max_skippable_internal_keys = 0; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "d"); ASSERT_EQ(db_iter->value().ToString(), "val_d"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().ok()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "d"); ASSERT_EQ(db_iter->value().ToString(), "val_d"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().ok()); } // Test to make sure that the request will *not* fail as incomplete if // num_internal_keys_skipped is *equal* to max_skippable_internal_keys // threshold. (It will fail as incomplete only when the threshold is // exceeded.) 
{
  // Two deletions of "b" sit between "a" and "c", which equals the limit of
  // 2 set below; both directions are expected to finish with an OK status.
  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "val_a");
  internal_iter->AddDeletion("b");
  internal_iter->AddDeletion("b");
  internal_iter->AddPut("c", "val_c");
  internal_iter->Finish();

  ro.max_skippable_internal_keys = 2;
  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, cf_options, mutable_cf_options, BytewiseComparator(),
      internal_iter, 10, options.max_sequential_skip_in_iterations,
      nullptr /*read_callback*/));

  // Forward pass.
  db_iter->SeekToFirst();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "a");
  ASSERT_EQ(db_iter->value().ToString(), "val_a");

  db_iter->Next();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "c");
  ASSERT_EQ(db_iter->value().ToString(), "val_c");

  db_iter->Next();
  ASSERT_TRUE(!db_iter->Valid());
  ASSERT_TRUE(db_iter->status().ok());

  // Backward pass.
  db_iter->SeekToLast();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "c");
  ASSERT_EQ(db_iter->value().ToString(), "val_c");

  db_iter->Prev();
  // Confirm the iterator is positioned before reading key()/value(), the
  // same way the forward pass does.
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "a");
  ASSERT_EQ(db_iter->value().ToString(), "val_a");

  db_iter->Prev();
  ASSERT_TRUE(!db_iter->Valid());
  ASSERT_TRUE(db_iter->status().ok());
}

// Fail the request as incomplete when num_internal_keys_skipped >
// max_skippable_internal_keys
{
  // Three deletions of "b" against a limit of 2.
  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "val_a");
  internal_iter->AddDeletion("b");
  internal_iter->AddDeletion("b");
  internal_iter->AddDeletion("b");
  internal_iter->AddPut("c", "val_c");
  internal_iter->Finish();

  ro.max_skippable_internal_keys = 2;
  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, cf_options, mutable_cf_options, BytewiseComparator(),
      internal_iter, 10, options.max_sequential_skip_in_iterations,
      nullptr /*read_callback*/));

  db_iter->SeekToFirst();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "a");
  ASSERT_EQ(db_iter->value().ToString(), "val_a");

  db_iter->Next();
  ASSERT_TRUE(!db_iter->Valid());
ASSERT_TRUE(db_iter->status().IsIncomplete()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } // Test that the num_internal_keys_skipped counter resets after a successful // read. { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddDeletion("b"); internal_iter->AddDeletion("b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddDeletion("d"); internal_iter->AddDeletion("d"); internal_iter->AddDeletion("d"); internal_iter->AddPut("e", "val_e"); internal_iter->Finish(); ro.max_skippable_internal_keys = 2; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); db_iter->Next(); // num_internal_keys_skipped counter resets here. ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } // Test that the num_internal_keys_skipped counter resets after a successful // read. 
// Reverse direction { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddDeletion("b"); internal_iter->AddDeletion("b"); internal_iter->AddDeletion("b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddDeletion("d"); internal_iter->AddDeletion("d"); internal_iter->AddPut("e", "val_e"); internal_iter->Finish(); ro.max_skippable_internal_keys = 2; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "e"); ASSERT_EQ(db_iter->value().ToString(), "val_e"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); db_iter->Prev(); // num_internal_keys_skipped counter resets here. ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } // Test that skipping separate keys is handled { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddDeletion("b"); internal_iter->AddDeletion("c"); internal_iter->AddDeletion("d"); internal_iter->AddPut("e", "val_e"); internal_iter->Finish(); ro.max_skippable_internal_keys = 2; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "e"); ASSERT_EQ(db_iter->value().ToString(), "val_e"); 
db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } // Test if alternating puts and deletes of the same key are handled correctly. { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddPut("b", "val_b"); internal_iter->AddDeletion("b"); internal_iter->AddPut("c", "val_c"); internal_iter->AddDeletion("c"); internal_iter->AddPut("d", "val_d"); internal_iter->AddDeletion("d"); internal_iter->AddPut("e", "val_e"); internal_iter->Finish(); ro.max_skippable_internal_keys = 2; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "e"); ASSERT_EQ(db_iter->value().ToString(), "val_e"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } // Test for large number of skippable internal keys with *default* // max_sequential_skip_in_iterations. 
{ for (size_t i = 1; i <= 200; ++i) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); for (size_t j = 1; j <= i; ++j) { internal_iter->AddPut("b", "val_b"); internal_iter->AddDeletion("b"); } internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); ro.max_skippable_internal_keys = i; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 2 * i + 1, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); if ((options.max_sequential_skip_in_iterations + 1) >= ro.max_skippable_internal_keys) { ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } else { ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); } db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); db_iter->Prev(); if ((options.max_sequential_skip_in_iterations + 1) >= ro.max_skippable_internal_keys) { ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } else { ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); } } } // Test for large number of skippable internal keys with a *non-default* // max_sequential_skip_in_iterations. 
{ for (size_t i = 1; i <= 200; ++i) { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); for (size_t j = 1; j <= i; ++j) { internal_iter->AddPut("b", "val_b"); internal_iter->AddDeletion("b"); } internal_iter->AddPut("c", "val_c"); internal_iter->Finish(); options.max_sequential_skip_in_iterations = 1000; ro.max_skippable_internal_keys = i; std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 2 * i + 1, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "val_a"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "val_c"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); ASSERT_TRUE(db_iter->status().IsIncomplete()); } } } TEST_F(DBIteratorTest, DBIterator1) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "0"); internal_iter->AddPut("b", "0"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("a", "1"); internal_iter->AddMerge("b", "2"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 1, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "0"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); db_iter->Next(); 
ASSERT_FALSE(db_iter->Valid());
}  // end of the test case that starts above this line

// Internal entries, in insertion order: Put(a,0), Put(b,0), Delete(b),
// Merge(a,1), Merge(b,2). With 0 passed to NewDBIterator() after the internal
// iterator (presumably the read sequence -- confirm against the
// NewDBIterator declaration), only "a" -> "0" is visible going forward.
TEST_F(DBIteratorTest, DBIterator2) {
  ReadOptions ro;
  Options options;
  options.merge_operator = MergeOperators::CreateFromStringId("stringappend");

  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "0");
  internal_iter->AddPut("b", "0");
  internal_iter->AddDeletion("b");
  internal_iter->AddMerge("a", "1");
  internal_iter->AddMerge("b", "2");
  internal_iter->Finish();

  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
      BytewiseComparator(), internal_iter, 0,
      options.max_sequential_skip_in_iterations, nullptr /*read_callback*/));

  db_iter->SeekToFirst();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "a");
  ASSERT_EQ(db_iter->value().ToString(), "0");

  db_iter->Next();
  ASSERT_TRUE(!db_iter->Valid());
}

// Same five internal entries as DBIterator2, but with 2 passed in that
// position. "a" still reads "0" and nothing follows it.
TEST_F(DBIteratorTest, DBIterator3) {
  ReadOptions ro;
  Options options;
  options.merge_operator = MergeOperators::CreateFromStringId("stringappend");

  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "0");
  internal_iter->AddPut("b", "0");
  internal_iter->AddDeletion("b");
  internal_iter->AddMerge("a", "1");
  internal_iter->AddMerge("b", "2");
  internal_iter->Finish();

  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
      BytewiseComparator(), internal_iter, 2,
      options.max_sequential_skip_in_iterations, nullptr /*read_callback*/));

  db_iter->SeekToFirst();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "a");
  ASSERT_EQ(db_iter->value().ToString(), "0");

  db_iter->Next();
  ASSERT_TRUE(!db_iter->Valid());
}

TEST_F(DBIteratorTest, DBIterator4) {
  ReadOptions ro;
  Options options;
  options.merge_operator = MergeOperators::CreateFromStringId("stringappend");

  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "0");
  internal_iter->AddPut("b", "0");
  internal_iter->AddDeletion("b");
internal_iter->AddMerge("a", "1"); internal_iter->AddMerge("b", "2"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 4, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "0,1"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "2"); db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); } TEST_F(DBIteratorTest, DBIterator5) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); ImmutableCFOptions cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 0, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", 
"merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 1, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2,merge_3"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 3, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "put_1"); 
db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 4, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 5, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddPut("a", "put_1"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); 
std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 6, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4,merge_5,merge_6"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { // put, singledelete, merge TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "val_a"); internal_iter->AddSingleDeletion("a"); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddPut("b", "val_b"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->Seek("b"); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); } } TEST_F(DBIteratorTest, DBIterator6) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); ImmutableCFOptions cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 0, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); 
ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 1, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2,merge_3"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", 
"merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 3, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 4, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_4"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 5, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); 
internal_iter->AddMerge("a", "merge_1"); internal_iter->AddMerge("a", "merge_2"); internal_iter->AddMerge("a", "merge_3"); internal_iter->AddDeletion("a"); internal_iter->AddMerge("a", "merge_4"); internal_iter->AddMerge("a", "merge_5"); internal_iter->AddMerge("a", "merge_6"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 6, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5,merge_6"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } } TEST_F(DBIteratorTest, DBIterator7) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); ImmutableCFOptions cf_options = ImmutableCFOptions(options); MutableCFOptions mutable_cf_options = MutableCFOptions(options); { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 0, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); 
ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 2, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "val,merge_2"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); 
internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 4, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_3"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 5, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "merge_4"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_3"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); 
internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 6, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_3"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); 
internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 7, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 9, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_6,merge_7"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* 
internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", "merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 13, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_6,merge_7,merge_8,merge_9,merge_10,merge_11"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } { TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddMerge("a", "merge_1"); internal_iter->AddPut("b", "val"); internal_iter->AddMerge("b", "merge_2"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_3"); internal_iter->AddMerge("c", "merge_4"); internal_iter->AddMerge("c", "merge_5"); internal_iter->AddDeletion("b"); internal_iter->AddMerge("b", "merge_6"); internal_iter->AddMerge("b", "merge_7"); internal_iter->AddMerge("b", "merge_8"); internal_iter->AddMerge("b", "merge_9"); internal_iter->AddMerge("b", 
"merge_10"); internal_iter->AddMerge("b", "merge_11"); internal_iter->AddDeletion("c"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, cf_options, mutable_cf_options, BytewiseComparator(), internal_iter, 14, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "merge_6,merge_7,merge_8,merge_9,merge_10,merge_11"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); } } TEST_F(DBIteratorTest, DBIterator8) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddDeletion("a"); internal_iter->AddPut("a", "0"); internal_iter->AddPut("b", "0"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "0"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "0"); } // TODO(3.13): fix the issue of Seek() then Prev() which might not necessary // return the biggest element smaller than the seek key. 
TEST_F(DBIteratorTest, DBIterator9) {
  // Direction changes on merge-only keys "a", "b" and "d": every
  // Seek()/SeekForPrev()/SeekToLast() is followed by a step in the opposite
  // direction and the fully merged value of the neighbouring key is checked.
  ReadOptions ro;
  Options options;
  options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddMerge("a", "merge_1");
    internal_iter->AddMerge("a", "merge_2");
    internal_iter->AddMerge("b", "merge_3");
    internal_iter->AddMerge("b", "merge_4");
    internal_iter->AddMerge("d", "merge_5");
    internal_iter->AddMerge("d", "merge_6");
    internal_iter->Finish();

    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
        BytewiseComparator(), internal_iter, 10,
        options.max_sequential_skip_in_iterations, nullptr /*read_callback*/));

    // SeekToLast() -> Prev() -> Next()
    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    db_iter->Prev();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "b");
    ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4");
    db_iter->Next();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "d");
    ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6");

    // Seek() to an existing key, then Prev()
    db_iter->Seek("b");
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "b");
    ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4");
    db_iter->Prev();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2");

    // SeekForPrev() to an existing key, then Next()
    db_iter->SeekForPrev("b");
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "b");
    ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4");
    db_iter->Next();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "d");
    ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6");

    // Seek() to a missing key ("c"), then Prev()
    db_iter->Seek("c");
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "d");
    ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6");
    db_iter->Prev();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "b");
    ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4");

    // SeekForPrev() to a missing key ("c"), then Next()
    db_iter->SeekForPrev("c");
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "b");
    ASSERT_EQ(db_iter->value().ToString(), "merge_3,merge_4");
    db_iter->Next();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "d");
    ASSERT_EQ(db_iter->value().ToString(), "merge_5,merge_6");
  }
}

// TODO(3.13): fix the issue of Seek() then Prev() which might not necessarily
// return the biggest element smaller than the seek key.
TEST_F(DBIteratorTest, DBIterator10) {
  // Direction changes on plain puts "a".."d": Seek("c") then Prev()/Next(),
  // and SeekForPrev("c") then Next()/Prev().
  ReadOptions ro;
  Options options;

  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "1");
  internal_iter->AddPut("b", "2");
  internal_iter->AddPut("c", "3");
  internal_iter->AddPut("d", "4");
  internal_iter->Finish();

  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
      BytewiseComparator(), internal_iter, 10,
      options.max_sequential_skip_in_iterations, nullptr /*read_callback*/));

  db_iter->Seek("c");
  ASSERT_TRUE(db_iter->Valid());
  db_iter->Prev();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "b");
  ASSERT_EQ(db_iter->value().ToString(), "2");

  db_iter->Next();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "c");
  ASSERT_EQ(db_iter->value().ToString(), "3");

  db_iter->SeekForPrev("c");
  ASSERT_TRUE(db_iter->Valid());
  db_iter->Next();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "d");
  ASSERT_EQ(db_iter->value().ToString(), "4");

  db_iter->Prev();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(db_iter->key().ToString(), "c");
  ASSERT_EQ(db_iter->value().ToString(), "3");
}

TEST_F(DBIteratorTest, SeekToLastOccurrenceSeq0) {
  ReadOptions ro;
  Options options;
  options.merge_operator = nullptr;

  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "1");
  internal_iter->AddPut("b", "2");
  internal_iter->Finish();

  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, ImmutableCFOptions(options),
MutableCFOptions(options), BytewiseComparator(), internal_iter, 10, 0 /* force seek */, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "1"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "2"); db_iter->Next(); ASSERT_FALSE(db_iter->Valid()); } TEST_F(DBIteratorTest, DBIterator11) { ReadOptions ro; Options options; options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "0"); internal_iter->AddPut("b", "0"); internal_iter->AddSingleDeletion("b"); internal_iter->AddMerge("a", "1"); internal_iter->AddMerge("b", "2"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 1, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); db_iter->SeekToFirst(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "0"); db_iter->Next(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); db_iter->Next(); ASSERT_FALSE(db_iter->Valid()); } TEST_F(DBIteratorTest, DBIterator12) { ReadOptions ro; Options options; options.merge_operator = nullptr; TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("a", "1"); internal_iter->AddPut("b", "2"); internal_iter->AddPut("c", "3"); internal_iter->AddSingleDeletion("b"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 10, 0, nullptr /*read_callback*/)); db_iter->SeekToLast(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "c"); 
ASSERT_EQ(db_iter->value().ToString(), "3"); db_iter->Prev(); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "1"); db_iter->Prev(); ASSERT_FALSE(db_iter->Valid()); } TEST_F(DBIteratorTest, DBIterator13) { ReadOptions ro; Options options; options.merge_operator = nullptr; std::string key; key.resize(9); key.assign(9, static_cast(0)); key[0] = 'b'; TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut(key, "0"); internal_iter->AddPut(key, "1"); internal_iter->AddPut(key, "2"); internal_iter->AddPut(key, "3"); internal_iter->AddPut(key, "4"); internal_iter->AddPut(key, "5"); internal_iter->AddPut(key, "6"); internal_iter->AddPut(key, "7"); internal_iter->AddPut(key, "8"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 2, 3, nullptr /*read_callback*/)); db_iter->Seek("b"); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), key); ASSERT_EQ(db_iter->value().ToString(), "2"); } TEST_F(DBIteratorTest, DBIterator14) { ReadOptions ro; Options options; options.merge_operator = nullptr; std::string key("b"); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddPut("b", "0"); internal_iter->AddPut("b", "1"); internal_iter->AddPut("b", "2"); internal_iter->AddPut("b", "3"); internal_iter->AddPut("a", "4"); internal_iter->AddPut("a", "5"); internal_iter->AddPut("a", "6"); internal_iter->AddPut("c", "7"); internal_iter->AddPut("c", "8"); internal_iter->AddPut("c", "9"); internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 4, 1, nullptr /*read_callback*/)); db_iter->Seek("b"); ASSERT_TRUE(db_iter->Valid()); ASSERT_EQ(db_iter->key().ToString(), "b"); ASSERT_EQ(db_iter->value().ToString(), "3"); 
db_iter->SeekToFirst(); ASSERT_EQ(db_iter->key().ToString(), "a"); ASSERT_EQ(db_iter->value().ToString(), "4"); } TEST_F(DBIteratorTest, DBIteratorTestDifferentialSnapshots) { { // test that KVs earlier that iter_start_seqnum are filtered out ReadOptions ro; ro.iter_start_seqnum=5; Options options; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); for (size_t i = 0; i < 10; ++i) { internal_iter->AddPut(std::to_string(i), std::to_string(i) + "a"); internal_iter->AddPut(std::to_string(i), std::to_string(i) + "b"); internal_iter->AddPut(std::to_string(i), std::to_string(i) + "c"); } internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 13, options.max_sequential_skip_in_iterations, nullptr)); // Expecting InternalKeys in [5,8] range with correct type int seqnums[4] = {5,8,11,13}; std::string user_keys[4] = {"1","2","3","4"}; std::string values[4] = {"1c", "2c", "3c", "4b"}; int i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { FullKey fkey; ParseFullKey(db_iter->key(), &fkey); ASSERT_EQ(user_keys[i], fkey.user_key.ToString()); ASSERT_EQ(EntryType::kEntryPut, fkey.type); ASSERT_EQ(seqnums[i], fkey.sequence); ASSERT_EQ(values[i], db_iter->value().ToString()); i++; } ASSERT_EQ(i, 4); } { // Test that deletes are returned correctly as internal KVs ReadOptions ro; ro.iter_start_seqnum=5; Options options; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); for (size_t i = 0; i < 10; ++i) { internal_iter->AddPut(std::to_string(i), std::to_string(i) + "a"); internal_iter->AddPut(std::to_string(i), std::to_string(i) + "b"); internal_iter->AddDeletion(std::to_string(i)); } internal_iter->Finish(); std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), 
MutableCFOptions(options), BytewiseComparator(), internal_iter, 13, options.max_sequential_skip_in_iterations, nullptr)); // Expecting InternalKeys in [5,8] range with correct type int seqnums[4] = {5,8,11,13}; EntryType key_types[4] = {EntryType::kEntryDelete,EntryType::kEntryDelete, EntryType::kEntryDelete,EntryType::kEntryPut}; std::string user_keys[4] = {"1","2","3","4"}; std::string values[4] = {"", "", "", "4b"}; int i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { FullKey fkey; ParseFullKey(db_iter->key(), &fkey); ASSERT_EQ(user_keys[i], fkey.user_key.ToString()); ASSERT_EQ(key_types[i], fkey.type); ASSERT_EQ(seqnums[i], fkey.sequence); ASSERT_EQ(values[i], db_iter->value().ToString()); i++; } ASSERT_EQ(i, 4); } } class DBIterWithMergeIterTest : public testing::Test { public: DBIterWithMergeIterTest() : env_(Env::Default()), icomp_(BytewiseComparator()) { options_.merge_operator = nullptr; internal_iter1_ = new TestIterator(BytewiseComparator()); internal_iter1_->Add("a", kTypeValue, "1", 3u); internal_iter1_->Add("f", kTypeValue, "2", 5u); internal_iter1_->Add("g", kTypeValue, "3", 7u); internal_iter1_->Finish(); internal_iter2_ = new TestIterator(BytewiseComparator()); internal_iter2_->Add("a", kTypeValue, "4", 6u); internal_iter2_->Add("b", kTypeValue, "5", 1u); internal_iter2_->Add("c", kTypeValue, "6", 2u); internal_iter2_->Add("d", kTypeValue, "7", 3u); internal_iter2_->Finish(); std::vector child_iters; child_iters.push_back(internal_iter1_); child_iters.push_back(internal_iter2_); InternalKeyComparator icomp(BytewiseComparator()); InternalIterator* merge_iter = NewMergingIterator(&icomp_, &child_iters[0], 2u); db_iter_.reset(NewDBIterator( env_, ro_, ImmutableCFOptions(options_), MutableCFOptions(options_), BytewiseComparator(), merge_iter, 8 /* read data earlier than seqId 8 */, 3 /* max iterators before reseek */, nullptr /*read_callback*/)); } Env* env_; ReadOptions ro_; Options options_; TestIterator* internal_iter1_; 
TestIterator* internal_iter2_; InternalKeyComparator icomp_; Iterator* merge_iter_; std::unique_ptr db_iter_; }; TEST_F(DBIterWithMergeIterTest, InnerMergeIterator1) { db_iter_->SeekToFirst(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); db_iter_->Next(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Next(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Next(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Next(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Next(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); db_iter_->Next(); ASSERT_FALSE(db_iter_->Valid()); } TEST_F(DBIterWithMergeIterTest, InnerMergeIterator2) { // Test Prev() when one child iterator is at its end. 
db_iter_->SeekForPrev("g"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace1) { // Test Prev() when one child iterator is at its end but more rows // are added. db_iter_->Seek("f"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); // Test call back inserts a key in the end of the mem table after // MergeIterator::Prev() realized the mem table iterator is at its end // and before an SeekToLast() is called. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) { internal_iter2_->Add("z", kTypeValue, "7", 12u); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace2) { // Test Prev() when one child iterator is at its end but more rows // are added. db_iter_->Seek("f"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); // Test call back inserts entries for update a key in the end of the // mem table after MergeIterator::Prev() realized the mem tableiterator is at // its end and before an SeekToLast() is called. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) { internal_iter2_->Add("z", kTypeValue, "7", 12u); internal_iter2_->Add("z", kTypeValue, "7", 11u); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace3) { // Test Prev() when one child iterator is at its end but more rows // are added and max_skipped is triggered. db_iter_->Seek("f"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); // Test call back inserts entries for update a key in the end of the // mem table after MergeIterator::Prev() realized the mem table iterator is at // its end and before an SeekToLast() is called. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* /*arg*/) { internal_iter2_->Add("z", kTypeValue, "7", 16u, true); internal_iter2_->Add("z", kTypeValue, "7", 15u, true); internal_iter2_->Add("z", kTypeValue, "7", 14u, true); internal_iter2_->Add("z", kTypeValue, "7", 13u, true); internal_iter2_->Add("z", kTypeValue, "7", 12u, true); internal_iter2_->Add("z", kTypeValue, "7", 11u, true); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace4) { // Test Prev() when one child iterator has more rows inserted // between Seek() and Prev() when changing directions. 
internal_iter2_->Add("z", kTypeValue, "9", 4u); db_iter_->Seek("g"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); // Test call back inserts entries for update a key before "z" in // mem table after MergeIterator::Prev() calls mem table iterator's // Seek() and before calling Prev() ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* arg) { IteratorWrapper* it = reinterpret_cast(arg); if (it->key().starts_with("z")) { internal_iter2_->Add("x", kTypeValue, "7", 16u, true); internal_iter2_->Add("x", kTypeValue, "7", 15u, true); internal_iter2_->Add("x", kTypeValue, "7", 14u, true); internal_iter2_->Add("x", kTypeValue, "7", 13u, true); internal_iter2_->Add("x", kTypeValue, "7", 12u, true); internal_iter2_->Add("x", kTypeValue, "7", 11u, true); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace5) { internal_iter2_->Add("z", kTypeValue, "9", 4u); // Test Prev() when one child iterator has more rows inserted // between Seek() and Prev() when changing directions. 
db_iter_->Seek("g"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); // Test call back inserts entries for update a key before "z" in // mem table after MergeIterator::Prev() calls mem table iterator's // Seek() and before calling Prev() ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* arg) { IteratorWrapper* it = reinterpret_cast(arg); if (it->key().starts_with("z")) { internal_iter2_->Add("x", kTypeValue, "7", 16u, true); internal_iter2_->Add("x", kTypeValue, "7", 15u, true); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace6) { internal_iter2_->Add("z", kTypeValue, "9", 4u); // Test Prev() when one child iterator has more rows inserted // between Seek() and Prev() when changing directions. 
db_iter_->Seek("g"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); // Test call back inserts an entry for update a key before "z" in // mem table after MergeIterator::Prev() calls mem table iterator's // Seek() and before calling Prev() ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* arg) { IteratorWrapper* it = reinterpret_cast(arg); if (it->key().starts_with("z")) { internal_iter2_->Add("x", kTypeValue, "7", 16u, true); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace7) { internal_iter1_->Add("u", kTypeValue, "10", 4u); internal_iter1_->Add("v", kTypeValue, "11", 4u); internal_iter1_->Add("w", kTypeValue, "12", 4u); internal_iter2_->Add("z", kTypeValue, "9", 4u); // Test Prev() when one child iterator has more rows inserted // between Seek() and Prev() when changing directions. 
db_iter_->Seek("g"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); // Test call back inserts entries for update a key before "z" in // mem table after MergeIterator::Prev() calls mem table iterator's // Seek() and before calling Prev() ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* arg) { IteratorWrapper* it = reinterpret_cast(arg); if (it->key().starts_with("z")) { internal_iter2_->Add("x", kTypeValue, "7", 16u, true); internal_iter2_->Add("x", kTypeValue, "7", 15u, true); internal_iter2_->Add("x", kTypeValue, "7", 14u, true); internal_iter2_->Add("x", kTypeValue, "7", 13u, true); internal_iter2_->Add("x", kTypeValue, "7", 12u, true); internal_iter2_->Add("x", kTypeValue, "7", 11u, true); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "c"); ASSERT_EQ(db_iter_->value().ToString(), "6"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "b"); ASSERT_EQ(db_iter_->value().ToString(), "5"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "a"); ASSERT_EQ(db_iter_->value().ToString(), "4"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIterWithMergeIterTest, InnerMergeIteratorDataRace8) { // internal_iter1_: a, f, g // internal_iter2_: a, b, c, d, adding (z) internal_iter2_->Add("z", kTypeValue, "9", 4u); // Test Prev() when one child iterator has more rows inserted // between Seek() and Prev() when changing directions. 
db_iter_->Seek("g"); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "g"); ASSERT_EQ(db_iter_->value().ToString(), "3"); // Test call back inserts two keys before "z" in mem table after // MergeIterator::Prev() calls mem table iterator's Seek() and // before calling Prev() ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "MergeIterator::Prev:BeforePrev", [&](void* arg) { IteratorWrapper* it = reinterpret_cast(arg); if (it->key().starts_with("z")) { internal_iter2_->Add("x", kTypeValue, "7", 16u, true); internal_iter2_->Add("y", kTypeValue, "7", 17u, true); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "f"); ASSERT_EQ(db_iter_->value().ToString(), "2"); db_iter_->Prev(); ASSERT_TRUE(db_iter_->Valid()); ASSERT_EQ(db_iter_->key().ToString(), "d"); ASSERT_EQ(db_iter_->value().ToString(), "7"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBIteratorTest, SeekPrefixTombstones) { ReadOptions ro; Options options; options.prefix_extractor.reset(NewNoopTransform()); TestIterator* internal_iter = new TestIterator(BytewiseComparator()); internal_iter->AddDeletion("b"); internal_iter->AddDeletion("c"); internal_iter->AddDeletion("d"); internal_iter->AddDeletion("e"); internal_iter->AddDeletion("f"); internal_iter->AddDeletion("g"); internal_iter->Finish(); ro.prefix_same_as_start = true; std::unique_ptr db_iter(NewDBIterator( env_, ro, ImmutableCFOptions(options), MutableCFOptions(options), BytewiseComparator(), internal_iter, 10, options.max_sequential_skip_in_iterations, nullptr /*read_callback*/)); int skipped_keys = 0; get_perf_context()->Reset(); db_iter->SeekForPrev("z"); skipped_keys = static_cast(get_perf_context()->internal_key_skipped_count); ASSERT_EQ(skipped_keys, 0); get_perf_context()->Reset(); db_iter->Seek("a"); skipped_keys = static_cast(get_perf_context()->internal_key_skipped_count); 
ASSERT_EQ(skipped_keys, 0);
}

// SeekToFirst() must honour iterate_lower_bound for every position of the
// bound relative to the keys "1".."kNumKeys".
TEST_F(DBIteratorTest, SeekToFirstLowerBound) {
  const int kNumKeys = 3;
  for (int i = 0; i < kNumKeys + 2; ++i) {
    // + 2 for two special cases: lower bound before and lower bound after the
    // internal iterator's keys
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    for (int j = 1; j <= kNumKeys; ++j) {
      internal_iter->AddPut(std::to_string(j), "val");
    }
    internal_iter->Finish();

    ReadOptions ro;
    // lower_bound_str must outlive db_iter: the Slice only references it.
    auto lower_bound_str = std::to_string(i);
    Slice lower_bound(lower_bound_str);
    ro.iterate_lower_bound = &lower_bound;
    Options options;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
        BytewiseComparator(), internal_iter, 10 /* sequence */,
        options.max_sequential_skip_in_iterations,
        nullptr /* read_callback */));

    db_iter->SeekToFirst();
    if (i == kNumKeys + 1) {
      // lower bound was beyond the last key
      ASSERT_FALSE(db_iter->Valid());
    } else {
      ASSERT_TRUE(db_iter->Valid());
      int expected;
      if (i == 0) {
        // lower bound was before the first key
        expected = 1;
      } else {
        // lower bound was at the ith key
        expected = i;
      }
      ASSERT_EQ(std::to_string(expected), db_iter->key().ToString());
    }
  }
}

// Reverse iteration from SeekToLast() must stop being valid once it steps
// below iterate_lower_bound.
TEST_F(DBIteratorTest, PrevLowerBound) {
  const int kNumKeys = 3;
  const int kLowerBound = 2;
  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  for (int j = 1; j <= kNumKeys; ++j) {
    internal_iter->AddPut(std::to_string(j), "val");
  }
  internal_iter->Finish();

  ReadOptions ro;
  auto lower_bound_str = std::to_string(kLowerBound);
  Slice lower_bound(lower_bound_str);
  ro.iterate_lower_bound = &lower_bound;
  Options options;
  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
      BytewiseComparator(), internal_iter, 10 /* sequence */,
      options.max_sequential_skip_in_iterations,
      nullptr /* read_callback */));

  db_iter->SeekToLast();
  for (int i = kNumKeys; i >= kLowerBound; --i) {
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(std::to_string(i),
db_iter->key().ToString());
    db_iter->Prev();
  }
  ASSERT_FALSE(db_iter->Valid());
}

// Seek() to a key below iterate_lower_bound must land on the lower bound key.
TEST_F(DBIteratorTest, SeekLessLowerBound) {
  const int kNumKeys = 3;
  const int kLowerBound = 2;
  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  for (int j = 1; j <= kNumKeys; ++j) {
    internal_iter->AddPut(std::to_string(j), "val");
  }
  internal_iter->Finish();

  ReadOptions ro;
  auto lower_bound_str = std::to_string(kLowerBound);
  Slice lower_bound(lower_bound_str);
  ro.iterate_lower_bound = &lower_bound;
  Options options;
  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ro, ImmutableCFOptions(options), MutableCFOptions(options),
      BytewiseComparator(), internal_iter, 10 /* sequence */,
      options.max_sequential_skip_in_iterations,
      nullptr /* read_callback */));

  // The seek target must really be the key *before* the lower bound; the
  // Slice was previously built from lower_bound_str, which made the test
  // seek exactly at the bound and left before_lower_bound_str unused.
  auto before_lower_bound_str = std::to_string(kLowerBound - 1);
  Slice before_lower_bound(before_lower_bound_str);

  db_iter->Seek(before_lower_bound);
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_EQ(lower_bound_str, db_iter->key().ToString());
}

TEST_F(DBIteratorTest, ReverseToForwardWithDisappearingKeys) {
  Options options;
  options.prefix_extractor.reset(NewCappedPrefixTransform(0));

  TestIterator* internal_iter = new TestIterator(BytewiseComparator());
  internal_iter->AddPut("a", "A");
  internal_iter->AddPut("b", "B");
  for (int i = 0; i < 100; ++i) {
    internal_iter->AddPut("c" + ToString(i), "");
  }
  internal_iter->Finish();

  std::unique_ptr<Iterator> db_iter(NewDBIterator(
      env_, ReadOptions(), ImmutableCFOptions(options),
      MutableCFOptions(options), BytewiseComparator(), internal_iter, 10,
      options.max_sequential_skip_in_iterations,
      nullptr /*read_callback*/));

  db_iter->SeekForPrev("a");
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_OK(db_iter->status());
  ASSERT_EQ("a", db_iter->key().ToString());

  // Remove the current key from under the iterator, then switch direction.
  internal_iter->Vanish("a");
  db_iter->Next();
  ASSERT_TRUE(db_iter->Valid());
  ASSERT_OK(db_iter->status());
  ASSERT_EQ("b", db_iter->key().ToString());

  // A (sort of) bug used to cause DBIter to pointlessly drag the internal
  // iterator all the way to the end.
But this doesn't really matter at the time // of writing because the only iterator that can see disappearing keys is // ForwardIterator, which doesn't support SeekForPrev(). EXPECT_LT(internal_iter->steps(), 20); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_iterator_test.cc000066400000000000000000002653441370372246700175140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include "db/arena_wrapped_db_iter.h" #include "db/db_iter.h" #include "db/db_test_util.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/iostats_context.h" #include "rocksdb/perf_context.h" #include "table/block_based/flush_block_policy.h" namespace ROCKSDB_NAMESPACE { // A dumb ReadCallback which saying every key is committed. class DummyReadCallback : public ReadCallback { public: DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {} bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; } void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; } }; // Test param: // bool: whether to pass read_callback to NewIterator(). 
class DBIteratorTest : public DBTestBase, public testing::WithParamInterface { public: DBIteratorTest() : DBTestBase("/db_iterator_test") {} Iterator* NewIterator(const ReadOptions& read_options, ColumnFamilyHandle* column_family = nullptr) { if (column_family == nullptr) { column_family = db_->DefaultColumnFamily(); } auto* cfd = reinterpret_cast(column_family)->cfd(); SequenceNumber seq = read_options.snapshot != nullptr ? read_options.snapshot->GetSequenceNumber() : db_->GetLatestSequenceNumber(); bool use_read_callback = GetParam(); DummyReadCallback* read_callback = nullptr; if (use_read_callback) { read_callback = new DummyReadCallback(); read_callback->SetSnapshot(seq); InstrumentedMutexLock lock(&mutex_); read_callbacks_.push_back( std::unique_ptr(read_callback)); } return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback); } private: InstrumentedMutex mutex_; std::vector> read_callbacks_; }; TEST_P(DBIteratorTest, IteratorProperty) { // The test needs to be changed if kPersistedTier is supported in iterator. Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); Put(1, "1", "2"); Delete(1, "2"); ReadOptions ropt; ropt.pin_data = false; { std::unique_ptr iter(NewIterator(ropt, handles_[1])); iter->SeekToFirst(); std::string prop_value; ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value)); ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("0", prop_value); ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value)); ASSERT_EQ("1", prop_value); iter->Next(); ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("Iterator is not valid.", prop_value); // Get internal key at which the iteration stopped (tombstone in this case). 
ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
    ASSERT_EQ("2", prop_value);
  }
  Close();
}

TEST_P(DBIteratorTest, PersistedTierOnIterator) {
  // The test needs to be changed if kPersistedTier is supported in iterator.
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);
  ReadOptions ropt;
  ropt.read_tier = kPersistedTier;

  // Both the single-iterator and the multi-iterator entry points are
  // expected to report NotSupported for this read tier.
  auto* iter = db_->NewIterator(ropt, handles_[1]);
  ASSERT_TRUE(iter->status().IsNotSupported());
  delete iter;

  std::vector<Iterator*> iters;
  ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
  Close();
}

TEST_P(DBIteratorTest, NonBlockingIteration) {
  do {
    ReadOptions non_blocking_opts, regular_opts;
    Options options = CurrentOptions();
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    non_blocking_opts.read_tier = kBlockCacheTier;
    CreateAndReopenWithCF({"pikachu"}, options);
    // write one kv to the database.
    ASSERT_OK(Put(1, "a", "b"));

    // scan using non-blocking iterator. We should find it because
    // it is in memtable.
    Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
    int count = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(iter->status());
      count++;
    }
    ASSERT_EQ(count, 1);
    delete iter;

    // flush memtable to storage. Now, the key should not be in the
    // memtable neither in the block cache.
    ASSERT_OK(Flush(1));

    // verify that a non-blocking iterator does not find any
    // kvs. Neither does it do any IOs to storage.
uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); iter = NewIterator(non_blocking_opts, handles_[1]); count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { count++; } ASSERT_EQ(count, 0); ASSERT_TRUE(iter->status().IsIncomplete()); ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); delete iter; // read in the specified block via a regular get ASSERT_EQ(Get(1, "a"), "b"); // verify that we can find it via a non-blocking scan numopen = TestGetTickerCount(options, NO_FILE_OPENS); cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); iter = NewIterator(non_blocking_opts, handles_[1]); count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_OK(iter->status()); count++; } ASSERT_EQ(count, 1); ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); delete iter; // This test verifies block cache behaviors, which is not used by plain // table format. 
} while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
}

// Smoke test: Seek() followed by Prev() across two flushed files plus the
// memtable; there are no assertions on the iterator position.
TEST_P(DBIteratorTest, IterSeekBeforePrev) {
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("0", "f"));
  ASSERT_OK(Put("1", "h"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("2", "j"));
  auto iter = NewIterator(ReadOptions());
  iter->Seek(Slice("c"));
  iter->Prev();
  iter->Seek(Slice("a"));
  iter->Prev();
  delete iter;
}

// Changes the upper bound Slice in place between two Seek() calls and checks
// that the second Seek() sees keys below the new bound.
TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
  Random rnd(301);
  Options options = CurrentOptions();
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;
  table_options.block_size_deviation = 50;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.compression = kNoCompression;
  Reopen(options);

  ASSERT_OK(Put("a", RandomString(&rnd, 400)));
  ASSERT_OK(Put("aabb", RandomString(&rnd, 400)));
  ASSERT_OK(Put("aaef", RandomString(&rnd, 400)));
  ASSERT_OK(Put("b", RandomString(&rnd, 400)));
  dbfull()->Flush(FlushOptions());
  ReadOptions opts;
  Slice ub = Slice("aa");
  opts.iterate_upper_bound = &ub;
  auto iter = NewIterator(opts);
  iter->Seek(Slice("a"));
  // opts holds a pointer to ub, so this reassignment moves the bound.
  ub = Slice("b");
  iter->Seek(Slice("aabc"));
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "aaef");
  delete iter;
}

// Smoke test: SeekForPrev() followed by Next(); no position assertions.
TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("0", "f"));
  ASSERT_OK(Put("1", "h"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put("2", "j"));
  auto iter = NewIterator(ReadOptions());
  iter->SeekForPrev(Slice("0"));
  iter->Next();
  iter->SeekForPrev(Slice("1"));
  iter->Next();
  delete iter;
}

namespace {
// Returns a key made of `length` copies of the character `c`.
std::string MakeLongKey(size_t length, char c) {
  return std::string(length, c);
}
}  // namespace

TEST_P(DBIteratorTest, IterLongKeys) {
  ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
  ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
  ASSERT_OK(Put("a", "b"));
  dbfull()->Flush(FlushOptions());
  ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
ASSERT_OK(Put(MakeLongKey(127, 3), "3")); ASSERT_OK(Put(MakeLongKey(64, 4), "4")); auto iter = NewIterator(ReadOptions()); // Create a key that needs to be skipped for Seq too new iter->Seek(MakeLongKey(20, 0)); ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0"); iter->Next(); ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); iter->Next(); ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); iter->Next(); ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); iter->Next(); ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4"); iter->SeekForPrev(MakeLongKey(127, 3)); ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); iter->Prev(); ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); iter->Prev(); ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); delete iter; iter = NewIterator(ReadOptions()); iter->Seek(MakeLongKey(50, 1)); ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1"); iter->Next(); ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2"); iter->Next(); ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3"); delete iter; } TEST_P(DBIteratorTest, IterNextWithNewerSeq) { ASSERT_OK(Put("0", "0")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("a", "b")); ASSERT_OK(Put("c", "d")); ASSERT_OK(Put("d", "e")); auto iter = NewIterator(ReadOptions()); // Create a key that needs to be skipped for Seq too new for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; i++) { ASSERT_OK(Put("b", "f")); } iter->Seek(Slice("a")); ASSERT_EQ(IterStatus(iter), "a->b"); iter->Next(); ASSERT_EQ(IterStatus(iter), "c->d"); iter->SeekForPrev(Slice("b")); ASSERT_EQ(IterStatus(iter), "a->b"); iter->Next(); ASSERT_EQ(IterStatus(iter), "c->d"); delete iter; } TEST_P(DBIteratorTest, IterPrevWithNewerSeq) { ASSERT_OK(Put("0", "0")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("a", "b")); ASSERT_OK(Put("c", "d")); ASSERT_OK(Put("d", "e")); auto iter = NewIterator(ReadOptions()); // Create a key that needs to be skipped for 
Seq too new for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; i++) { ASSERT_OK(Put("b", "f")); } iter->Seek(Slice("d")); ASSERT_EQ(IterStatus(iter), "d->e"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "c->d"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "a->b"); iter->Prev(); iter->SeekForPrev(Slice("d")); ASSERT_EQ(IterStatus(iter), "d->e"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "c->d"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "a->b"); iter->Prev(); delete iter; } TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) { ASSERT_OK(Put("0", "0")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("a", "b")); ASSERT_OK(Put("c", "d")); ASSERT_OK(Put("e", "f")); auto iter = NewIterator(ReadOptions()); auto iter2 = NewIterator(ReadOptions()); iter->Seek(Slice("c")); iter2->SeekForPrev(Slice("d")); ASSERT_EQ(IterStatus(iter), "c->d"); ASSERT_EQ(IterStatus(iter2), "c->d"); // Create a key that needs to be skipped for Seq too new for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; i++) { ASSERT_OK(Put("b", "f")); } iter->Prev(); ASSERT_EQ(IterStatus(iter), "a->b"); iter->Prev(); iter2->Prev(); ASSERT_EQ(IterStatus(iter2), "a->b"); iter2->Prev(); delete iter; delete iter2; } TEST_P(DBIteratorTest, IterEmpty) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); Iterator* iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "(invalid)"); iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), "(invalid)"); iter->Seek("foo"); ASSERT_EQ(IterStatus(iter), "(invalid)"); iter->SeekForPrev("foo"); ASSERT_EQ(IterStatus(iter), "(invalid)"); delete iter; } while (ChangeCompactOptions()); } TEST_P(DBIteratorTest, IterSingle) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "a", "va")); Iterator* iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->va"); iter->Next(); ASSERT_EQ(IterStatus(iter), "(invalid)"); 
iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    // Stepping off either end from the last (and only) entry.
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    // Targets that sort before the only key.
    iter->Seek("");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekForPrev("");
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    // Target equal to the only key.
    iter->Seek("a");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekForPrev("a");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    // Target that sorts after the only key.
    iter->Seek("b");
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekForPrev("b");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    delete iter;
  } while (ChangeCompactOptions());
}

// Exercises every positioning call on a column family holding "a", "b", "c".
TEST_P(DBIteratorTest, IterMulti) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "a", "va"));
    ASSERT_OK(Put(1, "b", "vb"));
    ASSERT_OK(Put(1, "c", "vc"));
    Iterator* iter = NewIterator(ReadOptions(), handles_[1]);

    // Full forward scan.
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    // Full backward scan.
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "b->vb");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Prev();
    ASSERT_EQ(IterStatus(iter), "(invalid)");
    iter->SeekToLast();
    ASSERT_EQ(IterStatus(iter), "c->vc");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "(invalid)");

    iter->Seek("");
    ASSERT_EQ(IterStatus(iter), "a->va");
    iter->Seek("a");
    ASSERT_EQ(IterStatus(iter), "a->va");
iter->Seek("ax"); ASSERT_EQ(IterStatus(iter), "b->vb"); iter->SeekForPrev("d"); ASSERT_EQ(IterStatus(iter), "c->vc"); iter->SeekForPrev("c"); ASSERT_EQ(IterStatus(iter), "c->vc"); iter->SeekForPrev("bx"); ASSERT_EQ(IterStatus(iter), "b->vb"); iter->Seek("b"); ASSERT_EQ(IterStatus(iter), "b->vb"); iter->Seek("z"); ASSERT_EQ(IterStatus(iter), "(invalid)"); iter->SeekForPrev("b"); ASSERT_EQ(IterStatus(iter), "b->vb"); iter->SeekForPrev(""); ASSERT_EQ(IterStatus(iter), "(invalid)"); // Switch from reverse to forward iter->SeekToLast(); iter->Prev(); iter->Prev(); iter->Next(); ASSERT_EQ(IterStatus(iter), "b->vb"); // Switch from forward to reverse iter->SeekToFirst(); iter->Next(); iter->Next(); iter->Prev(); ASSERT_EQ(IterStatus(iter), "b->vb"); // Make sure iter stays at snapshot ASSERT_OK(Put(1, "a", "va2")); ASSERT_OK(Put(1, "a2", "va3")); ASSERT_OK(Put(1, "b", "vb2")); ASSERT_OK(Put(1, "c", "vc2")); ASSERT_OK(Delete(1, "b")); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->va"); iter->Next(); ASSERT_EQ(IterStatus(iter), "b->vb"); iter->Next(); ASSERT_EQ(IterStatus(iter), "c->vc"); iter->Next(); ASSERT_EQ(IterStatus(iter), "(invalid)"); iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), "c->vc"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "b->vb"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "a->va"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "(invalid)"); delete iter; } while (ChangeCompactOptions()); } // Check that we can skip over a run of user keys // by using reseek rather than sequential scan TEST_P(DBIteratorTest, IterReseek) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; Options options = CurrentOptions(options_override); options.max_sequential_skip_in_iterations = 3; options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // insert three keys with same userkey and verify that // reseek is not invoked. 
For each of these test cases, // verify that we can find the next key "b". ASSERT_OK(Put(1, "a", "zero")); ASSERT_OK(Put(1, "a", "one")); ASSERT_OK(Put(1, "a", "two")); ASSERT_OK(Put(1, "b", "bone")); Iterator* iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToFirst(); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); ASSERT_EQ(IterStatus(iter), "a->two"); iter->Next(); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); ASSERT_EQ(IterStatus(iter), "b->bone"); delete iter; // insert a total of three keys with same userkey and verify // that reseek is still not invoked. ASSERT_OK(Put(1, "a", "three")); iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->three"); iter->Next(); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); ASSERT_EQ(IterStatus(iter), "b->bone"); delete iter; // insert a total of four keys with same userkey and verify // that reseek is invoked. ASSERT_OK(Put(1, "a", "four")); iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->four"); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0); iter->Next(); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1); ASSERT_EQ(IterStatus(iter), "b->bone"); delete iter; // Testing reverse iterator // At this point, we have three versions of "a" and one version of "b". // The reseek statistics is already at 1. 
int num_reseeks = static_cast( TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION)); // Insert another version of b and assert that reseek is not invoked ASSERT_OK(Put(1, "b", "btwo")); iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), "b->btwo"); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks); iter->Prev(); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks + 1); ASSERT_EQ(IterStatus(iter), "a->four"); delete iter; // insert two more versions of b. This makes a total of 4 versions // of b and 4 versions of a. ASSERT_OK(Put(1, "b", "bthree")); ASSERT_OK(Put(1, "b", "bfour")); iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), "b->bfour"); ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks + 2); iter->Prev(); // the previous Prev call should have invoked reseek ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks + 3); ASSERT_EQ(IterStatus(iter), "a->four"); delete iter; } TEST_P(DBIteratorTest, IterSmallAndLargeMix) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "a", "va")); ASSERT_OK(Put(1, "b", std::string(100000, 'b'))); ASSERT_OK(Put(1, "c", "vc")); ASSERT_OK(Put(1, "d", std::string(100000, 'd'))); ASSERT_OK(Put(1, "e", std::string(100000, 'e'))); Iterator* iter = NewIterator(ReadOptions(), handles_[1]); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->va"); iter->Next(); ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); iter->Next(); ASSERT_EQ(IterStatus(iter), "c->vc"); iter->Next(); ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); iter->Next(); ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); iter->Next(); ASSERT_EQ(IterStatus(iter), "(invalid)"); iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); iter->Prev(); 
ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); iter->Prev(); ASSERT_EQ(IterStatus(iter), "c->vc"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); iter->Prev(); ASSERT_EQ(IterStatus(iter), "a->va"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "(invalid)"); delete iter; } while (ChangeCompactOptions()); } TEST_P(DBIteratorTest, IterMultiWithDelete) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "ka", "va")); ASSERT_OK(Put(1, "kb", "vb")); ASSERT_OK(Put(1, "kc", "vc")); ASSERT_OK(Delete(1, "kb")); ASSERT_EQ("NOT_FOUND", Get(1, "kb")); Iterator* iter = NewIterator(ReadOptions(), handles_[1]); iter->Seek("kc"); ASSERT_EQ(IterStatus(iter), "kc->vc"); if (!CurrentOptions().merge_operator) { // TODO: merge operator does not support backward iteration yet if (kPlainTableAllBytesPrefix != option_config_ && kBlockBasedTableWithWholeKeyHashIndex != option_config_ && kHashLinkList != option_config_ && kHashSkipList != option_config_) { // doesn't support SeekToLast iter->Prev(); ASSERT_EQ(IterStatus(iter), "ka->va"); } } delete iter; } while (ChangeOptions()); } TEST_P(DBIteratorTest, IterPrevMaxSkip) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); for (int i = 0; i < 2; i++) { ASSERT_OK(Put(1, "key1", "v1")); ASSERT_OK(Put(1, "key2", "v2")); ASSERT_OK(Put(1, "key3", "v3")); ASSERT_OK(Put(1, "key4", "v4")); ASSERT_OK(Put(1, "key5", "v5")); } VerifyIterLast("key5->v5", 1); ASSERT_OK(Delete(1, "key5")); VerifyIterLast("key4->v4", 1); ASSERT_OK(Delete(1, "key4")); VerifyIterLast("key3->v3", 1); ASSERT_OK(Delete(1, "key3")); VerifyIterLast("key2->v2", 1); ASSERT_OK(Delete(1, "key2")); VerifyIterLast("key1->v1", 1); ASSERT_OK(Delete(1, "key1")); VerifyIterLast("(invalid)", 1); } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast)); } TEST_P(DBIteratorTest, IterWithSnapshot) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { 
CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); ASSERT_OK(Put(1, "key1", "val1")); ASSERT_OK(Put(1, "key2", "val2")); ASSERT_OK(Put(1, "key3", "val3")); ASSERT_OK(Put(1, "key4", "val4")); ASSERT_OK(Put(1, "key5", "val5")); const Snapshot* snapshot = db_->GetSnapshot(); ReadOptions options; options.snapshot = snapshot; Iterator* iter = NewIterator(options, handles_[1]); ASSERT_OK(Put(1, "key0", "val0")); // Put more values after the snapshot ASSERT_OK(Put(1, "key100", "val100")); ASSERT_OK(Put(1, "key101", "val101")); iter->Seek("key5"); ASSERT_EQ(IterStatus(iter), "key5->val5"); if (!CurrentOptions().merge_operator) { // TODO: merge operator does not support backward iteration yet if (kPlainTableAllBytesPrefix != option_config_ && kBlockBasedTableWithWholeKeyHashIndex != option_config_ && kHashLinkList != option_config_ && kHashSkipList != option_config_) { iter->Prev(); ASSERT_EQ(IterStatus(iter), "key4->val4"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "key3->val3"); iter->Next(); ASSERT_EQ(IterStatus(iter), "key4->val4"); iter->Next(); ASSERT_EQ(IterStatus(iter), "key5->val5"); } iter->Next(); ASSERT_TRUE(!iter->Valid()); } if (!CurrentOptions().merge_operator) { // TODO(gzh): merge operator does not support backward iteration yet if (kPlainTableAllBytesPrefix != option_config_ && kBlockBasedTableWithWholeKeyHashIndex != option_config_ && kHashLinkList != option_config_ && kHashSkipList != option_config_) { iter->SeekForPrev("key1"); ASSERT_EQ(IterStatus(iter), "key1->val1"); iter->Next(); ASSERT_EQ(IterStatus(iter), "key2->val2"); iter->Next(); ASSERT_EQ(IterStatus(iter), "key3->val3"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "key2->val2"); iter->Prev(); ASSERT_EQ(IterStatus(iter), "key1->val1"); iter->Prev(); ASSERT_TRUE(!iter->Valid()); } } db_->ReleaseSnapshot(snapshot); delete iter; } while (ChangeOptions()); } TEST_P(DBIteratorTest, IteratorPinsRef) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); Put(1, "foo", "hello"); 
// Get iterator that will yield the current contents of the DB. Iterator* iter = NewIterator(ReadOptions(), handles_[1]); // Write to force compactions Put(1, "foo", "newvalue1"); for (int i = 0; i < 100; i++) { // 100K values ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v'))); } Put(1, "foo", "newvalue2"); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("foo", iter->key().ToString()); ASSERT_EQ("hello", iter->value().ToString()); iter->Next(); ASSERT_TRUE(!iter->Valid()); delete iter; } while (ChangeCompactOptions()); } TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); Put(1, "foo", "delete-cf-then-delete-iter"); Put(1, "hello", "value2"); ColumnFamilyHandle* cf = handles_[1]; ReadOptions ro; auto* iter = db_->NewIterator(ro, cf); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter"); // delete CF handle db_->DestroyColumnFamilyHandle(cf); handles_.erase(std::begin(handles_) + 1); // delete Iterator after CF handle is deleted iter->Next(); ASSERT_EQ(IterStatus(iter), "hello->value2"); delete iter; } TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); Put(1, "foo", "drop-cf-then-delete-iter"); ReadOptions ro; ColumnFamilyHandle* cf = handles_[1]; auto* iter = db_->NewIterator(ro, cf); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter"); // drop and delete CF db_->DropColumnFamily(cf); db_->DestroyColumnFamilyHandle(cf); handles_.erase(std::begin(handles_) + 1); // delete Iterator after CF handle is dropped delete iter; } // SetOptions not defined in ROCKSDB LITE #ifndef ROCKSDB_LITE TEST_P(DBIteratorTest, DBIteratorBoundTest) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.prefix_extractor = nullptr; DestroyAndReopen(options); ASSERT_OK(Put("a", "0")); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo1", "bar1")); 
ASSERT_OK(Put("g1", "0")); // testing basic case with no iterate_upper_bound and no prefix_extractor { ReadOptions ro; ro.iterate_upper_bound = nullptr; std::unique_ptr iter(NewIterator(ro)); iter->Seek("foo"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("g1")), 0); iter->SeekForPrev("g1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("g1")), 0); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo")), 0); } // testing iterate_upper_bound and forward iterator // to make sure it stops at bound { ReadOptions ro; // iterate_upper_bound points beyond the last expected entry Slice prefix("foo2"); ro.iterate_upper_bound = &prefix; std::unique_ptr iter(NewIterator(ro)); iter->Seek("foo"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(("foo1")), 0); iter->Next(); // should stop here... 
ASSERT_TRUE(!iter->Valid()); } // Testing SeekToLast with iterate_upper_bound set { ReadOptions ro; Slice prefix("foo"); ro.iterate_upper_bound = &prefix; std::unique_ptr iter(NewIterator(ro)); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("a")), 0); } // prefix is the first letter of the key ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}})); ASSERT_OK(Put("a", "0")); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("g1", "0")); // testing with iterate_upper_bound and prefix_extractor // Seek target and iterate_upper_bound are not is same prefix // This should be an error { ReadOptions ro; Slice upper_bound("g"); ro.iterate_upper_bound = &upper_bound; std::unique_ptr iter(NewIterator(ro)); iter->Seek("foo"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("foo", iter->key().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("foo1", iter->key().ToString()); iter->Next(); ASSERT_TRUE(!iter->Valid()); } // testing that iterate_upper_bound prevents iterating over deleted items // if the bound has already reached { options.prefix_extractor = nullptr; DestroyAndReopen(options); ASSERT_OK(Put("a", "0")); ASSERT_OK(Put("b", "0")); ASSERT_OK(Put("b1", "0")); ASSERT_OK(Put("c", "0")); ASSERT_OK(Put("d", "0")); ASSERT_OK(Put("e", "0")); ASSERT_OK(Delete("c")); ASSERT_OK(Delete("d")); // base case with no bound ReadOptions ro; ro.iterate_upper_bound = nullptr; std::unique_ptr iter(NewIterator(ro)); iter->Seek("b"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("b")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(("b1")), 0); get_perf_context()->Reset(); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(static_cast(get_perf_context()->internal_delete_skipped_count), 2); // now testing with iterate_bound Slice prefix("c"); ro.iterate_upper_bound = &prefix; iter.reset(NewIterator(ro)); get_perf_context()->Reset(); iter->Seek("b"); 
ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("b")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(("b1")), 0); iter->Next(); // the iteration should stop as soon as the bound key is reached // even though the key is deleted // hence internal_delete_skipped_count should be 0 ASSERT_TRUE(!iter->Valid()); ASSERT_EQ(static_cast(get_perf_context()->internal_delete_skipped_count), 0); } } TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.prefix_extractor = nullptr; DestroyAndReopen(options); ASSERT_OK(Put("a", "0")); ASSERT_OK(Put("z", "0")); ASSERT_OK(Flush()); ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("foo2", "bar2")); ASSERT_OK(Put("foo3", "bar3")); ASSERT_OK(Put("foo4", "bar4")); { std::string up_str = "foo5"; Slice up(up_str); ReadOptions ro; ro.iterate_upper_bound = &up; std::unique_ptr iter(NewIterator(ro)); iter->Seek("foo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); uint64_t prev_block_cache_hit = TestGetTickerCount(options, BLOCK_CACHE_HIT); uint64_t prev_block_cache_miss = TestGetTickerCount(options, BLOCK_CACHE_MISS); ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0); iter->Seek("foo4"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo4")), 0); ASSERT_EQ(prev_block_cache_hit, TestGetTickerCount(options, BLOCK_CACHE_HIT)); ASSERT_EQ(prev_block_cache_miss, TestGetTickerCount(options, BLOCK_CACHE_MISS)); iter->Seek("foo2"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo2")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo3")), 0); ASSERT_EQ(prev_block_cache_hit, TestGetTickerCount(options, BLOCK_CACHE_HIT)); ASSERT_EQ(prev_block_cache_miss, TestGetTickerCount(options, BLOCK_CACHE_MISS)); } } #endif TEST_P(DBIteratorTest, 
DBIteratorBoundOptimizationTest) { for (auto format_version : {2, 3, 4}) { int upper_bound_hits = 0; Options options = CurrentOptions(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableIterator:out_of_bound", [&upper_bound_hits](void*) { upper_bound_hits++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); options.env = env_; options.create_if_missing = true; options.prefix_extractor = nullptr; BlockBasedTableOptions table_options; table_options.format_version = format_version; table_options.flush_block_policy_factory = std::make_shared(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("foo2", "bar2")); ASSERT_OK(Put("foo4", "bar4")); ASSERT_OK(Flush()); Slice ub("foo3"); ReadOptions ro; ro.iterate_upper_bound = &ub; std::unique_ptr iter(NewIterator(ro)); iter->Seek("foo"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo1")), 0); ASSERT_EQ(upper_bound_hits, 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("foo2")), 0); ASSERT_EQ(upper_bound_hits, 0); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_EQ(upper_bound_hits, 1); } } // Enable kBinarySearchWithFirstKey, do some iterator operations and check that // they don't do unnecessary block reads. 
TEST_P(DBIteratorTest, IndexWithFirstKey) { for (int tailing = 0; tailing < 2; ++tailing) { SCOPED_TRACE("tailing = " + std::to_string(tailing)); Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.prefix_extractor = nullptr; options.merge_operator = MergeOperators::CreateStringAppendOperator(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); Statistics* stats = options.statistics.get(); BlockBasedTableOptions table_options; table_options.index_type = BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey; table_options.index_shortening = BlockBasedTableOptions::IndexShorteningMode::kNoShortening; table_options.flush_block_policy_factory = std::make_shared(); table_options.block_cache = NewLRUCache(8000); // fits all blocks and their cache metadata overhead options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(Merge("a1", "x1")); ASSERT_OK(Merge("b1", "y1")); ASSERT_OK(Merge("c0", "z1")); ASSERT_OK(Flush()); ASSERT_OK(Merge("a2", "x2")); ASSERT_OK(Merge("b2", "y2")); ASSERT_OK(Merge("c0", "z2")); ASSERT_OK(Flush()); ASSERT_OK(Merge("a3", "x3")); ASSERT_OK(Merge("b3", "y3")); ASSERT_OK(Merge("c3", "z3")); ASSERT_OK(Flush()); // Block cache is not important for this test. // We use BLOCK_CACHE_DATA_* counters just because they're the most readily // available way of counting block accesses. ReadOptions ropt; ropt.tailing = tailing; std::unique_ptr iter(NewIterator(ropt)); ropt.read_tier = ReadTier::kBlockCacheTier; std::unique_ptr nonblocking_iter(NewIterator(ropt)); iter->Seek("b10"); ASSERT_TRUE(iter->Valid()); EXPECT_EQ("b2", iter->key().ToString()); EXPECT_EQ("y2", iter->value().ToString()); EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); // The cache-only iterator should succeed too, using the blocks pulled into // the cache by the previous iterator. 
nonblocking_iter->Seek("b10"); ASSERT_TRUE(nonblocking_iter->Valid()); EXPECT_EQ("b2", nonblocking_iter->key().ToString()); EXPECT_EQ("y2", nonblocking_iter->value().ToString()); EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // ... but it shouldn't be able to step forward since the next block is // not in cache yet. nonblocking_iter->Next(); ASSERT_FALSE(nonblocking_iter->Valid()); ASSERT_TRUE(nonblocking_iter->status().IsIncomplete()); // ... nor should a seek to the next key succeed. nonblocking_iter->Seek("b20"); ASSERT_FALSE(nonblocking_iter->Valid()); ASSERT_TRUE(nonblocking_iter->status().IsIncomplete()); iter->Next(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ("b3", iter->key().ToString()); EXPECT_EQ("y3", iter->value().ToString()); EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // After the blocking iterator loaded the next block, the nonblocking // iterator's seek should succeed. nonblocking_iter->Seek("b20"); ASSERT_TRUE(nonblocking_iter->Valid()); EXPECT_EQ("b3", nonblocking_iter->key().ToString()); EXPECT_EQ("y3", nonblocking_iter->value().ToString()); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); iter->Seek("c0"); ASSERT_TRUE(iter->Valid()); EXPECT_EQ("c0", iter->key().ToString()); EXPECT_EQ("z1,z2", iter->value().ToString()); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(6, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); iter->Next(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ("c3", iter->key().ToString()); EXPECT_EQ("z3", iter->value().ToString()); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); iter.reset(); // Enable iterate_upper_bound and check that iterator is not trying to read // blocks that are fully above upper bound. 
std::string ub = "b3"; Slice ub_slice(ub); ropt.iterate_upper_bound = &ub_slice; iter.reset(NewIterator(ropt)); iter->Seek("b2"); ASSERT_TRUE(iter->Valid()); EXPECT_EQ("b2", iter->key().ToString()); EXPECT_EQ("y2", iter->value().ToString()); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); iter->Next(); ASSERT_FALSE(iter->Valid()); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); } } TEST_P(DBIteratorTest, IndexWithFirstKeyGet) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.prefix_extractor = nullptr; options.merge_operator = MergeOperators::CreateStringAppendOperator(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); Statistics* stats = options.statistics.get(); BlockBasedTableOptions table_options; table_options.index_type = BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey; table_options.index_shortening = BlockBasedTableOptions::IndexShorteningMode::kNoShortening; table_options.flush_block_policy_factory = std::make_shared(); table_options.block_cache = NewLRUCache(1000); // fits all blocks options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(Merge("a", "x1")); ASSERT_OK(Merge("c", "y1")); ASSERT_OK(Merge("e", "z1")); ASSERT_OK(Flush()); ASSERT_OK(Merge("c", "y2")); ASSERT_OK(Merge("e", "z2")); ASSERT_OK(Flush()); // Get() between blocks shouldn't read any blocks. ASSERT_EQ("NOT_FOUND", Get("b")); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Get() of an existing key shouldn't read any unnecessary blocks when there's // only one key per block. 
ASSERT_EQ("y1,y2", Get("c")); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); ASSERT_EQ("x1", Get("a")); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(std::vector({"NOT_FOUND", "z1,z2"}), MultiGet({"b", "e"})); } // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary // return the biggest key which is smaller than the seek key. TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) { Options options; options.create_if_missing = true; options.merge_operator = MergeOperators::CreatePutOperator(); options.env = env_; DestroyAndReopen(options); // write three entries with different keys using Merge() WriteOptions wopts; db_->Merge(wopts, "1", "data1"); db_->Merge(wopts, "2", "data2"); db_->Merge(wopts, "3", "data3"); std::unique_ptr it(NewIterator(ReadOptions())); it->Seek("2"); ASSERT_TRUE(it->Valid()); ASSERT_EQ("2", it->key().ToString()); it->Prev(); ASSERT_TRUE(it->Valid()); ASSERT_EQ("1", it->key().ToString()); it->SeekForPrev("1"); ASSERT_TRUE(it->Valid()); ASSERT_EQ("1", it->key().ToString()); it->Next(); ASSERT_TRUE(it->Valid()); ASSERT_EQ("2", it->key().ToString()); } class DBIteratorTestForPinnedData : public DBIteratorTest { public: enum TestConfig { NORMAL, CLOSE_AND_OPEN, COMPACT_BEFORE_READ, FLUSH_EVERY_1000, MAX }; DBIteratorTestForPinnedData() : DBIteratorTest() {} void PinnedDataIteratorRandomized(TestConfig run_config) { // Generate Random data Random rnd(301); int puts = 100000; int key_pool = static_cast(puts * 0.7); int key_size = 100; int val_size = 1000; int seeks_percentage = 20; // 20% of keys will be used to test seek() int delete_percentage = 20; // 20% of keys will be deleted int merge_percentage = 20; // 20% of keys will be added using Merge() Options options = CurrentOptions(); BlockBasedTableOptions table_options; table_options.use_delta_encoding = false; 
options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.merge_operator = MergeOperators::CreatePutOperator(); DestroyAndReopen(options); std::vector generated_keys(key_pool); for (int i = 0; i < key_pool; i++) { generated_keys[i] = RandomString(&rnd, key_size); } std::map true_data; std::vector random_keys; std::vector deleted_keys; for (int i = 0; i < puts; i++) { auto& k = generated_keys[rnd.Next() % key_pool]; auto v = RandomString(&rnd, val_size); // Insert data to true_data map and to DB true_data[k] = v; if (rnd.PercentTrue(merge_percentage)) { ASSERT_OK(db_->Merge(WriteOptions(), k, v)); } else { ASSERT_OK(Put(k, v)); } // Pick random keys to be used to test Seek() if (rnd.PercentTrue(seeks_percentage)) { random_keys.push_back(k); } // Delete some random keys if (rnd.PercentTrue(delete_percentage)) { deleted_keys.push_back(k); true_data.erase(k); ASSERT_OK(Delete(k)); } if (run_config == TestConfig::FLUSH_EVERY_1000) { if (i && i % 1000 == 0) { Flush(); } } } if (run_config == TestConfig::CLOSE_AND_OPEN) { Close(); Reopen(options); } else if (run_config == TestConfig::COMPACT_BEFORE_READ) { db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); } ReadOptions ro; ro.pin_data = true; auto iter = NewIterator(ro); { // Test Seek to random keys std::vector keys_slices; std::vector true_keys; for (auto& k : random_keys) { iter->Seek(k); if (!iter->Valid()) { ASSERT_EQ(true_data.lower_bound(k), true_data.end()); continue; } std::string prop_value; ASSERT_OK( iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); keys_slices.push_back(iter->key()); true_keys.push_back(true_data.lower_bound(k)->first); } for (size_t i = 0; i < keys_slices.size(); i++) { ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]); } } { // Test SeekForPrev to random keys std::vector keys_slices; std::vector true_keys; for (auto& k : random_keys) { iter->SeekForPrev(k); if (!iter->Valid()) { ASSERT_EQ(true_data.upper_bound(k), 
true_data.begin()); continue; } std::string prop_value; ASSERT_OK( iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); keys_slices.push_back(iter->key()); true_keys.push_back((--true_data.upper_bound(k))->first); } for (size_t i = 0; i < keys_slices.size(); i++) { ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]); } } { // Test iterating all data forward std::vector all_keys; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; ASSERT_OK( iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); all_keys.push_back(iter->key()); } ASSERT_EQ(all_keys.size(), true_data.size()); // Verify that all keys slices are valid auto data_iter = true_data.begin(); for (size_t i = 0; i < all_keys.size(); i++) { ASSERT_EQ(all_keys[i].ToString(), data_iter->first); data_iter++; } } { // Test iterating all data backward std::vector all_keys; for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { std::string prop_value; ASSERT_OK( iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); all_keys.push_back(iter->key()); } ASSERT_EQ(all_keys.size(), true_data.size()); // Verify that all keys slices are valid (backward) auto data_iter = true_data.rbegin(); for (size_t i = 0; i < all_keys.size(); i++) { ASSERT_EQ(all_keys[i].ToString(), data_iter->first); data_iter++; } } delete iter; } }; TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) { PinnedDataIteratorRandomized(TestConfig::NORMAL); } TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) { PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN); } TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCompactBeforeRead) { PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ); } TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) { PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000); } #ifndef 
ROCKSDB_LITE TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) { Options options = CurrentOptions(); BlockBasedTableOptions table_options; table_options.use_delta_encoding = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.disable_auto_compactions = true; options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb DestroyAndReopen(options); std::map true_data; // Generate 4 sst files in L2 Random rnd(301); for (int i = 1; i <= 1000; i++) { std::string k = Key(i * 3); std::string v = RandomString(&rnd, 100); ASSERT_OK(Put(k, v)); true_data[k] = v; if (i % 250 == 0) { ASSERT_OK(Flush()); } } ASSERT_EQ(FilesPerLevel(0), "4"); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(FilesPerLevel(0), "0,4"); // Generate 4 sst files in L0 for (int i = 1; i <= 1000; i++) { std::string k = Key(i * 2); std::string v = RandomString(&rnd, 100); ASSERT_OK(Put(k, v)); true_data[k] = v; if (i % 250 == 0) { ASSERT_OK(Flush()); } } ASSERT_EQ(FilesPerLevel(0), "4,4"); // Add some keys/values in memtables for (int i = 1; i <= 1000; i++) { std::string k = Key(i); std::string v = RandomString(&rnd, 100); ASSERT_OK(Put(k, v)); true_data[k] = v; } ASSERT_EQ(FilesPerLevel(0), "4,4"); ReadOptions ro; ro.pin_data = true; auto iter = NewIterator(ro); std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } ASSERT_EQ(results.size(), true_data.size()); auto data_iter = true_data.begin(); for (size_t i = 0; i < results.size(); i++, data_iter++) { auto& kv = results[i]; ASSERT_EQ(kv.first, data_iter->first); ASSERT_EQ(kv.second, data_iter->second); } delete iter; } #endif TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) { Options options = CurrentOptions(); BlockBasedTableOptions table_options; 
table_options.use_delta_encoding = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.merge_operator = MergeOperators::CreateUInt64AddOperator(); DestroyAndReopen(options); std::string numbers[7]; for (int val = 0; val <= 6; val++) { PutFixed64(numbers + val, val); } // +1 all keys in range [ 0 => 999] for (int i = 0; i < 1000; i++) { WriteOptions wo; ASSERT_OK(db_->Merge(wo, Key(i), numbers[1])); } // +2 all keys divisible by 2 in range [ 0 => 999] for (int i = 0; i < 1000; i += 2) { WriteOptions wo; ASSERT_OK(db_->Merge(wo, Key(i), numbers[2])); } // +3 all keys divisible by 5 in range [ 0 => 999] for (int i = 0; i < 1000; i += 5) { WriteOptions wo; ASSERT_OK(db_->Merge(wo, Key(i), numbers[3])); } ReadOptions ro; ro.pin_data = true; auto iter = NewIterator(ro); std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } ASSERT_EQ(results.size(), 1000); for (size_t i = 0; i < results.size(); i++) { auto& kv = results[i]; ASSERT_EQ(kv.first, Key(static_cast(i))); int expected_val = 1; if (i % 2 == 0) { expected_val += 2; } if (i % 5 == 0) { expected_val += 3; } ASSERT_EQ(kv.second, numbers[expected_val]); } delete iter; } TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) { Options options = CurrentOptions(); BlockBasedTableOptions table_options; table_options.use_delta_encoding = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.write_buffer_size = 100000; DestroyAndReopen(options); Random rnd(301); std::map true_data; for (int i = 0; i < 1000; i++) { std::string k = RandomString(&rnd, 10); std::string v = RandomString(&rnd, 1000); ASSERT_OK(Put(k, v)); true_data[k] = v; } ReadOptions ro; ro.pin_data = true; auto iter = NewIterator(ro); // Delete 50% of the keys and update the 
other 50% for (auto& kv : true_data) { if (rnd.OneIn(2)) { ASSERT_OK(Delete(kv.first)); } else { std::string new_val = RandomString(&rnd, 1000); ASSERT_OK(Put(kv.first, new_val)); } } std::vector> results; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string prop_value; ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value)); ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } auto data_iter = true_data.begin(); for (size_t i = 0; i < results.size(); i++, data_iter++) { auto& kv = results[i]; ASSERT_EQ(kv.first, data_iter->first); ASSERT_EQ(kv.second, data_iter->second); } delete iter; } class SliceTransformLimitedDomainGeneric : public SliceTransform { const char* Name() const override { return "SliceTransformLimitedDomainGeneric"; } Slice Transform(const Slice& src) const override { return Slice(src.data(), 1); } bool InDomain(const Slice& src) const override { // prefix will be x???? return src.size() >= 1; } bool InRange(const Slice& dst) const override { // prefix will be x???? 
return dst.size() == 1; } }; TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) { Options options = CurrentOptions(); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(Put("a1", "va1")); ASSERT_OK(Put("a2", "va2")); ASSERT_OK(Put("a3", "va3")); ASSERT_OK(Flush()); ASSERT_OK(Put("b1", "vb1")); ASSERT_OK(Put("b2", "vb2")); ASSERT_OK(Put("b3", "vb3")); ASSERT_OK(Flush()); ASSERT_OK(Put("b4", "vb4")); ASSERT_OK(Put("d1", "vd1")); ASSERT_OK(Put("d2", "vd2")); ASSERT_OK(Put("d4", "vd4")); ASSERT_OK(Flush()); MoveFilesToLevel(1); { ReadOptions ro; Iterator* iter = NewIterator(ro); iter->SeekForPrev("a4"); ASSERT_EQ(iter->key().ToString(), "a3"); ASSERT_EQ(iter->value().ToString(), "va3"); iter->SeekForPrev("c2"); ASSERT_EQ(iter->key().ToString(), "b3"); iter->SeekForPrev("d3"); ASSERT_EQ(iter->key().ToString(), "d2"); iter->SeekForPrev("b5"); ASSERT_EQ(iter->key().ToString(), "b4"); delete iter; } { ReadOptions ro; ro.prefix_same_as_start = true; Iterator* iter = NewIterator(ro); iter->SeekForPrev("c2"); ASSERT_TRUE(!iter->Valid()); delete iter; } } TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) { Options options = CurrentOptions(); options.prefix_extractor = std::make_shared(); options.disable_auto_compactions = true; // Enable prefix bloom for SST files BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); ASSERT_OK(Put("a1", "va1")); ASSERT_OK(Put("a2", "va2")); ASSERT_OK(Put("a3", "va3")); ASSERT_OK(Flush()); ASSERT_OK(Put("b1", "vb1")); ASSERT_OK(Put("b2", "vb2")); ASSERT_OK(Put("b3", "vb3")); 
ASSERT_OK(Flush()); ASSERT_OK(Put("b4", "vb4")); ASSERT_OK(Put("d1", "vd1")); ASSERT_OK(Put("d2", "vd2")); ASSERT_OK(Put("d4", "vd4")); ASSERT_OK(Flush()); MoveFilesToLevel(1); { ReadOptions ro; Iterator* iter = NewIterator(ro); iter->SeekForPrev("a4"); ASSERT_EQ(iter->key().ToString(), "a3"); ASSERT_EQ(iter->value().ToString(), "va3"); iter->SeekForPrev("c2"); ASSERT_EQ(iter->key().ToString(), "b3"); iter->SeekForPrev("d3"); ASSERT_EQ(iter->key().ToString(), "d2"); iter->SeekForPrev("b5"); ASSERT_EQ(iter->key().ToString(), "b4"); delete iter; } { ReadOptions ro; ro.prefix_same_as_start = true; Iterator* iter = NewIterator(ro); iter->SeekForPrev("c2"); ASSERT_TRUE(!iter->Valid()); delete iter; } } TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) { Options options = CurrentOptions(); BlockBasedTableOptions table_options; table_options.block_size = 1; // every block will contain one entry options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); options.disable_auto_compactions = true; options.max_sequential_skip_in_iterations = 8; DestroyAndReopen(options); // Putting such deletes will force DBIter::Prev() to fallback to a Seek for (int file_num = 0; file_num < 10; file_num++) { ASSERT_OK(Delete("key4")); ASSERT_OK(Flush()); } // First File containing 5 blocks of puts ASSERT_OK(Put("key1", "val1.0")); ASSERT_OK(Put("key2", "val2.0")); ASSERT_OK(Put("key3", "val3.0")); ASSERT_OK(Put("key4", "val4.0")); ASSERT_OK(Put("key5", "val5.0")); ASSERT_OK(Flush()); // Second file containing 9 blocks of merge operands ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1")); ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2")); ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1")); ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2")); ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3")); ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1")); ASSERT_OK(db_->Merge(WriteOptions(), 
"key3", "val3.2")); ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3")); ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4")); ASSERT_OK(Flush()); { ReadOptions ro; ro.fill_cache = false; Iterator* iter = NewIterator(ro); iter->SeekToLast(); ASSERT_EQ(iter->key().ToString(), "key5"); ASSERT_EQ(iter->value().ToString(), "val5.0"); iter->Prev(); ASSERT_EQ(iter->key().ToString(), "key4"); ASSERT_EQ(iter->value().ToString(), "val4.0"); iter->Prev(); ASSERT_EQ(iter->key().ToString(), "key3"); ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4"); iter->Prev(); ASSERT_EQ(iter->key().ToString(), "key2"); ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3"); iter->Prev(); ASSERT_EQ(iter->key().ToString(), "key1"); ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2"); delete iter; } } TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) { Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); options.disable_auto_compactions = true; options.level0_slowdown_writes_trigger = (1 << 30); options.level0_stop_writes_trigger = (1 << 30); options.max_sequential_skip_in_iterations = 8; DestroyAndReopen(options); const int kNumKeys = 500; // Small number of merge operands to make sure that DBIter::Prev() don't // fall back to Seek() const int kNumMergeOperands = 3; // Use value size that will make sure that every block contain 1 key const int kValSize = static_cast(BlockBasedTableOptions().block_size) * 4; // Percentage of keys that wont get merge operations const int kNoMergeOpPercentage = 20; // Percentage of keys that will be deleted const int kDeletePercentage = 10; // For half of the key range we will write multiple deletes first to // force DBIter::Prev() to fall back to Seek() for (int file_num = 0; file_num < 10; file_num++) { for (int i = 0; i < kNumKeys; i += 2) { ASSERT_OK(Delete(Key(i))); } ASSERT_OK(Flush()); } Random rnd(301); std::map true_data; std::string 
gen_key; std::string gen_val; for (int i = 0; i < kNumKeys; i++) { gen_key = Key(i); gen_val = RandomString(&rnd, kValSize); ASSERT_OK(Put(gen_key, gen_val)); true_data[gen_key] = gen_val; } ASSERT_OK(Flush()); // Separate values and merge operands in different file so that we // make sure that we don't merge them while flushing but actually // merge them in the read path for (int i = 0; i < kNumKeys; i++) { if (rnd.PercentTrue(kNoMergeOpPercentage)) { // Dont give merge operations for some keys continue; } for (int j = 0; j < kNumMergeOperands; j++) { gen_key = Key(i); gen_val = RandomString(&rnd, kValSize); ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val)); true_data[gen_key] += "," + gen_val; } } ASSERT_OK(Flush()); for (int i = 0; i < kNumKeys; i++) { if (rnd.PercentTrue(kDeletePercentage)) { gen_key = Key(i); ASSERT_OK(Delete(gen_key)); true_data.erase(gen_key); } } ASSERT_OK(Flush()); { ReadOptions ro; ro.fill_cache = false; Iterator* iter = NewIterator(ro); auto data_iter = true_data.rbegin(); for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { ASSERT_EQ(iter->key().ToString(), data_iter->first); ASSERT_EQ(iter->value().ToString(), data_iter->second); data_iter++; } ASSERT_EQ(data_iter, true_data.rend()); delete iter; } { ReadOptions ro; ro.fill_cache = false; Iterator* iter = NewIterator(ro); auto data_iter = true_data.rbegin(); int entries_right = 0; std::string seek_key; for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { // Verify key/value of current position ASSERT_EQ(iter->key().ToString(), data_iter->first); ASSERT_EQ(iter->value().ToString(), data_iter->second); bool restore_position_with_seek = rnd.Uniform(2); if (restore_position_with_seek) { seek_key = iter->key().ToString(); } // Do some Next() operations the restore the iterator to orignal position int next_count = entries_right > 0 ? 
rnd.Uniform(std::min(entries_right, 10)) : 0; for (int i = 0; i < next_count; i++) { iter->Next(); data_iter--; ASSERT_EQ(iter->key().ToString(), data_iter->first); ASSERT_EQ(iter->value().ToString(), data_iter->second); } if (restore_position_with_seek) { // Restore orignal position using Seek() iter->Seek(seek_key); for (int i = 0; i < next_count; i++) { data_iter++; } ASSERT_EQ(iter->key().ToString(), data_iter->first); ASSERT_EQ(iter->value().ToString(), data_iter->second); } else { // Restore original position using Prev() for (int i = 0; i < next_count; i++) { iter->Prev(); data_iter++; ASSERT_EQ(iter->key().ToString(), data_iter->first); ASSERT_EQ(iter->value().ToString(), data_iter->second); } } entries_right++; data_iter++; } ASSERT_EQ(data_iter, true_data.rend()); delete iter; } } TEST_P(DBIteratorTest, IteratorWithLocalStatistics) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 1000; i++) { // Key 10 bytes / Value 10 bytes ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10))); } std::atomic total_next(0); std::atomic total_next_found(0); std::atomic total_prev(0); std::atomic total_prev_found(0); std::atomic total_bytes(0); std::vector threads; std::function reader_func_next = [&]() { SetPerfLevel(kEnableCount); get_perf_context()->Reset(); Iterator* iter = NewIterator(ReadOptions()); iter->SeekToFirst(); // Seek will bump ITER_BYTES_READ uint64_t bytes = 0; bytes += iter->key().size(); bytes += iter->value().size(); while (true) { iter->Next(); total_next++; if (!iter->Valid()) { break; } total_next_found++; bytes += iter->key().size(); bytes += iter->value().size(); } delete iter; ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes); SetPerfLevel(kDisable); total_bytes += bytes; }; std::function reader_func_prev = [&]() { SetPerfLevel(kEnableCount); Iterator* iter = NewIterator(ReadOptions()); iter->SeekToLast(); // Seek will 
bump ITER_BYTES_READ uint64_t bytes = 0; bytes += iter->key().size(); bytes += iter->value().size(); while (true) { iter->Prev(); total_prev++; if (!iter->Valid()) { break; } total_prev_found++; bytes += iter->key().size(); bytes += iter->value().size(); } delete iter; ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes); SetPerfLevel(kDisable); total_bytes += bytes; }; for (int i = 0; i < 10; i++) { threads.emplace_back(reader_func_next); } for (int i = 0; i < 15; i++) { threads.emplace_back(reader_func_prev); } for (auto& t : threads) { t.join(); } ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next); ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND), (uint64_t)total_next_found); ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev); ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND), (uint64_t)total_prev_found); ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), (uint64_t)total_bytes); } TEST_P(DBIteratorTest, ReadAhead) { Options options; env_->count_random_reads_ = true; options.env = env_; options.disable_auto_compactions = true; options.write_buffer_size = 4 << 20; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.block_size = 1024; table_options.no_block_cache = true; options.table_factory.reset(new BlockBasedTableFactory(table_options)); Reopen(options); std::string value(1024, 'a'); for (int i = 0; i < 100; i++) { Put(Key(i), value); } ASSERT_OK(Flush()); MoveFilesToLevel(2); for (int i = 0; i < 100; i++) { Put(Key(i), value); } ASSERT_OK(Flush()); MoveFilesToLevel(1); for (int i = 0; i < 100; i++) { Put(Key(i), value); } ASSERT_OK(Flush()); #ifndef ROCKSDB_LITE ASSERT_EQ("1,1,1", FilesPerLevel()); #endif // !ROCKSDB_LITE env_->random_read_bytes_counter_ = 0; options.statistics->setTickerCount(NO_FILE_OPENS, 0); ReadOptions read_options; auto* iter = NewIterator(read_options); iter->SeekToFirst(); int64_t num_file_opens = 
TestGetTickerCount(options, NO_FILE_OPENS); size_t bytes_read = env_->random_read_bytes_counter_; delete iter; int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES); env_->random_read_bytes_counter_ = 0; options.statistics->setTickerCount(NO_FILE_OPENS, 0); read_options.readahead_size = 1024 * 10; iter = NewIterator(read_options); iter->SeekToFirst(); int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS); size_t bytes_read_readahead = env_->random_read_bytes_counter_; delete iter; int64_t num_file_closes_readahead = TestGetTickerCount(options, NO_FILE_CLOSES); ASSERT_EQ(num_file_opens, num_file_opens_readahead); ASSERT_EQ(num_file_closes, num_file_closes_readahead); ASSERT_GT(bytes_read_readahead, bytes_read); ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3); // Verify correctness. iter = NewIterator(read_options); int count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_EQ(value, iter->value()); count++; } ASSERT_EQ(100, count); for (int i = 0; i < 100; i++) { iter->Seek(Key(i)); ASSERT_EQ(value, iter->value()); } delete iter; } // Insert a key, create a snapshot iterator, overwrite key lots of times, // seek to a smaller key. Expect DBIter to fall back to a seek instead of // going through all the overwrites linearly. TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.max_sequential_skip_in_iterations = 3; options.prefix_extractor = nullptr; options.write_buffer_size = 1 << 27; // big enough to avoid flush options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); // Insert. ASSERT_OK(Put("b", "0")); // Create iterator. ReadOptions ro; std::unique_ptr iter(NewIterator(ro)); // Insert a lot. for (int i = 0; i < 100; ++i) { ASSERT_OK(Put("b", std::to_string(i + 1).c_str())); } #ifndef ROCKSDB_LITE // Check that memtable wasn't flushed. 
std::string val; ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val)); EXPECT_EQ("0", val); #endif // Seek iterator to a smaller key. get_perf_context()->Reset(); iter->Seek("a"); ASSERT_TRUE(iter->Valid()); EXPECT_EQ("b", iter->key().ToString()); EXPECT_EQ("0", iter->value().ToString()); // Check that the seek didn't do too much work. // Checks are not tight, just make sure that everything is well below 100. EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4); EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8); EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10); EXPECT_LT(get_perf_context()->next_on_memtable_count, 10); EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10); // Check that iterator did something like what we expect. EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0); EXPECT_EQ(get_perf_context()->internal_merge_count, 0); EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2); EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2); EXPECT_EQ(1, options.statistics->getTickerCount( NUMBER_OF_RESEEKS_IN_ITERATION)); } TEST_P(DBIteratorTest, Refresh) { ASSERT_OK(Put("x", "y")); std::unique_ptr iter(NewIterator(ReadOptions())); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_OK(Put("c", "d")); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); iter->Refresh(); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("c")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("m", "n")); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("c")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); 
ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); iter->Refresh(); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("c")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("m")), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); iter.reset(); } TEST_P(DBIteratorTest, RefreshWithSnapshot) { ASSERT_OK(Put("x", "y")); const Snapshot* snapshot = db_->GetSnapshot(); ReadOptions options; options.snapshot = snapshot; Iterator* iter = NewIterator(options); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_OK(Put("c", "d")); iter->Seek(Slice("a")); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); Status s; s = iter->Refresh(); ASSERT_TRUE(s.IsNotSupported()); db_->ReleaseSnapshot(snapshot); delete iter; } TEST_P(DBIteratorTest, CreationFailure) { SyncPoint::GetInstance()->SetCallBack( "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) { *(reinterpret_cast(arg)) = Status::Corruption("test status"); }); SyncPoint::GetInstance()->EnableProcessing(); Iterator* iter = NewIterator(ReadOptions()); ASSERT_FALSE(iter->Valid()); ASSERT_TRUE(iter->status().IsCorruption()); delete iter; } TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) { Options options = CurrentOptions(); options.max_sequential_skip_in_iterations = 3; DestroyAndReopen(options); // write a bunch of kvs to the database. 
ASSERT_OK(Put("a", "1")); ASSERT_OK(Put("y", "1")); ASSERT_OK(Put("y1", "1")); ASSERT_OK(Put("y2", "1")); ASSERT_OK(Put("y3", "1")); ASSERT_OK(Put("z", "1")); ASSERT_OK(Flush()); ASSERT_OK(Put("a", "1")); ASSERT_OK(Put("z", "1")); ASSERT_OK(Put("bar", "1")); ASSERT_OK(Put("foo", "1")); std::string upper_bound = "x"; Slice ub_slice(upper_bound); ReadOptions ro; ro.iterate_upper_bound = &ub_slice; ro.max_skippable_internal_keys = 1000; Iterator* iter = NewIterator(ro); iter->Seek("foo"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("foo", iter->key().ToString()); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("bar", iter->key().ToString()); delete iter; } TEST_P(DBIteratorTest, TableFilter) { ASSERT_OK(Put("a", "1")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("b", "2")); ASSERT_OK(Put("c", "3")); dbfull()->Flush(FlushOptions()); ASSERT_OK(Put("d", "4")); ASSERT_OK(Put("e", "5")); ASSERT_OK(Put("f", "6")); dbfull()->Flush(FlushOptions()); // Ensure the table_filter callback is called once for each table. { std::set unseen{1, 2, 3}; ReadOptions opts; opts.table_filter = [&](const TableProperties& props) { auto it = unseen.find(props.num_entries); if (it == unseen.end()) { ADD_FAILURE() << "saw table properties with an unexpected " << props.num_entries << " entries"; } else { unseen.erase(it); } return true; }; auto iter = NewIterator(opts); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->1"); iter->Next(); ASSERT_EQ(IterStatus(iter), "b->2"); iter->Next(); ASSERT_EQ(IterStatus(iter), "c->3"); iter->Next(); ASSERT_EQ(IterStatus(iter), "d->4"); iter->Next(); ASSERT_EQ(IterStatus(iter), "e->5"); iter->Next(); ASSERT_EQ(IterStatus(iter), "f->6"); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_TRUE(unseen.empty()); delete iter; } // Ensure returning false in the table_filter hides the keys from that table // during iteration. 
{
    ReadOptions opts;
    opts.table_filter = [](const TableProperties& props) {
      return props.num_entries != 2;
    };
    auto iter = NewIterator(opts);
    iter->SeekToFirst();
    ASSERT_EQ(IterStatus(iter), "a->1");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "d->4");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "e->5");
    iter->Next();
    ASSERT_EQ(IterStatus(iter), "f->6");
    iter->Next();
    ASSERT_FALSE(iter->Valid());
    delete iter;
  }
}

// SeekForPrev() followed by Prev() under an upper bound and a snapshot, with
// enough versions of "foo" to exceed max_sequential_skip_in_iterations.
TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
  Options options = CurrentOptions();
  options.max_sequential_skip_in_iterations = 3;
  DestroyAndReopen(options);

  // write a bunch of kvs to the database.
  ASSERT_OK(Put("a", "1"));
  ASSERT_OK(Put("y", "1"));
  ASSERT_OK(Put("z", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("a", "1"));
  ASSERT_OK(Put("z", "1"));
  ASSERT_OK(Put("bar", "1"));
  ASSERT_OK(Put("foo", "1"));
  ASSERT_OK(Put("foo", "2"));

  ASSERT_OK(Put("foo", "3"));
  ASSERT_OK(Put("foo", "4"));
  ASSERT_OK(Put("foo", "5"));
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(Put("foo", "6"));

  std::string upper_bound = "x";
  Slice ub_slice(upper_bound);
  ReadOptions ro;
  ro.snapshot = snapshot;
  ro.iterate_upper_bound = &ub_slice;

  Iterator* iter = NewIterator(ro);
  iter->SeekForPrev("goo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key().ToString());
  iter->Prev();

  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key().ToString());

  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

// Checks the NUMBER_ITER_SKIP ticker after forward and backward scans over
// overwritten and deleted keys, with and without an upper bound.
TEST_P(DBIteratorTest, SkipStatistics) {
  Options options = CurrentOptions();
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(options);

  int skip_count = 0;

  // write a bunch of kvs to the database.
  ASSERT_OK(Put("a", "1"));
  ASSERT_OK(Put("b", "1"));
  ASSERT_OK(Put("c", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("d", "1"));
  ASSERT_OK(Put("e", "1"));
  ASSERT_OK(Put("f", "1"));
  ASSERT_OK(Put("a", "2"));
  ASSERT_OK(Put("b", "2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Delete("d"));
  ASSERT_OK(Delete("e"));
  ASSERT_OK(Delete("f"));

  Iterator* iter = NewIterator(ReadOptions());
  int count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_OK(iter->status());
    count++;
  }
  ASSERT_EQ(count, 3);
  delete iter;
  skip_count += 8;  // 3 deletes + 3 original keys + 2 lower in sequence
  ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));

  iter = NewIterator(ReadOptions());
  count = 0;
  for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
    ASSERT_OK(iter->status());
    count++;
  }
  ASSERT_EQ(count, 3);
  delete iter;
  skip_count += 8;  // Same as above, but in reverse order
  ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));

  ASSERT_OK(Put("aa", "1"));
  ASSERT_OK(Put("ab", "1"));
  ASSERT_OK(Put("ac", "1"));
  ASSERT_OK(Put("ad", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(Delete("ab"));
  ASSERT_OK(Delete("ac"));
  ASSERT_OK(Delete("ad"));

  ReadOptions ro;
  Slice prefix("b");
  ro.iterate_upper_bound = &prefix;

  iter = NewIterator(ro);
  count = 0;
  for (iter->Seek("aa"); iter->Valid(); iter->Next()) {
    ASSERT_OK(iter->status());
    count++;
  }
  ASSERT_EQ(count, 1);
  delete iter;
  skip_count += 6;  // 3 deletes + 3 original keys
  ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));

  iter = NewIterator(ro);
  count = 0;
  for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
    ASSERT_OK(iter->status());
    count++;
  }
  ASSERT_EQ(count, 2);
  delete iter;
  // 3 deletes + 3 original keys + lower sequence of "a"
  skip_count += 7;
  ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
}

// After Next() fails with Incomplete because of max_skippable_internal_keys,
// the reported internal key can be used to resume with a fresh iterator.
TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  ReadOptions ropts;
  ropts.max_skippable_internal_keys = 2;

  ASSERT_OK(Put("1", "val_1"));
  // Add more tombstones than max_skippable_internal_keys so that Next() fails.
  ASSERT_OK(Delete("2"));
  ASSERT_OK(Delete("3"));
  ASSERT_OK(Delete("4"));
  ASSERT_OK(Delete("5"));
  ASSERT_OK(Put("6", "val_6"));

  std::unique_ptr<Iterator> iter(NewIterator(ropts));
  iter->SeekToFirst();

  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key().ToString(), "1");
  ASSERT_EQ(iter->value().ToString(), "val_1");

  // This should fail as incomplete due to too many non-visible internal keys on
  // the way to the next valid user key.
  iter->Next();
  ASSERT_TRUE(!iter->Valid());
  ASSERT_TRUE(iter->status().IsIncomplete());

  // Get the internal key at which Next() failed.
  std::string prop_value;
  ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
  ASSERT_EQ("4", prop_value);

  // Create a new iterator to seek to the internal key.
  std::unique_ptr<Iterator> iter2(NewIterator(ropts));
  iter2->Seek(prop_value);
  ASSERT_TRUE(iter2->Valid());
  ASSERT_OK(iter2->status());

  ASSERT_EQ(iter2->key().ToString(), "6");
  ASSERT_EQ(iter2->value().ToString(), "val_6");
}

// Reproduces a former bug where iterator would skip some records when DBIter
// re-seeks subiterator with Incomplete status.
TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
  Options options = CurrentOptions();
  BlockBasedTableOptions table_options;
  // Make sure the sst file has more than one block.
  table_options.flush_block_policy_factory =
      std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  // Two records in sst file, each in its own block.
  ASSERT_OK(Put("b", ""));
  ASSERT_OK(Put("d", ""));
  ASSERT_OK(Flush());

  // Create a nonblocking iterator before writing to memtable.
  ReadOptions ropt;
  ropt.read_tier = kBlockCacheTier;
  std::unique_ptr<Iterator> iter(NewIterator(ropt));

  // Overwrite a key in memtable many times to hit
  // max_sequential_skip_in_iterations (which is 8 by default).
  for (int i = 0; i < 20; ++i) {
    ASSERT_OK(Put("c", ""));
  }

  // Load the second block in sst file into the block cache.
{ std::unique_ptr iter2(NewIterator(ReadOptions())); iter2->Seek("d"); } // Finally seek the nonblocking iterator. iter->Seek("a"); // With the bug, the status used to be OK, and the iterator used to point to // "d". EXPECT_TRUE(iter->status().IsIncomplete()); } TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) { Put("a", ""); Put("b", ""); Flush(); ReadOptions ropt; Slice ub = "b"; ropt.iterate_upper_bound = &ub; std::unique_ptr it(dbfull()->NewIterator(ropt)); it->SeekForPrev("a"); ASSERT_TRUE(it->Valid()); ASSERT_OK(it->status()); ASSERT_EQ("a", it->key().ToString()); it->Next(); ASSERT_FALSE(it->Valid()); ASSERT_OK(it->status()); it->SeekForPrev("a"); ASSERT_OK(it->status()); ASSERT_TRUE(it->Valid()); ASSERT_EQ("a", it->key().ToString()); } TEST_P(DBIteratorTest, AvoidReseekLevelIterator) { Options options = CurrentOptions(); options.compression = CompressionType::kNoCompression; BlockBasedTableOptions table_options; table_options.block_size = 800; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); Random rnd(301); std::string random_str = RandomString(&rnd, 180); ASSERT_OK(Put("1", random_str)); ASSERT_OK(Put("2", random_str)); ASSERT_OK(Put("3", random_str)); ASSERT_OK(Put("4", random_str)); // A new block ASSERT_OK(Put("5", random_str)); ASSERT_OK(Put("6", random_str)); ASSERT_OK(Put("7", random_str)); ASSERT_OK(Flush()); ASSERT_OK(Put("8", random_str)); ASSERT_OK(Put("9", random_str)); ASSERT_OK(Flush()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); int num_find_file_in_level = 0; int num_idx_blk_seek = 0; SyncPoint::GetInstance()->SetCallBack( "LevelIterator::Seek:BeforeFindFile", [&](void* /*arg*/) { num_find_file_in_level++; }); SyncPoint::GetInstance()->SetCallBack( "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; }); SyncPoint::GetInstance()->EnableProcessing(); { std::unique_ptr iter(NewIterator(ReadOptions())); iter->Seek("1"); ASSERT_TRUE(iter->Valid()); 
ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(1, num_idx_blk_seek); iter->Seek("2"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(1, num_idx_blk_seek); iter->Seek("3"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(1, num_idx_blk_seek); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(1, num_idx_blk_seek); iter->Seek("5"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(2, num_idx_blk_seek); iter->Seek("6"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(2, num_idx_blk_seek); iter->Seek("7"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(1, num_find_file_in_level); ASSERT_EQ(3, num_idx_blk_seek); iter->Seek("8"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(2, num_find_file_in_level); // Still re-seek because "8" is the boundary key, which has // the same user key as the seek key. ASSERT_EQ(4, num_idx_blk_seek); iter->Seek("5"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(3, num_find_file_in_level); ASSERT_EQ(5, num_idx_blk_seek); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(3, num_find_file_in_level); ASSERT_EQ(5, num_idx_blk_seek); // Seek backward never triggers the index block seek to be skipped iter->Seek("5"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(3, num_find_file_in_level); ASSERT_EQ(6, num_idx_blk_seek); } SyncPoint::GetInstance()->DisableProcessing(); } // MyRocks may change iterate bounds before seek. Simply test to make sure such // usage doesn't break iterator. 
TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) { Options options = CurrentOptions(); options.compression = CompressionType::kNoCompression; BlockBasedTableOptions table_options; table_options.block_size = 100; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); std::string value(50, 'v'); Reopen(options); ASSERT_OK(Put("aaa", value)); ASSERT_OK(Flush()); ASSERT_OK(Put("bbb", "v")); ASSERT_OK(Put("ccc", "v")); ASSERT_OK(Put("ddd", "v")); ASSERT_OK(Flush()); ASSERT_OK(Put("eee", "v")); ASSERT_OK(Flush()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); std::string ub1 = "e"; std::string ub2 = "c"; Slice ub(ub1); ReadOptions read_opts1; read_opts1.iterate_upper_bound = &ub; Iterator* iter = NewIterator(read_opts1); // Seek and iterate accross block boundary. iter->Seek("b"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("bbb", iter->key()); ub = Slice(ub2); iter->Seek("b"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("bbb", iter->key()); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_OK(iter->status()); delete iter; std::string lb1 = "a"; std::string lb2 = "c"; Slice lb(lb1); ReadOptions read_opts2; read_opts2.iterate_lower_bound = &lb; iter = NewIterator(read_opts2); iter->SeekForPrev("d"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("ccc", iter->key()); lb = Slice(lb2); iter->SeekForPrev("d"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("ccc", iter->key()); iter->Prev(); ASSERT_FALSE(iter->Valid()); ASSERT_OK(iter->status()); delete iter; } TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) { ASSERT_OK(Put("aaa", "v")); ASSERT_OK(Put("bbb", "v")); ASSERT_OK(Flush()); ASSERT_OK(Put("ccc", "v")); ASSERT_OK(Put("ddd", "v")); ASSERT_OK(Flush()); // Move both files to bottom level. 
ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); Slice lower_bound("b"); ReadOptions read_opts; read_opts.iterate_lower_bound = &lower_bound; std::unique_ptr iter(NewIterator(read_opts)); iter->SeekForPrev("d"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("ccc", iter->key()); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("bbb", iter->key()); iter->Prev(); ASSERT_FALSE(iter->Valid()); ASSERT_OK(iter->status()); } INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest, testing::Values(true, false)); // Tests how DBIter work with ReadCallback class DBIteratorWithReadCallbackTest : public DBIteratorTest {}; TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) { class TestReadCallback : public ReadCallback { public: explicit TestReadCallback(SequenceNumber _max_visible_seq) : ReadCallback(_max_visible_seq) {} bool IsVisibleFullCheck(SequenceNumber seq) override { return seq <= max_visible_seq_; } }; ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put("foo", "v2")); ASSERT_OK(Put("foo", "v3")); ASSERT_OK(Put("a", "va")); ASSERT_OK(Put("z", "vz")); SequenceNumber seq1 = db_->GetLatestSequenceNumber(); TestReadCallback callback1(seq1); ASSERT_OK(Put("foo", "v4")); ASSERT_OK(Put("foo", "v5")); ASSERT_OK(Put("bar", "v7")); SequenceNumber seq2 = db_->GetLatestSequenceNumber(); auto* cfd = reinterpret_cast(db_->DefaultColumnFamily()) ->cfd(); // The iterator are suppose to see data before seq1. Iterator* iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1); // Seek // The latest value of "foo" before seq1 is "v3" iter->Seek("foo"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("foo", iter->key()); ASSERT_EQ("v3", iter->value()); // "bar" is not visible to the iterator. It will move on to the next key // "foo". 
iter->Seek("bar"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("foo", iter->key()); ASSERT_EQ("v3", iter->value()); // Next // Seek to "a" iter->Seek("a"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("va", iter->value()); // "bar" is not visible to the iterator. It will move on to the next key // "foo". iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("foo", iter->key()); ASSERT_EQ("v3", iter->value()); // Prev // Seek to "z" iter->Seek("z"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("vz", iter->value()); // The previous key is "foo", which is visible to the iterator. iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("foo", iter->key()); ASSERT_EQ("v3", iter->value()); // "bar" is not visible to the iterator. It will move on to the next key "a". iter->Prev(); // skipping "bar" ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("a", iter->key()); ASSERT_EQ("va", iter->value()); // SeekForPrev // The previous key is "foo", which is visible to the iterator. iter->SeekForPrev("y"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("foo", iter->key()); ASSERT_EQ("v3", iter->value()); // "bar" is not visible to the iterator. It will move on to the next key "a". iter->SeekForPrev("bar"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("a", iter->key()); ASSERT_EQ("va", iter->value()); delete iter; // Prev beyond max_sequential_skip_in_iterations uint64_t num_versions = CurrentOptions().max_sequential_skip_in_iterations + 10; for (uint64_t i = 0; i < num_versions; i++) { ASSERT_OK(Put("bar", ToString(i))); } SequenceNumber seq3 = db_->GetLatestSequenceNumber(); TestReadCallback callback2(seq3); ASSERT_OK(Put("bar", "v8")); SequenceNumber seq4 = db_->GetLatestSequenceNumber(); // The iterator is suppose to see data before seq3. 
iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2); // Seek to "z", which is visible. iter->Seek("z"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("vz", iter->value()); // Previous key is "foo" and the last value "v5" is visible. iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("foo", iter->key()); ASSERT_EQ("v5", iter->value()); // Since the number of values of "bar" is more than // max_sequential_skip_in_iterations, Prev() will ultimately fallback to // seek in forward direction. Here we test the fallback seek is correct. // The last visible value should be (num_versions - 1), as "v8" is not // visible. iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); ASSERT_EQ("bar", iter->key()); ASSERT_EQ(ToString(num_versions - 1), iter->value()); delete iter; } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_log_iter_test.cc000066400000000000000000000232751370372246700174620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // Introduction of SyncPoint effectively disabled building and running this test // in Release build. 
// which is a pity, it is a good test #if !defined(ROCKSDB_LITE) #include "db/db_test_util.h" #include "port/stack_trace.h" namespace ROCKSDB_NAMESPACE { class DBTestXactLogIterator : public DBTestBase { public: DBTestXactLogIterator() : DBTestBase("/db_log_iter_test") {} std::unique_ptr OpenTransactionLogIter( const SequenceNumber seq) { std::unique_ptr iter; Status status = dbfull()->GetUpdatesSince(seq, &iter); EXPECT_OK(status); EXPECT_TRUE(iter->Valid()); return iter; } }; namespace { SequenceNumber ReadRecords( std::unique_ptr& iter, int& count) { count = 0; SequenceNumber lastSequence = 0; BatchResult res; while (iter->Valid()) { res = iter->GetBatch(); EXPECT_TRUE(res.sequence > lastSequence); ++count; lastSequence = res.sequence; EXPECT_OK(iter->status()); iter->Next(); } return res.sequence; } void ExpectRecords( const int expected_no_records, std::unique_ptr& iter) { int num_records; ReadRecords(iter, num_records); ASSERT_EQ(num_records, expected_no_records); } } // namespace TEST_F(DBTestXactLogIterator, TransactionLogIterator) { do { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Put(0, "key1", DummyString(1024)); Put(1, "key2", DummyString(1024)); Put(1, "key2", DummyString(1024)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3U); { auto iter = OpenTransactionLogIter(0); ExpectRecords(3, iter); } ReopenWithColumnFamilies({"default", "pikachu"}, options); env_->SleepForMicroseconds(2 * 1000 * 1000); { Put(0, "key4", DummyString(1024)); Put(1, "key5", DummyString(1024)); Put(0, "key6", DummyString(1024)); } { auto iter = OpenTransactionLogIter(0); ExpectRecords(6, iter); } } while (ChangeCompactOptions()); } #ifndef NDEBUG // sync point is not included with DNDEBUG build TEST_F(DBTestXactLogIterator, TransactionLogIteratorRace) { static const int LOG_ITERATOR_RACE_TEST_COUNT = 2; static const char* sync_points[LOG_ITERATOR_RACE_TEST_COUNT][4] = { {"WalManager::GetSortedWalFiles:1", 
"WalManager::PurgeObsoleteFiles:1", "WalManager::PurgeObsoleteFiles:2", "WalManager::GetSortedWalFiles:2"}, {"WalManager::GetSortedWalsOfType:1", "WalManager::PurgeObsoleteFiles:1", "WalManager::PurgeObsoleteFiles:2", "WalManager::GetSortedWalsOfType:2"}}; for (int test = 0; test < LOG_ITERATOR_RACE_TEST_COUNT; ++test) { // Setup sync point dependency to reproduce the race condition of // a log file moved to archived dir, in the middle of GetSortedWalFiles ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {sync_points[test][0], sync_points[test][1]}, {sync_points[test][2], sync_points[test][3]}, }); do { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Options options = OptionsForLogIterTest(); DestroyAndReopen(options); Put("key1", DummyString(1024)); dbfull()->Flush(FlushOptions()); Put("key2", DummyString(1024)); dbfull()->Flush(FlushOptions()); Put("key3", DummyString(1024)); dbfull()->Flush(FlushOptions()); Put("key4", DummyString(1024)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4U); dbfull()->FlushWAL(false); { auto iter = OpenTransactionLogIter(0); ExpectRecords(4, iter); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // trigger async flush, and log move. 
Well, log move will // wait until the GetSortedWalFiles:1 to reproduce the race // condition FlushOptions flush_options; flush_options.wait = false; dbfull()->Flush(flush_options); // "key5" would be written in a new memtable and log Put("key5", DummyString(1024)); dbfull()->FlushWAL(false); { // this iter would miss "key4" if not fixed auto iter = OpenTransactionLogIter(0); ExpectRecords(5, iter); } } while (ChangeCompactOptions()); } } #endif TEST_F(DBTestXactLogIterator, TransactionLogIteratorStallAtLastRecord) { do { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); Put("key1", DummyString(1024)); auto iter = OpenTransactionLogIter(0); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); iter->Next(); ASSERT_TRUE(!iter->Valid()); ASSERT_OK(iter->status()); Put("key2", DummyString(1024)); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); } while (ChangeCompactOptions()); } TEST_F(DBTestXactLogIterator, TransactionLogIteratorCheckAfterRestart) { do { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); Put("key1", DummyString(1024)); Put("key2", DummyString(1023)); dbfull()->Flush(FlushOptions()); Reopen(options); auto iter = OpenTransactionLogIter(0); ExpectRecords(2, iter); } while (ChangeCompactOptions()); } TEST_F(DBTestXactLogIterator, TransactionLogIteratorCorruptedLog) { do { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); for (int i = 0; i < 1024; i++) { Put("key"+ToString(i), DummyString(10)); } dbfull()->Flush(FlushOptions()); dbfull()->FlushWAL(false); // Corrupt this log to create a gap ROCKSDB_NAMESPACE::VectorLogPtr wal_files; ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); const auto logfile_path = dbname_ + "/" + wal_files.front()->PathName(); if (mem_env_) { mem_env_->Truncate(logfile_path, wal_files.front()->SizeFileBytes() / 2); } else { ASSERT_EQ(0, truncate(logfile_path.c_str(), wal_files.front()->SizeFileBytes() / 2)); } // Insert a new entry to a new log file 
Put("key1025", DummyString(10)); dbfull()->FlushWAL(false); // Try to read from the beginning. Should stop before the gap and read less // than 1025 entries auto iter = OpenTransactionLogIter(0); int count; SequenceNumber last_sequence_read = ReadRecords(iter, count); ASSERT_LT(last_sequence_read, 1025U); // Try to read past the gap, should be able to seek to key1025 auto iter2 = OpenTransactionLogIter(last_sequence_read + 1); ExpectRecords(1, iter2); } while (ChangeCompactOptions()); } TEST_F(DBTestXactLogIterator, TransactionLogIteratorBatchOperations) { do { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); WriteBatch batch; batch.Put(handles_[1], "key1", DummyString(1024)); batch.Put(handles_[0], "key2", DummyString(1024)); batch.Put(handles_[1], "key3", DummyString(1024)); batch.Delete(handles_[0], "key2"); dbfull()->Write(WriteOptions(), &batch); Flush(1); Flush(0); ReopenWithColumnFamilies({"default", "pikachu"}, options); Put(1, "key4", DummyString(1024)); auto iter = OpenTransactionLogIter(3); ExpectRecords(2, iter); } while (ChangeCompactOptions()); } TEST_F(DBTestXactLogIterator, TransactionLogIteratorBlobs) { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); { WriteBatch batch; batch.Put(handles_[1], "key1", DummyString(1024)); batch.Put(handles_[0], "key2", DummyString(1024)); batch.PutLogData(Slice("blob1")); batch.Put(handles_[1], "key3", DummyString(1024)); batch.PutLogData(Slice("blob2")); batch.Delete(handles_[0], "key2"); dbfull()->Write(WriteOptions(), &batch); ReopenWithColumnFamilies({"default", "pikachu"}, options); } auto res = OpenTransactionLogIter(0)->GetBatch(); struct Handler : public WriteBatch::Handler { std::string seen; Status PutCF(uint32_t cf, const Slice& key, const Slice& value) override { seen += "Put(" + ToString(cf) + ", " + key.ToString() + ", " + ToString(value.size()) + ")"; return Status::OK(); 
} Status MergeCF(uint32_t cf, const Slice& key, const Slice& value) override { seen += "Merge(" + ToString(cf) + ", " + key.ToString() + ", " + ToString(value.size()) + ")"; return Status::OK(); } void LogData(const Slice& blob) override { seen += "LogData(" + blob.ToString() + ")"; } Status DeleteCF(uint32_t cf, const Slice& key) override { seen += "Delete(" + ToString(cf) + ", " + key.ToString() + ")"; return Status::OK(); } } handler; res.writeBatchPtr->Iterate(&handler); ASSERT_EQ( "Put(1, key1, 1024)" "Put(0, key2, 1024)" "LogData(blob1)" "Put(1, key3, 1024)" "LogData(blob2)" "Delete(0, key2)", handler.seen); } } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) int main(int argc, char** argv) { #if !defined(ROCKSDB_LITE) ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); #else (void) argc; (void) argv; return 0; #endif } rocksdb-6.11.4/db/db_logical_block_size_cache_test.cc000066400000000000000000000425431370372246700226160ustar00rootroot00000000000000// Copyright (c) 2020-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "test_util/testharness.h"

#ifdef OS_LINUX
#include "env/io_posix.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {

// Env wrapper that forwards DB path (un)registration to a caller-supplied
// LogicalBlockSizeCache, so tests can inspect the cache contents.
class EnvWithCustomLogicalBlockSizeCache : public EnvWrapper {
 public:
  EnvWithCustomLogicalBlockSizeCache(Env* env, LogicalBlockSizeCache* cache)
      : EnvWrapper(env), cache_(cache) {}

  // Takes a reference on (and caches the block size of) each path.
  Status RegisterDbPaths(const std::vector<std::string>& paths) override {
    return cache_->RefAndCacheLogicalBlockSize(paths);
  }

  // Drops one reference per path; always reports success.
  Status UnregisterDbPaths(const std::vector<std::string>& paths) override {
    cache_->UnrefAndTryRemoveCachedLogicalBlockSize(paths);
    return Status::OK();
  }

 private:
  LogicalBlockSizeCache* cache_;  // not owned
};

// Fixture that wires a LogicalBlockSizeCache with stubbed size lookups into
// an Env, plus a set of per-thread test paths.
class DBLogicalBlockSizeCacheTest : public testing::Test {
 public:
  DBLogicalBlockSizeCacheTest()
      : dbname_(test::PerThreadDBPath("logical_block_size_cache_test")),
        data_path_0_(dbname_ + "/data_path_0"),
        data_path_1_(dbname_ + "/data_path_1"),
        cf_path_0_(dbname_ + "/cf_path_0"),
        cf_path_1_(dbname_ + "/cf_path_1") {
    // Stub lookups: the fd variant echoes the fd, the directory variant
    // always reports 1024.
    cache_.reset(new LogicalBlockSizeCache(
        [](int fd) { return fd; },
        [](const std::string& /*dir*/, size_t* size) {
          *size = 1024;
          return Status::OK();
        }));
    env_.reset(
        new EnvWithCustomLogicalBlockSizeCache(Env::Default(), cache_.get()));
  }

 protected:
  std::string dbname_;
  std::string data_path_0_;
  std::string data_path_1_;
  std::string cf_path_0_;
  std::string cf_path_1_;
  std::unique_ptr<LogicalBlockSizeCache> cache_;
  std::unique_ptr<Env> env_;
};

TEST_F(DBLogicalBlockSizeCacheTest, OpenClose) {
  // Tests that Open will cache the logical block size for data paths,
  // and Close will remove the cached sizes.
Options options; options.create_if_missing = true; options.env = env_.get(); options.db_paths = {{data_path_0_, 2048}, {data_path_1_, 2048}}; for (int i = 0; i < 2; i++) { DB* db; if (!i) { printf("Open\n"); ASSERT_OK(DB::Open(options, dbname_, &db)); } else { #ifdef ROCKSDB_LITE break; #else printf("OpenForReadOnly\n"); ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db)); #endif } ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(data_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); ASSERT_OK(db->Close()); ASSERT_EQ(0, cache_->Size()); delete db; } ASSERT_OK(DestroyDB(dbname_, options, {})); } TEST_F(DBLogicalBlockSizeCacheTest, OpenDelete) { // Tests that Open will cache the logical block size for data paths, // and delete the db pointer will remove the cached sizes. Options options; options.create_if_missing = true; options.env = env_.get(); for (int i = 0; i < 2; i++) { DB* db; if (!i) { printf("Open\n"); ASSERT_OK(DB::Open(options, dbname_, &db)); } else { #ifdef ROCKSDB_LITE break; #else printf("OpenForReadOnly\n"); ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db)); #endif } ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); delete db; ASSERT_EQ(0, cache_->Size()); } ASSERT_OK(DestroyDB(dbname_, options, {})); } TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamily) { // Tests that CreateColumnFamily will cache the cf_paths, // drop the column family handle won't drop the cache, // drop and then delete the column family handle will drop the cache. 
Options options; options.create_if_missing = true; options.env = env_.get(); ColumnFamilyOptions cf_options; cf_options.cf_paths = {{cf_path_0_, 1024}, {cf_path_1_, 2048}}; DB* db; ASSERT_OK(DB::Open(options, dbname_, &db)); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ColumnFamilyHandle* cf = nullptr; ASSERT_OK(db->CreateColumnFamily(cf_options, "cf", &cf)); ASSERT_EQ(3, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); // Drop column family does not drop cache. ASSERT_OK(db->DropColumnFamily(cf)); ASSERT_EQ(3, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); // Delete handle will drop cache. ASSERT_OK(db->DestroyColumnFamilyHandle(cf)); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); delete db; ASSERT_EQ(0, cache_->Size()); ASSERT_OK(DestroyDB(dbname_, options, {{"cf", cf_options}})); } TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) { // Tests that CreateColumnFamilies will cache the cf_paths, // drop the column family handle won't drop the cache, // drop and then delete the column family handle will drop the cache. 
Options options; options.create_if_missing = true; options.env = env_.get(); ColumnFamilyOptions cf_options; cf_options.cf_paths = {{cf_path_0_, 1024}}; DB* db; ASSERT_OK(DB::Open(options, dbname_, &db)); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); std::vector cfs; ASSERT_OK(db->CreateColumnFamilies(cf_options, {"cf1", "cf2"}, &cfs)); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); // Drop column family does not drop cache. for (ColumnFamilyHandle* cf : cfs) { ASSERT_OK(db->DropColumnFamily(cf)); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); } // Delete one handle will not drop cache because another handle is still // referencing cf_path_0_. ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); // Delete the last handle will drop cache. ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); delete db; ASSERT_EQ(0, cache_->Size()); ASSERT_OK(DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}})); } TEST_F(DBLogicalBlockSizeCacheTest, OpenWithColumnFamilies) { // Tests that Open two column families with the same cf_path will cache the // cf_path and have 2 references to the cached size, // drop the column family handle won't drop the cache, // drop and then delete the column family handle will drop the cache. 
Options options; options.create_if_missing = true; options.env = env_.get(); ColumnFamilyOptions cf_options; cf_options.cf_paths = {{cf_path_0_, 1024}}; for (int i = 0; i < 2; i++) { DB* db; ColumnFamilyHandle* cf1 = nullptr; ColumnFamilyHandle* cf2 = nullptr; ASSERT_OK(DB::Open(options, dbname_, &db)); ASSERT_OK(db->CreateColumnFamily(cf_options, "cf1", &cf1)); ASSERT_OK(db->CreateColumnFamily(cf_options, "cf2", &cf2)); ASSERT_OK(db->DestroyColumnFamilyHandle(cf1)); ASSERT_OK(db->DestroyColumnFamilyHandle(cf2)); delete db; ASSERT_EQ(0, cache_->Size()); std::vector cfs; if (!i) { printf("Open\n"); ASSERT_OK(DB::Open(options, dbname_, {{"cf1", cf_options}, {"cf2", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db)); } else { #ifdef ROCKSDB_LITE break; #else printf("OpenForReadOnly\n"); ASSERT_OK(DB::OpenForReadOnly(options, dbname_, {{"cf1", cf_options}, {"cf2", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db)); #endif } // Logical block sizes of dbname_ and cf_path_0_ are cached during Open. ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); // Drop handles won't drop the cache. ASSERT_OK(db->DropColumnFamily(cfs[0])); ASSERT_OK(db->DropColumnFamily(cfs[1])); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); // Delete 1st handle won't drop the cache for cf_path_0_. ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); // Delete 2nd handle will drop the cache for cf_path_0_. 
ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); // Delete the default handle won't affect the cache because db still refers // to the default CF. ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[2])); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); delete db; ASSERT_EQ(0, cache_->Size()); } ASSERT_OK(DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}})); } TEST_F(DBLogicalBlockSizeCacheTest, DestroyColumnFamilyHandle) { // Tests that destroy column family without dropping won't drop the cache, // because compaction and flush might still need to get logical block size // when opening new files. Options options; options.create_if_missing = true; options.env = env_.get(); ColumnFamilyOptions cf_options; cf_options.cf_paths = {{cf_path_0_, 1024}}; DB* db; ASSERT_OK(DB::Open(options, dbname_, &db)); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ColumnFamilyHandle* cf = nullptr; ASSERT_OK(db->CreateColumnFamily(cf_options, "cf", &cf)); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); // Delete handle won't drop cache. ASSERT_OK(db->DestroyColumnFamilyHandle(cf)); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); delete db; ASSERT_EQ(0, cache_->Size()); // Open with column families. 
std::vector cfs; for (int i = 0; i < 2; i++) { if (!i) { printf("Open\n"); ASSERT_OK(DB::Open( options, dbname_, {{"cf", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db)); } else { #ifdef ROCKSDB_LITE break; #else printf("OpenForReadOnly\n"); ASSERT_OK(DB::OpenForReadOnly( options, dbname_, {{"cf", cf_options}, {"default", ColumnFamilyOptions()}}, &cfs, &db)); #endif } // cf_path_0_ and dbname_ are cached. ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); // Deleting handle won't drop cache. ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); ASSERT_EQ(1, cache_->GetRefCount(dbname_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); delete db; ASSERT_EQ(0, cache_->Size()); } ASSERT_OK(DestroyDB(dbname_, options, {{"cf", cf_options}})); } TEST_F(DBLogicalBlockSizeCacheTest, MultiDBWithDifferentPaths) { // Tests the cache behavior when there are multiple DBs sharing the same env // with different db_paths and cf_paths. 
Options options; options.create_if_missing = true; options.env = env_.get(); ASSERT_OK(env_->CreateDirIfMissing(dbname_)); DB* db0; ASSERT_OK(DB::Open(options, data_path_0_, &db0)); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ColumnFamilyOptions cf_options0; cf_options0.cf_paths = {{cf_path_0_, 1024}}; ColumnFamilyHandle* cf0; db0->CreateColumnFamily(cf_options0, "cf", &cf0); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); DB* db1; ASSERT_OK(DB::Open(options, data_path_1_, &db1)); ASSERT_EQ(3, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); ASSERT_TRUE(cache_->Contains(data_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); ColumnFamilyOptions cf_options1; cf_options1.cf_paths = {{cf_path_1_, 1024}}; ColumnFamilyHandle* cf1; db1->CreateColumnFamily(cf_options1, "cf", &cf1); ASSERT_EQ(4, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); ASSERT_TRUE(cache_->Contains(data_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); ASSERT_TRUE(cache_->Contains(cf_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); db0->DestroyColumnFamilyHandle(cf0); delete db0; ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_1_)); ASSERT_TRUE(cache_->Contains(cf_path_1_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_1_)); ASSERT_OK(DestroyDB(data_path_0_, options, {{"cf", cf_options0}})); db1->DestroyColumnFamilyHandle(cf1); delete db1; ASSERT_EQ(0, cache_->Size()); ASSERT_OK(DestroyDB(data_path_1_, options, 
{{"cf", cf_options1}})); } TEST_F(DBLogicalBlockSizeCacheTest, MultiDBWithSamePaths) { // Tests the cache behavior when there are multiple DBs sharing the same env // with the same db_paths and cf_paths. Options options; options.create_if_missing = true; options.env = env_.get(); options.db_paths = {{data_path_0_, 1024}}; ColumnFamilyOptions cf_options; cf_options.cf_paths = {{cf_path_0_, 1024}}; ASSERT_OK(env_->CreateDirIfMissing(dbname_)); DB* db0; ASSERT_OK(DB::Open(options, dbname_ + "/db0", &db0)); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ColumnFamilyHandle* cf0; db0->CreateColumnFamily(cf_options, "cf", &cf0); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); DB* db1; ASSERT_OK(DB::Open(options, dbname_ + "/db1", &db1)); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); ColumnFamilyHandle* cf1; db1->CreateColumnFamily(cf_options, "cf", &cf1); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); db0->DestroyColumnFamilyHandle(cf0); delete db0; ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(data_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(data_path_0_)); ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); ASSERT_OK(DestroyDB(dbname_ + "/db0", options, {{"cf", cf_options}})); db1->DestroyColumnFamilyHandle(cf1); delete db1; ASSERT_EQ(0, cache_->Size()); ASSERT_OK(DestroyDB(dbname_ + "/db1", options, {{"cf", cf_options}})); } } // namespace 
ROCKSDB_NAMESPACE #endif // OS_LINUX int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_memtable_test.cc000066400000000000000000000263651370372246700174470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include "db/db_test_util.h" #include "db/memtable.h" #include "db/range_del_aggregator.h" #include "port/stack_trace.h" #include "rocksdb/memtablerep.h" #include "rocksdb/slice_transform.h" namespace ROCKSDB_NAMESPACE { class DBMemTableTest : public DBTestBase { public: DBMemTableTest() : DBTestBase("/db_memtable_test") {} }; class MockMemTableRep : public MemTableRep { public: explicit MockMemTableRep(Allocator* allocator, MemTableRep* rep) : MemTableRep(allocator), rep_(rep), num_insert_with_hint_(0) {} KeyHandle Allocate(const size_t len, char** buf) override { return rep_->Allocate(len, buf); } void Insert(KeyHandle handle) override { rep_->Insert(handle); } void InsertWithHint(KeyHandle handle, void** hint) override { num_insert_with_hint_++; EXPECT_NE(nullptr, hint); last_hint_in_ = *hint; rep_->InsertWithHint(handle, hint); last_hint_out_ = *hint; } bool Contains(const char* key) const override { return rep_->Contains(key); } void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override { rep_->Get(k, callback_args, callback_func); } size_t ApproximateMemoryUsage() override { return rep_->ApproximateMemoryUsage(); } Iterator* GetIterator(Arena* arena) override { return rep_->GetIterator(arena); } void* last_hint_in() { return last_hint_in_; } void* last_hint_out() { return last_hint_out_; } int num_insert_with_hint() { return num_insert_with_hint_; } private: std::unique_ptr rep_; void* 
last_hint_in_; void* last_hint_out_; int num_insert_with_hint_; }; class MockMemTableRepFactory : public MemTableRepFactory { public: MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp, Allocator* allocator, const SliceTransform* transform, Logger* logger) override { SkipListFactory factory; MemTableRep* skiplist_rep = factory.CreateMemTableRep(cmp, allocator, transform, logger); mock_rep_ = new MockMemTableRep(allocator, skiplist_rep); return mock_rep_; } MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& cmp, Allocator* allocator, const SliceTransform* transform, Logger* logger, uint32_t column_family_id) override { last_column_family_id_ = column_family_id; return CreateMemTableRep(cmp, allocator, transform, logger); } const char* Name() const override { return "MockMemTableRepFactory"; } MockMemTableRep* rep() { return mock_rep_; } bool IsInsertConcurrentlySupported() const override { return false; } uint32_t GetLastColumnFamilyId() { return last_column_family_id_; } private: MockMemTableRep* mock_rep_; // workaround since there's no port::kMaxUint32 yet. 
uint32_t last_column_family_id_ = static_cast(-1); }; class TestPrefixExtractor : public SliceTransform { public: const char* Name() const override { return "TestPrefixExtractor"; } Slice Transform(const Slice& key) const override { const char* p = separator(key); if (p == nullptr) { return Slice(); } return Slice(key.data(), p - key.data() + 1); } bool InDomain(const Slice& key) const override { return separator(key) != nullptr; } bool InRange(const Slice& /*key*/) const override { return false; } private: const char* separator(const Slice& key) const { return reinterpret_cast(memchr(key.data(), '_', key.size())); } }; // Test that ::Add properly returns false when inserting duplicate keys TEST_F(DBMemTableTest, DuplicateSeq) { SequenceNumber seq = 123; std::string value; Status s; MergeContext merge_context; Options options; InternalKeyComparator ikey_cmp(options.comparator); ReadRangeDelAggregator range_del_agg(&ikey_cmp, kMaxSequenceNumber /* upper_bound */); // Create a MemTable InternalKeyComparator cmp(BytewiseComparator()); auto factory = std::make_shared(); options.memtable_factory = factory; ImmutableCFOptions ioptions(options); WriteBufferManager wb(options.db_write_buffer_size); MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); // Write some keys and make sure it returns false on duplicates bool res; res = mem->Add(seq, kTypeValue, "key", "value2"); ASSERT_TRUE(res); res = mem->Add(seq, kTypeValue, "key", "value2"); ASSERT_FALSE(res); // Changing the type should still cause the duplicatae key res = mem->Add(seq, kTypeMerge, "key", "value2"); ASSERT_FALSE(res); // Changing the seq number will make the key fresh res = mem->Add(seq + 1, kTypeMerge, "key", "value2"); ASSERT_TRUE(res); // Test with different types for duplicate keys res = mem->Add(seq, kTypeDeletion, "key", ""); ASSERT_FALSE(res); res = mem->Add(seq, kTypeSingleDeletion, "key", ""); ASSERT_FALSE(res); // Test the 
duplicate keys under stress for (int i = 0; i < 10000; i++) { bool insert_dup = i % 10 == 1; if (!insert_dup) { seq++; } res = mem->Add(seq, kTypeValue, "foo", "value" + ToString(seq)); if (insert_dup) { ASSERT_FALSE(res); } else { ASSERT_TRUE(res); } } delete mem; // Test with InsertWithHint options.memtable_insert_with_hint_prefix_extractor.reset( new TestPrefixExtractor()); // which uses _ to extract the prefix ioptions = ImmutableCFOptions(options); mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); // Insert a duplicate key with _ in it res = mem->Add(seq, kTypeValue, "key_1", "value"); ASSERT_TRUE(res); res = mem->Add(seq, kTypeValue, "key_1", "value"); ASSERT_FALSE(res); delete mem; // Test when InsertConcurrently will be invoked options.allow_concurrent_memtable_write = true; ioptions = ImmutableCFOptions(options); mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); MemTablePostProcessInfo post_process_info; res = mem->Add(seq, kTypeValue, "key", "value", true, &post_process_info); ASSERT_TRUE(res); res = mem->Add(seq, kTypeValue, "key", "value", true, &post_process_info); ASSERT_FALSE(res); delete mem; } // A simple test to verify that the concurrent merge writes is functional TEST_F(DBMemTableTest, ConcurrentMergeWrite) { int num_ops = 1000; std::string value; Status s; MergeContext merge_context; Options options; // A merge operator that is not sensitive to concurrent writes since in this // test we don't order the writes. 
options.merge_operator = MergeOperators::CreateUInt64AddOperator(); // Create a MemTable InternalKeyComparator cmp(BytewiseComparator()); auto factory = std::make_shared(); options.memtable_factory = factory; options.allow_concurrent_memtable_write = true; ImmutableCFOptions ioptions(options); WriteBufferManager wb(options.db_write_buffer_size); MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); // Put 0 as the base PutFixed64(&value, static_cast(0)); bool res = mem->Add(0, kTypeValue, "key", value); ASSERT_TRUE(res); value.clear(); // Write Merge concurrently ROCKSDB_NAMESPACE::port::Thread write_thread1([&]() { MemTablePostProcessInfo post_process_info1; std::string v1; for (int seq = 1; seq < num_ops / 2; seq++) { PutFixed64(&v1, seq); bool res1 = mem->Add(seq, kTypeMerge, "key", v1, true, &post_process_info1); ASSERT_TRUE(res1); v1.clear(); } }); ROCKSDB_NAMESPACE::port::Thread write_thread2([&]() { MemTablePostProcessInfo post_process_info2; std::string v2; for (int seq = num_ops / 2; seq < num_ops; seq++) { PutFixed64(&v2, seq); bool res2 = mem->Add(seq, kTypeMerge, "key", v2, true, &post_process_info2); ASSERT_TRUE(res2); v2.clear(); } }); write_thread1.join(); write_thread2.join(); Status status; ReadOptions roptions; SequenceNumber max_covering_tombstone_seq = 0; LookupKey lkey("key", kMaxSequenceNumber); res = mem->Get(lkey, &value, /*timestamp=*/nullptr, &status, &merge_context, &max_covering_tombstone_seq, roptions); ASSERT_TRUE(res); uint64_t ivalue = DecodeFixed64(Slice(value).data()); uint64_t sum = 0; for (int seq = 0; seq < num_ops; seq++) { sum += seq; } ASSERT_EQ(ivalue, sum); delete mem; } TEST_F(DBMemTableTest, InsertWithHint) { Options options; options.allow_concurrent_memtable_write = false; options.create_if_missing = true; options.memtable_factory.reset(new MockMemTableRepFactory()); options.memtable_insert_with_hint_prefix_extractor.reset( new TestPrefixExtractor()); 
options.env = env_; Reopen(options); MockMemTableRep* rep = reinterpret_cast(options.memtable_factory.get()) ->rep(); ASSERT_OK(Put("foo_k1", "foo_v1")); ASSERT_EQ(nullptr, rep->last_hint_in()); void* hint_foo = rep->last_hint_out(); ASSERT_OK(Put("foo_k2", "foo_v2")); ASSERT_EQ(hint_foo, rep->last_hint_in()); ASSERT_EQ(hint_foo, rep->last_hint_out()); ASSERT_OK(Put("foo_k3", "foo_v3")); ASSERT_EQ(hint_foo, rep->last_hint_in()); ASSERT_EQ(hint_foo, rep->last_hint_out()); ASSERT_OK(Put("bar_k1", "bar_v1")); ASSERT_EQ(nullptr, rep->last_hint_in()); void* hint_bar = rep->last_hint_out(); ASSERT_NE(hint_foo, hint_bar); ASSERT_OK(Put("bar_k2", "bar_v2")); ASSERT_EQ(hint_bar, rep->last_hint_in()); ASSERT_EQ(hint_bar, rep->last_hint_out()); ASSERT_EQ(5, rep->num_insert_with_hint()); ASSERT_OK(Put("whitelisted", "vvv")); ASSERT_EQ(5, rep->num_insert_with_hint()); ASSERT_EQ("foo_v1", Get("foo_k1")); ASSERT_EQ("foo_v2", Get("foo_k2")); ASSERT_EQ("foo_v3", Get("foo_k3")); ASSERT_EQ("bar_v1", Get("bar_k1")); ASSERT_EQ("bar_v2", Get("bar_k2")); ASSERT_EQ("vvv", Get("whitelisted")); } TEST_F(DBMemTableTest, ColumnFamilyId) { // Verifies MemTableRepFactory is told the right column family id. Options options; options.allow_concurrent_memtable_write = false; options.create_if_missing = true; options.memtable_factory.reset(new MockMemTableRepFactory()); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); for (uint32_t cf = 0; cf < 2; ++cf) { ASSERT_OK(Put(cf, "key", "val")); ASSERT_OK(Flush(cf)); ASSERT_EQ( cf, static_cast(options.memtable_factory.get()) ->GetLastColumnFamilyId()); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_merge_operand_test.cc000066400000000000000000000203511370372246700204550ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/perf_context.h" #include "rocksdb/utilities/debug.h" #include "table/block_based/block_builder.h" #include "test_util/fault_injection_test_env.h" #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif #include "rocksdb/merge_operator.h" #include "utilities/merge_operators.h" #include "utilities/merge_operators/sortlist.h" #include "utilities/merge_operators/string_append/stringappend2.h" namespace ROCKSDB_NAMESPACE { class DBMergeOperandTest : public DBTestBase { public: DBMergeOperandTest() : DBTestBase("/db_merge_operand_test") {} }; TEST_F(DBMergeOperandTest, GetMergeOperandsBasic) { class LimitedStringAppendMergeOp : public StringAppendTESTOperator { public: LimitedStringAppendMergeOp(int limit, char delim) : StringAppendTESTOperator(delim), limit_(limit) {} const char* Name() const override { return "DBMergeOperatorTest::LimitedStringAppendMergeOp"; } bool ShouldMerge(const std::vector& operands) const override { if (operands.size() > 0 && limit_ > 0 && operands.size() >= limit_) { return true; } return false; } private: size_t limit_ = 0; }; Options options; options.create_if_missing = true; // Use only the latest two merge operands. 
options.merge_operator = std::make_shared(2, ','); options.env = env_; Reopen(options); int num_records = 4; int number_of_operands = 0; std::vector values(num_records); GetMergeOperandsOptions merge_operands_info; merge_operands_info.expected_max_number_of_operands = num_records; // k0 value in memtable Put("k0", "PutARock"); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k0", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "PutARock"); // k0.1 value in SST Put("k0.1", "RockInSST"); ASSERT_OK(Flush()); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k0.1", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "RockInSST"); // All k1 values are in memtable. ASSERT_OK(Merge("k1", "a")); Put("k1", "x"); ASSERT_OK(Merge("k1", "b")); ASSERT_OK(Merge("k1", "c")); ASSERT_OK(Merge("k1", "d")); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k1", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "x"); ASSERT_EQ(values[1], "b"); ASSERT_EQ(values[2], "c"); ASSERT_EQ(values[3], "d"); // expected_max_number_of_operands is less than number of merge operands so // status should be Incomplete. merge_operands_info.expected_max_number_of_operands = num_records - 1; Status status = db_->GetMergeOperands( ReadOptions(), db_->DefaultColumnFamily(), "k1", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(status.IsIncomplete(), true); merge_operands_info.expected_max_number_of_operands = num_records; // All k1.1 values are in memtable. 
ASSERT_OK(Merge("k1.1", "r")); Delete("k1.1"); ASSERT_OK(Merge("k1.1", "c")); ASSERT_OK(Merge("k1.1", "k")); ASSERT_OK(Merge("k1.1", "s")); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k1.1", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "c"); ASSERT_EQ(values[1], "k"); ASSERT_EQ(values[2], "s"); // All k2 values are flushed to L0 into a single file. ASSERT_OK(Merge("k2", "q")); ASSERT_OK(Merge("k2", "w")); ASSERT_OK(Merge("k2", "e")); ASSERT_OK(Merge("k2", "r")); ASSERT_OK(Flush()); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k2", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "q"); ASSERT_EQ(values[1], "w"); ASSERT_EQ(values[2], "e"); ASSERT_EQ(values[3], "r"); // All k2.1 values are flushed to L0 into a single file. ASSERT_OK(Merge("k2.1", "m")); Put("k2.1", "l"); ASSERT_OK(Merge("k2.1", "n")); ASSERT_OK(Merge("k2.1", "o")); ASSERT_OK(Flush()); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k2.1", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "l,n,o"); // All k2.2 values are flushed to L0 into a single file. ASSERT_OK(Merge("k2.2", "g")); Delete("k2.2"); ASSERT_OK(Merge("k2.2", "o")); ASSERT_OK(Merge("k2.2", "t")); ASSERT_OK(Flush()); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k2.2", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "o,t"); // Do some compaction that will make the following tests more predictable // Slice start("PutARock"); // Slice end("t"); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); // All k3 values are flushed and are in different files. 
ASSERT_OK(Merge("k3", "ab")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3", "bc")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3", "cd")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3", "de")); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k3", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "ab"); ASSERT_EQ(values[1], "bc"); ASSERT_EQ(values[2], "cd"); ASSERT_EQ(values[3], "de"); // All k3.1 values are flushed and are in different files. ASSERT_OK(Merge("k3.1", "ab")); ASSERT_OK(Flush()); Put("k3.1", "bc"); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3.1", "cd")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3.1", "de")); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k3.1", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "bc"); ASSERT_EQ(values[1], "cd"); ASSERT_EQ(values[2], "de"); // All k3.2 values are flushed and are in different files. ASSERT_OK(Merge("k3.2", "ab")); ASSERT_OK(Flush()); Delete("k3.2"); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3.2", "cd")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3.2", "de")); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k3.2", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "cd"); ASSERT_EQ(values[1], "de"); // All K4 values are in different levels ASSERT_OK(Merge("k4", "ba")); ASSERT_OK(Flush()); MoveFilesToLevel(4); ASSERT_OK(Merge("k4", "cb")); ASSERT_OK(Flush()); MoveFilesToLevel(3); ASSERT_OK(Merge("k4", "dc")); ASSERT_OK(Flush()); MoveFilesToLevel(1); ASSERT_OK(Merge("k4", "ed")); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k4", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "ba"); ASSERT_EQ(values[1], "cb"); ASSERT_EQ(values[2], "dc"); ASSERT_EQ(values[3], "ed"); // First 3 k5 values are in SST and next 4 k5 values are in Immutable Memtable ASSERT_OK(Merge("k5", "who")); ASSERT_OK(Merge("k5", "am")); ASSERT_OK(Merge("k5", "i")); 
ASSERT_OK(Flush()); Put("k5", "remember"); ASSERT_OK(Merge("k5", "i")); ASSERT_OK(Merge("k5", "am")); ASSERT_OK(Merge("k5", "rocks")); dbfull()->TEST_SwitchMemtable(); db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), "k5", values.data(), &merge_operands_info, &number_of_operands); ASSERT_EQ(values[0], "remember"); ASSERT_EQ(values[1], "i"); ASSERT_EQ(values[2], "am"); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_merge_operator_test.cc000066400000000000000000000526701370372246700206710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include "db/db_test_util.h" #include "db/forward_iterator.h" #include "port/stack_trace.h" #include "rocksdb/merge_operator.h" #include "utilities/merge_operators.h" #include "utilities/merge_operators/string_append/stringappend2.h" namespace ROCKSDB_NAMESPACE { class TestReadCallback : public ReadCallback { public: TestReadCallback(SnapshotChecker* snapshot_checker, SequenceNumber snapshot_seq) : ReadCallback(snapshot_seq), snapshot_checker_(snapshot_checker), snapshot_seq_(snapshot_seq) {} bool IsVisibleFullCheck(SequenceNumber seq) override { return snapshot_checker_->CheckInSnapshot(seq, snapshot_seq_) == SnapshotCheckerResult::kInSnapshot; } private: SnapshotChecker* snapshot_checker_; SequenceNumber snapshot_seq_; }; // Test merge operator functionality. 
class DBMergeOperatorTest : public DBTestBase { public: DBMergeOperatorTest() : DBTestBase("/db_merge_operator_test") {} std::string GetWithReadCallback(SnapshotChecker* snapshot_checker, const Slice& key, const Snapshot* snapshot = nullptr) { SequenceNumber seq = snapshot == nullptr ? db_->GetLatestSequenceNumber() : snapshot->GetSequenceNumber(); TestReadCallback read_callback(snapshot_checker, seq); ReadOptions read_opt; read_opt.snapshot = snapshot; PinnableSlice value; DBImpl::GetImplOptions get_impl_options; get_impl_options.column_family = db_->DefaultColumnFamily(); get_impl_options.value = &value; get_impl_options.callback = &read_callback; Status s = dbfull()->GetImpl(read_opt, key, get_impl_options); if (!s.ok()) { return s.ToString(); } return value.ToString(); } }; TEST_F(DBMergeOperatorTest, LimitMergeOperands) { class LimitedStringAppendMergeOp : public StringAppendTESTOperator { public: LimitedStringAppendMergeOp(int limit, char delim) : StringAppendTESTOperator(delim), limit_(limit) {} const char* Name() const override { return "DBMergeOperatorTest::LimitedStringAppendMergeOp"; } bool ShouldMerge(const std::vector& operands) const override { if (operands.size() > 0 && limit_ > 0 && operands.size() >= limit_) { return true; } return false; } private: size_t limit_ = 0; }; Options options; options.create_if_missing = true; // Use only the latest two merge operands. options.merge_operator = std::make_shared(2, ','); options.env = env_; Reopen(options); // All K1 values are in memtable. ASSERT_OK(Merge("k1", "a")); ASSERT_OK(Merge("k1", "b")); ASSERT_OK(Merge("k1", "c")); ASSERT_OK(Merge("k1", "d")); std::string value; ASSERT_TRUE(db_->Get(ReadOptions(), "k1", &value).ok()); // Make sure that only the latest two merge operands are used. If this was // not the case the value would be "a,b,c,d". ASSERT_EQ(value, "c,d"); // All K2 values are flushed to L0 into a single file. 
ASSERT_OK(Merge("k2", "a")); ASSERT_OK(Merge("k2", "b")); ASSERT_OK(Merge("k2", "c")); ASSERT_OK(Merge("k2", "d")); ASSERT_OK(Flush()); ASSERT_TRUE(db_->Get(ReadOptions(), "k2", &value).ok()); ASSERT_EQ(value, "c,d"); // All K3 values are flushed and are in different files. ASSERT_OK(Merge("k3", "ab")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3", "bc")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3", "cd")); ASSERT_OK(Flush()); ASSERT_OK(Merge("k3", "de")); ASSERT_TRUE(db_->Get(ReadOptions(), "k3", &value).ok()); ASSERT_EQ(value, "cd,de"); // All K4 values are in different levels ASSERT_OK(Merge("k4", "ab")); ASSERT_OK(Flush()); MoveFilesToLevel(4); ASSERT_OK(Merge("k4", "bc")); ASSERT_OK(Flush()); MoveFilesToLevel(3); ASSERT_OK(Merge("k4", "cd")); ASSERT_OK(Flush()); MoveFilesToLevel(1); ASSERT_OK(Merge("k4", "de")); ASSERT_TRUE(db_->Get(ReadOptions(), "k4", &value).ok()); ASSERT_EQ(value, "cd,de"); } TEST_F(DBMergeOperatorTest, MergeErrorOnRead) { Options options; options.create_if_missing = true; options.merge_operator.reset(new TestPutOperator()); options.env = env_; Reopen(options); ASSERT_OK(Merge("k1", "v1")); ASSERT_OK(Merge("k1", "corrupted")); std::string value; ASSERT_TRUE(db_->Get(ReadOptions(), "k1", &value).IsCorruption()); VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v1"}}); } TEST_F(DBMergeOperatorTest, MergeErrorOnWrite) { Options options; options.create_if_missing = true; options.merge_operator.reset(new TestPutOperator()); options.max_successive_merges = 3; options.env = env_; Reopen(options); ASSERT_OK(Merge("k1", "v1")); ASSERT_OK(Merge("k1", "v2")); // Will trigger a merge when hitting max_successive_merges and the merge // will fail. The delta will be inserted nevertheless. ASSERT_OK(Merge("k1", "corrupted")); // Data should stay unmerged after the error. 
VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v2"}, {"k1", "v1"}}); } TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) { Options options; options.create_if_missing = true; options.merge_operator.reset(new TestPutOperator()); options.env = env_; DestroyAndReopen(options); ASSERT_OK(Merge("k1", "v1")); ASSERT_OK(Merge("k1", "corrupted")); ASSERT_OK(Put("k2", "v2")); auto* iter = db_->NewIterator(ReadOptions()); iter->Seek("k1"); ASSERT_FALSE(iter->Valid()); ASSERT_TRUE(iter->status().IsCorruption()); delete iter; iter = db_->NewIterator(ReadOptions()); iter->Seek("k2"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); iter->Prev(); ASSERT_FALSE(iter->Valid()); ASSERT_TRUE(iter->status().IsCorruption()); delete iter; VerifyDBInternal({{"k1", "corrupted"}, {"k1", "v1"}, {"k2", "v2"}}); DestroyAndReopen(options); ASSERT_OK(Merge("k1", "v1")); ASSERT_OK(Put("k2", "v2")); ASSERT_OK(Merge("k2", "corrupted")); iter = db_->NewIterator(ReadOptions()); iter->Seek("k1"); ASSERT_TRUE(iter->Valid()); ASSERT_OK(iter->status()); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_TRUE(iter->status().IsCorruption()); delete iter; VerifyDBInternal({{"k1", "v1"}, {"k2", "corrupted"}, {"k2", "v2"}}); } class MergeOperatorPinningTest : public DBMergeOperatorTest, public testing::WithParamInterface { public: MergeOperatorPinningTest() { disable_block_cache_ = GetParam(); } bool disable_block_cache_; }; INSTANTIATE_TEST_CASE_P(MergeOperatorPinningTest, MergeOperatorPinningTest, ::testing::Bool()); #ifndef ROCKSDB_LITE TEST_P(MergeOperatorPinningTest, OperandsMultiBlocks) { Options options = CurrentOptions(); BlockBasedTableOptions table_options; table_options.block_size = 1; // every block will contain one entry table_options.no_block_cache = disable_block_cache_; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); options.level0_slowdown_writes_trigger = (1 << 30); 
options.level0_stop_writes_trigger = (1 << 30); options.disable_auto_compactions = true; DestroyAndReopen(options); const int kKeysPerFile = 10; const int kOperandsPerKeyPerFile = 7; const int kOperandSize = 100; // Filse to write in L0 before compacting to lower level const int kFilesPerLevel = 3; Random rnd(301); std::map true_data; int batch_num = 1; int lvl_to_fill = 4; int key_id = 0; while (true) { for (int j = 0; j < kKeysPerFile; j++) { std::string key = Key(key_id % 35); key_id++; for (int k = 0; k < kOperandsPerKeyPerFile; k++) { std::string val = RandomString(&rnd, kOperandSize); ASSERT_OK(db_->Merge(WriteOptions(), key, val)); if (true_data[key].size() == 0) { true_data[key] = val; } else { true_data[key] += "," + val; } } } if (lvl_to_fill == -1) { // Keep last batch in memtable and stop break; } ASSERT_OK(Flush()); if (batch_num % kFilesPerLevel == 0) { if (lvl_to_fill != 0) { MoveFilesToLevel(lvl_to_fill); } lvl_to_fill--; } batch_num++; } // 3 L0 files // 1 L1 file // 3 L2 files // 1 L3 file // 3 L4 Files ASSERT_EQ(FilesPerLevel(), "3,1,3,1,3"); VerifyDBFromMap(true_data); } class MergeOperatorHook : public MergeOperator { public: explicit MergeOperatorHook(std::shared_ptr _merge_op) : merge_op_(_merge_op) {} bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { before_merge_(); bool res = merge_op_->FullMergeV2(merge_in, merge_out); after_merge_(); return res; } const char* Name() const override { return merge_op_->Name(); } std::shared_ptr merge_op_; std::function before_merge_ = []() {}; std::function after_merge_ = []() {}; }; TEST_P(MergeOperatorPinningTest, EvictCacheBeforeMerge) { Options options = CurrentOptions(); auto merge_hook = std::make_shared(MergeOperators::CreateMaxOperator()); options.merge_operator = merge_hook; options.disable_auto_compactions = true; options.level0_slowdown_writes_trigger = (1 << 30); options.level0_stop_writes_trigger = (1 << 30); options.max_open_files = 20; 
BlockBasedTableOptions bbto; bbto.no_block_cache = disable_block_cache_; if (bbto.no_block_cache == false) { bbto.block_cache = NewLRUCache(64 * 1024 * 1024); } else { bbto.block_cache = nullptr; } options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); const int kNumOperands = 30; const int kNumKeys = 1000; const int kOperandSize = 100; Random rnd(301); // 1000 keys every key have 30 operands, every operand is in a different file std::map true_data; for (int i = 0; i < kNumOperands; i++) { for (int j = 0; j < kNumKeys; j++) { std::string k = Key(j); std::string v = RandomString(&rnd, kOperandSize); ASSERT_OK(db_->Merge(WriteOptions(), k, v)); true_data[k] = std::max(true_data[k], v); } ASSERT_OK(Flush()); } std::vector file_numbers = ListTableFiles(env_, dbname_); ASSERT_EQ(file_numbers.size(), kNumOperands); int merge_cnt = 0; // Code executed before merge operation merge_hook->before_merge_ = [&]() { // Evict all tables from cache before every merge operation for (uint64_t num : file_numbers) { TableCache::Evict(dbfull()->TEST_table_cache(), num); } // Decrease cache capacity to force all unrefed blocks to be evicted if (bbto.block_cache) { bbto.block_cache->SetCapacity(1); } merge_cnt++; }; // Code executed after merge operation merge_hook->after_merge_ = [&]() { // Increase capacity again after doing the merge if (bbto.block_cache) { bbto.block_cache->SetCapacity(64 * 1024 * 1024); } }; size_t total_reads; VerifyDBFromMap(true_data, &total_reads); ASSERT_EQ(merge_cnt, total_reads); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); VerifyDBFromMap(true_data, &total_reads); } TEST_P(MergeOperatorPinningTest, TailingIterator) { Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreateMaxOperator(); BlockBasedTableOptions bbto; bbto.no_block_cache = disable_block_cache_; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); const int kNumOperands = 100; const 
int kNumWrites = 100000; std::function writer_func = [&]() { int k = 0; for (int i = 0; i < kNumWrites; i++) { db_->Merge(WriteOptions(), Key(k), Key(k)); if (i && i % kNumOperands == 0) { k++; } if (i && i % 127 == 0) { ASSERT_OK(Flush()); } if (i && i % 317 == 0) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); } } }; std::function reader_func = [&]() { ReadOptions ro; ro.tailing = true; Iterator* iter = db_->NewIterator(ro); iter->SeekToFirst(); for (int i = 0; i < (kNumWrites / kNumOperands); i++) { while (!iter->Valid()) { // wait for the key to be written env_->SleepForMicroseconds(100); iter->Seek(Key(i)); } ASSERT_EQ(iter->key(), Key(i)); ASSERT_EQ(iter->value(), Key(i)); iter->Next(); } delete iter; }; ROCKSDB_NAMESPACE::port::Thread writer_thread(writer_func); ROCKSDB_NAMESPACE::port::Thread reader_thread(reader_func); writer_thread.join(); reader_thread.join(); } TEST_F(DBMergeOperatorTest, TailingIteratorMemtableUnrefedBySomeoneElse) { Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreateStringAppendOperator(); DestroyAndReopen(options); // Overview of the test: // * There are two merge operands for the same key: one in an sst file, // another in a memtable. // * Seek a tailing iterator to this key. // * As part of the seek, the iterator will: // (a) first visit the operand in the memtable and tell ForwardIterator // to pin this operand, then // (b) move on to the operand in the sst file, then pass both operands // to merge operator. // * The memtable may get flushed and unreferenced by another thread between // (a) and (b). The test simulates it by flushing the memtable inside a // SyncPoint callback located between (a) and (b). // * In this case it's ForwardIterator's responsibility to keep the memtable // pinned until (b) is complete. There used to be a bug causing // ForwardIterator to not pin it in some circumstances. This test // reproduces it. 
db_->Merge(WriteOptions(), "key", "sst"); db_->Flush(FlushOptions()); // Switch to SuperVersion A db_->Merge(WriteOptions(), "key", "memtable"); // Pin SuperVersion A std::unique_ptr someone_else(db_->NewIterator(ReadOptions())); bool pushed_first_operand = false; bool stepped_to_next_operand = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBIter::MergeValuesNewToOld:PushedFirstOperand", [&](void*) { EXPECT_FALSE(pushed_first_operand); pushed_first_operand = true; db_->Flush(FlushOptions()); // Switch to SuperVersion B }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBIter::MergeValuesNewToOld:SteppedToNextOperand", [&](void*) { EXPECT_FALSE(stepped_to_next_operand); stepped_to_next_operand = true; someone_else.reset(); // Unpin SuperVersion A }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ReadOptions ro; ro.tailing = true; std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek("key"); ASSERT_TRUE(iter->status().ok()); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(std::string("sst,memtable"), iter->value().ToString()); EXPECT_TRUE(pushed_first_operand); EXPECT_TRUE(stepped_to_next_operand); } #endif // ROCKSDB_LITE TEST_F(DBMergeOperatorTest, SnapshotCheckerAndReadCallback) { Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreateStringAppendOperator(); DestroyAndReopen(options); class TestSnapshotChecker : public SnapshotChecker { public: SnapshotCheckerResult CheckInSnapshot( SequenceNumber seq, SequenceNumber snapshot_seq) const override { return IsInSnapshot(seq, snapshot_seq) ? 
SnapshotCheckerResult::kInSnapshot : SnapshotCheckerResult::kNotInSnapshot; } bool IsInSnapshot(SequenceNumber seq, SequenceNumber snapshot_seq) const { switch (snapshot_seq) { case 0: return seq == 0; case 1: return seq <= 1; case 2: // seq = 2 not visible to snapshot with seq = 2 return seq <= 1; case 3: return seq <= 3; case 4: // seq = 4 not visible to snpahost with seq = 4 return seq <= 3; default: // seq >=4 is uncommitted return seq <= 4; }; } }; TestSnapshotChecker* snapshot_checker = new TestSnapshotChecker(); dbfull()->SetSnapshotChecker(snapshot_checker); std::string value; ASSERT_OK(Merge("foo", "v1")); ASSERT_EQ(1, db_->GetLatestSequenceNumber()); ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo")); ASSERT_OK(Merge("foo", "v2")); ASSERT_EQ(2, db_->GetLatestSequenceNumber()); // v2 is not visible to latest snapshot, which has seq = 2. ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo")); // Take a snapshot with seq = 2. const Snapshot* snapshot1 = db_->GetSnapshot(); ASSERT_EQ(2, snapshot1->GetSequenceNumber()); // v2 is not visible to snapshot1, which has seq = 2 ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); // Verify flush doesn't alter the result. ASSERT_OK(Flush()); ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo")); ASSERT_OK(Merge("foo", "v3")); ASSERT_EQ(3, db_->GetLatestSequenceNumber()); ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo")); ASSERT_OK(Merge("foo", "v4")); ASSERT_EQ(4, db_->GetLatestSequenceNumber()); // v4 is not visible to latest snapshot, which has seq = 4. ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo")); const Snapshot* snapshot2 = db_->GetSnapshot(); ASSERT_EQ(4, snapshot2->GetSequenceNumber()); // v4 is not visible to snapshot2, which has seq = 4. 
ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo", snapshot2)); // Verify flush doesn't alter the result. ASSERT_OK(Flush()); ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo", snapshot2)); ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo")); ASSERT_OK(Merge("foo", "v5")); ASSERT_EQ(5, db_->GetLatestSequenceNumber()); // v5 is uncommitted ASSERT_EQ("v1,v2,v3,v4", GetWithReadCallback(snapshot_checker, "foo")); // full manual compaction. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // Verify compaction doesn't alter the result. ASSERT_EQ("v1", GetWithReadCallback(snapshot_checker, "foo", snapshot1)); ASSERT_EQ("v1,v2,v3", GetWithReadCallback(snapshot_checker, "foo", snapshot2)); ASSERT_EQ("v1,v2,v3,v4", GetWithReadCallback(snapshot_checker, "foo")); db_->ReleaseSnapshot(snapshot1); db_->ReleaseSnapshot(snapshot2); } class PerConfigMergeOperatorPinningTest : public DBMergeOperatorTest, public testing::WithParamInterface> { public: PerConfigMergeOperatorPinningTest() { std::tie(disable_block_cache_, option_config_) = GetParam(); } bool disable_block_cache_; }; INSTANTIATE_TEST_CASE_P( MergeOperatorPinningTest, PerConfigMergeOperatorPinningTest, ::testing::Combine(::testing::Bool(), ::testing::Range(static_cast(DBTestBase::kDefault), static_cast(DBTestBase::kEnd)))); TEST_P(PerConfigMergeOperatorPinningTest, Randomized) { if (ShouldSkipOptions(option_config_, kSkipMergePut)) { return; } Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreateMaxOperator(); BlockBasedTableOptions table_options; table_options.no_block_cache = disable_block_cache_; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); Random rnd(301); std::map true_data; const int kTotalMerges = 5000; // Every key gets ~10 operands const int kKeyRange = kTotalMerges / 10; const int 
kOperandSize = 20; const int kNumPutBefore = kKeyRange / 10; // 10% value const int kNumPutAfter = kKeyRange / 10; // 10% overwrite const int kNumDelete = kKeyRange / 10; // 10% delete // kNumPutBefore keys will have base values for (int i = 0; i < kNumPutBefore; i++) { std::string key = Key(rnd.Next() % kKeyRange); std::string value = RandomString(&rnd, kOperandSize); ASSERT_OK(db_->Put(WriteOptions(), key, value)); true_data[key] = value; } // Do kTotalMerges merges for (int i = 0; i < kTotalMerges; i++) { std::string key = Key(rnd.Next() % kKeyRange); std::string value = RandomString(&rnd, kOperandSize); ASSERT_OK(db_->Merge(WriteOptions(), key, value)); if (true_data[key] < value) { true_data[key] = value; } } // Overwrite random kNumPutAfter keys for (int i = 0; i < kNumPutAfter; i++) { std::string key = Key(rnd.Next() % kKeyRange); std::string value = RandomString(&rnd, kOperandSize); ASSERT_OK(db_->Put(WriteOptions(), key, value)); true_data[key] = value; } // Delete random kNumDelete keys for (int i = 0; i < kNumDelete; i++) { std::string key = Key(rnd.Next() % kKeyRange); ASSERT_OK(db_->Delete(WriteOptions(), key)); true_data.erase(key); } VerifyDBFromMap(true_data); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_options_test.cc000066400000000000000000001001151370372246700173360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include #include #include #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "options/options_helper.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/stats_history.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { class DBOptionsTest : public DBTestBase { public: DBOptionsTest() : DBTestBase("/db_options_test") {} #ifndef ROCKSDB_LITE std::unordered_map GetMutableDBOptionsMap( const DBOptions& options) { std::string options_str; ConfigOptions config_options; config_options.delimiter = "; "; GetStringFromDBOptions(config_options, options, &options_str); std::unordered_map options_map; StringToMap(options_str, &options_map); std::unordered_map mutable_map; for (const auto opt : db_options_type_info) { if (opt.second.IsMutable() && opt.second.ShouldSerialize()) { mutable_map[opt.first] = options_map[opt.first]; } } return mutable_map; } std::unordered_map GetMutableCFOptionsMap( const ColumnFamilyOptions& options) { std::string options_str; ConfigOptions config_options; config_options.delimiter = "; "; GetStringFromColumnFamilyOptions(config_options, options, &options_str); std::unordered_map options_map; StringToMap(options_str, &options_map); std::unordered_map mutable_map; for (const auto opt : cf_options_type_info) { if (opt.second.IsMutable() && opt.second.ShouldSerialize()) { mutable_map[opt.first] = options_map[opt.first]; } } return mutable_map; } std::unordered_map GetRandomizedMutableCFOptionsMap( Random* rnd) { Options options = CurrentOptions(); options.env = env_; ImmutableDBOptions db_options(options); test::RandomInitCFOptions(&options, options, rnd); auto sanitized_options = SanitizeOptions(db_options, options); auto opt_map = GetMutableCFOptionsMap(sanitized_options); delete options.compaction_filter; return opt_map; } 
std::unordered_map GetRandomizedMutableDBOptionsMap( Random* rnd) { DBOptions db_options; test::RandomInitDBOptions(&db_options, rnd); auto sanitized_options = SanitizeOptions(dbname_, db_options); return GetMutableDBOptionsMap(sanitized_options); } #endif // ROCKSDB_LITE }; // RocksDB lite don't support dynamic options. #ifndef ROCKSDB_LITE TEST_F(DBOptionsTest, GetLatestDBOptions) { // GetOptions should be able to get latest option changed by SetOptions. Options options; options.create_if_missing = true; options.env = env_; Random rnd(228); Reopen(options); auto new_options = GetRandomizedMutableDBOptionsMap(&rnd); ASSERT_OK(dbfull()->SetDBOptions(new_options)); ASSERT_EQ(new_options, GetMutableDBOptionsMap(dbfull()->GetDBOptions())); } TEST_F(DBOptionsTest, GetLatestCFOptions) { // GetOptions should be able to get latest option changed by SetOptions. Options options; options.create_if_missing = true; options.env = env_; Random rnd(228); Reopen(options); CreateColumnFamilies({"foo"}, options); ReopenWithColumnFamilies({"default", "foo"}, options); auto options_default = GetRandomizedMutableCFOptionsMap(&rnd); auto options_foo = GetRandomizedMutableCFOptionsMap(&rnd); ASSERT_OK(dbfull()->SetOptions(handles_[0], options_default)); ASSERT_OK(dbfull()->SetOptions(handles_[1], options_foo)); ASSERT_EQ(options_default, GetMutableCFOptionsMap(dbfull()->GetOptions(handles_[0]))); ASSERT_EQ(options_foo, GetMutableCFOptionsMap(dbfull()->GetOptions(handles_[1]))); } TEST_F(DBOptionsTest, SetBytesPerSync) { const size_t kValueSize = 1024 * 1024; // 1MB Options options; options.create_if_missing = true; options.bytes_per_sync = 1024 * 1024; options.use_direct_reads = false; options.write_buffer_size = 400 * kValueSize; options.disable_auto_compactions = true; options.compression = kNoCompression; options.env = env_; Reopen(options); int counter = 0; int low_bytes_per_sync = 0; int i = 0; const std::string kValue(kValueSize, 'v'); ASSERT_EQ(options.bytes_per_sync, 
dbfull()->GetDBOptions().bytes_per_sync); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::RangeSync:0", [&](void* /*arg*/) { counter++; }); WriteOptions write_opts; // should sync approximately 40MB/1MB ~= 40 times. for (i = 0; i < 40; i++) { Put(Key(i), kValue, write_opts); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); low_bytes_per_sync = counter; ASSERT_GT(low_bytes_per_sync, 35); ASSERT_LT(low_bytes_per_sync, 45); counter = 0; // 8388608 = 8 * 1024 * 1024 ASSERT_OK(dbfull()->SetDBOptions({{"bytes_per_sync", "8388608"}})); ASSERT_EQ(8388608, dbfull()->GetDBOptions().bytes_per_sync); // should sync approximately 40MB*2/8MB ~= 10 times. // data will be 40*2MB because of previous Puts too. for (i = 0; i < 40; i++) { Put(Key(i), kValue, write_opts); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_GT(counter, 5); ASSERT_LT(counter, 15); // Redundant assert. But leaving it here just to get the point across that // low_bytes_per_sync > counter. 
ASSERT_GT(low_bytes_per_sync, counter); } TEST_F(DBOptionsTest, SetWalBytesPerSync) { const size_t kValueSize = 1024 * 1024 * 3; Options options; options.create_if_missing = true; options.wal_bytes_per_sync = 512; options.write_buffer_size = 100 * kValueSize; options.disable_auto_compactions = true; options.compression = kNoCompression; options.env = env_; Reopen(options); ASSERT_EQ(512, dbfull()->GetDBOptions().wal_bytes_per_sync); int counter = 0; int low_bytes_per_sync = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::RangeSync:0", [&](void* /*arg*/) { counter++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); const std::string kValue(kValueSize, 'v'); int i = 0; for (; i < 10; i++) { Put(Key(i), kValue); } // Do not flush. If we flush here, SwitchWAL will reuse old WAL file since its // empty and will not get the new wal_bytes_per_sync value. low_bytes_per_sync = counter; //5242880 = 1024 * 1024 * 5 ASSERT_OK(dbfull()->SetDBOptions({{"wal_bytes_per_sync", "5242880"}})); ASSERT_EQ(5242880, dbfull()->GetDBOptions().wal_bytes_per_sync); counter = 0; i = 0; for (; i < 10; i++) { Put(Key(i), kValue); } ASSERT_GT(counter, 0); ASSERT_GT(low_bytes_per_sync, 0); ASSERT_GT(low_bytes_per_sync, counter); } TEST_F(DBOptionsTest, WritableFileMaxBufferSize) { Options options; options.create_if_missing = true; options.writable_file_max_buffer_size = 1024 * 1024; options.level0_file_num_compaction_trigger = 3; options.max_manifest_file_size = 1; options.env = env_; int buffer_size = 1024 * 1024; Reopen(options); ASSERT_EQ(buffer_size, dbfull()->GetDBOptions().writable_file_max_buffer_size); std::atomic match_cnt(0); std::atomic unmatch_cnt(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::WritableFileWriter:0", [&](void* arg) { int value = static_cast(reinterpret_cast(arg)); if (value == buffer_size) { match_cnt++; } else { unmatch_cnt++; } }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); int i = 0; for (; i < 3; i++) { ASSERT_OK(Put("foo", ToString(i))); ASSERT_OK(Put("bar", ToString(i))); Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(unmatch_cnt, 0); ASSERT_GE(match_cnt, 11); ASSERT_OK( dbfull()->SetDBOptions({{"writable_file_max_buffer_size", "524288"}})); buffer_size = 512 * 1024; match_cnt = 0; unmatch_cnt = 0; // SetDBOptions() will create a WriteableFileWriter ASSERT_EQ(buffer_size, dbfull()->GetDBOptions().writable_file_max_buffer_size); i = 0; for (; i < 3; i++) { ASSERT_OK(Put("foo", ToString(i))); ASSERT_OK(Put("bar", ToString(i))); Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(unmatch_cnt, 0); ASSERT_GE(match_cnt, 11); } TEST_F(DBOptionsTest, SetOptionsAndReopen) { Random rnd(1044); auto rand_opts = GetRandomizedMutableCFOptionsMap(&rnd); ASSERT_OK(dbfull()->SetOptions(rand_opts)); // Verify if DB can be reopen after setting options. Options options; options.env = env_; ASSERT_OK(TryReopen(options)); } TEST_F(DBOptionsTest, EnableAutoCompactionAndTriggerStall) { const std::string kValue(1024, 'v'); for (int method_type = 0; method_type < 2; method_type++) { for (int option_type = 0; option_type < 4; option_type++) { Options options; options.create_if_missing = true; options.disable_auto_compactions = true; options.write_buffer_size = 1024 * 1024 * 10; options.compression = CompressionType::kNoCompression; options.level0_file_num_compaction_trigger = 1; options.level0_stop_writes_trigger = std::numeric_limits::max(); options.level0_slowdown_writes_trigger = std::numeric_limits::max(); options.hard_pending_compaction_bytes_limit = std::numeric_limits::max(); options.soft_pending_compaction_bytes_limit = std::numeric_limits::max(); options.env = env_; DestroyAndReopen(options); int i = 0; for (; i < 1024; i++) { Put(Key(i), kValue); } Flush(); for (; i < 1024 * 2; i++) { Put(Key(i), kValue); } Flush(); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(2, 
NumTableFilesAtLevel(0)); uint64_t l0_size = SizeAtLevel(0); switch (option_type) { case 0: // test with level0_stop_writes_trigger options.level0_stop_writes_trigger = 2; options.level0_slowdown_writes_trigger = 2; break; case 1: options.level0_slowdown_writes_trigger = 2; break; case 2: options.hard_pending_compaction_bytes_limit = l0_size; options.soft_pending_compaction_bytes_limit = l0_size; break; case 3: options.soft_pending_compaction_bytes_limit = l0_size; break; } Reopen(options); dbfull()->TEST_WaitForCompact(); ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); ASSERT_FALSE(dbfull()->TEST_write_controler().NeedsDelay()); SyncPoint::GetInstance()->LoadDependency( {{"DBOptionsTest::EnableAutoCompactionAndTriggerStall:1", "BackgroundCallCompaction:0"}, {"DBImpl::BackgroundCompaction():BeforePickCompaction", "DBOptionsTest::EnableAutoCompactionAndTriggerStall:2"}, {"DBOptionsTest::EnableAutoCompactionAndTriggerStall:3", "DBImpl::BackgroundCompaction():AfterPickCompaction"}}); // Block background compaction. SyncPoint::GetInstance()->EnableProcessing(); switch (method_type) { case 0: ASSERT_OK( dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); break; case 1: ASSERT_OK(dbfull()->EnableAutoCompaction( {dbfull()->DefaultColumnFamily()})); break; } TEST_SYNC_POINT("DBOptionsTest::EnableAutoCompactionAndTriggerStall:1"); // Wait for stall condition recalculate. 
TEST_SYNC_POINT("DBOptionsTest::EnableAutoCompactionAndTriggerStall:2"); switch (option_type) { case 0: ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped()); break; case 1: ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); break; case 2: ASSERT_TRUE(dbfull()->TEST_write_controler().IsStopped()); break; case 3: ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); break; } TEST_SYNC_POINT("DBOptionsTest::EnableAutoCompactionAndTriggerStall:3"); // Background compaction executed. dbfull()->TEST_WaitForCompact(); ASSERT_FALSE(dbfull()->TEST_write_controler().IsStopped()); ASSERT_FALSE(dbfull()->TEST_write_controler().NeedsDelay()); } } } TEST_F(DBOptionsTest, SetOptionsMayTriggerCompaction) { Options options; options.create_if_missing = true; options.level0_file_num_compaction_trigger = 1000; options.env = env_; Reopen(options); for (int i = 0; i < 3; i++) { // Need to insert two keys to avoid trivial move. 
ASSERT_OK(Put("foo", ToString(i))); ASSERT_OK(Put("bar", ToString(i))); Flush(); } ASSERT_EQ("3", FilesPerLevel()); ASSERT_OK( dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "3"}})); dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,1", FilesPerLevel()); } TEST_F(DBOptionsTest, SetBackgroundCompactionThreads) { Options options; options.create_if_missing = true; options.max_background_compactions = 1; // default value options.env = env_; Reopen(options); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); ASSERT_OK(dbfull()->SetDBOptions({{"max_background_compactions", "3"}})); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); auto stop_token = dbfull()->TEST_write_controler().GetStopToken(); ASSERT_EQ(3, dbfull()->TEST_BGCompactionsAllowed()); } TEST_F(DBOptionsTest, SetBackgroundFlushThreads) { Options options; options.create_if_missing = true; options.max_background_flushes = 1; options.env = env_; Reopen(options); ASSERT_EQ(1, dbfull()->TEST_BGFlushesAllowed()); ASSERT_EQ(1, env_->GetBackgroundThreads(Env::Priority::HIGH)); ASSERT_OK(dbfull()->SetDBOptions({{"max_background_flushes", "3"}})); ASSERT_EQ(3, env_->GetBackgroundThreads(Env::Priority::HIGH)); ASSERT_EQ(3, dbfull()->TEST_BGFlushesAllowed()); } TEST_F(DBOptionsTest, SetBackgroundJobs) { Options options; options.create_if_missing = true; options.max_background_jobs = 8; options.env = env_; Reopen(options); for (int i = 0; i < 2; ++i) { if (i > 0) { options.max_background_jobs = 12; ASSERT_OK(dbfull()->SetDBOptions( {{"max_background_jobs", std::to_string(options.max_background_jobs)}})); } const int expected_max_flushes = options.max_background_jobs / 4; ASSERT_EQ(expected_max_flushes, dbfull()->TEST_BGFlushesAllowed()); ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); auto stop_token = dbfull()->TEST_write_controler().GetStopToken(); const int expected_max_compactions = 3 * expected_max_flushes; ASSERT_EQ(expected_max_flushes, dbfull()->TEST_BGFlushesAllowed()); 
ASSERT_EQ(expected_max_compactions, dbfull()->TEST_BGCompactionsAllowed()); ASSERT_EQ(expected_max_flushes, env_->GetBackgroundThreads(Env::Priority::HIGH)); ASSERT_EQ(expected_max_compactions, env_->GetBackgroundThreads(Env::Priority::LOW)); } } TEST_F(DBOptionsTest, AvoidFlushDuringShutdown) { Options options; options.create_if_missing = true; options.disable_auto_compactions = true; options.env = env_; WriteOptions write_without_wal; write_without_wal.disableWAL = true; ASSERT_FALSE(options.avoid_flush_during_shutdown); DestroyAndReopen(options); ASSERT_OK(Put("foo", "v1", write_without_wal)); Reopen(options); ASSERT_EQ("v1", Get("foo")); ASSERT_EQ("1", FilesPerLevel()); DestroyAndReopen(options); ASSERT_OK(Put("foo", "v2", write_without_wal)); ASSERT_OK(dbfull()->SetDBOptions({{"avoid_flush_during_shutdown", "true"}})); Reopen(options); ASSERT_EQ("NOT_FOUND", Get("foo")); ASSERT_EQ("", FilesPerLevel()); } TEST_F(DBOptionsTest, SetDelayedWriteRateOption) { Options options; options.create_if_missing = true; options.delayed_write_rate = 2 * 1024U * 1024U; options.env = env_; Reopen(options); ASSERT_EQ(2 * 1024U * 1024U, dbfull()->TEST_write_controler().max_delayed_write_rate()); ASSERT_OK(dbfull()->SetDBOptions({{"delayed_write_rate", "20000"}})); ASSERT_EQ(20000, dbfull()->TEST_write_controler().max_delayed_write_rate()); } TEST_F(DBOptionsTest, MaxTotalWalSizeChange) { Random rnd(1044); const auto value_size = size_t(1024); std::string value; test::RandomString(&rnd, value_size, &value); Options options; options.create_if_missing = true; options.env = env_; CreateColumnFamilies({"1", "2", "3"}, options); ReopenWithColumnFamilies({"default", "1", "2", "3"}, options); WriteOptions write_options; const int key_count = 100; for (int i = 0; i < key_count; ++i) { for (size_t cf = 0; cf < handles_.size(); ++cf) { ASSERT_OK(Put(static_cast(cf), Key(i), value)); } } ASSERT_OK(dbfull()->SetDBOptions({{"max_total_wal_size", "10"}})); for (size_t cf = 0; cf < 
handles_.size(); ++cf) { dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); ASSERT_EQ("1", FilesPerLevel(static_cast(cf))); } } TEST_F(DBOptionsTest, SetStatsDumpPeriodSec) { Options options; options.create_if_missing = true; options.stats_dump_period_sec = 5; options.env = env_; Reopen(options); ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_dump_period_sec); for (int i = 0; i < 20; i++) { unsigned int num = rand() % 5000 + 1; ASSERT_OK( dbfull()->SetDBOptions({{"stats_dump_period_sec", ToString(num)}})); ASSERT_EQ(num, dbfull()->GetDBOptions().stats_dump_period_sec); } Close(); } TEST_F(DBOptionsTest, SetOptionsStatsPersistPeriodSec) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 5; options.env = env_; Reopen(options); ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_persist_period_sec); ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "12345"}})); ASSERT_EQ(12345u, dbfull()->GetDBOptions().stats_persist_period_sec); ASSERT_NOK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "abcde"}})); ASSERT_EQ(12345u, dbfull()->GetDBOptions().stats_persist_period_sec); } static void assert_candidate_files_empty(DBImpl* dbfull, const bool empty) { dbfull->TEST_LockMutex(); JobContext job_context(0); dbfull->FindObsoleteFiles(&job_context, false); ASSERT_EQ(empty, job_context.full_scan_candidate_files.empty()); dbfull->TEST_UnlockMutex(); if (job_context.HaveSomethingToDelete()) { // fulfill the contract of FindObsoleteFiles by calling PurgeObsoleteFiles // afterwards; otherwise the test may hang on shutdown dbfull->PurgeObsoleteFiles(job_context); } job_context.Clean(); } TEST_F(DBOptionsTest, DeleteObsoleteFilesPeriodChange) { SpecialEnv env(env_); env.time_elapse_only_sleep_ = true; Options options; options.env = &env; options.create_if_missing = true; ASSERT_OK(TryReopen(options)); // Verify that candidate files set is empty when no full scan requested. 
assert_candidate_files_empty(dbfull(), true); ASSERT_OK( dbfull()->SetDBOptions({{"delete_obsolete_files_period_micros", "0"}})); // After delete_obsolete_files_period_micros updated to 0, the next call // to FindObsoleteFiles should make a full scan assert_candidate_files_empty(dbfull(), false); ASSERT_OK( dbfull()->SetDBOptions({{"delete_obsolete_files_period_micros", "20"}})); assert_candidate_files_empty(dbfull(), true); env.addon_time_.store(20); assert_candidate_files_empty(dbfull(), true); env.addon_time_.store(21); assert_candidate_files_empty(dbfull(), false); Close(); } TEST_F(DBOptionsTest, MaxOpenFilesChange) { SpecialEnv env(env_); Options options; options.env = CurrentOptions().env; options.max_open_files = -1; Reopen(options); Cache* tc = dbfull()->TEST_table_cache(); ASSERT_EQ(-1, dbfull()->GetDBOptions().max_open_files); ASSERT_LT(2000, tc->GetCapacity()); ASSERT_OK(dbfull()->SetDBOptions({{"max_open_files", "1024"}})); ASSERT_EQ(1024, dbfull()->GetDBOptions().max_open_files); // examine the table cache (actual size should be 1014) ASSERT_GT(1500, tc->GetCapacity()); Close(); } TEST_F(DBOptionsTest, SanitizeDelayedWriteRate) { Options options; options.delayed_write_rate = 0; Reopen(options); ASSERT_EQ(16 * 1024 * 1024, dbfull()->GetDBOptions().delayed_write_rate); options.rate_limiter.reset(NewGenericRateLimiter(31 * 1024 * 1024)); Reopen(options); ASSERT_EQ(31 * 1024 * 1024, dbfull()->GetDBOptions().delayed_write_rate); } TEST_F(DBOptionsTest, SanitizeUniversalTTLCompaction) { Options options; options.compaction_style = kCompactionStyleUniversal; options.ttl = 0; options.periodic_compaction_seconds = 0; Reopen(options); ASSERT_EQ(0, dbfull()->GetOptions().ttl); ASSERT_EQ(0, dbfull()->GetOptions().periodic_compaction_seconds); options.ttl = 0; options.periodic_compaction_seconds = 100; Reopen(options); ASSERT_EQ(0, dbfull()->GetOptions().ttl); ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); options.ttl = 100; 
options.periodic_compaction_seconds = 0; Reopen(options); ASSERT_EQ(100, dbfull()->GetOptions().ttl); ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); options.ttl = 100; options.periodic_compaction_seconds = 500; Reopen(options); ASSERT_EQ(100, dbfull()->GetOptions().ttl); ASSERT_EQ(100, dbfull()->GetOptions().periodic_compaction_seconds); } TEST_F(DBOptionsTest, SanitizeTtlDefault) { Options options; Reopen(options); ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().ttl); options.compaction_style = kCompactionStyleLevel; options.ttl = 0; Reopen(options); ASSERT_EQ(0, dbfull()->GetOptions().ttl); options.ttl = 100; Reopen(options); ASSERT_EQ(100, dbfull()->GetOptions().ttl); } TEST_F(DBOptionsTest, SanitizeFIFOPeriodicCompaction) { Options options; options.compaction_style = kCompactionStyleFIFO; options.ttl = 0; Reopen(options); ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().ttl); options.ttl = 100; Reopen(options); ASSERT_EQ(100, dbfull()->GetOptions().ttl); options.ttl = 100 * 24 * 60 * 60; Reopen(options); ASSERT_EQ(100 * 24 * 60 * 60, dbfull()->GetOptions().ttl); options.ttl = 200; options.periodic_compaction_seconds = 300; Reopen(options); ASSERT_EQ(200, dbfull()->GetOptions().ttl); options.ttl = 500; options.periodic_compaction_seconds = 300; Reopen(options); ASSERT_EQ(300, dbfull()->GetOptions().ttl); } TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { Options options; options.compaction_style = kCompactionStyleFIFO; options.write_buffer_size = 10 << 10; // 10KB options.arena_block_size = 4096; options.compression = kNoCompression; options.create_if_missing = true; options.compaction_options_fifo.allow_compaction = false; env_->time_elapse_only_sleep_ = false; options.env = env_; // Test dynamically changing ttl. env_->addon_time_.store(0); options.ttl = 1 * 60 * 60; // 1 hour ASSERT_OK(TryReopen(options)); Random rnd(301); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. 
for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Add 61 seconds to the time. env_->addon_time_.fetch_add(61); // No files should be compacted as ttl is set to 1 hour. ASSERT_EQ(dbfull()->GetOptions().ttl, 3600); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Set ttl to 1 minute. So all files should get deleted. ASSERT_OK(dbfull()->SetOptions({{"ttl", "60"}})); ASSERT_EQ(dbfull()->GetOptions().ttl, 60); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 0); // Test dynamically changing compaction_options_fifo.max_table_files_size env_->addon_time_.store(0); options.compaction_options_fifo.max_table_files_size = 500 << 10; // 00KB options.ttl = 0; DestroyAndReopen(options); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // No files should be compacted as max_table_files_size is set to 500 KB. ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, 500 << 10); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Set max_table_files_size to 12 KB. So only 1 file should remain now. 
ASSERT_OK(dbfull()->SetOptions( {{"compaction_options_fifo", "{max_table_files_size=12288;}"}})); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, 12 << 10); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 1); // Test dynamically changing compaction_options_fifo.allow_compaction options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB options.ttl = 0; options.compaction_options_fifo.allow_compaction = false; options.level0_file_num_compaction_trigger = 6; DestroyAndReopen(options); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // No files should be compacted as max_table_files_size is set to 500 KB and // allow_compaction is false ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, false); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Set allow_compaction to true. So number of files should be between 1 and 5. 
ASSERT_OK(dbfull()->SetOptions( {{"compaction_options_fifo", "{allow_compaction=true;}"}})); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, true); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_GE(NumTableFilesAtLevel(0), 1); ASSERT_LE(NumTableFilesAtLevel(0), 5); } TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { SpecialEnv env(env_); Options options; options.env = &env; options.compaction_readahead_size = 0; options.new_table_reader_for_compaction_inputs = true; options.level0_file_num_compaction_trigger = 2; const std::string kValue(1024, 'v'); Reopen(options); ASSERT_EQ(0, dbfull()->GetDBOptions().compaction_readahead_size); ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); for (int i = 0; i < 1024; i++) { Put(Key(i), kValue); } Flush(); for (int i = 0; i < 1024 * 2; i++) { Put(Key(i), kValue); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(256, env_->compaction_readahead_size_); Close(); } TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { Options options; options.compaction_style = kCompactionStyleFIFO; options.write_buffer_size = 10 << 10; // 10KB options.create_if_missing = true; ASSERT_OK(TryReopen(options)); Random rnd(301); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // In release 6.0, ttl was promoted from a secondary level option under // compaction_options_fifo to a top level option under ColumnFamilyOptions. // We still need to handle old SetOptions calls but should ignore // ttl under compaction_options_fifo. 
ASSERT_OK(dbfull()->SetOptions( {{"compaction_options_fifo", "{allow_compaction=true;max_table_files_size=1024;ttl=731;}"}, {"ttl", "60"}})); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, true); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, 1024); ASSERT_EQ(dbfull()->GetOptions().ttl, 60); // Put ttl as the first option inside compaction_options_fifo. That works as // it doesn't overwrite any other option. ASSERT_OK(dbfull()->SetOptions( {{"compaction_options_fifo", "{ttl=985;allow_compaction=true;max_table_files_size=1024;}"}, {"ttl", "191"}})); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.allow_compaction, true); ASSERT_EQ(dbfull()->GetOptions().compaction_options_fifo.max_table_files_size, 1024); ASSERT_EQ(dbfull()->GetOptions().ttl, 191); } TEST_F(DBOptionsTest, ChangeCompression) { if (!Snappy_Supported() || !LZ4_Supported()) { return; } Options options; options.write_buffer_size = 10 << 10; // 10KB options.level0_file_num_compaction_trigger = 2; options.create_if_missing = true; options.compression = CompressionType::kLZ4Compression; options.bottommost_compression = CompressionType::kNoCompression; options.bottommost_compression_opts.level = 2; options.bottommost_compression_opts.parallel_threads = 1; ASSERT_OK(TryReopen(options)); CompressionType compression_used = CompressionType::kLZ4Compression; CompressionOptions compression_opt_used; bool compacted = false; SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* c = reinterpret_cast(arg); compression_used = c->output_compression(); compression_opt_used = c->output_compression_opts(); compacted = true; }); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put("foo", "foofoofoo")); ASSERT_OK(Put("bar", "foofoofoo")); ASSERT_OK(Flush()); ASSERT_OK(Put("foo", "foofoofoo")); ASSERT_OK(Put("bar", "foofoofoo")); ASSERT_OK(Flush()); dbfull()->TEST_WaitForCompact(); 
ASSERT_TRUE(compacted); ASSERT_EQ(CompressionType::kNoCompression, compression_used); ASSERT_EQ(options.compression_opts.level, compression_opt_used.level); ASSERT_EQ(options.compression_opts.parallel_threads, compression_opt_used.parallel_threads); compression_used = CompressionType::kLZ4Compression; compacted = false; ASSERT_OK(dbfull()->SetOptions( {{"bottommost_compression", "kSnappyCompression"}, {"bottommost_compression_opts", "0:6:0:0:4:true"}})); ASSERT_OK(Put("foo", "foofoofoo")); ASSERT_OK(Put("bar", "foofoofoo")); ASSERT_OK(Flush()); ASSERT_OK(Put("foo", "foofoofoo")); ASSERT_OK(Put("bar", "foofoofoo")); ASSERT_OK(Flush()); dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(compacted); ASSERT_EQ(CompressionType::kSnappyCompression, compression_used); ASSERT_EQ(6, compression_opt_used.level); // Right now parallel_level is not yet allowed to be changed. SyncPoint::GetInstance()->DisableProcessing(); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_properties_test.cc000066400000000000000000001765601370372246700200600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include #include #include #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/listener.h" #include "rocksdb/options.h" #include "rocksdb/perf_context.h" #include "rocksdb/perf_level.h" #include "rocksdb/table.h" #include "util/random.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class DBPropertiesTest : public DBTestBase { public: DBPropertiesTest() : DBTestBase("/db_properties_test") {} }; #ifndef ROCKSDB_LITE TEST_F(DBPropertiesTest, Empty) { do { Options options; options.env = env_; options.write_buffer_size = 100000; // Small write buffer options.allow_concurrent_memtable_write = false; options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); std::string num; ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.num-entries-active-mem-table", &num)); ASSERT_EQ("0", num); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.num-entries-active-mem-table", &num)); ASSERT_EQ("1", num); // Block sync calls env_->delay_sstable_sync_.store(true, std::memory_order_release); Put(1, "k1", std::string(100000, 'x')); // Fill memtable ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.num-entries-active-mem-table", &num)); ASSERT_EQ("2", num); Put(1, "k2", std::string(100000, 'y')); // Trigger compaction ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.num-entries-active-mem-table", &num)); ASSERT_EQ("1", num); ASSERT_EQ("v1", Get(1, "foo")); // Release sync calls env_->delay_sstable_sync_.store(false, std::memory_order_release); ASSERT_OK(db_->DisableFileDeletions()); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("0", num); ASSERT_OK(db_->DisableFileDeletions()); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("0", num); ASSERT_OK(db_->DisableFileDeletions()); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); 
ASSERT_EQ("0", num); ASSERT_OK(db_->EnableFileDeletions(false)); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("0", num); ASSERT_OK(db_->EnableFileDeletions()); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("1", num); } while (ChangeOptions()); } TEST_F(DBPropertiesTest, CurrentVersionNumber) { uint64_t v1, v2, v3; ASSERT_TRUE( dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v1)); Put("12345678", ""); ASSERT_TRUE( dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v2)); Flush(); ASSERT_TRUE( dbfull()->GetIntProperty("rocksdb.current-super-version-number", &v3)); ASSERT_EQ(v1, v2); ASSERT_GT(v3, v2); } TEST_F(DBPropertiesTest, GetAggregatedIntPropertyTest) { const int kKeySize = 100; const int kValueSize = 500; const int kKeyNum = 100; Options options; options.env = env_; options.create_if_missing = true; options.write_buffer_size = (kKeySize + kValueSize) * kKeyNum / 10; // Make them never flush options.min_write_buffer_number_to_merge = 1000; options.max_write_buffer_number = 1000; options = CurrentOptions(options); CreateAndReopenWithCF({"one", "two", "three", "four"}, options); Random rnd(301); for (auto* handle : handles_) { for (int i = 0; i < kKeyNum; ++i) { db_->Put(WriteOptions(), handle, RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } } uint64_t manual_sum = 0; uint64_t api_sum = 0; uint64_t value = 0; for (auto* handle : handles_) { ASSERT_TRUE( db_->GetIntProperty(handle, DB::Properties::kSizeAllMemTables, &value)); manual_sum += value; } ASSERT_TRUE(db_->GetAggregatedIntProperty(DB::Properties::kSizeAllMemTables, &api_sum)); ASSERT_GT(manual_sum, 0); ASSERT_EQ(manual_sum, api_sum); ASSERT_FALSE(db_->GetAggregatedIntProperty(DB::Properties::kDBStats, &value)); uint64_t before_flush_trm; uint64_t after_flush_trm; for (auto* handle : handles_) { ASSERT_TRUE(db_->GetAggregatedIntProperty( 
DB::Properties::kEstimateTableReadersMem, &before_flush_trm)); // Issue flush and expect larger memory usage of table readers. db_->Flush(FlushOptions(), handle); ASSERT_TRUE(db_->GetAggregatedIntProperty( DB::Properties::kEstimateTableReadersMem, &after_flush_trm)); ASSERT_GT(after_flush_trm, before_flush_trm); } } namespace { void ResetTableProperties(TableProperties* tp) { tp->data_size = 0; tp->index_size = 0; tp->filter_size = 0; tp->raw_key_size = 0; tp->raw_value_size = 0; tp->num_data_blocks = 0; tp->num_entries = 0; tp->num_deletions = 0; tp->num_merge_operands = 0; tp->num_range_deletions = 0; } void ParseTablePropertiesString(std::string tp_string, TableProperties* tp) { double dummy_double; std::replace(tp_string.begin(), tp_string.end(), ';', ' '); std::replace(tp_string.begin(), tp_string.end(), '=', ' '); ResetTableProperties(tp); sscanf(tp_string.c_str(), "# data blocks %" SCNu64 " # entries %" SCNu64 " # deletions %" SCNu64 " # merge operands %" SCNu64 " # range deletions %" SCNu64 " raw key size %" SCNu64 " raw average key size %lf " " raw value size %" SCNu64 " raw average value size %lf " " data block size %" SCNu64 " index block size (user-key? %" SCNu64 ", delta-value? 
%" SCNu64 ") %" SCNu64 " filter block size %" SCNu64, &tp->num_data_blocks, &tp->num_entries, &tp->num_deletions, &tp->num_merge_operands, &tp->num_range_deletions, &tp->raw_key_size, &dummy_double, &tp->raw_value_size, &dummy_double, &tp->data_size, &tp->index_key_is_user_key, &tp->index_value_is_delta_encoded, &tp->index_size, &tp->filter_size); } void VerifySimilar(uint64_t a, uint64_t b, double bias) { ASSERT_EQ(a == 0U, b == 0U); if (a == 0) { return; } double dbl_a = static_cast(a); double dbl_b = static_cast(b); if (dbl_a > dbl_b) { ASSERT_LT(static_cast(dbl_a - dbl_b) / (dbl_a + dbl_b), bias); } else { ASSERT_LT(static_cast(dbl_b - dbl_a) / (dbl_a + dbl_b), bias); } } void VerifyTableProperties( const TableProperties& base_tp, const TableProperties& new_tp, double filter_size_bias = CACHE_LINE_SIZE >= 256 ? 0.15 : 0.1, double index_size_bias = 0.1, double data_size_bias = 0.1, double num_data_blocks_bias = 0.05) { VerifySimilar(base_tp.data_size, new_tp.data_size, data_size_bias); VerifySimilar(base_tp.index_size, new_tp.index_size, index_size_bias); VerifySimilar(base_tp.filter_size, new_tp.filter_size, filter_size_bias); VerifySimilar(base_tp.num_data_blocks, new_tp.num_data_blocks, num_data_blocks_bias); ASSERT_EQ(base_tp.raw_key_size, new_tp.raw_key_size); ASSERT_EQ(base_tp.raw_value_size, new_tp.raw_value_size); ASSERT_EQ(base_tp.num_entries, new_tp.num_entries); ASSERT_EQ(base_tp.num_deletions, new_tp.num_deletions); ASSERT_EQ(base_tp.num_range_deletions, new_tp.num_range_deletions); // Merge operands may become Puts, so we only have an upper bound the exact // number of merge operands. 
ASSERT_GE(base_tp.num_merge_operands, new_tp.num_merge_operands); } void GetExpectedTableProperties( TableProperties* expected_tp, const int kKeySize, const int kValueSize, const int kPutsPerTable, const int kDeletionsPerTable, const int kMergeOperandsPerTable, const int kRangeDeletionsPerTable, const int kTableCount, const int kBloomBitsPerKey, const size_t kBlockSize, const bool index_key_is_user_key, const bool value_delta_encoding) { const int kKeysPerTable = kPutsPerTable + kDeletionsPerTable + kMergeOperandsPerTable; const int kPutCount = kTableCount * kPutsPerTable; const int kDeletionCount = kTableCount * kDeletionsPerTable; const int kMergeCount = kTableCount * kMergeOperandsPerTable; const int kRangeDeletionCount = kTableCount * kRangeDeletionsPerTable; const int kKeyCount = kPutCount + kDeletionCount + kMergeCount + kRangeDeletionCount; const int kAvgSuccessorSize = kKeySize / 5; const int kEncodingSavePerKey = kKeySize / 4; expected_tp->raw_key_size = kKeyCount * (kKeySize + 8); expected_tp->raw_value_size = (kPutCount + kMergeCount + kRangeDeletionCount) * kValueSize; expected_tp->num_entries = kKeyCount; expected_tp->num_deletions = kDeletionCount + kRangeDeletionCount; expected_tp->num_merge_operands = kMergeCount; expected_tp->num_range_deletions = kRangeDeletionCount; expected_tp->num_data_blocks = kTableCount * (kKeysPerTable * (kKeySize - kEncodingSavePerKey + kValueSize)) / kBlockSize; expected_tp->data_size = kTableCount * (kKeysPerTable * (kKeySize + 8 + kValueSize)); expected_tp->index_size = expected_tp->num_data_blocks * (kAvgSuccessorSize + (index_key_is_user_key ? 0 : 8) - // discount 1 byte as value size is not encoded in value delta encoding (value_delta_encoding ? 
1 : 0)); expected_tp->filter_size = kTableCount * ((kKeysPerTable * kBloomBitsPerKey + 7) / 8 + /*average-ish overhead*/ CACHE_LINE_SIZE / 2); } } // anonymous namespace TEST_F(DBPropertiesTest, ValidatePropertyInfo) { for (const auto& ppt_name_and_info : InternalStats::ppt_name_to_info) { // If C++ gets a std::string_literal, this would be better to check at // compile-time using static_assert. ASSERT_TRUE(ppt_name_and_info.first.empty() || !isdigit(ppt_name_and_info.first.back())); int count = 0; count += (ppt_name_and_info.second.handle_string == nullptr) ? 0 : 1; count += (ppt_name_and_info.second.handle_int == nullptr) ? 0 : 1; count += (ppt_name_and_info.second.handle_string_dbimpl == nullptr) ? 0 : 1; ASSERT_TRUE(count == 1); } } TEST_F(DBPropertiesTest, ValidateSampleNumber) { // When "max_open_files" is -1, we read all the files for // "rocksdb.estimate-num-keys" computation, which is the ground truth. // Otherwise, we sample 20 newest files to make an estimation. // Formula: lastest_20_files_active_key_ratio * total_files Options options = CurrentOptions(); options.disable_auto_compactions = true; options.level0_stop_writes_trigger = 1000; DestroyAndReopen(options); int key = 0; for (int files = 20; files >= 10; files -= 10) { for (int i = 0; i < files; i++) { int rows = files / 10; for (int j = 0; j < rows; j++) { db_->Put(WriteOptions(), std::to_string(++key), "foo"); } db_->Flush(FlushOptions()); } } std::string num; Reopen(options); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); ASSERT_EQ("45", num); options.max_open_files = -1; Reopen(options); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num)); ASSERT_EQ("50", num); } TEST_F(DBPropertiesTest, AggregatedTableProperties) { for (int kTableCount = 40; kTableCount <= 100; kTableCount += 30) { const int kDeletionsPerTable = 5; const int kMergeOperandsPerTable = 15; const int kRangeDeletionsPerTable = 5; const int kPutsPerTable = 100; const int kKeySize = 80; 
const int kValueSize = 200; const int kBloomBitsPerKey = 20; Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 8; options.compression = kNoCompression; options.create_if_missing = true; options.preserve_deletes = true; options.merge_operator.reset(new TestPutOperator()); BlockBasedTableOptions table_options; table_options.filter_policy.reset( NewBloomFilterPolicy(kBloomBitsPerKey, false)); table_options.block_size = 1024; options.table_factory.reset(new BlockBasedTableFactory(table_options)); DestroyAndReopen(options); // Hold open a snapshot to prevent range tombstones from being compacted // away. ManagedSnapshot snapshot(db_); Random rnd(5632); for (int table = 1; table <= kTableCount; ++table) { for (int i = 0; i < kPutsPerTable; ++i) { db_->Put(WriteOptions(), RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } for (int i = 0; i < kDeletionsPerTable; i++) { db_->Delete(WriteOptions(), RandomString(&rnd, kKeySize)); } for (int i = 0; i < kMergeOperandsPerTable; i++) { db_->Merge(WriteOptions(), RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize)); } for (int i = 0; i < kRangeDeletionsPerTable; i++) { std::string start = RandomString(&rnd, kKeySize); std::string end = start; end.resize(kValueSize); db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), start, end); } db_->Flush(FlushOptions()); } std::string property; db_->GetProperty(DB::Properties::kAggregatedTableProperties, &property); TableProperties output_tp; ParseTablePropertiesString(property, &output_tp); bool index_key_is_user_key = output_tp.index_key_is_user_key > 0; bool value_is_delta_encoded = output_tp.index_value_is_delta_encoded > 0; TableProperties expected_tp; GetExpectedTableProperties( &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable, kMergeOperandsPerTable, kRangeDeletionsPerTable, kTableCount, kBloomBitsPerKey, table_options.block_size, index_key_is_user_key, value_is_delta_encoded); 
VerifyTableProperties(expected_tp, output_tp); } } TEST_F(DBPropertiesTest, ReadLatencyHistogramByLevel) { Options options = CurrentOptions(); options.write_buffer_size = 110 << 10; options.level0_file_num_compaction_trigger = 6; options.num_levels = 4; options.compression = kNoCompression; options.max_bytes_for_level_base = 4500 << 10; options.target_file_size_base = 98 << 10; options.max_write_buffer_number = 2; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.max_open_files = 11; // Make sure no proloading of table readers // RocksDB sanitize max open files to at least 20. Modify it back. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { int* max_open_files = static_cast(arg); *max_open_files = 11; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); BlockBasedTableOptions table_options; table_options.no_block_cache = true; CreateAndReopenWithCF({"pikachu"}, options); int key_index = 0; Random rnd(301); for (int num = 0; num < 8; num++) { Put("foo", "bar"); GenerateNewFile(&rnd, &key_index); dbfull()->TEST_WaitForCompact(); } dbfull()->TEST_WaitForCompact(); std::string prop; ASSERT_TRUE(dbfull()->GetProperty("rocksdb.dbstats", &prop)); // Get() after flushes, See latency histogram tracked. for (int key = 0; key < key_index; key++) { Get(Key(key)); } ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cfstats", &prop)); ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); // Reopen and issue Get(). 
See thee latency tracked ReopenWithColumnFamilies({"default", "pikachu"}, options); dbfull()->TEST_WaitForCompact(); for (int key = 0; key < key_index; key++) { Get(Key(key)); } // Test for getting immutable_db_options_.statistics ASSERT_TRUE(dbfull()->GetProperty(dbfull()->DefaultColumnFamily(), "rocksdb.options-statistics", &prop)); ASSERT_NE(std::string::npos, prop.find("rocksdb.block.cache.miss")); ASSERT_EQ(std::string::npos, prop.find("rocksdb.db.f.micros")); ASSERT_TRUE(dbfull()->GetProperty(dbfull()->DefaultColumnFamily(), "rocksdb.cf-file-histogram", &prop)); ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); // Reopen and issue iterating. See thee latency tracked ReopenWithColumnFamilies({"default", "pikachu"}, options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cf-file-histogram", &prop)); ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); { std::unique_ptr iter(db_->NewIterator(ReadOptions())); for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { } } ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cf-file-histogram", &prop)); ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); // CF 1 should show no histogram. 
ASSERT_TRUE( dbfull()->GetProperty(handles_[1], "rocksdb.cf-file-histogram", &prop)); ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); // put something and read it back , CF 1 should show histogram. Put(1, "foo", "bar"); Flush(1); dbfull()->TEST_WaitForCompact(); ASSERT_EQ("bar", Get(1, "foo")); ASSERT_TRUE( dbfull()->GetProperty(handles_[1], "rocksdb.cf-file-histogram", &prop)); ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); // options.max_open_files preloads table readers. options.max_open_files = -1; ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(dbfull()->GetProperty(dbfull()->DefaultColumnFamily(), "rocksdb.cf-file-histogram", &prop)); ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); for (int key = 0; key < key_index; key++) { Get(Key(key)); } ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cfstats", &prop)); ASSERT_NE(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_NE(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); // Clear internal stats dbfull()->ResetStats(); ASSERT_TRUE(dbfull()->GetProperty("rocksdb.cfstats", &prop)); ASSERT_EQ(std::string::npos, prop.find("** Level 0 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 1 read latency histogram")); ASSERT_EQ(std::string::npos, prop.find("** Level 2 read latency histogram")); } 
// After every flush + full manual compaction, the per-level aggregated table
// properties must sum to the DB-wide aggregated properties, and (once more
// than three tables exist) must be close to the estimated values.
TEST_F(DBPropertiesTest, AggregatedTablePropertiesAtLevel) {
  const int kTableCount = 100;
  const int kDeletionsPerTable = 2;
  const int kMergeOperandsPerTable = 2;
  const int kRangeDeletionsPerTable = 2;
  const int kPutsPerTable = 10;
  const int kKeySize = 50;
  const int kValueSize = 400;
  const int kMaxLevel = 7;
  const int kBloomBitsPerKey = 20;
  Random rnd(301);
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options.level0_file_num_compaction_trigger = 2;
  options.target_file_size_base = 8192;
  options.max_bytes_for_level_base = 10000;
  options.max_bytes_for_level_multiplier = 2;
  // This ensures there no compaction happening when we call GetProperty().
  options.disable_auto_compactions = true;
  options.preserve_deletes = true;
  options.merge_operator.reset(new TestPutOperator());

  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(
      NewBloomFilterPolicy(kBloomBitsPerKey, false));
  table_options.block_size = 1024;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  DestroyAndReopen(options);

  // Hold open a snapshot to prevent range tombstones from being compacted away.
  ManagedSnapshot snapshot(db_);

  std::string level_tp_strings[kMaxLevel];
  std::string tp_string;
  TableProperties level_tps[kMaxLevel];
  TableProperties tp, sum_tp, expected_tp;
  for (int table = 1; table <= kTableCount; ++table) {
    for (int i = 0; i < kPutsPerTable; ++i) {
      db_->Put(WriteOptions(), RandomString(&rnd, kKeySize),
               RandomString(&rnd, kValueSize));
    }
    for (int i = 0; i < kDeletionsPerTable; i++) {
      db_->Delete(WriteOptions(), RandomString(&rnd, kKeySize));
    }
    for (int i = 0; i < kMergeOperandsPerTable; i++) {
      db_->Merge(WriteOptions(), RandomString(&rnd, kKeySize),
                 RandomString(&rnd, kValueSize));
    }
    for (int i = 0; i < kRangeDeletionsPerTable; i++) {
      std::string start = RandomString(&rnd, kKeySize);
      std::string end = start;
      end.resize(kValueSize);
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), start, end);
    }
    db_->Flush(FlushOptions());
    db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
    // Sum the per-level properties for comparison with the DB-wide value.
    ResetTableProperties(&sum_tp);
    for (int level = 0; level < kMaxLevel; ++level) {
      db_->GetProperty(
          DB::Properties::kAggregatedTablePropertiesAtLevel + ToString(level),
          &level_tp_strings[level]);
      ParseTablePropertiesString(level_tp_strings[level], &level_tps[level]);
      sum_tp.data_size += level_tps[level].data_size;
      sum_tp.index_size += level_tps[level].index_size;
      sum_tp.filter_size += level_tps[level].filter_size;
      sum_tp.raw_key_size += level_tps[level].raw_key_size;
      sum_tp.raw_value_size += level_tps[level].raw_value_size;
      sum_tp.num_data_blocks += level_tps[level].num_data_blocks;
      sum_tp.num_entries += level_tps[level].num_entries;
      sum_tp.num_deletions += level_tps[level].num_deletions;
      sum_tp.num_merge_operands += level_tps[level].num_merge_operands;
      sum_tp.num_range_deletions += level_tps[level].num_range_deletions;
    }
    db_->GetProperty(DB::Properties::kAggregatedTableProperties, &tp_string);
    ParseTablePropertiesString(tp_string, &tp);
    bool index_key_is_user_key = tp.index_key_is_user_key > 0;
    bool value_is_delta_encoded = tp.index_value_is_delta_encoded > 0;
    ASSERT_EQ(sum_tp.data_size, tp.data_size);
    ASSERT_EQ(sum_tp.index_size, tp.index_size);
    ASSERT_EQ(sum_tp.filter_size, tp.filter_size);
    ASSERT_EQ(sum_tp.raw_key_size, tp.raw_key_size);
    ASSERT_EQ(sum_tp.raw_value_size, tp.raw_value_size);
    ASSERT_EQ(sum_tp.num_data_blocks, tp.num_data_blocks);
    ASSERT_EQ(sum_tp.num_entries, tp.num_entries);
    ASSERT_EQ(sum_tp.num_deletions, tp.num_deletions);
    ASSERT_EQ(sum_tp.num_merge_operands, tp.num_merge_operands);
    ASSERT_EQ(sum_tp.num_range_deletions, tp.num_range_deletions);
    if (table > 3) {
      GetExpectedTableProperties(
          &expected_tp, kKeySize, kValueSize, kPutsPerTable, kDeletionsPerTable,
          kMergeOperandsPerTable, kRangeDeletionsPerTable, table,
          kBloomBitsPerKey, table_options.block_size, index_key_is_user_key,
          value_is_delta_encoded);
      // Gives larger bias here as index block size, filter block size,
      // and data block size become much harder to estimate in this test.
      VerifyTableProperties(expected_tp, tp, 0.5, 0.4, 0.4, 0.25);
    }
  }
}

// Tracks the immutable/active memtable count and entry properties while
// oversized values force memtable switches, and checks the perf context's
// get_from_memtable_count for lookups that span several memtables.
TEST_F(DBPropertiesTest, NumImmutableMemTable) {
  do {
    Options options = CurrentOptions();
    WriteOptions writeOpt = WriteOptions();
    writeOpt.disableWAL = true;
    options.max_write_buffer_number = 4;
    options.min_write_buffer_number_to_merge = 3;
    options.write_buffer_size = 1000000;
    options.max_write_buffer_size_to_maintain =
        5 * static_cast<int64_t>(options.write_buffer_size);
    CreateAndReopenWithCF({"pikachu"}, options);

    // Each value is twice write_buffer_size.
    std::string big_value(1000000 * 2, 'x');
    std::string num;
    uint64_t value;
    SetPerfLevel(kEnableTime);
    ASSERT_TRUE(GetPerfLevel() == kEnableTime);

    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k1", big_value));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
    ASSERT_EQ(num, "0");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], DB::Properties::kNumImmutableMemTableFlushed, &num));
    ASSERT_EQ(num, "0");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ(num, "1");
    get_perf_context()->Reset();
    Get(1, "k1");
    ASSERT_EQ(1, static_cast<int>(get_perf_context()->get_from_memtable_count));

    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
    ASSERT_EQ(num, "1");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ(num, "1");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-imm-mem-tables", &num));
    ASSERT_EQ(num, "1");

    get_perf_context()->Reset();
    Get(1, "k1");
    ASSERT_EQ(2, static_cast<int>(get_perf_context()->get_from_memtable_count));
    get_perf_context()->Reset();
    Get(1, "k2");
    ASSERT_EQ(1, static_cast<int>(get_perf_context()->get_from_memtable_count));

    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", big_value));
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.cur-size-active-mem-table", &num));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
    ASSERT_EQ(num, "2");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &num));
    ASSERT_EQ(num, "1");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.num-entries-imm-mem-tables", &num));
    ASSERT_EQ(num, "2");
    get_perf_context()->Reset();
    Get(1, "k2");
    ASSERT_EQ(2, static_cast<int>(get_perf_context()->get_from_memtable_count));
    get_perf_context()->Reset();
    Get(1, "k3");
    ASSERT_EQ(1, static_cast<int>(get_perf_context()->get_from_memtable_count));
    get_perf_context()->Reset();
    Get(1, "k1");
    ASSERT_EQ(3, static_cast<int>(get_perf_context()->get_from_memtable_count));

    ASSERT_OK(Flush(1));
    ASSERT_TRUE(dbfull()->GetProperty(handles_[1],
                                      "rocksdb.num-immutable-mem-table", &num));
    ASSERT_EQ(num, "0");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], DB::Properties::kNumImmutableMemTableFlushed, &num));
    ASSERT_EQ(num, "3");
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.cur-size-active-mem-table", &value));
    // "192" is the size of the metadata of two empty skiplists, this would
    // break if we change the default skiplist implementation
    ASSERT_GE(value, 192);

    uint64_t int_num;
    uint64_t base_total_size;
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.estimate-num-keys", &base_total_size));

    ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k2"));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k3", ""));
    ASSERT_OK(dbfull()->Delete(writeOpt, handles_[1], "k3"));
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-deletes-active-mem-table", &int_num));
    ASSERT_EQ(int_num, 2U);
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-entries-active-mem-table", &int_num));
    ASSERT_EQ(int_num, 3U);

    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value));
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "k2", big_value));
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-entries-imm-mem-tables", &int_num));
    ASSERT_EQ(int_num, 4U);
    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.num-deletes-imm-mem-tables", &int_num));
    ASSERT_EQ(int_num, 2U);

    ASSERT_TRUE(dbfull()->GetIntProperty(
        handles_[1], "rocksdb.estimate-num-keys", &int_num));
    ASSERT_EQ(int_num, base_total_size + 1);

    SetPerfLevel(kDisable);
    ASSERT_TRUE(GetPerfLevel() == kDisable);
  } while (ChangeCompactOptions());
}

// TODO(techdept) : Disabled flaky test #12863555
// Checks memtable, flush/compaction-pending, key-estimate, table-reader
// memory and live-version properties while the background threads are
// blocked and then released.
TEST_F(DBPropertiesTest, DISABLED_GetProperty) {
  // Set sizes to both background thread pool to be 1 and block them.
  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->SetBackgroundThreads(1, Env::LOW);
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  test::SleepingBackgroundTask sleeping_task_high;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                 &sleeping_task_high, Env::Priority::HIGH);

  Options options = CurrentOptions();
  WriteOptions writeOpt = WriteOptions();
  writeOpt.disableWAL = true;
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = 1;
  options.compaction_options_universal.size_ratio = 50;
  options.max_background_compactions = 1;
  options.max_background_flushes = 1;
  options.max_write_buffer_number = 10;
  options.min_write_buffer_number_to_merge = 1;
  options.max_write_buffer_size_to_maintain = 0;
  options.write_buffer_size = 1000000;
  Reopen(options);

  std::string big_value(1000000 * 2, 'x');
  std::string num;
  uint64_t int_num;
  SetPerfLevel(kEnableTime);

  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-live-data-size", &int_num));
  ASSERT_EQ(int_num, 0U);

  ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value));
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
  ASSERT_EQ(num, "1");
  get_perf_context()->Reset();

  ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value));
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
  ASSERT_EQ(num, "1");
  ASSERT_OK(dbfull()->Delete(writeOpt, "k-non-existing"));
  ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value));
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
  ASSERT_EQ(num, "2");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num));
  ASSERT_EQ(num, "1");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
  ASSERT_EQ(num, "2");
  // Verify the same set of properties through GetIntProperty
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.num-immutable-mem-table", &int_num));
  ASSERT_EQ(int_num, 2U);
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.mem-table-flush-pending", &int_num));
  ASSERT_EQ(int_num, 1U);
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.compaction-pending", &int_num));
  ASSERT_EQ(int_num, 0U);
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num));
  ASSERT_EQ(int_num, 2U);

  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);

  // Unblock the HIGH priority pool so pending flushes can run.
  sleeping_task_high.WakeUp();
  sleeping_task_high.WaitUntilDone();
  dbfull()->TEST_WaitForFlushMemTable();

  ASSERT_OK(dbfull()->Put(writeOpt, "k4", big_value));
  ASSERT_OK(dbfull()->Put(writeOpt, "k5", big_value));
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num));
  ASSERT_EQ(num, "0");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num));
  ASSERT_EQ(num, "1");
  ASSERT_TRUE(dbfull()->GetProperty("rocksdb.estimate-num-keys", &num));
  ASSERT_EQ(num, "4");

  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_GT(int_num, 0U);

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  // Wait for compaction to be done. This is important because otherwise RocksDB
  // might schedule a compaction when reopening the database, failing assertion
  // (A) as a result.
  dbfull()->TEST_WaitForCompact();
  options.max_open_files = 10;
  Reopen(options);
  // After reopening, no table reader is loaded, so no memory for table readers
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);  // (A)
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &int_num));
  ASSERT_GT(int_num, 0U);

  // After reading a key, at least one table reader is loaded.
  Get("k5");
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_GT(int_num, 0U);

  // Test rocksdb.num-live-versions
  {
    options.level0_file_num_compaction_trigger = 20;
    Reopen(options);
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 1U);

    // Use an iterator to hold current version
    std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));

    ASSERT_OK(dbfull()->Put(writeOpt, "k6", big_value));
    Flush();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 2U);

    // Use an iterator to hold current version
    std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));

    ASSERT_OK(dbfull()->Put(writeOpt, "k7", big_value));
    Flush();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 3U);

    iter2.reset();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 2U);

    iter1.reset();
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.num-live-versions", &int_num));
    ASSERT_EQ(int_num, 1U);
  }
}

// Compares "cur-size-active-mem-table", "cur-size-all-mem-tables" and
// "size-all-mem-tables" with and without live iterators. Every property
// lookup is asserted so that an unset output value is never compared.
TEST_F(DBPropertiesTest, ApproximateMemoryUsage) {
  const int kNumRounds = 10;
  // TODO(noetzli) kFlushesPerRound does not really correlate with how many
  // flushes happen.
  const int kFlushesPerRound = 10;
  const int kWritesPerFlush = 10;
  const int kKeySize = 100;
  const int kValueSize = 1000;
  Options options;
  options.write_buffer_size = 1000;  // small write buffer
  options.min_write_buffer_number_to_merge = 4;
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  Random rnd(301);

  // Raw iterators created in phase 2 and deleted one by one in phase 3.
  std::vector<Iterator*> iters;

  uint64_t active_mem;
  uint64_t unflushed_mem;
  uint64_t all_mem;
  uint64_t prev_all_mem;

  // Phase 0. Verify that the initial values of all these properties are the
  // same as we have no mem-tables.
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table",
                                       &active_mem));
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables",
                                       &unflushed_mem));
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem));
  ASSERT_EQ(all_mem, active_mem);
  ASSERT_EQ(all_mem, unflushed_mem);

  // Phase 1. Simply issue Put() and expect "cur-size-all-mem-tables" equals to
  // "size-all-mem-tables"
  for (int r = 0; r < kNumRounds; ++r) {
    for (int f = 0; f < kFlushesPerRound; ++f) {
      for (int w = 0; w < kWritesPerFlush; ++w) {
        Put(RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize));
      }
    }
    // Make sure that there is no flush between getting the two properties.
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables",
                                         &unflushed_mem));
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem));
    // in no iterator case, these two number should be the same.
    ASSERT_EQ(unflushed_mem, all_mem);
  }
  prev_all_mem = all_mem;

  // Phase 2. Keep issuing Put() but also create new iterators. This time we
  // expect "size-all-mem-tables" > "cur-size-all-mem-tables".
  for (int r = 0; r < kNumRounds; ++r) {
    iters.push_back(db_->NewIterator(ReadOptions()));
    for (int f = 0; f < kFlushesPerRound; ++f) {
      for (int w = 0; w < kWritesPerFlush; ++w) {
        Put(RandomString(&rnd, kKeySize), RandomString(&rnd, kValueSize));
      }
    }
    // Force flush to prevent flush from happening between getting the
    // properties or after getting the properties and before the new round.
    Flush();

    // In the second round, add iterators.
    ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table",
                                         &active_mem));
    ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables",
                                         &unflushed_mem));
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem));
    ASSERT_GT(all_mem, active_mem);
    ASSERT_GT(all_mem, unflushed_mem);
    ASSERT_GT(all_mem, prev_all_mem);
    prev_all_mem = all_mem;
  }

  // Phase 3. Delete iterators and expect "size-all-mem-tables" shrinks
  // whenever we release an iterator.
  for (auto* iter : iters) {
    delete iter;
    ASSERT_TRUE(
        dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem));
    // Expect the size shrinking
    ASSERT_LT(all_mem, prev_all_mem);
    prev_all_mem = all_mem;
  }

  // Phase 4. Expect all these three counters to be the same.
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table",
                                       &active_mem));
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables",
                                       &unflushed_mem));
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem));
  ASSERT_EQ(active_mem, unflushed_mem);
  ASSERT_EQ(unflushed_mem, all_mem);

  // Phase 5. Reopen, and expect all these three counters to be the same again.
  Reopen(options);
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-active-mem-table",
                                       &active_mem));
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.cur-size-all-mem-tables",
                                       &unflushed_mem));
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.size-all-mem-tables", &all_mem));
  ASSERT_EQ(active_mem, unflushed_mem);
  ASSERT_EQ(unflushed_mem, all_mem);
}

TEST_F(DBPropertiesTest, EstimatePendingCompBytes) {
  // Set sizes to both background thread pool to be 1 and block them.
env_->SetBackgroundThreads(1, Env::HIGH); env_->SetBackgroundThreads(1, Env::LOW); test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); Options options = CurrentOptions(); WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; options.compaction_style = kCompactionStyleLevel; options.level0_file_num_compaction_trigger = 2; options.max_background_compactions = 1; options.max_background_flushes = 1; options.max_write_buffer_number = 10; options.min_write_buffer_number_to_merge = 1; options.max_write_buffer_size_to_maintain = 0; options.write_buffer_size = 1000000; Reopen(options); std::string big_value(1000000 * 2, 'x'); std::string num; uint64_t int_num; ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); Flush(); ASSERT_TRUE(dbfull()->GetIntProperty( "rocksdb.estimate-pending-compaction-bytes", &int_num)); ASSERT_EQ(int_num, 0U); ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); Flush(); ASSERT_TRUE(dbfull()->GetIntProperty( "rocksdb.estimate-pending-compaction-bytes", &int_num)); ASSERT_GT(int_num, 0U); ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); Flush(); ASSERT_TRUE(dbfull()->GetIntProperty( "rocksdb.estimate-pending-compaction-bytes", &int_num)); ASSERT_GT(int_num, 0U); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(dbfull()->GetIntProperty( "rocksdb.estimate-pending-compaction-bytes", &int_num)); ASSERT_EQ(int_num, 0U); } TEST_F(DBPropertiesTest, EstimateCompressionRatio) { if (!Snappy_Supported()) { return; } const int kNumL0Files = 3; const int kNumEntriesPerFile = 1000; Options options = CurrentOptions(); options.compression_per_level = {kNoCompression, kSnappyCompression}; options.disable_auto_compactions = true; options.num_levels = 2; Reopen(options); // compression ratio is -1.0 when no open files at level ASSERT_EQ(CompressionRatioAtLevel(0), -1.0); const std::string 
kVal(100, 'a'); for (int i = 0; i < kNumL0Files; ++i) { for (int j = 0; j < kNumEntriesPerFile; ++j) { // Put common data ("key") at end to prevent delta encoding from // compressing the key effectively std::string key = ToString(i) + ToString(j) + "key"; ASSERT_OK(dbfull()->Put(WriteOptions(), key, kVal)); } Flush(); } // no compression at L0, so ratio is less than one ASSERT_LT(CompressionRatioAtLevel(0), 1.0); ASSERT_GT(CompressionRatioAtLevel(0), 0.0); ASSERT_EQ(CompressionRatioAtLevel(1), -1.0); dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_EQ(CompressionRatioAtLevel(0), -1.0); // Data at L1 should be highly compressed thanks to Snappy and redundant data // in values (ratio is 12.846 as of 4/19/2016). ASSERT_GT(CompressionRatioAtLevel(1), 10.0); } #endif // ROCKSDB_LITE class CountingUserTblPropCollector : public TablePropertiesCollector { public: const char* Name() const override { return "CountingUserTblPropCollector"; } Status Finish(UserCollectedProperties* properties) override { std::string encoded; PutVarint32(&encoded, count_); *properties = UserCollectedProperties{ {"CountingUserTblPropCollector", message_}, {"Count", encoded}, }; return Status::OK(); } Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, EntryType /*type*/, SequenceNumber /*seq*/, uint64_t /*file_size*/) override { ++count_; return Status::OK(); } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } private: std::string message_ = "Rocksdb"; uint32_t count_ = 0; }; class CountingUserTblPropCollectorFactory : public TablePropertiesCollectorFactory { public: explicit CountingUserTblPropCollectorFactory( uint32_t expected_column_family_id) : expected_column_family_id_(expected_column_family_id), num_created_(0) {} TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context context) override { EXPECT_EQ(expected_column_family_id_, context.column_family_id); num_created_++; 
return new CountingUserTblPropCollector(); } const char* Name() const override { return "CountingUserTblPropCollectorFactory"; } void set_expected_column_family_id(uint32_t v) { expected_column_family_id_ = v; } uint32_t expected_column_family_id_; uint32_t num_created_; }; class CountingDeleteTabPropCollector : public TablePropertiesCollector { public: const char* Name() const override { return "CountingDeleteTabPropCollector"; } Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, EntryType type, SequenceNumber /*seq*/, uint64_t /*file_size*/) override { if (type == kEntryDelete) { num_deletes_++; } return Status::OK(); } bool NeedCompact() const override { return num_deletes_ > 10; } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } Status Finish(UserCollectedProperties* properties) override { *properties = UserCollectedProperties{{"num_delete", ToString(num_deletes_)}}; return Status::OK(); } private: uint32_t num_deletes_ = 0; }; class CountingDeleteTabPropCollectorFactory : public TablePropertiesCollectorFactory { public: TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context /*context*/) override { return new CountingDeleteTabPropCollector(); } const char* Name() const override { return "CountingDeleteTabPropCollectorFactory"; } }; #ifndef ROCKSDB_LITE TEST_F(DBPropertiesTest, GetUserDefinedTableProperties) { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = (1 << 30); options.table_properties_collector_factories.resize(1); std::shared_ptr collector_factory = std::make_shared(0); options.table_properties_collector_factories[0] = collector_factory; Reopen(options); // Create 4 tables for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10 + table; ++i) { db_->Put(WriteOptions(), ToString(table * 100 + i), "val"); } db_->Flush(FlushOptions()); } TablePropertiesCollection props; 
ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); ASSERT_EQ(4U, props.size()); uint32_t sum = 0; for (const auto& item : props) { auto& user_collected = item.second->user_collected_properties; ASSERT_TRUE(user_collected.find("CountingUserTblPropCollector") != user_collected.end()); ASSERT_EQ(user_collected.at("CountingUserTblPropCollector"), "Rocksdb"); ASSERT_TRUE(user_collected.find("Count") != user_collected.end()); Slice key(user_collected.at("Count")); uint32_t count; ASSERT_TRUE(GetVarint32(&key, &count)); sum += count; } ASSERT_EQ(10u + 11u + 12u + 13u, sum); ASSERT_GT(collector_factory->num_created_, 0U); collector_factory->num_created_ = 0; dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_GT(collector_factory->num_created_, 0U); } #endif // ROCKSDB_LITE TEST_F(DBPropertiesTest, UserDefinedTablePropertiesContext) { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 3; options.table_properties_collector_factories.resize(1); std::shared_ptr collector_factory = std::make_shared(1); options.table_properties_collector_factories[0] = collector_factory, CreateAndReopenWithCF({"pikachu"}, options); // Create 2 files for (int table = 0; table < 2; ++table) { for (int i = 0; i < 10 + table; ++i) { Put(1, ToString(table * 100 + i), "val"); } Flush(1); } ASSERT_GT(collector_factory->num_created_, 0U); collector_factory->num_created_ = 0; // Trigger automatic compactions. 
for (int table = 0; table < 3; ++table) { for (int i = 0; i < 10 + table; ++i) { Put(1, ToString(table * 100 + i), "val"); } Flush(1); dbfull()->TEST_WaitForCompact(); } ASSERT_GT(collector_factory->num_created_, 0U); collector_factory->num_created_ = 0; dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); ASSERT_GT(collector_factory->num_created_, 0U); // Come back to write to default column family collector_factory->num_created_ = 0; collector_factory->set_expected_column_family_id(0); // default CF // Create 4 tables in default column family for (int table = 0; table < 2; ++table) { for (int i = 0; i < 10 + table; ++i) { Put(ToString(table * 100 + i), "val"); } Flush(); } ASSERT_GT(collector_factory->num_created_, 0U); collector_factory->num_created_ = 0; // Trigger automatic compactions. for (int table = 0; table < 3; ++table) { for (int i = 0; i < 10 + table; ++i) { Put(ToString(table * 100 + i), "val"); } Flush(); dbfull()->TEST_WaitForCompact(); } ASSERT_GT(collector_factory->num_created_, 0U); collector_factory->num_created_ = 0; dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_GT(collector_factory->num_created_, 0U); } #ifndef ROCKSDB_LITE TEST_F(DBPropertiesTest, TablePropertiesNeedCompactTest) { Random rnd(301); Options options; options.create_if_missing = true; options.write_buffer_size = 4096; options.max_write_buffer_number = 8; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 4; options.target_file_size_base = 2048; options.max_bytes_for_level_base = 10240; options.max_bytes_for_level_multiplier = 4; options.soft_pending_compaction_bytes_limit = 1024 * 1024; options.num_levels = 8; options.env = env_; std::shared_ptr collector_factory = std::make_shared(); options.table_properties_collector_factories.resize(1); options.table_properties_collector_factories[0] = collector_factory; DestroyAndReopen(options); const int kMaxKey = 1000; for (int i = 0; i < 
kMaxKey; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); } Flush(); dbfull()->TEST_WaitForCompact(); if (NumTableFilesAtLevel(0) == 1) { // Clear Level 0 so that when later flush a file with deletions, // we don't trigger an organic compaction. ASSERT_OK(Put(Key(0), "")); ASSERT_OK(Put(Key(kMaxKey * 2), "")); Flush(); dbfull()->TEST_WaitForCompact(); } ASSERT_EQ(NumTableFilesAtLevel(0), 0); { int c = 0; std::unique_ptr iter(db_->NewIterator(ReadOptions())); iter->Seek(Key(kMaxKey - 100)); while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { iter->Next(); ++c; } ASSERT_EQ(c, 200); } Delete(Key(0)); for (int i = kMaxKey - 100; i < kMaxKey + 100; i++) { Delete(Key(i)); } Delete(Key(kMaxKey * 2)); Flush(); dbfull()->TEST_WaitForCompact(); { SetPerfLevel(kEnableCount); get_perf_context()->Reset(); int c = 0; std::unique_ptr iter(db_->NewIterator(ReadOptions())); iter->Seek(Key(kMaxKey - 100)); while (iter->Valid() && iter->key().compare(Key(kMaxKey + 100)) < 0) { iter->Next(); } ASSERT_EQ(c, 0); ASSERT_LT(get_perf_context()->internal_delete_skipped_count, 30u); ASSERT_LT(get_perf_context()->internal_key_skipped_count, 30u); SetPerfLevel(kDisable); } } TEST_F(DBPropertiesTest, NeedCompactHintPersistentTest) { Random rnd(301); Options options; options.create_if_missing = true; options.max_write_buffer_number = 8; options.level0_file_num_compaction_trigger = 10; options.level0_slowdown_writes_trigger = 10; options.level0_stop_writes_trigger = 10; options.disable_auto_compactions = true; options.env = env_; std::shared_ptr collector_factory = std::make_shared(); options.table_properties_collector_factories.resize(1); options.table_properties_collector_factories[0] = collector_factory; DestroyAndReopen(options); const int kMaxKey = 100; for (int i = 0; i < kMaxKey; i++) { ASSERT_OK(Put(Key(i), "")); } Flush(); dbfull()->TEST_WaitForFlushMemTable(); for (int i = 1; i < kMaxKey - 1; i++) { 
Delete(Key(i)); } Flush(); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(NumTableFilesAtLevel(0), 2); // Restart the DB. Although number of files didn't reach // options.level0_file_num_compaction_trigger, compaction should // still be triggered because of the need-compaction hint. options.disable_auto_compactions = false; Reopen(options); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0), 0); { SetPerfLevel(kEnableCount); get_perf_context()->Reset(); int c = 0; std::unique_ptr iter(db_->NewIterator(ReadOptions())); for (iter->Seek(Key(0)); iter->Valid(); iter->Next()) { c++; } ASSERT_EQ(c, 2); ASSERT_EQ(get_perf_context()->internal_delete_skipped_count, 0); // We iterate every key twice. Is it a bug? ASSERT_LE(get_perf_context()->internal_key_skipped_count, 2); SetPerfLevel(kDisable); } } TEST_F(DBPropertiesTest, EstimateNumKeysUnderflow) { Options options; Reopen(options); Put("foo", "bar"); Delete("foo"); Delete("foo"); uint64_t num_keys = 0; ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.estimate-num-keys", &num_keys)); ASSERT_EQ(0, num_keys); } TEST_F(DBPropertiesTest, EstimateOldestKeyTime) { std::unique_ptr mock_env(new MockTimeEnv(Env::Default())); uint64_t oldest_key_time = 0; Options options; options.env = mock_env.get(); // "rocksdb.estimate-oldest-key-time" only available to fifo compaction. 
mock_env->set_current_time(100); for (auto compaction : {kCompactionStyleLevel, kCompactionStyleUniversal, kCompactionStyleNone}) { options.compaction_style = compaction; options.create_if_missing = true; DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_FALSE(dbfull()->GetIntProperty( DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); } options.compaction_style = kCompactionStyleFIFO; options.ttl = 300; options.compaction_options_fifo.allow_compaction = false; DestroyAndReopen(options); mock_env->set_current_time(100); ASSERT_OK(Put("k1", "v1")); ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); ASSERT_EQ(100, oldest_key_time); ASSERT_OK(Flush()); ASSERT_EQ("1", FilesPerLevel()); ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); ASSERT_EQ(100, oldest_key_time); mock_env->set_current_time(200); ASSERT_OK(Put("k2", "v2")); ASSERT_OK(Flush()); ASSERT_EQ("2", FilesPerLevel()); ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); ASSERT_EQ(100, oldest_key_time); mock_env->set_current_time(300); ASSERT_OK(Put("k3", "v3")); ASSERT_OK(Flush()); ASSERT_EQ("3", FilesPerLevel()); ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); ASSERT_EQ(100, oldest_key_time); mock_env->set_current_time(450); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ("2", FilesPerLevel()); ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); ASSERT_EQ(200, oldest_key_time); mock_env->set_current_time(550); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ("1", FilesPerLevel()); ASSERT_TRUE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); ASSERT_EQ(300, oldest_key_time); mock_env->set_current_time(650); 
ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ("", FilesPerLevel()); ASSERT_FALSE(dbfull()->GetIntProperty(DB::Properties::kEstimateOldestKeyTime, &oldest_key_time)); // Close before mock_env destructs. Close(); } TEST_F(DBPropertiesTest, SstFilesSize) { struct TestListener : public EventListener { void OnCompactionCompleted(DB* db, const CompactionJobInfo& /*info*/) override { assert(callback_triggered == false); assert(size_before_compaction > 0); callback_triggered = true; uint64_t total_sst_size = 0; uint64_t live_sst_size = 0; bool ok = db->GetIntProperty(DB::Properties::kTotalSstFilesSize, &total_sst_size); ASSERT_TRUE(ok); // total_sst_size include files before and after compaction. ASSERT_GT(total_sst_size, size_before_compaction); ok = db->GetIntProperty(DB::Properties::kLiveSstFilesSize, &live_sst_size); ASSERT_TRUE(ok); // live_sst_size only include files after compaction. ASSERT_GT(live_sst_size, 0); ASSERT_LT(live_sst_size, size_before_compaction); } uint64_t size_before_compaction = 0; bool callback_triggered = false; }; std::shared_ptr listener = std::make_shared(); Options options; options.disable_auto_compactions = true; options.listeners.push_back(listener); Reopen(options); for (int i = 0; i < 10; i++) { ASSERT_OK(Put("key" + ToString(i), std::string(1000, 'v'))); } ASSERT_OK(Flush()); for (int i = 0; i < 5; i++) { ASSERT_OK(Delete("key" + ToString(i))); } ASSERT_OK(Flush()); uint64_t sst_size; bool ok = db_->GetIntProperty(DB::Properties::kTotalSstFilesSize, &sst_size); ASSERT_TRUE(ok); ASSERT_GT(sst_size, 0); listener->size_before_compaction = sst_size; // Compact to clean all keys and trigger listener. 
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_TRUE(listener->callback_triggered); } TEST_F(DBPropertiesTest, MinObsoleteSstNumberToKeep) { class TestListener : public EventListener { public: void OnTableFileCreated(const TableFileCreationInfo& info) override { if (info.reason == TableFileCreationReason::kCompaction) { // Verify the property indicates that SSTs created by a running // compaction cannot be deleted. uint64_t created_file_num; FileType created_file_type; std::string filename = info.file_path.substr(info.file_path.rfind('/') + 1); ASSERT_TRUE( ParseFileName(filename, &created_file_num, &created_file_type)); ASSERT_EQ(kTableFile, created_file_type); uint64_t keep_sst_lower_bound; ASSERT_TRUE( db_->GetIntProperty(DB::Properties::kMinObsoleteSstNumberToKeep, &keep_sst_lower_bound)); ASSERT_LE(keep_sst_lower_bound, created_file_num); validated_ = true; } } void SetDB(DB* db) { db_ = db; } int GetNumCompactions() { return num_compactions_; } // True if we've verified the property for at least one output file bool Validated() { return validated_; } private: int num_compactions_ = 0; bool validated_ = false; DB* db_ = nullptr; }; const int kNumL0Files = 4; std::shared_ptr listener = std::make_shared(); Options options = CurrentOptions(); options.listeners.push_back(listener); options.level0_file_num_compaction_trigger = kNumL0Files; DestroyAndReopen(options); listener->SetDB(db_); for (int i = 0; i < kNumL0Files; ++i) { // Make sure they overlap in keyspace to prevent trivial move Put("key1", "val"); Put("key2", "val"); Flush(); } dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(listener->Validated()); } TEST_F(DBPropertiesTest, BlockCacheProperties) { Options options; uint64_t value; // Block cache properties are not available for tables other than // block-based table. 
options.table_factory.reset(NewPlainTableFactory()); Reopen(options); ASSERT_FALSE( db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_FALSE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); ASSERT_FALSE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); options.table_factory.reset(NewCuckooTableFactory()); Reopen(options); ASSERT_FALSE( db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_FALSE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); ASSERT_FALSE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); // Block cache properties are not available if block cache is not used. BlockBasedTableOptions table_options; table_options.no_block_cache = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_FALSE( db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_FALSE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); ASSERT_FALSE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); // Test with empty block cache. constexpr size_t kCapacity = 100; LRUCacheOptions co; co.capacity = kCapacity; co.num_shard_bits = 0; co.metadata_charge_policy = kDontChargeCacheMetadata; auto block_cache = NewLRUCache(co); table_options.block_cache = block_cache; table_options.no_block_cache = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_EQ(kCapacity, value); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); ASSERT_EQ(0, value); ASSERT_TRUE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); ASSERT_EQ(0, value); // Insert unpinned item to the cache and check size. 
constexpr size_t kSize1 = 50; block_cache->Insert("item1", nullptr /*value*/, kSize1, nullptr /*deleter*/); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_EQ(kCapacity, value); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); ASSERT_EQ(kSize1, value); ASSERT_TRUE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); ASSERT_EQ(0, value); // Insert pinned item to the cache and check size. constexpr size_t kSize2 = 30; Cache::Handle* item2 = nullptr; block_cache->Insert("item2", nullptr /*value*/, kSize2, nullptr /*deleter*/, &item2); ASSERT_NE(nullptr, item2); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_EQ(kCapacity, value); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); ASSERT_EQ(kSize1 + kSize2, value); ASSERT_TRUE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); ASSERT_EQ(kSize2, value); // Insert another pinned item to make the cache over-sized. constexpr size_t kSize3 = 80; Cache::Handle* item3 = nullptr; block_cache->Insert("item3", nullptr /*value*/, kSize3, nullptr /*deleter*/, &item3); ASSERT_NE(nullptr, item2); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_EQ(kCapacity, value); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); // Item 1 is evicted. ASSERT_EQ(kSize2 + kSize3, value); ASSERT_TRUE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); ASSERT_EQ(kSize2 + kSize3, value); // Check size after release. block_cache->Release(item2); block_cache->Release(item3); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheCapacity, &value)); ASSERT_EQ(kCapacity, value); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlockCacheUsage, &value)); // item2 will be evicted, while item3 remain in cache after release. 
ASSERT_EQ(kSize3, value); ASSERT_TRUE( db_->GetIntProperty(DB::Properties::kBlockCachePinnedUsage, &value)); ASSERT_EQ(0, value); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_range_del_test.cc000066400000000000000000002000171370372246700175650ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "test_util/testutil.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { class DBRangeDelTest : public DBTestBase { public: DBRangeDelTest() : DBTestBase("/db_range_del_test") {} std::string GetNumericStr(int key) { uint64_t uint64_key = static_cast(key); std::string str; str.resize(8); memcpy(&str[0], static_cast(&uint64_key), 8); return str; } }; // PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not // supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) { // TODO: figure out why MmapReads trips the iterator pinning assertion in // RangeDelAggregator. Ideally it would be supported; otherwise it should at // least be explicitly unsupported. 
for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) {
    option_config_ = config;
    DestroyAndReopen(CurrentOptions());
    ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "dr1", "dr1")
                    .IsNotSupported());
  }
}

TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) {
  WriteBatchWithIndex indexedBatch{};
  ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1")
                  .IsNotSupported());
  ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported());
}

TEST_F(DBRangeDelTest, EndSameAsStartCoversNothing) {
  ASSERT_OK(db_->Put(WriteOptions(), "b", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "b"));
  ASSERT_EQ("val", Get("b"));
}

TEST_F(DBRangeDelTest, EndComesBeforeStartInvalidArgument) {
  ASSERT_OK(db_->Put(WriteOptions(), "b", "val"));
  ASSERT_TRUE(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "a")
          .IsInvalidArgument());
  ASSERT_EQ("val", Get("b"));
}

TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) {
  do {
    DestroyAndReopen(CurrentOptions());
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               "dr1", "dr2"));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(1, NumTableFilesAtLevel(0));
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
  do {
    Options opts = CurrentOptions();
    opts.disable_auto_compactions = true;
    opts.statistics = CreateDBStatistics();
    DestroyAndReopen(opts);

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a",
                               "z"));
    ASSERT_OK(db_->Flush(FlushOptions()));

    ASSERT_EQ(1, NumTableFilesAtLevel(0));
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                true /* disallow_trivial_move */);
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
    db_->ReleaseSnapshot(snapshot);
    // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
    // compactions as the above assertions about the number of files in a level
    // do not hold true.
  } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}

TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
  // regression test for exactly filled compaction output files. Previously
  // another file would be generated containing all range deletions, which
  // could invalidate the non-overlapping file boundary invariant.
  const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  BlockBasedTableOptions table_options;
  table_options.block_size_deviation = 50;  // each block holds two keys
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(1)));

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, 3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
      if (j == 0 && i > 0) {
        dbfull()->TEST_WaitForFlushMemTable();
      }
    }
  }
  // put extra key to trigger final flush
  ASSERT_OK(Put("", ""));
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(2, NumTableFilesAtLevel(1));
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
  // Ensures range deletion spanning multiple compaction output files that are
  // cut by max_compaction_bytes will have non-overlapping key-ranges.
  // https://github.com/facebook/rocksdb/issues/1778
  const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.level0_file_num_compaction_trigger = kNumFiles;
  opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  // Want max_compaction_bytes to trigger the end of compaction output file, not
  // target_file_size_base, so make the latter much bigger
  opts.target_file_size_base = 100 * opts.max_compaction_bytes;
  Reopen(opts);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // It spans the whole key-range, thus will be included in all output files
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             GetNumericStr(0),
                             GetNumericStr(kNumFiles * kNumPerFile - 1)));
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 1MB (256 values, each 4K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, kBytesPerVal));
      ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
    }
    // extra entry to trigger SpecialSkipListFactory's flush
    ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), 2);

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);

  // Adjacent L1 files must not overlap: each file's largest key sorts strictly
  // before the next file's smallest key.
  for (size_t i = 0; i + 1 < files[1].size(); ++i) {
    ASSERT_TRUE(InternalKeyComparator(opts.comparator)
                    .Compare(files[1][i].largest, files[1][i + 1].smallest) <
                0);
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) {
  // Regression test for bug where sentinel range deletions (i.e., ones with
  // sequence number of zero) were included in output files.
  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // gaps between ranges creates sentinels in our internal representation
  std::vector<std::pair<std::string, std::string>> range_dels = {
      {"a", "b"}, {"c", "d"}, {"e", "f"}};
  for (const auto& range_del : range_dels) {
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               range_del.first, range_del.second));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
  ASSERT_GT(files[0][0].fd.smallest_seqno, 0);

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
  ASSERT_OK(db_->Put(WriteOptions(), "b1", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  ASSERT_OK(db_->Put(WriteOptions(), "b2", "val"));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  // first iteration verifies query correctness in memtable, second verifies
  // query correctness for a single SST file
  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
      ASSERT_EQ(1, NumTableFilesAtLevel(0));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
    ASSERT_OK(db_->Get(ReadOptions(), "b2", &value));
  }
}

TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) {
  ASSERT_OK(db_->Put(WriteOptions(), "unused",
                     "val"));  // prevents empty after compaction
  ASSERT_OK(db_->Put(WriteOptions(), "b1", "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(3, NumTableFilesAtLevel(0));

  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                  true /* disallow_trivial_move */);
      ASSERT_EQ(0, NumTableFilesAtLevel(0));
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    std::string value;
ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound()); } } #endif // ROCKSDB_LITE TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) { const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250; Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); Reopen(opts); // Write a third before snapshot, a third between snapshot and tombstone, and // a third after the tombstone. Keys older than snapshot or newer than the // tombstone should be preserved. const Snapshot* snapshot = nullptr; for (int i = 0; i < kNum; ++i) { if (i == kNum / 3) { snapshot = db_->GetSnapshot(); } else if (i == 2 * kNum / 3) { db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); } db_->Put(WriteOptions(), GetNumericStr(i), "val"); } db_->Flush(FlushOptions()); for (int i = 0; i < kNum; ++i) { ReadOptions read_opts; read_opts.ignore_range_deletions = true; std::string value; if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) { ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value)); } else { ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound()); } } db_->ReleaseSnapshot(snapshot); } // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) { const int kNumPerFile = 100, kNumFiles = 4; Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); opts.disable_auto_compactions = true; opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); opts.num_levels = 2; opts.statistics = CreateDBStatistics(); Reopen(opts); for (int i = 0; i < kNumFiles; ++i) { if (i > 0) { // range tombstone covers first half of the previous file db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), GetNumericStr((i - 1) * kNumPerFile), GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2)); } // Make sure a given key appears in each file so compaction won't be able to // use trivial 
move, which would happen if the ranges were non-overlapping. // Also, we need an extra element since flush is only triggered when the // number of keys is one greater than SpecialSkipListFactory's limit. // We choose a key outside the key-range used by the test to avoid conflict. db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), "val"); for (int j = 0; j < kNumPerFile; ++j) { db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val"); } dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); } db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(1), 0); ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2, TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL)); for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kNumPerFile; ++j) { ReadOptions read_opts; read_opts.ignore_range_deletions = true; std::string value; if (i == kNumFiles - 1 || j >= kNumPerFile / 2) { ASSERT_OK( db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)); } else { ASSERT_TRUE( db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value) .IsNotFound()); } } } } TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) { const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.level0_file_num_compaction_trigger = kNumFiles; options.max_bytes_for_level_base = 2 * kFileBytes; options.max_subcompactions = 4; options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); options.num_levels = 3; options.target_file_size_base = kFileBytes; options.target_file_size_multiplier = 1; Reopen(options); Random rnd(301); for (int i = 0; i < 2; ++i) { for (int j = 0; j < kNumFiles; ++j) { if (i > 0) { // delete [95,105) in two files, [295,305) in next two int mid = (j + (1 - j % 2)) * kNumPerFile; db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(mid - 
5), Key(mid + 5)); } std::vector values; // Write 100KB (100 values, each 1K) for (int k = 0; k < kNumPerFile; k++) { values.push_back(RandomString(&rnd, 990)); ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k])); } // put extra key to trigger flush ASSERT_OK(Put("", "")); dbfull()->TEST_WaitForFlushMemTable(); if (j < kNumFiles - 1) { // background compaction may happen early for kNumFiles'th file ASSERT_EQ(NumTableFilesAtLevel(0), j + 1); } if (j == options.level0_file_num_compaction_trigger - 1) { // When i == 1, compaction will output some files to L1, at which point // L1 is not bottommost so range deletions cannot be compacted away. The // new L1 files must be generated with non-overlapping key ranges even // though multiple subcompactions see the same ranges deleted, else an // assertion will fail. // // Only enable auto-compactions when we're ready; otherwise, the // oversized L0 (relative to base_level) causes the compaction to run // earlier. ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()})); dbfull()->TEST_WaitForCompact(); ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(), {{"disable_auto_compactions", "true"}})); ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_GT(NumTableFilesAtLevel(1), 0); ASSERT_GT(NumTableFilesAtLevel(2), 0); } } } } TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) { const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4; Options options = CurrentOptions(); options.compaction_options_universal.min_merge_width = kFilesPerLevel; options.compaction_options_universal.max_merge_width = kFilesPerLevel; options.compaction_options_universal.size_ratio = 10; options.compaction_style = kCompactionStyleUniversal; options.level0_file_num_compaction_trigger = kFilesPerLevel; options.max_subcompactions = 4; options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); options.num_levels = kNumLevels; options.target_file_size_base = kNumPerFile << 10; options.target_file_size_multiplier = 1; 
Reopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumLevels - 1; ++i) {
    for (int j = 0; j < kFilesPerLevel; ++j) {
      if (i == kNumLevels - 2) {
        // insert range deletions [95,105) in two files, [295,305) in next two
        // to prepare L1 for later manual compaction.
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(mid - 5), Key(mid + 5)));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(RandomString(&rnd, 990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      if (j < kFilesPerLevel - 1) {
        // background compaction may happen early for kFilesPerLevel'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
  }
  // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
  // happen since input level > 0; (2) range deletions are not dropped since
  // output level is not bottommost. If no file boundary assertion fails, that
  // probably means universal compaction + subcompaction + range deletion are
  // compatible.
ASSERT_OK(dbfull()->RunManualCompaction( reinterpret_cast(db_->DefaultColumnFamily()) ->cfd(), 1 /* input_level */, 2 /* output_level */, CompactRangeOptions(), nullptr /* begin */, nullptr /* end */, true /* exclusive */, true /* disallow_trivial_move */, port::kMaxUint64 /* max_file_num_to_ignore */)); } #endif // ROCKSDB_LITE TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) { const int kNumPerFile = 3, kNumFiles = 3; Options opts = CurrentOptions(); opts.disable_auto_compactions = true; opts.memtable_factory.reset(new SpecialSkipListFactory(2 * kNumPerFile)); opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); opts.num_levels = 2; Reopen(opts); // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file // requires an extra entry. for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) { if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) { // Delete merge operands from all but the last file db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", "key_"); } std::string val; PutFixed64(&val, i); db_->Merge(WriteOptions(), "key", val); // we need to prevent trivial move using Puts so compaction will actually // process the merge operands. db_->Put(WriteOptions(), "prevent_trivial_move", ""); if (i > 0 && i % kNumPerFile == 0) { dbfull()->TEST_WaitForFlushMemTable(); } } ReadOptions read_opts; read_opts.ignore_range_deletions = true; std::string expected, actual; ASSERT_OK(db_->Get(read_opts, "key", &actual)); PutFixed64(&expected, 45); // 1+2+...+9 ASSERT_EQ(expected, actual); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); expected.clear(); ASSERT_OK(db_->Get(read_opts, "key", &actual)); uint64_t tmp; Slice tmp2(actual); GetFixed64(&tmp2, &tmp); PutFixed64(&expected, 30); // 6+7+8+9 (earlier operands covered by tombstone) ASSERT_EQ(expected, actual); } TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) { // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4) // Flush. 
The `CompactionIterator` previously had a bug where we forgot to // check for covering range tombstones when processing the (1) Put, causing // it to reappear after the flush. Options opts = CurrentOptions(); opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); Reopen(opts); std::string val; PutFixed64(&val, 1); ASSERT_OK(db_->Put(WriteOptions(), "key", val)); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", "key_")); ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); ASSERT_OK(db_->Flush(FlushOptions())); ReadOptions read_opts; std::string expected, actual; ASSERT_OK(db_->Get(read_opts, "key", &actual)); PutFixed64(&expected, 1); ASSERT_EQ(expected, actual); } // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) { // During compaction to bottommost level, verify range tombstones older than // the oldest snapshot are removed, while others are preserved. Options opts = CurrentOptions(); opts.disable_auto_compactions = true; opts.num_levels = 2; opts.statistics = CreateDBStatistics(); Reopen(opts); db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1", "dr10"); // obsolete after compaction db_->Put(WriteOptions(), "key", "val"); db_->Flush(FlushOptions()); const Snapshot* snapshot = db_->GetSnapshot(); db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2", "dr20"); // protected by snapshot db_->Put(WriteOptions(), "key", "val"); db_->Flush(FlushOptions()); ASSERT_EQ(2, NumTableFilesAtLevel(0)); ASSERT_EQ(0, NumTableFilesAtLevel(1)); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(1)); ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE)); db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, TableEvictedDuringScan) { // The RangeDelAggregator holds pointers into range deletion blocks created by // table readers. 
This test ensures the aggregator can still access those // blocks even if it outlives the table readers that created them. // // DBIter always keeps readers open for L0 files. So, in order to test // aggregator outliving reader, we need to have deletions in L1 files, which // are opened/closed on-demand during the scan. This is accomplished by // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions // from all lingering in L0 (there is at most one range deletion per L0 file). // // The first L1 file will contain a range deletion since its begin key is 0. // SeekToFirst() references that table's reader and adds its range tombstone // to the aggregator. Upon advancing beyond that table's key-range via Next(), // the table reader will be unreferenced by the iterator. Since we manually // call Evict() on all readers before the full scan, this unreference causes // the reader's refcount to drop to zero and thus be destroyed. // // When it is destroyed, we do not remove its range deletions from the // aggregator. So, subsequent calls to Next() must be able to use these // deletions to decide whether a key is covered. This will work as long as // the aggregator properly references the range deletion block. const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5; Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); opts.level0_file_num_compaction_trigger = 4; opts.level0_stop_writes_trigger = 4; opts.memtable_factory.reset(new SpecialSkipListFactory(1)); opts.num_levels = 2; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; bbto.block_cache = NewLRUCache(8 << 20); opts.table_factory.reset(NewBlockBasedTableFactory(bbto)); Reopen(opts); // Hold a snapshot so range deletions can't become obsolete during compaction // to bottommost level (i.e., L1). 
const Snapshot* snapshot = db_->GetSnapshot(); for (int i = 0; i < kNum; ++i) { db_->Put(WriteOptions(), GetNumericStr(i), "val"); if (i > 0) { dbfull()->TEST_WaitForFlushMemTable(); } if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) { db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); } } // Must be > 1 so the first L1 file can be closed before scan finishes dbfull()->TEST_WaitForCompact(); ASSERT_GT(NumTableFilesAtLevel(1), 1); std::vector file_numbers = ListTableFiles(env_, dbname_); ReadOptions read_opts; auto* iter = db_->NewIterator(read_opts); int expected = kRangeEnd; iter->SeekToFirst(); for (auto file_number : file_numbers) { // This puts table caches in the state of being externally referenced only // so they are destroyed immediately upon iterator unreferencing. TableCache::Evict(dbfull()->TEST_table_cache(), file_number); } for (; iter->Valid(); iter->Next()) { ASSERT_EQ(GetNumericStr(expected), iter->key()); ++expected; // Keep clearing block cache's LRU so range deletion block can be freed as // soon as its refcount drops to zero. bbto.block_cache->EraseUnRefEntries(); } ASSERT_EQ(kNum, expected); delete iter; db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) { do { DestroyAndReopen(CurrentOptions()); db_->Put(WriteOptions(), "key", "val"); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); ReadOptions read_opts; std::string value; ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); } while (ChangeOptions(kRangeDelSkipConfigs)); } TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) { do { Options opts = CurrentOptions(); opts.max_write_buffer_number = 3; opts.min_write_buffer_number_to_merge = 2; // SpecialSkipListFactory lets us specify maximum number of elements the // memtable can hold. 
It switches the active memtable to immutable (flush is // prevented by the above options) upon inserting an element that would // overflow the memtable. opts.memtable_factory.reset(new SpecialSkipListFactory(1)); DestroyAndReopen(opts); db_->Put(WriteOptions(), "key", "val"); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); db_->Put(WriteOptions(), "blah", "val"); ReadOptions read_opts; std::string value; ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); } while (ChangeOptions(kRangeDelSkipConfigs)); } TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) { do { DestroyAndReopen(CurrentOptions()); db_->Put(WriteOptions(), "key", "val"); // snapshot prevents key from being deleted during flush const Snapshot* snapshot = db_->GetSnapshot(); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); ASSERT_OK(db_->Flush(FlushOptions())); ReadOptions read_opts; std::string value; ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound()); db_->ReleaseSnapshot(snapshot); } while (ChangeOptions(kRangeDelSkipConfigs)); } TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) { const int kNumMergeOps = 10; Options opts = CurrentOptions(); opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); Reopen(opts); for (int i = 0; i < kNumMergeOps; ++i) { std::string val; PutFixed64(&val, i); db_->Merge(WriteOptions(), "key", val); if (i == kNumMergeOps / 2) { // deletes [0, 5] db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", "key_"); } } ReadOptions read_opts; std::string expected, actual; ASSERT_OK(db_->Get(read_opts, "key", &actual)); PutFixed64(&expected, 30); // 6+7+8+9 ASSERT_EQ(expected, actual); expected.clear(); read_opts.ignore_range_deletions = true; ASSERT_OK(db_->Get(read_opts, "key", &actual)); PutFixed64(&expected, 45); // 0+1+2+...+9 ASSERT_EQ(expected, actual); } TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) { Options opts = CurrentOptions(); 
opts.max_write_buffer_number = 4; opts.min_write_buffer_number_to_merge = 3; opts.memtable_factory.reset(new SpecialSkipListFactory(1)); Reopen(opts); db_->Put(WriteOptions(), "sst_key", "val"); // snapshot prevents key from being deleted during flush const Snapshot* snapshot = db_->GetSnapshot(); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); ASSERT_OK(db_->Flush(FlushOptions())); db_->Put(WriteOptions(), "imm_key", "val"); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); db_->Put(WriteOptions(), "mem_key", "val"); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); ReadOptions read_opts; read_opts.ignore_range_deletions = true; for (std::string key : {"sst_key", "imm_key", "mem_key"}) { std::string value; ASSERT_OK(db_->Get(read_opts, key, &value)); } db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) { const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); Reopen(opts); // Write half of the keys before the tombstone and half after the tombstone. // Only covered keys (i.e., within the range and older than the tombstone) // should be deleted. 
for (int i = 0; i < kNum; ++i) { if (i == kNum / 2) { db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); } db_->Put(WriteOptions(), GetNumericStr(i), "val"); } ReadOptions read_opts; auto* iter = db_->NewIterator(read_opts); int expected = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_EQ(GetNumericStr(expected), iter->key()); if (expected == kRangeBegin - 1) { expected = kNum / 2; } else { ++expected; } } ASSERT_EQ(kNum, expected); delete iter; } TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) { const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25; Options opts = CurrentOptions(); opts.comparator = test::Uint64Comparator(); opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile)); Reopen(opts); const Snapshot* snapshot = nullptr; // Put a snapshot before the range tombstone, verify an iterator using that // snapshot sees all inserted keys. for (int i = 0; i < kNum; ++i) { if (i == kNum / 2) { snapshot = db_->GetSnapshot(); db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd)); } db_->Put(WriteOptions(), GetNumericStr(i), "val"); } ReadOptions read_opts; read_opts.snapshot = snapshot; auto* iter = db_->NewIterator(read_opts); int expected = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_EQ(GetNumericStr(expected), iter->key()); ++expected; } ASSERT_EQ(kNum / 2, expected); delete iter; db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) { Options opts = CurrentOptions(); opts.max_write_buffer_number = 4; opts.min_write_buffer_number_to_merge = 3; opts.memtable_factory.reset(new SpecialSkipListFactory(1)); Reopen(opts); db_->Put(WriteOptions(), "sst_key", "val"); // snapshot prevents key from being deleted during flush const Snapshot* snapshot = db_->GetSnapshot(); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), 
"a", "z")); ASSERT_OK(db_->Flush(FlushOptions())); db_->Put(WriteOptions(), "imm_key", "val"); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); db_->Put(WriteOptions(), "mem_key", "val"); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); ReadOptions read_opts; read_opts.ignore_range_deletions = true; auto* iter = db_->NewIterator(read_opts); int i = 0; std::string expected[] = {"imm_key", "mem_key", "sst_key"}; for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) { std::string key; ASSERT_EQ(expected[i], iter->key()); } ASSERT_EQ(3, i); delete iter; db_->ReleaseSnapshot(snapshot); } #ifndef ROCKSDB_UBSAN_RUN TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) { db_->Put(WriteOptions(), "key", "val"); // snapshot prevents key from being deleted during flush const Snapshot* snapshot = db_->GetSnapshot(); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); // iterations check unsupported in memtable, l0, and then l1 for (int i = 0; i < 3; ++i) { ReadOptions read_opts; read_opts.tailing = true; auto* iter = db_->NewIterator(read_opts); if (i == 2) { // For L1+, iterators over files are created on-demand, so need seek iter->SeekToFirst(); } ASSERT_TRUE(iter->status().IsNotSupported()); delete iter; if (i == 0) { ASSERT_OK(db_->Flush(FlushOptions())); } else if (i == 1) { MoveFilesToLevel(1); } } db_->ReleaseSnapshot(snapshot); } #endif // !ROCKSDB_UBSAN_RUN TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) { const int kNumFiles = 2, kNumKeysPerFile = 4; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; options.level0_file_num_compaction_trigger = kNumFiles; options.max_subcompactions = 2; options.num_levels = 2; options.target_file_size_base = 4096; Reopen(options); // need a L1 file for subcompaction to be triggered ASSERT_OK( db_->Put(WriteOptions(), 
db_->DefaultColumnFamily(), Key(0), "val")); ASSERT_OK(db_->Flush(FlushOptions())); MoveFilesToLevel(1); // put enough keys to fill up the first subcompaction, and later range-delete // them so that the first subcompaction outputs no key-values. In that case // it'll consider making an SST file dedicated to range deletions. for (int i = 0; i < kNumKeysPerFile; ++i) { ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), std::string(1024, 'a'))); } ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(kNumKeysPerFile))); // the above range tombstone can be dropped, so that one alone won't cause a // dedicated file to be opened. We can make one protected by snapshot that // must be considered. Make its range outside the first subcompaction's range // to exercise the tricky part of the code. const Snapshot* snapshot = db_->GetSnapshot(); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(kNumKeysPerFile + 1), Key(kNumKeysPerFile + 2))); ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(1)); db_->EnableAutoCompaction({db_->DefaultColumnFamily()}); dbfull()->TEST_WaitForCompact(); db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, MemtableBloomFilter) { // regression test for #2743. 
the range delete tombstones in memtable should // be added even when Get() skips searching due to its prefix bloom filter const int kMemtableSize = 1 << 20; // 1MB const int kMemtablePrefixFilterSize = 1 << 13; // 8KB const int kNumKeys = 1000; const int kPrefixLen = 8; Options options = CurrentOptions(); options.memtable_prefix_bloom_size_ratio = static_cast(kMemtablePrefixFilterSize) / kMemtableSize; options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen)); options.write_buffer_size = kMemtableSize; Reopen(options); for (int i = 0; i < kNumKeys; ++i) { ASSERT_OK(Put(Key(i), "val")); } Flush(); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(kNumKeys))); for (int i = 0; i < kNumKeys; ++i) { std::string value; ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound()); } } TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) { // This test originally verified that compaction treated files containing a // split range deletion in the input level as an atomic unit. I.e., // compacting any input-level file(s) containing a portion of the range // deletion causes all other input-level files containing portions of that // same range deletion to be included in the compaction. Range deletion // tombstones are now truncated to sstable boundaries which removed the need // for that behavior (which could lead to excessively large // compactions). 
const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10; Options options = CurrentOptions(); options.compression = kNoCompression; options.level0_file_num_compaction_trigger = kNumFilesPerLevel; options.memtable_factory.reset( new SpecialSkipListFactory(2 /* num_entries_flush */)); options.target_file_size_base = kValueBytes; // i == 0: CompactFiles // i == 1: CompactRange // i == 2: automatic compaction for (int i = 0; i < 3; ++i) { DestroyAndReopen(options); ASSERT_OK(Put(Key(0), "")); ASSERT_OK(db_->Flush(FlushOptions())); MoveFilesToLevel(2); ASSERT_EQ(1, NumTableFilesAtLevel(2)); // snapshot protects range tombstone from dropping due to becoming obsolete. const Snapshot* snapshot = db_->GetSnapshot(); db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(2 * kNumFilesPerLevel)); Random rnd(301); std::string value = RandomString(&rnd, kValueBytes); for (int j = 0; j < kNumFilesPerLevel; ++j) { // give files overlapping key-ranges to prevent trivial move ASSERT_OK(Put(Key(j), value)); ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value)); if (j > 0) { dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(j, NumTableFilesAtLevel(0)); } } // put extra key to trigger final flush ASSERT_OK(Put("", "")); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1)); ColumnFamilyMetaData meta; db_->GetColumnFamilyMetaData(&meta); if (i == 0) { ASSERT_OK(db_->CompactFiles( CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */)); ASSERT_EQ(0, NumTableFilesAtLevel(1)); } else if (i == 1) { auto begin_str = Key(0), end_str = Key(1); Slice begin = begin_str, end = end_str; ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end)); ASSERT_EQ(3, NumTableFilesAtLevel(1)); } else if (i == 2) { ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(), {{"max_bytes_for_level_base", "10000"}})); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(1, 
NumTableFilesAtLevel(1));
    }
    ASSERT_GT(NumTableFilesAtLevel(2), 0);

    db_->ReleaseSnapshot(snapshot);
  }
}

TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
  // Test the handling of the range-tombstone end-key as the
  // upper-bound for an sstable.

  const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(2 /* num_entries_flush */));
  options.target_file_size_base = kValueBytes;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);

  // Create an initial sstable at L2:
  //   [key000000#1,1, key000000#1,1]
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // A snapshot protects the range tombstone from dropping due to
  // becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(0), Key(2 * kNumFilesPerLevel)));

  // Create 2 additional sstables in L0. Note that the first sstable
  // contains the range tombstone.
  //   [key000000#3,1, key000004#72057594037927935,15]
  //   [key000001#5,1, key000002#6,1]
  Random rnd(301);
  std::string value = RandomString(&rnd, kValueBytes);
  for (int j = 0; j < kNumFilesPerLevel; ++j) {
    // Give files overlapping key-ranges to prevent a trivial move when we
    // compact from L0 to L1.
    ASSERT_OK(Put(Key(j), value));
    ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
  }
  // Compact the 2 L0 sstables to L1, resulting in the following LSM. There
  // are 2 sstables generated in L1 due to the target_file_size_base setting.
// L1: // [key000000#3,1, key000002#72057594037927935,15] // [key000002#6,1, key000004#72057594037927935,15] // L2: // [key000000#1,1, key000000#1,1] MoveFilesToLevel(1); ASSERT_EQ(2, NumTableFilesAtLevel(1)); { // Compact the second sstable in L1: // L1: // [key000000#3,1, key000002#72057594037927935,15] // L2: // [key000000#1,1, key000000#1,1] // [key000002#6,1, key000004#72057594037927935,15] // // At the same time, verify the compaction does not cause the key at the // endpoint (key000002#6,1) to disappear. ASSERT_EQ(value, Get(Key(2))); auto begin_str = Key(3); const ROCKSDB_NAMESPACE::Slice begin = begin_str; dbfull()->TEST_CompactRange(1, &begin, nullptr); ASSERT_EQ(1, NumTableFilesAtLevel(1)); ASSERT_EQ(2, NumTableFilesAtLevel(2)); ASSERT_EQ(value, Get(Key(2))); } { // Compact the first sstable in L1. This should be copacetic, but // was previously resulting in overlapping sstables in L2 due to // mishandling of the range tombstone end-key when used as the // largest key for an sstable. The resulting LSM structure should // be: // // L2: // [key000000#1,1, key000001#72057594037927935,15] // [key000001#5,1, key000002#72057594037927935,15] // [key000002#6,1, key000004#72057594037927935,15] auto begin_str = Key(0); const ROCKSDB_NAMESPACE::Slice begin = begin_str; dbfull()->TEST_CompactRange(1, &begin, &begin); ASSERT_EQ(0, NumTableFilesAtLevel(1)); ASSERT_EQ(3, NumTableFilesAtLevel(2)); } db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, UnorderedTombstones) { // Regression test for #2752. Range delete tombstones between // different snapshot stripes are not stored in order, so the first // tombstone of each snapshot stripe should be checked as a smallest // candidate. 
Options options = CurrentOptions(); DestroyAndReopen(options); auto cf = db_->DefaultColumnFamily(); ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a")); ASSERT_OK(db_->Flush(FlushOptions(), cf)); ASSERT_EQ(1, NumTableFilesAtLevel(0)); ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); ASSERT_EQ(1, NumTableFilesAtLevel(1)); ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c")); // Hold a snapshot to separate these two delete ranges. auto snapshot = db_->GetSnapshot(); ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b")); ASSERT_OK(db_->Flush(FlushOptions(), cf)); db_->ReleaseSnapshot(snapshot); std::vector> files; dbfull()->TEST_GetFilesMetaData(cf, &files); ASSERT_EQ(1, files[0].size()); ASSERT_EQ("a", files[0][0].smallest.user_key()); ASSERT_EQ("c", files[0][0].largest.user_key()); std::string v; auto s = db_->Get(ReadOptions(), "a", &v); ASSERT_TRUE(s.IsNotFound()); } class MockMergeOperator : public MergeOperator { // Mock non-associative operator. Non-associativity is expressed by lack of // implementation for any `PartialMerge*` functions. public: bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { assert(merge_out != nullptr); merge_out->new_value = merge_in.operand_list.back().ToString(); return true; } const char* Name() const override { return "MockMergeOperator"; } }; TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) { // This test uses a non-associative merge operator since that is a convenient // way to get compaction to write out files with overlapping user-keys at the // endpoints. Note, however, overlapping endpoints can also occur with other // value types (Put, etc.), assuming the right snapshots are present. 
const int kFileBytes = 1 << 20; const int kValueBytes = 1 << 10; const int kNumFiles = 4; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; options.merge_operator.reset(new MockMergeOperator()); options.target_file_size_base = kFileBytes; Reopen(options); // Push dummy data to L3 so that our actual test files on L0-L2 // will not be considered "bottommost" level, otherwise compaction // may prevent us from creating overlapping user keys // as on the bottommost layer MergeHelper ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy")); ASSERT_OK(db_->Flush(FlushOptions())); MoveFilesToLevel(3); Random rnd(301); const Snapshot* snapshot = nullptr; for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kFileBytes / kValueBytes; ++j) { auto value = RandomString(&rnd, kValueBytes); ASSERT_OK(db_->Merge(WriteOptions(), "key", value)); } if (i == kNumFiles - 1) { // Take snapshot to prevent covered merge operands from being dropped by // compaction. snapshot = db_->GetSnapshot(); // The DeleteRange is the last write so all merge operands are covered. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key", "key_")); } ASSERT_OK(db_->Flush(FlushOptions())); } ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); std::string value; ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */, nullptr /* end */, nullptr /* column_family */, true /* disallow_trivial_move */); ASSERT_EQ(0, NumTableFilesAtLevel(0)); // Now we have multiple files at L1 all containing a single user key, thus // guaranteeing overlap in the file endpoints. ASSERT_GT(NumTableFilesAtLevel(1), 1); // Verify no merge operands reappeared after the compaction. ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); // Compact and verify again. 
It's worthwhile because now the files have // tighter endpoints, so we can verify that doesn't mess anything up. dbfull()->TEST_CompactRange(1 /* level */, nullptr /* begin */, nullptr /* end */, nullptr /* column_family */, true /* disallow_trivial_move */); ASSERT_GT(NumTableFilesAtLevel(2), 1); ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound()); db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) { // Verify a key newer than a range tombstone cannot be deleted by being // compacted to the bottom level (and thus having its seqnum zeroed) before // the range tombstone. This used to happen when range tombstones were // untruncated on reads such that they extended past their file boundaries. // // Test summary: // // - L1 is bottommost. // - A couple snapshots are strategically taken to prevent seqnums from being // zeroed, range tombstone from being dropped, merge operands from being // dropped, and merge operands from being combined. // - Left half of files in L1 all have same user key, ensuring their file // boundaries overlap. In the past this would cause range tombstones to be // untruncated. // - Right half of L1 files all have different keys, ensuring no overlap. // - A range tombstone spans all L1 keys, so it is stored in every L1 file. // - Keys in the right side of the key-range are overwritten. These are // compacted down to L1 after releasing snapshots such that their seqnums // will be zeroed. // - A full range scan is performed. If the tombstone in the left L1 files // were untruncated, it would now cover keys newer than it (but with zeroed // seqnums) in the right L1 files. 
const int kFileBytes = 1 << 20; const int kValueBytes = 1 << 10; const int kNumFiles = 4; const int kMaxKey = kNumFiles* kFileBytes / kValueBytes; const int kKeysOverwritten = 10; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; options.merge_operator.reset(new MockMergeOperator()); options.num_levels = 2; options.target_file_size_base = kFileBytes; Reopen(options); Random rnd(301); // - snapshots[0] prevents merge operands from being combined during // compaction. // - snapshots[1] prevents merge operands from being dropped due to the // covering range tombstone. const Snapshot* snapshots[] = {nullptr, nullptr}; for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kFileBytes / kValueBytes; ++j) { auto value = RandomString(&rnd, kValueBytes); std::string key; if (i < kNumFiles / 2) { key = Key(0); } else { key = Key(1 + i * kFileBytes / kValueBytes + j); } ASSERT_OK(db_->Merge(WriteOptions(), key, value)); } if (i == 0) { snapshots[0] = db_->GetSnapshot(); } if (i == kNumFiles - 1) { snapshots[1] = db_->GetSnapshot(); // The DeleteRange is the last write so all merge operands are covered. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(kMaxKey + 1))); } ASSERT_OK(db_->Flush(FlushOptions())); } ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); auto get_key_count = [this]() -> int { auto* iter = db_->NewIterator(ReadOptions()); iter->SeekToFirst(); int keys_found = 0; for (; iter->Valid(); iter->Next()) { ++keys_found; } delete iter; return keys_found; }; // All keys should be covered ASSERT_EQ(0, get_key_count()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */, nullptr /* end_key */)); ASSERT_EQ(0, NumTableFilesAtLevel(0)); // Roughly the left half of L1 files should have overlapping boundary keys, // while the right half should not. 
ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles); // Now overwrite a few keys that are in L1 files that definitely don't have // overlapping boundary keys. for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) { auto value = RandomString(&rnd, kValueBytes); ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value)); } ASSERT_OK(db_->Flush(FlushOptions())); // The overwritten keys are in L0 now, so clearly aren't covered by the range // tombstone in L1. ASSERT_EQ(kKeysOverwritten, get_key_count()); // Release snapshots so seqnums can be zeroed when L0->L1 happens. db_->ReleaseSnapshot(snapshots[0]); db_->ReleaseSnapshot(snapshots[1]); auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1); auto end_key_storage = Key(kMaxKey); Slice begin_key(begin_key_storage); Slice end_key(end_key_storage); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key)); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles); ASSERT_EQ(kKeysOverwritten, get_key_count()); } TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) { // Exposes a bug where we were using // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge operands // in the forward direction. Confusingly, this case happened during // `DBIter::Prev`. It could cause assertion failure, or reappearing keys. const int kFileBytes = 1 << 20; const int kValueBytes = 1 << 10; // Need multiple keys so we can get results when calling `Prev()` after // `SeekToLast()`. 
const int kNumKeys = 3; const int kNumFiles = 4; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; options.merge_operator.reset(new MockMergeOperator()); options.target_file_size_base = kFileBytes; Reopen(options); Random rnd(301); const Snapshot* snapshot = nullptr; for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kFileBytes / kValueBytes; ++j) { auto value = RandomString(&rnd, kValueBytes); ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value)); if (i == 0 && j == kNumKeys) { // Take snapshot to prevent covered merge operands from being dropped or // merged by compaction. snapshot = db_->GetSnapshot(); // Do a DeleteRange near the beginning so only the oldest merge operand // for each key is covered. This ensures the sequence of events: // // - `DBIter::Prev()` is called // - After several same versions of the same user key are encountered, // it decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`. // - Binary searches to the newest version of the key, which is in the // leftmost file containing the user key. // - Scans forwards to collect all merge operands. Eventually reaches // the rightmost file containing the oldest merge operand, which // should be covered by the `DeleteRange`. If `RangeDelAggregator` // were not properly using `kForwardTraversal` here, that operand // would reappear. 
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(kNumKeys + 1))); } } ASSERT_OK(db_->Flush(FlushOptions())); } ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0)); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */, nullptr /* end_key */)); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(1), 1); auto* iter = db_->NewIterator(ReadOptions()); iter->SeekToLast(); int keys_found = 0; for (; iter->Valid(); iter->Prev()) { ++keys_found; } delete iter; ASSERT_EQ(kNumKeys, keys_found); db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) { const int kFileBytes = 1 << 20; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; options.target_file_size_base = kFileBytes; Reopen(options); ASSERT_OK(Put(Key(0), "a")); const Snapshot* snapshot = db_->GetSnapshot(); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(10))); db_->Flush(FlushOptions()); ReadOptions read_opts; read_opts.snapshot = snapshot; auto* iter = db_->NewIterator(read_opts); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(Key(0), iter->key()); iter->Next(); ASSERT_FALSE(iter->Valid()); delete iter; db_->ReleaseSnapshot(snapshot); } TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) { const int kFileBytes = 1 << 20; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; options.target_file_size_base = kFileBytes; Reopen(options); // block flush thread -> pin immtables in memory SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->LoadDependency({ {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator", "DBImpl::BGWorkFlush"}, }); SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(Put(Key(0), "a")); std::unique_ptr> snapshot(db_->GetSnapshot(), [this](const Snapshot* s) { db_->ReleaseSnapshot(s); }); 
ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(10))); ASSERT_OK(dbfull()->TEST_SwitchMemtable()); ReadOptions read_opts; read_opts.snapshot = snapshot.get(); std::unique_ptr iter(db_->NewIterator(read_opts)); TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator"); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(Key(0), iter->key()); iter->Next(); ASSERT_FALSE(iter->Valid()); } TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) { // Adapted from // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398. // Regression test for issue where range tombstone was written to more files // than necessary when it began exactly at the begin key in the next // compaction output file. const int kFileBytes = 1 << 20; const int kValueBytes = 4 << 10; Options options = CurrentOptions(); options.compression = kNoCompression; options.disable_auto_compactions = true; // Have a bit of slack in the size limits but we enforce them more strictly // when manually flushing/compacting. options.max_compaction_bytes = 2 * kFileBytes; options.target_file_size_base = 2 * kFileBytes; options.write_buffer_size = 2 * kFileBytes; Reopen(options); Random rnd(301); for (char first_char : {'a', 'b', 'c'}) { for (int i = 0; i < kFileBytes / kValueBytes; ++i) { std::string key(1, first_char); key.append(Key(i)); std::string value = RandomString(&rnd, kValueBytes); ASSERT_OK(Put(key, value)); } db_->Flush(FlushOptions()); MoveFilesToLevel(2); } ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(3, NumTableFilesAtLevel(2)); // Populate the memtable lightly while spanning the whole key-space. The // setting of `max_compaction_bytes` will cause the L0->L1 to output multiple // files to prevent a large L1->L2 compaction later. 
ASSERT_OK(Put("a", "val")); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "c" + Key(1), "d")); // Our compaction output file cutting logic currently only considers point // keys. So, in order for the range tombstone to have a chance at landing at // the start of a new file, we need a point key at the range tombstone's // start. // TODO(ajkr): remove this `Put` after file cutting accounts for range // tombstones (#3977). ASSERT_OK(Put("c" + Key(1), "value")); db_->Flush(FlushOptions()); // Ensure manual L0->L1 compaction cuts the outputs before the range tombstone // and the range tombstone is only placed in the second SST. std::string begin_key_storage("c" + Key(1)); Slice begin_key(begin_key_storage); std::string end_key_storage("d"); Slice end_key(end_key_storage); dbfull()->TEST_CompactRange(0 /* level */, &begin_key /* begin */, &end_key /* end */, nullptr /* column_family */, true /* disallow_trivial_move */); ASSERT_EQ(2, NumTableFilesAtLevel(1)); std::vector all_metadata; std::vector l1_metadata; db_->GetLiveFilesMetaData(&all_metadata); for (const auto& metadata : all_metadata) { if (metadata.level == 1) { l1_metadata.push_back(metadata); } } std::sort(l1_metadata.begin(), l1_metadata.end(), [&](const LiveFileMetaData& a, const LiveFileMetaData& b) { return options.comparator->Compare(a.smallestkey, b.smallestkey) < 0; }); ASSERT_EQ("a", l1_metadata[0].smallestkey); ASSERT_EQ("a", l1_metadata[0].largestkey); ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey); ASSERT_EQ("d", l1_metadata[1].largestkey); TablePropertiesCollection all_table_props; ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props)); int64_t num_range_deletions = 0; for (const auto& name_and_table_props : all_table_props) { const auto& name = name_and_table_props.first; const auto& table_props = name_and_table_props.second; // The range tombstone should only be output to the second L1 SST. 
if (name.size() >= l1_metadata[1].name.size() && name.substr(name.size() - l1_metadata[1].name.size()).compare(l1_metadata[1].name) == 0) { ASSERT_EQ(1, table_props->num_range_deletions); ++num_range_deletions; } else { ASSERT_EQ(0, table_props->num_range_deletions); } } ASSERT_EQ(1, num_range_deletions); } TEST_F(DBRangeDelTest, OverlappedTombstones) { const int kNumPerFile = 4, kNumFiles = 2; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.max_compaction_bytes = 9 * 1024; DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < kNumFiles; ++i) { std::vector values; // Write 12K (4 values, each 3K) for (int j = 0; j < kNumPerFile; j++) { values.push_back(RandomString(&rnd, 3 << 10)); ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); } } ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_EQ(1, NumTableFilesAtLevel(0)); MoveFilesToLevel(2); ASSERT_EQ(2, NumTableFilesAtLevel(2)); ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1), Key((kNumFiles)*kNumPerFile + 1))); ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_EQ(1, NumTableFilesAtLevel(0)); dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true /* disallow_trivial_move */); // The tombstone range is not broken up into multiple SSTs which may incur a // large compaction with L2. 
ASSERT_EQ(1, NumTableFilesAtLevel(1)); std::vector> files; dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, true /* disallow_trivial_move */); ASSERT_EQ(1, NumTableFilesAtLevel(2)); ASSERT_EQ(0, NumTableFilesAtLevel(1)); } TEST_F(DBRangeDelTest, OverlappedKeys) { const int kNumPerFile = 4, kNumFiles = 2; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.max_compaction_bytes = 9 * 1024; DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < kNumFiles; ++i) { std::vector values; // Write 12K (4 values, each 3K) for (int j = 0; j < kNumPerFile; j++) { values.push_back(RandomString(&rnd, 3 << 10)); ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j])); } } ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_EQ(1, NumTableFilesAtLevel(0)); MoveFilesToLevel(2); ASSERT_EQ(2, NumTableFilesAtLevel(2)); for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) { ASSERT_OK(Put(Key(i), "0x123")); } ASSERT_OK(db_->Flush(FlushOptions())); ASSERT_EQ(1, NumTableFilesAtLevel(0)); // The key range is broken up into three SSTs to avoid a future big compaction // with the grandparent dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true /* disallow_trivial_move */); ASSERT_EQ(3, NumTableFilesAtLevel(1)); std::vector> files; dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, true /* disallow_trivial_move */); ASSERT_EQ(1, NumTableFilesAtLevel(2)); ASSERT_EQ(0, NumTableFilesAtLevel(1)); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_sst_test.cc000066400000000000000000001230201370372246700164540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/db_test_util.h" #include "file/sst_file_manager_impl.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/sst_file_manager.h" namespace ROCKSDB_NAMESPACE { class DBSSTTest : public DBTestBase { public: DBSSTTest() : DBTestBase("/db_sst_test") {} }; #ifndef ROCKSDB_LITE // A class which remembers the name of each flushed file. class FlushedFileCollector : public EventListener { public: FlushedFileCollector() {} ~FlushedFileCollector() override {} void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { std::lock_guard lock(mutex_); flushed_files_.push_back(info.file_path); } std::vector GetFlushedFiles() { std::lock_guard lock(mutex_); std::vector result; for (auto fname : flushed_files_) { result.push_back(fname); } return result; } void ClearFlushedFiles() { std::lock_guard lock(mutex_); flushed_files_.clear(); } private: std::vector flushed_files_; std::mutex mutex_; }; #endif // ROCKSDB_LITE TEST_F(DBSSTTest, DontDeletePendingOutputs) { Options options; options.env = env_; options.create_if_missing = true; DestroyAndReopen(options); // Every time we write to a table file, call FOF/POF with full DB scan. 
This // will make sure our pending_outputs_ protection work correctly std::function purge_obsolete_files_function = [&]() { JobContext job_context(0); dbfull()->TEST_LockMutex(); dbfull()->FindObsoleteFiles(&job_context, true /*force*/); dbfull()->TEST_UnlockMutex(); dbfull()->PurgeObsoleteFiles(job_context); job_context.Clean(); }; env_->table_write_callback_ = &purge_obsolete_files_function; for (int i = 0; i < 2; ++i) { ASSERT_OK(Put("a", "begin")); ASSERT_OK(Put("z", "end")); ASSERT_OK(Flush()); } // If pending output guard does not work correctly, PurgeObsoleteFiles() will // delete the file that Compaction is trying to create, causing this: error // db/db_test.cc:975: IO error: // /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory Compact("a", "b"); } // 1 Create some SST files by inserting K-V pairs into DB // 2 Close DB and change suffix from ".sst" to ".ldb" for every other SST file // 3 Open DB and check if all key can be read TEST_F(DBSSTTest, SSTsWithLdbSuffixHandling) { Options options = CurrentOptions(); options.write_buffer_size = 110 << 10; // 110KB options.num_levels = 4; DestroyAndReopen(options); Random rnd(301); int key_id = 0; for (int i = 0; i < 10; ++i) { GenerateNewFile(&rnd, &key_id, false); } Flush(); Close(); int const num_files = GetSstFileCount(dbname_); ASSERT_GT(num_files, 0); Reopen(options); std::vector values; values.reserve(key_id); for (int k = 0; k < key_id; ++k) { values.push_back(Get(Key(k))); } Close(); std::vector filenames; GetSstFiles(env_, dbname_, &filenames); int num_ldb_files = 0; for (size_t i = 0; i < filenames.size(); ++i) { if (i & 1) { continue; } std::string const rdb_name = dbname_ + "/" + filenames[i]; std::string const ldb_name = Rocks2LevelTableFileName(rdb_name); ASSERT_TRUE(env_->RenameFile(rdb_name, ldb_name).ok()); ++num_ldb_files; } ASSERT_GT(num_ldb_files, 0); ASSERT_EQ(num_files, GetSstFileCount(dbname_)); Reopen(options); for (int k = 0; k < key_id; ++k) { ASSERT_EQ(values[k], 
Get(Key(k))); } Destroy(options); } // Check that we don't crash when opening DB with // DBOptions::skip_checking_sst_file_sizes_on_db_open = true. TEST_F(DBSSTTest, SkipCheckingSSTFileSizesOnDBOpen) { ASSERT_OK(Put("pika", "choo")); ASSERT_OK(Flush()); // Just open the DB with the option set to true and check that we don't crash. Options options; options.skip_checking_sst_file_sizes_on_db_open = true; Reopen(options); ASSERT_EQ("choo", Get("pika")); } #ifndef ROCKSDB_LITE TEST_F(DBSSTTest, DontDeleteMovedFile) { // This test triggers move compaction and verifies that the file is not // deleted when it's part of move compaction Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; options.max_bytes_for_level_base = 1024 * 1024; // 1 MB options.level0_file_num_compaction_trigger = 2; // trigger compaction when we have 2 files DestroyAndReopen(options); Random rnd(301); // Create two 1MB sst files for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); } ASSERT_OK(Flush()); } // this should execute both L0->L1 and L1->(move)->L2 compactions dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,1", FilesPerLevel(0)); // If the moved file is actually deleted (the move-safeguard in // ~Version::Version() is not there), we get this failure: // Corruption: Can't access /000009.sst Reopen(options); } // This reproduces a bug where we don't delete a file because when it was // supposed to be deleted, it was blocked by pending_outputs // Consider: // 1. current file_number is 13 // 2. compaction (1) starts, blocks deletion of all files starting with 13 // (pending outputs) // 3. file 13 is created by compaction (2) // 4. file 13 is consumed by compaction (3) and file 15 was created. Since file // 13 has no references, it is put into VersionSet::obsolete_files_ // 5. FindObsoleteFiles() gets file 13 from VersionSet::obsolete_files_. 
File 13 // is deleted from obsolete_files_ set. // 6. PurgeObsoleteFiles() tries to delete file 13, but this file is blocked by // pending outputs since compaction (1) is still running. It is not deleted and // it is not present in obsolete_files_ anymore. Therefore, we never delete it. TEST_F(DBSSTTest, DeleteObsoleteFilesPendingOutputs) { Options options = CurrentOptions(); options.env = env_; options.write_buffer_size = 2 * 1024 * 1024; // 2 MB options.max_bytes_for_level_base = 1024 * 1024; // 1 MB options.level0_file_num_compaction_trigger = 2; // trigger compaction when we have 2 files options.max_background_flushes = 2; options.max_background_compactions = 2; OnFileDeletionListener* listener = new OnFileDeletionListener(); options.listeners.emplace_back(listener); Reopen(options); Random rnd(301); // Create two 1MB sst files for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); } ASSERT_OK(Flush()); } // this should execute both L0->L1 and L1->(move)->L2 compactions dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,1", FilesPerLevel(0)); test::SleepingBackgroundTask blocking_thread; port::Mutex mutex_; bool already_blocked(false); // block the flush std::function block_first_time = [&]() { bool blocking = false; { MutexLock l(&mutex_); if (!already_blocked) { blocking = true; already_blocked = true; } } if (blocking) { blocking_thread.DoSleep(); } }; env_->table_write_callback_ = &block_first_time; // Insert 2.5MB data, which should trigger a flush because we exceed // write_buffer_size. 
The flush will be blocked with block_first_time // pending_file is protecting all the files created after for (int j = 0; j < 256; ++j) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 10 * 1024))); } blocking_thread.WaitUntilSleeping(); ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr)); ASSERT_EQ("0,0,0,1", FilesPerLevel(0)); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(metadata.size(), 1U); auto file_on_L2 = metadata[0].name; listener->SetExpectedFileName(dbname_ + file_on_L2); ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, true /* disallow trivial move */)); ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0)); // finish the flush! blocking_thread.WakeUp(); blocking_thread.WaitUntilDone(); dbfull()->TEST_WaitForFlushMemTable(); // File just flushed is too big for L0 and L1 so gets moved to L2. dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,0,1,0,1", FilesPerLevel(0)); metadata.clear(); db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(metadata.size(), 2U); // This file should have been deleted during last compaction ASSERT_EQ(Status::NotFound(), env_->FileExists(dbname_ + file_on_L2)); listener->VerifyMatchedCount(1); } TEST_F(DBSSTTest, DBWithSstFileManager) { std::shared_ptr sst_file_manager(NewSstFileManager(env_)); auto sfm = static_cast(sst_file_manager.get()); int files_added = 0; int files_deleted = 0; int files_moved = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SstFileManagerImpl::OnAddFile", [&](void* /*arg*/) { files_added++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SstFileManagerImpl::OnDeleteFile", [&](void* /*arg*/) { files_deleted++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.sst_file_manager = sst_file_manager; DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 
25; i++) { GenerateNewRandomFile(&rnd); ASSERT_OK(Flush()); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); // Verify that we are tracking all sst files in dbname_ ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles()); } ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); auto files_in_db = GetAllSSTFiles(); // Verify that we are tracking all sst files in dbname_ ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); // Verify the total files size uint64_t total_files_size = 0; for (auto& file_to_size : files_in_db) { total_files_size += file_to_size.second; } ASSERT_EQ(sfm->GetTotalSize(), total_files_size); // We flushed at least 25 files ASSERT_GE(files_added, 25); // Compaction must have deleted some files ASSERT_GT(files_deleted, 0); // No files were moved ASSERT_EQ(files_moved, 0); Close(); Reopen(options); ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); ASSERT_EQ(sfm->GetTotalSize(), total_files_size); // Verify that we track all the files again after the DB is closed and opened Close(); sst_file_manager.reset(NewSstFileManager(env_)); options.sst_file_manager = sst_file_manager; sfm = static_cast(sst_file_manager.get()); Reopen(options); ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); ASSERT_EQ(sfm->GetTotalSize(), total_files_size); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBSSTTest, RateLimitedDelete) { Destroy(last_options_); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DBSSTTest::RateLimitedDelete:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); std::vector penalties; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { // Turn timed wait into a simulated sleep uint64_t* abs_time_us = static_cast(arg); uint64_t cur_time = env_->NowMicros(); 
if (*abs_time_us > cur_time) { env_->addon_time_.fetch_add(*abs_time_us - cur_time); } // Randomly sleep shortly env_->addon_time_.fetch_add( static_cast(Random::GetTLSInstance()->Uniform(10))); // Set wait until time to before (actual) current time to force not // to sleep *abs_time_us = Env::Default()->NowMicros(); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); env_->SetTimeElapseOnlySleep(&options); options.disable_auto_compactions = true; options.env = env_; options.statistics = CreateDBStatistics(); int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec Status s; options.sst_file_manager.reset( NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); ASSERT_OK(s); options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); auto sfm = static_cast(options.sst_file_manager.get()); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1); WriteOptions wo; wo.disableWAL = true; ASSERT_OK(TryReopen(options)); // Create 4 files in L0 for (char v = 'a'; v <= 'd'; v++) { ASSERT_OK(Put("Key2", DummyString(1024, v), wo)); ASSERT_OK(Put("Key3", DummyString(1024, v), wo)); ASSERT_OK(Put("Key4", DummyString(1024, v), wo)); ASSERT_OK(Put("Key1", DummyString(1024, v), wo)); ASSERT_OK(Put("Key4", DummyString(1024, v), wo)); ASSERT_OK(Flush()); } // We created 4 sst files in L0 ASSERT_EQ("4", FilesPerLevel(0)); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); // Compaction will move the 4 files in L0 to trash and create 1 L1 file ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); ASSERT_EQ("0,1", FilesPerLevel(0)); uint64_t delete_start_time = env_->NowMicros(); // Hold BackgroundEmptyTrash TEST_SYNC_POINT("DBSSTTest::RateLimitedDelete:1"); sfm->WaitForEmptyTrash(); uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; uint64_t total_files_size = 0; uint64_t expected_penlty = 0; ASSERT_EQ(penalties.size(), 
metadata.size()); for (size_t i = 0; i < metadata.size(); i++) { total_files_size += metadata[i].size; expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec); ASSERT_EQ(expected_penlty, penalties[i]); } ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); ASSERT_LT(time_spent_deleting, expected_penlty * 1.1); ASSERT_EQ(4, options.statistics->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ( 0, options.statistics->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBSSTTest, RateLimitedWALDelete) { Destroy(last_options_); std::vector penalties; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); env_->no_slowdown_ = true; env_->time_elapse_only_sleep_ = true; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.compression = kNoCompression; options.env = env_; int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec Status s; options.sst_file_manager.reset( NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); ASSERT_OK(s); options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); auto sfm = static_cast(options.sst_file_manager.get()); sfm->delete_scheduler()->SetMaxTrashDBRatio(3.1); ASSERT_OK(TryReopen(options)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Create 4 files in L0 for (char v = 'a'; v <= 'd'; v++) { ASSERT_OK(Put("Key2", DummyString(1024, v))); ASSERT_OK(Put("Key3", DummyString(1024, v))); ASSERT_OK(Put("Key4", DummyString(1024, v))); ASSERT_OK(Put("Key1", DummyString(1024, v))); ASSERT_OK(Put("Key4", DummyString(1024, v))); ASSERT_OK(Flush()); } // We created 4 sst files in L0 ASSERT_EQ("4", FilesPerLevel(0)); // Compaction will move the 4 files in L0 to trash and create 1 L1 file CompactRangeOptions cro; cro.bottommost_level_compaction = 
BottommostLevelCompaction::kForce; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); ASSERT_EQ("0,1", FilesPerLevel(0)); sfm->WaitForEmptyTrash(); ASSERT_EQ(penalties.size(), 8); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } class DBWALTestWithParam : public DBSSTTest, public testing::WithParamInterface> { public: DBWALTestWithParam() { wal_dir_ = std::get<0>(GetParam()); wal_dir_same_as_dbname_ = std::get<1>(GetParam()); } std::string wal_dir_; bool wal_dir_same_as_dbname_; }; TEST_P(DBWALTestWithParam, WALTrashCleanupOnOpen) { class MyEnv : public EnvWrapper { public: MyEnv(Env* t) : EnvWrapper(t), fake_log_delete(false) {} Status DeleteFile(const std::string& fname) { if (fname.find(".log.trash") != std::string::npos && fake_log_delete) { return Status::OK(); } return target()->DeleteFile(fname); } void set_fake_log_delete(bool fake) { fake_log_delete = fake; } private: bool fake_log_delete; }; std::unique_ptr env(new MyEnv(Env::Default())); Destroy(last_options_); env->set_fake_log_delete(true); Options options = CurrentOptions(); options.disable_auto_compactions = true; options.compression = kNoCompression; options.env = env.get(); options.wal_dir = dbname_ + wal_dir_; int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec Status s; options.sst_file_manager.reset( NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); ASSERT_OK(s); options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); auto sfm = static_cast(options.sst_file_manager.get()); sfm->delete_scheduler()->SetMaxTrashDBRatio(3.1); ASSERT_OK(TryReopen(options)); // Create 4 files in L0 for (char v = 'a'; v <= 'd'; v++) { ASSERT_OK(Put("Key2", DummyString(1024, v))); ASSERT_OK(Put("Key3", DummyString(1024, v))); ASSERT_OK(Put("Key4", DummyString(1024, v))); ASSERT_OK(Put("Key1", DummyString(1024, v))); ASSERT_OK(Put("Key4", DummyString(1024, v))); ASSERT_OK(Flush()); } // We created 4 sst files in L0 
ASSERT_EQ("4", FilesPerLevel(0)); Close(); options.sst_file_manager.reset(); std::vector filenames; int trash_log_count = 0; if (!wal_dir_same_as_dbname_) { // Forcibly create some trash log files std::unique_ptr result; env->NewWritableFile(options.wal_dir + "/1000.log.trash", &result, EnvOptions()); result.reset(); } env->GetChildren(options.wal_dir, &filenames); for (const std::string& fname : filenames) { if (fname.find(".log.trash") != std::string::npos) { trash_log_count++; } } ASSERT_GE(trash_log_count, 1); env->set_fake_log_delete(false); ASSERT_OK(TryReopen(options)); filenames.clear(); trash_log_count = 0; env->GetChildren(options.wal_dir, &filenames); for (const std::string& fname : filenames) { if (fname.find(".log.trash") != std::string::npos) { trash_log_count++; } } ASSERT_EQ(trash_log_count, 0); Close(); } INSTANTIATE_TEST_CASE_P(DBWALTestWithParam, DBWALTestWithParam, ::testing::Values(std::make_tuple("", true), std::make_tuple("_wal_dir", false))); TEST_F(DBSSTTest, OpenDBWithExistingTrash) { Options options = CurrentOptions(); options.sst_file_manager.reset( NewSstFileManager(env_, nullptr, "", 1024 * 1024 /* 1 MB/sec */)); auto sfm = static_cast(options.sst_file_manager.get()); Destroy(last_options_); // Add some trash files to the db directory so the DB can clean them up env_->CreateDirIfMissing(dbname_); ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash")); ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash")); ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash")); // Reopen the DB and verify that it deletes existing trash files ASSERT_OK(TryReopen(options)); sfm->WaitForEmptyTrash(); ASSERT_NOK(env_->FileExists(dbname_ + "/" + "001.sst.trash")); ASSERT_NOK(env_->FileExists(dbname_ + "/" + "002.sst.trash")); ASSERT_NOK(env_->FileExists(dbname_ + "/" + "003.sst.trash")); } // Create a DB with 2 db_paths, and generate multiple files in the 2 // db_paths using CompactRangeOptions, 
// make sure that files that were
// deleted from first db_path were deleted using DeleteScheduler and
// files in the second path were not.
TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
  std::atomic<int> bg_delete_file(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteTrashFile:DeleteFile",
      [&](void* /*arg*/) { bg_delete_file++; });
  // The deletion scheduler sometimes skips marking file as trash according to
  // a heuristic. In that case the deletion will go through the below SyncPoint.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { bg_delete_file++; });

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.db_paths.emplace_back(dbname_, 1024 * 100);
  options.db_paths.emplace_back(dbname_ + "_2", 1024 * 100);
  options.env = env_;

  int64_t rate_bytes_per_sec = 1024 * 1024;  // 1 Mb / Sec
  Status s;
  options.sst_file_manager.reset(
      NewSstFileManager(env_, nullptr, "", rate_bytes_per_sec, false, &s,
                        /* max_trash_db_ratio= */ 1.1));

  ASSERT_OK(s);
  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());

  DestroyAndReopen(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.disableWAL = true;
  // Create 4 files in L0
  for (int i = 0; i < 4; i++) {
    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'), wo));
    ASSERT_OK(Flush());
  }
  // We created 4 sst files in L0
  ASSERT_EQ("4", FilesPerLevel(0));
  // Compaction will delete files from L0 in first db path and generate a new
  // file in L1 in second db path
  CompactRangeOptions compact_options;
  compact_options.target_path_id = 1;
  Slice begin("Key0");
  Slice end("Key3");
  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
  ASSERT_EQ("0,1", FilesPerLevel(0));

  // Create 4 files in L0
  for (int i = 4; i < 8; i++) {
    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'), wo));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ("4,1", FilesPerLevel(0));

  // Compaction will delete files from L0 in first db path and generate a new
  // file in L1 in second db path
  begin = "Key4";
  end = "Key7";
  ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
  ASSERT_EQ("0,2", FilesPerLevel(0));

  sfm->WaitForEmptyTrash();
  ASSERT_EQ(bg_delete_file, 8);

  // Compaction will delete both files and regenerate a file in L1 in second
  // db path. The deleted files should still be cleaned up via delete scheduler.
  compact_options.bottommost_level_compaction =
      BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,1", FilesPerLevel(0));

  sfm->WaitForEmptyTrash();
  ASSERT_EQ(bg_delete_file, 10);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Checks that DestroyDB() routes every SST and WAL file of the closed DB
// through the delete scheduler's trash-file deletion path.
TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
  // Atomic for consistency with DeleteSchedulerMultipleDBPaths: the counter
  // is incremented from the SyncPoint callback and read by the test body.
  std::atomic<int> bg_delete_file(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DeleteScheduler::DeleteTrashFile:DeleteFile",
      [&](void* /*arg*/) { bg_delete_file++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Status s;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.env = env_;
  options.sst_file_manager.reset(
      NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
  ASSERT_OK(s);
  DestroyAndReopen(options);

  // Create 4 files in L0
  for (int i = 0; i < 4; i++) {
    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
    ASSERT_OK(Flush());
  }
  // We created 4 sst files in L0
  ASSERT_EQ("4", FilesPerLevel(0));

  // Close DB and destroy it using DeleteScheduler
  Close();

  // Count the files DestroyDB() is expected to hand to the scheduler.
  int num_sst_files = 0;
  int num_wal_files = 0;
  std::vector<std::string> db_files;
  ASSERT_OK(env_->GetChildren(dbname_, &db_files));
  for (const std::string& f : db_files) {
    // Text after the last '.'; the whole name when there is no '.'.
    const std::string extension = f.substr(f.find_last_of(".") + 1);
    if (extension == "sst") {
      num_sst_files++;
    } else if (extension == "log") {
      num_wal_files++;
    }
  }
  ASSERT_GT(num_sst_files, 0);
  ASSERT_GT(num_wal_files, 0);

  auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());

  sfm->SetDeleteRateBytesPerSecond(1024 * 1024);
  sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
  ASSERT_OK(DestroyDB(dbname_, options));
  sfm->WaitForEmptyTrash();
  ASSERT_EQ(bg_delete_file.load(), num_sst_files + num_wal_files);
}

// Checks that a flush fails once the SstFileManager space limit would be
// exceeded.
TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {
  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());

  Options options = CurrentOptions();
  options.sst_file_manager = sst_file_manager;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  Random rnd(301);

  // Generate a file containing 100 keys.
  for (int i = 0; i < 100; i++) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 50)));
  }
  ASSERT_OK(Flush());

  uint64_t first_file_size = 0;
  auto files_in_db = GetAllSSTFiles(&first_file_size);
  ASSERT_EQ(sfm->GetTotalSize(), first_file_size);

  // Set the maximum allowed space usage to one byte above the current total
  // size
  sfm->SetMaxAllowedSpaceUsage(first_file_size + 1);

  ASSERT_OK(Put("key1", "val1"));
  // This flush will cause bg_error_ and will fail
  ASSERT_NOK(Flush());
}

TEST_F(DBSSTTest, CancellingCompactionsWorks) {
  std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
  auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());

  Options options = CurrentOptions();
  options.sst_file_manager = sst_file_manager;
  options.level0_file_num_compaction_trigger = 2;
  options.statistics = CreateDBStatistics();
  DestroyAndReopen(options);

  int completed_compactions = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction():CancelledCompaction", [&](void* /*arg*/) {
        sfm->SetMaxAllowedSpaceUsage(0);
        ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun",
      [&](void* /*arg*/) { completed_compactions++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);

  // Generate a file containing 10 keys.
for (int i = 0; i < 10; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); uint64_t total_file_size = 0; auto files_in_db = GetAllSSTFiles(&total_file_size); // Set the maximum allowed space usage to the current total size sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1); // Generate another file to trigger compaction. for (int i = 0; i < 10; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); dbfull()->TEST_WaitForCompact(true); // Because we set a callback in CancelledCompaction, we actually // let the compaction run ASSERT_GT(completed_compactions, 0); ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); // Make sure the stat is bumped ASSERT_GT(dbfull()->immutable_db_options().statistics.get()->getTickerCount(COMPACTION_CANCELLED), 0); ASSERT_EQ(0, dbfull()->immutable_db_options().statistics.get()->getTickerCount( FILES_MARKED_TRASH)); ASSERT_EQ(4, dbfull()->immutable_db_options().statistics.get()->getTickerCount( FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBSSTTest, CancellingManualCompactionsWorks) { std::shared_ptr sst_file_manager(NewSstFileManager(env_)); auto sfm = static_cast(sst_file_manager.get()); Options options = CurrentOptions(); options.sst_file_manager = sst_file_manager; options.statistics = CreateDBStatistics(); FlushedFileCollector* collector = new FlushedFileCollector(); options.listeners.emplace_back(collector); DestroyAndReopen(options); Random rnd(301); // Generate a file containing 10 keys. for (int i = 0; i < 10; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); uint64_t total_file_size = 0; auto files_in_db = GetAllSSTFiles(&total_file_size); // Set the maximum allowed space usage to the current total size sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1); // Generate another file to trigger compaction. 
for (int i = 0; i < 10; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); // OK, now trigger a manual compaction dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); // Wait for manual compaction to get scheduled and finish dbfull()->TEST_WaitForCompact(true); ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); // Make sure the stat is bumped ASSERT_EQ(dbfull()->immutable_db_options().statistics.get()->getTickerCount( COMPACTION_CANCELLED), 1); // Now make sure CompactFiles also gets cancelled auto l0_files = collector->GetFlushedFiles(); dbfull()->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), l0_files, 0); // Wait for manual compaction to get scheduled and finish dbfull()->TEST_WaitForCompact(true); ASSERT_EQ(dbfull()->immutable_db_options().statistics.get()->getTickerCount( COMPACTION_CANCELLED), 2); ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); // Now let the flush through and make sure GetCompactionsReservedSize // returns to normal sfm->SetMaxAllowedSpaceUsage(0); int completed_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactFilesImpl:End", [&](void* /*arg*/) { completed_compactions++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), l0_files, 0); dbfull()->TEST_WaitForCompact(true); ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0); ASSERT_GT(completed_compactions, 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) { // This test will set a maximum allowed space for the DB, then it will // keep filling the DB until the limit is reached and bg_error_ is set. // When bg_error_ is set we will verify that the DB size is greater // than the limit. 
std::vector max_space_limits_mbs = {1, 10}; std::atomic bg_error_set(false); std::atomic reached_max_space_on_flush(0); std::atomic reached_max_space_on_compaction(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::FlushMemTableToOutputFile:MaxAllowedSpaceReached", [&](void* arg) { Status* bg_error = static_cast(arg); bg_error_set = true; reached_max_space_on_flush++; // clear error to ensure compaction callback is called *bg_error = Status::OK(); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction():CancelledCompaction", [&](void* arg) { bool* enough_room = static_cast(arg); *enough_room = true; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::FinishCompactionOutputFile:MaxAllowedSpaceReached", [&](void* /*arg*/) { bg_error_set = true; reached_max_space_on_compaction++; }); for (auto limit_mb : max_space_limits_mbs) { bg_error_set = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::shared_ptr sst_file_manager(NewSstFileManager(env_)); auto sfm = static_cast(sst_file_manager.get()); Options options = CurrentOptions(); options.sst_file_manager = sst_file_manager; options.write_buffer_size = 1024 * 512; // 512 Kb DestroyAndReopen(options); Random rnd(301); sfm->SetMaxAllowedSpaceUsage(limit_mb * 1024 * 1024); // It is easy to detect if the test is stuck in a loop. No need for // complex termination logic. 
while (true) { auto s = Put(RandomString(&rnd, 10), RandomString(&rnd, 50)); if (!s.ok()) { break; } } ASSERT_TRUE(bg_error_set); uint64_t total_sst_files_size = 0; GetAllSSTFiles(&total_sst_files_size); ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } ASSERT_GT(reached_max_space_on_flush, 0); ASSERT_GT(reached_max_space_on_compaction, 0); } TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFiles) { // Open DB with infinite max open files // - First iteration use 1 thread to open files // - Second iteration use 5 threads to open files for (int iter = 0; iter < 2; iter++) { Options options; options.create_if_missing = true; options.write_buffer_size = 100000; options.disable_auto_compactions = true; options.max_open_files = -1; if (iter == 0) { options.max_file_opening_threads = 1; } else { options.max_file_opening_threads = 5; } options = CurrentOptions(options); DestroyAndReopen(options); // Create 12 Files in L0 (then move then to L2) for (int i = 0; i < 12; i++) { std::string k = "L2_" + Key(i); ASSERT_OK(Put(k, k + std::string(1000, 'a'))); ASSERT_OK(Flush()); } CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; db_->CompactRange(compact_options, nullptr, nullptr); // Create 12 Files in L0 for (int i = 0; i < 12; i++) { std::string k = "L0_" + Key(i); ASSERT_OK(Put(k, k + std::string(1000, 'a'))); ASSERT_OK(Flush()); } Close(); // Reopening the DB will load all existing files Reopen(options); ASSERT_EQ("12,0,12", FilesPerLevel(0)); std::vector> files; dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files); for (const auto& level : files) { for (const auto& file : level) { ASSERT_TRUE(file.table_reader_handle != nullptr); } } for (int i = 0; i < 12; i++) { ASSERT_EQ(Get("L0_" + Key(i)), "L0_" + Key(i) + std::string(1000, 'a')); ASSERT_EQ(Get("L2_" + Key(i)), "L2_" + Key(i) + std::string(1000, 'a')); } } } TEST_F(DBSSTTest, 
GetTotalSstFilesSize) {
  // We don't propagate oldest-key-time table property on compaction and
  // just write 0 as default value. This affect the exact table size, since
  // we encode table properties as varint64. Force time to be 0 to work around
  // it. Should remove the workaround after we propagate the property on
  // compaction.
  std::unique_ptr<MockTimeEnv> mock_env(new MockTimeEnv(Env::Default()));
  mock_env->set_current_time(0);

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.compression = kNoCompression;
  options.env = mock_env.get();
  DestroyAndReopen(options);
  // Generate 5 files in L0
  for (int i = 0; i < 5; i++) {
    for (int j = 0; j < 10; j++) {
      std::string val = "val_file_" + ToString(i);
      ASSERT_OK(Put(Key(j), val));
    }
    // A failed flush would otherwise only show up as a wrong file count.
    ASSERT_OK(Flush());
  }
  ASSERT_EQ("5", FilesPerLevel(0));

  std::vector<LiveFileMetaData> live_files_meta;
  dbfull()->GetLiveFilesMetaData(&live_files_meta);
  ASSERT_EQ(live_files_meta.size(), 5);

  // The size expectations below treat every file as having this same size.
  uint64_t single_file_size = live_files_meta[0].size;

  uint64_t live_sst_files_size = 0;
  uint64_t total_sst_files_size = 0;
  for (const auto& file_meta : live_files_meta) {
    live_sst_files_size += file_meta.size;
  }

  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));
  // Live SST files = 5
  // Total SST files = 5
  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);

  // hold current version
  std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));

  // Compact 5 files into 1 file in L1
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ("0,1", FilesPerLevel(0));

  live_files_meta.clear();
  dbfull()->GetLiveFilesMetaData(&live_files_meta);
  ASSERT_EQ(live_files_meta.size(), 1);

  live_sst_files_size = 0;
  total_sst_files_size = 0;
  for (const auto& file_meta : live_files_meta) {
    live_sst_files_size += file_meta.size;
  }
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));
  // Live SST files = 1 (compacted file)
  // Total SST files = 6 (5 original files + compacted file)
  ASSERT_EQ(live_sst_files_size, 1 * single_file_size);
  ASSERT_EQ(total_sst_files_size, 6 * single_file_size);

  // hold current version
  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));

  // Delete all keys and compact, this will delete all live files
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Delete(Key(i)));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ("", FilesPerLevel(0));

  live_files_meta.clear();
  dbfull()->GetLiveFilesMetaData(&live_files_meta);
  ASSERT_EQ(live_files_meta.size(), 0);

  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));
  // Live SST files = 0
  // Total SST files = 6 (5 original files + compacted file)
  ASSERT_EQ(total_sst_files_size, 6 * single_file_size);

  iter1.reset();
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));
  // Live SST files = 0
  // Total SST files = 1 (compacted file)
  ASSERT_EQ(total_sst_files_size, 1 * single_file_size);

  iter2.reset();
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));
  // Live SST files = 0
  // Total SST files = 0
  ASSERT_EQ(total_sst_files_size, 0);

  // Close db before mock_env destruct.
  Close();
}

// Same property check as above for the case where compaction is a trivial
// move, so the old and new versions refer to the same files.
TEST_F(DBSSTTest, GetTotalSstFilesSizeVersionsFilesShared) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.compression = kNoCompression;
  DestroyAndReopen(options);
  // Generate 5 files in L0
  for (int i = 0; i < 5; i++) {
    ASSERT_OK(Put(Key(i), "val"));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ("5", FilesPerLevel(0));

  std::vector<LiveFileMetaData> live_files_meta;
  dbfull()->GetLiveFilesMetaData(&live_files_meta);
  ASSERT_EQ(live_files_meta.size(), 5);

  uint64_t single_file_size = live_files_meta[0].size;

  uint64_t live_sst_files_size = 0;
  uint64_t total_sst_files_size = 0;
  for (const auto& file_meta : live_files_meta) {
    live_sst_files_size += file_meta.size;
  }

  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));

  // Live SST files = 5
  // Total SST files = 5
  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);

  // hold current version
  std::unique_ptr<Iterator> iter1(dbfull()->NewIterator(ReadOptions()));

  // Compaction will do trivial move from L0 to L1
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ("0,5", FilesPerLevel(0));

  live_files_meta.clear();
  dbfull()->GetLiveFilesMetaData(&live_files_meta);
  ASSERT_EQ(live_files_meta.size(), 5);

  live_sst_files_size = 0;
  total_sst_files_size = 0;
  for (const auto& file_meta : live_files_meta) {
    live_sst_files_size += file_meta.size;
  }
  ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size",
                                       &total_sst_files_size));
  // Live SST files = 5
  // Total SST files = 5 (used in 2 version)
  ASSERT_EQ(live_sst_files_size, 5 * single_file_size);
  ASSERT_EQ(total_sst_files_size, 5 * single_file_size);

  // hold current version
  std::unique_ptr<Iterator> iter2(dbfull()->NewIterator(ReadOptions()));

  // Delete all keys and compact, this will delete all live files
  for (int i = 0; i < 5; i++) {
    ASSERT_OK(Delete(Key(i)));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ("",
FilesPerLevel(0)); live_files_meta.clear(); dbfull()->GetLiveFilesMetaData(&live_files_meta); ASSERT_EQ(live_files_meta.size(), 0); ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", &total_sst_files_size)); // Live SST files = 0 // Total SST files = 5 (used in 2 version) ASSERT_EQ(total_sst_files_size, 5 * single_file_size); iter1.reset(); iter2.reset(); ASSERT_TRUE(dbfull()->GetIntProperty("rocksdb.total-sst-files-size", &total_sst_files_size)); // Live SST files = 0 // Total SST files = 0 ASSERT_EQ(total_sst_files_size, 0); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_statistics_test.cc000066400000000000000000000123411370372246700200400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "db/db_test_util.h" #include "monitoring/thread_status_util.h" #include "port/stack_trace.h" #include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { class DBStatisticsTest : public DBTestBase { public: DBStatisticsTest() : DBTestBase("/db_statistics_test") {} }; TEST_F(DBStatisticsTest, CompressionStatsTest) { CompressionType type; if (Snappy_Supported()) { type = kSnappyCompression; fprintf(stderr, "using snappy\n"); } else if (Zlib_Supported()) { type = kZlibCompression; fprintf(stderr, "using zlib\n"); } else if (BZip2_Supported()) { type = kBZip2Compression; fprintf(stderr, "using bzip2\n"); } else if (LZ4_Supported()) { type = kLZ4Compression; fprintf(stderr, "using lz4\n"); } else if (XPRESS_Supported()) { type = kXpressCompression; fprintf(stderr, "using xpress\n"); } else if (ZSTD_Supported()) { type = kZSTD; fprintf(stderr, "using ZSTD\n"); } else { fprintf(stderr, "skipping test, compression disabled\n"); return; } Options options = CurrentOptions(); options.compression = type; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.statistics->set_stats_level(StatsLevel::kExceptTimeForMutex); DestroyAndReopen(options); int kNumKeysWritten = 100000; // Check that compressions occur and are counted when compression is turned on Random rnd(301); for (int i = 0; i < kNumKeysWritten; ++i) { // compressible string ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a'))); } ASSERT_OK(Flush()); ASSERT_GT(options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED), 0); for (int i = 0; i < kNumKeysWritten; ++i) { auto r = Get(Key(i)); } ASSERT_GT(options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED), 0); options.compression = kNoCompression; DestroyAndReopen(options); uint64_t currentCompressions = options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); uint64_t currentDecompressions = options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED); // Check that compressions do not occur when turned 
off for (int i = 0; i < kNumKeysWritten; ++i) { // compressible string ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a'))); } ASSERT_OK(Flush()); ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED) - currentCompressions, 0); for (int i = 0; i < kNumKeysWritten; ++i) { auto r = Get(Key(i)); } ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED) - currentDecompressions, 0); } TEST_F(DBStatisticsTest, MutexWaitStatsDisabledByDefault) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); CreateAndReopenWithCF({"pikachu"}, options); const uint64_t kMutexWaitDelay = 100; ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, kMutexWaitDelay); ASSERT_OK(Put("hello", "rocksdb")); ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0); ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0); } TEST_F(DBStatisticsTest, MutexWaitStats) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.statistics->set_stats_level(StatsLevel::kAll); CreateAndReopenWithCF({"pikachu"}, options); const uint64_t kMutexWaitDelay = 100; ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, kMutexWaitDelay); ASSERT_OK(Put("hello", "rocksdb")); ASSERT_GE(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), kMutexWaitDelay); ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0); } TEST_F(DBStatisticsTest, ResetStats) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); for (int i = 0; i < 2; ++i) { // pick arbitrary ticker and histogram. On first iteration they're zero // because db is unused. On second iteration they're zero due to Reset(). 
ASSERT_EQ(0, TestGetTickerCount(options, NUMBER_KEYS_WRITTEN)); HistogramData histogram_data; options.statistics->histogramData(DB_WRITE, &histogram_data); ASSERT_EQ(0.0, histogram_data.max); if (i == 0) { // The Put() makes some of the ticker/histogram stats nonzero until we // Reset(). ASSERT_OK(Put("hello", "rocksdb")); ASSERT_EQ(1, TestGetTickerCount(options, NUMBER_KEYS_WRITTEN)); options.statistics->histogramData(DB_WRITE, &histogram_data); ASSERT_GT(histogram_data.max, 0.0); options.statistics->Reset(); } } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_table_properties_test.cc000066400000000000000000000260701370372246700212150ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/utilities/table_properties_collectors.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #ifndef ROCKSDB_LITE namespace ROCKSDB_NAMESPACE { // A helper function that ensures the table properties returned in // `GetPropertiesOfAllTablesTest` is correct. // This test assumes entries size is different for each of the tables. 
namespace { void VerifyTableProperties(DB* db, uint64_t expected_entries_size) { TablePropertiesCollection props; ASSERT_OK(db->GetPropertiesOfAllTables(&props)); ASSERT_EQ(4U, props.size()); std::unordered_set unique_entries; // Indirect test uint64_t sum = 0; for (const auto& item : props) { unique_entries.insert(item.second->num_entries); sum += item.second->num_entries; } ASSERT_EQ(props.size(), unique_entries.size()); ASSERT_EQ(expected_entries_size, sum); } } // namespace class DBTablePropertiesTest : public DBTestBase, public testing::WithParamInterface { public: DBTablePropertiesTest() : DBTestBase("/db_table_properties_test") {} TablePropertiesCollection TestGetPropertiesOfTablesInRange( std::vector ranges, std::size_t* num_properties = nullptr, std::size_t* num_files = nullptr); }; TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 8; Reopen(options); // Create 4 tables for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10 + table; ++i) { db_->Put(WriteOptions(), ToString(table * 100 + i), "val"); } db_->Flush(FlushOptions()); } // 1. Read table properties directly from file Reopen(options); VerifyTableProperties(db_, 10 + 11 + 12 + 13); // 2. Put two tables to table cache and Reopen(options); // fetch key from 1st and 2nd table, which will internally place that table to // the table cache. for (int i = 0; i < 2; ++i) { Get(ToString(i * 100 + 0)); } VerifyTableProperties(db_, 10 + 11 + 12 + 13); // 3. Put all tables to table cache Reopen(options); // fetch key from 1st and 2nd table, which will internally place that table to // the table cache. 
for (int i = 0; i < 4; ++i) { Get(ToString(i * 100 + 0)); } VerifyTableProperties(db_, 10 + 11 + 12 + 13); } TablePropertiesCollection DBTablePropertiesTest::TestGetPropertiesOfTablesInRange( std::vector ranges, std::size_t* num_properties, std::size_t* num_files) { // Since we deref zero element in the vector it can not be empty // otherwise we pass an address to some random memory EXPECT_GT(ranges.size(), 0U); // run the query TablePropertiesCollection props; EXPECT_OK(db_->GetPropertiesOfTablesInRange( db_->DefaultColumnFamily(), &ranges[0], ranges.size(), &props)); // Make sure that we've received properties for those and for those files // only which fall within requested ranges std::vector vmd; db_->GetLiveFilesMetaData(&vmd); for (auto& md : vmd) { std::string fn = md.db_path + md.name; bool in_range = false; for (auto& r : ranges) { // smallestkey < limit && largestkey >= start if (r.limit.compare(md.smallestkey) >= 0 && r.start.compare(md.largestkey) <= 0) { in_range = true; EXPECT_GT(props.count(fn), 0); } } if (!in_range) { EXPECT_EQ(props.count(fn), 0); } } if (num_properties) { *num_properties = props.size(); } if (num_files) { *num_files = vmd.size(); } return props; } TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) { // Fixed random sead Random rnd(301); Options options; options.create_if_missing = true; options.write_buffer_size = 4096; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 2; options.target_file_size_base = 2048; options.max_bytes_for_level_base = 40960; options.max_bytes_for_level_multiplier = 4; options.hard_pending_compaction_bytes_limit = 16 * 1024; options.num_levels = 8; options.env = env_; DestroyAndReopen(options); // build a decent LSM for (int i = 0; i < 10000; i++) { ASSERT_OK(Put(test::RandomKey(&rnd, 5), RandomString(&rnd, 102))); } Flush(); dbfull()->TEST_WaitForCompact(); if 
(NumTableFilesAtLevel(0) == 0) { ASSERT_OK(Put(test::RandomKey(&rnd, 5), RandomString(&rnd, 102))); Flush(); } db_->PauseBackgroundWork(); // Ensure that we have at least L0, L1 and L2 ASSERT_GT(NumTableFilesAtLevel(0), 0); ASSERT_GT(NumTableFilesAtLevel(1), 0); ASSERT_GT(NumTableFilesAtLevel(2), 0); // Query the largest range std::size_t num_properties, num_files; TestGetPropertiesOfTablesInRange( {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST), test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))}, &num_properties, &num_files); ASSERT_EQ(num_properties, num_files); // Query the empty range TestGetPropertiesOfTablesInRange( {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST), test::RandomKey(&rnd, 5, test::RandomKeyType::SMALLEST))}, &num_properties, &num_files); ASSERT_GT(num_files, 0); ASSERT_EQ(num_properties, 0); // Query the middle rangee TestGetPropertiesOfTablesInRange( {Range(test::RandomKey(&rnd, 5, test::RandomKeyType::MIDDLE), test::RandomKey(&rnd, 5, test::RandomKeyType::LARGEST))}, &num_properties, &num_files); ASSERT_GT(num_files, 0); ASSERT_GT(num_files, num_properties); ASSERT_GT(num_properties, 0); // Query a bunch of random ranges for (int j = 0; j < 100; j++) { // create a bunch of ranges std::vector random_keys; // Random returns numbers with zero included // when we pass empty ranges TestGetPropertiesOfTablesInRange() // derefs random memory in the empty ranges[0] // so want to be greater than zero and even since // the below loop requires that random_keys.size() to be even. 
auto n = 2 * (rnd.Uniform(50) + 1); for (uint32_t i = 0; i < n; ++i) { random_keys.push_back(test::RandomKey(&rnd, 5)); } ASSERT_GT(random_keys.size(), 0U); ASSERT_EQ((random_keys.size() % 2), 0U); std::vector ranges; auto it = random_keys.begin(); while (it != random_keys.end()) { ranges.push_back(Range(*it, *(it + 1))); it += 2; } TestGetPropertiesOfTablesInRange(std::move(ranges)); } } TEST_F(DBTablePropertiesTest, GetColumnFamilyNameProperty) { std::string kExtraCfName = "pikachu"; CreateAndReopenWithCF({kExtraCfName}, CurrentOptions()); // Create one table per CF, then verify it was created with the column family // name property. for (uint32_t cf = 0; cf < 2; ++cf) { Put(cf, "key", "val"); Flush(cf); TablePropertiesCollection fname_to_props; ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props)); ASSERT_EQ(1U, fname_to_props.size()); std::string expected_cf_name; if (cf > 0) { expected_cf_name = kExtraCfName; } else { expected_cf_name = kDefaultColumnFamilyName; } ASSERT_EQ(expected_cf_name, fname_to_props.begin()->second->column_family_name); ASSERT_EQ(cf, static_cast( fname_to_props.begin()->second->column_family_id)); } } class DeletionTriggeredCompactionTestListener : public EventListener { public: void OnCompactionBegin(DB* , const CompactionJobInfo& ci) override { ASSERT_EQ(ci.compaction_reason, CompactionReason::kFilesMarkedForCompaction); } void OnCompactionCompleted(DB* , const CompactionJobInfo& ci) override { ASSERT_EQ(ci.compaction_reason, CompactionReason::kFilesMarkedForCompaction); } }; TEST_P(DBTablePropertiesTest, DeletionTriggeredCompactionMarking) { int kNumKeys = 1000; int kWindowSize = 100; int kNumDelsTrigger = 90; std::shared_ptr compact_on_del = NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger); Options opts = CurrentOptions(); opts.table_properties_collector_factories.emplace_back(compact_on_del); if(GetParam() == "kCompactionStyleUniversal") { opts.compaction_style = kCompactionStyleUniversal; } 
Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence // during flush Put(Key(0), "val"); Flush(); MoveFilesToLevel(1); DeletionTriggeredCompactionTestListener *listener = new DeletionTriggeredCompactionTestListener(); opts.listeners.emplace_back(listener); Reopen(opts); for (int i = 0; i < kNumKeys; ++i) { if (i >= kNumKeys - kWindowSize && i < kNumKeys - kWindowSize + kNumDelsTrigger) { Delete(Key(i)); } else { Put(Key(i), "val"); } } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); // Change the window size and deletion trigger and ensure new values take // effect kWindowSize = 50; kNumDelsTrigger = 40; static_cast (compact_on_del.get())->SetWindowSize(kWindowSize); static_cast (compact_on_del.get())->SetDeletionTrigger(kNumDelsTrigger); for (int i = 0; i < kNumKeys; ++i) { if (i >= kNumKeys - kWindowSize && i < kNumKeys - kWindowSize + kNumDelsTrigger) { Delete(Key(i)); } else { Put(Key(i), "val"); } } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); // Change the window size to disable delete triggered compaction kWindowSize = 0; static_cast (compact_on_del.get())->SetWindowSize(kWindowSize); static_cast (compact_on_del.get())->SetDeletionTrigger(kNumDelsTrigger); for (int i = 0; i < kNumKeys; ++i) { if (i >= kNumKeys - kWindowSize && i < kNumKeys - kWindowSize + kNumDelsTrigger) { Delete(Key(i)); } else { Put(Key(i), "val"); } } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(1, NumTableFilesAtLevel(0)); } INSTANTIATE_TEST_CASE_P( DBTablePropertiesTest, DBTablePropertiesTest, ::testing::Values( "kCompactionStyleLevel", "kCompactionStyleUniversal" )); } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } 
rocksdb-6.11.4/db/db_tailing_iter_test.cc000066400000000000000000000407601370372246700203260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // Introduction of SyncPoint effectively disabled building and running this test // in Release build. // which is a pity, it is a good test #if !defined(ROCKSDB_LITE) #include "db/db_test_util.h" #include "db/forward_iterator.h" #include "port/stack_trace.h" namespace ROCKSDB_NAMESPACE { class DBTestTailingIterator : public DBTestBase { public: DBTestTailingIterator() : DBTestBase("/db_tailing_iterator_test") {} }; TEST_F(DBTestTailingIterator, TailingIteratorSingle) { ReadOptions read_options; read_options.tailing = true; std::unique_ptr iter(db_->NewIterator(read_options)); iter->SeekToFirst(); ASSERT_TRUE(!iter->Valid()); // add a record and check that iter can see it ASSERT_OK(db_->Put(WriteOptions(), "mirko", "fodor")); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().ToString(), "mirko"); iter->Next(); ASSERT_TRUE(!iter->Valid()); } TEST_F(DBTestTailingIterator, TailingIteratorKeepAdding) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ReadOptions read_options; read_options.tailing = true; std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); std::string value(1024, 'a'); const int num_records = 10000; for (int i = 0; i < num_records; ++i) { char buf[32]; snprintf(buf, sizeof(buf), "%016d", i); Slice key(buf, 16); ASSERT_OK(Put(1, key, value)); iter->Seek(key); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(key), 0); } } 
TEST_F(DBTestTailingIterator, TailingIteratorSeekToNext) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ReadOptions read_options; read_options.tailing = true; std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); std::unique_ptr itern(db_->NewIterator(read_options, handles_[1])); std::string value(1024, 'a'); const int num_records = 1000; for (int i = 1; i < num_records; ++i) { char buf1[32]; char buf2[32]; snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); Slice key(buf1, 20); ASSERT_OK(Put(1, key, value)); if (i % 100 == 99) { ASSERT_OK(Flush(1)); } snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); Slice target(buf2, 20); iter->Seek(target); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(key), 0); if (i == 1) { itern->SeekToFirst(); } else { itern->Next(); } ASSERT_TRUE(itern->Valid()); ASSERT_EQ(itern->key().compare(key), 0); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); for (int i = 2 * num_records; i > 0; --i) { char buf1[32]; char buf2[32]; snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); Slice key(buf1, 20); ASSERT_OK(Put(1, key, value)); if (i % 100 == 99) { ASSERT_OK(Flush(1)); } snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); Slice target(buf2, 20); iter->Seek(target); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(key), 0); } } TEST_F(DBTestTailingIterator, TailingIteratorTrimSeekToNext) { const uint64_t k150KB = 150 * 1024; Options options; options.write_buffer_size = k150KB; options.max_write_buffer_number = 3; options.min_write_buffer_number_to_merge = 2; options.env = env_; CreateAndReopenWithCF({"pikachu"}, options); ReadOptions read_options; read_options.tailing = true; int num_iters, deleted_iters; char bufe[32]; snprintf(bufe, sizeof(bufe), "00b0%016d", 0); Slice keyu(bufe, 20); read_options.iterate_upper_bound = &keyu; std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); std::unique_ptr 
itern(db_->NewIterator(read_options, handles_[1])); std::unique_ptr iterh(db_->NewIterator(read_options, handles_[1])); std::string value(1024, 'a'); bool file_iters_deleted = false; bool file_iters_renewed_null = false; bool file_iters_renewed_copy = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "ForwardIterator::SeekInternal:Return", [&](void* arg) { ForwardIterator* fiter = reinterpret_cast(arg); ASSERT_TRUE(!file_iters_deleted || fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters)); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "ForwardIterator::Next:Return", [&](void* arg) { ForwardIterator* fiter = reinterpret_cast(arg); ASSERT_TRUE(!file_iters_deleted || fiter->TEST_CheckDeletedIters(&deleted_iters, &num_iters)); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "ForwardIterator::RenewIterators:Null", [&](void* /*arg*/) { file_iters_renewed_null = true; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "ForwardIterator::RenewIterators:Copy", [&](void* /*arg*/) { file_iters_renewed_copy = true; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); const int num_records = 1000; for (int i = 1; i < num_records; ++i) { char buf1[32]; char buf2[32]; char buf3[32]; char buf4[32]; snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); snprintf(buf3, sizeof(buf3), "00b0%016d", i * 5); Slice key(buf1, 20); ASSERT_OK(Put(1, key, value)); Slice keyn(buf3, 20); ASSERT_OK(Put(1, keyn, value)); if (i % 100 == 99) { ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); if (i == 299) { file_iters_deleted = true; } snprintf(buf4, sizeof(buf4), "00a0%016d", i * 5 / 2); Slice target(buf4, 20); iterh->Seek(target); ASSERT_TRUE(iter->Valid()); for (int j = (i + 1) * 5 / 2; j < i * 5; j += 5) { iterh->Next(); ASSERT_TRUE(iterh->Valid()); } if (i == 299) { file_iters_deleted = false; } } file_iters_deleted = true; snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); Slice target(buf2, 20); 
iter->Seek(target); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(key), 0); ASSERT_LE(num_iters, 1); if (i == 1) { itern->SeekToFirst(); } else { itern->Next(); } ASSERT_TRUE(itern->Valid()); ASSERT_EQ(itern->key().compare(key), 0); ASSERT_LE(num_iters, 1); file_iters_deleted = false; } ASSERT_TRUE(file_iters_renewed_null); ASSERT_TRUE(file_iters_renewed_copy); iter = nullptr; itern = nullptr; iterh = nullptr; BlockBasedTableOptions table_options; table_options.no_block_cache = true; table_options.block_cache_compressed = nullptr; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ReopenWithColumnFamilies({"default", "pikachu"}, options); read_options.read_tier = kBlockCacheTier; std::unique_ptr iteri(db_->NewIterator(read_options, handles_[1])); char buf5[32]; snprintf(buf5, sizeof(buf5), "00a0%016d", (num_records / 2) * 5 - 2); Slice target1(buf5, 20); iteri->Seek(target1); ASSERT_TRUE(iteri->status().IsIncomplete()); iteri = nullptr; read_options.read_tier = kReadAllTier; options.table_factory.reset(NewBlockBasedTableFactory()); ReopenWithColumnFamilies({"default", "pikachu"}, options); iter.reset(db_->NewIterator(read_options, handles_[1])); for (int i = 2 * num_records; i > 0; --i) { char buf1[32]; char buf2[32]; snprintf(buf1, sizeof(buf1), "00a0%016d", i * 5); Slice key(buf1, 20); ASSERT_OK(Put(1, key, value)); if (i % 100 == 99) { ASSERT_OK(Flush(1)); } snprintf(buf2, sizeof(buf2), "00a0%016d", i * 5 - 2); Slice target(buf2, 20); iter->Seek(target); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(key), 0); } } TEST_F(DBTestTailingIterator, TailingIteratorDeletes) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ReadOptions read_options; read_options.tailing = true; std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); // write a single record, read it using the iterator, then delete it ASSERT_OK(Put(1, "0test", "test")); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); 
ASSERT_EQ(iter->key().ToString(), "0test"); ASSERT_OK(Delete(1, "0test")); // write many more records const int num_records = 10000; std::string value(1024, 'A'); for (int i = 0; i < num_records; ++i) { char buf[32]; snprintf(buf, sizeof(buf), "1%015d", i); Slice key(buf, 16); ASSERT_OK(Put(1, key, value)); } // force a flush to make sure that no records are read from memtable ASSERT_OK(Flush(1)); // skip "0test" iter->Next(); // make sure we can read all new records using the existing iterator int count = 0; for (; iter->Valid(); iter->Next(), ++count) ; ASSERT_EQ(count, num_records); } TEST_F(DBTestTailingIterator, TailingIteratorPrefixSeek) { ReadOptions read_options; read_options.tailing = true; Options options = CurrentOptions(); options.create_if_missing = true; options.disable_auto_compactions = true; options.prefix_extractor.reset(NewFixedPrefixTransform(2)); options.memtable_factory.reset(NewHashSkipListRepFactory(16)); options.allow_concurrent_memtable_write = false; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); std::unique_ptr iter(db_->NewIterator(read_options, handles_[1])); ASSERT_OK(Put(1, "0101", "test")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "0202", "test")); // Seek(0102) shouldn't find any records since 0202 has a different prefix iter->Seek("0102"); ASSERT_TRUE(!iter->Valid()); iter->Seek("0202"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().ToString(), "0202"); iter->Next(); ASSERT_TRUE(!iter->Valid()); } TEST_F(DBTestTailingIterator, TailingIteratorIncomplete) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ReadOptions read_options; read_options.tailing = true; read_options.read_tier = kBlockCacheTier; std::string key("key"); std::string value("value"); ASSERT_OK(db_->Put(WriteOptions(), key, value)); std::unique_ptr iter(db_->NewIterator(read_options)); iter->SeekToFirst(); // we either see the entry or it's not in cache ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete()); 
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); iter->SeekToFirst(); // should still be true after compaction ASSERT_TRUE(iter->Valid() || iter->status().IsIncomplete()); } TEST_F(DBTestTailingIterator, TailingIteratorSeekToSame) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 1000; CreateAndReopenWithCF({"pikachu"}, options); ReadOptions read_options; read_options.tailing = true; const int NROWS = 10000; // Write rows with keys 00000, 00002, 00004 etc. for (int i = 0; i < NROWS; ++i) { char buf[100]; snprintf(buf, sizeof(buf), "%05d", 2*i); std::string key(buf); std::string value("value"); ASSERT_OK(db_->Put(WriteOptions(), key, value)); } std::unique_ptr iter(db_->NewIterator(read_options)); // Seek to 00001. We expect to find 00002. std::string start_key = "00001"; iter->Seek(start_key); ASSERT_TRUE(iter->Valid()); std::string found = iter->key().ToString(); ASSERT_EQ("00002", found); // Now seek to the same key. The iterator should remain in the same // position. iter->Seek(found); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(found, iter->key().ToString()); } // Sets iterate_upper_bound and verifies that ForwardIterator doesn't call // Seek() on immutable iterators when target key is >= prev_key and all // iterators, including the memtable iterator, are over the upper bound. TEST_F(DBTestTailingIterator, TailingIteratorUpperBound) { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); const Slice upper_bound("20", 3); ReadOptions read_options; read_options.tailing = true; read_options.iterate_upper_bound = &upper_bound; ASSERT_OK(Put(1, "11", "11")); ASSERT_OK(Put(1, "12", "12")); ASSERT_OK(Put(1, "22", "22")); ASSERT_OK(Flush(1)); // flush all those keys to an immutable SST file // Add another key to the memtable. 
ASSERT_OK(Put(1, "21", "21")); std::unique_ptr it(db_->NewIterator(read_options, handles_[1])); it->Seek("12"); ASSERT_TRUE(it->Valid()); ASSERT_EQ("12", it->key().ToString()); it->Next(); // Not valid since "21" is over the upper bound. ASSERT_FALSE(it->Valid()); // This keeps track of the number of times NeedToSeekImmutable() was true. int immutable_seeks = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "ForwardIterator::SeekInternal:Immutable", [&](void* /*arg*/) { ++immutable_seeks; }); // Seek to 13. This should not require any immutable seeks. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); it->Seek("13"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_FALSE(it->Valid()); ASSERT_EQ(0, immutable_seeks); } TEST_F(DBTestTailingIterator, TailingIteratorGap) { // level 1: [20, 25] [35, 40] // level 2: [10 - 15] [45 - 50] // level 3: [20, 30, 40] // Previously there is a bug in tailing_iterator that if there is a gap in // lower level, the key will be skipped if it is within the range between // the largest key of index n file and the smallest key of index n+1 file // if both file fit in that gap. 
In this example, 25 < key < 35 // https://github.com/facebook/rocksdb/issues/1372 CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ReadOptions read_options; read_options.tailing = true; ASSERT_OK(Put(1, "20", "20")); ASSERT_OK(Put(1, "30", "30")); ASSERT_OK(Put(1, "40", "40")); ASSERT_OK(Flush(1)); MoveFilesToLevel(3, 1); ASSERT_OK(Put(1, "10", "10")); ASSERT_OK(Put(1, "15", "15")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "45", "45")); ASSERT_OK(Put(1, "50", "50")); ASSERT_OK(Flush(1)); MoveFilesToLevel(2, 1); ASSERT_OK(Put(1, "20", "20")); ASSERT_OK(Put(1, "25", "25")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "35", "35")); ASSERT_OK(Put(1, "40", "40")); ASSERT_OK(Flush(1)); MoveFilesToLevel(1, 1); ColumnFamilyMetaData meta; db_->GetColumnFamilyMetaData(handles_[1], &meta); std::unique_ptr it(db_->NewIterator(read_options, handles_[1])); it->Seek("30"); ASSERT_TRUE(it->Valid()); ASSERT_EQ("30", it->key().ToString()); it->Next(); ASSERT_TRUE(it->Valid()); ASSERT_EQ("35", it->key().ToString()); it->Next(); ASSERT_TRUE(it->Valid()); ASSERT_EQ("40", it->key().ToString()); } TEST_F(DBTestTailingIterator, SeekWithUpperBoundBug) { ReadOptions read_options; read_options.tailing = true; const Slice upper_bound("cc", 3); read_options.iterate_upper_bound = &upper_bound; // 1st L0 file ASSERT_OK(db_->Put(WriteOptions(), "aa", "SEEN")); ASSERT_OK(Flush()); // 2nd L0 file ASSERT_OK(db_->Put(WriteOptions(), "zz", "NOT-SEEN")); ASSERT_OK(Flush()); std::unique_ptr iter(db_->NewIterator(read_options)); iter->Seek("aa"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().ToString(), "aa"); } TEST_F(DBTestTailingIterator, SeekToFirstWithUpperBoundBug) { ReadOptions read_options; read_options.tailing = true; const Slice upper_bound("cc", 3); read_options.iterate_upper_bound = &upper_bound; // 1st L0 file ASSERT_OK(db_->Put(WriteOptions(), "aa", "SEEN")); ASSERT_OK(Flush()); // 2nd L0 file ASSERT_OK(db_->Put(WriteOptions(), "zz", "NOT-SEEN")); ASSERT_OK(Flush()); std::unique_ptr 
iter(db_->NewIterator(read_options)); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().ToString(), "aa"); iter->Next(); ASSERT_FALSE(iter->Valid()); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().ToString(), "aa"); } } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) int main(int argc, char** argv) { #if !defined(ROCKSDB_LITE) ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); #else (void) argc; (void) argv; return 0; #endif } rocksdb-6.11.4/db/db_test.cc000066400000000000000000006761741370372246700156120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // Introduction of SyncPoint effectively disabled building and running this test // in Release build. 
// which is a pity, it is a good test #include #include #include #include #include #include #ifndef OS_WIN #include #endif #ifdef OS_SOLARIS #include #endif #include "cache/lru_cache.h" #include "db/blob/blob_index.h" #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/dbformat.h" #include "db/job_context.h" #include "db/version_set.h" #include "db/write_batch_internal.h" #include "env/mock_env.h" #include "file/filename.h" #include "memtable/hash_linklist_rep.h" #include "monitoring/thread_status_util.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/experimental.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/perf_context.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/snapshot.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "rocksdb/thread_status.h" #include "rocksdb/utilities/checkpoint.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "table/block_based/block_based_table_factory.h" #include "table/mock_table.h" #include "table/plain/plain_table_factory.h" #include "table/scoped_arena_iterator.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/compression.h" #include "util/mutexlock.h" #include "util/rate_limiter.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { class DBTest : public DBTestBase { public: DBTest() : DBTestBase("/db_test") {} }; class DBTestWithParam : public DBTest, public testing::WithParamInterface> { public: DBTestWithParam() { max_subcompactions_ = std::get<0>(GetParam()); exclusive_manual_compaction_ = std::get<1>(GetParam()); } // Required if inheriting from 
testing::WithParamInterface<> static void SetUpTestCase() {} static void TearDownTestCase() {} uint32_t max_subcompactions_; bool exclusive_manual_compaction_; }; TEST_F(DBTest, MockEnvTest) { std::unique_ptr env{new MockEnv(Env::Default())}; Options options; options.create_if_missing = true; options.env = env.get(); DB* db; const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; ASSERT_OK(DB::Open(options, "/dir/db", &db)); for (size_t i = 0; i < 3; ++i) { ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); } for (size_t i = 0; i < 3; ++i) { std::string res; ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); ASSERT_TRUE(res == vals[i]); } Iterator* iterator = db->NewIterator(ReadOptions()); iterator->SeekToFirst(); for (size_t i = 0; i < 3; ++i) { ASSERT_TRUE(iterator->Valid()); ASSERT_TRUE(keys[i] == iterator->key()); ASSERT_TRUE(vals[i] == iterator->value()); iterator->Next(); } ASSERT_TRUE(!iterator->Valid()); delete iterator; // TEST_FlushMemTable() is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE DBImpl* dbi = reinterpret_cast(db); ASSERT_OK(dbi->TEST_FlushMemTable()); for (size_t i = 0; i < 3; ++i) { std::string res; ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); ASSERT_TRUE(res == vals[i]); } #endif // ROCKSDB_LITE delete db; } // NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't // defined. 
#ifndef ROCKSDB_LITE
// Same scenario as MockEnvTest but on the in-memory Env returned by
// NewMemEnv(); additionally closes the DB and reopens it with
// create_if_missing = false to check that the data is still readable.
//
// NOTE(review): template arguments throughout this section (std::atomic,
// std::vector, std::function, std::unique_ptr, reinterpret_cast) were lost
// when the file was flattened and are restored from how the values are used.
TEST_F(DBTest, MemEnvTest) {
  std::unique_ptr<Env> env{NewMemEnv(Env::Default())};
  Options options;
  options.create_if_missing = true;
  options.env = env.get();
  DB* db;

  const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
  const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};

  ASSERT_OK(DB::Open(options, "/dir/db", &db));
  for (size_t i = 0; i < 3; ++i) {
    ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
  }

  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }

  Iterator* iterator = db->NewIterator(ReadOptions());
  iterator->SeekToFirst();
  for (size_t i = 0; i < 3; ++i) {
    ASSERT_TRUE(iterator->Valid());
    ASSERT_TRUE(keys[i] == iterator->key());
    ASSERT_TRUE(vals[i] == iterator->value());
    iterator->Next();
  }
  ASSERT_TRUE(!iterator->Valid());
  delete iterator;

  DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
  ASSERT_OK(dbi->TEST_FlushMemTable());

  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }

  delete db;

  options.create_if_missing = false;
  ASSERT_OK(DB::Open(options, "/dir/db", &db));
  for (size_t i = 0; i < 3; ++i) {
    std::string res;
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
    ASSERT_TRUE(res == vals[i]);
  }
  delete db;
}
#endif  // ROCKSDB_LITE

// Writing an empty batch with sync = true must succeed and must leave the DB
// in a state that can be reopened.
TEST_F(DBTest, WriteEmptyBatch) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "bar"));
  WriteOptions wo;
  wo.sync = true;
  wo.disableWAL = false;
  WriteBatch empty_batch;
  ASSERT_OK(dbfull()->Write(wo, &empty_batch));

  // make sure we can re-open it.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
  ASSERT_EQ("bar", Get(1, "foo"));
}

// For every compatible (sync, disableWAL) combination: with a delay token
// held, a no_slowdown write is expected to fail, while a regular write is
// expected to succeed after hitting the "DelayWrite:Sleep" sync point.
TEST_F(DBTest, SkipDelay) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);

  for (bool sync : {true, false}) {
    for (bool disableWAL : {true, false}) {
      if (sync && disableWAL) {
        // sync and disableWAL is incompatible.
        continue;
      }
      // Use a small number to ensure a large delay that is still effective
      // when we do Put
      // TODO(myabandeh): this is time dependent and could potentially make
      // the test flaky
      auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
      std::atomic<int> sleep_count(0);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "DBImpl::DelayWrite:Sleep",
          [&](void* /*arg*/) { sleep_count.fetch_add(1); });
      std::atomic<int> wait_count(0);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "DBImpl::DelayWrite:Wait",
          [&](void* /*arg*/) { wait_count.fetch_add(1); });
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

      WriteOptions wo;
      wo.sync = sync;
      wo.disableWAL = disableWAL;
      wo.no_slowdown = true;
      dbfull()->Put(wo, "foo", "bar");
      // We need the 2nd write to trigger delay. This is because delay is
      // estimated based on the last write size which is 0 for the first write.
      ASSERT_NOK(dbfull()->Put(wo, "foo2", "bar2"));
      ASSERT_GE(sleep_count.load(), 0);
      ASSERT_GE(wait_count.load(), 0);
      token.reset();

      token = dbfull()->TEST_write_controler().GetDelayToken(1000000000);
      wo.no_slowdown = false;
      ASSERT_OK(dbfull()->Put(wo, "foo3", "bar3"));
      ASSERT_GE(sleep_count.load(), 1);
      token.reset();
    }
  }
}

// While a delayed write is in progress, spawns two writers that accept
// slowdown (expected to succeed) and two with no_slowdown (expected to fail).
TEST_F(DBTest, MixedSlowdownOptions) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);

  std::function<void()> write_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = false;
    ASSERT_OK(dbfull()->Put(wo, key, "bar"));
  };
  std::function<void()> write_no_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = true;
    ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  };
  // Use a small number to ensure a large delay that is still effective
  // when we do Put
  // TODO(myabandeh): this is time dependent and could potentially make
  // the test flaky
  auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  std::atomic<int> sleep_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:BeginWriteStallDone", [&](void* /*arg*/) {
        sleep_count.fetch_add(1);
        // Only the first invocation spawns the writer threads.
        if (threads.empty()) {
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_slowdown_func);
          }
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_no_slowdown_func);
          }
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.sync = false;
  wo.disableWAL = false;
  wo.no_slowdown = false;
  dbfull()->Put(wo, "foo", "bar");
  // We need the 2nd write to trigger delay. This is because delay is
  // estimated based on the last write size which is 0 for the first write.
  ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  token.reset();

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_GE(sleep_count.load(), 1);

  wo.no_slowdown = true;
  ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
}

// While the leading writer is inside the "DelayWrite:Sleep" sync point, two
// no_slowdown writers are started; both are expected to fail, and the sleep
// sync point is expected to fire exactly once.
TEST_F(DBTest, MixedSlowdownOptionsInQueue) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);

  std::function<void()> write_no_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = true;
    ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  };
  // Use a small number to ensure a large delay that is still effective
  // when we do Put
  // TODO(myabandeh): this is time dependent and could potentially make
  // the test flaky
  auto token = dbfull()->TEST_write_controler().GetDelayToken(1);
  std::atomic<int> sleep_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Sleep", [&](void* /*arg*/) {
        sleep_count.fetch_add(1);
        if (threads.empty()) {
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_no_slowdown_func);
          }
          // Sleep for 3s to allow the threads to insert themselves into the
          // write queue
          env_->SleepForMicroseconds(3000000ULL);
        }
      });
  std::atomic<int> wait_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Wait",
      [&](void* /*arg*/) { wait_count.fetch_add(1); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.sync = false;
  wo.disableWAL = false;
  wo.no_slowdown = false;
  dbfull()->Put(wo, "foo", "bar");
  // We need the 2nd write to trigger delay. This is because delay is
  // estimated based on the last write size which is 0 for the first write.
  ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  token.reset();

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_EQ(sleep_count.load(), 1);
  ASSERT_GE(wait_count.load(), 0);
}

// Variant of MixedSlowdownOptions that holds a stop token instead of a delay
// token. The "DelayWrite:Wait" callback spawns the writers, releases the token
// and starts a thread that signals bg_cv_.
TEST_F(DBTest, MixedSlowdownOptionsStop) {
  Options options = CurrentOptions();
  options.env = env_;
  options.write_buffer_size = 100000;
  CreateAndReopenWithCF({"pikachu"}, options);
  std::vector<port::Thread> threads;
  std::atomic<int> thread_num(0);

  std::function<void()> write_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = false;
    ASSERT_OK(dbfull()->Put(wo, key, "bar"));
  };
  std::function<void()> write_no_slowdown_func = [&]() {
    int a = thread_num.fetch_add(1);
    std::string key = "foo" + std::to_string(a);
    WriteOptions wo;
    wo.no_slowdown = true;
    ASSERT_NOK(dbfull()->Put(wo, key, "bar"));
  };
  std::function<void()> wakeup_writer = [&]() {
    dbfull()->mutex_.Lock();
    dbfull()->bg_cv_.SignalAll();
    dbfull()->mutex_.Unlock();
  };
  // NOTE(review): the comment that used to sit here ("Use a small number to
  // ensure a large delay ...") was copied from the delay-token tests; this
  // test takes a stop token, which has no rate argument.
  // TODO(myabandeh): this is time dependent and could potentially make
  // the test flaky
  auto token = dbfull()->TEST_write_controler().GetStopToken();
  std::atomic<int> wait_count(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) {
        wait_count.fetch_add(1);
        if (threads.empty()) {
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_slowdown_func);
          }
          for (int i = 0; i < 2; ++i) {
            threads.emplace_back(write_no_slowdown_func);
          }
          // Sleep for 3s to allow the threads to insert themselves into the
          // write queue
          env_->SleepForMicroseconds(3000000ULL);
        }
        token.reset();
        threads.emplace_back(wakeup_writer);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  WriteOptions wo;
  wo.sync = false;
  wo.disableWAL = false;
  wo.no_slowdown = false;
  dbfull()->Put(wo, "foo", "bar");
  // We need the 2nd write to trigger delay. This is because delay is
  // estimated based on the last write size which is 0 for the first write.
  ASSERT_OK(dbfull()->Put(wo, "foo2", "bar2"));
  token.reset();

  for (auto& t : threads) {
    t.join();
  }
  ASSERT_GE(wait_count.load(), 1);

  wo.no_slowdown = true;
  ASSERT_OK(dbfull()->Put(wo, "foo3", "bar"));
}

#ifndef ROCKSDB_LITE
// Reopening with fewer levels than the DB already uses must fail with
// InvalidArgument; reopening with more levels must succeed.
TEST_F(DBTest, LevelLimitReopen) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);

  const std::string value(1024 * 1024, ' ');
  int i = 0;
  // Write 1MB values until a file shows up at level 2 of column family 1.
  while (NumTableFilesAtLevel(2, 1) == 0) {
    ASSERT_OK(Put(1, Key(i++), value));
  }

  options.num_levels = 1;
  options.max_bytes_for_level_multiplier_additional.resize(1, 1);
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ(s.IsInvalidArgument(), true);
  ASSERT_EQ(s.ToString(),
            "Invalid argument: db has more levels than options.num_levels");

  options.num_levels = 10;
  options.max_bytes_for_level_multiplier_additional.resize(10, 1);
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
}
#endif  // ROCKSDB_LITE

// A SingleDelete issued after a Put hides that key and leaves other keys
// untouched.
TEST_F(DBTest, PutSingleDeleteGet) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo2", "v2"));
    ASSERT_EQ("v2", Get(1, "foo2"));
    ASSERT_OK(SingleDelete(1, "foo"));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}

// Exercises Get()/MultiGet() with read_tier = kPersistedTier, with and without
// the WAL, across three rounds: unflushed puts, flushed puts plus a new
// unflushed put, and deletes that are only partially flushed.
TEST_F(DBTest, ReadFromPersistedTier) {
  do {
    Random rnd(301);
    Options options = CurrentOptions();
    for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
      CreateAndReopenWithCF({"pikachu"}, options);
      WriteOptions wopt;
      wopt.disableWAL = (disableWAL == 1);
      // 1st round: put but not flush
      ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
      ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
      ASSERT_EQ("first", Get(1, "foo"));
      ASSERT_EQ("one", Get(1, "bar"));

      // Read directly from persisted data.
      ReadOptions ropt;
      ropt.read_tier = kPersistedTier;
      std::string value;
      if (wopt.disableWAL) {
        // as data has not yet been flushed, we expect not found.
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
      } else {
        ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
        ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
      }

      // Multiget
      std::vector<ColumnFamilyHandle*> multiget_cfs;
      multiget_cfs.push_back(handles_[1]);
      multiget_cfs.push_back(handles_[1]);
      std::vector<Slice> multiget_keys;
      multiget_keys.push_back("foo");
      multiget_keys.push_back("bar");
      std::vector<std::string> multiget_values;
      auto statuses =
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
      if (wopt.disableWAL) {
        ASSERT_TRUE(statuses[0].IsNotFound());
        ASSERT_TRUE(statuses[1].IsNotFound());
      } else {
        ASSERT_OK(statuses[0]);
        ASSERT_OK(statuses[1]);
      }

      // 2nd round: flush and put a new value in memtable.
      ASSERT_OK(Flush(1));
      ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));

      // once the data has been flushed, we are able to get the
      // data when kPersistedTier is used.
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
      ASSERT_EQ(value, "first");
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
      ASSERT_EQ(value, "one");
      if (wopt.disableWAL) {
        ASSERT_TRUE(
            db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
      } else {
        ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
        ASSERT_EQ(value, "hello");
      }

      // Expect same result in multiget
      multiget_cfs.push_back(handles_[1]);
      multiget_keys.push_back("rocksdb");
      statuses =
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
      ASSERT_TRUE(statuses[0].ok());
      ASSERT_EQ("first", multiget_values[0]);
      ASSERT_TRUE(statuses[1].ok());
      ASSERT_EQ("one", multiget_values[1]);
      if (wopt.disableWAL) {
        ASSERT_TRUE(statuses[2].IsNotFound());
      } else {
        ASSERT_OK(statuses[2]);
      }

      // 3rd round: delete and flush
      ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
      Flush(1);
      ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));

      ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
      if (wopt.disableWAL) {
        // Still expect finding the value as its delete has not yet been
        // flushed.
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
        ASSERT_EQ(value, "one");
      } else {
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
      }
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
      ASSERT_EQ(value, "hello");

      statuses =
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
      ASSERT_TRUE(statuses[0].IsNotFound());
      if (wopt.disableWAL) {
        ASSERT_TRUE(statuses[1].ok());
        ASSERT_EQ("one", multiget_values[1]);
      } else {
        ASSERT_TRUE(statuses[1].IsNotFound());
      }
      ASSERT_TRUE(statuses[2].ok());
      ASSERT_EQ("hello", multiget_values[2]);
      if (wopt.disableWAL == 0) {
        DestroyAndReopen(options);
      }
    }
  } while (ChangeOptions());
}

TEST_F(DBTest, SingleDeleteFlush) {
  // Test to check whether flushing preserves a single delete hidden
  // behind a put.
  do {
    Random rnd(301);

    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);

    // Put values on second level (so that they will not be in the same
    // compaction as the other operations.
    Put(1, "foo", "first");
    Put(1, "bar", "one");
    ASSERT_OK(Flush(1));
    MoveFilesToLevel(2, 1);

    // (Single) delete hidden by a put
    SingleDelete(1, "foo");
    Put(1, "foo", "second");
    Delete(1, "bar");
    Put(1, "bar", "two");
    ASSERT_OK(Flush(1));

    SingleDelete(1, "foo");
    Delete(1, "bar");
    ASSERT_OK(Flush(1));

    dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
                           nullptr);

    ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}

TEST_F(DBTest, SingleDeletePutFlush) {
  // Single deletes that encounter the matching put in a flush should get
  // removed.
  do {
    Random rnd(301);

    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);

    Put(1, "foo", Slice());
    Put(1, "a", Slice());
    SingleDelete(1, "a");
    ASSERT_OK(Flush(1));

    ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
    // Skip FIFO and universal compaction because they do not apply to the test
    // case. Skip MergePut because single delete does not get removed when it
    // encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
                         kSkipMergePut));
}

// Disable because not all platform can run it.
// It requires more than 9GB memory to run it, With single allocation
// of more than 3GB.
TEST_F(DBTest, DISABLED_SanitizeVeryVeryLargeValue) { const size_t kValueSize = 4 * size_t{1024 * 1024 * 1024}; // 4GB value std::string raw(kValueSize, 'v'); Options options = CurrentOptions(); options.env = env_; options.merge_operator = MergeOperators::CreatePutOperator(); options.write_buffer_size = 100000; // Small write buffer options.paranoid_checks = true; DestroyAndReopen(options); ASSERT_OK(Put("boo", "v1")); ASSERT_TRUE(Put("foo", raw).IsInvalidArgument()); ASSERT_TRUE(Merge("foo", raw).IsInvalidArgument()); WriteBatch wb; ASSERT_TRUE(wb.Put("foo", raw).IsInvalidArgument()); ASSERT_TRUE(wb.Merge("foo", raw).IsInvalidArgument()); Slice value_slice = raw; Slice key_slice = "foo"; SliceParts sp_key(&key_slice, 1); SliceParts sp_value(&value_slice, 1); ASSERT_TRUE(wb.Put(sp_key, sp_value).IsInvalidArgument()); ASSERT_TRUE(wb.Merge(sp_key, sp_value).IsInvalidArgument()); } // Disable because not all platform can run it. // It requires more than 9GB memory to run it, With single allocation // of more than 3GB. TEST_F(DBTest, DISABLED_VeryLargeValue) { const size_t kValueSize = 3221225472u; // 3GB value const size_t kKeySize = 8388608u; // 8MB key std::string raw(kValueSize, 'v'); std::string key1(kKeySize, 'c'); std::string key2(kKeySize, 'd'); Options options = CurrentOptions(); options.env = env_; options.write_buffer_size = 100000; // Small write buffer options.paranoid_checks = true; DestroyAndReopen(options); ASSERT_OK(Put("boo", "v1")); ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Put(key1, raw)); raw[0] = 'w'; ASSERT_OK(Put(key2, raw)); dbfull()->TEST_WaitForFlushMemTable(); #ifndef ROCKSDB_LITE ASSERT_EQ(1, NumTableFilesAtLevel(0)); #endif // !ROCKSDB_LITE std::string value; Status s = db_->Get(ReadOptions(), key1, &value); ASSERT_OK(s); ASSERT_EQ(kValueSize, value.size()); ASSERT_EQ('v', value[0]); s = db_->Get(ReadOptions(), key2, &value); ASSERT_OK(s); ASSERT_EQ(kValueSize, value.size()); ASSERT_EQ('w', value[0]); // Compact all files. 
Flush(); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); // Check DB is not in read-only state. ASSERT_OK(Put("boo", "v1")); s = db_->Get(ReadOptions(), key1, &value); ASSERT_OK(s); ASSERT_EQ(kValueSize, value.size()); ASSERT_EQ('v', value[0]); s = db_->Get(ReadOptions(), key2, &value); ASSERT_OK(s); ASSERT_EQ(kValueSize, value.size()); ASSERT_EQ('w', value[0]); } TEST_F(DBTest, GetFromImmutableLayer) { do { Options options = CurrentOptions(); options.env = env_; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_EQ("v1", Get(1, "foo")); // Block sync calls env_->delay_sstable_sync_.store(true, std::memory_order_release); Put(1, "k1", std::string(100000, 'x')); // Fill memtable Put(1, "k2", std::string(100000, 'y')); // Trigger flush ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("NOT_FOUND", Get(0, "foo")); // Release sync calls env_->delay_sstable_sync_.store(false, std::memory_order_release); } while (ChangeOptions()); } TEST_F(DBTest, GetLevel0Ordering) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); // Check that we process level-0 files in correct order. The code // below generates two level-0 files where the earlier one comes // before the later one in the level-0 file list since the earlier // one has a smaller "smallest" key. 
ASSERT_OK(Put(1, "bar", "b")); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "foo", "v2")); ASSERT_OK(Flush(1)); ASSERT_EQ("v2", Get(1, "foo")); } while (ChangeOptions()); } TEST_F(DBTest, WrongLevel0Config) { Options options = CurrentOptions(); Close(); ASSERT_OK(DestroyDB(dbname_, options)); options.level0_stop_writes_trigger = 1; options.level0_slowdown_writes_trigger = 2; options.level0_file_num_compaction_trigger = 3; ASSERT_OK(DB::Open(options, dbname_, &db_)); } #ifndef ROCKSDB_LITE TEST_F(DBTest, GetOrderedByLevels) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); Compact(1, "a", "z"); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_OK(Put(1, "foo", "v2")); ASSERT_EQ("v2", Get(1, "foo")); ASSERT_OK(Flush(1)); ASSERT_EQ("v2", Get(1, "foo")); } while (ChangeOptions()); } TEST_F(DBTest, GetPicksCorrectFile) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); // Arrange to have multiple files in a non-level-0 level. ASSERT_OK(Put(1, "a", "va")); Compact(1, "a", "b"); ASSERT_OK(Put(1, "x", "vx")); Compact(1, "x", "y"); ASSERT_OK(Put(1, "f", "vf")); Compact(1, "f", "g"); ASSERT_EQ("va", Get(1, "a")); ASSERT_EQ("vf", Get(1, "f")); ASSERT_EQ("vx", Get(1, "x")); } while (ChangeOptions()); } TEST_F(DBTest, GetEncountersEmptyLevel) { do { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); // Arrange for the following to happen: // * sstable A in level 0 // * nothing in level 1 // * sstable B in level 2 // Then do enough Get() calls to arrange for an automatic compaction // of sstable A. A bug would cause the compaction to be marked as // occurring at level 1 (instead of the correct level 0). 
// Step 1: First place sstables in levels 0 and 2 Put(1, "a", "begin"); Put(1, "z", "end"); ASSERT_OK(Flush(1)); dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); Put(1, "a", "begin"); Put(1, "z", "end"); ASSERT_OK(Flush(1)); ASSERT_GT(NumTableFilesAtLevel(0, 1), 0); ASSERT_GT(NumTableFilesAtLevel(2, 1), 0); // Step 2: clear level 1 if necessary. dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1); // Step 3: read a bunch of times for (int i = 0; i < 1000; i++) { ASSERT_EQ("NOT_FOUND", Get(1, "missing")); } // Step 4: Wait for compaction to finish dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1); // XXX } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction)); } #endif // ROCKSDB_LITE TEST_F(DBTest, FlushMultipleMemtable) { do { Options options = CurrentOptions(); WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; options.max_write_buffer_number = 4; options.min_write_buffer_number_to_merge = 3; options.max_write_buffer_size_to_maintain = -1; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); ASSERT_OK(Flush(1)); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v1", Get(1, "bar")); ASSERT_OK(Flush(1)); } while (ChangeCompactOptions()); } #ifndef ROCKSDB_LITE TEST_F(DBTest, FlushSchedule) { Options options = CurrentOptions(); options.disable_auto_compactions = true; options.level0_stop_writes_trigger = 1 << 10; options.level0_slowdown_writes_trigger = 1 << 10; options.min_write_buffer_number_to_merge = 1; options.max_write_buffer_size_to_maintain = static_cast(options.write_buffer_size); options.max_write_buffer_number = 2; options.write_buffer_size = 120 * 1024; 
CreateAndReopenWithCF({"pikachu"}, options); std::vector threads; std::atomic thread_num(0); // each column family will have 5 thread, each thread generating 2 memtables. // each column family should end up with 10 table files std::function fill_memtable_func = [&]() { int a = thread_num.fetch_add(1); Random rnd(a); WriteOptions wo; // this should fill up 2 memtables for (int k = 0; k < 5000; ++k) { ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), "")); } }; for (int i = 0; i < 10; ++i) { threads.emplace_back(fill_memtable_func); } for (auto& t : threads) { t.join(); } auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default"); auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu"); ASSERT_LE(default_tables, static_cast(10)); ASSERT_GT(default_tables, static_cast(0)); ASSERT_LE(pikachu_tables, static_cast(10)); ASSERT_GT(pikachu_tables, static_cast(0)); } #endif // ROCKSDB_LITE namespace { class KeepFilter : public CompactionFilter { public: bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { return false; } const char* Name() const override { return "KeepFilter"; } }; class KeepFilterFactory : public CompactionFilterFactory { public: explicit KeepFilterFactory(bool check_context = false) : check_context_(check_context) {} std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context) override { if (check_context_) { EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction); EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction); } return std::unique_ptr(new KeepFilter()); } const char* Name() const override { return "KeepFilterFactory"; } bool check_context_; std::atomic_bool expect_full_compaction_; std::atomic_bool expect_manual_compaction_; }; class DelayFilter : public CompactionFilter { public: explicit DelayFilter(DBTestBase* d) : db_test(d) {} bool Filter(int /*level*/, 
const Slice& /*key*/, const Slice& /*value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { db_test->env_->addon_time_.fetch_add(1000); return true; } const char* Name() const override { return "DelayFilter"; } private: DBTestBase* db_test; }; class DelayFilterFactory : public CompactionFilterFactory { public: explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {} std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& /*context*/) override { return std::unique_ptr(new DelayFilter(db_test)); } const char* Name() const override { return "DelayFilterFactory"; } private: DBTestBase* db_test; }; } // namespace #ifndef ROCKSDB_LITE static std::string CompressibleString(Random* rnd, int len) { std::string r; test::CompressibleString(rnd, 0.8, len, &r); return r; } #endif // ROCKSDB_LITE TEST_F(DBTest, FailMoreDbPaths) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 10000000); options.db_paths.emplace_back(dbname_ + "_2", 1000000); options.db_paths.emplace_back(dbname_ + "_3", 1000000); options.db_paths.emplace_back(dbname_ + "_4", 1000000); options.db_paths.emplace_back(dbname_ + "_5", 1000000); ASSERT_TRUE(TryReopen(options).IsNotSupported()); } void CheckColumnFamilyMeta( const ColumnFamilyMetaData& cf_meta, const std::vector>& files_by_level, uint64_t start_time, uint64_t end_time) { ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName); ASSERT_EQ(cf_meta.levels.size(), files_by_level.size()); uint64_t cf_size = 0; size_t file_count = 0; for (size_t i = 0; i < cf_meta.levels.size(); ++i) { const auto& level_meta_from_cf = cf_meta.levels[i]; const auto& level_meta_from_files = files_by_level[i]; ASSERT_EQ(level_meta_from_cf.level, i); ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size()); file_count += level_meta_from_cf.files.size(); uint64_t level_size = 0; for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) { const auto& file_meta_from_cf = level_meta_from_cf.files[j]; 
const auto& file_meta_from_files = level_meta_from_files[j]; level_size += file_meta_from_cf.size; ASSERT_EQ(file_meta_from_cf.file_number, file_meta_from_files.fd.GetNumber()); ASSERT_EQ(file_meta_from_cf.file_number, TableFileNameToNumber(file_meta_from_cf.name)); ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size); ASSERT_EQ(file_meta_from_cf.smallest_seqno, file_meta_from_files.fd.smallest_seqno); ASSERT_EQ(file_meta_from_cf.largest_seqno, file_meta_from_files.fd.largest_seqno); ASSERT_EQ(file_meta_from_cf.smallestkey, file_meta_from_files.smallest.user_key().ToString()); ASSERT_EQ(file_meta_from_cf.largestkey, file_meta_from_files.largest.user_key().ToString()); ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number, file_meta_from_files.oldest_blob_file_number); ASSERT_EQ(file_meta_from_cf.oldest_ancester_time, file_meta_from_files.oldest_ancester_time); ASSERT_EQ(file_meta_from_cf.file_creation_time, file_meta_from_files.file_creation_time); ASSERT_GE(file_meta_from_cf.file_creation_time, start_time); ASSERT_LE(file_meta_from_cf.file_creation_time, end_time); ASSERT_GE(file_meta_from_cf.oldest_ancester_time, start_time); ASSERT_LE(file_meta_from_cf.oldest_ancester_time, end_time); } ASSERT_EQ(level_meta_from_cf.size, level_size); cf_size += level_size; } ASSERT_EQ(cf_meta.file_count, file_count); ASSERT_EQ(cf_meta.size, cf_size); } void CheckLiveFilesMeta( const std::vector& live_file_meta, const std::vector>& files_by_level) { size_t total_file_count = 0; for (const auto& f : files_by_level) { total_file_count += f.size(); } ASSERT_EQ(live_file_meta.size(), total_file_count); int level = 0; int i = 0; for (const auto& meta : live_file_meta) { if (level != meta.level) { level = meta.level; i = 0; } ASSERT_LT(i, files_by_level[level].size()); const auto& expected_meta = files_by_level[level][i]; ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName); ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber()); ASSERT_EQ(meta.file_number, 
TableFileNameToNumber(meta.name)); ASSERT_EQ(meta.size, expected_meta.fd.file_size); ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno); ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno); ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString()); ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString()); ASSERT_EQ(meta.oldest_blob_file_number, expected_meta.oldest_blob_file_number); ++i; } } #ifndef ROCKSDB_LITE TEST_F(DBTest, MetaDataTest) { Options options = CurrentOptions(); options.create_if_missing = true; options.disable_auto_compactions = true; int64_t temp_time = 0; options.env->GetCurrentTime(&temp_time); uint64_t start_time = static_cast(temp_time); DestroyAndReopen(options); Random rnd(301); int key_index = 0; for (int i = 0; i < 100; ++i) { // Add a single blob reference to each file std::string blob_index; BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000, /* offset */ 1234, /* size */ 5678, kNoCompression); WriteBatch batch; ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index), blob_index)); ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); ++key_index; // Fill up the rest of the file with random values. 
GenerateNewFile(&rnd, &key_index, /* nowait */ true); Flush(); } std::vector> files_by_level; dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level); options.env->GetCurrentTime(&temp_time); uint64_t end_time = static_cast(temp_time); ColumnFamilyMetaData cf_meta; db_->GetColumnFamilyMetaData(&cf_meta); CheckColumnFamilyMeta(cf_meta, files_by_level, start_time, end_time); std::vector live_file_meta; db_->GetLiveFilesMetaData(&live_file_meta); CheckLiveFilesMeta(live_file_meta, files_by_level); } namespace { void MinLevelHelper(DBTest* self, Options& options) { Random rnd(301); for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { std::vector values; // Write 120KB (12 values, each 10K) for (int i = 0; i < 12; i++) { values.push_back(DBTestBase::RandomString(&rnd, 10000)); ASSERT_OK(self->Put(DBTestBase::Key(i), values[i])); } self->dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1); } // generate one more file in level-0, and should trigger level-0 compaction std::vector values; for (int i = 0; i < 12; i++) { values.push_back(DBTestBase::RandomString(&rnd, 10000)); ASSERT_OK(self->Put(DBTestBase::Key(i), values[i])); } self->dbfull()->TEST_WaitForCompact(); ASSERT_EQ(self->NumTableFilesAtLevel(0), 0); ASSERT_EQ(self->NumTableFilesAtLevel(1), 1); } // returns false if the calling-Test should be skipped bool MinLevelToCompress(CompressionType& type, Options& options, int wbits, int lev, int strategy) { fprintf(stderr, "Test with compression options : window_bits = %d, level = %d, " "strategy = %d}\n", wbits, lev, strategy); options.write_buffer_size = 100 << 10; // 100KB options.arena_block_size = 4096; options.num_levels = 3; options.level0_file_num_compaction_trigger = 3; options.create_if_missing = true; if (Snappy_Supported()) { type = kSnappyCompression; fprintf(stderr, "using snappy\n"); } else if (Zlib_Supported()) { type = kZlibCompression; fprintf(stderr, "using zlib\n"); 
} else if (BZip2_Supported()) { type = kBZip2Compression; fprintf(stderr, "using bzip2\n"); } else if (LZ4_Supported()) { type = kLZ4Compression; fprintf(stderr, "using lz4\n"); } else if (XPRESS_Supported()) { type = kXpressCompression; fprintf(stderr, "using xpress\n"); } else if (ZSTD_Supported()) { type = kZSTD; fprintf(stderr, "using ZSTD\n"); } else { fprintf(stderr, "skipping test, compression disabled\n"); return false; } options.compression_per_level.resize(options.num_levels); // do not compress L0 for (int i = 0; i < 1; i++) { options.compression_per_level[i] = kNoCompression; } for (int i = 1; i < options.num_levels; i++) { options.compression_per_level[i] = type; } return true; } } // namespace TEST_F(DBTest, MinLevelToCompress1) { Options options = CurrentOptions(); CompressionType type = kSnappyCompression; if (!MinLevelToCompress(type, options, -14, -1, 0)) { return; } Reopen(options); MinLevelHelper(this, options); // do not compress L0 and L1 for (int i = 0; i < 2; i++) { options.compression_per_level[i] = kNoCompression; } for (int i = 2; i < options.num_levels; i++) { options.compression_per_level[i] = type; } DestroyAndReopen(options); MinLevelHelper(this, options); } TEST_F(DBTest, MinLevelToCompress2) { Options options = CurrentOptions(); CompressionType type = kSnappyCompression; if (!MinLevelToCompress(type, options, 15, -1, 0)) { return; } Reopen(options); MinLevelHelper(this, options); // do not compress L0 and L1 for (int i = 0; i < 2; i++) { options.compression_per_level[i] = kNoCompression; } for (int i = 2; i < options.num_levels; i++) { options.compression_per_level[i] = type; } DestroyAndReopen(options); MinLevelHelper(this, options); } // This test may fail because of a legit case that multiple L0 files // are trivial moved to L1. 
TEST_F(DBTest, DISABLED_RepeatedWritesToSameKey) { do { Options options = CurrentOptions(); options.env = env_; options.write_buffer_size = 100000; // Small write buffer CreateAndReopenWithCF({"pikachu"}, options); // We must have at most one file per level except for level-0, // which may have up to kL0_StopWritesTrigger files. const int kMaxFiles = options.num_levels + options.level0_stop_writes_trigger; Random rnd(301); std::string value = RandomString(&rnd, static_cast(2 * options.write_buffer_size)); for (int i = 0; i < 5 * kMaxFiles; i++) { ASSERT_OK(Put(1, "key", value)); ASSERT_LE(TotalTableFiles(1), kMaxFiles); } } while (ChangeCompactOptions()); } #endif // ROCKSDB_LITE TEST_F(DBTest, SparseMerge) { do { Options options = CurrentOptions(); options.compression = kNoCompression; CreateAndReopenWithCF({"pikachu"}, options); FillLevels("A", "Z", 1); // Suppose there is: // small amount of data with prefix A // large amount of data with prefix B // small amount of data with prefix C // and that recent updates have made small changes to all three prefixes. // Check that we do not do a compaction that merges all of B in one shot. const std::string value(1000, 'x'); Put(1, "A", "va"); // Write approximately 100MB of "B" values for (int i = 0; i < 100000; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); Put(1, key, value); } Put(1, "C", "vc"); ASSERT_OK(Flush(1)); dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); // Make sparse update Put(1, "A", "va2"); Put(1, "B100", "bvalue2"); Put(1, "C", "vc2"); ASSERT_OK(Flush(1)); // Compactions should not cause us to create a situation where // a file overlaps too much data at the next level. 
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]), 20 * 1048576); dbfull()->TEST_CompactRange(0, nullptr, nullptr); ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]), 20 * 1048576); dbfull()->TEST_CompactRange(1, nullptr, nullptr); ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]), 20 * 1048576); } while (ChangeCompactOptions()); } #ifndef ROCKSDB_LITE static bool Between(uint64_t val, uint64_t low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", (unsigned long long)(val), (unsigned long long)(low), (unsigned long long)(high)); } return result; } TEST_F(DBTest, ApproximateSizesMemTable) { Options options = CurrentOptions(); options.write_buffer_size = 100000000; // Large write buffer options.compression = kNoCompression; options.create_if_missing = true; DestroyAndReopen(options); auto default_cf = db_->DefaultColumnFamily(); const int N = 128; Random rnd(301); for (int i = 0; i < N; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024))); } uint64_t size; std::string start = Key(50); std::string end = Key(60); Range r(start, end); SizeApproximationOptions size_approx_options; size_approx_options.include_memtabtles = true; size_approx_options.include_files = true; db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_GT(size, 6000); ASSERT_LT(size, 204800); // Zero if not including mem table db_->GetApproximateSizes(&r, 1, &size); ASSERT_EQ(size, 0); start = Key(500); end = Key(600); r = Range(start, end); db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_EQ(size, 0); for (int i = 0; i < N; i++) { ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024))); } start = Key(500); end = Key(600); r = Range(start, end); db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_EQ(size, 0); start = Key(100); end = Key(1020); r = Range(start, end); 
db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_GT(size, 6000); options.max_write_buffer_number = 8; options.min_write_buffer_number_to_merge = 5; options.write_buffer_size = 1024 * N; // Not very large DestroyAndReopen(options); default_cf = db_->DefaultColumnFamily(); int keys[N * 3]; for (int i = 0; i < N; i++) { keys[i * 3] = i * 5; keys[i * 3 + 1] = i * 5 + 1; keys[i * 3 + 2] = i * 5 + 2; } RandomShuffle(std::begin(keys), std::end(keys)); for (int i = 0; i < N * 3; i++) { ASSERT_OK(Put(Key(keys[i] + 1000), RandomString(&rnd, 1024))); } start = Key(100); end = Key(300); r = Range(start, end); db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_EQ(size, 0); start = Key(1050); end = Key(1080); r = Range(start, end); db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_GT(size, 6000); start = Key(2100); end = Key(2300); r = Range(start, end); db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_EQ(size, 0); start = Key(1050); end = Key(1080); r = Range(start, end); uint64_t size_with_mt, size_without_mt; db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size_with_mt); ASSERT_GT(size_with_mt, 6000); db_->GetApproximateSizes(&r, 1, &size_without_mt); ASSERT_EQ(size_without_mt, 0); Flush(); for (int i = 0; i < N; i++) { ASSERT_OK(Put(Key(i + 1000), RandomString(&rnd, 1024))); } start = Key(1050); end = Key(1080); r = Range(start, end); db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size_with_mt); db_->GetApproximateSizes(&r, 1, &size_without_mt); ASSERT_GT(size_with_mt, size_without_mt); ASSERT_GT(size_without_mt, 6000); // Check that include_memtabtles flag works as expected size_approx_options.include_memtabtles = false; db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); ASSERT_EQ(size, size_without_mt); // Check that files_size_error_margin works as expected, when the heuristic // conditions are 
not met start = Key(1); end = Key(1000 + N - 2); r = Range(start, end); size_approx_options.files_size_error_margin = -1.0; // disabled db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); uint64_t size2; size_approx_options.files_size_error_margin = 0.5; // enabled, but not used db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2); ASSERT_EQ(size, size2); } TEST_F(DBTest, ApproximateSizesFilesWithErrorMargin) { // Roughly 4 keys per data block, 1000 keys per file, // with filter substantially larger than a data block BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(16)); table_options.block_size = 100; Options options = CurrentOptions(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.write_buffer_size = 24 * 1024; options.compression = kNoCompression; options.create_if_missing = true; options.target_file_size_base = 24 * 1024; DestroyAndReopen(options); const auto default_cf = db_->DefaultColumnFamily(); const int N = 64000; Random rnd(301); for (int i = 0; i < N; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 24))); } // Flush everything to files Flush(); // Compact the entire key space into the next level db_->CompactRange(CompactRangeOptions(), default_cf, nullptr, nullptr); // Write more keys for (int i = N; i < (N + N / 4); i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 24))); } // Flush everything to files again Flush(); // Wait for compaction to finish ASSERT_OK(dbfull()->TEST_WaitForCompact()); { const std::string start = Key(0); const std::string end = Key(2 * N); const Range r(start, end); SizeApproximationOptions size_approx_options; size_approx_options.include_memtabtles = false; size_approx_options.include_files = true; size_approx_options.files_size_error_margin = -1.0; // disabled // Get the precise size without any approximation heuristic uint64_t size; db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size); 
ASSERT_NE(size, 0); // Get the size with an approximation heuristic uint64_t size2; const double error_margin = 0.2; size_approx_options.files_size_error_margin = error_margin; db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size2); ASSERT_LT(size2, size * (1 + error_margin)); ASSERT_GT(size2, size * (1 - error_margin)); } { // Ensure that metadata is not falsely attributed only to the last data in // the file. (In some applications, filters can be large portion of data // size.) // Perform many queries over small range, enough to ensure crossing file // boundary, and make sure we never see a spike for large filter. for (int i = 0; i < 3000; i += 10) { const std::string start = Key(i); const std::string end = Key(i + 11); // overlap by 1 key const Range r(start, end); uint64_t size; db_->GetApproximateSizes(&r, 1, &size); ASSERT_LE(size, 11 * 100); } } } TEST_F(DBTest, GetApproximateMemTableStats) { Options options = CurrentOptions(); options.write_buffer_size = 100000000; options.compression = kNoCompression; options.create_if_missing = true; DestroyAndReopen(options); const int N = 128; Random rnd(301); for (int i = 0; i < N; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024))); } uint64_t count; uint64_t size; std::string start = Key(50); std::string end = Key(60); Range r(start, end); db_->GetApproximateMemTableStats(r, &count, &size); ASSERT_GT(count, 0); ASSERT_LE(count, N); ASSERT_GT(size, 6000); ASSERT_LT(size, 204800); start = Key(500); end = Key(600); r = Range(start, end); db_->GetApproximateMemTableStats(r, &count, &size); ASSERT_EQ(count, 0); ASSERT_EQ(size, 0); Flush(); start = Key(50); end = Key(60); r = Range(start, end); db_->GetApproximateMemTableStats(r, &count, &size); ASSERT_EQ(count, 0); ASSERT_EQ(size, 0); for (int i = 0; i < N; i++) { ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024))); } start = Key(100); end = Key(1020); r = Range(start, end); db_->GetApproximateMemTableStats(r, &count, &size); ASSERT_GT(count, 
20); ASSERT_GT(size, 6000); } TEST_F(DBTest, ApproximateSizes) { do { Options options = CurrentOptions(); options.write_buffer_size = 100000000; // Large write buffer options.compression = kNoCompression; options.create_if_missing = true; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0)); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0)); // Write 8MB (80 values, each 100K) ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); const int N = 80; static const int S1 = 100000; static const int S2 = 105000; // Allow some expansion from metadata Random rnd(301); for (int i = 0; i < N; i++) { ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1))); } // 0 because GetApproximateSizes() does not account for memtable space ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0)); // Check sizes across recovery by reopening a few times for (int run = 0; run < 3; run++) { ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int compact_start = 0; compact_start < N; compact_start += 10) { for (int i = 0; i < N; i += 10) { ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i)); ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1), S2 * (i + 1))); ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10)); } ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50)); ASSERT_TRUE( Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50)); std::string cstart_str = Key(compact_start); std::string cend_str = Key(compact_start + 9); Slice cstart = cstart_str; Slice cend = cend_str; dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]); } ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_GT(NumTableFilesAtLevel(1, 1), 0); } // ApproximateOffsetOf() is not yet implemented in plain table format. 
} while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction | kSkipPlainTable | kSkipHashIndex)); } TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) { do { Options options = CurrentOptions(); options.compression = kNoCompression; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); std::string big1 = RandomString(&rnd, 100000); ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000))); ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000))); ASSERT_OK(Put(1, Key(2), big1)); ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000))); ASSERT_OK(Put(1, Key(4), big1)); ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000))); ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000))); ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000))); // Check sizes across recovery by reopening a few times for (int run = 0; run < 3; run++) { ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0)); ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000)); ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000)); ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000)); ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000)); ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 232000)); ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 242000)); // Ensure some overhead is accounted for, even without including all ASSERT_TRUE(Between(Size("", Key(7), 1), 540500, 545000)); ASSERT_TRUE(Between(Size("", Key(8), 1), 550500, 555000)); ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110100, 111000)); dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); } // ApproximateOffsetOf() is not yet implemented in plain table format. 
} while (ChangeOptions(kSkipPlainTable)); } #endif // ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBTest, Snapshot) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); Put(0, "foo", "0v1"); Put(1, "foo", "1v1"); const Snapshot* s1 = db_->GetSnapshot(); ASSERT_EQ(1U, GetNumSnapshots()); uint64_t time_snap1 = GetTimeOldestSnapshots(); ASSERT_GT(time_snap1, 0U); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); Put(0, "foo", "0v2"); Put(1, "foo", "1v2"); env_->addon_time_.fetch_add(1); const Snapshot* s2 = db_->GetSnapshot(); ASSERT_EQ(2U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); Put(0, "foo", "0v3"); Put(1, "foo", "1v3"); { ManagedSnapshot s3(db_); ASSERT_EQ(3U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); Put(0, "foo", "0v4"); Put(1, "foo", "1v4"); ASSERT_EQ("0v1", Get(0, "foo", s1)); ASSERT_EQ("1v1", Get(1, "foo", s1)); ASSERT_EQ("0v2", Get(0, "foo", s2)); ASSERT_EQ("1v2", Get(1, "foo", s2)); ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot())); ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot())); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); } ASSERT_EQ(2U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); ASSERT_EQ("0v1", Get(0, "foo", s1)); ASSERT_EQ("1v1", Get(1, "foo", s1)); ASSERT_EQ("0v2", Get(0, "foo", s2)); ASSERT_EQ("1v2", Get(1, "foo", s2)); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); db_->ReleaseSnapshot(s1); ASSERT_EQ("0v2", Get(0, "foo", s2)); ASSERT_EQ("1v2", Get(1, "foo", s2)); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); ASSERT_EQ(1U, GetNumSnapshots()); ASSERT_LT(time_snap1, GetTimeOldestSnapshots()); 
ASSERT_EQ(GetSequenceOldestSnapshots(), s2->GetSequenceNumber()); db_->ReleaseSnapshot(s2); ASSERT_EQ(0U, GetNumSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), 0); ASSERT_EQ("0v4", Get(0, "foo")); ASSERT_EQ("1v4", Get(1, "foo")); } while (ChangeOptions()); } TEST_F(DBTest, HiddenValuesAreRemoved) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { Options options = CurrentOptions(options_override); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); FillLevels("a", "z", 1); std::string big = RandomString(&rnd, 50000); Put(1, "foo", big); Put(1, "pastfoo", "v"); const Snapshot* snapshot = db_->GetSnapshot(); Put(1, "foo", "tiny"); Put(1, "pastfoo2", "v2"); // Advance sequence number one more ASSERT_OK(Flush(1)); ASSERT_GT(NumTableFilesAtLevel(0, 1), 0); ASSERT_EQ(big, Get(1, "foo", snapshot)); ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000)); db_->ReleaseSnapshot(snapshot); ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]"); Slice x("x"); dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]); ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]"); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_GE(NumTableFilesAtLevel(1, 1), 1); dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]); ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]"); ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000)); // ApproximateOffsetOf() is not yet implemented in plain table format, // which is used by Size(). } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction | kSkipPlainTable)); } #endif // ROCKSDB_LITE TEST_F(DBTest, UnremovableSingleDelete) { // If we compact: // // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2) // // We do not want to end up with: // // Put(A, v1) Snapshot Put(A, v2) // // Because a subsequent SingleDelete(A) would delete the Put(A, v2) // but not Put(A, v1), so Get(A) would return v1. 
anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { Options options = CurrentOptions(options_override); options.disable_auto_compactions = true; CreateAndReopenWithCF({"pikachu"}, options); Put(1, "foo", "first"); const Snapshot* snapshot = db_->GetSnapshot(); SingleDelete(1, "foo"); Put(1, "foo", "second"); ASSERT_OK(Flush(1)); ASSERT_EQ("first", Get(1, "foo", snapshot)); ASSERT_EQ("second", Get(1, "foo")); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1)); SingleDelete(1, "foo"); ASSERT_EQ("first", Get(1, "foo", snapshot)); ASSERT_EQ("NOT_FOUND", Get(1, "foo")); dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ("first", Get(1, "foo", snapshot)); ASSERT_EQ("NOT_FOUND", Get(1, "foo")); db_->ReleaseSnapshot(snapshot); // Skip FIFO and universal compaction beccause they do not apply to the test // case. Skip MergePut because single delete does not get removed when it // encounters a merge. 
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut)); } #ifndef ROCKSDB_LITE TEST_F(DBTest, DeletionMarkers1) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); Put(1, "foo", "v1"); ASSERT_OK(Flush(1)); const int last = 2; MoveFilesToLevel(last, 1); // foo => v1 is now in last level ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); // Place a table at level last-1 to prevent merging with preceding mutation Put(1, "a", "begin"); Put(1, "z", "end"); Flush(1); MoveFilesToLevel(last - 1, 1); ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1); Delete(1, "foo"); Put(1, "foo", "v2"); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]"); ASSERT_OK(Flush(1)); // Moves to level last-2 ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); Slice z("z"); dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]); // DEL eliminated, but v1 remains because we aren't compacting that level // (DEL can be eliminated because v2 hides v1). ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]); // Merging last-1 w/ last, so we are the base level for "foo", so // DEL is removed. (as is v1). 
ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]"); } TEST_F(DBTest, DeletionMarkers2) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); Put(1, "foo", "v1"); ASSERT_OK(Flush(1)); const int last = 2; MoveFilesToLevel(last, 1); // foo => v1 is now in last level ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); // Place a table at level last-1 to prevent merging with preceding mutation Put(1, "a", "begin"); Put(1, "z", "end"); Flush(1); MoveFilesToLevel(last - 1, 1); ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1); ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1); Delete(1, "foo"); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]"); ASSERT_OK(Flush(1)); // Moves to level last-2 ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]"); dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]); // DEL kept: "last" file overlaps ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]"); dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]); // Merging last-1 w/ last, so we are the base level for "foo", so // DEL is removed. (as is v1). ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); } TEST_F(DBTest, OverlapInLevel0) { do { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); // Fill levels 1 and 2 to disable the pushing of new memtables to levels > // 0. ASSERT_OK(Put(1, "100", "v100")); ASSERT_OK(Put(1, "999", "v999")); Flush(1); MoveFilesToLevel(2, 1); ASSERT_OK(Delete(1, "100")); ASSERT_OK(Delete(1, "999")); Flush(1); MoveFilesToLevel(1, 1); ASSERT_EQ("0,1,1", FilesPerLevel(1)); // Make files spanning the following ranges in level-0: // files[0] 200 .. 900 // files[1] 300 .. 500 // Note that files are sorted by smallest key. 
ASSERT_OK(Put(1, "300", "v300")); ASSERT_OK(Put(1, "500", "v500")); Flush(1); ASSERT_OK(Put(1, "200", "v200")); ASSERT_OK(Put(1, "600", "v600")); ASSERT_OK(Put(1, "900", "v900")); Flush(1); ASSERT_EQ("2,1,1", FilesPerLevel(1)); // Compact away the placeholder files we created initially dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]); ASSERT_EQ("2", FilesPerLevel(1)); // Do a memtable compaction. Before bug-fix, the compaction would // not detect the overlap with level-0 files and would incorrectly place // the deletion in a deeper level. ASSERT_OK(Delete(1, "600")); Flush(1); ASSERT_EQ("3", FilesPerLevel(1)); ASSERT_EQ("NOT_FOUND", Get(1, "600")); } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction)); } #endif // ROCKSDB_LITE TEST_F(DBTest, ComparatorCheck) { class NewComparator : public Comparator { public: const char* Name() const override { return "rocksdb.NewComparator"; } int Compare(const Slice& a, const Slice& b) const override { return BytewiseComparator()->Compare(a, b); } void FindShortestSeparator(std::string* s, const Slice& l) const override { BytewiseComparator()->FindShortestSeparator(s, l); } void FindShortSuccessor(std::string* key) const override { BytewiseComparator()->FindShortSuccessor(key); } }; Options new_options, options; NewComparator cmp; do { options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); new_options = CurrentOptions(); new_options.comparator = &cmp; // only the non-default column family has non-matching comparator Status s = TryReopenWithColumnFamilies( {"default", "pikachu"}, std::vector({options, new_options})); ASSERT_TRUE(!s.ok()); ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) << s.ToString(); } while (ChangeCompactOptions()); } TEST_F(DBTest, CustomComparator) { class NumberComparator : public Comparator { public: const char* Name() const override { return "test.NumberComparator"; } int 
Compare(const Slice& a, const Slice& b) const override { return ToNumber(a) - ToNumber(b); } void FindShortestSeparator(std::string* s, const Slice& l) const override { ToNumber(*s); // Check format ToNumber(l); // Check format } void FindShortSuccessor(std::string* key) const override { ToNumber(*key); // Check format } private: static int ToNumber(const Slice& x) { // Check that there are no extra characters. EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']') << EscapeString(x); int val; char ignored; EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1) << EscapeString(x); return val; } }; Options new_options; NumberComparator cmp; do { new_options = CurrentOptions(); new_options.create_if_missing = true; new_options.comparator = &cmp; new_options.write_buffer_size = 4096; // Compact more often new_options.arena_block_size = 4096; new_options = CurrentOptions(new_options); DestroyAndReopen(new_options); CreateAndReopenWithCF({"pikachu"}, new_options); ASSERT_OK(Put(1, "[10]", "ten")); ASSERT_OK(Put(1, "[0x14]", "twenty")); for (int i = 0; i < 2; i++) { ASSERT_EQ("ten", Get(1, "[10]")); ASSERT_EQ("ten", Get(1, "[0xa]")); ASSERT_EQ("twenty", Get(1, "[20]")); ASSERT_EQ("twenty", Get(1, "[0x14]")); ASSERT_EQ("NOT_FOUND", Get(1, "[15]")); ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]")); Compact(1, "[0]", "[9999]"); } for (int run = 0; run < 2; run++) { for (int i = 0; i < 1000; i++) { char buf[100]; snprintf(buf, sizeof(buf), "[%d]", i * 10); ASSERT_OK(Put(1, buf, buf)); } Compact(1, "[0]", "[1000000]"); } } while (ChangeCompactOptions()); } TEST_F(DBTest, DBOpen_Options) { Options options = CurrentOptions(); std::string dbname = test::PerThreadDBPath("db_options_test"); ASSERT_OK(DestroyDB(dbname, options)); // Does not exist, and create_if_missing == false: error DB* db = nullptr; options.create_if_missing = false; Status s = DB::Open(options, dbname, &db); ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr); 
ASSERT_TRUE(db == nullptr); // Does not exist, and create_if_missing == true: OK options.create_if_missing = true; s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); delete db; db = nullptr; // Does exist, and error_if_exists == true: error options.create_if_missing = false; options.error_if_exists = true; s = DB::Open(options, dbname, &db); ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr); ASSERT_TRUE(db == nullptr); // Does exist, and error_if_exists == false: OK options.create_if_missing = true; options.error_if_exists = false; s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); delete db; db = nullptr; } TEST_F(DBTest, DBOpen_Change_NumLevels) { Options options = CurrentOptions(); options.create_if_missing = true; DestroyAndReopen(options); ASSERT_TRUE(db_ != nullptr); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "a", "123")); ASSERT_OK(Put(1, "b", "234")); Flush(1); MoveFilesToLevel(3, 1); Close(); options.create_if_missing = false; options.num_levels = 2; Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr); ASSERT_TRUE(db_ == nullptr); } TEST_F(DBTest, DestroyDBMetaDatabase) { std::string dbname = test::PerThreadDBPath("db_meta"); ASSERT_OK(env_->CreateDirIfMissing(dbname)); std::string metadbname = MetaDatabaseName(dbname, 0); ASSERT_OK(env_->CreateDirIfMissing(metadbname)); std::string metametadbname = MetaDatabaseName(metadbname, 0); ASSERT_OK(env_->CreateDirIfMissing(metametadbname)); // Destroy previous versions if they exist. Using the long way. 
Options options = CurrentOptions(); ASSERT_OK(DestroyDB(metametadbname, options)); ASSERT_OK(DestroyDB(metadbname, options)); ASSERT_OK(DestroyDB(dbname, options)); // Setup databases DB* db = nullptr; ASSERT_OK(DB::Open(options, dbname, &db)); delete db; db = nullptr; ASSERT_OK(DB::Open(options, metadbname, &db)); delete db; db = nullptr; ASSERT_OK(DB::Open(options, metametadbname, &db)); delete db; db = nullptr; // Delete databases ASSERT_OK(DestroyDB(dbname, options)); // Check if deletion worked. options.create_if_missing = false; ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok()); ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok()); ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok()); } #ifndef ROCKSDB_LITE TEST_F(DBTest, SnapshotFiles) { do { Options options = CurrentOptions(); options.write_buffer_size = 100000000; // Large write buffer CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); // Write 8MB (80 values, each 100K) ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); std::vector values; for (int i = 0; i < 80; i++) { values.push_back(RandomString(&rnd, 100000)); ASSERT_OK(Put((i < 40), Key(i), values[i])); } // assert that nothing makes it to disk yet. ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); // get a file snapshot uint64_t manifest_number = 0; uint64_t manifest_size = 0; std::vector files; dbfull()->DisableFileDeletions(); dbfull()->GetLiveFiles(files, &manifest_size); // CURRENT, MANIFEST, OPTIONS, *.sst files (one for each CF) ASSERT_EQ(files.size(), 5U); uint64_t number = 0; FileType type; // copy these files to a new snapshot directory std::string snapdir = dbname_ + ".snapdir/"; ASSERT_OK(env_->CreateDirIfMissing(snapdir)); for (size_t i = 0; i < files.size(); i++) { // our clients require that GetLiveFiles returns // files with "/" as first character! 
ASSERT_EQ(files[i][0], '/'); std::string src = dbname_ + files[i]; std::string dest = snapdir + files[i]; uint64_t size; ASSERT_OK(env_->GetFileSize(src, &size)); // record the number and the size of the // latest manifest file if (ParseFileName(files[i].substr(1), &number, &type)) { if (type == kDescriptorFile) { if (number > manifest_number) { manifest_number = number; ASSERT_GE(size, manifest_size); size = manifest_size; // copy only valid MANIFEST data } } } CopyFile(src, dest, size); } // release file snapshot dbfull()->DisableFileDeletions(); // overwrite one key, this key should not appear in the snapshot std::vector extras; for (unsigned int i = 0; i < 1; i++) { extras.push_back(RandomString(&rnd, 100000)); ASSERT_OK(Put(0, Key(i), extras[i])); } // verify that data in the snapshot are correct std::vector column_families; column_families.emplace_back("default", ColumnFamilyOptions()); column_families.emplace_back("pikachu", ColumnFamilyOptions()); std::vector cf_handles; DB* snapdb; DBOptions opts; opts.env = env_; opts.create_if_missing = false; Status stat = DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb); ASSERT_OK(stat); ReadOptions roptions; std::string val; for (unsigned int i = 0; i < 80; i++) { stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val); ASSERT_EQ(values[i].compare(val), 0); } for (auto cfh : cf_handles) { delete cfh; } delete snapdb; // look at the new live files after we added an 'extra' key // and after we took the first snapshot. uint64_t new_manifest_number = 0; uint64_t new_manifest_size = 0; std::vector newfiles; dbfull()->DisableFileDeletions(); dbfull()->GetLiveFiles(newfiles, &new_manifest_size); // find the new manifest file. assert that this manifest file is // the same one as in the previous snapshot. But its size should be // larger because we added an extra key after taking the // previous shapshot. 
for (size_t i = 0; i < newfiles.size(); i++) { std::string src = dbname_ + "/" + newfiles[i]; // record the lognumber and the size of the // latest manifest file if (ParseFileName(newfiles[i].substr(1), &number, &type)) { if (type == kDescriptorFile) { if (number > new_manifest_number) { uint64_t size; new_manifest_number = number; ASSERT_OK(env_->GetFileSize(src, &size)); ASSERT_GE(size, new_manifest_size); } } } } ASSERT_EQ(manifest_number, new_manifest_number); ASSERT_GT(new_manifest_size, manifest_size); // release file snapshot dbfull()->DisableFileDeletions(); } while (ChangeCompactOptions()); } TEST_F(DBTest, ReadonlyDBGetLiveManifestSize) { do { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 2; DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); ASSERT_OK(dbfull()->TEST_WaitForCompact()); Close(); ASSERT_OK(ReadOnlyReopen(options)); uint64_t manifest_size = 0; std::vector files; dbfull()->GetLiveFiles(files, &manifest_size); for (const std::string& f : files) { uint64_t number = 0; FileType type; if (ParseFileName(f.substr(1), &number, &type)) { if (type == kDescriptorFile) { uint64_t size_on_disk; env_->GetFileSize(dbname_ + "/" + f, &size_on_disk); ASSERT_EQ(manifest_size, size_on_disk); break; } } } Close(); } while (ChangeCompactOptions()); } TEST_F(DBTest, GetLiveBlobFiles) { VersionSet* const versions = dbfull()->TEST_GetVersionSet(); assert(versions); assert(versions->GetColumnFamilySet()); ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); assert(cfd); // Add a live blob file. 
VersionEdit edit; constexpr uint64_t blob_file_number = 234; constexpr uint64_t total_blob_count = 555; constexpr uint64_t total_blob_bytes = 66666; constexpr char checksum_method[] = "CRC32"; constexpr char checksum_value[] = "3d87ff57"; edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); dbfull()->TEST_LockMutex(); Status s = versions->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit, dbfull()->mutex()); dbfull()->TEST_UnlockMutex(); ASSERT_OK(s); // Make sure it appears in the results returned by GetLiveFiles. uint64_t manifest_size = 0; std::vector files; ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size)); ASSERT_FALSE(files.empty()); ASSERT_EQ(files[0], BlobFileName("", blob_file_number)); } #endif TEST_F(DBTest, PurgeInfoLogs) { Options options = CurrentOptions(); options.keep_log_file_num = 5; options.create_if_missing = true; for (int mode = 0; mode <= 1; mode++) { if (mode == 1) { options.db_log_dir = dbname_ + "_logs"; env_->CreateDirIfMissing(options.db_log_dir); } else { options.db_log_dir = ""; } for (int i = 0; i < 8; i++) { Reopen(options); } std::vector files; env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir, &files); int info_log_count = 0; for (std::string file : files) { if (file.find("LOG") != std::string::npos) { info_log_count++; } } ASSERT_EQ(5, info_log_count); Destroy(options); // For mode (1), test DestroyDB() to delete all the logs under DB dir. // For mode (2), no info log file should have been put under DB dir. 
std::vector db_files; env_->GetChildren(dbname_, &db_files); for (std::string file : db_files) { ASSERT_TRUE(file.find("LOG") == std::string::npos); } if (mode == 1) { // Cleaning up env_->GetChildren(options.db_log_dir, &files); for (std::string file : files) { env_->DeleteFile(options.db_log_dir + "/" + file); } env_->DeleteDir(options.db_log_dir); } } } #ifndef ROCKSDB_LITE // Multi-threaded test: namespace { static const int kColumnFamilies = 10; static const int kNumThreads = 10; static const int kTestSeconds = 10; static const int kNumKeys = 1000; struct MTState { DBTest* test; std::atomic stop; std::atomic counter[kNumThreads]; std::atomic thread_done[kNumThreads]; }; struct MTThread { MTState* state; int id; bool multiget_batched; }; static void MTThreadBody(void* arg) { MTThread* t = reinterpret_cast(arg); int id = t->id; DB* db = t->state->test->db_; int counter = 0; fprintf(stderr, "... starting thread %d\n", id); Random rnd(1000 + id); char valbuf[1500]; while (t->state->stop.load(std::memory_order_acquire) == false) { t->state->counter[id].store(counter, std::memory_order_release); int key = rnd.Uniform(kNumKeys); char keybuf[20]; snprintf(keybuf, sizeof(keybuf), "%016d", key); if (rnd.OneIn(2)) { // Write values of the form . // into each of the CFs // We add some padding for force compactions. int unique_id = rnd.Uniform(1000000); // Half of the time directly use WriteBatch. Half of the time use // WriteBatchWithIndex. 
if (rnd.OneIn(2)) { WriteBatch batch; for (int cf = 0; cf < kColumnFamilies; ++cf) { snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id, static_cast(counter), cf, unique_id); batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf)); } ASSERT_OK(db->Write(WriteOptions(), &batch)); } else { WriteBatchWithIndex batch(db->GetOptions().comparator); for (int cf = 0; cf < kColumnFamilies; ++cf) { snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id, static_cast(counter), cf, unique_id); batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf)); } ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch())); } } else { // Read a value and verify that it matches the pattern written above // and that writes to all column families were atomic (unique_id is the // same) std::vector keys(kColumnFamilies, Slice(keybuf)); std::vector values; std::vector statuses; if (!t->multiget_batched) { statuses = db->MultiGet(ReadOptions(), t->state->test->handles_, keys, &values); } else { std::vector pin_values(keys.size()); statuses.resize(keys.size()); const Snapshot* snapshot = db->GetSnapshot(); ReadOptions ro; ro.snapshot = snapshot; for (int cf = 0; cf < kColumnFamilies; ++cf) { db->MultiGet(ro, t->state->test->handles_[cf], 1, &keys[cf], &pin_values[cf], &statuses[cf]); } db->ReleaseSnapshot(snapshot); values.resize(keys.size()); for (int cf = 0; cf < kColumnFamilies; ++cf) { if (statuses[cf].ok()) { values[cf].assign(pin_values[cf].data(), pin_values[cf].size()); } } } Status s = statuses[0]; // all statuses have to be the same for (size_t i = 1; i < statuses.size(); ++i) { // they are either both ok or both not-found ASSERT_TRUE((s.ok() && statuses[i].ok()) || (s.IsNotFound() && statuses[i].IsNotFound())); } if (s.IsNotFound()) { // Key has not yet been written } else { // Check that the writer thread counter is >= the counter in the value ASSERT_OK(s); int unique_id = -1; for (int i = 0; i < kColumnFamilies; ++i) { int k, w, c, cf, 
u; ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w, &c, &cf, &u)) << values[i]; ASSERT_EQ(k, key); ASSERT_GE(w, 0); ASSERT_LT(w, kNumThreads); ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire)); ASSERT_EQ(cf, i); if (i == 0) { unique_id = u; } else { // this checks that updates across column families happened // atomically -- all unique ids are the same ASSERT_EQ(u, unique_id); } } } } counter++; } t->state->thread_done[id].store(true, std::memory_order_release); fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter)); } } // namespace class MultiThreadedDBTest : public DBTest, public ::testing::WithParamInterface> { public: void SetUp() override { std::tie(option_config_, multiget_batched_) = GetParam(); } static std::vector GenerateOptionConfigs() { std::vector optionConfigs; for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) { optionConfigs.push_back(optionConfig); } return optionConfigs; } bool multiget_batched_; }; TEST_P(MultiThreadedDBTest, MultiThreaded) { if (option_config_ == kPipelinedWrite) return; anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; Options options = CurrentOptions(options_override); std::vector cfs; for (int i = 1; i < kColumnFamilies; ++i) { cfs.push_back(ToString(i)); } Reopen(options); CreateAndReopenWithCF(cfs, options); // Initialize state MTState mt; mt.test = this; mt.stop.store(false, std::memory_order_release); for (int id = 0; id < kNumThreads; id++) { mt.counter[id].store(0, std::memory_order_release); mt.thread_done[id].store(false, std::memory_order_release); } // Start threads MTThread thread[kNumThreads]; for (int id = 0; id < kNumThreads; id++) { thread[id].state = &mt; thread[id].id = id; thread[id].multiget_batched = multiget_batched_; env_->StartThread(MTThreadBody, &thread[id]); } // Let them run for a while env_->SleepForMicroseconds(kTestSeconds * 1000000); // Stop the threads and wait for them to finish 
mt.stop.store(true, std::memory_order_release); for (int id = 0; id < kNumThreads; id++) { while (mt.thread_done[id].load(std::memory_order_acquire) == false) { env_->SleepForMicroseconds(100000); } } } INSTANTIATE_TEST_CASE_P( MultiThreaded, MultiThreadedDBTest, ::testing::Combine( ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()), ::testing::Bool())); #endif // ROCKSDB_LITE // Group commit test: #if !defined(TRAVIS) && !defined(OS_WIN) // Disable this test temporarily on Travis and appveyor as it fails // intermittently. Github issue: #4151 namespace { static const int kGCNumThreads = 4; static const int kGCNumKeys = 1000; struct GCThread { DB* db; int id; std::atomic done; }; static void GCThreadBody(void* arg) { GCThread* t = reinterpret_cast(arg); int id = t->id; DB* db = t->db; WriteOptions wo; for (int i = 0; i < kGCNumKeys; ++i) { std::string kv(ToString(i + id * kGCNumKeys)); ASSERT_OK(db->Put(wo, kv, kv)); } t->done = true; } } // namespace TEST_F(DBTest, GroupCommitTest) { do { Options options = CurrentOptions(); options.env = env_; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); Reopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"WriteThread::JoinBatchGroup:BeganWaiting", "DBImpl::WriteImpl:BeforeLeaderEnters"}, {"WriteThread::AwaitState:BlockingWaiting", "WriteThread::EnterAsBatchGroupLeader:End"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Start threads GCThread thread[kGCNumThreads]; for (int id = 0; id < kGCNumThreads; id++) { thread[id].id = id; thread[id].db = db_; thread[id].done = false; env_->StartThread(GCThreadBody, &thread[id]); } env_->WaitForJoin(); ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0); std::vector expected_db; for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) { expected_db.push_back(ToString(i)); } std::sort(expected_db.begin(), expected_db.end()); Iterator* itr = db_->NewIterator(ReadOptions()); itr->SeekToFirst(); for (auto 
x : expected_db) { ASSERT_TRUE(itr->Valid()); ASSERT_EQ(itr->key().ToString(), x); ASSERT_EQ(itr->value().ToString(), x); itr->Next(); } ASSERT_TRUE(!itr->Valid()); delete itr; HistogramData hist_data; options.statistics->histogramData(DB_WRITE, &hist_data); ASSERT_GT(hist_data.average, 0.0); } while (ChangeOptions(kSkipNoSeekToLast)); } #endif // TRAVIS namespace { typedef std::map KVMap; } class ModelDB : public DB { public: class ModelSnapshot : public Snapshot { public: KVMap map_; SequenceNumber GetSequenceNumber() const override { // no need to call this assert(false); return 0; } }; explicit ModelDB(const Options& options) : options_(options) {} using DB::Put; Status Put(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k, const Slice& v) override { WriteBatch batch; batch.Put(cf, k, v); return Write(o, &batch); } using DB::Close; Status Close() override { return Status::OK(); } using DB::Delete; Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& key) override { WriteBatch batch; batch.Delete(cf, key); return Write(o, &batch); } using DB::SingleDelete; Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& key) override { WriteBatch batch; batch.SingleDelete(cf, key); return Write(o, &batch); } using DB::Merge; Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf, const Slice& k, const Slice& v) override { WriteBatch batch; batch.Merge(cf, k, v); return Write(o, &batch); } using DB::Get; Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*cf*/, const Slice& key, PinnableSlice* /*value*/) override { return Status::NotSupported(key); } using DB::GetMergeOperands; virtual Status GetMergeOperands( const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& key, PinnableSlice* /*slice*/, GetMergeOperandsOptions* /*merge_operands_options*/, int* /*number_of_operands*/) override { return Status::NotSupported(key); } using DB::MultiGet; std::vector MultiGet( const 
ReadOptions& /*options*/, const std::vector& /*column_family*/, const std::vector& keys, std::vector* /*values*/) override { std::vector s(keys.size(), Status::NotSupported("Not implemented.")); return s; } #ifndef ROCKSDB_LITE using DB::IngestExternalFile; Status IngestExternalFile( ColumnFamilyHandle* /*column_family*/, const std::vector& /*external_files*/, const IngestExternalFileOptions& /*options*/) override { return Status::NotSupported("Not implemented."); } using DB::IngestExternalFiles; Status IngestExternalFiles( const std::vector& /*args*/) override { return Status::NotSupported("Not implemented"); } using DB::CreateColumnFamilyWithImport; virtual Status CreateColumnFamilyWithImport( const ColumnFamilyOptions& /*options*/, const std::string& /*column_family_name*/, const ImportColumnFamilyOptions& /*import_options*/, const ExportImportFilesMetaData& /*metadata*/, ColumnFamilyHandle** /*handle*/) override { return Status::NotSupported("Not implemented."); } using DB::VerifyChecksum; Status VerifyChecksum(const ReadOptions&) override { return Status::NotSupported("Not implemented."); } using DB::GetPropertiesOfAllTables; Status GetPropertiesOfAllTables( ColumnFamilyHandle* /*column_family*/, TablePropertiesCollection* /*props*/) override { return Status(); } Status GetPropertiesOfTablesInRange( ColumnFamilyHandle* /*column_family*/, const Range* /*range*/, std::size_t /*n*/, TablePropertiesCollection* /*props*/) override { return Status(); } #endif // ROCKSDB_LITE using DB::KeyMayExist; bool KeyMayExist(const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, std::string* /*value*/, bool* value_found = nullptr) override { if (value_found != nullptr) { *value_found = false; } return true; // Not Supported directly } using DB::NewIterator; Iterator* NewIterator(const ReadOptions& options, ColumnFamilyHandle* /*column_family*/) override { if (options.snapshot == nullptr) { KVMap* saved = new KVMap; *saved = map_; return 
new ModelIter(saved, true); } else { const KVMap* snapshot_state = &(reinterpret_cast(options.snapshot)->map_); return new ModelIter(snapshot_state, false); } } Status NewIterators(const ReadOptions& /*options*/, const std::vector& /*column_family*/, std::vector* /*iterators*/) override { return Status::NotSupported("Not supported yet"); } const Snapshot* GetSnapshot() override { ModelSnapshot* snapshot = new ModelSnapshot; snapshot->map_ = map_; return snapshot; } void ReleaseSnapshot(const Snapshot* snapshot) override { delete reinterpret_cast(snapshot); } Status Write(const WriteOptions& /*options*/, WriteBatch* batch) override { class Handler : public WriteBatch::Handler { public: KVMap* map_; void Put(const Slice& key, const Slice& value) override { (*map_)[key.ToString()] = value.ToString(); } void Merge(const Slice& /*key*/, const Slice& /*value*/) override { // ignore merge for now // (*map_)[key.ToString()] = value.ToString(); } void Delete(const Slice& key) override { map_->erase(key.ToString()); } }; Handler handler; handler.map_ = &map_; return batch->Iterate(&handler); } using DB::GetProperty; bool GetProperty(ColumnFamilyHandle* /*column_family*/, const Slice& /*property*/, std::string* /*value*/) override { return false; } using DB::GetIntProperty; bool GetIntProperty(ColumnFamilyHandle* /*column_family*/, const Slice& /*property*/, uint64_t* /*value*/) override { return false; } using DB::GetMapProperty; bool GetMapProperty(ColumnFamilyHandle* /*column_family*/, const Slice& /*property*/, std::map* /*value*/) override { return false; } using DB::GetAggregatedIntProperty; bool GetAggregatedIntProperty(const Slice& /*property*/, uint64_t* /*value*/) override { return false; } using DB::GetApproximateSizes; Status GetApproximateSizes(const SizeApproximationOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Range* /*range*/, int n, uint64_t* sizes) override { for (int i = 0; i < n; i++) { sizes[i] = 0; } return Status::OK(); } using 
DB::GetApproximateMemTableStats; void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/, const Range& /*range*/, uint64_t* const count, uint64_t* const size) override { *count = 0; *size = 0; } using DB::CompactRange; Status CompactRange(const CompactRangeOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice* /*start*/, const Slice* /*end*/) override { return Status::NotSupported("Not supported operation."); } Status SetDBOptions( const std::unordered_map& /*new_options*/) override { return Status::NotSupported("Not supported operation."); } using DB::CompactFiles; Status CompactFiles( const CompactionOptions& /*compact_options*/, ColumnFamilyHandle* /*column_family*/, const std::vector& /*input_file_names*/, const int /*output_level*/, const int /*output_path_id*/ = -1, std::vector* const /*output_file_names*/ = nullptr, CompactionJobInfo* /*compaction_job_info*/ = nullptr) override { return Status::NotSupported("Not supported operation."); } Status PauseBackgroundWork() override { return Status::NotSupported("Not supported operation."); } Status ContinueBackgroundWork() override { return Status::NotSupported("Not supported operation."); } Status EnableAutoCompaction( const std::vector& /*column_family_handles*/) override { return Status::NotSupported("Not supported operation."); } void EnableManualCompaction() override { return; } void DisableManualCompaction() override { return; } using DB::NumberLevels; int NumberLevels(ColumnFamilyHandle* /*column_family*/) override { return 1; } using DB::MaxMemCompactionLevel; int MaxMemCompactionLevel(ColumnFamilyHandle* /*column_family*/) override { return 1; } using DB::Level0StopWriteTrigger; int Level0StopWriteTrigger(ColumnFamilyHandle* /*column_family*/) override { return -1; } const std::string& GetName() const override { return name_; } Env* GetEnv() const override { return nullptr; } using DB::GetOptions; Options GetOptions(ColumnFamilyHandle* /*column_family*/) const override { 
return options_; } using DB::GetDBOptions; DBOptions GetDBOptions() const override { return options_; } using DB::Flush; Status Flush(const ROCKSDB_NAMESPACE::FlushOptions& /*options*/, ColumnFamilyHandle* /*column_family*/) override { Status ret; return ret; } Status Flush( const ROCKSDB_NAMESPACE::FlushOptions& /*options*/, const std::vector& /*column_families*/) override { return Status::OK(); } Status SyncWAL() override { return Status::OK(); } Status DisableFileDeletions() override { return Status::OK(); } Status EnableFileDeletions(bool /*force*/) override { return Status::OK(); } #ifndef ROCKSDB_LITE Status GetLiveFiles(std::vector&, uint64_t* /*size*/, bool /*flush_memtable*/ = true) override { return Status::OK(); } Status GetSortedWalFiles(VectorLogPtr& /*files*/) override { return Status::OK(); } Status GetCurrentWalFile( std::unique_ptr* /*current_log_file*/) override { return Status::OK(); } virtual Status GetCreationTimeOfOldestFile( uint64_t* /*creation_time*/) override { return Status::NotSupported(); } Status DeleteFile(std::string /*name*/) override { return Status::OK(); } Status GetUpdatesSince( ROCKSDB_NAMESPACE::SequenceNumber, std::unique_ptr*, const TransactionLogIterator::ReadOptions& /*read_options*/ = TransactionLogIterator::ReadOptions()) override { return Status::NotSupported("Not supported in Model DB"); } void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, ColumnFamilyMetaData* /*metadata*/) override {} #endif // ROCKSDB_LITE Status GetDbIdentity(std::string& /*identity*/) const override { return Status::OK(); } SequenceNumber GetLatestSequenceNumber() const override { return 0; } bool SetPreserveDeletesSequenceNumber(SequenceNumber /*seqnum*/) override { return true; } ColumnFamilyHandle* DefaultColumnFamily() const override { return nullptr; } private: class ModelIter : public Iterator { public: ModelIter(const KVMap* map, bool owned) : map_(map), owned_(owned), iter_(map_->end()) {} ~ModelIter() override { if 
(owned_) delete map_; } bool Valid() const override { return iter_ != map_->end(); } void SeekToFirst() override { iter_ = map_->begin(); } void SeekToLast() override { if (map_->empty()) { iter_ = map_->end(); } else { iter_ = map_->find(map_->rbegin()->first); } } void Seek(const Slice& k) override { iter_ = map_->lower_bound(k.ToString()); } void SeekForPrev(const Slice& k) override { iter_ = map_->upper_bound(k.ToString()); Prev(); } void Next() override { ++iter_; } void Prev() override { if (iter_ == map_->begin()) { iter_ = map_->end(); return; } --iter_; } Slice key() const override { return iter_->first; } Slice value() const override { return iter_->second; } Status status() const override { return Status::OK(); } private: const KVMap* const map_; const bool owned_; // Do we own map_ KVMap::const_iterator iter_; }; const Options options_; KVMap map_; std::string name_ = ""; }; #ifndef ROCKSDB_VALGRIND_RUN static std::string RandomKey(Random* rnd, int minimum = 0) { int len; do { len = (rnd->OneIn(3) ? 1 // Short sometimes to encourage collisions : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); } while (len < minimum); return test::RandomKey(rnd, len); } static bool CompareIterators(int step, DB* model, DB* db, const Snapshot* model_snap, const Snapshot* db_snap) { ReadOptions options; options.snapshot = model_snap; Iterator* miter = model->NewIterator(options); options.snapshot = db_snap; Iterator* dbiter = db->NewIterator(options); bool ok = true; int count = 0; for (miter->SeekToFirst(), dbiter->SeekToFirst(); ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) { count++; if (miter->key().compare(dbiter->key()) != 0) { fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", step, EscapeString(miter->key()).c_str(), EscapeString(dbiter->key()).c_str()); ok = false; break; } if (miter->value().compare(dbiter->value()) != 0) { fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. 
'%s'\n", step, EscapeString(miter->key()).c_str(), EscapeString(miter->value()).c_str(), EscapeString(miter->value()).c_str()); ok = false; } } if (ok) { if (miter->Valid() != dbiter->Valid()) { fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", step, miter->Valid(), dbiter->Valid()); ok = false; } } delete miter; delete dbiter; return ok; } class DBTestRandomized : public DBTest, public ::testing::WithParamInterface { public: void SetUp() override { option_config_ = GetParam(); } static std::vector GenerateOptionConfigs() { std::vector option_configs; // skip cuckoo hash as it does not support snapshot. for (int option_config = kDefault; option_config < kEnd; ++option_config) { if (!ShouldSkipOptions(option_config, kSkipDeletesFilterFirst | kSkipNoSeekToLast)) { option_configs.push_back(option_config); } } option_configs.push_back(kBlockBasedTableWithIndexRestartInterval); return option_configs; } }; INSTANTIATE_TEST_CASE_P( DBTestRandomized, DBTestRandomized, ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs())); TEST_P(DBTestRandomized, Randomized) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; Options options = CurrentOptions(options_override); DestroyAndReopen(options); Random rnd(test::RandomSeed() + GetParam()); ModelDB model(options); const int N = 10000; const Snapshot* model_snap = nullptr; const Snapshot* db_snap = nullptr; std::string k, v; for (int step = 0; step < N; step++) { // TODO(sanjay): Test Get() works int p = rnd.Uniform(100); int minimum = 0; if (option_config_ == kHashSkipList || option_config_ == kHashLinkList || option_config_ == kPlainTableFirstBytePrefix || option_config_ == kBlockBasedTableWithWholeKeyHashIndex || option_config_ == kBlockBasedTableWithPrefixHashIndex) { minimum = 1; } if (p < 45) { // Put k = RandomKey(&rnd, minimum); v = RandomString(&rnd, rnd.OneIn(20) ? 
100 + rnd.Uniform(100) : rnd.Uniform(8)); ASSERT_OK(model.Put(WriteOptions(), k, v)); ASSERT_OK(db_->Put(WriteOptions(), k, v)); } else if (p < 90) { // Delete k = RandomKey(&rnd, minimum); ASSERT_OK(model.Delete(WriteOptions(), k)); ASSERT_OK(db_->Delete(WriteOptions(), k)); } else { // Multi-element batch WriteBatch b; const int num = rnd.Uniform(8); for (int i = 0; i < num; i++) { if (i == 0 || !rnd.OneIn(10)) { k = RandomKey(&rnd, minimum); } else { // Periodically re-use the same key from the previous iter, so // we have multiple entries in the write batch for the same key } if (rnd.OneIn(2)) { v = RandomString(&rnd, rnd.Uniform(10)); b.Put(k, v); } else { b.Delete(k); } } ASSERT_OK(model.Write(WriteOptions(), &b)); ASSERT_OK(db_->Write(WriteOptions(), &b)); } if ((step % 100) == 0) { // For DB instances that use the hash index + block-based table, the // iterator will be invalid right when seeking a non-existent key, right // than return a key that is close to it. if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex && option_config_ != kBlockBasedTableWithPrefixHashIndex) { ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); } // Save a snapshot from each DB this time that we'll use next // time we compare things, to make sure the current state is // preserved with the snapshot if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); Reopen(options); ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); model_snap = model.GetSnapshot(); db_snap = db_->GetSnapshot(); } } if (model_snap != nullptr) model.ReleaseSnapshot(model_snap); if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap); } #endif // ROCKSDB_VALGRIND_RUN TEST_F(DBTest, BlockBasedTablePrefixIndexTest) { // create a DB with block prefix index BlockBasedTableOptions table_options; Options options = CurrentOptions(); 
table_options.index_type = BlockBasedTableOptions::kHashSearch; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); Reopen(options); ASSERT_OK(Put("k1", "v1")); Flush(); ASSERT_OK(Put("k2", "v2")); // Reopen it without prefix extractor, make sure everything still works. // RocksDB should just fall back to the binary index. table_options.index_type = BlockBasedTableOptions::kBinarySearch; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(); Reopen(options); ASSERT_EQ("v1", Get("k1")); ASSERT_EQ("v2", Get("k2")); } TEST_F(DBTest, BlockBasedTablePrefixIndexTotalOrderSeek) { // create a DB with block prefix index BlockBasedTableOptions table_options; Options options = CurrentOptions(); options.max_open_files = 10; table_options.index_type = BlockBasedTableOptions::kHashSearch; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); // RocksDB sanitize max open files to at least 20. Modify it back. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { int* max_open_files = static_cast(arg); *max_open_files = 11; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); ASSERT_OK(Put("k1", "v1")); Flush(); CompactRangeOptions cro; cro.change_level = true; cro.target_level = 1; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); // Force evict tables dbfull()->TEST_table_cache()->SetCapacity(0); // Make table cache to keep one entry. dbfull()->TEST_table_cache()->SetCapacity(1); ReadOptions read_options; read_options.total_order_seek = true; { std::unique_ptr iter(db_->NewIterator(read_options)); iter->Seek("k1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("k1", iter->key().ToString()); } // After total order seek, prefix index should still be used. 
read_options.total_order_seek = false; { std::unique_ptr iter(db_->NewIterator(read_options)); iter->Seek("k1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("k1", iter->key().ToString()); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest, ChecksumTest) { BlockBasedTableOptions table_options; Options options = CurrentOptions(); table_options.checksum = kCRC32c; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_OK(Put("a", "b")); ASSERT_OK(Put("c", "d")); ASSERT_OK(Flush()); // table with crc checksum table_options.checksum = kxxHash; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_OK(Put("e", "f")); ASSERT_OK(Put("g", "h")); ASSERT_OK(Flush()); // table with xxhash checksum table_options.checksum = kCRC32c; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_EQ("b", Get("a")); ASSERT_EQ("d", Get("c")); ASSERT_EQ("f", Get("e")); ASSERT_EQ("h", Get("g")); table_options.checksum = kCRC32c; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_EQ("b", Get("a")); ASSERT_EQ("d", Get("c")); ASSERT_EQ("f", Get("e")); ASSERT_EQ("h", Get("g")); } #ifndef ROCKSDB_LITE TEST_P(DBTestWithParam, FIFOCompactionTest) { for (int iter = 0; iter < 2; ++iter) { // first iteration -- auto compaction // second iteration -- manual compaction Options options; options.compaction_style = kCompactionStyleFIFO; options.write_buffer_size = 100 << 10; // 100KB options.arena_block_size = 4096; options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB options.compression = kNoCompression; options.create_if_missing = true; options.max_subcompactions = max_subcompactions_; if (iter == 1) { options.disable_auto_compactions = true; } options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 6; ++i) { for (int j = 0; j < 110; ++j) { 
ASSERT_OK(Put(ToString(i * 100 + j), RandomString(&rnd, 980))); } // flush should happen here ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } if (iter == 0) { ASSERT_OK(dbfull()->TEST_WaitForCompact()); } else { CompactRangeOptions cro; cro.exclusive_manual_compaction = exclusive_manual_compaction_; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); } // only 5 files should survive ASSERT_EQ(NumTableFilesAtLevel(0), 5); for (int i = 0; i < 50; ++i) { // these keys should be deleted in previous compaction ASSERT_EQ("NOT_FOUND", Get(ToString(i))); } } } TEST_F(DBTest, FIFOCompactionTestWithCompaction) { Options options; options.compaction_style = kCompactionStyleFIFO; options.write_buffer_size = 20 << 10; // 20K options.arena_block_size = 4096; options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1MB options.compaction_options_fifo.allow_compaction = true; options.level0_file_num_compaction_trigger = 6; options.compression = kNoCompression; options.create_if_missing = true; options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 60; i++) { // Generate and flush a file about 20KB. for (int j = 0; j < 20; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } // It should be compacted to 10 files. ASSERT_EQ(NumTableFilesAtLevel(0), 10); for (int i = 0; i < 60; i++) { // Generate and flush a file about 20KB. for (int j = 0; j < 20; j++) { ASSERT_OK(Put(ToString(i * 20 + j + 2000), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } // It should be compacted to no more than 20 files. ASSERT_GT(NumTableFilesAtLevel(0), 10); ASSERT_LT(NumTableFilesAtLevel(0), 18); // Size limit is still guaranteed. 
ASSERT_LE(SizeAtLevel(0), options.compaction_options_fifo.max_table_files_size); } TEST_F(DBTest, FIFOCompactionStyleWithCompactionAndDelete) { Options options; options.compaction_style = kCompactionStyleFIFO; options.write_buffer_size = 20 << 10; // 20K options.arena_block_size = 4096; options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1MB options.compaction_options_fifo.allow_compaction = true; options.level0_file_num_compaction_trigger = 3; options.compression = kNoCompression; options.create_if_missing = true; options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 3; i++) { // Each file contains a different key which will be dropped later. ASSERT_OK(Put("a" + ToString(i), RandomString(&rnd, 500))); ASSERT_OK(Put("key" + ToString(i), "")); ASSERT_OK(Put("z" + ToString(i), RandomString(&rnd, 500))); Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } ASSERT_EQ(NumTableFilesAtLevel(0), 1); for (int i = 0; i < 3; i++) { ASSERT_EQ("", Get("key" + ToString(i))); } for (int i = 0; i < 3; i++) { // Each file contains a different key which will be dropped later. ASSERT_OK(Put("a" + ToString(i), RandomString(&rnd, 500))); ASSERT_OK(Delete("key" + ToString(i))); ASSERT_OK(Put("z" + ToString(i), RandomString(&rnd, 500))); Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } ASSERT_EQ(NumTableFilesAtLevel(0), 2); for (int i = 0; i < 3; i++) { ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i))); } } // Check that FIFO-with-TTL is not supported with max_open_files != -1. TEST_F(DBTest, FIFOCompactionWithTTLAndMaxOpenFilesTest) { Options options; options.compaction_style = kCompactionStyleFIFO; options.create_if_missing = true; options.ttl = 600; // seconds // TTL is now supported with max_open_files != -1. 
options.max_open_files = 100; options = CurrentOptions(options); ASSERT_OK(TryReopen(options)); options.max_open_files = -1; ASSERT_OK(TryReopen(options)); } // Check that FIFO-with-TTL is supported only with BlockBasedTableFactory. TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) { Options options; options.compaction_style = kCompactionStyleFIFO; options.create_if_missing = true; options.ttl = 600; // seconds options = CurrentOptions(options); options.table_factory.reset(NewBlockBasedTableFactory()); ASSERT_OK(TryReopen(options)); Destroy(options); options.table_factory.reset(NewPlainTableFactory()); ASSERT_TRUE(TryReopen(options).IsNotSupported()); Destroy(options); options.table_factory.reset(NewAdaptiveTableFactory()); ASSERT_TRUE(TryReopen(options).IsNotSupported()); } TEST_F(DBTest, FIFOCompactionWithTTLTest) { Options options; options.compaction_style = kCompactionStyleFIFO; options.write_buffer_size = 10 << 10; // 10KB options.arena_block_size = 4096; options.compression = kNoCompression; options.create_if_missing = true; env_->time_elapse_only_sleep_ = false; options.env = env_; // Test to make sure that all files with expired ttl are deleted on next // manual compaction. { env_->addon_time_.store(0); options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB options.compaction_options_fifo.allow_compaction = false; options.ttl = 1 * 60 * 60 ; // 1 hour options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Sleep for 2 hours -- which is much greater than TTL. // Note: Couldn't use SleepForMicroseconds because it takes an int instead // of uint64_t. Hence used addon_time_ directly. 
// env_->SleepForMicroseconds(2 * 60 * 60 * 1000 * 1000); env_->addon_time_.fetch_add(2 * 60 * 60); // Since no flushes and compactions have run, the db should still be in // the same state even after considerable time has passed. ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 10); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 0); } // Test to make sure that all files with expired ttl are deleted on next // automatic compaction. { options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB options.compaction_options_fifo.allow_compaction = false; options.ttl = 1 * 60 * 60; // 1 hour options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Sleep for 2 hours -- which is much greater than TTL. env_->addon_time_.fetch_add(2 * 60 * 60); // Just to make sure that we are in the same state even after sleeping. ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 10); // Create 1 more file to trigger TTL compaction. The old files are dropped. for (int i = 0; i < 1; i++) { for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Only the new 10 files remain. ASSERT_EQ(NumTableFilesAtLevel(0), 1); ASSERT_LE(SizeAtLevel(0), options.compaction_options_fifo.max_table_files_size); } // Test that shows the fall back to size-based FIFO compaction if TTL-based // deletion doesn't move the total size to be less than max_table_files_size. 
{ options.write_buffer_size = 10 << 10; // 10KB options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB options.compaction_options_fifo.allow_compaction = false; options.ttl = 1 * 60 * 60; // 1 hour options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 3; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } ASSERT_EQ(NumTableFilesAtLevel(0), 3); // Sleep for 2 hours -- which is much greater than TTL. env_->addon_time_.fetch_add(2 * 60 * 60); // Just to make sure that we are in the same state even after sleeping. ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 3); for (int i = 0; i < 5; i++) { for (int j = 0; j < 140; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } // Size limit is still guaranteed. ASSERT_LE(SizeAtLevel(0), options.compaction_options_fifo.max_table_files_size); } // Test with TTL + Intra-L0 compactions. { options.compaction_options_fifo.max_table_files_size = 150 << 10; // 150KB options.compaction_options_fifo.allow_compaction = true; options.ttl = 1 * 60 * 60; // 1 hour options.level0_file_num_compaction_trigger = 6; options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 10; i++) { // Generate and flush a file about 10KB. for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } // With Intra-L0 compaction, out of 10 files, 6 files will be compacted to 1 // (due to level0_file_num_compaction_trigger = 6). // So total files = 1 + remaining 4 = 5. ASSERT_EQ(NumTableFilesAtLevel(0), 5); // Sleep for 2 hours -- which is much greater than TTL. 
env_->addon_time_.fetch_add(2 * 60 * 60); // Just to make sure that we are in the same state even after sleeping. ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 5); // Create 10 more files. The old 5 files are dropped as their ttl expired. for (int i = 0; i < 10; i++) { for (int j = 0; j < 10; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } ASSERT_EQ(NumTableFilesAtLevel(0), 5); ASSERT_LE(SizeAtLevel(0), options.compaction_options_fifo.max_table_files_size); } // Test with large TTL + Intra-L0 compactions. // Files dropped based on size, as ttl doesn't kick in. { options.write_buffer_size = 20 << 10; // 20K options.compaction_options_fifo.max_table_files_size = 1500 << 10; // 1.5MB options.compaction_options_fifo.allow_compaction = true; options.ttl = 1 * 60 * 60; // 1 hour options.level0_file_num_compaction_trigger = 6; options = CurrentOptions(options); DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < 60; i++) { // Generate and flush a file about 20KB. for (int j = 0; j < 20; j++) { ASSERT_OK(Put(ToString(i * 20 + j), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } // It should be compacted to 10 files. ASSERT_EQ(NumTableFilesAtLevel(0), 10); for (int i = 0; i < 60; i++) { // Generate and flush a file about 20KB. for (int j = 0; j < 20; j++) { ASSERT_OK(Put(ToString(i * 20 + j + 2000), RandomString(&rnd, 980))); } Flush(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); } // It should be compacted to no more than 20 files. ASSERT_GT(NumTableFilesAtLevel(0), 10); ASSERT_LT(NumTableFilesAtLevel(0), 18); // Size limit is still guaranteed. ASSERT_LE(SizeAtLevel(0), options.compaction_options_fifo.max_table_files_size); } } #endif // ROCKSDB_LITE #ifndef ROCKSDB_LITE /* * This test is not reliable enough as it heavily depends on disk behavior. * Disable as it is flaky. 
*/ TEST_F(DBTest, DISABLED_RateLimitingTest) { Options options = CurrentOptions(); options.write_buffer_size = 1 << 20; // 1MB options.level0_file_num_compaction_trigger = 2; options.target_file_size_base = 1 << 20; // 1MB options.max_bytes_for_level_base = 4 << 20; // 4MB options.max_bytes_for_level_multiplier = 4; options.compression = kNoCompression; options.create_if_missing = true; options.env = env_; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.IncreaseParallelism(4); DestroyAndReopen(options); WriteOptions wo; wo.disableWAL = true; // # no rate limiting Random rnd(301); uint64_t start = env_->NowMicros(); // Write ~96M data for (int64_t i = 0; i < (96 << 10); ++i) { ASSERT_OK( Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo)); } uint64_t elapsed = env_->NowMicros() - start; double raw_rate = env_->bytes_written_ * 1000000.0 / elapsed; uint64_t rate_limiter_drains = TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS); ASSERT_EQ(0, rate_limiter_drains); Close(); // # rate limiting with 0.7 x threshold options.rate_limiter.reset( NewGenericRateLimiter(static_cast(0.7 * raw_rate))); env_->bytes_written_ = 0; DestroyAndReopen(options); start = env_->NowMicros(); // Write ~96M data for (int64_t i = 0; i < (96 << 10); ++i) { ASSERT_OK( Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo)); } rate_limiter_drains = TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) - rate_limiter_drains; elapsed = env_->NowMicros() - start; Close(); ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_); // Most intervals should've been drained (interval time is 100ms, elapsed is // micros) ASSERT_GT(rate_limiter_drains, 0); ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1); double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate; fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio); ASSERT_TRUE(ratio < 0.8); // # rate limiting with half of the raw_rate 
options.rate_limiter.reset( NewGenericRateLimiter(static_cast(raw_rate / 2))); env_->bytes_written_ = 0; DestroyAndReopen(options); start = env_->NowMicros(); // Write ~96M data for (int64_t i = 0; i < (96 << 10); ++i) { ASSERT_OK( Put(RandomString(&rnd, 32), RandomString(&rnd, (1 << 10) + 1), wo)); } elapsed = env_->NowMicros() - start; rate_limiter_drains = TestGetTickerCount(options, NUMBER_RATE_LIMITER_DRAINS) - rate_limiter_drains; Close(); ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_); // Most intervals should've been drained (interval time is 100ms, elapsed is // micros) ASSERT_GT(rate_limiter_drains, elapsed / 100000 / 2); ASSERT_LE(rate_limiter_drains, elapsed / 100000 + 1); ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate; fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio); ASSERT_LT(ratio, 0.6); } TEST_F(DBTest, TableOptionsSanitizeTest) { Options options = CurrentOptions(); options.create_if_missing = true; DestroyAndReopen(options); ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false); options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewNoopTransform()); Destroy(options); ASSERT_TRUE(!TryReopen(options).IsNotSupported()); // Test for check of prefix_extractor when hash index is used for // block-based table BlockBasedTableOptions to; to.index_type = BlockBasedTableOptions::kHashSearch; options = CurrentOptions(); options.create_if_missing = true; options.table_factory.reset(NewBlockBasedTableFactory(to)); ASSERT_TRUE(TryReopen(options).IsInvalidArgument()); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); ASSERT_OK(TryReopen(options)); } TEST_F(DBTest, ConcurrentMemtableNotSupported) { Options options = CurrentOptions(); options.allow_concurrent_memtable_write = true; options.soft_pending_compaction_bytes_limit = 0; options.hard_pending_compaction_bytes_limit = 100; options.create_if_missing = true; DestroyDB(dbname_, options); 
options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4)); ASSERT_NOK(TryReopen(options)); options.memtable_factory.reset(new SkipListFactory); ASSERT_OK(TryReopen(options)); ColumnFamilyOptions cf_options(options); cf_options.memtable_factory.reset( NewHashLinkListRepFactory(4, 0, 3, true, 4)); ColumnFamilyHandle* handle; ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle)); } #endif // ROCKSDB_LITE TEST_F(DBTest, SanitizeNumThreads) { for (int attempt = 0; attempt < 2; attempt++) { const size_t kTotalTasks = 8; test::SleepingBackgroundTask sleeping_tasks[kTotalTasks]; Options options = CurrentOptions(); if (attempt == 0) { options.max_background_compactions = 3; options.max_background_flushes = 2; } options.create_if_missing = true; DestroyAndReopen(options); for (size_t i = 0; i < kTotalTasks; i++) { // Insert 5 tasks to low priority queue and 5 tasks to high priority queue env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_tasks[i], (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH); } // Wait until 10s for they are scheduled. for (int i = 0; i < 10000; i++) { if (options.env->GetThreadPoolQueueLen(Env::Priority::LOW) <= 1 && options.env->GetThreadPoolQueueLen(Env::Priority::HIGH) <= 2) { break; } env_->SleepForMicroseconds(1000); } // pool size 3, total task 4. Queue size should be 1. ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW)); // pool size 2, total task 4. Queue size should be 2. 
ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH)); for (size_t i = 0; i < kTotalTasks; i++) { sleeping_tasks[i].WakeUp(); sleeping_tasks[i].WaitUntilDone(); } ASSERT_OK(Put("abc", "def")); ASSERT_EQ("def", Get("abc")); Flush(); ASSERT_EQ("def", Get("abc")); } } TEST_F(DBTest, WriteSingleThreadEntry) { std::vector threads; dbfull()->TEST_LockMutex(); auto w = dbfull()->TEST_BeginWrite(); threads.emplace_back([&] { Put("a", "b"); }); env_->SleepForMicroseconds(10000); threads.emplace_back([&] { Flush(); }); env_->SleepForMicroseconds(10000); dbfull()->TEST_UnlockMutex(); dbfull()->TEST_LockMutex(); dbfull()->TEST_EndWrite(w); dbfull()->TEST_UnlockMutex(); for (auto& t : threads) { t.join(); } } TEST_F(DBTest, ConcurrentFlushWAL) { const size_t cnt = 100; Options options; WriteOptions wopt; ReadOptions ropt; for (bool two_write_queues : {false, true}) { for (bool manual_wal_flush : {false, true}) { options.two_write_queues = two_write_queues; options.manual_wal_flush = manual_wal_flush; options.create_if_missing = true; DestroyAndReopen(options); std::vector threads; threads.emplace_back([&] { for (size_t i = 0; i < cnt; i++) { auto istr = ToString(i); db_->Put(wopt, db_->DefaultColumnFamily(), "a" + istr, "b" + istr); } }); if (two_write_queues) { threads.emplace_back([&] { for (size_t i = cnt; i < 2 * cnt; i++) { auto istr = ToString(i); WriteBatch batch; batch.Put("a" + istr, "b" + istr); dbfull()->WriteImpl(wopt, &batch, nullptr, nullptr, 0, true); } }); } threads.emplace_back([&] { for (size_t i = 0; i < cnt * 100; i++) { // FlushWAL is faster than Put db_->FlushWAL(false); } }); for (auto& t : threads) { t.join(); } options.create_if_missing = false; // Recover from the wal and make sure that it is not corrupted Reopen(options); for (size_t i = 0; i < cnt; i++) { PinnableSlice pval; auto istr = ToString(i); ASSERT_OK( db_->Get(ropt, db_->DefaultColumnFamily(), "a" + istr, &pval)); ASSERT_TRUE(pval == ("b" + istr)); } } } } #ifndef 
ROCKSDB_LITE TEST_F(DBTest, DynamicMemtableOptions) { const uint64_t k64KB = 1 << 16; const uint64_t k128KB = 1 << 17; const uint64_t k5KB = 5 * 1024; Options options; options.env = env_; options.create_if_missing = true; options.compression = kNoCompression; options.max_background_compactions = 1; options.write_buffer_size = k64KB; options.arena_block_size = 16 * 1024; options.max_write_buffer_number = 2; // Don't trigger compact/slowdown/stop options.level0_file_num_compaction_trigger = 1024; options.level0_slowdown_writes_trigger = 1024; options.level0_stop_writes_trigger = 1024; DestroyAndReopen(options); auto gen_l0_kb = [this](int size) { const int kNumPutsBeforeWaitForFlush = 64; Random rnd(301); for (int i = 0; i < size; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024))); // The following condition prevents a race condition between flush jobs // acquiring work and this thread filling up multiple memtables. Without // this, the flush might produce less files than expected because // multiple memtables are flushed into a single L0 file. This race // condition affects assertion (A). if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) { dbfull()->TEST_WaitForFlushMemTable(); } } dbfull()->TEST_WaitForFlushMemTable(); }; // Test write_buffer_size gen_l0_kb(64); ASSERT_EQ(NumTableFilesAtLevel(0), 1); ASSERT_LT(SizeAtLevel(0), k64KB + k5KB); ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2); // Clean up L0 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(0), 0); // Increase buffer size ASSERT_OK(dbfull()->SetOptions({ {"write_buffer_size", "131072"}, })); // The existing memtable inflated 64KB->128KB when we invoked SetOptions(). // Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data. 
gen_l0_kb(192); ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A) ASSERT_LT(SizeAtLevel(0), k128KB + 2 * k5KB); ASSERT_GT(SizeAtLevel(0), k128KB - 4 * k5KB); // Decrease buffer size below current usage ASSERT_OK(dbfull()->SetOptions({ {"write_buffer_size", "65536"}, })); // The existing memtable became eligible for flush when we reduced its // capacity to 64KB. Two keys need to be added to trigger flush: first causes // memtable to be marked full, second schedules the flush. Then we should have // a 128KB L0 file, a 64KB L0 file, and a memtable with just one key. gen_l0_kb(2); ASSERT_EQ(NumTableFilesAtLevel(0), 2); ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB); ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB); // Test max_write_buffer_number // Block compaction thread, which will also block the flushes because // max_background_flushes == 0, so flushes are getting executed by the // compaction thread env_->SetBackgroundThreads(1, Env::LOW); test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); // Start from scratch and disable compaction/flush. Flush can only happen // during compaction but trigger is pretty high options.disable_auto_compactions = true; DestroyAndReopen(options); env_->SetBackgroundThreads(0, Env::HIGH); // Put until writes are stopped, bounded by 256 puts. 
We should see stop at // ~128KB int count = 0; Random rnd(301); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) { sleeping_task_low.WakeUp(); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); while (!sleeping_task_low.WokenUp() && count < 256) { ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions())); count++; } ASSERT_GT(static_cast(count), 128 * 0.8); ASSERT_LT(static_cast(count), 128 * 1.2); sleeping_task_low.WaitUntilDone(); // Increase ASSERT_OK(dbfull()->SetOptions({ {"max_write_buffer_number", "8"}, })); // Clean up memtable and L0 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); sleeping_task_low.Reset(); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); count = 0; while (!sleeping_task_low.WokenUp() && count < 1024) { ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions())); count++; } // Windows fails this test. Will tune in the future and figure out // approp number #ifndef OS_WIN ASSERT_GT(static_cast(count), 512 * 0.8); ASSERT_LT(static_cast(count), 512 * 1.2); #endif sleeping_task_low.WaitUntilDone(); // Decrease ASSERT_OK(dbfull()->SetOptions({ {"max_write_buffer_number", "4"}, })); // Clean up memtable and L0 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); sleeping_task_low.Reset(); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); count = 0; while (!sleeping_task_low.WokenUp() && count < 1024) { ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions())); count++; } // Windows fails this test. 
Will tune in the future and figure out // approp number #ifndef OS_WIN ASSERT_GT(static_cast(count), 256 * 0.8); ASSERT_LT(static_cast(count), 266 * 1.2); #endif sleeping_task_low.WaitUntilDone(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif // ROCKSDB_LITE #ifdef ROCKSDB_USING_THREAD_STATUS namespace { void VerifyOperationCount(Env* env, ThreadStatus::OperationType op_type, int expected_count) { int op_count = 0; std::vector thread_list; ASSERT_OK(env->GetThreadList(&thread_list)); for (auto thread : thread_list) { if (thread.operation_type == op_type) { op_count++; } } ASSERT_EQ(op_count, expected_count); } } // namespace TEST_F(DBTest, GetThreadStatus) { Options options; options.env = env_; options.enable_thread_tracking = true; TryReopen(options); std::vector thread_list; Status s = env_->GetThreadList(&thread_list); for (int i = 0; i < 2; ++i) { // repeat the test with differet number of high / low priority threads const int kTestCount = 3; const unsigned int kHighPriCounts[kTestCount] = {3, 2, 5}; const unsigned int kLowPriCounts[kTestCount] = {10, 15, 3}; const unsigned int kBottomPriCounts[kTestCount] = {2, 1, 4}; for (int test = 0; test < kTestCount; ++test) { // Change the number of threads in high / low priority pool. env_->SetBackgroundThreads(kHighPriCounts[test], Env::HIGH); env_->SetBackgroundThreads(kLowPriCounts[test], Env::LOW); env_->SetBackgroundThreads(kBottomPriCounts[test], Env::BOTTOM); // Wait to ensure the all threads has been registered unsigned int thread_type_counts[ThreadStatus::NUM_THREAD_TYPES]; // TODO(ajkr): it'd be better if SetBackgroundThreads returned only after // all threads have been registered. // Try up to 60 seconds. 
for (int num_try = 0; num_try < 60000; num_try++) { env_->SleepForMicroseconds(1000); thread_list.clear(); s = env_->GetThreadList(&thread_list); ASSERT_OK(s); memset(thread_type_counts, 0, sizeof(thread_type_counts)); for (auto thread : thread_list) { ASSERT_LT(thread.thread_type, ThreadStatus::NUM_THREAD_TYPES); thread_type_counts[thread.thread_type]++; } if (thread_type_counts[ThreadStatus::HIGH_PRIORITY] == kHighPriCounts[test] && thread_type_counts[ThreadStatus::LOW_PRIORITY] == kLowPriCounts[test] && thread_type_counts[ThreadStatus::BOTTOM_PRIORITY] == kBottomPriCounts[test]) { break; } } // Verify the number of high-priority threads ASSERT_EQ(thread_type_counts[ThreadStatus::HIGH_PRIORITY], kHighPriCounts[test]); // Verify the number of low-priority threads ASSERT_EQ(thread_type_counts[ThreadStatus::LOW_PRIORITY], kLowPriCounts[test]); // Verify the number of bottom-priority threads ASSERT_EQ(thread_type_counts[ThreadStatus::BOTTOM_PRIORITY], kBottomPriCounts[test]); } if (i == 0) { // repeat the test with multiple column families CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options); env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, true); } } db_->DropColumnFamily(handles_[2]); delete handles_[2]; handles_.erase(handles_.begin() + 2); env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, true); Close(); env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, true); } TEST_F(DBTest, DisableThreadStatus) { Options options; options.env = env_; options.enable_thread_tracking = false; TryReopen(options); CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options); // Verify non of the column family info exists env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, false); } TEST_F(DBTest, ThreadStatusFlush) { Options options; options.env = env_; options.write_buffer_size = 100000; // Small write buffer options.enable_thread_tracking = true; options = 
CurrentOptions(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"FlushJob::FlushJob()", "DBTest::ThreadStatusFlush:1"}, {"DBTest::ThreadStatusFlush:2", "FlushJob::WriteLevel0Table"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu"}, options); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_EQ("v1", Get(1, "foo")); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 0); uint64_t num_running_flushes = 0; db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes); ASSERT_EQ(num_running_flushes, 0); Put(1, "k1", std::string(100000, 'x')); // Fill memtable Put(1, "k2", std::string(100000, 'y')); // Trigger flush // The first sync point is to make sure there's one flush job // running when we perform VerifyOperationCount(). TEST_SYNC_POINT("DBTest::ThreadStatusFlush:1"); VerifyOperationCount(env_, ThreadStatus::OP_FLUSH, 1); db_->GetIntProperty(DB::Properties::kNumRunningFlushes, &num_running_flushes); ASSERT_EQ(num_running_flushes, 1); // This second sync point is to ensure the flush job will not // be completed until we already perform VerifyOperationCount(). 
TEST_SYNC_POINT("DBTest::ThreadStatusFlush:2");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Runs the same scenario twice: first with enable_thread_tracking set, then
// with it cleared. In each round kNumL0Files L0 files are written, a
// compaction is held between "CompactionJob::Run():Start" and
// "CompactionJob::Run():End" by sync points, and while it is held the test
// checks
//   - the OP_COMPACTION count seen by VerifyOperationCount() (1 with thread
//     tracking, 0 without), and
//   - that DB::Properties::kNumRunningCompactions reports 1.
TEST_P(DBTestWithParam, ThreadStatusSingleCompaction) {
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 100;
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base = options.target_file_size_base * 2;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;
  options = CurrentOptions(options);
  options.env = env_;
  options.enable_thread_tracking = true;
  const int kNumL0Files = 4;
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.max_subcompactions = max_subcompactions_;

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBTest::ThreadStatusSingleCompaction:0", "DBImpl::BGWorkCompaction"},
      {"CompactionJob::Run():Start", "DBTest::ThreadStatusSingleCompaction:1"},
      {"DBTest::ThreadStatusSingleCompaction:2", "CompactionJob::Run():End"},
  });
  for (int tests = 0; tests < 2; ++tests) {
    DestroyAndReopen(options);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    Random rnd(301);
    // The Put Phase.
    for (int file = 0; file < kNumL0Files; ++file) {
      for (int key = 0; key < kEntriesPerBuffer; ++key) {
        ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
                      RandomString(&rnd, kTestValueSize)));
      }
      // A failed flush would leave fewer L0 files than the trigger and make
      // the later checks misleading, so fail right here instead.
      ASSERT_OK(Flush());
    }
    // This makes sure a compaction won't be scheduled until
    // we have done with the above Put Phase.
    uint64_t num_running_compactions = 0;
    // GetIntProperty() reports failure through its return value; without the
    // check a failed lookup would leave the initial 0 in place and pass.
    ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
                                    &num_running_compactions));
    ASSERT_EQ(num_running_compactions, 0);
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:0");
    ASSERT_GE(NumTableFilesAtLevel(0),
              options.level0_file_num_compaction_trigger);

    // This makes sure at least one compaction is running.
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:1");

    if (options.enable_thread_tracking) {
      // expecting one single L0 to L1 compaction
      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 1);
    } else {
      // If thread tracking is not enabled, compaction count should be 0.
      VerifyOperationCount(env_, ThreadStatus::OP_COMPACTION, 0);
    }
    ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumRunningCompactions,
                                    &num_running_compactions));
    ASSERT_EQ(num_running_compactions, 1);
    // TODO(yhchiang): adding assert to verify each compaction stage.
    TEST_SYNC_POINT("DBTest::ThreadStatusSingleCompaction:2");

    // repeat the test with disabling thread tracking.
    options.enable_thread_tracking = false;
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_P(DBTestWithParam, PreShutdownManualCompaction) {
  Options options = CurrentOptions();
  options.max_subcompactions = max_subcompactions_;
  CreateAndReopenWithCF({"pikachu"}, options);

  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range overlaps files
    Compact(1, "p", "q");
    ASSERT_EQ("0,0,1", FilesPerLevel(1));

    // Populate a different range
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("1,1,2", FilesPerLevel(1));

    // Compact just the new range
    Compact(1, "b", "f");
    ASSERT_EQ("0,0,2", FilesPerLevel(1));

    // Compact all
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("1,0,2",
FilesPerLevel(1)); CancelAllBackgroundWork(db_); db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_EQ("1,0,2", FilesPerLevel(1)); if (iter == 0) { options = CurrentOptions(); options.num_levels = 3; options.create_if_missing = true; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); } } } TEST_F(DBTest, PreShutdownFlush) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "key", "value")); CancelAllBackgroundWork(db_); Status s = db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); ASSERT_TRUE(s.IsShutdownInProgress()); } TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) { const int kTestKeySize = 16; const int kTestValueSize = 984; const int kEntrySize = kTestKeySize + kTestValueSize; const int kEntriesPerBuffer = 40; const int kNumL0Files = 4; const int kHighPriCount = 3; const int kLowPriCount = 5; env_->SetBackgroundThreads(kHighPriCount, Env::HIGH); env_->SetBackgroundThreads(kLowPriCount, Env::LOW); Options options; options.create_if_missing = true; options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.compaction_style = kCompactionStyleLevel; options.target_file_size_base = options.write_buffer_size; options.max_bytes_for_level_base = options.target_file_size_base * kNumL0Files; options.compression = kNoCompression; options = CurrentOptions(options); options.env = env_; options.enable_thread_tracking = true; options.level0_file_num_compaction_trigger = kNumL0Files; options.max_bytes_for_level_multiplier = 2; options.max_background_compactions = kLowPriCount; options.level0_stop_writes_trigger = 1 << 10; options.level0_slowdown_writes_trigger = 1 << 10; options.max_subcompactions = max_subcompactions_; TryReopen(options); Random rnd(301); std::vector thread_list; // Delay both flush and compaction ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"FlushJob::FlushJob()", "CompactionJob::Run():Start"}, 
{"CompactionJob::Run():Start", "DBTest::PreShutdownMultipleCompaction:Preshutdown"}, {"CompactionJob::Run():Start", "DBTest::PreShutdownMultipleCompaction:VerifyCompaction"}, {"DBTest::PreShutdownMultipleCompaction:Preshutdown", "CompactionJob::Run():End"}, {"CompactionJob::Run():End", "DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Make rocksdb busy int key = 0; // check how many threads are doing compaction using GetThreadList int operation_count[ThreadStatus::NUM_OP_TYPES] = {0}; for (int file = 0; file < 16 * kNumL0Files; ++file) { for (int k = 0; k < kEntriesPerBuffer; ++k) { ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize))); } Status s = env_->GetThreadList(&thread_list); for (auto thread : thread_list) { operation_count[thread.operation_type]++; } // Speed up the test if (operation_count[ThreadStatus::OP_FLUSH] > 1 && operation_count[ThreadStatus::OP_COMPACTION] > 0.6 * options.max_background_compactions) { break; } if (file == 15 * kNumL0Files) { TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown"); } } TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:Preshutdown"); ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1); CancelAllBackgroundWork(db_); TEST_SYNC_POINT("DBTest::PreShutdownMultipleCompaction:VerifyPreshutdown"); dbfull()->TEST_WaitForCompact(); // Record the number of compactions at a time. 
for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) { operation_count[i] = 0; } Status s = env_->GetThreadList(&thread_list); for (auto thread : thread_list) { operation_count[thread.operation_type]++; } ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0); } TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) { const int kTestKeySize = 16; const int kTestValueSize = 984; const int kEntrySize = kTestKeySize + kTestValueSize; const int kEntriesPerBuffer = 40; const int kNumL0Files = 4; const int kHighPriCount = 3; const int kLowPriCount = 5; env_->SetBackgroundThreads(kHighPriCount, Env::HIGH); env_->SetBackgroundThreads(kLowPriCount, Env::LOW); Options options; options.create_if_missing = true; options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.compaction_style = kCompactionStyleLevel; options.target_file_size_base = options.write_buffer_size; options.max_bytes_for_level_base = options.target_file_size_base * kNumL0Files; options.compression = kNoCompression; options = CurrentOptions(options); options.env = env_; options.enable_thread_tracking = true; options.level0_file_num_compaction_trigger = kNumL0Files; options.max_bytes_for_level_multiplier = 2; options.max_background_compactions = kLowPriCount; options.level0_stop_writes_trigger = 1 << 10; options.level0_slowdown_writes_trigger = 1 << 10; options.max_subcompactions = max_subcompactions_; TryReopen(options); Random rnd(301); std::vector thread_list; // Delay both flush and compaction ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBTest::PreShutdownCompactionMiddle:Preshutdown", "CompactionJob::Run():Inprogress"}, {"CompactionJob::Run():Start", "DBTest::PreShutdownCompactionMiddle:VerifyCompaction"}, {"CompactionJob::Run():Inprogress", "CompactionJob::Run():End"}, {"CompactionJob::Run():End", "DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Make rocksdb busy int key = 0; // check how many 
threads are doing compaction using GetThreadList int operation_count[ThreadStatus::NUM_OP_TYPES] = {0}; for (int file = 0; file < 16 * kNumL0Files; ++file) { for (int k = 0; k < kEntriesPerBuffer; ++k) { ASSERT_OK(Put(ToString(key++), RandomString(&rnd, kTestValueSize))); } Status s = env_->GetThreadList(&thread_list); for (auto thread : thread_list) { operation_count[thread.operation_type]++; } // Speed up the test if (operation_count[ThreadStatus::OP_FLUSH] > 1 && operation_count[ThreadStatus::OP_COMPACTION] > 0.6 * options.max_background_compactions) { break; } if (file == 15 * kNumL0Files) { TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyCompaction"); } } ASSERT_GE(operation_count[ThreadStatus::OP_COMPACTION], 1); CancelAllBackgroundWork(db_); TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:Preshutdown"); TEST_SYNC_POINT("DBTest::PreShutdownCompactionMiddle:VerifyPreshutdown"); dbfull()->TEST_WaitForCompact(); // Record the number of compactions at a time. for (int i = 0; i < ThreadStatus::NUM_OP_TYPES; ++i) { operation_count[i] = 0; } Status s = env_->GetThreadList(&thread_list); for (auto thread : thread_list) { operation_count[thread.operation_type]++; } ASSERT_EQ(operation_count[ThreadStatus::OP_COMPACTION], 0); } #endif // ROCKSDB_USING_THREAD_STATUS #ifndef ROCKSDB_LITE TEST_F(DBTest, FlushOnDestroy) { WriteOptions wo; wo.disableWAL = true; ASSERT_OK(Put("foo", "v1", wo)); CancelAllBackgroundWork(db_); } TEST_F(DBTest, DynamicLevelCompressionPerLevel) { if (!Snappy_Supported()) { return; } const int kNKeys = 120; int keys[kNKeys]; for (int i = 0; i < kNKeys; i++) { keys[i] = i; } RandomShuffle(std::begin(keys), std::end(keys)); Random rnd(301); Options options; options.create_if_missing = true; options.db_write_buffer_size = 20480; options.write_buffer_size = 20480; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 2; 
options.target_file_size_base = 20480; options.level_compaction_dynamic_level_bytes = true; options.max_bytes_for_level_base = 102400; options.max_bytes_for_level_multiplier = 4; options.max_background_compactions = 1; options.num_levels = 5; options.compression_per_level.resize(3); options.compression_per_level[0] = kNoCompression; options.compression_per_level[1] = kNoCompression; options.compression_per_level[2] = kSnappyCompression; OnFileDeletionListener* listener = new OnFileDeletionListener(); options.listeners.emplace_back(listener); DestroyAndReopen(options); // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should // be compressed, so total data size should be more than 80K. for (int i = 0; i < 20; i++) { ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000))); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_EQ(NumTableFilesAtLevel(2), 0); ASSERT_EQ(NumTableFilesAtLevel(3), 0); // Assuming each files' metadata is at least 50 bytes/ ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U + 50U * 4); // Insert 400KB. 
Some data will be compressed for (int i = 21; i < 120; i++) { ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000))); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_EQ(NumTableFilesAtLevel(2), 0); ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4), 120U * 4000U + 50U * 24); // Make sure data in files in L3 is not compacted by removing all files // in L4 and calculate number of rows ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "true"}, })); ColumnFamilyMetaData cf_meta; db_->GetColumnFamilyMetaData(&cf_meta); for (auto file : cf_meta.levels[4].files) { listener->SetExpectedFileName(dbname_ + file.name); ASSERT_OK(dbfull()->DeleteFile(file.name)); } listener->VerifyMatchedCount(cf_meta.levels[4].files.size()); int num_keys = 0; std::unique_ptr iter(db_->NewIterator(ReadOptions())); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { num_keys++; } ASSERT_OK(iter->status()); ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U + num_keys * 10U); } TEST_F(DBTest, DynamicLevelCompressionPerLevel2) { if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) { return; } const int kNKeys = 500; int keys[kNKeys]; for (int i = 0; i < kNKeys; i++) { keys[i] = i; } RandomShuffle(std::begin(keys), std::end(keys)); Random rnd(301); Options options; options.create_if_missing = true; options.db_write_buffer_size = 6000000; options.write_buffer_size = 600000; options.max_write_buffer_number = 2; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 2; options.soft_pending_compaction_bytes_limit = 1024 * 1024; options.target_file_size_base = 20; options.level_compaction_dynamic_level_bytes = true; options.max_bytes_for_level_base = 200; options.max_bytes_for_level_multiplier = 8; options.max_background_compactions = 1; options.num_levels = 5; std::shared_ptr mtf(new mock::MockTableFactory); options.table_factory = 
mtf; options.compression_per_level.resize(3); options.compression_per_level[0] = kNoCompression; options.compression_per_level[1] = kLZ4Compression; options.compression_per_level[2] = kZlibCompression; DestroyAndReopen(options); // When base level is L4, L4 is LZ4. std::atomic num_zlib(0); std::atomic num_lz4(0); std::atomic num_no(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); if (compaction->output_level() == 4) { ASSERT_TRUE(compaction->output_compression() == kLZ4Compression); num_lz4.fetch_add(1); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) { auto* compression = reinterpret_cast(arg); ASSERT_TRUE(*compression == kNoCompression); num_no.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); for (int i = 0; i < 100; i++) { std::string value = RandomString(&rnd, 200); ASSERT_OK(Put(Key(keys[i]), value)); if (i % 25 == 24) { Flush(); dbfull()->TEST_WaitForCompact(); } } Flush(); dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_EQ(NumTableFilesAtLevel(2), 0); ASSERT_EQ(NumTableFilesAtLevel(3), 0); ASSERT_GT(NumTableFilesAtLevel(4), 0); ASSERT_GT(num_no.load(), 2); ASSERT_GT(num_lz4.load(), 0); int prev_num_files_l4 = NumTableFilesAtLevel(4); // After base level turn L4->L3, L3 becomes LZ4 and L4 becomes Zlib num_lz4.store(0); num_no.store(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); if (compaction->output_level() == 4 && compaction->start_level() == 3) { ASSERT_TRUE(compaction->output_compression() == 
kZlibCompression); num_zlib.fetch_add(1); } else { ASSERT_TRUE(compaction->output_compression() == kLZ4Compression); num_lz4.fetch_add(1); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) { auto* compression = reinterpret_cast(arg); ASSERT_TRUE(*compression == kNoCompression); num_no.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); for (int i = 101; i < 500; i++) { std::string value = RandomString(&rnd, 200); ASSERT_OK(Put(Key(keys[i]), value)); if (i % 100 == 99) { Flush(); dbfull()->TEST_WaitForCompact(); } } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_EQ(NumTableFilesAtLevel(2), 0); ASSERT_GT(NumTableFilesAtLevel(3), 0); ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4); ASSERT_GT(num_no.load(), 2); ASSERT_GT(num_lz4.load(), 0); ASSERT_GT(num_zlib.load(), 0); } TEST_F(DBTest, DynamicCompactionOptions) { // minimum write buffer size is enforced at 64KB const uint64_t k32KB = 1 << 15; const uint64_t k64KB = 1 << 16; const uint64_t k128KB = 1 << 17; const uint64_t k1MB = 1 << 20; const uint64_t k4KB = 1 << 12; Options options; options.env = env_; options.create_if_missing = true; options.compression = kNoCompression; options.soft_pending_compaction_bytes_limit = 1024 * 1024; options.write_buffer_size = k64KB; options.arena_block_size = 4 * k4KB; options.max_write_buffer_number = 2; // Compaction related options options.level0_file_num_compaction_trigger = 3; options.level0_slowdown_writes_trigger = 4; options.level0_stop_writes_trigger = 8; options.target_file_size_base = k64KB; options.max_compaction_bytes = options.target_file_size_base * 10; options.target_file_size_multiplier = 1; options.max_bytes_for_level_base = k128KB; options.max_bytes_for_level_multiplier = 4; // Block flush thread and disable compaction 
thread env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); DestroyAndReopen(options); auto gen_l0_kb = [this](int start, int size, int stride) { Random rnd(301); for (int i = 0; i < size; i++) { ASSERT_OK(Put(Key(start + stride * i), RandomString(&rnd, 1024))); } dbfull()->TEST_WaitForFlushMemTable(); }; // Write 3 files that have the same key range. // Since level0_file_num_compaction_trigger is 3, compaction should be // triggered. The compaction should result in one L1 file gen_l0_kb(0, 64, 1); ASSERT_EQ(NumTableFilesAtLevel(0), 1); gen_l0_kb(0, 64, 1); ASSERT_EQ(NumTableFilesAtLevel(0), 2); gen_l0_kb(0, 64, 1); dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,1", FilesPerLevel()); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(1U, metadata.size()); ASSERT_LE(metadata[0].size, k64KB + k4KB); ASSERT_GE(metadata[0].size, k64KB - k4KB); // Test compaction trigger and target_file_size_base // Reduce compaction trigger to 2, and reduce L1 file size to 32KB. // Writing to 64KB L0 files should trigger a compaction. Since these // 2 L0 files have the same key range, compaction merge them and should // result in 2 32KB L1 files. ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}, {"target_file_size_base", ToString(k32KB)}})); gen_l0_kb(0, 64, 1); ASSERT_EQ("1,1", FilesPerLevel()); gen_l0_kb(0, 64, 1); dbfull()->TEST_WaitForCompact(); ASSERT_EQ("0,2", FilesPerLevel()); metadata.clear(); db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(2U, metadata.size()); ASSERT_LE(metadata[0].size, k32KB + k4KB); ASSERT_GE(metadata[0].size, k32KB - k4KB); ASSERT_LE(metadata[1].size, k32KB + k4KB); ASSERT_GE(metadata[1].size, k32KB - k4KB); // Test max_bytes_for_level_base // Increase level base size to 256KB and write enough data that will // fill L1 and L2. L1 size should be around 256KB while L2 size should be // around 256KB x 4. 
ASSERT_OK(
      dbfull()->SetOptions({{"max_bytes_for_level_base", ToString(k1MB)}}));

  // writing 96 x 64KB => 6 * 1024KB
  // (L1 + L2) = (1 + 4) * 1024KB
  for (int i = 0; i < 96; ++i) {
    gen_l0_kb(i, 64, 96);
  }
  dbfull()->TEST_WaitForCompact();
  // L1 within (0.5, 1.5) of 1MB.
  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);

  // Within (0.5, 1.5) of 4MB.
  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);

  // Test max_bytes_for_level_multiplier and
  // max_bytes_for_level_base. Now, reduce both multiplier and level base.
  // After filling enough data that can fit in L1 - L3, we should see L1 size
  // reduced to 128KB from the 1MB which was asserted previously. Same for L2.
  ASSERT_OK(
      dbfull()->SetOptions({{"max_bytes_for_level_multiplier", "2"},
                            {"max_bytes_for_level_base", ToString(k128KB)}}));

  // writing 20 x 64KB = 10 x 128KB
  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
  for (int i = 0; i < 20; ++i) {
    gen_l0_kb(i, 64, 32);
  }
  dbfull()->TEST_WaitForCompact();
  uint64_t total_size = SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
  // Allow up to 1.5x of the nominal (1 + 2 + 4) * 128KB.
  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);

  // Test level0_stop_writes_trigger.
  // Clean up memtable and L0. Block compaction threads. If we continue to
  // write and flush memtables, we should see puts stop after 8 memtable
  // flushes since level0_stop_writes_trigger = 8
  dbfull()->TEST_FlushMemTable(true, true);
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  // Block compaction
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  int count = 0;
  Random rnd(301);
  WriteOptions wo;
  // One key + one flush per iteration, i.e. one new L0 file each time, until
  // the write controller reports that writes are stopped.
  while (count < 64) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
    dbfull()->TEST_FlushMemTable(true, true);
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  // Stop trigger = 8
  ASSERT_EQ(count, 8);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
  // Block compaction thread again. Perform the put and memtable flushes
  // until we see the stop after 6 memtable flushes.
  ASSERT_OK(dbfull()->SetOptions({{"level0_stop_writes_trigger", "6"}}));
  dbfull()->TEST_FlushMemTable(true);
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Block compaction again
  sleeping_task_low.Reset();
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  count = 0;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
    dbfull()->TEST_FlushMemTable(true, true);
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  ASSERT_EQ(count, 6);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Test disable_auto_compactions
  // Compaction thread is unblocked but auto compaction is disabled. Write
  // 4 L0 files, which would normally trigger a compaction. Since auto
  // compaction is disabled, TEST_WaitForCompact will be waiting for nothing
  // and the number of L0 files does not change after the call.
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't stop
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(NumTableFilesAtLevel(0), 4);

  // Enable auto compaction and perform the same test, # of L0 files should be
  // reduced after compaction.
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
  dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't stop
    dbfull()->TEST_FlushMemTable(true);
  }
  dbfull()->TEST_WaitForCompact();
  ASSERT_LT(NumTableFilesAtLevel(0), 4);
}

// Test dynamic FIFO compaction options.
// This test covers just option parsing and makes sure that the options are
// correctly assigned. Also look at DBOptionsTest.SetFIFOCompactionOptions
// test which makes sure that the FIFO compaction functionality is working
// as expected on dynamically changing the options.
// Even more FIFOCompactionTests are at DBTest.FIFOCompaction* .
TEST_F(DBTest, DynamicFIFOCompactionOptions) {
  Options initial_options;
  initial_options.ttl = 0;
  initial_options.create_if_missing = true;
  DestroyAndReopen(initial_options);

  // Each step below applies one SetOptions() call (two for the last step) and
  // then inspects a snapshot of the options the DB reports: the FIFO
  // max_table_files_size, the ttl, and the FIFO allow_compaction flag.

  // Initial defaults
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size,
              1024 * 1024 * 1024);
    ASSERT_EQ(live.ttl, 0);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, false);
  }

  // Only max_table_files_size changes.
  ASSERT_OK(dbfull()->SetOptions(
      {{"compaction_options_fifo", "{max_table_files_size=23;}"}}));
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size, 23);
    ASSERT_EQ(live.ttl, 0);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, false);
  }

  // Only ttl changes.
  ASSERT_OK(dbfull()->SetOptions({{"ttl", "97"}}));
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size, 23);
    ASSERT_EQ(live.ttl, 97);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, false);
  }

  ASSERT_OK(dbfull()->SetOptions({{"ttl", "203"}}));
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size, 23);
    ASSERT_EQ(live.ttl, 203);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, false);
  }

  // Only allow_compaction changes.
  ASSERT_OK(dbfull()->SetOptions(
      {{"compaction_options_fifo", "{allow_compaction=true;}"}}));
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size, 23);
    ASSERT_EQ(live.ttl, 203);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, true);
  }

  ASSERT_OK(dbfull()->SetOptions(
      {{"compaction_options_fifo", "{max_table_files_size=31;}"}}));
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size, 31);
    ASSERT_EQ(live.ttl, 203);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, true);
  }

  // Two FIFO fields in one string, followed by a separate ttl update.
  ASSERT_OK(dbfull()->SetOptions(
      {{"compaction_options_fifo",
        "{max_table_files_size=51;allow_compaction=true;}"}}));
  ASSERT_OK(dbfull()->SetOptions({{"ttl", "49"}}));
  {
    const Options live = dbfull()->GetOptions();
    ASSERT_EQ(live.compaction_options_fifo.max_table_files_size, 51);
    ASSERT_EQ(live.ttl, 49);
    ASSERT_EQ(live.compaction_options_fifo.allow_compaction, true);
  }
}

TEST_F(DBTest, DynamicUniversalCompactionOptions) {
  Options options;
  options.create_if_missing = true;
  DestroyAndReopen(options);

  // Initial defaults
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 1U);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
            2u);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
            UINT_MAX);
  ASSERT_EQ(dbfull()
                ->GetOptions()
                .compaction_options_universal.max_size_amplification_percent,
            200u);
  ASSERT_EQ(dbfull()
                ->GetOptions()
                .compaction_options_universal.compression_size_percent,
            -1);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
            kCompactionStopStyleTotalSize);
  ASSERT_EQ(
      dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
      false);

  ASSERT_OK(dbfull()->SetOptions(
      {{"compaction_options_universal", "{size_ratio=7;}"}}));
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
            2u);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width,
            UINT_MAX);
  ASSERT_EQ(dbfull()
                ->GetOptions()
                .compaction_options_universal.max_size_amplification_percent,
            200u);
  ASSERT_EQ(dbfull()
                ->GetOptions()
                .compaction_options_universal.compression_size_percent,
            -1);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style,
            kCompactionStopStyleTotalSize);
  ASSERT_EQ(
      dbfull()->GetOptions().compaction_options_universal.allow_trivial_move,
      false);

  ASSERT_OK(dbfull()->SetOptions(
      {{"compaction_options_universal", "{min_merge_width=11;}"}}));
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.size_ratio, 7u);
  ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.min_merge_width,
11u); ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.max_merge_width, UINT_MAX); ASSERT_EQ(dbfull() ->GetOptions() .compaction_options_universal.max_size_amplification_percent, 200u); ASSERT_EQ(dbfull() ->GetOptions() .compaction_options_universal.compression_size_percent, -1); ASSERT_EQ(dbfull()->GetOptions().compaction_options_universal.stop_style, kCompactionStopStyleTotalSize); ASSERT_EQ( dbfull()->GetOptions().compaction_options_universal.allow_trivial_move, false); } #endif // ROCKSDB_LITE TEST_F(DBTest, FileCreationRandomFailure) { Options options; options.env = env_; options.create_if_missing = true; options.write_buffer_size = 100000; // Small write buffer options.target_file_size_base = 200000; options.max_bytes_for_level_base = 1000000; options.max_bytes_for_level_multiplier = 2; DestroyAndReopen(options); Random rnd(301); const int kCDTKeysPerBuffer = 4; const int kTestSize = kCDTKeysPerBuffer * 4096; const int kTotalIteration = 100; // the second half of the test involves in random failure // of file creation. const int kRandomFailureTest = kTotalIteration / 2; std::vector values; for (int i = 0; i < kTestSize; ++i) { values.push_back("NOT_FOUND"); } for (int j = 0; j < kTotalIteration; ++j) { if (j == kRandomFailureTest) { env_->non_writeable_rate_.store(90); } for (int k = 0; k < kTestSize; ++k) { // here we expect some of the Put fails. std::string value = RandomString(&rnd, 100); Status s = Put(Key(k), Slice(value)); if (s.ok()) { // update the latest successful put values[k] = value; } // But everything before we simulate the failure-test should succeed. if (j < kRandomFailureTest) { ASSERT_OK(s); } } } // If rocksdb does not do the correct job, internal assert will fail here. 
dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); // verify we have the latest successful update for (int k = 0; k < kTestSize; ++k) { auto v = Get(Key(k)); ASSERT_EQ(v, values[k]); } // reopen and reverify we have the latest successful update env_->non_writeable_rate_.store(0); Reopen(options); for (int k = 0; k < kTestSize; ++k) { auto v = Get(Key(k)); ASSERT_EQ(v, values[k]); } } #ifndef ROCKSDB_LITE TEST_F(DBTest, DynamicMiscOptions) { // Test max_sequential_skip_in_iterations Options options; options.env = env_; options.create_if_missing = true; options.max_sequential_skip_in_iterations = 16; options.compression = kNoCompression; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); auto assert_reseek_count = [this, &options](int key_start, int num_reseek) { int key0 = key_start; int key1 = key_start + 1; int key2 = key_start + 2; Random rnd(301); ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8))); for (int i = 0; i < 10; ++i) { ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8))); } ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8))); std::unique_ptr iter(db_->NewIterator(ReadOptions())); iter->Seek(Key(key1)); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Key(key1)), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(Key(key2)), 0); ASSERT_EQ(num_reseek, TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION)); }; // No reseek assert_reseek_count(100, 0); ASSERT_OK(dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "4"}})); // Clear memtable and make new option effective dbfull()->TEST_FlushMemTable(true); // Trigger reseek assert_reseek_count(200, 1); ASSERT_OK( dbfull()->SetOptions({{"max_sequential_skip_in_iterations", "16"}})); // Clear memtable and make new option effective dbfull()->TEST_FlushMemTable(true); // No reseek assert_reseek_count(300, 1); MutableCFOptions mutable_cf_options; CreateAndReopenWithCF({"pikachu"}, options); // Test 
soft_pending_compaction_bytes_limit, // hard_pending_compaction_bytes_limit ASSERT_OK(dbfull()->SetOptions( handles_[1], {{"soft_pending_compaction_bytes_limit", "200"}, {"hard_pending_compaction_bytes_limit", "300"}})); ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], &mutable_cf_options)); ASSERT_EQ(200, mutable_cf_options.soft_pending_compaction_bytes_limit); ASSERT_EQ(300, mutable_cf_options.hard_pending_compaction_bytes_limit); // Test report_bg_io_stats ASSERT_OK( dbfull()->SetOptions(handles_[1], {{"report_bg_io_stats", "true"}})); // sanity check ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], &mutable_cf_options)); ASSERT_TRUE(mutable_cf_options.report_bg_io_stats); // Test compression // sanity check ASSERT_OK(dbfull()->SetOptions({{"compression", "kNoCompression"}})); ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0], &mutable_cf_options)); ASSERT_EQ(CompressionType::kNoCompression, mutable_cf_options.compression); if (Snappy_Supported()) { ASSERT_OK(dbfull()->SetOptions({{"compression", "kSnappyCompression"}})); ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[0], &mutable_cf_options)); ASSERT_EQ(CompressionType::kSnappyCompression, mutable_cf_options.compression); } // Test paranoid_file_checks already done in db_block_cache_test ASSERT_OK( dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "true"}})); ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], &mutable_cf_options)); ASSERT_TRUE(mutable_cf_options.report_bg_io_stats); } #endif // ROCKSDB_LITE TEST_F(DBTest, L0L1L2AndUpHitCounter) { Options options = CurrentOptions(); options.write_buffer_size = 32 * 1024; options.target_file_size_base = 32 * 1024; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 4; options.max_bytes_for_level_base = 64 * 1024; options.max_write_buffer_number = 2; options.max_background_compactions = 8; 
options.max_background_flushes = 8; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); CreateAndReopenWithCF({"mypikachu"}, options); int numkeys = 20000; for (int i = 0; i < numkeys; i++) { ASSERT_OK(Put(1, Key(i), "val")); } ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); for (int i = 0; i < numkeys; i++) { ASSERT_EQ(Get(1, Key(i)), "val"); } ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100); ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100); ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100); ASSERT_EQ(numkeys, TestGetTickerCount(options, GET_HIT_L0) + TestGetTickerCount(options, GET_HIT_L1) + TestGetTickerCount(options, GET_HIT_L2_AND_UP)); } TEST_F(DBTest, EncodeDecompressedBlockSizeTest) { // iter 0 -- zlib // iter 1 -- bzip2 // iter 2 -- lz4 // iter 3 -- lz4HC // iter 4 -- xpress CompressionType compressions[] = {kZlibCompression, kBZip2Compression, kLZ4Compression, kLZ4HCCompression, kXpressCompression}; for (auto comp : compressions) { if (!CompressionTypeSupported(comp)) { continue; } // first_table_version 1 -- generate with table_version == 1, read with // table_version == 2 // first_table_version 2 -- generate with table_version == 2, read with // table_version == 1 for (int first_table_version = 1; first_table_version <= 2; ++first_table_version) { BlockBasedTableOptions table_options; table_options.format_version = first_table_version; table_options.filter_policy.reset(NewBloomFilterPolicy(10)); Options options = CurrentOptions(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.create_if_missing = true; options.compression = comp; DestroyAndReopen(options); int kNumKeysWritten = 1000; Random rnd(301); for (int i = 0; i < kNumKeysWritten; ++i) { // compressible string ASSERT_OK(Put(Key(i), RandomString(&rnd, 
128) + std::string(128, 'a'))); } table_options.format_version = first_table_version == 1 ? 2 : 1; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); for (int i = 0; i < kNumKeysWritten; ++i) { auto r = Get(Key(i)); ASSERT_EQ(r.substr(128), std::string(128, 'a')); } } } } TEST_F(DBTest, CloseSpeedup) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_write_buffer_number = 16; // Block background threads env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); test::SleepingBackgroundTask sleeping_task_high; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_high, Env::Priority::HIGH); std::vector filenames; env_->GetChildren(dbname_, &filenames); // Delete archival files. 
for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(dbname_ + "/" + filenames[i]); } env_->DeleteDir(dbname_); DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); Random rnd(301); int key_idx = 0; // First three 110KB files are not going to level 2 // After that, (100K, 200K) for (int num = 0; num < 5; num++) { GenerateNewFile(&rnd, &key_idx, true); } ASSERT_EQ(0, GetSstFileCount(dbname_)); Close(); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Unblock background threads sleeping_task_high.WakeUp(); sleeping_task_high.WaitUntilDone(); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); Destroy(options); } class DelayedMergeOperator : public MergeOperator { private: DBTest* db_test_; public: explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {} bool FullMergeV2(const MergeOperationInput& /*merge_in*/, MergeOperationOutput* merge_out) const override { db_test_->env_->addon_time_.fetch_add(1000); merge_out->new_value = ""; return true; } const char* Name() const override { return "DelayedMergeOperator"; } }; // TODO: hangs in CircleCI's Windows env #ifndef OS_WIN TEST_F(DBTest, MergeTestTime) { std::string one, two, three; PutFixed64(&one, 1); PutFixed64(&two, 2); PutFixed64(&three, 3); // Enable time profiling SetPerfLevel(kEnableTime); this->env_->addon_time_.store(0); this->env_->time_elapse_only_sleep_ = true; this->env_->no_slowdown_ = true; Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.merge_operator.reset(new DelayedMergeOperator(this)); DestroyAndReopen(options); ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0); db_->Put(WriteOptions(), "foo", one); ASSERT_OK(Flush()); ASSERT_OK(db_->Merge(WriteOptions(), "foo", two)); ASSERT_OK(Flush()); ASSERT_OK(db_->Merge(WriteOptions(), "foo", three)); ASSERT_OK(Flush()); ReadOptions opt; 
opt.verify_checksums = true; opt.snapshot = nullptr; std::string result; db_->Get(opt, "foo", &result); ASSERT_EQ(1000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME)); ReadOptions read_options; std::unique_ptr iter(db_->NewIterator(read_options)); int count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_OK(iter->status()); ++count; } ASSERT_EQ(1, count); ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME)); #ifdef ROCKSDB_USING_THREAD_STATUS ASSERT_GT(TestGetTickerCount(options, FLUSH_WRITE_BYTES), 0); #endif // ROCKSDB_USING_THREAD_STATUS this->env_->time_elapse_only_sleep_ = false; } #endif // OS_WIN #ifndef ROCKSDB_LITE TEST_P(DBTestWithParam, MergeCompactionTimeTest) { SetPerfLevel(kEnableTime); Options options = CurrentOptions(); options.compaction_filter_factory = std::make_shared(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.merge_operator.reset(new DelayedMergeOperator(this)); options.compaction_style = kCompactionStyleUniversal; options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); for (int i = 0; i < 1000; i++) { ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST")); ASSERT_OK(Flush()); } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); ASSERT_NE(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0); } TEST_P(DBTestWithParam, FilterCompactionTimeTest) { Options options = CurrentOptions(); options.compaction_filter_factory = std::make_shared(this); options.disable_auto_compactions = true; options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.statistics->set_stats_level(kExceptTimeForMutex); options.max_subcompactions = max_subcompactions_; DestroyAndReopen(options); // put some data for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10 + table; ++i) { Put(ToString(table * 100 + i), "val"); } Flush(); } CompactRangeOptions cro; cro.exclusive_manual_compaction = 
exclusive_manual_compaction_; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_EQ(0U, CountLiveFiles()); Reopen(options); Iterator* itr = db_->NewIterator(ReadOptions()); itr->SeekToFirst(); ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0); delete itr; } #endif // ROCKSDB_LITE TEST_F(DBTest, TestLogCleanup) { Options options = CurrentOptions(); options.write_buffer_size = 64 * 1024; // very small // only two memtables allowed ==> only two log files options.max_write_buffer_number = 2; Reopen(options); for (int i = 0; i < 100000; ++i) { Put(Key(i), "val"); // only 2 memtables will be alive, so logs_to_free needs to always be below // 2 ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast(3)); } } #ifndef ROCKSDB_LITE TEST_F(DBTest, EmptyCompactedDB) { Options options = CurrentOptions(); options.max_open_files = -1; Close(); ASSERT_OK(ReadOnlyReopen(options)); Status s = Put("new", "value"); ASSERT_TRUE(s.IsNotSupported()); Close(); } #endif // ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBTest, DISABLED_SuggestCompactRangeTest) { class CompactionFilterFactoryGetContext : public CompactionFilterFactory { public: std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context) override { saved_context = context; std::unique_ptr empty_filter; return empty_filter; } const char* Name() const override { return "CompactionFilterFactoryGetContext"; } static bool IsManual(CompactionFilterFactory* compaction_filter_factory) { return reinterpret_cast( compaction_filter_factory) ->saved_context.is_manual_compaction; } CompactionFilter::Context saved_context; }; Options options = CurrentOptions(); options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); options.compaction_style = kCompactionStyleLevel; options.compaction_filter_factory.reset( new CompactionFilterFactoryGetContext()); options.write_buffer_size = 200 << 10; options.arena_block_size = 4 << 10; 
options.level0_file_num_compaction_trigger = 4; options.num_levels = 4; options.compression = kNoCompression; options.max_bytes_for_level_base = 450 << 10; options.target_file_size_base = 98 << 10; options.max_compaction_bytes = static_cast(1) << 60; // inf Reopen(options); Random rnd(301); for (int num = 0; num < 3; num++) { GenerateNewRandomFile(&rnd); } GenerateNewRandomFile(&rnd); ASSERT_EQ("0,4", FilesPerLevel(0)); ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual( options.compaction_filter_factory.get())); GenerateNewRandomFile(&rnd); ASSERT_EQ("1,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("2,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("3,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("0,4,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("1,4,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("2,4,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("3,4,4", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("0,4,8", FilesPerLevel(0)); GenerateNewRandomFile(&rnd); ASSERT_EQ("1,4,8", FilesPerLevel(0)); // compact it three times for (int i = 0; i < 3; ++i) { ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr)); dbfull()->TEST_WaitForCompact(); } // All files are compacted ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(0, NumTableFilesAtLevel(1)); GenerateNewRandomFile(&rnd); ASSERT_EQ(1, NumTableFilesAtLevel(0)); // nonoverlapping with the file on level 0 Slice start("a"), end("b"); ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end)); dbfull()->TEST_WaitForCompact(); // should not compact the level 0 file ASSERT_EQ(1, NumTableFilesAtLevel(0)); start = Slice("j"); end = Slice("m"); ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end)); dbfull()->TEST_WaitForCompact(); ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual( options.compaction_filter_factory.get())); // now it should compact the level 0 file ASSERT_EQ(0, 
NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(1)); } TEST_F(DBTest, PromoteL0) { Options options = CurrentOptions(); options.disable_auto_compactions = true; options.write_buffer_size = 10 * 1024 * 1024; DestroyAndReopen(options); // non overlapping ranges std::vector> ranges = { {81, 160}, {0, 80}, {161, 240}, {241, 320}}; int32_t value_size = 10 * 1024; // 10 KB Random rnd(301); std::map values; for (const auto& range : ranges) { for (int32_t j = range.first; j < range.second; j++) { values[j] = RandomString(&rnd, value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); } int32_t level0_files = NumTableFilesAtLevel(0, 0); ASSERT_EQ(level0_files, ranges.size()); ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1 // Promote L0 level to L2. ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2)); // We expect that all the files were trivially moved from L0 to L2 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); ASSERT_EQ(NumTableFilesAtLevel(2, 0), level0_files); for (const auto& kv : values) { ASSERT_EQ(Get(Key(kv.first)), kv.second); } } TEST_F(DBTest, PromoteL0Failure) { Options options = CurrentOptions(); options.disable_auto_compactions = true; options.write_buffer_size = 10 * 1024 * 1024; DestroyAndReopen(options); // Produce two L0 files with overlapping ranges. ASSERT_OK(Put(Key(0), "")); ASSERT_OK(Put(Key(3), "")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(1), "")); ASSERT_OK(Flush()); Status status; // Fails because L0 has overlapping files. status = experimental::PromoteL0(db_, db_->DefaultColumnFamily()); ASSERT_TRUE(status.IsInvalidArgument()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // Now there is a file in L1. ASSERT_GE(NumTableFilesAtLevel(1, 0), 1); ASSERT_OK(Put(Key(5), "")); ASSERT_OK(Flush()); // Fails because L1 is non-empty. 
status = experimental::PromoteL0(db_, db_->DefaultColumnFamily()); ASSERT_TRUE(status.IsInvalidArgument()); } // Github issue #596 TEST_F(DBTest, CompactRangeWithEmptyBottomLevel) { const int kNumLevels = 2; const int kNumL0Files = 2; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.num_levels = kNumLevels; DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < kNumL0Files; ++i) { ASSERT_OK(Put(Key(0), RandomString(&rnd, 1024))); Flush(); } ASSERT_EQ(NumTableFilesAtLevel(0), kNumL0Files); ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(1), kNumL0Files); } #endif // ROCKSDB_LITE TEST_F(DBTest, AutomaticConflictsWithManualCompaction) { const int kNumL0Files = 50; Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 4; // never slowdown / stop options.level0_slowdown_writes_trigger = 999999; options.level0_stop_writes_trigger = 999999; options.max_background_compactions = 10; DestroyAndReopen(options); // schedule automatic compactions after the manual one starts, but before it // finishes to ensure conflict. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BackgroundCompaction:Start", "DBTest::AutomaticConflictsWithManualCompaction:PrePuts"}, {"DBTest::AutomaticConflictsWithManualCompaction:PostPuts", "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}}); std::atomic callback_count(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::MaybeScheduleFlushOrCompaction:Conflict", [&](void* /*arg*/) { callback_count.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int i = 0; i < 2; ++i) { // put two keys to ensure no trivial move for (int j = 0; j < 2; ++j) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024))); } ASSERT_OK(Flush()); } port::Thread manual_compaction_thread([this]() { CompactRangeOptions croptions; croptions.exclusive_manual_compaction = true; ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr)); }); TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PrePuts"); for (int i = 0; i < kNumL0Files; ++i) { // put two keys to ensure no trivial move for (int j = 0; j < 2; ++j) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024))); } ASSERT_OK(Flush()); } TEST_SYNC_POINT("DBTest::AutomaticConflictsWithManualCompaction:PostPuts"); ASSERT_GE(callback_count.load(), 1); for (int i = 0; i < 2; ++i) { ASSERT_NE("NOT_FOUND", Get(Key(i))); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); manual_compaction_thread.join(); dbfull()->TEST_WaitForCompact(); } #ifndef ROCKSDB_LITE TEST_F(DBTest, CompactFilesShouldTriggerAutoCompaction) { Options options = CurrentOptions(); options.max_background_compactions = 1; options.level0_file_num_compaction_trigger = 4; options.level0_slowdown_writes_trigger = 36; options.level0_stop_writes_trigger = 36; DestroyAndReopen(options); // generate files for manual compaction Random rnd(301); for (int i = 0; i < 2; ++i) { // put two keys to ensure no trivial move for (int j = 0; j < 2; ++j) { ASSERT_OK(Put(Key(j), 
RandomString(&rnd, 1024))); } ASSERT_OK(Flush()); } ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data; db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data); std::vector input_files; input_files.push_back(cf_meta_data.levels[0].files[0].name); SyncPoint::GetInstance()->LoadDependency({ {"CompactFilesImpl:0", "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"}, {"DBTest::CompactFilesShouldTriggerAutoCompaction:End", "CompactFilesImpl:1"}, }); SyncPoint::GetInstance()->EnableProcessing(); port::Thread manual_compaction_thread([&]() { auto s = db_->CompactFiles(CompactionOptions(), db_->DefaultColumnFamily(), input_files, 0); }); TEST_SYNC_POINT( "DBTest::CompactFilesShouldTriggerAutoCompaction:Begin"); // generate enough files to trigger compaction for (int i = 0; i < 20; ++i) { for (int j = 0; j < 2; ++j) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024))); } ASSERT_OK(Flush()); } db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data); ASSERT_GT(cf_meta_data.levels[0].files.size(), options.level0_file_num_compaction_trigger); TEST_SYNC_POINT( "DBTest::CompactFilesShouldTriggerAutoCompaction:End"); manual_compaction_thread.join(); dbfull()->TEST_WaitForCompact(); db_->GetColumnFamilyMetaData(db_->DefaultColumnFamily(), &cf_meta_data); ASSERT_LE(cf_meta_data.levels[0].files.size(), options.level0_file_num_compaction_trigger); } #endif // ROCKSDB_LITE // Github issue #595 // Large write batch with column families TEST_F(DBTest, LargeBatchWithColumnFamilies) { Options options = CurrentOptions(); options.env = env_; options.write_buffer_size = 100000; // Small write buffer CreateAndReopenWithCF({"pikachu"}, options); int64_t j = 0; for (int i = 0; i < 5; i++) { for (int pass = 1; pass <= 3; pass++) { WriteBatch batch; size_t write_size = 1024 * 1024 * (5 + i); fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n", (write_size / 1024 / 1024), pass); for (;;) { std::string data(3000, j++ % 127 + 20); data += ToString(j); 
batch.Put(handles_[0], Slice(data), Slice(data)); if (batch.GetDataSize() > write_size) { break; } } fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n", (batch.GetDataSize() / 1024 / 1024)); ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); fprintf(stderr, "done\n"); } } // make sure we can re-open it. ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); } // Make sure that Flushes can proceed in parallel with CompactRange() TEST_F(DBTest, FlushesInParallelWithCompactRange) { // iter == 0 -- leveled // iter == 1 -- leveled, but throw in a flush between two levels compacting // iter == 2 -- universal for (int iter = 0; iter < 3; ++iter) { Options options = CurrentOptions(); if (iter < 2) { options.compaction_style = kCompactionStyleLevel; } else { options.compaction_style = kCompactionStyleUniversal; } options.write_buffer_size = 110 << 10; options.level0_file_num_compaction_trigger = 4; options.num_levels = 4; options.compression = kNoCompression; options.max_bytes_for_level_base = 450 << 10; options.target_file_size_base = 98 << 10; options.max_write_buffer_number = 2; DestroyAndReopen(options); Random rnd(301); for (int num = 0; num < 14; num++) { GenerateNewRandomFile(&rnd); } if (iter == 1) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::RunManualCompaction()::1", "DBTest::FlushesInParallelWithCompactRange:1"}, {"DBTest::FlushesInParallelWithCompactRange:2", "DBImpl::RunManualCompaction()::2"}}); } else { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"CompactionJob::Run():Start", "DBTest::FlushesInParallelWithCompactRange:1"}, {"DBTest::FlushesInParallelWithCompactRange:2", "CompactionJob::Run():End"}}); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::vector threads; threads.emplace_back([&]() { Compact("a", "z"); }); TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1"); // this has to start a flush. 
if flushes are blocked, this will try to // create // 3 memtables, and that will fail because max_write_buffer_number is 2 for (int num = 0; num < 3; num++) { GenerateNewRandomFile(&rnd, /* nowait */ true); } TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2"); for (auto& t : threads) { t.join(); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(DBTest, DelayedWriteRate) { const int kEntriesPerMemTable = 100; const int kTotalFlushes = 12; Options options = CurrentOptions(); env_->SetBackgroundThreads(1, Env::LOW); options.env = env_; env_->no_slowdown_ = true; options.write_buffer_size = 100000000; options.max_write_buffer_number = 256; options.max_background_compactions = 1; options.level0_file_num_compaction_trigger = 3; options.level0_slowdown_writes_trigger = 3; options.level0_stop_writes_trigger = 999999; options.delayed_write_rate = 20000000; // Start with 200MB/s options.memtable_factory.reset( new SpecialSkipListFactory(kEntriesPerMemTable)); CreateAndReopenWithCF({"pikachu"}, options); // Block compactions test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); for (int i = 0; i < 3; i++) { Put(Key(i), std::string(10000, 'x')); Flush(); } // These writes will be slowed down to 1KB/s uint64_t estimated_sleep_time = 0; Random rnd(301); Put("", ""); uint64_t cur_rate = options.delayed_write_rate; for (int i = 0; i < kTotalFlushes; i++) { uint64_t size_memtable = 0; for (int j = 0; j < kEntriesPerMemTable; j++) { auto rand_num = rnd.Uniform(20); // Spread the size range to more. 
size_t entry_size = rand_num * rand_num * rand_num; WriteOptions wo; Put(Key(i), std::string(entry_size, 'x'), wo); size_memtable += entry_size + 18; // Occasionally sleep a while if (rnd.Uniform(20) == 6) { env_->SleepForMicroseconds(2666); } } dbfull()->TEST_WaitForFlushMemTable(); estimated_sleep_time += size_memtable * 1000000u / cur_rate; // Slow down twice. One for memtable switch and one for flush finishes. cur_rate = static_cast(static_cast(cur_rate) * kIncSlowdownRatio * kIncSlowdownRatio); } // Estimate the total sleep time fall into the rough range. ASSERT_GT(env_->addon_time_.load(), static_cast(estimated_sleep_time / 2)); ASSERT_LT(env_->addon_time_.load(), static_cast(estimated_sleep_time * 2)); env_->no_slowdown_ = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); } TEST_F(DBTest, HardLimit) { Options options = CurrentOptions(); options.env = env_; env_->SetBackgroundThreads(1, Env::LOW); options.max_write_buffer_number = 256; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 * 1024; options.level0_file_num_compaction_trigger = 4; options.level0_slowdown_writes_trigger = 999999; options.level0_stop_writes_trigger = 999999; options.hard_pending_compaction_bytes_limit = 800 << 10; options.max_bytes_for_level_base = 10000000000u; options.max_background_compactions = 1; options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); env_->SetBackgroundThreads(1, Env::LOW); test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); CreateAndReopenWithCF({"pikachu"}, options); std::atomic callback_count(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::DelayWrite:Wait", [&](void* /*arg*/) { callback_count.fetch_add(1); sleeping_task_low.WakeUp(); }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); int key_idx = 0; for (int num = 0; num < 5; num++) { GenerateNewFile(&rnd, &key_idx, true); dbfull()->TEST_WaitForFlushMemTable(); } ASSERT_EQ(0, callback_count.load()); for (int num = 0; num < 5; num++) { GenerateNewFile(&rnd, &key_idx, true); dbfull()->TEST_WaitForFlushMemTable(); } ASSERT_GE(callback_count.load(), 1); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); sleeping_task_low.WaitUntilDone(); } #if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION) class WriteStallListener : public EventListener { public: WriteStallListener() : condition_(WriteStallCondition::kNormal) {} void OnStallConditionsChanged(const WriteStallInfo& info) override { MutexLock l(&mutex_); condition_ = info.condition.cur; } bool CheckCondition(WriteStallCondition expected) { MutexLock l(&mutex_); return expected == condition_; } private: port::Mutex mutex_; WriteStallCondition condition_; }; TEST_F(DBTest, SoftLimit) { Options options = CurrentOptions(); options.env = env_; options.write_buffer_size = 100000; // Small write buffer options.max_write_buffer_number = 256; options.level0_file_num_compaction_trigger = 1; options.level0_slowdown_writes_trigger = 3; options.level0_stop_writes_trigger = 999999; options.delayed_write_rate = 20000; // About 200KB/s limited rate options.soft_pending_compaction_bytes_limit = 160000; options.target_file_size_base = 99999999; // All into one file options.max_bytes_for_level_base = 50000; options.max_bytes_for_level_multiplier = 10; options.max_background_compactions = 1; options.compression = kNoCompression; WriteStallListener* listener = new WriteStallListener(); options.listeners.emplace_back(listener); // FlushMemtable with opt.wait=true does not wait for // `OnStallConditionsChanged` being called. The event listener is triggered // on `JobContext::Clean`, which happens after flush result is installed. 
// We use sync point to create a custom WaitForFlush that waits for // context cleanup. port::Mutex flush_mutex; port::CondVar flush_cv(&flush_mutex); bool flush_finished = false; auto InstallFlushCallback = [&]() { { MutexLock l(&flush_mutex); flush_finished = false; } SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCallFlush:ContextCleanedUp", [&](void*) { { MutexLock l(&flush_mutex); flush_finished = true; } flush_cv.SignalAll(); }); }; auto WaitForFlush = [&]() { { MutexLock l(&flush_mutex); while (!flush_finished) { flush_cv.Wait(); } } SyncPoint::GetInstance()->ClearCallBack( "DBImpl::BackgroundCallFlush:ContextCleanedUp"); }; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); // Generating 360KB in Level 3 for (int i = 0; i < 72; i++) { Put(Key(i), std::string(5000, 'x')); if (i % 10 == 0) { dbfull()->TEST_FlushMemTable(true, true); } } dbfull()->TEST_WaitForCompact(); MoveFilesToLevel(3); // Generating 360KB in Level 2 for (int i = 0; i < 72; i++) { Put(Key(i), std::string(5000, 'x')); if (i % 10 == 0) { dbfull()->TEST_FlushMemTable(true, true); } } dbfull()->TEST_WaitForCompact(); MoveFilesToLevel(2); Put(Key(0), ""); test::SleepingBackgroundTask sleeping_task_low; // Block compactions env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); sleeping_task_low.WaitUntilSleeping(); // Create 3 L0 files, making score of L0 to be 3. for (int i = 0; i < 3; i++) { Put(Key(i), std::string(5000, 'x')); Put(Key(100 - i), std::string(5000, 'x')); // Flush the file. File size is around 30KB. 
InstallFlushCallback(); dbfull()->TEST_FlushMemTable(true, true); WaitForFlush(); } ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed)); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); sleeping_task_low.Reset(); dbfull()->TEST_WaitForCompact(); // Now there is one L1 file but doesn't trigger soft_rate_limit // The L1 file size is around 30KB. ASSERT_EQ(NumTableFilesAtLevel(1), 1); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal)); // Only allow one compactin going through. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BackgroundCallCompaction:0", [&](void* /*arg*/) { // Schedule a sleeping task. sleeping_task_low.Reset(); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); }); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); sleeping_task_low.WaitUntilSleeping(); // Create 3 L0 files, making score of L0 to be 3 for (int i = 0; i < 3; i++) { Put(Key(10 + i), std::string(5000, 'x')); Put(Key(90 - i), std::string(5000, 'x')); // Flush the file. File size is around 30KB. InstallFlushCallback(); dbfull()->TEST_FlushMemTable(true, true); WaitForFlush(); } // Wake up sleep task to enable compaction to run and waits // for it to go to sleep state again to make sure one compaction // goes through. sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilSleeping(); // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB // Given level multiplier 10, estimated pending compaction is around 100KB // doesn't trigger soft_pending_compaction_bytes_limit ASSERT_EQ(NumTableFilesAtLevel(1), 1); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal)); // Create 3 L0 files, making score of L0 to be 3, higher than L0. 
for (int i = 0; i < 3; i++) { Put(Key(20 + i), std::string(5000, 'x')); Put(Key(80 - i), std::string(5000, 'x')); // Flush the file. File size is around 30KB. InstallFlushCallback(); dbfull()->TEST_FlushMemTable(true, true); WaitForFlush(); } // Wake up sleep task to enable compaction to run and waits // for it to go to sleep state again to make sure one compaction // goes through. sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilSleeping(); // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB // L2 size is 360KB, so the estimated level fanout 4, estimated pending // compaction is around 200KB // triggerring soft_pending_compaction_bytes_limit ASSERT_EQ(NumTableFilesAtLevel(1), 1); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed)); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilSleeping(); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kNormal)); // shrink level base so L2 will hit soft limit easier. 
ASSERT_OK(dbfull()->SetOptions({ {"max_bytes_for_level_base", "5000"}, })); Put("", ""); Flush(); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); ASSERT_TRUE(listener->CheckCondition(WriteStallCondition::kDelayed)); sleeping_task_low.WaitUntilSleeping(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); } TEST_F(DBTest, LastWriteBufferDelay) { Options options = CurrentOptions(); options.env = env_; options.write_buffer_size = 100000; options.max_write_buffer_number = 4; options.delayed_write_rate = 20000; options.compression = kNoCompression; options.disable_auto_compactions = true; int kNumKeysPerMemtable = 3; options.memtable_factory.reset( new SpecialSkipListFactory(kNumKeysPerMemtable)); Reopen(options); test::SleepingBackgroundTask sleeping_task; // Block flushes env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); sleeping_task.WaitUntilSleeping(); // Create 3 L0 files, making score of L0 to be 3. for (int i = 0; i < 3; i++) { // Fill one mem table for (int j = 0; j < kNumKeysPerMemtable; j++) { Put(Key(j), ""); } ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); } // Inserting a new entry would create a new mem table, triggering slow down. 
Put(Key(0), ""); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); } #endif // !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION) TEST_F(DBTest, FailWhenCompressionNotSupportedTest) { CompressionType compressions[] = {kZlibCompression, kBZip2Compression, kLZ4Compression, kLZ4HCCompression, kXpressCompression}; for (auto comp : compressions) { if (!CompressionTypeSupported(comp)) { // not supported, we should fail the Open() Options options = CurrentOptions(); options.compression = comp; ASSERT_TRUE(!TryReopen(options).ok()); // Try if CreateColumnFamily also fails options.compression = kNoCompression; ASSERT_OK(TryReopen(options)); ColumnFamilyOptions cf_options(options); cf_options.compression = comp; ColumnFamilyHandle* handle; ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok()); } } } TEST_F(DBTest, CreateColumnFamilyShouldFailOnIncompatibleOptions) { Options options = CurrentOptions(); options.max_open_files = 100; Reopen(options); ColumnFamilyOptions cf_options(options); // ttl is now supported when max_open_files is -1. 
cf_options.ttl = 3600; ColumnFamilyHandle* handle; ASSERT_OK(db_->CreateColumnFamily(cf_options, "pikachu", &handle)); delete handle; } #ifndef ROCKSDB_LITE TEST_F(DBTest, RowCache) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.row_cache = NewLRUCache(8192); DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0); ASSERT_EQ(Get("foo"), "bar"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); ASSERT_EQ(Get("foo"), "bar"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); } TEST_F(DBTest, PinnableSliceAndRowCache) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.row_cache = NewLRUCache(8192); DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); ASSERT_EQ(Get("foo"), "bar"); ASSERT_EQ( reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), 1); { PinnableSlice pin_slice; ASSERT_EQ(Get("foo", &pin_slice), Status::OK()); ASSERT_EQ(pin_slice.ToString(), "bar"); // Entry is already in cache, lookup will remove the element from lru ASSERT_EQ( reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), 0); } // After PinnableSlice destruction element is added back in LRU ASSERT_EQ( reinterpret_cast(options.row_cache.get())->TEST_GetLRUSize(), 1); } #endif // ROCKSDB_LITE TEST_F(DBTest, DeletingOldWalAfterDrop) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"Test:AllowFlushes", "DBImpl::BGWorkFlush"}, {"DBImpl::BGWorkFlush:done", "Test:WaitForFlush"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Options options = CurrentOptions(); options.max_total_wal_size = 
8192; options.compression = kNoCompression; options.write_buffer_size = 1 << 20; options.level0_file_num_compaction_trigger = (1 << 30); options.level0_slowdown_writes_trigger = (1 << 30); options.level0_stop_writes_trigger = (1 << 30); options.disable_auto_compactions = true; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); CreateColumnFamilies({"cf1", "cf2"}, options); ASSERT_OK(Put(0, "key1", DummyString(8192))); ASSERT_OK(Put(0, "key2", DummyString(8192))); // the oldest wal should now be getting_flushed ASSERT_OK(db_->DropColumnFamily(handles_[0])); // all flushes should now do nothing because their CF is dropped TEST_SYNC_POINT("Test:AllowFlushes"); TEST_SYNC_POINT("Test:WaitForFlush"); uint64_t lognum1 = dbfull()->TEST_LogfileNumber(); ASSERT_OK(Put(1, "key3", DummyString(8192))); ASSERT_OK(Put(1, "key4", DummyString(8192))); // new wal should have been created uint64_t lognum2 = dbfull()->TEST_LogfileNumber(); EXPECT_GT(lognum2, lognum1); } TEST_F(DBTest, UnsupportedManualSync) { DestroyAndReopen(CurrentOptions()); env_->is_wal_sync_thread_safe_.store(false); Status s = db_->SyncWAL(); ASSERT_TRUE(s.IsNotSupported()); } INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam, ::testing::Combine(::testing::Values(1, 4), ::testing::Bool())); TEST_F(DBTest, PauseBackgroundWorkTest) { Options options = CurrentOptions(); options.write_buffer_size = 100000; // Small write buffer Reopen(options); std::vector threads; std::atomic done(false); db_->PauseBackgroundWork(); threads.emplace_back([&]() { Random rnd(301); for (int i = 0; i < 10000; ++i) { Put(RandomString(&rnd, 10), RandomString(&rnd, 10)); } done.store(true); }); env_->SleepForMicroseconds(200000); // make sure the thread is not done ASSERT_FALSE(done.load()); db_->ContinueBackgroundWork(); for (auto& t : threads) { t.join(); } // now it's done ASSERT_TRUE(done.load()); } // Keep spawning short-living threads that create an iterator and quit. 
// Meanwhile in another thread keep flushing memtables. // This used to cause a deadlock. TEST_F(DBTest, ThreadLocalPtrDeadlock) { std::atomic flushes_done{0}; std::atomic threads_destroyed{0}; auto done = [&] { return flushes_done.load() > 10; }; port::Thread flushing_thread([&] { for (int i = 0; !done(); ++i) { ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"), Slice(std::to_string(i).c_str()))); ASSERT_OK(db_->Flush(FlushOptions())); int cnt = ++flushes_done; fprintf(stderr, "Flushed %d times\n", cnt); } }); std::vector thread_spawning_threads(10); for (auto& t: thread_spawning_threads) { t = port::Thread([&] { while (!done()) { { port::Thread tmp_thread([&] { auto it = db_->NewIterator(ReadOptions()); delete it; }); tmp_thread.join(); } ++threads_destroyed; } }); } for (auto& t: thread_spawning_threads) { t.join(); } flushing_thread.join(); fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n", flushes_done.load(), threads_destroyed.load()); } TEST_F(DBTest, LargeBlockSizeTest) { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(0, "foo", "bar")); BlockBasedTableOptions table_options; table_options.block_size = 8LL * 1024 * 1024 * 1024LL; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ASSERT_NOK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); } #ifndef ROCKSDB_LITE TEST_F(DBTest, CreationTimeOfOldestFile) { const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2; const int kValueSize = 100; Options options = CurrentOptions(); options.max_open_files = -1; env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); bool set_file_creation_time_to_zero = true; int idx = 0; int64_t time_1 = 0; env_->GetCurrentTime(&time_1); const uint64_t uint_time_1 = static_cast(time_1); // Add 50 hours env_->addon_time_.fetch_add(50 * 60 * 60); int64_t time_2 = 0; env_->GetCurrentTime(&time_2); const uint64_t uint_time_2 = 
static_cast(time_2); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) { TableProperties* props = reinterpret_cast(arg); if (set_file_creation_time_to_zero) { if (idx == 0) { props->file_creation_time = 0; idx++; } else if (idx == 1) { props->file_creation_time = uint_time_1; idx = 0; } } else { if (idx == 0) { props->file_creation_time = uint_time_1; idx++; } else if (idx == 1) { props->file_creation_time = uint_time_2; } } }); // Set file creation time in manifest all to 0. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FileMetaData::FileMetaData", [&](void* arg) { FileMetaData* meta = static_cast(arg); meta->file_creation_time = 0; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); } // At this point there should be 2 files, one with file_creation_time = 0 and // the other non-zero. GetCreationTimeOfOldestFile API should return 0. uint64_t creation_time; Status s1 = dbfull()->GetCreationTimeOfOldestFile(&creation_time); ASSERT_EQ(0, creation_time); ASSERT_EQ(s1, Status::OK()); // Testing with non-zero file creation time. set_file_creation_time_to_zero = false; options = CurrentOptions(); options.max_open_files = -1; env_->time_elapse_only_sleep_ = false; options.env = env_; env_->addon_time_.store(0); DestroyAndReopen(options); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); } Flush(); } // At this point there should be 2 files with non-zero file creation time. // GetCreationTimeOfOldestFile API should return non-zero value. 
uint64_t ctime; Status s2 = dbfull()->GetCreationTimeOfOldestFile(&ctime); ASSERT_EQ(uint_time_1, ctime); ASSERT_EQ(s2, Status::OK()); // Testing with max_open_files != -1 options = CurrentOptions(); options.max_open_files = 10; DestroyAndReopen(options); Status s3 = dbfull()->GetCreationTimeOfOldestFile(&ctime); ASSERT_EQ(s3, Status::NotSupported()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_test2.cc000066400000000000000000005443411370372246700156620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include #include #include #include "db/db_test_util.h" #include "db/read_callback.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/persistent_cache.h" #include "rocksdb/wal_filter.h" #include "test_util/fault_injection_test_env.h" namespace ROCKSDB_NAMESPACE { class DBTest2 : public DBTestBase { public: DBTest2() : DBTestBase("/db_test2") {} }; #ifndef ROCKSDB_LITE TEST_F(DBTest2, OpenForReadOnly) { DB* db_ptr = nullptr; std::string dbname = test::PerThreadDBPath("db_readonly"); Options options = CurrentOptions(); options.create_if_missing = true; // OpenForReadOnly should fail but will create in the file system ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr)); // Since is created, we should be able to delete the dir // We first get the list files under // There should not be any subdirectories -- this is not checked here std::vector files; ASSERT_OK(env_->GetChildren(dbname, &files)); for (auto& f : files) { if (f != "." && f != "..") { ASSERT_OK(env_->DeleteFile(dbname + "/" + f)); } } // should be empty now and we should be able to delete it ASSERT_OK(env_->DeleteDir(dbname)); options.create_if_missing = false; // OpenForReadOnly should fail since was successfully deleted ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr)); // With create_if_missing false, there should not be a dir in the file system ASSERT_NOK(env_->FileExists(dbname)); } TEST_F(DBTest2, OpenForReadOnlyWithColumnFamilies) { DB* db_ptr = nullptr; std::string dbname = test::PerThreadDBPath("db_readonly"); Options options = CurrentOptions(); options.create_if_missing = true; ColumnFamilyOptions cf_options(options); std::vector column_families; column_families.push_back( ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); column_families.push_back(ColumnFamilyDescriptor("goku", cf_options)); std::vector handles; // OpenForReadOnly should fail but will create in the file system ASSERT_NOK( DB::OpenForReadOnly(options, dbname, column_families, 
&handles, &db_ptr)); // Since is created, we should be able to delete the dir // We first get the list files under // There should not be any subdirectories -- this is not checked here std::vector files; ASSERT_OK(env_->GetChildren(dbname, &files)); for (auto& f : files) { if (f != "." && f != "..") { ASSERT_OK(env_->DeleteFile(dbname + "/" + f)); } } // should be empty now and we should be able to delete it ASSERT_OK(env_->DeleteDir(dbname)); options.create_if_missing = false; // OpenForReadOnly should fail since was successfully deleted ASSERT_NOK( DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr)); // With create_if_missing false, there should not be a dir in the file system ASSERT_NOK(env_->FileExists(dbname)); } class TestReadOnlyWithCompressedCache : public DBTestBase, public testing::WithParamInterface> { public: TestReadOnlyWithCompressedCache() : DBTestBase("/test_readonly_with_compressed_cache") { max_open_files_ = std::get<0>(GetParam()); use_mmap_ = std::get<1>(GetParam()); } int max_open_files_; bool use_mmap_; }; TEST_P(TestReadOnlyWithCompressedCache, ReadOnlyWithCompressedCache) { if (use_mmap_ && !IsMemoryMappedAccessSupported()) { return; } ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo2", "barbarbarbarbarbarbarbar")); ASSERT_OK(Flush()); DB* db_ptr = nullptr; Options options = CurrentOptions(); options.allow_mmap_reads = use_mmap_; options.max_open_files = max_open_files_; options.compression = kSnappyCompression; BlockBasedTableOptions table_options; table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); table_options.no_block_cache = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.statistics = CreateDBStatistics(); ASSERT_OK(DB::OpenForReadOnly(options, dbname_, &db_ptr)); std::string v; ASSERT_OK(db_ptr->Get(ReadOptions(), "foo", &v)); ASSERT_EQ("bar", v); ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_HIT)); ASSERT_OK(db_ptr->Get(ReadOptions(), 
"foo", &v)); ASSERT_EQ("bar", v); if (Snappy_Supported()) { if (use_mmap_) { ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_HIT)); } else { ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_HIT)); } } delete db_ptr; } INSTANTIATE_TEST_CASE_P(TestReadOnlyWithCompressedCache, TestReadOnlyWithCompressedCache, ::testing::Combine(::testing::Values(-1, 100), ::testing::Bool())); class PartitionedIndexTestListener : public EventListener { public: void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { ASSERT_GT(info.table_properties.index_partitions, 1); ASSERT_EQ(info.table_properties.index_key_is_user_key, 0); } }; TEST_F(DBTest2, PartitionedIndexUserToInternalKey) { BlockBasedTableOptions table_options; Options options = CurrentOptions(); table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; PartitionedIndexTestListener* listener = new PartitionedIndexTestListener(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.listeners.emplace_back(listener); std::vector snapshots; Reopen(options); Random rnd(301); for (int i = 0; i < 3000; i++) { int j = i % 30; std::string value = RandomString(&rnd, 10500); ASSERT_OK(Put("keykey_" + std::to_string(j), value)); snapshots.push_back(db_->GetSnapshot()); } Flush(); for (auto s : snapshots) { db_->ReleaseSnapshot(s); } } #endif // ROCKSDB_LITE class PrefixFullBloomWithReverseComparator : public DBTestBase, public ::testing::WithParamInterface { public: PrefixFullBloomWithReverseComparator() : DBTestBase("/prefix_bloom_reverse") {} void SetUp() override { if_cache_filter_ = GetParam(); } bool if_cache_filter_; }; TEST_P(PrefixFullBloomWithReverseComparator, PrefixFullBloomWithReverseComparator) { Options options = last_options_; options.comparator = ReverseBytewiseComparator(); options.prefix_extractor.reset(NewCappedPrefixTransform(3)); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions bbto; 
if (if_cache_filter_) { bbto.no_block_cache = false; bbto.cache_index_and_filter_blocks = true; bbto.block_cache = NewLRUCache(1); } bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); ASSERT_OK(dbfull()->Put(WriteOptions(), "bar123", "foo")); ASSERT_OK(dbfull()->Put(WriteOptions(), "bar234", "foo2")); ASSERT_OK(dbfull()->Put(WriteOptions(), "foo123", "foo3")); dbfull()->Flush(FlushOptions()); if (bbto.block_cache) { bbto.block_cache->EraseUnRefEntries(); } std::unique_ptr iter(db_->NewIterator(ReadOptions())); iter->Seek("bar345"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("bar234", iter->key().ToString()); ASSERT_EQ("foo2", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("bar123", iter->key().ToString()); ASSERT_EQ("foo", iter->value().ToString()); iter->Seek("foo234"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("foo123", iter->key().ToString()); ASSERT_EQ("foo3", iter->value().ToString()); iter->Seek("bar"); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); } INSTANTIATE_TEST_CASE_P(PrefixFullBloomWithReverseComparator, PrefixFullBloomWithReverseComparator, testing::Bool()); TEST_F(DBTest2, IteratorPropertyVersionNumber) { Put("", ""); Iterator* iter1 = db_->NewIterator(ReadOptions()); std::string prop_value; ASSERT_OK( iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number1 = static_cast(std::atoi(prop_value.c_str())); Put("", ""); Flush(); Iterator* iter2 = db_->NewIterator(ReadOptions()); ASSERT_OK( iter2->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number2 = static_cast(std::atoi(prop_value.c_str())); ASSERT_GT(version_number2, version_number1); Put("", ""); Iterator* iter3 = db_->NewIterator(ReadOptions()); ASSERT_OK( 
iter3->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number3 = static_cast(std::atoi(prop_value.c_str())); ASSERT_EQ(version_number2, version_number3); iter1->SeekToFirst(); ASSERT_OK( iter1->GetProperty("rocksdb.iterator.super-version-number", &prop_value)); uint64_t version_number1_new = static_cast(std::atoi(prop_value.c_str())); ASSERT_EQ(version_number1, version_number1_new); delete iter1; delete iter2; delete iter3; } TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); Put(1, "a", "begin"); Put(1, "z", "end"); ASSERT_OK(Flush(1)); TryReopenWithColumnFamilies({"default", "pikachu"}, options); std::string value; value = Get(1, "a"); } TEST_F(DBTest2, MaxSuccessiveMergesChangeWithDBRecovery) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.max_successive_merges = 3; options.merge_operator = MergeOperators::CreatePutOperator(); options.disable_auto_compactions = true; DestroyAndReopen(options); Put("poi", "Finch"); db_->Merge(WriteOptions(), "poi", "Reese"); db_->Merge(WriteOptions(), "poi", "Shaw"); db_->Merge(WriteOptions(), "poi", "Root"); options.max_successive_merges = 2; Reopen(options); } #ifndef ROCKSDB_LITE class DBTestSharedWriteBufferAcrossCFs : public DBTestBase, public testing::WithParamInterface> { public: DBTestSharedWriteBufferAcrossCFs() : DBTestBase("/db_test_shared_write_buffer") {} void SetUp() override { use_old_interface_ = std::get<0>(GetParam()); cost_cache_ = std::get<1>(GetParam()); } bool use_old_interface_; bool 
cost_cache_; }; TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) { Options options = CurrentOptions(); options.arena_block_size = 4096; // Avoid undeterministic value by malloc_usable_size(); // Force arena block size to 1 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Arena::Arena:0", [&](void* arg) { size_t* block_size = static_cast(arg); *block_size = 1; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Arena::AllocateNewBlock:0", [&](void* arg) { std::pair* pair = static_cast*>(arg); *std::get<0>(*pair) = *std::get<1>(*pair); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // The total soft write buffer size is about 105000 std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); ASSERT_LT(cache->GetUsage(), 256 * 1024); if (use_old_interface_) { options.db_write_buffer_size = 120000; // this is the real limit } else if (!cost_cache_) { options.write_buffer_manager.reset(new WriteBufferManager(114285)); } else { options.write_buffer_manager.reset(new WriteBufferManager(114285, cache)); } options.write_buffer_size = 500000; // this is never hit CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); WriteOptions wo; wo.disableWAL = true; std::function wait_flush = [&]() { dbfull()->TEST_WaitForFlushMemTable(handles_[0]); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForFlushMemTable(handles_[2]); dbfull()->TEST_WaitForFlushMemTable(handles_[3]); }; // Create some data and flush "default" and "nikitich" so that they // are newer CFs created. 
ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); Flush(3); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); Flush(0); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(1)); ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); if (cost_cache_) { ASSERT_GE(cache->GetUsage(), 256 * 1024); ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024); } wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(60000), wo)); if (cost_cache_) { ASSERT_GE(cache->GetUsage(), 256 * 1024); ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024); } wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); // No flush should trigger wait_flush(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(1)); } // Trigger a flush. Flushing "nikitich". ASSERT_OK(Put(3, Key(2), DummyString(30000), wo)); wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); wait_flush(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(2)); } // Without hitting the threshold, no flush should trigger. 
ASSERT_OK(Put(2, Key(1), DummyString(30000), wo)); wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); wait_flush(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(2)); } // Hit the write buffer limit again. "default" // will have been flushed. ASSERT_OK(Put(2, Key(2), DummyString(10000), wo)); wait_flush(); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); wait_flush(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(2)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(2)); } // Trigger another flush. This time "dobrynia". "pikachu" should not // be flushed, althrough it was never flushed. 
ASSERT_OK(Put(1, Key(1), DummyString(1), wo)); wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(80000), wo)); wait_flush(); ASSERT_OK(Put(1, Key(1), DummyString(1), wo)); wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); wait_flush(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(2)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(2)); } if (cost_cache_) { ASSERT_GE(cache->GetUsage(), 256 * 1024); Close(); options.write_buffer_manager.reset(); last_options_.write_buffer_manager.reset(); ASSERT_LT(cache->GetUsage(), 256 * 1024); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } INSTANTIATE_TEST_CASE_P(DBTestSharedWriteBufferAcrossCFs, DBTestSharedWriteBufferAcrossCFs, ::testing::Values(std::make_tuple(true, false), std::make_tuple(false, false), std::make_tuple(false, true))); TEST_F(DBTest2, SharedWriteBufferLimitAcrossDB) { std::string dbname2 = test::PerThreadDBPath("db_shared_wb_db2"); Options options = CurrentOptions(); options.arena_block_size = 4096; // Avoid undeterministic value by malloc_usable_size(); // Force arena block size to 1 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Arena::Arena:0", [&](void* arg) { size_t* block_size = static_cast(arg); *block_size = 1; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Arena::AllocateNewBlock:0", [&](void* arg) { std::pair* pair = static_cast*>(arg); *std::get<0>(*pair) = *std::get<1>(*pair); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); options.write_buffer_size = 500000; // this is never hit // Use a write buffer total size so that the soft limit is about // 105000. 
options.write_buffer_manager.reset(new WriteBufferManager(120000)); CreateAndReopenWithCF({"cf1", "cf2"}, options); ASSERT_OK(DestroyDB(dbname2, options)); DB* db2 = nullptr; ASSERT_OK(DB::Open(options, dbname2, &db2)); WriteOptions wo; wo.disableWAL = true; std::function wait_flush = [&]() { dbfull()->TEST_WaitForFlushMemTable(handles_[0]); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForFlushMemTable(handles_[2]); static_cast(db2)->TEST_WaitForFlushMemTable(); }; // Trigger a flush on cf2 ASSERT_OK(Put(2, Key(1), DummyString(70000), wo)); wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(20000), wo)); wait_flush(); // Insert to DB2 ASSERT_OK(db2->Put(wo, Key(2), DummyString(20000))); wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); wait_flush(); static_cast(db2)->TEST_WaitForFlushMemTable(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default") + GetNumberOfSstFilesForColumnFamily(db_, "cf1") + GetNumberOfSstFilesForColumnFamily(db_, "cf2"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"), static_cast(0)); } // Triggering to flush another CF in DB1 ASSERT_OK(db2->Put(wo, Key(2), DummyString(70000))); wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); wait_flush(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf1"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf2"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"), static_cast(0)); } // Triggering flush in DB2. 
ASSERT_OK(db2->Put(wo, Key(3), DummyString(40000))); wait_flush(); ASSERT_OK(db2->Put(wo, Key(1), DummyString(1))); wait_flush(); static_cast(db2)->TEST_WaitForFlushMemTable(); { ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf1"), static_cast(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "cf2"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db2, "default"), static_cast(1)); } delete db2; ASSERT_OK(DestroyDB(dbname2, options)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, TestWriteBufferNoLimitWithCache) { Options options = CurrentOptions(); options.arena_block_size = 4096; std::shared_ptr cache = NewLRUCache(LRUCacheOptions(10000000, 1, false, 0.0)); options.write_buffer_size = 50000; // this is never hit // Use a write buffer total size so that the soft limit is about // 105000. options.write_buffer_manager.reset(new WriteBufferManager(0, cache)); Reopen(options); ASSERT_OK(Put("foo", "bar")); // One dummy entry is 256KB. 
ASSERT_GT(cache->GetUsage(), 128000); } namespace { void ValidateKeyExistence(DB* db, const std::vector& keys_must_exist, const std::vector& keys_must_not_exist) { // Ensure that expected keys exist std::vector values; if (keys_must_exist.size() > 0) { std::vector status_list = db->MultiGet(ReadOptions(), keys_must_exist, &values); for (size_t i = 0; i < keys_must_exist.size(); i++) { ASSERT_OK(status_list[i]); } } // Ensure that given keys don't exist if (keys_must_not_exist.size() > 0) { std::vector status_list = db->MultiGet(ReadOptions(), keys_must_not_exist, &values); for (size_t i = 0; i < keys_must_not_exist.size(); i++) { ASSERT_TRUE(status_list[i].IsNotFound()); } } } } // namespace TEST_F(DBTest2, WalFilterTest) { class TestWalFilter : public WalFilter { private: // Processing option that is requested to be applied at the given index WalFilter::WalProcessingOption wal_processing_option_; // Index at which to apply wal_processing_option_ // At other indexes default wal_processing_option::kContinueProcessing is // returned. size_t apply_option_at_record_index_; // Current record index, incremented with each record encountered. size_t current_record_index_; public: TestWalFilter(WalFilter::WalProcessingOption wal_processing_option, size_t apply_option_for_record_index) : wal_processing_option_(wal_processing_option), apply_option_at_record_index_(apply_option_for_record_index), current_record_index_(0) {} WalProcessingOption LogRecord(const WriteBatch& /*batch*/, WriteBatch* /*new_batch*/, bool* /*batch_changed*/) const override { WalFilter::WalProcessingOption option_to_return; if (current_record_index_ == apply_option_at_record_index_) { option_to_return = wal_processing_option_; } else { option_to_return = WalProcessingOption::kContinueProcessing; } // Filter is passed as a const object for RocksDB to not modify the // object, however we modify it for our own purpose here and hence // cast the constness away. 
(const_cast(this)->current_record_index_)++; return option_to_return; } const char* Name() const override { return "TestWalFilter"; } }; // Create 3 batches with two keys each std::vector> batch_keys(3); batch_keys[0].push_back("key1"); batch_keys[0].push_back("key2"); batch_keys[1].push_back("key3"); batch_keys[1].push_back("key4"); batch_keys[2].push_back("key5"); batch_keys[2].push_back("key6"); // Test with all WAL processing options for (int option = 0; option < static_cast( WalFilter::WalProcessingOption::kWalProcessingOptionMax); option++) { Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({ "pikachu" }, options); // Write given keys in given batches for (size_t i = 0; i < batch_keys.size(); i++) { WriteBatch batch; for (size_t j = 0; j < batch_keys[i].size(); j++) { batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)); } dbfull()->Write(WriteOptions(), &batch); } WalFilter::WalProcessingOption wal_processing_option = static_cast(option); // Create a test filter that would apply wal_processing_option at the first // record size_t apply_option_for_record_index = 1; TestWalFilter test_wal_filter(wal_processing_option, apply_option_for_record_index); // Reopen database with option to use WAL filter options = OptionsForLogIterTest(); options.wal_filter = &test_wal_filter; Status status = TryReopenWithColumnFamilies({ "default", "pikachu" }, options); if (wal_processing_option == WalFilter::WalProcessingOption::kCorruptedRecord) { assert(!status.ok()); // In case of corruption we can turn off paranoid_checks to reopen // databse options.paranoid_checks = false; ReopenWithColumnFamilies({ "default", "pikachu" }, options); } else { assert(status.ok()); } // Compute which keys we expect to be found // and which we expect not to be found after recovery. 
std::vector keys_must_exist; std::vector keys_must_not_exist; switch (wal_processing_option) { case WalFilter::WalProcessingOption::kCorruptedRecord: case WalFilter::WalProcessingOption::kContinueProcessing: { fprintf(stderr, "Testing with complete WAL processing\n"); // we expect all records to be processed for (size_t i = 0; i < batch_keys.size(); i++) { for (size_t j = 0; j < batch_keys[i].size(); j++) { keys_must_exist.push_back(Slice(batch_keys[i][j])); } } break; } case WalFilter::WalProcessingOption::kIgnoreCurrentRecord: { fprintf(stderr, "Testing with ignoring record %" ROCKSDB_PRIszt " only\n", apply_option_for_record_index); // We expect the record with apply_option_for_record_index to be not // found. for (size_t i = 0; i < batch_keys.size(); i++) { for (size_t j = 0; j < batch_keys[i].size(); j++) { if (i == apply_option_for_record_index) { keys_must_not_exist.push_back(Slice(batch_keys[i][j])); } else { keys_must_exist.push_back(Slice(batch_keys[i][j])); } } } break; } case WalFilter::WalProcessingOption::kStopReplay: { fprintf(stderr, "Testing with stopping replay from record %" ROCKSDB_PRIszt "\n", apply_option_for_record_index); // We expect records beyond apply_option_for_record_index to be not // found. 
for (size_t i = 0; i < batch_keys.size(); i++) { for (size_t j = 0; j < batch_keys[i].size(); j++) { if (i >= apply_option_for_record_index) { keys_must_not_exist.push_back(Slice(batch_keys[i][j])); } else { keys_must_exist.push_back(Slice(batch_keys[i][j])); } } } break; } default: assert(false); // unhandled case } bool checked_after_reopen = false; while (true) { // Ensure that expected keys exists // and not expected keys don't exist after recovery ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist); if (checked_after_reopen) { break; } // reopen database again to make sure previous log(s) are not used //(even if they were skipped) // reopn database with option to use WAL filter options = OptionsForLogIterTest(); ReopenWithColumnFamilies({ "default", "pikachu" }, options); checked_after_reopen = true; } } } TEST_F(DBTest2, WalFilterTestWithChangeBatch) { class ChangeBatchHandler : public WriteBatch::Handler { private: // Batch to insert keys in WriteBatch* new_write_batch_; // Number of keys to add in the new batch size_t num_keys_to_add_in_new_batch_; // Number of keys added to new batch size_t num_keys_added_; public: ChangeBatchHandler(WriteBatch* new_write_batch, size_t num_keys_to_add_in_new_batch) : new_write_batch_(new_write_batch), num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch), num_keys_added_(0) {} void Put(const Slice& key, const Slice& value) override { if (num_keys_added_ < num_keys_to_add_in_new_batch_) { new_write_batch_->Put(key, value); ++num_keys_added_; } } }; class TestWalFilterWithChangeBatch : public WalFilter { private: // Index at which to start changing records size_t change_records_from_index_; // Number of keys to add in the new batch size_t num_keys_to_add_in_new_batch_; // Current record index, incremented with each record encountered. 
size_t current_record_index_; public: TestWalFilterWithChangeBatch(size_t change_records_from_index, size_t num_keys_to_add_in_new_batch) : change_records_from_index_(change_records_from_index), num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch), current_record_index_(0) {} WalProcessingOption LogRecord(const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed) const override { if (current_record_index_ >= change_records_from_index_) { ChangeBatchHandler handler(new_batch, num_keys_to_add_in_new_batch_); batch.Iterate(&handler); *batch_changed = true; } // Filter is passed as a const object for RocksDB to not modify the // object, however we modify it for our own purpose here and hence // cast the constness away. (const_cast(this) ->current_record_index_)++; return WalProcessingOption::kContinueProcessing; } const char* Name() const override { return "TestWalFilterWithChangeBatch"; } }; std::vector> batch_keys(3); batch_keys[0].push_back("key1"); batch_keys[0].push_back("key2"); batch_keys[1].push_back("key3"); batch_keys[1].push_back("key4"); batch_keys[2].push_back("key5"); batch_keys[2].push_back("key6"); Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({ "pikachu" }, options); // Write given keys in given batches for (size_t i = 0; i < batch_keys.size(); i++) { WriteBatch batch; for (size_t j = 0; j < batch_keys[i].size(); j++) { batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)); } dbfull()->Write(WriteOptions(), &batch); } // Create a test filter that would apply wal_processing_option at the first // record size_t change_records_from_index = 1; size_t num_keys_to_add_in_new_batch = 1; TestWalFilterWithChangeBatch test_wal_filter_with_change_batch( change_records_from_index, num_keys_to_add_in_new_batch); // Reopen database with option to use WAL filter options = OptionsForLogIterTest(); options.wal_filter = &test_wal_filter_with_change_batch; ReopenWithColumnFamilies({ "default", "pikachu" 
}, options); // Ensure that all keys exist before change_records_from_index_ // And after that index only single key exists // as our filter adds only single key for each batch std::vector keys_must_exist; std::vector keys_must_not_exist; for (size_t i = 0; i < batch_keys.size(); i++) { for (size_t j = 0; j < batch_keys[i].size(); j++) { if (i >= change_records_from_index && j >= num_keys_to_add_in_new_batch) { keys_must_not_exist.push_back(Slice(batch_keys[i][j])); } else { keys_must_exist.push_back(Slice(batch_keys[i][j])); } } } bool checked_after_reopen = false; while (true) { // Ensure that expected keys exists // and not expected keys don't exist after recovery ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist); if (checked_after_reopen) { break; } // reopen database again to make sure previous log(s) are not used //(even if they were skipped) // reopn database with option to use WAL filter options = OptionsForLogIterTest(); ReopenWithColumnFamilies({ "default", "pikachu" }, options); checked_after_reopen = true; } } TEST_F(DBTest2, WalFilterTestWithChangeBatchExtraKeys) { class TestWalFilterWithChangeBatchAddExtraKeys : public WalFilter { public: WalProcessingOption LogRecord(const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed) const override { *new_batch = batch; new_batch->Put("key_extra", "value_extra"); *batch_changed = true; return WalProcessingOption::kContinueProcessing; } const char* Name() const override { return "WalFilterTestWithChangeBatchExtraKeys"; } }; std::vector> batch_keys(3); batch_keys[0].push_back("key1"); batch_keys[0].push_back("key2"); batch_keys[1].push_back("key3"); batch_keys[1].push_back("key4"); batch_keys[2].push_back("key5"); batch_keys[2].push_back("key6"); Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({ "pikachu" }, options); // Write given keys in given batches for (size_t i = 0; i < batch_keys.size(); i++) { WriteBatch batch; for (size_t j = 0; j < 
batch_keys[i].size(); j++) { batch.Put(handles_[0], batch_keys[i][j], DummyString(1024)); } dbfull()->Write(WriteOptions(), &batch); } // Create a test filter that would add extra keys TestWalFilterWithChangeBatchAddExtraKeys test_wal_filter_extra_keys; // Reopen database with option to use WAL filter options = OptionsForLogIterTest(); options.wal_filter = &test_wal_filter_extra_keys; Status status = TryReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_TRUE(status.IsNotSupported()); // Reopen without filter, now reopen should succeed - previous // attempt to open must not have altered the db. options = OptionsForLogIterTest(); ReopenWithColumnFamilies({ "default", "pikachu" }, options); std::vector keys_must_exist; std::vector keys_must_not_exist; // empty vector for (size_t i = 0; i < batch_keys.size(); i++) { for (size_t j = 0; j < batch_keys[i].size(); j++) { keys_must_exist.push_back(Slice(batch_keys[i][j])); } } ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist); } TEST_F(DBTest2, WalFilterTestWithColumnFamilies) { class TestWalFilterWithColumnFamilies : public WalFilter { private: // column_family_id -> log_number map (provided to WALFilter) std::map cf_log_number_map_; // column_family_name -> column_family_id map (provided to WALFilter) std::map cf_name_id_map_; // column_family_name -> keys_found_in_wal map // We store keys that are applicable to the column_family // during recovery (i.e. aren't already flushed to SST file(s)) // for verification against the keys we expect. 
std::map> cf_wal_keys_; public: void ColumnFamilyLogNumberMap( const std::map& cf_lognumber_map, const std::map& cf_name_id_map) override { cf_log_number_map_ = cf_lognumber_map; cf_name_id_map_ = cf_name_id_map; } WalProcessingOption LogRecordFound(unsigned long long log_number, const std::string& /*log_file_name*/, const WriteBatch& batch, WriteBatch* /*new_batch*/, bool* /*batch_changed*/) override { class LogRecordBatchHandler : public WriteBatch::Handler { private: const std::map & cf_log_number_map_; std::map> & cf_wal_keys_; unsigned long long log_number_; public: LogRecordBatchHandler(unsigned long long current_log_number, const std::map & cf_log_number_map, std::map> & cf_wal_keys) : cf_log_number_map_(cf_log_number_map), cf_wal_keys_(cf_wal_keys), log_number_(current_log_number){} Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& /*value*/) override { auto it = cf_log_number_map_.find(column_family_id); assert(it != cf_log_number_map_.end()); unsigned long long log_number_for_cf = it->second; // If the current record is applicable for column_family_id // (i.e. isn't flushed to SST file(s) for column_family_id) // add it to the cf_wal_keys_ map for verification. 
if (log_number_ >= log_number_for_cf) { cf_wal_keys_[column_family_id].push_back(std::string(key.data(), key.size())); } return Status::OK(); } } handler(log_number, cf_log_number_map_, cf_wal_keys_); batch.Iterate(&handler); return WalProcessingOption::kContinueProcessing; } const char* Name() const override { return "WalFilterTestWithColumnFamilies"; } const std::map>& GetColumnFamilyKeys() { return cf_wal_keys_; } const std::map & GetColumnFamilyNameIdMap() { return cf_name_id_map_; } }; std::vector> batch_keys_pre_flush(3); batch_keys_pre_flush[0].push_back("key1"); batch_keys_pre_flush[0].push_back("key2"); batch_keys_pre_flush[1].push_back("key3"); batch_keys_pre_flush[1].push_back("key4"); batch_keys_pre_flush[2].push_back("key5"); batch_keys_pre_flush[2].push_back("key6"); Options options = OptionsForLogIterTest(); DestroyAndReopen(options); CreateAndReopenWithCF({ "pikachu" }, options); // Write given keys in given batches for (size_t i = 0; i < batch_keys_pre_flush.size(); i++) { WriteBatch batch; for (size_t j = 0; j < batch_keys_pre_flush[i].size(); j++) { batch.Put(handles_[0], batch_keys_pre_flush[i][j], DummyString(1024)); batch.Put(handles_[1], batch_keys_pre_flush[i][j], DummyString(1024)); } dbfull()->Write(WriteOptions(), &batch); } //Flush default column-family db_->Flush(FlushOptions(), handles_[0]); // Do some more writes std::vector> batch_keys_post_flush(3); batch_keys_post_flush[0].push_back("key7"); batch_keys_post_flush[0].push_back("key8"); batch_keys_post_flush[1].push_back("key9"); batch_keys_post_flush[1].push_back("key10"); batch_keys_post_flush[2].push_back("key11"); batch_keys_post_flush[2].push_back("key12"); // Write given keys in given batches for (size_t i = 0; i < batch_keys_post_flush.size(); i++) { WriteBatch batch; for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) { batch.Put(handles_[0], batch_keys_post_flush[i][j], DummyString(1024)); batch.Put(handles_[1], batch_keys_post_flush[i][j], DummyString(1024)); } 
dbfull()->Write(WriteOptions(), &batch); } // On Recovery we should only find the second batch applicable to default CF // But both batches applicable to pikachu CF // Create a test filter that would add extra keys TestWalFilterWithColumnFamilies test_wal_filter_column_families; // Reopen database with option to use WAL filter options = OptionsForLogIterTest(); options.wal_filter = &test_wal_filter_column_families; Status status = TryReopenWithColumnFamilies({ "default", "pikachu" }, options); ASSERT_TRUE(status.ok()); // verify that handles_[0] only has post_flush keys // while handles_[1] has pre and post flush keys auto cf_wal_keys = test_wal_filter_column_families.GetColumnFamilyKeys(); auto name_id_map = test_wal_filter_column_families.GetColumnFamilyNameIdMap(); size_t index = 0; auto keys_cf = cf_wal_keys[name_id_map[kDefaultColumnFamilyName]]; //default column-family, only post_flush keys are expected for (size_t i = 0; i < batch_keys_post_flush.size(); i++) { for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) { Slice key_from_the_log(keys_cf[index++]); Slice batch_key(batch_keys_post_flush[i][j]); ASSERT_TRUE(key_from_the_log.compare(batch_key) == 0); } } ASSERT_TRUE(index == keys_cf.size()); index = 0; keys_cf = cf_wal_keys[name_id_map["pikachu"]]; //pikachu column-family, all keys are expected for (size_t i = 0; i < batch_keys_pre_flush.size(); i++) { for (size_t j = 0; j < batch_keys_pre_flush[i].size(); j++) { Slice key_from_the_log(keys_cf[index++]); Slice batch_key(batch_keys_pre_flush[i][j]); ASSERT_TRUE(key_from_the_log.compare(batch_key) == 0); } } for (size_t i = 0; i < batch_keys_post_flush.size(); i++) { for (size_t j = 0; j < batch_keys_post_flush[i].size(); j++) { Slice key_from_the_log(keys_cf[index++]); Slice batch_key(batch_keys_post_flush[i][j]); ASSERT_TRUE(key_from_the_log.compare(batch_key) == 0); } } ASSERT_TRUE(index == keys_cf.size()); } TEST_F(DBTest2, PresetCompressionDict) { // Verifies that compression ratio improves 
when dictionary is enabled, and // improves even further when the dictionary is trained by ZSTD. const size_t kBlockSizeBytes = 4 << 10; const size_t kL0FileBytes = 128 << 10; const size_t kApproxPerBlockOverheadBytes = 50; const int kNumL0Files = 5; Options options; // Make sure to use any custom env that the test is configured with. options.env = CurrentOptions().env; options.allow_concurrent_memtable_write = false; options.arena_block_size = kBlockSizeBytes; options.create_if_missing = true; options.disable_auto_compactions = true; options.level0_file_num_compaction_trigger = kNumL0Files; options.memtable_factory.reset( new SpecialSkipListFactory(kL0FileBytes / kBlockSizeBytes)); options.num_levels = 2; options.target_file_size_base = kL0FileBytes; options.target_file_size_multiplier = 2; options.write_buffer_size = kL0FileBytes; BlockBasedTableOptions table_options; table_options.block_size = kBlockSizeBytes; std::vector compression_types; if (Zlib_Supported()) { compression_types.push_back(kZlibCompression); } #if LZ4_VERSION_NUMBER >= 10400 // r124+ compression_types.push_back(kLZ4Compression); compression_types.push_back(kLZ4HCCompression); #endif // LZ4_VERSION_NUMBER >= 10400 if (ZSTD_Supported()) { compression_types.push_back(kZSTD); } enum DictionaryTypes : int { kWithoutDict, kWithDict, kWithZSTDTrainedDict, kDictEnd, }; for (auto compression_type : compression_types) { options.compression = compression_type; size_t bytes_without_dict = 0; size_t bytes_with_dict = 0; size_t bytes_with_zstd_trained_dict = 0; for (int i = kWithoutDict; i < kDictEnd; i++) { // First iteration: compress without preset dictionary // Second iteration: compress with preset dictionary // Third iteration (zstd only): compress with zstd-trained dictionary // // To make sure the compression dictionary has the intended effect, we // verify the compressed size is smaller in successive iterations. 
Also in // the non-first iterations, verify the data we get out is the same data // we put in. switch (i) { case kWithoutDict: options.compression_opts.max_dict_bytes = 0; options.compression_opts.zstd_max_train_bytes = 0; break; case kWithDict: options.compression_opts.max_dict_bytes = kBlockSizeBytes; options.compression_opts.zstd_max_train_bytes = 0; break; case kWithZSTDTrainedDict: if (compression_type != kZSTD) { continue; } options.compression_opts.max_dict_bytes = kBlockSizeBytes; options.compression_opts.zstd_max_train_bytes = kL0FileBytes; break; default: assert(false); } options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); std::string seq_datas[10]; for (int j = 0; j < 10; ++j) { seq_datas[j] = RandomString(&rnd, kBlockSizeBytes - kApproxPerBlockOverheadBytes); } ASSERT_EQ(0, NumTableFilesAtLevel(0, 1)); for (int j = 0; j < kNumL0Files; ++j) { for (size_t k = 0; k < kL0FileBytes / kBlockSizeBytes + 1; ++k) { auto key_num = j * (kL0FileBytes / kBlockSizeBytes) + k; ASSERT_OK(Put(1, Key(static_cast(key_num)), seq_datas[(key_num / 10) % 10])); } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); ASSERT_EQ(j + 1, NumTableFilesAtLevel(0, 1)); } dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], true /* disallow_trivial_move */); ASSERT_EQ(0, NumTableFilesAtLevel(0, 1)); ASSERT_GT(NumTableFilesAtLevel(1, 1), 0); // Get the live sst files size size_t total_sst_bytes = TotalSize(1); if (i == kWithoutDict) { bytes_without_dict = total_sst_bytes; } else if (i == kWithDict) { bytes_with_dict = total_sst_bytes; } else if (i == kWithZSTDTrainedDict) { bytes_with_zstd_trained_dict = total_sst_bytes; } for (size_t j = 0; j < kNumL0Files * (kL0FileBytes / kBlockSizeBytes); j++) { ASSERT_EQ(seq_datas[(j / 10) % 10], Get(1, Key(static_cast(j)))); } if (i == kWithDict) { ASSERT_GT(bytes_without_dict, bytes_with_dict); } 
else if (i == kWithZSTDTrainedDict) { // In zstd compression, it is sometimes possible that using a trained // dictionary does not get as good a compression ratio as without // training. // But using a dictionary (with or without training) should always get // better compression ratio than not using one. ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_trained_dict || bytes_without_dict > bytes_with_zstd_trained_dict); } DestroyAndReopen(options); } } } TEST_F(DBTest2, PresetCompressionDictLocality) { if (!ZSTD_Supported()) { return; } // Verifies that compression dictionary is generated from local data. The // verification simply checks all output SSTs have different compression // dictionaries. We do not verify effectiveness as that'd likely be flaky in // the future. const int kNumEntriesPerFile = 1 << 10; // 1KB const int kNumBytesPerEntry = 1 << 10; // 1KB const int kNumFiles = 4; Options options = CurrentOptions(); options.compression = kZSTD; options.compression_opts.max_dict_bytes = 1 << 14; // 16KB options.compression_opts.zstd_max_train_bytes = 1 << 18; // 256KB options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry; BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; options.table_factory.reset(new BlockBasedTableFactory(table_options)); Reopen(options); Random rnd(301); for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kNumEntriesPerFile; ++j) { ASSERT_OK(Put(Key(i * kNumEntriesPerFile + j), RandomString(&rnd, kNumBytesPerEntry))); } ASSERT_OK(Flush()); MoveFilesToLevel(1); ASSERT_EQ(NumTableFilesAtLevel(1), i + 1); } // Store all the dictionaries generated during a full compaction. 
std::vector compression_dicts; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict", [&](void* arg) { compression_dicts.emplace_back(static_cast(arg)->ToString()); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); CompactRangeOptions compact_range_opts; compact_range_opts.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr)); // Dictionary compression should not be so good as to compress four totally // random files into one. If it does then there's probably something wrong // with the test. ASSERT_GT(NumTableFilesAtLevel(1), 1); // Furthermore, there should be one compression dictionary generated per file. // And they should all be different from each other. ASSERT_EQ(NumTableFilesAtLevel(1), static_cast(compression_dicts.size())); for (size_t i = 1; i < compression_dicts.size(); ++i) { std::string& a = compression_dicts[i - 1]; std::string& b = compression_dicts[i]; size_t alen = a.size(); size_t blen = b.size(); ASSERT_TRUE(alen != blen || memcmp(a.data(), b.data(), alen) != 0); } } class CompactionCompressionListener : public EventListener { public: explicit CompactionCompressionListener(Options* db_options) : db_options_(db_options) {} void OnCompactionCompleted(DB* db, const CompactionJobInfo& ci) override { // Figure out last level with files int bottommost_level = 0; for (int level = 0; level < db->NumberLevels(); level++) { std::string files_at_level; ASSERT_TRUE( db->GetProperty("rocksdb.num-files-at-level" + NumberToString(level), &files_at_level)); if (files_at_level != "0") { bottommost_level = level; } } if (db_options_->bottommost_compression != kDisableCompressionOption && ci.output_level == bottommost_level) { ASSERT_EQ(ci.compression, db_options_->bottommost_compression); } else if (db_options_->compression_per_level.size() != 0) { ASSERT_EQ(ci.compression, 
db_options_->compression_per_level[ci.output_level]); } else { ASSERT_EQ(ci.compression, db_options_->compression); } max_level_checked = std::max(max_level_checked, ci.output_level); } int max_level_checked = 0; const Options* db_options_; }; enum CompressionFailureType { kTestCompressionFail, kTestDecompressionFail, kTestDecompressionCorruption }; class CompressionFailuresTest : public DBTest2, public testing::WithParamInterface> { public: CompressionFailuresTest() { std::tie(compression_failure_type_, compression_type_, compression_max_dict_bytes_, compression_parallel_threads_) = GetParam(); } CompressionFailureType compression_failure_type_ = kTestCompressionFail; CompressionType compression_type_ = kNoCompression; uint32_t compression_max_dict_bytes_ = 0; uint32_t compression_parallel_threads_ = 0; }; INSTANTIATE_TEST_CASE_P( DBTest2, CompressionFailuresTest, ::testing::Combine(::testing::Values(kTestCompressionFail, kTestDecompressionFail, kTestDecompressionCorruption), ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Values(0, 10), ::testing::Values(1, 4))); TEST_P(CompressionFailuresTest, CompressionFailures) { if (compression_type_ == kNoCompression) { return; } Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 2; options.max_bytes_for_level_base = 1024; options.max_bytes_for_level_multiplier = 2; options.num_levels = 7; options.max_background_compactions = 1; options.target_file_size_base = 512; BlockBasedTableOptions table_options; table_options.block_size = 512; table_options.verify_compression = true; options.table_factory.reset(new BlockBasedTableFactory(table_options)); options.compression = compression_type_; options.compression_opts.parallel_threads = compression_parallel_threads_; options.compression_opts.max_dict_bytes = compression_max_dict_bytes_; options.bottommost_compression_opts.parallel_threads = compression_parallel_threads_; options.bottommost_compression_opts.max_dict_bytes = 
compression_max_dict_bytes_; if (compression_failure_type_ == kTestCompressionFail) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableBuilder::CompressBlockInternal:TamperWithReturnValue", [](void* arg) { bool* ret = static_cast(arg); *ret = false; }); } else if (compression_failure_type_ == kTestDecompressionFail) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "UncompressBlockContentsForCompressionType:TamperWithReturnValue", [](void* arg) { Status* ret = static_cast(arg); ASSERT_OK(*ret); *ret = Status::Corruption("kTestDecompressionFail"); }); } else if (compression_failure_type_ == kTestDecompressionCorruption) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "UncompressBlockContentsForCompressionType:" "TamperWithDecompressionOutput", [](void* arg) { BlockContents* contents = static_cast(arg); // Ensure uncompressed data != original data const size_t len = contents->data.size() + 1; std::unique_ptr fake_data(new char[len]()); *contents = BlockContents(std::move(fake_data), len); }); } std::map key_value_written; const int kKeySize = 5; const int kValUnitSize = 16; const int kValSize = 256; Random rnd(405); Status s = Status::OK(); DestroyAndReopen(options); // Write 10 random files for (int i = 0; i < 10; i++) { for (int j = 0; j < 5; j++) { std::string key = RandomString(&rnd, kKeySize); // Ensure good compression ratio std::string valueUnit = RandomString(&rnd, kValUnitSize); std::string value; for (int k = 0; k < kValSize; k += kValUnitSize) { value += valueUnit; } s = Put(key, value); if (compression_failure_type_ == kTestCompressionFail) { key_value_written[key] = value; ASSERT_OK(s); } } s = Flush(); if (compression_failure_type_ == kTestCompressionFail) { ASSERT_OK(s); } s = dbfull()->TEST_WaitForCompact(); if (compression_failure_type_ == kTestCompressionFail) { ASSERT_OK(s); } if (i == 4) { // Make compression fail at the mid of table building 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); } } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); if (compression_failure_type_ == kTestCompressionFail) { // Should be kNoCompression, check content consistency std::unique_ptr db_iter(db_->NewIterator(ReadOptions())); for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { std::string key = db_iter->key().ToString(); std::string value = db_iter->value().ToString(); ASSERT_NE(key_value_written.find(key), key_value_written.end()); ASSERT_EQ(key_value_written[key], value); key_value_written.erase(key); } ASSERT_EQ(0, key_value_written.size()); } else if (compression_failure_type_ == kTestDecompressionFail) { ASSERT_EQ(std::string(s.getState()), "Could not decompress: kTestDecompressionFail"); } else if (compression_failure_type_ == kTestDecompressionCorruption) { ASSERT_EQ(std::string(s.getState()), "Decompressed block did not match raw block"); } } TEST_F(DBTest2, CompressionOptions) { if (!Zlib_Supported() || !Snappy_Supported()) { return; } Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 2; options.max_bytes_for_level_base = 100; options.max_bytes_for_level_multiplier = 2; options.num_levels = 7; options.max_background_compactions = 1; CompactionCompressionListener* listener = new CompactionCompressionListener(&options); options.listeners.emplace_back(listener); const int kKeySize = 5; const int kValSize = 20; Random rnd(301); std::vector compression_parallel_threads = {1, 4}; std::map key_value_written; for (int iter = 0; iter <= 2; iter++) { listener->max_level_checked = 0; if (iter == 0) { // Use different compression algorithms for different levels but // always use Zlib for bottommost level options.compression_per_level = {kNoCompression, kNoCompression, kNoCompression, kSnappyCompression, kSnappyCompression, kSnappyCompression, kZlibCompression}; options.compression = kNoCompression; options.bottommost_compression = 
kZlibCompression; } else if (iter == 1) { // Use Snappy except for bottommost level use ZLib options.compression_per_level = {}; options.compression = kSnappyCompression; options.bottommost_compression = kZlibCompression; } else if (iter == 2) { // Use Snappy everywhere options.compression_per_level = {}; options.compression = kSnappyCompression; options.bottommost_compression = kDisableCompressionOption; } for (auto num_threads : compression_parallel_threads) { options.compression_opts.parallel_threads = num_threads; options.bottommost_compression_opts.parallel_threads = num_threads; DestroyAndReopen(options); // Write 10 random files for (int i = 0; i < 10; i++) { for (int j = 0; j < 5; j++) { std::string key = RandomString(&rnd, kKeySize); std::string value = RandomString(&rnd, kValSize); key_value_written[key] = value; ASSERT_OK(Put(key, value)); } ASSERT_OK(Flush()); dbfull()->TEST_WaitForCompact(); } // Make sure that we wrote enough to check all 7 levels ASSERT_EQ(listener->max_level_checked, 6); // Make sure database content is the same as key_value_written std::unique_ptr db_iter(db_->NewIterator(ReadOptions())); for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { std::string key = db_iter->key().ToString(); std::string value = db_iter->value().ToString(); ASSERT_NE(key_value_written.find(key), key_value_written.end()); ASSERT_EQ(key_value_written[key], value); key_value_written.erase(key); } ASSERT_EQ(0, key_value_written.size()); } } } class CompactionStallTestListener : public EventListener { public: CompactionStallTestListener() : compacting_files_cnt_(0), compacted_files_cnt_(0) {} void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override { ASSERT_EQ(ci.cf_name, "default"); ASSERT_EQ(ci.base_input_level, 0); ASSERT_EQ(ci.compaction_reason, CompactionReason::kLevelL0FilesNum); compacting_files_cnt_ += ci.input_files.size(); } void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { 
ASSERT_EQ(ci.cf_name, "default"); ASSERT_EQ(ci.base_input_level, 0); ASSERT_EQ(ci.compaction_reason, CompactionReason::kLevelL0FilesNum); compacted_files_cnt_ += ci.input_files.size(); } std::atomic compacting_files_cnt_; std::atomic compacted_files_cnt_; }; TEST_F(DBTest2, CompactionStall) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BGWorkCompaction", "DBTest2::CompactionStall:0"}, {"DBImpl::BGWorkCompaction", "DBTest2::CompactionStall:1"}, {"DBTest2::CompactionStall:2", "DBImpl::NotifyOnCompactionBegin::UnlockMutex"}, {"DBTest2::CompactionStall:3", "DBImpl::NotifyOnCompactionCompleted::UnlockMutex"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 4; options.max_background_compactions = 40; CompactionStallTestListener* listener = new CompactionStallTestListener(); options.listeners.emplace_back(listener); DestroyAndReopen(options); // make sure all background compaction jobs can be scheduled auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); Random rnd(301); // 4 Files in L0 for (int i = 0; i < 4; i++) { for (int j = 0; j < 10; j++) { ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10))); } ASSERT_OK(Flush()); } // Wait for compaction to be triggered TEST_SYNC_POINT("DBTest2::CompactionStall:0"); // Clear "DBImpl::BGWorkCompaction" SYNC_POINT since we want to hold it again // at DBTest2::CompactionStall::1 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); // Another 6 L0 files to trigger compaction again for (int i = 0; i < 6; i++) { for (int j = 0; j < 10; j++) { ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10))); } ASSERT_OK(Flush()); } // Wait for another compaction to be triggered TEST_SYNC_POINT("DBTest2::CompactionStall:1"); // Hold NotifyOnCompactionBegin in the unlock mutex section TEST_SYNC_POINT("DBTest2::CompactionStall:2"); // Hold NotifyOnCompactionCompleted in 
the unlock mutex section TEST_SYNC_POINT("DBTest2::CompactionStall:3"); dbfull()->TEST_WaitForCompact(); ASSERT_LT(NumTableFilesAtLevel(0), options.level0_file_num_compaction_trigger); ASSERT_GT(listener->compacted_files_cnt_.load(), 10 - options.level0_file_num_compaction_trigger); ASSERT_EQ(listener->compacting_files_cnt_.load(), listener->compacted_files_cnt_.load()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif // ROCKSDB_LITE TEST_F(DBTest2, FirstSnapshotTest) { Options options; options.write_buffer_size = 100000; // Small write buffer options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); // This snapshot will have sequence number 0 what is expected behaviour. const Snapshot* s1 = db_->GetSnapshot(); Put(1, "k1", std::string(100000, 'x')); // Fill memtable Put(1, "k2", std::string(100000, 'y')); // Trigger flush db_->ReleaseSnapshot(s1); } #ifndef ROCKSDB_LITE TEST_F(DBTest2, DuplicateSnapshot) { Options options; options = CurrentOptions(options); std::vector snapshots; DBImpl* dbi = reinterpret_cast(db_); SequenceNumber oldest_ww_snap, first_ww_snap; Put("k", "v"); // inc seq snapshots.push_back(db_->GetSnapshot()); snapshots.push_back(db_->GetSnapshot()); Put("k", "v"); // inc seq snapshots.push_back(db_->GetSnapshot()); snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary()); first_ww_snap = snapshots.back()->GetSequenceNumber(); Put("k", "v"); // inc seq snapshots.push_back(dbi->GetSnapshotForWriteConflictBoundary()); snapshots.push_back(db_->GetSnapshot()); Put("k", "v"); // inc seq snapshots.push_back(db_->GetSnapshot()); { InstrumentedMutexLock l(dbi->mutex()); auto seqs = dbi->snapshots().GetAll(&oldest_ww_snap); ASSERT_EQ(seqs.size(), 4); // duplicates are not counted ASSERT_EQ(oldest_ww_snap, first_ww_snap); } for (auto s : snapshots) { db_->ReleaseSnapshot(s); } } #endif // ROCKSDB_LITE class PinL0IndexAndFilterBlocksTest : public DBTestBase, public testing::WithParamInterface> { 
public: PinL0IndexAndFilterBlocksTest() : DBTestBase("/db_pin_l0_index_bloom_test") {} void SetUp() override { infinite_max_files_ = std::get<0>(GetParam()); disallow_preload_ = std::get<1>(GetParam()); } void CreateTwoLevels(Options* options, bool close_afterwards) { if (infinite_max_files_) { options->max_open_files = -1; } options->create_if_missing = true; options->statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.pin_l0_filter_and_index_blocks_in_cache = true; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); options->table_factory.reset(new BlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, *options); Put(1, "a", "begin"); Put(1, "z", "end"); ASSERT_OK(Flush(1)); // move this table to L1 dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); // reset block cache table_options.block_cache = NewLRUCache(64 * 1024); options->table_factory.reset(NewBlockBasedTableFactory(table_options)); TryReopenWithColumnFamilies({"default", "pikachu"}, *options); // create new table at L0 Put(1, "a2", "begin2"); Put(1, "z2", "end2"); ASSERT_OK(Flush(1)); if (close_afterwards) { Close(); // This ensures that there is no ref to block cache entries } table_options.block_cache->EraseUnRefEntries(); } bool infinite_max_files_; bool disallow_preload_; }; TEST_P(PinL0IndexAndFilterBlocksTest, IndexAndFilterBlocksOfNewTableAddedToCacheWithPinning) { Options options = CurrentOptions(); if (infinite_max_files_) { options.max_open_files = -1; } options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.pin_l0_filter_and_index_blocks_in_cache = true; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); 
CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "key", "val")); // Create a new table. ASSERT_OK(Flush(1)); // index/filter blocks added to block cache right after table creation. ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); // only index/filter were added ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS)); std::string value; // Miss and hit count should remain the same, they're all pinned. db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); // Miss and hit count should remain the same, they're all pinned. 
value = Get(1, "key"); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } TEST_P(PinL0IndexAndFilterBlocksTest, MultiLevelIndexAndFilterBlocksCachedWithPinning) { Options options = CurrentOptions(); PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, false); // get base cache values uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT); std::string value; // this should be read from L0 // so cache values don't change value = Get(1, "a2"); ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); // this should be read from L1 // the file is opened, prefetching results in a cache filter miss // the block is loaded and added to the cache, // then the get results in a cache hit for L1 // When we have inifinite max_files, there is still cache miss because we have // reset the block cache value = Get(1, "a"); ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); } TEST_P(PinL0IndexAndFilterBlocksTest, DisablePrefetchingNonL0IndexAndFilter) { Options options = CurrentOptions(); // This ensures that db does not ref anything in the block cache, so // EraseUnRefEntries could clear them up. 
bool close_afterwards = true; PinL0IndexAndFilterBlocksTest::CreateTwoLevels(&options, close_afterwards); // Get base cache values uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT); if (disallow_preload_) { // Now we have two files. We narrow the max open files to allow 3 entries // so that preloading SST files won't happen. options.max_open_files = 13; // RocksDB sanitize max open files to at least 20. Modify it back. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { int* max_open_files = static_cast(arg); *max_open_files = 13; }); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Reopen database. If max_open_files is set as -1, table readers will be // preloaded. This will trigger a BlockBasedTable::Open() and prefetch // L0 index and filter. Level 1's prefetching is disabled in DB::Open() TryReopenWithColumnFamilies({"default", "pikachu"}, options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); if (!disallow_preload_) { // After reopen, cache miss are increased by one because we read (and only // read) filter and index on L0 ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } else { // If max_open_files is not -1, we do not preload table readers, so there is // no change. 
ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } std::string value; // this should be read from L0 value = Get(1, "a2"); // If max_open_files is -1, we have pinned index and filter in Rep, so there // will not be changes in index and filter misses or hits. If max_open_files // is not -1, Get() will open a TableReader and prefetch index and filter. ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); // this should be read from L1 value = Get(1, "a"); if (!disallow_preload_) { // In inifinite max files case, there's a cache miss in executing Get() // because index and filter are not prefetched before. ASSERT_EQ(fm + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } else { // In this case, cache miss will be increased by one in // BlockBasedTable::Open() because this is not in DB::Open() code path so we // will prefetch L1's index and filter. Cache hit will also be increased by // one because Get() will read index and filter from the block cache // prefetched in previous Open() call. 
ASSERT_EQ(fm + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih + 1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } // Force a full compaction to one single file. There will be a block // cache read for both of index and filter. If prefetch doesn't explicitly // happen, it will happen when verifying the file. Compact(1, "a", "zzzzz"); dbfull()->TEST_WaitForCompact(); if (!disallow_preload_) { ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } else { ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih + 4, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } // Bloom and index hit will happen when a Get() happens. 
value = Get(1, "a"); if (!disallow_preload_) { ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih + 4, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } else { ASSERT_EQ(fm + 3, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); ASSERT_EQ(fh + 2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); ASSERT_EQ(im + 3, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); ASSERT_EQ(ih + 5, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT)); } } INSTANTIATE_TEST_CASE_P(PinL0IndexAndFilterBlocksTest, PinL0IndexAndFilterBlocksTest, ::testing::Values(std::make_tuple(true, false), std::make_tuple(false, false), std::make_tuple(false, true))); #ifndef ROCKSDB_LITE TEST_F(DBTest2, MaxCompactionBytesTest) { Options options = CurrentOptions(); options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 200 << 10; options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 4; options.num_levels = 4; options.compression = kNoCompression; options.max_bytes_for_level_base = 450 << 10; options.target_file_size_base = 100 << 10; // Infinite for full compaction. options.max_compaction_bytes = options.target_file_size_base * 100; Reopen(options); Random rnd(301); for (int num = 0; num < 8; num++) { GenerateNewRandomFile(&rnd); } CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_EQ("0,0,8", FilesPerLevel(0)); // When compact from Ln -> Ln+1, cut a file if the file overlaps with // more than three files in Ln+1. 
options.max_compaction_bytes = options.target_file_size_base * 3; Reopen(options); GenerateNewRandomFile(&rnd); // Add three more small files that overlap with the previous file for (int i = 0; i < 3; i++) { Put("a", "z"); ASSERT_OK(Flush()); } dbfull()->TEST_WaitForCompact(); // Output files to L1 are cut to three pieces, according to // options.max_compaction_bytes ASSERT_EQ("0,3,8", FilesPerLevel(0)); } static void UniqueIdCallback(void* arg) { int* result = reinterpret_cast(arg); if (*result == -1) { *result = 0; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "GetUniqueIdFromFile:FS_IOC_GETVERSION", UniqueIdCallback); } class MockPersistentCache : public PersistentCache { public: explicit MockPersistentCache(const bool is_compressed, const size_t max_size) : is_compressed_(is_compressed), max_size_(max_size) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "GetUniqueIdFromFile:FS_IOC_GETVERSION", UniqueIdCallback); } ~MockPersistentCache() override {} PersistentCache::StatsType Stats() override { return PersistentCache::StatsType(); } Status Insert(const Slice& page_key, const char* data, const size_t size) override { MutexLock _(&lock_); if (size_ > max_size_) { size_ -= data_.begin()->second.size(); data_.erase(data_.begin()); } data_.insert(std::make_pair(page_key.ToString(), std::string(data, size))); size_ += size; return Status::OK(); } Status Lookup(const Slice& page_key, std::unique_ptr* data, size_t* size) override { MutexLock _(&lock_); auto it = data_.find(page_key.ToString()); if (it == data_.end()) { return Status::NotFound(); } assert(page_key.ToString() == it->first); data->reset(new char[it->second.size()]); memcpy(data->get(), it->second.c_str(), it->second.size()); *size = it->second.size(); return Status::OK(); } bool IsCompressed() override { return is_compressed_; } std::string 
GetPrintableOptions() const override { return "MockPersistentCache"; } port::Mutex lock_; std::map data_; const bool is_compressed_ = true; size_t size_ = 0; const size_t max_size_ = 10 * 1024; // 10KiB }; #ifdef OS_LINUX // Make sure that in CPU time perf context counters, Env::NowCPUNanos() // is used, rather than Env::CPUNanos(); TEST_F(DBTest2, TestPerfContextGetCpuTime) { // force resizing table cache so table handle is not preloaded so that // we can measure find_table_nanos during Get(). dbfull()->TEST_table_cache()->SetCapacity(0); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); env_->now_cpu_count_.store(0); // CPU timing is not enabled with kEnableTimeExceptForMutex SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex); ASSERT_EQ("bar", Get("foo")); ASSERT_EQ(0, get_perf_context()->get_cpu_nanos); ASSERT_EQ(0, env_->now_cpu_count_.load()); uint64_t kDummyAddonTime = uint64_t{1000000000000}; // Add time to NowNanos() reading. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "TableCache::FindTable:0", [&](void* /*arg*/) { env_->addon_time_.fetch_add(kDummyAddonTime); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); ASSERT_EQ("bar", Get("foo")); ASSERT_GT(env_->now_cpu_count_.load(), 2); ASSERT_LT(get_perf_context()->get_cpu_nanos, kDummyAddonTime); ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonTime); SetPerfLevel(PerfLevel::kDisable); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, TestPerfContextIterCpuTime) { DestroyAndReopen(CurrentOptions()); // force resizing table cache so table handle is not preloaded so that // we can measure find_table_nanos during iteration dbfull()->TEST_table_cache()->SetCapacity(0); const size_t kNumEntries = 10; for (size_t i = 0; i < kNumEntries; ++i) { ASSERT_OK(Put("k" + ToString(i), "v" + ToString(i))); } ASSERT_OK(Flush()); for (size_t i = 0; i < kNumEntries; ++i) { ASSERT_EQ("v" + 
ToString(i), Get("k" + ToString(i))); } std::string last_key = "k" + ToString(kNumEntries - 1); std::string last_value = "v" + ToString(kNumEntries - 1); env_->now_cpu_count_.store(0); // CPU timing is not enabled with kEnableTimeExceptForMutex SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex); Iterator* iter = db_->NewIterator(ReadOptions()); iter->Seek("k0"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); iter->SeekForPrev(last_key); ASSERT_TRUE(iter->Valid()); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(last_value, iter->value().ToString()); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); ASSERT_EQ(0, get_perf_context()->iter_seek_cpu_nanos); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v1", iter->value().ToString()); ASSERT_EQ(0, get_perf_context()->iter_next_cpu_nanos); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); ASSERT_EQ(0, get_perf_context()->iter_prev_cpu_nanos); ASSERT_EQ(0, env_->now_cpu_count_.load()); delete iter; uint64_t kDummyAddonTime = uint64_t{1000000000000}; // Add time to NowNanos() reading. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "TableCache::FindTable:0", [&](void* /*arg*/) { env_->addon_time_.fetch_add(kDummyAddonTime); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); iter = db_->NewIterator(ReadOptions()); iter->Seek("k0"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); iter->SeekForPrev(last_key); ASSERT_TRUE(iter->Valid()); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(last_value, iter->value().ToString()); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, 0); ASSERT_LT(get_perf_context()->iter_seek_cpu_nanos, kDummyAddonTime); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v1", iter->value().ToString()); ASSERT_GT(get_perf_context()->iter_next_cpu_nanos, 0); ASSERT_LT(get_perf_context()->iter_next_cpu_nanos, kDummyAddonTime); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); ASSERT_GT(get_perf_context()->iter_prev_cpu_nanos, 0); ASSERT_LT(get_perf_context()->iter_prev_cpu_nanos, kDummyAddonTime); ASSERT_GE(env_->now_cpu_count_.load(), 12); ASSERT_GT(get_perf_context()->find_table_nanos, kDummyAddonTime); SetPerfLevel(PerfLevel::kDisable); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); delete iter; } #endif // OS_LINUX // GetUniqueIdFromFile is not implemented on these platforms. Persistent cache // breaks when that function is not implemented and no regular block cache is // provided. 
#if !defined(OS_SOLARIS) && !defined(OS_WIN) TEST_F(DBTest2, PersistentCache) { int num_iter = 80; Options options; options.write_buffer_size = 64 * 1024; // small write buffer options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options = CurrentOptions(options); auto bsizes = {/*no block cache*/ 0, /*1M*/ 1 * 1024 * 1024}; auto types = {/*compressed*/ 1, /*uncompressed*/ 0}; for (auto bsize : bsizes) { for (auto type : types) { BlockBasedTableOptions table_options; table_options.persistent_cache.reset( new MockPersistentCache(type, 10 * 1024)); table_options.no_block_cache = true; table_options.block_cache = bsize ? NewLRUCache(bsize) : nullptr; table_options.block_cache_compressed = nullptr; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // default column family doesn't have block cache Options no_block_cache_opts; no_block_cache_opts.statistics = options.statistics; no_block_cache_opts = CurrentOptions(no_block_cache_opts); BlockBasedTableOptions table_options_no_bc; table_options_no_bc.no_block_cache = true; no_block_cache_opts.table_factory.reset( NewBlockBasedTableFactory(table_options_no_bc)); ReopenWithColumnFamilies( {"default", "pikachu"}, std::vector({no_block_cache_opts, options})); Random rnd(301); // Write 8MB (80 values, each 100K) ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); std::vector values; std::string str; for (int i = 0; i < num_iter; i++) { if (i % 4 == 0) { // high compression ratio str = RandomString(&rnd, 1000); } values.push_back(str); ASSERT_OK(Put(1, Key(i), values[i])); } // flush all data from memtable so that reads are from block cache ASSERT_OK(Flush(1)); for (int i = 0; i < num_iter; i++) { ASSERT_EQ(Get(1, Key(i)), values[i]); } auto hit = options.statistics->getTickerCount(PERSISTENT_CACHE_HIT); auto miss = options.statistics->getTickerCount(PERSISTENT_CACHE_MISS); ASSERT_GT(hit, 0); ASSERT_GT(miss, 0); } } } #endif // 
!defined(OS_SOLARIS) && !defined(OS_WIN) namespace { void CountSyncPoint() { TEST_SYNC_POINT_CALLBACK("DBTest2::MarkedPoint", nullptr /* arg */); } } // namespace TEST_F(DBTest2, SyncPointMarker) { std::atomic sync_point_called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBTest2::MarkedPoint", [&](void* /*arg*/) { sync_point_called.fetch_add(1); }); // The first dependency enforces Marker can be loaded before MarkedPoint. // The second checks that thread 1's MarkedPoint should be disabled here. // Execution order: // | Thread 1 | Thread 2 | // | | Marker | // | MarkedPoint | | // | Thread1First | | // | | MarkedPoint | ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependencyAndMarkers( {{"DBTest2::SyncPointMarker:Thread1First", "DBTest2::MarkedPoint"}}, {{"DBTest2::SyncPointMarker:Marker", "DBTest2::MarkedPoint"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::function func1 = [&]() { CountSyncPoint(); TEST_SYNC_POINT("DBTest2::SyncPointMarker:Thread1First"); }; std::function func2 = [&]() { TEST_SYNC_POINT("DBTest2::SyncPointMarker:Marker"); CountSyncPoint(); }; auto thread1 = port::Thread(func1); auto thread2 = port::Thread(func2); thread1.join(); thread2.join(); // Callback is only executed once ASSERT_EQ(sync_point_called.load(), 1); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif size_t GetEncodedEntrySize(size_t key_size, size_t value_size) { std::string buffer; PutVarint32(&buffer, static_cast(0)); PutVarint32(&buffer, static_cast(key_size)); PutVarint32(&buffer, static_cast(value_size)); return buffer.size() + key_size + value_size; } TEST_F(DBTest2, ReadAmpBitmap) { Options options = CurrentOptions(); BlockBasedTableOptions bbto; uint32_t bytes_per_bit[2] = {1, 16}; for (size_t k = 0; k < 2; k++) { // Disable delta encoding to make it easier to calculate read amplification bbto.use_delta_encoding = false; // Huge block cache to make it easier to calculate read amplification 
bbto.block_cache = NewLRUCache(1024 * 1024 * 1024); bbto.read_amp_bytes_per_bit = bytes_per_bit[k]; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); const size_t kNumEntries = 10000; Random rnd(301); for (size_t i = 0; i < kNumEntries; i++) { ASSERT_OK(Put(Key(static_cast(i)), RandomString(&rnd, 100))); } ASSERT_OK(Flush()); Close(); Reopen(options); // Read keys/values randomly and verify that reported read amp error // is less than 2% uint64_t total_useful_bytes = 0; std::set read_keys; std::string value; for (size_t i = 0; i < kNumEntries * 5; i++) { int key_idx = rnd.Next() % kNumEntries; std::string key = Key(key_idx); ASSERT_OK(db_->Get(ReadOptions(), key, &value)); if (read_keys.find(key_idx) == read_keys.end()) { auto internal_key = InternalKey(key, 0, ValueType::kTypeValue); total_useful_bytes += GetEncodedEntrySize(internal_key.size(), value.size()); read_keys.insert(key_idx); } double expected_read_amp = static_cast(total_useful_bytes) / options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES); double read_amp = static_cast(options.statistics->getTickerCount( READ_AMP_ESTIMATE_USEFUL_BYTES)) / options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES); double error_pct = fabs(expected_read_amp - read_amp) * 100; // Error between reported read amp and real read amp should be less than // 2% EXPECT_LE(error_pct, 2); } // Make sure we read every thing in the DB (which is smaller than our cache) Iterator* iter = db_->NewIterator(ReadOptions()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_EQ(iter->value().ToString(), Get(iter->key().ToString())); } delete iter; // Read amp is on average 100% since we read all what we loaded in memory if (k == 0) { ASSERT_EQ( options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES), options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES)); } else { ASSERT_NEAR( 
options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES) * 1.0f / options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES), 1, .01); } } } #ifndef OS_SOLARIS // GetUniqueIdFromFile is not implemented TEST_F(DBTest2, ReadAmpBitmapLiveInCacheAfterDBClose) { { const int kIdBufLen = 100; char id_buf[kIdBufLen]; #ifndef OS_WIN // You can't open a directory on windows using random access file std::unique_ptr file; ASSERT_OK(env_->NewRandomAccessFile(dbname_, &file, EnvOptions())); if (file->GetUniqueId(id_buf, kIdBufLen) == 0) { // fs holding db directory doesn't support getting a unique file id, // this means that running this test will fail because lru_cache will load // the blocks again regardless of them being already in the cache return; } #else std::unique_ptr dir; ASSERT_OK(env_->NewDirectory(dbname_, &dir)); if (dir->GetUniqueId(id_buf, kIdBufLen) == 0) { // fs holding db directory doesn't support getting a unique file id, // this means that running this test will fail because lru_cache will load // the blocks again regardless of them being already in the cache return; } #endif } uint32_t bytes_per_bit[2] = {1, 16}; for (size_t k = 0; k < 2; k++) { std::shared_ptr lru_cache = NewLRUCache(1024 * 1024 * 1024); std::shared_ptr stats = ROCKSDB_NAMESPACE::CreateDBStatistics(); Options options = CurrentOptions(); BlockBasedTableOptions bbto; // Disable delta encoding to make it easier to calculate read amplification bbto.use_delta_encoding = false; // Huge block cache to make it easier to calculate read amplification bbto.block_cache = lru_cache; bbto.read_amp_bytes_per_bit = bytes_per_bit[k]; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.statistics = stats; DestroyAndReopen(options); const int kNumEntries = 10000; Random rnd(301); for (int i = 0; i < kNumEntries; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 100))); } ASSERT_OK(Flush()); Close(); Reopen(options); uint64_t total_useful_bytes = 0; std::set read_keys; std::string 
value; // Iter1: Read half the DB, Read even keys // Key(0), Key(2), Key(4), Key(6), Key(8), ... for (int i = 0; i < kNumEntries; i += 2) { std::string key = Key(i); ASSERT_OK(db_->Get(ReadOptions(), key, &value)); if (read_keys.find(i) == read_keys.end()) { auto internal_key = InternalKey(key, 0, ValueType::kTypeValue); total_useful_bytes += GetEncodedEntrySize(internal_key.size(), value.size()); read_keys.insert(i); } } size_t total_useful_bytes_iter1 = options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES); size_t total_loaded_bytes_iter1 = options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES); Close(); std::shared_ptr new_statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); // Destroy old statistics obj that the blocks in lru_cache are pointing to options.statistics.reset(); // Use the statistics object that we just created options.statistics = new_statistics; Reopen(options); // Iter2: Read half the DB, Read odd keys // Key(1), Key(3), Key(5), Key(7), Key(9), ... 
for (int i = 1; i < kNumEntries; i += 2) { std::string key = Key(i); ASSERT_OK(db_->Get(ReadOptions(), key, &value)); if (read_keys.find(i) == read_keys.end()) { auto internal_key = InternalKey(key, 0, ValueType::kTypeValue); total_useful_bytes += GetEncodedEntrySize(internal_key.size(), value.size()); read_keys.insert(i); } } size_t total_useful_bytes_iter2 = options.statistics->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES); size_t total_loaded_bytes_iter2 = options.statistics->getTickerCount(READ_AMP_TOTAL_READ_BYTES); // Read amp is on average 100% since we read all what we loaded in memory if (k == 0) { ASSERT_EQ(total_useful_bytes_iter1 + total_useful_bytes_iter2, total_loaded_bytes_iter1 + total_loaded_bytes_iter2); } else { ASSERT_NEAR((total_useful_bytes_iter1 + total_useful_bytes_iter2) * 1.0f / (total_loaded_bytes_iter1 + total_loaded_bytes_iter2), 1, .01); } } } #endif // !OS_SOLARIS #ifndef ROCKSDB_LITE TEST_F(DBTest2, AutomaticCompactionOverlapManualCompaction) { Options options = CurrentOptions(); options.num_levels = 3; options.IncreaseParallelism(20); DestroyAndReopen(options); ASSERT_OK(Put(Key(0), "a")); ASSERT_OK(Put(Key(5), "a")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(10), "a")); ASSERT_OK(Put(Key(15), "a")); ASSERT_OK(Flush()); CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); auto get_stat = [](std::string level_str, LevelStatType type, std::map props) { auto prop_str = "compaction." + level_str + "." + InternalStats::compaction_level_stats.at(type).property_name.c_str(); auto prop_item = props.find(prop_str); return prop_item == props.end() ? 
0 : std::stod(prop_item->second); }; // Trivial move 2 files to L2 ASSERT_EQ("0,0,2", FilesPerLevel()); // Also test that the stats GetMapProperty API reporting the same result { std::map prop; ASSERT_TRUE(dbfull()->GetMapProperty("rocksdb.cfstats", &prop)); ASSERT_EQ(0, get_stat("L0", LevelStatType::NUM_FILES, prop)); ASSERT_EQ(0, get_stat("L1", LevelStatType::NUM_FILES, prop)); ASSERT_EQ(2, get_stat("L2", LevelStatType::NUM_FILES, prop)); ASSERT_EQ(2, get_stat("Sum", LevelStatType::NUM_FILES, prop)); } // While the compaction is running, we will create 2 new files that // can fit in L2, these 2 files will be moved to L2 and overlap with // the running compaction and break the LSM consistency. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():Start", [&](void* /*arg*/) { ASSERT_OK( dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}, {"max_bytes_for_level_base", "1"}})); ASSERT_OK(Put(Key(6), "a")); ASSERT_OK(Put(Key(7), "a")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(8), "a")); ASSERT_OK(Put(Key(9), "a")); ASSERT_OK(Flush()); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Run a manual compaction that will compact the 2 files in L2 // into 1 file in L2 cro.exclusive_manual_compaction = false; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); // Test that the stats GetMapProperty API reporting 1 file in L2 { std::map prop; ASSERT_TRUE(dbfull()->GetMapProperty("rocksdb.cfstats", &prop)); ASSERT_EQ(1, get_stat("L2", LevelStatType::NUM_FILES, prop)); } } TEST_F(DBTest2, ManualCompactionOverlapManualCompaction) { Options options = CurrentOptions(); options.num_levels = 2; options.IncreaseParallelism(20); options.disable_auto_compactions = true; DestroyAndReopen(options); ASSERT_OK(Put(Key(0), "a")); ASSERT_OK(Put(Key(5), "a")); ASSERT_OK(Flush()); 
ASSERT_OK(Put(Key(10), "a")); ASSERT_OK(Put(Key(15), "a")); ASSERT_OK(Flush()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // Trivial move 2 files to L1 ASSERT_EQ("0,2", FilesPerLevel()); std::function bg_manual_compact = [&]() { std::string k1 = Key(6); std::string k2 = Key(9); Slice k1s(k1); Slice k2s(k2); CompactRangeOptions cro; cro.exclusive_manual_compaction = false; ASSERT_OK(db_->CompactRange(cro, &k1s, &k2s)); }; ROCKSDB_NAMESPACE::port::Thread bg_thread; // While the compaction is running, we will create 2 new files that // can fit in L1, these 2 files will be moved to L1 and overlap with // the running compaction and break the LSM consistency. std::atomic flag(false); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():Start", [&](void* /*arg*/) { if (flag.exchange(true)) { // We want to make sure to call this callback only once return; } ASSERT_OK(Put(Key(6), "a")); ASSERT_OK(Put(Key(7), "a")); ASSERT_OK(Flush()); ASSERT_OK(Put(Key(8), "a")); ASSERT_OK(Put(Key(9), "a")); ASSERT_OK(Flush()); // Start a non-exclusive manual compaction in a bg thread bg_thread = port::Thread(bg_manual_compact); // This manual compaction conflict with the other manual compaction // so it should wait until the first compaction finish env_->SleepForMicroseconds(1000000); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Run a manual compaction that will compact the 2 files in L1 // into 1 file in L1 CompactRangeOptions cro; cro.exclusive_manual_compaction = false; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); bg_thread.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, PausingManualCompaction1) { Options options = CurrentOptions(); options.disable_auto_compactions = true; options.num_levels = 7; DestroyAndReopen(options); Random rnd(301); // Generate a file containing 10 
keys. for (int i = 0; i < 10; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); // Generate another file containing same keys for (int i = 0; i < 10; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); int manual_compactions_paused = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():PausingManualCompaction:1", [&](void* arg) { auto paused = reinterpret_cast*>(arg); ASSERT_FALSE(paused->load(std::memory_order_acquire)); paused->store(true, std::memory_order_release); manual_compactions_paused += 1; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::vector files_before_compact, files_after_compact; // Remember file name before compaction is triggered std::vector files_meta; dbfull()->GetLiveFilesMetaData(&files_meta); for (auto file : files_meta) { files_before_compact.push_back(file.name); } // OK, now trigger a manual compaction dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); // Wait for compactions to get scheduled and stopped dbfull()->TEST_WaitForCompact(true); // Get file names after compaction is stopped files_meta.clear(); dbfull()->GetLiveFilesMetaData(&files_meta); for (auto file : files_meta) { files_after_compact.push_back(file.name); } // Like nothing happened ASSERT_EQ(files_before_compact, files_after_compact); ASSERT_EQ(manual_compactions_paused, 1); manual_compactions_paused = 0; // Now make sure CompactFiles also not run dbfull()->CompactFiles(ROCKSDB_NAMESPACE::CompactionOptions(), files_before_compact, 0); // Wait for manual compaction to get scheduled and finish dbfull()->TEST_WaitForCompact(true); files_meta.clear(); files_after_compact.clear(); dbfull()->GetLiveFilesMetaData(&files_meta); for (auto file : files_meta) { files_after_compact.push_back(file.name); } ASSERT_EQ(files_before_compact, files_after_compact); // CompactFiles returns at entry point ASSERT_EQ(manual_compactions_paused, 0); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } // PausingManualCompaction does not affect auto compaction TEST_F(DBTest2, PausingManualCompaction2) { Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 2; options.disable_auto_compactions = false; DestroyAndReopen(options); dbfull()->DisableManualCompaction(); Random rnd(301); for (int i = 0; i < 2; i++) { // Generate a file containing 10 keys. for (int j = 0; j < 100; j++) { ASSERT_OK(Put(Key(j), RandomString(&rnd, 50))); } ASSERT_OK(Flush()); } ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); std::vector files_meta; dbfull()->GetLiveFilesMetaData(&files_meta); ASSERT_EQ(files_meta.size(), 1); } TEST_F(DBTest2, PausingManualCompaction3) { CompactRangeOptions compact_options; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.num_levels = 7; Random rnd(301); auto generate_files = [&]() { for (int i = 0; i < options.num_levels; i++) { for (int j = 0; j < options.num_levels - i + 1; j++) { for (int k = 0; k < 1000; k++) { ASSERT_OK(Put(Key(k + j * 1000), RandomString(&rnd, 50))); } Flush(); } for (int l = 1; l < options.num_levels - i; l++) { MoveFilesToLevel(l); } } }; DestroyAndReopen(options); generate_files(); #ifndef ROCKSDB_LITE ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel()); #endif // !ROCKSDB_LITE int run_manual_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():PausingManualCompaction:1", [&](void* /*arg*/) { run_manual_compactions++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->DisableManualCompaction(); dbfull()->CompactRange(compact_options, nullptr, nullptr); dbfull()->TEST_WaitForCompact(true); // As manual compaction disabled, not even reach sync point ASSERT_EQ(run_manual_compactions, 0); #ifndef ROCKSDB_LITE ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel()); #endif // !ROCKSDB_LITE ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( 
"CompactionJob::Run():PausingManualCompaction:1"); dbfull()->EnableManualCompaction(); dbfull()->CompactRange(compact_options, nullptr, nullptr); dbfull()->TEST_WaitForCompact(true); #ifndef ROCKSDB_LITE ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); #endif // !ROCKSDB_LITE ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, PausingManualCompaction4) { CompactRangeOptions compact_options; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.num_levels = 7; Random rnd(301); auto generate_files = [&]() { for (int i = 0; i < options.num_levels; i++) { for (int j = 0; j < options.num_levels - i + 1; j++) { for (int k = 0; k < 1000; k++) { ASSERT_OK(Put(Key(k + j * 1000), RandomString(&rnd, 50))); } Flush(); } for (int l = 1; l < options.num_levels - i; l++) { MoveFilesToLevel(l); } } }; DestroyAndReopen(options); generate_files(); #ifndef ROCKSDB_LITE ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel()); #endif // !ROCKSDB_LITE int run_manual_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():PausingManualCompaction:2", [&](void* arg) { auto paused = reinterpret_cast*>(arg); ASSERT_FALSE(paused->load(std::memory_order_acquire)); paused->store(true, std::memory_order_release); run_manual_compactions++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); dbfull()->EnableManualCompaction(); dbfull()->CompactRange(compact_options, nullptr, nullptr); dbfull()->TEST_WaitForCompact(true); ASSERT_EQ(run_manual_compactions, 1); #ifndef ROCKSDB_LITE ASSERT_EQ("2,3,4,5,6,7,8", FilesPerLevel()); #endif // !ROCKSDB_LITE ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( "CompactionJob::Run():PausingManualCompaction:2"); dbfull()->EnableManualCompaction(); dbfull()->CompactRange(compact_options, nullptr, nullptr); dbfull()->TEST_WaitForCompact(true); #ifndef ROCKSDB_LITE ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); #endif // !ROCKSDB_LITE 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, OptimizeForPointLookup) { Options options = CurrentOptions(); Close(); options.OptimizeForPointLookup(2); ASSERT_OK(DB::Open(options, dbname_, &db_)); ASSERT_OK(Put("foo", "v1")); ASSERT_EQ("v1", Get("foo")); Flush(); ASSERT_EQ("v1", Get("foo")); } TEST_F(DBTest2, OptimizeForSmallDB) { Options options = CurrentOptions(); Close(); options.OptimizeForSmallDb(); // Find the cache object ASSERT_EQ(std::string(BlockBasedTableFactory::kName), std::string(options.table_factory->Name())); BlockBasedTableOptions* table_options = reinterpret_cast( options.table_factory->GetOptions()); ASSERT_TRUE(table_options != nullptr); std::shared_ptr cache = table_options->block_cache; ASSERT_EQ(0, cache->GetUsage()); ASSERT_OK(DB::Open(options, dbname_, &db_)); ASSERT_OK(Put("foo", "v1")); // memtable size is costed to the block cache ASSERT_NE(0, cache->GetUsage()); ASSERT_EQ("v1", Get("foo")); Flush(); size_t prev_size = cache->GetUsage(); // Remember block cache size, so that we can find that // it is filled after Get(). // Use pinnable slice so that it can ping the block so that // when we check the size it is not evicted. PinnableSlice value; ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), "foo", &value)); ASSERT_GT(cache->GetUsage(), prev_size); value.Reset(); } #endif // ROCKSDB_LITE TEST_F(DBTest2, IterRaceFlush1) { ASSERT_OK(Put("foo", "v1")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::NewIterator:1", "DBTest2::IterRaceFlush:1"}, {"DBTest2::IterRaceFlush:2", "DBImpl::NewIterator:2"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread t1([&] { TEST_SYNC_POINT("DBTest2::IterRaceFlush:1"); ASSERT_OK(Put("foo", "v2")); Flush(); TEST_SYNC_POINT("DBTest2::IterRaceFlush:2"); }); // iterator is created after the first Put(), so it should see either // "v1" or "v2". 
{
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    it->Seek("foo");
    ASSERT_TRUE(it->Valid());
    ASSERT_EQ("foo", it->key().ToString());
  }

  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Same scenario as IterRaceFlush1, but the concurrent Put()+Flush() is
// ordered between "DBImpl::NewIterator:3" and "DBImpl::NewIterator:4".
TEST_F(DBTest2, IterRaceFlush2) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::NewIterator:3", "DBTest2::IterRaceFlush2:1"},
       {"DBTest2::IterRaceFlush2:2", "DBImpl::NewIterator:4"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::IterRaceFlush2:1");
    ASSERT_OK(Put("foo", "v2"));
    Flush();
    TEST_SYNC_POINT("DBTest2::IterRaceFlush2:2");
  });

  // iterator is created after the first Put(), so it should see either
  // "v1" or "v2".
  {
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    it->Seek("foo");
    ASSERT_TRUE(it->Valid());
    ASSERT_EQ("foo", it->key().ToString());
  }

  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Orders a concurrent Put()+Flush() between "ArenaWrappedDBIter::Refresh:1"
// and "ArenaWrappedDBIter::Refresh:2", then checks that the refreshed
// iterator still finds key "foo".
TEST_F(DBTest2, IterRefreshRaceFlush) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"ArenaWrappedDBIter::Refresh:1", "DBTest2::IterRefreshRaceFlush:1"},
       {"DBTest2::IterRefreshRaceFlush:2", "ArenaWrappedDBIter::Refresh:2"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::IterRefreshRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    Flush();
    TEST_SYNC_POINT("DBTest2::IterRefreshRaceFlush:2");
  });

  // iterator is created after the first Put(), so it should see either
  // "v1" or "v2".
{
    std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
    it->Refresh();
    it->Seek("foo");
    ASSERT_TRUE(it->Valid());
    ASSERT_EQ("foo", it->key().ToString());
  }

  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Orders a concurrent Put()+Flush() between "DBImpl::GetImpl:1" and
// "DBImpl::GetImpl:2", then checks that Get("foo") does not report
// NOT_FOUND.
TEST_F(DBTest2, GetRaceFlush1) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::GetImpl:1", "DBTest2::GetRaceFlush:1"},
       {"DBTest2::GetRaceFlush:2", "DBImpl::GetImpl:2"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    Flush();
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:2");
  });

  // Get() is issued after the first Put(), so it should see either
  // "v1" or "v2".
  ASSERT_NE("NOT_FOUND", Get("foo"));
  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Same scenario as GetRaceFlush1, but the concurrent Put()+Flush() is
// ordered between "DBImpl::GetImpl:3" and "DBImpl::GetImpl:4".
TEST_F(DBTest2, GetRaceFlush2) {
  ASSERT_OK(Put("foo", "v1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::GetImpl:3", "DBTest2::GetRaceFlush:1"},
       {"DBTest2::GetRaceFlush:2", "DBImpl::GetImpl:4"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  port::Thread t1([&] {
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:1");
    ASSERT_OK(Put("foo", "v2"));
    Flush();
    TEST_SYNC_POINT("DBTest2::GetRaceFlush:2");
  });

  // Get() is issued after the first Put(), so it should see either
  // "v1" or "v2".
ASSERT_NE("NOT_FOUND", Get("foo"));
  t1.join();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Writes, flushes, compacts and reopens a DB with direct reads and direct
// I/O for flush/compaction enabled. Returns early when direct I/O is not
// supported.
TEST_F(DBTest2, DirectIO) {
  if (!IsDirectIOSupported()) {
    return;
  }
  Options options = CurrentOptions();
  options.use_direct_reads = options.use_direct_io_for_flush_and_compaction =
      true;
  options.allow_mmap_reads = options.allow_mmap_writes = false;
  DestroyAndReopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  ASSERT_OK(Put(Key(5), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(10), "a"));
  ASSERT_OK(Put(Key(15), "a"));
  ASSERT_OK(Flush());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  Reopen(options);
}

// Checks that an iterator with read_tier = kMemtableTier returns only
// entries that have not been flushed yet, while point lookups with the same
// read options still succeed after the flush.
TEST_F(DBTest2, MemtableOnlyIterator) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "first"));
  ASSERT_OK(Put(1, "bar", "second"));

  ReadOptions ropt;
  ropt.read_tier = kMemtableTier;
  std::string value;
  Iterator* it = nullptr;

  // Before flushing
  // point lookups
  ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
  ASSERT_EQ("first", value);
  ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
  ASSERT_EQ("second", value);

  // Memtable-only iterator (read_tier=kMemtableTier); data not flushed yet.
  it = db_->NewIterator(ropt, handles_[1]);
  int count = 0;
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    ASSERT_TRUE(it->Valid());
    count++;
  }
  ASSERT_TRUE(!it->Valid());
  ASSERT_EQ(2, count);
  delete it;

  Flush(1);

  // After flushing
  // point lookups
  ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
  ASSERT_EQ("first", value);
  ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
  ASSERT_EQ("second", value);
  // nothing should be returned using memtable-only iterator after flushing.
it = db_->NewIterator(ropt, handles_[1]); count = 0; for (it->SeekToFirst(); it->Valid(); it->Next()) { ASSERT_TRUE(it->Valid()); count++; } ASSERT_TRUE(!it->Valid()); ASSERT_EQ(0, count); delete it; // Add a key to memtable ASSERT_OK(Put(1, "foobar", "third")); it = db_->NewIterator(ropt, handles_[1]); count = 0; for (it->SeekToFirst(); it->Valid(); it->Next()) { ASSERT_TRUE(it->Valid()); ASSERT_EQ("foobar", it->key().ToString()); ASSERT_EQ("third", it->value().ToString()); count++; } ASSERT_TRUE(!it->Valid()); ASSERT_EQ(1, count); delete it; } TEST_F(DBTest2, LowPriWrite) { Options options = CurrentOptions(); // Compaction pressure should trigger since 6 files options.level0_file_num_compaction_trigger = 4; options.level0_slowdown_writes_trigger = 12; options.level0_stop_writes_trigger = 30; options.delayed_write_rate = 8 * 1024 * 1024; Reopen(options); std::atomic rate_limit_count(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "GenericRateLimiter::Request:1", [&](void* arg) { rate_limit_count.fetch_add(1); int64_t* rate_bytes_per_sec = static_cast(arg); ASSERT_EQ(1024 * 1024, *rate_bytes_per_sec); }); // Block compaction ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DBTest.LowPriWrite:0", "DBImpl::BGWorkCompaction"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); WriteOptions wo; for (int i = 0; i < 6; i++) { wo.low_pri = false; Put("", "", wo); wo.low_pri = true; Put("", "", wo); Flush(); } ASSERT_EQ(0, rate_limit_count.load()); wo.low_pri = true; Put("", "", wo); ASSERT_EQ(1, rate_limit_count.load()); wo.low_pri = false; Put("", "", wo); ASSERT_EQ(1, rate_limit_count.load()); TEST_SYNC_POINT("DBTest.LowPriWrite:0"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); dbfull()->TEST_WaitForCompact(); wo.low_pri = true; Put("", "", wo); ASSERT_EQ(1, rate_limit_count.load()); wo.low_pri = false; Put("", "", wo); ASSERT_EQ(1, rate_limit_count.load()); } #ifndef ROCKSDB_LITE TEST_F(DBTest2, 
RateLimitedCompactionReads) {
  // compaction input has 512KB data
  const int kNumKeysPerFile = 128;
  const int kBytesPerKey = 1024;
  const int kNumL0Files = 4;

  // Run once with buffered I/O and, where supported, once with direct I/O.
  for (auto use_direct_io : {false, true}) {
    if (use_direct_io && !IsDirectIOSupported()) {
      continue;
    }
    Options options = CurrentOptions();
    options.compression = kNoCompression;
    options.level0_file_num_compaction_trigger = kNumL0Files;
    options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile));
    options.new_table_reader_for_compaction_inputs = true;
    // takes roughly one second, split into 100 x 10ms intervals. Each interval
    // permits 5.12KB, which is smaller than the block size, so this test
    // exercises the code for chunking reads.
    options.rate_limiter.reset(NewGenericRateLimiter(
        static_cast<int64_t>(kNumL0Files * kNumKeysPerFile *
                             kBytesPerKey) /* rate_bytes_per_sec */,
        10 * 1000 /* refill_period_us */, 10 /* fairness */,
        RateLimiter::Mode::kReadsOnly));
    options.use_direct_reads = options.use_direct_io_for_flush_and_compaction =
        use_direct_io;
    BlockBasedTableOptions bbto;
    bbto.block_size = 16384;
    bbto.no_block_cache = true;
    options.table_factory.reset(new BlockBasedTableFactory(bbto));
    DestroyAndReopen(options);

    for (int i = 0; i < kNumL0Files; ++i) {
      for (int j = 0; j <= kNumKeysPerFile; ++j) {
        ASSERT_OK(Put(Key(j), DummyString(kBytesPerKey)));
      }
      dbfull()->TEST_WaitForFlushMemTable();
      ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
    }
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ(0, NumTableFilesAtLevel(0));

    ASSERT_EQ(0, options.rate_limiter->GetTotalBytesThrough(Env::IO_HIGH));
    // should be slightly above 512KB due to non-data blocks read. Arbitrarily
    // chose 1MB as the upper bound on the total bytes read.
    size_t rate_limited_bytes =
        options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW);
    // Include the explicit prefetch of the footer in direct I/O case.
    size_t direct_io_extra = use_direct_io ?
512 * 1024 : 0; ASSERT_GE( rate_limited_bytes, static_cast(kNumKeysPerFile * kBytesPerKey * kNumL0Files)); ASSERT_LT( rate_limited_bytes, static_cast(2 * kNumKeysPerFile * kBytesPerKey * kNumL0Files + direct_io_extra)); Iterator* iter = db_->NewIterator(ReadOptions()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_EQ(iter->value().ToString(), DummyString(kBytesPerKey)); } delete iter; // bytes read for user iterator shouldn't count against the rate limit. ASSERT_EQ(rate_limited_bytes, static_cast( options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW))); } } #endif // ROCKSDB_LITE // Make sure DB can be reopen with reduced number of levels, given no file // is on levels higher than the new num_levels. TEST_F(DBTest2, ReduceLevel) { Options options; options.disable_auto_compactions = true; options.num_levels = 7; Reopen(options); Put("foo", "bar"); Flush(); MoveFilesToLevel(6); #ifndef ROCKSDB_LITE ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel()); #endif // !ROCKSDB_LITE CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 1; dbfull()->CompactRange(compact_options, nullptr, nullptr); #ifndef ROCKSDB_LITE ASSERT_EQ("0,1", FilesPerLevel()); #endif // !ROCKSDB_LITE options.num_levels = 3; Reopen(options); #ifndef ROCKSDB_LITE ASSERT_EQ("0,1", FilesPerLevel()); #endif // !ROCKSDB_LITE } // Test that ReadCallback is actually used in both memtbale and sst tables TEST_F(DBTest2, ReadCallbackTest) { Options options; options.disable_auto_compactions = true; options.num_levels = 7; Reopen(options); std::vector snapshots; // Try to create a db with multiple layers and a memtable const std::string key = "foo"; const std::string value = "bar"; // This test assumes that the seq start with 1 and increased by 1 after each // write batch of size 1. If that behavior changes, the test needs to be // updated as well. 
// TODO(myabandeh): update this test to use the seq number that is returned by // the DB instead of assuming what seq the DB used. int i = 1; for (; i < 10; i++) { Put(key, value + std::to_string(i)); // Take a snapshot to avoid the value being removed during compaction auto snapshot = dbfull()->GetSnapshot(); snapshots.push_back(snapshot); } Flush(); for (; i < 20; i++) { Put(key, value + std::to_string(i)); // Take a snapshot to avoid the value being removed during compaction auto snapshot = dbfull()->GetSnapshot(); snapshots.push_back(snapshot); } Flush(); MoveFilesToLevel(6); #ifndef ROCKSDB_LITE ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); #endif // !ROCKSDB_LITE for (; i < 30; i++) { Put(key, value + std::to_string(i)); auto snapshot = dbfull()->GetSnapshot(); snapshots.push_back(snapshot); } Flush(); #ifndef ROCKSDB_LITE ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel()); #endif // !ROCKSDB_LITE // And also add some values to the memtable for (; i < 40; i++) { Put(key, value + std::to_string(i)); auto snapshot = dbfull()->GetSnapshot(); snapshots.push_back(snapshot); } class TestReadCallback : public ReadCallback { public: explicit TestReadCallback(SequenceNumber snapshot) : ReadCallback(snapshot), snapshot_(snapshot) {} bool IsVisibleFullCheck(SequenceNumber seq) override { return seq <= snapshot_; } private: SequenceNumber snapshot_; }; for (int seq = 1; seq < i; seq++) { PinnableSlice pinnable_val; ReadOptions roptions; TestReadCallback callback(seq); bool dont_care = true; DBImpl::GetImplOptions get_impl_options; get_impl_options.column_family = dbfull()->DefaultColumnFamily(); get_impl_options.value = &pinnable_val; get_impl_options.value_found = &dont_care; get_impl_options.callback = &callback; Status s = dbfull()->GetImpl(roptions, key, get_impl_options); ASSERT_TRUE(s.ok()); // Assuming that after each Put the DB increased seq by one, the value and // seq number must be equal since we also inc value by 1 after each Put. 
ASSERT_EQ(value + std::to_string(seq), pinnable_val.ToString()); } for (auto snapshot : snapshots) { dbfull()->ReleaseSnapshot(snapshot); } } #ifndef ROCKSDB_LITE TEST_F(DBTest2, LiveFilesOmitObsoleteFiles) { // Regression test for race condition where an obsolete file is returned to // user as a "live file" but then deleted, all while file deletions are // disabled. // // It happened like this: // // 1. [flush thread] Log file "x.log" found by FindObsoleteFiles // 2. [user thread] DisableFileDeletions, GetSortedWalFiles are called and the // latter returned "x.log" // 3. [flush thread] PurgeObsoleteFiles deleted "x.log" // 4. [user thread] Reading "x.log" failed // // Unfortunately the only regression test I can come up with involves sleep. // We cannot set SyncPoints to repro since, once the fix is applied, the // SyncPoints would cause a deadlock as the repro's sequence of events is now // prohibited. // // Instead, if we sleep for a second between Find and Purge, and ensure the // read attempt happens after purge, then the sequence of events will almost // certainly happen on the old code. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::BackgroundCallFlush:FilesFound", "DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered"}, {"DBImpl::PurgeObsoleteFiles:End", "DBTest2::LiveFilesOmitObsoleteFiles:LiveFilesCaptured"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::PurgeObsoleteFiles:Begin", [&](void* /*arg*/) { env_->SleepForMicroseconds(1000000); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put("key", "val"); FlushOptions flush_opts; flush_opts.wait = false; db_->Flush(flush_opts); TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered"); db_->DisableFileDeletions(); VectorLogPtr log_files; db_->GetSortedWalFiles(log_files); TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:LiveFilesCaptured"); for (const auto& log_file : log_files) { ASSERT_OK(env_->FileExists(LogFileName(dbname_, log_file->LogNumber()))); } db_->EnableFileDeletions(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, TestNumPread) { Options options = CurrentOptions(); // disable block cache BlockBasedTableOptions table_options; table_options.no_block_cache = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); env_->count_random_reads_ = true; env_->random_file_open_counter_.store(0); ASSERT_OK(Put("bar", "foo")); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); // After flush, we'll open the file and read footer, meta block, // property block and index block. ASSERT_EQ(4, env_->random_read_counter_.Read()); ASSERT_EQ(1, env_->random_file_open_counter_.load()); // One pread per a normal data block read env_->random_file_open_counter_.store(0); env_->random_read_counter_.Reset(); ASSERT_EQ("bar", Get("foo")); ASSERT_EQ(1, env_->random_read_counter_.Read()); // All files are already opened. 
ASSERT_EQ(0, env_->random_file_open_counter_.load());

  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_OK(Put("bar2", "foo2"));
  ASSERT_OK(Put("foo2", "bar2"));
  ASSERT_OK(Flush());
  // After flush, we'll open the file and read footer, meta block,
  // property block and index block.
  ASSERT_EQ(4, env_->random_read_counter_.Read());
  ASSERT_EQ(1, env_->random_file_open_counter_.load());

  // Compaction needs two input blocks, which requires 2 preads, and
  // generate a new SST file which needs 4 preads (footer, meta block,
  // property block and index block). In total 6.
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(6, env_->random_read_counter_.Read());
  // All compaction input files should have already been opened.
  // NOTE(review): the single open counted here is presumably the newly
  // generated output file - confirm.
  ASSERT_EQ(1, env_->random_file_open_counter_.load());

  // One pread per a normal data block read
  env_->random_file_open_counter_.store(0);
  env_->random_read_counter_.Reset();
  ASSERT_EQ("foo2", Get("bar2"));
  ASSERT_EQ(1, env_->random_read_counter_.Read());
  // SST files are already opened.
ASSERT_EQ(0, env_->random_file_open_counter_.load());
}

// Traces a mix of writes, iterator seeks and reads on db_, then replays the
// trace into a second DB and verifies the replayed data.
TEST_F(DBTest2, TraceAndReplay) {
  Options options = CurrentOptions();
  options.merge_operator = MergeOperators::CreatePutOperator();
  ReadOptions ro;
  WriteOptions wo;
  TraceOptions trace_opts;
  EnvOptions env_opts;
  CreateAndReopenWithCF({"pikachu"}, options);
  Random rnd(301);
  Iterator* single_iter = nullptr;

  // Ending a trace that was never started is expected to report an IO error.
  ASSERT_TRUE(db_->EndTrace().IsIOError());

  std::string trace_filename = dbname_ + "/rocksdb.trace";
  std::unique_ptr<TraceWriter> trace_writer;
  ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer));
  ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer)));

  ASSERT_OK(Put(0, "a", "1"));
  ASSERT_OK(Merge(0, "b", "2"));
  ASSERT_OK(Delete(0, "c"));
  ASSERT_OK(SingleDelete(0, "d"));
  ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f"));

  WriteBatch batch;
  ASSERT_OK(batch.Put("f", "11"));
  ASSERT_OK(batch.Merge("g", "12"));
  ASSERT_OK(batch.Delete("h"));
  ASSERT_OK(batch.SingleDelete("i"));
  ASSERT_OK(batch.DeleteRange("j", "k"));
  ASSERT_OK(db_->Write(wo, &batch));

  single_iter = db_->NewIterator(ro);
  single_iter->Seek("f");
  single_iter->SeekForPrev("g");
  delete single_iter;

  ASSERT_EQ("1", Get(0, "a"));
  ASSERT_EQ("12", Get(0, "g"));

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "rocksdb", "rocks"));
  ASSERT_EQ("NOT_FOUND", Get(1, "leveldb"));

  ASSERT_OK(db_->EndTrace());
  // These should not get into the trace file as it is after EndTrace.
  Put("hello", "world");
  Merge("foo", "bar");

  // Open another db, replay, and verify the data
  std::string value;
  std::string dbname2 = test::TmpDir(env_) + "/db_replay";
  ASSERT_OK(DestroyDB(dbname2, options));

  // Using a different name than db2, to pacify infer's use-after-lifetime
  // warnings (http://fbinfer.com).
DB* db2_init = nullptr; options.create_if_missing = true; ASSERT_OK(DB::Open(options, dbname2, &db2_init)); ColumnFamilyHandle* cf; ASSERT_OK( db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf)); delete cf; delete db2_init; DB* db2 = nullptr; std::vector column_families; ColumnFamilyOptions cf_options; cf_options.merge_operator = MergeOperators::CreatePutOperator(); column_families.push_back(ColumnFamilyDescriptor("default", cf_options)); column_families.push_back( ColumnFamilyDescriptor("pikachu", ColumnFamilyOptions())); std::vector handles; ASSERT_OK(DB::Open(DBOptions(), dbname2, column_families, &handles, &db2)); env_->SleepForMicroseconds(100); // Verify that the keys don't already exist ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound()); std::unique_ptr trace_reader; ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader)); Replayer replayer(db2, handles_, std::move(trace_reader)); ASSERT_OK(replayer.Replay()); ASSERT_OK(db2->Get(ro, handles[0], "a", &value)); ASSERT_EQ("1", value); ASSERT_OK(db2->Get(ro, handles[0], "g", &value)); ASSERT_EQ("12", value); ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound()); ASSERT_OK(db2->Get(ro, handles[1], "foo", &value)); ASSERT_EQ("bar", value); ASSERT_OK(db2->Get(ro, handles[1], "rocksdb", &value)); ASSERT_EQ("rocks", value); for (auto handle : handles) { delete handle; } delete db2; ASSERT_OK(DestroyDB(dbname2, options)); } TEST_F(DBTest2, TraceWithLimit) { Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreatePutOperator(); ReadOptions ro; WriteOptions wo; TraceOptions trace_opts; EnvOptions env_opts; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); // test the max trace file size options trace_opts.max_trace_file_size = 5; std::string trace_filename = dbname_ + "/rocksdb.trace1"; 
std::unique_ptr trace_writer; ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer)); ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer))); ASSERT_OK(Put(0, "a", "1")); ASSERT_OK(Put(0, "b", "1")); ASSERT_OK(Put(0, "c", "1")); ASSERT_OK(db_->EndTrace()); std::string dbname2 = test::TmpDir(env_) + "/db_replay2"; std::string value; ASSERT_OK(DestroyDB(dbname2, options)); // Using a different name than db2, to pacify infer's use-after-lifetime // warnings (http://fbinfer.com). DB* db2_init = nullptr; options.create_if_missing = true; ASSERT_OK(DB::Open(options, dbname2, &db2_init)); ColumnFamilyHandle* cf; ASSERT_OK( db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf)); delete cf; delete db2_init; DB* db2 = nullptr; std::vector column_families; ColumnFamilyOptions cf_options; cf_options.merge_operator = MergeOperators::CreatePutOperator(); column_families.push_back(ColumnFamilyDescriptor("default", cf_options)); column_families.push_back( ColumnFamilyDescriptor("pikachu", ColumnFamilyOptions())); std::vector handles; ASSERT_OK(DB::Open(DBOptions(), dbname2, column_families, &handles, &db2)); env_->SleepForMicroseconds(100); // Verify that the keys don't already exist ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound()); std::unique_ptr trace_reader; ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader)); Replayer replayer(db2, handles_, std::move(trace_reader)); ASSERT_OK(replayer.Replay()); ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound()); for (auto handle : handles) { delete handle; } delete db2; ASSERT_OK(DestroyDB(dbname2, options)); } TEST_F(DBTest2, TraceWithSampling) { Options options = CurrentOptions(); 
ReadOptions ro; WriteOptions wo; TraceOptions trace_opts; EnvOptions env_opts; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); // test the trace file sampling options trace_opts.sampling_frequency = 2; std::string trace_filename = dbname_ + "/rocksdb.trace_sampling"; std::unique_ptr trace_writer; ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer)); ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer))); ASSERT_OK(Put(0, "a", "1")); ASSERT_OK(Put(0, "b", "2")); ASSERT_OK(Put(0, "c", "3")); ASSERT_OK(Put(0, "d", "4")); ASSERT_OK(Put(0, "e", "5")); ASSERT_OK(db_->EndTrace()); std::string dbname2 = test::TmpDir(env_) + "/db_replay_sampling"; std::string value; ASSERT_OK(DestroyDB(dbname2, options)); // Using a different name than db2, to pacify infer's use-after-lifetime // warnings (http://fbinfer.com). DB* db2_init = nullptr; options.create_if_missing = true; ASSERT_OK(DB::Open(options, dbname2, &db2_init)); ColumnFamilyHandle* cf; ASSERT_OK( db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf)); delete cf; delete db2_init; DB* db2 = nullptr; std::vector column_families; ColumnFamilyOptions cf_options; column_families.push_back(ColumnFamilyDescriptor("default", cf_options)); column_families.push_back( ColumnFamilyDescriptor("pikachu", ColumnFamilyOptions())); std::vector handles; ASSERT_OK(DB::Open(DBOptions(), dbname2, column_families, &handles, &db2)); env_->SleepForMicroseconds(100); ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "b", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "d", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "e", &value).IsNotFound()); std::unique_ptr trace_reader; ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader)); Replayer replayer(db2, handles_, std::move(trace_reader)); ASSERT_OK(replayer.Replay()); 
ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_FALSE(db2->Get(ro, handles[0], "b", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "c", &value).IsNotFound()); ASSERT_FALSE(db2->Get(ro, handles[0], "d", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "e", &value).IsNotFound()); for (auto handle : handles) { delete handle; } delete db2; ASSERT_OK(DestroyDB(dbname2, options)); } TEST_F(DBTest2, TraceWithFilter) { Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreatePutOperator(); ReadOptions ro; WriteOptions wo; TraceOptions trace_opts; EnvOptions env_opts; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); Iterator* single_iter = nullptr; trace_opts.filter = TraceFilterType::kTraceFilterWrite; std::string trace_filename = dbname_ + "/rocksdb.trace"; std::unique_ptr trace_writer; ASSERT_OK(NewFileTraceWriter(env_, env_opts, trace_filename, &trace_writer)); ASSERT_OK(db_->StartTrace(trace_opts, std::move(trace_writer))); ASSERT_OK(Put(0, "a", "1")); ASSERT_OK(Merge(0, "b", "2")); ASSERT_OK(Delete(0, "c")); ASSERT_OK(SingleDelete(0, "d")); ASSERT_OK(db_->DeleteRange(wo, dbfull()->DefaultColumnFamily(), "e", "f")); WriteBatch batch; ASSERT_OK(batch.Put("f", "11")); ASSERT_OK(batch.Merge("g", "12")); ASSERT_OK(batch.Delete("h")); ASSERT_OK(batch.SingleDelete("i")); ASSERT_OK(batch.DeleteRange("j", "k")); ASSERT_OK(db_->Write(wo, &batch)); single_iter = db_->NewIterator(ro); single_iter->Seek("f"); single_iter->SeekForPrev("g"); delete single_iter; ASSERT_EQ("1", Get(0, "a")); ASSERT_EQ("12", Get(0, "g")); ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Put(1, "rocksdb", "rocks")); ASSERT_EQ("NOT_FOUND", Get(1, "leveldb")); ASSERT_OK(db_->EndTrace()); // These should not get into the trace file as it is after EndTrace. 
Put("hello", "world"); Merge("foo", "bar"); // Open another db, replay, and verify the data std::string value; std::string dbname2 = test::TmpDir(env_) + "/db_replay"; ASSERT_OK(DestroyDB(dbname2, options)); // Using a different name than db2, to pacify infer's use-after-lifetime // warnings (http://fbinfer.com). DB* db2_init = nullptr; options.create_if_missing = true; ASSERT_OK(DB::Open(options, dbname2, &db2_init)); ColumnFamilyHandle* cf; ASSERT_OK( db2_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf)); delete cf; delete db2_init; DB* db2 = nullptr; std::vector column_families; ColumnFamilyOptions cf_options; cf_options.merge_operator = MergeOperators::CreatePutOperator(); column_families.push_back(ColumnFamilyDescriptor("default", cf_options)); column_families.push_back( ColumnFamilyDescriptor("pikachu", ColumnFamilyOptions())); std::vector handles; ASSERT_OK(DB::Open(DBOptions(), dbname2, column_families, &handles, &db2)); env_->SleepForMicroseconds(100); // Verify that the keys don't already exist ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound()); std::unique_ptr trace_reader; ASSERT_OK(NewFileTraceReader(env_, env_opts, trace_filename, &trace_reader)); Replayer replayer(db2, handles_, std::move(trace_reader)); ASSERT_OK(replayer.Replay()); // All the key-values should not present since we filter out the WRITE ops. ASSERT_TRUE(db2->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "g", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "hello", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "world", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "foo", &value).IsNotFound()); ASSERT_TRUE(db2->Get(ro, handles[0], "rocksdb", &value).IsNotFound()); for (auto handle : handles) { delete handle; } delete db2; ASSERT_OK(DestroyDB(dbname2, options)); // Set up a new db. 
std::string dbname3 = test::TmpDir(env_) + "/db_not_trace_read"; ASSERT_OK(DestroyDB(dbname3, options)); DB* db3_init = nullptr; options.create_if_missing = true; ColumnFamilyHandle* cf3; ASSERT_OK(DB::Open(options, dbname3, &db3_init)); ASSERT_OK( db3_init->CreateColumnFamily(ColumnFamilyOptions(), "pikachu", &cf3)); delete cf3; delete db3_init; column_families.clear(); column_families.push_back(ColumnFamilyDescriptor("default", cf_options)); column_families.push_back( ColumnFamilyDescriptor("pikachu", ColumnFamilyOptions())); handles.clear(); DB* db3 = nullptr; ASSERT_OK(DB::Open(DBOptions(), dbname3, column_families, &handles, &db3)); env_->SleepForMicroseconds(100); // Verify that the keys don't already exist ASSERT_TRUE(db3->Get(ro, handles[0], "a", &value).IsNotFound()); ASSERT_TRUE(db3->Get(ro, handles[0], "g", &value).IsNotFound()); //The tracer will not record the READ ops. trace_opts.filter = TraceFilterType::kTraceFilterGet; std::string trace_filename3 = dbname_ + "/rocksdb.trace_3"; std::unique_ptr trace_writer3; ASSERT_OK( NewFileTraceWriter(env_, env_opts, trace_filename3, &trace_writer3)); ASSERT_OK(db3->StartTrace(trace_opts, std::move(trace_writer3))); ASSERT_OK(db3->Put(wo, handles[0], "a", "1")); ASSERT_OK(db3->Merge(wo, handles[0], "b", "2")); ASSERT_OK(db3->Delete(wo, handles[0], "c")); ASSERT_OK(db3->SingleDelete(wo, handles[0], "d")); ASSERT_OK(db3->Get(ro, handles[0], "a", &value)); ASSERT_EQ(value, "1"); ASSERT_TRUE(db3->Get(ro, handles[0], "c", &value).IsNotFound()); ASSERT_OK(db3->EndTrace()); for (auto handle : handles) { delete handle; } delete db3; ASSERT_OK(DestroyDB(dbname3, options)); std::unique_ptr trace_reader3; ASSERT_OK( NewFileTraceReader(env_, env_opts, trace_filename3, &trace_reader3)); // Count the number of records in the trace file; int count = 0; std::string data; Status s; while (true) { s = trace_reader3->Read(&data); if (!s.ok()) { break; } count += 1; } // We also need to count the header and footer // 4 WRITE + 
HEADER + FOOTER = 6 ASSERT_EQ(count, 6); } #endif // ROCKSDB_LITE TEST_F(DBTest2, PinnableSliceAndMmapReads) { Options options = CurrentOptions(); options.allow_mmap_reads = true; options.max_open_files = 100; options.compression = kNoCompression; Reopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); PinnableSlice pinned_value; ASSERT_EQ(Get("foo", &pinned_value), Status::OK()); // It is not safe to pin mmap files as they might disappear by compaction ASSERT_FALSE(pinned_value.IsPinned()); ASSERT_EQ(pinned_value.ToString(), "bar"); dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */, nullptr /* end */, nullptr /* column_family */, true /* disallow_trivial_move */); // Ensure pinned_value doesn't rely on memory munmap'd by the above // compaction. It crashes if it does. ASSERT_EQ(pinned_value.ToString(), "bar"); #ifndef ROCKSDB_LITE pinned_value.Reset(); // Unsafe to pin mmap files when they could be kicked out of table cache Close(); ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ(Get("foo", &pinned_value), Status::OK()); ASSERT_FALSE(pinned_value.IsPinned()); ASSERT_EQ(pinned_value.ToString(), "bar"); pinned_value.Reset(); // In read-only mode with infinite capacity on table cache it should pin the // value and avoid the memcpy Close(); options.max_open_files = -1; ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ(Get("foo", &pinned_value), Status::OK()); ASSERT_TRUE(pinned_value.IsPinned()); ASSERT_EQ(pinned_value.ToString(), "bar"); #endif } TEST_F(DBTest2, DISABLED_IteratorPinnedMemory) { Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockBasedTableOptions bbto; bbto.no_block_cache = false; bbto.cache_index_and_filter_blocks = false; bbto.block_cache = NewLRUCache(100000); bbto.block_size = 400; // small block size options.table_factory.reset(new BlockBasedTableFactory(bbto)); Reopen(options); Random rnd(301); std::string v = RandomString(&rnd, 400); // Since 
v is the size of a block, each key should take a block // of 400+ bytes. Put("1", v); Put("3", v); Put("5", v); Put("7", v); ASSERT_OK(Flush()); ASSERT_EQ(0, bbto.block_cache->GetPinnedUsage()); // Verify that iterators don't pin more than one data block in block cache // at each time. { std::unique_ptr iter(db_->NewIterator(ReadOptions())); iter->SeekToFirst(); for (int i = 0; i < 4; i++) { ASSERT_TRUE(iter->Valid()); // Block cache should contain exactly one block. ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0); ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800); iter->Next(); } ASSERT_FALSE(iter->Valid()); iter->Seek("4"); ASSERT_TRUE(iter->Valid()); ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0); ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800); iter->Seek("3"); ASSERT_TRUE(iter->Valid()); ASSERT_GT(bbto.block_cache->GetPinnedUsage(), 0); ASSERT_LT(bbto.block_cache->GetPinnedUsage(), 800); } ASSERT_EQ(0, bbto.block_cache->GetPinnedUsage()); // Test compaction case Put("2", v); Put("5", v); Put("6", v); Put("8", v); ASSERT_OK(Flush()); // Clear existing data in block cache bbto.block_cache->SetCapacity(0); bbto.block_cache->SetCapacity(100000); // Verify compaction input iterators don't hold more than one data blocks at // one time. std::atomic finished(false); std::atomic block_newed(0); std::atomic block_destroyed(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Block::Block:0", [&](void* /*arg*/) { if (finished) { return; } // Two iterators. At most 2 outstanding blocks. EXPECT_GE(block_newed.load(), block_destroyed.load()); EXPECT_LE(block_newed.load(), block_destroyed.load() + 1); block_newed.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "Block::~Block", [&](void* /*arg*/) { if (finished) { return; } // Two iterators. At most 2 outstanding blocks. 
// DISABLED_IteratorPinnedMemory, continued: remainder of the "Block::~Block"
// callback, then the compaction that drives both callbacks.
        EXPECT_GE(block_newed.load(), block_destroyed.load() + 1);
        EXPECT_LE(block_newed.load(), block_destroyed.load() + 2);
        block_destroyed.fetch_add(1);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run:BeforeVerify",
      [&](void* /*arg*/) { finished = true; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Two input files. Each of them has 4 data blocks.
  ASSERT_EQ(8, block_newed.load());
  ASSERT_EQ(8, block_destroyed.load());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  // The callbacks capture locals of this test by reference; drop them before
  // those locals go out of scope, as the neighbouring tests do.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}

// Checks the tail prefetch length reported through the
// "BlockBasedTable::Open::TailPrefetchLen" sync point against expected
// bounds, first with the default options and later after reopening with
// cache_index_and_filter_blocks enabled.
TEST_F(DBTest2, TestBBTTailPrefetch) {
  std::atomic<bool> called(false);
  size_t expected_lower_bound = 512 * 1024;
  size_t expected_higher_bound = 512 * 1024;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BlockBasedTable::Open::TailPrefetchLen", [&](void* arg) {
        size_t* prefetch_size = static_cast<size_t*>(arg);
        EXPECT_LE(expected_lower_bound, *prefetch_size);
        EXPECT_GE(expected_higher_bound, *prefetch_size);
        called = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put("1", "1"));
  ASSERT_OK(Put("9", "1"));
  ASSERT_OK(Flush());

  // Subsequent table opens are expected to prefetch at most 8 KiB.
  expected_lower_bound = 0;
  expected_higher_bound = 8 * 1024;

  ASSERT_OK(Put("1", "1"));
  ASSERT_OK(Put("9", "1"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("1", "1"));
  ASSERT_OK(Put("9", "1"));
  ASSERT_OK(Flush());

  // Full compaction to make sure there is no L0 file after the open.
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_TRUE(called.load()); called = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); std::atomic first_call(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BlockBasedTable::Open::TailPrefetchLen", [&](void* arg) { size_t* prefetch_size = static_cast(arg); if (first_call) { EXPECT_EQ(4 * 1024, *prefetch_size); first_call = false; } else { EXPECT_GE(4 * 1024, *prefetch_size); } called = true; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.max_file_opening_threads = 1; // one thread BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.max_open_files = -1; Reopen(options); Put("1", "1"); Put("9", "1"); Flush(); Put("1", "1"); Put("9", "1"); Flush(); ASSERT_TRUE(called.load()); called = false; // Parallel loading SST files options.max_file_opening_threads = 16; Reopen(options); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_TRUE(called.load()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_F(DBTest2, TestGetColumnFamilyHandleUnlocked) { // Setup sync point dependency to reproduce the race condition of // DBImpl::GetColumnFamilyHandleUnlocked ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked1", "TestGetColumnFamilyHandleUnlocked::PreGetColumnFamilyHandleUnlocked2"}, {"TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked2", "TestGetColumnFamilyHandleUnlocked::ReadColumnFamilyHandle1"}, }); SyncPoint::GetInstance()->EnableProcessing(); CreateColumnFamilies({"test1", "test2"}, Options()); 
ASSERT_EQ(handles_.size(), 2); DBImpl* dbi = reinterpret_cast(db_); port::Thread user_thread1([&]() { auto cfh = dbi->GetColumnFamilyHandleUnlocked(handles_[0]->GetID()); ASSERT_EQ(cfh->GetID(), handles_[0]->GetID()); TEST_SYNC_POINT("TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked1"); TEST_SYNC_POINT("TestGetColumnFamilyHandleUnlocked::ReadColumnFamilyHandle1"); ASSERT_EQ(cfh->GetID(), handles_[0]->GetID()); }); port::Thread user_thread2([&]() { TEST_SYNC_POINT("TestGetColumnFamilyHandleUnlocked::PreGetColumnFamilyHandleUnlocked2"); auto cfh = dbi->GetColumnFamilyHandleUnlocked(handles_[1]->GetID()); ASSERT_EQ(cfh->GetID(), handles_[1]->GetID()); TEST_SYNC_POINT("TestGetColumnFamilyHandleUnlocked::GetColumnFamilyHandleUnlocked2"); ASSERT_EQ(cfh->GetID(), handles_[1]->GetID()); }); user_thread1.join(); user_thread2.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } #ifndef ROCKSDB_LITE TEST_F(DBTest2, TestCompactFiles) { // Setup sync point dependency to reproduce the race condition of // DBImpl::GetColumnFamilyHandleUnlocked ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"TestCompactFiles::IngestExternalFile1", "TestCompactFiles::IngestExternalFile2"}, }); SyncPoint::GetInstance()->EnableProcessing(); Options options; options.num_levels = 2; options.disable_auto_compactions = true; Reopen(options); auto* handle = db_->DefaultColumnFamily(); ASSERT_EQ(db_->NumberLevels(handle), 2); ROCKSDB_NAMESPACE::SstFileWriter sst_file_writer{ ROCKSDB_NAMESPACE::EnvOptions(), options}; std::string external_file1 = dbname_ + "/test_compact_files1.sst_t"; std::string external_file2 = dbname_ + "/test_compact_files2.sst_t"; std::string external_file3 = dbname_ + "/test_compact_files3.sst_t"; ASSERT_OK(sst_file_writer.Open(external_file1)); ASSERT_OK(sst_file_writer.Put("1", "1")); ASSERT_OK(sst_file_writer.Put("2", "2")); ASSERT_OK(sst_file_writer.Finish()); 
ASSERT_OK(sst_file_writer.Open(external_file2)); ASSERT_OK(sst_file_writer.Put("3", "3")); ASSERT_OK(sst_file_writer.Put("4", "4")); ASSERT_OK(sst_file_writer.Finish()); ASSERT_OK(sst_file_writer.Open(external_file3)); ASSERT_OK(sst_file_writer.Put("5", "5")); ASSERT_OK(sst_file_writer.Put("6", "6")); ASSERT_OK(sst_file_writer.Finish()); ASSERT_OK(db_->IngestExternalFile(handle, {external_file1, external_file3}, IngestExternalFileOptions())); ASSERT_EQ(NumTableFilesAtLevel(1, 0), 2); std::vector files; GetSstFiles(env_, dbname_, &files); ASSERT_EQ(files.size(), 2); port::Thread user_thread1( [&]() { db_->CompactFiles(CompactionOptions(), handle, files, 1); }); port::Thread user_thread2([&]() { ASSERT_OK(db_->IngestExternalFile(handle, {external_file2}, IngestExternalFileOptions())); TEST_SYNC_POINT("TestCompactFiles::IngestExternalFile1"); }); user_thread1.join(); user_thread2.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } #endif // ROCKSDB_LITE // TODO: figure out why this test fails in appveyor #ifndef OS_WIN TEST_F(DBTest2, MultiDBParallelOpenTest) { const int kNumDbs = 2; Options options = CurrentOptions(); std::vector dbnames; for (int i = 0; i < kNumDbs; ++i) { dbnames.emplace_back(test::TmpDir(env_) + "/db" + ToString(i)); ASSERT_OK(DestroyDB(dbnames.back(), options)); } // Verify empty DBs can be created in parallel std::vector open_threads; std::vector dbs{static_cast(kNumDbs), nullptr}; options.create_if_missing = true; for (int i = 0; i < kNumDbs; ++i) { open_threads.emplace_back( [&](int dbnum) { ASSERT_OK(DB::Open(options, dbnames[dbnum], &dbs[dbnum])); }, i); } // Now add some data and close, so next we can verify non-empty DBs can be // recovered in parallel for (int i = 0; i < kNumDbs; ++i) { open_threads[i].join(); ASSERT_OK(dbs[i]->Put(WriteOptions(), "xi", "gua")); delete dbs[i]; } // Verify non-empty DBs can be recovered in parallel dbs.clear(); 
open_threads.clear(); for (int i = 0; i < kNumDbs; ++i) { open_threads.emplace_back( [&](int dbnum) { ASSERT_OK(DB::Open(options, dbnames[dbnum], &dbs[dbnum])); }, i); } // Wait and cleanup for (int i = 0; i < kNumDbs; ++i) { open_threads[i].join(); delete dbs[i]; ASSERT_OK(DestroyDB(dbnames[i], options)); } } #endif // OS_WIN namespace { class DummyOldStats : public Statistics { public: uint64_t getTickerCount(uint32_t /*ticker_type*/) const override { return 0; } void recordTick(uint32_t /* ticker_type */, uint64_t /* count */) override { num_rt++; } void setTickerCount(uint32_t /*ticker_type*/, uint64_t /*count*/) override {} uint64_t getAndResetTickerCount(uint32_t /*ticker_type*/) override { return 0; } void measureTime(uint32_t /*histogram_type*/, uint64_t /*count*/) override { num_mt++; } void histogramData( uint32_t /*histogram_type*/, ROCKSDB_NAMESPACE::HistogramData* const /*data*/) const override {} std::string getHistogramString(uint32_t /*type*/) const override { return ""; } bool HistEnabledForType(uint32_t /*type*/) const override { return false; } std::string ToString() const override { return ""; } int num_rt = 0; int num_mt = 0; }; } // namespace TEST_F(DBTest2, OldStatsInterface) { DummyOldStats* dos = new DummyOldStats(); std::shared_ptr stats(dos); Options options = CurrentOptions(); options.create_if_missing = true; options.statistics = stats; Reopen(options); Put("foo", "bar"); ASSERT_EQ("bar", Get("foo")); ASSERT_OK(Flush()); ASSERT_EQ("bar", Get("foo")); ASSERT_GT(dos->num_rt, 0); ASSERT_GT(dos->num_mt, 0); } TEST_F(DBTest2, CloseWithUnreleasedSnapshot) { const Snapshot* ss = db_->GetSnapshot(); for (auto h : handles_) { db_->DestroyColumnFamilyHandle(h); } handles_.clear(); ASSERT_NOK(db_->Close()); db_->ReleaseSnapshot(ss); ASSERT_OK(db_->Close()); delete db_; db_ = nullptr; } TEST_F(DBTest2, PrefixBloomReseek) { Options options = CurrentOptions(); options.create_if_missing = true; 
options.prefix_extractor.reset(NewCappedPrefixTransform(3)); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); // Construct two L1 files with keys: // f1:[aaa1 ccc1] f2:[ddd0] ASSERT_OK(Put("aaa1", "")); ASSERT_OK(Put("ccc1", "")); ASSERT_OK(Flush()); ASSERT_OK(Put("ddd0", "")); ASSERT_OK(Flush()); CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_OK(Put("bbb1", "")); Iterator* iter = db_->NewIterator(ReadOptions()); // Seeking into f1, the iterator will check bloom filter which returns the // file iterator ot be invalidate, and the cursor will put into f2, with // the next key to be "ddd0". iter->Seek("bbb1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("bbb1", iter->key().ToString()); // Reseek ccc1, the L1 iterator needs to go back to f1 and reseek. iter->Seek("ccc1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("ccc1", iter->key().ToString()); delete iter; } TEST_F(DBTest2, PrefixBloomFilteredOut) { Options options = CurrentOptions(); options.create_if_missing = true; options.prefix_extractor.reset(NewCappedPrefixTransform(3)); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); // Construct two L1 files with keys: // f1:[aaa1 ccc1] f2:[ddd0] ASSERT_OK(Put("aaa1", "")); ASSERT_OK(Put("ccc1", "")); ASSERT_OK(Flush()); ASSERT_OK(Put("ddd0", "")); ASSERT_OK(Flush()); CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); Iterator* iter = db_->NewIterator(ReadOptions()); // Bloom filter is filterd out by f1. 
// This is just one of several valid position following the contract. // Postioning to ccc1 or ddd0 is also valid. This is just to validate // the behavior of the current implementation. If underlying implementation // changes, the test might fail here. iter->Seek("bbb1"); ASSERT_FALSE(iter->Valid()); delete iter; } #ifndef ROCKSDB_LITE TEST_F(DBTest2, RowCacheSnapshot) { Options options = CurrentOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.row_cache = NewLRUCache(8 * 8192); DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar1")); const Snapshot* s1 = db_->GetSnapshot(); ASSERT_OK(Put("foo", "bar2")); ASSERT_OK(Flush()); ASSERT_OK(Put("foo2", "bar")); const Snapshot* s2 = db_->GetSnapshot(); ASSERT_OK(Put("foo3", "bar")); const Snapshot* s3 = db_->GetSnapshot(); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0); ASSERT_EQ(Get("foo"), "bar2"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); ASSERT_EQ(Get("foo"), "bar2"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); ASSERT_EQ(Get("foo", s1), "bar1"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); ASSERT_EQ(Get("foo", s2), "bar2"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); ASSERT_EQ(Get("foo", s1), "bar1"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); ASSERT_EQ(Get("foo", s3), "bar2"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 4); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); db_->ReleaseSnapshot(s1); db_->ReleaseSnapshot(s2); db_->ReleaseSnapshot(s3); } #endif // ROCKSDB_LITE // When DB is reopened with multiple column families, the 
manifest file // is written after the first CF is flushed, and it is written again // after each flush. If DB crashes between the flushes, the flushed CF // flushed will pass the latest log file, and now we require it not // to be corrupted, and triggering a corruption report. // We need to fix the bug and enable the test. TEST_F(DBTest2, CrashInRecoveryMultipleCF) { const std::vector sync_points = { "DBImpl::RecoverLogFiles:BeforeFlushFinalMemtable", "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0"}; for (const auto& test_sync_point : sync_points) { Options options = CurrentOptions(); // First destroy original db to ensure a clean start. DestroyAndReopen(options); options.create_if_missing = true; options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); ASSERT_OK(Put(1, "foo", "bar")); ASSERT_OK(Flush(1)); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put(1, "foo", "bar")); // The value is large enough to be divided to two blocks. std::string large_value(400, ' '); ASSERT_OK(Put("foo1", large_value)); ASSERT_OK(Put("foo2", large_value)); Close(); // Corrupt the log file in the middle, so that it is not corrupted // in the tail. std::vector filenames; ASSERT_OK(env_->GetChildren(dbname_, &filenames)); for (const auto& f : filenames) { uint64_t number; FileType type; if (ParseFileName(f, &number, &type) && type == FileType::kLogFile) { std::string fname = dbname_ + "/" + f; std::string file_content; ASSERT_OK(ReadFileToString(env_, fname, &file_content)); file_content[400] = 'h'; file_content[401] = 'a'; ASSERT_OK(WriteStringToFile(env_, file_content, fname)); break; } } // Reopen and freeze the file system after the first manifest write. 
FaultInjectionTestEnv fit_env(options.env); options.env = &fit_env; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( test_sync_point, [&](void* /*arg*/) { fit_env.SetFilesystemActive(false); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_NOK(TryReopenWithColumnFamilies( {kDefaultColumnFamilyName, "pikachu"}, options)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); fit_env.SetFilesystemActive(true); // If we continue using failure ingestion Env, it will conplain something // when renaming current file, which is not expected. Need to investigate // why. options.env = env_; ASSERT_OK(TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options)); } } TEST_F(DBTest2, SeekFileRangeDeleteTail) { Options options = CurrentOptions(); options.prefix_extractor.reset(NewCappedPrefixTransform(1)); options.num_levels = 3; DestroyAndReopen(options); ASSERT_OK(Put("a", "a")); const Snapshot* s1 = db_->GetSnapshot(); ASSERT_OK( db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f")); ASSERT_OK(Put("b", "a")); ASSERT_OK(Flush()); ASSERT_OK(Put("x", "a")); ASSERT_OK(Put("z", "a")); ASSERT_OK(Flush()); CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); { ReadOptions ro; ro.total_order_seek = true; std::unique_ptr iter(db_->NewIterator(ro)); iter->Seek("e"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("x", iter->key().ToString()); } db_->ReleaseSnapshot(s1); } TEST_F(DBTest2, BackgroundPurgeTest) { Options options = CurrentOptions(); options.write_buffer_manager = std::make_shared(1 << 20); options.avoid_unnecessary_blocking_io = true; DestroyAndReopen(options); size_t base_value = options.write_buffer_manager->memory_usage(); ASSERT_OK(Put("a", "a")); Iterator* iter = db_->NewIterator(ReadOptions()); ASSERT_OK(Flush()); size_t value = 
options.write_buffer_manager->memory_usage(); ASSERT_GT(value, base_value); db_->GetEnv()->SetBackgroundThreads(1, Env::Priority::HIGH); test::SleepingBackgroundTask sleeping_task_after; db_->GetEnv()->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_after, Env::Priority::HIGH); delete iter; Env::Default()->SleepForMicroseconds(100000); value = options.write_buffer_manager->memory_usage(); ASSERT_GT(value, base_value); sleeping_task_after.WakeUp(); sleeping_task_after.WaitUntilDone(); test::SleepingBackgroundTask sleeping_task_after2; db_->GetEnv()->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_after2, Env::Priority::HIGH); sleeping_task_after2.WakeUp(); sleeping_task_after2.WaitUntilDone(); value = options.write_buffer_manager->memory_usage(); ASSERT_EQ(base_value, value); } TEST_F(DBTest2, SwitchMemtableRaceWithNewManifest) { Options options = CurrentOptions(); DestroyAndReopen(options); options.max_manifest_file_size = 10; options.create_if_missing = true; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_EQ(2, handles_.size()); ASSERT_OK(Put("foo", "value")); const int kL0Files = options.level0_file_num_compaction_trigger; for (int i = 0; i < kL0Files; ++i) { ASSERT_OK(Put(/*cf=*/1, "a", std::to_string(i))); ASSERT_OK(Flush(/*cf=*/1)); } port::Thread thread([&]() { ASSERT_OK(Flush()); }); ASSERT_OK(dbfull()->TEST_WaitForCompact()); thread.join(); } TEST_F(DBTest2, SameSmallestInSameLevel) { // This test validates fractional casacading logic when several files at one // one level only contains the same user key. 
Options options = CurrentOptions(); options.merge_operator = MergeOperators::CreateStringAppendOperator(); DestroyAndReopen(options); ASSERT_OK(Put("key", "1")); ASSERT_OK(Put("key", "2")); ASSERT_OK(db_->Merge(WriteOptions(), "key", "3")); ASSERT_OK(db_->Merge(WriteOptions(), "key", "4")); Flush(); CompactRangeOptions cro; cro.change_level = true; cro.target_level = 2; ASSERT_OK(dbfull()->CompactRange(cro, db_->DefaultColumnFamily(), nullptr, nullptr)); ASSERT_OK(db_->Merge(WriteOptions(), "key", "5")); Flush(); ASSERT_OK(db_->Merge(WriteOptions(), "key", "6")); Flush(); ASSERT_OK(db_->Merge(WriteOptions(), "key", "7")); Flush(); ASSERT_OK(db_->Merge(WriteOptions(), "key", "8")); Flush(); dbfull()->TEST_WaitForCompact(true); #ifndef ROCKSDB_LITE ASSERT_EQ("0,4,1", FilesPerLevel()); #endif // ROCKSDB_LITE ASSERT_EQ("2,3,4,5,6,7,8", Get("key")); } TEST_F(DBTest2, FileConsistencyCheckInOpen) { Put("foo", "bar"); Flush(); SyncPoint::GetInstance()->SetCallBack( "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { Status* ret_s = static_cast(arg); *ret_s = Status::Corruption("fcc"); }); SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.force_consistency_checks = true; ASSERT_NOK(TryReopen(options)); SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBTest2, BlockBasedTablePrefixIndexSeekForPrev) { // create a DB with block prefix index BlockBasedTableOptions table_options; Options options = CurrentOptions(); table_options.block_size = 300; table_options.index_type = BlockBasedTableOptions::kHashSearch; table_options.index_shortening = BlockBasedTableOptions::IndexShorteningMode::kNoShortening; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); Reopen(options); Random rnd(301); std::string large_value = RandomString(&rnd, 500); ASSERT_OK(Put("a1", large_value)); ASSERT_OK(Put("x1", large_value)); ASSERT_OK(Put("y1", large_value)); 
Flush(); { std::unique_ptr iterator(db_->NewIterator(ReadOptions())); iterator->SeekForPrev("x3"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); iterator->SeekForPrev("a3"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("a1", iterator->key().ToString()); iterator->SeekForPrev("y3"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("y1", iterator->key().ToString()); // Query more than one non-existing prefix to cover the case both // of empty hash bucket and hash bucket conflict. iterator->SeekForPrev("b1"); // Result should be not valid or "a1". if (iterator->Valid()) { ASSERT_EQ("a1", iterator->key().ToString()); } iterator->SeekForPrev("c1"); // Result should be not valid or "a1". if (iterator->Valid()) { ASSERT_EQ("a1", iterator->key().ToString()); } iterator->SeekForPrev("d1"); // Result should be not valid or "a1". if (iterator->Valid()) { ASSERT_EQ("a1", iterator->key().ToString()); } iterator->SeekForPrev("y3"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("y1", iterator->key().ToString()); } } TEST_F(DBTest2, ChangePrefixExtractor) { for (bool use_partitioned_filter : {true, false}) { // create a DB with block prefix index BlockBasedTableOptions table_options; Options options = CurrentOptions(); // Sometimes filter is checked based on upper bound. Assert counters // for that case. Otherwise, only check data correctness. 
#ifndef ROCKSDB_LITE bool expect_filter_check = !use_partitioned_filter; #else bool expect_filter_check = false; #endif table_options.partition_filters = use_partitioned_filter; if (use_partitioned_filter) { table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; } table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.statistics = CreateDBStatistics(); options.prefix_extractor.reset(NewFixedPrefixTransform(2)); DestroyAndReopen(options); Random rnd(301); ASSERT_OK(Put("aa", "")); ASSERT_OK(Put("xb", "")); ASSERT_OK(Put("xx1", "")); ASSERT_OK(Put("xz1", "")); ASSERT_OK(Put("zz", "")); Flush(); // After reopening DB with prefix size 2 => 1, prefix extractor // won't take effective unless it won't change results based // on upper bound and seek key. options.prefix_extractor.reset(NewFixedPrefixTransform(1)); Reopen(options); { std::unique_ptr iterator(db_->NewIterator(ReadOptions())); iterator->Seek("xa"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); // It's a bug that the counter BLOOM_FILTER_PREFIX_CHECKED is not // correct in this case. So don't check counters in this case. if (expect_filter_check) { ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } iterator->Seek("xz"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xz1", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } } std::string ub_str = "xg9"; Slice ub(ub_str); ReadOptions ro; ro.iterate_upper_bound = &ub; { std::unique_ptr iterator(db_->NewIterator(ro)); // SeekForPrev() never uses prefix bloom if it is changed. 
iterator->SeekForPrev("xg0"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } } ub_str = "xx9"; ub = Slice(ub_str); { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("x"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } iterator->Seek("xx0"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xx1", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } } CompactRangeOptions compact_range_opts; compact_range_opts.bottommost_level_compaction = BottommostLevelCompaction::kForce; ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr)); ASSERT_OK(db_->CompactRange(compact_range_opts, nullptr, nullptr)); // Re-execute similar queries after a full compaction { std::unique_ptr iterator(db_->NewIterator(ReadOptions())); iterator->Seek("x"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } iterator->Seek("xg"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xx1", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } iterator->Seek("xz"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xz1", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } } { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->SeekForPrev("xx0"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(5, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } iterator->Seek("xx0"); ASSERT_TRUE(iterator->Valid()); 
ASSERT_EQ("xx1", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(6, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } } ub_str = "xg9"; ub = Slice(ub_str); { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->SeekForPrev("xg0"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("xb", iterator->key().ToString()); if (expect_filter_check) { ASSERT_EQ(7, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } } } } TEST_F(DBTest2, BlockBasedTablePrefixGetIndexNotFound) { // create a DB with block prefix index BlockBasedTableOptions table_options; Options options = CurrentOptions(); table_options.block_size = 300; table_options.index_type = BlockBasedTableOptions::kHashSearch; table_options.index_shortening = BlockBasedTableOptions::IndexShorteningMode::kNoShortening; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.level0_file_num_compaction_trigger = 8; Reopen(options); ASSERT_OK(Put("b1", "ok")); Flush(); // Flushing several files so that the chance that hash bucket // is empty fo "b" in at least one of the files is high. 
ASSERT_OK(Put("a1", "")); ASSERT_OK(Put("c1", "")); Flush(); ASSERT_OK(Put("a2", "")); ASSERT_OK(Put("c2", "")); Flush(); ASSERT_OK(Put("a3", "")); ASSERT_OK(Put("c3", "")); Flush(); ASSERT_OK(Put("a4", "")); ASSERT_OK(Put("c4", "")); Flush(); ASSERT_OK(Put("a5", "")); ASSERT_OK(Put("c5", "")); Flush(); ASSERT_EQ("ok", Get("b1")); } #ifndef ROCKSDB_LITE TEST_F(DBTest2, AutoPrefixMode1) { // create a DB with block prefix index BlockBasedTableOptions table_options; Options options = CurrentOptions(); table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.statistics = CreateDBStatistics(); Reopen(options); Random rnd(301); std::string large_value = RandomString(&rnd, 500); ASSERT_OK(Put("a1", large_value)); ASSERT_OK(Put("x1", large_value)); ASSERT_OK(Put("y1", large_value)); Flush(); ReadOptions ro; ro.total_order_seek = false; ro.auto_prefix_mode = true; { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } std::string ub_str = "b9"; Slice ub(ub_str); ro.iterate_upper_bound = &ub; { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } ub_str = "z"; ub = Slice(ub_str); { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } ub_str = "c"; ub = Slice(ub_str); { std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); } // The same queries without 
recreating iterator { ub_str = "b9"; ub = Slice(ub_str); ro.iterate_upper_bound = &ub; std::unique_ptr iterator(db_->NewIterator(ro)); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); ub_str = "z"; ub = Slice(ub_str); iterator->Seek("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("x1", iterator->key().ToString()); ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); ub_str = "c"; ub = Slice(ub_str); iterator->Seek("b1"); ASSERT_FALSE(iterator->Valid()); ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); ub_str = "b9"; ub = Slice(ub_str); ro.iterate_upper_bound = &ub; iterator->SeekForPrev("b1"); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("a1", iterator->key().ToString()); ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED)); ub_str = "zz"; ub = Slice(ub_str); ro.iterate_upper_bound = &ub; iterator->SeekToLast(); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("y1", iterator->key().ToString()); iterator->SeekToFirst(); ASSERT_TRUE(iterator->Valid()); ASSERT_EQ("a1", iterator->key().ToString()); } } #endif // ROCKSDB_LITE // WAL recovery mode is WALRecoveryMode::kPointInTimeRecovery. 
TEST_F(DBTest2, PointInTimeRecoveryWithIOErrorWhileReadingWal) { Options options = CurrentOptions(); DestroyAndReopen(options); ASSERT_OK(Put("foo", "value0")); Close(); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); bool should_inject_error = false; SyncPoint::GetInstance()->SetCallBack( "DBImpl::RecoverLogFiles:BeforeReadWal", [&](void* /*arg*/) { should_inject_error = true; }); SyncPoint::GetInstance()->SetCallBack( "LogReader::ReadMore:AfterReadFile", [&](void* arg) { if (should_inject_error) { ASSERT_NE(nullptr, arg); *reinterpret_cast(arg) = Status::IOError("Injected IOError"); } }); SyncPoint::GetInstance()->EnableProcessing(); options.avoid_flush_during_recovery = true; options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; Status s = TryReopen(options); ASSERT_TRUE(s.IsIOError()); } } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_test_util.cc000066400000000000000000001425201370372246700166260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h"
#include "db/forward_iterator.h"
#include "rocksdb/env_encryption.h"
#include "rocksdb/utilities/object_registry.h"

namespace ROCKSDB_NAMESPACE {

namespace {
// Returns the env's current time. If the env cannot provide one, the
// arbitrary fallback value is returned instead (the status is ignored on
// purpose).
int64_t MaybeCurrentTime(Env* env) {
  int64_t now = 1337346000;  // arbitrary fallback default
  (void)env->GetCurrentTime(&now);
  return now;
}
}  // namespace

// Special Env used to delay background operations
SpecialEnv::SpecialEnv(Env* base)
    : EnvWrapper(base),
      maybe_starting_time_(MaybeCurrentTime(base)),
      rnd_(301),
      sleep_counter_(this),
      addon_time_(0),
      time_elapse_only_sleep_(false),
      no_slowdown_(false) {
  // Fault-injection switches: all start disabled.
  delay_sstable_sync_.store(false, std::memory_order_release);
  drop_writes_.store(false, std::memory_order_release);
  no_space_.store(false, std::memory_order_release);
  non_writable_.store(false, std::memory_order_release);
  count_random_reads_ = false;
  count_sequential_reads_ = false;
  manifest_sync_error_.store(false, std::memory_order_release);
  manifest_write_error_.store(false, std::memory_order_release);
  log_write_error_.store(false, std::memory_order_release);

  // Counters: all start at zero.
  random_file_open_counter_.store(0, std::memory_order_relaxed);
  delete_count_.store(0, std::memory_order_relaxed);
  num_open_wal_file_.store(0);
  log_write_slowdown_ = 0;
  bytes_written_ = 0;
  sync_counter_ = 0;
  non_writeable_rate_ = 0;
  new_writable_count_ = 0;
  non_writable_count_ = 0;
  table_write_callback_ = nullptr;
}

#ifndef ROCKSDB_LITE
ROT13BlockCipher rot13Cipher_(16);
#endif  // ROCKSDB_LITE

// Builds the Env stack selected through the TEST_ENV_URI, MEM_ENV and
// ENCRYPTED_ENV environment variables, wraps it in a SpecialEnv, removes any
// database left at the test path and opens a fresh one.
DBTestBase::DBTestBase(const std::string path)
    : mem_env_(nullptr), encrypted_env_(nullptr), option_config_(kDefault) {
  Env* base_env = Env::Default();
#ifndef ROCKSDB_LITE
  const char* env_uri = getenv("TEST_ENV_URI");
  if (env_uri) {
    Env* loaded_env = nullptr;
    Status s = Env::LoadEnv(env_uri, &loaded_env, &env_guard_);
    base_env = loaded_env;
    EXPECT_OK(s);
    EXPECT_NE(Env::Default(), base_env);
  }
#endif  // !ROCKSDB_LITE
  EXPECT_NE(nullptr, base_env);
  if (getenv("MEM_ENV")) {
    mem_env_ = new MockEnv(base_env);
  }
#ifndef ROCKSDB_LITE
  if (getenv("ENCRYPTED_ENV")) {
    // Encrypt on top of the in-memory env when there is one.
    Env* underlying = base_env;
    if (mem_env_) {
      underlying = mem_env_;
    }
    encrypted_env_ =
        NewEncryptedEnv(underlying, new CTREncryptionProvider(rot13Cipher_));
  }
#endif  // !ROCKSDB_LITE

  // Outermost available layer wins: encrypted, then in-memory, then base.
  Env* wrapped = base_env;
  if (encrypted_env_) {
    wrapped = encrypted_env_;
  } else if (mem_env_) {
    wrapped = mem_env_;
  }
  env_ = new SpecialEnv(wrapped);
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);

  dbname_ = test::PerThreadDBPath(env_, path);
  alternative_wal_dir_ = dbname_ + "/wal";
  alternative_db_log_dir_ = dbname_ + "/db_log_dir";

  Options base_options = CurrentOptions();
  base_options.env = env_;
  Options wal_dir_options = base_options;
  wal_dir_options.wal_dir = alternative_wal_dir_;
  // First destroy with the alternative WAL dir configured ...
  EXPECT_OK(DestroyDB(dbname_, wal_dir_options));
  // Destroy it for not alternative WAL dir is used.
  EXPECT_OK(DestroyDB(dbname_, base_options));
  db_ = nullptr;
  Reopen(base_options);
  Random::GetTLSInstance()->Reset(0xdeadbeef);
}

// Resets all sync point state, closes the DB and, unless KEEP_DB is set in
// the environment, destroys the database including its "_2".."_4" paths.
DBTestBase::~DBTestBase() {
  auto* sync_point = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sync_point->DisableProcessing();
  sync_point->LoadDependency({});
  sync_point->ClearAllCallBacks();
  Close();

  Options options;
  static const char* const kPathSuffixes[] = {"", "_2", "_3", "_4"};
  for (const char* suffix : kPathSuffixes) {
    options.db_paths.emplace_back(dbname_ + suffix, 0);
  }
  options.env = env_;

  if (!getenv("KEEP_DB")) {
    EXPECT_OK(DestroyDB(dbname_, options));
  } else {
    printf("DB is still at %s\n", dbname_.c_str());
  }
  delete env_;
}

// Returns true when `option_config` must not be run: either because the
// build is ROCKSDB_LITE and the configuration is unsupported there, or
// because the matching kSkip* bit is set in `skip_mask`.
bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) {
#ifdef ROCKSDB_LITE
  // These options are not supported in ROCKSDB_LITE
  switch (option_config) {
    case kHashSkipList:
    case kPlainTableFirstBytePrefix:
    case kPlainTableCappedPrefix:
    case kPlainTableCappedPrefixNonMmap:
    case kPlainTableAllBytesPrefix:
    case kVectorRep:
    case kHashLinkList:
    case kUniversalCompaction:
    case kUniversalCompactionMultiLevel:
    case kUniversalSubcompactions:
    case kFIFOCompaction:
    case kConcurrentSkipList:
      return true;
    default:
      break;
  }
#endif

  // Each configuration is governed by at most one skip bit; find it.
  int governing_bit = 0;
  switch (option_config) {
    case kUniversalCompaction:
    case kUniversalCompactionMultiLevel:
    case kUniversalSubcompactions:
      governing_bit = kSkipUniversalCompaction;
      break;
    case kMergePut:
      governing_bit = kSkipMergePut;
      break;
    case kHashLinkList:
    case kHashSkipList:
      governing_bit = kSkipNoSeekToLast;
      break;
    case kPlainTableAllBytesPrefix:
    case kPlainTableFirstBytePrefix:
    case kPlainTableCappedPrefix:
    case kPlainTableCappedPrefixNonMmap:
      governing_bit = kSkipPlainTable;
      break;
    case kBlockBasedTableWithPrefixHashIndex:
    case kBlockBasedTableWithWholeKeyHashIndex:
      governing_bit = kSkipHashIndex;
      break;
    case kFIFOCompaction:
      governing_bit = kSkipFIFOCompaction;
      break;
    case kWalDirAndMmapReads:
      governing_bit = kSkipMmapReads;
      break;
    default:
      // No skip bit applies to this configuration.
      break;
  }
  return (skip_mask & governing_bit) != 0;
}

// Switch to a fresh database with the next option configuration to
// test. Return false if there are no more configurations to test.
bool DBTestBase::ChangeOptions(int skip_mask) {
  // Step to the next configuration, then keep stepping past skipped ones.
  ++option_config_;
  while (option_config_ < kEnd &&
         ShouldSkipOptions(option_config_, skip_mask)) {
    ++option_config_;
  }

  if (option_config_ >= kEnd) {
    Destroy(last_options_);
    return false;
  }

  Options next_options = CurrentOptions();
  next_options.create_if_missing = true;
  DestroyAndReopen(next_options);
  return true;
}

// Switch between different compaction styles.
bool DBTestBase::ChangeCompactOptions() { if (option_config_ == kDefault) { option_config_ = kUniversalCompaction; Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; TryReopen(options); return true; } else if (option_config_ == kUniversalCompaction) { option_config_ = kUniversalCompactionMultiLevel; Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; TryReopen(options); return true; } else if (option_config_ == kUniversalCompactionMultiLevel) { option_config_ = kLevelSubcompactions; Destroy(last_options_); auto options = CurrentOptions(); assert(options.max_subcompactions > 1); TryReopen(options); return true; } else if (option_config_ == kLevelSubcompactions) { option_config_ = kUniversalSubcompactions; Destroy(last_options_); auto options = CurrentOptions(); assert(options.max_subcompactions > 1); TryReopen(options); return true; } else { return false; } } // Switch between different WAL settings bool DBTestBase::ChangeWalOptions() { if (option_config_ == kDefault) { option_config_ = kDBLogDir; Destroy(last_options_); auto options = CurrentOptions(); Destroy(options); options.create_if_missing = true; TryReopen(options); return true; } else if (option_config_ == kDBLogDir) { option_config_ = kWalDirAndMmapReads; Destroy(last_options_); auto options = CurrentOptions(); Destroy(options); options.create_if_missing = true; TryReopen(options); return true; } else if (option_config_ == kWalDirAndMmapReads) { option_config_ = kRecycleLogFiles; Destroy(last_options_); auto options = CurrentOptions(); Destroy(options); TryReopen(options); return true; } else { return false; } } // Switch between different filter policy // Jump from kDefault to kFilter to kFullFilter bool DBTestBase::ChangeFilterOptions() { if (option_config_ == kDefault) { option_config_ = kFilter; } else if (option_config_ == kFilter) { option_config_ = kFullFilterWithNewTableReaderForCompactions; } else if (option_config_ == 
kFullFilterWithNewTableReaderForCompactions) { option_config_ = kPartitionedFilterWithNewTableReaderForCompactions; } else { return false; } Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; TryReopen(options); return true; } // Switch between different DB options for file ingestion tests. bool DBTestBase::ChangeOptionsForFileIngestionTest() { if (option_config_ == kDefault) { option_config_ = kUniversalCompaction; Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; TryReopen(options); return true; } else if (option_config_ == kUniversalCompaction) { option_config_ = kUniversalCompactionMultiLevel; Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; TryReopen(options); return true; } else if (option_config_ == kUniversalCompactionMultiLevel) { option_config_ = kLevelSubcompactions; Destroy(last_options_); auto options = CurrentOptions(); assert(options.max_subcompactions > 1); TryReopen(options); return true; } else if (option_config_ == kLevelSubcompactions) { option_config_ = kUniversalSubcompactions; Destroy(last_options_); auto options = CurrentOptions(); assert(options.max_subcompactions > 1); TryReopen(options); return true; } else if (option_config_ == kUniversalSubcompactions) { option_config_ = kDirectIO; Destroy(last_options_); auto options = CurrentOptions(); TryReopen(options); return true; } else { return false; } } // Return the current option configuration. 
Options DBTestBase::CurrentOptions( const anon::OptionsOverride& options_override) const { return GetOptions(option_config_, GetDefaultOptions(), options_override); } Options DBTestBase::CurrentOptions( const Options& default_options, const anon::OptionsOverride& options_override) const { return GetOptions(option_config_, default_options, options_override); } Options DBTestBase::GetDefaultOptions() { Options options; options.write_buffer_size = 4090 * 4096; options.target_file_size_base = 2 * 1024 * 1024; options.max_bytes_for_level_base = 10 * 1024 * 1024; options.max_open_files = 5000; options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; options.compaction_pri = CompactionPri::kByCompensatedSize; return options; } Options DBTestBase::GetOptions( int option_config, const Options& default_options, const anon::OptionsOverride& options_override) const { // this redundant copy is to minimize code change w/o having lint error. Options options = default_options; BlockBasedTableOptions table_options; bool set_block_based_table_factory = true; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ !defined(OS_AIX) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( "NewRandomAccessFile:O_DIRECT"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( "NewWritableFile:O_DIRECT"); #endif bool can_allow_mmap = IsMemoryMappedAccessSupported(); switch (option_config) { #ifndef ROCKSDB_LITE case kHashSkipList: options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.memtable_factory.reset(NewHashSkipListRepFactory(16)); options.allow_concurrent_memtable_write = false; options.unordered_write = false; break; case kPlainTableFirstBytePrefix: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.allow_mmap_reads = can_allow_mmap; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; case 
kPlainTableCappedPrefix: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewCappedPrefixTransform(8)); options.allow_mmap_reads = can_allow_mmap; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; case kPlainTableCappedPrefixNonMmap: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewCappedPrefixTransform(8)); options.allow_mmap_reads = false; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; case kPlainTableAllBytesPrefix: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewNoopTransform()); options.allow_mmap_reads = can_allow_mmap; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; case kVectorRep: options.memtable_factory.reset(new VectorRepFactory(100)); options.allow_concurrent_memtable_write = false; options.unordered_write = false; break; case kHashLinkList: options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.memtable_factory.reset( NewHashLinkListRepFactory(4, 0, 3, true, 4)); options.allow_concurrent_memtable_write = false; options.unordered_write = false; break; case kDirectIO: { options.use_direct_reads = true; options.use_direct_io_for_flush_and_compaction = true; options.compaction_readahead_size = 2 * 1024 * 1024; test::SetupSyncPointsToMockDirectIO(); break; } #endif // ROCKSDB_LITE case kMergePut: options.merge_operator = MergeOperators::CreatePutOperator(); break; case kFilter: table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); break; case kFullFilterWithNewTableReaderForCompactions: table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); options.new_table_reader_for_compaction_inputs = true; options.compaction_readahead_size = 10 * 1024 * 1024; break; case kPartitionedFilterWithNewTableReaderForCompactions: 
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); table_options.partition_filters = true; table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; options.new_table_reader_for_compaction_inputs = true; options.compaction_readahead_size = 10 * 1024 * 1024; break; case kUncompressed: options.compression = kNoCompression; break; case kNumLevel_3: options.num_levels = 3; break; case kDBLogDir: options.db_log_dir = alternative_db_log_dir_; break; case kWalDirAndMmapReads: options.wal_dir = alternative_wal_dir_; // mmap reads should be orthogonal to WalDir setting, so we piggyback to // this option config to test mmap reads as well options.allow_mmap_reads = can_allow_mmap; break; case kManifestFileSize: options.max_manifest_file_size = 50; // 50 bytes break; case kPerfOptions: options.soft_rate_limit = 2.0; options.delayed_write_rate = 8 * 1024 * 1024; options.report_bg_io_stats = true; // TODO(3.13) -- test more options break; case kUniversalCompaction: options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; break; case kUniversalCompactionMultiLevel: options.compaction_style = kCompactionStyleUniversal; options.num_levels = 8; break; case kCompressedBlockCache: options.allow_mmap_writes = can_allow_mmap; table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); break; case kInfiniteMaxOpenFiles: options.max_open_files = -1; break; case kxxHashChecksum: { table_options.checksum = kxxHash; break; } case kxxHash64Checksum: { table_options.checksum = kxxHash64; break; } case kFIFOCompaction: { options.compaction_style = kCompactionStyleFIFO; break; } case kBlockBasedTableWithPrefixHashIndex: { table_options.index_type = BlockBasedTableOptions::kHashSearch; options.prefix_extractor.reset(NewFixedPrefixTransform(1)); break; } case kBlockBasedTableWithWholeKeyHashIndex: { table_options.index_type = BlockBasedTableOptions::kHashSearch; options.prefix_extractor.reset(NewNoopTransform()); break; } 
case kBlockBasedTableWithPartitionedIndex: { table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; options.prefix_extractor.reset(NewNoopTransform()); break; } case kBlockBasedTableWithPartitionedIndexFormat4: { table_options.format_version = 4; // Format 4 changes the binary index format. Since partitioned index is a // super-set of simple indexes, we are also using kTwoLevelIndexSearch to // test this format. table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; // The top-level index in partition filters are also affected by format 4. table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); table_options.partition_filters = true; table_options.index_block_restart_interval = 8; break; } case kBlockBasedTableWithIndexRestartInterval: { table_options.index_block_restart_interval = 8; break; } case kOptimizeFiltersForHits: { options.optimize_filters_for_hits = true; set_block_based_table_factory = true; break; } case kRowCache: { options.row_cache = NewLRUCache(1024 * 1024); break; } case kRecycleLogFiles: { options.recycle_log_file_num = 2; break; } case kLevelSubcompactions: { options.max_subcompactions = 4; break; } case kUniversalSubcompactions: { options.compaction_style = kCompactionStyleUniversal; options.num_levels = 8; options.max_subcompactions = 4; break; } case kConcurrentSkipList: { options.allow_concurrent_memtable_write = true; options.enable_write_thread_adaptive_yield = true; break; } case kPipelinedWrite: { options.enable_pipelined_write = true; break; } case kConcurrentWALWrites: { // This options optimize 2PC commit path options.two_write_queues = true; options.manual_wal_flush = true; break; } case kUnorderedWrite: { options.allow_concurrent_memtable_write = false; options.unordered_write = false; break; } default: break; } if (options_override.filter_policy) { table_options.filter_policy = options_override.filter_policy; table_options.partition_filters = options_override.partition_filters; 
table_options.metadata_block_size = options_override.metadata_block_size; } if (set_block_based_table_factory) { options.table_factory.reset(NewBlockBasedTableFactory(table_options)); } options.env = env_; options.create_if_missing = true; options.fail_if_options_file_error = true; return options; } void DBTestBase::CreateColumnFamilies(const std::vector& cfs, const Options& options) { ColumnFamilyOptions cf_opts(options); size_t cfi = handles_.size(); handles_.resize(cfi + cfs.size()); for (auto cf : cfs) { Status s = db_->CreateColumnFamily(cf_opts, cf, &handles_[cfi++]); ASSERT_OK(s); } } void DBTestBase::CreateAndReopenWithCF(const std::vector& cfs, const Options& options) { CreateColumnFamilies(cfs, options); std::vector cfs_plus_default = cfs; cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName); ReopenWithColumnFamilies(cfs_plus_default, options); } void DBTestBase::ReopenWithColumnFamilies(const std::vector& cfs, const std::vector& options) { ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); } void DBTestBase::ReopenWithColumnFamilies(const std::vector& cfs, const Options& options) { ASSERT_OK(TryReopenWithColumnFamilies(cfs, options)); } Status DBTestBase::TryReopenWithColumnFamilies( const std::vector& cfs, const std::vector& options) { Close(); EXPECT_EQ(cfs.size(), options.size()); std::vector column_families; for (size_t i = 0; i < cfs.size(); ++i) { column_families.push_back(ColumnFamilyDescriptor(cfs[i], options[i])); } DBOptions db_opts = DBOptions(options[0]); last_options_ = options[0]; return DB::Open(db_opts, dbname_, column_families, &handles_, &db_); } Status DBTestBase::TryReopenWithColumnFamilies( const std::vector& cfs, const Options& options) { Close(); std::vector v_opts(cfs.size(), options); return TryReopenWithColumnFamilies(cfs, v_opts); } void DBTestBase::Reopen(const Options& options) { ASSERT_OK(TryReopen(options)); } void DBTestBase::Close() { for (auto h : handles_) { db_->DestroyColumnFamilyHandle(h); } 
handles_.clear(); delete db_; db_ = nullptr; } void DBTestBase::DestroyAndReopen(const Options& options) { // Destroy using last options Destroy(last_options_); ASSERT_OK(TryReopen(options)); } void DBTestBase::Destroy(const Options& options, bool delete_cf_paths) { std::vector column_families; if (delete_cf_paths) { for (size_t i = 0; i < handles_.size(); ++i) { ColumnFamilyDescriptor cfdescriptor; handles_[i]->GetDescriptor(&cfdescriptor); column_families.push_back(cfdescriptor); } } Close(); ASSERT_OK(DestroyDB(dbname_, options, column_families)); } Status DBTestBase::ReadOnlyReopen(const Options& options) { return DB::OpenForReadOnly(options, dbname_, &db_); } Status DBTestBase::TryReopen(const Options& options) { Close(); last_options_.table_factory.reset(); // Note: operator= is an unsafe approach here since it destructs // std::shared_ptr in the same order of their creation, in contrast to // destructors which destructs them in the opposite order of creation. One // particular problme is that the cache destructor might invoke callback // functions that use Option members such as statistics. To work around this // problem, we manually call destructor of table_facotry which eventually // clears the block cache. 
last_options_ = options; return DB::Open(options, dbname_, &db_); } bool DBTestBase::IsDirectIOSupported() { return test::IsDirectIOSupported(env_, dbname_); } bool DBTestBase::IsMemoryMappedAccessSupported() const { return (!encrypted_env_); } Status DBTestBase::Flush(int cf) { if (cf == 0) { return db_->Flush(FlushOptions()); } else { return db_->Flush(FlushOptions(), handles_[cf]); } } Status DBTestBase::Flush(const std::vector& cf_ids) { std::vector cfhs; std::for_each(cf_ids.begin(), cf_ids.end(), [&cfhs, this](int id) { cfhs.emplace_back(handles_[id]); }); return db_->Flush(FlushOptions(), cfhs); } Status DBTestBase::Put(const Slice& k, const Slice& v, WriteOptions wo) { if (kMergePut == option_config_) { return db_->Merge(wo, k, v); } else { return db_->Put(wo, k, v); } } Status DBTestBase::Put(int cf, const Slice& k, const Slice& v, WriteOptions wo) { if (kMergePut == option_config_) { return db_->Merge(wo, handles_[cf], k, v); } else { return db_->Put(wo, handles_[cf], k, v); } } Status DBTestBase::Merge(const Slice& k, const Slice& v, WriteOptions wo) { return db_->Merge(wo, k, v); } Status DBTestBase::Merge(int cf, const Slice& k, const Slice& v, WriteOptions wo) { return db_->Merge(wo, handles_[cf], k, v); } Status DBTestBase::Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } Status DBTestBase::Delete(int cf, const std::string& k) { return db_->Delete(WriteOptions(), handles_[cf], k); } Status DBTestBase::SingleDelete(const std::string& k) { return db_->SingleDelete(WriteOptions(), k); } Status DBTestBase::SingleDelete(int cf, const std::string& k) { return db_->SingleDelete(WriteOptions(), handles_[cf], k); } bool DBTestBase::SetPreserveDeletesSequenceNumber(SequenceNumber sn) { return db_->SetPreserveDeletesSequenceNumber(sn); } std::string DBTestBase::Get(const std::string& k, const Snapshot* snapshot) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; std::string result; Status s = 
db_->Get(options, k, &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } std::string DBTestBase::Get(int cf, const std::string& k, const Snapshot* snapshot) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; std::string result; Status s = db_->Get(options, handles_[cf], k, &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } std::vector DBTestBase::MultiGet(std::vector cfs, const std::vector& k, const Snapshot* snapshot, const bool batched) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; std::vector handles; std::vector keys; std::vector result; for (unsigned int i = 0; i < cfs.size(); ++i) { handles.push_back(handles_[cfs[i]]); keys.push_back(k[i]); } std::vector s; if (!batched) { s = db_->MultiGet(options, handles, keys, &result); for (unsigned int i = 0; i < s.size(); ++i) { if (s[i].IsNotFound()) { result[i] = "NOT_FOUND"; } else if (!s[i].ok()) { result[i] = s[i].ToString(); } } } else { std::vector pin_values(cfs.size()); result.resize(cfs.size()); s.resize(cfs.size()); db_->MultiGet(options, cfs.size(), handles.data(), keys.data(), pin_values.data(), s.data()); for (unsigned int i = 0; i < s.size(); ++i) { if (s[i].IsNotFound()) { result[i] = "NOT_FOUND"; } else if (!s[i].ok()) { result[i] = s[i].ToString(); } else { result[i].assign(pin_values[i].data(), pin_values[i].size()); } } } return result; } std::vector DBTestBase::MultiGet(const std::vector& k, const Snapshot* snapshot) { ReadOptions options; options.verify_checksums = true; options.snapshot = snapshot; std::vector keys; std::vector result; std::vector statuses(k.size()); std::vector pin_values(k.size()); for (unsigned int i = 0; i < k.size(); ++i) { keys.push_back(k[i]); } db_->MultiGet(options, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), pin_values.data(), statuses.data()); 
result.resize(k.size()); for (auto iter = result.begin(); iter != result.end(); ++iter) { iter->assign(pin_values[iter - result.begin()].data(), pin_values[iter - result.begin()].size()); } for (unsigned int i = 0; i < statuses.size(); ++i) { if (statuses[i].IsNotFound()) { result[i] = "NOT_FOUND"; } } return result; } Status DBTestBase::Get(const std::string& k, PinnableSlice* v) { ReadOptions options; options.verify_checksums = true; Status s = dbfull()->Get(options, dbfull()->DefaultColumnFamily(), k, v); return s; } uint64_t DBTestBase::GetNumSnapshots() { uint64_t int_num; EXPECT_TRUE(dbfull()->GetIntProperty("rocksdb.num-snapshots", &int_num)); return int_num; } uint64_t DBTestBase::GetTimeOldestSnapshots() { uint64_t int_num; EXPECT_TRUE( dbfull()->GetIntProperty("rocksdb.oldest-snapshot-time", &int_num)); return int_num; } uint64_t DBTestBase::GetSequenceOldestSnapshots() { uint64_t int_num; EXPECT_TRUE( dbfull()->GetIntProperty("rocksdb.oldest-snapshot-sequence", &int_num)); return int_num; } // Return a string that contains all key,value pairs in order, // formatted like "(k1->v1)(k2->v2)". std::string DBTestBase::Contents(int cf) { std::vector forward; std::string result; Iterator* iter = (cf == 0) ? 
db_->NewIterator(ReadOptions()) : db_->NewIterator(ReadOptions(), handles_[cf]); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { std::string s = IterStatus(iter); result.push_back('('); result.append(s); result.push_back(')'); forward.push_back(s); } // Check reverse iteration results are the reverse of forward results unsigned int matched = 0; for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { EXPECT_LT(matched, forward.size()); EXPECT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]); matched++; } EXPECT_EQ(matched, forward.size()); delete iter; return result; } std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) { Arena arena; auto options = CurrentOptions(); InternalKeyComparator icmp(options.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* upper_bound */); ScopedArenaIterator iter; if (cf == 0) { iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber)); } else { iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber, handles_[cf])); } InternalKey target(user_key, kMaxSequenceNumber, kTypeValue); iter->Seek(target.Encode()); std::string result; if (!iter->status().ok()) { result = iter->status().ToString(); } else { result = "[ "; bool first = true; while (iter->Valid()) { ParsedInternalKey ikey(Slice(), 0, kTypeValue); if (!ParseInternalKey(iter->key(), &ikey)) { result += "CORRUPTED"; } else { if (!last_options_.comparator->Equal(ikey.user_key, user_key)) { break; } if (!first) { result += ", "; } first = false; switch (ikey.type) { case kTypeValue: result += iter->value().ToString(); break; case kTypeMerge: // keep it the same as kTypeValue for testing kMergePut result += iter->value().ToString(); break; case kTypeDeletion: result += "DEL"; break; case kTypeSingleDeletion: result += "SDEL"; break; default: assert(false); break; } } iter->Next(); } if (!first) { result += " "; } result += "]"; } return result; } #ifndef ROCKSDB_LITE int 
DBTestBase::NumSortedRuns(int cf) { ColumnFamilyMetaData cf_meta; if (cf == 0) { db_->GetColumnFamilyMetaData(&cf_meta); } else { db_->GetColumnFamilyMetaData(handles_[cf], &cf_meta); } int num_sr = static_cast(cf_meta.levels[0].files.size()); for (size_t i = 1U; i < cf_meta.levels.size(); i++) { if (cf_meta.levels[i].files.size() > 0) { num_sr++; } } return num_sr; } uint64_t DBTestBase::TotalSize(int cf) { ColumnFamilyMetaData cf_meta; if (cf == 0) { db_->GetColumnFamilyMetaData(&cf_meta); } else { db_->GetColumnFamilyMetaData(handles_[cf], &cf_meta); } return cf_meta.size; } uint64_t DBTestBase::SizeAtLevel(int level) { std::vector metadata; db_->GetLiveFilesMetaData(&metadata); uint64_t sum = 0; for (const auto& m : metadata) { if (m.level == level) { sum += m.size; } } return sum; } size_t DBTestBase::TotalLiveFiles(int cf) { ColumnFamilyMetaData cf_meta; if (cf == 0) { db_->GetColumnFamilyMetaData(&cf_meta); } else { db_->GetColumnFamilyMetaData(handles_[cf], &cf_meta); } size_t num_files = 0; for (auto& level : cf_meta.levels) { num_files += level.files.size(); } return num_files; } size_t DBTestBase::CountLiveFiles() { std::vector metadata; db_->GetLiveFilesMetaData(&metadata); return metadata.size(); } int DBTestBase::NumTableFilesAtLevel(int level, int cf) { std::string property; if (cf == 0) { // default cfd EXPECT_TRUE(db_->GetProperty( "rocksdb.num-files-at-level" + NumberToString(level), &property)); } else { EXPECT_TRUE(db_->GetProperty( handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level), &property)); } return atoi(property.c_str()); } double DBTestBase::CompressionRatioAtLevel(int level, int cf) { std::string property; if (cf == 0) { // default cfd EXPECT_TRUE(db_->GetProperty( "rocksdb.compression-ratio-at-level" + NumberToString(level), &property)); } else { EXPECT_TRUE(db_->GetProperty( handles_[cf], "rocksdb.compression-ratio-at-level" + NumberToString(level), &property)); } return std::stod(property); } int 
DBTestBase::TotalTableFiles(int cf, int levels) { if (levels == -1) { levels = (cf == 0) ? db_->NumberLevels() : db_->NumberLevels(handles_[1]); } int result = 0; for (int level = 0; level < levels; level++) { result += NumTableFilesAtLevel(level, cf); } return result; } // Return spread of files per level std::string DBTestBase::FilesPerLevel(int cf) { int num_levels = (cf == 0) ? db_->NumberLevels() : db_->NumberLevels(handles_[1]); std::string result; size_t last_non_zero_offset = 0; for (int level = 0; level < num_levels; level++) { int f = NumTableFilesAtLevel(level, cf); char buf[100]; snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); result += buf; if (f > 0) { last_non_zero_offset = result.size(); } } result.resize(last_non_zero_offset); return result; } #endif // !ROCKSDB_LITE size_t DBTestBase::CountFiles() { std::vector files; env_->GetChildren(dbname_, &files); std::vector logfiles; if (dbname_ != last_options_.wal_dir) { env_->GetChildren(last_options_.wal_dir, &logfiles); } return files.size() + logfiles.size(); } uint64_t DBTestBase::Size(const Slice& start, const Slice& limit, int cf) { Range r(start, limit); uint64_t size; if (cf == 0) { db_->GetApproximateSizes(&r, 1, &size); } else { db_->GetApproximateSizes(handles_[1], &r, 1, &size); } return size; } void DBTestBase::Compact(int cf, const Slice& start, const Slice& limit, uint32_t target_path_id) { CompactRangeOptions compact_options; compact_options.target_path_id = target_path_id; ASSERT_OK(db_->CompactRange(compact_options, handles_[cf], &start, &limit)); } void DBTestBase::Compact(int cf, const Slice& start, const Slice& limit) { ASSERT_OK( db_->CompactRange(CompactRangeOptions(), handles_[cf], &start, &limit)); } void DBTestBase::Compact(const Slice& start, const Slice& limit) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &limit)); } // Do n memtable compactions, each of which produces an sstable // covering the range [small,large]. 
void DBTestBase::MakeTables(int n, const std::string& small, const std::string& large, int cf) { for (int i = 0; i < n; i++) { ASSERT_OK(Put(cf, small, "begin")); ASSERT_OK(Put(cf, large, "end")); ASSERT_OK(Flush(cf)); MoveFilesToLevel(n - i - 1, cf); } } // Prevent pushing of new sstables into deeper levels by adding // tables that cover a specified range to all levels. void DBTestBase::FillLevels(const std::string& smallest, const std::string& largest, int cf) { MakeTables(db_->NumberLevels(handles_[cf]), smallest, largest, cf); } void DBTestBase::MoveFilesToLevel(int level, int cf) { for (int l = 0; l < level; ++l) { if (cf > 0) { dbfull()->TEST_CompactRange(l, nullptr, nullptr, handles_[cf]); } else { dbfull()->TEST_CompactRange(l, nullptr, nullptr); } } } #ifndef ROCKSDB_LITE void DBTestBase::DumpFileCounts(const char* label) { fprintf(stderr, "---\n%s:\n", label); fprintf(stderr, "maxoverlap: %" PRIu64 "\n", dbfull()->TEST_MaxNextLevelOverlappingBytes()); for (int level = 0; level < db_->NumberLevels(); level++) { int num = NumTableFilesAtLevel(level); if (num > 0) { fprintf(stderr, " level %3d : %d files\n", level, num); } } } #endif // !ROCKSDB_LITE std::string DBTestBase::DumpSSTableList() { std::string property; db_->GetProperty("rocksdb.sstables", &property); return property; } void DBTestBase::GetSstFiles(Env* env, std::string path, std::vector* files) { env->GetChildren(path, files); files->erase( std::remove_if(files->begin(), files->end(), [](std::string name) { uint64_t number; FileType type; return !(ParseFileName(name, &number, &type) && type == kTableFile); }), files->end()); } int DBTestBase::GetSstFileCount(std::string path) { std::vector files; DBTestBase::GetSstFiles(env_, path, &files); return static_cast(files.size()); } // this will generate non-overlapping files since it keeps increasing key_idx void DBTestBase::GenerateNewFile(int cf, Random* rnd, int* key_idx, bool nowait) { for (int i = 0; i < KNumKeysByGenerateNewFile; i++) { 
ASSERT_OK(Put(cf, Key(*key_idx), RandomString(rnd, (i == 99) ? 1 : 990))); (*key_idx)++; } if (!nowait) { dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } } // this will generate non-overlapping files since it keeps increasing key_idx void DBTestBase::GenerateNewFile(Random* rnd, int* key_idx, bool nowait) { for (int i = 0; i < KNumKeysByGenerateNewFile; i++) { ASSERT_OK(Put(Key(*key_idx), RandomString(rnd, (i == 99) ? 1 : 990))); (*key_idx)++; } if (!nowait) { dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } } const int DBTestBase::kNumKeysByGenerateNewRandomFile = 51; void DBTestBase::GenerateNewRandomFile(Random* rnd, bool nowait) { for (int i = 0; i < kNumKeysByGenerateNewRandomFile; i++) { ASSERT_OK(Put("key" + RandomString(rnd, 7), RandomString(rnd, 2000))); } ASSERT_OK(Put("key" + RandomString(rnd, 7), RandomString(rnd, 200))); if (!nowait) { dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } } std::string DBTestBase::IterStatus(Iterator* iter) { std::string result; if (iter->Valid()) { result = iter->key().ToString() + "->" + iter->value().ToString(); } else { result = "(invalid)"; } return result; } Options DBTestBase::OptionsForLogIterTest() { Options options = CurrentOptions(); options.create_if_missing = true; options.WAL_ttl_seconds = 1000; return options; } std::string DBTestBase::DummyString(size_t len, char c) { return std::string(len, c); } void DBTestBase::VerifyIterLast(std::string expected_key, int cf) { Iterator* iter; ReadOptions ro; if (cf == 0) { iter = db_->NewIterator(ro); } else { iter = db_->NewIterator(ro, handles_[cf]); } iter->SeekToLast(); ASSERT_EQ(IterStatus(iter), expected_key); delete iter; } // Used to test InplaceUpdate // If previous value is nullptr or delta is > than previous value, // sets newValue with delta // If previous value is not empty, // updates previous value with 'b' string of previous value size - 1. 
UpdateStatus DBTestBase::updateInPlaceSmallerSize(char* prevValue, uint32_t* prevSize, Slice delta, std::string* newValue) { if (prevValue == nullptr) { *newValue = std::string(delta.size(), 'c'); return UpdateStatus::UPDATED; } else { *prevSize = *prevSize - 1; std::string str_b = std::string(*prevSize, 'b'); memcpy(prevValue, str_b.c_str(), str_b.size()); return UpdateStatus::UPDATED_INPLACE; } } UpdateStatus DBTestBase::updateInPlaceSmallerVarintSize(char* prevValue, uint32_t* prevSize, Slice delta, std::string* newValue) { if (prevValue == nullptr) { *newValue = std::string(delta.size(), 'c'); return UpdateStatus::UPDATED; } else { *prevSize = 1; std::string str_b = std::string(*prevSize, 'b'); memcpy(prevValue, str_b.c_str(), str_b.size()); return UpdateStatus::UPDATED_INPLACE; } } UpdateStatus DBTestBase::updateInPlaceLargerSize(char* /*prevValue*/, uint32_t* /*prevSize*/, Slice delta, std::string* newValue) { *newValue = std::string(delta.size(), 'c'); return UpdateStatus::UPDATED; } UpdateStatus DBTestBase::updateInPlaceNoAction(char* /*prevValue*/, uint32_t* /*prevSize*/, Slice /*delta*/, std::string* /*newValue*/) { return UpdateStatus::UPDATE_FAILED; } // Utility method to test InplaceUpdate void DBTestBase::validateNumberOfEntries(int numValues, int cf) { Arena arena; auto options = CurrentOptions(); InternalKeyComparator icmp(options.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* upper_bound */); // This should be defined after range_del_agg so that it destructs the // assigned iterator before it range_del_agg is already destructed. 
ScopedArenaIterator iter; if (cf != 0) { iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber, handles_[cf])); } else { iter.set(dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber)); } iter->SeekToFirst(); ASSERT_EQ(iter->status().ok(), true); int seq = numValues; while (iter->Valid()) { ParsedInternalKey ikey; ikey.clear(); ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true); // checks sequence number for updates ASSERT_EQ(ikey.sequence, (unsigned)seq--); iter->Next(); } ASSERT_EQ(0, seq); } void DBTestBase::CopyFile(const std::string& source, const std::string& destination, uint64_t size) { const EnvOptions soptions; std::unique_ptr srcfile; ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions)); std::unique_ptr destfile; ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions)); if (size == 0) { // default argument means copy everything ASSERT_OK(env_->GetFileSize(source, &size)); } char buffer[4096]; Slice slice; while (size > 0) { uint64_t one = std::min(uint64_t(sizeof(buffer)), size); ASSERT_OK(srcfile->Read(one, &slice, buffer)); ASSERT_OK(destfile->Append(slice)); size -= slice.size(); } ASSERT_OK(destfile->Close()); } std::unordered_map DBTestBase::GetAllSSTFiles( uint64_t* total_size) { std::unordered_map res; if (total_size) { *total_size = 0; } std::vector files; env_->GetChildren(dbname_, &files); for (auto& file_name : files) { uint64_t number; FileType type; std::string file_path = dbname_ + "/" + file_name; if (ParseFileName(file_name, &number, &type) && type == kTableFile) { uint64_t file_size = 0; env_->GetFileSize(file_path, &file_size); res[file_path] = file_size; if (total_size) { *total_size += file_size; } } } return res; } std::vector DBTestBase::ListTableFiles(Env* env, const std::string& path) { std::vector files; std::vector file_numbers; env->GetChildren(path, &files); uint64_t number; FileType type; for (size_t i = 0; i < files.size(); ++i) { if 
(ParseFileName(files[i], &number, &type)) { if (type == kTableFile) { file_numbers.push_back(number); } } } return file_numbers; } void DBTestBase::VerifyDBFromMap(std::map true_data, size_t* total_reads_res, bool tailing_iter, std::map status) { size_t total_reads = 0; for (auto& kv : true_data) { Status s = status[kv.first]; if (s.ok()) { ASSERT_EQ(Get(kv.first), kv.second); } else { std::string value; ASSERT_EQ(s, db_->Get(ReadOptions(), kv.first, &value)); } total_reads++; } // Normal Iterator { int iter_cnt = 0; ReadOptions ro; ro.total_order_seek = true; Iterator* iter = db_->NewIterator(ro); // Verify Iterator::Next() iter_cnt = 0; auto data_iter = true_data.begin(); Status s; for (iter->SeekToFirst(); iter->Valid(); iter->Next(), data_iter++) { ASSERT_EQ(iter->key().ToString(), data_iter->first); Status current_status = status[data_iter->first]; if (!current_status.ok()) { s = current_status; } ASSERT_EQ(iter->status(), s); if (current_status.ok()) { ASSERT_EQ(iter->value().ToString(), data_iter->second); } iter_cnt++; total_reads++; } ASSERT_EQ(data_iter, true_data.end()) << iter_cnt << " / " << true_data.size(); delete iter; // Verify Iterator::Prev() // Use a new iterator to make sure its status is clean. 
iter = db_->NewIterator(ro); iter_cnt = 0; s = Status::OK(); auto data_rev = true_data.rbegin(); for (iter->SeekToLast(); iter->Valid(); iter->Prev(), data_rev++) { ASSERT_EQ(iter->key().ToString(), data_rev->first); Status current_status = status[data_rev->first]; if (!current_status.ok()) { s = current_status; } ASSERT_EQ(iter->status(), s); if (current_status.ok()) { ASSERT_EQ(iter->value().ToString(), data_rev->second); } iter_cnt++; total_reads++; } ASSERT_EQ(data_rev, true_data.rend()) << iter_cnt << " / " << true_data.size(); // Verify Iterator::Seek() for (auto kv : true_data) { iter->Seek(kv.first); ASSERT_EQ(kv.first, iter->key().ToString()); ASSERT_EQ(kv.second, iter->value().ToString()); total_reads++; } delete iter; } if (tailing_iter) { #ifndef ROCKSDB_LITE // Tailing iterator int iter_cnt = 0; ReadOptions ro; ro.tailing = true; ro.total_order_seek = true; Iterator* iter = db_->NewIterator(ro); // Verify ForwardIterator::Next() iter_cnt = 0; auto data_iter = true_data.begin(); for (iter->SeekToFirst(); iter->Valid(); iter->Next(), data_iter++) { ASSERT_EQ(iter->key().ToString(), data_iter->first); ASSERT_EQ(iter->value().ToString(), data_iter->second); iter_cnt++; total_reads++; } ASSERT_EQ(data_iter, true_data.end()) << iter_cnt << " / " << true_data.size(); // Verify ForwardIterator::Seek() for (auto kv : true_data) { iter->Seek(kv.first); ASSERT_EQ(kv.first, iter->key().ToString()); ASSERT_EQ(kv.second, iter->value().ToString()); total_reads++; } delete iter; #endif // ROCKSDB_LITE } if (total_reads_res) { *total_reads_res = total_reads; } } void DBTestBase::VerifyDBInternal( std::vector> true_data) { Arena arena; InternalKeyComparator icmp(last_options_.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* upper_bound */); auto iter = dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber); iter->SeekToFirst(); for (auto p : true_data) { ASSERT_TRUE(iter->Valid()); ParsedInternalKey ikey; 
ASSERT_TRUE(ParseInternalKey(iter->key(), &ikey)); ASSERT_EQ(p.first, ikey.user_key); ASSERT_EQ(p.second, iter->value()); iter->Next(); }; ASSERT_FALSE(iter->Valid()); iter->~InternalIterator(); } #ifndef ROCKSDB_LITE uint64_t DBTestBase::GetNumberOfSstFilesForColumnFamily( DB* db, std::string column_family_name) { std::vector metadata; db->GetLiveFilesMetaData(&metadata); uint64_t result = 0; for (auto& fileMetadata : metadata) { result += (fileMetadata.column_family_name == column_family_name); } return result; } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_test_util.h000066400000000000000000001044201370372246700164650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include #include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "env/mock_env.h" #include "file/filename.h" #include "memtable/hash_linklist_rep.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/sst_file_writer.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "rocksdb/utilities/checkpoint.h" #include "table/block_based/block_based_table_factory.h" #include "table/mock_table.h" #include "table/plain/plain_table_factory.h" #include "table/scoped_arena_iterator.h" #include "test_util/mock_time_env.h" #include "util/compression.h" #include "util/mutexlock.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { namespace anon { class AtomicCounter { public: explicit AtomicCounter(Env* env = NULL) : env_(env), cond_count_(&mu_), count_(0) {} void Increment() { MutexLock l(&mu_); count_++; cond_count_.SignalAll(); } int Read() { MutexLock l(&mu_); return count_; } bool WaitFor(int count) { MutexLock l(&mu_); uint64_t start = env_->NowMicros(); while (count_ < count) { uint64_t now = env_->NowMicros(); cond_count_.TimedWait(now + /*1s*/ 1 * 1000 * 1000); if (env_->NowMicros() - start > /*10s*/ 10 * 1000 * 1000) { return false; } if (count_ < count) { GTEST_LOG_(WARNING) << "WaitFor is taking more time than usual"; } } return true; } void Reset() { MutexLock l(&mu_); count_ = 0; cond_count_.SignalAll(); } private: Env* env_; port::Mutex mu_; port::CondVar cond_count_; int count_; }; struct OptionsOverride { std::shared_ptr filter_policy = nullptr; // These will be used only if filter_policy is set 
bool partition_filters = false; uint64_t metadata_block_size = 1024; // Used as a bit mask of individual enums in which to skip an XF test point int skip_policy = 0; }; } // namespace anon enum SkipPolicy { kSkipNone = 0, kSkipNoSnapshot = 1, kSkipNoPrefix = 2 }; // A hacky skip list mem table that triggers flush after number of entries. class SpecialMemTableRep : public MemTableRep { public: explicit SpecialMemTableRep(Allocator* allocator, MemTableRep* memtable, int num_entries_flush) : MemTableRep(allocator), memtable_(memtable), num_entries_flush_(num_entries_flush), num_entries_(0) {} virtual KeyHandle Allocate(const size_t len, char** buf) override { return memtable_->Allocate(len, buf); } // Insert key into the list. // REQUIRES: nothing that compares equal to key is currently in the list. virtual void Insert(KeyHandle handle) override { num_entries_++; memtable_->Insert(handle); } void InsertConcurrently(KeyHandle handle) override { num_entries_++; memtable_->Insert(handle); } // Returns true iff an entry that compares equal to key is in the list. virtual bool Contains(const char* key) const override { return memtable_->Contains(key); } virtual size_t ApproximateMemoryUsage() override { // Return a high memory usage when number of entries exceeds the threshold // to trigger a flush. return (num_entries_ < num_entries_flush_) ? 
0 : 1024 * 1024 * 1024; } virtual void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override { memtable_->Get(k, callback_args, callback_func); } uint64_t ApproximateNumEntries(const Slice& start_ikey, const Slice& end_ikey) override { return memtable_->ApproximateNumEntries(start_ikey, end_ikey); } virtual MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override { return memtable_->GetIterator(arena); } virtual ~SpecialMemTableRep() override {} private: std::unique_ptr memtable_; int num_entries_flush_; int num_entries_; }; // The factory for the hacky skip list mem table that triggers flush after // number of entries exceeds a threshold. class SpecialSkipListFactory : public MemTableRepFactory { public: // After number of inserts exceeds `num_entries_flush` in a mem table, trigger // flush. explicit SpecialSkipListFactory(int num_entries_flush) : num_entries_flush_(num_entries_flush) {} using MemTableRepFactory::CreateMemTableRep; virtual MemTableRep* CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* /*logger*/) override { return new SpecialMemTableRep( allocator, factory_.CreateMemTableRep(compare, allocator, transform, 0), num_entries_flush_); } virtual const char* Name() const override { return "SkipListFactory"; } bool IsInsertConcurrentlySupported() const override { return factory_.IsInsertConcurrentlySupported(); } private: SkipListFactory factory_; int num_entries_flush_; }; // Special Env used to delay background operations class SpecialEnv : public EnvWrapper { public: explicit SpecialEnv(Env* base); Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& soptions) override { class SSTableFile : public WritableFile { private: SpecialEnv* env_; std::unique_ptr base_; public: SSTableFile(SpecialEnv* env, std::unique_ptr&& base) : env_(env), base_(std::move(base)) {} Status Append(const Slice& 
data) override { if (env_->table_write_callback_) { (*env_->table_write_callback_)(); } if (env_->drop_writes_.load(std::memory_order_acquire)) { // Drop writes on the floor return Status::OK(); } else if (env_->no_space_.load(std::memory_order_acquire)) { return Status::NoSpace("No space left on device"); } else { env_->bytes_written_ += data.size(); return base_->Append(data); } } Status PositionedAppend(const Slice& data, uint64_t offset) override { if (env_->table_write_callback_) { (*env_->table_write_callback_)(); } if (env_->drop_writes_.load(std::memory_order_acquire)) { // Drop writes on the floor return Status::OK(); } else if (env_->no_space_.load(std::memory_order_acquire)) { return Status::NoSpace("No space left on device"); } else { env_->bytes_written_ += data.size(); return base_->PositionedAppend(data, offset); } } Status Truncate(uint64_t size) override { return base_->Truncate(size); } Status RangeSync(uint64_t offset, uint64_t nbytes) override { Status s = base_->RangeSync(offset, nbytes); #if !(defined NDEBUG) || !defined(OS_WIN) TEST_SYNC_POINT_CALLBACK("SpecialEnv::SStableFile::RangeSync", &s); #endif // !(defined NDEBUG) || !defined(OS_WIN) return s; } Status Close() override { // SyncPoint is not supported in Released Windows Mode. #if !(defined NDEBUG) || !defined(OS_WIN) // Check preallocation size // preallocation size is never passed to base file. 
size_t preallocation_size = preallocation_block_size(); TEST_SYNC_POINT_CALLBACK("DBTestWritableFile.GetPreallocationStatus", &preallocation_size); #endif // !(defined NDEBUG) || !defined(OS_WIN) Status s = base_->Close(); #if !(defined NDEBUG) || !defined(OS_WIN) TEST_SYNC_POINT_CALLBACK("SpecialEnv::SStableFile::Close", &s); #endif // !(defined NDEBUG) || !defined(OS_WIN) return s; } Status Flush() override { return base_->Flush(); } Status Sync() override { ++env_->sync_counter_; while (env_->delay_sstable_sync_.load(std::memory_order_acquire)) { env_->SleepForMicroseconds(100000); } Status s = base_->Sync(); #if !(defined NDEBUG) || !defined(OS_WIN) TEST_SYNC_POINT_CALLBACK("SpecialEnv::SStableFile::Sync", &s); #endif // !(defined NDEBUG) || !defined(OS_WIN) return s; } void SetIOPriority(Env::IOPriority pri) override { base_->SetIOPriority(pri); } Env::IOPriority GetIOPriority() override { return base_->GetIOPriority(); } bool use_direct_io() const override { return base_->use_direct_io(); } Status Allocate(uint64_t offset, uint64_t len) override { return base_->Allocate(offset, len); } }; class ManifestFile : public WritableFile { public: ManifestFile(SpecialEnv* env, std::unique_ptr&& b) : env_(env), base_(std::move(b)) {} Status Append(const Slice& data) override { if (env_->manifest_write_error_.load(std::memory_order_acquire)) { return Status::IOError("simulated writer error"); } else { return base_->Append(data); } } Status Truncate(uint64_t size) override { return base_->Truncate(size); } Status Close() override { return base_->Close(); } Status Flush() override { return base_->Flush(); } Status Sync() override { ++env_->sync_counter_; if (env_->manifest_sync_error_.load(std::memory_order_acquire)) { return Status::IOError("simulated sync error"); } else { return base_->Sync(); } } uint64_t GetFileSize() override { return base_->GetFileSize(); } Status Allocate(uint64_t offset, uint64_t len) override { return base_->Allocate(offset, len); } private: 
SpecialEnv* env_; std::unique_ptr base_; }; class WalFile : public WritableFile { public: WalFile(SpecialEnv* env, std::unique_ptr&& b) : env_(env), base_(std::move(b)) { env_->num_open_wal_file_.fetch_add(1); } virtual ~WalFile() { env_->num_open_wal_file_.fetch_add(-1); } Status Append(const Slice& data) override { #if !(defined NDEBUG) || !defined(OS_WIN) TEST_SYNC_POINT("SpecialEnv::WalFile::Append:1"); #endif Status s; if (env_->log_write_error_.load(std::memory_order_acquire)) { s = Status::IOError("simulated writer error"); } else { int slowdown = env_->log_write_slowdown_.load(std::memory_order_acquire); if (slowdown > 0) { env_->SleepForMicroseconds(slowdown); } s = base_->Append(data); } #if !(defined NDEBUG) || !defined(OS_WIN) TEST_SYNC_POINT("SpecialEnv::WalFile::Append:2"); #endif return s; } Status Truncate(uint64_t size) override { return base_->Truncate(size); } Status Close() override { // SyncPoint is not supported in Released Windows Mode. #if !(defined NDEBUG) || !defined(OS_WIN) // Check preallocation size // preallocation size is never passed to base file. 
size_t preallocation_size = preallocation_block_size(); TEST_SYNC_POINT_CALLBACK("DBTestWalFile.GetPreallocationStatus", &preallocation_size); #endif // !(defined NDEBUG) || !defined(OS_WIN) return base_->Close(); } Status Flush() override { return base_->Flush(); } Status Sync() override { ++env_->sync_counter_; return base_->Sync(); } bool IsSyncThreadSafe() const override { return env_->is_wal_sync_thread_safe_.load(); } Status Allocate(uint64_t offset, uint64_t len) override { return base_->Allocate(offset, len); } private: SpecialEnv* env_; std::unique_ptr base_; }; if (non_writeable_rate_.load(std::memory_order_acquire) > 0) { uint32_t random_number; { MutexLock l(&rnd_mutex_); random_number = rnd_.Uniform(100); } if (random_number < non_writeable_rate_.load()) { return Status::IOError("simulated random write error"); } } new_writable_count_++; if (non_writable_count_.load() > 0) { non_writable_count_--; return Status::IOError("simulated write error"); } EnvOptions optimized = soptions; if (strstr(f.c_str(), "MANIFEST") != nullptr || strstr(f.c_str(), "log") != nullptr) { optimized.use_mmap_writes = false; optimized.use_direct_writes = false; } Status s = target()->NewWritableFile(f, r, optimized); if (s.ok()) { if (strstr(f.c_str(), ".sst") != nullptr) { r->reset(new SSTableFile(this, std::move(*r))); } else if (strstr(f.c_str(), "MANIFEST") != nullptr) { r->reset(new ManifestFile(this, std::move(*r))); } else if (strstr(f.c_str(), "log") != nullptr) { r->reset(new WalFile(this, std::move(*r))); } } return s; } Status NewRandomAccessFile(const std::string& f, std::unique_ptr* r, const EnvOptions& soptions) override { class CountingFile : public RandomAccessFile { public: CountingFile(std::unique_ptr&& target, anon::AtomicCounter* counter, std::atomic* bytes_read) : target_(std::move(target)), counter_(counter), bytes_read_(bytes_read) {} virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { counter_->Increment(); 
Status s = target_->Read(offset, n, result, scratch); *bytes_read_ += result->size(); return s; } virtual Status Prefetch(uint64_t offset, size_t n) override { Status s = target_->Prefetch(offset, n); *bytes_read_ += n; return s; } private: std::unique_ptr target_; anon::AtomicCounter* counter_; std::atomic* bytes_read_; }; Status s = target()->NewRandomAccessFile(f, r, soptions); random_file_open_counter_++; if (s.ok() && count_random_reads_) { r->reset(new CountingFile(std::move(*r), &random_read_counter_, &random_read_bytes_counter_)); } if (s.ok() && soptions.compaction_readahead_size > 0) { compaction_readahead_size_ = soptions.compaction_readahead_size; } return s; } virtual Status NewSequentialFile(const std::string& f, std::unique_ptr* r, const EnvOptions& soptions) override { class CountingFile : public SequentialFile { public: CountingFile(std::unique_ptr&& target, anon::AtomicCounter* counter) : target_(std::move(target)), counter_(counter) {} virtual Status Read(size_t n, Slice* result, char* scratch) override { counter_->Increment(); return target_->Read(n, result, scratch); } virtual Status Skip(uint64_t n) override { return target_->Skip(n); } private: std::unique_ptr target_; anon::AtomicCounter* counter_; }; Status s = target()->NewSequentialFile(f, r, soptions); if (s.ok() && count_sequential_reads_) { r->reset(new CountingFile(std::move(*r), &sequential_read_counter_)); } return s; } virtual void SleepForMicroseconds(int micros) override { sleep_counter_.Increment(); if (no_slowdown_ || time_elapse_only_sleep_) { addon_time_.fetch_add(micros); } if (!no_slowdown_) { target()->SleepForMicroseconds(micros); } } virtual Status GetCurrentTime(int64_t* unix_time) override { Status s; if (time_elapse_only_sleep_) { *unix_time = maybe_starting_time_; } else { s = target()->GetCurrentTime(unix_time); } if (s.ok()) { // FIXME: addon_time_ sometimes used to mean seconds (here) and // sometimes microseconds *unix_time += addon_time_.load(); } return s; } 
virtual uint64_t NowCPUNanos() override { now_cpu_count_.fetch_add(1); return target()->NowCPUNanos(); } virtual uint64_t NowNanos() override { return (time_elapse_only_sleep_ ? 0 : target()->NowNanos()) + addon_time_.load() * 1000; } virtual uint64_t NowMicros() override { return (time_elapse_only_sleep_ ? 0 : target()->NowMicros()) + addon_time_.load(); } virtual Status DeleteFile(const std::string& fname) override { delete_count_.fetch_add(1); return target()->DeleteFile(fname); } void SetTimeElapseOnlySleep(Options* options) { time_elapse_only_sleep_ = true; no_slowdown_ = true; // Need to disable stats dumping and persisting which also use // RepeatableThread, which uses InstrumentedCondVar::TimedWaitInternal. // With time_elapse_only_sleep_, this can hang on some platforms. // TODO: why? investigate/fix options->stats_dump_period_sec = 0; options->stats_persist_period_sec = 0; } // Something to return when mocking current time const int64_t maybe_starting_time_; Random rnd_; port::Mutex rnd_mutex_; // Lock to pretect rnd_ // sstable Sync() calls are blocked while this pointer is non-nullptr. std::atomic delay_sstable_sync_; // Drop writes on the floor while this pointer is non-nullptr. std::atomic drop_writes_; // Simulate no-space errors while this pointer is non-nullptr. std::atomic no_space_; // Simulate non-writable file system while this pointer is non-nullptr std::atomic non_writable_; // Force sync of manifest files to fail while this pointer is non-nullptr std::atomic manifest_sync_error_; // Force write to manifest files to fail while this pointer is non-nullptr std::atomic manifest_write_error_; // Force write to log files to fail while this pointer is non-nullptr std::atomic log_write_error_; // Slow down every log write, in micro-seconds. std::atomic log_write_slowdown_; // Number of WAL files that are still open for write. 
std::atomic num_open_wal_file_; bool count_random_reads_; anon::AtomicCounter random_read_counter_; std::atomic random_read_bytes_counter_; std::atomic random_file_open_counter_; bool count_sequential_reads_; anon::AtomicCounter sequential_read_counter_; anon::AtomicCounter sleep_counter_; std::atomic bytes_written_; std::atomic sync_counter_; std::atomic non_writeable_rate_; std::atomic new_writable_count_; std::atomic non_writable_count_; std::function* table_write_callback_; std::atomic addon_time_; std::atomic now_cpu_count_; std::atomic delete_count_; std::atomic time_elapse_only_sleep_; bool no_slowdown_; std::atomic is_wal_sync_thread_safe_{true}; std::atomic compaction_readahead_size_{}; }; #ifndef ROCKSDB_LITE class OnFileDeletionListener : public EventListener { public: OnFileDeletionListener() : matched_count_(0), expected_file_name_("") {} void SetExpectedFileName(const std::string file_name) { expected_file_name_ = file_name; } void VerifyMatchedCount(size_t expected_value) { ASSERT_EQ(matched_count_, expected_value); } void OnTableFileDeleted(const TableFileDeletionInfo& info) override { if (expected_file_name_ != "") { ASSERT_EQ(expected_file_name_, info.file_path); expected_file_name_ = ""; matched_count_++; } } private: size_t matched_count_; std::string expected_file_name_; }; #endif // A test merge operator mimics put but also fails if one of merge operands is // "corrupted". 
class TestPutOperator : public MergeOperator { public: virtual bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { if (merge_in.existing_value != nullptr && *(merge_in.existing_value) == "corrupted") { return false; } for (auto value : merge_in.operand_list) { if (value == "corrupted") { return false; } } merge_out->existing_operand = merge_in.operand_list.back(); return true; } virtual const char* Name() const override { return "TestPutOperator"; } }; // A wrapper around Cache that can easily be extended with instrumentation, // etc. class CacheWrapper : public Cache { public: explicit CacheWrapper(std::shared_ptr target) : target_(std::move(target)) {} const char* Name() const override { return target_->Name(); } Status Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Handle** handle = nullptr, Priority priority = Priority::LOW) override { return target_->Insert(key, value, charge, deleter, handle, priority); } Handle* Lookup(const Slice& key, Statistics* stats = nullptr) override { return target_->Lookup(key, stats); } bool Ref(Handle* handle) override { return target_->Ref(handle); } bool Release(Handle* handle, bool force_erase = false) override { return target_->Release(handle, force_erase); } void* Value(Handle* handle) override { return target_->Value(handle); } void Erase(const Slice& key) override { target_->Erase(key); } uint64_t NewId() override { return target_->NewId(); } void SetCapacity(size_t capacity) override { target_->SetCapacity(capacity); } void SetStrictCapacityLimit(bool strict_capacity_limit) override { target_->SetStrictCapacityLimit(strict_capacity_limit); } bool HasStrictCapacityLimit() const override { return target_->HasStrictCapacityLimit(); } size_t GetCapacity() const override { return target_->GetCapacity(); } size_t GetUsage() const override { return target_->GetUsage(); } size_t GetUsage(Handle* handle) const override { return 
target_->GetUsage(handle); } size_t GetPinnedUsage() const override { return target_->GetPinnedUsage(); } size_t GetCharge(Handle* handle) const override { return target_->GetCharge(handle); } void ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) override { target_->ApplyToAllCacheEntries(callback, thread_safe); } void EraseUnRefEntries() override { target_->EraseUnRefEntries(); } protected: std::shared_ptr target_; }; class DBTestBase : public testing::Test { public: // Sequence of option configurations to try enum OptionConfig : int { kDefault = 0, kBlockBasedTableWithPrefixHashIndex = 1, kBlockBasedTableWithWholeKeyHashIndex = 2, kPlainTableFirstBytePrefix = 3, kPlainTableCappedPrefix = 4, kPlainTableCappedPrefixNonMmap = 5, kPlainTableAllBytesPrefix = 6, kVectorRep = 7, kHashLinkList = 8, kMergePut = 9, kFilter = 10, kFullFilterWithNewTableReaderForCompactions = 11, kUncompressed = 12, kNumLevel_3 = 13, kDBLogDir = 14, kWalDirAndMmapReads = 15, kManifestFileSize = 16, kPerfOptions = 17, kHashSkipList = 18, kUniversalCompaction = 19, kUniversalCompactionMultiLevel = 20, kCompressedBlockCache = 21, kInfiniteMaxOpenFiles = 22, kxxHashChecksum = 23, kFIFOCompaction = 24, kOptimizeFiltersForHits = 25, kRowCache = 26, kRecycleLogFiles = 27, kConcurrentSkipList = 28, kPipelinedWrite = 29, kConcurrentWALWrites = 30, kDirectIO, kLevelSubcompactions, kBlockBasedTableWithIndexRestartInterval, kBlockBasedTableWithPartitionedIndex, kBlockBasedTableWithPartitionedIndexFormat4, kPartitionedFilterWithNewTableReaderForCompactions, kUniversalSubcompactions, kxxHash64Checksum, kUnorderedWrite, // This must be the last line kEnd, }; public: std::string dbname_; std::string alternative_wal_dir_; std::string alternative_db_log_dir_; MockEnv* mem_env_; Env* encrypted_env_; SpecialEnv* env_; std::shared_ptr env_guard_; DB* db_; std::vector handles_; int option_config_; Options last_options_; // Skip some options, as they may not be applicable to a specific 
test. // To add more skip constants, use values 4, 8, 16, etc. enum OptionSkip { kNoSkip = 0, kSkipDeletesFilterFirst = 1, kSkipUniversalCompaction = 2, kSkipMergePut = 4, kSkipPlainTable = 8, kSkipHashIndex = 16, kSkipNoSeekToLast = 32, kSkipFIFOCompaction = 128, kSkipMmapReads = 256, }; const int kRangeDelSkipConfigs = // Plain tables do not support range deletions. kSkipPlainTable | // MmapReads disables the iterator pinning that RangeDelAggregator // requires. kSkipMmapReads; explicit DBTestBase(const std::string path); ~DBTestBase(); static std::string RandomString(Random* rnd, int len) { std::string r; test::RandomString(rnd, len, &r); return r; } static std::string Key(int i) { char buf[100]; snprintf(buf, sizeof(buf), "key%06d", i); return std::string(buf); } static bool ShouldSkipOptions(int option_config, int skip_mask = kNoSkip); // Switch to a fresh database with the next option configuration to // test. Return false if there are no more configurations to test. bool ChangeOptions(int skip_mask = kNoSkip); // Switch between different compaction styles. bool ChangeCompactOptions(); // Switch between different WAL-realted options. bool ChangeWalOptions(); // Switch between different filter policy // Jump from kDefault to kFilter to kFullFilter bool ChangeFilterOptions(); // Switch between different DB options for file ingestion tests. bool ChangeOptionsForFileIngestionTest(); // Return the current option configuration. 
Options CurrentOptions(const anon::OptionsOverride& options_override = anon::OptionsOverride()) const; Options CurrentOptions(const Options& default_options, const anon::OptionsOverride& options_override = anon::OptionsOverride()) const; static Options GetDefaultOptions(); Options GetOptions(int option_config, const Options& default_options = GetDefaultOptions(), const anon::OptionsOverride& options_override = anon::OptionsOverride()) const; DBImpl* dbfull() { return reinterpret_cast(db_); } void CreateColumnFamilies(const std::vector& cfs, const Options& options); void CreateAndReopenWithCF(const std::vector& cfs, const Options& options); void ReopenWithColumnFamilies(const std::vector& cfs, const std::vector& options); void ReopenWithColumnFamilies(const std::vector& cfs, const Options& options); Status TryReopenWithColumnFamilies(const std::vector& cfs, const std::vector& options); Status TryReopenWithColumnFamilies(const std::vector& cfs, const Options& options); void Reopen(const Options& options); void Close(); void DestroyAndReopen(const Options& options); void Destroy(const Options& options, bool delete_cf_paths = false); Status ReadOnlyReopen(const Options& options); Status TryReopen(const Options& options); bool IsDirectIOSupported(); bool IsMemoryMappedAccessSupported() const; Status Flush(int cf = 0); Status Flush(const std::vector& cf_ids); Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()); Status Put(int cf, const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()); Status Merge(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()); Status Merge(int cf, const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()); Status Delete(const std::string& k); Status Delete(int cf, const std::string& k); Status SingleDelete(const std::string& k); Status SingleDelete(int cf, const std::string& k); bool SetPreserveDeletesSequenceNumber(SequenceNumber sn); std::string Get(const std::string& k, const Snapshot* 
snapshot = nullptr); std::string Get(int cf, const std::string& k, const Snapshot* snapshot = nullptr); Status Get(const std::string& k, PinnableSlice* v); std::vector MultiGet(std::vector cfs, const std::vector& k, const Snapshot* snapshot, const bool batched); std::vector MultiGet(const std::vector& k, const Snapshot* snapshot = nullptr); uint64_t GetNumSnapshots(); uint64_t GetTimeOldestSnapshots(); uint64_t GetSequenceOldestSnapshots(); // Return a string that contains all key,value pairs in order, // formatted like "(k1->v1)(k2->v2)". std::string Contents(int cf = 0); std::string AllEntriesFor(const Slice& user_key, int cf = 0); #ifndef ROCKSDB_LITE int NumSortedRuns(int cf = 0); uint64_t TotalSize(int cf = 0); uint64_t SizeAtLevel(int level); size_t TotalLiveFiles(int cf = 0); size_t CountLiveFiles(); int NumTableFilesAtLevel(int level, int cf = 0); double CompressionRatioAtLevel(int level, int cf = 0); int TotalTableFiles(int cf = 0, int levels = -1); #endif // ROCKSDB_LITE // Return spread of files per level std::string FilesPerLevel(int cf = 0); size_t CountFiles(); uint64_t Size(const Slice& start, const Slice& limit, int cf = 0); void Compact(int cf, const Slice& start, const Slice& limit, uint32_t target_path_id); void Compact(int cf, const Slice& start, const Slice& limit); void Compact(const Slice& start, const Slice& limit); // Do n memtable compactions, each of which produces an sstable // covering the range [small,large]. void MakeTables(int n, const std::string& small, const std::string& large, int cf = 0); // Prevent pushing of new sstables into deeper levels by adding // tables that cover a specified range to all levels. 
void FillLevels(const std::string& smallest, const std::string& largest, int cf); void MoveFilesToLevel(int level, int cf = 0); #ifndef ROCKSDB_LITE void DumpFileCounts(const char* label); #endif // ROCKSDB_LITE std::string DumpSSTableList(); static void GetSstFiles(Env* env, std::string path, std::vector* files); int GetSstFileCount(std::string path); // this will generate non-overlapping files since it keeps increasing key_idx void GenerateNewFile(Random* rnd, int* key_idx, bool nowait = false); void GenerateNewFile(int fd, Random* rnd, int* key_idx, bool nowait = false); static const int kNumKeysByGenerateNewRandomFile; static const int KNumKeysByGenerateNewFile = 100; void GenerateNewRandomFile(Random* rnd, bool nowait = false); std::string IterStatus(Iterator* iter); Options OptionsForLogIterTest(); std::string DummyString(size_t len, char c = 'a'); void VerifyIterLast(std::string expected_key, int cf = 0); // Used to test InplaceUpdate // If previous value is nullptr or delta is > than previous value, // sets newValue with delta // If previous value is not empty, // updates previous value with 'b' string of previous value size - 1. 
static UpdateStatus updateInPlaceSmallerSize(char* prevValue, uint32_t* prevSize, Slice delta, std::string* newValue); static UpdateStatus updateInPlaceSmallerVarintSize(char* prevValue, uint32_t* prevSize, Slice delta, std::string* newValue); static UpdateStatus updateInPlaceLargerSize(char* prevValue, uint32_t* prevSize, Slice delta, std::string* newValue); static UpdateStatus updateInPlaceNoAction(char* prevValue, uint32_t* prevSize, Slice delta, std::string* newValue); // Utility method to test InplaceUpdate void validateNumberOfEntries(int numValues, int cf = 0); void CopyFile(const std::string& source, const std::string& destination, uint64_t size = 0); std::unordered_map GetAllSSTFiles( uint64_t* total_size = nullptr); std::vector ListTableFiles(Env* env, const std::string& path); void VerifyDBFromMap( std::map true_data, size_t* total_reads_res = nullptr, bool tailing_iter = false, std::map status = std::map()); void VerifyDBInternal( std::vector> true_data); #ifndef ROCKSDB_LITE uint64_t GetNumberOfSstFilesForColumnFamily(DB* db, std::string column_family_name); #endif // ROCKSDB_LITE uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) { return options.statistics->getTickerCount(ticker_type); } uint64_t TestGetAndResetTickerCount(const Options& options, Tickers ticker_type) { return options.statistics->getAndResetTickerCount(ticker_type); } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/db_universal_compaction_test.cc000066400000000000000000002326161370372246700221030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #include "db/db_test_util.h" #include "port/stack_trace.h" #if !defined(ROCKSDB_LITE) #include "rocksdb/utilities/table_properties_collectors.h" #include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { static std::string CompressibleString(Random* rnd, int len) { std::string r; test::CompressibleString(rnd, 0.8, len, &r); return r; } class DBTestUniversalCompactionBase : public DBTestBase, public ::testing::WithParamInterface> { public: explicit DBTestUniversalCompactionBase( const std::string& path) : DBTestBase(path) {} void SetUp() override { num_levels_ = std::get<0>(GetParam()); exclusive_manual_compaction_ = std::get<1>(GetParam()); } int num_levels_; bool exclusive_manual_compaction_; }; class DBTestUniversalCompaction : public DBTestUniversalCompactionBase { public: DBTestUniversalCompaction() : DBTestUniversalCompactionBase("/db_universal_compaction_test") {} }; class DBTestUniversalCompaction2 : public DBTestBase { public: DBTestUniversalCompaction2() : DBTestBase("/db_universal_compaction_test2") {} }; namespace { void VerifyCompactionResult( const ColumnFamilyMetaData& cf_meta, const std::set& overlapping_file_numbers) { #ifndef NDEBUG for (auto& level : cf_meta.levels) { for (auto& file : level.files) { assert(overlapping_file_numbers.find(file.name) == overlapping_file_numbers.end()); } } #endif } class KeepFilter : public CompactionFilter { public: bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { return false; } const char* Name() const override { return "KeepFilter"; } }; class KeepFilterFactory : public CompactionFilterFactory { public: explicit KeepFilterFactory(bool check_context = false) : check_context_(check_context) {} std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context) override { if (check_context_) { EXPECT_EQ(expect_full_compaction_.load(), 
context.is_full_compaction); EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction); } return std::unique_ptr(new KeepFilter()); } const char* Name() const override { return "KeepFilterFactory"; } bool check_context_; std::atomic_bool expect_full_compaction_; std::atomic_bool expect_manual_compaction_; }; class DelayFilter : public CompactionFilter { public: explicit DelayFilter(DBTestBase* d) : db_test(d) {} bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { db_test->env_->addon_time_.fetch_add(1000); return true; } const char* Name() const override { return "DelayFilter"; } private: DBTestBase* db_test; }; class DelayFilterFactory : public CompactionFilterFactory { public: explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {} std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& /*context*/) override { return std::unique_ptr(new DelayFilter(db_test)); } const char* Name() const override { return "DelayFilterFactory"; } private: DBTestBase* db_test; }; } // namespace // Make sure we don't trigger a problem if the trigger condtion is given // to be 0, which is invalid. TEST_P(DBTestUniversalCompaction, UniversalCompactionSingleSortedRun) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; // Config universal compaction to always compact to one single sorted run. 
options.level0_file_num_compaction_trigger = 0; options.compaction_options_universal.size_ratio = 10; options.compaction_options_universal.min_merge_width = 2; options.compaction_options_universal.max_size_amplification_percent = 0; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files KeepFilterFactory* filter = new KeepFilterFactory(true); filter->expect_manual_compaction_.store(false); options.compaction_filter_factory.reset(filter); DestroyAndReopen(options); ASSERT_EQ(1, db_->GetOptions().level0_file_num_compaction_trigger); Random rnd(301); int key_idx = 0; filter->expect_full_compaction_.store(true); for (int num = 0; num < 16; num++) { // Write 100KB file. And immediately it should be compacted to one file. GenerateNewFile(&rnd, &key_idx); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumSortedRuns(0), 1); } ASSERT_OK(Put(Key(key_idx), "")); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumSortedRuns(0), 1); } TEST_P(DBTestUniversalCompaction, OptimizeFiltersForHits) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.size_ratio = 5; options.num_levels = num_levels_; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; BlockBasedTableOptions bbto; bbto.cache_index_and_filter_blocks = true; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.optimize_filters_for_hits = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.memtable_factory.reset(new SpecialSkipListFactory(3)); DestroyAndReopen(options); // block compaction from happening 
env_->SetBackgroundThreads(1, Env::LOW); test::SleepingBackgroundTask sleeping_task_low; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::LOW); for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { Put(Key(num * 10), "val"); if (num) { dbfull()->TEST_WaitForFlushMemTable(); } Put(Key(30 + num * 10), "val"); Put(Key(60 + num * 10), "val"); } Put("", ""); dbfull()->TEST_WaitForFlushMemTable(); // Query set of non existing keys for (int i = 5; i < 90; i += 10) { ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); } // Make sure bloom filter is used at least once. ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); auto prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); // Make sure bloom filter is used for all but the last L0 file when looking // up a non-existent key that's in the range of all L0 files. ASSERT_EQ(Get(Key(35)), "NOT_FOUND"); ASSERT_EQ(prev_counter + NumTableFilesAtLevel(0) - 1, TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); prev_counter = TestGetTickerCount(options, BLOOM_FILTER_USEFUL); // Unblock compaction and wait it for happening. sleeping_task_low.WakeUp(); dbfull()->TEST_WaitForCompact(); // The same queries will not trigger bloom filter for (int i = 5; i < 90; i += 10) { ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); } ASSERT_EQ(prev_counter, TestGetTickerCount(options, BLOOM_FILTER_USEFUL)); } // TODO(kailiu) The tests on UniversalCompaction has some issues: // 1. A lot of magic numbers ("11" or "12"). // 2. Made assumption on the memtable flush conditions, which may change from // time to time. 
TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) { Options options; options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.size_ratio = 5; options.num_levels = num_levels_; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; KeepFilterFactory* filter = new KeepFilterFactory(true); filter->expect_manual_compaction_.store(false); options.compaction_filter_factory.reset(filter); options = CurrentOptions(options); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBTestWritableFile.GetPreallocationStatus", [&](void* arg) { ASSERT_TRUE(arg != nullptr); size_t preallocation_size = *(static_cast(arg)); if (num_levels_ > 3) { ASSERT_LE(preallocation_size, options.target_file_size_base * 1.1); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); int key_idx = 0; filter->expect_full_compaction_.store(true); // Stage 1: // Generate a set of files at level 0, but don't trigger level-0 // compaction. for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { // Write 100KB GenerateNewFile(1, &rnd, &key_idx); } // Generate one more file at level-0, which should trigger level-0 // compaction. GenerateNewFile(1, &rnd, &key_idx); // Suppose each file flushed from mem table has size 1. Now we compact // (level0_file_num_compaction_trigger+1)=4 files and should have a big // file of size 4. ASSERT_EQ(NumSortedRuns(1), 1); // Stage 2: // Now we have one file at level 0, with size 4. We also have some data in // mem table. Let's continue generating new files at level 0, but don't // trigger level-0 compaction. // First, clean up memtable before inserting new data. 
This will generate // a level-0 file, with size around 0.4 (according to previously written // data amount). filter->expect_full_compaction_.store(false); ASSERT_OK(Flush(1)); for (int num = 0; num < options.level0_file_num_compaction_trigger - 3; num++) { GenerateNewFile(1, &rnd, &key_idx); ASSERT_EQ(NumSortedRuns(1), num + 3); } // Generate one more file at level-0, which should trigger level-0 // compaction. GenerateNewFile(1, &rnd, &key_idx); // Before compaction, we have 4 files at level 0, with size 4, 0.4, 1, 1. // After compaction, we should have 2 files, with size 4, 2.4. ASSERT_EQ(NumSortedRuns(1), 2); // Stage 3: // Now we have 2 files at level 0, with size 4 and 2.4. Continue // generating new files at level 0. for (int num = 0; num < options.level0_file_num_compaction_trigger - 3; num++) { GenerateNewFile(1, &rnd, &key_idx); ASSERT_EQ(NumSortedRuns(1), num + 3); } // Generate one more file at level-0, which should trigger level-0 // compaction. GenerateNewFile(1, &rnd, &key_idx); // Before compaction, we have 4 files at level 0, with size 4, 2.4, 1, 1. // After compaction, we should have 3 files, with size 4, 2.4, 2. ASSERT_EQ(NumSortedRuns(1), 3); // Stage 4: // Now we have 3 files at level 0, with size 4, 2.4, 2. Let's generate a // new file of size 1. GenerateNewFile(1, &rnd, &key_idx); dbfull()->TEST_WaitForCompact(); // Level-0 compaction is triggered, but no file will be picked up. ASSERT_EQ(NumSortedRuns(1), 4); // Stage 5: // Now we have 4 files at level 0, with size 4, 2.4, 2, 1. Let's generate // a new file of size 1. filter->expect_full_compaction_.store(true); GenerateNewFile(1, &rnd, &key_idx); dbfull()->TEST_WaitForCompact(); // All files at level 0 will be compacted into a single one. 
ASSERT_EQ(NumSortedRuns(1), 1); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBTestUniversalCompaction, UniversalCompactionSizeAmplification) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Trigger compaction if size amplification exceeds 110% options.compaction_options_universal.max_size_amplification_percent = 110; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); Random rnd(301); int key_idx = 0; // Generate two files in Level 0. Both files are approx the same size. for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); ASSERT_EQ(NumSortedRuns(1), num + 1); } ASSERT_EQ(NumSortedRuns(1), 2); // Flush whatever is remaining in memtable. This is typically // small, which should not trigger size ratio based compaction // but will instead trigger size amplification. 
ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); // Verify that size amplification did occur ASSERT_EQ(NumSortedRuns(1), 1); } TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionSizeAmplification) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; // Initial setup of compaction_options_universal will prevent universal // compaction from happening options.compaction_options_universal.size_ratio = 100; options.compaction_options_universal.min_merge_width = 100; DestroyAndReopen(options); int total_picked_compactions = 0; int total_size_amp_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { if (arg) { total_picked_compactions++; Compaction* c = static_cast(arg); if (c->compaction_reason() == CompactionReason::kUniversalSizeAmplification) { total_size_amp_compactions++; } } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); MutableCFOptions mutable_cf_options; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); int key_idx = 0; // Generate two files in Level 0. Both files are approx the same size. for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); ASSERT_EQ(NumSortedRuns(1), num + 1); } ASSERT_EQ(NumSortedRuns(1), 2); // Flush whatever is remaining in memtable. This is typically // small, which should not trigger size ratio based compaction // but could instead trigger size amplification if it's set // to 110. 
ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); // Verify compaction did not happen ASSERT_EQ(NumSortedRuns(1), 3); // Trigger compaction if size amplification exceeds 110% without reopening DB ASSERT_EQ(dbfull() ->GetOptions(handles_[1]) .compaction_options_universal.max_size_amplification_percent, 200U); ASSERT_OK(dbfull()->SetOptions(handles_[1], {{"compaction_options_universal", "{max_size_amplification_percent=110;}"}})); ASSERT_EQ(dbfull() ->GetOptions(handles_[1]) .compaction_options_universal.max_size_amplification_percent, 110u); ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], &mutable_cf_options)); ASSERT_EQ(110u, mutable_cf_options.compaction_options_universal .max_size_amplification_percent); dbfull()->TEST_WaitForCompact(); // Verify that size amplification did happen ASSERT_EQ(NumSortedRuns(1), 1); ASSERT_EQ(total_picked_compactions, 1); ASSERT_EQ(total_size_amp_compactions, 1); } TEST_P(DBTestUniversalCompaction, DynamicUniversalCompactionReadAmplification) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 3; // Initial setup of compaction_options_universal will prevent universal // compaction from happening options.compaction_options_universal.max_size_amplification_percent = 2000; options.compaction_options_universal.size_ratio = 0; options.compaction_options_universal.min_merge_width = 100; DestroyAndReopen(options); int total_picked_compactions = 0; int total_size_ratio_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { if (arg) { total_picked_compactions++; Compaction* c = static_cast(arg); if (c->compaction_reason() == CompactionReason::kUniversalSizeRatio) { total_size_ratio_compactions++; } } }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); MutableCFOptions mutable_cf_options; CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); int key_idx = 0; // Generate three files in Level 0. All files are approx the same size. for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); ASSERT_EQ(NumSortedRuns(1), num + 1); } ASSERT_EQ(NumSortedRuns(1), options.level0_file_num_compaction_trigger); // Flush whatever is remaining in memtable. This is typically small, about // 30KB. ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); // Verify compaction did not happen ASSERT_EQ(NumSortedRuns(1), options.level0_file_num_compaction_trigger + 1); ASSERT_EQ(total_picked_compactions, 0); ASSERT_OK(dbfull()->SetOptions( handles_[1], {{"compaction_options_universal", "{min_merge_width=2;max_merge_width=2;size_ratio=100;}"}})); ASSERT_EQ(dbfull() ->GetOptions(handles_[1]) .compaction_options_universal.min_merge_width, 2u); ASSERT_EQ(dbfull() ->GetOptions(handles_[1]) .compaction_options_universal.max_merge_width, 2u); ASSERT_EQ( dbfull()->GetOptions(handles_[1]).compaction_options_universal.size_ratio, 100u); ASSERT_OK(dbfull()->TEST_GetLatestMutableCFOptions(handles_[1], &mutable_cf_options)); ASSERT_EQ(mutable_cf_options.compaction_options_universal.size_ratio, 100u); ASSERT_EQ(mutable_cf_options.compaction_options_universal.min_merge_width, 2u); ASSERT_EQ(mutable_cf_options.compaction_options_universal.max_merge_width, 2u); dbfull()->TEST_WaitForCompact(); // Files in L0 are approx: 0.3 (30KB), 1, 1, 1. // On compaction: the files are below the size amp threshold, so we // fallthrough to checking read amp conditions. The configured size ratio is // not big enough to take 0.3 into consideration. 
So the next files 1 and 1 // are compacted together first as they satisfy size ratio condition and // (min_merge_width, max_merge_width) condition, to give out a file size of 2. // Next, the newly generated 2 and the last file 1 are compacted together. So // at the end: #sortedRuns = 2, #picked_compactions = 2, and all the picked // ones are size ratio based compactions. ASSERT_EQ(NumSortedRuns(1), 2); // If max_merge_width had not been changed dynamically above, and if it // continued to be the default value of UINIT_MAX, total_picked_compactions // would have been 1. ASSERT_EQ(total_picked_compactions, 2); ASSERT_EQ(total_size_ratio_compactions, 2); } TEST_P(DBTestUniversalCompaction, CompactFilesOnUniversalCompaction) { const int kTestKeySize = 16; const int kTestValueSize = 984; const int kEntrySize = kTestKeySize + kTestValueSize; const int kEntriesPerBuffer = 10; ChangeCompactOptions(); Options options; options.create_if_missing = true; options.compaction_style = kCompactionStyleLevel; options.num_levels = 1; options.target_file_size_base = options.write_buffer_size; options.compression = kNoCompression; options = CurrentOptions(options); options.write_buffer_size = kEntrySize * kEntriesPerBuffer; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_EQ(options.compaction_style, kCompactionStyleUniversal); Random rnd(301); for (int key = 1024 * kEntriesPerBuffer; key >= 0; --key) { ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize))); } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForCompact(); ColumnFamilyMetaData cf_meta; dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); std::vector compaction_input_file_names; for (auto file : cf_meta.levels[0].files) { if (rnd.OneIn(2)) { compaction_input_file_names.push_back(file.name); } } if (compaction_input_file_names.size() == 0) { compaction_input_file_names.push_back( cf_meta.levels[0].files[0].name); } // expect fail since universal compaction only allow L0 output 
ASSERT_FALSE(dbfull() ->CompactFiles(CompactionOptions(), handles_[1], compaction_input_file_names, 1) .ok()); // expect ok and verify the compacted files no longer exist. ASSERT_OK(dbfull()->CompactFiles( CompactionOptions(), handles_[1], compaction_input_file_names, 0)); dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); VerifyCompactionResult( cf_meta, std::set(compaction_input_file_names.begin(), compaction_input_file_names.end())); compaction_input_file_names.clear(); // Pick the first and the last file, expect everything is // compacted into one single file. compaction_input_file_names.push_back( cf_meta.levels[0].files[0].name); compaction_input_file_names.push_back( cf_meta.levels[0].files[ cf_meta.levels[0].files.size() - 1].name); ASSERT_OK(dbfull()->CompactFiles( CompactionOptions(), handles_[1], compaction_input_file_names, 0)); dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); ASSERT_EQ(cf_meta.levels[0].files.size(), 1U); } TEST_P(DBTestUniversalCompaction, UniversalCompactionTargetLevel) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB options.num_levels = 7; options.disable_auto_compactions = true; DestroyAndReopen(options); // Generate 3 overlapping files Random rnd(301); for (int i = 0; i < 210; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 100))); } ASSERT_OK(Flush()); for (int i = 200; i < 300; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 100))); } ASSERT_OK(Flush()); for (int i = 250; i < 260; i++) { ASSERT_OK(Put(Key(i), RandomString(&rnd, 100))); } ASSERT_OK(Flush()); ASSERT_EQ("3", FilesPerLevel(0)); // Compact all files into 1 file and put it in L4 CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 4; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; db_->CompactRange(compact_options, nullptr, nullptr); ASSERT_EQ("0,0,0,0,1", FilesPerLevel(0)); } 
#ifndef ROCKSDB_VALGRIND_RUN class DBTestUniversalCompactionMultiLevels : public DBTestUniversalCompactionBase { public: DBTestUniversalCompactionMultiLevels() : DBTestUniversalCompactionBase( "/db_universal_compaction_multi_levels_test") {} }; TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 8; options.max_background_compactions = 3; options.target_file_size_base = 32 * 1024; CreateAndReopenWithCF({"pikachu"}, options); // Trigger compaction if size amplification exceeds 110% options.compaction_options_universal.max_size_amplification_percent = 110; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); Random rnd(301); int num_keys = 100000; for (int i = 0; i < num_keys * 2; i++) { ASSERT_OK(Put(1, Key(i % num_keys), Key(i))); } dbfull()->TEST_WaitForCompact(); for (int i = num_keys; i < num_keys * 2; i++) { ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); } } // Tests universal compaction with trivial move enabled TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial_move++; ASSERT_TRUE(arg != nullptr); int output_level = *(static_cast(arg)); ASSERT_EQ(output_level, 0); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 3; options.write_buffer_size 
= 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 2; options.target_file_size_base = 32 * 1024; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Trigger compaction if size amplification exceeds 110% options.compaction_options_universal.max_size_amplification_percent = 110; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); Random rnd(301); int num_keys = 150000; for (int i = 0; i < num_keys; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } std::vector values; ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_GT(trivial_move, 0); ASSERT_GT(non_trivial_move, 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } INSTANTIATE_TEST_CASE_P(MultiLevels, DBTestUniversalCompactionMultiLevels, ::testing::Combine(::testing::Values(3, 20), ::testing::Bool())); class DBTestUniversalCompactionParallel : public DBTestUniversalCompactionBase { public: DBTestUniversalCompactionParallel() : DBTestUniversalCompactionBase( "/db_universal_compaction_prallel_test") {} }; TEST_P(DBTestUniversalCompactionParallel, UniversalCompactionParallel) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 1 << 10; // 1KB options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 3; options.max_background_flushes = 3; options.target_file_size_base = 1 * 1024; options.compaction_options_universal.max_size_amplification_percent = 110; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Delay every compaction so multiple compactions will happen. 
std::atomic num_compactions_running(0); std::atomic has_parallel(false); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():Start", [&](void* /*arg*/) { if (num_compactions_running.fetch_add(1) > 0) { has_parallel.store(true); return; } for (int nwait = 0; nwait < 20000; nwait++) { if (has_parallel.load() || num_compactions_running.load() > 1) { has_parallel.store(true); break; } env_->SleepForMicroseconds(1000); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "CompactionJob::Run():End", [&](void* /*arg*/) { num_compactions_running.fetch_add(-1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); Random rnd(301); int num_keys = 30000; for (int i = 0; i < num_keys * 2; i++) { ASSERT_OK(Put(1, Key(i % num_keys), Key(i))); } dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(num_compactions_running.load(), 0); ASSERT_TRUE(has_parallel.load()); for (int i = num_keys; i < num_keys * 2; i++) { ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); } // Reopen and check. 
ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int i = num_keys; i < num_keys * 2; i++) { ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i)); } } TEST_P(DBTestUniversalCompactionParallel, PickByFileNumberBug) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 1 * 1024; // 1KB options.level0_file_num_compaction_trigger = 7; options.max_background_compactions = 2; options.target_file_size_base = 1024 * 1024; // 1MB // Disable size amplifiction compaction options.compaction_options_universal.max_size_amplification_percent = UINT_MAX; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBTestUniversalCompactionParallel::PickByFileNumberBug:0", "BackgroundCallCompaction:0"}, {"UniversalCompactionBuilder::PickCompaction:Return", "DBTestUniversalCompactionParallel::PickByFileNumberBug:1"}, {"DBTestUniversalCompactionParallel::PickByFileNumberBug:2", "CompactionJob::Run():Start"}}); int total_picked_compactions = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { if (arg) { total_picked_compactions++; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Write 7 files to trigger compaction int key_idx = 1; for (int i = 1; i <= 70; i++) { std::string k = Key(key_idx++); ASSERT_OK(Put(k, k)); if (i % 10 == 0) { ASSERT_OK(Flush()); } } // Wait for the 1st background compaction process to start TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:0"); TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:1"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); // Write 3 files while 1st compaction is held // These 3 files have different sizes to avoid compacting based on size_ratio int num_keys = 1000; for (int i = 0; i < 3; i++) { for (int j = 1; j <= num_keys; j++) { std::string k = 
Key(key_idx++); ASSERT_OK(Put(k, k)); } ASSERT_OK(Flush()); num_keys -= 100; } // Hold the 1st compaction from finishing TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:2"); dbfull()->TEST_WaitForCompact(); // There should only be one picked compaction as the score drops below one // after the first one is picked. EXPECT_EQ(total_picked_compactions, 1); EXPECT_EQ(TotalTableFiles(), 4); // Stop SyncPoint and destroy the DB and reopen it again ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); key_idx = 1; total_picked_compactions = 0; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Write 7 files to trigger compaction for (int i = 1; i <= 70; i++) { std::string k = Key(key_idx++); ASSERT_OK(Put(k, k)); if (i % 10 == 0) { ASSERT_OK(Flush()); } } // Wait for the 1st background compaction process to start TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:0"); TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:1"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); // Write 8 files while 1st compaction is held // These 8 files have different sizes to avoid compacting based on size_ratio num_keys = 1000; for (int i = 0; i < 8; i++) { for (int j = 1; j <= num_keys; j++) { std::string k = Key(key_idx++); ASSERT_OK(Put(k, k)); } ASSERT_OK(Flush()); num_keys -= 100; } // Wait for the 2nd background compaction process to start TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:0"); TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:1"); // Hold the 1st and 2nd compaction from finishing TEST_SYNC_POINT("DBTestUniversalCompactionParallel::PickByFileNumberBug:2"); dbfull()->TEST_WaitForCompact(); // This time we will trigger a compaction because of size ratio and // another compaction because of number of files that are not compacted // greater 
than 7 EXPECT_GE(total_picked_compactions, 2); } INSTANTIATE_TEST_CASE_P(Parallel, DBTestUniversalCompactionParallel, ::testing::Combine(::testing::Values(1, 10), ::testing::Values(false))); #endif // ROCKSDB_VALGRIND_RUN TEST_P(DBTestUniversalCompaction, UniversalCompactionOptions) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; // 4KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 4; options.num_levels = num_levels_; options.compaction_options_universal.compression_size_percent = -1; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); int key_idx = 0; for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { // Write 100KB (100 values, each 1K) for (int i = 0; i < 100; i++) { ASSERT_OK(Put(1, Key(key_idx), RandomString(&rnd, 990))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(handles_[1]); if (num < options.level0_file_num_compaction_trigger - 1) { ASSERT_EQ(NumSortedRuns(1), num + 1); } } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumSortedRuns(1), 1); } TEST_P(DBTestUniversalCompaction, UniversalCompactionStopStyleSimilarSize) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; // 4KB options.target_file_size_base = 32 << 10; // 32KB // trigger compaction if there are >= 4 files options.level0_file_num_compaction_trigger = 4; options.compaction_options_universal.size_ratio = 10; options.compaction_options_universal.stop_style = kCompactionStopStyleSimilarSize; options.num_levels = num_levels_; DestroyAndReopen(options); Random rnd(301); int key_idx = 0; // Stage 1: // Generate a set of files at level 0, but don't trigger level-0 // compaction. 
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { // Write 100KB (100 values, each 1K) for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 990))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(NumSortedRuns(), num + 1); } // Generate one more file at level-0, which should trigger level-0 // compaction. for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 990))); key_idx++; } dbfull()->TEST_WaitForCompact(); // Suppose each file flushed from mem table has size 1. Now we compact // (level0_file_num_compaction_trigger+1)=4 files and should have a big // file of size 4. ASSERT_EQ(NumSortedRuns(), 1); // Stage 2: // Now we have one file at level 0, with size 4. We also have some data in // mem table. Let's continue generating new files at level 0, but don't // trigger level-0 compaction. // First, clean up memtable before inserting new data. This will generate // a level-0 file, with size around 0.4 (according to previously written // data amount). dbfull()->Flush(FlushOptions()); for (int num = 0; num < options.level0_file_num_compaction_trigger - 3; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 990))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(NumSortedRuns(), num + 3); } // Generate one more file at level-0, which should trigger level-0 // compaction. for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 990))); key_idx++; } dbfull()->TEST_WaitForCompact(); // Before compaction, we have 4 files at level 0, with size 4, 0.4, 1, 1. // After compaction, we should have 3 files, with size 4, 0.4, 2. ASSERT_EQ(NumSortedRuns(), 3); // Stage 3: // Now we have 3 files at level 0, with size 4, 0.4, 2. Generate one // more file at level-0, which should trigger level-0 compaction. 
for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(key_idx), RandomString(&rnd, 990))); key_idx++; } dbfull()->TEST_WaitForCompact(); // Level-0 compaction is triggered, but no file will be picked up. ASSERT_EQ(NumSortedRuns(), 4); } TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio1) { if (!Snappy_Supported()) { return; } Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 2; options.num_levels = num_levels_; options.compaction_options_universal.compression_size_percent = 70; DestroyAndReopen(options); Random rnd(301); int key_idx = 0; // The first compaction (2) is compressed. for (int num = 0; num < 2; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } ASSERT_LT(TotalSize(), 110000U * 2 * 0.9); // The second compaction (4) is compressed for (int num = 0; num < 2; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } ASSERT_LT(TotalSize(), 110000 * 4 * 0.9); // The third compaction (2 4) is compressed since this time it is // (1 1 3.2) and 3.2/5.2 doesn't reach ratio. for (int num = 0; num < 2; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } ASSERT_LT(TotalSize(), 110000 * 6 * 0.9); // When we start for the compaction up to (2 4 8), the latest // compressed is not compressed. 
for (int num = 0; num < 8; num++) { // Write 110KB (11 values, each 10K) for (int i = 0; i < 11; i++) { ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } ASSERT_GT(TotalSize(), 110000 * 11 * 0.8 + 110000 * 2); } TEST_P(DBTestUniversalCompaction, UniversalCompactionCompressRatio2) { if (!Snappy_Supported()) { return; } Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = 2; options.num_levels = num_levels_; options.compaction_options_universal.compression_size_percent = 95; DestroyAndReopen(options); Random rnd(301); int key_idx = 0; // When we start for the compaction up to (2 4 8), the latest // compressed is compressed given the size ratio to compress. for (int num = 0; num < 14; num++) { // Write 120KB (12 values, each 10K) for (int i = 0; i < 12; i++) { ASSERT_OK(Put(Key(key_idx), CompressibleString(&rnd, 10000))); key_idx++; } dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); } ASSERT_LT(TotalSize(), 120000U * 12 * 0.82 + 120000 * 2); } #ifndef ROCKSDB_VALGRIND_RUN // Test that checks trivial move in universal compaction TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest1) { int32_t trivial_move = 0; int32_t non_trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { non_trivial_move++; ASSERT_TRUE(arg != nullptr); int output_level = *(static_cast(arg)); ASSERT_EQ(output_level, 0); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.compaction_style = 
kCompactionStyleUniversal; options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 2; options.write_buffer_size = 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 3; options.max_background_compactions = 1; options.target_file_size_base = 32 * 1024; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Trigger compaction if size amplification exceeds 110% options.compaction_options_universal.max_size_amplification_percent = 110; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); Random rnd(301); int num_keys = 250000; for (int i = 0; i < num_keys; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } std::vector values; ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_GT(trivial_move, 0); ASSERT_GT(non_trivial_move, 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } // Test that checks trivial move in universal compaction TEST_P(DBTestUniversalCompaction, UniversalCompactionTrivialMoveTest2) { int32_t trivial_move = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:TrivialMove", [&](void* /*arg*/) { trivial_move++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial", [&](void* arg) { ASSERT_TRUE(arg != nullptr); int output_level = *(static_cast(arg)); ASSERT_EQ(output_level, 0); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.allow_trivial_move = true; options.num_levels = 15; options.write_buffer_size = 100 << 10; // 100KB options.level0_file_num_compaction_trigger = 8; options.max_background_compactions = 2; options.target_file_size_base = 64 * 1024; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); // Trigger compaction if size amplification exceeds 110% 
options.compaction_options_universal.max_size_amplification_percent = 110; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); Random rnd(301); int num_keys = 500000; for (int i = 0; i < num_keys; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } std::vector values; ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); ASSERT_GT(trivial_move, 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif // ROCKSDB_VALGRIND_RUN TEST_P(DBTestUniversalCompaction, UniversalCompactionFourPaths) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 300 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 300 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 500 * 1024); options.db_paths.emplace_back(dbname_ + "_4", 1024 * 1024 * 1024); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; std::vector filenames; env_->GetChildren(options.db_paths[1].path, &filenames); // Delete archival files. for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); } env_->DeleteDir(options.db_paths[1].path); Reopen(options); Random rnd(301); int key_idx = 0; // First three 110KB files are not going to second path. 
// After that, (100K, 200K) for (int num = 0; num < 3; num++) { GenerateNewFile(&rnd, &key_idx); } // Another 110KB triggers a compaction to 400K file to second path GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); // (1, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1,1,4) -> (2, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 2, 4) -> (3, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 3, 4) -> (8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); // (1, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 1, 8) -> (2, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); // (1, 2, 8) -> (3, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 3, 8) -> (4, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); // (1, 4, 8) -> (5, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[3].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < 
key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } TEST_P(DBTestUniversalCompaction, UniversalCompactionCFPathUse) { Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 300 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 300 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 500 * 1024); options.db_paths.emplace_back(dbname_ + "_4", 1024 * 1024 * 1024); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.size_ratio = 10; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; std::vector option_vector; option_vector.emplace_back(options); ColumnFamilyOptions cf_opt1(options), cf_opt2(options); // Configure CF1 specific paths. cf_opt1.cf_paths.emplace_back(dbname_ + "cf1", 300 * 1024); cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_2", 300 * 1024); cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_3", 500 * 1024); cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_4", 1024 * 1024 * 1024); option_vector.emplace_back(DBOptions(options), cf_opt1); CreateColumnFamilies({"one"},option_vector[1]); // Configura CF2 specific paths. 
cf_opt2.cf_paths.emplace_back(dbname_ + "cf2", 300 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_2", 300 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_3", 500 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_4", 1024 * 1024 * 1024); option_vector.emplace_back(DBOptions(options), cf_opt2); CreateColumnFamilies({"two"},option_vector[2]); ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); Random rnd(301); int key_idx = 0; int key_idx1 = 0; int key_idx2 = 0; auto generate_file = [&]() { GenerateNewFile(0, &rnd, &key_idx); GenerateNewFile(1, &rnd, &key_idx1); GenerateNewFile(2, &rnd, &key_idx2); }; auto check_sstfilecount = [&](int path_id, int expected) { ASSERT_EQ(expected, GetSstFileCount(options.db_paths[path_id].path)); ASSERT_EQ(expected, GetSstFileCount(cf_opt1.cf_paths[path_id].path)); ASSERT_EQ(expected, GetSstFileCount(cf_opt2.cf_paths[path_id].path)); }; auto check_getvalues = [&]() { for (int i = 0; i < key_idx; i++) { auto v = Get(0, Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } for (int i = 0; i < key_idx1; i++) { auto v = Get(1, Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } for (int i = 0; i < key_idx2; i++) { auto v = Get(2, Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } }; // First three 110KB files are not going to second path. 
// After that, (100K, 200K) for (int num = 0; num < 3; num++) { generate_file(); } // Another 110KB triggers a compaction to 400K file to second path generate_file(); check_sstfilecount(2, 1); // (1, 4) generate_file(); check_sstfilecount(2, 1); check_sstfilecount(0, 1); // (1,1,4) -> (2, 4) generate_file(); check_sstfilecount(2, 1); check_sstfilecount(1, 1); check_sstfilecount(0, 0); // (1, 2, 4) -> (3, 4) generate_file(); check_sstfilecount(2, 1); check_sstfilecount(1, 1); check_sstfilecount(0, 0); // (1, 3, 4) -> (8) generate_file(); check_sstfilecount(3, 1); // (1, 8) generate_file(); check_sstfilecount(3, 1); check_sstfilecount(0, 1); // (1, 1, 8) -> (2, 8) generate_file(); check_sstfilecount(3, 1); check_sstfilecount(1, 1); // (1, 2, 8) -> (3, 8) generate_file(); check_sstfilecount(3, 1); check_sstfilecount(1, 1); check_sstfilecount(0, 0); // (1, 3, 8) -> (4, 8) generate_file(); check_sstfilecount(2, 1); check_sstfilecount(3, 1); // (1, 4, 8) -> (5, 8) generate_file(); check_sstfilecount(3, 1); check_sstfilecount(2, 1); check_sstfilecount(0, 0); check_getvalues(); ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); check_getvalues(); Destroy(options, true); } TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { std::function verify_func = [&](int num_keys_in_db) { std::string keys_in_db; Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { keys_in_db.append(iter->key().ToString()); keys_in_db.push_back(','); } delete iter; std::string expected_keys; for (int i = 0; i <= num_keys_in_db; i++) { expected_keys.append(Key(i)); expected_keys.push_back(','); } ASSERT_EQ(keys_in_db, expected_keys); }; Random rnd(301); int max_key1 = 200; int max_key2 = 600; int max_key3 = 800; const int KNumKeysPerFile = 10; // Stage 1: open a DB with universal compaction, num_levels=1 Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; 
options.num_levels = 1; options.write_buffer_size = 200 << 10; // 200KB options.level0_file_num_compaction_trigger = 3; options.memtable_factory.reset(new SpecialSkipListFactory(KNumKeysPerFile)); options = CurrentOptions(options); CreateAndReopenWithCF({"pikachu"}, options); for (int i = 0; i <= max_key1; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForCompact(); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); // Stage 2: reopen with universal compaction, num_levels=4 options.compaction_style = kCompactionStyleUniversal; options.num_levels = 4; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); verify_func(max_key1); // Insert more keys for (int i = max_key1 + 1; i <= max_key2; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForCompact(); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); verify_func(max_key2); // Compaction to non-L0 has happened. ASSERT_GT(NumTableFilesAtLevel(options.num_levels - 1, 1), 0); // Stage 3: Revert it back to one level and revert to num_levels=1. options.num_levels = 4; options.target_file_size_base = INT_MAX; ReopenWithColumnFamilies({"default", "pikachu"}, options); // Compact all to level 0 CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 0; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr); // Need to restart it once to remove higher level records in manifest. 
ReopenWithColumnFamilies({"default", "pikachu"}, options); // Final reopen options.compaction_style = kCompactionStyleUniversal; options.num_levels = 1; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); // Insert more keys for (int i = max_key2 + 1; i <= max_key3; i++) { // each value is 10K ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); dbfull()->TEST_WaitForFlushMemTable(handles_[1]); dbfull()->TEST_WaitForCompact(); } ASSERT_OK(Flush(1)); dbfull()->TEST_WaitForCompact(); verify_func(max_key3); } TEST_P(DBTestUniversalCompaction, UniversalCompactionSecondPathRatio) { if (!Snappy_Supported()) { return; } Options options = CurrentOptions(); options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 1024 * 1024 * 1024); options.compaction_style = kCompactionStyleUniversal; options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 111 << 10; // 114KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; options.num_levels = 1; options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); std::vector filenames; env_->GetChildren(options.db_paths[1].path, &filenames); // Delete archival files. for (size_t i = 0; i < filenames.size(); ++i) { env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); } env_->DeleteDir(options.db_paths[1].path); Reopen(options); Random rnd(301); int key_idx = 0; // First three 110KB files are not going to second path. 
// After that, (100K, 200K) for (int num = 0; num < 3; num++) { GenerateNewFile(&rnd, &key_idx); } // Another 110KB triggers a compaction to 400K file to second path GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); // (1, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1,1,4) -> (2, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 2, 4) -> (3, 4) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 3, 4) -> (8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 1, 8) -> (2, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(1, GetSstFileCount(dbname_)); // (1, 2, 8) -> (3, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 3, 8) -> (4, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // (1, 4, 8) -> (5, 8) GenerateNewFile(&rnd, &key_idx); ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Reopen(options); for (int i = 0; i < key_idx; i++) { auto v = Get(Key(i)); ASSERT_NE(v, "NOT_FOUND"); ASSERT_TRUE(v.size() == 1 || v.size() == 990); } Destroy(options); } TEST_P(DBTestUniversalCompaction, ConcurrentBottomPriLowPriCompactions) { if (num_levels_ == 
1) { // for single-level universal, everything's bottom level so nothing should // be executed in bottom-pri thread pool. return; } const int kNumFilesTrigger = 3; Env::Default()->SetBackgroundThreads(1, Env::Priority::BOTTOM); Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.write_buffer_size = 100 << 10; // 100KB options.target_file_size_base = 32 << 10; // 32KB options.level0_file_num_compaction_trigger = kNumFilesTrigger; // Trigger compaction if size amplification exceeds 110% options.compaction_options_universal.max_size_amplification_percent = 110; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {// wait for the full compaction to be picked before adding files intended // for the second one. {"DBImpl::BackgroundCompaction:ForwardToBottomPriPool", "DBTestUniversalCompaction:ConcurrentBottomPriLowPriCompactions:0"}, // the full (bottom-pri) compaction waits until a partial (low-pri) // compaction has started to verify they can run in parallel. {"DBImpl::BackgroundCompaction:NonTrivial", "DBImpl::BGWorkBottomCompaction"}}); SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int i = 0; i < 2; ++i) { for (int num = 0; num < kNumFilesTrigger; num++) { int key_idx = 0; GenerateNewFile(&rnd, &key_idx, true /* no_wait */); // use no_wait above because that one waits for flush and compaction. We // don't want to wait for compaction because the full compaction is // intentionally blocked while more files are flushed. dbfull()->TEST_WaitForFlushMemTable(); } if (i == 0) { TEST_SYNC_POINT( "DBTestUniversalCompaction:ConcurrentBottomPriLowPriCompactions:0"); } } dbfull()->TEST_WaitForCompact(); // First compaction should output to bottom level. Second should output to L0 // since older L0 files pending compaction prevent it from being placed lower. 
ASSERT_EQ(NumSortedRuns(), 2); ASSERT_GT(NumTableFilesAtLevel(0), 0); ASSERT_GT(NumTableFilesAtLevel(num_levels_ - 1), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM); } TEST_P(DBTestUniversalCompaction, RecalculateScoreAfterPicking) { // Regression test for extra compactions scheduled. Once enough compactions // have been scheduled to bring the score below one, we should stop // scheduling more; otherwise, other CFs/DBs may be delayed unnecessarily. const int kNumFilesTrigger = 8; Options options = CurrentOptions(); options.memtable_factory.reset( new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_options_universal.max_merge_width = kNumFilesTrigger / 2; options.compaction_options_universal.max_size_amplification_percent = static_cast(-1); options.compaction_style = kCompactionStyleUniversal; options.level0_file_num_compaction_trigger = kNumFilesTrigger; options.num_levels = num_levels_; Reopen(options); std::atomic num_compactions_attempted(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:Start", [&](void* /*arg*/) { ++num_compactions_attempted; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); for (int num = 0; num < kNumFilesTrigger; num++) { ASSERT_EQ(NumSortedRuns(), num); int key_idx = 0; GenerateNewFile(&rnd, &key_idx); } dbfull()->TEST_WaitForCompact(); // Compacting the first four files was enough to bring the score below one so // there's no need to schedule any more compactions. ASSERT_EQ(1, num_compactions_attempted); ASSERT_EQ(NumSortedRuns(), 5); } TEST_P(DBTestUniversalCompaction, FinalSortedRunCompactFilesConflict) { // Regression test for conflict between: // (1) Running CompactFiles including file in the final sorted run; and // (2) Picking universal size-amp-triggered compaction, which always includes // the final sorted run. 
if (exclusive_manual_compaction_) { return; } Options opts = CurrentOptions(); opts.compaction_style = kCompactionStyleUniversal; opts.compaction_options_universal.max_size_amplification_percent = 50; opts.compaction_options_universal.min_merge_width = 2; opts.compression = kNoCompression; opts.level0_file_num_compaction_trigger = 2; opts.max_background_compactions = 2; opts.num_levels = num_levels_; Reopen(opts); // make sure compaction jobs can be parallelized auto stop_token = dbfull()->TEST_write_controler().GetCompactionPressureToken(); Put("key", "val"); Flush(); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(NumTableFilesAtLevel(num_levels_ - 1), 1); ColumnFamilyMetaData cf_meta; ColumnFamilyHandle* default_cfh = db_->DefaultColumnFamily(); dbfull()->GetColumnFamilyMetaData(default_cfh, &cf_meta); ASSERT_EQ(1, cf_meta.levels[num_levels_ - 1].files.size()); std::string first_sst_filename = cf_meta.levels[num_levels_ - 1].files[0].name; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"CompactFilesImpl:0", "DBTestUniversalCompaction:FinalSortedRunCompactFilesConflict:0"}, {"DBImpl::BackgroundCompaction():AfterPickCompaction", "CompactFilesImpl:1"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); port::Thread compact_files_thread([&]() { ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), default_cfh, {first_sst_filename}, num_levels_ - 1)); }); TEST_SYNC_POINT( "DBTestUniversalCompaction:FinalSortedRunCompactFilesConflict:0"); for (int i = 0; i < 2; ++i) { Put("key", "val"); Flush(); } dbfull()->TEST_WaitForCompact(); compact_files_thread.join(); } INSTANTIATE_TEST_CASE_P(NumLevels, DBTestUniversalCompaction, ::testing::Combine(::testing::Values(1, 3, 5), ::testing::Bool())); class DBTestUniversalManualCompactionOutputPathId : public DBTestUniversalCompactionBase { public: DBTestUniversalManualCompactionOutputPathId() : DBTestUniversalCompactionBase( "/db_universal_compaction_manual_pid_test") {} 
}; TEST_P(DBTestUniversalManualCompactionOutputPathId, ManualCompactionOutputPathId) { Options options = CurrentOptions(); options.create_if_missing = true; options.db_paths.emplace_back(dbname_, 1000000000); options.db_paths.emplace_back(dbname_ + "_2", 1000000000); options.compaction_style = kCompactionStyleUniversal; options.num_levels = num_levels_; options.target_file_size_base = 1 << 30; // Big size options.level0_file_num_compaction_trigger = 10; Destroy(options); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); MakeTables(3, "p", "q", 1); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(2, TotalLiveFiles(1)); ASSERT_EQ(2, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[1].path)); // Full compaction to DB path 0 CompactRangeOptions compact_options; compact_options.target_path_id = 1; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); ASSERT_EQ(1, TotalLiveFiles(1)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); ASSERT_EQ(1, TotalLiveFiles(1)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); MakeTables(1, "p", "q", 1); ASSERT_EQ(2, TotalLiveFiles(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options); ASSERT_EQ(2, TotalLiveFiles(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); // Full compaction to DB path 0 compact_options.target_path_id = 0; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); ASSERT_EQ(1, 
TotalLiveFiles(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[1].path)); // Fail when compacting to an invalid path ID compact_options.target_path_id = 2; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; ASSERT_TRUE(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr) .IsInvalidArgument()); } INSTANTIATE_TEST_CASE_P(OutputPathId, DBTestUniversalManualCompactionOutputPathId, ::testing::Combine(::testing::Values(1, 8), ::testing::Bool())); TEST_F(DBTestUniversalCompaction2, BasicL0toL1) { const int kNumKeys = 3000; const int kWindowSize = 100; const int kNumDelsTrigger = 90; Options opts = CurrentOptions(); opts.table_properties_collector_factories.emplace_back( NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); opts.compaction_style = kCompactionStyleUniversal; opts.level0_file_num_compaction_trigger = 2; opts.compression = kNoCompression; opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence // during flush int i; for (i = 0; i < 2000; ++i) { Put(Key(i), "val"); } Flush(); // MoveFilesToLevel(6); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); for (i = 1999; i < kNumKeys; ++i) { if (i >= kNumKeys - kWindowSize && i < kNumKeys - kWindowSize + kNumDelsTrigger) { Delete(Key(i)); } else { Put(Key(i), "val"); } } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(6), 0); } #if defined(ENABLE_SINGLE_LEVEL_DTC) TEST_F(DBTestUniversalCompaction2, SingleLevel) { const int kNumKeys = 3000; const int kWindowSize = 100; const int kNumDelsTrigger = 90; Options opts = CurrentOptions(); opts.table_properties_collector_factories.emplace_back( 
NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); opts.compaction_style = kCompactionStyleUniversal; opts.level0_file_num_compaction_trigger = 2; opts.compression = kNoCompression; opts.num_levels = 1; opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence // during flush int i; for (i = 0; i < 2000; ++i) { Put(Key(i), "val"); } Flush(); for (i = 1999; i < kNumKeys; ++i) { if (i >= kNumKeys - kWindowSize && i < kNumKeys - kWindowSize + kNumDelsTrigger) { Delete(Key(i)); } else { Put(Key(i), "val"); } } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(1, NumTableFilesAtLevel(0)); } #endif // ENABLE_SINGLE_LEVEL_DTC TEST_F(DBTestUniversalCompaction2, MultipleLevels) { const int kWindowSize = 100; const int kNumDelsTrigger = 90; Options opts = CurrentOptions(); opts.table_properties_collector_factories.emplace_back( NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); opts.compaction_style = kCompactionStyleUniversal; opts.level0_file_num_compaction_trigger = 4; opts.compression = kNoCompression; opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence // during flush int i; for (i = 0; i < 500; ++i) { Put(Key(i), "val"); } Flush(); for (i = 500; i < 1000; ++i) { Put(Key(i), "val"); } Flush(); for (i = 1000; i < 1500; ++i) { Put(Key(i), "val"); } Flush(); for (i = 1500; i < 2000; ++i) { Put(Key(i), "val"); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(6), 0); for (i = 1999; i < 2333; ++i) { Put(Key(i), "val"); } Flush(); for (i = 2333; i < 2666; ++i) { 
Put(Key(i), "val"); } Flush(); for (i = 2666; i < 2999; ++i) { Put(Key(i), "val"); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(6), 0); ASSERT_GT(NumTableFilesAtLevel(5), 0); for (i = 1900; i < 2100; ++i) { Delete(Key(i)); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(0, NumTableFilesAtLevel(1)); ASSERT_EQ(0, NumTableFilesAtLevel(2)); ASSERT_EQ(0, NumTableFilesAtLevel(3)); ASSERT_EQ(0, NumTableFilesAtLevel(4)); ASSERT_EQ(0, NumTableFilesAtLevel(5)); ASSERT_GT(NumTableFilesAtLevel(6), 0); } TEST_F(DBTestUniversalCompaction2, OverlappingL0) { const int kWindowSize = 100; const int kNumDelsTrigger = 90; Options opts = CurrentOptions(); opts.table_properties_collector_factories.emplace_back( NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); opts.compaction_style = kCompactionStyleUniversal; opts.level0_file_num_compaction_trigger = 5; opts.compression = kNoCompression; opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence // during flush int i; for (i = 0; i < 2000; ++i) { Put(Key(i), "val"); } Flush(); for (i = 2000; i < 3000; ++i) { Put(Key(i), "val"); } Flush(); for (i = 3500; i < 4000; ++i) { Put(Key(i), "val"); } Flush(); for (i = 2900; i < 3100; ++i) { Delete(Key(i)); } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(2, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(6), 0); } TEST_F(DBTestUniversalCompaction2, IngestBehind) { const int kNumKeys = 3000; const int kWindowSize = 100; const int kNumDelsTrigger = 90; Options opts = CurrentOptions(); opts.table_properties_collector_factories.emplace_back( NewCompactOnDeletionCollectorFactory(kWindowSize, kNumDelsTrigger)); opts.compaction_style = 
kCompactionStyleUniversal; opts.level0_file_num_compaction_trigger = 2; opts.compression = kNoCompression; opts.allow_ingest_behind = true; opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; opts.compaction_options_universal.max_size_amplification_percent = 200; Reopen(opts); // add an L1 file to prevent tombstones from dropping due to obsolescence // during flush int i; for (i = 0; i < 2000; ++i) { Put(Key(i), "val"); } Flush(); // MoveFilesToLevel(6); dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); for (i = 1999; i < kNumKeys; ++i) { if (i >= kNumKeys - kWindowSize && i < kNumKeys - kWindowSize + kNumDelsTrigger) { Delete(Key(i)); } else { Put(Key(i), "val"); } } Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_EQ(0, NumTableFilesAtLevel(6)); ASSERT_GT(NumTableFilesAtLevel(5), 0); } TEST_F(DBTestUniversalCompaction2, PeriodicCompactionDefault) { Options options; options.compaction_style = kCompactionStyleUniversal; KeepFilterFactory* filter = new KeepFilterFactory(true); options.compaction_filter_factory.reset(filter); Reopen(options); ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().periodic_compaction_seconds); KeepFilter df; options.compaction_filter_factory.reset(); options.compaction_filter = &df; Reopen(options); ASSERT_EQ(30 * 24 * 60 * 60, dbfull()->GetOptions().periodic_compaction_seconds); options.ttl = 60 * 24 * 60 * 60; options.compaction_filter = nullptr; Reopen(options); ASSERT_EQ(60 * 24 * 60 * 60, dbfull()->GetOptions().periodic_compaction_seconds); } TEST_F(DBTestUniversalCompaction2, PeriodicCompaction) { Options opts = CurrentOptions(); opts.env = env_; opts.compaction_style = kCompactionStyleUniversal; opts.level0_file_num_compaction_trigger = 10; opts.max_open_files = -1; opts.compaction_options_universal.size_ratio = 10; opts.compaction_options_universal.min_merge_width = 2; 
opts.compaction_options_universal.max_size_amplification_percent = 200; opts.periodic_compaction_seconds = 48 * 60 * 60; // 2 days opts.num_levels = 5; env_->addon_time_.store(0); Reopen(opts); int periodic_compactions = 0; int start_level = -1; int output_level = -1; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "UniversalCompactionPicker::PickPeriodicCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); ASSERT_TRUE(arg != nullptr); ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kPeriodicCompaction); start_level = compaction->start_level(); output_level = compaction->output_level(); periodic_compactions++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Case 1: Oldest flushed file excceeds periodic compaction threshold. ASSERT_OK(Put("foo", "bar")); Flush(); ASSERT_EQ(0, periodic_compactions); // Move clock forward so that the flushed file would qualify periodic // compaction. env_->addon_time_.store(48 * 60 * 60 + 100); // Another flush would trigger compaction the oldest file. 
ASSERT_OK(Put("foo", "bar2")); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(1, periodic_compactions); ASSERT_EQ(0, start_level); ASSERT_EQ(4, output_level); // Case 2: Oldest compacted file excceeds periodic compaction threshold periodic_compactions = 0; // A flush doesn't trigger a periodic compaction when threshold not hit ASSERT_OK(Put("foo", "bar2")); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(0, periodic_compactions); // After periodic compaction threshold hits, a flush will trigger // a compaction ASSERT_OK(Put("foo", "bar2")); env_->addon_time_.fetch_add(48 * 60 * 60 + 100); Flush(); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(1, periodic_compactions); ASSERT_EQ(0, start_level); ASSERT_EQ(4, output_level); } } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) int main(int argc, char** argv) { #if !defined(ROCKSDB_LITE) ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); #else (void) argc; (void) argv; return 0; #endif } rocksdb-6.11.4/db/db_wal_test.cc000066400000000000000000001602751370372246700164430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h"
#include "env/composite_env_wrapper.h"
#include "options/options_helper.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "test_util/fault_injection_test_env.h"
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {
// Fixture for the WAL tests; every test runs against "/db_wal_test".
class DBWALTest : public DBTestBase {
 public:
  DBWALTest() : DBTestBase("/db_wal_test") {}

#if defined(ROCKSDB_PLATFORM_POSIX)
  // Returns st_blocks * 512 for `file_name` as reported by stat(2).
  // NOTE(review): assumes st_blocks is counted in 512-byte units -- confirm
  // for the target platform.
  uint64_t GetAllocatedFileSize(std::string file_name) {
    struct stat sbuf;
    int err = stat(file_name.c_str(), &sbuf);
    assert(err == 0);
    (void)err;  // keep release (NDEBUG) builds free of unused-variable noise
    return sbuf.st_blocks * 512;
  }
#endif
};

// A SpecialEnv enriched to give more insight about deleted files
class EnrichedSpecialEnv : public SpecialEnv {
 public:
  explicit EnrichedSpecialEnv(Env* base) : SpecialEnv(base) {}

  // Records whether the WAL whose deletion was skipped gets opened again, and
  // whether that happens although a WAL with a name that compares greater or
  // equal was already requested to be deleted (a "gap"). The open itself is
  // always forwarded to SpecialEnv.
  Status NewSequentialFile(const std::string& f,
                           std::unique_ptr<SequentialFile>* r,
                           const EnvOptions& soptions) override {
    InstrumentedMutexLock l(&env_mutex_);
    if (f == skipped_wal) {
      deleted_wal_reopened = true;
      if (IsWAL(f) && largetest_deleted_wal.size() != 0 &&
          f.compare(largetest_deleted_wal) <= 0) {
        gap_in_wals = true;
      }
    }
    return SpecialEnv::NewSequentialFile(f, r, soptions);
  }

  // Forwards deletions to SpecialEnv, except for the first WAL that is asked
  // to be deleted: that one is remembered in `skipped_wal` and left on disk
  // while reporting success.
  Status DeleteFile(const std::string& fname) override {
    if (IsWAL(fname)) {
      deleted_wal_cnt++;
      InstrumentedMutexLock l(&env_mutex_);
      // If this is the first WAL, remember its name and skip deleting it. We
      // remember its name partly because the application might attempt to
      // delete the file again.
      if (skipped_wal.size() != 0 && skipped_wal != fname) {
        if (largetest_deleted_wal.size() == 0 ||
            largetest_deleted_wal.compare(fname) < 0) {
          largetest_deleted_wal = fname;
        }
      } else {
        skipped_wal = fname;
        return Status::OK();
      }
    }
    return SpecialEnv::DeleteFile(fname);
  }

  // Returns true when `fname` ends with "log". Names shorter than the suffix
  // are rejected up front: std::string::compare would otherwise be handed a
  // wrapped-around position and throw std::out_of_range.
  bool IsWAL(const std::string& fname) {
    const size_t kSuffixLen = 3;
    return fname.size() >= kSuffixLen &&
           fname.compare(fname.size() - kSuffixLen, kSuffixLen, "log") == 0;
  }

  InstrumentedMutex env_mutex_;
  // the wal whose actual delete was skipped by the env
  std::string skipped_wal = "";
  // the largest WAL that was requested to be deleted
  std::string largetest_deleted_wal = "";
  // number of WAL delete requests seen (this includes the skipped one)
  std::atomic<size_t> deleted_wal_cnt = {0};
  // the WAL whose delete from fs was skipped is reopened during recovery
  std::atomic<bool> deleted_wal_reopened = {false};
  // whether a gap in the WALs was detected during recovery
  std::atomic<bool> gap_in_wals = {false};
};

// Fixture that reopens the DB on top of an EnrichedSpecialEnv (with
// allow_2pc enabled) and swaps it in for the env created by DBTestBase.
class DBWALTestWithEnrichedEnv : public DBTestBase {
 public:
  DBWALTestWithEnrichedEnv() : DBTestBase("/db_wal_test") {
    enriched_env_ = new EnrichedSpecialEnv(env_->target());
    auto options = CurrentOptions();
    options.env = enriched_env_;
    options.allow_2pc = true;
    Reopen(options);
    delete env_;
    // to be deleted by the parent class
    env_ = enriched_env_;
  }

 protected:
  EnrichedSpecialEnv* enriched_env_;
};

// Test that the recovery would successfully avoid the gaps between the logs.
// One known scenario that could cause this is that the application issue the
// WAL deletion out of order. For the sake of simplicity in the test, here we
// create the gap by manipulating the env to skip deletion of the first WAL but
// not the ones after it.
TEST_F(DBWALTestWithEnrichedEnv, SkipDeletedWALs) { auto options = last_options_; // To cause frequent WAL deletion options.write_buffer_size = 128; Reopen(options); WriteOptions writeOpt = WriteOptions(); for (int i = 0; i < 128 * 5; i++) { ASSERT_OK(dbfull()->Put(writeOpt, "foo", "v1")); } FlushOptions fo; fo.wait = true; ASSERT_OK(db_->Flush(fo)); // some wals are deleted ASSERT_NE(0, enriched_env_->deleted_wal_cnt); // but not the first one ASSERT_NE(0, enriched_env_->skipped_wal.size()); // Test that the WAL that was not deleted will be skipped during recovery options = last_options_; Reopen(options); ASSERT_FALSE(enriched_env_->deleted_wal_reopened); ASSERT_FALSE(enriched_env_->gap_in_wals); } TEST_F(DBWALTest, WAL) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); WriteOptions writeOpt = WriteOptions(); writeOpt.disableWAL = true; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1")); ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v1", Get(1, "bar")); writeOpt.disableWAL = false; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2")); writeOpt.disableWAL = true; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); // Both value's should be present. ASSERT_EQ("v2", Get(1, "bar")); ASSERT_EQ("v2", Get(1, "foo")); writeOpt.disableWAL = true; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3")); writeOpt.disableWAL = false; ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); // again both values should be present. 
ASSERT_EQ("v3", Get(1, "foo")); ASSERT_EQ("v3", Get(1, "bar")); } while (ChangeWalOptions()); } TEST_F(DBWALTest, RollLog) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Put(1, "baz", "v5")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); for (int i = 0; i < 10; i++) { ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); } ASSERT_OK(Put(1, "foo", "v4")); for (int i = 0; i < 10; i++) { ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); } } while (ChangeWalOptions()); } TEST_F(DBWALTest, SyncWALNotBlockWrite) { Options options = CurrentOptions(); options.max_write_buffer_number = 4; DestroyAndReopen(options); ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("foo5", "bar5")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"WritableFileWriter::SyncWithoutFlush:1", "DBWALTest::SyncWALNotBlockWrite:1"}, {"DBWALTest::SyncWALNotBlockWrite:2", "WritableFileWriter::SyncWithoutFlush:2"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread thread([&]() { ASSERT_OK(db_->SyncWAL()); }); TEST_SYNC_POINT("DBWALTest::SyncWALNotBlockWrite:1"); ASSERT_OK(Put("foo2", "bar2")); ASSERT_OK(Put("foo3", "bar3")); FlushOptions fo; fo.wait = false; ASSERT_OK(db_->Flush(fo)); ASSERT_OK(Put("foo4", "bar4")); TEST_SYNC_POINT("DBWALTest::SyncWALNotBlockWrite:2"); thread.join(); ASSERT_EQ(Get("foo1"), "bar1"); ASSERT_EQ(Get("foo2"), "bar2"); ASSERT_EQ(Get("foo3"), "bar3"); ASSERT_EQ(Get("foo4"), "bar4"); ASSERT_EQ(Get("foo5"), "bar5"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBWALTest, SyncWALNotWaitWrite) { ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("foo3", "bar3")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"SpecialEnv::WalFile::Append:1", "DBWALTest::SyncWALNotWaitWrite:1"}, {"DBWALTest::SyncWALNotWaitWrite:2", "SpecialEnv::WalFile::Append:2"}, }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread thread( [&]() { ASSERT_OK(Put("foo2", "bar2")); }); // Moving this to SyncWAL before the actual fsync // TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:1"); ASSERT_OK(db_->SyncWAL()); // Moving this to SyncWAL after actual fsync // TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:2"); thread.join(); ASSERT_EQ(Get("foo1"), "bar1"); ASSERT_EQ(Get("foo2"), "bar2"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBWALTest, Recover) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Put(1, "baz", "v5")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v1", Get(1, "foo")); ASSERT_EQ("v5", Get(1, "baz")); ASSERT_OK(Put(1, "bar", "v2")); ASSERT_OK(Put(1, "foo", "v3")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v3", Get(1, "foo")); ASSERT_OK(Put(1, "foo", "v4")); ASSERT_EQ("v4", Get(1, "foo")); ASSERT_EQ("v2", Get(1, "bar")); ASSERT_EQ("v5", Get(1, "baz")); } while (ChangeWalOptions()); } TEST_F(DBWALTest, RecoverWithTableHandle) { do { Options options = CurrentOptions(); options.create_if_missing = true; options.disable_auto_compactions = true; options.avoid_flush_during_recovery = false; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Put(1, "bar", "v2")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "foo", "v3")); ASSERT_OK(Put(1, "bar", "v4")); ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "big", std::string(100, 'a'))); options = CurrentOptions(); const int kSmallMaxOpenFiles = 13; if (option_config_ == kDBLogDir) { // Use this option to check not preloading files // Set the max open files to be small enough so no preload will // happen. options.max_open_files = kSmallMaxOpenFiles; // RocksDB sanitize max open files to at least 20. Modify it back. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { int* max_open_files = static_cast(arg); *max_open_files = kSmallMaxOpenFiles; }); } else if (option_config_ == kWalDirAndMmapReads) { // Use this option to check always loading all files. options.max_open_files = 100; } else { options.max_open_files = -1; } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ReopenWithColumnFamilies({"default", "pikachu"}, options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); std::vector> files; dbfull()->TEST_GetFilesMetaData(handles_[1], &files); size_t total_files = 0; for (const auto& level : files) { total_files += level.size(); } ASSERT_EQ(total_files, 3); for (const auto& level : files) { for (const auto& file : level) { if (options.max_open_files == kSmallMaxOpenFiles) { ASSERT_TRUE(file.table_reader_handle == nullptr); } else { ASSERT_TRUE(file.table_reader_handle != nullptr); } } } } while (ChangeWalOptions()); } TEST_F(DBWALTest, IgnoreRecoveredLog) { std::string backup_logs = dbname_ + "/backup_logs"; do { // delete old files in backup_logs directory env_->CreateDirIfMissing(backup_logs); std::vector old_files; env_->GetChildren(backup_logs, &old_files); for (auto& file : old_files) { if (file != "." 
&& file != "..") { env_->DeleteFile(backup_logs + "/" + file); } } Options options = CurrentOptions(); options.create_if_missing = true; options.merge_operator = MergeOperators::CreateUInt64AddOperator(); options.wal_dir = dbname_ + "/logs"; DestroyAndReopen(options); // fill up the DB std::string one, two; PutFixed64(&one, 1); PutFixed64(&two, 2); ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one))); ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one))); // copy the logs to backup std::vector logs; env_->GetChildren(options.wal_dir, &logs); for (auto& log : logs) { if (log != ".." && log != ".") { CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log); } } // recover the DB Reopen(options); ASSERT_EQ(two, Get("foo")); ASSERT_EQ(one, Get("bar")); Close(); // copy the logs from backup back to wal dir for (auto& log : logs) { if (log != ".." && log != ".") { CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); } } // this should ignore the log files, recovery should not happen again // if the recovery happens, the same merge operator would be called twice, // leading to incorrect results Reopen(options); ASSERT_EQ(two, Get("foo")); ASSERT_EQ(one, Get("bar")); Close(); Destroy(options); Reopen(options); Close(); // copy the logs from backup back to wal dir env_->CreateDirIfMissing(options.wal_dir); for (auto& log : logs) { if (log != ".." && log != ".") { CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); } } // assert that we successfully recovered only from logs, even though we // destroyed the DB Reopen(options); ASSERT_EQ(two, Get("foo")); ASSERT_EQ(one, Get("bar")); // Recovery will fail if DB directory doesn't exist. Destroy(options); // copy the logs from backup back to wal dir env_->CreateDirIfMissing(options.wal_dir); for (auto& log : logs) { if (log != ".." 
&& log != ".") { CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log); // we won't be needing this file no more env_->DeleteFile(backup_logs + "/" + log); } } Status s = TryReopen(options); ASSERT_TRUE(!s.ok()); Destroy(options); } while (ChangeWalOptions()); } TEST_F(DBWALTest, RecoveryWithEmptyLog) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Put(1, "foo", "v2")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v3")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("v3", Get(1, "foo")); } while (ChangeWalOptions()); } #if !(defined NDEBUG) || !defined(OS_WIN) TEST_F(DBWALTest, PreallocateBlock) { Options options = CurrentOptions(); options.write_buffer_size = 10 * 1000 * 1000; options.max_total_wal_size = 0; size_t expected_preallocation_size = static_cast( options.write_buffer_size + options.write_buffer_size / 10); DestroyAndReopen(options); std::atomic called(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { ASSERT_TRUE(arg != nullptr); size_t preallocation_size = *(static_cast(arg)); ASSERT_EQ(expected_preallocation_size, preallocation_size); called.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put("", ""); Flush(); Put("", ""); Close(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(2, called.load()); options.max_total_wal_size = 1000 * 1000; expected_preallocation_size = static_cast(options.max_total_wal_size); Reopen(options); called.store(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { ASSERT_TRUE(arg != nullptr); size_t preallocation_size = *(static_cast(arg)); ASSERT_EQ(expected_preallocation_size, preallocation_size); called.fetch_add(1); }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put("", ""); Flush(); Put("", ""); Close(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(2, called.load()); options.db_write_buffer_size = 800 * 1000; expected_preallocation_size = static_cast(options.db_write_buffer_size); Reopen(options); called.store(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { ASSERT_TRUE(arg != nullptr); size_t preallocation_size = *(static_cast(arg)); ASSERT_EQ(expected_preallocation_size, preallocation_size); called.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put("", ""); Flush(); Put("", ""); Close(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(2, called.load()); expected_preallocation_size = 700 * 1000; std::shared_ptr write_buffer_manager = std::make_shared(static_cast(700 * 1000)); options.write_buffer_manager = write_buffer_manager; Reopen(options); called.store(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBTestWalFile.GetPreallocationStatus", [&](void* arg) { ASSERT_TRUE(arg != nullptr); size_t preallocation_size = *(static_cast(arg)); ASSERT_EQ(expected_preallocation_size, preallocation_size); called.fetch_add(1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put("", ""); Flush(); Put("", ""); Close(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(2, called.load()); } #endif // !(defined NDEBUG) || !defined(OS_WIN) #ifndef ROCKSDB_LITE TEST_F(DBWALTest, FullPurgePreservesRecycledLog) { // For github issue #1303 for (int i = 0; i < 2; ++i) { Options options = CurrentOptions(); options.create_if_missing = true; options.recycle_log_file_num = 2; if (i != 0) { options.wal_dir = alternative_wal_dir_; } DestroyAndReopen(options); ASSERT_OK(Put("foo", "v1")); VectorLogPtr log_files; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); 
ASSERT_GT(log_files.size(), 0); ASSERT_OK(Flush()); // Now the original WAL is in log_files[0] and should be marked for // recycling. // Verify full purge cannot remove this file. JobContext job_context(0); dbfull()->TEST_LockMutex(); dbfull()->FindObsoleteFiles(&job_context, true /* force */); dbfull()->TEST_UnlockMutex(); dbfull()->PurgeObsoleteFiles(job_context); if (i == 0) { ASSERT_OK( env_->FileExists(LogFileName(dbname_, log_files[0]->LogNumber()))); } else { ASSERT_OK(env_->FileExists( LogFileName(alternative_wal_dir_, log_files[0]->LogNumber()))); } } } TEST_F(DBWALTest, FullPurgePreservesLogPendingReuse) { // Ensures full purge cannot delete a WAL while it's in the process of being // recycled. In particular, we force the full purge after a file has been // chosen for reuse, but before it has been renamed. for (int i = 0; i < 2; ++i) { Options options = CurrentOptions(); options.recycle_log_file_num = 1; if (i != 0) { options.wal_dir = alternative_wal_dir_; } DestroyAndReopen(options); // The first flush creates a second log so writes can continue before the // flush finishes. ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); // The second flush can recycle the first log. Sync points enforce the // full purge happens after choosing the log to recycle and before it is // renamed. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::CreateWAL:BeforeReuseWritableFile1", "DBWALTest::FullPurgePreservesLogPendingReuse:PreFullPurge"}, {"DBWALTest::FullPurgePreservesLogPendingReuse:PostFullPurge", "DBImpl::CreateWAL:BeforeReuseWritableFile2"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ROCKSDB_NAMESPACE::port::Thread thread([&]() { TEST_SYNC_POINT( "DBWALTest::FullPurgePreservesLogPendingReuse:PreFullPurge"); ASSERT_OK(db_->EnableFileDeletions(true)); TEST_SYNC_POINT( "DBWALTest::FullPurgePreservesLogPendingReuse:PostFullPurge"); }); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); thread.join(); } } TEST_F(DBWALTest, GetSortedWalFiles) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); VectorLogPtr log_files; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); ASSERT_EQ(0, log_files.size()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); ASSERT_EQ(1, log_files.size()); } while (ChangeWalOptions()); } TEST_F(DBWALTest, GetCurrentWalFile) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); std::unique_ptr* bad_log_file = nullptr; ASSERT_NOK(dbfull()->GetCurrentWalFile(bad_log_file)); std::unique_ptr log_file; ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); // nothing has been written to the log yet ASSERT_EQ(log_file->StartSequence(), 0); ASSERT_EQ(log_file->SizeFileBytes(), 0); ASSERT_EQ(log_file->Type(), kAliveLogFile); ASSERT_GT(log_file->LogNumber(), 0); // add some data and verify that the file size actually moves foward ASSERT_OK(Put(0, "foo", "v1")); ASSERT_OK(Put(0, "foo2", "v2")); ASSERT_OK(Put(0, "foo3", "v3")); ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); ASSERT_EQ(log_file->StartSequence(), 0); ASSERT_GT(log_file->SizeFileBytes(), 0); ASSERT_EQ(log_file->Type(), kAliveLogFile); ASSERT_GT(log_file->LogNumber(), 0); // force log files to cycle and add some more data, then check if // log number moves forward 
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); for (int i = 0; i < 10; i++) { ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); } ASSERT_OK(Put(0, "foo4", "v4")); ASSERT_OK(Put(0, "foo5", "v5")); ASSERT_OK(Put(0, "foo6", "v6")); ASSERT_OK(dbfull()->GetCurrentWalFile(&log_file)); ASSERT_EQ(log_file->StartSequence(), 0); ASSERT_GT(log_file->SizeFileBytes(), 0); ASSERT_EQ(log_file->Type(), kAliveLogFile); ASSERT_GT(log_file->LogNumber(), 0); } while (ChangeWalOptions()); } TEST_F(DBWALTest, RecoveryWithLogDataForSomeCFs) { // Test for regression of WAL cleanup missing files that don't contain data // for every column family. do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Put(1, "foo", "v2")); uint64_t earliest_log_nums[2]; for (int i = 0; i < 2; ++i) { if (i > 0) { ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); } VectorLogPtr log_files; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files)); if (log_files.size() > 0) { earliest_log_nums[i] = log_files[0]->LogNumber(); } else { earliest_log_nums[i] = port::kMaxUint64; } } // Check at least the first WAL was cleaned up during the recovery. ASSERT_LT(earliest_log_nums[0], earliest_log_nums[1]); } while (ChangeWalOptions()); } TEST_F(DBWALTest, RecoverWithLargeLog) { do { { Options options = CurrentOptions(); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "big1", std::string(200000, '1'))); ASSERT_OK(Put(1, "big2", std::string(200000, '2'))); ASSERT_OK(Put(1, "small3", std::string(10, '3'))); ASSERT_OK(Put(1, "small4", std::string(10, '4'))); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); } // Make sure that if we re-open with a small write buffer size that // we flush table files in the middle of a large log file. 
Options options; options.write_buffer_size = 100000; options = CurrentOptions(options); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3); ASSERT_EQ(std::string(200000, '1'), Get(1, "big1")); ASSERT_EQ(std::string(200000, '2'), Get(1, "big2")); ASSERT_EQ(std::string(10, '3'), Get(1, "small3")); ASSERT_EQ(std::string(10, '4'), Get(1, "small4")); ASSERT_GT(NumTableFilesAtLevel(0, 1), 1); } while (ChangeWalOptions()); } // In https://reviews.facebook.net/D20661 we change // recovery behavior: previously for each log file each column family // memtable was flushed, even it was empty. Now it's changed: // we try to create the smallest number of table files by merging // updates from multiple logs TEST_F(DBWALTest, RecoverCheckFileAmountWithSmallWriteBuffer) { Options options = CurrentOptions(); options.write_buffer_size = 5000000; CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); // Since we will reopen DB with smaller write_buffer_size, // each key will go to new SST file ASSERT_OK(Put(1, Key(10), DummyString(1000000))); ASSERT_OK(Put(1, Key(10), DummyString(1000000))); ASSERT_OK(Put(1, Key(10), DummyString(1000000))); ASSERT_OK(Put(1, Key(10), DummyString(1000000))); ASSERT_OK(Put(3, Key(10), DummyString(1))); // Make 'dobrynia' to be flushed and new WAL file to be created ASSERT_OK(Put(2, Key(10), DummyString(7500000))); ASSERT_OK(Put(2, Key(1), DummyString(1))); dbfull()->TEST_WaitForFlushMemTable(handles_[2]); { auto tables = ListTableFiles(env_, dbname_); ASSERT_EQ(tables.size(), static_cast(1)); // Make sure 'dobrynia' was flushed: check sst files amount ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(1)); } // New WAL file ASSERT_OK(Put(1, Key(1), DummyString(1))); ASSERT_OK(Put(1, Key(1), DummyString(1))); ASSERT_OK(Put(3, Key(10), DummyString(1))); ASSERT_OK(Put(3, Key(10), DummyString(1))); ASSERT_OK(Put(3, Key(10), DummyString(1))); options.write_buffer_size = 
4096; options.arena_block_size = 4096; ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, options); { // No inserts => default is empty ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(0)); // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(5)); // 1 SST for big key + 1 SST for small one ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(2)); // 1 SST for all keys ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(1)); } } // In https://reviews.facebook.net/D20661 we change // recovery behavior: previously for each log file each column family // memtable was flushed, even it wasn't empty. Now it's changed: // we try to create the smallest number of table files by merging // updates from multiple logs TEST_F(DBWALTest, RecoverCheckFileAmount) { Options options = CurrentOptions(); options.write_buffer_size = 100000; options.arena_block_size = 4 * 1024; options.avoid_flush_during_recovery = false; CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options); ASSERT_OK(Put(0, Key(1), DummyString(1))); ASSERT_OK(Put(1, Key(1), DummyString(1))); ASSERT_OK(Put(2, Key(1), DummyString(1))); // Make 'nikitich' memtable to be flushed ASSERT_OK(Put(3, Key(10), DummyString(1002400))); ASSERT_OK(Put(3, Key(1), DummyString(1))); dbfull()->TEST_WaitForFlushMemTable(handles_[3]); // 4 memtable are not flushed, 1 sst file { auto tables = ListTableFiles(env_, dbname_); ASSERT_EQ(tables.size(), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(1)); } // Memtable for 'nikitich' has flushed, new WAL file has opened // 4 memtable still not flushed // Write to new WAL file ASSERT_OK(Put(0, Key(1), DummyString(1))); ASSERT_OK(Put(1, Key(1), DummyString(1))); ASSERT_OK(Put(2, Key(1), DummyString(1))); // Fill up 'nikitich' one more time ASSERT_OK(Put(3, 
Key(10), DummyString(1002400))); // make it flush ASSERT_OK(Put(3, Key(1), DummyString(1))); dbfull()->TEST_WaitForFlushMemTable(handles_[3]); // There are still 4 memtable not flushed, and 2 sst tables ASSERT_OK(Put(0, Key(1), DummyString(1))); ASSERT_OK(Put(1, Key(1), DummyString(1))); ASSERT_OK(Put(2, Key(1), DummyString(1))); { auto tables = ListTableFiles(env_, dbname_); ASSERT_EQ(tables.size(), static_cast(2)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(2)); } ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"}, options); { std::vector table_files = ListTableFiles(env_, dbname_); // Check, that records for 'default', 'dobrynia' and 'pikachu' from // first, second and third WALs went to the same SST. // So, there is 6 SSTs: three for 'nikitich', one for 'default', one for // 'dobrynia', one for 'pikachu' ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), static_cast(3)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), static_cast(1)); } } TEST_F(DBWALTest, SyncMultipleLogs) { const uint64_t kNumBatches = 2; const int kBatchSize = 1000; Options options = CurrentOptions(); options.create_if_missing = true; options.write_buffer_size = 4096; Reopen(options); WriteBatch batch; WriteOptions wo; wo.sync = true; for (uint64_t b = 0; b < kNumBatches; b++) { batch.Clear(); for (int i = 0; i < kBatchSize; i++) { batch.Put(Key(i), DummyString(128)); } dbfull()->Write(wo, &batch); } ASSERT_OK(dbfull()->SyncWAL()); } // Github issue 1339. Prior the fix we read sequence id from the first log to // a local variable, then keep increase the variable as we replay logs, // ignoring actual sequence id of the records. This is incorrect if some writes // come with WAL disabled. 
TEST_F(DBWALTest, PartOfWritesWithWALDisabled) { std::unique_ptr fault_env( new FaultInjectionTestEnv(env_)); Options options = CurrentOptions(); options.env = fault_env.get(); options.disable_auto_compactions = true; WriteOptions wal_on, wal_off; wal_on.sync = true; wal_on.disableWAL = false; wal_off.disableWAL = true; CreateAndReopenWithCF({"dummy"}, options); ASSERT_OK(Put(1, "dummy", "d1", wal_on)); // seq id 1 ASSERT_OK(Put(1, "dummy", "d2", wal_off)); ASSERT_OK(Put(1, "dummy", "d3", wal_off)); ASSERT_OK(Put(0, "key", "v4", wal_on)); // seq id 4 ASSERT_OK(Flush(0)); ASSERT_OK(Put(0, "key", "v5", wal_on)); // seq id 5 ASSERT_EQ("v5", Get(0, "key")); dbfull()->FlushWAL(false); // Simulate a crash. fault_env->SetFilesystemActive(false); Close(); fault_env->ResetState(); ReopenWithColumnFamilies({"default", "dummy"}, options); // Prior to the fix, we may incorrectly recover "v5" with sequence id = 3. ASSERT_EQ("v5", Get(0, "key")); // Destroy DB before destruct fault_env. Destroy(options); } // // Test WAL recovery for the various modes available // class RecoveryTestHelper { public: // Number of WAL files to generate static const int kWALFilesCount = 10; // Starting number for the WAL file name like 00010.log static const int kWALFileOffset = 10; // Keys to be written per WAL file static const int kKeysPerWALFile = 133; // Size of the value static const int kValueSize = 96; // Create WAL files with values filled in static void FillData(DBWALTest* test, const Options& options, const size_t wal_count, size_t* count) { // Calling internal functions requires sanitized options. 
Options sanitized_options = SanitizeOptions(test->dbname_, options); const ImmutableDBOptions db_options(sanitized_options); *count = 0; std::shared_ptr table_cache = NewLRUCache(50, 0); EnvOptions env_options; WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size); std::unique_ptr versions; std::unique_ptr wal_manager; WriteController write_controller; versions.reset(new VersionSet(test->dbname_, &db_options, env_options, table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr)); wal_manager.reset(new WalManager(db_options, env_options)); std::unique_ptr current_log_writer; for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) { uint64_t current_log_number = j; std::string fname = LogFileName(test->dbname_, current_log_number); std::unique_ptr file; ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), fname, env_options)); current_log_writer.reset( new log::Writer(std::move(file_writer), current_log_number, db_options.recycle_log_file_num > 0)); WriteBatch batch; for (int i = 0; i < kKeysPerWALFile; i++) { std::string key = "key" + ToString((*count)++); std::string value = test->DummyString(kValueSize); assert(current_log_writer.get() != nullptr); uint64_t seq = versions->LastSequence() + 1; batch.Clear(); batch.Put(key, value); WriteBatchInternal::SetSequence(&batch, seq); current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch)); versions->SetLastAllocatedSequence(seq); versions->SetLastPublishedSequence(seq); versions->SetLastSequence(seq); } } } // Recreate and fill the store with some data static size_t FillData(DBWALTest* test, Options* options) { options->create_if_missing = true; test->DestroyAndReopen(*options); test->Close(); size_t count = 0; FillData(test, *options, kWALFilesCount, &count); return count; } // Read back all the keys we wrote and return the 
number of keys found static size_t GetData(DBWALTest* test) { size_t count = 0; for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) { if (test->Get("key" + ToString(i)) != "NOT_FOUND") { ++count; } } return count; } // Manuall corrupt the specified WAL static void CorruptWAL(DBWALTest* test, const Options& options, const double off, const double len, const int wal_file_id, const bool trunc = false) { Env* env = options.env; std::string fname = LogFileName(test->dbname_, wal_file_id); uint64_t size; ASSERT_OK(env->GetFileSize(fname, &size)); ASSERT_GT(size, 0); #ifdef OS_WIN // Windows disk cache behaves differently. When we truncate // the original content is still in the cache due to the original // handle is still open. Generally, in Windows, one prohibits // shared access to files and it is not needed for WAL but we allow // it to induce corruption at various tests. test->Close(); #endif if (trunc) { ASSERT_EQ(0, truncate(fname.c_str(), static_cast(size * off))); } else { InduceCorruption(fname, static_cast(size * off + 8), static_cast(size * len)); } } // Overwrite data with 'a' from offset for length len static void InduceCorruption(const std::string& filename, size_t offset, size_t len) { ASSERT_GT(len, 0U); int fd = open(filename.c_str(), O_RDWR); // On windows long is 32-bit ASSERT_LE(offset, std::numeric_limits::max()); ASSERT_GT(fd, 0); ASSERT_EQ(offset, lseek(fd, static_cast(offset), SEEK_SET)); void* buf = alloca(len); memset(buf, 'b', len); ASSERT_EQ(len, write(fd, buf, static_cast(len))); close(fd); } }; // Test scope: // - We expect to open the data store when there is incomplete trailing writes // at the end of any of the logs // - We do not expect to open the data store for corruption TEST_F(DBWALTest, kTolerateCorruptedTailRecords) { const int jstart = RecoveryTestHelper::kWALFileOffset; const int jend = jstart + RecoveryTestHelper::kWALFilesCount; for (auto trunc : {true, false}) { /* Corruption style */ for (int i = 0; i < 3; i++) { /* 
Corruption offset position */ for (int j = jstart; j < jend; j++) { /* WAL file */ // Fill data for testing Options options = CurrentOptions(); const size_t row_count = RecoveryTestHelper::FillData(this, &options); // test checksum failure or parsing RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, /*len%=*/.1, /*wal=*/j, trunc); if (trunc) { options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; options.create_if_missing = false; ASSERT_OK(TryReopen(options)); const size_t recovered_row_count = RecoveryTestHelper::GetData(this); ASSERT_TRUE(i == 0 || recovered_row_count > 0); ASSERT_LT(recovered_row_count, row_count); } else { options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; ASSERT_NOK(TryReopen(options)); } } } } } // Test scope: // We don't expect the data store to be opened if there is any corruption // (leading, middle or trailing -- incomplete writes or corruption) TEST_F(DBWALTest, kAbsoluteConsistency) { const int jstart = RecoveryTestHelper::kWALFileOffset; const int jend = jstart + RecoveryTestHelper::kWALFilesCount; // Verify clean slate behavior Options options = CurrentOptions(); const size_t row_count = RecoveryTestHelper::FillData(this, &options); options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; options.create_if_missing = false; ASSERT_OK(TryReopen(options)); ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count); for (auto trunc : {true, false}) { /* Corruption style */ for (int i = 0; i < 4; i++) { /* Corruption offset position */ if (trunc && i == 0) { continue; } for (int j = jstart; j < jend; j++) { /* wal files */ // fill with new date RecoveryTestHelper::FillData(this, &options); // corrupt the wal RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3, /*len%=*/.1, j, trunc); // verify options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency; options.create_if_missing = false; ASSERT_NOK(TryReopen(options)); } } } } // Test scope: // We don't 
// expect the data store to be opened if there is any inconsistency
// between WAL and SST files
TEST_F(DBWALTest, kPointInTimeRecoveryCFConsistency) {
  Options options = CurrentOptions();
  options.avoid_flush_during_recovery = true;

  // Create DB with multiple column families.
  CreateAndReopenWithCF({"one", "two"}, options);
  ASSERT_OK(Put(1, "key1", "val1"));
  ASSERT_OK(Put(2, "key2", "val2"));

  // Record the offset at this point
  Env* env = options.env;
  uint64_t wal_file_id = dbfull()->TEST_LogfileNumber();
  std::string fname = LogFileName(dbname_, wal_file_id);
  uint64_t offset_to_corrupt;
  ASSERT_OK(env->GetFileSize(fname, &offset_to_corrupt));
  ASSERT_GT(offset_to_corrupt, 0);

  ASSERT_OK(Put(1, "key3", "val3"));
  // Corrupt WAL at location of key3
  RecoveryTestHelper::InduceCorruption(
      fname, static_cast<size_t>(offset_to_corrupt), static_cast<size_t>(4));
  ASSERT_OK(Put(2, "key4", "val4"));
  ASSERT_OK(Put(1, "key5", "val5"));
  ASSERT_OK(Flush(2));

  // PIT recovery & verify
  options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
  ASSERT_NOK(TryReopenWithColumnFamilies({"default", "one", "two"}, options));
}

// Test scope:
// - We expect to open data store under all circumstances
// - We expect only data upto the point where the first error was encountered
TEST_F(DBWALTest, kPointInTimeRecovery) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
  // size_t so the probe loop below compares like-signed values.
  const size_t maxkeys = static_cast<size_t>(
      RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile);

  for (auto trunc : {true, false}) {        /* Corruption style */
    for (int i = 0; i < 4; i++) {           /* Offset of corruption */
      for (int j = jstart; j < jend; j++) { /* WAL file */
        // Fill data for testing
        Options options = CurrentOptions();
        const size_t row_count = RecoveryTestHelper::FillData(this, &options);

        // Corrupt the wal
        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
                                       /*len%=*/.1, j, trunc);

        // Verify
        options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
        options.create_if_missing = false;
        ASSERT_OK(TryReopen(options));

        // Probe data for invariants
        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
        ASSERT_LT(recovered_row_count, row_count);

        // The recovered keys must form a prefix: once one key is missing,
        // every later key must be missing too. The probe has to use the loop
        // variable `k`; indexing by `i` would test one key over and over.
        bool expect_data = true;
        for (size_t k = 0; k < maxkeys; ++k) {
          bool found = Get("key" + ToString(k)) != "NOT_FOUND";
          if (expect_data && !found) {
            expect_data = false;
          }
          ASSERT_EQ(found, expect_data);
        }

        const size_t min = RecoveryTestHelper::kKeysPerWALFile *
                           (j - RecoveryTestHelper::kWALFileOffset);
        ASSERT_GE(recovered_row_count, min);
        if (!trunc && i != 0) {
          const size_t max = RecoveryTestHelper::kKeysPerWALFile *
                             (j - RecoveryTestHelper::kWALFileOffset + 1);
          ASSERT_LE(recovered_row_count, max);
        }
      }
    }
  }
}

// Test scope:
// - We expect to open the data store under all scenarios
// - We expect to have recovered records past the corruption zone
TEST_F(DBWALTest, kSkipAnyCorruptedRecords) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;

  for (auto trunc : {true, false}) {        /* Corruption style */
    for (int i = 0; i < 4; i++) {           /* Corruption offset */
      for (int j = jstart; j < jend; j++) { /* wal files */
        // Fill data for testing
        Options options = CurrentOptions();
        const size_t row_count = RecoveryTestHelper::FillData(this, &options);

        // Corrupt the WAL
        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
                                       /*len%=*/.1, j, trunc);

        // Verify behavior
        options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords;
        options.create_if_missing = false;
        ASSERT_OK(TryReopen(options));

        // Probe data for invariants
        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
        ASSERT_LT(recovered_row_count, row_count);

        if (!trunc) {
          ASSERT_TRUE(i != 0 || recovered_row_count > 0);
        }
      }
    }
  }
}

// Checks the number of table files after reopening with
// avoid_flush_during_recovery off, on, and on together with allow_2pc.
TEST_F(DBWALTest, AvoidFlushDuringRecovery) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.avoid_flush_during_recovery = false;

  // Test with flush after recovery.
  Reopen(options);
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Put("bar", "v2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", "v3"));
  ASSERT_OK(Put("bar", "v4"));
  ASSERT_EQ(1, TotalTableFiles());
  // Reopen DB. Check if WAL logs flushed.
  Reopen(options);
  ASSERT_EQ("v3", Get("foo"));
  ASSERT_EQ("v4", Get("bar"));
  ASSERT_EQ(2, TotalTableFiles());

  // Test without flush after recovery.
  options.avoid_flush_during_recovery = true;
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "v5"));
  ASSERT_OK(Put("bar", "v6"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", "v7"));
  ASSERT_OK(Put("bar", "v8"));
  ASSERT_EQ(1, TotalTableFiles());
  // Reopen DB. WAL logs should not be flushed this time.
  Reopen(options);
  ASSERT_EQ("v7", Get("foo"));
  ASSERT_EQ("v8", Get("bar"));
  ASSERT_EQ(1, TotalTableFiles());

  // Force flush with allow_2pc.
  options.avoid_flush_during_recovery = true;
  options.allow_2pc = true;
  ASSERT_OK(Put("foo", "v9"));
  ASSERT_OK(Put("bar", "v10"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", "v11"));
  ASSERT_OK(Put("bar", "v12"));
  Reopen(options);
  ASSERT_EQ("v11", Get("foo"));
  ASSERT_EQ("v12", Get("bar"));
  ASSERT_EQ(3, TotalTableFiles());
}

TEST_F(DBWALTest, WalCleanupAfterAvoidFlushDuringRecovery) {
  // Verifies WAL files that were present during recovery, but not flushed due
  // to avoid_flush_during_recovery, will be considered for deletion at a later
  // stage. We check at least one such file is deleted during Flush().
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.avoid_flush_during_recovery = true;
  Reopen(options);

  ASSERT_OK(Put("foo", "v1"));
  Reopen(options);
  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      // Flush() triggers deletion of obsolete tracked files
      ASSERT_OK(Flush());
    }
    VectorLogPtr log_files;
    ASSERT_OK(dbfull()->GetSortedWalFiles(log_files));
    if (i == 0) {
      ASSERT_GT(log_files.size(), 0);
    } else {
      ASSERT_EQ(0, log_files.size());
    }
  }
}

// Reopens repeatedly with avoid_flush_during_recovery set and checks that the
// filled keys and the latest "foo"/"bar" values are readable after each reopen.
TEST_F(DBWALTest, RecoverWithoutFlush) {
  Options options = CurrentOptions();
  options.avoid_flush_during_recovery = true;
  options.create_if_missing = false;
  options.disable_auto_compactions = true;
  options.write_buffer_size = 64 * 1024 * 1024;

  size_t count = RecoveryTestHelper::FillData(this, &options);
  auto validateData = [this, count]() {
    for (size_t i = 0; i < count; i++) {
      ASSERT_NE(Get("key" + ToString(i)), "NOT_FOUND");
    }
  };
  Reopen(options);
  validateData();
  // Insert some data without flush
  ASSERT_OK(Put("foo", "foo_v1"));
  ASSERT_OK(Put("bar", "bar_v1"));
  Reopen(options);
  validateData();
  ASSERT_EQ(Get("foo"), "foo_v1");
  ASSERT_EQ(Get("bar"), "bar_v1");
  // Insert again and reopen
  ASSERT_OK(Put("foo", "foo_v2"));
  ASSERT_OK(Put("bar", "bar_v2"));
  Reopen(options);
  validateData();
  ASSERT_EQ(Get("foo"), "foo_v2");
  ASSERT_EQ(Get("bar"), "bar_v2");
  // manual flush and insert again
  ASSERT_OK(Flush());
  ASSERT_EQ(Get("foo"), "foo_v2");
  ASSERT_EQ(Get("bar"), "bar_v2");
  ASSERT_OK(Put("foo", "foo_v3"));
  ASSERT_OK(Put("bar", "bar_v3"));
  Reopen(options);
  validateData();
  ASSERT_EQ(Get("foo"), "foo_v3");
  ASSERT_EQ(Get("bar"), "bar_v3");
}

// Same idea with three column families: tracks the WAL file count across
// flushes and reopens, and checks every key after each reopen.
TEST_F(DBWALTest, RecoverWithoutFlushMultipleCF) {
  const std::string kSmallValue = "v";
  const std::string kLargeValue = DummyString(1024);
  Options options = CurrentOptions();
  options.avoid_flush_during_recovery = true;
  options.create_if_missing = false;
  options.disable_auto_compactions = true;

  auto countWalFiles = [this]() {
    VectorLogPtr log_files;
    // EXPECT_ rather than ASSERT_: the lambda returns a value, and a failed
    // lookup must not pass silently as "zero files".
    EXPECT_OK(dbfull()->GetSortedWalFiles(log_files));
    return log_files.size();
  };

  // Create DB with multiple column families and multiple log files.
  CreateAndReopenWithCF({"one", "two"}, options);
  ASSERT_OK(Put(0, "key1", kSmallValue));
  ASSERT_OK(Put(1, "key2", kLargeValue));
  ASSERT_OK(Flush(1));
  ASSERT_EQ(1, countWalFiles());
  ASSERT_OK(Put(0, "key3", kSmallValue));
  ASSERT_OK(Put(2, "key4", kLargeValue));
  ASSERT_OK(Flush(2));
  ASSERT_EQ(2, countWalFiles());

  // Reopen, insert and flush.
  options.db_write_buffer_size = 64 * 1024 * 1024;
  ReopenWithColumnFamilies({"default", "one", "two"}, options);
  ASSERT_EQ(Get(0, "key1"), kSmallValue);
  ASSERT_EQ(Get(1, "key2"), kLargeValue);
  ASSERT_EQ(Get(0, "key3"), kSmallValue);
  ASSERT_EQ(Get(2, "key4"), kLargeValue);
  // Insert more data.
  ASSERT_OK(Put(0, "key5", kLargeValue));
  ASSERT_OK(Put(1, "key6", kLargeValue));
  ASSERT_EQ(3, countWalFiles());
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(2, "key7", kLargeValue));
  ASSERT_OK(dbfull()->FlushWAL(false));
  ASSERT_EQ(4, countWalFiles());

  // Reopen twice and validate.
  for (int i = 0; i < 2; i++) {
    ReopenWithColumnFamilies({"default", "one", "two"}, options);
    ASSERT_EQ(Get(0, "key1"), kSmallValue);
    ASSERT_EQ(Get(1, "key2"), kLargeValue);
    ASSERT_EQ(Get(0, "key3"), kSmallValue);
    ASSERT_EQ(Get(2, "key4"), kLargeValue);
    ASSERT_EQ(Get(0, "key5"), kLargeValue);
    ASSERT_EQ(Get(1, "key6"), kLargeValue);
    ASSERT_EQ(Get(2, "key7"), kLargeValue);
    ASSERT_EQ(4, countWalFiles());
  }
}

// In this test we are trying to do the following:
// 1. Create a DB with corrupted WAL log;
// 2. Open with avoid_flush_during_recovery = true;
// 3. Append more data without flushing, which creates new WAL log.
// 4. Open again. See if it can correctly handle previous corruption.
TEST_F(DBWALTest, RecoverFromCorruptedWALWithoutFlush) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
  const int kAppendKeys = 100;
  Options options = CurrentOptions();
  options.avoid_flush_during_recovery = true;
  options.create_if_missing = false;
  options.disable_auto_compactions = true;
  options.write_buffer_size = 64 * 1024 * 1024;

  // Snapshot of every key/value pair currently visible through an iterator.
  auto getAll = [this]() {
    std::vector<std::pair<std::string, std::string>> data;
    ReadOptions ropt;
    Iterator* iter = dbfull()->NewIterator(ropt);
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      data.push_back(
          std::make_pair(iter->key().ToString(), iter->value().ToString()));
    }
    delete iter;
    return data;
  };

  for (auto& mode : {WALRecoveryMode::kTolerateCorruptedTailRecords,
                     WALRecoveryMode::kAbsoluteConsistency,
                     WALRecoveryMode::kPointInTimeRecovery,
                     WALRecoveryMode::kSkipAnyCorruptedRecords}) {
    options.wal_recovery_mode = mode;
    for (auto trunc : {true, false}) {
      for (int i = 0; i < 4; i++) {
        for (int j = jstart; j < jend; j++) {
          // Create corrupted WAL
          RecoveryTestHelper::FillData(this, &options);
          RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
                                         /*len%=*/.1, /*wal=*/j, trunc);
          // Skip the test if DB won't open. Only two configurations are
          // allowed to fail here; any other failure is a test error.
          if (!TryReopen(options).ok()) {
            ASSERT_TRUE(options.wal_recovery_mode ==
                            WALRecoveryMode::kAbsoluteConsistency ||
                        (!trunc &&
                         options.wal_recovery_mode ==
                             WALRecoveryMode::kTolerateCorruptedTailRecords));
            continue;
          }
          ASSERT_OK(TryReopen(options));
          // Append some more data.
          for (int k = 0; k < kAppendKeys; k++) {
            std::string key = "extra_key" + ToString(k);
            std::string value = DummyString(RecoveryTestHelper::kValueSize);
            ASSERT_OK(Put(key, value));
          }
          // Save data for comparison.
          auto data = getAll();
          // Reopen. Verify data.
          ASSERT_OK(TryReopen(options));
          auto actual_data = getAll();
          ASSERT_EQ(data, actual_data);
        }
      }
    }
  }
}

// Tests that total log size is recovered if we set
// avoid_flush_during_recovery=true.
// Flush should trigger if max_total_wal_size is reached.
TEST_F(DBWALTest, RestoreTotalLogSizeAfterRecoverWithoutFlush) { class TestFlushListener : public EventListener { public: std::atomic count{0}; TestFlushListener() = default; void OnFlushBegin(DB* /*db*/, const FlushJobInfo& flush_job_info) override { count++; assert(FlushReason::kWriteBufferManager == flush_job_info.flush_reason); } }; std::shared_ptr test_listener = std::make_shared(); constexpr size_t kKB = 1024; constexpr size_t kMB = 1024 * 1024; Options options = CurrentOptions(); options.avoid_flush_during_recovery = true; options.max_total_wal_size = 1 * kMB; options.listeners.push_back(test_listener); // Have to open DB in multi-CF mode to trigger flush when // max_total_wal_size is reached. CreateAndReopenWithCF({"one"}, options); // Write some keys and we will end up with one log file which is slightly // smaller than 1MB. std::string value_100k(100 * kKB, 'v'); std::string value_300k(300 * kKB, 'v'); ASSERT_OK(Put(0, "foo", "v1")); for (int i = 0; i < 9; i++) { ASSERT_OK(Put(1, "key" + ToString(i), value_100k)); } // Get log files before reopen. VectorLogPtr log_files_before; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_before)); ASSERT_EQ(1, log_files_before.size()); uint64_t log_size_before = log_files_before[0]->SizeFileBytes(); ASSERT_GT(log_size_before, 900 * kKB); ASSERT_LT(log_size_before, 1 * kMB); ReopenWithColumnFamilies({"default", "one"}, options); // Write one more value to make log larger than 1MB. ASSERT_OK(Put(1, "bar", value_300k)); // Get log files again. A new log file will be opened. VectorLogPtr log_files_after_reopen; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_after_reopen)); ASSERT_EQ(2, log_files_after_reopen.size()); ASSERT_EQ(log_files_before[0]->LogNumber(), log_files_after_reopen[0]->LogNumber()); ASSERT_GT(log_files_after_reopen[0]->SizeFileBytes() + log_files_after_reopen[1]->SizeFileBytes(), 1 * kMB); // Write one more key to trigger flush. 
ASSERT_OK(Put(0, "foo", "v2")); dbfull()->TEST_WaitForFlushMemTable(); // Flushed two column families. ASSERT_EQ(2, test_listener->count.load()); } #if defined(ROCKSDB_PLATFORM_POSIX) #if defined(ROCKSDB_FALLOCATE_PRESENT) // Tests that we will truncate the preallocated space of the last log from // previous. TEST_F(DBWALTest, TruncateLastLogAfterRecoverWithoutFlush) { constexpr size_t kKB = 1024; Options options = CurrentOptions(); options.avoid_flush_during_recovery = true; // Test fallocate support of running file system. // Skip this test if fallocate is not supported. std::string fname_test_fallocate = dbname_ + "/preallocate_testfile"; int fd = -1; do { fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); } while (fd < 0 && errno == EINTR); ASSERT_GT(fd, 0); int alloc_status = fallocate(fd, 0, 0, 1); int err_number = errno; close(fd); ASSERT_OK(options.env->DeleteFile(fname_test_fallocate)); if (err_number == ENOSYS || err_number == EOPNOTSUPP) { fprintf(stderr, "Skipped preallocated space check: %s\n", strerror(err_number)); return; } ASSERT_EQ(0, alloc_status); DestroyAndReopen(options); size_t preallocated_size = dbfull()->TEST_GetWalPreallocateBlockSize(options.write_buffer_size); ASSERT_OK(Put("foo", "v1")); VectorLogPtr log_files_before; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_before)); ASSERT_EQ(1, log_files_before.size()); auto& file_before = log_files_before[0]; ASSERT_LT(file_before->SizeFileBytes(), 1 * kKB); // The log file has preallocated space. ASSERT_GE(GetAllocatedFileSize(dbname_ + file_before->PathName()), preallocated_size); Reopen(options); VectorLogPtr log_files_after; ASSERT_OK(dbfull()->GetSortedWalFiles(log_files_after)); ASSERT_EQ(1, log_files_after.size()); ASSERT_LT(log_files_after[0]->SizeFileBytes(), 1 * kKB); // The preallocated space should be truncated. 
ASSERT_LT(GetAllocatedFileSize(dbname_ + file_before->PathName()), preallocated_size); } #endif // ROCKSDB_FALLOCATE_PRESENT #endif // ROCKSDB_PLATFORM_POSIX #endif // ROCKSDB_LITE TEST_F(DBWALTest, WalTermTest) { Options options = CurrentOptions(); options.env = env_; CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "foo", "bar")); WriteOptions wo; wo.sync = true; wo.disableWAL = false; WriteBatch batch; batch.Put("foo", "bar"); batch.MarkWalTerminationPoint(); batch.Put("foo2", "bar2"); ASSERT_OK(dbfull()->Write(wo, &batch)); // make sure we can re-open it. ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); ASSERT_EQ("bar", Get(1, "foo")); ASSERT_EQ("NOT_FOUND", Get(1, "foo2")); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_with_timestamp_basic_test.cc000066400000000000000000001253241370372246700220530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/perf_context.h" #include "rocksdb/utilities/debug.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "test_util/fault_injection_test_env.h" #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif namespace ROCKSDB_NAMESPACE { class DBBasicTestWithTimestampBase : public DBTestBase { public: explicit DBBasicTestWithTimestampBase(const std::string& dbname) : DBTestBase(dbname) {} protected: static std::string Key1(uint64_t k) { std::string ret; PutFixed64(&ret, k); std::reverse(ret.begin(), ret.end()); return ret; } class TestComparator : public Comparator { private: const Comparator* cmp_without_ts_; public: explicit TestComparator(size_t ts_sz) : Comparator(ts_sz), cmp_without_ts_(nullptr) { cmp_without_ts_ = BytewiseComparator(); } const char* Name() const override { return "TestComparator"; } void FindShortSuccessor(std::string*) const override {} void FindShortestSeparator(std::string*, const Slice&) const override {} int Compare(const Slice& a, const Slice& b) const override { int r = CompareWithoutTimestamp(a, b); if (r != 0 || 0 == timestamp_size()) { return r; } return -CompareTimestamp( Slice(a.data() + a.size() - timestamp_size(), timestamp_size()), Slice(b.data() + b.size() - timestamp_size(), timestamp_size())); } using Comparator::CompareWithoutTimestamp; int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b, bool b_has_ts) const override { if (a_has_ts) { assert(a.size() >= timestamp_size()); } if (b_has_ts) { assert(b.size() >= timestamp_size()); } Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, timestamp_size()) : a; Slice rhs = b_has_ts ? 
StripTimestampFromUserKey(b, timestamp_size()) : b; return cmp_without_ts_->Compare(lhs, rhs); } int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override { if (!ts1.data() && !ts2.data()) { return 0; } else if (ts1.data() && !ts2.data()) { return 1; } else if (!ts1.data() && ts2.data()) { return -1; } assert(ts1.size() == ts2.size()); uint64_t low1 = 0; uint64_t low2 = 0; uint64_t high1 = 0; uint64_t high2 = 0; const size_t kSize = ts1.size(); std::unique_ptr ts1_buf(new char[kSize]); memcpy(ts1_buf.get(), ts1.data(), ts1.size()); std::unique_ptr ts2_buf(new char[kSize]); memcpy(ts2_buf.get(), ts2.data(), ts2.size()); Slice ts1_copy = Slice(ts1_buf.get(), kSize); Slice ts2_copy = Slice(ts2_buf.get(), kSize); auto* ptr1 = const_cast(&ts1_copy); auto* ptr2 = const_cast(&ts2_copy); if (!GetFixed64(ptr1, &low1) || !GetFixed64(ptr1, &high1) || !GetFixed64(ptr2, &low2) || !GetFixed64(ptr2, &high2)) { assert(false); } if (high1 < high2) { return -1; } else if (high1 > high2) { return 1; } if (low1 < low2) { return -1; } else if (low1 > low2) { return 1; } return 0; } }; std::string Timestamp(uint64_t low, uint64_t high) { std::string ts; PutFixed64(&ts, low); PutFixed64(&ts, high); return ts; } void CheckIterUserEntry(const Iterator* it, const Slice& expected_key, const Slice& expected_value, const Slice& expected_ts) const { ASSERT_TRUE(it->Valid()); ASSERT_OK(it->status()); ASSERT_EQ(expected_key, it->key()); ASSERT_EQ(expected_value, it->value()); ASSERT_EQ(expected_ts, it->timestamp()); } void CheckIterEntry(const Iterator* it, const Slice& expected_ukey, SequenceNumber expected_seq, ValueType expected_val_type, const Slice& expected_value, const Slice& expected_ts) { ASSERT_TRUE(it->Valid()); ASSERT_OK(it->status()); std::string ukey_and_ts; ukey_and_ts.assign(expected_ukey.data(), expected_ukey.size()); ukey_and_ts.append(expected_ts.data(), expected_ts.size()); ParsedInternalKey parsed_ikey(ukey_and_ts, expected_seq, expected_val_type); std::string 
ikey; AppendInternalKey(&ikey, parsed_ikey); ASSERT_EQ(Slice(ikey), it->key()); if (expected_val_type == kTypeValue) { ASSERT_EQ(expected_value, it->value()); } ASSERT_EQ(expected_ts, it->timestamp()); } }; class DBBasicTestWithTimestamp : public DBBasicTestWithTimestampBase { public: DBBasicTestWithTimestamp() : DBBasicTestWithTimestampBase("db_basic_test_with_timestamp") {} }; TEST_F(DBBasicTestWithTimestamp, SimpleForwardIterate) { const int kNumKeysPerFile = 128; const uint64_t kMaxKey = 1024; Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); DestroyAndReopen(options); const std::vector start_keys = {1, 0}; const std::vector write_timestamps = {Timestamp(1, 0), Timestamp(3, 0)}; const std::vector read_timestamps = {Timestamp(2, 0), Timestamp(4, 0)}; for (size_t i = 0; i < write_timestamps.size(); ++i) { WriteOptions write_opts; Slice write_ts = write_timestamps[i]; write_opts.timestamp = &write_ts; for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) { Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(i)); ASSERT_OK(s); } } for (size_t i = 0; i < read_timestamps.size(); ++i) { ReadOptions read_opts; Slice read_ts = read_timestamps[i]; read_opts.timestamp = &read_ts; std::unique_ptr it(db_->NewIterator(read_opts)); int count = 0; uint64_t key = 0; for (it->Seek(Key1(0)), key = start_keys[i]; it->Valid(); it->Next(), ++count, ++key) { CheckIterUserEntry(it.get(), Key1(key), "value" + std::to_string(i), write_timestamps[i]); } size_t expected_count = kMaxKey - start_keys[i] + 1; ASSERT_EQ(expected_count, count); // SeekToFirst() with lower bound. // Then iter with lower and upper bounds. 
uint64_t l = 0; uint64_t r = kMaxKey + 1; while (l < r) { std::string lb_str = Key1(l); Slice lb = lb_str; std::string ub_str = Key1(r); Slice ub = ub_str; read_opts.iterate_lower_bound = &lb; read_opts.iterate_upper_bound = &ub; it.reset(db_->NewIterator(read_opts)); for (it->SeekToFirst(), key = std::max(l, start_keys[i]), count = 0; it->Valid(); it->Next(), ++key, ++count) { CheckIterUserEntry(it.get(), Key1(key), "value" + std::to_string(i), write_timestamps[i]); } ASSERT_EQ(r - std::max(l, start_keys[i]), count); l += (kMaxKey / 100); r -= (kMaxKey / 100); } } Close(); } TEST_F(DBBasicTestWithTimestamp, SimpleForwardIterateLowerTsBound) { const int kNumKeysPerFile = 128; const uint64_t kMaxKey = 1024; Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); DestroyAndReopen(options); const std::vector write_timestamps = {Timestamp(1, 0), Timestamp(3, 0)}; const std::vector read_timestamps = {Timestamp(2, 0), Timestamp(4, 0)}; const std::vector read_timestamps_lb = {Timestamp(1, 0), Timestamp(1, 0)}; for (size_t i = 0; i < write_timestamps.size(); ++i) { WriteOptions write_opts; Slice write_ts = write_timestamps[i]; write_opts.timestamp = &write_ts; for (uint64_t key = 0; key <= kMaxKey; ++key) { Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(i)); ASSERT_OK(s); } } for (size_t i = 0; i < read_timestamps.size(); ++i) { ReadOptions read_opts; Slice read_ts = read_timestamps[i]; Slice read_ts_lb = read_timestamps_lb[i]; read_opts.timestamp = &read_ts; read_opts.iter_start_ts = &read_ts_lb; std::unique_ptr it(db_->NewIterator(read_opts)); int count = 0; uint64_t key = 0; for (it->Seek(Key1(0)), key = 0; it->Valid(); it->Next(), ++count, ++key) { CheckIterUserEntry(it.get(), Key1(key), "value" + 
std::to_string(i), write_timestamps[i]); if (i > 0) { it->Next(); CheckIterUserEntry(it.get(), Key1(key), "value" + std::to_string(i - 1), write_timestamps[i - 1]); } } size_t expected_count = kMaxKey + 1; ASSERT_EQ(expected_count, count); } Close(); } TEST_F(DBBasicTestWithTimestamp, ForwardIterateStartSeqnum) { const int kNumKeysPerFile = 128; const uint64_t kMaxKey = 0xffffffffffffffff; const uint64_t kMinKey = kMaxKey - 1023; Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; // Need to disable compaction to bottommost level when sequence number will be // zeroed out, causing the verification of sequence number to fail in this // test. options.disable_auto_compactions = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); DestroyAndReopen(options); std::vector start_seqs; const int kNumTimestamps = 4; std::vector write_ts_list; for (int t = 0; t != kNumTimestamps; ++t) { write_ts_list.push_back(Timestamp(2 * t, /*do not care*/ 17)); } WriteOptions write_opts; for (size_t i = 0; i != write_ts_list.size(); ++i) { Slice write_ts = write_ts_list[i]; write_opts.timestamp = &write_ts; uint64_t k = kMinKey; do { Status s = db_->Put(write_opts, Key1(k), "value" + std::to_string(i)); ASSERT_OK(s); if (k == kMaxKey) { break; } ++k; } while (k != 0); start_seqs.push_back(db_->GetLatestSequenceNumber()); } std::vector read_ts_list; for (int t = 0; t != kNumTimestamps - 1; ++t) { read_ts_list.push_back(Timestamp(2 * t + 3, /*do not care*/ 17)); } ReadOptions read_opts; for (size_t i = 0; i != read_ts_list.size(); ++i) { Slice read_ts = read_ts_list[i]; read_opts.timestamp = &read_ts; read_opts.iter_start_seqnum = start_seqs[i]; std::unique_ptr iter(db_->NewIterator(read_opts)); SequenceNumber expected_seq = start_seqs[i] + 1; uint64_t key = kMinKey; for (iter->Seek(Key1(kMinKey)); 
iter->Valid(); iter->Next()) { CheckIterEntry(iter.get(), Key1(key), expected_seq, kTypeValue, "value" + std::to_string(i + 1), write_ts_list[i + 1]); ++key; ++expected_seq; } } Close(); } TEST_F(DBBasicTestWithTimestamp, ReseekToTargetTimestamp) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; constexpr size_t kNumKeys = 16; options.max_sequential_skip_in_iterations = kNumKeys / 2; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; DestroyAndReopen(options); // Insert kNumKeys WriteOptions write_opts; Status s; for (size_t i = 0; i != kNumKeys; ++i) { std::string ts_str = Timestamp(static_cast(i + 1), 0); Slice ts = ts_str; write_opts.timestamp = &ts; s = db_->Put(write_opts, "foo", "value" + std::to_string(i)); ASSERT_OK(s); } { ReadOptions read_opts; std::string ts_str = Timestamp(1, 0); Slice ts = ts_str; read_opts.timestamp = &ts; std::unique_ptr iter(db_->NewIterator(read_opts)); iter->SeekToFirst(); CheckIterUserEntry(iter.get(), "foo", "value0", ts_str); ASSERT_EQ( 1, options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); } Close(); } TEST_F(DBBasicTestWithTimestamp, ReseekToNextUserKey) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; constexpr size_t kNumKeys = 16; options.max_sequential_skip_in_iterations = kNumKeys / 2; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; DestroyAndReopen(options); // Write kNumKeys + 1 keys WriteOptions write_opts; Status s; for (size_t i = 0; i != kNumKeys; ++i) { std::string ts_str = Timestamp(static_cast(i + 1), 0); Slice ts = ts_str; write_opts.timestamp = &ts; s = db_->Put(write_opts, "a", "value" + std::to_string(i)); ASSERT_OK(s); } { 
std::string ts_str = Timestamp(static_cast(kNumKeys + 1), 0); WriteBatch batch(0, 0, kTimestampSize); batch.Put("a", "new_value"); batch.Put("b", "new_value"); s = batch.AssignTimestamp(ts_str); ASSERT_OK(s); s = db_->Write(write_opts, &batch); ASSERT_OK(s); } { ReadOptions read_opts; std::string ts_str = Timestamp(static_cast(kNumKeys + 1), 0); Slice ts = ts_str; read_opts.timestamp = &ts; std::unique_ptr iter(db_->NewIterator(read_opts)); iter->Seek("a"); iter->Next(); CheckIterUserEntry(iter.get(), "b", "new_value", ts_str); ASSERT_EQ( 1, options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION)); } Close(); } TEST_F(DBBasicTestWithTimestamp, MaxKeysSkipped) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; DestroyAndReopen(options); constexpr size_t max_skippable_internal_keys = 2; const size_t kNumKeys = max_skippable_internal_keys + 2; WriteOptions write_opts; Status s; { std::string ts_str = Timestamp(1, 0); Slice ts = ts_str; write_opts.timestamp = &ts; ASSERT_OK(db_->Put(write_opts, "a", "value")); } for (size_t i = 0; i < kNumKeys; ++i) { std::string ts_str = Timestamp(static_cast(i + 1), 0); Slice ts = ts_str; write_opts.timestamp = &ts; s = db_->Put(write_opts, "b", "value" + std::to_string(i)); ASSERT_OK(s); } { ReadOptions read_opts; read_opts.max_skippable_internal_keys = max_skippable_internal_keys; std::string ts_str = Timestamp(1, 0); Slice ts = ts_str; read_opts.timestamp = &ts; std::unique_ptr iter(db_->NewIterator(read_opts)); iter->SeekToFirst(); iter->Next(); ASSERT_TRUE(iter->status().IsIncomplete()); } Close(); } // Create two L0, and compact them to a new L1. In this test, L1 is L_bottom. // Two L0s: // f1 f2 // ... // Since f2.smallest < f1.largest < f2.largest // f1 and f2 will be the inputs of a real compaction instead of trivial move. 
TEST_F(DBBasicTestWithTimestamp, CompactDeletionWithTimestampMarkerToBottom) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; options.num_levels = 2; options.level0_file_num_compaction_trigger = 2; DestroyAndReopen(options); WriteOptions write_opts; std::string ts_str = Timestamp(1, 0); Slice ts = ts_str; write_opts.timestamp = &ts; ASSERT_OK(db_->Put(write_opts, "a", "value0")); ASSERT_OK(Flush()); ts_str = Timestamp(2, 0); ts = ts_str; write_opts.timestamp = &ts; ASSERT_OK(db_->Put(write_opts, "b", "value0")); ts_str = Timestamp(3, 0); ts = ts_str; write_opts.timestamp = &ts; ASSERT_OK(db_->Delete(write_opts, "a")); ASSERT_OK(Flush()); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ReadOptions read_opts; ts_str = Timestamp(1, 0); ts = ts_str; read_opts.timestamp = &ts; std::string value; Status s = db_->Get(read_opts, "a", &value); ASSERT_OK(s); ASSERT_EQ("value0", value); ts_str = Timestamp(3, 0); ts = ts_str; read_opts.timestamp = &ts; s = db_->Get(read_opts, "a", &value); ASSERT_TRUE(s.IsNotFound()); // Time-travel to the past before deletion ts_str = Timestamp(2, 0); ts = ts_str; read_opts.timestamp = &ts; s = db_->Get(read_opts, "a", &value); ASSERT_OK(s); ASSERT_EQ("value0", value); Close(); } class DBBasicTestWithTimestampCompressionSettings : public DBBasicTestWithTimestampBase, public testing::WithParamInterface< std::tuple, CompressionType, uint32_t, uint32_t>> { public: DBBasicTestWithTimestampCompressionSettings() : DBBasicTestWithTimestampBase( "db_basic_test_with_timestamp_compression") {} }; TEST_P(DBBasicTestWithTimestampCompressionSettings, PutAndGet) { const int kNumKeysPerFile = 1024; const size_t kNumTimestamps = 4; Options options = CurrentOptions(); options.create_if_missing = true; options.env = env_; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); 
size_t ts_sz = Timestamp(0, 0).size(); TestComparator test_cmp(ts_sz); options.comparator = &test_cmp; BlockBasedTableOptions bbto; bbto.filter_policy = std::get<0>(GetParam()); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const CompressionType comp_type = std::get<1>(GetParam()); #if LZ4_VERSION_NUMBER < 10400 // r124+ if (comp_type == kLZ4Compression || comp_type == kLZ4HCCompression) { return; } #endif // LZ4_VERSION_NUMBER >= 10400 if (!ZSTD_Supported() && comp_type == kZSTD) { return; } if (!Zlib_Supported() && comp_type == kZlibCompression) { return; } options.compression = comp_type; options.compression_opts.max_dict_bytes = std::get<2>(GetParam()); if (comp_type == kZSTD) { options.compression_opts.zstd_max_train_bytes = std::get<2>(GetParam()); } options.compression_opts.parallel_threads = std::get<3>(GetParam()); options.target_file_size_base = 1 << 26; // 64MB DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(2, num_cfs); std::vector write_ts_list; std::vector read_ts_list; for (size_t i = 0; i != kNumTimestamps; ++i) { write_ts_list.push_back(Timestamp(i * 2, 0)); read_ts_list.push_back(Timestamp(1 + i * 2, 0)); const Slice write_ts = write_ts_list.back(); WriteOptions wopts; wopts.timestamp = &write_ts; for (int cf = 0; cf != static_cast(num_cfs); ++cf) { for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { ASSERT_OK(Put(cf, Key1(j), "value_" + std::to_string(j) + "_" + std::to_string(i), wopts)); } } } const auto& verify_db_func = [&]() { for (size_t i = 0; i != kNumTimestamps; ++i) { ReadOptions ropts; const Slice read_ts = read_ts_list[i]; ropts.timestamp = &read_ts; for (int cf = 0; cf != static_cast(num_cfs); ++cf) { ColumnFamilyHandle* cfh = handles_[cf]; for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { std::string value; ASSERT_OK(db_->Get(ropts, cfh, Key1(j), &value)); ASSERT_EQ("value_" + 
std::to_string(j) + "_" + std::to_string(i), value); } } } }; verify_db_func(); Close(); } TEST_P(DBBasicTestWithTimestampCompressionSettings, PutDeleteGet) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; const int kNumKeysPerFile = 1024; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); BlockBasedTableOptions bbto; bbto.filter_policy = std::get<0>(GetParam()); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const CompressionType comp_type = std::get<1>(GetParam()); #if LZ4_VERSION_NUMBER < 10400 // r124+ if (comp_type == kLZ4Compression || comp_type == kLZ4HCCompression) { return; } #endif // LZ4_VERSION_NUMBER >= 10400 if (!ZSTD_Supported() && comp_type == kZSTD) { return; } if (!Zlib_Supported() && comp_type == kZlibCompression) { return; } options.compression = comp_type; options.compression_opts.max_dict_bytes = std::get<2>(GetParam()); if (comp_type == kZSTD) { options.compression_opts.zstd_max_train_bytes = std::get<2>(GetParam()); } options.compression_opts.parallel_threads = std::get<3>(GetParam()); options.target_file_size_base = 1 << 26; // 64MB DestroyAndReopen(options); const size_t kNumL0Files = static_cast(Options().level0_file_num_compaction_trigger); { // Generate enough L0 files with ts=1 to trigger compaction to L1 std::string ts_str = Timestamp(1, 0); Slice ts = ts_str; WriteOptions wopts; wopts.timestamp = &ts; for (size_t i = 0; i != kNumL0Files; ++i) { for (int j = 0; j != kNumKeysPerFile; ++j) { ASSERT_OK(db_->Put(wopts, Key1(j), "value" + std::to_string(i))); } ASSERT_OK(db_->Flush(FlushOptions())); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Generate another L0 at ts=3 ts_str = Timestamp(3, 0); ts = ts_str; wopts.timestamp = &ts; for (int i = 0; i != kNumKeysPerFile; ++i) { std::string key_str 
= Key1(i); Slice key(key_str); if ((i % 3) == 0) { ASSERT_OK(db_->Delete(wopts, key)); } else { ASSERT_OK(db_->Put(wopts, key, "new_value")); } } ASSERT_OK(db_->Flush(FlushOptions())); // Populate memtable at ts=5 ts_str = Timestamp(5, 0); ts = ts_str; wopts.timestamp = &ts; for (int i = 0; i != kNumKeysPerFile; ++i) { std::string key_str = Key1(i); Slice key(key_str); if ((i % 3) == 1) { ASSERT_OK(db_->Delete(wopts, key)); } else if ((i % 3) == 2) { ASSERT_OK(db_->Put(wopts, key, "new_value_2")); } } } { std::string ts_str = Timestamp(6, 0); Slice ts = ts_str; ReadOptions ropts; ropts.timestamp = &ts; for (uint64_t i = 0; i != static_cast(kNumKeysPerFile); ++i) { std::string value; Status s = db_->Get(ropts, Key1(i), &value); if ((i % 3) == 2) { ASSERT_OK(s); ASSERT_EQ("new_value_2", value); } else { ASSERT_TRUE(s.IsNotFound()); } } } } #ifndef ROCKSDB_LITE // A class which remembers the name of each flushed file. class FlushedFileCollector : public EventListener { public: FlushedFileCollector() {} ~FlushedFileCollector() override {} void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { InstrumentedMutexLock lock(&mutex_); flushed_files_.push_back(info.file_path); } std::vector GetFlushedFiles() { std::vector result; { InstrumentedMutexLock lock(&mutex_); result = flushed_files_; } return result; } void ClearFlushedFiles() { InstrumentedMutexLock lock(&mutex_); flushed_files_.clear(); } private: std::vector flushed_files_; InstrumentedMutex mutex_; }; TEST_P(DBBasicTestWithTimestampCompressionSettings, PutAndGetWithCompaction) { const int kNumKeysPerFile = 1024; const size_t kNumTimestamps = 2; const size_t kNumKeysPerTimestamp = (kNumKeysPerFile - 1) / kNumTimestamps; const size_t kSplitPosBase = kNumKeysPerTimestamp / 2; Options options = CurrentOptions(); options.create_if_missing = true; options.env = env_; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); FlushedFileCollector* collector = new FlushedFileCollector(); 
options.listeners.emplace_back(collector); size_t ts_sz = Timestamp(0, 0).size(); TestComparator test_cmp(ts_sz); options.comparator = &test_cmp; BlockBasedTableOptions bbto; bbto.filter_policy = std::get<0>(GetParam()); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const CompressionType comp_type = std::get<1>(GetParam()); #if LZ4_VERSION_NUMBER < 10400 // r124+ if (comp_type == kLZ4Compression || comp_type == kLZ4HCCompression) { return; } #endif // LZ4_VERSION_NUMBER >= 10400 if (!ZSTD_Supported() && comp_type == kZSTD) { return; } if (!Zlib_Supported() && comp_type == kZlibCompression) { return; } options.compression = comp_type; options.compression_opts.max_dict_bytes = std::get<2>(GetParam()); if (comp_type == kZSTD) { options.compression_opts.zstd_max_train_bytes = std::get<2>(GetParam()); } options.compression_opts.parallel_threads = std::get<3>(GetParam()); DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(2, num_cfs); std::vector write_ts_list; std::vector read_ts_list; const auto& verify_records_func = [&](size_t i, size_t begin, size_t end, ColumnFamilyHandle* cfh) { std::string value; std::string timestamp; ReadOptions ropts; const Slice read_ts = read_ts_list[i]; ropts.timestamp = &read_ts; std::string expected_timestamp = std::string(write_ts_list[i].data(), write_ts_list[i].size()); for (size_t j = begin; j <= end; ++j) { ASSERT_OK(db_->Get(ropts, cfh, Key1(j), &value, ×tamp)); ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), value); ASSERT_EQ(expected_timestamp, timestamp); } }; for (size_t i = 0; i != kNumTimestamps; ++i) { write_ts_list.push_back(Timestamp(i * 2, 0)); read_ts_list.push_back(Timestamp(1 + i * 2, 0)); const Slice write_ts = write_ts_list.back(); WriteOptions wopts; wopts.timestamp = &write_ts; for (int cf = 0; cf != static_cast(num_cfs); ++cf) { size_t memtable_get_start = 0; for (size_t j = 0; j != 
kNumKeysPerTimestamp; ++j) { ASSERT_OK(Put(cf, Key1(j), "value_" + std::to_string(j) + "_" + std::to_string(i), wopts)); if (j == kSplitPosBase + i || j == kNumKeysPerTimestamp - 1) { verify_records_func(i, memtable_get_start, j, handles_[cf]); memtable_get_start = j + 1; // flush all keys with the same timestamp to two sst files, split at // incremental positions such that lowerlevel[1].smallest.userkey == // higherlevel[0].largest.userkey ASSERT_OK(Flush(cf)); // compact files (2 at each level) to a lower level such that all // keys with the same timestamp is at one level, with newer versions // at higher levels. CompactionOptions compact_opt; compact_opt.compression = kNoCompression; db_->CompactFiles(compact_opt, handles_[cf], collector->GetFlushedFiles(), static_cast(kNumTimestamps - i)); collector->ClearFlushedFiles(); } } } } const auto& verify_db_func = [&]() { for (size_t i = 0; i != kNumTimestamps; ++i) { ReadOptions ropts; const Slice read_ts = read_ts_list[i]; ropts.timestamp = &read_ts; std::string expected_timestamp(write_ts_list[i].data(), write_ts_list[i].size()); for (int cf = 0; cf != static_cast(num_cfs); ++cf) { ColumnFamilyHandle* cfh = handles_[cf]; verify_records_func(i, 0, kNumKeysPerTimestamp - 1, cfh); } } }; verify_db_func(); Close(); } TEST_F(DBBasicTestWithTimestamp, BatchWriteAndMultiGet) { const int kNumKeysPerFile = 8192; const size_t kNumTimestamps = 2; const size_t kNumKeysPerTimestamp = (kNumKeysPerFile - 1) / kNumTimestamps; Options options = CurrentOptions(); options.create_if_missing = true; options.env = env_; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); size_t ts_sz = Timestamp(0, 0).size(); TestComparator test_cmp(ts_sz); options.comparator = &test_cmp; BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy( 10 /*bits_per_key*/, false /*use_block_based_builder*/)); bbto.whole_key_filtering = true; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); 
DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); size_t num_cfs = handles_.size(); ASSERT_EQ(2, num_cfs); std::vector write_ts_list; std::vector read_ts_list; const auto& verify_records_func = [&](size_t i, ColumnFamilyHandle* cfh) { std::vector keys; std::vector key_vals; std::vector values; std::vector timestamps; for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { key_vals.push_back(Key1(j)); } for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { keys.push_back(key_vals[j]); } ReadOptions ropts; const Slice read_ts = read_ts_list[i]; ropts.timestamp = &read_ts; std::string expected_timestamp(write_ts_list[i].data(), write_ts_list[i].size()); std::vector cfhs(keys.size(), cfh); std::vector statuses = db_->MultiGet(ropts, cfhs, keys, &values, ×tamps); for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { ASSERT_OK(statuses[j]); ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), values[j]); ASSERT_EQ(expected_timestamp, timestamps[j]); } }; for (size_t i = 0; i != kNumTimestamps; ++i) { write_ts_list.push_back(Timestamp(i * 2, 0)); read_ts_list.push_back(Timestamp(1 + i * 2, 0)); const Slice& write_ts = write_ts_list.back(); for (int cf = 0; cf != static_cast(num_cfs); ++cf) { WriteOptions wopts; WriteBatch batch(0, 0, ts_sz); for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { ASSERT_OK( batch.Put(handles_[cf], Key1(j), "value_" + std::to_string(j) + "_" + std::to_string(i))); } batch.AssignTimestamp(write_ts); ASSERT_OK(db_->Write(wopts, &batch)); verify_records_func(i, handles_[cf]); ASSERT_OK(Flush(cf)); } } const auto& verify_db_func = [&]() { for (size_t i = 0; i != kNumTimestamps; ++i) { ReadOptions ropts; const Slice read_ts = read_ts_list[i]; ropts.timestamp = &read_ts; for (int cf = 0; cf != static_cast(num_cfs); ++cf) { ColumnFamilyHandle* cfh = handles_[cf]; verify_records_func(i, cfh); } } }; verify_db_func(); Close(); } TEST_F(DBBasicTestWithTimestamp, MultiGetNoReturnTs) { Options options = CurrentOptions(); 
options.env = env_; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; DestroyAndReopen(options); WriteOptions write_opts; std::string ts_str = Timestamp(1, 0); Slice ts = ts_str; write_opts.timestamp = &ts; ASSERT_OK(db_->Put(write_opts, "foo", "value")); ASSERT_OK(db_->Put(write_opts, "bar", "value")); ASSERT_OK(db_->Put(write_opts, "fooxxxxxxxxxxxxxxxx", "value")); ASSERT_OK(db_->Put(write_opts, "barxxxxxxxxxxxxxxxx", "value")); ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily(); ts_str = Timestamp(2, 0); ts = ts_str; ReadOptions read_opts; read_opts.timestamp = &ts; { ColumnFamilyHandle* column_families[] = {cfh, cfh}; Slice keys[] = {"foo", "bar"}; PinnableSlice values[] = {PinnableSlice(), PinnableSlice()}; Status statuses[] = {Status::OK(), Status::OK()}; dbfull()->MultiGet(read_opts, /*num_keys=*/2, &column_families[0], &keys[0], &values[0], &statuses[0], /*sorted_input=*/false); for (const auto& s : statuses) { ASSERT_OK(s); } } { ColumnFamilyHandle* column_families[] = {cfh, cfh, cfh, cfh}; // Make user keys longer than configured timestamp size (16 bytes) to // verify RocksDB does not use the trailing bytes 'x' as timestamp. 
Slice keys[] = {"fooxxxxxxxxxxxxxxxx", "barxxxxxxxxxxxxxxxx", "foo", "bar"}; PinnableSlice values[] = {PinnableSlice(), PinnableSlice(), PinnableSlice(), PinnableSlice()}; Status statuses[] = {Status::OK(), Status::OK(), Status::OK(), Status::OK()}; dbfull()->MultiGet(read_opts, /*num_keys=*/4, &column_families[0], &keys[0], &values[0], &statuses[0], /*sorted_input=*/false); for (const auto& s : statuses) { ASSERT_OK(s); } } Close(); } #endif // !ROCKSDB_LITE INSTANTIATE_TEST_CASE_P( Timestamp, DBBasicTestWithTimestampCompressionSettings, ::testing::Combine( ::testing::Values(std::shared_ptr(nullptr), std::shared_ptr( NewBloomFilterPolicy(10, false))), ::testing::Values(kNoCompression, kZlibCompression, kLZ4Compression, kLZ4HCCompression, kZSTD), ::testing::Values(0, 1 << 14), ::testing::Values(1, 4))); class DBBasicTestWithTimestampPrefixSeek : public DBBasicTestWithTimestampBase, public testing::WithParamInterface< std::tuple, std::shared_ptr, bool>> { public: DBBasicTestWithTimestampPrefixSeek() : DBBasicTestWithTimestampBase( "/db_basic_test_with_timestamp_prefix_seek") {} }; TEST_P(DBBasicTestWithTimestampPrefixSeek, ForwardIterateWithPrefix) { const size_t kNumKeysPerFile = 128; Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; options.prefix_extractor = std::get<0>(GetParam()); options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); BlockBasedTableOptions bbto; bbto.filter_policy = std::get<1>(GetParam()); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyAndReopen(options); const uint64_t kMaxKey = 0xffffffffffffffff; const uint64_t kMinKey = 0xfffffffffffff000; const std::vector write_ts_list = {Timestamp(3, 0xffffffff), Timestamp(6, 0xffffffff)}; WriteOptions write_opts; { for (size_t i = 0; i != write_ts_list.size(); ++i) { Slice write_ts = 
write_ts_list[i]; write_opts.timestamp = &write_ts; for (uint64_t key = kMaxKey; key >= kMinKey; --key) { Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(i)); ASSERT_OK(s); } } } const std::vector read_ts_list = {Timestamp(5, 0xffffffff), Timestamp(9, 0xffffffff)}; { ReadOptions read_opts; read_opts.total_order_seek = false; read_opts.prefix_same_as_start = std::get<2>(GetParam()); fprintf(stdout, "%s %s %d\n", options.prefix_extractor->Name(), bbto.filter_policy ? bbto.filter_policy->Name() : "null", static_cast(read_opts.prefix_same_as_start)); for (size_t i = 0; i != read_ts_list.size(); ++i) { Slice read_ts = read_ts_list[i]; read_opts.timestamp = &read_ts; std::unique_ptr iter(db_->NewIterator(read_opts)); // Seek to kMaxKey iter->Seek(Key1(kMaxKey)); CheckIterUserEntry(iter.get(), Key1(kMaxKey), "value" + std::to_string(i), write_ts_list[i]); iter->Next(); ASSERT_FALSE(iter->Valid()); } const std::vector targets = {kMinKey, kMinKey + 0x10, kMinKey + 0x100, kMaxKey}; const SliceTransform* const pe = options.prefix_extractor.get(); ASSERT_NE(nullptr, pe); const size_t kPrefixShift = 8 * (Key1(0).size() - pe->Transform(Key1(0)).size()); const uint64_t kPrefixMask = ~((static_cast(1) << kPrefixShift) - 1); const uint64_t kNumKeysWithinPrefix = (static_cast(1) << kPrefixShift); for (size_t i = 0; i != read_ts_list.size(); ++i) { Slice read_ts = read_ts_list[i]; read_opts.timestamp = &read_ts; std::unique_ptr it(db_->NewIterator(read_opts)); for (size_t j = 0; j != targets.size(); ++j) { std::string start_key = Key1(targets[j]); uint64_t expected_ub = (targets[j] & kPrefixMask) - 1 + kNumKeysWithinPrefix; uint64_t expected_key = targets[j]; size_t count = 0; it->Seek(Key1(targets[j])); while (it->Valid()) { std::string saved_prev_key; saved_prev_key.assign(it->key().data(), it->key().size()); // Out of prefix if (!read_opts.prefix_same_as_start && pe->Transform(saved_prev_key) != pe->Transform(start_key)) { break; } CheckIterUserEntry(it.get(), 
Key1(expected_key), "value" + std::to_string(i), write_ts_list[i]); ++count; ++expected_key; it->Next(); } ASSERT_EQ(expected_ub - targets[j] + 1, count); } } } Close(); } // TODO(yanqin): consider handling non-fixed-length prefix extractors, e.g. // NoopTransform. INSTANTIATE_TEST_CASE_P( Timestamp, DBBasicTestWithTimestampPrefixSeek, ::testing::Combine( ::testing::Values( std::shared_ptr(NewFixedPrefixTransform(4)), std::shared_ptr(NewFixedPrefixTransform(7)), std::shared_ptr(NewFixedPrefixTransform(8))), ::testing::Values(std::shared_ptr(nullptr), std::shared_ptr( NewBloomFilterPolicy(10 /*bits_per_key*/, false)), std::shared_ptr( NewBloomFilterPolicy(20 /*bits_per_key*/, false))), ::testing::Bool())); class DBBasicTestWithTsIterTombstones : public DBBasicTestWithTimestampBase, public testing::WithParamInterface< std::tuple, std::shared_ptr, int>> { public: DBBasicTestWithTsIterTombstones() : DBBasicTestWithTimestampBase("/db_basic_ts_iter_tombstones") {} }; TEST_P(DBBasicTestWithTsIterTombstones, ForwardIterDelete) { constexpr size_t kNumKeysPerFile = 128; Options options = CurrentOptions(); options.env = env_; const size_t kTimestampSize = Timestamp(0, 0).size(); TestComparator test_cmp(kTimestampSize); options.comparator = &test_cmp; options.prefix_extractor = std::get<0>(GetParam()); options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); BlockBasedTableOptions bbto; bbto.filter_policy = std::get<1>(GetParam()); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.num_levels = std::get<2>(GetParam()); DestroyAndReopen(options); std::vector write_ts_strs = {Timestamp(2, 0), Timestamp(4, 0)}; constexpr uint64_t kMaxKey = 0xffffffffffffffff; constexpr uint64_t kMinKey = 0xfffffffffffff000; // Insert kMinKey...kMaxKey uint64_t key = kMinKey; WriteOptions write_opts; Slice ts = write_ts_strs[0]; write_opts.timestamp = &ts; do { Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key)); ASSERT_OK(s); if 
(kMaxKey == key) { break; } ++key; } while (true); // Delete them all ts = write_ts_strs[1]; write_opts.timestamp = &ts; for (key = kMaxKey; key >= kMinKey; --key) { Status s; if (0 != (key % 2)) { s = db_->Put(write_opts, Key1(key), "value1" + std::to_string(key)); } else { s = db_->Delete(write_opts, Key1(key)); } ASSERT_OK(s); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); { std::string read_ts = Timestamp(4, 0); ts = read_ts; ReadOptions read_opts; read_opts.total_order_seek = true; read_opts.timestamp = &ts; std::unique_ptr iter(db_->NewIterator(read_opts)); size_t count = 0; key = kMinKey + 1; for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++count, key += 2) { ASSERT_EQ(Key1(key), iter->key()); ASSERT_EQ("value1" + std::to_string(key), iter->value()); } ASSERT_EQ((kMaxKey - kMinKey + 1) / 2, count); } Close(); } INSTANTIATE_TEST_CASE_P( Timestamp, DBBasicTestWithTsIterTombstones, ::testing::Combine( ::testing::Values( std::shared_ptr(NewFixedPrefixTransform(7)), std::shared_ptr(NewFixedPrefixTransform(8))), ::testing::Values(std::shared_ptr(nullptr), std::shared_ptr( NewBloomFilterPolicy(10, false)), std::shared_ptr( NewBloomFilterPolicy(20, false))), ::testing::Values(2, 6))); } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_with_timestamp_compaction_test.cc000066400000000000000000000100421370372246700231140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/compaction/compaction.h" #include "db/db_test_util.h" #include "port/stack_trace.h" namespace ROCKSDB_NAMESPACE { namespace { std::string Key1(uint64_t key) { std::string ret; PutFixed64(&ret, key); std::reverse(ret.begin(), ret.end()); return ret; } std::string Timestamp(uint64_t ts) { std::string ret; PutFixed64(&ret, ts); return ret; } } // anonymous namespace class TimestampCompatibleCompactionTest : public DBTestBase { public: TimestampCompatibleCompactionTest() : DBTestBase("/ts_compatible_compaction_test") {} std::string Get(const std::string& key, uint64_t ts) { ReadOptions read_opts; std::string ts_str = Timestamp(ts); Slice ts_slice = ts_str; read_opts.timestamp = &ts_slice; std::string value; Status s = db_->Get(read_opts, key, &value); if (s.IsNotFound()) { value.assign("NOT_FOUND"); } else if (!s.ok()) { value.assign(s.ToString()); } return value; } }; TEST_F(TimestampCompatibleCompactionTest, UserKeyCrossFileBoundary) { Options options = CurrentOptions(); options.env = env_; options.compaction_style = kCompactionStyleLevel; options.comparator = test::ComparatorWithU64Ts(); options.level0_file_num_compaction_trigger = 3; constexpr size_t kNumKeysPerFile = 101; options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); DestroyAndReopen(options); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { const auto* compaction = reinterpret_cast(arg); ASSERT_NE(nullptr, compaction); 
ASSERT_EQ(0, compaction->start_level()); ASSERT_EQ(1, compaction->num_input_levels()); // Check that all 3 L0 ssts are picked for level compaction. ASSERT_EQ(3, compaction->num_input_files(0)); }); SyncPoint::GetInstance()->EnableProcessing(); // Write a L0 with keys 0, 1, ..., 99 with ts from 100 to 199. uint64_t ts = 100; uint64_t key = 0; WriteOptions write_opts; for (; key < kNumKeysPerFile - 1; ++key, ++ts) { std::string ts_str = Timestamp(ts); Slice ts_slice = ts_str; write_opts.timestamp = &ts_slice; ASSERT_OK(db_->Put(write_opts, Key1(key), "foo_" + std::to_string(key))); } // Write another L0 with keys 99 with newer ts. ASSERT_OK(Flush()); uint64_t saved_read_ts1 = ts++; key = 99; for (int i = 0; i < 4; ++i, ++ts) { std::string ts_str = Timestamp(ts); Slice ts_slice = ts_str; write_opts.timestamp = &ts_slice; ASSERT_OK(db_->Put(write_opts, Key1(key), "bar_" + std::to_string(key))); } ASSERT_OK(Flush()); uint64_t saved_read_ts2 = ts++; // Write another L0 with keys 99, 100, 101, ..., 150 for (; key <= 150; ++key, ++ts) { std::string ts_str = Timestamp(ts); Slice ts_slice = ts_str; write_opts.timestamp = &ts_slice; ASSERT_OK(db_->Put(write_opts, Key1(key), "foo1_" + std::to_string(key))); } ASSERT_OK(Flush()); // Wait for compaction to finish ASSERT_OK(dbfull()->TEST_WaitForCompact()); uint64_t read_ts = ts; ASSERT_EQ("foo_99", Get(Key1(99), saved_read_ts1)); ASSERT_EQ("bar_99", Get(Key1(99), saved_read_ts2)); ASSERT_EQ("foo1_99", Get(Key1(99), read_ts)); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/db_write_test.cc000066400000000000000000000271441370372246700170070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include #include #include "db/db_test_util.h" #include "db/write_batch_internal.h" #include "db/write_thread.h" #include "port/port.h" #include "port/stack_trace.h" #include "test_util/fault_injection_test_env.h" #include "test_util/sync_point.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // Test variations of WriteImpl. class DBWriteTest : public DBTestBase, public testing::WithParamInterface { public: DBWriteTest() : DBTestBase("/db_write_test") {} Options GetOptions() { return DBTestBase::GetOptions(GetParam()); } void Open() { DBTestBase::Reopen(GetOptions()); } }; // It is invalid to do sync write while disabling WAL. TEST_P(DBWriteTest, SyncAndDisableWAL) { WriteOptions write_options; write_options.sync = true; write_options.disableWAL = true; ASSERT_TRUE(dbfull()->Put(write_options, "foo", "bar").IsInvalidArgument()); WriteBatch batch; ASSERT_OK(batch.Put("foo", "bar")); ASSERT_TRUE(dbfull()->Write(write_options, &batch).IsInvalidArgument()); } TEST_P(DBWriteTest, WriteThreadHangOnWriteStall) { Options options = GetOptions(); options.level0_stop_writes_trigger = options.level0_slowdown_writes_trigger = 4; std::vector threads; std::atomic thread_num(0); port::Mutex mutex; port::CondVar cv(&mutex); Reopen(options); std::function write_slowdown_func = [&]() { int a = thread_num.fetch_add(1); std::string key = "foo" + std::to_string(a); WriteOptions wo; wo.no_slowdown = false; dbfull()->Put(wo, key, "bar"); }; std::function write_no_slowdown_func = [&]() { int a = thread_num.fetch_add(1); std::string key = "foo" + std::to_string(a); WriteOptions wo; wo.no_slowdown = true; dbfull()->Put(wo, key, "bar"); }; std::function unblock_main_thread_func = [&](void *) { mutex.Lock(); cv.SignalAll(); mutex.Unlock(); }; // Create 3 L0 files and 
schedule 4th without waiting Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); Flush(); Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); Flush(); Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); Flush(); Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WriteThread::JoinBatchGroup:Start", unblock_main_thread_func); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBWriteTest::WriteThreadHangOnWriteStall:1", "DBImpl::BackgroundCallFlush:start"}, {"DBWriteTest::WriteThreadHangOnWriteStall:2", "DBImpl::WriteImpl:BeforeLeaderEnters"}, // Make compaction start wait for the write stall to be detected and // implemented by a write group leader {"DBWriteTest::WriteThreadHangOnWriteStall:3", "BackgroundCallCompaction:0"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Schedule creation of 4th L0 file without waiting. This will seal the // memtable and then wait for a sync point before writing the file. We need // to do it this way because SwitchMemtable() needs to enter the // write_thread FlushOptions fopt; fopt.wait = false; dbfull()->Flush(fopt); // Create a mix of slowdown/no_slowdown write threads mutex.Lock(); // First leader threads.emplace_back(write_slowdown_func); cv.Wait(); // Second leader. Will stall writes threads.emplace_back(write_slowdown_func); cv.Wait(); threads.emplace_back(write_no_slowdown_func); cv.Wait(); threads.emplace_back(write_slowdown_func); cv.Wait(); threads.emplace_back(write_no_slowdown_func); cv.Wait(); threads.emplace_back(write_slowdown_func); cv.Wait(); mutex.Unlock(); TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:1"); dbfull()->TEST_WaitForFlushMemTable(nullptr); // This would have triggered a write stall. Unblock the write group leader TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:2"); // The leader is going to create missing newer links. 
When the leader finishes, // the next leader is going to delay writes and fail writers with no_slowdown TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:3"); for (auto& t : threads) { t.join(); } } TEST_P(DBWriteTest, IOErrorOnWALWritePropagateToWriteThreadFollower) { constexpr int kNumThreads = 5; std::unique_ptr mock_env( new FaultInjectionTestEnv(Env::Default())); Options options = GetOptions(); options.env = mock_env.get(); Reopen(options); std::atomic ready_count{0}; std::atomic leader_count{0}; std::vector threads; mock_env->SetFilesystemActive(false); // Wait until all threads linked to write threads, to make sure // all threads join the same batch group. SyncPoint::GetInstance()->SetCallBack( "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { ready_count++; auto* w = reinterpret_cast(arg); if (w->state == WriteThread::STATE_GROUP_LEADER) { leader_count++; while (ready_count < kNumThreads) { // busy waiting } } }); SyncPoint::GetInstance()->EnableProcessing(); for (int i = 0; i < kNumThreads; i++) { threads.push_back(port::Thread( [&](int index) { // All threads should fail. auto res = Put("key" + ToString(index), "value"); if (options.manual_wal_flush) { ASSERT_TRUE(res.ok()); // we should see fs error when we do the flush // TSAN reports a false alarm for lock-order-inversion but Open and // FlushWAL are not run concurrently. Disabling this until TSAN is // fixed. // res = dbfull()->FlushWAL(false); // ASSERT_FALSE(res.ok()); } else { ASSERT_FALSE(res.ok()); } }, i)); } for (int i = 0; i < kNumThreads; i++) { threads[i].join(); } ASSERT_EQ(1, leader_count); // Close before mock_env destruct. 
Close(); } TEST_P(DBWriteTest, ManualWalFlushInEffect) { Options options = GetOptions(); Reopen(options); // try the 1st WAL created during open ASSERT_TRUE(Put("key" + ToString(0), "value").ok()); ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); ASSERT_TRUE(dbfull()->FlushWAL(false).ok()); ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty()); // try the 2nd wal created during SwitchWAL dbfull()->TEST_SwitchWAL(); ASSERT_TRUE(Put("key" + ToString(0), "value").ok()); ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); ASSERT_TRUE(dbfull()->FlushWAL(false).ok()); ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty()); } TEST_P(DBWriteTest, IOErrorOnWALWriteTriggersReadOnlyMode) { std::unique_ptr mock_env( new FaultInjectionTestEnv(Env::Default())); Options options = GetOptions(); options.env = mock_env.get(); Reopen(options); for (int i = 0; i < 2; i++) { // Forcibly fail WAL write for the first Put only. Subsequent Puts should // fail due to read-only mode mock_env->SetFilesystemActive(i != 0); auto res = Put("key" + ToString(i), "value"); // TSAN reports a false alarm for lock-order-inversion but Open and // FlushWAL are not run concurrently. Disabling this until TSAN is // fixed. /* if (options.manual_wal_flush && i == 0) { // even with manual_wal_flush the 2nd Put should return error because of // the read-only mode ASSERT_TRUE(res.ok()); // we should see fs error when we do the flush res = dbfull()->FlushWAL(false); } */ if (!options.manual_wal_flush) { ASSERT_FALSE(res.ok()); } } // Close before mock_env destruct. 
Close(); } TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) { Random rnd(301); std::unique_ptr mock_env( new FaultInjectionTestEnv(Env::Default())); Options options = GetOptions(); options.env = mock_env.get(); options.writable_file_max_buffer_size = 4 * 1024 * 1024; options.write_buffer_size = 3 * 512 * 1024; options.wal_bytes_per_sync = 256 * 1024; options.manual_wal_flush = true; Reopen(options); mock_env->SetFilesystemActive(false, Status::IOError("Not active")); Status s; for (int i = 0; i < 4 * 512; ++i) { s = Put(Key(i), RandomString(&rnd, 1024)); if (!s.ok()) { break; } } ASSERT_EQ(s.severity(), Status::Severity::kFatalError); mock_env->SetFilesystemActive(true); // Close before mock_env destruct. Close(); } // Test that db->LockWAL() flushes the WAL after locking. TEST_P(DBWriteTest, LockWalInEffect) { Options options = GetOptions(); Reopen(options); // try the 1st WAL created during open ASSERT_OK(Put("key" + ToString(0), "value")); ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); ASSERT_OK(dbfull()->LockWAL()); ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false)); ASSERT_OK(dbfull()->UnlockWAL()); // try the 2nd wal created during SwitchWAL dbfull()->TEST_SwitchWAL(); ASSERT_OK(Put("key" + ToString(0), "value")); ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); ASSERT_OK(dbfull()->LockWAL()); ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false)); ASSERT_OK(dbfull()->UnlockWAL()); } TEST_P(DBWriteTest, ConcurrentlyDisabledWAL) { Options options = GetOptions(); options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.statistics->set_stats_level(StatsLevel::kAll); Reopen(options); std::string wal_key_prefix = "WAL_KEY_"; std::string no_wal_key_prefix = "K_"; // 100 KB value each for NO-WAL operation std::string no_wal_value(1024 * 100, 'X'); // 1B value each for WAL operation std::string wal_value = "0"; std::thread threads[10]; for (int t = 0; t < 10; t++) { threads[t] = std::thread([t, 
wal_key_prefix, wal_value, no_wal_key_prefix, no_wal_value, this] { for(int i = 0; i < 10; i++) { ROCKSDB_NAMESPACE::WriteOptions write_option_disable; write_option_disable.disableWAL = true; ROCKSDB_NAMESPACE::WriteOptions write_option_default; std::string no_wal_key = no_wal_key_prefix + std::to_string(t) + "_" + std::to_string(i); this->Put(no_wal_key, no_wal_value, write_option_disable); std::string wal_key = wal_key_prefix + std::to_string(i) + "_" + std::to_string(i); this->Put(wal_key, wal_value, write_option_default); dbfull()->SyncWAL(); } return 0; }); } for (auto& t: threads) { t.join(); } uint64_t bytes_num = options.statistics->getTickerCount( ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES); // written WAL size should less than 100KB (even included HEADER & FOOTER overhead) ASSERT_LE(bytes_num, 1024 * 100); } INSTANTIATE_TEST_CASE_P(DBWriteTestInstance, DBWriteTest, testing::Values(DBTestBase::kDefault, DBTestBase::kConcurrentWALWrites, DBTestBase::kPipelinedWrite)); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/dbformat.cc000066400000000000000000000154151370372246700157450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/dbformat.h"

#include <stdio.h>
#include <cinttypes>
#include "monitoring/perf_context_imp.h"
#include "port/port.h"
#include "util/coding.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// kValueTypeForSeek defines the ValueType that should be passed when
// constructing a ParsedInternalKey object for seeking to a particular
// sequence number (since we sort sequence numbers in decreasing order
// and the value type is embedded as the low 8 bits in the sequence
// number in internal keys, we need to use the highest-numbered
// ValueType, not the lowest).
const ValueType kValueTypeForSeek = kTypeDeletionWithTimestamp;
const ValueType kValueTypeForSeekForPrev = kTypeDeletion;

// Packs `seq` into the upper 56 bits and `t` into the low 8 bits.
uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
  assert(seq <= kMaxSequenceNumber);
  assert(IsExtendedValueType(t));
  return (seq << 8) | t;
}

// Maps an internal ValueType to the EntryType enumeration. Any type without
// an explicit case below (including kTypeDeletionWithTimestamp) is reported
// as kEntryOther.
EntryType GetEntryType(ValueType value_type) {
  switch (value_type) {
    case kTypeValue:
      return kEntryPut;
    case kTypeDeletion:
      return kEntryDelete;
    case kTypeSingleDeletion:
      return kEntrySingleDelete;
    case kTypeMerge:
      return kEntryMerge;
    case kTypeRangeDeletion:
      return kEntryRangeDeletion;
    case kTypeBlobIndex:
      return kEntryBlobIndex;
    default:
      return kEntryOther;
  }
}

// Parses `internal_key` and fills `*fkey` with its user key, sequence number
// and entry type. Returns false, leaving `*fkey` untouched, when the key
// cannot be parsed.
bool ParseFullKey(const Slice& internal_key, FullKey* fkey) {
  ParsedInternalKey ikey;
  if (!ParseInternalKey(internal_key, &ikey)) {
    return false;
  }
  fkey->user_key = ikey.user_key;
  fkey->sequence = ikey.sequence;
  fkey->type = GetEntryType(ikey.type);
  return true;
}

// Inverse of PackSequenceAndType(): splits `packed` into its sequence number
// (upper 56 bits) and value type (low 8 bits).
void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t) {
  *seq = packed >> 8;
  *t = static_cast<ValueType>(packed & 0xff);

  assert(*seq <= kMaxSequenceNumber);
  assert(IsExtendedValueType(*t));
}

// Appends the user key followed by the 8-byte packed (sequence, type) footer.
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
  result->append(key.user_key.data(), key.user_key.size());
  PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
}

// Same as AppendInternalKey(), except that the last ts.size() bytes of the
// user key are replaced by `ts`.
void AppendInternalKeyWithDifferentTimestamp(std::string* result,
                                             const ParsedInternalKey& key,
                                             const Slice& ts) {
  assert(key.user_key.size() >= ts.size());
  result->append(key.user_key.data(), key.user_key.size() - ts.size());
  result->append(ts.data(), ts.size());
  PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
}

// Appends only the 8-byte packed (sequence, type) footer.
void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
                             ValueType t) {
  PutFixed64(result, PackSequenceAndType(s, t));
}

// Renders the key as '<user key>' seq:<n>, type:<n>; the user key is
// hex-encoded when `hex` is true.
std::string ParsedInternalKey::DebugString(bool hex) const {
  char buf[50];
  snprintf(buf, sizeof(buf), "' seq:%" PRIu64 ", type:%d", sequence,
           static_cast<int>(type));
  std::string result = "'";
  result += user_key.ToString(hex);
  result += buf;
  return result;
}

// Debug rendering of an encoded key; keys that fail to parse are shown as
// "(bad)" followed by the escaped raw bytes.
std::string InternalKey::DebugString(bool hex) const {
  std::string result;
  ParsedInternalKey parsed;
  if (ParseInternalKey(rep_, &parsed)) {
    result = parsed.DebugString(hex);
  } else {
    result = "(bad)";
    result.append(EscapeString(rep_));
  }
  return result;
}

const char* InternalKeyComparator::Name() const { return name_.c_str(); }

int InternalKeyComparator::Compare(const ParsedInternalKey& a,
                                   const ParsedInternalKey& b) const {
  // Order by:
  //    increasing user key (according to user-supplied comparator)
  //    decreasing sequence number
  //    decreasing type (though sequence# should be enough to disambiguate)
  int r = user_comparator_.Compare(a.user_key, b.user_key);
  if (r == 0) {
    if (a.sequence > b.sequence) {
      r = -1;
    } else if (a.sequence < b.sequence) {
      r = +1;
    } else if (a.type > b.type) {
      r = -1;
    } else if (a.type < b.type) {
      r = +1;
    }
  }
  return r;
}

void InternalKeyComparator::FindShortestSeparator(std::string* start,
                                                  const Slice& limit) const {
  // Attempt to shorten the user portion of the key
  Slice user_start = ExtractUserKey(*start);
  Slice user_limit = ExtractUserKey(limit);
  std::string tmp(user_start.data(), user_start.size());
  user_comparator_.FindShortestSeparator(&tmp, user_limit);
  if (tmp.size() <= user_start.size() &&
      user_comparator_.Compare(user_start, tmp) < 0) {
    // User key has become shorter physically, but larger logically.
    // Tack on the earliest possible number to the shortened user key.
    PutFixed64(&tmp,
               PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
    assert(this->Compare(*start, tmp) < 0);
    assert(this->Compare(tmp, limit) < 0);
    start->swap(tmp);
  }
}

void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
  Slice user_key = ExtractUserKey(*key);
  std::string tmp(user_key.data(), user_key.size());
  user_comparator_.FindShortSuccessor(&tmp);
  if (tmp.size() <= user_key.size() &&
      user_comparator_.Compare(user_key, tmp) < 0) {
    // User key has become shorter physically, but larger logically.
    // Tack on the earliest possible number to the shortened user key.
    PutFixed64(&tmp,
               PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
    assert(this->Compare(*key, tmp) < 0);
    key->swap(tmp);
  }
}

// Builds, in one buffer, a varint32 length prefix followed by the user key,
// the optional timestamp and the packed (s, kValueTypeForSeek) footer. The
// inline space_ array is used when the encoding fits, otherwise a heap buffer
// is allocated.
LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
                     const Slice* ts) {
  size_t usize = _user_key.size();
  size_t ts_sz = (nullptr == ts) ? 0 : ts->size();
  size_t needed = usize + ts_sz + 13;  // A conservative estimate
  char* dst;
  if (needed <= sizeof(space_)) {
    dst = space_;
  } else {
    dst = new char[needed];
  }
  start_ = dst;
  // NOTE: We don't support users keys of more than 2GB :)
  dst = EncodeVarint32(dst, static_cast<uint32_t>(usize + ts_sz + 8));
  kstart_ = dst;
  memcpy(dst, _user_key.data(), usize);
  dst += usize;
  if (nullptr != ts) {
    memcpy(dst, ts->data(), ts_sz);
    dst += ts_sz;
  }
  EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
  dst += 8;
  end_ = dst;
}

// Replaces the current buffer with a fresh heap buffer of exactly `key_size`
// bytes. The previous contents are not copied over.
void IterKey::EnlargeBuffer(size_t key_size) {
  // If size is smaller than buffer size, continue using current buffer,
  // or the static allocated one, as default
  assert(key_size > buf_size_);
  // Need to enlarge the buffer.
  ResetBuffer();
  buf_ = new char[key_size];
  buf_size_ = key_size;
}
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/dbformat.h000066400000000000000000000570421370372246700156110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include "db/lookup_key.h" #include "db/merge_context.h" #include "logging/logging.h" #include "monitoring/perf_context_imp.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/filter_policy.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "rocksdb/types.h" #include "util/coding.h" #include "util/user_comparator_wrapper.h" namespace ROCKSDB_NAMESPACE { // The file declares data structures and functions that deal with internal // keys. // Each internal key contains a user key, a sequence number (SequenceNumber) // and a type (ValueType), and they are usually encoded together. // There are some related helper classes here. class InternalKey; // Value types encoded as the last component of internal keys. // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk // data structures. // The highest bit of the value type needs to be reserved to SST tables // for them to do more flexible encoding. enum ValueType : unsigned char { kTypeDeletion = 0x0, kTypeValue = 0x1, kTypeMerge = 0x2, kTypeLogData = 0x3, // WAL only. kTypeColumnFamilyDeletion = 0x4, // WAL only. kTypeColumnFamilyValue = 0x5, // WAL only. kTypeColumnFamilyMerge = 0x6, // WAL only. kTypeSingleDeletion = 0x7, kTypeColumnFamilySingleDeletion = 0x8, // WAL only. kTypeBeginPrepareXID = 0x9, // WAL only. kTypeEndPrepareXID = 0xA, // WAL only. kTypeCommitXID = 0xB, // WAL only. kTypeRollbackXID = 0xC, // WAL only. kTypeNoop = 0xD, // WAL only. 
kTypeColumnFamilyRangeDeletion = 0xE, // WAL only. kTypeRangeDeletion = 0xF, // meta block kTypeColumnFamilyBlobIndex = 0x10, // Blob DB only kTypeBlobIndex = 0x11, // Blob DB only // When the prepared record is also persisted in db, we use a different // record. This is to ensure that the WAL that is generated by a WritePolicy // is not mistakenly read by another, which would result into data // inconsistency. kTypeBeginPersistedPrepareXID = 0x12, // WAL only. // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL // generated by WriteUnprepared write policy is not mistakenly read by // another. kTypeBeginUnprepareXID = 0x13, // WAL only. kTypeDeletionWithTimestamp = 0x14, kMaxValue = 0x7F // Not used for storing records. }; // Defined in dbformat.cc extern const ValueType kValueTypeForSeek; extern const ValueType kValueTypeForSeekForPrev; // Checks whether a type is an inline value type // (i.e. a type used in memtable skiplist and sst file datablock). inline bool IsValueType(ValueType t) { return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex || kTypeDeletionWithTimestamp == t; } // Checks whether a type is from user operation // kTypeRangeDeletion is in meta block so this API is separated from above inline bool IsExtendedValueType(ValueType t) { return IsValueType(t) || t == kTypeRangeDeletion; } // We leave eight bits empty at the bottom so a type and sequence# // can be packed together into 64-bits. static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1); static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64; // The data structure that represents an internal key in the way that user_key, // sequence number and type are stored in separated forms. 
struct ParsedInternalKey { Slice user_key; SequenceNumber sequence; ValueType type; ParsedInternalKey() : sequence(kMaxSequenceNumber) // Make code analyzer happy {} // Intentionally left uninitialized (for speed) // u contains timestamp if user timestamp feature is enabled. ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t) : user_key(u), sequence(seq), type(t) {} std::string DebugString(bool hex = false) const; void clear() { user_key.clear(); sequence = 0; type = kTypeDeletion; } }; // Return the length of the encoding of "key". inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { return key.user_key.size() + 8; } // Pack a sequence number and a ValueType into a uint64_t extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t); // Given the result of PackSequenceAndType, store the sequence number in *seq // and the ValueType in *t. extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t); EntryType GetEntryType(ValueType value_type); // Append the serialization of "key" to *result. extern void AppendInternalKey(std::string* result, const ParsedInternalKey& key); // Append the serialization of "key" to *result, replacing the original // timestamp with argument ts. extern void AppendInternalKeyWithDifferentTimestamp( std::string* result, const ParsedInternalKey& key, const Slice& ts); // Serialized internal key consists of user key followed by footer. // This function appends the footer to *result, assuming that *result already // contains the user key at the end. extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s, ValueType t); // Attempt to parse an internal key from "internal_key". On success, // stores the parsed data in "*result", and returns true. // // On error, returns false, leaves "*result" in an undefined state. extern bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result); // Returns the user key portion of an internal key. 
inline Slice ExtractUserKey(const Slice& internal_key) { assert(internal_key.size() >= 8); return Slice(internal_key.data(), internal_key.size() - 8); } inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key, size_t ts_sz) { assert(internal_key.size() >= 8 + ts_sz); return Slice(internal_key.data(), internal_key.size() - 8 - ts_sz); } inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { assert(user_key.size() >= ts_sz); return Slice(user_key.data(), user_key.size() - ts_sz); } inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { assert(user_key.size() >= ts_sz); return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz); } inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) { assert(internal_key.size() >= 8); const size_t n = internal_key.size(); return DecodeFixed64(internal_key.data() + n - 8); } inline ValueType ExtractValueType(const Slice& internal_key) { uint64_t num = ExtractInternalKeyFooter(internal_key); unsigned char c = num & 0xff; return static_cast(c); } // A comparator for internal keys that uses a specified comparator for // the user key portion and breaks ties by decreasing sequence number. 
class InternalKeyComparator #ifdef NDEBUG final #endif : public Comparator { private: UserComparatorWrapper user_comparator_; std::string name_; public: explicit InternalKeyComparator(const Comparator* c) : Comparator(c->timestamp_size()), user_comparator_(c), name_("rocksdb.InternalKeyComparator:" + std::string(user_comparator_.Name())) {} virtual ~InternalKeyComparator() {} virtual const char* Name() const override; virtual int Compare(const Slice& a, const Slice& b) const override; // Same as Compare except that it excludes the value type from comparison virtual int CompareKeySeq(const Slice& a, const Slice& b) const; virtual void FindShortestSeparator(std::string* start, const Slice& limit) const override; virtual void FindShortSuccessor(std::string* key) const override; const Comparator* user_comparator() const { return user_comparator_.user_comparator(); } int Compare(const InternalKey& a, const InternalKey& b) const; int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const; virtual const Comparator* GetRootComparator() const override { return user_comparator_.GetRootComparator(); } }; // The class represent the internal key in encoded form. 
class InternalKey { private: std::string rep_; public: InternalKey() {} // Leave rep_ as empty to indicate it is invalid InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) { AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t)); } // sets the internal key to be bigger or equal to all internal keys with this // user key void SetMaxPossibleForUserKey(const Slice& _user_key) { AppendInternalKey( &rep_, ParsedInternalKey(_user_key, 0, static_cast(0))); } // sets the internal key to be smaller or equal to all internal keys with this // user key void SetMinPossibleForUserKey(const Slice& _user_key) { AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber, kValueTypeForSeek)); } bool Valid() const { ParsedInternalKey parsed; return ParseInternalKey(Slice(rep_), &parsed); } void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); } Slice Encode() const { assert(!rep_.empty()); return rep_; } Slice user_key() const { return ExtractUserKey(rep_); } size_t size() { return rep_.size(); } void Set(const Slice& _user_key, SequenceNumber s, ValueType t) { SetFrom(ParsedInternalKey(_user_key, s, t)); } void SetFrom(const ParsedInternalKey& p) { rep_.clear(); AppendInternalKey(&rep_, p); } void Clear() { rep_.clear(); } // The underlying representation. // Intended only to be used together with ConvertFromUserKey(). std::string* rep() { return &rep_; } // Assuming that *rep() contains a user key, this method makes internal key // out of it in-place. This saves a memcpy compared to Set()/SetFrom(). 
void ConvertFromUserKey(SequenceNumber s, ValueType t) { AppendInternalKeyFooter(&rep_, s, t); } std::string DebugString(bool hex = false) const; }; inline int InternalKeyComparator::Compare(const InternalKey& a, const InternalKey& b) const { return Compare(a.Encode(), b.Encode()); } inline bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result) { const size_t n = internal_key.size(); if (n < 8) return false; uint64_t num = DecodeFixed64(internal_key.data() + n - 8); unsigned char c = num & 0xff; result->sequence = num >> 8; result->type = static_cast(c); assert(result->type <= ValueType::kMaxValue); result->user_key = Slice(internal_key.data(), n - 8); return IsExtendedValueType(result->type); } // Update the sequence number in the internal key. // Guarantees not to invalidate ikey.data(). inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) { size_t ikey_sz = ikey->size(); assert(ikey_sz >= 8); uint64_t newval = (seq << 8) | t; // Note: Since C++11, strings are guaranteed to be stored contiguously and // string::operator[]() is guaranteed not to change ikey.data(). EncodeFixed64(&(*ikey)[ikey_sz - 8], newval); } // Get the sequence number from the internal key inline uint64_t GetInternalKeySeqno(const Slice& internal_key) { const size_t n = internal_key.size(); assert(n >= 8); uint64_t num = DecodeFixed64(internal_key.data() + n - 8); return num >> 8; } // The class to store keys in an efficient way. It allows: // 1. Users can either copy the key into it, or have it point to an unowned // address. // 2. For copied key, a short inline buffer is kept to reduce memory // allocation for smaller keys. // 3. It tracks user key or internal key, and allow conversion between them. 
class IterKey { public: IterKey() : buf_(space_), key_(buf_), key_size_(0), buf_size_(sizeof(space_)), is_user_key_(true) {} // No copying allowed IterKey(const IterKey&) = delete; void operator=(const IterKey&) = delete; ~IterKey() { ResetBuffer(); } // The bool will be picked up by the next calls to SetKey void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; } // Returns the key in whichever format that was provided to KeyIter Slice GetKey() const { return Slice(key_, key_size_); } Slice GetInternalKey() const { assert(!IsUserKey()); return Slice(key_, key_size_); } Slice GetUserKey() const { if (IsUserKey()) { return Slice(key_, key_size_); } else { assert(key_size_ >= 8); return Slice(key_, key_size_ - 8); } } size_t Size() const { return key_size_; } void Clear() { key_size_ = 0; } // Append "non_shared_data" to its back, from "shared_len" // This function is used in Block::Iter::ParseNextKey // shared_len: bytes in [0, shard_len-1] would be remained // non_shared_data: data to be append, its length must be >= non_shared_len void TrimAppend(const size_t shared_len, const char* non_shared_data, const size_t non_shared_len) { assert(shared_len <= key_size_); size_t total_size = shared_len + non_shared_len; if (IsKeyPinned() /* key is not in buf_ */) { // Copy the key from external memory to buf_ (copy shared_len bytes) EnlargeBufferIfNeeded(total_size); memcpy(buf_, key_, shared_len); } else if (total_size > buf_size_) { // Need to allocate space, delete previous space char* p = new char[total_size]; memcpy(p, key_, shared_len); if (buf_ != space_) { delete[] buf_; } buf_ = p; buf_size_ = total_size; } memcpy(buf_ + shared_len, non_shared_data, non_shared_len); key_ = buf_; key_size_ = total_size; } Slice SetKey(const Slice& key, bool copy = true) { // is_user_key_ expected to be set already via SetIsUserKey return SetKeyImpl(key, copy); } Slice SetUserKey(const Slice& key, bool copy = true) { is_user_key_ = true; return SetKeyImpl(key, copy); } 
Slice SetInternalKey(const Slice& key, bool copy = true) { is_user_key_ = false; return SetKeyImpl(key, copy); } // Copies the content of key, updates the reference to the user key in ikey // and returns a Slice referencing the new copy. Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) { size_t key_n = key.size(); assert(key_n >= 8); SetInternalKey(key); ikey->user_key = Slice(key_, key_n - 8); return Slice(key_, key_n); } // Copy the key into IterKey own buf_ void OwnKey() { assert(IsKeyPinned() == true); Reserve(key_size_); memcpy(buf_, key_, key_size_); key_ = buf_; } // Update the sequence number in the internal key. Guarantees not to // invalidate slices to the key (and the user key). void UpdateInternalKey(uint64_t seq, ValueType t) { assert(!IsKeyPinned()); assert(key_size_ >= 8); uint64_t newval = (seq << 8) | t; EncodeFixed64(&buf_[key_size_ - 8], newval); } bool IsKeyPinned() const { return (key_ != buf_); } void SetInternalKey(const Slice& key_prefix, const Slice& user_key, SequenceNumber s, ValueType value_type = kValueTypeForSeek, const Slice* ts = nullptr) { size_t psize = key_prefix.size(); size_t usize = user_key.size(); size_t ts_sz = (ts != nullptr ? 
ts->size() : 0); EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t) + ts_sz); if (psize > 0) { memcpy(buf_, key_prefix.data(), psize); } memcpy(buf_ + psize, user_key.data(), usize); if (ts) { memcpy(buf_ + psize + usize, ts->data(), ts_sz); } EncodeFixed64(buf_ + usize + psize + ts_sz, PackSequenceAndType(s, value_type)); key_ = buf_; key_size_ = psize + usize + sizeof(uint64_t) + ts_sz; is_user_key_ = false; } void SetInternalKey(const Slice& user_key, SequenceNumber s, ValueType value_type = kValueTypeForSeek, const Slice* ts = nullptr) { SetInternalKey(Slice(), user_key, s, value_type, ts); } void Reserve(size_t size) { EnlargeBufferIfNeeded(size); key_size_ = size; } void SetInternalKey(const ParsedInternalKey& parsed_key) { SetInternalKey(Slice(), parsed_key); } void SetInternalKey(const Slice& key_prefix, const ParsedInternalKey& parsed_key_suffix) { SetInternalKey(key_prefix, parsed_key_suffix.user_key, parsed_key_suffix.sequence, parsed_key_suffix.type); } void EncodeLengthPrefixedKey(const Slice& key) { auto size = key.size(); EnlargeBufferIfNeeded(size + static_cast(VarintLength(size))); char* ptr = EncodeVarint32(buf_, static_cast(size)); memcpy(ptr, key.data(), size); key_ = buf_; is_user_key_ = true; } bool IsUserKey() const { return is_user_key_; } private: char* buf_; const char* key_; size_t key_size_; size_t buf_size_; char space_[32]; // Avoid allocation for short keys bool is_user_key_; Slice SetKeyImpl(const Slice& key, bool copy) { size_t size = key.size(); if (copy) { // Copy key to buf_ EnlargeBufferIfNeeded(size); memcpy(buf_, key.data(), size); key_ = buf_; } else { // Update key_ to point to external memory key_ = key.data(); } key_size_ = size; return Slice(key_, key_size_); } void ResetBuffer() { if (buf_ != space_) { delete[] buf_; buf_ = space_; } buf_size_ = sizeof(space_); key_size_ = 0; } // Enlarge the buffer size if needed based on key_size. // By default, static allocated buffer is used. 
Once there is a key // larger than the static allocated buffer, another buffer is dynamically // allocated, until a larger key buffer is requested. In that case, we // reallocate buffer and delete the old one. void EnlargeBufferIfNeeded(size_t key_size) { // If size is smaller than buffer size, continue using current buffer, // or the static allocated one, as default if (key_size > buf_size_) { EnlargeBuffer(key_size); } } void EnlargeBuffer(size_t key_size); }; // Convert from a SliceTranform of user keys, to a SliceTransform of // user keys. class InternalKeySliceTransform : public SliceTransform { public: explicit InternalKeySliceTransform(const SliceTransform* transform) : transform_(transform) {} virtual const char* Name() const override { return transform_->Name(); } virtual Slice Transform(const Slice& src) const override { auto user_key = ExtractUserKey(src); return transform_->Transform(user_key); } virtual bool InDomain(const Slice& src) const override { auto user_key = ExtractUserKey(src); return transform_->InDomain(user_key); } virtual bool InRange(const Slice& dst) const override { auto user_key = ExtractUserKey(dst); return transform_->InRange(user_key); } const SliceTransform* user_prefix_extractor() const { return transform_; } private: // Like comparator, InternalKeySliceTransform will not take care of the // deletion of transform_ const SliceTransform* const transform_; }; // Read the key of a record from a write batch. // if this record represent the default column family then cf_record // must be passed as false, otherwise it must be passed as true. extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key, bool cf_record); // Read record from a write batch piece from input. // tag, column_family, key, value and blob are return values. Callers own the // Slice they point to. // Tag is defined as ValueType. // input will be advanced to after the record. 
extern Status ReadRecordFromWriteBatch(Slice* input, char* tag, uint32_t* column_family, Slice* key, Slice* value, Slice* blob, Slice* xid); // When user call DeleteRange() to delete a range of keys, // we will store a serialized RangeTombstone in MemTable and SST. // the struct here is a easy-understood form // start/end_key_ is the start/end user key of the range to be deleted struct RangeTombstone { Slice start_key_; Slice end_key_; SequenceNumber seq_; RangeTombstone() = default; RangeTombstone(Slice sk, Slice ek, SequenceNumber sn) : start_key_(sk), end_key_(ek), seq_(sn) {} RangeTombstone(ParsedInternalKey parsed_key, Slice value) { start_key_ = parsed_key.user_key; seq_ = parsed_key.sequence; end_key_ = value; } // be careful to use Serialize(), allocates new memory std::pair Serialize() const { auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion); Slice value = end_key_; return std::make_pair(std::move(key), std::move(value)); } // be careful to use SerializeKey(), allocates new memory InternalKey SerializeKey() const { return InternalKey(start_key_, seq_, kTypeRangeDeletion); } // The tombstone end-key is exclusive, so we generate an internal-key here // which has a similar property. Using kMaxSequenceNumber guarantees that // the returned internal-key will compare less than any other internal-key // with the same user-key. This in turn guarantees that the serialized // end-key for a tombstone such as [a-b] will compare less than the key "b". 
// // be careful to use SerializeEndKey(), allocates new memory InternalKey SerializeEndKey() const { return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion); } }; inline int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const { // Order by: // increasing user key (according to user-supplied comparator) // decreasing sequence number // decreasing type (though sequence# should be enough to disambiguate) int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); if (r == 0) { const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); if (anum > bnum) { r = -1; } else if (anum < bnum) { r = +1; } } return r; } inline int InternalKeyComparator::CompareKeySeq(const Slice& akey, const Slice& bkey) const { // Order by: // increasing user key (according to user-supplied comparator) // decreasing sequence number int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); if (r == 0) { // Shift the number to exclude the last byte which contains the value type const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8; const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8; if (anum > bnum) { r = -1; } else if (anum < bnum) { r = +1; } } return r; } // Wrap InternalKeyComparator as a comparator class for ParsedInternalKey. struct ParsedInternalKeyComparator { explicit ParsedInternalKeyComparator(const InternalKeyComparator* c) : cmp(c) {} bool operator()(const ParsedInternalKey& a, const ParsedInternalKey& b) const { return cmp->Compare(a, b) < 0; } const InternalKeyComparator* cmp; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/dbformat_test.cc000066400000000000000000000163321370372246700170030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/dbformat.h" #include "logging/logging.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { static std::string IKey(const std::string& user_key, uint64_t seq, ValueType vt) { std::string encoded; AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt)); return encoded; } static std::string Shorten(const std::string& s, const std::string& l) { std::string result = s; InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l); return result; } static std::string ShortSuccessor(const std::string& s) { std::string result = s; InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result); return result; } static void TestKey(const std::string& key, uint64_t seq, ValueType vt) { std::string encoded = IKey(key, seq, vt); Slice in(encoded); ParsedInternalKey decoded("", 0, kTypeValue); ASSERT_TRUE(ParseInternalKey(in, &decoded)); ASSERT_EQ(key, decoded.user_key.ToString()); ASSERT_EQ(seq, decoded.sequence); ASSERT_EQ(vt, decoded.type); ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded)); } class FormatTest : public testing::Test {}; TEST_F(FormatTest, InternalKey_EncodeDecode) { const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" }; const uint64_t seq[] = { 1, 2, 3, (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1, (1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1, (1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1 }; for (unsigned int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) { for (unsigned int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) { TestKey(keys[k], seq[s], kTypeValue); 
TestKey("hello", 1, kTypeDeletion); } } } TEST_F(FormatTest, InternalKeyShortSeparator) { // When user keys are same ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 99, kTypeValue))); ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 101, kTypeValue))); ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeValue))); ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion))); // When user keys are misordered ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("bar", 99, kTypeValue))); // When user keys are different, but correctly ordered ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), Shorten(IKey("foo", 100, kTypeValue), IKey("hello", 200, kTypeValue))); ASSERT_EQ(IKey("ABC2", kMaxSequenceNumber, kValueTypeForSeek), Shorten(IKey("ABC1AAAAA", 100, kTypeValue), IKey("ABC2ABB", 200, kTypeValue))); ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek), Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA2AA", 200, kTypeValue))); ASSERT_EQ( IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek), Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA4", 200, kTypeValue))); ASSERT_EQ( IKey("AAA1B", kMaxSequenceNumber, kValueTypeForSeek), Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA2", 200, kTypeValue))); ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek), Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA2A", 200, kTypeValue))); ASSERT_EQ( IKey("AAA1", 100, kTypeValue), Shorten(IKey("AAA1", 100, kTypeValue), IKey("AAA2", 200, kTypeValue))); // When start user key is prefix of limit user key ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("foobar", 200, kTypeValue))); // When limit user key is prefix of start user key ASSERT_EQ(IKey("foobar", 100, kTypeValue), Shorten(IKey("foobar", 100, 
kTypeValue), IKey("foo", 200, kTypeValue))); } TEST_F(FormatTest, InternalKeyShortestSuccessor) { ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), ShortSuccessor(IKey("foo", 100, kTypeValue))); ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue), ShortSuccessor(IKey("\xff\xff", 100, kTypeValue))); } TEST_F(FormatTest, IterKeyOperation) { IterKey k; const char p[] = "abcdefghijklmnopqrstuvwxyz"; const char q[] = "0123456789"; ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("")); k.TrimAppend(0, p, 3); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("abc")); k.TrimAppend(1, p, 3); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("aabc")); k.TrimAppend(0, p, 26); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("abcdefghijklmnopqrstuvwxyz")); k.TrimAppend(26, q, 10); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("abcdefghijklmnopqrstuvwxyz0123456789")); k.TrimAppend(36, q, 1); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("abcdefghijklmnopqrstuvwxyz01234567890")); k.TrimAppend(26, q, 1); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("abcdefghijklmnopqrstuvwxyz0")); // Size going up, memory allocation is triggered k.TrimAppend(27, p, 26); ASSERT_EQ(std::string(k.GetUserKey().data(), k.GetUserKey().size()), std::string("abcdefghijklmnopqrstuvwxyz0" "abcdefghijklmnopqrstuvwxyz")); } TEST_F(FormatTest, UpdateInternalKey) { std::string user_key("abcdefghijklmnopqrstuvwxyz"); uint64_t new_seq = 0x123456; ValueType new_val_type = kTypeDeletion; std::string ikey; AppendInternalKey(&ikey, ParsedInternalKey(user_key, 100U, kTypeValue)); size_t ikey_size = ikey.size(); UpdateInternalKey(&ikey, new_seq, new_val_type); ASSERT_EQ(ikey_size, ikey.size()); Slice in(ikey); ParsedInternalKey decoded; ASSERT_TRUE(ParseInternalKey(in, &decoded)); 
ASSERT_EQ(user_key, decoded.user_key.ToString()); ASSERT_EQ(new_seq, decoded.sequence); ASSERT_EQ(new_val_type, decoded.type); } TEST_F(FormatTest, RangeTombstoneSerializeEndKey) { RangeTombstone t("a", "b", 2); InternalKey k("b", 3, kTypeValue); const InternalKeyComparator cmp(BytewiseComparator()); ASSERT_LT(cmp.Compare(t.SerializeEndKey(), k), 0); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/deletefile_test.cc000066400000000000000000000444021370372246700173060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifndef ROCKSDB_LITE #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/version_set.h" #include "db/write_batch_internal.h" #include "file/filename.h" #include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/transaction_log.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class DeleteFileTest : public DBTestBase { public: const int numlevels_; const std::string wal_dir_; DeleteFileTest() : DBTestBase("/deletefile_test"), numlevels_(7), wal_dir_(dbname_ + "/wal_files") {} void SetOptions(Options* options) { assert(options); options->delete_obsolete_files_period_micros = 0; // always do full purge options->enable_thread_tracking = true; options->write_buffer_size = 1024 * 1024 * 1000; options->target_file_size_base = 1024 * 1024 * 1000; options->max_bytes_for_level_base = 1024 * 1024 * 1000; options->WAL_ttl_seconds = 300; // Used to test log files options->WAL_size_limit_MB = 1024; // Used to test log files options->wal_dir = wal_dir_; } void AddKeys(int numkeys, int startkey = 0) { WriteOptions options; options.sync = false; ReadOptions roptions; for (int i = startkey; i < (numkeys + startkey) ; i++) { std::string temp = ToString(i); Slice key(temp); Slice value(temp); ASSERT_OK(db_->Put(options, key, value)); } } int numKeysInLevels( std::vector &metadata, std::vector *keysperlevel = nullptr) { if (keysperlevel != nullptr) { keysperlevel->resize(numlevels_); } int numKeys = 0; for (size_t i = 0; i < metadata.size(); i++) { int startkey = atoi(metadata[i].smallestkey.c_str()); int endkey = atoi(metadata[i].largestkey.c_str()); int numkeysinfile = (endkey - startkey + 1); numKeys += numkeysinfile; if (keysperlevel != nullptr) { (*keysperlevel)[(int)metadata[i].level] += numkeysinfile; } fprintf(stderr, "level %d name %s smallest %s largest %s\n", 
metadata[i].level, metadata[i].name.c_str(), metadata[i].smallestkey.c_str(), metadata[i].largestkey.c_str()); } return numKeys; } void CreateTwoLevels() { AddKeys(50000, 10000); ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); for (int i = 0; i < 2; ++i) { ASSERT_OK(dbfull()->TEST_CompactRange(i, nullptr, nullptr)); } AddKeys(50000, 10000); ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); } void CheckFileTypeCounts(const std::string& dir, int required_log, int required_sst, int required_manifest) { std::vector filenames; env_->GetChildren(dir, &filenames); int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0; for (auto file : filenames) { uint64_t number; FileType type; if (ParseFileName(file, &number, &type)) { log_cnt += (type == kLogFile); sst_cnt += (type == kTableFile); manifest_cnt += (type == kDescriptorFile); } } ASSERT_EQ(required_log, log_cnt); ASSERT_EQ(required_sst, sst_cnt); ASSERT_EQ(required_manifest, manifest_cnt); } static void DoSleep(void* arg) { auto test = reinterpret_cast(arg); test->env_->SleepForMicroseconds(2 * 1000 * 1000); } // An empty job to guard all jobs are processed static void GuardFinish(void* /*arg*/) { TEST_SYNC_POINT("DeleteFileTest::GuardFinish"); } }; TEST_F(DeleteFileTest, AddKeysAndQueryLevels) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); CreateTwoLevels(); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); std::string level1file = ""; int level1keycount = 0; std::string level2file = ""; int level2keycount = 0; int level1index = 0; int level2index = 1; ASSERT_EQ((int)metadata.size(), 2); if (metadata[0].level == 2) { level1index = 1; level2index = 0; } level1file = metadata[level1index].name; int startkey = atoi(metadata[level1index].smallestkey.c_str()); int endkey = 
atoi(metadata[level1index].largestkey.c_str()); level1keycount = (endkey - startkey + 1); level2file = metadata[level2index].name; startkey = atoi(metadata[level2index].smallestkey.c_str()); endkey = atoi(metadata[level2index].largestkey.c_str()); level2keycount = (endkey - startkey + 1); // COntrolled setup. Levels 1 and 2 should both have 50K files. // This is a little fragile as it depends on the current // compaction heuristics. ASSERT_EQ(level1keycount, 50000); ASSERT_EQ(level2keycount, 50000); Status status = db_->DeleteFile("0.sst"); ASSERT_TRUE(status.IsInvalidArgument()); // intermediate level files cannot be deleted. status = db_->DeleteFile(level1file); ASSERT_TRUE(status.IsInvalidArgument()); // Lowest level file deletion should succeed. ASSERT_OK(db_->DeleteFile(level2file)); } TEST_F(DeleteFileTest, PurgeObsoleteFilesTest) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); CreateTwoLevels(); // there should be only one (empty) log file because CreateTwoLevels() // flushes the memtables to disk CheckFileTypeCounts(wal_dir_, 1, 0, 0); // 2 ssts, 1 manifest CheckFileTypeCounts(dbname_, 0, 2, 1); std::string first("0"), last("999999"); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; Slice first_slice(first), last_slice(last); db_->CompactRange(compact_options, &first_slice, &last_slice); // 1 sst after compaction CheckFileTypeCounts(dbname_, 0, 1, 1); // this time, we keep an iterator alive Reopen(options); Iterator *itr = nullptr; CreateTwoLevels(); itr = db_->NewIterator(ReadOptions()); db_->CompactRange(compact_options, &first_slice, &last_slice); // 3 sst after compaction with live iterator CheckFileTypeCounts(dbname_, 0, 3, 1); delete itr; // 1 sst after iterator deletion CheckFileTypeCounts(dbname_, 0, 1, 1); } TEST_F(DeleteFileTest, BackgroundPurgeIteratorTest) { Options options = CurrentOptions(); 
SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); std::string first("0"), last("999999"); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; Slice first_slice(first), last_slice(last); // We keep an iterator alive Iterator* itr = nullptr; CreateTwoLevels(); ReadOptions read_options; read_options.background_purge_on_iterator_cleanup = true; itr = db_->NewIterator(read_options); db_->CompactRange(compact_options, &first_slice, &last_slice); // 3 sst after compaction with live iterator CheckFileTypeCounts(dbname_, 0, 3, 1); test::SleepingBackgroundTask sleeping_task_before; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_before, Env::Priority::HIGH); delete itr; test::SleepingBackgroundTask sleeping_task_after; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_after, Env::Priority::HIGH); // Make sure no purges are executed foreground CheckFileTypeCounts(dbname_, 0, 3, 1); sleeping_task_before.WakeUp(); sleeping_task_before.WaitUntilDone(); // Make sure all background purges are executed sleeping_task_after.WakeUp(); sleeping_task_after.WaitUntilDone(); // 1 sst after iterator deletion CheckFileTypeCounts(dbname_, 0, 1, 1); } TEST_F(DeleteFileTest, BackgroundPurgeCFDropTest) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); auto do_test = [&](bool bg_purge) { ColumnFamilyOptions co; co.max_write_buffer_size_to_maintain = static_cast(co.write_buffer_size); WriteOptions wo; FlushOptions fo; ColumnFamilyHandle* cfh = nullptr; ASSERT_OK(db_->CreateColumnFamily(co, "dropme", &cfh)); ASSERT_OK(db_->Put(wo, cfh, "pika", "chu")); ASSERT_OK(db_->Flush(fo, cfh)); // Expect 1 sst file. CheckFileTypeCounts(dbname_, 0, 1, 1); ASSERT_OK(db_->DropColumnFamily(cfh)); // Still 1 file, it won't be deleted while ColumnFamilyHandle is alive. 
CheckFileTypeCounts(dbname_, 0, 1, 1); delete cfh; test::SleepingBackgroundTask sleeping_task_after; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_after, Env::Priority::HIGH); // If background purge is enabled, the file should still be there. CheckFileTypeCounts(dbname_, 0, bg_purge ? 1 : 0, 1); TEST_SYNC_POINT("DeleteFileTest::BackgroundPurgeCFDropTest:1"); // Execute background purges. sleeping_task_after.WakeUp(); sleeping_task_after.WaitUntilDone(); // The file should have been deleted. CheckFileTypeCounts(dbname_, 0, 0, 1); }; { SCOPED_TRACE("avoid_unnecessary_blocking_io = false"); do_test(false); } SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency( {{"DeleteFileTest::BackgroundPurgeCFDropTest:1", "DBImpl::BGWorkPurge:start"}}); SyncPoint::GetInstance()->EnableProcessing(); options.avoid_unnecessary_blocking_io = true; options.create_if_missing = false; Reopen(options); { SCOPED_TRACE("avoid_unnecessary_blocking_io = true"); do_test(true); } } // This test is to reproduce a bug that read invalid ReadOption in iterator // cleanup function TEST_F(DeleteFileTest, BackgroundPurgeCopyOptions) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); std::string first("0"), last("999999"); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; Slice first_slice(first), last_slice(last); // We keep an iterator alive Iterator* itr = nullptr; CreateTwoLevels(); { ReadOptions read_options; read_options.background_purge_on_iterator_cleanup = true; itr = db_->NewIterator(read_options); // ReadOptions is deleted, but iterator cleanup function should not be // affected } db_->CompactRange(compact_options, &first_slice, &last_slice); // 3 sst after compaction with live iterator CheckFileTypeCounts(dbname_, 0, 3, 1); delete itr; 
test::SleepingBackgroundTask sleeping_task_after; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_after, Env::Priority::HIGH); // Make sure all background purges are executed sleeping_task_after.WakeUp(); sleeping_task_after.WaitUntilDone(); // 1 sst after iterator deletion CheckFileTypeCounts(dbname_, 0, 1, 1); } TEST_F(DeleteFileTest, BackgroundPurgeTestMultipleJobs) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); std::string first("0"), last("999999"); CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = 2; Slice first_slice(first), last_slice(last); // We keep an iterator alive CreateTwoLevels(); ReadOptions read_options; read_options.background_purge_on_iterator_cleanup = true; Iterator* itr1 = db_->NewIterator(read_options); CreateTwoLevels(); Iterator* itr2 = db_->NewIterator(read_options); db_->CompactRange(compact_options, &first_slice, &last_slice); // 5 sst files after 2 compactions with 2 live iterators CheckFileTypeCounts(dbname_, 0, 5, 1); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); // ~DBImpl should wait until all BGWorkPurge are finished ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::~DBImpl:WaitJob", "DBImpl::BGWorkPurge"}, {"DeleteFileTest::GuardFinish", "DeleteFileTest::BackgroundPurgeTestMultipleJobs:DBClose"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); delete itr1; env_->Schedule(&DeleteFileTest::DoSleep, this, Env::Priority::HIGH); delete itr2; env_->Schedule(&DeleteFileTest::GuardFinish, nullptr, Env::Priority::HIGH); Close(); TEST_SYNC_POINT("DeleteFileTest::BackgroundPurgeTestMultipleJobs:DBClose"); // 1 sst after iterator deletion CheckFileTypeCounts(dbname_, 0, 1, 1); } TEST_F(DeleteFileTest, DeleteFileWithIterator) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); 
options.create_if_missing = true; Reopen(options); CreateTwoLevels(); ReadOptions read_options; Iterator* it = db_->NewIterator(read_options); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); std::string level2file; ASSERT_EQ(metadata.size(), static_cast(2)); if (metadata[0].level == 1) { level2file = metadata[1].name; } else { level2file = metadata[0].name; } Status status = db_->DeleteFile(level2file); fprintf(stdout, "Deletion status %s: %s\n", level2file.c_str(), status.ToString().c_str()); ASSERT_TRUE(status.ok()); it->SeekToFirst(); int numKeysIterated = 0; while(it->Valid()) { numKeysIterated++; it->Next(); } ASSERT_EQ(numKeysIterated, 50000); delete it; } TEST_F(DeleteFileTest, DeleteLogFiles) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); AddKeys(10, 0); VectorLogPtr logfiles; db_->GetSortedWalFiles(logfiles); ASSERT_GT(logfiles.size(), 0UL); // Take the last log file which is expected to be alive and try to delete it // Should not succeed because live logs are not allowed to be deleted std::unique_ptr alive_log = std::move(logfiles.back()); ASSERT_EQ(alive_log->Type(), kAliveLogFile); ASSERT_OK(env_->FileExists(wal_dir_ + "/" + alive_log->PathName())); fprintf(stdout, "Deleting alive log file %s\n", alive_log->PathName().c_str()); ASSERT_TRUE(!db_->DeleteFile(alive_log->PathName()).ok()); ASSERT_OK(env_->FileExists(wal_dir_ + "/" + alive_log->PathName())); logfiles.clear(); // Call Flush to bring about a new working log file and add more keys // Call Flush again to flush out memtable and move alive log to archived log // and try to delete the archived log file FlushOptions fopts; db_->Flush(fopts); AddKeys(10, 0); db_->Flush(fopts); db_->GetSortedWalFiles(logfiles); ASSERT_GT(logfiles.size(), 0UL); std::unique_ptr archived_log = std::move(logfiles.front()); ASSERT_EQ(archived_log->Type(), kArchivedLogFile); ASSERT_OK(env_->FileExists(wal_dir_ + "/" + 
archived_log->PathName())); fprintf(stdout, "Deleting archived log file %s\n", archived_log->PathName().c_str()); ASSERT_OK(db_->DeleteFile(archived_log->PathName())); ASSERT_EQ(Status::NotFound(), env_->FileExists(wal_dir_ + "/" + archived_log->PathName())); } TEST_F(DeleteFileTest, DeleteNonDefaultColumnFamily) { Options options = CurrentOptions(); SetOptions(&options); Destroy(options); options.create_if_missing = true; Reopen(options); CreateAndReopenWithCF({"new_cf"}, options); Random rnd(5); for (int i = 0; i < 1000; ++i) { ASSERT_OK(db_->Put(WriteOptions(), handles_[1], test::RandomKey(&rnd, 10), test::RandomKey(&rnd, 10))); } ASSERT_OK(db_->Flush(FlushOptions(), handles_[1])); for (int i = 0; i < 1000; ++i) { ASSERT_OK(db_->Put(WriteOptions(), handles_[1], test::RandomKey(&rnd, 10), test::RandomKey(&rnd, 10))); } ASSERT_OK(db_->Flush(FlushOptions(), handles_[1])); std::vector metadata; db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(2U, metadata.size()); ASSERT_EQ("new_cf", metadata[0].column_family_name); ASSERT_EQ("new_cf", metadata[1].column_family_name); auto old_file = metadata[0].smallest_seqno < metadata[1].smallest_seqno ? metadata[0].name : metadata[1].name; auto new_file = metadata[0].smallest_seqno > metadata[1].smallest_seqno ? 
metadata[0].name : metadata[1].name; ASSERT_TRUE(db_->DeleteFile(new_file).IsInvalidArgument()); ASSERT_OK(db_->DeleteFile(old_file)); { std::unique_ptr itr(db_->NewIterator(ReadOptions(), handles_[1])); int count = 0; for (itr->SeekToFirst(); itr->Valid(); itr->Next()) { ASSERT_OK(itr->status()); ++count; } ASSERT_EQ(count, 1000); } Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "new_cf"}, options); { std::unique_ptr itr(db_->NewIterator(ReadOptions(), handles_[1])); int count = 0; for (itr->SeekToFirst(); itr->Valid(); itr->Next()) { ASSERT_OK(itr->status()); ++count; } ASSERT_EQ(count, 1000); } } } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as DBImpl::DeleteFile is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/error_handler.cc000066400000000000000000000360621370372246700167760ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "db/error_handler.h" #include "db/db_impl/db_impl.h" #include "db/event_helpers.h" #include "file/sst_file_manager_impl.h" namespace ROCKSDB_NAMESPACE { // Maps to help decide the severity of an error based on the // BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks // is set or not. 
There are 3 maps, going from most specific to least specific // (i.e from all 4 fields in a tuple to only the BackgroundErrorReason and // paranoid_checks). The less specific map serves as a catch all in case we miss // a specific error code or subcode. std::map, Status::Severity> ErrorSeverityMap = { // Errors during BG compaction {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError, Status::SubCode::kNoSpace, true), Status::Severity::kSoftError}, {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError, Status::SubCode::kNoSpace, false), Status::Severity::kNoError}, {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError, Status::SubCode::kSpaceLimit, true), Status::Severity::kHardError}, // Errors during BG flush {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, Status::SubCode::kNoSpace, true), Status::Severity::kHardError}, {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, Status::SubCode::kNoSpace, false), Status::Severity::kNoError}, {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, Status::SubCode::kSpaceLimit, true), Status::Severity::kHardError}, // Errors during Write {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kIOError, Status::SubCode::kNoSpace, true), Status::Severity::kHardError}, {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kIOError, Status::SubCode::kNoSpace, false), Status::Severity::kHardError}, // Errors during MANIFEST write {std::make_tuple(BackgroundErrorReason::kManifestWrite, Status::Code::kIOError, Status::SubCode::kNoSpace, true), Status::Severity::kHardError}, {std::make_tuple(BackgroundErrorReason::kManifestWrite, Status::Code::kIOError, Status::SubCode::kNoSpace, false), Status::Severity::kHardError}, }; std::map, Status::Severity> DefaultErrorSeverityMap = { // Errors during BG compaction {std::make_tuple(BackgroundErrorReason::kCompaction, 
Status::Code::kCorruption, true), Status::Severity::kUnrecoverableError}, {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kCorruption, false), Status::Severity::kNoError}, {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kCompaction, Status::Code::kIOError, false), Status::Severity::kNoError}, // Errors during BG flush {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kCorruption, true), Status::Severity::kUnrecoverableError}, {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kCorruption, false), Status::Severity::kNoError}, {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError, false), Status::Severity::kNoError}, // Errors during Write {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kCorruption, true), Status::Severity::kUnrecoverableError}, {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kCorruption, false), Status::Severity::kNoError}, {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kIOError, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kWriteCallback, Status::Code::kIOError, false), Status::Severity::kNoError}, {std::make_tuple(BackgroundErrorReason::kManifestWrite, Status::Code::kIOError, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kManifestWrite, Status::Code::kIOError, false), Status::Severity::kFatalError}, }; std::map, Status::Severity> DefaultReasonMap = { // Errors during BG compaction {std::make_tuple(BackgroundErrorReason::kCompaction, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kCompaction, false), Status::Severity::kNoError}, // Errors during BG flush {std::make_tuple(BackgroundErrorReason::kFlush, true), 
Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kFlush, false), Status::Severity::kNoError}, // Errors during Write {std::make_tuple(BackgroundErrorReason::kWriteCallback, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kWriteCallback, false), Status::Severity::kFatalError}, // Errors during Memtable update {std::make_tuple(BackgroundErrorReason::kMemTable, true), Status::Severity::kFatalError}, {std::make_tuple(BackgroundErrorReason::kMemTable, false), Status::Severity::kFatalError}, }; void ErrorHandler::CancelErrorRecovery() { #ifndef ROCKSDB_LITE db_mutex_->AssertHeld(); // We'll release the lock before calling sfm, so make sure no new // recovery gets scheduled at that point auto_recovery_ = false; SstFileManagerImpl* sfm = reinterpret_cast( db_options_.sst_file_manager.get()); if (sfm) { // This may or may not cancel a pending recovery db_mutex_->Unlock(); bool cancelled = sfm->CancelErrorRecovery(this); db_mutex_->Lock(); if (cancelled) { recovery_in_prog_ = false; } } #endif } // This is the main function for looking at an error during a background // operation and deciding the severity, and error recovery strategy. The high // level algorithm is as follows - // 1. Classify the severity of the error based on the ErrorSeverityMap, // DefaultErrorSeverityMap and DefaultReasonMap defined earlier // 2. Call a Status code specific override function to adjust the severity // if needed. The reason for this is our ability to recover may depend on // the exact options enabled in DBOptions // 3. Determine if auto recovery is possible. A listener notification callback // is called, which can disable the auto recovery even if we decide its // feasible // 4. For Status::NoSpace() errors, rely on SstFileManagerImpl to control // the actual recovery. If no sst file manager is specified in DBOptions, // a default one is allocated during DB::Open(), so there will always be // one. 
// This can also get called as part of a recovery operation. In that case, we // also track the error separately in recovery_error_ so we can tell in the // end whether recovery succeeded or not Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reason) { db_mutex_->AssertHeld(); if (bg_err.ok()) { return Status::OK(); } bool paranoid = db_options_.paranoid_checks; Status::Severity sev = Status::Severity::kFatalError; Status new_bg_err; bool found = false; { auto entry = ErrorSeverityMap.find(std::make_tuple(reason, bg_err.code(), bg_err.subcode(), paranoid)); if (entry != ErrorSeverityMap.end()) { sev = entry->second; found = true; } } if (!found) { auto entry = DefaultErrorSeverityMap.find(std::make_tuple(reason, bg_err.code(), paranoid)); if (entry != DefaultErrorSeverityMap.end()) { sev = entry->second; found = true; } } if (!found) { auto entry = DefaultReasonMap.find(std::make_tuple(reason, paranoid)); if (entry != DefaultReasonMap.end()) { sev = entry->second; } } new_bg_err = Status(bg_err, sev); // Check if recovery is currently in progress. If it is, we will save this // error so we can check it at the end to see if recovery succeeded or not if (recovery_in_prog_ && recovery_error_.ok()) { recovery_error_ = new_bg_err; } bool auto_recovery = auto_recovery_; if (new_bg_err.severity() >= Status::Severity::kFatalError && auto_recovery) { auto_recovery = false; } // Allow some error specific overrides if (new_bg_err == Status::NoSpace()) { new_bg_err = OverrideNoSpaceError(new_bg_err, &auto_recovery); } if (!new_bg_err.ok()) { Status s = new_bg_err; EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s, db_mutex_, &auto_recovery); if (!s.ok() && (s.severity() > bg_error_.severity())) { bg_error_ = s; } else { // This error is less severe than previously encountered error. 
Don't // take any further action return bg_error_; } } if (auto_recovery) { recovery_in_prog_ = true; // Kick-off error specific recovery if (bg_error_ == Status::NoSpace()) { RecoverFromNoSpace(); } } return bg_error_; } Status ErrorHandler::SetBGError(const IOStatus& bg_io_err, BackgroundErrorReason reason) { db_mutex_->AssertHeld(); if (bg_io_err.ok()) { return Status::OK(); } if (recovery_in_prog_ && recovery_error_.ok()) { recovery_error_ = bg_io_err; } if (BackgroundErrorReason::kManifestWrite == reason) { // Always returns ok db_->DisableFileDeletionsWithLock(); } Status new_bg_io_err = bg_io_err; Status s; if (bg_io_err.GetDataLoss()) { // FIrst, data loss is treated as unrecoverable error. So it can directly // overwrite any existing bg_error_. bool auto_recovery = false; Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError); bg_error_ = bg_err; EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s, db_mutex_, &auto_recovery); return bg_error_; } else if (bg_io_err.GetRetryable()) { // Second, check if the error is a retryable IO error or not. if it is // retryable error and its severity is higher than bg_error_, overwrite // the bg_error_ with new error. // In current stage, treat retryable error as HardError. No automatic // recovery. 
bool auto_recovery = false; Status bg_err(new_bg_io_err, Status::Severity::kHardError); EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s, db_mutex_, &auto_recovery); if (bg_err.severity() > bg_error_.severity()) { bg_error_ = bg_err; } return bg_error_; } else { s = SetBGError(new_bg_io_err, reason); } return s; } Status ErrorHandler::OverrideNoSpaceError(Status bg_error, bool* auto_recovery) { #ifndef ROCKSDB_LITE if (bg_error.severity() >= Status::Severity::kFatalError) { return bg_error; } if (db_options_.sst_file_manager.get() == nullptr) { // We rely on SFM to poll for enough disk space and recover *auto_recovery = false; return bg_error; } if (db_options_.allow_2pc && (bg_error.severity() <= Status::Severity::kSoftError)) { // Don't know how to recover, as the contents of the current WAL file may // be inconsistent, and it may be needed for 2PC. If 2PC is not enabled, // we can just flush the memtable and discard the log *auto_recovery = false; return Status(bg_error, Status::Severity::kFatalError); } { uint64_t free_space; if (db_options_.env->GetFreeSpace(db_options_.db_paths[0].path, &free_space) == Status::NotSupported()) { *auto_recovery = false; } } return bg_error; #else (void)auto_recovery; return Status(bg_error, Status::Severity::kFatalError); #endif } void ErrorHandler::RecoverFromNoSpace() { #ifndef ROCKSDB_LITE SstFileManagerImpl* sfm = reinterpret_cast(db_options_.sst_file_manager.get()); // Inform SFM of the error, so it can kick-off the recovery if (sfm) { sfm->StartErrorRecovery(this, bg_error_); } #endif } Status ErrorHandler::ClearBGError() { #ifndef ROCKSDB_LITE db_mutex_->AssertHeld(); // Signal that recovery succeeded if (recovery_error_.ok()) { Status old_bg_error = bg_error_; bg_error_ = Status::OK(); recovery_in_prog_ = false; EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners, old_bg_error, db_mutex_); } return recovery_error_; #else return bg_error_; #endif } Status 
ErrorHandler::RecoverFromBGError(bool is_manual) { #ifndef ROCKSDB_LITE InstrumentedMutexLock l(db_mutex_); if (is_manual) { // If its a manual recovery and there's a background recovery in progress // return busy status if (recovery_in_prog_) { return Status::Busy(); } recovery_in_prog_ = true; } if (bg_error_.severity() == Status::Severity::kSoftError) { // Simply clear the background error and return recovery_error_ = Status::OK(); return ClearBGError(); } // Reset recovery_error_. We will use this to record any errors that happen // during the recovery process. While recovering, the only operations that // can generate background errors should be the flush operations recovery_error_ = Status::OK(); Status s = db_->ResumeImpl(); // For manual recover, shutdown, and fatal error cases, set // recovery_in_prog_ to false. For automatic background recovery, leave it // as is regardless of success or failure as it will be retried if (is_manual || s.IsShutdownInProgress() || bg_error_.severity() >= Status::Severity::kFatalError) { recovery_in_prog_ = false; } return s; #else (void)is_manual; return bg_error_; #endif } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/error_handler.h000066400000000000000000000045311370372246700166340ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include "monitoring/instrumented_mutex.h" #include "options/db_options.h" #include "rocksdb/io_status.h" #include "rocksdb/listener.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class DBImpl; class ErrorHandler { public: ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options, InstrumentedMutex* db_mutex) : db_(db), db_options_(db_options), bg_error_(Status::OK()), recovery_error_(Status::OK()), db_mutex_(db_mutex), auto_recovery_(false), recovery_in_prog_(false) {} ~ErrorHandler() {} void EnableAutoRecovery() { auto_recovery_ = true; } Status::Severity GetErrorSeverity(BackgroundErrorReason reason, Status::Code code, Status::SubCode subcode); Status SetBGError(const Status& bg_err, BackgroundErrorReason reason); Status SetBGError(const IOStatus& bg_io_err, BackgroundErrorReason reason); Status GetBGError() { return bg_error_; } Status GetRecoveryError() { return recovery_error_; } Status ClearBGError(); bool IsDBStopped() { return !bg_error_.ok() && bg_error_.severity() >= Status::Severity::kHardError; } bool IsBGWorkStopped() { return !bg_error_.ok() && (bg_error_.severity() >= Status::Severity::kHardError || !auto_recovery_); } bool IsRecoveryInProgress() { return recovery_in_prog_; } Status RecoverFromBGError(bool is_manual = false); void CancelErrorRecovery(); private: DBImpl* db_; const ImmutableDBOptions& db_options_; Status bg_error_; // A separate Status variable used to record any errors during the // recovery process from hard errors Status recovery_error_; InstrumentedMutex* db_mutex_; // A flag indicating whether automatic recovery from errors is enabled bool auto_recovery_; bool recovery_in_prog_; Status OverrideNoSpaceError(Status bg_error, bool* auto_recovery); void RecoverFromNoSpace(); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/error_handler_fs_test.cc000066400000000000000000001155511370372246700205260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef ROCKSDB_LITE #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/io_status.h" #include "rocksdb/perf_context.h" #include "rocksdb/sst_file_manager.h" #include "test_util/fault_injection_test_env.h" #include "test_util/fault_injection_test_fs.h" #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif namespace ROCKSDB_NAMESPACE { class DBErrorHandlingFSTest : public DBTestBase { public: DBErrorHandlingFSTest() : DBTestBase("/db_error_handling_fs_test") {} std::string GetManifestNameFromLiveFiles() { std::vector live_files; uint64_t manifest_size; dbfull()->GetLiveFiles(live_files, &manifest_size, false); for (auto& file : live_files) { uint64_t num = 0; FileType type; if (ParseFileName(file, &num, &type) && type == kDescriptorFile) { return file; } } return ""; } }; class DBErrorHandlingFS : public FileSystemWrapper { public: DBErrorHandlingFS() : FileSystemWrapper(FileSystem::Default()), trig_no_space(false), trig_io_error(false) {} void SetTrigNoSpace() { trig_no_space = true; } void SetTrigIoError() { trig_io_error = true; } private: bool trig_no_space; bool trig_io_error; }; class ErrorHandlerFSListener : public EventListener { public: ErrorHandlerFSListener() : mutex_(), cv_(&mutex_), no_auto_recovery_(false), recovery_complete_(false), file_creation_started_(false), override_bg_error_(false), file_count_(0), fault_fs_(nullptr) {} void OnTableFileCreationStarted( const TableFileCreationBriefInfo& /*ti*/) override { InstrumentedMutexLock l(&mutex_); file_creation_started_ = true; if (file_count_ > 0) { if 
(--file_count_ == 0) { fault_fs_->SetFilesystemActive(false, file_creation_error_); file_creation_error_ = IOStatus::OK(); } } cv_.SignalAll(); } void OnErrorRecoveryBegin(BackgroundErrorReason /*reason*/, Status /*bg_error*/, bool* auto_recovery) override { if (*auto_recovery && no_auto_recovery_) { *auto_recovery = false; } } void OnErrorRecoveryCompleted(Status /*old_bg_error*/) override { InstrumentedMutexLock l(&mutex_); recovery_complete_ = true; cv_.SignalAll(); } bool WaitForRecovery(uint64_t /*abs_time_us*/) { InstrumentedMutexLock l(&mutex_); while (!recovery_complete_) { cv_.Wait(/*abs_time_us*/); } if (recovery_complete_) { recovery_complete_ = false; return true; } return false; } void WaitForTableFileCreationStarted(uint64_t /*abs_time_us*/) { InstrumentedMutexLock l(&mutex_); while (!file_creation_started_) { cv_.Wait(/*abs_time_us*/); } file_creation_started_ = false; } void OnBackgroundError(BackgroundErrorReason /*reason*/, Status* bg_error) override { if (override_bg_error_) { *bg_error = bg_error_; override_bg_error_ = false; } } void EnableAutoRecovery(bool enable = true) { no_auto_recovery_ = !enable; } void OverrideBGError(Status bg_err) { bg_error_ = bg_err; override_bg_error_ = true; } void InjectFileCreationError(FaultInjectionTestFS* fs, int file_count, IOStatus io_s) { fault_fs_ = fs; file_count_ = file_count; file_creation_error_ = io_s; } private: InstrumentedMutex mutex_; InstrumentedCondVar cv_; bool no_auto_recovery_; bool recovery_complete_; bool file_creation_started_; bool override_bg_error_; int file_count_; IOStatus file_creation_error_; Status bg_error_; FaultInjectionTestFS* fault_fs_; }; TEST_F(DBErrorHandlingFSTest, FLushWriteError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = 
true; options.listeners.emplace_back(listener); Status s; listener->EnableAutoRecovery(false); DestroyAndReopen(options); Put(Key(0), "val"); SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); Destroy(options); } TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.listeners.emplace_back(listener); Status s; listener->EnableAutoRecovery(false); DestroyAndReopen(options); IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); error_msg.SetRetryable(true); Put(Key(1), "val1"); SyncPoint::GetInstance()->SetCallBack( "BuildTable:BeforeFinishBuildTable", [&](void*) { fault_fs->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); Reopen(options); ASSERT_EQ("val1", Get(Key(1))); Put(Key(2), "val2"); SyncPoint::GetInstance()->SetCallBack( "BuildTable:BeforeSyncTable", [&](void*) { fault_fs->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); 
SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); Reopen(options); ASSERT_EQ("val2", Get(Key(2))); Put(Key(3), "val3"); SyncPoint::GetInstance()->SetCallBack( "BuildTable:BeforeCloseTableFile", [&](void*) { fault_fs->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); Reopen(options); ASSERT_EQ("val3", Get(Key(3))); Destroy(options); } TEST_F(DBErrorHandlingFSTest, ManifestWriteError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.listeners.emplace_back(listener); Status s; std::string old_manifest; std::string new_manifest; listener->EnableAutoRecovery(false); DestroyAndReopen(options); old_manifest = GetManifestNameFromLiveFiles(); Put(Key(0), "val"); Flush(); Put(Key(1), "val"); SyncPoint::GetInstance()->SetCallBack( "VersionSet::LogAndApply:WriteManifest", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); Close(); } TEST_F(DBErrorHandlingFSTest, 
ManifestWriteRetryableError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.listeners.emplace_back(listener); Status s; std::string old_manifest; std::string new_manifest; listener->EnableAutoRecovery(false); DestroyAndReopen(options); old_manifest = GetManifestNameFromLiveFiles(); IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); error_msg.SetRetryable(true); Put(Key(0), "val"); Flush(); Put(Key(1), "val"); SyncPoint::GetInstance()->SetCallBack( "VersionSet::LogAndApply:WriteManifest", [&](void*) { fault_fs->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); Close(); } TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.listeners.emplace_back(listener); Status s; std::string old_manifest; std::string new_manifest; listener->EnableAutoRecovery(false); DestroyAndReopen(options); old_manifest = GetManifestNameFromLiveFiles(); Put(Key(0), "val"); Flush(); Put(Key(1), "val"); SyncPoint::GetInstance()->SetCallBack( 
"VersionSet::LogAndApply:WriteManifest", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); fault_fs->SetFilesystemActive(true); // This Resume() will attempt to create a new manifest file and fail again s = dbfull()->Resume(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); fault_fs->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); // A successful Resume() will create a new manifest file s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); Close(); } TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; options.listeners.emplace_back(listener); Status s; std::string old_manifest; std::string new_manifest; std::atomic fail_manifest(false); DestroyAndReopen(options); old_manifest = GetManifestNameFromLiveFiles(); Put(Key(0), "val"); Put(Key(2), "val"); s = Flush(); ASSERT_EQ(s, Status::OK()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( // Wait for flush of 2nd L0 file before starting compaction {{"DBImpl::FlushMemTable:FlushMemTableFinished", "BackgroundCallCompaction:0"}, // Wait for compaction to detect manifest write error {"BackgroundCallCompaction:1", "CompactionManifestWriteError:0"}, // Make compaction thread wait for error to be cleared 
{"CompactionManifestWriteError:1", "DBImpl::BackgroundCallCompaction:FoundObsoleteFiles"}, // Wait for DB instance to clear bg_error before calling // TEST_WaitForCompact {"SstFileManagerImpl::ErrorCleared", "CompactionManifestWriteError:2"}}); // trigger manifest write failure in compaction thread ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BackgroundCallCompaction:0", [&](void*) { fail_manifest.store(true); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionSet::LogAndApply:WriteManifest", [&](void*) { if (fail_manifest.load()) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put(Key(1), "val"); // This Flush will trigger a compaction, which will fail when appending to // the manifest s = Flush(); ASSERT_EQ(s, Status::OK()); TEST_SYNC_POINT("CompactionManifestWriteError:0"); // Clear all errors so when the compaction is retried, it will succeed fault_fs->SetFilesystemActive(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); TEST_SYNC_POINT("CompactionManifestWriteError:1"); TEST_SYNC_POINT("CompactionManifestWriteError:2"); s = dbfull()->TEST_WaitForCompact(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(s, Status::OK()); new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); ASSERT_EQ("val", Get(Key(2))); Close(); } TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; options.listeners.emplace_back(listener); Status 
s; std::string old_manifest; std::string new_manifest; std::atomic fail_manifest(false); DestroyAndReopen(options); old_manifest = GetManifestNameFromLiveFiles(); IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); error_msg.SetRetryable(true); Put(Key(0), "val"); Put(Key(2), "val"); s = Flush(); ASSERT_EQ(s, Status::OK()); listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); listener->EnableAutoRecovery(false); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( // Wait for flush of 2nd L0 file before starting compaction {{"DBImpl::FlushMemTable:FlushMemTableFinished", "BackgroundCallCompaction:0"}, // Wait for compaction to detect manifest write error {"BackgroundCallCompaction:1", "CompactionManifestWriteError:0"}, // Make compaction thread wait for error to be cleared {"CompactionManifestWriteError:1", "DBImpl::BackgroundCallCompaction:FoundObsoleteFiles"}}); // trigger manifest write failure in compaction thread ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BackgroundCallCompaction:0", [&](void*) { fail_manifest.store(true); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionSet::LogAndApply:WriteManifest", [&](void*) { if (fail_manifest.load()) { fault_fs->SetFilesystemActive(false, error_msg); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put(Key(1), "val"); s = Flush(); ASSERT_EQ(s, Status::OK()); TEST_SYNC_POINT("CompactionManifestWriteError:0"); TEST_SYNC_POINT("CompactionManifestWriteError:1"); s = dbfull()->TEST_WaitForCompact(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); fault_fs->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); 
ASSERT_EQ("val", Get(Key(2))); Close(); } TEST_F(DBErrorHandlingFSTest, CompactionWriteError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; options.listeners.emplace_back(listener); Status s; DestroyAndReopen(options); Put(Key(0), "va;"); Put(Key(2), "va;"); s = Flush(); ASSERT_EQ(s, Status::OK()); listener->OverrideBGError( Status(Status::NoSpace(), Status::Severity::kHardError)); listener->EnableAutoRecovery(false); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::FlushMemTable:FlushMemTableFinished", "BackgroundCallCompaction:0"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BackgroundCallCompaction:0", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put(Key(1), "val"); s = Flush(); ASSERT_EQ(s, Status::OK()); s = dbfull()->TEST_WaitForCompact(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); Destroy(options); } TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; options.listeners.emplace_back(listener); Status s; DestroyAndReopen(options); IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); error_msg.SetRetryable(true); Put(Key(0), "va;"); 
Put(Key(2), "va;"); s = Flush(); ASSERT_EQ(s, Status::OK()); listener->OverrideBGError(Status(error_msg, Status::Severity::kHardError)); listener->EnableAutoRecovery(false); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::FlushMemTable:FlushMemTableFinished", "BackgroundCallCompaction:0"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BackgroundCallCompaction:0", [&](void*) { fault_fs->SetFilesystemActive(false, error_msg); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put(Key(1), "val"); s = Flush(); ASSERT_EQ(s, Status::OK()); s = dbfull()->TEST_WaitForCompact(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); fault_fs->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); Destroy(options); } TEST_F(DBErrorHandlingFSTest, CorruptionError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; Status s; DestroyAndReopen(options); Put(Key(0), "va;"); Put(Key(2), "va;"); s = Flush(); ASSERT_EQ(s, Status::OK()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::FlushMemTable:FlushMemTableFinished", "BackgroundCallCompaction:0"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BackgroundCallCompaction:0", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::Corruption("Corruption")); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Put(Key(1), "val"); s = Flush(); ASSERT_EQ(s, Status::OK()); s = dbfull()->TEST_WaitForCompact(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); fault_fs->SetFilesystemActive(true); s = dbfull()->Resume(); 
ASSERT_NE(s, Status::OK()); Destroy(options); } TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.listeners.emplace_back(listener); Status s; listener->EnableAutoRecovery(); DestroyAndReopen(options); Put(Key(0), "val"); SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); ASSERT_EQ(listener->WaitForRecovery(5000000), true); s = Put(Key(1), "val"); ASSERT_EQ(s, Status::OK()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); Destroy(options); } TEST_F(DBErrorHandlingFSTest, FailRecoverFlushError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.listeners.emplace_back(listener); Status s; listener->EnableAutoRecovery(); DestroyAndReopen(options); Put(Key(0), "val"); SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); // We should be able to shutdown the database while auto recovery is going // on in the background Close(); DestroyDB(dbname_, options); 
} TEST_F(DBErrorHandlingFSTest, WALWriteError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.writable_file_max_buffer_size = 32768; options.listeners.emplace_back(listener); Status s; Random rnd(301); listener->EnableAutoRecovery(); DestroyAndReopen(options); { WriteBatch batch; for (auto i = 0; i < 100; ++i) { batch.Put(Key(i), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(dbfull()->Write(wopts, &batch), Status::OK()); }; { WriteBatch batch; int write_error = 0; for (auto i = 100; i < 199; ++i) { batch.Put(Key(i), RandomString(&rnd, 1024)); } SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { write_error++; if (write_error > 2) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); } }); SyncPoint::GetInstance()->EnableProcessing(); WriteOptions wopts; wopts.sync = true; s = dbfull()->Write(wopts, &batch); ASSERT_EQ(s, s.NoSpace()); } SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); ASSERT_EQ(listener->WaitForRecovery(5000000), true); for (auto i = 0; i < 199; ++i) { if (i < 100) { ASSERT_NE(Get(Key(i)), "NOT_FOUND"); } else { ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); } } Reopen(options); for (auto i = 0; i < 199; ++i) { if (i < 100) { ASSERT_NE(Get(Key(i)), "NOT_FOUND"); } else { ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); } } Close(); } TEST_F(DBErrorHandlingFSTest, WALWriteRetryableError) { std::shared_ptr fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; 
options.writable_file_max_buffer_size = 32768; options.listeners.emplace_back(listener); options.paranoid_checks = true; Status s; Random rnd(301); DestroyAndReopen(options); IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); error_msg.SetRetryable(true); // For the first batch, write is successful, require sync { WriteBatch batch; for (auto i = 0; i < 100; ++i) { batch.Put(Key(i), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(dbfull()->Write(wopts, &batch), Status::OK()); }; // For the second batch, the first 2 file Append are successful, then the // following Append fails due to file system retryable IOError. { WriteBatch batch; int write_error = 0; for (auto i = 100; i < 200; ++i) { batch.Put(Key(i), RandomString(&rnd, 1024)); } SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { write_error++; if (write_error > 2) { fault_fs->SetFilesystemActive(false, error_msg); } }); SyncPoint::GetInstance()->EnableProcessing(); WriteOptions wopts; wopts.sync = true; s = dbfull()->Write(wopts, &batch); ASSERT_EQ(true, s.IsIOError()); } fault_fs->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); // Data in corrupted WAL are not stored for (auto i = 0; i < 199; ++i) { if (i < 100) { ASSERT_NE(Get(Key(i)), "NOT_FOUND"); } else { ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); } } // Resume and write a new batch, should be in the WAL s = dbfull()->Resume(); ASSERT_EQ(s, Status::OK()); { WriteBatch batch; for (auto i = 200; i < 300; ++i) { batch.Put(Key(i), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(dbfull()->Write(wopts, &batch), Status::OK()); }; Reopen(options); for (auto i = 0; i < 300; ++i) { if (i < 100 || i >= 200) { ASSERT_NE(Get(Key(i)), "NOT_FOUND"); } else { ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); } } Close(); } TEST_F(DBErrorHandlingFSTest, MultiCFWALWriteError) { std::shared_ptr 
fault_fs( new FaultInjectionTestFS(FileSystem::Default())); std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); std::shared_ptr listener( new ErrorHandlerFSListener()); Options options = GetDefaultOptions(); options.env = fault_fs_env.get(); options.create_if_missing = true; options.writable_file_max_buffer_size = 32768; options.listeners.emplace_back(listener); Status s; Random rnd(301); listener->EnableAutoRecovery(); CreateAndReopenWithCF({"one", "two", "three"}, options); { WriteBatch batch; for (auto i = 1; i < 4; ++i) { for (auto j = 0; j < 100; ++j) { batch.Put(handles_[i], Key(j), RandomString(&rnd, 1024)); } } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(dbfull()->Write(wopts, &batch), Status::OK()); }; { WriteBatch batch; int write_error = 0; // Write to one CF for (auto i = 100; i < 199; ++i) { batch.Put(handles_[2], Key(i), RandomString(&rnd, 1024)); } SyncPoint::GetInstance()->SetCallBack( "WritableFileWriter::Append:BeforePrepareWrite", [&](void*) { write_error++; if (write_error > 2) { fault_fs->SetFilesystemActive(false, IOStatus::NoSpace("Out of space")); } }); SyncPoint::GetInstance()->EnableProcessing(); WriteOptions wopts; wopts.sync = true; s = dbfull()->Write(wopts, &batch); ASSERT_EQ(s, s.NoSpace()); } SyncPoint::GetInstance()->DisableProcessing(); fault_fs->SetFilesystemActive(true); ASSERT_EQ(listener->WaitForRecovery(5000000), true); for (auto i = 1; i < 4; ++i) { // Every CF should have been flushed ASSERT_EQ(NumTableFilesAtLevel(0, i), 1); } for (auto i = 1; i < 4; ++i) { for (auto j = 0; j < 199; ++j) { if (j < 100) { ASSERT_NE(Get(i, Key(j)), "NOT_FOUND"); } else { ASSERT_EQ(Get(i, Key(j)), "NOT_FOUND"); } } } ReopenWithColumnFamilies({"default", "one", "two", "three"}, options); for (auto i = 1; i < 4; ++i) { for (auto j = 0; j < 199; ++j) { if (j < 100) { ASSERT_NE(Get(i, Key(j)), "NOT_FOUND"); } else { ASSERT_EQ(Get(i, Key(j)), "NOT_FOUND"); } } } Close(); } TEST_F(DBErrorHandlingFSTest, MultiDBCompactionError) { 
FaultInjectionTestEnv* def_env = new FaultInjectionTestEnv(Env::Default()); std::vector> fault_envs; std::vector fault_fs; std::vector options; std::vector> listener; std::vector db; std::shared_ptr sfm(NewSstFileManager(def_env)); int kNumDbInstances = 3; Random rnd(301); for (auto i = 0; i < kNumDbInstances; ++i) { listener.emplace_back(new ErrorHandlerFSListener()); options.emplace_back(GetDefaultOptions()); fault_fs.emplace_back(new FaultInjectionTestFS(FileSystem::Default())); std::shared_ptr fs(fault_fs.back()); fault_envs.emplace_back(new CompositeEnvWrapper(def_env, fs)); options[i].env = fault_envs.back().get(); options[i].create_if_missing = true; options[i].level0_file_num_compaction_trigger = 2; options[i].writable_file_max_buffer_size = 32768; options[i].listeners.emplace_back(listener[i]); options[i].sst_file_manager = sfm; DB* dbptr; char buf[16]; listener[i]->EnableAutoRecovery(); // Setup for returning error for the 3rd SST, which would be level 1 listener[i]->InjectFileCreationError(fault_fs[i], 3, IOStatus::NoSpace("Out of space")); snprintf(buf, sizeof(buf), "_%d", i); DestroyDB(dbname_ + std::string(buf), options[i]); ASSERT_EQ(DB::Open(options[i], dbname_ + std::string(buf), &dbptr), Status::OK()); db.emplace_back(dbptr); } for (auto i = 0; i < kNumDbInstances; ++i) { WriteBatch batch; for (auto j = 0; j <= 100; ++j) { batch.Put(Key(j), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(db[i]->Write(wopts, &batch), Status::OK()); ASSERT_EQ(db[i]->Flush(FlushOptions()), Status::OK()); } def_env->SetFilesystemActive(false, Status::NoSpace("Out of space")); for (auto i = 0; i < kNumDbInstances; ++i) { WriteBatch batch; // Write to one CF for (auto j = 100; j < 199; ++j) { batch.Put(Key(j), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(db[i]->Write(wopts, &batch), Status::OK()); ASSERT_EQ(db[i]->Flush(FlushOptions()), Status::OK()); } for (auto i = 0; i < kNumDbInstances; ++i) { Status s = 
static_cast(db[i])->TEST_WaitForCompact(true); ASSERT_EQ(s.severity(), Status::Severity::kSoftError); fault_fs[i]->SetFilesystemActive(true); } def_env->SetFilesystemActive(true); for (auto i = 0; i < kNumDbInstances; ++i) { std::string prop; ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true); ASSERT_EQ(static_cast(db[i])->TEST_WaitForCompact(true), Status::OK()); EXPECT_TRUE(db[i]->GetProperty( "rocksdb.num-files-at-level" + NumberToString(0), &prop)); EXPECT_EQ(atoi(prop.c_str()), 0); EXPECT_TRUE(db[i]->GetProperty( "rocksdb.num-files-at-level" + NumberToString(1), &prop)); EXPECT_EQ(atoi(prop.c_str()), 1); } for (auto i = 0; i < kNumDbInstances; ++i) { char buf[16]; snprintf(buf, sizeof(buf), "_%d", i); delete db[i]; fault_fs[i]->SetFilesystemActive(true); if (getenv("KEEP_DB")) { printf("DB is still at %s%s\n", dbname_.c_str(), buf); } else { Status s = DestroyDB(dbname_ + std::string(buf), options[i]); } } options.clear(); sfm.reset(); delete def_env; } TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) { FaultInjectionTestEnv* def_env = new FaultInjectionTestEnv(Env::Default()); std::vector> fault_envs; std::vector fault_fs; std::vector options; std::vector> listener; std::vector db; std::shared_ptr sfm(NewSstFileManager(def_env)); int kNumDbInstances = 3; Random rnd(301); for (auto i = 0; i < kNumDbInstances; ++i) { listener.emplace_back(new ErrorHandlerFSListener()); options.emplace_back(GetDefaultOptions()); fault_fs.emplace_back(new FaultInjectionTestFS(FileSystem::Default())); std::shared_ptr fs(fault_fs.back()); fault_envs.emplace_back(new CompositeEnvWrapper(def_env, fs)); options[i].env = fault_envs.back().get(); options[i].create_if_missing = true; options[i].level0_file_num_compaction_trigger = 2; options[i].writable_file_max_buffer_size = 32768; options[i].listeners.emplace_back(listener[i]); options[i].sst_file_manager = sfm; DB* dbptr; char buf[16]; listener[i]->EnableAutoRecovery(); switch (i) { case 0: // Setup for returning error for the 
3rd SST, which would be level 1 listener[i]->InjectFileCreationError(fault_fs[i], 3, IOStatus::NoSpace("Out of space")); break; case 1: // Setup for returning error after the 1st SST, which would result // in a hard error listener[i]->InjectFileCreationError(fault_fs[i], 2, IOStatus::NoSpace("Out of space")); break; default: break; } snprintf(buf, sizeof(buf), "_%d", i); DestroyDB(dbname_ + std::string(buf), options[i]); ASSERT_EQ(DB::Open(options[i], dbname_ + std::string(buf), &dbptr), Status::OK()); db.emplace_back(dbptr); } for (auto i = 0; i < kNumDbInstances; ++i) { WriteBatch batch; for (auto j = 0; j <= 100; ++j) { batch.Put(Key(j), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(db[i]->Write(wopts, &batch), Status::OK()); ASSERT_EQ(db[i]->Flush(FlushOptions()), Status::OK()); } def_env->SetFilesystemActive(false, Status::NoSpace("Out of space")); for (auto i = 0; i < kNumDbInstances; ++i) { WriteBatch batch; // Write to one CF for (auto j = 100; j < 199; ++j) { batch.Put(Key(j), RandomString(&rnd, 1024)); } WriteOptions wopts; wopts.sync = true; ASSERT_EQ(db[i]->Write(wopts, &batch), Status::OK()); if (i != 1) { ASSERT_EQ(db[i]->Flush(FlushOptions()), Status::OK()); } else { ASSERT_EQ(db[i]->Flush(FlushOptions()), Status::NoSpace()); } } for (auto i = 0; i < kNumDbInstances; ++i) { Status s = static_cast(db[i])->TEST_WaitForCompact(true); switch (i) { case 0: ASSERT_EQ(s.severity(), Status::Severity::kSoftError); break; case 1: ASSERT_EQ(s.severity(), Status::Severity::kHardError); break; case 2: ASSERT_EQ(s, Status::OK()); break; } fault_fs[i]->SetFilesystemActive(true); } def_env->SetFilesystemActive(true); for (auto i = 0; i < kNumDbInstances; ++i) { std::string prop; if (i < 2) { ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true); } if (i == 1) { ASSERT_EQ(static_cast(db[i])->TEST_WaitForCompact(true), Status::OK()); } EXPECT_TRUE(db[i]->GetProperty( "rocksdb.num-files-at-level" + NumberToString(0), &prop)); 
EXPECT_EQ(atoi(prop.c_str()), 0); EXPECT_TRUE(db[i]->GetProperty( "rocksdb.num-files-at-level" + NumberToString(1), &prop)); EXPECT_EQ(atoi(prop.c_str()), 1); } for (auto i = 0; i < kNumDbInstances; ++i) { char buf[16]; snprintf(buf, sizeof(buf), "_%d", i); fault_fs[i]->SetFilesystemActive(true); delete db[i]; if (getenv("KEEP_DB")) { printf("DB is still at %s%s\n", dbname_.c_str(), buf); } else { DestroyDB(dbname_ + std::string(buf), options[i]); } } options.clear(); delete def_env; } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/db/event_helpers.cc000066400000000000000000000166321370372246700170140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/event_helpers.h" namespace ROCKSDB_NAMESPACE { namespace { template inline T SafeDivide(T a, T b) { return b == 0 ? 
0 : a / b; } } // namespace void EventHelpers::AppendCurrentTime(JSONWriter* jwriter) { *jwriter << "time_micros" << std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); } #ifndef ROCKSDB_LITE void EventHelpers::NotifyTableFileCreationStarted( const std::vector>& listeners, const std::string& db_name, const std::string& cf_name, const std::string& file_path, int job_id, TableFileCreationReason reason) { TableFileCreationBriefInfo info; info.db_name = db_name; info.cf_name = cf_name; info.file_path = file_path; info.job_id = job_id; info.reason = reason; for (auto& listener : listeners) { listener->OnTableFileCreationStarted(info); } } #endif // !ROCKSDB_LITE void EventHelpers::NotifyOnBackgroundError( const std::vector>& listeners, BackgroundErrorReason reason, Status* bg_error, InstrumentedMutex* db_mutex, bool* auto_recovery) { #ifndef ROCKSDB_LITE if (listeners.size() == 0U) { return; } db_mutex->AssertHeld(); // release lock while notifying events db_mutex->Unlock(); for (auto& listener : listeners) { listener->OnBackgroundError(reason, bg_error); if (*auto_recovery) { listener->OnErrorRecoveryBegin(reason, *bg_error, auto_recovery); } } db_mutex->Lock(); #else (void)listeners; (void)reason; (void)bg_error; (void)db_mutex; (void)auto_recovery; #endif // ROCKSDB_LITE } void EventHelpers::LogAndNotifyTableFileCreationFinished( EventLogger* event_logger, const std::vector>& listeners, const std::string& db_name, const std::string& cf_name, const std::string& file_path, int job_id, const FileDescriptor& fd, uint64_t oldest_blob_file_number, const TableProperties& table_properties, TableFileCreationReason reason, const Status& s) { if (s.ok() && event_logger) { JSONWriter jwriter; AppendCurrentTime(&jwriter); jwriter << "cf_name" << cf_name << "job" << job_id << "event" << "table_file_creation" << "file_number" << fd.GetNumber() << "file_size" << fd.GetFileSize(); // table_properties { jwriter << "table_properties"; 
jwriter.StartObject(); // basic properties: jwriter << "data_size" << table_properties.data_size << "index_size" << table_properties.index_size << "index_partitions" << table_properties.index_partitions << "top_level_index_size" << table_properties.top_level_index_size << "index_key_is_user_key" << table_properties.index_key_is_user_key << "index_value_is_delta_encoded" << table_properties.index_value_is_delta_encoded << "filter_size" << table_properties.filter_size << "raw_key_size" << table_properties.raw_key_size << "raw_average_key_size" << SafeDivide(table_properties.raw_key_size, table_properties.num_entries) << "raw_value_size" << table_properties.raw_value_size << "raw_average_value_size" << SafeDivide(table_properties.raw_value_size, table_properties.num_entries) << "num_data_blocks" << table_properties.num_data_blocks << "num_entries" << table_properties.num_entries << "num_deletions" << table_properties.num_deletions << "num_merge_operands" << table_properties.num_merge_operands << "num_range_deletions" << table_properties.num_range_deletions << "format_version" << table_properties.format_version << "fixed_key_len" << table_properties.fixed_key_len << "filter_policy" << table_properties.filter_policy_name << "column_family_name" << table_properties.column_family_name << "column_family_id" << table_properties.column_family_id << "comparator" << table_properties.comparator_name << "merge_operator" << table_properties.merge_operator_name << "prefix_extractor_name" << table_properties.prefix_extractor_name << "property_collectors" << table_properties.property_collectors_names << "compression" << table_properties.compression_name << "compression_options" << table_properties.compression_options << "creation_time" << table_properties.creation_time << "oldest_key_time" << table_properties.oldest_key_time << "file_creation_time" << table_properties.file_creation_time; // user collected properties for (const auto& prop : table_properties.readable_properties) { 
jwriter << prop.first << prop.second; } jwriter.EndObject(); } if (oldest_blob_file_number != kInvalidBlobFileNumber) { jwriter << "oldest_blob_file_number" << oldest_blob_file_number; } jwriter.EndObject(); event_logger->Log(jwriter); } #ifndef ROCKSDB_LITE if (listeners.size() == 0) { return; } TableFileCreationInfo info; info.db_name = db_name; info.cf_name = cf_name; info.file_path = file_path; info.file_size = fd.file_size; info.job_id = job_id; info.table_properties = table_properties; info.reason = reason; info.status = s; for (auto& listener : listeners) { listener->OnTableFileCreated(info); } #else (void)listeners; (void)db_name; (void)cf_name; (void)file_path; (void)reason; #endif // !ROCKSDB_LITE } void EventHelpers::LogAndNotifyTableFileDeletion( EventLogger* event_logger, int job_id, uint64_t file_number, const std::string& file_path, const Status& status, const std::string& dbname, const std::vector>& listeners) { JSONWriter jwriter; AppendCurrentTime(&jwriter); jwriter << "job" << job_id << "event" << "table_file_deletion" << "file_number" << file_number; if (!status.ok()) { jwriter << "status" << status.ToString(); } jwriter.EndObject(); event_logger->Log(jwriter); #ifndef ROCKSDB_LITE TableFileDeletionInfo info; info.db_name = dbname; info.job_id = job_id; info.file_path = file_path; info.status = status; for (auto& listener : listeners) { listener->OnTableFileDeleted(info); } #else (void)file_path; (void)dbname; (void)listeners; #endif // !ROCKSDB_LITE } void EventHelpers::NotifyOnErrorRecoveryCompleted( const std::vector>& listeners, Status old_bg_error, InstrumentedMutex* db_mutex) { #ifndef ROCKSDB_LITE if (listeners.size() == 0U) { return; } db_mutex->AssertHeld(); // release lock while notifying events db_mutex->Unlock(); for (auto& listener : listeners) { listener->OnErrorRecoveryCompleted(old_bg_error); } db_mutex->Lock(); #else (void)listeners; (void)old_bg_error; (void)db_mutex; #endif // ROCKSDB_LITE } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/db/event_helpers.h000066400000000000000000000043271370372246700166540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "db/column_family.h" #include "db/version_edit.h" #include "logging/event_logger.h" #include "rocksdb/listener.h" #include "rocksdb/table_properties.h" namespace ROCKSDB_NAMESPACE { class EventHelpers { public: static void AppendCurrentTime(JSONWriter* json_writer); #ifndef ROCKSDB_LITE static void NotifyTableFileCreationStarted( const std::vector>& listeners, const std::string& db_name, const std::string& cf_name, const std::string& file_path, int job_id, TableFileCreationReason reason); #endif // !ROCKSDB_LITE static void NotifyOnBackgroundError( const std::vector>& listeners, BackgroundErrorReason reason, Status* bg_error, InstrumentedMutex* db_mutex, bool* auto_recovery); static void LogAndNotifyTableFileCreationFinished( EventLogger* event_logger, const std::vector>& listeners, const std::string& db_name, const std::string& cf_name, const std::string& file_path, int job_id, const FileDescriptor& fd, uint64_t oldest_blob_file_number, const TableProperties& table_properties, TableFileCreationReason reason, const Status& s); static void LogAndNotifyTableFileDeletion( EventLogger* event_logger, int job_id, uint64_t file_number, const std::string& file_path, const Status& status, const std::string& db_name, const std::vector>& listeners); static void NotifyOnErrorRecoveryCompleted( const std::vector>& listeners, Status bg_error, InstrumentedMutex* db_mutex); private: static void LogAndNotifyTableFileCreation( EventLogger* event_logger, const std::vector>& listeners, const FileDescriptor& fd, const TableFileCreationInfo& info); }; } // namespace 
ROCKSDB_NAMESPACE rocksdb-6.11.4/db/experimental.cc000066400000000000000000000030511370372246700166350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "rocksdb/experimental.h" #include "db/db_impl/db_impl.h" namespace ROCKSDB_NAMESPACE { namespace experimental { #ifndef ROCKSDB_LITE Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) { if (db == nullptr) { return Status::InvalidArgument("DB is empty"); } return db->SuggestCompactRange(column_family, begin, end); } Status PromoteL0(DB* db, ColumnFamilyHandle* column_family, int target_level) { if (db == nullptr) { return Status::InvalidArgument("Didn't recognize DB object"); } return db->PromoteL0(column_family, target_level); } #else // ROCKSDB_LITE Status SuggestCompactRange(DB* /*db*/, ColumnFamilyHandle* /*column_family*/, const Slice* /*begin*/, const Slice* /*end*/) { return Status::NotSupported("Not supported in RocksDB LITE"); } Status PromoteL0(DB* /*db*/, ColumnFamilyHandle* /*column_family*/, int /*target_level*/) { return Status::NotSupported("Not supported in RocksDB LITE"); } #endif // ROCKSDB_LITE Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end) { return SuggestCompactRange(db, db->DefaultColumnFamily(), begin, end); } } // namespace experimental } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/external_sst_file_basic_test.cc000066400000000000000000001721611370372246700220630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "db/db_test_util.h" #include "db/version_edit.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/sst_file_writer.h" #include "test_util/fault_injection_test_env.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE class ExternalSSTFileBasicTest : public DBTestBase, public ::testing::WithParamInterface> { public: ExternalSSTFileBasicTest() : DBTestBase("/external_sst_file_basic_test") { sst_files_dir_ = dbname_ + "/sst_files/"; fault_injection_test_env_.reset(new FaultInjectionTestEnv(Env::Default())); DestroyAndRecreateExternalSSTFilesDir(); } void DestroyAndRecreateExternalSSTFilesDir() { test::DestroyDir(env_, sst_files_dir_); env_->CreateDir(sst_files_dir_); } Status DeprecatedAddFile(const std::vector& files, bool move_files = false, bool skip_snapshot_check = false) { IngestExternalFileOptions opts; opts.move_files = move_files; opts.snapshot_consistency = !skip_snapshot_check; opts.allow_global_seqno = false; opts.allow_blocking_flush = false; return db_->IngestExternalFile(files, opts); } Status AddFileWithFileChecksum( const std::vector& files, const std::vector& files_checksums, const std::vector& files_checksum_func_names, bool verify_file_checksum = true, bool move_files = false, bool skip_snapshot_check = false, bool write_global_seqno = true) { IngestExternalFileOptions opts; opts.move_files = move_files; opts.snapshot_consistency = !skip_snapshot_check; opts.allow_global_seqno = false; opts.allow_blocking_flush = false; opts.write_global_seqno = write_global_seqno; opts.verify_file_checksum = verify_file_checksum; IngestExternalFileArg arg; arg.column_family = db_->DefaultColumnFamily(); arg.external_files = files; arg.options = opts; arg.files_checksums = files_checksums; arg.files_checksum_func_names = files_checksum_func_names; return db_->IngestExternalFiles({arg}); } Status GenerateAndAddExternalFile( const Options options, std::vector keys, const std::vector& value_types, 
std::vector> range_deletions, int file_id, bool write_global_seqno, bool verify_checksums_before_ingest, std::map* true_data) { assert(value_types.size() == 1 || keys.size() == value_types.size()); std::string file_path = sst_files_dir_ + ToString(file_id); SstFileWriter sst_file_writer(EnvOptions(), options); Status s = sst_file_writer.Open(file_path); if (!s.ok()) { return s; } for (size_t i = 0; i < range_deletions.size(); i++) { // Account for the effect of range deletions on true_data before // all point operators, even though sst_file_writer.DeleteRange // must be called before other sst_file_writer methods. This is // because point writes take precedence over range deletions // in the same ingested sst. std::string start_key = Key(range_deletions[i].first); std::string end_key = Key(range_deletions[i].second); s = sst_file_writer.DeleteRange(start_key, end_key); if (!s.ok()) { sst_file_writer.Finish(); return s; } auto start_key_it = true_data->find(start_key); if (start_key_it == true_data->end()) { start_key_it = true_data->upper_bound(start_key); } auto end_key_it = true_data->find(end_key); if (end_key_it == true_data->end()) { end_key_it = true_data->upper_bound(end_key); } true_data->erase(start_key_it, end_key_it); } for (size_t i = 0; i < keys.size(); i++) { std::string key = Key(keys[i]); std::string value = Key(keys[i]) + ToString(file_id); ValueType value_type = (value_types.size() == 1 ? 
value_types[0] : value_types[i]); switch (value_type) { case ValueType::kTypeValue: s = sst_file_writer.Put(key, value); (*true_data)[key] = value; break; case ValueType::kTypeMerge: s = sst_file_writer.Merge(key, value); // we only use TestPutOperator in this test (*true_data)[key] = value; break; case ValueType::kTypeDeletion: s = sst_file_writer.Delete(key); true_data->erase(key); break; default: return Status::InvalidArgument("Value type is not supported"); } if (!s.ok()) { sst_file_writer.Finish(); return s; } } s = sst_file_writer.Finish(); if (s.ok()) { IngestExternalFileOptions ifo; ifo.allow_global_seqno = true; ifo.write_global_seqno = write_global_seqno; ifo.verify_checksums_before_ingest = verify_checksums_before_ingest; s = db_->IngestExternalFile({file_path}, ifo); } return s; } Status GenerateAndAddExternalFile( const Options options, std::vector keys, const std::vector& value_types, int file_id, bool write_global_seqno, bool verify_checksums_before_ingest, std::map* true_data) { return GenerateAndAddExternalFile( options, keys, value_types, {}, file_id, write_global_seqno, verify_checksums_before_ingest, true_data); } Status GenerateAndAddExternalFile( const Options options, std::vector keys, const ValueType value_type, int file_id, bool write_global_seqno, bool verify_checksums_before_ingest, std::map* true_data) { return GenerateAndAddExternalFile( options, keys, std::vector(1, value_type), file_id, write_global_seqno, verify_checksums_before_ingest, true_data); } ~ExternalSSTFileBasicTest() override { test::DestroyDir(env_, sst_files_dir_); } protected: std::string sst_files_dir_; std::unique_ptr fault_injection_test_env_; }; TEST_F(ExternalSSTFileBasicTest, Basic) { Options options = CurrentOptions(); SstFileWriter sst_file_writer(EnvOptions(), options); // Current file size should be 0 after sst_file_writer init and before open a // file. 
ASSERT_EQ(sst_file_writer.FileSize(), 0); // file1.sst (0 => 99) std::string file1 = sst_files_dir_ + "file1.sst"; ASSERT_OK(sst_file_writer.Open(file1)); for (int k = 0; k < 100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file1_info; Status s = sst_file_writer.Finish(&file1_info); ASSERT_TRUE(s.ok()) << s.ToString(); // Current file size should be non-zero after success write. ASSERT_GT(sst_file_writer.FileSize(), 0); ASSERT_EQ(file1_info.file_path, file1); ASSERT_EQ(file1_info.num_entries, 100); ASSERT_EQ(file1_info.smallest_key, Key(0)); ASSERT_EQ(file1_info.largest_key, Key(99)); ASSERT_EQ(file1_info.num_range_del_entries, 0); ASSERT_EQ(file1_info.smallest_range_del_key, ""); ASSERT_EQ(file1_info.largest_range_del_key, ""); ASSERT_EQ(file1_info.file_checksum, kUnknownFileChecksum); ASSERT_EQ(file1_info.file_checksum_func_name, kUnknownFileChecksumFuncName); // sst_file_writer already finished, cannot add this value s = sst_file_writer.Put(Key(100), "bad_val"); ASSERT_FALSE(s.ok()) << s.ToString(); s = sst_file_writer.DeleteRange(Key(100), Key(200)); ASSERT_FALSE(s.ok()) << s.ToString(); DestroyAndReopen(options); // Add file using file path s = DeprecatedAddFile({file1}); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 100; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } DestroyAndRecreateExternalSSTFilesDir(); } class ChecksumVerifyHelper { private: Options options_; public: ChecksumVerifyHelper(Options& options) : options_(options) {} ~ChecksumVerifyHelper() {} Status GetSingleFileChecksumAndFuncName( const std::string& file_path, std::string* file_checksum, std::string* file_checksum_func_name) { Status s; EnvOptions soptions; std::unique_ptr file_reader; s = options_.env->NewSequentialFile(file_path, &file_reader, soptions); if (!s.ok()) { return s; } std::unique_ptr scratch(new char[2048]); Slice result; FileChecksumGenFactory* file_checksum_gen_factory = 
options_.file_checksum_gen_factory.get(); if (file_checksum_gen_factory == nullptr) { *file_checksum = kUnknownFileChecksum; *file_checksum_func_name = kUnknownFileChecksumFuncName; return Status::OK(); } else { FileChecksumGenContext gen_context; std::unique_ptr file_checksum_gen = file_checksum_gen_factory->CreateFileChecksumGenerator(gen_context); *file_checksum_func_name = file_checksum_gen->Name(); s = file_reader->Read(2048, &result, scratch.get()); if (!s.ok()) { return s; } while (result.size() != 0) { file_checksum_gen->Update(scratch.get(), result.size()); s = file_reader->Read(2048, &result, scratch.get()); if (!s.ok()) { return s; } } file_checksum_gen->Finalize(); *file_checksum = file_checksum_gen->GetChecksum(); } return Status::OK(); } }; TEST_F(ExternalSSTFileBasicTest, BasicWithFileChecksumCrc32c) { Options options = CurrentOptions(); options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); ChecksumVerifyHelper checksum_helper(options); SstFileWriter sst_file_writer(EnvOptions(), options); // Current file size should be 0 after sst_file_writer init and before open a // file. ASSERT_EQ(sst_file_writer.FileSize(), 0); // file1.sst (0 => 99) std::string file1 = sst_files_dir_ + "file1.sst"; ASSERT_OK(sst_file_writer.Open(file1)); for (int k = 0; k < 100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file1_info; Status s = sst_file_writer.Finish(&file1_info); ASSERT_TRUE(s.ok()) << s.ToString(); std::string file_checksum, file_checksum_func_name; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( file1, &file_checksum, &file_checksum_func_name)); // Current file size should be non-zero after success write. 
ASSERT_GT(sst_file_writer.FileSize(), 0); ASSERT_EQ(file1_info.file_path, file1); ASSERT_EQ(file1_info.num_entries, 100); ASSERT_EQ(file1_info.smallest_key, Key(0)); ASSERT_EQ(file1_info.largest_key, Key(99)); ASSERT_EQ(file1_info.num_range_del_entries, 0); ASSERT_EQ(file1_info.smallest_range_del_key, ""); ASSERT_EQ(file1_info.largest_range_del_key, ""); ASSERT_EQ(file1_info.file_checksum, file_checksum); ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name); // sst_file_writer already finished, cannot add this value s = sst_file_writer.Put(Key(100), "bad_val"); ASSERT_FALSE(s.ok()) << s.ToString(); s = sst_file_writer.DeleteRange(Key(100), Key(200)); ASSERT_FALSE(s.ok()) << s.ToString(); DestroyAndReopen(options); // Add file using file path s = DeprecatedAddFile({file1}); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 100; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } DestroyAndRecreateExternalSSTFilesDir(); } TEST_F(ExternalSSTFileBasicTest, IngestFileWithFileChecksum) { Options old_options = CurrentOptions(); Options options = CurrentOptions(); options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); const ImmutableCFOptions ioptions(options); ChecksumVerifyHelper checksum_helper(options); SstFileWriter sst_file_writer(EnvOptions(), options); // file01.sst (1000 => 1099) std::string file1 = sst_files_dir_ + "file01.sst"; ASSERT_OK(sst_file_writer.Open(file1)); for (int k = 1000; k < 1100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file1_info; Status s = sst_file_writer.Finish(&file1_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file1_info.file_path, file1); ASSERT_EQ(file1_info.num_entries, 100); ASSERT_EQ(file1_info.smallest_key, Key(1000)); ASSERT_EQ(file1_info.largest_key, Key(1099)); std::string file_checksum1, file_checksum_func_name1; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( file1, 
&file_checksum1, &file_checksum_func_name1)); ASSERT_EQ(file1_info.file_checksum, file_checksum1); ASSERT_EQ(file1_info.file_checksum_func_name, file_checksum_func_name1); // file02.sst (1100 => 1299) std::string file2 = sst_files_dir_ + "file02.sst"; ASSERT_OK(sst_file_writer.Open(file2)); for (int k = 1100; k < 1300; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file2_info; s = sst_file_writer.Finish(&file2_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file2_info.file_path, file2); ASSERT_EQ(file2_info.num_entries, 200); ASSERT_EQ(file2_info.smallest_key, Key(1100)); ASSERT_EQ(file2_info.largest_key, Key(1299)); std::string file_checksum2, file_checksum_func_name2; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( file2, &file_checksum2, &file_checksum_func_name2)); ASSERT_EQ(file2_info.file_checksum, file_checksum2); ASSERT_EQ(file2_info.file_checksum_func_name, file_checksum_func_name2); // file03.sst (1300 => 1499) std::string file3 = sst_files_dir_ + "file03.sst"; ASSERT_OK(sst_file_writer.Open(file3)); for (int k = 1300; k < 1500; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file3_info; s = sst_file_writer.Finish(&file3_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file3_info.file_path, file3); ASSERT_EQ(file3_info.num_entries, 200); ASSERT_EQ(file3_info.smallest_key, Key(1300)); ASSERT_EQ(file3_info.largest_key, Key(1499)); std::string file_checksum3, file_checksum_func_name3; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( file3, &file_checksum3, &file_checksum_func_name3)); ASSERT_EQ(file3_info.file_checksum, file_checksum3); ASSERT_EQ(file3_info.file_checksum_func_name, file_checksum_func_name3); // file04.sst (1500 => 1799) std::string file4 = sst_files_dir_ + "file04.sst"; ASSERT_OK(sst_file_writer.Open(file4)); for (int k = 1500; k < 1800; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } 
ExternalSstFileInfo file4_info; s = sst_file_writer.Finish(&file4_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file4_info.file_path, file4); ASSERT_EQ(file4_info.num_entries, 300); ASSERT_EQ(file4_info.smallest_key, Key(1500)); ASSERT_EQ(file4_info.largest_key, Key(1799)); std::string file_checksum4, file_checksum_func_name4; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( file4, &file_checksum4, &file_checksum_func_name4)); ASSERT_EQ(file4_info.file_checksum, file_checksum4); ASSERT_EQ(file4_info.file_checksum_func_name, file_checksum_func_name4); // file05.sst (1800 => 1899) std::string file5 = sst_files_dir_ + "file05.sst"; ASSERT_OK(sst_file_writer.Open(file5)); for (int k = 1800; k < 2000; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file5_info; s = sst_file_writer.Finish(&file5_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file5_info.file_path, file5); ASSERT_EQ(file5_info.num_entries, 200); ASSERT_EQ(file5_info.smallest_key, Key(1800)); ASSERT_EQ(file5_info.largest_key, Key(1999)); std::string file_checksum5, file_checksum_func_name5; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( file5, &file_checksum5, &file_checksum_func_name5)); ASSERT_EQ(file5_info.file_checksum, file_checksum5); ASSERT_EQ(file5_info.file_checksum_func_name, file_checksum_func_name5); // file06.sst (2000 => 2199) std::string file6 = sst_files_dir_ + "file06.sst"; ASSERT_OK(sst_file_writer.Open(file6)); for (int k = 2000; k < 2200; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file6_info; s = sst_file_writer.Finish(&file6_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file6_info.file_path, file6); ASSERT_EQ(file6_info.num_entries, 200); ASSERT_EQ(file6_info.smallest_key, Key(2000)); ASSERT_EQ(file6_info.largest_key, Key(2199)); std::string file_checksum6, file_checksum_func_name6; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( 
file6, &file_checksum6, &file_checksum_func_name6)); ASSERT_EQ(file6_info.file_checksum, file_checksum6); ASSERT_EQ(file6_info.file_checksum_func_name, file_checksum_func_name6); s = AddFileWithFileChecksum({file1}, {file_checksum1, "xyz"}, {file_checksum1}, true, false, false, false); // does not care the checksum input since db does not enable file checksum ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file1)); std::vector live_files; dbfull()->GetLiveFilesMetaData(&live_files); std::set set1; for (auto f : live_files) { set1.insert(f.name); ASSERT_EQ(f.file_checksum, kUnknownFileChecksum); ASSERT_EQ(f.file_checksum_func_name, kUnknownFileChecksumFuncName); } // Reopen Db with checksum enabled Reopen(options); // Enable verify_file_checksum option // The checksum vector does not match, fail the ingestion s = AddFileWithFileChecksum({file2}, {file_checksum2, "xyz"}, {file_checksum_func_name2}, true, false, false, false); ASSERT_FALSE(s.ok()) << s.ToString(); // Enable verify_file_checksum option // The checksum name does not match, fail the ingestion s = AddFileWithFileChecksum({file2}, {file_checksum2}, {"xyz"}, true, false, false, false); ASSERT_FALSE(s.ok()) << s.ToString(); // Enable verify_file_checksum option // The checksum itself does not match, fail the ingestion s = AddFileWithFileChecksum({file2}, {"xyz"}, {file_checksum_func_name2}, true, false, false, false); ASSERT_FALSE(s.ok()) << s.ToString(); // Enable verify_file_checksum option // All matches, ingestion is successful s = AddFileWithFileChecksum({file2}, {file_checksum2}, {file_checksum_func_name2}, true, false, false, false); ASSERT_TRUE(s.ok()) << s.ToString(); std::vector live_files1; dbfull()->GetLiveFilesMetaData(&live_files1); for (auto f : live_files1) { if (set1.find(f.name) == set1.end()) { ASSERT_EQ(f.file_checksum, file_checksum2); ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name2); set1.insert(f.name); } } ASSERT_OK(env_->FileExists(file2)); // Enable 
verify_file_checksum option // No checksum information is provided, generate it when ingesting std::vector checksum, checksum_func; s = AddFileWithFileChecksum({file3}, checksum, checksum_func, true, false, false, false); ASSERT_TRUE(s.ok()) << s.ToString(); std::vector live_files2; dbfull()->GetLiveFilesMetaData(&live_files2); for (auto f : live_files2) { if (set1.find(f.name) == set1.end()) { ASSERT_EQ(f.file_checksum, file_checksum3); ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name3); set1.insert(f.name); } } ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file3)); // Does not enable verify_file_checksum options // The checksum name does not match, fail the ingestion s = AddFileWithFileChecksum({file4}, {file_checksum4}, {"xyz"}, false, false, false, false); ASSERT_FALSE(s.ok()) << s.ToString(); // Does not enable verify_file_checksum options // Checksum function name matches, store the checksum being ingested. s = AddFileWithFileChecksum({file4}, {"asd"}, {file_checksum_func_name4}, false, false, false, false); ASSERT_TRUE(s.ok()) << s.ToString(); std::vector live_files3; dbfull()->GetLiveFilesMetaData(&live_files3); for (auto f : live_files3) { if (set1.find(f.name) == set1.end()) { ASSERT_FALSE(f.file_checksum == file_checksum4); ASSERT_EQ(f.file_checksum, "asd"); ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name4); set1.insert(f.name); } } ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file4)); // enable verify_file_checksum options, DB enable checksum, and enable // write_global_seq. So the checksum stored is different from the one // ingested due to the sequence number changes. 
s = AddFileWithFileChecksum({file5}, {file_checksum5}, {file_checksum_func_name5}, true, false, false, true); ASSERT_OK(s); ASSERT_TRUE(s.ok()) << s.ToString(); std::vector live_files4; dbfull()->GetLiveFilesMetaData(&live_files4); for (auto f : live_files4) { if (set1.find(f.name) == set1.end()) { std::string cur_checksum5, cur_checksum_func_name5; ASSERT_OK(checksum_helper.GetSingleFileChecksumAndFuncName( dbname_ + f.name, &cur_checksum5, &cur_checksum_func_name5)); ASSERT_EQ(f.file_checksum, cur_checksum5); ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name5); set1.insert(f.name); } } ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file5)); // Does not enable verify_file_checksum options and also the ingested file // checksum information is empty. DB will generate and store the checksum // in Manifest. std::vector files_c6, files_name6; s = AddFileWithFileChecksum({file6}, files_c6, files_name6, false, false, false, false); ASSERT_TRUE(s.ok()) << s.ToString(); std::vector live_files6; dbfull()->GetLiveFilesMetaData(&live_files6); for (auto f : live_files6) { if (set1.find(f.name) == set1.end()) { ASSERT_EQ(f.file_checksum, file_checksum6); ASSERT_EQ(f.file_checksum_func_name, file_checksum_func_name6); set1.insert(f.name); } } ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file6)); } TEST_F(ExternalSSTFileBasicTest, NoCopy) { Options options = CurrentOptions(); const ImmutableCFOptions ioptions(options); SstFileWriter sst_file_writer(EnvOptions(), options); // file1.sst (0 => 99) std::string file1 = sst_files_dir_ + "file1.sst"; ASSERT_OK(sst_file_writer.Open(file1)); for (int k = 0; k < 100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file1_info; Status s = sst_file_writer.Finish(&file1_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file1_info.file_path, file1); ASSERT_EQ(file1_info.num_entries, 100); ASSERT_EQ(file1_info.smallest_key, Key(0)); 
ASSERT_EQ(file1_info.largest_key, Key(99)); // file2.sst (100 => 299) std::string file2 = sst_files_dir_ + "file2.sst"; ASSERT_OK(sst_file_writer.Open(file2)); for (int k = 100; k < 300; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file2_info; s = sst_file_writer.Finish(&file2_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file2_info.file_path, file2); ASSERT_EQ(file2_info.num_entries, 200); ASSERT_EQ(file2_info.smallest_key, Key(100)); ASSERT_EQ(file2_info.largest_key, Key(299)); // file3.sst (110 => 124) .. overlap with file2.sst std::string file3 = sst_files_dir_ + "file3.sst"; ASSERT_OK(sst_file_writer.Open(file3)); for (int k = 110; k < 125; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file3_info; s = sst_file_writer.Finish(&file3_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file3_info.file_path, file3); ASSERT_EQ(file3_info.num_entries, 15); ASSERT_EQ(file3_info.smallest_key, Key(110)); ASSERT_EQ(file3_info.largest_key, Key(124)); s = DeprecatedAddFile({file1}, true /* move file */); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(Status::NotFound(), env_->FileExists(file1)); s = DeprecatedAddFile({file2}, false /* copy file */); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file2)); // This file has overlapping values with the existing data s = DeprecatedAddFile({file3}, true /* move file */); ASSERT_FALSE(s.ok()) << s.ToString(); ASSERT_OK(env_->FileExists(file3)); for (int k = 0; k < 300; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } } TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) { bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); do { Options options = CurrentOptions(); DestroyAndReopen(options); std::map true_data; int file_id = 1; ASSERT_OK(GenerateAndAddExternalFile( options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, 
write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); ASSERT_OK(GenerateAndAddExternalFile( options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 4, 6}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); ASSERT_OK(GenerateAndAddExternalFile( options, {11, 15, 19}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); ASSERT_OK(GenerateAndAddExternalFile( options, {120, 130}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 130}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); // Write some keys through normal write path for (int i = 0; i < 50; i++) { ASSERT_OK(Put(Key(i), "memtable")); true_data[Key(i)] = "memtable"; } SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); ASSERT_OK(GenerateAndAddExternalFile( options, {60, 61, 62}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); 
ASSERT_OK(GenerateAndAddExternalFile( options, {40, 41, 42}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); ASSERT_OK(GenerateAndAddExternalFile( options, {20, 30, 40}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); const Snapshot* snapshot = db_->GetSnapshot(); // We will need a seqno for the file regardless if the file overwrite // keys in the DB or not because we have a snapshot ASSERT_OK(GenerateAndAddExternalFile( options, {1000, 1002}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); ASSERT_OK(GenerateAndAddExternalFile( options, {2000, 3002}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 20, 40, 100, 150}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); db_->ReleaseSnapshot(snapshot); ASSERT_OK(GenerateAndAddExternalFile( options, {5000, 5001}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // No snapshot anymore, no need to assign a seqno ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); size_t kcnt = 0; VerifyDBFromMap(true_data, &kcnt, false); } while 
(ChangeOptionsForFileIngestionTest()); } TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) { bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); do { Options options = CurrentOptions(); options.merge_operator.reset(new TestPutOperator()); DestroyAndReopen(options); std::map true_data; int file_id = 1; ASSERT_OK(GenerateAndAddExternalFile( options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); ASSERT_OK(GenerateAndAddExternalFile( options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 4, 6}, ValueType::kTypeMerge, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); ASSERT_OK(GenerateAndAddExternalFile( options, {11, 15, 19}, ValueType::kTypeDeletion, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); ASSERT_OK(GenerateAndAddExternalFile( options, {120, 130}, ValueType::kTypeMerge, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 130}, ValueType::kTypeDeletion, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); 
ASSERT_OK(GenerateAndAddExternalFile( options, {120}, {ValueType::kTypeValue}, {{120, 135}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4); ASSERT_OK(GenerateAndAddExternalFile( options, {}, {}, {{110, 120}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // The range deletion ends on a key, but it doesn't actually delete // this key because the largest key in the range is exclusive. Still, // it counts as an overlap so a new seqno will be assigned. ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); ASSERT_OK(GenerateAndAddExternalFile( options, {}, {}, {{100, 109}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); // Write some keys through normal write path for (int i = 0; i < 50; i++) { ASSERT_OK(Put(Key(i), "memtable")); true_data[Key(i)] = "memtable"; } SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); ASSERT_OK(GenerateAndAddExternalFile( options, {60, 61, 62}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); ASSERT_OK(GenerateAndAddExternalFile( options, {40, 41, 42}, ValueType::kTypeMerge, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); ASSERT_OK(GenerateAndAddExternalFile( options, {20, 30, 40}, ValueType::kTypeDeletion, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); const Snapshot* snapshot = 
db_->GetSnapshot(); // We will need a seqno for the file regardless if the file overwrite // keys in the DB or not because we have a snapshot ASSERT_OK(GenerateAndAddExternalFile( options, {1000, 1002}, ValueType::kTypeMerge, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); ASSERT_OK(GenerateAndAddExternalFile( options, {2000, 3002}, ValueType::kTypeMerge, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 20, 40, 100, 150}, ValueType::kTypeMerge, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); db_->ReleaseSnapshot(snapshot); ASSERT_OK(GenerateAndAddExternalFile( options, {5000, 5001}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // No snapshot anymore, no need to assign a seqno ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); size_t kcnt = 0; VerifyDBFromMap(true_data, &kcnt, false); } while (ChangeOptionsForFileIngestionTest()); } TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) { bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); do { Options options = CurrentOptions(); options.merge_operator.reset(new TestPutOperator()); DestroyAndReopen(options); std::map true_data; int file_id = 1; ASSERT_OK(GenerateAndAddExternalFile( options, {1, 2, 3, 4, 5, 6}, {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, 
write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); ASSERT_OK(GenerateAndAddExternalFile( options, {10, 11, 12, 13}, {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 4, 6}, {ValueType::kTypeDeletion, ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1); ASSERT_OK(GenerateAndAddExternalFile( options, {11, 15, 19}, {ValueType::kTypeDeletion, ValueType::kTypeMerge, ValueType::kTypeValue}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); ASSERT_OK(GenerateAndAddExternalFile( options, {120, 130}, {ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 130}, {ValueType::kTypeMerge, ValueType::kTypeDeletion}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); ASSERT_OK(GenerateAndAddExternalFile( options, {150, 151, 152}, {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeDeletion}, {{150, 160}, {180, 190}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no 
seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3); ASSERT_OK(GenerateAndAddExternalFile( options, {150, 151, 152}, {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue}, {{200, 250}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4); ASSERT_OK(GenerateAndAddExternalFile( options, {300, 301, 302}, {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeDeletion}, {{1, 2}, {152, 154}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5); // Write some keys through normal write path for (int i = 0; i < 50; i++) { ASSERT_OK(Put(Key(i), "memtable")); true_data[Key(i)] = "memtable"; } SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); ASSERT_OK(GenerateAndAddExternalFile( options, {60, 61, 62}, {ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File doesn't overwrite any keys, no seqno needed ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); ASSERT_OK(GenerateAndAddExternalFile( options, {40, 41, 42}, {ValueType::kTypeValue, ValueType::kTypeDeletion, ValueType::kTypeDeletion}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1); ASSERT_OK(GenerateAndAddExternalFile( options, {20, 30, 40}, {ValueType::kTypeDeletion, ValueType::kTypeDeletion, ValueType::kTypeDeletion}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // File overwrites some keys, a seqno will be assigned ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2); const Snapshot* snapshot = db_->GetSnapshot(); // We will 
need a seqno for the file regardless if the file overwrite // keys in the DB or not because we have a snapshot ASSERT_OK(GenerateAndAddExternalFile( options, {1000, 1002}, {ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3); ASSERT_OK(GenerateAndAddExternalFile( options, {2000, 3002}, {ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4); ASSERT_OK(GenerateAndAddExternalFile( options, {1, 20, 40, 100, 150}, {ValueType::kTypeDeletion, ValueType::kTypeDeletion, ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // A global seqno will be assigned anyway because of the snapshot ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); db_->ReleaseSnapshot(snapshot); ASSERT_OK(GenerateAndAddExternalFile( options, {5000, 5001}, {ValueType::kTypeValue, ValueType::kTypeMerge}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); // No snapshot anymore, no need to assign a seqno ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5); size_t kcnt = 0; VerifyDBFromMap(true_data, &kcnt, false); } while (ChangeOptionsForFileIngestionTest()); } TEST_F(ExternalSSTFileBasicTest, FadviseTrigger) { Options options = CurrentOptions(); const int kNumKeys = 10000; size_t total_fadvised_bytes = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "SstFileWriter::Rep::InvalidatePageCache", [&](void* arg) { size_t fadvise_size = *(reinterpret_cast(arg)); total_fadvised_bytes += fadvise_size; }); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::unique_ptr sst_file_writer; std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst"; sst_file_writer.reset( new SstFileWriter(EnvOptions(), options, nullptr, false)); ASSERT_OK(sst_file_writer->Open(sst_file_path)); for (int i = 0; i < kNumKeys; i++) { ASSERT_OK(sst_file_writer->Put(Key(i), Key(i))); } ASSERT_OK(sst_file_writer->Finish()); // fadvise disabled ASSERT_EQ(total_fadvised_bytes, 0); sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst"; sst_file_writer.reset( new SstFileWriter(EnvOptions(), options, nullptr, true)); ASSERT_OK(sst_file_writer->Open(sst_file_path)); for (int i = 0; i < kNumKeys; i++) { ASSERT_OK(sst_file_writer->Put(Key(i), Key(i))); } ASSERT_OK(sst_file_writer->Finish()); // fadvise enabled ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize()); ASSERT_GT(total_fadvised_bytes, 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(ExternalSSTFileBasicTest, SyncFailure) { Options options; options.create_if_missing = true; options.env = fault_injection_test_env_.get(); std::vector> test_cases = { {"ExternalSstFileIngestionJob::BeforeSyncIngestedFile", "ExternalSstFileIngestionJob::AfterSyncIngestedFile"}, {"ExternalSstFileIngestionJob::BeforeSyncDir", "ExternalSstFileIngestionJob::AfterSyncDir"}, {"ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno", "ExternalSstFileIngestionJob::AfterSyncGlobalSeqno"}}; for (size_t i = 0; i < test_cases.size(); i++) { SyncPoint::GetInstance()->SetCallBack(test_cases[i].first, [&](void*) { fault_injection_test_env_->SetFilesystemActive(false); }); SyncPoint::GetInstance()->SetCallBack(test_cases[i].second, [&](void*) { fault_injection_test_env_->SetFilesystemActive(true); }); SyncPoint::GetInstance()->EnableProcessing(); DestroyAndReopen(options); if (i == 2) { ASSERT_OK(Put("foo", "v1")); } Options sst_file_writer_options; std::unique_ptr sst_file_writer( new SstFileWriter(EnvOptions(), 
sst_file_writer_options)); std::string file_name = sst_files_dir_ + "sync_failure_test_" + ToString(i) + ".sst"; ASSERT_OK(sst_file_writer->Open(file_name)); ASSERT_OK(sst_file_writer->Put("bar", "v2")); ASSERT_OK(sst_file_writer->Finish()); IngestExternalFileOptions ingest_opt; if (i == 0) { ingest_opt.move_files = true; } const Snapshot* snapshot = db_->GetSnapshot(); if (i == 2) { ingest_opt.write_global_seqno = true; } ASSERT_FALSE(db_->IngestExternalFile({file_name}, ingest_opt).ok()); db_->ReleaseSnapshot(snapshot); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); Destroy(options); } } TEST_F(ExternalSSTFileBasicTest, VerifyChecksumReadahead) { Options options; options.create_if_missing = true; SpecialEnv senv(Env::Default()); options.env = &senv; DestroyAndReopen(options); Options sst_file_writer_options; std::unique_ptr sst_file_writer( new SstFileWriter(EnvOptions(), sst_file_writer_options)); std::string file_name = sst_files_dir_ + "verify_checksum_readahead_test.sst"; ASSERT_OK(sst_file_writer->Open(file_name)); Random rnd(301); std::string value = DBTestBase::RandomString(&rnd, 4000); for (int i = 0; i < 5000; i++) { ASSERT_OK(sst_file_writer->Put(DBTestBase::Key(i), value)); } ASSERT_OK(sst_file_writer->Finish()); // Ingest it once without verifying checksums to see the baseline // preads. IngestExternalFileOptions ingest_opt; ingest_opt.move_files = false; senv.count_random_reads_ = true; senv.random_read_bytes_counter_ = 0; ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); auto base_num_reads = senv.random_read_counter_.Read(); // Make sure the counter is enabled. ASSERT_GT(base_num_reads, 0); // Ingest again and observe the reads made for for readahead. 
ingest_opt.move_files = false; ingest_opt.verify_checksums_before_ingest = true; ingest_opt.verify_checksums_readahead_size = size_t{2 * 1024 * 1024}; senv.count_random_reads_ = true; senv.random_read_bytes_counter_ = 0; ASSERT_OK(db_->IngestExternalFile({file_name}, ingest_opt)); // Make sure the counter is enabled. ASSERT_GT(senv.random_read_counter_.Read() - base_num_reads, 0); // The SST file is about 20MB. Readahead size is 2MB. // Give a conservative 15 reads for metadata blocks, the number // of random reads should be within 20 MB / 2MB + 15 = 25. ASSERT_LE(senv.random_read_counter_.Read() - base_num_reads, 40); Destroy(options); } TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) { for (int i = 5; i < 25; i++) { ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), Key(i) + "_val")); } Options options = CurrentOptions(); options.disable_auto_compactions = true; Reopen(options); SstFileWriter sst_file_writer(EnvOptions(), options); // file.sst (delete 0 => 30) std::string file = sst_files_dir_ + "file.sst"; ASSERT_OK(sst_file_writer.Open(file)); ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(30))); ExternalSstFileInfo file_info; ASSERT_OK(sst_file_writer.Finish(&file_info)); ASSERT_EQ(file_info.file_path, file); ASSERT_EQ(file_info.num_entries, 0); ASSERT_EQ(file_info.smallest_key, ""); ASSERT_EQ(file_info.largest_key, ""); ASSERT_EQ(file_info.num_range_del_entries, 1); ASSERT_EQ(file_info.smallest_range_del_key, Key(0)); ASSERT_EQ(file_info.largest_range_del_key, Key(30)); IngestExternalFileOptions ifo; ifo.move_files = true; ifo.snapshot_consistency = true; ifo.allow_global_seqno = true; ifo.write_global_seqno = true; ifo.verify_checksums_before_ingest = false; ASSERT_OK(db_->IngestExternalFile({file}, ifo)); for (int i = 5; i < 25; i++) { std::string res; ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &res).IsNotFound()); } } TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { int kNumLevels = 7; 
Options options = CurrentOptions(); options.disable_auto_compactions = true; options.num_levels = kNumLevels; Reopen(options); std::map true_data; int file_id = 1; // prevent range deletions from being dropped due to becoming obsolete. const Snapshot* snapshot = db_->GetSnapshot(); // range del [0, 50) in L6 file, [50, 100) in L0 file, [100, 150) in memtable for (int i = 0; i < 3; i++) { if (i != 0) { db_->Flush(FlushOptions()); if (i == 1) { MoveFilesToLevel(kNumLevels - 1); } } ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(50 * i), Key(50 * (i + 1)))); } ASSERT_EQ(1, NumTableFilesAtLevel(0)); ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1)); bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); // overlaps with L0 file but not memtable, so flush is skipped and file is // ingested into L0 SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber(); ASSERT_OK(GenerateAndAddExternalFile( options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue}, {{65, 70}, {70, 85}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); ASSERT_EQ(2, NumTableFilesAtLevel(0)); ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); // overlaps with L6 file but not memtable or L0 file, so flush is skipped and // file is ingested into L5 ASSERT_OK(GenerateAndAddExternalFile( options, {10, 40}, {ValueType::kTypeValue, ValueType::kTypeValue}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); ASSERT_EQ(2, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); // overlaps with L5 file but not memtable or L0 file, so flush is 
skipped and // file is ingested into L4 ASSERT_OK(GenerateAndAddExternalFile( options, {}, {}, {{5, 15}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); ASSERT_EQ(2, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 2)); ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); // ingested file overlaps with memtable, so flush is triggered before the file // is ingested such that the ingested data is considered newest. So L0 file // count increases by two. ASSERT_OK(GenerateAndAddExternalFile( options, {100, 140}, {ValueType::kTypeValue, ValueType::kTypeValue}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno); ASSERT_EQ(4, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(1, NumTableFilesAtLevel(options.num_levels - 1)); // snapshot unneeded now that all range deletions are persisted db_->ReleaseSnapshot(snapshot); // overlaps with nothing, so places at bottom level and skips incrementing // seqnum. 
ASSERT_OK(GenerateAndAddExternalFile( options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue}, {{160, 200}}, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data)); ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno); ASSERT_EQ(4, NumTableFilesAtLevel(0)); ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2)); ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1)); } TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) { Options options = CurrentOptions(); SstFileWriter sst_file_writer(EnvOptions(), options); // file8.sst (delete 300 => 400) std::string file8 = sst_files_dir_ + "file8.sst"; ASSERT_OK(sst_file_writer.Open(file8)); ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400))); ExternalSstFileInfo file8_info; Status s = sst_file_writer.Finish(&file8_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file8_info.file_path, file8); ASSERT_EQ(file8_info.num_entries, 0); ASSERT_EQ(file8_info.smallest_key, ""); ASSERT_EQ(file8_info.largest_key, ""); ASSERT_EQ(file8_info.num_range_del_entries, 1); ASSERT_EQ(file8_info.smallest_range_del_key, Key(300)); ASSERT_EQ(file8_info.largest_range_del_key, Key(400)); // file9.sst (delete 400 => 500) std::string file9 = sst_files_dir_ + "file9.sst"; ASSERT_OK(sst_file_writer.Open(file9)); ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500))); ExternalSstFileInfo file9_info; s = sst_file_writer.Finish(&file9_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file9_info.file_path, file9); ASSERT_EQ(file9_info.num_entries, 0); ASSERT_EQ(file9_info.smallest_key, ""); ASSERT_EQ(file9_info.largest_key, ""); ASSERT_EQ(file9_info.num_range_del_entries, 1); ASSERT_EQ(file9_info.smallest_range_del_key, Key(400)); ASSERT_EQ(file9_info.largest_range_del_key, Key(500)); // Range deletion tombstones are exclusive on their end key, so these SSTs // should not be considered as overlapping. 
s = DeprecatedAddFile({file8, file9}); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); DestroyAndRecreateExternalSSTFilesDir(); } TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { bool change_checksum_called = false; const auto& change_checksum = [&](void* arg) { if (!change_checksum_called) { char* buf = reinterpret_cast(arg); assert(nullptr != buf); buf[0] ^= 0x1; change_checksum_called = true; } }; SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", change_checksum); SyncPoint::GetInstance()->EnableProcessing(); int file_id = 0; bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); do { Options options = CurrentOptions(); DestroyAndReopen(options); std::map true_data; Status s = GenerateAndAddExternalFile( options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, write_global_seqno, verify_checksums_before_ingest, &true_data); if (verify_checksums_before_ingest) { ASSERT_NOK(s); } else { ASSERT_OK(s); } change_checksum_called = false; } while (ChangeOptionsForFileIngestionTest()); } TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) { SyncPoint::GetInstance()->DisableProcessing(); int file_id = 0; EnvOptions env_options; do { Options options = CurrentOptions(); std::string file_path = sst_files_dir_ + ToString(file_id++); SstFileWriter sst_file_writer(env_options, options); Status s = sst_file_writer.Open(file_path); ASSERT_OK(s); for (int i = 0; i != 100; ++i) { std::string key = Key(i); std::string value = Key(i) + ToString(0); ASSERT_OK(sst_file_writer.Put(key, value)); } ASSERT_OK(sst_file_writer.Finish()); { // Get file size uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(file_path, &file_size)); ASSERT_GT(file_size, 8); std::unique_ptr rwfile; 
ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); // Manually corrupt the file // We deterministically corrupt the first byte because we currently // cannot choose a random offset. The reason for this limitation is that // we do not checksum property block at present. const uint64_t offset = 0; char scratch[8] = {0}; Slice buf; ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); scratch[0] ^= 0xff; // flip one bit ASSERT_OK(rwfile->Write(offset, buf)); } // Ingest file. IngestExternalFileOptions ifo; ifo.write_global_seqno = std::get<0>(GetParam()); ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); s = db_->IngestExternalFile({file_path}, ifo); if (ifo.verify_checksums_before_ingest) { ASSERT_NOK(s); } else { ASSERT_OK(s); } } while (ChangeOptionsForFileIngestionTest()); } TEST_P(ExternalSSTFileBasicTest, IngestExternalFileWithCorruptedPropsBlock) { bool verify_checksums_before_ingest = std::get<1>(GetParam()); if (!verify_checksums_before_ingest) { return; } uint64_t props_block_offset = 0; size_t props_block_size = 0; const auto& get_props_block_offset = [&](void* arg) { props_block_offset = *reinterpret_cast(arg); }; const auto& get_props_block_size = [&](void* arg) { props_block_size = *reinterpret_cast(arg); }; SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset", get_props_block_offset); SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize", get_props_block_size); SyncPoint::GetInstance()->EnableProcessing(); int file_id = 0; Random64 rand(time(nullptr)); do { std::string file_path = sst_files_dir_ + ToString(file_id++); Options options = CurrentOptions(); SstFileWriter sst_file_writer(EnvOptions(), options); Status s = sst_file_writer.Open(file_path); ASSERT_OK(s); for (int i = 0; i != 100; ++i) { std::string key = Key(i); 
std::string value = Key(i) + ToString(0); ASSERT_OK(sst_file_writer.Put(key, value)); } ASSERT_OK(sst_file_writer.Finish()); { std::unique_ptr rwfile; ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); // Manually corrupt the file ASSERT_GT(props_block_size, 8); uint64_t offset = props_block_offset + rand.Next() % (props_block_size - 8); char scratch[8] = {0}; Slice buf; ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); scratch[0] ^= 0xff; // flip one bit ASSERT_OK(rwfile->Write(offset, buf)); } // Ingest file. IngestExternalFileOptions ifo; ifo.write_global_seqno = std::get<0>(GetParam()); ifo.verify_checksums_before_ingest = true; s = db_->IngestExternalFile({file_path}, ifo); ASSERT_NOK(s); } while (ChangeOptionsForFileIngestionTest()); } TEST_F(ExternalSSTFileBasicTest, OverlappingFiles) { Options options = CurrentOptions(); std::vector files; { SstFileWriter sst_file_writer(EnvOptions(), options); std::string file1 = sst_files_dir_ + "file1.sst"; ASSERT_OK(sst_file_writer.Open(file1)); ASSERT_OK(sst_file_writer.Put("a", "z")); ASSERT_OK(sst_file_writer.Put("i", "m")); ExternalSstFileInfo file1_info; ASSERT_OK(sst_file_writer.Finish(&file1_info)); files.push_back(std::move(file1)); } { SstFileWriter sst_file_writer(EnvOptions(), options); std::string file2 = sst_files_dir_ + "file2.sst"; ASSERT_OK(sst_file_writer.Open(file2)); ASSERT_OK(sst_file_writer.Put("i", "k")); ExternalSstFileInfo file2_info; ASSERT_OK(sst_file_writer.Finish(&file2_info)); files.push_back(std::move(file2)); } IngestExternalFileOptions ifo; ASSERT_OK(db_->IngestExternalFile(files, ifo)); ASSERT_EQ(Get("a"), "z"); ASSERT_EQ(Get("i"), "k"); int total_keys = 0; Iterator* iter = db_->NewIterator(ReadOptions()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_OK(iter->status()); total_keys++; } delete iter; ASSERT_EQ(total_keys, 2); ASSERT_EQ(2, NumTableFilesAtLevel(0)); } INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, 
ExternalSSTFileBasicTest, testing::Values(std::make_tuple(true, true), std::make_tuple(true, false), std::make_tuple(false, true), std::make_tuple(false, false))); #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/external_sst_file_ingestion_job.cc000066400000000000000000001040171370372246700225670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "db/external_sst_file_ingestion_job.h" #include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/version_edit.h" #include "file/file_util.h" #include "file/random_access_file_reader.h" #include "table/merging_iterator.h" #include "table/scoped_arena_iterator.h" #include "table/sst_file_writer_collectors.h" #include "table/table_builder.h" #include "test_util/sync_point.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { Status ExternalSstFileIngestionJob::Prepare( const std::vector& external_files_paths, const std::vector& files_checksums, const std::vector& files_checksum_func_names, uint64_t next_file_number, SuperVersion* sv) { Status status; // Read the information of files we are ingesting for (const std::string& file_path : external_files_paths) { IngestedFileInfo file_to_ingest; status = GetIngestedFileInfo(file_path, &file_to_ingest, sv); if (!status.ok()) { return status; } files_to_ingest_.push_back(file_to_ingest); } for (const IngestedFileInfo& f : files_to_ingest_) { if (f.cf_id != TablePropertiesCollectorFactory::Context::kUnknownColumnFamily && f.cf_id != cfd_->GetID()) { return Status::InvalidArgument( "External file column family 
id don't match"); } } const Comparator* ucmp = cfd_->internal_comparator().user_comparator(); auto num_files = files_to_ingest_.size(); if (num_files == 0) { return Status::InvalidArgument("The list of files is empty"); } else if (num_files > 1) { // Verify that passed files don't have overlapping ranges autovector sorted_files; for (size_t i = 0; i < num_files; i++) { sorted_files.push_back(&files_to_ingest_[i]); } std::sort( sorted_files.begin(), sorted_files.end(), [&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) { return sstableKeyCompare(ucmp, info1->smallest_internal_key, info2->smallest_internal_key) < 0; }); for (size_t i = 0; i + 1 < num_files; i++) { if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key, sorted_files[i + 1]->smallest_internal_key) >= 0) { files_overlap_ = true; break; } } } if (ingestion_options_.ingest_behind && files_overlap_) { return Status::NotSupported("Files have overlapping ranges"); } for (IngestedFileInfo& f : files_to_ingest_) { if (f.num_entries == 0 && f.num_range_deletions == 0) { return Status::InvalidArgument("File contain no entries"); } if (!f.smallest_internal_key.Valid() || !f.largest_internal_key.Valid()) { return Status::Corruption("Generated table have corrupted keys"); } } // Copy/Move external files into DB std::unordered_set ingestion_path_ids; for (IngestedFileInfo& f : files_to_ingest_) { f.fd = FileDescriptor(next_file_number++, 0, f.file_size); f.copy_file = false; const std::string path_outside_db = f.external_file_path; const std::string path_inside_db = TableFileName(cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId()); if (ingestion_options_.move_files) { status = fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr); if (status.ok()) { // It is unsafe to assume application had sync the file and file // directory before ingest the file. For integrity of RocksDB we need // to sync the file. 
std::unique_ptr file_to_sync; status = fs_->ReopenWritableFile(path_inside_db, env_options_, &file_to_sync, nullptr); if (status.ok()) { TEST_SYNC_POINT( "ExternalSstFileIngestionJob::BeforeSyncIngestedFile"); status = SyncIngestedFile(file_to_sync.get()); TEST_SYNC_POINT("ExternalSstFileIngestionJob::AfterSyncIngestedFile"); if (!status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Failed to sync ingested file %s: %s", path_inside_db.c_str(), status.ToString().c_str()); } } } else if (status.IsNotSupported() && ingestion_options_.failed_move_fall_back_to_copy) { // Original file is on a different FS, use copy instead of hard linking. f.copy_file = true; } } else { f.copy_file = true; } if (f.copy_file) { TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Prepare:CopyFile", nullptr); // CopyFile also sync the new file. status = CopyFile(fs_, path_outside_db, path_inside_db, 0, db_options_.use_fsync); } TEST_SYNC_POINT("ExternalSstFileIngestionJob::Prepare:FileAdded"); if (!status.ok()) { break; } f.internal_file_path = path_inside_db; // Initialize the checksum information of ingested files. f.file_checksum = kUnknownFileChecksum; f.file_checksum_func_name = kUnknownFileChecksumFuncName; ingestion_path_ids.insert(f.fd.GetPathId()); } TEST_SYNC_POINT("ExternalSstFileIngestionJob::BeforeSyncDir"); if (status.ok()) { for (auto path_id : ingestion_path_ids) { status = directories_->GetDataDir(path_id)->Fsync(IOOptions(), nullptr); if (!status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Failed to sync directory %" ROCKSDB_PRIszt " while ingest file: %s", path_id, status.ToString().c_str()); break; } } } TEST_SYNC_POINT("ExternalSstFileIngestionJob::AfterSyncDir"); // Generate and check the sst file checksum. Note that, if // IngestExternalFileOptions::write_global_seqno is true, we will not update // the checksum information in the files_to_ingests_ here, since the file is // upadted with the new global_seqno. 
After global_seqno is updated, DB will // generate the new checksum and store it in the Manifest. In all other cases // if ingestion_options_.write_global_seqno == true and // verify_file_checksum is false, we only check the checksum function name. if (status.ok() && db_options_.file_checksum_gen_factory != nullptr) { if (ingestion_options_.verify_file_checksum == false && files_checksums.size() == files_to_ingest_.size() && files_checksum_func_names.size() == files_to_ingest_.size()) { // Only when verify_file_checksum == false and the checksum for ingested // files are provided, DB will use the provided checksum and does not // generate the checksum for ingested files. need_generate_file_checksum_ = false; } else { need_generate_file_checksum_ = true; } FileChecksumGenContext gen_context; std::unique_ptr file_checksum_gen = db_options_.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); std::vector generated_checksums; std::vector generated_checksum_func_names; // Step 1: generate the checksum for ingested sst file. 
if (need_generate_file_checksum_) { for (size_t i = 0; i < files_to_ingest_.size(); i++) { std::string generated_checksum, generated_checksum_func_name; IOStatus io_s = GenerateOneFileChecksum( fs_, files_to_ingest_[i].internal_file_path, db_options_.file_checksum_gen_factory.get(), &generated_checksum, &generated_checksum_func_name, ingestion_options_.verify_checksums_readahead_size, db_options_.allow_mmap_reads); if (!io_s.ok()) { status = io_s; ROCKS_LOG_WARN(db_options_.info_log, "Sst file checksum generation of file: %s failed: %s", files_to_ingest_[i].internal_file_path.c_str(), status.ToString().c_str()); break; } if (ingestion_options_.write_global_seqno == false) { files_to_ingest_[i].file_checksum = generated_checksum; files_to_ingest_[i].file_checksum_func_name = generated_checksum_func_name; } generated_checksums.push_back(generated_checksum); generated_checksum_func_names.push_back(generated_checksum_func_name); } } // Step 2: based on the verify_file_checksum and ingested checksum // information, do the verification. if (status.ok()) { if (files_checksums.size() == files_to_ingest_.size() && files_checksum_func_names.size() == files_to_ingest_.size()) { // Verify the checksum and checksum function name. 
if (ingestion_options_.verify_file_checksum) { for (size_t i = 0; i < files_to_ingest_.size(); i++) { if (files_checksum_func_names[i] != generated_checksum_func_names[i]) { status = Status::InvalidArgument( "Checksum function name does not match with the checksum " "function name of this DB"); ROCKS_LOG_WARN( db_options_.info_log, "Sst file checksum verification of file: %s failed: %s", external_files_paths[i].c_str(), status.ToString().c_str()); break; } if (files_checksums[i] != generated_checksums[i]) { status = Status::Corruption( "Ingested checksum does not match with the generated " "checksum"); ROCKS_LOG_WARN( db_options_.info_log, "Sst file checksum verification of file: %s failed: %s", files_to_ingest_[i].internal_file_path.c_str(), status.ToString().c_str()); break; } } } else { // If verify_file_checksum is not enabled, we only verify the // checksum function name. If it does not match, fail the ingestion. // If matches, we trust the ingested checksum information and store // in the Manifest. for (size_t i = 0; i < files_to_ingest_.size(); i++) { if (files_checksum_func_names[i] != file_checksum_gen->Name()) { status = Status::InvalidArgument( "Checksum function name does not match with the checksum " "function name of this DB"); ROCKS_LOG_WARN( db_options_.info_log, "Sst file checksum verification of file: %s failed: %s", external_files_paths[i].c_str(), status.ToString().c_str()); break; } files_to_ingest_[i].file_checksum = files_checksums[i]; files_to_ingest_[i].file_checksum_func_name = files_checksum_func_names[i]; } } } else if (files_checksums.size() != files_checksum_func_names.size() || (files_checksums.size() == files_checksum_func_names.size() && files_checksums.size() != 0)) { // The checksum or checksum function name vector are not both empty // and they are incomplete. 
status = Status::InvalidArgument( "The checksum information of ingested sst files are nonempty and " "the size of checksums or the size of the checksum function " "names " "does not match with the number of ingested sst files"); ROCKS_LOG_WARN( db_options_.info_log, "The ingested sst files checksum information is incomplete: %s", status.ToString().c_str()); } } } // TODO: The following is duplicated with Cleanup(). if (!status.ok()) { // We failed, remove all files that we copied into the db for (IngestedFileInfo& f : files_to_ingest_) { if (f.internal_file_path.empty()) { continue; } Status s = env_->DeleteFile(f.internal_file_path); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "AddFile() clean up for file %s failed : %s", f.internal_file_path.c_str(), s.ToString().c_str()); } } } return status; } Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed, SuperVersion* super_version) { autovector ranges; for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) { ranges.emplace_back(file_to_ingest.smallest_internal_key.user_key(), file_to_ingest.largest_internal_key.user_key()); } Status status = cfd_->RangesOverlapWithMemtables(ranges, super_version, flush_needed); if (status.ok() && *flush_needed && !ingestion_options_.allow_blocking_flush) { status = Status::InvalidArgument("External file requires flush"); } return status; } // REQUIRES: we have become the only writer by entering both write_thread_ and // nonmem_write_thread_ Status ExternalSstFileIngestionJob::Run() { Status status; SuperVersion* super_version = cfd_->GetSuperVersion(); #ifndef NDEBUG // We should never run the job with a memtable that is overlapping // with the files we are ingesting bool need_flush = false; status = NeedsFlush(&need_flush, super_version); assert(status.ok() && need_flush == false); #endif bool force_global_seqno = false; if (ingestion_options_.snapshot_consistency && !db_snapshots_->empty()) { // We need to assign a global sequence number to all the 
files even // if the don't overlap with any ranges since we have snapshots force_global_seqno = true; } // It is safe to use this instead of LastAllocatedSequence since we are // the only active writer, and hence they are equal SequenceNumber last_seqno = versions_->LastSequence(); edit_.SetColumnFamily(cfd_->GetID()); // The levels that the files will be ingested into for (IngestedFileInfo& f : files_to_ingest_) { SequenceNumber assigned_seqno = 0; if (ingestion_options_.ingest_behind) { status = CheckLevelForIngestedBehindFile(&f); } else { status = AssignLevelAndSeqnoForIngestedFile( super_version, force_global_seqno, cfd_->ioptions()->compaction_style, last_seqno, &f, &assigned_seqno); } if (!status.ok()) { return status; } status = AssignGlobalSeqnoForIngestedFile(&f, assigned_seqno); TEST_SYNC_POINT_CALLBACK("ExternalSstFileIngestionJob::Run", &assigned_seqno); if (assigned_seqno > last_seqno) { assert(assigned_seqno == last_seqno + 1); last_seqno = assigned_seqno; ++consumed_seqno_count_; } if (!status.ok()) { return status; } status = GenerateChecksumForIngestedFile(&f); if (!status.ok()) { return status; } // We use the import time as the ancester time. This is the time the data // is written to the database. 
int64_t temp_current_time = 0; uint64_t current_time = kUnknownFileCreationTime; uint64_t oldest_ancester_time = kUnknownOldestAncesterTime; if (env_->GetCurrentTime(&temp_current_time).ok()) { current_time = oldest_ancester_time = static_cast(temp_current_time); } edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(), f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno, f.assigned_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time, current_time, f.file_checksum, f.file_checksum_func_name); } return status; } void ExternalSstFileIngestionJob::UpdateStats() { // Update internal stats for new ingested files uint64_t total_keys = 0; uint64_t total_l0_files = 0; uint64_t total_time = env_->NowMicros() - job_start_time_; EventLoggerStream stream = event_logger_->Log(); stream << "event" << "ingest_finished"; stream << "files_ingested"; stream.StartArray(); for (IngestedFileInfo& f : files_to_ingest_) { InternalStats::CompactionStats stats(CompactionReason::kExternalSstIngestion, 1); stats.micros = total_time; // If actual copy occurred for this file, then we need to count the file // size as the actual bytes written. If the file was linked, then we ignore // the bytes written for file metadata. // TODO (yanqin) maybe account for file metadata bytes for exact accuracy? 
if (f.copy_file) { stats.bytes_written = f.fd.GetFileSize(); } else { stats.bytes_moved = f.fd.GetFileSize(); } stats.num_output_files = 1; cfd_->internal_stats()->AddCompactionStats(f.picked_level, Env::Priority::USER, stats); cfd_->internal_stats()->AddCFStats(InternalStats::BYTES_INGESTED_ADD_FILE, f.fd.GetFileSize()); total_keys += f.num_entries; if (f.picked_level == 0) { total_l0_files += 1; } ROCKS_LOG_INFO( db_options_.info_log, "[AddFile] External SST file %s was ingested in L%d with path %s " "(global_seqno=%" PRIu64 ")\n", f.external_file_path.c_str(), f.picked_level, f.internal_file_path.c_str(), f.assigned_seqno); stream << "file" << f.internal_file_path << "level" << f.picked_level; } stream.EndArray(); stream << "lsm_state"; stream.StartArray(); auto vstorage = cfd_->current()->storage_info(); for (int level = 0; level < vstorage->num_levels(); ++level) { stream << vstorage->NumLevelFiles(level); } stream.EndArray(); cfd_->internal_stats()->AddCFStats(InternalStats::INGESTED_NUM_KEYS_TOTAL, total_keys); cfd_->internal_stats()->AddCFStats(InternalStats::INGESTED_NUM_FILES_TOTAL, files_to_ingest_.size()); cfd_->internal_stats()->AddCFStats( InternalStats::INGESTED_LEVEL0_NUM_FILES_TOTAL, total_l0_files); } void ExternalSstFileIngestionJob::Cleanup(const Status& status) { if (!status.ok()) { // We failed to add the files to the database // remove all the files we copied for (IngestedFileInfo& f : files_to_ingest_) { if (f.internal_file_path.empty()) { continue; } Status s = env_->DeleteFile(f.internal_file_path); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "AddFile() clean up for file %s failed : %s", f.internal_file_path.c_str(), s.ToString().c_str()); } } consumed_seqno_count_ = 0; files_overlap_ = false; } else if (status.ok() && ingestion_options_.move_files) { // The files were moved and added successfully, remove original file links for (IngestedFileInfo& f : files_to_ingest_) { Status s = env_->DeleteFile(f.external_file_path); if 
(!s.ok()) { ROCKS_LOG_WARN( db_options_.info_log, "%s was added to DB successfully but failed to remove original " "file link : %s", f.external_file_path.c_str(), s.ToString().c_str()); } } } } Status ExternalSstFileIngestionJob::GetIngestedFileInfo( const std::string& external_file, IngestedFileInfo* file_to_ingest, SuperVersion* sv) { file_to_ingest->external_file_path = external_file; // Get external file size Status status = fs_->GetFileSize(external_file, IOOptions(), &file_to_ingest->file_size, nullptr); if (!status.ok()) { return status; } // Create TableReader for external file std::unique_ptr table_reader; std::unique_ptr sst_file; std::unique_ptr sst_file_reader; status = fs_->NewRandomAccessFile(external_file, env_options_, &sst_file, nullptr); if (!status.ok()) { return status; } sst_file_reader.reset(new RandomAccessFileReader(std::move(sst_file), external_file)); status = cfd_->ioptions()->table_factory->NewTableReader( TableReaderOptions(*cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor.get(), env_options_, cfd_->internal_comparator()), std::move(sst_file_reader), file_to_ingest->file_size, &table_reader); if (!status.ok()) { return status; } if (ingestion_options_.verify_checksums_before_ingest) { // If customized readahead size is needed, we can pass a user option // all the way to here. Right now we just rely on the default readahead // to keep things simple. 
ReadOptions ro; ro.readahead_size = ingestion_options_.verify_checksums_readahead_size; status = table_reader->VerifyChecksum( ro, TableReaderCaller::kExternalSSTIngestion); } if (!status.ok()) { return status; } // Get the external file properties auto props = table_reader->GetTableProperties(); const auto& uprops = props->user_collected_properties; // Get table version auto version_iter = uprops.find(ExternalSstFilePropertyNames::kVersion); if (version_iter == uprops.end()) { return Status::Corruption("External file version not found"); } file_to_ingest->version = DecodeFixed32(version_iter->second.c_str()); auto seqno_iter = uprops.find(ExternalSstFilePropertyNames::kGlobalSeqno); if (file_to_ingest->version == 2) { // version 2 imply that we have global sequence number if (seqno_iter == uprops.end()) { return Status::Corruption( "External file global sequence number not found"); } // Set the global sequence number file_to_ingest->original_seqno = DecodeFixed64(seqno_iter->second.c_str()); auto offsets_iter = props->properties_offsets.find( ExternalSstFilePropertyNames::kGlobalSeqno); if (offsets_iter == props->properties_offsets.end() || offsets_iter->second == 0) { file_to_ingest->global_seqno_offset = 0; return Status::Corruption("Was not able to find file global seqno field"); } file_to_ingest->global_seqno_offset = static_cast(offsets_iter->second); } else if (file_to_ingest->version == 1) { // SST file V1 should not have global seqno field assert(seqno_iter == uprops.end()); file_to_ingest->original_seqno = 0; if (ingestion_options_.allow_blocking_flush || ingestion_options_.allow_global_seqno) { return Status::InvalidArgument( "External SST file V1 does not support global seqno"); } } else { return Status::InvalidArgument("External file version is not supported"); } // Get number of entries in table file_to_ingest->num_entries = props->num_entries; file_to_ingest->num_range_deletions = props->num_range_deletions; ParsedInternalKey key; ReadOptions ro; // 
During reading the external file we can cache blocks that we read into // the block cache, if we later change the global seqno of this file, we will // have block in cache that will include keys with wrong seqno. // We need to disable fill_cache so that we read from the file without // updating the block cache. ro.fill_cache = false; std::unique_ptr iter(table_reader->NewIterator( ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion)); std::unique_ptr range_del_iter( table_reader->NewRangeTombstoneIterator(ro)); // Get first (smallest) and last (largest) key from file. file_to_ingest->smallest_internal_key = InternalKey("", 0, ValueType::kTypeValue); file_to_ingest->largest_internal_key = InternalKey("", 0, ValueType::kTypeValue); bool bounds_set = false; iter->SeekToFirst(); if (iter->Valid()) { if (!ParseInternalKey(iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } if (key.sequence != 0) { return Status::Corruption("external file have non zero sequence number"); } file_to_ingest->smallest_internal_key.SetFrom(key); iter->SeekToLast(); if (!ParseInternalKey(iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } if (key.sequence != 0) { return Status::Corruption("external file have non zero sequence number"); } file_to_ingest->largest_internal_key.SetFrom(key); bounds_set = true; } // We may need to adjust these key bounds, depending on whether any range // deletion tombstones extend past them. 
const Comparator* ucmp = cfd_->internal_comparator().user_comparator(); if (range_del_iter != nullptr) { for (range_del_iter->SeekToFirst(); range_del_iter->Valid(); range_del_iter->Next()) { if (!ParseInternalKey(range_del_iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } RangeTombstone tombstone(key, range_del_iter->value()); InternalKey start_key = tombstone.SerializeKey(); if (!bounds_set || sstableKeyCompare(ucmp, start_key, file_to_ingest->smallest_internal_key) < 0) { file_to_ingest->smallest_internal_key = start_key; } InternalKey end_key = tombstone.SerializeEndKey(); if (!bounds_set || sstableKeyCompare(ucmp, end_key, file_to_ingest->largest_internal_key) > 0) { file_to_ingest->largest_internal_key = end_key; } bounds_set = true; } } file_to_ingest->cf_id = static_cast(props->column_family_id); file_to_ingest->table_properties = *props; return status; } Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( SuperVersion* sv, bool force_global_seqno, CompactionStyle compaction_style, SequenceNumber last_seqno, IngestedFileInfo* file_to_ingest, SequenceNumber* assigned_seqno) { Status status; *assigned_seqno = 0; if (force_global_seqno) { *assigned_seqno = last_seqno + 1; if (compaction_style == kCompactionStyleUniversal || files_overlap_) { file_to_ingest->picked_level = 0; return status; } } bool overlap_with_db = false; Arena arena; ReadOptions ro; ro.total_order_seek = true; int target_level = 0; auto* vstorage = cfd_->current()->storage_info(); for (int lvl = 0; lvl < cfd_->NumberLevels(); lvl++) { if (lvl > 0 && lvl < vstorage->base_level()) { continue; } if (vstorage->NumLevelFiles(lvl) > 0) { bool overlap_with_level = false; status = sv->current->OverlapWithLevelIterator( ro, env_options_, file_to_ingest->smallest_internal_key.user_key(), file_to_ingest->largest_internal_key.user_key(), lvl, &overlap_with_level); if (!status.ok()) { return status; } if (overlap_with_level) { // We must use L0 or 
any level higher than `lvl` to be able to overwrite // the keys that we overlap with in this level, We also need to assign // this file a seqno to overwrite the existing keys in level `lvl` overlap_with_db = true; break; } if (compaction_style == kCompactionStyleUniversal && lvl != 0) { const std::vector& level_files = vstorage->LevelFiles(lvl); const SequenceNumber level_largest_seqno = (*max_element(level_files.begin(), level_files.end(), [](FileMetaData* f1, FileMetaData* f2) { return f1->fd.largest_seqno < f2->fd.largest_seqno; })) ->fd.largest_seqno; // should only assign seqno to current level's largest seqno when // the file fits if (level_largest_seqno != 0 && IngestedFileFitInLevel(file_to_ingest, lvl)) { *assigned_seqno = level_largest_seqno; } else { continue; } } } else if (compaction_style == kCompactionStyleUniversal) { continue; } // We don't overlap with any keys in this level, but we still need to check // if our file can fit in it if (IngestedFileFitInLevel(file_to_ingest, lvl)) { target_level = lvl; } } // If files overlap, we have to ingest them at level 0 and assign the newest // sequence number if (files_overlap_) { target_level = 0; *assigned_seqno = last_seqno + 1; } TEST_SYNC_POINT_CALLBACK( "ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile", &overlap_with_db); file_to_ingest->picked_level = target_level; if (overlap_with_db && *assigned_seqno == 0) { *assigned_seqno = last_seqno + 1; } return status; } Status ExternalSstFileIngestionJob::CheckLevelForIngestedBehindFile( IngestedFileInfo* file_to_ingest) { auto* vstorage = cfd_->current()->storage_info(); // first check if new files fit in the bottommost level int bottom_lvl = cfd_->NumberLevels() - 1; if(!IngestedFileFitInLevel(file_to_ingest, bottom_lvl)) { return Status::InvalidArgument( "Can't ingest_behind file as it doesn't fit " "at the bottommost level!"); } // second check if despite allow_ingest_behind=true we still have 0 seqnums // at some upper level for (int lvl 
= 0; lvl < cfd_->NumberLevels() - 1; lvl++) { for (auto file : vstorage->LevelFiles(lvl)) { if (file->fd.smallest_seqno == 0) { return Status::InvalidArgument( "Can't ingest_behind file as despite allow_ingest_behind=true " "there are files with 0 seqno in database at upper levels!"); } } } file_to_ingest->picked_level = bottom_lvl; return Status::OK(); } Status ExternalSstFileIngestionJob::AssignGlobalSeqnoForIngestedFile( IngestedFileInfo* file_to_ingest, SequenceNumber seqno) { if (file_to_ingest->original_seqno == seqno) { // This file already have the correct global seqno return Status::OK(); } else if (!ingestion_options_.allow_global_seqno) { return Status::InvalidArgument("Global seqno is required, but disabled"); } else if (file_to_ingest->global_seqno_offset == 0) { return Status::InvalidArgument( "Trying to set global seqno for a file that don't have a global seqno " "field"); } if (ingestion_options_.write_global_seqno) { // Determine if we can write global_seqno to a given offset of file. // If the file system does not support random write, then we should not. // Otherwise we should. 
std::unique_ptr rwfile; Status status = fs_->NewRandomRWFile(file_to_ingest->internal_file_path, env_options_, &rwfile, nullptr); if (status.ok()) { std::string seqno_val; PutFixed64(&seqno_val, seqno); status = rwfile->Write(file_to_ingest->global_seqno_offset, seqno_val, IOOptions(), nullptr); if (status.ok()) { TEST_SYNC_POINT("ExternalSstFileIngestionJob::BeforeSyncGlobalSeqno"); status = SyncIngestedFile(rwfile.get()); TEST_SYNC_POINT("ExternalSstFileIngestionJob::AfterSyncGlobalSeqno"); if (!status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Failed to sync ingested file %s after writing global " "sequence number: %s", file_to_ingest->internal_file_path.c_str(), status.ToString().c_str()); } } if (!status.ok()) { return status; } } else if (!status.IsNotSupported()) { return status; } } file_to_ingest->assigned_seqno = seqno; return Status::OK(); } IOStatus ExternalSstFileIngestionJob::GenerateChecksumForIngestedFile( IngestedFileInfo* file_to_ingest) { if (db_options_.file_checksum_gen_factory == nullptr || need_generate_file_checksum_ == false || ingestion_options_.write_global_seqno == false) { // If file_checksum_gen_factory is not set, we are not able to generate // the checksum. if write_global_seqno is false, it means we will use // file checksum generated during Prepare(). This step will be skipped. 
return IOStatus::OK(); } std::string file_checksum, file_checksum_func_name; IOStatus io_s = GenerateOneFileChecksum( fs_, file_to_ingest->internal_file_path, db_options_.file_checksum_gen_factory.get(), &file_checksum, &file_checksum_func_name, ingestion_options_.verify_checksums_readahead_size, db_options_.allow_mmap_reads); if (!io_s.ok()) { return io_s; } file_to_ingest->file_checksum = file_checksum; file_to_ingest->file_checksum_func_name = file_checksum_func_name; return IOStatus::OK(); } bool ExternalSstFileIngestionJob::IngestedFileFitInLevel( const IngestedFileInfo* file_to_ingest, int level) { if (level == 0) { // Files can always fit in L0 return true; } auto* vstorage = cfd_->current()->storage_info(); Slice file_smallest_user_key( file_to_ingest->smallest_internal_key.user_key()); Slice file_largest_user_key(file_to_ingest->largest_internal_key.user_key()); if (vstorage->OverlapInLevel(level, &file_smallest_user_key, &file_largest_user_key)) { // File overlap with another files in this level, we cannot // add it to this level return false; } if (cfd_->RangeOverlapWithCompaction(file_smallest_user_key, file_largest_user_key, level)) { // File overlap with a running compaction output that will be stored // in this level, we cannot add this file to this level return false; } // File did not overlap with level files, our compaction output return true; } template Status ExternalSstFileIngestionJob::SyncIngestedFile(TWritableFile* file) { assert(file != nullptr); if (db_options_.use_fsync) { return file->Fsync(IOOptions(), nullptr); } else { return file->Sync(IOOptions(), nullptr); } } } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/external_sst_file_ingestion_job.h000066400000000000000000000161411370372246700224310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "db/column_family.h" #include "db/dbformat.h" #include "db/internal_stats.h" #include "db/snapshot_impl.h" #include "logging/event_logger.h" #include "options/db_options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/sst_file_writer.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class Directories; struct IngestedFileInfo { // External file path std::string external_file_path; // Smallest internal key in external file InternalKey smallest_internal_key; // Largest internal key in external file InternalKey largest_internal_key; // Sequence number for keys in external file SequenceNumber original_seqno; // Offset of the global sequence number field in the file, will // be zero if version is 1 (global seqno is not supported) size_t global_seqno_offset; // External file size uint64_t file_size; // total number of keys in external file uint64_t num_entries; // total number of range deletions in external file uint64_t num_range_deletions; // Id of column family this file shoule be ingested into uint32_t cf_id; // TableProperties read from external file TableProperties table_properties; // Version of external file int version; // FileDescriptor for the file inside the DB FileDescriptor fd; // file path that we picked for file inside the DB std::string internal_file_path; // Global sequence number that we picked for the file inside the DB SequenceNumber assigned_seqno = 0; // Level inside the DB we picked for the external file. int picked_level = 0; // Whether to copy or link the external sst file. copy_file will be set to // false if ingestion_options.move_files is true and underlying FS // supports link operation. 
Need to provide a default value to make the // undefined-behavior sanity check of llvm happy. Since // ingestion_options.move_files is false by default, thus copy_file is true // by default. bool copy_file = true; // The checksum of ingested file std::string file_checksum; // The name of checksum function that generate the checksum std::string file_checksum_func_name; }; class ExternalSstFileIngestionJob { public: ExternalSstFileIngestionJob( Env* env, VersionSet* versions, ColumnFamilyData* cfd, const ImmutableDBOptions& db_options, const EnvOptions& env_options, SnapshotList* db_snapshots, const IngestExternalFileOptions& ingestion_options, Directories* directories, EventLogger* event_logger) : env_(env), fs_(db_options.fs.get()), versions_(versions), cfd_(cfd), db_options_(db_options), env_options_(env_options), db_snapshots_(db_snapshots), ingestion_options_(ingestion_options), directories_(directories), event_logger_(event_logger), job_start_time_(env_->NowMicros()), consumed_seqno_count_(0) { assert(directories != nullptr); } // Prepare the job by copying external files into the DB. Status Prepare(const std::vector& external_files_paths, const std::vector& files_checksums, const std::vector& files_checksum_func_names, uint64_t next_file_number, SuperVersion* sv); // Check if we need to flush the memtable before running the ingestion job // This will be true if the files we are ingesting are overlapping with any // key range in the memtable. // // @param super_version A referenced SuperVersion that will be held for the // duration of this function. // // Thread-safe Status NeedsFlush(bool* flush_needed, SuperVersion* super_version); // Will execute the ingestion job and prepare edit() to be applied. // REQUIRES: Mutex held Status Run(); // Update column family stats. 
// REQUIRES: Mutex held void UpdateStats(); // Cleanup after successful/failed job void Cleanup(const Status& status); VersionEdit* edit() { return &edit_; } const autovector& files_to_ingest() const { return files_to_ingest_; } // How many sequence numbers did we consume as part of the ingest job? int ConsumedSequenceNumbersCount() const { return consumed_seqno_count_; } private: // Open the external file and populate `file_to_ingest` with all the // external information we need to ingest this file. Status GetIngestedFileInfo(const std::string& external_file, IngestedFileInfo* file_to_ingest, SuperVersion* sv); // Assign `file_to_ingest` the appropriate sequence number and the lowest // possible level that it can be ingested to according to compaction_style. // REQUIRES: Mutex held Status AssignLevelAndSeqnoForIngestedFile(SuperVersion* sv, bool force_global_seqno, CompactionStyle compaction_style, SequenceNumber last_seqno, IngestedFileInfo* file_to_ingest, SequenceNumber* assigned_seqno); // File that we want to ingest behind always goes to the lowest level; // we just check that it fits in the level, that DB allows ingest_behind, // and that we don't have 0 seqnums at the upper levels. // REQUIRES: Mutex held Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest); // Set the file global sequence number to `seqno` Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest, SequenceNumber seqno); // Generate the file checksum and store in the IngestedFileInfo IOStatus GenerateChecksumForIngestedFile(IngestedFileInfo* file_to_ingest); // Check if `file_to_ingest` can fit in level `level` // REQUIRES: Mutex held bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest, int level); // Helper method to sync given file. 
template Status SyncIngestedFile(TWritableFile* file); Env* env_; FileSystem* fs_; VersionSet* versions_; ColumnFamilyData* cfd_; const ImmutableDBOptions& db_options_; const EnvOptions& env_options_; SnapshotList* db_snapshots_; autovector files_to_ingest_; const IngestExternalFileOptions& ingestion_options_; Directories* directories_; EventLogger* event_logger_; VersionEdit edit_; uint64_t job_start_time_; int consumed_seqno_count_; // Set in ExternalSstFileIngestionJob::Prepare(), if true all files are // ingested in L0 bool files_overlap_{false}; // Set in ExternalSstFileIngestionJob::Prepare(), if true and DB // file_checksum_gen_factory is set, DB will generate checksum each file. bool need_generate_file_checksum_{true}; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/external_sst_file_test.cc000066400000000000000000003201671370372246700207230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include #include "db/db_test_util.h" #include "db/dbformat.h" #include "file/filename.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/sst_file_writer.h" #include "test_util/fault_injection_test_env.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { // A test environment that can be configured to fail the Link operation. 
class ExternalSSTTestEnv : public EnvWrapper { public: ExternalSSTTestEnv(Env* t, bool fail_link) : EnvWrapper(t), fail_link_(fail_link) {} Status LinkFile(const std::string& s, const std::string& t) override { if (fail_link_) { return Status::NotSupported("Link failed"); } return target()->LinkFile(s, t); } void set_fail_link(bool fail_link) { fail_link_ = fail_link; } private: bool fail_link_; }; class ExternSSTFileLinkFailFallbackTest : public DBTestBase, public ::testing::WithParamInterface> { public: ExternSSTFileLinkFailFallbackTest() : DBTestBase("/external_sst_file_test"), test_env_(new ExternalSSTTestEnv(env_, true)) { sst_files_dir_ = dbname_ + "/sst_files/"; test::DestroyDir(env_, sst_files_dir_); env_->CreateDir(sst_files_dir_); options_ = CurrentOptions(); options_.disable_auto_compactions = true; options_.env = test_env_; } void TearDown() override { delete db_; db_ = nullptr; ASSERT_OK(DestroyDB(dbname_, options_)); delete test_env_; test_env_ = nullptr; } protected: std::string sst_files_dir_; Options options_; ExternalSSTTestEnv* test_env_; }; class ExternalSSTFileTest : public DBTestBase, public ::testing::WithParamInterface> { public: ExternalSSTFileTest() : DBTestBase("/external_sst_file_test") { sst_files_dir_ = dbname_ + "/sst_files/"; DestroyAndRecreateExternalSSTFilesDir(); } void DestroyAndRecreateExternalSSTFilesDir() { test::DestroyDir(env_, sst_files_dir_); env_->CreateDir(sst_files_dir_); } Status GenerateOneExternalFile( const Options& options, ColumnFamilyHandle* cfh, std::vector>& data, int file_id, bool sort_data, std::string* external_file_path, std::map* true_data) { // Generate a file id if not provided if (-1 == file_id) { file_id = (++last_file_id_); } // Sort data if asked to do so if (sort_data) { std::sort(data.begin(), data.end(), [&](const std::pair& e1, const std::pair& e2) { return options.comparator->Compare(e1.first, e2.first) < 0; }); auto uniq_iter = std::unique( data.begin(), data.end(), [&](const std::pair& e1, 
const std::pair& e2) { return options.comparator->Compare(e1.first, e2.first) == 0; }); data.resize(uniq_iter - data.begin()); } std::string file_path = sst_files_dir_ + ToString(file_id); SstFileWriter sst_file_writer(EnvOptions(), options, cfh); Status s = sst_file_writer.Open(file_path); if (!s.ok()) { return s; } for (const auto& entry : data) { s = sst_file_writer.Put(entry.first, entry.second); if (!s.ok()) { sst_file_writer.Finish(); return s; } } s = sst_file_writer.Finish(); if (s.ok() && external_file_path != nullptr) { *external_file_path = file_path; } if (s.ok() && nullptr != true_data) { for (const auto& entry : data) { true_data->insert({entry.first, entry.second}); } } return s; } Status GenerateAndAddExternalFile( const Options options, std::vector> data, int file_id = -1, bool allow_global_seqno = false, bool write_global_seqno = false, bool verify_checksums_before_ingest = true, bool ingest_behind = false, bool sort_data = false, std::map* true_data = nullptr, ColumnFamilyHandle* cfh = nullptr) { // Generate a file id if not provided if (file_id == -1) { file_id = last_file_id_ + 1; last_file_id_++; } // Sort data if asked to do so if (sort_data) { std::sort(data.begin(), data.end(), [&](const std::pair& e1, const std::pair& e2) { return options.comparator->Compare(e1.first, e2.first) < 0; }); auto uniq_iter = std::unique( data.begin(), data.end(), [&](const std::pair& e1, const std::pair& e2) { return options.comparator->Compare(e1.first, e2.first) == 0; }); data.resize(uniq_iter - data.begin()); } std::string file_path = sst_files_dir_ + ToString(file_id); SstFileWriter sst_file_writer(EnvOptions(), options, cfh); Status s = sst_file_writer.Open(file_path); if (!s.ok()) { return s; } for (auto& entry : data) { s = sst_file_writer.Put(entry.first, entry.second); if (!s.ok()) { sst_file_writer.Finish(); return s; } } s = sst_file_writer.Finish(); if (s.ok()) { IngestExternalFileOptions ifo; ifo.allow_global_seqno = allow_global_seqno; 
ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false; ifo.verify_checksums_before_ingest = verify_checksums_before_ingest; ifo.ingest_behind = ingest_behind; if (cfh) { s = db_->IngestExternalFile(cfh, {file_path}, ifo); } else { s = db_->IngestExternalFile({file_path}, ifo); } } if (s.ok() && true_data) { for (auto& entry : data) { (*true_data)[entry.first] = entry.second; } } return s; } Status GenerateAndAddExternalFiles( const Options& options, const std::vector& column_families, const std::vector& ifos, std::vector>>& data, int file_id, bool sort_data, std::vector>& true_data) { if (-1 == file_id) { file_id = (++last_file_id_); } // Generate external SST files, one for each column family size_t num_cfs = column_families.size(); assert(ifos.size() == num_cfs); assert(data.size() == num_cfs); Status s; std::vector args(num_cfs); for (size_t i = 0; i != num_cfs; ++i) { std::string external_file_path; s = GenerateOneExternalFile( options, column_families[i], data[i], file_id, sort_data, &external_file_path, true_data.size() == num_cfs ? 
&true_data[i] : nullptr); if (!s.ok()) { return s; } ++file_id; args[i].column_family = column_families[i]; args[i].external_files.push_back(external_file_path); args[i].options = ifos[i]; } s = db_->IngestExternalFiles(args); return s; } Status GenerateAndAddExternalFile( const Options options, std::vector> data, int file_id = -1, bool allow_global_seqno = false, bool write_global_seqno = false, bool verify_checksums_before_ingest = true, bool ingest_behind = false, bool sort_data = false, std::map* true_data = nullptr, ColumnFamilyHandle* cfh = nullptr) { std::vector> file_data; for (auto& entry : data) { file_data.emplace_back(Key(entry.first), entry.second); } return GenerateAndAddExternalFile(options, file_data, file_id, allow_global_seqno, write_global_seqno, verify_checksums_before_ingest, ingest_behind, sort_data, true_data, cfh); } Status GenerateAndAddExternalFile( const Options options, std::vector keys, int file_id = -1, bool allow_global_seqno = false, bool write_global_seqno = false, bool verify_checksums_before_ingest = true, bool ingest_behind = false, bool sort_data = false, std::map* true_data = nullptr, ColumnFamilyHandle* cfh = nullptr) { std::vector> file_data; for (auto& k : keys) { file_data.emplace_back(Key(k), Key(k) + ToString(file_id)); } return GenerateAndAddExternalFile(options, file_data, file_id, allow_global_seqno, write_global_seqno, verify_checksums_before_ingest, ingest_behind, sort_data, true_data, cfh); } Status DeprecatedAddFile(const std::vector& files, bool move_files = false, bool skip_snapshot_check = false, bool skip_write_global_seqno = false) { IngestExternalFileOptions opts; opts.move_files = move_files; opts.snapshot_consistency = !skip_snapshot_check; opts.allow_global_seqno = false; opts.allow_blocking_flush = false; opts.write_global_seqno = !skip_write_global_seqno; return db_->IngestExternalFile(files, opts); } ~ExternalSSTFileTest() override { test::DestroyDir(env_, sst_files_dir_); } protected: int last_file_id_ 
= 0; std::string sst_files_dir_; }; TEST_F(ExternalSSTFileTest, Basic) { do { Options options = CurrentOptions(); SstFileWriter sst_file_writer(EnvOptions(), options); // Current file size should be 0 after sst_file_writer init and before open a file. ASSERT_EQ(sst_file_writer.FileSize(), 0); // file1.sst (0 => 99) std::string file1 = sst_files_dir_ + "file1.sst"; ASSERT_OK(sst_file_writer.Open(file1)); for (int k = 0; k < 100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file1_info; Status s = sst_file_writer.Finish(&file1_info); ASSERT_TRUE(s.ok()) << s.ToString(); // Current file size should be non-zero after success write. ASSERT_GT(sst_file_writer.FileSize(), 0); ASSERT_EQ(file1_info.file_path, file1); ASSERT_EQ(file1_info.num_entries, 100); ASSERT_EQ(file1_info.smallest_key, Key(0)); ASSERT_EQ(file1_info.largest_key, Key(99)); ASSERT_EQ(file1_info.num_range_del_entries, 0); ASSERT_EQ(file1_info.smallest_range_del_key, ""); ASSERT_EQ(file1_info.largest_range_del_key, ""); // sst_file_writer already finished, cannot add this value s = sst_file_writer.Put(Key(100), "bad_val"); ASSERT_FALSE(s.ok()) << s.ToString(); // file2.sst (100 => 199) std::string file2 = sst_files_dir_ + "file2.sst"; ASSERT_OK(sst_file_writer.Open(file2)); for (int k = 100; k < 200; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } // Cannot add this key because it's not after last added key s = sst_file_writer.Put(Key(99), "bad_val"); ASSERT_FALSE(s.ok()) << s.ToString(); ExternalSstFileInfo file2_info; s = sst_file_writer.Finish(&file2_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file2_info.file_path, file2); ASSERT_EQ(file2_info.num_entries, 100); ASSERT_EQ(file2_info.smallest_key, Key(100)); ASSERT_EQ(file2_info.largest_key, Key(199)); // file3.sst (195 => 299) // This file values overlap with file2 values std::string file3 = sst_files_dir_ + "file3.sst"; ASSERT_OK(sst_file_writer.Open(file3)); for (int k = 195; k < 300; 
k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file3_info; s = sst_file_writer.Finish(&file3_info); ASSERT_TRUE(s.ok()) << s.ToString(); // Current file size should be non-zero after success finish. ASSERT_GT(sst_file_writer.FileSize(), 0); ASSERT_EQ(file3_info.file_path, file3); ASSERT_EQ(file3_info.num_entries, 105); ASSERT_EQ(file3_info.smallest_key, Key(195)); ASSERT_EQ(file3_info.largest_key, Key(299)); // file4.sst (30 => 39) // This file values overlap with file1 values std::string file4 = sst_files_dir_ + "file4.sst"; ASSERT_OK(sst_file_writer.Open(file4)); for (int k = 30; k < 40; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file4_info; s = sst_file_writer.Finish(&file4_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file4_info.file_path, file4); ASSERT_EQ(file4_info.num_entries, 10); ASSERT_EQ(file4_info.smallest_key, Key(30)); ASSERT_EQ(file4_info.largest_key, Key(39)); // file5.sst (400 => 499) std::string file5 = sst_files_dir_ + "file5.sst"; ASSERT_OK(sst_file_writer.Open(file5)); for (int k = 400; k < 500; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file5_info; s = sst_file_writer.Finish(&file5_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file5_info.file_path, file5); ASSERT_EQ(file5_info.num_entries, 100); ASSERT_EQ(file5_info.smallest_key, Key(400)); ASSERT_EQ(file5_info.largest_key, Key(499)); // file6.sst (delete 400 => 500) std::string file6 = sst_files_dir_ + "file6.sst"; ASSERT_OK(sst_file_writer.Open(file6)); sst_file_writer.DeleteRange(Key(400), Key(500)); ExternalSstFileInfo file6_info; s = sst_file_writer.Finish(&file6_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file6_info.file_path, file6); ASSERT_EQ(file6_info.num_entries, 0); ASSERT_EQ(file6_info.smallest_key, ""); ASSERT_EQ(file6_info.largest_key, ""); ASSERT_EQ(file6_info.num_range_del_entries, 1); 
ASSERT_EQ(file6_info.smallest_range_del_key, Key(400)); ASSERT_EQ(file6_info.largest_range_del_key, Key(500)); // file7.sst (delete 500 => 570, put 520 => 599 divisible by 2) std::string file7 = sst_files_dir_ + "file7.sst"; ASSERT_OK(sst_file_writer.Open(file7)); sst_file_writer.DeleteRange(Key(500), Key(550)); for (int k = 520; k < 560; k += 2) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } sst_file_writer.DeleteRange(Key(525), Key(575)); for (int k = 560; k < 600; k += 2) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file7_info; s = sst_file_writer.Finish(&file7_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file7_info.file_path, file7); ASSERT_EQ(file7_info.num_entries, 40); ASSERT_EQ(file7_info.smallest_key, Key(520)); ASSERT_EQ(file7_info.largest_key, Key(598)); ASSERT_EQ(file7_info.num_range_del_entries, 2); ASSERT_EQ(file7_info.smallest_range_del_key, Key(500)); ASSERT_EQ(file7_info.largest_range_del_key, Key(575)); // file8.sst (delete 600 => 700) std::string file8 = sst_files_dir_ + "file8.sst"; ASSERT_OK(sst_file_writer.Open(file8)); sst_file_writer.DeleteRange(Key(600), Key(700)); ExternalSstFileInfo file8_info; s = sst_file_writer.Finish(&file8_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file8_info.file_path, file8); ASSERT_EQ(file8_info.num_entries, 0); ASSERT_EQ(file8_info.smallest_key, ""); ASSERT_EQ(file8_info.largest_key, ""); ASSERT_EQ(file8_info.num_range_del_entries, 1); ASSERT_EQ(file8_info.smallest_range_del_key, Key(600)); ASSERT_EQ(file8_info.largest_range_del_key, Key(700)); // Cannot create an empty sst file std::string file_empty = sst_files_dir_ + "file_empty.sst"; ExternalSstFileInfo file_empty_info; s = sst_file_writer.Finish(&file_empty_info); ASSERT_NOK(s); DestroyAndReopen(options); // Add file using file path s = DeprecatedAddFile({file1}); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 100; k++) { 
ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } // Add file while holding a snapshot will fail const Snapshot* s1 = db_->GetSnapshot(); if (s1 != nullptr) { ASSERT_NOK(DeprecatedAddFile({file2})); db_->ReleaseSnapshot(s1); } // We can add the file after releaseing the snapshot ASSERT_OK(DeprecatedAddFile({file2})); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 200; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } // This file has overlapping values with the existing data s = DeprecatedAddFile({file3}); ASSERT_FALSE(s.ok()) << s.ToString(); // This file has overlapping values with the existing data s = DeprecatedAddFile({file4}); ASSERT_FALSE(s.ok()) << s.ToString(); // Overwrite values of keys divisible by 5 for (int k = 0; k < 200; k += 5) { ASSERT_OK(Put(Key(k), Key(k) + "_val_new")); } ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); // Key range of file5 (400 => 499) don't overlap with any keys in DB ASSERT_OK(DeprecatedAddFile({file5})); // This file has overlapping values with the existing data s = DeprecatedAddFile({file6}); ASSERT_FALSE(s.ok()) << s.ToString(); // Key range of file7 (500 => 598) don't overlap with any keys in DB ASSERT_OK(DeprecatedAddFile({file7})); // Key range of file7 (600 => 700) don't overlap with any keys in DB ASSERT_OK(DeprecatedAddFile({file8})); // Make sure values are correct before and after flush/compaction for (int i = 0; i < 2; i++) { for (int k = 0; k < 200; k++) { std::string value = Key(k) + "_val"; if (k % 5 == 0) { value += "_new"; } ASSERT_EQ(Get(Key(k)), value); } for (int k = 400; k < 500; k++) { std::string value = Key(k) + "_val"; ASSERT_EQ(Get(Key(k)), value); } for (int k = 500; k < 600; k++) { std::string value = Key(k) + "_val"; if (k < 520 || k % 2 == 1) { value = "NOT_FOUND"; } ASSERT_EQ(Get(Key(k)), value); } ASSERT_OK(Flush()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); } Close(); options.disable_auto_compactions = true; Reopen(options); // Delete keys in range 
(400 => 499) for (int k = 400; k < 500; k++) { ASSERT_OK(Delete(Key(k))); } // We deleted range (400 => 499) but cannot add file5 because // of the range tombstones ASSERT_NOK(DeprecatedAddFile({file5})); // Compacting the DB will remove the tombstones ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // Now we can add the file ASSERT_OK(DeprecatedAddFile({file5})); // Verify values of file5 in DB for (int k = 400; k < 500; k++) { std::string value = Key(k) + "_val"; ASSERT_EQ(Get(Key(k)), value); } DestroyAndRecreateExternalSSTFilesDir(); } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction | kRangeDelSkipConfigs)); } class SstFileWriterCollector : public TablePropertiesCollector { public: explicit SstFileWriterCollector(const std::string prefix) : prefix_(prefix) { name_ = prefix_ + "_SstFileWriterCollector"; } const char* Name() const override { return name_.c_str(); } Status Finish(UserCollectedProperties* properties) override { std::string count = std::to_string(count_); *properties = UserCollectedProperties{ {prefix_ + "_SstFileWriterCollector", "YES"}, {prefix_ + "_Count", count}, }; return Status::OK(); } Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/, EntryType /*type*/, SequenceNumber /*seq*/, uint64_t /*file_size*/) override { ++count_; return Status::OK(); } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } private: uint32_t count_ = 0; std::string prefix_; std::string name_; }; class SstFileWriterCollectorFactory : public TablePropertiesCollectorFactory { public: explicit SstFileWriterCollectorFactory(std::string prefix) : prefix_(prefix), num_created_(0) {} TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context /*context*/) override { num_created_++; return new SstFileWriterCollector(prefix_); } const char* Name() const override { return "SstFileWriterCollectorFactory"; } std::string prefix_; uint32_t 
num_created_; }; TEST_F(ExternalSSTFileTest, AddList) { do { Options options = CurrentOptions(); auto abc_collector = std::make_shared("abc"); auto xyz_collector = std::make_shared("xyz"); options.table_properties_collector_factories.emplace_back(abc_collector); options.table_properties_collector_factories.emplace_back(xyz_collector); SstFileWriter sst_file_writer(EnvOptions(), options); // file1.sst (0 => 99) std::string file1 = sst_files_dir_ + "file1.sst"; ASSERT_OK(sst_file_writer.Open(file1)); for (int k = 0; k < 100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file1_info; Status s = sst_file_writer.Finish(&file1_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file1_info.file_path, file1); ASSERT_EQ(file1_info.num_entries, 100); ASSERT_EQ(file1_info.smallest_key, Key(0)); ASSERT_EQ(file1_info.largest_key, Key(99)); // sst_file_writer already finished, cannot add this value s = sst_file_writer.Put(Key(100), "bad_val"); ASSERT_FALSE(s.ok()) << s.ToString(); // file2.sst (100 => 199) std::string file2 = sst_files_dir_ + "file2.sst"; ASSERT_OK(sst_file_writer.Open(file2)); for (int k = 100; k < 200; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } // Cannot add this key because it's not after last added key s = sst_file_writer.Put(Key(99), "bad_val"); ASSERT_FALSE(s.ok()) << s.ToString(); ExternalSstFileInfo file2_info; s = sst_file_writer.Finish(&file2_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file2_info.file_path, file2); ASSERT_EQ(file2_info.num_entries, 100); ASSERT_EQ(file2_info.smallest_key, Key(100)); ASSERT_EQ(file2_info.largest_key, Key(199)); // file3.sst (195 => 199) // This file values overlap with file2 values std::string file3 = sst_files_dir_ + "file3.sst"; ASSERT_OK(sst_file_writer.Open(file3)); for (int k = 195; k < 200; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file3_info; s = sst_file_writer.Finish(&file3_info); 
ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file3_info.file_path, file3); ASSERT_EQ(file3_info.num_entries, 5); ASSERT_EQ(file3_info.smallest_key, Key(195)); ASSERT_EQ(file3_info.largest_key, Key(199)); // file4.sst (30 => 39) // This file values overlap with file1 values std::string file4 = sst_files_dir_ + "file4.sst"; ASSERT_OK(sst_file_writer.Open(file4)); for (int k = 30; k < 40; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val_overlap")); } ExternalSstFileInfo file4_info; s = sst_file_writer.Finish(&file4_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file4_info.file_path, file4); ASSERT_EQ(file4_info.num_entries, 10); ASSERT_EQ(file4_info.smallest_key, Key(30)); ASSERT_EQ(file4_info.largest_key, Key(39)); // file5.sst (200 => 299) std::string file5 = sst_files_dir_ + "file5.sst"; ASSERT_OK(sst_file_writer.Open(file5)); for (int k = 200; k < 300; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } ExternalSstFileInfo file5_info; s = sst_file_writer.Finish(&file5_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file5_info.file_path, file5); ASSERT_EQ(file5_info.num_entries, 100); ASSERT_EQ(file5_info.smallest_key, Key(200)); ASSERT_EQ(file5_info.largest_key, Key(299)); // file6.sst (delete 0 => 100) std::string file6 = sst_files_dir_ + "file6.sst"; ASSERT_OK(sst_file_writer.Open(file6)); ASSERT_OK(sst_file_writer.DeleteRange(Key(0), Key(75))); ASSERT_OK(sst_file_writer.DeleteRange(Key(25), Key(100))); ExternalSstFileInfo file6_info; s = sst_file_writer.Finish(&file6_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file6_info.file_path, file6); ASSERT_EQ(file6_info.num_entries, 0); ASSERT_EQ(file6_info.smallest_key, ""); ASSERT_EQ(file6_info.largest_key, ""); ASSERT_EQ(file6_info.num_range_del_entries, 2); ASSERT_EQ(file6_info.smallest_range_del_key, Key(0)); ASSERT_EQ(file6_info.largest_range_del_key, Key(100)); // file7.sst (delete 99 => 201) std::string file7 = sst_files_dir_ + "file7.sst"; 
ASSERT_OK(sst_file_writer.Open(file7)); ASSERT_OK(sst_file_writer.DeleteRange(Key(99), Key(201))); ExternalSstFileInfo file7_info; s = sst_file_writer.Finish(&file7_info); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(file7_info.file_path, file7); ASSERT_EQ(file7_info.num_entries, 0); ASSERT_EQ(file7_info.smallest_key, ""); ASSERT_EQ(file7_info.largest_key, ""); ASSERT_EQ(file7_info.num_range_del_entries, 1); ASSERT_EQ(file7_info.smallest_range_del_key, Key(99)); ASSERT_EQ(file7_info.largest_range_del_key, Key(201)); // list 1 has internal key range conflict std::vector file_list0({file1, file2}); std::vector file_list1({file3, file2, file1}); std::vector file_list2({file5}); std::vector file_list3({file3, file4}); std::vector file_list4({file5, file7}); std::vector file_list5({file6, file7}); DestroyAndReopen(options); // These lists of files have key ranges that overlap with each other s = DeprecatedAddFile(file_list1); ASSERT_FALSE(s.ok()) << s.ToString(); // Both of the following overlap on the range deletion tombstone. 
s = DeprecatedAddFile(file_list4); ASSERT_FALSE(s.ok()) << s.ToString(); s = DeprecatedAddFile(file_list5); ASSERT_FALSE(s.ok()) << s.ToString(); // Add files using file path list s = DeprecatedAddFile(file_list0); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 200; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } TablePropertiesCollection props; ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); ASSERT_EQ(props.size(), 2); for (auto file_props : props) { auto user_props = file_props.second->user_collected_properties; ASSERT_EQ(user_props["abc_SstFileWriterCollector"], "YES"); ASSERT_EQ(user_props["xyz_SstFileWriterCollector"], "YES"); ASSERT_EQ(user_props["abc_Count"], "100"); ASSERT_EQ(user_props["xyz_Count"], "100"); } // Add file while holding a snapshot will fail const Snapshot* s1 = db_->GetSnapshot(); if (s1 != nullptr) { ASSERT_NOK(DeprecatedAddFile(file_list2)); db_->ReleaseSnapshot(s1); } // We can add the file after releaseing the snapshot ASSERT_OK(DeprecatedAddFile(file_list2)); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 300; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); ASSERT_EQ(props.size(), 3); for (auto file_props : props) { auto user_props = file_props.second->user_collected_properties; ASSERT_EQ(user_props["abc_SstFileWriterCollector"], "YES"); ASSERT_EQ(user_props["xyz_SstFileWriterCollector"], "YES"); ASSERT_EQ(user_props["abc_Count"], "100"); ASSERT_EQ(user_props["xyz_Count"], "100"); } // This file list has overlapping values with the existing data s = DeprecatedAddFile(file_list3); ASSERT_FALSE(s.ok()) << s.ToString(); // Overwrite values of keys divisible by 5 for (int k = 0; k < 200; k += 5) { ASSERT_OK(Put(Key(k), Key(k) + "_val_new")); } ASSERT_NE(db_->GetLatestSequenceNumber(), 0U); // Make sure values are correct before and after flush/compaction for (int i = 0; i < 2; i++) { for (int k = 0; k < 200; 
k++) { std::string value = Key(k) + "_val"; if (k % 5 == 0) { value += "_new"; } ASSERT_EQ(Get(Key(k)), value); } for (int k = 200; k < 300; k++) { std::string value = Key(k) + "_val"; ASSERT_EQ(Get(Key(k)), value); } ASSERT_OK(Flush()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); } // Delete keys in range (200 => 299) for (int k = 200; k < 300; k++) { ASSERT_OK(Delete(Key(k))); } // We deleted range (200 => 299) but cannot add file5 because // of the range tombstones ASSERT_NOK(DeprecatedAddFile(file_list2)); // Compacting the DB will remove the tombstones ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); // Now we can add the file ASSERT_OK(DeprecatedAddFile(file_list2)); // Verify values of file5 in DB for (int k = 200; k < 300; k++) { std::string value = Key(k) + "_val"; ASSERT_EQ(Get(Key(k)), value); } DestroyAndRecreateExternalSSTFilesDir(); } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction | kRangeDelSkipConfigs)); } TEST_F(ExternalSSTFileTest, AddListAtomicity) { do { Options options = CurrentOptions(); SstFileWriter sst_file_writer(EnvOptions(), options); // files[0].sst (0 => 99) // files[1].sst (100 => 199) // ... 
// file[8].sst (800 => 899) int n = 9; std::vector files(n); std::vector files_info(n); for (int i = 0; i < n; i++) { files[i] = sst_files_dir_ + "file" + std::to_string(i) + ".sst"; ASSERT_OK(sst_file_writer.Open(files[i])); for (int k = i * 100; k < (i + 1) * 100; k++) { ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); } Status s = sst_file_writer.Finish(&files_info[i]); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_EQ(files_info[i].file_path, files[i]); ASSERT_EQ(files_info[i].num_entries, 100); ASSERT_EQ(files_info[i].smallest_key, Key(i * 100)); ASSERT_EQ(files_info[i].largest_key, Key((i + 1) * 100 - 1)); } files.push_back(sst_files_dir_ + "file" + std::to_string(n) + ".sst"); auto s = DeprecatedAddFile(files); ASSERT_NOK(s) << s.ToString(); for (int k = 0; k < n * 100; k++) { ASSERT_EQ("NOT_FOUND", Get(Key(k))); } files.pop_back(); ASSERT_OK(DeprecatedAddFile(files)); for (int k = 0; k < n * 100; k++) { std::string value = Key(k) + "_val"; ASSERT_EQ(Get(Key(k)), value); } DestroyAndRecreateExternalSSTFilesDir(); } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction)); } // This test reporduce a bug that can happen in some cases if the DB started // purging obsolete files when we are adding an external sst file. // This situation may result in deleting the file while it's being added. 
TEST_F(ExternalSSTFileTest, PurgeObsoleteFilesBug) {
  Options options = CurrentOptions();
  SstFileWriter sst_file_writer(EnvOptions(), options);

  // file1.sst (0 => 499)
  std::string sst_file_path = sst_files_dir_ + "file1.sst";
  Status s = sst_file_writer.Open(sst_file_path);
  ASSERT_OK(s);
  for (int i = 0; i < 500; i++) {
    std::string k = Key(i);
    s = sst_file_writer.Put(k, k + "_val");
    ASSERT_OK(s);
  }

  ExternalSstFileInfo sst_file_info;
  s = sst_file_writer.Finish(&sst_file_info);
  ASSERT_OK(s);

  options.delete_obsolete_files_period_micros = 0;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // While the ingestion job is at its "FileAdded" sync point, write, flush
  // and compact so that obsolete files are produced during the ingestion.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ExternalSstFileIngestionJob::Prepare:FileAdded", [&](void* /* arg */) {
        ASSERT_OK(Put("aaa", "bbb"));
        ASSERT_OK(Flush());
        ASSERT_OK(Put("aaa", "xxx"));
        ASSERT_OK(Flush());
        // Check the status instead of dropping it, so a failed compaction
        // is reported here rather than surfacing as a confusing read
        // failure further down.
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  s = DeprecatedAddFile({sst_file_path});
  ASSERT_OK(s);

  // Every ingested key must still be readable.
  for (int i = 0; i < 500; i++) {
    std::string k = Key(i);
    std::string v = k + "_val";
    ASSERT_EQ(Get(k), v);
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Ingestion is rejected while a snapshot is held unless the caller passes
// skip_snapshot_check = true (third argument of DeprecatedAddFile).
TEST_F(ExternalSSTFileTest, SkipSnapshot) {
  Options options = CurrentOptions();

  SstFileWriter sst_file_writer(EnvOptions(), options);

  // file1.sst (0 => 99)
  std::string file1 = sst_files_dir_ + "file1.sst";
  ASSERT_OK(sst_file_writer.Open(file1));
  for (int k = 0; k < 100; k++) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file1_info;
  Status s = sst_file_writer.Finish(&file1_info);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_EQ(file1_info.file_path, file1);
  ASSERT_EQ(file1_info.num_entries, 100);
  ASSERT_EQ(file1_info.smallest_key, Key(0));
  ASSERT_EQ(file1_info.largest_key, Key(99));

  // file2.sst (100 => 299)
  std::string file2 = sst_files_dir_ + "file2.sst";
  ASSERT_OK(sst_file_writer.Open(file2));
  for (int k = 100; k < 300; k++) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file2_info;
  s = sst_file_writer.Finish(&file2_info);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_EQ(file2_info.file_path, file2);
  ASSERT_EQ(file2_info.num_entries, 200);
  ASSERT_EQ(file2_info.smallest_key, Key(100));
  ASSERT_EQ(file2_info.largest_key, Key(299));

  ASSERT_OK(DeprecatedAddFile({file1}));

  // Add file will fail when holding snapshot and use the default
  // skip_snapshot_check to false
  const Snapshot* s1 = db_->GetSnapshot();
  if (s1 != nullptr) {
    ASSERT_NOK(DeprecatedAddFile({file2}));
  }

  // Add file will succeed when skip_snapshot_check is set to true even if
  // the db is holding a snapshot
  if (s1 != nullptr) {
    ASSERT_OK(DeprecatedAddFile({file2}, false, true));
    db_->ReleaseSnapshot(s1);
  }

  // file3.sst (300 => 399)
  std::string file3 = sst_files_dir_ + "file3.sst";
  ASSERT_OK(sst_file_writer.Open(file3));
  for (int k = 300; k < 400; k++) {
    ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file3_info;
  s = sst_file_writer.Finish(&file3_info);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_EQ(file3_info.file_path, file3);
  ASSERT_EQ(file3_info.num_entries, 100);
  ASSERT_EQ(file3_info.smallest_key, Key(300));
  ASSERT_EQ(file3_info.largest_key, Key(399));

  // Check that the key is absent before ingestion and that, after ingesting
  // with skip_snapshot_check, it is visible both with and without the
  // snapshot taken beforehand.
  ASSERT_EQ(Get(Key(300)), "NOT_FOUND");
  const Snapshot* s2 = db_->GetSnapshot();
  ASSERT_OK(DeprecatedAddFile({file3}, false, true));
  ASSERT_EQ(Get(Key(300)), Key(300) + ("_val"));
  ASSERT_EQ(Get(Key(300), s2), Key(300) + ("_val"));

  db_->ReleaseSnapshot(s2);
}

// Writes SST files from several threads, then ingests them from several
// threads where every pair of files is submitted twice; exactly one of the
// two submissions is expected to succeed.
TEST_F(ExternalSSTFileTest, MultiThreaded) {
  // Bulk load 10 files every file contain 1000 keys
  int num_files = 10;
  int keys_per_file = 1000;

  // Generate file names
  std::vector<std::string> file_names;
  for (int i = 0; i < num_files; i++) {
    std::string file_name = "file_" + ToString(i) + ".sst";
    file_names.push_back(sst_files_dir_ + file_name);
  }

  do {
    Options options = CurrentOptions();

    std::atomic<int> thread_num(0);
    std::function<void()> write_file_func = [&]() {
      int file_idx = thread_num.fetch_add(1);
      int range_start = file_idx * keys_per_file;
      int range_end = range_start + keys_per_file;

      SstFileWriter sst_file_writer(EnvOptions(), options);

      ASSERT_OK(sst_file_writer.Open(file_names[file_idx]));

      for (int k = range_start; k < range_end; k++) {
        ASSERT_OK(sst_file_writer.Put(Key(k), Key(k)));
      }

      Status s = sst_file_writer.Finish();
      ASSERT_TRUE(s.ok()) << s.ToString();
    };
    // Write num_files files in parallel
    std::vector<port::Thread> sst_writer_threads;
    for (int i = 0; i < num_files; ++i) {
      sst_writer_threads.emplace_back(write_file_func);
    }

    for (auto& t : sst_writer_threads) {
      t.join();
    }

    fprintf(stderr, "Wrote %d files (%d keys)\n", num_files,
            num_files * keys_per_file);

    thread_num.store(0);
    std::atomic<int> files_added(0);
    // Thread 0 -> Load {f0,f1}
    // Thread 1 -> Load {f0,f1}
    // Thread 2 -> Load {f2,f3}
    // Thread 3 -> Load {f2,f3}
    // Thread 4 -> Load {f4,f5}
    // Thread 5 -> Load {f4,f5}
    // ...
    std::function<void()> load_file_func = [&]() {
      // We intentionally add every file twice, and assert that it was added
      // only once and the other add failed
      int thread_id = thread_num.fetch_add(1);
      int file_idx = (thread_id / 2) * 2;
      // sometimes we use copy, sometimes link .. the result should be the same
      bool move_file = (thread_id % 3 == 0);

      std::vector<std::string> files_to_add;

      files_to_add = {file_names[file_idx]};
      if (static_cast<size_t>(file_idx + 1) < file_names.size()) {
        files_to_add.push_back(file_names[file_idx + 1]);
      }

      Status s = DeprecatedAddFile(files_to_add, move_file);
      if (s.ok()) {
        files_added += static_cast<int>(files_to_add.size());
      }
    };

    // Bulk load num_files files in parallel
    std::vector<port::Thread> add_file_threads;
    DestroyAndReopen(options);
    for (int i = 0; i < num_files; ++i) {
      add_file_threads.emplace_back(load_file_func);
    }

    for (auto& t : add_file_threads) {
      t.join();
    }
    ASSERT_EQ(files_added.load(), num_files);
    fprintf(stderr, "Loaded %d files (%d keys)\n", num_files,
            num_files * keys_per_file);

    // Overwrite values of keys divisible by 100
    for (int k = 0; k < num_files * keys_per_file; k += 100) {
      std::string key = Key(k);
      Status s = Put(key, key + "_new");
      ASSERT_TRUE(s.ok());
    }

    for (int i = 0; i < 2; i++) {
      // Make sure the values are correct before and after flush/compaction
      for (int k = 0; k < num_files * keys_per_file; ++k) {
        std::string key = Key(k);
        std::string value = (k % 100 == 0) ? (key + "_new") : key;
        ASSERT_EQ(Get(key), value);
      }
      ASSERT_OK(Flush());
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    }

    fprintf(stderr, "Verified %d values\n", num_files * keys_per_file);
    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));
}

// Inserts 100 random key ranges, some through Put() and the rest through
// DeprecatedAddFile(), and checks that an ingestion is rejected exactly when
// the test's own bookkeeping (plus the values observed through sync-point
// callbacks for the universal-compaction configs) says it overlaps.
TEST_F(ExternalSSTFileTest, OverlappingRanges) {
  Random rnd(301);
  SequenceNumber assigned_seqno = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ExternalSstFileIngestionJob::Run", [&assigned_seqno](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        assigned_seqno = *(static_cast<SequenceNumber*>(arg));
      });
  bool need_flush = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::IngestExternalFile:NeedFlush", [&need_flush](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        need_flush = *(static_cast<bool*>(arg));
      });
  bool overlap_with_db = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile",
      [&overlap_with_db](void* arg) {
        ASSERT_TRUE(arg != nullptr);
        overlap_with_db = *(static_cast<bool*>(arg));
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  do {
    Options options = CurrentOptions();
    DestroyAndReopen(options);

    SstFileWriter sst_file_writer(EnvOptions(), options);

    printf("Option config = %d\n", option_config_);
    std::vector<std::pair<int, int>> key_ranges;
    for (int i = 0; i < 100; i++) {
      int range_start = rnd.Uniform(20000);
      int keys_per_range = 10 + rnd.Uniform(41);

      key_ranges.emplace_back(range_start, range_start + keys_per_range);
    }

    int memtable_add = 0;
    int success_add_file = 0;
    int failed_add_file = 0;
    std::map<std::string, std::string> true_data;
    for (size_t i = 0; i < key_ranges.size(); i++) {
      int range_start = key_ranges[i].first;
      int range_end = key_ranges[i].second;

      Status s;
      std::string range_val = "range_" + ToString(i);

      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
      if (i && i % 5 == 0) {
        // Use DB::Put to insert range (insert into memtable)
        range_val += "_put";
        for (int k = range_start; k <= range_end; k++) {
          s = Put(Key(k), range_val);
          ASSERT_OK(s);
        }
        memtable_add++;
      } else {
        // Use DB::AddFile to insert range
        range_val += "_add_file";

        // Generate the file containing the range
        std::string file_name = sst_files_dir_ + env_->GenerateUniqueId();
        ASSERT_OK(sst_file_writer.Open(file_name));
        for (int k = range_start; k <= range_end; k++) {
          s = sst_file_writer.Put(Key(k), range_val);
          ASSERT_OK(s);
        }
        ExternalSstFileInfo file_info;
        s = sst_file_writer.Finish(&file_info);
        ASSERT_OK(s);

        // Insert the generated file
        s = DeprecatedAddFile({file_name});
        auto it = true_data.lower_bound(Key(range_start));
        if (option_config_ != kUniversalCompaction &&
            option_config_ != kUniversalCompactionMultiLevel &&
            option_config_ != kUniversalSubcompactions) {
          if (it != true_data.end() && it->first <= Key(range_end)) {
            // This range overlap with data already exist in DB
            ASSERT_NOK(s);
            failed_add_file++;
          } else {
            ASSERT_OK(s);
            success_add_file++;
          }
        } else {
          if ((it != true_data.end() && it->first <= Key(range_end)) ||
              need_flush || assigned_seqno > 0 || overlap_with_db) {
            // This range overlap with data already exist in DB
            ASSERT_NOK(s);
            failed_add_file++;
          } else {
            ASSERT_OK(s);
            success_add_file++;
          }
        }
      }

      if (s.ok()) {
        // Update true_data map to include the new inserted data
        for (int k = range_start; k <= range_end; k++) {
          true_data[Key(k)] = range_val;
        }
      }

      // Flush / Compact the DB. The statuses are asserted so that a failed
      // flush or compaction is reported at its source.
      if (i && i % 50 == 0) {
        ASSERT_OK(Flush());
      }
      if (i && i % 75 == 0) {
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      }
    }

    printf("Total: %" ROCKSDB_PRIszt
           " ranges\n"
           "AddFile()|Success: %d ranges\n"
           "AddFile()|RangeConflict: %d ranges\n"
           "Put(): %d ranges\n",
           key_ranges.size(), success_add_file, failed_add_file, memtable_add);

    // Verify the correctness of the data
    for (const auto& kv : true_data) {
      ASSERT_EQ(Get(kv.first), kv.second);
    }
    printf("keys/values verified\n");
    DestroyAndRecreateExternalSSTFilesDir();
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));

  // The callbacks above capture locals of this function by reference, so
  // uninstall them before those locals go out of scope.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}
// Checks which LSM level an ingested file lands in, both when the DB is idle
// and while a background compaction is being held at a sync point.
TEST_P(ExternalSSTFileTest, PickedLevel) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = false;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  DestroyAndReopen(options);

  std::map<std::string, std::string> true_data;

  // File 0 will go to last level (L3)
  ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false, true,
                                       false, false, &true_data));
  EXPECT_EQ(FilesPerLevel(), "0,0,0,1");

  // File 1 will go to level L2 (since it overlap with file 0 in L3)
  ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, true,
                                       false, false, &true_data));
  EXPECT_EQ(FilesPerLevel(), "0,0,1,1");

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"ExternalSSTFileTest::PickedLevel:0", "BackgroundCallCompaction:0"},
      {"DBImpl::BackgroundCompaction:Start",
       "ExternalSSTFileTest::PickedLevel:1"},
      {"ExternalSSTFileTest::PickedLevel:2",
       "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Flush 4 files containing the same keys
  for (int i = 0; i < 4; i++) {
    ASSERT_OK(Put(Key(3), Key(3) + "put"));
    ASSERT_OK(Put(Key(8), Key(8) + "put"));
    true_data[Key(3)] = Key(3) + "put";
    true_data[Key(8)] = Key(8) + "put";
    ASSERT_OK(Flush());
  }

  // Wait for BackgroundCompaction() to be called
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:0");
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:1");

  EXPECT_EQ(FilesPerLevel(), "4,0,1,1");

  // This file overlaps with file 0 (L3), file 1 (L2) and the
  // output of compaction going to L1
  ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, true,
                                       false, false, &true_data));
  EXPECT_EQ(FilesPerLevel(), "5,0,1,1");

  // This file does not overlap with any file or with the running compaction
  ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
                                       false, false, false, &true_data));
  EXPECT_EQ(FilesPerLevel(), "5,0,1,2");

  // Hold compaction from finishing
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevel:2");

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  EXPECT_EQ(FilesPerLevel(), "1,1,1,2");

  size_t kcnt = 0;
  VerifyDBFromMap(true_data, &kcnt, false);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Verifies that no background compaction starts while an ingestion is
// between its MutexLock and MutexUnlock sync points.
TEST_F(ExternalSSTFileTest, PickedLevelBug) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = false;
  options.level0_file_num_compaction_trigger = 3;
  options.num_levels = 2;
  DestroyAndReopen(options);

  std::vector<int> file_keys;

  // file #1 in L0
  file_keys = {0, 5, 7};
  for (int k : file_keys) {
    ASSERT_OK(Put(Key(k), Key(k)));
  }
  ASSERT_OK(Flush());

  // file #2 in L0
  file_keys = {4, 6, 8, 9};
  for (int k : file_keys) {
    ASSERT_OK(Put(Key(k), Key(k)));
  }
  ASSERT_OK(Flush());

  // We have 2 overlapping files in L0
  EXPECT_EQ(FilesPerLevel(), "2");

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::AddFile:MutexLock", "ExternalSSTFileTest::PickedLevelBug:0"},
       {"ExternalSSTFileTest::PickedLevelBug:1", "DBImpl::AddFile:MutexUnlock"},
       {"ExternalSSTFileTest::PickedLevelBug:2",
        "DBImpl::RunManualCompaction:0"},
       {"ExternalSSTFileTest::PickedLevelBug:3",
        "DBImpl::RunManualCompaction:1"}});

  std::atomic<bool> bg_compact_started(false);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:Start",
      [&](void* /*arg*/) { bg_compact_started.store(true); });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // While writing the MANIFEST start a thread that will ask for compaction
  ROCKSDB_NAMESPACE::port::Thread bg_compact([&]() {
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  });
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelBug:2");

  // Start a thread that will ingest a new file
  ROCKSDB_NAMESPACE::port::Thread bg_addfile([&]() {
    file_keys = {1, 2, 3};
    ASSERT_OK(GenerateAndAddExternalFile(options, file_keys, 1));
  });

  // Wait for AddFile to start picking levels and writing MANIFEST
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelBug:0");

  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelBug:3");

  // We need to verify that no compactions can run while AddFile is
  // ingesting the files into the levels it find suitable. So we will
  // wait for 2 seconds to give a chance for compactions to run during
  // this period, and then make sure that no compactions were able to run
  env_->SleepForMicroseconds(1000000 * 2);
  ASSERT_FALSE(bg_compact_started.load());

  // Hold AddFile from finishing writing the MANIFEST
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelBug:1");

  bg_addfile.join();
  bg_compact.join();

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  int total_keys = 0;
  // Owned by unique_ptr so the iterator is released even when one of the
  // assertions below returns early.
  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_OK(iter->status());
    total_keys++;
  }
  // The loop also ends when the iterator stops being valid for a reason
  // other than reaching the end, so check the status once more afterwards.
  ASSERT_OK(iter->status());
  ASSERT_EQ(total_keys, 10);

  iter.reset();

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// A failed ingestion of a missing file must not interfere with later file
// deletion: after a full compaction exactly one table file should remain.
TEST_F(ExternalSSTFileTest, IngestNonExistingFile) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);

  Status s = db_->IngestExternalFile({"non_existing_file"},
                                     IngestExternalFileOptions());
  ASSERT_NOK(s);

  // Verify file deletion is not impacted (verify a bug fix)
  ASSERT_OK(Put(Key(1), Key(1)));
  ASSERT_OK(Put(Key(9), Key(9)));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(1), Key(1)));
  ASSERT_OK(Put(Key(9), Key(9)));
  ASSERT_OK(Flush());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact(true));

  // After full compaction, there should be only 1 file.
  std::vector<std::string> files;
  // A failed directory listing would leave `files` empty and make the count
  // below fail for the wrong reason, so check it explicitly.
  ASSERT_OK(env_->GetChildren(dbname_, &files));
  int num_sst_files = 0;
  for (auto& f : files) {
    uint64_t number;
    FileType type;
    if (ParseFileName(f, &number, &type) && type == kTableFile) {
      num_sst_files++;
    }
  }
  ASSERT_EQ(1, num_sst_files);
}

// For 1000 adjacent key ranges: flush the two boundary keys, then ingest the
// interior keys from a thread, with a manual compaction running concurrently
// on every tenth range. Afterwards every key must read back correctly.
TEST_F(ExternalSSTFileTest, CompactDuringAddFileRandom) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = false;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 2;
  DestroyAndReopen(options);

  std::function<void()> bg_compact = [&]() {
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  };

  int range_id = 0;
  std::vector<int> file_keys;
  std::function<void()> bg_addfile = [&]() {
    ASSERT_OK(GenerateAndAddExternalFile(options, file_keys, range_id));
  };

  const int num_of_ranges = 1000;
  std::vector<port::Thread> threads;
  while (range_id < num_of_ranges) {
    int range_start = range_id * 10;
    int range_end = range_start + 10;

    file_keys.clear();
    for (int k = range_start + 1; k < range_end; k++) {
      file_keys.push_back(k);
    }
    ASSERT_OK(Put(Key(range_start), Key(range_start)));
    ASSERT_OK(Put(Key(range_end), Key(range_end)));
    ASSERT_OK(Flush());

    if (range_id % 10 == 0) {
      threads.emplace_back(bg_compact);
    }
    threads.emplace_back(bg_addfile);

    for (auto& t : threads) {
      t.join();
    }
    threads.clear();

    range_id++;
  }

  for (int rid = 0; rid < num_of_ranges; rid++) {
    int range_start = rid * 10;
    int range_end = range_start + 10;

    ASSERT_EQ(Get(Key(range_start)), Key(range_start)) << rid;
    ASSERT_EQ(Get(Key(range_end)), Key(range_end)) << rid;
    for (int k = range_start + 1; k < range_end; k++) {
      std::string v = Key(k) + ToString(rid);
      ASSERT_EQ(Get(Key(k)), v) << rid;
    }
  }
}

// Same idea as PickedLevel but with level_compaction_dynamic_level_bytes
// enabled.
TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = false;
  options.level0_file_num_compaction_trigger = 4;
  options.level_compaction_dynamic_level_bytes = true;
  options.num_levels = 4;
  DestroyAndReopen(options);
  std::map<std::string, std::string> true_data;

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"ExternalSSTFileTest::PickedLevelDynamic:0",
       "BackgroundCallCompaction:0"},
      {"DBImpl::BackgroundCompaction:Start",
       "ExternalSSTFileTest::PickedLevelDynamic:1"},
      {"ExternalSSTFileTest::PickedLevelDynamic:2",
       "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Flush 4 files containing the same keys
  for (int i = 0; i < 4; i++) {
    for (int k = 20; k <= 30; k++) {
      ASSERT_OK(Put(Key(k), Key(k) + "put"));
      true_data[Key(k)] = Key(k) + "put";
    }
    for (int k = 50; k <= 60; k++) {
      ASSERT_OK(Put(Key(k), Key(k) + "put"));
      true_data[Key(k)] = Key(k) + "put";
    }
    ASSERT_OK(Flush());
  }

  // Wait for BackgroundCompaction() to be called
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:0");
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:1");

  // This file overlaps with the output of the compaction (going to L3)
  // so the file will be added to L0 since L3 is the base level
  ASSERT_OK(GenerateAndAddExternalFile(options, {31, 32, 33, 34}, -1, false,
                                       false, true, false, false, &true_data));
  EXPECT_EQ(FilesPerLevel(), "5");

  // This file does not overlap with the current running compaction
  ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
                                       true, false, false, &true_data));
  EXPECT_EQ(FilesPerLevel(), "5,0,0,1");

  // Hold compaction from finishing
  TEST_SYNC_POINT("ExternalSSTFileTest::PickedLevelDynamic:2");

  // Output of the compaction will go to L3
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  EXPECT_EQ(FilesPerLevel(), "1,0,0,2");

  Close();
  options.disable_auto_compactions = true;
  Reopen(options);

  ASSERT_OK(GenerateAndAddExternalFile(options, {1, 15, 19}, -1, false, false,
                                       true, false, false, &true_data));
  ASSERT_EQ(FilesPerLevel(), "1,0,0,3");

  ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1001, 1002}, -1, false,
                                       false, true, false, false, &true_data));
  ASSERT_EQ(FilesPerLevel(), "1,0,0,4");

  ASSERT_OK(GenerateAndAddExternalFile(options, {500, 600, 700}, -1, false,
                                       false, true, false, false, &true_data));
  ASSERT_EQ(FilesPerLevel(), "1,0,0,5");

  // File 5 overlaps with file 2 (L3 / base level)
  ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false, true,
                                       false, false, &true_data));
  ASSERT_EQ(FilesPerLevel(), "2,0,0,5");

  // File 6 overlaps with file 2 (L3 / base level) and file 5 (L0)
  ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, true,
                                       false, false, &true_data));
  ASSERT_EQ(FilesPerLevel(), "3,0,0,5");

  // Verify data in files
  size_t kcnt = 0;
  VerifyDBFromMap(true_data, &kcnt, false);

  // Write range [5 => 10] to L0
  for (int i = 5; i <= 10; i++) {
    std::string k = Key(i);
    std::string v = k + "put";
    ASSERT_OK(Put(k, v));
    true_data[k] = v;
  }
  ASSERT_OK(Flush());
  ASSERT_EQ(FilesPerLevel(), "4,0,0,5");

  // File 7 overlaps with file 4 (L3)
  ASSERT_OK(GenerateAndAddExternalFile(options, {650, 651, 652}, -1, false,
                                       false, true, false, false, &true_data));
  ASSERT_EQ(FilesPerLevel(), "5,0,0,5");

  VerifyDBFromMap(true_data, &kcnt, false);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// Overlap detection between ingested files, and between ingested files and
// the DB, when the DB uses ReverseBytewiseComparator().
TEST_F(ExternalSSTFileTest, AddExternalSstFileWithCustomCompartor) {
  Options options = CurrentOptions();
  options.comparator = ReverseBytewiseComparator();
  DestroyAndReopen(options);

  SstFileWriter sst_file_writer(EnvOptions(), options);

  // Generate files with these key ranges
  // {14 -> 0}
  // {24 -> 10}
  // {34 -> 20}
  // {44 -> 30}
  // ..
  std::vector<std::string> generated_files;
  for (int i = 0; i < 10; i++) {
    std::string file_name = sst_files_dir_ + env_->GenerateUniqueId();
    ASSERT_OK(sst_file_writer.Open(file_name));

    int range_end = i * 10;
    int range_start = range_end + 15;
    // Keys are written in descending order.
    for (int k = (range_start - 1); k >= range_end; k--) {
      ASSERT_OK(sst_file_writer.Put(Key(k), Key(k)));
    }
    ExternalSstFileInfo file_info;
    ASSERT_OK(sst_file_writer.Finish(&file_info));
    generated_files.push_back(file_name);
  }

  std::vector<std::string> in_files;

  // These 2nd and 3rd files overlap with each other
  in_files = {generated_files[0], generated_files[4], generated_files[5],
              generated_files[7]};
  ASSERT_NOK(DeprecatedAddFile(in_files));

  // These 2 files don't overlap with each other
  in_files = {generated_files[0], generated_files[2]};
  ASSERT_OK(DeprecatedAddFile(in_files));

  // These 2 files don't overlap with each other but overlap with keys in DB
  in_files = {generated_files[3], generated_files[7]};
  ASSERT_NOK(DeprecatedAddFile(in_files));

  // Files don't overlap and don't overlap with DB key range
  in_files = {generated_files[4], generated_files[6], generated_files[8]};
  ASSERT_OK(DeprecatedAddFile(in_files));

  for (int i = 0; i < 100; i++) {
    if (i % 20 <= 14) {
      ASSERT_EQ(Get(Key(i)), Key(i));
    } else {
      ASSERT_EQ(Get(Key(i)), "NOT_FOUND");
    }
  }
}

// Ingests a file from inside a running manual compaction (first invocation
// of the "CompactionJob::Run():Start" callback only) and expects both the
// ingestion and the compaction to succeed.
TEST_F(ExternalSSTFileTest, AddFileTrivialMoveBug) {
  Options options = CurrentOptions();
  options.num_levels = 3;
  options.IncreaseParallelism(20);
  DestroyAndReopen(options);

  ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4}, 1));    // L3
  ASSERT_OK(GenerateAndAddExternalFile(options, {2, 3}, 2));    // L2

  ASSERT_OK(GenerateAndAddExternalFile(options, {10, 14}, 3));  // L3
  ASSERT_OK(GenerateAndAddExternalFile(options, {12, 13}, 4));  // L2

  ASSERT_OK(GenerateAndAddExternalFile(options, {20, 24}, 5));  // L3
  ASSERT_OK(GenerateAndAddExternalFile(options, {22, 23}, 6));  // L2

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():Start", [&](void* /*arg*/) {
        // fit in L3 but will overlap with compaction so will be added
        // to L2 but a compaction will trivially move it to L3
        // and break LSM consistency
        static std::atomic<bool> called = {false};
        // exchange() makes "test and set" one atomic step, so only a single
        // invocation can ever take this branch.
        if (!called.exchange(true)) {
          ASSERT_OK(dbfull()->SetOptions({{"max_bytes_for_level_base", "1"}}));
          ASSERT_OK(GenerateAndAddExternalFile(options, {15, 16}, 7));
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = false;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

// A full manual compaction over four nested ingested files must succeed.
TEST_F(ExternalSSTFileTest, CompactAddedFiles) {
  Options options = CurrentOptions();
  options.num_levels = 3;
  DestroyAndReopen(options);

  ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, 1));  // L3
  ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, 2));   // L2
  ASSERT_OK(GenerateAndAddExternalFile(options, {3, 8}, 3));   // L1
  ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, 4));   // L0

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
}

// Writes and ingests a file whose consecutive keys differ near the start of
// the key while sharing a long common suffix.
TEST_F(ExternalSSTFileTest, SstFileWriterNonSharedKeys) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  std::string file_path = sst_files_dir_ + "/not_shared";
  SstFileWriter sst_file_writer(EnvOptions(), options);

  std::string suffix(100, 'X');
  ASSERT_OK(sst_file_writer.Open(file_path));
  ASSERT_OK(sst_file_writer.Put("A" + suffix, "VAL"));
  ASSERT_OK(sst_file_writer.Put("BB" + suffix, "VAL"));
  ASSERT_OK(sst_file_writer.Put("CC" + suffix, "VAL"));
  ASSERT_OK(sst_file_writer.Put("CXD" + suffix, "VAL"));
  ASSERT_OK(sst_file_writer.Put("CZZZ" + suffix, "VAL"));
  ASSERT_OK(sst_file_writer.Put("ZAAAX" + suffix, "VAL"));

  ASSERT_OK(sst_file_writer.Finish());
  ASSERT_OK(DeprecatedAddFile({file_path}));
}

// With unordered_write enabled, ingesting a file for a key whose Put() is
// between its WAL write and its memtable write must report need_flush and
// leave the ingested value visible.
TEST_F(ExternalSSTFileTest, WithUnorderedWrite) {
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL",
        "ExternalSSTFileTest::WithUnorderedWrite:WaitWriteWAL"},
       {"DBImpl::WaitForPendingWrites:BeforeBlock",
        "DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"}});
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::IngestExternalFile:NeedFlush", [&](void* need_flush) {
        ASSERT_TRUE(*reinterpret_cast<bool*>(need_flush));
      });

  Options options = CurrentOptions();
  options.unordered_write = true;
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "v1"));
  SyncPoint::GetInstance()->EnableProcessing();
  port::Thread writer([&]() { ASSERT_OK(Put("bar", "v2")); });

  TEST_SYNC_POINT("ExternalSSTFileTest::WithUnorderedWrite:WaitWriteWAL");
  ASSERT_OK(GenerateAndAddExternalFile(options, {{"bar", "v3"}}, -1,
                                       true /* allow_global_seqno */));
  ASSERT_EQ(Get("bar"), "v3");

  writer.join();
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

// Ingests 500 batches of random key/value pairs with global seqno allowed.
// In the first iteration, about a quarter of the batches go through Put()
// instead. The DB must match the expected map before and after a full
// compaction.
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
  Options options = CurrentOptions();
  options.IncreaseParallelism(20);
  options.level0_slowdown_writes_trigger = 256;
  options.level0_stop_writes_trigger = 256;

  bool write_global_seqno = std::get<0>(GetParam());
  bool verify_checksums_before_ingest = std::get<1>(GetParam());
  for (int iter = 0; iter < 2; iter++) {
    bool write_to_memtable = (iter == 0);
    DestroyAndReopen(options);

    Random rnd(301);
    std::map<std::string, std::string> true_data;
    for (int i = 0; i < 500; i++) {
      std::vector<std::pair<std::string, std::string>> random_data;
      for (int j = 0; j < 100; j++) {
        std::string k;
        std::string v;
        test::RandomString(&rnd, rnd.Next() % 20, &k);
        test::RandomString(&rnd, rnd.Next() % 50, &v);
        random_data.emplace_back(k, v);
      }

      if (write_to_memtable && rnd.OneIn(4)) {
        // 25% of writes go through memtable
        for (auto& entry : random_data) {
          ASSERT_OK(Put(entry.first, entry.second));
          true_data[entry.first] = entry.second;
        }
      } else {
        ASSERT_OK(GenerateAndAddExternalFile(
            options, random_data, -1, true, write_global_seqno,
            verify_checksums_before_ingest, false, true, &true_data));
      }
    }
    size_t kcnt = 0;
    VerifyDBFromMap(true_data, &kcnt, false);
    // Assert the compaction status so the second verification is known to
    // run against a compacted DB.
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    VerifyDBFromMap(true_data, &kcnt, false);
  }
}

// Checks the level chosen for files ingested with global seqno allowed,
// depending on what they overlap with.
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
  Options options = CurrentOptions();
  options.num_levels = 5;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  std::vector<std::pair<std::string, std::string>> file_data;
  std::map<std::string, std::string> true_data;

  // Insert 100 -> 200 into the memtable
  for (int i = 100; i <= 200; i++) {
    ASSERT_OK(Put(Key(i), "memtable"));
    true_data[Key(i)] = "memtable";
  }

  // Insert 0 -> 20 using AddFile
  file_data.clear();
  for (int i = 0; i <= 20; i++) {
    file_data.emplace_back(Key(i), "L4");
  }
  bool write_global_seqno = std::get<0>(GetParam());
  bool verify_checksums_before_ingest = std::get<1>(GetParam());
  ASSERT_OK(GenerateAndAddExternalFile(
      options, file_data, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));

  // This file don't overlap with anything in the DB, will go to L4
  ASSERT_EQ("0,0,0,0,1", FilesPerLevel());

  // Insert 80 -> 130 using AddFile
  file_data.clear();
  for (int i = 80; i <= 130; i++) {
    file_data.emplace_back(Key(i), "L0");
  }
  ASSERT_OK(GenerateAndAddExternalFile(
      options, file_data, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));

  // This file overlap with the memtable, so it will flush it and add
  // it self to L0
  ASSERT_EQ("2,0,0,0,1", FilesPerLevel());

  // Insert 30 -> 50 using AddFile
  file_data.clear();
  for (int i = 30; i <= 50; i++) {
    file_data.emplace_back(Key(i), "L4");
  }
  ASSERT_OK(GenerateAndAddExternalFile(
      options, file_data, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));

  // This file don't overlap with anything in the DB and fit in L4 as well
  ASSERT_EQ("2,0,0,0,2", FilesPerLevel());

  // Insert 10 -> 40 using AddFile
  file_data.clear();
  for (int i = 10; i <= 40; i++) {
    file_data.emplace_back(Key(i), "L3");
  }
  ASSERT_OK(GenerateAndAddExternalFile(
      options, file_data, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));

  // This file overlap with files in L4, we will ingest it in L3
  ASSERT_EQ("2,0,0,1,2", FilesPerLevel());

  size_t kcnt = 0;
  VerifyDBFromMap(true_data, &kcnt, false);
}

// Uses the kNumEntriesActiveMemTable property to observe whether an
// ingestion emptied the active memtable.
TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  uint64_t entries_in_memtable = 0;
  std::map<std::string, std::string> true_data;

  for (int k : {10, 20, 40, 80}) {
    ASSERT_OK(Put(Key(k), "memtable"));
    true_data[Key(k)] = "memtable";
  }
  // GetIntProperty() reports failure through its bool result; assert it so
  // the comparisons below never run on a value that was not written.
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                  &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  bool write_global_seqno = std::get<0>(GetParam());
  bool verify_checksums_before_ingest = std::get<1>(GetParam());
  // No need for flush
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {90, 100, 110}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                  &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // This file will flush the memtable
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {19, 20, 21}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                  &entries_in_memtable));
  ASSERT_EQ(entries_in_memtable, 0);

  for (int k : {200, 201, 205, 206}) {
    ASSERT_OK(Put(Key(k), "memtable"));
    true_data[Key(k)] = "memtable";
  }
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                  &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // No need for flush, this file keys fit between the memtable keys
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {202, 203, 204}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                  &entries_in_memtable));
  ASSERT_GE(entries_in_memtable, 1);

  // This file will flush the memtable
  ASSERT_OK(GenerateAndAddExternalFile(
      options, {206, 207}, -1, true, write_global_seqno,
      verify_checksums_before_ingest, false, false, &true_data));
  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                  &entries_in_memtable));
  ASSERT_EQ(entries_in_memtable, 0);

  size_t kcnt = 0;
  VerifyDBFromMap(true_data, &kcnt, false);
}

// Values written through Put() after two ingestions of the same keys must
// win, both while they are in the memtable and after they are flushed.
TEST_P(ExternalSSTFileTest, L0SortingIssue) {
  Options options = CurrentOptions();
  options.num_levels = 2;
  DestroyAndReopen(options);

  ASSERT_OK(Put(Key(1), "memtable"));
  ASSERT_OK(Put(Key(10), "memtable"));

  bool write_global_seqno = std::get<0>(GetParam());
  bool verify_checksums_before_ingest = std::get<1>(GetParam());
  // No Flush needed, No global seqno needed, Ingest in L1
  ASSERT_OK(
      GenerateAndAddExternalFile(options, {7, 8}, -1, true, write_global_seqno,
                                 verify_checksums_before_ingest, false, false));
  // No Flush needed, but need a global seqno, Ingest in L0
  ASSERT_OK(
      GenerateAndAddExternalFile(options, {7, 8}, -1, true, write_global_seqno,
                                 verify_checksums_before_ingest, false, false));
  printf("%s\n", FilesPerLevel().c_str());

  // Overwrite what we added using external files
  ASSERT_OK(Put(Key(7), "memtable"));
  ASSERT_OK(Put(Key(8), "memtable"));

  // Read values from memtable
  ASSERT_EQ(Get(Key(7)), "memtable");
  ASSERT_EQ(Get(Key(8)), "memtable");

  // Flush and read from L0
  ASSERT_OK(Flush());
  printf("%s\n", FilesPerLevel().c_str());
  ASSERT_EQ(Get(Key(7)), "memtable");
  ASSERT_EQ(Get(Key(8)), "memtable");
}

// Orders a stalled writer, an ingestion and a background compaction through
// sync points and requires both background threads to finish within about
// five seconds.
TEST_F(ExternalSSTFileTest, CompactionDeadlock) {
  Options options = CurrentOptions();
  options.num_levels = 2;
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 4;
  DestroyAndReopen(options);

  // atomic counter of currently running bg threads
  std::atomic<int> running_threads(0);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"DBImpl::DelayWrite:Wait", "ExternalSSTFileTest::DeadLock:0"},
      {"ExternalSSTFileTest::DeadLock:1", "DBImpl::AddFile:Start"},
      {"DBImpl::AddFile:MutexLock", "ExternalSSTFileTest::DeadLock:2"},
      {"ExternalSSTFileTest::DeadLock:3", "BackgroundCallCompaction:0"},
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Start ingesting an external file in the background
  ROCKSDB_NAMESPACE::port::Thread bg_ingest_file([&]() {
    running_threads += 1;
    ASSERT_OK(GenerateAndAddExternalFile(options, {5, 6}));
    running_threads -= 1;
  });

  ASSERT_OK(Put(Key(1), "memtable"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(2), "memtable"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(3), "memtable"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(4), "memtable"));
  ASSERT_OK(Flush());

  // This thread will try to insert into the memtable but since we have 4 L0
  // files this thread will be blocked and hold the writer thread
  ROCKSDB_NAMESPACE::port::Thread bg_block_put([&]() {
    running_threads += 1;
    ASSERT_OK(Put(Key(10), "memtable"));
    running_threads -= 1;
  });

  // Make sure DelayWrite is called first
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:0");

  // `DBImpl::AddFile:Start` will wait until we be here
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:1");

  // Wait for IngestExternalFile() to start and acquire mutex
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:2");

  // Now let compaction start
  TEST_SYNC_POINT("ExternalSSTFileTest::DeadLock:3");

  // Wait for max 5 seconds, if we did not finish all bg threads
  // then we hit the deadlock bug
  for (int i = 0; i < 10; i++) {
    if (running_threads.load() == 0) {
      break;
    }
    env_->SleepForMicroseconds(500000);
  }

  ASSERT_EQ(running_threads.load(), 0);

  bg_ingest_file.join();
  bg_block_put.join();
}

TEST_F(ExternalSSTFileTest, DirtyExit) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);

  std::string file_path = sst_files_dir_ + "/dirty_exit";
  std::unique_ptr<SstFileWriter> sst_file_writer;

  // Destruct SstFileWriter without calling Finish()
  sst_file_writer.reset(new SstFileWriter(EnvOptions(), options));
  ASSERT_OK(sst_file_writer->Open(file_path));
  sst_file_writer.reset();

  // Destruct
SstFileWriter with a failing Finish sst_file_writer.reset(new SstFileWriter(EnvOptions(), options)); ASSERT_OK(sst_file_writer->Open(file_path)); ASSERT_NOK(sst_file_writer->Finish()); } TEST_F(ExternalSSTFileTest, FileWithCFInfo) { Options options = CurrentOptions(); CreateAndReopenWithCF({"koko", "toto"}, options); SstFileWriter sfw_default(EnvOptions(), options, handles_[0]); SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); SstFileWriter sfw_cf2(EnvOptions(), options, handles_[2]); SstFileWriter sfw_unknown(EnvOptions(), options); // default_cf.sst const std::string cf_default_sst = sst_files_dir_ + "/default_cf.sst"; ASSERT_OK(sfw_default.Open(cf_default_sst)); ASSERT_OK(sfw_default.Put("K1", "V1")); ASSERT_OK(sfw_default.Put("K2", "V2")); ASSERT_OK(sfw_default.Finish()); // cf1.sst const std::string cf1_sst = sst_files_dir_ + "/cf1.sst"; ASSERT_OK(sfw_cf1.Open(cf1_sst)); ASSERT_OK(sfw_cf1.Put("K3", "V1")); ASSERT_OK(sfw_cf1.Put("K4", "V2")); ASSERT_OK(sfw_cf1.Finish()); // cf_unknown.sst const std::string unknown_sst = sst_files_dir_ + "/cf_unknown.sst"; ASSERT_OK(sfw_unknown.Open(unknown_sst)); ASSERT_OK(sfw_unknown.Put("K5", "V1")); ASSERT_OK(sfw_unknown.Put("K6", "V2")); ASSERT_OK(sfw_unknown.Finish()); IngestExternalFileOptions ifo; // SST CF don't match ASSERT_NOK(db_->IngestExternalFile(handles_[0], {cf1_sst}, ifo)); // SST CF don't match ASSERT_NOK(db_->IngestExternalFile(handles_[2], {cf1_sst}, ifo)); // SST CF match ASSERT_OK(db_->IngestExternalFile(handles_[1], {cf1_sst}, ifo)); // SST CF don't match ASSERT_NOK(db_->IngestExternalFile(handles_[1], {cf_default_sst}, ifo)); // SST CF don't match ASSERT_NOK(db_->IngestExternalFile(handles_[2], {cf_default_sst}, ifo)); // SST CF match ASSERT_OK(db_->IngestExternalFile(handles_[0], {cf_default_sst}, ifo)); // SST CF unknown ASSERT_OK(db_->IngestExternalFile(handles_[1], {unknown_sst}, ifo)); // SST CF unknown ASSERT_OK(db_->IngestExternalFile(handles_[2], {unknown_sst}, ifo)); // SST CF unknown 
ASSERT_OK(db_->IngestExternalFile(handles_[0], {unknown_sst}, ifo)); // Cannot ingest a file into a dropped CF ASSERT_OK(db_->DropColumnFamily(handles_[1])); ASSERT_NOK(db_->IngestExternalFile(handles_[1], {unknown_sst}, ifo)); // CF was not dropped, ok to Ingest ASSERT_OK(db_->IngestExternalFile(handles_[2], {unknown_sst}, ifo)); } /* * Test and verify the functionality of ingestion_options.move_files and * ingestion_options.failed_move_fall_back_to_copy */ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) { const bool fail_link = std::get<0>(GetParam()); const bool failed_move_fall_back_to_copy = std::get<1>(GetParam()); test_env_->set_fail_link(fail_link); const EnvOptions env_options; DestroyAndReopen(options_); const int kNumKeys = 10000; IngestExternalFileOptions ifo; ifo.move_files = true; ifo.failed_move_fall_back_to_copy = failed_move_fall_back_to_copy; std::string file_path = sst_files_dir_ + "file1.sst"; // Create SstFileWriter for default column family SstFileWriter sst_file_writer(env_options, options_); ASSERT_OK(sst_file_writer.Open(file_path)); for (int i = 0; i < kNumKeys; i++) { ASSERT_OK(sst_file_writer.Put(Key(i), Key(i) + "_value")); } ASSERT_OK(sst_file_writer.Finish()); uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(file_path, &file_size)); bool copyfile = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "ExternalSstFileIngestionJob::Prepare:CopyFile", [&](void* /* arg */) { copyfile = true; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); const Status s = db_->IngestExternalFile({file_path}, ifo); ColumnFamilyHandleImpl* cfh = static_cast(dbfull()->DefaultColumnFamily()); ColumnFamilyData* cfd = cfh->cfd(); const InternalStats* internal_stats_ptr = cfd->internal_stats(); const std::vector& comp_stats = internal_stats_ptr->TEST_GetCompactionStats(); uint64_t bytes_copied = 0; uint64_t bytes_moved = 0; for (const auto& stats : comp_stats) { bytes_copied += stats.bytes_written; 
bytes_moved += stats.bytes_moved; } if (!fail_link) { // Link operation succeeds. External SST should be moved. ASSERT_OK(s); ASSERT_EQ(0, bytes_copied); ASSERT_EQ(file_size, bytes_moved); ASSERT_FALSE(copyfile); } else { // Link operation fails. ASSERT_EQ(0, bytes_moved); if (failed_move_fall_back_to_copy) { ASSERT_OK(s); // Copy file is true since a failed link falls back to copy file. ASSERT_TRUE(copyfile); ASSERT_EQ(file_size, bytes_copied); } else { ASSERT_TRUE(s.IsNotSupported()); // Copy file is false since a failed link does not fall back to copy file. ASSERT_FALSE(copyfile); ASSERT_EQ(0, bytes_copied); } } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } class TestIngestExternalFileListener : public EventListener { public: void OnExternalFileIngested(DB* /*db*/, const ExternalFileIngestionInfo& info) override { ingested_files.push_back(info); } std::vector ingested_files; }; TEST_P(ExternalSSTFileTest, IngestionListener) { Options options = CurrentOptions(); TestIngestExternalFileListener* listener = new TestIngestExternalFileListener(); options.listeners.emplace_back(listener); CreateAndReopenWithCF({"koko", "toto"}, options); bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); // Ingest into default cf ASSERT_OK(GenerateAndAddExternalFile( options, {1, 2}, -1, true, write_global_seqno, verify_checksums_before_ingest, false, true, nullptr, handles_[0])); ASSERT_EQ(listener->ingested_files.size(), 1); ASSERT_EQ(listener->ingested_files.back().cf_name, "default"); ASSERT_EQ(listener->ingested_files.back().global_seqno, 0); ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_id, 0); ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_name, "default"); // Ingest into cf1 ASSERT_OK(GenerateAndAddExternalFile( options, {1, 2}, -1, true, write_global_seqno, verify_checksums_before_ingest, false, true, nullptr, handles_[1])); 
ASSERT_EQ(listener->ingested_files.size(), 2); ASSERT_EQ(listener->ingested_files.back().cf_name, "koko"); ASSERT_EQ(listener->ingested_files.back().global_seqno, 0); ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_id, 1); ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_name, "koko"); // Ingest into cf2 ASSERT_OK(GenerateAndAddExternalFile( options, {1, 2}, -1, true, write_global_seqno, verify_checksums_before_ingest, false, true, nullptr, handles_[2])); ASSERT_EQ(listener->ingested_files.size(), 3); ASSERT_EQ(listener->ingested_files.back().cf_name, "toto"); ASSERT_EQ(listener->ingested_files.back().global_seqno, 0); ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_id, 2); ASSERT_EQ(listener->ingested_files.back().table_properties.column_family_name, "toto"); } TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) { Options options = CurrentOptions(); DestroyAndReopen(options); const int kNumKeys = 10000; // Insert keys using normal path and take a snapshot for (int i = 0; i < kNumKeys; i++) { ASSERT_OK(Put(Key(i), Key(i) + "_V1")); } const Snapshot* snap = db_->GetSnapshot(); // Overwrite all keys using IngestExternalFile std::string sst_file_path = sst_files_dir_ + "file1.sst"; SstFileWriter sst_file_writer(EnvOptions(), options); ASSERT_OK(sst_file_writer.Open(sst_file_path)); for (int i = 0; i < kNumKeys; i++) { ASSERT_OK(sst_file_writer.Put(Key(i), Key(i) + "_V2")); } ASSERT_OK(sst_file_writer.Finish()); IngestExternalFileOptions ifo; ifo.move_files = true; ASSERT_OK(db_->IngestExternalFile({sst_file_path}, ifo)); for (int i = 0; i < kNumKeys; i++) { ASSERT_EQ(Get(Key(i), snap), Key(i) + "_V1"); ASSERT_EQ(Get(Key(i)), Key(i) + "_V2"); } db_->ReleaseSnapshot(snap); } TEST_P(ExternalSSTFileTest, IngestBehind) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; options.num_levels = 3; options.disable_auto_compactions = false; 
DestroyAndReopen(options); std::vector> file_data; std::map true_data; // Insert 100 -> 200 into the memtable for (int i = 100; i <= 200; i++) { ASSERT_OK(Put(Key(i), "memtable")); true_data[Key(i)] = "memtable"; } // Insert 100 -> 200 using IngestExternalFile file_data.clear(); for (int i = 0; i <= 20; i++) { file_data.emplace_back(Key(i), "ingest_behind"); } bool allow_global_seqno = true; bool ingest_behind = true; bool write_global_seqno = std::get<0>(GetParam()); bool verify_checksums_before_ingest = std::get<1>(GetParam()); // Can't ingest behind since allow_ingest_behind isn't set to true ASSERT_NOK(GenerateAndAddExternalFile( options, file_data, -1, allow_global_seqno, write_global_seqno, verify_checksums_before_ingest, ingest_behind, false /*sort_data*/, &true_data)); options.allow_ingest_behind = true; // check that we still can open the DB, as num_levels should be // sanitized to 3 options.num_levels = 2; DestroyAndReopen(options); options.num_levels = 3; DestroyAndReopen(options); // Insert 100 -> 200 into the memtable for (int i = 100; i <= 200; i++) { ASSERT_OK(Put(Key(i), "memtable")); true_data[Key(i)] = "memtable"; } db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); // Universal picker should go at second from the bottom level ASSERT_EQ("0,1", FilesPerLevel()); ASSERT_OK(GenerateAndAddExternalFile( options, file_data, -1, allow_global_seqno, write_global_seqno, verify_checksums_before_ingest, true /*ingest_behind*/, false /*sort_data*/, &true_data)); ASSERT_EQ("0,1,1", FilesPerLevel()); // this time ingest should fail as the file doesn't fit to the bottom level ASSERT_NOK(GenerateAndAddExternalFile( options, file_data, -1, allow_global_seqno, write_global_seqno, verify_checksums_before_ingest, true /*ingest_behind*/, false /*sort_data*/, &true_data)); ASSERT_EQ("0,1,1", FilesPerLevel()); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); // bottom level should be empty ASSERT_EQ("0,1", FilesPerLevel()); size_t kcnt = 0; 
VerifyDBFromMap(true_data, &kcnt, false); } TEST_F(ExternalSSTFileTest, SkipBloomFilter) { Options options = CurrentOptions(); BlockBasedTableOptions table_options; table_options.filter_policy.reset(NewBloomFilterPolicy(10)); table_options.cache_index_and_filter_blocks = true; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); // Create external SST file and include bloom filters options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); { std::string file_path = sst_files_dir_ + "sst_with_bloom.sst"; SstFileWriter sst_file_writer(EnvOptions(), options); ASSERT_OK(sst_file_writer.Open(file_path)); ASSERT_OK(sst_file_writer.Put("Key1", "Value1")); ASSERT_OK(sst_file_writer.Finish()); ASSERT_OK( db_->IngestExternalFile({file_path}, IngestExternalFileOptions())); ASSERT_EQ(Get("Key1"), "Value1"); ASSERT_GE( options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD), 1); } // Create external SST file but skip bloom filters options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); DestroyAndReopen(options); { std::string file_path = sst_files_dir_ + "sst_with_no_bloom.sst"; SstFileWriter sst_file_writer(EnvOptions(), options, nullptr, true, Env::IOPriority::IO_TOTAL, true /* skip_filters */); ASSERT_OK(sst_file_writer.Open(file_path)); ASSERT_OK(sst_file_writer.Put("Key1", "Value1")); ASSERT_OK(sst_file_writer.Finish()); ASSERT_OK( db_->IngestExternalFile({file_path}, IngestExternalFileOptions())); ASSERT_EQ(Get("Key1"), "Value1"); ASSERT_EQ( options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD), 0); } } TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) { if (!ZSTD_Supported()) { return; } const int kNumEntries = 1 << 10; const int kNumBytesPerEntry = 1 << 10; Options options = CurrentOptions(); options.compression = kZSTD; options.compression_opts.max_dict_bytes = 1 << 14; // 16KB options.compression_opts.zstd_max_train_bytes = 1 << 18; // 256KB DestroyAndReopen(options); 
std::atomic num_compression_dicts(0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict", [&](void* /* arg */) { ++num_compression_dicts; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Random rnd(301); std::vector> random_data; for (int i = 0; i < kNumEntries; i++) { std::string val; test::RandomString(&rnd, kNumBytesPerEntry, &val); random_data.emplace_back(Key(i), std::move(val)); } ASSERT_OK(GenerateAndAddExternalFile(options, std::move(random_data))); ASSERT_EQ(1, num_compression_dicts); } TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_Success) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); Options options = CurrentOptions(); options.env = fault_injection_env.get(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); std::vector column_families; column_families.push_back(handles_[0]); column_families.push_back(handles_[1]); column_families.push_back(handles_[2]); std::vector ifos(column_families.size()); for (auto& ifo : ifos) { ifo.allow_global_seqno = true; // Always allow global_seqno // May or may not write global_seqno ifo.write_global_seqno = std::get<0>(GetParam()); // Whether to verify checksums before ingestion ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); } std::vector>> data; data.push_back( {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); data.push_back( {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); data.push_back( {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); // Resize the true_data vector upon construction to avoid re-alloc std::vector> true_data( column_families.size()); Status s = GenerateAndAddExternalFiles(options, column_families, ifos, data, -1, true, true_data); ASSERT_OK(s); Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, options); ASSERT_EQ(3, handles_.size()); int cf = 0; for (const auto& verify_map : 
true_data) { for (const auto& elem : verify_map) { const std::string& key = elem.first; const std::string& value = elem.second; ASSERT_EQ(value, Get(cf, key)); } ++cf; } Close(); Destroy(options, true /* delete_cf_paths */); } TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_NoMixedStateWithSnapshot) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::IngestExternalFiles:InstallSVForFirstCF:0", "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" "BeforeRead"}, {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" "AfterRead", "DBImpl::IngestExternalFiles:InstallSVForFirstCF:1"}, }); SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.env = fault_injection_env.get(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); const std::vector> data_before_ingestion = {{{"foo1", "fv1_0"}, {"foo2", "fv2_0"}, {"foo3", "fv3_0"}}, {{"bar1", "bv1_0"}, {"bar2", "bv2_0"}, {"bar3", "bv3_0"}}, {{"bar4", "bv4_0"}, {"bar5", "bv5_0"}, {"bar6", "bv6_0"}}}; for (size_t i = 0; i != handles_.size(); ++i) { int cf = static_cast(i); const auto& orig_data = data_before_ingestion[i]; for (const auto& kv : orig_data) { ASSERT_OK(Put(cf, kv.first, kv.second)); } ASSERT_OK(Flush(cf)); } std::vector column_families; column_families.push_back(handles_[0]); column_families.push_back(handles_[1]); column_families.push_back(handles_[2]); std::vector ifos(column_families.size()); for (auto& ifo : ifos) { ifo.allow_global_seqno = true; // Always allow global_seqno // May or may not write global_seqno ifo.write_global_seqno = std::get<0>(GetParam()); // Whether to verify checksums before ingestion ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); } std::vector>> data; data.push_back( {std::make_pair("foo1", "fv1"), 
std::make_pair("foo2", "fv2")}); data.push_back( {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); data.push_back( {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); // Resize the true_data vector upon construction to avoid re-alloc std::vector> true_data( column_families.size()); // Take snapshot before ingestion starts ReadOptions read_opts; read_opts.total_order_seek = true; read_opts.snapshot = dbfull()->GetSnapshot(); std::vector iters(handles_.size()); // Range scan checks first kv of each CF before ingestion starts. for (size_t i = 0; i != handles_.size(); ++i) { iters[i] = dbfull()->NewIterator(read_opts, handles_[i]); iters[i]->SeekToFirst(); ASSERT_TRUE(iters[i]->Valid()); const std::string& key = iters[i]->key().ToString(); const std::string& value = iters[i]->value().ToString(); const std::map& orig_data = data_before_ingestion[i]; std::map::const_iterator it = orig_data.find(key); ASSERT_NE(orig_data.end(), it); ASSERT_EQ(it->second, value); iters[i]->Next(); } port::Thread ingest_thread([&]() { ASSERT_OK(GenerateAndAddExternalFiles(options, column_families, ifos, data, -1, true, true_data)); }); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" "BeforeRead"); // Should see only data before ingestion for (size_t i = 0; i != handles_.size(); ++i) { const auto& orig_data = data_before_ingestion[i]; for (; iters[i]->Valid(); iters[i]->Next()) { const std::string& key = iters[i]->key().ToString(); const std::string& value = iters[i]->value().ToString(); std::map::const_iterator it = orig_data.find(key); ASSERT_NE(orig_data.end(), it); ASSERT_EQ(it->second, value); } } TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_MixedState:" "AfterRead"); ingest_thread.join(); for (auto* iter : iters) { delete iter; } iters.clear(); dbfull()->ReleaseSnapshot(read_opts.snapshot); Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, options); // 
Should see consistent state after ingestion for all column families even // without snapshot. ASSERT_EQ(3, handles_.size()); int cf = 0; for (const auto& verify_map : true_data) { for (const auto& elem : verify_map) { const std::string& key = elem.first; const std::string& value = elem.second; ASSERT_EQ(value, Get(cf, key)); } ++cf; } Close(); Destroy(options, true /* delete_cf_paths */); } TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_PrepareFail) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); Options options = CurrentOptions(); options.env = fault_injection_env.get(); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::IngestExternalFiles:BeforeLastJobPrepare:0", "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_PrepareFail:" "0"}, {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies:PrepareFail:" "1", "DBImpl::IngestExternalFiles:BeforeLastJobPrepare:1"}, }); SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); std::vector column_families; column_families.push_back(handles_[0]); column_families.push_back(handles_[1]); column_families.push_back(handles_[2]); std::vector ifos(column_families.size()); for (auto& ifo : ifos) { ifo.allow_global_seqno = true; // Always allow global_seqno // May or may not write global_seqno ifo.write_global_seqno = std::get<0>(GetParam()); // Whether to verify block checksums before ingest ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); } std::vector>> data; data.push_back( {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); data.push_back( {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); data.push_back( {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); // Resize the true_data vector upon construction to avoid re-alloc std::vector> true_data( column_families.size()); 
port::Thread ingest_thread([&]() { Status s = GenerateAndAddExternalFiles(options, column_families, ifos, data, -1, true, true_data); ASSERT_NOK(s); }); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_PrepareFail:" "0"); fault_injection_env->SetFilesystemActive(false); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies:PrepareFail:" "1"); ingest_thread.join(); fault_injection_env->SetFilesystemActive(true); Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, options); ASSERT_EQ(3, handles_.size()); int cf = 0; for (const auto& verify_map : true_data) { for (const auto& elem : verify_map) { const std::string& key = elem.first; ASSERT_EQ("NOT_FOUND", Get(cf, key)); } ++cf; } Close(); Destroy(options, true /* delete_cf_paths */); } TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_CommitFail) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); Options options = CurrentOptions(); options.env = fault_injection_env.get(); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::IngestExternalFiles:BeforeJobsRun:0", "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" "0"}, {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" "1", "DBImpl::IngestExternalFiles:BeforeJobsRun:1"}, }); SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); std::vector column_families; column_families.push_back(handles_[0]); column_families.push_back(handles_[1]); column_families.push_back(handles_[2]); std::vector ifos(column_families.size()); for (auto& ifo : ifos) { ifo.allow_global_seqno = true; // Always allow global_seqno // May or may not write global_seqno ifo.write_global_seqno = std::get<0>(GetParam()); // Whether to verify block checksums before ingestion ifo.verify_checksums_before_ingest 
= std::get<1>(GetParam()); } std::vector>> data; data.push_back( {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); data.push_back( {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); data.push_back( {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); // Resize the true_data vector upon construction to avoid re-alloc std::vector> true_data( column_families.size()); port::Thread ingest_thread([&]() { Status s = GenerateAndAddExternalFiles(options, column_families, ifos, data, -1, true, true_data); ASSERT_NOK(s); }); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" "0"); fault_injection_env->SetFilesystemActive(false); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_CommitFail:" "1"); ingest_thread.join(); fault_injection_env->SetFilesystemActive(true); Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, options); ASSERT_EQ(3, handles_.size()); int cf = 0; for (const auto& verify_map : true_data) { for (const auto& elem : verify_map) { const std::string& key = elem.first; ASSERT_EQ("NOT_FOUND", Get(cf, key)); } ++cf; } Close(); Destroy(options, true /* delete_cf_paths */); } TEST_P(ExternalSSTFileTest, IngestFilesIntoMultipleColumnFamilies_PartialManifestWriteFail) { std::unique_ptr fault_injection_env( new FaultInjectionTestEnv(env_)); Options options = CurrentOptions(); options.env = fault_injection_env.get(); CreateAndReopenWithCF({"pikachu", "eevee"}, options); SyncPoint::GetInstance()->ClearTrace(); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->LoadDependency({ {"VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0", "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" "PartialManifestWriteFail:0"}, {"ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" "PartialManifestWriteFail:1", 
"VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:1"}, }); SyncPoint::GetInstance()->EnableProcessing(); std::vector column_families; column_families.push_back(handles_[0]); column_families.push_back(handles_[1]); column_families.push_back(handles_[2]); std::vector ifos(column_families.size()); for (auto& ifo : ifos) { ifo.allow_global_seqno = true; // Always allow global_seqno // May or may not write global_seqno ifo.write_global_seqno = std::get<0>(GetParam()); // Whether to verify block checksums before ingestion ifo.verify_checksums_before_ingest = std::get<1>(GetParam()); } std::vector>> data; data.push_back( {std::make_pair("foo1", "fv1"), std::make_pair("foo2", "fv2")}); data.push_back( {std::make_pair("bar1", "bv1"), std::make_pair("bar2", "bv2")}); data.push_back( {std::make_pair("bar3", "bv3"), std::make_pair("bar4", "bv4")}); // Resize the true_data vector upon construction to avoid re-alloc std::vector> true_data( column_families.size()); port::Thread ingest_thread([&]() { Status s = GenerateAndAddExternalFiles(options, column_families, ifos, data, -1, true, true_data); ASSERT_NOK(s); }); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" "PartialManifestWriteFail:0"); fault_injection_env->SetFilesystemActive(false); TEST_SYNC_POINT( "ExternalSSTFileTest::IngestFilesIntoMultipleColumnFamilies_" "PartialManifestWriteFail:1"); ingest_thread.join(); fault_injection_env->DropUnsyncedFileData(); fault_injection_env->SetFilesystemActive(true); Close(); ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, options); ASSERT_EQ(3, handles_.size()); int cf = 0; for (const auto& verify_map : true_data) { for (const auto& elem : verify_map) { const std::string& key = elem.first; ASSERT_EQ("NOT_FOUND", Get(cf, key)); } ++cf; } Close(); Destroy(options, true /* delete_cf_paths */); } TEST_P(ExternalSSTFileTest, IngestFilesTriggerFlushingWithTwoWriteQueue) { Options options = CurrentOptions(); // Use large 
buffer to avoid memtable flush options.write_buffer_size = 1024 * 1024; options.two_write_queues = true; DestroyAndReopen(options); ASSERT_OK(dbfull()->Put(WriteOptions(), "1000", "v1")); ASSERT_OK(dbfull()->Put(WriteOptions(), "1001", "v1")); ASSERT_OK(dbfull()->Put(WriteOptions(), "9999", "v1")); // Put one key which is overlap with keys in memtable. // It will trigger flushing memtable and require this thread is // currently at the front of the 2nd writer queue. We must make // sure that it won't enter the 2nd writer queue for the second time. std::vector> data; data.push_back(std::make_pair("1001", "v2")); GenerateAndAddExternalFile(options, data); } TEST_P(ExternalSSTFileTest, DeltaEncodingWhileGlobalSeqnoPresent) { Options options = CurrentOptions(); DestroyAndReopen(options); constexpr size_t kValueSize = 8; Random rnd(301); std::string value(RandomString(&rnd, kValueSize)); // Write some key to make global seqno larger than zero for (int i = 0; i < 10; i++) { ASSERT_OK(Put("ab" + Key(i), value)); } // Get a Snapshot to make RocksDB assign global seqno to ingested sst files. auto snap = dbfull()->GetSnapshot(); std::string fname = sst_files_dir_ + "test_file"; rocksdb::SstFileWriter writer(EnvOptions(), options); ASSERT_OK(writer.Open(fname)); std::string key1 = "ab"; std::string key2 = "ab"; // Make the prefix of key2 is same with key1 add zero seqno. 
The tail of every // key is composed as (seqno << 8 | value_type), and here `1` represents // ValueType::kTypeValue PutFixed64(&key2, PackSequenceAndType(0, kTypeValue)); key2 += "cdefghijkl"; ASSERT_OK(writer.Put(key1, value)); ASSERT_OK(writer.Put(key2, value)); ExternalSstFileInfo info; ASSERT_OK(writer.Finish(&info)); ASSERT_OK(dbfull()->IngestExternalFile({info.file_path}, IngestExternalFileOptions())); dbfull()->ReleaseSnapshot(snap); ASSERT_EQ(value, Get(key1)); // You will get error here ASSERT_EQ(value, Get(key2)); } TEST_P(ExternalSSTFileTest, DeltaEncodingWhileGlobalSeqnoPresentIteratorSwitch) { // Regression test for bug where global seqno corrupted the shared bytes // buffer when switching from reverse iteration to forward iteration. constexpr size_t kValueSize = 8; Options options = CurrentOptions(); Random rnd(301); std::string value(RandomString(&rnd, kValueSize)); std::string key0 = "aa"; std::string key1 = "ab"; // Make the prefix of key2 is same with key1 add zero seqno. The tail of every // key is composed as (seqno << 8 | value_type), and here `1` represents // ValueType::kTypeValue std::string key2 = "ab"; PutFixed64(&key2, PackSequenceAndType(0, kTypeValue)); key2 += "cdefghijkl"; std::string key3 = key2 + "_"; // Write some key to make global seqno larger than zero ASSERT_OK(Put(key0, value)); std::string fname = sst_files_dir_ + "test_file"; rocksdb::SstFileWriter writer(EnvOptions(), options); ASSERT_OK(writer.Open(fname)); // key0 is a dummy to ensure the turnaround point (key1) comes from Prev // cache rather than block (restart keys are pinned in block). ASSERT_OK(writer.Put(key0, value)); ASSERT_OK(writer.Put(key1, value)); ASSERT_OK(writer.Put(key2, value)); ASSERT_OK(writer.Put(key3, value)); ExternalSstFileInfo info; ASSERT_OK(writer.Finish(&info)); ASSERT_OK(dbfull()->IngestExternalFile({info.file_path}, IngestExternalFileOptions())); ReadOptions read_opts; // Prevents Seek() when switching directions, which circumvents the bug. 
read_opts.total_order_seek = true; Iterator* iter = db_->NewIterator(read_opts); // Scan backwards to key2. File iterator will then be positioned at key1. iter->Seek(key3); ASSERT_EQ(key3, iter->key()); iter->Prev(); ASSERT_EQ(key2, iter->key()); // Scan forwards and make sure key3 is present. Previously key3 would be // corrupted by the global seqno from key1. iter->Next(); ASSERT_EQ(key3, iter->key()); delete iter; } INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest, testing::Values(std::make_tuple(false, false), std::make_tuple(false, true), std::make_tuple(true, false), std::make_tuple(true, true))); INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest, ExternSSTFileLinkFailFallbackTest, testing::Values(std::make_tuple(true, false), std::make_tuple(true, true), std::make_tuple(false, false))); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as External SST File Writer and Ingestion are not supported " "in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/fault_injection_test.cc000066400000000000000000000422431370372246700203620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright 2014 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // This test uses a custom Env to keep track of the state of a filesystem as of // the last "sync". 
It then checks for data loss errors by purposely dropping // file data (or entire files) not protected by a "sync". #include "db/db_impl/db_impl.h" #include "db/log_format.h" #include "db/version_set.h" #include "env/mock_env.h" #include "file/filename.h" #include "logging/logging.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/table.h" #include "rocksdb/write_batch.h" #include "test_util/fault_injection_test_env.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { static const int kValueSize = 1000; static const int kMaxNumValues = 2000; static const size_t kNumIterations = 3; enum FaultInjectionOptionConfig { kDefault, kDifferentDataDir, kWalDir, kSyncWal, kWalDirSyncWal, kMultiLevels, kEnd, }; class FaultInjectionTest : public testing::Test, public testing::WithParamInterface> { protected: int option_config_; int non_inclusive_end_range_; // kEnd or equivalent to that // When need to make sure data is persistent, sync WAL bool sync_use_wal_; // When need to make sure data is persistent, call DB::CompactRange() bool sync_use_compact_; bool sequential_order_; protected: public: enum ExpectedVerifResult { kValExpectFound, kValExpectNoError }; enum ResetMethod { kResetDropUnsyncedData, kResetDropRandomUnsyncedData, kResetDeleteUnsyncedFiles, kResetDropAndDeleteUnsynced }; std::unique_ptr base_env_; FaultInjectionTestEnv* env_; std::string dbname_; std::shared_ptr tiny_cache_; Options options_; DB* db_; FaultInjectionTest() : option_config_(std::get<1>(GetParam())), non_inclusive_end_range_(std::get<2>(GetParam())), sync_use_wal_(false), sync_use_compact_(true), base_env_(nullptr), env_(nullptr), db_(nullptr) {} ~FaultInjectionTest() override { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } bool ChangeOptions() { 
option_config_++; if (option_config_ >= non_inclusive_end_range_) { return false; } else { if (option_config_ == kMultiLevels) { base_env_.reset(new MockEnv(Env::Default())); } return true; } } // Return the current option configuration. Options CurrentOptions() { sync_use_wal_ = false; sync_use_compact_ = true; Options options; switch (option_config_) { case kWalDir: options.wal_dir = test::PerThreadDBPath(env_, "fault_test_wal"); break; case kDifferentDataDir: options.db_paths.emplace_back( test::PerThreadDBPath(env_, "fault_test_data"), 1000000U); break; case kSyncWal: sync_use_wal_ = true; sync_use_compact_ = false; break; case kWalDirSyncWal: options.wal_dir = test::PerThreadDBPath(env_, "/fault_test_wal"); sync_use_wal_ = true; sync_use_compact_ = false; break; case kMultiLevels: options.write_buffer_size = 64 * 1024; options.target_file_size_base = 64 * 1024; options.level0_file_num_compaction_trigger = 2; options.level0_slowdown_writes_trigger = 2; options.level0_stop_writes_trigger = 4; options.max_bytes_for_level_base = 128 * 1024; options.max_write_buffer_number = 2; options.max_background_compactions = 8; options.max_background_flushes = 8; sync_use_wal_ = true; sync_use_compact_ = false; break; default: break; } return options; } Status NewDB() { assert(db_ == nullptr); assert(tiny_cache_ == nullptr); assert(env_ == nullptr); env_ = new FaultInjectionTestEnv(base_env_ ? 
base_env_.get() : Env::Default()); options_ = CurrentOptions(); options_.env = env_; options_.paranoid_checks = true; BlockBasedTableOptions table_options; tiny_cache_ = NewLRUCache(100); table_options.block_cache = tiny_cache_; options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); dbname_ = test::PerThreadDBPath("fault_test"); EXPECT_OK(DestroyDB(dbname_, options_)); options_.create_if_missing = true; Status s = OpenDB(); options_.create_if_missing = false; return s; } void SetUp() override { sequential_order_ = std::get<0>(GetParam()); ASSERT_OK(NewDB()); } void TearDown() override { CloseDB(); Status s = DestroyDB(dbname_, options_); delete env_; env_ = nullptr; tiny_cache_.reset(); ASSERT_OK(s); } void Build(const WriteOptions& write_options, int start_idx, int num_vals) { std::string key_space, value_space; WriteBatch batch; for (int i = start_idx; i < start_idx + num_vals; i++) { Slice key = Key(i, &key_space); batch.Clear(); batch.Put(key, Value(i, &value_space)); ASSERT_OK(db_->Write(write_options, &batch)); } } Status ReadValue(int i, std::string* val) const { std::string key_space, value_space; Slice key = Key(i, &key_space); Value(i, &value_space); ReadOptions options; return db_->Get(options, key, val); } Status Verify(int start_idx, int num_vals, ExpectedVerifResult expected) const { std::string val; std::string value_space; Status s; for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) { Value(i, &value_space); s = ReadValue(i, &val); if (s.ok()) { EXPECT_EQ(value_space, val); } if (expected == kValExpectFound) { if (!s.ok()) { fprintf(stderr, "Error when read %dth record (expect found): %s\n", i, s.ToString().c_str()); return s; } } else if (!s.ok() && !s.IsNotFound()) { fprintf(stderr, "Error when read %dth record: %s\n", i, s.ToString().c_str()); return s; } } return Status::OK(); } // Return the ith key Slice Key(int i, std::string* storage) const { unsigned long long num = i; if (!sequential_order_) { // random 
transfer const int m = 0x5bd1e995; num *= m; num ^= num << 24; } char buf[100]; snprintf(buf, sizeof(buf), "%016d", static_cast(num)); storage->assign(buf, strlen(buf)); return Slice(*storage); } // Return the value to associate with the specified key Slice Value(int k, std::string* storage) const { Random r(k); return test::RandomString(&r, kValueSize, storage); } void CloseDB() { delete db_; db_ = nullptr; } Status OpenDB() { CloseDB(); env_->ResetState(); Status s = DB::Open(options_, dbname_, &db_); assert(db_ != nullptr); return s; } void DeleteAllData() { Iterator* iter = db_->NewIterator(ReadOptions()); WriteOptions options; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ASSERT_OK(db_->Delete(WriteOptions(), iter->key())); } delete iter; FlushOptions flush_options; flush_options.wait = true; db_->Flush(flush_options); } // rnd cannot be null for kResetDropRandomUnsyncedData void ResetDBState(ResetMethod reset_method, Random* rnd = nullptr) { env_->AssertNoOpenFile(); switch (reset_method) { case kResetDropUnsyncedData: ASSERT_OK(env_->DropUnsyncedFileData()); break; case kResetDropRandomUnsyncedData: ASSERT_OK(env_->DropRandomUnsyncedFileData(rnd)); break; case kResetDeleteUnsyncedFiles: ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync()); break; case kResetDropAndDeleteUnsynced: ASSERT_OK(env_->DropUnsyncedFileData()); ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync()); break; default: assert(false); } } void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) { DeleteAllData(); WriteOptions write_options; write_options.sync = sync_use_wal_; Build(write_options, 0, num_pre_sync); if (sync_use_compact_) { db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); } write_options.sync = false; Build(write_options, num_pre_sync, num_post_sync); } void PartialCompactTestReopenWithFault(ResetMethod reset_method, int num_pre_sync, int num_post_sync, Random* rnd = nullptr) { env_->SetFilesystemActive(false); CloseDB(); 
ResetDBState(reset_method, rnd); ASSERT_OK(OpenDB()); ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound)); ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::kValExpectNoError)); WaitCompactionFinish(); ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::kValExpectFound)); ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::kValExpectNoError)); } void NoWriteTestPreFault() { } void NoWriteTestReopenWithFault(ResetMethod reset_method) { CloseDB(); ResetDBState(reset_method); ASSERT_OK(OpenDB()); } void WaitCompactionFinish() { static_cast(db_->GetRootDB())->TEST_WaitForCompact(); ASSERT_OK(db_->Put(WriteOptions(), "", "")); } }; class FaultInjectionTestSplitted : public FaultInjectionTest {}; TEST_P(FaultInjectionTestSplitted, FaultTest) { do { Random rnd(301); for (size_t idx = 0; idx < kNumIterations; idx++) { int num_pre_sync = rnd.Uniform(kMaxNumValues); int num_post_sync = rnd.Uniform(kMaxNumValues); PartialCompactTestPreFault(num_pre_sync, num_post_sync); PartialCompactTestReopenWithFault(kResetDropUnsyncedData, num_pre_sync, num_post_sync); NoWriteTestPreFault(); NoWriteTestReopenWithFault(kResetDropUnsyncedData); PartialCompactTestPreFault(num_pre_sync, num_post_sync); PartialCompactTestReopenWithFault(kResetDropRandomUnsyncedData, num_pre_sync, num_post_sync, &rnd); NoWriteTestPreFault(); NoWriteTestReopenWithFault(kResetDropUnsyncedData); // Setting a separate data path won't pass the test as we don't sync // it after creating new files, PartialCompactTestPreFault(num_pre_sync, num_post_sync); PartialCompactTestReopenWithFault(kResetDropAndDeleteUnsynced, num_pre_sync, num_post_sync); NoWriteTestPreFault(); NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); PartialCompactTestPreFault(num_pre_sync, num_post_sync); // No new files created so we expect all values since no files will be // dropped. 
PartialCompactTestReopenWithFault(kResetDeleteUnsyncedFiles, num_pre_sync, num_post_sync); NoWriteTestPreFault(); NoWriteTestReopenWithFault(kResetDeleteUnsyncedFiles); } } while (ChangeOptions()); } // Previous log file is not fsynced if sync is forced after log rolling. TEST_P(FaultInjectionTest, WriteOptionSyncTest) { test::SleepingBackgroundTask sleeping_task_low; env_->SetBackgroundThreads(1, Env::HIGH); // Block the job queue to prevent flush job from running. env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::HIGH); sleeping_task_low.WaitUntilSleeping(); WriteOptions write_options; write_options.sync = false; std::string key_space, value_space; ASSERT_OK( db_->Put(write_options, Key(1, &key_space), Value(1, &value_space))); FlushOptions flush_options; flush_options.wait = false; ASSERT_OK(db_->Flush(flush_options)); write_options.sync = true; ASSERT_OK( db_->Put(write_options, Key(2, &key_space), Value(2, &value_space))); db_->FlushWAL(false); env_->SetFilesystemActive(false); NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); ASSERT_OK(OpenDB()); std::string val; Value(2, &value_space); ASSERT_OK(ReadValue(2, &val)); ASSERT_EQ(value_space, val); Value(1, &value_space); ASSERT_OK(ReadValue(1, &val)); ASSERT_EQ(value_space, val); } TEST_P(FaultInjectionTest, UninstalledCompaction) { options_.target_file_size_base = 32 * 1024; options_.write_buffer_size = 100 << 10; // 100KB options_.level0_file_num_compaction_trigger = 6; options_.level0_stop_writes_trigger = 1 << 10; options_.level0_slowdown_writes_trigger = 1 << 10; options_.max_background_compactions = 1; OpenDB(); if (!sequential_order_) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"FaultInjectionTest::FaultTest:0", "DBImpl::BGWorkCompaction"}, {"CompactionJob::Run():End", "FaultInjectionTest::FaultTest:1"}, {"FaultInjectionTest::FaultTest:2", 
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}, }); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); int kNumKeys = 1000; Build(WriteOptions(), 0, kNumKeys); FlushOptions flush_options; flush_options.wait = true; db_->Flush(flush_options); ASSERT_OK(db_->Put(WriteOptions(), "", "")); TEST_SYNC_POINT("FaultInjectionTest::FaultTest:0"); TEST_SYNC_POINT("FaultInjectionTest::FaultTest:1"); env_->SetFilesystemActive(false); TEST_SYNC_POINT("FaultInjectionTest::FaultTest:2"); CloseDB(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ResetDBState(kResetDropUnsyncedData); std::atomic opened(false); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::Open:Opened", [&](void* /*arg*/) { opened.store(true); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BGWorkCompaction", [&](void* /*arg*/) { ASSERT_TRUE(opened.load()); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); ASSERT_OK(OpenDB()); ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound)); WaitCompactionFinish(); ASSERT_OK(Verify(0, kNumKeys, FaultInjectionTest::kValExpectFound)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_P(FaultInjectionTest, ManualLogSyncTest) { test::SleepingBackgroundTask sleeping_task_low; env_->SetBackgroundThreads(1, Env::HIGH); // Block the job queue to prevent flush job from running. 
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, Env::Priority::HIGH); sleeping_task_low.WaitUntilSleeping(); WriteOptions write_options; write_options.sync = false; std::string key_space, value_space; ASSERT_OK( db_->Put(write_options, Key(1, &key_space), Value(1, &value_space))); FlushOptions flush_options; flush_options.wait = false; ASSERT_OK(db_->Flush(flush_options)); ASSERT_OK( db_->Put(write_options, Key(2, &key_space), Value(2, &value_space))); ASSERT_OK(db_->FlushWAL(true)); env_->SetFilesystemActive(false); NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilDone(); ASSERT_OK(OpenDB()); std::string val; Value(2, &value_space); ASSERT_OK(ReadValue(2, &val)); ASSERT_EQ(value_space, val); Value(1, &value_space); ASSERT_OK(ReadValue(1, &val)); ASSERT_EQ(value_space, val); } TEST_P(FaultInjectionTest, WriteBatchWalTerminationTest) { ReadOptions ro; Options options = CurrentOptions(); options.env = env_; WriteOptions wo; wo.sync = true; wo.disableWAL = false; WriteBatch batch; batch.Put("cats", "dogs"); batch.MarkWalTerminationPoint(); batch.Put("boys", "girls"); ASSERT_OK(db_->Write(wo, &batch)); env_->SetFilesystemActive(false); NoWriteTestReopenWithFault(kResetDropAndDeleteUnsynced); ASSERT_OK(OpenDB()); std::string val; ASSERT_OK(db_->Get(ro, "cats", &val)); ASSERT_EQ("dogs", val); ASSERT_EQ(db_->Get(ro, "boys", &val), Status::NotFound()); } INSTANTIATE_TEST_CASE_P( FaultTest, FaultInjectionTest, ::testing::Values(std::make_tuple(false, kDefault, kEnd), std::make_tuple(true, kDefault, kEnd))); INSTANTIATE_TEST_CASE_P( FaultTest, FaultInjectionTestSplitted, ::testing::Values(std::make_tuple(false, kDefault, kSyncWal), std::make_tuple(true, kDefault, kSyncWal), std::make_tuple(false, kSyncWal, kEnd), std::make_tuple(true, kSyncWal, kEnd))); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); 
} rocksdb-6.11.4/db/file_indexer.cc000066400000000000000000000174211370372246700166030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/file_indexer.h" #include #include #include "db/version_edit.h" #include "rocksdb/comparator.h" namespace ROCKSDB_NAMESPACE { FileIndexer::FileIndexer(const Comparator* ucmp) : num_levels_(0), ucmp_(ucmp), level_rb_(nullptr) {} size_t FileIndexer::NumLevelIndex() const { return next_level_index_.size(); } size_t FileIndexer::LevelIndexSize(size_t level) const { if (level >= next_level_index_.size()) { return 0; } return next_level_index_[level].num_index; } void FileIndexer::GetNextLevelIndex(const size_t level, const size_t file_index, const int cmp_smallest, const int cmp_largest, int32_t* left_bound, int32_t* right_bound) const { assert(level > 0); // Last level, no hint if (level == num_levels_ - 1) { *left_bound = 0; *right_bound = -1; return; } assert(level < num_levels_ - 1); assert(static_cast(file_index) <= level_rb_[level]); const IndexUnit* index_units = next_level_index_[level].index_units; const auto& index = index_units[file_index]; if (cmp_smallest < 0) { *left_bound = (level > 0 && file_index > 0) ? 
index_units[file_index - 1].largest_lb : 0; *right_bound = index.smallest_rb; } else if (cmp_smallest == 0) { *left_bound = index.smallest_lb; *right_bound = index.smallest_rb; } else if (cmp_smallest > 0 && cmp_largest < 0) { *left_bound = index.smallest_lb; *right_bound = index.largest_rb; } else if (cmp_largest == 0) { *left_bound = index.largest_lb; *right_bound = index.largest_rb; } else if (cmp_largest > 0) { *left_bound = index.largest_lb; *right_bound = level_rb_[level + 1]; } else { assert(false); } assert(*left_bound >= 0); assert(*left_bound <= *right_bound + 1); assert(*right_bound <= level_rb_[level + 1]); } void FileIndexer::UpdateIndex(Arena* arena, const size_t num_levels, std::vector* const files) { if (files == nullptr) { return; } if (num_levels == 0) { // uint_32 0-1 would cause bad behavior num_levels_ = num_levels; return; } assert(level_rb_ == nullptr); // level_rb_ should be init here num_levels_ = num_levels; next_level_index_.resize(num_levels); char* mem = arena->AllocateAligned(num_levels_ * sizeof(int32_t)); level_rb_ = new (mem) int32_t[num_levels_]; for (size_t i = 0; i < num_levels_; i++) { level_rb_[i] = -1; } // L1 - Ln-1 for (size_t level = 1; level < num_levels_ - 1; ++level) { const auto& upper_files = files[level]; const int32_t upper_size = static_cast(upper_files.size()); const auto& lower_files = files[level + 1]; level_rb_[level] = static_cast(upper_files.size()) - 1; if (upper_size == 0) { continue; } IndexLevel& index_level = next_level_index_[level]; index_level.num_index = upper_size; mem = arena->AllocateAligned(upper_size * sizeof(IndexUnit)); index_level.index_units = new (mem) IndexUnit[upper_size]; CalculateLB( upper_files, lower_files, &index_level, [this](const FileMetaData* a, const FileMetaData* b) -> int { return ucmp_->CompareWithoutTimestamp(a->smallest.user_key(), b->largest.user_key()); }, [](IndexUnit* index, int32_t f_idx) { index->smallest_lb = f_idx; }); CalculateLB( upper_files, lower_files, 
&index_level, [this](const FileMetaData* a, const FileMetaData* b) -> int { return ucmp_->CompareWithoutTimestamp(a->largest.user_key(), b->largest.user_key()); }, [](IndexUnit* index, int32_t f_idx) { index->largest_lb = f_idx; }); CalculateRB( upper_files, lower_files, &index_level, [this](const FileMetaData* a, const FileMetaData* b) -> int { return ucmp_->CompareWithoutTimestamp(a->smallest.user_key(), b->smallest.user_key()); }, [](IndexUnit* index, int32_t f_idx) { index->smallest_rb = f_idx; }); CalculateRB( upper_files, lower_files, &index_level, [this](const FileMetaData* a, const FileMetaData* b) -> int { return ucmp_->CompareWithoutTimestamp(a->largest.user_key(), b->smallest.user_key()); }, [](IndexUnit* index, int32_t f_idx) { index->largest_rb = f_idx; }); } level_rb_[num_levels_ - 1] = static_cast(files[num_levels_ - 1].size()) - 1; } void FileIndexer::CalculateLB( const std::vector& upper_files, const std::vector& lower_files, IndexLevel* index_level, std::function cmp_op, std::function set_index) { const int32_t upper_size = static_cast(upper_files.size()); const int32_t lower_size = static_cast(lower_files.size()); int32_t upper_idx = 0; int32_t lower_idx = 0; IndexUnit* index = index_level->index_units; while (upper_idx < upper_size && lower_idx < lower_size) { int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]); if (cmp == 0) { set_index(&index[upper_idx], lower_idx); ++upper_idx; } else if (cmp > 0) { // Lower level's file (largest) is smaller, a key won't hit in that // file. Move to next lower file ++lower_idx; } else { // Lower level's file becomes larger, update the index, and // move to the next upper file set_index(&index[upper_idx], lower_idx); ++upper_idx; } } while (upper_idx < upper_size) { // Lower files are exhausted, that means the remaining upper files are // greater than any lower files. Set the index to be the lower level size. 
set_index(&index[upper_idx], lower_size); ++upper_idx; } } void FileIndexer::CalculateRB( const std::vector& upper_files, const std::vector& lower_files, IndexLevel* index_level, std::function cmp_op, std::function set_index) { const int32_t upper_size = static_cast(upper_files.size()); const int32_t lower_size = static_cast(lower_files.size()); int32_t upper_idx = upper_size - 1; int32_t lower_idx = lower_size - 1; IndexUnit* index = index_level->index_units; while (upper_idx >= 0 && lower_idx >= 0) { int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]); if (cmp == 0) { set_index(&index[upper_idx], lower_idx); --upper_idx; } else if (cmp < 0) { // Lower level's file (smallest) is larger, a key won't hit in that // file. Move to next lower file. --lower_idx; } else { // Lower level's file becomes smaller, update the index, and move to // the next the upper file set_index(&index[upper_idx], lower_idx); --upper_idx; } } while (upper_idx >= 0) { // Lower files are exhausted, that means the remaining upper files are // smaller than any lower files. Set it to -1. set_index(&index[upper_idx], -1); --upper_idx; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/file_indexer.h000066400000000000000000000141261370372246700164440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include "memory/arena.h" #include "port/port.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class Comparator; struct FileMetaData; struct FdWithKeyRange; struct FileLevel; // The file tree structure in Version is prebuilt and the range of each file // is known. On Version::Get(), it uses binary search to find a potential file // and then check if a target key can be found in the file by comparing the key // to each file's smallest and largest key. The results of these comparisons // can be reused beyond checking if a key falls into a file's range. // With some pre-calculated knowledge, each key comparison that has been done // can serve as a hint to narrow down further searches: if a key compared to // be smaller than a file's smallest or largest, that comparison can be used // to find out the right bound of next binary search. Similarly, if a key // compared to be larger than a file's smallest or largest, it can be utilized // to find out the left bound of next binary search. // With these hints: it can greatly reduce the range of binary search, // especially for bottom levels, given that one file most likely overlaps with // only N files from level below (where N is max_bytes_for_level_multiplier). // So on level L, we will only look at ~N files instead of N^L files on the // naive approach. class FileIndexer { public: explicit FileIndexer(const Comparator* ucmp); size_t NumLevelIndex() const; size_t LevelIndexSize(size_t level) const; // Return a file index range in the next level to search for a key based on // smallest and largest key comparison for the current file specified by // level and file_index. When *left_index < *right_index, both index should // be valid and fit in the vector size. 
void GetNextLevelIndex(const size_t level, const size_t file_index, const int cmp_smallest, const int cmp_largest, int32_t* left_bound, int32_t* right_bound) const; void UpdateIndex(Arena* arena, const size_t num_levels, std::vector* const files); enum { // MSVC version 1800 still does not have constexpr for ::max() kLevelMaxIndex = ROCKSDB_NAMESPACE::port::kMaxInt32 }; private: size_t num_levels_; const Comparator* ucmp_; struct IndexUnit { IndexUnit() : smallest_lb(0), largest_lb(0), smallest_rb(-1), largest_rb(-1) {} // During file search, a key is compared against smallest and largest // from a FileMetaData. It can have 3 possible outcomes: // (1) key is smaller than smallest, implying it is also smaller than // larger. Precalculated index based on "smallest < smallest" can // be used to provide right bound. // (2) key is in between smallest and largest. // Precalculated index based on "smallest > greatest" can be used to // provide left bound. // Precalculated index based on "largest < smallest" can be used to // provide right bound. // (3) key is larger than largest, implying it is also larger than smallest. // Precalculated index based on "largest > largest" can be used to // provide left bound. // // As a result, we will need to do: // Compare smallest (<=) and largest keys from upper level file with // smallest key from lower level to get a right bound. // Compare smallest (>=) and largest keys from upper level file with // largest key from lower level to get a left bound. // // Example: // level 1: [50 - 60] // level 2: [1 - 40], [45 - 55], [58 - 80] // A key 35, compared to be less than 50, 3rd file on level 2 can be // skipped according to rule (1). LB = 0, RB = 1. // A key 53, sits in the middle 50 and 60. 1st file on level 2 can be // skipped according to rule (2)-a, but the 3rd file cannot be skipped // because 60 is greater than 58. LB = 1, RB = 2. // A key 70, compared to be larger than 60. 1st and 2nd file can be skipped // according to rule (3). 
LB = 2, RB = 2. // // Point to a left most file in a lower level that may contain a key, // which compares greater than smallest of a FileMetaData (upper level) int32_t smallest_lb; // Point to a left most file in a lower level that may contain a key, // which compares greater than largest of a FileMetaData (upper level) int32_t largest_lb; // Point to a right most file in a lower level that may contain a key, // which compares smaller than smallest of a FileMetaData (upper level) int32_t smallest_rb; // Point to a right most file in a lower level that may contain a key, // which compares smaller than largest of a FileMetaData (upper level) int32_t largest_rb; }; // Data structure to store IndexUnits in a whole level struct IndexLevel { size_t num_index; IndexUnit* index_units; IndexLevel() : num_index(0), index_units(nullptr) {} }; void CalculateLB( const std::vector& upper_files, const std::vector& lower_files, IndexLevel* index_level, std::function cmp_op, std::function set_index); void CalculateRB( const std::vector& upper_files, const std::vector& lower_files, IndexLevel* index_level, std::function cmp_op, std::function set_index); autovector next_level_index_; int32_t* level_rb_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/file_indexer_test.cc000066400000000000000000000235631370372246700176460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/file_indexer.h"

#include <cstring>
#include <memory>
#include <string>
#include <vector>

#include "db/dbformat.h"
#include "db/version_edit.h"
#include "port/stack_trace.h"
#include "rocksdb/comparator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"

namespace ROCKSDB_NAMESPACE {

// Comparator for the 8-byte keys produced by FileIndexerTest::IntKey(): each
// key holds the raw bytes of an int64_t and keys are ordered by that value.
class IntComparator : public Comparator {
 public:
  // Returns -1, 0 or 1 when `a` is less than, equal to or greater than `b`.
  int Compare(const Slice& a, const Slice& b) const override {
    assert(a.size() == 8);
    assert(b.size() == 8);

    // Copy the bytes out instead of dereferencing a casted pointer: Slice
    // data carries no alignment guarantee for int64_t.
    int64_t lhs = 0;
    int64_t rhs = 0;
    std::memcpy(&lhs, a.data(), sizeof(lhs));
    std::memcpy(&rhs, b.data(), sizeof(rhs));

    // Compare directly; computing `lhs - rhs` can overflow int64_t.
    if (lhs < rhs) {
      return -1;
    } else if (lhs == rhs) {
      return 0;
    } else {
      return 1;
    }
  }

  const char* Name() const override { return "IntComparator"; }

  // Key shortening is intentionally a no-op for this test comparator.
  void FindShortestSeparator(std::string* /*start*/,
                             const Slice& /*limit*/) const override {}

  void FindShortSuccessor(std::string* /*key*/) const override {}
};

// Fixture owning kNumLevels vectors of FileMetaData plus the FileIndexer
// under test.
class FileIndexerTest : public testing::Test {
 public:
  FileIndexerTest()
      : kNumLevels(4), files(new std::vector<FileMetaData*>[kNumLevels]) {}

  ~FileIndexerTest() override {
    ClearFiles();
    delete[] files;
  }

  // Appends a file covering [smallest, largest] to `level`.
  void AddFile(int level, int64_t smallest, int64_t largest) {
    auto* f = new FileMetaData();
    f->smallest = IntKey(smallest);
    f->largest = IntKey(largest);
    files[level].push_back(f);
  }

  // Builds an internal key whose user key is the 8 raw bytes of `v`.
  InternalKey IntKey(int64_t v) {
    return InternalKey(Slice(reinterpret_cast<char*>(&v), 8), 0, kTypeValue);
  }

  // Deletes every FileMetaData added so far and empties all levels.
  void ClearFiles() {
    for (uint32_t i = 0; i < kNumLevels; ++i) {
      for (auto* f : files[i]) {
        delete f;
      }
      files[i].clear();
    }
  }

  // Forwards to FileIndexer::GetNextLevelIndex() after resetting both
  // outputs to the sentinel 100, so a bound the indexer fails to write shows
  // up in the assertions.
  void GetNextLevelIndex(const uint32_t level, const uint32_t file_index,
                         const int cmp_smallest, const int cmp_largest,
                         int32_t* left_index, int32_t* right_index) {
    *left_index = 100;
    *right_index = 100;
    indexer->GetNextLevelIndex(level, file_index, cmp_smallest, cmp_largest,
                               left_index, right_index);
  }

  int32_t left = 100;
  int32_t right = 100;
  const uint32_t kNumLevels;
  IntComparator ucmp;
  // Owned here so that a fatal ASSERT_* (which returns from the test body)
  // no longer leaks the indexer. Tests still reset() it explicitly at the end
  // of the body, i.e. while their local Arena is alive, as before.
  std::unique_ptr<FileIndexer> indexer;
  std::vector<FileMetaData*>* files;
};

// Case 0: Empty
TEST_F(FileIndexerTest, Empty) {
  Arena arena;
  indexer.reset(new FileIndexer(&ucmp));
  indexer->UpdateIndex(&arena, 0, files);
  indexer.reset();
}

// Case 1: no overlap, files are on the left of next level files
TEST_F(FileIndexerTest, no_overlap_left) {
  Arena arena;
  indexer.reset(new FileIndexer(&ucmp));
  // level 1
  AddFile(1, 100, 200);
  AddFile(1, 300, 400);
  AddFile(1, 500, 600);
  // level 2
  AddFile(2, 1500, 1600);
  AddFile(2, 1601, 1699);
  AddFile(2, 1700, 1800);
  // level 3
  AddFile(3, 2500, 2600);
  AddFile(3, 2601, 2699);
  AddFile(3, 2700, 2800);
  indexer->UpdateIndex(&arena, kNumLevels, files);
  for (uint32_t level = 1; level < 3; ++level) {
    for (uint32_t f = 0; f < 3; ++f) {
      GetNextLevelIndex(level, f, -1, -1, &left, &right);
      ASSERT_EQ(0, left);
      ASSERT_EQ(-1, right);
      GetNextLevelIndex(level, f, 0, -1, &left, &right);
      ASSERT_EQ(0, left);
      ASSERT_EQ(-1, right);
      GetNextLevelIndex(level, f, 1, -1, &left, &right);
      ASSERT_EQ(0, left);
      ASSERT_EQ(-1, right);
      GetNextLevelIndex(level, f, 1, 0, &left, &right);
      ASSERT_EQ(0, left);
      ASSERT_EQ(-1, right);
      GetNextLevelIndex(level, f, 1, 1, &left, &right);
      ASSERT_EQ(0, left);
      ASSERT_EQ(2, right);
    }
  }
  indexer.reset();
  ClearFiles();
}

// Case 2: no overlap, files are on the right of next level files
TEST_F(FileIndexerTest, no_overlap_right) {
  Arena arena;
  indexer.reset(new FileIndexer(&ucmp));
  // level 1
  AddFile(1, 2100, 2200);
  AddFile(1, 2300, 2400);
  AddFile(1, 2500, 2600);
  // level 2
  AddFile(2, 1500, 1600);
  AddFile(2, 1501, 1699);
  AddFile(2, 1700, 1800);
  // level 3
  AddFile(3, 500, 600);
  AddFile(3, 501, 699);
  AddFile(3, 700, 800);
  indexer->UpdateIndex(&arena, kNumLevels, files);
  for (uint32_t level = 1; level < 3; ++level) {
    for (uint32_t f = 0; f < 3; ++f) {
      GetNextLevelIndex(level, f, -1, -1, &left, &right);
      ASSERT_EQ(f == 0 ? 0 : 3, left);
      ASSERT_EQ(2, right);
      GetNextLevelIndex(level, f, 0, -1, &left, &right);
      ASSERT_EQ(3, left);
      ASSERT_EQ(2, right);
      GetNextLevelIndex(level, f, 1, -1, &left, &right);
      ASSERT_EQ(3, left);
      ASSERT_EQ(2, right);
      GetNextLevelIndex(level, f, 1, 0, &left, &right);
      ASSERT_EQ(3, left);
      ASSERT_EQ(2, right);
      GetNextLevelIndex(level, f, 1, 1, &left, &right);
      ASSERT_EQ(3, left);
      ASSERT_EQ(2, right);
    }
  }
  // Files are released by the fixture destructor.
  indexer.reset();
}

// Case 3: empty L2
TEST_F(FileIndexerTest, empty_L2) {
  Arena arena;
  indexer.reset(new FileIndexer(&ucmp));
  for (uint32_t i = 1; i < kNumLevels; ++i) {
    ASSERT_EQ(0U, indexer->LevelIndexSize(i));
  }
  // level 1
  AddFile(1, 2100, 2200);
  AddFile(1, 2300, 2400);
  AddFile(1, 2500, 2600);
  // level 3
  AddFile(3, 500, 600);
  AddFile(3, 501, 699);
  AddFile(3, 700, 800);
  indexer->UpdateIndex(&arena, kNumLevels, files);
  for (uint32_t f = 0; f < 3; ++f) {
    GetNextLevelIndex(1, f, -1, -1, &left, &right);
    ASSERT_EQ(0, left);
    ASSERT_EQ(-1, right);
    GetNextLevelIndex(1, f, 0, -1, &left, &right);
    ASSERT_EQ(0, left);
    ASSERT_EQ(-1, right);
    GetNextLevelIndex(1, f, 1, -1, &left, &right);
    ASSERT_EQ(0, left);
    ASSERT_EQ(-1, right);
    GetNextLevelIndex(1, f, 1, 0, &left, &right);
    ASSERT_EQ(0, left);
    ASSERT_EQ(-1, right);
    GetNextLevelIndex(1, f, 1, 1, &left, &right);
    ASSERT_EQ(0, left);
    ASSERT_EQ(-1, right);
  }
  indexer.reset();
  ClearFiles();
}

// Case 4: mixed
TEST_F(FileIndexerTest, mixed) {
  Arena arena;
  indexer.reset(new FileIndexer(&ucmp));
  // level 1
  AddFile(1, 100, 200);
  AddFile(1, 250, 400);
  AddFile(1, 450, 500);
  // level 2
  AddFile(2, 100, 150);  // 0
  AddFile(2, 200, 250);  // 1
  AddFile(2, 251, 300);  // 2
  AddFile(2, 301, 350);  // 3
  AddFile(2, 500, 600);  // 4
  // level 3
  AddFile(3, 0, 50);
  AddFile(3, 100, 200);
  AddFile(3, 201, 250);
  indexer->UpdateIndex(&arena, kNumLevels, files);
  // level 1, 0
  GetNextLevelIndex(1, 0, -1, -1, &left, &right);
  ASSERT_EQ(0, left);
  ASSERT_EQ(0, right);
  GetNextLevelIndex(1, 0, 0, -1, &left, &right);
  ASSERT_EQ(0, left);
  ASSERT_EQ(0, right);
  GetNextLevelIndex(1, 0, 1, -1, &left, &right);
  ASSERT_EQ(0, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(1, 0, 1, 0, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(1, 0, 1, 1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(4, right);
  // level 1, 1
  GetNextLevelIndex(1, 1, -1, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(1, 1, 0, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(1, 1, 1, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(3, right);
  GetNextLevelIndex(1, 1, 1, 0, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(3, right);
  GetNextLevelIndex(1, 1, 1, 1, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(4, right);
  // level 1, 2
  GetNextLevelIndex(1, 2, -1, -1, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(3, right);
  GetNextLevelIndex(1, 2, 0, -1, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(3, right);
  GetNextLevelIndex(1, 2, 1, -1, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(4, right);
  GetNextLevelIndex(1, 2, 1, 0, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(4, right);
  GetNextLevelIndex(1, 2, 1, 1, &left, &right);
  ASSERT_EQ(4, left);
  ASSERT_EQ(4, right);
  // level 2, 0
  GetNextLevelIndex(2, 0, -1, -1, &left, &right);
  ASSERT_EQ(0, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(2, 0, 0, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(2, 0, 1, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(2, 0, 1, 0, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(2, 0, 1, 1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(2, right);
  // level 2, 1
  GetNextLevelIndex(2, 1, -1, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(2, 1, 0, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(1, right);
  GetNextLevelIndex(2, 1, 1, -1, &left, &right);
  ASSERT_EQ(1, left);
  ASSERT_EQ(2, right);
  GetNextLevelIndex(2, 1, 1, 0, &left, &right);
  ASSERT_EQ(2, left);
  ASSERT_EQ(2, right);
  GetNextLevelIndex(2, 1, 1, 1, &left, &right);
  ASSERT_EQ(2, left);
  ASSERT_EQ(2, right);
  // level 2, [2 - 4], no overlap
  for (uint32_t f = 2; f <= 4; ++f) {
    GetNextLevelIndex(2, f, -1, -1, &left, &right);
    ASSERT_EQ(f == 2 ? 2 : 3, left);
    ASSERT_EQ(2, right);
    GetNextLevelIndex(2, f, 0, -1, &left, &right);
    ASSERT_EQ(3, left);
    ASSERT_EQ(2, right);
    GetNextLevelIndex(2, f, 1, -1, &left, &right);
    ASSERT_EQ(3, left);
    ASSERT_EQ(2, right);
    GetNextLevelIndex(2, f, 1, 0, &left, &right);
    ASSERT_EQ(3, left);
    ASSERT_EQ(2, right);
    GetNextLevelIndex(2, f, 1, 1, &left, &right);
    ASSERT_EQ(3, left);
    ASSERT_EQ(2, right);
  }
  indexer.reset();
  ClearFiles();
}

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/db/filename_test.cc000066400000000000000000000132341370372246700167630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "file/filename.h"

#include "db/dbformat.h"
#include "logging/logging.h"
#include "port/port.h"
#include "test_util/testharness.h"

namespace ROCKSDB_NAMESPACE {

class FileNameTest : public testing::Test {};

// Checks ParseFileName() against names that must parse (with the expected
// number and type) and names that must be rejected.
TEST_F(FileNameTest, Parse) {
  FileType type;
  uint64_t number;

  // Bit flags selecting which ParseFileName() variant a case applies to:
  // info log in the DB directory, info log in a separate directory, or the
  // overload that takes no info-log prefix.
  const char kDefaultInfoLogDir = 1;
  const char kDifferentInfoLogDir = 2;
  const char kNoCheckLogDir = 4;
  const char kAllMode =
      kDefaultInfoLogDir | kDifferentInfoLogDir | kNoCheckLogDir;

  // Successful parses
  static struct {
    const char* fname;
    uint64_t number;
    FileType type;
    char mode;
  } cases[] = {
      {"100.log", 100, kLogFile, kAllMode},
      {"0.log", 0, kLogFile, kAllMode},
      {"0.sst", 0, kTableFile, kAllMode},
      {"CURRENT", 0, kCurrentFile, kAllMode},
      {"LOCK", 0, kDBLockFile, kAllMode},
      {"MANIFEST-2", 2, kDescriptorFile, kAllMode},
      {"MANIFEST-7", 7, kDescriptorFile, kAllMode},
      {"METADB-2", 2, kMetaDatabase, kAllMode},
      {"METADB-7", 7, kMetaDatabase, kAllMode},
      {"LOG", 0, kInfoLogFile, kDefaultInfoLogDir},
      {"LOG.old", 0, kInfoLogFile, kDefaultInfoLogDir},
      {"LOG.old.6688", 6688, kInfoLogFile, kDefaultInfoLogDir},
      {"rocksdb_dir_LOG", 0, kInfoLogFile, kDifferentInfoLogDir},
      {"rocksdb_dir_LOG.old", 0, kInfoLogFile, kDifferentInfoLogDir},
      {"rocksdb_dir_LOG.old.6688", 6688, kInfoLogFile, kDifferentInfoLogDir},
      {"18446744073709551615.log", 18446744073709551615ull, kLogFile,
       kAllMode},
  };
  for (char mode : {kDifferentInfoLogDir, kDefaultInfoLogDir, kNoCheckLogDir}) {
    // The prefix depends only on the mode, so build it once per mode.
    InfoLogPrefix info_log_prefix(mode != kDefaultInfoLogDir, "/rocksdb/dir");
    for (const auto& c : cases) {
      if ((c.mode & mode) == 0) {
        continue;  // case does not apply to this mode
      }
      const std::string f = c.fname;
      if (mode == kNoCheckLogDir) {
        ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
      } else {
        ASSERT_TRUE(ParseFileName(f, &number, info_log_prefix.prefix, &type))
            << f;
      }
      ASSERT_EQ(c.type, type) << f;
      ASSERT_EQ(c.number, number) << f;
    }
  }

  // Errors
  static const char* errors[] = {
      "",
      "foo",
      "foo-dx-100.log",
      ".log",
      "manifest",
      "CURREN",
      "CURRENTX",
      "MANIFES",
      "MANIFEST",
      "MANIFEST-",
      "XMANIFEST-3",
      "MANIFEST-3x",
      "META",
      "METADB",
      "METADB-",
      "XMETADB-3",
      "METADB-3x",
      "LOC",
      "LOCKx",
      "LO",
      "LOGx",
      "18446744073709551616.log",
      "184467440737095516150.log",
      "100",
      "100.",
      "100.lop",
  };
  for (const char* error : errors) {
    const std::string f = error;
    ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
  }
}

// Checks the info-log file names generated for a DB, both when the log lives
// in the DB directory (empty log dir) and in a separate log directory.
TEST_F(FileNameTest, InfoLogFileName) {
  std::string dbname = ("/data/rocksdb");
  std::string db_absolute_path;
  // Fail here, rather than on a confusing name mismatch below, if the
  // absolute path cannot be resolved.
  ASSERT_OK(Env::Default()->GetAbsolutePath(dbname, &db_absolute_path));

  ASSERT_EQ("/data/rocksdb/LOG", InfoLogFileName(dbname, db_absolute_path, ""));
  ASSERT_EQ("/data/rocksdb/LOG.old.666",
            OldInfoLogFileName(dbname, 666u, db_absolute_path, ""));

  ASSERT_EQ("/data/rocksdb_log/data_rocksdb_LOG",
            InfoLogFileName(dbname, db_absolute_path, "/data/rocksdb_log"));
  ASSERT_EQ(
      "/data/rocksdb_log/data_rocksdb_LOG.old.666",
      OldInfoLogFileName(dbname, 666u, db_absolute_path, "/data/rocksdb_log"));
}

// Round-trips every file-name builder through ParseFileName(): the name must
// start with the directory passed in, and the remainder must parse back to
// the same number and the matching FileType.
TEST_F(FileNameTest, Construction) {
  uint64_t number;
  FileType type;
  std::string fname;

  fname = CurrentFileName("foo");
  ASSERT_EQ("foo/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(0U, number);
  ASSERT_EQ(kCurrentFile, type);

  fname = LockFileName("foo");
  ASSERT_EQ("foo/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(0U, number);
  ASSERT_EQ(kDBLockFile, type);

  fname = LogFileName("foo", 192);
  ASSERT_EQ("foo/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(192U, number);
  ASSERT_EQ(kLogFile, type);

  // Path id 1 of {"foo", "bar"} must yield the same name as path id 0 of
  // {"bar"}.
  fname = TableFileName({DbPath("bar", 0)}, 200, 0);
  std::string fname1 =
      TableFileName({DbPath("foo", 0), DbPath("bar", 0)}, 200, 1);
  ASSERT_EQ(fname, fname1);
  ASSERT_EQ("bar/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(200U, number);
  ASSERT_EQ(kTableFile, type);

  fname = DescriptorFileName("bar", 100);
  ASSERT_EQ("bar/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(100U, number);
  ASSERT_EQ(kDescriptorFile, type);

  fname = TempFileName("tmp", 999);
  ASSERT_EQ("tmp/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(999U, number);
  ASSERT_EQ(kTempFile, type);

  fname = MetaDatabaseName("met", 100);
  ASSERT_EQ("met/", std::string(fname.data(), 4));
  ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
  ASSERT_EQ(100U, number);
  ASSERT_EQ(kMetaDatabase, type);
}

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/db/flush_job.cc000066400000000000000000000437131370372246700161240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/flush_job.h"

#include <cinttypes>

#include <algorithm>
#include <vector>

#include "db/builder.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/event_helpers.h"
#include "db/log_reader.h"
#include "db/log_writer.h"
#include "db/memtable.h"
#include "db/memtable_list.h"
#include "db/merge_context.h"
#include "db/range_tombstone_fragmenter.h"
#include "db/version_set.h"
#include "file/file_util.h"
#include "file/filename.h"
#include "logging/event_logger.h"
#include "logging/log_buffer.h"
#include "logging/logging.h"
#include "monitoring/iostats_context_imp.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/thread_status_util.h"
#include "port/port.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "table/block_based/block.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/merging_iterator.h"
#include "table/table_builder.h"
#include "table/two_level_iterator.h"
#include "test_util/sync_point.h"
#include "util/coding.h"
#include "util/mutexlock.h"
#include "util/stop_watch.h"

namespace ROCKSDB_NAMESPACE {

// Maps a FlushReason to the human-readable string written to the
// "flush_started" event. Unknown values map to "Invalid".
const char* GetFlushReasonString(FlushReason flush_reason) {
  switch (flush_reason) {
    case FlushReason::kOthers:
      return "Other Reasons";
    case FlushReason::kGetLiveFiles:
      return "Get Live Files";
    case FlushReason::kShutDown:
      return "Shut down";
    case FlushReason::kExternalFileIngestion:
      return "External File Ingestion";
    case FlushReason::kManualCompaction:
      return "Manual Compaction";
    case FlushReason::kWriteBufferManager:
      return "Write Buffer Manager";
    case FlushReason::kWriteBufferFull:
      return "Write Buffer Full";
    case FlushReason::kTest:
      return "Test";
    case FlushReason::kDeleteFiles:
      return "Delete Files";
    case FlushReason::kAutoCompaction:
      return "Auto Compaction";
    case FlushReason::kManualFlush:
      return "Manual Flush";
    case FlushReason::kErrorRecovery:
      return "Error Recovery";
    default:
      return "Invalid";
  }
}

// Stores the job's collaborators and marks the calling thread as running a
// flush. No memtable is picked here; see PickMemTable().
FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
                   const ImmutableDBOptions& db_options,
                   const MutableCFOptions& mutable_cf_options,
                   const uint64_t* max_memtable_id,
                   const FileOptions& file_options, VersionSet* versions,
                   InstrumentedMutex* db_mutex,
                   std::atomic<bool>* shutting_down,
                   std::vector<SequenceNumber> existing_snapshots,
                   SequenceNumber earliest_write_conflict_snapshot,
                   SnapshotChecker* snapshot_checker, JobContext* job_context,
                   LogBuffer* log_buffer, FSDirectory* db_directory,
                   FSDirectory* output_file_directory,
                   CompressionType output_compression, Statistics* stats,
                   EventLogger* event_logger, bool measure_io_stats,
                   const bool sync_output_directory, const bool write_manifest,
                   Env::Priority thread_pri)
    : dbname_(dbname),
      cfd_(cfd),
      db_options_(db_options),
      mutable_cf_options_(mutable_cf_options),
      max_memtable_id_(max_memtable_id),
      file_options_(file_options),
      versions_(versions),
      db_mutex_(db_mutex),
      shutting_down_(shutting_down),
      existing_snapshots_(std::move(existing_snapshots)),
      earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
      snapshot_checker_(snapshot_checker),
      job_context_(job_context),
      log_buffer_(log_buffer),
      db_directory_(db_directory),
      output_file_directory_(output_file_directory),
      output_compression_(output_compression),
      stats_(stats),
      event_logger_(event_logger),
      measure_io_stats_(measure_io_stats),
      sync_output_directory_(sync_output_directory),
      write_manifest_(write_manifest),
      edit_(nullptr),
      base_(nullptr),
      pick_memtable_called(false),
      thread_pri_(thread_pri) {
  // Update the thread status to indicate flush.
  ReportStartedFlush();
  TEST_SYNC_POINT("FlushJob::FlushJob()");
}

FlushJob::~FlushJob() {
  ThreadStatusUtil::ResetThreadStatus();
}

// Publishes column family, operation type and job id to the thread-status
// facility and zeroes the per-thread bytes_written counter.
void FlushJob::ReportStartedFlush() {
  ThreadStatusUtil::SetColumnFamily(cfd_, cfd_->ioptions()->env,
                                    db_options_.enable_thread_tracking);
  ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_FLUSH);
  // NOTE(review): the job id is stored under COMPACTION_JOB_ID even though
  // this is a flush -- presumably the property slot is shared; confirm.
  ThreadStatusUtil::SetThreadOperationProperty(
      ThreadStatus::COMPACTION_JOB_ID,
      job_context_->job_id);
  IOSTATS_RESET(bytes_written);
}

// Adds the approximate memory usage of `mems` to the thread's
// FLUSH_BYTES_MEMTABLES property.
void FlushJob::ReportFlushInputSize(const autovector<MemTable*>& mems) {
  uint64_t input_size = 0;
  for (auto* mem : mems) {
    input_size += mem->ApproximateMemoryUsage();
  }
  ThreadStatusUtil::IncreaseThreadOperationProperty(
      ThreadStatus::FLUSH_BYTES_MEMTABLES,
      input_size);
}

// Reports the bytes written since the last reset to the statistics object
// and to the thread status, then resets the counter.
void FlushJob::RecordFlushIOStats() {
  RecordTick(stats_, FLUSH_WRITE_BYTES, IOSTATS(bytes_written));
  ThreadStatusUtil::IncreaseThreadOperationProperty(
      ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written));
  IOSTATS_RESET(bytes_written);
}

// Selects the memtables to flush, prepares the VersionEdit describing the
// flush, reserves the output file number and takes a reference on the current
// Version. Must be called exactly once, with the DB mutex held.
void FlushJob::PickMemTable() {
  db_mutex_->AssertHeld();
  assert(!pick_memtable_called);
  pick_memtable_called = true;
  // Save the contents of the earliest memtable as a new Table
  cfd_->imm()->PickMemtablesToFlush(max_memtable_id_, &mems_);
  if (mems_.empty()) {
    // Nothing to flush: edit_ and base_ stay null.
    return;
  }

  ReportFlushInputSize(mems_);

  // entries mems are (implicitly) sorted in ascending order by their created
  // time. We will use the first memtable's `edit` to keep the meta info for
  // this flush.
  MemTable* m = mems_[0];
  edit_ = m->GetEdits();
  edit_->SetPrevLogNumber(0);
  // SetLogNumber(log_num) indicates logs with number smaller than log_num
  // will no longer be picked up for recovery.
  edit_->SetLogNumber(mems_.back()->GetNextLogNumber());
  edit_->SetColumnFamily(cfd_->GetID());

  // path 0 for level 0 file.
  meta_.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0);

  base_ = cfd_->current();
  base_->Ref();  // it is likely that we do not need this reference
}

// Writes the picked memtables to a level-0 table and, when write_manifest_ is
// set, installs the result. On failure the memtable flush is rolled back.
// Requires the DB mutex to be held and PickMemTable() to have been called.
// On success `*file_meta` (if non-null) receives the new file's metadata.
Status FlushJob::Run(LogsWithPrepTracker* prep_tracker,
                     FileMetaData* file_meta) {
  TEST_SYNC_POINT("FlushJob::Start");
  db_mutex_->AssertHeld();
  assert(pick_memtable_called);
  AutoThreadOperationStageUpdater stage_run(
      ThreadStatus::STAGE_FLUSH_RUN);
  if (mems_.empty()) {
    ROCKS_LOG_BUFFER(log_buffer_, "[%s] Nothing in memtable to flush",
                     cfd_->GetName().c_str());
    return Status::OK();
  }

  // I/O measurement variables
  PerfLevel prev_perf_level = PerfLevel::kEnableTime;
  uint64_t prev_write_nanos = 0;
  uint64_t prev_fsync_nanos = 0;
  uint64_t prev_range_sync_nanos = 0;
  uint64_t prev_prepare_write_nanos = 0;
  uint64_t prev_cpu_write_nanos = 0;
  uint64_t prev_cpu_read_nanos = 0;
  if (measure_io_stats_) {
    // Snapshot the counters so only this job's delta is reported below.
    prev_perf_level = GetPerfLevel();
    SetPerfLevel(PerfLevel::kEnableTime);
    prev_write_nanos = IOSTATS(write_nanos);
    prev_fsync_nanos = IOSTATS(fsync_nanos);
    prev_range_sync_nanos = IOSTATS(range_sync_nanos);
    prev_prepare_write_nanos = IOSTATS(prepare_write_nanos);
    prev_cpu_write_nanos = IOSTATS(cpu_write_nanos);
    prev_cpu_read_nanos = IOSTATS(cpu_read_nanos);
  }

  // This will release and re-acquire the mutex.
  Status s = WriteLevel0Table();

  if (s.ok() && cfd_->IsDropped()) {
    // NOTE(review): the message says "compaction" although this is the flush
    // path; left unchanged because it is a runtime string.
    s = Status::ColumnFamilyDropped("Column family dropped during compaction");
  }
  if ((s.ok() || s.IsColumnFamilyDropped()) &&
      shutting_down_->load(std::memory_order_acquire)) {
    s = Status::ShutdownInProgress("Database shutdown");
  }

  if (!s.ok()) {
    cfd_->imm()->RollbackMemtableFlush(mems_, meta_.fd.GetNumber());
  } else if (write_manifest_) {
    TEST_SYNC_POINT("FlushJob::InstallResults");
    // Replace immutable memtable with the generated Table
    IOStatus tmp_io_s;
    s = cfd_->imm()->TryInstallMemtableFlushResults(
        cfd_, mutable_cf_options_, mems_, prep_tracker, versions_, db_mutex_,
        meta_.fd.GetNumber(), &job_context_->memtables_to_free, db_directory_,
        log_buffer_, &committed_flush_jobs_info_, &tmp_io_s);
    if (!tmp_io_s.ok()) {
      io_status_ = tmp_io_s;
    }
  }

  if (s.ok() && file_meta != nullptr) {
    *file_meta = meta_;
  }
  RecordFlushIOStats();

  // When measure_io_stats_ is true, the default 512 bytes is not enough.
  auto stream = event_logger_->LogToBuffer(log_buffer_, 1024);
  stream << "job" << job_context_->job_id << "event"
         << "flush_finished";
  stream << "output_compression"
         << CompressionTypeToString(output_compression_);
  stream << "lsm_state";
  stream.StartArray();
  auto vstorage = cfd_->current()->storage_info();
  for (int level = 0; level < vstorage->num_levels(); ++level) {
    stream << vstorage->NumLevelFiles(level);
  }
  stream.EndArray();
  stream << "immutable_memtables" << cfd_->imm()->NumNotFlushed();

  if (measure_io_stats_) {
    if (prev_perf_level != PerfLevel::kEnableTime) {
      SetPerfLevel(prev_perf_level);
    }
    stream << "file_write_nanos" << (IOSTATS(write_nanos) - prev_write_nanos);
    stream << "file_range_sync_nanos"
           << (IOSTATS(range_sync_nanos) - prev_range_sync_nanos);
    stream << "file_fsync_nanos" << (IOSTATS(fsync_nanos) - prev_fsync_nanos);
    stream << "file_prepare_write_nanos"
           << (IOSTATS(prepare_write_nanos) - prev_prepare_write_nanos);
    stream << "file_cpu_write_nanos"
           << (IOSTATS(cpu_write_nanos) - prev_cpu_write_nanos);
    stream << "file_cpu_read_nanos"
           << (IOSTATS(cpu_read_nanos) - prev_cpu_read_nanos);
  }

  return s;
}

// Abandons a job after PickMemTable(): drops the Version reference taken
// there. Requires the DB mutex to be held.
void FlushJob::Cancel() {
  db_mutex_->AssertHeld();
  assert(base_ != nullptr);
  base_->Unref();
}

// Builds the level-0 table from mems_. The DB mutex is held on entry and on
// return, but is released while the table is being built. On success with a
// non-empty output file, the file is added to edit_. Flush statistics are
// recorded regardless of the outcome.
Status FlushJob::WriteLevel0Table() {
  AutoThreadOperationStageUpdater stage_updater(
      ThreadStatus::STAGE_FLUSH_WRITE_L0);
  db_mutex_->AssertHeld();
  const uint64_t start_micros = db_options_.env->NowMicros();
  const uint64_t start_cpu_micros = db_options_.env->NowCPUNanos() / 1000;
  Status s;
  {
    auto write_hint = cfd_->CalculateSSTWriteHint(0);
    db_mutex_->Unlock();
    if (log_buffer_) {
      log_buffer_->FlushBufferToLog();
    }
    // memtables and range_del_iters store internal iterators over each data
    // memtable and its associated range deletion memtable, respectively, at
    // corresponding indexes.
    std::vector<InternalIterator*> memtables;
    std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
        range_del_iters;
    ReadOptions ro;
    ro.total_order_seek = true;
    Arena arena;
    uint64_t total_num_entries = 0, total_num_deletes = 0;
    uint64_t total_data_size = 0;
    size_t total_memory_usage = 0;
    for (MemTable* m : mems_) {
      ROCKS_LOG_INFO(
          db_options_.info_log,
          "[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n",
          cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber());
      memtables.push_back(m->NewIterator(ro, &arena));
      auto* range_del_iter =
          m->NewRangeTombstoneIterator(ro, kMaxSequenceNumber);
      if (range_del_iter != nullptr) {
        range_del_iters.emplace_back(range_del_iter);
      }
      total_num_entries += m->num_entries();
      total_num_deletes += m->num_deletes();
      total_data_size += m->get_data_size();
      total_memory_usage += m->ApproximateMemoryUsage();
    }

    event_logger_->Log() << "job" << job_context_->job_id << "event"
                         << "flush_started"
                         << "num_memtables" << mems_.size() << "num_entries"
                         << total_num_entries << "num_deletes"
                         << total_num_deletes << "total_data_size"
                         << total_data_size << "memory_usage"
                         << total_memory_usage << "flush_reason"
                         << GetFlushReasonString(cfd_->GetFlushReason());

    {
      ScopedArenaIterator iter(
          NewMergingIterator(&cfd_->internal_comparator(), &memtables[0],
                             static_cast<int>(memtables.size()), &arena));
      ROCKS_LOG_INFO(db_options_.info_log,
                     "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": started",
                     cfd_->GetName().c_str(), job_context_->job_id,
                     meta_.fd.GetNumber());

      TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression",
                               &output_compression_);
      int64_t _current_time = 0;
      auto status = db_options_.env->GetCurrentTime(&_current_time);
      // Safe to proceed even if GetCurrentTime fails. So, log and proceed.
      if (!status.ok()) {
        ROCKS_LOG_WARN(
            db_options_.info_log,
            "Failed to get current time to populate creation_time property. "
            "Status: %s",
            status.ToString().c_str());
      }
      const uint64_t current_time = static_cast<uint64_t>(_current_time);

      uint64_t oldest_key_time =
          mems_.front()->ApproximateOldestKeyTime();

      // It's not clear whether oldest_key_time is always available. In case
      // it is not available, use current_time.
      meta_.oldest_ancester_time = std::min(current_time, oldest_key_time);
      meta_.file_creation_time = current_time;

      uint64_t creation_time = (cfd_->ioptions()->compaction_style ==
                                CompactionStyle::kCompactionStyleFIFO)
                                   ? current_time
                                   : meta_.oldest_ancester_time;

      IOStatus io_s;
      s = BuildTable(
          dbname_, db_options_.env, db_options_.fs.get(), *cfd_->ioptions(),
          mutable_cf_options_, file_options_, cfd_->table_cache(), iter.get(),
          std::move(range_del_iters), &meta_, cfd_->internal_comparator(),
          cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(),
          cfd_->GetName(), existing_snapshots_,
          earliest_write_conflict_snapshot_, snapshot_checker_,
          output_compression_, mutable_cf_options_.sample_for_compression,
          mutable_cf_options_.compression_opts,
          mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(),
          TableFileCreationReason::kFlush, &io_s, event_logger_,
          job_context_->job_id, Env::IO_HIGH, &table_properties_, 0 /* level */,
          creation_time, oldest_key_time, write_hint, current_time);
      if (!io_s.ok()) {
        io_status_ = io_s;
      }
      LogFlush(db_options_.info_log);
    }
    ROCKS_LOG_INFO(db_options_.info_log,
                   "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": %" PRIu64
                   " bytes %s"
                   "%s",
                   cfd_->GetName().c_str(), job_context_->job_id,
                   meta_.fd.GetNumber(), meta_.fd.GetFileSize(),
                   s.ToString().c_str(),
                   meta_.marked_for_compaction ? " (needs compaction)" : "");

    if (s.ok() && output_file_directory_ != nullptr && sync_output_directory_) {
      s = output_file_directory_->Fsync(IOOptions(), nullptr);
    }
    TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table", &mems_);
    db_mutex_->Lock();
  }
  // Drop the reference taken in PickMemTable().
  base_->Unref();

  // Note that if file_size is zero, the file has been deleted and
  // should not be added to the manifest.
  if (s.ok() && meta_.fd.GetFileSize() > 0) {
    // if we have more than 1 background thread, then we cannot
    // insert files directly into higher levels because some other
    // threads could be concurrently producing compacted files for
    // that key range.
    // Add file to L0
    edit_->AddFile(0 /* level */, meta_.fd.GetNumber(), meta_.fd.GetPathId(),
                   meta_.fd.GetFileSize(), meta_.smallest, meta_.largest,
                   meta_.fd.smallest_seqno, meta_.fd.largest_seqno,
                   meta_.marked_for_compaction, meta_.oldest_blob_file_number,
                   meta_.oldest_ancester_time, meta_.file_creation_time,
                   meta_.file_checksum, meta_.file_checksum_func_name);
  }
#ifndef ROCKSDB_LITE
  // Piggyback FlushJobInfo on the first flushed memtable.
  mems_[0]->SetFlushJobInfo(GetFlushJobInfo());
#endif  // !ROCKSDB_LITE

  // Note that here we treat flush as level 0 compaction in internal stats
  InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
  stats.micros = db_options_.env->NowMicros() - start_micros;
  stats.cpu_micros = db_options_.env->NowCPUNanos() / 1000 - start_cpu_micros;
  stats.bytes_written = meta_.fd.GetFileSize();
  RecordTimeToHistogram(stats_, FLUSH_TIME, stats.micros);
  cfd_->internal_stats()->AddCompactionStats(0 /* level */, thread_pri_, stats);
  cfd_->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED,
                                     meta_.fd.GetFileSize());
  RecordFlushIOStats();
  return s;
}

#ifndef ROCKSDB_LITE
// Builds the FlushJobInfo describing this job's output file. Requires the DB
// mutex to be held. The file path is always formed from cf_paths[0].
std::unique_ptr<FlushJobInfo> FlushJob::GetFlushJobInfo() const {
  db_mutex_->AssertHeld();
  std::unique_ptr<FlushJobInfo> info(new FlushJobInfo{});
  info->cf_id = cfd_->GetID();
  info->cf_name = cfd_->GetName();

  const uint64_t file_number = meta_.fd.GetNumber();
  info->file_path =
      MakeTableFileName(cfd_->ioptions()->cf_paths[0].path, file_number);
  info->file_number = file_number;
  info->oldest_blob_file_number = meta_.oldest_blob_file_number;
  info->thread_id = db_options_.env->GetThreadID();
  info->job_id = job_context_->job_id;
  info->smallest_seqno = meta_.fd.smallest_seqno;
  info->largest_seqno = meta_.fd.largest_seqno;
  info->table_properties = table_properties_;
  info->flush_reason = cfd_->GetFlushReason();
  return info;
}
#endif  // !ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/flush_job.h000066400000000000000000000137301370372246700157620ustar00rootroot00000000000000//
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once

// NOTE(review): the eight bare #include directives below, and several
// declarations further down (std::atomic*, std::vector, autovector&,
// std::list>, std::unique_ptr), appear to have lost their angle-bracket
// part during text extraction. Restore them from the upstream header before
// attempting to compile this text.
#include
#include
#include
#include
#include
#include
#include
#include

#include "db/column_family.h"
#include "db/dbformat.h"
#include "db/flush_scheduler.h"
#include "db/internal_stats.h"
#include "db/job_context.h"
#include "db/log_writer.h"
#include "db/logs_with_prep_tracker.h"
#include "db/memtable_list.h"
#include "db/snapshot_impl.h"
#include "db/version_edit.h"
#include "db/write_controller.h"
#include "db/write_thread.h"
#include "logging/event_logger.h"
#include "monitoring/instrumented_mutex.h"
#include "options/db_options.h"
#include "port/port.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/listener.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/transaction_log.h"
#include "table/scoped_arena_iterator.h"
#include "util/autovector.h"
#include "util/stop_watch.h"
#include "util/thread_local.h"

namespace ROCKSDB_NAMESPACE {

class DBImpl;
class MemTable;
class SnapshotChecker;
class TableCache;
class Version;
class VersionEdit;
class VersionSet;
class Arena;

// Bundles the inputs and per-job state for flushing memtables of one column
// family (the constructor takes a single ColumnFamilyData*).
//
// Calling protocol, as stated by the method comments below: call
// PickMemTable() with the DB mutex held, then call exactly one of Run() or
// Cancel().
class FlushJob {
 public:
  // TODO(icanadi) make effort to reduce number of parameters here
  // IMPORTANT: mutable_cf_options needs to be alive while FlushJob is alive
  // (it is stored by reference in mutable_cf_options_; dbname and db_options
  // are stored by reference as well).
  FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
           const ImmutableDBOptions& db_options,
           const MutableCFOptions& mutable_cf_options,
           const uint64_t* max_memtable_id, const FileOptions& file_options,
           VersionSet* versions, InstrumentedMutex* db_mutex,
           std::atomic* shutting_down,
           std::vector existing_snapshots,
           SequenceNumber earliest_write_conflict_snapshot,
           SnapshotChecker* snapshot_checker, JobContext* job_context,
           LogBuffer* log_buffer, FSDirectory* db_directory,
           FSDirectory* output_file_directory,
           CompressionType output_compression, Statistics* stats,
           EventLogger* event_logger, bool measure_io_stats,
           const bool sync_output_directory, const bool write_manifest,
           Env::Priority thread_pri);

  ~FlushJob();

  // Require db_mutex held.
  // Once PickMemTable() is called, either Run() or Cancel() has to be called.
  void PickMemTable();
  // Both arguments are optional and default to nullptr.
  Status Run(LogsWithPrepTracker* prep_tracker = nullptr,
             FileMetaData* file_meta = nullptr);
  void Cancel();
  // Returns the memtable list held in mems_ (set by PickMemTable(), per the
  // member comment below).
  const autovector& GetMemTables() const { return mems_; }

#ifndef ROCKSDB_LITE
  // Returns a pointer to the internal list of FlushJobInfo collected for
  // committed flush jobs; see committed_flush_jobs_info_.
  std::list>* GetCommittedFlushJobsInfo() {
    return &committed_flush_jobs_info_;
  }
#endif  // !ROCKSDB_LITE

  // Return the IO status
  IOStatus io_status() const { return io_status_; }

 private:
  void ReportStartedFlush();
  void ReportFlushInputSize(const autovector& mems);
  void RecordFlushIOStats();
  Status WriteLevel0Table();
#ifndef ROCKSDB_LITE
  std::unique_ptr GetFlushJobInfo() const;
#endif  // !ROCKSDB_LITE

  const std::string& dbname_;
  ColumnFamilyData* cfd_;
  const ImmutableDBOptions& db_options_;
  const MutableCFOptions& mutable_cf_options_;
  // Pointer to a variable storing the largest memtable id to flush in this
  // flush job. RocksDB uses this variable to select the memtables to flush in
  // this job. All memtables in this column family with an ID smaller than or
  // equal to *max_memtable_id_ will be selected for flush. If null, then all
  // memtables in the column family will be selected.
  const uint64_t* max_memtable_id_;
  const FileOptions file_options_;
  VersionSet* versions_;
  InstrumentedMutex* db_mutex_;
  std::atomic* shutting_down_;
  std::vector existing_snapshots_;
  SequenceNumber earliest_write_conflict_snapshot_;
  SnapshotChecker* snapshot_checker_;
  JobContext* job_context_;
  LogBuffer* log_buffer_;
  FSDirectory* db_directory_;
  FSDirectory* output_file_directory_;
  CompressionType output_compression_;
  Statistics* stats_;
  EventLogger* event_logger_;
  TableProperties table_properties_;
  bool measure_io_stats_;
  // True if this flush job should call fsync on the output directory. False
  // otherwise.
  // Usually sync_output_directory_ is true. A flush job needs to call sync on
  // the output directory before committing to the MANIFEST.
  // However, an individual flush job does not have to call sync on the output
  // directory if it is part of an atomic flush. After all flush jobs in the
  // atomic flush succeed, call sync once on each distinct output directory.
  const bool sync_output_directory_;
  // True if this flush job should write to MANIFEST after successfully
  // flushing memtables. False otherwise.
  // Usually write_manifest_ is true. A flush job commits to the MANIFEST after
  // flushing the memtables.
  // However, an individual flush job cannot rashly write to the MANIFEST
  // immediately after it finishes the flush if it is part of an atomic flush.
  // In this case, only after all flush jobs succeed in flush can RocksDB
  // commit to the MANIFEST.
  const bool write_manifest_;
  // The current flush job can commit flush result of a concurrent flush job.
  // We collect FlushJobInfo of all jobs committed by current job and fire
  // OnFlushCompleted for them.
  std::list> committed_flush_jobs_info_;

  // Variables below are set by PickMemTable():
  FileMetaData meta_;
  autovector mems_;
  VersionEdit* edit_;
  Version* base_;
  // NOTE(review): unlike every other data member this one has no trailing
  // underscore; renaming it would also require touching the .cc file.
  bool pick_memtable_called;
  Env::Priority thread_pri_;
  IOStatus io_status_;
};

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/flush_job_test.cc000066400000000000000000000447571370372246700171740ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#include
#include
#include
#include

#include "db/blob/blob_index.h"
#include "db/column_family.h"
#include "db/db_impl/db_impl.h"
#include "db/flush_job.h"
#include "db/version_set.h"
#include "file/writable_file_writer.h"
#include "rocksdb/cache.h"
#include "rocksdb/write_buffer_manager.h"
#include "table/mock_table.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// TODO(icanadi) Mock out everything else:
// 1. VersionSet
// 2.
Memtable class FlushJobTest : public testing::Test { public: FlushJobTest() : env_(Env::Default()), fs_(std::make_shared(env_)), dbname_(test::PerThreadDBPath("flush_job_test")), options_(), db_options_(options_), column_family_names_({kDefaultColumnFamilyName, "foo", "bar"}), table_cache_(NewLRUCache(50000, 16)), write_buffer_manager_(db_options_.db_write_buffer_size), shutting_down_(false), mock_table_factory_(new mock::MockTableFactory()) { EXPECT_OK(env_->CreateDirIfMissing(dbname_)); db_options_.db_paths.emplace_back(dbname_, std::numeric_limits::max()); db_options_.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); // TODO(icanadi) Remove this once we mock out VersionSet NewDB(); std::vector column_families; cf_options_.table_factory = mock_table_factory_; for (const auto& cf_name : column_family_names_) { column_families.emplace_back(cf_name, cf_options_); } db_options_.env = env_; db_options_.fs = fs_; versions_.reset(new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr)); EXPECT_OK(versions_->Recover(column_families, false)); } void NewDB() { SetIdentityFile(env_, dbname_); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { DBImpl* impl = new DBImpl(DBOptions(), dbname_); std::string db_id; impl->GetDbIdentityFromIdentityFile(&db_id); new_db.SetDBId(db_id); } new_db.SetLogNumber(0); new_db.SetNextFile(2); new_db.SetLastSequence(0); autovector new_cfs; SequenceNumber last_seq = 1; uint32_t cf_id = 1; for (size_t i = 1; i != column_family_names_.size(); ++i) { VersionEdit new_cf; new_cf.AddColumnFamily(column_family_names_[i]); new_cf.SetColumnFamily(cf_id++); new_cf.SetLogNumber(0); new_cf.SetNextFile(2); new_cf.SetLastSequence(last_seq++); new_cfs.emplace_back(new_cf); } const std::string manifest = DescriptorFileName(dbname_, 1); std::unique_ptr file; Status s = env_->NewWritableFile( manifest, &file, env_->OptimizeForManifestWrite(env_options_)); 
ASSERT_OK(s); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), manifest, EnvOptions())); { log::Writer log(std::move(file_writer), 0, false); std::string record; new_db.EncodeTo(&record); s = log.AddRecord(record); for (const auto& e : new_cfs) { record.clear(); e.EncodeTo(&record); s = log.AddRecord(record); ASSERT_OK(s); } } ASSERT_OK(s); // Make "CURRENT" file that points to the new manifest file. s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); } Env* env_; std::shared_ptr fs_; std::string dbname_; EnvOptions env_options_; Options options_; ImmutableDBOptions db_options_; const std::vector column_family_names_; std::shared_ptr table_cache_; WriteController write_controller_; WriteBufferManager write_buffer_manager_; ColumnFamilyOptions cf_options_; std::unique_ptr versions_; InstrumentedMutex mutex_; std::atomic shutting_down_; std::shared_ptr mock_table_factory_; }; TEST_F(FlushJobTest, Empty) { JobContext job_context(0); auto cfd = versions_->GetColumnFamilySet()->GetDefault(); EventLogger event_logger(db_options_.info_log.get()); SnapshotChecker* snapshot_checker = nullptr; // not relavant FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, *cfd->GetLatestMutableCFOptions(), nullptr /* memtable_id */, env_options_, versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression, nullptr, &event_logger, false, true /* sync_output_directory */, true /* write_manifest */, Env::Priority::USER); { InstrumentedMutexLock l(&mutex_); flush_job.PickMemTable(); ASSERT_OK(flush_job.Run()); } job_context.Clean(); } TEST_F(FlushJobTest, NonEmpty) { JobContext job_context(0); auto cfd = versions_->GetColumnFamilySet()->GetDefault(); auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); new_mem->Ref(); auto inserted_keys = mock::MakeMockFile(); // Test data: // seqno 
[ 1, 2 ... 8998, 8999, 9000, 9001, 9002 ... 9999 ] // key [ 1001, 1002 ... 9998, 9999, 0, 1, 2 ... 999 ] // range-delete "9995" -> "9999" at seqno 10000 // blob references with seqnos 10001..10006 for (int i = 1; i < 10000; ++i) { std::string key(ToString((i + 1000) % 10000)); std::string value("value" + key); new_mem->Add(SequenceNumber(i), kTypeValue, key, value); if ((i + 1000) % 10000 < 9995) { InternalKey internal_key(key, SequenceNumber(i), kTypeValue); inserted_keys.insert({internal_key.Encode().ToString(), value}); } } { new_mem->Add(SequenceNumber(10000), kTypeRangeDeletion, "9995", "9999a"); InternalKey internal_key("9995", SequenceNumber(10000), kTypeRangeDeletion); inserted_keys.insert({internal_key.Encode().ToString(), "9999a"}); } #ifndef ROCKSDB_LITE // Note: the first two blob references will not be considered when resolving // the oldest blob file referenced (the first one is inlined TTL, while the // second one is TTL and thus points to a TTL blob file). constexpr std::array blob_file_numbers{{ kInvalidBlobFileNumber, 5, 103, 17, 102, 101}}; for (size_t i = 0; i < blob_file_numbers.size(); ++i) { std::string key(ToString(i + 10001)); std::string blob_index; if (i == 0) { BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 1234567890ULL, "foo"); } else if (i == 1) { BlobIndex::EncodeBlobTTL(&blob_index, /* expiration */ 1234567890ULL, blob_file_numbers[i], /* offset */ i << 10, /* size */ i << 20, kNoCompression); } else { BlobIndex::EncodeBlob(&blob_index, blob_file_numbers[i], /* offset */ i << 10, /* size */ i << 20, kNoCompression); } const SequenceNumber seq(i + 10001); new_mem->Add(seq, kTypeBlobIndex, key, blob_index); InternalKey internal_key(key, seq, kTypeBlobIndex); inserted_keys.emplace_hint(inserted_keys.end(), internal_key.Encode().ToString(), blob_index); } #endif autovector to_delete; cfd->imm()->Add(new_mem, &to_delete); for (auto& m : to_delete) { delete m; } EventLogger event_logger(db_options_.info_log.get()); 
SnapshotChecker* snapshot_checker = nullptr; // not relavant FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, *cfd->GetLatestMutableCFOptions(), nullptr /* memtable_id */, env_options_, versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, true, true /* sync_output_directory */, true /* write_manifest */, Env::Priority::USER); HistogramData hist; FileMetaData file_meta; mutex_.Lock(); flush_job.PickMemTable(); ASSERT_OK(flush_job.Run(nullptr, &file_meta)); mutex_.Unlock(); db_options_.statistics->histogramData(FLUSH_TIME, &hist); ASSERT_GT(hist.average, 0.0); ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString()); ASSERT_EQ("9999a", file_meta.largest.user_key().ToString()); ASSERT_EQ(1, file_meta.fd.smallest_seqno); #ifndef ROCKSDB_LITE ASSERT_EQ(10006, file_meta.fd.largest_seqno); ASSERT_EQ(17, file_meta.oldest_blob_file_number); #else ASSERT_EQ(10000, file_meta.fd.largest_seqno); #endif mock_table_factory_->AssertSingleFile(inserted_keys); job_context.Clean(); } TEST_F(FlushJobTest, FlushMemTablesSingleColumnFamily) { const size_t num_mems = 2; const size_t num_mems_to_flush = 1; const size_t num_keys_per_table = 100; JobContext job_context(0); ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault(); std::vector memtable_ids; std::vector new_mems; for (size_t i = 0; i != num_mems; ++i) { MemTable* mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); mem->SetID(i); mem->Ref(); new_mems.emplace_back(mem); memtable_ids.push_back(mem->GetID()); for (size_t j = 0; j < num_keys_per_table; ++j) { std::string key(ToString(j + i * num_keys_per_table)); std::string value("value" + key); mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue, key, value); } } autovector to_delete; for (auto mem : new_mems) { cfd->imm()->Add(mem, 
&to_delete); } EventLogger event_logger(db_options_.info_log.get()); SnapshotChecker* snapshot_checker = nullptr; // not relavant assert(memtable_ids.size() == num_mems); uint64_t smallest_memtable_id = memtable_ids.front(); uint64_t flush_memtable_id = smallest_memtable_id + num_mems_to_flush - 1; FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, *cfd->GetLatestMutableCFOptions(), &flush_memtable_id, env_options_, versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, true, true /* sync_output_directory */, true /* write_manifest */, Env::Priority::USER); HistogramData hist; FileMetaData file_meta; mutex_.Lock(); flush_job.PickMemTable(); ASSERT_OK(flush_job.Run(nullptr /* prep_tracker */, &file_meta)); mutex_.Unlock(); db_options_.statistics->histogramData(FLUSH_TIME, &hist); ASSERT_GT(hist.average, 0.0); ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString()); ASSERT_EQ("99", file_meta.largest.user_key().ToString()); ASSERT_EQ(0, file_meta.fd.smallest_seqno); ASSERT_EQ(SequenceNumber(num_mems_to_flush * num_keys_per_table - 1), file_meta.fd.largest_seqno); ASSERT_EQ(kInvalidBlobFileNumber, file_meta.oldest_blob_file_number); for (auto m : to_delete) { delete m; } to_delete.clear(); job_context.Clean(); } TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) { autovector all_cfds; for (auto cfd : *versions_->GetColumnFamilySet()) { all_cfds.push_back(cfd); } const std::vector num_memtables = {2, 1, 3}; assert(num_memtables.size() == column_family_names_.size()); const size_t num_keys_per_memtable = 1000; JobContext job_context(0); std::vector memtable_ids; std::vector smallest_seqs; std::vector largest_seqs; autovector to_delete; SequenceNumber curr_seqno = 0; size_t k = 0; for (auto cfd : all_cfds) { smallest_seqs.push_back(curr_seqno); for (size_t i = 0; i != num_memtables[k]; 
++i) { MemTable* mem = cfd->ConstructNewMemtable( *cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); mem->SetID(i); mem->Ref(); for (size_t j = 0; j != num_keys_per_memtable; ++j) { std::string key(ToString(j + i * num_keys_per_memtable)); std::string value("value" + key); mem->Add(curr_seqno++, kTypeValue, key, value); } cfd->imm()->Add(mem, &to_delete); } largest_seqs.push_back(curr_seqno - 1); memtable_ids.push_back(num_memtables[k++] - 1); } EventLogger event_logger(db_options_.info_log.get()); SnapshotChecker* snapshot_checker = nullptr; // not relevant std::vector> flush_jobs; k = 0; for (auto cfd : all_cfds) { std::vector snapshot_seqs; flush_jobs.emplace_back(new FlushJob( dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(), &memtable_ids[k], env_options_, versions_.get(), &mutex_, &shutting_down_, snapshot_seqs, kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, true, false /* sync_output_directory */, false /* write_manifest */, Env::Priority::USER)); k++; } HistogramData hist; std::vector file_metas; // Call reserve to avoid auto-resizing file_metas.reserve(flush_jobs.size()); mutex_.Lock(); for (auto& job : flush_jobs) { job->PickMemTable(); } for (auto& job : flush_jobs) { FileMetaData meta; // Run will release and re-acquire mutex ASSERT_OK(job->Run(nullptr /**/, &meta)); file_metas.emplace_back(meta); } autovector file_meta_ptrs; for (auto& meta : file_metas) { file_meta_ptrs.push_back(&meta); } autovector*> mems_list; for (size_t i = 0; i != all_cfds.size(); ++i) { const auto& mems = flush_jobs[i]->GetMemTables(); mems_list.push_back(&mems); } autovector mutable_cf_options_list; for (auto cfd : all_cfds) { mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions()); } Status s = InstallMemtableAtomicFlushResults( nullptr /* imm_lists */, all_cfds, mutable_cf_options_list, mems_list, versions_.get(), &mutex_, file_meta_ptrs, 
&job_context.memtables_to_free, nullptr /* db_directory */, nullptr /* log_buffer */); ASSERT_OK(s); mutex_.Unlock(); db_options_.statistics->histogramData(FLUSH_TIME, &hist); ASSERT_GT(hist.average, 0.0); k = 0; for (const auto& file_meta : file_metas) { ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString()); ASSERT_EQ("999", file_meta.largest.user_key() .ToString()); // max key by bytewise comparator ASSERT_EQ(smallest_seqs[k], file_meta.fd.smallest_seqno); ASSERT_EQ(largest_seqs[k], file_meta.fd.largest_seqno); // Verify that imm is empty ASSERT_EQ(std::numeric_limits::max(), all_cfds[k]->imm()->GetEarliestMemTableID()); ASSERT_EQ(0, all_cfds[k]->imm()->GetLatestMemTableID()); ++k; } for (auto m : to_delete) { delete m; } to_delete.clear(); job_context.Clean(); } TEST_F(FlushJobTest, Snapshots) { JobContext job_context(0); auto cfd = versions_->GetColumnFamilySet()->GetDefault(); auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); std::set snapshots_set; int keys = 10000; int max_inserts_per_keys = 8; Random rnd(301); for (int i = 0; i < keys / 2; ++i) { snapshots_set.insert(rnd.Uniform(keys * (max_inserts_per_keys / 2)) + 1); } // set has already removed the duplicate snapshots std::vector snapshots(snapshots_set.begin(), snapshots_set.end()); new_mem->Ref(); SequenceNumber current_seqno = 0; auto inserted_keys = mock::MakeMockFile(); for (int i = 1; i < keys; ++i) { std::string key(ToString(i)); int insertions = rnd.Uniform(max_inserts_per_keys); for (int j = 0; j < insertions; ++j) { std::string value(test::RandomHumanReadableString(&rnd, 10)); auto seqno = ++current_seqno; new_mem->Add(SequenceNumber(seqno), kTypeValue, key, value); // a key is visible only if: // 1. it's the last one written (j == insertions - 1) // 2. 
there's a snapshot pointing at it bool visible = (j == insertions - 1) || (snapshots_set.find(seqno) != snapshots_set.end()); if (visible) { InternalKey internal_key(key, seqno, kTypeValue); inserted_keys.insert({internal_key.Encode().ToString(), value}); } } } autovector to_delete; cfd->imm()->Add(new_mem, &to_delete); for (auto& m : to_delete) { delete m; } EventLogger event_logger(db_options_.info_log.get()); SnapshotChecker* snapshot_checker = nullptr; // not relavant FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_, *cfd->GetLatestMutableCFOptions(), nullptr /* memtable_id */, env_options_, versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression, db_options_.statistics.get(), &event_logger, true, true /* sync_output_directory */, true /* write_manifest */, Env::Priority::USER); mutex_.Lock(); flush_job.PickMemTable(); ASSERT_OK(flush_job.Run()); mutex_.Unlock(); mock_table_factory_->AssertSingleFile(inserted_keys); HistogramData hist; db_options_.statistics->histogramData(FLUSH_TIME, &hist); ASSERT_GT(hist.average, 0.0); job_context.Clean(); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/flush_scheduler.cc000066400000000000000000000046741370372246700173330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/flush_scheduler.h"

// NOTE(review): the bare #include below, and the std::lock_guard / std::atomic
// / std::set declarations in this file and its header, appear to have lost
// their angle-bracket part during text extraction.
#include

#include "db/column_family.h"

namespace ROCKSDB_NAMESPACE {

// Takes a reference on `cfd` and pushes a new Node for it onto the front of
// the singly linked list rooted at head_, retrying the compare-and-swap until
// it succeeds. In debug builds it also asserts, under checking_mutex_, that
// `cfd` is not already scheduled and records it in checking_set_.
void FlushScheduler::ScheduleWork(ColumnFamilyData* cfd) {
#ifndef NDEBUG
  {
    std::lock_guard lock(checking_mutex_);
    assert(checking_set_.count(cfd) == 0);
    checking_set_.insert(cfd);
  }
#endif  // NDEBUG
  cfd->Ref();
// Suppress false positive clang analyzer warnings.
#ifndef __clang_analyzer__
  Node* node = new Node{cfd, head_.load(std::memory_order_relaxed)};
  while (!head_.compare_exchange_strong(
      node->next, node, std::memory_order_relaxed, std::memory_order_relaxed)) {
    // failing CAS updates the first param, so we are already set for
    // retry.  TakeNextColumnFamily won't happen until after another
    // inter-thread synchronization, so we don't even need release
    // semantics for this CAS
  }
#endif  // __clang_analyzer__
}

// Pops nodes from the front of the list until it finds a column family that
// has not been dropped and returns it (still carrying the reference taken in
// ScheduleWork). Dropped column families are released with
// UnrefAndTryDelete() and skipped. Returns nullptr once the list is empty.
// The head is updated with a plain relaxed load and store, not a CAS.
ColumnFamilyData* FlushScheduler::TakeNextColumnFamily() {
  while (true) {
    if (head_.load(std::memory_order_relaxed) == nullptr) {
      return nullptr;
    }

    // dequeue the head
    Node* node = head_.load(std::memory_order_relaxed);
    head_.store(node->next, std::memory_order_relaxed);
    ColumnFamilyData* cfd = node->column_family;
    delete node;

#ifndef NDEBUG
    {
      std::lock_guard lock(checking_mutex_);
      auto iter = checking_set_.find(cfd);
      assert(iter != checking_set_.end());
      checking_set_.erase(iter);
    }
#endif  // NDEBUG

    if (!cfd->IsDropped()) {
      // success
      return cfd;
    }

    // no longer relevant, retry
    cfd->UnrefAndTryDelete();
  }
}

// Returns true when head_ is null, i.e. no column family is queued.
bool FlushScheduler::Empty() {
  auto rv = head_.load(std::memory_order_relaxed) == nullptr;
#ifndef NDEBUG
  std::lock_guard lock(checking_mutex_);
  // Empty is allowed to be called concurrently with ScheduleWork. It would
  // only miss the recent schedules.
  assert((rv == checking_set_.empty()) || rv);
#endif  // NDEBUG
  return rv;
}

// Drains the list, releasing the reference held for every remaining column
// family, and asserts that the list is empty afterwards.
void FlushScheduler::Clear() {
  ColumnFamilyData* cfd;
  while ((cfd = TakeNextColumnFamily()) != nullptr) {
    cfd->UnrefAndTryDelete();
  }
  assert(head_.load(std::memory_order_relaxed) == nullptr);
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/flush_scheduler.h000066400000000000000000000031271370372246700171650ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include
#include
#include
#include

#include "util/autovector.h"

namespace ROCKSDB_NAMESPACE {

class ColumnFamilyData;

// FlushScheduler keeps track of all column families whose memtable may
// be full and require flushing. Unless otherwise noted, all methods on
// FlushScheduler should be called only with the DB mutex held or from
// a single-threaded recovery context.
class FlushScheduler {
 public:
  FlushScheduler() : head_(nullptr) {}

  // May be called from multiple threads at once, but not concurrent with
  // any other method calls on this instance
  void ScheduleWork(ColumnFamilyData* cfd);

  // Removes and returns Ref()-ed column family. Client needs to Unref().
  // Filters column families that have been dropped.
  ColumnFamilyData* TakeNextColumnFamily();

  // This can be called concurrently with ScheduleWork but it would miss all
  // the scheduled flushes after the last synchronization. This would result
  // in less precise enforcement of memtable sizes but should not matter much.
bool Empty();

  // Removes every queued column family and releases the reference held for
  // each of them.
  void Clear();

 private:
  // One entry of the singly linked list of scheduled column families.
  struct Node {
    ColumnFamilyData* column_family;
    Node* next;
  };

  // Front of the list; nullptr when nothing is scheduled.
  std::atomic head_;
#ifndef NDEBUG
  // Debug-only bookkeeping used to assert that a column family is never
  // scheduled twice; checking_mutex_ guards checking_set_.
  std::mutex checking_mutex_;
  std::set checking_set_;
#endif  // NDEBUG
};

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/forward_iterator.cc000066400000000000000000000775661370372246700175430ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#ifndef ROCKSDB_LITE
#include "db/forward_iterator.h"

// NOTE(review): the three bare #include directives below, and template
// argument lists such as std::vector, std::numeric_limits, reinterpret_cast
// and static_cast in this file, appear to have lost their angle-bracket part
// during text extraction.
#include
#include
#include

#include "db/column_family.h"
#include "db/db_impl/db_impl.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/job_context.h"
#include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h"
#include "rocksdb/env.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "table/merging_iterator.h"
#include "test_util/sync_point.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// Usage:
//     ForwardLevelIterator iter;
//     iter.SetFileIndex(file_index);
//     iter.Seek(target); // or iter.SeekToFirst();
//     iter.Next()
//
// Forward-only iterator over the list of files passed to the constructor.
// It holds one table iterator at a time (file_iter_) for files_[file_index_]
// and Next() moves on to the following file when the current one is
// exhausted. SeekToLast(), Prev() and SeekForPrev() set a NotSupported status.
class ForwardLevelIterator : public InternalIterator {
 public:
  // read_options and files are stored by reference, so both must outlive this
  // iterator. No file iterator is created until SetFileIndex() is called.
  ForwardLevelIterator(const ColumnFamilyData* const cfd,
                       const ReadOptions& read_options,
                       const std::vector& files,
                       const SliceTransform* prefix_extractor,
                       bool allow_unprepared_value)
      : cfd_(cfd),
        read_options_(read_options),
        files_(files),
        valid_(false),
        file_index_(std::numeric_limits::max()),
        file_iter_(nullptr),
        pinned_iters_mgr_(nullptr),
        prefix_extractor_(prefix_extractor),
        allow_unprepared_value_(allow_unprepared_value) {}

  ~ForwardLevelIterator() override {
    // Reset current pointer
    // When pinning is enabled the file iterator is handed to the pinned
    // iterators manager instead of being deleted here.
    if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) {
      pinned_iters_mgr_->PinIterator(file_iter_);
    } else {
      delete file_iter_;
    }
  }

  // Clears any previous error status and, if file_index differs from the
  // current one, switches to that file by calling Reset().
  void SetFileIndex(uint32_t file_index) {
    assert(file_index < files_.size());
    status_ = Status::OK();
    if (file_index != file_index_) {
      file_index_ = file_index;
      Reset();
    }
  }

  // Disposes of the current file iterator (pin or delete) and creates a new
  // one for files_[file_index_] through the column family's table cache. The
  // iterator is left invalid until a seek. If the file yields any range
  // tombstones, status_ becomes NotSupported.
  void Reset() {
    assert(file_index_ < files_.size());

    // Reset current pointer
    if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) {
      pinned_iters_mgr_->PinIterator(file_iter_);
    } else {
      delete file_iter_;
    }

    ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(),
                                         kMaxSequenceNumber /* upper_bound */);
    file_iter_ = cfd_->table_cache()->NewIterator(
        read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
        *files_[file_index_],
        read_options_.ignore_range_deletions ? nullptr : &range_del_agg,
        prefix_extractor_, /*table_reader_ptr=*/nullptr,
        /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator,
        /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1,
        /*max_file_size_for_l0_meta_pin=*/0,
        /*smallest_compaction_key=*/nullptr,
        /*largest_compaction_key=*/nullptr, allow_unprepared_value_);
    file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
    valid_ = false;
    if (!range_del_agg.IsEmpty()) {
      status_ = Status::NotSupported(
          "Range tombstones unsupported with ForwardIterator");
    }
  }

  void SeekToLast() override {
    status_ = Status::NotSupported("ForwardLevelIterator::SeekToLast()");
    valid_ = false;
  }
  void Prev() override {
    status_ = Status::NotSupported("ForwardLevelIterator::Prev()");
    valid_ = false;
  }
  bool Valid() const override {
    return valid_;
  }
  // Positions on the first entry of the current file. Does nothing if an
  // error status is already set (see the comment in Seek()).
  void SeekToFirst() override {
    assert(file_iter_ != nullptr);
    if (!status_.ok()) {
      assert(!valid_);
      return;
    }
    file_iter_->SeekToFirst();
    valid_ = file_iter_->Valid();
  }
  void Seek(const Slice& internal_key) override {
    assert(file_iter_ != nullptr);

    // This deviates from the usual convention for InternalIterator::Seek() in
    // that it doesn't discard pre-existing error status. That's because this
    // Seek() is only supposed to be called immediately after SetFileIndex()
    // (which discards pre-existing error status), and SetFileIndex() may set
    // an error status, which we shouldn't discard.
    if (!status_.ok()) {
      assert(!valid_);
      return;
    }

    file_iter_->Seek(internal_key);
    valid_ = file_iter_->Valid();
  }
  void SeekForPrev(const Slice& /*internal_key*/) override {
    status_ = Status::NotSupported("ForwardLevelIterator::SeekForPrev()");
    valid_ = false;
  }
  // Advances within the current file; when that file is exhausted without
  // error, moves to the first entry of the next file, repeating until an
  // entry is found, an error occurs, or there are no more files.
  void Next() override {
    assert(valid_);
    file_iter_->Next();
    for (;;) {
      valid_ = file_iter_->Valid();
      if (!file_iter_->status().ok()) {
        assert(!valid_);
        return;
      }
      if (valid_) {
        return;
      }
      if (file_index_ + 1 >= files_.size()) {
        valid_ = false;
        return;
      }
      SetFileIndex(file_index_ + 1);
      if (!status_.ok()) {
        assert(!valid_);
        return;
      }
      file_iter_->SeekToFirst();
    }
  }
  Slice key() const override {
    assert(valid_);
    return file_iter_->key();
  }
  Slice value() const override {
    assert(valid_);
    return file_iter_->value();
  }
  // Reports this iterator's own error first, otherwise the status of the
  // current file iterator, otherwise OK when no file iterator exists yet.
  Status status() const override {
    if (!status_.ok()) {
      return status_;
    } else if (file_iter_) {
      return file_iter_->status();
    }
    return Status::OK();
  }
  // Forwards to the file iterator; on failure marks this iterator invalid.
  bool PrepareValue() override {
    assert(valid_);
    if (file_iter_->PrepareValue()) {
      return true;
    }

    assert(!file_iter_->Valid());
    valid_ = false;
    return false;
  }
  bool IsKeyPinned() const override {
    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
           file_iter_->IsKeyPinned();
  }
  bool IsValuePinned() const override {
    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
           file_iter_->IsValuePinned();
  }
  // Remembers the manager and forwards it to the current file iterator, if
  // one exists; Reset() passes it to file iterators created later.
  void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
    pinned_iters_mgr_ = pinned_iters_mgr;
    if (file_iter_) {
      file_iter_->SetPinnedItersMgr(pinned_iters_mgr_);
    }
  }

 private:
  const ColumnFamilyData* const cfd_;
  const ReadOptions& read_options_;
  const std::vector& files_;

  bool valid_;
  // Index into files_ of the file file_iter_ reads; initialised to the
  // numeric_limits max value, meaning no file has been selected yet.
  uint32_t file_index_;
  Status status_;
  InternalIterator* file_iter_;
  PinnedIteratorsManager* pinned_iters_mgr_;
  const SliceTransform* prefix_extractor_;
  const bool allow_unprepared_value_;
};

// Stores the arguments and, if a SuperVersion was supplied, builds the child
// iterators immediately via RebuildIterators(false). With a null current_sv
// the iterators are built on the first seek instead.
ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
                                 ColumnFamilyData* cfd,
                                 SuperVersion* current_sv,
                                 bool allow_unprepared_value)
    : db_(db),
      read_options_(read_options),
      cfd_(cfd),
      prefix_extractor_(current_sv->mutable_cf_options.prefix_extractor.get()),
      user_comparator_(cfd->user_comparator()),
      allow_unprepared_value_(allow_unprepared_value),
      immutable_min_heap_(MinIterComparator(&cfd_->internal_comparator())),
      sv_(current_sv),
      mutable_iter_(nullptr),
      current_(nullptr),
      valid_(false),
      status_(Status::OK()),
      immutable_status_(Status::OK()),
      has_iter_trimmed_for_upper_bound_(false),
      current_over_upper_bound_(false),
      is_prev_set_(false),
      is_prev_inclusive_(false),
      pinned_iters_mgr_(nullptr) {
  // NOTE(review): prefix_extractor_ above dereferences current_sv
  // unconditionally, while the check below allows sv_ to be null -- confirm
  // whether callers ever pass nullptr.
  if (sv_) {
    RebuildIterators(false);
  }
}

ForwardIterator::~ForwardIterator() {
  Cleanup(true);
}

// Drops one reference on `sv`. If Unref() reports that cleanup is needed,
// cleans the SuperVersion up under the DB mutex and collects obsolete files.
// With background_purge_on_iterator_cleanup the SuperVersion is queued for
// freeing and a purge is scheduled; otherwise it is deleted here after the
// mutex is released. Obsolete files found are then purged.
void ForwardIterator::SVCleanup(DBImpl* db, SuperVersion* sv,
                                bool background_purge_on_iterator_cleanup) {
  if (sv->Unref()) {
    // Job id == 0 means that this is not our background process, but rather
    // user thread
    JobContext job_context(0);
    db->mutex_.Lock();
    sv->Cleanup();
    db->FindObsoleteFiles(&job_context, false, true);
    if (background_purge_on_iterator_cleanup) {
      db->ScheduleBgLogWriterClose(&job_context);
      db->AddSuperVersionsToFreeQueue(sv);
      db->SchedulePurge();
    }
    db->mutex_.Unlock();
    if (!background_purge_on_iterator_cleanup) {
      delete sv;
    }
    if (job_context.HaveSomethingToDelete()) {
      db->PurgeObsoleteFiles(job_context, background_purge_on_iterator_cleanup);
    }
    job_context.Clean();
  }
}

namespace {
// Arguments captured for a deferred SVCleanup() call.
struct SVCleanupParams {
  DBImpl* db;
  SuperVersion* sv;
  bool background_purge_on_iterator_cleanup;
};
}

// Used in PinnedIteratorsManager to release pinned SuperVersion
// `arg` is the SVCleanupParams allocated in SVCleanup(); it is deleted here.
void ForwardIterator::DeferredSVCleanup(void* arg) {
  auto d = reinterpret_cast(arg);
  ForwardIterator::SVCleanup(
    d->db, d->sv, d->background_purge_on_iterator_cleanup);
  delete d;
}

void ForwardIterator::SVCleanup() {
  if (sv_ == nullptr) {
return;
  }
  // Background purge is used if either the read option or the DB-wide
  // avoid_unnecessary_blocking_io option asks for it.
  bool background_purge =
      read_options_.background_purge_on_iterator_cleanup ||
      db_->immutable_db_options().avoid_unnecessary_blocking_io;
  if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) {
    // pinned_iters_mgr_ tells us to make sure that all visited key-value slices
    // are alive until pinned_iters_mgr_->ReleasePinnedData() is called.
    // The slices may point into some memtables owned by sv_, so we need to keep
    // sv_ referenced until pinned_iters_mgr_ unpins everything.
    auto p = new SVCleanupParams{db_, sv_, background_purge};
    pinned_iters_mgr_->PinPtr(p, &ForwardIterator::DeferredSVCleanup);
  } else {
    SVCleanup(db_, sv_, background_purge);
  }
}

// Deletes every child iterator (mutable, immutable memtable, L0 and level
// iterators) and clears the containers that held them. When release_sv is
// true the SuperVersion reference is released as well.
void ForwardIterator::Cleanup(bool release_sv) {
  if (mutable_iter_ != nullptr) {
    DeleteIterator(mutable_iter_, true /* is_arena */);
  }

  for (auto* m : imm_iters_) {
    DeleteIterator(m, true /* is_arena */);
  }
  imm_iters_.clear();

  for (auto* f : l0_iters_) {
    DeleteIterator(f);
  }
  l0_iters_.clear();

  for (auto* l : level_iters_) {
    DeleteIterator(l);
  }
  level_iters_.clear();

  if (release_sv) {
    SVCleanup();
  }
}

bool ForwardIterator::Valid() const {
  // See UpdateCurrent().
  return valid_ ? !current_over_upper_bound_ : false;
}

// Refreshes the child iterators if needed (none built yet, SuperVersion
// changed, or a previous immutable seek was incomplete) and then seeks every
// child to its first entry.
void ForwardIterator::SeekToFirst() {
  if (sv_ == nullptr) {
    RebuildIterators(true);
  } else if (sv_->version_number != cfd_->GetSuperVersionNumber()) {
    RenewIterators();
  } else if (immutable_status_.IsIncomplete()) {
    ResetIncompleteIterators();
  }
  SeekInternal(Slice(), true);
}

// Returns true when an upper bound is configured and the user key of
// internal_key is greater than or equal to it.
bool ForwardIterator::IsOverUpperBound(const Slice& internal_key) const {
  return !(read_options_.iterate_upper_bound == nullptr ||
           cfd_->internal_comparator().user_comparator()->Compare(
               ExtractUserKey(internal_key),
               *read_options_.iterate_upper_bound) < 0);
}

// Same refresh logic as SeekToFirst(), then seeks to internal_key.
void ForwardIterator::Seek(const Slice& internal_key) {
  if (sv_ == nullptr) {
    RebuildIterators(true);
  } else if (sv_->version_number != cfd_->GetSuperVersionNumber()) {
    RenewIterators();
  } else if (immutable_status_.IsIncomplete()) {
    ResetIncompleteIterators();
  }
  SeekInternal(internal_key, false);
}

// Seeks the mutable iterator unconditionally. The immutable iterators
// (immutable memtables, L0 files, levels >= 1) are re-seeked only when
// seek_to_first is set or NeedToSeekImmutable() says so; in that case the
// min-heap is rebuilt from the children that are valid and within the upper
// bound, and children that are not are deleted ("trimmed"). Finally
// UpdateCurrent() is called.
void ForwardIterator::SeekInternal(const Slice& internal_key,
                                   bool seek_to_first) {
  assert(mutable_iter_);
  // mutable
  seek_to_first ? mutable_iter_->SeekToFirst() :
                  mutable_iter_->Seek(internal_key);

  // immutable
  // TODO(ljin): NeedToSeekImmutable has negative impact on performance
  // if it turns to need to seek immutable often. We probably want to have
  // an option to turn it off.
  if (seek_to_first || NeedToSeekImmutable(internal_key)) {
    immutable_status_ = Status::OK();
    if (has_iter_trimmed_for_upper_bound_ &&
        (
            // prev_ is not set yet
            is_prev_set_ == false ||
            // We are doing SeekToFirst() and internal_key.size() = 0
            seek_to_first ||
            // prev_key_ > internal_key
            cfd_->internal_comparator().InternalKeyComparator::Compare(
                prev_key_.GetInternalKey(), internal_key) > 0)) {
      // Some iterators are trimmed. Need to rebuild.
      RebuildIterators(true);
      // Already seeked mutable iter, so seek again
      seek_to_first ? mutable_iter_->SeekToFirst()
                    : mutable_iter_->Seek(internal_key);
    }
    {
      // Empty the heap by swapping it with a freshly constructed one.
      auto tmp = MinIterHeap(MinIterComparator(&cfd_->internal_comparator()));
      immutable_min_heap_.swap(tmp);
    }
    for (size_t i = 0; i < imm_iters_.size(); i++) {
      auto* m = imm_iters_[i];
      seek_to_first ? m->SeekToFirst() : m->Seek(internal_key);
      if (!m->status().ok()) {
        immutable_status_ = m->status();
      } else if (m->Valid()) {
        immutable_min_heap_.push(m);
      }
    }

    Slice target_user_key;
    if (!seek_to_first) {
      target_user_key = ExtractUserKey(internal_key);
    }
    const VersionStorageInfo* vstorage = sv_->current->storage_info();
    const std::vector& l0 = vstorage->LevelFiles(0);
    for (size_t i = 0; i < l0.size(); ++i) {
      // A null entry means this L0 iterator was trimmed earlier.
      if (!l0_iters_[i]) {
        continue;
      }
      if (seek_to_first) {
        l0_iters_[i]->SeekToFirst();
      } else {
        // If the target key passes over the largest key, we are sure Next()
        // won't go over this file.
        if (user_comparator_->Compare(target_user_key,
                                      l0[i]->largest.user_key()) > 0) {
          if (read_options_.iterate_upper_bound != nullptr) {
            has_iter_trimmed_for_upper_bound_ = true;
            DeleteIterator(l0_iters_[i]);
            l0_iters_[i] = nullptr;
          }
          continue;
        }
        l0_iters_[i]->Seek(internal_key);
      }

      if (!l0_iters_[i]->status().ok()) {
        immutable_status_ = l0_iters_[i]->status();
      } else if (l0_iters_[i]->Valid() &&
                 !IsOverUpperBound(l0_iters_[i]->key())) {
        immutable_min_heap_.push(l0_iters_[i]);
      } else {
        has_iter_trimmed_for_upper_bound_ = true;
        DeleteIterator(l0_iters_[i]);
        l0_iters_[i] = nullptr;
      }
    }

    // level_iters_ is indexed by (level - 1) because L0 is handled above.
    for (int32_t level = 1; level < vstorage->num_levels(); ++level) {
      const std::vector& level_files =
          vstorage->LevelFiles(level);
      if (level_files.empty()) {
        continue;
      }
      if (level_iters_[level - 1] == nullptr) {
        continue;
      }
      uint32_t f_idx = 0;
      if (!seek_to_first) {
        f_idx = FindFileInRange(level_files, internal_key, 0,
                                static_cast(level_files.size()));
      }

      // Seek
      if (f_idx < level_files.size()) {
        level_iters_[level - 1]->SetFileIndex(f_idx);
        seek_to_first ? level_iters_[level - 1]->SeekToFirst() :
                        level_iters_[level - 1]->Seek(internal_key);

        if (!level_iters_[level - 1]->status().ok()) {
          immutable_status_ = level_iters_[level - 1]->status();
        } else if (level_iters_[level - 1]->Valid() &&
                   !IsOverUpperBound(level_iters_[level - 1]->key())) {
          immutable_min_heap_.push(level_iters_[level - 1]);
        } else {
          // Nothing in this level is interesting. Remove.
          has_iter_trimmed_for_upper_bound_ = true;
          DeleteIterator(level_iters_[level - 1]);
          level_iters_[level - 1] = nullptr;
        }
      }
    }

    if (seek_to_first) {
      is_prev_set_ = false;
    } else {
      prev_key_.SetInternalKey(internal_key);
      is_prev_set_ = true;
      is_prev_inclusive_ = true;
    }

    TEST_SYNC_POINT_CALLBACK("ForwardIterator::SeekInternal:Immutable", this);
  } else if (current_ && current_ != mutable_iter_) {
    // current_ is one of immutable iterators, push it back to the heap
    immutable_min_heap_.push(current_);
  }

  UpdateCurrent();
  TEST_SYNC_POINT_CALLBACK("ForwardIterator::SeekInternal:Return", this);
}

void ForwardIterator::Next() {
  assert(valid_);
  bool update_prev_key = false;

  if (sv_ == nullptr ||
      sv_->version_number != cfd_->GetSuperVersionNumber()) {
    std::string current_key = key().ToString();
    Slice old_key(current_key.data(), current_key.size());

    if (sv_ == nullptr) {
      RebuildIterators(true);
    } else {
      RenewIterators();
    }
    SeekInternal(old_key, false);
    if (!valid_ || key().compare(old_key) != 0) {
      return;
    }
  } else if (current_ != mutable_iter_) {
    // It is going to advance immutable iterator

    if (is_prev_set_ && prefix_extractor_) {
      // advance prev_key_ to current_ only if they share the same prefix
      update_prev_key =
          prefix_extractor_->Transform(prev_key_.GetUserKey())
              .compare(prefix_extractor_->Transform(current_->key())) == 0;
    } else {
      update_prev_key = true;
    }

    if (update_prev_key) {
      prev_key_.SetInternalKey(current_->key());
      is_prev_set_ = true;
      is_prev_inclusive_ = false;
    }
  }

  current_->Next();
  if (current_ != mutable_iter_) {
    if (!current_->status().ok()) {
      immutable_status_ =
current_->status(); } else if ((current_->Valid()) && (!IsOverUpperBound(current_->key()))) { immutable_min_heap_.push(current_); } else { if ((current_->Valid()) && (IsOverUpperBound(current_->key()))) { // remove the current iterator DeleteCurrentIter(); current_ = nullptr; } if (update_prev_key) { mutable_iter_->Seek(prev_key_.GetInternalKey()); } } } UpdateCurrent(); TEST_SYNC_POINT_CALLBACK("ForwardIterator::Next:Return", this); } Slice ForwardIterator::key() const { assert(valid_); return current_->key(); } Slice ForwardIterator::value() const { assert(valid_); return current_->value(); } Status ForwardIterator::status() const { if (!status_.ok()) { return status_; } else if (!mutable_iter_->status().ok()) { return mutable_iter_->status(); } return immutable_status_; } bool ForwardIterator::PrepareValue() { assert(valid_); if (current_->PrepareValue()) { return true; } assert(!current_->Valid()); assert(!current_->status().ok()); assert(current_ != mutable_iter_); // memtable iterator can't fail assert(immutable_status_.ok()); valid_ = false; immutable_status_ = current_->status(); return false; } Status ForwardIterator::GetProperty(std::string prop_name, std::string* prop) { assert(prop != nullptr); if (prop_name == "rocksdb.iterator.super-version-number") { *prop = ToString(sv_->version_number); return Status::OK(); } return Status::InvalidArgument(); } void ForwardIterator::SetPinnedItersMgr( PinnedIteratorsManager* pinned_iters_mgr) { pinned_iters_mgr_ = pinned_iters_mgr; UpdateChildrenPinnedItersMgr(); } void ForwardIterator::UpdateChildrenPinnedItersMgr() { // Set PinnedIteratorsManager for mutable memtable iterator. if (mutable_iter_) { mutable_iter_->SetPinnedItersMgr(pinned_iters_mgr_); } // Set PinnedIteratorsManager for immutable memtable iterators. for (InternalIterator* child_iter : imm_iters_) { if (child_iter) { child_iter->SetPinnedItersMgr(pinned_iters_mgr_); } } // Set PinnedIteratorsManager for L0 files iterators. 
for (InternalIterator* child_iter : l0_iters_) { if (child_iter) { child_iter->SetPinnedItersMgr(pinned_iters_mgr_); } } // Set PinnedIteratorsManager for L1+ levels iterators. for (ForwardLevelIterator* child_iter : level_iters_) { if (child_iter) { child_iter->SetPinnedItersMgr(pinned_iters_mgr_); } } } bool ForwardIterator::IsKeyPinned() const { return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && current_->IsKeyPinned(); } bool ForwardIterator::IsValuePinned() const { return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && current_->IsValuePinned(); } void ForwardIterator::RebuildIterators(bool refresh_sv) { // Clean up Cleanup(refresh_sv); if (refresh_sv) { // New sv_ = cfd_->GetReferencedSuperVersion(db_); } ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(), kMaxSequenceNumber /* upper_bound */); mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_); sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_); if (!read_options_.ignore_range_deletions) { std::unique_ptr range_del_iter( sv_->mem->NewRangeTombstoneIterator( read_options_, sv_->current->version_set()->LastSequence())); range_del_agg.AddTombstones(std::move(range_del_iter)); sv_->imm->AddRangeTombstoneIterators(read_options_, &arena_, &range_del_agg); } has_iter_trimmed_for_upper_bound_ = false; const auto* vstorage = sv_->current->storage_info(); const auto& l0_files = vstorage->LevelFiles(0); l0_iters_.reserve(l0_files.size()); for (const auto* l0 : l0_files) { if ((read_options_.iterate_upper_bound != nullptr) && cfd_->internal_comparator().user_comparator()->Compare( l0->smallest.user_key(), *read_options_.iterate_upper_bound) > 0) { // No need to set has_iter_trimmed_for_upper_bound_: this ForwardIterator // will never be interested in files with smallest key above // iterate_upper_bound, since iterate_upper_bound can't be changed. 
l0_iters_.push_back(nullptr); continue; } l0_iters_.push_back(cfd_->table_cache()->NewIterator( read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0, read_options_.ignore_range_deletions ? nullptr : &range_del_agg, sv_->mutable_cf_options.prefix_extractor.get(), /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, MaxFileSizeForL0MetaPin(sv_->mutable_cf_options), /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, allow_unprepared_value_)); } BuildLevelIterators(vstorage); current_ = nullptr; is_prev_set_ = false; UpdateChildrenPinnedItersMgr(); if (!range_del_agg.IsEmpty()) { status_ = Status::NotSupported( "Range tombstones unsupported with ForwardIterator"); valid_ = false; } } void ForwardIterator::RenewIterators() { SuperVersion* svnew; assert(sv_); svnew = cfd_->GetReferencedSuperVersion(db_); if (mutable_iter_ != nullptr) { DeleteIterator(mutable_iter_, true /* is_arena */); } for (auto* m : imm_iters_) { DeleteIterator(m, true /* is_arena */); } imm_iters_.clear(); mutable_iter_ = svnew->mem->NewIterator(read_options_, &arena_); svnew->imm->AddIterators(read_options_, &imm_iters_, &arena_); ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(), kMaxSequenceNumber /* upper_bound */); if (!read_options_.ignore_range_deletions) { std::unique_ptr range_del_iter( svnew->mem->NewRangeTombstoneIterator( read_options_, sv_->current->version_set()->LastSequence())); range_del_agg.AddTombstones(std::move(range_del_iter)); svnew->imm->AddRangeTombstoneIterators(read_options_, &arena_, &range_del_agg); } const auto* vstorage = sv_->current->storage_info(); const auto& l0_files = vstorage->LevelFiles(0); const auto* vstorage_new = svnew->current->storage_info(); const auto& l0_files_new = vstorage_new->LevelFiles(0); size_t iold, inew; bool found; std::vector l0_iters_new; l0_iters_new.reserve(l0_files_new.size()); for (inew = 0; 
inew < l0_files_new.size(); inew++) { found = false; for (iold = 0; iold < l0_files.size(); iold++) { if (l0_files[iold] == l0_files_new[inew]) { found = true; break; } } if (found) { if (l0_iters_[iold] == nullptr) { l0_iters_new.push_back(nullptr); TEST_SYNC_POINT_CALLBACK("ForwardIterator::RenewIterators:Null", this); } else { l0_iters_new.push_back(l0_iters_[iold]); l0_iters_[iold] = nullptr; TEST_SYNC_POINT_CALLBACK("ForwardIterator::RenewIterators:Copy", this); } continue; } l0_iters_new.push_back(cfd_->table_cache()->NewIterator( read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0_files_new[inew], read_options_.ignore_range_deletions ? nullptr : &range_del_agg, svnew->mutable_cf_options.prefix_extractor.get(), /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, MaxFileSizeForL0MetaPin(svnew->mutable_cf_options), /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, allow_unprepared_value_)); } for (auto* f : l0_iters_) { DeleteIterator(f); } l0_iters_.clear(); l0_iters_ = l0_iters_new; for (auto* l : level_iters_) { DeleteIterator(l); } level_iters_.clear(); BuildLevelIterators(vstorage_new); current_ = nullptr; is_prev_set_ = false; SVCleanup(); sv_ = svnew; UpdateChildrenPinnedItersMgr(); if (!range_del_agg.IsEmpty()) { status_ = Status::NotSupported( "Range tombstones unsupported with ForwardIterator"); valid_ = false; } } void ForwardIterator::BuildLevelIterators(const VersionStorageInfo* vstorage) { level_iters_.reserve(vstorage->num_levels() - 1); for (int32_t level = 1; level < vstorage->num_levels(); ++level) { const auto& level_files = vstorage->LevelFiles(level); if ((level_files.empty()) || ((read_options_.iterate_upper_bound != nullptr) && (user_comparator_->Compare(*read_options_.iterate_upper_bound, level_files[0]->smallest.user_key()) < 0))) { level_iters_.push_back(nullptr); if (!level_files.empty()) { 
has_iter_trimmed_for_upper_bound_ = true; } } else { level_iters_.push_back(new ForwardLevelIterator( cfd_, read_options_, level_files, sv_->mutable_cf_options.prefix_extractor.get(), allow_unprepared_value_)); } } } void ForwardIterator::ResetIncompleteIterators() { const auto& l0_files = sv_->current->storage_info()->LevelFiles(0); for (size_t i = 0; i < l0_iters_.size(); ++i) { assert(i < l0_files.size()); if (!l0_iters_[i] || !l0_iters_[i]->status().IsIncomplete()) { continue; } DeleteIterator(l0_iters_[i]); l0_iters_[i] = cfd_->table_cache()->NewIterator( read_options_, *cfd_->soptions(), cfd_->internal_comparator(), *l0_files[i], /*range_del_agg=*/nullptr, sv_->mutable_cf_options.prefix_extractor.get(), /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kUserIterator, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, MaxFileSizeForL0MetaPin(sv_->mutable_cf_options), /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, allow_unprepared_value_); l0_iters_[i]->SetPinnedItersMgr(pinned_iters_mgr_); } for (auto* level_iter : level_iters_) { if (level_iter && level_iter->status().IsIncomplete()) { level_iter->Reset(); } } current_ = nullptr; is_prev_set_ = false; } void ForwardIterator::UpdateCurrent() { if (immutable_min_heap_.empty() && !mutable_iter_->Valid()) { current_ = nullptr; } else if (immutable_min_heap_.empty()) { current_ = mutable_iter_; } else if (!mutable_iter_->Valid()) { current_ = immutable_min_heap_.top(); immutable_min_heap_.pop(); } else { current_ = immutable_min_heap_.top(); assert(current_ != nullptr); assert(current_->Valid()); int cmp = cfd_->internal_comparator().InternalKeyComparator::Compare( mutable_iter_->key(), current_->key()); assert(cmp != 0); if (cmp > 0) { immutable_min_heap_.pop(); } else { current_ = mutable_iter_; } } valid_ = current_ != nullptr && immutable_status_.ok(); if (!status_.ok()) { status_ = Status::OK(); } // Upper bound doesn't apply to the memtable iterator. 
We want Valid() to // return false when all iterators are over iterate_upper_bound, but can't // just set valid_ to false, as that would effectively disable the tailing // optimization (Seek() would be called on all immutable iterators regardless // of whether the target key is greater than prev_key_). current_over_upper_bound_ = valid_ && IsOverUpperBound(current_->key()); } bool ForwardIterator::NeedToSeekImmutable(const Slice& target) { // We maintain the interval (prev_key_, immutable_min_heap_.top()->key()) // such that there are no records with keys within that range in // immutable_min_heap_. Since immutable structures (SST files and immutable // memtables) can't change in this version, we don't need to do a seek if // 'target' belongs to that interval (immutable_min_heap_.top() is already // at the correct position). if (!valid_ || !current_ || !is_prev_set_ || !immutable_status_.ok()) { return true; } Slice prev_key = prev_key_.GetInternalKey(); if (prefix_extractor_ && prefix_extractor_->Transform(target).compare( prefix_extractor_->Transform(prev_key)) != 0) { return true; } if (cfd_->internal_comparator().InternalKeyComparator::Compare( prev_key, target) >= (is_prev_inclusive_ ? 1 : 0)) { return true; } if (immutable_min_heap_.empty() && current_ == mutable_iter_) { // Nothing to seek on. return false; } if (cfd_->internal_comparator().InternalKeyComparator::Compare( target, current_ == mutable_iter_ ? 
immutable_min_heap_.top()->key() : current_->key()) > 0) { return true; } return false; } void ForwardIterator::DeleteCurrentIter() { const VersionStorageInfo* vstorage = sv_->current->storage_info(); const std::vector& l0 = vstorage->LevelFiles(0); for (size_t i = 0; i < l0.size(); ++i) { if (!l0_iters_[i]) { continue; } if (l0_iters_[i] == current_) { has_iter_trimmed_for_upper_bound_ = true; DeleteIterator(l0_iters_[i]); l0_iters_[i] = nullptr; return; } } for (int32_t level = 1; level < vstorage->num_levels(); ++level) { if (level_iters_[level - 1] == nullptr) { continue; } if (level_iters_[level - 1] == current_) { has_iter_trimmed_for_upper_bound_ = true; DeleteIterator(level_iters_[level - 1]); level_iters_[level - 1] = nullptr; } } } bool ForwardIterator::TEST_CheckDeletedIters(int* pdeleted_iters, int* pnum_iters) { bool retval = false; int deleted_iters = 0; int num_iters = 0; const VersionStorageInfo* vstorage = sv_->current->storage_info(); const std::vector& l0 = vstorage->LevelFiles(0); for (size_t i = 0; i < l0.size(); ++i) { if (!l0_iters_[i]) { retval = true; deleted_iters++; } else { num_iters++; } } for (int32_t level = 1; level < vstorage->num_levels(); ++level) { if ((level_iters_[level - 1] == nullptr) && (!vstorage->LevelFiles(level).empty())) { retval = true; deleted_iters++; } else if (!vstorage->LevelFiles(level).empty()) { num_iters++; } } if ((!retval) && num_iters <= 1) { retval = true; } if (pdeleted_iters) { *pdeleted_iters = deleted_iters; } if (pnum_iters) { *pnum_iters = num_iters; } return retval; } uint32_t ForwardIterator::FindFileInRange( const std::vector& files, const Slice& internal_key, uint32_t left, uint32_t right) { auto cmp = [&](const FileMetaData* f, const Slice& key) -> bool { return cfd_->internal_comparator().InternalKeyComparator::Compare( f->largest.Encode(), key) < 0; }; const auto &b = files.begin(); return static_cast(std::lower_bound(b + left, b + right, internal_key, cmp) - b); } void 
ForwardIterator::DeleteIterator(InternalIterator* iter, bool is_arena) { if (iter == nullptr) { return; } if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { pinned_iters_mgr_->PinIterator(iter, is_arena); } else { if (is_arena) { iter->~InternalIterator(); } else { delete iter; } } } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/forward_iterator.h000066400000000000000000000132331370372246700173620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include #include "db/dbformat.h" #include "memory/arena.h" #include "rocksdb/db.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "table/internal_iterator.h" namespace ROCKSDB_NAMESPACE { class DBImpl; class Env; struct SuperVersion; class ColumnFamilyData; class ForwardLevelIterator; class VersionStorageInfo; struct FileMetaData; class MinIterComparator { public: explicit MinIterComparator(const Comparator* comparator) : comparator_(comparator) {} bool operator()(InternalIterator* a, InternalIterator* b) { return comparator_->Compare(a->key(), b->key()) > 0; } private: const Comparator* comparator_; }; typedef std::priority_queue, MinIterComparator> MinIterHeap; /** * ForwardIterator is a special type of iterator that only supports Seek() * and Next(). It is expected to perform better than TailingIterator by * removing the encapsulation and making all information accessible within * the iterator. At the current implementation, snapshot is taken at the * time Seek() is called. The Next() followed do not see new values after. 
*/ class ForwardIterator : public InternalIterator { public: ForwardIterator(DBImpl* db, const ReadOptions& read_options, ColumnFamilyData* cfd, SuperVersion* current_sv = nullptr, bool allow_unprepared_value = false); virtual ~ForwardIterator(); void SeekForPrev(const Slice& /*target*/) override { status_ = Status::NotSupported("ForwardIterator::SeekForPrev()"); valid_ = false; } void SeekToLast() override { status_ = Status::NotSupported("ForwardIterator::SeekToLast()"); valid_ = false; } void Prev() override { status_ = Status::NotSupported("ForwardIterator::Prev"); valid_ = false; } virtual bool Valid() const override; void SeekToFirst() override; virtual void Seek(const Slice& target) override; virtual void Next() override; virtual Slice key() const override; virtual Slice value() const override; virtual Status status() const override; virtual bool PrepareValue() override; virtual Status GetProperty(std::string prop_name, std::string* prop) override; virtual void SetPinnedItersMgr( PinnedIteratorsManager* pinned_iters_mgr) override; virtual bool IsKeyPinned() const override; virtual bool IsValuePinned() const override; bool TEST_CheckDeletedIters(int* deleted_iters, int* num_iters); private: void Cleanup(bool release_sv); // Unreference and, if needed, clean up the current SuperVersion. This is // either done immediately or deferred until this iterator is unpinned by // PinnedIteratorsManager. 
void SVCleanup(); static void SVCleanup( DBImpl* db, SuperVersion* sv, bool background_purge_on_iterator_cleanup); static void DeferredSVCleanup(void* arg); void RebuildIterators(bool refresh_sv); void RenewIterators(); void BuildLevelIterators(const VersionStorageInfo* vstorage); void ResetIncompleteIterators(); void SeekInternal(const Slice& internal_key, bool seek_to_first); void UpdateCurrent(); bool NeedToSeekImmutable(const Slice& internal_key); void DeleteCurrentIter(); uint32_t FindFileInRange( const std::vector& files, const Slice& internal_key, uint32_t left, uint32_t right); bool IsOverUpperBound(const Slice& internal_key) const; // Set PinnedIteratorsManager for all children Iterators, this function should // be called whenever we update children Iterators or pinned_iters_mgr_. void UpdateChildrenPinnedItersMgr(); // A helper function that will release iter in the proper manner, or pass it // to pinned_iters_mgr_ to release it later if pinning is enabled. void DeleteIterator(InternalIterator* iter, bool is_arena = false); DBImpl* const db_; const ReadOptions read_options_; ColumnFamilyData* const cfd_; const SliceTransform* const prefix_extractor_; const Comparator* user_comparator_; const bool allow_unprepared_value_; MinIterHeap immutable_min_heap_; SuperVersion* sv_; InternalIterator* mutable_iter_; std::vector imm_iters_; std::vector l0_iters_; std::vector level_iters_; InternalIterator* current_; bool valid_; // Internal iterator status; set only by one of the unsupported methods. Status status_; // Status of immutable iterators, maintained here to avoid iterating over // all of them in status(). Status immutable_status_; // Indicates that at least one of the immutable iterators pointed to a key // larger than iterate_upper_bound and was therefore destroyed. Seek() may // need to rebuild such iterators. bool has_iter_trimmed_for_upper_bound_; // Is current key larger than iterate_upper_bound? If so, makes Valid() // return false. 
bool current_over_upper_bound_; // Left endpoint of the range of keys that immutable iterators currently // cover. When Seek() is called with a key that's within that range, immutable // iterators don't need to be moved; see NeedToSeekImmutable(). This key is // included in the range after a Seek(), but excluded when advancing the // iterator using Next(). IterKey prev_key_; bool is_prev_set_; bool is_prev_inclusive_; PinnedIteratorsManager* pinned_iters_mgr_; Arena arena_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/forward_iterator_bench.cc000066400000000000000000000271251370372246700206640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #if !defined(GFLAGS) || defined(ROCKSDB_LITE) #include int main() { fprintf(stderr, "Please install gflags to run rocksdb tools\n"); return 1; } #elif defined(OS_MACOSX) || defined(OS_WIN) // Block forward_iterator_bench under MAC and Windows int main() { return 0; } #else #include #include #include #include #include #include #include #include #include #include #include #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "test_util/testharness.h" #include "util/gflags_compat.h" const int MAX_SHARDS = 100000; DEFINE_int32(writers, 8, ""); DEFINE_int32(readers, 8, ""); DEFINE_int64(rate, 100000, ""); DEFINE_int64(value_size, 300, ""); DEFINE_int64(shards, 1000, ""); DEFINE_int64(memtable_size, 500000000, ""); DEFINE_int64(block_cache_size, 300000000, ""); DEFINE_int64(block_size, 65536, ""); DEFINE_double(runtime, 300.0, ""); DEFINE_bool(cache_only_first, true, ""); DEFINE_bool(iterate_upper_bound, true, ""); struct Stats { char pad1[128] __attribute__((__unused__)); std::atomic 
written{0}; char pad2[128] __attribute__((__unused__)); std::atomic read{0}; std::atomic cache_misses{0}; char pad3[128] __attribute__((__unused__)); } stats; struct Key { Key() {} Key(uint64_t shard_in, uint64_t seqno_in) : shard_be(htobe64(shard_in)), seqno_be(htobe64(seqno_in)) {} uint64_t shard() const { return be64toh(shard_be); } uint64_t seqno() const { return be64toh(seqno_be); } private: uint64_t shard_be; uint64_t seqno_be; } __attribute__((__packed__)); struct Reader; struct Writer; struct ShardState { char pad1[128] __attribute__((__unused__)); std::atomic last_written{0}; Writer* writer; Reader* reader; char pad2[128] __attribute__((__unused__)); std::atomic last_read{0}; std::unique_ptr it; std::unique_ptr it_cacheonly; Key upper_bound; ROCKSDB_NAMESPACE::Slice upper_bound_slice; char pad3[128] __attribute__((__unused__)); }; struct Reader { public: explicit Reader(std::vector* shard_states, ROCKSDB_NAMESPACE::DB* db) : shard_states_(shard_states), db_(db) { sem_init(&sem_, 0, 0); thread_ = port::Thread(&Reader::run, this); } void run() { while (1) { sem_wait(&sem_); if (done_.load()) { break; } uint64_t shard; { std::lock_guard guard(queue_mutex_); assert(!shards_pending_queue_.empty()); shard = shards_pending_queue_.front(); shards_pending_queue_.pop(); shards_pending_set_.reset(shard); } readOnceFromShard(shard); } } void readOnceFromShard(uint64_t shard) { ShardState& state = (*shard_states_)[shard]; if (!state.it) { // Initialize iterators ROCKSDB_NAMESPACE::ReadOptions options; options.tailing = true; if (FLAGS_iterate_upper_bound) { state.upper_bound = Key(shard, std::numeric_limits::max()); state.upper_bound_slice = ROCKSDB_NAMESPACE::Slice( (const char*)&state.upper_bound, sizeof(state.upper_bound)); options.iterate_upper_bound = &state.upper_bound_slice; } state.it.reset(db_->NewIterator(options)); if (FLAGS_cache_only_first) { options.read_tier = ROCKSDB_NAMESPACE::ReadTier::kBlockCacheTier; 
state.it_cacheonly.reset(db_->NewIterator(options)); } } const uint64_t upto = state.last_written.load(); for (ROCKSDB_NAMESPACE::Iterator* it : {state.it_cacheonly.get(), state.it.get()}) { if (it == nullptr) { continue; } if (state.last_read.load() >= upto) { break; } bool need_seek = true; for (uint64_t seq = state.last_read.load() + 1; seq <= upto; ++seq) { if (need_seek) { Key from(shard, state.last_read.load() + 1); it->Seek(ROCKSDB_NAMESPACE::Slice((const char*)&from, sizeof(from))); need_seek = false; } else { it->Next(); } if (it->status().IsIncomplete()) { ++::stats.cache_misses; break; } assert(it->Valid()); assert(it->key().size() == sizeof(Key)); Key key; memcpy(&key, it->key().data(), it->key().size()); // fprintf(stderr, "Expecting (%ld, %ld) read (%ld, %ld)\n", // shard, seq, key.shard(), key.seqno()); assert(key.shard() == shard); assert(key.seqno() == seq); state.last_read.store(seq); ++::stats.read; } } } void onWrite(uint64_t shard) { { std::lock_guard guard(queue_mutex_); if (!shards_pending_set_.test(shard)) { shards_pending_queue_.push(shard); shards_pending_set_.set(shard); sem_post(&sem_); } } } ~Reader() { done_.store(true); sem_post(&sem_); thread_.join(); } private: char pad1[128] __attribute__((__unused__)); std::vector* shard_states_; ROCKSDB_NAMESPACE::DB* db_; ROCKSDB_NAMESPACE::port::Thread thread_; sem_t sem_; std::mutex queue_mutex_; std::bitset shards_pending_set_; std::queue shards_pending_queue_; std::atomic done_{false}; char pad2[128] __attribute__((__unused__)); }; struct Writer { explicit Writer(std::vector* shard_states, ROCKSDB_NAMESPACE::DB* db) : shard_states_(shard_states), db_(db) {} void start() { thread_ = port::Thread(&Writer::run, this); } void run() { std::queue workq; std::chrono::steady_clock::time_point deadline( std::chrono::steady_clock::now() + std::chrono::nanoseconds((uint64_t)(1000000000 * FLAGS_runtime))); std::vector my_shards; for (int i = 1; i <= FLAGS_shards; ++i) { if ((*shard_states_)[i].writer == 
this) { my_shards.push_back(i); } } std::mt19937 rng{std::random_device()()}; std::uniform_int_distribution shard_dist( 0, static_cast(my_shards.size()) - 1); std::string value(FLAGS_value_size, '*'); while (1) { auto now = std::chrono::steady_clock::now(); if (FLAGS_runtime >= 0 && now >= deadline) { break; } if (workq.empty()) { for (int i = 0; i < FLAGS_rate; i += FLAGS_writers) { std::chrono::nanoseconds offset(1000000000LL * i / FLAGS_rate); workq.push(now + offset); } } while (!workq.empty() && workq.front() < now) { workq.pop(); uint64_t shard = my_shards[shard_dist(rng)]; ShardState& state = (*shard_states_)[shard]; uint64_t seqno = state.last_written.load() + 1; Key key(shard, seqno); // fprintf(stderr, "Writing (%ld, %ld)\n", shard, seqno); ROCKSDB_NAMESPACE::Status status = db_->Put(ROCKSDB_NAMESPACE::WriteOptions(), ROCKSDB_NAMESPACE::Slice((const char*)&key, sizeof(key)), ROCKSDB_NAMESPACE::Slice(value)); assert(status.ok()); state.last_written.store(seqno); state.reader->onWrite(shard); ++::stats.written; } std::this_thread::sleep_for(std::chrono::milliseconds(1)); } // fprintf(stderr, "Writer done\n"); } ~Writer() { thread_.join(); } private: char pad1[128] __attribute__((__unused__)); std::vector* shard_states_; ROCKSDB_NAMESPACE::DB* db_; ROCKSDB_NAMESPACE::port::Thread thread_; char pad2[128] __attribute__((__unused__)); }; struct StatsThread { explicit StatsThread(ROCKSDB_NAMESPACE::DB* db) : db_(db), thread_(&StatsThread::run, this) {} void run() { // using namespace std::chrono; auto tstart = std::chrono::steady_clock::now(), tlast = tstart; uint64_t wlast = 0, rlast = 0; while (!done_.load()) { { std::unique_lock lock(cvm_); cv_.wait_for(lock, std::chrono::seconds(1)); } auto now = std::chrono::steady_clock::now(); double elapsed = std::chrono::duration_cast >( now - tlast).count(); uint64_t w = ::stats.written.load(); uint64_t r = ::stats.read.load(); fprintf(stderr, "%s elapsed %4lds | written %10ld | w/s %10.0f | read %10ld | " "r/s %10.0f 
| cache misses %10ld\n", db_->GetEnv()->TimeToString(time(nullptr)).c_str(), std::chrono::duration_cast(now - tstart) .count(), w, (w - wlast) / elapsed, r, (r - rlast) / elapsed, ::stats.cache_misses.load()); wlast = w; rlast = r; tlast = now; } } ~StatsThread() { { std::lock_guard guard(cvm_); done_.store(true); } cv_.notify_all(); thread_.join(); } private: ROCKSDB_NAMESPACE::DB* db_; std::mutex cvm_; std::condition_variable cv_; ROCKSDB_NAMESPACE::port::Thread thread_; std::atomic done_{false}; }; int main(int argc, char** argv) { GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); std::mt19937 rng{std::random_device()()}; ROCKSDB_NAMESPACE::Status status; std::string path = ROCKSDB_NAMESPACE::test::PerThreadDBPath("forward_iterator_test"); fprintf(stderr, "db path is %s\n", path.c_str()); ROCKSDB_NAMESPACE::Options options; options.create_if_missing = true; options.compression = ROCKSDB_NAMESPACE::CompressionType::kNoCompression; options.compaction_style = ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone; options.level0_slowdown_writes_trigger = 99999; options.level0_stop_writes_trigger = 99999; options.use_direct_io_for_flush_and_compaction = true; options.write_buffer_size = FLAGS_memtable_size; ROCKSDB_NAMESPACE::BlockBasedTableOptions table_options; table_options.block_cache = ROCKSDB_NAMESPACE::NewLRUCache(FLAGS_block_cache_size); table_options.block_size = FLAGS_block_size; options.table_factory.reset( ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(table_options)); status = ROCKSDB_NAMESPACE::DestroyDB(path, options); assert(status.ok()); ROCKSDB_NAMESPACE::DB* db_raw; status = ROCKSDB_NAMESPACE::DB::Open(options, path, &db_raw); assert(status.ok()); std::unique_ptr db(db_raw); std::vector shard_states(FLAGS_shards + 1); std::deque readers; while (static_cast(readers.size()) < FLAGS_readers) { readers.emplace_back(&shard_states, db_raw); } std::deque writers; while (static_cast(writers.size()) < FLAGS_writers) { 
writers.emplace_back(&shard_states, db_raw); } // Each shard gets a random reader and random writer assigned to it for (int i = 1; i <= FLAGS_shards; ++i) { std::uniform_int_distribution reader_dist(0, FLAGS_readers - 1); std::uniform_int_distribution writer_dist(0, FLAGS_writers - 1); shard_states[i].reader = &readers[reader_dist(rng)]; shard_states[i].writer = &writers[writer_dist(rng)]; } StatsThread stats_thread(db_raw); for (Writer& w : writers) { w.start(); } writers.clear(); readers.clear(); } #endif // !defined(GFLAGS) || defined(ROCKSDB_LITE) rocksdb-6.11.4/db/import_column_family_job.cc000066400000000000000000000225611370372246700212310ustar00rootroot00000000000000#ifndef ROCKSDB_LITE #include "db/import_column_family_job.h" #include #include #include #include #include "db/version_edit.h" #include "file/file_util.h" #include "file/random_access_file_reader.h" #include "table/merging_iterator.h" #include "table/scoped_arena_iterator.h" #include "table/sst_file_writer_collectors.h" #include "table/table_builder.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number, SuperVersion* sv) { Status status; // Read the information of files we are importing for (const auto& file_metadata : metadata_) { const auto file_path = file_metadata.db_path + "/" + file_metadata.name; IngestedFileInfo file_to_import; status = GetIngestedFileInfo(file_path, &file_to_import, sv); if (!status.ok()) { return status; } files_to_import_.push_back(file_to_import); } auto num_files = files_to_import_.size(); if (num_files == 0) { return Status::InvalidArgument("The list of files is empty"); } else if (num_files > 1) { // Verify that passed files don't have overlapping ranges in any particular // level. int min_level = 1; // Check for overlaps in Level 1 and above. 
int max_level = -1; for (const auto& file_metadata : metadata_) { if (file_metadata.level > max_level) { max_level = file_metadata.level; } } for (int level = min_level; level <= max_level; ++level) { autovector sorted_files; for (size_t i = 0; i < num_files; i++) { if (metadata_[i].level == level) { sorted_files.push_back(&files_to_import_[i]); } } std::sort( sorted_files.begin(), sorted_files.end(), [this](const IngestedFileInfo* info1, const IngestedFileInfo* info2) { return cfd_->internal_comparator().Compare( info1->smallest_internal_key, info2->smallest_internal_key) < 0; }); for (size_t i = 0; i + 1 < sorted_files.size(); i++) { if (cfd_->internal_comparator().Compare( sorted_files[i]->largest_internal_key, sorted_files[i + 1]->smallest_internal_key) >= 0) { return Status::InvalidArgument("Files have overlapping ranges"); } } } } for (const auto& f : files_to_import_) { if (f.num_entries == 0) { return Status::InvalidArgument("File contain no entries"); } if (!f.smallest_internal_key.Valid() || !f.largest_internal_key.Valid()) { return Status::Corruption("File has corrupted keys"); } } // Copy/Move external files into DB auto hardlink_files = import_options_.move_files; for (auto& f : files_to_import_) { f.fd = FileDescriptor(next_file_number++, 0, f.file_size); const auto path_outside_db = f.external_file_path; const auto path_inside_db = TableFileName( cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId()); if (hardlink_files) { status = fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr); if (status.IsNotSupported()) { // Original file is on a different FS, use copy instead of hard linking hardlink_files = false; } } if (!hardlink_files) { status = CopyFile(fs_, path_outside_db, path_inside_db, 0, db_options_.use_fsync); } if (!status.ok()) { break; } f.copy_file = !hardlink_files; f.internal_file_path = path_inside_db; } if (!status.ok()) { // We failed, remove all files that we copied into the db for (const auto& f : 
files_to_import_) { if (f.internal_file_path.empty()) { break; } const auto s = fs_->DeleteFile(f.internal_file_path, IOOptions(), nullptr); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "AddFile() clean up for file %s failed : %s", f.internal_file_path.c_str(), s.ToString().c_str()); } } } return status; } // REQUIRES: we have become the only writer by entering both write_thread_ and // nonmem_write_thread_ Status ImportColumnFamilyJob::Run() { Status status; edit_.SetColumnFamily(cfd_->GetID()); // We use the import time as the ancester time. This is the time the data // is written to the database. int64_t temp_current_time = 0; uint64_t oldest_ancester_time = kUnknownOldestAncesterTime; uint64_t current_time = kUnknownOldestAncesterTime; if (env_->GetCurrentTime(&temp_current_time).ok()) { current_time = oldest_ancester_time = static_cast(temp_current_time); } for (size_t i = 0; i < files_to_import_.size(); ++i) { const auto& f = files_to_import_[i]; const auto& file_metadata = metadata_[i]; edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(), f.smallest_internal_key, f.largest_internal_key, file_metadata.smallest_seqno, file_metadata.largest_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time, current_time, kUnknownFileChecksum, kUnknownFileChecksumFuncName); // If incoming sequence number is higher, update local sequence number. if (file_metadata.largest_seqno > versions_->LastSequence()) { versions_->SetLastAllocatedSequence(file_metadata.largest_seqno); versions_->SetLastPublishedSequence(file_metadata.largest_seqno); versions_->SetLastSequence(file_metadata.largest_seqno); } } return status; } void ImportColumnFamilyJob::Cleanup(const Status& status) { if (!status.ok()) { // We failed to add files to the database remove all the files we copied. 
for (const auto& f : files_to_import_) { const auto s = fs_->DeleteFile(f.internal_file_path, IOOptions(), nullptr); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "AddFile() clean up for file %s failed : %s", f.internal_file_path.c_str(), s.ToString().c_str()); } } } else if (status.ok() && import_options_.move_files) { // The files were moved and added successfully, remove original file links for (IngestedFileInfo& f : files_to_import_) { const auto s = fs_->DeleteFile(f.external_file_path, IOOptions(), nullptr); if (!s.ok()) { ROCKS_LOG_WARN( db_options_.info_log, "%s was added to DB successfully but failed to remove original " "file link : %s", f.external_file_path.c_str(), s.ToString().c_str()); } } } } Status ImportColumnFamilyJob::GetIngestedFileInfo( const std::string& external_file, IngestedFileInfo* file_to_import, SuperVersion* sv) { file_to_import->external_file_path = external_file; // Get external file size Status status = fs_->GetFileSize(external_file, IOOptions(), &file_to_import->file_size, nullptr); if (!status.ok()) { return status; } // Create TableReader for external file std::unique_ptr table_reader; std::unique_ptr sst_file; std::unique_ptr sst_file_reader; status = fs_->NewRandomAccessFile(external_file, env_options_, &sst_file, nullptr); if (!status.ok()) { return status; } sst_file_reader.reset( new RandomAccessFileReader(std::move(sst_file), external_file)); status = cfd_->ioptions()->table_factory->NewTableReader( TableReaderOptions(*cfd_->ioptions(), sv->mutable_cf_options.prefix_extractor.get(), env_options_, cfd_->internal_comparator()), std::move(sst_file_reader), file_to_import->file_size, &table_reader); if (!status.ok()) { return status; } // Get the external file properties auto props = table_reader->GetTableProperties(); // Set original_seqno to 0. 
file_to_import->original_seqno = 0; // Get number of entries in table file_to_import->num_entries = props->num_entries; ParsedInternalKey key; ReadOptions ro; // During reading the external file we can cache blocks that we read into // the block cache, if we later change the global seqno of this file, we will // have block in cache that will include keys with wrong seqno. // We need to disable fill_cache so that we read from the file without // updating the block cache. ro.fill_cache = false; std::unique_ptr iter(table_reader->NewIterator( ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion)); // Get first (smallest) key from file iter->SeekToFirst(); if (!ParseInternalKey(iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } file_to_import->smallest_internal_key.SetFrom(key); // Get last (largest) key from file iter->SeekToLast(); if (!ParseInternalKey(iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } file_to_import->largest_internal_key.SetFrom(key); file_to_import->cf_id = static_cast(props->column_family_id); file_to_import->table_properties = *props; return status; } } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/import_column_family_job.h000066400000000000000000000043471370372246700210750ustar00rootroot00000000000000#pragma once #include #include #include #include "db/column_family.h" #include "db/dbformat.h" #include "db/external_sst_file_ingestion_job.h" #include "db/snapshot_impl.h" #include "options/db_options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/metadata.h" #include "rocksdb/sst_file_writer.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { // Imports a set of sst files as is into a new column family. Logic is similar // to ExternalSstFileIngestionJob. 
class ImportColumnFamilyJob { public: ImportColumnFamilyJob(Env* env, VersionSet* versions, ColumnFamilyData* cfd, const ImmutableDBOptions& db_options, const EnvOptions& env_options, const ImportColumnFamilyOptions& import_options, const std::vector& metadata) : env_(env), versions_(versions), cfd_(cfd), db_options_(db_options), fs_(db_options_.fs.get()), env_options_(env_options), import_options_(import_options), metadata_(metadata) {} // Prepare the job by copying external files into the DB. Status Prepare(uint64_t next_file_number, SuperVersion* sv); // Will execute the import job and prepare edit() to be applied. // REQUIRES: Mutex held Status Run(); // Cleanup after successful/failed job void Cleanup(const Status& status); VersionEdit* edit() { return &edit_; } const autovector& files_to_import() const { return files_to_import_; } private: // Open the external file and populate `file_to_import` with all the // external information we need to import this file. Status GetIngestedFileInfo(const std::string& external_file, IngestedFileInfo* file_to_import, SuperVersion* sv); Env* env_; VersionSet* versions_; ColumnFamilyData* cfd_; const ImmutableDBOptions& db_options_; FileSystem* fs_; const EnvOptions& env_options_; autovector files_to_import_; VersionEdit edit_; const ImportColumnFamilyOptions& import_options_; std::vector metadata_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/import_column_family_test.cc000066400000000000000000000524601370372246700214370ustar00rootroot00000000000000#ifndef ROCKSDB_LITE #include #include "db/db_test_util.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/sst_file_writer.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { class ImportColumnFamilyTest : public DBTestBase { public: ImportColumnFamilyTest() : DBTestBase("/import_column_family_test") { sst_files_dir_ = dbname_ + "/sst_files/"; DestroyAndRecreateExternalSSTFilesDir(); export_files_dir_ = test::PerThreadDBPath(env_, 
"export"); import_cfh_ = nullptr; import_cfh2_ = nullptr; metadata_ptr_ = nullptr; } ~ImportColumnFamilyTest() { if (import_cfh_) { db_->DropColumnFamily(import_cfh_); db_->DestroyColumnFamilyHandle(import_cfh_); import_cfh_ = nullptr; } if (import_cfh2_) { db_->DropColumnFamily(import_cfh2_); db_->DestroyColumnFamilyHandle(import_cfh2_); import_cfh2_ = nullptr; } if (metadata_ptr_) { delete metadata_ptr_; metadata_ptr_ = nullptr; } test::DestroyDir(env_, sst_files_dir_); test::DestroyDir(env_, export_files_dir_); } void DestroyAndRecreateExternalSSTFilesDir() { test::DestroyDir(env_, sst_files_dir_); env_->CreateDir(sst_files_dir_); test::DestroyDir(env_, export_files_dir_); } LiveFileMetaData LiveFileMetaDataInit(std::string name, std::string path, int level, SequenceNumber smallest_seqno, SequenceNumber largest_seqno) { LiveFileMetaData metadata; metadata.name = name; metadata.db_path = path; metadata.smallest_seqno = smallest_seqno; metadata.largest_seqno = largest_seqno; metadata.level = level; return metadata; } protected: std::string sst_files_dir_; std::string export_files_dir_; ColumnFamilyHandle* import_cfh_; ColumnFamilyHandle* import_cfh2_; ExportImportFilesMetaData* metadata_ptr_; }; TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFiles) { Options options = CurrentOptions(); CreateAndReopenWithCF({"koko"}, options); SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); SstFileWriter sfw_unknown(EnvOptions(), options); // cf1.sst const std::string cf1_sst_name = "cf1.sst"; const std::string cf1_sst = sst_files_dir_ + cf1_sst_name; ASSERT_OK(sfw_cf1.Open(cf1_sst)); ASSERT_OK(sfw_cf1.Put("K1", "V1")); ASSERT_OK(sfw_cf1.Put("K2", "V2")); ASSERT_OK(sfw_cf1.Finish()); // cf_unknown.sst const std::string unknown_sst_name = "cf_unknown.sst"; const std::string unknown_sst = sst_files_dir_ + unknown_sst_name; ASSERT_OK(sfw_unknown.Open(unknown_sst)); ASSERT_OK(sfw_unknown.Put("K3", "V1")); ASSERT_OK(sfw_unknown.Put("K4", "V2")); 
ASSERT_OK(sfw_unknown.Finish()); { // Import sst file corresponding to cf1 onto a new cf and verify ExportImportFilesMetaData metadata; metadata.files.push_back( LiveFileMetaDataInit(cf1_sst_name, sst_files_dir_, 0, 10, 19)); metadata.db_comparator_name = options.comparator->Name(); ASSERT_OK(db_->CreateColumnFamilyWithImport( options, "toto", ImportColumnFamilyOptions(), metadata, &import_cfh_)); ASSERT_NE(import_cfh_, nullptr); std::string value; db_->Get(ReadOptions(), import_cfh_, "K1", &value); ASSERT_EQ(value, "V1"); db_->Get(ReadOptions(), import_cfh_, "K2", &value); ASSERT_EQ(value, "V2"); ASSERT_OK(db_->DropColumnFamily(import_cfh_)); ASSERT_OK(db_->DestroyColumnFamilyHandle(import_cfh_)); import_cfh_ = nullptr; } { // Import sst file corresponding to unknown cf onto a new cf and verify ExportImportFilesMetaData metadata; metadata.files.push_back( LiveFileMetaDataInit(unknown_sst_name, sst_files_dir_, 0, 20, 29)); metadata.db_comparator_name = options.comparator->Name(); ASSERT_OK(db_->CreateColumnFamilyWithImport( options, "yoyo", ImportColumnFamilyOptions(), metadata, &import_cfh_)); ASSERT_NE(import_cfh_, nullptr); std::string value; db_->Get(ReadOptions(), import_cfh_, "K3", &value); ASSERT_EQ(value, "V1"); db_->Get(ReadOptions(), import_cfh_, "K4", &value); ASSERT_EQ(value, "V2"); } } TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithOverlap) { Options options = CurrentOptions(); CreateAndReopenWithCF({"koko"}, options); SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); // file3.sst const std::string file3_sst_name = "file3.sst"; const std::string file3_sst = sst_files_dir_ + file3_sst_name; ASSERT_OK(sfw_cf1.Open(file3_sst)); for (int i = 0; i < 100; ++i) { sfw_cf1.Put(Key(i), Key(i) + "_val"); } ASSERT_OK(sfw_cf1.Finish()); // file2.sst const std::string file2_sst_name = "file2.sst"; const std::string file2_sst = sst_files_dir_ + file2_sst_name; ASSERT_OK(sfw_cf1.Open(file2_sst)); for (int i = 0; i < 100; i += 2) { 
sfw_cf1.Put(Key(i), Key(i) + "_overwrite1"); } ASSERT_OK(sfw_cf1.Finish()); // file1a.sst const std::string file1a_sst_name = "file1a.sst"; const std::string file1a_sst = sst_files_dir_ + file1a_sst_name; ASSERT_OK(sfw_cf1.Open(file1a_sst)); for (int i = 0; i < 52; i += 4) { sfw_cf1.Put(Key(i), Key(i) + "_overwrite2"); } ASSERT_OK(sfw_cf1.Finish()); // file1b.sst const std::string file1b_sst_name = "file1b.sst"; const std::string file1b_sst = sst_files_dir_ + file1b_sst_name; ASSERT_OK(sfw_cf1.Open(file1b_sst)); for (int i = 52; i < 100; i += 4) { sfw_cf1.Put(Key(i), Key(i) + "_overwrite2"); } ASSERT_OK(sfw_cf1.Finish()); // file0a.sst const std::string file0a_sst_name = "file0a.sst"; const std::string file0a_sst = sst_files_dir_ + file0a_sst_name; ASSERT_OK(sfw_cf1.Open(file0a_sst)); for (int i = 0; i < 100; i += 16) { sfw_cf1.Put(Key(i), Key(i) + "_overwrite3"); } ASSERT_OK(sfw_cf1.Finish()); // file0b.sst const std::string file0b_sst_name = "file0b.sst"; const std::string file0b_sst = sst_files_dir_ + file0b_sst_name; ASSERT_OK(sfw_cf1.Open(file0b_sst)); for (int i = 0; i < 100; i += 16) { sfw_cf1.Put(Key(i), Key(i) + "_overwrite4"); } ASSERT_OK(sfw_cf1.Finish()); // Import sst files and verify ExportImportFilesMetaData metadata; metadata.files.push_back( LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 3, 10, 19)); metadata.files.push_back( LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 2, 20, 29)); metadata.files.push_back( LiveFileMetaDataInit(file1a_sst_name, sst_files_dir_, 1, 30, 34)); metadata.files.push_back( LiveFileMetaDataInit(file1b_sst_name, sst_files_dir_, 1, 35, 39)); metadata.files.push_back( LiveFileMetaDataInit(file0a_sst_name, sst_files_dir_, 0, 40, 49)); metadata.files.push_back( LiveFileMetaDataInit(file0b_sst_name, sst_files_dir_, 0, 50, 59)); metadata.db_comparator_name = options.comparator->Name(); ASSERT_OK(db_->CreateColumnFamilyWithImport( options, "toto", ImportColumnFamilyOptions(), metadata, &import_cfh_)); 
ASSERT_NE(import_cfh_, nullptr); for (int i = 0; i < 100; i++) { std::string value; db_->Get(ReadOptions(), import_cfh_, Key(i), &value); if (i % 16 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite4"); } else if (i % 4 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite2"); } else if (i % 2 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite1"); } else { ASSERT_EQ(value, Key(i) + "_val"); } } for (int i = 0; i < 100; i += 5) { ASSERT_OK( db_->Put(WriteOptions(), import_cfh_, Key(i), Key(i) + "_overwrite5")); } // Flush and check again ASSERT_OK(db_->Flush(FlushOptions(), import_cfh_)); for (int i = 0; i < 100; i++) { std::string value; db_->Get(ReadOptions(), import_cfh_, Key(i), &value); if (i % 5 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite5"); } else if (i % 16 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite4"); } else if (i % 4 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite2"); } else if (i % 2 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite1"); } else { ASSERT_EQ(value, Key(i) + "_val"); } } // Compact and check again. ASSERT_OK( db_->CompactRange(CompactRangeOptions(), import_cfh_, nullptr, nullptr)); for (int i = 0; i < 100; i++) { std::string value; db_->Get(ReadOptions(), import_cfh_, Key(i), &value); if (i % 5 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite5"); } else if (i % 16 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite4"); } else if (i % 4 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite2"); } else if (i % 2 == 0) { ASSERT_EQ(value, Key(i) + "_overwrite1"); } else { ASSERT_EQ(value, Key(i) + "_val"); } } } TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherCF) { Options options = CurrentOptions(); CreateAndReopenWithCF({"koko"}, options); for (int i = 0; i < 100; ++i) { Put(1, Key(i), Key(i) + "_val"); } ASSERT_OK(Flush(1)); ASSERT_OK( db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); // Overwrite the value in the same set of keys. for (int i = 0; i < 100; ++i) { Put(1, Key(i), Key(i) + "_overwrite"); } // Flush to create L0 file. 
ASSERT_OK(Flush(1)); for (int i = 0; i < 100; ++i) { Put(1, Key(i), Key(i) + "_overwrite2"); } // Flush again to create another L0 file. It should have higher sequencer. ASSERT_OK(Flush(1)); Checkpoint* checkpoint; ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, &metadata_ptr_)); ASSERT_NE(metadata_ptr_, nullptr); delete checkpoint; ImportColumnFamilyOptions import_options; import_options.move_files = false; ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "toto", import_options, *metadata_ptr_, &import_cfh_)); ASSERT_NE(import_cfh_, nullptr); import_options.move_files = true; ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "yoyo", import_options, *metadata_ptr_, &import_cfh2_)); ASSERT_NE(import_cfh2_, nullptr); delete metadata_ptr_; metadata_ptr_ = NULL; std::string value1, value2; for (int i = 0; i < 100; ++i) { db_->Get(ReadOptions(), import_cfh_, Key(i), &value1); ASSERT_EQ(Get(1, Key(i)), value1); } for (int i = 0; i < 100; ++i) { db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2); ASSERT_EQ(Get(1, Key(i)), value2); } // Modify keys in cf1 and verify. for (int i = 0; i < 25; i++) { ASSERT_OK(db_->Delete(WriteOptions(), import_cfh_, Key(i))); } for (int i = 25; i < 50; i++) { ASSERT_OK( db_->Put(WriteOptions(), import_cfh_, Key(i), Key(i) + "_overwrite3")); } for (int i = 0; i < 25; ++i) { ASSERT_TRUE( db_->Get(ReadOptions(), import_cfh_, Key(i), &value1).IsNotFound()); } for (int i = 25; i < 50; ++i) { db_->Get(ReadOptions(), import_cfh_, Key(i), &value1); ASSERT_EQ(Key(i) + "_overwrite3", value1); } for (int i = 50; i < 100; ++i) { db_->Get(ReadOptions(), import_cfh_, Key(i), &value1); ASSERT_EQ(Key(i) + "_overwrite2", value1); } for (int i = 0; i < 100; ++i) { db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2); ASSERT_EQ(Get(1, Key(i)), value2); } // Compact and check again. 
ASSERT_OK(db_->Flush(FlushOptions(), import_cfh_)); ASSERT_OK( db_->CompactRange(CompactRangeOptions(), import_cfh_, nullptr, nullptr)); for (int i = 0; i < 25; ++i) { ASSERT_TRUE( db_->Get(ReadOptions(), import_cfh_, Key(i), &value1).IsNotFound()); } for (int i = 25; i < 50; ++i) { db_->Get(ReadOptions(), import_cfh_, Key(i), &value1); ASSERT_EQ(Key(i) + "_overwrite3", value1); } for (int i = 50; i < 100; ++i) { db_->Get(ReadOptions(), import_cfh_, Key(i), &value1); ASSERT_EQ(Key(i) + "_overwrite2", value1); } for (int i = 0; i < 100; ++i) { db_->Get(ReadOptions(), import_cfh2_, Key(i), &value2); ASSERT_EQ(Get(1, Key(i)), value2); } } TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherDB) { Options options = CurrentOptions(); CreateAndReopenWithCF({"koko"}, options); for (int i = 0; i < 100; ++i) { Put(1, Key(i), Key(i) + "_val"); } ASSERT_OK(Flush(1)); // Compact to create a L1 file. ASSERT_OK( db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); // Overwrite the value in the same set of keys. for (int i = 0; i < 50; ++i) { Put(1, Key(i), Key(i) + "_overwrite"); } // Flush to create L0 file. ASSERT_OK(Flush(1)); for (int i = 0; i < 25; ++i) { Put(1, Key(i), Key(i) + "_overwrite2"); } // Flush again to create another L0 file. It should have higher sequencer. ASSERT_OK(Flush(1)); Checkpoint* checkpoint; ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, &metadata_ptr_)); ASSERT_NE(metadata_ptr_, nullptr); delete checkpoint; // Create a new db and import the files. 
DB* db_copy; test::DestroyDir(env_, dbname_ + "/db_copy"); ASSERT_OK(DB::Open(options, dbname_ + "/db_copy", &db_copy)); ColumnFamilyHandle* cfh = nullptr; ASSERT_OK(db_copy->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(), *metadata_ptr_, &cfh)); ASSERT_NE(cfh, nullptr); for (int i = 0; i < 100; ++i) { std::string value; db_copy->Get(ReadOptions(), cfh, Key(i), &value); ASSERT_EQ(Get(1, Key(i)), value); } db_copy->DropColumnFamily(cfh); db_copy->DestroyColumnFamilyHandle(cfh); delete db_copy; test::DestroyDir(env_, dbname_ + "/db_copy"); } TEST_F(ImportColumnFamilyTest, LevelFilesOverlappingAtEndpoints) { // Imports a column family containing a level where two files overlap at their // endpoints. "Overlap" means the largest user key in one file is the same as // the smallest user key in the second file. const int kFileBytes = 128 << 10; // 128KB const int kValueBytes = 1 << 10; // 1KB const int kNumFiles = 4; Options options = CurrentOptions(); options.disable_auto_compactions = true; options.num_levels = 2; CreateAndReopenWithCF({"koko"}, options); Random rnd(301); // Every key is snapshot protected to ensure older versions will not be // dropped during compaction. std::vector snapshots; snapshots.reserve(kFileBytes / kValueBytes * kNumFiles); for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kFileBytes / kValueBytes; ++j) { auto value = RandomString(&rnd, kValueBytes); ASSERT_OK(Put(1, "key", value)); snapshots.push_back(db_->GetSnapshot()); } ASSERT_OK(Flush(1)); } // Compact to create overlapping L1 files. ASSERT_OK( db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr)); ASSERT_GT(NumTableFilesAtLevel(1, 1), 1); Checkpoint* checkpoint; ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir_, &metadata_ptr_)); ASSERT_NE(metadata_ptr_, nullptr); delete checkpoint; // Create a new db and import the files. 
DB* db_copy; test::DestroyDir(env_, dbname_ + "/db_copy"); ASSERT_OK(DB::Open(options, dbname_ + "/db_copy", &db_copy)); ColumnFamilyHandle* cfh = nullptr; ASSERT_OK(db_copy->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(), *metadata_ptr_, &cfh)); ASSERT_NE(cfh, nullptr); { std::string value; ASSERT_OK(db_copy->Get(ReadOptions(), cfh, "key", &value)); } db_copy->DropColumnFamily(cfh); db_copy->DestroyColumnFamilyHandle(cfh); delete db_copy; test::DestroyDir(env_, dbname_ + "/db_copy"); for (const Snapshot* snapshot : snapshots) { db_->ReleaseSnapshot(snapshot); } } TEST_F(ImportColumnFamilyTest, ImportColumnFamilyNegativeTest) { Options options = CurrentOptions(); CreateAndReopenWithCF({"koko"}, options); { // Create column family with existing cf name. ExportImportFilesMetaData metadata; ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "koko", ImportColumnFamilyOptions(), metadata, &import_cfh_), Status::InvalidArgument("Column family already exists")); ASSERT_EQ(import_cfh_, nullptr); } { // Import with no files specified. ExportImportFilesMetaData metadata; ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(), metadata, &import_cfh_), Status::InvalidArgument("The list of files is empty")); ASSERT_EQ(import_cfh_, nullptr); } { // Import with overlapping keys in sst files. 
ExportImportFilesMetaData metadata; SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); const std::string file1_sst_name = "file1.sst"; const std::string file1_sst = sst_files_dir_ + file1_sst_name; ASSERT_OK(sfw_cf1.Open(file1_sst)); ASSERT_OK(sfw_cf1.Put("K1", "V1")); ASSERT_OK(sfw_cf1.Put("K2", "V2")); ASSERT_OK(sfw_cf1.Finish()); const std::string file2_sst_name = "file2.sst"; const std::string file2_sst = sst_files_dir_ + file2_sst_name; ASSERT_OK(sfw_cf1.Open(file2_sst)); ASSERT_OK(sfw_cf1.Put("K2", "V2")); ASSERT_OK(sfw_cf1.Put("K3", "V3")); ASSERT_OK(sfw_cf1.Finish()); metadata.files.push_back( LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19)); metadata.files.push_back( LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 1, 10, 19)); metadata.db_comparator_name = options.comparator->Name(); ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(), metadata, &import_cfh_), Status::InvalidArgument("Files have overlapping ranges")); ASSERT_EQ(import_cfh_, nullptr); } { // Import with a mismatching comparator, should fail with appropriate error. 
ExportImportFilesMetaData metadata; Options mismatch_options = CurrentOptions(); mismatch_options.comparator = ReverseBytewiseComparator(); SstFileWriter sfw_cf1(EnvOptions(), mismatch_options, handles_[1]); const std::string file1_sst_name = "file1.sst"; const std::string file1_sst = sst_files_dir_ + file1_sst_name; ASSERT_OK(sfw_cf1.Open(file1_sst)); ASSERT_OK(sfw_cf1.Put("K2", "V2")); ASSERT_OK(sfw_cf1.Put("K1", "V1")); ASSERT_OK(sfw_cf1.Finish()); metadata.files.push_back( LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19)); metadata.db_comparator_name = mismatch_options.comparator->Name(); ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "coco", ImportColumnFamilyOptions(), metadata, &import_cfh_), Status::InvalidArgument("Comparator name mismatch")); ASSERT_EQ(import_cfh_, nullptr); } { // Import with non existent sst file should fail with appropriate error ExportImportFilesMetaData metadata; SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); const std::string file1_sst_name = "file1.sst"; const std::string file1_sst = sst_files_dir_ + file1_sst_name; ASSERT_OK(sfw_cf1.Open(file1_sst)); ASSERT_OK(sfw_cf1.Put("K1", "V1")); ASSERT_OK(sfw_cf1.Put("K2", "V2")); ASSERT_OK(sfw_cf1.Finish()); const std::string file3_sst_name = "file3.sst"; metadata.files.push_back( LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 10, 19)); metadata.files.push_back( LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 1, 10, 19)); metadata.db_comparator_name = options.comparator->Name(); ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(), metadata, &import_cfh_), Status::IOError("No such file or directory")); ASSERT_EQ(import_cfh_, nullptr); // Test successful import after a failure with the same CF name. 
Ensures // there is no side effect with CF when there is a failed import metadata.files.pop_back(); metadata.db_comparator_name = options.comparator->Name(); ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", ImportColumnFamilyOptions(), metadata, &import_cfh_)); ASSERT_NE(import_cfh_, nullptr); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as External SST File Writer and Import are not supported " "in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/internal_stats.cc000066400000000000000000001664611370372246700172110ustar00rootroot00000000000000// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/internal_stats.h" #include #include #include #include #include #include #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "table/block_based/block_based_table_factory.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE const std::map InternalStats::compaction_level_stats = { {LevelStatType::NUM_FILES, LevelStat{"NumFiles", "Files"}}, {LevelStatType::COMPACTED_FILES, LevelStat{"CompactedFiles", "CompactedFiles"}}, {LevelStatType::SIZE_BYTES, LevelStat{"SizeBytes", "Size"}}, {LevelStatType::SCORE, LevelStat{"Score", "Score"}}, {LevelStatType::READ_GB, LevelStat{"ReadGB", "Read(GB)"}}, {LevelStatType::RN_GB, LevelStat{"RnGB", "Rn(GB)"}}, {LevelStatType::RNP1_GB, LevelStat{"Rnp1GB", "Rnp1(GB)"}}, {LevelStatType::WRITE_GB, LevelStat{"WriteGB", "Write(GB)"}}, {LevelStatType::W_NEW_GB, LevelStat{"WnewGB", "Wnew(GB)"}}, {LevelStatType::MOVED_GB, LevelStat{"MovedGB", "Moved(GB)"}}, {LevelStatType::WRITE_AMP, LevelStat{"WriteAmp", "W-Amp"}}, {LevelStatType::READ_MBPS, LevelStat{"ReadMBps", "Rd(MB/s)"}}, {LevelStatType::WRITE_MBPS, LevelStat{"WriteMBps", "Wr(MB/s)"}}, {LevelStatType::COMP_SEC, LevelStat{"CompSec", "Comp(sec)"}}, {LevelStatType::COMP_CPU_SEC, LevelStat{"CompMergeCPU", "CompMergeCPU(sec)"}}, {LevelStatType::COMP_COUNT, LevelStat{"CompCount", "Comp(cnt)"}}, {LevelStatType::AVG_SEC, LevelStat{"AvgSec", "Avg(sec)"}}, {LevelStatType::KEY_IN, LevelStat{"KeyIn", "KeyIn"}}, {LevelStatType::KEY_DROP, LevelStat{"KeyDrop", "KeyDrop"}}, }; namespace { const double kMB = 1048576.0; const double kGB = kMB * 1024; const double kMicrosInSec = 1000000.0; void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name, const std::string& group_by) { int written_size = snprintf(buf, len, "\n** Compaction Stats [%s] **\n", cf_name.c_str()); auto hdr = [](LevelStatType t) { return InternalStats::compaction_level_stats.at(t).header_name.c_str(); }; int line_size = snprintf( buf + written_size, len - 
written_size, "%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", // Note that we skip COMPACTED_FILES and merge it with Files column group_by.c_str(), hdr(LevelStatType::NUM_FILES), hdr(LevelStatType::SIZE_BYTES), hdr(LevelStatType::SCORE), hdr(LevelStatType::READ_GB), hdr(LevelStatType::RN_GB), hdr(LevelStatType::RNP1_GB), hdr(LevelStatType::WRITE_GB), hdr(LevelStatType::W_NEW_GB), hdr(LevelStatType::MOVED_GB), hdr(LevelStatType::WRITE_AMP), hdr(LevelStatType::READ_MBPS), hdr(LevelStatType::WRITE_MBPS), hdr(LevelStatType::COMP_SEC), hdr(LevelStatType::COMP_CPU_SEC), hdr(LevelStatType::COMP_COUNT), hdr(LevelStatType::AVG_SEC), hdr(LevelStatType::KEY_IN), hdr(LevelStatType::KEY_DROP)); written_size += line_size; snprintf(buf + written_size, len - written_size, "%s\n", std::string(line_size, '-').c_str()); } void PrepareLevelStats(std::map* level_stats, int num_files, int being_compacted, double total_file_size, double score, double w_amp, const InternalStats::CompactionStats& stats) { uint64_t bytes_read = stats.bytes_read_non_output_levels + stats.bytes_read_output_level; int64_t bytes_new = stats.bytes_written - stats.bytes_read_output_level; double elapsed = (stats.micros + 1) / kMicrosInSec; (*level_stats)[LevelStatType::NUM_FILES] = num_files; (*level_stats)[LevelStatType::COMPACTED_FILES] = being_compacted; (*level_stats)[LevelStatType::SIZE_BYTES] = total_file_size; (*level_stats)[LevelStatType::SCORE] = score; (*level_stats)[LevelStatType::READ_GB] = bytes_read / kGB; (*level_stats)[LevelStatType::RN_GB] = stats.bytes_read_non_output_levels / kGB; (*level_stats)[LevelStatType::RNP1_GB] = stats.bytes_read_output_level / kGB; (*level_stats)[LevelStatType::WRITE_GB] = stats.bytes_written / kGB; (*level_stats)[LevelStatType::W_NEW_GB] = bytes_new / kGB; (*level_stats)[LevelStatType::MOVED_GB] = stats.bytes_moved / kGB; (*level_stats)[LevelStatType::WRITE_AMP] = w_amp; (*level_stats)[LevelStatType::READ_MBPS] = bytes_read / kMB / elapsed; 
(*level_stats)[LevelStatType::WRITE_MBPS] = stats.bytes_written / kMB / elapsed; (*level_stats)[LevelStatType::COMP_SEC] = stats.micros / kMicrosInSec; (*level_stats)[LevelStatType::COMP_CPU_SEC] = stats.cpu_micros / kMicrosInSec; (*level_stats)[LevelStatType::COMP_COUNT] = stats.count; (*level_stats)[LevelStatType::AVG_SEC] = stats.count == 0 ? 0 : stats.micros / kMicrosInSec / stats.count; (*level_stats)[LevelStatType::KEY_IN] = static_cast(stats.num_input_records); (*level_stats)[LevelStatType::KEY_DROP] = static_cast(stats.num_dropped_records); } void PrintLevelStats(char* buf, size_t len, const std::string& name, const std::map& stat_value) { snprintf( buf, len, "%4s " /* Level */ "%6d/%-3d " /* Files */ "%8s " /* Size */ "%5.1f " /* Score */ "%8.1f " /* Read(GB) */ "%7.1f " /* Rn(GB) */ "%8.1f " /* Rnp1(GB) */ "%9.1f " /* Write(GB) */ "%8.1f " /* Wnew(GB) */ "%9.1f " /* Moved(GB) */ "%5.1f " /* W-Amp */ "%8.1f " /* Rd(MB/s) */ "%8.1f " /* Wr(MB/s) */ "%9.2f " /* Comp(sec) */ "%17.2f " /* CompMergeCPU(sec) */ "%9d " /* Comp(cnt) */ "%8.3f " /* Avg(sec) */ "%7s " /* KeyIn */ "%6s\n", /* KeyDrop */ name.c_str(), static_cast(stat_value.at(LevelStatType::NUM_FILES)), static_cast(stat_value.at(LevelStatType::COMPACTED_FILES)), BytesToHumanString( static_cast(stat_value.at(LevelStatType::SIZE_BYTES))) .c_str(), stat_value.at(LevelStatType::SCORE), stat_value.at(LevelStatType::READ_GB), stat_value.at(LevelStatType::RN_GB), stat_value.at(LevelStatType::RNP1_GB), stat_value.at(LevelStatType::WRITE_GB), stat_value.at(LevelStatType::W_NEW_GB), stat_value.at(LevelStatType::MOVED_GB), stat_value.at(LevelStatType::WRITE_AMP), stat_value.at(LevelStatType::READ_MBPS), stat_value.at(LevelStatType::WRITE_MBPS), stat_value.at(LevelStatType::COMP_SEC), stat_value.at(LevelStatType::COMP_CPU_SEC), static_cast(stat_value.at(LevelStatType::COMP_COUNT)), stat_value.at(LevelStatType::AVG_SEC), NumberToHumanString( static_cast(stat_value.at(LevelStatType::KEY_IN))) .c_str(), 
NumberToHumanString( static_cast(stat_value.at(LevelStatType::KEY_DROP))) .c_str()); } void PrintLevelStats(char* buf, size_t len, const std::string& name, int num_files, int being_compacted, double total_file_size, double score, double w_amp, const InternalStats::CompactionStats& stats) { std::map level_stats; PrepareLevelStats(&level_stats, num_files, being_compacted, total_file_size, score, w_amp, stats); PrintLevelStats(buf, len, name, level_stats); } // Assumes that trailing numbers represent an optional argument. This requires // property names to not end with numbers. std::pair GetPropertyNameAndArg(const Slice& property) { Slice name = property, arg = property; size_t sfx_len = 0; while (sfx_len < property.size() && isdigit(property[property.size() - sfx_len - 1])) { ++sfx_len; } name.remove_suffix(sfx_len); arg.remove_prefix(property.size() - sfx_len); return {name, arg}; } } // anonymous namespace static const std::string rocksdb_prefix = "rocksdb."; static const std::string num_files_at_level_prefix = "num-files-at-level"; static const std::string compression_ratio_at_level_prefix = "compression-ratio-at-level"; static const std::string allstats = "stats"; static const std::string sstables = "sstables"; static const std::string cfstats = "cfstats"; static const std::string cfstats_no_file_histogram = "cfstats-no-file-histogram"; static const std::string cf_file_histogram = "cf-file-histogram"; static const std::string dbstats = "dbstats"; static const std::string levelstats = "levelstats"; static const std::string num_immutable_mem_table = "num-immutable-mem-table"; static const std::string num_immutable_mem_table_flushed = "num-immutable-mem-table-flushed"; static const std::string mem_table_flush_pending = "mem-table-flush-pending"; static const std::string compaction_pending = "compaction-pending"; static const std::string background_errors = "background-errors"; static const std::string cur_size_active_mem_table = "cur-size-active-mem-table"; static 
const std::string cur_size_all_mem_tables = "cur-size-all-mem-tables"; static const std::string size_all_mem_tables = "size-all-mem-tables"; static const std::string num_entries_active_mem_table = "num-entries-active-mem-table"; static const std::string num_entries_imm_mem_tables = "num-entries-imm-mem-tables"; static const std::string num_deletes_active_mem_table = "num-deletes-active-mem-table"; static const std::string num_deletes_imm_mem_tables = "num-deletes-imm-mem-tables"; static const std::string estimate_num_keys = "estimate-num-keys"; static const std::string estimate_table_readers_mem = "estimate-table-readers-mem"; static const std::string is_file_deletions_enabled = "is-file-deletions-enabled"; static const std::string num_snapshots = "num-snapshots"; static const std::string oldest_snapshot_time = "oldest-snapshot-time"; static const std::string oldest_snapshot_sequence = "oldest-snapshot-sequence"; static const std::string num_live_versions = "num-live-versions"; static const std::string current_version_number = "current-super-version-number"; static const std::string estimate_live_data_size = "estimate-live-data-size"; static const std::string min_log_number_to_keep_str = "min-log-number-to-keep"; static const std::string min_obsolete_sst_number_to_keep_str = "min-obsolete-sst-number-to-keep"; static const std::string base_level_str = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; static const std::string live_sst_files_size = "live-sst-files-size"; static const std::string estimate_pending_comp_bytes = "estimate-pending-compaction-bytes"; static const std::string aggregated_table_properties = "aggregated-table-properties"; static const std::string aggregated_table_properties_at_level = aggregated_table_properties + "-at-level"; static const std::string num_running_compactions = "num-running-compactions"; static const std::string num_running_flushes = "num-running-flushes"; static const std::string 
actual_delayed_write_rate = "actual-delayed-write-rate"; static const std::string is_write_stopped = "is-write-stopped"; static const std::string estimate_oldest_key_time = "estimate-oldest-key-time"; static const std::string block_cache_capacity = "block-cache-capacity"; static const std::string block_cache_usage = "block-cache-usage"; static const std::string block_cache_pinned_usage = "block-cache-pinned-usage"; static const std::string options_statistics = "options-statistics"; const std::string DB::Properties::kNumFilesAtLevelPrefix = rocksdb_prefix + num_files_at_level_prefix; const std::string DB::Properties::kCompressionRatioAtLevelPrefix = rocksdb_prefix + compression_ratio_at_level_prefix; const std::string DB::Properties::kStats = rocksdb_prefix + allstats; const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables; const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats; const std::string DB::Properties::kCFStatsNoFileHistogram = rocksdb_prefix + cfstats_no_file_histogram; const std::string DB::Properties::kCFFileHistogram = rocksdb_prefix + cf_file_histogram; const std::string DB::Properties::kDBStats = rocksdb_prefix + dbstats; const std::string DB::Properties::kLevelStats = rocksdb_prefix + levelstats; const std::string DB::Properties::kNumImmutableMemTable = rocksdb_prefix + num_immutable_mem_table; const std::string DB::Properties::kNumImmutableMemTableFlushed = rocksdb_prefix + num_immutable_mem_table_flushed; const std::string DB::Properties::kMemTableFlushPending = rocksdb_prefix + mem_table_flush_pending; const std::string DB::Properties::kCompactionPending = rocksdb_prefix + compaction_pending; const std::string DB::Properties::kNumRunningCompactions = rocksdb_prefix + num_running_compactions; const std::string DB::Properties::kNumRunningFlushes = rocksdb_prefix + num_running_flushes; const std::string DB::Properties::kBackgroundErrors = rocksdb_prefix + background_errors; const std::string 
DB::Properties::kCurSizeActiveMemTable = rocksdb_prefix + cur_size_active_mem_table; const std::string DB::Properties::kCurSizeAllMemTables = rocksdb_prefix + cur_size_all_mem_tables; const std::string DB::Properties::kSizeAllMemTables = rocksdb_prefix + size_all_mem_tables; const std::string DB::Properties::kNumEntriesActiveMemTable = rocksdb_prefix + num_entries_active_mem_table; const std::string DB::Properties::kNumEntriesImmMemTables = rocksdb_prefix + num_entries_imm_mem_tables; const std::string DB::Properties::kNumDeletesActiveMemTable = rocksdb_prefix + num_deletes_active_mem_table; const std::string DB::Properties::kNumDeletesImmMemTables = rocksdb_prefix + num_deletes_imm_mem_tables; const std::string DB::Properties::kEstimateNumKeys = rocksdb_prefix + estimate_num_keys; const std::string DB::Properties::kEstimateTableReadersMem = rocksdb_prefix + estimate_table_readers_mem; const std::string DB::Properties::kIsFileDeletionsEnabled = rocksdb_prefix + is_file_deletions_enabled; const std::string DB::Properties::kNumSnapshots = rocksdb_prefix + num_snapshots; const std::string DB::Properties::kOldestSnapshotTime = rocksdb_prefix + oldest_snapshot_time; const std::string DB::Properties::kOldestSnapshotSequence = rocksdb_prefix + oldest_snapshot_sequence; const std::string DB::Properties::kNumLiveVersions = rocksdb_prefix + num_live_versions; const std::string DB::Properties::kCurrentSuperVersionNumber = rocksdb_prefix + current_version_number; const std::string DB::Properties::kEstimateLiveDataSize = rocksdb_prefix + estimate_live_data_size; const std::string DB::Properties::kMinLogNumberToKeep = rocksdb_prefix + min_log_number_to_keep_str; const std::string DB::Properties::kMinObsoleteSstNumberToKeep = rocksdb_prefix + min_obsolete_sst_number_to_keep_str; const std::string DB::Properties::kTotalSstFilesSize = rocksdb_prefix + total_sst_files_size; const std::string DB::Properties::kLiveSstFilesSize = rocksdb_prefix + live_sst_files_size; const std::string 
DB::Properties::kBaseLevel = rocksdb_prefix + base_level_str; const std::string DB::Properties::kEstimatePendingCompactionBytes = rocksdb_prefix + estimate_pending_comp_bytes; const std::string DB::Properties::kAggregatedTableProperties = rocksdb_prefix + aggregated_table_properties; const std::string DB::Properties::kAggregatedTablePropertiesAtLevel = rocksdb_prefix + aggregated_table_properties_at_level; const std::string DB::Properties::kActualDelayedWriteRate = rocksdb_prefix + actual_delayed_write_rate; const std::string DB::Properties::kIsWriteStopped = rocksdb_prefix + is_write_stopped; const std::string DB::Properties::kEstimateOldestKeyTime = rocksdb_prefix + estimate_oldest_key_time; const std::string DB::Properties::kBlockCacheCapacity = rocksdb_prefix + block_cache_capacity; const std::string DB::Properties::kBlockCacheUsage = rocksdb_prefix + block_cache_usage; const std::string DB::Properties::kBlockCachePinnedUsage = rocksdb_prefix + block_cache_pinned_usage; const std::string DB::Properties::kOptionsStatistics = rocksdb_prefix + options_statistics; const std::unordered_map InternalStats::ppt_name_to_info = { {DB::Properties::kNumFilesAtLevelPrefix, {false, &InternalStats::HandleNumFilesAtLevel, nullptr, nullptr, nullptr}}, {DB::Properties::kCompressionRatioAtLevelPrefix, {false, &InternalStats::HandleCompressionRatioAtLevelPrefix, nullptr, nullptr, nullptr}}, {DB::Properties::kLevelStats, {false, &InternalStats::HandleLevelStats, nullptr, nullptr, nullptr}}, {DB::Properties::kStats, {false, &InternalStats::HandleStats, nullptr, nullptr, nullptr}}, {DB::Properties::kCFStats, {false, &InternalStats::HandleCFStats, nullptr, &InternalStats::HandleCFMapStats, nullptr}}, {DB::Properties::kCFStatsNoFileHistogram, {false, &InternalStats::HandleCFStatsNoFileHistogram, nullptr, nullptr, nullptr}}, {DB::Properties::kCFFileHistogram, {false, &InternalStats::HandleCFFileHistogram, nullptr, nullptr, nullptr}}, {DB::Properties::kDBStats, {false, 
&InternalStats::HandleDBStats, nullptr, nullptr, nullptr}}, {DB::Properties::kSSTables, {false, &InternalStats::HandleSsTables, nullptr, nullptr, nullptr}}, {DB::Properties::kAggregatedTableProperties, {false, &InternalStats::HandleAggregatedTableProperties, nullptr, nullptr, nullptr}}, {DB::Properties::kAggregatedTablePropertiesAtLevel, {false, &InternalStats::HandleAggregatedTablePropertiesAtLevel, nullptr, nullptr, nullptr}}, {DB::Properties::kNumImmutableMemTable, {false, nullptr, &InternalStats::HandleNumImmutableMemTable, nullptr, nullptr}}, {DB::Properties::kNumImmutableMemTableFlushed, {false, nullptr, &InternalStats::HandleNumImmutableMemTableFlushed, nullptr, nullptr}}, {DB::Properties::kMemTableFlushPending, {false, nullptr, &InternalStats::HandleMemTableFlushPending, nullptr, nullptr}}, {DB::Properties::kCompactionPending, {false, nullptr, &InternalStats::HandleCompactionPending, nullptr, nullptr}}, {DB::Properties::kBackgroundErrors, {false, nullptr, &InternalStats::HandleBackgroundErrors, nullptr, nullptr}}, {DB::Properties::kCurSizeActiveMemTable, {false, nullptr, &InternalStats::HandleCurSizeActiveMemTable, nullptr, nullptr}}, {DB::Properties::kCurSizeAllMemTables, {false, nullptr, &InternalStats::HandleCurSizeAllMemTables, nullptr, nullptr}}, {DB::Properties::kSizeAllMemTables, {false, nullptr, &InternalStats::HandleSizeAllMemTables, nullptr, nullptr}}, {DB::Properties::kNumEntriesActiveMemTable, {false, nullptr, &InternalStats::HandleNumEntriesActiveMemTable, nullptr, nullptr}}, {DB::Properties::kNumEntriesImmMemTables, {false, nullptr, &InternalStats::HandleNumEntriesImmMemTables, nullptr, nullptr}}, {DB::Properties::kNumDeletesActiveMemTable, {false, nullptr, &InternalStats::HandleNumDeletesActiveMemTable, nullptr, nullptr}}, {DB::Properties::kNumDeletesImmMemTables, {false, nullptr, &InternalStats::HandleNumDeletesImmMemTables, nullptr, nullptr}}, {DB::Properties::kEstimateNumKeys, {false, nullptr, &InternalStats::HandleEstimateNumKeys, 
nullptr, nullptr}}, {DB::Properties::kEstimateTableReadersMem, {true, nullptr, &InternalStats::HandleEstimateTableReadersMem, nullptr, nullptr}}, {DB::Properties::kIsFileDeletionsEnabled, {false, nullptr, &InternalStats::HandleIsFileDeletionsEnabled, nullptr, nullptr}}, {DB::Properties::kNumSnapshots, {false, nullptr, &InternalStats::HandleNumSnapshots, nullptr, nullptr}}, {DB::Properties::kOldestSnapshotTime, {false, nullptr, &InternalStats::HandleOldestSnapshotTime, nullptr, nullptr}}, {DB::Properties::kOldestSnapshotSequence, {false, nullptr, &InternalStats::HandleOldestSnapshotSequence, nullptr, nullptr}}, {DB::Properties::kNumLiveVersions, {false, nullptr, &InternalStats::HandleNumLiveVersions, nullptr, nullptr}}, {DB::Properties::kCurrentSuperVersionNumber, {false, nullptr, &InternalStats::HandleCurrentSuperVersionNumber, nullptr, nullptr}}, {DB::Properties::kEstimateLiveDataSize, {true, nullptr, &InternalStats::HandleEstimateLiveDataSize, nullptr, nullptr}}, {DB::Properties::kMinLogNumberToKeep, {false, nullptr, &InternalStats::HandleMinLogNumberToKeep, nullptr, nullptr}}, {DB::Properties::kMinObsoleteSstNumberToKeep, {false, nullptr, &InternalStats::HandleMinObsoleteSstNumberToKeep, nullptr, nullptr}}, {DB::Properties::kBaseLevel, {false, nullptr, &InternalStats::HandleBaseLevel, nullptr, nullptr}}, {DB::Properties::kTotalSstFilesSize, {false, nullptr, &InternalStats::HandleTotalSstFilesSize, nullptr, nullptr}}, {DB::Properties::kLiveSstFilesSize, {false, nullptr, &InternalStats::HandleLiveSstFilesSize, nullptr, nullptr}}, {DB::Properties::kEstimatePendingCompactionBytes, {false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes, nullptr, nullptr}}, {DB::Properties::kNumRunningFlushes, {false, nullptr, &InternalStats::HandleNumRunningFlushes, nullptr, nullptr}}, {DB::Properties::kNumRunningCompactions, {false, nullptr, &InternalStats::HandleNumRunningCompactions, nullptr, nullptr}}, {DB::Properties::kActualDelayedWriteRate, {false, nullptr, 
&InternalStats::HandleActualDelayedWriteRate, nullptr, nullptr}}, {DB::Properties::kIsWriteStopped, {false, nullptr, &InternalStats::HandleIsWriteStopped, nullptr, nullptr}}, {DB::Properties::kEstimateOldestKeyTime, {false, nullptr, &InternalStats::HandleEstimateOldestKeyTime, nullptr, nullptr}}, {DB::Properties::kBlockCacheCapacity, {false, nullptr, &InternalStats::HandleBlockCacheCapacity, nullptr, nullptr}}, {DB::Properties::kBlockCacheUsage, {false, nullptr, &InternalStats::HandleBlockCacheUsage, nullptr, nullptr}}, {DB::Properties::kBlockCachePinnedUsage, {false, nullptr, &InternalStats::HandleBlockCachePinnedUsage, nullptr, nullptr}}, {DB::Properties::kOptionsStatistics, {false, nullptr, nullptr, nullptr, &DBImpl::GetPropertyHandleOptionsStatistics}}, }; const DBPropertyInfo* GetPropertyInfo(const Slice& property) { std::string ppt_name = GetPropertyNameAndArg(property).first.ToString(); auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name); if (ppt_info_iter == InternalStats::ppt_name_to_info.end()) { return nullptr; } return &ppt_info_iter->second; } bool InternalStats::GetStringProperty(const DBPropertyInfo& property_info, const Slice& property, std::string* value) { assert(value != nullptr); assert(property_info.handle_string != nullptr); Slice arg = GetPropertyNameAndArg(property).second; return (this->*(property_info.handle_string))(value, arg); } bool InternalStats::GetMapProperty(const DBPropertyInfo& property_info, const Slice& /*property*/, std::map* value) { assert(value != nullptr); assert(property_info.handle_map != nullptr); return (this->*(property_info.handle_map))(value); } bool InternalStats::GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value, DBImpl* db) { assert(value != nullptr); assert(property_info.handle_int != nullptr && !property_info.need_out_of_mutex); db->mutex_.AssertHeld(); return (this->*(property_info.handle_int))(value, db, nullptr /* version */); } bool InternalStats::GetIntPropertyOutOfMutex( 
const DBPropertyInfo& property_info, Version* version, uint64_t* value) { assert(value != nullptr); assert(property_info.handle_int != nullptr && property_info.need_out_of_mutex); return (this->*(property_info.handle_int))(value, nullptr /* db */, version); } bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) { uint64_t level; const auto* vstorage = cfd_->current()->storage_info(); bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); if (!ok || static_cast(level) >= number_levels_) { return false; } else { char buf[100]; snprintf(buf, sizeof(buf), "%d", vstorage->NumLevelFiles(static_cast(level))); *value = buf; return true; } } bool InternalStats::HandleCompressionRatioAtLevelPrefix(std::string* value, Slice suffix) { uint64_t level; const auto* vstorage = cfd_->current()->storage_info(); bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); if (!ok || level >= static_cast(number_levels_)) { return false; } *value = ToString( vstorage->GetEstimatedCompressionRatioAtLevel(static_cast(level))); return true; } bool InternalStats::HandleLevelStats(std::string* value, Slice /*suffix*/) { char buf[1000]; const auto* vstorage = cfd_->current()->storage_info(); snprintf(buf, sizeof(buf), "Level Files Size(MB)\n" "--------------------\n"); value->append(buf); for (int level = 0; level < number_levels_; level++) { snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level, vstorage->NumLevelFiles(level), vstorage->NumLevelBytes(level) / kMB); value->append(buf); } return true; } bool InternalStats::HandleStats(std::string* value, Slice suffix) { if (!HandleCFStats(value, suffix)) { return false; } if (!HandleDBStats(value, suffix)) { return false; } return true; } bool InternalStats::HandleCFMapStats( std::map* cf_stats) { DumpCFMapStats(cf_stats); return true; } bool InternalStats::HandleCFStats(std::string* value, Slice /*suffix*/) { DumpCFStats(value); return true; } bool 
InternalStats::HandleCFStatsNoFileHistogram(std::string* value, Slice /*suffix*/) { DumpCFStatsNoFileHistogram(value); return true; } bool InternalStats::HandleCFFileHistogram(std::string* value, Slice /*suffix*/) { DumpCFFileHistogram(value); return true; } bool InternalStats::HandleDBStats(std::string* value, Slice /*suffix*/) { DumpDBStats(value); return true; } bool InternalStats::HandleSsTables(std::string* value, Slice /*suffix*/) { auto* current = cfd_->current(); *value = current->DebugString(true, true); return true; } bool InternalStats::HandleAggregatedTableProperties(std::string* value, Slice /*suffix*/) { std::shared_ptr tp; auto s = cfd_->current()->GetAggregatedTableProperties(&tp); if (!s.ok()) { return false; } *value = tp->ToString(); return true; } bool InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* value, Slice suffix) { uint64_t level; bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty(); if (!ok || static_cast(level) >= number_levels_) { return false; } std::shared_ptr tp; auto s = cfd_->current()->GetAggregatedTableProperties( &tp, static_cast(level)); if (!s.ok()) { return false; } *value = tp->ToString(); return true; } bool InternalStats::HandleNumImmutableMemTable(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->imm()->NumNotFlushed(); return true; } bool InternalStats::HandleNumImmutableMemTableFlushed(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->imm()->NumFlushed(); return true; } bool InternalStats::HandleMemTableFlushPending(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = (cfd_->imm()->IsFlushPending() ? 
1 : 0); return true; } bool InternalStats::HandleNumRunningFlushes(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->num_running_flushes(); return true; } bool InternalStats::HandleCompactionPending(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // 1 if the system already determines at least one compaction is needed. // 0 otherwise, const auto* vstorage = cfd_->current()->storage_info(); *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0); return true; } bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->num_running_compactions_; return true; } bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Accumulated number of errors in background flushes or compactions. *value = GetBackgroundErrorCount(); return true; } bool InternalStats::HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Current size of the active memtable *value = cfd_->mem()->ApproximateMemoryUsage(); return true; } bool InternalStats::HandleCurSizeAllMemTables(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Current size of the active memtable + immutable memtables *value = cfd_->mem()->ApproximateMemoryUsage() + cfd_->imm()->ApproximateUnflushedMemTablesMemoryUsage(); return true; } bool InternalStats::HandleSizeAllMemTables(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->mem()->ApproximateMemoryUsage() + cfd_->imm()->ApproximateMemoryUsage(); return true; } bool InternalStats::HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Current number of entires in the active memtable *value = cfd_->mem()->num_entries(); return true; } bool InternalStats::HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Current number of entries in the immutable memtables *value = 
cfd_->imm()->current()->GetTotalNumEntries(); return true; } bool InternalStats::HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Current number of entires in the active memtable *value = cfd_->mem()->num_deletes(); return true; } bool InternalStats::HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Current number of entries in the immutable memtables *value = cfd_->imm()->current()->GetTotalNumDeletes(); return true; } bool InternalStats::HandleEstimateNumKeys(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Estimate number of entries in the column family: // Use estimated entries in tables + total entries in memtables. const auto* vstorage = cfd_->current()->storage_info(); uint64_t estimate_keys = cfd_->mem()->num_entries() + cfd_->imm()->current()->GetTotalNumEntries() + vstorage->GetEstimatedActiveKeys(); uint64_t estimate_deletes = cfd_->mem()->num_deletes() + cfd_->imm()->current()->GetTotalNumDeletes(); *value = estimate_keys > estimate_deletes * 2 ? 
estimate_keys - (estimate_deletes * 2) : 0; return true; } bool InternalStats::HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->snapshots().count(); return true; } bool InternalStats::HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = static_cast(db->snapshots().GetOldestSnapshotTime()); return true; } bool InternalStats::HandleOldestSnapshotSequence(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = static_cast(db->snapshots().GetOldestSnapshotSequence()); return true; } bool InternalStats::HandleNumLiveVersions(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->GetNumLiveVersions(); return true; } bool InternalStats::HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->GetSuperVersionNumber(); return true; } bool InternalStats::HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->IsFileDeletionsEnabled() ? 1 : 0; return true; } bool InternalStats::HandleBaseLevel(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { const auto* vstorage = cfd_->current()->storage_info(); *value = vstorage->base_level(); return true; } bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->GetTotalSstFilesSize(); return true; } bool InternalStats::HandleLiveSstFilesSize(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { *value = cfd_->GetLiveSstFilesSize(); return true; } bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { const auto* vstorage = cfd_->current()->storage_info(); *value = vstorage->estimated_compaction_needed_bytes(); return true; } bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value, DBImpl* /*db*/, Version* version) { *value = (version == nullptr) ? 
0 : version->GetMemoryUsageByTableReaders(); return true; } bool InternalStats::HandleEstimateLiveDataSize(uint64_t* value, DBImpl* /*db*/, Version* version) { const auto* vstorage = version->storage_info(); *value = vstorage->EstimateLiveDataSize(); return true; } bool InternalStats::HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->MinLogNumberToKeep(); return true; } bool InternalStats::HandleMinObsoleteSstNumberToKeep(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->MinObsoleteSstNumberToKeep(); return true; } bool InternalStats::HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db, Version* /*version*/) { const WriteController& wc = db->write_controller(); if (!wc.NeedsDelay()) { *value = 0; } else { *value = wc.delayed_write_rate(); } return true; } bool InternalStats::HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* /*version*/) { *value = db->write_controller().IsStopped() ? 1 : 0; return true; } bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // TODO(yiwu): The property is currently available for fifo compaction // with allow_compaction = false. This is because we don't propagate // oldest_key_time on compaction. 
if (cfd_->ioptions()->compaction_style != kCompactionStyleFIFO || cfd_->GetCurrentMutableCFOptions() ->compaction_options_fifo.allow_compaction) { return false; } TablePropertiesCollection collection; auto s = cfd_->current()->GetPropertiesOfAllTables(&collection); if (!s.ok()) { return false; } *value = std::numeric_limits::max(); for (auto& p : collection) { *value = std::min(*value, p.second->oldest_key_time); if (*value == 0) { break; } } if (*value > 0) { *value = std::min({cfd_->mem()->ApproximateOldestKeyTime(), cfd_->imm()->ApproximateOldestKeyTime(), *value}); } return *value > 0 && *value < std::numeric_limits::max(); } bool InternalStats::HandleBlockCacheStat(Cache** block_cache) { assert(block_cache != nullptr); auto* table_factory = cfd_->ioptions()->table_factory; assert(table_factory != nullptr); if (BlockBasedTableFactory::kName != table_factory->Name()) { return false; } auto* table_options = reinterpret_cast(table_factory->GetOptions()); if (table_options == nullptr) { return false; } *block_cache = table_options->block_cache.get(); if (table_options->no_block_cache || *block_cache == nullptr) { return false; } return true; } bool InternalStats::HandleBlockCacheCapacity(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { Cache* block_cache; bool ok = HandleBlockCacheStat(&block_cache); if (!ok) { return false; } *value = static_cast(block_cache->GetCapacity()); return true; } bool InternalStats::HandleBlockCacheUsage(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { Cache* block_cache; bool ok = HandleBlockCacheStat(&block_cache); if (!ok) { return false; } *value = static_cast(block_cache->GetUsage()); return true; } bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { Cache* block_cache; bool ok = HandleBlockCacheStat(&block_cache); if (!ok) { return false; } *value = static_cast(block_cache->GetPinnedUsage()); return true; } void InternalStats::DumpDBStats(std::string* value) { 
char buf[1000]; // DB-level stats, only available from default column family double seconds_up = (env_->NowMicros() - started_at_ + 1) / kMicrosInSec; double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up; snprintf(buf, sizeof(buf), "\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n", seconds_up, interval_seconds_up); value->append(buf); // Cumulative uint64_t user_bytes_written = GetDBStats(InternalStats::kIntStatsBytesWritten); uint64_t num_keys_written = GetDBStats(InternalStats::kIntStatsNumKeysWritten); uint64_t write_other = GetDBStats(InternalStats::kIntStatsWriteDoneByOther); uint64_t write_self = GetDBStats(InternalStats::kIntStatsWriteDoneBySelf); uint64_t wal_bytes = GetDBStats(InternalStats::kIntStatsWalFileBytes); uint64_t wal_synced = GetDBStats(InternalStats::kIntStatsWalFileSynced); uint64_t write_with_wal = GetDBStats(InternalStats::kIntStatsWriteWithWal); uint64_t write_stall_micros = GetDBStats(InternalStats::kIntStatsWriteStallMicros); const int kHumanMicrosLen = 32; char human_micros[kHumanMicrosLen]; // Data // writes: total number of write requests. // keys: total number of key updates issued by all the write requests // commit groups: number of group commits issued to the DB. Each group can // contain one or more writes. // so writes/keys is the average number of put in multi-put or put // writes/groups is the average group commit size. // // The format is the same for interval stats. 
snprintf(buf, sizeof(buf), "Cumulative writes: %s writes, %s keys, %s commit groups, " "%.1f writes per commit group, ingest: %.2f GB, %.2f MB/s\n", NumberToHumanString(write_other + write_self).c_str(), NumberToHumanString(num_keys_written).c_str(), NumberToHumanString(write_self).c_str(), (write_other + write_self) / static_cast(write_self + 1), user_bytes_written / kGB, user_bytes_written / kMB / seconds_up); value->append(buf); // WAL snprintf(buf, sizeof(buf), "Cumulative WAL: %s writes, %s syncs, " "%.2f writes per sync, written: %.2f GB, %.2f MB/s\n", NumberToHumanString(write_with_wal).c_str(), NumberToHumanString(wal_synced).c_str(), write_with_wal / static_cast(wal_synced + 1), wal_bytes / kGB, wal_bytes / kMB / seconds_up); value->append(buf); // Stall AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen, true); snprintf(buf, sizeof(buf), "Cumulative stall: %s, %.1f percent\n", human_micros, // 10000 = divide by 1M to get secs, then multiply by 100 for pct write_stall_micros / 10000.0 / std::max(seconds_up, 0.001)); value->append(buf); // Interval uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other; uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self; uint64_t interval_num_keys_written = num_keys_written - db_stats_snapshot_.num_keys_written; snprintf( buf, sizeof(buf), "Interval writes: %s writes, %s keys, %s commit groups, " "%.1f writes per commit group, ingest: %.2f MB, %.2f MB/s\n", NumberToHumanString(interval_write_other + interval_write_self).c_str(), NumberToHumanString(interval_num_keys_written).c_str(), NumberToHumanString(interval_write_self).c_str(), static_cast(interval_write_other + interval_write_self) / (interval_write_self + 1), (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB, (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB / std::max(interval_seconds_up, 0.001)), value->append(buf); uint64_t interval_write_with_wal = write_with_wal - 
db_stats_snapshot_.write_with_wal; uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced; uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes; snprintf( buf, sizeof(buf), "Interval WAL: %s writes, %s syncs, " "%.2f writes per sync, written: %.2f MB, %.2f MB/s\n", NumberToHumanString(interval_write_with_wal).c_str(), NumberToHumanString(interval_wal_synced).c_str(), interval_write_with_wal / static_cast(interval_wal_synced + 1), interval_wal_bytes / kGB, interval_wal_bytes / kMB / std::max(interval_seconds_up, 0.001)); value->append(buf); // Stall AppendHumanMicros(write_stall_micros - db_stats_snapshot_.write_stall_micros, human_micros, kHumanMicrosLen, true); snprintf(buf, sizeof(buf), "Interval stall: %s, %.1f percent\n", human_micros, // 10000 = divide by 1M to get secs, then multiply by 100 for pct (write_stall_micros - db_stats_snapshot_.write_stall_micros) / 10000.0 / std::max(interval_seconds_up, 0.001)); value->append(buf); db_stats_snapshot_.seconds_up = seconds_up; db_stats_snapshot_.ingest_bytes = user_bytes_written; db_stats_snapshot_.write_other = write_other; db_stats_snapshot_.write_self = write_self; db_stats_snapshot_.num_keys_written = num_keys_written; db_stats_snapshot_.wal_bytes = wal_bytes; db_stats_snapshot_.wal_synced = wal_synced; db_stats_snapshot_.write_with_wal = write_with_wal; db_stats_snapshot_.write_stall_micros = write_stall_micros; } /** * Dump Compaction Level stats to a map of stat name with "compaction." prefix * to value in double as string. The level in stat name is represented with * a prefix "Lx" where "x" is the level number. A special level "Sum" * represents the sum of a stat for all levels. * The result also contains IO stall counters which keys start with "io_stalls." * and values represent uint64 encoded as strings. 
*/
void InternalStats::DumpCFMapStats(
    std::map* cf_stats) {
  // NOTE(review): template argument lists (std::map, std::vector,
  // static_cast) appear to be missing throughout this copy of the file; the
  // tokens are left exactly as found -- confirm against the pristine source.
  CompactionStats compaction_stats_sum;
  std::map> levels_stats;
  DumpCFMapStats(&levels_stats, &compaction_stats_sum);
  for (auto const& level_ent : levels_stats) {
    // Key -1 is the aggregate entry written by the other overload.
    auto level_str =
        level_ent.first == -1 ? "Sum" : "L" + ToString(level_ent.first);
    for (auto const& stat_ent : level_ent.second) {
      auto stat_type = stat_ent.first;
      auto key_str =
          "compaction." + level_str + "." +
          InternalStats::compaction_level_stats.at(stat_type).property_name;
      (*cf_stats)[key_str] = std::to_string(stat_ent.second);
    }
  }

  DumpCFMapStatsIOStalls(cf_stats);
}

// Fills *levels_stats with one entry per level that has compaction time
// recorded or holds files, plus an aggregate entry under key -1, and
// accumulates the per-level compaction stats of those levels into
// *compaction_stats_sum.
void InternalStats::DumpCFMapStats(
    std::map>* levels_stats,
    CompactionStats* compaction_stats_sum) {
  const VersionStorageInfo* vstorage = cfd_->current()->storage_info();

  int num_levels_to_check =
      (cfd_->ioptions()->compaction_style != kCompactionStyleFIFO)
          ? vstorage->num_levels() - 1
          : 1;

  // Compaction scores are sorted based on its value. Restore them to the
  // level order
  std::vector compaction_score(number_levels_, 0);
  for (int i = 0; i < num_levels_to_check; ++i) {
    compaction_score[vstorage->CompactionScoreLevel(i)] =
        vstorage->CompactionScore(i);
  }
  // Count # of files being compacted for each level
  std::vector files_being_compacted(number_levels_, 0);
  for (int level = 0; level < number_levels_; ++level) {
    for (auto* f : vstorage->LevelFiles(level)) {
      if (f->being_compacted) {
        ++files_being_compacted[level];
      }
    }
  }

  int total_files = 0;
  int total_files_being_compacted = 0;
  double total_file_size = 0;
  uint64_t flush_ingest = cf_stats_value_[BYTES_FLUSHED];
  uint64_t add_file_ingest = cf_stats_value_[BYTES_INGESTED_ADD_FILE];
  uint64_t curr_ingest = flush_ingest + add_file_ingest;
  for (int level = 0; level < number_levels_; level++) {
    int files = vstorage->NumLevelFiles(level);
    total_files += files;
    total_files_being_compacted += files_being_compacted[level];
    if (comp_stats_[level].micros > 0 || files > 0) {
      compaction_stats_sum->Add(comp_stats_[level]);
      total_file_size += vstorage->NumLevelBytes(level);
      // Level 0 is fed by flushes and ingested files; other levels by bytes
      // read from non-output levels.
      uint64_t input_bytes;
      if (level == 0) {
        input_bytes = curr_ingest;
      } else {
        input_bytes = comp_stats_[level].bytes_read_non_output_levels;
      }
      double w_amp =
          (input_bytes == 0)
              ? 0.0
              : static_cast(comp_stats_[level].bytes_written) /
                    input_bytes;
      std::map level_stats;
      PrepareLevelStats(&level_stats, files, files_being_compacted[level],
                        static_cast(vstorage->NumLevelBytes(level)),
                        compaction_score[level], w_amp, comp_stats_[level]);
      (*levels_stats)[level] = level_stats;
    }
  }
  // Cumulative summary
  // (+1 in the divisor avoids dividing by zero when nothing was ingested)
  double w_amp = compaction_stats_sum->bytes_written /
                 static_cast(curr_ingest + 1);
  // Stats summary across levels
  std::map sum_stats;
  PrepareLevelStats(&sum_stats, total_files, total_files_being_compacted,
                    total_file_size, 0, w_amp, *compaction_stats_sum);
  (*levels_stats)[-1] = sum_stats;  //  -1 is for the Sum level
}

// Fills *priorities_stats with one entry per index of comp_stats_by_pri_
// that has a non-zero `micros` value. The file-count, size, score and w_amp
// inputs are passed as 0 for these entries.
void InternalStats::DumpCFMapStatsByPriority(
    std::map>* priorities_stats) {
  for (size_t priority = 0; priority < comp_stats_by_pri_.size(); priority++) {
    if (comp_stats_by_pri_[priority].micros > 0) {
      std::map priority_stats;
      PrepareLevelStats(&priority_stats, 0 /* num_files */,
                        0 /* being_compacted */, 0 /* total_file_size */,
                        0 /* compaction_score */, 0 /* w_amp */,
                        comp_stats_by_pri_[priority]);
      (*priorities_stats)[static_cast(priority)] = priority_stats;
    }
  }
}

// Writes the write-stall counters from cf_stats_count_ into *cf_stats under
// "io_stalls.*" keys, plus two totals (stops and slowdowns). The totals do
// not include the LOCKED_* counters.
void InternalStats::DumpCFMapStatsIOStalls(
    std::map* cf_stats) {
  (*cf_stats)["io_stalls.level0_slowdown"] =
      std::to_string(cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS]);
  (*cf_stats)["io_stalls.level0_slowdown_with_compaction"] =
      std::to_string(cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS]);
  (*cf_stats)["io_stalls.level0_numfiles"] =
      std::to_string(cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS]);
  (*cf_stats)["io_stalls.level0_numfiles_with_compaction"] =
      std::to_string(cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_STOPS]);
  (*cf_stats)["io_stalls.stop_for_pending_compaction_bytes"] =
      std::to_string(cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS]);
  (*cf_stats)["io_stalls.slowdown_for_pending_compaction_bytes"] =
      std::to_string(cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS]);
  (*cf_stats)["io_stalls.memtable_compaction"] =
      std::to_string(cf_stats_count_[MEMTABLE_LIMIT_STOPS]);
  (*cf_stats)["io_stalls.memtable_slowdown"] =
      std::to_string(cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS]);

  uint64_t total_stop = cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS] +
                        cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS] +
                        cf_stats_count_[MEMTABLE_LIMIT_STOPS];

  uint64_t total_slowdown =
      cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS] +
      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS] +
      cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS];

  (*cf_stats)["io_stalls.total_stop"] = std::to_string(total_stop);
  (*cf_stats)["io_stalls.total_slowdown"] = std::to_string(total_slowdown);
}

// Appends the column-family report followed by the per-level file read
// latency histograms to *value.
void InternalStats::DumpCFStats(std::string* value) {
  DumpCFStatsNoFileHistogram(value);
  DumpCFFileHistogram(value);
}

// Appends the column-family report to *value: per-level and per-priority
// compaction tables, uptime, flush/AddFile ingest, compaction throughput and
// stall counts. Interval figures are deltas against cf_stats_snapshot_,
// which is overwritten with the current values as the report is produced.
void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
  char buf[2000];
  // Per-ColumnFamily stats
  PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Level");
  value->append(buf);

  // Print stats for each level
  std::map> levels_stats;
  CompactionStats compaction_stats_sum;
  DumpCFMapStats(&levels_stats, &compaction_stats_sum);
  for (int l = 0; l < number_levels_; ++l) {
    if (levels_stats.find(l) != levels_stats.end()) {
      PrintLevelStats(buf, sizeof(buf), "L" + ToString(l), levels_stats[l]);
      value->append(buf);
    }
  }

  // Print sum of level stats
  PrintLevelStats(buf, sizeof(buf), "Sum", levels_stats[-1]);
  value->append(buf);

  uint64_t flush_ingest = cf_stats_value_[BYTES_FLUSHED];
  uint64_t add_file_ingest = cf_stats_value_[BYTES_INGESTED_ADD_FILE];
  uint64_t ingest_files_addfile = cf_stats_value_[INGESTED_NUM_FILES_TOTAL];
  uint64_t ingest_l0_files_addfile =
      cf_stats_value_[INGESTED_LEVEL0_NUM_FILES_TOTAL];
  uint64_t ingest_keys_addfile = cf_stats_value_[INGESTED_NUM_KEYS_TOTAL];
  // Cumulative summary
  uint64_t total_stall_count =
      cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS] +
      cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS] +
      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS] +
      cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS] +
      cf_stats_count_[MEMTABLE_LIMIT_STOPS] +
      cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS];
  // Interval summary
  uint64_t interval_flush_ingest =
      flush_ingest - cf_stats_snapshot_.ingest_bytes_flush;
  uint64_t interval_add_file_inget =
      add_file_ingest - cf_stats_snapshot_.ingest_bytes_addfile;
  // +1 keeps the write-amplification divisor below non-zero.
  uint64_t interval_ingest =
      interval_flush_ingest + interval_add_file_inget + 1;
  CompactionStats interval_stats(compaction_stats_sum);
  interval_stats.Subtract(cf_stats_snapshot_.comp_stats);
  double w_amp =
      interval_stats.bytes_written / static_cast(interval_ingest);
  PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0, w_amp, interval_stats);
  value->append(buf);

  PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName(), "Priority");
  value->append(buf);
  std::map> priorities_stats;
  DumpCFMapStatsByPriority(&priorities_stats);
  for (size_t priority = 0; priority < comp_stats_by_pri_.size(); ++priority) {
    if (priorities_stats.find(static_cast(priority)) !=
        priorities_stats.end()) {
      PrintLevelStats(
          buf, sizeof(buf),
          Env::PriorityToString(static_cast(priority)),
          priorities_stats[static_cast(priority)]);
      value->append(buf);
    }
  }

  double seconds_up = (env_->NowMicros() - started_at_ + 1) / kMicrosInSec;
  double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up;
  snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n",
           seconds_up, interval_seconds_up);
  value->append(buf);
  snprintf(buf, sizeof(buf), "Flush(GB): cumulative %.3f, interval %.3f\n",
           flush_ingest / kGB, interval_flush_ingest / kGB);
  value->append(buf);
  snprintf(buf, sizeof(buf), "AddFile(GB): cumulative %.3f, interval %.3f\n",
           add_file_ingest / kGB, interval_add_file_inget / kGB);
  value->append(buf);

  uint64_t interval_ingest_files_addfile =
      ingest_files_addfile - cf_stats_snapshot_.ingest_files_addfile;
  snprintf(buf, sizeof(buf),
           "AddFile(Total Files): cumulative %" PRIu64 ", interval %" PRIu64
           "\n",
           ingest_files_addfile, interval_ingest_files_addfile);
  value->append(buf);

  uint64_t interval_ingest_l0_files_addfile =
      ingest_l0_files_addfile - cf_stats_snapshot_.ingest_l0_files_addfile;
  snprintf(buf, sizeof(buf),
           "AddFile(L0 Files): cumulative %" PRIu64 ", interval %" PRIu64 "\n",
           ingest_l0_files_addfile, interval_ingest_l0_files_addfile);
  value->append(buf);

  uint64_t interval_ingest_keys_addfile =
      ingest_keys_addfile - cf_stats_snapshot_.ingest_keys_addfile;
  snprintf(buf, sizeof(buf),
           "AddFile(Keys): cumulative %" PRIu64 ", interval %" PRIu64 "\n",
           ingest_keys_addfile, interval_ingest_keys_addfile);
  value->append(buf);

  // Compact
  uint64_t compact_bytes_read = 0;
  uint64_t compact_bytes_write = 0;
  uint64_t compact_micros = 0;
  for (int level = 0; level < number_levels_; level++) {
    compact_bytes_read += comp_stats_[level].bytes_read_output_level +
                          comp_stats_[level].bytes_read_non_output_levels;
    compact_bytes_write += comp_stats_[level].bytes_written;
    compact_micros += comp_stats_[level].micros;
  }

  snprintf(buf, sizeof(buf),
           "Cumulative compaction: %.2f GB write, %.2f MB/s write, "
           "%.2f GB read, %.2f MB/s read, %.1f seconds\n",
           compact_bytes_write / kGB, compact_bytes_write / kMB / seconds_up,
           compact_bytes_read / kGB, compact_bytes_read / kMB / seconds_up,
           compact_micros / kMicrosInSec);
  value->append(buf);

  // Compaction interval
  uint64_t interval_compact_bytes_write =
      compact_bytes_write - cf_stats_snapshot_.compact_bytes_write;
  uint64_t interval_compact_bytes_read =
      compact_bytes_read - cf_stats_snapshot_.compact_bytes_read;
  uint64_t interval_compact_micros =
      compact_micros - cf_stats_snapshot_.compact_micros;

  snprintf(
      buf, sizeof(buf),
      "Interval compaction: %.2f GB write, %.2f MB/s write, "
      "%.2f GB read, %.2f MB/s read, %.1f seconds\n",
      interval_compact_bytes_write / kGB,
      interval_compact_bytes_write / kMB / std::max(interval_seconds_up, 0.001),
      interval_compact_bytes_read / kGB,
      interval_compact_bytes_read / kMB / std::max(interval_seconds_up, 0.001),
      interval_compact_micros / kMicrosInSec);
  value->append(buf);
  cf_stats_snapshot_.compact_bytes_write = compact_bytes_write;
  cf_stats_snapshot_.compact_bytes_read = compact_bytes_read;
  cf_stats_snapshot_.compact_micros = compact_micros;

  // The individual counters printed here are cumulative; only the trailing
  // "total count" is an interval delta against the snapshot.
  snprintf(buf, sizeof(buf),
           "Stalls(count): %" PRIu64
           " level0_slowdown, "
           "%" PRIu64
           " level0_slowdown_with_compaction, "
           "%" PRIu64
           " level0_numfiles, "
           "%" PRIu64
           " level0_numfiles_with_compaction, "
           "%" PRIu64
           " stop for pending_compaction_bytes, "
           "%" PRIu64
           " slowdown for pending_compaction_bytes, "
           "%" PRIu64
           " memtable_compaction, "
           "%" PRIu64
           " memtable_slowdown, "
           "interval %" PRIu64 " total count\n",
           cf_stats_count_[L0_FILE_COUNT_LIMIT_SLOWDOWNS],
           cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS],
           cf_stats_count_[L0_FILE_COUNT_LIMIT_STOPS],
           cf_stats_count_[LOCKED_L0_FILE_COUNT_LIMIT_STOPS],
           cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_STOPS],
           cf_stats_count_[PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS],
           cf_stats_count_[MEMTABLE_LIMIT_STOPS],
           cf_stats_count_[MEMTABLE_LIMIT_SLOWDOWNS],
           total_stall_count - cf_stats_snapshot_.stall_count);
  value->append(buf);

  cf_stats_snapshot_.seconds_up = seconds_up;
  cf_stats_snapshot_.ingest_bytes_flush = flush_ingest;
  cf_stats_snapshot_.ingest_bytes_addfile = add_file_ingest;
  cf_stats_snapshot_.ingest_files_addfile = ingest_files_addfile;
  cf_stats_snapshot_.ingest_l0_files_addfile = ingest_l0_files_addfile;
  cf_stats_snapshot_.ingest_keys_addfile = ingest_keys_addfile;
  cf_stats_snapshot_.comp_stats = compaction_stats_sum;
  cf_stats_snapshot_.stall_count = total_stall_count;
}

// Appends a header line and then, for every level whose read-latency
// histogram is non-empty, that histogram's ToString() output to *value.
// Each histogram is formatted through a fixed 5000-byte buffer.
void InternalStats::DumpCFFileHistogram(std::string* value) {
  char buf[2000];
  snprintf(buf, sizeof(buf),
           "\n** File Read Latency Histogram By Level [%s] **\n",
           cfd_->GetName().c_str());
  value->append(buf);

  for (int level = 0; level < number_levels_; level++) {
    if (!file_read_latency_[level].Empty()) {
      char buf2[5000];
      snprintf(buf2, sizeof(buf2),
               "** Level %d read latency histogram (micros):\n%s\n", level,
               file_read_latency_[level].ToString().c_str());
      value->append(buf2);
    }
  }
}

#else

// Stub used in this #else branch (the matching #endif is tagged
// !ROCKSDB_LITE): no property is recognised.
const DBPropertyInfo* GetPropertyInfo(const Slice& /*property*/) {
  return nullptr;
}

#endif  // !ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE
// NOTE(review): the next line looks like tar-archive member header residue
// marking the start of db/internal_stats.h, not C++ source; left as found.
rocksdb-6.11.4/db/internal_stats.h000066400000000000000000000612441370372246700170440ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//

#pragma once
// NOTE(review): the header names of the next three #include directives appear
// to be missing in this copy -- confirm against the pristine source.
#include
#include
#include

#include "db/version_set.h"

class ColumnFamilyData;

namespace ROCKSDB_NAMESPACE {

class DBImpl;
class MemTableList;

// Config for retrieving a property's value.
struct DBPropertyInfo {
  bool need_out_of_mutex;

  // gcc had an internal error for initializing union of pointer-to-member-
  // functions. Workaround is to populate exactly one of the following function
  // pointers with a non-nullptr value.

  // @param value Value-result argument for storing the property's string value
  // @param suffix Argument portion of the property. For example, suffix would
  // be "5" for the property "rocksdb.num-files-at-level5". So far, only
  // certain string properties take an argument.
  bool (InternalStats::*handle_string)(std::string* value, Slice suffix);

  // @param value Value-result argument for storing the property's uint64 value
  // @param db Many of the int properties rely on DBImpl methods.
  // @param version Version is needed in case the property is retrieved without
  // holding db mutex, which is only supported for int properties.
bool (InternalStats::*handle_int)(uint64_t* value, DBImpl* db, Version* version); // @param props Map of general properties to populate bool (InternalStats::*handle_map)(std::map* props); // handle the string type properties rely on DBImpl methods // @param value Value-result argument for storing the property's string value bool (DBImpl::*handle_string_dbimpl)(std::string* value); }; extern const DBPropertyInfo* GetPropertyInfo(const Slice& property); #ifndef ROCKSDB_LITE #undef SCORE enum class LevelStatType { INVALID = 0, NUM_FILES, COMPACTED_FILES, SIZE_BYTES, SCORE, READ_GB, RN_GB, RNP1_GB, WRITE_GB, W_NEW_GB, MOVED_GB, WRITE_AMP, READ_MBPS, WRITE_MBPS, COMP_SEC, COMP_CPU_SEC, COMP_COUNT, AVG_SEC, KEY_IN, KEY_DROP, TOTAL // total number of types }; struct LevelStat { // This what will be L?.property_name in the flat map returned to the user std::string property_name; // This will be what we will print in the header in the cli std::string header_name; }; class InternalStats { public: static const std::map compaction_level_stats; enum InternalCFStatsType { L0_FILE_COUNT_LIMIT_SLOWDOWNS, LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS, MEMTABLE_LIMIT_STOPS, MEMTABLE_LIMIT_SLOWDOWNS, L0_FILE_COUNT_LIMIT_STOPS, LOCKED_L0_FILE_COUNT_LIMIT_STOPS, PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS, PENDING_COMPACTION_BYTES_LIMIT_STOPS, WRITE_STALLS_ENUM_MAX, BYTES_FLUSHED, BYTES_INGESTED_ADD_FILE, INGESTED_NUM_FILES_TOTAL, INGESTED_LEVEL0_NUM_FILES_TOTAL, INGESTED_NUM_KEYS_TOTAL, INTERNAL_CF_STATS_ENUM_MAX, }; enum InternalDBStatsType { kIntStatsWalFileBytes, kIntStatsWalFileSynced, kIntStatsBytesWritten, kIntStatsNumKeysWritten, kIntStatsWriteDoneByOther, kIntStatsWriteDoneBySelf, kIntStatsWriteWithWal, kIntStatsWriteStallMicros, kIntStatsNumMax, }; InternalStats(int num_levels, Env* env, ColumnFamilyData* cfd) : db_stats_{}, cf_stats_value_{}, cf_stats_count_{}, comp_stats_(num_levels), comp_stats_by_pri_(Env::Priority::TOTAL), file_read_latency_(num_levels), bg_error_count_(0), 
number_levels_(num_levels), env_(env), cfd_(cfd), started_at_(env->NowMicros()) {} // Per level compaction stats. comp_stats_[level] stores the stats for // compactions that produced data for the specified "level". struct CompactionStats { uint64_t micros; uint64_t cpu_micros; // The number of bytes read from all non-output levels uint64_t bytes_read_non_output_levels; // The number of bytes read from the compaction output level. uint64_t bytes_read_output_level; // Total number of bytes written during compaction uint64_t bytes_written; // Total number of bytes moved to the output level uint64_t bytes_moved; // The number of compaction input files in all non-output levels. int num_input_files_in_non_output_levels; // The number of compaction input files in the output level. int num_input_files_in_output_level; // The number of compaction output files. int num_output_files; // Total incoming entries during compaction between levels N and N+1 uint64_t num_input_records; // Accumulated diff number of entries // (num input entries - num output entires) for compaction levels N and N+1 uint64_t num_dropped_records; // Number of compactions done int count; // Number of compactions done per CompactionReason int counts[static_cast(CompactionReason::kNumOfReasons)]; explicit CompactionStats() : micros(0), cpu_micros(0), bytes_read_non_output_levels(0), bytes_read_output_level(0), bytes_written(0), bytes_moved(0), num_input_files_in_non_output_levels(0), num_input_files_in_output_level(0), num_output_files(0), num_input_records(0), num_dropped_records(0), count(0) { int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i < num_of_reasons; i++) { counts[i] = 0; } } explicit CompactionStats(CompactionReason reason, int c) : micros(0), cpu_micros(0), bytes_read_non_output_levels(0), bytes_read_output_level(0), bytes_written(0), bytes_moved(0), num_input_files_in_non_output_levels(0), num_input_files_in_output_level(0), num_output_files(0), 
num_input_records(0), num_dropped_records(0), count(c) { int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i < num_of_reasons; i++) { counts[i] = 0; } int r = static_cast(reason); if (r >= 0 && r < num_of_reasons) { counts[r] = c; } else { count = 0; } } explicit CompactionStats(const CompactionStats& c) : micros(c.micros), cpu_micros(c.cpu_micros), bytes_read_non_output_levels(c.bytes_read_non_output_levels), bytes_read_output_level(c.bytes_read_output_level), bytes_written(c.bytes_written), bytes_moved(c.bytes_moved), num_input_files_in_non_output_levels( c.num_input_files_in_non_output_levels), num_input_files_in_output_level(c.num_input_files_in_output_level), num_output_files(c.num_output_files), num_input_records(c.num_input_records), num_dropped_records(c.num_dropped_records), count(c.count) { int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i < num_of_reasons; i++) { counts[i] = c.counts[i]; } } CompactionStats& operator=(const CompactionStats& c) { micros = c.micros; cpu_micros = c.cpu_micros; bytes_read_non_output_levels = c.bytes_read_non_output_levels; bytes_read_output_level = c.bytes_read_output_level; bytes_written = c.bytes_written; bytes_moved = c.bytes_moved; num_input_files_in_non_output_levels = c.num_input_files_in_non_output_levels; num_input_files_in_output_level = c.num_input_files_in_output_level; num_output_files = c.num_output_files; num_input_records = c.num_input_records; num_dropped_records = c.num_dropped_records; count = c.count; int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i < num_of_reasons; i++) { counts[i] = c.counts[i]; } return *this; } void Clear() { this->micros = 0; this->cpu_micros = 0; this->bytes_read_non_output_levels = 0; this->bytes_read_output_level = 0; this->bytes_written = 0; this->bytes_moved = 0; this->num_input_files_in_non_output_levels = 0; this->num_input_files_in_output_level = 0; this->num_output_files = 
0; this->num_input_records = 0; this->num_dropped_records = 0; this->count = 0; int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i < num_of_reasons; i++) { counts[i] = 0; } } void Add(const CompactionStats& c) { this->micros += c.micros; this->cpu_micros += c.cpu_micros; this->bytes_read_non_output_levels += c.bytes_read_non_output_levels; this->bytes_read_output_level += c.bytes_read_output_level; this->bytes_written += c.bytes_written; this->bytes_moved += c.bytes_moved; this->num_input_files_in_non_output_levels += c.num_input_files_in_non_output_levels; this->num_input_files_in_output_level += c.num_input_files_in_output_level; this->num_output_files += c.num_output_files; this->num_input_records += c.num_input_records; this->num_dropped_records += c.num_dropped_records; this->count += c.count; int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i< num_of_reasons; i++) { counts[i] += c.counts[i]; } } void Subtract(const CompactionStats& c) { this->micros -= c.micros; this->cpu_micros -= c.cpu_micros; this->bytes_read_non_output_levels -= c.bytes_read_non_output_levels; this->bytes_read_output_level -= c.bytes_read_output_level; this->bytes_written -= c.bytes_written; this->bytes_moved -= c.bytes_moved; this->num_input_files_in_non_output_levels -= c.num_input_files_in_non_output_levels; this->num_input_files_in_output_level -= c.num_input_files_in_output_level; this->num_output_files -= c.num_output_files; this->num_input_records -= c.num_input_records; this->num_dropped_records -= c.num_dropped_records; this->count -= c.count; int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); for (int i = 0; i < num_of_reasons; i++) { counts[i] -= c.counts[i]; } } }; void Clear() { for (int i = 0; i < kIntStatsNumMax; i++) { db_stats_[i].store(0); } for (int i = 0; i < INTERNAL_CF_STATS_ENUM_MAX; i++) { cf_stats_count_[i] = 0; cf_stats_value_[i] = 0; } for (auto& comp_stat : comp_stats_) { 
comp_stat.Clear(); } for (auto& h : file_read_latency_) { h.Clear(); } cf_stats_snapshot_.Clear(); db_stats_snapshot_.Clear(); bg_error_count_ = 0; started_at_ = env_->NowMicros(); } void AddCompactionStats(int level, Env::Priority thread_pri, const CompactionStats& stats) { comp_stats_[level].Add(stats); comp_stats_by_pri_[thread_pri].Add(stats); } void IncBytesMoved(int level, uint64_t amount) { comp_stats_[level].bytes_moved += amount; } void AddCFStats(InternalCFStatsType type, uint64_t value) { cf_stats_value_[type] += value; ++cf_stats_count_[type]; } void AddDBStats(InternalDBStatsType type, uint64_t value, bool concurrent = false) { auto& v = db_stats_[type]; if (concurrent) { v.fetch_add(value, std::memory_order_relaxed); } else { v.store(v.load(std::memory_order_relaxed) + value, std::memory_order_relaxed); } } uint64_t GetDBStats(InternalDBStatsType type) { return db_stats_[type].load(std::memory_order_relaxed); } HistogramImpl* GetFileReadHist(int level) { return &file_read_latency_[level]; } uint64_t GetBackgroundErrorCount() const { return bg_error_count_; } uint64_t BumpAndGetBackgroundErrorCount() { return ++bg_error_count_; } bool GetStringProperty(const DBPropertyInfo& property_info, const Slice& property, std::string* value); bool GetMapProperty(const DBPropertyInfo& property_info, const Slice& property, std::map* value); bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value, DBImpl* db); bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info, Version* version, uint64_t* value); const std::vector& TEST_GetCompactionStats() const { return comp_stats_; } // Store a mapping from the user-facing DB::Properties string to our // DBPropertyInfo struct used internally for retrieving properties. 
static const std::unordered_map ppt_name_to_info;
  // NOTE(review): several declarations in this class (std::unordered_map,
  // std::map, std::atomic, std::vector) carry no template arguments, and two
  // std::map parameters show a stray ">". The "<...>" text looks lost in
  // extraction - confirm against a pristine copy before compiling.

 private:
  // Stat-dumping helpers. Only the declarations are visible in this span.
  void DumpDBStats(std::string* value);
  void DumpCFMapStats(std::map* cf_stats);
  void DumpCFMapStats(
      std::map>* level_stats,
      CompactionStats* compaction_stats_sum);
  void DumpCFMapStatsByPriority(
      std::map>* priorities_stats);
  void DumpCFMapStatsIOStalls(std::map* cf_stats);
  void DumpCFStats(std::string* value);
  void DumpCFStatsNoFileHistogram(std::string* value);
  void DumpCFFileHistogram(std::string* value);

  bool HandleBlockCacheStat(Cache** block_cache);

  // Per-DB stats
  std::atomic db_stats_[kIntStatsNumMax];
  // Per-ColumnFamily stats
  uint64_t cf_stats_value_[INTERNAL_CF_STATS_ENUM_MAX];
  uint64_t cf_stats_count_[INTERNAL_CF_STATS_ENUM_MAX];
  // Per-ColumnFamily/level compaction stats
  std::vector comp_stats_;
  std::vector comp_stats_by_pri_;
  std::vector file_read_latency_;

  // Used to compute per-interval statistics
  struct CFStatsSnapshot {
    // ColumnFamily-level stats
    CompactionStats comp_stats;
    uint64_t ingest_bytes_flush;  // Bytes written to L0 (Flush)
    uint64_t stall_count;         // Stall count
    // Stats from compaction jobs - bytes written, bytes read, duration.
    uint64_t compact_bytes_write;
    uint64_t compact_bytes_read;
    uint64_t compact_micros;
    double seconds_up;

    // AddFile specific stats
    uint64_t ingest_bytes_addfile;     // Total Bytes ingested
    uint64_t ingest_files_addfile;     // Total number of files ingested
    uint64_t ingest_l0_files_addfile;  // Total number of files ingested to L0
    uint64_t ingest_keys_addfile;      // Total number of keys ingested

    // Zero-initializes every scalar member; comp_stats is left to its own
    // default constructor.
    CFStatsSnapshot()
        : ingest_bytes_flush(0),
          stall_count(0),
          compact_bytes_write(0),
          compact_bytes_read(0),
          compact_micros(0),
          seconds_up(0),
          ingest_bytes_addfile(0),
          ingest_files_addfile(0),
          ingest_l0_files_addfile(0),
          ingest_keys_addfile(0) {}

    // Resets the snapshot: clears comp_stats and zeroes every scalar member.
    void Clear() {
      comp_stats.Clear();
      ingest_bytes_flush = 0;
      stall_count = 0;
      compact_bytes_write = 0;
      compact_bytes_read = 0;
      compact_micros = 0;
      seconds_up = 0;
      ingest_bytes_addfile = 0;
      ingest_files_addfile = 0;
      ingest_l0_files_addfile = 0;
      ingest_keys_addfile = 0;
    }
  } cf_stats_snapshot_;

  struct DBStatsSnapshot {
    // DB-level stats
    uint64_t ingest_bytes;    // Bytes written by user
    uint64_t wal_bytes;       // Bytes written to WAL
    uint64_t wal_synced;      // Number of times WAL is synced
    uint64_t write_with_wal;  // Number of writes that request WAL
    // These count the number of writes processed by the calling thread or
    // another thread.
    uint64_t write_other;
    uint64_t write_self;
    // Total number of keys written. write_self and write_other measure the
    // number of write requests written. Each write request can contain
    // updates to multiple keys. num_keys_written is the total number of keys
    // updated by all those writes.
    uint64_t num_keys_written;
    // Total time writes delayed by stalls.
    uint64_t write_stall_micros;
    double seconds_up;

    // Zero-initializes every member.
    DBStatsSnapshot()
        : ingest_bytes(0),
          wal_bytes(0),
          wal_synced(0),
          write_with_wal(0),
          write_other(0),
          write_self(0),
          num_keys_written(0),
          write_stall_micros(0),
          seconds_up(0) {}

    // Resets every member to zero.
    void Clear() {
      ingest_bytes = 0;
      wal_bytes = 0;
      wal_synced = 0;
      write_with_wal = 0;
      write_other = 0;
      write_self = 0;
      num_keys_written = 0;
      write_stall_micros = 0;
      seconds_up = 0;
    }
  } db_stats_snapshot_;

  // Handler functions for getting property values. They use "value" as a
  // value-result argument, and return true upon successfully setting "value".
  bool HandleNumFilesAtLevel(std::string* value, Slice suffix);
  bool HandleCompressionRatioAtLevelPrefix(std::string* value, Slice suffix);
  bool HandleLevelStats(std::string* value, Slice suffix);
  bool HandleStats(std::string* value, Slice suffix);
  bool HandleCFMapStats(std::map* compaction_stats);
  bool HandleCFStats(std::string* value, Slice suffix);
  bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix);
  bool HandleCFFileHistogram(std::string* value, Slice suffix);
  bool HandleDBStats(std::string* value, Slice suffix);
  bool HandleSsTables(std::string* value, Slice suffix);
  bool HandleAggregatedTableProperties(std::string* value, Slice suffix);
  bool HandleAggregatedTablePropertiesAtLevel(std::string* value,
                                              Slice suffix);
  bool HandleNumImmutableMemTable(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleNumImmutableMemTableFlushed(uint64_t* value, DBImpl* db,
                                         Version* version);
  bool HandleMemTableFlushPending(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleNumRunningFlushes(uint64_t* value, DBImpl* db, Version* version);
  bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version);
  bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db,
                                   Version* version);
  bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version);
  bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db,
                                   Version* version);
  bool HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db,
                                 Version* version);
  bool HandleSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version);
  bool HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db,
                                      Version* version);
  bool HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db,
                                      Version* version);
  bool HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleEstimateNumKeys(uint64_t* value, DBImpl* db, Version* version);
  bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version);
  bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version);
  bool HandleOldestSnapshotSequence(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version);
  bool HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* db,
                                       Version* version);
  bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version);
  bool HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
  bool HandleLiveSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
  bool HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db,
                                            Version* version);
  bool HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db,
                                     Version* version);
  bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* version);
  bool HandleMinObsoleteSstNumberToKeep(uint64_t* value, DBImpl* db,
                                        Version* version);
  bool HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* version);
  bool HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* db,
                                   Version* version);
  bool HandleBlockCacheCapacity(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlockCacheUsage(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* db,
                                   Version* version);

  // Total number of background errors encountered. Every time a flush task
  // or compaction task fails, this counter is incremented. The failure can
  // be caused by any possible reason, including file system errors, out of
  // resources, or input file corruption. Failing when retrying the same flush
  // or compaction will cause the counter to increase too.
  uint64_t bg_error_count_;

  const int number_levels_;
  Env* env_;
  ColumnFamilyData* cfd_;
  uint64_t started_at_;
};

#else

// Stub InternalStats for the other branch of the conditional (ROCKSDB_LITE
// builds, per the #endif comment below). It offers the same public surface,
// but every method is either an empty body or returns a fixed value (false,
// 0 or nullptr), so no statistics are collected.
class InternalStats {
 public:
  enum InternalCFStatsType {
    L0_FILE_COUNT_LIMIT_SLOWDOWNS,
    LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS,
    MEMTABLE_LIMIT_STOPS,
    MEMTABLE_LIMIT_SLOWDOWNS,
    L0_FILE_COUNT_LIMIT_STOPS,
    LOCKED_L0_FILE_COUNT_LIMIT_STOPS,
    PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS,
    PENDING_COMPACTION_BYTES_LIMIT_STOPS,
    WRITE_STALLS_ENUM_MAX,
    BYTES_FLUSHED,
    BYTES_INGESTED_ADD_FILE,
    INGESTED_NUM_FILES_TOTAL,
    INGESTED_LEVEL0_NUM_FILES_TOTAL,
    INGESTED_NUM_KEYS_TOTAL,
    INTERNAL_CF_STATS_ENUM_MAX,
  };

  enum InternalDBStatsType {
    kIntStatsWalFileBytes,
    kIntStatsWalFileSynced,
    kIntStatsBytesWritten,
    kIntStatsNumKeysWritten,
    kIntStatsWriteDoneByOther,
    kIntStatsWriteDoneBySelf,
    kIntStatsWriteWithWal,
    kIntStatsWriteStallMicros,
    kIntStatsNumMax,
  };

  // All three arguments are ignored.
  InternalStats(int /*num_levels*/, Env* /*env*/, ColumnFamilyData* /*cfd*/) {}

  // Field-compatible stand-in for the full CompactionStats. None of the
  // constructors initializes any field (the copy constructor included), and
  // Add/Subtract do nothing.
  struct CompactionStats {
    uint64_t micros;
    uint64_t cpu_micros;
    uint64_t bytes_read_non_output_levels;
    uint64_t bytes_read_output_level;
    uint64_t bytes_written;
    uint64_t bytes_moved;
    int num_input_files_in_non_output_levels;
    int num_input_files_in_output_level;
    int num_output_files;
    uint64_t num_input_records;
    uint64_t num_dropped_records;
    int count;

    explicit CompactionStats() {}

    explicit CompactionStats(CompactionReason /*reason*/, int /*c*/) {}

    explicit CompactionStats(const CompactionStats& /*c*/) {}

    void Add(const CompactionStats& /*c*/) {}

    void Subtract(const CompactionStats& /*c*/) {}
  };

  // The recording methods below are no-ops in this stub.
  void AddCompactionStats(int /*level*/, Env::Priority /*thread_pri*/,
                          const CompactionStats& /*stats*/) {}

  void IncBytesMoved(int /*level*/, uint64_t /*amount*/) {}

  void AddCFStats(InternalCFStatsType /*type*/, uint64_t /*value*/) {}

  void AddDBStats(InternalDBStatsType /*type*/, uint64_t /*value*/,
                  bool /*concurrent */ = false) {}

  // Always nullptr: callers must be prepared for a null histogram.
  HistogramImpl* GetFileReadHist(int /*level*/) { return nullptr; }

  uint64_t GetBackgroundErrorCount() const { return 0; }

  uint64_t BumpAndGetBackgroundErrorCount() { return 0; }

  // The property getters always report failure (false) and never write to
  // their output arguments.
  bool GetStringProperty(const DBPropertyInfo& /*property_info*/,
                         const Slice& /*property*/, std::string* /*value*/) {
    return false;
  }

  // NOTE(review): std::map has no template arguments here - apparently
  // stripped in extraction.
  bool GetMapProperty(const DBPropertyInfo& /*property_info*/,
                      const Slice& /*property*/,
                      std::map* /*value*/) {
    return false;
  }

  bool GetIntProperty(const DBPropertyInfo& /*property_info*/,
                      uint64_t* /*value*/, DBImpl* /*db*/) const {
    return false;
  }

  bool GetIntPropertyOutOfMutex(const DBPropertyInfo& /*property_info*/,
                                Version* /*version*/,
                                uint64_t* /*value*/) const {
    return false;
  }
};
#endif  // !ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/job_context.h000066400000000000000000000160351370372246700163260ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once

// NOTE(review): the first #include below has no target, and the autovector,
// std::vector and std::unique_ptr declarations in this header carry no
// template arguments. The "<...>" text appears to have been lost in
// extraction - confirm against a pristine copy before compiling.
#include
#include "db/log_writer.h"
#include "db/column_family.h"

namespace ROCKSDB_NAMESPACE {

class MemTable;
struct SuperVersion;

// Collects the work that goes with installing a SuperVersion: old
// superversions to delete, write-stall notifications to deliver to listeners,
// and optionally a pre-allocated new SuperVersion. Clean() performs the
// deferred work; the destructor only asserts that nothing is left pending.
struct SuperVersionContext {
  // One pending listener notification, together with the options object whose
  // listeners should receive it.
  struct WriteStallNotification {
    WriteStallInfo write_stall_info;
    const ImmutableCFOptions* immutable_cf_options;
  };

  // Superversions that Clean() will delete.
  autovector superversions_to_free;
#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
  autovector write_stall_notifications;
#endif
  std::unique_ptr
      new_superversion;  // if nullptr no new superversion

  // Allocates a new SuperVersion up front only when create_superversion is
  // true; otherwise new_superversion starts out null.
  explicit SuperVersionContext(bool create_superversion = false)
      : new_superversion(create_superversion ? new SuperVersion() : nullptr) {}

  // Takes over all pending items and the new superversion from `other`.
  explicit SuperVersionContext(SuperVersionContext&& other)
      : superversions_to_free(std::move(other.superversions_to_free)),
#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
        write_stall_notifications(std::move(other.write_stall_notifications)),
#endif
        new_superversion(std::move(other.new_superversion)) {
  }

  // Replaces new_superversion with a freshly allocated SuperVersion.
  void NewSuperVersion() {
    new_superversion = std::unique_ptr(new SuperVersion());
  }

  // True when Clean() has work to do: superversions to free or, when stall
  // notifications are compiled in, notifications still queued.
  inline bool HaveSomethingToDelete() const {
#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
    return !superversions_to_free.empty() ||
           !write_stall_notifications.empty();
#else
    return !superversions_to_free.empty();
#endif
  }

  // Queues a notification describing a write-stall condition change for the
  // column family `name`, to be delivered by Clean(). When ROCKSDB_LITE or
  // ROCKSDB_DISABLE_STALL_NOTIFICATION is defined this does nothing and the
  // arguments are only marked as used.
  void PushWriteStallNotification(
      WriteStallCondition old_cond, WriteStallCondition new_cond,
      const std::string& name, const ImmutableCFOptions* ioptions) {
#if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
    WriteStallNotification notif;
    notif.write_stall_info.cf_name = name;
    notif.write_stall_info.condition.prev = old_cond;
    notif.write_stall_info.condition.cur = new_cond;
    notif.immutable_cf_options = ioptions;
    write_stall_notifications.push_back(notif);
#else
    (void)old_cond;
    (void)new_cond;
    (void)name;
    (void)ioptions;
#endif  // !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
  }

  // Delivers every queued notification to the listeners of its options
  // object, then deletes every queued superversion. Both queues are empty
  // afterwards, which is what the destructor asserts.
  void Clean() {
#if !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
    // notify listeners on changed write stall conditions
    for (auto& notif : write_stall_notifications) {
      for (auto& listener : notif.immutable_cf_options->listeners) {
        listener->OnStallConditionsChanged(notif.write_stall_info);
      }
    }
    write_stall_notifications.clear();
#endif  // !defined(ROCKSDB_LITE) && !defined(ROCKSDB_DISABLE_STALL_NOTIFICATION)
    // free superversions
    for (auto s : superversions_to_free) {
      delete s;
    }
    superversions_to_free.clear();
  }

  // Does not free anything itself; it only asserts that Clean() has already
  // emptied the queues.
  ~SuperVersionContext() {
#ifndef ROCKSDB_DISABLE_STALL_NOTIFICATION
    assert(write_stall_notifications.empty());
#endif
    assert(superversions_to_free.empty());
  }
};

// Per-job scratch state: lists of files that may be deleted, objects to free,
// and file-number bookkeeping gathered for one background job.
struct JobContext {
  // True when any of the file lists that feed deletion is non-empty.
  inline bool HaveSomethingToDelete() const {
    return !(full_scan_candidate_files.empty() && sst_delete_files.empty() &&
             blob_delete_files.empty() && log_delete_files.empty() &&
             manifest_delete_files.empty());
  }

  // True when Clean() has work to do: memtables or log writers to free, or
  // any superversion context with pending items.
  inline bool HaveSomethingToClean() const {
    bool sv_have_sth = false;
    for (const auto& sv_ctx : superversion_contexts) {
      if (sv_ctx.HaveSomethingToDelete()) {
        sv_have_sth = true;
        break;
      }
    }
    return memtables_to_free.size() > 0 || logs_to_free.size() > 0 ||
           sv_have_sth;
  }

  // Structure to store information for candidate files to delete.
  struct CandidateFileInfo {
    std::string file_name;
    std::string file_path;
    CandidateFileInfo(std::string name, std::string path)
        : file_name(std::move(name)), file_path(std::move(path)) {}
    // Two candidates are equal when both name and path match.
    bool operator==(const CandidateFileInfo& other) const {
      return file_name == other.file_name &&
             file_path == other.file_path;
    }
  };

  // Unique job id
  int job_id;

  // a list of all files that we'll consider deleting
  // (every once in a while this is filled up with all files
  // in the DB directory)
  // (filled only if we're doing full scan)
  std::vector full_scan_candidate_files;

  // the list of all live sst files that cannot be deleted
  std::vector sst_live;

  // the list of sst files that we need to delete
  std::vector sst_delete_files;

  // the list of all live blob files that cannot be deleted
  std::vector blob_live;

  // the list of blob files that we need to delete
  std::vector blob_delete_files;

  // a list of log files that we need to delete
  std::vector log_delete_files;

  // a list of log files that we need to preserve during full purge since they
  // will be reused later
  std::vector log_recycle_files;

  // a list of manifest files that we need to delete
  std::vector manifest_delete_files;

  // a list of memtables to be freed
  autovector memtables_to_free;

  // contexts for installing superversions for multiple column families
  std::vector superversion_contexts;

  // log writers that Clean() will delete
  autovector logs_to_free;

  // the current manifest_file_number, log_number and prev_log_number
  // that corresponds to the set of files in 'live'.
  uint64_t manifest_file_number;
  uint64_t pending_manifest_file_number;
  uint64_t log_number;
  uint64_t prev_log_number;

  uint64_t min_pending_output = 0;
  uint64_t prev_total_log_size = 0;
  size_t num_alive_log_files = 0;
  uint64_t size_log_to_delete = 0;

  // Snapshot taken before flush/compaction job.
  std::unique_ptr job_snapshot;

  // Records the job id, zeroes the file-number fields, and creates the first
  // superversion context (which pre-allocates a SuperVersion only when
  // create_superversion is true).
  explicit JobContext(int _job_id, bool create_superversion = false) {
    job_id = _job_id;
    manifest_file_number = 0;
    pending_manifest_file_number = 0;
    log_number = 0;
    prev_log_number = 0;
    superversion_contexts.emplace_back(
        SuperVersionContext(create_superversion));
  }

  // For non-empty JobContext Clean() has to be called at least once
  // before destruction (see asserts in ~JobContext()). Should be called with
  // unlocked DB mutex. Destructor doesn't call Clean() to avoid accidentally
  // doing potentially slow Clean() with locked DB mutex.
  void Clean() {
    // free superversions
    for (auto& sv_context : superversion_contexts) {
      sv_context.Clean();
    }
    // free pending memtables
    for (auto m : memtables_to_free) {
      delete m;
    }
    // free pending log writers
    for (auto l : logs_to_free) {
      delete l;
    }

    memtables_to_free.clear();
    logs_to_free.clear();
    job_snapshot.reset();
  }

  // Only asserts that Clean() already emptied the memtable and log-writer
  // lists; the superversion contexts are checked by their own destructors.
  ~JobContext() {
    assert(memtables_to_free.size() == 0);
    assert(logs_to_free.size() == 0);
  }
};

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/listener_test.cc000066400000000000000000001041761370372246700170360ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_index.h" #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/dbformat.h" #include "db/version_set.h" #include "db/write_batch_internal.h" #include "file/filename.h" #include "logging/logging.h" #include "memtable/hash_linklist_rep.h" #include "monitoring/statistics.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/perf_context.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "table/block_based/block_based_table_factory.h" #include "table/plain/plain_table_factory.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/hash.h" #include "util/mutexlock.h" #include "util/rate_limiter.h" #include "util/string_util.h" #include "utilities/merge_operators.h" #ifndef ROCKSDB_LITE namespace ROCKSDB_NAMESPACE { class EventListenerTest : public DBTestBase { public: EventListenerTest() : DBTestBase("/listener_test") {} static std::string BlobStr(uint64_t blob_file_number, uint64_t offset, uint64_t size) { std::string blob_index; BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size, kNoCompression); return blob_index; } const size_t k110KB = 110 << 10; }; struct TestPropertiesCollector : public ROCKSDB_NAMESPACE::TablePropertiesCollector { ROCKSDB_NAMESPACE::Status AddUserKey( const ROCKSDB_NAMESPACE::Slice& /*key*/, const ROCKSDB_NAMESPACE::Slice& /*value*/, ROCKSDB_NAMESPACE::EntryType /*type*/, ROCKSDB_NAMESPACE::SequenceNumber /*seq*/, uint64_t /*file_size*/) override { return Status::OK(); } ROCKSDB_NAMESPACE::Status Finish( ROCKSDB_NAMESPACE::UserCollectedProperties* properties) override { properties->insert({"0", "1"}); return Status::OK(); } const char* Name() const override { return 
"TestTablePropertiesCollector"; } ROCKSDB_NAMESPACE::UserCollectedProperties GetReadableProperties() const override { ROCKSDB_NAMESPACE::UserCollectedProperties ret; ret["2"] = "3"; return ret; } }; class TestPropertiesCollectorFactory : public TablePropertiesCollectorFactory { public: TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context /*context*/) override { return new TestPropertiesCollector; } const char* Name() const override { return "TestTablePropertiesCollector"; } }; class TestCompactionListener : public EventListener { public: explicit TestCompactionListener(EventListenerTest* test) : test_(test) {} void OnCompactionCompleted(DB *db, const CompactionJobInfo& ci) override { std::lock_guard lock(mutex_); compacted_dbs_.push_back(db); ASSERT_GT(ci.input_files.size(), 0U); ASSERT_EQ(ci.input_files.size(), ci.input_file_infos.size()); for (size_t i = 0; i < ci.input_file_infos.size(); ++i) { ASSERT_EQ(ci.input_file_infos[i].level, ci.base_input_level); ASSERT_EQ(ci.input_file_infos[i].file_number, TableFileNameToNumber(ci.input_files[i])); } ASSERT_GT(ci.output_files.size(), 0U); ASSERT_EQ(ci.output_files.size(), ci.output_file_infos.size()); ASSERT_TRUE(test_); ASSERT_EQ(test_->db_, db); std::vector> files_by_level; test_->dbfull()->TEST_GetFilesMetaData(test_->handles_[ci.cf_id], &files_by_level); ASSERT_GT(files_by_level.size(), ci.output_level); for (size_t i = 0; i < ci.output_file_infos.size(); ++i) { ASSERT_EQ(ci.output_file_infos[i].level, ci.output_level); ASSERT_EQ(ci.output_file_infos[i].file_number, TableFileNameToNumber(ci.output_files[i])); auto it = std::find_if( files_by_level[ci.output_level].begin(), files_by_level[ci.output_level].end(), [&](const FileMetaData& meta) { return meta.fd.GetNumber() == ci.output_file_infos[i].file_number; }); ASSERT_NE(it, files_by_level[ci.output_level].end()); ASSERT_EQ(ci.output_file_infos[i].oldest_blob_file_number, it->oldest_blob_file_number); } 
ASSERT_EQ(db->GetEnv()->GetThreadID(), ci.thread_id); ASSERT_GT(ci.thread_id, 0U); for (auto fl : {ci.input_files, ci.output_files}) { for (auto fn : fl) { auto it = ci.table_properties.find(fn); ASSERT_NE(it, ci.table_properties.end()); auto tp = it->second; ASSERT_TRUE(tp != nullptr); ASSERT_EQ(tp->user_collected_properties.find("0")->second, "1"); } } } EventListenerTest* test_; std::vector compacted_dbs_; std::mutex mutex_; }; TEST_F(EventListenerTest, OnSingleDBCompactionTest) { const int kTestKeySize = 16; const int kTestValueSize = 984; const int kEntrySize = kTestKeySize + kTestValueSize; const int kEntriesPerBuffer = 100; const int kNumL0Files = 4; Options options; options.env = CurrentOptions().env; options.create_if_missing = true; options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.compaction_style = kCompactionStyleLevel; options.target_file_size_base = options.write_buffer_size; options.max_bytes_for_level_base = options.target_file_size_base * 2; options.max_bytes_for_level_multiplier = 2; options.compression = kNoCompression; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // ROCKSDB_USING_THREAD_STATUS options.level0_file_num_compaction_trigger = kNumL0Files; options.table_properties_collector_factories.push_back( std::make_shared()); TestCompactionListener* listener = new TestCompactionListener(this); options.listeners.emplace_back(listener); std::vector cf_names = { "pikachu", "ilya", "muromec", "dobrynia", "nikitich", "alyosha", "popovich"}; CreateAndReopenWithCF(cf_names, options); ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p'))); WriteBatch batch; ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 1, "ditto", BlobStr(123, 0, 1 << 10))); ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); ASSERT_OK(Put(2, "ilya", std::string(90000, 'i'))); ASSERT_OK(Put(3, "muromec", std::string(90000, 'm'))); ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd'))); ASSERT_OK(Put(5, "nikitich", 
std::string(90000, 'n'))); ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a'))); ASSERT_OK(Put(7, "popovich", std::string(90000, 'p'))); for (int i = 1; i < 8; ++i) { ASSERT_OK(Flush(i)); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[i], nullptr, nullptr)); dbfull()->TEST_WaitForCompact(); } ASSERT_EQ(listener->compacted_dbs_.size(), cf_names.size()); for (size_t i = 0; i < cf_names.size(); ++i) { ASSERT_EQ(listener->compacted_dbs_[i], db_); } } // This simple Listener can only handle one flush at a time. class TestFlushListener : public EventListener { public: TestFlushListener(Env* env, EventListenerTest* test) : slowdown_count(0), stop_count(0), db_closed(), env_(env), test_(test) { db_closed = false; } void OnTableFileCreated( const TableFileCreationInfo& info) override { // remember the info for later checking the FlushJobInfo. prev_fc_info_ = info; ASSERT_GT(info.db_name.size(), 0U); ASSERT_GT(info.cf_name.size(), 0U); ASSERT_GT(info.file_path.size(), 0U); ASSERT_GT(info.job_id, 0); ASSERT_GT(info.table_properties.data_size, 0U); ASSERT_GT(info.table_properties.raw_key_size, 0U); ASSERT_GT(info.table_properties.raw_value_size, 0U); ASSERT_GT(info.table_properties.num_data_blocks, 0U); ASSERT_GT(info.table_properties.num_entries, 0U); #ifdef ROCKSDB_USING_THREAD_STATUS // Verify the id of the current thread that created this table // file matches the id of any active flush or compaction thread. 
uint64_t thread_id = env_->GetThreadID(); std::vector thread_list; ASSERT_OK(env_->GetThreadList(&thread_list)); bool found_match = false; for (auto thread_status : thread_list) { if (thread_status.operation_type == ThreadStatus::OP_FLUSH || thread_status.operation_type == ThreadStatus::OP_COMPACTION) { if (thread_id == thread_status.thread_id) { found_match = true; break; } } } ASSERT_TRUE(found_match); #endif // ROCKSDB_USING_THREAD_STATUS } void OnFlushCompleted( DB* db, const FlushJobInfo& info) override { flushed_dbs_.push_back(db); flushed_column_family_names_.push_back(info.cf_name); if (info.triggered_writes_slowdown) { slowdown_count++; } if (info.triggered_writes_stop) { stop_count++; } // verify whether the previously created file matches the flushed file. ASSERT_EQ(prev_fc_info_.db_name, db->GetName()); ASSERT_EQ(prev_fc_info_.cf_name, info.cf_name); ASSERT_EQ(prev_fc_info_.job_id, info.job_id); ASSERT_EQ(prev_fc_info_.file_path, info.file_path); ASSERT_EQ(TableFileNameToNumber(info.file_path), info.file_number); // Note: the following chunk relies on the notification pertaining to the // database pointed to by DBTestBase::db_, and is thus bypassed when // that assumption does not hold (see the test case MultiDBMultiListeners // below). 
ASSERT_TRUE(test_); if (db == test_->db_) { std::vector> files_by_level; test_->dbfull()->TEST_GetFilesMetaData(test_->handles_[info.cf_id], &files_by_level); ASSERT_FALSE(files_by_level.empty()); auto it = std::find_if(files_by_level[0].begin(), files_by_level[0].end(), [&](const FileMetaData& meta) { return meta.fd.GetNumber() == info.file_number; }); ASSERT_NE(it, files_by_level[0].end()); ASSERT_EQ(info.oldest_blob_file_number, it->oldest_blob_file_number); } ASSERT_EQ(db->GetEnv()->GetThreadID(), info.thread_id); ASSERT_GT(info.thread_id, 0U); ASSERT_EQ(info.table_properties.user_collected_properties.find("0")->second, "1"); } std::vector flushed_column_family_names_; std::vector flushed_dbs_; int slowdown_count; int stop_count; bool db_closing; std::atomic_bool db_closed; TableFileCreationInfo prev_fc_info_; protected: Env* env_; EventListenerTest* test_; }; TEST_F(EventListenerTest, OnSingleDBFlushTest) { Options options; options.env = CurrentOptions().env; options.write_buffer_size = k110KB; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // ROCKSDB_USING_THREAD_STATUS TestFlushListener* listener = new TestFlushListener(options.env, this); options.listeners.emplace_back(listener); std::vector cf_names = { "pikachu", "ilya", "muromec", "dobrynia", "nikitich", "alyosha", "popovich"}; options.table_properties_collector_factories.push_back( std::make_shared()); CreateAndReopenWithCF(cf_names, options); ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p'))); WriteBatch batch; ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 1, "ditto", BlobStr(456, 0, 1 << 10))); ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); ASSERT_OK(Put(2, "ilya", std::string(90000, 'i'))); ASSERT_OK(Put(3, "muromec", std::string(90000, 'm'))); ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd'))); ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n'))); ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a'))); ASSERT_OK(Put(7, "popovich", 
std::string(90000, 'p'))); for (int i = 1; i < 8; ++i) { ASSERT_OK(Flush(i)); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(listener->flushed_dbs_.size(), i); ASSERT_EQ(listener->flushed_column_family_names_.size(), i); } // make sure callback functions are called in the right order for (size_t i = 0; i < cf_names.size(); ++i) { ASSERT_EQ(listener->flushed_dbs_[i], db_); ASSERT_EQ(listener->flushed_column_family_names_[i], cf_names[i]); } } TEST_F(EventListenerTest, MultiCF) { Options options; options.env = CurrentOptions().env; options.write_buffer_size = k110KB; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // ROCKSDB_USING_THREAD_STATUS TestFlushListener* listener = new TestFlushListener(options.env, this); options.listeners.emplace_back(listener); options.table_properties_collector_factories.push_back( std::make_shared()); std::vector cf_names = { "pikachu", "ilya", "muromec", "dobrynia", "nikitich", "alyosha", "popovich"}; CreateAndReopenWithCF(cf_names, options); ASSERT_OK(Put(1, "pikachu", std::string(90000, 'p'))); ASSERT_OK(Put(2, "ilya", std::string(90000, 'i'))); ASSERT_OK(Put(3, "muromec", std::string(90000, 'm'))); ASSERT_OK(Put(4, "dobrynia", std::string(90000, 'd'))); ASSERT_OK(Put(5, "nikitich", std::string(90000, 'n'))); ASSERT_OK(Put(6, "alyosha", std::string(90000, 'a'))); ASSERT_OK(Put(7, "popovich", std::string(90000, 'p'))); for (int i = 1; i < 8; ++i) { ASSERT_OK(Flush(i)); ASSERT_EQ(listener->flushed_dbs_.size(), i); ASSERT_EQ(listener->flushed_column_family_names_.size(), i); } // make sure callback functions are called in the right order for (size_t i = 0; i < cf_names.size(); i++) { ASSERT_EQ(listener->flushed_dbs_[i], db_); ASSERT_EQ(listener->flushed_column_family_names_[i], cf_names[i]); } } TEST_F(EventListenerTest, MultiDBMultiListeners) { Options options; options.env = CurrentOptions().env; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // 
ROCKSDB_USING_THREAD_STATUS options.table_properties_collector_factories.push_back( std::make_shared()); std::vector listeners; const int kNumDBs = 5; const int kNumListeners = 10; for (int i = 0; i < kNumListeners; ++i) { listeners.emplace_back(new TestFlushListener(options.env, this)); } std::vector cf_names = { "pikachu", "ilya", "muromec", "dobrynia", "nikitich", "alyosha", "popovich"}; options.create_if_missing = true; for (int i = 0; i < kNumListeners; ++i) { options.listeners.emplace_back(listeners[i]); } DBOptions db_opts(options); ColumnFamilyOptions cf_opts(options); std::vector dbs; std::vector> vec_handles; for (int d = 0; d < kNumDBs; ++d) { ASSERT_OK(DestroyDB(dbname_ + ToString(d), options)); DB* db; std::vector handles; ASSERT_OK(DB::Open(options, dbname_ + ToString(d), &db)); for (size_t c = 0; c < cf_names.size(); ++c) { ColumnFamilyHandle* handle; db->CreateColumnFamily(cf_opts, cf_names[c], &handle); handles.push_back(handle); } vec_handles.push_back(std::move(handles)); dbs.push_back(db); } for (int d = 0; d < kNumDBs; ++d) { for (size_t c = 0; c < cf_names.size(); ++c) { ASSERT_OK(dbs[d]->Put(WriteOptions(), vec_handles[d][c], cf_names[c], cf_names[c])); } } for (size_t c = 0; c < cf_names.size(); ++c) { for (int d = 0; d < kNumDBs; ++d) { ASSERT_OK(dbs[d]->Flush(FlushOptions(), vec_handles[d][c])); reinterpret_cast(dbs[d])->TEST_WaitForFlushMemTable(); } } for (auto* listener : listeners) { int pos = 0; for (size_t c = 0; c < cf_names.size(); ++c) { for (int d = 0; d < kNumDBs; ++d) { ASSERT_EQ(listener->flushed_dbs_[pos], dbs[d]); ASSERT_EQ(listener->flushed_column_family_names_[pos], cf_names[c]); pos++; } } } for (auto handles : vec_handles) { for (auto h : handles) { delete h; } handles.clear(); } vec_handles.clear(); for (auto db : dbs) { delete db; } } TEST_F(EventListenerTest, DisableBGCompaction) { Options options; options.env = CurrentOptions().env; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // 
ROCKSDB_USING_THREAD_STATUS TestFlushListener* listener = new TestFlushListener(options.env, this); const int kCompactionTrigger = 1; const int kSlowdownTrigger = 5; const int kStopTrigger = 100; options.level0_file_num_compaction_trigger = kCompactionTrigger; options.level0_slowdown_writes_trigger = kSlowdownTrigger; options.level0_stop_writes_trigger = kStopTrigger; options.max_write_buffer_number = 10; options.listeners.emplace_back(listener); // BG compaction is disabled. Number of L0 files will simply keeps // increasing in this test. options.compaction_style = kCompactionStyleNone; options.compression = kNoCompression; options.write_buffer_size = 100000; // Small write buffer options.table_properties_collector_factories.push_back( std::make_shared()); CreateAndReopenWithCF({"pikachu"}, options); ColumnFamilyMetaData cf_meta; db_->GetColumnFamilyMetaData(handles_[1], &cf_meta); // keep writing until writes are forced to stop. for (int i = 0; static_cast(cf_meta.file_count) < kSlowdownTrigger * 10; ++i) { Put(1, ToString(i), std::string(10000, 'x'), WriteOptions()); FlushOptions fo; fo.allow_write_stall = true; db_->Flush(fo, handles_[1]); db_->GetColumnFamilyMetaData(handles_[1], &cf_meta); } ASSERT_GE(listener->slowdown_count, kSlowdownTrigger * 9); } class TestCompactionReasonListener : public EventListener { public: void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { std::lock_guard lock(mutex_); compaction_reasons_.push_back(ci.compaction_reason); } std::vector compaction_reasons_; std::mutex mutex_; }; TEST_F(EventListenerTest, CompactionReasonLevel) { Options options; options.env = CurrentOptions().env; options.create_if_missing = true; options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); TestCompactionReasonListener* listener = new TestCompactionReasonListener(); options.listeners.emplace_back(listener); options.level0_file_num_compaction_trigger = 4; 
options.compaction_style = kCompactionStyleLevel; DestroyAndReopen(options); Random rnd(301); // Write 4 files in L0 for (int i = 0; i < 4; i++) { GenerateNewRandomFile(&rnd); } dbfull()->TEST_WaitForCompact(); ASSERT_EQ(listener->compaction_reasons_.size(), 1); ASSERT_EQ(listener->compaction_reasons_[0], CompactionReason::kLevelL0FilesNum); DestroyAndReopen(options); // Write 3 non-overlapping files in L0 for (int k = 1; k <= 30; k++) { ASSERT_OK(Put(Key(k), Key(k))); if (k % 10 == 0) { Flush(); } } // Do a trivial move from L0 -> L1 db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); options.max_bytes_for_level_base = 1; Close(); listener->compaction_reasons_.clear(); Reopen(options); dbfull()->TEST_WaitForCompact(); ASSERT_GT(listener->compaction_reasons_.size(), 1); for (auto compaction_reason : listener->compaction_reasons_) { ASSERT_EQ(compaction_reason, CompactionReason::kLevelMaxLevelSize); } options.disable_auto_compactions = true; Close(); listener->compaction_reasons_.clear(); Reopen(options); Put("key", "value"); CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); ASSERT_GT(listener->compaction_reasons_.size(), 0); for (auto compaction_reason : listener->compaction_reasons_) { ASSERT_EQ(compaction_reason, CompactionReason::kManualCompaction); } } TEST_F(EventListenerTest, CompactionReasonUniversal) { Options options; options.env = CurrentOptions().env; options.create_if_missing = true; options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); TestCompactionReasonListener* listener = new TestCompactionReasonListener(); options.listeners.emplace_back(listener); options.compaction_style = kCompactionStyleUniversal; Random rnd(301); options.level0_file_num_compaction_trigger = 8; options.compaction_options_universal.max_size_amplification_percent = 100000; 
options.compaction_options_universal.size_ratio = 100000; DestroyAndReopen(options); listener->compaction_reasons_.clear(); // Write 8 files in L0 for (int i = 0; i < 8; i++) { GenerateNewRandomFile(&rnd); } dbfull()->TEST_WaitForCompact(); ASSERT_GT(listener->compaction_reasons_.size(), 0); for (auto compaction_reason : listener->compaction_reasons_) { ASSERT_EQ(compaction_reason, CompactionReason::kUniversalSizeRatio); } options.level0_file_num_compaction_trigger = 8; options.compaction_options_universal.max_size_amplification_percent = 1; options.compaction_options_universal.size_ratio = 100000; DestroyAndReopen(options); listener->compaction_reasons_.clear(); // Write 8 files in L0 for (int i = 0; i < 8; i++) { GenerateNewRandomFile(&rnd); } dbfull()->TEST_WaitForCompact(); ASSERT_GT(listener->compaction_reasons_.size(), 0); for (auto compaction_reason : listener->compaction_reasons_) { ASSERT_EQ(compaction_reason, CompactionReason::kUniversalSizeAmplification); } options.disable_auto_compactions = true; Close(); listener->compaction_reasons_.clear(); Reopen(options); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_GT(listener->compaction_reasons_.size(), 0); for (auto compaction_reason : listener->compaction_reasons_) { ASSERT_EQ(compaction_reason, CompactionReason::kManualCompaction); } } TEST_F(EventListenerTest, CompactionReasonFIFO) { Options options; options.env = CurrentOptions().env; options.create_if_missing = true; options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); TestCompactionReasonListener* listener = new TestCompactionReasonListener(); options.listeners.emplace_back(listener); options.level0_file_num_compaction_trigger = 4; options.compaction_style = kCompactionStyleFIFO; options.compaction_options_fifo.max_table_files_size = 1; DestroyAndReopen(options); Random rnd(301); // Write 4 files in L0 for (int i = 0; i < 4; i++) { GenerateNewRandomFile(&rnd); } 
dbfull()->TEST_WaitForCompact(); ASSERT_GT(listener->compaction_reasons_.size(), 0); for (auto compaction_reason : listener->compaction_reasons_) { ASSERT_EQ(compaction_reason, CompactionReason::kFIFOMaxSize); } } class TableFileCreationListener : public EventListener { public: class TestEnv : public EnvWrapper { public: TestEnv() : EnvWrapper(Env::Default()) {} void SetStatus(Status s) { status_ = s; } Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { if (fname.size() > 4 && fname.substr(fname.size() - 4) == ".sst") { if (!status_.ok()) { return status_; } } return Env::Default()->NewWritableFile(fname, result, options); } private: Status status_; }; TableFileCreationListener() { for (int i = 0; i < 2; i++) { started_[i] = finished_[i] = failure_[i] = 0; } } int Index(TableFileCreationReason reason) { int idx; switch (reason) { case TableFileCreationReason::kFlush: idx = 0; break; case TableFileCreationReason::kCompaction: idx = 1; break; default: idx = -1; } return idx; } void CheckAndResetCounters(int flush_started, int flush_finished, int flush_failure, int compaction_started, int compaction_finished, int compaction_failure) { ASSERT_EQ(started_[0], flush_started); ASSERT_EQ(finished_[0], flush_finished); ASSERT_EQ(failure_[0], flush_failure); ASSERT_EQ(started_[1], compaction_started); ASSERT_EQ(finished_[1], compaction_finished); ASSERT_EQ(failure_[1], compaction_failure); for (int i = 0; i < 2; i++) { started_[i] = finished_[i] = failure_[i] = 0; } } void OnTableFileCreationStarted( const TableFileCreationBriefInfo& info) override { int idx = Index(info.reason); if (idx >= 0) { started_[idx]++; } ASSERT_GT(info.db_name.size(), 0U); ASSERT_GT(info.cf_name.size(), 0U); ASSERT_GT(info.file_path.size(), 0U); ASSERT_GT(info.job_id, 0); } void OnTableFileCreated(const TableFileCreationInfo& info) override { int idx = Index(info.reason); if (idx >= 0) { finished_[idx]++; } ASSERT_GT(info.db_name.size(), 
0U); ASSERT_GT(info.cf_name.size(), 0U); ASSERT_GT(info.file_path.size(), 0U); ASSERT_GT(info.job_id, 0); if (info.status.ok()) { ASSERT_GT(info.table_properties.data_size, 0U); ASSERT_GT(info.table_properties.raw_key_size, 0U); ASSERT_GT(info.table_properties.raw_value_size, 0U); ASSERT_GT(info.table_properties.num_data_blocks, 0U); ASSERT_GT(info.table_properties.num_entries, 0U); } else { if (idx >= 0) { failure_[idx]++; } } } TestEnv test_env; int started_[2]; int finished_[2]; int failure_[2]; }; TEST_F(EventListenerTest, TableFileCreationListenersTest) { auto listener = std::make_shared(); Options options; options.create_if_missing = true; options.listeners.push_back(listener); options.env = &listener->test_env; DestroyAndReopen(options); ASSERT_OK(Put("foo", "aaa")); ASSERT_OK(Put("bar", "bbb")); ASSERT_OK(Flush()); dbfull()->TEST_WaitForFlushMemTable(); listener->CheckAndResetCounters(1, 1, 0, 0, 0, 0); ASSERT_OK(Put("foo", "aaa1")); ASSERT_OK(Put("bar", "bbb1")); listener->test_env.SetStatus(Status::NotSupported("not supported")); ASSERT_NOK(Flush()); listener->CheckAndResetCounters(1, 1, 1, 0, 0, 0); listener->test_env.SetStatus(Status::OK()); Reopen(options); ASSERT_OK(Put("foo", "aaa2")); ASSERT_OK(Put("bar", "bbb2")); ASSERT_OK(Flush()); dbfull()->TEST_WaitForFlushMemTable(); listener->CheckAndResetCounters(1, 1, 0, 0, 0, 0); const Slice kRangeStart = "a"; const Slice kRangeEnd = "z"; dbfull()->CompactRange(CompactRangeOptions(), &kRangeStart, &kRangeEnd); dbfull()->TEST_WaitForCompact(); listener->CheckAndResetCounters(0, 0, 0, 1, 1, 0); ASSERT_OK(Put("foo", "aaa3")); ASSERT_OK(Put("bar", "bbb3")); ASSERT_OK(Flush()); listener->test_env.SetStatus(Status::NotSupported("not supported")); dbfull()->CompactRange(CompactRangeOptions(), &kRangeStart, &kRangeEnd); dbfull()->TEST_WaitForCompact(); listener->CheckAndResetCounters(1, 1, 0, 1, 1, 1); } class MemTableSealedListener : public EventListener { private: SequenceNumber latest_seq_number_; public: 
MemTableSealedListener() {} void OnMemTableSealed(const MemTableInfo& info) override { latest_seq_number_ = info.first_seqno; } void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& flush_job_info) override { ASSERT_LE(flush_job_info.smallest_seqno, latest_seq_number_); } }; TEST_F(EventListenerTest, MemTableSealedListenerTest) { auto listener = std::make_shared(); Options options; options.create_if_missing = true; options.listeners.push_back(listener); DestroyAndReopen(options); for (unsigned int i = 0; i < 10; i++) { std::string tag = std::to_string(i); ASSERT_OK(Put("foo"+tag, "aaa")); ASSERT_OK(Put("bar"+tag, "bbb")); ASSERT_OK(Flush()); } } class ColumnFamilyHandleDeletionStartedListener : public EventListener { private: std::vector cfs_; int counter; public: explicit ColumnFamilyHandleDeletionStartedListener( const std::vector& cfs) : cfs_(cfs), counter(0) { cfs_.insert(cfs_.begin(), kDefaultColumnFamilyName); } void OnColumnFamilyHandleDeletionStarted( ColumnFamilyHandle* handle) override { ASSERT_EQ(cfs_[handle->GetID()], handle->GetName()); counter++; } int getCounter() { return counter; } }; TEST_F(EventListenerTest, ColumnFamilyHandleDeletionStartedListenerTest) { std::vector cfs{"pikachu", "eevee", "Mewtwo"}; auto listener = std::make_shared(cfs); Options options; options.env = CurrentOptions().env; options.create_if_missing = true; options.listeners.push_back(listener); CreateAndReopenWithCF(cfs, options); ASSERT_EQ(handles_.size(), 4); delete handles_[3]; delete handles_[2]; delete handles_[1]; handles_.resize(1); ASSERT_EQ(listener->getCounter(), 3); } class BackgroundErrorListener : public EventListener { private: SpecialEnv* env_; int counter_; public: BackgroundErrorListener(SpecialEnv* env) : env_(env), counter_(0) {} void OnBackgroundError(BackgroundErrorReason /*reason*/, Status* bg_error) override { if (counter_ == 0) { // suppress the first error and disable write-dropping such that a retry // can succeed. 
*bg_error = Status::OK(); env_->drop_writes_.store(false, std::memory_order_release); env_->no_slowdown_ = false; } ++counter_; } int counter() { return counter_; } }; TEST_F(EventListenerTest, BackgroundErrorListenerFailedFlushTest) { auto listener = std::make_shared(env_); Options options; options.create_if_missing = true; options.env = env_; options.listeners.push_back(listener); options.memtable_factory.reset(new SpecialSkipListFactory(1)); options.paranoid_checks = true; DestroyAndReopen(options); // the usual TEST_WaitForFlushMemTable() doesn't work for failed flushes, so // forge a custom one for the failed flush case. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::BGWorkFlush:done", "EventListenerTest:BackgroundErrorListenerFailedFlushTest:1"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); env_->drop_writes_.store(true, std::memory_order_release); env_->no_slowdown_ = true; ASSERT_OK(Put("key0", "val")); ASSERT_OK(Put("key1", "val")); TEST_SYNC_POINT("EventListenerTest:BackgroundErrorListenerFailedFlushTest:1"); ASSERT_EQ(1, listener->counter()); ASSERT_OK(Put("key2", "val")); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ(1, NumTableFilesAtLevel(0)); } TEST_F(EventListenerTest, BackgroundErrorListenerFailedCompactionTest) { auto listener = std::make_shared(env_); Options options; options.create_if_missing = true; options.disable_auto_compactions = true; options.env = env_; options.level0_file_num_compaction_trigger = 2; options.listeners.push_back(listener); options.memtable_factory.reset(new SpecialSkipListFactory(2)); options.paranoid_checks = true; DestroyAndReopen(options); // third iteration triggers the second memtable's flush for (int i = 0; i < 3; ++i) { ASSERT_OK(Put("key0", "val")); if (i > 0) { ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } ASSERT_OK(Put("key1", "val")); } ASSERT_EQ(2, NumTableFilesAtLevel(0)); env_->drop_writes_.store(true, std::memory_order_release); 
env_->no_slowdown_ = true; ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}})); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(1, listener->counter()); // trigger flush so compaction is triggered again; this time it succeeds // The previous failed compaction may get retried automatically, so we may // be left with 0 or 1 files in level 1, depending on when the retry gets // scheduled ASSERT_OK(Put("key0", "val")); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_LE(1, NumTableFilesAtLevel(0)); } class TestFileOperationListener : public EventListener { public: TestFileOperationListener() { file_reads_.store(0); file_reads_success_.store(0); file_writes_.store(0); file_writes_success_.store(0); } void OnFileReadFinish(const FileOperationInfo& info) override { ++file_reads_; if (info.status.ok()) { ++file_reads_success_; } ReportDuration(info); } void OnFileWriteFinish(const FileOperationInfo& info) override { ++file_writes_; if (info.status.ok()) { ++file_writes_success_; } ReportDuration(info); } bool ShouldBeNotifiedOnFileIO() override { return true; } std::atomic file_reads_; std::atomic file_reads_success_; std::atomic file_writes_; std::atomic file_writes_success_; private: void ReportDuration(const FileOperationInfo& info) const { auto duration = std::chrono::duration_cast( info.finish_timestamp - info.start_timestamp); ASSERT_GT(duration.count(), 0); } }; TEST_F(EventListenerTest, OnFileOperationTest) { Options options; options.env = CurrentOptions().env; options.create_if_missing = true; TestFileOperationListener* listener = new TestFileOperationListener(); options.listeners.emplace_back(listener); DestroyAndReopen(options); ASSERT_OK(Put("foo", "aaa")); dbfull()->Flush(FlushOptions()); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_GE(listener->file_writes_.load(), listener->file_writes_success_.load()); ASSERT_GT(listener->file_writes_.load(), 0); Close(); Reopen(options); 
ASSERT_GE(listener->file_reads_.load(), listener->file_reads_success_.load()); ASSERT_GT(listener->file_reads_.load(), 0); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/log_format.h000066400000000000000000000026211370372246700161350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Log format information shared by reader and writer. // See ../doc/log_format.txt for more detail. #pragma once #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { namespace log { enum RecordType { // Zero is reserved for preallocated files kZeroType = 0, kFullType = 1, // For fragments kFirstType = 2, kMiddleType = 3, kLastType = 4, // For recycled log files kRecyclableFullType = 5, kRecyclableFirstType = 6, kRecyclableMiddleType = 7, kRecyclableLastType = 8, }; static const int kMaxRecordType = kRecyclableLastType; static const unsigned int kBlockSize = 32768; // Header is checksum (4 bytes), length (2 bytes), type (1 byte) static const int kHeaderSize = 4 + 2 + 1; // Recyclable header is checksum (4 bytes), length (2 bytes), type (1 byte), // log number (4 bytes). static const int kRecyclableHeaderSize = 4 + 2 + 1 + 4; } // namespace log } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/log_reader.cc000066400000000000000000000476441370372246700162630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/log_reader.h" #include #include "file/sequence_file_reader.h" #include "port/lang.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" #include "util/coding.h" #include "util/crc32c.h" namespace ROCKSDB_NAMESPACE { namespace log { Reader::Reporter::~Reporter() { } Reader::Reader(std::shared_ptr info_log, std::unique_ptr&& _file, Reporter* reporter, bool checksum, uint64_t log_num) : info_log_(info_log), file_(std::move(_file)), reporter_(reporter), checksum_(checksum), backing_store_(new char[kBlockSize]), buffer_(), eof_(false), read_error_(false), eof_offset_(0), last_record_offset_(0), end_of_buffer_offset_(0), log_number_(log_num), recycled_(false) {} Reader::~Reader() { delete[] backing_store_; } // For kAbsoluteConsistency, on clean shutdown we don't expect any error // in the log files. For other modes, we can ignore only incomplete records // in the last log file, which are presumably due to a write in progress // during restart (or from log recycling). 
// // TODO krad: Evaluate if we need to move to a more strict mode where we // restrict the inconsistency to only the last log bool Reader::ReadRecord(Slice* record, std::string* scratch, WALRecoveryMode wal_recovery_mode) { scratch->clear(); record->clear(); bool in_fragmented_record = false; // Record offset of the logical record that we're reading // 0 is a dummy value to make compilers happy uint64_t prospective_record_offset = 0; Slice fragment; while (true) { uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size(); size_t drop_size = 0; const unsigned int record_type = ReadPhysicalRecord(&fragment, &drop_size); switch (record_type) { case kFullType: case kRecyclableFullType: if (in_fragmented_record && !scratch->empty()) { // Handle bug in earlier versions of log::Writer where // it could emit an empty kFirstType record at the tail end // of a block followed by a kFullType or kFirstType record // at the beginning of the next block. ReportCorruption(scratch->size(), "partial record without end(1)"); } prospective_record_offset = physical_record_offset; scratch->clear(); *record = fragment; last_record_offset_ = prospective_record_offset; return true; case kFirstType: case kRecyclableFirstType: if (in_fragmented_record && !scratch->empty()) { // Handle bug in earlier versions of log::Writer where // it could emit an empty kFirstType record at the tail end // of a block followed by a kFullType or kFirstType record // at the beginning of the next block. 
ReportCorruption(scratch->size(), "partial record without end(2)"); } prospective_record_offset = physical_record_offset; scratch->assign(fragment.data(), fragment.size()); in_fragmented_record = true; break; case kMiddleType: case kRecyclableMiddleType: if (!in_fragmented_record) { ReportCorruption(fragment.size(), "missing start of fragmented record(1)"); } else { scratch->append(fragment.data(), fragment.size()); } break; case kLastType: case kRecyclableLastType: if (!in_fragmented_record) { ReportCorruption(fragment.size(), "missing start of fragmented record(2)"); } else { scratch->append(fragment.data(), fragment.size()); *record = Slice(*scratch); last_record_offset_ = prospective_record_offset; return true; } break; case kBadHeader: if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) { // in clean shutdown we don't expect any error in the log files ReportCorruption(drop_size, "truncated header"); } FALLTHROUGH_INTENDED; case kEof: if (in_fragmented_record) { if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) { // in clean shutdown we don't expect any error in the log files ReportCorruption(scratch->size(), "error reading trailing data"); } // This can be caused by the writer dying immediately after // writing a physical record but before completing the next; don't // treat it as a corruption, just ignore the entire logical record. scratch->clear(); } return false; case kOldRecord: if (wal_recovery_mode != WALRecoveryMode::kSkipAnyCorruptedRecords) { // Treat a record from a previous instance of the log as EOF. if (in_fragmented_record) { if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) { // in clean shutdown we don't expect any error in the log files ReportCorruption(scratch->size(), "error reading trailing data"); } // This can be caused by the writer dying immediately after // writing a physical record but before completing the next; don't // treat it as a corruption, just ignore the entire logical record. 
scratch->clear(); } return false; } FALLTHROUGH_INTENDED; case kBadRecord: if (in_fragmented_record) { ReportCorruption(scratch->size(), "error in middle of record"); in_fragmented_record = false; scratch->clear(); } break; case kBadRecordLen: case kBadRecordChecksum: if (recycled_ && wal_recovery_mode == WALRecoveryMode::kTolerateCorruptedTailRecords) { scratch->clear(); return false; } if (record_type == kBadRecordLen) { ReportCorruption(drop_size, "bad record length"); } else { ReportCorruption(drop_size, "checksum mismatch"); } if (in_fragmented_record) { ReportCorruption(scratch->size(), "error in middle of record"); in_fragmented_record = false; scratch->clear(); } break; default: { char buf[40]; snprintf(buf, sizeof(buf), "unknown record type %u", record_type); ReportCorruption( (fragment.size() + (in_fragmented_record ? scratch->size() : 0)), buf); in_fragmented_record = false; scratch->clear(); break; } } } return false; } uint64_t Reader::LastRecordOffset() { return last_record_offset_; } void Reader::UnmarkEOF() { if (read_error_) { return; } eof_ = false; if (eof_offset_ == 0) { return; } UnmarkEOFInternal(); } void Reader::UnmarkEOFInternal() { // If the EOF was in the middle of a block (a partial block was read) we have // to read the rest of the block as ReadPhysicalRecord can only read full // blocks and expects the file position indicator to be aligned to the start // of a block. // // consumed_bytes + buffer_size() + remaining == kBlockSize size_t consumed_bytes = eof_offset_ - buffer_.size(); size_t remaining = kBlockSize - eof_offset_; // backing_store_ is used to concatenate what is left in buffer_ and // the remainder of the block. If buffer_ already uses backing_store_, // we just append the new data. if (buffer_.data() != backing_store_ + consumed_bytes) { // Buffer_ does not use backing_store_ for storage. // Copy what is left in buffer_ to backing_store. 
memmove(backing_store_ + consumed_bytes, buffer_.data(), buffer_.size()); } Slice read_buffer; Status status = file_->Read(remaining, &read_buffer, backing_store_ + eof_offset_); size_t added = read_buffer.size(); end_of_buffer_offset_ += added; if (!status.ok()) { if (added > 0) { ReportDrop(added, status); } read_error_ = true; return; } if (read_buffer.data() != backing_store_ + eof_offset_) { // Read did not write to backing_store_ memmove(backing_store_ + eof_offset_, read_buffer.data(), read_buffer.size()); } buffer_ = Slice(backing_store_ + consumed_bytes, eof_offset_ + added - consumed_bytes); if (added < remaining) { eof_ = true; eof_offset_ += added; } else { eof_offset_ = 0; } } void Reader::ReportCorruption(size_t bytes, const char* reason) { ReportDrop(bytes, Status::Corruption(reason)); } void Reader::ReportDrop(size_t bytes, const Status& reason) { if (reporter_ != nullptr) { reporter_->Corruption(bytes, reason); } } bool Reader::ReadMore(size_t* drop_size, int *error) { if (!eof_ && !read_error_) { // Last read was a full read, so this is a trailer to skip buffer_.clear(); Status status = file_->Read(kBlockSize, &buffer_, backing_store_); TEST_SYNC_POINT_CALLBACK("LogReader::ReadMore:AfterReadFile", &status); end_of_buffer_offset_ += buffer_.size(); if (!status.ok()) { buffer_.clear(); ReportDrop(kBlockSize, status); read_error_ = true; *error = kEof; return false; } else if (buffer_.size() < static_cast(kBlockSize)) { eof_ = true; eof_offset_ = buffer_.size(); } return true; } else { // Note that if buffer_ is non-empty, we have a truncated header at the // end of the file, which can be caused by the writer crashing in the // middle of writing the header. Unless explicitly requested we don't // considering this an error, just report EOF. 
if (buffer_.size()) { *drop_size = buffer_.size(); buffer_.clear(); *error = kBadHeader; return false; } buffer_.clear(); *error = kEof; return false; } } unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size) { while (true) { // We need at least the minimum header size if (buffer_.size() < static_cast(kHeaderSize)) { // the default value of r is meaningless because ReadMore will overwrite // it if it returns false; in case it returns true, the return value will // not be used anyway int r = kEof; if (!ReadMore(drop_size, &r)) { return r; } continue; } // Parse the header const char* header = buffer_.data(); const uint32_t a = static_cast(header[4]) & 0xff; const uint32_t b = static_cast(header[5]) & 0xff; const unsigned int type = header[6]; const uint32_t length = a | (b << 8); int header_size = kHeaderSize; if (type >= kRecyclableFullType && type <= kRecyclableLastType) { if (end_of_buffer_offset_ - buffer_.size() == 0) { recycled_ = true; } header_size = kRecyclableHeaderSize; // We need enough for the larger header if (buffer_.size() < static_cast(kRecyclableHeaderSize)) { int r = kEof; if (!ReadMore(drop_size, &r)) { return r; } continue; } const uint32_t log_num = DecodeFixed32(header + 7); if (log_num != log_number_) { return kOldRecord; } } if (header_size + length > buffer_.size()) { *drop_size = buffer_.size(); buffer_.clear(); if (!eof_) { return kBadRecordLen; } // If the end of the file has been reached without reading |length| // bytes of payload, assume the writer died in the middle of writing the // record. Don't report a corruption unless requested. if (*drop_size) { return kBadHeader; } return kEof; } if (type == kZeroType && length == 0) { // Skip zero length record without reporting any drops since // such records are produced by the mmap based writing code in // env_posix.cc that preallocates file regions. 
// NOTE: this should never happen in DB written by new RocksDB versions, // since we turn off mmap writes to manifest and log files buffer_.clear(); return kBadRecord; } // Check crc if (checksum_) { uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); uint32_t actual_crc = crc32c::Value(header + 6, length + header_size - 6); if (actual_crc != expected_crc) { // Drop the rest of the buffer since "length" itself may have // been corrupted and if we trust it, we could find some // fragment of a real log record that just happens to look // like a valid log record. *drop_size = buffer_.size(); buffer_.clear(); return kBadRecordChecksum; } } buffer_.remove_prefix(header_size + length); *result = Slice(header + header_size, length); return type; } } bool FragmentBufferedReader::ReadRecord(Slice* record, std::string* scratch, WALRecoveryMode /*unused*/) { assert(record != nullptr); assert(scratch != nullptr); record->clear(); scratch->clear(); uint64_t prospective_record_offset = 0; uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size(); size_t drop_size = 0; unsigned int fragment_type_or_err = 0; // Initialize to make compiler happy Slice fragment; while (TryReadFragment(&fragment, &drop_size, &fragment_type_or_err)) { switch (fragment_type_or_err) { case kFullType: case kRecyclableFullType: if (in_fragmented_record_ && !fragments_.empty()) { ReportCorruption(fragments_.size(), "partial record without end(1)"); } fragments_.clear(); *record = fragment; prospective_record_offset = physical_record_offset; last_record_offset_ = prospective_record_offset; in_fragmented_record_ = false; return true; case kFirstType: case kRecyclableFirstType: if (in_fragmented_record_ || !fragments_.empty()) { ReportCorruption(fragments_.size(), "partial record without end(2)"); } prospective_record_offset = physical_record_offset; fragments_.assign(fragment.data(), fragment.size()); in_fragmented_record_ = true; break; case kMiddleType: case kRecyclableMiddleType: 
if (!in_fragmented_record_) { ReportCorruption(fragment.size(), "missing start of fragmented record(1)"); } else { fragments_.append(fragment.data(), fragment.size()); } break; case kLastType: case kRecyclableLastType: if (!in_fragmented_record_) { ReportCorruption(fragment.size(), "missing start of fragmented record(2)"); } else { fragments_.append(fragment.data(), fragment.size()); scratch->assign(fragments_.data(), fragments_.size()); fragments_.clear(); *record = Slice(*scratch); last_record_offset_ = prospective_record_offset; in_fragmented_record_ = false; return true; } break; case kBadHeader: case kBadRecord: case kEof: case kOldRecord: if (in_fragmented_record_) { ReportCorruption(fragments_.size(), "error in middle of record"); in_fragmented_record_ = false; fragments_.clear(); } break; case kBadRecordChecksum: if (recycled_) { fragments_.clear(); return false; } ReportCorruption(drop_size, "checksum mismatch"); if (in_fragmented_record_) { ReportCorruption(fragments_.size(), "error in middle of record"); in_fragmented_record_ = false; fragments_.clear(); } break; default: { char buf[40]; snprintf(buf, sizeof(buf), "unknown record type %u", fragment_type_or_err); ReportCorruption( fragment.size() + (in_fragmented_record_ ? 
fragments_.size() : 0), buf); in_fragmented_record_ = false; fragments_.clear(); break; } } } return false; } void FragmentBufferedReader::UnmarkEOF() { if (read_error_) { return; } eof_ = false; UnmarkEOFInternal(); } bool FragmentBufferedReader::TryReadMore(size_t* drop_size, int* error) { if (!eof_ && !read_error_) { // Last read was a full read, so this is a trailer to skip buffer_.clear(); Status status = file_->Read(kBlockSize, &buffer_, backing_store_); end_of_buffer_offset_ += buffer_.size(); if (!status.ok()) { buffer_.clear(); ReportDrop(kBlockSize, status); read_error_ = true; *error = kEof; return false; } else if (buffer_.size() < static_cast(kBlockSize)) { eof_ = true; eof_offset_ = buffer_.size(); TEST_SYNC_POINT_CALLBACK( "FragmentBufferedLogReader::TryReadMore:FirstEOF", nullptr); } return true; } else if (!read_error_) { UnmarkEOF(); } if (!read_error_) { return true; } *error = kEof; *drop_size = buffer_.size(); if (buffer_.size() > 0) { *error = kBadHeader; } buffer_.clear(); return false; } // return true if the caller should process the fragment_type_or_err. 
bool FragmentBufferedReader::TryReadFragment( Slice* fragment, size_t* drop_size, unsigned int* fragment_type_or_err) { assert(fragment != nullptr); assert(drop_size != nullptr); assert(fragment_type_or_err != nullptr); while (buffer_.size() < static_cast(kHeaderSize)) { size_t old_size = buffer_.size(); int error = kEof; if (!TryReadMore(drop_size, &error)) { *fragment_type_or_err = error; return false; } else if (old_size == buffer_.size()) { return false; } } const char* header = buffer_.data(); const uint32_t a = static_cast(header[4]) & 0xff; const uint32_t b = static_cast(header[5]) & 0xff; const unsigned int type = header[6]; const uint32_t length = a | (b << 8); int header_size = kHeaderSize; if (type >= kRecyclableFullType && type <= kRecyclableLastType) { if (end_of_buffer_offset_ - buffer_.size() == 0) { recycled_ = true; } header_size = kRecyclableHeaderSize; while (buffer_.size() < static_cast(kRecyclableHeaderSize)) { size_t old_size = buffer_.size(); int error = kEof; if (!TryReadMore(drop_size, &error)) { *fragment_type_or_err = error; return false; } else if (old_size == buffer_.size()) { return false; } } const uint32_t log_num = DecodeFixed32(header + 7); if (log_num != log_number_) { *fragment_type_or_err = kOldRecord; return true; } } while (header_size + length > buffer_.size()) { size_t old_size = buffer_.size(); int error = kEof; if (!TryReadMore(drop_size, &error)) { *fragment_type_or_err = error; return false; } else if (old_size == buffer_.size()) { return false; } } if (type == kZeroType && length == 0) { buffer_.clear(); *fragment_type_or_err = kBadRecord; return true; } if (checksum_) { uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); uint32_t actual_crc = crc32c::Value(header + 6, length + header_size - 6); if (actual_crc != expected_crc) { *drop_size = buffer_.size(); buffer_.clear(); *fragment_type_or_err = kBadRecordChecksum; return true; } } buffer_.remove_prefix(header_size + length); *fragment = Slice(header + 
header_size, length); *fragment_type_or_err = type; return true; } } // namespace log } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/log_reader.h000066400000000000000000000147171370372246700161200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "db/log_format.h" #include "file/sequence_file_reader.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Logger; namespace log { /** * Reader is a general purpose log stream reader implementation. The actual job * of reading from the device is implemented by the SequentialFile interface. * * Please see Writer for details on the file and record layout. */ class Reader { public: // Interface for reporting errors. class Reporter { public: virtual ~Reporter(); // Some corruption was detected. "size" is the approximate number // of bytes dropped due to the corruption. virtual void Corruption(size_t bytes, const Status& status) = 0; }; // Create a reader that will return log records from "*file". // "*file" must remain live while this Reader is in use. // // If "reporter" is non-nullptr, it is notified whenever some data is // dropped due to a detected corruption. "*reporter" must remain // live while this Reader is in use. // // If "checksum" is true, verify checksums if available. 
Reader(std::shared_ptr info_log, std::unique_ptr&& file, Reporter* reporter, bool checksum, uint64_t log_num); // No copying allowed Reader(const Reader&) = delete; void operator=(const Reader&) = delete; virtual ~Reader(); // Read the next record into *record. Returns true if read // successfully, false if we hit end of the input. May use // "*scratch" as temporary storage. The contents filled in *record // will only be valid until the next mutating operation on this // reader or the next mutation to *scratch. virtual bool ReadRecord(Slice* record, std::string* scratch, WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords); // Returns the physical offset of the last record returned by ReadRecord. // // Undefined before the first call to ReadRecord. uint64_t LastRecordOffset(); // returns true if the reader has encountered an eof condition. bool IsEOF() { return eof_; } // returns true if the reader has encountered read error. bool hasReadError() const { return read_error_; } // when we know more data has been written to the file. we can use this // function to force the reader to look again in the file. // Also aligns the file position indicator to the start of the next block // by reading the rest of the data from the EOF position to the end of the // block that was partially read. 
virtual void UnmarkEOF(); SequentialFileReader* file() { return file_.get(); } Reporter* GetReporter() const { return reporter_; } uint64_t GetLogNumber() const { return log_number_; } size_t GetReadOffset() const { return static_cast(end_of_buffer_offset_); } protected: std::shared_ptr info_log_; const std::unique_ptr file_; Reporter* const reporter_; bool const checksum_; char* const backing_store_; // Internal state variables used for reading records Slice buffer_; bool eof_; // Last Read() indicated EOF by returning < kBlockSize bool read_error_; // Error occurred while reading from file // Offset of the file position indicator within the last block when an // EOF was detected. size_t eof_offset_; // Offset of the last record returned by ReadRecord. uint64_t last_record_offset_; // Offset of the first location past the end of buffer_. uint64_t end_of_buffer_offset_; // which log number this is uint64_t const log_number_; // Whether this is a recycled log file bool recycled_; // Extend record types with the following special values enum { kEof = kMaxRecordType + 1, // Returned whenever we find an invalid physical record. // Currently there are three situations in which this happens: // * The record has an invalid CRC (ReadPhysicalRecord reports a drop) // * The record is a 0-length record (No drop is reported) kBadRecord = kMaxRecordType + 2, // Returned when we fail to read a valid header. kBadHeader = kMaxRecordType + 3, // Returned when we read an old record from a previous user of the log. kOldRecord = kMaxRecordType + 4, // Returned when we get a bad record length kBadRecordLen = kMaxRecordType + 5, // Returned when we get a bad record checksum kBadRecordChecksum = kMaxRecordType + 6, }; // Return type, or one of the preceding special values unsigned int ReadPhysicalRecord(Slice* result, size_t* drop_size); // Read some more bool ReadMore(size_t* drop_size, int *error); void UnmarkEOFInternal(); // Reports dropped bytes to the reporter. 
// buffer_ must be updated to remove the dropped bytes prior to invocation. void ReportCorruption(size_t bytes, const char* reason); void ReportDrop(size_t bytes, const Status& reason); }; class FragmentBufferedReader : public Reader { public: FragmentBufferedReader(std::shared_ptr info_log, std::unique_ptr&& _file, Reporter* reporter, bool checksum, uint64_t log_num) : Reader(info_log, std::move(_file), reporter, checksum, log_num), fragments_(), in_fragmented_record_(false) {} ~FragmentBufferedReader() override {} bool ReadRecord(Slice* record, std::string* scratch, WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords) override; void UnmarkEOF() override; private: std::string fragments_; bool in_fragmented_record_; bool TryReadFragment(Slice* result, size_t* drop_size, unsigned int* fragment_type_or_err); bool TryReadMore(size_t* drop_size, int* error); // No copy allowed FragmentBufferedReader(const FragmentBufferedReader&); void operator=(const FragmentBufferedReader&); }; } // namespace log } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/log_test.cc000066400000000000000000000702471370372246700157730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/log_reader.h"
#include "db/log_writer.h"
#include "env/composite_env_wrapper.h"
#include "file/sequence_file_reader.h"
#include "file/writable_file_writer.h"
#include "rocksdb/env.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {
namespace log {

// Construct a string of the specified length made out of the supplied
// partial string.
static std::string BigString(const std::string& partial_string, size_t n) {
  // An empty pattern could never fill a non-empty result (the loop below
  // would not terminate).
  assert(!partial_string.empty() || n == 0);
  std::string result;
  while (result.size() < n) {
    result.append(partial_string);
  }
  result.resize(n);
  return result;
}

// Construct a string from a number
static std::string NumberString(int n) {
  char buf[50];
  snprintf(buf, sizeof(buf), "%d.", n);
  return std::string(buf);
}

// Return a skewed potentially long string
static std::string RandomSkewedString(int i, Random* rnd) {
  return BigString(NumberString(i), rnd->Skewed(17));
}

// Param type is tuple<int, bool>
// get<0>(tuple): non-zero if recycling log, zero if regular log
// get<1>(tuple): true if allow retry after read EOF, false otherwise
class LogTest : public ::testing::TestWithParam<std::tuple<int, bool>> {
 private:
  // In-memory SequentialFile over a Slice, with hooks to inject a read error
  // or an early EOF at a chosen position.
  class StringSource : public SequentialFile {
   public:
    Slice& contents_;
    bool force_error_;
    size_t force_error_position_;
    bool force_eof_;
    size_t force_eof_position_;
    bool returned_partial_;
    bool fail_after_read_partial_;
    explicit StringSource(Slice& contents, bool fail_after_read_partial)
        : contents_(contents),
          force_error_(false),
          force_error_position_(0),
          force_eof_(false),
          force_eof_position_(0),
          returned_partial_(false),
          fail_after_read_partial_(fail_after_read_partial) {}

    Status Read(size_t n, Slice* result, char* scratch) override {
      if (fail_after_read_partial_) {
        EXPECT_TRUE(!returned_partial_) << "must not Read() after eof/error";
      }

      if (force_error_) {
        if (force_error_position_ >= n) {
          // The injected error lies beyond this read; count down towards it.
          force_error_position_ -= n;
        } else {
          *result = Slice(contents_.data(), force_error_position_);
          contents_.remove_prefix(force_error_position_);
          force_error_ = false;
          returned_partial_ = true;
          return Status::Corruption("read error");
        }
      }

      if (contents_.size() < n) {
        n = contents_.size();
        returned_partial_ = true;
      }

      if (force_eof_) {
        if (force_eof_position_ >= n) {
          force_eof_position_ -= n;
        } else {
          force_eof_ = false;
          n = force_eof_position_;
          returned_partial_ = true;
        }
      }

      // By using scratch we ensure that caller has control over the
      // lifetime of result.data()
      memcpy(scratch, contents_.data(), n);
      *result = Slice(scratch, n);

      contents_.remove_prefix(n);
      return Status::OK();
    }

    Status Skip(uint64_t n) override {
      if (n > contents_.size()) {
        contents_.clear();
        return Status::NotFound("in-memory file skipped past end");
      }

      contents_.remove_prefix(n);

      return Status::OK();
    }
  };

  // Unwraps the StringSource that sits behind the legacy file wrapper.
  inline StringSource* GetStringSourceFromLegacyReader(
      SequentialFileReader* reader) {
    LegacySequentialFileWrapper* file =
        static_cast<LegacySequentialFileWrapper*>(reader->file());
    return static_cast<StringSource*>(file->target());
  }

  // Accumulates the dropped byte count and messages of reported corruptions.
  class ReportCollector : public Reader::Reporter {
   public:
    size_t dropped_bytes_;
    std::string message_;

    ReportCollector() : dropped_bytes_(0) {}
    void Corruption(size_t bytes, const Status& status) override {
      dropped_bytes_ += bytes;
      message_.append(status.ToString());
    }
  };

  std::string& dest_contents() {
    auto dest = test::GetStringSinkFromLegacyWriter(writer_.file());
    assert(dest);
    return dest->contents_;
  }

  const std::string& dest_contents() const {
    auto dest = test::GetStringSinkFromLegacyWriter(writer_.file());
    assert(dest);
    return dest->contents_;
  }

  void reset_source_contents() {
    auto src = GetStringSourceFromLegacyReader(reader_->file());
    assert(src);
    src->contents_ = dest_contents();
  }

  Slice reader_contents_;
  std::unique_ptr<WritableFileWriter> dest_holder_;
  std::unique_ptr<SequentialFileReader> source_holder_;
  ReportCollector report_;
  Writer writer_;
  std::unique_ptr<Reader> reader_;

 protected:
  bool allow_retry_read_;

 public:
  LogTest()
      : reader_contents_(),
        dest_holder_(test::GetWritableFileWriter(
            new test::StringSink(&reader_contents_), "" /* don't care */)),
        source_holder_(test::GetSequentialFileReader(
            new StringSource(reader_contents_, !std::get<1>(GetParam())),
            "" /* file name */)),
        writer_(std::move(dest_holder_), 123, std::get<0>(GetParam())),
        allow_retry_read_(std::get<1>(GetParam())) {
    if (allow_retry_read_) {
      reader_.reset(new FragmentBufferedReader(
          nullptr, std::move(source_holder_), &report_, true /* checksum */,
          123 /* log_number */));
    } else {
      reader_.reset(new Reader(nullptr, std::move(source_holder_), &report_,
                               true /* checksum */, 123 /* log_number */));
    }
  }

  Slice* get_reader_contents() { return &reader_contents_; }

  // Appends one logical record; a failed write fails the test instead of
  // being silently ignored.
  void Write(const std::string& msg) {
    ASSERT_OK(writer_.AddRecord(Slice(msg)));
  }

  size_t WrittenBytes() const { return dest_contents().size(); }

  // Reads the next record and returns its payload, or the literal "EOF" when
  // ReadRecord() returns false.
  std::string Read(const WALRecoveryMode wal_recovery_mode =
                       WALRecoveryMode::kTolerateCorruptedTailRecords) {
    std::string scratch;
    Slice record;
    if (reader_->ReadRecord(&record, &scratch, wal_recovery_mode)) {
      return record.ToString();
    }
    return "EOF";
  }

  void IncrementByte(int offset, char delta) {
    dest_contents()[offset] += delta;
  }

  void SetByte(int offset, char new_byte) {
    dest_contents()[offset] = new_byte;
  }

  void ShrinkSize(int bytes) {
    auto dest = test::GetStringSinkFromLegacyWriter(writer_.file());
    assert(dest);
    dest->Drop(bytes);
  }

  void FixChecksum(int header_offset, int len, bool recyclable) {
    // Compute crc of type/len/data
    int header_size = recyclable ? kRecyclableHeaderSize : kHeaderSize;
    uint32_t crc = crc32c::Value(&dest_contents()[header_offset + 6],
                                 header_size - 6 + len);
    crc = crc32c::Mask(crc);
    EncodeFixed32(&dest_contents()[header_offset], crc);
  }

  void ForceError(size_t position = 0) {
    auto src = GetStringSourceFromLegacyReader(reader_->file());
    src->force_error_ = true;
    src->force_error_position_ = position;
  }

  size_t DroppedBytes() const { return report_.dropped_bytes_; }

  std::string ReportMessage() const { return report_.message_; }

  void ForceEOF(size_t position = 0) {
    auto src = GetStringSourceFromLegacyReader(reader_->file());
    src->force_eof_ = true;
    src->force_eof_position_ = position;
  }

  void UnmarkEOF() {
    auto src = GetStringSourceFromLegacyReader(reader_->file());
    src->returned_partial_ = false;
    reader_->UnmarkEOF();
  }

  bool IsEOF() { return reader_->IsEOF(); }

  // Returns OK iff recorded error message contains "msg"
  std::string MatchError(const std::string& msg) const {
    if (report_.message_.find(msg) == std::string::npos) {
      return report_.message_;
    } else {
      return "OK";
    }
  }
};

TEST_P(LogTest, Empty) { ASSERT_EQ("EOF", Read()); }

TEST_P(LogTest, ReadWrite) {
  Write("foo");
  Write("bar");
  Write("");
  Write("xxxx");
  ASSERT_EQ("foo", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("xxxx", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ("EOF", Read());  // Make sure reads at eof work
}

TEST_P(LogTest, ManyBlocks) {
  for (int i = 0; i < 100000; i++) {
    Write(NumberString(i));
  }
  for (int i = 0; i < 100000; i++) {
    ASSERT_EQ(NumberString(i), Read());
  }
  ASSERT_EQ("EOF", Read());
}

TEST_P(LogTest, Fragmentation) {
  Write("small");
  Write(BigString("medium", 50000));
  Write(BigString("large", 100000));
  ASSERT_EQ("small", Read());
  ASSERT_EQ(BigString("medium", 50000), Read());
  ASSERT_EQ(BigString("large", 100000), Read());
  ASSERT_EQ("EOF", Read());
}

TEST_P(LogTest, MarginalTrailer) {
  // Make a trailer that is exactly the same length as an empty record.
  int header_size =
      std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize;
  const int n = kBlockSize - 2 * header_size;
  Write(BigString("foo", n));
  ASSERT_EQ((unsigned int)(kBlockSize - header_size), WrittenBytes());
  Write("");
  Write("bar");
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}

TEST_P(LogTest, MarginalTrailer2) {
  // Make a trailer that is exactly the same length as an empty record.
  int header_size =
      std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize;
  const int n = kBlockSize - 2 * header_size;
  Write(BigString("foo", n));
  ASSERT_EQ((unsigned int)(kBlockSize - header_size), WrittenBytes());
  Write("bar");
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(0U, DroppedBytes());
  ASSERT_EQ("", ReportMessage());
}

TEST_P(LogTest, ShortTrailer) {
  int header_size =
      std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize;
  const int n = kBlockSize - 2 * header_size + 4;
  Write(BigString("foo", n));
  ASSERT_EQ((unsigned int)(kBlockSize - header_size + 4), WrittenBytes());
  Write("");
  Write("bar");
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}

TEST_P(LogTest, AlignedEof) {
  int header_size =
      std::get<0>(GetParam()) ? kRecyclableHeaderSize : kHeaderSize;
  const int n = kBlockSize - 2 * header_size + 4;
  Write(BigString("foo", n));
  ASSERT_EQ((unsigned int)(kBlockSize - header_size + 4), WrittenBytes());
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_EQ("EOF", Read());
}

TEST_P(LogTest, RandomRead) {
  const int N = 500;
  Random write_rnd(301);
  for (int i = 0; i < N; i++) {
    Write(RandomSkewedString(i, &write_rnd));
  }
  Random read_rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_EQ(RandomSkewedString(i, &read_rnd), Read());
  }
  ASSERT_EQ("EOF", Read());
}

// Tests of all the error paths in log_reader.cc follow:

TEST_P(LogTest, ReadError) {
  Write("foo");
  ForceError();
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ((unsigned int)kBlockSize, DroppedBytes());
  ASSERT_EQ("OK", MatchError("read error"));
}

TEST_P(LogTest, BadRecordType) {
  Write("foo");
  // Type is stored in header[6]
  IncrementByte(6, 100);
  FixChecksum(0, 3, false);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3U, DroppedBytes());
  ASSERT_EQ("OK", MatchError("unknown record type"));
}

TEST_P(LogTest, TruncatedTrailingRecordIsIgnored) {
  Write("foo");
  ShrinkSize(4);  // Drop all payload as well as a header byte
  ASSERT_EQ("EOF", Read());
  // Truncated last record is ignored, not treated as an error
  ASSERT_EQ(0U, DroppedBytes());
  ASSERT_EQ("", ReportMessage());
}

TEST_P(LogTest, TruncatedTrailingRecordIsNotIgnored) {
  if (allow_retry_read_) {
    // If read retry is allowed, then truncated trailing record should not
    // raise an error.
    return;
  }
  Write("foo");
  ShrinkSize(4);  // Drop all payload as well as a header byte
  ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
  // Under kAbsoluteConsistency the truncated last record is reported as
  // corruption instead of being ignored
  ASSERT_GT(DroppedBytes(), 0U);
  ASSERT_EQ("OK", MatchError("Corruption: truncated header"));
}

TEST_P(LogTest, BadLength) {
  if (allow_retry_read_) {
    // If read retry is allowed, then we should not raise an error when the
    // record length specified in header is longer than data currently
    // available. It's possible that the body of the record is not written yet.
    return;
  }
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  int header_size = recyclable_log ? kRecyclableHeaderSize : kHeaderSize;
  const int kPayloadSize = kBlockSize - header_size;
  Write(BigString("bar", kPayloadSize));
  Write("foo");
  // Least significant size byte is stored in header[4].
  IncrementByte(4, 1);
  if (!recyclable_log) {
    ASSERT_EQ("foo", Read());
    ASSERT_EQ(kBlockSize, DroppedBytes());
    ASSERT_EQ("OK", MatchError("bad record length"));
  } else {
    ASSERT_EQ("EOF", Read());
  }
}

TEST_P(LogTest, BadLengthAtEndIsIgnored) {
  if (allow_retry_read_) {
    // If read retry is allowed, then we should not raise an error when the
    // record length specified in header is longer than data currently
    // available. It's possible that the body of the record is not written yet.
    return;
  }
  Write("foo");
  ShrinkSize(1);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(0U, DroppedBytes());
  ASSERT_EQ("", ReportMessage());
}

TEST_P(LogTest, BadLengthAtEndIsNotIgnored) {
  if (allow_retry_read_) {
    // If read retry is allowed, then we should not raise an error when the
    // record length specified in header is longer than data currently
    // available. It's possible that the body of the record is not written yet.
    return;
  }
  Write("foo");
  ShrinkSize(1);
  ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
  ASSERT_GT(DroppedBytes(), 0U);
  ASSERT_EQ("OK", MatchError("Corruption: truncated header"));
}

TEST_P(LogTest, ChecksumMismatch) {
  Write("foooooo");
  IncrementByte(0, 14);
  ASSERT_EQ("EOF", Read());
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  if (!recyclable_log) {
    ASSERT_EQ(14U, DroppedBytes());
    ASSERT_EQ("OK", MatchError("checksum mismatch"));
  } else {
    ASSERT_EQ(0U, DroppedBytes());
    ASSERT_EQ("", ReportMessage());
  }
}

TEST_P(LogTest, UnexpectedMiddleType) {
  Write("foo");
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  SetByte(6, static_cast<char>(recyclable_log ? kRecyclableMiddleType
                                              : kMiddleType));
  FixChecksum(0, 3, !!recyclable_log);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3U, DroppedBytes());
  ASSERT_EQ("OK", MatchError("missing start"));
}

TEST_P(LogTest, UnexpectedLastType) {
  Write("foo");
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  SetByte(6,
          static_cast<char>(recyclable_log ? kRecyclableLastType : kLastType));
  FixChecksum(0, 3, !!recyclable_log);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3U, DroppedBytes());
  ASSERT_EQ("OK", MatchError("missing start"));
}

TEST_P(LogTest, UnexpectedFullType) {
  Write("foo");
  Write("bar");
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  SetByte(
      6, static_cast<char>(recyclable_log ? kRecyclableFirstType : kFirstType));
  FixChecksum(0, 3, !!recyclable_log);
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3U, DroppedBytes());
  ASSERT_EQ("OK", MatchError("partial record without end"));
}

TEST_P(LogTest, UnexpectedFirstType) {
  Write("foo");
  Write(BigString("bar", 100000));
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  SetByte(
      6, static_cast<char>(recyclable_log ? kRecyclableFirstType : kFirstType));
  FixChecksum(0, 3, !!recyclable_log);
  ASSERT_EQ(BigString("bar", 100000), Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3U, DroppedBytes());
  ASSERT_EQ("OK", MatchError("partial record without end"));
}

TEST_P(LogTest, MissingLastIsIgnored) {
  Write(BigString("bar", kBlockSize));
  // Remove the LAST block, including header.
  ShrinkSize(14);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ("", ReportMessage());
  ASSERT_EQ(0U, DroppedBytes());
}

TEST_P(LogTest, MissingLastIsNotIgnored) {
  if (allow_retry_read_) {
    // If read retry is allowed, then truncated trailing record should not
    // raise an error.
    return;
  }
  Write(BigString("bar", kBlockSize));
  // Remove the LAST block, including header.
  ShrinkSize(14);
  ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
  ASSERT_GT(DroppedBytes(), 0U);
  ASSERT_EQ("OK", MatchError("Corruption: error reading trailing data"));
}

TEST_P(LogTest, PartialLastIsIgnored) {
  Write(BigString("bar", kBlockSize));
  // Cause a bad record length in the LAST block.
  ShrinkSize(1);
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ("", ReportMessage());
  ASSERT_EQ(0U, DroppedBytes());
}

TEST_P(LogTest, PartialLastIsNotIgnored) {
  if (allow_retry_read_) {
    // If read retry is allowed, then truncated trailing record should not
    // raise an error.
    return;
  }
  Write(BigString("bar", kBlockSize));
  // Cause a bad record length in the LAST block.
  ShrinkSize(1);
  ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
  ASSERT_GT(DroppedBytes(), 0U);
  ASSERT_EQ("OK", MatchError(
                      "Corruption: truncated headerCorruption: "
                      "error reading trailing data"));
}

TEST_P(LogTest, ErrorJoinsRecords) {
  // Consider two fragmented records:
  //    first(R1) last(R1) first(R2) last(R2)
  // where the middle two fragments disappear. We do not want
  // first(R1),last(R2) to get joined and returned as a valid record.

  // Write records that span two blocks
  Write(BigString("foo", kBlockSize));
  Write(BigString("bar", kBlockSize));
  Write("correct");

  // Wipe the middle block
  for (unsigned int offset = kBlockSize; offset < 2 * kBlockSize; offset++) {
    SetByte(offset, 'x');
  }

  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  if (!recyclable_log) {
    ASSERT_EQ("correct", Read());
    ASSERT_EQ("EOF", Read());
    size_t dropped = DroppedBytes();
    ASSERT_LE(dropped, 2 * kBlockSize + 100);
    ASSERT_GE(dropped, 2 * kBlockSize);
  } else {
    ASSERT_EQ("EOF", Read());
  }
}

TEST_P(LogTest, ClearEofSingleBlock) {
  Write("foo");
  Write("bar");
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  int header_size = recyclable_log ? kRecyclableHeaderSize : kHeaderSize;
  ForceEOF(3 + header_size + 2);
  ASSERT_EQ("foo", Read());
  UnmarkEOF();
  ASSERT_EQ("bar", Read());
  ASSERT_TRUE(IsEOF());
  ASSERT_EQ("EOF", Read());
  Write("xxx");
  UnmarkEOF();
  ASSERT_EQ("xxx", Read());
  ASSERT_TRUE(IsEOF());
}

TEST_P(LogTest, ClearEofMultiBlock) {
  size_t num_full_blocks = 5;
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  int header_size = recyclable_log ? kRecyclableHeaderSize : kHeaderSize;
  size_t n = (kBlockSize - header_size) * num_full_blocks + 25;
  Write(BigString("foo", n));
  Write(BigString("bar", n));
  ForceEOF(n + num_full_blocks * header_size + header_size + 3);
  ASSERT_EQ(BigString("foo", n), Read());
  ASSERT_TRUE(IsEOF());
  UnmarkEOF();
  ASSERT_EQ(BigString("bar", n), Read());
  ASSERT_TRUE(IsEOF());
  Write(BigString("xxx", n));
  UnmarkEOF();
  ASSERT_EQ(BigString("xxx", n), Read());
  ASSERT_TRUE(IsEOF());
}

TEST_P(LogTest, ClearEofError) {
  // If an error occurs during Read() in UnmarkEOF(), the records contained
  // in the buffer should be returned on subsequent calls of ReadRecord()
  // until no more full records are left, whereafter ReadRecord() should return
  // false to indicate that it cannot read any further.

  Write("foo");
  Write("bar");
  UnmarkEOF();
  ASSERT_EQ("foo", Read());
  ASSERT_TRUE(IsEOF());
  Write("xxx");
  ForceError(0);
  UnmarkEOF();
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}

TEST_P(LogTest, ClearEofError2) {
  Write("foo");
  Write("bar");
  UnmarkEOF();
  ASSERT_EQ("foo", Read());
  Write("xxx");
  ForceError(3);
  UnmarkEOF();
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3U, DroppedBytes());
  ASSERT_EQ("OK", MatchError("read error"));
}

TEST_P(LogTest, Recycle) {
  bool recyclable_log = (std::get<0>(GetParam()) != 0);
  if (!recyclable_log) {
    return;  // test is only valid for recycled logs
  }
  Write("foo");
  Write("bar");
  Write("baz");
  Write("bif");
  Write("blitz");
  while (get_reader_contents()->size() < log::kBlockSize * 2) {
    Write("xxxxxxxxxxxxxxxx");
  }
  std::unique_ptr<WritableFileWriter> dest_holder(test::GetWritableFileWriter(
      new test::OverwritingStringSink(get_reader_contents()),
      "" /* don't care */));
  Writer recycle_writer(std::move(dest_holder), 123, true);
  ASSERT_OK(recycle_writer.AddRecord(Slice("foooo")));
  ASSERT_OK(recycle_writer.AddRecord(Slice("bar")));
  ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2);
  ASSERT_EQ("foooo", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}

INSTANTIATE_TEST_CASE_P(bool, LogTest,
                        ::testing::Values(std::make_tuple(0, false),
                                          std::make_tuple(0, true),
                                          std::make_tuple(1, false),
                                          std::make_tuple(1, true)));

// Fixture for tailing a real file with FragmentBufferedReader. Records are
// first encoded into an in-memory sink (log_writer_), and the encoded bytes
// are then appended piecewise to the file the reader is tailing (writer_).
// The int parameter is passed to the Writer as its recycle flag.
class RetriableLogTest : public ::testing::TestWithParam<int> {
 private:
  class ReportCollector : public Reader::Reporter {
   public:
    size_t dropped_bytes_;
    std::string message_;

    ReportCollector() : dropped_bytes_(0) {}
    void Corruption(size_t bytes, const Status& status) override {
      dropped_bytes_ += bytes;
      message_.append(status.ToString());
    }
  };

  Slice contents_;
  std::unique_ptr<WritableFileWriter> dest_holder_;
  std::unique_ptr<Writer> log_writer_;
  Env* env_;
  EnvOptions env_options_;
  const std::string test_dir_;
  const std::string log_file_;
  std::unique_ptr<WritableFileWriter> writer_;
  std::unique_ptr<SequentialFileReader> reader_;
  ReportCollector report_;
  std::unique_ptr<FragmentBufferedReader> log_reader_;

 public:
  RetriableLogTest()
      : contents_(),
        dest_holder_(nullptr),
        log_writer_(nullptr),
        env_(Env::Default()),
        test_dir_(test::PerThreadDBPath("retriable_log_test")),
        log_file_(test_dir_ + "/log"),
        writer_(nullptr),
        reader_(nullptr),
        log_reader_(nullptr) {}

  Status SetupTestEnv() {
    dest_holder_.reset(test::GetWritableFileWriter(
        new test::StringSink(&contents_), "" /* file name */));
    assert(dest_holder_ != nullptr);
    log_writer_.reset(new Writer(std::move(dest_holder_), 123, GetParam()));
    assert(log_writer_ != nullptr);

    Status s;
    s = env_->CreateDirIfMissing(test_dir_);
    std::unique_ptr<WritableFile> writable_file;
    if (s.ok()) {
      s = env_->NewWritableFile(log_file_, &writable_file, env_options_);
    }
    if (s.ok()) {
      writer_.reset(new WritableFileWriter(
          NewLegacyWritableFileWrapper(std::move(writable_file)), log_file_,
          env_options_));
      assert(writer_ != nullptr);
    }
    std::unique_ptr<SequentialFile> seq_file;
    if (s.ok()) {
      s = env_->NewSequentialFile(log_file_, &seq_file, env_options_);
    }
    if (s.ok()) {
      reader_.reset(new SequentialFileReader(
          NewLegacySequentialFileWrapper(seq_file), log_file_));
      assert(reader_ != nullptr);
      log_reader_.reset(new FragmentBufferedReader(
          nullptr, std::move(reader_), &report_, true /* checksum */,
          123 /* log_number */));
      assert(log_reader_ != nullptr);
    }
    return s;
  }

  // Returns a copy of everything encoded so far.
  std::string contents() {
    auto file = test::GetStringSinkFromLegacyWriter(log_writer_->file());
    assert(file != nullptr);
    return file->contents_;
  }

  // Encodes one record into the in-memory sink; fails the test on error.
  void Encode(const std::string& msg) {
    ASSERT_OK(log_writer_->AddRecord(Slice(msg)));
  }

  // Appends raw bytes to the tailed file and syncs; fails the test on error.
  void Write(const Slice& data) {
    ASSERT_OK(writer_->Append(data));
    ASSERT_OK(writer_->Sync(true));
  }

  // Attempts a single ReadRecord(); *result is cleared first and filled only
  // when a record was read.
  bool TryRead(std::string* result) {
    assert(result != nullptr);
    result->clear();
    std::string scratch;
    Slice record;
    bool r = log_reader_->ReadRecord(&record, &scratch);
    if (r) {
      result->assign(record.data(), record.size());
      return true;
    } else {
      return false;
    }
  }
};

TEST_P(RetriableLogTest, TailLog_PartialHeader) {
  ASSERT_OK(SetupTestEnv());
  size_t header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize;
  bool eof = false;
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency(
      {{"RetriableLogTest::TailLog:AfterPart1",
        "RetriableLogTest::TailLog:BeforeReadRecord"},
       {"FragmentBufferedLogReader::TryReadMore:FirstEOF",
        "RetriableLogTest::TailLog:BeforePart2"}});
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->SetCallBack(
      "FragmentBufferedLogReader::TryReadMore:FirstEOF",
      [&](void* /*arg*/) { eof = true; });
  SyncPoint::GetInstance()->EnableProcessing();

  // Split the encoded record one byte short of a full header.
  size_t delta = header_size - 1;
  port::Thread log_writer_thread([&]() {
    size_t old_sz = contents().size();
    Encode("foo");
    size_t new_sz = contents().size();
    std::string part1 = contents().substr(old_sz, delta);
    std::string part2 =
        contents().substr(old_sz + delta, new_sz - old_sz - delta);
    Write(Slice(part1));
    TEST_SYNC_POINT("RetriableLogTest::TailLog:AfterPart1");
    TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforePart2");
    Write(Slice(part2));
  });

  std::string record;
  port::Thread log_reader_thread([&]() {
    TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforeReadRecord");
    while (!TryRead(&record)) {
    }
  });
  log_reader_thread.join();
  log_writer_thread.join();
  ASSERT_EQ("foo", record);
  ASSERT_TRUE(eof);
}

TEST_P(RetriableLogTest, TailLog_FullHeader) {
  ASSERT_OK(SetupTestEnv());
  size_t header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize;
  bool eof = false;
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency(
      {{"RetriableLogTest::TailLog:AfterPart1",
        "RetriableLogTest::TailLog:BeforeReadRecord"},
       {"FragmentBufferedLogReader::TryReadMore:FirstEOF",
        "RetriableLogTest::TailLog:BeforePart2"}});
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->SetCallBack(
      "FragmentBufferedLogReader::TryReadMore:FirstEOF",
      [&](void* /*arg*/) { eof = true; });
  SyncPoint::GetInstance()->EnableProcessing();

  // Split the encoded record one byte past the header.
  size_t delta = header_size + 1;
  port::Thread log_writer_thread([&]() {
    size_t old_sz = contents().size();
    Encode("foo");
    size_t new_sz = contents().size();
    std::string part1 = contents().substr(old_sz, delta);
    std::string part2 =
        contents().substr(old_sz + delta, new_sz - old_sz - delta);
    Write(Slice(part1));
    TEST_SYNC_POINT("RetriableLogTest::TailLog:AfterPart1");
    TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforePart2");
    Write(Slice(part2));
    ASSERT_TRUE(eof);
  });

  std::string record;
  port::Thread log_reader_thread([&]() {
    TEST_SYNC_POINT("RetriableLogTest::TailLog:BeforeReadRecord");
    while (!TryRead(&record)) {
    }
  });
  log_reader_thread.join();
  log_writer_thread.join();
  ASSERT_EQ("foo", record);
}

TEST_P(RetriableLogTest, NonBlockingReadFullRecord) {
  // Clear all sync point callbacks even if this test does not use sync point.
  // It is necessary, otherwise the execute of this test may hit a sync point
  // with which a callback is registered. The registered callback may access
  // some dead variable, causing segfault.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  ASSERT_OK(SetupTestEnv());
  size_t header_size = GetParam() ? kRecyclableHeaderSize : kHeaderSize;
  size_t delta = header_size - 1;
  size_t old_sz = contents().size();
  Encode("foo-bar");
  size_t new_sz = contents().size();
  std::string part1 = contents().substr(old_sz, delta);
  std::string part2 =
      contents().substr(old_sz + delta, new_sz - old_sz - delta);
  Write(Slice(part1));
  std::string record;
  ASSERT_FALSE(TryRead(&record));
  ASSERT_TRUE(record.empty());
  Write(Slice(part2));
  ASSERT_TRUE(TryRead(&record));
  ASSERT_EQ("foo-bar", record);
}

INSTANTIATE_TEST_CASE_P(bool, RetriableLogTest, ::testing::Values(0, 2));

}  // namespace log
}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/db/log_writer.cc000066400000000000000000000116751370372246700163300ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/log_writer.h" #include #include "file/writable_file_writer.h" #include "rocksdb/env.h" #include "util/coding.h" #include "util/crc32c.h" namespace ROCKSDB_NAMESPACE { namespace log { Writer::Writer(std::unique_ptr&& dest, uint64_t log_number, bool recycle_log_files, bool manual_flush) : dest_(std::move(dest)), block_offset_(0), log_number_(log_number), recycle_log_files_(recycle_log_files), manual_flush_(manual_flush) { for (int i = 0; i <= kMaxRecordType; i++) { char t = static_cast(i); type_crc_[i] = crc32c::Value(&t, 1); } } Writer::~Writer() { if (dest_) { WriteBuffer(); } } IOStatus Writer::WriteBuffer() { return dest_->Flush(); } IOStatus Writer::Close() { IOStatus s; if (dest_) { s = dest_->Close(); dest_.reset(); } return s; } IOStatus Writer::AddRecord(const Slice& slice) { const char* ptr = slice.data(); size_t left = slice.size(); // Header size varies depending on whether we are recycling or not. const int header_size = recycle_log_files_ ? kRecyclableHeaderSize : kHeaderSize; // Fragment the record if necessary and emit it. Note that if slice // is empty, we still want to iterate once to emit a single // zero-length record IOStatus s; bool begin = true; do { const int64_t leftover = kBlockSize - block_offset_; assert(leftover >= 0); if (leftover < header_size) { // Switch to a new block if (leftover > 0) { // Fill the trailer (literal below relies on kHeaderSize and // kRecyclableHeaderSize being <= 11) assert(header_size <= 11); s = dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", static_cast(leftover))); if (!s.ok()) { break; } } block_offset_ = 0; } // Invariant: we never leave < header_size bytes in a block. assert(static_cast(kBlockSize - block_offset_) >= header_size); const size_t avail = kBlockSize - block_offset_ - header_size; const size_t fragment_length = (left < avail) ? left : avail; RecordType type; const bool end = (left == fragment_length); if (begin && end) { type = recycle_log_files_ ? 
kRecyclableFullType : kFullType; } else if (begin) { type = recycle_log_files_ ? kRecyclableFirstType : kFirstType; } else if (end) { type = recycle_log_files_ ? kRecyclableLastType : kLastType; } else { type = recycle_log_files_ ? kRecyclableMiddleType : kMiddleType; } s = EmitPhysicalRecord(type, ptr, fragment_length); ptr += fragment_length; left -= fragment_length; begin = false; } while (s.ok() && left > 0); if (s.ok()) { if (!manual_flush_) { s = dest_->Flush(); } } return s; } bool Writer::TEST_BufferIsEmpty() { return dest_->TEST_BufferIsEmpty(); } IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) { assert(n <= 0xffff); // Must fit in two bytes size_t header_size; char buf[kRecyclableHeaderSize]; // Format the header buf[4] = static_cast(n & 0xff); buf[5] = static_cast(n >> 8); buf[6] = static_cast(t); uint32_t crc = type_crc_[t]; if (t < kRecyclableFullType) { // Legacy record format assert(block_offset_ + kHeaderSize + n <= kBlockSize); header_size = kHeaderSize; } else { // Recyclable record format assert(block_offset_ + kRecyclableHeaderSize + n <= kBlockSize); header_size = kRecyclableHeaderSize; // Only encode low 32-bits of the 64-bit log number. This means // we will fail to detect an old record if we recycled a log from // ~4 billion logs ago, but that is effectively impossible, and // even if it were we'dbe far more likely to see a false positive // on the 32-bit CRC. EncodeFixed32(buf + 7, static_cast(log_number_)); crc = crc32c::Extend(crc, buf + 7, 4); } // Compute the crc of the record type and the payload. 
crc = crc32c::Extend(crc, ptr, n); crc = crc32c::Mask(crc); // Adjust for storage TEST_SYNC_POINT_CALLBACK("LogWriter::EmitPhysicalRecord:BeforeEncodeChecksum", &crc); EncodeFixed32(buf, crc); // Write the header and the payload IOStatus s = dest_->Append(Slice(buf, header_size)); if (s.ok()) { s = dest_->Append(Slice(ptr, n)); } block_offset_ += header_size + n; return s; } } // namespace log } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/log_writer.h000066400000000000000000000076061370372246700161710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "db/log_format.h" #include "rocksdb/io_status.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class WritableFileWriter; namespace log { /** * Writer is a general purpose log stream writer. It provides an append-only * abstraction for writing data. The details of the how the data is written is * handled by the WriteableFile sub-class implementation. * * File format: * * File is broken down into variable sized records. The format of each record * is described below. * +-----+-------------+--+----+----------+------+-- ... ----+ * File | r0 | r1 |P | r2 | r3 | r4 | | * +-----+-------------+--+----+----------+------+-- ... ----+ * <--- kBlockSize ------>|<-- kBlockSize ------>| * rn = variable size records * P = Padding * * Data is written out in kBlockSize chunks. If next record does not fit * into the space left, the leftover space will be padded with \0. 
* * Legacy record format: * * +---------+-----------+-----------+--- ... ---+ * |CRC (4B) | Size (2B) | Type (1B) | Payload | * +---------+-----------+-----------+--- ... ---+ * * CRC = 32bit hash computed over the record type and payload using CRC * Size = Length of the payload data * Type = Type of record * (kZeroType, kFullType, kFirstType, kLastType, kMiddleType ) * The type is used to group a bunch of records together to represent * blocks that are larger than kBlockSize * Payload = Byte stream as long as specified by the payload size * * Recyclable record format: * * +---------+-----------+-----------+----------------+--- ... ---+ * |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | * +---------+-----------+-----------+----------------+--- ... ---+ * * Same as above, with the addition of * Log number = 32bit log file number, so that we can distinguish between * records written by the most recent log writer vs a previous one. */ class Writer { public: // Create a writer that will append data to "*dest". // "*dest" must be initially empty. // "*dest" must remain live while this Writer is in use. explicit Writer(std::unique_ptr&& dest, uint64_t log_number, bool recycle_log_files, bool manual_flush = false); // No copying allowed Writer(const Writer&) = delete; void operator=(const Writer&) = delete; ~Writer(); IOStatus AddRecord(const Slice& slice); WritableFileWriter* file() { return dest_.get(); } const WritableFileWriter* file() const { return dest_.get(); } uint64_t get_log_number() const { return log_number_; } IOStatus WriteBuffer(); IOStatus Close(); bool TEST_BufferIsEmpty(); private: std::unique_ptr dest_; size_t block_offset_; // Current offset in block uint64_t log_number_; bool recycle_log_files_; // crc32c values for all supported record types. These are // pre-computed to reduce the overhead of computing the crc of the // record type stored in the header. 
uint32_t type_crc_[kMaxRecordType + 1]; IOStatus EmitPhysicalRecord(RecordType type, const char* ptr, size_t length); // If true, it does not flush after each write. Instead it relies on the upper // layer to manually does the flush by calling ::WriteBuffer() bool manual_flush_; }; } // namespace log } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/logs_with_prep_tracker.cc000066400000000000000000000044071370372246700207060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "db/logs_with_prep_tracker.h" #include "port/likely.h" namespace ROCKSDB_NAMESPACE { void LogsWithPrepTracker::MarkLogAsHavingPrepSectionFlushed(uint64_t log) { assert(log != 0); std::lock_guard lock(prepared_section_completed_mutex_); auto it = prepared_section_completed_.find(log); if (UNLIKELY(it == prepared_section_completed_.end())) { prepared_section_completed_[log] = 1; } else { it->second += 1; } } void LogsWithPrepTracker::MarkLogAsContainingPrepSection(uint64_t log) { assert(log != 0); std::lock_guard lock(logs_with_prep_mutex_); auto rit = logs_with_prep_.rbegin(); bool updated = false; // Most probably the last log is the one that is being marked for // having a prepare section; so search from the end. 
for (; rit != logs_with_prep_.rend() && rit->log >= log; ++rit) { if (rit->log == log) { rit->cnt++; updated = true; break; } } if (!updated) { // We are either at the start, or at a position with rit->log < log logs_with_prep_.insert(rit.base(), {log, 1}); } } uint64_t LogsWithPrepTracker::FindMinLogContainingOutstandingPrep() { std::lock_guard lock(logs_with_prep_mutex_); auto it = logs_with_prep_.begin(); // start with the smallest log for (; it != logs_with_prep_.end();) { auto min_log = it->log; { std::lock_guard lock2(prepared_section_completed_mutex_); auto completed_it = prepared_section_completed_.find(min_log); if (completed_it == prepared_section_completed_.end() || completed_it->second < it->cnt) { return min_log; } assert(completed_it != prepared_section_completed_.end() && completed_it->second == it->cnt); prepared_section_completed_.erase(completed_it); } // erase from beginning in vector is not efficient but this function is not // on the fast path. it = logs_with_prep_.erase(it); } // no such log found return 0; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/logs_with_prep_tracker.h000066400000000000000000000046201370372246700205450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include #include #include #include #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { // This class is used to track the log files with outstanding prepare entries. class LogsWithPrepTracker { public: // Called when a transaction prepared in `log` has been committed or aborted. void MarkLogAsHavingPrepSectionFlushed(uint64_t log); // Called when a transaction is prepared in `log`. 
void MarkLogAsContainingPrepSection(uint64_t log); // Return the earliest log file with outstanding prepare entries. uint64_t FindMinLogContainingOutstandingPrep(); size_t TEST_PreparedSectionCompletedSize() { return prepared_section_completed_.size(); } size_t TEST_LogsWithPrepSize() { return logs_with_prep_.size(); } private: // REQUIRES: logs_with_prep_mutex_ held // // sorted list of log numbers still containing prepared data. // this is used by FindObsoleteFiles to determine which // flushed logs we must keep around because they still // contain prepared data which has not been committed or rolled back struct LogCnt { uint64_t log; // the log number uint64_t cnt; // number of prepared sections in the log }; std::vector logs_with_prep_; std::mutex logs_with_prep_mutex_; // REQUIRES: prepared_section_completed_mutex_ held // // to be used in conjunction with logs_with_prep_. // once a transaction with data in log L is committed or rolled back // rather than updating logs_with_prep_ directly we keep track of that // in prepared_section_completed_ which maps LOG -> instance_count. This helps // avoiding contention between a commit thread and the prepare threads. // // when trying to determine the minimum log still active we first // consult logs_with_prep_. while that root value maps to // an equal value in prepared_section_completed_ we erase the log from // both logs_with_prep_ and prepared_section_completed_. std::unordered_map prepared_section_completed_; std::mutex prepared_section_completed_mutex_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/lookup_key.h000066400000000000000000000040071370372246700161650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. 
All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "rocksdb/db.h" #include "rocksdb/slice.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { // A helper class useful for DBImpl::Get() class LookupKey { public: // Initialize *this for looking up user_key at a snapshot with // the specified sequence number. LookupKey(const Slice& _user_key, SequenceNumber sequence, const Slice* ts = nullptr); ~LookupKey(); // Return a key suitable for lookup in a MemTable. Slice memtable_key() const { return Slice(start_, static_cast(end_ - start_)); } // Return an internal key (suitable for passing to an internal iterator) Slice internal_key() const { return Slice(kstart_, static_cast(end_ - kstart_)); } // Return the user key Slice user_key() const { return Slice(kstart_, static_cast(end_ - kstart_ - 8)); } private: // We construct a char array of the form: // klength varint32 <-- start_ // userkey char[klength] <-- kstart_ // tag uint64 // <-- end_ // The array is a suitable MemTable key. // The suffix starting with "userkey" can be used as an InternalKey. const char* start_; const char* kstart_; const char* end_; char space_[200]; // Avoid allocation for short keys // No copying allowed LookupKey(const LookupKey&); void operator=(const LookupKey&); }; inline LookupKey::~LookupKey() { if (start_ != space_) delete[] start_; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/malloc_stats.cc000066400000000000000000000031071370372246700166270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/malloc_stats.h" #ifndef ROCKSDB_LITE #include #include #include "port/jemalloc_helper.h" namespace ROCKSDB_NAMESPACE { #ifdef ROCKSDB_JEMALLOC typedef struct { char* cur; char* end; } MallocStatus; static void GetJemallocStatus(void* mstat_arg, const char* status) { MallocStatus* mstat = reinterpret_cast(mstat_arg); size_t status_len = status ? strlen(status) : 0; size_t buf_size = (size_t)(mstat->end - mstat->cur); if (!status_len || status_len > buf_size) { return; } snprintf(mstat->cur, buf_size, "%s", status); mstat->cur += status_len; } void DumpMallocStats(std::string* stats) { if (!HasJemalloc()) { return; } MallocStatus mstat; const unsigned int kMallocStatusLen = 1000000; std::unique_ptr buf{new char[kMallocStatusLen + 1]}; mstat.cur = buf.get(); mstat.end = buf.get() + kMallocStatusLen; malloc_stats_print(GetJemallocStatus, &mstat, ""); stats->append(buf.get()); } #else void DumpMallocStats(std::string*) {} #endif // ROCKSDB_JEMALLOC } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/malloc_stats.h000066400000000000000000000012411370372246700164660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #ifndef ROCKSDB_LITE #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { void DumpMallocStats(std::string*); } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/manual_compaction_test.cc000066400000000000000000000203141370372246700206710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Test for issue 178: a manual compaction causes deleted data to reappear. #include #include "port/port.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/db.h" #include "rocksdb/slice.h" #include "rocksdb/write_batch.h" #include "test_util/testharness.h" using namespace ROCKSDB_NAMESPACE; namespace { // Reasoning: previously the number was 1100000. Since the keys are written to // the batch in one write each write will result into one SST file. each write // will result into one SST file. We reduced the write_buffer_size to 1K to // basically have the same effect with however less number of keys, which // results into less test runtime. const int kNumKeys = 1100; std::string Key1(int i) { char buf[100]; snprintf(buf, sizeof(buf), "my_key_%d", i); return buf; } std::string Key2(int i) { return Key1(i) + "_xxx"; } class ManualCompactionTest : public testing::Test { public: ManualCompactionTest() { // Get rid of any state from an old run. 
dbname_ = test::PerThreadDBPath("rocksdb_manual_compaction_test"); DestroyDB(dbname_, Options()); } std::string dbname_; }; class DestroyAllCompactionFilter : public CompactionFilter { public: DestroyAllCompactionFilter() {} bool Filter(int /*level*/, const Slice& /*key*/, const Slice& existing_value, std::string* /*new_value*/, bool* /*value_changed*/) const override { return existing_value.ToString() == "destroy"; } const char* Name() const override { return "DestroyAllCompactionFilter"; } }; class LogCompactionFilter : public CompactionFilter { public: const char* Name() const override { return "LogCompactionFilter"; } bool Filter(int level, const Slice& key, const Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { key_level_[key.ToString()] = level; return false; } void Reset() { key_level_.clear(); } size_t NumKeys() const { return key_level_.size(); } int KeyLevel(const Slice& key) { auto it = key_level_.find(key.ToString()); if (it == key_level_.end()) { return -1; } return it->second; } private: mutable std::map key_level_; }; TEST_F(ManualCompactionTest, CompactTouchesAllKeys) { for (int iter = 0; iter < 2; ++iter) { DB* db; Options options; if (iter == 0) { // level compaction options.num_levels = 3; options.compaction_style = kCompactionStyleLevel; } else { // universal compaction options.compaction_style = kCompactionStyleUniversal; } options.create_if_missing = true; options.compression = kNoCompression; options.compaction_filter = new DestroyAllCompactionFilter(); ASSERT_OK(DB::Open(options, dbname_, &db)); db->Put(WriteOptions(), Slice("key1"), Slice("destroy")); db->Put(WriteOptions(), Slice("key2"), Slice("destroy")); db->Put(WriteOptions(), Slice("key3"), Slice("value3")); db->Put(WriteOptions(), Slice("key4"), Slice("destroy")); Slice key4("key4"); db->CompactRange(CompactRangeOptions(), nullptr, &key4); Iterator* itr = db->NewIterator(ReadOptions()); itr->SeekToFirst(); ASSERT_TRUE(itr->Valid()); 
ASSERT_EQ("key3", itr->key().ToString()); itr->Next(); ASSERT_TRUE(!itr->Valid()); delete itr; delete options.compaction_filter; delete db; DestroyDB(dbname_, options); } } TEST_F(ManualCompactionTest, Test) { // Open database. Disable compression since it affects the creation // of layers and the code below is trying to test against a very // specific scenario. DB* db; Options db_options; db_options.write_buffer_size = 1024; db_options.create_if_missing = true; db_options.compression = kNoCompression; ASSERT_OK(DB::Open(db_options, dbname_, &db)); // create first key range WriteBatch batch; for (int i = 0; i < kNumKeys; i++) { batch.Put(Key1(i), "value for range 1 key"); } ASSERT_OK(db->Write(WriteOptions(), &batch)); // create second key range batch.Clear(); for (int i = 0; i < kNumKeys; i++) { batch.Put(Key2(i), "value for range 2 key"); } ASSERT_OK(db->Write(WriteOptions(), &batch)); // delete second key range batch.Clear(); for (int i = 0; i < kNumKeys; i++) { batch.Delete(Key2(i)); } ASSERT_OK(db->Write(WriteOptions(), &batch)); // compact database std::string start_key = Key1(0); std::string end_key = Key1(kNumKeys - 1); Slice least(start_key.data(), start_key.size()); Slice greatest(end_key.data(), end_key.size()); // commenting out the line below causes the example to work correctly db->CompactRange(CompactRangeOptions(), &least, &greatest); // count the keys Iterator* iter = db->NewIterator(ReadOptions()); int num_keys = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { num_keys++; } delete iter; ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys"; // close database delete db; DestroyDB(dbname_, Options()); } TEST_F(ManualCompactionTest, SkipLevel) { DB* db; Options options; options.num_levels = 3; // Initially, flushed L0 files won't exceed 100. 
options.level0_file_num_compaction_trigger = 100; options.compaction_style = kCompactionStyleLevel; options.create_if_missing = true; options.compression = kNoCompression; LogCompactionFilter* filter = new LogCompactionFilter(); options.compaction_filter = filter; ASSERT_OK(DB::Open(options, dbname_, &db)); WriteOptions wo; FlushOptions fo; ASSERT_OK(db->Put(wo, "1", "")); ASSERT_OK(db->Flush(fo)); ASSERT_OK(db->Put(wo, "2", "")); ASSERT_OK(db->Flush(fo)); ASSERT_OK(db->Put(wo, "4", "")); ASSERT_OK(db->Put(wo, "8", "")); ASSERT_OK(db->Flush(fo)); { // L0: 1, 2, [4, 8] // no file has keys in range [5, 7] Slice start("5"); Slice end("7"); filter->Reset(); db->CompactRange(CompactRangeOptions(), &start, &end); ASSERT_EQ(0, filter->NumKeys()); } { // L0: 1, 2, [4, 8] // [3, 7] overlaps with 4 in L0 Slice start("3"); Slice end("7"); filter->Reset(); db->CompactRange(CompactRangeOptions(), &start, &end); ASSERT_EQ(2, filter->NumKeys()); ASSERT_EQ(0, filter->KeyLevel("4")); ASSERT_EQ(0, filter->KeyLevel("8")); } { // L0: 1, 2 // L1: [4, 8] // no file has keys in range (-inf, 0] Slice end("0"); filter->Reset(); db->CompactRange(CompactRangeOptions(), nullptr, &end); ASSERT_EQ(0, filter->NumKeys()); } { // L0: 1, 2 // L1: [4, 8] // no file has keys in range [9, inf) Slice start("9"); filter->Reset(); db->CompactRange(CompactRangeOptions(), &start, nullptr); ASSERT_EQ(0, filter->NumKeys()); } { // L0: 1, 2 // L1: [4, 8] // [2, 2] overlaps with 2 in L0 Slice start("2"); Slice end("2"); filter->Reset(); db->CompactRange(CompactRangeOptions(), &start, &end); ASSERT_EQ(1, filter->NumKeys()); ASSERT_EQ(0, filter->KeyLevel("2")); } { // L0: 1 // L1: 2, [4, 8] // [2, 5] overlaps with 2 and [4, 8) in L1, skip L0 Slice start("2"); Slice end("5"); filter->Reset(); db->CompactRange(CompactRangeOptions(), &start, &end); ASSERT_EQ(3, filter->NumKeys()); ASSERT_EQ(1, filter->KeyLevel("2")); ASSERT_EQ(1, filter->KeyLevel("4")); ASSERT_EQ(1, filter->KeyLevel("8")); } { // L0: 1 // L1: [2, 
4, 8] // [0, inf) overlaps all files Slice start("0"); filter->Reset(); db->CompactRange(CompactRangeOptions(), &start, nullptr); ASSERT_EQ(4, filter->NumKeys()); // 1 is first compacted to L1 and then further compacted into [2, 4, 8], // so finally the logged level for 1 is L1. ASSERT_EQ(1, filter->KeyLevel("1")); ASSERT_EQ(1, filter->KeyLevel("2")); ASSERT_EQ(1, filter->KeyLevel("4")); ASSERT_EQ(1, filter->KeyLevel("8")); } delete filter; delete db; DestroyDB(dbname_, options); } } // anonymous namespace int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/memtable.cc000066400000000000000000001236651370372246700157440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/memtable.h" #include #include #include #include #include "db/dbformat.h" #include "db/merge_context.h" #include "db/merge_helper.h" #include "db/pinned_iterators_manager.h" #include "db/range_tombstone_fragmenter.h" #include "db/read_callback.h" #include "memory/arena.h" #include "memory/memory_usage.h" #include "monitoring/perf_context_imp.h" #include "monitoring/statistics.h" #include "port/lang.h" #include "port/port.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" #include "rocksdb/slice_transform.h" #include "rocksdb/write_buffer_manager.h" #include "table/internal_iterator.h" #include "table/iterator_wrapper.h" #include "table/merging_iterator.h" #include "util/autovector.h" #include "util/coding.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { ImmutableMemTableOptions::ImmutableMemTableOptions( const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options) : arena_block_size(mutable_cf_options.arena_block_size), memtable_prefix_bloom_bits( static_cast( static_cast(mutable_cf_options.write_buffer_size) * mutable_cf_options.memtable_prefix_bloom_size_ratio) * 8u), memtable_huge_page_size(mutable_cf_options.memtable_huge_page_size), memtable_whole_key_filtering( mutable_cf_options.memtable_whole_key_filtering), inplace_update_support(ioptions.inplace_update_support), inplace_update_num_locks(mutable_cf_options.inplace_update_num_locks), inplace_callback(ioptions.inplace_callback), max_successive_merges(mutable_cf_options.max_successive_merges), statistics(ioptions.statistics), merge_operator(ioptions.merge_operator), info_log(ioptions.info_log) {} MemTable::MemTable(const InternalKeyComparator& cmp, const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, WriteBufferManager* write_buffer_manager, SequenceNumber latest_seq, uint32_t column_family_id) : comparator_(cmp), moptions_(ioptions, mutable_cf_options), refs_(0), 
kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)), mem_tracker_(write_buffer_manager), arena_(moptions_.arena_block_size, (write_buffer_manager != nullptr && (write_buffer_manager->enabled() || write_buffer_manager->cost_to_cache())) ? &mem_tracker_ : nullptr, mutable_cf_options.memtable_huge_page_size), table_(ioptions.memtable_factory->CreateMemTableRep( comparator_, &arena_, mutable_cf_options.prefix_extractor.get(), ioptions.info_log, column_family_id)), range_del_table_(SkipListFactory().CreateMemTableRep( comparator_, &arena_, nullptr /* transform */, ioptions.info_log, column_family_id)), is_range_del_table_empty_(true), data_size_(0), num_entries_(0), num_deletes_(0), write_buffer_size_(mutable_cf_options.write_buffer_size), flush_in_progress_(false), flush_completed_(false), file_number_(0), first_seqno_(0), earliest_seqno_(latest_seq), creation_seq_(latest_seq), mem_next_logfile_number_(0), min_prep_log_referenced_(0), locks_(moptions_.inplace_update_support ? moptions_.inplace_update_num_locks : 0), prefix_extractor_(mutable_cf_options.prefix_extractor.get()), flush_state_(FLUSH_NOT_REQUESTED), env_(ioptions.env), insert_with_hint_prefix_extractor_( ioptions.memtable_insert_with_hint_prefix_extractor), oldest_key_time_(std::numeric_limits::max()), atomic_flush_seqno_(kMaxSequenceNumber), approximate_memory_usage_(0) { UpdateFlushState(); // something went wrong if we need to flush before inserting anything assert(!ShouldScheduleFlush()); // use bloom_filter_ for both whole key and prefix bloom filter if ((prefix_extractor_ || moptions_.memtable_whole_key_filtering) && moptions_.memtable_prefix_bloom_bits > 0) { bloom_filter_.reset( new DynamicBloom(&arena_, moptions_.memtable_prefix_bloom_bits, 6 /* hard coded 6 probes */, moptions_.memtable_huge_page_size, ioptions.info_log)); } } MemTable::~MemTable() { mem_tracker_.FreeMem(); assert(refs_ == 0); } size_t MemTable::ApproximateMemoryUsage() { autovector usages = { 
arena_.ApproximateMemoryUsage(), table_->ApproximateMemoryUsage(), range_del_table_->ApproximateMemoryUsage(), ROCKSDB_NAMESPACE::ApproximateMemoryUsage(insert_hints_)}; size_t total_usage = 0; for (size_t usage : usages) { // If usage + total_usage >= kMaxSizet, return kMaxSizet. // the following variation is to avoid numeric overflow. if (usage >= port::kMaxSizet - total_usage) { return port::kMaxSizet; } total_usage += usage; } approximate_memory_usage_.store(total_usage, std::memory_order_relaxed); // otherwise, return the actual usage return total_usage; } bool MemTable::ShouldFlushNow() { size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed); // In a lot of times, we cannot allocate arena blocks that exactly matches the // buffer size. Thus we have to decide if we should over-allocate or // under-allocate. // This constant variable can be interpreted as: if we still have more than // "kAllowOverAllocationRatio * kArenaBlockSize" space left, we'd try to over // allocate one more block. const double kAllowOverAllocationRatio = 0.6; // If arena still have room for new block allocation, we can safely say it // shouldn't flush. auto allocated_memory = table_->ApproximateMemoryUsage() + range_del_table_->ApproximateMemoryUsage() + arena_.MemoryAllocatedBytes(); approximate_memory_usage_.store(allocated_memory, std::memory_order_relaxed); // if we can still allocate one more block without exceeding the // over-allocation ratio, then we should not flush. if (allocated_memory + kArenaBlockSize < write_buffer_size + kArenaBlockSize * kAllowOverAllocationRatio) { return false; } // if user keeps adding entries that exceeds write_buffer_size, we need to // flush earlier even though we still have much available memory left. 
if (allocated_memory > write_buffer_size + kArenaBlockSize * kAllowOverAllocationRatio) { return true; } // In this code path, Arena has already allocated its "last block", which // means the total allocatedmemory size is either: // (1) "moderately" over allocated the memory (no more than `0.6 * arena // block size`. Or, // (2) the allocated memory is less than write buffer size, but we'll stop // here since if we allocate a new arena block, we'll over allocate too much // more (half of the arena block size) memory. // // In either case, to avoid over-allocate, the last block will stop allocation // when its usage reaches a certain ratio, which we carefully choose "0.75 // full" as the stop condition because it addresses the following issue with // great simplicity: What if the next inserted entry's size is // bigger than AllocatedAndUnused()? // // The answer is: if the entry size is also bigger than 0.25 * // kArenaBlockSize, a dedicated block will be allocated for it; otherwise // arena will anyway skip the AllocatedAndUnused() and allocate a new, empty // and regular block. In either case, we *overly* over-allocated. // // Therefore, setting the last block to be at most "0.75 full" avoids both // cases. // // NOTE: the average percentage of waste space of this approach can be counted // as: "arena block size * 0.25 / write buffer size". User who specify a small // write buffer size and/or big arena block size may suffer. 
return arena_.AllocatedAndUnused() < kArenaBlockSize / 4; } void MemTable::UpdateFlushState() { auto state = flush_state_.load(std::memory_order_relaxed); if (state == FLUSH_NOT_REQUESTED && ShouldFlushNow()) { // ignore CAS failure, because that means somebody else requested // a flush flush_state_.compare_exchange_strong(state, FLUSH_REQUESTED, std::memory_order_relaxed, std::memory_order_relaxed); } } void MemTable::UpdateOldestKeyTime() { uint64_t oldest_key_time = oldest_key_time_.load(std::memory_order_relaxed); if (oldest_key_time == std::numeric_limits::max()) { int64_t current_time = 0; auto s = env_->GetCurrentTime(¤t_time); if (s.ok()) { assert(current_time >= 0); // If fail, the timestamp is already set. oldest_key_time_.compare_exchange_strong( oldest_key_time, static_cast(current_time), std::memory_order_relaxed, std::memory_order_relaxed); } } } int MemTable::KeyComparator::operator()(const char* prefix_len_key1, const char* prefix_len_key2) const { // Internal keys are encoded as length-prefixed strings. Slice k1 = GetLengthPrefixedSlice(prefix_len_key1); Slice k2 = GetLengthPrefixedSlice(prefix_len_key2); return comparator.CompareKeySeq(k1, k2); } int MemTable::KeyComparator::operator()(const char* prefix_len_key, const KeyComparator::DecodedType& key) const { // Internal keys are encoded as length-prefixed strings. Slice a = GetLengthPrefixedSlice(prefix_len_key); return comparator.CompareKeySeq(a, key); } void MemTableRep::InsertConcurrently(KeyHandle /*handle*/) { #ifndef ROCKSDB_LITE throw std::runtime_error("concurrent insert not supported"); #else abort(); #endif } Slice MemTableRep::UserKey(const char* key) const { Slice slice = GetLengthPrefixedSlice(key); return Slice(slice.data(), slice.size() - 8); } KeyHandle MemTableRep::Allocate(const size_t len, char** buf) { *buf = allocator_->Allocate(len); return static_cast(*buf); } // Encode a suitable internal key target for "target" and return it. 
// Uses *scratch as scratch space, and the returned pointer will point // into this scratch space. const char* EncodeKey(std::string* scratch, const Slice& target) { scratch->clear(); PutVarint32(scratch, static_cast(target.size())); scratch->append(target.data(), target.size()); return scratch->data(); } class MemTableIterator : public InternalIterator { public: MemTableIterator(const MemTable& mem, const ReadOptions& read_options, Arena* arena, bool use_range_del_table = false) : bloom_(nullptr), prefix_extractor_(mem.prefix_extractor_), comparator_(mem.comparator_), valid_(false), arena_mode_(arena != nullptr), value_pinned_( !mem.GetImmutableMemTableOptions()->inplace_update_support) { if (use_range_del_table) { iter_ = mem.range_del_table_->GetIterator(arena); } else if (prefix_extractor_ != nullptr && !read_options.total_order_seek && !read_options.auto_prefix_mode) { // Auto prefix mode is not implemented in memtable yet. bloom_ = mem.bloom_filter_.get(); iter_ = mem.table_->GetDynamicPrefixIterator(arena); } else { iter_ = mem.table_->GetIterator(arena); } } // No copying allowed MemTableIterator(const MemTableIterator&) = delete; void operator=(const MemTableIterator&) = delete; ~MemTableIterator() override { #ifndef NDEBUG // Assert that the MemTableIterator is never deleted while // Pinning is Enabled. 
assert(!pinned_iters_mgr_ || !pinned_iters_mgr_->PinningEnabled()); #endif if (arena_mode_) { iter_->~Iterator(); } else { delete iter_; } } #ifndef NDEBUG void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { pinned_iters_mgr_ = pinned_iters_mgr; } PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; #endif bool Valid() const override { return valid_; } void Seek(const Slice& k) override { PERF_TIMER_GUARD(seek_on_memtable_time); PERF_COUNTER_ADD(seek_on_memtable_count, 1); if (bloom_) { // iterator should only use prefix bloom filter Slice user_k(ExtractUserKey(k)); if (prefix_extractor_->InDomain(user_k) && !bloom_->MayContain(prefix_extractor_->Transform(user_k))) { PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); valid_ = false; return; } else { PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); } } iter_->Seek(k, nullptr); valid_ = iter_->Valid(); } void SeekForPrev(const Slice& k) override { PERF_TIMER_GUARD(seek_on_memtable_time); PERF_COUNTER_ADD(seek_on_memtable_count, 1); if (bloom_) { Slice user_k(ExtractUserKey(k)); if (prefix_extractor_->InDomain(user_k) && !bloom_->MayContain(prefix_extractor_->Transform(user_k))) { PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); valid_ = false; return; } else { PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); } } iter_->Seek(k, nullptr); valid_ = iter_->Valid(); if (!Valid()) { SeekToLast(); } while (Valid() && comparator_.comparator.Compare(k, key()) < 0) { Prev(); } } void SeekToFirst() override { iter_->SeekToFirst(); valid_ = iter_->Valid(); } void SeekToLast() override { iter_->SeekToLast(); valid_ = iter_->Valid(); } void Next() override { PERF_COUNTER_ADD(next_on_memtable_count, 1); assert(Valid()); iter_->Next(); valid_ = iter_->Valid(); } void Prev() override { PERF_COUNTER_ADD(prev_on_memtable_count, 1); assert(Valid()); iter_->Prev(); valid_ = iter_->Valid(); } Slice key() const override { assert(Valid()); return GetLengthPrefixedSlice(iter_->key()); } Slice value() const override { 
assert(Valid()); Slice key_slice = GetLengthPrefixedSlice(iter_->key()); return GetLengthPrefixedSlice(key_slice.data() + key_slice.size()); } Status status() const override { return Status::OK(); } bool IsKeyPinned() const override { // memtable data is always pinned return true; } bool IsValuePinned() const override { // memtable value is always pinned, except if we allow inplace update. return value_pinned_; } private: DynamicBloom* bloom_; const SliceTransform* const prefix_extractor_; const MemTable::KeyComparator comparator_; MemTableRep::Iterator* iter_; bool valid_; bool arena_mode_; bool value_pinned_; }; InternalIterator* MemTable::NewIterator(const ReadOptions& read_options, Arena* arena) { assert(arena != nullptr); auto mem = arena->AllocateAligned(sizeof(MemTableIterator)); return new (mem) MemTableIterator(*this, read_options, arena); } FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator( const ReadOptions& read_options, SequenceNumber read_seq) { if (read_options.ignore_range_deletions || is_range_del_table_empty_.load(std::memory_order_relaxed)) { return nullptr; } auto* unfragmented_iter = new MemTableIterator( *this, read_options, nullptr /* arena */, true /* use_range_del_table */); if (unfragmented_iter == nullptr) { return nullptr; } auto fragmented_tombstone_list = std::make_shared( std::unique_ptr(unfragmented_iter), comparator_.comparator); auto* fragmented_iter = new FragmentedRangeTombstoneIterator( fragmented_tombstone_list, comparator_.comparator, read_seq); return fragmented_iter; } port::RWMutex* MemTable::GetLock(const Slice& key) { return &locks_[fastrange64(GetSliceNPHash64(key), locks_.size())]; } MemTable::MemTableStats MemTable::ApproximateStats(const Slice& start_ikey, const Slice& end_ikey) { uint64_t entry_count = table_->ApproximateNumEntries(start_ikey, end_ikey); entry_count += range_del_table_->ApproximateNumEntries(start_ikey, end_ikey); if (entry_count == 0) { return {0, 0}; } uint64_t n = 
num_entries_.load(std::memory_order_relaxed); if (n == 0) { return {0, 0}; } if (entry_count > n) { // (range_del_)table_->ApproximateNumEntries() is just an estimate so it can // be larger than actual entries we have. Cap it to entries we have to limit // the inaccuracy. entry_count = n; } uint64_t data_size = data_size_.load(std::memory_order_relaxed); return {entry_count * (data_size / n), entry_count}; } bool MemTable::Add(SequenceNumber s, ValueType type, const Slice& key, /* user key */ const Slice& value, bool allow_concurrent, MemTablePostProcessInfo* post_process_info, void** hint) { // Format of an entry is concatenation of: // key_size : varint32 of internal_key.size() // key bytes : char[internal_key.size()] // value_size : varint32 of value.size() // value bytes : char[value.size()] uint32_t key_size = static_cast(key.size()); uint32_t val_size = static_cast(value.size()); uint32_t internal_key_size = key_size + 8; const uint32_t encoded_len = VarintLength(internal_key_size) + internal_key_size + VarintLength(val_size) + val_size; char* buf = nullptr; std::unique_ptr& table = type == kTypeRangeDeletion ? range_del_table_ : table_; KeyHandle handle = table->Allocate(encoded_len, &buf); char* p = EncodeVarint32(buf, internal_key_size); memcpy(p, key.data(), key_size); Slice key_slice(p, key_size); p += key_size; uint64_t packed = PackSequenceAndType(s, type); EncodeFixed64(p, packed); p += 8; p = EncodeVarint32(p, val_size); memcpy(p, value.data(), val_size); assert((unsigned)(p + val_size - buf) == (unsigned)encoded_len); size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); if (!allow_concurrent) { // Extract prefix for insert with hint. 
if (insert_with_hint_prefix_extractor_ != nullptr && insert_with_hint_prefix_extractor_->InDomain(key_slice)) { Slice prefix = insert_with_hint_prefix_extractor_->Transform(key_slice); bool res = table->InsertKeyWithHint(handle, &insert_hints_[prefix]); if (UNLIKELY(!res)) { return res; } } else { bool res = table->InsertKey(handle); if (UNLIKELY(!res)) { return res; } } // this is a bit ugly, but is the way to avoid locked instructions // when incrementing an atomic num_entries_.store(num_entries_.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); data_size_.store(data_size_.load(std::memory_order_relaxed) + encoded_len, std::memory_order_relaxed); if (type == kTypeDeletion) { num_deletes_.store(num_deletes_.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); } if (bloom_filter_ && prefix_extractor_ && prefix_extractor_->InDomain(key)) { bloom_filter_->Add(prefix_extractor_->Transform(key)); } if (bloom_filter_ && moptions_.memtable_whole_key_filtering) { bloom_filter_->Add(StripTimestampFromUserKey(key, ts_sz)); } // The first sequence number inserted into the memtable assert(first_seqno_ == 0 || s >= first_seqno_); if (first_seqno_ == 0) { first_seqno_.store(s, std::memory_order_relaxed); if (earliest_seqno_ == kMaxSequenceNumber) { earliest_seqno_.store(GetFirstSequenceNumber(), std::memory_order_relaxed); } assert(first_seqno_.load() >= earliest_seqno_.load()); } assert(post_process_info == nullptr); UpdateFlushState(); } else { bool res = (hint == nullptr) ? 
table->InsertKeyConcurrently(handle) : table->InsertKeyWithHintConcurrently(handle, hint); if (UNLIKELY(!res)) { return res; } assert(post_process_info != nullptr); post_process_info->num_entries++; post_process_info->data_size += encoded_len; if (type == kTypeDeletion) { post_process_info->num_deletes++; } if (bloom_filter_ && prefix_extractor_ && prefix_extractor_->InDomain(key)) { bloom_filter_->AddConcurrently(prefix_extractor_->Transform(key)); } if (bloom_filter_ && moptions_.memtable_whole_key_filtering) { bloom_filter_->AddConcurrently(StripTimestampFromUserKey(key, ts_sz)); } // atomically update first_seqno_ and earliest_seqno_. uint64_t cur_seq_num = first_seqno_.load(std::memory_order_relaxed); while ((cur_seq_num == 0 || s < cur_seq_num) && !first_seqno_.compare_exchange_weak(cur_seq_num, s)) { } uint64_t cur_earliest_seqno = earliest_seqno_.load(std::memory_order_relaxed); while ( (cur_earliest_seqno == kMaxSequenceNumber || s < cur_earliest_seqno) && !first_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) { } } if (type == kTypeRangeDeletion) { is_range_del_table_empty_.store(false, std::memory_order_relaxed); } UpdateOldestKeyTime(); return true; } // Callback from MemTable::Get() namespace { struct Saver { Status* status; const LookupKey* key; bool* found_final_value; // Is value set correctly? 
Used by KeyMayExist bool* merge_in_progress; std::string* value; SequenceNumber seq; std::string* timestamp; const MergeOperator* merge_operator; // the merge operations encountered; MergeContext* merge_context; SequenceNumber max_covering_tombstone_seq; MemTable* mem; Logger* logger; Statistics* statistics; bool inplace_update_support; bool do_merge; Env* env_; ReadCallback* callback_; bool* is_blob_index; bool CheckCallback(SequenceNumber _seq) { if (callback_) { return callback_->IsVisible(_seq); } return true; } }; } // namespace static bool SaveValue(void* arg, const char* entry) { Saver* s = reinterpret_cast(arg); assert(s != nullptr); MergeContext* merge_context = s->merge_context; SequenceNumber max_covering_tombstone_seq = s->max_covering_tombstone_seq; const MergeOperator* merge_operator = s->merge_operator; assert(merge_context != nullptr); // entry format is: // klength varint32 // userkey char[klength-8] // tag uint64 // vlength varint32f // value char[vlength] // Check that it belongs to same user key. We do not check the // sequence number since the Seek() call above should have skipped // all entries with overly large sequence numbers. 
uint32_t key_length; const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); Slice user_key_slice = Slice(key_ptr, key_length - 8); const Comparator* user_comparator = s->mem->GetInternalKeyComparator().user_comparator(); size_t ts_sz = user_comparator->timestamp_size(); if (user_comparator->CompareWithoutTimestamp(user_key_slice, s->key->user_key()) == 0) { // Correct user key const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); ValueType type; SequenceNumber seq; UnPackSequenceAndType(tag, &seq, &type); // If the value is not in the snapshot, skip it if (!s->CheckCallback(seq)) { return true; // to continue to the next seq } s->seq = seq; if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex) && max_covering_tombstone_seq > seq) { type = kTypeRangeDeletion; } switch (type) { case kTypeBlobIndex: if (s->is_blob_index == nullptr) { ROCKS_LOG_ERROR(s->logger, "Encounter unexpected blob index."); *(s->status) = Status::NotSupported( "Encounter unsupported blob value. 
Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); } else if (*(s->merge_in_progress)) { *(s->status) = Status::NotSupported("Blob DB does not support merge operator."); } if (!s->status->ok()) { *(s->found_final_value) = true; return false; } FALLTHROUGH_INTENDED; case kTypeValue: { if (s->inplace_update_support) { s->mem->GetLock(s->key->user_key())->ReadLock(); } Slice v = GetLengthPrefixedSlice(key_ptr + key_length); *(s->status) = Status::OK(); if (*(s->merge_in_progress)) { if (s->do_merge) { if (s->value != nullptr) { *(s->status) = MergeHelper::TimedFullMerge( merge_operator, s->key->user_key(), &v, merge_context->GetOperands(), s->value, s->logger, s->statistics, s->env_, nullptr /* result_operand */, true); } } else { // Preserve the value with the goal of returning it as part of // raw merge operands to the user merge_context->PushOperand( v, s->inplace_update_support == false /* operand_pinned */); } } else if (!s->do_merge) { // Preserve the value with the goal of returning it as part of // raw merge operands to the user merge_context->PushOperand( v, s->inplace_update_support == false /* operand_pinned */); } else if (s->value != nullptr) { s->value->assign(v.data(), v.size()); } if (s->inplace_update_support) { s->mem->GetLock(s->key->user_key())->ReadUnlock(); } *(s->found_final_value) = true; if (s->is_blob_index != nullptr) { *(s->is_blob_index) = (type == kTypeBlobIndex); } if (ts_sz > 0 && s->timestamp != nullptr) { Slice ts = ExtractTimestampFromUserKey(user_key_slice, ts_sz); s->timestamp->assign(ts.data(), ts.size()); } return false; } case kTypeDeletion: case kTypeDeletionWithTimestamp: case kTypeSingleDeletion: case kTypeRangeDeletion: { if (*(s->merge_in_progress)) { if (s->value != nullptr) { *(s->status) = MergeHelper::TimedFullMerge( merge_operator, s->key->user_key(), nullptr, merge_context->GetOperands(), s->value, s->logger, s->statistics, s->env_, nullptr /* result_operand */, true); } } else { *(s->status) = 
Status::NotFound(); } *(s->found_final_value) = true; return false; } case kTypeMerge: { if (!merge_operator) { *(s->status) = Status::InvalidArgument( "merge_operator is not properly initialized."); // Normally we continue the loop (return true) when we see a merge // operand. But in case of an error, we should stop the loop // immediately and pretend we have found the value to stop further // seek. Otherwise, the later call will override this error status. *(s->found_final_value) = true; return false; } Slice v = GetLengthPrefixedSlice(key_ptr + key_length); *(s->merge_in_progress) = true; merge_context->PushOperand( v, s->inplace_update_support == false /* operand_pinned */); if (s->do_merge && merge_operator->ShouldMerge( merge_context->GetOperandsDirectionBackward())) { *(s->status) = MergeHelper::TimedFullMerge( merge_operator, s->key->user_key(), nullptr, merge_context->GetOperands(), s->value, s->logger, s->statistics, s->env_, nullptr /* result_operand */, true); *(s->found_final_value) = true; return false; } return true; } default: assert(false); return true; } } // s->state could be Corrupt, merge or notfound return false; } bool MemTable::Get(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback, bool* is_blob_index, bool do_merge) { // The sequence number is updated synchronously in version_set.h if (IsEmpty()) { // Avoiding recording stats for speed. 
return false; } PERF_TIMER_GUARD(get_from_memtable_time); std::unique_ptr range_del_iter( NewRangeTombstoneIterator(read_opts, GetInternalKeySeqno(key.internal_key()))); if (range_del_iter != nullptr) { *max_covering_tombstone_seq = std::max(*max_covering_tombstone_seq, range_del_iter->MaxCoveringTombstoneSeqnum(key.user_key())); } Slice user_key = key.user_key(); bool found_final_value = false; bool merge_in_progress = s->IsMergeInProgress(); bool may_contain = true; size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); if (bloom_filter_) { // when both memtable_whole_key_filtering and prefix_extractor_ are set, // only do whole key filtering for Get() to save CPU if (moptions_.memtable_whole_key_filtering) { may_contain = bloom_filter_->MayContain(StripTimestampFromUserKey(user_key, ts_sz)); } else { assert(prefix_extractor_); may_contain = !prefix_extractor_->InDomain(user_key) || bloom_filter_->MayContain(prefix_extractor_->Transform(user_key)); } } if (bloom_filter_ && !may_contain) { // iter is null if prefix bloom says the key does not exist PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); *seq = kMaxSequenceNumber; } else { if (bloom_filter_) { PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); } GetFromTable(key, *max_covering_tombstone_seq, do_merge, callback, is_blob_index, value, timestamp, s, merge_context, seq, &found_final_value, &merge_in_progress); } // No change to value, since we have not yet found a Put/Delete if (!found_final_value && merge_in_progress) { *s = Status::MergeInProgress(); } PERF_COUNTER_ADD(get_from_memtable_count, 1); return found_final_value; } void MemTable::GetFromTable(const LookupKey& key, SequenceNumber max_covering_tombstone_seq, bool do_merge, ReadCallback* callback, bool* is_blob_index, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* seq, bool* found_final_value, bool* merge_in_progress) { Saver saver; saver.status = s; saver.found_final_value = 
found_final_value; saver.merge_in_progress = merge_in_progress; saver.key = &key; saver.value = value; saver.timestamp = timestamp; saver.seq = kMaxSequenceNumber; saver.mem = this; saver.merge_context = merge_context; saver.max_covering_tombstone_seq = max_covering_tombstone_seq; saver.merge_operator = moptions_.merge_operator; saver.logger = moptions_.info_log; saver.inplace_update_support = moptions_.inplace_update_support; saver.statistics = moptions_.statistics; saver.env_ = env_; saver.callback_ = callback; saver.is_blob_index = is_blob_index; saver.do_merge = do_merge; table_->Get(key, &saver, SaveValue); *seq = saver.seq; } void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range, ReadCallback* callback, bool* is_blob) { // The sequence number is updated synchronously in version_set.h if (IsEmpty()) { // Avoiding recording stats for speed. return; } PERF_TIMER_GUARD(get_from_memtable_time); MultiGetRange temp_range(*range, range->begin(), range->end()); if (bloom_filter_) { std::array keys; std::array may_match = {{true}}; autovector prefixes; int num_keys = 0; for (auto iter = temp_range.begin(); iter != temp_range.end(); ++iter) { if (!prefix_extractor_) { keys[num_keys++] = &iter->ukey; } else if (prefix_extractor_->InDomain(iter->ukey)) { prefixes.emplace_back(prefix_extractor_->Transform(iter->ukey)); keys[num_keys++] = &prefixes.back(); } } bloom_filter_->MayContain(num_keys, &keys[0], &may_match[0]); int idx = 0; for (auto iter = temp_range.begin(); iter != temp_range.end(); ++iter) { if (prefix_extractor_ && !prefix_extractor_->InDomain(iter->ukey)) { PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); continue; } if (!may_match[idx]) { temp_range.SkipKey(iter); PERF_COUNTER_ADD(bloom_memtable_miss_count, 1); } else { PERF_COUNTER_ADD(bloom_memtable_hit_count, 1); } idx++; } } for (auto iter = temp_range.begin(); iter != temp_range.end(); ++iter) { SequenceNumber seq = kMaxSequenceNumber; bool found_final_value{false}; bool 
merge_in_progress = iter->s->IsMergeInProgress(); std::unique_ptr range_del_iter( NewRangeTombstoneIterator( read_options, GetInternalKeySeqno(iter->lkey->internal_key()))); if (range_del_iter != nullptr) { iter->max_covering_tombstone_seq = std::max( iter->max_covering_tombstone_seq, range_del_iter->MaxCoveringTombstoneSeqnum(iter->lkey->user_key())); } GetFromTable(*(iter->lkey), iter->max_covering_tombstone_seq, true, callback, is_blob, iter->value->GetSelf(), iter->timestamp, iter->s, &(iter->merge_context), &seq, &found_final_value, &merge_in_progress); if (!found_final_value && merge_in_progress) { *(iter->s) = Status::MergeInProgress(); } if (found_final_value) { iter->value->PinSelf(); range->AddValueSize(iter->value->size()); range->MarkKeyDone(iter); RecordTick(moptions_.statistics, MEMTABLE_HIT); if (range->GetValueSize() > read_options.value_size_soft_limit) { // Set all remaining keys in range to Abort for (auto range_iter = range->begin(); range_iter != range->end(); ++range_iter) { range->MarkKeyDone(range_iter); *(range_iter->s) = Status::Aborted(); } break; } } } PERF_COUNTER_ADD(get_from_memtable_count, 1); } void MemTable::Update(SequenceNumber seq, const Slice& key, const Slice& value) { LookupKey lkey(key, seq); Slice mem_key = lkey.memtable_key(); std::unique_ptr iter( table_->GetDynamicPrefixIterator()); iter->Seek(lkey.internal_key(), mem_key.data()); if (iter->Valid()) { // entry format is: // key_length varint32 // userkey char[klength-8] // tag uint64 // vlength varint32 // value char[vlength] // Check that it belongs to same user key. We do not check the // sequence number since the Seek() call above should have skipped // all entries with overly large sequence numbers. 
const char* entry = iter->key(); uint32_t key_length = 0; const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); if (comparator_.comparator.user_comparator()->Equal( Slice(key_ptr, key_length - 8), lkey.user_key())) { // Correct user key const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); ValueType type; SequenceNumber existing_seq; UnPackSequenceAndType(tag, &existing_seq, &type); assert(existing_seq != seq); if (type == kTypeValue) { Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length); uint32_t prev_size = static_cast(prev_value.size()); uint32_t new_size = static_cast(value.size()); // Update value, if new value size <= previous value size if (new_size <= prev_size) { char* p = EncodeVarint32(const_cast(key_ptr) + key_length, new_size); WriteLock wl(GetLock(lkey.user_key())); memcpy(p, value.data(), value.size()); assert((unsigned)((p + value.size()) - entry) == (unsigned)(VarintLength(key_length) + key_length + VarintLength(value.size()) + value.size())); RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED); return; } } } } // key doesn't exist bool add_res __attribute__((__unused__)); add_res = Add(seq, kTypeValue, key, value); // We already checked unused != seq above. In that case, Add should not fail. assert(add_res); } bool MemTable::UpdateCallback(SequenceNumber seq, const Slice& key, const Slice& delta) { LookupKey lkey(key, seq); Slice memkey = lkey.memtable_key(); std::unique_ptr iter( table_->GetDynamicPrefixIterator()); iter->Seek(lkey.internal_key(), memkey.data()); if (iter->Valid()) { // entry format is: // key_length varint32 // userkey char[klength-8] // tag uint64 // vlength varint32 // value char[vlength] // Check that it belongs to same user key. We do not check the // sequence number since the Seek() call above should have skipped // all entries with overly large sequence numbers. 
const char* entry = iter->key(); uint32_t key_length = 0; const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); if (comparator_.comparator.user_comparator()->Equal( Slice(key_ptr, key_length - 8), lkey.user_key())) { // Correct user key const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); ValueType type; uint64_t unused; UnPackSequenceAndType(tag, &unused, &type); switch (type) { case kTypeValue: { Slice prev_value = GetLengthPrefixedSlice(key_ptr + key_length); uint32_t prev_size = static_cast(prev_value.size()); char* prev_buffer = const_cast(prev_value.data()); uint32_t new_prev_size = prev_size; std::string str_value; WriteLock wl(GetLock(lkey.user_key())); auto status = moptions_.inplace_callback(prev_buffer, &new_prev_size, delta, &str_value); if (status == UpdateStatus::UPDATED_INPLACE) { // Value already updated by callback. assert(new_prev_size <= prev_size); if (new_prev_size < prev_size) { // overwrite the new prev_size char* p = EncodeVarint32(const_cast(key_ptr) + key_length, new_prev_size); if (VarintLength(new_prev_size) < VarintLength(prev_size)) { // shift the value buffer as well. memcpy(p, prev_buffer, new_prev_size); } } RecordTick(moptions_.statistics, NUMBER_KEYS_UPDATED); UpdateFlushState(); return true; } else if (status == UpdateStatus::UPDATED) { Add(seq, kTypeValue, key, Slice(str_value)); RecordTick(moptions_.statistics, NUMBER_KEYS_WRITTEN); UpdateFlushState(); return true; } else if (status == UpdateStatus::UPDATE_FAILED) { // No action required. Return. UpdateFlushState(); return true; } } default: break; } } } // If the latest value is not kTypeValue // or key doesn't exist return false; } size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) { Slice memkey = key.memtable_key(); // A total ordered iterator is costly for some memtablerep (prefix aware // reps). By passing in the user key, we allow efficient iterator creation. // The iterator only needs to be ordered within the same user key. 
std::unique_ptr iter( table_->GetDynamicPrefixIterator()); iter->Seek(key.internal_key(), memkey.data()); size_t num_successive_merges = 0; for (; iter->Valid(); iter->Next()) { const char* entry = iter->key(); uint32_t key_length = 0; const char* iter_key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); if (!comparator_.comparator.user_comparator()->Equal( Slice(iter_key_ptr, key_length - 8), key.user_key())) { break; } const uint64_t tag = DecodeFixed64(iter_key_ptr + key_length - 8); ValueType type; uint64_t unused; UnPackSequenceAndType(tag, &unused, &type); if (type != kTypeMerge) { break; } ++num_successive_merges; } return num_successive_merges; } void MemTableRep::Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) { auto iter = GetDynamicPrefixIterator(); for (iter->Seek(k.internal_key(), k.memtable_key().data()); iter->Valid() && callback_func(callback_args, iter->key()); iter->Next()) { } } void MemTable::RefLogContainingPrepSection(uint64_t log) { assert(log > 0); auto cur = min_prep_log_referenced_.load(); while ((log < cur || cur == 0) && !min_prep_log_referenced_.compare_exchange_strong(cur, log)) { cur = min_prep_log_referenced_.load(); } } uint64_t MemTable::GetMinLogContainingPrepSection() { return min_prep_log_referenced_.load(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/memtable.h000066400000000000000000000525511370372246700156010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include #include #include #include "db/dbformat.h" #include "db/range_tombstone_fragmenter.h" #include "db/read_callback.h" #include "db/version_edit.h" #include "memory/allocator.h" #include "memory/concurrent_arena.h" #include "monitoring/instrumented_mutex.h" #include "options/cf_options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "table/multiget_context.h" #include "util/dynamic_bloom.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { struct FlushJobInfo; class Mutex; class MemTableIterator; class MergeContext; struct ImmutableMemTableOptions { explicit ImmutableMemTableOptions(const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options); size_t arena_block_size; uint32_t memtable_prefix_bloom_bits; size_t memtable_huge_page_size; bool memtable_whole_key_filtering; bool inplace_update_support; size_t inplace_update_num_locks; UpdateStatus (*inplace_callback)(char* existing_value, uint32_t* existing_value_size, Slice delta_value, std::string* merged_value); size_t max_successive_merges; Statistics* statistics; MergeOperator* merge_operator; Logger* info_log; }; // Batched counters to updated when inserting keys in one write batch. // In post process of the write batch, these can be updated together. // Only used in concurrent memtable insert case. struct MemTablePostProcessInfo { uint64_t data_size = 0; uint64_t num_entries = 0; uint64_t num_deletes = 0; }; using MultiGetRange = MultiGetContext::Range; // Note: Many of the methods in this class have comments indicating that // external synchronization is required as these methods are not thread-safe. // It is up to higher layers of code to decide how to prevent concurrent // invokation of these methods. This is usually done by acquiring either // the db mutex or the single writer thread. 
// // Some of these methods are documented to only require external // synchronization if this memtable is immutable. Calling MarkImmutable() is // not sufficient to guarantee immutability. It is up to higher layers of // code to determine if this MemTable can still be modified by other threads. // Eg: The Superversion stores a pointer to the current MemTable (that can // be modified) and a separate list of the MemTables that can no longer be // written to (aka the 'immutable memtables'). class MemTable { public: struct KeyComparator : public MemTableRep::KeyComparator { const InternalKeyComparator comparator; explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } virtual int operator()(const char* prefix_len_key1, const char* prefix_len_key2) const override; virtual int operator()(const char* prefix_len_key, const DecodedType& key) const override; }; // MemTables are reference counted. The initial reference count // is zero and the caller must call Ref() at least once. // // earliest_seq should be the current SequenceNumber in the db such that any // key inserted into this memtable will have an equal or larger seq number. // (When a db is first created, the earliest sequence number will be 0). // If the earliest sequence number is not known, kMaxSequenceNumber may be // used, but this may prevent some transactions from succeeding until the // first key is inserted into the memtable. explicit MemTable(const InternalKeyComparator& comparator, const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, WriteBufferManager* write_buffer_manager, SequenceNumber earliest_seq, uint32_t column_family_id); // No copying allowed MemTable(const MemTable&) = delete; MemTable& operator=(const MemTable&) = delete; // Do not delete this MemTable unless Unref() indicates it not in use. ~MemTable(); // Increase reference count. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. 
void Ref() { ++refs_; } // Drop reference count. // If the refcount goes to zero return this memtable, otherwise return null. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. MemTable* Unref() { --refs_; assert(refs_ >= 0); if (refs_ <= 0) { return this; } return nullptr; } // Returns an estimate of the number of bytes of data in use by this // data structure. // // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable (unless this Memtable is immutable). size_t ApproximateMemoryUsage(); // As a cheap version of `ApproximateMemoryUsage()`, this function doens't // require external synchronization. The value may be less accurate though size_t ApproximateMemoryUsageFast() const { return approximate_memory_usage_.load(std::memory_order_relaxed); } // This method heuristically determines if the memtable should continue to // host more data. bool ShouldScheduleFlush() const { return flush_state_.load(std::memory_order_relaxed) == FLUSH_REQUESTED; } // Returns true if a flush should be scheduled and the caller should // be the one to schedule it bool MarkFlushScheduled() { auto before = FLUSH_REQUESTED; return flush_state_.compare_exchange_strong(before, FLUSH_SCHEDULED, std::memory_order_relaxed, std::memory_order_relaxed); } // Return an iterator that yields the contents of the memtable. // // The caller must ensure that the underlying MemTable remains live // while the returned iterator is live. The keys returned by this // iterator are internal keys encoded by AppendInternalKey in the // db/dbformat.{h,cc} module. // // By default, it returns an iterator for prefix seek if prefix_extractor // is configured in Options. // arena: If not null, the arena needs to be used to allocate the Iterator. // Calling ~Iterator of the iterator will destroy all the states but // those allocated in arena. 
InternalIterator* NewIterator(const ReadOptions& read_options, Arena* arena); FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& read_options, SequenceNumber read_seq); // Add an entry into memtable that maps key to value at the // specified sequence number and with the specified type. // Typically value will be empty if type==kTypeDeletion. // // REQUIRES: if allow_concurrent = false, external synchronization to prevent // simultaneous operations on the same MemTable. // // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // the already exists. bool Add(SequenceNumber seq, ValueType type, const Slice& key, const Slice& value, bool allow_concurrent = false, MemTablePostProcessInfo* post_process_info = nullptr, void** hint = nullptr); // Used to Get value associated with key or Get Merge Operands associated // with key. // If do_merge = true the default behavior which is Get value for key is // executed. Expected behavior is described right below. // If memtable contains a value for key, store it in *value and return true. // If memtable contains a deletion for key, store a NotFound() error // in *status and return true. // If memtable contains Merge operation as the most recent entry for a key, // and the merge process does not stop (not reaching a value or delete), // prepend the current merge operand to *operands. // store MergeInProgress in s, and return false. // Else, return false. // If any operation was found, its most recent sequence number // will be stored in *seq on success (regardless of whether true/false is // returned). Otherwise, *seq will be set to kMaxSequenceNumber. // On success, *s may be set to OK, NotFound, or MergeInProgress. Any other // status returned indicates a corruption or other unexpected error. // If do_merge = false then any Merge Operands encountered for key are simply // stored in merge_context.operands_list and never actually merged to get a // final value. 
The raw Merge Operands are eventually returned to the user. bool Get(const LookupKey& key, std::string* value, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, bool do_merge = true) { return Get(key, value, /*timestamp=*/nullptr, s, merge_context, max_covering_tombstone_seq, seq, read_opts, callback, is_blob_index, do_merge); } bool Get(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, bool do_merge = true); bool Get(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, bool do_merge = true) { SequenceNumber seq; return Get(key, value, timestamp, s, merge_context, max_covering_tombstone_seq, &seq, read_opts, callback, is_blob_index, do_merge); } void MultiGet(const ReadOptions& read_options, MultiGetRange* range, ReadCallback* callback, bool* is_blob); // Attempts to update the new_value inplace, else does normal Add // Pseudocode // if key exists in current memtable && prev_value is of type kTypeValue // if new sizeof(new_value) <= sizeof(prev_value) // update inplace // else add(key, new_value) // else add(key, new_value) // // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. void Update(SequenceNumber seq, const Slice& key, const Slice& value); // If prev_value for key exists, attempts to update it inplace. 
// else returns false // Pseudocode // if key exists in current memtable && prev_value is of type kTypeValue // new_value = delta(prev_value) // if sizeof(new_value) <= sizeof(prev_value) // update inplace // else add(key, new_value) // else return false // // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. bool UpdateCallback(SequenceNumber seq, const Slice& key, const Slice& delta); // Returns the number of successive merge entries starting from the newest // entry for the key up to the last non-merge entry or last entry for the // key in the memtable. size_t CountSuccessiveMergeEntries(const LookupKey& key); // Update counters and flush status after inserting a whole write batch // Used in concurrent memtable inserts. void BatchPostProcess(const MemTablePostProcessInfo& update_counters) { num_entries_.fetch_add(update_counters.num_entries, std::memory_order_relaxed); data_size_.fetch_add(update_counters.data_size, std::memory_order_relaxed); if (update_counters.num_deletes != 0) { num_deletes_.fetch_add(update_counters.num_deletes, std::memory_order_relaxed); } UpdateFlushState(); } // Get total number of entries in the mem table. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable (unless this Memtable is immutable). uint64_t num_entries() const { return num_entries_.load(std::memory_order_relaxed); } // Get total number of deletes in the mem table. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable (unless this Memtable is immutable). uint64_t num_deletes() const { return num_deletes_.load(std::memory_order_relaxed); } uint64_t get_data_size() const { return data_size_.load(std::memory_order_relaxed); } // Dynamically change the memtable's capacity. If set below the current usage, // the next key added will trigger a flush. 
Can only increase size when // memtable prefix bloom is disabled, since we can't easily allocate more // space. void UpdateWriteBufferSize(size_t new_write_buffer_size) { if (bloom_filter_ == nullptr || new_write_buffer_size < write_buffer_size_) { write_buffer_size_.store(new_write_buffer_size, std::memory_order_relaxed); } } // Returns the edits area that is needed for flushing the memtable VersionEdit* GetEdits() { return &edit_; } // Returns if there is no entry inserted to the mem table. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable (unless this Memtable is immutable). bool IsEmpty() const { return first_seqno_ == 0; } // Returns the sequence number of the first element that was inserted // into the memtable. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable (unless this Memtable is immutable). SequenceNumber GetFirstSequenceNumber() { return first_seqno_.load(std::memory_order_relaxed); } // Returns the sequence number that is guaranteed to be smaller than or equal // to the sequence number of any key that could be inserted into this // memtable. It can then be assumed that any write with a larger(or equal) // sequence number will be present in this memtable or a later memtable. // // If the earliest sequence number could not be determined, // kMaxSequenceNumber will be returned. SequenceNumber GetEarliestSequenceNumber() { return earliest_seqno_.load(std::memory_order_relaxed); } // DB's latest sequence ID when the memtable is created. This number // may be updated to a more recent one before any key is inserted. SequenceNumber GetCreationSeq() const { return creation_seq_; } void SetCreationSeq(SequenceNumber sn) { creation_seq_ = sn; } // Returns the next active logfile number when this memtable is about to // be flushed to storage // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. 
uint64_t GetNextLogNumber() { return mem_next_logfile_number_; } // Sets the next active logfile number when this memtable is about to // be flushed to storage // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. void SetNextLogNumber(uint64_t num) { mem_next_logfile_number_ = num; } // if this memtable contains data from a committed // two phase transaction we must take note of the // log which contains that data so we can know // when to relese that log void RefLogContainingPrepSection(uint64_t log); uint64_t GetMinLogContainingPrepSection(); // Notify the underlying storage that no more items will be added. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. // After MarkImmutable() is called, you should not attempt to // write anything to this MemTable(). (Ie. do not call Add() or Update()). void MarkImmutable() { table_->MarkReadOnly(); mem_tracker_.DoneAllocating(); } // Notify the underlying storage that all data it contained has been // persisted. // REQUIRES: external synchronization to prevent simultaneous // operations on the same MemTable. void MarkFlushed() { table_->MarkFlushed(); } // return true if the current MemTableRep supports merge operator. bool IsMergeOperatorSupported() const { return table_->IsMergeOperatorSupported(); } // return true if the current MemTableRep supports snapshots. 
// inplace update prevents snapshots, bool IsSnapshotSupported() const { return table_->IsSnapshotSupported() && !moptions_.inplace_update_support; } struct MemTableStats { uint64_t size; uint64_t count; }; MemTableStats ApproximateStats(const Slice& start_ikey, const Slice& end_ikey); // Get the lock associated for the key port::RWMutex* GetLock(const Slice& key); const InternalKeyComparator& GetInternalKeyComparator() const { return comparator_.comparator; } const ImmutableMemTableOptions* GetImmutableMemTableOptions() const { return &moptions_; } uint64_t ApproximateOldestKeyTime() const { return oldest_key_time_.load(std::memory_order_relaxed); } // REQUIRES: db_mutex held. void SetID(uint64_t id) { id_ = id; } uint64_t GetID() const { return id_; } void SetFlushCompleted(bool completed) { flush_completed_ = completed; } uint64_t GetFileNumber() const { return file_number_; } void SetFileNumber(uint64_t file_num) { file_number_ = file_num; } void SetFlushInProgress(bool in_progress) { flush_in_progress_ = in_progress; } #ifndef ROCKSDB_LITE void SetFlushJobInfo(std::unique_ptr&& info) { flush_job_info_ = std::move(info); } std::unique_ptr ReleaseFlushJobInfo() { return std::move(flush_job_info_); } #endif // !ROCKSDB_LITE private: enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED }; friend class MemTableIterator; friend class MemTableBackwardIterator; friend class MemTableList; KeyComparator comparator_; const ImmutableMemTableOptions moptions_; int refs_; const size_t kArenaBlockSize; AllocTracker mem_tracker_; ConcurrentArena arena_; std::unique_ptr table_; std::unique_ptr range_del_table_; std::atomic_bool is_range_del_table_empty_; // Total data size of all data inserted std::atomic data_size_; std::atomic num_entries_; std::atomic num_deletes_; // Dynamically changeable memtable option std::atomic write_buffer_size_; // These are used to manage memtable flushes to storage bool flush_in_progress_; // started the flush bool 
flush_completed_; // finished the flush uint64_t file_number_; // filled up after flush is complete // The updates to be applied to the transaction log when this // memtable is flushed to storage. VersionEdit edit_; // The sequence number of the kv that was inserted first std::atomic first_seqno_; // The db sequence number at the time of creation or kMaxSequenceNumber // if not set. std::atomic earliest_seqno_; SequenceNumber creation_seq_; // The log files earlier than this number can be deleted. uint64_t mem_next_logfile_number_; // the earliest log containing a prepared section // which has been inserted into this memtable. std::atomic min_prep_log_referenced_; // rw locks for inplace updates std::vector locks_; const SliceTransform* const prefix_extractor_; std::unique_ptr bloom_filter_; std::atomic flush_state_; Env* env_; // Extract sequential insert prefixes. const SliceTransform* insert_with_hint_prefix_extractor_; // Insert hints for each prefix. std::unordered_map insert_hints_; // Timestamp of oldest key std::atomic oldest_key_time_; // Memtable id to track flush. uint64_t id_ = 0; // Sequence number of the atomic flush that is responsible for this memtable. // The sequence number of atomic flush is a seq, such that no writes with // sequence numbers greater than or equal to seq are flushed, while all // writes with sequence number smaller than seq are flushed. SequenceNumber atomic_flush_seqno_; // keep track of memory usage in table_, arena_, and range_del_table_. // Gets refrshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow` std::atomic approximate_memory_usage_; #ifndef ROCKSDB_LITE // Flush job info of the current memtable. 
std::unique_ptr flush_job_info_; #endif // !ROCKSDB_LITE // Returns a heuristic flush decision bool ShouldFlushNow(); // Updates flush_state_ using ShouldFlushNow() void UpdateFlushState(); void UpdateOldestKeyTime(); void GetFromTable(const LookupKey& key, SequenceNumber max_covering_tombstone_seq, bool do_merge, ReadCallback* callback, bool* is_blob_index, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* seq, bool* found_final_value, bool* merge_in_progress); }; extern const char* EncodeKey(std::string* scratch, const Slice& target); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/memtable_list.cc000066400000000000000000000671361370372246700167770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "db/memtable_list.h" #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/memtable.h" #include "db/range_tombstone_fragmenter.h" #include "db/version_set.h" #include "logging/log_buffer.h" #include "monitoring/thread_status_util.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "table/merging_iterator.h" #include "test_util/sync_point.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { class InternalKeyComparator; class Mutex; class VersionSet; void MemTableListVersion::AddMemTable(MemTable* m) { memlist_.push_front(m); *parent_memtable_list_memory_usage_ += m->ApproximateMemoryUsage(); } void MemTableListVersion::UnrefMemTable(autovector* to_delete, MemTable* m) { if (m->Unref()) { to_delete->push_back(m); assert(*parent_memtable_list_memory_usage_ >= m->ApproximateMemoryUsage()); *parent_memtable_list_memory_usage_ -= m->ApproximateMemoryUsage(); } } MemTableListVersion::MemTableListVersion( size_t* 
parent_memtable_list_memory_usage, const MemTableListVersion& old) : max_write_buffer_number_to_maintain_( old.max_write_buffer_number_to_maintain_), max_write_buffer_size_to_maintain_( old.max_write_buffer_size_to_maintain_), parent_memtable_list_memory_usage_(parent_memtable_list_memory_usage) { memlist_ = old.memlist_; for (auto& m : memlist_) { m->Ref(); } memlist_history_ = old.memlist_history_; for (auto& m : memlist_history_) { m->Ref(); } } MemTableListVersion::MemTableListVersion( size_t* parent_memtable_list_memory_usage, int max_write_buffer_number_to_maintain, int64_t max_write_buffer_size_to_maintain) : max_write_buffer_number_to_maintain_(max_write_buffer_number_to_maintain), max_write_buffer_size_to_maintain_(max_write_buffer_size_to_maintain), parent_memtable_list_memory_usage_(parent_memtable_list_memory_usage) {} void MemTableListVersion::Ref() { ++refs_; } // called by superversion::clean() void MemTableListVersion::Unref(autovector* to_delete) { assert(refs_ >= 1); --refs_; if (refs_ == 0) { // if to_delete is equal to nullptr it means we're confident // that refs_ will not be zero assert(to_delete != nullptr); for (const auto& m : memlist_) { UnrefMemTable(to_delete, m); } for (const auto& m : memlist_history_) { UnrefMemTable(to_delete, m); } delete this; } } int MemTableList::NumNotFlushed() const { int size = static_cast(current_->memlist_.size()); assert(num_flush_not_started_ <= size); return size; } int MemTableList::NumFlushed() const { return static_cast(current_->memlist_history_.size()); } // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. // Operands stores the list of merge operations to apply, so far. 
bool MemTableListVersion::Get(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback, bool* is_blob_index) { return GetFromList(&memlist_, key, value, timestamp, s, merge_context, max_covering_tombstone_seq, seq, read_opts, callback, is_blob_index); } void MemTableListVersion::MultiGet(const ReadOptions& read_options, MultiGetRange* range, ReadCallback* callback, bool* is_blob) { for (auto memtable : memlist_) { memtable->MultiGet(read_options, range, callback, is_blob); if (range->empty()) { return; } } } bool MemTableListVersion::GetMergeOperands( const LookupKey& key, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts) { for (MemTable* memtable : memlist_) { bool done = memtable->Get(key, /*value*/ nullptr, /*timestamp*/ nullptr, s, merge_context, max_covering_tombstone_seq, read_opts, nullptr, nullptr, false); if (done) { return true; } } return false; } bool MemTableListVersion::GetFromHistory( const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, bool* is_blob_index) { return GetFromList(&memlist_history_, key, value, timestamp, s, merge_context, max_covering_tombstone_seq, seq, read_opts, nullptr /*read_callback*/, is_blob_index); } bool MemTableListVersion::GetFromList( std::list* list, const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback, bool* is_blob_index) { *seq = kMaxSequenceNumber; for (auto& memtable : *list) { SequenceNumber current_seq = kMaxSequenceNumber; bool done = memtable->Get(key, value, timestamp, s, 
merge_context, max_covering_tombstone_seq, ¤t_seq, read_opts, callback, is_blob_index); if (*seq == kMaxSequenceNumber) { // Store the most recent sequence number of any operation on this key. // Since we only care about the most recent change, we only need to // return the first operation found when searching memtables in // reverse-chronological order. // current_seq would be equal to kMaxSequenceNumber if the value was to be // skipped. This allows seq to be assigned again when the next value is // read. *seq = current_seq; } if (done) { assert(*seq != kMaxSequenceNumber || s->IsNotFound()); return true; } if (!done && !s->ok() && !s->IsMergeInProgress() && !s->IsNotFound()) { return false; } } return false; } Status MemTableListVersion::AddRangeTombstoneIterators( const ReadOptions& read_opts, Arena* /*arena*/, RangeDelAggregator* range_del_agg) { assert(range_del_agg != nullptr); // Except for snapshot read, using kMaxSequenceNumber is OK because these // are immutable memtables. SequenceNumber read_seq = read_opts.snapshot != nullptr ? 
read_opts.snapshot->GetSequenceNumber() : kMaxSequenceNumber; for (auto& m : memlist_) { std::unique_ptr range_del_iter( m->NewRangeTombstoneIterator(read_opts, read_seq)); range_del_agg->AddTombstones(std::move(range_del_iter)); } return Status::OK(); } void MemTableListVersion::AddIterators( const ReadOptions& options, std::vector* iterator_list, Arena* arena) { for (auto& m : memlist_) { iterator_list->push_back(m->NewIterator(options, arena)); } } void MemTableListVersion::AddIterators( const ReadOptions& options, MergeIteratorBuilder* merge_iter_builder) { for (auto& m : memlist_) { merge_iter_builder->AddIterator( m->NewIterator(options, merge_iter_builder->GetArena())); } } uint64_t MemTableListVersion::GetTotalNumEntries() const { uint64_t total_num = 0; for (auto& m : memlist_) { total_num += m->num_entries(); } return total_num; } MemTable::MemTableStats MemTableListVersion::ApproximateStats( const Slice& start_ikey, const Slice& end_ikey) { MemTable::MemTableStats total_stats = {0, 0}; for (auto& m : memlist_) { auto mStats = m->ApproximateStats(start_ikey, end_ikey); total_stats.size += mStats.size; total_stats.count += mStats.count; } return total_stats; } uint64_t MemTableListVersion::GetTotalNumDeletes() const { uint64_t total_num = 0; for (auto& m : memlist_) { total_num += m->num_deletes(); } return total_num; } SequenceNumber MemTableListVersion::GetEarliestSequenceNumber( bool include_history) const { if (include_history && !memlist_history_.empty()) { return memlist_history_.back()->GetEarliestSequenceNumber(); } else if (!memlist_.empty()) { return memlist_.back()->GetEarliestSequenceNumber(); } else { return kMaxSequenceNumber; } } // caller is responsible for referencing m void MemTableListVersion::Add(MemTable* m, autovector* to_delete) { assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable AddMemTable(m); TrimHistory(to_delete, m->ApproximateMemoryUsage()); } // Removes m from list of memtables not flushed. 
Caller should NOT Unref m. void MemTableListVersion::Remove(MemTable* m, autovector* to_delete) { assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable memlist_.remove(m); m->MarkFlushed(); if (max_write_buffer_size_to_maintain_ > 0 || max_write_buffer_number_to_maintain_ > 0) { memlist_history_.push_front(m); // Unable to get size of mutable memtable at this point, pass 0 to // TrimHistory as a best effort. TrimHistory(to_delete, 0); } else { UnrefMemTable(to_delete, m); } } // return the total memory usage assuming the oldest flushed memtable is dropped size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() const { size_t total_memtable_size = 0; for (auto& memtable : memlist_) { total_memtable_size += memtable->ApproximateMemoryUsage(); } for (auto& memtable : memlist_history_) { total_memtable_size += memtable->ApproximateMemoryUsage(); } if (!memlist_history_.empty()) { total_memtable_size -= memlist_history_.back()->ApproximateMemoryUsage(); } return total_memtable_size; } bool MemTableListVersion::MemtableLimitExceeded(size_t usage) { if (max_write_buffer_size_to_maintain_ > 0) { // calculate the total memory usage after dropping the oldest flushed // memtable, compare with max_write_buffer_size_to_maintain_ to decide // whether to trim history return ApproximateMemoryUsageExcludingLast() + usage >= static_cast(max_write_buffer_size_to_maintain_); } else if (max_write_buffer_number_to_maintain_ > 0) { return memlist_.size() + memlist_history_.size() > static_cast(max_write_buffer_number_to_maintain_); } else { return false; } } // Make sure we don't use up too much space in history void MemTableListVersion::TrimHistory(autovector* to_delete, size_t usage) { while (MemtableLimitExceeded(usage) && !memlist_history_.empty()) { MemTable* x = memlist_history_.back(); memlist_history_.pop_back(); UnrefMemTable(to_delete, x); } } // Returns true if there is at least one memtable on which flush has // not yet started. 
bool MemTableList::IsFlushPending() const { if ((flush_requested_ && num_flush_not_started_ > 0) || (num_flush_not_started_ >= min_write_buffer_number_to_merge_)) { assert(imm_flush_needed.load(std::memory_order_relaxed)); return true; } return false; } // Returns the memtables that need to be flushed. void MemTableList::PickMemtablesToFlush(const uint64_t* max_memtable_id, autovector* ret) { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_PICK_MEMTABLES_TO_FLUSH); const auto& memlist = current_->memlist_; bool atomic_flush = false; for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { MemTable* m = *it; if (!atomic_flush && m->atomic_flush_seqno_ != kMaxSequenceNumber) { atomic_flush = true; } if (max_memtable_id != nullptr && m->GetID() > *max_memtable_id) { break; } if (!m->flush_in_progress_) { assert(!m->flush_completed_); num_flush_not_started_--; if (num_flush_not_started_ == 0) { imm_flush_needed.store(false, std::memory_order_release); } m->flush_in_progress_ = true; // flushing will start very soon ret->push_back(m); } } if (!atomic_flush || num_flush_not_started_ == 0) { flush_requested_ = false; // start-flush request is complete } } void MemTableList::RollbackMemtableFlush(const autovector& mems, uint64_t /*file_number*/) { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_MEMTABLE_ROLLBACK); assert(!mems.empty()); // If the flush was not successful, then just reset state. // Maybe a succeeding attempt to flush will be successful. for (MemTable* m : mems) { assert(m->flush_in_progress_); assert(m->file_number_ == 0); m->flush_in_progress_ = false; m->flush_completed_ = false; m->edit_.Clear(); num_flush_not_started_++; } imm_flush_needed.store(true, std::memory_order_release); } // Try record a successful flush in the manifest file. It might just return // Status::OK letting a concurrent flush to do actual the recording.. 
Status MemTableList::TryInstallMemtableFlushResults( ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, const autovector& mems, LogsWithPrepTracker* prep_tracker, VersionSet* vset, InstrumentedMutex* mu, uint64_t file_number, autovector* to_delete, FSDirectory* db_directory, LogBuffer* log_buffer, std::list>* committed_flush_jobs_info, IOStatus* io_s) { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS); mu->AssertHeld(); // Flush was successful // Record the status on the memtable object. Either this call or a call by a // concurrent flush thread will read the status and write it to manifest. for (size_t i = 0; i < mems.size(); ++i) { // All the edits are associated with the first memtable of this batch. assert(i == 0 || mems[i]->GetEdits()->NumEntries() == 0); mems[i]->flush_completed_ = true; mems[i]->file_number_ = file_number; } // if some other thread is already committing, then return Status s; if (commit_in_progress_) { TEST_SYNC_POINT("MemTableList::TryInstallMemtableFlushResults:InProgress"); return s; } // Only a single thread can be executing this piece of code commit_in_progress_ = true; // Retry until all completed flushes are committed. New flushes can finish // while the current thread is writing manifest where mutex is released. while (s.ok()) { auto& memlist = current_->memlist_; // The back is the oldest; if flush_completed_ is not set to it, it means // that we were assigned a more recent memtable. The memtables' flushes must // be recorded in manifest in order. A concurrent flush thread, who is // assigned to flush the oldest memtable, will later wake up and does all // the pending writes to manifest, in order. if (memlist.empty() || !memlist.back()->flush_completed_) { break; } // scan all memtables from the earliest, and commit those // (in that order) that have finished flushing. Memtables // are always committed in the order that they were created. 
uint64_t batch_file_number = 0; size_t batch_count = 0; autovector edit_list; autovector memtables_to_flush; // enumerate from the last (earliest) element to see how many batch finished for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { MemTable* m = *it; if (!m->flush_completed_) { break; } if (it == memlist.rbegin() || batch_file_number != m->file_number_) { batch_file_number = m->file_number_; ROCKS_LOG_BUFFER(log_buffer, "[%s] Level-0 commit table #%" PRIu64 " started", cfd->GetName().c_str(), m->file_number_); edit_list.push_back(&m->edit_); memtables_to_flush.push_back(m); #ifndef ROCKSDB_LITE std::unique_ptr info = m->ReleaseFlushJobInfo(); if (info != nullptr) { committed_flush_jobs_info->push_back(std::move(info)); } #else (void)committed_flush_jobs_info; #endif // !ROCKSDB_LITE } batch_count++; } // TODO(myabandeh): Not sure how batch_count could be 0 here. if (batch_count > 0) { if (vset->db_options()->allow_2pc) { assert(edit_list.size() > 0); // We piggyback the information of earliest log file to keep in the // manifest entry for the last file flushed. edit_list.back()->SetMinLogNumberToKeep(PrecomputeMinLogNumberToKeep( vset, *cfd, edit_list, memtables_to_flush, prep_tracker)); } // this can release and reacquire the mutex. s = vset->LogAndApply(cfd, mutable_cf_options, edit_list, mu, db_directory); *io_s = vset->io_status(); // we will be changing the version in the next code path, // so we better create a new one, since versions are immutable InstallNewVersion(); // All the later memtables that have the same filenum // are part of the same batch. They can be committed now. uint64_t mem_id = 1; // how many memtables have been flushed. // commit new state only if the column family is NOT dropped. // The reason is as follows (refer to // ColumnFamilyTest.FlushAndDropRaceCondition). // If the column family is dropped, then according to LogAndApply, its // corresponding flush operation is NOT written to the MANIFEST. 
This // means the DB is not aware of the L0 files generated from the flush. // By committing the new state, we remove the memtable from the memtable // list. Creating an iterator on this column family will not be able to // read full data since the memtable is removed, and the DB is not aware // of the L0 files, causing MergingIterator unable to build child // iterators. RocksDB contract requires that the iterator can be created // on a dropped column family, and we must be able to // read full data as long as column family handle is not deleted, even if // the column family is dropped. if (s.ok() && !cfd->IsDropped()) { // commit new state while (batch_count-- > 0) { MemTable* m = current_->memlist_.back(); ROCKS_LOG_BUFFER(log_buffer, "[%s] Level-0 commit table #%" PRIu64 ": memtable #%" PRIu64 " done", cfd->GetName().c_str(), m->file_number_, mem_id); assert(m->file_number_ > 0); current_->Remove(m, to_delete); UpdateCachedValuesFromMemTableListVersion(); ResetTrimHistoryNeeded(); ++mem_id; } } else { for (auto it = current_->memlist_.rbegin(); batch_count-- > 0; ++it) { MemTable* m = *it; // commit failed. setup state so that we can flush again. ROCKS_LOG_BUFFER(log_buffer, "Level-0 commit table #%" PRIu64 ": memtable #%" PRIu64 " failed", m->file_number_, mem_id); m->flush_completed_ = false; m->flush_in_progress_ = false; m->edit_.Clear(); num_flush_not_started_++; m->file_number_ = 0; imm_flush_needed.store(true, std::memory_order_release); ++mem_id; } } } } commit_in_progress_ = false; return s; } // New memtables are inserted at the front of the list. void MemTableList::Add(MemTable* m, autovector* to_delete) { assert(static_cast(current_->memlist_.size()) >= num_flush_not_started_); InstallNewVersion(); // this method is used to move mutable memtable into an immutable list. // since mutable memtable is already refcounted by the DBImpl, // and when moving to the imutable list we don't unref it, // we don't have to ref the memtable here. 
we just take over the // reference from the DBImpl. current_->Add(m, to_delete); m->MarkImmutable(); num_flush_not_started_++; if (num_flush_not_started_ == 1) { imm_flush_needed.store(true, std::memory_order_release); } UpdateCachedValuesFromMemTableListVersion(); ResetTrimHistoryNeeded(); } void MemTableList::TrimHistory(autovector* to_delete, size_t usage) { InstallNewVersion(); current_->TrimHistory(to_delete, usage); UpdateCachedValuesFromMemTableListVersion(); ResetTrimHistoryNeeded(); } // Returns an estimate of the number of bytes of data in use. size_t MemTableList::ApproximateUnflushedMemTablesMemoryUsage() { size_t total_size = 0; for (auto& memtable : current_->memlist_) { total_size += memtable->ApproximateMemoryUsage(); } return total_size; } size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; } size_t MemTableList::ApproximateMemoryUsageExcludingLast() const { const size_t usage = current_memory_usage_excluding_last_.load(std::memory_order_relaxed); return usage; } bool MemTableList::HasHistory() const { const bool has_history = current_has_history_.load(std::memory_order_relaxed); return has_history; } void MemTableList::UpdateCachedValuesFromMemTableListVersion() { const size_t total_memtable_size = current_->ApproximateMemoryUsageExcludingLast(); current_memory_usage_excluding_last_.store(total_memtable_size, std::memory_order_relaxed); const bool has_history = current_->HasHistory(); current_has_history_.store(has_history, std::memory_order_relaxed); } uint64_t MemTableList::ApproximateOldestKeyTime() const { if (!current_->memlist_.empty()) { return current_->memlist_.back()->ApproximateOldestKeyTime(); } return std::numeric_limits::max(); } void MemTableList::InstallNewVersion() { if (current_->refs_ == 1) { // we're the only one using the version, just keep using it } else { // somebody else holds the current version, we need to create new one MemTableListVersion* version = current_; current_ = new 
MemTableListVersion(¤t_memory_usage_, *version); current_->Ref(); version->Unref(); } } uint64_t MemTableList::PrecomputeMinLogContainingPrepSection( const autovector& memtables_to_flush) { uint64_t min_log = 0; for (auto& m : current_->memlist_) { // Assume the list is very short, we can live with O(m*n). We can optimize // if the performance has some problem. bool should_skip = false; for (MemTable* m_to_flush : memtables_to_flush) { if (m == m_to_flush) { should_skip = true; break; } } if (should_skip) { continue; } auto log = m->GetMinLogContainingPrepSection(); if (log > 0 && (min_log == 0 || log < min_log)) { min_log = log; } } return min_log; } // Commit a successful atomic flush in the manifest file. Status InstallMemtableAtomicFlushResults( const autovector* imm_lists, const autovector& cfds, const autovector& mutable_cf_options_list, const autovector*>& mems_list, VersionSet* vset, InstrumentedMutex* mu, const autovector& file_metas, autovector* to_delete, FSDirectory* db_directory, LogBuffer* log_buffer) { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS); mu->AssertHeld(); size_t num = mems_list.size(); assert(cfds.size() == num); if (imm_lists != nullptr) { assert(imm_lists->size() == num); } for (size_t k = 0; k != num; ++k) { #ifndef NDEBUG const auto* imm = (imm_lists == nullptr) ? 
cfds[k]->imm() : imm_lists->at(k); if (!mems_list[k]->empty()) { assert((*mems_list[k])[0]->GetID() == imm->GetEarliestMemTableID()); } #endif assert(nullptr != file_metas[k]); for (size_t i = 0; i != mems_list[k]->size(); ++i) { assert(i == 0 || (*mems_list[k])[i]->GetEdits()->NumEntries() == 0); (*mems_list[k])[i]->SetFlushCompleted(true); (*mems_list[k])[i]->SetFileNumber(file_metas[k]->fd.GetNumber()); } } Status s; autovector> edit_lists; uint32_t num_entries = 0; for (const auto mems : mems_list) { assert(mems != nullptr); autovector edits; assert(!mems->empty()); edits.emplace_back((*mems)[0]->GetEdits()); ++num_entries; edit_lists.emplace_back(edits); } // Mark the version edits as an atomic group if the number of version edits // exceeds 1. if (cfds.size() > 1) { for (auto& edits : edit_lists) { assert(edits.size() == 1); edits[0]->MarkAtomicGroup(--num_entries); } assert(0 == num_entries); } // this can release and reacquire the mutex. s = vset->LogAndApply(cfds, mutable_cf_options_list, edit_lists, mu, db_directory); for (size_t k = 0; k != cfds.size(); ++k) { auto* imm = (imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k); imm->InstallNewVersion(); } if (s.ok() || s.IsColumnFamilyDropped()) { for (size_t i = 0; i != cfds.size(); ++i) { if (cfds[i]->IsDropped()) { continue; } auto* imm = (imm_lists == nullptr) ? cfds[i]->imm() : imm_lists->at(i); for (auto m : *mems_list[i]) { assert(m->GetFileNumber() > 0); uint64_t mem_id = m->GetID(); ROCKS_LOG_BUFFER(log_buffer, "[%s] Level-0 commit table #%" PRIu64 ": memtable #%" PRIu64 " done", cfds[i]->GetName().c_str(), m->GetFileNumber(), mem_id); imm->current_->Remove(m, to_delete); imm->UpdateCachedValuesFromMemTableListVersion(); imm->ResetTrimHistoryNeeded(); } } } else { for (size_t i = 0; i != cfds.size(); ++i) { auto* imm = (imm_lists == nullptr) ? 
cfds[i]->imm() : imm_lists->at(i); for (auto m : *mems_list[i]) { uint64_t mem_id = m->GetID(); ROCKS_LOG_BUFFER(log_buffer, "[%s] Level-0 commit table #%" PRIu64 ": memtable #%" PRIu64 " failed", cfds[i]->GetName().c_str(), m->GetFileNumber(), mem_id); m->SetFlushCompleted(false); m->SetFlushInProgress(false); m->GetEdits()->Clear(); m->SetFileNumber(0); imm->num_flush_not_started_++; } imm->imm_flush_needed.store(true, std::memory_order_release); } } return s; } void MemTableList::RemoveOldMemTables(uint64_t log_number, autovector* to_delete) { assert(to_delete != nullptr); InstallNewVersion(); auto& memlist = current_->memlist_; autovector old_memtables; for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { MemTable* mem = *it; if (mem->GetNextLogNumber() > log_number) { break; } old_memtables.push_back(mem); } for (auto it = old_memtables.begin(); it != old_memtables.end(); ++it) { MemTable* mem = *it; current_->Remove(mem, to_delete); --num_flush_not_started_; if (0 == num_flush_not_started_) { imm_flush_needed.store(false, std::memory_order_release); } } UpdateCachedValuesFromMemTableListVersion(); ResetTrimHistoryNeeded(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/memtable_list.h000066400000000000000000000416141370372246700166320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #include #include #include #include #include #include #include "db/dbformat.h" #include "db/logs_with_prep_tracker.h" #include "db/memtable.h" #include "db/range_del_aggregator.h" #include "file/filename.h" #include "logging/log_buffer.h" #include "monitoring/instrumented_mutex.h" #include "rocksdb/db.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "rocksdb/types.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class ColumnFamilyData; class InternalKeyComparator; class InstrumentedMutex; class MergeIteratorBuilder; class MemTableList; struct FlushJobInfo; // keeps a list of immutable memtables in a vector. the list is immutable // if refcount is bigger than one. It is used as a state for Get() and // Iterator code paths // // This class is not thread-safe. External synchronization is required // (such as holding the db mutex or being on the write thread). class MemTableListVersion { public: explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, const MemTableListVersion& old); explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, int max_write_buffer_number_to_maintain, int64_t max_write_buffer_size_to_maintain); void Ref(); void Unref(autovector* to_delete = nullptr); // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. // // If any operation was found for this key, its most recent sequence number // will be stored in *seq on success (regardless of whether true/false is // returned). Otherwise, *seq will be set to kMaxSequenceNumber. 
bool Get(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr); bool Get(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr) { SequenceNumber seq; return Get(key, value, timestamp, s, merge_context, max_covering_tombstone_seq, &seq, read_opts, callback, is_blob_index); } void MultiGet(const ReadOptions& read_options, MultiGetRange* range, ReadCallback* callback, bool* is_blob); // Returns all the merge operands corresponding to the key by searching all // memtables starting from the most recent one. bool GetMergeOperands(const LookupKey& key, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts); // Similar to Get(), but searches the Memtable history of memtables that // have already been flushed. Should only be used from in-memory only // queries (such as Transaction validation) as the history may contain // writes that are also present in the SST files. 
bool GetFromHistory(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, bool* is_blob_index = nullptr); bool GetFromHistory(const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, const ReadOptions& read_opts, bool* is_blob_index = nullptr) { SequenceNumber seq; return GetFromHistory(key, value, timestamp, s, merge_context, max_covering_tombstone_seq, &seq, read_opts, is_blob_index); } Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena, RangeDelAggregator* range_del_agg); void AddIterators(const ReadOptions& options, std::vector* iterator_list, Arena* arena); void AddIterators(const ReadOptions& options, MergeIteratorBuilder* merge_iter_builder); uint64_t GetTotalNumEntries() const; uint64_t GetTotalNumDeletes() const; MemTable::MemTableStats ApproximateStats(const Slice& start_ikey, const Slice& end_ikey); // Returns the value of MemTable::GetEarliestSequenceNumber() on the most // recent MemTable in this list or kMaxSequenceNumber if the list is empty. // If include_history=true, will also search Memtables in MemTableList // History. 
SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const; private: friend class MemTableList; friend Status InstallMemtableAtomicFlushResults( const autovector* imm_lists, const autovector& cfds, const autovector& mutable_cf_options_list, const autovector*>& mems_list, VersionSet* vset, InstrumentedMutex* mu, const autovector& file_meta, autovector* to_delete, FSDirectory* db_directory, LogBuffer* log_buffer); // REQUIRE: m is an immutable memtable void Add(MemTable* m, autovector* to_delete); // REQUIRE: m is an immutable memtable void Remove(MemTable* m, autovector* to_delete); void TrimHistory(autovector* to_delete, size_t usage); bool GetFromList(std::list* list, const LookupKey& key, std::string* value, std::string* timestamp, Status* s, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, SequenceNumber* seq, const ReadOptions& read_opts, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr); void AddMemTable(MemTable* m); void UnrefMemTable(autovector* to_delete, MemTable* m); // Calculate the total amount of memory used by memlist_ and memlist_history_ // excluding the last MemTable in memlist_history_. The reason for excluding // the last MemTable is to see if dropping the last MemTable will keep total // memory usage above or equal to max_write_buffer_size_to_maintain_ size_t ApproximateMemoryUsageExcludingLast() const; // Whether this version contains flushed memtables that are only kept around // for transaction conflict checking. bool HasHistory() const { return !memlist_history_.empty(); } bool MemtableLimitExceeded(size_t usage); // Immutable MemTables that have not yet been flushed. 
std::list memlist_; // MemTables that have already been flushed // (used during Transaction validation) std::list memlist_history_; // Maximum number of MemTables to keep in memory (including both flushed const int max_write_buffer_number_to_maintain_; // Maximum size of MemTables to keep in memory (including both flushed // and not-yet-flushed tables). const int64_t max_write_buffer_size_to_maintain_; int refs_ = 0; size_t* parent_memtable_list_memory_usage_; }; // This class stores references to all the immutable memtables. // The memtables are flushed to L0 as soon as possible and in // any order. If there are more than one immutable memtable, their // flushes can occur concurrently. However, they are 'committed' // to the manifest in FIFO order to maintain correctness and // recoverability from a crash. // // // Other than imm_flush_needed and imm_trim_needed, this class is not // thread-safe and requires external synchronization (such as holding the db // mutex or being on the write thread.) class MemTableList { public: // A list of memtables. explicit MemTableList(int min_write_buffer_number_to_merge, int max_write_buffer_number_to_maintain, int64_t max_write_buffer_size_to_maintain) : imm_flush_needed(false), imm_trim_needed(false), min_write_buffer_number_to_merge_(min_write_buffer_number_to_merge), current_(new MemTableListVersion(¤t_memory_usage_, max_write_buffer_number_to_maintain, max_write_buffer_size_to_maintain)), num_flush_not_started_(0), commit_in_progress_(false), flush_requested_(false), current_memory_usage_(0), current_memory_usage_excluding_last_(0), current_has_history_(false) { current_->Ref(); } // Should not delete MemTableList without making sure MemTableList::current() // is Unref()'d. ~MemTableList() {} MemTableListVersion* current() const { return current_; } // so that background threads can detect non-nullptr pointer to // determine whether there is anything more to start flushing. 
std::atomic imm_flush_needed; std::atomic imm_trim_needed; // Returns the total number of memtables in the list that haven't yet // been flushed and logged. int NumNotFlushed() const; // Returns total number of memtables in the list that have been // completely flushed and logged. int NumFlushed() const; // Returns true if there is at least one memtable on which flush has // not yet started. bool IsFlushPending() const; // Returns the earliest memtables that needs to be flushed. The returned // memtables are guaranteed to be in the ascending order of created time. void PickMemtablesToFlush(const uint64_t* max_memtable_id, autovector* mems); // Reset status of the given memtable list back to pending state so that // they can get picked up again on the next round of flush. void RollbackMemtableFlush(const autovector& mems, uint64_t file_number); // Try commit a successful flush in the manifest file. It might just return // Status::OK letting a concurrent flush to do the actual the recording. Status TryInstallMemtableFlushResults( ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, const autovector& m, LogsWithPrepTracker* prep_tracker, VersionSet* vset, InstrumentedMutex* mu, uint64_t file_number, autovector* to_delete, FSDirectory* db_directory, LogBuffer* log_buffer, std::list>* committed_flush_jobs_info, IOStatus* io_s); // New memtables are inserted at the front of the list. // Takes ownership of the referenced held on *m by the caller of Add(). void Add(MemTable* m, autovector* to_delete); // Returns an estimate of the number of bytes of data in use. size_t ApproximateMemoryUsage(); // Returns the cached current_memory_usage_excluding_last_ value. size_t ApproximateMemoryUsageExcludingLast() const; // Returns the cached current_has_history_ value. bool HasHistory() const; // Updates current_memory_usage_excluding_last_ and current_has_history_ // from MemTableListVersion. Must be called whenever InstallNewVersion is // called. 
void UpdateCachedValuesFromMemTableListVersion(); // `usage` is the current size of the mutable Memtable. When // max_write_buffer_size_to_maintain is used, total size of mutable and // immutable memtables is checked against it to decide whether to trim // memtable list. void TrimHistory(autovector* to_delete, size_t usage); // Returns an estimate of the number of bytes of data used by // the unflushed mem-tables. size_t ApproximateUnflushedMemTablesMemoryUsage(); // Returns an estimate of the timestamp of the earliest key. uint64_t ApproximateOldestKeyTime() const; // Request a flush of all existing memtables to storage. This will // cause future calls to IsFlushPending() to return true if this list is // non-empty (regardless of the min_write_buffer_number_to_merge // parameter). This flush request will persist until the next time // PickMemtablesToFlush() is called. void FlushRequested() { flush_requested_ = true; } bool HasFlushRequested() { return flush_requested_; } // Returns true if a trim history should be scheduled and the caller should // be the one to schedule it bool MarkTrimHistoryNeeded() { auto expected = false; return imm_trim_needed.compare_exchange_strong( expected, true, std::memory_order_relaxed, std::memory_order_relaxed); } void ResetTrimHistoryNeeded() { auto expected = true; imm_trim_needed.compare_exchange_strong( expected, false, std::memory_order_relaxed, std::memory_order_relaxed); } // Copying allowed // MemTableList(const MemTableList&); // void operator=(const MemTableList&); size_t* current_memory_usage() { return ¤t_memory_usage_; } // Returns the min log containing the prep section after memtables listsed in // `memtables_to_flush` are flushed and their status is persisted in manifest. 
uint64_t PrecomputeMinLogContainingPrepSection( const autovector& memtables_to_flush); uint64_t GetEarliestMemTableID() const { auto& memlist = current_->memlist_; if (memlist.empty()) { return std::numeric_limits::max(); } return memlist.back()->GetID(); } uint64_t GetLatestMemTableID() const { auto& memlist = current_->memlist_; if (memlist.empty()) { return 0; } return memlist.front()->GetID(); } void AssignAtomicFlushSeq(const SequenceNumber& seq) { const auto& memlist = current_->memlist_; // Scan the memtable list from new to old for (auto it = memlist.begin(); it != memlist.end(); ++it) { MemTable* mem = *it; if (mem->atomic_flush_seqno_ == kMaxSequenceNumber) { mem->atomic_flush_seqno_ = seq; } else { // Earlier memtables must have been assigned a atomic flush seq, no // need to continue scan. break; } } } // Used only by DBImplSecondary during log replay. // Remove memtables whose data were written before the WAL with log_number // was created, i.e. mem->GetNextLogNumber() <= log_number. The memtables are // not freed, but put into a vector for future deref and reclamation. void RemoveOldMemTables(uint64_t log_number, autovector* to_delete); private: friend Status InstallMemtableAtomicFlushResults( const autovector* imm_lists, const autovector& cfds, const autovector& mutable_cf_options_list, const autovector*>& mems_list, VersionSet* vset, InstrumentedMutex* mu, const autovector& file_meta, autovector* to_delete, FSDirectory* db_directory, LogBuffer* log_buffer); // DB mutex held void InstallNewVersion(); const int min_write_buffer_number_to_merge_; MemTableListVersion* current_; // the number of elements that still need flushing int num_flush_not_started_; // committing in progress bool commit_in_progress_; // Requested a flush of memtables to storage. It's possible to request that // a subset of memtables be flushed. bool flush_requested_; // The current memory usage. 
size_t current_memory_usage_; // Cached value of current_->ApproximateMemoryUsageExcludingLast(). std::atomic current_memory_usage_excluding_last_; // Cached value of current_->HasHistory(). std::atomic current_has_history_; }; // Installs memtable atomic flush results. // In most cases, imm_lists is nullptr, and the function simply uses the // immutable memtable lists associated with the cfds. There are unit tests that // installs flush results for external immutable memtable lists other than the // cfds' own immutable memtable lists, e.g. MemTableLIstTest. In this case, // imm_lists parameter is not nullptr. extern Status InstallMemtableAtomicFlushResults( const autovector* imm_lists, const autovector& cfds, const autovector& mutable_cf_options_list, const autovector*>& mems_list, VersionSet* vset, InstrumentedMutex* mu, const autovector& file_meta, autovector* to_delete, FSDirectory* db_directory, LogBuffer* log_buffer); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/memtable_list_test.cc000066400000000000000000001034131370372246700200230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/memtable_list.h"
#include <algorithm>
#include <string>
#include <vector>
#include "db/merge_context.h"
#include "db/version_set.h"
#include "db/write_controller.h"
#include "rocksdb/db.h"
#include "rocksdb/status.h"
#include "rocksdb/write_buffer_manager.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// Fixture for the MemTableList tests. It lazily opens a real DB with three
// column families ("default", "one", "two") so that the Mock_* helpers below
// can recover a VersionSet from its on-disk state.
class MemTableListTest : public testing::Test {
 public:
  std::string dbname;
  DB* db;
  Options options;
  std::vector<ColumnFamilyHandle*> handles;
  std::atomic<uint64_t> file_number;

  MemTableListTest() : db(nullptr), file_number(1) {
    dbname = test::PerThreadDBPath("memtable_list_test");
    options.create_if_missing = true;
    DestroyDB(dbname, options);
  }

  // Create a test db if not yet created
  void CreateDB() {
    if (db != nullptr) {
      return;
    }
    options.create_if_missing = true;
    DestroyDB(dbname, options);
    // Open DB only with default column family
    ColumnFamilyOptions cf_options;
    std::vector<ColumnFamilyDescriptor> cf_descs;
    cf_descs.emplace_back(kDefaultColumnFamilyName, cf_options);
    Status s = DB::Open(options, dbname, cf_descs, &handles, &db);
    EXPECT_OK(s);
    if (!s.ok()) {
      // db is still nullptr here; creating column families through it would
      // crash the test binary instead of reporting the failure above.
      return;
    }
    ColumnFamilyOptions cf_opt1, cf_opt2;
    cf_opt1.cf_paths.emplace_back(dbname + "_one_1",
                                  std::numeric_limits<uint64_t>::max());
    cf_opt2.cf_paths.emplace_back(dbname + "_two_1",
                                  std::numeric_limits<uint64_t>::max());
    // Append the two new handles after whatever DB::Open returned rather than
    // assuming exactly one handle is already present.
    const size_t first_new = handles.size();
    handles.resize(first_new + 2);
    s = db->CreateColumnFamily(cf_opt1, "one", &handles[first_new]);
    EXPECT_OK(s);
    s = db->CreateColumnFamily(cf_opt2, "two", &handles[first_new + 1]);
    EXPECT_OK(s);
  }

  ~MemTableListTest() override {
    if (db) {
      std::vector<ColumnFamilyDescriptor> cf_descs(handles.size());
      for (size_t i = 0; i != handles.size(); ++i) {
        // A handle stays null if its CreateColumnFamily() call failed.
        if (handles[i]) {
          handles[i]->GetDescriptor(&cf_descs[i]);
        }
      }
      for (auto h : handles) {
        if (h) {
          db->DestroyColumnFamilyHandle(h);
        }
      }
      handles.clear();
      delete db;
      db = nullptr;
      DestroyDB(dbname, options, cf_descs);
    }
  }

  // Calls MemTableList::TryInstallMemtableFlushResults() and sets up all
  // structures needed to call this function.
Status Mock_InstallMemtableFlushResults(
      MemTableList* list, const MutableCFOptions& mutable_cf_options,
      const autovector<MemTable*>& m, autovector<MemTable*>* to_delete) {
    // Create a mock Logger
    test::NullLogger logger;
    LogBuffer log_buffer(DEBUG_LEVEL, &logger);

    CreateDB();
    // Create a mock VersionSet
    DBOptions db_options;
    ImmutableDBOptions immutable_db_options(db_options);
    EnvOptions env_options;
    std::shared_ptr<Cache> table_cache(NewLRUCache(50000, 16));
    WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size);
    WriteController write_controller(10000000u);

    VersionSet versions(dbname, &immutable_db_options, env_options,
                        table_cache.get(), &write_buffer_manager,
                        &write_controller, /*block_cache_tracer=*/nullptr);
    std::vector<ColumnFamilyDescriptor> cf_descs;
    cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions());
    cf_descs.emplace_back("one", ColumnFamilyOptions());
    cf_descs.emplace_back("two", ColumnFamilyOptions());

    Status s = versions.Recover(cf_descs, false);
    EXPECT_OK(s);
    if (!s.ok()) {
      // Without a recovered VersionSet there is nothing to install into;
      // hand the failure to the caller, which already asserts on the status.
      return s;
    }

    // Create mock default ColumnFamilyData
    auto column_family_set = versions.GetColumnFamilySet();
    LogsWithPrepTracker dummy_prep_tracker;
    auto cfd = column_family_set->GetDefault();
    EXPECT_TRUE(nullptr != cfd);
    // Each call consumes a fresh file number from the fixture-wide counter.
    uint64_t file_num = file_number.fetch_add(1);
    IOStatus io_s;
    // Create dummy mutex.
    InstrumentedMutex mutex;
    InstrumentedMutexLock l(&mutex);
    std::list<std::unique_ptr<FlushJobInfo>> flush_jobs_info;
    s = list->TryInstallMemtableFlushResults(
        cfd, mutable_cf_options, m, &dummy_prep_tracker, &versions, &mutex,
        file_num, to_delete, nullptr, &log_buffer, &flush_jobs_info, &io_s);
    return s;
  }

  // Calls InstallMemtableAtomicFlushResults() and sets up all structures
  // needed to call this function.
Status Mock_InstallMemtableAtomicFlushResults( autovector& lists, const autovector& cf_ids, const autovector& mutable_cf_options_list, const autovector*>& mems_list, autovector* to_delete) { // Create a mock Logger test::NullLogger logger; LogBuffer log_buffer(DEBUG_LEVEL, &logger); CreateDB(); // Create a mock VersionSet DBOptions db_options; ImmutableDBOptions immutable_db_options(db_options); EnvOptions env_options; std::shared_ptr table_cache(NewLRUCache(50000, 16)); WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size); WriteController write_controller(10000000u); VersionSet versions(dbname, &immutable_db_options, env_options, table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr); std::vector cf_descs; cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); cf_descs.emplace_back("one", ColumnFamilyOptions()); cf_descs.emplace_back("two", ColumnFamilyOptions()); EXPECT_OK(versions.Recover(cf_descs, false)); // Create mock default ColumnFamilyData auto column_family_set = versions.GetColumnFamilySet(); LogsWithPrepTracker dummy_prep_tracker; autovector cfds; for (int i = 0; i != static_cast(cf_ids.size()); ++i) { cfds.emplace_back(column_family_set->GetColumnFamily(cf_ids[i])); EXPECT_NE(nullptr, cfds[i]); } std::vector file_metas; file_metas.reserve(cf_ids.size()); for (size_t i = 0; i != cf_ids.size(); ++i) { FileMetaData meta; uint64_t file_num = file_number.fetch_add(1); meta.fd = FileDescriptor(file_num, 0, 0); file_metas.emplace_back(meta); } autovector file_meta_ptrs; for (auto& meta : file_metas) { file_meta_ptrs.push_back(&meta); } InstrumentedMutex mutex; InstrumentedMutexLock l(&mutex); return InstallMemtableAtomicFlushResults( &lists, cfds, mutable_cf_options_list, mems_list, &versions, &mutex, file_meta_ptrs, to_delete, nullptr, &log_buffer); } }; TEST_F(MemTableListTest, Empty) { // Create an empty MemTableList and validate basic functions. 
MemTableList list(1, 0, 0); ASSERT_EQ(0, list.NumNotFlushed()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); ASSERT_FALSE(list.IsFlushPending()); autovector mems; list.PickMemtablesToFlush(nullptr /* memtable_id */, &mems); ASSERT_EQ(0, mems.size()); autovector to_delete; list.current()->Unref(&to_delete); ASSERT_EQ(0, to_delete.size()); } TEST_F(MemTableListTest, GetTest) { // Create MemTableList int min_write_buffer_number_to_merge = 2; int max_write_buffer_number_to_maintain = 0; int64_t max_write_buffer_size_to_maintain = 0; MemTableList list(min_write_buffer_number_to_merge, max_write_buffer_number_to_maintain, max_write_buffer_size_to_maintain); SequenceNumber seq = 1; std::string value; Status s; MergeContext merge_context; InternalKeyComparator ikey_cmp(options.comparator); SequenceNumber max_covering_tombstone_seq = 0; autovector to_delete; LookupKey lkey("key1", seq); bool found = list.current()->Get( lkey, &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); // Create a MemTable InternalKeyComparator cmp(BytewiseComparator()); auto factory = std::make_shared(); options.memtable_factory = factory; ImmutableCFOptions ioptions(options); WriteBufferManager wb(options.db_write_buffer_size); MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); mem->Ref(); // Write some keys to this memtable. 
mem->Add(++seq, kTypeDeletion, "key1", ""); mem->Add(++seq, kTypeValue, "key2", "value2"); mem->Add(++seq, kTypeValue, "key1", "value1"); mem->Add(++seq, kTypeValue, "key2", "value2.2"); // Fetch the newly written keys merge_context.Clear(); found = mem->Get(LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(s.ok() && found); ASSERT_EQ(value, "value1"); merge_context.Clear(); found = mem->Get(LookupKey("key1", 2), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); // MemTable found out that this key is *not* found (at this sequence#) ASSERT_TRUE(found && s.IsNotFound()); merge_context.Clear(); found = mem->Get(LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(s.ok() && found); ASSERT_EQ(value, "value2.2"); ASSERT_EQ(4, mem->num_entries()); ASSERT_EQ(1, mem->num_deletes()); // Add memtable to list list.Add(mem, &to_delete); SequenceNumber saved_seq = seq; // Create another memtable and write some keys to it WriteBufferManager wb2(options.db_write_buffer_size); MemTable* mem2 = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb2, kMaxSequenceNumber, 0 /* column_family_id */); mem2->Ref(); mem2->Add(++seq, kTypeDeletion, "key1", ""); mem2->Add(++seq, kTypeValue, "key2", "value2.3"); // Add second memtable to list list.Add(mem2, &to_delete); // Fetch keys via MemTableList merge_context.Clear(); found = list.current()->Get( LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(found && s.IsNotFound()); merge_context.Clear(); found = list.current()->Get( LookupKey("key1", saved_seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(s.ok() && found); ASSERT_EQ("value1", value); merge_context.Clear(); found = list.current()->Get( 
LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(s.ok() && found); ASSERT_EQ(value, "value2.3"); merge_context.Clear(); found = list.current()->Get( LookupKey("key2", 1), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); ASSERT_EQ(2, list.NumNotFlushed()); list.current()->Unref(&to_delete); for (MemTable* m : to_delete) { delete m; } } TEST_F(MemTableListTest, GetFromHistoryTest) { // Create MemTableList int min_write_buffer_number_to_merge = 2; int max_write_buffer_number_to_maintain = 2; int64_t max_write_buffer_size_to_maintain = 2000; MemTableList list(min_write_buffer_number_to_merge, max_write_buffer_number_to_maintain, max_write_buffer_size_to_maintain); SequenceNumber seq = 1; std::string value; Status s; MergeContext merge_context; InternalKeyComparator ikey_cmp(options.comparator); SequenceNumber max_covering_tombstone_seq = 0; autovector to_delete; LookupKey lkey("key1", seq); bool found = list.current()->Get( lkey, &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); // Create a MemTable InternalKeyComparator cmp(BytewiseComparator()); auto factory = std::make_shared(); options.memtable_factory = factory; ImmutableCFOptions ioptions(options); WriteBufferManager wb(options.db_write_buffer_size); MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); mem->Ref(); // Write some keys to this memtable. 
mem->Add(++seq, kTypeDeletion, "key1", ""); mem->Add(++seq, kTypeValue, "key2", "value2"); mem->Add(++seq, kTypeValue, "key2", "value2.2"); // Fetch the newly written keys merge_context.Clear(); found = mem->Get(LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); // MemTable found out that this key is *not* found (at this sequence#) ASSERT_TRUE(found && s.IsNotFound()); merge_context.Clear(); found = mem->Get(LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(s.ok() && found); ASSERT_EQ(value, "value2.2"); // Add memtable to list list.Add(mem, &to_delete); ASSERT_EQ(0, to_delete.size()); // Fetch keys via MemTableList merge_context.Clear(); found = list.current()->Get(LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(found && s.IsNotFound()); merge_context.Clear(); found = list.current()->Get(LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(s.ok() && found); ASSERT_EQ("value2.2", value); // Flush this memtable from the list. // (It will then be a part of the memtable history). 
autovector to_flush; list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); ASSERT_EQ(1, to_flush.size()); MutableCFOptions mutable_cf_options(options); s = Mock_InstallMemtableFlushResults(&list, mutable_cf_options, to_flush, &to_delete); ASSERT_OK(s); ASSERT_EQ(0, list.NumNotFlushed()); ASSERT_EQ(1, list.NumFlushed()); ASSERT_EQ(0, to_delete.size()); // Verify keys are no longer in MemTableList merge_context.Clear(); found = list.current()->Get(LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); merge_context.Clear(); found = list.current()->Get(LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); // Verify keys are present in history merge_context.Clear(); found = list.current()->GetFromHistory( LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(found && s.IsNotFound()); merge_context.Clear(); found = list.current()->GetFromHistory( LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(found); ASSERT_EQ("value2.2", value); // Create another memtable and write some keys to it WriteBufferManager wb2(options.db_write_buffer_size); MemTable* mem2 = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb2, kMaxSequenceNumber, 0 /* column_family_id */); mem2->Ref(); mem2->Add(++seq, kTypeDeletion, "key1", ""); mem2->Add(++seq, kTypeValue, "key3", "value3"); // Add second memtable to list list.Add(mem2, &to_delete); ASSERT_EQ(0, to_delete.size()); to_flush.clear(); list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); ASSERT_EQ(1, to_flush.size()); // Flush second memtable s = Mock_InstallMemtableFlushResults(&list, mutable_cf_options, to_flush, &to_delete); ASSERT_OK(s); ASSERT_EQ(0, list.NumNotFlushed()); ASSERT_EQ(2, 
list.NumFlushed()); ASSERT_EQ(0, to_delete.size()); // Add a third memtable to push the first memtable out of the history WriteBufferManager wb3(options.db_write_buffer_size); MemTable* mem3 = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb3, kMaxSequenceNumber, 0 /* column_family_id */); mem3->Ref(); list.Add(mem3, &to_delete); ASSERT_EQ(1, list.NumNotFlushed()); ASSERT_EQ(1, list.NumFlushed()); ASSERT_EQ(1, to_delete.size()); // Verify keys are no longer in MemTableList merge_context.Clear(); found = list.current()->Get(LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); merge_context.Clear(); found = list.current()->Get(LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); merge_context.Clear(); found = list.current()->Get(LookupKey("key3", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); // Verify that the second memtable's keys are in the history merge_context.Clear(); found = list.current()->GetFromHistory( LookupKey("key1", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(found && s.IsNotFound()); merge_context.Clear(); found = list.current()->GetFromHistory( LookupKey("key3", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_TRUE(found); ASSERT_EQ("value3", value); // Verify that key2 from the first memtable is no longer in the history merge_context.Clear(); found = list.current()->Get(LookupKey("key2", seq), &value, /*timestamp*/nullptr, &s, &merge_context, &max_covering_tombstone_seq, ReadOptions()); ASSERT_FALSE(found); // Cleanup list.current()->Unref(&to_delete); ASSERT_EQ(3, to_delete.size()); for (MemTable* m : to_delete) { delete m; } } TEST_F(MemTableListTest, 
FlushPendingTest) { const int num_tables = 6; SequenceNumber seq = 1; Status s; auto factory = std::make_shared(); options.memtable_factory = factory; ImmutableCFOptions ioptions(options); InternalKeyComparator cmp(BytewiseComparator()); WriteBufferManager wb(options.db_write_buffer_size); autovector to_delete; // Create MemTableList int min_write_buffer_number_to_merge = 3; int max_write_buffer_number_to_maintain = 7; int64_t max_write_buffer_size_to_maintain = 7 * static_cast(options.write_buffer_size); MemTableList list(min_write_buffer_number_to_merge, max_write_buffer_number_to_maintain, max_write_buffer_size_to_maintain); // Create some MemTables uint64_t memtable_id = 0; std::vector tables; MutableCFOptions mutable_cf_options(options); for (int i = 0; i < num_tables; i++) { MemTable* mem = new MemTable(cmp, ioptions, mutable_cf_options, &wb, kMaxSequenceNumber, 0 /* column_family_id */); mem->SetID(memtable_id++); mem->Ref(); std::string value; MergeContext merge_context; mem->Add(++seq, kTypeValue, "key1", ToString(i)); mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN"); mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value"); mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM"); mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), ""); tables.push_back(mem); } // Nothing to flush ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); autovector to_flush; list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); ASSERT_EQ(0, to_flush.size()); // Request a flush even though there is nothing to flush list.FlushRequested(); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Attempt to 'flush' to clear request for flush list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); ASSERT_EQ(0, to_flush.size()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Request 
a flush again list.FlushRequested(); // No flush pending since the list is empty. ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Add 2 tables list.Add(tables[0], &to_delete); list.Add(tables[1], &to_delete); ASSERT_EQ(2, list.NumNotFlushed()); ASSERT_EQ(0, to_delete.size()); // Even though we have less than the minimum to flush, a flush is // pending since we had previously requested a flush and never called // PickMemtablesToFlush() to clear the flush. ASSERT_TRUE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); // Pick tables to flush list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); ASSERT_EQ(2, to_flush.size()); ASSERT_EQ(2, list.NumNotFlushed()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Revert flush list.RollbackMemtableFlush(to_flush, 0); ASSERT_FALSE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); to_flush.clear(); // Add another table list.Add(tables[2], &to_delete); // We now have the minimum to flush regardles of whether FlushRequested() // was called. 
ASSERT_TRUE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); ASSERT_EQ(0, to_delete.size()); // Pick tables to flush list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); ASSERT_EQ(3, to_flush.size()); ASSERT_EQ(3, list.NumNotFlushed()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Pick tables to flush again autovector to_flush2; list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush2); ASSERT_EQ(0, to_flush2.size()); ASSERT_EQ(3, list.NumNotFlushed()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Add another table list.Add(tables[3], &to_delete); ASSERT_FALSE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); ASSERT_EQ(0, to_delete.size()); // Request a flush again list.FlushRequested(); ASSERT_TRUE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); // Pick tables to flush again list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush2); ASSERT_EQ(1, to_flush2.size()); ASSERT_EQ(4, list.NumNotFlushed()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Rollback first pick of tables list.RollbackMemtableFlush(to_flush, 0); ASSERT_TRUE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); to_flush.clear(); // Add another tables list.Add(tables[4], &to_delete); ASSERT_EQ(5, list.NumNotFlushed()); // We now have the minimum to flush regardles of whether FlushRequested() ASSERT_TRUE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); ASSERT_EQ(0, to_delete.size()); // Pick tables to flush list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush); // Should pick 4 of 5 since 1 table has been picked in to_flush2 ASSERT_EQ(4, to_flush.size()); ASSERT_EQ(5, 
list.NumNotFlushed()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Pick tables to flush again autovector to_flush3; list.PickMemtablesToFlush(nullptr /* memtable_id */, &to_flush3); ASSERT_EQ(0, to_flush3.size()); // nothing not in progress of being flushed ASSERT_EQ(5, list.NumNotFlushed()); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Flush the 4 memtables that were picked in to_flush s = Mock_InstallMemtableFlushResults(&list, mutable_cf_options, to_flush, &to_delete); ASSERT_OK(s); // Note: now to_flush contains tables[0,1,2,4]. to_flush2 contains // tables[3]. // Current implementation will only commit memtables in the order they were // created. So TryInstallMemtableFlushResults will install the first 3 tables // in to_flush and stop when it encounters a table not yet flushed. ASSERT_EQ(2, list.NumNotFlushed()); int num_in_history = std::min(3, static_cast(max_write_buffer_size_to_maintain) / static_cast(options.write_buffer_size)); ASSERT_EQ(num_in_history, list.NumFlushed()); ASSERT_EQ(5 - list.NumNotFlushed() - num_in_history, to_delete.size()); // Request a flush again. Should be nothing to flush list.FlushRequested(); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Flush the 1 memtable that was picked in to_flush2 s = MemTableListTest::Mock_InstallMemtableFlushResults( &list, mutable_cf_options, to_flush2, &to_delete); ASSERT_OK(s); // This will actually install 2 tables. The 1 we told it to flush, and also // tables[4] which has been waiting for tables[3] to commit. 
ASSERT_EQ(0, list.NumNotFlushed()); num_in_history = std::min(5, static_cast(max_write_buffer_size_to_maintain) / static_cast(options.write_buffer_size)); ASSERT_EQ(num_in_history, list.NumFlushed()); ASSERT_EQ(5 - list.NumNotFlushed() - num_in_history, to_delete.size()); for (const auto& m : to_delete) { // Refcount should be 0 after calling TryInstallMemtableFlushResults. // Verify this, by Ref'ing then UnRef'ing: m->Ref(); ASSERT_EQ(m, m->Unref()); delete m; } to_delete.clear(); // Add another table list.Add(tables[5], &to_delete); ASSERT_EQ(1, list.NumNotFlushed()); ASSERT_EQ(5, list.GetLatestMemTableID()); memtable_id = 4; // Pick tables to flush. The tables to pick must have ID smaller than or // equal to 4. Therefore, no table will be selected in this case. autovector to_flush4; list.FlushRequested(); ASSERT_TRUE(list.HasFlushRequested()); list.PickMemtablesToFlush(&memtable_id, &to_flush4); ASSERT_TRUE(to_flush4.empty()); ASSERT_EQ(1, list.NumNotFlushed()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); ASSERT_FALSE(list.IsFlushPending()); ASSERT_FALSE(list.HasFlushRequested()); // Pick tables to flush. The tables to pick must have ID smaller than or // equal to 5. Therefore, only tables[5] will be selected. memtable_id = 5; list.FlushRequested(); list.PickMemtablesToFlush(&memtable_id, &to_flush4); ASSERT_EQ(1, static_cast(to_flush4.size())); ASSERT_EQ(1, list.NumNotFlushed()); ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); ASSERT_FALSE(list.IsFlushPending()); to_delete.clear(); list.current()->Unref(&to_delete); int to_delete_size = std::min(num_tables, static_cast(max_write_buffer_size_to_maintain) / static_cast(options.write_buffer_size)); ASSERT_EQ(to_delete_size, to_delete.size()); for (const auto& m : to_delete) { // Refcount should be 0 after calling TryInstallMemtableFlushResults. 
// Verify this, by Ref'ing then UnRef'ing: m->Ref(); ASSERT_EQ(m, m->Unref()); delete m; } to_delete.clear(); } TEST_F(MemTableListTest, EmptyAtomicFlusTest) { autovector lists; autovector cf_ids; autovector options_list; autovector*> to_flush; autovector to_delete; Status s = Mock_InstallMemtableAtomicFlushResults(lists, cf_ids, options_list, to_flush, &to_delete); ASSERT_OK(s); ASSERT_TRUE(to_delete.empty()); } TEST_F(MemTableListTest, AtomicFlusTest) { const int num_cfs = 3; const int num_tables_per_cf = 2; SequenceNumber seq = 1; auto factory = std::make_shared(); options.memtable_factory = factory; ImmutableCFOptions ioptions(options); InternalKeyComparator cmp(BytewiseComparator()); WriteBufferManager wb(options.db_write_buffer_size); // Create MemTableLists int min_write_buffer_number_to_merge = 3; int max_write_buffer_number_to_maintain = 7; int64_t max_write_buffer_size_to_maintain = 7 * static_cast(options.write_buffer_size); autovector lists; for (int i = 0; i != num_cfs; ++i) { lists.emplace_back(new MemTableList(min_write_buffer_number_to_merge, max_write_buffer_number_to_maintain, max_write_buffer_size_to_maintain)); } autovector cf_ids; std::vector> tables(num_cfs); autovector mutable_cf_options_list; uint32_t cf_id = 0; for (auto& elem : tables) { mutable_cf_options_list.emplace_back(new MutableCFOptions(options)); uint64_t memtable_id = 0; for (int i = 0; i != num_tables_per_cf; ++i) { MemTable* mem = new MemTable(cmp, ioptions, *(mutable_cf_options_list.back()), &wb, kMaxSequenceNumber, cf_id); mem->SetID(memtable_id++); mem->Ref(); std::string value; mem->Add(++seq, kTypeValue, "key1", ToString(i)); mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN"); mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value"); mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM"); mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), ""); elem.push_back(mem); } cf_ids.push_back(cf_id++); } std::vector> flush_candidates(num_cfs); // Nothing to flush 
for (auto i = 0; i != num_cfs; ++i) { auto* list = lists[i]; ASSERT_FALSE(list->IsFlushPending()); ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire)); list->PickMemtablesToFlush(nullptr /* memtable_id */, &flush_candidates[i]); ASSERT_EQ(0, flush_candidates[i].size()); } // Request flush even though there is nothing to flush for (auto i = 0; i != num_cfs; ++i) { auto* list = lists[i]; list->FlushRequested(); ASSERT_FALSE(list->IsFlushPending()); ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire)); } autovector to_delete; // Add tables to the immutable memtalbe lists associated with column families for (auto i = 0; i != num_cfs; ++i) { for (auto j = 0; j != num_tables_per_cf; ++j) { lists[i]->Add(tables[i][j], &to_delete); } ASSERT_EQ(num_tables_per_cf, lists[i]->NumNotFlushed()); ASSERT_TRUE(lists[i]->IsFlushPending()); ASSERT_TRUE(lists[i]->imm_flush_needed.load(std::memory_order_acquire)); } std::vector flush_memtable_ids = {1, 1, 0}; // +----+ // list[0]: |0 1| // list[1]: |0 1| // | +--+ // list[2]: |0| 1 // +-+ // Pick memtables to flush for (auto i = 0; i != num_cfs; ++i) { flush_candidates[i].clear(); lists[i]->PickMemtablesToFlush(&flush_memtable_ids[i], &flush_candidates[i]); ASSERT_EQ(flush_memtable_ids[i] - 0 + 1, static_cast(flush_candidates[i].size())); } autovector tmp_lists; autovector tmp_cf_ids; autovector tmp_options_list; autovector*> to_flush; for (auto i = 0; i != num_cfs; ++i) { if (!flush_candidates[i].empty()) { to_flush.push_back(&flush_candidates[i]); tmp_lists.push_back(lists[i]); tmp_cf_ids.push_back(i); tmp_options_list.push_back(mutable_cf_options_list[i]); } } Status s = Mock_InstallMemtableAtomicFlushResults( tmp_lists, tmp_cf_ids, tmp_options_list, to_flush, &to_delete); ASSERT_OK(s); for (auto i = 0; i != num_cfs; ++i) { for (auto j = 0; j != num_tables_per_cf; ++j) { if (static_cast(j) <= flush_memtable_ids[i]) { ASSERT_LT(0, tables[i][j]->GetFileNumber()); } } ASSERT_EQ( 
static_cast(num_tables_per_cf) - flush_candidates[i].size(), lists[i]->NumNotFlushed()); } to_delete.clear(); for (auto list : lists) { list->current()->Unref(&to_delete); delete list; } for (auto& mutable_cf_options : mutable_cf_options_list) { if (mutable_cf_options != nullptr) { delete mutable_cf_options; mutable_cf_options = nullptr; } } // All memtables in tables array must have been flushed, thus ready to be // deleted. ASSERT_EQ(to_delete.size(), tables.size() * tables.front().size()); for (const auto& m : to_delete) { // Refcount should be 0 after calling InstallMemtableFlushResults. // Verify this by Ref'ing and then Unref'ing. m->Ref(); ASSERT_EQ(m, m->Unref()); delete m; } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/merge_context.h000066400000000000000000000073301370372246700166510ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include #include #include #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { const std::vector empty_operand_list; // The merge context for merging a user key. // When doing a Get(), DB will create such a class and pass it when // issuing Get() operation to memtables and version_set. The operands // will be fetched from the context when issuing partial of full merge. 
class MergeContext { public: // Clear all the operands void Clear() { if (operand_list_) { operand_list_->clear(); copied_operands_->clear(); } } // Push a merge operand void PushOperand(const Slice& operand_slice, bool operand_pinned = false) { Initialize(); SetDirectionBackward(); if (operand_pinned) { operand_list_->push_back(operand_slice); } else { // We need to have our own copy of the operand since it's not pinned copied_operands_->emplace_back( new std::string(operand_slice.data(), operand_slice.size())); operand_list_->push_back(*copied_operands_->back()); } } // Push back a merge operand void PushOperandBack(const Slice& operand_slice, bool operand_pinned = false) { Initialize(); SetDirectionForward(); if (operand_pinned) { operand_list_->push_back(operand_slice); } else { // We need to have our own copy of the operand since it's not pinned copied_operands_->emplace_back( new std::string(operand_slice.data(), operand_slice.size())); operand_list_->push_back(*copied_operands_->back()); } } // return total number of operands in the list size_t GetNumOperands() const { if (!operand_list_) { return 0; } return operand_list_->size(); } // Get the operand at the index. 
Slice GetOperand(int index) { assert(operand_list_); SetDirectionForward(); return (*operand_list_)[index]; } // Same as GetOperandsDirectionForward const std::vector& GetOperands() { return GetOperandsDirectionForward(); } // Return all the operands in the order as they were merged (passed to // FullMerge or FullMergeV2) const std::vector& GetOperandsDirectionForward() { if (!operand_list_) { return empty_operand_list; } SetDirectionForward(); return *operand_list_; } // Return all the operands in the reversed order relative to how they were // merged (passed to FullMerge or FullMergeV2) const std::vector& GetOperandsDirectionBackward() { if (!operand_list_) { return empty_operand_list; } SetDirectionBackward(); return *operand_list_; } private: void Initialize() { if (!operand_list_) { operand_list_.reset(new std::vector()); copied_operands_.reset(new std::vector>()); } } void SetDirectionForward() { if (operands_reversed_ == true) { std::reverse(operand_list_->begin(), operand_list_->end()); operands_reversed_ = false; } } void SetDirectionBackward() { if (operands_reversed_ == false) { std::reverse(operand_list_->begin(), operand_list_->end()); operands_reversed_ = true; } } // List of operands std::unique_ptr> operand_list_; // Copy of operands that are not pinned. std::unique_ptr>> copied_operands_; bool operands_reversed_ = true; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/merge_helper.cc000066400000000000000000000404561370372246700166100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/merge_helper.h"

// NOTE(review): the standard header name was lost when this text was
// extracted; <string> is restored from usage -- confirm.
#include <string>

#include "db/dbformat.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/likely.h"
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/merge_operator.h"
#include "table/format.h"
#include "table/internal_iterator.h"

namespace ROCKSDB_NAMESPACE {

// Stores the collaborators and caches whether the merge operator accepts a
// partial merge over a single operand.
// REQUIRES: user_comparator != nullptr (asserted). user_merge_operator may be
// nullptr.
MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
                         const MergeOperator* user_merge_operator,
                         const CompactionFilter* compaction_filter,
                         Logger* logger, bool assert_valid_internal_key,
                         SequenceNumber latest_snapshot,
                         const SnapshotChecker* snapshot_checker, int level,
                         Statistics* stats,
                         const std::atomic<bool>* shutting_down)
    : env_(env),
      user_comparator_(user_comparator),
      user_merge_operator_(user_merge_operator),
      compaction_filter_(compaction_filter),
      shutting_down_(shutting_down),
      logger_(logger),
      assert_valid_internal_key_(assert_valid_internal_key),
      allow_single_operand_(false),
      latest_snapshot_(latest_snapshot),
      snapshot_checker_(snapshot_checker),
      level_(level),
      keys_(),
      filter_timer_(env_),
      total_filter_time_(0U),
      stats_(stats) {
  assert(user_comparator_ != nullptr);
  if (user_merge_operator_) {
    allow_single_operand_ = user_merge_operator_->AllowSingleOperand();
  }
}

// Runs merge_operator->FullMergeV2() over `value` and `operands`, recording
// the elapsed time and (optionally) the operand count in `statistics`.
//
// - With no operands, `value` is copied to `*result` and OK is returned
//   (value and result must then be non-null).
// - If the operator reports its result as an existing operand, that Slice is
//   written to `*result_operand` when provided, otherwise copied into
//   `*result`. When the operator produced a new value and `result_operand` is
//   provided, `*result_operand` is reset to an empty Slice.
// - Returns Corruption when the operator reports failure.
Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
                                   const Slice& key, const Slice* value,
                                   const std::vector<Slice>& operands,
                                   std::string* result, Logger* logger,
                                   Statistics* statistics, Env* env,
                                   Slice* result_operand,
                                   bool update_num_ops_stats) {
  assert(merge_operator != nullptr);

  if (operands.size() == 0) {
    assert(value != nullptr && result != nullptr);
    result->assign(value->data(), value->size());
    return Status::OK();
  }

  if (update_num_ops_stats) {
    RecordInHistogram(statistics, READ_NUM_MERGE_OPERANDS,
                      static_cast<uint64_t>(operands.size()));
  }

  bool success;
  Slice tmp_result_operand(nullptr, 0);
  const MergeOperator::MergeOperationInput merge_in(key, value, operands,
                                                    logger);
  MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand);
  {
    // Setup to time the merge
    StopWatchNano timer(env, statistics != nullptr);
    PERF_TIMER_GUARD(merge_operator_time_nanos);

    // Do the merge
    success = merge_operator->FullMergeV2(merge_in, &merge_out);

    if (tmp_result_operand.data()) {
      // FullMergeV2 result is an existing operand
      if (result_operand != nullptr) {
        *result_operand = tmp_result_operand;
      } else {
        result->assign(tmp_result_operand.data(), tmp_result_operand.size());
      }
    } else if (result_operand) {
      *result_operand = Slice(nullptr, 0);
    }

    RecordTick(statistics, MERGE_OPERATION_TOTAL_TIME,
               statistics ? timer.ElapsedNanos() : 0);
  }

  if (!success) {
    RecordTick(statistics, NUMBER_MERGE_FAILURES);
    return Status::Corruption("Error: Could not perform merge.");
  }

  return Status::OK();
}

// PRE:  iter points to the first merge type entry
// POST: iter points to the first entry beyond the merge process (or the end)
//       keys_, operands_ are updated to reflect the merge result.
//       keys_ stores the list of keys encountered while merging.
//       operands_ stores the list of merge operands encountered while merging.
//       keys_[i] corresponds to operands_[i] for each i.
//
// TODO: Avoid the snapshot stripe map lookup in CompactionRangeDelAggregator
// and just pass the StripeRep corresponding to the stripe being merged.
Status MergeHelper::MergeUntil(InternalIterator* iter,
                               CompactionRangeDelAggregator* range_del_agg,
                               const SequenceNumber stop_before,
                               const bool at_bottom) {
  // Get a copy of the internal key, before it's invalidated by iter->Next()
  // Also maintain the list of merge operands seen.
  assert(HasOperator());
  keys_.clear();
  merge_context_.Clear();
  has_compaction_filter_skip_until_ = false;
  assert(user_merge_operator_);
  bool first_key = true;

  // We need to parse the internal key again as the parsed key is
  // backed by the internal key!
  // Assume no internal key corruption as it has been successfully parsed
  // by the caller.
  // original_key_is_iter variable is just caching the information:
  // original_key_is_iter == (iter->key().ToString() == original_key)
  bool original_key_is_iter = true;
  std::string original_key = iter->key().ToString();
  // Important:
  // orig_ikey is backed by original_key if keys_.empty()
  // orig_ikey is backed by keys_.back() if !keys_.empty()
  ParsedInternalKey orig_ikey;
  bool succ = ParseInternalKey(original_key, &orig_ikey);
  assert(succ);
  if (!succ) {
    return Status::Corruption("Cannot parse key in MergeUntil");
  }

  Status s;
  bool hit_the_next_user_key = false;
  for (; iter->Valid(); iter->Next(), original_key_is_iter = false) {
    if (IsShuttingDown()) {
      return Status::ShutdownInProgress();
    }

    ParsedInternalKey ikey;
    assert(keys_.size() == merge_context_.GetNumOperands());

    if (!ParseInternalKey(iter->key(), &ikey)) {
      // stop at corrupted key
      if (assert_valid_internal_key_) {
        assert(!"Corrupted internal key not expected.");
        return Status::Corruption("Corrupted internal key not expected.");
      }
      break;
    } else if (first_key) {
      assert(user_comparator_->Equal(ikey.user_key, orig_ikey.user_key));
      first_key = false;
    } else if (!user_comparator_->Equal(ikey.user_key, orig_ikey.user_key)) {
      // hit a different user key, stop right here
      hit_the_next_user_key = true;
      break;
    } else if (stop_before > 0 && ikey.sequence <= stop_before &&
               LIKELY(snapshot_checker_ == nullptr ||
                      snapshot_checker_->CheckInSnapshot(ikey.sequence,
                                                         stop_before) !=
                          SnapshotCheckerResult::kNotInSnapshot)) {
      // hit an entry that's possibly visible by the previous snapshot, can't
      // touch that
      break;
    }

    // At this point we are guaranteed that we need to process this key.

    assert(IsValueType(ikey.type));
    if (ikey.type != kTypeMerge) {
      // hit a put/delete/single delete
      //   => merge the put value or a nullptr with operands_
      //   => store result in operands_.back() (and update keys_.back())
      //   => change the entry type to kTypeValue for keys_.back()
      // We are done! Success!

      // If there are no operands, just return the Status::OK(). That will cause
      // the compaction iterator to write out the key we're currently at, which
      // is the put/delete we just encountered.
      if (keys_.empty()) {
        return Status::OK();
      }

      // TODO(noetzli) If the merge operator returns false, we are currently
      // (almost) silently dropping the put/delete. That's probably not what we
      // want. Also if we're in compaction and it's a put, it would be nice to
      // run compaction filter on it.
      const Slice val = iter->value();
      const Slice* val_ptr;
      if (kTypeValue == ikey.type &&
          (range_del_agg == nullptr ||
           !range_del_agg->ShouldDelete(
               ikey, RangeDelPositioningMode::kForwardTraversal))) {
        val_ptr = &val;
      } else {
        // Deletions, and puts covered by a range tombstone, contribute no
        // base value to the merge.
        val_ptr = nullptr;
      }
      std::string merge_result;
      s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr,
                         merge_context_.GetOperands(), &merge_result, logger_,
                         stats_, env_);

      // We store the result in keys_.back() and operands_.back()
      // if nothing went wrong (i.e.: no operand corruption on disk)
      if (s.ok()) {
        // The original key encountered
        original_key = std::move(keys_.back());
        orig_ikey.type = kTypeValue;
        UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type);
        keys_.clear();
        merge_context_.Clear();
        keys_.emplace_front(std::move(original_key));
        merge_context_.PushOperand(merge_result);
      }

      // move iter to the next entry
      iter->Next();
      return s;
    } else {
      // hit a merge
      //   => if there is a compaction filter, apply it.
      //   => check for range tombstones covering the operand
      //   => merge the operand into the front of the operands_ list
      //      if not filtered
      //   => then continue because we haven't yet seen a Put/Delete.
      //
      // Keep queuing keys and operands until we either meet a put / delete
      // request or later did a partial merge.

      Slice value_slice = iter->value();
      // add an operand to the list if:
      // 1) it's included in one of the snapshots. in that case we *must* write
      // it out, no matter what compaction filter says
      // 2) it's not filtered by a compaction filter
      CompactionFilter::Decision filter =
          ikey.sequence <= latest_snapshot_
              ? CompactionFilter::Decision::kKeep
              : FilterMerge(orig_ikey.user_key, value_slice);
      if (filter != CompactionFilter::Decision::kRemoveAndSkipUntil &&
          range_del_agg != nullptr &&
          range_del_agg->ShouldDelete(
              iter->key(), RangeDelPositioningMode::kForwardTraversal)) {
        filter = CompactionFilter::Decision::kRemove;
      }
      if (filter == CompactionFilter::Decision::kKeep ||
          filter == CompactionFilter::Decision::kChangeValue) {
        if (original_key_is_iter) {
          // this is just an optimization that saves us one memcpy
          keys_.push_front(std::move(original_key));
        } else {
          keys_.push_front(iter->key().ToString());
        }
        if (keys_.size() == 1) {
          // we need to re-anchor the orig_ikey because it was anchored by
          // original_key before. The key was already parsed successfully
          // above, so a failure here would be a logic error.
          const bool reanchored = ParseInternalKey(keys_.back(), &orig_ikey);
          assert(reanchored);
          (void)reanchored;
        }
        if (filter == CompactionFilter::Decision::kKeep) {
          merge_context_.PushOperand(
              value_slice, iter->IsValuePinned() /* operand_pinned */);
        } else {  // kChangeValue
          // Compaction filter asked us to change the operand from value_slice
          // to compaction_filter_value_.
          merge_context_.PushOperand(compaction_filter_value_, false);
        }
      } else if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil) {
        // Compaction filter asked us to remove this key altogether
        // (not just this operand), along with some keys following it.
        keys_.clear();
        merge_context_.Clear();
        has_compaction_filter_skip_until_ = true;
        return Status::OK();
      }
    }
  }

  if (merge_context_.GetNumOperands() == 0) {
    // we filtered out all the merge operands
    return Status::OK();
  }

  // We are sure we have seen this key's entire history if:
  // at_bottom == true (this does not necessarily mean it is the bottommost
  // layer, but rather that we are confident the key does not appear on any of
  // the lower layers, at_bottom == false doesn't mean it does appear, just
  // that we can't be sure, see Compaction::IsBottommostLevel for details)
  // AND
  // we have either encountered another key or end of key history on this
  // layer.
  //
  // When these conditions are true we are able to merge all the keys
  // using full merge.
  //
  // For these cases we are not sure about, we simply miss the opportunity
  // to combine the keys. Since VersionSet::SetupOtherInputs() always makes
  // sure that all merge-operands on the same level get compacted together,
  // this will simply lead to these merge operands moving to the next level.
  bool surely_seen_the_beginning =
      (hit_the_next_user_key || !iter->Valid()) && at_bottom;
  if (surely_seen_the_beginning) {
    // do a final merge with nullptr as the existing value and say
    // bye to the merge type (it's now converted to a Put)
    assert(kTypeMerge == orig_ikey.type);
    assert(merge_context_.GetNumOperands() >= 1);
    assert(merge_context_.GetNumOperands() == keys_.size());
    std::string merge_result;
    s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr,
                       merge_context_.GetOperands(), &merge_result, logger_,
                       stats_, env_);
    if (s.ok()) {
      // The original key encountered
      // We are certain that keys_ is not empty here (see assertions couple of
      // lines before).
      original_key = std::move(keys_.back());
      orig_ikey.type = kTypeValue;
      UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type);
      keys_.clear();
      merge_context_.Clear();
      keys_.emplace_front(std::move(original_key));
      merge_context_.PushOperand(merge_result);
    }
  } else {
    // We haven't seen the beginning of the key nor a Put/Delete.
    // Attempt to use the user's associative merge function to
    // merge the stacked merge operands into a single operand.
    s = Status::MergeInProgress();
    if (merge_context_.GetNumOperands() >= 2 ||
        (allow_single_operand_ && merge_context_.GetNumOperands() == 1)) {
      bool merge_success = false;
      std::string merge_result;
      {
        StopWatchNano timer(env_, stats_ != nullptr);
        PERF_TIMER_GUARD(merge_operator_time_nanos);
        merge_success = user_merge_operator_->PartialMergeMulti(
            orig_ikey.user_key,
            std::deque<Slice>(merge_context_.GetOperands().begin(),
                              merge_context_.GetOperands().end()),
            &merge_result, logger_);

        RecordTick(stats_, MERGE_OPERATION_TOTAL_TIME,
                   stats_ ? timer.ElapsedNanosSafe() : 0);
      }
      if (merge_success) {
        // Merging of operands (associative merge) was successful.
        // Replace operands with the merge result
        merge_context_.Clear();
        merge_context_.PushOperand(merge_result);
        // Keep only keys_.back(), the first key the iterator saw.
        keys_.erase(keys_.begin(), keys_.end() - 1);
      }
    }
  }

  return s;
}

// Binds the iterator to `merge_helper`; it starts out invalid (positioned at
// rend()) until SeekToFirst() is called.
MergeOutputIterator::MergeOutputIterator(const MergeHelper* merge_helper)
    : merge_helper_(merge_helper) {
  it_keys_ = merge_helper_->keys().rend();
  it_values_ = merge_helper_->values().rend();
}

// Positions on the last element of keys()/values(); iteration then walks both
// containers from back to front.
void MergeOutputIterator::SeekToFirst() {
  const auto& keys = merge_helper_->keys();
  const auto& values = merge_helper_->values();
  assert(keys.size() == values.size());
  it_keys_ = keys.rbegin();
  it_values_ = values.rbegin();
}

void MergeOutputIterator::Next() {
  ++it_keys_;
  ++it_values_;
}

// Asks the compaction filter what to do with one merge operand.
// Returns kKeep when no filter is configured. The filter's optional outputs
// land in compaction_filter_value_ and compaction_filter_skip_until_.
CompactionFilter::Decision MergeHelper::FilterMerge(const Slice& user_key,
                                                    const Slice& value_slice) {
  if (compaction_filter_ == nullptr) {
    return CompactionFilter::Decision::kKeep;
  }
  if (stats_ != nullptr && ShouldReportDetailedTime(env_, stats_)) {
    filter_timer_.Start();
  }
  compaction_filter_value_.clear();
  compaction_filter_skip_until_.Clear();
  auto ret = compaction_filter_->FilterV2(
      level_, user_key, CompactionFilter::ValueType::kMergeOperand, value_slice,
      &compaction_filter_value_, compaction_filter_skip_until_.rep());
  if (ret == CompactionFilter::Decision::kRemoveAndSkipUntil) {
    if (user_comparator_->Compare(*compaction_filter_skip_until_.rep(),
                                  user_key) <= 0) {
      // Invalid skip_until returned from compaction filter.
      // Keep the key as per FilterV2 documentation.
      ret = CompactionFilter::Decision::kKeep;
    } else {
      compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber,
                                                       kValueTypeForSeek);
    }
  }
  total_filter_time_ += filter_timer_.ElapsedNanosSafe();
  return ret;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/merge_helper.h000066400000000000000000000176731370372246700164570ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include #include #include "db/dbformat.h" #include "db/merge_context.h" #include "db/range_del_aggregator.h" #include "db/snapshot_checker.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { class Comparator; class Iterator; class Logger; class MergeOperator; class Statistics; class MergeHelper { public: MergeHelper(Env* env, const Comparator* user_comparator, const MergeOperator* user_merge_operator, const CompactionFilter* compaction_filter, Logger* logger, bool assert_valid_internal_key, SequenceNumber latest_snapshot, const SnapshotChecker* snapshot_checker = nullptr, int level = 0, Statistics* stats = nullptr, const std::atomic* shutting_down = nullptr); // Wrapper around MergeOperator::FullMergeV2() that records perf statistics. // Result of merge will be written to result if status returned is OK. // If operands is empty, the value will simply be copied to result. // Set `update_num_ops_stats` to true if it is from a user read, so that // the latency is sensitive. // Returns one of the following statuses: // - OK: Entries were successfully merged. // - Corruption: Merge operator reported unsuccessful merge. 
static Status TimedFullMerge(const MergeOperator* merge_operator, const Slice& key, const Slice* value, const std::vector& operands, std::string* result, Logger* logger, Statistics* statistics, Env* env, Slice* result_operand = nullptr, bool update_num_ops_stats = false); // Merge entries until we hit // - a corrupted key // - a Put/Delete, // - a different user key, // - a specific sequence number (snapshot boundary), // - REMOVE_AND_SKIP_UNTIL returned from compaction filter, // or - the end of iteration // iter: (IN) points to the first merge type entry // (OUT) points to the first entry not included in the merge process // range_del_agg: (IN) filters merge operands covered by range tombstones. // stop_before: (IN) a sequence number that merge should not cross. // 0 means no restriction // at_bottom: (IN) true if the iterator covers the bottem level, which means // we could reach the start of the history of this user key. // // Returns one of the following statuses: // - OK: Entries were successfully merged. // - MergeInProgress: Put/Delete not encountered, and didn't reach the start // of key's history. Output consists of merge operands only. // - Corruption: Merge operator reported unsuccessful merge or a corrupted // key has been encountered and not expected (applies only when compiling // with asserts removed). // - ShutdownInProgress: interrupted by shutdown (*shutting_down == true). // // REQUIRED: The first key in the input is not corrupted. Status MergeUntil(InternalIterator* iter, CompactionRangeDelAggregator* range_del_agg = nullptr, const SequenceNumber stop_before = 0, const bool at_bottom = false); // Filters a merge operand using the compaction filter specified // in the constructor. Returns the decision that the filter made. // Uses compaction_filter_value_ and compaction_filter_skip_until_ for the // optional outputs of compaction filter. 
CompactionFilter::Decision FilterMerge(const Slice& user_key, const Slice& value_slice); // Query the merge result // These are valid until the next MergeUntil call // If the merging was successful: // - keys() contains a single element with the latest sequence number of // the merges. The type will be Put or Merge. See IMPORTANT 1 note, below. // - values() contains a single element with the result of merging all the // operands together // // IMPORTANT 1: the key type could change after the MergeUntil call. // Put/Delete + Merge + ... + Merge => Put // Merge + ... + Merge => Merge // // If the merge operator is not associative, and if a Put/Delete is not found // then the merging will be unsuccessful. In this case: // - keys() contains the list of internal keys seen in order of iteration. // - values() contains the list of values (merges) seen in the same order. // values() is parallel to keys() so that the first entry in // keys() is the key associated with the first entry in values() // and so on. These lists will be the same length. // All of these pairs will be merges over the same user key. // See IMPORTANT 2 note below. // // IMPORTANT 2: The entries were traversed in order from BACK to FRONT. // So keys().back() was the first key seen by iterator. // TODO: Re-style this comment to be like the first one const std::deque& keys() const { return keys_; } const std::vector& values() const { return merge_context_.GetOperands(); } uint64_t TotalFilterTime() const { return total_filter_time_; } bool HasOperator() const { return user_merge_operator_ != nullptr; } // If compaction filter returned REMOVE_AND_SKIP_UNTIL, this method will // return true and fill *until with the key to which we should skip. // If true, keys() and values() are empty. 
bool FilteredUntil(Slice* skip_until) const { if (!has_compaction_filter_skip_until_) { return false; } assert(compaction_filter_ != nullptr); assert(skip_until != nullptr); assert(compaction_filter_skip_until_.Valid()); *skip_until = compaction_filter_skip_until_.Encode(); return true; } private: Env* env_; const Comparator* user_comparator_; const MergeOperator* user_merge_operator_; const CompactionFilter* compaction_filter_; const std::atomic* shutting_down_; Logger* logger_; bool assert_valid_internal_key_; // enforce no internal key corruption? bool allow_single_operand_; SequenceNumber latest_snapshot_; const SnapshotChecker* const snapshot_checker_; int level_; // the scratch area that holds the result of MergeUntil // valid up to the next MergeUntil call // Keeps track of the sequence of keys seen std::deque keys_; // Parallel with keys_; stores the operands mutable MergeContext merge_context_; StopWatchNano filter_timer_; uint64_t total_filter_time_; Statistics* stats_; bool has_compaction_filter_skip_until_ = false; std::string compaction_filter_value_; InternalKey compaction_filter_skip_until_; bool IsShuttingDown() { // This is a best-effort facility, so memory_order_relaxed is sufficient. return shutting_down_ && shutting_down_->load(std::memory_order_relaxed); } }; // MergeOutputIterator can be used to iterate over the result of a merge. class MergeOutputIterator { public: // The MergeOutputIterator is bound to a MergeHelper instance. explicit MergeOutputIterator(const MergeHelper* merge_helper); // Seeks to the first record in the output. void SeekToFirst(); // Advances to the next record in the output. 
void Next(); Slice key() { return Slice(*it_keys_); } Slice value() { return Slice(*it_values_); } bool Valid() { return it_keys_ != merge_helper_->keys().rend(); } private: const MergeHelper* merge_helper_; std::deque::const_reverse_iterator it_keys_; std::vector::const_reverse_iterator it_values_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/merge_helper_test.cc000066400000000000000000000263151370372246700176450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include "db/merge_helper.h" #include "rocksdb/comparator.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { class MergeHelperTest : public testing::Test { public: MergeHelperTest() { env_ = Env::Default(); } ~MergeHelperTest() override = default; Status Run(SequenceNumber stop_before, bool at_bottom, SequenceNumber latest_snapshot = 0) { iter_.reset(new test::VectorIterator(ks_, vs_)); iter_->SeekToFirst(); merge_helper_.reset(new MergeHelper(env_, BytewiseComparator(), merge_op_.get(), filter_.get(), nullptr, false, latest_snapshot)); return merge_helper_->MergeUntil(iter_.get(), nullptr /* range_del_agg */, stop_before, at_bottom); } void AddKeyVal(const std::string& user_key, const SequenceNumber& seq, const ValueType& t, const std::string& val, bool corrupt = false) { InternalKey ikey(user_key, seq, t); if (corrupt) { test::CorruptKeyType(&ikey); } ks_.push_back(ikey.Encode().ToString()); vs_.push_back(val); } Env* env_; std::unique_ptr iter_; std::shared_ptr merge_op_; std::unique_ptr merge_helper_; std::vector ks_; std::vector vs_; std::unique_ptr filter_; }; // If MergeHelper encounters a new key on the last level, we 
know that // the key has no more history and it can merge keys. TEST_F(MergeHelperTest, MergeAtBottomSuccess) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); AddKeyVal("a", 20, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("b", 10, kTypeMerge, test::EncodeInt(4U)); // <- iter_ after merge ASSERT_TRUE(Run(0, true).ok()); ASSERT_EQ(ks_[2], iter_->key()); ASSERT_EQ(test::KeyStr("a", 20, kTypeValue), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]); ASSERT_EQ(1U, merge_helper_->keys().size()); ASSERT_EQ(1U, merge_helper_->values().size()); } // Merging with a value results in a successful merge. TEST_F(MergeHelperTest, MergeValue) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); AddKeyVal("a", 40, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("a", 20, kTypeValue, test::EncodeInt(4U)); // <- iter_ after merge AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(1U)); ASSERT_TRUE(Run(0, false).ok()); ASSERT_EQ(ks_[3], iter_->key()); ASSERT_EQ(test::KeyStr("a", 40, kTypeValue), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(8U), merge_helper_->values()[0]); ASSERT_EQ(1U, merge_helper_->keys().size()); ASSERT_EQ(1U, merge_helper_->values().size()); } // Merging stops before a snapshot. 
TEST_F(MergeHelperTest, SnapshotBeforeValue) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 40, kTypeMerge, test::EncodeInt(3U)); // <- iter_ after merge AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 20, kTypeValue, test::EncodeInt(4U)); AddKeyVal("a", 10, kTypeMerge, test::EncodeInt(1U)); ASSERT_TRUE(Run(31, true).IsMergeInProgress()); ASSERT_EQ(ks_[2], iter_->key()); ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]); ASSERT_EQ(1U, merge_helper_->keys().size()); ASSERT_EQ(1U, merge_helper_->values().size()); } // MergeHelper preserves the operand stack for merge operators that // cannot do a partial merge. TEST_F(MergeHelperTest, NoPartialMerge) { merge_op_ = MergeOperators::CreateStringAppendTESTOperator(); AddKeyVal("a", 50, kTypeMerge, "v2"); AddKeyVal("a", 40, kTypeMerge, "v"); // <- iter_ after merge AddKeyVal("a", 30, kTypeMerge, "v"); ASSERT_TRUE(Run(31, true).IsMergeInProgress()); ASSERT_EQ(ks_[2], iter_->key()); ASSERT_EQ(test::KeyStr("a", 40, kTypeMerge), merge_helper_->keys()[0]); ASSERT_EQ("v", merge_helper_->values()[0]); ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[1]); ASSERT_EQ("v2", merge_helper_->values()[1]); ASSERT_EQ(2U, merge_helper_->keys().size()); ASSERT_EQ(2U, merge_helper_->values().size()); } // A single operand can not be merged. 
TEST_F(MergeHelperTest, SingleOperand) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); AddKeyVal("a", 50, kTypeMerge, test::EncodeInt(1U)); ASSERT_TRUE(Run(31, false).IsMergeInProgress()); ASSERT_FALSE(iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(1U), merge_helper_->values()[0]); ASSERT_EQ(1U, merge_helper_->keys().size()); ASSERT_EQ(1U, merge_helper_->values().size()); } // Merging with a deletion turns the deletion into a value TEST_F(MergeHelperTest, MergeDeletion) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("a", 20, kTypeDeletion, ""); ASSERT_TRUE(Run(15, false).ok()); ASSERT_FALSE(iter_->Valid()); ASSERT_EQ(test::KeyStr("a", 30, kTypeValue), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(3U), merge_helper_->values()[0]); ASSERT_EQ(1U, merge_helper_->keys().size()); ASSERT_EQ(1U, merge_helper_->values().size()); } // The merge helper stops upon encountering a corrupt key TEST_F(MergeHelperTest, CorruptKey) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(1U)); // Corrupt key AddKeyVal("a", 20, kTypeDeletion, "", true); // <- iter_ after merge ASSERT_TRUE(Run(15, false).IsMergeInProgress()); ASSERT_EQ(ks_[2], iter_->key()); ASSERT_EQ(test::KeyStr("a", 30, kTypeMerge), merge_helper_->keys()[0]); ASSERT_EQ(test::EncodeInt(4U), merge_helper_->values()[0]); ASSERT_EQ(1U, merge_helper_->keys().size()); ASSERT_EQ(1U, merge_helper_->values().size()); } // The compaction filter is called on every merge operand TEST_F(MergeHelperTest, FilterMergeOperands) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); filter_.reset(new test::FilterNumber(5U)); AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(5U)); // Filtered AddKeyVal("a", 28, kTypeMerge, 
test::EncodeInt(3U)); AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); // Filtered AddKeyVal("a", 25, kTypeValue, test::EncodeInt(1U)); ASSERT_TRUE(Run(15, false).ok()); ASSERT_FALSE(iter_->Valid()); MergeOutputIterator merge_output_iter(merge_helper_.get()); merge_output_iter.SeekToFirst(); ASSERT_EQ(test::KeyStr("a", 30, kTypeValue), merge_output_iter.key().ToString()); ASSERT_EQ(test::EncodeInt(8U), merge_output_iter.value().ToString()); merge_output_iter.Next(); ASSERT_FALSE(merge_output_iter.Valid()); } TEST_F(MergeHelperTest, FilterAllMergeOperands) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); filter_.reset(new test::FilterNumber(5U)); AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U)); // filtered out all ASSERT_TRUE(Run(15, false).ok()); ASSERT_FALSE(iter_->Valid()); MergeOutputIterator merge_output_iter(merge_helper_.get()); merge_output_iter.SeekToFirst(); ASSERT_FALSE(merge_output_iter.Valid()); // we have one operand that will survive because it's a delete AddKeyVal("a", 24, kTypeDeletion, test::EncodeInt(5U)); AddKeyVal("b", 23, kTypeValue, test::EncodeInt(5U)); ASSERT_TRUE(Run(15, true).ok()); merge_output_iter = MergeOutputIterator(merge_helper_.get()); ASSERT_TRUE(iter_->Valid()); merge_output_iter.SeekToFirst(); ASSERT_FALSE(merge_output_iter.Valid()); // when all merge operands are filtered out, we leave the iterator pointing to // the Put/Delete that survived ASSERT_EQ(test::KeyStr("a", 24, kTypeDeletion), iter_->key().ToString()); ASSERT_EQ(test::EncodeInt(5U), iter_->value().ToString()); } // Make sure that merge operands are filtered at the beginning TEST_F(MergeHelperTest, FilterFirstMergeOperand) { merge_op_ = 
MergeOperators::CreateUInt64AddOperator(); filter_.reset(new test::FilterNumber(5U)); AddKeyVal("a", 31, kTypeMerge, test::EncodeInt(5U)); // Filtered AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U)); // Filtered AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(2U)); AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); // Filtered AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U)); // Filtered AddKeyVal("b", 24, kTypeValue, test::EncodeInt(5U)); // next user key ASSERT_OK(Run(15, true)); ASSERT_TRUE(iter_->Valid()); MergeOutputIterator merge_output_iter(merge_helper_.get()); merge_output_iter.SeekToFirst(); // sequence number is 29 here, because the first merge operand got filtered // out ASSERT_EQ(test::KeyStr("a", 29, kTypeValue), merge_output_iter.key().ToString()); ASSERT_EQ(test::EncodeInt(6U), merge_output_iter.value().ToString()); merge_output_iter.Next(); ASSERT_FALSE(merge_output_iter.Valid()); // make sure that we're passing user keys into the filter ASSERT_EQ("a", filter_->last_merge_operand_key()); } // Make sure that merge operands are not filtered out if there's a snapshot // pointing at them TEST_F(MergeHelperTest, DontFilterMergeOperandsBeforeSnapshotTest) { merge_op_ = MergeOperators::CreateUInt64AddOperator(); filter_.reset(new test::FilterNumber(5U)); AddKeyVal("a", 31, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 30, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 29, kTypeMerge, test::EncodeInt(2U)); AddKeyVal("a", 28, kTypeMerge, test::EncodeInt(1U)); AddKeyVal("a", 27, kTypeMerge, test::EncodeInt(3U)); AddKeyVal("a", 26, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("a", 25, kTypeMerge, test::EncodeInt(5U)); AddKeyVal("b", 24, kTypeValue, test::EncodeInt(5U)); ASSERT_OK(Run(15, true, 32)); ASSERT_TRUE(iter_->Valid()); MergeOutputIterator merge_output_iter(merge_helper_.get()); merge_output_iter.SeekToFirst(); 
ASSERT_EQ(test::KeyStr("a", 31, kTypeValue), merge_output_iter.key().ToString()); ASSERT_EQ(test::EncodeInt(26U), merge_output_iter.value().ToString()); merge_output_iter.Next(); ASSERT_FALSE(merge_output_iter.Valid()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/merge_operator.cc000066400000000000000000000061201370372246700171520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // /** * Back-end implementation details specific to the Merge Operator. */ #include "rocksdb/merge_operator.h" namespace ROCKSDB_NAMESPACE { bool MergeOperator::FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const { // If FullMergeV2 is not implemented, we convert the operand_list to // std::deque and pass it to FullMerge std::deque operand_list_str; for (auto& op : merge_in.operand_list) { operand_list_str.emplace_back(op.data(), op.size()); } return FullMerge(merge_in.key, merge_in.existing_value, operand_list_str, &merge_out->new_value, merge_in.logger); } // The default implementation of PartialMergeMulti, which invokes // PartialMerge multiple times internally and merges two operands at // a time. 
bool MergeOperator::PartialMergeMulti(const Slice& key, const std::deque& operand_list, std::string* new_value, Logger* logger) const { assert(operand_list.size() >= 2); // Simply loop through the operands Slice temp_slice(operand_list[0]); for (size_t i = 1; i < operand_list.size(); ++i) { auto& operand = operand_list[i]; std::string temp_value; if (!PartialMerge(key, temp_slice, operand, &temp_value, logger)) { return false; } swap(temp_value, *new_value); temp_slice = Slice(*new_value); } // The result will be in *new_value. All merges succeeded. return true; } // Given a "real" merge from the library, call the user's // associative merge function one-by-one on each of the operands. // NOTE: It is assumed that the client's merge-operator will handle any errors. bool AssociativeMergeOperator::FullMergeV2( const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const { // Simply loop through the operands Slice temp_existing; const Slice* existing_value = merge_in.existing_value; for (const auto& operand : merge_in.operand_list) { std::string temp_value; if (!Merge(merge_in.key, existing_value, operand, &temp_value, merge_in.logger)) { return false; } swap(temp_value, merge_out->new_value); temp_existing = Slice(merge_out->new_value); existing_value = &temp_existing; } // The result will be in *new_value. All merges succeeded. return true; } // Call the user defined simple merge on the operands; // NOTE: It is assumed that the client's merge-operator will handle any errors. bool AssociativeMergeOperator::PartialMerge( const Slice& key, const Slice& left_operand, const Slice& right_operand, std::string* new_value, Logger* logger) const { return Merge(key, &left_operand, right_operand, new_value, logger); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/merge_test.cc000066400000000000000000000327231370372246700163060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include #include #include #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/write_batch_internal.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" #include "rocksdb/utilities/db_ttl.h" #include "test_util/testharness.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { bool use_compression; class MergeTest : public testing::Test {}; size_t num_merge_operator_calls; void resetNumMergeOperatorCalls() { num_merge_operator_calls = 0; } size_t num_partial_merge_calls; void resetNumPartialMergeCalls() { num_partial_merge_calls = 0; } class CountMergeOperator : public AssociativeMergeOperator { public: CountMergeOperator() { mergeOperator_ = MergeOperators::CreateUInt64AddOperator(); } bool Merge(const Slice& key, const Slice* existing_value, const Slice& value, std::string* new_value, Logger* logger) const override { assert(new_value->empty()); ++num_merge_operator_calls; if (existing_value == nullptr) { new_value->assign(value.data(), value.size()); return true; } return mergeOperator_->PartialMerge( key, *existing_value, value, new_value, logger); } bool PartialMergeMulti(const Slice& key, const std::deque& operand_list, std::string* new_value, Logger* logger) const override { assert(new_value->empty()); ++num_partial_merge_calls; return mergeOperator_->PartialMergeMulti(key, operand_list, new_value, logger); } const char* Name() const override { return "UInt64AddOperator"; } private: std::shared_ptr mergeOperator_; }; std::shared_ptr OpenDb(const std::string& dbname, const bool ttl = false, const size_t max_successive_merges = 0) { DB* db; Options options; options.create_if_missing = true; options.merge_operator = 
std::make_shared(); options.max_successive_merges = max_successive_merges; Status s; DestroyDB(dbname, Options()); // DBWithTTL is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE if (ttl) { DBWithTTL* db_with_ttl; s = DBWithTTL::Open(options, dbname, &db_with_ttl); db = db_with_ttl; } else { s = DB::Open(options, dbname, &db); } #else assert(!ttl); s = DB::Open(options, dbname, &db); #endif // !ROCKSDB_LITE if (!s.ok()) { std::cerr << s.ToString() << std::endl; assert(false); } return std::shared_ptr(db); } // Imagine we are maintaining a set of uint64 counters. // Each counter has a distinct name. And we would like // to support four high level operations: // set, add, get and remove // This is a quick implementation without a Merge operation. class Counters { protected: std::shared_ptr db_; WriteOptions put_option_; ReadOptions get_option_; WriteOptions delete_option_; uint64_t default_; public: explicit Counters(std::shared_ptr db, uint64_t defaultCount = 0) : db_(db), put_option_(), get_option_(), delete_option_(), default_(defaultCount) { assert(db_); } virtual ~Counters() {} // public interface of Counters. // All four functions return false // if the underlying level db operation failed. 
// mapped to a levedb Put bool set(const std::string& key, uint64_t value) { // just treat the internal rep of int64 as the string char buf[sizeof(value)]; EncodeFixed64(buf, value); Slice slice(buf, sizeof(value)); auto s = db_->Put(put_option_, key, slice); if (s.ok()) { return true; } else { std::cerr << s.ToString() << std::endl; return false; } } // mapped to a rocksdb Delete bool remove(const std::string& key) { auto s = db_->Delete(delete_option_, key); if (s.ok()) { return true; } else { std::cerr << s.ToString() << std::endl; return false; } } // mapped to a rocksdb Get bool get(const std::string& key, uint64_t* value) { std::string str; auto s = db_->Get(get_option_, key, &str); if (s.IsNotFound()) { // return default value if not found; *value = default_; return true; } else if (s.ok()) { // deserialization if (str.size() != sizeof(uint64_t)) { std::cerr << "value corruption\n"; return false; } *value = DecodeFixed64(&str[0]); return true; } else { std::cerr << s.ToString() << std::endl; return false; } } // 'add' is implemented as get -> modify -> set // An alternative is a single merge operation, see MergeBasedCounters virtual bool add(const std::string& key, uint64_t value) { uint64_t base = default_; return get(key, &base) && set(key, base + value); } // convenience functions for testing void assert_set(const std::string& key, uint64_t value) { assert(set(key, value)); } void assert_remove(const std::string& key) { assert(remove(key)); } uint64_t assert_get(const std::string& key) { uint64_t value = default_; int result = get(key, &value); assert(result); if (result == 0) exit(1); // Disable unused variable warning. return value; } void assert_add(const std::string& key, uint64_t value) { int result = add(key, value); assert(result); if (result == 0) exit(1); // Disable unused variable warning. 
} }; // Implement 'add' directly with the new Merge operation class MergeBasedCounters : public Counters { private: WriteOptions merge_option_; // for merge public: explicit MergeBasedCounters(std::shared_ptr db, uint64_t defaultCount = 0) : Counters(db, defaultCount), merge_option_() { } // mapped to a rocksdb Merge operation bool add(const std::string& key, uint64_t value) override { char encoded[sizeof(uint64_t)]; EncodeFixed64(encoded, value); Slice slice(encoded, sizeof(uint64_t)); auto s = db_->Merge(merge_option_, key, slice); if (s.ok()) { return true; } else { std::cerr << s.ToString() << std::endl; return false; } } }; void dumpDb(DB* db) { auto it = std::unique_ptr(db->NewIterator(ReadOptions())); for (it->SeekToFirst(); it->Valid(); it->Next()) { //uint64_t value = DecodeFixed64(it->value().data()); //std::cout << it->key().ToString() << ": " << value << std::endl; } assert(it->status().ok()); // Check for any errors found during the scan } void testCounters(Counters& counters, DB* db, bool test_compaction) { FlushOptions o; o.wait = true; counters.assert_set("a", 1); if (test_compaction) db->Flush(o); assert(counters.assert_get("a") == 1); counters.assert_remove("b"); // defaut value is 0 if non-existent assert(counters.assert_get("b") == 0); counters.assert_add("a", 2); if (test_compaction) db->Flush(o); // 1+2 = 3 assert(counters.assert_get("a")== 3); dumpDb(db); // 1+...+49 = ? 
uint64_t sum = 0; for (int i = 1; i < 50; i++) { counters.assert_add("b", i); sum += i; } assert(counters.assert_get("b") == sum); dumpDb(db); if (test_compaction) { db->Flush(o); db->CompactRange(CompactRangeOptions(), nullptr, nullptr); dumpDb(db); assert(counters.assert_get("a")== 3); assert(counters.assert_get("b") == sum); } } void testSuccessiveMerge(Counters& counters, size_t max_num_merges, size_t num_merges) { counters.assert_remove("z"); uint64_t sum = 0; for (size_t i = 1; i <= num_merges; ++i) { resetNumMergeOperatorCalls(); counters.assert_add("z", i); sum += i; if (i % (max_num_merges + 1) == 0) { assert(num_merge_operator_calls == max_num_merges + 1); } else { assert(num_merge_operator_calls == 0); } resetNumMergeOperatorCalls(); assert(counters.assert_get("z") == sum); assert(num_merge_operator_calls == i % (max_num_merges + 1)); } } void testPartialMerge(Counters* counters, DB* db, size_t max_merge, size_t min_merge, size_t count) { FlushOptions o; o.wait = true; // Test case 1: partial merge should be called when the number of merge // operands exceeds the threshold. uint64_t tmp_sum = 0; resetNumPartialMergeCalls(); for (size_t i = 1; i <= count; i++) { counters->assert_add("b", i); tmp_sum += i; } db->Flush(o); db->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(tmp_sum, counters->assert_get("b")); if (count > max_merge) { // in this case, FullMerge should be called instead. ASSERT_EQ(num_partial_merge_calls, 0U); } else { // if count >= min_merge, then partial merge should be called once. ASSERT_EQ((count >= min_merge), (num_partial_merge_calls == 1)); } // Test case 2: partial merge should not be called when a put is found. 
resetNumPartialMergeCalls(); tmp_sum = 0; db->Put(ROCKSDB_NAMESPACE::WriteOptions(), "c", "10"); for (size_t i = 1; i <= count; i++) { counters->assert_add("c", i); tmp_sum += i; } db->Flush(o); db->CompactRange(CompactRangeOptions(), nullptr, nullptr); ASSERT_EQ(tmp_sum, counters->assert_get("c")); ASSERT_EQ(num_partial_merge_calls, 0U); } void testSingleBatchSuccessiveMerge(DB* db, size_t max_num_merges, size_t num_merges) { assert(num_merges > max_num_merges); Slice key("BatchSuccessiveMerge"); uint64_t merge_value = 1; char buf[sizeof(merge_value)]; EncodeFixed64(buf, merge_value); Slice merge_value_slice(buf, sizeof(merge_value)); // Create the batch WriteBatch batch; for (size_t i = 0; i < num_merges; ++i) { batch.Merge(key, merge_value_slice); } // Apply to memtable and count the number of merges resetNumMergeOperatorCalls(); { Status s = db->Write(WriteOptions(), &batch); assert(s.ok()); } ASSERT_EQ( num_merge_operator_calls, static_cast(num_merges - (num_merges % (max_num_merges + 1)))); // Get the value resetNumMergeOperatorCalls(); std::string get_value_str; { Status s = db->Get(ReadOptions(), key, &get_value_str); assert(s.ok()); } assert(get_value_str.size() == sizeof(uint64_t)); uint64_t get_value = DecodeFixed64(&get_value_str[0]); ASSERT_EQ(get_value, num_merges * merge_value); ASSERT_EQ(num_merge_operator_calls, static_cast((num_merges % (max_num_merges + 1)))); } void runTest(const std::string& dbname, const bool use_ttl = false) { { auto db = OpenDb(dbname, use_ttl); { Counters counters(db, 0); testCounters(counters, db.get(), true); } { MergeBasedCounters counters(db, 0); testCounters(counters, db.get(), use_compression); } } DestroyDB(dbname, Options()); { size_t max_merge = 5; auto db = OpenDb(dbname, use_ttl, max_merge); MergeBasedCounters counters(db, 0); testCounters(counters, db.get(), use_compression); testSuccessiveMerge(counters, max_merge, max_merge * 2); testSingleBatchSuccessiveMerge(db.get(), 5, 7); DestroyDB(dbname, Options()); } { 
size_t max_merge = 100; // Min merge is hard-coded to 2. uint32_t min_merge = 2; for (uint32_t count = min_merge - 1; count <= min_merge + 1; count++) { auto db = OpenDb(dbname, use_ttl, max_merge); MergeBasedCounters counters(db, 0); testPartialMerge(&counters, db.get(), max_merge, min_merge, count); DestroyDB(dbname, Options()); } { auto db = OpenDb(dbname, use_ttl, max_merge); MergeBasedCounters counters(db, 0); testPartialMerge(&counters, db.get(), max_merge, min_merge, min_merge * 10); DestroyDB(dbname, Options()); } } { { auto db = OpenDb(dbname); MergeBasedCounters counters(db, 0); counters.add("test-key", 1); counters.add("test-key", 1); counters.add("test-key", 1); db->CompactRange(CompactRangeOptions(), nullptr, nullptr); } DB* reopen_db; ASSERT_OK(DB::Open(Options(), dbname, &reopen_db)); std::string value; ASSERT_TRUE(!(reopen_db->Get(ReadOptions(), "test-key", &value).ok())); delete reopen_db; DestroyDB(dbname, Options()); } /* Temporary remove this test { std::cout << "Test merge-operator not set after reopen (recovery case)\n"; { auto db = OpenDb(dbname); MergeBasedCounters counters(db, 0); counters.add("test-key", 1); counters.add("test-key", 1); counters.add("test-key", 1); } DB* reopen_db; ASSERT_TRUE(DB::Open(Options(), dbname, &reopen_db).IsInvalidArgument()); } */ } TEST_F(MergeTest, MergeDbTest) { runTest(test::PerThreadDBPath("merge_testdb")); } #ifndef ROCKSDB_LITE TEST_F(MergeTest, MergeDbTtlTest) { runTest(test::PerThreadDBPath("merge_testdbttl"), true); // Run test on TTL database } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::use_compression = false; if (argc > 1) { ROCKSDB_NAMESPACE::use_compression = true; } ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/obsolete_files_test.cc000066400000000000000000000273541370372246700202110ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef ROCKSDB_LITE #include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/version_set.h" #include "db/write_batch_internal.h" #include "file/filename.h" #include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/transaction_log.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" using std::cerr; using std::cout; using std::endl; using std::flush; namespace ROCKSDB_NAMESPACE { class ObsoleteFilesTest : public DBTestBase { public: ObsoleteFilesTest() : DBTestBase("/obsolete_files_test"), wal_dir_(dbname_ + "/wal_files") {} void AddKeys(int numkeys, int startkey) { WriteOptions options; options.sync = false; for (int i = startkey; i < (numkeys + startkey) ; i++) { std::string temp = ToString(i); Slice key(temp); Slice value(temp); ASSERT_OK(db_->Put(options, key, value)); } } void createLevel0Files(int numFiles, int numKeysPerFile) { int startKey = 0; for (int i = 0; i < numFiles; i++) { AddKeys(numKeysPerFile, startKey); startKey += numKeysPerFile; ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } } void CheckFileTypeCounts(const std::string& dir, int required_log, int required_sst, int required_manifest) { std::vector filenames; env_->GetChildren(dir, &filenames); int log_cnt = 0; int sst_cnt = 0; int manifest_cnt = 0; for (auto file : filenames) { uint64_t number; FileType type; if 
(ParseFileName(file, &number, &type)) { log_cnt += (type == kLogFile); sst_cnt += (type == kTableFile); manifest_cnt += (type == kDescriptorFile); } } ASSERT_EQ(required_log, log_cnt); ASSERT_EQ(required_sst, sst_cnt); ASSERT_EQ(required_manifest, manifest_cnt); } void ReopenDB() { Options options = CurrentOptions(); // Trigger compaction when the number of level 0 files reaches 2. options.create_if_missing = true; options.level0_file_num_compaction_trigger = 2; options.disable_auto_compactions = false; options.delete_obsolete_files_period_micros = 0; // always do full purge options.enable_thread_tracking = true; options.write_buffer_size = 1024 * 1024 * 1000; options.target_file_size_base = 1024 * 1024 * 1000; options.max_bytes_for_level_base = 1024 * 1024 * 1000; options.WAL_ttl_seconds = 300; // Used to test log files options.WAL_size_limit_MB = 1024; // Used to test log files options.wal_dir = wal_dir_; Destroy(options); Reopen(options); } const std::string wal_dir_; }; TEST_F(ObsoleteFilesTest, RaceForObsoleteFileDeletion) { ReopenDB(); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->LoadDependency({ {"DBImpl::BackgroundCallCompaction:FoundObsoleteFiles", "ObsoleteFilesTest::RaceForObsoleteFileDeletion:1"}, {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles", "ObsoleteFilesTest::RaceForObsoleteFileDeletion:2"}, }); SyncPoint::GetInstance()->SetCallBack( "DBImpl::DeleteObsoleteFileImpl:AfterDeletion", [&](void* arg) { Status* p_status = reinterpret_cast(arg); ASSERT_OK(*p_status); }); SyncPoint::GetInstance()->SetCallBack( "DBImpl::CloseHelper:PendingPurgeFinished", [&](void* arg) { std::unordered_set* files_grabbed_for_purge_ptr = reinterpret_cast*>(arg); ASSERT_TRUE(files_grabbed_for_purge_ptr->empty()); }); SyncPoint::GetInstance()->EnableProcessing(); createLevel0Files(2, 50000); CheckFileTypeCounts(wal_dir_, 1, 0, 0); port::Thread user_thread([this]() { JobContext jobCxt(0); 
// Registers two blob files through a VersionEdit (one consisting entirely of
// garbage, one live), then checks that FindObsoleteFiles() grabs only the
// garbage one for purge and that PurgeObsoleteFiles() deletes exactly the
// expected blob files from a hand-built full-scan candidate list.
TEST_F(ObsoleteFilesTest, BlobFiles) {
  VersionSet* const versions = dbfull()->TEST_GetVersionSet();
  assert(versions);
  assert(versions->GetColumnFamilySet());

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  assert(cfd);

  // Add a blob file that consists of nothing but garbage (and is thus obsolete)
  // and another one that is live.
  VersionEdit edit;

  constexpr uint64_t first_blob_file_number = 234;
  constexpr uint64_t first_total_blob_count = 555;
  constexpr uint64_t first_total_blob_bytes = 66666;
  constexpr char first_checksum_method[] = "CRC32";
  constexpr char first_checksum_value[] = "3d87ff57";

  edit.AddBlobFile(first_blob_file_number, first_total_blob_count,
                   first_total_blob_bytes, first_checksum_method,
                   first_checksum_value);
  // Garbage count/bytes equal the totals, i.e. the whole file is garbage.
  edit.AddBlobFileGarbage(first_blob_file_number, first_total_blob_count,
                          first_total_blob_bytes);

  constexpr uint64_t second_blob_file_number = 456;
  constexpr uint64_t second_total_blob_count = 100;
  constexpr uint64_t second_total_blob_bytes = 2000000;
  constexpr char second_checksum_method[] = "CRC32B";
  constexpr char second_checksum_value[] = "6dbdf23a";
  edit.AddBlobFile(second_blob_file_number, second_total_blob_count,
                   second_total_blob_bytes, second_checksum_method,
                   second_checksum_value);

  dbfull()->TEST_LockMutex();
  Status s = versions->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
                                   &edit, dbfull()->mutex());
  dbfull()->TEST_UnlockMutex();

  ASSERT_OK(s);

  // Check for obsolete files and make sure the first blob file is picked up
  // and grabbed for purge. The second blob file should be on the live list.
  constexpr int job_id = 0;
  JobContext job_context{job_id};

  dbfull()->TEST_LockMutex();
  constexpr bool force_full_scan = false;
  dbfull()->FindObsoleteFiles(&job_context, force_full_scan);
  dbfull()->TEST_UnlockMutex();

  ASSERT_TRUE(job_context.HaveSomethingToDelete());
  ASSERT_EQ(job_context.blob_delete_files.size(), 1);
  ASSERT_EQ(job_context.blob_delete_files[0].GetBlobFileNumber(),
            first_blob_file_number);

  const auto& files_grabbed_for_purge =
      dbfull()->TEST_GetFilesGrabbedForPurge();
  ASSERT_NE(files_grabbed_for_purge.find(first_blob_file_number),
            files_grabbed_for_purge.end());

  ASSERT_EQ(job_context.blob_live.size(), 1);
  ASSERT_EQ(job_context.blob_live[0], second_blob_file_number);

  // Hack the job context a bit by adding a few files to the full scan
  // list and adjusting the pending file number. We add the two files
  // above as well as two additional ones, where one is old
  // and should be cleaned up, and the other is still pending.
  assert(cfd->ioptions());
  assert(!cfd->ioptions()->cf_paths.empty());
  const std::string& path = cfd->ioptions()->cf_paths.front().path;

  constexpr uint64_t old_blob_file_number = 123;
  constexpr uint64_t pending_blob_file_number = 567;

  job_context.full_scan_candidate_files.emplace_back(
      BlobFileName(old_blob_file_number), path);
  job_context.full_scan_candidate_files.emplace_back(
      BlobFileName(first_blob_file_number), path);
  job_context.full_scan_candidate_files.emplace_back(
      BlobFileName(second_blob_file_number), path);
  job_context.full_scan_candidate_files.emplace_back(
      BlobFileName(pending_blob_file_number), path);

  job_context.min_pending_output = pending_blob_file_number;

  // Purge obsolete files and make sure we purge the old file and the first file
  // (and keep the second file and the pending file).
  std::vector<std::string> deleted_files;
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DeleteObsoleteFileImpl::BeforeDeletion", [&](void* arg) {
        const std::string* file = static_cast<const std::string*>(arg);
        assert(file);

        // Only record deletions of blob files.
        constexpr char blob_extension[] = ".blob";

        if (file->find(blob_extension) != std::string::npos) {
          deleted_files.emplace_back(*file);
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();

  dbfull()->PurgeObsoleteFiles(job_context);
  job_context.Clean();

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();

  ASSERT_EQ(files_grabbed_for_purge.find(first_blob_file_number),
            files_grabbed_for_purge.end());

  // Sort so the comparison does not depend on deletion order.
  std::sort(deleted_files.begin(), deleted_files.end());

  const std::vector<std::string> expected_deleted_files{
      BlobFileName(path, old_blob_file_number),
      BlobFileName(path, first_blob_file_number)};

  ASSERT_EQ(deleted_files, expected_deleted_files);
}
#ifndef ROCKSDB_LITE #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "rocksdb/options.h" #include "rocksdb/table.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { class OptionsFileTest : public testing::Test { public: OptionsFileTest() : dbname_(test::PerThreadDBPath("options_file_test")) {} std::string dbname_; }; namespace { void UpdateOptionsFiles(DB* db, std::unordered_set* filename_history, int* options_files_count) { std::vector filenames; db->GetEnv()->GetChildren(db->GetName(), &filenames); uint64_t number; FileType type; *options_files_count = 0; for (auto filename : filenames) { if (ParseFileName(filename, &number, &type) && type == kOptionsFile) { filename_history->insert(filename); (*options_files_count)++; } } } // Verify whether the current Options Files are the latest ones. void VerifyOptionsFileName( DB* db, const std::unordered_set& past_filenames) { std::vector filenames; std::unordered_set current_filenames; db->GetEnv()->GetChildren(db->GetName(), &filenames); uint64_t number; FileType type; for (auto filename : filenames) { if (ParseFileName(filename, &number, &type) && type == kOptionsFile) { current_filenames.insert(filename); } } for (auto past_filename : past_filenames) { if (current_filenames.find(past_filename) != current_filenames.end()) { continue; } for (auto filename : current_filenames) { ASSERT_GT(filename, past_filename); } } } } // namespace TEST_F(OptionsFileTest, NumberOfOptionsFiles) { const int kReopenCount = 20; Options opt; opt.create_if_missing = true; DestroyDB(dbname_, opt); std::unordered_set filename_history; DB* db; for (int i = 0; i < kReopenCount; ++i) { ASSERT_OK(DB::Open(opt, dbname_, &db)); int num_options_files = 0; UpdateOptionsFiles(db, &filename_history, &num_options_files); ASSERT_GT(num_options_files, 0); ASSERT_LE(num_options_files, 2); // Make sure we always keep the latest option files. 
// Round-trips generated options-file names through ParseFileName(): a regular
// options file must parse as kOptionsFile, a temporary one as kTempFile (and
// must contain kTempFileNameSuffix); both must yield the original number.
TEST_F(OptionsFileTest, OptionsFileName) {
  uint64_t parsed_number;
  FileType parsed_type;

  // Regular options file.
  const uint64_t kOptionsFileNum = 12345;
  const auto regular_name = OptionsFileName("", kOptionsFileNum);
  ASSERT_TRUE(
      ParseFileName(regular_name, &parsed_number, &parsed_type, nullptr));
  ASSERT_EQ(parsed_type, kOptionsFile);
  ASSERT_EQ(parsed_number, kOptionsFileNum);

  // Temporary options file.
  const uint64_t kTempOptionsFileNum = 54352;
  const auto temp_name = TempOptionsFileName("", kTempOptionsFileNum);
  ASSERT_TRUE(ParseFileName(temp_name, &parsed_number, &parsed_type, nullptr));
  ASSERT_NE(temp_name.find(kTempFileNameSuffix), std::string::npos);
  ASSERT_EQ(parsed_type, kTempFile);
  ASSERT_EQ(parsed_number, kTempOptionsFileNum);
}
// #include #include #include #include #include "monitoring/histogram.h" #include "monitoring/instrumented_mutex.h" #include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_util.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/perf_context.h" #include "rocksdb/slice_transform.h" #include "test_util/testharness.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "utilities/merge_operators.h" bool FLAGS_random_key = false; bool FLAGS_use_set_based_memetable = false; int FLAGS_total_keys = 100; int FLAGS_write_buffer_size = 1000000000; int FLAGS_max_write_buffer_number = 8; int FLAGS_min_write_buffer_number_to_merge = 7; bool FLAGS_verbose = false; // Path to the database on file system const std::string kDbName = ROCKSDB_NAMESPACE::test::PerThreadDBPath("perf_context_test"); namespace ROCKSDB_NAMESPACE { std::shared_ptr OpenDb(bool read_only = false) { DB* db; Options options; options.create_if_missing = true; options.max_open_files = -1; options.write_buffer_size = FLAGS_write_buffer_size; options.max_write_buffer_number = FLAGS_max_write_buffer_number; options.min_write_buffer_number_to_merge = FLAGS_min_write_buffer_number_to_merge; if (FLAGS_use_set_based_memetable) { #ifndef ROCKSDB_LITE options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(0)); options.memtable_factory.reset(NewHashSkipListRepFactory()); #endif // ROCKSDB_LITE } Status s; if (!read_only) { s = DB::Open(options, kDbName, &db); } else { s = DB::OpenForReadOnly(options, kDbName, &db); } EXPECT_OK(s); return std::shared_ptr(db); } class PerfContextTest : public testing::Test {}; TEST_F(PerfContextTest, SeekIntoDeletion) { DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; for (int i = 0; i < FLAGS_total_keys; ++i) { std::string key = "k" + ToString(i); std::string value = "v" + ToString(i); db->Put(write_options, key, value); } for (int 
i = 0; i < FLAGS_total_keys -1 ; ++i) { std::string key = "k" + ToString(i); db->Delete(write_options, key); } HistogramImpl hist_get; HistogramImpl hist_get_time; for (int i = 0; i < FLAGS_total_keys - 1; ++i) { std::string key = "k" + ToString(i); std::string value; get_perf_context()->Reset(); StopWatchNano timer(Env::Default()); timer.Start(); auto status = db->Get(read_options, key, &value); auto elapsed_nanos = timer.ElapsedNanos(); ASSERT_TRUE(status.IsNotFound()); hist_get.Add(get_perf_context()->user_key_comparison_count); hist_get_time.Add(elapsed_nanos); } if (FLAGS_verbose) { std::cout << "Get user key comparison: \n" << hist_get.ToString() << "Get time: \n" << hist_get_time.ToString(); } { HistogramImpl hist_seek_to_first; std::unique_ptr iter(db->NewIterator(read_options)); get_perf_context()->Reset(); StopWatchNano timer(Env::Default(), true); iter->SeekToFirst(); hist_seek_to_first.Add(get_perf_context()->user_key_comparison_count); auto elapsed_nanos = timer.ElapsedNanos(); if (FLAGS_verbose) { std::cout << "SeekToFirst uesr key comparison: \n" << hist_seek_to_first.ToString() << "ikey skipped: " << get_perf_context()->internal_key_skipped_count << "\n" << "idelete skipped: " << get_perf_context()->internal_delete_skipped_count << "\n" << "elapsed: " << elapsed_nanos << "\n"; } } HistogramImpl hist_seek; for (int i = 0; i < FLAGS_total_keys; ++i) { std::unique_ptr iter(db->NewIterator(read_options)); std::string key = "k" + ToString(i); get_perf_context()->Reset(); StopWatchNano timer(Env::Default(), true); iter->Seek(key); auto elapsed_nanos = timer.ElapsedNanos(); hist_seek.Add(get_perf_context()->user_key_comparison_count); if (FLAGS_verbose) { std::cout << "seek cmp: " << get_perf_context()->user_key_comparison_count << " ikey skipped " << get_perf_context()->internal_key_skipped_count << " idelete skipped " << get_perf_context()->internal_delete_skipped_count << " elapsed: " << elapsed_nanos << "ns\n"; } get_perf_context()->Reset(); 
ASSERT_TRUE(iter->Valid()); StopWatchNano timer2(Env::Default(), true); iter->Next(); auto elapsed_nanos2 = timer2.ElapsedNanos(); if (FLAGS_verbose) { std::cout << "next cmp: " << get_perf_context()->user_key_comparison_count << "elapsed: " << elapsed_nanos2 << "ns\n"; } } if (FLAGS_verbose) { std::cout << "Seek uesr key comparison: \n" << hist_seek.ToString(); } } TEST_F(PerfContextTest, StopWatchNanoOverhead) { // profile the timer cost by itself! const int kTotalIterations = 1000000; std::vector timings(kTotalIterations); StopWatchNano timer(Env::Default(), true); for (auto& timing : timings) { timing = timer.ElapsedNanos(true /* reset */); } HistogramImpl histogram; for (const auto timing : timings) { histogram.Add(timing); } if (FLAGS_verbose) { std::cout << histogram.ToString(); } } TEST_F(PerfContextTest, StopWatchOverhead) { // profile the timer cost by itself! const int kTotalIterations = 1000000; uint64_t elapsed = 0; std::vector timings(kTotalIterations); StopWatch timer(Env::Default(), nullptr, 0, &elapsed); for (auto& timing : timings) { timing = elapsed; } HistogramImpl histogram; uint64_t prev_timing = 0; for (const auto timing : timings) { histogram.Add(timing - prev_timing); prev_timing = timing; } if (FLAGS_verbose) { std::cout << histogram.ToString(); } } void ProfileQueries(bool enabled_time = false) { DestroyDB(kDbName, Options()); // Start this test with a fresh DB auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; HistogramImpl hist_put; HistogramImpl hist_get; HistogramImpl hist_get_snapshot; HistogramImpl hist_get_memtable; HistogramImpl hist_get_files; HistogramImpl hist_get_post_process; HistogramImpl hist_num_memtable_checked; HistogramImpl hist_mget; HistogramImpl hist_mget_snapshot; HistogramImpl hist_mget_memtable; HistogramImpl hist_mget_files; HistogramImpl hist_mget_post_process; HistogramImpl hist_mget_num_memtable_checked; HistogramImpl hist_write_pre_post; HistogramImpl hist_write_wal_time; HistogramImpl 
hist_write_memtable_time; HistogramImpl hist_write_delay_time; HistogramImpl hist_write_thread_wait_nanos; HistogramImpl hist_write_scheduling_time; uint64_t total_db_mutex_nanos = 0; if (FLAGS_verbose) { std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n"; } std::vector keys; const int kFlushFlag = -1; for (int i = 0; i < FLAGS_total_keys; ++i) { keys.push_back(i); if (i == FLAGS_total_keys / 2) { // Issuing a flush in the middle. keys.push_back(kFlushFlag); } } if (FLAGS_random_key) { RandomShuffle(std::begin(keys), std::end(keys)); } #ifndef NDEBUG ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 1U); #endif int num_mutex_waited = 0; for (const int i : keys) { if (i == kFlushFlag) { FlushOptions fo; db->Flush(fo); continue; } std::string key = "k" + ToString(i); std::string value = "v" + ToString(i); std::vector values; get_perf_context()->Reset(); db->Put(write_options, key, value); if (++num_mutex_waited > 3) { #ifndef NDEBUG ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0U); #endif } hist_write_pre_post.Add( get_perf_context()->write_pre_and_post_process_time); hist_write_wal_time.Add(get_perf_context()->write_wal_time); hist_write_memtable_time.Add(get_perf_context()->write_memtable_time); hist_write_delay_time.Add(get_perf_context()->write_delay_time); hist_write_thread_wait_nanos.Add( get_perf_context()->write_thread_wait_nanos); hist_write_scheduling_time.Add( get_perf_context()->write_scheduling_flushes_compactions_time); hist_put.Add(get_perf_context()->user_key_comparison_count); total_db_mutex_nanos += get_perf_context()->db_mutex_lock_nanos; } #ifndef NDEBUG ThreadStatusUtil::TEST_SetStateDelay(ThreadStatus::STATE_MUTEX_WAIT, 0U); #endif for (const int i : keys) { if (i == kFlushFlag) { continue; } std::string key = "k" + ToString(i); std::string expected_value = "v" + ToString(i); std::string value; std::vector multiget_keys = {Slice(key)}; std::vector values; 
get_perf_context()->Reset(); ASSERT_OK(db->Get(read_options, key, &value)); ASSERT_EQ(expected_value, value); hist_get_snapshot.Add(get_perf_context()->get_snapshot_time); hist_get_memtable.Add(get_perf_context()->get_from_memtable_time); hist_get_files.Add(get_perf_context()->get_from_output_files_time); hist_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count); hist_get_post_process.Add(get_perf_context()->get_post_process_time); hist_get.Add(get_perf_context()->user_key_comparison_count); get_perf_context()->Reset(); db->MultiGet(read_options, multiget_keys, &values); hist_mget_snapshot.Add(get_perf_context()->get_snapshot_time); hist_mget_memtable.Add(get_perf_context()->get_from_memtable_time); hist_mget_files.Add(get_perf_context()->get_from_output_files_time); hist_mget_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count); hist_mget_post_process.Add(get_perf_context()->get_post_process_time); hist_mget.Add(get_perf_context()->user_key_comparison_count); } if (FLAGS_verbose) { std::cout << "Put uesr key comparison: \n" << hist_put.ToString() << "Get uesr key comparison: \n" << hist_get.ToString() << "MultiGet uesr key comparison: \n" << hist_get.ToString(); std::cout << "Put(): Pre and Post Process Time: \n" << hist_write_pre_post.ToString() << " Writing WAL time: \n" << hist_write_wal_time.ToString() << "\n" << " Writing Mem Table time: \n" << hist_write_memtable_time.ToString() << "\n" << " Write Delay: \n" << hist_write_delay_time.ToString() << "\n" << " Waiting for Batch time: \n" << hist_write_thread_wait_nanos.ToString() << "\n" << " Scheduling Flushes and Compactions Time: \n" << hist_write_scheduling_time.ToString() << "\n" << " Total DB mutex nanos: \n" << total_db_mutex_nanos << "\n"; std::cout << "Get(): Time to get snapshot: \n" << hist_get_snapshot.ToString() << " Time to get value from memtables: \n" << hist_get_memtable.ToString() << "\n" << " Time to get value from output files: \n" << 
hist_get_files.ToString() << "\n" << " Number of memtables checked: \n" << hist_num_memtable_checked.ToString() << "\n" << " Time to post process: \n" << hist_get_post_process.ToString() << "\n"; std::cout << "MultiGet(): Time to get snapshot: \n" << hist_mget_snapshot.ToString() << " Time to get value from memtables: \n" << hist_mget_memtable.ToString() << "\n" << " Time to get value from output files: \n" << hist_mget_files.ToString() << "\n" << " Number of memtables checked: \n" << hist_mget_num_memtable_checked.ToString() << "\n" << " Time to post process: \n" << hist_mget_post_process.ToString() << "\n"; } if (enabled_time) { ASSERT_GT(hist_get.Average(), 0); ASSERT_GT(hist_get_snapshot.Average(), 0); ASSERT_GT(hist_get_memtable.Average(), 0); ASSERT_GT(hist_get_files.Average(), 0); ASSERT_GT(hist_get_post_process.Average(), 0); ASSERT_GT(hist_num_memtable_checked.Average(), 0); ASSERT_GT(hist_mget.Average(), 0); ASSERT_GT(hist_mget_snapshot.Average(), 0); ASSERT_GT(hist_mget_memtable.Average(), 0); ASSERT_GT(hist_mget_files.Average(), 0); ASSERT_GT(hist_mget_post_process.Average(), 0); ASSERT_GT(hist_mget_num_memtable_checked.Average(), 0); EXPECT_GT(hist_write_pre_post.Average(), 0); EXPECT_GT(hist_write_wal_time.Average(), 0); EXPECT_GT(hist_write_memtable_time.Average(), 0); EXPECT_EQ(hist_write_delay_time.Average(), 0); EXPECT_EQ(hist_write_thread_wait_nanos.Average(), 0); EXPECT_GT(hist_write_scheduling_time.Average(), 0); #ifndef NDEBUG ASSERT_GT(total_db_mutex_nanos, 2000U); #endif } db.reset(); db = OpenDb(true); hist_get.Clear(); hist_get_snapshot.Clear(); hist_get_memtable.Clear(); hist_get_files.Clear(); hist_get_post_process.Clear(); hist_num_memtable_checked.Clear(); hist_mget.Clear(); hist_mget_snapshot.Clear(); hist_mget_memtable.Clear(); hist_mget_files.Clear(); hist_mget_post_process.Clear(); hist_mget_num_memtable_checked.Clear(); for (const int i : keys) { if (i == kFlushFlag) { continue; } std::string key = "k" + ToString(i); std::string 
expected_value = "v" + ToString(i); std::string value; std::vector multiget_keys = {Slice(key)}; std::vector values; get_perf_context()->Reset(); ASSERT_OK(db->Get(read_options, key, &value)); ASSERT_EQ(expected_value, value); hist_get_snapshot.Add(get_perf_context()->get_snapshot_time); hist_get_memtable.Add(get_perf_context()->get_from_memtable_time); hist_get_files.Add(get_perf_context()->get_from_output_files_time); hist_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count); hist_get_post_process.Add(get_perf_context()->get_post_process_time); hist_get.Add(get_perf_context()->user_key_comparison_count); get_perf_context()->Reset(); db->MultiGet(read_options, multiget_keys, &values); hist_mget_snapshot.Add(get_perf_context()->get_snapshot_time); hist_mget_memtable.Add(get_perf_context()->get_from_memtable_time); hist_mget_files.Add(get_perf_context()->get_from_output_files_time); hist_mget_num_memtable_checked.Add(get_perf_context()->get_from_memtable_count); hist_mget_post_process.Add(get_perf_context()->get_post_process_time); hist_mget.Add(get_perf_context()->user_key_comparison_count); } if (FLAGS_verbose) { std::cout << "ReadOnly Get uesr key comparison: \n" << hist_get.ToString() << "ReadOnly MultiGet uesr key comparison: \n" << hist_mget.ToString(); std::cout << "ReadOnly Get(): Time to get snapshot: \n" << hist_get_snapshot.ToString() << " Time to get value from memtables: \n" << hist_get_memtable.ToString() << "\n" << " Time to get value from output files: \n" << hist_get_files.ToString() << "\n" << " Number of memtables checked: \n" << hist_num_memtable_checked.ToString() << "\n" << " Time to post process: \n" << hist_get_post_process.ToString() << "\n"; std::cout << "ReadOnly MultiGet(): Time to get snapshot: \n" << hist_mget_snapshot.ToString() << " Time to get value from memtables: \n" << hist_mget_memtable.ToString() << "\n" << " Time to get value from output files: \n" << hist_mget_files.ToString() << "\n" << " Number of memtables 
checked: \n" << hist_mget_num_memtable_checked.ToString() << "\n" << " Time to post process: \n" << hist_mget_post_process.ToString() << "\n"; } if (enabled_time) { ASSERT_GT(hist_get.Average(), 0); ASSERT_GT(hist_get_memtable.Average(), 0); ASSERT_GT(hist_get_files.Average(), 0); ASSERT_GT(hist_num_memtable_checked.Average(), 0); // In read-only mode Get(), no super version operation is needed ASSERT_EQ(hist_get_post_process.Average(), 0); ASSERT_GT(hist_get_snapshot.Average(), 0); ASSERT_GT(hist_mget.Average(), 0); ASSERT_GT(hist_mget_snapshot.Average(), 0); ASSERT_GT(hist_mget_memtable.Average(), 0); ASSERT_GT(hist_mget_files.Average(), 0); ASSERT_GT(hist_mget_post_process.Average(), 0); ASSERT_GT(hist_mget_num_memtable_checked.Average(), 0); } } #ifndef ROCKSDB_LITE TEST_F(PerfContextTest, KeyComparisonCount) { SetPerfLevel(kEnableCount); ProfileQueries(); SetPerfLevel(kDisable); ProfileQueries(); SetPerfLevel(kEnableTime); ProfileQueries(true); } #endif // ROCKSDB_LITE // make perf_context_test // export ROCKSDB_TESTS=PerfContextTest.SeekKeyComparison // For one memtable: // ./perf_context_test --write_buffer_size=500000 --total_keys=10000 // For two memtables: // ./perf_context_test --write_buffer_size=250000 --total_keys=10000 // Specify --random_key=1 to shuffle the key before insertion // Results show that, for sequential insertion, worst-case Seek Key comparison // is close to the total number of keys (linear), when there is only one // memtable. When there are two memtables, even the avg Seek Key comparison // starts to become linear to the input size. 
TEST_F(PerfContextTest, SeekKeyComparison) { DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; if (FLAGS_verbose) { std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n"; } std::vector keys; for (int i = 0; i < FLAGS_total_keys; ++i) { keys.push_back(i); } if (FLAGS_random_key) { RandomShuffle(std::begin(keys), std::end(keys)); } HistogramImpl hist_put_time; HistogramImpl hist_wal_time; HistogramImpl hist_time_diff; SetPerfLevel(kEnableTime); StopWatchNano timer(Env::Default()); for (const int i : keys) { std::string key = "k" + ToString(i); std::string value = "v" + ToString(i); get_perf_context()->Reset(); timer.Start(); db->Put(write_options, key, value); auto put_time = timer.ElapsedNanos(); hist_put_time.Add(put_time); hist_wal_time.Add(get_perf_context()->write_wal_time); hist_time_diff.Add(put_time - get_perf_context()->write_wal_time); } if (FLAGS_verbose) { std::cout << "Put time:\n" << hist_put_time.ToString() << "WAL time:\n" << hist_wal_time.ToString() << "time diff:\n" << hist_time_diff.ToString(); } HistogramImpl hist_seek; HistogramImpl hist_next; for (int i = 0; i < FLAGS_total_keys; ++i) { std::string key = "k" + ToString(i); std::string value = "v" + ToString(i); std::unique_ptr iter(db->NewIterator(read_options)); get_perf_context()->Reset(); iter->Seek(key); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->value().ToString(), value); hist_seek.Add(get_perf_context()->user_key_comparison_count); } std::unique_ptr iter(db->NewIterator(read_options)); for (iter->SeekToFirst(); iter->Valid();) { get_perf_context()->Reset(); iter->Next(); hist_next.Add(get_perf_context()->user_key_comparison_count); } if (FLAGS_verbose) { std::cout << "Seek:\n" << hist_seek.ToString() << "Next:\n" << hist_next.ToString(); } } TEST_F(PerfContextTest, DBMutexLockCounter) { int stats_code[] = {0, static_cast(DB_MUTEX_WAIT_MICROS)}; for (PerfLevel perf_level_test : 
{PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) { for (int c = 0; c < 2; ++c) { InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); mutex.Lock(); ROCKSDB_NAMESPACE::port::Thread child_thread([&] { SetPerfLevel(perf_level_test); get_perf_context()->Reset(); ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0); mutex.Lock(); mutex.Unlock(); if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex || stats_code[c] != DB_MUTEX_WAIT_MICROS) { ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0); } else { // increment the counter only when it's a DB Mutex ASSERT_GT(get_perf_context()->db_mutex_lock_nanos, 0); } }); Env::Default()->SleepForMicroseconds(100); mutex.Unlock(); child_thread.join(); } } } TEST_F(PerfContextTest, FalseDBMutexWait) { SetPerfLevel(kEnableTime); int stats_code[] = {0, static_cast(DB_MUTEX_WAIT_MICROS)}; for (int c = 0; c < 2; ++c) { InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); InstrumentedCondVar lock(&mutex); get_perf_context()->Reset(); mutex.Lock(); lock.TimedWait(100); mutex.Unlock(); if (stats_code[c] == static_cast(DB_MUTEX_WAIT_MICROS)) { // increment the counter only when it's a DB Mutex ASSERT_GT(get_perf_context()->db_condition_wait_nanos, 0); } else { ASSERT_EQ(get_perf_context()->db_condition_wait_nanos, 0); } } } TEST_F(PerfContextTest, ToString) { get_perf_context()->Reset(); get_perf_context()->block_read_count = 12345; std::string zero_included = get_perf_context()->ToString(); ASSERT_NE(std::string::npos, zero_included.find("= 0")); ASSERT_NE(std::string::npos, zero_included.find("= 12345")); std::string zero_excluded = get_perf_context()->ToString(true); ASSERT_EQ(std::string::npos, zero_excluded.find("= 0")); ASSERT_NE(std::string::npos, zero_excluded.find("= 12345")); } TEST_F(PerfContextTest, MergeOperatorTime) { DestroyDB(kDbName, Options()); DB* db; Options options; options.create_if_missing = true; options.merge_operator = MergeOperators::CreateStringAppendOperator(); 
Status s = DB::Open(options, kDbName, &db); EXPECT_OK(s); std::string val; ASSERT_OK(db->Merge(WriteOptions(), "k1", "val1")); ASSERT_OK(db->Merge(WriteOptions(), "k1", "val2")); ASSERT_OK(db->Merge(WriteOptions(), "k1", "val3")); ASSERT_OK(db->Merge(WriteOptions(), "k1", "val4")); SetPerfLevel(kEnableTime); get_perf_context()->Reset(); ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); #ifdef OS_SOLARIS for (int i = 0; i < 100; i++) { ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); } #endif EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0); ASSERT_OK(db->Flush(FlushOptions())); get_perf_context()->Reset(); ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); #ifdef OS_SOLARIS for (int i = 0; i < 100; i++) { ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); } #endif EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0); ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr)); get_perf_context()->Reset(); ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); #ifdef OS_SOLARIS for (int i = 0; i < 100; i++) { ASSERT_OK(db->Get(ReadOptions(), "k1", &val)); } #endif EXPECT_GT(get_perf_context()->merge_operator_time_nanos, 0); delete db; } TEST_F(PerfContextTest, CopyAndMove) { // Assignment operator { get_perf_context()->Reset(); get_perf_context()->EnablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); ASSERT_EQ( 1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); PerfContext perf_context_assign; perf_context_assign = *get_perf_context(); ASSERT_EQ( 1, (*(perf_context_assign.level_to_perf_context))[5].bloom_filter_useful); get_perf_context()->ClearPerLevelPerfContext(); get_perf_context()->Reset(); ASSERT_EQ( 1, (*(perf_context_assign.level_to_perf_context))[5].bloom_filter_useful); perf_context_assign.ClearPerLevelPerfContext(); perf_context_assign.Reset(); } // Copy constructor { get_perf_context()->Reset(); get_perf_context()->EnablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); 
ASSERT_EQ( 1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); PerfContext perf_context_copy(*get_perf_context()); ASSERT_EQ( 1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful); get_perf_context()->ClearPerLevelPerfContext(); get_perf_context()->Reset(); ASSERT_EQ( 1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful); perf_context_copy.ClearPerLevelPerfContext(); perf_context_copy.Reset(); } // Move constructor { get_perf_context()->Reset(); get_perf_context()->EnablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); ASSERT_EQ( 1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); PerfContext perf_context_move = std::move(*get_perf_context()); ASSERT_EQ( 1, (*(perf_context_move.level_to_perf_context))[5].bloom_filter_useful); get_perf_context()->ClearPerLevelPerfContext(); get_perf_context()->Reset(); ASSERT_EQ( 1, (*(perf_context_move.level_to_perf_context))[5].bloom_filter_useful); perf_context_move.ClearPerLevelPerfContext(); perf_context_move.Reset(); } } TEST_F(PerfContextTest, PerfContextDisableEnable) { get_perf_context()->Reset(); get_perf_context()->EnablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0); get_perf_context()->DisablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); get_perf_context()->EnablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, 0); get_perf_context()->DisablePerLevelPerfContext(); PerfContext perf_context_copy(*get_perf_context()); ASSERT_EQ(1, (*(perf_context_copy.level_to_perf_context))[0] .bloom_filter_full_positive); // this was set when per level perf context is disabled, should not be copied ASSERT_NE( 1, (*(perf_context_copy.level_to_perf_context))[5].bloom_filter_useful); ASSERT_EQ( 1, (*(perf_context_copy.level_to_perf_context))[0].block_cache_hit_count); perf_context_copy.ClearPerLevelPerfContext(); perf_context_copy.Reset(); 
get_perf_context()->ClearPerLevelPerfContext(); get_perf_context()->Reset(); } TEST_F(PerfContextTest, PerfContextByLevelGetSet) { get_perf_context()->Reset(); get_perf_context()->EnablePerLevelPerfContext(); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, 0); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 5); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, 7); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, 2); PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, 0); PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 5, 2); PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 2, 3); PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 4, 1); ASSERT_EQ( 0, (*(get_perf_context()->level_to_perf_context))[0].bloom_filter_useful); ASSERT_EQ( 1, (*(get_perf_context()->level_to_perf_context))[5].bloom_filter_useful); ASSERT_EQ( 2, (*(get_perf_context()->level_to_perf_context))[7].bloom_filter_useful); ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0] .bloom_filter_full_positive); ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[2] .bloom_filter_full_true_positive); ASSERT_EQ(1, (*(get_perf_context()->level_to_perf_context))[0] .block_cache_hit_count); ASSERT_EQ(5, (*(get_perf_context()->level_to_perf_context))[2] .block_cache_hit_count); ASSERT_EQ(2, (*(get_perf_context()->level_to_perf_context))[3] .block_cache_miss_count); ASSERT_EQ(4, (*(get_perf_context()->level_to_perf_context))[1] .block_cache_miss_count); std::string zero_excluded = get_perf_context()->ToString(true); ASSERT_NE(std::string::npos, zero_excluded.find("bloom_filter_useful = 1@level5, 2@level7")); ASSERT_NE(std::string::npos, zero_excluded.find("bloom_filter_full_positive = 1@level0")); ASSERT_NE(std::string::npos, zero_excluded.find("bloom_filter_full_true_positive = 1@level2")); ASSERT_NE(std::string::npos, zero_excluded.find("block_cache_hit_count = 1@level0, 5@level2")); ASSERT_NE(std::string::npos, 
zero_excluded.find("block_cache_miss_count = 4@level1, 2@level3")); } TEST_F(PerfContextTest, CPUTimer) { DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; SetPerfLevel(PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); std::string max_str = "0"; for (int i = 0; i < FLAGS_total_keys; ++i) { std::string i_str = ToString(i); std::string key = "k" + i_str; std::string value = "v" + i_str; max_str = max_str > i_str ? max_str : i_str; db->Put(write_options, key, value); } std::string last_key = "k" + max_str; std::string last_value = "v" + max_str; { // Get get_perf_context()->Reset(); std::string value; ASSERT_OK(db->Get(read_options, "k0", &value)); ASSERT_EQ(value, "v0"); if (FLAGS_verbose) { std::cout << "Get CPU time nanos: " << get_perf_context()->get_cpu_nanos << "ns\n"; } // Iter std::unique_ptr iter(db->NewIterator(read_options)); // Seek get_perf_context()->Reset(); iter->Seek(last_key); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(last_value, iter->value().ToString()); if (FLAGS_verbose) { std::cout << "Iter Seek CPU time nanos: " << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; } // SeekForPrev get_perf_context()->Reset(); iter->SeekForPrev(last_key); ASSERT_TRUE(iter->Valid()); if (FLAGS_verbose) { std::cout << "Iter SeekForPrev CPU time nanos: " << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; } // SeekToLast get_perf_context()->Reset(); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(last_value, iter->value().ToString()); if (FLAGS_verbose) { std::cout << "Iter SeekToLast CPU time nanos: " << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; } // SeekToFirst get_perf_context()->Reset(); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); if (FLAGS_verbose) { std::cout << "Iter SeekToFirst CPU time nanos: " << get_perf_context()->iter_seek_cpu_nanos << "ns\n"; } // Next get_perf_context()->Reset(); iter->Next(); ASSERT_TRUE(iter->Valid()); 
ASSERT_EQ("v1", iter->value().ToString()); if (FLAGS_verbose) { std::cout << "Iter Next CPU time nanos: " << get_perf_context()->iter_next_cpu_nanos << "ns\n"; } // Prev get_perf_context()->Reset(); iter->Prev(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v0", iter->value().ToString()); if (FLAGS_verbose) { std::cout << "Iter Prev CPU time nanos: " << get_perf_context()->iter_prev_cpu_nanos << "ns\n"; } // monotonically increasing get_perf_context()->Reset(); auto count = get_perf_context()->iter_seek_cpu_nanos; for (int i = 0; i < FLAGS_total_keys; ++i) { iter->Seek("k" + ToString(i)); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("v" + ToString(i), iter->value().ToString()); auto next_count = get_perf_context()->iter_seek_cpu_nanos; ASSERT_GT(next_count, count); count = next_count; } // iterator creation/destruction; multiple iterators { std::unique_ptr iter2(db->NewIterator(read_options)); ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos); iter2->Seek(last_key); ASSERT_TRUE(iter2->Valid()); ASSERT_EQ(last_value, iter2->value().ToString()); ASSERT_GT(get_perf_context()->iter_seek_cpu_nanos, count); count = get_perf_context()->iter_seek_cpu_nanos; } ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); for (int i = 1; i < argc; i++) { int n; char junk; if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) { FLAGS_write_buffer_size = n; } if (sscanf(argv[i], "--total_keys=%d%c", &n, &junk) == 1) { FLAGS_total_keys = n; } if (sscanf(argv[i], "--random_key=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_random_key = n; } if (sscanf(argv[i], "--use_set_based_memetable=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_use_set_based_memetable = n; } if (sscanf(argv[i], "--verbose=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_verbose = n; } } if (FLAGS_verbose) { std::cout << kDbName << "\n"; } return RUN_ALL_TESTS(); } 
rocksdb-6.11.4/db/pinned_iterators_manager.h000066400000000000000000000047321370372246700210540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include #include #include #include "table/internal_iterator.h" namespace ROCKSDB_NAMESPACE { // PinnedIteratorsManager will be notified whenever we need to pin an Iterator // and it will be responsible for deleting pinned Iterators when they are // not needed anymore. class PinnedIteratorsManager : public Cleanable { public: PinnedIteratorsManager() : pinning_enabled(false) {} ~PinnedIteratorsManager() { if (pinning_enabled) { ReleasePinnedData(); } } // Enable Iterators pinning void StartPinning() { assert(pinning_enabled == false); pinning_enabled = true; } // Is pinning enabled ? bool PinningEnabled() { return pinning_enabled; } // Take ownership of iter and delete it when ReleasePinnedData() is called void PinIterator(InternalIterator* iter, bool arena = false) { if (arena) { PinPtr(iter, &PinnedIteratorsManager::ReleaseArenaInternalIterator); } else { PinPtr(iter, &PinnedIteratorsManager::ReleaseInternalIterator); } } typedef void (*ReleaseFunction)(void* arg1); void PinPtr(void* ptr, ReleaseFunction release_func) { assert(pinning_enabled); if (ptr == nullptr) { return; } pinned_ptrs_.emplace_back(ptr, release_func); } // Release pinned Iterators inline void ReleasePinnedData() { assert(pinning_enabled == true); pinning_enabled = false; // Remove duplicate pointers std::sort(pinned_ptrs_.begin(), pinned_ptrs_.end()); auto unique_end = std::unique(pinned_ptrs_.begin(), pinned_ptrs_.end()); for (auto i = pinned_ptrs_.begin(); i != unique_end; ++i) { void* ptr = i->first; ReleaseFunction release_func = i->second; release_func(ptr); } pinned_ptrs_.clear(); // Also 
do cleanups from the base Cleanable Cleanable::Reset(); } private: static void ReleaseInternalIterator(void* ptr) { delete reinterpret_cast(ptr); } static void ReleaseArenaInternalIterator(void* ptr) { reinterpret_cast(ptr)->~InternalIterator(); } bool pinning_enabled; std::vector> pinned_ptrs_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/plain_table_db_test.cc000066400000000000000000001352601370372246700201260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef ROCKSDB_LITE #include #include #include "db/db_impl/db_impl.h" #include "db/version_set.h" #include "db/write_batch_internal.h" #include "file/filename.h" #include "logging/logging.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "table/meta_blocks.h" #include "table/plain/plain_table_bloom.h" #include "table/plain/plain_table_factory.h" #include "table/plain/plain_table_key_coding.h" #include "table/plain/plain_table_reader.h" #include "table/table_builder.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/hash.h" #include "util/mutexlock.h" #include "util/string_util.h" #include "utilities/merge_operators.h" using std::unique_ptr; namespace ROCKSDB_NAMESPACE { class PlainTableKeyDecoderTest : public testing::Test {}; TEST_F(PlainTableKeyDecoderTest, ReadNonMmap) { std::string tmp; Random rnd(301); const uint32_t kLength = 2222; Slice 
contents = test::RandomString(&rnd, kLength, &tmp); test::StringSource* string_source = new test::StringSource(contents, 0, false); std::unique_ptr file_reader( test::GetRandomAccessFileReader(string_source)); std::unique_ptr file_info( new PlainTableReaderFileInfo(std::move(file_reader), EnvOptions(), kLength)); { PlainTableFileReader reader(file_info.get()); const uint32_t kReadSize = 77; for (uint32_t pos = 0; pos < kLength; pos += kReadSize) { uint32_t read_size = std::min(kLength - pos, kReadSize); Slice out; ASSERT_TRUE(reader.Read(pos, read_size, &out)); ASSERT_EQ(0, out.compare(tmp.substr(pos, read_size))); } ASSERT_LT(uint32_t(string_source->total_reads()), kLength / kReadSize / 2); } std::vector>> reads = { {{600, 30}, {590, 30}, {600, 20}, {600, 40}}, {{800, 20}, {100, 20}, {500, 20}, {1500, 20}, {100, 20}, {80, 20}}, {{1000, 20}, {500, 20}, {1000, 50}}, {{1000, 20}, {500, 20}, {500, 20}}, {{1000, 20}, {500, 20}, {200, 20}, {500, 20}}, {{1000, 20}, {500, 20}, {200, 20}, {1000, 50}}, {{600, 500}, {610, 20}, {100, 20}}, {{500, 100}, {490, 100}, {550, 50}}, }; std::vector num_file_reads = {2, 6, 2, 2, 4, 3, 2, 2}; for (size_t i = 0; i < reads.size(); i++) { string_source->set_total_reads(0); PlainTableFileReader reader(file_info.get()); for (auto p : reads[i]) { Slice out; ASSERT_TRUE(reader.Read(p.first, p.second, &out)); ASSERT_EQ(0, out.compare(tmp.substr(p.first, p.second))); } ASSERT_EQ(num_file_reads[i], string_source->total_reads()); } } class PlainTableDBTest : public testing::Test, public testing::WithParamInterface { protected: private: std::string dbname_; Env* env_; DB* db_; bool mmap_mode_; Options last_options_; public: PlainTableDBTest() : env_(Env::Default()) {} ~PlainTableDBTest() override { delete db_; EXPECT_OK(DestroyDB(dbname_, Options())); } void SetUp() override { mmap_mode_ = GetParam(); dbname_ = test::PerThreadDBPath("plain_table_db_test"); EXPECT_OK(DestroyDB(dbname_, Options())); db_ = nullptr; Reopen(); } // Return the current 
option configuration. Options CurrentOptions() { Options options; PlainTableOptions plain_table_options; plain_table_options.user_key_len = 0; plain_table_options.bloom_bits_per_key = 2; plain_table_options.hash_table_ratio = 0.8; plain_table_options.index_sparseness = 3; plain_table_options.huge_page_tlb_size = 0; plain_table_options.encoding_type = kPrefix; plain_table_options.full_scan_mode = false; plain_table_options.store_index_in_file = false; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.allow_mmap_reads = mmap_mode_; options.allow_concurrent_memtable_write = false; options.unordered_write = false; return options; } DBImpl* dbfull() { return reinterpret_cast(db_); } void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); } void Close() { delete db_; db_ = nullptr; } bool mmap_mode() const { return mmap_mode_; } void DestroyAndReopen(Options* options = nullptr) { //Destroy using last options Destroy(&last_options_); ASSERT_OK(TryReopen(options)); } void Destroy(Options* options) { delete db_; db_ = nullptr; ASSERT_OK(DestroyDB(dbname_, *options)); } Status PureReopen(Options* options, DB** db) { return DB::Open(*options, dbname_, db); } Status ReopenForReadOnly(Options* options) { delete db_; db_ = nullptr; return DB::OpenForReadOnly(*options, dbname_, &db_); } Status TryReopen(Options* options = nullptr) { delete db_; db_ = nullptr; Options opts; if (options != nullptr) { opts = *options; } else { opts = CurrentOptions(); opts.create_if_missing = true; } last_options_ = opts; return DB::Open(opts, dbname_, &db_); } Status Put(const Slice& k, const Slice& v) { return db_->Put(WriteOptions(), k, v); } Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) { ReadOptions options; 
options.snapshot = snapshot; std::string result; Status s = db_->Get(options, k, &result); if (s.IsNotFound()) { result = "NOT_FOUND"; } else if (!s.ok()) { result = s.ToString(); } return result; } int NumTableFilesAtLevel(int level) { std::string property; EXPECT_TRUE(db_->GetProperty( "rocksdb.num-files-at-level" + NumberToString(level), &property)); return atoi(property.c_str()); } // Return spread of files per level std::string FilesPerLevel() { std::string result; size_t last_non_zero_offset = 0; for (int level = 0; level < db_->NumberLevels(); level++) { int f = NumTableFilesAtLevel(level); char buf[100]; snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); result += buf; if (f > 0) { last_non_zero_offset = result.size(); } } result.resize(last_non_zero_offset); return result; } std::string IterStatus(Iterator* iter) { std::string result; if (iter->Valid()) { result = iter->key().ToString() + "->" + iter->value().ToString(); } else { result = "(invalid)"; } return result; } }; TEST_P(PlainTableDBTest, Empty) { ASSERT_TRUE(dbfull() != nullptr); ASSERT_EQ("NOT_FOUND", Get("0000000000000foo")); } extern const uint64_t kPlainTableMagicNumber; class TestPlainTableReader : public PlainTableReader { public: TestPlainTableReader(const EnvOptions& env_options, const InternalKeyComparator& icomparator, EncodingType encoding_type, uint64_t file_size, int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness, const TableProperties* table_properties, std::unique_ptr&& file, const ImmutableCFOptions& ioptions, const SliceTransform* prefix_extractor, bool* expect_bloom_not_match, bool store_index_in_file, uint32_t column_family_id, const std::string& column_family_name) : PlainTableReader(ioptions, std::move(file), env_options, icomparator, encoding_type, file_size, table_properties, prefix_extractor), expect_bloom_not_match_(expect_bloom_not_match) { Status s = MmapDataIfNeeded(); EXPECT_TRUE(s.ok()); s = PopulateIndex(const_cast(table_properties), 
bloom_bits_per_key, hash_table_ratio, index_sparseness, 2 * 1024 * 1024); EXPECT_TRUE(s.ok()); TableProperties* props = const_cast(table_properties); EXPECT_EQ(column_family_id, static_cast(props->column_family_id)); EXPECT_EQ(column_family_name, props->column_family_name); if (store_index_in_file) { auto bloom_version_ptr = props->user_collected_properties.find( PlainTablePropertyNames::kBloomVersion); EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end()); EXPECT_EQ(bloom_version_ptr->second, std::string("1")); if (ioptions.bloom_locality > 0) { auto num_blocks_ptr = props->user_collected_properties.find( PlainTablePropertyNames::kNumBloomBlocks); EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end()); } } table_properties_.reset(props); } ~TestPlainTableReader() override {} private: bool MatchBloom(uint32_t hash) const override { bool ret = PlainTableReader::MatchBloom(hash); if (*expect_bloom_not_match_) { EXPECT_TRUE(!ret); } else { EXPECT_TRUE(ret); } return ret; } bool* expect_bloom_not_match_; }; extern const uint64_t kPlainTableMagicNumber; class TestPlainTableFactory : public PlainTableFactory { public: explicit TestPlainTableFactory(bool* expect_bloom_not_match, const PlainTableOptions& options, uint32_t column_family_id, std::string column_family_name) : PlainTableFactory(options), bloom_bits_per_key_(options.bloom_bits_per_key), hash_table_ratio_(options.hash_table_ratio), index_sparseness_(options.index_sparseness), store_index_in_file_(options.store_index_in_file), expect_bloom_not_match_(expect_bloom_not_match), column_family_id_(column_family_id), column_family_name_(std::move(column_family_name)) {} Status NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool /*prefetch_index_and_filter_in_cache*/) const override { TableProperties* props = nullptr; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, 
table_reader_options.ioptions, &props, true /* compression_type_missing */); EXPECT_TRUE(s.ok()); if (store_index_in_file_) { BlockHandle bloom_block_handle; s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber, table_reader_options.ioptions, BloomBlockBuilder::kBloomBlock, &bloom_block_handle, /* compression_type_missing */ true); EXPECT_TRUE(s.ok()); BlockHandle index_block_handle; s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber, table_reader_options.ioptions, PlainTableIndexBuilder::kPlainTableIndexBlock, &index_block_handle, /* compression_type_missing */ true); EXPECT_TRUE(s.ok()); } auto& user_props = props->user_collected_properties; auto encoding_type_prop = user_props.find(PlainTablePropertyNames::kEncodingType); assert(encoding_type_prop != user_props.end()); EncodingType encoding_type = static_cast( DecodeFixed32(encoding_type_prop->second.c_str())); std::unique_ptr new_reader(new TestPlainTableReader( table_reader_options.env_options, table_reader_options.internal_comparator, encoding_type, file_size, bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props, std::move(file), table_reader_options.ioptions, table_reader_options.prefix_extractor, expect_bloom_not_match_, store_index_in_file_, column_family_id_, column_family_name_)); *table = std::move(new_reader); return s; } private: int bloom_bits_per_key_; double hash_table_ratio_; size_t index_sparseness_; bool store_index_in_file_; bool* expect_bloom_not_match_; const uint32_t column_family_id_; const std::string column_family_name_; }; TEST_P(PlainTableDBTest, BadOptions1) { // Build with a prefix extractor ASSERT_OK(Put("1000000000000foo", "v1")); dbfull()->TEST_FlushMemTable(); // Bad attempt to re-open without a prefix extractor Options options = CurrentOptions(); options.prefix_extractor.reset(); ASSERT_EQ( "Invalid argument: Prefix extractor is missing when opening a PlainTable " "built using a prefix extractor", TryReopen(&options).ToString()); // Bad attempt 
to re-open with different prefix extractor options.prefix_extractor.reset(NewFixedPrefixTransform(6)); ASSERT_EQ( "Invalid argument: Prefix extractor given doesn't match the one used to " "build PlainTable", TryReopen(&options).ToString()); // Correct prefix extractor options.prefix_extractor.reset(NewFixedPrefixTransform(8)); Reopen(&options); ASSERT_EQ("v1", Get("1000000000000foo")); } TEST_P(PlainTableDBTest, BadOptions2) { Options options = CurrentOptions(); options.prefix_extractor.reset(); options.create_if_missing = true; DestroyAndReopen(&options); // Build without a prefix extractor // (apparently works even if hash_table_ratio > 0) ASSERT_OK(Put("1000000000000foo", "v1")); dbfull()->TEST_FlushMemTable(); // Bad attempt to re-open with hash_table_ratio > 0 and no prefix extractor Status s = TryReopen(&options); ASSERT_EQ( "Not implemented: PlainTable requires a prefix extractor enable prefix " "hash mode.", s.ToString()); // OK to open with hash_table_ratio == 0 and no prefix extractor PlainTableOptions plain_table_options; plain_table_options.hash_table_ratio = 0; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); Reopen(&options); ASSERT_EQ("v1", Get("1000000000000foo")); // OK to open newly with a prefix_extractor and hash table; builds index // in memory. 
options = CurrentOptions(); Reopen(&options); ASSERT_EQ("v1", Get("1000000000000foo")); } TEST_P(PlainTableDBTest, Flush) { for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; huge_page_tlb_size += 2 * 1024 * 1024) { for (EncodingType encoding_type : {kPlain, kPrefix}) { for (int bloom = -1; bloom <= 117; bloom += 117) { const int bloom_bits = std::max(bloom, 0); const bool full_scan_mode = bloom < 0; for (int total_order = 0; total_order <= 1; total_order++) { for (int store_index_in_file = 0; store_index_in_file <= 1; ++store_index_in_file) { Options options = CurrentOptions(); options.create_if_missing = true; // Set only one bucket to force bucket conflict. // Test index interval for the same prefix to be 1, 2 and 4 if (total_order) { options.prefix_extractor.reset(); PlainTableOptions plain_table_options; plain_table_options.user_key_len = 0; plain_table_options.bloom_bits_per_key = bloom_bits; plain_table_options.hash_table_ratio = 0; plain_table_options.index_sparseness = 2; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; plain_table_options.full_scan_mode = full_scan_mode; plain_table_options.store_index_in_file = store_index_in_file; options.table_factory.reset( NewPlainTableFactory(plain_table_options)); } else { PlainTableOptions plain_table_options; plain_table_options.user_key_len = 0; plain_table_options.bloom_bits_per_key = bloom_bits; plain_table_options.hash_table_ratio = 0.75; plain_table_options.index_sparseness = 16; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; plain_table_options.full_scan_mode = full_scan_mode; plain_table_options.store_index_in_file = store_index_in_file; options.table_factory.reset( NewPlainTableFactory(plain_table_options)); } DestroyAndReopen(&options); uint64_t int_num; ASSERT_TRUE(dbfull()->GetIntProperty( "rocksdb.estimate-table-readers-mem", &int_num)); 
ASSERT_EQ(int_num, 0U); ASSERT_OK(Put("1000000000000foo", "v1")); ASSERT_OK(Put("0000000000000bar", "v2")); ASSERT_OK(Put("1000000000000foo", "v3")); dbfull()->TEST_FlushMemTable(); ASSERT_TRUE(dbfull()->GetIntProperty( "rocksdb.estimate-table-readers-mem", &int_num)); ASSERT_GT(int_num, 0U); TablePropertiesCollection ptc; reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); ASSERT_EQ(1U, ptc.size()); auto row = ptc.begin(); auto tp = row->second; if (full_scan_mode) { // Does not support Get/Seek std::unique_ptr iter(dbfull()->NewIterator(ReadOptions())); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("0000000000000bar", iter->key().ToString()); ASSERT_EQ("v2", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000000foo", iter->key().ToString()); ASSERT_EQ("v3", iter->value().ToString()); iter->Next(); ASSERT_TRUE(!iter->Valid()); ASSERT_TRUE(iter->status().ok()); } else { if (!store_index_in_file) { ASSERT_EQ(total_order ? "4" : "12", (tp->user_collected_properties) .at("plain_table_hash_table_size")); ASSERT_EQ("0", (tp->user_collected_properties) .at("plain_table_sub_index_size")); } else { ASSERT_EQ("0", (tp->user_collected_properties) .at("plain_table_hash_table_size")); ASSERT_EQ("0", (tp->user_collected_properties) .at("plain_table_sub_index_size")); } ASSERT_EQ("v3", Get("1000000000000foo")); ASSERT_EQ("v2", Get("0000000000000bar")); } } } } } } } TEST_P(PlainTableDBTest, Flush2) { for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; huge_page_tlb_size += 2 * 1024 * 1024) { for (EncodingType encoding_type : {kPlain, kPrefix}) { for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { for (int total_order = 0; total_order <= 1; total_order++) { for (int store_index_in_file = 0; store_index_in_file <= 1; ++store_index_in_file) { if (encoding_type == kPrefix && total_order) { continue; } if (!bloom_bits && store_index_in_file) { continue; } if (total_order && 
store_index_in_file) {
              continue;
            }

            // Flipped by the test body to tell TestPlainTableReader which
            // bloom filter outcome the next lookups must produce.
            bool expect_bloom_not_match = false;
            Options options = CurrentOptions();
            options.create_if_missing = true;
            // Set only one bucket to force bucket conflict.
            // Test index interval for the same prefix to be 1, 2 and 4
            PlainTableOptions plain_table_options;
            if (total_order) {
              options.prefix_extractor = nullptr;
              plain_table_options.hash_table_ratio = 0;
              plain_table_options.index_sparseness = 2;
            } else {
              plain_table_options.hash_table_ratio = 0.75;
              plain_table_options.index_sparseness = 16;
            }
            plain_table_options.user_key_len = kPlainTableVariableLength;
            plain_table_options.bloom_bits_per_key = bloom_bits;
            plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
            plain_table_options.encoding_type = encoding_type;
            plain_table_options.store_index_in_file = store_index_in_file;
            options.table_factory.reset(new TestPlainTableFactory(
                &expect_bloom_not_match, plain_table_options,
                0 /* column_family_id */, kDefaultColumnFamilyName));

            DestroyAndReopen(&options);
            ASSERT_OK(Put("0000000000000bar", "b"));
            ASSERT_OK(Put("1000000000000foo", "v1"));
            dbfull()->TEST_FlushMemTable();

            // Each flush below adds another table file; reads must return
            // the newest value across files.
            ASSERT_OK(Put("1000000000000foo", "v2"));
            dbfull()->TEST_FlushMemTable();
            ASSERT_EQ("v2", Get("1000000000000foo"));

            ASSERT_OK(Put("0000000000000eee", "v3"));
            dbfull()->TEST_FlushMemTable();
            ASSERT_EQ("v3", Get("0000000000000eee"));

            ASSERT_OK(Delete("0000000000000bar"));
            dbfull()->TEST_FlushMemTable();
            ASSERT_EQ("NOT_FOUND", Get("0000000000000bar"));

            ASSERT_OK(Put("0000000000000eee", "v5"));
            ASSERT_OK(Put("9000000000000eee", "v5"));
            dbfull()->TEST_FlushMemTable();
            ASSERT_EQ("v5", Get("0000000000000eee"));

            // Test Bloom Filter
            if (bloom_bits > 0) {
              // Neither key nor value should exist.
              expect_bloom_not_match = true;
              ASSERT_EQ("NOT_FOUND", Get("5_not00000000bar"));
              // Key doesn't exist any more but prefix exists.
              if (total_order) {
                ASSERT_EQ("NOT_FOUND", Get("1000000000000not"));
                ASSERT_EQ("NOT_FOUND", Get("0000000000000not"));
              }
              expect_bloom_not_match = false;
            }
          }
        }
      }
    }
  }
}

// Counts hits on the "GetContext::SaveValue::PinSelf" sync point while
// reading two keys: first from the writable DB, then after reopening
// read-only. The expected count after the read-only reopen depends on
// mmap_mode().
TEST_P(PlainTableDBTest, Immortal) {
  for (EncodingType encoding_type : {kPlain, kPrefix}) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.max_open_files = -1;
    // Set only one bucket to force bucket conflict.
    // Test index interval for the same prefix to be 1, 2 and 4
    PlainTableOptions plain_table_options;
    plain_table_options.hash_table_ratio = 0.75;
    plain_table_options.index_sparseness = 16;
    plain_table_options.user_key_len = kPlainTableVariableLength;
    plain_table_options.bloom_bits_per_key = 10;
    plain_table_options.encoding_type = encoding_type;
    options.table_factory.reset(NewPlainTableFactory(plain_table_options));

    DestroyAndReopen(&options);
    ASSERT_OK(Put("0000000000000bar", "b"));
    ASSERT_OK(Put("1000000000000foo", "v1"));
    dbfull()->TEST_FlushMemTable();

    // Incremented by the sync point callback registered below.
    int copied = 0;
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "GetContext::SaveValue::PinSelf", [&](void* /*arg*/) { copied++; });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
    ASSERT_EQ("b", Get("0000000000000bar"));
    ASSERT_EQ("v1", Get("1000000000000foo"));
    ASSERT_EQ(2, copied);
    copied = 0;

    Close();
    ASSERT_OK(ReopenForReadOnly(&options));

    ASSERT_EQ("b", Get("0000000000000bar"));
    ASSERT_EQ("v1", Get("1000000000000foo"));
    ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
    if (mmap_mode()) {
      ASSERT_EQ(0, copied);
    } else {
      ASSERT_EQ(2, copied);
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_P(PlainTableDBTest, Iterator) {
  for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
       huge_page_tlb_size += 2 * 1024 * 1024) {
    for (EncodingType encoding_type : {kPlain, kPrefix}) {
      for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
        for (int total_order = 0; total_order <= 1; total_order++) {
          if (encoding_type == kPrefix && total_order == 1)
{ continue; } bool expect_bloom_not_match = false; Options options = CurrentOptions(); options.create_if_missing = true; // Set only one bucket to force bucket conflict. // Test index interval for the same prefix to be 1, 2 and 4 if (total_order) { options.prefix_extractor = nullptr; PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = bloom_bits; plain_table_options.hash_table_ratio = 0; plain_table_options.index_sparseness = 2; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; options.table_factory.reset(new TestPlainTableFactory( &expect_bloom_not_match, plain_table_options, 0 /* column_family_id */, kDefaultColumnFamilyName)); } else { PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = bloom_bits; plain_table_options.hash_table_ratio = 0.75; plain_table_options.index_sparseness = 16; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; options.table_factory.reset(new TestPlainTableFactory( &expect_bloom_not_match, plain_table_options, 0 /* column_family_id */, kDefaultColumnFamilyName)); } DestroyAndReopen(&options); ASSERT_OK(Put("1000000000foo002", "v_2")); ASSERT_OK(Put("0000000000000bar", "random")); ASSERT_OK(Put("1000000000foo001", "v1")); ASSERT_OK(Put("3000000000000bar", "bar_v")); ASSERT_OK(Put("1000000000foo003", "v__3")); ASSERT_OK(Put("1000000000foo004", "v__4")); ASSERT_OK(Put("1000000000foo005", "v__5")); ASSERT_OK(Put("1000000000foo007", "v__7")); ASSERT_OK(Put("1000000000foo008", "v__8")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("1000000000foo001")); ASSERT_EQ("v__3", Get("1000000000foo003")); Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek("1000000000foo000"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo001", iter->key().ToString()); ASSERT_EQ("v1", 
iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo002", iter->key().ToString()); ASSERT_EQ("v_2", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo003", iter->key().ToString()); ASSERT_EQ("v__3", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo004", iter->key().ToString()); ASSERT_EQ("v__4", iter->value().ToString()); iter->Seek("3000000000000bar"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("3000000000000bar", iter->key().ToString()); ASSERT_EQ("bar_v", iter->value().ToString()); iter->Seek("1000000000foo000"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo001", iter->key().ToString()); ASSERT_EQ("v1", iter->value().ToString()); iter->Seek("1000000000foo005"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo005", iter->key().ToString()); ASSERT_EQ("v__5", iter->value().ToString()); iter->Seek("1000000000foo006"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo007", iter->key().ToString()); ASSERT_EQ("v__7", iter->value().ToString()); iter->Seek("1000000000foo008"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo008", iter->key().ToString()); ASSERT_EQ("v__8", iter->value().ToString()); if (total_order == 0) { iter->Seek("1000000000foo009"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("3000000000000bar", iter->key().ToString()); } // Test Bloom Filter if (bloom_bits > 0) { if (!total_order) { // Neither key nor value should exist. 
expect_bloom_not_match = true; iter->Seek("2not000000000bar"); ASSERT_TRUE(!iter->Valid()); ASSERT_EQ("NOT_FOUND", Get("2not000000000bar")); expect_bloom_not_match = false; } else { expect_bloom_not_match = true; ASSERT_EQ("NOT_FOUND", Get("2not000000000bar")); expect_bloom_not_match = false; } } delete iter; } } } } } namespace { std::string NthKey(size_t n, char filler) { std::string rv(16, filler); rv[0] = n % 10; rv[1] = (n / 10) % 10; rv[2] = (n / 100) % 10; rv[3] = (n / 1000) % 10; return rv; } } // anonymous namespace TEST_P(PlainTableDBTest, BloomSchema) { Options options = CurrentOptions(); options.create_if_missing = true; for (int bloom_locality = 0; bloom_locality <= 1; bloom_locality++) { options.bloom_locality = bloom_locality; PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = 3; // high FP rate for test plain_table_options.hash_table_ratio = 0.75; plain_table_options.index_sparseness = 16; plain_table_options.huge_page_tlb_size = 0; plain_table_options.encoding_type = kPlain; bool expect_bloom_not_match = false; options.table_factory.reset(new TestPlainTableFactory( &expect_bloom_not_match, plain_table_options, 0 /* column_family_id */, kDefaultColumnFamilyName)); DestroyAndReopen(&options); for (unsigned i = 0; i < 2345; ++i) { ASSERT_OK(Put(NthKey(i, 'y'), "added")); } dbfull()->TEST_FlushMemTable(); ASSERT_EQ("added", Get(NthKey(42, 'y'))); for (unsigned i = 0; i < 32; ++i) { // Known pattern of Bloom filter false positives can detect schema change // with high probability. 
Known FPs stuffed into bits: uint32_t pattern; if (!bloom_locality) { pattern = 1785868347UL; } else if (CACHE_LINE_SIZE == 64U) { pattern = 2421694657UL; } else if (CACHE_LINE_SIZE == 128U) { pattern = 788710956UL; } else { ASSERT_EQ(CACHE_LINE_SIZE, 256U); pattern = 163905UL; } bool expect_fp = pattern & (1UL << i); // fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp); expect_bloom_not_match = !expect_fp; ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n'))); } } } namespace { std::string MakeLongKey(size_t length, char c) { return std::string(length, c); } } // namespace TEST_P(PlainTableDBTest, IteratorLargeKeys) { Options options = CurrentOptions(); PlainTableOptions plain_table_options; plain_table_options.user_key_len = 0; plain_table_options.bloom_bits_per_key = 0; plain_table_options.hash_table_ratio = 0; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.create_if_missing = true; options.prefix_extractor.reset(); DestroyAndReopen(&options); std::string key_list[] = { MakeLongKey(30, '0'), MakeLongKey(16, '1'), MakeLongKey(32, '2'), MakeLongKey(60, '3'), MakeLongKey(90, '4'), MakeLongKey(50, '5'), MakeLongKey(26, '6') }; for (size_t i = 0; i < 7; i++) { ASSERT_OK(Put(key_list[i], ToString(i))); } dbfull()->TEST_FlushMemTable(); Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek(key_list[0]); for (size_t i = 0; i < 7; i++) { ASSERT_TRUE(iter->Valid()); ASSERT_EQ(key_list[i], iter->key().ToString()); ASSERT_EQ(ToString(i), iter->value().ToString()); iter->Next(); } ASSERT_TRUE(!iter->Valid()); delete iter; } namespace { std::string MakeLongKeyWithPrefix(size_t length, char c) { return "00000000" + std::string(length - 8, c); } } // namespace TEST_P(PlainTableDBTest, IteratorLargeKeysWithPrefix) { Options options = CurrentOptions(); PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = 0; plain_table_options.hash_table_ratio = 0.8; 
plain_table_options.index_sparseness = 3; plain_table_options.huge_page_tlb_size = 0; plain_table_options.encoding_type = kPrefix; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.create_if_missing = true; DestroyAndReopen(&options); std::string key_list[] = { MakeLongKeyWithPrefix(30, '0'), MakeLongKeyWithPrefix(16, '1'), MakeLongKeyWithPrefix(32, '2'), MakeLongKeyWithPrefix(60, '3'), MakeLongKeyWithPrefix(90, '4'), MakeLongKeyWithPrefix(50, '5'), MakeLongKeyWithPrefix(26, '6')}; for (size_t i = 0; i < 7; i++) { ASSERT_OK(Put(key_list[i], ToString(i))); } dbfull()->TEST_FlushMemTable(); Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek(key_list[0]); for (size_t i = 0; i < 7; i++) { ASSERT_TRUE(iter->Valid()); ASSERT_EQ(key_list[i], iter->key().ToString()); ASSERT_EQ(ToString(i), iter->value().ToString()); iter->Next(); } ASSERT_TRUE(!iter->Valid()); delete iter; } TEST_P(PlainTableDBTest, IteratorReverseSuffixComparator) { Options options = CurrentOptions(); options.create_if_missing = true; // Set only one bucket to force bucket conflict. 
// Test index interval for the same prefix to be 1, 2 and 4 test::SimpleSuffixReverseComparator comp; options.comparator = ∁ DestroyAndReopen(&options); ASSERT_OK(Put("1000000000foo002", "v_2")); ASSERT_OK(Put("0000000000000bar", "random")); ASSERT_OK(Put("1000000000foo001", "v1")); ASSERT_OK(Put("3000000000000bar", "bar_v")); ASSERT_OK(Put("1000000000foo003", "v__3")); ASSERT_OK(Put("1000000000foo004", "v__4")); ASSERT_OK(Put("1000000000foo005", "v__5")); ASSERT_OK(Put("1000000000foo007", "v__7")); ASSERT_OK(Put("1000000000foo008", "v__8")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("1000000000foo001")); ASSERT_EQ("v__3", Get("1000000000foo003")); Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek("1000000000foo009"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo008", iter->key().ToString()); ASSERT_EQ("v__8", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo007", iter->key().ToString()); ASSERT_EQ("v__7", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo005", iter->key().ToString()); ASSERT_EQ("v__5", iter->value().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo004", iter->key().ToString()); ASSERT_EQ("v__4", iter->value().ToString()); iter->Seek("3000000000000bar"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("3000000000000bar", iter->key().ToString()); ASSERT_EQ("bar_v", iter->value().ToString()); iter->Seek("1000000000foo005"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo005", iter->key().ToString()); ASSERT_EQ("v__5", iter->value().ToString()); iter->Seek("1000000000foo006"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo005", iter->key().ToString()); ASSERT_EQ("v__5", iter->value().ToString()); iter->Seek("1000000000foo008"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("1000000000foo008", iter->key().ToString()); ASSERT_EQ("v__8", iter->value().ToString()); iter->Seek("1000000000foo000"); 
ASSERT_TRUE(iter->Valid()); ASSERT_EQ("3000000000000bar", iter->key().ToString()); delete iter; } TEST_P(PlainTableDBTest, HashBucketConflict) { for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; huge_page_tlb_size += 2 * 1024 * 1024) { for (unsigned char i = 1; i <= 3; i++) { Options options = CurrentOptions(); options.create_if_missing = true; // Set only one bucket to force bucket conflict. // Test index interval for the same prefix to be 1, 2 and 4 PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = 0; plain_table_options.hash_table_ratio = 0; plain_table_options.index_sparseness = 2 ^ i; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); DestroyAndReopen(&options); ASSERT_OK(Put("5000000000000fo0", "v1")); ASSERT_OK(Put("5000000000000fo1", "v2")); ASSERT_OK(Put("5000000000000fo2", "v")); ASSERT_OK(Put("2000000000000fo0", "v3")); ASSERT_OK(Put("2000000000000fo1", "v4")); ASSERT_OK(Put("2000000000000fo2", "v")); ASSERT_OK(Put("2000000000000fo3", "v")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("5000000000000fo0")); ASSERT_EQ("v2", Get("5000000000000fo1")); ASSERT_EQ("v3", Get("2000000000000fo0")); ASSERT_EQ("v4", Get("2000000000000fo1")); ASSERT_EQ("NOT_FOUND", Get("5000000000000bar")); ASSERT_EQ("NOT_FOUND", Get("2000000000000bar")); ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8")); ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8")); ReadOptions ro; Iterator* iter = dbfull()->NewIterator(ro); iter->Seek("5000000000000fo0"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo0", iter->key().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo1", iter->key().ToString()); iter->Seek("5000000000000fo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo1", iter->key().ToString()); iter->Seek("2000000000000fo0"); ASSERT_TRUE(iter->Valid()); 
ASSERT_EQ("2000000000000fo0", iter->key().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo1", iter->key().ToString()); iter->Seek("2000000000000fo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo1", iter->key().ToString()); iter->Seek("2000000000000bar"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo0", iter->key().ToString()); iter->Seek("5000000000000bar"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo0", iter->key().ToString()); iter->Seek("2000000000000fo8"); ASSERT_TRUE(!iter->Valid() || options.comparator->Compare(iter->key(), "20000001") > 0); iter->Seek("5000000000000fo8"); ASSERT_TRUE(!iter->Valid()); iter->Seek("1000000000000fo2"); ASSERT_TRUE(!iter->Valid()); iter->Seek("3000000000000fo2"); ASSERT_TRUE(!iter->Valid()); iter->Seek("8000000000000fo2"); ASSERT_TRUE(!iter->Valid()); delete iter; } } } TEST_P(PlainTableDBTest, HashBucketConflictReverseSuffixComparator) { for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; huge_page_tlb_size += 2 * 1024 * 1024) { for (unsigned char i = 1; i <= 3; i++) { Options options = CurrentOptions(); options.create_if_missing = true; test::SimpleSuffixReverseComparator comp; options.comparator = ∁ // Set only one bucket to force bucket conflict. 
// Test index interval for the same prefix to be 1, 2 and 4 PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = 0; plain_table_options.hash_table_ratio = 0; plain_table_options.index_sparseness = 2 ^ i; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); DestroyAndReopen(&options); ASSERT_OK(Put("5000000000000fo0", "v1")); ASSERT_OK(Put("5000000000000fo1", "v2")); ASSERT_OK(Put("5000000000000fo2", "v")); ASSERT_OK(Put("2000000000000fo0", "v3")); ASSERT_OK(Put("2000000000000fo1", "v4")); ASSERT_OK(Put("2000000000000fo2", "v")); ASSERT_OK(Put("2000000000000fo3", "v")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("5000000000000fo0")); ASSERT_EQ("v2", Get("5000000000000fo1")); ASSERT_EQ("v3", Get("2000000000000fo0")); ASSERT_EQ("v4", Get("2000000000000fo1")); ASSERT_EQ("NOT_FOUND", Get("5000000000000bar")); ASSERT_EQ("NOT_FOUND", Get("2000000000000bar")); ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8")); ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8")); ReadOptions ro; Iterator* iter = dbfull()->NewIterator(ro); iter->Seek("5000000000000fo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo1", iter->key().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo0", iter->key().ToString()); iter->Seek("5000000000000fo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo1", iter->key().ToString()); iter->Seek("2000000000000fo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo1", iter->key().ToString()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo0", iter->key().ToString()); iter->Seek("2000000000000fo1"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo1", iter->key().ToString()); iter->Seek("2000000000000var"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("2000000000000fo3", iter->key().ToString()); iter->Seek("5000000000000var"); 
ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo2", iter->key().ToString()); std::string seek_key = "2000000000000bar"; iter->Seek(seek_key); ASSERT_TRUE(!iter->Valid() || options.prefix_extractor->Transform(iter->key()) != options.prefix_extractor->Transform(seek_key)); iter->Seek("1000000000000fo2"); ASSERT_TRUE(!iter->Valid()); iter->Seek("3000000000000fo2"); ASSERT_TRUE(!iter->Valid()); iter->Seek("8000000000000fo2"); ASSERT_TRUE(!iter->Valid()); delete iter; } } } TEST_P(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) { Options options = CurrentOptions(); options.create_if_missing = true; // Set only one bucket to force bucket conflict. // Test index interval for the same prefix to be 1, 2 and 4 PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = 0; plain_table_options.hash_table_ratio = 0; plain_table_options.index_sparseness = 5; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); DestroyAndReopen(&options); ASSERT_OK(Put("5000000000000fo0", "v1")); ASSERT_OK(Put("5000000000000fo1", "v2")); ASSERT_OK(Put("5000000000000fo2", "v3")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v1", Get("5000000000000fo0")); ASSERT_EQ("v2", Get("5000000000000fo1")); ASSERT_EQ("v3", Get("5000000000000fo2")); ASSERT_EQ("NOT_FOUND", Get("8000000000000bar")); ASSERT_EQ("NOT_FOUND", Get("1000000000000bar")); Iterator* iter = dbfull()->NewIterator(ReadOptions()); iter->Seek("5000000000000bar"); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("5000000000000fo0", iter->key().ToString()); iter->Seek("5000000000000fo8"); ASSERT_TRUE(!iter->Valid()); iter->Seek("1000000000000fo2"); ASSERT_TRUE(!iter->Valid()); iter->Seek("8000000000000fo2"); ASSERT_TRUE(!iter->Valid()); delete iter; } static std::string Key(int i) { char buf[100]; snprintf(buf, sizeof(buf), "key_______%06d", i); return std::string(buf); } static std::string RandomString(Random* rnd, int len) { std::string r; test::RandomString(rnd, len, 
&r); return r; } TEST_P(PlainTableDBTest, CompactionTrigger) { Options options = CurrentOptions(); options.write_buffer_size = 120 << 10; // 120KB options.num_levels = 3; options.level0_file_num_compaction_trigger = 3; Reopen(&options); Random rnd(301); for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; num++) { std::vector values; // Write 120KB (10 values, each 12K) for (int i = 0; i < 10; i++) { values.push_back(RandomString(&rnd, 12 << 10)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Put(Key(999), "")); dbfull()->TEST_WaitForFlushMemTable(); ASSERT_EQ(NumTableFilesAtLevel(0), num + 1); } //generate one more file in level-0, and should trigger level-0 compaction std::vector values; for (int i = 0; i < 12; i++) { values.push_back(RandomString(&rnd, 10000)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Put(Key(999), "")); dbfull()->TEST_WaitForCompact(); ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(1), 1); } TEST_P(PlainTableDBTest, AdaptiveTable) { Options options = CurrentOptions(); options.create_if_missing = true; options.table_factory.reset(NewPlainTableFactory()); DestroyAndReopen(&options); ASSERT_OK(Put("1000000000000foo", "v1")); ASSERT_OK(Put("0000000000000bar", "v2")); ASSERT_OK(Put("1000000000000foo", "v3")); dbfull()->TEST_FlushMemTable(); options.create_if_missing = false; std::shared_ptr block_based_factory( NewBlockBasedTableFactory()); std::shared_ptr plain_table_factory( NewPlainTableFactory()); std::shared_ptr dummy_factory; options.table_factory.reset(NewAdaptiveTableFactory( block_based_factory, block_based_factory, plain_table_factory)); Reopen(&options); ASSERT_EQ("v3", Get("1000000000000foo")); ASSERT_EQ("v2", Get("0000000000000bar")); ASSERT_OK(Put("2000000000000foo", "v4")); ASSERT_OK(Put("3000000000000bar", "v5")); dbfull()->TEST_FlushMemTable(); ASSERT_EQ("v4", Get("2000000000000foo")); ASSERT_EQ("v5", Get("3000000000000bar")); Reopen(&options); ASSERT_EQ("v3", 
Get("1000000000000foo")); ASSERT_EQ("v2", Get("0000000000000bar")); ASSERT_EQ("v4", Get("2000000000000foo")); ASSERT_EQ("v5", Get("3000000000000bar")); options.paranoid_checks = false; options.table_factory.reset(NewBlockBasedTableFactory()); Reopen(&options); ASSERT_NE("v3", Get("1000000000000foo")); options.paranoid_checks = false; options.table_factory.reset(NewPlainTableFactory()); Reopen(&options); ASSERT_NE("v5", Get("3000000000000bar")); } INSTANTIATE_TEST_CASE_P(PlainTableDBTest, PlainTableDBTest, ::testing::Bool()); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as plain table is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/pre_release_callback.h000066400000000000000000000032201370372246700201020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class DB; class PreReleaseCallback { public: virtual ~PreReleaseCallback() {} // Will be called while on the write thread after the write to the WAL and // before the write to memtable. This is useful if any operation needs to be // done before the write gets visible to the readers, or if we want to reduce // the overhead of locking by updating something sequentially while we are on // the write thread. If the callback fails, this function returns a non-OK // status, the sequence number will not be released, and same status will be // propagated to all the writers in the write group. // seq is the sequence number that is used for this write and will be // released. 
// is_mem_disabled is currently used for debugging purposes to assert that // the callback is done from the right write queue. // If non-zero, log_number indicates the WAL log to which we wrote. // index >= 0 specifies the order of callback in the same write thread. // total > index specifies the total number of callbacks in the same write // thread. Together with index, could be used to reduce the redundant // operations among the callbacks. virtual Status Callback(SequenceNumber seq, bool is_mem_disabled, uint64_t log_number, size_t index, size_t total) = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/prefix_test.cc000066400000000000000000000723511370372246700165050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #ifndef GFLAGS #include int main() { fprintf(stderr, "Please install gflags to run this test... 
Skipping...\n"); return 0; } #else #include #include #include #include "db/db_impl/db_impl.h" #include "monitoring/histogram.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/filter_policy.h" #include "rocksdb/memtablerep.h" #include "rocksdb/perf_context.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "test_util/testharness.h" #include "util/coding.h" #include "util/gflags_compat.h" #include "util/random.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "utilities/merge_operators.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; DEFINE_bool(trigger_deadlock, false, "issue delete in range scan to trigger PrefixHashMap deadlock"); DEFINE_int32(bucket_count, 100000, "number of buckets"); DEFINE_uint64(num_locks, 10001, "number of locks"); DEFINE_bool(random_prefix, false, "randomize prefix"); DEFINE_uint64(total_prefixes, 100000, "total number of prefixes"); DEFINE_uint64(items_per_prefix, 1, "total number of values per prefix"); DEFINE_int64(write_buffer_size, 33554432, ""); DEFINE_int32(max_write_buffer_number, 2, ""); DEFINE_int32(min_write_buffer_number_to_merge, 1, ""); DEFINE_int32(skiplist_height, 4, ""); DEFINE_double(memtable_prefix_bloom_size_ratio, 0.1, ""); DEFINE_int32(memtable_huge_page_size, 2 * 1024 * 1024, ""); DEFINE_int32(value_size, 40, ""); DEFINE_bool(enable_print, false, "Print options generated to console."); // Path to the database on file system const std::string kDbName = ROCKSDB_NAMESPACE::test::PerThreadDBPath("prefix_test"); namespace ROCKSDB_NAMESPACE { struct TestKey { uint64_t prefix; uint64_t sorted; TestKey(uint64_t _prefix, uint64_t _sorted) : prefix(_prefix), sorted(_sorted) {} }; // return a slice backed by test_key inline Slice TestKeyToSlice(std::string &s, const TestKey& test_key) { s.clear(); PutFixed64(&s, test_key.prefix); PutFixed64(&s, test_key.sorted); return Slice(s.c_str(), s.size()); } inline const TestKey SliceToTestKey(const Slice& slice) { 
return TestKey(DecodeFixed64(slice.data()), DecodeFixed64(slice.data() + 8)); } class TestKeyComparator : public Comparator { public: // Compare needs to be aware of the possibility of a and/or b is // prefix only int Compare(const Slice& a, const Slice& b) const override { const TestKey kkey_a = SliceToTestKey(a); const TestKey kkey_b = SliceToTestKey(b); const TestKey *key_a = &kkey_a; const TestKey *key_b = &kkey_b; if (key_a->prefix != key_b->prefix) { if (key_a->prefix < key_b->prefix) return -1; if (key_a->prefix > key_b->prefix) return 1; } else { EXPECT_TRUE(key_a->prefix == key_b->prefix); // note, both a and b could be prefix only if (a.size() != b.size()) { // one of them is prefix EXPECT_TRUE( (a.size() == sizeof(uint64_t) && b.size() == sizeof(TestKey)) || (b.size() == sizeof(uint64_t) && a.size() == sizeof(TestKey))); if (a.size() < b.size()) return -1; if (a.size() > b.size()) return 1; } else { // both a and b are prefix if (a.size() == sizeof(uint64_t)) { return 0; } // both a and b are whole key EXPECT_TRUE(a.size() == sizeof(TestKey) && b.size() == sizeof(TestKey)); if (key_a->sorted < key_b->sorted) return -1; if (key_a->sorted > key_b->sorted) return 1; if (key_a->sorted == key_b->sorted) return 0; } } return 0; } bool operator()(const TestKey& a, const TestKey& b) const { std::string sa, sb; return Compare(TestKeyToSlice(sa, a), TestKeyToSlice(sb, b)) < 0; } const char* Name() const override { return "TestKeyComparator"; } void FindShortestSeparator(std::string* /*start*/, const Slice& /*limit*/) const override {} void FindShortSuccessor(std::string* /*key*/) const override {} }; namespace { void PutKey(DB* db, WriteOptions write_options, uint64_t prefix, uint64_t suffix, const Slice& value) { TestKey test_key(prefix, suffix); std::string s; Slice key = TestKeyToSlice(s, test_key); ASSERT_OK(db->Put(write_options, key, value)); } void PutKey(DB* db, WriteOptions write_options, const TestKey& test_key, const Slice& value) { std::string s; Slice 
key = TestKeyToSlice(s, test_key); ASSERT_OK(db->Put(write_options, key, value)); } void MergeKey(DB* db, WriteOptions write_options, const TestKey& test_key, const Slice& value) { std::string s; Slice key = TestKeyToSlice(s, test_key); ASSERT_OK(db->Merge(write_options, key, value)); } void DeleteKey(DB* db, WriteOptions write_options, const TestKey& test_key) { std::string s; Slice key = TestKeyToSlice(s, test_key); ASSERT_OK(db->Delete(write_options, key)); } void SeekIterator(Iterator* iter, uint64_t prefix, uint64_t suffix) { TestKey test_key(prefix, suffix); std::string s; Slice key = TestKeyToSlice(s, test_key); iter->Seek(key); } const std::string kNotFoundResult = "NOT_FOUND"; std::string Get(DB* db, const ReadOptions& read_options, uint64_t prefix, uint64_t suffix) { TestKey test_key(prefix, suffix); std::string s2; Slice key = TestKeyToSlice(s2, test_key); std::string result; Status s = db->Get(read_options, key, &result); if (s.IsNotFound()) { result = kNotFoundResult; } else if (!s.ok()) { result = s.ToString(); } return result; } class SamePrefixTransform : public SliceTransform { private: const Slice prefix_; std::string name_; public: explicit SamePrefixTransform(const Slice& prefix) : prefix_(prefix), name_("rocksdb.SamePrefix." 
+ prefix.ToString()) {} const char* Name() const override { return name_.c_str(); } Slice Transform(const Slice& src) const override { assert(InDomain(src)); return prefix_; } bool InDomain(const Slice& src) const override { if (src.size() >= prefix_.size()) { return Slice(src.data(), prefix_.size()) == prefix_; } return false; } bool InRange(const Slice& dst) const override { return dst == prefix_; } bool FullLengthEnabled(size_t* /*len*/) const override { return false; } }; } // namespace class PrefixTest : public testing::Test { public: std::shared_ptr OpenDb() { DB* db; options.create_if_missing = true; options.write_buffer_size = FLAGS_write_buffer_size; options.max_write_buffer_number = FLAGS_max_write_buffer_number; options.min_write_buffer_number_to_merge = FLAGS_min_write_buffer_number_to_merge; options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_prefix_bloom_size_ratio; options.memtable_huge_page_size = FLAGS_memtable_huge_page_size; options.prefix_extractor.reset(NewFixedPrefixTransform(8)); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); options.allow_concurrent_memtable_write = false; Status s = DB::Open(options, kDbName, &db); EXPECT_OK(s); return std::shared_ptr(db); } void FirstOption() { option_config_ = kBegin; } bool NextOptions(int bucket_count) { // skip some options option_config_++; if (option_config_ < kEnd) { options.prefix_extractor.reset(NewFixedPrefixTransform(8)); switch(option_config_) { case kHashSkipList: options.memtable_factory.reset( NewHashSkipListRepFactory(bucket_count, FLAGS_skiplist_height)); return true; case kHashLinkList: options.memtable_factory.reset( NewHashLinkListRepFactory(bucket_count)); return true; case kHashLinkListHugePageTlb: options.memtable_factory.reset( NewHashLinkListRepFactory(bucket_count, 2 * 1024 * 1024)); return true; case kHashLinkListTriggerSkipList: 
options.memtable_factory.reset( NewHashLinkListRepFactory(bucket_count, 0, 3)); return true; default: return false; } } return false; } PrefixTest() : option_config_(kBegin) { options.comparator = new TestKeyComparator(); } ~PrefixTest() override { delete options.comparator; } protected: enum OptionConfig { kBegin, kHashSkipList, kHashLinkList, kHashLinkListHugePageTlb, kHashLinkListTriggerSkipList, kEnd }; int option_config_; Options options; }; TEST(SamePrefixTest, InDomainTest) { DB* db; Options options; options.create_if_missing = true; options.prefix_extractor.reset(new SamePrefixTransform("HHKB")); BlockBasedTableOptions bbto; bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); bbto.whole_key_filtering = false; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); WriteOptions write_options; ReadOptions read_options; { ASSERT_OK(DestroyDB(kDbName, Options())); ASSERT_OK(DB::Open(options, kDbName, &db)); ASSERT_OK(db->Put(write_options, "HHKB pro2", "Mar 24, 2006")); ASSERT_OK(db->Put(write_options, "HHKB pro2 Type-S", "June 29, 2011")); ASSERT_OK(db->Put(write_options, "Realforce 87u", "idk")); db->Flush(FlushOptions()); std::string result; auto db_iter = db->NewIterator(ReadOptions()); db_iter->Seek("Realforce 87u"); ASSERT_TRUE(db_iter->Valid()); ASSERT_OK(db_iter->status()); ASSERT_EQ(db_iter->key(), "Realforce 87u"); ASSERT_EQ(db_iter->value(), "idk"); delete db_iter; delete db; ASSERT_OK(DestroyDB(kDbName, Options())); } { ASSERT_OK(DB::Open(options, kDbName, &db)); ASSERT_OK(db->Put(write_options, "pikachu", "1")); ASSERT_OK(db->Put(write_options, "Meowth", "1")); ASSERT_OK(db->Put(write_options, "Mewtwo", "idk")); db->Flush(FlushOptions()); std::string result; auto db_iter = db->NewIterator(ReadOptions()); db_iter->Seek("Mewtwo"); ASSERT_TRUE(db_iter->Valid()); ASSERT_OK(db_iter->status()); delete db_iter; delete db; ASSERT_OK(DestroyDB(kDbName, Options())); } } TEST_F(PrefixTest, TestResult) { for (int num_buckets = 1; num_buckets <= 2; 
num_buckets++) { FirstOption(); while (NextOptions(num_buckets)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << " number of buckets: " << num_buckets << std::endl; DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; // 1. Insert one row. Slice v16("v16"); PutKey(db.get(), write_options, 1, 6, v16); std::unique_ptr iter(db->NewIterator(read_options)); SeekIterator(iter.get(), 1, 6); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); SeekIterator(iter.get(), 1, 5); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); SeekIterator(iter.get(), 1, 5); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); iter->Next(); ASSERT_TRUE(!iter->Valid()); SeekIterator(iter.get(), 2, 0); ASSERT_TRUE(!iter->Valid()); ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 5)); ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 7)); ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 0, 6)); ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 2, 6)); // 2. Insert an entry for the same prefix as the last entry in the bucket. Slice v17("v17"); PutKey(db.get(), write_options, 1, 7, v17); iter.reset(db->NewIterator(read_options)); SeekIterator(iter.get(), 1, 7); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); SeekIterator(iter.get(), 1, 6); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); iter->Next(); ASSERT_TRUE(!iter->Valid()); SeekIterator(iter.get(), 2, 0); ASSERT_TRUE(!iter->Valid()); // 3. Insert an entry for the same prefix as the head of the bucket. 
Slice v15("v15"); PutKey(db.get(), write_options, 1, 5, v15); iter.reset(db->NewIterator(read_options)); SeekIterator(iter.get(), 1, 7); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); SeekIterator(iter.get(), 1, 5); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v15 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); SeekIterator(iter.get(), 1, 5); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v15 == iter->value()); ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); // 4. Insert an entry with a larger prefix Slice v22("v22"); PutKey(db.get(), write_options, 2, 2, v22); iter.reset(db->NewIterator(read_options)); SeekIterator(iter.get(), 2, 2); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v22 == iter->value()); SeekIterator(iter.get(), 2, 0); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v22 == iter->value()); SeekIterator(iter.get(), 1, 5); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v15 == iter->value()); SeekIterator(iter.get(), 1, 7); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); // 5. Insert an entry with a smaller prefix Slice v02("v02"); PutKey(db.get(), write_options, 0, 2, v02); iter.reset(db->NewIterator(read_options)); SeekIterator(iter.get(), 0, 2); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v02 == iter->value()); SeekIterator(iter.get(), 0, 0); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v02 == iter->value()); SeekIterator(iter.get(), 2, 0); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v22 == iter->value()); SeekIterator(iter.get(), 1, 5); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v15 == iter->value()); SeekIterator(iter.get(), 1, 7); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); // 6. 
Insert to the beginning and the end of the first prefix Slice v13("v13"); Slice v18("v18"); PutKey(db.get(), write_options, 1, 3, v13); PutKey(db.get(), write_options, 1, 8, v18); iter.reset(db->NewIterator(read_options)); SeekIterator(iter.get(), 1, 7); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); SeekIterator(iter.get(), 1, 3); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v13 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v15 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v18 == iter->value()); SeekIterator(iter.get(), 0, 0); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v02 == iter->value()); SeekIterator(iter.get(), 2, 0); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v22 == iter->value()); ASSERT_EQ(v22.ToString(), Get(db.get(), read_options, 2, 2)); ASSERT_EQ(v02.ToString(), Get(db.get(), read_options, 0, 2)); ASSERT_EQ(v13.ToString(), Get(db.get(), read_options, 1, 3)); ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); ASSERT_EQ(v18.ToString(), Get(db.get(), read_options, 1, 8)); } } } // Show results in prefix TEST_F(PrefixTest, PrefixValid) { for (int num_buckets = 1; num_buckets <= 2; num_buckets++) { FirstOption(); while (NextOptions(num_buckets)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << " number of buckets: " << num_buckets << std::endl; DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; // Insert keys with common prefix and one key with different Slice v16("v16"); Slice v17("v17"); Slice v18("v18"); Slice v19("v19"); PutKey(db.get(), write_options, 12345, 6, v16); PutKey(db.get(), write_options, 12345, 7, v17); PutKey(db.get(), 
write_options, 12345, 8, v18); PutKey(db.get(), write_options, 12345, 9, v19); PutKey(db.get(), write_options, 12346, 8, v16); db->Flush(FlushOptions()); TestKey test_key(12346, 8); std::string s; db->Delete(write_options, TestKeyToSlice(s, test_key)); db->Flush(FlushOptions()); read_options.prefix_same_as_start = true; std::unique_ptr iter(db->NewIterator(read_options)); SeekIterator(iter.get(), 12345, 6); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v16 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v17 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v18 == iter->value()); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(v19 == iter->value()); iter->Next(); ASSERT_FALSE(iter->Valid()); ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 12346, 8)); // Verify seeking past the prefix won't return a result. SeekIterator(iter.get(), 12345, 10); ASSERT_TRUE(!iter->Valid()); } } } TEST_F(PrefixTest, DynamicPrefixIterator) { while (NextOptions(FLAGS_bucket_count)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << std::endl; DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; std::vector prefixes; for (uint64_t i = 0; i < FLAGS_total_prefixes; ++i) { prefixes.push_back(i); } if (FLAGS_random_prefix) { std::random_shuffle(prefixes.begin(), prefixes.end()); } HistogramImpl hist_put_time; HistogramImpl hist_put_comparison; // insert x random prefix, each with y continuous element. 
for (auto prefix : prefixes) { for (uint64_t sorted = 0; sorted < FLAGS_items_per_prefix; sorted++) { TestKey test_key(prefix, sorted); std::string s; Slice key = TestKeyToSlice(s, test_key); std::string value(FLAGS_value_size, 0); get_perf_context()->Reset(); StopWatchNano timer(Env::Default(), true); ASSERT_OK(db->Put(write_options, key, value)); hist_put_time.Add(timer.ElapsedNanos()); hist_put_comparison.Add(get_perf_context()->user_key_comparison_count); } } std::cout << "Put key comparison: \n" << hist_put_comparison.ToString() << "Put time: \n" << hist_put_time.ToString(); // test seek existing keys HistogramImpl hist_seek_time; HistogramImpl hist_seek_comparison; std::unique_ptr iter(db->NewIterator(read_options)); for (auto prefix : prefixes) { TestKey test_key(prefix, FLAGS_items_per_prefix / 2); std::string s; Slice key = TestKeyToSlice(s, test_key); std::string value = "v" + ToString(0); get_perf_context()->Reset(); StopWatchNano timer(Env::Default(), true); auto key_prefix = options.prefix_extractor->Transform(key); uint64_t total_keys = 0; for (iter->Seek(key); iter->Valid() && iter->key().starts_with(key_prefix); iter->Next()) { if (FLAGS_trigger_deadlock) { std::cout << "Behold the deadlock!\n"; db->Delete(write_options, iter->key()); } total_keys++; } hist_seek_time.Add(timer.ElapsedNanos()); hist_seek_comparison.Add(get_perf_context()->user_key_comparison_count); ASSERT_EQ(total_keys, FLAGS_items_per_prefix - FLAGS_items_per_prefix/2); } std::cout << "Seek key comparison: \n" << hist_seek_comparison.ToString() << "Seek time: \n" << hist_seek_time.ToString(); // test non-existing keys HistogramImpl hist_no_seek_time; HistogramImpl hist_no_seek_comparison; for (auto prefix = FLAGS_total_prefixes; prefix < FLAGS_total_prefixes + 10000; prefix++) { TestKey test_key(prefix, 0); std::string s; Slice key = TestKeyToSlice(s, test_key); get_perf_context()->Reset(); StopWatchNano timer(Env::Default(), true); iter->Seek(key); 
hist_no_seek_time.Add(timer.ElapsedNanos()); hist_no_seek_comparison.Add(get_perf_context()->user_key_comparison_count); ASSERT_TRUE(!iter->Valid()); } std::cout << "non-existing Seek key comparison: \n" << hist_no_seek_comparison.ToString() << "non-existing Seek time: \n" << hist_no_seek_time.ToString(); } } TEST_F(PrefixTest, PrefixSeekModePrev) { // Only for SkipListFactory options.memtable_factory.reset(new SkipListFactory); options.merge_operator = MergeOperators::CreatePutOperator(); options.write_buffer_size = 1024 * 1024; Random rnd(1); for (size_t m = 1; m < 100; m++) { std::cout << "[" + std::to_string(m) + "]" + "*** Mem table: " << options.memtable_factory->Name() << std::endl; DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; std::map entry_maps[3], whole_map; for (uint64_t i = 0; i < 10; i++) { int div = i % 3 + 1; for (uint64_t j = 0; j < 10; j++) { whole_map[TestKey(i, j)] = entry_maps[rnd.Uniform(div)][TestKey(i, j)] = 'v' + std::to_string(i) + std::to_string(j); } } std::map type_map; for (size_t i = 0; i < 3; i++) { for (auto& kv : entry_maps[i]) { if (rnd.OneIn(3)) { PutKey(db.get(), write_options, kv.first, kv.second); type_map[kv.first] = "value"; } else { MergeKey(db.get(), write_options, kv.first, kv.second); type_map[kv.first] = "merge"; } } if (i < 2) { db->Flush(FlushOptions()); } } for (size_t i = 0; i < 2; i++) { for (auto& kv : entry_maps[i]) { if (rnd.OneIn(10)) { whole_map.erase(kv.first); DeleteKey(db.get(), write_options, kv.first); entry_maps[2][kv.first] = "delete"; } } } if (FLAGS_enable_print) { for (size_t i = 0; i < 3; i++) { for (auto& kv : entry_maps[i]) { std::cout << "[" << i << "]" << kv.first.prefix << kv.first.sorted << " " << kv.second + " " + type_map[kv.first] << std::endl; } } } std::unique_ptr iter(db->NewIterator(read_options)); for (uint64_t prefix = 0; prefix < 10; prefix++) { uint64_t start_suffix = rnd.Uniform(9); SeekIterator(iter.get(), prefix, 
start_suffix); auto it = whole_map.find(TestKey(prefix, start_suffix)); if (it == whole_map.end()) { continue; } ASSERT_NE(it, whole_map.end()); ASSERT_TRUE(iter->Valid()); if (FLAGS_enable_print) { std::cout << "round " << prefix << " iter: " << SliceToTestKey(iter->key()).prefix << SliceToTestKey(iter->key()).sorted << " | map: " << it->first.prefix << it->first.sorted << " | " << iter->value().ToString() << " " << it->second << std::endl; } ASSERT_EQ(iter->value(), it->second); uint64_t stored_prefix = prefix; for (size_t k = 0; k < 9; k++) { if (rnd.OneIn(2) || it == whole_map.begin()) { iter->Next(); ++it; if (FLAGS_enable_print) { std::cout << "Next >> "; } } else { iter->Prev(); it--; if (FLAGS_enable_print) { std::cout << "Prev >> "; } } if (!iter->Valid() || SliceToTestKey(iter->key()).prefix != stored_prefix) { break; } stored_prefix = SliceToTestKey(iter->key()).prefix; ASSERT_TRUE(iter->Valid()); ASSERT_NE(it, whole_map.end()); ASSERT_EQ(iter->value(), it->second); if (FLAGS_enable_print) { std::cout << "iter: " << SliceToTestKey(iter->key()).prefix << SliceToTestKey(iter->key()).sorted << " | map: " << it->first.prefix << it->first.sorted << " | " << iter->value().ToString() << " " << it->second << std::endl; } } } } } TEST_F(PrefixTest, PrefixSeekModePrev2) { // Only for SkipListFactory // test the case // iter1 iter2 // | prefix | suffix | | prefix | suffix | // | 1 | 1 | | 1 | 2 | // | 1 | 3 | | 1 | 4 | // | 2 | 1 | | 3 | 3 | // | 2 | 2 | | 3 | 4 | // after seek(15), iter1 will be at 21 and iter2 will be 33. // Then if call Prev() in prefix mode where SeekForPrev(21) gets called, // iter2 should turn to invalid state because of bloom filter. 
options.memtable_factory.reset(new SkipListFactory); options.write_buffer_size = 1024 * 1024; std::string v13("v13"); DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; PutKey(db.get(), write_options, TestKey(1, 2), "v12"); PutKey(db.get(), write_options, TestKey(1, 4), "v14"); PutKey(db.get(), write_options, TestKey(3, 3), "v33"); PutKey(db.get(), write_options, TestKey(3, 4), "v34"); db->Flush(FlushOptions()); reinterpret_cast(db.get())->TEST_WaitForFlushMemTable(); PutKey(db.get(), write_options, TestKey(1, 1), "v11"); PutKey(db.get(), write_options, TestKey(1, 3), "v13"); PutKey(db.get(), write_options, TestKey(2, 1), "v21"); PutKey(db.get(), write_options, TestKey(2, 2), "v22"); db->Flush(FlushOptions()); reinterpret_cast(db.get())->TEST_WaitForFlushMemTable(); std::unique_ptr iter(db->NewIterator(read_options)); SeekIterator(iter.get(), 1, 5); iter->Prev(); ASSERT_EQ(iter->value(), v13); } TEST_F(PrefixTest, PrefixSeekModePrev3) { // Only for SkipListFactory // test SeekToLast() with iterate_upper_bound_ in prefix_seek_mode options.memtable_factory.reset(new SkipListFactory); options.write_buffer_size = 1024 * 1024; std::string v14("v14"); TestKey upper_bound_key = TestKey(1, 5); std::string s; Slice upper_bound = TestKeyToSlice(s, upper_bound_key); { DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; read_options.iterate_upper_bound = &upper_bound; PutKey(db.get(), write_options, TestKey(1, 2), "v12"); PutKey(db.get(), write_options, TestKey(1, 4), "v14"); db->Flush(FlushOptions()); reinterpret_cast(db.get())->TEST_WaitForFlushMemTable(); PutKey(db.get(), write_options, TestKey(1, 1), "v11"); PutKey(db.get(), write_options, TestKey(1, 3), "v13"); PutKey(db.get(), write_options, TestKey(2, 1), "v21"); PutKey(db.get(), write_options, TestKey(2, 2), "v22"); db->Flush(FlushOptions()); reinterpret_cast(db.get())->TEST_WaitForFlushMemTable(); std::unique_ptr 
iter(db->NewIterator(read_options)); iter->SeekToLast(); ASSERT_EQ(iter->value(), v14); } { DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; ReadOptions read_options; read_options.iterate_upper_bound = &upper_bound; PutKey(db.get(), write_options, TestKey(1, 2), "v12"); PutKey(db.get(), write_options, TestKey(1, 4), "v14"); PutKey(db.get(), write_options, TestKey(3, 3), "v33"); PutKey(db.get(), write_options, TestKey(3, 4), "v34"); db->Flush(FlushOptions()); reinterpret_cast(db.get())->TEST_WaitForFlushMemTable(); PutKey(db.get(), write_options, TestKey(1, 1), "v11"); PutKey(db.get(), write_options, TestKey(1, 3), "v13"); db->Flush(FlushOptions()); reinterpret_cast(db.get())->TEST_WaitForFlushMemTable(); std::unique_ptr iter(db->NewIterator(read_options)); iter->SeekToLast(); ASSERT_EQ(iter->value(), v14); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } #endif // GFLAGS #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as HashSkipList and HashLinkList are not supported in " "ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/range_del_aggregator.cc000066400000000000000000000410011370372246700202570ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/range_del_aggregator.h"

#include "db/compaction/compaction_iteration_stats.h"
#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h"
#include "db/version_edit.h"
#include "rocksdb/comparator.h"
#include "rocksdb/types.h"
#include "table/internal_iterator.h"
#include "table/scoped_arena_iterator.h"
#include "table/table_builder.h"
#include "util/heap.h"
#include "util/kv_map.h"
#include "util/vector_iterator.h"

// NOTE(review): in this copy of the file every `<...>` template argument list
// had been stripped and the text collapsed onto a few long lines. Template
// arguments below are restored from how each value is used; confirm against
// the canonical source.

namespace ROCKSDB_NAMESPACE {

// Wraps `iter` and clamps the tombstones it exposes to the file boundaries
// [smallest, largest]. Either bound may be null, meaning "unbounded on that
// side". The parsed bounds are stored in pinned_bounds_ so that smallest_ and
// largest_ can point at them for the lifetime of this object.
TruncatedRangeDelIterator::TruncatedRangeDelIterator(
    std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
    const InternalKeyComparator* icmp, const InternalKey* smallest,
    const InternalKey* largest)
    : iter_(std::move(iter)),
      icmp_(icmp),
      smallest_ikey_(smallest),
      largest_ikey_(largest) {
  if (smallest != nullptr) {
    pinned_bounds_.emplace_back();
    auto& parsed_smallest = pinned_bounds_.back();
    if (!ParseInternalKey(smallest->Encode(), &parsed_smallest)) {
      assert(false);
    }
    smallest_ = &parsed_smallest;
  }
  if (largest != nullptr) {
    pinned_bounds_.emplace_back();
    auto& parsed_largest = pinned_bounds_.back();
    if (!ParseInternalKey(largest->Encode(), &parsed_largest)) {
      assert(false);
    }
    if (parsed_largest.type == kTypeRangeDeletion &&
        parsed_largest.sequence == kMaxSequenceNumber) {
      // The file boundary has been artificially extended by a range tombstone.
      // We do not need to adjust largest to properly truncate range
      // tombstones that extend past the boundary.
    } else if (parsed_largest.sequence == 0) {
      // The largest key in the sstable has a sequence number of 0. Since we
      // guarantee that no internal keys with the same user key and sequence
      // number can exist in a DB, we know that the largest key in this sstable
      // cannot exist as the smallest key in the next sstable. This further
      // implies that no range tombstone in this sstable covers largest;
      // otherwise, the file boundary would have been artificially extended.
      //
      // Therefore, we will never truncate a range tombstone at largest, so we
      // can leave it unchanged.
    } else {
      // The same user key may straddle two sstable boundaries. To ensure that
      // the truncated end key can cover the largest key in this sstable, reduce
      // its sequence number by 1.
      parsed_largest.sequence -= 1;
    }
    largest_ = &parsed_largest;
  }
}

// True when the wrapped iterator is positioned on a tombstone that still
// intersects the [smallest_, largest_) window.
bool TruncatedRangeDelIterator::Valid() const {
  return iter_->Valid() &&
         (smallest_ == nullptr ||
          icmp_->Compare(*smallest_, iter_->parsed_end_key()) < 0) &&
         (largest_ == nullptr ||
          icmp_->Compare(iter_->parsed_start_key(), *largest_) < 0);
}

void TruncatedRangeDelIterator::Next() { iter_->TopNext(); }

void TruncatedRangeDelIterator::Prev() { iter_->TopPrev(); }

void TruncatedRangeDelIterator::InternalNext() { iter_->Next(); }

// NOTE: target is a user key
void TruncatedRangeDelIterator::Seek(const Slice& target) {
  // Target at or past the upper bound: nothing in this file can cover it.
  if (largest_ != nullptr &&
      icmp_->Compare(*largest_, ParsedInternalKey(target, kMaxSequenceNumber,
                                                  kTypeRangeDeletion)) <= 0) {
    iter_->Invalidate();
    return;
  }
  // Target before the lower bound: start from the lower bound instead.
  if (smallest_ != nullptr &&
      icmp_->user_comparator()->Compare(target, smallest_->user_key) < 0) {
    iter_->Seek(smallest_->user_key);
    return;
  }
  iter_->Seek(target);
}

// NOTE: target is a user key
void TruncatedRangeDelIterator::SeekForPrev(const Slice& target) {
  // Target before the lower bound: nothing in this file can cover it.
  if (smallest_ != nullptr &&
      icmp_->Compare(ParsedInternalKey(target, 0, kTypeRangeDeletion),
                     *smallest_) < 0) {
    iter_->Invalidate();
    return;
  }
  // Target past the upper bound: start from the upper bound instead.
  if (largest_ != nullptr &&
      icmp_->user_comparator()->Compare(largest_->user_key, target) < 0) {
    iter_->SeekForPrev(largest_->user_key);
    return;
  }
  iter_->SeekForPrev(target);
}

void TruncatedRangeDelIterator::SeekToFirst() {
  if (smallest_ != nullptr) {
    iter_->Seek(smallest_->user_key);
    return;
  }
  iter_->SeekToTopFirst();
}

void TruncatedRangeDelIterator::SeekToLast() {
  if (largest_ != nullptr) {
    iter_->SeekForPrev(largest_->user_key);
    return;
  }
  iter_->SeekToTopLast();
}

// Splits the wrapped iterator by snapshot and re-wraps every piece in a
// TruncatedRangeDelIterator carrying the same file boundaries. The result is
// keyed exactly as the wrapped iterator's SplitBySnapshot() result is.
std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
TruncatedRangeDelIterator::SplitBySnapshot(
    const std::vector<SequenceNumber>& snapshots) {
  auto split_untruncated_iters = iter_->SplitBySnapshot(snapshots);
  std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
      split_truncated_iters;
  for (auto& iter_pair : split_untruncated_iters) {
    std::unique_ptr<TruncatedRangeDelIterator> truncated_iter(
        new TruncatedRangeDelIterator(std::move(iter_pair.second), icmp_,
                                      smallest_ikey_, largest_ikey_));
    split_truncated_iters.emplace(iter_pair.first, std::move(truncated_iter));
  }
  return split_truncated_iters;
}

ForwardRangeDelIterator::ForwardRangeDelIterator(
    const InternalKeyComparator* icmp)
    : icmp_(icmp),
      unused_idx_(0),
      active_seqnums_(SeqMaxComparator()),
      active_iters_(EndKeyMinComparator(icmp)),
      inactive_iters_(StartKeyMinComparator(icmp)) {}

// Advances the tracked iterators up to `parsed`, then reports whether the
// newest active tombstone has a sequence number greater than parsed.sequence.
bool ForwardRangeDelIterator::ShouldDelete(const ParsedInternalKey& parsed) {
  // Move active iterators that end before parsed.
  while (!active_iters_.empty() &&
         icmp_->Compare((*active_iters_.top())->end_key(), parsed) <= 0) {
    TruncatedRangeDelIterator* iter = PopActiveIter();
    do {
      iter->Next();
    } while (iter->Valid() && icmp_->Compare(iter->end_key(), parsed) <= 0);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Move inactive iterators that start before parsed.
  while (!inactive_iters_.empty() &&
         icmp_->Compare(inactive_iters_.top()->start_key(), parsed) <= 0) {
    TruncatedRangeDelIterator* iter = PopInactiveIter();
    while (iter->Valid() && icmp_->Compare(iter->end_key(), parsed) <= 0) {
      iter->Next();
    }
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  return active_seqnums_.empty()
             ? false
             : (*active_seqnums_.begin())->seq() > parsed.sequence;
}

// Drops all positioning state; iterators will be re-added from index 0.
void ForwardRangeDelIterator::Invalidate() {
  unused_idx_ = 0;
  active_iters_.clear();
  active_seqnums_.clear();
  inactive_iters_.clear();
}

ReverseRangeDelIterator::ReverseRangeDelIterator(
    const InternalKeyComparator* icmp)
    : icmp_(icmp),
      unused_idx_(0),
      active_seqnums_(SeqMaxComparator()),
      active_iters_(StartKeyMaxComparator(icmp)),
      inactive_iters_(EndKeyMaxComparator(icmp)) {}

// Mirror image of ForwardRangeDelIterator::ShouldDelete for backward scans.
bool ReverseRangeDelIterator::ShouldDelete(const ParsedInternalKey& parsed) {
  // Move active iterators that start after parsed.
  while (!active_iters_.empty() &&
         icmp_->Compare(parsed, (*active_iters_.top())->start_key()) < 0) {
    TruncatedRangeDelIterator* iter = PopActiveIter();
    do {
      iter->Prev();
    } while (iter->Valid() && icmp_->Compare(parsed, iter->start_key()) < 0);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Move inactive iterators that end after parsed.
  while (!inactive_iters_.empty() &&
         icmp_->Compare(parsed, inactive_iters_.top()->end_key()) < 0) {
    TruncatedRangeDelIterator* iter = PopInactiveIter();
    while (iter->Valid() && icmp_->Compare(parsed, iter->start_key()) < 0) {
      iter->Prev();
    }
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  return active_seqnums_.empty()
             ? false
             : (*active_seqnums_.begin())->seq() > parsed.sequence;
}

// Drops all positioning state; iterators will be re-added from index 0.
void ReverseRangeDelIterator::Invalidate() {
  unused_idx_ = 0;
  active_iters_.clear();
  active_seqnums_.clear();
  inactive_iters_.clear();
}

// Returns false immediately when parsed.sequence is outside this stripe or the
// stripe holds no tombstones. Otherwise invalidates the iterator for the
// opposite direction, feeds any not-yet-seen tombstone iterators to the
// iterator for `mode`, and delegates to it.
bool RangeDelAggregator::StripeRep::ShouldDelete(
    const ParsedInternalKey& parsed, RangeDelPositioningMode mode) {
  if (!InStripe(parsed.sequence) || IsEmpty()) {
    return false;
  }
  switch (mode) {
    case RangeDelPositioningMode::kForwardTraversal:
      InvalidateReverseIter();

      // Pick up previously unseen iterators.
      for (auto it = std::next(iters_.begin(), forward_iter_.UnusedIdx());
           it != iters_.end(); ++it, forward_iter_.IncUnusedIdx()) {
        auto& iter = *it;
        forward_iter_.AddNewIter(iter.get(), parsed);
      }

      return forward_iter_.ShouldDelete(parsed);
    case RangeDelPositioningMode::kBackwardTraversal:
      InvalidateForwardIter();

      // Pick up previously unseen iterators.
      for (auto it = std::next(iters_.begin(), reverse_iter_.UnusedIdx());
           it != iters_.end(); ++it, reverse_iter_.IncUnusedIdx()) {
        auto& iter = *it;
        reverse_iter_.AddNewIter(iter.get(), parsed);
      }

      return reverse_iter_.ShouldDelete(parsed);
    default:
      assert(false);
      return false;
  }
}

// Returns true if any tombstone in this stripe overlaps the user-key range
// [start, end]. Resets forward/reverse positioning as a side effect.
bool RangeDelAggregator::StripeRep::IsRangeOverlapped(const Slice& start,
                                                      const Slice& end) {
  Invalidate();

  // Set the internal start/end keys so that:
  // - if start_ikey has the same user key and sequence number as the
  //   current end key, start_ikey will be considered greater; and
  // - if end_ikey has the same user key and sequence number as the current
  //   start key, end_ikey will be considered greater.
  ParsedInternalKey start_ikey(start, kMaxSequenceNumber,
                               static_cast<ValueType>(0));
  ParsedInternalKey end_ikey(end, 0, static_cast<ValueType>(0));
  for (auto& iter : iters_) {
    bool checked_candidate_tombstones = false;
    for (iter->SeekForPrev(start);
         iter->Valid() && icmp_->Compare(iter->start_key(), end_ikey) <= 0;
         iter->Next()) {
      checked_candidate_tombstones = true;
      if (icmp_->Compare(start_ikey, iter->end_key()) < 0 &&
          icmp_->Compare(iter->start_key(), end_ikey) <= 0) {
        return true;
      }
    }

    if (!checked_candidate_tombstones) {
      // Do an additional check for when the end of the range is the begin
      // key of a tombstone, which we missed earlier since SeekForPrev'ing
      // to the start was invalid.
      iter->SeekForPrev(end);
      if (iter->Valid() && icmp_->Compare(start_ikey, iter->end_key()) < 0 &&
          icmp_->Compare(iter->start_key(), end_ikey) <= 0) {
        return true;
      }
    }
  }
  return false;
}

// Null or empty inputs are ignored; anything else is wrapped with the given
// file boundaries and appended to the single stripe.
void ReadRangeDelAggregator::AddTombstones(
    std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
    const InternalKey* smallest, const InternalKey* largest) {
  if (input_iter == nullptr || input_iter->empty()) {
    return;
  }
  rep_.AddTombstones(
      std::unique_ptr<TruncatedRangeDelIterator>(new TruncatedRangeDelIterator(
          std::move(input_iter), icmp_, smallest, largest)));
}

bool ReadRangeDelAggregator::ShouldDeleteImpl(const ParsedInternalKey& parsed,
                                              RangeDelPositioningMode mode) {
  return rep_.ShouldDelete(parsed, mode);
}

bool ReadRangeDelAggregator::IsRangeOverlapped(const Slice& start,
                                               const Slice& end) {
  InvalidateRangeDelMapPositions();
  return rep_.IsRangeOverlapped(start, end);
}

// Null or empty inputs are ignored. Otherwise the truncated iterator is kept
// in parent_iters_, split by snapshot, and each piece is appended to the
// stripe with the matching key (created on first use).
void CompactionRangeDelAggregator::AddTombstones(
    std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
    const InternalKey* smallest, const InternalKey* largest) {
  if (input_iter == nullptr || input_iter->empty()) {
    return;
  }
  assert(input_iter->lower_bound() == 0);
  assert(input_iter->upper_bound() == kMaxSequenceNumber);
  parent_iters_.emplace_back(new TruncatedRangeDelIterator(
      std::move(input_iter), icmp_, smallest, largest));

  auto split_iters = parent_iters_.back()->SplitBySnapshot(*snapshots_);
  for (auto& split_iter : split_iters) {
    auto it = reps_.find(split_iter.first);
    if (it == reps_.end()) {
      bool inserted;
      SequenceNumber upper_bound = split_iter.second->upper_bound();
      SequenceNumber lower_bound = split_iter.second->lower_bound();
      std::tie(it, inserted) = reps_.emplace(
          split_iter.first, StripeRep(icmp_, upper_bound, lower_bound));
      assert(inserted);
    }
    assert(it != reps_.end());
    it->second.AddTombstones(std::move(split_iter.second));
  }
}

// Looks up the first stripe whose key is >= parsed.sequence and delegates to
// it; returns false when no such stripe exists.
bool CompactionRangeDelAggregator::ShouldDelete(const ParsedInternalKey& parsed,
                                                RangeDelPositioningMode mode) {
  auto it = reps_.lower_bound(parsed.sequence);
  if (it == reps_.end()) {
    return false;
  }
  return it->second.ShouldDelete(parsed, mode);
}
namespace { class TruncatedRangeDelMergingIter : public InternalIterator { public: TruncatedRangeDelMergingIter( const InternalKeyComparator* icmp, const Slice* lower_bound, const Slice* upper_bound, bool upper_bound_inclusive, const std::vector>& children) : icmp_(icmp), lower_bound_(lower_bound), upper_bound_(upper_bound), upper_bound_inclusive_(upper_bound_inclusive), heap_(StartKeyMinComparator(icmp)) { for (auto& child : children) { if (child != nullptr) { assert(child->lower_bound() == 0); assert(child->upper_bound() == kMaxSequenceNumber); children_.push_back(child.get()); } } } bool Valid() const override { return !heap_.empty() && BeforeEndKey(heap_.top()); } Status status() const override { return Status::OK(); } void SeekToFirst() override { heap_.clear(); for (auto& child : children_) { if (lower_bound_ != nullptr) { child->Seek(*lower_bound_); } else { child->SeekToFirst(); } if (child->Valid()) { heap_.push(child); } } } void Next() override { auto* top = heap_.top(); top->InternalNext(); if (top->Valid()) { heap_.replace_top(top); } else { heap_.pop(); } } Slice key() const override { auto* top = heap_.top(); cur_start_key_.Set(top->start_key().user_key, top->seq(), kTypeRangeDeletion); return cur_start_key_.Encode(); } Slice value() const override { auto* top = heap_.top(); assert(top->end_key().sequence == kMaxSequenceNumber); return top->end_key().user_key; } // Unused InternalIterator methods void Prev() override { assert(false); } void Seek(const Slice& /* target */) override { assert(false); } void SeekForPrev(const Slice& /* target */) override { assert(false); } void SeekToLast() override { assert(false); } private: bool BeforeEndKey(const TruncatedRangeDelIterator* iter) const { if (upper_bound_ == nullptr) { return true; } int cmp = icmp_->user_comparator()->Compare(iter->start_key().user_key, *upper_bound_); return upper_bound_inclusive_ ? 
cmp <= 0 : cmp < 0; } const InternalKeyComparator* icmp_; const Slice* lower_bound_; const Slice* upper_bound_; bool upper_bound_inclusive_; BinaryHeap heap_; std::vector children_; mutable InternalKey cur_start_key_; }; } // namespace std::unique_ptr CompactionRangeDelAggregator::NewIterator(const Slice* lower_bound, const Slice* upper_bound, bool upper_bound_inclusive) { InvalidateRangeDelMapPositions(); std::unique_ptr merging_iter( new TruncatedRangeDelMergingIter(icmp_, lower_bound, upper_bound, upper_bound_inclusive, parent_iters_)); auto fragmented_tombstone_list = std::make_shared( std::move(merging_iter), *icmp_, true /* for_compaction */, *snapshots_); return std::unique_ptr( new FragmentedRangeTombstoneIterator( fragmented_tombstone_list, *icmp_, kMaxSequenceNumber /* upper_bound */)); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/range_del_aggregator.h000066400000000000000000000323411370372246700201300ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include #include #include #include #include #include "db/compaction/compaction_iteration_stats.h" #include "db/dbformat.h" #include "db/pinned_iterators_manager.h" #include "db/range_del_aggregator.h" #include "db/range_tombstone_fragmenter.h" #include "db/version_edit.h" #include "rocksdb/comparator.h" #include "rocksdb/types.h" #include "table/internal_iterator.h" #include "table/scoped_arena_iterator.h" #include "table/table_builder.h" #include "util/heap.h" #include "util/kv_map.h" namespace ROCKSDB_NAMESPACE { class TruncatedRangeDelIterator { public: TruncatedRangeDelIterator( std::unique_ptr iter, const InternalKeyComparator* icmp, const InternalKey* smallest, const InternalKey* largest); bool Valid() const; void Next(); void Prev(); void InternalNext(); // Seeks to the tombstone with the highest viisble sequence number that covers // target (a user key). If no such tombstone exists, the position will be at // the earliest tombstone that ends after target. void Seek(const Slice& target); // Seeks to the tombstone with the highest viisble sequence number that covers // target (a user key). If no such tombstone exists, the position will be at // the latest tombstone that starts before target. void SeekForPrev(const Slice& target); void SeekToFirst(); void SeekToLast(); ParsedInternalKey start_key() const { return (smallest_ == nullptr || icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0) ? iter_->parsed_start_key() : *smallest_; } ParsedInternalKey end_key() const { return (largest_ == nullptr || icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0) ? 
iter_->parsed_end_key() : *largest_; } SequenceNumber seq() const { return iter_->seq(); } std::map> SplitBySnapshot(const std::vector& snapshots); SequenceNumber upper_bound() const { return iter_->upper_bound(); } SequenceNumber lower_bound() const { return iter_->lower_bound(); } private: std::unique_ptr iter_; const InternalKeyComparator* icmp_; const ParsedInternalKey* smallest_ = nullptr; const ParsedInternalKey* largest_ = nullptr; std::list pinned_bounds_; const InternalKey* smallest_ikey_; const InternalKey* largest_ikey_; }; struct SeqMaxComparator { bool operator()(const TruncatedRangeDelIterator* a, const TruncatedRangeDelIterator* b) const { return a->seq() > b->seq(); } }; struct StartKeyMinComparator { explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {} bool operator()(const TruncatedRangeDelIterator* a, const TruncatedRangeDelIterator* b) const { return icmp->Compare(a->start_key(), b->start_key()) > 0; } const InternalKeyComparator* icmp; }; class ForwardRangeDelIterator { public: explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp); bool ShouldDelete(const ParsedInternalKey& parsed); void Invalidate(); void AddNewIter(TruncatedRangeDelIterator* iter, const ParsedInternalKey& parsed) { iter->Seek(parsed.user_key); PushIter(iter, parsed); assert(active_iters_.size() == active_seqnums_.size()); } size_t UnusedIdx() const { return unused_idx_; } void IncUnusedIdx() { unused_idx_++; } private: using ActiveSeqSet = std::multiset; struct EndKeyMinComparator { explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {} bool operator()(const ActiveSeqSet::const_iterator& a, const ActiveSeqSet::const_iterator& b) const { return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0; } const InternalKeyComparator* icmp; }; void PushIter(TruncatedRangeDelIterator* iter, const ParsedInternalKey& parsed) { if (!iter->Valid()) { // The iterator has been fully consumed, so we don't need to add it to // either of 
the heaps. return; } int cmp = icmp_->Compare(parsed, iter->start_key()); if (cmp < 0) { PushInactiveIter(iter); } else { PushActiveIter(iter); } } void PushActiveIter(TruncatedRangeDelIterator* iter) { auto seq_pos = active_seqnums_.insert(iter); active_iters_.push(seq_pos); } TruncatedRangeDelIterator* PopActiveIter() { auto active_top = active_iters_.top(); auto iter = *active_top; active_iters_.pop(); active_seqnums_.erase(active_top); return iter; } void PushInactiveIter(TruncatedRangeDelIterator* iter) { inactive_iters_.push(iter); } TruncatedRangeDelIterator* PopInactiveIter() { auto* iter = inactive_iters_.top(); inactive_iters_.pop(); return iter; } const InternalKeyComparator* icmp_; size_t unused_idx_; ActiveSeqSet active_seqnums_; BinaryHeap active_iters_; BinaryHeap inactive_iters_; }; class ReverseRangeDelIterator { public: explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp); bool ShouldDelete(const ParsedInternalKey& parsed); void Invalidate(); void AddNewIter(TruncatedRangeDelIterator* iter, const ParsedInternalKey& parsed) { iter->SeekForPrev(parsed.user_key); PushIter(iter, parsed); assert(active_iters_.size() == active_seqnums_.size()); } size_t UnusedIdx() const { return unused_idx_; } void IncUnusedIdx() { unused_idx_++; } private: using ActiveSeqSet = std::multiset; struct EndKeyMaxComparator { explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {} bool operator()(const TruncatedRangeDelIterator* a, const TruncatedRangeDelIterator* b) const { return icmp->Compare(a->end_key(), b->end_key()) < 0; } const InternalKeyComparator* icmp; }; struct StartKeyMaxComparator { explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {} bool operator()(const ActiveSeqSet::const_iterator& a, const ActiveSeqSet::const_iterator& b) const { return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0; } const InternalKeyComparator* icmp; }; void PushIter(TruncatedRangeDelIterator* iter, const 
ParsedInternalKey& parsed) { if (!iter->Valid()) { // The iterator has been fully consumed, so we don't need to add it to // either of the heaps. } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) { PushInactiveIter(iter); } else { PushActiveIter(iter); } } void PushActiveIter(TruncatedRangeDelIterator* iter) { auto seq_pos = active_seqnums_.insert(iter); active_iters_.push(seq_pos); } TruncatedRangeDelIterator* PopActiveIter() { auto active_top = active_iters_.top(); auto iter = *active_top; active_iters_.pop(); active_seqnums_.erase(active_top); return iter; } void PushInactiveIter(TruncatedRangeDelIterator* iter) { inactive_iters_.push(iter); } TruncatedRangeDelIterator* PopInactiveIter() { auto* iter = inactive_iters_.top(); inactive_iters_.pop(); return iter; } const InternalKeyComparator* icmp_; size_t unused_idx_; ActiveSeqSet active_seqnums_; BinaryHeap active_iters_; BinaryHeap inactive_iters_; }; enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal }; class RangeDelAggregator { public: explicit RangeDelAggregator(const InternalKeyComparator* icmp) : icmp_(icmp) {} virtual ~RangeDelAggregator() {} virtual void AddTombstones( std::unique_ptr input_iter, const InternalKey* smallest = nullptr, const InternalKey* largest = nullptr) = 0; bool ShouldDelete(const Slice& key, RangeDelPositioningMode mode) { ParsedInternalKey parsed; if (!ParseInternalKey(key, &parsed)) { return false; } return ShouldDelete(parsed, mode); } virtual bool ShouldDelete(const ParsedInternalKey& parsed, RangeDelPositioningMode mode) = 0; virtual void InvalidateRangeDelMapPositions() = 0; virtual bool IsEmpty() const = 0; bool AddFile(uint64_t file_number) { return files_seen_.insert(file_number).second; } protected: class StripeRep { public: StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound, SequenceNumber lower_bound) : icmp_(icmp), forward_iter_(icmp), reverse_iter_(icmp), upper_bound_(upper_bound), lower_bound_(lower_bound) {} void 
AddTombstones(std::unique_ptr input_iter) { iters_.push_back(std::move(input_iter)); } bool IsEmpty() const { return iters_.empty(); } bool ShouldDelete(const ParsedInternalKey& parsed, RangeDelPositioningMode mode); void Invalidate() { if (!IsEmpty()) { InvalidateForwardIter(); InvalidateReverseIter(); } } bool IsRangeOverlapped(const Slice& start, const Slice& end); private: bool InStripe(SequenceNumber seq) const { return lower_bound_ <= seq && seq <= upper_bound_; } void InvalidateForwardIter() { forward_iter_.Invalidate(); } void InvalidateReverseIter() { reverse_iter_.Invalidate(); } const InternalKeyComparator* icmp_; std::vector> iters_; ForwardRangeDelIterator forward_iter_; ReverseRangeDelIterator reverse_iter_; SequenceNumber upper_bound_; SequenceNumber lower_bound_; }; const InternalKeyComparator* icmp_; private: std::set files_seen_; }; class ReadRangeDelAggregator final : public RangeDelAggregator { public: ReadRangeDelAggregator(const InternalKeyComparator* icmp, SequenceNumber upper_bound) : RangeDelAggregator(icmp), rep_(icmp, upper_bound, 0 /* lower_bound */) {} ~ReadRangeDelAggregator() override {} using RangeDelAggregator::ShouldDelete; void AddTombstones( std::unique_ptr input_iter, const InternalKey* smallest = nullptr, const InternalKey* largest = nullptr) override; bool ShouldDelete(const ParsedInternalKey& parsed, RangeDelPositioningMode mode) final override { if (rep_.IsEmpty()) { return false; } return ShouldDeleteImpl(parsed, mode); } bool IsRangeOverlapped(const Slice& start, const Slice& end); void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); } bool IsEmpty() const override { return rep_.IsEmpty(); } private: StripeRep rep_; bool ShouldDeleteImpl(const ParsedInternalKey& parsed, RangeDelPositioningMode mode); }; class CompactionRangeDelAggregator : public RangeDelAggregator { public: CompactionRangeDelAggregator(const InternalKeyComparator* icmp, const std::vector& snapshots) : RangeDelAggregator(icmp), 
snapshots_(&snapshots) {} ~CompactionRangeDelAggregator() override {} void AddTombstones( std::unique_ptr input_iter, const InternalKey* smallest = nullptr, const InternalKey* largest = nullptr) override; using RangeDelAggregator::ShouldDelete; bool ShouldDelete(const ParsedInternalKey& parsed, RangeDelPositioningMode mode) override; bool IsRangeOverlapped(const Slice& start, const Slice& end); void InvalidateRangeDelMapPositions() override { for (auto& rep : reps_) { rep.second.Invalidate(); } } bool IsEmpty() const override { for (const auto& rep : reps_) { if (!rep.second.IsEmpty()) { return false; } } return true; } // Creates an iterator over all the range tombstones in the aggregator, for // use in compaction. Nullptr arguments indicate that the iterator range is // unbounded. // NOTE: the boundaries are used for optimization purposes to reduce the // number of tombstones that are passed to the fragmenter; they do not // guarantee that the resulting iterator only contains range tombstones that // cover keys in the provided range. If required, these bounds must be // enforced during iteration. std::unique_ptr NewIterator( const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr, bool upper_bound_inclusive = false); private: std::vector> parent_iters_; std::map reps_; const std::vector* snapshots_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/range_del_aggregator_bench.cc000066400000000000000000000212121370372246700214200ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef GFLAGS
#include <cstdio>
int main() {
  fprintf(stderr, "Please install gflags to run rocksdb tools\n");
  return 1;
}
#else

#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h"
#include "rocksdb/comparator.h"
#include "rocksdb/env.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/random.h"
#include "util/stop_watch.h"

#include "util/gflags_compat.h"

using GFLAGS_NAMESPACE::ParseCommandLineFlags;

DEFINE_int32(num_range_tombstones, 1000, "number of range tombstones created");

DEFINE_int32(num_runs, 1000, "number of test runs");

DEFINE_int32(tombstone_start_upper_bound, 1000,
             "exclusive upper bound on range tombstone start keys");

DEFINE_int32(should_delete_upper_bound, 1000,
             "exclusive upper bound on keys passed to ShouldDelete");

DEFINE_double(tombstone_width_mean, 100.0, "average range tombstone width");

DEFINE_double(tombstone_width_stddev, 0.0,
              "standard deviation of range tombstone width");

DEFINE_int32(seed, 0, "random number generator seed");

DEFINE_int32(should_deletes_per_run, 1,
             "number of ShouldDelete calls per run");

DEFINE_int32(add_tombstones_per_run, 1,
             "number of AddTombstones calls per run");

namespace {

// Accumulated wall-clock time, in nanoseconds, of the three measured phases.
struct Stats {
  uint64_t time_add_tombstones = 0;
  uint64_t time_first_should_delete = 0;
  uint64_t time_rest_should_delete = 0;
};

// Prints the per-call averages in microseconds. The stream's formatting state
// is saved up front and restored before returning.
std::ostream& operator<<(std::ostream& os, const Stats& s) {
  std::ios fmt_holder(nullptr);
  fmt_holder.copyfmt(os);

  os << std::left;
  // The divisors start with the double literal so that the whole product is
  // evaluated in floating point; multiplying the two int32 flags first could
  // overflow for large flag values.
  os << std::setw(25) << "AddTombstones: "
     << s.time_add_tombstones /
            (1.0e3 * FLAGS_add_tombstones_per_run * FLAGS_num_runs)
     << " us\n";
  os << std::setw(25) << "ShouldDelete (first): "
     << s.time_first_should_delete / (1.0e3 * FLAGS_num_runs) << " us\n";
  if (FLAGS_should_deletes_per_run > 1) {
    os << std::setw(25) << "ShouldDelete (rest): "
       << s.time_rest_should_delete /
              (1.0e3 * (FLAGS_should_deletes_per_run - 1) * FLAGS_num_runs)
       << " us\n";
  }

  os.copyfmt(fmt_holder);
  return os;
}

auto icmp = ROCKSDB_NAMESPACE::InternalKeyComparator(
    ROCKSDB_NAMESPACE::BytewiseComparator());

}  // anonymous namespace

namespace ROCKSDB_NAMESPACE {

namespace {

// A wrapper around RangeTombstones and the underlying data of its start and end
// keys.
struct PersistentRangeTombstone {
  std::string start_key;
  std::string end_key;
  RangeTombstone tombstone;

  PersistentRangeTombstone(std::string start, std::string end,
                           SequenceNumber seq)
      : start_key(std::move(start)), end_key(std::move(end)) {
    tombstone = RangeTombstone(start_key, end_key, seq);
  }

  PersistentRangeTombstone() = default;

  PersistentRangeTombstone(const PersistentRangeTombstone& t) { *this = t; }

  // Copies the key strings and rebuilds `tombstone` from this object's own
  // strings rather than copying t.tombstone.
  PersistentRangeTombstone& operator=(const PersistentRangeTombstone& t) {
    start_key = t.start_key;
    end_key = t.end_key;
    tombstone = RangeTombstone(start_key, end_key, t.tombstone.seq_);

    return *this;
  }

  // `t` is an lvalue inside this body, so it must be cast back to an rvalue;
  // a plain `*this = t` would select the copy assignment above.
  PersistentRangeTombstone(PersistentRangeTombstone&& t) noexcept {
    *this = std::move(t);
  }

  // Takes over the key strings and rebuilds `tombstone` from this object's own
  // strings. Declared noexcept to match the move constructor that delegates
  // here.
  PersistentRangeTombstone& operator=(PersistentRangeTombstone&& t) noexcept {
    start_key = std::move(t.start_key);
    end_key = std::move(t.end_key);
    tombstone = RangeTombstone(start_key, end_key, t.tombstone.seq_);

    return *this;
  }
};

// Orders range tombstones by start key using the supplied comparator.
struct TombstoneStartKeyComparator {
  explicit TombstoneStartKeyComparator(const Comparator* c) : cmp(c) {}

  bool operator()(const RangeTombstone& a, const RangeTombstone& b) const {
    return cmp->Compare(a.start_key_, b.start_key_) < 0;
  }

  const Comparator* cmp;
};

// Serializes every tombstone into key/value strings and returns an iterator
// over those copies.
std::unique_ptr<InternalIterator> MakeRangeDelIterator(
    const std::vector<PersistentRangeTombstone>& range_dels) {
  std::vector<std::string> keys, values;
  for (const auto& range_del : range_dels) {
    auto key_and_value = range_del.tombstone.Serialize();
    keys.push_back(key_and_value.first.Encode().ToString());
    values.push_back(key_and_value.second.ToString());
  }
  return std::unique_ptr<test::VectorIterator>(
      new test::VectorIterator(keys, values));
}

// convert long to a big-endian slice key
static std::string Key(int64_t val) {
  std::string little_endian_key;
  std::string big_endian_key;
  PutFixed64(&little_endian_key, val);
  assert(little_endian_key.size() == sizeof(val));
  big_endian_key.resize(sizeof(val));
  // Reverse the byte order of the fixed-width encoding.
  for (size_t i = 0; i < sizeof(val); ++i) {
    big_endian_key[i] = little_endian_key[sizeof(val) - 1 - i];
  }
  return big_endian_key;
}

}  // anonymous namespace

}  // namespace ROCKSDB_NAMESPACE

// Benchmark driver: for each run, builds a fresh ReadRangeDelAggregator, times
// AddTombstones for every tombstone batch, then times a sequence of
// ShouldDelete calls on consecutive keys, and finally prints the averages.
int main(int argc, char** argv) {
  ParseCommandLineFlags(&argc, &argv, true);

  Stats stats;
  ROCKSDB_NAMESPACE::Random64 rnd(FLAGS_seed);
  std::default_random_engine random_gen(FLAGS_seed);
  std::normal_distribution<double> normal_dist(FLAGS_tombstone_width_mean,
                                               FLAGS_tombstone_width_stddev);
  std::vector<std::vector<ROCKSDB_NAMESPACE::PersistentRangeTombstone> >
      all_persistent_range_tombstones(FLAGS_add_tombstones_per_run);
  for (int i = 0; i < FLAGS_add_tombstones_per_run; i++) {
    all_persistent_range_tombstones[i] =
        std::vector<ROCKSDB_NAMESPACE::PersistentRangeTombstone>(
            FLAGS_num_range_tombstones);
  }
  auto mode = ROCKSDB_NAMESPACE::RangeDelPositioningMode::kForwardTraversal;

  for (int i = 0; i < FLAGS_num_runs; i++) {
    ROCKSDB_NAMESPACE::ReadRangeDelAggregator range_del_agg(
        &icmp, ROCKSDB_NAMESPACE::kMaxSequenceNumber /* upper_bound */);

    // Owns the fragmented lists for the duration of the run; the iterators
    // handed to the aggregator point into them. Reserve instead of
    // pre-sizing: the entries are appended with emplace_back below, so
    // pre-sizing would leave an equal number of unused null entries in front.
    std::vector<
        std::unique_ptr<ROCKSDB_NAMESPACE::FragmentedRangeTombstoneList> >
        fragmented_range_tombstone_lists;
    fragmented_range_tombstone_lists.reserve(
        static_cast<size_t>(FLAGS_add_tombstones_per_run));

    for (auto& persistent_range_tombstones : all_persistent_range_tombstones) {
      // TODO(abhimadan): consider whether creating the range tombstones right
      // before AddTombstones is artificially warming the cache compared to
      // real workloads.
      for (int j = 0; j < FLAGS_num_range_tombstones; j++) {
        uint64_t start = rnd.Uniform(FLAGS_tombstone_start_upper_bound);
        // Every tombstone is at least one key wide.
        uint64_t end = static_cast<uint64_t>(
            std::round(start + std::max(1.0, normal_dist(random_gen))));
        persistent_range_tombstones[j] =
            ROCKSDB_NAMESPACE::PersistentRangeTombstone(
                ROCKSDB_NAMESPACE::Key(start), ROCKSDB_NAMESPACE::Key(end), j);
      }

      // Build the unfragmented iterator once and hand it to the fragmenter.
      auto range_del_iter =
          ROCKSDB_NAMESPACE::MakeRangeDelIterator(persistent_range_tombstones);
      fragmented_range_tombstone_lists.emplace_back(
          new ROCKSDB_NAMESPACE::FragmentedRangeTombstoneList(
              std::move(range_del_iter), icmp));
      std::unique_ptr<ROCKSDB_NAMESPACE::FragmentedRangeTombstoneIterator>
          fragmented_range_del_iter(
              new ROCKSDB_NAMESPACE::FragmentedRangeTombstoneIterator(
                  fragmented_range_tombstone_lists.back().get(), icmp,
                  ROCKSDB_NAMESPACE::kMaxSequenceNumber));

      // Only the AddTombstones call itself is timed.
      ROCKSDB_NAMESPACE::StopWatchNano stop_watch_add_tombstones(
          ROCKSDB_NAMESPACE::Env::Default(), true /* auto_start */);
      range_del_agg.AddTombstones(std::move(fragmented_range_del_iter));
      stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos();
    }

    ROCKSDB_NAMESPACE::ParsedInternalKey parsed_key;
    parsed_key.sequence = FLAGS_num_range_tombstones / 2;
    parsed_key.type = ROCKSDB_NAMESPACE::kTypeValue;

    uint64_t first_key = rnd.Uniform(FLAGS_should_delete_upper_bound -
                                     FLAGS_should_deletes_per_run + 1);

    for (int j = 0; j < FLAGS_should_deletes_per_run; j++) {
      std::string key_string = ROCKSDB_NAMESPACE::Key(first_key + j);
      parsed_key.user_key = key_string;

      ROCKSDB_NAMESPACE::StopWatchNano stop_watch_should_delete(
          ROCKSDB_NAMESPACE::Env::Default(), true /* auto_start */);
      range_del_agg.ShouldDelete(parsed_key, mode);
      uint64_t call_time = stop_watch_should_delete.ElapsedNanos();

      // The first lookup of a run is accounted separately from the rest.
      if (j == 0) {
        stats.time_first_should_delete += call_time;
      } else {
        stats.time_rest_should_delete += call_time;
      }
    }
  }

  std::cout << "=========================\n"
            << "Results:\n"
            << "=========================\n"
            << stats;

  return 0;
}

#endif  // GFLAGS
rocksdb-6.11.4/db/range_del_aggregator_test.cc000066400000000000000000000753671370372246700213440ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/range_del_aggregator.h" #include #include #include #include "db/db_test_util.h" #include "db/dbformat.h" #include "db/range_tombstone_fragmenter.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { class RangeDelAggregatorTest : public testing::Test {}; namespace { static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator()); std::unique_ptr MakeRangeDelIter( const std::vector& range_dels) { std::vector keys, values; for (const auto& range_del : range_dels) { auto key_and_value = range_del.Serialize(); keys.push_back(key_and_value.first.Encode().ToString()); values.push_back(key_and_value.second.ToString()); } return std::unique_ptr( new test::VectorIterator(keys, values)); } std::vector> MakeFragmentedTombstoneLists( const std::vector>& range_dels_list) { std::vector> fragment_lists; for (const auto& range_dels : range_dels_list) { auto range_del_iter = MakeRangeDelIter(range_dels); fragment_lists.emplace_back(new FragmentedRangeTombstoneList( std::move(range_del_iter), bytewise_icmp)); } return fragment_lists; } struct TruncatedIterScanTestCase { ParsedInternalKey start; ParsedInternalKey end; SequenceNumber seq; }; struct TruncatedIterSeekTestCase { Slice target; ParsedInternalKey start; ParsedInternalKey end; SequenceNumber seq; bool invalid; }; struct ShouldDeleteTestCase { ParsedInternalKey lookup_key; bool result; }; struct IsRangeOverlappedTestCase { Slice start; Slice end; bool result; }; ParsedInternalKey UncutEndpoint(const Slice& s) { return ParsedInternalKey(s, kMaxSequenceNumber, kTypeRangeDeletion); } ParsedInternalKey InternalValue(const 
Slice& key, SequenceNumber seq) { return ParsedInternalKey(key, seq, kTypeValue); } void VerifyIterator( TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp, const std::vector& expected_range_dels) { // Test forward iteration. iter->SeekToFirst(); for (size_t i = 0; i < expected_range_dels.size(); i++, iter->Next()) { ASSERT_TRUE(iter->Valid()); EXPECT_EQ(0, icmp.Compare(iter->start_key(), expected_range_dels[i].start)); EXPECT_EQ(0, icmp.Compare(iter->end_key(), expected_range_dels[i].end)); EXPECT_EQ(expected_range_dels[i].seq, iter->seq()); } EXPECT_FALSE(iter->Valid()); // Test reverse iteration. iter->SeekToLast(); std::vector reverse_expected_range_dels( expected_range_dels.rbegin(), expected_range_dels.rend()); for (size_t i = 0; i < reverse_expected_range_dels.size(); i++, iter->Prev()) { ASSERT_TRUE(iter->Valid()); EXPECT_EQ(0, icmp.Compare(iter->start_key(), reverse_expected_range_dels[i].start)); EXPECT_EQ( 0, icmp.Compare(iter->end_key(), reverse_expected_range_dels[i].end)); EXPECT_EQ(reverse_expected_range_dels[i].seq, iter->seq()); } EXPECT_FALSE(iter->Valid()); } void VerifySeek(TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp, const std::vector& test_cases) { for (const auto& test_case : test_cases) { iter->Seek(test_case.target); if (test_case.invalid) { ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); EXPECT_EQ(0, icmp.Compare(iter->start_key(), test_case.start)); EXPECT_EQ(0, icmp.Compare(iter->end_key(), test_case.end)); EXPECT_EQ(test_case.seq, iter->seq()); } } } void VerifySeekForPrev( TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp, const std::vector& test_cases) { for (const auto& test_case : test_cases) { iter->SeekForPrev(test_case.target); if (test_case.invalid) { ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); EXPECT_EQ(0, icmp.Compare(iter->start_key(), test_case.start)); EXPECT_EQ(0, icmp.Compare(iter->end_key(), test_case.end)); 
EXPECT_EQ(test_case.seq, iter->seq()); } } } void VerifyShouldDelete(RangeDelAggregator* range_del_agg, const std::vector& test_cases) { for (const auto& test_case : test_cases) { EXPECT_EQ( test_case.result, range_del_agg->ShouldDelete( test_case.lookup_key, RangeDelPositioningMode::kForwardTraversal)); } for (auto it = test_cases.rbegin(); it != test_cases.rend(); ++it) { const auto& test_case = *it; EXPECT_EQ( test_case.result, range_del_agg->ShouldDelete( test_case.lookup_key, RangeDelPositioningMode::kBackwardTraversal)); } } void VerifyIsRangeOverlapped( ReadRangeDelAggregator* range_del_agg, const std::vector& test_cases) { for (const auto& test_case : test_cases) { EXPECT_EQ(test_case.result, range_del_agg->IsRangeOverlapped(test_case.start, test_case.end)); } } void CheckIterPosition(const RangeTombstone& tombstone, const FragmentedRangeTombstoneIterator* iter) { // Test InternalIterator interface. EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key())); EXPECT_EQ(tombstone.end_key_, iter->value()); EXPECT_EQ(tombstone.seq_, iter->seq()); // Test FragmentedRangeTombstoneIterator interface. 
EXPECT_EQ(tombstone.start_key_, iter->start_key()); EXPECT_EQ(tombstone.end_key_, iter->end_key()); EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key())); } void VerifyFragmentedRangeDels( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToFirst(); for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) { ASSERT_TRUE(iter->Valid()); CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } } // namespace TEST_F(RangeDelAggregatorTest, EmptyTruncatedIter) { auto range_del_iter = MakeRangeDelIter({}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, kMaxSequenceNumber)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); iter.SeekToFirst(); ASSERT_FALSE(iter.Valid()); iter.SeekToLast(); ASSERT_FALSE(iter.Valid()); } TEST_F(RangeDelAggregatorTest, UntruncatedIter) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, kMaxSequenceNumber)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); VerifyIterator(&iter, bytewise_icmp, {{UncutEndpoint("a"), UncutEndpoint("e"), 10}, {UncutEndpoint("e"), UncutEndpoint("g"), 8}, {UncutEndpoint("j"), UncutEndpoint("n"), 4}}); VerifySeek( &iter, bytewise_icmp, {{"d", UncutEndpoint("a"), UncutEndpoint("e"), 10}, {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"ia", UncutEndpoint("j"), UncutEndpoint("n"), 4}, {"n", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}, {"", UncutEndpoint("a"), UncutEndpoint("e"), 10}}); VerifySeekForPrev( &iter, bytewise_icmp, {{"d", UncutEndpoint("a"), UncutEndpoint("e"), 10}, {"e", 
UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"ia", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"n", UncutEndpoint("j"), UncutEndpoint("n"), 4}, {"", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}}); } TEST_F(RangeDelAggregatorTest, UntruncatedIterWithSnapshot) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, 9 /* snapshot */)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); VerifyIterator(&iter, bytewise_icmp, {{UncutEndpoint("e"), UncutEndpoint("g"), 8}, {UncutEndpoint("j"), UncutEndpoint("n"), 4}}); VerifySeek( &iter, bytewise_icmp, {{"d", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"ia", UncutEndpoint("j"), UncutEndpoint("n"), 4}, {"n", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}, {"", UncutEndpoint("e"), UncutEndpoint("g"), 8}}); VerifySeekForPrev( &iter, bytewise_icmp, {{"d", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}, {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"ia", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"n", UncutEndpoint("j"), UncutEndpoint("n"), 4}, {"", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}}); } TEST_F(RangeDelAggregatorTest, TruncatedIterPartiallyCutTombstones) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, kMaxSequenceNumber)); InternalKey smallest("d", 7, kTypeValue); InternalKey largest("m", 9, kTypeValue); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, &smallest, &largest); VerifyIterator(&iter, bytewise_icmp, 
{{InternalValue("d", 7), UncutEndpoint("e"), 10}, {UncutEndpoint("e"), UncutEndpoint("g"), 8}, {UncutEndpoint("j"), InternalValue("m", 8), 4}}); VerifySeek( &iter, bytewise_icmp, {{"d", InternalValue("d", 7), UncutEndpoint("e"), 10}, {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"ia", UncutEndpoint("j"), InternalValue("m", 8), 4}, {"n", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}, {"", InternalValue("d", 7), UncutEndpoint("e"), 10}}); VerifySeekForPrev( &iter, bytewise_icmp, {{"d", InternalValue("d", 7), UncutEndpoint("e"), 10}, {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"ia", UncutEndpoint("e"), UncutEndpoint("g"), 8}, {"n", UncutEndpoint("j"), InternalValue("m", 8), 4}, {"", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}}); } TEST_F(RangeDelAggregatorTest, TruncatedIterFullyCutTombstones) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, kMaxSequenceNumber)); InternalKey smallest("f", 7, kTypeValue); InternalKey largest("i", 9, kTypeValue); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, &smallest, &largest); VerifyIterator(&iter, bytewise_icmp, {{InternalValue("f", 7), UncutEndpoint("g"), 8}}); VerifySeek( &iter, bytewise_icmp, {{"d", InternalValue("f", 7), UncutEndpoint("g"), 8}, {"f", InternalValue("f", 7), UncutEndpoint("g"), 8}, {"j", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}}); VerifySeekForPrev( &iter, bytewise_icmp, {{"d", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}, {"f", InternalValue("f", 7), UncutEndpoint("g"), 8}, {"j", InternalValue("f", 7), UncutEndpoint("g"), 8}}); } TEST_F(RangeDelAggregatorTest, SingleIterInAggregator) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}}); FragmentedRangeTombstoneList 
fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, kMaxSequenceNumber)); ReadRangeDelAggregator range_del_agg(&bytewise_icmp, kMaxSequenceNumber); range_del_agg.AddTombstones(std::move(input_iter)); VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), false}, {InternalValue("b", 9), true}, {InternalValue("d", 9), true}, {InternalValue("e", 7), true}, {InternalValue("g", 7), false}}); VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, {"_", "a", true}, {"a", "c", true}, {"d", "f", true}, {"g", "l", false}}); } TEST_F(RangeDelAggregatorTest, MultipleItersInAggregator) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); ReadRangeDelAggregator range_del_agg(&bytewise_icmp, kMaxSequenceNumber); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), true}, {InternalValue("b", 19), false}, {InternalValue("b", 9), true}, {InternalValue("d", 9), true}, {InternalValue("e", 7), true}, {InternalValue("g", 7), false}, {InternalValue("h", 24), true}, {InternalValue("i", 24), false}, {InternalValue("ii", 14), true}, {InternalValue("j", 14), false}}); VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, {"_", "a", true}, {"a", "c", true}, {"d", "f", true}, {"g", "l", true}, {"x", "y", false}}); } TEST_F(RangeDelAggregatorTest, MultipleItersInAggregatorWithUpperBound) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); ReadRangeDelAggregator range_del_agg(&bytewise_icmp, 19); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( 
new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter)); } VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), false}, {InternalValue("a", 9), true}, {InternalValue("b", 9), true}, {InternalValue("d", 9), true}, {InternalValue("e", 7), true}, {InternalValue("g", 7), false}, {InternalValue("h", 24), false}, {InternalValue("i", 24), false}, {InternalValue("ii", 14), true}, {InternalValue("j", 14), false}}); VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, {"_", "a", true}, {"a", "c", true}, {"d", "f", true}, {"g", "l", true}, {"x", "y", false}}); } TEST_F(RangeDelAggregatorTest, MultipleTruncatedItersInAggregator) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}}); std::vector> iter_bounds = { {InternalKey("a", 4, kTypeValue), InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)}, {InternalKey("m", 20, kTypeValue), InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)}, {InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}}; ReadRangeDelAggregator range_del_agg(&bytewise_icmp, 19); for (size_t i = 0; i < fragment_lists.size(); i++) { const auto& fragment_list = fragment_lists[i]; const auto& bounds = iter_bounds[i]; std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter), &bounds.first, &bounds.second); } VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 10), false}, {InternalValue("a", 9), false}, {InternalValue("a", 4), true}, {InternalValue("m", 10), false}, {InternalValue("m", 9), true}, {InternalValue("x", 10), false}, {InternalValue("x", 9), false}, {InternalValue("x", 5), true}, {InternalValue("z", 9), false}}); VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, {"_", "a", true}, {"a", "n", true}, {"l", "x", true}, {"w", "z", true}, {"zzz", "zz", 
false}, {"zz", "zzz", false}}); } TEST_F(RangeDelAggregatorTest, MultipleTruncatedItersInAggregatorSameLevel) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}}); std::vector> iter_bounds = { {InternalKey("a", 4, kTypeValue), InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)}, {InternalKey("m", 20, kTypeValue), InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)}, {InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}}; ReadRangeDelAggregator range_del_agg(&bytewise_icmp, 19); auto add_iter_to_agg = [&](size_t i) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_lists[i].get(), bytewise_icmp, 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter), &iter_bounds[i].first, &iter_bounds[i].second); }; add_iter_to_agg(0); VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 10), false}, {InternalValue("a", 9), false}, {InternalValue("a", 4), true}}); add_iter_to_agg(1); VerifyShouldDelete(&range_del_agg, {{InternalValue("m", 10), false}, {InternalValue("m", 9), true}}); add_iter_to_agg(2); VerifyShouldDelete(&range_del_agg, {{InternalValue("x", 10), false}, {InternalValue("x", 9), false}, {InternalValue("x", 5), true}, {InternalValue("z", 9), false}}); VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false}, {"_", "a", true}, {"a", "n", true}, {"l", "x", true}, {"w", "z", true}, {"zzz", "zz", false}, {"zz", "zzz", false}}); } TEST_F(RangeDelAggregatorTest, CompactionAggregatorNoSnapshots) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); std::vector snapshots; CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); 
} VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), true}, {InternalValue("b", 19), false}, {InternalValue("b", 9), true}, {InternalValue("d", 9), true}, {InternalValue("e", 7), true}, {InternalValue("g", 7), false}, {InternalValue("h", 24), true}, {InternalValue("i", 24), false}, {InternalValue("ii", 14), true}, {InternalValue("j", 14), false}}); auto range_del_compaction_iter = range_del_agg.NewIterator(); VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {{"a", "b", 20}, {"b", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, {"h", "i", 25}, {"ii", "j", 15}}); } TEST_F(RangeDelAggregatorTest, CompactionAggregatorWithSnapshots) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); std::vector snapshots{9, 19}; CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } VerifyShouldDelete( &range_del_agg, { {InternalValue("a", 19), false}, // [10, 19] {InternalValue("a", 9), false}, // [0, 9] {InternalValue("b", 9), false}, // [0, 9] {InternalValue("d", 9), false}, // [0, 9] {InternalValue("d", 7), true}, // [0, 9] {InternalValue("e", 7), true}, // [0, 9] {InternalValue("g", 7), false}, // [0, 9] {InternalValue("h", 24), true}, // [20, kMaxSequenceNumber] {InternalValue("i", 24), false}, // [20, kMaxSequenceNumber] {InternalValue("ii", 14), true}, // [10, 19] {InternalValue("j", 14), false} // [10, 19] }); auto range_del_compaction_iter = range_del_agg.NewIterator(); VerifyFragmentedRangeDels(range_del_compaction_iter.get(), {{"a", "b", 20}, {"a", "b", 10}, {"b", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, {"e", "g", 8}, {"h", "i", 25}, {"ii", "j", 15}}); } TEST_F(RangeDelAggregatorTest, CompactionAggregatorEmptyIteratorLeft) { auto 
fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); std::vector snapshots{9, 19}; CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } Slice start("_"); Slice end("__"); } TEST_F(RangeDelAggregatorTest, CompactionAggregatorEmptyIteratorRight) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); std::vector snapshots{9, 19}; CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } Slice start("p"); Slice end("q"); auto range_del_compaction_iter1 = range_del_agg.NewIterator(&start, &end, false /* end_key_inclusive */); VerifyFragmentedRangeDels(range_del_compaction_iter1.get(), {}); auto range_del_compaction_iter2 = range_del_agg.NewIterator(&start, &end, true /* end_key_inclusive */); VerifyFragmentedRangeDels(range_del_compaction_iter2.get(), {}); } TEST_F(RangeDelAggregatorTest, CompactionAggregatorBoundedIterator) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "e", 10}, {"c", "g", 8}}, {{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}}); std::vector snapshots{9, 19}; CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } Slice start("bb"); Slice end("e"); auto 
range_del_compaction_iter1 = range_del_agg.NewIterator(&start, &end, false /* end_key_inclusive */); VerifyFragmentedRangeDels(range_del_compaction_iter1.get(), {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}}); auto range_del_compaction_iter2 = range_del_agg.NewIterator(&start, &end, true /* end_key_inclusive */); VerifyFragmentedRangeDels( range_del_compaction_iter2.get(), {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, {"e", "g", 8}}); } TEST_F(RangeDelAggregatorTest, CompactionAggregatorBoundedIteratorExtraFragments) { auto fragment_lists = MakeFragmentedTombstoneLists( {{{"a", "d", 10}, {"c", "g", 8}}, {{"b", "c", 20}, {"d", "f", 30}, {"h", "i", 25}, {"ii", "j", 15}}}); std::vector snapshots{9, 19}; CompactionRangeDelAggregator range_del_agg(&bytewise_icmp, snapshots); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } Slice start("bb"); Slice end("e"); auto range_del_compaction_iter1 = range_del_agg.NewIterator(&start, &end, false /* end_key_inclusive */); VerifyFragmentedRangeDels(range_del_compaction_iter1.get(), {{"a", "b", 10}, {"b", "c", 20}, {"b", "c", 10}, {"c", "d", 10}, {"c", "d", 8}, {"d", "f", 30}, {"d", "f", 8}, {"f", "g", 8}}); auto range_del_compaction_iter2 = range_del_agg.NewIterator(&start, &end, true /* end_key_inclusive */); VerifyFragmentedRangeDels(range_del_compaction_iter2.get(), {{"a", "b", 10}, {"b", "c", 20}, {"b", "c", 10}, {"c", "d", 10}, {"c", "d", 8}, {"d", "f", 30}, {"d", "f", 8}, {"f", "g", 8}}); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/range_tombstone_fragmenter.cc000066400000000000000000000377261370372246700215600ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/range_tombstone_fragmenter.h" #include #include #include #include #include #include "util/autovector.h" #include "util/kv_map.h" #include "util/vector_iterator.h" namespace ROCKSDB_NAMESPACE { FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, const InternalKeyComparator& icmp, bool for_compaction, const std::vector& snapshots) { if (unfragmented_tombstones == nullptr) { return; } bool is_sorted = true; int num_tombstones = 0; InternalKey pinned_last_start_key; Slice last_start_key; for (unfragmented_tombstones->SeekToFirst(); unfragmented_tombstones->Valid(); unfragmented_tombstones->Next(), num_tombstones++) { if (num_tombstones > 0 && icmp.Compare(last_start_key, unfragmented_tombstones->key()) > 0) { is_sorted = false; break; } if (unfragmented_tombstones->IsKeyPinned()) { last_start_key = unfragmented_tombstones->key(); } else { pinned_last_start_key.DecodeFrom(unfragmented_tombstones->key()); last_start_key = pinned_last_start_key.Encode(); } } if (is_sorted) { FragmentTombstones(std::move(unfragmented_tombstones), icmp, for_compaction, snapshots); return; } // Sort the tombstones before fragmenting them. std::vector keys, values; keys.reserve(num_tombstones); values.reserve(num_tombstones); for (unfragmented_tombstones->SeekToFirst(); unfragmented_tombstones->Valid(); unfragmented_tombstones->Next()) { keys.emplace_back(unfragmented_tombstones->key().data(), unfragmented_tombstones->key().size()); values.emplace_back(unfragmented_tombstones->value().data(), unfragmented_tombstones->value().size()); } // VectorIterator implicitly sorts by key during construction. 
auto iter = std::unique_ptr( new VectorIterator(std::move(keys), std::move(values), &icmp)); FragmentTombstones(std::move(iter), icmp, for_compaction, snapshots); } void FragmentedRangeTombstoneList::FragmentTombstones( std::unique_ptr unfragmented_tombstones, const InternalKeyComparator& icmp, bool for_compaction, const std::vector& snapshots) { Slice cur_start_key(nullptr, 0); auto cmp = ParsedInternalKeyComparator(&icmp); // Stores the end keys and sequence numbers of range tombstones with a start // key less than or equal to cur_start_key. Provides an ordering by end key // for use in flush_current_tombstones. std::set cur_end_keys(cmp); // Given the next start key in unfragmented_tombstones, // flush_current_tombstones writes every tombstone fragment that starts // and ends with a key before next_start_key, and starts with a key greater // than or equal to cur_start_key. auto flush_current_tombstones = [&](const Slice& next_start_key) { auto it = cur_end_keys.begin(); bool reached_next_start_key = false; for (; it != cur_end_keys.end() && !reached_next_start_key; ++it) { Slice cur_end_key = it->user_key; if (icmp.user_comparator()->Compare(cur_start_key, cur_end_key) == 0) { // Empty tombstone. continue; } if (icmp.user_comparator()->Compare(next_start_key, cur_end_key) <= 0) { // All of the end keys in [it, cur_end_keys.end()) are after // next_start_key, so the tombstones they represent can be used in // fragments that start with keys greater than or equal to // next_start_key. However, the end keys we already passed will not be // used in any more tombstone fragments. // // Remove the fully fragmented tombstones and stop iteration after a // final round of flushing to preserve the tombstones we can create more // fragments from. 
reached_next_start_key = true; cur_end_keys.erase(cur_end_keys.begin(), it); cur_end_key = next_start_key; } // Flush a range tombstone fragment [cur_start_key, cur_end_key), which // should not overlap with the last-flushed tombstone fragment. assert(tombstones_.empty() || icmp.user_comparator()->Compare(tombstones_.back().end_key, cur_start_key) <= 0); // Sort the sequence numbers of the tombstones being fragmented in // descending order, and then flush them in that order. autovector seqnums_to_flush; for (auto flush_it = it; flush_it != cur_end_keys.end(); ++flush_it) { seqnums_to_flush.push_back(flush_it->sequence); } std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(), std::greater()); size_t start_idx = tombstone_seqs_.size(); size_t end_idx = start_idx + seqnums_to_flush.size(); if (for_compaction) { // Drop all tombstone seqnums that are not preserved by a snapshot. SequenceNumber next_snapshot = kMaxSequenceNumber; for (auto seq : seqnums_to_flush) { if (seq <= next_snapshot) { // This seqnum is visible by a lower snapshot. tombstone_seqs_.push_back(seq); seq_set_.insert(seq); auto upper_bound_it = std::lower_bound(snapshots.begin(), snapshots.end(), seq); if (upper_bound_it == snapshots.begin()) { // This seqnum is the topmost one visible by the earliest // snapshot. None of the seqnums below it will be visible, so we // can skip them. break; } next_snapshot = *std::prev(upper_bound_it); } } end_idx = tombstone_seqs_.size(); } else { // The fragmentation is being done for reads, so preserve all seqnums. tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), seqnums_to_flush.end()); seq_set_.insert(seqnums_to_flush.begin(), seqnums_to_flush.end()); } assert(start_idx < end_idx); tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, end_idx); cur_start_key = cur_end_key; } if (!reached_next_start_key) { // There is a gap between the last flushed tombstone fragment and // the next tombstone's start key. 
Remove all the end keys in // the working set, since we have fully fragmented their corresponding // tombstones. cur_end_keys.clear(); } cur_start_key = next_start_key; }; pinned_iters_mgr_.StartPinning(); bool no_tombstones = true; for (unfragmented_tombstones->SeekToFirst(); unfragmented_tombstones->Valid(); unfragmented_tombstones->Next()) { const Slice& ikey = unfragmented_tombstones->key(); Slice tombstone_start_key = ExtractUserKey(ikey); SequenceNumber tombstone_seq = GetInternalKeySeqno(ikey); if (!unfragmented_tombstones->IsKeyPinned()) { pinned_slices_.emplace_back(tombstone_start_key.data(), tombstone_start_key.size()); tombstone_start_key = pinned_slices_.back(); } no_tombstones = false; Slice tombstone_end_key = unfragmented_tombstones->value(); if (!unfragmented_tombstones->IsValuePinned()) { pinned_slices_.emplace_back(tombstone_end_key.data(), tombstone_end_key.size()); tombstone_end_key = pinned_slices_.back(); } if (!cur_end_keys.empty() && icmp.user_comparator()->Compare( cur_start_key, tombstone_start_key) != 0) { // The start key has changed. Flush all tombstones that start before // this new start key. 
flush_current_tombstones(tombstone_start_key); } cur_start_key = tombstone_start_key; cur_end_keys.emplace(tombstone_end_key, tombstone_seq, kTypeRangeDeletion); } if (!cur_end_keys.empty()) { ParsedInternalKey last_end_key = *std::prev(cur_end_keys.end()); flush_current_tombstones(last_end_key.user_key); } if (!no_tombstones) { pinned_iters_mgr_.PinIterator(unfragmented_tombstones.release(), false /* arena */); } } bool FragmentedRangeTombstoneList::ContainsRange(SequenceNumber lower, SequenceNumber upper) const { auto seq_it = seq_set_.lower_bound(lower); return seq_it != seq_set_.end() && *seq_it <= upper; } FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( const FragmentedRangeTombstoneList* tombstones, const InternalKeyComparator& icmp, SequenceNumber _upper_bound, SequenceNumber _lower_bound) : tombstone_start_cmp_(icmp.user_comparator()), tombstone_end_cmp_(icmp.user_comparator()), icmp_(&icmp), ucmp_(icmp.user_comparator()), tombstones_(tombstones), upper_bound_(_upper_bound), lower_bound_(_lower_bound) { assert(tombstones_ != nullptr); Invalidate(); } FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, const InternalKeyComparator& icmp, SequenceNumber _upper_bound, SequenceNumber _lower_bound) : tombstone_start_cmp_(icmp.user_comparator()), tombstone_end_cmp_(icmp.user_comparator()), icmp_(&icmp), ucmp_(icmp.user_comparator()), tombstones_ref_(tombstones), tombstones_(tombstones_ref_.get()), upper_bound_(_upper_bound), lower_bound_(_lower_bound) { assert(tombstones_ != nullptr); Invalidate(); } void FragmentedRangeTombstoneIterator::SeekToFirst() { pos_ = tombstones_->begin(); seq_pos_ = tombstones_->seq_begin(); } void FragmentedRangeTombstoneIterator::SeekToTopFirst() { if (tombstones_->empty()) { Invalidate(); return; } pos_ = tombstones_->begin(); seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, 
std::greater()); ScanForwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::SeekToLast() { pos_ = std::prev(tombstones_->end()); seq_pos_ = std::prev(tombstones_->seq_end()); } void FragmentedRangeTombstoneIterator::SeekToTopLast() { if (tombstones_->empty()) { Invalidate(); return; } pos_ = std::prev(tombstones_->end()); seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); ScanBackwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::Seek(const Slice& target) { if (tombstones_->empty()) { Invalidate(); return; } SeekToCoveringTombstone(target); ScanForwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::SeekForPrev(const Slice& target) { if (tombstones_->empty()) { Invalidate(); return; } SeekForPrevToCoveringTombstone(target); ScanBackwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::SeekToCoveringTombstone( const Slice& target) { pos_ = std::upper_bound(tombstones_->begin(), tombstones_->end(), target, tombstone_end_cmp_); if (pos_ == tombstones_->end()) { // All tombstones end before target. seq_pos_ = tombstones_->seq_end(); return; } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); } void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( const Slice& target) { if (tombstones_->empty()) { Invalidate(); return; } pos_ = std::upper_bound(tombstones_->begin(), tombstones_->end(), target, tombstone_start_cmp_); if (pos_ == tombstones_->begin()) { // All tombstones start after target. 
Invalidate(); return; } --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); } void FragmentedRangeTombstoneIterator::ScanForwardToVisibleTombstone() { while (pos_ != tombstones_->end() && (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || *seq_pos_ < lower_bound_)) { ++pos_; if (pos_ == tombstones_->end()) { Invalidate(); return; } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); } } void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() { while (pos_ != tombstones_->end() && (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || *seq_pos_ < lower_bound_)) { if (pos_ == tombstones_->begin()) { Invalidate(); return; } --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); } } void FragmentedRangeTombstoneIterator::Next() { ++seq_pos_; if (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { ++pos_; } } void FragmentedRangeTombstoneIterator::TopNext() { ++pos_; if (pos_ == tombstones_->end()) { return; } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); ScanForwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::Prev() { if (seq_pos_ == tombstones_->seq_begin()) { Invalidate(); return; } --seq_pos_; if (pos_ == tombstones_->end() || seq_pos_ == tombstones_->seq_iter(pos_->seq_start_idx - 1)) { --pos_; } } void FragmentedRangeTombstoneIterator::TopPrev() { if (pos_ == tombstones_->begin()) { Invalidate(); return; } --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), upper_bound_, std::greater()); ScanBackwardToVisibleTombstone(); } bool 
FragmentedRangeTombstoneIterator::Valid() const { return tombstones_ != nullptr && pos_ != tombstones_->end(); } SequenceNumber FragmentedRangeTombstoneIterator::MaxCoveringTombstoneSeqnum( const Slice& target_user_key) { SeekToCoveringTombstone(target_user_key); return ValidPos() && ucmp_->Compare(start_key(), target_user_key) <= 0 ? seq() : 0; } std::map> FragmentedRangeTombstoneIterator::SplitBySnapshot( const std::vector& snapshots) { std::map> splits; SequenceNumber lower = 0; SequenceNumber upper; for (size_t i = 0; i <= snapshots.size(); i++) { if (i >= snapshots.size()) { upper = kMaxSequenceNumber; } else { upper = snapshots[i]; } if (tombstones_->ContainsRange(lower, upper)) { splits.emplace(upper, std::unique_ptr( new FragmentedRangeTombstoneIterator( tombstones_, *icmp_, upper, lower))); } lower = upper + 1; } return splits; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/range_tombstone_fragmenter.h000066400000000000000000000224621370372246700214110ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include "db/dbformat.h" #include "db/pinned_iterators_manager.h" #include "rocksdb/status.h" #include "table/internal_iterator.h" namespace ROCKSDB_NAMESPACE { struct FragmentedRangeTombstoneList { public: // A compact representation of a "stack" of range tombstone fragments, which // start and end at the same user keys but have different sequence numbers. // The members seq_start_idx and seq_end_idx are intended to be parameters to // seq_iter(). 
struct RangeTombstoneStack { RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx, size_t end_idx) : start_key(start), end_key(end), seq_start_idx(start_idx), seq_end_idx(end_idx) {} Slice start_key; Slice end_key; size_t seq_start_idx; size_t seq_end_idx; }; FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, const InternalKeyComparator& icmp, bool for_compaction = false, const std::vector& snapshots = {}); std::vector::const_iterator begin() const { return tombstones_.begin(); } std::vector::const_iterator end() const { return tombstones_.end(); } std::vector::const_iterator seq_iter(size_t idx) const { return std::next(tombstone_seqs_.begin(), idx); } std::vector::const_iterator seq_begin() const { return tombstone_seqs_.begin(); } std::vector::const_iterator seq_end() const { return tombstone_seqs_.end(); } bool empty() const { return tombstones_.empty(); } // Returns true if the stored tombstones contain with one with a sequence // number in [lower, upper]. bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const; private: // Given an ordered range tombstone iterator unfragmented_tombstones, // "fragment" the tombstones into non-overlapping pieces, and store them in // tombstones_ and tombstone_seqs_. void FragmentTombstones( std::unique_ptr unfragmented_tombstones, const InternalKeyComparator& icmp, bool for_compaction, const std::vector& snapshots); std::vector tombstones_; std::vector tombstone_seqs_; std::set seq_set_; std::list pinned_slices_; PinnedIteratorsManager pinned_iters_mgr_; }; // FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del // meta block into an iterator over non-overlapping tombstone fragments. 
The // tombstone fragmentation process should be more efficient than the range // tombstone collapsing algorithm in RangeDelAggregator because this leverages // the internal key ordering already provided by the input iterator, if // applicable (when the iterator is unsorted, a new sorted iterator is created // before proceeding). If there are few overlaps, creating a // FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator // tombstone collapsing is always O(n log n). class FragmentedRangeTombstoneIterator : public InternalIterator { public: FragmentedRangeTombstoneIterator( const FragmentedRangeTombstoneList* tombstones, const InternalKeyComparator& icmp, SequenceNumber upper_bound, SequenceNumber lower_bound = 0); FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, const InternalKeyComparator& icmp, SequenceNumber upper_bound, SequenceNumber lower_bound = 0); void SeekToFirst() override; void SeekToLast() override; void SeekToTopFirst(); void SeekToTopLast(); // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator // seeking should behave. This is OK because they are not currently used, but // eventually FragmentedRangeTombstoneIterator should no longer implement // InternalIterator. // // Seeks to the range tombstone that covers target at a seqnum in the // snapshot. If no such tombstone exists, seek to the earliest tombstone in // the snapshot that ends after target. void Seek(const Slice& target) override; // Seeks to the range tombstone that covers target at a seqnum in the // snapshot. If no such tombstone exists, seek to the latest tombstone in the // snapshot that starts before target. 
void SeekForPrev(const Slice& target) override; void Next() override; void Prev() override; void TopNext(); void TopPrev(); bool Valid() const override; Slice key() const override { MaybePinKey(); return current_start_key_.Encode(); } Slice value() const override { return pos_->end_key; } bool IsKeyPinned() const override { return false; } bool IsValuePinned() const override { return true; } Status status() const override { return Status::OK(); } bool empty() const { return tombstones_->empty(); } void Invalidate() { pos_ = tombstones_->end(); seq_pos_ = tombstones_->seq_end(); pinned_pos_ = tombstones_->end(); pinned_seq_pos_ = tombstones_->seq_end(); } RangeTombstone Tombstone() const { return RangeTombstone(start_key(), end_key(), seq()); } Slice start_key() const { return pos_->start_key; } Slice end_key() const { return pos_->end_key; } SequenceNumber seq() const { return *seq_pos_; } ParsedInternalKey parsed_start_key() const { return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber, kTypeRangeDeletion); } ParsedInternalKey parsed_end_key() const { return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber, kTypeRangeDeletion); } SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); // Splits the iterator into n+1 iterators (where n is the number of // snapshots), each providing a view over a "stripe" of sequence numbers. The // iterators are keyed by the upper bound of their ranges (the provided // snapshots + kMaxSequenceNumber). // // NOTE: the iterators in the returned map are no longer valid if their // parent iterator is deleted, since they do not modify the refcount of the // underlying tombstone list. Therefore, this map should be deleted before // the parent iterator. 
std::map> SplitBySnapshot(const std::vector& snapshots); SequenceNumber upper_bound() const { return upper_bound_; } SequenceNumber lower_bound() const { return lower_bound_; } private: using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; struct RangeTombstoneStackStartComparator { explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {} bool operator()(const RangeTombstoneStack& a, const RangeTombstoneStack& b) const { return cmp->Compare(a.start_key, b.start_key) < 0; } bool operator()(const RangeTombstoneStack& a, const Slice& b) const { return cmp->Compare(a.start_key, b) < 0; } bool operator()(const Slice& a, const RangeTombstoneStack& b) const { return cmp->Compare(a, b.start_key) < 0; } const Comparator* cmp; }; struct RangeTombstoneStackEndComparator { explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {} bool operator()(const RangeTombstoneStack& a, const RangeTombstoneStack& b) const { return cmp->Compare(a.end_key, b.end_key) < 0; } bool operator()(const RangeTombstoneStack& a, const Slice& b) const { return cmp->Compare(a.end_key, b) < 0; } bool operator()(const Slice& a, const RangeTombstoneStack& b) const { return cmp->Compare(a, b.end_key) < 0; } const Comparator* cmp; }; void MaybePinKey() const { if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() && (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) { current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion); pinned_pos_ = pos_; pinned_seq_pos_ = seq_pos_; } } void SeekToCoveringTombstone(const Slice& key); void SeekForPrevToCoveringTombstone(const Slice& key); void ScanForwardToVisibleTombstone(); void ScanBackwardToVisibleTombstone(); bool ValidPos() const { return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx); } const RangeTombstoneStackStartComparator tombstone_start_cmp_; const RangeTombstoneStackEndComparator tombstone_end_cmp_; const InternalKeyComparator* icmp_; const 
Comparator* ucmp_; std::shared_ptr tombstones_ref_; const FragmentedRangeTombstoneList* tombstones_; SequenceNumber upper_bound_; SequenceNumber lower_bound_; std::vector::const_iterator pos_; std::vector::const_iterator seq_pos_; mutable std::vector::const_iterator pinned_pos_; mutable std::vector::const_iterator pinned_seq_pos_; mutable InternalKey current_start_key_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/range_tombstone_fragmenter_test.cc000066400000000000000000000572231370372246700226110ustar00rootroot00000000000000// Copyright (c) 2018-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/range_tombstone_fragmenter.h" #include "db/db_test_util.h" #include "rocksdb/comparator.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { class RangeTombstoneFragmenterTest : public testing::Test {}; namespace { static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator()); std::unique_ptr MakeRangeDelIter( const std::vector& range_dels) { std::vector keys, values; for (const auto& range_del : range_dels) { auto key_and_value = range_del.Serialize(); keys.push_back(key_and_value.first.Encode().ToString()); values.push_back(key_and_value.second.ToString()); } return std::unique_ptr( new test::VectorIterator(keys, values)); } void CheckIterPosition(const RangeTombstone& tombstone, const FragmentedRangeTombstoneIterator* iter) { // Test InternalIterator interface. EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key())); EXPECT_EQ(tombstone.end_key_, iter->value()); EXPECT_EQ(tombstone.seq_, iter->seq()); // Test FragmentedRangeTombstoneIterator interface. 
EXPECT_EQ(tombstone.start_key_, iter->start_key()); EXPECT_EQ(tombstone.end_key_, iter->end_key()); EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key())); } void VerifyFragmentedRangeDels( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToFirst(); for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) { ASSERT_TRUE(iter->Valid()); CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } void VerifyVisibleTombstones( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToTopFirst(); for (size_t i = 0; i < expected_tombstones.size(); i++, iter->TopNext()) { ASSERT_TRUE(iter->Valid()); CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } struct SeekTestCase { Slice seek_target; RangeTombstone expected_position; bool out_of_range; }; void VerifySeek(FragmentedRangeTombstoneIterator* iter, const std::vector& cases) { for (const auto& testcase : cases) { iter->Seek(testcase.seek_target); if (testcase.out_of_range) { ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); CheckIterPosition(testcase.expected_position, iter); } } } void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter, const std::vector& cases) { for (const auto& testcase : cases) { iter->SeekForPrev(testcase.seek_target); if (testcase.out_of_range) { ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); CheckIterPosition(testcase.expected_position, iter); } } } struct MaxCoveringTombstoneSeqnumTestCase { Slice user_key; SequenceNumber result; }; void VerifyMaxCoveringTombstoneSeqnum( FragmentedRangeTombstoneIterator* iter, const std::vector& cases) { for (const auto& testcase : cases) { EXPECT_EQ(testcase.result, iter->MaxCoveringTombstoneSeqnum(testcase.user_key)); } } } // anonymous namespace TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) { auto range_del_iter = MakeRangeDelIter({{"a", "b", 10}, {"c", 
"d", 5}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); ASSERT_EQ(0, iter.lower_bound()); ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}}); } TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 15}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); ASSERT_EQ(0, iter.lower_bound()); ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels( &iter, {{"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"c", 15}, {"e", 15}, {"g", 0}}); } TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) { auto range_del_iter = MakeRangeDelIter( {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); ASSERT_EQ(0, iter.lower_bound()); ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels( &iter, {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"c", 20}, {"e", 15}, {"g", 0}}); } TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) { auto range_del_iter = MakeRangeDelIter({{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); ASSERT_EQ(0, iter.lower_bound()); 
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}}); } TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"a", "g", 7}, {"a", "c", 3}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); ASSERT_EQ(0, iter.lower_bound()); ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}, {"c", "e", 10}, {"c", "e", 7}, {"e", "g", 7}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"c", 10}, {"e", 7}, {"g", 0}}); } TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) { auto range_del_iter = MakeRangeDelIter({{"a", "c", 30}, {"a", "g", 20}, {"a", "e", 10}, {"a", "g", 7}, {"a", "c", 3}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); ASSERT_EQ(0, iter.lower_bound()); ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 30}, {"a", "c", 20}, {"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}, {"c", "e", 20}, {"c", "e", 10}, {"c", "e", 7}, {"e", "g", 20}, {"e", "g", 7}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 30}, {"c", 20}, {"e", 20}, {"g", 0}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, {"c", "i", 6}, {"j", "n", 4}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, kMaxSequenceNumber); FragmentedRangeTombstoneIterator iter2(&fragment_list, 
bytewise_icmp, 9 /* upper_bound */); FragmentedRangeTombstoneIterator iter3(&fragment_list, bytewise_icmp, 7 /* upper_bound */); FragmentedRangeTombstoneIterator iter4(&fragment_list, bytewise_icmp, 5 /* upper_bound */); FragmentedRangeTombstoneIterator iter5(&fragment_list, bytewise_icmp, 3 /* upper_bound */); for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) { VerifyFragmentedRangeDels(iter, {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, {"c", "e", 6}, {"e", "g", 8}, {"e", "g", 6}, {"g", "i", 6}, {"j", "l", 4}, {"j", "l", 2}, {"l", "n", 4}}); } ASSERT_EQ(0, iter1.lower_bound()); ASSERT_EQ(kMaxSequenceNumber, iter1.upper_bound()); VerifyVisibleTombstones(&iter1, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); ASSERT_EQ(0, iter2.lower_bound()); ASSERT_EQ(9, iter2.upper_bound()); VerifyVisibleTombstones(&iter2, {{"c", "e", 8}, {"e", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); ASSERT_EQ(0, iter3.lower_bound()); ASSERT_EQ(7, iter3.upper_bound()); VerifyVisibleTombstones(&iter3, {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}}); ASSERT_EQ(0, iter4.lower_bound()); ASSERT_EQ(5, iter4.upper_bound()); VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}}); ASSERT_EQ(0, iter5.lower_bound()); ASSERT_EQ(3, iter5.upper_bound()); VerifyVisibleTombstones(&iter5, {{"j", "l", 2}}); VerifyMaxCoveringTombstoneSeqnum( &iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { 
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"j", "n", 4}, {"c", "i", 6}, {"c", "g", 8}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, 9 /* upper_bound */); ASSERT_EQ(0, iter.lower_bound()); ASSERT_EQ(9, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, {"c", "e", 6}, {"e", "g", 8}, {"e", "g", 6}, {"g", "i", 6}, {"j", "l", 4}, {"j", "l", 2}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompaction) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"j", "n", 4}, {"c", "i", 6}, {"c", "g", 8}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, {} /* snapshots */); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber /* upper_bound */); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompactionWithSnapshot) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"j", "n", 4}, {"c", "i", 6}, {"c", "g", 8}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, {20, 9} /* upper_bounds */); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber /* upper_bound */); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, {"e", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); } TEST_F(RangeTombstoneFragmenterTest, IteratorSplitNoSnapshots) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"j", "n", 4}, {"c", "i", 6}, {"c", "g", 8}, 
{"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber /* upper_bound */); auto split_iters = iter.SplitBySnapshot({} /* snapshots */); ASSERT_EQ(1, split_iters.size()); auto* split_iter = split_iters[kMaxSequenceNumber].get(); ASSERT_EQ(0, split_iter->lower_bound()); ASSERT_EQ(kMaxSequenceNumber, split_iter->upper_bound()); VerifyVisibleTombstones(split_iter, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); } TEST_F(RangeTombstoneFragmenterTest, IteratorSplitWithSnapshots) { auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"j", "n", 4}, {"c", "i", 6}, {"c", "g", 8}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber /* upper_bound */); auto split_iters = iter.SplitBySnapshot({3, 5, 7, 9} /* snapshots */); ASSERT_EQ(5, split_iters.size()); auto* split_iter1 = split_iters[3].get(); ASSERT_EQ(0, split_iter1->lower_bound()); ASSERT_EQ(3, split_iter1->upper_bound()); VerifyVisibleTombstones(split_iter1, {{"j", "l", 2}}); auto* split_iter2 = split_iters[5].get(); ASSERT_EQ(4, split_iter2->lower_bound()); ASSERT_EQ(5, split_iter2->upper_bound()); VerifyVisibleTombstones(split_iter2, {{"j", "l", 4}, {"l", "n", 4}}); auto* split_iter3 = split_iters[7].get(); ASSERT_EQ(6, split_iter3->lower_bound()); ASSERT_EQ(7, split_iter3->upper_bound()); VerifyVisibleTombstones(split_iter3, {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}}); auto* split_iter4 = split_iters[9].get(); ASSERT_EQ(8, split_iter4->lower_bound()); ASSERT_EQ(9, split_iter4->upper_bound()); VerifyVisibleTombstones(split_iter4, {{"c", "e", 8}, {"e", "g", 8}}); auto* split_iter5 = split_iters[kMaxSequenceNumber].get(); ASSERT_EQ(10, split_iter5->lower_bound()); ASSERT_EQ(kMaxSequenceNumber, 
split_iter5->upper_bound()); VerifyVisibleTombstones(split_iter5, {{"a", "c", 10}, {"c", "e", 10}}); } TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, {"c", "i", 6}, {"j", "n", 4}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, kMaxSequenceNumber); VerifySeek( &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); VerifySeekForPrev( &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, 3 /* upper_bound */); VerifySeek(&iter2, {{"a", {"j", "l", 2}}, {"e", {"j", "l", 2}}, {"l", {}, true /* out of range */}}); VerifySeekForPrev(&iter2, {{"a", {}, true /* out of range */}, {"e", {}, true /* out of range */}, {"l", {"j", "l", 2}}}); } TEST_F(RangeTombstoneFragmenterTest, SeekCovered) { // Same tombstones as OverlapAndRepeatedStartKey. 
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, {"c", "i", 6}, {"j", "n", 4}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, kMaxSequenceNumber); VerifySeek( &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); VerifySeekForPrev( &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, 3 /* upper_bound */); VerifySeek(&iter2, {{"b", {"j", "l", 2}}, {"f", {"j", "l", 2}}, {"m", {}, true /* out of range */}}); VerifySeekForPrev(&iter2, {{"b", {}, true /* out of range */}, {"f", {}, true /* out of range */}, {"m", {"j", "l", 2}}}); } TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, {"c", "i", 6}, {"j", "n", 4}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, kMaxSequenceNumber); VerifySeek(&iter1, {{"c", {"c", "e", 10}}, {"g", {"g", "i", 6}}, {"i", {"j", "l", 4}}, {"n", {}, true /* out of range */}}); VerifySeekForPrev(&iter1, {{"c", {"c", "e", 10}}, {"g", {"g", "i", 6}}, {"i", {"g", "i", 6}}, {"n", {"l", "n", 4}}}); FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, 3 /* upper_bound */); VerifySeek(&iter2, {{"c", {"j", "l", 2}}, {"g", {"j", "l", 2}}, {"i", {"j", "l", 2}}, {"n", {}, true /* out of range */}}); VerifySeekForPrev(&iter2, {{"c", {}, true /* out of range */}, {"g", {}, true /* out of range */}, {"i", {}, true /* out of range */}, {"n", {"j", "l", 2}}}); } TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) { // Same tombstones as OverlapAndRepeatedStartKey. 
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, {"c", "i", 6}, {"j", "n", 4}, {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, kMaxSequenceNumber); VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}}); VerifySeekForPrev(&iter, {{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}}); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/read_callback.h000066400000000000000000000034341370372246700165360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { class ReadCallback { public: ReadCallback(SequenceNumber last_visible_seq) : max_visible_seq_(last_visible_seq) {} ReadCallback(SequenceNumber last_visible_seq, SequenceNumber min_uncommitted) : max_visible_seq_(last_visible_seq), min_uncommitted_(min_uncommitted) {} virtual ~ReadCallback() {} // Will be called to see if the seq number visible; if not it moves on to // the next seq number. 
virtual bool IsVisibleFullCheck(SequenceNumber seq) = 0; inline bool IsVisible(SequenceNumber seq) { assert(min_uncommitted_ > 0); assert(min_uncommitted_ >= kMinUnCommittedSeq); if (seq < min_uncommitted_) { // handles seq == 0 as well assert(seq <= max_visible_seq_); return true; } else if (max_visible_seq_ < seq) { assert(seq != 0); return false; } else { assert(seq != 0); // already handled in the first if-then clause return IsVisibleFullCheck(seq); } } inline SequenceNumber max_visible_seq() { return max_visible_seq_; } // Refresh to a more recent visible seq virtual void Refresh(SequenceNumber seq) { max_visible_seq_ = seq; } protected: // The max visible seq, it is usually the snapshot but could be larger if // transaction has its own writes written to db. SequenceNumber max_visible_seq_ = kMaxSequenceNumber; // Any seq less than min_uncommitted_ is committed. const SequenceNumber min_uncommitted_ = kMinUnCommittedSeq; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/repair.cc000066400000000000000000000614441370372246700154340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Repairer does best effort recovery to recover as much data as possible after // a disaster without compromising consistency. It does not guarantee bringing // the database to a time consistent state. 
// // Repair process is broken into 4 phases: // (a) Find files // (b) Convert logs to tables // (c) Extract metadata // (d) Write Descriptor // // (a) Find files // // The repairer goes through all the files in the directory, and classifies them // based on their file name. Any file that cannot be identified by name will be // ignored. // // (b) Convert logs to table // // Every log file that is active is replayed. All sections of the file where the // checksum does not match is skipped over. We intentionally give preference to // data consistency. // // (c) Extract metadata // // We scan every table to compute // (1) smallest/largest for the table // (2) largest sequence number in the table // (3) oldest blob file referred to by the table (if applicable) // // If we are unable to scan the file, then we ignore the table. // // (d) Write Descriptor // // We generate descriptor contents: // - log number is set to zero // - next-file-number is set to 1 + largest file number we found // - last-sequence-number is set to largest sequence# found across // all tables (see 2c) // - compaction pointers are cleared // - every table file is added at level 0 // // Possible optimization 1: // (a) Compute total size and use to pick appropriate max-level M // (b) Sort tables by largest sequence# in the table // (c) For each table: if it overlaps earlier table, place in level-0, // else place in level-M. // (d) We can provide options for time consistent recovery and unsafe recovery // (ignore checksum failure when applicable) // Possible optimization 2: // Store per-table metadata (smallest, largest, largest-seq#, ...) // in the table's meta section to speed up ScanTable. 
#ifndef ROCKSDB_LITE #include #include "db/builder.h" #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/log_reader.h" #include "db/log_writer.h" #include "db/memtable.h" #include "db/table_cache.h" #include "db/version_edit.h" #include "db/write_batch_internal.h" #include "env/composite_env_wrapper.h" #include "file/filename.h" #include "file/writable_file_writer.h" #include "options/cf_options.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/write_buffer_manager.h" #include "table/scoped_arena_iterator.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { namespace { class Repairer { public: Repairer(const std::string& dbname, const DBOptions& db_options, const std::vector& column_families, const ColumnFamilyOptions& default_cf_opts, const ColumnFamilyOptions& unknown_cf_opts, bool create_unknown_cfs) : dbname_(dbname), env_(db_options.env), env_options_(), db_options_(SanitizeOptions(dbname_, db_options)), immutable_db_options_(ImmutableDBOptions(db_options_)), icmp_(default_cf_opts.comparator), default_cf_opts_( SanitizeOptions(immutable_db_options_, default_cf_opts)), default_cf_iopts_( ImmutableCFOptions(immutable_db_options_, default_cf_opts_)), unknown_cf_opts_( SanitizeOptions(immutable_db_options_, unknown_cf_opts)), create_unknown_cfs_(create_unknown_cfs), raw_table_cache_( // TableCache can be small since we expect each table to be opened // once. 
NewLRUCache(10, db_options_.table_cache_numshardbits)), table_cache_(new TableCache(default_cf_iopts_, env_options_, raw_table_cache_.get(), /*block_cache_tracer=*/nullptr)), wb_(db_options_.db_write_buffer_size), wc_(db_options_.delayed_write_rate), vset_(dbname_, &immutable_db_options_, env_options_, raw_table_cache_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr), next_file_number_(1), db_lock_(nullptr) { for (const auto& cfd : column_families) { cf_name_to_opts_[cfd.name] = cfd.options; } } const ColumnFamilyOptions* GetColumnFamilyOptions( const std::string& cf_name) { if (cf_name_to_opts_.find(cf_name) == cf_name_to_opts_.end()) { if (create_unknown_cfs_) { return &unknown_cf_opts_; } return nullptr; } return &cf_name_to_opts_[cf_name]; } // Adds a column family to the VersionSet with cf_options_ and updates // manifest. Status AddColumnFamily(const std::string& cf_name, uint32_t cf_id) { const auto* cf_opts = GetColumnFamilyOptions(cf_name); if (cf_opts == nullptr) { return Status::Corruption("Encountered unknown column family with name=" + cf_name + ", id=" + ToString(cf_id)); } Options opts(db_options_, *cf_opts); MutableCFOptions mut_cf_opts(opts); VersionEdit edit; edit.SetComparatorName(opts.comparator->Name()); edit.SetLogNumber(0); edit.SetColumnFamily(cf_id); ColumnFamilyData* cfd; cfd = nullptr; edit.AddColumnFamily(cf_name); mutex_.Lock(); Status status = vset_.LogAndApply(cfd, mut_cf_opts, &edit, &mutex_, nullptr /* db_directory */, false /* new_descriptor_log */, cf_opts); mutex_.Unlock(); return status; } ~Repairer() { if (db_lock_ != nullptr) { env_->UnlockFile(db_lock_); } delete table_cache_; } Status Run() { Status status = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!status.ok()) { return status; } status = FindFiles(); if (status.ok()) { // Discard older manifests and start a fresh one for (size_t i = 0; i < manifests_.size(); i++) { ArchiveFile(dbname_ + "/" + manifests_[i]); } // Just create a DBImpl temporarily so we can 
reuse NewDB() DBImpl* db_impl = new DBImpl(db_options_, dbname_); status = db_impl->NewDB(); delete db_impl; } if (status.ok()) { // Recover using the fresh manifest created by NewDB() status = vset_.Recover({{kDefaultColumnFamilyName, default_cf_opts_}}, false); } if (status.ok()) { // Need to scan existing SST files first so the column families are // created before we process WAL files ExtractMetaData(); // ExtractMetaData() uses table_fds_ to know which SST files' metadata to // extract -- we need to clear it here since metadata for existing SST // files has been extracted already table_fds_.clear(); ConvertLogFilesToTables(); ExtractMetaData(); status = AddTables(); } if (status.ok()) { uint64_t bytes = 0; for (size_t i = 0; i < tables_.size(); i++) { bytes += tables_[i].meta.fd.GetFileSize(); } ROCKS_LOG_WARN(db_options_.info_log, "**** Repaired rocksdb %s; " "recovered %" ROCKSDB_PRIszt " files; %" PRIu64 " bytes. " "Some data may have been lost. " "****", dbname_.c_str(), tables_.size(), bytes); } return status; } private: struct TableInfo { FileMetaData meta; uint32_t column_family_id; std::string column_family_name; }; std::string const dbname_; Env* const env_; const EnvOptions env_options_; const DBOptions db_options_; const ImmutableDBOptions immutable_db_options_; const InternalKeyComparator icmp_; const ColumnFamilyOptions default_cf_opts_; const ImmutableCFOptions default_cf_iopts_; // table_cache_ holds reference const ColumnFamilyOptions unknown_cf_opts_; const bool create_unknown_cfs_; std::shared_ptr raw_table_cache_; TableCache* table_cache_; WriteBufferManager wb_; WriteController wc_; VersionSet vset_; std::unordered_map cf_name_to_opts_; InstrumentedMutex mutex_; std::vector manifests_; std::vector table_fds_; std::vector logs_; std::vector tables_; uint64_t next_file_number_; // Lock over the persistent DB state. Non-nullptr iff successfully // acquired. 
FileLock* db_lock_; Status FindFiles() { std::vector filenames; bool found_file = false; std::vector to_search_paths; for (size_t path_id = 0; path_id < db_options_.db_paths.size(); path_id++) { to_search_paths.push_back(db_options_.db_paths[path_id].path); } // search wal_dir if user uses a customize wal_dir bool same = false; Status status = env_->AreFilesSame(db_options_.wal_dir, dbname_, &same); if (status.IsNotSupported()) { same = db_options_.wal_dir == dbname_; status = Status::OK(); } else if (!status.ok()) { return status; } if (!same) { to_search_paths.push_back(db_options_.wal_dir); } for (size_t path_id = 0; path_id < to_search_paths.size(); path_id++) { status = env_->GetChildren(to_search_paths[path_id], &filenames); if (!status.ok()) { return status; } if (!filenames.empty()) { found_file = true; } uint64_t number; FileType type; for (size_t i = 0; i < filenames.size(); i++) { if (ParseFileName(filenames[i], &number, &type)) { if (type == kDescriptorFile) { manifests_.push_back(filenames[i]); } else { if (number + 1 > next_file_number_) { next_file_number_ = number + 1; } if (type == kLogFile) { logs_.push_back(number); } else if (type == kTableFile) { table_fds_.emplace_back(number, static_cast(path_id), 0); } else { // Ignore other files } } } } } if (!found_file) { return Status::Corruption(dbname_, "repair found no files"); } return Status::OK(); } void ConvertLogFilesToTables() { for (size_t i = 0; i < logs_.size(); i++) { // we should use LogFileName(wal_dir, logs_[i]) here. user might uses wal_dir option. 
std::string logname = LogFileName(db_options_.wal_dir, logs_[i]); Status status = ConvertLogToTable(logs_[i]); if (!status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Log #%" PRIu64 ": ignoring conversion error: %s", logs_[i], status.ToString().c_str()); } ArchiveFile(logname); } } Status ConvertLogToTable(uint64_t log) { struct LogReporter : public log::Reader::Reporter { Env* env; std::shared_ptr info_log; uint64_t lognum; void Corruption(size_t bytes, const Status& s) override { // We print error messages for corruption, but continue repairing. ROCKS_LOG_ERROR(info_log, "Log #%" PRIu64 ": dropping %d bytes; %s", lognum, static_cast(bytes), s.ToString().c_str()); } }; // Open the log file std::string logname = LogFileName(db_options_.wal_dir, log); std::unique_ptr lfile; Status status = env_->NewSequentialFile( logname, &lfile, env_->OptimizeForLogRead(env_options_)); if (!status.ok()) { return status; } std::unique_ptr lfile_reader(new SequentialFileReader( NewLegacySequentialFileWrapper(lfile), logname)); // Create the log reader. LogReporter reporter; reporter.env = env_; reporter.info_log = db_options_.info_log; reporter.lognum = log; // We intentionally make log::Reader do checksumming so that // corruptions cause entire commits to be skipped instead of // propagating bad information (like overly large sequence // numbers). 
log::Reader reader(db_options_.info_log, std::move(lfile_reader), &reporter, true /*enable checksum*/, log); // Initialize per-column family memtables for (auto* cfd : *vset_.GetColumnFamilySet()) { cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber); } auto cf_mems = new ColumnFamilyMemTablesImpl(vset_.GetColumnFamilySet()); // Read all the records and add to a memtable std::string scratch; Slice record; WriteBatch batch; int counter = 0; while (reader.ReadRecord(&record, &scratch)) { if (record.size() < WriteBatchInternal::kHeader) { reporter.Corruption( record.size(), Status::Corruption("log record too small")); continue; } WriteBatchInternal::SetContents(&batch, record); status = WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr); if (status.ok()) { counter += WriteBatchInternal::Count(&batch); } else { ROCKS_LOG_WARN(db_options_.info_log, "Log #%" PRIu64 ": ignoring %s", log, status.ToString().c_str()); status = Status::OK(); // Keep going with rest of file } } // Dump a table for each column family with entries in this log file. for (auto* cfd : *vset_.GetColumnFamilySet()) { // Do not record a version edit for this conversion to a Table // since ExtractMetaData() will also generate edits. 
MemTable* mem = cfd->mem(); if (mem->IsEmpty()) { continue; } FileMetaData meta; meta.fd = FileDescriptor(next_file_number_++, 0, 0); ReadOptions ro; ro.total_order_seek = true; Arena arena; ScopedArenaIterator iter(mem->NewIterator(ro, &arena)); int64_t _current_time = 0; status = env_->GetCurrentTime(&_current_time); // ignore error const uint64_t current_time = static_cast(_current_time); SnapshotChecker* snapshot_checker = DisableGCSnapshotChecker::Instance(); auto write_hint = cfd->CalculateSSTWriteHint(0); std::vector> range_del_iters; auto range_del_iter = mem->NewRangeTombstoneIterator(ro, kMaxSequenceNumber); if (range_del_iter != nullptr) { range_del_iters.emplace_back(range_del_iter); } LegacyFileSystemWrapper fs(env_); IOStatus io_s; status = BuildTable( dbname_, env_, &fs, *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), env_options_, table_cache_, iter.get(), std::move(range_del_iters), &meta, cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(), {}, kMaxSequenceNumber, snapshot_checker, kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false, nullptr /* internal_stats */, TableFileCreationReason::kRecovery, &io_s, nullptr /* event_logger */, 0 /* job_id */, Env::IO_HIGH, nullptr /* table_properties */, -1 /* level */, current_time, write_hint); ROCKS_LOG_INFO(db_options_.info_log, "Log #%" PRIu64 ": %d ops saved to Table #%" PRIu64 " %s", log, counter, meta.fd.GetNumber(), status.ToString().c_str()); if (status.ok()) { if (meta.fd.GetFileSize() > 0) { table_fds_.push_back(meta.fd); } } else { break; } } delete cf_mems; return status; } void ExtractMetaData() { for (size_t i = 0; i < table_fds_.size(); i++) { TableInfo t; t.meta.fd = table_fds_[i]; Status status = ScanTable(&t); if (!status.ok()) { std::string fname = TableFileName( db_options_.db_paths, t.meta.fd.GetNumber(), t.meta.fd.GetPathId()); char file_num_buf[kFormatFileNumberBufSize]; FormatFileNumber(t.meta.fd.GetNumber(), 
t.meta.fd.GetPathId(), file_num_buf, sizeof(file_num_buf)); ROCKS_LOG_WARN(db_options_.info_log, "Table #%s: ignoring %s", file_num_buf, status.ToString().c_str()); ArchiveFile(fname); } else { tables_.push_back(t); } } } Status ScanTable(TableInfo* t) { std::string fname = TableFileName( db_options_.db_paths, t->meta.fd.GetNumber(), t->meta.fd.GetPathId()); int counter = 0; uint64_t file_size; Status status = env_->GetFileSize(fname, &file_size); t->meta.fd = FileDescriptor(t->meta.fd.GetNumber(), t->meta.fd.GetPathId(), file_size); std::shared_ptr props; if (status.ok()) { status = table_cache_->GetTableProperties(env_options_, icmp_, t->meta.fd, &props); } if (status.ok()) { t->column_family_id = static_cast(props->column_family_id); if (t->column_family_id == TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) { ROCKS_LOG_WARN( db_options_.info_log, "Table #%" PRIu64 ": column family unknown (probably due to legacy format); " "adding to default column family id 0.", t->meta.fd.GetNumber()); t->column_family_id = 0; } if (vset_.GetColumnFamilySet()->GetColumnFamily(t->column_family_id) == nullptr) { status = AddColumnFamily(props->column_family_name, t->column_family_id); } t->meta.oldest_ancester_time = props->creation_time; } ColumnFamilyData* cfd = nullptr; if (status.ok()) { cfd = vset_.GetColumnFamilySet()->GetColumnFamily(t->column_family_id); if (cfd->GetName() != props->column_family_name) { ROCKS_LOG_ERROR( db_options_.info_log, "Table #%" PRIu64 ": inconsistent column family name '%s'; expected '%s' for column " "family id %" PRIu32 ".", t->meta.fd.GetNumber(), props->column_family_name.c_str(), cfd->GetName().c_str(), t->column_family_id); status = Status::Corruption(dbname_, "inconsistent column family name"); } } if (status.ok()) { ReadOptions ropts; ropts.total_order_seek = true; InternalIterator* iter = table_cache_->NewIterator( ropts, env_options_, cfd->internal_comparator(), t->meta, nullptr /* range_del_agg */, 
cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kRepair, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, /*max_file_size_for_l0_meta_pin=*/0, /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, /*allow_unprepared_value=*/false); ParsedInternalKey parsed; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { Slice key = iter->key(); if (!ParseInternalKey(key, &parsed)) { ROCKS_LOG_ERROR(db_options_.info_log, "Table #%" PRIu64 ": unparsable key %s", t->meta.fd.GetNumber(), EscapeString(key).c_str()); continue; } counter++; t->meta.UpdateBoundaries(key, iter->value(), parsed.sequence, parsed.type); } if (!iter->status().ok()) { status = iter->status(); } delete iter; ROCKS_LOG_INFO(db_options_.info_log, "Table #%" PRIu64 ": %d entries %s", t->meta.fd.GetNumber(), counter, status.ToString().c_str()); } return status; } Status AddTables() { std::unordered_map> cf_id_to_tables; SequenceNumber max_sequence = 0; for (size_t i = 0; i < tables_.size(); i++) { cf_id_to_tables[tables_[i].column_family_id].push_back(&tables_[i]); if (max_sequence < tables_[i].meta.fd.largest_seqno) { max_sequence = tables_[i].meta.fd.largest_seqno; } } vset_.SetLastAllocatedSequence(max_sequence); vset_.SetLastPublishedSequence(max_sequence); vset_.SetLastSequence(max_sequence); for (const auto& cf_id_and_tables : cf_id_to_tables) { auto* cfd = vset_.GetColumnFamilySet()->GetColumnFamily(cf_id_and_tables.first); VersionEdit edit; edit.SetComparatorName(cfd->user_comparator()->Name()); edit.SetLogNumber(0); edit.SetNextFile(next_file_number_); edit.SetColumnFamily(cfd->GetID()); // TODO(opt): separate out into multiple levels for (const auto* table : cf_id_and_tables.second) { edit.AddFile( 0, table->meta.fd.GetNumber(), table->meta.fd.GetPathId(), table->meta.fd.GetFileSize(), table->meta.smallest, table->meta.largest, table->meta.fd.smallest_seqno, 
table->meta.fd.largest_seqno, table->meta.marked_for_compaction, table->meta.oldest_blob_file_number, table->meta.oldest_ancester_time, table->meta.file_creation_time, table->meta.file_checksum, table->meta.file_checksum_func_name); } assert(next_file_number_ > 0); vset_.MarkFileNumberUsed(next_file_number_ - 1); mutex_.Lock(); Status status = vset_.LogAndApply( cfd, *cfd->GetLatestMutableCFOptions(), &edit, &mutex_, nullptr /* db_directory */, false /* new_descriptor_log */); mutex_.Unlock(); if (!status.ok()) { return status; } } return Status::OK(); } void ArchiveFile(const std::string& fname) { // Move into another directory. E.g., for // dir/foo // rename to // dir/lost/foo const char* slash = strrchr(fname.c_str(), '/'); std::string new_dir; if (slash != nullptr) { new_dir.assign(fname.data(), slash - fname.data()); } new_dir.append("/lost"); env_->CreateDir(new_dir); // Ignore error std::string new_file = new_dir; new_file.append("/"); new_file.append((slash == nullptr) ? fname.c_str() : slash + 1); Status s = env_->RenameFile(fname, new_file); ROCKS_LOG_INFO(db_options_.info_log, "Archiving %s: %s\n", fname.c_str(), s.ToString().c_str()); } }; Status GetDefaultCFOptions( const std::vector& column_families, ColumnFamilyOptions* res) { assert(res != nullptr); auto iter = std::find_if(column_families.begin(), column_families.end(), [](const ColumnFamilyDescriptor& cfd) { return cfd.name == kDefaultColumnFamilyName; }); if (iter == column_families.end()) { return Status::InvalidArgument( "column_families", "Must contain entry for default column family"); } *res = iter->options; return Status::OK(); } } // anonymous namespace Status RepairDB(const std::string& dbname, const DBOptions& db_options, const std::vector& column_families ) { ColumnFamilyOptions default_cf_opts; Status status = GetDefaultCFOptions(column_families, &default_cf_opts); if (status.ok()) { Repairer repairer(dbname, db_options, column_families, default_cf_opts, ColumnFamilyOptions() /* 
unknown_cf_opts */, false /* create_unknown_cfs */); status = repairer.Run(); } return status; } Status RepairDB(const std::string& dbname, const DBOptions& db_options, const std::vector& column_families, const ColumnFamilyOptions& unknown_cf_opts) { ColumnFamilyOptions default_cf_opts; Status status = GetDefaultCFOptions(column_families, &default_cf_opts); if (status.ok()) { Repairer repairer(dbname, db_options, column_families, default_cf_opts, unknown_cf_opts, true /* create_unknown_cfs */); status = repairer.Run(); } return status; } Status RepairDB(const std::string& dbname, const Options& options) { Options opts(options); DBOptions db_options(opts); ColumnFamilyOptions cf_options(opts); Repairer repairer(dbname, db_options, {}, cf_options /* default_cf_opts */, cf_options /* unknown_cf_opts */, true /* create_unknown_cfs */); return repairer.Run(); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/repair_test.cc000066400000000000000000000272711370372246700164730ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include #include #include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "file/file_util.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/transaction_log.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE class RepairTest : public DBTestBase { public: RepairTest() : DBTestBase("/repair_test") {} std::string GetFirstSstPath() { uint64_t manifest_size; std::vector files; db_->GetLiveFiles(files, &manifest_size); auto sst_iter = std::find_if(files.begin(), files.end(), [](const std::string& file) { uint64_t number; FileType type; bool ok = ParseFileName(file, &number, &type); return ok && type == kTableFile; }); return sst_iter == files.end() ? "" : dbname_ + *sst_iter; } }; TEST_F(RepairTest, LostManifest) { // Add a couple SST files, delete the manifest, and verify RepairDB() saves // the day. Put("key", "val"); Flush(); Put("key2", "val2"); Flush(); // Need to get path before Close() deletes db_, but delete it after Close() to // ensure Close() didn't change the manifest. std::string manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); Close(); ASSERT_OK(env_->FileExists(manifest_path)); ASSERT_OK(env_->DeleteFile(manifest_path)); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); ASSERT_EQ(Get("key"), "val"); ASSERT_EQ(Get("key2"), "val2"); } TEST_F(RepairTest, CorruptManifest) { // Manifest is in an invalid format. Expect a full recovery. Put("key", "val"); Flush(); Put("key2", "val2"); Flush(); // Need to get path before Close() deletes db_, but overwrite it after Close() // to ensure Close() didn't change the manifest. 
std::string manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); Close(); ASSERT_OK(env_->FileExists(manifest_path)); LegacyFileSystemWrapper fs(env_); CreateFile(&fs, manifest_path, "blah", false /* use_fsync */); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); ASSERT_EQ(Get("key"), "val"); ASSERT_EQ(Get("key2"), "val2"); } TEST_F(RepairTest, IncompleteManifest) { // In this case, the manifest is valid but does not reference all of the SST // files. Expect a full recovery. Put("key", "val"); Flush(); std::string orig_manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); CopyFile(orig_manifest_path, orig_manifest_path + ".tmp"); Put("key2", "val2"); Flush(); // Need to get path before Close() deletes db_, but overwrite it after Close() // to ensure Close() didn't change the manifest. std::string new_manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); Close(); ASSERT_OK(env_->FileExists(new_manifest_path)); // Replace the manifest with one that is only aware of the first SST file. CopyFile(orig_manifest_path + ".tmp", new_manifest_path); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); ASSERT_EQ(Get("key"), "val"); ASSERT_EQ(Get("key2"), "val2"); } TEST_F(RepairTest, PostRepairSstFileNumbering) { // Verify after a DB is repaired, new files will be assigned higher numbers // than old files. Put("key", "val"); Flush(); Put("key2", "val2"); Flush(); uint64_t pre_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); Close(); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); uint64_t post_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); ASSERT_GE(post_repair_file_num, pre_repair_file_num); } TEST_F(RepairTest, LostSst) { // Delete one of the SST files but preserve the manifest that refers to it, // then verify the DB is still usable for the intact SST. 
Put("key", "val"); Flush(); Put("key2", "val2"); Flush(); auto sst_path = GetFirstSstPath(); ASSERT_FALSE(sst_path.empty()); ASSERT_OK(env_->DeleteFile(sst_path)); Close(); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); // Exactly one of the key-value pairs should be in the DB now. ASSERT_TRUE((Get("key") == "val") != (Get("key2") == "val2")); } TEST_F(RepairTest, CorruptSst) { // Corrupt one of the SST files but preserve the manifest that refers to it, // then verify the DB is still usable for the intact SST. Put("key", "val"); Flush(); Put("key2", "val2"); Flush(); auto sst_path = GetFirstSstPath(); ASSERT_FALSE(sst_path.empty()); LegacyFileSystemWrapper fs(env_); CreateFile(&fs, sst_path, "blah", false /* use_fsync */); Close(); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); // Exactly one of the key-value pairs should be in the DB now. ASSERT_TRUE((Get("key") == "val") != (Get("key2") == "val2")); } TEST_F(RepairTest, UnflushedSst) { // This test case invokes repair while some data is unflushed, then verifies // that data is in the db. Put("key", "val"); VectorLogPtr wal_files; ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 1); uint64_t total_ssts_size; GetAllSSTFiles(&total_ssts_size); ASSERT_EQ(total_ssts_size, 0); // Need to get path before Close() deletes db_, but delete it after Close() to // ensure Close() didn't change the manifest. 
std::string manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); Close(); ASSERT_OK(env_->FileExists(manifest_path)); ASSERT_OK(env_->DeleteFile(manifest_path)); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 0); GetAllSSTFiles(&total_ssts_size); ASSERT_GT(total_ssts_size, 0); ASSERT_EQ(Get("key"), "val"); } TEST_F(RepairTest, SeparateWalDir) { do { Options options = CurrentOptions(); DestroyAndReopen(options); Put("key", "val"); Put("foo", "bar"); VectorLogPtr wal_files; ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 1); uint64_t total_ssts_size; GetAllSSTFiles(&total_ssts_size); ASSERT_EQ(total_ssts_size, 0); std::string manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); Close(); ASSERT_OK(env_->FileExists(manifest_path)); ASSERT_OK(env_->DeleteFile(manifest_path)); ASSERT_OK(RepairDB(dbname_, options)); // make sure that all WALs are converted to SSTables. options.wal_dir = ""; Reopen(options); ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 0); GetAllSSTFiles(&total_ssts_size); ASSERT_GT(total_ssts_size, 0); ASSERT_EQ(Get("key"), "val"); ASSERT_EQ(Get("foo"), "bar"); } while(ChangeWalOptions()); } TEST_F(RepairTest, RepairMultipleColumnFamilies) { // Verify repair logic associates SST files with their original column // families. const int kNumCfs = 3; const int kEntriesPerCf = 2; DestroyAndReopen(CurrentOptions()); CreateAndReopenWithCF({"pikachu1", "pikachu2"}, CurrentOptions()); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { Put(i, "key" + ToString(j), "val" + ToString(j)); if (j == kEntriesPerCf - 1 && i == kNumCfs - 1) { // Leave one unflushed so we can verify WAL entries are properly // associated with column families. 
continue; } Flush(i); } } // Need to get path before Close() deletes db_, but delete it after Close() to // ensure Close() doesn't re-create the manifest. std::string manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); Close(); ASSERT_OK(env_->FileExists(manifest_path)); ASSERT_OK(env_->DeleteFile(manifest_path)); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); ReopenWithColumnFamilies({"default", "pikachu1", "pikachu2"}, CurrentOptions()); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { ASSERT_EQ(Get(i, "key" + ToString(j)), "val" + ToString(j)); } } } TEST_F(RepairTest, RepairColumnFamilyOptions) { // Verify repair logic uses correct ColumnFamilyOptions when repairing a // database with different options for column families. const int kNumCfs = 2; const int kEntriesPerCf = 2; Options opts(CurrentOptions()), rev_opts(CurrentOptions()); opts.comparator = BytewiseComparator(); rev_opts.comparator = ReverseBytewiseComparator(); DestroyAndReopen(opts); CreateColumnFamilies({"reverse"}, rev_opts); ReopenWithColumnFamilies({"default", "reverse"}, std::vector{opts, rev_opts}); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { Put(i, "key" + ToString(j), "val" + ToString(j)); if (i == kNumCfs - 1 && j == kEntriesPerCf - 1) { // Leave one unflushed so we can verify RepairDB's flush logic continue; } Flush(i); } } Close(); // RepairDB() records the comparator in the manifest, and DB::Open would fail // if a different comparator were used. 
ASSERT_OK(RepairDB(dbname_, opts, {{"default", opts}, {"reverse", rev_opts}}, opts /* unknown_cf_opts */)); ASSERT_OK(TryReopenWithColumnFamilies({"default", "reverse"}, std::vector{opts, rev_opts})); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { ASSERT_EQ(Get(i, "key" + ToString(j)), "val" + ToString(j)); } } // Examine table properties to verify RepairDB() used the right options when // converting WAL->SST TablePropertiesCollection fname_to_props; db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props); ASSERT_EQ(fname_to_props.size(), 2U); for (const auto& fname_and_props : fname_to_props) { std::string comparator_name ( InternalKeyComparator(rev_opts.comparator).Name()); comparator_name = comparator_name.substr(comparator_name.find(':') + 1); ASSERT_EQ(comparator_name, fname_and_props.second->comparator_name); } Close(); // Also check comparator when it's provided via "unknown" CF options ASSERT_OK(RepairDB(dbname_, opts, {{"default", opts}}, rev_opts /* unknown_cf_opts */)); ASSERT_OK(TryReopenWithColumnFamilies({"default", "reverse"}, std::vector{opts, rev_opts})); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { ASSERT_EQ(Get(i, "key" + ToString(j)), "val" + ToString(j)); } } } TEST_F(RepairTest, DbNameContainsTrailingSlash) { { bool tmp; if (env_->AreFilesSame("", "", &tmp).IsNotSupported()) { fprintf(stderr, "skipping RepairTest.DbNameContainsTrailingSlash due to " "unsupported Env::AreFilesSame\n"); return; } } Put("key", "val"); Flush(); Close(); ASSERT_OK(RepairDB(dbname_ + "/", CurrentOptions())); Reopen(CurrentOptions()); ASSERT_EQ(Get("key"), "val"); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as RepairDB is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE 
rocksdb-6.11.4/db/snapshot_checker.h000066400000000000000000000036571370372246700173410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { enum class SnapshotCheckerResult : int { kInSnapshot = 0, kNotInSnapshot = 1, // In case snapshot is released and the checker has no clue whether // the given sequence is visible to the snapshot. kSnapshotReleased = 2, }; // Callback class that control GC of duplicate keys in flush/compaction. class SnapshotChecker { public: virtual ~SnapshotChecker() {} virtual SnapshotCheckerResult CheckInSnapshot( SequenceNumber sequence, SequenceNumber snapshot_sequence) const = 0; }; class DisableGCSnapshotChecker : public SnapshotChecker { public: virtual ~DisableGCSnapshotChecker() {} virtual SnapshotCheckerResult CheckInSnapshot( SequenceNumber /*sequence*/, SequenceNumber /*snapshot_sequence*/) const override { // By returning kNotInSnapshot, we prevent all the values from being GCed return SnapshotCheckerResult::kNotInSnapshot; } static DisableGCSnapshotChecker* Instance() { return &instance_; } protected: static DisableGCSnapshotChecker instance_; explicit DisableGCSnapshotChecker() {} }; class WritePreparedTxnDB; // Callback class created by WritePreparedTxnDB to check if a key // is visible by a snapshot. 
class WritePreparedSnapshotChecker : public SnapshotChecker { public: explicit WritePreparedSnapshotChecker(WritePreparedTxnDB* txn_db); virtual ~WritePreparedSnapshotChecker() {} virtual SnapshotCheckerResult CheckInSnapshot( SequenceNumber sequence, SequenceNumber snapshot_sequence) const override; private: #ifndef ROCKSDB_LITE const WritePreparedTxnDB* const txn_db_; #endif // !ROCKSDB_LITE }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/snapshot_impl.cc000066400000000000000000000014231370372246700170210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "rocksdb/snapshot.h" #include "rocksdb/db.h" namespace ROCKSDB_NAMESPACE { ManagedSnapshot::ManagedSnapshot(DB* db) : db_(db), snapshot_(db->GetSnapshot()) {} ManagedSnapshot::ManagedSnapshot(DB* db, const Snapshot* _snapshot) : db_(db), snapshot_(_snapshot) {} ManagedSnapshot::~ManagedSnapshot() { if (snapshot_) { db_->ReleaseSnapshot(snapshot_); } } const Snapshot* ManagedSnapshot::snapshot() { return snapshot_;} } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/snapshot_impl.h000066400000000000000000000115021370372246700166620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include "rocksdb/db.h" namespace ROCKSDB_NAMESPACE { class SnapshotList; // Snapshots are kept in a doubly-linked list in the DB. // Each SnapshotImpl corresponds to a particular sequence number. class SnapshotImpl : public Snapshot { public: SequenceNumber number_; // const after creation // It indicates the smallest uncommitted data at the time the snapshot was // taken. This is currently used by WritePrepared transactions to limit the // scope of queries to IsInSnpashot. SequenceNumber min_uncommitted_ = kMinUnCommittedSeq; virtual SequenceNumber GetSequenceNumber() const override { return number_; } private: friend class SnapshotList; // SnapshotImpl is kept in a doubly-linked circular list SnapshotImpl* prev_; SnapshotImpl* next_; SnapshotList* list_; // just for sanity checks int64_t unix_time_; // Will this snapshot be used by a Transaction to do write-conflict checking? bool is_write_conflict_boundary_; }; class SnapshotList { public: SnapshotList() { list_.prev_ = &list_; list_.next_ = &list_; list_.number_ = 0xFFFFFFFFL; // placeholder marker, for debugging // Set all the variables to make UBSAN happy. list_.list_ = nullptr; list_.unix_time_ = 0; list_.is_write_conflict_boundary_ = false; count_ = 0; } // No copy-construct. SnapshotList(const SnapshotList&) = delete; bool empty() const { return list_.next_ == &list_; } SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; } SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; } SnapshotImpl* New(SnapshotImpl* s, SequenceNumber seq, uint64_t unix_time, bool is_write_conflict_boundary) { s->number_ = seq; s->unix_time_ = unix_time; s->is_write_conflict_boundary_ = is_write_conflict_boundary; s->list_ = this; s->next_ = &list_; s->prev_ = list_.prev_; s->prev_->next_ = s; s->next_->prev_ = s; count_++; return s; } // Do not responsible to free the object. 
void Delete(const SnapshotImpl* s) { assert(s->list_ == this); s->prev_->next_ = s->next_; s->next_->prev_ = s->prev_; count_--; } // retrieve all snapshot numbers up until max_seq. They are sorted in // ascending order (with no duplicates). std::vector GetAll( SequenceNumber* oldest_write_conflict_snapshot = nullptr, const SequenceNumber& max_seq = kMaxSequenceNumber) const { std::vector ret; GetAll(&ret, oldest_write_conflict_snapshot, max_seq); return ret; } void GetAll(std::vector* snap_vector, SequenceNumber* oldest_write_conflict_snapshot = nullptr, const SequenceNumber& max_seq = kMaxSequenceNumber) const { std::vector& ret = *snap_vector; // So far we have no use case that would pass a non-empty vector assert(ret.size() == 0); if (oldest_write_conflict_snapshot != nullptr) { *oldest_write_conflict_snapshot = kMaxSequenceNumber; } if (empty()) { return; } const SnapshotImpl* s = &list_; while (s->next_ != &list_) { if (s->next_->number_ > max_seq) { break; } // Avoid duplicates if (ret.empty() || ret.back() != s->next_->number_) { ret.push_back(s->next_->number_); } if (oldest_write_conflict_snapshot != nullptr && *oldest_write_conflict_snapshot == kMaxSequenceNumber && s->next_->is_write_conflict_boundary_) { // If this is the first write-conflict boundary snapshot in the list, // it is the oldest *oldest_write_conflict_snapshot = s->next_->number_; } s = s->next_; } return; } // get the sequence number of the most recent snapshot SequenceNumber GetNewest() { if (empty()) { return 0; } return newest()->number_; } int64_t GetOldestSnapshotTime() const { if (empty()) { return 0; } else { return oldest()->unix_time_; } } int64_t GetOldestSnapshotSequence() const { if (empty()) { return 0; } else { return oldest()->GetSequenceNumber(); } } uint64_t count() const { return count_; } private: // Dummy head of doubly-linked list of snapshots SnapshotImpl list_; uint64_t count_; }; } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/db/table_cache.cc000066400000000000000000000632231370372246700163610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/table_cache.h" #include "db/dbformat.h" #include "db/range_tombstone_fragmenter.h" #include "db/snapshot_impl.h" #include "db/version_edit.h" #include "file/filename.h" #include "file/random_access_file_reader.h" #include "monitoring/perf_context_imp.h" #include "rocksdb/statistics.h" #include "table/block_based/block_based_table_reader.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/iterator_wrapper.h" #include "table/multiget_context.h" #include "table/table_builder.h" #include "table/table_reader.h" #include "test_util/sync_point.h" #include "util/cast_util.h" #include "util/coding.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { namespace { template static void DeleteEntry(const Slice& /*key*/, void* value) { T* typed_value = reinterpret_cast(value); delete typed_value; } static void UnrefEntry(void* arg1, void* arg2) { Cache* cache = reinterpret_cast(arg1); Cache::Handle* h = reinterpret_cast(arg2); cache->Release(h); } static Slice GetSliceForFileNumber(const uint64_t* file_number) { return Slice(reinterpret_cast(file_number), sizeof(*file_number)); } #ifndef ROCKSDB_LITE void AppendVarint64(IterKey* key, uint64_t v) { char buf[10]; auto ptr = EncodeVarint64(buf, v); key->TrimAppend(key->Size(), buf, ptr - buf); } #endif // ROCKSDB_LITE } // namespace const int kLoadConcurency = 128; TableCache::TableCache(const 
ImmutableCFOptions& ioptions, const FileOptions& file_options, Cache* const cache, BlockCacheTracer* const block_cache_tracer) : ioptions_(ioptions), file_options_(file_options), cache_(cache), immortal_tables_(false), block_cache_tracer_(block_cache_tracer), loader_mutex_(kLoadConcurency, GetSliceNPHash64) { if (ioptions_.row_cache) { // If the same cache is shared by multiple instances, we need to // disambiguate its entries. PutVarint64(&row_cache_id_, ioptions_.row_cache->NewId()); } } TableCache::~TableCache() { } TableReader* TableCache::GetTableReaderFromHandle(Cache::Handle* handle) { return reinterpret_cast(cache_->Value(handle)); } void TableCache::ReleaseHandle(Cache::Handle* handle) { cache_->Release(handle); } Status TableCache::GetTableReader( const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, std::unique_ptr* table_reader, const SliceTransform* prefix_extractor, bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, size_t max_file_size_for_l0_meta_pin) { std::string fname = TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId()); std::unique_ptr file; Status s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file, nullptr); RecordTick(ioptions_.statistics, NO_FILE_OPENS); if (s.IsPathNotFound()) { fname = Rocks2LevelTableFileName(fname); s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file, nullptr); RecordTick(ioptions_.statistics, NO_FILE_OPENS); } if (s.ok()) { if (!sequential_mode && ioptions_.advise_random_on_open) { file->Hint(FSRandomAccessFile::kRandom); } StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS); std::unique_ptr file_reader( new RandomAccessFileReader( std::move(file), fname, ioptions_.env, record_read_stats ? 
ioptions_.statistics : nullptr, SST_READ_MICROS, file_read_hist, ioptions_.rate_limiter, ioptions_.listeners)); s = ioptions_.table_factory->NewTableReader( TableReaderOptions(ioptions_, prefix_extractor, file_options, internal_comparator, skip_filters, immortal_tables_, false /* force_direct_prefetch */, level, fd.largest_seqno, block_cache_tracer_, max_file_size_for_l0_meta_pin), std::move(file_reader), fd.GetFileSize(), table_reader, prefetch_index_and_filter_in_cache); TEST_SYNC_POINT("TableCache::GetTableReader:0"); } return s; } void TableCache::EraseHandle(const FileDescriptor& fd, Cache::Handle* handle) { ReleaseHandle(handle); uint64_t number = fd.GetNumber(); Slice key = GetSliceForFileNumber(&number); cache_->Erase(key); } Status TableCache::FindTable(const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, Cache::Handle** handle, const SliceTransform* prefix_extractor, const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist, bool skip_filters, int level, bool prefetch_index_and_filter_in_cache, size_t max_file_size_for_l0_meta_pin) { PERF_TIMER_GUARD_WITH_ENV(find_table_nanos, ioptions_.env); Status s; uint64_t number = fd.GetNumber(); Slice key = GetSliceForFileNumber(&number); *handle = cache_->Lookup(key); TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0", const_cast(&no_io)); if (*handle == nullptr) { if (no_io) { // Don't do IO and return a not-found status return Status::Incomplete("Table not found in table_cache, no_io is set"); } MutexLock load_lock(loader_mutex_.get(key)); // We check the cache again under loading mutex *handle = cache_->Lookup(key); if (*handle != nullptr) { return s; } std::unique_ptr table_reader; s = GetTableReader(file_options, internal_comparator, fd, false /* sequential mode */, record_read_stats, file_read_hist, &table_reader, prefix_extractor, skip_filters, level, prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin); if (!s.ok()) { 
assert(table_reader == nullptr); RecordTick(ioptions_.statistics, NO_FILE_ERRORS); // We do not cache error results so that if the error is transient, // or somebody repairs the file, we recover automatically. } else { s = cache_->Insert(key, table_reader.get(), 1, &DeleteEntry, handle); if (s.ok()) { // Release ownership of table reader. table_reader.release(); } } } return s; } InternalIterator* TableCache::NewIterator( const ReadOptions& options, const FileOptions& file_options, const InternalKeyComparator& icomparator, const FileMetaData& file_meta, RangeDelAggregator* range_del_agg, const SliceTransform* prefix_extractor, TableReader** table_reader_ptr, HistogramImpl* file_read_hist, TableReaderCaller caller, Arena* arena, bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin, const InternalKey* smallest_compaction_key, const InternalKey* largest_compaction_key, bool allow_unprepared_value) { PERF_TIMER_GUARD(new_table_iterator_nanos); Status s; TableReader* table_reader = nullptr; Cache::Handle* handle = nullptr; if (table_reader_ptr != nullptr) { *table_reader_ptr = nullptr; } bool for_compaction = caller == TableReaderCaller::kCompaction; auto& fd = file_meta.fd; table_reader = fd.table_reader; if (table_reader == nullptr) { s = FindTable(file_options, icomparator, fd, &handle, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, !for_compaction /* record_read_stats */, file_read_hist, skip_filters, level, true /* prefetch_index_and_filter_in_cache */, max_file_size_for_l0_meta_pin); if (s.ok()) { table_reader = GetTableReaderFromHandle(handle); } } InternalIterator* result = nullptr; if (s.ok()) { if (options.table_filter && !options.table_filter(*table_reader->GetTableProperties())) { result = NewEmptyInternalIterator(arena); } else { result = table_reader->NewIterator(options, prefix_extractor, arena, skip_filters, caller, file_options.compaction_readahead_size, allow_unprepared_value); } if (handle != nullptr) { 
result->RegisterCleanup(&UnrefEntry, cache_, handle); handle = nullptr; // prevent from releasing below } if (for_compaction) { table_reader->SetupForCompaction(); } if (table_reader_ptr != nullptr) { *table_reader_ptr = table_reader; } } if (s.ok() && range_del_agg != nullptr && !options.ignore_range_deletions) { if (range_del_agg->AddFile(fd.GetNumber())) { std::unique_ptr range_del_iter( static_cast( table_reader->NewRangeTombstoneIterator(options))); if (range_del_iter != nullptr) { s = range_del_iter->status(); } if (s.ok()) { const InternalKey* smallest = &file_meta.smallest; const InternalKey* largest = &file_meta.largest; if (smallest_compaction_key != nullptr) { smallest = smallest_compaction_key; } if (largest_compaction_key != nullptr) { largest = largest_compaction_key; } range_del_agg->AddTombstones(std::move(range_del_iter), smallest, largest); } } } if (handle != nullptr) { ReleaseHandle(handle); } if (!s.ok()) { assert(result == nullptr); result = NewErrorInternalIterator(s, arena); } return result; } Status TableCache::GetRangeTombstoneIterator( const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, std::unique_ptr* out_iter) { const FileDescriptor& fd = file_meta.fd; Status s; TableReader* t = fd.table_reader; Cache::Handle* handle = nullptr; if (t == nullptr) { s = FindTable(file_options_, internal_comparator, fd, &handle); if (s.ok()) { t = GetTableReaderFromHandle(handle); } } if (s.ok()) { out_iter->reset(t->NewRangeTombstoneIterator(options)); assert(out_iter); } return s; } #ifndef ROCKSDB_LITE void TableCache::CreateRowCacheKeyPrefix(const ReadOptions& options, const FileDescriptor& fd, const Slice& internal_key, GetContext* get_context, IterKey& row_cache_key) { uint64_t fd_number = fd.GetNumber(); // We use the user key as cache key instead of the internal key, // otherwise the whole cache would be invalidated every time the // sequence key increases. 
However, to support caching snapshot // reads, we append the sequence number (incremented by 1 to // distinguish from 0) only in this case. // If the snapshot is larger than the largest seqno in the file, // all data should be exposed to the snapshot, so we treat it // the same as there is no snapshot. The exception is that if // a seq-checking callback is registered, some internal keys // may still be filtered out. uint64_t seq_no = 0; // Maybe we can include the whole file ifsnapshot == fd.largest_seqno. if (options.snapshot != nullptr && (get_context->has_callback() || static_cast_with_check(options.snapshot) ->GetSequenceNumber() <= fd.largest_seqno)) { // We should consider to use options.snapshot->GetSequenceNumber() // instead of GetInternalKeySeqno(k), which will make the code // easier to understand. seq_no = 1 + GetInternalKeySeqno(internal_key); } // Compute row cache key. row_cache_key.TrimAppend(row_cache_key.Size(), row_cache_id_.data(), row_cache_id_.size()); AppendVarint64(&row_cache_key, fd_number); AppendVarint64(&row_cache_key, seq_no); } bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, size_t prefix_size, GetContext* get_context) { bool found = false; row_cache_key.TrimAppend(prefix_size, user_key.data(), user_key.size()); if (auto row_handle = ioptions_.row_cache->Lookup(row_cache_key.GetUserKey())) { // Cleanable routine to release the cache entry Cleanable value_pinner; auto release_cache_entry_func = [](void* cache_to_clean, void* cache_handle) { ((Cache*)cache_to_clean)->Release((Cache::Handle*)cache_handle); }; auto found_row_cache_entry = static_cast(ioptions_.row_cache->Value(row_handle)); // If it comes here value is located on the cache. // found_row_cache_entry points to the value on cache, // and value_pinner has cleanup procedure for the cached entry. 
// After replayGetContextLog() returns, get_context.pinnable_slice_ // will point to cache entry buffer (or a copy based on that) and // cleanup routine under value_pinner will be delegated to // get_context.pinnable_slice_. Cache entry is released when // get_context.pinnable_slice_ is reset. value_pinner.RegisterCleanup(release_cache_entry_func, ioptions_.row_cache.get(), row_handle); replayGetContextLog(*found_row_cache_entry, user_key, get_context, &value_pinner); RecordTick(ioptions_.statistics, ROW_CACHE_HIT); found = true; } else { RecordTick(ioptions_.statistics, ROW_CACHE_MISS); } return found; } #endif // ROCKSDB_LITE Status TableCache::Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, const Slice& k, GetContext* get_context, const SliceTransform* prefix_extractor, HistogramImpl* file_read_hist, bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin) { auto& fd = file_meta.fd; std::string* row_cache_entry = nullptr; bool done = false; #ifndef ROCKSDB_LITE IterKey row_cache_key; std::string row_cache_entry_buffer; // Check row cache if enabled. Since row cache does not currently store // sequence numbers, we cannot use it if we need to fetch the sequence. 
if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { auto user_key = ExtractUserKey(k); CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key); done = GetFromRowCache(user_key, row_cache_key, row_cache_key.Size(), get_context); if (!done) { row_cache_entry = &row_cache_entry_buffer; } } #endif // ROCKSDB_LITE Status s; TableReader* t = fd.table_reader; Cache::Handle* handle = nullptr; if (!done && s.ok()) { if (t == nullptr) { s = FindTable(file_options_, internal_comparator, fd, &handle, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, true /* record_read_stats */, file_read_hist, skip_filters, level, true /* prefetch_index_and_filter_in_cache */, max_file_size_for_l0_meta_pin); if (s.ok()) { t = GetTableReaderFromHandle(handle); } } SequenceNumber* max_covering_tombstone_seq = get_context->max_covering_tombstone_seq(); if (s.ok() && max_covering_tombstone_seq != nullptr && !options.ignore_range_deletions) { std::unique_ptr range_del_iter( t->NewRangeTombstoneIterator(options)); if (range_del_iter != nullptr) { *max_covering_tombstone_seq = std::max( *max_covering_tombstone_seq, range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k))); } } if (s.ok()) { get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. s = t->Get(options, k, get_context, prefix_extractor, skip_filters); get_context->SetReplayLog(nullptr); } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { // Couldn't find Table in cache but treat as kFound if no_io set get_context->MarkKeyMayExist(); s = Status::OK(); done = true; } } #ifndef ROCKSDB_LITE // Put the replay log in row cache only if something was found. 
if (!done && s.ok() && row_cache_entry && !row_cache_entry->empty()) { size_t charge = row_cache_key.Size() + row_cache_entry->size() + sizeof(std::string); void* row_ptr = new std::string(std::move(*row_cache_entry)); ioptions_.row_cache->Insert(row_cache_key.GetUserKey(), row_ptr, charge, &DeleteEntry); } #endif // ROCKSDB_LITE if (handle != nullptr) { ReleaseHandle(handle); } return s; } // Batched version of TableCache::MultiGet. Status TableCache::MultiGet(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, const MultiGetContext::Range* mget_range, const SliceTransform* prefix_extractor, HistogramImpl* file_read_hist, bool skip_filters, int level) { auto& fd = file_meta.fd; Status s; TableReader* t = fd.table_reader; Cache::Handle* handle = nullptr; MultiGetRange table_range(*mget_range, mget_range->begin(), mget_range->end()); #ifndef ROCKSDB_LITE autovector row_cache_entries; IterKey row_cache_key; size_t row_cache_key_prefix_size = 0; KeyContext& first_key = *table_range.begin(); bool lookup_row_cache = ioptions_.row_cache && !first_key.get_context->NeedToReadSequence(); // Check row cache if enabled. Since row cache does not currently store // sequence numbers, we cannot use it if we need to fetch the sequence. if (lookup_row_cache) { GetContext* first_context = first_key.get_context; CreateRowCacheKeyPrefix(options, fd, first_key.ikey, first_context, row_cache_key); row_cache_key_prefix_size = row_cache_key.Size(); for (auto miter = table_range.begin(); miter != table_range.end(); ++miter) { const Slice& user_key = miter->ukey; ; GetContext* get_context = miter->get_context; if (GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size, get_context)) { table_range.SkipKey(miter); } else { row_cache_entries.emplace_back(); get_context->SetReplayLog(&(row_cache_entries.back())); } } } #endif // ROCKSDB_LITE // Check that table_range is not empty. 
Its possible all keys may have been // found in the row cache and thus the range may now be empty if (s.ok() && !table_range.empty()) { if (t == nullptr) { s = FindTable( file_options_, internal_comparator, fd, &handle, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, true /* record_read_stats */, file_read_hist, skip_filters, level); TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s); if (s.ok()) { t = GetTableReaderFromHandle(handle); assert(t); } } if (s.ok() && !options.ignore_range_deletions) { std::unique_ptr range_del_iter( t->NewRangeTombstoneIterator(options)); if (range_del_iter != nullptr) { for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) { SequenceNumber* max_covering_tombstone_seq = iter->get_context->max_covering_tombstone_seq(); *max_covering_tombstone_seq = std::max(*max_covering_tombstone_seq, range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey)); } } } if (s.ok()) { t->MultiGet(options, &table_range, prefix_extractor, skip_filters); } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) { for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) { Status* status = iter->s; if (status->IsIncomplete()) { // Couldn't find Table in cache but treat as kFound if no_io set iter->get_context->MarkKeyMayExist(); s = Status::OK(); } } } } #ifndef ROCKSDB_LITE if (lookup_row_cache) { size_t row_idx = 0; for (auto miter = table_range.begin(); miter != table_range.end(); ++miter) { std::string& row_cache_entry = row_cache_entries[row_idx++]; const Slice& user_key = miter->ukey; ; GetContext* get_context = miter->get_context; get_context->SetReplayLog(nullptr); // Compute row cache key. row_cache_key.TrimAppend(row_cache_key_prefix_size, user_key.data(), user_key.size()); // Put the replay log in row cache only if something was found. 
if (s.ok() && !row_cache_entry.empty()) { size_t charge = row_cache_key.Size() + row_cache_entry.size() + sizeof(std::string); void* row_ptr = new std::string(std::move(row_cache_entry)); ioptions_.row_cache->Insert(row_cache_key.GetUserKey(), row_ptr, charge, &DeleteEntry); } } } #endif // ROCKSDB_LITE if (handle != nullptr) { ReleaseHandle(handle); } return s; } Status TableCache::GetTableProperties( const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, std::shared_ptr* properties, const SliceTransform* prefix_extractor, bool no_io) { Status s; auto table_reader = fd.table_reader; // table already been pre-loaded? if (table_reader) { *properties = table_reader->GetTableProperties(); return s; } Cache::Handle* table_handle = nullptr; s = FindTable(file_options, internal_comparator, fd, &table_handle, prefix_extractor, no_io); if (!s.ok()) { return s; } assert(table_handle); auto table = GetTableReaderFromHandle(table_handle); *properties = table->GetTableProperties(); ReleaseHandle(table_handle); return s; } size_t TableCache::GetMemoryUsageByTableReader( const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, const SliceTransform* prefix_extractor) { Status s; auto table_reader = fd.table_reader; // table already been pre-loaded? 
if (table_reader) { return table_reader->ApproximateMemoryUsage(); } Cache::Handle* table_handle = nullptr; s = FindTable(file_options, internal_comparator, fd, &table_handle, prefix_extractor, true); if (!s.ok()) { return 0; } assert(table_handle); auto table = GetTableReaderFromHandle(table_handle); auto ret = table->ApproximateMemoryUsage(); ReleaseHandle(table_handle); return ret; } void TableCache::Evict(Cache* cache, uint64_t file_number) { cache->Erase(GetSliceForFileNumber(&file_number)); } uint64_t TableCache::ApproximateOffsetOf( const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, const SliceTransform* prefix_extractor) { uint64_t result = 0; TableReader* table_reader = fd.table_reader; Cache::Handle* table_handle = nullptr; if (table_reader == nullptr) { const bool for_compaction = (caller == TableReaderCaller::kCompaction); Status s = FindTable(file_options_, internal_comparator, fd, &table_handle, prefix_extractor, false /* no_io */, !for_compaction /* record_read_stats */); if (s.ok()) { table_reader = GetTableReaderFromHandle(table_handle); } } if (table_reader != nullptr) { result = table_reader->ApproximateOffsetOf(key, caller); } if (table_handle != nullptr) { ReleaseHandle(table_handle); } return result; } uint64_t TableCache::ApproximateSize( const Slice& start, const Slice& end, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, const SliceTransform* prefix_extractor) { uint64_t result = 0; TableReader* table_reader = fd.table_reader; Cache::Handle* table_handle = nullptr; if (table_reader == nullptr) { const bool for_compaction = (caller == TableReaderCaller::kCompaction); Status s = FindTable(file_options_, internal_comparator, fd, &table_handle, prefix_extractor, false /* no_io */, !for_compaction /* record_read_stats */); if (s.ok()) { table_reader = GetTableReaderFromHandle(table_handle); } } if (table_reader != nullptr) { 
result = table_reader->ApproximateSize(start, end, caller); } if (table_handle != nullptr) { ReleaseHandle(table_handle); } return result; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/table_cache.h000066400000000000000000000256731370372246700162320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Thread-safe (provides internal synchronization) #pragma once #include #include #include #include "db/dbformat.h" #include "db/range_del_aggregator.h" #include "options/cf_options.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/table.h" #include "table/table_reader.h" #include "trace_replay/block_cache_tracer.h" namespace ROCKSDB_NAMESPACE { class Env; class Arena; struct FileDescriptor; class GetContext; class HistogramImpl; // Manages caching for TableReader objects for a column family. The actual // cache is allocated separately and passed to the constructor. TableCache // wraps around the underlying SST file readers by providing Get(), // MultiGet() and NewIterator() methods that hide the instantiation, // caching and access to the TableReader. The main purpose of this is // performance - by caching the TableReader, it avoids unnecessary file opens // and object allocation and instantiation. 
One exception is compaction, where // a new TableReader may be instantiated - see NewIterator() comments // // Another service provided by TableCache is managing the row cache - if the // DB is configured with a row cache, and the lookup key is present in the row // cache, lookup is very fast. The row cache is obtained from // ioptions.row_cache class TableCache { public: TableCache(const ImmutableCFOptions& ioptions, const FileOptions& storage_options, Cache* cache, BlockCacheTracer* const block_cache_tracer); ~TableCache(); // Return an iterator for the specified file number (the corresponding // file length must be exactly "file_size" bytes). If "table_reader_ptr" // is non-nullptr, also sets "*table_reader_ptr" to point to the Table object // underlying the returned iterator, or nullptr if no Table object underlies // the returned iterator. The returned "*table_reader_ptr" object is owned // by the cache and should not be deleted, and is valid for as long as the // returned iterator is live. // @param range_del_agg If non-nullptr, adds range deletions to the // aggregator. 
If an error occurs, returns it in a NewErrorInternalIterator // @param for_compaction If true, a new TableReader may be allocated (but // not cached), depending on the CF options // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" InternalIterator* NewIterator( const ReadOptions& options, const FileOptions& toptions, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, RangeDelAggregator* range_del_agg, const SliceTransform* prefix_extractor, TableReader** table_reader_ptr, HistogramImpl* file_read_hist, TableReaderCaller caller, Arena* arena, bool skip_filters, int level, size_t max_file_size_for_l0_meta_pin, const InternalKey* smallest_compaction_key, const InternalKey* largest_compaction_key, bool allow_unprepared_value); // If a seek to internal key "k" in specified file finds an entry, // call get_context->SaveValue() repeatedly until // it returns false. As a side effect, it will insert the TableReader // into the cache and potentially evict another entry // @param get_context Context for get operation. The result of the lookup // can be retrieved by calling get_context->State() // @param file_read_hist If non-nullptr, the file reader statistics are // recorded // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" Status Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, const Slice& k, GetContext* get_context, const SliceTransform* prefix_extractor = nullptr, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, int level = -1, size_t max_file_size_for_l0_meta_pin = 0); // Return the range delete tombstone iterator of the file specified by // `file_meta`. 
Status GetRangeTombstoneIterator( const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, std::unique_ptr* out_iter); // If a seek to internal key "k" in specified file finds an entry, // call get_context->SaveValue() repeatedly until // it returns false. As a side effect, it will insert the TableReader // into the cache and potentially evict another entry // @param mget_range Pointer to the structure describing a batch of keys to // be looked up in this table file. The result is stored // in the embedded GetContext // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" Status MultiGet(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& file_meta, const MultiGetContext::Range* mget_range, const SliceTransform* prefix_extractor = nullptr, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, int level = -1); // Evict any entry for the specified file number static void Evict(Cache* cache, uint64_t file_number); // Clean table handle and erase it from the table cache // Used in DB close, or the file is not live anymore. void EraseHandle(const FileDescriptor& fd, Cache::Handle* handle); // Find table reader // @param skip_filters Disables loading/accessing the filter block // @param level == -1 means not specified Status FindTable(const FileOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_fd, Cache::Handle**, const SliceTransform* prefix_extractor = nullptr, const bool no_io = false, bool record_read_stats = true, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, int level = -1, bool prefetch_index_and_filter_in_cache = true, size_t max_file_size_for_l0_meta_pin = 0); // Get TableReader from a cache handle. 
TableReader* GetTableReaderFromHandle(Cache::Handle* handle); // Get the table properties of a given table. // @no_io: indicates if we should load table to the cache if it is not present // in table cache yet. // @returns: `properties` will be reset on success. Please note that we will // return Status::Incomplete() if table is not present in cache and // we set `no_io` to be true. Status GetTableProperties(const FileOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& file_meta, std::shared_ptr* properties, const SliceTransform* prefix_extractor = nullptr, bool no_io = false); // Return total memory usage of the table reader of the file. // 0 if table reader of the file is not loaded. size_t GetMemoryUsageByTableReader( const FileOptions& toptions, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, const SliceTransform* prefix_extractor = nullptr); // Returns approximated offset of a key in a file represented by fd. uint64_t ApproximateOffsetOf( const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, const SliceTransform* prefix_extractor = nullptr); // Returns approximated data size between start and end keys in a file // represented by fd (the start key must not be greater than the end key). uint64_t ApproximateSize(const Slice& start, const Slice& end, const FileDescriptor& fd, TableReaderCaller caller, const InternalKeyComparator& internal_comparator, const SliceTransform* prefix_extractor = nullptr); // Release the handle from a cache void ReleaseHandle(Cache::Handle* handle); Cache* get_cache() const { return cache_; } // Capacity of the backing Cache that indicates inifinite TableCache capacity. // For example when max_open_files is -1 we set the backing Cache to this. static const int kInfiniteCapacity = 0x400000; // The tables opened with this TableCache will be immortal, i.e., their // lifetime is as long as that of the DB. 
void SetTablesAreImmortal() { if (cache_->GetCapacity() >= kInfiniteCapacity) { immortal_tables_ = true; } } private: // Build a table reader Status GetTableReader(const FileOptions& file_options, const InternalKeyComparator& internal_comparator, const FileDescriptor& fd, bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist, std::unique_ptr* table_reader, const SliceTransform* prefix_extractor = nullptr, bool skip_filters = false, int level = -1, bool prefetch_index_and_filter_in_cache = true, size_t max_file_size_for_l0_meta_pin = 0); // Create a key prefix for looking up the row cache. The prefix is of the // format row_cache_id + fd_number + seq_no. Later, the user key can be // appended to form the full key void CreateRowCacheKeyPrefix(const ReadOptions& options, const FileDescriptor& fd, const Slice& internal_key, GetContext* get_context, IterKey& row_cache_key); // Helper function to lookup the row cache for a key. It appends the // user key to row_cache_key at offset prefix_size bool GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, size_t prefix_size, GetContext* get_context); const ImmutableCFOptions& ioptions_; const FileOptions& file_options_; Cache* const cache_; std::string row_cache_id_; bool immortal_tables_; BlockCacheTracer* const block_cache_tracer_; Striped loader_mutex_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/table_properties_collector.cc000066400000000000000000000045271370372246700215620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "db/table_properties_collector.h"

#include "db/dbformat.h"
#include "util/coding.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// Looks up `property_name` in `props` and decodes its value as a varint64.
// `*property_present` reports whether the key exists in the map. The return
// value is 0 when the key is missing or its value cannot be decoded.
uint64_t GetUint64Property(const UserCollectedProperties& props,
                           const std::string& property_name,
                           bool* property_present) {
  const auto entry = props.find(property_name);
  *property_present = (entry != props.end());
  if (!*property_present) {
    return 0;
  }

  Slice encoded = entry->second;
  uint64_t decoded = 0;
  if (!GetVarint64(&encoded, &decoded)) {
    return 0;
  }
  return decoded;
}

}  // namespace

// Parses `key` as an internal key and forwards its user key, entry type and
// sequence number to the wrapped collector. Keys that fail to parse are
// rejected with InvalidArgument.
Status UserKeyTablePropertiesCollector::InternalAdd(const Slice& key,
                                                    const Slice& value,
                                                    uint64_t file_size) {
  ParsedInternalKey parsed;
  if (ParseInternalKey(key, &parsed)) {
    return collector_->AddUserKey(parsed.user_key, value,
                                  GetEntryType(parsed.type), parsed.sequence,
                                  file_size);
  }
  return Status::InvalidArgument("Invalid internal key");
}

// Passes the three block byte counts straight through to the wrapped
// collector.
void UserKeyTablePropertiesCollector::BlockAdd(
    uint64_t blockRawBytes, uint64_t blockCompressedBytesFast,
    uint64_t blockCompressedBytesSlow) {
  collector_->BlockAdd(blockRawBytes, blockCompressedBytesFast,
                       blockCompressedBytesSlow);
}

// Delegates to the wrapped collector's Finish().
Status UserKeyTablePropertiesCollector::Finish(
    UserCollectedProperties* properties) {
  return collector_->Finish(properties);
}

// Delegates to the wrapped collector's GetReadableProperties().
UserCollectedProperties
UserKeyTablePropertiesCollector::GetReadableProperties() const {
  return collector_->GetReadableProperties();
}

// Returns the value stored under TablePropertiesNames::kDeletedKeys, or 0
// when the property is missing or undecodable.
uint64_t GetDeletedKeys(const UserCollectedProperties& props) {
  bool unused_present;
  return GetUint64Property(props, TablePropertiesNames::kDeletedKeys,
                           &unused_present);
}

// Returns the value stored under TablePropertiesNames::kMergeOperands and
// reports through `*property_present` whether the property exists.
uint64_t GetMergeOperands(const UserCollectedProperties& props,
                          bool* property_present) {
  return GetUint64Property(props, TablePropertiesNames::kMergeOperands,
                           property_present);
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/table_properties_collector.h000066400000000000000000000071051370372246700214170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file defines a collection of statistics collectors. #pragma once #include "rocksdb/table_properties.h" #include #include #include namespace ROCKSDB_NAMESPACE { // Base class for internal table properties collector. class IntTblPropCollector { public: virtual ~IntTblPropCollector() {} virtual Status Finish(UserCollectedProperties* properties) = 0; virtual const char* Name() const = 0; // @params key the user key that is inserted into the table. // @params value the value that is inserted into the table. virtual Status InternalAdd(const Slice& key, const Slice& value, uint64_t file_size) = 0; virtual void BlockAdd(uint64_t blockRawBytes, uint64_t blockCompressedBytesFast, uint64_t blockCompressedBytesSlow) = 0; virtual UserCollectedProperties GetReadableProperties() const = 0; virtual bool NeedCompact() const { return false; } }; // Factory for internal table properties collector. class IntTblPropCollectorFactory { public: virtual ~IntTblPropCollectorFactory() {} // has to be thread-safe virtual IntTblPropCollector* CreateIntTblPropCollector( uint32_t column_family_id) = 0; // The name of the properties collector can be used for debugging purpose. virtual const char* Name() const = 0; }; // When rocksdb creates a new table, it will encode all "user keys" into // "internal keys", which contains meta information of a given entry. // // This class extracts user key from the encoded internal key when Add() is // invoked. 
class UserKeyTablePropertiesCollector : public IntTblPropCollector { public: // transfer of ownership explicit UserKeyTablePropertiesCollector(TablePropertiesCollector* collector) : collector_(collector) {} virtual ~UserKeyTablePropertiesCollector() {} virtual Status InternalAdd(const Slice& key, const Slice& value, uint64_t file_size) override; virtual void BlockAdd(uint64_t blockRawBytes, uint64_t blockCompressedBytesFast, uint64_t blockCompressedBytesSlow) override; virtual Status Finish(UserCollectedProperties* properties) override; virtual const char* Name() const override { return collector_->Name(); } UserCollectedProperties GetReadableProperties() const override; virtual bool NeedCompact() const override { return collector_->NeedCompact(); } protected: std::unique_ptr collector_; }; class UserKeyTablePropertiesCollectorFactory : public IntTblPropCollectorFactory { public: explicit UserKeyTablePropertiesCollectorFactory( std::shared_ptr user_collector_factory) : user_collector_factory_(user_collector_factory) {} virtual IntTblPropCollector* CreateIntTblPropCollector( uint32_t column_family_id) override { TablePropertiesCollectorFactory::Context context; context.column_family_id = column_family_id; return new UserKeyTablePropertiesCollector( user_collector_factory_->CreateTablePropertiesCollector(context)); } virtual const char* Name() const override { return user_collector_factory_->Name(); } private: std::shared_ptr user_collector_factory_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/table_properties_collector_test.cc000066400000000000000000000452471370372246700226250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/table_properties_collector.h" #include "env/composite_env_wrapper.h" #include "file/sequence_file_reader.h" #include "file/writable_file_writer.h" #include "options/cf_options.h" #include "rocksdb/table.h" #include "table/block_based/block_based_table_factory.h" #include "table/meta_blocks.h" #include "table/plain/plain_table_factory.h" #include "table/table_builder.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { class TablePropertiesTest : public testing::Test, public testing::WithParamInterface { public: void SetUp() override { backward_mode_ = GetParam(); } bool backward_mode_; }; // Utilities test functions namespace { static const uint32_t kTestColumnFamilyId = 66; static const std::string kTestColumnFamilyName = "test_column_fam"; void MakeBuilder(const Options& options, const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const InternalKeyComparator& internal_comparator, const std::vector>* int_tbl_prop_collector_factories, std::unique_ptr* writable, std::unique_ptr* builder) { std::unique_ptr wf(new test::StringSink); writable->reset( new WritableFileWriter(NewLegacyWritableFileWrapper(std::move(wf)), "" /* don't care */, EnvOptions())); int unknown_level = -1; builder->reset(NewTableBuilder( ioptions, moptions, internal_comparator, int_tbl_prop_collector_factories, kTestColumnFamilyId, kTestColumnFamilyName, writable->get(), options.compression, options.sample_for_compression, options.compression_opts, unknown_level)); } } // namespace // Collects keys that starts with "A" in a table. 
class RegularKeysStartWithA: public TablePropertiesCollector { public: const char* Name() const override { return "RegularKeysStartWithA"; } Status Finish(UserCollectedProperties* properties) override { std::string encoded; std::string encoded_num_puts; std::string encoded_num_deletes; std::string encoded_num_single_deletes; std::string encoded_num_size_changes; PutVarint32(&encoded, count_); PutVarint32(&encoded_num_puts, num_puts_); PutVarint32(&encoded_num_deletes, num_deletes_); PutVarint32(&encoded_num_single_deletes, num_single_deletes_); PutVarint32(&encoded_num_size_changes, num_size_changes_); *properties = UserCollectedProperties{ {"TablePropertiesTest", message_}, {"Count", encoded}, {"NumPuts", encoded_num_puts}, {"NumDeletes", encoded_num_deletes}, {"NumSingleDeletes", encoded_num_single_deletes}, {"NumSizeChanges", encoded_num_size_changes}, }; return Status::OK(); } Status AddUserKey(const Slice& user_key, const Slice& /*value*/, EntryType type, SequenceNumber /*seq*/, uint64_t file_size) override { // simply asssume all user keys are not empty. if (user_key.data()[0] == 'A') { ++count_; } if (type == kEntryPut) { num_puts_++; } else if (type == kEntryDelete) { num_deletes_++; } else if (type == kEntrySingleDelete) { num_single_deletes_++; } if (file_size < file_size_) { message_ = "File size should not decrease."; } else if (file_size != file_size_) { num_size_changes_++; } return Status::OK(); } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } private: std::string message_ = "Rocksdb"; uint32_t count_ = 0; uint32_t num_puts_ = 0; uint32_t num_deletes_ = 0; uint32_t num_single_deletes_ = 0; uint32_t num_size_changes_ = 0; uint64_t file_size_ = 0; }; // Collects keys that starts with "A" in a table. 
Backward compatible mode // It is also used to test internal key table property collector class RegularKeysStartWithABackwardCompatible : public TablePropertiesCollector { public: const char* Name() const override { return "RegularKeysStartWithA"; } Status Finish(UserCollectedProperties* properties) override { std::string encoded; PutVarint32(&encoded, count_); *properties = UserCollectedProperties{{"TablePropertiesTest", "Rocksdb"}, {"Count", encoded}}; return Status::OK(); } Status Add(const Slice& user_key, const Slice& /*value*/) override { // simply asssume all user keys are not empty. if (user_key.data()[0] == 'A') { ++count_; } return Status::OK(); } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } private: uint32_t count_ = 0; }; class RegularKeysStartWithAInternal : public IntTblPropCollector { public: const char* Name() const override { return "RegularKeysStartWithA"; } Status Finish(UserCollectedProperties* properties) override { std::string encoded; PutVarint32(&encoded, count_); *properties = UserCollectedProperties{{"TablePropertiesTest", "Rocksdb"}, {"Count", encoded}}; return Status::OK(); } Status InternalAdd(const Slice& user_key, const Slice& /*value*/, uint64_t /*file_size*/) override { // simply asssume all user keys are not empty. if (user_key.data()[0] == 'A') { ++count_; } return Status::OK(); } void BlockAdd(uint64_t /* blockRawBytes */, uint64_t /* blockCompressedBytesFast */, uint64_t /* blockCompressedBytesSlow */) override { // Nothing to do. 
return; } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } private: uint32_t count_ = 0; }; class RegularKeysStartWithAFactory : public IntTblPropCollectorFactory, public TablePropertiesCollectorFactory { public: explicit RegularKeysStartWithAFactory(bool backward_mode) : backward_mode_(backward_mode) {} TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context context) override { EXPECT_EQ(kTestColumnFamilyId, context.column_family_id); if (!backward_mode_) { return new RegularKeysStartWithA(); } else { return new RegularKeysStartWithABackwardCompatible(); } } IntTblPropCollector* CreateIntTblPropCollector( uint32_t /*column_family_id*/) override { return new RegularKeysStartWithAInternal(); } const char* Name() const override { return "RegularKeysStartWithA"; } bool backward_mode_; }; class FlushBlockEveryThreePolicy : public FlushBlockPolicy { public: bool Update(const Slice& /*key*/, const Slice& /*value*/) override { return (++count_ % 3U == 0); } private: uint64_t count_ = 0; }; class FlushBlockEveryThreePolicyFactory : public FlushBlockPolicyFactory { public: explicit FlushBlockEveryThreePolicyFactory() {} const char* Name() const override { return "FlushBlockEveryThreePolicyFactory"; } FlushBlockPolicy* NewFlushBlockPolicy( const BlockBasedTableOptions& /*table_options*/, const BlockBuilder& /*data_block_builder*/) const override { return new FlushBlockEveryThreePolicy; } }; extern const uint64_t kBlockBasedTableMagicNumber; extern const uint64_t kPlainTableMagicNumber; namespace { void TestCustomizedTablePropertiesCollector( bool backward_mode, uint64_t magic_number, bool test_int_tbl_prop_collector, const Options& options, const InternalKeyComparator& internal_comparator) { // make sure the entries will be inserted with order. 
std::map, std::string> kvs = { {{"About ", kTypeValue}, "val5"}, // starts with 'A' {{"Abstract", kTypeValue}, "val2"}, // starts with 'A' {{"Around ", kTypeValue}, "val7"}, // starts with 'A' {{"Beyond ", kTypeValue}, "val3"}, {{"Builder ", kTypeValue}, "val1"}, {{"Love ", kTypeDeletion}, ""}, {{"Cancel ", kTypeValue}, "val4"}, {{"Find ", kTypeValue}, "val6"}, {{"Rocks ", kTypeDeletion}, ""}, {{"Foo ", kTypeSingleDeletion}, ""}, }; // -- Step 1: build table std::unique_ptr builder; std::unique_ptr writer; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); std::vector> int_tbl_prop_collector_factories; if (test_int_tbl_prop_collector) { int_tbl_prop_collector_factories.emplace_back( new RegularKeysStartWithAFactory(backward_mode)); } else { GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories); } MakeBuilder(options, ioptions, moptions, internal_comparator, &int_tbl_prop_collector_factories, &writer, &builder); SequenceNumber seqNum = 0U; for (const auto& kv : kvs) { InternalKey ikey(kv.first.first, seqNum++, kv.first.second); builder->Add(ikey.Encode(), kv.second); } ASSERT_OK(builder->Finish()); writer->Flush(); // -- Step 2: Read properties LegacyWritableFileWrapper* file = static_cast(writer->writable_file()); test::StringSink* fwf = static_cast(file->target()); std::unique_ptr fake_file_reader( test::GetRandomAccessFileReader( new test::StringSource(fwf->contents()))); TableProperties* props; Status s = ReadTableProperties(fake_file_reader.get(), fwf->contents().size(), magic_number, ioptions, &props, true /* compression_type_missing */); std::unique_ptr props_guard(props); ASSERT_OK(s); auto user_collected = props->user_collected_properties; ASSERT_NE(user_collected.find("TablePropertiesTest"), user_collected.end()); ASSERT_EQ("Rocksdb", user_collected.at("TablePropertiesTest")); uint32_t starts_with_A = 0; ASSERT_NE(user_collected.find("Count"), user_collected.end()); Slice 
key(user_collected.at("Count")); ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); ASSERT_EQ(3u, starts_with_A); if (!backward_mode && !test_int_tbl_prop_collector) { uint32_t num_puts; ASSERT_NE(user_collected.find("NumPuts"), user_collected.end()); Slice key_puts(user_collected.at("NumPuts")); ASSERT_TRUE(GetVarint32(&key_puts, &num_puts)); ASSERT_EQ(7u, num_puts); uint32_t num_deletes; ASSERT_NE(user_collected.find("NumDeletes"), user_collected.end()); Slice key_deletes(user_collected.at("NumDeletes")); ASSERT_TRUE(GetVarint32(&key_deletes, &num_deletes)); ASSERT_EQ(2u, num_deletes); uint32_t num_single_deletes; ASSERT_NE(user_collected.find("NumSingleDeletes"), user_collected.end()); Slice key_single_deletes(user_collected.at("NumSingleDeletes")); ASSERT_TRUE(GetVarint32(&key_single_deletes, &num_single_deletes)); ASSERT_EQ(1u, num_single_deletes); uint32_t num_size_changes; ASSERT_NE(user_collected.find("NumSizeChanges"), user_collected.end()); Slice key_size_changes(user_collected.at("NumSizeChanges")); ASSERT_TRUE(GetVarint32(&key_size_changes, &num_size_changes)); ASSERT_GE(num_size_changes, 2u); } } } // namespace TEST_P(TablePropertiesTest, CustomizedTablePropertiesCollector) { // Test properties collectors with internal keys or regular keys // for block based table for (bool encode_as_internal : { true, false }) { Options options; BlockBasedTableOptions table_options; table_options.flush_block_policy_factory = std::make_shared(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); test::PlainInternalKeyComparator ikc(options.comparator); std::shared_ptr collector_factory( new RegularKeysStartWithAFactory(backward_mode_)); options.table_properties_collector_factories.resize(1); options.table_properties_collector_factories[0] = collector_factory; TestCustomizedTablePropertiesCollector(backward_mode_, kBlockBasedTableMagicNumber, encode_as_internal, options, ikc); #ifndef ROCKSDB_LITE // PlainTable is not supported in Lite // test plain 
table PlainTableOptions plain_table_options; plain_table_options.user_key_len = 8; plain_table_options.bloom_bits_per_key = 8; plain_table_options.hash_table_ratio = 0; options.table_factory = std::make_shared(plain_table_options); TestCustomizedTablePropertiesCollector(backward_mode_, kPlainTableMagicNumber, encode_as_internal, options, ikc); #endif // !ROCKSDB_LITE } } namespace { void TestInternalKeyPropertiesCollector( bool backward_mode, uint64_t magic_number, bool sanitized, std::shared_ptr table_factory) { InternalKey keys[] = { InternalKey("A ", 0, ValueType::kTypeValue), InternalKey("B ", 1, ValueType::kTypeValue), InternalKey("C ", 2, ValueType::kTypeValue), InternalKey("W ", 3, ValueType::kTypeDeletion), InternalKey("X ", 4, ValueType::kTypeDeletion), InternalKey("Y ", 5, ValueType::kTypeDeletion), InternalKey("Z ", 6, ValueType::kTypeDeletion), InternalKey("a ", 7, ValueType::kTypeSingleDeletion), InternalKey("b ", 8, ValueType::kTypeMerge), InternalKey("c ", 9, ValueType::kTypeMerge), }; std::unique_ptr builder; std::unique_ptr writable; Options options; test::PlainInternalKeyComparator pikc(options.comparator); std::vector> int_tbl_prop_collector_factories; options.table_factory = table_factory; if (sanitized) { options.table_properties_collector_factories.emplace_back( new RegularKeysStartWithAFactory(backward_mode)); // with sanitization, even regular properties collector will be able to // handle internal keys. auto comparator = options.comparator; // HACK: Set options.info_log to avoid writing log in // SanitizeOptions(). 
options.info_log = std::make_shared(); options = SanitizeOptions("db", // just a place holder options); ImmutableCFOptions ioptions(options); GetIntTblPropCollectorFactory(ioptions, &int_tbl_prop_collector_factories); options.comparator = comparator; } const ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); for (int iter = 0; iter < 2; ++iter) { MakeBuilder(options, ioptions, moptions, pikc, &int_tbl_prop_collector_factories, &writable, &builder); for (const auto& k : keys) { builder->Add(k.Encode(), "val"); } ASSERT_OK(builder->Finish()); writable->Flush(); LegacyWritableFileWrapper* file = static_cast(writable->writable_file()); test::StringSink* fwf = static_cast(file->target()); std::unique_ptr reader( test::GetRandomAccessFileReader( new test::StringSource(fwf->contents()))); TableProperties* props; Status s = ReadTableProperties(reader.get(), fwf->contents().size(), magic_number, ioptions, &props, true /* compression_type_missing */); ASSERT_OK(s); std::unique_ptr props_guard(props); auto user_collected = props->user_collected_properties; uint64_t deleted = GetDeletedKeys(user_collected); ASSERT_EQ(5u, deleted); // deletes + single-deletes bool property_present; uint64_t merges = GetMergeOperands(user_collected, &property_present); ASSERT_TRUE(property_present); ASSERT_EQ(2u, merges); if (sanitized) { uint32_t starts_with_A = 0; ASSERT_NE(user_collected.find("Count"), user_collected.end()); Slice key(user_collected.at("Count")); ASSERT_TRUE(GetVarint32(&key, &starts_with_A)); ASSERT_EQ(1u, starts_with_A); if (!backward_mode) { uint32_t num_puts; ASSERT_NE(user_collected.find("NumPuts"), user_collected.end()); Slice key_puts(user_collected.at("NumPuts")); ASSERT_TRUE(GetVarint32(&key_puts, &num_puts)); ASSERT_EQ(3u, num_puts); uint32_t num_deletes; ASSERT_NE(user_collected.find("NumDeletes"), user_collected.end()); Slice key_deletes(user_collected.at("NumDeletes")); ASSERT_TRUE(GetVarint32(&key_deletes, &num_deletes)); ASSERT_EQ(4u, 
num_deletes); uint32_t num_single_deletes; ASSERT_NE(user_collected.find("NumSingleDeletes"), user_collected.end()); Slice key_single_deletes(user_collected.at("NumSingleDeletes")); ASSERT_TRUE(GetVarint32(&key_single_deletes, &num_single_deletes)); ASSERT_EQ(1u, num_single_deletes); } } } } } // namespace TEST_P(TablePropertiesTest, InternalKeyPropertiesCollector) { TestInternalKeyPropertiesCollector( backward_mode_, kBlockBasedTableMagicNumber, true /* sanitize */, std::make_shared()); if (backward_mode_) { TestInternalKeyPropertiesCollector( backward_mode_, kBlockBasedTableMagicNumber, false /* not sanitize */, std::make_shared()); } #ifndef ROCKSDB_LITE // PlainTable is not supported in Lite PlainTableOptions plain_table_options; plain_table_options.user_key_len = 8; plain_table_options.bloom_bits_per_key = 8; plain_table_options.hash_table_ratio = 0; TestInternalKeyPropertiesCollector( backward_mode_, kPlainTableMagicNumber, false /* not sanitize */, std::make_shared(plain_table_options)); #endif // !ROCKSDB_LITE } INSTANTIATE_TEST_CASE_P(InternalKeyPropertiesCollector, TablePropertiesTest, ::testing::Bool()); INSTANTIATE_TEST_CASE_P(CustomizedTablePropertiesCollector, TablePropertiesTest, ::testing::Bool()); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/transaction_log_impl.cc000066400000000000000000000252311370372246700203530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "db/transaction_log_impl.h" #include #include "db/write_batch_internal.h" #include "file/sequence_file_reader.h" namespace ROCKSDB_NAMESPACE { TransactionLogIteratorImpl::TransactionLogIteratorImpl( const std::string& dir, const ImmutableDBOptions* options, const TransactionLogIterator::ReadOptions& read_options, const EnvOptions& soptions, const SequenceNumber seq, std::unique_ptr files, VersionSet const* const versions, const bool seq_per_batch) : dir_(dir), options_(options), read_options_(read_options), soptions_(soptions), starting_sequence_number_(seq), files_(std::move(files)), started_(false), is_valid_(false), current_file_index_(0), current_batch_seq_(0), current_last_seq_(0), versions_(versions), seq_per_batch_(seq_per_batch) { assert(files_ != nullptr); assert(versions_ != nullptr); reporter_.env = options_->env; reporter_.info_log = options_->info_log.get(); SeekToStartSequence(); // Seek till starting sequence } Status TransactionLogIteratorImpl::OpenLogFile( const LogFile* log_file, std::unique_ptr* file_reader) { FileSystem* fs = options_->fs.get(); std::unique_ptr file; std::string fname; Status s; EnvOptions optimized_env_options = fs->OptimizeForLogRead(soptions_); if (log_file->Type() == kArchivedLogFile) { fname = ArchivedLogFileName(dir_, log_file->LogNumber()); s = fs->NewSequentialFile(fname, optimized_env_options, &file, nullptr); } else { fname = LogFileName(dir_, log_file->LogNumber()); s = fs->NewSequentialFile(fname, optimized_env_options, &file, nullptr); if (!s.ok()) { // If cannot open file in DB directory. // Try the archive dir, as it could have moved in the meanwhile. 
fname = ArchivedLogFileName(dir_, log_file->LogNumber()); s = fs->NewSequentialFile(fname, optimized_env_options, &file, nullptr); } } if (s.ok()) { file_reader->reset(new SequentialFileReader(std::move(file), fname)); } return s; } BatchResult TransactionLogIteratorImpl::GetBatch() { assert(is_valid_); // cannot call in a non valid state. BatchResult result; result.sequence = current_batch_seq_; result.writeBatchPtr = std::move(current_batch_); return result; } Status TransactionLogIteratorImpl::status() { return current_status_; } bool TransactionLogIteratorImpl::Valid() { return started_ && is_valid_; } bool TransactionLogIteratorImpl::RestrictedRead(Slice* record) { // Don't read if no more complete entries to read from logs if (current_last_seq_ >= versions_->LastSequence()) { return false; } return current_log_reader_->ReadRecord(record, &scratch_); } void TransactionLogIteratorImpl::SeekToStartSequence(uint64_t start_file_index, bool strict) { Slice record; started_ = false; is_valid_ = false; if (files_->size() <= start_file_index) { return; } Status s = OpenLogReader(files_->at(static_cast(start_file_index)).get()); if (!s.ok()) { current_status_ = s; reporter_.Info(current_status_.ToString().c_str()); return; } while (RestrictedRead(&record)) { if (record.size() < WriteBatchInternal::kHeader) { reporter_.Corruption( record.size(), Status::Corruption("very small log record")); continue; } UpdateCurrentWriteBatch(record); if (current_last_seq_ >= starting_sequence_number_) { if (strict && current_batch_seq_ != starting_sequence_number_) { current_status_ = Status::Corruption( "Gap in sequence number. Could not " "seek to required sequence number"); reporter_.Info(current_status_.ToString().c_str()); return; } else if (strict) { reporter_.Info("Could seek required sequence number. 
Iterator will " "continue."); } is_valid_ = true; started_ = true; // set started_ as we could seek till starting sequence return; } else { is_valid_ = false; } } // Could not find start sequence in first file. Normally this must be the // only file. Otherwise log the error and let the iterator return next entry // If strict is set, we want to seek exactly till the start sequence and it // should have been present in the file we scanned above if (strict) { current_status_ = Status::Corruption( "Gap in sequence number. Could not " "seek to required sequence number"); reporter_.Info(current_status_.ToString().c_str()); } else if (files_->size() != 1) { current_status_ = Status::Corruption( "Start sequence was not found, " "skipping to the next available"); reporter_.Info(current_status_.ToString().c_str()); // Let NextImpl find the next available entry. started_ remains false // because we don't want to check for gaps while moving to start sequence NextImpl(true); } } void TransactionLogIteratorImpl::Next() { return NextImpl(false); } void TransactionLogIteratorImpl::NextImpl(bool internal) { Slice record; is_valid_ = false; if (!internal && !started_) { // Runs every time until we can seek to the start sequence return SeekToStartSequence(); } while(true) { assert(current_log_reader_); if (current_log_reader_->IsEOF()) { current_log_reader_->UnmarkEOF(); } while (RestrictedRead(&record)) { if (record.size() < WriteBatchInternal::kHeader) { reporter_.Corruption( record.size(), Status::Corruption("very small log record")); continue; } else { // started_ should be true if called by application assert(internal || started_); // started_ should be false if called internally assert(!internal || !started_); UpdateCurrentWriteBatch(record); if (internal && !started_) { started_ = true; } return; } } // Open the next file if (current_file_index_ < files_->size() - 1) { ++current_file_index_; Status s = OpenLogReader(files_->at(current_file_index_).get()); if (!s.ok()) { 
is_valid_ = false; current_status_ = s; return; } } else { is_valid_ = false; if (current_last_seq_ == versions_->LastSequence()) { current_status_ = Status::OK(); } else { const char* msg = "Create a new iterator to fetch the new tail."; current_status_ = Status::TryAgain(msg); } return; } } } bool TransactionLogIteratorImpl::IsBatchExpected( const WriteBatch* batch, const SequenceNumber expected_seq) { assert(batch); SequenceNumber batchSeq = WriteBatchInternal::Sequence(batch); if (batchSeq != expected_seq) { char buf[200]; snprintf(buf, sizeof(buf), "Discontinuity in log records. Got seq=%" PRIu64 ", Expected seq=%" PRIu64 ", Last flushed seq=%" PRIu64 ".Log iterator will reseek the correct batch.", batchSeq, expected_seq, versions_->LastSequence()); reporter_.Info(buf); return false; } return true; } void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) { std::unique_ptr batch(new WriteBatch()); WriteBatchInternal::SetContents(batch.get(), record); SequenceNumber expected_seq = current_last_seq_ + 1; // If the iterator has started, then confirm that we get continuous batches if (started_ && !IsBatchExpected(batch.get(), expected_seq)) { // Seek to the batch having expected sequence number if (expected_seq < files_->at(current_file_index_)->StartSequence()) { // Expected batch must lie in the previous log file // Avoid underflow. if (current_file_index_ != 0) { current_file_index_--; } } starting_sequence_number_ = expected_seq; // currentStatus_ will be set to Ok if reseek succeeds // Note: this is still ok in seq_pre_batch_ && two_write_queuesp_ mode // that allows gaps in the WAL since it will still skip over the gap. 
current_status_ = Status::NotFound("Gap in sequence numbers"); // In seq_per_batch_ mode, gaps in the seq are possible so the strict mode // should be disabled return SeekToStartSequence(current_file_index_, !seq_per_batch_); } struct BatchCounter : public WriteBatch::Handler { SequenceNumber sequence_; BatchCounter(SequenceNumber sequence) : sequence_(sequence) {} Status MarkNoop(bool empty_batch) override { if (!empty_batch) { sequence_++; } return Status::OK(); } Status MarkEndPrepare(const Slice&) override { sequence_++; return Status::OK(); } Status MarkCommit(const Slice&) override { sequence_++; return Status::OK(); } Status PutCF(uint32_t /*cf*/, const Slice& /*key*/, const Slice& /*val*/) override { return Status::OK(); } Status DeleteCF(uint32_t /*cf*/, const Slice& /*key*/) override { return Status::OK(); } Status SingleDeleteCF(uint32_t /*cf*/, const Slice& /*key*/) override { return Status::OK(); } Status MergeCF(uint32_t /*cf*/, const Slice& /*key*/, const Slice& /*val*/) override { return Status::OK(); } Status MarkBeginPrepare(bool) override { return Status::OK(); } Status MarkRollback(const Slice&) override { return Status::OK(); } }; current_batch_seq_ = WriteBatchInternal::Sequence(batch.get()); if (seq_per_batch_) { BatchCounter counter(current_batch_seq_); batch->Iterate(&counter); current_last_seq_ = counter.sequence_; } else { current_last_seq_ = current_batch_seq_ + WriteBatchInternal::Count(batch.get()) - 1; } // currentBatchSeq_ can only change here assert(current_last_seq_ <= versions_->LastSequence()); current_batch_ = std::move(batch); is_valid_ = true; current_status_ = Status::OK(); } Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* log_file) { std::unique_ptr file; Status s = OpenLogFile(log_file, &file); if (!s.ok()) { return s; } assert(file); current_log_reader_.reset( new log::Reader(options_->info_log, std::move(file), &reporter_, read_options_.verify_checksums_, log_file->LogNumber())); return Status::OK(); } } 
// namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/transaction_log_impl.h000066400000000000000000000105021370372246700202100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include "db/log_reader.h" #include "db/version_set.h" #include "file/filename.h" #include "options/db_options.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/transaction_log.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { class LogFileImpl : public LogFile { public: LogFileImpl(uint64_t logNum, WalFileType logType, SequenceNumber startSeq, uint64_t sizeBytes) : logNumber_(logNum), type_(logType), startSequence_(startSeq), sizeFileBytes_(sizeBytes) { } std::string PathName() const override { if (type_ == kArchivedLogFile) { return ArchivedLogFileName("", logNumber_); } return LogFileName("", logNumber_); } uint64_t LogNumber() const override { return logNumber_; } WalFileType Type() const override { return type_; } SequenceNumber StartSequence() const override { return startSequence_; } uint64_t SizeFileBytes() const override { return sizeFileBytes_; } bool operator < (const LogFile& that) const { return LogNumber() < that.LogNumber(); } private: uint64_t logNumber_; WalFileType type_; SequenceNumber startSequence_; uint64_t sizeFileBytes_; }; class TransactionLogIteratorImpl : public TransactionLogIterator { public: TransactionLogIteratorImpl( const std::string& dir, const ImmutableDBOptions* options, const TransactionLogIterator::ReadOptions& read_options, const EnvOptions& soptions, const SequenceNumber seqNum, std::unique_ptr files, VersionSet const* const versions, const bool seq_per_batch); virtual bool Valid() override; virtual void 
Next() override; virtual Status status() override; virtual BatchResult GetBatch() override; private: const std::string& dir_; const ImmutableDBOptions* options_; const TransactionLogIterator::ReadOptions read_options_; const EnvOptions& soptions_; SequenceNumber starting_sequence_number_; std::unique_ptr files_; bool started_; bool is_valid_; // not valid when it starts of. Status current_status_; size_t current_file_index_; std::unique_ptr current_batch_; std::unique_ptr current_log_reader_; std::string scratch_; Status OpenLogFile(const LogFile* log_file, std::unique_ptr* file); struct LogReporter : public log::Reader::Reporter { Env* env; Logger* info_log; virtual void Corruption(size_t bytes, const Status& s) override { ROCKS_LOG_ERROR(info_log, "dropping %" ROCKSDB_PRIszt " bytes; %s", bytes, s.ToString().c_str()); } virtual void Info(const char* s) { ROCKS_LOG_INFO(info_log, "%s", s); } } reporter_; SequenceNumber current_batch_seq_; // sequence number at start of current batch SequenceNumber current_last_seq_; // last sequence in the current batch // Used only to get latest seq. num // TODO(icanadi) can this be just a callback? VersionSet const* const versions_; const bool seq_per_batch_; // Reads from transaction log only if the writebatch record has been written bool RestrictedRead(Slice* record); // Seeks to startingSequenceNumber reading from startFileIndex in files_. // If strict is set,then must get a batch starting with startingSequenceNumber void SeekToStartSequence(uint64_t start_file_index = 0, bool strict = false); // Implementation of Next. 
SeekToStartSequence calls it internally with // internal=true to let it find next entry even if it has to jump gaps because // the iterator may start off from the first available entry but promises to // be continuous after that void NextImpl(bool internal = false); // Check if batch is expected, else return false bool IsBatchExpected(const WriteBatch* batch, SequenceNumber expected_seq); // Update current batch if a continuous batch is found, else return false void UpdateCurrentWriteBatch(const Slice& record); Status OpenLogReader(const LogFile* file); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/db/trim_history_scheduler.cc000066400000000000000000000025321370372246700207350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/trim_history_scheduler.h" #include #include "db/column_family.h" namespace ROCKSDB_NAMESPACE { void TrimHistoryScheduler::ScheduleWork(ColumnFamilyData* cfd) { std::lock_guard lock(checking_mutex_); cfd->Ref(); cfds_.push_back(cfd); is_empty_.store(false, std::memory_order_relaxed); } ColumnFamilyData* TrimHistoryScheduler::TakeNextColumnFamily() { std::lock_guard lock(checking_mutex_); while (true) { if (cfds_.empty()) { return nullptr; } ColumnFamilyData* cfd = cfds_.back(); cfds_.pop_back(); if (cfds_.empty()) { is_empty_.store(true, std::memory_order_relaxed); } if (!cfd->IsDropped()) { // success return cfd; } cfd->UnrefAndTryDelete(); } } bool TrimHistoryScheduler::Empty() { bool is_empty = is_empty_.load(std::memory_order_relaxed); return is_empty; } void TrimHistoryScheduler::Clear() { ColumnFamilyData* cfd; while ((cfd = TakeNextColumnFamily()) != nullptr) { cfd->UnrefAndTryDelete(); } assert(Empty()); } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/db/trim_history_scheduler.h000066400000000000000000000025511370372246700206000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class ColumnFamilyData; // Similar to FlushScheduler, TrimHistoryScheduler is a FIFO queue that keeps // track of column families whose flushed immutable memtables may need to be // removed (aka trimmed). The actual trimming may be slightly delayed. Due to // the use of the mutex and atomic variable, ScheduleWork, // TakeNextColumnFamily, and, Empty can be called concurrently. class TrimHistoryScheduler { public: TrimHistoryScheduler() : is_empty_(true) {} // When a column family needs history trimming, add cfd to the FIFO queue void ScheduleWork(ColumnFamilyData* cfd); // Remove the column family from the queue, the caller is responsible for // calling `MemtableList::TrimHistory` ColumnFamilyData* TakeNextColumnFamily(); bool Empty(); void Clear(); // Not on critical path, use mutex to ensure thread safety private: std::atomic is_empty_; autovector cfds_; std::mutex checking_mutex_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_builder.cc000066400000000000000000001027011370372246700173350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #include "db/version_builder.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "db/blob/blob_file_meta.h" #include "db/dbformat.h" #include "db/internal_stats.h" #include "db/table_cache.h" #include "db/version_set.h" #include "port/port.h" #include "table/table_reader.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) { if (a->fd.largest_seqno != b->fd.largest_seqno) { return a->fd.largest_seqno > b->fd.largest_seqno; } if (a->fd.smallest_seqno != b->fd.smallest_seqno) { return a->fd.smallest_seqno > b->fd.smallest_seqno; } // Break ties by file number return a->fd.GetNumber() > b->fd.GetNumber(); } namespace { bool BySmallestKey(FileMetaData* a, FileMetaData* b, const InternalKeyComparator* cmp) { int r = cmp->Compare(a->smallest, b->smallest); if (r != 0) { return (r < 0); } // Break ties by file number return (a->fd.GetNumber() < b->fd.GetNumber()); } } // namespace class VersionBuilder::Rep { private: // Helper to sort files_ in v // kLevel0 -- NewestFirstBySeqNo // kLevelNon0 -- BySmallestKey struct FileComparator { enum SortMethod { kLevel0 = 0, kLevelNon0 = 1, } sort_method; const InternalKeyComparator* internal_comparator; FileComparator() : internal_comparator(nullptr) {} bool operator()(FileMetaData* f1, FileMetaData* f2) const { switch (sort_method) { case kLevel0: return NewestFirstBySeqNo(f1, f2); case kLevelNon0: return BySmallestKey(f1, f2, internal_comparator); } assert(false); return false; } }; struct LevelState { std::unordered_set deleted_files; // Map from file number to file meta data. 
std::unordered_map added_files; }; class BlobFileMetaDataDelta { public: bool IsEmpty() const { return !shared_meta_ && !additional_garbage_count_ && !additional_garbage_bytes_ && newly_linked_ssts_.empty() && newly_unlinked_ssts_.empty(); } std::shared_ptr GetSharedMeta() const { return shared_meta_; } uint64_t GetAdditionalGarbageCount() const { return additional_garbage_count_; } uint64_t GetAdditionalGarbageBytes() const { return additional_garbage_bytes_; } const std::unordered_set& GetNewlyLinkedSsts() const { return newly_linked_ssts_; } const std::unordered_set& GetNewlyUnlinkedSsts() const { return newly_unlinked_ssts_; } void SetSharedMeta(std::shared_ptr shared_meta) { assert(!shared_meta_); assert(shared_meta); shared_meta_ = std::move(shared_meta); } void AddGarbage(uint64_t count, uint64_t bytes) { additional_garbage_count_ += count; additional_garbage_bytes_ += bytes; } void LinkSst(uint64_t sst_file_number) { assert(newly_linked_ssts_.find(sst_file_number) == newly_linked_ssts_.end()); // Reconcile with newly unlinked SSTs on the fly. (Note: an SST can be // linked to and unlinked from the same blob file in the case of a trivial // move.) auto it = newly_unlinked_ssts_.find(sst_file_number); if (it != newly_unlinked_ssts_.end()) { newly_unlinked_ssts_.erase(it); } else { newly_linked_ssts_.emplace(sst_file_number); } } void UnlinkSst(uint64_t sst_file_number) { assert(newly_unlinked_ssts_.find(sst_file_number) == newly_unlinked_ssts_.end()); // Reconcile with newly linked SSTs on the fly. (Note: an SST can be // linked to and unlinked from the same blob file in the case of a trivial // move.) 
auto it = newly_linked_ssts_.find(sst_file_number); if (it != newly_linked_ssts_.end()) { newly_linked_ssts_.erase(it); } else { newly_unlinked_ssts_.emplace(sst_file_number); } } private: std::shared_ptr shared_meta_; uint64_t additional_garbage_count_ = 0; uint64_t additional_garbage_bytes_ = 0; std::unordered_set newly_linked_ssts_; std::unordered_set newly_unlinked_ssts_; }; const FileOptions& file_options_; const ImmutableCFOptions* const ioptions_; TableCache* table_cache_; VersionStorageInfo* base_vstorage_; VersionSet* version_set_; int num_levels_; LevelState* levels_; // Store sizes of levels larger than num_levels_. We do this instead of // storing them in levels_ to avoid regression in case there are no files // on invalid levels. The version is not consistent if in the end the files // on invalid levels don't cancel out. std::unordered_map invalid_level_sizes_; // Whether there are invalid new files or invalid deletion on levels larger // than num_levels_. bool has_invalid_levels_; // Current levels of table files affected by additions/deletions. std::unordered_map table_file_levels_; FileComparator level_zero_cmp_; FileComparator level_nonzero_cmp_; // Metadata delta for all blob files affected by the series of version edits. 
std::map blob_file_meta_deltas_; public: Rep(const FileOptions& file_options, const ImmutableCFOptions* ioptions, TableCache* table_cache, VersionStorageInfo* base_vstorage, VersionSet* version_set) : file_options_(file_options), ioptions_(ioptions), table_cache_(table_cache), base_vstorage_(base_vstorage), version_set_(version_set), num_levels_(base_vstorage->num_levels()), has_invalid_levels_(false) { assert(ioptions_); levels_ = new LevelState[num_levels_]; level_zero_cmp_.sort_method = FileComparator::kLevel0; level_nonzero_cmp_.sort_method = FileComparator::kLevelNon0; level_nonzero_cmp_.internal_comparator = base_vstorage_->InternalComparator(); } ~Rep() { for (int level = 0; level < num_levels_; level++) { const auto& added = levels_[level].added_files; for (auto& pair : added) { UnrefFile(pair.second); } } delete[] levels_; } void UnrefFile(FileMetaData* f) { f->refs--; if (f->refs <= 0) { if (f->table_reader_handle) { assert(table_cache_ != nullptr); table_cache_->ReleaseHandle(f->table_reader_handle); f->table_reader_handle = nullptr; } delete f; } } bool IsBlobFileInVersion(uint64_t blob_file_number) const { auto delta_it = blob_file_meta_deltas_.find(blob_file_number); if (delta_it != blob_file_meta_deltas_.end()) { if (delta_it->second.GetSharedMeta()) { return true; } } assert(base_vstorage_); const auto& base_blob_files = base_vstorage_->GetBlobFiles(); auto base_it = base_blob_files.find(blob_file_number); if (base_it != base_blob_files.end()) { assert(base_it->second); assert(base_it->second->GetSharedMeta()); return true; } return false; } using ExpectedLinkedSsts = std::unordered_map; static void UpdateExpectedLinkedSsts( uint64_t table_file_number, uint64_t blob_file_number, ExpectedLinkedSsts* expected_linked_ssts) { assert(expected_linked_ssts); if (blob_file_number == kInvalidBlobFileNumber) { return; } (*expected_linked_ssts)[blob_file_number].emplace(table_file_number); } Status CheckConsistency(VersionStorageInfo* vstorage) { #ifdef NDEBUG 
if (!vstorage->force_consistency_checks()) { // Dont run consistency checks in release mode except if // explicitly asked to return Status::OK(); } #endif // Make sure the files are sorted correctly and that the links between // table files and blob files are consistent. The latter is checked using // the following mapping, which is built using the forward links // (table file -> blob file), and is subsequently compared with the inverse // mapping stored in the BlobFileMetaData objects. ExpectedLinkedSsts expected_linked_ssts; for (int level = 0; level < num_levels_; level++) { auto& level_files = vstorage->LevelFiles(level); if (level_files.empty()) { continue; } assert(level_files[0]); UpdateExpectedLinkedSsts(level_files[0]->fd.GetNumber(), level_files[0]->oldest_blob_file_number, &expected_linked_ssts); for (size_t i = 1; i < level_files.size(); i++) { assert(level_files[i]); UpdateExpectedLinkedSsts(level_files[i]->fd.GetNumber(), level_files[i]->oldest_blob_file_number, &expected_linked_ssts); auto f1 = level_files[i - 1]; auto f2 = level_files[i]; if (level == 0) { #ifndef NDEBUG auto pair = std::make_pair(&f1, &f2); TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistency0", &pair); #endif if (!level_zero_cmp_(f1, f2)) { return Status::Corruption("L0 files are not sorted properly"); } if (f2->fd.smallest_seqno == f2->fd.largest_seqno) { // This is an external file that we ingested SequenceNumber external_file_seqno = f2->fd.smallest_seqno; if (!(external_file_seqno < f1->fd.largest_seqno || external_file_seqno == 0)) { return Status::Corruption( "L0 file with seqno " + NumberToString(f1->fd.smallest_seqno) + " " + NumberToString(f1->fd.largest_seqno) + " vs. 
file with global_seqno" + NumberToString(external_file_seqno) + " with fileNumber " + NumberToString(f1->fd.GetNumber())); } } else if (f1->fd.smallest_seqno <= f2->fd.smallest_seqno) { return Status::Corruption( "L0 files seqno " + NumberToString(f1->fd.smallest_seqno) + " " + NumberToString(f1->fd.largest_seqno) + " " + NumberToString(f1->fd.GetNumber()) + " vs. " + NumberToString(f2->fd.smallest_seqno) + " " + NumberToString(f2->fd.largest_seqno) + " " + NumberToString(f2->fd.GetNumber())); } } else { #ifndef NDEBUG auto pair = std::make_pair(&f1, &f2); TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistency1", &pair); #endif if (!level_nonzero_cmp_(f1, f2)) { return Status::Corruption("L" + NumberToString(level) + " files are not sorted properly"); } // Make sure there is no overlap in levels > 0 if (vstorage->InternalComparator()->Compare(f1->largest, f2->smallest) >= 0) { return Status::Corruption( "L" + NumberToString(level) + " have overlapping ranges " + (f1->largest).DebugString(true) + " vs. " + (f2->smallest).DebugString(true)); } } } } // Make sure that all blob files in the version have non-garbage data. 
const auto& blob_files = vstorage->GetBlobFiles(); for (const auto& pair : blob_files) { const uint64_t blob_file_number = pair.first; const auto& blob_file_meta = pair.second; assert(blob_file_meta); if (blob_file_meta->GetGarbageBlobCount() >= blob_file_meta->GetTotalBlobCount()) { std::ostringstream oss; oss << "Blob file #" << blob_file_number << " consists entirely of garbage"; return Status::Corruption("VersionBuilder", oss.str()); } if (blob_file_meta->GetLinkedSsts() != expected_linked_ssts[blob_file_number]) { std::ostringstream oss; oss << "Links are inconsistent between table files and blob file #" << blob_file_number; return Status::Corruption("VersionBuilder", oss.str()); } } Status ret_s; TEST_SYNC_POINT_CALLBACK("VersionBuilder::CheckConsistencyBeforeReturn", &ret_s); return ret_s; } bool CheckConsistencyForNumLevels() const { // Make sure there are no files on or beyond num_levels(). if (has_invalid_levels_) { return false; } for (const auto& pair : invalid_level_sizes_) { const size_t level_size = pair.second; if (level_size != 0) { return false; } } return true; } Status ApplyBlobFileAddition(const BlobFileAddition& blob_file_addition) { const uint64_t blob_file_number = blob_file_addition.GetBlobFileNumber(); if (IsBlobFileInVersion(blob_file_number)) { std::ostringstream oss; oss << "Blob file #" << blob_file_number << " already added"; return Status::Corruption("VersionBuilder", oss.str()); } // Note: we use C++11 for now but in C++14, this could be done in a more // elegant way using generalized lambda capture. 
VersionSet* const vs = version_set_; const ImmutableCFOptions* const ioptions = ioptions_; auto deleter = [vs, ioptions](SharedBlobFileMetaData* shared_meta) { if (vs) { assert(ioptions); assert(!ioptions->cf_paths.empty()); assert(shared_meta); vs->AddObsoleteBlobFile(shared_meta->GetBlobFileNumber(), ioptions->cf_paths.front().path); } delete shared_meta; }; auto shared_meta = SharedBlobFileMetaData::Create( blob_file_number, blob_file_addition.GetTotalBlobCount(), blob_file_addition.GetTotalBlobBytes(), blob_file_addition.GetChecksumMethod(), blob_file_addition.GetChecksumValue(), deleter); blob_file_meta_deltas_[blob_file_number].SetSharedMeta( std::move(shared_meta)); return Status::OK(); } Status ApplyBlobFileGarbage(const BlobFileGarbage& blob_file_garbage) { const uint64_t blob_file_number = blob_file_garbage.GetBlobFileNumber(); if (!IsBlobFileInVersion(blob_file_number)) { std::ostringstream oss; oss << "Blob file #" << blob_file_number << " not found"; return Status::Corruption("VersionBuilder", oss.str()); } blob_file_meta_deltas_[blob_file_number].AddGarbage( blob_file_garbage.GetGarbageBlobCount(), blob_file_garbage.GetGarbageBlobBytes()); return Status::OK(); } int GetCurrentLevelForTableFile(uint64_t file_number) const { auto it = table_file_levels_.find(file_number); if (it != table_file_levels_.end()) { return it->second; } assert(base_vstorage_); return base_vstorage_->GetFileLocation(file_number).GetLevel(); } uint64_t GetOldestBlobFileNumberForTableFile(int level, uint64_t file_number) const { assert(level < num_levels_); const auto& added_files = levels_[level].added_files; auto it = added_files.find(file_number); if (it != added_files.end()) { const FileMetaData* const meta = it->second; assert(meta); return meta->oldest_blob_file_number; } assert(base_vstorage_); const FileMetaData* const meta = base_vstorage_->GetFileMetaDataByNumber(file_number); assert(meta); return meta->oldest_blob_file_number; } Status ApplyFileDeletion(int level, 
uint64_t file_number) { assert(level != VersionStorageInfo::FileLocation::Invalid().GetLevel()); const int current_level = GetCurrentLevelForTableFile(file_number); if (level != current_level) { if (level >= num_levels_) { has_invalid_levels_ = true; } std::ostringstream oss; oss << "Cannot delete table file #" << file_number << " from level " << level << " since it is "; if (current_level == VersionStorageInfo::FileLocation::Invalid().GetLevel()) { oss << "not in the LSM tree"; } else { oss << "on level " << current_level; } return Status::Corruption("VersionBuilder", oss.str()); } if (level >= num_levels_) { assert(invalid_level_sizes_[level] > 0); --invalid_level_sizes_[level]; table_file_levels_[file_number] = VersionStorageInfo::FileLocation::Invalid().GetLevel(); return Status::OK(); } const uint64_t blob_file_number = GetOldestBlobFileNumberForTableFile(level, file_number); if (blob_file_number != kInvalidBlobFileNumber && IsBlobFileInVersion(blob_file_number)) { blob_file_meta_deltas_[blob_file_number].UnlinkSst(file_number); } auto& level_state = levels_[level]; auto& add_files = level_state.added_files; auto add_it = add_files.find(file_number); if (add_it != add_files.end()) { UnrefFile(add_it->second); add_files.erase(add_it); } auto& del_files = level_state.deleted_files; assert(del_files.find(file_number) == del_files.end()); del_files.emplace(file_number); table_file_levels_[file_number] = VersionStorageInfo::FileLocation::Invalid().GetLevel(); return Status::OK(); } Status ApplyFileAddition(int level, const FileMetaData& meta) { assert(level != VersionStorageInfo::FileLocation::Invalid().GetLevel()); const uint64_t file_number = meta.fd.GetNumber(); const int current_level = GetCurrentLevelForTableFile(file_number); if (current_level != VersionStorageInfo::FileLocation::Invalid().GetLevel()) { if (level >= num_levels_) { has_invalid_levels_ = true; } std::ostringstream oss; oss << "Cannot add table file #" << file_number << " to level " << level << 
" since it is already in the LSM tree on level " << current_level; return Status::Corruption("VersionBuilder", oss.str()); } if (level >= num_levels_) { ++invalid_level_sizes_[level]; table_file_levels_[file_number] = level; return Status::OK(); } auto& level_state = levels_[level]; auto& del_files = level_state.deleted_files; auto del_it = del_files.find(file_number); if (del_it != del_files.end()) { del_files.erase(del_it); } FileMetaData* const f = new FileMetaData(meta); f->refs = 1; auto& add_files = level_state.added_files; assert(add_files.find(file_number) == add_files.end()); add_files.emplace(file_number, f); const uint64_t blob_file_number = f->oldest_blob_file_number; if (blob_file_number != kInvalidBlobFileNumber && IsBlobFileInVersion(blob_file_number)) { blob_file_meta_deltas_[blob_file_number].LinkSst(file_number); } table_file_levels_[file_number] = level; return Status::OK(); } // Apply all of the edits in *edit to the current state. Status Apply(VersionEdit* edit) { { const Status s = CheckConsistency(base_vstorage_); if (!s.ok()) { return s; } } // Note: we process the blob file related changes first because the // table file addition/deletion logic depends on the blob files // already being there. 
// Add new blob files for (const auto& blob_file_addition : edit->GetBlobFileAdditions()) { const Status s = ApplyBlobFileAddition(blob_file_addition); if (!s.ok()) { return s; } } // Increase the amount of garbage for blob files affected by GC for (const auto& blob_file_garbage : edit->GetBlobFileGarbages()) { const Status s = ApplyBlobFileGarbage(blob_file_garbage); if (!s.ok()) { return s; } } // Delete table files for (const auto& deleted_file : edit->GetDeletedFiles()) { const int level = deleted_file.first; const uint64_t file_number = deleted_file.second; const Status s = ApplyFileDeletion(level, file_number); if (!s.ok()) { return s; } } // Add new table files for (const auto& new_file : edit->GetNewFiles()) { const int level = new_file.first; const FileMetaData& meta = new_file.second; const Status s = ApplyFileAddition(level, meta); if (!s.ok()) { return s; } } return Status::OK(); } static BlobFileMetaData::LinkedSsts ApplyLinkedSstChanges( const BlobFileMetaData::LinkedSsts& base, const std::unordered_set& newly_linked, const std::unordered_set& newly_unlinked) { BlobFileMetaData::LinkedSsts result(base); for (uint64_t sst_file_number : newly_unlinked) { assert(result.find(sst_file_number) != result.end()); result.erase(sst_file_number); } for (uint64_t sst_file_number : newly_linked) { assert(result.find(sst_file_number) == result.end()); result.emplace(sst_file_number); } return result; } static std::shared_ptr CreateMetaDataForNewBlobFile( const BlobFileMetaDataDelta& delta) { auto shared_meta = delta.GetSharedMeta(); assert(shared_meta); assert(delta.GetNewlyUnlinkedSsts().empty()); auto meta = BlobFileMetaData::Create( std::move(shared_meta), delta.GetNewlyLinkedSsts(), delta.GetAdditionalGarbageCount(), delta.GetAdditionalGarbageBytes()); return meta; } static std::shared_ptr GetOrCreateMetaDataForExistingBlobFile( const std::shared_ptr& base_meta, const BlobFileMetaDataDelta& delta) { assert(base_meta); assert(!delta.GetSharedMeta()); if 
(delta.IsEmpty()) { return base_meta; } auto shared_meta = base_meta->GetSharedMeta(); assert(shared_meta); auto linked_ssts = ApplyLinkedSstChanges(base_meta->GetLinkedSsts(), delta.GetNewlyLinkedSsts(), delta.GetNewlyUnlinkedSsts()); auto meta = BlobFileMetaData::Create( std::move(shared_meta), std::move(linked_ssts), base_meta->GetGarbageBlobCount() + delta.GetAdditionalGarbageCount(), base_meta->GetGarbageBlobBytes() + delta.GetAdditionalGarbageBytes()); return meta; } void AddBlobFileIfNeeded( VersionStorageInfo* vstorage, const std::shared_ptr& meta) const { assert(vstorage); assert(meta); if (meta->GetGarbageBlobCount() < meta->GetTotalBlobCount() || !meta->GetLinkedSsts().empty()) { vstorage->AddBlobFile(meta); } } // Merge the blob file metadata from the base version with the changes (edits) // applied, and save the result into *vstorage. void SaveBlobFilesTo(VersionStorageInfo* vstorage) const { assert(base_vstorage_); assert(vstorage); const auto& base_blob_files = base_vstorage_->GetBlobFiles(); auto base_it = base_blob_files.begin(); const auto base_it_end = base_blob_files.end(); auto delta_it = blob_file_meta_deltas_.begin(); const auto delta_it_end = blob_file_meta_deltas_.end(); while (base_it != base_it_end && delta_it != delta_it_end) { const uint64_t base_blob_file_number = base_it->first; const uint64_t delta_blob_file_number = delta_it->first; if (base_blob_file_number < delta_blob_file_number) { const auto& base_meta = base_it->second; assert(base_meta); assert(base_meta->GetGarbageBlobCount() < base_meta->GetTotalBlobCount()); vstorage->AddBlobFile(base_meta); ++base_it; } else if (delta_blob_file_number < base_blob_file_number) { // Note: blob file numbers are strictly increasing over time and // once blob files get marked obsolete, they never reappear. Thus, // this case is not possible. 
assert(false); ++delta_it; } else { assert(base_blob_file_number == delta_blob_file_number); const auto& base_meta = base_it->second; const auto& delta = delta_it->second; auto meta = GetOrCreateMetaDataForExistingBlobFile(base_meta, delta); AddBlobFileIfNeeded(vstorage, meta); ++base_it; ++delta_it; } } while (base_it != base_it_end) { const auto& base_meta = base_it->second; assert(base_meta); assert(base_meta->GetGarbageBlobCount() < base_meta->GetTotalBlobCount()); vstorage->AddBlobFile(base_meta); ++base_it; } while (delta_it != delta_it_end) { const auto& delta = delta_it->second; auto meta = CreateMetaDataForNewBlobFile(delta); AddBlobFileIfNeeded(vstorage, meta); ++delta_it; } } // Save the current state in *v. Status SaveTo(VersionStorageInfo* vstorage) { Status s = CheckConsistency(base_vstorage_); if (!s.ok()) { return s; } s = CheckConsistency(vstorage); if (!s.ok()) { return s; } for (int level = 0; level < num_levels_; level++) { const auto& cmp = (level == 0) ? level_zero_cmp_ : level_nonzero_cmp_; // Merge the set of added files with the set of pre-existing files. // Drop any deleted files. Store the result in *v. const auto& base_files = base_vstorage_->LevelFiles(level); const auto& unordered_added_files = levels_[level].added_files; vstorage->Reserve(level, base_files.size() + unordered_added_files.size()); // Sort added files for the level. 
std::vector added_files; added_files.reserve(unordered_added_files.size()); for (const auto& pair : unordered_added_files) { added_files.push_back(pair.second); } std::sort(added_files.begin(), added_files.end(), cmp); #ifndef NDEBUG FileMetaData* prev_added_file = nullptr; for (const auto& added : added_files) { if (level > 0 && prev_added_file != nullptr) { assert(base_vstorage_->InternalComparator()->Compare( prev_added_file->smallest, added->smallest) <= 0); } prev_added_file = added; } #endif auto base_iter = base_files.begin(); auto base_end = base_files.end(); auto added_iter = added_files.begin(); auto added_end = added_files.end(); while (added_iter != added_end || base_iter != base_end) { if (base_iter == base_end || (added_iter != added_end && cmp(*added_iter, *base_iter))) { MaybeAddFile(vstorage, level, *added_iter++); } else { MaybeAddFile(vstorage, level, *base_iter++); } } } SaveBlobFilesTo(vstorage); s = CheckConsistency(vstorage); return s; } Status LoadTableHandlers(InternalStats* internal_stats, int max_threads, bool prefetch_index_and_filter_in_cache, bool is_initial_load, const SliceTransform* prefix_extractor, size_t max_file_size_for_l0_meta_pin) { assert(table_cache_ != nullptr); size_t table_cache_capacity = table_cache_->get_cache()->GetCapacity(); bool always_load = (table_cache_capacity == TableCache::kInfiniteCapacity); size_t max_load = port::kMaxSizet; if (!always_load) { // If it is initial loading and not set to always loading all the // files, we only load up to kInitialLoadLimit files, to limit the // time reopening the DB. const size_t kInitialLoadLimit = 16; size_t load_limit; // If the table cache is not 1/4 full, we pin the table handle to // file metadata to avoid the cache read costs when reading the file. // The downside of pinning those files is that LRU won't be followed // for those files. 
This doesn't matter much because if number of files // of the DB excceeds table cache capacity, eventually no table reader // will be pinned and LRU will be followed. if (is_initial_load) { load_limit = std::min(kInitialLoadLimit, table_cache_capacity / 4); } else { load_limit = table_cache_capacity / 4; } size_t table_cache_usage = table_cache_->get_cache()->GetUsage(); if (table_cache_usage >= load_limit) { // TODO (yanqin) find a suitable status code. return Status::OK(); } else { max_load = load_limit - table_cache_usage; } } // std::vector> files_meta; std::vector statuses; for (int level = 0; level < num_levels_; level++) { for (auto& file_meta_pair : levels_[level].added_files) { auto* file_meta = file_meta_pair.second; // If the file has been opened before, just skip it. if (!file_meta->table_reader_handle) { files_meta.emplace_back(file_meta, level); statuses.emplace_back(Status::OK()); } if (files_meta.size() >= max_load) { break; } } if (files_meta.size() >= max_load) { break; } } std::atomic next_file_meta_idx(0); std::function load_handlers_func([&]() { while (true) { size_t file_idx = next_file_meta_idx.fetch_add(1); if (file_idx >= files_meta.size()) { break; } auto* file_meta = files_meta[file_idx].first; int level = files_meta[file_idx].second; statuses[file_idx] = table_cache_->FindTable( file_options_, *(base_vstorage_->InternalComparator()), file_meta->fd, &file_meta->table_reader_handle, prefix_extractor, false /*no_io */, true /* record_read_stats */, internal_stats->GetFileReadHist(level), false, level, prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin); if (file_meta->table_reader_handle != nullptr) { // Load table_reader file_meta->fd.table_reader = table_cache_->GetTableReaderFromHandle( file_meta->table_reader_handle); } } }); std::vector threads; for (int i = 1; i < max_threads; i++) { threads.emplace_back(load_handlers_func); } load_handlers_func(); for (auto& t : threads) { t.join(); } for (const auto& s : statuses) { 
if (!s.ok()) { return s; } } return Status::OK(); } void MaybeAddFile(VersionStorageInfo* vstorage, int level, FileMetaData* f) { const uint64_t file_number = f->fd.GetNumber(); const auto& level_state = levels_[level]; const auto& del_files = level_state.deleted_files; const auto del_it = del_files.find(file_number); if (del_it != del_files.end()) { // f is to-be-deleted table file vstorage->RemoveCurrentStats(f); } else { const auto& add_files = level_state.added_files; const auto add_it = add_files.find(file_number); // Note: if the file appears both in the base version and in the added // list, the added FileMetaData supersedes the one in the base version. if (add_it != add_files.end() && add_it->second != f) { vstorage->RemoveCurrentStats(f); } else { assert(ioptions_); vstorage->AddFile(level, f, ioptions_->info_log); } } } }; VersionBuilder::VersionBuilder(const FileOptions& file_options, const ImmutableCFOptions* ioptions, TableCache* table_cache, VersionStorageInfo* base_vstorage, VersionSet* version_set) : rep_(new Rep(file_options, ioptions, table_cache, base_vstorage, version_set)) {} VersionBuilder::~VersionBuilder() = default; bool VersionBuilder::CheckConsistencyForNumLevels() { return rep_->CheckConsistencyForNumLevels(); } Status VersionBuilder::Apply(VersionEdit* edit) { return rep_->Apply(edit); } Status VersionBuilder::SaveTo(VersionStorageInfo* vstorage) { return rep_->SaveTo(vstorage); } Status VersionBuilder::LoadTableHandlers( InternalStats* internal_stats, int max_threads, bool prefetch_index_and_filter_in_cache, bool is_initial_load, const SliceTransform* prefix_extractor, size_t max_file_size_for_l0_meta_pin) { return rep_->LoadTableHandlers( internal_stats, max_threads, prefetch_index_and_filter_in_cache, is_initial_load, prefix_extractor, max_file_size_for_l0_meta_pin); } BaseReferencedVersionBuilder::BaseReferencedVersionBuilder( ColumnFamilyData* cfd) : version_builder_(new VersionBuilder( 
cfd->current()->version_set()->file_options(), cfd->ioptions(), cfd->table_cache(), cfd->current()->storage_info(), cfd->current()->version_set())), version_(cfd->current()) { version_->Ref(); } BaseReferencedVersionBuilder::BaseReferencedVersionBuilder( ColumnFamilyData* cfd, Version* v) : version_builder_(new VersionBuilder( cfd->current()->version_set()->file_options(), cfd->ioptions(), cfd->table_cache(), v->storage_info(), v->version_set())), version_(v) { assert(version_ != cfd->current()); } BaseReferencedVersionBuilder::~BaseReferencedVersionBuilder() { version_->Unref(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_builder.h000066400000000000000000000046041370372246700172020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // #pragma once #include #include "rocksdb/file_system.h" #include "rocksdb/slice_transform.h" namespace ROCKSDB_NAMESPACE { struct ImmutableCFOptions; class TableCache; class VersionStorageInfo; class VersionEdit; struct FileMetaData; class InternalStats; class Version; class VersionSet; class ColumnFamilyData; // A helper class so we can efficiently apply a whole sequence // of edits to a particular state without creating intermediate // Versions that contain full copies of the intermediate state. 
class VersionBuilder { public: VersionBuilder(const FileOptions& file_options, const ImmutableCFOptions* ioptions, TableCache* table_cache, VersionStorageInfo* base_vstorage, VersionSet* version_set); ~VersionBuilder(); bool CheckConsistencyForNumLevels(); Status Apply(VersionEdit* edit); Status SaveTo(VersionStorageInfo* vstorage); Status LoadTableHandlers(InternalStats* internal_stats, int max_threads, bool prefetch_index_and_filter_in_cache, bool is_initial_load, const SliceTransform* prefix_extractor, size_t max_file_size_for_l0_meta_pin); private: class Rep; std::unique_ptr rep_; }; // A wrapper of version builder which references the current version in // constructor and unref it in the destructor. // Both of the constructor and destructor need to be called inside DB Mutex. class BaseReferencedVersionBuilder { public: explicit BaseReferencedVersionBuilder(ColumnFamilyData* cfd); BaseReferencedVersionBuilder(ColumnFamilyData* cfd, Version* v); ~BaseReferencedVersionBuilder(); VersionBuilder* version_builder() const { return version_builder_.get(); } private: std::unique_ptr version_builder_; Version* version_; }; extern bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_builder_test.cc000066400000000000000000001502461370372246700204030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include #include #include "db/version_edit.h" #include "db/version_set.h" #include "logging/logging.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class VersionBuilderTest : public testing::Test { public: const Comparator* ucmp_; InternalKeyComparator icmp_; Options options_; ImmutableCFOptions ioptions_; MutableCFOptions mutable_cf_options_; VersionStorageInfo vstorage_; uint32_t file_num_; CompactionOptionsFIFO fifo_options_; std::vector size_being_compacted_; VersionBuilderTest() : ucmp_(BytewiseComparator()), icmp_(ucmp_), ioptions_(options_), mutable_cf_options_(options_), vstorage_(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false), file_num_(1) { mutable_cf_options_.RefreshDerivedOptions(ioptions_); size_being_compacted_.resize(options_.num_levels); } ~VersionBuilderTest() override { for (int i = 0; i < vstorage_.num_levels(); i++) { for (auto* f : vstorage_.LevelFiles(i)) { if (--f->refs == 0) { delete f; } } } } InternalKey GetInternalKey(const char* ukey, SequenceNumber smallest_seq = 100) { return InternalKey(ukey, smallest_seq, kTypeValue); } void Add(int level, uint64_t file_number, const char* smallest, const char* largest, uint64_t file_size = 0, uint32_t path_id = 0, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100, uint64_t num_entries = 0, uint64_t num_deletions = 0, bool sampled = false, SequenceNumber smallest_seqno = 0, SequenceNumber largest_seqno = 0, uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) { assert(level < vstorage_.num_levels()); FileMetaData* f = new FileMetaData( file_number, path_id, file_size, GetInternalKey(smallest, smallest_seq), GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, /* marked_for_compact */ false, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); 
f->compensated_file_size = file_size; f->num_entries = num_entries; f->num_deletions = num_deletions; vstorage_.AddFile(level, f); if (sampled) { f->init_stats_from_file = true; vstorage_.UpdateAccumulatedStats(f); } } void AddBlob(uint64_t blob_file_number, uint64_t total_blob_count, uint64_t total_blob_bytes, std::string checksum_method, std::string checksum_value, BlobFileMetaData::LinkedSsts linked_ssts, uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) { auto shared_meta = SharedBlobFileMetaData::Create( blob_file_number, total_blob_count, total_blob_bytes, std::move(checksum_method), std::move(checksum_value)); auto meta = BlobFileMetaData::Create(std::move(shared_meta), std::move(linked_ssts), garbage_blob_count, garbage_blob_bytes); vstorage_.AddBlobFile(std::move(meta)); } static std::shared_ptr GetBlobFileMetaData( const VersionStorageInfo::BlobFiles& blob_files, uint64_t blob_file_number) { const auto it = blob_files.find(blob_file_number); if (it == blob_files.end()) { return std::shared_ptr(); } const auto& meta = it->second; assert(meta); return meta; } void UpdateVersionStorageInfo() { vstorage_.UpdateFilesByCompactionPri(ioptions_.compaction_pri); vstorage_.UpdateNumNonEmptyLevels(); vstorage_.GenerateFileIndexer(); vstorage_.GenerateLevelFilesBrief(); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); vstorage_.GenerateLevel0NonOverlapping(); vstorage_.SetFinalized(); } }; void UnrefFilesInVersion(VersionStorageInfo* new_vstorage) { for (int i = 0; i < new_vstorage->num_levels(); i++) { for (auto* f : new_vstorage->LevelFiles(i)) { if (--f->refs == 0) { delete f; } } } } TEST_F(VersionBuilderTest, ApplyAndSaveTo) { Add(0, 1U, "150", "200", 100U); Add(1, 66U, "150", "200", 100U); Add(1, 88U, "201", "300", 100U); Add(2, 6U, "150", "179", 100U); Add(2, 7U, "180", "220", 100U); Add(2, 8U, "221", "300", 100U); Add(3, 26U, "150", "170", 100U); Add(3, 27U, "171", "179", 100U); Add(3, 28U, "191", "220", 100U); Add(3, 29U, "221", "300", 
100U); UpdateVersionStorageInfo(); VersionEdit version_edit; version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.DeleteFile(3, 27U); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false); version_builder.Apply(&version_edit); version_builder.SaveTo(&new_vstorage); ASSERT_EQ(400U, new_vstorage.NumLevelBytes(2)); ASSERT_EQ(300U, new_vstorage.NumLevelBytes(3)); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { ioptions_.level_compaction_dynamic_level_bytes = true; Add(0, 1U, "150", "200", 100U, 0, 200U, 200U, 0, 0, false, 200U, 200U); Add(0, 88U, "201", "300", 100U, 0, 100U, 100U, 0, 0, false, 100U, 100U); Add(4, 6U, "150", "179", 100U); Add(4, 7U, "180", "220", 100U); Add(4, 8U, "221", "300", 100U); Add(5, 26U, "150", "170", 100U); Add(5, 27U, "171", "179", 100U); UpdateVersionStorageInfo(); VersionEdit version_edit; version_edit.AddFile(3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false); version_builder.Apply(&version_edit); 
version_builder.SaveTo(&new_vstorage); ASSERT_EQ(0U, new_vstorage.NumLevelBytes(0)); ASSERT_EQ(100U, new_vstorage.NumLevelBytes(3)); ASSERT_EQ(300U, new_vstorage.NumLevelBytes(4)); ASSERT_EQ(200U, new_vstorage.NumLevelBytes(5)); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { ioptions_.level_compaction_dynamic_level_bytes = true; Add(0, 1U, "150", "200", 100U, 0, 200U, 200U, 0, 0, false, 200U, 200U); Add(0, 88U, "201", "300", 100U, 0, 100U, 100U, 0, 0, false, 100U, 100U); Add(4, 6U, "150", "179", 100U); Add(4, 7U, "180", "220", 100U); Add(4, 8U, "221", "300", 100U); Add(5, 26U, "150", "170", 100U); Add(5, 27U, "171", "179", 100U); UpdateVersionStorageInfo(); VersionEdit version_edit; version_edit.AddFile(4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); version_edit.DeleteFile(4, 6U); version_edit.DeleteFile(4, 7U); version_edit.DeleteFile(4, 8U); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false); version_builder.Apply(&version_edit); version_builder.SaveTo(&new_vstorage); ASSERT_EQ(0U, new_vstorage.NumLevelBytes(0)); ASSERT_EQ(100U, new_vstorage.NumLevelBytes(4)); ASSERT_EQ(200U, new_vstorage.NumLevelBytes(5)); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { UpdateVersionStorageInfo(); VersionEdit version_edit; version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 
kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false); version_builder.Apply(&version_edit); version_builder.SaveTo(&new_vstorage); ASSERT_EQ(500U, new_vstorage.NumLevelBytes(2)); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { UpdateVersionStorageInfo(); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false); VersionEdit version_edit; version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, 
kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_builder.Apply(&version_edit); VersionEdit version_edit2; version_edit.AddFile(2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_edit2.DeleteFile(2, 616); version_edit2.DeleteFile(2, 636); version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); version_builder.Apply(&version_edit2); version_builder.SaveTo(&new_vstorage); ASSERT_EQ(300U, new_vstorage.NumLevelBytes(2)); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyFileDeletionIncorrectLevel) { constexpr int level = 1; constexpr uint64_t file_number = 2345; constexpr char smallest[] = "bar"; constexpr char 
largest[] = "foo"; Add(level, file_number, smallest, largest); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr int incorrect_level = 3; edit.DeleteFile(incorrect_level, file_number); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Cannot delete table file #2345 from level 3 since " "it is on level 1")); } TEST_F(VersionBuilderTest, ApplyFileDeletionNotInLSMTree) { EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr int level = 3; constexpr uint64_t file_number = 1234; edit.DeleteFile(level, file_number); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Cannot delete table file #1234 from level 3 since " "it is not in the LSM tree")); } TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { constexpr int level = 1; constexpr uint64_t file_number = 2345; constexpr char smallest[] = "bar"; constexpr char largest[] = "foo"; constexpr uint64_t file_size = 10000; constexpr uint32_t path_id = 0; constexpr SequenceNumber smallest_seq = 100; constexpr SequenceNumber largest_seq = 500; constexpr uint64_t num_entries = 0; constexpr uint64_t num_deletions = 0; constexpr bool sampled = false; constexpr SequenceNumber smallest_seqno = 1; constexpr SequenceNumber largest_seqno = 1000; Add(level, file_number, smallest, largest, file_size, path_id, smallest_seq, largest_seq, num_entries, num_deletions, sampled, smallest_seqno, largest_seqno); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, 
table_cache, &vstorage_, version_set); VersionEdit deletion; deletion.DeleteFile(level, file_number); ASSERT_OK(builder.Apply(&deletion)); VersionEdit addition; constexpr bool marked_for_compaction = false; addition.AddFile(level, file_number, path_id, file_size, GetInternalKey(smallest, smallest_seq), GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); ASSERT_OK(builder.Apply(&addition)); constexpr bool force_consistency_checks = false; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); ASSERT_EQ(new_vstorage.GetFileLocation(file_number).GetLevel(), level); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) { constexpr int level = 1; constexpr uint64_t file_number = 2345; constexpr char smallest[] = "bar"; constexpr char largest[] = "foo"; Add(level, file_number, smallest, largest); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr int new_level = 2; constexpr uint32_t path_id = 0; constexpr uint64_t file_size = 10000; constexpr SequenceNumber smallest_seqno = 100; constexpr SequenceNumber largest_seqno = 1000; constexpr bool marked_for_compaction = false; edit.AddFile(new_level, file_number, path_id, file_size, GetInternalKey(smallest), GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Cannot 
add table file #2345 to level 2 since it is " "already in the LSM tree on level 1")); } TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr int level = 3; constexpr uint64_t file_number = 2345; constexpr uint32_t path_id = 0; constexpr uint64_t file_size = 10000; constexpr char smallest[] = "bar"; constexpr char largest[] = "foo"; constexpr SequenceNumber smallest_seqno = 100; constexpr SequenceNumber largest_seqno = 1000; constexpr bool marked_for_compaction = false; edit.AddFile(level, file_number, path_id, file_size, GetInternalKey(smallest), GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); ASSERT_OK(builder.Apply(&edit)); VersionEdit other_edit; constexpr int new_level = 2; other_edit.AddFile(new_level, file_number, path_id, file_size, GetInternalKey(smallest), GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); const Status s = builder.Apply(&other_edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Cannot add table file #2345 to level 2 since it is " "already in the LSM tree on level 3")); } TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { constexpr int level = 1; constexpr uint64_t file_number = 2345; constexpr uint32_t path_id = 0; constexpr uint64_t file_size = 10000; constexpr char smallest[] = "bar"; constexpr char largest[] = "foo"; constexpr SequenceNumber smallest_seqno = 100; constexpr SequenceNumber largest_seqno = 1000; constexpr bool marked_for_compaction = false; 
EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit addition; addition.AddFile(level, file_number, path_id, file_size, GetInternalKey(smallest), GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); ASSERT_OK(builder.Apply(&addition)); VersionEdit deletion; deletion.DeleteFile(level, file_number); ASSERT_OK(builder.Apply(&deletion)); constexpr bool force_consistency_checks = false; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); ASSERT_FALSE(new_vstorage.GetFileLocation(file_number).IsValid()); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, ApplyBlobFileAddition) { EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr uint64_t blob_file_number = 1234; constexpr uint64_t total_blob_count = 5678; constexpr uint64_t total_blob_bytes = 999999; constexpr char checksum_method[] = "SHA1"; constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); ASSERT_OK(builder.Apply(&edit)); constexpr bool force_consistency_checks = false; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 1); const auto new_meta = 
GetBlobFileMetaData(new_blob_files, blob_file_number); ASSERT_NE(new_meta, nullptr); ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count); ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes); ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method); ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value); ASSERT_TRUE(new_meta->GetLinkedSsts().empty()); ASSERT_EQ(new_meta->GetGarbageBlobCount(), 0); ASSERT_EQ(new_meta->GetGarbageBlobBytes(), 0); } TEST_F(VersionBuilderTest, ApplyBlobFileAdditionAlreadyInBase) { // Attempt to add a blob file that is already present in the base version. constexpr uint64_t blob_file_number = 1234; constexpr uint64_t total_blob_count = 5678; constexpr uint64_t total_blob_bytes = 999999; constexpr char checksum_method[] = "SHA1"; constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; constexpr uint64_t garbage_blob_count = 123; constexpr uint64_t garbage_blob_bytes = 456789; AddBlob(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value, BlobFileMetaData::LinkedSsts(), garbage_blob_count, garbage_blob_bytes); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 already added")); } TEST_F(VersionBuilderTest, ApplyBlobFileAdditionAlreadyApplied) { // Attempt to add the same blob file twice using version edits. 
EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr uint64_t blob_file_number = 1234; constexpr uint64_t total_blob_count = 5678; constexpr uint64_t total_blob_bytes = 999999; constexpr char checksum_method[] = "SHA1"; constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); ASSERT_OK(builder.Apply(&edit)); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 already added")); } TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { // Increase the amount of garbage for a blob file present in the base version. constexpr uint64_t blob_file_number = 1234; constexpr uint64_t total_blob_count = 5678; constexpr uint64_t total_blob_bytes = 999999; constexpr char checksum_method[] = "SHA1"; constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; constexpr uint64_t garbage_blob_count = 123; constexpr uint64_t garbage_blob_bytes = 456789; AddBlob(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value, BlobFileMetaData::LinkedSsts(), garbage_blob_count, garbage_blob_bytes); const auto meta = GetBlobFileMetaData(vstorage_.GetBlobFiles(), blob_file_number); ASSERT_NE(meta, nullptr); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr uint64_t new_garbage_blob_count = 456; constexpr uint64_t new_garbage_blob_bytes = 111111; edit.AddBlobFileGarbage(blob_file_number, new_garbage_blob_count, new_garbage_blob_bytes); ASSERT_OK(builder.Apply(&edit)); constexpr bool 
force_consistency_checks = false; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 1); const auto new_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); ASSERT_NE(new_meta, nullptr); ASSERT_EQ(new_meta->GetSharedMeta(), meta->GetSharedMeta()); ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count); ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes); ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method); ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value); ASSERT_TRUE(new_meta->GetLinkedSsts().empty()); ASSERT_EQ(new_meta->GetGarbageBlobCount(), garbage_blob_count + new_garbage_blob_count); ASSERT_EQ(new_meta->GetGarbageBlobBytes(), garbage_blob_bytes + new_garbage_blob_bytes); } TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileAdditionApplied) { // Increase the amount of garbage for a blob file added using a version edit. 
EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit addition; constexpr uint64_t blob_file_number = 1234; constexpr uint64_t total_blob_count = 5678; constexpr uint64_t total_blob_bytes = 999999; constexpr char checksum_method[] = "SHA1"; constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; addition.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); ASSERT_OK(builder.Apply(&addition)); constexpr uint64_t garbage_blob_count = 123; constexpr uint64_t garbage_blob_bytes = 456789; VersionEdit garbage; garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); ASSERT_OK(builder.Apply(&garbage)); constexpr bool force_consistency_checks = false; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 1); const auto new_meta = GetBlobFileMetaData(new_blob_files, blob_file_number); ASSERT_NE(new_meta, nullptr); ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number); ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count); ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes); ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method); ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value); ASSERT_TRUE(new_meta->GetLinkedSsts().empty()); ASSERT_EQ(new_meta->GetGarbageBlobCount(), garbage_blob_count); ASSERT_EQ(new_meta->GetGarbageBlobBytes(), garbage_blob_bytes); } TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileNotFound) { // Attempt to increase the amount of garbage for a blob file that is // neither in the base version, nor was it added using a version edit. 
EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; constexpr uint64_t blob_file_number = 1234; constexpr uint64_t garbage_blob_count = 5678; constexpr uint64_t garbage_blob_bytes = 999999; edit.AddBlobFileGarbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 not found")); } TEST_F(VersionBuilderTest, SaveBlobFilesTo) { // Add three blob files to base version. for (uint64_t i = 1; i <= 3; ++i) { const uint64_t blob_file_number = i; const uint64_t total_blob_count = i * 1000; const uint64_t total_blob_bytes = i * 1000000; const uint64_t garbage_blob_count = i * 100; const uint64_t garbage_blob_bytes = i * 20000; AddBlob(blob_file_number, total_blob_count, total_blob_bytes, /* checksum_method */ std::string(), /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts(), garbage_blob_count, garbage_blob_bytes); } EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; // Add some garbage to the second and third blob files. The second blob file // remains valid since it does not consist entirely of garbage yet. The third // blob file is all garbage after the edit and will not be part of the new // version. edit.AddBlobFileGarbage(/* blob_file_number */ 2, /* garbage_blob_count */ 200, /* garbage_blob_bytes */ 100000); edit.AddBlobFileGarbage(/* blob_file_number */ 3, /* garbage_blob_count */ 2700, /* garbage_blob_bytes */ 2940000); // Add a fourth blob file. 
edit.AddBlobFile(/* blob_file_number */ 4, /* total_blob_count */ 4000, /* total_blob_bytes */ 4000000, /* checksum_method */ std::string(), /* checksum_value */ std::string()); ASSERT_OK(builder.Apply(&edit)); constexpr bool force_consistency_checks = false; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); const auto& new_blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(new_blob_files.size(), 3); const auto meta1 = GetBlobFileMetaData(new_blob_files, 1); ASSERT_NE(meta1, nullptr); ASSERT_EQ(meta1->GetBlobFileNumber(), 1); ASSERT_EQ(meta1->GetTotalBlobCount(), 1000); ASSERT_EQ(meta1->GetTotalBlobBytes(), 1000000); ASSERT_EQ(meta1->GetGarbageBlobCount(), 100); ASSERT_EQ(meta1->GetGarbageBlobBytes(), 20000); const auto meta2 = GetBlobFileMetaData(new_blob_files, 2); ASSERT_NE(meta2, nullptr); ASSERT_EQ(meta2->GetBlobFileNumber(), 2); ASSERT_EQ(meta2->GetTotalBlobCount(), 2000); ASSERT_EQ(meta2->GetTotalBlobBytes(), 2000000); ASSERT_EQ(meta2->GetGarbageBlobCount(), 400); ASSERT_EQ(meta2->GetGarbageBlobBytes(), 140000); const auto meta4 = GetBlobFileMetaData(new_blob_files, 4); ASSERT_NE(meta4, nullptr); ASSERT_EQ(meta4->GetBlobFileNumber(), 4); ASSERT_EQ(meta4->GetTotalBlobCount(), 4000); ASSERT_EQ(meta4->GetTotalBlobBytes(), 4000000); ASSERT_EQ(meta4->GetGarbageBlobCount(), 0); ASSERT_EQ(meta4->GetGarbageBlobBytes(), 0); } TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { // Initialize base version. The first table file points to a valid blob file // in this version; the second one does not refer to any blob files. 
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", /* largest */ "200", /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, /* oldest_blob_file_number */ 16); Add(/* level */ 1, /* file_number */ 23, /* smallest */ "201", /* largest */ "300", /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ 200, /* largest_seq */ 200, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ 200, /* largest_seqno */ 200, kInvalidBlobFileNumber); AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, /* total_blob_bytes */ 1000000, /* checksum_method */ std::string(), /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, /* garbage_blob_count */ 500, /* garbage_blob_bytes */ 300000); UpdateVersionStorageInfo(); // Add a new table file that points to the existing blob file, and add a // new table file--blob file pair. 
EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; edit.AddFile(/* level */ 1, /* file_number */ 606, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("701"), /* largest */ GetInternalKey("750"), /* smallest_seqno */ 200, /* largest_seqno */ 200, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("801"), /* largest */ GetInternalKey("850"), /* smallest_seqno */ 200, /* largest_seqno */ 200, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000, /* total_blob_bytes */ 200000, /* checksum_method */ std::string(), /* checksum_value */ std::string()); ASSERT_OK(builder.Apply(&edit)); // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesInconsistentLinks) { // Initialize base version. Links between the table file and the blob file // are inconsistent. 
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", /* largest */ "200", /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, /* oldest_blob_file_number */ 256); AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, /* total_blob_bytes */ 1000000, /* checksum_method */ std::string(), /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, /* garbage_blob_count */ 500, /* garbage_blob_bytes */ 300000); UpdateVersionStorageInfo(); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE(std::strstr( s.getState(), "Links are inconsistent between table files and blob file #16")); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbage) { // Initialize base version. The table file points to a blob file that is // all garbage. 
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", /* largest */ "200", /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, /* oldest_blob_file_number */ 16); AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, /* total_blob_bytes */ 1000000, /* checksum_method */ std::string(), /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, /* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000); UpdateVersionStorageInfo(); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE( std::strstr(s.getState(), "Blob file #16 consists entirely of garbage")); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbageLinkedSsts) { // Initialize base version, with a table file pointing to a blob file // that has no garbage at this point. 
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150", /* largest */ "200", /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100, /* oldest_blob_file_number */ 16); AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000, /* total_blob_bytes */ 1000000, /* checksum_method */ std::string(), /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1}, /* garbage_blob_count */ 0, /* garbage_blob_bytes */ 0); UpdateVersionStorageInfo(); // Mark the entire blob file garbage but do not remove the linked SST. EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionEdit edit; edit.AddBlobFileGarbage(/* blob_file_number */ 16, /* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000); ASSERT_OK(builder.Apply(&edit)); // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); ASSERT_TRUE( std::strstr(s.getState(), "Blob file #16 consists entirely of garbage")); UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { // Initialize base version. Table files 1..10 are linked to blob files 1..5, // while table files 11..20 are not linked to any blob files. 
for (uint64_t i = 1; i <= 10; ++i) { std::ostringstream oss; oss << std::setw(2) << std::setfill('0') << i; const std::string key = oss.str(); Add(/* level */ 1, /* file_number */ i, /* smallest */ key.c_str(), /* largest */ key.c_str(), /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ i * 100, /* largest_seq */ i * 100, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ i * 100, /* largest_seqno */ i * 100, /* oldest_blob_file_number */ ((i - 1) % 5) + 1); } for (uint64_t i = 1; i <= 5; ++i) { AddBlob(/* blob_file_number */ i, /* total_blob_count */ 2000, /* total_blob_bytes */ 2000000, /* checksum_method */ std::string(), /* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{i, i + 5}, /* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000); } for (uint64_t i = 11; i <= 20; ++i) { std::ostringstream oss; oss << std::setw(2) << std::setfill('0') << i; const std::string key = oss.str(); Add(/* level */ 1, /* file_number */ i, /* smallest */ key.c_str(), /* largest */ key.c_str(), /* file_size */ 100, /* path_id */ 0, /* smallest_seq */ i * 100, /* largest_seq */ i * 100, /* num_entries */ 0, /* num_deletions */ 0, /* sampled */ false, /* smallest_seqno */ i * 100, /* largest_seqno */ i * 100, kInvalidBlobFileNumber); } UpdateVersionStorageInfo(); { const auto& blob_files = vstorage_.GetBlobFiles(); ASSERT_EQ(blob_files.size(), 5); const std::vector expected_linked_ssts{ {1, 6}, {2, 7}, {3, 8}, {4, 9}, {5, 10}}; for (size_t i = 0; i < 5; ++i) { const auto meta = GetBlobFileMetaData(blob_files, /* blob_file_number */ i + 1); ASSERT_NE(meta, nullptr); ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]); } } VersionEdit edit; // Add an SST that references a blob file. 
edit.AddFile( /* level */ 1, /* file_number */ 21, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("21", 2100), /* largest */ GetInternalKey("21", 2100), /* smallest_seqno */ 2100, /* largest_seqno */ 2100, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); // Add an SST that does not reference any blob files. edit.AddFile( /* level */ 1, /* file_number */ 22, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("22", 2200), /* largest */ GetInternalKey("22", 2200), /* smallest_seqno */ 2200, /* largest_seqno */ 2200, /* marked_for_compaction */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); // Delete a file that references a blob file. edit.DeleteFile(/* level */ 1, /* file_number */ 6); // Delete a file that does not reference any blob files. edit.DeleteFile(/* level */ 1, /* file_number */ 16); // Trivially move a file that references a blob file. Note that we save // the original BlobFileMetaData object so we can check that no new object // gets created. auto meta3 = GetBlobFileMetaData(vstorage_.GetBlobFiles(), /* blob_file_number */ 3); edit.DeleteFile(/* level */ 1, /* file_number */ 3); edit.AddFile(/* level */ 2, /* file_number */ 3, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("03", 300), /* largest */ GetInternalKey("03", 300), /* smallest_seqno */ 300, /* largest_seqno */ 300, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); // Trivially move a file that does not reference any blob files. 
edit.DeleteFile(/* level */ 1, /* file_number */ 13); edit.AddFile(/* level */ 2, /* file_number */ 13, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("13", 1300), /* largest */ GetInternalKey("13", 1300), /* smallest_seqno */ 1300, /* largest_seqno */ 1300, /* marked_for_compaction */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); // Add one more SST file that references a blob file, then promptly // delete it in a second version edit before the new version gets saved. // This file should not show up as linked to the blob file in the new version. edit.AddFile(/* level */ 1, /* file_number */ 23, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("23", 2300), /* largest */ GetInternalKey("23", 2300), /* smallest_seqno */ 2300, /* largest_seqno */ 2300, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); VersionEdit edit2; edit2.DeleteFile(/* level */ 1, /* file_number */ 23); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); ASSERT_OK(builder.Apply(&edit)); ASSERT_OK(builder.Apply(&edit2)); constexpr bool force_consistency_checks = true; VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks); ASSERT_OK(builder.SaveTo(&new_vstorage)); { const auto& blob_files = new_vstorage.GetBlobFiles(); ASSERT_EQ(blob_files.size(), 5); const std::vector expected_linked_ssts{ {1, 21}, {2, 7}, {3, 8}, {4, 9}, {5, 10}}; for (size_t i = 0; i < 5; ++i) { const auto meta = GetBlobFileMetaData(blob_files, /* blob_file_number */ i + 1); ASSERT_NE(meta, nullptr); ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]); 
} // Make sure that no new BlobFileMetaData got created for the blob file // affected by the trivial move. ASSERT_EQ(GetBlobFileMetaData(blob_files, /* blob_file_number */ 3), meta3); } UnrefFilesInVersion(&new_vstorage); } TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { Add(0, 1U, "150", "200", 100U); UpdateVersionStorageInfo(); VersionEdit version_edit; version_edit.DeleteFile(0, 1U); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; constexpr VersionSet* version_set = nullptr; VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, true /* force_consistency_checks */); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); VersionBuilder version_builder2(env_options, &ioptions_, table_cache, &new_vstorage, version_set); VersionStorageInfo new_vstorage2(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, true /* force_consistency_checks */); ASSERT_NOK(version_builder2.Apply(&version_edit)); UnrefFilesInVersion(&new_vstorage); UnrefFilesInVersion(&new_vstorage2); } TEST_F(VersionBuilderTest, EstimatedActiveKeys) { const uint32_t kTotalSamples = 20; const uint32_t kNumLevels = 5; const uint32_t kFilesPerLevel = 8; const uint32_t kNumFiles = kNumLevels * kFilesPerLevel; const uint32_t kEntriesPerFile = 1000; const uint32_t kDeletionsPerFile = 100; for (uint32_t i = 0; i < kNumFiles; ++i) { Add(static_cast(i / kFilesPerLevel), i + 1, ToString((i + 100) * 1000).c_str(), ToString((i + 100) * 1000 + 999).c_str(), 100U, 0, 100, 100, kEntriesPerFile, kDeletionsPerFile, (i < kTotalSamples)); } // minus 2X for the number of deletion entries because: // 1x for deletion entry does not count as a data entry. // 1x for each deletion entry will actually remove one data entry. 
ASSERT_EQ(vstorage_.GetEstimatedActiveKeys(), (kEntriesPerFile - 2 * kDeletionsPerFile) * kNumFiles); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/version_edit.cc000066400000000000000000000626761370372246700166540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/version_edit.h" #include "db/blob/blob_index.h" #include "db/version_set.h" #include "logging/event_logger.h" #include "rocksdb/slice.h" #include "test_util/sync_point.h" #include "util/coding.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { namespace { // Tag numbers for serialized VersionEdit. These numbers are written to // disk and should not be changed. The number should be forward compatible so // users can down-grade RocksDB safely. A future Tag is ignored by doing '&' // between Tag and kTagSafeIgnoreMask field. 
enum Tag : uint32_t { kComparator = 1, kLogNumber = 2, kNextFileNumber = 3, kLastSequence = 4, kCompactPointer = 5, kDeletedFile = 6, kNewFile = 7, // 8 was used for large value refs kPrevLogNumber = 9, kMinLogNumberToKeep = 10, // these are new formats divergent from open source leveldb kNewFile2 = 100, kNewFile3 = 102, kNewFile4 = 103, // 4th (the latest) format version of adding files kColumnFamily = 200, // specify column family for version edit kColumnFamilyAdd = 201, kColumnFamilyDrop = 202, kMaxColumnFamily = 203, kInAtomicGroup = 300, // Mask for an unidentified tag from the future which can be safely ignored. kTagSafeIgnoreMask = 1 << 13, // Forward compatible (aka ignorable) records kDbId, kBlobFileAddition, kBlobFileGarbage, }; enum NewFileCustomTag : uint32_t { kTerminate = 1, // The end of customized fields kNeedCompaction = 2, // Since Manifest is not entirely forward-compatible, we currently encode // kMinLogNumberToKeep as part of NewFile as a hack. This should be removed // when manifest becomes forward-comptabile. kMinLogNumberToKeepHack = 3, kOldestBlobFileNumber = 4, kOldestAncesterTime = 5, kFileCreationTime = 6, kFileChecksum = 7, kFileChecksumFuncName = 8, // If this bit for the custom tag is set, opening DB should fail if // we don't know this field. 
kCustomTagNonSafeIgnoreMask = 1 << 6, // Forward incompatible (aka unignorable) fields kPathId, }; } // anonymous namespace uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id) { assert(number <= kFileNumberMask); return number | (path_id * (kFileNumberMask + 1)); } void FileMetaData::UpdateBoundaries(const Slice& key, const Slice& value, SequenceNumber seqno, ValueType value_type) { if (smallest.size() == 0) { smallest.DecodeFrom(key); } largest.DecodeFrom(key); fd.smallest_seqno = std::min(fd.smallest_seqno, seqno); fd.largest_seqno = std::max(fd.largest_seqno, seqno); #ifndef ROCKSDB_LITE if (value_type == kTypeBlobIndex) { BlobIndex blob_index; const Status s = blob_index.DecodeFrom(value); if (!s.ok()) { return; } if (blob_index.IsInlined()) { return; } if (blob_index.HasTTL()) { return; } // Paranoid check: this should not happen because BlobDB numbers the blob // files starting from 1. if (blob_index.file_number() == kInvalidBlobFileNumber) { return; } if (oldest_blob_file_number == kInvalidBlobFileNumber || oldest_blob_file_number > blob_index.file_number()) { oldest_blob_file_number = blob_index.file_number(); } } #else (void)value; (void)value_type; #endif } void VersionEdit::Clear() { max_level_ = 0; db_id_.clear(); comparator_.clear(); log_number_ = 0; prev_log_number_ = 0; next_file_number_ = 0; max_column_family_ = 0; min_log_number_to_keep_ = 0; last_sequence_ = 0; has_db_id_ = false; has_comparator_ = false; has_log_number_ = false; has_prev_log_number_ = false; has_next_file_number_ = false; has_max_column_family_ = false; has_min_log_number_to_keep_ = false; has_last_sequence_ = false; deleted_files_.clear(); new_files_.clear(); blob_file_additions_.clear(); blob_file_garbages_.clear(); column_family_ = 0; is_column_family_add_ = false; is_column_family_drop_ = false; column_family_name_.clear(); is_in_atomic_group_ = false; remaining_entries_ = 0; } bool VersionEdit::EncodeTo(std::string* dst) const { if (has_db_id_) { 
PutVarint32(dst, kDbId); PutLengthPrefixedSlice(dst, db_id_); } if (has_comparator_) { PutVarint32(dst, kComparator); PutLengthPrefixedSlice(dst, comparator_); } if (has_log_number_) { PutVarint32Varint64(dst, kLogNumber, log_number_); } if (has_prev_log_number_) { PutVarint32Varint64(dst, kPrevLogNumber, prev_log_number_); } if (has_next_file_number_) { PutVarint32Varint64(dst, kNextFileNumber, next_file_number_); } if (has_max_column_family_) { PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_); } if (has_last_sequence_) { PutVarint32Varint64(dst, kLastSequence, last_sequence_); } for (const auto& deleted : deleted_files_) { PutVarint32Varint32Varint64(dst, kDeletedFile, deleted.first /* level */, deleted.second /* file number */); } bool min_log_num_written = false; for (size_t i = 0; i < new_files_.size(); i++) { const FileMetaData& f = new_files_[i].second; if (!f.smallest.Valid() || !f.largest.Valid()) { return false; } PutVarint32(dst, kNewFile4); PutVarint32Varint64(dst, new_files_[i].first /* level */, f.fd.GetNumber()); PutVarint64(dst, f.fd.GetFileSize()); PutLengthPrefixedSlice(dst, f.smallest.Encode()); PutLengthPrefixedSlice(dst, f.largest.Encode()); PutVarint64Varint64(dst, f.fd.smallest_seqno, f.fd.largest_seqno); // Customized fields' format: // +-----------------------------+ // | 1st field's tag (varint32) | // +-----------------------------+ // | 1st field's size (varint32) | // +-----------------------------+ // | bytes for 1st field | // | (based on size decoded) | // +-----------------------------+ // | | // | ...... 
| // | | // +-----------------------------+ // | last field's size (varint32)| // +-----------------------------+ // | bytes for last field | // | (based on size decoded) | // +-----------------------------+ // | terminating tag (varint32) | // +-----------------------------+ // // Customized encoding for fields: // tag kPathId: 1 byte as path_id // tag kNeedCompaction: // now only can take one char value 1 indicating need-compaction // PutVarint32(dst, NewFileCustomTag::kOldestAncesterTime); std::string varint_oldest_ancester_time; PutVarint64(&varint_oldest_ancester_time, f.oldest_ancester_time); TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:VarintOldestAncesterTime", &varint_oldest_ancester_time); PutLengthPrefixedSlice(dst, Slice(varint_oldest_ancester_time)); PutVarint32(dst, NewFileCustomTag::kFileCreationTime); std::string varint_file_creation_time; PutVarint64(&varint_file_creation_time, f.file_creation_time); TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:VarintFileCreationTime", &varint_file_creation_time); PutLengthPrefixedSlice(dst, Slice(varint_file_creation_time)); PutVarint32(dst, NewFileCustomTag::kFileChecksum); PutLengthPrefixedSlice(dst, Slice(f.file_checksum)); PutVarint32(dst, NewFileCustomTag::kFileChecksumFuncName); PutLengthPrefixedSlice(dst, Slice(f.file_checksum_func_name)); if (f.fd.GetPathId() != 0) { PutVarint32(dst, NewFileCustomTag::kPathId); char p = static_cast(f.fd.GetPathId()); PutLengthPrefixedSlice(dst, Slice(&p, 1)); } if (f.marked_for_compaction) { PutVarint32(dst, NewFileCustomTag::kNeedCompaction); char p = static_cast(1); PutLengthPrefixedSlice(dst, Slice(&p, 1)); } if (has_min_log_number_to_keep_ && !min_log_num_written) { PutVarint32(dst, NewFileCustomTag::kMinLogNumberToKeepHack); std::string varint_log_number; PutFixed64(&varint_log_number, min_log_number_to_keep_); PutLengthPrefixedSlice(dst, Slice(varint_log_number)); min_log_num_written = true; } if (f.oldest_blob_file_number != kInvalidBlobFileNumber) { 
PutVarint32(dst, NewFileCustomTag::kOldestBlobFileNumber); std::string oldest_blob_file_number; PutVarint64(&oldest_blob_file_number, f.oldest_blob_file_number); PutLengthPrefixedSlice(dst, Slice(oldest_blob_file_number)); } TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", dst); PutVarint32(dst, NewFileCustomTag::kTerminate); } for (const auto& blob_file_addition : blob_file_additions_) { PutVarint32(dst, kBlobFileAddition); blob_file_addition.EncodeTo(dst); } for (const auto& blob_file_garbage : blob_file_garbages_) { PutVarint32(dst, kBlobFileGarbage); blob_file_garbage.EncodeTo(dst); } // 0 is default and does not need to be explicitly written if (column_family_ != 0) { PutVarint32Varint32(dst, kColumnFamily, column_family_); } if (is_column_family_add_) { PutVarint32(dst, kColumnFamilyAdd); PutLengthPrefixedSlice(dst, Slice(column_family_name_)); } if (is_column_family_drop_) { PutVarint32(dst, kColumnFamilyDrop); } if (is_in_atomic_group_) { PutVarint32(dst, kInAtomicGroup); PutVarint32(dst, remaining_entries_); } return true; } static bool GetInternalKey(Slice* input, InternalKey* dst) { Slice str; if (GetLengthPrefixedSlice(input, &str)) { dst->DecodeFrom(str); return dst->Valid(); } else { return false; } } bool VersionEdit::GetLevel(Slice* input, int* level, const char** /*msg*/) { uint32_t v = 0; if (GetVarint32(input, &v)) { *level = v; if (max_level_ < *level) { max_level_ = *level; } return true; } else { return false; } } const char* VersionEdit::DecodeNewFile4From(Slice* input) { const char* msg = nullptr; int level = 0; FileMetaData f; uint64_t number = 0; uint32_t path_id = 0; uint64_t file_size = 0; SequenceNumber smallest_seqno = 0; SequenceNumber largest_seqno = kMaxSequenceNumber; if (GetLevel(input, &level, &msg) && GetVarint64(input, &number) && GetVarint64(input, &file_size) && GetInternalKey(input, &f.smallest) && GetInternalKey(input, &f.largest) && GetVarint64(input, &smallest_seqno) && GetVarint64(input, 
&largest_seqno)) { // See comments in VersionEdit::EncodeTo() for format of customized fields while (true) { uint32_t custom_tag = 0; Slice field; if (!GetVarint32(input, &custom_tag)) { return "new-file4 custom field"; } if (custom_tag == kTerminate) { break; } if (!GetLengthPrefixedSlice(input, &field)) { return "new-file4 custom field length prefixed slice error"; } switch (custom_tag) { case kPathId: if (field.size() != 1) { return "path_id field wrong size"; } path_id = field[0]; if (path_id > 3) { return "path_id wrong vaue"; } break; case kOldestAncesterTime: if (!GetVarint64(&field, &f.oldest_ancester_time)) { return "invalid oldest ancester time"; } break; case kFileCreationTime: if (!GetVarint64(&field, &f.file_creation_time)) { return "invalid file creation time"; } break; case kFileChecksum: f.file_checksum = field.ToString(); break; case kFileChecksumFuncName: f.file_checksum_func_name = field.ToString(); break; case kNeedCompaction: if (field.size() != 1) { return "need_compaction field wrong size"; } f.marked_for_compaction = (field[0] == 1); break; case kMinLogNumberToKeepHack: // This is a hack to encode kMinLogNumberToKeep in a // forward-compatible fashion. 
if (!GetFixed64(&field, &min_log_number_to_keep_)) { return "deleted log number malformatted"; } has_min_log_number_to_keep_ = true; break; case kOldestBlobFileNumber: if (!GetVarint64(&field, &f.oldest_blob_file_number)) { return "invalid oldest blob file number"; } break; default: if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { // Should not proceed if cannot understand it return "new-file4 custom field not supported"; } break; } } } else { return "new-file4 entry"; } f.fd = FileDescriptor(number, path_id, file_size, smallest_seqno, largest_seqno); new_files_.push_back(std::make_pair(level, f)); return nullptr; } Status VersionEdit::DecodeFrom(const Slice& src) { Clear(); Slice input = src; const char* msg = nullptr; uint32_t tag = 0; // Temporary storage for parsing int level = 0; FileMetaData f; Slice str; InternalKey key; while (msg == nullptr && GetVarint32(&input, &tag)) { switch (tag) { case kDbId: if (GetLengthPrefixedSlice(&input, &str)) { db_id_ = str.ToString(); has_db_id_ = true; } else { msg = "db id"; } break; case kComparator: if (GetLengthPrefixedSlice(&input, &str)) { comparator_ = str.ToString(); has_comparator_ = true; } else { msg = "comparator name"; } break; case kLogNumber: if (GetVarint64(&input, &log_number_)) { has_log_number_ = true; } else { msg = "log number"; } break; case kPrevLogNumber: if (GetVarint64(&input, &prev_log_number_)) { has_prev_log_number_ = true; } else { msg = "previous log number"; } break; case kNextFileNumber: if (GetVarint64(&input, &next_file_number_)) { has_next_file_number_ = true; } else { msg = "next file number"; } break; case kMaxColumnFamily: if (GetVarint32(&input, &max_column_family_)) { has_max_column_family_ = true; } else { msg = "max column family"; } break; case kMinLogNumberToKeep: if (GetVarint64(&input, &min_log_number_to_keep_)) { has_min_log_number_to_keep_ = true; } else { msg = "min log number to kee"; } break; case kLastSequence: if (GetVarint64(&input, &last_sequence_)) { 
has_last_sequence_ = true; } else { msg = "last sequence number"; } break; case kCompactPointer: if (GetLevel(&input, &level, &msg) && GetInternalKey(&input, &key)) { // we don't use compact pointers anymore, // but we should not fail if they are still // in manifest } else { if (!msg) { msg = "compaction pointer"; } } break; case kDeletedFile: { uint64_t number = 0; if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number)) { deleted_files_.insert(std::make_pair(level, number)); } else { if (!msg) { msg = "deleted file"; } } break; } case kNewFile: { uint64_t number = 0; uint64_t file_size = 0; if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) && GetVarint64(&input, &file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest)) { f.fd = FileDescriptor(number, 0, file_size); new_files_.push_back(std::make_pair(level, f)); } else { if (!msg) { msg = "new-file entry"; } } break; } case kNewFile2: { uint64_t number = 0; uint64_t file_size = 0; SequenceNumber smallest_seqno = 0; SequenceNumber largest_seqno = kMaxSequenceNumber; if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) && GetVarint64(&input, &file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest) && GetVarint64(&input, &smallest_seqno) && GetVarint64(&input, &largest_seqno)) { f.fd = FileDescriptor(number, 0, file_size, smallest_seqno, largest_seqno); new_files_.push_back(std::make_pair(level, f)); } else { if (!msg) { msg = "new-file2 entry"; } } break; } case kNewFile3: { uint64_t number = 0; uint32_t path_id = 0; uint64_t file_size = 0; SequenceNumber smallest_seqno = 0; SequenceNumber largest_seqno = kMaxSequenceNumber; if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) && GetVarint32(&input, &path_id) && GetVarint64(&input, &file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest) && GetVarint64(&input, &smallest_seqno) && GetVarint64(&input, 
&largest_seqno)) { f.fd = FileDescriptor(number, path_id, file_size, smallest_seqno, largest_seqno); new_files_.push_back(std::make_pair(level, f)); } else { if (!msg) { msg = "new-file3 entry"; } } break; } case kNewFile4: { msg = DecodeNewFile4From(&input); break; } case kBlobFileAddition: { BlobFileAddition blob_file_addition; const Status s = blob_file_addition.DecodeFrom(&input); if (!s.ok()) { return s; } blob_file_additions_.emplace_back(blob_file_addition); break; } case kBlobFileGarbage: { BlobFileGarbage blob_file_garbage; const Status s = blob_file_garbage.DecodeFrom(&input); if (!s.ok()) { return s; } blob_file_garbages_.emplace_back(blob_file_garbage); break; } case kColumnFamily: if (!GetVarint32(&input, &column_family_)) { if (!msg) { msg = "set column family id"; } } break; case kColumnFamilyAdd: if (GetLengthPrefixedSlice(&input, &str)) { is_column_family_add_ = true; column_family_name_ = str.ToString(); } else { if (!msg) { msg = "column family add"; } } break; case kColumnFamilyDrop: is_column_family_drop_ = true; break; case kInAtomicGroup: is_in_atomic_group_ = true; if (!GetVarint32(&input, &remaining_entries_)) { if (!msg) { msg = "remaining entries"; } } break; default: if (tag & kTagSafeIgnoreMask) { // Tag from future which can be safely ignored. // The next field must be the length of the entry. 
uint32_t field_len; if (!GetVarint32(&input, &field_len) || static_cast(field_len) > input.size()) { if (!msg) { msg = "safely ignoreable tag length error"; } } else { input.remove_prefix(static_cast(field_len)); } } else { msg = "unknown tag"; } break; } } if (msg == nullptr && !input.empty()) { msg = "invalid tag"; } Status result; if (msg != nullptr) { result = Status::Corruption("VersionEdit", msg); } return result; } std::string VersionEdit::DebugString(bool hex_key) const { std::string r; r.append("VersionEdit {"); if (has_db_id_) { r.append("\n DB ID: "); r.append(db_id_); } if (has_comparator_) { r.append("\n Comparator: "); r.append(comparator_); } if (has_log_number_) { r.append("\n LogNumber: "); AppendNumberTo(&r, log_number_); } if (has_prev_log_number_) { r.append("\n PrevLogNumber: "); AppendNumberTo(&r, prev_log_number_); } if (has_next_file_number_) { r.append("\n NextFileNumber: "); AppendNumberTo(&r, next_file_number_); } if (has_max_column_family_) { r.append("\n MaxColumnFamily: "); AppendNumberTo(&r, max_column_family_); } if (has_min_log_number_to_keep_) { r.append("\n MinLogNumberToKeep: "); AppendNumberTo(&r, min_log_number_to_keep_); } if (has_last_sequence_) { r.append("\n LastSeq: "); AppendNumberTo(&r, last_sequence_); } for (const auto& deleted_file : deleted_files_) { r.append("\n DeleteFile: "); AppendNumberTo(&r, deleted_file.first); r.append(" "); AppendNumberTo(&r, deleted_file.second); } for (size_t i = 0; i < new_files_.size(); i++) { const FileMetaData& f = new_files_[i].second; r.append("\n AddFile: "); AppendNumberTo(&r, new_files_[i].first); r.append(" "); AppendNumberTo(&r, f.fd.GetNumber()); r.append(" "); AppendNumberTo(&r, f.fd.GetFileSize()); r.append(" "); r.append(f.smallest.DebugString(hex_key)); r.append(" .. 
"); r.append(f.largest.DebugString(hex_key)); if (f.oldest_blob_file_number != kInvalidBlobFileNumber) { r.append(" blob_file:"); AppendNumberTo(&r, f.oldest_blob_file_number); } r.append(" oldest_ancester_time:"); AppendNumberTo(&r, f.oldest_ancester_time); r.append(" file_creation_time:"); AppendNumberTo(&r, f.file_creation_time); r.append(" file_checksum:"); r.append(f.file_checksum); r.append(" file_checksum_func_name: "); r.append(f.file_checksum_func_name); } for (const auto& blob_file_addition : blob_file_additions_) { r.append("\n BlobFileAddition: "); r.append(blob_file_addition.DebugString()); } for (const auto& blob_file_garbage : blob_file_garbages_) { r.append("\n BlobFileGarbage: "); r.append(blob_file_garbage.DebugString()); } r.append("\n ColumnFamily: "); AppendNumberTo(&r, column_family_); if (is_column_family_add_) { r.append("\n ColumnFamilyAdd: "); r.append(column_family_name_); } if (is_column_family_drop_) { r.append("\n ColumnFamilyDrop"); } if (is_in_atomic_group_) { r.append("\n AtomicGroup: "); AppendNumberTo(&r, remaining_entries_); r.append(" entries remains"); } r.append("\n}\n"); return r; } std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { JSONWriter jw; jw << "EditNumber" << edit_num; if (has_db_id_) { jw << "DB ID" << db_id_; } if (has_comparator_) { jw << "Comparator" << comparator_; } if (has_log_number_) { jw << "LogNumber" << log_number_; } if (has_prev_log_number_) { jw << "PrevLogNumber" << prev_log_number_; } if (has_next_file_number_) { jw << "NextFileNumber" << next_file_number_; } if (has_max_column_family_) { jw << "MaxColumnFamily" << max_column_family_; } if (has_min_log_number_to_keep_) { jw << "MinLogNumberToKeep" << min_log_number_to_keep_; } if (has_last_sequence_) { jw << "LastSeq" << last_sequence_; } if (!deleted_files_.empty()) { jw << "DeletedFiles"; jw.StartArray(); for (const auto& deleted_file : deleted_files_) { jw.StartArrayedObject(); jw << "Level" << deleted_file.first; jw << 
"FileNumber" << deleted_file.second; jw.EndArrayedObject(); } jw.EndArray(); } if (!new_files_.empty()) { jw << "AddedFiles"; jw.StartArray(); for (size_t i = 0; i < new_files_.size(); i++) { jw.StartArrayedObject(); jw << "Level" << new_files_[i].first; const FileMetaData& f = new_files_[i].second; jw << "FileNumber" << f.fd.GetNumber(); jw << "FileSize" << f.fd.GetFileSize(); jw << "SmallestIKey" << f.smallest.DebugString(hex_key); jw << "LargestIKey" << f.largest.DebugString(hex_key); if (f.oldest_blob_file_number != kInvalidBlobFileNumber) { jw << "OldestBlobFile" << f.oldest_blob_file_number; } jw.EndArrayedObject(); } jw.EndArray(); } if (!blob_file_additions_.empty()) { jw << "BlobFileAdditions"; jw.StartArray(); for (const auto& blob_file_addition : blob_file_additions_) { jw.StartArrayedObject(); jw << blob_file_addition; jw.EndArrayedObject(); } jw.EndArray(); } if (!blob_file_garbages_.empty()) { jw << "BlobFileGarbages"; jw.StartArray(); for (const auto& blob_file_garbage : blob_file_garbages_) { jw.StartArrayedObject(); jw << blob_file_garbage; jw.EndArrayedObject(); } jw.EndArray(); } jw << "ColumnFamily" << column_family_; if (is_column_family_add_) { jw << "ColumnFamilyAdd" << column_family_name_; } if (is_column_family_drop_) { jw << "ColumnFamilyDrop" << column_family_name_; } if (is_in_atomic_group_) { jw << "AtomicGroup" << remaining_entries_; } jw.EndObject(); return jw.Get(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_edit.h000066400000000000000000000426661370372246700165130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include "db/blob/blob_file_addition.h" #include "db/blob/blob_file_garbage.h" #include "db/dbformat.h" #include "memory/arena.h" #include "rocksdb/cache.h" #include "table/table_reader.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class VersionSet; constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; constexpr uint64_t kUnknownOldestAncesterTime = 0; constexpr uint64_t kUnknownFileCreationTime = 0; extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id); // A copyable structure contains information needed to read data from an SST // file. It can contain a pointer to a table reader opened for the file, or // file number and size, which can be used to create a new table reader for it. // The behavior is undefined when a copied of the structure is used when the // file is not in any live version any more. struct FileDescriptor { // Table reader in table_reader_handle TableReader* table_reader; uint64_t packed_number_and_path_id; uint64_t file_size; // File size in bytes SequenceNumber smallest_seqno; // The smallest seqno in this file SequenceNumber largest_seqno; // The largest seqno in this file FileDescriptor() : FileDescriptor(0, 0, 0) {} FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size) : FileDescriptor(number, path_id, _file_size, kMaxSequenceNumber, 0) {} FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size, SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno) : table_reader(nullptr), packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)), file_size(_file_size), smallest_seqno(_smallest_seqno), largest_seqno(_largest_seqno) {} FileDescriptor(const FileDescriptor& fd) { *this = fd; } FileDescriptor& operator=(const FileDescriptor& fd) { table_reader = fd.table_reader; packed_number_and_path_id = fd.packed_number_and_path_id; file_size = fd.file_size; 
smallest_seqno = fd.smallest_seqno; largest_seqno = fd.largest_seqno; return *this; } uint64_t GetNumber() const { return packed_number_and_path_id & kFileNumberMask; } uint32_t GetPathId() const { return static_cast( packed_number_and_path_id / (kFileNumberMask + 1)); } uint64_t GetFileSize() const { return file_size; } }; struct FileSampledStats { FileSampledStats() : num_reads_sampled(0) {} FileSampledStats(const FileSampledStats& other) { *this = other; } FileSampledStats& operator=(const FileSampledStats& other) { num_reads_sampled = other.num_reads_sampled.load(); return *this; } // number of user reads to this file. mutable std::atomic num_reads_sampled; }; struct FileMetaData { FileDescriptor fd; InternalKey smallest; // Smallest internal key served by table InternalKey largest; // Largest internal key served by table // Needs to be disposed when refs becomes 0. Cache::Handle* table_reader_handle = nullptr; FileSampledStats stats; // Stats for compensating deletion entries during compaction // File size compensated by deletion entry. // This is updated in Version::UpdateAccumulatedStats() first time when the // file is created or loaded. After it is updated (!= 0), it is immutable. uint64_t compensated_file_size = 0; // These values can mutate, but they can only be read or written from // single-threaded LogAndApply thread uint64_t num_entries = 0; // the number of entries. uint64_t num_deletions = 0; // the number of deletion entries. uint64_t raw_key_size = 0; // total uncompressed key size. uint64_t raw_value_size = 0; // total uncompressed value size. int refs = 0; // Reference count bool being_compacted = false; // Is this file undergoing compaction? bool init_stats_from_file = false; // true if the data-entry stats of this // file has initialized from file. bool marked_for_compaction = false; // True if client asked us nicely to // compact this file. // Used only in BlobDB. The file number of the oldest blob file this SST file // refers to. 
0 is an invalid value; BlobDB numbers the files starting from 1. uint64_t oldest_blob_file_number = kInvalidBlobFileNumber; // The file could be the compaction output from other SST files, which could // in turn be outputs for compact older SST files. We track the memtable // flush timestamp for the oldest SST file that eventaully contribute data // to this file. 0 means the information is not available. uint64_t oldest_ancester_time = kUnknownOldestAncesterTime; // Unix time when the SST file is created. uint64_t file_creation_time = kUnknownFileCreationTime; // File checksum std::string file_checksum = kUnknownFileChecksum; // File checksum function name std::string file_checksum_func_name = kUnknownFileChecksumFuncName; FileMetaData() = default; FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size, const InternalKey& smallest_key, const InternalKey& largest_key, const SequenceNumber& smallest_seq, const SequenceNumber& largest_seq, bool marked_for_compact, uint64_t oldest_blob_file, uint64_t _oldest_ancester_time, uint64_t _file_creation_time, const std::string& _file_checksum, const std::string& _file_checksum_func_name) : fd(file, file_path_id, file_size, smallest_seq, largest_seq), smallest(smallest_key), largest(largest_key), marked_for_compaction(marked_for_compact), oldest_blob_file_number(oldest_blob_file), oldest_ancester_time(_oldest_ancester_time), file_creation_time(_file_creation_time), file_checksum(_file_checksum), file_checksum_func_name(_file_checksum_func_name) { TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this); } // REQUIRED: Keys must be given to the function in sorted order (it expects // the last key to be the largest). void UpdateBoundaries(const Slice& key, const Slice& value, SequenceNumber seqno, ValueType value_type); // Unlike UpdateBoundaries, ranges do not need to be presented in any // particular order. 
void UpdateBoundariesForRange(const InternalKey& start, const InternalKey& end, SequenceNumber seqno, const InternalKeyComparator& icmp) { if (smallest.size() == 0 || icmp.Compare(start, smallest) < 0) { smallest = start; } if (largest.size() == 0 || icmp.Compare(largest, end) < 0) { largest = end; } fd.smallest_seqno = std::min(fd.smallest_seqno, seqno); fd.largest_seqno = std::max(fd.largest_seqno, seqno); } // Try to get oldest ancester time from the class itself or table properties // if table reader is already pinned. // 0 means the information is not available. uint64_t TryGetOldestAncesterTime() { if (oldest_ancester_time != kUnknownOldestAncesterTime) { return oldest_ancester_time; } else if (fd.table_reader != nullptr && fd.table_reader->GetTableProperties() != nullptr) { return fd.table_reader->GetTableProperties()->creation_time; } return kUnknownOldestAncesterTime; } uint64_t TryGetFileCreationTime() { if (file_creation_time != kUnknownFileCreationTime) { return file_creation_time; } else if (fd.table_reader != nullptr && fd.table_reader->GetTableProperties() != nullptr) { return fd.table_reader->GetTableProperties()->file_creation_time; } return kUnknownFileCreationTime; } }; // A compressed copy of file meta data that just contain minimum data needed // to server read operations, while still keeping the pointer to full metadata // of the file in case it is needed. 
struct FdWithKeyRange { FileDescriptor fd; FileMetaData* file_metadata; // Point to all metadata Slice smallest_key; // slice that contain smallest key Slice largest_key; // slice that contain largest key FdWithKeyRange() : fd(), file_metadata(nullptr), smallest_key(), largest_key() { } FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key, FileMetaData* _file_metadata) : fd(_fd), file_metadata(_file_metadata), smallest_key(_smallest_key), largest_key(_largest_key) {} }; // Data structure to store an array of FdWithKeyRange in one level // Actual data is guaranteed to be stored closely struct LevelFilesBrief { size_t num_files; FdWithKeyRange* files; LevelFilesBrief() { num_files = 0; files = nullptr; } }; // The state of a DB at any given time is referred to as a Version. // Any modification to the Version is considered a Version Edit. A Version is // constructed by joining a sequence of Version Edits. Version Edits are written // to the MANIFEST file. class VersionEdit { public: void Clear(); void SetDBId(const std::string& db_id) { has_db_id_ = true; db_id_ = db_id; } bool HasDbId() const { return has_db_id_; } const std::string& GetDbId() const { return db_id_; } void SetComparatorName(const Slice& name) { has_comparator_ = true; comparator_ = name.ToString(); } bool HasComparatorName() const { return has_comparator_; } const std::string& GetComparatorName() const { return comparator_; } void SetLogNumber(uint64_t num) { has_log_number_ = true; log_number_ = num; } bool HasLogNumber() const { return has_log_number_; } uint64_t GetLogNumber() const { return log_number_; } void SetPrevLogNumber(uint64_t num) { has_prev_log_number_ = true; prev_log_number_ = num; } bool HasPrevLogNumber() const { return has_prev_log_number_; } uint64_t GetPrevLogNumber() const { return prev_log_number_; } void SetNextFile(uint64_t num) { has_next_file_number_ = true; next_file_number_ = num; } bool HasNextFile() const { return has_next_file_number_; } uint64_t 
GetNextFile() const { return next_file_number_; } void SetMaxColumnFamily(uint32_t max_column_family) { has_max_column_family_ = true; max_column_family_ = max_column_family; } bool HasMaxColumnFamily() const { return has_max_column_family_; } uint32_t GetMaxColumnFamily() const { return max_column_family_; } void SetMinLogNumberToKeep(uint64_t num) { has_min_log_number_to_keep_ = true; min_log_number_to_keep_ = num; } bool HasMinLogNumberToKeep() const { return has_min_log_number_to_keep_; } uint64_t GetMinLogNumberToKeep() const { return min_log_number_to_keep_; } void SetLastSequence(SequenceNumber seq) { has_last_sequence_ = true; last_sequence_ = seq; } bool HasLastSequence() const { return has_last_sequence_; } SequenceNumber GetLastSequence() const { return last_sequence_; } // Delete the specified table file from the specified level. void DeleteFile(int level, uint64_t file) { deleted_files_.emplace(level, file); } // Retrieve the table files deleted as well as their associated levels. using DeletedFiles = std::set>; const DeletedFiles& GetDeletedFiles() const { return deleted_files_; } // Add the specified table file at the specified level. // REQUIRES: This version has not been saved (see VersionSet::SaveTo) // REQUIRES: "smallest" and "largest" are smallest and largest keys in file // REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file // referred to by this file if any, kInvalidBlobFileNumber otherwise. 
void AddFile(int level, uint64_t file, uint32_t file_path_id, uint64_t file_size, const InternalKey& smallest, const InternalKey& largest, const SequenceNumber& smallest_seqno, const SequenceNumber& largest_seqno, bool marked_for_compaction, uint64_t oldest_blob_file_number, uint64_t oldest_ancester_time, uint64_t file_creation_time, const std::string& file_checksum, const std::string& file_checksum_func_name) { assert(smallest_seqno <= largest_seqno); new_files_.emplace_back( level, FileMetaData(file, file_path_id, file_size, smallest, largest, smallest_seqno, largest_seqno, marked_for_compaction, oldest_blob_file_number, oldest_ancester_time, file_creation_time, file_checksum, file_checksum_func_name)); } void AddFile(int level, const FileMetaData& f) { assert(f.fd.smallest_seqno <= f.fd.largest_seqno); new_files_.emplace_back(level, f); } // Retrieve the table files added as well as their associated levels. using NewFiles = std::vector>; const NewFiles& GetNewFiles() const { return new_files_; } // Add a new blob file. void AddBlobFile(uint64_t blob_file_number, uint64_t total_blob_count, uint64_t total_blob_bytes, std::string checksum_method, std::string checksum_value) { blob_file_additions_.emplace_back( blob_file_number, total_blob_count, total_blob_bytes, std::move(checksum_method), std::move(checksum_value)); } // Retrieve all the blob files added. using BlobFileAdditions = std::vector; const BlobFileAdditions& GetBlobFileAdditions() const { return blob_file_additions_; } // Add garbage for an existing blob file. Note: intentionally broken English // follows. void AddBlobFileGarbage(uint64_t blob_file_number, uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) { blob_file_garbages_.emplace_back(blob_file_number, garbage_blob_count, garbage_blob_bytes); } // Retrieve all the blob file garbage added. 
using BlobFileGarbages = std::vector; const BlobFileGarbages& GetBlobFileGarbages() const { return blob_file_garbages_; } // Number of edits size_t NumEntries() const { return new_files_.size() + deleted_files_.size() + blob_file_additions_.size() + blob_file_garbages_.size(); } void SetColumnFamily(uint32_t column_family_id) { column_family_ = column_family_id; } uint32_t GetColumnFamily() const { return column_family_; } // set column family ID by calling SetColumnFamily() void AddColumnFamily(const std::string& name) { assert(!is_column_family_drop_); assert(!is_column_family_add_); assert(NumEntries() == 0); is_column_family_add_ = true; column_family_name_ = name; } // set column family ID by calling SetColumnFamily() void DropColumnFamily() { assert(!is_column_family_drop_); assert(!is_column_family_add_); assert(NumEntries() == 0); is_column_family_drop_ = true; } bool IsColumnFamilyManipulation() const { return is_column_family_add_ || is_column_family_drop_; } void MarkAtomicGroup(uint32_t remaining_entries) { is_in_atomic_group_ = true; remaining_entries_ = remaining_entries; } bool IsInAtomicGroup() const { return is_in_atomic_group_; } uint32_t GetRemainingEntries() const { return remaining_entries_; } // return true on success. 
  // Serializes this edit into *dst.
  bool EncodeTo(std::string* dst) const;
  // Parses an encoded edit from src into this object and reports the outcome
  // as a Status.
  Status DecodeFrom(const Slice& src);

  // Debugging helpers; hex_key presumably selects hex output for keys
  // (implementation not visible here).
  std::string DebugString(bool hex_key = false) const;
  std::string DebugJSON(int edit_num, bool hex_key = false) const;

 private:
  // These classes read and write the has_*_ flags and values directly.
  friend class ReactiveVersionSet;
  friend class VersionEditHandler;
  friend class VersionEditHandlerPointInTime;
  friend class VersionSet;
  friend class Version;
  friend class AtomicGroupReadBuffer;

  // Decoding helpers (defined outside this view).
  bool GetLevel(Slice* input, int* level, const char** msg);

  const char* DecodeNewFile4From(Slice* input);

  int max_level_ = 0;
  std::string db_id_;
  std::string comparator_;
  uint64_t log_number_ = 0;
  uint64_t prev_log_number_ = 0;
  uint64_t next_file_number_ = 0;
  uint32_t max_column_family_ = 0;
  // The most recent WAL log number that is deleted
  uint64_t min_log_number_to_keep_ = 0;
  SequenceNumber last_sequence_ = 0;
  // Presence flags: each has_X_ is set to true by the corresponding setter so
  // that readers can tell "unset" apart from a zero/empty value.
  bool has_db_id_ = false;
  bool has_comparator_ = false;
  bool has_log_number_ = false;
  bool has_prev_log_number_ = false;
  bool has_next_file_number_ = false;
  bool has_max_column_family_ = false;
  bool has_min_log_number_to_keep_ = false;
  bool has_last_sequence_ = false;

  DeletedFiles deleted_files_;
  NewFiles new_files_;

  BlobFileAdditions blob_file_additions_;
  BlobFileGarbages blob_file_garbages_;

  // Each version edit record should have column_family_ set
  // If it's not set, it is default (0)
  uint32_t column_family_ = 0;
  // a version edit can be either column_family add or
  // column_family drop. If it's column family add,
  // it also includes column family name.
  bool is_column_family_drop_ = false;
  bool is_column_family_add_ = false;
  std::string column_family_name_;

  bool is_in_atomic_group_ = false;
  uint32_t remaining_entries_ = 0;
};

} // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_edit_handler.cc000066400000000000000000000475211370372246700203410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/version_edit_handler.h" #include "monitoring/persistent_stats_history.h" namespace ROCKSDB_NAMESPACE { VersionEditHandler::VersionEditHandler( bool read_only, const std::vector& column_families, VersionSet* version_set, bool track_missing_files, bool no_error_if_table_files_missing) : read_only_(read_only), column_families_(column_families), status_(), version_set_(version_set), track_missing_files_(track_missing_files), no_error_if_table_files_missing_(no_error_if_table_files_missing), initialized_(false) { assert(version_set_ != nullptr); } void VersionEditHandler::Iterate(log::Reader& reader, Status* log_read_status, std::string* db_id) { Slice record; std::string scratch; assert(log_read_status); assert(log_read_status->ok()); size_t recovered_edits = 0; Status s = Initialize(); while (s.ok() && reader.ReadRecord(&record, &scratch) && log_read_status->ok()) { VersionEdit edit; s = edit.DecodeFrom(record); if (!s.ok()) { break; } if (edit.has_db_id_) { version_set_->db_id_ = edit.GetDbId(); if (db_id != nullptr) { *db_id = version_set_->db_id_; } } s = read_buffer_.AddEdit(&edit); if (!s.ok()) { break; } ColumnFamilyData* cfd = nullptr; if (edit.is_in_atomic_group_) { if (read_buffer_.IsFull()) { for (auto& e : read_buffer_.replay_buffer()) { s = ApplyVersionEdit(e, &cfd); if (!s.ok()) { break; } ++recovered_edits; } if (!s.ok()) { break; } read_buffer_.Clear(); } } else { s = ApplyVersionEdit(edit, &cfd); if (s.ok()) { ++recovered_edits; } } } if (!log_read_status->ok()) { s = *log_read_status; } CheckIterationResult(reader, &s); if (!s.ok()) { 
status_ = s; } } Status VersionEditHandler::Initialize() { Status s; if (!initialized_) { for (const auto& cf_desc : column_families_) { name_to_options_.emplace(cf_desc.name, cf_desc.options); } auto default_cf_iter = name_to_options_.find(kDefaultColumnFamilyName); if (default_cf_iter == name_to_options_.end()) { s = Status::InvalidArgument("Default column family not specified"); } if (s.ok()) { VersionEdit default_cf_edit; default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName); default_cf_edit.SetColumnFamily(0); ColumnFamilyData* cfd = CreateCfAndInit(default_cf_iter->second, default_cf_edit); assert(cfd != nullptr); #ifdef NDEBUG (void)cfd; #endif initialized_ = true; } } return s; } Status VersionEditHandler::ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) { Status s; if (edit.is_column_family_add_) { s = OnColumnFamilyAdd(edit, cfd); } else if (edit.is_column_family_drop_) { s = OnColumnFamilyDrop(edit, cfd); } else { s = OnNonCfOperation(edit, cfd); } if (s.ok()) { assert(cfd != nullptr); s = ExtractInfoFromVersionEdit(*cfd, edit); } return s; } Status VersionEditHandler::OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd) { bool cf_in_not_found = false; bool cf_in_builders = false; CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders); assert(cfd != nullptr); *cfd = nullptr; Status s; if (cf_in_builders || cf_in_not_found) { s = Status::Corruption("MANIFEST adding the same column family twice: " + edit.column_family_name_); } if (s.ok()) { auto cf_options = name_to_options_.find(edit.column_family_name_); // implicitly add persistent_stats column family without requiring user // to specify ColumnFamilyData* tmp_cfd = nullptr; bool is_persistent_stats_column_family = edit.column_family_name_.compare(kPersistentStatsColumnFamilyName) == 0; if (cf_options == name_to_options_.end() && !is_persistent_stats_column_family) { column_families_not_found_.emplace(edit.column_family_, edit.column_family_name_); } else { if 
(is_persistent_stats_column_family) { ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); tmp_cfd = CreateCfAndInit(cfo, edit); } else { tmp_cfd = CreateCfAndInit(cf_options->second, edit); } *cfd = tmp_cfd; } } return s; } Status VersionEditHandler::OnColumnFamilyDrop(VersionEdit& edit, ColumnFamilyData** cfd) { bool cf_in_not_found = false; bool cf_in_builders = false; CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders); assert(cfd != nullptr); *cfd = nullptr; ColumnFamilyData* tmp_cfd = nullptr; Status s; if (cf_in_builders) { tmp_cfd = DestroyCfAndCleanup(edit); } else if (cf_in_not_found) { column_families_not_found_.erase(edit.column_family_); } else { s = Status::Corruption("MANIFEST - dropping non-existing column family"); } *cfd = tmp_cfd; return s; } Status VersionEditHandler::OnNonCfOperation(VersionEdit& edit, ColumnFamilyData** cfd) { bool cf_in_not_found = false; bool cf_in_builders = false; CheckColumnFamilyId(edit, &cf_in_not_found, &cf_in_builders); assert(cfd != nullptr); *cfd = nullptr; Status s; if (!cf_in_not_found) { if (!cf_in_builders) { s = Status::Corruption( "MANIFEST record referencing unknown column family"); } ColumnFamilyData* tmp_cfd = nullptr; if (s.ok()) { auto builder_iter = builders_.find(edit.column_family_); assert(builder_iter != builders_.end()); tmp_cfd = version_set_->GetColumnFamilySet()->GetColumnFamily( edit.column_family_); assert(tmp_cfd != nullptr); s = MaybeCreateVersion(edit, tmp_cfd, /*force_create_version=*/false); if (s.ok()) { s = builder_iter->second->version_builder()->Apply(&edit); } } *cfd = tmp_cfd; } return s; } // TODO maybe cache the computation result bool VersionEditHandler::HasMissingFiles() const { bool ret = false; for (const auto& elem : cf_to_missing_files_) { const auto& missing_files = elem.second; if (!missing_files.empty()) { ret = true; break; } } return ret; } void VersionEditHandler::CheckColumnFamilyId(const VersionEdit& edit, bool* cf_in_not_found, bool* cf_in_builders) 
const { assert(cf_in_not_found != nullptr); assert(cf_in_builders != nullptr); // Not found means that user didn't supply that column // family option AND we encountered column family add // record. Once we encounter column family drop record, // we will delete the column family from // column_families_not_found. bool in_not_found = column_families_not_found_.find(edit.column_family_) != column_families_not_found_.end(); // in builders means that user supplied that column family // option AND that we encountered column family add record bool in_builders = builders_.find(edit.column_family_) != builders_.end(); // They cannot both be true assert(!(in_not_found && in_builders)); *cf_in_not_found = in_not_found; *cf_in_builders = in_builders; } void VersionEditHandler::CheckIterationResult(const log::Reader& reader, Status* s) { assert(s != nullptr); if (!s->ok()) { read_buffer_.Clear(); } else if (!version_edit_params_.has_log_number_ || !version_edit_params_.has_next_file_number_ || !version_edit_params_.has_last_sequence_) { std::string msg("no "); if (!version_edit_params_.has_log_number_) { msg.append("log_file_number, "); } if (!version_edit_params_.has_next_file_number_) { msg.append("next_file_number, "); } if (!version_edit_params_.has_last_sequence_) { msg.append("last_sequence, "); } msg = msg.substr(0, msg.size() - 2); msg.append(" entry in MANIFEST"); *s = Status::Corruption(msg); } if (s->ok() && !read_only_ && !column_families_not_found_.empty()) { std::string msg; for (const auto& cf : column_families_not_found_) { msg.append(", "); msg.append(cf.second); } msg = msg.substr(2); *s = Status::InvalidArgument("Column families not opened: " + msg); } if (s->ok()) { version_set_->GetColumnFamilySet()->UpdateMaxColumnFamily( version_edit_params_.max_column_family_); version_set_->MarkMinLogNumberToKeep2PC( version_edit_params_.min_log_number_to_keep_); version_set_->MarkFileNumberUsed(version_edit_params_.prev_log_number_); 
version_set_->MarkFileNumberUsed(version_edit_params_.log_number_); for (auto* cfd : *(version_set_->GetColumnFamilySet())) { auto builder_iter = builders_.find(cfd->GetID()); assert(builder_iter != builders_.end()); auto* builder = builder_iter->second->version_builder(); if (!builder->CheckConsistencyForNumLevels()) { *s = Status::InvalidArgument( "db has more levels than options.num_levels"); break; } } } if (s->ok()) { for (auto* cfd : *(version_set_->GetColumnFamilySet())) { if (cfd->IsDropped()) { continue; } if (read_only_) { cfd->table_cache()->SetTablesAreImmortal(); } *s = LoadTables(cfd, /*prefetch_index_and_filter_in_cache=*/false, /*is_initial_load=*/true); if (!s->ok()) { break; } } } if (s->ok()) { for (auto* cfd : *(version_set_->column_family_set_)) { if (cfd->IsDropped()) { continue; } assert(cfd->initialized()); VersionEdit edit; *s = MaybeCreateVersion(edit, cfd, /*force_create_version=*/true); if (!s->ok()) { break; } } } if (s->ok()) { version_set_->manifest_file_size_ = reader.GetReadOffset(); assert(version_set_->manifest_file_size_ > 0); version_set_->next_file_number_.store( version_edit_params_.next_file_number_ + 1); version_set_->last_allocated_sequence_ = version_edit_params_.last_sequence_; version_set_->last_published_sequence_ = version_edit_params_.last_sequence_; version_set_->last_sequence_ = version_edit_params_.last_sequence_; version_set_->prev_log_number_ = version_edit_params_.prev_log_number_; } } ColumnFamilyData* VersionEditHandler::CreateCfAndInit( const ColumnFamilyOptions& cf_options, const VersionEdit& edit) { ColumnFamilyData* cfd = version_set_->CreateColumnFamily(cf_options, &edit); assert(cfd != nullptr); cfd->set_initialized(); assert(builders_.find(edit.column_family_) == builders_.end()); builders_.emplace(edit.column_family_, VersionBuilderUPtr(new BaseReferencedVersionBuilder(cfd))); if (track_missing_files_) { cf_to_missing_files_.emplace(edit.column_family_, std::unordered_set()); } return cfd; } 
ColumnFamilyData* VersionEditHandler::DestroyCfAndCleanup( const VersionEdit& edit) { auto builder_iter = builders_.find(edit.column_family_); assert(builder_iter != builders_.end()); builders_.erase(builder_iter); if (track_missing_files_) { auto missing_files_iter = cf_to_missing_files_.find(edit.column_family_); assert(missing_files_iter != cf_to_missing_files_.end()); cf_to_missing_files_.erase(missing_files_iter); } ColumnFamilyData* ret = version_set_->GetColumnFamilySet()->GetColumnFamily(edit.column_family_); assert(ret != nullptr); if (ret->UnrefAndTryDelete()) { ret = nullptr; } else { assert(false); } return ret; } Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/, ColumnFamilyData* cfd, bool force_create_version) { assert(cfd->initialized()); Status s; if (force_create_version) { auto builder_iter = builders_.find(cfd->GetID()); assert(builder_iter != builders_.end()); auto* builder = builder_iter->second->version_builder(); auto* v = new Version(cfd, version_set_, version_set_->file_options_, *cfd->GetLatestMutableCFOptions(), version_set_->current_version_number_++); s = builder->SaveTo(v->storage_info()); if (s.ok()) { // Install new version v->PrepareApply( *cfd->GetLatestMutableCFOptions(), !(version_set_->db_options_->skip_stats_update_on_db_open)); version_set_->AppendVersion(cfd, v); } else { delete v; } } return s; } Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd, bool prefetch_index_and_filter_in_cache, bool is_initial_load) { assert(cfd != nullptr); assert(!cfd->IsDropped()); auto builder_iter = builders_.find(cfd->GetID()); assert(builder_iter != builders_.end()); assert(builder_iter->second != nullptr); VersionBuilder* builder = builder_iter->second->version_builder(); assert(builder); Status s = builder->LoadTableHandlers( cfd->internal_stats(), version_set_->db_options_->max_file_opening_threads, prefetch_index_and_filter_in_cache, is_initial_load, 
cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions())); if ((s.IsPathNotFound() || s.IsCorruption()) && no_error_if_table_files_missing_) { s = Status::OK(); } if (!s.ok() && !version_set_->db_options_->paranoid_checks) { s = Status::OK(); } return s; } Status VersionEditHandler::ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, const VersionEdit& edit) { Status s; if (cfd != nullptr) { if (edit.has_db_id_) { version_edit_params_.SetDBId(edit.db_id_); } if (edit.has_log_number_) { if (cfd->GetLogNumber() > edit.log_number_) { ROCKS_LOG_WARN( version_set_->db_options()->info_log, "MANIFEST corruption detected, but ignored - Log numbers in " "records NOT monotonically increasing"); } else { cfd->SetLogNumber(edit.log_number_); version_edit_params_.SetLogNumber(edit.log_number_); } } if (edit.has_comparator_ && edit.comparator_ != cfd->user_comparator()->Name()) { s = Status::InvalidArgument( cfd->user_comparator()->Name(), "does not match existing comparator " + edit.comparator_); } } if (s.ok()) { if (edit.has_prev_log_number_) { version_edit_params_.SetPrevLogNumber(edit.prev_log_number_); } if (edit.has_next_file_number_) { version_edit_params_.SetNextFile(edit.next_file_number_); } if (edit.has_max_column_family_) { version_edit_params_.SetMaxColumnFamily(edit.max_column_family_); } if (edit.has_min_log_number_to_keep_) { version_edit_params_.min_log_number_to_keep_ = std::max(version_edit_params_.min_log_number_to_keep_, edit.min_log_number_to_keep_); } if (edit.has_last_sequence_) { version_edit_params_.SetLastSequence(edit.last_sequence_); } if (!version_edit_params_.has_prev_log_number_) { version_edit_params_.SetPrevLogNumber(0); } } return s; } VersionEditHandlerPointInTime::VersionEditHandlerPointInTime( bool read_only, const std::vector& column_families, VersionSet* version_set) : VersionEditHandler(read_only, column_families, version_set, /*track_missing_files=*/true, 
/*no_error_if_table_files_missing=*/true) {} VersionEditHandlerPointInTime::~VersionEditHandlerPointInTime() { for (const auto& elem : versions_) { delete elem.second; } versions_.clear(); } void VersionEditHandlerPointInTime::CheckIterationResult( const log::Reader& reader, Status* s) { VersionEditHandler::CheckIterationResult(reader, s); assert(s != nullptr); if (s->ok()) { for (auto* cfd : *(version_set_->column_family_set_)) { if (cfd->IsDropped()) { continue; } assert(cfd->initialized()); auto v_iter = versions_.find(cfd->GetID()); if (v_iter != versions_.end()) { assert(v_iter->second != nullptr); version_set_->AppendVersion(cfd, v_iter->second); versions_.erase(v_iter); } } } } ColumnFamilyData* VersionEditHandlerPointInTime::DestroyCfAndCleanup( const VersionEdit& edit) { ColumnFamilyData* cfd = VersionEditHandler::DestroyCfAndCleanup(edit); auto v_iter = versions_.find(edit.column_family_); if (v_iter != versions_.end()) { delete v_iter->second; versions_.erase(v_iter); } return cfd; } Status VersionEditHandlerPointInTime::MaybeCreateVersion( const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) { assert(cfd != nullptr); if (!force_create_version) { assert(edit.column_family_ == cfd->GetID()); } auto missing_files_iter = cf_to_missing_files_.find(cfd->GetID()); assert(missing_files_iter != cf_to_missing_files_.end()); std::unordered_set& missing_files = missing_files_iter->second; const bool prev_has_missing_files = !missing_files.empty(); for (const auto& file : edit.GetDeletedFiles()) { uint64_t file_num = file.second; auto fiter = missing_files.find(file_num); if (fiter != missing_files.end()) { missing_files.erase(fiter); } } Status s; for (const auto& elem : edit.GetNewFiles()) { const FileMetaData& meta = elem.second; const FileDescriptor& fd = meta.fd; uint64_t file_num = fd.GetNumber(); const std::string fpath = MakeTableFileName(cfd->ioptions()->cf_paths[0].path, file_num); s = version_set_->VerifyFileMetadata(fpath, meta); if 
(s.IsPathNotFound() || s.IsNotFound() || s.IsCorruption()) { missing_files.insert(file_num); s = Status::OK(); } else if (!s.ok()) { break; } } bool missing_info = !version_edit_params_.has_log_number_ || !version_edit_params_.has_next_file_number_ || !version_edit_params_.has_last_sequence_; // Create version before apply edit if (s.ok() && !missing_info && ((!missing_files.empty() && !prev_has_missing_files) || (missing_files.empty() && force_create_version))) { auto builder_iter = builders_.find(cfd->GetID()); assert(builder_iter != builders_.end()); auto* builder = builder_iter->second->version_builder(); auto* version = new Version(cfd, version_set_, version_set_->file_options_, *cfd->GetLatestMutableCFOptions(), version_set_->current_version_number_++); s = builder->SaveTo(version->storage_info()); if (s.ok()) { version->PrepareApply( *cfd->GetLatestMutableCFOptions(), !version_set_->db_options_->skip_stats_update_on_db_open); auto v_iter = versions_.find(cfd->GetID()); if (v_iter != versions_.end()) { delete v_iter->second; v_iter->second = version; } else { versions_.emplace(cfd->GetID(), version); } } else { delete version; } } return s; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_edit_handler.h000066400000000000000000000111731370372246700201750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include "db/version_builder.h" #include "db/version_edit.h" #include "db/version_set.h" namespace ROCKSDB_NAMESPACE { typedef std::unique_ptr VersionBuilderUPtr; // A class used for scanning MANIFEST file. // VersionEditHandler reads a MANIFEST file, parses the version edits, and // builds the version set's in-memory state, e.g. the version storage info for // the versions of column families. // To use this class and its subclasses, // 1. Create an object of VersionEditHandler or its subclasses. // VersionEditHandler handler(read_only, column_families, version_set, // track_missing_files, ignore_missing_files); // 2. Status s = handler.Iterate(reader, &db_id); // 3. Check s and handle possible errors. // // Not thread-safe, external synchronization is necessary if an object of // VersionEditHandler is shared by multiple threads. class VersionEditHandler { public: explicit VersionEditHandler( bool read_only, const std::vector& column_families, VersionSet* version_set, bool track_missing_files, bool ignore_missing_files); virtual ~VersionEditHandler() {} void Iterate(log::Reader& reader, Status* log_read_status, std::string* db_id); const Status& status() const { return status_; } bool HasMissingFiles() const; protected: Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd); Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd); Status OnColumnFamilyDrop(VersionEdit& edit, ColumnFamilyData** cfd); Status OnNonCfOperation(VersionEdit& edit, ColumnFamilyData** cfd); Status Initialize(); void CheckColumnFamilyId(const VersionEdit& edit, bool* cf_in_not_found, bool* cf_in_builders) const; virtual void CheckIterationResult(const log::Reader& reader, Status* s); ColumnFamilyData* CreateCfAndInit(const ColumnFamilyOptions& cf_options, const VersionEdit& edit); virtual ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit); virtual Status MaybeCreateVersion(const VersionEdit& edit, ColumnFamilyData* cfd, bool 
force_create_version); Status LoadTables(ColumnFamilyData* cfd, bool prefetch_index_and_filter_in_cache, bool is_initial_load); const bool read_only_; const std::vector& column_families_; Status status_; VersionSet* version_set_; AtomicGroupReadBuffer read_buffer_; std::unordered_map builders_; std::unordered_map name_to_options_; std::unordered_map column_families_not_found_; VersionEditParams version_edit_params_; const bool track_missing_files_; std::unordered_map> cf_to_missing_files_; bool no_error_if_table_files_missing_; private: Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, const VersionEdit& edit); bool initialized_; }; // A class similar to its base class, i.e. VersionEditHandler. // VersionEditHandlerPointInTime restores the versions to the most recent point // in time such that at this point, the version does not have missing files. // // Not thread-safe, external synchronization is necessary if an object of // VersionEditHandlerPointInTime is shared by multiple threads. class VersionEditHandlerPointInTime : public VersionEditHandler { public: VersionEditHandlerPointInTime( bool read_only, const std::vector& column_families, VersionSet* version_set); ~VersionEditHandlerPointInTime() override; protected: void CheckIterationResult(const log::Reader& reader, Status* s) override; ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit) override; Status MaybeCreateVersion(const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) override; private: std::unordered_map versions_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_edit_test.cc000066400000000000000000000263571370372246700177070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. 
All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/version_edit.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/coding.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { static void TestEncodeDecode(const VersionEdit& edit) { std::string encoded, encoded2; edit.EncodeTo(&encoded); VersionEdit parsed; Status s = parsed.DecodeFrom(encoded); ASSERT_TRUE(s.ok()) << s.ToString(); parsed.EncodeTo(&encoded2); ASSERT_EQ(encoded, encoded2); } class VersionEditTest : public testing::Test {}; TEST_F(VersionEditTest, EncodeDecode) { static const uint64_t kBig = 1ull << 50; static const uint32_t kBig32Bit = 1ull << 30; VersionEdit edit; for (int i = 0; i < 4; i++) { TestEncodeDecode(edit); edit.AddFile(3, kBig + 300 + i, kBig32Bit + 400 + i, 0, InternalKey("foo", kBig + 500 + i, kTypeValue), InternalKey("zoo", kBig + 600 + i, kTypeDeletion), kBig + 500 + i, kBig + 600 + i, false, kInvalidBlobFileNumber, 888, 678, "234", "crc32c"); edit.DeleteFile(4, kBig + 700 + i); } edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); TestEncodeDecode(edit); } TEST_F(VersionEditTest, EncodeDecodeNewFile4) { static const uint64_t kBig = 1ull << 50; VersionEdit edit; edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, kBig + 600, true, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.AddFile(5, 302, 
0, 100, InternalKey("foo", kBig + 502, kTypeValue), InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502, kBig + 602, true, kInvalidBlobFileNumber, 666, 888, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex), InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503, kBig + 603, true, 1001, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); ; edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); TestEncodeDecode(edit); std::string encoded, encoded2; edit.EncodeTo(&encoded); VersionEdit parsed; Status s = parsed.DecodeFrom(encoded); ASSERT_TRUE(s.ok()) << s.ToString(); auto& new_files = parsed.GetNewFiles(); ASSERT_TRUE(new_files[0].second.marked_for_compaction); ASSERT_TRUE(!new_files[1].second.marked_for_compaction); ASSERT_TRUE(new_files[2].second.marked_for_compaction); ASSERT_TRUE(new_files[3].second.marked_for_compaction); ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); ASSERT_EQ(0u, new_files[2].second.fd.GetPathId()); ASSERT_EQ(0u, new_files[3].second.fd.GetPathId()); ASSERT_EQ(kInvalidBlobFileNumber, new_files[0].second.oldest_blob_file_number); ASSERT_EQ(kInvalidBlobFileNumber, new_files[1].second.oldest_blob_file_number); ASSERT_EQ(kInvalidBlobFileNumber, new_files[2].second.oldest_blob_file_number); ASSERT_EQ(1001, new_files[3].second.oldest_blob_file_number); } TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { static const uint64_t kBig = 1ull << 50; VersionEdit edit; edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, kBig + 600, true, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.AddFile(4, 
301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, kInvalidBlobFileNumber, 686, 868, "234", "crc32c"); edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); std::string encoded; // Call back function to add extra customized builds. bool first = true; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionEdit::EncodeTo:NewFile4:CustomizeFields", [&](void* arg) { std::string* str = reinterpret_cast(arg); PutVarint32(str, 33); const std::string str1 = "random_string"; PutLengthPrefixedSlice(str, str1); if (first) { first = false; PutVarint32(str, 22); const std::string str2 = "s"; PutLengthPrefixedSlice(str, str2); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); edit.EncodeTo(&encoded); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); VersionEdit parsed; Status s = parsed.DecodeFrom(encoded); ASSERT_TRUE(s.ok()) << s.ToString(); ASSERT_TRUE(!first); auto& new_files = parsed.GetNewFiles(); ASSERT_TRUE(new_files[0].second.marked_for_compaction); ASSERT_TRUE(!new_files[1].second.marked_for_compaction); ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); ASSERT_EQ(1u, parsed.GetDeletedFiles().size()); } TEST_F(VersionEditTest, NewFile4NotSupportedField) { static const uint64_t kBig = 1ull << 50; VersionEdit edit; edit.AddFile(3, 300, 3, 100, InternalKey("foo", kBig + 500, kTypeValue), InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, kBig + 600, true, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); std::string encoded; // Call back function to add extra customized builds. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionEdit::EncodeTo:NewFile4:CustomizeFields", [&](void* arg) { std::string* str = reinterpret_cast(arg); const std::string str1 = "s"; PutLengthPrefixedSlice(str, str1); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); edit.EncodeTo(&encoded); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); VersionEdit parsed; Status s = parsed.DecodeFrom(encoded); ASSERT_NOK(s); } TEST_F(VersionEditTest, EncodeEmptyFile) { VersionEdit edit; edit.AddFile(0, 0, 0, 0, InternalKey(), InternalKey(), 0, 0, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); std::string buffer; ASSERT_TRUE(!edit.EncodeTo(&buffer)); } TEST_F(VersionEditTest, ColumnFamilyTest) { VersionEdit edit; edit.SetColumnFamily(2); edit.AddColumnFamily("column_family"); edit.SetMaxColumnFamily(5); TestEncodeDecode(edit); edit.Clear(); edit.SetColumnFamily(3); edit.DropColumnFamily(); TestEncodeDecode(edit); } TEST_F(VersionEditTest, MinLogNumberToKeep) { VersionEdit edit; edit.SetMinLogNumberToKeep(13); TestEncodeDecode(edit); edit.Clear(); edit.SetMinLogNumberToKeep(23); TestEncodeDecode(edit); } TEST_F(VersionEditTest, AtomicGroupTest) { VersionEdit edit; edit.MarkAtomicGroup(1); TestEncodeDecode(edit); } TEST_F(VersionEditTest, IgnorableField) { VersionEdit ve; std::string encoded; // Size of ignorable field is too large PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66); // This is a customized ignorable tag PutVarint32Varint64(&encoded, 0x2710 /* A field with kTagSafeIgnoreMask set */, 5 /* fieldlength 5 */); encoded += "abc"; // Only fills 3 bytes, ASSERT_NOK(ve.DecodeFrom(encoded)); encoded.clear(); // Error when seeing unidentified tag that is not ignorable PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66); // This is a customized ignorable tag PutVarint32Varint64(&encoded, 666 /* A field with kTagSafeIgnoreMask unset 
*/, 3 /* fieldlength 3 */); encoded += "abc"; // Fill 3 bytes PutVarint32Varint64(&encoded, 3 /* next file number */, 88); ASSERT_NOK(ve.DecodeFrom(encoded)); // Safely ignore an identified but safely ignorable entry encoded.clear(); PutVarint32Varint64(&encoded, 2 /* kLogNumber */, 66); // This is a customized ignorable tag PutVarint32Varint64(&encoded, 0x2710 /* A field with kTagSafeIgnoreMask set */, 3 /* fieldlength 3 */); encoded += "abc"; // Fill 3 bytes PutVarint32Varint64(&encoded, 3 /* kNextFileNumber */, 88); ASSERT_OK(ve.DecodeFrom(encoded)); ASSERT_TRUE(ve.HasLogNumber()); ASSERT_TRUE(ve.HasNextFile()); ASSERT_EQ(66, ve.GetLogNumber()); ASSERT_EQ(88, ve.GetNextFile()); } TEST_F(VersionEditTest, DbId) { VersionEdit edit; edit.SetDBId("ab34-cd12-435f-er00"); TestEncodeDecode(edit); edit.Clear(); edit.SetDBId("34ba-cd12-435f-er01"); TestEncodeDecode(edit); } TEST_F(VersionEditTest, BlobFileAdditionAndGarbage) { VersionEdit edit; const std::string checksum_method_prefix = "Hash"; const std::string checksum_value_prefix = "Value"; for (uint64_t blob_file_number = 1; blob_file_number <= 10; ++blob_file_number) { const uint64_t total_blob_count = blob_file_number << 10; const uint64_t total_blob_bytes = blob_file_number << 20; std::string checksum_method(checksum_method_prefix); AppendNumberTo(&checksum_method, blob_file_number); std::string checksum_value(checksum_value_prefix); AppendNumberTo(&checksum_value, blob_file_number); edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); const uint64_t garbage_blob_count = total_blob_count >> 2; const uint64_t garbage_blob_bytes = total_blob_bytes >> 1; edit.AddBlobFileGarbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); } TestEncodeDecode(edit); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } 
rocksdb-6.11.4/db/version_set.cc000066400000000000000000007236551370372246700165230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/version_set.h" #include #include #include #include #include #include #include #include #include #include #include "compaction/compaction.h" #include "db/internal_stats.h" #include "db/log_reader.h" #include "db/log_writer.h" #include "db/memtable.h" #include "db/merge_context.h" #include "db/merge_helper.h" #include "db/pinned_iterators_manager.h" #include "db/table_cache.h" #include "db/version_builder.h" #include "db/version_edit_handler.h" #include "file/filename.h" #include "file/random_access_file_reader.h" #include "file/read_write_util.h" #include "file/writable_file_writer.h" #include "monitoring/file_read_sample.h" #include "monitoring/perf_context_imp.h" #include "monitoring/persistent_stats_history.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" #include "rocksdb/write_buffer_manager.h" #include "table/format.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/merging_iterator.h" #include "table/meta_blocks.h" #include "table/multiget_context.h" #include "table/plain/plain_table_factory.h" #include "table/table_reader.h" #include "table/two_level_iterator.h" #include "test_util/sync_point.h" #include "util/cast_util.h" #include "util/coding.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/user_comparator_wrapper.h" namespace ROCKSDB_NAMESPACE { namespace { // Find File in LevelFilesBrief 
data structure // Within an index range defined by left and right int FindFileInRange(const InternalKeyComparator& icmp, const LevelFilesBrief& file_level, const Slice& key, uint32_t left, uint32_t right) { auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool { return icmp.InternalKeyComparator::Compare(f.largest_key, k) < 0; }; const auto &b = file_level.files; return static_cast(std::lower_bound(b + left, b + right, key, cmp) - b); } Status OverlapWithIterator(const Comparator* ucmp, const Slice& smallest_user_key, const Slice& largest_user_key, InternalIterator* iter, bool* overlap) { InternalKey range_start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek); iter->Seek(range_start.Encode()); if (!iter->status().ok()) { return iter->status(); } *overlap = false; if (iter->Valid()) { ParsedInternalKey seek_result; if (!ParseInternalKey(iter->key(), &seek_result)) { return Status::Corruption("DB have corrupted keys"); } if (ucmp->CompareWithoutTimestamp(seek_result.user_key, largest_user_key) <= 0) { *overlap = true; } } return iter->status(); } // Class to help choose the next file to search for the particular key. // Searches and returns files level by level. // We can search level-by-level since entries never hop across // levels. Therefore we are guaranteed that if we find data // in a smaller level, later levels are irrelevant (unless we // are MergeInProgress). 
class FilePicker { public: FilePicker(std::vector* files, const Slice& user_key, const Slice& ikey, autovector* file_levels, unsigned int num_levels, FileIndexer* file_indexer, const Comparator* user_comparator, const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), curr_level_(static_cast(-1)), returned_file_level_(static_cast(-1)), hit_file_level_(static_cast(-1)), search_left_bound_(0), search_right_bound_(FileIndexer::kLevelMaxIndex), #ifndef NDEBUG files_(files), #endif level_files_brief_(file_levels), is_hit_file_last_in_level_(false), curr_file_level_(nullptr), user_key_(user_key), ikey_(ikey), file_indexer_(file_indexer), user_comparator_(user_comparator), internal_comparator_(internal_comparator) { #ifdef NDEBUG (void)files; #endif // Setup member variables to search first level. search_ended_ = !PrepareNextLevel(); if (!search_ended_) { // Prefetch Level 0 table data to avoid cache miss if possible. for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) { auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; if (r) { r->Prepare(ikey); } } } } int GetCurrentLevel() const { return curr_level_; } FdWithKeyRange* GetNextFile() { while (!search_ended_) { // Loops over different levels. while (curr_index_in_curr_level_ < curr_file_level_->num_files) { // Loops over all files in current level. FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_]; hit_file_level_ = curr_level_; is_hit_file_last_in_level_ = curr_index_in_curr_level_ == curr_file_level_->num_files - 1; int cmp_largest = -1; // Do key range filtering of files or/and fractional cascading if: // (1) not all the files are in level 0, or // (2) there are more than 3 current level files // If there are only 3 or less current level files in the system, we skip // the key range filtering. 
In this case, more likely, the system is // highly tuned to minimize number of tables queried by each query, // so it is unlikely that key range filtering is more efficient than // querying the files. if (num_levels_ > 1 || curr_file_level_->num_files > 3) { // Check if key is within a file's range. If search left bound and // right bound point to the same find, we are sure key falls in // range. assert(curr_level_ == 0 || curr_index_in_curr_level_ == start_index_in_curr_level_ || user_comparator_->CompareWithoutTimestamp( user_key_, ExtractUserKey(f->smallest_key)) <= 0); int cmp_smallest = user_comparator_->CompareWithoutTimestamp( user_key_, ExtractUserKey(f->smallest_key)); if (cmp_smallest >= 0) { cmp_largest = user_comparator_->CompareWithoutTimestamp( user_key_, ExtractUserKey(f->largest_key)); } // Setup file search bound for the next level based on the // comparison results if (curr_level_ > 0) { file_indexer_->GetNextLevelIndex(curr_level_, curr_index_in_curr_level_, cmp_smallest, cmp_largest, &search_left_bound_, &search_right_bound_); } // Key falls out of current file's range if (cmp_smallest < 0 || cmp_largest > 0) { if (curr_level_ == 0) { ++curr_index_in_curr_level_; continue; } else { // Search next level. break; } } } #ifndef NDEBUG // Sanity check to make sure that the files are correctly sorted if (prev_file_) { if (curr_level_ != 0) { int comp_sign = internal_comparator_->Compare( prev_file_->largest_key, f->smallest_key); assert(comp_sign < 0); } else { // level == 0, the current file cannot be newer than the previous // one. Use compressed data structure, has no attribute seqNo assert(curr_index_in_curr_level_ > 0); assert(!NewestFirstBySeqNo(files_[0][curr_index_in_curr_level_], files_[0][curr_index_in_curr_level_-1])); } } prev_file_ = f; #endif returned_file_level_ = curr_level_; if (curr_level_ > 0 && cmp_largest < 0) { // No more files to search in this level. 
search_ended_ = !PrepareNextLevel(); } else { ++curr_index_in_curr_level_; } return f; } // Start searching next level. search_ended_ = !PrepareNextLevel(); } // Search ended. return nullptr; } // getter for current file level // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts unsigned int GetHitFileLevel() { return hit_file_level_; } // Returns true if the most recent "hit file" (i.e., one returned by // GetNextFile()) is at the last index in its level. bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; } private: unsigned int num_levels_; unsigned int curr_level_; unsigned int returned_file_level_; unsigned int hit_file_level_; int32_t search_left_bound_; int32_t search_right_bound_; #ifndef NDEBUG std::vector* files_; #endif autovector* level_files_brief_; bool search_ended_; bool is_hit_file_last_in_level_; LevelFilesBrief* curr_file_level_; unsigned int curr_index_in_curr_level_; unsigned int start_index_in_curr_level_; Slice user_key_; Slice ikey_; FileIndexer* file_indexer_; const Comparator* user_comparator_; const InternalKeyComparator* internal_comparator_; #ifndef NDEBUG FdWithKeyRange* prev_file_; #endif // Setup local variables to search next level. // Returns false if there are no more levels to search. bool PrepareNextLevel() { curr_level_++; while (curr_level_ < num_levels_) { curr_file_level_ = &(*level_files_brief_)[curr_level_]; if (curr_file_level_->num_files == 0) { // When current level is empty, the search bound generated from upper // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is // also empty. assert(search_left_bound_ == 0); assert(search_right_bound_ == -1 || search_right_bound_ == FileIndexer::kLevelMaxIndex); // Since current level is empty, it will need to search all files in // the next level search_left_bound_ = 0; search_right_bound_ = FileIndexer::kLevelMaxIndex; curr_level_++; continue; } // Some files may overlap each other. 
We find // all files that overlap user_key and process them in order from // newest to oldest. In the context of merge-operator, this can occur at // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes // are always compacted into a single entry). int32_t start_index; if (curr_level_ == 0) { // On Level-0, we read through all files to check for overlap. start_index = 0; } else { // On Level-n (n>=1), files are sorted. Binary search to find the // earliest file whose largest key >= ikey. Search left bound and // right bound are used to narrow the range. if (search_left_bound_ <= search_right_bound_) { if (search_right_bound_ == FileIndexer::kLevelMaxIndex) { search_right_bound_ = static_cast(curr_file_level_->num_files) - 1; } // `search_right_bound_` is an inclusive upper-bound, but since it was // determined based on user key, it is still possible the lookup key // falls to the right of `search_right_bound_`'s corresponding file. // So, pass a limit one higher, which allows us to detect this case. start_index = FindFileInRange(*internal_comparator_, *curr_file_level_, ikey_, static_cast(search_left_bound_), static_cast(search_right_bound_) + 1); if (start_index == search_right_bound_ + 1) { // `ikey_` comes after `search_right_bound_`. The lookup key does // not exist on this level, so let's skip this level and do a full // binary search on the next level. search_left_bound_ = 0; search_right_bound_ = FileIndexer::kLevelMaxIndex; curr_level_++; continue; } } else { // search_left_bound > search_right_bound, key does not exist in // this level. Since no comparison is done in this level, it will // need to search all files in the next level. search_left_bound_ = 0; search_right_bound_ = FileIndexer::kLevelMaxIndex; curr_level_++; continue; } } start_index_in_curr_level_ = start_index; curr_index_in_curr_level_ = start_index; #ifndef NDEBUG prev_file_ = nullptr; #endif return true; } // curr_level_ = num_levels_. So, no more levels to search. 
return false; } }; class FilePickerMultiGet { private: struct FilePickerContext; public: FilePickerMultiGet(MultiGetRange* range, autovector* file_levels, unsigned int num_levels, FileIndexer* file_indexer, const Comparator* user_comparator, const InternalKeyComparator* internal_comparator) : num_levels_(num_levels), curr_level_(static_cast(-1)), returned_file_level_(static_cast(-1)), hit_file_level_(static_cast(-1)), range_(range), batch_iter_(range->begin()), batch_iter_prev_(range->begin()), upper_key_(range->begin()), maybe_repeat_key_(false), current_level_range_(*range, range->begin(), range->end()), current_file_range_(*range, range->begin(), range->end()), level_files_brief_(file_levels), is_hit_file_last_in_level_(false), curr_file_level_(nullptr), file_indexer_(file_indexer), user_comparator_(user_comparator), internal_comparator_(internal_comparator) { for (auto iter = range_->begin(); iter != range_->end(); ++iter) { fp_ctx_array_[iter.index()] = FilePickerContext(0, FileIndexer::kLevelMaxIndex); } // Setup member variables to search first level. search_ended_ = !PrepareNextLevel(); if (!search_ended_) { // REVISIT // Prefetch Level 0 table data to avoid cache miss if possible. // As of now, only PlainTableReader and CuckooTableReader do any // prefetching. 
This may not be necessary anymore once we implement // batching in those table readers for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) { auto* r = (*level_files_brief_)[0].files[i].fd.table_reader; if (r) { for (auto iter = range_->begin(); iter != range_->end(); ++iter) { r->Prepare(iter->ikey); } } } } } int GetCurrentLevel() const { return curr_level_; } // Iterates through files in the current level until it finds a file that // contains atleast one key from the MultiGet batch bool GetNextFileInLevelWithKeys(MultiGetRange* next_file_range, size_t* file_index, FdWithKeyRange** fd, bool* is_last_key_in_file) { size_t curr_file_index = *file_index; FdWithKeyRange* f = nullptr; bool file_hit = false; int cmp_largest = -1; if (curr_file_index >= curr_file_level_->num_files) { // In the unlikely case the next key is a duplicate of the current key, // and the current key is the last in the level and the internal key // was not found, we need to skip lookup for the remaining keys and // reset the search bounds if (batch_iter_ != current_level_range_.end()) { ++batch_iter_; for (; batch_iter_ != current_level_range_.end(); ++batch_iter_) { struct FilePickerContext& fp_ctx = fp_ctx_array_[batch_iter_.index()]; fp_ctx.search_left_bound = 0; fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; } } return false; } // Loops over keys in the MultiGet batch until it finds a file with // atleast one of the keys. 
Then it keeps moving forward until the // last key in the batch that falls in that file while (batch_iter_ != current_level_range_.end() && (fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level == curr_file_index || !file_hit)) { struct FilePickerContext& fp_ctx = fp_ctx_array_[batch_iter_.index()]; f = &curr_file_level_->files[fp_ctx.curr_index_in_curr_level]; Slice& user_key = batch_iter_->ukey; // Do key range filtering of files or/and fractional cascading if: // (1) not all the files are in level 0, or // (2) there are more than 3 current level files // If there are only 3 or less current level files in the system, we // skip the key range filtering. In this case, more likely, the system // is highly tuned to minimize number of tables queried by each query, // so it is unlikely that key range filtering is more efficient than // querying the files. if (num_levels_ > 1 || curr_file_level_->num_files > 3) { // Check if key is within a file's range. If search left bound and // right bound point to the same find, we are sure key falls in // range. 
assert(curr_level_ == 0 || fp_ctx.curr_index_in_curr_level == fp_ctx.start_index_in_curr_level || user_comparator_->Compare(user_key, ExtractUserKey(f->smallest_key)) <= 0); int cmp_smallest = user_comparator_->Compare( user_key, ExtractUserKey(f->smallest_key)); if (cmp_smallest >= 0) { cmp_largest = user_comparator_->Compare( user_key, ExtractUserKey(f->largest_key)); } else { cmp_largest = -1; } // Setup file search bound for the next level based on the // comparison results if (curr_level_ > 0) { file_indexer_->GetNextLevelIndex( curr_level_, fp_ctx.curr_index_in_curr_level, cmp_smallest, cmp_largest, &fp_ctx.search_left_bound, &fp_ctx.search_right_bound); } // Key falls out of current file's range if (cmp_smallest < 0 || cmp_largest > 0) { next_file_range->SkipKey(batch_iter_); } else { file_hit = true; } } else { file_hit = true; } if (cmp_largest == 0) { // cmp_largest is 0, which means the next key will not be in this // file, so stop looking further. However, its possible there are // duplicates in the batch, so find the upper bound for the batch // in this file (upper_key_) by skipping past the duplicates. We // leave batch_iter_ as is since we may have to pick up from there // for the next file, if this file has a merge value rather than // final value upper_key_ = batch_iter_; ++upper_key_; while (upper_key_ != current_level_range_.end() && user_comparator_->Compare(batch_iter_->ukey, upper_key_->ukey) == 0) { ++upper_key_; } break; } else { if (curr_level_ == 0) { // We need to look through all files in level 0 ++fp_ctx.curr_index_in_curr_level; } ++batch_iter_; } if (!file_hit) { curr_file_index = (batch_iter_ != current_level_range_.end()) ? 
fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level : curr_file_level_->num_files; } } *fd = f; *file_index = curr_file_index; *is_last_key_in_file = cmp_largest == 0; if (!*is_last_key_in_file) { // If the largest key in the batch overlapping the file is not the // largest key in the file, upper_ley_ would not have been updated so // update it here upper_key_ = batch_iter_; } return file_hit; } FdWithKeyRange* GetNextFile() { while (!search_ended_) { // Start searching next level. if (batch_iter_ == current_level_range_.end()) { search_ended_ = !PrepareNextLevel(); continue; } else { if (maybe_repeat_key_) { maybe_repeat_key_ = false; // Check if we found the final value for the last key in the // previous lookup range. If we did, then there's no need to look // any further for that key, so advance batch_iter_. Else, keep // batch_iter_ positioned on that key so we look it up again in // the next file // For L0, always advance the key because we will look in the next // file regardless for all keys not found yet if (current_level_range_.CheckKeyDone(batch_iter_) || curr_level_ == 0) { batch_iter_ = upper_key_; } } // batch_iter_prev_ will become the start key for the next file // lookup batch_iter_prev_ = batch_iter_; } MultiGetRange next_file_range(current_level_range_, batch_iter_prev_, current_level_range_.end()); size_t curr_file_index = (batch_iter_ != current_level_range_.end()) ? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level : curr_file_level_->num_files; FdWithKeyRange* f; bool is_last_key_in_file; if (!GetNextFileInLevelWithKeys(&next_file_range, &curr_file_index, &f, &is_last_key_in_file)) { search_ended_ = !PrepareNextLevel(); } else { if (is_last_key_in_file) { // Since cmp_largest is 0, batch_iter_ still points to the last key // that falls in this file, instead of the next one. 
Increment // the file index for all keys between batch_iter_ and upper_key_ auto tmp_iter = batch_iter_; while (tmp_iter != upper_key_) { ++(fp_ctx_array_[tmp_iter.index()].curr_index_in_curr_level); ++tmp_iter; } maybe_repeat_key_ = true; } // Set the range for this file current_file_range_ = MultiGetRange(next_file_range, batch_iter_prev_, upper_key_); returned_file_level_ = curr_level_; hit_file_level_ = curr_level_; is_hit_file_last_in_level_ = curr_file_index == curr_file_level_->num_files - 1; return f; } } // Search ended return nullptr; } // getter for current file level // for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts unsigned int GetHitFileLevel() { return hit_file_level_; } // Returns true if the most recent "hit file" (i.e., one returned by // GetNextFile()) is at the last index in its level. bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; } const MultiGetRange& CurrentFileRange() { return current_file_range_; } private: unsigned int num_levels_; unsigned int curr_level_; unsigned int returned_file_level_; unsigned int hit_file_level_; struct FilePickerContext { int32_t search_left_bound; int32_t search_right_bound; unsigned int curr_index_in_curr_level; unsigned int start_index_in_curr_level; FilePickerContext(int32_t left, int32_t right) : search_left_bound(left), search_right_bound(right), curr_index_in_curr_level(0), start_index_in_curr_level(0) {} FilePickerContext() = default; }; std::array fp_ctx_array_; MultiGetRange* range_; // Iterator to iterate through the keys in a MultiGet batch, that gets reset // at the beginning of each level. 
Each call to GetNextFile() will position // batch_iter_ at or right after the last key that was found in the returned // SST file MultiGetRange::Iterator batch_iter_; // An iterator that records the previous position of batch_iter_, i.e last // key found in the previous SST file, in order to serve as the start of // the batch key range for the next SST file MultiGetRange::Iterator batch_iter_prev_; MultiGetRange::Iterator upper_key_; bool maybe_repeat_key_; MultiGetRange current_level_range_; MultiGetRange current_file_range_; autovector* level_files_brief_; bool search_ended_; bool is_hit_file_last_in_level_; LevelFilesBrief* curr_file_level_; FileIndexer* file_indexer_; const Comparator* user_comparator_; const InternalKeyComparator* internal_comparator_; // Setup local variables to search next level. // Returns false if there are no more levels to search. bool PrepareNextLevel() { if (curr_level_ == 0) { MultiGetRange::Iterator mget_iter = current_level_range_.begin(); if (fp_ctx_array_[mget_iter.index()].curr_index_in_curr_level < curr_file_level_->num_files) { batch_iter_prev_ = current_level_range_.begin(); upper_key_ = batch_iter_ = current_level_range_.begin(); return true; } } curr_level_++; // Reset key range to saved value while (curr_level_ < num_levels_) { bool level_contains_keys = false; curr_file_level_ = &(*level_files_brief_)[curr_level_]; if (curr_file_level_->num_files == 0) { // When current level is empty, the search bound generated from upper // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is // also empty. 
for (auto mget_iter = current_level_range_.begin(); mget_iter != current_level_range_.end(); ++mget_iter) { struct FilePickerContext& fp_ctx = fp_ctx_array_[mget_iter.index()]; assert(fp_ctx.search_left_bound == 0); assert(fp_ctx.search_right_bound == -1 || fp_ctx.search_right_bound == FileIndexer::kLevelMaxIndex); // Since current level is empty, it will need to search all files in // the next level fp_ctx.search_left_bound = 0; fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; } // Skip all subsequent empty levels do { ++curr_level_; } while ((curr_level_ < num_levels_) && (*level_files_brief_)[curr_level_].num_files == 0); continue; } // Some files may overlap each other. We find // all files that overlap user_key and process them in order from // newest to oldest. In the context of merge-operator, this can occur at // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes // are always compacted into a single entry). int32_t start_index = -1; current_level_range_ = MultiGetRange(*range_, range_->begin(), range_->end()); for (auto mget_iter = current_level_range_.begin(); mget_iter != current_level_range_.end(); ++mget_iter) { struct FilePickerContext& fp_ctx = fp_ctx_array_[mget_iter.index()]; if (curr_level_ == 0) { // On Level-0, we read through all files to check for overlap. start_index = 0; level_contains_keys = true; } else { // On Level-n (n>=1), files are sorted. Binary search to find the // earliest file whose largest key >= ikey. Search left bound and // right bound are used to narrow the range. if (fp_ctx.search_left_bound <= fp_ctx.search_right_bound) { if (fp_ctx.search_right_bound == FileIndexer::kLevelMaxIndex) { fp_ctx.search_right_bound = static_cast(curr_file_level_->num_files) - 1; } // `search_right_bound_` is an inclusive upper-bound, but since it // was determined based on user key, it is still possible the lookup // key falls to the right of `search_right_bound_`'s corresponding // file. 
So, pass a limit one higher, which allows us to detect this // case. Slice& ikey = mget_iter->ikey; start_index = FindFileInRange( *internal_comparator_, *curr_file_level_, ikey, static_cast(fp_ctx.search_left_bound), static_cast(fp_ctx.search_right_bound) + 1); if (start_index == fp_ctx.search_right_bound + 1) { // `ikey_` comes after `search_right_bound_`. The lookup key does // not exist on this level, so let's skip this level and do a full // binary search on the next level. fp_ctx.search_left_bound = 0; fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; current_level_range_.SkipKey(mget_iter); continue; } else { level_contains_keys = true; } } else { // search_left_bound > search_right_bound, key does not exist in // this level. Since no comparison is done in this level, it will // need to search all files in the next level. fp_ctx.search_left_bound = 0; fp_ctx.search_right_bound = FileIndexer::kLevelMaxIndex; current_level_range_.SkipKey(mget_iter); continue; } } fp_ctx.start_index_in_curr_level = start_index; fp_ctx.curr_index_in_curr_level = start_index; } if (level_contains_keys) { batch_iter_prev_ = current_level_range_.begin(); upper_key_ = batch_iter_ = current_level_range_.begin(); return true; } curr_level_++; } // curr_level_ = num_levels_. So, no more levels to search. 
return false; } }; } // anonymous namespace VersionStorageInfo::~VersionStorageInfo() { delete[] files_; } Version::~Version() { assert(refs_ == 0); // Remove from linked list prev_->next_ = next_; next_->prev_ = prev_; // Drop references to files for (int level = 0; level < storage_info_.num_levels_; level++) { for (size_t i = 0; i < storage_info_.files_[level].size(); i++) { FileMetaData* f = storage_info_.files_[level][i]; assert(f->refs > 0); f->refs--; if (f->refs <= 0) { assert(cfd_ != nullptr); uint32_t path_id = f->fd.GetPathId(); assert(path_id < cfd_->ioptions()->cf_paths.size()); vset_->obsolete_files_.push_back( ObsoleteFileInfo(f, cfd_->ioptions()->cf_paths[path_id].path)); } } } } int FindFile(const InternalKeyComparator& icmp, const LevelFilesBrief& file_level, const Slice& key) { return FindFileInRange(icmp, file_level, key, 0, static_cast(file_level.num_files)); } void DoGenerateLevelFilesBrief(LevelFilesBrief* file_level, const std::vector& files, Arena* arena) { assert(file_level); assert(arena); size_t num = files.size(); file_level->num_files = num; char* mem = arena->AllocateAligned(num * sizeof(FdWithKeyRange)); file_level->files = new (mem)FdWithKeyRange[num]; for (size_t i = 0; i < num; i++) { Slice smallest_key = files[i]->smallest.Encode(); Slice largest_key = files[i]->largest.Encode(); // Copy key slice to sequential memory size_t smallest_size = smallest_key.size(); size_t largest_size = largest_key.size(); mem = arena->AllocateAligned(smallest_size + largest_size); memcpy(mem, smallest_key.data(), smallest_size); memcpy(mem + smallest_size, largest_key.data(), largest_size); FdWithKeyRange& f = file_level->files[i]; f.fd = files[i]->fd; f.file_metadata = files[i]; f.smallest_key = Slice(mem, smallest_size); f.largest_key = Slice(mem + smallest_size, largest_size); } } static bool AfterFile(const Comparator* ucmp, const Slice* user_key, const FdWithKeyRange* f) { // nullptr user_key occurs before all keys and is therefore never after 
*f return (user_key != nullptr && ucmp->CompareWithoutTimestamp(*user_key, ExtractUserKey(f->largest_key)) > 0); } static bool BeforeFile(const Comparator* ucmp, const Slice* user_key, const FdWithKeyRange* f) { // nullptr user_key occurs after all keys and is therefore never before *f return (user_key != nullptr && ucmp->CompareWithoutTimestamp(*user_key, ExtractUserKey(f->smallest_key)) < 0); } bool SomeFileOverlapsRange( const InternalKeyComparator& icmp, bool disjoint_sorted_files, const LevelFilesBrief& file_level, const Slice* smallest_user_key, const Slice* largest_user_key) { const Comparator* ucmp = icmp.user_comparator(); if (!disjoint_sorted_files) { // Need to check against all files for (size_t i = 0; i < file_level.num_files; i++) { const FdWithKeyRange* f = &(file_level.files[i]); if (AfterFile(ucmp, smallest_user_key, f) || BeforeFile(ucmp, largest_user_key, f)) { // No overlap } else { return true; // Overlap } } return false; } // Binary search over file list uint32_t index = 0; if (smallest_user_key != nullptr) { // Find the leftmost possible internal key for smallest_user_key InternalKey small; small.SetMinPossibleForUserKey(*smallest_user_key); index = FindFile(icmp, file_level, small.Encode()); } if (index >= file_level.num_files) { // beginning of range is after all files, so no overlap. 
return false; } return !BeforeFile(ucmp, largest_user_key, &file_level.files[index]); } namespace { class LevelIterator final : public InternalIterator { public: LevelIterator(TableCache* table_cache, const ReadOptions& read_options, const FileOptions& file_options, const InternalKeyComparator& icomparator, const LevelFilesBrief* flevel, const SliceTransform* prefix_extractor, bool should_sample, HistogramImpl* file_read_hist, TableReaderCaller caller, bool skip_filters, int level, RangeDelAggregator* range_del_agg, const std::vector* compaction_boundaries = nullptr, bool allow_unprepared_value = false) : table_cache_(table_cache), read_options_(read_options), file_options_(file_options), icomparator_(icomparator), user_comparator_(icomparator.user_comparator()), flevel_(flevel), prefix_extractor_(prefix_extractor), file_read_hist_(file_read_hist), should_sample_(should_sample), caller_(caller), skip_filters_(skip_filters), allow_unprepared_value_(allow_unprepared_value), file_index_(flevel_->num_files), level_(level), range_del_agg_(range_del_agg), pinned_iters_mgr_(nullptr), compaction_boundaries_(compaction_boundaries) { // Empty level is not supported. assert(flevel_ != nullptr && flevel_->num_files > 0); } ~LevelIterator() override { delete file_iter_.Set(nullptr); } void Seek(const Slice& target) override; void SeekForPrev(const Slice& target) override; void SeekToFirst() override; void SeekToLast() override; void Next() final override; bool NextAndGetResult(IterateResult* result) override; void Prev() override; bool Valid() const override { return file_iter_.Valid(); } Slice key() const override { assert(Valid()); return file_iter_.key(); } Slice value() const override { assert(Valid()); return file_iter_.value(); } Status status() const override { return file_iter_.iter() ? 
file_iter_.status() : Status::OK(); } bool PrepareValue() override { return file_iter_.PrepareValue(); } inline bool MayBeOutOfLowerBound() override { assert(Valid()); return may_be_out_of_lower_bound_ && file_iter_.MayBeOutOfLowerBound(); } inline bool MayBeOutOfUpperBound() override { assert(Valid()); return file_iter_.MayBeOutOfUpperBound(); } void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { pinned_iters_mgr_ = pinned_iters_mgr; if (file_iter_.iter()) { file_iter_.SetPinnedItersMgr(pinned_iters_mgr); } } bool IsKeyPinned() const override { return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && file_iter_.iter() && file_iter_.IsKeyPinned(); } bool IsValuePinned() const override { return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && file_iter_.iter() && file_iter_.IsValuePinned(); } private: // Return true if at least one invalid file is seen and skipped. bool SkipEmptyFileForward(); void SkipEmptyFileBackward(); void SetFileIterator(InternalIterator* iter); void InitFileIterator(size_t new_file_index); // Called by both of Next() and NextAndGetResult(). Force inline. 
void NextImpl() { assert(Valid()); file_iter_.Next(); SkipEmptyFileForward(); } const Slice& file_smallest_key(size_t file_index) { assert(file_index < flevel_->num_files); return flevel_->files[file_index].smallest_key; } bool KeyReachedUpperBound(const Slice& internal_key) { return read_options_.iterate_upper_bound != nullptr && user_comparator_.CompareWithoutTimestamp( ExtractUserKey(internal_key), /*a_has_ts=*/true, *read_options_.iterate_upper_bound, /*b_has_ts=*/false) >= 0; } InternalIterator* NewFileIterator() { assert(file_index_ < flevel_->num_files); auto file_meta = flevel_->files[file_index_]; if (should_sample_) { sample_file_read_inc(file_meta.file_metadata); } const InternalKey* smallest_compaction_key = nullptr; const InternalKey* largest_compaction_key = nullptr; if (compaction_boundaries_ != nullptr) { smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest; largest_compaction_key = (*compaction_boundaries_)[file_index_].largest; } CheckMayBeOutOfLowerBound(); return table_cache_->NewIterator( read_options_, file_options_, icomparator_, *file_meta.file_metadata, range_del_agg_, prefix_extractor_, nullptr /* don't need reference to table */, file_read_hist_, caller_, /*arena=*/nullptr, skip_filters_, level_, /*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key, largest_compaction_key, allow_unprepared_value_); } // Check if current file being fully within iterate_lower_bound. // // Note MyRocks may update iterate bounds between seek. To workaround it, // we need to check and update may_be_out_of_lower_bound_ accordingly. 
void CheckMayBeOutOfLowerBound() { if (read_options_.iterate_lower_bound != nullptr && file_index_ < flevel_->num_files) { may_be_out_of_lower_bound_ = user_comparator_.CompareWithoutTimestamp( ExtractUserKey(file_smallest_key(file_index_)), /*a_has_ts=*/true, *read_options_.iterate_lower_bound, /*b_has_ts=*/false) < 0; } } TableCache* table_cache_; const ReadOptions read_options_; const FileOptions& file_options_; const InternalKeyComparator& icomparator_; const UserComparatorWrapper user_comparator_; const LevelFilesBrief* flevel_; mutable FileDescriptor current_value_; // `prefix_extractor_` may be non-null even for total order seek. Checking // this variable is not the right way to identify whether prefix iterator // is used. const SliceTransform* prefix_extractor_; HistogramImpl* file_read_hist_; bool should_sample_; TableReaderCaller caller_; bool skip_filters_; bool allow_unprepared_value_; bool may_be_out_of_lower_bound_ = true; size_t file_index_; int level_; RangeDelAggregator* range_del_agg_; IteratorWrapper file_iter_; // May be nullptr PinnedIteratorsManager* pinned_iters_mgr_; // To be propagated to RangeDelAggregator in order to safely truncate range // tombstones. 
const std::vector* compaction_boundaries_; }; void LevelIterator::Seek(const Slice& target) { // Check whether the seek key fall under the same file bool need_to_reseek = true; if (file_iter_.iter() != nullptr && file_index_ < flevel_->num_files) { const FdWithKeyRange& cur_file = flevel_->files[file_index_]; if (icomparator_.InternalKeyComparator::Compare( target, cur_file.largest_key) <= 0 && icomparator_.InternalKeyComparator::Compare( target, cur_file.smallest_key) >= 0) { need_to_reseek = false; assert(static_cast(FindFile(icomparator_, *flevel_, target)) == file_index_); } } if (need_to_reseek) { TEST_SYNC_POINT("LevelIterator::Seek:BeforeFindFile"); size_t new_file_index = FindFile(icomparator_, *flevel_, target); InitFileIterator(new_file_index); } if (file_iter_.iter() != nullptr) { file_iter_.Seek(target); } if (SkipEmptyFileForward() && prefix_extractor_ != nullptr && !read_options_.total_order_seek && !read_options_.auto_prefix_mode && file_iter_.iter() != nullptr && file_iter_.Valid()) { // We've skipped the file we initially positioned to. In the prefix // seek case, it is likely that the file is skipped because of // prefix bloom or hash, where more keys are skipped. We then check // the current key and invalidate the iterator if the prefix is // already passed. // When doing prefix iterator seek, when keys for one prefix have // been exhausted, it can jump to any key that is larger. Here we are // enforcing a stricter contract than that, in order to make it easier for // higher layers (merging and DB iterator) to reason the correctness: // 1. Within the prefix, the result should be accurate. // 2. If keys for the prefix is exhausted, it is either positioned to the // next key after the prefix, or make the iterator invalid. // A side benefit will be that it invalidates the iterator earlier so that // the upper level merging iterator can merge fewer child iterators. 
Slice target_user_key = ExtractUserKey(target); Slice file_user_key = ExtractUserKey(file_iter_.key()); if (prefix_extractor_->InDomain(target_user_key) && (!prefix_extractor_->InDomain(file_user_key) || user_comparator_.Compare( prefix_extractor_->Transform(target_user_key), prefix_extractor_->Transform(file_user_key)) != 0)) { SetFileIterator(nullptr); } } CheckMayBeOutOfLowerBound(); } void LevelIterator::SeekForPrev(const Slice& target) { size_t new_file_index = FindFile(icomparator_, *flevel_, target); if (new_file_index >= flevel_->num_files) { new_file_index = flevel_->num_files - 1; } InitFileIterator(new_file_index); if (file_iter_.iter() != nullptr) { file_iter_.SeekForPrev(target); SkipEmptyFileBackward(); } CheckMayBeOutOfLowerBound(); } void LevelIterator::SeekToFirst() { InitFileIterator(0); if (file_iter_.iter() != nullptr) { file_iter_.SeekToFirst(); } SkipEmptyFileForward(); CheckMayBeOutOfLowerBound(); } void LevelIterator::SeekToLast() { InitFileIterator(flevel_->num_files - 1); if (file_iter_.iter() != nullptr) { file_iter_.SeekToLast(); } SkipEmptyFileBackward(); CheckMayBeOutOfLowerBound(); } void LevelIterator::Next() { NextImpl(); } bool LevelIterator::NextAndGetResult(IterateResult* result) { NextImpl(); bool is_valid = Valid(); if (is_valid) { result->key = key(); result->may_be_out_of_upper_bound = MayBeOutOfUpperBound(); result->value_prepared = !allow_unprepared_value_; } return is_valid; } void LevelIterator::Prev() { assert(Valid()); file_iter_.Prev(); SkipEmptyFileBackward(); } bool LevelIterator::SkipEmptyFileForward() { bool seen_empty_file = false; while (file_iter_.iter() == nullptr || (!file_iter_.Valid() && file_iter_.status().ok() && !file_iter_.iter()->IsOutOfBound())) { seen_empty_file = true; // Move to next file if (file_index_ >= flevel_->num_files - 1) { // Already at the last file SetFileIterator(nullptr); break; } if (KeyReachedUpperBound(file_smallest_key(file_index_ + 1))) { SetFileIterator(nullptr); break; } 
InitFileIterator(file_index_ + 1); if (file_iter_.iter() != nullptr) { file_iter_.SeekToFirst(); } } return seen_empty_file; } void LevelIterator::SkipEmptyFileBackward() { while (file_iter_.iter() == nullptr || (!file_iter_.Valid() && file_iter_.status().ok())) { // Move to previous file if (file_index_ == 0) { // Already the first file SetFileIterator(nullptr); return; } InitFileIterator(file_index_ - 1); if (file_iter_.iter() != nullptr) { file_iter_.SeekToLast(); } } } void LevelIterator::SetFileIterator(InternalIterator* iter) { if (pinned_iters_mgr_ && iter) { iter->SetPinnedItersMgr(pinned_iters_mgr_); } InternalIterator* old_iter = file_iter_.Set(iter); if (pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled()) { pinned_iters_mgr_->PinIterator(old_iter); } else { delete old_iter; } } void LevelIterator::InitFileIterator(size_t new_file_index) { if (new_file_index >= flevel_->num_files) { file_index_ = new_file_index; SetFileIterator(nullptr); return; } else { // If the file iterator shows incomplete, we try it again if users seek // to the same file, as this time we may go to a different data block // which is cached in block cache. 
// if (file_iter_.iter() != nullptr && !file_iter_.status().IsIncomplete() && new_file_index == file_index_) { // file_iter_ is already constructed with this iterator, so // no need to change anything } else { file_index_ = new_file_index; InternalIterator* iter = NewFileIterator(); SetFileIterator(iter); } } } } // anonymous namespace Status Version::GetTableProperties(std::shared_ptr* tp, const FileMetaData* file_meta, const std::string* fname) const { auto table_cache = cfd_->table_cache(); auto ioptions = cfd_->ioptions(); Status s = table_cache->GetTableProperties( file_options_, cfd_->internal_comparator(), file_meta->fd, tp, mutable_cf_options_.prefix_extractor.get(), true /* no io */); if (s.ok()) { return s; } // We only ignore error type `Incomplete` since it's by design that we // disallow table when it's not in table cache. if (!s.IsIncomplete()) { return s; } // 2. Table is not present in table cache, we'll read the table properties // directly from the properties block in the file. std::unique_ptr file; std::string file_name; if (fname != nullptr) { file_name = *fname; } else { file_name = TableFileName(ioptions->cf_paths, file_meta->fd.GetNumber(), file_meta->fd.GetPathId()); } s = ioptions->fs->NewRandomAccessFile(file_name, file_options_, &file, nullptr); if (!s.ok()) { return s; } TableProperties* raw_table_properties; // By setting the magic number to kInvalidTableMagicNumber, we can by // pass the magic number check in the footer. 
std::unique_ptr file_reader( new RandomAccessFileReader( std::move(file), file_name, nullptr /* env */, nullptr /* stats */, 0 /* hist_type */, nullptr /* file_read_hist */, nullptr /* rate_limiter */, ioptions->listeners)); s = ReadTableProperties( file_reader.get(), file_meta->fd.GetFileSize(), Footer::kInvalidTableMagicNumber /* table's magic number */, *ioptions, &raw_table_properties, false /* compression_type_missing */); if (!s.ok()) { return s; } RecordTick(ioptions->statistics, NUMBER_DIRECT_LOAD_TABLE_PROPERTIES); *tp = std::shared_ptr(raw_table_properties); return s; } Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { Status s; for (int level = 0; level < storage_info_.num_levels_; level++) { s = GetPropertiesOfAllTables(props, level); if (!s.ok()) { return s; } } return Status::OK(); } Status Version::TablesRangeTombstoneSummary(int max_entries_to_print, std::string* out_str) { if (max_entries_to_print <= 0) { return Status::OK(); } int num_entries_left = max_entries_to_print; std::stringstream ss; for (int level = 0; level < storage_info_.num_levels_; level++) { for (const auto& file_meta : storage_info_.files_[level]) { auto fname = TableFileName(cfd_->ioptions()->cf_paths, file_meta->fd.GetNumber(), file_meta->fd.GetPathId()); ss << "=== file : " << fname << " ===\n"; TableCache* table_cache = cfd_->table_cache(); std::unique_ptr tombstone_iter; Status s = table_cache->GetRangeTombstoneIterator( ReadOptions(), cfd_->internal_comparator(), *file_meta, &tombstone_iter); if (!s.ok()) { return s; } if (tombstone_iter) { tombstone_iter->SeekToFirst(); while (tombstone_iter->Valid() && num_entries_left > 0) { ss << "start: " << tombstone_iter->start_key().ToString(true) << " end: " << tombstone_iter->end_key().ToString(true) << " seq: " << tombstone_iter->seq() << '\n'; tombstone_iter->Next(); num_entries_left--; } if (num_entries_left <= 0) { break; } } } if (num_entries_left <= 0) { break; } } assert(num_entries_left >= 0); if 
(num_entries_left <= 0) { ss << "(results may not be complete)\n"; } *out_str = ss.str(); return Status::OK(); } Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props, int level) { for (const auto& file_meta : storage_info_.files_[level]) { auto fname = TableFileName(cfd_->ioptions()->cf_paths, file_meta->fd.GetNumber(), file_meta->fd.GetPathId()); // 1. If the table is already present in table cache, load table // properties from there. std::shared_ptr table_properties; Status s = GetTableProperties(&table_properties, file_meta, &fname); if (s.ok()) { props->insert({fname, table_properties}); } else { return s; } } return Status::OK(); } Status Version::GetPropertiesOfTablesInRange( const Range* range, std::size_t n, TablePropertiesCollection* props) const { for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) { for (decltype(n) i = 0; i < n; i++) { // Convert user_key into a corresponding internal key. InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); std::vector files; storage_info_.GetOverlappingInputs(level, &k1, &k2, &files, -1, nullptr, false); for (const auto& file_meta : files) { auto fname = TableFileName(cfd_->ioptions()->cf_paths, file_meta->fd.GetNumber(), file_meta->fd.GetPathId()); if (props->count(fname) == 0) { // 1. If the table is already present in table cache, load table // properties from there. 
std::shared_ptr table_properties; Status s = GetTableProperties(&table_properties, file_meta, &fname); if (s.ok()) { props->insert({fname, table_properties}); } else { return s; } } } } } return Status::OK(); } Status Version::GetAggregatedTableProperties( std::shared_ptr* tp, int level) { TablePropertiesCollection props; Status s; if (level < 0) { s = GetPropertiesOfAllTables(&props); } else { s = GetPropertiesOfAllTables(&props, level); } if (!s.ok()) { return s; } auto* new_tp = new TableProperties(); for (const auto& item : props) { new_tp->Add(*item.second); } tp->reset(new_tp); return Status::OK(); } size_t Version::GetMemoryUsageByTableReaders() { size_t total_usage = 0; for (auto& file_level : storage_info_.level_files_brief_) { for (size_t i = 0; i < file_level.num_files; i++) { total_usage += cfd_->table_cache()->GetMemoryUsageByTableReader( file_options_, cfd_->internal_comparator(), file_level.files[i].fd, mutable_cf_options_.prefix_extractor.get()); } } return total_usage; } void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { assert(cf_meta); assert(cfd_); cf_meta->name = cfd_->GetName(); cf_meta->size = 0; cf_meta->file_count = 0; cf_meta->levels.clear(); auto* ioptions = cfd_->ioptions(); auto* vstorage = storage_info(); for (int level = 0; level < cfd_->NumberLevels(); level++) { uint64_t level_size = 0; cf_meta->file_count += vstorage->LevelFiles(level).size(); std::vector files; for (const auto& file : vstorage->LevelFiles(level)) { uint32_t path_id = file->fd.GetPathId(); std::string file_path; if (path_id < ioptions->cf_paths.size()) { file_path = ioptions->cf_paths[path_id].path; } else { assert(!ioptions->cf_paths.empty()); file_path = ioptions->cf_paths.back().path; } const uint64_t file_number = file->fd.GetNumber(); files.emplace_back(SstFileMetaData{ MakeTableFileName("", file_number), file_number, file_path, static_cast(file->fd.GetFileSize()), file->fd.smallest_seqno, file->fd.largest_seqno, 
file->smallest.user_key().ToString(), file->largest.user_key().ToString(), file->stats.num_reads_sampled.load(std::memory_order_relaxed), file->being_compacted, file->oldest_blob_file_number, file->TryGetOldestAncesterTime(), file->TryGetFileCreationTime(), file->file_checksum, file->file_checksum_func_name}); files.back().num_entries = file->num_entries; files.back().num_deletions = file->num_deletions; level_size += file->fd.GetFileSize(); } cf_meta->levels.emplace_back( level, level_size, std::move(files)); cf_meta->size += level_size; } } uint64_t Version::GetSstFilesSize() { uint64_t sst_files_size = 0; for (int level = 0; level < storage_info_.num_levels_; level++) { for (const auto& file_meta : storage_info_.LevelFiles(level)) { sst_files_size += file_meta->fd.GetFileSize(); } } return sst_files_size; } void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) { uint64_t oldest_time = port::kMaxUint64; for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) { for (FileMetaData* meta : storage_info_.LevelFiles(level)) { assert(meta->fd.table_reader != nullptr); uint64_t file_creation_time = meta->TryGetFileCreationTime(); if (file_creation_time == kUnknownFileCreationTime) { *creation_time = 0; return; } if (file_creation_time < oldest_time) { oldest_time = file_creation_time; } } } *creation_time = oldest_time; } uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const { // Estimation will be inaccurate when: // (1) there exist merge keys // (2) keys are directly overwritten // (3) deletion on non-existing keys // (4) low number of samples if (current_num_samples_ == 0) { return 0; } if (current_num_non_deletions_ <= current_num_deletions_) { return 0; } uint64_t est = current_num_non_deletions_ - current_num_deletions_; uint64_t file_count = 0; for (int level = 0; level < num_levels_; ++level) { file_count += files_[level].size(); } if (current_num_samples_ < file_count) { // casting to avoid overflowing return static_cast( 
(est * static_cast(file_count) / current_num_samples_) ); } else { return est; } } double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel( int level) const { assert(level < num_levels_); uint64_t sum_file_size_bytes = 0; uint64_t sum_data_size_bytes = 0; for (auto* file_meta : files_[level]) { sum_file_size_bytes += file_meta->fd.GetFileSize(); sum_data_size_bytes += file_meta->raw_key_size + file_meta->raw_value_size; } if (sum_file_size_bytes == 0) { return -1.0; } return static_cast(sum_data_size_bytes) / sum_file_size_bytes; } void Version::AddIterators(const ReadOptions& read_options, const FileOptions& soptions, MergeIteratorBuilder* merge_iter_builder, RangeDelAggregator* range_del_agg, bool allow_unprepared_value) { assert(storage_info_.finalized_); for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) { AddIteratorsForLevel(read_options, soptions, merge_iter_builder, level, range_del_agg, allow_unprepared_value); } } void Version::AddIteratorsForLevel(const ReadOptions& read_options, const FileOptions& soptions, MergeIteratorBuilder* merge_iter_builder, int level, RangeDelAggregator* range_del_agg, bool allow_unprepared_value) { assert(storage_info_.finalized_); if (level >= storage_info_.num_non_empty_levels()) { // This is an empty level return; } else if (storage_info_.LevelFilesBrief(level).num_files == 0) { // No files in this level return; } bool should_sample = should_sample_file_read(); auto* arena = merge_iter_builder->GetArena(); if (level == 0) { // Merge all level zero files together since they may overlap for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) { const auto& file = storage_info_.LevelFilesBrief(0).files[i]; merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator( read_options, soptions, cfd_->internal_comparator(), *file.file_metadata, range_del_agg, mutable_cf_options_.prefix_extractor.get(), nullptr, cfd_->internal_stats()->GetFileReadHist(0), 
TableReaderCaller::kUserIterator, arena, /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_, /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, allow_unprepared_value)); } if (should_sample) { // Count ones for every L0 files. This is done per iterator creation // rather than Seek(), while files in other levels are recored per seek. // If users execute one range query per iterator, there may be some // discrepancy here. for (FileMetaData* meta : storage_info_.LevelFiles(0)) { sample_file_read_inc(meta); } } } else if (storage_info_.LevelFilesBrief(level).num_files > 0) { // For levels > 0, we can use a concatenating iterator that sequentially // walks through the non-overlapping files in the level, opening them // lazily. auto* mem = arena->AllocateAligned(sizeof(LevelIterator)); merge_iter_builder->AddIterator(new (mem) LevelIterator( cfd_->table_cache(), read_options, soptions, cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level), mutable_cf_options_.prefix_extractor.get(), should_sample_file_read(), cfd_->internal_stats()->GetFileReadHist(level), TableReaderCaller::kUserIterator, IsFilterSkipped(level), level, range_del_agg, /*compaction_boundaries=*/nullptr, allow_unprepared_value)); } } Status Version::OverlapWithLevelIterator(const ReadOptions& read_options, const FileOptions& file_options, const Slice& smallest_user_key, const Slice& largest_user_key, int level, bool* overlap) { assert(storage_info_.finalized_); auto icmp = cfd_->internal_comparator(); auto ucmp = icmp.user_comparator(); Arena arena; Status status; ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* upper_bound */); *overlap = false; if (level == 0) { for (size_t i = 0; i < storage_info_.LevelFilesBrief(0).num_files; i++) { const auto file = &storage_info_.LevelFilesBrief(0).files[i]; if (AfterFile(ucmp, &smallest_user_key, file) || BeforeFile(ucmp, &largest_user_key, file)) { continue; } ScopedArenaIterator 
iter(cfd_->table_cache()->NewIterator( read_options, file_options, cfd_->internal_comparator(), *file->file_metadata, &range_del_agg, mutable_cf_options_.prefix_extractor.get(), nullptr, cfd_->internal_stats()->GetFileReadHist(0), TableReaderCaller::kUserIterator, &arena, /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_, /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, /*allow_unprepared_value=*/false)); status = OverlapWithIterator( ucmp, smallest_user_key, largest_user_key, iter.get(), overlap); if (!status.ok() || *overlap) { break; } } } else if (storage_info_.LevelFilesBrief(level).num_files > 0) { auto mem = arena.AllocateAligned(sizeof(LevelIterator)); ScopedArenaIterator iter(new (mem) LevelIterator( cfd_->table_cache(), read_options, file_options, cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level), mutable_cf_options_.prefix_extractor.get(), should_sample_file_read(), cfd_->internal_stats()->GetFileReadHist(level), TableReaderCaller::kUserIterator, IsFilterSkipped(level), level, &range_del_agg)); status = OverlapWithIterator( ucmp, smallest_user_key, largest_user_key, iter.get(), overlap); } if (status.ok() && *overlap == false && range_del_agg.IsRangeOverlapped(smallest_user_key, largest_user_key)) { *overlap = true; } return status; } VersionStorageInfo::VersionStorageInfo( const InternalKeyComparator* internal_comparator, const Comparator* user_comparator, int levels, CompactionStyle compaction_style, VersionStorageInfo* ref_vstorage, bool _force_consistency_checks) : internal_comparator_(internal_comparator), user_comparator_(user_comparator), // cfd is nullptr if Version is dummy num_levels_(levels), num_non_empty_levels_(0), file_indexer_(user_comparator), compaction_style_(compaction_style), files_(new std::vector[num_levels_]), base_level_(num_levels_ == 1 ? 
-1 : 1), level_multiplier_(0.0), files_by_compaction_pri_(num_levels_), level0_non_overlapping_(false), next_file_to_compact_by_size_(num_levels_), compaction_score_(num_levels_), compaction_level_(num_levels_), l0_delay_trigger_count_(0), accumulated_file_size_(0), accumulated_raw_key_size_(0), accumulated_raw_value_size_(0), accumulated_num_non_deletions_(0), accumulated_num_deletions_(0), current_num_non_deletions_(0), current_num_deletions_(0), current_num_samples_(0), estimated_compaction_needed_bytes_(0), finalized_(false), force_consistency_checks_(_force_consistency_checks) { if (ref_vstorage != nullptr) { accumulated_file_size_ = ref_vstorage->accumulated_file_size_; accumulated_raw_key_size_ = ref_vstorage->accumulated_raw_key_size_; accumulated_raw_value_size_ = ref_vstorage->accumulated_raw_value_size_; accumulated_num_non_deletions_ = ref_vstorage->accumulated_num_non_deletions_; accumulated_num_deletions_ = ref_vstorage->accumulated_num_deletions_; current_num_non_deletions_ = ref_vstorage->current_num_non_deletions_; current_num_deletions_ = ref_vstorage->current_num_deletions_; current_num_samples_ = ref_vstorage->current_num_samples_; oldest_snapshot_seqnum_ = ref_vstorage->oldest_snapshot_seqnum_; } } Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset, const FileOptions& file_opt, const MutableCFOptions mutable_cf_options, uint64_t version_number) : env_(vset->env_), cfd_(column_family_data), info_log_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->info_log), db_statistics_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->statistics), table_cache_((cfd_ == nullptr) ? nullptr : cfd_->table_cache()), merge_operator_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->merge_operator), storage_info_( (cfd_ == nullptr) ? nullptr : &cfd_->internal_comparator(), (cfd_ == nullptr) ? nullptr : cfd_->user_comparator(), cfd_ == nullptr ? 0 : cfd_->NumberLevels(), cfd_ == nullptr ? 
kCompactionStyleLevel : cfd_->ioptions()->compaction_style, (cfd_ == nullptr || cfd_->current() == nullptr) ? nullptr : cfd_->current()->storage_info(), cfd_ == nullptr ? false : cfd_->ioptions()->force_consistency_checks), vset_(vset), next_(this), prev_(this), refs_(0), file_options_(file_opt), mutable_cf_options_(mutable_cf_options), max_file_size_for_l0_meta_pin_( MaxFileSizeForL0MetaPin(mutable_cf_options_)), version_number_(version_number) {} void Version::Get(const ReadOptions& read_options, const LookupKey& k, PinnableSlice* value, std::string* timestamp, Status* status, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, bool* value_found, bool* key_exists, SequenceNumber* seq, ReadCallback* callback, bool* is_blob, bool do_merge) { Slice ikey = k.internal_key(); Slice user_key = k.user_key(); assert(status->ok() || status->IsMergeInProgress()); if (key_exists != nullptr) { // will falsify below if not found *key_exists = true; } PinnedIteratorsManager pinned_iters_mgr; uint64_t tracing_get_id = BlockCacheTraceHelper::kReservedGetId; if (vset_ && vset_->block_cache_tracer_ && vset_->block_cache_tracer_->is_tracing_enabled()) { tracing_get_id = vset_->block_cache_tracer_->NextGetId(); } GetContext get_context( user_comparator(), merge_operator_, info_log_, db_statistics_, status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key, do_merge ? value : nullptr, do_merge ? timestamp : nullptr, value_found, merge_context, do_merge, max_covering_tombstone_seq, this->env_, seq, merge_operator_ ? 
&pinned_iters_mgr : nullptr, callback, is_blob, tracing_get_id); // Pin blocks that we read to hold merge operands if (merge_operator_) { pinned_iters_mgr.StartPinning(); } FilePicker fp( storage_info_.files_, user_key, ikey, &storage_info_.level_files_brief_, storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_, user_comparator(), internal_comparator()); FdWithKeyRange* f = fp.GetNextFile(); while (f != nullptr) { if (*max_covering_tombstone_seq > 0) { // The remaining files we look at will only contain covered keys, so we // stop here. break; } if (get_context.sample()) { sample_file_read_inc(f->file_metadata); } bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && get_perf_context()->per_level_perf_context_enabled; StopWatchNano timer(env_, timer_enabled /* auto_start */); *status = table_cache_->Get( read_options, *internal_comparator(), *f->file_metadata, ikey, &get_context, mutable_cf_options_.prefix_extractor.get(), cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), IsFilterSkipped(static_cast(fp.GetHitFileLevel()), fp.IsHitFileLastInLevel()), fp.GetHitFileLevel(), max_file_size_for_l0_meta_pin_); // TODO: examine the behavior for corrupted key if (timer_enabled) { PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(), fp.GetHitFileLevel()); } if (!status->ok()) { return; } // report the counters before returning if (get_context.State() != GetContext::kNotFound && get_context.State() != GetContext::kMerge && db_statistics_ != nullptr) { get_context.ReportCounters(); } switch (get_context.State()) { case GetContext::kNotFound: // Keep searching in other files break; case GetContext::kMerge: // TODO: update per-level perfcontext user_key_return_count for kMerge break; case GetContext::kFound: if (fp.GetHitFileLevel() == 0) { RecordTick(db_statistics_, GET_HIT_L0); } else if (fp.GetHitFileLevel() == 1) { RecordTick(db_statistics_, GET_HIT_L1); } else if (fp.GetHitFileLevel() >= 2) { 
RecordTick(db_statistics_, GET_HIT_L2_AND_UP); } PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, fp.GetHitFileLevel()); return; case GetContext::kDeleted: // Use empty error message for speed *status = Status::NotFound(); return; case GetContext::kCorrupt: *status = Status::Corruption("corrupted key for ", user_key); return; case GetContext::kBlobIndex: ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index."); *status = Status::NotSupported( "Encounter unexpected blob index. Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); return; } f = fp.GetNextFile(); } if (db_statistics_ != nullptr) { get_context.ReportCounters(); } if (GetContext::kMerge == get_context.State()) { if (!do_merge) { *status = Status::OK(); return; } if (!merge_operator_) { *status = Status::InvalidArgument( "merge_operator is not properly initialized."); return; } // merge_operands are in saver and we hit the beginning of the key history // do a final merge of nullptr and operands; std::string* str_value = value != nullptr ? 
value->GetSelf() : nullptr; *status = MergeHelper::TimedFullMerge( merge_operator_, user_key, nullptr, merge_context->GetOperands(), str_value, info_log_, db_statistics_, env_, nullptr /* result_operand */, true); if (LIKELY(value != nullptr)) { value->PinSelf(); } } else { if (key_exists != nullptr) { *key_exists = false; } *status = Status::NotFound(); // Use an empty error message for speed } } void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, ReadCallback* callback, bool* is_blob) { PinnedIteratorsManager pinned_iters_mgr; // Pin blocks that we read to hold merge operands if (merge_operator_) { pinned_iters_mgr.StartPinning(); } uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId; if (vset_ && vset_->block_cache_tracer_ && vset_->block_cache_tracer_->is_tracing_enabled()) { tracing_mget_id = vset_->block_cache_tracer_->NextGetId(); } // Even though we know the batch size won't be > MAX_BATCH_SIZE, // use autovector in order to avoid unnecessary construction of GetContext // objects, which is expensive autovector get_ctx; for (auto iter = range->begin(); iter != range->end(); ++iter) { assert(iter->s->ok() || iter->s->IsMergeInProgress()); get_ctx.emplace_back( user_comparator(), merge_operator_, info_log_, db_statistics_, iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge, iter->ukey, iter->value, iter->timestamp, nullptr, &(iter->merge_context), true, &iter->max_covering_tombstone_seq, this->env_, nullptr, merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob, tracing_mget_id); // MergeInProgress status, if set, has been transferred to the get_context // state, so we set status to ok here. 
From now on, the iter status will // be used for IO errors, and get_context state will be used for any // key level errors *(iter->s) = Status::OK(); } int get_ctx_index = 0; for (auto iter = range->begin(); iter != range->end(); ++iter, get_ctx_index++) { iter->get_context = &(get_ctx[get_ctx_index]); } MultiGetRange file_picker_range(*range, range->begin(), range->end()); FilePickerMultiGet fp( &file_picker_range, &storage_info_.level_files_brief_, storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_, user_comparator(), internal_comparator()); FdWithKeyRange* f = fp.GetNextFile(); Status s; while (f != nullptr) { MultiGetRange file_range = fp.CurrentFileRange(); bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && get_perf_context()->per_level_perf_context_enabled; StopWatchNano timer(env_, timer_enabled /* auto_start */); s = table_cache_->MultiGet( read_options, *internal_comparator(), *f->file_metadata, &file_range, mutable_cf_options_.prefix_extractor.get(), cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()), IsFilterSkipped(static_cast(fp.GetHitFileLevel()), fp.IsHitFileLastInLevel()), fp.GetHitFileLevel()); // TODO: examine the behavior for corrupted key if (timer_enabled) { PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(), fp.GetHitFileLevel()); } if (!s.ok()) { // TODO: Set status for individual keys appropriately for (auto iter = file_range.begin(); iter != file_range.end(); ++iter) { *iter->s = s; file_range.MarkKeyDone(iter); } return; } uint64_t batch_size = 0; for (auto iter = file_range.begin(); s.ok() && iter != file_range.end(); ++iter) { GetContext& get_context = *iter->get_context; Status* status = iter->s; // The Status in the KeyContext takes precedence over GetContext state // Status may be an error if there were any IO errors in the table // reader. 
We never expect Status to be NotFound(), as that is // determined by get_context assert(!status->IsNotFound()); if (!status->ok()) { file_range.MarkKeyDone(iter); continue; } if (get_context.sample()) { sample_file_read_inc(f->file_metadata); } batch_size++; // report the counters before returning if (get_context.State() != GetContext::kNotFound && get_context.State() != GetContext::kMerge && db_statistics_ != nullptr) { get_context.ReportCounters(); } else { if (iter->max_covering_tombstone_seq > 0) { // The remaining files we look at will only contain covered keys, so // we stop here for this key file_picker_range.SkipKey(iter); } } switch (get_context.State()) { case GetContext::kNotFound: // Keep searching in other files break; case GetContext::kMerge: // TODO: update per-level perfcontext user_key_return_count for kMerge break; case GetContext::kFound: if (fp.GetHitFileLevel() == 0) { RecordTick(db_statistics_, GET_HIT_L0); } else if (fp.GetHitFileLevel() == 1) { RecordTick(db_statistics_, GET_HIT_L1); } else if (fp.GetHitFileLevel() >= 2) { RecordTick(db_statistics_, GET_HIT_L2_AND_UP); } PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, fp.GetHitFileLevel()); file_range.AddValueSize(iter->value->size()); file_range.MarkKeyDone(iter); if (file_range.GetValueSize() > read_options.value_size_soft_limit) { s = Status::Aborted(); break; } continue; case GetContext::kDeleted: // Use empty error message for speed *status = Status::NotFound(); file_range.MarkKeyDone(iter); continue; case GetContext::kCorrupt: *status = Status::Corruption("corrupted key for ", iter->lkey->user_key()); file_range.MarkKeyDone(iter); continue; case GetContext::kBlobIndex: ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index."); *status = Status::NotSupported( "Encounter unexpected blob index. 
Please open DB with " "ROCKSDB_NAMESPACE::blob_db::BlobDB instead."); file_range.MarkKeyDone(iter); continue; } } RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size); if (!s.ok() || file_picker_range.empty()) { break; } f = fp.GetNextFile(); } // Process any left over keys for (auto iter = range->begin(); s.ok() && iter != range->end(); ++iter) { GetContext& get_context = *iter->get_context; Status* status = iter->s; Slice user_key = iter->lkey->user_key(); if (db_statistics_ != nullptr) { get_context.ReportCounters(); } if (GetContext::kMerge == get_context.State()) { if (!merge_operator_) { *status = Status::InvalidArgument( "merge_operator is not properly initialized."); range->MarkKeyDone(iter); continue; } // merge_operands are in saver and we hit the beginning of the key history // do a final merge of nullptr and operands; std::string* str_value = iter->value != nullptr ? iter->value->GetSelf() : nullptr; *status = MergeHelper::TimedFullMerge( merge_operator_, user_key, nullptr, iter->merge_context.GetOperands(), str_value, info_log_, db_statistics_, env_, nullptr /* result_operand */, true); if (LIKELY(iter->value != nullptr)) { iter->value->PinSelf(); range->AddValueSize(iter->value->size()); range->MarkKeyDone(iter); if (range->GetValueSize() > read_options.value_size_soft_limit) { s = Status::Aborted(); break; } } } else { range->MarkKeyDone(iter); *status = Status::NotFound(); // Use an empty error message for speed } } for (auto iter = range->begin(); iter != range->end(); ++iter) { range->MarkKeyDone(iter); *(iter->s) = s; } } bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) { // Reaching the bottom level implies misses at all upper levels, so we'll // skip checking the filters when we predict a hit. 
return cfd_->ioptions()->optimize_filters_for_hits && (level > 0 || is_file_last_in_level) && level == storage_info_.num_non_empty_levels() - 1; } void VersionStorageInfo::GenerateLevelFilesBrief() { level_files_brief_.resize(num_non_empty_levels_); for (int level = 0; level < num_non_empty_levels_; level++) { DoGenerateLevelFilesBrief( &level_files_brief_[level], files_[level], &arena_); } } void Version::PrepareApply( const MutableCFOptions& mutable_cf_options, bool update_stats) { TEST_SYNC_POINT_CALLBACK( "Version::PrepareApply:forced_check", reinterpret_cast(&storage_info_.force_consistency_checks_)); UpdateAccumulatedStats(update_stats); storage_info_.UpdateNumNonEmptyLevels(); storage_info_.CalculateBaseBytes(*cfd_->ioptions(), mutable_cf_options); storage_info_.UpdateFilesByCompactionPri(cfd_->ioptions()->compaction_pri); storage_info_.GenerateFileIndexer(); storage_info_.GenerateLevelFilesBrief(); storage_info_.GenerateLevel0NonOverlapping(); storage_info_.GenerateBottommostFiles(); } bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { if (file_meta->init_stats_from_file || file_meta->compensated_file_size > 0) { return false; } std::shared_ptr tp; Status s = GetTableProperties(&tp, file_meta); file_meta->init_stats_from_file = true; if (!s.ok()) { ROCKS_LOG_ERROR(vset_->db_options_->info_log, "Unable to load table properties for file %" PRIu64 " --- %s\n", file_meta->fd.GetNumber(), s.ToString().c_str()); return false; } if (tp.get() == nullptr) return false; file_meta->num_entries = tp->num_entries; file_meta->num_deletions = tp->num_deletions; file_meta->raw_value_size = tp->raw_value_size; file_meta->raw_key_size = tp->raw_key_size; return true; } void VersionStorageInfo::UpdateAccumulatedStats(FileMetaData* file_meta) { TEST_SYNC_POINT_CALLBACK("VersionStorageInfo::UpdateAccumulatedStats", nullptr); assert(file_meta->init_stats_from_file); accumulated_file_size_ += file_meta->fd.GetFileSize(); accumulated_raw_key_size_ += 
file_meta->raw_key_size; accumulated_raw_value_size_ += file_meta->raw_value_size; accumulated_num_non_deletions_ += file_meta->num_entries - file_meta->num_deletions; accumulated_num_deletions_ += file_meta->num_deletions; current_num_non_deletions_ += file_meta->num_entries - file_meta->num_deletions; current_num_deletions_ += file_meta->num_deletions; current_num_samples_++; } void VersionStorageInfo::RemoveCurrentStats(FileMetaData* file_meta) { if (file_meta->init_stats_from_file) { current_num_non_deletions_ -= file_meta->num_entries - file_meta->num_deletions; current_num_deletions_ -= file_meta->num_deletions; current_num_samples_--; } } void Version::UpdateAccumulatedStats(bool update_stats) { if (update_stats) { // maximum number of table properties loaded from files. const int kMaxInitCount = 20; int init_count = 0; // here only the first kMaxInitCount files which haven't been // initialized from file will be updated with num_deletions. // The motivation here is to cap the maximum I/O per Version creation. // The reason for choosing files from lower-level instead of higher-level // is that such design is able to propagate the initialization from // lower-level to higher-level: When the num_deletions of lower-level // files are updated, it will make the lower-level files have accurate // compensated_file_size, making lower-level to higher-level compaction // will be triggered, which creates higher-level files whose num_deletions // will be updated here. for (int level = 0; level < storage_info_.num_levels_ && init_count < kMaxInitCount; ++level) { for (auto* file_meta : storage_info_.files_[level]) { if (MaybeInitializeFileMetaData(file_meta)) { // each FileMeta will be initialized only once. storage_info_.UpdateAccumulatedStats(file_meta); // when option "max_open_files" is -1, all the file metadata has // already been read, so MaybeInitializeFileMetaData() won't incur // any I/O cost. 
"max_open_files=-1" means that the table cache passed // to the VersionSet and then to the ColumnFamilySet has a size of // TableCache::kInfiniteCapacity if (vset_->GetColumnFamilySet()->get_table_cache()->GetCapacity() == TableCache::kInfiniteCapacity) { continue; } if (++init_count >= kMaxInitCount) { break; } } } } // In case all sampled-files contain only deletion entries, then we // load the table-property of a file in higher-level to initialize // that value. for (int level = storage_info_.num_levels_ - 1; storage_info_.accumulated_raw_value_size_ == 0 && level >= 0; --level) { for (int i = static_cast(storage_info_.files_[level].size()) - 1; storage_info_.accumulated_raw_value_size_ == 0 && i >= 0; --i) { if (MaybeInitializeFileMetaData(storage_info_.files_[level][i])) { storage_info_.UpdateAccumulatedStats(storage_info_.files_[level][i]); } } } } storage_info_.ComputeCompensatedSizes(); } void VersionStorageInfo::ComputeCompensatedSizes() { static const int kDeletionWeightOnCompaction = 2; uint64_t average_value_size = GetAverageValueSize(); // compute the compensated size for (int level = 0; level < num_levels_; level++) { for (auto* file_meta : files_[level]) { // Here we only compute compensated_file_size for those file_meta // which compensated_file_size is uninitialized (== 0). This is true only // for files that have been created right now and no other thread has // access to them. That's why we can safely mutate compensated_file_size. if (file_meta->compensated_file_size == 0) { file_meta->compensated_file_size = file_meta->fd.GetFileSize(); // Here we only boost the size of deletion entries of a file only // when the number of deletion entries is greater than the number of // non-deletion entries in the file. The motivation here is that in // a stable workload, the number of deletion entries should be roughly // equal to the number of non-deletion entries. 
If we compensate the // size of deletion entries in a stable workload, the deletion // compensation logic might introduce unwanted effet which changes the // shape of LSM tree. if (file_meta->num_deletions * 2 >= file_meta->num_entries) { file_meta->compensated_file_size += (file_meta->num_deletions * 2 - file_meta->num_entries) * average_value_size * kDeletionWeightOnCompaction; } } } } } int VersionStorageInfo::MaxInputLevel() const { if (compaction_style_ == kCompactionStyleLevel) { return num_levels() - 2; } return 0; } int VersionStorageInfo::MaxOutputLevel(bool allow_ingest_behind) const { if (allow_ingest_behind) { assert(num_levels() > 1); return num_levels() - 2; } return num_levels() - 1; } void VersionStorageInfo::EstimateCompactionBytesNeeded( const MutableCFOptions& mutable_cf_options) { // Only implemented for level-based compaction if (compaction_style_ != kCompactionStyleLevel) { estimated_compaction_needed_bytes_ = 0; return; } // Start from Level 0, if level 0 qualifies compaction to level 1, // we estimate the size of compaction. // Then we move on to the next level and see whether it qualifies compaction // to the next level. The size of the level is estimated as the actual size // on the level plus the input bytes from the previous level if there is any. // If it exceeds, take the exceeded bytes as compaction input and add the size // of the compaction size to tatal size. // We keep doing it to Level 2, 3, etc, until the last level and return the // accumulated bytes. 
uint64_t bytes_compact_to_next_level = 0; uint64_t level_size = 0; for (auto* f : files_[0]) { level_size += f->fd.GetFileSize(); } // Level 0 bool level0_compact_triggered = false; if (static_cast(files_[0].size()) >= mutable_cf_options.level0_file_num_compaction_trigger || level_size >= mutable_cf_options.max_bytes_for_level_base) { level0_compact_triggered = true; estimated_compaction_needed_bytes_ = level_size; bytes_compact_to_next_level = level_size; } else { estimated_compaction_needed_bytes_ = 0; } // Level 1 and up. uint64_t bytes_next_level = 0; for (int level = base_level(); level <= MaxInputLevel(); level++) { level_size = 0; if (bytes_next_level > 0) { #ifndef NDEBUG uint64_t level_size2 = 0; for (auto* f : files_[level]) { level_size2 += f->fd.GetFileSize(); } assert(level_size2 == bytes_next_level); #endif level_size = bytes_next_level; bytes_next_level = 0; } else { for (auto* f : files_[level]) { level_size += f->fd.GetFileSize(); } } if (level == base_level() && level0_compact_triggered) { // Add base level size to compaction if level0 compaction triggered. estimated_compaction_needed_bytes_ += level_size; } // Add size added by previous compaction level_size += bytes_compact_to_next_level; bytes_compact_to_next_level = 0; uint64_t level_target = MaxBytesForLevel(level); if (level_size > level_target) { bytes_compact_to_next_level = level_size - level_target; // Estimate the actual compaction fan-out ratio as size ratio between // the two levels. 
assert(bytes_next_level == 0); if (level + 1 < num_levels_) { for (auto* f : files_[level + 1]) { bytes_next_level += f->fd.GetFileSize(); } } if (bytes_next_level > 0) { assert(level_size > 0); estimated_compaction_needed_bytes_ += static_cast( static_cast(bytes_compact_to_next_level) * (static_cast(bytes_next_level) / static_cast(level_size) + 1)); } } } } namespace { uint32_t GetExpiredTtlFilesCount(const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, const std::vector& files) { uint32_t ttl_expired_files_count = 0; int64_t _current_time; auto status = ioptions.env->GetCurrentTime(&_current_time); if (status.ok()) { const uint64_t current_time = static_cast(_current_time); for (FileMetaData* f : files) { if (!f->being_compacted) { uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime(); if (oldest_ancester_time != 0 && oldest_ancester_time < (current_time - mutable_cf_options.ttl)) { ttl_expired_files_count++; } } } } return ttl_expired_files_count; } } // anonymous namespace void VersionStorageInfo::ComputeCompactionScore( const ImmutableCFOptions& immutable_cf_options, const MutableCFOptions& mutable_cf_options) { for (int level = 0; level <= MaxInputLevel(); level++) { double score; if (level == 0) { // We treat level-0 specially by bounding the number of files // instead of number of bytes for two reasons: // // (1) With larger write-buffer sizes, it is nice not to do too // many level-0 compactions. // // (2) The files in level-0 are merged on every read and // therefore we wish to avoid too many files when the individual // file size is small (perhaps because of a small write-buffer // setting, or very high compression ratios, or lots of // overwrites/deletions). 
int num_sorted_runs = 0; uint64_t total_size = 0; for (auto* f : files_[level]) { if (!f->being_compacted) { total_size += f->compensated_file_size; num_sorted_runs++; } } if (compaction_style_ == kCompactionStyleUniversal) { // For universal compaction, we use level0 score to indicate // compaction score for the whole DB. Adding other levels as if // they are L0 files. for (int i = 1; i < num_levels(); i++) { // Its possible that a subset of the files in a level may be in a // compaction, due to delete triggered compaction or trivial move. // In that case, the below check may not catch a level being // compacted as it only checks the first file. The worst that can // happen is a scheduled compaction thread will find nothing to do. if (!files_[i].empty() && !files_[i][0]->being_compacted) { num_sorted_runs++; } } } if (compaction_style_ == kCompactionStyleFIFO) { score = static_cast(total_size) / mutable_cf_options.compaction_options_fifo.max_table_files_size; if (mutable_cf_options.compaction_options_fifo.allow_compaction) { score = std::max( static_cast(num_sorted_runs) / mutable_cf_options.level0_file_num_compaction_trigger, score); } if (mutable_cf_options.ttl > 0) { score = std::max( static_cast(GetExpiredTtlFilesCount( immutable_cf_options, mutable_cf_options, files_[level])), score); } } else { score = static_cast(num_sorted_runs) / mutable_cf_options.level0_file_num_compaction_trigger; if (compaction_style_ == kCompactionStyleLevel && num_levels() > 1) { // Level-based involves L0->L0 compactions that can lead to oversized // L0 files. Take into account size as well to avoid later giant // compactions to the base level. score = std::max( score, static_cast(total_size) / mutable_cf_options.max_bytes_for_level_base); } } } else { // Compute the ratio of current size to size limit. 
uint64_t level_bytes_no_compacting = 0; for (auto f : files_[level]) { if (!f->being_compacted) { level_bytes_no_compacting += f->compensated_file_size; } } score = static_cast(level_bytes_no_compacting) / MaxBytesForLevel(level); } compaction_level_[level] = level; compaction_score_[level] = score; } // sort all the levels based on their score. Higher scores get listed // first. Use bubble sort because the number of entries are small. for (int i = 0; i < num_levels() - 2; i++) { for (int j = i + 1; j < num_levels() - 1; j++) { if (compaction_score_[i] < compaction_score_[j]) { double score = compaction_score_[i]; int level = compaction_level_[i]; compaction_score_[i] = compaction_score_[j]; compaction_level_[i] = compaction_level_[j]; compaction_score_[j] = score; compaction_level_[j] = level; } } } ComputeFilesMarkedForCompaction(); ComputeBottommostFilesMarkedForCompaction(); if (mutable_cf_options.ttl > 0) { ComputeExpiredTtlFiles(immutable_cf_options, mutable_cf_options.ttl); } if (mutable_cf_options.periodic_compaction_seconds > 0) { ComputeFilesMarkedForPeriodicCompaction( immutable_cf_options, mutable_cf_options.periodic_compaction_seconds); } EstimateCompactionBytesNeeded(mutable_cf_options); } void VersionStorageInfo::ComputeFilesMarkedForCompaction() { files_marked_for_compaction_.clear(); int last_qualify_level = 0; // Do not include files from the last level with data // If table properties collector suggests a file on the last level, // we should not move it to a new level. 
for (int level = num_levels() - 1; level >= 1; level--) { if (!files_[level].empty()) { last_qualify_level = level - 1; break; } } for (int level = 0; level <= last_qualify_level; level++) { for (auto* f : files_[level]) { if (!f->being_compacted && f->marked_for_compaction) { files_marked_for_compaction_.emplace_back(level, f); } } } } void VersionStorageInfo::ComputeExpiredTtlFiles( const ImmutableCFOptions& ioptions, const uint64_t ttl) { assert(ttl > 0); expired_ttl_files_.clear(); int64_t _current_time; auto status = ioptions.env->GetCurrentTime(&_current_time); if (!status.ok()) { return; } const uint64_t current_time = static_cast(_current_time); for (int level = 0; level < num_levels() - 1; level++) { for (FileMetaData* f : files_[level]) { if (!f->being_compacted) { uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime(); if (oldest_ancester_time > 0 && oldest_ancester_time < (current_time - ttl)) { expired_ttl_files_.emplace_back(level, f); } } } } } void VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( const ImmutableCFOptions& ioptions, const uint64_t periodic_compaction_seconds) { assert(periodic_compaction_seconds > 0); files_marked_for_periodic_compaction_.clear(); int64_t temp_current_time; auto status = ioptions.env->GetCurrentTime(&temp_current_time); if (!status.ok()) { return; } const uint64_t current_time = static_cast(temp_current_time); // If periodic_compaction_seconds is larger than current time, periodic // compaction can't possibly be triggered. if (periodic_compaction_seconds > current_time) { return; } const uint64_t allowed_time_limit = current_time - periodic_compaction_seconds; for (int level = 0; level < num_levels(); level++) { for (auto f : files_[level]) { if (!f->being_compacted) { // Compute a file's modification time in the following order: // 1. Use file_creation_time table property if it is > 0. // 2. Use creation_time table property if it is > 0. // 3. 
Use file's mtime metadata if the above two table properties are 0. // Don't consider the file at all if the modification time cannot be // correctly determined based on the above conditions. uint64_t file_modification_time = f->TryGetFileCreationTime(); if (file_modification_time == kUnknownFileCreationTime) { file_modification_time = f->TryGetOldestAncesterTime(); } if (file_modification_time == kUnknownOldestAncesterTime) { auto file_path = TableFileName(ioptions.cf_paths, f->fd.GetNumber(), f->fd.GetPathId()); status = ioptions.env->GetFileModificationTime( file_path, &file_modification_time); if (!status.ok()) { ROCKS_LOG_WARN(ioptions.info_log, "Can't get file modification time: %s: %s", file_path.c_str(), status.ToString().c_str()); continue; } } if (file_modification_time > 0 && file_modification_time < allowed_time_limit) { files_marked_for_periodic_compaction_.emplace_back(level, f); } } } } } namespace { // used to sort files by size struct Fsize { size_t index; FileMetaData* file; }; // Compator that is used to sort files based on their size // In normal mode: descending size bool CompareCompensatedSizeDescending(const Fsize& first, const Fsize& second) { return (first.file->compensated_file_size > second.file->compensated_file_size); } } // anonymous namespace void VersionStorageInfo::AddFile(int level, FileMetaData* f, Logger* info_log) { auto* level_files = &files_[level]; // Must not overlap #ifndef NDEBUG if (level > 0 && !level_files->empty() && internal_comparator_->Compare( (*level_files)[level_files->size() - 1]->largest, f->smallest) >= 0) { auto* f2 = (*level_files)[level_files->size() - 1]; if (info_log != nullptr) { Error(info_log, "Adding new file %" PRIu64 " range (%s, %s) to level %d but overlapping " "with existing file %" PRIu64 " %s %s", f->fd.GetNumber(), f->smallest.DebugString(true).c_str(), f->largest.DebugString(true).c_str(), level, f2->fd.GetNumber(), f2->smallest.DebugString(true).c_str(), 
f2->largest.DebugString(true).c_str()); LogFlush(info_log); } assert(false); } #else (void)info_log; #endif f->refs++; level_files->push_back(f); const uint64_t file_number = f->fd.GetNumber(); assert(file_locations_.find(file_number) == file_locations_.end()); file_locations_.emplace(file_number, FileLocation(level, level_files->size() - 1)); } void VersionStorageInfo::AddBlobFile( std::shared_ptr blob_file_meta) { assert(blob_file_meta); const uint64_t blob_file_number = blob_file_meta->GetBlobFileNumber(); auto it = blob_files_.lower_bound(blob_file_number); assert(it == blob_files_.end() || it->first != blob_file_number); blob_files_.insert( it, BlobFiles::value_type(blob_file_number, std::move(blob_file_meta))); } // Version::PrepareApply() need to be called before calling the function, or // following functions called: // 1. UpdateNumNonEmptyLevels(); // 2. CalculateBaseBytes(); // 3. UpdateFilesByCompactionPri(); // 4. GenerateFileIndexer(); // 5. GenerateLevelFilesBrief(); // 6. GenerateLevel0NonOverlapping(); // 7. GenerateBottommostFiles(); void VersionStorageInfo::SetFinalized() { finalized_ = true; #ifndef NDEBUG if (compaction_style_ != kCompactionStyleLevel) { // Not level based compaction. 
return; } assert(base_level_ < 0 || num_levels() == 1 || (base_level_ >= 1 && base_level_ < num_levels())); // Verify all levels newer than base_level are empty except L0 for (int level = 1; level < base_level(); level++) { assert(NumLevelBytes(level) == 0); } uint64_t max_bytes_prev_level = 0; for (int level = base_level(); level < num_levels() - 1; level++) { if (LevelFiles(level).size() == 0) { continue; } assert(MaxBytesForLevel(level) >= max_bytes_prev_level); max_bytes_prev_level = MaxBytesForLevel(level); } int num_empty_non_l0_level = 0; for (int level = 0; level < num_levels(); level++) { assert(LevelFiles(level).size() == 0 || LevelFiles(level).size() == LevelFilesBrief(level).num_files); if (level > 0 && NumLevelBytes(level) > 0) { num_empty_non_l0_level++; } if (LevelFiles(level).size() > 0) { assert(level < num_non_empty_levels()); } } assert(compaction_level_.size() > 0); assert(compaction_level_.size() == compaction_score_.size()); #endif } void VersionStorageInfo::UpdateNumNonEmptyLevels() { num_non_empty_levels_ = num_levels_; for (int i = num_levels_ - 1; i >= 0; i--) { if (files_[i].size() != 0) { return; } else { num_non_empty_levels_ = i; } } } namespace { // Sort `temp` based on ratio of overlapping size over file size void SortFileByOverlappingRatio( const InternalKeyComparator& icmp, const std::vector& files, const std::vector& next_level_files, std::vector* temp) { std::unordered_map file_to_order; auto next_level_it = next_level_files.begin(); for (auto& file : files) { uint64_t overlapping_bytes = 0; // Skip files in next level that is smaller than current file while (next_level_it != next_level_files.end() && icmp.Compare((*next_level_it)->largest, file->smallest) < 0) { next_level_it++; } while (next_level_it != next_level_files.end() && icmp.Compare((*next_level_it)->smallest, file->largest) < 0) { overlapping_bytes += (*next_level_it)->fd.file_size; if (icmp.Compare((*next_level_it)->largest, file->largest) > 0) { // next level file 
cross large boundary of current file. break; } next_level_it++; } assert(file->compensated_file_size != 0); file_to_order[file->fd.GetNumber()] = overlapping_bytes * 1024u / file->compensated_file_size; } std::sort(temp->begin(), temp->end(), [&](const Fsize& f1, const Fsize& f2) -> bool { return file_to_order[f1.file->fd.GetNumber()] < file_to_order[f2.file->fd.GetNumber()]; }); } } // namespace void VersionStorageInfo::UpdateFilesByCompactionPri( CompactionPri compaction_pri) { if (compaction_style_ == kCompactionStyleNone || compaction_style_ == kCompactionStyleFIFO || compaction_style_ == kCompactionStyleUniversal) { // don't need this return; } // No need to sort the highest level because it is never compacted. for (int level = 0; level < num_levels() - 1; level++) { const std::vector& files = files_[level]; auto& files_by_compaction_pri = files_by_compaction_pri_[level]; assert(files_by_compaction_pri.size() == 0); // populate a temp vector for sorting based on size std::vector temp(files.size()); for (size_t i = 0; i < files.size(); i++) { temp[i].index = i; temp[i].file = files[i]; } // sort the top number_of_files_to_sort_ based on file size size_t num = VersionStorageInfo::kNumberFilesToSort; if (num > temp.size()) { num = temp.size(); } switch (compaction_pri) { case kByCompensatedSize: std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), CompareCompensatedSizeDescending); break; case kOldestLargestSeqFirst: std::sort(temp.begin(), temp.end(), [](const Fsize& f1, const Fsize& f2) -> bool { return f1.file->fd.largest_seqno < f2.file->fd.largest_seqno; }); break; case kOldestSmallestSeqFirst: std::sort(temp.begin(), temp.end(), [](const Fsize& f1, const Fsize& f2) -> bool { return f1.file->fd.smallest_seqno < f2.file->fd.smallest_seqno; }); break; case kMinOverlappingRatio: SortFileByOverlappingRatio(*internal_comparator_, files_[level], files_[level + 1], &temp); break; default: assert(false); } assert(temp.size() == files.size()); // 
initialize files_by_compaction_pri_ for (size_t i = 0; i < temp.size(); i++) { files_by_compaction_pri.push_back(static_cast(temp[i].index)); } next_file_to_compact_by_size_[level] = 0; assert(files_[level].size() == files_by_compaction_pri_[level].size()); } } void VersionStorageInfo::GenerateLevel0NonOverlapping() { assert(!finalized_); level0_non_overlapping_ = true; if (level_files_brief_.size() == 0) { return; } // A copy of L0 files sorted by smallest key std::vector level0_sorted_file( level_files_brief_[0].files, level_files_brief_[0].files + level_files_brief_[0].num_files); std::sort(level0_sorted_file.begin(), level0_sorted_file.end(), [this](const FdWithKeyRange& f1, const FdWithKeyRange& f2) -> bool { return (internal_comparator_->Compare(f1.smallest_key, f2.smallest_key) < 0); }); for (size_t i = 1; i < level0_sorted_file.size(); ++i) { FdWithKeyRange& f = level0_sorted_file[i]; FdWithKeyRange& prev = level0_sorted_file[i - 1]; if (internal_comparator_->Compare(prev.largest_key, f.smallest_key) >= 0) { level0_non_overlapping_ = false; break; } } } void VersionStorageInfo::GenerateBottommostFiles() { assert(!finalized_); assert(bottommost_files_.empty()); for (size_t level = 0; level < level_files_brief_.size(); ++level) { for (size_t file_idx = 0; file_idx < level_files_brief_[level].num_files; ++file_idx) { const FdWithKeyRange& f = level_files_brief_[level].files[file_idx]; int l0_file_idx; if (level == 0) { l0_file_idx = static_cast(file_idx); } else { l0_file_idx = -1; } Slice smallest_user_key = ExtractUserKey(f.smallest_key); Slice largest_user_key = ExtractUserKey(f.largest_key); if (!RangeMightExistAfterSortedRun(smallest_user_key, largest_user_key, static_cast(level), l0_file_idx)) { bottommost_files_.emplace_back(static_cast(level), f.file_metadata); } } } } void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum) { assert(seqnum >= oldest_snapshot_seqnum_); oldest_snapshot_seqnum_ = seqnum; if (oldest_snapshot_seqnum_ > 
bottommost_files_mark_threshold_) { ComputeBottommostFilesMarkedForCompaction(); } } void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction() { bottommost_files_marked_for_compaction_.clear(); bottommost_files_mark_threshold_ = kMaxSequenceNumber; for (auto& level_and_file : bottommost_files_) { if (!level_and_file.second->being_compacted && level_and_file.second->fd.largest_seqno != 0 && level_and_file.second->num_deletions > 1) { // largest_seqno might be nonzero due to containing the final key in an // earlier compaction, whose seqnum we didn't zero out. Multiple deletions // ensures the file really contains deleted or overwritten keys. if (level_and_file.second->fd.largest_seqno < oldest_snapshot_seqnum_) { bottommost_files_marked_for_compaction_.push_back(level_and_file); } else { bottommost_files_mark_threshold_ = std::min(bottommost_files_mark_threshold_, level_and_file.second->fd.largest_seqno); } } } } void Version::Ref() { ++refs_; } bool Version::Unref() { assert(refs_ >= 1); --refs_; if (refs_ == 0) { delete this; return true; } return false; } bool VersionStorageInfo::OverlapInLevel(int level, const Slice* smallest_user_key, const Slice* largest_user_key) { if (level >= num_non_empty_levels_) { // empty level, no overlap return false; } return SomeFileOverlapsRange(*internal_comparator_, (level > 0), level_files_brief_[level], smallest_user_key, largest_user_key); } // Store in "*inputs" all files in "level" that overlap [begin,end] // If hint_index is specified, then it points to a file in the // overlapping range. // The file_index returns a pointer to any file in an overlapping range. 
void VersionStorageInfo::GetOverlappingInputs( int level, const InternalKey* begin, const InternalKey* end, std::vector* inputs, int hint_index, int* file_index, bool expand_range, InternalKey** next_smallest) const { if (level >= num_non_empty_levels_) { // this level is empty, no overlapping inputs return; } inputs->clear(); if (file_index) { *file_index = -1; } const Comparator* user_cmp = user_comparator_; if (level > 0) { GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index, file_index, false, next_smallest); return; } if (next_smallest) { // next_smallest key only makes sense for non-level 0, where files are // non-overlapping *next_smallest = nullptr; } Slice user_begin, user_end; if (begin != nullptr) { user_begin = begin->user_key(); } if (end != nullptr) { user_end = end->user_key(); } // index stores the file index need to check. std::list index; for (size_t i = 0; i < level_files_brief_[level].num_files; i++) { index.emplace_back(i); } while (!index.empty()) { bool found_overlapping_file = false; auto iter = index.begin(); while (iter != index.end()) { FdWithKeyRange* f = &(level_files_brief_[level].files[*iter]); const Slice file_start = ExtractUserKey(f->smallest_key); const Slice file_limit = ExtractUserKey(f->largest_key); if (begin != nullptr && user_cmp->CompareWithoutTimestamp(file_limit, user_begin) < 0) { // "f" is completely before specified range; skip it iter++; } else if (end != nullptr && user_cmp->CompareWithoutTimestamp(file_start, user_end) > 0) { // "f" is completely after specified range; skip it iter++; } else { // if overlap inputs->emplace_back(files_[level][*iter]); found_overlapping_file = true; // record the first file index. if (file_index && *file_index == -1) { *file_index = static_cast(*iter); } // the related file is overlap, erase to avoid checking again. 
iter = index.erase(iter); if (expand_range) { if (begin != nullptr && user_cmp->CompareWithoutTimestamp(file_start, user_begin) < 0) { user_begin = file_start; } if (end != nullptr && user_cmp->CompareWithoutTimestamp(file_limit, user_end) > 0) { user_end = file_limit; } } } } // if all the files left are not overlap, break if (!found_overlapping_file) { break; } } } // Store in "*inputs" files in "level" that within range [begin,end] // Guarantee a "clean cut" boundary between the files in inputs // and the surrounding files and the maxinum number of files. // This will ensure that no parts of a key are lost during compaction. // If hint_index is specified, then it points to a file in the range. // The file_index returns a pointer to any file in an overlapping range. void VersionStorageInfo::GetCleanInputsWithinInterval( int level, const InternalKey* begin, const InternalKey* end, std::vector* inputs, int hint_index, int* file_index) const { inputs->clear(); if (file_index) { *file_index = -1; } if (level >= num_non_empty_levels_ || level == 0 || level_files_brief_[level].num_files == 0) { // this level is empty, no inputs within range // also don't support clean input interval within L0 return; } GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index, file_index, true /* within_interval */); } // Store in "*inputs" all files in "level" that overlap [begin,end] // Employ binary search to find at least one file that overlaps the // specified range. From that file, iterate backwards and // forwards to find all overlapping files. // if within_range is set, then only store the maximum clean inputs // within range [begin, end]. 
"clean" means there is a boudnary // between the files in "*inputs" and the surrounding files void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch( int level, const InternalKey* begin, const InternalKey* end, std::vector* inputs, int hint_index, int* file_index, bool within_interval, InternalKey** next_smallest) const { assert(level > 0); auto user_cmp = user_comparator_; const FdWithKeyRange* files = level_files_brief_[level].files; const int num_files = static_cast(level_files_brief_[level].num_files); // begin to use binary search to find lower bound // and upper bound. int start_index = 0; int end_index = num_files; if (begin != nullptr) { // if within_interval is true, with file_key would find // not overlapping ranges in std::lower_bound. auto cmp = [&user_cmp, &within_interval](const FdWithKeyRange& f, const InternalKey* k) { auto& file_key = within_interval ? f.file_metadata->smallest : f.file_metadata->largest; return sstableKeyCompare(user_cmp, file_key, *k) < 0; }; start_index = static_cast( std::lower_bound(files, files + (hint_index == -1 ? num_files : hint_index), begin, cmp) - files); if (start_index > 0 && within_interval) { bool is_overlapping = true; while (is_overlapping && start_index < num_files) { auto& pre_limit = files[start_index - 1].file_metadata->largest; auto& cur_start = files[start_index].file_metadata->smallest; is_overlapping = sstableKeyCompare(user_cmp, pre_limit, cur_start) == 0; start_index += is_overlapping; } } } if (end != nullptr) { // if within_interval is true, with file_key would find // not overlapping ranges in std::upper_bound. auto cmp = [&user_cmp, &within_interval](const InternalKey* k, const FdWithKeyRange& f) { auto& file_key = within_interval ? 
f.file_metadata->largest : f.file_metadata->smallest; return sstableKeyCompare(user_cmp, *k, file_key) < 0; }; end_index = static_cast( std::upper_bound(files + start_index, files + num_files, end, cmp) - files); if (end_index < num_files && within_interval) { bool is_overlapping = true; while (is_overlapping && end_index > start_index) { auto& next_start = files[end_index].file_metadata->smallest; auto& cur_limit = files[end_index - 1].file_metadata->largest; is_overlapping = sstableKeyCompare(user_cmp, cur_limit, next_start) == 0; end_index -= is_overlapping; } } } assert(start_index <= end_index); // If there were no overlapping files, return immediately. if (start_index == end_index) { if (next_smallest) { *next_smallest = nullptr; } return; } assert(start_index < end_index); // returns the index where an overlap is found if (file_index) { *file_index = start_index; } // insert overlapping files into vector for (int i = start_index; i < end_index; i++) { inputs->push_back(files_[level][i]); } if (next_smallest != nullptr) { // Provide the next key outside the range covered by inputs if (end_index < static_cast(files_[level].size())) { **next_smallest = files_[level][end_index]->smallest; } else { *next_smallest = nullptr; } } } uint64_t VersionStorageInfo::NumLevelBytes(int level) const { assert(level >= 0); assert(level < num_levels()); return TotalFileSize(files_[level]); } const char* VersionStorageInfo::LevelSummary( LevelSummaryStorage* scratch) const { int len = 0; if (compaction_style_ == kCompactionStyleLevel && num_levels() > 1) { assert(base_level_ < static_cast(level_max_bytes_.size())); if (level_multiplier_ != 0.0) { len = snprintf( scratch->buffer, sizeof(scratch->buffer), "base level %d level multiplier %.2f max bytes base %" PRIu64 " ", base_level_, level_multiplier_, level_max_bytes_[base_level_]); } } len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "files["); for (int i = 0; i < num_levels(); i++) { int sz = 
sizeof(scratch->buffer) - len; int ret = snprintf(scratch->buffer + len, sz, "%d ", int(files_[i].size())); if (ret < 0 || ret >= sz) break; len += ret; } if (len > 0) { // overwrite the last space --len; } len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "] max score %.2f", compaction_score_[0]); if (!files_marked_for_compaction_.empty()) { snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " (%" ROCKSDB_PRIszt " files need compaction)", files_marked_for_compaction_.size()); } return scratch->buffer; } const char* VersionStorageInfo::LevelFileSummary(FileSummaryStorage* scratch, int level) const { int len = snprintf(scratch->buffer, sizeof(scratch->buffer), "files_size["); for (const auto& f : files_[level]) { int sz = sizeof(scratch->buffer) - len; char sztxt[16]; AppendHumanBytes(f->fd.GetFileSize(), sztxt, sizeof(sztxt)); int ret = snprintf(scratch->buffer + len, sz, "#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", f->fd.GetNumber(), f->fd.smallest_seqno, sztxt, static_cast(f->being_compacted)); if (ret < 0 || ret >= sz) break; len += ret; } // overwrite the last space (only if files_[level].size() is non-zero) if (files_[level].size() && len > 0) { --len; } snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "]"); return scratch->buffer; } int64_t VersionStorageInfo::MaxNextLevelOverlappingBytes() { uint64_t result = 0; std::vector overlaps; for (int level = 1; level < num_levels() - 1; level++) { for (const auto& f : files_[level]) { GetOverlappingInputs(level + 1, &f->smallest, &f->largest, &overlaps); const uint64_t sum = TotalFileSize(overlaps); if (sum > result) { result = sum; } } } return result; } uint64_t VersionStorageInfo::MaxBytesForLevel(int level) const { // Note: the result for level zero is not really used since we set // the level-0 compaction threshold based on number of files. 
assert(level >= 0); assert(level < static_cast(level_max_bytes_.size())); return level_max_bytes_[level]; } void VersionStorageInfo::CalculateBaseBytes(const ImmutableCFOptions& ioptions, const MutableCFOptions& options) { // Special logic to set number of sorted runs. // It is to match the previous behavior when all files are in L0. int num_l0_count = static_cast(files_[0].size()); if (compaction_style_ == kCompactionStyleUniversal) { // For universal compaction, we use level0 score to indicate // compaction score for the whole DB. Adding other levels as if // they are L0 files. for (int i = 1; i < num_levels(); i++) { if (!files_[i].empty()) { num_l0_count++; } } } set_l0_delay_trigger_count(num_l0_count); level_max_bytes_.resize(ioptions.num_levels); if (!ioptions.level_compaction_dynamic_level_bytes) { base_level_ = (ioptions.compaction_style == kCompactionStyleLevel) ? 1 : -1; // Calculate for static bytes base case for (int i = 0; i < ioptions.num_levels; ++i) { if (i == 0 && ioptions.compaction_style == kCompactionStyleUniversal) { level_max_bytes_[i] = options.max_bytes_for_level_base; } else if (i > 1) { level_max_bytes_[i] = MultiplyCheckOverflow( MultiplyCheckOverflow(level_max_bytes_[i - 1], options.max_bytes_for_level_multiplier), options.MaxBytesMultiplerAdditional(i - 1)); } else { level_max_bytes_[i] = options.max_bytes_for_level_base; } } } else { uint64_t max_level_size = 0; int first_non_empty_level = -1; // Find size of non-L0 level of most data. // Cannot use the size of the last level because it can be empty or less // than previous levels after compaction. for (int i = 1; i < num_levels_; i++) { uint64_t total_size = 0; for (const auto& f : files_[i]) { total_size += f->fd.GetFileSize(); } if (total_size > 0 && first_non_empty_level == -1) { first_non_empty_level = i; } if (total_size > max_level_size) { max_level_size = total_size; } } // Prefill every level's max bytes to disallow compaction from there. 
for (int i = 0; i < num_levels_; i++) { level_max_bytes_[i] = std::numeric_limits::max(); } if (max_level_size == 0) { // No data for L1 and up. L0 compacts to last level directly. // No compaction from L1+ needs to be scheduled. base_level_ = num_levels_ - 1; } else { uint64_t l0_size = 0; for (const auto& f : files_[0]) { l0_size += f->fd.GetFileSize(); } uint64_t base_bytes_max = std::max(options.max_bytes_for_level_base, l0_size); uint64_t base_bytes_min = static_cast( base_bytes_max / options.max_bytes_for_level_multiplier); // Try whether we can make last level's target size to be max_level_size uint64_t cur_level_size = max_level_size; for (int i = num_levels_ - 2; i >= first_non_empty_level; i--) { // Round up after dividing cur_level_size = static_cast( cur_level_size / options.max_bytes_for_level_multiplier); } // Calculate base level and its size. uint64_t base_level_size; if (cur_level_size <= base_bytes_min) { // Case 1. If we make target size of last level to be max_level_size, // target size of the first non-empty level would be smaller than // base_bytes_min. We set it be base_bytes_min. base_level_size = base_bytes_min + 1U; base_level_ = first_non_empty_level; ROCKS_LOG_INFO(ioptions.info_log, "More existing levels in DB than needed. " "max_bytes_for_level_multiplier may not be guaranteed."); } else { // Find base level (where L0 data is compacted to). 
base_level_ = first_non_empty_level; while (base_level_ > 1 && cur_level_size > base_bytes_max) { --base_level_; cur_level_size = static_cast( cur_level_size / options.max_bytes_for_level_multiplier); } if (cur_level_size > base_bytes_max) { // Even L1 will be too large assert(base_level_ == 1); base_level_size = base_bytes_max; } else { base_level_size = cur_level_size; } } level_multiplier_ = options.max_bytes_for_level_multiplier; assert(base_level_size > 0); if (l0_size > base_level_size && (l0_size > options.max_bytes_for_level_base || static_cast(files_[0].size() / 2) >= options.level0_file_num_compaction_trigger)) { // We adjust the base level according to actual L0 size, and adjust // the level multiplier accordingly, when: // 1. the L0 size is larger than level size base, or // 2. number of L0 files reaches twice the L0->L1 compaction trigger // We don't do this otherwise to keep the LSM-tree structure stable // unless the L0 compation is backlogged. base_level_size = l0_size; if (base_level_ == num_levels_ - 1) { level_multiplier_ = 1.0; } else { level_multiplier_ = std::pow( static_cast(max_level_size) / static_cast(base_level_size), 1.0 / static_cast(num_levels_ - base_level_ - 1)); } } uint64_t level_size = base_level_size; for (int i = base_level_; i < num_levels_; i++) { if (i > base_level_) { level_size = MultiplyCheckOverflow(level_size, level_multiplier_); } // Don't set any level below base_bytes_max. Otherwise, the LSM can // assume an hourglass shape where L1+ sizes are smaller than L0. This // causes compaction scoring, which depends on level sizes, to favor L1+ // at the expense of L0, which may fill up and stall. level_max_bytes_[i] = std::max(level_size, base_bytes_max); } } } } uint64_t VersionStorageInfo::EstimateLiveDataSize() const { // Estimate the live data size by adding up the size of the last level for all // key ranges. Note: Estimate depends on the ordering of files in level 0 // because files in level 0 can be overlapping. 
uint64_t size = 0; auto ikey_lt = [this](InternalKey* x, InternalKey* y) { return internal_comparator_->Compare(*x, *y) < 0; }; // (Ordered) map of largest keys in non-overlapping files std::map ranges(ikey_lt); for (int l = num_levels_ - 1; l >= 0; l--) { bool found_end = false; for (auto file : files_[l]) { // Find the first file where the largest key is larger than the smallest // key of the current file. If this file does not overlap with the // current file, none of the files in the map does. If there is // no potential overlap, we can safely insert the rest of this level // (if the level is not 0) into the map without checking again because // the elements in the level are sorted and non-overlapping. auto lb = (found_end && l != 0) ? ranges.end() : ranges.lower_bound(&file->smallest); found_end = (lb == ranges.end()); if (found_end || internal_comparator_->Compare( file->largest, (*lb).second->smallest) < 0) { ranges.emplace_hint(lb, &file->largest, file); size += file->fd.file_size; } } } return size; } bool VersionStorageInfo::RangeMightExistAfterSortedRun( const Slice& smallest_user_key, const Slice& largest_user_key, int last_level, int last_l0_idx) { assert((last_l0_idx != -1) == (last_level == 0)); // TODO(ajkr): this preserves earlier behavior where we considered an L0 file // bottommost only if it's the oldest L0 file and there are no files on older // levels. It'd be better to consider it bottommost if there's no overlap in // older levels/files. if (last_level == 0 && last_l0_idx != static_cast(LevelFiles(0).size() - 1)) { return true; } // Checks whether there are files living beyond the `last_level`. If lower // levels have files, it checks for overlap between [`smallest_key`, // `largest_key`] and those files. Bottomlevel optimizations can be made if // there are no files in lower levels or if there is no overlap with the files // in the lower levels. 
for (int level = last_level + 1; level < num_levels(); level++) { // The range is not in the bottommost level if there are files in lower // levels when the `last_level` is 0 or if there are files in lower levels // which overlap with [`smallest_key`, `largest_key`]. if (files_[level].size() > 0 && (last_level == 0 || OverlapInLevel(level, &smallest_user_key, &largest_user_key))) { return true; } } return false; } void Version::AddLiveFiles(std::vector* live_table_files, std::vector* live_blob_files) const { assert(live_table_files); assert(live_blob_files); for (int level = 0; level < storage_info_.num_levels(); ++level) { const auto& level_files = storage_info_.LevelFiles(level); for (const auto& meta : level_files) { assert(meta); live_table_files->emplace_back(meta->fd.GetNumber()); } } const auto& blob_files = storage_info_.GetBlobFiles(); for (const auto& pair : blob_files) { const auto& meta = pair.second; assert(meta); live_blob_files->emplace_back(meta->GetBlobFileNumber()); } } std::string Version::DebugString(bool hex, bool print_stats) const { std::string r; for (int level = 0; level < storage_info_.num_levels_; level++) { // E.g., // --- level 1 --- // 17:123[1 .. 124]['a' .. 'd'] // 20:43[124 .. 128]['e' .. 'g'] // // if print_stats=true: // 17:123[1 .. 124]['a' .. 'd'](4096) r.append("--- level "); AppendNumberTo(&r, level); r.append(" --- version# "); AppendNumberTo(&r, version_number_); r.append(" ---\n"); const std::vector& files = storage_info_.files_[level]; for (size_t i = 0; i < files.size(); i++) { r.push_back(' '); AppendNumberTo(&r, files[i]->fd.GetNumber()); r.push_back(':'); AppendNumberTo(&r, files[i]->fd.GetFileSize()); r.append("["); AppendNumberTo(&r, files[i]->fd.smallest_seqno); r.append(" .. "); AppendNumberTo(&r, files[i]->fd.largest_seqno); r.append("]"); r.append("["); r.append(files[i]->smallest.DebugString(hex)); r.append(" .. 
"); r.append(files[i]->largest.DebugString(hex)); r.append("]"); if (files[i]->oldest_blob_file_number != kInvalidBlobFileNumber) { r.append(" blob_file:"); AppendNumberTo(&r, files[i]->oldest_blob_file_number); } if (print_stats) { r.append("("); r.append(ToString( files[i]->stats.num_reads_sampled.load(std::memory_order_relaxed))); r.append(")"); } r.append("\n"); } } const auto& blob_files = storage_info_.GetBlobFiles(); if (!blob_files.empty()) { r.append("--- blob files --- version# "); AppendNumberTo(&r, version_number_); r.append(" ---\n"); for (const auto& pair : blob_files) { const auto& blob_file_meta = pair.second; assert(blob_file_meta); r.append(blob_file_meta->DebugString()); r.push_back('\n'); } } return r; } // this is used to batch writes to the manifest file struct VersionSet::ManifestWriter { Status status; bool done; InstrumentedCondVar cv; ColumnFamilyData* cfd; const MutableCFOptions mutable_cf_options; const autovector& edit_list; explicit ManifestWriter(InstrumentedMutex* mu, ColumnFamilyData* _cfd, const MutableCFOptions& cf_options, const autovector& e) : done(false), cv(mu), cfd(_cfd), mutable_cf_options(cf_options), edit_list(e) {} }; Status AtomicGroupReadBuffer::AddEdit(VersionEdit* edit) { assert(edit); if (edit->is_in_atomic_group_) { TEST_SYNC_POINT("AtomicGroupReadBuffer::AddEdit:AtomicGroup"); if (replay_buffer_.empty()) { replay_buffer_.resize(edit->remaining_entries_ + 1); TEST_SYNC_POINT_CALLBACK( "AtomicGroupReadBuffer::AddEdit:FirstInAtomicGroup", edit); } read_edits_in_atomic_group_++; if (read_edits_in_atomic_group_ + edit->remaining_entries_ != static_cast(replay_buffer_.size())) { TEST_SYNC_POINT_CALLBACK( "AtomicGroupReadBuffer::AddEdit:IncorrectAtomicGroupSize", edit); return Status::Corruption("corrupted atomic group"); } replay_buffer_[read_edits_in_atomic_group_ - 1] = *edit; if (read_edits_in_atomic_group_ == replay_buffer_.size()) { TEST_SYNC_POINT_CALLBACK( "AtomicGroupReadBuffer::AddEdit:LastInAtomicGroup", 
edit); return Status::OK(); } return Status::OK(); } // A normal edit. if (!replay_buffer().empty()) { TEST_SYNC_POINT_CALLBACK( "AtomicGroupReadBuffer::AddEdit:AtomicGroupMixedWithNormalEdits", edit); return Status::Corruption("corrupted atomic group"); } return Status::OK(); } bool AtomicGroupReadBuffer::IsFull() const { return read_edits_in_atomic_group_ == replay_buffer_.size(); } bool AtomicGroupReadBuffer::IsEmpty() const { return replay_buffer_.empty(); } void AtomicGroupReadBuffer::Clear() { read_edits_in_atomic_group_ = 0; replay_buffer_.clear(); } VersionSet::VersionSet(const std::string& dbname, const ImmutableDBOptions* _db_options, const FileOptions& storage_options, Cache* table_cache, WriteBufferManager* write_buffer_manager, WriteController* write_controller, BlockCacheTracer* const block_cache_tracer) : column_family_set_(new ColumnFamilySet( dbname, _db_options, storage_options, table_cache, write_buffer_manager, write_controller, block_cache_tracer)), env_(_db_options->env), fs_(_db_options->fs.get()), dbname_(dbname), db_options_(_db_options), next_file_number_(2), manifest_file_number_(0), // Filled by Recover() options_file_number_(0), pending_manifest_file_number_(0), last_sequence_(0), last_allocated_sequence_(0), last_published_sequence_(0), prev_log_number_(0), current_version_number_(0), manifest_file_size_(0), file_options_(storage_options), block_cache_tracer_(block_cache_tracer) {} VersionSet::~VersionSet() { // we need to delete column_family_set_ because its destructor depends on // VersionSet Cache* table_cache = column_family_set_->get_table_cache(); column_family_set_.reset(); for (auto& file : obsolete_files_) { if (file.metadata->table_reader_handle) { table_cache->Release(file.metadata->table_reader_handle); TableCache::Evict(table_cache, file.metadata->fd.GetNumber()); } file.DeleteMetadata(); } obsolete_files_.clear(); } void VersionSet::Reset() { if (column_family_set_) { Cache* table_cache = 
column_family_set_->get_table_cache(); WriteBufferManager* wbm = column_family_set_->write_buffer_manager(); WriteController* wc = column_family_set_->write_controller(); column_family_set_.reset(new ColumnFamilySet(dbname_, db_options_, file_options_, table_cache, wbm, wc, block_cache_tracer_)); } db_id_.clear(); next_file_number_.store(2); min_log_number_to_keep_2pc_.store(0); manifest_file_number_ = 0; options_file_number_ = 0; pending_manifest_file_number_ = 0; last_sequence_.store(0); last_allocated_sequence_.store(0); last_published_sequence_.store(0); prev_log_number_ = 0; descriptor_log_.reset(); current_version_number_ = 0; manifest_writers_.clear(); manifest_file_size_ = 0; obsolete_files_.clear(); obsolete_manifests_.clear(); } void VersionSet::AppendVersion(ColumnFamilyData* column_family_data, Version* v) { // compute new compaction score v->storage_info()->ComputeCompactionScore( *column_family_data->ioptions(), *column_family_data->GetLatestMutableCFOptions()); // Mark v finalized v->storage_info_.SetFinalized(); // Make "v" current assert(v->refs_ == 0); Version* current = column_family_data->current(); assert(v != current); if (current != nullptr) { assert(current->refs_ > 0); current->Unref(); } column_family_data->SetCurrent(v); v->Ref(); // Append to linked list v->prev_ = column_family_data->dummy_versions()->prev_; v->next_ = column_family_data->dummy_versions(); v->prev_->next_ = v; v->next_->prev_ = v; } Status VersionSet::ProcessManifestWrites( std::deque& writers, InstrumentedMutex* mu, FSDirectory* db_directory, bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options) { assert(!writers.empty()); ManifestWriter& first_writer = writers.front(); ManifestWriter* last_writer = &first_writer; assert(!manifest_writers_.empty()); assert(manifest_writers_.front() == &first_writer); autovector batch_edits; autovector versions; autovector mutable_cf_options_ptrs; std::vector> builder_guards; if 
(first_writer.edit_list.front()->IsColumnFamilyManipulation()) { // No group commits for column family add or drop LogAndApplyCFHelper(first_writer.edit_list.front()); batch_edits.push_back(first_writer.edit_list.front()); } else { auto it = manifest_writers_.cbegin(); size_t group_start = std::numeric_limits::max(); while (it != manifest_writers_.cend()) { if ((*it)->edit_list.front()->IsColumnFamilyManipulation()) { // no group commits for column family add or drop break; } last_writer = *(it++); assert(last_writer != nullptr); assert(last_writer->cfd != nullptr); if (last_writer->cfd->IsDropped()) { // If we detect a dropped CF at this point, and the corresponding // version edits belong to an atomic group, then we need to find out // the preceding version edits in the same atomic group, and update // their `remaining_entries_` member variable because we are NOT going // to write the version edits' of dropped CF to the MANIFEST. If we // don't update, then Recover can report corrupted atomic group because // the `remaining_entries_` do not match. if (!batch_edits.empty()) { if (batch_edits.back()->is_in_atomic_group_ && batch_edits.back()->remaining_entries_ > 0) { assert(group_start < batch_edits.size()); const auto& edit_list = last_writer->edit_list; size_t k = 0; while (k < edit_list.size()) { if (!edit_list[k]->is_in_atomic_group_) { break; } else if (edit_list[k]->remaining_entries_ == 0) { ++k; break; } ++k; } for (auto i = group_start; i < batch_edits.size(); ++i) { assert(static_cast(k) <= batch_edits.back()->remaining_entries_); batch_edits[i]->remaining_entries_ -= static_cast(k); } } } continue; } // We do a linear search on versions because versions is small. 
// TODO(yanqin) maybe consider unordered_map Version* version = nullptr; VersionBuilder* builder = nullptr; for (int i = 0; i != static_cast(versions.size()); ++i) { uint32_t cf_id = last_writer->cfd->GetID(); if (versions[i]->cfd()->GetID() == cf_id) { version = versions[i]; assert(!builder_guards.empty() && builder_guards.size() == versions.size()); builder = builder_guards[i]->version_builder(); TEST_SYNC_POINT_CALLBACK( "VersionSet::ProcessManifestWrites:SameColumnFamily", &cf_id); break; } } if (version == nullptr) { version = new Version(last_writer->cfd, this, file_options_, last_writer->mutable_cf_options, current_version_number_++); versions.push_back(version); mutable_cf_options_ptrs.push_back(&last_writer->mutable_cf_options); builder_guards.emplace_back( new BaseReferencedVersionBuilder(last_writer->cfd)); builder = builder_guards.back()->version_builder(); } assert(builder != nullptr); // make checker happy for (const auto& e : last_writer->edit_list) { if (e->is_in_atomic_group_) { if (batch_edits.empty() || !batch_edits.back()->is_in_atomic_group_ || (batch_edits.back()->is_in_atomic_group_ && batch_edits.back()->remaining_entries_ == 0)) { group_start = batch_edits.size(); } } else if (group_start != std::numeric_limits::max()) { group_start = std::numeric_limits::max(); } Status s = LogAndApplyHelper(last_writer->cfd, builder, e, mu); if (!s.ok()) { // free up the allocated memory for (auto v : versions) { delete v; } return s; } batch_edits.push_back(e); } } for (int i = 0; i < static_cast(versions.size()); ++i) { assert(!builder_guards.empty() && builder_guards.size() == versions.size()); auto* builder = builder_guards[i]->version_builder(); Status s = builder->SaveTo(versions[i]->storage_info()); if (!s.ok()) { // free up the allocated memory for (auto v : versions) { delete v; } return s; } } } #ifndef NDEBUG // Verify that version edits of atomic groups have correct // remaining_entries_. 
size_t k = 0; while (k < batch_edits.size()) { while (k < batch_edits.size() && !batch_edits[k]->is_in_atomic_group_) { ++k; } if (k == batch_edits.size()) { break; } size_t i = k; while (i < batch_edits.size()) { if (!batch_edits[i]->is_in_atomic_group_) { break; } assert(i - k + batch_edits[i]->remaining_entries_ == batch_edits[k]->remaining_entries_); if (batch_edits[i]->remaining_entries_ == 0) { ++i; break; } ++i; } assert(batch_edits[i - 1]->is_in_atomic_group_); assert(0 == batch_edits[i - 1]->remaining_entries_); std::vector tmp; for (size_t j = k; j != i; ++j) { tmp.emplace_back(batch_edits[j]); } TEST_SYNC_POINT_CALLBACK( "VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", &tmp); k = i; } #endif // NDEBUG assert(pending_manifest_file_number_ == 0); if (!descriptor_log_ || manifest_file_size_ > db_options_->max_manifest_file_size) { TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:BeforeNewManifest"); new_descriptor_log = true; } else { pending_manifest_file_number_ = manifest_file_number_; } // Local cached copy of state variable(s). WriteCurrentStateToManifest() // reads its content after releasing db mutex to avoid race with // SwitchMemtable(). std::unordered_map curr_state; if (new_descriptor_log) { pending_manifest_file_number_ = NewFileNumber(); batch_edits.back()->SetNextFile(next_file_number_.load()); // if we are writing out new snapshot make sure to persist max column // family. 
if (column_family_set_->GetMaxColumnFamily() > 0) { first_writer.edit_list.front()->SetMaxColumnFamily( column_family_set_->GetMaxColumnFamily()); } for (const auto* cfd : *column_family_set_) { assert(curr_state.find(cfd->GetID()) == curr_state.end()); curr_state[cfd->GetID()] = {cfd->GetLogNumber()}; } } uint64_t new_manifest_file_size = 0; Status s; IOStatus io_s; { FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_); mu->Unlock(); TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifest"); if (!first_writer.edit_list.front()->IsColumnFamilyManipulation()) { for (int i = 0; i < static_cast(versions.size()); ++i) { assert(!builder_guards.empty() && builder_guards.size() == versions.size()); assert(!mutable_cf_options_ptrs.empty() && builder_guards.size() == versions.size()); ColumnFamilyData* cfd = versions[i]->cfd_; s = builder_guards[i]->version_builder()->LoadTableHandlers( cfd->internal_stats(), 1 /* max_threads */, true /* prefetch_index_and_filter_in_cache */, false /* is_initial_load */, mutable_cf_options_ptrs[i]->prefix_extractor.get(), MaxFileSizeForL0MetaPin(*mutable_cf_options_ptrs[i])); if (!s.ok()) { if (db_options_->paranoid_checks) { break; } s = Status::OK(); } } } if (s.ok() && new_descriptor_log) { // This is fine because everything inside of this block is serialized -- // only one thread can be here at the same time // create new manifest file ROCKS_LOG_INFO(db_options_->info_log, "Creating manifest %" PRIu64 "\n", pending_manifest_file_number_); std::string descriptor_fname = DescriptorFileName(dbname_, pending_manifest_file_number_); std::unique_ptr descriptor_file; io_s = NewWritableFile(fs_, descriptor_fname, &descriptor_file, opt_file_opts); if (io_s.ok()) { descriptor_file->SetPreallocationBlockSize( db_options_->manifest_preallocation_size); std::unique_ptr file_writer(new WritableFileWriter( std::move(descriptor_file), descriptor_fname, opt_file_opts, env_, nullptr, db_options_->listeners)); descriptor_log_.reset( 
new log::Writer(std::move(file_writer), 0, false)); s = WriteCurrentStateToManifest(curr_state, descriptor_log_.get(), io_s); } else { s = io_s; } } if (s.ok()) { if (!first_writer.edit_list.front()->IsColumnFamilyManipulation()) { for (int i = 0; i < static_cast(versions.size()); ++i) { versions[i]->PrepareApply(*mutable_cf_options_ptrs[i], true); } } // Write new records to MANIFEST log #ifndef NDEBUG size_t idx = 0; #endif for (auto& e : batch_edits) { std::string record; if (!e->EncodeTo(&record)) { s = Status::Corruption("Unable to encode VersionEdit:" + e->DebugString(true)); break; } TEST_KILL_RANDOM("VersionSet::LogAndApply:BeforeAddRecord", rocksdb_kill_odds * REDUCE_ODDS2); #ifndef NDEBUG if (batch_edits.size() > 1 && batch_edits.size() - 1 == idx) { TEST_SYNC_POINT_CALLBACK( "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0", nullptr); TEST_SYNC_POINT( "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:1"); } ++idx; #endif /* !NDEBUG */ io_s = descriptor_log_->AddRecord(record); if (!io_s.ok()) { s = io_s; break; } } if (s.ok()) { io_s = SyncManifest(env_, db_options_, descriptor_log_->file()); TEST_SYNC_POINT_CALLBACK( "VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s); } if (!io_s.ok()) { s = io_s; ROCKS_LOG_ERROR(db_options_->info_log, "MANIFEST write %s\n", s.ToString().c_str()); } } // If we just created a new descriptor file, install it by writing a // new CURRENT file that points to it. if (s.ok() && new_descriptor_log) { io_s = SetCurrentFile(fs_, dbname_, pending_manifest_file_number_, db_directory); if (!io_s.ok()) { s = io_s; } TEST_SYNC_POINT("VersionSet::ProcessManifestWrites:AfterNewManifest"); } if (s.ok()) { // find offset in manifest file where this version is stored. 
new_manifest_file_size = descriptor_log_->file()->GetFileSize(); } if (first_writer.edit_list.front()->is_column_family_drop_) { TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:0"); TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:1"); TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:2"); } LogFlush(db_options_->info_log); TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestDone"); mu->Lock(); } if (!io_s.ok()) { if (io_status_.ok()) { io_status_ = io_s; } } else if (!io_status_.ok()) { io_status_ = io_s; } // Append the old manifest file to the obsolete_manifest_ list to be deleted // by PurgeObsoleteFiles later. if (s.ok() && new_descriptor_log) { obsolete_manifests_.emplace_back( DescriptorFileName("", manifest_file_number_)); } // Install the new versions if (s.ok()) { if (first_writer.edit_list.front()->is_column_family_add_) { assert(batch_edits.size() == 1); assert(new_cf_options != nullptr); CreateColumnFamily(*new_cf_options, first_writer.edit_list.front()); } else if (first_writer.edit_list.front()->is_column_family_drop_) { assert(batch_edits.size() == 1); first_writer.cfd->SetDropped(); first_writer.cfd->UnrefAndTryDelete(); } else { // Each version in versions corresponds to a column family. // For each column family, update its log number indicating that logs // with number smaller than this should be ignored. 
for (const auto version : versions) { uint64_t max_log_number_in_batch = 0; uint32_t cf_id = version->cfd_->GetID(); for (const auto& e : batch_edits) { if (e->has_log_number_ && e->column_family_ == cf_id) { max_log_number_in_batch = std::max(max_log_number_in_batch, e->log_number_); } } if (max_log_number_in_batch != 0) { assert(version->cfd_->GetLogNumber() <= max_log_number_in_batch); version->cfd_->SetLogNumber(max_log_number_in_batch); } } uint64_t last_min_log_number_to_keep = 0; for (auto& e : batch_edits) { if (e->has_min_log_number_to_keep_) { last_min_log_number_to_keep = std::max(last_min_log_number_to_keep, e->min_log_number_to_keep_); } } if (last_min_log_number_to_keep != 0) { // Should only be set in 2PC mode. MarkMinLogNumberToKeep2PC(last_min_log_number_to_keep); } for (int i = 0; i < static_cast(versions.size()); ++i) { ColumnFamilyData* cfd = versions[i]->cfd_; AppendVersion(cfd, versions[i]); } } manifest_file_number_ = pending_manifest_file_number_; manifest_file_size_ = new_manifest_file_size; prev_log_number_ = first_writer.edit_list.front()->prev_log_number_; } else { std::string version_edits; for (auto& e : batch_edits) { version_edits += ("\n" + e->DebugString(true)); } ROCKS_LOG_ERROR(db_options_->info_log, "Error in committing version edit to MANIFEST: %s", version_edits.c_str()); for (auto v : versions) { delete v; } // If manifest append failed for whatever reason, the file could be // corrupted. So we need to force the next version update to start a // new manifest file. 
descriptor_log_.reset(); if (new_descriptor_log) { ROCKS_LOG_INFO(db_options_->info_log, "Deleting manifest %" PRIu64 " current manifest %" PRIu64 "\n", manifest_file_number_, pending_manifest_file_number_); env_->DeleteFile( DescriptorFileName(dbname_, pending_manifest_file_number_)); } } pending_manifest_file_number_ = 0; // wake up all the waiting writers while (true) { ManifestWriter* ready = manifest_writers_.front(); manifest_writers_.pop_front(); bool need_signal = true; for (const auto& w : writers) { if (&w == ready) { need_signal = false; break; } } ready->status = s; ready->done = true; if (need_signal) { ready->cv.Signal(); } if (ready == last_writer) { break; } } if (!manifest_writers_.empty()) { manifest_writers_.front()->cv.Signal(); } return s; } // 'datas' is gramatically incorrect. We still use this notation to indicate // that this variable represents a collection of column_family_data. Status VersionSet::LogAndApply( const autovector& column_family_datas, const autovector& mutable_cf_options_list, const autovector>& edit_lists, InstrumentedMutex* mu, FSDirectory* db_directory, bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options) { mu->AssertHeld(); int num_edits = 0; for (const auto& elist : edit_lists) { num_edits += static_cast(elist.size()); } if (num_edits == 0) { return Status::OK(); } else if (num_edits > 1) { #ifndef NDEBUG for (const auto& edit_list : edit_lists) { for (const auto& edit : edit_list) { assert(!edit->IsColumnFamilyManipulation()); } } #endif /* ! 
NDEBUG */ } int num_cfds = static_cast(column_family_datas.size()); if (num_cfds == 1 && column_family_datas[0] == nullptr) { assert(edit_lists.size() == 1 && edit_lists[0].size() == 1); assert(edit_lists[0][0]->is_column_family_add_); assert(new_cf_options != nullptr); } std::deque writers; if (num_cfds > 0) { assert(static_cast(num_cfds) == mutable_cf_options_list.size()); assert(static_cast(num_cfds) == edit_lists.size()); } for (int i = 0; i < num_cfds; ++i) { writers.emplace_back(mu, column_family_datas[i], *mutable_cf_options_list[i], edit_lists[i]); manifest_writers_.push_back(&writers[i]); } assert(!writers.empty()); ManifestWriter& first_writer = writers.front(); while (!first_writer.done && &first_writer != manifest_writers_.front()) { first_writer.cv.Wait(); } if (first_writer.done) { // All non-CF-manipulation operations can be grouped together and committed // to MANIFEST. They should all have finished. The status code is stored in // the first manifest writer. #ifndef NDEBUG for (const auto& writer : writers) { assert(writer.done); } #endif /* !NDEBUG */ return first_writer.status; } int num_undropped_cfds = 0; for (auto cfd : column_family_datas) { // if cfd == nullptr, it is a column family add. if (cfd == nullptr || !cfd->IsDropped()) { ++num_undropped_cfds; } } if (0 == num_undropped_cfds) { for (int i = 0; i != num_cfds; ++i) { manifest_writers_.pop_front(); } // Notify new head of manifest write queue. if (!manifest_writers_.empty()) { manifest_writers_.front()->cv.Signal(); } return Status::ColumnFamilyDropped(); } return ProcessManifestWrites(writers, mu, db_directory, new_descriptor_log, new_cf_options); } void VersionSet::LogAndApplyCFHelper(VersionEdit* edit) { assert(edit->IsColumnFamilyManipulation()); edit->SetNextFile(next_file_number_.load()); // The log might have data that is not visible to memtbale and hence have not // updated the last_sequence_ yet. 
It is also possible that the log has is // expecting some new data that is not written yet. Since LastSequence is an // upper bound on the sequence, it is ok to record // last_allocated_sequence_ as the last sequence. edit->SetLastSequence(db_options_->two_write_queues ? last_allocated_sequence_ : last_sequence_); if (edit->is_column_family_drop_) { // if we drop column family, we have to make sure to save max column family, // so that we don't reuse existing ID edit->SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily()); } } Status VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd, VersionBuilder* builder, VersionEdit* edit, InstrumentedMutex* mu) { #ifdef NDEBUG (void)cfd; #endif mu->AssertHeld(); assert(!edit->IsColumnFamilyManipulation()); if (edit->has_log_number_) { assert(edit->log_number_ >= cfd->GetLogNumber()); assert(edit->log_number_ < next_file_number_.load()); } if (!edit->has_prev_log_number_) { edit->SetPrevLogNumber(prev_log_number_); } edit->SetNextFile(next_file_number_.load()); // The log might have data that is not visible to memtbale and hence have not // updated the last_sequence_ yet. It is also possible that the log has is // expecting some new data that is not written yet. Since LastSequence is an // upper bound on the sequence, it is ok to record // last_allocated_sequence_ as the last sequence. edit->SetLastSequence(db_options_->two_write_queues ? last_allocated_sequence_ : last_sequence_); Status s = builder->Apply(edit); return s; } Status VersionSet::ApplyOneVersionEditToBuilder( VersionEdit& edit, const std::unordered_map& name_to_options, std::unordered_map& column_families_not_found, std::unordered_map>& builders, VersionEditParams* version_edit_params) { // Not found means that user didn't supply that column // family option AND we encountered column family add // record. Once we encounter column family drop record, // we will delete the column family from // column_families_not_found. 
bool cf_in_not_found = (column_families_not_found.find(edit.column_family_) != column_families_not_found.end()); // in builders means that user supplied that column family // option AND that we encountered column family add record bool cf_in_builders = builders.find(edit.column_family_) != builders.end(); // they can't both be true assert(!(cf_in_not_found && cf_in_builders)); ColumnFamilyData* cfd = nullptr; if (edit.is_column_family_add_) { if (cf_in_builders || cf_in_not_found) { return Status::Corruption( "Manifest adding the same column family twice: " + edit.column_family_name_); } auto cf_options = name_to_options.find(edit.column_family_name_); // implicitly add persistent_stats column family without requiring user // to specify bool is_persistent_stats_column_family = edit.column_family_name_.compare(kPersistentStatsColumnFamilyName) == 0; if (cf_options == name_to_options.end() && !is_persistent_stats_column_family) { column_families_not_found.insert( {edit.column_family_, edit.column_family_name_}); } else { // recover persistent_stats CF from a DB that already contains it if (is_persistent_stats_column_family) { ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); cfd = CreateColumnFamily(cfo, &edit); } else { cfd = CreateColumnFamily(cf_options->second, &edit); } cfd->set_initialized(); builders.insert(std::make_pair( edit.column_family_, std::unique_ptr( new BaseReferencedVersionBuilder(cfd)))); } } else if (edit.is_column_family_drop_) { if (cf_in_builders) { auto builder = builders.find(edit.column_family_); assert(builder != builders.end()); builders.erase(builder); cfd = column_family_set_->GetColumnFamily(edit.column_family_); assert(cfd != nullptr); if (cfd->UnrefAndTryDelete()) { cfd = nullptr; } else { // who else can have reference to cfd!? 
assert(false); } } else if (cf_in_not_found) { column_families_not_found.erase(edit.column_family_); } else { return Status::Corruption( "Manifest - dropping non-existing column family"); } } else if (!cf_in_not_found) { if (!cf_in_builders) { return Status::Corruption( "Manifest record referencing unknown column family"); } cfd = column_family_set_->GetColumnFamily(edit.column_family_); // this should never happen since cf_in_builders is true assert(cfd != nullptr); // if it is not column family add or column family drop, // then it's a file add/delete, which should be forwarded // to builder auto builder = builders.find(edit.column_family_); assert(builder != builders.end()); Status s = builder->second->version_builder()->Apply(&edit); if (!s.ok()) { return s; } } return ExtractInfoFromVersionEdit(cfd, edit, version_edit_params); } Status VersionSet::ExtractInfoFromVersionEdit( ColumnFamilyData* cfd, const VersionEdit& from_edit, VersionEditParams* version_edit_params) { if (cfd != nullptr) { if (from_edit.has_db_id_) { version_edit_params->SetDBId(from_edit.db_id_); } if (from_edit.has_log_number_) { if (cfd->GetLogNumber() > from_edit.log_number_) { ROCKS_LOG_WARN( db_options_->info_log, "MANIFEST corruption detected, but ignored - Log numbers in " "records NOT monotonically increasing"); } else { cfd->SetLogNumber(from_edit.log_number_); version_edit_params->SetLogNumber(from_edit.log_number_); } } if (from_edit.has_comparator_ && from_edit.comparator_ != cfd->user_comparator()->Name()) { return Status::InvalidArgument( cfd->user_comparator()->Name(), "does not match existing comparator " + from_edit.comparator_); } } if (from_edit.has_prev_log_number_) { version_edit_params->SetPrevLogNumber(from_edit.prev_log_number_); } if (from_edit.has_next_file_number_) { version_edit_params->SetNextFile(from_edit.next_file_number_); } if (from_edit.has_max_column_family_) { version_edit_params->SetMaxColumnFamily(from_edit.max_column_family_); } if 
(from_edit.has_min_log_number_to_keep_) { version_edit_params->min_log_number_to_keep_ = std::max(version_edit_params->min_log_number_to_keep_, from_edit.min_log_number_to_keep_); } if (from_edit.has_last_sequence_) { version_edit_params->SetLastSequence(from_edit.last_sequence_); } return Status::OK(); } Status VersionSet::GetCurrentManifestPath(const std::string& dbname, FileSystem* fs, std::string* manifest_path, uint64_t* manifest_file_number) { assert(fs != nullptr); assert(manifest_path != nullptr); assert(manifest_file_number != nullptr); std::string fname; Status s = ReadFileToString(fs, CurrentFileName(dbname), &fname); if (!s.ok()) { return s; } if (fname.empty() || fname.back() != '\n') { return Status::Corruption("CURRENT file does not end with newline"); } // remove the trailing '\n' fname.resize(fname.size() - 1); FileType type; bool parse_ok = ParseFileName(fname, manifest_file_number, &type); if (!parse_ok || type != kDescriptorFile) { return Status::Corruption("CURRENT file corrupted"); } *manifest_path = dbname; if (dbname.back() != '/') { manifest_path->push_back('/'); } manifest_path->append(fname); return Status::OK(); } Status VersionSet::ReadAndRecover( log::Reader& reader, AtomicGroupReadBuffer* read_buffer, const std::unordered_map& name_to_options, std::unordered_map& column_families_not_found, std::unordered_map>& builders, Status* log_read_status, VersionEditParams* version_edit_params, std::string* db_id) { assert(read_buffer != nullptr); assert(log_read_status != nullptr); Status s; Slice record; std::string scratch; size_t recovered_edits = 0; while (s.ok() && reader.ReadRecord(&record, &scratch) && log_read_status->ok()) { VersionEdit edit; s = edit.DecodeFrom(record); if (!s.ok()) { break; } if (edit.has_db_id_) { db_id_ = edit.GetDbId(); if (db_id != nullptr) { db_id->assign(edit.GetDbId()); } } s = read_buffer->AddEdit(&edit); if (!s.ok()) { break; } if (edit.is_in_atomic_group_) { if (read_buffer->IsFull()) { // Apply edits in an 
atomic group when we have read all edits in the // group. for (auto& e : read_buffer->replay_buffer()) { s = ApplyOneVersionEditToBuilder(e, name_to_options, column_families_not_found, builders, version_edit_params); if (!s.ok()) { break; } recovered_edits++; } if (!s.ok()) { break; } read_buffer->Clear(); } } else { // Apply a normal edit immediately. s = ApplyOneVersionEditToBuilder(edit, name_to_options, column_families_not_found, builders, version_edit_params); if (s.ok()) { recovered_edits++; } } } if (!log_read_status->ok()) { s = *log_read_status; } if (!s.ok()) { // Clear the buffer if we fail to decode/apply an edit. read_buffer->Clear(); } TEST_SYNC_POINT_CALLBACK("VersionSet::ReadAndRecover:RecoveredEdits", &recovered_edits); return s; } Status VersionSet::Recover( const std::vector& column_families, bool read_only, std::string* db_id) { std::unordered_map cf_name_to_options; for (const auto& cf : column_families) { cf_name_to_options.emplace(cf.name, cf.options); } // keeps track of column families in manifest that were not found in // column families parameters. 
if those column families are not dropped // by subsequent manifest records, Recover() will return failure status std::unordered_map column_families_not_found; // Read "CURRENT" file, which contains a pointer to the current manifest file std::string manifest_path; Status s = GetCurrentManifestPath(dbname_, fs_, &manifest_path, &manifest_file_number_); if (!s.ok()) { return s; } ROCKS_LOG_INFO(db_options_->info_log, "Recovering from manifest file: %s\n", manifest_path.c_str()); std::unique_ptr manifest_file_reader; { std::unique_ptr manifest_file; s = fs_->NewSequentialFile(manifest_path, fs_->OptimizeForManifestRead(file_options_), &manifest_file, nullptr); if (!s.ok()) { return s; } manifest_file_reader.reset( new SequentialFileReader(std::move(manifest_file), manifest_path, db_options_->log_readahead_size)); } VersionBuilderMap builders; // add default column family auto default_cf_iter = cf_name_to_options.find(kDefaultColumnFamilyName); if (default_cf_iter == cf_name_to_options.end()) { return Status::InvalidArgument("Default column family not specified"); } VersionEdit default_cf_edit; default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName); default_cf_edit.SetColumnFamily(0); ColumnFamilyData* default_cfd = CreateColumnFamily(default_cf_iter->second, &default_cf_edit); // In recovery, nobody else can access it, so it's fine to set it to be // initialized earlier. 
default_cfd->set_initialized(); builders.insert( std::make_pair(0, std::unique_ptr( new BaseReferencedVersionBuilder(default_cfd)))); uint64_t current_manifest_file_size = 0; VersionEditParams version_edit_params; { VersionSet::LogReporter reporter; Status log_read_status; reporter.status = &log_read_status; log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter, true /* checksum */, 0 /* log_number */); AtomicGroupReadBuffer read_buffer; s = ReadAndRecover(reader, &read_buffer, cf_name_to_options, column_families_not_found, builders, &log_read_status, &version_edit_params, db_id); current_manifest_file_size = reader.GetReadOffset(); assert(current_manifest_file_size != 0); } if (s.ok()) { if (!version_edit_params.has_next_file_number_) { s = Status::Corruption("no meta-nextfile entry in descriptor"); } else if (!version_edit_params.has_log_number_) { s = Status::Corruption("no meta-lognumber entry in descriptor"); } else if (!version_edit_params.has_last_sequence_) { s = Status::Corruption("no last-sequence-number entry in descriptor"); } if (!version_edit_params.has_prev_log_number_) { version_edit_params.SetPrevLogNumber(0); } column_family_set_->UpdateMaxColumnFamily( version_edit_params.max_column_family_); // When reading DB generated using old release, min_log_number_to_keep=0. // All log files will be scanned for potential prepare entries. MarkMinLogNumberToKeep2PC(version_edit_params.min_log_number_to_keep_); MarkFileNumberUsed(version_edit_params.prev_log_number_); MarkFileNumberUsed(version_edit_params.log_number_); } // there were some column families in the MANIFEST that weren't specified // in the argument. This is OK in read_only mode if (read_only == false && !column_families_not_found.empty()) { std::string list_of_not_found; for (const auto& cf : column_families_not_found) { list_of_not_found += ", " + cf.second; } list_of_not_found = list_of_not_found.substr(2); s = Status::InvalidArgument( "You have to open all column families. 
Column families not opened: " + list_of_not_found); } if (s.ok()) { for (auto cfd : *column_family_set_) { assert(builders.count(cfd->GetID()) > 0); auto* builder = builders[cfd->GetID()]->version_builder(); if (!builder->CheckConsistencyForNumLevels()) { s = Status::InvalidArgument( "db has more levels than options.num_levels"); break; } } } if (s.ok()) { for (auto cfd : *column_family_set_) { if (cfd->IsDropped()) { continue; } if (read_only) { cfd->table_cache()->SetTablesAreImmortal(); } assert(cfd->initialized()); auto builders_iter = builders.find(cfd->GetID()); assert(builders_iter != builders.end()); auto builder = builders_iter->second->version_builder(); // unlimited table cache. Pre-load table handle now. // Need to do it out of the mutex. s = builder->LoadTableHandlers( cfd->internal_stats(), db_options_->max_file_opening_threads, false /* prefetch_index_and_filter_in_cache */, true /* is_initial_load */, cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions())); if (!s.ok()) { if (db_options_->paranoid_checks) { return s; } s = Status::OK(); } Version* v = new Version(cfd, this, file_options_, *cfd->GetLatestMutableCFOptions(), current_version_number_++); s = builder->SaveTo(v->storage_info()); if (!s.ok()) { delete v; return s; } // Install recovered version v->PrepareApply(*cfd->GetLatestMutableCFOptions(), !(db_options_->skip_stats_update_on_db_open)); AppendVersion(cfd, v); } manifest_file_size_ = current_manifest_file_size; next_file_number_.store(version_edit_params.next_file_number_ + 1); last_allocated_sequence_ = version_edit_params.last_sequence_; last_published_sequence_ = version_edit_params.last_sequence_; last_sequence_ = version_edit_params.last_sequence_; prev_log_number_ = version_edit_params.prev_log_number_; ROCKS_LOG_INFO( db_options_->info_log, "Recovered from manifest file:%s succeeded," "manifest_file_number is %" PRIu64 ", next_file_number is %" PRIu64 ", last_sequence 
is %" PRIu64 ", log_number is %" PRIu64 ",prev_log_number is %" PRIu64 ",max_column_family is %" PRIu32 ",min_log_number_to_keep is %" PRIu64 "\n", manifest_path.c_str(), manifest_file_number_, next_file_number_.load(), last_sequence_.load(), version_edit_params.log_number_, prev_log_number_, column_family_set_->GetMaxColumnFamily(), min_log_number_to_keep_2pc()); for (auto cfd : *column_family_set_) { if (cfd->IsDropped()) { continue; } ROCKS_LOG_INFO(db_options_->info_log, "Column family [%s] (ID %" PRIu32 "), log number is %" PRIu64 "\n", cfd->GetName().c_str(), cfd->GetID(), cfd->GetLogNumber()); } } return s; } namespace { class ManifestPicker { public: explicit ManifestPicker(const std::string& dbname, FileSystem* fs); void SeekToFirstManifest(); // REQUIRES Valid() == true std::string GetNextManifest(uint64_t* file_number, std::string* file_name); bool Valid() const { return manifest_file_iter_ != manifest_files_.end(); } const Status& status() const { return status_; } private: const std::string& dbname_; FileSystem* const fs_; // MANIFEST file names(s) std::vector manifest_files_; std::vector::const_iterator manifest_file_iter_; Status status_; }; ManifestPicker::ManifestPicker(const std::string& dbname, FileSystem* fs) : dbname_(dbname), fs_(fs) {} void ManifestPicker::SeekToFirstManifest() { assert(fs_ != nullptr); std::vector children; Status s = fs_->GetChildren(dbname_, IOOptions(), &children, /*dbg=*/nullptr); if (!s.ok()) { status_ = s; return; } for (const auto& fname : children) { uint64_t file_num = 0; FileType file_type; bool parse_ok = ParseFileName(fname, &file_num, &file_type); if (parse_ok && file_type == kDescriptorFile) { manifest_files_.push_back(fname); } } std::sort(manifest_files_.begin(), manifest_files_.end(), [](const std::string& lhs, const std::string& rhs) { uint64_t num1 = 0; uint64_t num2 = 0; FileType type1; FileType type2; bool parse_ok1 = ParseFileName(lhs, &num1, &type1); bool parse_ok2 = ParseFileName(rhs, &num2, &type2); 
#ifndef NDEBUG assert(parse_ok1); assert(parse_ok2); #else (void)parse_ok1; (void)parse_ok2; #endif return num1 > num2; }); manifest_file_iter_ = manifest_files_.begin(); } std::string ManifestPicker::GetNextManifest(uint64_t* number, std::string* file_name) { assert(status_.ok()); assert(Valid()); std::string ret; if (manifest_file_iter_ != manifest_files_.end()) { ret.assign(dbname_); if (ret.back() != kFilePathSeparator) { ret.push_back(kFilePathSeparator); } ret.append(*manifest_file_iter_); if (number) { FileType type; bool parse = ParseFileName(*manifest_file_iter_, number, &type); assert(type == kDescriptorFile); #ifndef NDEBUG assert(parse); #else (void)parse; #endif } if (file_name) { *file_name = *manifest_file_iter_; } ++manifest_file_iter_; } return ret; } } // namespace Status VersionSet::TryRecover( const std::vector& column_families, bool read_only, std::string* db_id, bool* has_missing_table_file) { ManifestPicker manifest_picker(dbname_, fs_); manifest_picker.SeekToFirstManifest(); Status s = manifest_picker.status(); if (!s.ok()) { return s; } if (!manifest_picker.Valid()) { return Status::Corruption("Cannot locate MANIFEST file in " + dbname_); } std::string manifest_path = manifest_picker.GetNextManifest(&manifest_file_number_, nullptr); while (!manifest_path.empty()) { s = TryRecoverFromOneManifest(manifest_path, column_families, read_only, db_id, has_missing_table_file); if (s.ok() || !manifest_picker.Valid()) { break; } Reset(); manifest_path = manifest_picker.GetNextManifest(&manifest_file_number_, nullptr); } return s; } Status VersionSet::TryRecoverFromOneManifest( const std::string& manifest_path, const std::vector& column_families, bool read_only, std::string* db_id, bool* has_missing_table_file) { ROCKS_LOG_INFO(db_options_->info_log, "Trying to recover from manifest: %s\n", manifest_path.c_str()); std::unique_ptr manifest_file_reader; Status s; { std::unique_ptr manifest_file; s = fs_->NewSequentialFile(manifest_path, 
fs_->OptimizeForManifestRead(file_options_), &manifest_file, nullptr); if (!s.ok()) { return s; } manifest_file_reader.reset( new SequentialFileReader(std::move(manifest_file), manifest_path, db_options_->log_readahead_size)); } assert(s.ok()); VersionSet::LogReporter reporter; reporter.status = &s; log::Reader reader(nullptr, std::move(manifest_file_reader), &reporter, /*checksum=*/true, /*log_num=*/0); VersionEditHandlerPointInTime handler_pit(read_only, column_families, const_cast(this)); handler_pit.Iterate(reader, &s, db_id); assert(nullptr != has_missing_table_file); *has_missing_table_file = handler_pit.HasMissingFiles(); return handler_pit.status(); } Status VersionSet::ListColumnFamilies(std::vector* column_families, const std::string& dbname, FileSystem* fs) { // these are just for performance reasons, not correcntes, // so we're fine using the defaults FileOptions soptions; // Read "CURRENT" file, which contains a pointer to the current manifest file std::string manifest_path; uint64_t manifest_file_number; Status s = GetCurrentManifestPath(dbname, fs, &manifest_path, &manifest_file_number); if (!s.ok()) { return s; } std::unique_ptr file_reader; { std::unique_ptr file; s = fs->NewSequentialFile(manifest_path, soptions, &file, nullptr); if (!s.ok()) { return s; } file_reader.reset(new SequentialFileReader(std::move(file), manifest_path)); } std::map column_family_names; // default column family is always implicitly there column_family_names.insert({0, kDefaultColumnFamilyName}); VersionSet::LogReporter reporter; reporter.status = &s; log::Reader reader(nullptr, std::move(file_reader), &reporter, true /* checksum */, 0 /* log_number */); Slice record; std::string scratch; while (reader.ReadRecord(&record, &scratch) && s.ok()) { VersionEdit edit; s = edit.DecodeFrom(record); if (!s.ok()) { break; } if (edit.is_column_family_add_) { if (column_family_names.find(edit.column_family_) != column_family_names.end()) { s = Status::Corruption("Manifest adding the 
same column family twice"); break; } column_family_names.insert( {edit.column_family_, edit.column_family_name_}); } else if (edit.is_column_family_drop_) { if (column_family_names.find(edit.column_family_) == column_family_names.end()) { s = Status::Corruption( "Manifest - dropping non-existing column family"); break; } column_family_names.erase(edit.column_family_); } } column_families->clear(); if (s.ok()) { for (const auto& iter : column_family_names) { column_families->push_back(iter.second); } } return s; } #ifndef ROCKSDB_LITE Status VersionSet::ReduceNumberOfLevels(const std::string& dbname, const Options* options, const FileOptions& file_options, int new_levels) { if (new_levels <= 1) { return Status::InvalidArgument( "Number of levels needs to be bigger than 1"); } ImmutableDBOptions db_options(*options); ColumnFamilyOptions cf_options(*options); std::shared_ptr tc(NewLRUCache(options->max_open_files - 10, options->table_cache_numshardbits)); WriteController wc(options->delayed_write_rate); WriteBufferManager wb(options->db_write_buffer_size); VersionSet versions(dbname, &db_options, file_options, tc.get(), &wb, &wc, /*block_cache_tracer=*/nullptr); Status status; std::vector dummy; ColumnFamilyDescriptor dummy_descriptor(kDefaultColumnFamilyName, ColumnFamilyOptions(*options)); dummy.push_back(dummy_descriptor); status = versions.Recover(dummy); if (!status.ok()) { return status; } Version* current_version = versions.GetColumnFamilySet()->GetDefault()->current(); auto* vstorage = current_version->storage_info(); int current_levels = vstorage->num_levels(); if (current_levels <= new_levels) { return Status::OK(); } // Make sure there are file only on one level from // (new_levels-1) to (current_levels-1) int first_nonempty_level = -1; int first_nonempty_level_filenum = 0; for (int i = new_levels - 1; i < current_levels; i++) { int file_num = vstorage->NumLevelFiles(i); if (file_num != 0) { if (first_nonempty_level < 0) { first_nonempty_level = i; 
first_nonempty_level_filenum = file_num; } else { char msg[255]; snprintf(msg, sizeof(msg), "Found at least two levels containing files: " "[%d:%d],[%d:%d].\n", first_nonempty_level, first_nonempty_level_filenum, i, file_num); return Status::InvalidArgument(msg); } } } // we need to allocate an array with the old number of levels size to // avoid SIGSEGV in WriteCurrentStatetoManifest() // however, all levels bigger or equal to new_levels will be empty std::vector* new_files_list = new std::vector[current_levels]; for (int i = 0; i < new_levels - 1; i++) { new_files_list[i] = vstorage->LevelFiles(i); } if (first_nonempty_level > 0) { auto& new_last_level = new_files_list[new_levels - 1]; new_last_level = vstorage->LevelFiles(first_nonempty_level); for (size_t i = 0; i < new_last_level.size(); ++i) { const FileMetaData* const meta = new_last_level[i]; assert(meta); const uint64_t file_number = meta->fd.GetNumber(); vstorage->file_locations_[file_number] = VersionStorageInfo::FileLocation(new_levels - 1, i); } } delete[] vstorage -> files_; vstorage->files_ = new_files_list; vstorage->num_levels_ = new_levels; MutableCFOptions mutable_cf_options(*options); VersionEdit ve; InstrumentedMutex dummy_mutex; InstrumentedMutexLock l(&dummy_mutex); return versions.LogAndApply( versions.GetColumnFamilySet()->GetDefault(), mutable_cf_options, &ve, &dummy_mutex, nullptr, true); } // Get the checksum information including the checksum and checksum function // name of all SST files in VersionSet. Store the information in // FileChecksumList which contains a map from file number to its checksum info. // If DB is not running, make sure call VersionSet::Recover() to load the file // metadata from Manifest to VersionSet before calling this function. Status VersionSet::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { // Clean the previously stored checksum information if any. 
if (checksum_list == nullptr) { return Status::InvalidArgument("checksum_list is nullptr"); } checksum_list->reset(); for (auto cfd : *column_family_set_) { if (cfd->IsDropped() || !cfd->initialized()) { continue; } for (int level = 0; level < cfd->NumberLevels(); level++) { for (const auto& file : cfd->current()->storage_info()->LevelFiles(level)) { checksum_list->InsertOneFileChecksum(file->fd.GetNumber(), file->file_checksum, file->file_checksum_func_name); } } } return Status::OK(); } Status VersionSet::DumpManifest(Options& options, std::string& dscname, bool verbose, bool hex, bool json) { // Open the specified manifest file. std::unique_ptr file_reader; Status s; { std::unique_ptr file; const std::shared_ptr& fs = options.env->GetFileSystem(); s = fs->NewSequentialFile( dscname, fs->OptimizeForManifestRead(file_options_), &file, nullptr); if (!s.ok()) { return s; } file_reader.reset(new SequentialFileReader( std::move(file), dscname, db_options_->log_readahead_size)); } bool have_prev_log_number = false; bool have_next_file = false; bool have_last_sequence = false; uint64_t next_file = 0; uint64_t last_sequence = 0; uint64_t previous_log_number = 0; int count = 0; std::unordered_map comparators; std::unordered_map> builders; // add default column family VersionEdit default_cf_edit; default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName); default_cf_edit.SetColumnFamily(0); ColumnFamilyData* default_cfd = CreateColumnFamily(ColumnFamilyOptions(options), &default_cf_edit); builders.insert( std::make_pair(0, std::unique_ptr( new BaseReferencedVersionBuilder(default_cfd)))); { VersionSet::LogReporter reporter; reporter.status = &s; log::Reader reader(nullptr, std::move(file_reader), &reporter, true /* checksum */, 0 /* log_number */); Slice record; std::string scratch; while (reader.ReadRecord(&record, &scratch) && s.ok()) { VersionEdit edit; s = edit.DecodeFrom(record); if (!s.ok()) { break; } // Write out each individual edit if (verbose && !json) { 
printf("%s\n", edit.DebugString(hex).c_str()); } else if (json) { printf("%s\n", edit.DebugJSON(count, hex).c_str()); } count++; bool cf_in_builders = builders.find(edit.column_family_) != builders.end(); if (edit.has_comparator_) { comparators.insert({edit.column_family_, edit.comparator_}); } ColumnFamilyData* cfd = nullptr; if (edit.is_column_family_add_) { if (cf_in_builders) { s = Status::Corruption( "Manifest adding the same column family twice"); break; } cfd = CreateColumnFamily(ColumnFamilyOptions(options), &edit); cfd->set_initialized(); builders.insert(std::make_pair( edit.column_family_, std::unique_ptr( new BaseReferencedVersionBuilder(cfd)))); } else if (edit.is_column_family_drop_) { if (!cf_in_builders) { s = Status::Corruption( "Manifest - dropping non-existing column family"); break; } auto builder_iter = builders.find(edit.column_family_); builders.erase(builder_iter); comparators.erase(edit.column_family_); cfd = column_family_set_->GetColumnFamily(edit.column_family_); assert(cfd != nullptr); cfd->UnrefAndTryDelete(); cfd = nullptr; } else { if (!cf_in_builders) { s = Status::Corruption( "Manifest record referencing unknown column family"); break; } cfd = column_family_set_->GetColumnFamily(edit.column_family_); // this should never happen since cf_in_builders is true assert(cfd != nullptr); // if it is not column family add or column family drop, // then it's a file add/delete, which should be forwarded // to builder auto builder = builders.find(edit.column_family_); assert(builder != builders.end()); s = builder->second->version_builder()->Apply(&edit); if (!s.ok()) { break; } } if (cfd != nullptr && edit.has_log_number_) { cfd->SetLogNumber(edit.log_number_); } if (edit.has_prev_log_number_) { previous_log_number = edit.prev_log_number_; have_prev_log_number = true; } if (edit.has_next_file_number_) { next_file = edit.next_file_number_; have_next_file = true; } if (edit.has_last_sequence_) { last_sequence = edit.last_sequence_; 
have_last_sequence = true; } if (edit.has_max_column_family_) { column_family_set_->UpdateMaxColumnFamily(edit.max_column_family_); } if (edit.has_min_log_number_to_keep_) { MarkMinLogNumberToKeep2PC(edit.min_log_number_to_keep_); } } } file_reader.reset(); if (s.ok()) { if (!have_next_file) { s = Status::Corruption("no meta-nextfile entry in descriptor"); printf("no meta-nextfile entry in descriptor"); } else if (!have_last_sequence) { printf("no last-sequence-number entry in descriptor"); s = Status::Corruption("no last-sequence-number entry in descriptor"); } if (!have_prev_log_number) { previous_log_number = 0; } } if (s.ok()) { for (auto cfd : *column_family_set_) { if (cfd->IsDropped()) { continue; } auto builders_iter = builders.find(cfd->GetID()); assert(builders_iter != builders.end()); auto builder = builders_iter->second->version_builder(); Version* v = new Version(cfd, this, file_options_, *cfd->GetLatestMutableCFOptions(), current_version_number_++); s = builder->SaveTo(v->storage_info()); v->PrepareApply(*cfd->GetLatestMutableCFOptions(), false); printf("--------------- Column family \"%s\" (ID %" PRIu32 ") --------------\n", cfd->GetName().c_str(), cfd->GetID()); printf("log number: %" PRIu64 "\n", cfd->GetLogNumber()); auto comparator = comparators.find(cfd->GetID()); if (comparator != comparators.end()) { printf("comparator: %s\n", comparator->second.c_str()); } else { printf("comparator: \n"); } printf("%s \n", v->DebugString(hex).c_str()); delete v; } next_file_number_.store(next_file + 1); last_allocated_sequence_ = last_sequence; last_published_sequence_ = last_sequence; last_sequence_ = last_sequence; prev_log_number_ = previous_log_number; printf("next_file_number %" PRIu64 " last_sequence %" PRIu64 " prev_log_number %" PRIu64 " max_column_family %" PRIu32 " min_log_number_to_keep " "%" PRIu64 "\n", next_file_number_.load(), last_sequence, previous_log_number, column_family_set_->GetMaxColumnFamily(), min_log_number_to_keep_2pc()); } return 
s; } #endif // ROCKSDB_LITE void VersionSet::MarkFileNumberUsed(uint64_t number) { // only called during recovery and repair which are single threaded, so this // works because there can't be concurrent calls if (next_file_number_.load(std::memory_order_relaxed) <= number) { next_file_number_.store(number + 1, std::memory_order_relaxed); } } // Called only either from ::LogAndApply which is protected by mutex or during // recovery which is single-threaded. void VersionSet::MarkMinLogNumberToKeep2PC(uint64_t number) { if (min_log_number_to_keep_2pc_.load(std::memory_order_relaxed) < number) { min_log_number_to_keep_2pc_.store(number, std::memory_order_relaxed); } } Status VersionSet::WriteCurrentStateToManifest( const std::unordered_map& curr_state, log::Writer* log, IOStatus& io_s) { // TODO: Break up into multiple records to reduce memory usage on recovery? // WARNING: This method doesn't hold a mutex!! // This is done without DB mutex lock held, but only within single-threaded // LogAndApply. Column family manipulations can only happen within LogAndApply // (the same single thread), so we're safe to iterate. 
assert(io_s.ok());

  // Optionally start the manifest with a record carrying the DB id.
  if (db_options_->write_dbid_to_manifest) {
    VersionEdit edit_for_db_id;
    assert(!db_id_.empty());
    edit_for_db_id.SetDBId(db_id_);
    std::string db_id_record;
    if (!edit_for_db_id.EncodeTo(&db_id_record)) {
      return Status::Corruption("Unable to Encode VersionEdit:" +
                                edit_for_db_id.DebugString(true));
    }
    io_s = log->AddRecord(db_id_record);
    if (!io_s.ok()) {
      return io_s;
    }
  }

  // Two records per live column family: its identity/comparator, then its
  // files.
  for (auto cfd : *column_family_set_) {
    assert(cfd);

    if (cfd->IsDropped()) {
      continue;
    }
    assert(cfd->initialized());
    {
      // Store column family info
      VersionEdit edit;
      if (cfd->GetID() != 0) {
        // default column family is always there,
        // no need to explicitly write it
        edit.AddColumnFamily(cfd->GetName());
        edit.SetColumnFamily(cfd->GetID());
      }
      edit.SetComparatorName(
          cfd->internal_comparator().user_comparator()->Name());
      std::string record;
      if (!edit.EncodeTo(&record)) {
        return Status::Corruption("Unable to Encode VersionEdit:" +
                                  edit.DebugString(true));
      }
      io_s = log->AddRecord(record);
      if (!io_s.ok()) {
        return io_s;
      }
    }

    {
      // Save files
      VersionEdit edit;
      edit.SetColumnFamily(cfd->GetID());

      assert(cfd->current());
      assert(cfd->current()->storage_info());

      // Table files of the current version, level by level.
      for (int level = 0; level < cfd->NumberLevels(); level++) {
        for (const auto& f :
             cfd->current()->storage_info()->LevelFiles(level)) {
          edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(),
                       f->fd.GetFileSize(), f->smallest, f->largest,
                       f->fd.smallest_seqno, f->fd.largest_seqno,
                       f->marked_for_compaction, f->oldest_blob_file_number,
                       f->oldest_ancester_time, f->file_creation_time,
                       f->file_checksum, f->file_checksum_func_name);
        }
      }

      // Blob files of the current version, plus their garbage counters when
      // there is any garbage.
      const auto& blob_files = cfd->current()->storage_info()->GetBlobFiles();
      for (const auto& pair : blob_files) {
        const uint64_t blob_file_number = pair.first;
        const auto& meta = pair.second;

        assert(meta);
        assert(blob_file_number == meta->GetBlobFileNumber());

        edit.AddBlobFile(blob_file_number, meta->GetTotalBlobCount(),
                         meta->GetTotalBlobBytes(), meta->GetChecksumMethod(),
                         meta->GetChecksumValue());
        if
(meta->GetGarbageBlobCount() > 0) { edit.AddBlobFileGarbage(blob_file_number, meta->GetGarbageBlobCount(), meta->GetGarbageBlobBytes()); } } const auto iter = curr_state.find(cfd->GetID()); assert(iter != curr_state.end()); uint64_t log_number = iter->second.log_number; edit.SetLogNumber(log_number); std::string record; if (!edit.EncodeTo(&record)) { return Status::Corruption( "Unable to Encode VersionEdit:" + edit.DebugString(true)); } io_s = log->AddRecord(record); if (!io_s.ok()) { return io_s; } } } return Status::OK(); } // TODO(aekmekji): in CompactionJob::GenSubcompactionBoundaries(), this // function is called repeatedly with consecutive pairs of slices. For example // if the slice list is [a, b, c, d] this function is called with arguments // (a,b) then (b,c) then (c,d). Knowing this, an optimization is possible where // we avoid doing binary search for the keys b and c twice and instead somehow // maintain state of where they first appear in the files. uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options, Version* v, const Slice& start, const Slice& end, int start_level, int end_level, TableReaderCaller caller) { const auto& icmp = v->cfd_->internal_comparator(); // pre-condition assert(icmp.Compare(start, end) <= 0); uint64_t total_full_size = 0; const auto* vstorage = v->storage_info(); const int num_non_empty_levels = vstorage->num_non_empty_levels(); end_level = (end_level == -1) ? num_non_empty_levels : std::min(end_level, num_non_empty_levels); assert(start_level <= end_level); // Outline of the optimization that uses options.files_size_error_margin. // When approximating the files total size that is used to store a keys range, // we first sum up the sizes of the files that fully fall into the range. // Then we sum up the sizes of all the files that may intersect with the range // (this includes all files in L0 as well). 
Then, if total_intersecting_size // is smaller than total_full_size * options.files_size_error_margin - we can // infer that the intersecting files have a sufficiently negligible // contribution to the total size, and we can approximate the storage required // for the keys in range as just half of the intersecting_files_size. // E.g., if the value of files_size_error_margin is 0.1, then the error of the // approximation is limited to only ~10% of the total size of files that fully // fall into the keys range. In such case, this helps to avoid a costly // process of binary searching the intersecting files that is required only // for a more precise calculation of the total size. autovector first_files; autovector last_files; // scan all the levels for (int level = start_level; level < end_level; ++level) { const LevelFilesBrief& files_brief = vstorage->LevelFilesBrief(level); if (files_brief.num_files == 0) { // empty level, skip exploration continue; } if (level == 0) { // level 0 files are not in sorted order, we need to iterate through // the list to compute the total bytes that require scanning, // so handle the case explicitly (similarly to first_files case) for (size_t i = 0; i < files_brief.num_files; i++) { first_files.push_back(&files_brief.files[i]); } continue; } assert(level > 0); assert(files_brief.num_files > 0); // identify the file position for start key const int idx_start = FindFileInRange(icmp, files_brief, start, 0, static_cast(files_brief.num_files - 1)); assert(static_cast(idx_start) < files_brief.num_files); // identify the file position for end key int idx_end = idx_start; if (icmp.Compare(files_brief.files[idx_end].largest_key, end) < 0) { idx_end = FindFileInRange(icmp, files_brief, end, idx_start, static_cast(files_brief.num_files - 1)); } assert(idx_end >= idx_start && static_cast(idx_end) < files_brief.num_files); // scan all files from the starting index to the ending index // (inferred from the sorted order) // first scan all the 
intermediate full files (excluding first and last) for (int i = idx_start + 1; i < idx_end; ++i) { uint64_t file_size = files_brief.files[i].fd.GetFileSize(); // The entire file falls into the range, so we can just take its size. assert(file_size == ApproximateSize(v, files_brief.files[i], start, end, caller)); total_full_size += file_size; } // save the first and the last files (which may be the same file), so we // can scan them later. first_files.push_back(&files_brief.files[idx_start]); if (idx_start != idx_end) { // we need to estimate size for both files, only if they are different last_files.push_back(&files_brief.files[idx_end]); } } // The sum of all file sizes that intersect the [start, end] keys range. uint64_t total_intersecting_size = 0; for (const auto* file_ptr : first_files) { total_intersecting_size += file_ptr->fd.GetFileSize(); } for (const auto* file_ptr : last_files) { total_intersecting_size += file_ptr->fd.GetFileSize(); } // Now scan all the first & last files at each level, and estimate their size. // If the total_intersecting_size is less than X% of the total_full_size - we // want to approximate the result in order to avoid the costly binary search // inside ApproximateSize. We use half of file size as an approximation below. const double margin = options.files_size_error_margin; if (margin > 0 && total_intersecting_size < static_cast(total_full_size * margin)) { total_full_size += total_intersecting_size / 2; } else { // Estimate for all the first files (might also be last files), at each // level for (const auto file_ptr : first_files) { total_full_size += ApproximateSize(v, *file_ptr, start, end, caller); } // Estimate for all the last files, at each level for (const auto file_ptr : last_files) { // We could use ApproximateSize here, but calling ApproximateOffsetOf // directly is just more efficient. 
total_full_size += ApproximateOffsetOf(v, *file_ptr, end, caller); } } return total_full_size; } uint64_t VersionSet::ApproximateOffsetOf(Version* v, const FdWithKeyRange& f, const Slice& key, TableReaderCaller caller) { // pre-condition assert(v); const auto& icmp = v->cfd_->internal_comparator(); uint64_t result = 0; if (icmp.Compare(f.largest_key, key) <= 0) { // Entire file is before "key", so just add the file size result = f.fd.GetFileSize(); } else if (icmp.Compare(f.smallest_key, key) > 0) { // Entire file is after "key", so ignore result = 0; } else { // "key" falls in the range for this table. Add the // approximate offset of "key" within the table. TableCache* table_cache = v->cfd_->table_cache(); if (table_cache != nullptr) { result = table_cache->ApproximateOffsetOf( key, f.file_metadata->fd, caller, icmp, v->GetMutableCFOptions().prefix_extractor.get()); } } return result; } uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, const Slice& start, const Slice& end, TableReaderCaller caller) { // pre-condition assert(v); const auto& icmp = v->cfd_->internal_comparator(); assert(icmp.Compare(start, end) <= 0); if (icmp.Compare(f.largest_key, start) <= 0 || icmp.Compare(f.smallest_key, end) > 0) { // Entire file is before or after the start/end keys range return 0; } if (icmp.Compare(f.smallest_key, start) >= 0) { // Start of the range is before the file start - approximate by end offset return ApproximateOffsetOf(v, f, end, caller); } if (icmp.Compare(f.largest_key, end) < 0) { // End of the range is after the file end - approximate by subtracting // start offset from the file size uint64_t start_offset = ApproximateOffsetOf(v, f, start, caller); assert(f.fd.GetFileSize() >= start_offset); return f.fd.GetFileSize() - start_offset; } // The interval falls entirely in the range for this file. 
TableCache* table_cache = v->cfd_->table_cache(); if (table_cache == nullptr) { return 0; } return table_cache->ApproximateSize( start, end, f.file_metadata->fd, caller, icmp, v->GetMutableCFOptions().prefix_extractor.get()); } void VersionSet::AddLiveFiles(std::vector* live_table_files, std::vector* live_blob_files) const { assert(live_table_files); assert(live_blob_files); // pre-calculate space requirement size_t total_table_files = 0; size_t total_blob_files = 0; assert(column_family_set_); for (auto cfd : *column_family_set_) { assert(cfd); if (!cfd->initialized()) { continue; } Version* const dummy_versions = cfd->dummy_versions(); assert(dummy_versions); for (Version* v = dummy_versions->next_; v != dummy_versions; v = v->next_) { assert(v); const auto* vstorage = v->storage_info(); assert(vstorage); for (int level = 0; level < vstorage->num_levels(); ++level) { total_table_files += vstorage->LevelFiles(level).size(); } total_blob_files += vstorage->GetBlobFiles().size(); } } // just one time extension to the right size live_table_files->reserve(live_table_files->size() + total_table_files); live_blob_files->reserve(live_blob_files->size() + total_blob_files); assert(column_family_set_); for (auto cfd : *column_family_set_) { assert(cfd); if (!cfd->initialized()) { continue; } auto* current = cfd->current(); bool found_current = false; Version* const dummy_versions = cfd->dummy_versions(); assert(dummy_versions); for (Version* v = dummy_versions->next_; v != dummy_versions; v = v->next_) { v->AddLiveFiles(live_table_files, live_blob_files); if (v == current) { found_current = true; } } if (!found_current && current != nullptr) { // Should never happen unless it is a bug. 
assert(false); current->AddLiveFiles(live_table_files, live_blob_files); } } } InternalIterator* VersionSet::MakeInputIterator( const Compaction* c, RangeDelAggregator* range_del_agg, const FileOptions& file_options_compactions) { auto cfd = c->column_family_data(); ReadOptions read_options; read_options.verify_checksums = true; read_options.fill_cache = false; // Compaction iterators shouldn't be confined to a single prefix. // Compactions use Seek() for // (a) concurrent compactions, // (b) CompactionFilter::Decision::kRemoveAndSkipUntil. read_options.total_order_seek = true; // Level-0 files have to be merged together. For other levels, // we will make a concatenating iterator per level. // TODO(opt): use concatenating iterator for level-0 if there is no overlap const size_t space = (c->level() == 0 ? c->input_levels(0)->num_files + c->num_input_levels() - 1 : c->num_input_levels()); InternalIterator** list = new InternalIterator* [space]; size_t num = 0; for (size_t which = 0; which < c->num_input_levels(); which++) { if (c->input_levels(which)->num_files != 0) { if (c->level(which) == 0) { const LevelFilesBrief* flevel = c->input_levels(which); for (size_t i = 0; i < flevel->num_files; i++) { list[num++] = cfd->table_cache()->NewIterator( read_options, file_options_compactions, cfd->internal_comparator(), *flevel->files[i].file_metadata, range_del_agg, c->mutable_cf_options()->prefix_extractor.get(), /*table_reader_ptr=*/nullptr, /*file_read_hist=*/nullptr, TableReaderCaller::kCompaction, /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/static_cast(c->level(which)), MaxFileSizeForL0MetaPin(*c->mutable_cf_options()), /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, /*allow_unprepared_value=*/false); } } else { // Create concatenating iterator for the files from this level list[num++] = new LevelIterator( cfd->table_cache(), read_options, file_options_compactions, cfd->internal_comparator(), c->input_levels(which), 
c->mutable_cf_options()->prefix_extractor.get(), /*should_sample=*/false, /*no per level latency histogram=*/nullptr, TableReaderCaller::kCompaction, /*skip_filters=*/false, /*level=*/static_cast(c->level(which)), range_del_agg, c->boundaries(which)); } } } assert(num <= space); InternalIterator* result = NewMergingIterator(&c->column_family_data()->internal_comparator(), list, static_cast(num)); delete[] list; return result; } // verify that the files listed in this compaction are present // in the current version bool VersionSet::VerifyCompactionFileConsistency(Compaction* c) { #ifndef NDEBUG Version* version = c->column_family_data()->current(); const VersionStorageInfo* vstorage = version->storage_info(); if (c->input_version() != version) { ROCKS_LOG_INFO( db_options_->info_log, "[%s] compaction output being applied to a different base version from" " input version", c->column_family_data()->GetName().c_str()); if (vstorage->compaction_style_ == kCompactionStyleLevel && c->start_level() == 0 && c->num_input_levels() > 2U) { // We are doing a L0->base_level compaction. The assumption is if // base level is not L1, levels from L1 to base_level - 1 is empty. // This is ensured by having one compaction from L0 going on at the // same time in level-based compaction. So that during the time, no // compaction/flush can put files to those levels. 
for (int l = c->start_level() + 1; l < c->output_level(); l++) { if (vstorage->NumLevelFiles(l) != 0) { return false; } } } } for (size_t input = 0; input < c->num_input_levels(); ++input) { int level = c->level(input); for (size_t i = 0; i < c->num_input_files(input); ++i) { uint64_t number = c->input(input, i)->fd.GetNumber(); bool found = false; for (size_t j = 0; j < vstorage->files_[level].size(); j++) { FileMetaData* f = vstorage->files_[level][j]; if (f->fd.GetNumber() == number) { found = true; break; } } if (!found) { return false; // input files non existent in current version } } } #else (void)c; #endif return true; // everything good } Status VersionSet::GetMetadataForFile(uint64_t number, int* filelevel, FileMetaData** meta, ColumnFamilyData** cfd) { for (auto cfd_iter : *column_family_set_) { if (!cfd_iter->initialized()) { continue; } Version* version = cfd_iter->current(); const auto* vstorage = version->storage_info(); for (int level = 0; level < vstorage->num_levels(); level++) { for (const auto& file : vstorage->LevelFiles(level)) { if (file->fd.GetNumber() == number) { *meta = file; *filelevel = level; *cfd = cfd_iter; return Status::OK(); } } } } return Status::NotFound("File not present in any level"); } void VersionSet::GetLiveFilesMetaData(std::vector* metadata) { for (auto cfd : *column_family_set_) { if (cfd->IsDropped() || !cfd->initialized()) { continue; } for (int level = 0; level < cfd->NumberLevels(); level++) { for (const auto& file : cfd->current()->storage_info()->LevelFiles(level)) { LiveFileMetaData filemetadata; filemetadata.column_family_name = cfd->GetName(); uint32_t path_id = file->fd.GetPathId(); if (path_id < cfd->ioptions()->cf_paths.size()) { filemetadata.db_path = cfd->ioptions()->cf_paths[path_id].path; } else { assert(!cfd->ioptions()->cf_paths.empty()); filemetadata.db_path = cfd->ioptions()->cf_paths.back().path; } const uint64_t file_number = file->fd.GetNumber(); filemetadata.name = MakeTableFileName("", 
file_number); filemetadata.file_number = file_number; filemetadata.level = level; filemetadata.size = static_cast(file->fd.GetFileSize()); filemetadata.smallestkey = file->smallest.user_key().ToString(); filemetadata.largestkey = file->largest.user_key().ToString(); filemetadata.smallest_seqno = file->fd.smallest_seqno; filemetadata.largest_seqno = file->fd.largest_seqno; filemetadata.num_reads_sampled = file->stats.num_reads_sampled.load( std::memory_order_relaxed); filemetadata.being_compacted = file->being_compacted; filemetadata.num_entries = file->num_entries; filemetadata.num_deletions = file->num_deletions; filemetadata.oldest_blob_file_number = file->oldest_blob_file_number; filemetadata.file_checksum = file->file_checksum; filemetadata.file_checksum_func_name = file->file_checksum_func_name; metadata->push_back(filemetadata); } } } }

// Hands over obsolete files to the caller.
//
// Table files and blob files whose number is strictly below
// `min_pending_output` are moved into `*files` / `*blob_files`; the remaining
// entries stay queued in this VersionSet for a later call. The whole list of
// obsolete MANIFEST names is swapped into `*manifest_filenames`.
//
// REQUIRES: all three output pointers are non-null and point to empty vectors.
void VersionSet::GetObsoleteFiles(
    std::vector<ObsoleteFileInfo>* files,
    std::vector<ObsoleteBlobFileInfo>* blob_files,
    std::vector<std::string>* manifest_filenames,
    uint64_t min_pending_output) {
  assert(files);
  assert(blob_files);
  assert(manifest_filenames);
  assert(files->empty());
  assert(blob_files->empty());
  assert(manifest_filenames->empty());

  // Partition obsolete table files around min_pending_output.
  std::vector<ObsoleteFileInfo> pending_files;
  for (auto& f : obsolete_files_) {
    if (f.metadata->fd.GetNumber() < min_pending_output) {
      files->emplace_back(std::move(f));
    } else {
      pending_files.emplace_back(std::move(f));
    }
  }
  obsolete_files_.swap(pending_files);

  // Same partitioning for obsolete blob files.
  std::vector<ObsoleteBlobFileInfo> pending_blob_files;
  for (auto& blob_file : obsolete_blob_files_) {
    if (blob_file.GetBlobFileNumber() < min_pending_output) {
      blob_files->emplace_back(std::move(blob_file));
    } else {
      pending_blob_files.emplace_back(std::move(blob_file));
    }
  }
  obsolete_blob_files_.swap(pending_blob_files);

  // The output vector is empty (asserted above), so this both returns the
  // manifests and clears the internal list.
  obsolete_manifests_.swap(*manifest_filenames);
}

// Creates the column family described by `edit` (which must be a
// column-family-add edit), installs an initial Version for it, creates its
// first memtable and records the edit's log number.
// Returns the newly created ColumnFamilyData.
ColumnFamilyData* VersionSet::CreateColumnFamily(
    const ColumnFamilyOptions& cf_options, const VersionEdit* edit) {
  assert(edit->is_column_family_add_);

  MutableCFOptions dummy_cf_options;
  Version* dummy_versions =
      new Version(nullptr, this, file_options_, dummy_cf_options);
  // Ref() dummy version once so that later we can call Unref() to delete it
  // by avoiding calling "delete" explicitly (~Version is private)
  dummy_versions->Ref();
  auto new_cfd = column_family_set_->CreateColumnFamily(
      edit->column_family_name_, edit->column_family_, dummy_versions,
      cf_options);

  Version* v = new Version(new_cfd, this, file_options_,
                           *new_cfd->GetLatestMutableCFOptions(),
                           current_version_number_++);

  // Fill level target base information.
  v->storage_info()->CalculateBaseBytes(*new_cfd->ioptions(),
                                        *new_cfd->GetLatestMutableCFOptions());
  AppendVersion(new_cfd, v);
  // GetLatestMutableCFOptions() is safe here without mutex since the
  // cfd is not available to client
  new_cfd->CreateNewMemtable(*new_cfd->GetLatestMutableCFOptions(),
                             LastSequence());
  new_cfd->SetLogNumber(edit->log_number_);
  return new_cfd;
}

// Counts the versions in the circular list anchored at `dummy_versions`
// (the anchor itself is not counted).
uint64_t VersionSet::GetNumLiveVersions(Version* dummy_versions) {
  uint64_t count = 0;
  for (Version* v = dummy_versions->next_; v != dummy_versions; v = v->next_) {
    count++;
  }
  return count;
}

// Sums the sizes of all table files referenced by any version in the circular
// list anchored at `dummy_versions`. A file referenced by several versions is
// counted once, keyed by its packed file number and path id.
uint64_t VersionSet::GetTotalSstFilesSize(Version* dummy_versions) {
  std::unordered_set<uint64_t> unique_files;
  uint64_t total_files_size = 0;
  for (Version* v = dummy_versions->next_; v != dummy_versions; v = v->next_) {
    VersionStorageInfo* storage_info = v->storage_info();
    for (int level = 0; level < storage_info->num_levels_; level++) {
      for (const auto& file_meta : storage_info->LevelFiles(level)) {
        // insert() reports whether the key was new, so a single lookup is
        // enough to de-duplicate.
        if (unique_files.insert(file_meta->fd.packed_number_and_path_id)
                .second) {
          total_files_size += file_meta->fd.GetFileSize();
        }
      }
    }
  }
  return total_files_size;
}

// Checks that the size of the file at `fpath`, as reported by the file system,
// matches the size recorded in `meta`.
// Returns the file system error if the size cannot be obtained, Corruption on
// a mismatch, and OK otherwise.
Status VersionSet::VerifyFileMetadata(const std::string& fpath,
                                      const FileMetaData& meta) const {
  uint64_t fsize = 0;
  Status status = fs_->GetFileSize(fpath, IOOptions(), &fsize, nullptr);
  if (status.ok()) {
    if (fsize != meta.fd.GetFileSize()) {
      status = Status::Corruption("File size mismatch: " + fpath);
    }
  }
  return status;
}
ReactiveVersionSet::ReactiveVersionSet(const std::string& dbname, const ImmutableDBOptions* _db_options, const FileOptions& _file_options, Cache* table_cache, WriteBufferManager* write_buffer_manager, WriteController* write_controller) : VersionSet(dbname, _db_options, _file_options, table_cache, write_buffer_manager, write_controller, /*block_cache_tracer=*/nullptr), number_of_edits_to_skip_(0) {} ReactiveVersionSet::~ReactiveVersionSet() {} Status ReactiveVersionSet::Recover( const std::vector& column_families, std::unique_ptr* manifest_reader, std::unique_ptr* manifest_reporter, std::unique_ptr* manifest_reader_status) { assert(manifest_reader != nullptr); assert(manifest_reporter != nullptr); assert(manifest_reader_status != nullptr); std::unordered_map cf_name_to_options; for (const auto& cf : column_families) { cf_name_to_options.insert({cf.name, cf.options}); } // add default column family auto default_cf_iter = cf_name_to_options.find(kDefaultColumnFamilyName); if (default_cf_iter == cf_name_to_options.end()) { return Status::InvalidArgument("Default column family not specified"); } VersionEdit default_cf_edit; default_cf_edit.AddColumnFamily(kDefaultColumnFamilyName); default_cf_edit.SetColumnFamily(0); ColumnFamilyData* default_cfd = CreateColumnFamily(default_cf_iter->second, &default_cf_edit); // In recovery, nobody else can access it, so it's fine to set it to be // initialized earlier. 
default_cfd->set_initialized(); VersionBuilderMap builders; std::unordered_map column_families_not_found; builders.insert( std::make_pair(0, std::unique_ptr( new BaseReferencedVersionBuilder(default_cfd)))); manifest_reader_status->reset(new Status()); manifest_reporter->reset(new LogReporter()); static_cast_with_check(manifest_reporter->get())->status = manifest_reader_status->get(); Status s = MaybeSwitchManifest(manifest_reporter->get(), manifest_reader); log::Reader* reader = manifest_reader->get(); int retry = 0; VersionEdit version_edit; while (s.ok() && retry < 1) { assert(reader != nullptr); s = ReadAndRecover(*reader, &read_buffer_, cf_name_to_options, column_families_not_found, builders, manifest_reader_status->get(), &version_edit); if (s.ok()) { bool enough = version_edit.has_next_file_number_ && version_edit.has_log_number_ && version_edit.has_last_sequence_; if (enough) { for (const auto& cf : column_families) { auto cfd = column_family_set_->GetColumnFamily(cf.name); if (cfd == nullptr) { enough = false; break; } } } if (enough) { for (const auto& cf : column_families) { auto cfd = column_family_set_->GetColumnFamily(cf.name); assert(cfd != nullptr); if (!cfd->IsDropped()) { auto builder_iter = builders.find(cfd->GetID()); assert(builder_iter != builders.end()); auto builder = builder_iter->second->version_builder(); assert(builder != nullptr); s = builder->LoadTableHandlers( cfd->internal_stats(), db_options_->max_file_opening_threads, false /* prefetch_index_and_filter_in_cache */, true /* is_initial_load */, cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions())); if (!s.ok()) { enough = false; if (s.IsPathNotFound()) { s = Status::OK(); } break; } } } } if (enough) { break; } } ++retry; } if (s.ok()) { if (!version_edit.has_prev_log_number_) { version_edit.prev_log_number_ = 0; } column_family_set_->UpdateMaxColumnFamily(version_edit.max_column_family_); 
MarkMinLogNumberToKeep2PC(version_edit.min_log_number_to_keep_); MarkFileNumberUsed(version_edit.prev_log_number_); MarkFileNumberUsed(version_edit.log_number_); for (auto cfd : *column_family_set_) { assert(builders.count(cfd->GetID()) > 0); auto builder = builders[cfd->GetID()]->version_builder(); if (!builder->CheckConsistencyForNumLevels()) { s = Status::InvalidArgument( "db has more levels than options.num_levels"); break; } } } if (s.ok()) { for (auto cfd : *column_family_set_) { if (cfd->IsDropped()) { continue; } assert(cfd->initialized()); auto builders_iter = builders.find(cfd->GetID()); assert(builders_iter != builders.end()); auto* builder = builders_iter->second->version_builder(); Version* v = new Version(cfd, this, file_options_, *cfd->GetLatestMutableCFOptions(), current_version_number_++); s = builder->SaveTo(v->storage_info()); if (s.ok()) { // Install recovered version v->PrepareApply(*cfd->GetLatestMutableCFOptions(), !(db_options_->skip_stats_update_on_db_open)); AppendVersion(cfd, v); } else { ROCKS_LOG_ERROR(db_options_->info_log, "[%s]: inconsistent version: %s\n", cfd->GetName().c_str(), s.ToString().c_str()); delete v; break; } } } if (s.ok()) { next_file_number_.store(version_edit.next_file_number_ + 1); last_allocated_sequence_ = version_edit.last_sequence_; last_published_sequence_ = version_edit.last_sequence_; last_sequence_ = version_edit.last_sequence_; prev_log_number_ = version_edit.prev_log_number_; for (auto cfd : *column_family_set_) { if (cfd->IsDropped()) { continue; } ROCKS_LOG_INFO(db_options_->info_log, "Column family [%s] (ID %u), log number is %" PRIu64 "\n", cfd->GetName().c_str(), cfd->GetID(), cfd->GetLogNumber()); } } return s; } Status ReactiveVersionSet::ReadAndApply( InstrumentedMutex* mu, std::unique_ptr* manifest_reader, std::unordered_set* cfds_changed) { assert(manifest_reader != nullptr); assert(cfds_changed != nullptr); mu->AssertHeld(); Status s; uint64_t applied_edits = 0; while (s.ok()) { Slice record; 
std::string scratch; log::Reader* reader = manifest_reader->get(); std::string old_manifest_path = reader->file()->file_name(); while (reader->ReadRecord(&record, &scratch)) { VersionEdit edit; s = edit.DecodeFrom(record); if (!s.ok()) { break; } // Skip the first VersionEdits of each MANIFEST generated by // VersionSet::WriteCurrentStatetoManifest. if (number_of_edits_to_skip_ > 0) { ColumnFamilyData* cfd = column_family_set_->GetColumnFamily(edit.column_family_); if (cfd != nullptr && !cfd->IsDropped()) { --number_of_edits_to_skip_; } continue; } s = read_buffer_.AddEdit(&edit); if (!s.ok()) { break; } VersionEdit temp_edit; if (edit.is_in_atomic_group_) { if (read_buffer_.IsFull()) { // Apply edits in an atomic group when we have read all edits in the // group. for (auto& e : read_buffer_.replay_buffer()) { s = ApplyOneVersionEditToBuilder(e, cfds_changed, &temp_edit); if (!s.ok()) { break; } applied_edits++; } if (!s.ok()) { break; } read_buffer_.Clear(); } } else { // Apply a normal edit immediately. s = ApplyOneVersionEditToBuilder(edit, cfds_changed, &temp_edit); if (s.ok()) { applied_edits++; } else { break; } } } if (!s.ok()) { // Clear the buffer if we fail to decode/apply an edit. read_buffer_.Clear(); } // It's possible that: // 1) s.IsCorruption(), indicating the current MANIFEST is corrupted. // Or the version(s) rebuilt from tailing the MANIFEST is inconsistent. // 2) we have finished reading the current MANIFEST. // 3) we have encountered an IOError reading the current MANIFEST. // We need to look for the next MANIFEST and start from there. If we cannot // find the next MANIFEST, we should exit the loop. Status tmp_s = MaybeSwitchManifest(reader->GetReporter(), manifest_reader); reader = manifest_reader->get(); if (tmp_s.ok()) { if (reader->file()->file_name() == old_manifest_path) { // Still processing the same MANIFEST, thus no need to continue this // loop since no record is available if we have reached here. 
break; } else { // We have switched to a new MANIFEST whose first records have been // generated by VersionSet::WriteCurrentStatetoManifest. Since the // secondary instance has already finished recovering upon start, there // is no need for the secondary to process these records. Actually, if // the secondary were to replay these records, the secondary may end up // adding the same SST files AGAIN to each column family, causing // consistency checks done by VersionBuilder to fail. Therefore, we // record the number of records to skip at the beginning of the new // MANIFEST and ignore them. number_of_edits_to_skip_ = 0; for (auto* cfd : *column_family_set_) { if (cfd->IsDropped()) { continue; } // Increase number_of_edits_to_skip by 2 because // WriteCurrentStatetoManifest() writes 2 version edits for each // column family at the beginning of the newly-generated MANIFEST. // TODO(yanqin) remove hard-coded value. if (db_options_->write_dbid_to_manifest) { number_of_edits_to_skip_ += 3; } else { number_of_edits_to_skip_ += 2; } } s = tmp_s; } } } if (s.ok()) { for (auto cfd : *column_family_set_) { auto builder_iter = active_version_builders_.find(cfd->GetID()); if (builder_iter == active_version_builders_.end()) { continue; } auto builder = builder_iter->second->version_builder(); if (!builder->CheckConsistencyForNumLevels()) { s = Status::InvalidArgument( "db has more levels than options.num_levels"); break; } } } TEST_SYNC_POINT_CALLBACK("ReactiveVersionSet::ReadAndApply:AppliedEdits", &applied_edits); return s; } Status ReactiveVersionSet::ApplyOneVersionEditToBuilder( VersionEdit& edit, std::unordered_set* cfds_changed, VersionEdit* version_edit) { ColumnFamilyData* cfd = column_family_set_->GetColumnFamily(edit.column_family_); // If we cannot find this column family in our column family set, then it // may be a new column family created by the primary after the secondary // starts. 
It is also possible that the secondary instance opens only a subset // of column families. Ignore it for now. if (nullptr == cfd) { return Status::OK(); } if (active_version_builders_.find(edit.column_family_) == active_version_builders_.end() && !cfd->IsDropped()) { std::unique_ptr builder_guard( new BaseReferencedVersionBuilder(cfd)); active_version_builders_.insert( std::make_pair(edit.column_family_, std::move(builder_guard))); } auto builder_iter = active_version_builders_.find(edit.column_family_); assert(builder_iter != active_version_builders_.end()); auto builder = builder_iter->second->version_builder(); assert(builder != nullptr); if (edit.is_column_family_add_) { // TODO (yanqin) for now the secondary ignores column families created // after Open. This also simplifies handling of switching to a new MANIFEST // and processing the snapshot of the system at the beginning of the // MANIFEST. } else if (edit.is_column_family_drop_) { // Drop the column family by setting it to be 'dropped' without destroying // the column family handle. // TODO (haoyu) figure out how to handle column faimly drop for // secondary instance. (Is it possible that the ref count for cfd is 0 but // the ref count for its versions is higher than 0?) 
cfd->SetDropped(); if (cfd->UnrefAndTryDelete()) { cfd = nullptr; } active_version_builders_.erase(builder_iter); } else { Status s = builder->Apply(&edit); if (!s.ok()) { return s; } } Status s = ExtractInfoFromVersionEdit(cfd, edit, version_edit); if (!s.ok()) { return s; } if (cfd != nullptr && !cfd->IsDropped()) { s = builder->LoadTableHandlers( cfd->internal_stats(), db_options_->max_file_opening_threads, false /* prefetch_index_and_filter_in_cache */, false /* is_initial_load */, cfd->GetLatestMutableCFOptions()->prefix_extractor.get(), MaxFileSizeForL0MetaPin(*cfd->GetLatestMutableCFOptions())); TEST_SYNC_POINT_CALLBACK( "ReactiveVersionSet::ApplyOneVersionEditToBuilder:" "AfterLoadTableHandlers", &s); if (s.ok()) { auto version = new Version(cfd, this, file_options_, *cfd->GetLatestMutableCFOptions(), current_version_number_++); s = builder->SaveTo(version->storage_info()); if (s.ok()) { version->PrepareApply(*cfd->GetLatestMutableCFOptions(), true); AppendVersion(cfd, version); active_version_builders_.erase(builder_iter); if (cfds_changed->count(cfd) == 0) { cfds_changed->insert(cfd); } } else { delete version; } } else if (s.IsPathNotFound()) { s = Status::OK(); } // Some other error has occurred during LoadTableHandlers. 
} if (s.ok()) { if (version_edit->HasNextFile()) { next_file_number_.store(version_edit->next_file_number_ + 1); } if (version_edit->has_last_sequence_) { last_allocated_sequence_ = version_edit->last_sequence_; last_published_sequence_ = version_edit->last_sequence_; last_sequence_ = version_edit->last_sequence_; } if (version_edit->has_prev_log_number_) { prev_log_number_ = version_edit->prev_log_number_; MarkFileNumberUsed(version_edit->prev_log_number_); } if (version_edit->has_log_number_) { MarkFileNumberUsed(version_edit->log_number_); } column_family_set_->UpdateMaxColumnFamily(version_edit->max_column_family_); MarkMinLogNumberToKeep2PC(version_edit->min_log_number_to_keep_); } return s; } Status ReactiveVersionSet::MaybeSwitchManifest( log::Reader::Reporter* reporter, std::unique_ptr* manifest_reader) { assert(manifest_reader != nullptr); Status s; do { std::string manifest_path; s = GetCurrentManifestPath(dbname_, fs_, &manifest_path, &manifest_file_number_); std::unique_ptr manifest_file; if (s.ok()) { if (nullptr == manifest_reader->get() || manifest_reader->get()->file()->file_name() != manifest_path) { TEST_SYNC_POINT( "ReactiveVersionSet::MaybeSwitchManifest:" "AfterGetCurrentManifestPath:0"); TEST_SYNC_POINT( "ReactiveVersionSet::MaybeSwitchManifest:" "AfterGetCurrentManifestPath:1"); s = fs_->NewSequentialFile(manifest_path, env_->OptimizeForManifestRead(file_options_), &manifest_file, nullptr); } else { // No need to switch manifest. 
break; } } std::unique_ptr manifest_file_reader; if (s.ok()) { manifest_file_reader.reset( new SequentialFileReader(std::move(manifest_file), manifest_path, db_options_->log_readahead_size)); manifest_reader->reset(new log::FragmentBufferedReader( nullptr, std::move(manifest_file_reader), reporter, true /* checksum */, 0 /* log_number */)); ROCKS_LOG_INFO(db_options_->info_log, "Switched to new manifest: %s\n", manifest_path.c_str()); // TODO (yanqin) every time we switch to a new MANIFEST, we clear the // active_version_builders_ map because we choose to construct the // versions from scratch, thanks to the first part of each MANIFEST // written by VersionSet::WriteCurrentStatetoManifest. This is not // necessary, but we choose this at present for the sake of simplicity. active_version_builders_.clear(); } } while (s.IsPathNotFound()); return s; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_set.h000066400000000000000000001536241370372246700163560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // The representation of a DBImpl consists of a set of Versions. The // newest version is called "current". Older versions may be kept // around to provide a consistent view to live iterators. // // Each Version keeps track of a set of table files per level, as well as a // set of blob files. The entire set of versions is maintained in a // VersionSet. // // Version,VersionSet are thread-compatible, but require external // synchronization on all accesses. 
#pragma once #include #include #include #include #include #include #include #include #include #include "db/blob/blob_file_meta.h" #include "db/column_family.h" #include "db/compaction/compaction.h" #include "db/compaction/compaction_picker.h" #include "db/dbformat.h" #include "db/file_indexer.h" #include "db/log_reader.h" #include "db/range_del_aggregator.h" #include "db/read_callback.h" #include "db/table_cache.h" #include "db/version_builder.h" #include "db/version_edit.h" #include "db/write_controller.h" #include "monitoring/instrumented_mutex.h" #include "options/db_options.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_checksum.h" #include "table/get_context.h" #include "table/multiget_context.h" #include "trace_replay/block_cache_tracer.h" namespace ROCKSDB_NAMESPACE { namespace log { class Writer; } class Compaction; class LogBuffer; class LookupKey; class MemTable; class Version; class VersionSet; class WriteBufferManager; class MergeContext; class ColumnFamilySet; class MergeIteratorBuilder; // VersionEdit is always supposed to be valid and it is used to point at // entries in Manifest. Ideally it should not be used as a container to // carry around few of its fields as function params because it can cause // readers to think it's a valid entry from Manifest. To avoid that confusion // introducing VersionEditParams to simply carry around multiple VersionEdit // params. It need not point to a valid record in Manifest. using VersionEditParams = VersionEdit; // Return the smallest index i such that file_level.files[i]->largest >= key. // Return file_level.num_files if there is no such file. // REQUIRES: "file_level.files" contains a sorted list of // non-overlapping files. extern int FindFile(const InternalKeyComparator& icmp, const LevelFilesBrief& file_level, const Slice& key); // Returns true iff some file in "files" overlaps the user key range // [*smallest,*largest]. 
// smallest==nullptr represents a key smaller than all keys in the DB. // largest==nullptr represents a key largest than all keys in the DB. // REQUIRES: If disjoint_sorted_files, file_level.files[] // contains disjoint ranges in sorted order. extern bool SomeFileOverlapsRange(const InternalKeyComparator& icmp, bool disjoint_sorted_files, const LevelFilesBrief& file_level, const Slice* smallest_user_key, const Slice* largest_user_key); // Generate LevelFilesBrief from vector // Would copy smallest_key and largest_key data to sequential memory // arena: Arena used to allocate the memory extern void DoGenerateLevelFilesBrief(LevelFilesBrief* file_level, const std::vector& files, Arena* arena); // Information of the storage associated with each Version, including number of // levels of LSM tree, files information at each level, files marked for // compaction, blob files, etc. class VersionStorageInfo { public: VersionStorageInfo(const InternalKeyComparator* internal_comparator, const Comparator* user_comparator, int num_levels, CompactionStyle compaction_style, VersionStorageInfo* src_vstorage, bool _force_consistency_checks); // No copying allowed VersionStorageInfo(const VersionStorageInfo&) = delete; void operator=(const VersionStorageInfo&) = delete; ~VersionStorageInfo(); void Reserve(int level, size_t size) { files_[level].reserve(size); } void AddFile(int level, FileMetaData* f, Logger* info_log = nullptr); void AddBlobFile(std::shared_ptr blob_file_meta); void SetFinalized(); // Update num_non_empty_levels_. void UpdateNumNonEmptyLevels(); void GenerateFileIndexer() { file_indexer_.UpdateIndex(&arena_, num_non_empty_levels_, files_); } // Update the accumulated stats from a file-meta. 
void UpdateAccumulatedStats(FileMetaData* file_meta); // Decrease the current stat from a to-be-deleted file-meta void RemoveCurrentStats(FileMetaData* file_meta); void ComputeCompensatedSizes(); // Updates internal structures that keep track of compaction scores // We use compaction scores to figure out which compaction to do next // REQUIRES: db_mutex held!! // TODO find a better way to pass compaction_options_fifo. void ComputeCompactionScore(const ImmutableCFOptions& immutable_cf_options, const MutableCFOptions& mutable_cf_options); // Estimate est_comp_needed_bytes_ void EstimateCompactionBytesNeeded( const MutableCFOptions& mutable_cf_options); // This computes files_marked_for_compaction_ and is called by // ComputeCompactionScore() void ComputeFilesMarkedForCompaction(); // This computes ttl_expired_files_ and is called by // ComputeCompactionScore() void ComputeExpiredTtlFiles(const ImmutableCFOptions& ioptions, const uint64_t ttl); // This computes files_marked_for_periodic_compaction_ and is called by // ComputeCompactionScore() void ComputeFilesMarkedForPeriodicCompaction( const ImmutableCFOptions& ioptions, const uint64_t periodic_compaction_seconds); // This computes bottommost_files_marked_for_compaction_ and is called by // ComputeCompactionScore() or UpdateOldestSnapshot(). // // Among bottommost files (assumes they've already been computed), marks the // ones that have keys that would be eliminated if recompacted, according to // the seqnum of the oldest existing snapshot. Must be called every time // oldest snapshot changes as that is when bottom-level files can become // eligible for compaction. // // REQUIRES: DB mutex held void ComputeBottommostFilesMarkedForCompaction(); // Generate level_files_brief_ from files_ void GenerateLevelFilesBrief(); // Sort all files for this version based on their file size and // record results in files_by_compaction_pri_. The largest files are listed // first. 
void UpdateFilesByCompactionPri(CompactionPri compaction_pri); void GenerateLevel0NonOverlapping(); bool level0_non_overlapping() const { return level0_non_overlapping_; } // Check whether each file in this version is bottommost (i.e., nothing in its // key-range could possibly exist in an older file/level). // REQUIRES: This version has not been saved void GenerateBottommostFiles(); // Updates the oldest snapshot and related internal state, like the bottommost // files marked for compaction. // REQUIRES: DB mutex held void UpdateOldestSnapshot(SequenceNumber oldest_snapshot_seqnum); int MaxInputLevel() const; int MaxOutputLevel(bool allow_ingest_behind) const; // Return level number that has idx'th highest score int CompactionScoreLevel(int idx) const { return compaction_level_[idx]; } // Return idx'th highest score double CompactionScore(int idx) const { return compaction_score_[idx]; } void GetOverlappingInputs( int level, const InternalKey* begin, // nullptr means before all keys const InternalKey* end, // nullptr means after all keys std::vector* inputs, int hint_index = -1, // index of overlap file int* file_index = nullptr, // return index of overlap file bool expand_range = true, // if set, returns files which overlap the // range and overlap each other. 
If false, // then just files intersecting the range InternalKey** next_smallest = nullptr) // if non-null, returns the const; // smallest key of next file not included void GetCleanInputsWithinInterval( int level, const InternalKey* begin, // nullptr means before all keys const InternalKey* end, // nullptr means after all keys std::vector* inputs, int hint_index = -1, // index of overlap file int* file_index = nullptr) // return index of overlap file const; void GetOverlappingInputsRangeBinarySearch( int level, // level > 0 const InternalKey* begin, // nullptr means before all keys const InternalKey* end, // nullptr means after all keys std::vector* inputs, int hint_index, // index of overlap file int* file_index, // return index of overlap file bool within_interval = false, // if set, force the inputs within interval InternalKey** next_smallest = nullptr) // if non-null, returns the const; // smallest key of next file not included // Returns true iff some file in the specified level overlaps // some part of [*smallest_user_key,*largest_user_key]. // smallest_user_key==NULL represents a key smaller than all keys in the DB. // largest_user_key==NULL represents a key largest than all keys in the DB. bool OverlapInLevel(int level, const Slice* smallest_user_key, const Slice* largest_user_key); // Returns true iff the first or last file in inputs contains // an overlapping user key to the file "just outside" of it (i.e. // just after the last file, or just before the first file) // REQUIRES: "*inputs" is a sorted list of non-overlapping files bool HasOverlappingUserKey(const std::vector* inputs, int level); int num_levels() const { return num_levels_; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) int num_non_empty_levels() const { assert(finalized_); return num_non_empty_levels_; } // REQUIRES: This version has been finalized. // (CalculateBaseBytes() is called) // This may or may not return number of level files. 
It is to keep backward // compatible behavior in universal compaction. int l0_delay_trigger_count() const { return l0_delay_trigger_count_; } void set_l0_delay_trigger_count(int v) { l0_delay_trigger_count_ = v; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) int NumLevelFiles(int level) const { assert(finalized_); return static_cast(files_[level].size()); } // Return the combined file size of all files at the specified level. uint64_t NumLevelBytes(int level) const; // REQUIRES: This version has been saved (see VersionSet::SaveTo) const std::vector& LevelFiles(int level) const { return files_[level]; } class FileLocation { public: FileLocation() = default; FileLocation(int level, size_t position) : level_(level), position_(position) {} int GetLevel() const { return level_; } size_t GetPosition() const { return position_; } bool IsValid() const { return level_ >= 0; } bool operator==(const FileLocation& rhs) const { return level_ == rhs.level_ && position_ == rhs.position_; } bool operator!=(const FileLocation& rhs) const { return !(*this == rhs); } static FileLocation Invalid() { return FileLocation(); } private: int level_ = -1; size_t position_ = 0; }; // REQUIRES: This version has been saved (see VersionSet::SaveTo) FileLocation GetFileLocation(uint64_t file_number) const { const auto it = file_locations_.find(file_number); if (it == file_locations_.end()) { return FileLocation::Invalid(); } assert(it->second.GetLevel() < num_levels_); assert(it->second.GetPosition() < files_[it->second.GetLevel()].size()); assert(files_[it->second.GetLevel()][it->second.GetPosition()]); assert(files_[it->second.GetLevel()][it->second.GetPosition()] ->fd.GetNumber() == file_number); return it->second; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) FileMetaData* GetFileMetaDataByNumber(uint64_t file_number) const { auto location = GetFileLocation(file_number); if (!location.IsValid()) { return nullptr; } return 
files_[location.GetLevel()][location.GetPosition()]; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) using BlobFiles = std::map>; const BlobFiles& GetBlobFiles() const { return blob_files_; } const ROCKSDB_NAMESPACE::LevelFilesBrief& LevelFilesBrief(int level) const { assert(level < static_cast(level_files_brief_.size())); return level_files_brief_[level]; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) const std::vector& FilesByCompactionPri(int level) const { assert(finalized_); return files_by_compaction_pri_[level]; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) // REQUIRES: DB mutex held during access const autovector>& FilesMarkedForCompaction() const { assert(finalized_); return files_marked_for_compaction_; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) // REQUIRES: DB mutex held during access const autovector>& ExpiredTtlFiles() const { assert(finalized_); return expired_ttl_files_; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) // REQUIRES: DB mutex held during access const autovector>& FilesMarkedForPeriodicCompaction() const { assert(finalized_); return files_marked_for_periodic_compaction_; } void TEST_AddFileMarkedForPeriodicCompaction(int level, FileMetaData* f) { files_marked_for_periodic_compaction_.emplace_back(level, f); } // REQUIRES: This version has been saved (see VersionSet::SaveTo) // REQUIRES: DB mutex held during access const autovector>& BottommostFilesMarkedForCompaction() const { assert(finalized_); return bottommost_files_marked_for_compaction_; } int base_level() const { return base_level_; } double level_multiplier() const { return level_multiplier_; } // REQUIRES: lock is held // Set the index that is used to offset into files_by_compaction_pri_ to find // the next compaction candidate file. 
void SetNextCompactionIndex(int level, int index) { next_file_to_compact_by_size_[level] = index; } // REQUIRES: lock is held int NextCompactionIndex(int level) const { return next_file_to_compact_by_size_[level]; } // REQUIRES: This version has been saved (see VersionSet::SaveTo) const FileIndexer& file_indexer() const { assert(finalized_); return file_indexer_; } // Only the first few entries of files_by_compaction_pri_ are sorted. // There is no need to sort all the files because it is likely // that on a running system, we need to look at only the first // few largest files because a new version is created every few // seconds/minutes (because of concurrent compactions). static const size_t kNumberFilesToSort = 50; // Return a human-readable short (single-line) summary of the number // of files per level. Uses *scratch as backing store. struct LevelSummaryStorage { char buffer[1000]; }; struct FileSummaryStorage { char buffer[3000]; }; const char* LevelSummary(LevelSummaryStorage* scratch) const; // Return a human-readable short (single-line) summary of files // in a specified level. Uses *scratch as backing store. const char* LevelFileSummary(FileSummaryStorage* scratch, int level) const; // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. int64_t MaxNextLevelOverlappingBytes(); // Return a human readable string that describes this version's contents. 
// NOTE(review): this text appears to have lost its original line breaks and
// every `<...>` template-argument list (e.g. `std::vector level_max_bytes_`
// below has no element type). Declarations are reproduced token-for-token;
// restore the missing arguments from the original header before compiling.
std::string DebugString(bool hex = false) const;

// Returns 0 when no non-deletion entries have been accumulated. Otherwise
// returns the average raw value size per non-deletion entry, scaled by
// accumulated_file_size_ / (accumulated_raw_key_size_ +
// accumulated_raw_value_size_). The expression is evaluated left to right in
// unsigned integer arithmetic, so each division truncates.
uint64_t GetAverageValueSize() const {
  if (accumulated_num_non_deletions_ == 0) {
    return 0;
  }
  assert(accumulated_raw_key_size_ + accumulated_raw_value_size_ > 0);
  assert(accumulated_file_size_ > 0);
  return accumulated_raw_value_size_ / accumulated_num_non_deletions_ *
         accumulated_file_size_ /
         (accumulated_raw_key_size_ + accumulated_raw_value_size_);
}

uint64_t GetEstimatedActiveKeys() const;

double GetEstimatedCompressionRatioAtLevel(int level) const;

// Re-initializes the index that is used to offset into
// files_by_compaction_pri_ to find the next compaction candidate file.
// `level` is used as an index without a bounds check.
void ResetNextCompactionIndex(int level) {
  next_file_to_compact_by_size_[level] = 0;
}

// Returns the stored internal key comparator pointer.
const InternalKeyComparator* InternalComparator() {
  return internal_comparator_;
}

// Returns maximum total bytes of data on a given level.
uint64_t MaxBytesForLevel(int level) const;

// Must be called after any change to MutableCFOptions.
void CalculateBaseBytes(const ImmutableCFOptions& ioptions,
                        const MutableCFOptions& options);

// Returns an estimate of the amount of live data in bytes.
uint64_t EstimateLiveDataSize() const;

uint64_t estimated_compaction_needed_bytes() const {
  return estimated_compaction_needed_bytes_;
}

// Test hook: overwrites estimated_compaction_needed_bytes_ with `v`.
void TEST_set_estimated_compaction_needed_bytes(uint64_t v) {
  estimated_compaction_needed_bytes_ = v;
}

bool force_consistency_checks() const { return force_consistency_checks_; }

SequenceNumber bottommost_files_mark_threshold() const {
  return bottommost_files_mark_threshold_;
}

// Returns whether any key in [`smallest_key`, `largest_key`] could appear in
// an older L0 file than `last_l0_idx` or in a greater level than `last_level`
//
// @param last_level Level after which we check for overlap
// @param last_l0_idx If `last_level == 0`, index of L0 file after which we
//    check for overlap; otherwise, must be -1
bool RangeMightExistAfterSortedRun(const Slice& smallest_user_key,
                                   const Slice& largest_user_key,
                                   int last_level, int last_l0_idx);

private:
const InternalKeyComparator* internal_comparator_;
const Comparator* user_comparator_;
int num_levels_;            // Number of levels
int num_non_empty_levels_;  // Number of levels. Any level larger than it
                            // is guaranteed to be empty.
// Per-level max bytes
std::vector level_max_bytes_;

// A short brief metadata of files per level
autovector level_files_brief_;
FileIndexer file_indexer_;
Arena arena_;  // Used to allocate space for file_levels_

CompactionStyle compaction_style_;

// List of files per level, files in each level are arranged
// in increasing order of keys
std::vector* files_;

// Map of all table files in version. Maps file number to (level, position on
// level).
using FileLocations = std::unordered_map;
FileLocations file_locations_;

// Map of blob files in version by number.
BlobFiles blob_files_;

// Level that L0 data should be compacted to. All levels < base_level_ should
// be empty. -1 if it is not level-compaction so it's not applicable.
int base_level_;

double level_multiplier_;

// A list for the same set of files that are stored in files_,
// but files in each level are now sorted based on file
// size. The file with the largest size is at the front.
// This vector stores the index of the file from files_.
std::vector> files_by_compaction_pri_;

// If true, means that files in L0 have keys with non overlapping ranges
bool level0_non_overlapping_;

// An index into files_by_compaction_pri_ that specifies the first
// file that is not yet compacted
std::vector next_file_to_compact_by_size_;

// Only the first few entries of files_by_compaction_pri_ are sorted.
// There is no need to sort all the files because it is likely
// that on a running system, we need to look at only the first
// few largest files because a new version is created every few
// seconds/minutes (because of concurrent compactions).
static const size_t number_of_files_to_sort_ = 50;

// This vector contains list of files marked for compaction and also not
// currently being compacted. It is protected by DB mutex. It is calculated in
// ComputeCompactionScore()
autovector> files_marked_for_compaction_;

autovector> expired_ttl_files_;

autovector> files_marked_for_periodic_compaction_;

// These files are considered bottommost because none of their keys can exist
// at lower levels. They are not necessarily all in the same level. The marked
// ones are eligible for compaction because they contain duplicate key
// versions that are no longer protected by snapshot. These variables are
// protected by DB mutex and are calculated in `GenerateBottommostFiles()` and
// `ComputeBottommostFilesMarkedForCompaction()`.
autovector> bottommost_files_;
autovector> bottommost_files_marked_for_compaction_;

// Threshold for needing to mark another bottommost file. Maintain it so we
// can quickly check when releasing a snapshot whether more bottommost files
// became eligible for compaction. It's defined as the min of the max nonzero
// seqnums of unmarked bottommost files.
SequenceNumber bottommost_files_mark_threshold_ = kMaxSequenceNumber;

// Monotonically increases as we release old snapshots. Zero indicates no
// snapshots have been released yet. When no snapshots remain we set it to the
// current seqnum, which needs to be protected as a snapshot can still be
// created that references it.
SequenceNumber oldest_snapshot_seqnum_ = 0;

// Level that should be compacted next and its compaction score.
// Score < 1 means compaction is not strictly needed.  These fields
// are initialized by Finalize().
// The most critical level to be compacted is listed first
// These are used to pick the best compaction level
std::vector compaction_score_;
std::vector compaction_level_;
int l0_delay_trigger_count_ = 0;  // Count used to trigger slow down and stop
                                  // for number of L0 files.

// the following are the sampled temporary stats.
// the current accumulated size of sampled files.
uint64_t accumulated_file_size_;
// the current accumulated size of all raw keys based on the sampled files.
uint64_t accumulated_raw_key_size_;
// the current accumulated size of all raw values based on the sampled files.
uint64_t accumulated_raw_value_size_;
// total number of non-deletion entries
uint64_t accumulated_num_non_deletions_;
// total number of deletion entries
uint64_t accumulated_num_deletions_;
// current number of non_deletion entries
uint64_t current_num_non_deletions_;
// current number of deletion entries
uint64_t current_num_deletions_;
// current number of file samples
uint64_t current_num_samples_;
// Estimated bytes needed to be compacted until all levels' size is down to
// target sizes.
uint64_t estimated_compaction_needed_bytes_; bool finalized_; // If set to true, we will run consistency checks even if RocksDB // is compiled in release mode bool force_consistency_checks_; friend class Version; friend class VersionSet; }; using MultiGetRange = MultiGetContext::Range; // A column family's version consists of the table and blob files owned by // the column family at a certain point in time. class Version { public: // Append to *iters a sequence of iterators that will // yield the contents of this Version when merged together. // REQUIRES: This version has been saved (see VersionSet::SaveTo) void AddIterators(const ReadOptions&, const FileOptions& soptions, MergeIteratorBuilder* merger_iter_builder, RangeDelAggregator* range_del_agg, bool allow_unprepared_value); void AddIteratorsForLevel(const ReadOptions&, const FileOptions& soptions, MergeIteratorBuilder* merger_iter_builder, int level, RangeDelAggregator* range_del_agg, bool allow_unprepared_value); Status OverlapWithLevelIterator(const ReadOptions&, const FileOptions&, const Slice& smallest_user_key, const Slice& largest_user_key, int level, bool* overlap); // Lookup the value for key or get all merge operands for key. // If do_merge = true (default) then lookup value for key. // Behavior if do_merge = true: // If found, store it in *value and // return OK. Else return a non-OK status. // Uses *operands to store merge_operator operations to apply later. // // If the ReadOptions.read_tier is set to do a read-only fetch, then // *value_found will be set to false if it cannot be determined whether // this value exists without doing IO. // // If the key is Deleted, *status will be set to NotFound and // *key_exists will be set to true. // If no key was found, *status will be set to NotFound and // *key_exists will be set to false. // If seq is non-null, *seq will be set to the sequence number found // for the key if a key was found. 
// Behavior if do_merge = false // If the key has any merge operands then store them in // merge_context.operands_list and don't merge the operands // REQUIRES: lock is not held void Get(const ReadOptions&, const LookupKey& key, PinnableSlice* value, std::string* timestamp, Status* status, MergeContext* merge_context, SequenceNumber* max_covering_tombstone_seq, bool* value_found = nullptr, bool* key_exists = nullptr, SequenceNumber* seq = nullptr, ReadCallback* callback = nullptr, bool* is_blob = nullptr, bool do_merge = true); void MultiGet(const ReadOptions&, MultiGetRange* range, ReadCallback* callback = nullptr, bool* is_blob = nullptr); // Loads some stats information from files. Call without mutex held. It needs // to be called before applying the version to the version set. void PrepareApply(const MutableCFOptions& mutable_cf_options, bool update_stats); // Reference count management (so Versions do not disappear out from // under live iterators) void Ref(); // Decrease reference count. Delete the object if no reference left // and return true. Otherwise, return false. bool Unref(); // Add all files listed in the current version to *live_table_files and // *live_blob_files. void AddLiveFiles(std::vector* live_table_files, std::vector* live_blob_files) const; // Return a human readable string that describes this version's contents. std::string DebugString(bool hex = false, bool print_stats = false) const; // Returns the version number of this version uint64_t GetVersionNumber() const { return version_number_; } // REQUIRES: lock is held // On success, "tp" will contains the table properties of the file // specified in "file_meta". If the file name of "file_meta" is // known ahead, passing it by a non-null "fname" can save a // file-name conversion. 
Status GetTableProperties(std::shared_ptr* tp, const FileMetaData* file_meta, const std::string* fname = nullptr) const; // REQUIRES: lock is held // On success, *props will be populated with all SSTables' table properties. // The keys of `props` are the sst file name, the values of `props` are the // tables' properties, represented as std::shared_ptr. Status GetPropertiesOfAllTables(TablePropertiesCollection* props); Status GetPropertiesOfAllTables(TablePropertiesCollection* props, int level); Status GetPropertiesOfTablesInRange(const Range* range, std::size_t n, TablePropertiesCollection* props) const; // Print summary of range delete tombstones in SST files into out_str, // with maximum max_entries_to_print entries printed out. Status TablesRangeTombstoneSummary(int max_entries_to_print, std::string* out_str); // REQUIRES: lock is held // On success, "tp" will contains the aggregated table property among // the table properties of all sst files in this version. Status GetAggregatedTableProperties( std::shared_ptr* tp, int level = -1); uint64_t GetEstimatedActiveKeys() { return storage_info_.GetEstimatedActiveKeys(); } size_t GetMemoryUsageByTableReaders(); ColumnFamilyData* cfd() const { return cfd_; } // Return the next Version in the linked list. Used for debug only Version* TEST_Next() const { return next_; } int TEST_refs() const { return refs_; } VersionStorageInfo* storage_info() { return &storage_info_; } VersionSet* version_set() { return vset_; } void GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta); uint64_t GetSstFilesSize(); // Retrieves the file_creation_time of the oldest file in the DB. 
// Prerequisite for this API is max_open_files = -1 void GetCreationTimeOfOldestFile(uint64_t* creation_time); const MutableCFOptions& GetMutableCFOptions() { return mutable_cf_options_; } private: Env* env_; FileSystem* fs_; friend class ReactiveVersionSet; friend class VersionSet; friend class VersionEditHandler; friend class VersionEditHandlerPointInTime; const InternalKeyComparator* internal_comparator() const { return storage_info_.internal_comparator_; } const Comparator* user_comparator() const { return storage_info_.user_comparator_; } // Returns true if the filter blocks in the specified level will not be // checked during read operations. In certain cases (trivial move or preload), // the filter block may already be cached, but we still do not access it such // that it eventually expires from the cache. bool IsFilterSkipped(int level, bool is_file_last_in_level = false); // The helper function of UpdateAccumulatedStats, which may fill the missing // fields of file_meta from its associated TableProperties. // Returns true if it does initialize FileMetaData. bool MaybeInitializeFileMetaData(FileMetaData* file_meta); // Update the accumulated stats associated with the current version. // This accumulated stats will be used in compaction. void UpdateAccumulatedStats(bool update_stats); // Sort all files for this version based on their file size and // record results in files_by_compaction_pri_. The largest files are listed // first. 
void UpdateFilesByCompactionPri(); ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs Logger* info_log_; Statistics* db_statistics_; TableCache* table_cache_; const MergeOperator* merge_operator_; VersionStorageInfo storage_info_; VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list int refs_; // Number of live refs to this version const FileOptions file_options_; const MutableCFOptions mutable_cf_options_; // Cached value to avoid recomputing it on every read. const size_t max_file_size_for_l0_meta_pin_; // A version number that uniquely represents this version. This is // used for debugging and logging purposes only. uint64_t version_number_; Version(ColumnFamilyData* cfd, VersionSet* vset, const FileOptions& file_opt, MutableCFOptions mutable_cf_options, uint64_t version_number = 0); ~Version(); // No copying allowed Version(const Version&) = delete; void operator=(const Version&) = delete; }; struct ObsoleteFileInfo { FileMetaData* metadata; std::string path; ObsoleteFileInfo() noexcept : metadata(nullptr) {} ObsoleteFileInfo(FileMetaData* f, const std::string& file_path) : metadata(f), path(file_path) {} ObsoleteFileInfo(const ObsoleteFileInfo&) = delete; ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete; ObsoleteFileInfo(ObsoleteFileInfo&& rhs) noexcept : ObsoleteFileInfo() { *this = std::move(rhs); } ObsoleteFileInfo& operator=(ObsoleteFileInfo&& rhs) noexcept { path = std::move(rhs.path); metadata = rhs.metadata; rhs.metadata = nullptr; return *this; } void DeleteMetadata() { delete metadata; metadata = nullptr; } }; class ObsoleteBlobFileInfo { public: ObsoleteBlobFileInfo(uint64_t blob_file_number, std::string path) : blob_file_number_(blob_file_number), path_(std::move(path)) {} uint64_t GetBlobFileNumber() const { return blob_file_number_; } const std::string& GetPath() const { return path_; } private: uint64_t 
blob_file_number_;
std::string path_;
};

class BaseReferencedVersionBuilder;

// NOTE(review): this text appears to have lost every `<...>` template-argument
// list (e.g. `std::vector replay_buffer_`, `std::atomic last_sequence_`,
// `std::numeric_limits::max()`). Declarations below are reproduced as found;
// restore the arguments from the original header before compiling.
class AtomicGroupReadBuffer {
 public:
  Status AddEdit(VersionEdit* edit);
  void Clear();
  bool IsFull() const;
  bool IsEmpty() const;

  // Test hook: returns the current value of read_edits_in_atomic_group_.
  uint64_t TEST_read_edits_in_atomic_group() const {
    return read_edits_in_atomic_group_;
  }
  // Returns a mutable reference to the internal replay buffer.
  std::vector& replay_buffer() { return replay_buffer_; }

 private:
  uint64_t read_edits_in_atomic_group_ = 0;
  std::vector replay_buffer_;
};

// VersionSet is the collection of versions of all the column families of the
// database. Each database owns one VersionSet. A VersionSet has access to all
// column families via ColumnFamilySet, i.e. set of the column families.
class VersionSet {
 public:
  VersionSet(const std::string& dbname, const ImmutableDBOptions* db_options,
             const FileOptions& file_options, Cache* table_cache,
             WriteBufferManager* write_buffer_manager,
             WriteController* write_controller,
             BlockCacheTracer* const block_cache_tracer);
  // No copying allowed
  VersionSet(const VersionSet&) = delete;
  void operator=(const VersionSet&) = delete;

  virtual ~VersionSet();

  // Apply *edit to the current version to form a new descriptor that
  // is both saved to persistent state and installed as the new
  // current version.  Will release *mu while actually writing to the file.
  // column_family_options has to be set if edit is column family add
  // REQUIRES: *mu is held on entry.
  // REQUIRES: no other thread concurrently calls LogAndApply()
  //
  // This overload wraps its single column family, options and edit into
  // one-element lists and forwards to the multi-column-family overload.
  Status LogAndApply(
      ColumnFamilyData* column_family_data,
      const MutableCFOptions& mutable_cf_options, VersionEdit* edit,
      InstrumentedMutex* mu, FSDirectory* db_directory = nullptr,
      bool new_descriptor_log = false,
      const ColumnFamilyOptions* column_family_options = nullptr) {
    autovector cfds;
    cfds.emplace_back(column_family_data);
    autovector mutable_cf_options_list;
    mutable_cf_options_list.emplace_back(&mutable_cf_options);
    autovector> edit_lists;
    autovector edit_list;
    edit_list.emplace_back(edit);
    edit_lists.emplace_back(edit_list);
    return LogAndApply(cfds, mutable_cf_options_list, edit_lists, mu,
                       db_directory, new_descriptor_log, column_family_options);
  }
  // The batch version. If edit_list.size() > 1, caller must ensure that
  // no edit in the list is a column family add or drop.
  // Forwards to the multi-column-family overload with a single edit list.
  Status LogAndApply(
      ColumnFamilyData* column_family_data,
      const MutableCFOptions& mutable_cf_options,
      const autovector& edit_list, InstrumentedMutex* mu,
      FSDirectory* db_directory = nullptr, bool new_descriptor_log = false,
      const ColumnFamilyOptions* column_family_options = nullptr) {
    autovector cfds;
    cfds.emplace_back(column_family_data);
    autovector mutable_cf_options_list;
    mutable_cf_options_list.emplace_back(&mutable_cf_options);
    autovector> edit_lists;
    edit_lists.emplace_back(edit_list);
    return LogAndApply(cfds, mutable_cf_options_list, edit_lists, mu,
                       db_directory, new_descriptor_log, column_family_options);
  }

  // The across-multi-cf batch version. If edit_lists contains more than
  // 1 version edit, caller must ensure that no edit in the list is a column
  // family manipulation.
  virtual Status LogAndApply(
      const autovector& cfds,
      const autovector& mutable_cf_options_list,
      const autovector>& edit_lists,
      InstrumentedMutex* mu, FSDirectory* db_directory = nullptr,
      bool new_descriptor_log = false,
      const ColumnFamilyOptions* new_cf_options = nullptr);

  static Status GetCurrentManifestPath(const std::string& dbname,
                                       FileSystem* fs,
                                       std::string* manifest_filename,
                                       uint64_t* manifest_file_number);

  // Recover the last saved descriptor from persistent storage.
  // If read_only == true, Recover() will not complain if some column families
  // are not opened
  Status Recover(const std::vector& column_families,
                 bool read_only = false, std::string* db_id = nullptr);

  Status TryRecover(const std::vector& column_families,
                    bool read_only, std::string* db_id,
                    bool* has_missing_table_file);

  // Try to recover the version set to the most recent consistent state
  // recorded in the specified manifest.
  Status TryRecoverFromOneManifest(
      const std::string& manifest_path,
      const std::vector& column_families,
      bool read_only, std::string* db_id, bool* has_missing_table_file);

  // Reads a manifest file and returns a list of column families in
  // column_families.
  static Status ListColumnFamilies(std::vector* column_families,
                                   const std::string& dbname, FileSystem* fs);

#ifndef ROCKSDB_LITE
  // Try to reduce the number of levels. This call is valid when
  // only one level from the new max level to the old
  // max level containing files.
  // The call is static, since number of levels is immutable during
  // the lifetime of a RocksDB instance. It reduces number of levels
  // in a DB by applying changes to manifest.
  // For example, a db currently has 7 levels [0-6], and a call
  // to reduce to 5 [0-4] can only be executed when only one level
  // among [4-6] contains files.
  static Status ReduceNumberOfLevels(const std::string& dbname,
                                     const Options* options,
                                     const FileOptions& file_options,
                                     int new_levels);

  // Get the checksum information of all live files
  Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list);

  // printf contents (for debugging)
  Status DumpManifest(Options& options, std::string& manifestFileName,
                      bool verbose, bool hex = false, bool json = false);

#endif  // ROCKSDB_LITE

  // Return the current manifest file number
  uint64_t manifest_file_number() const { return manifest_file_number_; }

  uint64_t options_file_number() const { return options_file_number_; }

  uint64_t pending_manifest_file_number() const {
    return pending_manifest_file_number_;
  }

  uint64_t current_next_file_number() const { return next_file_number_.load(); }

  uint64_t min_log_number_to_keep_2pc() const {
    return min_log_number_to_keep_2pc_.load();
  }

  // Allocate and return a new file number
  uint64_t NewFileNumber() { return next_file_number_.fetch_add(1); }

  // Fetch And Add n new file number
  // (fetch_add returns the value held before the addition.)
  uint64_t FetchAddFileNumber(uint64_t n) {
    return next_file_number_.fetch_add(n);
  }

  // Return the last sequence number.
  uint64_t LastSequence() const {
    return last_sequence_.load(std::memory_order_acquire);
  }

  // Note: memory_order_acquire must be sufficient.
  // (The load below currently uses memory_order_seq_cst.)
  uint64_t LastAllocatedSequence() const {
    return last_allocated_sequence_.load(std::memory_order_seq_cst);
  }

  // Note: memory_order_acquire must be sufficient.
  // (The load below currently uses memory_order_seq_cst.)
  uint64_t LastPublishedSequence() const {
    return last_published_sequence_.load(std::memory_order_seq_cst);
  }

  // Set the last sequence number to s.
  // Asserts that s does not move backwards.
  void SetLastSequence(uint64_t s) {
    assert(s >= last_sequence_);
    // Last visible sequence must always be less than last written seq
    assert(!db_options_->two_write_queues || s <= last_allocated_sequence_);
    last_sequence_.store(s, std::memory_order_release);
  }

  // Note: memory_order_release must be sufficient
  // (The store below currently uses memory_order_seq_cst.)
  void SetLastPublishedSequence(uint64_t s) {
    assert(s >= last_published_sequence_);
    last_published_sequence_.store(s, std::memory_order_seq_cst);
  }

  // Note: memory_order_release must be sufficient
  // (The store below currently uses memory_order_seq_cst.)
  void SetLastAllocatedSequence(uint64_t s) {
    assert(s >= last_allocated_sequence_);
    last_allocated_sequence_.store(s, std::memory_order_seq_cst);
  }

  // Note: memory_order_release must be sufficient
  // Adds s to last_allocated_sequence_ and returns the value held before the
  // addition.
  uint64_t FetchAddLastAllocatedSequence(uint64_t s) {
    return last_allocated_sequence_.fetch_add(s, std::memory_order_seq_cst);
  }

  // Mark the specified file number as used.
  // REQUIRED: this is only called during single-threaded recovery or repair.
  void MarkFileNumberUsed(uint64_t number);

  // Mark the specified log number as deleted
  // REQUIRED: this is only called during single-threaded recovery or repair, or
  // from ::LogAndApply where the global mutex is held.
  void MarkMinLogNumberToKeep2PC(uint64_t number);

  // Return the log file number for the log file that is currently
  // being compacted, or zero if there is no such log file.
  uint64_t prev_log_number() const { return prev_log_number_; }

  // Returns the minimum log number which still has data not flushed to any SST
  // file.
  // In non-2PC mode, all the log numbers smaller than this number can be safely
  // deleted.
  uint64_t MinLogNumberWithUnflushedData() const {
    return PreComputeMinLogNumberWithUnflushedData(nullptr);
  }
  // Returns the minimum log number which still has data not flushed to any SST
  // file, except data from `cfd_to_skip`.
  // If no column family qualifies (all are skipped or dropped), the initial
  // numeric_limits maximum is returned unchanged.
  uint64_t PreComputeMinLogNumberWithUnflushedData(
      const ColumnFamilyData* cfd_to_skip) const {
    uint64_t min_log_num = std::numeric_limits::max();
    for (auto cfd : *column_family_set_) {
      if (cfd == cfd_to_skip) {
        continue;
      }
      // It's safe to ignore dropped column families here:
      // cfd->IsDropped() becomes true after the drop is persisted in MANIFEST.
      if (min_log_num > cfd->GetLogNumber() && !cfd->IsDropped()) {
        min_log_num = cfd->GetLogNumber();
      }
    }
    return min_log_num;
  }

  // Create an iterator that reads over the compaction inputs for "*c".
  // The caller should delete the iterator when no longer needed.
  InternalIterator* MakeInputIterator(
      const Compaction* c, RangeDelAggregator* range_del_agg,
      const FileOptions& file_options_compactions);

  // Add all files listed in any live version to *live_table_files and
  // *live_blob_files. Note that these lists may contain duplicates.
  void AddLiveFiles(std::vector* live_table_files,
                    std::vector* live_blob_files) const;

  // Return the approximate size of data to be scanned for range [start, end)
  // in levels [start_level, end_level). If end_level == -1 it will search
  // through all non-empty levels
  uint64_t ApproximateSize(const SizeApproximationOptions& options, Version* v,
                           const Slice& start, const Slice& end,
                           int start_level, int end_level,
                           TableReaderCaller caller);

  // Return the size of the current manifest file
  uint64_t manifest_file_size() const { return manifest_file_size_; }

  // verify that the files that we started with for a compaction
  // still exist in the current version and in the same original level.
  // This ensures that a concurrent compaction did not erroneously
  // pick the same files to compact.
  bool VerifyCompactionFileConsistency(Compaction* c);

  Status GetMetadataForFile(uint64_t number, int* filelevel,
                            FileMetaData** metadata, ColumnFamilyData** cfd);

  // This function doesn't support leveldb SST filenames
  void GetLiveFilesMetaData(std::vector *metadata);

  // Appends a (blob_file_number, path) entry to obsolete_blob_files_.
  void AddObsoleteBlobFile(uint64_t blob_file_number, std::string path) {
    obsolete_blob_files_.emplace_back(blob_file_number, std::move(path));
  }

  void GetObsoleteFiles(std::vector* files,
                        std::vector* blob_files,
                        std::vector* manifest_filenames,
                        uint64_t min_pending_output);

  ColumnFamilySet* GetColumnFamilySet() { return column_family_set_.get(); }
  const FileOptions& file_options() { return file_options_; }
  // Copies only writable_file_max_buffer_size from new_options into
  // file_options_; no other field is touched here.
  void ChangeFileOptions(const MutableDBOptions& new_options) {
    file_options_.writable_file_max_buffer_size =
        new_options.writable_file_max_buffer_size;
  }

  const ImmutableDBOptions* db_options() const { return db_options_; }

  static uint64_t GetNumLiveVersions(Version* dummy_versions);

  static uint64_t GetTotalSstFilesSize(Version* dummy_versions);

  // Get the IO Status returned by written Manifest.
  const IOStatus& io_status() const { return io_status_; }

 protected:
  using VersionBuilderMap =
      std::unordered_map>;

  struct ManifestWriter;

  friend class Version;
  friend class VersionEditHandler;
  friend class VersionEditHandlerPointInTime;
  friend class DBImpl;
  friend class DBImplReadOnly;

  // Log reader callback that records only the first corruption reported:
  // *status is overwritten only while it is still OK. `status` is
  // dereferenced without a null check, so it must be set before use.
  struct LogReporter : public log::Reader::Reporter {
    Status* status;
    virtual void Corruption(size_t /*bytes*/, const Status& s) override {
      if (status->ok()) {
        *status = s;
      }
    }
  };

  void Reset();

  // Returns approximated offset of a key in a file for a given version.
  uint64_t ApproximateOffsetOf(Version* v, const FdWithKeyRange& f,
                               const Slice& key, TableReaderCaller caller);

  // Returns approximated data size between start and end keys in a file
  // for a given version.
  uint64_t ApproximateSize(Version* v, const FdWithKeyRange& f,
                           const Slice& start, const Slice& end,
                           TableReaderCaller caller);

  struct MutableCFState {
    uint64_t log_number;
  };

  // Save current contents to *log
  Status WriteCurrentStateToManifest(
      const std::unordered_map& curr_state,
      log::Writer* log, IOStatus& io_s);

  void AppendVersion(ColumnFamilyData* column_family_data, Version* v);

  ColumnFamilyData* CreateColumnFamily(const ColumnFamilyOptions& cf_options,
                                       const VersionEdit* edit);

  Status ReadAndRecover(
      log::Reader& reader, AtomicGroupReadBuffer* read_buffer,
      const std::unordered_map& name_to_options,
      std::unordered_map& column_families_not_found,
      std::unordered_map<
          uint32_t, std::unique_ptr>& builders,
      Status* log_read_status, VersionEditParams* version_edit,
      std::string* db_id = nullptr);

  // REQUIRES db mutex
  Status ApplyOneVersionEditToBuilder(
      VersionEdit& edit,
      const std::unordered_map& name_to_opts,
      std::unordered_map& column_families_not_found,
      std::unordered_map<
          uint32_t, std::unique_ptr>& builders,
      VersionEditParams* version_edit);

  Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd,
                                    const VersionEdit& from_edit,
                                    VersionEditParams* version_edit_params);

  Status VerifyFileMetadata(const std::string& fpath,
                            const FileMetaData& meta) const;

  std::unique_ptr column_family_set_;

  Env* const env_;
  FileSystem* const fs_;
  const std::string dbname_;
  std::string db_id_;
  const ImmutableDBOptions* const db_options_;
  std::atomic next_file_number_;
  // Any log number equal or lower than this should be ignored during recovery,
  // and is qualified for being deleted in 2PC mode. In non-2PC mode, this
  // number is ignored.
  std::atomic min_log_number_to_keep_2pc_ = {0};
  uint64_t manifest_file_number_;
  uint64_t options_file_number_;
  uint64_t pending_manifest_file_number_;
  // The last seq visible to reads. It normally indicates the last sequence in
  // the memtable but when using two write queues it could also indicate the
  // last sequence in the WAL visible to reads.
  std::atomic last_sequence_;
  // The last seq that is already allocated. It is applicable only when we have
  // two write queues. In that case seq might or might not have appeared in
  // memtable but it is expected to appear in the WAL.
  // We have last_sequence <= last_allocated_sequence_
  std::atomic last_allocated_sequence_;
  // The last allocated sequence that is also published to the readers. This is
  // applicable only when last_seq_same_as_publish_seq_ is not set. Otherwise
  // last_sequence_ also indicates the last published seq.
  // We have last_sequence <= last_published_sequence_ <=
  // last_allocated_sequence_
  std::atomic last_published_sequence_;
  uint64_t prev_log_number_;  // 0 or backing store for memtable being compacted

  // Opened lazily
  std::unique_ptr descriptor_log_;

  // generates an increasing version number for every new version
  uint64_t current_version_number_;

  // Queue of writers to the manifest file
  std::deque manifest_writers_;

  // Current size of manifest file
  uint64_t manifest_file_size_;

  std::vector obsolete_files_;
  std::vector obsolete_blob_files_;
  std::vector obsolete_manifests_;

  // env options for all reads and writes except compactions
  FileOptions file_options_;

  BlockCacheTracer* const block_cache_tracer_;

  // Store the IO status when Manifest is written
  IOStatus io_status_;

 private:
  // REQUIRES db mutex at beginning. may release and re-acquire db mutex
  Status ProcessManifestWrites(std::deque& writers,
                               InstrumentedMutex* mu, FSDirectory* db_directory,
                               bool new_descriptor_log,
                               const ColumnFamilyOptions* new_cf_options);

  void LogAndApplyCFHelper(VersionEdit* edit);
  Status LogAndApplyHelper(ColumnFamilyData* cfd, VersionBuilder* b,
                           VersionEdit* edit, InstrumentedMutex* mu);
};

// ReactiveVersionSet represents a collection of versions of the column
// families of the database. Users of ReactiveVersionSet, e.g. DBImplSecondary,
// need to replay the MANIFEST (description log in older terms) in order to
// reconstruct and install versions.
class ReactiveVersionSet : public VersionSet { public: ReactiveVersionSet(const std::string& dbname, const ImmutableDBOptions* _db_options, const FileOptions& _file_options, Cache* table_cache, WriteBufferManager* write_buffer_manager, WriteController* write_controller); ~ReactiveVersionSet() override; Status ReadAndApply( InstrumentedMutex* mu, std::unique_ptr* manifest_reader, std::unordered_set* cfds_changed); Status Recover(const std::vector& column_families, std::unique_ptr* manifest_reader, std::unique_ptr* manifest_reporter, std::unique_ptr* manifest_reader_status); uint64_t TEST_read_edits_in_atomic_group() const { return read_buffer_.TEST_read_edits_in_atomic_group(); } std::vector& replay_buffer() { return read_buffer_.replay_buffer(); } protected: using VersionSet::ApplyOneVersionEditToBuilder; // REQUIRES db mutex Status ApplyOneVersionEditToBuilder( VersionEdit& edit, std::unordered_set* cfds_changed, VersionEdit* version_edit); Status MaybeSwitchManifest( log::Reader::Reporter* reporter, std::unique_ptr* manifest_reader); private: VersionBuilderMap active_version_builders_; AtomicGroupReadBuffer read_buffer_; // Number of version edits to skip by ReadAndApply at the beginning of a new // MANIFEST created by primary. int number_of_edits_to_skip_; using VersionSet::LogAndApply; using VersionSet::Recover; Status LogAndApply( const autovector& /*cfds*/, const autovector& /*mutable_cf_options_list*/, const autovector>& /*edit_lists*/, InstrumentedMutex* /*mu*/, FSDirectory* /*db_directory*/, bool /*new_descriptor_log*/, const ColumnFamilyOptions* /*new_cf_option*/) override { return Status::NotSupported("not supported in reactive mode"); } // No copy allowed ReactiveVersionSet(const ReactiveVersionSet&); ReactiveVersionSet& operator=(const ReactiveVersionSet&); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/version_set_test.cc000066400000000000000000002535501370372246700175520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/version_set.h" #include "db/db_impl/db_impl.h" #include "db/log_writer.h" #include "env/mock_env.h" #include "logging/logging.h" #include "table/mock_table.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class GenerateLevelFilesBriefTest : public testing::Test { public: std::vector files_; LevelFilesBrief file_level_; Arena arena_; GenerateLevelFilesBriefTest() { } ~GenerateLevelFilesBriefTest() override { for (size_t i = 0; i < files_.size(); i++) { delete files_[i]; } } void Add(const char* smallest, const char* largest, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100) { FileMetaData* f = new FileMetaData( files_.size() + 1, 0, 0, InternalKey(smallest, smallest_seq, kTypeValue), InternalKey(largest, largest_seq, kTypeValue), smallest_seq, largest_seq, /* marked_for_compact */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); files_.push_back(f); } int Compare() { int diff = 0; for (size_t i = 0; i < files_.size(); i++) { if (file_level_.files[i].fd.GetNumber() != files_[i]->fd.GetNumber()) { diff++; } } return diff; } }; TEST_F(GenerateLevelFilesBriefTest, Empty) { DoGenerateLevelFilesBrief(&file_level_, files_, &arena_); ASSERT_EQ(0u, file_level_.num_files); ASSERT_EQ(0, Compare()); } TEST_F(GenerateLevelFilesBriefTest, Single) { Add("p", "q"); DoGenerateLevelFilesBrief(&file_level_, files_, &arena_); ASSERT_EQ(1u, file_level_.num_files); 
ASSERT_EQ(0, Compare()); } TEST_F(GenerateLevelFilesBriefTest, Multiple) { Add("150", "200"); Add("200", "250"); Add("300", "350"); Add("400", "450"); DoGenerateLevelFilesBrief(&file_level_, files_, &arena_); ASSERT_EQ(4u, file_level_.num_files); ASSERT_EQ(0, Compare()); } class CountingLogger : public Logger { public: CountingLogger() : log_count(0) {} using Logger::Logv; void Logv(const char* /*format*/, va_list /*ap*/) override { log_count++; } int log_count; }; Options GetOptionsWithNumLevels(int num_levels, std::shared_ptr logger) { Options opt; opt.num_levels = num_levels; opt.info_log = logger; return opt; } class VersionStorageInfoTestBase : public testing::Test { public: const Comparator* ucmp_; InternalKeyComparator icmp_; std::shared_ptr logger_; Options options_; ImmutableCFOptions ioptions_; MutableCFOptions mutable_cf_options_; VersionStorageInfo vstorage_; InternalKey GetInternalKey(const char* ukey, SequenceNumber smallest_seq = 100) { return InternalKey(ukey, smallest_seq, kTypeValue); } explicit VersionStorageInfoTestBase(const Comparator* ucmp) : ucmp_(ucmp), icmp_(ucmp_), logger_(new CountingLogger()), options_(GetOptionsWithNumLevels(6, logger_)), ioptions_(options_), mutable_cf_options_(options_), vstorage_(&icmp_, ucmp_, 6, kCompactionStyleLevel, /*src_vstorage=*/nullptr, /*_force_consistency_checks=*/false) {} ~VersionStorageInfoTestBase() override { for (int i = 0; i < vstorage_.num_levels(); ++i) { for (auto* f : vstorage_.LevelFiles(i)) { if (--f->refs == 0) { delete f; } } } } void Add(int level, uint32_t file_number, const char* smallest, const char* largest, uint64_t file_size = 0) { assert(level < vstorage_.num_levels()); FileMetaData* f = new FileMetaData( file_number, 0, file_size, GetInternalKey(smallest, 0), GetInternalKey(largest, 0), /* smallest_seq */ 0, /* largest_seq */ 0, /* marked_for_compact */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, 
kUnknownFileChecksumFuncName); f->compensated_file_size = file_size; vstorage_.AddFile(level, f); } void Add(int level, uint32_t file_number, const InternalKey& smallest, const InternalKey& largest, uint64_t file_size = 0) { assert(level < vstorage_.num_levels()); FileMetaData* f = new FileMetaData( file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0, /* largest_seq */ 0, /* marked_for_compact */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, kUnknownFileChecksumFuncName); f->compensated_file_size = file_size; vstorage_.AddFile(level, f); } std::string GetOverlappingFiles(int level, const InternalKey& begin, const InternalKey& end) { std::vector inputs; vstorage_.GetOverlappingInputs(level, &begin, &end, &inputs); std::string result; for (size_t i = 0; i < inputs.size(); ++i) { if (i > 0) { result += ","; } AppendNumberTo(&result, inputs[i]->fd.GetNumber()); } return result; } }; class VersionStorageInfoTest : public VersionStorageInfoTestBase { public: VersionStorageInfoTest() : VersionStorageInfoTestBase(BytewiseComparator()) {} ~VersionStorageInfoTest() override {} }; TEST_F(VersionStorageInfoTest, MaxBytesForLevelStatic) { ioptions_.level_compaction_dynamic_level_bytes = false; mutable_cf_options_.max_bytes_for_level_base = 10; mutable_cf_options_.max_bytes_for_level_multiplier = 5; Add(4, 100U, "1", "2"); Add(5, 101U, "1", "2"); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 10U); ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 50U); ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 250U); ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1250U); ASSERT_EQ(0, logger_->log_count); } TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamic) { ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.max_bytes_for_level_base = 1000; mutable_cf_options_.max_bytes_for_level_multiplier = 5; Add(5, 1U, "1", "2", 500U); 
vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(0, logger_->log_count); ASSERT_EQ(vstorage_.base_level(), 5); Add(5, 2U, "3", "4", 550U); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(0, logger_->log_count); ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U); ASSERT_EQ(vstorage_.base_level(), 4); Add(4, 3U, "3", "4", 550U); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(0, logger_->log_count); ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1000U); ASSERT_EQ(vstorage_.base_level(), 4); Add(3, 4U, "3", "4", 250U); Add(3, 5U, "5", "7", 300U); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(1, logger_->log_count); ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 1005U); ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 1000U); ASSERT_EQ(vstorage_.base_level(), 3); Add(1, 6U, "3", "4", 5U); Add(1, 7U, "8", "9", 5U); logger_->log_count = 0; vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(1, logger_->log_count); ASSERT_GT(vstorage_.MaxBytesForLevel(4), 1005U); ASSERT_GT(vstorage_.MaxBytesForLevel(3), 1005U); ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 1005U); ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 1000U); ASSERT_EQ(vstorage_.base_level(), 1); } TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLotsOfData) { ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.max_bytes_for_level_base = 100; mutable_cf_options_.max_bytes_for_level_multiplier = 2; Add(0, 1U, "1", "2", 50U); Add(1, 2U, "1", "2", 50U); Add(2, 3U, "1", "2", 500U); Add(3, 4U, "1", "2", 500U); Add(4, 5U, "1", "2", 1700U); Add(5, 6U, "1", "2", 500U); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 800U); ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 400U); ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 200U); ASSERT_EQ(vstorage_.MaxBytesForLevel(1), 100U); ASSERT_EQ(vstorage_.base_level(), 1); ASSERT_EQ(0, logger_->log_count); } 
// Dynamic level bytes with very large levels: level base is 10 * kOneGB and
// the multiplier is 10. Checks the per-level targets and the chosen base
// level, and that nothing was logged.
TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicLargeLevel) {
  // Decimal gigabyte (10^9 bytes).
  uint64_t kOneGB = 1000U * 1000U * 1000U;
  ioptions_.level_compaction_dynamic_level_bytes = true;
  mutable_cf_options_.max_bytes_for_level_base = 10U * kOneGB;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  // Add(level, file_number, smallest, largest, file_size)
  Add(0, 1U, "1", "2", 50U);
  Add(3, 4U, "1", "2", 32U * kOneGB);
  Add(4, 5U, "1", "2", 500U * kOneGB);
  Add(5, 6U, "1", "2", 3000U * kOneGB);

  vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_);
  ASSERT_EQ(vstorage_.MaxBytesForLevel(5), 3000U * kOneGB);
  ASSERT_EQ(vstorage_.MaxBytesForLevel(4), 300U * kOneGB);
  ASSERT_EQ(vstorage_.MaxBytesForLevel(3), 30U * kOneGB);
  ASSERT_EQ(vstorage_.MaxBytesForLevel(2), 10U * kOneGB);
  ASSERT_EQ(vstorage_.base_level(), 2);
  ASSERT_EQ(0, logger_->log_count);
}

// Dynamic level bytes with three 10,000-byte L0 files (30,000 bytes in total),
// a level base of 40,000 and an L0 compaction trigger of 2.
TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_1) {
  ioptions_.level_compaction_dynamic_level_bytes = true;
  mutable_cf_options_.max_bytes_for_level_base = 40000;
  mutable_cf_options_.max_bytes_for_level_multiplier = 5;
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;

  Add(0, 1U, "1", "2", 10000U);
  Add(0, 2U, "1", "2", 10000U);
  Add(0, 3U, "1", "2", 10000U);

  Add(5, 4U, "1", "2", 1286250U);
  Add(4, 5U, "1", "2", 200000U);
  Add(3, 6U, "1", "2", 40000U);
  Add(2, 7U, "1", "2", 8000U);

  vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_);
  ASSERT_EQ(0, logger_->log_count);
  ASSERT_EQ(2, vstorage_.base_level());
  // The level multiplier is expected to stay at the configured value of 5
  // (presumably because the 30,000 bytes in L0 do not exceed
  // max_bytes_for_level_base -- confirm against CalculateBaseBytes).
  ASSERT_EQ(vstorage_.level_multiplier(), 5.0);
  // Expected targets for L2, L3 and L4: 40,000, 51,450 and 257,250.
  ASSERT_EQ(40000U, vstorage_.MaxBytesForLevel(2));
  ASSERT_EQ(51450U, vstorage_.MaxBytesForLevel(3));
  ASSERT_EQ(257250U, vstorage_.MaxBytesForLevel(4));
}

TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_2) {
  ioptions_.level_compaction_dynamic_level_bytes = true;
  mutable_cf_options_.max_bytes_for_level_base = 10000;
  mutable_cf_options_.max_bytes_for_level_multiplier = 5;
mutable_cf_options_.level0_file_num_compaction_trigger = 2; Add(0, 11U, "1", "2", 10000U); Add(0, 12U, "1", "2", 10000U); Add(0, 13U, "1", "2", 10000U); Add(5, 4U, "1", "2", 1286250U); Add(4, 5U, "1", "2", 200000U); Add(3, 6U, "1", "2", 40000U); Add(2, 7U, "1", "2", 8000U); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(0, logger_->log_count); ASSERT_EQ(2, vstorage_.base_level()); // level multiplier should be 3.5 ASSERT_LT(vstorage_.level_multiplier(), 3.6); ASSERT_GT(vstorage_.level_multiplier(), 3.4); // Level size should be around 30,000, 105,000, 367,500 ASSERT_EQ(30000U, vstorage_.MaxBytesForLevel(2)); ASSERT_LT(vstorage_.MaxBytesForLevel(3), 110000U); ASSERT_GT(vstorage_.MaxBytesForLevel(3), 100000U); ASSERT_LT(vstorage_.MaxBytesForLevel(4), 370000U); ASSERT_GT(vstorage_.MaxBytesForLevel(4), 360000U); } TEST_F(VersionStorageInfoTest, MaxBytesForLevelDynamicWithLargeL0_3) { ioptions_.level_compaction_dynamic_level_bytes = true; mutable_cf_options_.max_bytes_for_level_base = 10000; mutable_cf_options_.max_bytes_for_level_multiplier = 5; mutable_cf_options_.level0_file_num_compaction_trigger = 2; Add(0, 11U, "1", "2", 5000U); Add(0, 12U, "1", "2", 5000U); Add(0, 13U, "1", "2", 5000U); Add(0, 14U, "1", "2", 5000U); Add(0, 15U, "1", "2", 5000U); Add(0, 16U, "1", "2", 5000U); Add(5, 4U, "1", "2", 1286250U); Add(4, 5U, "1", "2", 200000U); Add(3, 6U, "1", "2", 40000U); Add(2, 7U, "1", "2", 8000U); vstorage_.CalculateBaseBytes(ioptions_, mutable_cf_options_); ASSERT_EQ(0, logger_->log_count); ASSERT_EQ(2, vstorage_.base_level()); // level multiplier should be 3.5 ASSERT_LT(vstorage_.level_multiplier(), 3.6); ASSERT_GT(vstorage_.level_multiplier(), 3.4); // Level size should be around 30,000, 105,000, 367,500 ASSERT_EQ(30000U, vstorage_.MaxBytesForLevel(2)); ASSERT_LT(vstorage_.MaxBytesForLevel(3), 110000U); ASSERT_GT(vstorage_.MaxBytesForLevel(3), 100000U); ASSERT_LT(vstorage_.MaxBytesForLevel(4), 370000U); 
ASSERT_GT(vstorage_.MaxBytesForLevel(4), 360000U); } TEST_F(VersionStorageInfoTest, EstimateLiveDataSize) { // Test whether the overlaps are detected as expected Add(1, 1U, "4", "7", 1U); // Perfect overlap with last level Add(2, 2U, "3", "5", 1U); // Partial overlap with last level Add(2, 3U, "6", "8", 1U); // Partial overlap with last level Add(3, 4U, "1", "9", 1U); // Contains range of last level Add(4, 5U, "4", "5", 1U); // Inside range of last level Add(4, 6U, "6", "7", 1U); // Inside range of last level Add(5, 7U, "4", "7", 10U); ASSERT_EQ(10U, vstorage_.EstimateLiveDataSize()); } TEST_F(VersionStorageInfoTest, EstimateLiveDataSize2) { Add(0, 1U, "9", "9", 1U); // Level 0 is not ordered Add(0, 2U, "5", "6", 1U); // Ignored because of [5,6] in l1 Add(1, 3U, "1", "2", 1U); // Ignored because of [2,3] in l2 Add(1, 4U, "3", "4", 1U); // Ignored because of [2,3] in l2 Add(1, 5U, "5", "6", 1U); Add(2, 6U, "2", "3", 1U); Add(3, 7U, "7", "8", 1U); ASSERT_EQ(4U, vstorage_.EstimateLiveDataSize()); } TEST_F(VersionStorageInfoTest, GetOverlappingInputs) { // Two files that overlap at the range deletion tombstone sentinel. Add(1, 1U, {"a", 0, kTypeValue}, {"b", kMaxSequenceNumber, kTypeRangeDeletion}, 1); Add(1, 2U, {"b", 0, kTypeValue}, {"c", 0, kTypeValue}, 1); // Two files that overlap at the same user key. Add(1, 3U, {"d", 0, kTypeValue}, {"e", kMaxSequenceNumber, kTypeValue}, 1); Add(1, 4U, {"e", 0, kTypeValue}, {"f", 0, kTypeValue}, 1); // Two files that do not overlap. 
Add(1, 5U, {"g", 0, kTypeValue}, {"h", 0, kTypeValue}, 1); Add(1, 6U, {"i", 0, kTypeValue}, {"j", 0, kTypeValue}, 1); vstorage_.UpdateNumNonEmptyLevels(); vstorage_.GenerateLevelFilesBrief(); ASSERT_EQ("1,2", GetOverlappingFiles( 1, {"a", 0, kTypeValue}, {"b", 0, kTypeValue})); ASSERT_EQ("1", GetOverlappingFiles( 1, {"a", 0, kTypeValue}, {"b", kMaxSequenceNumber, kTypeRangeDeletion})); ASSERT_EQ("2", GetOverlappingFiles( 1, {"b", kMaxSequenceNumber, kTypeValue}, {"c", 0, kTypeValue})); ASSERT_EQ("3,4", GetOverlappingFiles( 1, {"d", 0, kTypeValue}, {"e", 0, kTypeValue})); ASSERT_EQ("3", GetOverlappingFiles( 1, {"d", 0, kTypeValue}, {"e", kMaxSequenceNumber, kTypeRangeDeletion})); ASSERT_EQ("3,4", GetOverlappingFiles( 1, {"e", kMaxSequenceNumber, kTypeValue}, {"f", 0, kTypeValue})); ASSERT_EQ("3,4", GetOverlappingFiles( 1, {"e", 0, kTypeValue}, {"f", 0, kTypeValue})); ASSERT_EQ("5", GetOverlappingFiles( 1, {"g", 0, kTypeValue}, {"h", 0, kTypeValue})); ASSERT_EQ("6", GetOverlappingFiles( 1, {"i", 0, kTypeValue}, {"j", 0, kTypeValue})); } TEST_F(VersionStorageInfoTest, FileLocationAndMetaDataByNumber) { Add(0, 11U, "1", "2", 5000U); Add(0, 12U, "1", "2", 5000U); Add(2, 7U, "1", "2", 8000U); ASSERT_EQ(vstorage_.GetFileLocation(11U), VersionStorageInfo::FileLocation(0, 0)); ASSERT_NE(vstorage_.GetFileMetaDataByNumber(11U), nullptr); ASSERT_EQ(vstorage_.GetFileLocation(12U), VersionStorageInfo::FileLocation(0, 1)); ASSERT_NE(vstorage_.GetFileMetaDataByNumber(12U), nullptr); ASSERT_EQ(vstorage_.GetFileLocation(7U), VersionStorageInfo::FileLocation(2, 0)); ASSERT_NE(vstorage_.GetFileMetaDataByNumber(7U), nullptr); ASSERT_FALSE(vstorage_.GetFileLocation(999U).IsValid()); ASSERT_EQ(vstorage_.GetFileMetaDataByNumber(999U), nullptr); } class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase { public: VersionStorageInfoTimestampTest() : VersionStorageInfoTestBase(test::ComparatorWithU64Ts()) {} ~VersionStorageInfoTimestampTest() override {} std::string 
Timestamp(uint64_t ts) const { std::string ret; PutFixed64(&ret, ts); return ret; } std::string PackUserKeyAndTimestamp(const Slice& ukey, uint64_t ts) const { std::string ret; ret.assign(ukey.data(), ukey.size()); PutFixed64(&ret, ts); return ret; } }; TEST_F(VersionStorageInfoTimestampTest, GetOverlappingInputs) { Add(/*level=*/1, /*file_number=*/1, /*smallest=*/ {PackUserKeyAndTimestamp("a", /*ts=*/9), /*s=*/0, kTypeValue}, /*largest=*/ {PackUserKeyAndTimestamp("a", /*ts=*/8), /*s=*/0, kTypeValue}, /*file_size=*/100); Add(/*level=*/1, /*file_number=*/2, /*smallest=*/ {PackUserKeyAndTimestamp("a", /*ts=*/5), /*s=*/0, kTypeValue}, /*largest=*/ {PackUserKeyAndTimestamp("b", /*ts=*/10), /*s=*/0, kTypeValue}, /*file_size=*/100); Add(/*level=*/1, /*file_number=*/3, /*smallest=*/ {PackUserKeyAndTimestamp("c", /*ts=*/12), /*s=*/0, kTypeValue}, /*largest=*/ {PackUserKeyAndTimestamp("d", /*ts=*/1), /*s=*/0, kTypeValue}, /*file_size=*/100); vstorage_.UpdateNumNonEmptyLevels(); vstorage_.GenerateLevelFilesBrief(); ASSERT_EQ( "1,2", GetOverlappingFiles( /*level=*/1, {PackUserKeyAndTimestamp("a", /*ts=*/12), /*s=*/0, kTypeValue}, {PackUserKeyAndTimestamp("a", /*ts=*/11), /*s=*/0, kTypeValue})); ASSERT_EQ("3", GetOverlappingFiles( /*level=*/1, {PackUserKeyAndTimestamp("c", /*ts=*/15), /*s=*/0, kTypeValue}, {PackUserKeyAndTimestamp("c", /*ts=*/2), /*s=*/0, kTypeValue})); } class FindLevelFileTest : public testing::Test { public: LevelFilesBrief file_level_; bool disjoint_sorted_files_; Arena arena_; FindLevelFileTest() : disjoint_sorted_files_(true) { } ~FindLevelFileTest() override {} void LevelFileInit(size_t num = 0) { char* mem = arena_.AllocateAligned(num * sizeof(FdWithKeyRange)); file_level_.files = new (mem)FdWithKeyRange[num]; file_level_.num_files = 0; } void Add(const char* smallest, const char* largest, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100) { InternalKey smallest_key = InternalKey(smallest, smallest_seq, kTypeValue); InternalKey 
largest_key = InternalKey(largest, largest_seq, kTypeValue); Slice smallest_slice = smallest_key.Encode(); Slice largest_slice = largest_key.Encode(); char* mem = arena_.AllocateAligned( smallest_slice.size() + largest_slice.size()); memcpy(mem, smallest_slice.data(), smallest_slice.size()); memcpy(mem + smallest_slice.size(), largest_slice.data(), largest_slice.size()); // add to file_level_ size_t num = file_level_.num_files; auto& file = file_level_.files[num]; file.fd = FileDescriptor(num + 1, 0, 0); file.smallest_key = Slice(mem, smallest_slice.size()); file.largest_key = Slice(mem + smallest_slice.size(), largest_slice.size()); file_level_.num_files++; } int Find(const char* key) { InternalKey target(key, 100, kTypeValue); InternalKeyComparator cmp(BytewiseComparator()); return FindFile(cmp, file_level_, target.Encode()); } bool Overlaps(const char* smallest, const char* largest) { InternalKeyComparator cmp(BytewiseComparator()); Slice s(smallest != nullptr ? smallest : ""); Slice l(largest != nullptr ? largest : ""); return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, file_level_, (smallest != nullptr ? &s : nullptr), (largest != nullptr ? &l : nullptr)); } }; TEST_F(FindLevelFileTest, LevelEmpty) { LevelFileInit(0); ASSERT_EQ(0, Find("foo")); ASSERT_TRUE(! Overlaps("a", "z")); ASSERT_TRUE(! Overlaps(nullptr, "z")); ASSERT_TRUE(! Overlaps("a", nullptr)); ASSERT_TRUE(! Overlaps(nullptr, nullptr)); } TEST_F(FindLevelFileTest, LevelSingle) { LevelFileInit(1); Add("p", "q"); ASSERT_EQ(0, Find("a")); ASSERT_EQ(0, Find("p")); ASSERT_EQ(0, Find("p1")); ASSERT_EQ(0, Find("q")); ASSERT_EQ(1, Find("q1")); ASSERT_EQ(1, Find("z")); ASSERT_TRUE(! Overlaps("a", "b")); ASSERT_TRUE(! 
Overlaps("z1", "z2")); ASSERT_TRUE(Overlaps("a", "p")); ASSERT_TRUE(Overlaps("a", "q")); ASSERT_TRUE(Overlaps("a", "z")); ASSERT_TRUE(Overlaps("p", "p1")); ASSERT_TRUE(Overlaps("p", "q")); ASSERT_TRUE(Overlaps("p", "z")); ASSERT_TRUE(Overlaps("p1", "p2")); ASSERT_TRUE(Overlaps("p1", "z")); ASSERT_TRUE(Overlaps("q", "q")); ASSERT_TRUE(Overlaps("q", "q1")); ASSERT_TRUE(! Overlaps(nullptr, "j")); ASSERT_TRUE(! Overlaps("r", nullptr)); ASSERT_TRUE(Overlaps(nullptr, "p")); ASSERT_TRUE(Overlaps(nullptr, "p1")); ASSERT_TRUE(Overlaps("q", nullptr)); ASSERT_TRUE(Overlaps(nullptr, nullptr)); } TEST_F(FindLevelFileTest, LevelMultiple) { LevelFileInit(4); Add("150", "200"); Add("200", "250"); Add("300", "350"); Add("400", "450"); ASSERT_EQ(0, Find("100")); ASSERT_EQ(0, Find("150")); ASSERT_EQ(0, Find("151")); ASSERT_EQ(0, Find("199")); ASSERT_EQ(0, Find("200")); ASSERT_EQ(1, Find("201")); ASSERT_EQ(1, Find("249")); ASSERT_EQ(1, Find("250")); ASSERT_EQ(2, Find("251")); ASSERT_EQ(2, Find("299")); ASSERT_EQ(2, Find("300")); ASSERT_EQ(2, Find("349")); ASSERT_EQ(2, Find("350")); ASSERT_EQ(3, Find("351")); ASSERT_EQ(3, Find("400")); ASSERT_EQ(3, Find("450")); ASSERT_EQ(4, Find("451")); ASSERT_TRUE(! Overlaps("100", "149")); ASSERT_TRUE(! Overlaps("251", "299")); ASSERT_TRUE(! Overlaps("451", "500")); ASSERT_TRUE(! Overlaps("351", "399")); ASSERT_TRUE(Overlaps("100", "150")); ASSERT_TRUE(Overlaps("100", "200")); ASSERT_TRUE(Overlaps("100", "300")); ASSERT_TRUE(Overlaps("100", "400")); ASSERT_TRUE(Overlaps("100", "500")); ASSERT_TRUE(Overlaps("375", "400")); ASSERT_TRUE(Overlaps("450", "450")); ASSERT_TRUE(Overlaps("450", "500")); } TEST_F(FindLevelFileTest, LevelMultipleNullBoundaries) { LevelFileInit(4); Add("150", "200"); Add("200", "250"); Add("300", "350"); Add("400", "450"); ASSERT_TRUE(! Overlaps(nullptr, "149")); ASSERT_TRUE(! 
Overlaps("451", nullptr)); ASSERT_TRUE(Overlaps(nullptr, nullptr)); ASSERT_TRUE(Overlaps(nullptr, "150")); ASSERT_TRUE(Overlaps(nullptr, "199")); ASSERT_TRUE(Overlaps(nullptr, "200")); ASSERT_TRUE(Overlaps(nullptr, "201")); ASSERT_TRUE(Overlaps(nullptr, "400")); ASSERT_TRUE(Overlaps(nullptr, "800")); ASSERT_TRUE(Overlaps("100", nullptr)); ASSERT_TRUE(Overlaps("200", nullptr)); ASSERT_TRUE(Overlaps("449", nullptr)); ASSERT_TRUE(Overlaps("450", nullptr)); } TEST_F(FindLevelFileTest, LevelOverlapSequenceChecks) { LevelFileInit(1); Add("200", "200", 5000, 3000); ASSERT_TRUE(! Overlaps("199", "199")); ASSERT_TRUE(! Overlaps("201", "300")); ASSERT_TRUE(Overlaps("200", "200")); ASSERT_TRUE(Overlaps("190", "200")); ASSERT_TRUE(Overlaps("200", "210")); } TEST_F(FindLevelFileTest, LevelOverlappingFiles) { LevelFileInit(2); Add("150", "600"); Add("400", "500"); disjoint_sorted_files_ = false; ASSERT_TRUE(! Overlaps("100", "149")); ASSERT_TRUE(! Overlaps("601", "700")); ASSERT_TRUE(Overlaps("100", "150")); ASSERT_TRUE(Overlaps("100", "200")); ASSERT_TRUE(Overlaps("100", "300")); ASSERT_TRUE(Overlaps("100", "400")); ASSERT_TRUE(Overlaps("100", "500")); ASSERT_TRUE(Overlaps("375", "400")); ASSERT_TRUE(Overlaps("450", "450")); ASSERT_TRUE(Overlaps("450", "500")); ASSERT_TRUE(Overlaps("450", "700")); ASSERT_TRUE(Overlaps("600", "700")); } class VersionSetTestBase { public: const static std::string kColumnFamilyName1; const static std::string kColumnFamilyName2; const static std::string kColumnFamilyName3; int num_initial_edits_; explicit VersionSetTestBase(const std::string& name) : mem_env_(nullptr), env_(nullptr), env_guard_(), fs_(), dbname_(test::PerThreadDBPath(name)), options_(), db_options_(options_), cf_options_(options_), immutable_cf_options_(db_options_, cf_options_), mutable_cf_options_(cf_options_), table_cache_(NewLRUCache(50000, 16)), write_buffer_manager_(db_options_.db_write_buffer_size), shutting_down_(false), mock_table_factory_(std::make_shared()) { const char* 
test_env_uri = getenv("TEST_ENV_URI"); Env* base_env = nullptr; if (test_env_uri) { Status s = Env::LoadEnv(test_env_uri, &base_env, &env_guard_); EXPECT_OK(s); EXPECT_NE(Env::Default(), base_env); } else { base_env = Env::Default(); } EXPECT_NE(nullptr, base_env); if (getenv("MEM_ENV")) { mem_env_ = new MockEnv(base_env); } env_ = mem_env_ ? mem_env_ : base_env; fs_ = std::make_shared(env_); EXPECT_OK(env_->CreateDirIfMissing(dbname_)); db_options_.env = env_; db_options_.fs = fs_; versions_.reset(new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr)); reactive_versions_ = std::make_shared( dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_); db_options_.db_paths.emplace_back(dbname_, std::numeric_limits::max()); } virtual ~VersionSetTestBase() { if (getenv("KEEP_DB")) { fprintf(stdout, "DB is still at %s\n", dbname_.c_str()); } else { Options options; options.env = env_; EXPECT_OK(DestroyDB(dbname_, options)); } if (mem_env_) { delete mem_env_; mem_env_ = nullptr; } } protected: virtual void PrepareManifest( std::vector* column_families, SequenceNumber* last_seqno, std::unique_ptr* log_writer) { assert(column_families != nullptr); assert(last_seqno != nullptr); assert(log_writer != nullptr); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { std::unique_ptr impl(new DBImpl(DBOptions(), dbname_)); std::string db_id; impl->GetDbIdentityFromIdentityFile(&db_id); new_db.SetDBId(db_id); } new_db.SetLogNumber(0); new_db.SetNextFile(2); new_db.SetLastSequence(0); const std::vector cf_names = { kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3}; const int kInitialNumOfCfs = static_cast(cf_names.size()); autovector new_cfs; uint64_t last_seq = 1; uint32_t cf_id = 1; for (int i = 1; i != kInitialNumOfCfs; ++i) { VersionEdit new_cf; new_cf.AddColumnFamily(cf_names[i]); 
new_cf.SetColumnFamily(cf_id++); new_cf.SetLogNumber(0); new_cf.SetNextFile(2); new_cf.SetLastSequence(last_seq++); new_cfs.emplace_back(new_cf); } *last_seqno = last_seq; num_initial_edits_ = static_cast(new_cfs.size() + 1); const std::string manifest = DescriptorFileName(dbname_, 1); std::unique_ptr file; Status s = env_->NewWritableFile( manifest, &file, env_->OptimizeForManifestWrite(env_options_)); ASSERT_OK(s); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), manifest, env_options_)); { log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); std::string record; new_db.EncodeTo(&record); s = (*log_writer)->AddRecord(record); for (const auto& e : new_cfs) { record.clear(); e.EncodeTo(&record); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); } } ASSERT_OK(s); cf_options_.table_factory = mock_table_factory_; for (const auto& cf_name : cf_names) { column_families->emplace_back(cf_name, cf_options_); } } // Create DB with 3 column families. void NewDB() { std::vector column_families; SequenceNumber last_seqno; std::unique_ptr log_writer; SetIdentityFile(env_, dbname_); PrepareManifest(&column_families, &last_seqno, &log_writer); log_writer.reset(); // Make "CURRENT" file point to the new manifest file. 
Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); EXPECT_OK(versions_->Recover(column_families, false)); EXPECT_EQ(column_families.size(), versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); } void VerifyManifest(std::string* manifest_path) const { assert(manifest_path != nullptr); uint64_t manifest_file_number = 0; Status s = versions_->GetCurrentManifestPath( dbname_, fs_.get(), manifest_path, &manifest_file_number); ASSERT_OK(s); ASSERT_EQ(1, manifest_file_number); } MockEnv* mem_env_; Env* env_; std::shared_ptr env_guard_; std::shared_ptr fs_; const std::string dbname_; EnvOptions env_options_; Options options_; ImmutableDBOptions db_options_; ColumnFamilyOptions cf_options_; ImmutableCFOptions immutable_cf_options_; MutableCFOptions mutable_cf_options_; std::shared_ptr table_cache_; WriteController write_controller_; WriteBufferManager write_buffer_manager_; std::shared_ptr versions_; std::shared_ptr reactive_versions_; InstrumentedMutex mutex_; std::atomic shutting_down_; std::shared_ptr mock_table_factory_; }; const std::string VersionSetTestBase::kColumnFamilyName1 = "alice"; const std::string VersionSetTestBase::kColumnFamilyName2 = "bob"; const std::string VersionSetTestBase::kColumnFamilyName3 = "charles"; class VersionSetTest : public VersionSetTestBase, public testing::Test { public: VersionSetTest() : VersionSetTestBase("version_set_test") {} }; TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) { NewDB(); const int kGroupSize = 5; autovector edits; for (int i = 0; i != kGroupSize; ++i) { edits.emplace_back(VersionEdit()); } autovector cfds; autovector all_mutable_cf_options; autovector> edit_lists; for (int i = 0; i != kGroupSize; ++i) { cfds.emplace_back(versions_->GetColumnFamilySet()->GetDefault()); all_mutable_cf_options.emplace_back(&mutable_cf_options_); autovector edit_list; edit_list.emplace_back(&edits[i]); edit_lists.emplace_back(edit_list); } SyncPoint::GetInstance()->DisableProcessing(); 
SyncPoint::GetInstance()->ClearAllCallBacks(); int count = 0; SyncPoint::GetInstance()->SetCallBack( "VersionSet::ProcessManifestWrites:SameColumnFamily", [&](void* arg) { uint32_t* cf_id = reinterpret_cast(arg); EXPECT_EQ(0u, *cf_id); ++count; }); SyncPoint::GetInstance()->EnableProcessing(); mutex_.Lock(); Status s = versions_->LogAndApply(cfds, all_mutable_cf_options, edit_lists, &mutex_); mutex_.Unlock(); EXPECT_OK(s); EXPECT_EQ(kGroupSize - 1, count); } TEST_F(VersionSetTest, PersistBlobFileStateInNewManifest) { // Initialize the database and add a couple of blob files, one with some // garbage in it, and one without any garbage. NewDB(); VersionEdit edit; { constexpr uint64_t blob_file_number = 123; constexpr uint64_t total_blob_count = 456; constexpr uint64_t total_blob_bytes = 77777777; constexpr char checksum_method[] = "SHA1"; constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd"; constexpr uint64_t garbage_blob_count = 89; constexpr uint64_t garbage_blob_bytes = 1000000; edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); edit.AddBlobFileGarbage(blob_file_number, garbage_blob_count, garbage_blob_bytes); } { constexpr uint64_t blob_file_number = 234; constexpr uint64_t total_blob_count = 555; constexpr uint64_t total_blob_bytes = 66666; constexpr char checksum_method[] = "CRC32"; constexpr char checksum_value[] = "3d87ff57"; edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); } assert(versions_); assert(versions_->GetColumnFamilySet()); mutex_.Lock(); Status s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, &edit, &mutex_); mutex_.Unlock(); ASSERT_OK(s); // Force the creation of a new manifest file and make sure metadata for // the blob files is re-persisted. 
size_t addition_encoded = 0; SyncPoint::GetInstance()->SetCallBack( "BlobFileAddition::EncodeTo::CustomFields", [&](void* /* arg */) { ++addition_encoded; }); size_t garbage_encoded = 0; SyncPoint::GetInstance()->SetCallBack( "BlobFileGarbage::EncodeTo::CustomFields", [&](void* /* arg */) { ++garbage_encoded; }); SyncPoint::GetInstance()->EnableProcessing(); VersionEdit dummy; mutex_.Lock(); constexpr FSDirectory* db_directory = nullptr; constexpr bool new_descriptor_log = true; s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, &dummy, &mutex_, db_directory, new_descriptor_log); mutex_.Unlock(); ASSERT_OK(s); ASSERT_EQ(addition_encoded, 2); ASSERT_EQ(garbage_encoded, 1); SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_F(VersionSetTest, AddLiveBlobFiles) { // Initialize the database and add a blob file. NewDB(); assert(versions_); assert(versions_->GetColumnFamilySet()); ColumnFamilyData* const cfd = versions_->GetColumnFamilySet()->GetDefault(); assert(cfd); VersionEdit first; constexpr uint64_t first_blob_file_number = 234; constexpr uint64_t first_total_blob_count = 555; constexpr uint64_t first_total_blob_bytes = 66666; constexpr char first_checksum_method[] = "CRC32"; constexpr char first_checksum_value[] = "3d87ff57"; first.AddBlobFile(first_blob_file_number, first_total_blob_count, first_total_blob_bytes, first_checksum_method, first_checksum_value); mutex_.Lock(); Status s = versions_->LogAndApply(cfd, mutable_cf_options_, &first, &mutex_); mutex_.Unlock(); ASSERT_OK(s); // Reference the version so it stays alive even after the following version // edit. Version* const version = cfd->current(); assert(version); version->Ref(); // Get live files directly from version. 
std::vector version_table_files; std::vector version_blob_files; version->AddLiveFiles(&version_table_files, &version_blob_files); ASSERT_EQ(version_blob_files.size(), 1); ASSERT_EQ(version_blob_files[0], first_blob_file_number); // Add another blob file. VersionEdit second; constexpr uint64_t second_blob_file_number = 456; constexpr uint64_t second_total_blob_count = 100; constexpr uint64_t second_total_blob_bytes = 2000000; constexpr char second_checksum_method[] = "CRC32B"; constexpr char second_checksum_value[] = "6dbdf23a"; second.AddBlobFile(second_blob_file_number, second_total_blob_count, second_total_blob_bytes, second_checksum_method, second_checksum_value); mutex_.Lock(); s = versions_->LogAndApply(cfd, mutable_cf_options_, &second, &mutex_); mutex_.Unlock(); ASSERT_OK(s); // Get all live files from version set. Note that the result contains // duplicates. std::vector all_table_files; std::vector all_blob_files; versions_->AddLiveFiles(&all_table_files, &all_blob_files); ASSERT_EQ(all_blob_files.size(), 3); ASSERT_EQ(all_blob_files[0], first_blob_file_number); ASSERT_EQ(all_blob_files[1], first_blob_file_number); ASSERT_EQ(all_blob_files[2], second_blob_file_number); // Clean up previous version. version->Unref(); } TEST_F(VersionSetTest, ObsoleteBlobFile) { // Initialize the database and add a blob file (with no garbage just yet). NewDB(); VersionEdit addition; constexpr uint64_t blob_file_number = 234; constexpr uint64_t total_blob_count = 555; constexpr uint64_t total_blob_bytes = 66666; constexpr char checksum_method[] = "CRC32"; constexpr char checksum_value[] = "3d87ff57"; addition.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); assert(versions_); assert(versions_->GetColumnFamilySet()); mutex_.Lock(); Status s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, &addition, &mutex_); mutex_.Unlock(); ASSERT_OK(s); // Mark the entire blob file garbage. 
VersionEdit garbage; garbage.AddBlobFileGarbage(blob_file_number, total_blob_count, total_blob_bytes); mutex_.Lock(); s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, &garbage, &mutex_); mutex_.Unlock(); ASSERT_OK(s); // Make sure blob files from the pending number range are not returned // as obsolete. { std::vector table_files; std::vector blob_files; std::vector manifest_files; constexpr uint64_t min_pending_output = blob_file_number; versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files, min_pending_output); ASSERT_TRUE(blob_files.empty()); } // Make sure the blob file is returned as obsolete if it's not in the pending // range. { std::vector table_files; std::vector blob_files; std::vector manifest_files; constexpr uint64_t min_pending_output = blob_file_number + 1; versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files, min_pending_output); ASSERT_EQ(blob_files.size(), 1); ASSERT_EQ(blob_files[0].GetBlobFileNumber(), blob_file_number); } // Make sure it's not returned a second time. 
{ std::vector table_files; std::vector blob_files; std::vector manifest_files; constexpr uint64_t min_pending_output = blob_file_number + 1; versions_->GetObsoleteFiles(&table_files, &blob_files, &manifest_files, min_pending_output); ASSERT_TRUE(blob_files.empty()); } } class VersionSetAtomicGroupTest : public VersionSetTestBase, public testing::Test { public: VersionSetAtomicGroupTest() : VersionSetTestBase("version_set_atomic_group_test") {} void SetUp() override { PrepareManifest(&column_families_, &last_seqno_, &log_writer_); SetupTestSyncPoints(); } void SetupValidAtomicGroup(int atomic_group_size) { edits_.resize(atomic_group_size); int remaining = atomic_group_size; for (size_t i = 0; i != edits_.size(); ++i) { edits_[i].SetLogNumber(0); edits_[i].SetNextFile(2); edits_[i].MarkAtomicGroup(--remaining); edits_[i].SetLastSequence(last_seqno_++); } ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); } void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) { edits_.resize(atomic_group_size); int remaining = atomic_group_size; for (size_t i = 0; i != edits_.size(); ++i) { edits_[i].SetLogNumber(0); edits_[i].SetNextFile(2); edits_[i].MarkAtomicGroup(--remaining); edits_[i].SetLastSequence(last_seqno_++); } ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); } void SetupCorruptedAtomicGroup(int atomic_group_size) { edits_.resize(atomic_group_size); int remaining = atomic_group_size; for (size_t i = 0; i != edits_.size(); ++i) { edits_[i].SetLogNumber(0); edits_[i].SetNextFile(2); if (i != ((size_t)atomic_group_size / 2)) { edits_[i].MarkAtomicGroup(--remaining); } edits_[i].SetLastSequence(last_seqno_++); } ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); } void SetupIncorrectAtomicGroup(int atomic_group_size) { edits_.resize(atomic_group_size); int remaining = atomic_group_size; for (size_t i = 0; i != edits_.size(); ++i) { edits_[i].SetLogNumber(0); edits_[i].SetNextFile(2); if (i != 1) { edits_[i].MarkAtomicGroup(--remaining); } 
else { edits_[i].MarkAtomicGroup(remaining--); } edits_[i].SetLastSequence(last_seqno_++); } ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); } void SetupTestSyncPoints() { SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "AtomicGroupReadBuffer::AddEdit:FirstInAtomicGroup", [&](void* arg) { VersionEdit* e = reinterpret_cast(arg); EXPECT_EQ(edits_.front().DebugString(), e->DebugString()); // compare based on value first_in_atomic_group_ = true; }); SyncPoint::GetInstance()->SetCallBack( "AtomicGroupReadBuffer::AddEdit:LastInAtomicGroup", [&](void* arg) { VersionEdit* e = reinterpret_cast(arg); EXPECT_EQ(edits_.back().DebugString(), e->DebugString()); // compare based on value EXPECT_TRUE(first_in_atomic_group_); last_in_atomic_group_ = true; }); SyncPoint::GetInstance()->SetCallBack( "VersionSet::ReadAndRecover:RecoveredEdits", [&](void* arg) { num_recovered_edits_ = *reinterpret_cast(arg); }); SyncPoint::GetInstance()->SetCallBack( "ReactiveVersionSet::ReadAndApply:AppliedEdits", [&](void* arg) { num_applied_edits_ = *reinterpret_cast(arg); }); SyncPoint::GetInstance()->SetCallBack( "AtomicGroupReadBuffer::AddEdit:AtomicGroup", [&](void* /* arg */) { ++num_edits_in_atomic_group_; }); SyncPoint::GetInstance()->SetCallBack( "AtomicGroupReadBuffer::AddEdit:AtomicGroupMixedWithNormalEdits", [&](void* arg) { corrupted_edit_ = *reinterpret_cast(arg); }); SyncPoint::GetInstance()->SetCallBack( "AtomicGroupReadBuffer::AddEdit:IncorrectAtomicGroupSize", [&](void* arg) { edit_with_incorrect_group_size_ = *reinterpret_cast(arg); }); SyncPoint::GetInstance()->EnableProcessing(); } void AddNewEditsToLog(int num_edits) { for (int i = 0; i < num_edits; i++) { std::string record; edits_[i].EncodeTo(&record); ASSERT_OK(log_writer_->AddRecord(record)); } } void TearDown() override { SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); 
log_writer_.reset(); } protected: std::vector column_families_; SequenceNumber last_seqno_; std::vector edits_; bool first_in_atomic_group_ = false; bool last_in_atomic_group_ = false; int num_edits_in_atomic_group_ = 0; int num_recovered_edits_ = 0; int num_applied_edits_ = 0; VersionEdit corrupted_edit_; VersionEdit edit_with_incorrect_group_size_; std::unique_ptr log_writer_; }; TEST_F(VersionSetAtomicGroupTest, HandleValidAtomicGroupWithVersionSetRecover) { const int kAtomicGroupSize = 3; SetupValidAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize); EXPECT_OK(versions_->Recover(column_families_, false)); EXPECT_EQ(column_families_.size(), versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_TRUE(first_in_atomic_group_); EXPECT_TRUE(last_in_atomic_group_); EXPECT_EQ(num_initial_edits_ + kAtomicGroupSize, num_recovered_edits_); EXPECT_EQ(0, num_applied_edits_); } TEST_F(VersionSetAtomicGroupTest, HandleValidAtomicGroupWithReactiveVersionSetRecover) { const int kAtomicGroupSize = 3; SetupValidAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize); std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); EXPECT_EQ(column_families_.size(), reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_TRUE(first_in_atomic_group_); EXPECT_TRUE(last_in_atomic_group_); // The recover should clean up the replay buffer. 
EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0); EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0); EXPECT_EQ(num_initial_edits_ + kAtomicGroupSize, num_recovered_edits_); EXPECT_EQ(0, num_applied_edits_); } TEST_F(VersionSetAtomicGroupTest, HandleValidAtomicGroupWithReactiveVersionSetReadAndApply) { const int kAtomicGroupSize = 3; SetupValidAtomicGroup(kAtomicGroupSize); std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); AddNewEditsToLog(kAtomicGroupSize); InstrumentedMutex mu; std::unordered_set cfds_changed; mu.Lock(); EXPECT_OK( reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); mu.Unlock(); EXPECT_TRUE(first_in_atomic_group_); EXPECT_TRUE(last_in_atomic_group_); // The recover should clean up the replay buffer. EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0); EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0); EXPECT_EQ(num_initial_edits_, num_recovered_edits_); EXPECT_EQ(kAtomicGroupSize, num_applied_edits_); } TEST_F(VersionSetAtomicGroupTest, HandleIncompleteTrailingAtomicGroupWithVersionSetRecover) { const int kAtomicGroupSize = 4; const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1; SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kNumberOfPersistedVersionEdits); EXPECT_OK(versions_->Recover(column_families_, false)); EXPECT_EQ(column_families_.size(), versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_TRUE(first_in_atomic_group_); EXPECT_FALSE(last_in_atomic_group_); EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_); EXPECT_EQ(num_initial_edits_, num_recovered_edits_); EXPECT_EQ(0, num_applied_edits_); } TEST_F(VersionSetAtomicGroupTest, HandleIncompleteTrailingAtomicGroupWithReactiveVersionSetRecover) { const int 
kAtomicGroupSize = 4; const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1; SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kNumberOfPersistedVersionEdits); std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); EXPECT_EQ(column_families_.size(), reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_TRUE(first_in_atomic_group_); EXPECT_FALSE(last_in_atomic_group_); EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_); // Reactive version set should store the edits in the replay buffer. EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == kNumberOfPersistedVersionEdits); EXPECT_TRUE(reactive_versions_->replay_buffer().size() == kAtomicGroupSize); // Write the last record. The reactive version set should now apply all // edits. std::string last_record; edits_[kAtomicGroupSize - 1].EncodeTo(&last_record); EXPECT_OK(log_writer_->AddRecord(last_record)); InstrumentedMutex mu; std::unordered_set cfds_changed; mu.Lock(); EXPECT_OK( reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); mu.Unlock(); // Reactive version set should be empty now. EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == 0); EXPECT_TRUE(reactive_versions_->replay_buffer().size() == 0); EXPECT_EQ(num_initial_edits_, num_recovered_edits_); EXPECT_EQ(kAtomicGroupSize, num_applied_edits_); } TEST_F(VersionSetAtomicGroupTest, HandleIncompleteTrailingAtomicGroupWithReactiveVersionSetReadAndApply) { const int kAtomicGroupSize = 4; const int kNumberOfPersistedVersionEdits = kAtomicGroupSize - 1; SetupIncompleteTrailingAtomicGroup(kAtomicGroupSize); std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; // No edits in an atomic group. 
EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); EXPECT_EQ(column_families_.size(), reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); // Write a few edits in an atomic group. AddNewEditsToLog(kNumberOfPersistedVersionEdits); InstrumentedMutex mu; std::unordered_set cfds_changed; mu.Lock(); EXPECT_OK( reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); mu.Unlock(); EXPECT_TRUE(first_in_atomic_group_); EXPECT_FALSE(last_in_atomic_group_); EXPECT_EQ(kNumberOfPersistedVersionEdits, num_edits_in_atomic_group_); // Reactive version set should store the edits in the replay buffer. EXPECT_TRUE(reactive_versions_->TEST_read_edits_in_atomic_group() == kNumberOfPersistedVersionEdits); EXPECT_TRUE(reactive_versions_->replay_buffer().size() == kAtomicGroupSize); EXPECT_EQ(num_initial_edits_, num_recovered_edits_); EXPECT_EQ(0, num_applied_edits_); } TEST_F(VersionSetAtomicGroupTest, HandleCorruptedAtomicGroupWithVersionSetRecover) { const int kAtomicGroupSize = 4; SetupCorruptedAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize); EXPECT_NOK(versions_->Recover(column_families_, false)); EXPECT_EQ(column_families_.size(), versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), corrupted_edit_.DebugString()); } TEST_F(VersionSetAtomicGroupTest, HandleCorruptedAtomicGroupWithReactiveVersionSetRecover) { const int kAtomicGroupSize = 4; SetupCorruptedAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize); std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_NOK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); EXPECT_EQ(column_families_.size(), reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), 
corrupted_edit_.DebugString()); } TEST_F(VersionSetAtomicGroupTest, HandleCorruptedAtomicGroupWithReactiveVersionSetReadAndApply) { const int kAtomicGroupSize = 4; SetupCorruptedAtomicGroup(kAtomicGroupSize); InstrumentedMutex mu; std::unordered_set cfds_changed; std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); // Write the corrupted edits. AddNewEditsToLog(kAtomicGroupSize); mu.Lock(); EXPECT_NOK( reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); mu.Unlock(); EXPECT_EQ(edits_[kAtomicGroupSize / 2].DebugString(), corrupted_edit_.DebugString()); } TEST_F(VersionSetAtomicGroupTest, HandleIncorrectAtomicGroupSizeWithVersionSetRecover) { const int kAtomicGroupSize = 4; SetupIncorrectAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize); EXPECT_NOK(versions_->Recover(column_families_, false)); EXPECT_EQ(column_families_.size(), versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_EQ(edits_[1].DebugString(), edit_with_incorrect_group_size_.DebugString()); } TEST_F(VersionSetAtomicGroupTest, HandleIncorrectAtomicGroupSizeWithReactiveVersionSetRecover) { const int kAtomicGroupSize = 4; SetupIncorrectAtomicGroup(kAtomicGroupSize); AddNewEditsToLog(kAtomicGroupSize); std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_NOK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); EXPECT_EQ(column_families_.size(), reactive_versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); EXPECT_EQ(edits_[1].DebugString(), edit_with_incorrect_group_size_.DebugString()); } TEST_F(VersionSetAtomicGroupTest, HandleIncorrectAtomicGroupSizeWithReactiveVersionSetReadAndApply) { const int kAtomicGroupSize = 4; 
SetupIncorrectAtomicGroup(kAtomicGroupSize); InstrumentedMutex mu; std::unordered_set cfds_changed; std::unique_ptr manifest_reader; std::unique_ptr manifest_reporter; std::unique_ptr manifest_reader_status; EXPECT_OK(reactive_versions_->Recover(column_families_, &manifest_reader, &manifest_reporter, &manifest_reader_status)); AddNewEditsToLog(kAtomicGroupSize); mu.Lock(); EXPECT_NOK( reactive_versions_->ReadAndApply(&mu, &manifest_reader, &cfds_changed)); mu.Unlock(); EXPECT_EQ(edits_[1].DebugString(), edit_with_incorrect_group_size_.DebugString()); } class VersionSetTestDropOneCF : public VersionSetTestBase, public testing::TestWithParam { public: VersionSetTestDropOneCF() : VersionSetTestBase("version_set_test_drop_one_cf") {} }; // This test simulates the following execution sequence // Time thread1 bg_flush_thr // | Prepare version edits (e1,e2,e3) for atomic // | flush cf1, cf2, cf3 // | Enqueue e to drop cfi // | to manifest_writers_ // | Enqueue (e1,e2,e3) to manifest_writers_ // | // | Apply e, // | cfi.IsDropped() is true // | Apply (e1,e2,e3), // | since cfi.IsDropped() == true, we need to // | drop ei and write the rest to MANIFEST. // V // // Repeat the test for i = 1, 2, 3 to simulate dropping the first, middle and // last column family in an atomic group. 
TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { std::vector column_families; SequenceNumber last_seqno; std::unique_ptr log_writer; PrepareManifest(&column_families, &last_seqno, &log_writer); Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); EXPECT_OK(versions_->Recover(column_families, false /* read_only */)); EXPECT_EQ(column_families.size(), versions_->GetColumnFamilySet()->NumberOfColumnFamilies()); const int kAtomicGroupSize = 3; const std::vector non_default_cf_names = { kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3}; // Drop one column family VersionEdit drop_cf_edit; drop_cf_edit.DropColumnFamily(); const std::string cf_to_drop_name(GetParam()); auto cfd_to_drop = versions_->GetColumnFamilySet()->GetColumnFamily(cf_to_drop_name); ASSERT_NE(nullptr, cfd_to_drop); // Increase its refcount because cfd_to_drop is used later, and we need to // prevent it from being deleted. cfd_to_drop->Ref(); drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID()); mutex_.Lock(); s = versions_->LogAndApply(cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), &drop_cf_edit, &mutex_); mutex_.Unlock(); ASSERT_OK(s); std::vector edits(kAtomicGroupSize); uint32_t remaining = kAtomicGroupSize; size_t i = 0; autovector cfds; autovector mutable_cf_options_list; autovector> edit_lists; for (const auto& cf_name : non_default_cf_names) { auto cfd = (cf_name != cf_to_drop_name) ? 
versions_->GetColumnFamilySet()->GetColumnFamily(cf_name) : cfd_to_drop; ASSERT_NE(nullptr, cfd); cfds.push_back(cfd); mutable_cf_options_list.emplace_back(cfd->GetLatestMutableCFOptions()); edits[i].SetColumnFamily(cfd->GetID()); edits[i].SetLogNumber(0); edits[i].SetNextFile(2); edits[i].MarkAtomicGroup(--remaining); edits[i].SetLastSequence(last_seqno++); autovector tmp_edits; tmp_edits.push_back(&edits[i]); edit_lists.emplace_back(tmp_edits); ++i; } int called = 0; SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->SetCallBack( "VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", [&](void* arg) { std::vector* tmp_edits = reinterpret_cast*>(arg); EXPECT_EQ(kAtomicGroupSize - 1, tmp_edits->size()); for (const auto e : *tmp_edits) { bool found = false; for (const auto& e2 : edits) { if (&e2 == e) { found = true; break; } } ASSERT_TRUE(found); } ++called; }); SyncPoint::GetInstance()->EnableProcessing(); mutex_.Lock(); s = versions_->LogAndApply(cfds, mutable_cf_options_list, edit_lists, &mutex_); mutex_.Unlock(); ASSERT_OK(s); ASSERT_EQ(1, called); if (cfd_to_drop->Unref()) { delete cfd_to_drop; cfd_to_drop = nullptr; } } INSTANTIATE_TEST_CASE_P( AtomicGroup, VersionSetTestDropOneCF, testing::Values(VersionSetTestBase::kColumnFamilyName1, VersionSetTestBase::kColumnFamilyName2, VersionSetTestBase::kColumnFamilyName3)); class EmptyDefaultCfNewManifest : public VersionSetTestBase, public testing::Test { public: EmptyDefaultCfNewManifest() : VersionSetTestBase("version_set_new_db_test") {} // Emulate DBImpl::NewDB() void PrepareManifest(std::vector* /*column_families*/, SequenceNumber* /*last_seqno*/, std::unique_ptr* log_writer) override { assert(log_writer != nullptr); VersionEdit new_db; new_db.SetLogNumber(0); std::unique_ptr file; const std::string manifest_path = DescriptorFileName(dbname_, 1); Status s = env_->NewWritableFile( manifest_path, &file, 
env_->OptimizeForManifestWrite(env_options_)); ASSERT_OK(s); std::unique_ptr file_writer( new WritableFileWriter(NewLegacyWritableFileWrapper(std::move(file)), manifest_path, env_options_)); log_writer->reset(new log::Writer(std::move(file_writer), 0, true)); std::string record; ASSERT_TRUE(new_db.EncodeTo(&record)); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); // Create new column family VersionEdit new_cf; new_cf.AddColumnFamily(VersionSetTestBase::kColumnFamilyName1); new_cf.SetColumnFamily(1); new_cf.SetLastSequence(2); new_cf.SetNextFile(2); record.clear(); ASSERT_TRUE(new_cf.EncodeTo(&record)); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); } protected: bool write_dbid_to_manifest_ = false; std::unique_ptr log_writer_; }; // Create db, create column family. Cf creation will switch to a new MANIFEST. // Then reopen db, trying to recover. TEST_F(EmptyDefaultCfNewManifest, Recover) { PrepareManifest(nullptr, nullptr, &log_writer_); log_writer_.reset(); Status s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); std::vector column_families; column_families.emplace_back(kDefaultColumnFamilyName, cf_options_); column_families.emplace_back(VersionSetTestBase::kColumnFamilyName1, cf_options_); std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest( manifest_path, column_families, false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); } class VersionSetTestEmptyDb : public VersionSetTestBase, public testing::TestWithParam< std::tuple>> { public: static const std::string kUnknownColumnFamilyName; VersionSetTestEmptyDb() : VersionSetTestBase("version_set_test_empty_db") {} protected: void PrepareManifest(std::vector* /*column_families*/, SequenceNumber* /*last_seqno*/, std::unique_ptr* log_writer) override { assert(nullptr != log_writer); VersionEdit new_db; if 
(db_options_.write_dbid_to_manifest) { std::unique_ptr impl(new DBImpl(DBOptions(), dbname_)); std::string db_id; impl->GetDbIdentityFromIdentityFile(&db_id); new_db.SetDBId(db_id); } const std::string manifest_path = DescriptorFileName(dbname_, 1); std::unique_ptr file; Status s = env_->NewWritableFile( manifest_path, &file, env_->OptimizeForManifestWrite(env_options_)); ASSERT_OK(s); std::unique_ptr file_writer( new WritableFileWriter(NewLegacyWritableFileWrapper(std::move(file)), manifest_path, env_options_)); { log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); std::string record; new_db.EncodeTo(&record); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); } } std::unique_ptr log_writer_; }; const std::string VersionSetTestEmptyDb::kUnknownColumnFamilyName = "unknown"; TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); log_writer_.reset(); Status s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); bool read_only = std::get<1>(GetParam()); const std::vector cf_names = std::get<2>(GetParam()); std::vector column_families; for (const auto& cf_name : cf_names) { column_families.emplace_back(cf_name, cf_options_); } std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, read_only, &db_id, &has_missing_table_file); auto iter = std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); if (iter == cf_names.end()) { ASSERT_TRUE(s.IsInvalidArgument()); } else { ASSERT_TRUE(s.IsCorruption()); } } TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); // Only a subset of column families in the MANIFEST. 
VersionEdit new_cf1; new_cf1.AddColumnFamily(VersionSetTestBase::kColumnFamilyName1); new_cf1.SetColumnFamily(1); Status s; { std::string record; new_cf1.EncodeTo(&record); s = log_writer_->AddRecord(record); ASSERT_OK(s); } log_writer_.reset(); s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); bool read_only = std::get<1>(GetParam()); const std::vector& cf_names = std::get<2>(GetParam()); std::vector column_families; for (const auto& cf_name : cf_names) { column_families.emplace_back(cf_name, cf_options_); } std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, read_only, &db_id, &has_missing_table_file); auto iter = std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); if (iter == cf_names.end()) { ASSERT_TRUE(s.IsInvalidArgument()); } else { ASSERT_TRUE(s.IsCorruption()); } } TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); // Write all column families but no log_number, next_file_number and // last_sequence. 
const std::vector all_cf_names = { kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3}; uint32_t cf_id = 1; Status s; for (size_t i = 1; i != all_cf_names.size(); ++i) { VersionEdit new_cf; new_cf.AddColumnFamily(all_cf_names[i]); new_cf.SetColumnFamily(cf_id++); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); s = log_writer_->AddRecord(record); ASSERT_OK(s); } log_writer_.reset(); s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); bool read_only = std::get<1>(GetParam()); const std::vector& cf_names = std::get<2>(GetParam()); std::vector column_families; for (const auto& cf_name : cf_names) { column_families.emplace_back(cf_name, cf_options_); } std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, read_only, &db_id, &has_missing_table_file); auto iter = std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); if (iter == cf_names.end()) { ASSERT_TRUE(s.IsInvalidArgument()); } else { ASSERT_TRUE(s.IsCorruption()); } } TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); // Write all column families but no log_number, next_file_number and // last_sequence. 
const std::vector all_cf_names = { kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3}; uint32_t cf_id = 1; Status s; for (size_t i = 1; i != all_cf_names.size(); ++i) { VersionEdit new_cf; new_cf.AddColumnFamily(all_cf_names[i]); new_cf.SetColumnFamily(cf_id++); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); s = log_writer_->AddRecord(record); ASSERT_OK(s); } { VersionEdit tmp_edit; tmp_edit.SetColumnFamily(4); tmp_edit.SetLogNumber(0); tmp_edit.SetNextFile(2); tmp_edit.SetLastSequence(0); std::string record; ASSERT_TRUE(tmp_edit.EncodeTo(&record)); s = log_writer_->AddRecord(record); ASSERT_OK(s); } log_writer_.reset(); s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); bool read_only = std::get<1>(GetParam()); const std::vector& cf_names = std::get<2>(GetParam()); std::vector column_families; for (const auto& cf_name : cf_names) { column_families.emplace_back(cf_name, cf_options_); } std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, read_only, &db_id, &has_missing_table_file); auto iter = std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); if (iter == cf_names.end()) { ASSERT_TRUE(s.IsInvalidArgument()); } else { ASSERT_TRUE(s.IsCorruption()); } } TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); // Write all column families but no log_number, next_file_number and // last_sequence. 
const std::vector all_cf_names = { kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3}; uint32_t cf_id = 1; Status s; for (size_t i = 1; i != all_cf_names.size(); ++i) { VersionEdit new_cf; new_cf.AddColumnFamily(all_cf_names[i]); new_cf.SetColumnFamily(cf_id++); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); s = log_writer_->AddRecord(record); ASSERT_OK(s); } { VersionEdit tmp_edit; tmp_edit.SetLogNumber(0); tmp_edit.SetNextFile(2); tmp_edit.SetLastSequence(0); std::string record; ASSERT_TRUE(tmp_edit.EncodeTo(&record)); s = log_writer_->AddRecord(record); ASSERT_OK(s); } log_writer_.reset(); s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); bool read_only = std::get<1>(GetParam()); const std::vector& cf_names = std::get<2>(GetParam()); std::vector column_families; for (const auto& cf_name : cf_names) { column_families.emplace_back(cf_name, cf_options_); } std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families, read_only, &db_id, &has_missing_table_file); auto iter = std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName); if (iter == cf_names.end()) { ASSERT_TRUE(s.IsInvalidArgument()); } else if (read_only) { ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); } else if (cf_names.size() == all_cf_names.size()) { ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); } else if (cf_names.size() < all_cf_names.size()) { ASSERT_TRUE(s.IsInvalidArgument()); } else { ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetColumnFamily( kUnknownColumnFamilyName); ASSERT_EQ(nullptr, cfd); } } INSTANTIATE_TEST_CASE_P( BestEffortRecovery, VersionSetTestEmptyDb, testing::Combine( /*write_dbid_to_manifest=*/testing::Bool(), /*read_only=*/testing::Bool(), /*cf_names=*/ testing::Values( 
std::vector(), std::vector({kDefaultColumnFamilyName}), std::vector({VersionSetTestBase::kColumnFamilyName1, VersionSetTestBase::kColumnFamilyName2, VersionSetTestBase::kColumnFamilyName3}), std::vector({kDefaultColumnFamilyName, VersionSetTestBase::kColumnFamilyName1}), std::vector({kDefaultColumnFamilyName, VersionSetTestBase::kColumnFamilyName1, VersionSetTestBase::kColumnFamilyName2, VersionSetTestBase::kColumnFamilyName3}), std::vector( {kDefaultColumnFamilyName, VersionSetTestBase::kColumnFamilyName1, VersionSetTestBase::kColumnFamilyName2, VersionSetTestBase::kColumnFamilyName3, VersionSetTestEmptyDb::kUnknownColumnFamilyName})))); class VersionSetTestMissingFiles : public VersionSetTestBase, public testing::Test { public: VersionSetTestMissingFiles() : VersionSetTestBase("version_set_test_missing_files"), block_based_table_options_(), table_factory_(std::make_shared( block_based_table_options_)), internal_comparator_( std::make_shared(options_.comparator)) {} protected: void PrepareManifest(std::vector* column_families, SequenceNumber* last_seqno, std::unique_ptr* log_writer) override { assert(column_families != nullptr); assert(last_seqno != nullptr); assert(log_writer != nullptr); const std::string manifest = DescriptorFileName(dbname_, 1); std::unique_ptr file; Status s = env_->NewWritableFile( manifest, &file, env_->OptimizeForManifestWrite(env_options_)); ASSERT_OK(s); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), manifest, env_options_)); log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { std::unique_ptr impl(new DBImpl(DBOptions(), dbname_)); std::string db_id; impl->GetDbIdentityFromIdentityFile(&db_id); new_db.SetDBId(db_id); } { std::string record; ASSERT_TRUE(new_db.EncodeTo(&record)); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); } const std::vector cf_names = { kDefaultColumnFamilyName, kColumnFamilyName1, 
kColumnFamilyName2, kColumnFamilyName3}; uint32_t cf_id = 1; // default cf id is 0 cf_options_.table_factory = table_factory_; for (const auto& cf_name : cf_names) { column_families->emplace_back(cf_name, cf_options_); if (cf_name == kDefaultColumnFamilyName) { continue; } VersionEdit new_cf; new_cf.AddColumnFamily(cf_name); new_cf.SetColumnFamily(cf_id); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); VersionEdit cf_files; cf_files.SetColumnFamily(cf_id); cf_files.SetLogNumber(0); record.clear(); ASSERT_TRUE(cf_files.EncodeTo(&record)); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); ++cf_id; } SequenceNumber seq = 2; { VersionEdit edit; edit.SetNextFile(7); edit.SetLastSequence(seq); std::string record; ASSERT_TRUE(edit.EncodeTo(&record)); s = (*log_writer)->AddRecord(record); ASSERT_OK(s); } *last_seqno = seq + 1; } struct SstInfo { uint64_t file_number; std::string column_family; std::string key; // the only key int level = 0; SstInfo(uint64_t file_num, const std::string& cf_name, const std::string& _key) : SstInfo(file_num, cf_name, _key, 0) {} SstInfo(uint64_t file_num, const std::string& cf_name, const std::string& _key, int lvl) : file_number(file_num), column_family(cf_name), key(_key), level(lvl) {} }; // Create dummy sst, return their metadata. Note that only file name and size // are used. 
void CreateDummyTableFiles(const std::vector& file_infos, std::vector* file_metas) { assert(file_metas != nullptr); for (const auto& info : file_infos) { uint64_t file_num = info.file_number; std::string fname = MakeTableFileName(dbname_, file_num); std::unique_ptr file; Status s = fs_->NewWritableFile(fname, FileOptions(), &file, nullptr); ASSERT_OK(s); std::unique_ptr fwriter( new WritableFileWriter(std::move(file), fname, FileOptions(), env_)); std::vector> int_tbl_prop_collector_factories; std::unique_ptr builder(table_factory_->NewTableBuilder( TableBuilderOptions( immutable_cf_options_, mutable_cf_options_, *internal_comparator_, &int_tbl_prop_collector_factories, kNoCompression, /*_sample_for_compression=*/0, CompressionOptions(), /*_skip_filters=*/false, info.column_family, info.level), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, fwriter.get())); InternalKey ikey(info.key, 0, ValueType::kTypeValue); builder->Add(ikey.Encode(), "value"); ASSERT_OK(builder->Finish()); fwriter->Flush(); uint64_t file_size = 0; s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr); ASSERT_OK(s); ASSERT_NE(0, file_size); FileMetaData meta; meta = FileMetaData(file_num, /*file_path_id=*/0, file_size, ikey, ikey, 0, 0, false, 0, 0, 0, kUnknownFileChecksum, kUnknownFileChecksumFuncName); file_metas->emplace_back(meta); } } // This method updates last_sequence_. 
void WriteFileAdditionAndDeletionToManifest( uint32_t cf, const std::vector>& added_files, const std::vector>& deleted_files) { VersionEdit edit; edit.SetColumnFamily(cf); for (const auto& elem : added_files) { int level = elem.first; edit.AddFile(level, elem.second); } for (const auto& elem : deleted_files) { int level = elem.first; edit.DeleteFile(level, elem.second); } edit.SetLastSequence(last_seqno_); ++last_seqno_; assert(log_writer_.get() != nullptr); std::string record; ASSERT_TRUE(edit.EncodeTo(&record)); Status s = log_writer_->AddRecord(record); ASSERT_OK(s); } BlockBasedTableOptions block_based_table_options_; std::shared_ptr table_factory_; std::shared_ptr internal_comparator_; std::vector column_families_; SequenceNumber last_seqno_; std::unique_ptr log_writer_; }; TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { std::vector existing_files = { SstInfo(100, kDefaultColumnFamilyName, "a"), SstInfo(102, kDefaultColumnFamilyName, "b"), SstInfo(103, kDefaultColumnFamilyName, "c"), SstInfo(107, kDefaultColumnFamilyName, "d"), SstInfo(110, kDefaultColumnFamilyName, "e")}; std::vector file_metas; CreateDummyTableFiles(existing_files, &file_metas); PrepareManifest(&column_families_, &last_seqno_, &log_writer_); std::vector> added_files; for (uint64_t file_num = 10; file_num < 15; ++file_num) { std::string smallest_ukey = "a"; std::string largest_ukey = "b"; InternalKey smallest_ikey(smallest_ukey, 1, ValueType::kTypeValue); InternalKey largest_ikey(largest_ukey, 1, ValueType::kTypeValue); FileMetaData meta = FileMetaData(file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, 0, 0, 0, kUnknownFileChecksum, kUnknownFileChecksumFuncName); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( /*cf=*/0, added_files, std::vector>()); std::vector> deleted_files; deleted_files.emplace_back(0, 10); WriteFileAdditionAndDeletionToManifest( /*cf=*/0, std::vector>(), deleted_files); log_writer_.reset(); 
Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_TRUE(has_missing_table_file); for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { VersionStorageInfo* vstorage = cfd->current()->storage_info(); const std::vector& files = vstorage->LevelFiles(0); ASSERT_TRUE(files.empty()); } } TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { std::vector existing_files = { SstInfo(100, kDefaultColumnFamilyName, "a"), SstInfo(102, kDefaultColumnFamilyName, "b"), SstInfo(103, kDefaultColumnFamilyName, "c"), SstInfo(107, kDefaultColumnFamilyName, "d"), SstInfo(110, kDefaultColumnFamilyName, "e")}; std::vector file_metas; CreateDummyTableFiles(existing_files, &file_metas); PrepareManifest(&column_families_, &last_seqno_, &log_writer_); std::vector> added_files; for (size_t i = 3; i != 5; ++i) { added_files.emplace_back(0, file_metas[i]); } WriteFileAdditionAndDeletionToManifest( /*cf=*/0, added_files, std::vector>()); added_files.clear(); for (uint64_t file_num = 120; file_num < 130; ++file_num) { std::string smallest_ukey = "a"; std::string largest_ukey = "b"; InternalKey smallest_ikey(smallest_ukey, 1, ValueType::kTypeValue); InternalKey largest_ikey(largest_ukey, 1, ValueType::kTypeValue); FileMetaData meta = FileMetaData(file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, 0, 0, 0, kUnknownFileChecksum, kUnknownFileChecksumFuncName); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( /*cf=*/0, added_files, std::vector>()); log_writer_.reset(); Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool 
has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_TRUE(has_missing_table_file); for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { VersionStorageInfo* vstorage = cfd->current()->storage_info(); const std::vector& files = vstorage->LevelFiles(0); if (cfd->GetName() == kDefaultColumnFamilyName) { ASSERT_EQ(2, files.size()); for (const auto* fmeta : files) { if (fmeta->fd.GetNumber() != 107 && fmeta->fd.GetNumber() != 110) { ASSERT_FALSE(true); } } } else { ASSERT_TRUE(files.empty()); } } } TEST_F(VersionSetTestMissingFiles, NoFileMissing) { std::vector existing_files = { SstInfo(100, kDefaultColumnFamilyName, "a"), SstInfo(102, kDefaultColumnFamilyName, "b"), SstInfo(103, kDefaultColumnFamilyName, "c"), SstInfo(107, kDefaultColumnFamilyName, "d"), SstInfo(110, kDefaultColumnFamilyName, "e")}; std::vector file_metas; CreateDummyTableFiles(existing_files, &file_metas); PrepareManifest(&column_families_, &last_seqno_, &log_writer_); std::vector> added_files; for (const auto& meta : file_metas) { added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( /*cf=*/0, added_files, std::vector>()); std::vector> deleted_files; deleted_files.emplace_back(/*level=*/0, 100); WriteFileAdditionAndDeletionToManifest( /*cf=*/0, std::vector>(), deleted_files); log_writer_.reset(); Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); std::string db_id; bool has_missing_table_file = false; s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_, /*read_only=*/false, &db_id, &has_missing_table_file); ASSERT_OK(s); ASSERT_FALSE(has_missing_table_file); for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) { VersionStorageInfo* vstorage = cfd->current()->storage_info(); const std::vector& files = vstorage->LevelFiles(0); 
if (cfd->GetName() == kDefaultColumnFamilyName) { ASSERT_EQ(existing_files.size() - deleted_files.size(), files.size()); bool has_deleted_file = false; for (const auto* fmeta : files) { if (fmeta->fd.GetNumber() == 100) { has_deleted_file = true; break; } } ASSERT_FALSE(has_deleted_file); } else { ASSERT_TRUE(files.empty()); } } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/wal_manager.cc000066400000000000000000000421661370372246700164270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "db/wal_manager.h"

// NOTE(review): the standard header names below and the template arguments
// throughout this file were reconstructed from usage after being lost in
// extraction -- confirm against upstream.
#include <algorithm>
#include <cinttypes>
#include <memory>
#include <vector>

#include "db/log_reader.h"
#include "db/log_writer.h"
#include "db/transaction_log_impl.h"
#include "db/write_batch_internal.h"
#include "file/file_util.h"
#include "file/filename.h"
#include "file/sequence_file_reader.h"
#include "logging/logging.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/write_batch.h"
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/coding.h"
#include "util/mutexlock.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

#ifndef ROCKSDB_LITE

// Deletes `fname` (relative to the WAL directory) and, on success, drops the
// cached first-record sequence number for log `number`.
Status WalManager::DeleteFile(const std::string& fname, uint64_t number) {
  auto s = env_->DeleteFile(db_options_.wal_dir + "/" + fname);
  if (s.ok()) {
    MutexLock l(&read_first_record_cache_mutex_);
    read_first_record_cache_.erase(number);
  }
  return s;
}

// Fills `files` with the archived WAL files followed by the live WAL files
// whose log number is greater than the latest archived one.
Status WalManager::GetSortedWalFiles(VectorLogPtr& files) {
  // First get sorted files in db dir, then get sorted files from archived
  // dir, to avoid a race condition where a log file is moved to archived
  // dir in between.
  Status s;
  // list wal files in main db dir.
  VectorLogPtr logs;
  s = GetSortedWalsOfType(db_options_.wal_dir, logs, kAliveLogFile);
  if (!s.ok()) {
    return s;
  }

  // Reproduce the race condition where a log file is moved
  // to archived dir, between these two sync points, used in
  // (DBTest,TransactionLogIteratorRace)
  TEST_SYNC_POINT("WalManager::GetSortedWalFiles:1");
  TEST_SYNC_POINT("WalManager::GetSortedWalFiles:2");

  files.clear();
  // list wal files in archive dir.
  std::string archivedir = ArchivalDirectory(db_options_.wal_dir);
  Status exists = env_->FileExists(archivedir);
  if (exists.ok()) {
    s = GetSortedWalsOfType(archivedir, files, kArchivedLogFile);
    if (!s.ok()) {
      return s;
    }
  } else if (!exists.IsNotFound()) {
    // `s` is still OK here, so the failure to report is the one from
    // FileExists(); returning `s` would silently hide it.
    assert(exists.IsIOError());
    return exists;
  }

  uint64_t latest_archived_log_number = 0;
  if (!files.empty()) {
    latest_archived_log_number = files.back()->LogNumber();
    ROCKS_LOG_INFO(db_options_.info_log, "Latest Archived log: %" PRIu64,
                   latest_archived_log_number);
  }

  files.reserve(files.size() + logs.size());
  for (auto& log : logs) {
    if (log->LogNumber() > latest_archived_log_number) {
      files.push_back(std::move(log));
    } else {
      // When the race condition happens, we could see the
      // same log in both db dir and archived dir. Simply
      // ignore the one in db dir. Note that, if we read
      // archived dir first, we would have missed the log file.
      ROCKS_LOG_WARN(db_options_.info_log, "%s already moved to archive",
                     log->PathName().c_str());
    }
  }

  return s;
}

// Creates in `*iter` a transaction log iterator over the WAL files that may
// contain updates starting at `seq`, and returns the iterator's status.
Status WalManager::GetUpdatesSince(
    SequenceNumber seq, std::unique_ptr<TransactionLogIterator>* iter,
    const TransactionLogIterator::ReadOptions& read_options,
    VersionSet* version_set) {
  //  Get all sorted Wal Files.
  //  Do binary search and open files and find the seq number.

  std::unique_ptr<VectorLogPtr> wal_files(new VectorLogPtr);
  Status s = GetSortedWalFiles(*wal_files);
  if (!s.ok()) {
    return s;
  }

  s = RetainProbableWalFiles(*wal_files, seq);
  if (!s.ok()) {
    return s;
  }
  iter->reset(new TransactionLogIteratorImpl(
      db_options_.wal_dir, &db_options_, read_options, file_options_, seq,
      std::move(wal_files), version_set, seq_per_batch_));
  return (*iter)->status();
}

// 1. Go through all archived files and
//    a. if ttl is enabled, delete outdated files
//    b. if archive size limit is enabled, delete empty files,
//        compute file number and size.
// 2. If size limit is enabled:
//    a. compute how many files should be deleted
//    b. get sorted non-empty archived logs
//    c. delete what should be deleted
void WalManager::PurgeObsoleteWALFiles() {
  bool const ttl_enabled = db_options_.wal_ttl_seconds > 0;
  bool const size_limit_enabled = db_options_.wal_size_limit_mb > 0;
  if (!ttl_enabled && !size_limit_enabled) {
    return;
  }

  int64_t current_time;
  Status s = env_->GetCurrentTime(&current_time);
  if (!s.ok()) {
    ROCKS_LOG_ERROR(db_options_.info_log, "Can't get current time: %s",
                    s.ToString().c_str());
    assert(false);
    return;
  }
  uint64_t const now_seconds = static_cast<uint64_t>(current_time);
  uint64_t const time_to_check = (ttl_enabled && !size_limit_enabled)
                                     ? db_options_.wal_ttl_seconds / 2
                                     : kDefaultIntervalToDeleteObsoleteWAL;

  // Rate-limit: skip this run if the previous one was too recent.
  if (purge_wal_files_last_run_ + time_to_check > now_seconds) {
    return;
  }

  purge_wal_files_last_run_ = now_seconds;

  std::string archival_dir = ArchivalDirectory(db_options_.wal_dir);
  std::vector<std::string> files;
  s = env_->GetChildren(archival_dir, &files);
  if (!s.ok()) {
    ROCKS_LOG_ERROR(db_options_.info_log, "Can't get archive files: %s",
                    s.ToString().c_str());
    assert(false);
    return;
  }

  size_t log_files_num = 0;
  uint64_t log_file_size = 0;

  for (auto& f : files) {
    uint64_t number;
    FileType type;
    if (ParseFileName(f, &number, &type) && type == kLogFile) {
      std::string const file_path = archival_dir + "/" + f;
      if (ttl_enabled) {
        uint64_t file_m_time;
        s = env_->GetFileModificationTime(file_path, &file_m_time);
        if (!s.ok()) {
          ROCKS_LOG_WARN(db_options_.info_log,
                         "Can't get file mod time: %s: %s", file_path.c_str(),
                         s.ToString().c_str());
          continue;
        }
        if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) {
          s = DeleteDBFile(&db_options_, file_path, archival_dir, false,
                           /*force_fg=*/!wal_in_db_path_);
          if (!s.ok()) {
            ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s",
                           file_path.c_str(), s.ToString().c_str());
            continue;
          } else {
            MutexLock l(&read_first_record_cache_mutex_);
            read_first_record_cache_.erase(number);
          }
          continue;
        }
      }

      if (size_limit_enabled) {
        uint64_t file_size;
        s = env_->GetFileSize(file_path, &file_size);
        if (!s.ok()) {
          ROCKS_LOG_ERROR(db_options_.info_log,
                          "Unable to get file size: %s: %s", file_path.c_str(),
                          s.ToString().c_str());
          return;
        } else {
          if (file_size > 0) {
            // Track the largest non-empty file; it is used below as the
            // per-file size when converting the size limit to a file count.
            log_file_size = std::max(log_file_size, file_size);
            ++log_files_num;
          } else {
            s = DeleteDBFile(&db_options_, file_path, archival_dir, false,
                             /*force_fg=*/!wal_in_db_path_);
            if (!s.ok()) {
              ROCKS_LOG_WARN(db_options_.info_log,
                             "Unable to delete file: %s: %s",
                             file_path.c_str(), s.ToString().c_str());
              continue;
            } else {
              MutexLock l(&read_first_record_cache_mutex_);
              read_first_record_cache_.erase(number);
            }
          }
        }
      }
    }
  }

  if (0 == log_files_num || !size_limit_enabled) {
    return;
  }

  size_t const files_keep_num = static_cast<size_t>(
      db_options_.wal_size_limit_mb * 1024 * 1024 / log_file_size);
  if (log_files_num <= files_keep_num) {
    return;
  }

  size_t files_del_num = log_files_num - files_keep_num;
  VectorLogPtr archived_logs;
  s = GetSortedWalsOfType(archival_dir, archived_logs, kArchivedLogFile);
  if (!s.ok()) {
    // On failure the list may be partial and is not sorted, so deleting its
    // first entries could remove the wrong files. Delete nothing this round.
    ROCKS_LOG_WARN(db_options_.info_log,
                   "Unable to get archived WALs from: %s: %s",
                   archival_dir.c_str(), s.ToString().c_str());
    files_del_num = 0;
  } else if (files_del_num > archived_logs.size()) {
    ROCKS_LOG_WARN(db_options_.info_log,
                   "Trying to delete more archived log files than "
                   "exist. Deleting all");
    files_del_num = archived_logs.size();
  }

  for (size_t i = 0; i < files_del_num; ++i) {
    std::string const file_path = archived_logs[i]->PathName();
    s = DeleteDBFile(&db_options_, db_options_.wal_dir + "/" + file_path,
                     db_options_.wal_dir, false,
                     /*force_fg=*/!wal_in_db_path_);
    if (!s.ok()) {
      ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s",
                     file_path.c_str(), s.ToString().c_str());
      continue;
    } else {
      MutexLock l(&read_first_record_cache_mutex_);
      read_first_record_cache_.erase(archived_logs[i]->LogNumber());
    }
  }
}

// Renames `fname` to the archived log file name for `number` and logs the
// outcome. The rename status is only logged, not returned.
void WalManager::ArchiveWALFile(const std::string& fname, uint64_t number) {
  auto archived_log_name = ArchivedLogFileName(db_options_.wal_dir, number);
  // The sync point below is used in (DBTest,TransactionLogIteratorRace)
  TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:1");
  Status s = env_->RenameFile(fname, archived_log_name);
  // The sync point below is used in (DBTest,TransactionLogIteratorRace)
  TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:2");
  ROCKS_LOG_INFO(db_options_.info_log, "Move log file %s to %s -- %s\n",
                 fname.c_str(), archived_log_name.c_str(),
                 s.ToString().c_str());
}

// Appends to `log_files` one entry per non-empty log file found in `path`,
// then sorts `log_files`. Returns early with the error if a file cannot be
// read; in that case `log_files` may be partially filled and unsorted.
Status WalManager::GetSortedWalsOfType(const std::string& path,
                                       VectorLogPtr& log_files,
                                       WalFileType log_type) {
  std::vector<std::string> all_files;
  const Status status = env_->GetChildren(path, &all_files);
  if (!status.ok()) {
    return status;
  }
  log_files.reserve(all_files.size());
  for (const auto& f : all_files) {
    uint64_t number;
    FileType type;
    if (ParseFileName(f, &number, &type) && type == kLogFile) {
      SequenceNumber sequence;
      Status s = ReadFirstRecord(log_type, number, &sequence);
      if (!s.ok()) {
        return s;
      }
      if (sequence == 0) {
        // empty file
        continue;
      }

      // Reproduce the race condition where a log file is moved
      // to archived dir, between these two sync points, used in
      // (DBTest,TransactionLogIteratorRace)
      TEST_SYNC_POINT("WalManager::GetSortedWalsOfType:1");
      TEST_SYNC_POINT("WalManager::GetSortedWalsOfType:2");

      uint64_t size_bytes;
      s = env_->GetFileSize(LogFileName(path, number), &size_bytes);
      // re-try in case the alive log file has been moved to archive.
      if (!s.ok() && log_type == kAliveLogFile) {
        std::string archived_file = ArchivedLogFileName(path, number);
        if (env_->FileExists(archived_file).ok()) {
          s = env_->GetFileSize(archived_file, &size_bytes);
          if (!s.ok() && env_->FileExists(archived_file).IsNotFound()) {
            // oops, the file just got deleted from archived dir! move on
            s = Status::OK();
            continue;
          }
        }
      }
      if (!s.ok()) {
        return s;
      }

      log_files.push_back(std::unique_ptr<LogFile>(
          new LogFileImpl(number, log_type, sequence, size_bytes)));
    }
  }
  std::sort(
      log_files.begin(), log_files.end(),
      [](const std::unique_ptr<LogFile>& a, const std::unique_ptr<LogFile>& b) {
        LogFileImpl* a_impl =
            static_cast_with_check<LogFileImpl, LogFile>(a.get());
        LogFileImpl* b_impl =
            static_cast_with_check<LogFileImpl, LogFile>(b.get());
        return *a_impl < *b_impl;
      });
  return status;
}

// Erases from the front of `all_logs` the files that precede the one selected
// by a binary search for `target` over the files' start sequence numbers.
Status WalManager::RetainProbableWalFiles(VectorLogPtr& all_logs,
                                          const SequenceNumber target) {
  int64_t start = 0;  // signed to avoid overflow when target is < first file.
  int64_t end = static_cast<int64_t>(all_logs.size()) - 1;
  // Binary Search. avoid opening all files.
  while (end >= start) {
    int64_t mid = start + (end - start) / 2;  // Avoid overflow.
    SequenceNumber current_seq_num =
        all_logs.at(static_cast<size_t>(mid))->StartSequence();
    if (current_seq_num == target) {
      end = mid;
      break;
    } else if (current_seq_num < target) {
      start = mid + 1;
    } else {
      end = mid - 1;
    }
  }
  // end could be -ve.
  size_t start_index =
      static_cast<size_t>(std::max(static_cast<int64_t>(0), end));
  // The last wal file is always included
  all_logs.erase(all_logs.begin(), all_logs.begin() + start_index);
  return Status::OK();
}

// Stores in `*sequence` the sequence number of the first record of log
// `number`, consulting and updating the first-record cache. `*sequence` is 0
// when the file is empty or has disappeared from the archive directory.
Status WalManager::ReadFirstRecord(const WalFileType type,
                                   const uint64_t number,
                                   SequenceNumber* sequence) {
  *sequence = 0;
  if (type != kAliveLogFile && type != kArchivedLogFile) {
    ROCKS_LOG_ERROR(db_options_.info_log, "[WalManager] Unknown file type %s",
                    ToString(type).c_str());
    return Status::NotSupported(
        "File Type Not Known " + ToString(type));
  }
  {
    MutexLock l(&read_first_record_cache_mutex_);
    auto itr = read_first_record_cache_.find(number);
    if (itr != read_first_record_cache_.end()) {
      *sequence = itr->second;
      return Status::OK();
    }
  }
  Status s;
  if (type == kAliveLogFile) {
    std::string fname = LogFileName(db_options_.wal_dir, number);
    s = ReadFirstLine(fname, number, sequence);
    if (!s.ok() && env_->FileExists(fname).ok()) {
      // return any error that is not caused by non-existing file
      return s;
    }
  }

  if (type == kArchivedLogFile || !s.ok()) {
    //  check if the file got moved to archive.
    std::string archived_file =
        ArchivedLogFileName(db_options_.wal_dir, number);
    s = ReadFirstLine(archived_file, number, sequence);
    // maybe the file was deleted from archive dir. If that's the case, return
    // Status::OK(). The caller will identify this as empty file because
    // *sequence == 0
    if (!s.ok() && env_->FileExists(archived_file).IsNotFound()) {
      return Status::OK();
    }
  }

  if (s.ok() && *sequence != 0) {
    MutexLock l(&read_first_record_cache_mutex_);
    read_first_record_cache_.insert({number, *sequence});
  }
  return s;
}

// Resets `*log_file` to a live-log entry for `number` with its current size
// and a start sequence of 0.
Status WalManager::GetLiveWalFile(uint64_t number,
                                  std::unique_ptr<LogFile>* log_file) {
  if (!log_file) {
    return Status::InvalidArgument("log_file not preallocated.");
  }

  if (!number) {
    return Status::PathNotFound("log file not available");
  }

  Status s;

  uint64_t size_bytes;
  s = env_->GetFileSize(LogFileName(db_options_.wal_dir, number), &size_bytes);

  if (!s.ok()) {
    return s;
  }

  log_file->reset(new LogFileImpl(number, kAliveLogFile,
                                  0,  // SequenceNumber
                                  size_bytes));

  return Status::OK();
}

// the function returns status.ok() and sequence == 0 if the file exists, but is
// empty
Status WalManager::ReadFirstLine(const std::string& fname,
                                 const uint64_t number,
                                 SequenceNumber* sequence) {
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    Logger* info_log;
    const char* fname;

    Status* status;
    bool ignore_error;  // true if db_options_.paranoid_checks==false
    void Corruption(size_t bytes, const Status& s) override {
      ROCKS_LOG_WARN(info_log, "[WalManager] %s%s: dropping %d bytes; %s",
                     (this->ignore_error ? "(ignoring error) " : ""), fname,
                     static_cast<int>(bytes), s.ToString().c_str());
      if (this->status->ok()) {
        // only keep the first error
        *this->status = s;
      }
    }
  };

  std::unique_ptr<FSSequentialFile> file;
  Status status = fs_->NewSequentialFile(
      fname, fs_->OptimizeForLogRead(file_options_), &file, nullptr);
  // Check the open result before wrapping the file in a reader.
  if (!status.ok()) {
    return status;
  }
  std::unique_ptr<SequentialFileReader> file_reader(
      new SequentialFileReader(std::move(file), fname));

  LogReporter reporter;
  reporter.env = env_;
  reporter.info_log = db_options_.info_log.get();
  reporter.fname = fname.c_str();
  reporter.status = &status;
  reporter.ignore_error = !db_options_.paranoid_checks;
  log::Reader reader(db_options_.info_log, std::move(file_reader), &reporter,
                     true /*checksum*/, number);
  std::string scratch;
  Slice record;

  if (reader.ReadRecord(&record, &scratch) &&
      (status.ok() || !db_options_.paranoid_checks)) {
    if (record.size() < WriteBatchInternal::kHeader) {
      reporter.Corruption(record.size(),
                          Status::Corruption("log record too small"));
      // TODO read record's till the first no corrupt entry?
    } else {
      WriteBatch batch;
      WriteBatchInternal::SetContents(&batch, record);
      *sequence = WriteBatchInternal::Sequence(&batch);
      return Status::OK();
    }
  }

  // ReadRecord returns false on EOF, which means that the log file is empty. we
  // return status.ok() in that case and set sequence number to 0
  *sequence = 0;
  return status;
}

#endif  // ROCKSDB_LITE
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/db/wal_manager.h000066400000000000000000000075651370372246700162750ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #include #include #include #include #include #include #include #include #include "db/version_set.h" #include "file/file_util.h" #include "options/db_options.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/status.h" #include "rocksdb/transaction_log.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE // WAL manager provides the abstraction for reading the WAL files as a single // unit. Internally, it opens and reads the files using Reader or Writer // abstraction. class WalManager { public: WalManager(const ImmutableDBOptions& db_options, const FileOptions& file_options, const bool seq_per_batch = false) : db_options_(db_options), file_options_(file_options), env_(db_options.env), fs_(db_options.fs.get()), purge_wal_files_last_run_(0), seq_per_batch_(seq_per_batch), wal_in_db_path_(IsWalDirSameAsDBPath(&db_options)) {} Status GetSortedWalFiles(VectorLogPtr& files); // Allow user to tail transaction log to find all recent changes to the // database that are newer than `seq_number`. Status GetUpdatesSince( SequenceNumber seq_number, std::unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options, VersionSet* version_set); void PurgeObsoleteWALFiles(); void ArchiveWALFile(const std::string& fname, uint64_t number); Status DeleteFile(const std::string& fname, uint64_t number); Status GetLiveWalFile(uint64_t number, std::unique_ptr* log_file); Status TEST_ReadFirstRecord(const WalFileType type, const uint64_t number, SequenceNumber* sequence) { return ReadFirstRecord(type, number, sequence); } Status TEST_ReadFirstLine(const std::string& fname, const uint64_t number, SequenceNumber* sequence) { return ReadFirstLine(fname, number, sequence); } private: Status GetSortedWalsOfType(const std::string& path, VectorLogPtr& log_files, WalFileType type); // Requires: all_logs should be sorted with earliest log file first // Retains all log files in all_logs which contain updates with seq no. 
// Greater Than or Equal to the requested SequenceNumber. Status RetainProbableWalFiles(VectorLogPtr& all_logs, const SequenceNumber target); Status ReadFirstRecord(const WalFileType type, const uint64_t number, SequenceNumber* sequence); Status ReadFirstLine(const std::string& fname, const uint64_t number, SequenceNumber* sequence); // ------- state from DBImpl ------ const ImmutableDBOptions& db_options_; const FileOptions file_options_; Env* env_; FileSystem* fs_; // ------- WalManager state ------- // cache for ReadFirstRecord() calls std::unordered_map read_first_record_cache_; port::Mutex read_first_record_cache_mutex_; // last time when PurgeObsoleteWALFiles ran. uint64_t purge_wal_files_last_run_; bool seq_per_batch_; bool wal_in_db_path_; // obsolete files will be deleted every this seconds if ttl deletion is // enabled and archive size_limit is disabled. static const uint64_t kDefaultIntervalToDeleteObsoleteWAL = 600; }; #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/wal_manager_test.cc000066400000000000000000000247251370372246700174670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include #include #include "rocksdb/cache.h" #include "rocksdb/write_batch.h" #include "rocksdb/write_buffer_manager.h" #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/log_writer.h" #include "db/version_set.h" #include "db/wal_manager.h" #include "env/mock_env.h" #include "file/writable_file_writer.h" #include "table/mock_table.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // TODO(icanadi) mock out VersionSet // TODO(icanadi) move other WalManager-specific tests from db_test here class WalManagerTest : public testing::Test { public: WalManagerTest() : env_(new MockEnv(Env::Default())), dbname_(test::PerThreadDBPath("wal_manager_test")), db_options_(), table_cache_(NewLRUCache(50000, 16)), write_buffer_manager_(db_options_.db_write_buffer_size), current_log_number_(0) { DestroyDB(dbname_, Options()); } void Init() { ASSERT_OK(env_->CreateDirIfMissing(dbname_)); ASSERT_OK(env_->CreateDirIfMissing(ArchivalDirectory(dbname_))); db_options_.db_paths.emplace_back(dbname_, std::numeric_limits::max()); db_options_.wal_dir = dbname_; db_options_.env = env_.get(); fs_.reset(new LegacyFileSystemWrapper(env_.get())); db_options_.fs = fs_; versions_.reset(new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr)); wal_manager_.reset(new WalManager(db_options_, env_options_)); } void Reopen() { wal_manager_.reset(new WalManager(db_options_, env_options_)); } // NOT thread safe void Put(const std::string& key, const std::string& value) { assert(current_log_writer_.get() != nullptr); uint64_t seq = versions_->LastSequence() + 1; WriteBatch batch; batch.Put(key, value); WriteBatchInternal::SetSequence(&batch, seq); current_log_writer_->AddRecord(WriteBatchInternal::Contents(&batch)); versions_->SetLastAllocatedSequence(seq); 
versions_->SetLastPublishedSequence(seq); versions_->SetLastSequence(seq); } // NOT thread safe void RollTheLog(bool /*archived*/) { current_log_number_++; std::string fname = ArchivedLogFileName(dbname_, current_log_number_); std::unique_ptr file; ASSERT_OK(env_->NewWritableFile(fname, &file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), fname, env_options_)); current_log_writer_.reset(new log::Writer(std::move(file_writer), 0, false)); } void CreateArchiveLogs(int num_logs, int entries_per_log) { for (int i = 1; i <= num_logs; ++i) { RollTheLog(true); for (int k = 0; k < entries_per_log; ++k) { Put(ToString(k), std::string(1024, 'a')); } } } std::unique_ptr OpenTransactionLogIter( const SequenceNumber seq) { std::unique_ptr iter; Status status = wal_manager_->GetUpdatesSince( seq, &iter, TransactionLogIterator::ReadOptions(), versions_.get()); EXPECT_OK(status); return iter; } std::unique_ptr env_; std::string dbname_; ImmutableDBOptions db_options_; WriteController write_controller_; EnvOptions env_options_; std::shared_ptr table_cache_; WriteBufferManager write_buffer_manager_; std::unique_ptr versions_; std::unique_ptr wal_manager_; std::shared_ptr fs_; std::unique_ptr current_log_writer_; uint64_t current_log_number_; }; TEST_F(WalManagerTest, ReadFirstRecordCache) { Init(); std::string path = dbname_ + "/000001.log"; std::unique_ptr file; ASSERT_OK(env_->NewWritableFile(path, &file, EnvOptions())); SequenceNumber s; ASSERT_OK(wal_manager_->TEST_ReadFirstLine(path, 1 /* number */, &s)); ASSERT_EQ(s, 0U); ASSERT_OK( wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1 /* number */, &s)); ASSERT_EQ(s, 0U); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), path, EnvOptions())); log::Writer writer(std::move(file_writer), 1, db_options_.recycle_log_file_num > 0); WriteBatch batch; batch.Put("foo", "bar"); WriteBatchInternal::SetSequence(&batch, 10); 
writer.AddRecord(WriteBatchInternal::Contents(&batch)); // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here. // Waiting for lei to finish with db_test // env_->count_sequential_reads_ = true; // sequential_read_counter_ sanity test // ASSERT_EQ(env_->sequential_read_counter_.Read(), 0); ASSERT_OK(wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1, &s)); ASSERT_EQ(s, 10U); // did a read // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here // ASSERT_EQ(env_->sequential_read_counter_.Read(), 1); ASSERT_OK(wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1, &s)); ASSERT_EQ(s, 10U); // no new reads since the value is cached // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here // ASSERT_EQ(env_->sequential_read_counter_.Read(), 1); } namespace { uint64_t GetLogDirSize(std::string dir_path, Env* env) { uint64_t dir_size = 0; std::vector files; env->GetChildren(dir_path, &files); for (auto& f : files) { uint64_t number; FileType type; if (ParseFileName(f, &number, &type) && type == kLogFile) { std::string const file_path = dir_path + "/" + f; uint64_t file_size; env->GetFileSize(file_path, &file_size); dir_size += file_size; } } return dir_size; } std::vector ListSpecificFiles( Env* env, const std::string& path, const FileType expected_file_type) { std::vector files; std::vector file_numbers; env->GetChildren(path, &files); uint64_t number; FileType type; for (size_t i = 0; i < files.size(); ++i) { if (ParseFileName(files[i], &number, &type)) { if (type == expected_file_type) { file_numbers.push_back(number); } } } return file_numbers; } int CountRecords(TransactionLogIterator* iter) { int count = 0; SequenceNumber lastSequence = 0; BatchResult res; while (iter->Valid()) { res = iter->GetBatch(); EXPECT_TRUE(res.sequence > lastSequence); ++count; lastSequence = res.sequence; EXPECT_OK(iter->status()); iter->Next(); } return count; } } // namespace TEST_F(WalManagerTest, WALArchivalSizeLimit) { 
db_options_.wal_ttl_seconds = 0; db_options_.wal_size_limit_mb = 1000; Init(); // TEST : Create WalManager with huge size limit and no ttl. // Create some archived files and call PurgeObsoleteWALFiles(). // Count the archived log files that survived. // Assert that all of them did. // Change size limit. Re-open WalManager. // Assert that archive is not greater than wal_size_limit_mb after // PurgeObsoleteWALFiles() // Set ttl and time_to_check_ to small values. Re-open db. // Assert that there are no archived logs left. std::string archive_dir = ArchivalDirectory(dbname_); CreateArchiveLogs(20, 5000); std::vector log_files = ListSpecificFiles(env_.get(), archive_dir, kLogFile); ASSERT_EQ(log_files.size(), 20U); db_options_.wal_size_limit_mb = 8; Reopen(); wal_manager_->PurgeObsoleteWALFiles(); uint64_t archive_size = GetLogDirSize(archive_dir, env_.get()); ASSERT_TRUE(archive_size <= db_options_.wal_size_limit_mb * 1024 * 1024); db_options_.wal_ttl_seconds = 1; env_->FakeSleepForMicroseconds(2 * 1000 * 1000); Reopen(); wal_manager_->PurgeObsoleteWALFiles(); log_files = ListSpecificFiles(env_.get(), archive_dir, kLogFile); ASSERT_TRUE(log_files.empty()); } TEST_F(WalManagerTest, WALArchivalTtl) { db_options_.wal_ttl_seconds = 1000; Init(); // TEST : Create WalManager with a ttl and no size limit. // Create some archived log files and call PurgeObsoleteWALFiles(). // Assert that files are not deleted // Reopen db with small ttl. // Assert that all archived logs was removed. 
std::string archive_dir = ArchivalDirectory(dbname_); CreateArchiveLogs(20, 5000); std::vector log_files = ListSpecificFiles(env_.get(), archive_dir, kLogFile); ASSERT_GT(log_files.size(), 0U); db_options_.wal_ttl_seconds = 1; env_->FakeSleepForMicroseconds(3 * 1000 * 1000); Reopen(); wal_manager_->PurgeObsoleteWALFiles(); log_files = ListSpecificFiles(env_.get(), archive_dir, kLogFile); ASSERT_TRUE(log_files.empty()); } TEST_F(WalManagerTest, TransactionLogIteratorMoveOverZeroFiles) { Init(); RollTheLog(false); Put("key1", std::string(1024, 'a')); // Create a zero record WAL file. RollTheLog(false); RollTheLog(false); Put("key2", std::string(1024, 'a')); auto iter = OpenTransactionLogIter(0); ASSERT_EQ(2, CountRecords(iter.get())); } TEST_F(WalManagerTest, TransactionLogIteratorJustEmptyFile) { Init(); RollTheLog(false); auto iter = OpenTransactionLogIter(0); // Check that an empty iterator is returned ASSERT_TRUE(!iter->Valid()); } TEST_F(WalManagerTest, TransactionLogIteratorNewFileWhileScanning) { Init(); CreateArchiveLogs(2, 100); auto iter = OpenTransactionLogIter(0); CreateArchiveLogs(1, 100); int i = 0; for (; iter->Valid(); iter->Next()) { i++; } ASSERT_EQ(i, 200); // A new log file was added after the iterator was created. // TryAgain indicates a new iterator is needed to fetch the new data ASSERT_TRUE(iter->status().IsTryAgain()); iter = OpenTransactionLogIter(0); i = 0; for (; iter->Valid(); iter->Next()) { i++; } ASSERT_EQ(i, 300); ASSERT_TRUE(iter->status().ok()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as WalManager is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/write_batch.cc000066400000000000000000002254401370372246700164430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // WriteBatch::rep_ := // sequence: fixed64 // count: fixed32 // data: record[count] // record := // kTypeValue varstring varstring // kTypeDeletion varstring // kTypeSingleDeletion varstring // kTypeRangeDeletion varstring varstring // kTypeMerge varstring varstring // kTypeColumnFamilyValue varint32 varstring varstring // kTypeColumnFamilyDeletion varint32 varstring // kTypeColumnFamilySingleDeletion varint32 varstring // kTypeColumnFamilyRangeDeletion varint32 varstring varstring // kTypeColumnFamilyMerge varint32 varstring varstring // kTypeBeginPrepareXID varstring // kTypeEndPrepareXID // kTypeCommitXID varstring // kTypeRollbackXID varstring // kTypeBeginPersistedPrepareXID varstring // kTypeBeginUnprepareXID varstring // kTypeNoop // varstring := // len: varint32 // data: uint8[len] #include "rocksdb/write_batch.h" #include #include #include #include #include #include #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/flush_scheduler.h" #include "db/memtable.h" #include "db/merge_context.h" #include "db/snapshot_impl.h" #include "db/trim_history_scheduler.h" #include "db/write_batch_internal.h" #include "monitoring/perf_context_imp.h" #include "monitoring/statistics.h" #include "port/lang.h" #include "rocksdb/merge_operator.h" #include "util/autovector.h" #include "util/cast_util.h" #include "util/coding.h" #include "util/duplicate_detector.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // anon namespace for file-local types namespace { enum ContentFlags : uint32_t { DEFERRED = 1 << 0, 
HAS_PUT = 1 << 1, HAS_DELETE = 1 << 2, HAS_SINGLE_DELETE = 1 << 3, HAS_MERGE = 1 << 4, HAS_BEGIN_PREPARE = 1 << 5, HAS_END_PREPARE = 1 << 6, HAS_COMMIT = 1 << 7, HAS_ROLLBACK = 1 << 8, HAS_DELETE_RANGE = 1 << 9, HAS_BLOB_INDEX = 1 << 10, HAS_BEGIN_UNPREPARE = 1 << 11, }; struct BatchContentClassifier : public WriteBatch::Handler { uint32_t content_flags = 0; Status PutCF(uint32_t, const Slice&, const Slice&) override { content_flags |= ContentFlags::HAS_PUT; return Status::OK(); } Status DeleteCF(uint32_t, const Slice&) override { content_flags |= ContentFlags::HAS_DELETE; return Status::OK(); } Status SingleDeleteCF(uint32_t, const Slice&) override { content_flags |= ContentFlags::HAS_SINGLE_DELETE; return Status::OK(); } Status DeleteRangeCF(uint32_t, const Slice&, const Slice&) override { content_flags |= ContentFlags::HAS_DELETE_RANGE; return Status::OK(); } Status MergeCF(uint32_t, const Slice&, const Slice&) override { content_flags |= ContentFlags::HAS_MERGE; return Status::OK(); } Status PutBlobIndexCF(uint32_t, const Slice&, const Slice&) override { content_flags |= ContentFlags::HAS_BLOB_INDEX; return Status::OK(); } Status MarkBeginPrepare(bool unprepare) override { content_flags |= ContentFlags::HAS_BEGIN_PREPARE; if (unprepare) { content_flags |= ContentFlags::HAS_BEGIN_UNPREPARE; } return Status::OK(); } Status MarkEndPrepare(const Slice&) override { content_flags |= ContentFlags::HAS_END_PREPARE; return Status::OK(); } Status MarkCommit(const Slice&) override { content_flags |= ContentFlags::HAS_COMMIT; return Status::OK(); } Status MarkRollback(const Slice&) override { content_flags |= ContentFlags::HAS_ROLLBACK; return Status::OK(); } }; class TimestampAssigner : public WriteBatch::Handler { public: explicit TimestampAssigner(const Slice& ts) : timestamp_(ts), timestamps_(kEmptyTimestampList) {} explicit TimestampAssigner(const std::vector& ts_list) : timestamps_(ts_list) { SanityCheck(); } ~TimestampAssigner() override {} Status PutCF(uint32_t, 
const Slice& key, const Slice&) override { AssignTimestamp(key); ++idx_; return Status::OK(); } Status DeleteCF(uint32_t, const Slice& key) override { AssignTimestamp(key); ++idx_; return Status::OK(); } Status SingleDeleteCF(uint32_t, const Slice& key) override { AssignTimestamp(key); ++idx_; return Status::OK(); } Status DeleteRangeCF(uint32_t, const Slice& begin_key, const Slice& end_key) override { AssignTimestamp(begin_key); AssignTimestamp(end_key); ++idx_; return Status::OK(); } Status MergeCF(uint32_t, const Slice& key, const Slice&) override { AssignTimestamp(key); ++idx_; return Status::OK(); } Status PutBlobIndexCF(uint32_t, const Slice&, const Slice&) override { // TODO (yanqin): support blob db in the future. return Status::OK(); } Status MarkBeginPrepare(bool) override { // TODO (yanqin): support in the future. return Status::OK(); } Status MarkEndPrepare(const Slice&) override { // TODO (yanqin): support in the future. return Status::OK(); } Status MarkCommit(const Slice&) override { // TODO (yanqin): support in the future. return Status::OK(); } Status MarkRollback(const Slice&) override { // TODO (yanqin): support in the future. return Status::OK(); } private: void SanityCheck() const { assert(!timestamps_.empty()); #ifndef NDEBUG const size_t ts_sz = timestamps_[0].size(); for (size_t i = 1; i != timestamps_.size(); ++i) { assert(ts_sz == timestamps_[i].size()); } #endif // !NDEBUG } void AssignTimestamp(const Slice& key) { assert(timestamps_.empty() || idx_ < timestamps_.size()); const Slice& ts = timestamps_.empty() ? timestamp_ : timestamps_[idx_]; size_t ts_sz = ts.size(); char* ptr = const_cast(key.data() + key.size() - ts_sz); memcpy(ptr, ts.data(), ts_sz); } static const std::vector kEmptyTimestampList; const Slice timestamp_; const std::vector& timestamps_; size_t idx_ = 0; // No copy or move. 
TimestampAssigner(const TimestampAssigner&) = delete; TimestampAssigner(TimestampAssigner&&) = delete; TimestampAssigner& operator=(const TimestampAssigner&) = delete; TimestampAssigner&& operator=(TimestampAssigner&&) = delete; }; const std::vector TimestampAssigner::kEmptyTimestampList; } // anon namespace struct SavePoints { std::stack> stack; }; WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes) : content_flags_(0), max_bytes_(max_bytes), rep_(), timestamp_size_(0) { rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader) ? reserved_bytes : WriteBatchInternal::kHeader); rep_.resize(WriteBatchInternal::kHeader); } WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes, size_t ts_sz) : content_flags_(0), max_bytes_(max_bytes), rep_(), timestamp_size_(ts_sz) { rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader) ? reserved_bytes : WriteBatchInternal::kHeader); rep_.resize(WriteBatchInternal::kHeader); } WriteBatch::WriteBatch(const std::string& rep) : content_flags_(ContentFlags::DEFERRED), max_bytes_(0), rep_(rep), timestamp_size_(0) {} WriteBatch::WriteBatch(std::string&& rep) : content_flags_(ContentFlags::DEFERRED), max_bytes_(0), rep_(std::move(rep)), timestamp_size_(0) {} WriteBatch::WriteBatch(const WriteBatch& src) : wal_term_point_(src.wal_term_point_), content_flags_(src.content_flags_.load(std::memory_order_relaxed)), max_bytes_(src.max_bytes_), rep_(src.rep_), timestamp_size_(src.timestamp_size_) { if (src.save_points_ != nullptr) { save_points_.reset(new SavePoints()); save_points_->stack = src.save_points_->stack; } } WriteBatch::WriteBatch(WriteBatch&& src) noexcept : save_points_(std::move(src.save_points_)), wal_term_point_(std::move(src.wal_term_point_)), content_flags_(src.content_flags_.load(std::memory_order_relaxed)), max_bytes_(src.max_bytes_), rep_(std::move(src.rep_)), timestamp_size_(src.timestamp_size_) {} WriteBatch& WriteBatch::operator=(const WriteBatch& src) { if (&src != this) { this->~WriteBatch(); 
new (this) WriteBatch(src); } return *this; } WriteBatch& WriteBatch::operator=(WriteBatch&& src) { if (&src != this) { this->~WriteBatch(); new (this) WriteBatch(std::move(src)); } return *this; } WriteBatch::~WriteBatch() { } WriteBatch::Handler::~Handler() { } void WriteBatch::Handler::LogData(const Slice& /*blob*/) { // If the user has not specified something to do with blobs, then we ignore // them. } bool WriteBatch::Handler::Continue() { return true; } void WriteBatch::Clear() { rep_.clear(); rep_.resize(WriteBatchInternal::kHeader); content_flags_.store(0, std::memory_order_relaxed); if (save_points_ != nullptr) { while (!save_points_->stack.empty()) { save_points_->stack.pop(); } } wal_term_point_.clear(); } uint32_t WriteBatch::Count() const { return WriteBatchInternal::Count(this); } uint32_t WriteBatch::ComputeContentFlags() const { auto rv = content_flags_.load(std::memory_order_relaxed); if ((rv & ContentFlags::DEFERRED) != 0) { BatchContentClassifier classifier; Iterate(&classifier); rv = classifier.content_flags; // this method is conceptually const, because it is performing a lazy // computation that doesn't affect the abstract state of the batch. 
// content_flags_ is marked mutable so that we can perform the // following assignment content_flags_.store(rv, std::memory_order_relaxed); } return rv; } void WriteBatch::MarkWalTerminationPoint() { wal_term_point_.size = GetDataSize(); wal_term_point_.count = Count(); wal_term_point_.content_flags = content_flags_; } bool WriteBatch::HasPut() const { return (ComputeContentFlags() & ContentFlags::HAS_PUT) != 0; } bool WriteBatch::HasDelete() const { return (ComputeContentFlags() & ContentFlags::HAS_DELETE) != 0; } bool WriteBatch::HasSingleDelete() const { return (ComputeContentFlags() & ContentFlags::HAS_SINGLE_DELETE) != 0; } bool WriteBatch::HasDeleteRange() const { return (ComputeContentFlags() & ContentFlags::HAS_DELETE_RANGE) != 0; } bool WriteBatch::HasMerge() const { return (ComputeContentFlags() & ContentFlags::HAS_MERGE) != 0; } bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key, bool cf_record) { assert(input != nullptr && key != nullptr); // Skip tag byte input->remove_prefix(1); if (cf_record) { // Skip column_family bytes uint32_t cf; if (!GetVarint32(input, &cf)) { return false; } } // Extract key return GetLengthPrefixedSlice(input, key); } bool WriteBatch::HasBeginPrepare() const { return (ComputeContentFlags() & ContentFlags::HAS_BEGIN_PREPARE) != 0; } bool WriteBatch::HasEndPrepare() const { return (ComputeContentFlags() & ContentFlags::HAS_END_PREPARE) != 0; } bool WriteBatch::HasCommit() const { return (ComputeContentFlags() & ContentFlags::HAS_COMMIT) != 0; } bool WriteBatch::HasRollback() const { return (ComputeContentFlags() & ContentFlags::HAS_ROLLBACK) != 0; } Status ReadRecordFromWriteBatch(Slice* input, char* tag, uint32_t* column_family, Slice* key, Slice* value, Slice* blob, Slice* xid) { assert(key != nullptr && value != nullptr); *tag = (*input)[0]; input->remove_prefix(1); *column_family = 0; // default switch (*tag) { case kTypeColumnFamilyValue: if (!GetVarint32(input, column_family)) { return Status::Corruption("bad 
WriteBatch Put"); } FALLTHROUGH_INTENDED; case kTypeValue: if (!GetLengthPrefixedSlice(input, key) || !GetLengthPrefixedSlice(input, value)) { return Status::Corruption("bad WriteBatch Put"); } break; case kTypeColumnFamilyDeletion: case kTypeColumnFamilySingleDeletion: if (!GetVarint32(input, column_family)) { return Status::Corruption("bad WriteBatch Delete"); } FALLTHROUGH_INTENDED; case kTypeDeletion: case kTypeSingleDeletion: if (!GetLengthPrefixedSlice(input, key)) { return Status::Corruption("bad WriteBatch Delete"); } break; case kTypeColumnFamilyRangeDeletion: if (!GetVarint32(input, column_family)) { return Status::Corruption("bad WriteBatch DeleteRange"); } FALLTHROUGH_INTENDED; case kTypeRangeDeletion: // for range delete, "key" is begin_key, "value" is end_key if (!GetLengthPrefixedSlice(input, key) || !GetLengthPrefixedSlice(input, value)) { return Status::Corruption("bad WriteBatch DeleteRange"); } break; case kTypeColumnFamilyMerge: if (!GetVarint32(input, column_family)) { return Status::Corruption("bad WriteBatch Merge"); } FALLTHROUGH_INTENDED; case kTypeMerge: if (!GetLengthPrefixedSlice(input, key) || !GetLengthPrefixedSlice(input, value)) { return Status::Corruption("bad WriteBatch Merge"); } break; case kTypeColumnFamilyBlobIndex: if (!GetVarint32(input, column_family)) { return Status::Corruption("bad WriteBatch BlobIndex"); } FALLTHROUGH_INTENDED; case kTypeBlobIndex: if (!GetLengthPrefixedSlice(input, key) || !GetLengthPrefixedSlice(input, value)) { return Status::Corruption("bad WriteBatch BlobIndex"); } break; case kTypeLogData: assert(blob != nullptr); if (!GetLengthPrefixedSlice(input, blob)) { return Status::Corruption("bad WriteBatch Blob"); } break; case kTypeNoop: case kTypeBeginPrepareXID: // This indicates that the prepared batch is also persisted in the db. 
// This is used in WritePreparedTxn case kTypeBeginPersistedPrepareXID: // This is used in WriteUnpreparedTxn case kTypeBeginUnprepareXID: break; case kTypeEndPrepareXID: if (!GetLengthPrefixedSlice(input, xid)) { return Status::Corruption("bad EndPrepare XID"); } break; case kTypeCommitXID: if (!GetLengthPrefixedSlice(input, xid)) { return Status::Corruption("bad Commit XID"); } break; case kTypeRollbackXID: if (!GetLengthPrefixedSlice(input, xid)) { return Status::Corruption("bad Rollback XID"); } break; default: return Status::Corruption("unknown WriteBatch tag"); } return Status::OK(); } Status WriteBatch::Iterate(Handler* handler) const { if (rep_.size() < WriteBatchInternal::kHeader) { return Status::Corruption("malformed WriteBatch (too small)"); } return WriteBatchInternal::Iterate(this, handler, WriteBatchInternal::kHeader, rep_.size()); } Status WriteBatchInternal::Iterate(const WriteBatch* wb, WriteBatch::Handler* handler, size_t begin, size_t end) { if (begin > wb->rep_.size() || end > wb->rep_.size() || end < begin) { return Status::Corruption("Invalid start/end bounds for Iterate"); } assert(begin <= end); Slice input(wb->rep_.data() + begin, static_cast(end - begin)); bool whole_batch = (begin == WriteBatchInternal::kHeader) && (end == wb->rep_.size()); Slice key, value, blob, xid; // Sometimes a sub-batch starts with a Noop. We want to exclude such Noops as // the batch boundary symbols otherwise we would mis-count the number of // batches. We do that by checking whether the accumulated batch is empty // before seeing the next Noop. 
bool empty_batch = true; uint32_t found = 0; Status s; char tag = 0; uint32_t column_family = 0; // default bool last_was_try_again = false; bool handler_continue = true; while (((s.ok() && !input.empty()) || UNLIKELY(s.IsTryAgain()))) { handler_continue = handler->Continue(); if (!handler_continue) { break; } if (LIKELY(!s.IsTryAgain())) { last_was_try_again = false; tag = 0; column_family = 0; // default s = ReadRecordFromWriteBatch(&input, &tag, &column_family, &key, &value, &blob, &xid); if (!s.ok()) { return s; } } else { assert(s.IsTryAgain()); assert(!last_was_try_again); // to detect infinite loop bugs if (UNLIKELY(last_was_try_again)) { return Status::Corruption( "two consecutive TryAgain in WriteBatch handler; this is either a " "software bug or data corruption."); } last_was_try_again = true; s = Status::OK(); } switch (tag) { case kTypeColumnFamilyValue: case kTypeValue: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_PUT)); s = handler->PutCF(column_family, key, value); if (LIKELY(s.ok())) { empty_batch = false; found++; } break; case kTypeColumnFamilyDeletion: case kTypeDeletion: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_DELETE)); s = handler->DeleteCF(column_family, key); if (LIKELY(s.ok())) { empty_batch = false; found++; } break; case kTypeColumnFamilySingleDeletion: case kTypeSingleDeletion: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_SINGLE_DELETE)); s = handler->SingleDeleteCF(column_family, key); if (LIKELY(s.ok())) { empty_batch = false; found++; } break; case kTypeColumnFamilyRangeDeletion: case kTypeRangeDeletion: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_DELETE_RANGE)); s = handler->DeleteRangeCF(column_family, key, value); if (LIKELY(s.ok())) { empty_batch = false; found++; } break; case 
kTypeColumnFamilyMerge: case kTypeMerge: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_MERGE)); s = handler->MergeCF(column_family, key, value); if (LIKELY(s.ok())) { empty_batch = false; found++; } break; case kTypeColumnFamilyBlobIndex: case kTypeBlobIndex: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_BLOB_INDEX)); s = handler->PutBlobIndexCF(column_family, key, value); if (LIKELY(s.ok())) { found++; } break; case kTypeLogData: handler->LogData(blob); // A batch might have nothing but LogData. It is still a batch. empty_batch = false; break; case kTypeBeginPrepareXID: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_BEGIN_PREPARE)); handler->MarkBeginPrepare(); empty_batch = false; if (!handler->WriteAfterCommit()) { s = Status::NotSupported( "WriteCommitted txn tag when write_after_commit_ is disabled (in " "WritePrepared/WriteUnprepared mode). If it is not due to " "corruption, the WAL must be emptied before changing the " "WritePolicy."); } if (handler->WriteBeforePrepare()) { s = Status::NotSupported( "WriteCommitted txn tag when write_before_prepare_ is enabled " "(in WriteUnprepared mode). If it is not due to corruption, the " "WAL must be emptied before changing the WritePolicy."); } break; case kTypeBeginPersistedPrepareXID: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_BEGIN_PREPARE)); handler->MarkBeginPrepare(); empty_batch = false; if (handler->WriteAfterCommit()) { s = Status::NotSupported( "WritePrepared/WriteUnprepared txn tag when write_after_commit_ " "is enabled (in default WriteCommitted mode). 
If it is not due " "to corruption, the WAL must be emptied before changing the " "WritePolicy."); } break; case kTypeBeginUnprepareXID: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_BEGIN_UNPREPARE)); handler->MarkBeginPrepare(true /* unprepared */); empty_batch = false; if (handler->WriteAfterCommit()) { s = Status::NotSupported( "WriteUnprepared txn tag when write_after_commit_ is enabled (in " "default WriteCommitted mode). If it is not due to corruption, " "the WAL must be emptied before changing the WritePolicy."); } if (!handler->WriteBeforePrepare()) { s = Status::NotSupported( "WriteUnprepared txn tag when write_before_prepare_ is disabled " "(in WriteCommitted/WritePrepared mode). If it is not due to " "corruption, the WAL must be emptied before changing the " "WritePolicy."); } break; case kTypeEndPrepareXID: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_END_PREPARE)); handler->MarkEndPrepare(xid); empty_batch = true; break; case kTypeCommitXID: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_COMMIT)); handler->MarkCommit(xid); empty_batch = true; break; case kTypeRollbackXID: assert(wb->content_flags_.load(std::memory_order_relaxed) & (ContentFlags::DEFERRED | ContentFlags::HAS_ROLLBACK)); handler->MarkRollback(xid); empty_batch = true; break; case kTypeNoop: handler->MarkNoop(empty_batch); empty_batch = true; break; default: return Status::Corruption("unknown WriteBatch tag"); } } if (!s.ok()) { return s; } if (handler_continue && whole_batch && found != WriteBatchInternal::Count(wb)) { return Status::Corruption("WriteBatch has wrong count"); } else { return Status::OK(); } } bool WriteBatchInternal::IsLatestPersistentState(const WriteBatch* b) { return b->is_latest_persistent_state_; } void WriteBatchInternal::SetAsLastestPersistentState(WriteBatch* b) { 
b->is_latest_persistent_state_ = true; } uint32_t WriteBatchInternal::Count(const WriteBatch* b) { return DecodeFixed32(b->rep_.data() + 8); } void WriteBatchInternal::SetCount(WriteBatch* b, uint32_t n) { EncodeFixed32(&b->rep_[8], n); } SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) { return SequenceNumber(DecodeFixed64(b->rep_.data())); } void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) { EncodeFixed64(&b->rep_[0], seq); } size_t WriteBatchInternal::GetFirstOffset(WriteBatch* /*b*/) { return WriteBatchInternal::kHeader; } Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id, const Slice& key, const Slice& value) { if (key.size() > size_t{port::kMaxUint32}) { return Status::InvalidArgument("key is too large"); } if (value.size() > size_t{port::kMaxUint32}) { return Status::InvalidArgument("value is too large"); } LocalSavePoint save(b); WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); if (column_family_id == 0) { b->rep_.push_back(static_cast(kTypeValue)); } else { b->rep_.push_back(static_cast(kTypeColumnFamilyValue)); PutVarint32(&b->rep_, column_family_id); } if (0 == b->timestamp_size_) { PutLengthPrefixedSlice(&b->rep_, key); } else { PutVarint32(&b->rep_, static_cast(key.size() + b->timestamp_size_)); b->rep_.append(key.data(), key.size()); b->rep_.append(b->timestamp_size_, '\0'); } PutLengthPrefixedSlice(&b->rep_, value); b->content_flags_.store( b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_PUT, std::memory_order_relaxed); return save.commit(); } Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) { return WriteBatchInternal::Put(this, GetColumnFamilyID(column_family), key, value); } Status WriteBatchInternal::CheckSlicePartsLength(const SliceParts& key, const SliceParts& value) { size_t total_key_bytes = 0; for (int i = 0; i < key.num_parts; ++i) { total_key_bytes += key.parts[i].size(); } if (total_key_bytes 
>= size_t{port::kMaxUint32}) { return Status::InvalidArgument("key is too large"); } size_t total_value_bytes = 0; for (int i = 0; i < value.num_parts; ++i) { total_value_bytes += value.parts[i].size(); } if (total_value_bytes >= size_t{port::kMaxUint32}) { return Status::InvalidArgument("value is too large"); } return Status::OK(); } Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id, const SliceParts& key, const SliceParts& value) { Status s = CheckSlicePartsLength(key, value); if (!s.ok()) { return s; } LocalSavePoint save(b); WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); if (column_family_id == 0) { b->rep_.push_back(static_cast(kTypeValue)); } else { b->rep_.push_back(static_cast(kTypeColumnFamilyValue)); PutVarint32(&b->rep_, column_family_id); } if (0 == b->timestamp_size_) { PutLengthPrefixedSliceParts(&b->rep_, key); } else { PutLengthPrefixedSlicePartsWithPadding(&b->rep_, key, b->timestamp_size_); } PutLengthPrefixedSliceParts(&b->rep_, value); b->content_flags_.store( b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_PUT, std::memory_order_relaxed); return save.commit(); } Status WriteBatch::Put(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) { return WriteBatchInternal::Put(this, GetColumnFamilyID(column_family), key, value); } Status WriteBatchInternal::InsertNoop(WriteBatch* b) { b->rep_.push_back(static_cast(kTypeNoop)); return Status::OK(); } Status WriteBatchInternal::MarkEndPrepare(WriteBatch* b, const Slice& xid, bool write_after_commit, bool unprepared_batch) { // a manually constructed batch can only contain one prepare section assert(b->rep_[12] == static_cast(kTypeNoop)); // all savepoints up to this point are cleared if (b->save_points_ != nullptr) { while (!b->save_points_->stack.empty()) { b->save_points_->stack.pop(); } } // rewrite noop as begin marker b->rep_[12] = static_cast( write_after_commit ? kTypeBeginPrepareXID : (unprepared_batch ? 
kTypeBeginUnprepareXID : kTypeBeginPersistedPrepareXID)); b->rep_.push_back(static_cast(kTypeEndPrepareXID)); PutLengthPrefixedSlice(&b->rep_, xid); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_END_PREPARE | ContentFlags::HAS_BEGIN_PREPARE, std::memory_order_relaxed); if (unprepared_batch) { b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_BEGIN_UNPREPARE, std::memory_order_relaxed); } return Status::OK(); } Status WriteBatchInternal::MarkCommit(WriteBatch* b, const Slice& xid) { b->rep_.push_back(static_cast(kTypeCommitXID)); PutLengthPrefixedSlice(&b->rep_, xid); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_COMMIT, std::memory_order_relaxed); return Status::OK(); } Status WriteBatchInternal::MarkRollback(WriteBatch* b, const Slice& xid) { b->rep_.push_back(static_cast(kTypeRollbackXID)); PutLengthPrefixedSlice(&b->rep_, xid); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_ROLLBACK, std::memory_order_relaxed); return Status::OK(); } Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id, const Slice& key) { LocalSavePoint save(b); WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); if (column_family_id == 0) { b->rep_.push_back(static_cast(kTypeDeletion)); } else { b->rep_.push_back(static_cast(kTypeColumnFamilyDeletion)); PutVarint32(&b->rep_, column_family_id); } if (0 == b->timestamp_size_) { PutLengthPrefixedSlice(&b->rep_, key); } else { PutVarint32(&b->rep_, static_cast(key.size() + b->timestamp_size_)); b->rep_.append(key.data(), key.size()); b->rep_.append(b->timestamp_size_, '\0'); } b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_DELETE, std::memory_order_relaxed); return save.commit(); } Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) { return 
WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family), key);
}

// Appends a deletion record for a multi-part key to `b`. Same layout as the
// Slice overload: tag byte (plus varint32 column family id for non-default
// column families) followed by the length-prefixed key, padded by
// `timestamp_size_` bytes when that is non-zero. Sets HAS_DELETE.
Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
                                  const SliceParts& key) {
  LocalSavePoint save(b);
  WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
  if (column_family_id == 0) {
    b->rep_.push_back(static_cast<char>(kTypeDeletion));
  } else {
    b->rep_.push_back(static_cast<char>(kTypeColumnFamilyDeletion));
    PutVarint32(&b->rep_, column_family_id);
  }
  if (0 == b->timestamp_size_) {
    PutLengthPrefixedSliceParts(&b->rep_, key);
  } else {
    PutLengthPrefixedSlicePartsWithPadding(&b->rep_, key, b->timestamp_size_);
  }
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
                              ContentFlags::HAS_DELETE,
                          std::memory_order_relaxed);
  return save.commit();
}

// Public entry point: resolves the column family id and forwards to
// WriteBatchInternal::Delete().
Status WriteBatch::Delete(ColumnFamilyHandle* column_family,
                          const SliceParts& key) {
  return WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family),
                                    key);
}

// Appends a single-deletion record for `key` to `b`: kTypeSingleDeletion (or
// kTypeColumnFamilySingleDeletion plus the varint32 column family id) followed
// by the length-prefixed key. Sets HAS_SINGLE_DELETE.
Status WriteBatchInternal::SingleDelete(WriteBatch* b,
                                        uint32_t column_family_id,
                                        const Slice& key) {
  LocalSavePoint save(b);
  WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
  if (column_family_id == 0) {
    b->rep_.push_back(static_cast<char>(kTypeSingleDeletion));
  } else {
    b->rep_.push_back(static_cast<char>(kTypeColumnFamilySingleDeletion));
    PutVarint32(&b->rep_, column_family_id);
  }
  PutLengthPrefixedSlice(&b->rep_, key);
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
                              ContentFlags::HAS_SINGLE_DELETE,
                          std::memory_order_relaxed);
  return save.commit();
}

// Public entry point: resolves the column family id and forwards to
// WriteBatchInternal::SingleDelete().
Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
                                const Slice& key) {
  return WriteBatchInternal::SingleDelete(
      this, GetColumnFamilyID(column_family), key);
}

// Multi-part-key variant of SingleDelete(); same record layout as the Slice
// overload.
Status WriteBatchInternal::SingleDelete(WriteBatch* b,
                                        uint32_t column_family_id,
                                        const SliceParts& key) {
  LocalSavePoint save(b);
  WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
  if (column_family_id == 0) {
    b->rep_.push_back(static_cast<char>(kTypeSingleDeletion));
  } else {
b->rep_.push_back(static_cast(kTypeColumnFamilySingleDeletion)); PutVarint32(&b->rep_, column_family_id); } PutLengthPrefixedSliceParts(&b->rep_, key); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_SINGLE_DELETE, std::memory_order_relaxed); return save.commit(); } Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family, const SliceParts& key) { return WriteBatchInternal::SingleDelete( this, GetColumnFamilyID(column_family), key); } Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id, const Slice& begin_key, const Slice& end_key) { LocalSavePoint save(b); WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); if (column_family_id == 0) { b->rep_.push_back(static_cast(kTypeRangeDeletion)); } else { b->rep_.push_back(static_cast(kTypeColumnFamilyRangeDeletion)); PutVarint32(&b->rep_, column_family_id); } PutLengthPrefixedSlice(&b->rep_, begin_key); PutLengthPrefixedSlice(&b->rep_, end_key); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_DELETE_RANGE, std::memory_order_relaxed); return save.commit(); } Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key) { return WriteBatchInternal::DeleteRange(this, GetColumnFamilyID(column_family), begin_key, end_key); } Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id, const SliceParts& begin_key, const SliceParts& end_key) { LocalSavePoint save(b); WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); if (column_family_id == 0) { b->rep_.push_back(static_cast(kTypeRangeDeletion)); } else { b->rep_.push_back(static_cast(kTypeColumnFamilyRangeDeletion)); PutVarint32(&b->rep_, column_family_id); } PutLengthPrefixedSliceParts(&b->rep_, begin_key); PutLengthPrefixedSliceParts(&b->rep_, end_key); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | 
ContentFlags::HAS_DELETE_RANGE,
                          std::memory_order_relaxed);
  return save.commit();
}

// Public entry point: resolves the column family id and forwards to
// WriteBatchInternal::DeleteRange().
Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
                               const SliceParts& begin_key,
                               const SliceParts& end_key) {
  return WriteBatchInternal::DeleteRange(this, GetColumnFamilyID(column_family),
                                         begin_key, end_key);
}

// Appends a merge record to `b`.
//
// Returns InvalidArgument without touching the batch when the key or the
// value is larger than port::kMaxUint32 bytes. Otherwise writes kTypeMerge
// (or kTypeColumnFamilyMerge plus the varint32 column family id), the
// length-prefixed key and the length-prefixed value, and sets HAS_MERGE.
Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
                                 const Slice& key, const Slice& value) {
  if (key.size() > size_t{port::kMaxUint32}) {
    return Status::InvalidArgument("key is too large");
  }
  if (value.size() > size_t{port::kMaxUint32}) {
    return Status::InvalidArgument("value is too large");
  }

  LocalSavePoint save(b);
  WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
  if (column_family_id == 0) {
    b->rep_.push_back(static_cast<char>(kTypeMerge));
  } else {
    b->rep_.push_back(static_cast<char>(kTypeColumnFamilyMerge));
    PutVarint32(&b->rep_, column_family_id);
  }
  PutLengthPrefixedSlice(&b->rep_, key);
  PutLengthPrefixedSlice(&b->rep_, value);
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
                              ContentFlags::HAS_MERGE,
                          std::memory_order_relaxed);
  return save.commit();
}

// Public entry point: resolves the column family id and forwards to
// WriteBatchInternal::Merge().
Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key,
                         const Slice& value) {
  return WriteBatchInternal::Merge(this, GetColumnFamilyID(column_family), key,
                                   value);
}

// Multi-part variant of Merge(). Lengths are validated through
// CheckSlicePartsLength() before anything is appended.
Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
                                 const SliceParts& key,
                                 const SliceParts& value) {
  Status s = CheckSlicePartsLength(key, value);
  if (!s.ok()) {
    return s;
  }

  LocalSavePoint save(b);
  WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
  if (column_family_id == 0) {
    b->rep_.push_back(static_cast<char>(kTypeMerge));
  } else {
    b->rep_.push_back(static_cast<char>(kTypeColumnFamilyMerge));
    PutVarint32(&b->rep_, column_family_id);
  }
  PutLengthPrefixedSliceParts(&b->rep_, key);
  PutLengthPrefixedSliceParts(&b->rep_, value);
  b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
                              ContentFlags::HAS_MERGE,
std::memory_order_relaxed); return save.commit(); } Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) { return WriteBatchInternal::Merge(this, GetColumnFamilyID(column_family), key, value); } Status WriteBatchInternal::PutBlobIndex(WriteBatch* b, uint32_t column_family_id, const Slice& key, const Slice& value) { LocalSavePoint save(b); WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); if (column_family_id == 0) { b->rep_.push_back(static_cast(kTypeBlobIndex)); } else { b->rep_.push_back(static_cast(kTypeColumnFamilyBlobIndex)); PutVarint32(&b->rep_, column_family_id); } PutLengthPrefixedSlice(&b->rep_, key); PutLengthPrefixedSlice(&b->rep_, value); b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) | ContentFlags::HAS_BLOB_INDEX, std::memory_order_relaxed); return save.commit(); } Status WriteBatch::PutLogData(const Slice& blob) { LocalSavePoint save(this); rep_.push_back(static_cast(kTypeLogData)); PutLengthPrefixedSlice(&rep_, blob); return save.commit(); } void WriteBatch::SetSavePoint() { if (save_points_ == nullptr) { save_points_.reset(new SavePoints()); } // Record length and count of current batch of writes. 
save_points_->stack.push(SavePoint( GetDataSize(), Count(), content_flags_.load(std::memory_order_relaxed))); } Status WriteBatch::RollbackToSavePoint() { if (save_points_ == nullptr || save_points_->stack.size() == 0) { return Status::NotFound(); } // Pop the most recent savepoint off the stack SavePoint savepoint = save_points_->stack.top(); save_points_->stack.pop(); assert(savepoint.size <= rep_.size()); assert(static_cast(savepoint.count) <= Count()); if (savepoint.size == rep_.size()) { // No changes to rollback } else if (savepoint.size == 0) { // Rollback everything Clear(); } else { rep_.resize(savepoint.size); WriteBatchInternal::SetCount(this, savepoint.count); content_flags_.store(savepoint.content_flags, std::memory_order_relaxed); } return Status::OK(); } Status WriteBatch::PopSavePoint() { if (save_points_ == nullptr || save_points_->stack.size() == 0) { return Status::NotFound(); } // Pop the most recent savepoint off the stack save_points_->stack.pop(); return Status::OK(); } Status WriteBatch::AssignTimestamp(const Slice& ts) { TimestampAssigner ts_assigner(ts); return Iterate(&ts_assigner); } Status WriteBatch::AssignTimestamps(const std::vector& ts_list) { TimestampAssigner ts_assigner(ts_list); return Iterate(&ts_assigner); } class MemTableInserter : public WriteBatch::Handler { SequenceNumber sequence_; ColumnFamilyMemTables* const cf_mems_; FlushScheduler* const flush_scheduler_; TrimHistoryScheduler* const trim_history_scheduler_; const bool ignore_missing_column_families_; const uint64_t recovering_log_number_; // log number that all Memtables inserted into should reference uint64_t log_number_ref_; DBImpl* db_; const bool concurrent_memtable_writes_; bool post_info_created_; bool* has_valid_writes_; // On some (!) platforms just default creating // a map is too expensive in the Write() path as they // cause memory allocations though unused. 
// Make creation optional but do not incur // std::unique_ptr additional allocation using MemPostInfoMap = std::map; using PostMapType = std::aligned_storage::type; PostMapType mem_post_info_map_; // current recovered transaction we are rebuilding (recovery) WriteBatch* rebuilding_trx_; SequenceNumber rebuilding_trx_seq_; // Increase seq number once per each write batch. Otherwise increase it once // per key. bool seq_per_batch_; // Whether the memtable write will be done only after the commit bool write_after_commit_; // Whether memtable write can be done before prepare bool write_before_prepare_; // Whether this batch was unprepared or not bool unprepared_batch_; using DupDetector = std::aligned_storage::type; DupDetector duplicate_detector_; bool dup_dectector_on_; bool hint_per_batch_; bool hint_created_; // Hints for this batch using HintMap = std::unordered_map; using HintMapType = std::aligned_storage::type; HintMapType hint_; HintMap& GetHintMap() { assert(hint_per_batch_); if (!hint_created_) { new (&hint_) HintMap(); hint_created_ = true; } return *reinterpret_cast(&hint_); } MemPostInfoMap& GetPostMap() { assert(concurrent_memtable_writes_); if(!post_info_created_) { new (&mem_post_info_map_) MemPostInfoMap(); post_info_created_ = true; } return *reinterpret_cast(&mem_post_info_map_); } bool IsDuplicateKeySeq(uint32_t column_family_id, const Slice& key) { assert(!write_after_commit_); assert(rebuilding_trx_ != nullptr); if (!dup_dectector_on_) { new (&duplicate_detector_) DuplicateDetector(db_); dup_dectector_on_ = true; } return reinterpret_cast (&duplicate_detector_)->IsDuplicateKeySeq(column_family_id, key, sequence_); } protected: bool WriteBeforePrepare() const override { return write_before_prepare_; } bool WriteAfterCommit() const override { return write_after_commit_; } public: // cf_mems should not be shared with concurrent inserters MemTableInserter(SequenceNumber _sequence, ColumnFamilyMemTables* cf_mems, FlushScheduler* flush_scheduler, 
TrimHistoryScheduler* trim_history_scheduler, bool ignore_missing_column_families, uint64_t recovering_log_number, DB* db, bool concurrent_memtable_writes, bool* has_valid_writes = nullptr, bool seq_per_batch = false, bool batch_per_txn = true, bool hint_per_batch = false) : sequence_(_sequence), cf_mems_(cf_mems), flush_scheduler_(flush_scheduler), trim_history_scheduler_(trim_history_scheduler), ignore_missing_column_families_(ignore_missing_column_families), recovering_log_number_(recovering_log_number), log_number_ref_(0), db_(static_cast_with_check(db)), concurrent_memtable_writes_(concurrent_memtable_writes), post_info_created_(false), has_valid_writes_(has_valid_writes), rebuilding_trx_(nullptr), rebuilding_trx_seq_(0), seq_per_batch_(seq_per_batch), // Write after commit currently uses one seq per key (instead of per // batch). So seq_per_batch being false indicates write_after_commit // approach. write_after_commit_(!seq_per_batch), // WriteUnprepared can write WriteBatches per transaction, so // batch_per_txn being false indicates write_before_prepare. write_before_prepare_(!batch_per_txn), unprepared_batch_(false), duplicate_detector_(), dup_dectector_on_(false), hint_per_batch_(hint_per_batch), hint_created_(false) { assert(cf_mems_); } ~MemTableInserter() override { if (dup_dectector_on_) { reinterpret_cast (&duplicate_detector_)->~DuplicateDetector(); } if (post_info_created_) { reinterpret_cast (&mem_post_info_map_)->~MemPostInfoMap(); } if (hint_created_) { for (auto iter : GetHintMap()) { delete[] reinterpret_cast(iter.second); } reinterpret_cast(&hint_)->~HintMap(); } delete rebuilding_trx_; } MemTableInserter(const MemTableInserter&) = delete; MemTableInserter& operator=(const MemTableInserter&) = delete; // The batch seq is regularly restarted; In normal mode it is set when // MemTableInserter is constructed in the write thread and in recovery mode it // is set when a batch, which is tagged with seq, is read from the WAL. 
// Within a sequenced batch, which could be a merge of multiple batches, we // have two policies to advance the seq: i) seq_per_key (default) and ii) // seq_per_batch. To implement the latter we need to mark the boundary between // the individual batches. The approach is this: 1) Use the terminating // markers to indicate the boundary (kTypeEndPrepareXID, kTypeCommitXID, // kTypeRollbackXID) 2) Terminate a batch with kTypeNoop in the absence of a // natural boundary marker. void MaybeAdvanceSeq(bool batch_boundry = false) { if (batch_boundry == seq_per_batch_) { sequence_++; } } void set_log_number_ref(uint64_t log) { log_number_ref_ = log; } SequenceNumber sequence() const { return sequence_; } void PostProcess() { assert(concurrent_memtable_writes_); // If post info was not created there is nothing // to process and no need to create on demand if(post_info_created_) { for (auto& pair : GetPostMap()) { pair.first->BatchPostProcess(pair.second); } } } bool SeekToColumnFamily(uint32_t column_family_id, Status* s) { // If we are in a concurrent mode, it is the caller's responsibility // to clone the original ColumnFamilyMemTables so that each thread // has its own instance. Otherwise, it must be guaranteed that there // is no concurrent access bool found = cf_mems_->Seek(column_family_id); if (!found) { if (ignore_missing_column_families_) { *s = Status::OK(); } else { *s = Status::InvalidArgument( "Invalid column family specified in write batch"); } return false; } if (recovering_log_number_ != 0 && recovering_log_number_ < cf_mems_->GetLogNumber()) { // This is true only in recovery environment (recovering_log_number_ is // always 0 in // non-recovery, regular write code-path) // * If recovering_log_number_ < cf_mems_->GetLogNumber(), this means that // column // family already contains updates from this log. 
We can't apply updates // twice because of update-in-place or merge workloads -- ignore the // update *s = Status::OK(); return false; } if (has_valid_writes_ != nullptr) { *has_valid_writes_ = true; } if (log_number_ref_ > 0) { cf_mems_->GetMemTable()->RefLogContainingPrepSection(log_number_ref_); } return true; } Status PutCFImpl(uint32_t column_family_id, const Slice& key, const Slice& value, ValueType value_type) { // optimize for non-recovery mode if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { WriteBatchInternal::Put(rebuilding_trx_, column_family_id, key, value); return Status::OK(); // else insert the values to the memtable right away } Status seek_status; if (UNLIKELY(!SeekToColumnFamily(column_family_id, &seek_status))) { bool batch_boundry = false; if (rebuilding_trx_ != nullptr) { assert(!write_after_commit_); // The CF is probably flushed and hence no need for insert but we still // need to keep track of the keys for upcoming rollback/commit. WriteBatchInternal::Put(rebuilding_trx_, column_family_id, key, value); batch_boundry = IsDuplicateKeySeq(column_family_id, key); } MaybeAdvanceSeq(batch_boundry); return seek_status; } Status ret_status; MemTable* mem = cf_mems_->GetMemTable(); auto* moptions = mem->GetImmutableMemTableOptions(); // inplace_update_support is inconsistent with snapshots, and therefore with // any kind of transactions including the ones that use seq_per_batch assert(!seq_per_batch_ || !moptions->inplace_update_support); if (!moptions->inplace_update_support) { bool mem_res = mem->Add(sequence_, value_type, key, value, concurrent_memtable_writes_, get_post_process_info(mem), hint_per_batch_ ? 
&GetHintMap()[mem] : nullptr);
      if (UNLIKELY(!mem_res)) {
        // The add was rejected; only expected in seq_per_batch mode. Report
        // TryAgain and advance the sequence as for a batch boundary.
        assert(seq_per_batch_);
        ret_status = Status::TryAgain("key+seq exists");
        const bool BATCH_BOUNDRY = true;
        MaybeAdvanceSeq(BATCH_BOUNDRY);
      }
    } else if (moptions->inplace_callback == nullptr) {
      // In-place update without a user callback.
      assert(!concurrent_memtable_writes_);
      mem->Update(sequence_, key, value);
    } else {
      assert(!concurrent_memtable_writes_);
      if (mem->UpdateCallback(sequence_, key, value)) {
      } else {
        // key not found in memtable. Do sst get, update, add
        SnapshotImpl read_from_snapshot;
        read_from_snapshot.number_ = sequence_;
        ReadOptions ropts;
        // it's going to be overwritten for sure, so no point caching data block
        // containing the old version
        ropts.fill_cache = false;
        ropts.snapshot = &read_from_snapshot;

        std::string prev_value;
        std::string merged_value;

        auto cf_handle = cf_mems_->GetColumnFamilyHandle();
        // Stays NotSupported when no DB is attached or when recovering, in
        // which case the callback is invoked without a previous value.
        Status s = Status::NotSupported();
        if (db_ != nullptr && recovering_log_number_ == 0) {
          if (cf_handle == nullptr) {
            cf_handle = db_->DefaultColumnFamily();
          }
          s = db_->Get(ropts, cf_handle, key, &prev_value);
        }

        // The callback may rewrite prev_value's buffer in place, hence the
        // const_cast on its storage.
        char* prev_buffer = const_cast<char*>(prev_value.c_str());
        uint32_t prev_size = static_cast<uint32_t>(prev_value.size());
        auto status = moptions->inplace_callback(s.ok() ? prev_buffer : nullptr,
                                                 s.ok() ? &prev_size : nullptr,
                                                 value, &merged_value);
        if (status == UpdateStatus::UPDATED_INPLACE) {
          // prev_value is updated in-place with final value.
          bool mem_res __attribute__((__unused__));
          mem_res = mem->Add(
              sequence_, value_type, key, Slice(prev_buffer, prev_size));
          assert(mem_res);
          RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN);
        } else if (status == UpdateStatus::UPDATED) {
          // merged_value contains the final value.
bool mem_res __attribute__((__unused__)); mem_res = mem->Add(sequence_, value_type, key, Slice(merged_value)); assert(mem_res); RecordTick(moptions->statistics, NUMBER_KEYS_WRITTEN); } } } // optimize for non-recovery mode if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { assert(!write_after_commit_); // If the ret_status is TryAgain then let the next try to add the ky to // the rebuilding transaction object. WriteBatchInternal::Put(rebuilding_trx_, column_family_id, key, value); } // Since all Puts are logged in transaction logs (if enabled), always bump // sequence number. Even if the update eventually fails and does not result // in memtable add/update. MaybeAdvanceSeq(); CheckMemtableFull(); return ret_status; } Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& value) override { return PutCFImpl(column_family_id, key, value, kTypeValue); } Status DeleteImpl(uint32_t /*column_family_id*/, const Slice& key, const Slice& value, ValueType delete_type) { Status ret_status; MemTable* mem = cf_mems_->GetMemTable(); bool mem_res = mem->Add(sequence_, delete_type, key, value, concurrent_memtable_writes_, get_post_process_info(mem), hint_per_batch_ ? 
&GetHintMap()[mem] : nullptr); if (UNLIKELY(!mem_res)) { assert(seq_per_batch_); ret_status = Status::TryAgain("key+seq exists"); const bool BATCH_BOUNDRY = true; MaybeAdvanceSeq(BATCH_BOUNDRY); } MaybeAdvanceSeq(); CheckMemtableFull(); return ret_status; } Status DeleteCF(uint32_t column_family_id, const Slice& key) override { // optimize for non-recovery mode if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); return Status::OK(); // else insert the values to the memtable right away } Status seek_status; if (UNLIKELY(!SeekToColumnFamily(column_family_id, &seek_status))) { bool batch_boundry = false; if (rebuilding_trx_ != nullptr) { assert(!write_after_commit_); // The CF is probably flushed and hence no need for insert but we still // need to keep track of the keys for upcoming rollback/commit. WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key); batch_boundry = IsDuplicateKeySeq(column_family_id, key); } MaybeAdvanceSeq(batch_boundry); return seek_status; } ColumnFamilyData* cfd = cf_mems_->current(); assert(!cfd || cfd->user_comparator()); const size_t ts_sz = (cfd && cfd->user_comparator()) ? cfd->user_comparator()->timestamp_size() : 0; const ValueType delete_type = (0 == ts_sz) ? kTypeDeletion : kTypeDeletionWithTimestamp; auto ret_status = DeleteImpl(column_family_id, key, Slice(), delete_type); // optimize for non-recovery mode if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { assert(!write_after_commit_); // If the ret_status is TryAgain then let the next try to add the ky to // the rebuilding transaction object. 
WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
    }
    return ret_status;
  }

  // Handler callback for a SingleDelete record. Follows the same flow as
  // DeleteCF() but always inserts a kTypeSingleDeletion entry.
  Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override {
    // optimize for non-recovery mode
    if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) {
      WriteBatchInternal::SingleDelete(rebuilding_trx_, column_family_id, key);
      return Status::OK();
      // else insert the values to the memtable right away
    }

    Status seek_status;
    if (UNLIKELY(!SeekToColumnFamily(column_family_id, &seek_status))) {
      bool batch_boundry = false;
      if (rebuilding_trx_ != nullptr) {
        assert(!write_after_commit_);
        // The CF is probably flushed and hence no need for insert but we still
        // need to keep track of the keys for upcoming rollback/commit.
        WriteBatchInternal::SingleDelete(rebuilding_trx_, column_family_id,
                                         key);
        batch_boundry = IsDuplicateKeySeq(column_family_id, key);
      }
      MaybeAdvanceSeq(batch_boundry);
      return seek_status;
    }

    auto ret_status =
        DeleteImpl(column_family_id, key, Slice(), kTypeSingleDeletion);
    // optimize for non-recovery mode
    if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) {
      assert(!write_after_commit_);
      // If the ret_status is TryAgain then let the next try to add the key to
      // the rebuilding transaction object.
WriteBatchInternal::SingleDelete(rebuilding_trx_, column_family_id, key); } return ret_status; } Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key, const Slice& end_key) override { // optimize for non-recovery mode if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { WriteBatchInternal::DeleteRange(rebuilding_trx_, column_family_id, begin_key, end_key); return Status::OK(); // else insert the values to the memtable right away } Status seek_status; if (UNLIKELY(!SeekToColumnFamily(column_family_id, &seek_status))) { bool batch_boundry = false; if (rebuilding_trx_ != nullptr) { assert(!write_after_commit_); // The CF is probably flushed and hence no need for insert but we still // need to keep track of the keys for upcoming rollback/commit. WriteBatchInternal::DeleteRange(rebuilding_trx_, column_family_id, begin_key, end_key); // TODO(myabandeh): when transactional DeleteRange support is added, // check if end_key must also be added. batch_boundry = IsDuplicateKeySeq(column_family_id, begin_key); } MaybeAdvanceSeq(batch_boundry); return seek_status; } if (db_ != nullptr) { auto cf_handle = cf_mems_->GetColumnFamilyHandle(); if (cf_handle == nullptr) { cf_handle = db_->DefaultColumnFamily(); } auto* cfd = reinterpret_cast(cf_handle)->cfd(); if (!cfd->is_delete_range_supported()) { return Status::NotSupported( std::string("DeleteRange not supported for table type ") + cfd->ioptions()->table_factory->Name() + " in CF " + cfd->GetName()); } int cmp = cfd->user_comparator()->Compare(begin_key, end_key); if (cmp > 0) { // It's an empty range where endpoints appear mistaken. Don't bother // applying it to the DB, and return an error to the user. return Status::InvalidArgument("end key comes before start key"); } else if (cmp == 0) { // It's an empty range. Don't bother applying it to the DB. 
return Status::OK(); } } auto ret_status = DeleteImpl(column_family_id, begin_key, end_key, kTypeRangeDeletion); // optimize for non-recovery mode if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { assert(!write_after_commit_); // If the ret_status is TryAgain then let the next try to add the ky to // the rebuilding transaction object. WriteBatchInternal::DeleteRange(rebuilding_trx_, column_family_id, begin_key, end_key); } return ret_status; } Status MergeCF(uint32_t column_family_id, const Slice& key, const Slice& value) override { // optimize for non-recovery mode if (UNLIKELY(write_after_commit_ && rebuilding_trx_ != nullptr)) { WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, key, value); return Status::OK(); // else insert the values to the memtable right away } Status seek_status; if (UNLIKELY(!SeekToColumnFamily(column_family_id, &seek_status))) { bool batch_boundry = false; if (rebuilding_trx_ != nullptr) { assert(!write_after_commit_); // The CF is probably flushed and hence no need for insert but we still // need to keep track of the keys for upcoming rollback/commit. WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, key, value); batch_boundry = IsDuplicateKeySeq(column_family_id, key); } MaybeAdvanceSeq(batch_boundry); return seek_status; } Status ret_status; MemTable* mem = cf_mems_->GetMemTable(); auto* moptions = mem->GetImmutableMemTableOptions(); bool perform_merge = false; assert(!concurrent_memtable_writes_ || moptions->max_successive_merges == 0); // If we pass DB through and options.max_successive_merges is hit // during recovery, Get() will be issued which will try to acquire // DB mutex and cause deadlock, as DB mutex is already held. 
// So we disable merge in recovery if (moptions->max_successive_merges > 0 && db_ != nullptr && recovering_log_number_ == 0) { assert(!concurrent_memtable_writes_); LookupKey lkey(key, sequence_); // Count the number of successive merges at the head // of the key in the memtable size_t num_merges = mem->CountSuccessiveMergeEntries(lkey); if (num_merges >= moptions->max_successive_merges) { perform_merge = true; } } if (perform_merge) { // 1) Get the existing value std::string get_value; // Pass in the sequence number so that we also include previous merge // operations in the same batch. SnapshotImpl read_from_snapshot; read_from_snapshot.number_ = sequence_; ReadOptions read_options; read_options.snapshot = &read_from_snapshot; auto cf_handle = cf_mems_->GetColumnFamilyHandle(); if (cf_handle == nullptr) { cf_handle = db_->DefaultColumnFamily(); } db_->Get(read_options, cf_handle, key, &get_value); Slice get_value_slice = Slice(get_value); // 2) Apply this merge auto merge_operator = moptions->merge_operator; assert(merge_operator); std::string new_value; Status merge_status = MergeHelper::TimedFullMerge( merge_operator, key, &get_value_slice, {value}, &new_value, moptions->info_log, moptions->statistics, Env::Default()); if (!merge_status.ok()) { // Failed to merge! 
// Store the delta in memtable perform_merge = false; } else { // 3) Add value to memtable assert(!concurrent_memtable_writes_); bool mem_res = mem->Add(sequence_, kTypeValue, key, new_value); if (UNLIKELY(!mem_res)) { assert(seq_per_batch_); ret_status = Status::TryAgain("key+seq exists"); const bool BATCH_BOUNDRY = true; MaybeAdvanceSeq(BATCH_BOUNDRY); } } } if (!perform_merge) { // Add merge operator to memtable bool mem_res = mem->Add(sequence_, kTypeMerge, key, value, concurrent_memtable_writes_, get_post_process_info(mem)); if (UNLIKELY(!mem_res)) { assert(seq_per_batch_); ret_status = Status::TryAgain("key+seq exists"); const bool BATCH_BOUNDRY = true; MaybeAdvanceSeq(BATCH_BOUNDRY); } } // optimize for non-recovery mode if (UNLIKELY(!ret_status.IsTryAgain() && rebuilding_trx_ != nullptr)) { assert(!write_after_commit_); // If the ret_status is TryAgain then let the next try to add the ky to // the rebuilding transaction object. WriteBatchInternal::Merge(rebuilding_trx_, column_family_id, key, value); } MaybeAdvanceSeq(); CheckMemtableFull(); return ret_status; } Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key, const Slice& value) override { // Same as PutCF except for value type. 
return PutCFImpl(column_family_id, key, value, kTypeBlobIndex); } void CheckMemtableFull() { if (flush_scheduler_ != nullptr) { auto* cfd = cf_mems_->current(); assert(cfd != nullptr); if (cfd->mem()->ShouldScheduleFlush() && cfd->mem()->MarkFlushScheduled()) { // MarkFlushScheduled only returns true if we are the one that // should take action, so no need to dedup further flush_scheduler_->ScheduleWork(cfd); } } // check if memtable_list size exceeds max_write_buffer_size_to_maintain if (trim_history_scheduler_ != nullptr) { auto* cfd = cf_mems_->current(); assert(cfd); assert(cfd->ioptions()); const size_t size_to_maintain = static_cast( cfd->ioptions()->max_write_buffer_size_to_maintain); if (size_to_maintain > 0) { MemTableList* const imm = cfd->imm(); assert(imm); if (imm->HasHistory()) { const MemTable* const mem = cfd->mem(); assert(mem); if (mem->ApproximateMemoryUsageFast() + imm->ApproximateMemoryUsageExcludingLast() >= size_to_maintain && imm->MarkTrimHistoryNeeded()) { trim_history_scheduler_->ScheduleWork(cfd); } } } } } // The write batch handler calls MarkBeginPrepare with unprepare set to true // if it encounters the kTypeBeginUnprepareXID marker. Status MarkBeginPrepare(bool unprepare) override { assert(rebuilding_trx_ == nullptr); assert(db_); if (recovering_log_number_ != 0) { // during recovery we rebuild a hollow transaction // from all encountered prepare sections of the wal if (db_->allow_2pc() == false) { return Status::NotSupported( "WAL contains prepared transactions. Open with " "TransactionDB::Open()."); } // we are now iterating through a prepared section rebuilding_trx_ = new WriteBatch(); rebuilding_trx_seq_ = sequence_; // Verify that we have matching MarkBeginPrepare/MarkEndPrepare markers. // unprepared_batch_ should be false because it is false by default, and // gets reset to false in MarkEndPrepare. 
assert(!unprepared_batch_); unprepared_batch_ = unprepare; if (has_valid_writes_ != nullptr) { *has_valid_writes_ = true; } } return Status::OK(); } Status MarkEndPrepare(const Slice& name) override { assert(db_); assert((rebuilding_trx_ != nullptr) == (recovering_log_number_ != 0)); if (recovering_log_number_ != 0) { assert(db_->allow_2pc()); size_t batch_cnt = write_after_commit_ ? 0 // 0 will disable further checks : static_cast(sequence_ - rebuilding_trx_seq_ + 1); db_->InsertRecoveredTransaction(recovering_log_number_, name.ToString(), rebuilding_trx_, rebuilding_trx_seq_, batch_cnt, unprepared_batch_); unprepared_batch_ = false; rebuilding_trx_ = nullptr; } else { assert(rebuilding_trx_ == nullptr); } const bool batch_boundry = true; MaybeAdvanceSeq(batch_boundry); return Status::OK(); } Status MarkNoop(bool empty_batch) override { // A hack in pessimistic transaction could result into a noop at the start // of the write batch, that should be ignored. if (!empty_batch) { // In the absence of Prepare markers, a kTypeNoop tag indicates the end of // a batch. This happens when write batch commits skipping the prepare // phase. const bool batch_boundry = true; MaybeAdvanceSeq(batch_boundry); } return Status::OK(); } Status MarkCommit(const Slice& name) override { assert(db_); Status s; if (recovering_log_number_ != 0) { // in recovery when we encounter a commit marker // we lookup this transaction in our set of rebuilt transactions // and commit. auto trx = db_->GetRecoveredTransaction(name.ToString()); // the log containing the prepared section may have // been released in the last incarnation because the // data was flushed to L0 if (trx != nullptr) { // at this point individual CF lognumbers will prevent // duplicate re-insertion of values. assert(log_number_ref_ == 0); if (write_after_commit_) { // write_after_commit_ can only have one batch in trx. 
assert(trx->batches_.size() == 1);
          const auto& batch_info = trx->batches_.begin()->second;
          // all inserts must reference this trx log number
          log_number_ref_ = batch_info.log_number_;
          // Replay the recovered batch through this same handler.
          s = batch_info.batch_->Iterate(this);
          log_number_ref_ = 0;
        }
        // else the values are already inserted before the commit

        // Keep the recovered transaction around if the replay failed.
        if (s.ok()) {
          db_->DeleteRecoveredTransaction(name.ToString());
        }
        if (has_valid_writes_ != nullptr) {
          *has_valid_writes_ = true;
        }
      }
    } else {
      // When writes are not delayed until commit, there is no disconnect
      // between a memtable write and the WAL that supports it. So the commit
      // need not reference any log as the only log to which it depends.
      assert(!write_after_commit_ || log_number_ref_ > 0);
    }
    const bool batch_boundry = true;
    MaybeAdvanceSeq(batch_boundry);

    return s;
  }

  // Handles a rollback marker. During recovery the recovered transaction
  // named `name` is dropped if it is still known; outside recovery the marker
  // is ignored. Always marks a batch boundary and returns OK.
  Status MarkRollback(const Slice& name) override {
    assert(db_);

    if (recovering_log_number_ != 0) {
      auto trx = db_->GetRecoveredTransaction(name.ToString());

      // the log containing the transaction's prepare section
      // may have been released in the previous incarnation
      // because we knew it had been rolled back
      if (trx != nullptr) {
        db_->DeleteRecoveredTransaction(name.ToString());
      }
    } else {
      // in non recovery we simply ignore this tag
    }

    const bool batch_boundry = true;
    MaybeAdvanceSeq(batch_boundry);

    return Status::OK();
  }

 private:
  // Returns the per-memtable post-process slot for `mem`, or nullptr when
  // concurrent memtable writes are disabled.
  MemTablePostProcessInfo* get_post_process_info(MemTable* mem) {
    if (!concurrent_memtable_writes_) {
      // No need to batch counters locally if we don't use concurrent mode.
return nullptr;
    }
    return &GetPostMap()[mem];
  }
};

// Write-group overload: applies every writer's batch in `write_group` through
// one MemTableInserter, assigning each writer its starting sequence number.
// Stops at and returns the first non-OK status.
//
// This function can only be called in these conditions:
// 1) During Recovery()
// 2) During Write(), in a single-threaded write thread
// 3) During Write(), in a concurrent context where memtables has been cloned
// The reason is that it calls memtables->Seek(), which has a stateful cache
Status WriteBatchInternal::InsertInto(
    WriteThread::WriteGroup& write_group, SequenceNumber sequence,
    ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler,
    TrimHistoryScheduler* trim_history_scheduler,
    bool ignore_missing_column_families, uint64_t recovery_log_number, DB* db,
    bool concurrent_memtable_writes, bool seq_per_batch, bool batch_per_txn) {
  MemTableInserter inserter(
      sequence, memtables, flush_scheduler, trim_history_scheduler,
      ignore_missing_column_families, recovery_log_number, db,
      concurrent_memtable_writes, nullptr /*has_valid_writes*/, seq_per_batch,
      batch_per_txn);
  for (auto w : write_group) {
    // Writers whose callback failed are skipped entirely.
    if (w->CallbackFailed()) {
      continue;
    }
    w->sequence = inserter.sequence();
    if (!w->ShouldWriteToMemtable()) {
      // In seq_per_batch_ mode this advances the seq by one.
      inserter.MaybeAdvanceSeq(true);
      continue;
    }
    SetSequence(w->batch, inserter.sequence());
    inserter.set_log_number_ref(w->log_ref);
    w->status = w->batch->Iterate(&inserter);
    if (!w->status.ok()) {
      return w->status;
    }
    // In seq_per_batch mode the sequence numbers consumed must equal the
    // writer's declared batch count.
    assert(!seq_per_batch || w->batch_cnt != 0);
    assert(!seq_per_batch || inserter.sequence() - w->sequence == w->batch_cnt);
  }
  return Status::OK();
}

// Single-writer overload: applies `writer->batch` starting at `sequence`.
// `batch_cnt` is only consulted by assertions. When
// concurrent_memtable_writes is set, PostProcess() is run on the inserter
// before returning.
Status WriteBatchInternal::InsertInto(
    WriteThread::Writer* writer, SequenceNumber sequence,
    ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler,
    TrimHistoryScheduler* trim_history_scheduler,
    bool ignore_missing_column_families, uint64_t log_number, DB* db,
    bool concurrent_memtable_writes, bool seq_per_batch, size_t batch_cnt,
    bool batch_per_txn, bool hint_per_batch) {
#ifdef NDEBUG
  // batch_cnt is referenced only inside assert(); silence unused warnings.
  (void)batch_cnt;
#endif
  assert(writer->ShouldWriteToMemtable());
  MemTableInserter inserter(
      sequence, memtables, flush_scheduler, trim_history_scheduler,
      ignore_missing_column_families, log_number, db,
      concurrent_memtable_writes, nullptr /*has_valid_writes*/, seq_per_batch,
      batch_per_txn, hint_per_batch);
  SetSequence(writer->batch, sequence);
  inserter.set_log_number_ref(writer->log_ref);
  Status s = writer->batch->Iterate(&inserter);
  assert(!seq_per_batch || batch_cnt != 0);
  assert(!seq_per_batch || inserter.sequence() - sequence == batch_cnt);
  if (concurrent_memtable_writes) {
    inserter.PostProcess();
  }
  return s;
}

// Single-batch overload: applies `batch` starting at the sequence number
// stored in the batch itself.
Status WriteBatchInternal::InsertInto(
    const WriteBatch* batch, ColumnFamilyMemTables* memtables,
    FlushScheduler* flush_scheduler,
    TrimHistoryScheduler* trim_history_scheduler,
    bool ignore_missing_column_families, uint64_t log_number, DB* db,
    bool concurrent_memtable_writes, SequenceNumber* next_seq,
    bool* has_valid_writes, bool seq_per_batch, bool batch_per_txn) {
  MemTableInserter inserter(Sequence(batch), memtables, flush_scheduler,
                            trim_history_scheduler,
                            ignore_missing_column_families, log_number, db,
                            concurrent_memtable_writes, has_valid_writes,
                            seq_per_batch, batch_per_txn);
  Status s =
batch->Iterate(&inserter); if (next_seq != nullptr) { *next_seq = inserter.sequence(); } if (concurrent_memtable_writes) { inserter.PostProcess(); } return s; } Status WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) { assert(contents.size() >= WriteBatchInternal::kHeader); b->rep_.assign(contents.data(), contents.size()); b->content_flags_.store(ContentFlags::DEFERRED, std::memory_order_relaxed); return Status::OK(); } Status WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src, const bool wal_only) { size_t src_len; int src_count; uint32_t src_flags; const SavePoint& batch_end = src->GetWalTerminationPoint(); if (wal_only && !batch_end.is_cleared()) { src_len = batch_end.size - WriteBatchInternal::kHeader; src_count = batch_end.count; src_flags = batch_end.content_flags; } else { src_len = src->rep_.size() - WriteBatchInternal::kHeader; src_count = Count(src); src_flags = src->content_flags_.load(std::memory_order_relaxed); } SetCount(dst, Count(dst) + src_count); assert(src->rep_.size() >= WriteBatchInternal::kHeader); dst->rep_.append(src->rep_.data() + WriteBatchInternal::kHeader, src_len); dst->content_flags_.store( dst->content_flags_.load(std::memory_order_relaxed) | src_flags, std::memory_order_relaxed); return Status::OK(); } size_t WriteBatchInternal::AppendedByteSize(size_t leftByteSize, size_t rightByteSize) { if (leftByteSize == 0 || rightByteSize == 0) { return leftByteSize + rightByteSize; } else { return leftByteSize + rightByteSize - WriteBatchInternal::kHeader; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_batch_base.cc000066400000000000000000000061271370372246700174340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "rocksdb/write_batch_base.h"

#include <string>

#include "rocksdb/slice.h"
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

// Simple implementations of the SliceParts variants of the WriteBatchBase
// operations. Each one flattens its SliceParts arguments into local string
// buffers and forwards to the corresponding Slice overload. Child classes
// can override these methods with more performant solutions if they choose.

// Put of a multi-part key/value into `column_family`.
Status WriteBatchBase::Put(ColumnFamilyHandle* column_family,
                           const SliceParts& key, const SliceParts& value) {
  std::string key_buf, value_buf;
  Slice key_slice(key, &key_buf);
  Slice value_slice(value, &value_buf);

  return Put(column_family, key_slice, value_slice);
}

// Put of a multi-part key/value without an explicit column family.
Status WriteBatchBase::Put(const SliceParts& key, const SliceParts& value) {
  std::string key_buf, value_buf;
  Slice key_slice(key, &key_buf);
  Slice value_slice(value, &value_buf);

  return Put(key_slice, value_slice);
}

// Delete of a multi-part key in `column_family`.
Status WriteBatchBase::Delete(ColumnFamilyHandle* column_family,
                              const SliceParts& key) {
  std::string key_buf;
  Slice key_slice(key, &key_buf);
  return Delete(column_family, key_slice);
}

// Delete of a multi-part key without an explicit column family.
Status WriteBatchBase::Delete(const SliceParts& key) {
  std::string key_buf;
  Slice key_slice(key, &key_buf);
  return Delete(key_slice);
}

// SingleDelete of a multi-part key in `column_family`.
Status WriteBatchBase::SingleDelete(ColumnFamilyHandle* column_family,
                                    const SliceParts& key) {
  std::string key_buf;
  Slice key_slice(key, &key_buf);
  return SingleDelete(column_family, key_slice);
}

// SingleDelete of a multi-part key without an explicit column family.
Status WriteBatchBase::SingleDelete(const SliceParts& key) {
  std::string key_buf;
  Slice key_slice(key, &key_buf);
  return SingleDelete(key_slice);
}

// DeleteRange with multi-part begin/end keys in `column_family`.
Status WriteBatchBase::DeleteRange(ColumnFamilyHandle* column_family,
                                   const SliceParts& begin_key,
                                   const SliceParts& end_key) {
  std::string begin_key_buf, end_key_buf;
  Slice begin_key_slice(begin_key, &begin_key_buf);
  Slice end_key_slice(end_key, &end_key_buf);
  return DeleteRange(column_family, begin_key_slice, end_key_slice);
}

// DeleteRange with multi-part begin/end keys, no explicit column family.
Status WriteBatchBase::DeleteRange(const SliceParts& begin_key,
                                   const SliceParts& end_key) {
  std::string begin_key_buf, end_key_buf;
  Slice begin_key_slice(begin_key, &begin_key_buf);
  Slice end_key_slice(end_key, &end_key_buf);
  return
DeleteRange(begin_key_slice, end_key_slice); } Status WriteBatchBase::Merge(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) { std::string key_buf, value_buf; Slice key_slice(key, &key_buf); Slice value_slice(value, &value_buf); return Merge(column_family, key_slice, value_slice); } Status WriteBatchBase::Merge(const SliceParts& key, const SliceParts& value) { std::string key_buf, value_buf; Slice key_slice(key, &key_buf); Slice value_slice(value, &value_buf); return Merge(key_slice, value_slice); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_batch_internal.h000066400000000000000000000223111370372246700201710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include "db/flush_scheduler.h" #include "db/trim_history_scheduler.h" #include "db/write_thread.h" #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/types.h" #include "rocksdb/write_batch.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class MemTable; class FlushScheduler; class ColumnFamilyData; class ColumnFamilyMemTables { public: virtual ~ColumnFamilyMemTables() {} virtual bool Seek(uint32_t column_family_id) = 0; // returns true if the update to memtable should be ignored // (useful when recovering from log whose updates have already // been processed) virtual uint64_t GetLogNumber() const = 0; virtual MemTable* GetMemTable() const = 0; virtual ColumnFamilyHandle* GetColumnFamilyHandle() = 0; virtual ColumnFamilyData* current() { return nullptr; } }; class ColumnFamilyMemTablesDefault : public ColumnFamilyMemTables { public: explicit ColumnFamilyMemTablesDefault(MemTable* mem) : ok_(false), mem_(mem) {} bool Seek(uint32_t column_family_id) override { ok_ = (column_family_id == 0); return ok_; } uint64_t GetLogNumber() const override { return 0; } MemTable* GetMemTable() const override { assert(ok_); return mem_; } ColumnFamilyHandle* GetColumnFamilyHandle() override { return nullptr; } private: bool ok_; MemTable* mem_; }; // WriteBatchInternal provides static methods for manipulating a // WriteBatch that we don't want in the public WriteBatch interface. class WriteBatchInternal { public: // WriteBatch header has an 8-byte sequence number followed by a 4-byte count. 
static const size_t kHeader = 12;

  // WriteBatch methods with column_family_id instead of ColumnFamilyHandle*
  static Status Put(WriteBatch* batch, uint32_t column_family_id,
                    const Slice& key, const Slice& value);

  static Status Put(WriteBatch* batch, uint32_t column_family_id,
                    const SliceParts& key, const SliceParts& value);

  static Status Delete(WriteBatch* batch, uint32_t column_family_id,
                       const SliceParts& key);

  static Status Delete(WriteBatch* batch, uint32_t column_family_id,
                       const Slice& key);

  static Status SingleDelete(WriteBatch* batch, uint32_t column_family_id,
                             const SliceParts& key);

  static Status SingleDelete(WriteBatch* batch, uint32_t column_family_id,
                             const Slice& key);

  static Status DeleteRange(WriteBatch* b, uint32_t column_family_id,
                            const Slice& begin_key, const Slice& end_key);

  static Status DeleteRange(WriteBatch* b, uint32_t column_family_id,
                            const SliceParts& begin_key,
                            const SliceParts& end_key);

  static Status Merge(WriteBatch* batch, uint32_t column_family_id,
                      const Slice& key, const Slice& value);

  static Status Merge(WriteBatch* batch, uint32_t column_family_id,
                      const SliceParts& key, const SliceParts& value);

  static Status PutBlobIndex(WriteBatch* batch, uint32_t column_family_id,
                             const Slice& key, const Slice& value);

  // Transaction markers appended to a batch; `xid` names the transaction.
  static Status MarkEndPrepare(WriteBatch* batch, const Slice& xid,
                               const bool write_after_commit = true,
                               const bool unprepared_batch = false);

  static Status MarkRollback(WriteBatch* batch, const Slice& xid);

  static Status MarkCommit(WriteBatch* batch, const Slice& xid);

  static Status InsertNoop(WriteBatch* batch);

  // Return the number of entries in the batch.
  static uint32_t Count(const WriteBatch* batch);

  // Set the count for the number of entries in the batch.
  static void SetCount(WriteBatch* batch, uint32_t n);

  // Return the sequence number for the start of this batch.
  static SequenceNumber Sequence(const WriteBatch* batch);

  // Store the specified number as the sequence number for the start of
  // this batch.
  static void SetSequence(WriteBatch* batch, SequenceNumber seq);

  // Returns the offset of the first entry in the batch.
  // This offset is only valid if the batch is not empty.
  static size_t GetFirstOffset(WriteBatch* batch);

  // Returns a Slice over the batch's serialized representation.
  static Slice Contents(const WriteBatch* batch) { return Slice(batch->rep_); }

  // Returns the size in bytes of the serialized representation.
  static size_t ByteSize(const WriteBatch* batch) { return batch->rep_.size(); }

  static Status SetContents(WriteBatch* batch, const Slice& contents);

  static Status CheckSlicePartsLength(const SliceParts& key,
                                      const SliceParts& value);

  // Inserts the batches of the writers in write_group into the memtable.
  //
  // If ignore_missing_column_families == true, WriteBatch
  // referencing non-existing column family will be ignored.
  // If ignore_missing_column_families == false, processing of the
  // batches will be stopped if a reference is found to a non-existing
  // column family and InvalidArgument() will be returned.  The writes
  // in batches may be only partially applied at that point.
  //
  // If log_number is non-zero, the memtable will be updated only if
  // memtables->GetLogNumber() >= log_number.
  //
  // If flush_scheduler is non-null, it will be invoked if the memtable
  // should be flushed.
  //
  // Under concurrent use, the caller is responsible for making sure that
  // the memtables object itself is thread-local.
  static Status InsertInto(
      WriteThread::WriteGroup& write_group, SequenceNumber sequence,
      ColumnFamilyMemTables* memtables, FlushScheduler* flush_scheduler,
      TrimHistoryScheduler* trim_history_scheduler,
      bool ignore_missing_column_families = false, uint64_t log_number = 0,
      DB* db = nullptr, bool concurrent_memtable_writes = false,
      bool seq_per_batch = false, bool batch_per_txn = true);

  // Convenience form of InsertInto when you have only one batch
  // next_seq returns the seq after last sequence number used in MemTable insert
  static Status InsertInto(
      const WriteBatch* batch, ColumnFamilyMemTables* memtables,
      FlushScheduler* flush_scheduler,
      TrimHistoryScheduler* trim_history_scheduler,
      bool ignore_missing_column_families = false, uint64_t log_number = 0,
      DB* db = nullptr, bool concurrent_memtable_writes = false,
      SequenceNumber* next_seq = nullptr, bool* has_valid_writes = nullptr,
      bool seq_per_batch = false, bool batch_per_txn = true);

  // Form of InsertInto for a single writer's batch, starting at `sequence`.
  static Status InsertInto(WriteThread::Writer* writer, SequenceNumber sequence,
                           ColumnFamilyMemTables* memtables,
                           FlushScheduler* flush_scheduler,
                           TrimHistoryScheduler* trim_history_scheduler,
                           bool ignore_missing_column_families = false,
                           uint64_t log_number = 0, DB* db = nullptr,
                           bool concurrent_memtable_writes = false,
                           bool seq_per_batch = false, size_t batch_cnt = 0,
                           bool batch_per_txn = true,
                           bool hint_per_batch = false);

  // Appends the entries of src to dst.
  static Status Append(WriteBatch* dst, const WriteBatch* src,
                       const bool WAL_only = false);

  // Returns the byte size of appending a WriteBatch with ByteSize
  // leftByteSize and a WriteBatch with ByteSize rightByteSize
  static size_t AppendedByteSize(size_t leftByteSize, size_t rightByteSize);

  // Iterate over [begin, end) range of a write batch
  static Status Iterate(const WriteBatch* wb, WriteBatch::Handler* handler,
                        size_t begin, size_t end);

  // This write batch includes the latest state that should be persisted. Such
  // state is meant to be used only during recovery.
static void SetAsLastestPersistentState(WriteBatch* b); static bool IsLatestPersistentState(const WriteBatch* b); }; // LocalSavePoint is similar to a scope guard class LocalSavePoint { public: explicit LocalSavePoint(WriteBatch* batch) : batch_(batch), savepoint_(batch->GetDataSize(), batch->Count(), batch->content_flags_.load(std::memory_order_relaxed)) #ifndef NDEBUG , committed_(false) #endif { } #ifndef NDEBUG ~LocalSavePoint() { assert(committed_); } #endif Status commit() { #ifndef NDEBUG committed_ = true; #endif if (batch_->max_bytes_ && batch_->rep_.size() > batch_->max_bytes_) { batch_->rep_.resize(savepoint_.size); WriteBatchInternal::SetCount(batch_, savepoint_.count); batch_->content_flags_.store(savepoint_.content_flags, std::memory_order_relaxed); return Status::MemoryLimit(); } return Status::OK(); } private: WriteBatch* batch_; SavePoint savepoint_; #ifndef NDEBUG bool committed_; #endif }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_batch_test.cc000066400000000000000000000641331370372246700175020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "rocksdb/db.h"

#include <memory>
#include "db/column_family.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "rocksdb/env.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "rocksdb/write_buffer_manager.h"
#include "table/scoped_arena_iterator.h"
#include "test_util/testharness.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// Test helper: inserts `b` into a fresh skip-list memtable and renders what
// the memtable then contains as a string of "Op(args)@seq" items. Point
// entries are listed first (in memtable iteration order), range tombstones
// second. If the insert failed, the status string is appended; otherwise
// "CountMismatch()" is appended when the number of entries seen differs from
// the batch's count. Also checks the batch's HasXxx() flags against what was
// actually observed.
static std::string PrintContents(WriteBatch* b) {
  InternalKeyComparator cmp(BytewiseComparator());
  auto factory = std::make_shared<SkipListFactory>();
  Options options;
  options.memtable_factory = factory;
  ImmutableCFOptions ioptions(options);
  WriteBufferManager wb(options.db_write_buffer_size);
  MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb,
                               kMaxSequenceNumber, 0 /* column_family_id */);
  mem->Ref();
  std::string state;
  ColumnFamilyMemTablesDefault cf_mems_default(mem);
  Status s =
      WriteBatchInternal::InsertInto(b, &cf_mems_default, nullptr, nullptr);
  uint32_t count = 0;
  int put_count = 0;
  int delete_count = 0;
  int single_delete_count = 0;
  int delete_range_count = 0;
  int merge_count = 0;
  // Pass 0 walks the point entries, pass 1 the range tombstones.
  for (int i = 0; i < 2; ++i) {
    Arena arena;
    ScopedArenaIterator arena_iter_guard;
    std::unique_ptr<InternalIterator> iter_guard;
    InternalIterator* iter;
    if (i == 0) {
      iter = mem->NewIterator(ReadOptions(), &arena);
      arena_iter_guard.set(iter);
    } else {
      iter = mem->NewRangeTombstoneIterator(ReadOptions(),
                                            kMaxSequenceNumber /* read_seq */);
      iter_guard.reset(iter);
    }
    if (iter == nullptr) {
      continue;
    }
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ParsedInternalKey ikey;
      ikey.clear();
      EXPECT_TRUE(ParseInternalKey(iter->key(), &ikey));
      switch (ikey.type) {
        case kTypeValue:
          state.append("Put(");
          state.append(ikey.user_key.ToString());
          state.append(", ");
          state.append(iter->value().ToString());
          state.append(")");
          count++;
          put_count++;
          break;
        case kTypeDeletion:
          state.append("Delete(");
          state.append(ikey.user_key.ToString());
          state.append(")");
          count++;
          delete_count++;
          break;
        case kTypeSingleDeletion:
          state.append("SingleDelete(");
          state.append(ikey.user_key.ToString());
          state.append(")");
          count++;
          single_delete_count++;
          break;
        case kTypeRangeDeletion:
          state.append("DeleteRange(");
          state.append(ikey.user_key.ToString());
          state.append(", ");
          state.append(iter->value().ToString());
          state.append(")");
          count++;
          delete_range_count++;
          break;
        case kTypeMerge:
          state.append("Merge(");
          state.append(ikey.user_key.ToString());
          state.append(", ");
          state.append(iter->value().ToString());
          state.append(")");
          count++;
          merge_count++;
          break;
        default:
          assert(false);
          break;
      }
      state.append("@");
      state.append(NumberToString(ikey.sequence));
    }
  }
  EXPECT_EQ(b->HasPut(), put_count > 0);
  EXPECT_EQ(b->HasDelete(), delete_count > 0);
  EXPECT_EQ(b->HasSingleDelete(), single_delete_count > 0);
  EXPECT_EQ(b->HasDeleteRange(), delete_range_count > 0);
  EXPECT_EQ(b->HasMerge(), merge_count > 0);
  if (!s.ok()) {
    state.append(s.ToString());
  } else if (count != WriteBatchInternal::Count(b)) {
    state.append("CountMismatch()");
  }
  // Unref() hands back the memtable pointer to delete.
  delete mem->Unref();
  return state;
}

class WriteBatchTest : public testing::Test {};

// An empty batch prints nothing and reports zero entries.
TEST_F(WriteBatchTest, Empty) {
  WriteBatch batch;
  ASSERT_EQ("", PrintContents(&batch));
  ASSERT_EQ(0u, WriteBatchInternal::Count(&batch));
  ASSERT_EQ(0u, batch.Count());
}

// Mixed operations receive consecutive sequence numbers starting at the
// batch's sequence.
TEST_F(WriteBatchTest, Multiple) {
  WriteBatch batch;
  batch.Put(Slice("foo"), Slice("bar"));
  batch.Delete(Slice("box"));
  batch.DeleteRange(Slice("bar"), Slice("foo"));
  batch.Put(Slice("baz"), Slice("boo"));
  WriteBatchInternal::SetSequence(&batch, 100);
  ASSERT_EQ(100U, WriteBatchInternal::Sequence(&batch));
  ASSERT_EQ(4u, WriteBatchInternal::Count(&batch));
  ASSERT_EQ(
      "Put(baz, boo)@103"
      "Delete(box)@101"
      "Put(foo, bar)@100"
      "DeleteRange(bar, foo)@102",
      PrintContents(&batch));
  ASSERT_EQ(4u, batch.Count());
}

// Truncating the serialized batch by one byte must surface as Corruption.
TEST_F(WriteBatchTest, Corruption) {
  WriteBatch batch;
  batch.Put(Slice("foo"), Slice("bar"));
  batch.Delete(Slice("box"));
  WriteBatchInternal::SetSequence(&batch, 200);
  Slice contents =
WriteBatchInternal::Contents(&batch);
  // Drop the last byte so the trailing Delete record is incomplete.
  WriteBatchInternal::SetContents(&batch,
                                  Slice(contents.data(),contents.size()-1));
  ASSERT_EQ("Put(foo, bar)@200"
            "Corruption: bad WriteBatch Delete",
            PrintContents(&batch));
}

// Append() concatenates entries of the source batch onto the destination;
// with wal_only set, only the entries before the source's WAL termination
// point are appended.
TEST_F(WriteBatchTest, Append) {
  WriteBatch b1, b2;
  WriteBatchInternal::SetSequence(&b1, 200);
  WriteBatchInternal::SetSequence(&b2, 300);
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("",
            PrintContents(&b1));
  ASSERT_EQ(0u, b1.Count());
  b2.Put("a", "va");
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("Put(a, va)@200",
            PrintContents(&b1));
  ASSERT_EQ(1u, b1.Count());
  b2.Clear();
  b2.Put("b", "vb");
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("Put(a, va)@200"
            "Put(b, vb)@201",
            PrintContents(&b1));
  ASSERT_EQ(2u, b1.Count());
  // b2 was not cleared, so "b" is appended a second time along with the
  // new Delete.
  b2.Delete("foo");
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("Put(a, va)@200"
            "Put(b, vb)@202"
            "Put(b, vb)@201"
            "Delete(foo)@203",
            PrintContents(&b1));
  ASSERT_EQ(4u, b1.Count());
  b2.Clear();
  b2.Put("c", "cc");
  b2.Put("d", "dd");
  b2.MarkWalTerminationPoint();
  b2.Put("e", "ee");
  // Only "c" and "d" precede the termination point.
  WriteBatchInternal::Append(&b1, &b2, /*wal only*/ true);
  ASSERT_EQ(
      "Put(a, va)@200"
      "Put(b, vb)@202"
      "Put(b, vb)@201"
      "Put(c, cc)@204"
      "Put(d, dd)@205"
      "Delete(foo)@203",
      PrintContents(&b1));
  ASSERT_EQ(6u, b1.Count());
  ASSERT_EQ(
      "Put(c, cc)@0"
      "Put(d, dd)@1"
      "Put(e, ee)@2",
      PrintContents(&b2));
  ASSERT_EQ(3u, b2.Count());
}

// A SingleDelete following a Put of the same key is recorded as its own
// entry with the next sequence number.
TEST_F(WriteBatchTest, SingleDeletion) {
  WriteBatch batch;
  WriteBatchInternal::SetSequence(&batch, 100);
  ASSERT_EQ("", PrintContents(&batch));
  ASSERT_EQ(0u, batch.Count());
  batch.Put("a", "va");
  ASSERT_EQ("Put(a, va)@100", PrintContents(&batch));
  ASSERT_EQ(1u, batch.Count());
  batch.SingleDelete("a");
  ASSERT_EQ(
      "SingleDelete(a)@101"
      "Put(a, va)@100",
      PrintContents(&batch));
  ASSERT_EQ(2u, batch.Count());
}

namespace {
  // Handler that records every callback as text in `seen`, in call order.
  // Operations on column family 0 use the short name (e.g. "Put(...)");
  // other column families use the "...CF(id, ...)" form.
  struct TestHandler : public WriteBatch::Handler {
    std::string seen;
    Status PutCF(uint32_t column_family_id, const Slice& key,
                 const Slice& value) override {
      if (column_family_id == 0) {
        seen += "Put(" + key.ToString() + ", " + value.ToString() + ")";
      } else {
        seen += "PutCF(" + ToString(column_family_id) + ", " +
                key.ToString() + ", " + value.ToString() + ")";
      }
      return Status::OK();
    }
    Status DeleteCF(uint32_t column_family_id, const Slice& key) override {
      if (column_family_id == 0) {
        seen += "Delete(" + key.ToString() + ")";
      } else {
        seen += "DeleteCF(" + ToString(column_family_id) + ", " +
                key.ToString() + ")";
      }
      return Status::OK();
    }
    Status SingleDeleteCF(uint32_t column_family_id,
                          const Slice& key) override {
      if (column_family_id == 0) {
        seen += "SingleDelete(" + key.ToString() + ")";
      } else {
        seen += "SingleDeleteCF(" + ToString(column_family_id) + ", " +
                key.ToString() + ")";
      }
      return Status::OK();
    }
    Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key,
                         const Slice& end_key) override {
      if (column_family_id == 0) {
        seen += "DeleteRange(" + begin_key.ToString() + ", " +
                end_key.ToString() + ")";
      } else {
        seen += "DeleteRangeCF(" + ToString(column_family_id) + ", " +
                begin_key.ToString() + ", " + end_key.ToString() + ")";
      }
      return Status::OK();
    }
    Status MergeCF(uint32_t column_family_id, const Slice& key,
                   const Slice& value) override {
      if (column_family_id == 0) {
        seen += "Merge(" + key.ToString() + ", " + value.ToString() + ")";
      } else {
        seen += "MergeCF(" + ToString(column_family_id) + ", " +
                key.ToString() + ", " + value.ToString() + ")";
      }
      return Status::OK();
    }
    void LogData(const Slice& blob) override {
      seen += "LogData(" + blob.ToString() + ")";
    }
    Status MarkBeginPrepare(bool unprepare) override {
      seen +=
          "MarkBeginPrepare(" + std::string(unprepare ? "true" : "false") + ")";
      return Status::OK();
    }
    Status MarkEndPrepare(const Slice& xid) override {
      seen += "MarkEndPrepare(" + xid.ToString() + ")";
      return Status::OK();
    }
    Status MarkNoop(bool empty_batch) override {
      seen += "MarkNoop(" + std::string(empty_batch ? "true" : "false") + ")";
      return Status::OK();
    }
    Status MarkCommit(const Slice& xid) override {
      seen += "MarkCommit(" + xid.ToString() + ")";
      return Status::OK();
    }
    Status MarkRollback(const Slice& xid) override {
      seen += "MarkRollback(" + xid.ToString() + ")";
      return Status::OK();
    }
  };
}

// The *NotImplemented tests iterate a batch with the base
// WriteBatch::Handler, which overrides nothing, and expect Iterate() to
// return OK.
TEST_F(WriteBatchTest, PutNotImplemented) {
  WriteBatch batch;
  batch.Put(Slice("k1"), Slice("v1"));
  ASSERT_EQ(1u, batch.Count());
  ASSERT_EQ("Put(k1, v1)@0", PrintContents(&batch));

  WriteBatch::Handler handler;
  ASSERT_OK(batch.Iterate(&handler));
}

TEST_F(WriteBatchTest, DeleteNotImplemented) {
  WriteBatch batch;
  batch.Delete(Slice("k2"));
  ASSERT_EQ(1u, batch.Count());
  ASSERT_EQ("Delete(k2)@0", PrintContents(&batch));

  WriteBatch::Handler handler;
  ASSERT_OK(batch.Iterate(&handler));
}

TEST_F(WriteBatchTest, SingleDeleteNotImplemented) {
  WriteBatch batch;
  batch.SingleDelete(Slice("k2"));
  ASSERT_EQ(1u, batch.Count());
  ASSERT_EQ("SingleDelete(k2)@0", PrintContents(&batch));

  WriteBatch::Handler handler;
  ASSERT_OK(batch.Iterate(&handler));
}

TEST_F(WriteBatchTest, MergeNotImplemented) {
  WriteBatch batch;
  batch.Merge(Slice("foo"), Slice("bar"));
  ASSERT_EQ(1u, batch.Count());
  ASSERT_EQ("Merge(foo, bar)@0", PrintContents(&batch));

  WriteBatch::Handler handler;
  ASSERT_OK(batch.Iterate(&handler));
}

// PutLogData blobs are delivered to the handler in insertion order but do
// not count as batch entries and are not inserted into the memtable.
TEST_F(WriteBatchTest, Blob) {
  WriteBatch batch;
  batch.Put(Slice("k1"), Slice("v1"));
  batch.Put(Slice("k2"), Slice("v2"));
  batch.Put(Slice("k3"), Slice("v3"));
  batch.PutLogData(Slice("blob1"));
  batch.Delete(Slice("k2"));
  batch.SingleDelete(Slice("k3"));
  batch.PutLogData(Slice("blob2"));
  batch.Merge(Slice("foo"), Slice("bar"));
  ASSERT_EQ(6u, batch.Count());
  ASSERT_EQ(
      "Merge(foo, bar)@5"
      "Put(k1, v1)@0"
      "Delete(k2)@3"
      "Put(k2, v2)@1"
      "SingleDelete(k3)@4"
      "Put(k3, v3)@2",
      PrintContents(&batch));

  TestHandler handler;
  batch.Iterate(&handler);
  ASSERT_EQ(
      "Put(k1, v1)"
      "Put(k2, v2)"
      "Put(k3, v3)"
      "LogData(blob1)"
      "Delete(k2)"
      "SingleDelete(k3)"
      "LogData(blob2)"
      "Merge(foo, bar)",
      handler.seen);
}
TEST_F(WriteBatchTest, PrepareCommit) { WriteBatch batch; WriteBatchInternal::InsertNoop(&batch); batch.Put(Slice("k1"), Slice("v1")); batch.Put(Slice("k2"), Slice("v2")); batch.SetSavePoint(); WriteBatchInternal::MarkEndPrepare(&batch, Slice("xid1")); Status s = batch.RollbackToSavePoint(); ASSERT_EQ(s, Status::NotFound()); WriteBatchInternal::MarkCommit(&batch, Slice("xid1")); WriteBatchInternal::MarkRollback(&batch, Slice("xid1")); ASSERT_EQ(2u, batch.Count()); TestHandler handler; batch.Iterate(&handler); ASSERT_EQ( "MarkBeginPrepare(false)" "Put(k1, v1)" "Put(k2, v2)" "MarkEndPrepare(xid1)" "MarkCommit(xid1)" "MarkRollback(xid1)", handler.seen); } // It requires more than 30GB of memory to run the test. With single memory // allocation of more than 30GB. // Not all platform can run it. Also it runs a long time. So disable it. TEST_F(WriteBatchTest, DISABLED_ManyUpdates) { // Insert key and value of 3GB and push total batch size to 12GB. static const size_t kKeyValueSize = 4u; static const uint32_t kNumUpdates = uint32_t(3 << 30); std::string raw(kKeyValueSize, 'A'); WriteBatch batch(kNumUpdates * (4 + kKeyValueSize * 2) + 1024u); char c = 'A'; for (uint32_t i = 0; i < kNumUpdates; i++) { if (c > 'Z') { c = 'A'; } raw[0] = c; raw[raw.length() - 1] = c; c++; batch.Put(raw, raw); } ASSERT_EQ(kNumUpdates, batch.Count()); struct NoopHandler : public WriteBatch::Handler { uint32_t num_seen = 0; char expected_char = 'A'; Status PutCF(uint32_t /*column_family_id*/, const Slice& key, const Slice& value) override { EXPECT_EQ(kKeyValueSize, key.size()); EXPECT_EQ(kKeyValueSize, value.size()); EXPECT_EQ(expected_char, key[0]); EXPECT_EQ(expected_char, value[0]); EXPECT_EQ(expected_char, key[kKeyValueSize - 1]); EXPECT_EQ(expected_char, value[kKeyValueSize - 1]); expected_char++; if (expected_char > 'Z') { expected_char = 'A'; } ++num_seen; return Status::OK(); } Status DeleteCF(uint32_t /*column_family_id*/, const Slice& /*key*/) override { ADD_FAILURE(); return 
Status::OK(); } Status SingleDeleteCF(uint32_t /*column_family_id*/, const Slice& /*key*/) override { ADD_FAILURE(); return Status::OK(); } Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/, const Slice& /*value*/) override { ADD_FAILURE(); return Status::OK(); } void LogData(const Slice& /*blob*/) override { ADD_FAILURE(); } bool Continue() override { return num_seen < kNumUpdates; } } handler; batch.Iterate(&handler); ASSERT_EQ(kNumUpdates, handler.num_seen); } // The test requires more than 18GB memory to run it, with single memory // allocation of more than 12GB. Not all the platform can run it. So disable it. TEST_F(WriteBatchTest, DISABLED_LargeKeyValue) { // Insert key and value of 3GB and push total batch size to 12GB. static const size_t kKeyValueSize = 3221225472u; std::string raw(kKeyValueSize, 'A'); WriteBatch batch(size_t(12884901888ull + 1024u)); for (char i = 0; i < 2; i++) { raw[0] = 'A' + i; raw[raw.length() - 1] = 'A' - i; batch.Put(raw, raw); } ASSERT_EQ(2u, batch.Count()); struct NoopHandler : public WriteBatch::Handler { int num_seen = 0; Status PutCF(uint32_t /*column_family_id*/, const Slice& key, const Slice& value) override { EXPECT_EQ(kKeyValueSize, key.size()); EXPECT_EQ(kKeyValueSize, value.size()); EXPECT_EQ('A' + num_seen, key[0]); EXPECT_EQ('A' + num_seen, value[0]); EXPECT_EQ('A' - num_seen, key[kKeyValueSize - 1]); EXPECT_EQ('A' - num_seen, value[kKeyValueSize - 1]); ++num_seen; return Status::OK(); } Status DeleteCF(uint32_t /*column_family_id*/, const Slice& /*key*/) override { ADD_FAILURE(); return Status::OK(); } Status SingleDeleteCF(uint32_t /*column_family_id*/, const Slice& /*key*/) override { ADD_FAILURE(); return Status::OK(); } Status MergeCF(uint32_t /*column_family_id*/, const Slice& /*key*/, const Slice& /*value*/) override { ADD_FAILURE(); return Status::OK(); } void LogData(const Slice& /*blob*/) override { ADD_FAILURE(); } bool Continue() override { return num_seen < 2; } } handler; 
batch.Iterate(&handler); ASSERT_EQ(2, handler.num_seen); } TEST_F(WriteBatchTest, Continue) { WriteBatch batch; struct Handler : public TestHandler { int num_seen = 0; Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& value) override { ++num_seen; return TestHandler::PutCF(column_family_id, key, value); } Status DeleteCF(uint32_t column_family_id, const Slice& key) override { ++num_seen; return TestHandler::DeleteCF(column_family_id, key); } Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override { ++num_seen; return TestHandler::SingleDeleteCF(column_family_id, key); } Status MergeCF(uint32_t column_family_id, const Slice& key, const Slice& value) override { ++num_seen; return TestHandler::MergeCF(column_family_id, key, value); } void LogData(const Slice& blob) override { ++num_seen; TestHandler::LogData(blob); } bool Continue() override { return num_seen < 5; } } handler; batch.Put(Slice("k1"), Slice("v1")); batch.Put(Slice("k2"), Slice("v2")); batch.PutLogData(Slice("blob1")); batch.Delete(Slice("k1")); batch.SingleDelete(Slice("k2")); batch.PutLogData(Slice("blob2")); batch.Merge(Slice("foo"), Slice("bar")); batch.Iterate(&handler); ASSERT_EQ( "Put(k1, v1)" "Put(k2, v2)" "LogData(blob1)" "Delete(k1)" "SingleDelete(k2)", handler.seen); } TEST_F(WriteBatchTest, PutGatherSlices) { WriteBatch batch; batch.Put(Slice("foo"), Slice("bar")); { // Try a write where the key is one slice but the value is two Slice key_slice("baz"); Slice value_slices[2] = { Slice("header"), Slice("payload") }; batch.Put(SliceParts(&key_slice, 1), SliceParts(value_slices, 2)); } { // One where the key is composite but the value is a single slice Slice key_slices[3] = { Slice("key"), Slice("part2"), Slice("part3") }; Slice value_slice("value"); batch.Put(SliceParts(key_slices, 3), SliceParts(&value_slice, 1)); } WriteBatchInternal::SetSequence(&batch, 100); ASSERT_EQ("Put(baz, headerpayload)@101" "Put(foo, bar)@100" "Put(keypart2part3, value)@102", 
PrintContents(&batch)); ASSERT_EQ(3u, batch.Count()); } namespace { class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl { public: explicit ColumnFamilyHandleImplDummy(int id) : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr), id_(id) {} uint32_t GetID() const override { return id_; } const Comparator* GetComparator() const override { return BytewiseComparator(); } private: uint32_t id_; }; } // namespace anonymous TEST_F(WriteBatchTest, ColumnFamiliesBatchTest) { WriteBatch batch; ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8); batch.Put(&zero, Slice("foo"), Slice("bar")); batch.Put(&two, Slice("twofoo"), Slice("bar2")); batch.Put(&eight, Slice("eightfoo"), Slice("bar8")); batch.Delete(&eight, Slice("eightfoo")); batch.SingleDelete(&two, Slice("twofoo")); batch.DeleteRange(&two, Slice("3foo"), Slice("4foo")); batch.Merge(&three, Slice("threethree"), Slice("3three")); batch.Put(&zero, Slice("foo"), Slice("bar")); batch.Merge(Slice("omom"), Slice("nom")); TestHandler handler; batch.Iterate(&handler); ASSERT_EQ( "Put(foo, bar)" "PutCF(2, twofoo, bar2)" "PutCF(8, eightfoo, bar8)" "DeleteCF(8, eightfoo)" "SingleDeleteCF(2, twofoo)" "DeleteRangeCF(2, 3foo, 4foo)" "MergeCF(3, threethree, 3three)" "Put(foo, bar)" "Merge(omom, nom)", handler.seen); } #ifndef ROCKSDB_LITE TEST_F(WriteBatchTest, ColumnFamiliesBatchWithIndexTest) { WriteBatchWithIndex batch; ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8); batch.Put(&zero, Slice("foo"), Slice("bar")); batch.Put(&two, Slice("twofoo"), Slice("bar2")); batch.Put(&eight, Slice("eightfoo"), Slice("bar8")); batch.Delete(&eight, Slice("eightfoo")); batch.SingleDelete(&two, Slice("twofoo")); batch.Merge(&three, Slice("threethree"), Slice("3three")); batch.Put(&zero, Slice("foo"), Slice("bar")); batch.Merge(Slice("omom"), Slice("nom")); std::unique_ptr iter; iter.reset(batch.NewIterator(&eight)); iter->Seek("eightfoo"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); 
ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); ASSERT_EQ("eightfoo", iter->Entry().key.ToString()); ASSERT_EQ("bar8", iter->Entry().value.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kDeleteRecord, iter->Entry().type); ASSERT_EQ("eightfoo", iter->Entry().key.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); iter.reset(batch.NewIterator(&two)); iter->Seek("twofoo"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); ASSERT_EQ("twofoo", iter->Entry().key.ToString()); ASSERT_EQ("bar2", iter->Entry().value.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kSingleDeleteRecord, iter->Entry().type); ASSERT_EQ("twofoo", iter->Entry().key.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); iter.reset(batch.NewIterator()); iter->Seek("gggg"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kMergeRecord, iter->Entry().type); ASSERT_EQ("omom", iter->Entry().key.ToString()); ASSERT_EQ("nom", iter->Entry().value.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); iter.reset(batch.NewIterator(&zero)); iter->Seek("foo"); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); ASSERT_EQ("foo", iter->Entry().key.ToString()); ASSERT_EQ("bar", iter->Entry().value.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kPutRecord, iter->Entry().type); ASSERT_EQ("foo", iter->Entry().key.ToString()); ASSERT_EQ("bar", iter->Entry().value.ToString()); iter->Next(); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(WriteType::kMergeRecord, iter->Entry().type); ASSERT_EQ("omom", iter->Entry().key.ToString()); ASSERT_EQ("nom", iter->Entry().value.ToString()); iter->Next(); 
ASSERT_OK(iter->status()); ASSERT_TRUE(!iter->Valid()); TestHandler handler; batch.GetWriteBatch()->Iterate(&handler); ASSERT_EQ( "Put(foo, bar)" "PutCF(2, twofoo, bar2)" "PutCF(8, eightfoo, bar8)" "DeleteCF(8, eightfoo)" "SingleDeleteCF(2, twofoo)" "MergeCF(3, threethree, 3three)" "Put(foo, bar)" "Merge(omom, nom)", handler.seen); } #endif // !ROCKSDB_LITE TEST_F(WriteBatchTest, SavePointTest) { Status s; WriteBatch batch; batch.SetSavePoint(); batch.Put("A", "a"); batch.Put("B", "b"); batch.SetSavePoint(); batch.Put("C", "c"); batch.Delete("A"); batch.SetSavePoint(); batch.SetSavePoint(); ASSERT_OK(batch.RollbackToSavePoint()); ASSERT_EQ( "Delete(A)@3" "Put(A, a)@0" "Put(B, b)@1" "Put(C, c)@2", PrintContents(&batch)); ASSERT_OK(batch.RollbackToSavePoint()); ASSERT_OK(batch.RollbackToSavePoint()); ASSERT_EQ( "Put(A, a)@0" "Put(B, b)@1", PrintContents(&batch)); batch.Delete("A"); batch.Put("B", "bb"); ASSERT_OK(batch.RollbackToSavePoint()); ASSERT_EQ("", PrintContents(&batch)); s = batch.RollbackToSavePoint(); ASSERT_TRUE(s.IsNotFound()); ASSERT_EQ("", PrintContents(&batch)); batch.Put("D", "d"); batch.Delete("A"); batch.SetSavePoint(); batch.Put("A", "aaa"); ASSERT_OK(batch.RollbackToSavePoint()); ASSERT_EQ( "Delete(A)@1" "Put(D, d)@0", PrintContents(&batch)); batch.SetSavePoint(); batch.Put("D", "d"); batch.Delete("A"); ASSERT_OK(batch.RollbackToSavePoint()); ASSERT_EQ( "Delete(A)@1" "Put(D, d)@0", PrintContents(&batch)); s = batch.RollbackToSavePoint(); ASSERT_TRUE(s.IsNotFound()); ASSERT_EQ( "Delete(A)@1" "Put(D, d)@0", PrintContents(&batch)); WriteBatch batch2; s = batch2.RollbackToSavePoint(); ASSERT_TRUE(s.IsNotFound()); ASSERT_EQ("", PrintContents(&batch2)); batch2.Delete("A"); batch2.SetSavePoint(); s = batch2.RollbackToSavePoint(); ASSERT_OK(s); ASSERT_EQ("Delete(A)@0", PrintContents(&batch2)); batch2.Clear(); ASSERT_EQ("", PrintContents(&batch2)); batch2.SetSavePoint(); batch2.Delete("B"); ASSERT_EQ("Delete(B)@0", PrintContents(&batch2)); 
batch2.SetSavePoint(); s = batch2.RollbackToSavePoint(); ASSERT_OK(s); ASSERT_EQ("Delete(B)@0", PrintContents(&batch2)); s = batch2.RollbackToSavePoint(); ASSERT_OK(s); ASSERT_EQ("", PrintContents(&batch2)); s = batch2.RollbackToSavePoint(); ASSERT_TRUE(s.IsNotFound()); ASSERT_EQ("", PrintContents(&batch2)); WriteBatch batch3; s = batch3.PopSavePoint(); ASSERT_TRUE(s.IsNotFound()); ASSERT_EQ("", PrintContents(&batch3)); batch3.SetSavePoint(); batch3.Delete("A"); s = batch3.PopSavePoint(); ASSERT_OK(s); ASSERT_EQ("Delete(A)@0", PrintContents(&batch3)); } TEST_F(WriteBatchTest, MemoryLimitTest) { Status s; // The header size is 12 bytes. The two Puts take 8 bytes which gives total // of 12 + 8 * 2 = 28 bytes. WriteBatch batch(0, 28); ASSERT_OK(batch.Put("a", "....")); ASSERT_OK(batch.Put("b", "....")); s = batch.Put("c", "...."); ASSERT_TRUE(s.IsMemoryLimit()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/write_callback.h000066400000000000000000000014661370372246700167600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class DB; class WriteCallback { public: virtual ~WriteCallback() {} // Will be called while on the write thread before the write executes. If // this function returns a non-OK status, the write will be aborted and this // status will be returned to the caller of DB::Write(). 
virtual Status Callback(DB* db) = 0; // return true if writes with this callback can be batched with other writes virtual bool AllowWriteBatching() = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_callback_test.cc000066400000000000000000000360371370372246700201570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/write_callback.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/write_batch.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/random.h" using std::string; namespace ROCKSDB_NAMESPACE { class WriteCallbackTest : public testing::Test { public: string dbname; WriteCallbackTest() { dbname = test::PerThreadDBPath("write_callback_testdb"); } }; class WriteCallbackTestWriteCallback1 : public WriteCallback { public: bool was_called = false; Status Callback(DB *db) override { was_called = true; // Make sure db is a DBImpl DBImpl* db_impl = dynamic_cast (db); if (db_impl == nullptr) { return Status::InvalidArgument(""); } return Status::OK(); } bool AllowWriteBatching() override { return true; } }; class WriteCallbackTestWriteCallback2 : public WriteCallback { public: Status Callback(DB* /*db*/) override { return Status::Busy(); } bool AllowWriteBatching() override { return true; } }; class MockWriteCallback : public WriteCallback { public: bool should_fail_ = false; bool allow_batching_ = false; std::atomic was_called_{false}; MockWriteCallback() {} MockWriteCallback(const MockWriteCallback& other) { should_fail_ = other.should_fail_; allow_batching_ = other.allow_batching_; was_called_.store(other.was_called_.load()); } Status Callback(DB* /*db*/) 
override { was_called_.store(true); if (should_fail_) { return Status::Busy(); } else { return Status::OK(); } } bool AllowWriteBatching() override { return allow_batching_; } }; TEST_F(WriteCallbackTest, WriteWithCallbackTest) { struct WriteOP { WriteOP(bool should_fail = false) { callback_.should_fail_ = should_fail; } void Put(const string& key, const string& val) { kvs_.push_back(std::make_pair(key, val)); write_batch_.Put(key, val); } void Clear() { kvs_.clear(); write_batch_.Clear(); callback_.was_called_.store(false); } MockWriteCallback callback_; WriteBatch write_batch_; std::vector> kvs_; }; // In each scenario we'll launch multiple threads to write. // The size of each array equals to number of threads, and // each boolean in it denote whether callback of corresponding // thread should succeed or fail. std::vector> write_scenarios = { {true}, {false}, {false, false}, {true, true}, {true, false}, {false, true}, {false, false, false}, {true, true, true}, {false, true, false}, {true, false, true}, {true, false, false, false, false}, {false, false, false, false, true}, {false, false, true, false, true}, }; for (auto& unordered_write : {true, false}) { for (auto& seq_per_batch : {true, false}) { for (auto& two_queues : {true, false}) { for (auto& allow_parallel : {true, false}) { for (auto& allow_batching : {true, false}) { for (auto& enable_WAL : {true, false}) { for (auto& enable_pipelined_write : {true, false}) { for (auto& write_group : write_scenarios) { Options options; options.create_if_missing = true; options.unordered_write = unordered_write; options.allow_concurrent_memtable_write = allow_parallel; options.enable_pipelined_write = enable_pipelined_write; options.two_write_queues = two_queues; // Skip unsupported combinations if (options.enable_pipelined_write && seq_per_batch) { continue; } if (options.enable_pipelined_write && options.two_write_queues) { continue; } if (options.unordered_write && !options.allow_concurrent_memtable_write) { 
continue; } if (options.unordered_write && options.enable_pipelined_write) { continue; } ReadOptions read_options; DB* db; DBImpl* db_impl; DestroyDB(dbname, options); DBOptions db_options(options); ColumnFamilyOptions cf_options(options); std::vector column_families; column_families.push_back( ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); std::vector handles; auto open_s = DBImpl::Open(db_options, dbname, column_families, &handles, &db, seq_per_batch, true /* batch_per_txn */); ASSERT_OK(open_s); assert(handles.size() == 1); delete handles[0]; db_impl = dynamic_cast(db); ASSERT_TRUE(db_impl); // Writers that have called JoinBatchGroup. std::atomic threads_joining(0); // Writers that have linked to the queue std::atomic threads_linked(0); // Writers that pass WriteThread::JoinBatchGroup:Wait sync-point. std::atomic threads_verified(0); std::atomic seq(db_impl->GetLatestSequenceNumber()); ASSERT_EQ(db_impl->GetLatestSequenceNumber(), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WriteThread::JoinBatchGroup:Start", [&](void*) { uint64_t cur_threads_joining = threads_joining.fetch_add(1); // Wait for the last joined writer to link to the queue. // In this way the writers link to the queue one by one. // This allows us to confidently detect the first writer // who increases threads_linked as the leader. while (threads_linked.load() < cur_threads_joining) { } }); // Verification once writers call JoinBatchGroup. 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { uint64_t cur_threads_linked = threads_linked.fetch_add(1); bool is_leader = false; bool is_last = false; // who am i is_leader = (cur_threads_linked == 0); is_last = (cur_threads_linked == write_group.size() - 1); // check my state auto* writer = reinterpret_cast(arg); if (is_leader) { ASSERT_TRUE(writer->state == WriteThread::State::STATE_GROUP_LEADER); } else { ASSERT_TRUE(writer->state == WriteThread::State::STATE_INIT); } // (meta test) the first WriteOP should indeed be the first // and the last should be the last (all others can be out of // order) if (is_leader) { ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == !write_group.front().callback_.should_fail_); } else if (is_last) { ASSERT_TRUE(writer->callback->Callback(nullptr).ok() == !write_group.back().callback_.should_fail_); } threads_verified.fetch_add(1); // Wait here until all verification in this sync-point // callback finish for all writers. while (threads_verified.load() < write_group.size()) { } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "WriteThread::JoinBatchGroup:DoneWaiting", [&](void* arg) { // check my state auto* writer = reinterpret_cast(arg); if (!allow_batching) { // no batching so everyone should be a leader ASSERT_TRUE(writer->state == WriteThread::State::STATE_GROUP_LEADER); } else if (!allow_parallel) { ASSERT_TRUE(writer->state == WriteThread::State::STATE_COMPLETED || (enable_pipelined_write && writer->state == WriteThread::State:: STATE_MEMTABLE_WRITER_LEADER)); } }); std::atomic thread_num(0); std::atomic dummy_key(0); // Each write thread create a random write batch and write to DB // with a write callback. 
std::function write_with_callback_func = [&]() { uint32_t i = thread_num.fetch_add(1); Random rnd(i); // leaders gotta lead while (i > 0 && threads_verified.load() < 1) { } // loser has to lose while (i == write_group.size() - 1 && threads_verified.load() < write_group.size() - 1) { } auto& write_op = write_group.at(i); write_op.Clear(); write_op.callback_.allow_batching_ = allow_batching; // insert some keys for (uint32_t j = 0; j < rnd.Next() % 50; j++) { // grab unique key char my_key = dummy_key.fetch_add(1); string skey(5, my_key); string sval(10, my_key); write_op.Put(skey, sval); if (!write_op.callback_.should_fail_ && !seq_per_batch) { seq.fetch_add(1); } } if (!write_op.callback_.should_fail_ && seq_per_batch) { seq.fetch_add(1); } WriteOptions woptions; woptions.disableWAL = !enable_WAL; woptions.sync = enable_WAL; Status s; if (seq_per_batch) { class PublishSeqCallback : public PreReleaseCallback { public: PublishSeqCallback(DBImpl* db_impl_in) : db_impl_(db_impl_in) {} Status Callback(SequenceNumber last_seq, bool /*not used*/, uint64_t, size_t /*index*/, size_t /*total*/) override { db_impl_->SetLastPublishedSequence(last_seq); return Status::OK(); } DBImpl* db_impl_; } publish_seq_callback(db_impl); // seq_per_batch requires a natural batch separator or Noop WriteBatchInternal::InsertNoop(&write_op.write_batch_); const size_t ONE_BATCH = 1; s = db_impl->WriteImpl( woptions, &write_op.write_batch_, &write_op.callback_, nullptr, 0, false, nullptr, ONE_BATCH, two_queues ? 
&publish_seq_callback : nullptr); } else { s = db_impl->WriteWithCallback( woptions, &write_op.write_batch_, &write_op.callback_); } if (write_op.callback_.should_fail_) { ASSERT_TRUE(s.IsBusy()); } else { ASSERT_OK(s); } }; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // do all the writes std::vector threads; for (uint32_t i = 0; i < write_group.size(); i++) { threads.emplace_back(write_with_callback_func); } for (auto& t : threads) { t.join(); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); // check for keys string value; for (auto& w : write_group) { ASSERT_TRUE(w.callback_.was_called_.load()); for (auto& kvp : w.kvs_) { if (w.callback_.should_fail_) { ASSERT_TRUE( db->Get(read_options, kvp.first, &value).IsNotFound()); } else { ASSERT_OK(db->Get(read_options, kvp.first, &value)); ASSERT_EQ(value, kvp.second); } } } ASSERT_EQ(seq.load(), db_impl->TEST_GetLastVisibleSequence()); delete db; DestroyDB(dbname, options); } } } } } } } } } TEST_F(WriteCallbackTest, WriteCallBackTest) { Options options; WriteOptions write_options; ReadOptions read_options; string value; DB* db; DBImpl* db_impl; DestroyDB(dbname, options); options.create_if_missing = true; Status s = DB::Open(options, dbname, &db); ASSERT_OK(s); db_impl = dynamic_cast (db); ASSERT_TRUE(db_impl); WriteBatch wb; wb.Put("a", "value.a"); wb.Delete("x"); // Test a simple Write s = db->Write(write_options, &wb); ASSERT_OK(s); s = db->Get(read_options, "a", &value); ASSERT_OK(s); ASSERT_EQ("value.a", value); // Test WriteWithCallback WriteCallbackTestWriteCallback1 callback1; WriteBatch wb2; wb2.Put("a", "value.a2"); s = db_impl->WriteWithCallback(write_options, &wb2, &callback1); ASSERT_OK(s); ASSERT_TRUE(callback1.was_called); s = db->Get(read_options, "a", &value); ASSERT_OK(s); ASSERT_EQ("value.a2", value); // Test WriteWithCallback for a callback that fails WriteCallbackTestWriteCallback2 callback2; WriteBatch wb3; wb3.Put("a", "value.a3"); s = 
db_impl->WriteWithCallback(write_options, &wb3, &callback2); ASSERT_NOK(s); s = db->Get(read_options, "a", &value); ASSERT_OK(s); ASSERT_EQ("value.a2", value); delete db; DestroyDB(dbname, options); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as WriteWithCallback is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/db/write_controller.cc000066400000000000000000000100501370372246700175320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/write_controller.h" #include #include #include #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { std::unique_ptr WriteController::GetStopToken() { ++total_stopped_; return std::unique_ptr(new StopWriteToken(this)); } std::unique_ptr WriteController::GetDelayToken( uint64_t write_rate) { total_delayed_++; // Reset counters. last_refill_time_ = 0; bytes_left_ = 0; set_delayed_write_rate(write_rate); return std::unique_ptr(new DelayWriteToken(this)); } std::unique_ptr WriteController::GetCompactionPressureToken() { ++total_compaction_pressure_; return std::unique_ptr( new CompactionPressureToken(this)); } bool WriteController::IsStopped() const { return total_stopped_.load(std::memory_order_relaxed) > 0; } // This is inside DB mutex, so we can't sleep and need to minimize // frequency to get time. // If it turns out to be a performance issue, we can redesign the thread // synchronization model here. // The function trust caller will sleep micros returned. 
uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) { if (total_stopped_.load(std::memory_order_relaxed) > 0) { return 0; } if (total_delayed_.load(std::memory_order_relaxed) == 0) { return 0; } const uint64_t kMicrosPerSecond = 1000000; const uint64_t kRefillInterval = 1024U; if (bytes_left_ >= num_bytes) { bytes_left_ -= num_bytes; return 0; } // The frequency to get time inside DB mutex is less than one per refill // interval. auto time_now = NowMicrosMonotonic(env); uint64_t sleep_debt = 0; uint64_t time_since_last_refill = 0; if (last_refill_time_ != 0) { if (last_refill_time_ > time_now) { sleep_debt = last_refill_time_ - time_now; } else { time_since_last_refill = time_now - last_refill_time_; bytes_left_ += static_cast(static_cast(time_since_last_refill) / kMicrosPerSecond * delayed_write_rate_); if (time_since_last_refill >= kRefillInterval && bytes_left_ > num_bytes) { // If refill interval already passed and we have enough bytes // return without extra sleeping. last_refill_time_ = time_now; bytes_left_ -= num_bytes; return 0; } } } uint64_t single_refill_amount = delayed_write_rate_ * kRefillInterval / kMicrosPerSecond; if (bytes_left_ + single_refill_amount >= num_bytes) { // Wait until a refill interval // Never trigger expire for less than one refill interval to avoid to get // time. bytes_left_ = bytes_left_ + single_refill_amount - num_bytes; last_refill_time_ = time_now + kRefillInterval; return kRefillInterval + sleep_debt; } // Need to refill more than one interval. Need to sleep longer. Check // whether expiration will hit // Sleep just until `num_bytes` is allowed. 
uint64_t sleep_amount = static_cast(num_bytes / static_cast(delayed_write_rate_) * kMicrosPerSecond) + sleep_debt; last_refill_time_ = time_now + sleep_amount; return sleep_amount; } uint64_t WriteController::NowMicrosMonotonic(Env* env) { return env->NowNanos() / std::milli::den; } StopWriteToken::~StopWriteToken() { assert(controller_->total_stopped_ >= 1); --controller_->total_stopped_; } DelayWriteToken::~DelayWriteToken() { controller_->total_delayed_--; assert(controller_->total_delayed_.load() >= 0); } CompactionPressureToken::~CompactionPressureToken() { controller_->total_compaction_pressure_--; assert(controller_->total_compaction_pressure_ >= 0); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_controller.h000066400000000000000000000113331370372246700174010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "rocksdb/rate_limiter.h" namespace ROCKSDB_NAMESPACE { class Env; class WriteControllerToken; // WriteController is controlling write stalls in our write code-path. Write // stalls happen when compaction can't keep up with write rate. 
// All of the methods here (including WriteControllerToken's destructors) need // to be called while holding DB mutex class WriteController { public: explicit WriteController(uint64_t _delayed_write_rate = 1024u * 1024u * 32u, int64_t low_pri_rate_bytes_per_sec = 1024 * 1024) : total_stopped_(0), total_delayed_(0), total_compaction_pressure_(0), bytes_left_(0), last_refill_time_(0), low_pri_rate_limiter_( NewGenericRateLimiter(low_pri_rate_bytes_per_sec)) { set_max_delayed_write_rate(_delayed_write_rate); } ~WriteController() = default; // When an actor (column family) requests a stop token, all writes will be // stopped until the stop token is released (deleted) std::unique_ptr GetStopToken(); // When an actor (column family) requests a delay token, total delay for all // writes to the DB will be controlled under the delayed write rate. Every // write needs to call GetDelay() with number of bytes writing to the DB, // which returns number of microseconds to sleep. std::unique_ptr GetDelayToken( uint64_t delayed_write_rate); // When an actor (column family) requests a moderate token, compaction // threads will be increased std::unique_ptr GetCompactionPressureToken(); // these three metods are querying the state of the WriteController bool IsStopped() const; bool NeedsDelay() const { return total_delayed_.load() > 0; } bool NeedSpeedupCompaction() const { return IsStopped() || NeedsDelay() || total_compaction_pressure_ > 0; } // return how many microseconds the caller needs to sleep after the call // num_bytes: how many number of bytes to put into the DB. // Prerequisite: DB mutex held. 
uint64_t GetDelay(Env* env, uint64_t num_bytes); void set_delayed_write_rate(uint64_t write_rate) { // avoid divide 0 if (write_rate == 0) { write_rate = 1u; } else if (write_rate > max_delayed_write_rate()) { write_rate = max_delayed_write_rate(); } delayed_write_rate_ = write_rate; } void set_max_delayed_write_rate(uint64_t write_rate) { // avoid divide 0 if (write_rate == 0) { write_rate = 1u; } max_delayed_write_rate_ = write_rate; // update delayed_write_rate_ as well delayed_write_rate_ = write_rate; } uint64_t delayed_write_rate() const { return delayed_write_rate_; } uint64_t max_delayed_write_rate() const { return max_delayed_write_rate_; } RateLimiter* low_pri_rate_limiter() { return low_pri_rate_limiter_.get(); } private: uint64_t NowMicrosMonotonic(Env* env); friend class WriteControllerToken; friend class StopWriteToken; friend class DelayWriteToken; friend class CompactionPressureToken; std::atomic total_stopped_; std::atomic total_delayed_; std::atomic total_compaction_pressure_; uint64_t bytes_left_; uint64_t last_refill_time_; // write rate set when initialization or by `DBImpl::SetDBOptions` uint64_t max_delayed_write_rate_; // current write rate uint64_t delayed_write_rate_; std::unique_ptr low_pri_rate_limiter_; }; class WriteControllerToken { public: explicit WriteControllerToken(WriteController* controller) : controller_(controller) {} virtual ~WriteControllerToken() {} protected: WriteController* controller_; private: // no copying allowed WriteControllerToken(const WriteControllerToken&) = delete; void operator=(const WriteControllerToken&) = delete; }; class StopWriteToken : public WriteControllerToken { public: explicit StopWriteToken(WriteController* controller) : WriteControllerToken(controller) {} virtual ~StopWriteToken(); }; class DelayWriteToken : public WriteControllerToken { public: explicit DelayWriteToken(WriteController* controller) : WriteControllerToken(controller) {} virtual ~DelayWriteToken(); }; class 
CompactionPressureToken : public WriteControllerToken { public: explicit CompactionPressureToken(WriteController* controller) : WriteControllerToken(controller) {} virtual ~CompactionPressureToken(); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_controller_test.cc000066400000000000000000000112511370372246700205750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include #include "db/write_controller.h" #include "rocksdb/env.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { class WriteControllerTest : public testing::Test {}; class TimeSetEnv : public EnvWrapper { public: explicit TimeSetEnv() : EnvWrapper(nullptr) {} uint64_t now_micros_ = 6666; uint64_t NowNanos() override { return now_micros_ * std::milli::den; } }; TEST_F(WriteControllerTest, ChangeDelayRateTest) { TimeSetEnv env; WriteController controller(40000000u); // also set max delayed rate controller.set_delayed_write_rate(10000000u); auto delay_token_0 = controller.GetDelayToken(controller.delayed_write_rate()); ASSERT_EQ(static_cast(2000000), controller.GetDelay(&env, 20000000u)); auto delay_token_1 = controller.GetDelayToken(2000000u); ASSERT_EQ(static_cast(10000000), controller.GetDelay(&env, 20000000u)); auto delay_token_2 = controller.GetDelayToken(1000000u); ASSERT_EQ(static_cast(20000000), controller.GetDelay(&env, 20000000u)); auto delay_token_3 = controller.GetDelayToken(20000000u); ASSERT_EQ(static_cast(1000000), controller.GetDelay(&env, 20000000u)); // This is more than max rate. Max delayed rate will be used. 
auto delay_token_4 = controller.GetDelayToken(controller.delayed_write_rate() * 3); ASSERT_EQ(static_cast(500000), controller.GetDelay(&env, 20000000u)); } TEST_F(WriteControllerTest, SanityTest) { WriteController controller(10000000u); auto stop_token_1 = controller.GetStopToken(); auto stop_token_2 = controller.GetStopToken(); ASSERT_TRUE(controller.IsStopped()); stop_token_1.reset(); ASSERT_TRUE(controller.IsStopped()); stop_token_2.reset(); ASSERT_FALSE(controller.IsStopped()); TimeSetEnv env; auto delay_token_1 = controller.GetDelayToken(10000000u); ASSERT_EQ(static_cast(2000000), controller.GetDelay(&env, 20000000u)); env.now_micros_ += 1999900u; // sleep debt 1000 auto delay_token_2 = controller.GetDelayToken(10000000u); // Rate reset after changing the token. ASSERT_EQ(static_cast(2000000), controller.GetDelay(&env, 20000000u)); env.now_micros_ += 1999900u; // sleep debt 1000 // One refill: 10240 bytes allowed, 1000 used, 9240 left ASSERT_EQ(static_cast(1124), controller.GetDelay(&env, 1000u)); env.now_micros_ += 1124u; // sleep debt 0 delay_token_2.reset(); // 1000 used, 8240 left ASSERT_EQ(static_cast(0), controller.GetDelay(&env, 1000u)); env.now_micros_ += 100u; // sleep credit 100 // 1000 used, 7240 left ASSERT_EQ(static_cast(0), controller.GetDelay(&env, 1000u)); env.now_micros_ += 100u; // sleep credit 200 // One refill: 10240 fileed, sleep credit generates 2000. 8000 used // 7240 + 10240 + 2000 - 8000 = 11480 left ASSERT_EQ(static_cast(1024u), controller.GetDelay(&env, 8000u)); env.now_micros_ += 200u; // sleep debt 824 // 1000 used, 10480 left. ASSERT_EQ(static_cast(0), controller.GetDelay(&env, 1000u)); env.now_micros_ += 200u; // sleep debt 624 // Out of bound sleep, still 10480 left ASSERT_EQ(static_cast(3000624u), controller.GetDelay(&env, 30000000u)); env.now_micros_ += 3000724u; // sleep credit 100 // 6000 used, 4480 left. 
ASSERT_EQ(static_cast(0), controller.GetDelay(&env, 6000u)); env.now_micros_ += 200u; // sleep credit 300 // One refill, credit 4480 balance + 3000 credit + 10240 refill // Use 8000, 9720 left ASSERT_EQ(static_cast(1024u), controller.GetDelay(&env, 8000u)); env.now_micros_ += 3024u; // sleep credit 2000 // 1720 left ASSERT_EQ(static_cast(0u), controller.GetDelay(&env, 8000u)); // 1720 balance + 20000 credit = 20170 left // Use 8000, 12170 left ASSERT_EQ(static_cast(0u), controller.GetDelay(&env, 8000u)); // 4170 left ASSERT_EQ(static_cast(0u), controller.GetDelay(&env, 8000u)); // Need a refill ASSERT_EQ(static_cast(1024u), controller.GetDelay(&env, 9000u)); delay_token_1.reset(); ASSERT_EQ(static_cast(0), controller.GetDelay(&env, 30000000u)); delay_token_1.reset(); ASSERT_FALSE(controller.IsStopped()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/db/write_thread.cc000066400000000000000000000701761370372246700166350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "db/write_thread.h" #include #include #include "db/column_family.h" #include "monitoring/perf_context_imp.h" #include "port/port.h" #include "test_util/sync_point.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { WriteThread::WriteThread(const ImmutableDBOptions& db_options) : max_yield_usec_(db_options.enable_write_thread_adaptive_yield ? 
db_options.write_thread_max_yield_usec : 0), slow_yield_usec_(db_options.write_thread_slow_yield_usec), allow_concurrent_memtable_write_( db_options.allow_concurrent_memtable_write), enable_pipelined_write_(db_options.enable_pipelined_write), max_write_batch_group_size_bytes( db_options.max_write_batch_group_size_bytes), newest_writer_(nullptr), newest_memtable_writer_(nullptr), last_sequence_(0), write_stall_dummy_(), stall_mu_(), stall_cv_(&stall_mu_) {} uint8_t WriteThread::BlockingAwaitState(Writer* w, uint8_t goal_mask) { // We're going to block. Lazily create the mutex. We guarantee // propagation of this construction to the waker via the // STATE_LOCKED_WAITING state. The waker won't try to touch the mutex // or the condvar unless they CAS away the STATE_LOCKED_WAITING that // we install below. w->CreateMutex(); auto state = w->state.load(std::memory_order_acquire); assert(state != STATE_LOCKED_WAITING); if ((state & goal_mask) == 0 && w->state.compare_exchange_strong(state, STATE_LOCKED_WAITING)) { // we have permission (and an obligation) to use StateMutex std::unique_lock guard(w->StateMutex()); w->StateCV().wait(guard, [w] { return w->state.load(std::memory_order_relaxed) != STATE_LOCKED_WAITING; }); state = w->state.load(std::memory_order_relaxed); } // else tricky. Goal is met or CAS failed. In the latter case the waker // must have changed the state, and compare_exchange_strong has updated // our local variable with the new one. At the moment WriteThread never // waits for a transition across intermediate states, so we know that // since a state change has occurred the goal must have been met. assert((state & goal_mask) != 0); return state; } uint8_t WriteThread::AwaitState(Writer* w, uint8_t goal_mask, AdaptationContext* ctx) { uint8_t state = 0; // 1. Busy loop using "pause" for 1 micro sec // 2. Else SOMETIMES busy loop using "yield" for 100 micro sec (default) // 3. 
Else blocking wait // On a modern Xeon each loop takes about 7 nanoseconds (most of which // is the effect of the pause instruction), so 200 iterations is a bit // more than a microsecond. This is long enough that waits longer than // this can amortize the cost of accessing the clock and yielding. for (uint32_t tries = 0; tries < 200; ++tries) { state = w->state.load(std::memory_order_acquire); if ((state & goal_mask) != 0) { return state; } port::AsmVolatilePause(); } // This is below the fast path, so that the stat is zero when all writes are // from the same thread. PERF_TIMER_GUARD(write_thread_wait_nanos); // If we're only going to end up waiting a short period of time, // it can be a lot more efficient to call std::this_thread::yield() // in a loop than to block in StateMutex(). For reference, on my 4.0 // SELinux test server with support for syscall auditing enabled, the // minimum latency between FUTEX_WAKE to returning from FUTEX_WAIT is // 2.7 usec, and the average is more like 10 usec. That can be a big // drag on RockDB's single-writer design. Of course, spinning is a // bad idea if other threads are waiting to run or if we're going to // wait for a long time. How do we decide? // // We break waiting into 3 categories: short-uncontended, // short-contended, and long. If we had an oracle, then we would always // spin for short-uncontended, always block for long, and our choice for // short-contended might depend on whether we were trying to optimize // RocksDB throughput or avoid being greedy with system resources. // // Bucketing into short or long is easy by measuring elapsed time. // Differentiating short-uncontended from short-contended is a bit // trickier, but not too bad. We could look for involuntary context // switches using getrusage(RUSAGE_THREAD, ..), but it's less work // (portability code and CPU) to just look for yield calls that take // longer than we expect. 
sched_yield() doesn't actually result in any // context switch overhead if there are no other runnable processes // on the current core, in which case it usually takes less than // a microsecond. // // There are two primary tunables here: the threshold between "short" // and "long" waits, and the threshold at which we suspect that a yield // is slow enough to indicate we should probably block. If these // thresholds are chosen well then CPU-bound workloads that don't // have more threads than cores will experience few context switches // (voluntary or involuntary), and the total number of context switches // (voluntary and involuntary) will not be dramatically larger (maybe // 2x) than the number of voluntary context switches that occur when // --max_yield_wait_micros=0. // // There's another constant, which is the number of slow yields we will // tolerate before reversing our previous decision. Solitary slow // yields are pretty common (low-priority small jobs ready to run), // so this should be at least 2. We set this conservatively to 3 so // that we can also immediately schedule a ctx adaptation, rather than // waiting for the next update_ctx. const size_t kMaxSlowYieldsWhileSpinning = 3; // Whether the yield approach has any credit in this context. The credit is // added by yield being succesfull before timing out, and decreased otherwise. auto& yield_credit = ctx->value; // Update the yield_credit based on sample runs or right after a hard failure bool update_ctx = false; // Should we reinforce the yield credit bool would_spin_again = false; // The samling base for updating the yeild credit. The sampling rate would be // 1/sampling_base. 
const int sampling_base = 256; if (max_yield_usec_ > 0) { update_ctx = Random::GetTLSInstance()->OneIn(sampling_base); if (update_ctx || yield_credit.load(std::memory_order_relaxed) >= 0) { // we're updating the adaptation statistics, or spinning has > // 50% chance of being shorter than max_yield_usec_ and causing no // involuntary context switches auto spin_begin = std::chrono::steady_clock::now(); // this variable doesn't include the final yield (if any) that // causes the goal to be met size_t slow_yield_count = 0; auto iter_begin = spin_begin; while ((iter_begin - spin_begin) <= std::chrono::microseconds(max_yield_usec_)) { std::this_thread::yield(); state = w->state.load(std::memory_order_acquire); if ((state & goal_mask) != 0) { // success would_spin_again = true; break; } auto now = std::chrono::steady_clock::now(); if (now == iter_begin || now - iter_begin >= std::chrono::microseconds(slow_yield_usec_)) { // conservatively count it as a slow yield if our clock isn't // accurate enough to measure the yield duration ++slow_yield_count; if (slow_yield_count >= kMaxSlowYieldsWhileSpinning) { // Not just one ivcsw, but several. Immediately update yield_credit // and fall back to blocking update_ctx = true; break; } } iter_begin = now; } } } if ((state & goal_mask) == 0) { TEST_SYNC_POINT_CALLBACK("WriteThread::AwaitState:BlockingWaiting", w); state = BlockingAwaitState(w, goal_mask); } if (update_ctx) { // Since our update is sample based, it is ok if a thread overwrites the // updates by other threads. Thus the update does not have to be atomic. auto v = yield_credit.load(std::memory_order_relaxed); // fixed point exponential decay with decay constant 1/1024, with +1 // and -1 scaled to avoid overflow for int32_t // // On each update the positive credit is decayed by a facor of 1/1024 (i.e., // 0.1%). If the sampled yield was successful, the credit is also increased // by X. 
Setting X=2^17 ensures that the credit never exceeds // 2^17*2^10=2^27, which is lower than 2^31 the upperbound of int32_t. Same // logic applies to negative credits. v = v - (v / 1024) + (would_spin_again ? 1 : -1) * 131072; yield_credit.store(v, std::memory_order_relaxed); } assert((state & goal_mask) != 0); return state; } void WriteThread::SetState(Writer* w, uint8_t new_state) { auto state = w->state.load(std::memory_order_acquire); if (state == STATE_LOCKED_WAITING || !w->state.compare_exchange_strong(state, new_state)) { assert(state == STATE_LOCKED_WAITING); std::lock_guard guard(w->StateMutex()); assert(w->state.load(std::memory_order_relaxed) != new_state); w->state.store(new_state, std::memory_order_relaxed); w->StateCV().notify_one(); } } bool WriteThread::LinkOne(Writer* w, std::atomic* newest_writer) { assert(newest_writer != nullptr); assert(w->state == STATE_INIT); Writer* writers = newest_writer->load(std::memory_order_relaxed); while (true) { // If write stall in effect, and w->no_slowdown is not true, // block here until stall is cleared. 
If its true, then return // immediately if (writers == &write_stall_dummy_) { if (w->no_slowdown) { w->status = Status::Incomplete("Write stall"); SetState(w, STATE_COMPLETED); return false; } // Since no_slowdown is false, wait here to be notified of the write // stall clearing { MutexLock lock(&stall_mu_); writers = newest_writer->load(std::memory_order_relaxed); if (writers == &write_stall_dummy_) { stall_cv_.Wait(); // Load newest_writers_ again since it may have changed writers = newest_writer->load(std::memory_order_relaxed); continue; } } } w->link_older = writers; if (newest_writer->compare_exchange_weak(writers, w)) { return (writers == nullptr); } } } bool WriteThread::LinkGroup(WriteGroup& write_group, std::atomic* newest_writer) { assert(newest_writer != nullptr); Writer* leader = write_group.leader; Writer* last_writer = write_group.last_writer; Writer* w = last_writer; while (true) { // Unset link_newer pointers to make sure when we call // CreateMissingNewerLinks later it create all missing links. 
w->link_newer = nullptr; w->write_group = nullptr; if (w == leader) { break; } w = w->link_older; } Writer* newest = newest_writer->load(std::memory_order_relaxed); while (true) { leader->link_older = newest; if (newest_writer->compare_exchange_weak(newest, last_writer)) { return (newest == nullptr); } } } void WriteThread::CreateMissingNewerLinks(Writer* head) { while (true) { Writer* next = head->link_older; if (next == nullptr || next->link_newer != nullptr) { assert(next == nullptr || next->link_newer == head); break; } next->link_newer = head; head = next; } } WriteThread::Writer* WriteThread::FindNextLeader(Writer* from, Writer* boundary) { assert(from != nullptr && from != boundary); Writer* current = from; while (current->link_older != boundary) { current = current->link_older; assert(current != nullptr); } return current; } void WriteThread::CompleteLeader(WriteGroup& write_group) { assert(write_group.size > 0); Writer* leader = write_group.leader; if (write_group.size == 1) { write_group.leader = nullptr; write_group.last_writer = nullptr; } else { assert(leader->link_newer != nullptr); leader->link_newer->link_older = nullptr; write_group.leader = leader->link_newer; } write_group.size -= 1; SetState(leader, STATE_COMPLETED); } void WriteThread::CompleteFollower(Writer* w, WriteGroup& write_group) { assert(write_group.size > 1); assert(w != write_group.leader); if (w == write_group.last_writer) { w->link_older->link_newer = nullptr; write_group.last_writer = w->link_older; } else { w->link_older->link_newer = w->link_newer; w->link_newer->link_older = w->link_older; } write_group.size -= 1; SetState(w, STATE_COMPLETED); } void WriteThread::BeginWriteStall() { LinkOne(&write_stall_dummy_, &newest_writer_); // Walk writer list until w->write_group != nullptr. 
The current write group // will not have a mix of slowdown/no_slowdown, so its ok to stop at that // point Writer* w = write_stall_dummy_.link_older; Writer* prev = &write_stall_dummy_; while (w != nullptr && w->write_group == nullptr) { if (w->no_slowdown) { prev->link_older = w->link_older; w->status = Status::Incomplete("Write stall"); SetState(w, STATE_COMPLETED); if (prev->link_older) { prev->link_older->link_newer = prev; } w = prev->link_older; } else { prev = w; w = w->link_older; } } } void WriteThread::EndWriteStall() { MutexLock lock(&stall_mu_); // Unlink write_stall_dummy_ from the write queue. This will unblock // pending write threads to enqueue themselves assert(newest_writer_.load(std::memory_order_relaxed) == &write_stall_dummy_); assert(write_stall_dummy_.link_older != nullptr); write_stall_dummy_.link_older->link_newer = write_stall_dummy_.link_newer; newest_writer_.exchange(write_stall_dummy_.link_older); // Wake up writers stall_cv_.SignalAll(); } static WriteThread::AdaptationContext jbg_ctx("JoinBatchGroup"); void WriteThread::JoinBatchGroup(Writer* w) { TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:Start", w); assert(w->batch != nullptr); bool linked_as_leader = LinkOne(w, &newest_writer_); if (linked_as_leader) { SetState(w, STATE_GROUP_LEADER); } TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:Wait", w); if (!linked_as_leader) { /** * Wait util: * 1) An existing leader pick us as the new leader when it finishes * 2) An existing leader pick us as its follewer and * 2.1) finishes the memtable writes on our behalf * 2.2) Or tell us to finish the memtable writes in pralallel * 3) (pipelined write) An existing leader pick us as its follower and * finish book-keeping and WAL write for us, enqueue us as pending * memtable writer, and * 3.1) we become memtable writer group leader, or * 3.2) an existing memtable writer group leader tell us to finish memtable * writes in parallel. 
*/ TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:BeganWaiting", w); AwaitState(w, STATE_GROUP_LEADER | STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED, &jbg_ctx); TEST_SYNC_POINT_CALLBACK("WriteThread::JoinBatchGroup:DoneWaiting", w); } } size_t WriteThread::EnterAsBatchGroupLeader(Writer* leader, WriteGroup* write_group) { assert(leader->link_older == nullptr); assert(leader->batch != nullptr); assert(write_group != nullptr); size_t size = WriteBatchInternal::ByteSize(leader->batch); // Allow the group to grow up to a maximum size, but if the // original write is small, limit the growth so we do not slow // down the small write too much. size_t max_size = max_write_batch_group_size_bytes; const uint64_t min_batch_size_bytes = max_write_batch_group_size_bytes / 8; if (size <= min_batch_size_bytes) { max_size = size + min_batch_size_bytes; } leader->write_group = write_group; write_group->leader = leader; write_group->last_writer = leader; write_group->size = 1; Writer* newest_writer = newest_writer_.load(std::memory_order_acquire); // This is safe regardless of any db mutex status of the caller. Previous // calls to ExitAsGroupLeader either didn't call CreateMissingNewerLinks // (they emptied the list and then we added ourself as leader) or had to // explicitly wake us up (the list was non-empty when we added ourself, // so we have already received our MarkJoined). CreateMissingNewerLinks(newest_writer); // Tricky. Iteration start (leader) is exclusive and finish // (newest_writer) is inclusive. Iteration goes from old to new. Writer* w = leader; while (w != newest_writer) { w = w->link_newer; if (w->sync && !leader->sync) { // Do not include a sync write into a batch handled by a non-sync write. break; } if (w->no_slowdown != leader->no_slowdown) { // Do not mix writes that are ok with delays with the ones that // request fail on delays. 
break; } if (w->disable_wal != leader->disable_wal) { // Do not mix writes that enable WAL with the ones whose // WAL disabled. break; } if (w->batch == nullptr) { // Do not include those writes with nullptr batch. Those are not writes, // those are something else. They want to be alone break; } if (w->callback != nullptr && !w->callback->AllowWriteBatching()) { // don't batch writes that don't want to be batched break; } auto batch_size = WriteBatchInternal::ByteSize(w->batch); if (size + batch_size > max_size) { // Do not make batch too big break; } w->write_group = write_group; size += batch_size; write_group->last_writer = w; write_group->size++; } TEST_SYNC_POINT_CALLBACK("WriteThread::EnterAsBatchGroupLeader:End", w); return size; } void WriteThread::EnterAsMemTableWriter(Writer* leader, WriteGroup* write_group) { assert(leader != nullptr); assert(leader->link_older == nullptr); assert(leader->batch != nullptr); assert(write_group != nullptr); size_t size = WriteBatchInternal::ByteSize(leader->batch); // Allow the group to grow up to a maximum size, but if the // original write is small, limit the growth so we do not slow // down the small write too much. 
size_t max_size = max_write_batch_group_size_bytes; const uint64_t min_batch_size_bytes = max_write_batch_group_size_bytes / 8; if (size <= min_batch_size_bytes) { max_size = size + min_batch_size_bytes; } leader->write_group = write_group; write_group->leader = leader; write_group->size = 1; Writer* last_writer = leader; if (!allow_concurrent_memtable_write_ || !leader->batch->HasMerge()) { Writer* newest_writer = newest_memtable_writer_.load(); CreateMissingNewerLinks(newest_writer); Writer* w = leader; while (w != newest_writer) { w = w->link_newer; if (w->batch == nullptr) { break; } if (w->batch->HasMerge()) { break; } if (!allow_concurrent_memtable_write_) { auto batch_size = WriteBatchInternal::ByteSize(w->batch); if (size + batch_size > max_size) { // Do not make batch too big break; } size += batch_size; } w->write_group = write_group; last_writer = w; write_group->size++; } } write_group->last_writer = last_writer; write_group->last_sequence = last_writer->sequence + WriteBatchInternal::Count(last_writer->batch) - 1; } void WriteThread::ExitAsMemTableWriter(Writer* /*self*/, WriteGroup& write_group) { Writer* leader = write_group.leader; Writer* last_writer = write_group.last_writer; Writer* newest_writer = last_writer; if (!newest_memtable_writer_.compare_exchange_strong(newest_writer, nullptr)) { CreateMissingNewerLinks(newest_writer); Writer* next_leader = last_writer->link_newer; assert(next_leader != nullptr); next_leader->link_older = nullptr; SetState(next_leader, STATE_MEMTABLE_WRITER_LEADER); } Writer* w = leader; while (true) { if (!write_group.status.ok()) { w->status = write_group.status; } Writer* next = w->link_newer; if (w != leader) { SetState(w, STATE_COMPLETED); } if (w == last_writer) { break; } w = next; } // Note that leader has to exit last, since it owns the write group. 
SetState(leader, STATE_COMPLETED); } void WriteThread::LaunchParallelMemTableWriters(WriteGroup* write_group) { assert(write_group != nullptr); write_group->running.store(write_group->size); for (auto w : *write_group) { SetState(w, STATE_PARALLEL_MEMTABLE_WRITER); } } static WriteThread::AdaptationContext cpmtw_ctx("CompleteParallelMemTableWriter"); // This method is called by both the leader and parallel followers bool WriteThread::CompleteParallelMemTableWriter(Writer* w) { auto* write_group = w->write_group; if (!w->status.ok()) { std::lock_guard guard(write_group->leader->StateMutex()); write_group->status = w->status; } if (write_group->running-- > 1) { // we're not the last one AwaitState(w, STATE_COMPLETED, &cpmtw_ctx); return false; } // else we're the last parallel worker and should perform exit duties. w->status = write_group->status; return true; } void WriteThread::ExitAsBatchGroupFollower(Writer* w) { auto* write_group = w->write_group; assert(w->state == STATE_PARALLEL_MEMTABLE_WRITER); assert(write_group->status.ok()); ExitAsBatchGroupLeader(*write_group, write_group->status); assert(w->status.ok()); assert(w->state == STATE_COMPLETED); SetState(write_group->leader, STATE_COMPLETED); } static WriteThread::AdaptationContext eabgl_ctx("ExitAsBatchGroupLeader"); void WriteThread::ExitAsBatchGroupLeader(WriteGroup& write_group, Status status) { Writer* leader = write_group.leader; Writer* last_writer = write_group.last_writer; assert(leader->link_older == nullptr); // Propagate memtable write error to the whole group. if (status.ok() && !write_group.status.ok()) { status = write_group.status; } if (enable_pipelined_write_) { // Notify writers don't write to memtable to exit. 
for (Writer* w = last_writer; w != leader;) { Writer* next = w->link_older; w->status = status; if (!w->ShouldWriteToMemtable()) { CompleteFollower(w, write_group); } w = next; } if (!leader->ShouldWriteToMemtable()) { CompleteLeader(write_group); } Writer* next_leader = nullptr; // Look for next leader before we call LinkGroup. If there isn't // pending writers, place a dummy writer at the tail of the queue // so we know the boundary of the current write group. Writer dummy; Writer* expected = last_writer; bool has_dummy = newest_writer_.compare_exchange_strong(expected, &dummy); if (!has_dummy) { // We find at least one pending writer when we insert dummy. We search // for next leader from there. next_leader = FindNextLeader(expected, last_writer); assert(next_leader != nullptr && next_leader != last_writer); } // Link the ramaining of the group to memtable writer list. // // We have to link our group to memtable writer queue before wake up the // next leader or set newest_writer_ to null, otherwise the next leader // can run ahead of us and link to memtable writer queue before we do. if (write_group.size > 0) { if (LinkGroup(write_group, &newest_memtable_writer_)) { // The leader can now be different from current writer. SetState(write_group.leader, STATE_MEMTABLE_WRITER_LEADER); } } // If we have inserted dummy in the queue, remove it now and check if there // are pending writer join the queue since we insert the dummy. If so, // look for next leader again. 
if (has_dummy) { assert(next_leader == nullptr); expected = &dummy; bool has_pending_writer = !newest_writer_.compare_exchange_strong(expected, nullptr); if (has_pending_writer) { next_leader = FindNextLeader(expected, &dummy); assert(next_leader != nullptr && next_leader != &dummy); } } if (next_leader != nullptr) { next_leader->link_older = nullptr; SetState(next_leader, STATE_GROUP_LEADER); } AwaitState(leader, STATE_MEMTABLE_WRITER_LEADER | STATE_PARALLEL_MEMTABLE_WRITER | STATE_COMPLETED, &eabgl_ctx); } else { Writer* head = newest_writer_.load(std::memory_order_acquire); if (head != last_writer || !newest_writer_.compare_exchange_strong(head, nullptr)) { // Either w wasn't the head during the load(), or it was the head // during the load() but somebody else pushed onto the list before // we did the compare_exchange_strong (causing it to fail). In the // latter case compare_exchange_strong has the effect of re-reading // its first param (head). No need to retry a failing CAS, because // only a departing leader (which we are at the moment) can remove // nodes from the list. assert(head != last_writer); // After walking link_older starting from head (if not already done) // we will be able to traverse w->link_newer below. This function // can only be called from an active leader, only a leader can // clear newest_writer_, we didn't, and only a clear newest_writer_ // could cause the next leader to start their work without a call // to MarkJoined, so we can definitely conclude that no other leader // work is going on here (with or without db mutex). CreateMissingNewerLinks(head); assert(last_writer->link_newer->link_older == last_writer); last_writer->link_newer->link_older = nullptr; // Next leader didn't self-identify, because newest_writer_ wasn't // nullptr when they enqueued (we were definitely enqueued before them // and are still in the list). 
That means leader handoff occurs when // we call MarkJoined SetState(last_writer->link_newer, STATE_GROUP_LEADER); } // else nobody else was waiting, although there might already be a new // leader now while (last_writer != leader) { last_writer->status = status; // we need to read link_older before calling SetState, because as soon // as it is marked committed the other thread's Await may return and // deallocate the Writer. auto next = last_writer->link_older; SetState(last_writer, STATE_COMPLETED); last_writer = next; } } } static WriteThread::AdaptationContext eu_ctx("EnterUnbatched"); void WriteThread::EnterUnbatched(Writer* w, InstrumentedMutex* mu) { assert(w != nullptr && w->batch == nullptr); mu->Unlock(); bool linked_as_leader = LinkOne(w, &newest_writer_); if (!linked_as_leader) { TEST_SYNC_POINT("WriteThread::EnterUnbatched:Wait"); // Last leader will not pick us as a follower since our batch is nullptr AwaitState(w, STATE_GROUP_LEADER, &eu_ctx); } if (enable_pipelined_write_) { WaitForMemTableWriters(); } mu->Lock(); } void WriteThread::ExitUnbatched(Writer* w) { assert(w != nullptr); Writer* newest_writer = w; if (!newest_writer_.compare_exchange_strong(newest_writer, nullptr)) { CreateMissingNewerLinks(newest_writer); Writer* next_leader = w->link_newer; assert(next_leader != nullptr); next_leader->link_older = nullptr; SetState(next_leader, STATE_GROUP_LEADER); } } static WriteThread::AdaptationContext wfmw_ctx("WaitForMemTableWriters"); void WriteThread::WaitForMemTableWriters() { assert(enable_pipelined_write_); if (newest_memtable_writer_.load() == nullptr) { return; } Writer w; if (!LinkOne(&w, &newest_memtable_writer_)) { AwaitState(&w, STATE_MEMTABLE_WRITER_LEADER, &wfmw_ctx); } newest_memtable_writer_.store(nullptr); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db/write_thread.h000066400000000000000000000374311370372246700164740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include #include #include #include "db/dbformat.h" #include "db/pre_release_callback.h" #include "db/write_callback.h" #include "monitoring/instrumented_mutex.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "rocksdb/types.h" #include "rocksdb/write_batch.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class WriteThread { public: enum State : uint8_t { // The initial state of a writer. This is a Writer that is // waiting in JoinBatchGroup. This state can be left when another // thread informs the waiter that it has become a group leader // (-> STATE_GROUP_LEADER), when a leader that has chosen to be // non-parallel informs a follower that its writes have been committed // (-> STATE_COMPLETED), or when a leader that has chosen to perform // updates in parallel and needs this Writer to apply its batch (-> // STATE_PARALLEL_FOLLOWER). STATE_INIT = 1, // The state used to inform a waiting Writer that it has become the // leader, and it should now build a write batch group. Tricky: // this state is not used if newest_writer_ is empty when a writer // enqueues itself, because there is no need to wait (or even to // create the mutex and condvar used to wait) in that case. This is // a terminal state unless the leader chooses to make this a parallel // batch, in which case the last parallel worker to finish will move // the leader to STATE_COMPLETED. STATE_GROUP_LEADER = 2, // The state used to inform a waiting writer that it has become the // leader of memtable writer group. The leader will either write // memtable for the whole group, or launch a parallel group write // to memtable by calling LaunchParallelMemTableWrite. 
STATE_MEMTABLE_WRITER_LEADER = 4, // The state used to inform a waiting writer that it has become a // parallel memtable writer. It can be the group leader who launch the // parallel writer group, or one of the followers. The writer should then // apply its batch to the memtable concurrently and call // CompleteParallelMemTableWriter. STATE_PARALLEL_MEMTABLE_WRITER = 8, // A follower whose writes have been applied, or a parallel leader // whose followers have all finished their work. This is a terminal // state. STATE_COMPLETED = 16, // A state indicating that the thread may be waiting using StateMutex() // and StateCondVar() STATE_LOCKED_WAITING = 32, }; struct Writer; struct WriteGroup { Writer* leader = nullptr; Writer* last_writer = nullptr; SequenceNumber last_sequence; // before running goes to zero, status needs leader->StateMutex() Status status; std::atomic running; size_t size = 0; struct Iterator { Writer* writer; Writer* last_writer; explicit Iterator(Writer* w, Writer* last) : writer(w), last_writer(last) {} Writer* operator*() const { return writer; } Iterator& operator++() { assert(writer != nullptr); if (writer == last_writer) { writer = nullptr; } else { writer = writer->link_newer; } return *this; } bool operator!=(const Iterator& other) const { return writer != other.writer; } }; Iterator begin() const { return Iterator(leader, last_writer); } Iterator end() const { return Iterator(nullptr, nullptr); } }; // Information kept for every waiting writer. 
struct Writer { WriteBatch* batch; bool sync; bool no_slowdown; bool disable_wal; bool disable_memtable; size_t batch_cnt; // if non-zero, number of sub-batches in the write batch PreReleaseCallback* pre_release_callback; uint64_t log_used; // log number that this batch was inserted into uint64_t log_ref; // log number that memtable insert should reference WriteCallback* callback; bool made_waitable; // records lazy construction of mutex and cv std::atomic state; // write under StateMutex() or pre-link WriteGroup* write_group; SequenceNumber sequence; // the sequence number to use for the first key Status status; Status callback_status; // status returned by callback->Callback() std::aligned_storage::type state_mutex_bytes; std::aligned_storage::type state_cv_bytes; Writer* link_older; // read/write only before linking, or as leader Writer* link_newer; // lazy, read/write only before linking, or as leader Writer() : batch(nullptr), sync(false), no_slowdown(false), disable_wal(false), disable_memtable(false), batch_cnt(0), pre_release_callback(nullptr), log_used(0), log_ref(0), callback(nullptr), made_waitable(false), state(STATE_INIT), write_group(nullptr), sequence(kMaxSequenceNumber), link_older(nullptr), link_newer(nullptr) {} Writer(const WriteOptions& write_options, WriteBatch* _batch, WriteCallback* _callback, uint64_t _log_ref, bool _disable_memtable, size_t _batch_cnt = 0, PreReleaseCallback* _pre_release_callback = nullptr) : batch(_batch), sync(write_options.sync), no_slowdown(write_options.no_slowdown), disable_wal(write_options.disableWAL), disable_memtable(_disable_memtable), batch_cnt(_batch_cnt), pre_release_callback(_pre_release_callback), log_used(0), log_ref(_log_ref), callback(_callback), made_waitable(false), state(STATE_INIT), write_group(nullptr), sequence(kMaxSequenceNumber), link_older(nullptr), link_newer(nullptr) {} ~Writer() { if (made_waitable) { StateMutex().~mutex(); StateCV().~condition_variable(); } } bool CheckCallback(DB* db) { if 
(callback != nullptr) { callback_status = callback->Callback(db); } return callback_status.ok(); } void CreateMutex() { if (!made_waitable) { // Note that made_waitable is tracked separately from state // transitions, because we can't atomically create the mutex and // link into the list. made_waitable = true; new (&state_mutex_bytes) std::mutex; new (&state_cv_bytes) std::condition_variable; } } // returns the aggregate status of this Writer Status FinalStatus() { if (!status.ok()) { // a non-ok memtable write status takes presidence assert(callback == nullptr || callback_status.ok()); return status; } else if (!callback_status.ok()) { // if the callback failed then that is the status we want // because a memtable insert should not have been attempted assert(callback != nullptr); assert(status.ok()); return callback_status; } else { // if there is no callback then we only care about // the memtable insert status assert(callback == nullptr || callback_status.ok()); return status; } } bool CallbackFailed() { return (callback != nullptr) && !callback_status.ok(); } bool ShouldWriteToMemtable() { return status.ok() && !CallbackFailed() && !disable_memtable; } bool ShouldWriteToWAL() { return status.ok() && !CallbackFailed() && !disable_wal; } // No other mutexes may be acquired while holding StateMutex(), it is // always last in the order std::mutex& StateMutex() { assert(made_waitable); return *static_cast(static_cast(&state_mutex_bytes)); } std::condition_variable& StateCV() { assert(made_waitable); return *static_cast( static_cast(&state_cv_bytes)); } }; struct AdaptationContext { const char* name; std::atomic value; explicit AdaptationContext(const char* name0) : name(name0), value(0) {} }; explicit WriteThread(const ImmutableDBOptions& db_options); virtual ~WriteThread() = default; // IMPORTANT: None of the methods in this class rely on the db mutex // for correctness. 
All of the methods except JoinBatchGroup and // EnterUnbatched may be called either with or without the db mutex held. // Correctness is maintained by ensuring that only a single thread is // a leader at a time. // Registers w as ready to become part of a batch group, waits until the // caller should perform some work, and returns the current state of the // writer. If w has become the leader of a write batch group, returns // STATE_GROUP_LEADER. If w has been made part of a sequential batch // group and the leader has performed the write, returns STATE_DONE. // If w has been made part of a parallel batch group and is responsible // for updating the memtable, returns STATE_PARALLEL_FOLLOWER. // // The db mutex SHOULD NOT be held when calling this function, because // it will block. // // Writer* w: Writer to be executed as part of a batch group void JoinBatchGroup(Writer* w); // Constructs a write batch group led by leader, which should be a // Writer passed to JoinBatchGroup on the current thread. // // Writer* leader: Writer that is STATE_GROUP_LEADER // WriteGroup* write_group: Out-param of group members // returns: Total batch group byte size size_t EnterAsBatchGroupLeader(Writer* leader, WriteGroup* write_group); // Unlinks the Writer-s in a batch group, wakes up the non-leaders, // and wakes up the next leader (if any). // // WriteGroup* write_group: the write group // Status status: Status of write operation void ExitAsBatchGroupLeader(WriteGroup& write_group, Status status); // Exit batch group on behalf of batch group leader. void ExitAsBatchGroupFollower(Writer* w); // Constructs a write batch group led by leader from newest_memtable_writers_ // list. The leader should either write memtable for the whole group and // call ExitAsMemTableWriter, or launch parallel memtable write through // LaunchParallelMemTableWriters. 
void EnterAsMemTableWriter(Writer* leader, WriteGroup* write_grup); // Memtable writer group leader, or the last finished writer in a parallel // write group, exit from the newest_memtable_writers_ list, and wake up // the next leader if needed. void ExitAsMemTableWriter(Writer* self, WriteGroup& write_group); // Causes JoinBatchGroup to return STATE_PARALLEL_FOLLOWER for all of the // non-leader members of this write batch group. Sets Writer::sequence // before waking them up. // // WriteGroup* write_group: Extra state used to coordinate the parallel add void LaunchParallelMemTableWriters(WriteGroup* write_group); // Reports the completion of w's batch to the parallel group leader, and // waits for the rest of the parallel batch to complete. Returns true // if this thread is the last to complete, and hence should advance // the sequence number and then call EarlyExitParallelGroup, false if // someone else has already taken responsibility for that. bool CompleteParallelMemTableWriter(Writer* w); // Waits for all preceding writers (unlocking mu while waiting), then // registers w as the currently proceeding writer. // // Writer* w: A Writer not eligible for batching // InstrumentedMutex* mu: The db mutex, to unlock while waiting // REQUIRES: db mutex held void EnterUnbatched(Writer* w, InstrumentedMutex* mu); // Completes a Writer begun with EnterUnbatched, unblocking subsequent // writers. void ExitUnbatched(Writer* w); // Wait for all parallel memtable writers to finish, in case pipelined // write is enabled. void WaitForMemTableWriters(); SequenceNumber UpdateLastSequence(SequenceNumber sequence) { if (sequence > last_sequence_) { last_sequence_ = sequence; } return last_sequence_; } // Insert a dummy writer at the tail of the write queue to indicate a write // stall, and fail any writers in the queue with no_slowdown set to true void BeginWriteStall(); // Remove the dummy writer and wake up waiting writers void EndWriteStall(); private: // See AwaitState. 
const uint64_t max_yield_usec_; const uint64_t slow_yield_usec_; // Allow multiple writers write to memtable concurrently. const bool allow_concurrent_memtable_write_; // Enable pipelined write to WAL and memtable. const bool enable_pipelined_write_; // The maximum limit of number of bytes that are written in a single batch // of WAL or memtable write. It is followed when the leader write size // is larger than 1/8 of this limit. const uint64_t max_write_batch_group_size_bytes; // Points to the newest pending writer. Only leader can remove // elements, adding can be done lock-free by anybody. std::atomic newest_writer_; // Points to the newest pending memtable writer. Used only when pipelined // write is enabled. std::atomic newest_memtable_writer_; // The last sequence that have been consumed by a writer. The sequence // is not necessary visible to reads because the writer can be ongoing. SequenceNumber last_sequence_; // A dummy writer to indicate a write stall condition. This will be inserted // at the tail of the writer queue by the leader, so newer writers can just // check for this and bail Writer write_stall_dummy_; // Mutex and condvar for writers to block on a write stall. During a write // stall, writers with no_slowdown set to false will wait on this rather // on the writer queue port::Mutex stall_mu_; port::CondVar stall_cv_; // Waits for w->state & goal_mask using w->StateMutex(). Returns // the state that satisfies goal_mask. uint8_t BlockingAwaitState(Writer* w, uint8_t goal_mask); // Blocks until w->state & goal_mask, returning the state value // that satisfied the predicate. Uses ctx to adaptively use // std::this_thread::yield() to avoid mutex overheads. ctx should be // a context-dependent static. uint8_t AwaitState(Writer* w, uint8_t goal_mask, AdaptationContext* ctx); // Set writer state and wake the writer up if it is waiting. void SetState(Writer* w, uint8_t new_state); // Links w into the newest_writer list. 
Return true if w was linked directly // into the leader position. Safe to call from multiple threads without // external locking. bool LinkOne(Writer* w, std::atomic* newest_writer); // Link write group into the newest_writer list as a whole, while keeping the // order of the writers unchanged. Return true if the group was linked // directly into the leader position. bool LinkGroup(WriteGroup& write_group, std::atomic* newest_writer); // Computes any missing link_newer links. Should not be called // concurrently with itself. void CreateMissingNewerLinks(Writer* head); // Starting from a pending writer, follow link_older to search for next // leader, until we hit boundary. Writer* FindNextLeader(Writer* pending_writer, Writer* boundary); // Set the leader in write_group to completed state and remove it from the // write group. void CompleteLeader(WriteGroup& write_group); // Set a follower in write_group to completed state and remove it from the // write group. void CompleteFollower(Writer* w, WriteGroup& write_group); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db_stress_tool/000077500000000000000000000000001370372246700162725ustar00rootroot00000000000000rocksdb-6.11.4/db_stress_tool/CMakeLists.txt000066400000000000000000000006361370372246700210370ustar00rootroot00000000000000add_executable(db_stress${ARTIFACT_SUFFIX} db_stress.cc db_stress_tool.cc batched_ops_stress.cc cf_consistency_stress.cc db_stress_common.cc db_stress_driver.cc db_stress_test_base.cc db_stress_shared_state.cc db_stress_gflags.cc db_stress_tool.cc no_batched_ops_stress.cc) target_link_libraries(db_stress${ARTIFACT_SUFFIX} ${ROCKSDB_LIB} ${THIRDPARTY_LIBS}) list(APPEND tool_deps db_stress) rocksdb-6.11.4/db_stress_tool/batched_ops_stress.cc000066400000000000000000000320011370372246700224530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifdef GFLAGS
#include "db_stress_tool/db_stress_common.h"

namespace ROCKSDB_NAMESPACE {

// Stress test in which every logical key K is stored as ten physical keys
// "0"+K .. "9"+K that are always written and deleted together in a single
// WriteBatch. Reads then check that the ten entries agree.
class BatchedOpsStressTest : public StressTest {
 public:
  BatchedOpsStressTest() {}
  virtual ~BatchedOpsStressTest() {}

  // Given a key K and value V, this puts ("0"+K, "0"+V), ("1"+K, "1"+V), ...
  // ("9"+K, "9"+V) in DB atomically i.e in a single batch.
  // Also refer BatchedOpsStressTest::TestGet
  //
  // Uses rand_column_families[0] and rand_keys[0] only. Uses Merge instead
  // of Put when FLAGS_use_merge is set. Returns the status of the Write.
  Status TestPut(ThreadState* thread, WriteOptions& write_opts,
                 const ReadOptions& /* read_opts */,
                 const std::vector<int>& rand_column_families,
                 const std::vector<int64_t>& rand_keys, char (&value)[100],
                 std::unique_ptr<MutexLock>& /* lock */) override {
    uint32_t value_base =
        thread->rand.Next() % thread->shared->UNKNOWN_SENTINEL;
    size_t sz = GenerateValue(value_base, value, sizeof(value));
    Slice v(value, sz);
    std::string keys[10] = {"9", "8", "7", "6", "5", "4", "3", "2", "1", "0"};
    std::string values[10] = {"9", "8", "7", "6", "5",
                              "4", "3", "2", "1", "0"};
    Slice value_slices[10];
    WriteBatch batch;
    Status s;
    auto cfh = column_families_[rand_column_families[0]];
    std::string key_str = Key(rand_keys[0]);
    for (int i = 0; i < 10; i++) {
      keys[i] += key_str;
      values[i] += v.ToString();
      value_slices[i] = values[i];
      if (FLAGS_use_merge) {
        batch.Merge(cfh, keys[i], value_slices[i]);
      } else {
        batch.Put(cfh, keys[i], value_slices[i]);
      }
    }

    s = db_->Write(write_opts, &batch);
    if (!s.ok()) {
      fprintf(stderr, "multiput error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      // we did 10 writes each of size sz + 1
      thread->stats.AddBytesForWrites(10, (sz + 1) * 10);
    }

    return s;
  }

  // Given a key K, this deletes ("0"+K), ("1"+K),... ("9"+K)
  // in DB atomically i.e in a single batch. Also refer
  // BatchedOpsStressTest::TestMultiGet.
  Status TestDelete(ThreadState* thread, WriteOptions& writeoptions,
                    const std::vector<int>& rand_column_families,
                    const std::vector<int64_t>& rand_keys,
                    std::unique_ptr<MutexLock>& /* lock */) override {
    std::string keys[10] = {"9", "7", "5", "3", "1", "8", "6", "4", "2", "0"};

    WriteBatch batch;
    Status s;
    auto cfh = column_families_[rand_column_families[0]];
    std::string key_str = Key(rand_keys[0]);
    for (int i = 0; i < 10; i++) {
      keys[i] += key_str;
      batch.Delete(cfh, keys[i]);
    }

    s = db_->Write(writeoptions, &batch);
    if (!s.ok()) {
      fprintf(stderr, "multidelete error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      thread->stats.AddDeletes(10);
    }

    return s;
  }

  // Not supported by this test; asserts in debug builds and returns
  // NotSupported otherwise.
  Status TestDeleteRange(ThreadState* /* thread */,
                         WriteOptions& /* write_opts */,
                         const std::vector<int>& /* rand_column_families */,
                         const std::vector<int64_t>& /* rand_keys */,
                         std::unique_ptr<MutexLock>& /* lock */) override {
    assert(false);
    return Status::NotSupported(
        "BatchedOpsStressTest does not support "
        "TestDeleteRange");
  }

  // Not supported by this test; asserts in debug builds and terminates the
  // process otherwise.
  void TestIngestExternalFile(
      ThreadState* /* thread */,
      const std::vector<int>& /* rand_column_families */,
      const std::vector<int64_t>& /* rand_keys */,
      std::unique_ptr<MutexLock>& /* lock */) override {
    assert(false);
    fprintf(stderr,
            "BatchedOpsStressTest does not support "
            "TestIngestExternalFile\n");
    std::terminate();
  }

  // Given a key K, this gets values for "0"+K, "1"+K,..."9"+K
  // in the same snapshot, and verifies that all the values are of the form
  // "0"+V, "1"+V,..."9"+V.
  // ASSUMES that BatchedOpsStressTest::TestPut was used to put (K, V) into
  // the DB.
  //
  // Returns the status of the last of the ten Get calls.
  Status TestGet(ThreadState* thread, const ReadOptions& readoptions,
                 const std::vector<int>& rand_column_families,
                 const std::vector<int64_t>& rand_keys) override {
    std::string keys[10] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
    Slice key_slices[10];
    std::string values[10];
    ReadOptions readoptionscopy = readoptions;
    readoptionscopy.snapshot = db_->GetSnapshot();
    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    auto cfh = column_families_[rand_column_families[0]];
    std::string from_db;
    Status s;
    for (int i = 0; i < 10; i++) {
      keys[i] += key.ToString();
      key_slices[i] = keys[i];
      s = db_->Get(readoptionscopy, cfh, key_slices[i], &from_db);
      if (!s.ok() && !s.IsNotFound()) {
        fprintf(stderr, "get error: %s\n", s.ToString().c_str());
        values[i] = "";
        thread->stats.AddErrors(1);
        // we continue after error rather than exiting so that we can
        // find more errors if any
      } else if (s.IsNotFound()) {
        values[i] = "";
        thread->stats.AddGets(1, 0);
      } else {
        values[i] = from_db;

        char expected_prefix = (keys[i])[0];
        char actual_prefix = (values[i])[0];
        if (actual_prefix != expected_prefix) {
          fprintf(stderr, "error expected prefix = %c actual = %c\n",
                  expected_prefix, actual_prefix);
        }
        (values[i])[0] = ' ';  // blank out the differing character
        thread->stats.AddGets(1, 1);
      }
    }
    db_->ReleaseSnapshot(readoptionscopy.snapshot);

    // Now that we retrieved all values, check that they all match
    for (int i = 1; i < 10; i++) {
      if (values[i] != values[0]) {
        fprintf(stderr, "error : inconsistent values for key %s: %s, %s\n",
                key.ToString(true).c_str(), StringToHex(values[0]).c_str(),
                StringToHex(values[i]).c_str());
        // we continue after error rather than exiting so that we can
        // find more errors if any
      }
    }

    return s;
  }

  // MultiGet flavour of TestGet: for every entry of rand_keys, fetches the
  // ten prefixed keys from rand_column_families[0] under one snapshot and
  // checks that the values agree. Returns one status per entry of rand_keys;
  // an entry is overwritten only when a lookup for it failed or was not
  // found.
  std::vector<Status> TestMultiGet(
      ThreadState* thread, const ReadOptions& readoptions,
      const std::vector<int>& rand_column_families,
      const std::vector<int64_t>& rand_keys) override {
    size_t num_keys = rand_keys.size();
    std::vector<Status> ret_status(num_keys);
    std::array<std::string, 10> keys = {
        {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}};
    size_t num_prefixes = keys.size();
    for (size_t rand_key = 0; rand_key < num_keys; ++rand_key) {
      std::vector<Slice> key_slices;
      std::vector<PinnableSlice> values(num_prefixes);
      std::vector<Status> statuses(num_prefixes);
      ReadOptions readoptionscopy = readoptions;
      readoptionscopy.snapshot = db_->GetSnapshot();
      std::vector<std::string> key_str;
      // Reserve up front so the Slices taken below are not invalidated by a
      // reallocation of key_str.
      key_str.reserve(num_prefixes);
      key_slices.reserve(num_prefixes);
      ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];

      for (size_t key = 0; key < num_prefixes; ++key) {
        key_str.emplace_back(keys[key] + Key(rand_keys[rand_key]));
        key_slices.emplace_back(key_str.back());
      }
      db_->MultiGet(readoptionscopy, cfh, num_prefixes, key_slices.data(),
                    values.data(), statuses.data());
      for (size_t i = 0; i < num_prefixes; i++) {
        Status s = statuses[i];
        if (!s.ok() && !s.IsNotFound()) {
          fprintf(stderr, "get error: %s\n", s.ToString().c_str());
          thread->stats.AddErrors(1);
          ret_status[rand_key] = s;
          // we continue after error rather than exiting so that we can
          // find more errors if any
        } else if (s.IsNotFound()) {
          thread->stats.AddGets(1, 0);
          ret_status[rand_key] = s;
        } else {
          char expected_prefix = (keys[i])[0];
          char actual_prefix = (values[i])[0];
          if (actual_prefix != expected_prefix) {
            fprintf(stderr, "error expected prefix = %c actual = %c\n",
                    expected_prefix, actual_prefix);
          }
          std::string str;
          str.assign(values[i].data(), values[i].size());
          values[i].Reset();
          str[0] = ' ';  // blank out the differing character
          values[i].PinSelf(str);
          thread->stats.AddGets(1, 1);
        }
      }
      db_->ReleaseSnapshot(readoptionscopy.snapshot);

      // Now that we retrieved all values, check that they all match
      for (size_t i = 1; i < num_prefixes; i++) {
        if (values[i] != values[0]) {
          fprintf(stderr, "error : inconsistent values for key %s: %s, %s\n",
                  key_str[i].c_str(), StringToHex(values[0].ToString()).c_str(),
                  StringToHex(values[i].ToString()).c_str());
          // we continue after error rather than exiting so that we can
          // find more errors if any
        }
      }
    }

    return ret_status;
  }

  // Given a key, this does prefix scans for "0"+P, "1"+P,..."9"+P
  // in the same snapshot where P is the first FLAGS_prefix_size - 1 bytes
  // of the key. Each of these 10 scans returns a series of values;
  // each series should be the same length, and it is verified for each
  // index i that all the i'th values are of the form "0"+V, "1"+V,..."9"+V.
  // ASSUMES that BatchedOpsStressTest::TestPut was used to put (K, V)
  //
  // Returns the first non-OK iterator status, or OK if all ten iterators
  // finished cleanly.
  Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
                        const std::vector<int>& rand_column_families,
                        const std::vector<int64_t>& rand_keys) override {
    size_t prefix_to_use =
        (FLAGS_prefix_size < 0) ? 7 : static_cast<size_t>(FLAGS_prefix_size);
    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    auto cfh = column_families_[rand_column_families[0]];
    std::string prefixes[10] = {"0", "1", "2", "3", "4",
                                "5", "6", "7", "8", "9"};
    Slice prefix_slices[10];
    ReadOptions readoptionscopy[10];
    const Snapshot* snapshot = db_->GetSnapshot();
    Iterator* iters[10];
    std::string upper_bounds[10];
    Slice ub_slices[10];
    Status s = Status::OK();
    for (int i = 0; i < 10; i++) {
      prefixes[i] += key.ToString();
      prefixes[i].resize(prefix_to_use);
      prefix_slices[i] = Slice(prefixes[i]);
      readoptionscopy[i] = readoptions;
      readoptionscopy[i].snapshot = snapshot;
      if (thread->rand.OneIn(2) &&
          GetNextPrefix(prefix_slices[i], &(upper_bounds[i]))) {
        // For half of the time, set the upper bound to the next prefix
        ub_slices[i] = Slice(upper_bounds[i]);
        readoptionscopy[i].iterate_upper_bound = &(ub_slices[i]);
      }
      iters[i] = db_->NewIterator(readoptionscopy[i], cfh);
      iters[i]->Seek(prefix_slices[i]);
    }

    long count = 0;
    while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) {
      count++;
      std::string values[10];
      // get list of all values for this iteration
      for (int i = 0; i < 10; i++) {
        // no iterator should finish before the first one
        assert(iters[i]->Valid() &&
               iters[i]->key().starts_with(prefix_slices[i]));
        values[i] = iters[i]->value().ToString();

        char expected_first = (prefixes[i])[0];
        char actual_first = (values[i])[0];

        if (actual_first != expected_first) {
          fprintf(stderr, "error expected first = %c actual = %c\n",
                  expected_first, actual_first);
        }
        (values[i])[0] = ' ';  // blank out the differing character
      }
      // make sure all values are equivalent; the loop starts at 0 because it
      // also advances every iterator, including the first one
      for (int i = 0; i < 10; i++) {
        if (values[i] != values[0]) {
          fprintf(stderr,
                  "error : %d, inconsistent values for prefix %s: %s, %s\n", i,
                  prefixes[i].c_str(), StringToHex(values[0]).c_str(),
                  StringToHex(values[i]).c_str());
          // we continue after error rather than exiting so that we can
          // find more errors if any
        }
        iters[i]->Next();
      }
    }

    // cleanup iterators and snapshot
    for (int i = 0; i < 10; i++) {
      // if the first iterator finished, they should have all finished
      assert(!iters[i]->Valid() ||
             !iters[i]->key().starts_with(prefix_slices[i]));
      assert(iters[i]->status().ok());
      // Keep the first iterator error so that it is reported below even when
      // asserts are compiled out; previously `s` was never updated and the
      // error branch could not be reached.
      if (s.ok()) {
        s = iters[i]->status();
      }
      delete iters[i];
    }
    db_->ReleaseSnapshot(snapshot);

    if (s.ok()) {
      thread->stats.AddPrefixes(1, count);
    } else {
      fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    }

    return s;
  }

  // Intentionally a no-op for this test.
  void VerifyDb(ThreadState* /* thread */) const override {}
};

// Factory; the caller receives a newly allocated BatchedOpsStressTest.
StressTest* CreateBatchedOpsStressTest() { return new BatchedOpsStressTest(); }

} // namespace ROCKSDB_NAMESPACE
#endif // GFLAGS rocksdb-6.11.4/db_stress_tool/cf_consistency_stress.cc000066400000000000000000000523241370372246700232230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifdef GFLAGS
#include "db_stress_tool/db_stress_common.h"

namespace ROCKSDB_NAMESPACE {

// Stress test that writes the same key/value to every selected column family
// in one WriteBatch and checks on reads that the column families agree.
class CfConsistencyStressTest : public StressTest {
 public:
  CfConsistencyStressTest() : batch_id_(0) {}

  ~CfConsistencyStressTest() override {}

  // Writes (Key(rand_keys[0]), generated value) to every column family in
  // rand_column_families in a single batch, using Merge when FLAGS_use_merge
  // is set and Put otherwise. Returns the status of the Write.
  Status TestPut(ThreadState* thread, WriteOptions& write_opts,
                 const ReadOptions& /* read_opts */,
                 const std::vector<int>& rand_column_families,
                 const std::vector<int64_t>& rand_keys, char (&value)[100],
                 std::unique_ptr<MutexLock>& /* lock */) override {
    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    uint64_t value_base = batch_id_.fetch_add(1);
    size_t sz =
        GenerateValue(static_cast<uint32_t>(value_base), value, sizeof(value));
    Slice v(value, sz);
    WriteBatch batch;
    for (auto cf : rand_column_families) {
      ColumnFamilyHandle* cfh = column_families_[cf];
      if (FLAGS_use_merge) {
        batch.Merge(cfh, key, v);
      } else { /* !FLAGS_use_merge */
        batch.Put(cfh, key, v);
      }
    }
    Status s = db_->Write(write_opts, &batch);
    if (!s.ok()) {
      fprintf(stderr, "multi put or merge error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      auto num = static_cast<long>(rand_column_families.size());
      thread->stats.AddBytesForWrites(num, (sz + 1) * num);
    }

    return s;
  }

  // Deletes Key(rand_keys[0]) from every column family in
  // rand_column_families in a single batch.
  Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
                    const std::vector<int>& rand_column_families,
                    const std::vector<int64_t>& rand_keys,
                    std::unique_ptr<MutexLock>& /* lock */) override {
    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    WriteBatch batch;
    for (auto cf : rand_column_families) {
      ColumnFamilyHandle* cfh = column_families_[cf];
      batch.Delete(cfh, key);
    }
    Status s = db_->Write(write_opts, &batch);
    if (!s.ok()) {
      fprintf(stderr, "multidel error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      thread->stats.AddDeletes(static_cast<long>(rand_column_families.size()));
    }
    return s;
  }

  // Range-deletes [Key(k), Key(k + FLAGS_range_deletion_width)) from every
  // column family in rand_column_families in a single batch. k is
  // rand_keys[0], re-drawn when the range would extend past the max key.
  Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
                         const std::vector<int>& rand_column_families,
                         const std::vector<int64_t>& rand_keys,
                         std::unique_ptr<MutexLock>& /* lock */) override {
    int64_t rand_key = rand_keys[0];
    auto shared = thread->shared;
    int64_t max_key = shared->GetMaxKey();
    if (rand_key > max_key - FLAGS_range_deletion_width) {
      rand_key =
          thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1);
    }
    std::string key_str = Key(rand_key);
    Slice key = key_str;
    std::string end_key_str = Key(rand_key + FLAGS_range_deletion_width);
    Slice end_key = end_key_str;
    WriteBatch batch;
    for (auto cf : rand_column_families) {
      // `cf` is already a column family index (the loop iterates over the
      // elements of rand_column_families), so it must not be used to index
      // rand_column_families again.
      ColumnFamilyHandle* cfh = column_families_[cf];
      batch.DeleteRange(cfh, key, end_key);
    }
    Status s = db_->Write(write_opts, &batch);
    if (!s.ok()) {
      fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      thread->stats.AddRangeDeletions(
          static_cast<long>(rand_column_families.size()));
    }
    return s;
  }

  // Not supported by this test; asserts in debug builds and terminates the
  // process otherwise.
  void TestIngestExternalFile(
      ThreadState* /* thread */,
      const std::vector<int>& /* rand_column_families */,
      const std::vector<int64_t>& /* rand_keys */,
      std::unique_ptr<MutexLock>& /* lock */) override {
    assert(false);
    fprintf(stderr,
            "CfConsistencyStressTest does not support TestIngestExternalFile "
            "because it's not possible to verify the result\n");
    std::terminate();
  }

  // With probability 1/2 reads Key(rand_keys[0]) from one randomly chosen
  // column family; otherwise reads it from every column family in
  // rand_column_families under one snapshot and checks that they agree.
  // An inconsistency is reported and flagged as a verification failure.
  // Returns the status of the last Get issued.
  Status TestGet(ThreadState* thread, const ReadOptions& readoptions,
                 const std::vector<int>& rand_column_families,
                 const std::vector<int64_t>& rand_keys) override {
    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    Status s;
    bool is_consistent = true;

    if (thread->rand.OneIn(2)) {
      // 1/2 chance, does a random read from random CF
      auto cfh =
          column_families_[rand_column_families[thread->rand.Next() %
                                                rand_column_families.size()]];
      std::string from_db;
      s = db_->Get(readoptions, cfh, key, &from_db);
    } else {
      // 1/2 chance, comparing one key is the same across all CFs
      const Snapshot* snapshot = db_->GetSnapshot();
      ReadOptions readoptionscopy = readoptions;
      readoptionscopy.snapshot = snapshot;

      std::string value0;
      s = db_->Get(readoptionscopy, column_families_[rand_column_families[0]],
                   key, &value0);
      if (s.ok() || s.IsNotFound()) {
        bool found = s.ok();
        // NOTE(review): the diagnostics below name column_family_names_[0]
        // and [i], while the reads use rand_column_families[0] and [i]. If
        // rand_column_families is not the identity ordering the printed
        // names may not match the families actually read -- confirm.
        for (size_t i = 1; i < rand_column_families.size(); i++) {
          std::string value1;
          s = db_->Get(readoptionscopy,
                       column_families_[rand_column_families[i]], key, &value1);
          if (!s.ok() && !s.IsNotFound()) {
            break;
          }
          if (!found && s.ok()) {
            fprintf(stderr, "Get() return different results with key %s\n",
                    Slice(key_str).ToString(true).c_str());
            fprintf(stderr, "CF %s is not found\n",
                    column_family_names_[0].c_str());
            fprintf(stderr, "CF %s returns value %s\n",
                    column_family_names_[i].c_str(),
                    Slice(value1).ToString(true).c_str());
            is_consistent = false;
          } else if (found && s.IsNotFound()) {
            fprintf(stderr, "Get() return different results with key %s\n",
                    Slice(key_str).ToString(true).c_str());
            fprintf(stderr, "CF %s returns value %s\n",
                    column_family_names_[0].c_str(),
                    Slice(value0).ToString(true).c_str());
            fprintf(stderr, "CF %s is not found\n",
                    column_family_names_[i].c_str());
            is_consistent = false;
          } else if (s.ok() && value0 != value1) {
            fprintf(stderr, "Get() return different results with key %s\n",
                    Slice(key_str).ToString(true).c_str());
            fprintf(stderr, "CF %s returns value %s\n",
                    column_family_names_[0].c_str(),
                    Slice(value0).ToString(true).c_str());
            fprintf(stderr, "CF %s returns value %s\n",
                    column_family_names_[i].c_str(),
                    Slice(value1).ToString(true).c_str());
            is_consistent = false;
          }
          if (!is_consistent) {
            break;
          }
        }
      }

      db_->ReleaseSnapshot(snapshot);
    }
    if (!is_consistent) {
      fprintf(stderr, "TestGet error: is_consistent is false\n");
      thread->stats.AddErrors(1);
      // Fail fast to preserve the DB state.
      thread->shared->SetVerificationFailure();
    } else if (s.ok()) {
      thread->stats.AddGets(1, 1);
    } else if (s.IsNotFound()) {
      thread->stats.AddGets(1, 0);
    } else {
      fprintf(stderr, "TestGet error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    }
    return s;
  }

  // Issues one MultiGet for all of rand_keys against
  // rand_column_families[0] and records per-key statistics. Returns the
  // per-key statuses.
  std::vector<Status> TestMultiGet(
      ThreadState* thread, const ReadOptions& read_opts,
      const std::vector<int>& rand_column_families,
      const std::vector<int64_t>& rand_keys) override {
    size_t num_keys = rand_keys.size();
    std::vector<std::string> key_str;
    std::vector<Slice> keys;
    // Reserve up front so the Slices taken below are not invalidated by a
    // reallocation of key_str.
    keys.reserve(num_keys);
    key_str.reserve(num_keys);
    std::vector<PinnableSlice> values(num_keys);
    std::vector<Status> statuses(num_keys);
    ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];

    for (size_t i = 0; i < num_keys; ++i) {
      key_str.emplace_back(Key(rand_keys[i]));
      keys.emplace_back(key_str.back());
    }
    db_->MultiGet(read_opts, cfh, num_keys, keys.data(), values.data(),
                  statuses.data());
    // Iterate by reference: the statuses are only inspected here.
    for (const auto& s : statuses) {
      if (s.ok()) {
        // found case
        thread->stats.AddGets(1, 1);
      } else if (s.IsNotFound()) {
        // not found case
        thread->stats.AddGets(1, 0);
      } else {
        // errors case
        fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str());
        thread->stats.AddErrors(1);
      }
    }
    return statuses;
  }

  // Scans all keys sharing the prefix of Key(rand_keys[0]) in one randomly
  // chosen column family, optionally bounded above by the next prefix.
  // Returns the iterator's final status.
  Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
                        const std::vector<int>& rand_column_families,
                        const std::vector<int64_t>& rand_keys) override {
    size_t prefix_to_use =
        (FLAGS_prefix_size < 0) ? 7 : static_cast<size_t>(FLAGS_prefix_size);

    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    Slice prefix = Slice(key.data(), prefix_to_use);

    std::string upper_bound;
    Slice ub_slice;
    ReadOptions ro_copy = readoptions;
    // Get the next prefix first and then see if we want to set upper bound.
    // We'll use the next prefix in an assertion later on
    if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) {
      ub_slice = Slice(upper_bound);
      ro_copy.iterate_upper_bound = &ub_slice;
    }
    auto cfh =
        column_families_[rand_column_families[thread->rand.Next() %
                                              rand_column_families.size()]];
    Iterator* iter = db_->NewIterator(ro_copy, cfh);
    unsigned long count = 0;
    for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix);
         iter->Next()) {
      ++count;
    }
    assert(prefix_to_use == 0 ||
           count <= GetPrefixKeyCount(prefix.ToString(), upper_bound));
    Status s = iter->status();
    if (s.ok()) {
      thread->stats.AddPrefixes(1, count);
    } else {
      fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    }
    delete iter;
    return s;
  }

  ColumnFamilyHandle* GetControlCfh(ThreadState* thread,
                                    int /*column_family_id*/
                                    ) override {
    // All column families should contain the same data. Randomly pick one.
    return column_families_[thread->rand.Next() % column_families_.size()];
  }

#ifdef ROCKSDB_LITE
  // Checkpoints are unavailable in this build; asserts in debug builds and
  // terminates the process otherwise.
  Status TestCheckpoint(ThreadState* /* thread */,
                        const std::vector<int>& /* rand_column_families */,
                        const std::vector<int64_t>& /* rand_keys */) override {
    assert(false);
    fprintf(stderr,
            "RocksDB lite does not support "
            "TestCheckpoint\n");
    std::terminate();
  }
#else
  // Creates a checkpoint in a per-thread directory under FLAGS_db, opens it
  // read-only (only when FLAGS_clear_column_family_one_in == 0), then closes
  // and destroys it. Returns the first failing status, or OK.
  Status TestCheckpoint(ThreadState* thread,
                        const std::vector<int>& /* rand_column_families */,
                        const std::vector<int64_t>& /* rand_keys */) override {
    std::string checkpoint_dir =
        FLAGS_db + "/.checkpoint" + ToString(thread->tid);

    // We need to clear DB including manifest files, so make a copy
    Options opt_copy = options_;
    opt_copy.env = db_stress_env->target();
    DestroyDB(checkpoint_dir, opt_copy);

    Checkpoint* checkpoint = nullptr;
    Status s = Checkpoint::Create(db_, &checkpoint);
    if (s.ok()) {
      s = checkpoint->CreateCheckpoint(checkpoint_dir);
    }
    // The Checkpoint object is not needed past this point. Release it on
    // every path; previously it was freed only when CreateCheckpoint
    // succeeded and leaked otherwise.
    delete checkpoint;
    checkpoint = nullptr;

    std::vector<ColumnFamilyHandle*> cf_handles;
    DB* checkpoint_db = nullptr;
    if (s.ok()) {
      Options options(options_);
      options.listeners.clear();
      std::vector<ColumnFamilyDescriptor> cf_descs;
      // TODO(ajkr): `column_family_names_` is not safe to access here when
      // `clear_column_family_one_in != 0`. But we can't easily switch to
      // `ListColumnFamilies` to get names because it won't necessarily give
      // the same order as `column_family_names_`.
      if (FLAGS_clear_column_family_one_in == 0) {
        for (const auto& name : column_family_names_) {
          cf_descs.emplace_back(name, ColumnFamilyOptions(options));
        }
        s = DB::OpenForReadOnly(DBOptions(options), checkpoint_dir, cf_descs,
                                &cf_handles, &checkpoint_db);
      }
    }
    if (checkpoint_db != nullptr) {
      for (auto cfh : cf_handles) {
        delete cfh;
      }
      cf_handles.clear();
      delete checkpoint_db;
      checkpoint_db = nullptr;
    }
    DestroyDB(checkpoint_dir, opt_copy);
    if (!s.ok()) {
      fprintf(stderr, "A checkpoint operation failed with: %s\n",
              s.ToString().c_str());
    }
    return s;
  }
#endif  // !ROCKSDB_LITE

  void VerifyDb(ThreadState* thread) const override {
    ReadOptions options(FLAGS_verify_checksum, true);
    // We must set total_order_seek to true because we are doing a SeekToFirst
    // on a column family whose memtables may support (by default) prefix-based
    // iterator. In this case, NewIterator with options.total_order_seek being
    // false returns a prefix-based iterator. Calling SeekToFirst using this
    // iterator causes the iterator to become invalid. That means we cannot
    // iterate the memtable using this iterator any more, although the memtable
    // contains the most up-to-date key-values.
options.total_order_seek = true; const auto ss_deleter = [this](const Snapshot* ss) { db_->ReleaseSnapshot(ss); }; std::unique_ptr snapshot_guard( db_->GetSnapshot(), ss_deleter); options.snapshot = snapshot_guard.get(); assert(thread != nullptr); auto shared = thread->shared; std::vector> iters(column_families_.size()); for (size_t i = 0; i != column_families_.size(); ++i) { iters[i].reset(db_->NewIterator(options, column_families_[i])); } for (auto& iter : iters) { iter->SeekToFirst(); } size_t num = column_families_.size(); assert(num == iters.size()); std::vector statuses(num, Status::OK()); do { if (shared->HasVerificationFailedYet()) { break; } size_t valid_cnt = 0; size_t idx = 0; for (auto& iter : iters) { if (iter->Valid()) { ++valid_cnt; } else { statuses[idx] = iter->status(); } ++idx; } if (valid_cnt == 0) { Status status; for (size_t i = 0; i != num; ++i) { const auto& s = statuses[i]; if (!s.ok()) { status = s; fprintf(stderr, "Iterator on cf %s has error: %s\n", column_families_[i]->GetName().c_str(), s.ToString().c_str()); shared->SetVerificationFailure(); } } break; } else if (valid_cnt != iters.size()) { shared->SetVerificationFailure(); for (size_t i = 0; i != num; ++i) { if (!iters[i]->Valid()) { if (statuses[i].ok()) { fprintf(stderr, "Finished scanning cf %s\n", column_families_[i]->GetName().c_str()); } else { fprintf(stderr, "Iterator on cf %s has error: %s\n", column_families_[i]->GetName().c_str(), statuses[i].ToString().c_str()); } } else { fprintf(stderr, "cf %s has remaining data to scan\n", column_families_[i]->GetName().c_str()); } } break; } if (shared->HasVerificationFailedYet()) { break; } // If the program reaches here, then all column families' iterators are // still valid. 
if (shared->PrintingVerificationResults()) { continue; } Slice key; Slice value; int num_mismatched_cfs = 0; for (size_t i = 0; i != num; ++i) { if (i == 0) { key = iters[i]->key(); value = iters[i]->value(); } else { int cmp = key.compare(iters[i]->key()); if (cmp != 0) { ++num_mismatched_cfs; if (1 == num_mismatched_cfs) { fprintf(stderr, "Verification failed\n"); fprintf(stderr, "Latest Sequence Number: %" PRIu64 "\n", db_->GetLatestSequenceNumber()); fprintf(stderr, "[%s] %s => %s\n", column_families_[0]->GetName().c_str(), key.ToString(true /* hex */).c_str(), value.ToString(true /* hex */).c_str()); } fprintf(stderr, "[%s] %s => %s\n", column_families_[i]->GetName().c_str(), iters[i]->key().ToString(true /* hex */).c_str(), iters[i]->value().ToString(true /* hex */).c_str()); #ifndef ROCKSDB_LITE Slice begin_key; Slice end_key; if (cmp < 0) { begin_key = key; end_key = iters[i]->key(); } else { begin_key = iters[i]->key(); end_key = key; } std::vector versions; const size_t kMaxNumIKeys = 8; const auto print_key_versions = [&](ColumnFamilyHandle* cfh) { Status s = GetAllKeyVersions(db_, cfh, begin_key, end_key, kMaxNumIKeys, &versions); if (!s.ok()) { fprintf(stderr, "%s\n", s.ToString().c_str()); return; } assert(nullptr != cfh); fprintf(stderr, "Internal keys in CF '%s', [%s, %s] (max %" ROCKSDB_PRIszt ")\n", cfh->GetName().c_str(), begin_key.ToString(true /* hex */).c_str(), end_key.ToString(true /* hex */).c_str(), kMaxNumIKeys); for (const KeyVersion& kv : versions) { fprintf(stderr, " key %s seq %" PRIu64 " type %d\n", Slice(kv.user_key).ToString(true).c_str(), kv.sequence, kv.type); } }; if (1 == num_mismatched_cfs) { print_key_versions(column_families_[0]); } print_key_versions(column_families_[i]); #endif // ROCKSDB_LITE shared->SetVerificationFailure(); } } } shared->FinishPrintingVerificationResults(); for (auto& iter : iters) { iter->Next(); } } while (true); } #ifndef ROCKSDB_LITE void ContinuouslyVerifyDb(ThreadState* thread) const override { 
assert(thread); Status status; DB* db_ptr = cmp_db_ ? cmp_db_ : db_; const auto& cfhs = cmp_db_ ? cmp_cfhs_ : column_families_; const auto ss_deleter = [&](const Snapshot* ss) { db_ptr->ReleaseSnapshot(ss); }; std::unique_ptr snapshot_guard( db_ptr->GetSnapshot(), ss_deleter); if (cmp_db_) { status = cmp_db_->TryCatchUpWithPrimary(); } SharedState* shared = thread->shared; assert(shared); if (!status.ok()) { shared->SetShouldStopTest(); return; } assert(cmp_db_ || snapshot_guard.get()); const auto checksum_column_family = [](Iterator* iter, uint32_t* checksum) -> Status { assert(nullptr != checksum); uint32_t ret = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ret = crc32c::Extend(ret, iter->key().data(), iter->key().size()); ret = crc32c::Extend(ret, iter->value().data(), iter->value().size()); } *checksum = ret; return iter->status(); }; ReadOptions ropts; ropts.total_order_seek = true; ropts.snapshot = snapshot_guard.get(); uint32_t crc = 0; { // Compute crc for all key-values of default column family. 
std::unique_ptr it(db_ptr->NewIterator(ropts)); status = checksum_column_family(it.get(), &crc); } uint32_t tmp_crc = 0; if (status.ok()) { for (ColumnFamilyHandle* cfh : cfhs) { if (cfh == db_ptr->DefaultColumnFamily()) { continue; } std::unique_ptr it(db_ptr->NewIterator(ropts, cfh)); status = checksum_column_family(it.get(), &tmp_crc); if (!status.ok() || tmp_crc != crc) { break; } } } if (!status.ok() || tmp_crc != crc) { shared->SetShouldStopTest(); } } #endif // !ROCKSDB_LITE std::vector GenerateColumnFamilies( const int /* num_column_families */, int /* rand_column_family */) const override { std::vector ret; int num = static_cast(column_families_.size()); int k = 0; std::generate_n(back_inserter(ret), num, [&k]() -> int { return k++; }); return ret; } private: std::atomic batch_id_; }; StressTest* CreateCfConsistencyStressTest() { return new CfConsistencyStressTest(); } } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress.cc000066400000000000000000000013731370372246700205750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef GFLAGS #include int main() { fprintf(stderr, "Please install gflags to run rocksdb tools\n"); return 1; } #else #include int main(int argc, char** argv) { return ROCKSDB_NAMESPACE::db_stress_tool(argc, argv); } #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_common.cc000066400000000000000000000212271370372246700221450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // #ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" #include ROCKSDB_NAMESPACE::DbStressEnvWrapper* db_stress_env = nullptr; #ifndef NDEBUG // If non-null, injects read error at a rate specified by the // read_fault_one_in flag std::shared_ptr fault_fs_guard; #endif // NDEBUG enum ROCKSDB_NAMESPACE::CompressionType compression_type_e = ROCKSDB_NAMESPACE::kSnappyCompression; enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e = ROCKSDB_NAMESPACE::kSnappyCompression; enum ROCKSDB_NAMESPACE::ChecksumType checksum_type_e = ROCKSDB_NAMESPACE::kCRC32c; enum RepFactory FLAGS_rep_factory = kSkipList; std::vector sum_probs(100001); int64_t zipf_sum_size = 100000; namespace ROCKSDB_NAMESPACE { // Zipfian distribution is generated based on a pre-calculated array. // It should be used before start the stress test. // First, the probability distribution function (PDF) of this Zipfian follows // power low. P(x) = 1/(x^alpha). // So we calculate the PDF when x is from 0 to zipf_sum_size in first for loop // and add the PDF value togetger as c. So we get the total probability in c. // Next, we calculate inverse CDF of Zipfian and store the value of each in // an array (sum_probs). The rank is from 0 to zipf_sum_size. For example, for // integer k, its Zipfian CDF value is sum_probs[k]. // Third, when we need to get an integer whose probability follows Zipfian // distribution, we use a rand_seed [0,1] which follows uniform distribution // as a seed and search it in the sum_probs via binary search. 
When we find // the closest sum_probs[i] of rand_seed, i is the integer that in // [0, zipf_sum_size] following Zipfian distribution with parameter alpha. // Finally, we can scale i to [0, max_key] scale. // In order to avoid that hot keys are close to each other and skew towards 0, // we use Rando64 to shuffle it. void InitializeHotKeyGenerator(double alpha) { double c = 0; for (int64_t i = 1; i <= zipf_sum_size; i++) { c = c + (1.0 / std::pow(static_cast(i), alpha)); } c = 1.0 / c; sum_probs[0] = 0; for (int64_t i = 1; i <= zipf_sum_size; i++) { sum_probs[i] = sum_probs[i - 1] + c / std::pow(static_cast(i), alpha); } } // Generate one key that follows the Zipfian distribution. The skewness // is decided by the parameter alpha. Input is the rand_seed [0,1] and // the max of the key to be generated. If we directly return tmp_zipf_seed, // the closer to 0, the higher probability will be. To randomly distribute // the hot keys in [0, max_key], we use Random64 to shuffle it. int64_t GetOneHotKeyID(double rand_seed, int64_t max_key) { int64_t low = 1, mid, high = zipf_sum_size, zipf = 0; while (low <= high) { mid = (low + high) / 2; if (sum_probs[mid] >= rand_seed && sum_probs[mid - 1] < rand_seed) { zipf = mid; break; } else if (sum_probs[mid] >= rand_seed) { high = mid - 1; } else { low = mid + 1; } } int64_t tmp_zipf_seed = zipf * max_key / zipf_sum_size; Random64 rand_local(tmp_zipf_seed); return rand_local.Next() % max_key; } void PoolSizeChangeThread(void* v) { assert(FLAGS_compaction_thread_pool_adjust_interval > 0); ThreadState* thread = reinterpret_cast(v); SharedState* shared = thread->shared; while (true) { { MutexLock l(shared->GetMutex()); if (shared->ShouldStopBgThread()) { shared->IncBgThreadsFinished(); if (shared->BgThreadsFinished()) { shared->GetCondVar()->SignalAll(); } return; } } auto thread_pool_size_base = FLAGS_max_background_compactions; auto thread_pool_size_var = FLAGS_compaction_thread_pool_variations; int new_thread_pool_size = 
thread_pool_size_base - thread_pool_size_var + thread->rand.Next() % (thread_pool_size_var * 2 + 1); if (new_thread_pool_size < 1) { new_thread_pool_size = 1; } db_stress_env->SetBackgroundThreads(new_thread_pool_size, ROCKSDB_NAMESPACE::Env::Priority::LOW); // Sleep up to 3 seconds db_stress_env->SleepForMicroseconds( thread->rand.Next() % FLAGS_compaction_thread_pool_adjust_interval * 1000 + 1); } } void DbVerificationThread(void* v) { assert(FLAGS_continuous_verification_interval > 0); auto* thread = reinterpret_cast(v); SharedState* shared = thread->shared; StressTest* stress_test = shared->GetStressTest(); assert(stress_test != nullptr); while (true) { { MutexLock l(shared->GetMutex()); if (shared->ShouldStopBgThread()) { shared->IncBgThreadsFinished(); if (shared->BgThreadsFinished()) { shared->GetCondVar()->SignalAll(); } return; } } if (!shared->HasVerificationFailedYet()) { stress_test->ContinuouslyVerifyDb(thread); } db_stress_env->SleepForMicroseconds( thread->rand.Next() % FLAGS_continuous_verification_interval * 1000 + 1); } } void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz) { if (!FLAGS_verbose) { return; } std::string tmp; tmp.reserve(sz * 2 + 16); char buf[4]; for (size_t i = 0; i < sz; i++) { snprintf(buf, 4, "%X", value[i]); tmp.append(buf); } fprintf(stdout, "[CF %d] %" PRIi64 " == > (%" ROCKSDB_PRIszt ") %s\n", cf, key, sz, tmp.c_str()); } // Note that if hot_key_alpha != 0, it generates the key based on Zipfian // distribution. Keys are randomly scattered to [0, FLAGS_max_key]. It does // not ensure the order of the keys being generated and the keys does not have // the active range which is related to FLAGS_active_width. 
int64_t GenerateOneKey(ThreadState* thread, uint64_t iteration) { const double completed_ratio = static_cast(iteration) / FLAGS_ops_per_thread; const int64_t base_key = static_cast( completed_ratio * (FLAGS_max_key - FLAGS_active_width)); int64_t rand_seed = base_key + thread->rand.Next() % FLAGS_active_width; int64_t cur_key = rand_seed; if (FLAGS_hot_key_alpha != 0) { // If set the Zipfian distribution Alpha to non 0, use Zipfian double float_rand = (static_cast(thread->rand.Next() % FLAGS_max_key)) / FLAGS_max_key; cur_key = GetOneHotKeyID(float_rand, FLAGS_max_key); } return cur_key; } // Note that if hot_key_alpha != 0, it generates the key based on Zipfian // distribution. Keys being generated are in random order. // If user want to generate keys based on uniform distribution, user needs to // set hot_key_alpha == 0. It will generate the random keys in increasing // order in the key array (ensure key[i] >= key[i+1]) and constrained in a // range related to FLAGS_active_width. std::vector GenerateNKeys(ThreadState* thread, int num_keys, uint64_t iteration) { const double completed_ratio = static_cast(iteration) / FLAGS_ops_per_thread; const int64_t base_key = static_cast( completed_ratio * (FLAGS_max_key - FLAGS_active_width)); std::vector keys; keys.reserve(num_keys); int64_t next_key = base_key + thread->rand.Next() % FLAGS_active_width; keys.push_back(next_key); for (int i = 1; i < num_keys; ++i) { // Generate the key follows zipfian distribution if (FLAGS_hot_key_alpha != 0) { double float_rand = (static_cast(thread->rand.Next() % FLAGS_max_key)) / FLAGS_max_key; next_key = GetOneHotKeyID(float_rand, FLAGS_max_key); } else { // This may result in some duplicate keys next_key = next_key + thread->rand.Next() % (FLAGS_active_width - (next_key - base_key)); } keys.push_back(next_key); } return keys; } size_t GenerateValue(uint32_t rand, char* v, size_t max_sz) { size_t value_sz = ((rand % kRandomValueMaxFactor) + 1) * FLAGS_value_size_mult; assert(value_sz <= 
max_sz && value_sz >= sizeof(uint32_t)); (void)max_sz; *((uint32_t*)v) = rand; for (size_t i = sizeof(uint32_t); i < value_sz; i++) { v[i] = (char)(rand ^ i); } v[value_sz] = '\0'; return value_sz; // the size of the value set. } } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_common.h000066400000000000000000000437621370372246700220170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // The test uses an array to compare against values written to the database. // Keys written to the array are in 1:1 correspondence to the actual values in // the database according to the formula in the function GenerateValue. // Space is reserved in the array from 0 to FLAGS_max_key and values are // randomly written/deleted/read from those positions. During verification we // compare all the positions in the array. To shorten/elongate the running // time, you could change the settings: FLAGS_max_key, FLAGS_ops_per_thread, // (sometimes also FLAGS_threads). // // NOTE that if FLAGS_test_batches_snapshots is set, the test will have // different behavior. See comment of the flag for details. 
#ifdef GFLAGS #pragma once #include #include #include #include #include #include #include #include #include #include #include #include "db/db_impl/db_impl.h" #include "db/version_set.h" #include "db_stress_tool/db_stress_env_wrapper.h" #include "db_stress_tool/db_stress_listener.h" #include "db_stress_tool/db_stress_shared_state.h" #include "db_stress_tool/db_stress_test_base.h" #include "hdfs/env_hdfs.h" #include "logging/logging.h" #include "monitoring/histogram.h" #include "options/options_helper.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/utilities/backupable_db.h" #include "rocksdb/utilities/checkpoint.h" #include "rocksdb/utilities/db_ttl.h" #include "rocksdb/utilities/debug.h" #include "rocksdb/utilities/options_util.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/write_batch.h" #ifndef NDEBUG #include "test_util/fault_injection_test_fs.h" #endif #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" #include "util/gflags_compat.h" #include "util/mutexlock.h" #include "util/random.h" #include "util/string_util.h" #include "utilities/blob_db/blob_db.h" #include "test_util/testutil.h" #include "test_util/fault_injection_test_env.h" #include "utilities/merge_operators.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::RegisterFlagValidator; using GFLAGS_NAMESPACE::SetUsageMessage; DECLARE_uint64(seed); DECLARE_bool(read_only); DECLARE_int64(max_key); DECLARE_double(hot_key_alpha); DECLARE_int32(max_key_len); DECLARE_string(key_len_percent_dist); DECLARE_int32(key_window_scale_factor); DECLARE_int32(column_families); DECLARE_string(options_file); DECLARE_int64(active_width); DECLARE_bool(test_batches_snapshots); DECLARE_bool(atomic_flush); DECLARE_bool(test_cf_consistency); DECLARE_int32(threads); 
DECLARE_int32(ttl); DECLARE_int32(value_size_mult); DECLARE_int32(compaction_readahead_size); DECLARE_bool(enable_pipelined_write); DECLARE_bool(verify_before_write); DECLARE_bool(histogram); DECLARE_bool(destroy_db_initially); DECLARE_bool(verbose); DECLARE_bool(progress_reports); DECLARE_uint64(db_write_buffer_size); DECLARE_int32(write_buffer_size); DECLARE_int32(max_write_buffer_number); DECLARE_int32(min_write_buffer_number_to_merge); DECLARE_int32(max_write_buffer_number_to_maintain); DECLARE_int64(max_write_buffer_size_to_maintain); DECLARE_double(memtable_prefix_bloom_size_ratio); DECLARE_bool(memtable_whole_key_filtering); DECLARE_int32(open_files); DECLARE_int64(compressed_cache_size); DECLARE_int32(compaction_style); DECLARE_int32(num_levels); DECLARE_int32(level0_file_num_compaction_trigger); DECLARE_int32(level0_slowdown_writes_trigger); DECLARE_int32(level0_stop_writes_trigger); DECLARE_int32(block_size); DECLARE_int32(format_version); DECLARE_int32(index_block_restart_interval); DECLARE_int32(max_background_compactions); DECLARE_int32(num_bottom_pri_threads); DECLARE_int32(compaction_thread_pool_adjust_interval); DECLARE_int32(compaction_thread_pool_variations); DECLARE_int32(max_background_flushes); DECLARE_int32(universal_size_ratio); DECLARE_int32(universal_min_merge_width); DECLARE_int32(universal_max_merge_width); DECLARE_int32(universal_max_size_amplification_percent); DECLARE_int32(clear_column_family_one_in); DECLARE_int32(get_live_files_one_in); DECLARE_int32(get_sorted_wal_files_one_in); DECLARE_int32(get_current_wal_file_one_in); DECLARE_int32(set_options_one_in); DECLARE_int32(set_in_place_one_in); DECLARE_int64(cache_size); DECLARE_bool(cache_index_and_filter_blocks); DECLARE_bool(use_clock_cache); DECLARE_uint64(subcompactions); DECLARE_uint64(periodic_compaction_seconds); DECLARE_uint64(compaction_ttl); DECLARE_bool(allow_concurrent_memtable_write); DECLARE_bool(enable_write_thread_adaptive_yield); DECLARE_int32(reopen); 
DECLARE_double(bloom_bits); DECLARE_bool(use_block_based_filter); DECLARE_bool(partition_filters); DECLARE_int32(index_type); DECLARE_string(db); DECLARE_string(secondaries_base); DECLARE_bool(test_secondary); DECLARE_string(expected_values_path); DECLARE_bool(verify_checksum); DECLARE_bool(mmap_read); DECLARE_bool(mmap_write); DECLARE_bool(use_direct_reads); DECLARE_bool(use_direct_io_for_flush_and_compaction); DECLARE_bool(mock_direct_io); DECLARE_bool(statistics); DECLARE_bool(sync); DECLARE_bool(use_fsync); DECLARE_int32(kill_random_test); DECLARE_string(kill_prefix_blacklist); DECLARE_bool(disable_wal); DECLARE_uint64(recycle_log_file_num); DECLARE_int64(target_file_size_base); DECLARE_int32(target_file_size_multiplier); DECLARE_uint64(max_bytes_for_level_base); DECLARE_double(max_bytes_for_level_multiplier); DECLARE_int32(range_deletion_width); DECLARE_uint64(rate_limiter_bytes_per_sec); DECLARE_bool(rate_limit_bg_reads); DECLARE_uint64(sst_file_manager_bytes_per_sec); DECLARE_uint64(sst_file_manager_bytes_per_truncate); DECLARE_bool(use_txn); DECLARE_uint64(txn_write_policy); DECLARE_bool(unordered_write); DECLARE_int32(backup_one_in); DECLARE_int32(checkpoint_one_in); DECLARE_int32(ingest_external_file_one_in); DECLARE_int32(ingest_external_file_width); DECLARE_int32(compact_files_one_in); DECLARE_int32(compact_range_one_in); DECLARE_int32(flush_one_in); DECLARE_int32(pause_background_one_in); DECLARE_int32(compact_range_width); DECLARE_int32(acquire_snapshot_one_in); DECLARE_bool(compare_full_db_state_snapshot); DECLARE_uint64(snapshot_hold_ops); DECLARE_bool(long_running_snapshots); DECLARE_bool(use_multiget); DECLARE_int32(readpercent); DECLARE_int32(prefixpercent); DECLARE_int32(writepercent); DECLARE_int32(delpercent); DECLARE_int32(delrangepercent); DECLARE_int32(nooverwritepercent); DECLARE_int32(iterpercent); DECLARE_uint64(num_iterations); DECLARE_string(compression_type); DECLARE_string(bottommost_compression_type); 
DECLARE_int32(compression_max_dict_bytes); DECLARE_int32(compression_zstd_max_train_bytes); DECLARE_int32(compression_parallel_threads); DECLARE_string(checksum_type); DECLARE_string(hdfs); DECLARE_string(env_uri); DECLARE_uint64(ops_per_thread); DECLARE_uint64(log2_keys_per_lock); DECLARE_uint64(max_manifest_file_size); DECLARE_bool(in_place_update); DECLARE_int32(secondary_catch_up_one_in); DECLARE_string(memtablerep); DECLARE_int32(prefix_size); DECLARE_bool(use_merge); DECLARE_bool(use_full_merge_v1); DECLARE_int32(sync_wal_one_in); DECLARE_bool(avoid_unnecessary_blocking_io); DECLARE_bool(write_dbid_to_manifest); DECLARE_bool(avoid_flush_during_recovery); DECLARE_uint64(max_write_batch_group_size_bytes); DECLARE_bool(level_compaction_dynamic_level_bytes); DECLARE_int32(verify_checksum_one_in); DECLARE_int32(verify_db_one_in); DECLARE_int32(continuous_verification_interval); #ifndef ROCKSDB_LITE DECLARE_bool(use_blob_db); DECLARE_uint64(blob_db_min_blob_size); DECLARE_uint64(blob_db_bytes_per_sync); DECLARE_uint64(blob_db_file_size); DECLARE_bool(blob_db_enable_gc); DECLARE_double(blob_db_gc_cutoff); #endif // !ROCKSDB_LITE DECLARE_int32(approximate_size_one_in); DECLARE_bool(sync_fault_injection); const long KB = 1024; const int kRandomValueMaxFactor = 3; const int kValueMaxLen = 100; // wrapped posix or hdfs environment extern ROCKSDB_NAMESPACE::DbStressEnvWrapper* db_stress_env; #ifndef NDEBUG extern std::shared_ptr fault_fs_guard; #endif extern enum ROCKSDB_NAMESPACE::CompressionType compression_type_e; extern enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e; extern enum ROCKSDB_NAMESPACE::ChecksumType checksum_type_e; enum RepFactory { kSkipList, kHashSkipList, kVectorRep }; inline enum RepFactory StringToRepFactory(const char* ctype) { assert(ctype); if (!strcasecmp(ctype, "skip_list")) return kSkipList; else if (!strcasecmp(ctype, "prefix_hash")) return kHashSkipList; else if (!strcasecmp(ctype, "vector")) return kVectorRep; 
fprintf(stdout, "Cannot parse memreptable %s\n", ctype); return kSkipList; } extern enum RepFactory FLAGS_rep_factory; namespace ROCKSDB_NAMESPACE { inline enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType( const char* ctype) { assert(ctype); ROCKSDB_NAMESPACE::CompressionType ret_compression_type; if (!strcasecmp(ctype, "disable")) { ret_compression_type = ROCKSDB_NAMESPACE::kDisableCompressionOption; } else if (!strcasecmp(ctype, "none")) { ret_compression_type = ROCKSDB_NAMESPACE::kNoCompression; } else if (!strcasecmp(ctype, "snappy")) { ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression; } else if (!strcasecmp(ctype, "zlib")) { ret_compression_type = ROCKSDB_NAMESPACE::kZlibCompression; } else if (!strcasecmp(ctype, "bzip2")) { ret_compression_type = ROCKSDB_NAMESPACE::kBZip2Compression; } else if (!strcasecmp(ctype, "lz4")) { ret_compression_type = ROCKSDB_NAMESPACE::kLZ4Compression; } else if (!strcasecmp(ctype, "lz4hc")) { ret_compression_type = ROCKSDB_NAMESPACE::kLZ4HCCompression; } else if (!strcasecmp(ctype, "xpress")) { ret_compression_type = ROCKSDB_NAMESPACE::kXpressCompression; } else if (!strcasecmp(ctype, "zstd")) { ret_compression_type = ROCKSDB_NAMESPACE::kZSTD; } else { fprintf(stderr, "Cannot parse compression type '%s'\n", ctype); ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression; // default value } if (ret_compression_type != ROCKSDB_NAMESPACE::kDisableCompressionOption && !CompressionTypeSupported(ret_compression_type)) { // Use no compression will be more portable but considering this is // only a stress test and snappy is widely available. Use snappy here. 
ret_compression_type = ROCKSDB_NAMESPACE::kSnappyCompression; } return ret_compression_type; } inline enum ROCKSDB_NAMESPACE::ChecksumType StringToChecksumType( const char* ctype) { assert(ctype); auto iter = ROCKSDB_NAMESPACE::checksum_type_string_map.find(ctype); if (iter != ROCKSDB_NAMESPACE::checksum_type_string_map.end()) { return iter->second; } fprintf(stderr, "Cannot parse checksum type '%s'\n", ctype); return ROCKSDB_NAMESPACE::kCRC32c; } inline std::string ChecksumTypeToString(ROCKSDB_NAMESPACE::ChecksumType ctype) { auto iter = std::find_if( ROCKSDB_NAMESPACE::checksum_type_string_map.begin(), ROCKSDB_NAMESPACE::checksum_type_string_map.end(), [&](const std::pair& name_and_enum_val) { return name_and_enum_val.second == ctype; }); assert(iter != ROCKSDB_NAMESPACE::checksum_type_string_map.end()); return iter->first; } inline std::vector SplitString(std::string src) { std::vector ret; if (src.empty()) { return ret; } size_t pos = 0; size_t pos_comma; while ((pos_comma = src.find(',', pos)) != std::string::npos) { ret.push_back(src.substr(pos, pos_comma - pos)); pos = pos_comma + 1; } ret.push_back(src.substr(pos, src.length())); return ret; } #ifdef _MSC_VER #pragma warning(push) // truncation of constant value on static_cast #pragma warning(disable : 4309) #endif inline bool GetNextPrefix(const ROCKSDB_NAMESPACE::Slice& src, std::string* v) { std::string ret = src.ToString(); for (int i = static_cast(ret.size()) - 1; i >= 0; i--) { if (ret[i] != static_cast(255)) { ret[i] = ret[i] + 1; break; } else if (i != 0) { ret[i] = 0; } else { // all FF. 
No next prefix return false; } } *v = ret; return true; } #ifdef _MSC_VER #pragma warning(pop) #endif // convert long to a big-endian slice key extern inline std::string GetStringFromInt(int64_t val) { std::string little_endian_key; std::string big_endian_key; PutFixed64(&little_endian_key, val); assert(little_endian_key.size() == sizeof(val)); big_endian_key.resize(sizeof(val)); for (size_t i = 0; i < sizeof(val); ++i) { big_endian_key[i] = little_endian_key[sizeof(val) - 1 - i]; } return big_endian_key; } // A struct for maintaining the parameters for generating variable length keys struct KeyGenContext { // Number of adjacent keys in one cycle of key lengths uint64_t window; // Number of keys of each possible length in a given window std::vector weights; }; extern KeyGenContext key_gen_ctx; // Generate a variable length key string from the given int64 val. The // order of the keys is preserved. The key could be anywhere from 8 to // max_key_len * 8 bytes. // The algorithm picks the length based on the // offset of the val within a configured window and the distribution of the // number of keys of various lengths in that window. For example, if x, y, x are // the weights assigned to each possible key length, the keys generated would be // - {0}...{x-1} // {(x-1),0}..{(x-1),(y-1)},{(x-1),(y-1),0}..{(x-1),(y-1),(z-1)} and so on. // Additionally, a trailer of 0-7 bytes could be appended. extern inline std::string Key(int64_t val) { uint64_t window = key_gen_ctx.window; size_t levels = key_gen_ctx.weights.size(); std::string key; for (size_t level = 0; level < levels; ++level) { uint64_t weight = key_gen_ctx.weights[level]; uint64_t offset = static_cast(val) % window; uint64_t mult = static_cast(val) / window; uint64_t pfx = mult * weight + (offset >= weight ? weight - 1 : offset); key.append(GetStringFromInt(pfx)); if (offset < weight) { // Use the bottom 3 bits of offset as the number of trailing 'x's in the // key. 
If the next key is going to be of the next level, then skip the // trailer as it would break ordering. If the key length is already at max, // skip the trailer. if (offset < weight - 1 && level < levels - 1) { size_t trailer_len = offset & 0x7; key.append(trailer_len, 'x'); } break; } val = offset - weight; window -= weight; } return key; } // Given a string key, map it to an index into the expected values buffer extern inline bool GetIntVal(std::string big_endian_key, uint64_t* key_p) { size_t size_key = big_endian_key.size(); std::vector prefixes; assert(size_key <= key_gen_ctx.weights.size() * sizeof(uint64_t)); // Pad with zeros to make it a multiple of 8. This function may be called // with a prefix, in which case we return the first index that falls // inside or outside that prefix, dependeing on whether the prefix is // the start of upper bound of a scan unsigned int pad = sizeof(uint64_t) - (size_key % sizeof(uint64_t)); if (pad < sizeof(uint64_t)) { big_endian_key.append(pad, '\0'); size_key += pad; } std::string little_endian_key; little_endian_key.resize(size_key); for (size_t start = 0; start < size_key; start += sizeof(uint64_t)) { size_t end = start + sizeof(uint64_t); for (size_t i = 0; i < sizeof(uint64_t); ++i) { little_endian_key[start + i] = big_endian_key[end - 1 - i]; } Slice little_endian_slice = Slice(&little_endian_key[start], sizeof(uint64_t)); uint64_t pfx; if (!GetFixed64(&little_endian_slice, &pfx)) { return false; } prefixes.emplace_back(pfx); } uint64_t key = 0; for (size_t i = 0; i < prefixes.size(); ++i) { uint64_t pfx = prefixes[i]; key += (pfx / key_gen_ctx.weights[i]) * key_gen_ctx.window + pfx % key_gen_ctx.weights[i]; } *key_p = key; return true; } extern inline uint64_t GetPrefixKeyCount(const std::string& prefix, const std::string& ub) { uint64_t start = 0; uint64_t end = 0; if (!GetIntVal(prefix, &start) || !GetIntVal(ub, &end)) { return 0; } return end - start; } extern inline std::string StringToHex(const std::string& str) 
{ std::string result = "0x"; result.append(Slice(str).ToString(true)); return result; } // Unified output format for double parameters extern inline std::string FormatDoubleParam(double param) { return std::to_string(param); } // Make sure that double parameter is a value we can reproduce by // re-inputting the value printed. extern inline void SanitizeDoubleParam(double* param) { *param = std::atof(FormatDoubleParam(*param).c_str()); } extern void PoolSizeChangeThread(void* v); extern void DbVerificationThread(void* v); extern void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz); extern int64_t GenerateOneKey(ThreadState* thread, uint64_t iteration); extern std::vector GenerateNKeys(ThreadState* thread, int num_keys, uint64_t iteration); extern size_t GenerateValue(uint32_t rand, char* v, size_t max_sz); extern StressTest* CreateCfConsistencyStressTest(); extern StressTest* CreateBatchedOpsStressTest(); extern StressTest* CreateNonBatchedOpsStressTest(); extern void InitializeHotKeyGenerator(double alpha); extern int64_t GetOneHotKeyID(double rand_seed, int64_t max_key); } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_driver.cc000066400000000000000000000115761370372246700221560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
//
#ifdef GFLAGS
#include "db_stress_tool/db_stress_common.h"

namespace ROCKSDB_NAMESPACE {
// Body of one worker thread; `v` is the worker's ThreadState*.
// The worker optionally verifies the DB, then runs three phases, each
// synchronized with the driver through the shared mutex/condvar:
// initialized -> operate -> verify -> done.
void ThreadBody(void* v) {
  ThreadState* thread = reinterpret_cast<ThreadState*>(v);
  SharedState* shared = thread->shared;

  if (shared->ShouldVerifyAtBeginning()) {
    shared->GetStressTest()->VerifyDb(thread);
  }
  {
    MutexLock l(shared->GetMutex());
    shared->IncInitialized();
    if (shared->AllInitialized()) {
      shared->GetCondVar()->SignalAll();
    }
    // Block until the driver gives the start signal.
    while (!shared->Started()) {
      shared->GetCondVar()->Wait();
    }
  }
  shared->GetStressTest()->OperateDb(thread);

  {
    MutexLock l(shared->GetMutex());
    shared->IncOperated();
    if (shared->AllOperated()) {
      shared->GetCondVar()->SignalAll();
    }
    // Block until the driver starts the verification phase.
    while (!shared->VerifyStarted()) {
      shared->GetCondVar()->Wait();
    }
  }

  shared->GetStressTest()->VerifyDb(thread);

  {
    MutexLock l(shared->GetMutex());
    shared->IncDone();
    if (shared->AllDone()) {
      shared->GetCondVar()->SignalAll();
    }
  }
}

// Drives a complete stress run: initializes the DB, starts the worker
// threads (and the optional background threads), steps them through the
// operate and verify phases, reports merged statistics and stops the
// background threads.
// Returns false if VerifySecondaries() fails or if any verification failure
// was recorded in the shared state; true otherwise.
bool RunStressTest(StressTest* stress) {
  stress->InitDb();

  SharedState shared(db_stress_env, stress);
  if (FLAGS_read_only) {
    stress->InitReadonlyDb(&shared);
  }

#ifndef NDEBUG
  if (FLAGS_sync_fault_injection) {
    fault_fs_guard->SetFilesystemDirectWritable(false);
  }
#endif

  uint32_t n = shared.GetNumThreads();

  uint64_t now = db_stress_env->NowMicros();
  fprintf(stdout, "%s Initializing worker threads\n",
          db_stress_env->TimeToString(now / 1000000).c_str());
  std::vector<ThreadState*> threads(n);
  for (uint32_t i = 0; i < n; i++) {
    threads[i] = new ThreadState(i, &shared);
    db_stress_env->StartThread(ThreadBody, threads[i]);
  }
  ThreadState bg_thread(0, &shared);
  if (FLAGS_compaction_thread_pool_adjust_interval > 0) {
    db_stress_env->StartThread(PoolSizeChangeThread, &bg_thread);
  }
  ThreadState continuous_verification_thread(0, &shared);
  if (FLAGS_continuous_verification_interval > 0) {
    db_stress_env->StartThread(DbVerificationThread,
                               &continuous_verification_thread);
  }

  // Each thread goes through the following states:
  // initializing -> wait for others to init -> read/populate/depopulate
  // wait for others to operate -> verify -> done

  {
    MutexLock l(shared.GetMutex());
    while (!shared.AllInitialized()) {
      shared.GetCondVar()->Wait();
    }
    if (shared.ShouldVerifyAtBeginning()) {
      if (shared.HasVerificationFailedYet()) {
        fprintf(stderr, "Crash-recovery verification failed :(\n");
      } else {
        fprintf(stdout, "Crash-recovery verification passed :)\n");
      }
    }

    now = db_stress_env->NowMicros();
    fprintf(stdout, "%s Starting database operations\n",
            db_stress_env->TimeToString(now / 1000000).c_str());

    shared.SetStart();
    shared.GetCondVar()->SignalAll();
    while (!shared.AllOperated()) {
      shared.GetCondVar()->Wait();
    }

    now = db_stress_env->NowMicros();
    if (FLAGS_test_batches_snapshots) {
      fprintf(stdout, "%s Limited verification already done during gets\n",
              db_stress_env->TimeToString(now / 1000000).c_str());
    } else {
      fprintf(stdout, "%s Starting verification\n",
              db_stress_env->TimeToString(now / 1000000).c_str());
    }

    shared.SetStartVerify();
    shared.GetCondVar()->SignalAll();
    while (!shared.AllDone()) {
      shared.GetCondVar()->Wait();
    }
  }

  // Fold every worker's statistics into the first worker's and report them.
  // Guarded so that a thread count of zero does not dereference threads[0].
  if (n > 0) {
    for (unsigned int i = 1; i < n; i++) {
      threads[0]->stats.Merge(threads[i]->stats);
    }
    threads[0]->stats.Report("Stress Test");
  }

  for (unsigned int i = 0; i < n; i++) {
    delete threads[i];
    threads[i] = nullptr;
  }
  now = db_stress_env->NowMicros();
  if (!FLAGS_test_batches_snapshots && !shared.HasVerificationFailedYet()) {
    fprintf(stdout, "%s Verification successful\n",
            db_stress_env->TimeToString(now / 1000000).c_str());
  }
  stress->PrintStatistics();

  // Ask the background threads (if any were started) to stop and wait for
  // them, since they reference stack-allocated ThreadState objects.
  if (FLAGS_compaction_thread_pool_adjust_interval > 0 ||
      FLAGS_continuous_verification_interval > 0) {
    MutexLock l(shared.GetMutex());
    shared.SetShouldStopBgThread();
    while (!shared.BgThreadsFinished()) {
      shared.GetCondVar()->Wait();
    }
  }

  if (!stress->VerifySecondaries()) {
    return false;
  }

  if (shared.HasVerificationFailedYet()) {
    fprintf(stderr, "Verification failed :(\n");
    return false;
  }
  return true;
}
}  // namespace ROCKSDB_NAMESPACE
#endif  // GFLAGS
rocksdb-6.11.4/db_stress_tool/db_stress_driver.h000066400000000000000000000013271370372246700220110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifdef GFLAGS #pragma once #include "db_stress_tool/db_stress_test_base.h" namespace ROCKSDB_NAMESPACE { extern void ThreadBody(void* /*thread_state*/); extern bool RunStressTest(StressTest*); } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_env_wrapper.h000066400000000000000000000025331370372246700230460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifdef GFLAGS #pragma once #include "db_stress_tool/db_stress_common.h" namespace ROCKSDB_NAMESPACE { class DbStressEnvWrapper : public EnvWrapper { public: explicit DbStressEnvWrapper(Env* t) : EnvWrapper(t) {} Status DeleteFile(const std::string& f) override { // We determine whether it is a manifest file by searching a strong, // so that there will be false positive if the directory path contains the // keyword but it is unlikely. // Checkpoint directory needs to be exempted. 
if (!if_preserve_all_manifests || f.find("MANIFEST-") == std::string::npos || f.find("checkpoint") != std::string::npos) { return target()->DeleteFile(f); } return Status::OK(); } // If true, all manifest files will not be delted in DeleteFile(). bool if_preserve_all_manifests = true; }; } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_gflags.cc000066400000000000000000000727431370372246700221310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" static bool ValidateUint32Range(const char* flagname, uint64_t value) { if (value > std::numeric_limits::max()) { fprintf(stderr, "Invalid value for --%s: %lu, overflow\n", flagname, (unsigned long)value); return false; } return true; } DEFINE_uint64(seed, 2341234, "Seed for PRNG"); static const bool FLAGS_seed_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_seed, &ValidateUint32Range); DEFINE_bool(read_only, false, "True if open DB in read-only mode during tests"); DEFINE_int64(max_key, 1 * KB * KB, "Max number of key/values to place in database"); DEFINE_int32(max_key_len, 3, "Maximum length of a key in 8-byte units"); DEFINE_string(key_len_percent_dist, "", "Percentages of keys of various lengths. For example, 1,30,69 " "means 1% of keys are 8 bytes, 30% are 16 bytes, and 69% are " "24 bytes. 
If not specified, it will be evenly distributed"); DEFINE_int32(key_window_scale_factor, 10, "This value will be multiplied by 100 to come up with a window " "size for varying the key length"); DEFINE_int32(column_families, 10, "Number of column families"); DEFINE_double( hot_key_alpha, 0, "Use Zipfian distribution to generate the key " "distribution. If it is not specified, write path will use random " "distribution to generate the keys. The parameter is [0, double_max]). " "However, the larger alpha is, the more shewed will be. If alpha is " "larger than 2, it is likely that only 1 key will be accessed. The " "Recommended value is [0.8-1.5]. The distribution is also related to " "max_key and total iterations of generating the hot key. "); DEFINE_string( options_file, "", "The path to a RocksDB options file. If specified, then db_stress will " "run with the RocksDB options in the default column family of the " "specified options file. Note that, when an options file is provided, " "db_stress will ignore the flag values for all options that may be passed " "via options file."); DEFINE_int64( active_width, 0, "Number of keys in active span of the key-range at any given time. The " "span begins with its left endpoint at key 0, gradually moves rightwards, " "and ends with its right endpoint at max_key. If set to 0, active_width " "will be sanitized to be equal to max_key."); // TODO(noetzli) Add support for single deletes DEFINE_bool(test_batches_snapshots, false, "If set, the test uses MultiGet(), MultiPut() and MultiDelete()" " which read/write/delete multiple keys in a batch. In this mode," " we do not verify db content by comparing the content with the " "pre-allocated array. Instead, we do partial verification inside" " MultiGet() by checking various values in a batch. 
Benefit of" " this mode:\n" "\t(a) No need to acquire mutexes during writes (less cache " "flushes in multi-core leading to speed up)\n" "\t(b) No long validation at the end (more speed up)\n" "\t(c) Test snapshot and atomicity of batch writes"); DEFINE_bool(atomic_flush, false, "If set, enables atomic flush in the options.\n"); DEFINE_bool(test_cf_consistency, false, "If set, runs the stress test dedicated to verifying writes to " "multiple column families are consistent. Setting this implies " "`atomic_flush=true` is set true if `disable_wal=false`.\n"); DEFINE_int32(threads, 32, "Number of concurrent threads to run."); DEFINE_int32(ttl, -1, "Opens the db with this ttl value if this is not -1. " "Carefully specify a large value such that verifications on " "deleted values don't fail"); DEFINE_int32(value_size_mult, 8, "Size of value will be this number times rand_int(1,3) bytes"); DEFINE_int32(compaction_readahead_size, 0, "Compaction readahead size"); DEFINE_bool(enable_pipelined_write, false, "Pipeline WAL/memtable writes"); DEFINE_bool(verify_before_write, false, "Verify before write"); DEFINE_bool(histogram, false, "Print histogram of operation timings"); DEFINE_bool(destroy_db_initially, true, "Destroys the database dir before start if this is true"); DEFINE_bool(verbose, false, "Verbose"); DEFINE_bool(progress_reports, true, "If true, db_stress will report number of finished operations"); DEFINE_uint64(db_write_buffer_size, ROCKSDB_NAMESPACE::Options().db_write_buffer_size, "Number of bytes to buffer in all memtables before compacting"); DEFINE_int32( write_buffer_size, static_cast(ROCKSDB_NAMESPACE::Options().write_buffer_size), "Number of bytes to buffer in memtable before compacting"); DEFINE_int32(max_write_buffer_number, ROCKSDB_NAMESPACE::Options().max_write_buffer_number, "The number of in-memory memtables. 
" "Each memtable is of size FLAGS_write_buffer_size."); DEFINE_int32(min_write_buffer_number_to_merge, ROCKSDB_NAMESPACE::Options().min_write_buffer_number_to_merge, "The minimum number of write buffers that will be merged together " "before writing to storage. This is cheap because it is an " "in-memory merge. If this feature is not enabled, then all these " "write buffers are flushed to L0 as separate files and this " "increases read amplification because a get request has to check " "in all of these files. Also, an in-memory merge may result in " "writing less data to storage if there are duplicate records in" " each of these individual write buffers."); DEFINE_int32(max_write_buffer_number_to_maintain, ROCKSDB_NAMESPACE::Options().max_write_buffer_number_to_maintain, "The total maximum number of write buffers to maintain in memory " "including copies of buffers that have already been flushed. " "Unlike max_write_buffer_number, this parameter does not affect " "flushing. This controls the minimum amount of write history " "that will be available in memory for conflict checking when " "Transactions are used. If this value is too low, some " "transactions may fail at commit time due to not being able to " "determine whether there were any write conflicts. Setting this " "value to 0 will cause write buffers to be freed immediately " "after they are flushed. If this value is set to -1, " "'max_write_buffer_number' will be used."); DEFINE_int64(max_write_buffer_size_to_maintain, ROCKSDB_NAMESPACE::Options().max_write_buffer_size_to_maintain, "The total maximum size of write buffers to maintain in memory " "including copies of buffers that have already been flushed. " "Unlike max_write_buffer_number, this parameter does not affect " "flushing. This controls the minimum amount of write history " "that will be available in memory for conflict checking when " "Transactions are used. 
If this value is too low, some " "transactions may fail at commit time due to not being able to " "determine whether there were any write conflicts. Setting this " "value to 0 will cause write buffers to be freed immediately " "after they are flushed. If this value is set to -1, " "'max_write_buffer_number' will be used."); DEFINE_double(memtable_prefix_bloom_size_ratio, ROCKSDB_NAMESPACE::Options().memtable_prefix_bloom_size_ratio, "creates prefix blooms for memtables, each with size " "`write_buffer_size * memtable_prefix_bloom_size_ratio`."); DEFINE_bool(memtable_whole_key_filtering, ROCKSDB_NAMESPACE::Options().memtable_whole_key_filtering, "Enable whole key filtering in memtables."); DEFINE_int32(open_files, ROCKSDB_NAMESPACE::Options().max_open_files, "Maximum number of files to keep open at the same time " "(use default if == 0)"); DEFINE_int64(compressed_cache_size, -1, "Number of bytes to use as a cache of compressed data." " Negative means use default settings."); DEFINE_int32(compaction_style, ROCKSDB_NAMESPACE::Options().compaction_style, ""); DEFINE_int32(num_levels, ROCKSDB_NAMESPACE::Options().num_levels, "Number of levels in the DB"); DEFINE_int32(level0_file_num_compaction_trigger, ROCKSDB_NAMESPACE::Options().level0_file_num_compaction_trigger, "Level0 compaction start trigger"); DEFINE_int32(level0_slowdown_writes_trigger, ROCKSDB_NAMESPACE::Options().level0_slowdown_writes_trigger, "Number of files in level-0 that will slow down writes"); DEFINE_int32(level0_stop_writes_trigger, ROCKSDB_NAMESPACE::Options().level0_stop_writes_trigger, "Number of files in level-0 that will trigger put stop."); DEFINE_int32(block_size, static_cast( ROCKSDB_NAMESPACE::BlockBasedTableOptions().block_size), "Number of bytes in a block."); DEFINE_int32(format_version, static_cast( ROCKSDB_NAMESPACE::BlockBasedTableOptions().format_version), "Format version of SST files."); DEFINE_int32( index_block_restart_interval, 
ROCKSDB_NAMESPACE::BlockBasedTableOptions().index_block_restart_interval, "Number of keys between restart points " "for delta encoding of keys in index block."); DEFINE_int32(max_background_compactions, ROCKSDB_NAMESPACE::Options().max_background_compactions, "The maximum number of concurrent background compactions " "that can occur in parallel."); DEFINE_int32(num_bottom_pri_threads, 0, "The number of threads in the bottom-priority thread pool (used " "by universal compaction only)."); DEFINE_int32(compaction_thread_pool_adjust_interval, 0, "The interval (in milliseconds) to adjust compaction thread pool " "size. Don't change it periodically if the value is 0."); DEFINE_int32(compaction_thread_pool_variations, 2, "Range of background thread pool size variations when adjusted " "periodically."); DEFINE_int32(max_background_flushes, ROCKSDB_NAMESPACE::Options().max_background_flushes, "The maximum number of concurrent background flushes " "that can occur in parallel."); DEFINE_int32(universal_size_ratio, 0, "The ratio of file sizes that trigger" " compaction in universal style"); DEFINE_int32(universal_min_merge_width, 0, "The minimum number of files to " "compact in universal style compaction"); DEFINE_int32(universal_max_merge_width, 0, "The max number of files to compact" " in universal style compaction"); DEFINE_int32(universal_max_size_amplification_percent, 0, "The max size amplification for universal style compaction"); DEFINE_int32(clear_column_family_one_in, 1000000, "With a chance of 1/N, delete a column family and then recreate " "it again. If N == 0, never drop/create column families. " "When test_batches_snapshots is true, this flag has no effect"); DEFINE_int32(get_live_files_one_in, 1000000, "With a chance of 1/N, call GetLiveFiles to verify if it returns " "correctly. If N == 0, do not call the interface."); DEFINE_int32( get_sorted_wal_files_one_in, 1000000, "With a chance of 1/N, call GetSortedWalFiles to verify if it returns " "correctly. 
(Note that this API may legitimately return an error.) If N == " "0, do not call the interface."); DEFINE_int32( get_current_wal_file_one_in, 1000000, "With a chance of 1/N, call GetCurrentWalFile to verify if it returns " "correctly. (Note that this API may legitimately return an error.) If N == " "0, do not call the interface."); DEFINE_int32(set_options_one_in, 0, "With a chance of 1/N, change some random options"); DEFINE_int32(set_in_place_one_in, 0, "With a chance of 1/N, toggle in place support option"); DEFINE_int64(cache_size, 2LL * KB * KB * KB, "Number of bytes to use as a cache of uncompressed data."); DEFINE_bool(cache_index_and_filter_blocks, false, "True if indexes/filters should be cached in block cache."); DEFINE_bool(use_clock_cache, false, "Replace default LRU block cache with clock cache."); DEFINE_uint64(subcompactions, 1, "Maximum number of subcompactions to divide L0-L1 compactions " "into."); DEFINE_uint64(periodic_compaction_seconds, 1000, "Files older than this value will be picked up for compaction."); DEFINE_uint64(compaction_ttl, 1000, "Files older than TTL will be compacted to the next level."); DEFINE_bool(allow_concurrent_memtable_write, false, "Allow multi-writers to update mem tables in parallel."); DEFINE_bool(enable_write_thread_adaptive_yield, true, "Use a yielding spin loop for brief writer thread waits."); #ifndef ROCKSDB_LITE // BlobDB Options DEFINE_bool(use_blob_db, false, "Use BlobDB."); DEFINE_uint64(blob_db_min_blob_size, ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().min_blob_size, "Smallest blob to store in a file. 
Blobs smaller than this " "will be inlined with the key in the LSM tree."); DEFINE_uint64(blob_db_bytes_per_sync, ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().bytes_per_sync, "Sync blob files once per every N bytes written."); DEFINE_uint64(blob_db_file_size, ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().blob_file_size, "Target size of each blob file."); DEFINE_bool( blob_db_enable_gc, ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().enable_garbage_collection, "Enable BlobDB garbage collection."); DEFINE_double( blob_db_gc_cutoff, ROCKSDB_NAMESPACE::blob_db::BlobDBOptions().garbage_collection_cutoff, "Cutoff ratio for BlobDB garbage collection."); #endif // !ROCKSDB_LITE static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); static bool ValidateInt32Positive(const char* flagname, int32_t value) { if (value < 0) { fprintf(stderr, "Invalid value for --%s: %d, must be >=0\n", flagname, value); return false; } return true; } DEFINE_int32(reopen, 10, "Number of times database reopens"); static const bool FLAGS_reopen_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_reopen, &ValidateInt32Positive); DEFINE_double(bloom_bits, 10, "Bloom filter bits per key. 
" "Negative means use default settings."); DEFINE_bool(use_block_based_filter, false, "use block based filter" "instead of full filter for block based table"); DEFINE_bool(partition_filters, false, "use partitioned filters " "for block-based table"); DEFINE_int32( index_type, static_cast( ROCKSDB_NAMESPACE::BlockBasedTableOptions::kBinarySearch), "Type of block-based table index (see `enum IndexType` in table.h)"); DEFINE_string(db, "", "Use the db with the following name."); DEFINE_string(secondaries_base, "", "Use this path as the base path for secondary instances."); DEFINE_bool(test_secondary, false, "Test secondary instance."); DEFINE_string( expected_values_path, "", "File where the array of expected uint32_t values will be stored. If " "provided and non-empty, the DB state will be verified against these " "values after recovery. --max_key and --column_family must be kept the " "same across invocations of this program that use the same " "--expected_values_path."); DEFINE_bool(verify_checksum, false, "Verify checksum for every block read from storage"); DEFINE_bool(mmap_read, ROCKSDB_NAMESPACE::Options().allow_mmap_reads, "Allow reads to occur via mmap-ing files"); DEFINE_bool(mmap_write, ROCKSDB_NAMESPACE::Options().allow_mmap_writes, "Allow writes to occur via mmap-ing files"); DEFINE_bool(use_direct_reads, ROCKSDB_NAMESPACE::Options().use_direct_reads, "Use O_DIRECT for reading data"); DEFINE_bool(use_direct_io_for_flush_and_compaction, ROCKSDB_NAMESPACE::Options().use_direct_io_for_flush_and_compaction, "Use O_DIRECT for writing data"); DEFINE_bool(mock_direct_io, false, "Mock direct IO by not using O_DIRECT for direct IO read"); DEFINE_bool(statistics, false, "Create database statistics"); DEFINE_bool(sync, false, "Sync all writes to disk"); DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync"); DEFINE_int32(kill_random_test, 0, "If non-zero, kill at various points in source code with " "probability 1/this"); static const bool 
FLAGS_kill_random_test_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_kill_random_test, &ValidateInt32Positive); extern int rocksdb_kill_odds; DEFINE_string(kill_prefix_blacklist, "", "If non-empty, kill points with prefix in the list given will be" " skipped. Items are comma-separated."); extern std::vector rocksdb_kill_prefix_blacklist; DEFINE_bool(disable_wal, false, "If true, do not write WAL for write."); DEFINE_uint64(recycle_log_file_num, ROCKSDB_NAMESPACE::Options().recycle_log_file_num, "Number of old WAL files to keep around for later recycling"); DEFINE_int64(target_file_size_base, ROCKSDB_NAMESPACE::Options().target_file_size_base, "Target level-1 file size for compaction"); DEFINE_int32(target_file_size_multiplier, 1, "A multiplier to compute target level-N file size (N >= 2)"); DEFINE_uint64(max_bytes_for_level_base, ROCKSDB_NAMESPACE::Options().max_bytes_for_level_base, "Max bytes for level-1"); DEFINE_double(max_bytes_for_level_multiplier, 2, "A multiplier to compute max bytes for level-N (N >= 2)"); DEFINE_int32(range_deletion_width, 10, "The width of the range deletion intervals."); DEFINE_uint64(rate_limiter_bytes_per_sec, 0, "Set options.rate_limiter value."); DEFINE_bool(rate_limit_bg_reads, false, "Use options.rate_limiter on compaction reads"); DEFINE_uint64(sst_file_manager_bytes_per_sec, 0, "Set `Options::sst_file_manager` to delete at this rate. By " "default the deletion rate is unbounded."); DEFINE_uint64(sst_file_manager_bytes_per_truncate, 0, "Set `Options::sst_file_manager` to delete in chunks of this " "many bytes. By default whole files will be deleted."); DEFINE_bool(use_txn, false, "Use TransactionDB. Currently the default write policy is " "TxnDBWritePolicy::WRITE_PREPARED"); DEFINE_uint64(txn_write_policy, 0, "The transaction write policy. Default is " "TxnDBWritePolicy::WRITE_COMMITTED. 
Note that this should not be " "changed accross crashes."); DEFINE_bool(unordered_write, false, "Turn on the unordered_write feature. This options is currently " "tested only in combination with use_txn=true and " "txn_write_policy=TxnDBWritePolicy::WRITE_PREPARED."); DEFINE_int32(backup_one_in, 0, "If non-zero, then CreateNewBackup() will be called once for " "every N operations on average. 0 indicates CreateNewBackup() " "is disabled."); DEFINE_int32(checkpoint_one_in, 0, "If non-zero, then CreateCheckpoint() will be called once for " "every N operations on average. 0 indicates CreateCheckpoint() " "is disabled."); DEFINE_int32(ingest_external_file_one_in, 0, "If non-zero, then IngestExternalFile() will be called once for " "every N operations on average. 0 indicates IngestExternalFile() " "is disabled."); DEFINE_int32(ingest_external_file_width, 1000, "The width of the ingested external files."); DEFINE_int32(compact_files_one_in, 0, "If non-zero, then CompactFiles() will be called once for every N " "operations on average. 0 indicates CompactFiles() is disabled."); DEFINE_int32(compact_range_one_in, 0, "If non-zero, then CompactRange() will be called once for every N " "operations on average. 0 indicates CompactRange() is disabled."); DEFINE_int32(flush_one_in, 0, "If non-zero, then Flush() will be called once for every N ops " "on average. 0 indicates calls to Flush() are disabled."); DEFINE_int32(pause_background_one_in, 0, "If non-zero, then PauseBackgroundWork()+Continue will be called " "once for every N ops on average. 
0 disables."); DEFINE_int32(compact_range_width, 10000, "The width of the ranges passed to CompactRange()."); DEFINE_int32(acquire_snapshot_one_in, 0, "If non-zero, then acquires a snapshot once every N operations on " "average."); DEFINE_bool(compare_full_db_state_snapshot, false, "If set we compare state of entire db (in one of the threads) with" "each snapshot."); DEFINE_uint64(snapshot_hold_ops, 0, "If non-zero, then releases snapshots N operations after they're " "acquired."); DEFINE_bool(long_running_snapshots, false, "If set, hold on some some snapshots for much longer time."); DEFINE_bool(use_multiget, false, "If set, use the batched MultiGet API for reads"); static bool ValidateInt32Percent(const char* flagname, int32_t value) { if (value < 0 || value > 100) { fprintf(stderr, "Invalid value for --%s: %d, 0<= pct <=100 \n", flagname, value); return false; } return true; } DEFINE_int32(readpercent, 10, "Ratio of reads to total workload (expressed as a percentage)"); static const bool FLAGS_readpercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_readpercent, &ValidateInt32Percent); DEFINE_int32(prefixpercent, 20, "Ratio of prefix iterators to total workload (expressed as a" " percentage)"); static const bool FLAGS_prefixpercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_prefixpercent, &ValidateInt32Percent); DEFINE_int32(writepercent, 45, "Ratio of writes to total workload (expressed as a percentage)"); static const bool FLAGS_writepercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_writepercent, &ValidateInt32Percent); DEFINE_int32(delpercent, 15, "Ratio of deletes to total workload (expressed as a percentage)"); static const bool FLAGS_delpercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_delpercent, &ValidateInt32Percent); DEFINE_int32(delrangepercent, 0, "Ratio of range deletions to total workload (expressed as a " "percentage). 
Cannot be used with test_batches_snapshots"); static const bool FLAGS_delrangepercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_delrangepercent, &ValidateInt32Percent); DEFINE_int32(nooverwritepercent, 60, "Ratio of keys without overwrite to total workload (expressed as " " a percentage)"); static const bool FLAGS_nooverwritepercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_nooverwritepercent, &ValidateInt32Percent); DEFINE_int32(iterpercent, 10, "Ratio of iterations to total workload" " (expressed as a percentage)"); static const bool FLAGS_iterpercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_iterpercent, &ValidateInt32Percent); DEFINE_uint64(num_iterations, 10, "Number of iterations per MultiIterate run"); static const bool FLAGS_num_iterations_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_num_iterations, &ValidateUint32Range); DEFINE_string(compression_type, "snappy", "Algorithm to use to compress the database"); DEFINE_int32(compression_max_dict_bytes, 0, "Maximum size of dictionary used to prime the compression " "library."); DEFINE_int32(compression_zstd_max_train_bytes, 0, "Maximum size of training data passed to zstd's dictionary " "trainer."); DEFINE_int32(compression_parallel_threads, 1, "Number of threads for parallel compression."); DEFINE_string(bottommost_compression_type, "disable", "Algorithm to use to compress bottommost level of the database. " "\"disable\" means disabling the feature"); DEFINE_string(checksum_type, "kCRC32c", "Algorithm to use to checksum blocks"); DEFINE_string(hdfs, "", "Name of hdfs environment"); DEFINE_string(env_uri, "", "URI for env lookup. 
Mutually exclusive with --hdfs"); DEFINE_uint64(ops_per_thread, 1200000, "Number of operations per thread."); static const bool FLAGS_ops_per_thread_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_ops_per_thread, &ValidateUint32Range); DEFINE_uint64(log2_keys_per_lock, 2, "Log2 of number of keys per lock"); static const bool FLAGS_log2_keys_per_lock_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_log2_keys_per_lock, &ValidateUint32Range); DEFINE_uint64(max_manifest_file_size, 16384, "Maximum size of a MANIFEST file"); DEFINE_bool(in_place_update, false, "On true, does inplace update in memtable"); DEFINE_int32(secondary_catch_up_one_in, 0, "If non-zero, the secondaries attemp to catch up with the primary " "once for every N operations on average. 0 indicates the " "secondaries do not try to catch up after open."); DEFINE_string(memtablerep, "skip_list", ""); inline static bool ValidatePrefixSize(const char* flagname, int32_t value) { if (value < -1 || value > 8) { fprintf(stderr, "Invalid value for --%s: %d. -1 <= PrefixSize <= 8\n", flagname, value); return false; } return true; } DEFINE_int32(prefix_size, 7, "Control the prefix size for HashSkipListRep. " "-1 is disabled."); static const bool FLAGS_prefix_size_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_prefix_size, &ValidatePrefixSize); DEFINE_bool(use_merge, false, "On true, replaces all writes with a Merge " "that behaves like a Put"); DEFINE_bool(use_full_merge_v1, false, "On true, use a merge operator that implement the deprecated " "version of FullMerge"); DEFINE_int32(sync_wal_one_in, 0, "If non-zero, then SyncWAL() will be called once for every N ops " "on average. 
0 indicates that calls to SyncWAL() are disabled."); DEFINE_bool(avoid_unnecessary_blocking_io, ROCKSDB_NAMESPACE::Options().avoid_unnecessary_blocking_io, "If true, some expensive cleaning up operations will be moved from " "user reads to high-pri background threads."); DEFINE_bool(write_dbid_to_manifest, ROCKSDB_NAMESPACE::Options().write_dbid_to_manifest, "Write DB_ID to manifest"); DEFINE_bool(avoid_flush_during_recovery, ROCKSDB_NAMESPACE::Options().avoid_flush_during_recovery, "Avoid flush during recovery"); DEFINE_uint64(max_write_batch_group_size_bytes, ROCKSDB_NAMESPACE::Options().max_write_batch_group_size_bytes, "Max write batch group size"); DEFINE_bool(level_compaction_dynamic_level_bytes, ROCKSDB_NAMESPACE::Options().level_compaction_dynamic_level_bytes, "Use dynamic level"); DEFINE_int32(verify_checksum_one_in, 0, "If non-zero, then DB::VerifyChecksum() will be called to do" " checksum verification of all the files in the database once for" " every N ops on average. 0 indicates that calls to" " VerifyChecksum() are disabled."); DEFINE_int32(verify_db_one_in, 0, "If non-zero, call VerifyDb() once for every N ops. 0 indicates " "that VerifyDb() will not be called in OperateDb(). Note that " "enabling this can slow down tests."); DEFINE_int32(continuous_verification_interval, 1000, "While test is running, verify db every N milliseconds. 0 " "disables continuous verification."); DEFINE_int32(approximate_size_one_in, 64, "If non-zero, DB::GetApproximateSizes() will be called against" " random key ranges."); DEFINE_int32(read_fault_one_in, 1000, "On non-zero, enables fault injection on read"); DEFINE_bool(sync_fault_injection, false, "If true, FaultInjectionTestFS will be used for write operations, " " and unsynced data in DB will lost after crash."); #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_listener.h000066400000000000000000000147431370372246700223510ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifdef GFLAGS #pragma once #include "rocksdb/listener.h" #include "util/gflags_compat.h" DECLARE_int32(compact_files_one_in); namespace ROCKSDB_NAMESPACE { class DbStressListener : public EventListener { public: DbStressListener(const std::string& db_name, const std::vector& db_paths, const std::vector& column_families) : db_name_(db_name), db_paths_(db_paths), column_families_(column_families), num_pending_file_creations_(0) {} #ifndef ROCKSDB_LITE ~DbStressListener() override { assert(num_pending_file_creations_ == 0); } void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { assert(IsValidColumnFamilyName(info.cf_name)); VerifyFilePath(info.file_path); // pretending doing some work here RandomSleep(); } void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*flush_job_info*/) override { RandomSleep(); } void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) override { RandomSleep(); } void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& /*ci*/) override { RandomSleep(); } void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { assert(IsValidColumnFamilyName(ci.cf_name)); assert(ci.input_files.size() + ci.output_files.size() > 0U); for (const auto& file_path : ci.input_files) { VerifyFilePath(file_path); } for (const auto& file_path : ci.output_files) { VerifyFilePath(file_path); } // pretending doing some work here RandomSleep(); } void OnTableFileCreationStarted( const TableFileCreationBriefInfo& /*info*/) override { ++num_pending_file_creations_; } void OnTableFileCreated(const TableFileCreationInfo& info) override { assert(info.db_name == db_name_); assert(IsValidColumnFamilyName(info.cf_name)); if (info.file_size) { VerifyFilePath(info.file_path); } assert(info.job_id > 0 || FLAGS_compact_files_one_in > 
0); if (info.status.ok() && info.file_size > 0) { assert(info.table_properties.data_size > 0 || info.table_properties.num_range_deletions > 0); assert(info.table_properties.raw_key_size > 0); assert(info.table_properties.num_entries > 0); } --num_pending_file_creations_; } void OnMemTableSealed(const MemTableInfo& /*info*/) override { RandomSleep(); } void OnColumnFamilyHandleDeletionStarted( ColumnFamilyHandle* /*handle*/) override { RandomSleep(); } void OnExternalFileIngested( DB* /*db*/, const ExternalFileIngestionInfo& /*info*/) override { RandomSleep(); } void OnBackgroundError(BackgroundErrorReason /* reason */, Status* /* bg_error */) override { RandomSleep(); } void OnStallConditionsChanged(const WriteStallInfo& /*info*/) override { RandomSleep(); } void OnFileReadFinish(const FileOperationInfo& info) override { // Even empty callback is valuable because sometimes some locks are // released in order to make the callback. // Sleep carefully here as it is a frequent operation and we don't want // to slow down the tests. We always sleep when the read is large. // When read is small, sleep in a small chance. size_t length_read = info.length; if (length_read >= 1000000 || Random::GetTLSInstance()->OneIn(1000)) { RandomSleep(); } } void OnFileWriteFinish(const FileOperationInfo& info) override { // Even empty callback is valuable because sometimes some locks are // released in order to make the callback. // Sleep carefully here as it is a frequent operation and we don't want // to slow down the tests. When the write is large, always sleep. // Otherwise, sleep in a relatively small chance. 
size_t length_write = info.length; if (length_write >= 1000000 || Random::GetTLSInstance()->OneIn(64)) { RandomSleep(); } } bool ShouldBeNotifiedOnFileIO() override { RandomSleep(); return static_cast(Random::GetTLSInstance()->OneIn(1)); } void OnErrorRecoveryBegin(BackgroundErrorReason /* reason */, Status /* bg_error */, bool* /* auto_recovery */) override { RandomSleep(); } void OnErrorRecoveryCompleted(Status /* old_bg_error */) override { RandomSleep(); } protected: bool IsValidColumnFamilyName(const std::string& cf_name) const { if (cf_name == kDefaultColumnFamilyName) { return true; } // The column family names in the stress tests are numbers. for (size_t i = 0; i < cf_name.size(); ++i) { if (cf_name[i] < '0' || cf_name[i] > '9') { return false; } } return true; } void VerifyFileDir(const std::string& file_dir) { #ifndef NDEBUG if (db_name_ == file_dir) { return; } for (const auto& db_path : db_paths_) { if (db_path.path == file_dir) { return; } } for (auto& cf : column_families_) { for (const auto& cf_path : cf.options.cf_paths) { if (cf_path.path == file_dir) { return; } } } assert(false); #else (void)file_dir; #endif // !NDEBUG } void VerifyFileName(const std::string& file_name) { #ifndef NDEBUG uint64_t file_number; FileType file_type; bool result = ParseFileName(file_name, &file_number, &file_type); assert(result); assert(file_type == kTableFile); #else (void)file_name; #endif // !NDEBUG } void VerifyFilePath(const std::string& file_path) { #ifndef NDEBUG size_t pos = file_path.find_last_of("/"); if (pos == std::string::npos) { VerifyFileName(file_path); } else { if (pos > 0) { VerifyFileDir(file_path.substr(0, pos)); } VerifyFileName(file_path.substr(pos)); } #else (void)file_path; #endif // !NDEBUG } void RandomSleep() { std::this_thread::sleep_for( std::chrono::microseconds(Random::GetTLSInstance()->Uniform(5000))); } #endif // !ROCKSDB_LITE private: std::string db_name_; std::vector db_paths_; std::vector column_families_; std::atomic 
num_pending_file_creations_; }; } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_shared_state.cc000066400000000000000000000017771370372246700233330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // #ifdef GFLAGS #include "db_stress_tool/db_stress_shared_state.h" namespace ROCKSDB_NAMESPACE { const uint32_t SharedState::UNKNOWN_SENTINEL = 0xfffffffe; const uint32_t SharedState::DELETION_SENTINEL = 0xffffffff; #if defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #if defined(OS_SOLARIS) __thread bool SharedState::ignore_read_error; #else thread_local bool SharedState::ignore_read_error; #endif // OS_SOLARIS #else bool SharedState::ignore_read_error; #endif // ROCKSDB_SUPPORT_THREAD_LOCAL } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_shared_state.h000066400000000000000000000341021370372246700231610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors #ifdef GFLAGS #pragma once #include "db_stress_tool/db_stress_stat.h" // SyncPoint is not supported in Released Windows Mode. 
#if !(defined NDEBUG) || !defined(OS_WIN) #include "test_util/sync_point.h" #endif // !(defined NDEBUG) || !defined(OS_WIN) #include "util/gflags_compat.h" DECLARE_uint64(seed); DECLARE_int64(max_key); DECLARE_uint64(log2_keys_per_lock); DECLARE_int32(threads); DECLARE_int32(column_families); DECLARE_int32(nooverwritepercent); DECLARE_string(expected_values_path); DECLARE_int32(clear_column_family_one_in); DECLARE_bool(test_batches_snapshots); DECLARE_int32(compaction_thread_pool_adjust_interval); DECLARE_int32(continuous_verification_interval); DECLARE_int32(read_fault_one_in); namespace ROCKSDB_NAMESPACE { class StressTest; // State shared by all concurrent executions of the same benchmark. class SharedState { public: // indicates a key may have any value (or not be present) as an operation on // it is incomplete. static const uint32_t UNKNOWN_SENTINEL; // indicates a key should definitely be deleted static const uint32_t DELETION_SENTINEL; // Errors when reading filter blocks are ignored, so we use a thread // local variable updated via sync points to keep track of errors injected // while reading filter blocks in order to ignore the Get/MultiGet result // for those calls #if defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #if defined(OS_SOLARIS) static __thread bool ignore_read_error; #else static thread_local bool ignore_read_error; #endif // OS_SOLARIS #else static bool ignore_read_error; #endif // ROCKSDB_SUPPORT_THREAD_LOCAL SharedState(Env* env, StressTest* stress_test) : cv_(&mu_), seed_(static_cast(FLAGS_seed)), max_key_(FLAGS_max_key), log2_keys_per_lock_(static_cast(FLAGS_log2_keys_per_lock)), num_threads_(FLAGS_threads), num_initialized_(0), num_populated_(0), vote_reopen_(0), num_done_(0), start_(false), start_verify_(false), num_bg_threads_(0), should_stop_bg_thread_(false), bg_thread_finished_(0), stress_test_(stress_test), verification_failure_(false), should_stop_test_(false), no_overwrite_ids_(FLAGS_column_families), values_(nullptr), 
printing_verification_results_(false) { // Pick random keys in each column family that will not experience // overwrite fprintf(stdout, "Choosing random keys with no overwrite\n"); Random64 rnd(seed_); // Start with the identity permutation. Subsequent iterations of // for loop below will start with perm of previous for loop int64_t* permutation = new int64_t[max_key_]; for (int64_t i = 0; i < max_key_; i++) { permutation[i] = i; } // Now do the Knuth shuffle int64_t num_no_overwrite_keys = (max_key_ * FLAGS_nooverwritepercent) / 100; // Only need to figure out first num_no_overwrite_keys of permutation no_overwrite_ids_.reserve(num_no_overwrite_keys); for (int64_t i = 0; i < num_no_overwrite_keys; i++) { int64_t rand_index = i + rnd.Next() % (max_key_ - i); // Swap i and rand_index; int64_t temp = permutation[i]; permutation[i] = permutation[rand_index]; permutation[rand_index] = temp; // Fill no_overwrite_ids_ with the first num_no_overwrite_keys of // permutation no_overwrite_ids_.insert(permutation[i]); } delete[] permutation; size_t expected_values_size = sizeof(std::atomic) * FLAGS_column_families * max_key_; bool values_init_needed = false; Status status; if (!FLAGS_expected_values_path.empty()) { if (!std::atomic{}.is_lock_free()) { status = Status::InvalidArgument( "Cannot use --expected_values_path on platforms without lock-free " "std::atomic"); } if (status.ok() && FLAGS_clear_column_family_one_in > 0) { status = Status::InvalidArgument( "Cannot use --expected_values_path on when " "--clear_column_family_one_in is greater than zero."); } uint64_t size = 0; if (status.ok()) { status = env->GetFileSize(FLAGS_expected_values_path, &size); } std::unique_ptr wfile; if (status.ok() && size == 0) { const EnvOptions soptions; status = env->NewWritableFile(FLAGS_expected_values_path, &wfile, soptions); } if (status.ok() && size == 0) { std::string buf(expected_values_size, '\0'); status = wfile->Append(buf); values_init_needed = true; } if (status.ok()) { status 
= env->NewMemoryMappedFileBuffer(FLAGS_expected_values_path, &expected_mmap_buffer_); } if (status.ok()) { assert(expected_mmap_buffer_->GetLen() == expected_values_size); values_ = static_cast*>( expected_mmap_buffer_->GetBase()); assert(values_ != nullptr); } else { fprintf(stderr, "Failed opening shared file '%s' with error: %s\n", FLAGS_expected_values_path.c_str(), status.ToString().c_str()); assert(values_ == nullptr); } } if (values_ == nullptr) { values_allocation_.reset( new std::atomic[FLAGS_column_families * max_key_]); values_ = &values_allocation_[0]; values_init_needed = true; } assert(values_ != nullptr); if (values_init_needed) { for (int i = 0; i < FLAGS_column_families; ++i) { for (int j = 0; j < max_key_; ++j) { Delete(i, j, false /* pending */); } } } if (FLAGS_test_batches_snapshots) { fprintf(stdout, "No lock creation because test_batches_snapshots set\n"); return; } long num_locks = static_cast(max_key_ >> log2_keys_per_lock_); if (max_key_ & ((1 << log2_keys_per_lock_) - 1)) { num_locks++; } fprintf(stdout, "Creating %ld locks\n", num_locks * FLAGS_column_families); key_locks_.resize(FLAGS_column_families); for (int i = 0; i < FLAGS_column_families; ++i) { key_locks_[i].resize(num_locks); for (auto& ptr : key_locks_[i]) { ptr.reset(new port::Mutex); } } if (FLAGS_compaction_thread_pool_adjust_interval > 0) { ++num_bg_threads_; fprintf(stdout, "Starting compaction_thread_pool_adjust_thread\n"); } if (FLAGS_continuous_verification_interval > 0) { ++num_bg_threads_; fprintf(stdout, "Starting continuous_verification_thread\n"); } #ifndef NDEBUG if (FLAGS_read_fault_one_in) { SyncPoint::GetInstance()->SetCallBack("FaultInjectionIgnoreError", IgnoreReadErrorCallback); SyncPoint::GetInstance()->EnableProcessing(); } #endif // NDEBUG } ~SharedState() { #ifndef NDEBUG if (FLAGS_read_fault_one_in) { SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); } #endif } port::Mutex* GetMutex() { return &mu_; } 
port::CondVar* GetCondVar() { return &cv_; } StressTest* GetStressTest() const { return stress_test_; } int64_t GetMaxKey() const { return max_key_; } uint32_t GetNumThreads() const { return num_threads_; } void IncInitialized() { num_initialized_++; } void IncOperated() { num_populated_++; } void IncDone() { num_done_++; } void IncVotedReopen() { vote_reopen_ = (vote_reopen_ + 1) % num_threads_; } bool AllInitialized() const { return num_initialized_ >= num_threads_; } bool AllOperated() const { return num_populated_ >= num_threads_; } bool AllDone() const { return num_done_ >= num_threads_; } bool AllVotedReopen() { return (vote_reopen_ == 0); } void SetStart() { start_ = true; } void SetStartVerify() { start_verify_ = true; } bool Started() const { return start_; } bool VerifyStarted() const { return start_verify_; } void SetVerificationFailure() { verification_failure_.store(true); } bool HasVerificationFailedYet() const { return verification_failure_.load(); } void SetShouldStopTest() { should_stop_test_.store(true); } bool ShouldStopTest() const { return should_stop_test_.load(); } port::Mutex* GetMutexForKey(int cf, int64_t key) { return key_locks_[cf][key >> log2_keys_per_lock_].get(); } void LockColumnFamily(int cf) { for (auto& mutex : key_locks_[cf]) { mutex->Lock(); } } void UnlockColumnFamily(int cf) { for (auto& mutex : key_locks_[cf]) { mutex->Unlock(); } } std::atomic& Value(int cf, int64_t key) const { return values_[cf * max_key_ + key]; } void ClearColumnFamily(int cf) { std::fill(&Value(cf, 0 /* key */), &Value(cf + 1, 0 /* key */), DELETION_SENTINEL); } // @param pending True if the update may have started but is not yet // guaranteed finished. This is useful for crash-recovery testing when the // process may crash before updating the expected values array. 
void Put(int cf, int64_t key, uint32_t value_base, bool pending) { if (!pending) { // prevent expected-value update from reordering before Write std::atomic_thread_fence(std::memory_order_release); } Value(cf, key).store(pending ? UNKNOWN_SENTINEL : value_base, std::memory_order_relaxed); if (pending) { // prevent Write from reordering before expected-value update std::atomic_thread_fence(std::memory_order_release); } } uint32_t Get(int cf, int64_t key) const { return Value(cf, key); } // @param pending See comment above Put() // Returns true if the key was not yet deleted. bool Delete(int cf, int64_t key, bool pending) { if (Value(cf, key) == DELETION_SENTINEL) { return false; } Put(cf, key, DELETION_SENTINEL, pending); return true; } // @param pending See comment above Put() // Returns true if the key was not yet deleted. bool SingleDelete(int cf, int64_t key, bool pending) { return Delete(cf, key, pending); } // @param pending See comment above Put() // Returns number of keys deleted by the call. int DeleteRange(int cf, int64_t begin_key, int64_t end_key, bool pending) { int covered = 0; for (int64_t key = begin_key; key < end_key; ++key) { if (Delete(cf, key, pending)) { ++covered; } } return covered; } bool AllowsOverwrite(int64_t key) { return no_overwrite_ids_.find(key) == no_overwrite_ids_.end(); } bool Exists(int cf, int64_t key) { // UNKNOWN_SENTINEL counts as exists. That assures a key for which overwrite // is disallowed can't be accidentally added a second time, in which case // SingleDelete wouldn't be able to properly delete the key. It does allow // the case where a SingleDelete might be added which covers nothing, but // that's not a correctness issue. 
uint32_t expected_value = Value(cf, key).load(); return expected_value != DELETION_SENTINEL; } uint32_t GetSeed() const { return seed_; } void SetShouldStopBgThread() { should_stop_bg_thread_ = true; } bool ShouldStopBgThread() { return should_stop_bg_thread_; } void IncBgThreadsFinished() { ++bg_thread_finished_; } bool BgThreadsFinished() const { return bg_thread_finished_ == num_bg_threads_; } bool ShouldVerifyAtBeginning() const { return expected_mmap_buffer_.get() != nullptr; } bool PrintingVerificationResults() { bool tmp = false; return !printing_verification_results_.compare_exchange_strong( tmp, true, std::memory_order_relaxed); } void FinishPrintingVerificationResults() { printing_verification_results_.store(false, std::memory_order_relaxed); } private: static void IgnoreReadErrorCallback(void*) { ignore_read_error = true; } port::Mutex mu_; port::CondVar cv_; const uint32_t seed_; const int64_t max_key_; const uint32_t log2_keys_per_lock_; const int num_threads_; long num_initialized_; long num_populated_; long vote_reopen_; long num_done_; bool start_; bool start_verify_; int num_bg_threads_; bool should_stop_bg_thread_; int bg_thread_finished_; StressTest* stress_test_; std::atomic verification_failure_; std::atomic should_stop_test_; // Keys that should not be overwritten std::unordered_set no_overwrite_ids_; std::atomic* values_; std::unique_ptr[]> values_allocation_; // Has to make it owned by a smart ptr as port::Mutex is not copyable // and storing it in the container may require copying depending on the impl. std::vector>> key_locks_; std::unique_ptr expected_mmap_buffer_; std::atomic printing_verification_results_; }; // Per-thread state for concurrent executions of the same benchmark. 
struct ThreadState { uint32_t tid; // 0..n-1 Random rand; // Has different seeds for different threads SharedState* shared; Stats stats; struct SnapshotState { const Snapshot* snapshot; // The cf from which we did a Get at this snapshot int cf_at; // The name of the cf at the time that we did a read std::string cf_at_name; // The key with which we did a Get at this snapshot std::string key; // The status of the Get Status status; // The value of the Get std::string value; // optional state of all keys in the db std::vector* key_vec; }; std::queue> snapshot_queue; ThreadState(uint32_t index, SharedState* _shared) : tid(index), rand(1000 + index + _shared->GetSeed()), shared(_shared) {} }; } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_stat.h000066400000000000000000000146251370372246700214760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include #include #include "monitoring/histogram.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/snapshot.h" #include "rocksdb/statistics.h" #include "util/gflags_compat.h" #include "util/random.h" DECLARE_bool(histogram); DECLARE_bool(progress_reports); namespace ROCKSDB_NAMESPACE { // Database statistics static std::shared_ptr dbstats; static std::shared_ptr dbstats_secondaries; class Stats { private: uint64_t start_; uint64_t finish_; double seconds_; long done_; long gets_; long prefixes_; long writes_; long deletes_; size_t single_deletes_; long iterator_size_sums_; long founds_; long iterations_; long range_deletions_; long covered_by_range_deletions_; long errors_; long verified_errors_; long num_compact_files_succeed_; long num_compact_files_failed_; int next_report_; size_t bytes_; uint64_t last_op_finish_; HistogramImpl hist_; public: Stats() {} void Start() { next_report_ = 100; hist_.Clear(); done_ = 0; gets_ = 0; prefixes_ = 0; writes_ = 0; deletes_ = 0; single_deletes_ = 0; iterator_size_sums_ = 0; founds_ = 0; iterations_ = 0; range_deletions_ = 0; covered_by_range_deletions_ = 0; errors_ = 0; verified_errors_ = 0; bytes_ = 0; seconds_ = 0; num_compact_files_succeed_ = 0; num_compact_files_failed_ = 0; start_ = Env::Default()->NowMicros(); last_op_finish_ = start_; finish_ = start_; } void Merge(const Stats& other) { hist_.Merge(other.hist_); done_ += other.done_; gets_ += other.gets_; prefixes_ += other.prefixes_; writes_ += other.writes_; deletes_ += other.deletes_; single_deletes_ += other.single_deletes_; iterator_size_sums_ += other.iterator_size_sums_; founds_ += other.founds_; iterations_ += other.iterations_; range_deletions_ += other.range_deletions_; covered_by_range_deletions_ = other.covered_by_range_deletions_; errors_ += other.errors_; verified_errors_ += other.verified_errors_; bytes_ += other.bytes_; seconds_ += other.seconds_; num_compact_files_succeed_ += 
other.num_compact_files_succeed_; num_compact_files_failed_ += other.num_compact_files_failed_; if (other.start_ < start_) start_ = other.start_; if (other.finish_ > finish_) finish_ = other.finish_; } void Stop() { finish_ = Env::Default()->NowMicros(); seconds_ = (finish_ - start_) * 1e-6; } void FinishedSingleOp() { if (FLAGS_histogram) { auto now = Env::Default()->NowMicros(); auto micros = now - last_op_finish_; hist_.Add(micros); if (micros > 20000) { fprintf(stdout, "long op: %" PRIu64 " micros%30s\r", micros, ""); } last_op_finish_ = now; } done_++; if (FLAGS_progress_reports) { if (done_ >= next_report_) { if (next_report_ < 1000) next_report_ += 100; else if (next_report_ < 5000) next_report_ += 500; else if (next_report_ < 10000) next_report_ += 1000; else if (next_report_ < 50000) next_report_ += 5000; else if (next_report_ < 100000) next_report_ += 10000; else if (next_report_ < 500000) next_report_ += 50000; else next_report_ += 100000; fprintf(stdout, "... finished %ld ops%30s\r", done_, ""); } } } void AddBytesForWrites(long nwrites, size_t nbytes) { writes_ += nwrites; bytes_ += nbytes; } void AddGets(long ngets, long nfounds) { founds_ += nfounds; gets_ += ngets; } void AddPrefixes(long nprefixes, long count) { prefixes_ += nprefixes; iterator_size_sums_ += count; } void AddIterations(long n) { iterations_ += n; } void AddDeletes(long n) { deletes_ += n; } void AddSingleDeletes(size_t n) { single_deletes_ += n; } void AddRangeDeletions(long n) { range_deletions_ += n; } void AddCoveredByRangeDeletions(long n) { covered_by_range_deletions_ += n; } void AddErrors(long n) { errors_ += n; } void AddVerifiedErrors(long n) { verified_errors_ += n; } void AddNumCompactFilesSucceed(long n) { num_compact_files_succeed_ += n; } void AddNumCompactFilesFailed(long n) { num_compact_files_failed_ += n; } void Report(const char* name) { std::string extra; if (bytes_ < 1 || done_ < 1) { fprintf(stderr, "No writes or ops?\n"); return; } double elapsed = (finish_ - 
start_) * 1e-6; double bytes_mb = bytes_ / 1048576.0; double rate = bytes_mb / elapsed; double throughput = (double)done_ / elapsed; fprintf(stdout, "%-12s: ", name); fprintf(stdout, "%.3f micros/op %ld ops/sec\n", seconds_ * 1e6 / done_, (long)throughput); fprintf(stdout, "%-12s: Wrote %.2f MB (%.2f MB/sec) (%ld%% of %ld ops)\n", "", bytes_mb, rate, (100 * writes_) / done_, done_); fprintf(stdout, "%-12s: Wrote %ld times\n", "", writes_); fprintf(stdout, "%-12s: Deleted %ld times\n", "", deletes_); fprintf(stdout, "%-12s: Single deleted %" ROCKSDB_PRIszt " times\n", "", single_deletes_); fprintf(stdout, "%-12s: %ld read and %ld found the key\n", "", gets_, founds_); fprintf(stdout, "%-12s: Prefix scanned %ld times\n", "", prefixes_); fprintf(stdout, "%-12s: Iterator size sum is %ld\n", "", iterator_size_sums_); fprintf(stdout, "%-12s: Iterated %ld times\n", "", iterations_); fprintf(stdout, "%-12s: Deleted %ld key-ranges\n", "", range_deletions_); fprintf(stdout, "%-12s: Range deletions covered %ld keys\n", "", covered_by_range_deletions_); fprintf(stdout, "%-12s: Got errors %ld times\n", "", errors_); fprintf(stdout, "%-12s: %ld CompactFiles() succeed\n", "", num_compact_files_succeed_); fprintf(stdout, "%-12s: %ld CompactFiles() did not succeed\n", "", num_compact_files_failed_); if (FLAGS_histogram) { fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); } fflush(stdout); } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/db_stress_tool/db_stress_test_base.cc000066400000000000000000002360741370372246700226360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // #ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" #include "db_stress_tool/db_stress_driver.h" #include "rocksdb/convenience.h" #include "rocksdb/sst_file_manager.h" namespace ROCKSDB_NAMESPACE { StressTest::StressTest() : cache_(NewCache(FLAGS_cache_size)), compressed_cache_(NewLRUCache(FLAGS_compressed_cache_size)), filter_policy_(FLAGS_bloom_bits >= 0 ? FLAGS_use_block_based_filter ? NewBloomFilterPolicy(FLAGS_bloom_bits, true) : NewBloomFilterPolicy(FLAGS_bloom_bits, false) : nullptr), db_(nullptr), #ifndef ROCKSDB_LITE txn_db_(nullptr), #endif new_column_family_name_(1), num_times_reopened_(0), db_preload_finished_(false), cmp_db_(nullptr) { if (FLAGS_destroy_db_initially) { std::vector files; db_stress_env->GetChildren(FLAGS_db, &files); for (unsigned int i = 0; i < files.size(); i++) { if (Slice(files[i]).starts_with("heap-")) { db_stress_env->DeleteFile(FLAGS_db + "/" + files[i]); } } Options options; options.env = db_stress_env; // Remove files without preserving manfiest files #ifndef ROCKSDB_LITE const Status s = !FLAGS_use_blob_db ? 
DestroyDB(FLAGS_db, options) : blob_db::DestroyBlobDB(FLAGS_db, options, blob_db::BlobDBOptions()); #else const Status s = DestroyDB(FLAGS_db, options); #endif // !ROCKSDB_LITE if (!s.ok()) { fprintf(stderr, "Cannot destroy original db: %s\n", s.ToString().c_str()); exit(1); } } } StressTest::~StressTest() { for (auto cf : column_families_) { delete cf; } column_families_.clear(); delete db_; assert(secondaries_.size() == secondary_cfh_lists_.size()); size_t n = secondaries_.size(); for (size_t i = 0; i != n; ++i) { for (auto* cf : secondary_cfh_lists_[i]) { delete cf; } secondary_cfh_lists_[i].clear(); delete secondaries_[i]; } secondaries_.clear(); for (auto* cf : cmp_cfhs_) { delete cf; } cmp_cfhs_.clear(); delete cmp_db_; } std::shared_ptr StressTest::NewCache(size_t capacity) { if (capacity <= 0) { return nullptr; } if (FLAGS_use_clock_cache) { auto cache = NewClockCache((size_t)capacity); if (!cache) { fprintf(stderr, "Clock cache not supported."); exit(1); } return cache; } else { return NewLRUCache((size_t)capacity); } } bool StressTest::BuildOptionsTable() { if (FLAGS_set_options_one_in <= 0) { return true; } std::unordered_map> options_tbl = { {"write_buffer_size", {ToString(options_.write_buffer_size), ToString(options_.write_buffer_size * 2), ToString(options_.write_buffer_size * 4)}}, {"max_write_buffer_number", {ToString(options_.max_write_buffer_number), ToString(options_.max_write_buffer_number * 2), ToString(options_.max_write_buffer_number * 4)}}, {"arena_block_size", { ToString(options_.arena_block_size), ToString(options_.write_buffer_size / 4), ToString(options_.write_buffer_size / 8), }}, {"memtable_huge_page_size", {"0", ToString(2 * 1024 * 1024)}}, {"max_successive_merges", {"0", "2", "4"}}, {"inplace_update_num_locks", {"100", "200", "300"}}, // TODO(ljin): enable test for this option // {"disable_auto_compactions", {"100", "200", "300"}}, {"soft_rate_limit", {"0", "0.5", "0.9"}}, {"hard_rate_limit", {"0", "1.1", "2.0"}}, 
{"level0_file_num_compaction_trigger", { ToString(options_.level0_file_num_compaction_trigger), ToString(options_.level0_file_num_compaction_trigger + 2), ToString(options_.level0_file_num_compaction_trigger + 4), }}, {"level0_slowdown_writes_trigger", { ToString(options_.level0_slowdown_writes_trigger), ToString(options_.level0_slowdown_writes_trigger + 2), ToString(options_.level0_slowdown_writes_trigger + 4), }}, {"level0_stop_writes_trigger", { ToString(options_.level0_stop_writes_trigger), ToString(options_.level0_stop_writes_trigger + 2), ToString(options_.level0_stop_writes_trigger + 4), }}, {"max_compaction_bytes", { ToString(options_.target_file_size_base * 5), ToString(options_.target_file_size_base * 15), ToString(options_.target_file_size_base * 100), }}, {"target_file_size_base", { ToString(options_.target_file_size_base), ToString(options_.target_file_size_base * 2), ToString(options_.target_file_size_base * 4), }}, {"target_file_size_multiplier", { ToString(options_.target_file_size_multiplier), "1", "2", }}, {"max_bytes_for_level_base", { ToString(options_.max_bytes_for_level_base / 2), ToString(options_.max_bytes_for_level_base), ToString(options_.max_bytes_for_level_base * 2), }}, {"max_bytes_for_level_multiplier", { ToString(options_.max_bytes_for_level_multiplier), "1", "2", }}, {"max_sequential_skip_in_iterations", {"4", "8", "12"}}, }; options_table_ = std::move(options_tbl); for (const auto& iter : options_table_) { options_index_.push_back(iter.first); } return true; } void StressTest::InitDb() { uint64_t now = db_stress_env->NowMicros(); fprintf(stdout, "%s Initializing db_stress\n", db_stress_env->TimeToString(now / 1000000).c_str()); PrintEnv(); Open(); BuildOptionsTable(); } void StressTest::InitReadonlyDb(SharedState* shared) { uint64_t now = db_stress_env->NowMicros(); fprintf(stdout, "%s Preloading db with %" PRIu64 " KVs\n", db_stress_env->TimeToString(now / 1000000).c_str(), FLAGS_max_key); 
PreloadDbAndReopenAsReadOnly(FLAGS_max_key, shared); } bool StressTest::VerifySecondaries() { #ifndef ROCKSDB_LITE if (FLAGS_test_secondary) { uint64_t now = db_stress_env->NowMicros(); fprintf( stdout, "%s Start to verify secondaries against primary\n", db_stress_env->TimeToString(static_cast(now) / 1000000).c_str()); } for (size_t k = 0; k != secondaries_.size(); ++k) { Status s = secondaries_[k]->TryCatchUpWithPrimary(); if (!s.ok()) { fprintf(stderr, "Secondary failed to catch up with primary\n"); return false; } ReadOptions ropts; ropts.total_order_seek = true; // Verify only the default column family since the primary may have // dropped other column families after most recent reopen. std::unique_ptr iter1(db_->NewIterator(ropts)); std::unique_ptr iter2(secondaries_[k]->NewIterator(ropts)); for (iter1->SeekToFirst(), iter2->SeekToFirst(); iter1->Valid() && iter2->Valid(); iter1->Next(), iter2->Next()) { if (iter1->key().compare(iter2->key()) != 0 || iter1->value().compare(iter2->value())) { fprintf(stderr, "Secondary %d contains different data from " "primary.\nPrimary: %s : %s\nSecondary: %s : %s\n", static_cast(k), iter1->key().ToString(/*hex=*/true).c_str(), iter1->value().ToString(/*hex=*/true).c_str(), iter2->key().ToString(/*hex=*/true).c_str(), iter2->value().ToString(/*hex=*/true).c_str()); return false; } } if (iter1->Valid() && !iter2->Valid()) { fprintf(stderr, "Secondary %d record count is smaller than that of primary\n", static_cast(k)); return false; } else if (!iter1->Valid() && iter2->Valid()) { fprintf(stderr, "Secondary %d record count is larger than that of primary\n", static_cast(k)); return false; } } if (FLAGS_test_secondary) { uint64_t now = db_stress_env->NowMicros(); fprintf( stdout, "%s Verification of secondaries succeeded\n", db_stress_env->TimeToString(static_cast(now) / 1000000).c_str()); } #endif // ROCKSDB_LITE return true; } Status StressTest::AssertSame(DB* db, ColumnFamilyHandle* cf, ThreadState::SnapshotState& snap_state) { 
Status s; if (cf->GetName() != snap_state.cf_at_name) { return s; } ReadOptions ropt; ropt.snapshot = snap_state.snapshot; PinnableSlice exp_v(&snap_state.value); exp_v.PinSelf(); PinnableSlice v; s = db->Get(ropt, cf, snap_state.key, &v); if (!s.ok() && !s.IsNotFound()) { return s; } if (snap_state.status != s) { return Status::Corruption( "The snapshot gave inconsistent results for key " + ToString(Hash(snap_state.key.c_str(), snap_state.key.size(), 0)) + " in cf " + cf->GetName() + ": (" + snap_state.status.ToString() + ") vs. (" + s.ToString() + ")"); } if (s.ok()) { if (exp_v != v) { return Status::Corruption("The snapshot gave inconsistent values: (" + exp_v.ToString() + ") vs. (" + v.ToString() + ")"); } } if (snap_state.key_vec != nullptr) { // When `prefix_extractor` is set, seeking to beginning and scanning // across prefixes are only supported with `total_order_seek` set. ropt.total_order_seek = true; std::unique_ptr iterator(db->NewIterator(ropt)); std::unique_ptr> tmp_bitvec( new std::vector(FLAGS_max_key)); for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { uint64_t key_val; if (GetIntVal(iterator->key().ToString(), &key_val)) { (*tmp_bitvec.get())[key_val] = true; } } if (!std::equal(snap_state.key_vec->begin(), snap_state.key_vec->end(), tmp_bitvec.get()->begin())) { return Status::Corruption("Found inconsistent keys at this snapshot"); } } return Status::OK(); } void StressTest::VerificationAbort(SharedState* shared, std::string msg, Status s) const { fprintf(stderr, "Verification failed: %s. 
Status is %s\n", msg.c_str(), s.ToString().c_str()); shared->SetVerificationFailure(); } void StressTest::VerificationAbort(SharedState* shared, std::string msg, int cf, int64_t key) const { fprintf(stderr, "Verification failed for column family %d key %" PRIi64 ": %s\n", cf, key, msg.c_str()); shared->SetVerificationFailure(); } void StressTest::PrintStatistics() { if (dbstats) { fprintf(stdout, "STATISTICS:\n%s\n", dbstats->ToString().c_str()); } if (dbstats_secondaries) { fprintf(stdout, "Secondary instances STATISTICS:\n%s\n", dbstats_secondaries->ToString().c_str()); } } // Currently PreloadDb has to be single-threaded. void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, SharedState* shared) { WriteOptions write_opts; write_opts.disableWAL = FLAGS_disable_wal; if (FLAGS_sync) { write_opts.sync = true; } char value[100]; int cf_idx = 0; Status s; for (auto cfh : column_families_) { for (int64_t k = 0; k != number_of_keys; ++k) { std::string key_str = Key(k); Slice key = key_str; size_t sz = GenerateValue(0 /*value_base*/, value, sizeof(value)); Slice v(value, sz); shared->Put(cf_idx, k, 0, true /* pending */); if (FLAGS_use_merge) { if (!FLAGS_use_txn) { s = db_->Merge(write_opts, cfh, key, v); } else { #ifndef ROCKSDB_LITE Transaction* txn; s = NewTxn(write_opts, &txn); if (s.ok()) { s = txn->Merge(cfh, key, v); if (s.ok()) { s = CommitTxn(txn); } } #endif } } else { if (!FLAGS_use_txn) { s = db_->Put(write_opts, cfh, key, v); } else { #ifndef ROCKSDB_LITE Transaction* txn; s = NewTxn(write_opts, &txn); if (s.ok()) { s = txn->Put(cfh, key, v); if (s.ok()) { s = CommitTxn(txn); } } #endif } } shared->Put(cf_idx, k, 0, false /* pending */); if (!s.ok()) { break; } } if (!s.ok()) { break; } ++cf_idx; } if (s.ok()) { s = db_->Flush(FlushOptions(), column_families_); } if (s.ok()) { for (auto cf : column_families_) { delete cf; } column_families_.clear(); delete db_; db_ = nullptr; #ifndef ROCKSDB_LITE txn_db_ = nullptr; #endif 
db_preload_finished_.store(true); auto now = db_stress_env->NowMicros(); fprintf(stdout, "%s Reopening database in read-only\n", db_stress_env->TimeToString(now / 1000000).c_str()); // Reopen as read-only, can ignore all options related to updates Open(); } else { fprintf(stderr, "Failed to preload db"); exit(1); } } Status StressTest::SetOptions(ThreadState* thread) { assert(FLAGS_set_options_one_in > 0); std::unordered_map opts; std::string name = options_index_[thread->rand.Next() % options_index_.size()]; int value_idx = thread->rand.Next() % options_table_[name].size(); if (name == "soft_rate_limit" || name == "hard_rate_limit") { opts["soft_rate_limit"] = options_table_["soft_rate_limit"][value_idx]; opts["hard_rate_limit"] = options_table_["hard_rate_limit"][value_idx]; } else if (name == "level0_file_num_compaction_trigger" || name == "level0_slowdown_writes_trigger" || name == "level0_stop_writes_trigger") { opts["level0_file_num_compaction_trigger"] = options_table_["level0_file_num_compaction_trigger"][value_idx]; opts["level0_slowdown_writes_trigger"] = options_table_["level0_slowdown_writes_trigger"][value_idx]; opts["level0_stop_writes_trigger"] = options_table_["level0_stop_writes_trigger"][value_idx]; } else { opts[name] = options_table_[name][value_idx]; } int rand_cf_idx = thread->rand.Next() % FLAGS_column_families; auto cfh = column_families_[rand_cf_idx]; return db_->SetOptions(cfh, opts); } #ifndef ROCKSDB_LITE Status StressTest::NewTxn(WriteOptions& write_opts, Transaction** txn) { if (!FLAGS_use_txn) { return Status::InvalidArgument("NewTxn when FLAGS_use_txn is not set"); } static std::atomic txn_id = {0}; TransactionOptions txn_options; *txn = txn_db_->BeginTransaction(write_opts, txn_options); auto istr = std::to_string(txn_id.fetch_add(1)); Status s = (*txn)->SetName("xid" + istr); return s; } Status StressTest::CommitTxn(Transaction* txn) { if (!FLAGS_use_txn) { return Status::InvalidArgument("CommitTxn when FLAGS_use_txn is not set"); 
} Status s = txn->Prepare(); if (s.ok()) { s = txn->Commit(); } delete txn; return s; } Status StressTest::RollbackTxn(Transaction* txn) { if (!FLAGS_use_txn) { return Status::InvalidArgument( "RollbackTxn when FLAGS_use_txn is not" " set"); } Status s = txn->Rollback(); delete txn; return s; } #endif void StressTest::OperateDb(ThreadState* thread) { ReadOptions read_opts(FLAGS_verify_checksum, true); WriteOptions write_opts; auto shared = thread->shared; char value[100]; std::string from_db; if (FLAGS_sync) { write_opts.sync = true; } write_opts.disableWAL = FLAGS_disable_wal; const int prefixBound = static_cast(FLAGS_readpercent) + static_cast(FLAGS_prefixpercent); const int writeBound = prefixBound + static_cast(FLAGS_writepercent); const int delBound = writeBound + static_cast(FLAGS_delpercent); const int delRangeBound = delBound + static_cast(FLAGS_delrangepercent); const uint64_t ops_per_open = FLAGS_ops_per_thread / (FLAGS_reopen + 1); #ifndef NDEBUG if (FLAGS_read_fault_one_in) { fault_fs_guard->SetThreadLocalReadErrorContext(thread->shared->GetSeed(), FLAGS_read_fault_one_in); } #endif // NDEBUG thread->stats.Start(); for (int open_cnt = 0; open_cnt <= FLAGS_reopen; ++open_cnt) { if (thread->shared->HasVerificationFailedYet() || thread->shared->ShouldStopTest()) { break; } if (open_cnt != 0) { thread->stats.FinishedSingleOp(); MutexLock l(thread->shared->GetMutex()); while (!thread->snapshot_queue.empty()) { db_->ReleaseSnapshot(thread->snapshot_queue.front().second.snapshot); delete thread->snapshot_queue.front().second.key_vec; thread->snapshot_queue.pop(); } thread->shared->IncVotedReopen(); if (thread->shared->AllVotedReopen()) { thread->shared->GetStressTest()->Reopen(thread); thread->shared->GetCondVar()->SignalAll(); } else { thread->shared->GetCondVar()->Wait(); } // Commenting this out as we don't want to reset stats on each open. 
// thread->stats.Start(); } for (uint64_t i = 0; i < ops_per_open; i++) { if (thread->shared->HasVerificationFailedYet()) { break; } // Change Options if (thread->rand.OneInOpt(FLAGS_set_options_one_in)) { SetOptions(thread); } if (thread->rand.OneInOpt(FLAGS_set_in_place_one_in)) { options_.inplace_update_support ^= options_.inplace_update_support; } if (thread->tid == 0 && FLAGS_verify_db_one_in > 0 && thread->rand.OneIn(FLAGS_verify_db_one_in)) { ContinuouslyVerifyDb(thread); if (thread->shared->ShouldStopTest()) { break; } } MaybeClearOneColumnFamily(thread); if (thread->rand.OneInOpt(FLAGS_sync_wal_one_in)) { Status s = db_->SyncWAL(); if (!s.ok() && !s.IsNotSupported()) { fprintf(stderr, "SyncWAL() failed: %s\n", s.ToString().c_str()); } } int rand_column_family = thread->rand.Next() % FLAGS_column_families; ColumnFamilyHandle* column_family = column_families_[rand_column_family]; if (thread->rand.OneInOpt(FLAGS_compact_files_one_in)) { TestCompactFiles(thread, column_family); } int64_t rand_key = GenerateOneKey(thread, i); std::string keystr = Key(rand_key); Slice key = keystr; std::unique_ptr lock; if (ShouldAcquireMutexOnKey()) { lock.reset(new MutexLock( shared->GetMutexForKey(rand_column_family, rand_key))); } if (thread->rand.OneInOpt(FLAGS_compact_range_one_in)) { TestCompactRange(thread, rand_key, key, column_family); if (thread->shared->HasVerificationFailedYet()) { break; } } std::vector rand_column_families = GenerateColumnFamilies(FLAGS_column_families, rand_column_family); if (thread->rand.OneInOpt(FLAGS_flush_one_in)) { Status status = TestFlush(rand_column_families); if (!status.ok()) { fprintf(stdout, "Unable to perform Flush(): %s\n", status.ToString().c_str()); } } #ifndef ROCKSDB_LITE // Verify GetLiveFiles with a 1 in N chance. 
if (thread->rand.OneInOpt(FLAGS_get_live_files_one_in)) { Status status = VerifyGetLiveFiles(); if (!status.ok()) { VerificationAbort(shared, "VerifyGetLiveFiles status not OK", status); } } // Verify GetSortedWalFiles with a 1 in N chance. if (thread->rand.OneInOpt(FLAGS_get_sorted_wal_files_one_in)) { Status status = VerifyGetSortedWalFiles(); if (!status.ok()) { VerificationAbort(shared, "VerifyGetSortedWalFiles status not OK", status); } } // Verify GetCurrentWalFile with a 1 in N chance. if (thread->rand.OneInOpt(FLAGS_get_current_wal_file_one_in)) { Status status = VerifyGetCurrentWalFile(); if (!status.ok()) { VerificationAbort(shared, "VerifyGetCurrentWalFile status not OK", status); } } #endif // !ROCKSDB_LITE if (thread->rand.OneInOpt(FLAGS_pause_background_one_in)) { Status status = TestPauseBackground(thread); if (!status.ok()) { VerificationAbort( shared, "Pause/ContinueBackgroundWork status not OK", status); } } #ifndef ROCKSDB_LITE if (thread->rand.OneInOpt(FLAGS_verify_checksum_one_in)) { Status status = db_->VerifyChecksum(); if (!status.ok()) { VerificationAbort(shared, "VerifyChecksum status not OK", status); } } #endif std::vector rand_keys = GenerateKeys(rand_key); if (thread->rand.OneInOpt(FLAGS_ingest_external_file_one_in)) { TestIngestExternalFile(thread, rand_column_families, rand_keys, lock); } if (thread->rand.OneInOpt(FLAGS_backup_one_in)) { Status s = TestBackupRestore(thread, rand_column_families, rand_keys); if (!s.ok()) { VerificationAbort(shared, "Backup/restore gave inconsistent state", s); } } if (thread->rand.OneInOpt(FLAGS_checkpoint_one_in)) { Status s = TestCheckpoint(thread, rand_column_families, rand_keys); if (!s.ok()) { VerificationAbort(shared, "Checkpoint gave inconsistent state", s); } } #ifndef ROCKSDB_LITE if (thread->rand.OneInOpt(FLAGS_approximate_size_one_in)) { Status s = TestApproximateSize(thread, i, rand_column_families, rand_keys); if (!s.ok()) { VerificationAbort(shared, "ApproximateSize Failed", s); } } 
#endif // !ROCKSDB_LITE if (thread->rand.OneInOpt(FLAGS_acquire_snapshot_one_in)) { TestAcquireSnapshot(thread, rand_column_family, keystr, i); } /*always*/ { Status s = MaybeReleaseSnapshots(thread, i); if (!s.ok()) { VerificationAbort(shared, "Snapshot gave inconsistent state", s); } } int prob_op = thread->rand.Uniform(100); // Reset this in case we pick something other than a read op. We don't // want to use a stale value when deciding at the beginning of the loop // whether to vote to reopen if (prob_op >= 0 && prob_op < static_cast(FLAGS_readpercent)) { assert(0 <= prob_op); // OPERATION read if (FLAGS_use_multiget) { // Leave room for one more iteration of the loop with a single key // batch. This is to ensure that each thread does exactly the same // number of ops int multiget_batch_size = static_cast( std::min(static_cast(thread->rand.Uniform(64)), FLAGS_ops_per_thread - i - 1)); // If its the last iteration, ensure that multiget_batch_size is 1 multiget_batch_size = std::max(multiget_batch_size, 1); rand_keys = GenerateNKeys(thread, multiget_batch_size, i); TestMultiGet(thread, read_opts, rand_column_families, rand_keys); i += multiget_batch_size - 1; } else { TestGet(thread, read_opts, rand_column_families, rand_keys); } } else if (prob_op < prefixBound) { assert(static_cast(FLAGS_readpercent) <= prob_op); // OPERATION prefix scan // keys are 8 bytes long, prefix size is FLAGS_prefix_size. There are // (8 - FLAGS_prefix_size) bytes besides the prefix. 
So there will // be 2 ^ ((8 - FLAGS_prefix_size) * 8) possible keys with the same // prefix TestPrefixScan(thread, read_opts, rand_column_families, rand_keys); } else if (prob_op < writeBound) { assert(prefixBound <= prob_op); // OPERATION write TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys, value, lock); } else if (prob_op < delBound) { assert(writeBound <= prob_op); // OPERATION delete TestDelete(thread, write_opts, rand_column_families, rand_keys, lock); } else if (prob_op < delRangeBound) { assert(delBound <= prob_op); // OPERATION delete range TestDeleteRange(thread, write_opts, rand_column_families, rand_keys, lock); } else { assert(delRangeBound <= prob_op); // OPERATION iterate int num_seeks = static_cast( std::min(static_cast(thread->rand.Uniform(4)), FLAGS_ops_per_thread - i - 1)); rand_keys = GenerateNKeys(thread, num_seeks, i); i += num_seeks - 1; TestIterate(thread, read_opts, rand_column_families, rand_keys); } thread->stats.FinishedSingleOp(); #ifndef ROCKSDB_LITE uint32_t tid = thread->tid; assert(secondaries_.empty() || static_cast(tid) < secondaries_.size()); if (thread->rand.OneInOpt(FLAGS_secondary_catch_up_one_in)) { Status s = secondaries_[tid]->TryCatchUpWithPrimary(); if (!s.ok()) { VerificationAbort(shared, "Secondary instance failed to catch up", s); break; } } #endif } } while (!thread->snapshot_queue.empty()) { db_->ReleaseSnapshot(thread->snapshot_queue.front().second.snapshot); delete thread->snapshot_queue.front().second.key_vec; thread->snapshot_queue.pop(); } thread->stats.Stop(); } #ifndef ROCKSDB_LITE // Generated a list of keys that close to boundaries of SST keys. // If there isn't any SST file in the DB, return empty list. 
std::vector StressTest::GetWhiteBoxKeys(ThreadState* thread, DB* db, ColumnFamilyHandle* cfh, size_t num_keys) { ColumnFamilyMetaData cfmd; db->GetColumnFamilyMetaData(cfh, &cfmd); std::vector boundaries; for (const LevelMetaData& lmd : cfmd.levels) { for (const SstFileMetaData& sfmd : lmd.files) { boundaries.push_back(sfmd.smallestkey); boundaries.push_back(sfmd.largestkey); } } if (boundaries.empty()) { return {}; } std::vector ret; for (size_t j = 0; j < num_keys; j++) { std::string k = boundaries[thread->rand.Uniform(static_cast(boundaries.size()))]; if (thread->rand.OneIn(3)) { // Reduce one byte from the string for (int i = static_cast(k.length()) - 1; i >= 0; i--) { uint8_t cur = k[i]; if (cur > 0) { k[i] = static_cast(cur - 1); break; } else if (i > 0) { k[i] = 0xFFu; } } } else if (thread->rand.OneIn(2)) { // Add one byte to the string for (int i = static_cast(k.length()) - 1; i >= 0; i--) { uint8_t cur = k[i]; if (cur < 255) { k[i] = static_cast(cur + 1); break; } else if (i > 0) { k[i] = 0x00; } } } ret.push_back(k); } return ret; } #endif // !ROCKSDB_LITE // Given a key K, this creates an iterator which scans to K and then // does a random sequence of Next/Prev operations. Status StressTest::TestIterate(ThreadState* thread, const ReadOptions& read_opts, const std::vector& rand_column_families, const std::vector& rand_keys) { Status s; const Snapshot* snapshot = db_->GetSnapshot(); ReadOptions readoptionscopy = read_opts; readoptionscopy.snapshot = snapshot; bool expect_total_order = false; if (thread->rand.OneIn(16)) { // When prefix extractor is used, it's useful to cover total order seek. 
readoptionscopy.total_order_seek = true; expect_total_order = true; } else if (thread->rand.OneIn(4)) { readoptionscopy.total_order_seek = false; readoptionscopy.auto_prefix_mode = true; expect_total_order = true; } else if (options_.prefix_extractor.get() == nullptr) { expect_total_order = true; } std::string upper_bound_str; Slice upper_bound; if (thread->rand.OneIn(16)) { // in 1/16 chance, set a iterator upper bound int64_t rand_upper_key = GenerateOneKey(thread, FLAGS_ops_per_thread); upper_bound_str = Key(rand_upper_key); upper_bound = Slice(upper_bound_str); // uppder_bound can be smaller than seek key, but the query itself // should not crash either. readoptionscopy.iterate_upper_bound = &upper_bound; } std::string lower_bound_str; Slice lower_bound; if (thread->rand.OneIn(16)) { // in 1/16 chance, enable iterator lower bound int64_t rand_lower_key = GenerateOneKey(thread, FLAGS_ops_per_thread); lower_bound_str = Key(rand_lower_key); lower_bound = Slice(lower_bound_str); // uppder_bound can be smaller than seek key, but the query itself // should not crash either. readoptionscopy.iterate_lower_bound = &lower_bound; } auto cfh = column_families_[rand_column_families[0]]; std::unique_ptr iter(db_->NewIterator(readoptionscopy, cfh)); std::vector key_str; if (thread->rand.OneIn(16)) { // Generate keys close to lower or upper bound of SST files. key_str = GetWhiteBoxKeys(thread, db_, cfh, rand_keys.size()); } if (key_str.empty()) { // If key string is not geneerated using white block keys, // Use randomized key passe in. for (int64_t rkey : rand_keys) { key_str.push_back(Key(rkey)); } } std::string op_logs; const size_t kOpLogsLimit = 10000; for (const std::string& skey : key_str) { if (op_logs.size() > kOpLogsLimit) { // Shouldn't take too much memory for the history log. Clear it. op_logs = "(cleared...)\n"; } Slice key = skey; if (readoptionscopy.iterate_upper_bound != nullptr && thread->rand.OneIn(2)) { // 1/2 chance, change the upper bound. 
// It is possible that it is changed without first use, but there is no // problem with that. int64_t rand_upper_key = GenerateOneKey(thread, FLAGS_ops_per_thread); upper_bound_str = Key(rand_upper_key); upper_bound = Slice(upper_bound_str); } else if (readoptionscopy.iterate_lower_bound != nullptr && thread->rand.OneIn(4)) { // 1/4 chance, change the lower bound. // It is possible that it is changed without first use, but there is no // problem with that. int64_t rand_lower_key = GenerateOneKey(thread, FLAGS_ops_per_thread); lower_bound_str = Key(rand_lower_key); lower_bound = Slice(lower_bound_str); } // Record some options to op_logs; op_logs += "total_order_seek: "; op_logs += (readoptionscopy.total_order_seek ? "1 " : "0 "); op_logs += "auto_prefix_mode: "; op_logs += (readoptionscopy.auto_prefix_mode ? "1 " : "0 "); if (readoptionscopy.iterate_upper_bound != nullptr) { op_logs += "ub: " + upper_bound.ToString(true) + " "; } if (readoptionscopy.iterate_lower_bound != nullptr) { op_logs += "lb: " + lower_bound.ToString(true) + " "; } // Set up an iterator and does the same without bounds and with total // order seek and compare the results. This is to identify bugs related // to bounds, prefix extractor or reseeking. Sometimes we are comparing // iterators with the same set-up, and it doesn't hurt to check them // to be equal. 
ReadOptions cmp_ro; cmp_ro.snapshot = snapshot; cmp_ro.total_order_seek = true; ColumnFamilyHandle* cmp_cfh = GetControlCfh(thread, rand_column_families[0]); std::unique_ptr cmp_iter(db_->NewIterator(cmp_ro, cmp_cfh)); bool diverged = false; bool support_seek_first_or_last = expect_total_order; LastIterateOp last_op; if (support_seek_first_or_last && thread->rand.OneIn(100)) { iter->SeekToFirst(); cmp_iter->SeekToFirst(); last_op = kLastOpSeekToFirst; op_logs += "STF "; } else if (support_seek_first_or_last && thread->rand.OneIn(100)) { iter->SeekToLast(); cmp_iter->SeekToLast(); last_op = kLastOpSeekToLast; op_logs += "STL "; } else if (thread->rand.OneIn(8)) { iter->SeekForPrev(key); cmp_iter->SeekForPrev(key); last_op = kLastOpSeekForPrev; op_logs += "SFP " + key.ToString(true) + " "; } else { iter->Seek(key); cmp_iter->Seek(key); last_op = kLastOpSeek; op_logs += "S " + key.ToString(true) + " "; } VerifyIterator(thread, cmp_cfh, readoptionscopy, iter.get(), cmp_iter.get(), last_op, key, op_logs, &diverged); bool no_reverse = (FLAGS_memtablerep == "prefix_hash" && !expect_total_order); for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); i++) { if (no_reverse || thread->rand.OneIn(2)) { iter->Next(); if (!diverged) { assert(cmp_iter->Valid()); cmp_iter->Next(); } op_logs += "N"; } else { iter->Prev(); if (!diverged) { assert(cmp_iter->Valid()); cmp_iter->Prev(); } op_logs += "P"; } last_op = kLastOpNextOrPrev; VerifyIterator(thread, cmp_cfh, readoptionscopy, iter.get(), cmp_iter.get(), last_op, key, op_logs, &diverged); } if (s.ok()) { thread->stats.AddIterations(1); } else { fprintf(stderr, "TestIterate error: %s\n", s.ToString().c_str()); thread->stats.AddErrors(1); break; } op_logs += "; "; } db_->ReleaseSnapshot(snapshot); return s; } #ifndef ROCKSDB_LITE // Test the return status of GetLiveFiles. 
Status StressTest::VerifyGetLiveFiles() const { std::vector live_file; uint64_t manifest_size = 0; return db_->GetLiveFiles(live_file, &manifest_size); } // Test the return status of GetSortedWalFiles. Status StressTest::VerifyGetSortedWalFiles() const { VectorLogPtr log_ptr; return db_->GetSortedWalFiles(log_ptr); } // Test the return status of GetCurrentWalFile. Status StressTest::VerifyGetCurrentWalFile() const { std::unique_ptr cur_wal_file; return db_->GetCurrentWalFile(&cur_wal_file); } #endif // !ROCKSDB_LITE // Compare the two iterator, iter and cmp_iter are in the same position, // unless iter might be made invalidate or undefined because of // upper or lower bounds, or prefix extractor. // Will flag failure if the verification fails. // diverged = true if the two iterator is already diverged. // True if verification passed, false if not. void StressTest::VerifyIterator(ThreadState* thread, ColumnFamilyHandle* cmp_cfh, const ReadOptions& ro, Iterator* iter, Iterator* cmp_iter, LastIterateOp op, const Slice& seek_key, const std::string& op_logs, bool* diverged) { if (*diverged) { return; } if (op == kLastOpSeekToFirst && ro.iterate_lower_bound != nullptr) { // SeekToFirst() with lower bound is not well defined. *diverged = true; return; } else if (op == kLastOpSeekToLast && ro.iterate_upper_bound != nullptr) { // SeekToLast() with higher bound is not well defined. *diverged = true; return; } else if (op == kLastOpSeek && ro.iterate_lower_bound != nullptr && (options_.comparator->Compare(*ro.iterate_lower_bound, seek_key) >= 0 || (ro.iterate_upper_bound != nullptr && options_.comparator->Compare(*ro.iterate_lower_bound, *ro.iterate_upper_bound) >= 0))) { // Lower bound behavior is not well defined if it is larger than // seek key or upper bound. Disable the check for now. 
*diverged = true; return; } else if (op == kLastOpSeekForPrev && ro.iterate_upper_bound != nullptr && (options_.comparator->Compare(*ro.iterate_upper_bound, seek_key) <= 0 || (ro.iterate_lower_bound != nullptr && options_.comparator->Compare(*ro.iterate_lower_bound, *ro.iterate_upper_bound) >= 0))) { // Uppder bound behavior is not well defined if it is smaller than // seek key or lower bound. Disable the check for now. *diverged = true; return; } const SliceTransform* pe = (ro.total_order_seek || ro.auto_prefix_mode) ? nullptr : options_.prefix_extractor.get(); const Comparator* cmp = options_.comparator; if (iter->Valid() && !cmp_iter->Valid()) { if (pe != nullptr) { if (!pe->InDomain(seek_key)) { // Prefix seek a non-in-domain key is undefined. Skip checking for // this scenario. *diverged = true; return; } else if (!pe->InDomain(iter->key())) { // out of range is iterator key is not in domain anymore. *diverged = true; return; } else if (pe->Transform(iter->key()) != pe->Transform(seek_key)) { *diverged = true; return; } } fprintf(stderr, "Control interator is invalid but iterator has key %s " "%s\n", iter->key().ToString(true).c_str(), op_logs.c_str()); *diverged = true; } else if (cmp_iter->Valid()) { // Iterator is not valid. It can be legimate if it has already been // out of upper or lower bound, or filtered out by prefix iterator. const Slice& total_order_key = cmp_iter->key(); if (pe != nullptr) { if (!pe->InDomain(seek_key)) { // Prefix seek a non-in-domain key is undefined. Skip checking for // this scenario. *diverged = true; return; } if (!pe->InDomain(total_order_key) || pe->Transform(total_order_key) != pe->Transform(seek_key)) { // If the prefix is exhausted, the only thing needs to check // is the iterator isn't return a position in prefix. // Either way, checking can stop from here. 
*diverged = true; if (!iter->Valid() || !pe->InDomain(iter->key()) || pe->Transform(iter->key()) != pe->Transform(seek_key)) { return; } fprintf(stderr, "Iterator stays in prefix but contol doesn't" " iterator key %s control iterator key %s %s\n", iter->key().ToString(true).c_str(), cmp_iter->key().ToString(true).c_str(), op_logs.c_str()); } } // Check upper or lower bounds. if (!*diverged) { if ((iter->Valid() && iter->key() != cmp_iter->key()) || (!iter->Valid() && (ro.iterate_upper_bound == nullptr || cmp->Compare(total_order_key, *ro.iterate_upper_bound) < 0) && (ro.iterate_lower_bound == nullptr || cmp->Compare(total_order_key, *ro.iterate_lower_bound) > 0))) { fprintf(stderr, "Iterator diverged from control iterator which" " has value %s %s\n", total_order_key.ToString(true).c_str(), op_logs.c_str()); if (iter->Valid()) { fprintf(stderr, "iterator has value %s\n", iter->key().ToString(true).c_str()); } else { fprintf(stderr, "iterator is not valid\n"); } *diverged = true; } } } if (*diverged) { fprintf(stderr, "Control CF %s\n", cmp_cfh->GetName().c_str()); thread->stats.AddErrors(1); // Fail fast to preserve the DB state. 
thread->shared->SetVerificationFailure(); } } #ifdef ROCKSDB_LITE Status StressTest::TestBackupRestore( ThreadState* /* thread */, const std::vector& /* rand_column_families */, const std::vector& /* rand_keys */) { assert(false); fprintf(stderr, "RocksDB lite does not support " "TestBackupRestore\n"); std::terminate(); } Status StressTest::TestCheckpoint( ThreadState* /* thread */, const std::vector& /* rand_column_families */, const std::vector& /* rand_keys */) { assert(false); fprintf(stderr, "RocksDB lite does not support " "TestCheckpoint\n"); std::terminate(); } void StressTest::TestCompactFiles(ThreadState* /* thread */, ColumnFamilyHandle* /* column_family */) { assert(false); fprintf(stderr, "RocksDB lite does not support " "CompactFiles\n"); std::terminate(); } #else // ROCKSDB_LITE Status StressTest::TestBackupRestore( ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys) { // Note the column families chosen by `rand_column_families` cannot be // dropped while the locks for `rand_keys` are held. So we should not have // to worry about accessing those column families throughout this function. 
assert(rand_column_families.size() == rand_keys.size()); std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid); std::string restore_dir = FLAGS_db + "/.restore" + ToString(thread->tid); BackupableDBOptions backup_opts(backup_dir); BackupEngine* backup_engine = nullptr; Status s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); if (s.ok()) { s = backup_engine->CreateNewBackup(db_); } if (s.ok()) { delete backup_engine; backup_engine = nullptr; s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine); } if (s.ok()) { s = backup_engine->RestoreDBFromLatestBackup(restore_dir /* db_dir */, restore_dir /* wal_dir */); } if (s.ok()) { s = backup_engine->PurgeOldBackups(0 /* num_backups_to_keep */); } DB* restored_db = nullptr; std::vector restored_cf_handles; if (s.ok()) { Options restore_options(options_); restore_options.listeners.clear(); std::vector cf_descriptors; // TODO(ajkr): `column_family_names_` is not safe to access here when // `clear_column_family_one_in != 0`. But we can't easily switch to // `ListColumnFamilies` to get names because it won't necessarily give // the same order as `column_family_names_`. 
assert(FLAGS_clear_column_family_one_in == 0); for (auto name : column_family_names_) { cf_descriptors.emplace_back(name, ColumnFamilyOptions(restore_options)); } s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors, &restored_cf_handles, &restored_db); } // for simplicity, currently only verifies existence/non-existence of a few // keys for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) { std::string key_str = Key(rand_keys[i]); Slice key = key_str; std::string restored_value; Status get_status = restored_db->Get( ReadOptions(), restored_cf_handles[rand_column_families[i]], key, &restored_value); bool exists = thread->shared->Exists(rand_column_families[i], rand_keys[i]); if (get_status.ok()) { if (!exists) { s = Status::Corruption("key exists in restore but not in original db"); } } else if (get_status.IsNotFound()) { if (exists) { s = Status::Corruption("key exists in original db but not in restore"); } } else { s = get_status; } } if (backup_engine != nullptr) { delete backup_engine; backup_engine = nullptr; } if (restored_db != nullptr) { for (auto* cf_handle : restored_cf_handles) { restored_db->DestroyColumnFamilyHandle(cf_handle); } delete restored_db; restored_db = nullptr; } if (!s.ok()) { fprintf(stderr, "A backup/restore operation failed with: %s\n", s.ToString().c_str()); } return s; } #ifndef ROCKSDB_LITE Status StressTest::TestApproximateSize( ThreadState* thread, uint64_t iteration, const std::vector& rand_column_families, const std::vector& rand_keys) { // rand_keys likely only has one key. Just use the first one. assert(!rand_keys.empty()); assert(!rand_column_families.empty()); int64_t key1 = rand_keys[0]; int64_t key2; if (thread->rand.OneIn(2)) { // Two totally random keys. This tends to cover large ranges. key2 = GenerateOneKey(thread, iteration); if (key2 < key1) { std::swap(key1, key2); } } else { // Unless users pass a very large FLAGS_max_key, it we should not worry // about overflow. 
It is for testing, so we skip the overflow checking // for simplicity. key2 = key1 + static_cast(thread->rand.Uniform(1000)); } std::string key1_str = Key(key1); std::string key2_str = Key(key2); Range range{Slice(key1_str), Slice(key2_str)}; SizeApproximationOptions sao; sao.include_memtabtles = thread->rand.OneIn(2); if (sao.include_memtabtles) { sao.include_files = thread->rand.OneIn(2); } if (thread->rand.OneIn(2)) { if (thread->rand.OneIn(2)) { sao.files_size_error_margin = 0.0; } else { sao.files_size_error_margin = static_cast(thread->rand.Uniform(3)); } } uint64_t result; return db_->GetApproximateSizes( sao, column_families_[rand_column_families[0]], &range, 1, &result); } #endif // ROCKSDB_LITE Status StressTest::TestCheckpoint(ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys) { // Note the column families chosen by `rand_column_families` cannot be // dropped while the locks for `rand_keys` are held. So we should not have // to worry about accessing those column families throughout this function. assert(rand_column_families.size() == rand_keys.size()); std::string checkpoint_dir = FLAGS_db + "/.checkpoint" + ToString(thread->tid); Options tmp_opts(options_); tmp_opts.listeners.clear(); tmp_opts.env = db_stress_env->target(); DestroyDB(checkpoint_dir, tmp_opts); Checkpoint* checkpoint = nullptr; Status s = Checkpoint::Create(db_, &checkpoint); if (s.ok()) { s = checkpoint->CreateCheckpoint(checkpoint_dir); } std::vector cf_handles; DB* checkpoint_db = nullptr; if (s.ok()) { delete checkpoint; checkpoint = nullptr; Options options(options_); options.listeners.clear(); std::vector cf_descs; // TODO(ajkr): `column_family_names_` is not safe to access here when // `clear_column_family_one_in != 0`. But we can't easily switch to // `ListColumnFamilies` to get names because it won't necessarily give // the same order as `column_family_names_`. 
if (FLAGS_clear_column_family_one_in == 0) { for (const auto& name : column_family_names_) { cf_descs.emplace_back(name, ColumnFamilyOptions(options)); } s = DB::OpenForReadOnly(DBOptions(options), checkpoint_dir, cf_descs, &cf_handles, &checkpoint_db); } } if (checkpoint_db != nullptr) { for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) { std::string key_str = Key(rand_keys[i]); Slice key = key_str; std::string value; Status get_status = checkpoint_db->Get( ReadOptions(), cf_handles[rand_column_families[i]], key, &value); bool exists = thread->shared->Exists(rand_column_families[i], rand_keys[i]); if (get_status.ok()) { if (!exists) { s = Status::Corruption( "key exists in checkpoint but not in original db"); } } else if (get_status.IsNotFound()) { if (exists) { s = Status::Corruption( "key exists in original db but not in checkpoint"); } } else { s = get_status; } } for (auto cfh : cf_handles) { delete cfh; } cf_handles.clear(); delete checkpoint_db; checkpoint_db = nullptr; } DestroyDB(checkpoint_dir, tmp_opts); if (!s.ok()) { fprintf(stderr, "A checkpoint operation failed with: %s\n", s.ToString().c_str()); } return s; } void StressTest::TestCompactFiles(ThreadState* thread, ColumnFamilyHandle* column_family) { ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_meta_data; db_->GetColumnFamilyMetaData(column_family, &cf_meta_data); // Randomly compact up to three consecutive files from a level const int kMaxRetry = 3; for (int attempt = 0; attempt < kMaxRetry; ++attempt) { size_t random_level = thread->rand.Uniform(static_cast(cf_meta_data.levels.size())); const auto& files = cf_meta_data.levels[random_level].files; if (files.size() > 0) { size_t random_file_index = thread->rand.Uniform(static_cast(files.size())); if (files[random_file_index].being_compacted) { // Retry as the selected file is currently being compacted continue; } std::vector input_files; input_files.push_back(files[random_file_index].name); if (random_file_index > 0 && 
!files[random_file_index - 1].being_compacted) { input_files.push_back(files[random_file_index - 1].name); } if (random_file_index + 1 < files.size() && !files[random_file_index + 1].being_compacted) { input_files.push_back(files[random_file_index + 1].name); } size_t output_level = std::min(random_level + 1, cf_meta_data.levels.size() - 1); auto s = db_->CompactFiles(CompactionOptions(), column_family, input_files, static_cast(output_level)); if (!s.ok()) { fprintf(stdout, "Unable to perform CompactFiles(): %s\n", s.ToString().c_str()); thread->stats.AddNumCompactFilesFailed(1); } else { thread->stats.AddNumCompactFilesSucceed(1); } break; } } } #endif // ROCKSDB_LITE Status StressTest::TestFlush(const std::vector& rand_column_families) { FlushOptions flush_opts; std::vector cfhs; std::for_each(rand_column_families.begin(), rand_column_families.end(), [this, &cfhs](int k) { cfhs.push_back(column_families_[k]); }); return db_->Flush(flush_opts, cfhs); } Status StressTest::TestPauseBackground(ThreadState* thread) { Status status = db_->PauseBackgroundWork(); if (!status.ok()) { return status; } // To avoid stalling/deadlocking ourself in this thread, just // sleep here during pause and let other threads do db operations. // Sleep up to ~16 seconds (2**24 microseconds), but very skewed // toward short pause. (1 chance in 25 of pausing >= 1s; // 1 chance in 625 of pausing full 16s.) int pwr2_micros = std::min(thread->rand.Uniform(25), thread->rand.Uniform(25)); db_stress_env->SleepForMicroseconds(1 << pwr2_micros); return db_->ContinueBackgroundWork(); } void StressTest::TestAcquireSnapshot(ThreadState* thread, int rand_column_family, const std::string& keystr, uint64_t i) { Slice key = keystr; ColumnFamilyHandle* column_family = column_families_[rand_column_family]; #ifndef ROCKSDB_LITE auto db_impl = reinterpret_cast(db_->GetRootDB()); const bool ww_snapshot = thread->rand.OneIn(10); const Snapshot* snapshot = ww_snapshot ? 
db_impl->GetSnapshotForWriteConflictBoundary() : db_->GetSnapshot(); #else const Snapshot* snapshot = db_->GetSnapshot(); #endif // !ROCKSDB_LITE ReadOptions ropt; ropt.snapshot = snapshot; std::string value_at; // When taking a snapshot, we also read a key from that snapshot. We // will later read the same key before releasing the snapshot and // verify that the results are the same. auto status_at = db_->Get(ropt, column_family, key, &value_at); std::vector* key_vec = nullptr; if (FLAGS_compare_full_db_state_snapshot && (thread->tid == 0)) { key_vec = new std::vector(FLAGS_max_key); // When `prefix_extractor` is set, seeking to beginning and scanning // across prefixes are only supported with `total_order_seek` set. ropt.total_order_seek = true; std::unique_ptr iterator(db_->NewIterator(ropt)); for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { uint64_t key_val; if (GetIntVal(iterator->key().ToString(), &key_val)) { (*key_vec)[key_val] = true; } } } ThreadState::SnapshotState snap_state = { snapshot, rand_column_family, column_family->GetName(), keystr, status_at, value_at, key_vec}; uint64_t hold_for = FLAGS_snapshot_hold_ops; if (FLAGS_long_running_snapshots) { // Hold 10% of snapshots for 10x more if (thread->rand.OneIn(10)) { assert(hold_for < port::kMaxInt64 / 10); hold_for *= 10; // Hold 1% of snapshots for 100x more if (thread->rand.OneIn(10)) { assert(hold_for < port::kMaxInt64 / 10); hold_for *= 10; } } } uint64_t release_at = std::min(FLAGS_ops_per_thread - 1, i + hold_for); thread->snapshot_queue.emplace(release_at, snap_state); } Status StressTest::MaybeReleaseSnapshots(ThreadState* thread, uint64_t i) { while (!thread->snapshot_queue.empty() && i >= thread->snapshot_queue.front().first) { auto snap_state = thread->snapshot_queue.front().second; assert(snap_state.snapshot); // Note: this is unsafe as the cf might be dropped concurrently. 
But // it is ok since unclean cf drop is cunnrently not supported by write // prepared transactions. Status s = AssertSame(db_, column_families_[snap_state.cf_at], snap_state); db_->ReleaseSnapshot(snap_state.snapshot); delete snap_state.key_vec; thread->snapshot_queue.pop(); if (!s.ok()) { return s; } } return Status::OK(); } void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key, const Slice& start_key, ColumnFamilyHandle* column_family) { int64_t end_key_num; if (port::kMaxInt64 - rand_key < FLAGS_compact_range_width) { end_key_num = port::kMaxInt64; } else { end_key_num = FLAGS_compact_range_width + rand_key; } std::string end_key_buf = Key(end_key_num); Slice end_key(end_key_buf); CompactRangeOptions cro; cro.exclusive_manual_compaction = static_cast(thread->rand.Next() % 2); cro.change_level = static_cast(thread->rand.Next() % 2); std::vector bottom_level_styles = { BottommostLevelCompaction::kSkip, BottommostLevelCompaction::kIfHaveCompactionFilter, BottommostLevelCompaction::kForce, BottommostLevelCompaction::kForceOptimized}; cro.bottommost_level_compaction = bottom_level_styles[thread->rand.Next() % static_cast(bottom_level_styles.size())]; cro.allow_write_stall = static_cast(thread->rand.Next() % 2); cro.max_subcompactions = static_cast(thread->rand.Next() % 4); const Snapshot* pre_snapshot = nullptr; uint32_t pre_hash = 0; if (thread->rand.OneIn(2)) { // Do some validation by declaring a snapshot and compare the data before // and after the compaction pre_snapshot = db_->GetSnapshot(); pre_hash = GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key); } Status status = db_->CompactRange(cro, column_family, &start_key, &end_key); if (!status.ok()) { fprintf(stdout, "Unable to perform CompactRange(): %s\n", status.ToString().c_str()); } if (pre_snapshot != nullptr) { uint32_t post_hash = GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key); if (pre_hash != post_hash) { fprintf(stderr, "Data hash different 
before and after compact range " "start_key %s end_key %s\n", start_key.ToString(true).c_str(), end_key.ToString(true).c_str()); thread->stats.AddErrors(1); // Fail fast to preserve the DB state. thread->shared->SetVerificationFailure(); } db_->ReleaseSnapshot(pre_snapshot); } } uint32_t StressTest::GetRangeHash(ThreadState* thread, const Snapshot* snapshot, ColumnFamilyHandle* column_family, const Slice& start_key, const Slice& end_key) { const std::string kCrcCalculatorSepearator = ";"; uint32_t crc = 0; ReadOptions ro; ro.snapshot = snapshot; ro.total_order_seek = true; std::unique_ptr it(db_->NewIterator(ro, column_family)); for (it->Seek(start_key); it->Valid() && options_.comparator->Compare(it->key(), end_key) <= 0; it->Next()) { crc = crc32c::Extend(crc, it->key().data(), it->key().size()); crc = crc32c::Extend(crc, kCrcCalculatorSepearator.data(), 1); crc = crc32c::Extend(crc, it->value().data(), it->value().size()); crc = crc32c::Extend(crc, kCrcCalculatorSepearator.data(), 1); } if (!it->status().ok()) { fprintf(stderr, "Iterator non-OK when calculating range CRC: %s\n", it->status().ToString().c_str()); thread->stats.AddErrors(1); // Fail fast to preserve the DB state. thread->shared->SetVerificationFailure(); } return crc; } void StressTest::PrintEnv() const { fprintf(stdout, "RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion); fprintf(stdout, "Format version : %d\n", FLAGS_format_version); fprintf(stdout, "TransactionDB : %s\n", FLAGS_use_txn ? "true" : "false"); #ifndef ROCKSDB_LITE fprintf(stdout, "BlobDB : %s\n", FLAGS_use_blob_db ? "true" : "false"); #endif // !ROCKSDB_LITE fprintf(stdout, "Read only mode : %s\n", FLAGS_read_only ? "true" : "false"); fprintf(stdout, "Atomic flush : %s\n", FLAGS_atomic_flush ? 
"true" : "false"); fprintf(stdout, "Column families : %d\n", FLAGS_column_families); if (!FLAGS_test_batches_snapshots) { fprintf(stdout, "Clear CFs one in : %d\n", FLAGS_clear_column_family_one_in); } fprintf(stdout, "Number of threads : %d\n", FLAGS_threads); fprintf(stdout, "Ops per thread : %lu\n", (unsigned long)FLAGS_ops_per_thread); std::string ttl_state("unused"); if (FLAGS_ttl > 0) { ttl_state = NumberToString(FLAGS_ttl); } fprintf(stdout, "Time to live(sec) : %s\n", ttl_state.c_str()); fprintf(stdout, "Read percentage : %d%%\n", FLAGS_readpercent); fprintf(stdout, "Prefix percentage : %d%%\n", FLAGS_prefixpercent); fprintf(stdout, "Write percentage : %d%%\n", FLAGS_writepercent); fprintf(stdout, "Delete percentage : %d%%\n", FLAGS_delpercent); fprintf(stdout, "Delete range percentage : %d%%\n", FLAGS_delrangepercent); fprintf(stdout, "No overwrite percentage : %d%%\n", FLAGS_nooverwritepercent); fprintf(stdout, "Iterate percentage : %d%%\n", FLAGS_iterpercent); fprintf(stdout, "DB-write-buffer-size : %" PRIu64 "\n", FLAGS_db_write_buffer_size); fprintf(stdout, "Write-buffer-size : %d\n", FLAGS_write_buffer_size); fprintf(stdout, "Iterations : %lu\n", (unsigned long)FLAGS_num_iterations); fprintf(stdout, "Max key : %lu\n", (unsigned long)FLAGS_max_key); fprintf(stdout, "Ratio #ops/#keys : %f\n", (1.0 * FLAGS_ops_per_thread * FLAGS_threads) / FLAGS_max_key); fprintf(stdout, "Num times DB reopens : %d\n", FLAGS_reopen); fprintf(stdout, "Batches/snapshots : %d\n", FLAGS_test_batches_snapshots); fprintf(stdout, "Do update in place : %d\n", FLAGS_in_place_update); fprintf(stdout, "Num keys per lock : %d\n", 1 << FLAGS_log2_keys_per_lock); std::string compression = CompressionTypeToString(compression_type_e); fprintf(stdout, "Compression : %s\n", compression.c_str()); std::string bottommost_compression = CompressionTypeToString(bottommost_compression_type_e); fprintf(stdout, "Bottommost Compression : %s\n", bottommost_compression.c_str()); std::string checksum = 
ChecksumTypeToString(checksum_type_e); fprintf(stdout, "Checksum type : %s\n", checksum.c_str()); fprintf(stdout, "Bloom bits / key : %s\n", FormatDoubleParam(FLAGS_bloom_bits).c_str()); fprintf(stdout, "Max subcompactions : %" PRIu64 "\n", FLAGS_subcompactions); fprintf(stdout, "Use MultiGet : %s\n", FLAGS_use_multiget ? "true" : "false"); const char* memtablerep = ""; switch (FLAGS_rep_factory) { case kSkipList: memtablerep = "skip_list"; break; case kHashSkipList: memtablerep = "prefix_hash"; break; case kVectorRep: memtablerep = "vector"; break; } fprintf(stdout, "Memtablerep : %s\n", memtablerep); fprintf(stdout, "Test kill odd : %d\n", rocksdb_kill_odds); if (!rocksdb_kill_prefix_blacklist.empty()) { fprintf(stdout, "Skipping kill points prefixes:\n"); for (auto& p : rocksdb_kill_prefix_blacklist) { fprintf(stdout, " %s\n", p.c_str()); } } fprintf(stdout, "Periodic Compaction Secs : %" PRIu64 "\n", FLAGS_periodic_compaction_seconds); fprintf(stdout, "Compaction TTL : %" PRIu64 "\n", FLAGS_compaction_ttl); fprintf(stdout, "Background Purge : %d\n", static_cast(FLAGS_avoid_unnecessary_blocking_io)); fprintf(stdout, "Write DB ID to manifest : %d\n", static_cast(FLAGS_write_dbid_to_manifest)); fprintf(stdout, "Max Write Batch Group Size: %" PRIu64 "\n", FLAGS_max_write_batch_group_size_bytes); fprintf(stdout, "Use dynamic level : %d\n", static_cast(FLAGS_level_compaction_dynamic_level_bytes)); fprintf(stdout, "Read fault one in : %d\n", FLAGS_read_fault_one_in); fprintf(stdout, "Sync fault injection : %d\n", FLAGS_sync_fault_injection); fprintf(stdout, "------------------------------------------------\n"); } void StressTest::Open() { assert(db_ == nullptr); #ifndef ROCKSDB_LITE assert(txn_db_ == nullptr); #endif if (FLAGS_options_file.empty()) { BlockBasedTableOptions block_based_options; block_based_options.block_cache = cache_; block_based_options.cache_index_and_filter_blocks = FLAGS_cache_index_and_filter_blocks; block_based_options.block_cache_compressed = 
compressed_cache_; block_based_options.checksum = checksum_type_e; block_based_options.block_size = FLAGS_block_size; block_based_options.format_version = static_cast(FLAGS_format_version); block_based_options.index_block_restart_interval = static_cast(FLAGS_index_block_restart_interval); block_based_options.filter_policy = filter_policy_; block_based_options.partition_filters = FLAGS_partition_filters; block_based_options.index_type = static_cast(FLAGS_index_type); options_.table_factory.reset( NewBlockBasedTableFactory(block_based_options)); options_.db_write_buffer_size = FLAGS_db_write_buffer_size; options_.write_buffer_size = FLAGS_write_buffer_size; options_.max_write_buffer_number = FLAGS_max_write_buffer_number; options_.min_write_buffer_number_to_merge = FLAGS_min_write_buffer_number_to_merge; options_.max_write_buffer_number_to_maintain = FLAGS_max_write_buffer_number_to_maintain; options_.max_write_buffer_size_to_maintain = FLAGS_max_write_buffer_size_to_maintain; options_.memtable_prefix_bloom_size_ratio = FLAGS_memtable_prefix_bloom_size_ratio; options_.memtable_whole_key_filtering = FLAGS_memtable_whole_key_filtering; options_.max_background_compactions = FLAGS_max_background_compactions; options_.max_background_flushes = FLAGS_max_background_flushes; options_.compaction_style = static_cast(FLAGS_compaction_style); if (FLAGS_prefix_size >= 0) { options_.prefix_extractor.reset( NewFixedPrefixTransform(FLAGS_prefix_size)); } options_.max_open_files = FLAGS_open_files; options_.statistics = dbstats; options_.env = db_stress_env; options_.use_fsync = FLAGS_use_fsync; options_.compaction_readahead_size = FLAGS_compaction_readahead_size; options_.allow_mmap_reads = FLAGS_mmap_read; options_.allow_mmap_writes = FLAGS_mmap_write; options_.use_direct_reads = FLAGS_use_direct_reads; options_.use_direct_io_for_flush_and_compaction = FLAGS_use_direct_io_for_flush_and_compaction; options_.recycle_log_file_num = static_cast(FLAGS_recycle_log_file_num); 
options_.target_file_size_base = FLAGS_target_file_size_base; options_.target_file_size_multiplier = FLAGS_target_file_size_multiplier; options_.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base; options_.max_bytes_for_level_multiplier = FLAGS_max_bytes_for_level_multiplier; options_.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger; options_.level0_slowdown_writes_trigger = FLAGS_level0_slowdown_writes_trigger; options_.level0_file_num_compaction_trigger = FLAGS_level0_file_num_compaction_trigger; options_.compression = compression_type_e; options_.bottommost_compression = bottommost_compression_type_e; options_.compression_opts.max_dict_bytes = FLAGS_compression_max_dict_bytes; options_.compression_opts.zstd_max_train_bytes = FLAGS_compression_zstd_max_train_bytes; options_.compression_opts.parallel_threads = FLAGS_compression_parallel_threads; options_.create_if_missing = true; options_.max_manifest_file_size = FLAGS_max_manifest_file_size; options_.inplace_update_support = FLAGS_in_place_update; options_.max_subcompactions = static_cast(FLAGS_subcompactions); options_.allow_concurrent_memtable_write = FLAGS_allow_concurrent_memtable_write; options_.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds; options_.ttl = FLAGS_compaction_ttl; options_.enable_pipelined_write = FLAGS_enable_pipelined_write; options_.enable_write_thread_adaptive_yield = FLAGS_enable_write_thread_adaptive_yield; options_.compaction_options_universal.size_ratio = FLAGS_universal_size_ratio; options_.compaction_options_universal.min_merge_width = FLAGS_universal_min_merge_width; options_.compaction_options_universal.max_merge_width = FLAGS_universal_max_merge_width; options_.compaction_options_universal.max_size_amplification_percent = FLAGS_universal_max_size_amplification_percent; options_.atomic_flush = FLAGS_atomic_flush; options_.avoid_unnecessary_blocking_io = FLAGS_avoid_unnecessary_blocking_io; options_.write_dbid_to_manifest = 
FLAGS_write_dbid_to_manifest; options_.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery; options_.max_write_batch_group_size_bytes = FLAGS_max_write_batch_group_size_bytes; options_.level_compaction_dynamic_level_bytes = FLAGS_level_compaction_dynamic_level_bytes; } else { #ifdef ROCKSDB_LITE fprintf(stderr, "--options_file not supported in lite mode\n"); exit(1); #else DBOptions db_options; std::vector cf_descriptors; Status s = LoadOptionsFromFile(FLAGS_options_file, db_stress_env, &db_options, &cf_descriptors); db_options.env = new DbStressEnvWrapper(db_stress_env); if (!s.ok()) { fprintf(stderr, "Unable to load options file %s --- %s\n", FLAGS_options_file.c_str(), s.ToString().c_str()); exit(1); } options_ = Options(db_options, cf_descriptors[0].options); #endif // ROCKSDB_LITE } if (FLAGS_rate_limiter_bytes_per_sec > 0) { options_.rate_limiter.reset(NewGenericRateLimiter( FLAGS_rate_limiter_bytes_per_sec, 1000 /* refill_period_us */, 10 /* fairness */, FLAGS_rate_limit_bg_reads ? 
RateLimiter::Mode::kReadsOnly : RateLimiter::Mode::kWritesOnly)); if (FLAGS_rate_limit_bg_reads) { options_.new_table_reader_for_compaction_inputs = true; } } if (FLAGS_sst_file_manager_bytes_per_sec > 0 || FLAGS_sst_file_manager_bytes_per_truncate > 0) { Status status; options_.sst_file_manager.reset(NewSstFileManager( db_stress_env, options_.info_log, "" /* trash_dir */, static_cast(FLAGS_sst_file_manager_bytes_per_sec), true /* delete_existing_trash */, &status, 0.25 /* max_trash_db_ratio */, FLAGS_sst_file_manager_bytes_per_truncate)); if (!status.ok()) { fprintf(stderr, "SstFileManager creation failed: %s\n", status.ToString().c_str()); exit(1); } } if (FLAGS_prefix_size == 0 && FLAGS_rep_factory == kHashSkipList) { fprintf(stderr, "prefeix_size cannot be zero if memtablerep == prefix_hash\n"); exit(1); } if (FLAGS_prefix_size != 0 && FLAGS_rep_factory != kHashSkipList) { fprintf(stderr, "WARNING: prefix_size is non-zero but " "memtablerep != prefix_hash\n"); } switch (FLAGS_rep_factory) { case kSkipList: // no need to do anything break; #ifndef ROCKSDB_LITE case kHashSkipList: options_.memtable_factory.reset(NewHashSkipListRepFactory(10000)); break; case kVectorRep: options_.memtable_factory.reset(new VectorRepFactory()); break; #else default: fprintf(stderr, "RocksdbLite only supports skip list mem table. 
Skip " "--rep_factory\n"); #endif // ROCKSDB_LITE } if (FLAGS_use_full_merge_v1) { options_.merge_operator = MergeOperators::CreateDeprecatedPutOperator(); } else { options_.merge_operator = MergeOperators::CreatePutOperator(); } fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str()); Status s; if (FLAGS_ttl == -1) { std::vector existing_column_families; s = DB::ListColumnFamilies(DBOptions(options_), FLAGS_db, &existing_column_families); // ignore errors if (!s.ok()) { // DB doesn't exist assert(existing_column_families.empty()); assert(column_family_names_.empty()); column_family_names_.push_back(kDefaultColumnFamilyName); } else if (column_family_names_.empty()) { // this is the first call to the function Open() column_family_names_ = existing_column_families; } else { // this is a reopen. just assert that existing column_family_names are // equivalent to what we remember auto sorted_cfn = column_family_names_; std::sort(sorted_cfn.begin(), sorted_cfn.end()); std::sort(existing_column_families.begin(), existing_column_families.end()); if (sorted_cfn != existing_column_families) { fprintf(stderr, "Expected column families differ from the existing:\n"); fprintf(stderr, "Expected: {"); for (auto cf : sorted_cfn) { fprintf(stderr, "%s ", cf.c_str()); } fprintf(stderr, "}\n"); fprintf(stderr, "Existing: {"); for (auto cf : existing_column_families) { fprintf(stderr, "%s ", cf.c_str()); } fprintf(stderr, "}\n"); } assert(sorted_cfn == existing_column_families); } std::vector cf_descriptors; for (auto name : column_family_names_) { if (name != kDefaultColumnFamilyName) { new_column_family_name_ = std::max(new_column_family_name_.load(), std::stoi(name) + 1); } cf_descriptors.emplace_back(name, ColumnFamilyOptions(options_)); } while (cf_descriptors.size() < (size_t)FLAGS_column_families) { std::string name = ToString(new_column_family_name_.load()); new_column_family_name_++; cf_descriptors.emplace_back(name, ColumnFamilyOptions(options_)); 
column_family_names_.push_back(name); } options_.listeners.clear(); options_.listeners.emplace_back( new DbStressListener(FLAGS_db, options_.db_paths, cf_descriptors)); options_.create_missing_column_families = true; if (!FLAGS_use_txn) { #ifndef ROCKSDB_LITE if (FLAGS_use_blob_db) { blob_db::BlobDBOptions blob_db_options; blob_db_options.min_blob_size = FLAGS_blob_db_min_blob_size; blob_db_options.bytes_per_sync = FLAGS_blob_db_bytes_per_sync; blob_db_options.blob_file_size = FLAGS_blob_db_file_size; blob_db_options.enable_garbage_collection = FLAGS_blob_db_enable_gc; blob_db_options.garbage_collection_cutoff = FLAGS_blob_db_gc_cutoff; blob_db::BlobDB* blob_db = nullptr; s = blob_db::BlobDB::Open(options_, blob_db_options, FLAGS_db, cf_descriptors, &column_families_, &blob_db); if (s.ok()) { db_ = blob_db; } } else #endif // !ROCKSDB_LITE { if (db_preload_finished_.load() && FLAGS_read_only) { s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, cf_descriptors, &column_families_, &db_); } else { s = DB::Open(DBOptions(options_), FLAGS_db, cf_descriptors, &column_families_, &db_); } } } else { #ifndef ROCKSDB_LITE TransactionDBOptions txn_db_options; assert(FLAGS_txn_write_policy <= TxnDBWritePolicy::WRITE_UNPREPARED); txn_db_options.write_policy = static_cast(FLAGS_txn_write_policy); if (FLAGS_unordered_write) { assert(txn_db_options.write_policy == TxnDBWritePolicy::WRITE_PREPARED); options_.unordered_write = true; options_.two_write_queues = true; txn_db_options.skip_concurrency_control = true; } s = TransactionDB::Open(options_, txn_db_options, FLAGS_db, cf_descriptors, &column_families_, &txn_db_); if (!s.ok()) { fprintf(stderr, "Error in opening the TransactionDB [%s]\n", s.ToString().c_str()); fflush(stderr); } assert(s.ok()); db_ = txn_db_; // after a crash, rollback to commit recovered transactions std::vector trans; txn_db_->GetAllPreparedTransactions(&trans); Random rand(static_cast(FLAGS_seed)); for (auto txn : trans) { if (rand.OneIn(2)) { s = 
txn->Commit(); assert(s.ok()); } else { s = txn->Rollback(); assert(s.ok()); } delete txn; } trans.clear(); txn_db_->GetAllPreparedTransactions(&trans); assert(trans.size() == 0); #endif } assert(!s.ok() || column_families_.size() == static_cast(FLAGS_column_families)); if (FLAGS_test_secondary) { #ifndef ROCKSDB_LITE secondaries_.resize(FLAGS_threads); std::fill(secondaries_.begin(), secondaries_.end(), nullptr); secondary_cfh_lists_.clear(); secondary_cfh_lists_.resize(FLAGS_threads); Options tmp_opts; // TODO(yanqin) support max_open_files != -1 for secondary instance. tmp_opts.max_open_files = -1; tmp_opts.statistics = dbstats_secondaries; tmp_opts.env = db_stress_env; for (size_t i = 0; i != static_cast(FLAGS_threads); ++i) { const std::string secondary_path = FLAGS_secondaries_base + "/" + std::to_string(i); s = DB::OpenAsSecondary(tmp_opts, FLAGS_db, secondary_path, cf_descriptors, &secondary_cfh_lists_[i], &secondaries_[i]); if (!s.ok()) { break; } } assert(s.ok()); #else fprintf(stderr, "Secondary is not supported in RocksDBLite\n"); exit(1); #endif } if (FLAGS_continuous_verification_interval > 0 && !cmp_db_) { Options tmp_opts; // TODO(yanqin) support max_open_files != -1 for secondary instance. tmp_opts.max_open_files = -1; tmp_opts.env = db_stress_env; std::string secondary_path = FLAGS_secondaries_base + "/cmp_database"; s = DB::OpenAsSecondary(tmp_opts, FLAGS_db, secondary_path, cf_descriptors, &cmp_cfhs_, &cmp_db_); assert(!s.ok() || cmp_cfhs_.size() == static_cast(FLAGS_column_families)); } } else { #ifndef ROCKSDB_LITE DBWithTTL* db_with_ttl; s = DBWithTTL::Open(options_, FLAGS_db, &db_with_ttl, FLAGS_ttl); db_ = db_with_ttl; if (FLAGS_test_secondary) { secondaries_.resize(FLAGS_threads); std::fill(secondaries_.begin(), secondaries_.end(), nullptr); Options tmp_opts; tmp_opts.env = options_.env; // TODO(yanqin) support max_open_files != -1 for secondary instance. 
tmp_opts.max_open_files = -1; for (size_t i = 0; i != static_cast(FLAGS_threads); ++i) { const std::string secondary_path = FLAGS_secondaries_base + "/" + std::to_string(i); s = DB::OpenAsSecondary(tmp_opts, FLAGS_db, secondary_path, &secondaries_[i]); if (!s.ok()) { break; } } } #else fprintf(stderr, "TTL is not supported in RocksDBLite\n"); exit(1); #endif } if (!s.ok()) { fprintf(stderr, "open error: %s\n", s.ToString().c_str()); exit(1); } } void StressTest::Reopen(ThreadState* thread) { #ifndef ROCKSDB_LITE // BG jobs in WritePrepared must be canceled first because i) they can access // the db via a callbac ii) they hold on to a snapshot and the upcoming // ::Close would complain about it. const bool write_prepared = FLAGS_use_txn && FLAGS_txn_write_policy != 0; bool bg_canceled = false; if (write_prepared || thread->rand.OneIn(2)) { const bool wait = write_prepared || static_cast(thread->rand.OneIn(2)); CancelAllBackgroundWork(db_, wait); bg_canceled = wait; } assert(!write_prepared || bg_canceled); (void) bg_canceled; #else (void) thread; #endif for (auto cf : column_families_) { delete cf; } column_families_.clear(); #ifndef ROCKSDB_LITE if (thread->rand.OneIn(2)) { Status s = db_->Close(); if (!s.ok()) { fprintf(stderr, "Non-ok close status: %s\n", s.ToString().c_str()); fflush(stderr); } assert(s.ok()); } #endif delete db_; db_ = nullptr; #ifndef ROCKSDB_LITE txn_db_ = nullptr; #endif assert(secondaries_.size() == secondary_cfh_lists_.size()); size_t n = secondaries_.size(); for (size_t i = 0; i != n; ++i) { for (auto* cf : secondary_cfh_lists_[i]) { delete cf; } secondary_cfh_lists_[i].clear(); delete secondaries_[i]; } secondaries_.clear(); num_times_reopened_++; auto now = db_stress_env->NowMicros(); fprintf(stdout, "%s Reopening database for the %dth time\n", db_stress_env->TimeToString(now / 1000000).c_str(), num_times_reopened_); Open(); } } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS 
rocksdb-6.11.4/db_stress_tool/db_stress_test_base.h000066400000000000000000000215451370372246700224730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifdef GFLAGS
#pragma once
#include "db_stress_tool/db_stress_common.h"
#include "db_stress_tool/db_stress_shared_state.h"

namespace ROCKSDB_NAMESPACE {
class Transaction;
class TransactionDB;

// Base class of the db_stress test variants. It owns the database handle(s)
// under test (primary, optional per-thread secondaries, optional comparison
// instance) and declares the per-operation hooks (Test*) that concrete
// subclasses implement.
//
// NOTE(review): several declarations below name std::vector, std::shared_ptr,
// std::unique_ptr, std::atomic and std::unordered_map without template
// arguments. That looks like damage from whatever produced this copy of the
// file rather than the author's intent -- confirm against the real header
// before relying on the exact types.
class StressTest {
 public:
  StressTest();

  virtual ~StressTest();

  std::shared_ptr NewCache(size_t capacity);

  bool BuildOptionsTable();

  void InitDb();

  void InitReadonlyDb(SharedState*);

  // Return false if verification fails.
  bool VerifySecondaries();

  void OperateDb(ThreadState* thread);

  // Subclasses must implement full-database verification.
  virtual void VerifyDb(ThreadState* thread) const = 0;

  // No-op unless a subclass overrides it.
  virtual void ContinuouslyVerifyDb(ThreadState* /*thread*/) const {}

  void PrintStatistics();

 protected:
  Status AssertSame(DB* db, ColumnFamilyHandle* cf,
                    ThreadState::SnapshotState& snap_state);

  // Currently PreloadDb has to be single-threaded.
  void PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
                                    SharedState* shared);

  Status SetOptions(ThreadState* thread);

#ifndef ROCKSDB_LITE
  // Transaction helpers; only declared in non-LITE builds.
  Status NewTxn(WriteOptions& write_opts, Transaction** txn);

  Status CommitTxn(Transaction* txn);

  Status RollbackTxn(Transaction* txn);
#endif

  // No-op unless a subclass overrides it.
  virtual void MaybeClearOneColumnFamily(ThreadState* /* thread */) {}

  // Defaults to false; subclasses may override.
  virtual bool ShouldAcquireMutexOnKey() const { return false; }

  // Default: operate on exactly the one randomly chosen column family.
  virtual std::vector GenerateColumnFamilies(
      const int /* num_column_families */, int rand_column_family) const {
    return {rand_column_family};
  }

  // Default: operate on exactly the one randomly chosen key.
  virtual std::vector GenerateKeys(int64_t rand_key) const {
    return {rand_key};
  }

  // Per-operation hooks. The pure virtual ones have no default and must be
  // provided by every concrete stress test.
  virtual Status TestGet(ThreadState* thread, const ReadOptions& read_opts,
                         const std::vector& rand_column_families,
                         const std::vector& rand_keys) = 0;

  virtual std::vector TestMultiGet(
      ThreadState* thread, const ReadOptions& read_opts,
      const std::vector& rand_column_families,
      const std::vector& rand_keys) = 0;

  virtual Status TestPrefixScan(ThreadState* thread,
                                const ReadOptions& read_opts,
                                const std::vector& rand_column_families,
                                const std::vector& rand_keys) = 0;

  // `value` is a caller-provided 100-byte buffer.
  virtual Status TestPut(ThreadState* thread, WriteOptions& write_opts,
                         const ReadOptions& read_opts,
                         const std::vector& cf_ids,
                         const std::vector& keys, char (&value)[100],
                         std::unique_ptr& lock) = 0;

  virtual Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
                            const std::vector& rand_column_families,
                            const std::vector& rand_keys,
                            std::unique_ptr& lock) = 0;

  virtual Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts,
                                 const std::vector& rand_column_families,
                                 const std::vector& rand_keys,
                                 std::unique_ptr& lock) = 0;

  virtual void TestIngestExternalFile(
      ThreadState* thread, const std::vector& rand_column_families,
      const std::vector& rand_keys, std::unique_ptr& lock) = 0;

  // Issue compact range, starting with start_key, whose integer value
  // is rand_key.
  virtual void TestCompactRange(ThreadState* thread, int64_t rand_key,
                                const Slice& start_key,
                                ColumnFamilyHandle* column_family);

  // Calculate a hash value for all keys in range [start_key, end_key]
  // at a certain snapshot.
  uint32_t GetRangeHash(ThreadState* thread, const Snapshot* snapshot,
                        ColumnFamilyHandle* column_family,
                        const Slice& start_key, const Slice& end_key);

  // Return a column family handle that mirrors what is pointed to by
  // `column_family_id`, which will be used to validate data to be correct.
  // By default, the column family itself is returned.
  virtual ColumnFamilyHandle* GetControlCfh(ThreadState* /* thread*/,
                                            int column_family_id) {
    return column_families_[column_family_id];
  }

#ifndef ROCKSDB_LITE
  // Generate a list of keys that are close to the boundaries of SST keys.
  // If there isn't any SST file in the DB, return an empty list.
  std::vector GetWhiteBoxKeys(ThreadState* thread, DB* db,
                              ColumnFamilyHandle* cfh, size_t num_keys);
#else   // !ROCKSDB_LITE
  std::vector GetWhiteBoxKeys(ThreadState*, DB*, ColumnFamilyHandle*, size_t) {
    // Not supported in LITE mode.
    return {};
  }
#endif  // !ROCKSDB_LITE

  // Given a key K, this creates an iterator which scans to K and then
  // does a random sequence of Next/Prev operations.
  virtual Status TestIterate(ThreadState* thread, const ReadOptions& read_opts,
                             const std::vector& rand_column_families,
                             const std::vector& rand_keys);

  // Enum used by VerifyIterator() to identify the mode to validate.
  enum LastIterateOp {
    kLastOpSeek,
    kLastOpSeekForPrev,
    kLastOpNextOrPrev,
    kLastOpSeekToFirst,
    kLastOpSeekToLast
  };

  // Compare the two iterators: iter and cmp_iter should be at the same
  // position, unless iter might have been made invalid or undefined because
  // of upper or lower bounds, or the prefix extractor.
  // Will flag failure if the verification fails.
  // *diverged is true if the two iterators have already diverged.
  // op_logs is the information to print when validation fails.
  // NOTE(review): the original comment also said "True if verification
  // passed, false if not", but the function returns void -- presumably a
  // leftover from an earlier signature.
  void VerifyIterator(ThreadState* thread, ColumnFamilyHandle* cmp_cfh,
                      const ReadOptions& ro, Iterator* iter, Iterator* cmp_iter,
                      LastIterateOp op, const Slice& seek_key,
                      const std::string& op_logs, bool* diverged);

  virtual Status TestBackupRestore(ThreadState* thread,
                                   const std::vector& rand_column_families,
                                   const std::vector& rand_keys);

  virtual Status TestCheckpoint(ThreadState* thread,
                                const std::vector& rand_column_families,
                                const std::vector& rand_keys);

  void TestCompactFiles(ThreadState* thread, ColumnFamilyHandle* column_family);

  Status TestFlush(const std::vector& rand_column_families);

  Status TestPauseBackground(ThreadState* thread);

  void TestAcquireSnapshot(ThreadState* thread, int rand_column_family,
                           const std::string& keystr, uint64_t i);

  Status MaybeReleaseSnapshots(ThreadState* thread, uint64_t i);

#ifndef ROCKSDB_LITE
  Status VerifyGetLiveFiles() const;
  Status VerifyGetSortedWalFiles() const;
  Status VerifyGetCurrentWalFile() const;

  virtual Status TestApproximateSize(
      ThreadState* thread, uint64_t iteration,
      const std::vector& rand_column_families,
      const std::vector& rand_keys);
#endif  // !ROCKSDB_LITE

  // Two overloads: one reports a Status, the other a (column family, key)
  // pair.
  void VerificationAbort(SharedState* shared, std::string msg, Status s) const;

  void VerificationAbort(SharedState* shared, std::string msg, int cf,
                         int64_t key) const;

  void PrintEnv() const;

  // Opens db_ (and, depending on flags, txn_db_, secondaries_ and cmp_db_).
  void Open();

  // Closes everything Open() created and calls Open() again.
  void Reopen(ThreadState* thread);

  std::shared_ptr cache_;
  std::shared_ptr compressed_cache_;
  std::shared_ptr filter_policy_;
  // Primary database handle; assigned in Open(), deleted in Reopen().
  DB* db_;
#ifndef ROCKSDB_LITE
  // Same object as db_ when the DB is opened through TransactionDB::Open;
  // reset to nullptr by Reopen().
  TransactionDB* txn_db_;
#endif
  Options options_;
  // Handles returned by the open call; deleted and cleared by Reopen().
  std::vector column_families_;
  // Names remembered across reopens and checked against what the DB reports.
  std::vector column_family_names_;
  // Next numeric name to hand out when Open() has to create a column family.
  std::atomic new_column_family_name_;
  // Incremented once per Reopen().
  int num_times_reopened_;
  std::unordered_map> options_table_;
  std::vector options_index_;
  // Open() only uses DB::OpenForReadOnly once this is set (and
  // FLAGS_read_only is on).
  std::atomic db_preload_finished_;

  // Fields used for stress-testing secondary instance in the same process.
  // Open() sizes secondaries_ to FLAGS_threads when FLAGS_test_secondary is
  // set.
  std::vector secondaries_;
  std::vector> secondary_cfh_lists_;

  // Fields used for continuous verification from another thread
  DB* cmp_db_;
  std::vector cmp_cfhs_;
};
} // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/db_stress_tool.cc000066400000000000000000000234301370372246700216300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // The test uses an array to compare against values written to the database. // Keys written to the array are in 1:1 correspondence to the actual values in // the database according to the formula in the function GenerateValue. // Space is reserved in the array from 0 to FLAGS_max_key and values are // randomly written/deleted/read from those positions. During verification we // compare all the positions in the array. To shorten/elongate the running // time, you could change the settings: FLAGS_max_key, FLAGS_ops_per_thread, // (sometimes also FLAGS_threads). // // NOTE that if FLAGS_test_batches_snapshots is set, the test will have // different behavior. See comment of the flag for details. 
#ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" #include "db_stress_tool/db_stress_driver.h" #ifndef NDEBUG #include "test_util/fault_injection_test_fs.h" #endif namespace ROCKSDB_NAMESPACE { namespace { static std::shared_ptr env_guard; static std::shared_ptr env_wrapper_guard; static std::shared_ptr fault_env_guard; } // namespace KeyGenContext key_gen_ctx; int db_stress_tool(int argc, char** argv) { SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + " [OPTIONS]..."); ParseCommandLineFlags(&argc, &argv, true); SanitizeDoubleParam(&FLAGS_bloom_bits); SanitizeDoubleParam(&FLAGS_memtable_prefix_bloom_size_ratio); SanitizeDoubleParam(&FLAGS_max_bytes_for_level_multiplier); if (FLAGS_mock_direct_io) { test::SetupSyncPointsToMockDirectIO(); } if (FLAGS_statistics) { dbstats = ROCKSDB_NAMESPACE::CreateDBStatistics(); if (FLAGS_test_secondary) { dbstats_secondaries = ROCKSDB_NAMESPACE::CreateDBStatistics(); } } compression_type_e = StringToCompressionType(FLAGS_compression_type.c_str()); bottommost_compression_type_e = StringToCompressionType(FLAGS_bottommost_compression_type.c_str()); checksum_type_e = StringToChecksumType(FLAGS_checksum_type.c_str()); Env* raw_env; if (!FLAGS_hdfs.empty()) { if (!FLAGS_env_uri.empty()) { fprintf(stderr, "Cannot specify both --hdfs and --env_uri.\n"); exit(1); } raw_env = new ROCKSDB_NAMESPACE::HdfsEnv(FLAGS_hdfs); } else if (!FLAGS_env_uri.empty()) { Status s = Env::LoadEnv(FLAGS_env_uri, &raw_env, &env_guard); if (raw_env == nullptr) { fprintf(stderr, "No Env registered for URI: %s\n", FLAGS_env_uri.c_str()); exit(1); } } else { raw_env = Env::Default(); } #ifndef NDEBUG if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection) { FaultInjectionTestFS* fs = new FaultInjectionTestFS(raw_env->GetFileSystem()); fault_fs_guard.reset(fs); fault_fs_guard->SetFilesystemDirectWritable(true); fault_env_guard = std::make_shared(raw_env, fault_fs_guard); raw_env = fault_env_guard.get(); } #endif env_wrapper_guard = 
std::make_shared(raw_env); db_stress_env = env_wrapper_guard.get(); FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str()); // The number of background threads should be at least as much the // max number of concurrent compactions. db_stress_env->SetBackgroundThreads(FLAGS_max_background_compactions, ROCKSDB_NAMESPACE::Env::Priority::LOW); db_stress_env->SetBackgroundThreads(FLAGS_num_bottom_pri_threads, ROCKSDB_NAMESPACE::Env::Priority::BOTTOM); if (FLAGS_prefixpercent > 0 && FLAGS_prefix_size < 0) { fprintf(stderr, "Error: prefixpercent is non-zero while prefix_size is " "not positive!\n"); exit(1); } if (FLAGS_test_batches_snapshots && FLAGS_prefix_size <= 0) { fprintf(stderr, "Error: please specify prefix_size for " "test_batches_snapshots test!\n"); exit(1); } if (FLAGS_memtable_prefix_bloom_size_ratio > 0.0 && FLAGS_prefix_size < 0) { fprintf(stderr, "Error: please specify positive prefix_size in order to use " "memtable_prefix_bloom_size_ratio\n"); exit(1); } if ((FLAGS_readpercent + FLAGS_prefixpercent + FLAGS_writepercent + FLAGS_delpercent + FLAGS_delrangepercent + FLAGS_iterpercent) != 100) { fprintf(stderr, "Error: Read+Prefix+Write+Delete+DeleteRange+Iterate percents != " "100!\n"); exit(1); } if (FLAGS_disable_wal == 1 && FLAGS_reopen > 0) { fprintf(stderr, "Error: Db cannot reopen safely with disable_wal set!\n"); exit(1); } if ((unsigned)FLAGS_reopen >= FLAGS_ops_per_thread) { fprintf(stderr, "Error: #DB-reopens should be < ops_per_thread\n" "Provided reopens = %d and ops_per_thread = %lu\n", FLAGS_reopen, (unsigned long)FLAGS_ops_per_thread); exit(1); } if (FLAGS_test_batches_snapshots && FLAGS_delrangepercent > 0) { fprintf(stderr, "Error: nonzero delrangepercent unsupported in " "test_batches_snapshots mode\n"); exit(1); } if (FLAGS_active_width > FLAGS_max_key) { fprintf(stderr, "Error: active_width can be at most max_key\n"); exit(1); } else if (FLAGS_active_width == 0) { FLAGS_active_width = FLAGS_max_key; } if 
(FLAGS_value_size_mult * kRandomValueMaxFactor > kValueMaxLen) { fprintf(stderr, "Error: value_size_mult can be at most %d\n", kValueMaxLen / kRandomValueMaxFactor); exit(1); } if (FLAGS_use_merge && FLAGS_nooverwritepercent == 100) { fprintf( stderr, "Error: nooverwritepercent must not be 100 when using merge operands"); exit(1); } if (FLAGS_ingest_external_file_one_in > 0 && FLAGS_nooverwritepercent > 0) { fprintf(stderr, "Error: nooverwritepercent must be 0 when using file ingestion\n"); exit(1); } if (FLAGS_clear_column_family_one_in > 0 && FLAGS_backup_one_in > 0) { fprintf(stderr, "Error: clear_column_family_one_in must be 0 when using backup\n"); exit(1); } if (FLAGS_test_cf_consistency && FLAGS_disable_wal) { FLAGS_atomic_flush = true; } if (FLAGS_read_only) { if (FLAGS_writepercent != 0 || FLAGS_delpercent != 0 || FLAGS_delrangepercent != 0) { fprintf(stderr, "Error: updates are not supported in read only mode\n"); exit(1); } else if (FLAGS_checkpoint_one_in > 0 && FLAGS_clear_column_family_one_in > 0) { fprintf(stdout, "Warn: checkpoint won't be validated since column families may " "be dropped.\n"); } } // Choose a location for the test database if none given with --db= if (FLAGS_db.empty()) { std::string default_db_path; db_stress_env->GetTestDirectory(&default_db_path); default_db_path += "/dbstress"; FLAGS_db = default_db_path; } if ((FLAGS_test_secondary || FLAGS_continuous_verification_interval > 0) && FLAGS_secondaries_base.empty()) { std::string default_secondaries_path; db_stress_env->GetTestDirectory(&default_secondaries_path); default_secondaries_path += "/dbstress_secondaries"; ROCKSDB_NAMESPACE::Status s = db_stress_env->CreateDirIfMissing(default_secondaries_path); if (!s.ok()) { fprintf(stderr, "Failed to create directory %s: %s\n", default_secondaries_path.c_str(), s.ToString().c_str()); exit(1); } FLAGS_secondaries_base = default_secondaries_path; } if (!FLAGS_test_secondary && FLAGS_secondary_catch_up_one_in > 0) { fprintf( stderr, "Must 
set -test_secondary=true if secondary_catch_up_one_in > 0.\n"); exit(1); } rocksdb_kill_odds = FLAGS_kill_random_test; rocksdb_kill_prefix_blacklist = SplitString(FLAGS_kill_prefix_blacklist); unsigned int levels = FLAGS_max_key_len; std::vector weights; uint64_t scale_factor = FLAGS_key_window_scale_factor; key_gen_ctx.window = scale_factor * 100; if (!FLAGS_key_len_percent_dist.empty()) { weights = SplitString(FLAGS_key_len_percent_dist); if (weights.size() != levels) { fprintf(stderr, "Number of weights in key_len_dist should be equal to" " max_key_len"); exit(1); } uint64_t total_weight = 0; for (std::string& weight : weights) { uint64_t val = std::stoull(weight); key_gen_ctx.weights.emplace_back(val * scale_factor); total_weight += val; } if (total_weight != 100) { fprintf(stderr, "Sum of all weights in key_len_dist should be 100"); exit(1); } } else { uint64_t keys_per_level = key_gen_ctx.window / levels; for (unsigned int level = 0; level + 1 < levels; ++level) { key_gen_ctx.weights.emplace_back(keys_per_level); } key_gen_ctx.weights.emplace_back(key_gen_ctx.window - keys_per_level * (levels - 1)); } std::unique_ptr stress; if (FLAGS_test_cf_consistency) { stress.reset(CreateCfConsistencyStressTest()); } else if (FLAGS_test_batches_snapshots) { stress.reset(CreateBatchedOpsStressTest()); } else { stress.reset(CreateNonBatchedOpsStressTest()); } // Initialize the Zipfian pre-calculated array InitializeHotKeyGenerator(FLAGS_hot_key_alpha); if (RunStressTest(stress.get())) { return 0; } else { return 1; } } } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/db_stress_tool/no_batched_ops_stress.cc000066400000000000000000000700461370372246700231620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" #ifndef NDEBUG #include "test_util/fault_injection_test_fs.h" #endif // NDEBUG namespace ROCKSDB_NAMESPACE { class NonBatchedOpsStressTest : public StressTest { public: NonBatchedOpsStressTest() {} virtual ~NonBatchedOpsStressTest() {} void VerifyDb(ThreadState* thread) const override { ReadOptions options(FLAGS_verify_checksum, true); auto shared = thread->shared; const int64_t max_key = shared->GetMaxKey(); const int64_t keys_per_thread = max_key / shared->GetNumThreads(); int64_t start = keys_per_thread * thread->tid; int64_t end = start + keys_per_thread; uint64_t prefix_to_use = (FLAGS_prefix_size < 0) ? 1 : static_cast(FLAGS_prefix_size); if (thread->tid == shared->GetNumThreads() - 1) { end = max_key; } for (size_t cf = 0; cf < column_families_.size(); ++cf) { if (thread->shared->HasVerificationFailedYet()) { break; } if (thread->rand.OneIn(3)) { // 1/3 chance use iterator to verify this range Slice prefix; std::string seek_key = Key(start); std::unique_ptr iter( db_->NewIterator(options, column_families_[cf])); iter->Seek(seek_key); prefix = Slice(seek_key.data(), prefix_to_use); for (auto i = start; i < end; i++) { if (thread->shared->HasVerificationFailedYet()) { break; } std::string from_db; std::string keystr = Key(i); Slice k = keystr; Slice pfx = Slice(keystr.data(), prefix_to_use); // Reseek when the prefix changes if (prefix_to_use > 0 && prefix.compare(pfx) != 0) { iter->Seek(k); seek_key = keystr; prefix = Slice(seek_key.data(), prefix_to_use); } Status s = iter->status(); if (iter->Valid()) { Slice iter_key = iter->key(); if (iter->key().compare(k) > 0) { s = Status::NotFound(Slice()); } else if (iter->key().compare(k) == 0) { from_db = iter->value().ToString(); 
iter->Next(); } else if (iter_key.compare(k) < 0) { VerificationAbort(shared, "An out of range key was found", static_cast(cf), i); } } else { // The iterator found no value for the key in question, so do not // move to the next item in the iterator s = Status::NotFound(); } VerifyValue(static_cast(cf), i, options, shared, from_db, s, true); if (from_db.length()) { PrintKeyValue(static_cast(cf), static_cast(i), from_db.data(), from_db.length()); } } } else if (thread->rand.OneIn(2)) { // 1/3 chance use Get to verify this range for (auto i = start; i < end; i++) { if (thread->shared->HasVerificationFailedYet()) { break; } std::string from_db; std::string keystr = Key(i); Slice k = keystr; Status s = db_->Get(options, column_families_[cf], k, &from_db); VerifyValue(static_cast(cf), i, options, shared, from_db, s, true); if (from_db.length()) { PrintKeyValue(static_cast(cf), static_cast(i), from_db.data(), from_db.length()); } } } else { // 1/3 chance use MultiGet to verify this range for (auto i = start; i < end;) { if (thread->shared->HasVerificationFailedYet()) { break; } // Keep the batch size to some reasonable value size_t batch_size = thread->rand.Uniform(128) + 1; batch_size = std::min(batch_size, end - i); std::vector keystrs(batch_size); std::vector keys(batch_size); std::vector values(batch_size); std::vector statuses(batch_size); for (size_t j = 0; j < batch_size; ++j) { keystrs[j] = Key(i + j); keys[j] = Slice(keystrs[j].data(), keystrs[j].length()); } db_->MultiGet(options, column_families_[cf], batch_size, keys.data(), values.data(), statuses.data()); for (size_t j = 0; j < batch_size; ++j) { Status s = statuses[j]; std::string from_db = values[j].ToString(); VerifyValue(static_cast(cf), i + j, options, shared, from_db, s, true); if (from_db.length()) { PrintKeyValue(static_cast(cf), static_cast(i + j), from_db.data(), from_db.length()); } } i += batch_size; } } } } void MaybeClearOneColumnFamily(ThreadState* thread) override { if (FLAGS_column_families 
> 1) { if (thread->rand.OneInOpt(FLAGS_clear_column_family_one_in)) { // drop column family and then create it again (can't drop default) int cf = thread->rand.Next() % (FLAGS_column_families - 1) + 1; std::string new_name = ToString(new_column_family_name_.fetch_add(1)); { MutexLock l(thread->shared->GetMutex()); fprintf( stdout, "[CF %d] Dropping and recreating column family. new name: %s\n", cf, new_name.c_str()); } thread->shared->LockColumnFamily(cf); Status s = db_->DropColumnFamily(column_families_[cf]); delete column_families_[cf]; if (!s.ok()) { fprintf(stderr, "dropping column family error: %s\n", s.ToString().c_str()); std::terminate(); } s = db_->CreateColumnFamily(ColumnFamilyOptions(options_), new_name, &column_families_[cf]); column_family_names_[cf] = new_name; thread->shared->ClearColumnFamily(cf); if (!s.ok()) { fprintf(stderr, "creating column family error: %s\n", s.ToString().c_str()); std::terminate(); } thread->shared->UnlockColumnFamily(cf); } } } bool ShouldAcquireMutexOnKey() const override { return true; } Status TestGet(ThreadState* thread, const ReadOptions& read_opts, const std::vector& rand_column_families, const std::vector& rand_keys) override { auto cfh = column_families_[rand_column_families[0]]; std::string key_str = Key(rand_keys[0]); Slice key = key_str; std::string from_db; int error_count = 0; #ifndef NDEBUG if (fault_fs_guard) { fault_fs_guard->EnableErrorInjection(); SharedState::ignore_read_error = false; } #endif // NDEBUG Status s = db_->Get(read_opts, cfh, key, &from_db); #ifndef NDEBUG if (fault_fs_guard) { error_count = fault_fs_guard->GetAndResetErrorCount(); } #endif // NDEBUG if (s.ok()) { #ifndef NDEBUG if (fault_fs_guard) { if (error_count && !SharedState::ignore_read_error) { // Grab mutex so multiple thread don't try to print the // stack trace at the same time MutexLock l(thread->shared->GetMutex()); fprintf(stderr, "Didn't get expected error from Get\n"); fprintf(stderr, "Callstack that injected the fault\n"); 
fault_fs_guard->PrintFaultBacktrace(); std::terminate(); } } #endif // NDEBUG // found case thread->stats.AddGets(1, 1); } else if (s.IsNotFound()) { // not found case thread->stats.AddGets(1, 0); } else { if (error_count == 0) { // errors case thread->stats.AddErrors(1); } else { thread->stats.AddVerifiedErrors(1); } } #ifndef NDEBUG if (fault_fs_guard) { fault_fs_guard->DisableErrorInjection(); } #endif // NDEBUG return s; } std::vector TestMultiGet( ThreadState* thread, const ReadOptions& read_opts, const std::vector& rand_column_families, const std::vector& rand_keys) override { size_t num_keys = rand_keys.size(); std::vector key_str; std::vector keys; key_str.reserve(num_keys); keys.reserve(num_keys); std::vector values(num_keys); std::vector statuses(num_keys); ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]]; int error_count = 0; // Do a consistency check between Get and MultiGet. Don't do it too // often as it will slow db_stress down bool do_consistency_check = thread->rand.OneIn(4); ReadOptions readoptionscopy = read_opts; if (do_consistency_check) { readoptionscopy.snapshot = db_->GetSnapshot(); } // To appease clang analyzer const bool use_txn = FLAGS_use_txn; // Create a transaction in order to write some data. The purpose is to // exercise WriteBatchWithIndex::MultiGetFromBatchAndDB. The transaction // will be rolled back once MultiGet returns. #ifndef ROCKSDB_LITE Transaction* txn = nullptr; if (use_txn) { WriteOptions wo; Status s = NewTxn(wo, &txn); if (!s.ok()) { fprintf(stderr, "NewTxn: %s\n", s.ToString().c_str()); std::terminate(); } } #endif for (size_t i = 0; i < num_keys; ++i) { key_str.emplace_back(Key(rand_keys[i])); keys.emplace_back(key_str.back()); #ifndef ROCKSDB_LITE if (use_txn) { // With a 1 in 10 probability, insert the just added key in the batch // into the transaction. 
This will create an overlap with the MultiGet // keys and exercise some corner cases in the code if (thread->rand.OneIn(10)) { int op = thread->rand.Uniform(2); Status s; switch (op) { case 0: case 1: { uint32_t value_base = thread->rand.Next() % thread->shared->UNKNOWN_SENTINEL; char value[100]; size_t sz = GenerateValue(value_base, value, sizeof(value)); Slice v(value, sz); if (op == 0) { s = txn->Put(cfh, keys.back(), v); } else { s = txn->Merge(cfh, keys.back(), v); } break; } case 2: s = txn->Delete(cfh, keys.back()); break; default: assert(false); } if (!s.ok()) { fprintf(stderr, "Transaction put: %s\n", s.ToString().c_str()); std::terminate(); } } } #endif } if (!use_txn) { #ifndef NDEBUG if (fault_fs_guard) { fault_fs_guard->EnableErrorInjection(); SharedState::ignore_read_error = false; } #endif // NDEBUG db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(), statuses.data()); #ifndef NDEBUG if (fault_fs_guard) { error_count = fault_fs_guard->GetAndResetErrorCount(); } #endif // NDEBUG } else { #ifndef ROCKSDB_LITE txn->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(), statuses.data()); #endif } #ifndef NDEBUG if (fault_fs_guard && error_count && !SharedState::ignore_read_error) { int stat_nok = 0; for (const auto& s : statuses) { if (!s.ok() && !s.IsNotFound()) { stat_nok++; } } if (stat_nok < error_count) { // Grab mutex so multiple thread don't try to print the // stack trace at the same time MutexLock l(thread->shared->GetMutex()); fprintf(stderr, "Didn't get expected error from MultiGet\n"); fprintf(stderr, "Callstack that injected the fault\n"); fault_fs_guard->PrintFaultBacktrace(); std::terminate(); } } if (fault_fs_guard) { fault_fs_guard->DisableErrorInjection(); } #endif // NDEBUG for (size_t i = 0; i < statuses.size(); ++i) { Status s = statuses[i]; bool is_consistent = true; // Only do the consistency check if no error was injected and MultiGet // didn't return an unexpected error if (do_consistency_check 
&& !error_count && (s.ok() || s.IsNotFound())) { Status tmp_s; std::string value; if (use_txn) { #ifndef ROCKSDB_LITE tmp_s = txn->Get(readoptionscopy, cfh, keys[i], &value); #endif // ROCKSDB_LITE } else { tmp_s = db_->Get(readoptionscopy, cfh, keys[i], &value); } if (!tmp_s.ok() && !tmp_s.IsNotFound()) { fprintf(stderr, "Get error: %s\n", s.ToString().c_str()); is_consistent = false; } else if (!s.ok() && tmp_s.ok()) { fprintf(stderr, "MultiGet returned different results with key %s\n", keys[i].ToString(true).c_str()); fprintf(stderr, "Get returned ok, MultiGet returned not found\n"); is_consistent = false; } else if (s.ok() && tmp_s.IsNotFound()) { fprintf(stderr, "MultiGet returned different results with key %s\n", keys[i].ToString(true).c_str()); fprintf(stderr, "MultiGet returned ok, Get returned not found\n"); is_consistent = false; } else if (s.ok() && value != values[i].ToString()) { fprintf(stderr, "MultiGet returned different results with key %s\n", keys[i].ToString(true).c_str()); fprintf(stderr, "MultiGet returned value %s\n", values[i].ToString(true).c_str()); fprintf(stderr, "Get returned value %s\n", value.c_str()); is_consistent = false; } } if (!is_consistent) { fprintf(stderr, "TestMultiGet error: is_consistent is false\n"); thread->stats.AddErrors(1); // Fail fast to preserve the DB state thread->shared->SetVerificationFailure(); break; } else if (s.ok()) { // found case thread->stats.AddGets(1, 1); } else if (s.IsNotFound()) { // not found case thread->stats.AddGets(1, 0); } else if (s.IsMergeInProgress() && use_txn) { // With txn this is sometimes expected. 
thread->stats.AddGets(1, 1); } else { if (error_count == 0) { // errors case fprintf(stderr, "MultiGet error: %s\n", s.ToString().c_str()); thread->stats.AddErrors(1); } else { thread->stats.AddVerifiedErrors(1); } } } if (readoptionscopy.snapshot) { db_->ReleaseSnapshot(readoptionscopy.snapshot); } if (use_txn) { #ifndef ROCKSDB_LITE RollbackTxn(txn); #endif } return statuses; } Status TestPrefixScan(ThreadState* thread, const ReadOptions& read_opts, const std::vector& rand_column_families, const std::vector& rand_keys) override { auto cfh = column_families_[rand_column_families[0]]; std::string key_str = Key(rand_keys[0]); Slice key = key_str; Slice prefix = Slice(key.data(), FLAGS_prefix_size); std::string upper_bound; Slice ub_slice; ReadOptions ro_copy = read_opts; // Get the next prefix first and then see if we want to set upper bound. // We'll use the next prefix in an assertion later on if (GetNextPrefix(prefix, &upper_bound) && thread->rand.OneIn(2)) { // For half of the time, set the upper bound to the next prefix ub_slice = Slice(upper_bound); ro_copy.iterate_upper_bound = &ub_slice; } Iterator* iter = db_->NewIterator(ro_copy, cfh); unsigned long count = 0; for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { ++count; } assert(count <= GetPrefixKeyCount(prefix.ToString(), upper_bound)); Status s = iter->status(); if (iter->status().ok()) { thread->stats.AddPrefixes(1, count); } else { fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str()); thread->stats.AddErrors(1); } delete iter; return s; } Status TestPut(ThreadState* thread, WriteOptions& write_opts, const ReadOptions& read_opts, const std::vector& rand_column_families, const std::vector& rand_keys, char (&value)[100], std::unique_ptr& lock) override { auto shared = thread->shared; int64_t max_key = shared->GetMaxKey(); int64_t rand_key = rand_keys[0]; int rand_column_family = rand_column_families[0]; while (!shared->AllowsOverwrite(rand_key) && 
(FLAGS_use_merge || shared->Exists(rand_column_family, rand_key))) { lock.reset(); rand_key = thread->rand.Next() % max_key; rand_column_family = thread->rand.Next() % FLAGS_column_families; lock.reset( new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key))); } std::string key_str = Key(rand_key); Slice key = key_str; ColumnFamilyHandle* cfh = column_families_[rand_column_family]; if (FLAGS_verify_before_write) { std::string key_str2 = Key(rand_key); Slice k = key_str2; std::string from_db; Status s = db_->Get(read_opts, cfh, k, &from_db); if (!VerifyValue(rand_column_family, rand_key, read_opts, shared, from_db, s, true)) { return s; } } uint32_t value_base = thread->rand.Next() % shared->UNKNOWN_SENTINEL; size_t sz = GenerateValue(value_base, value, sizeof(value)); Slice v(value, sz); shared->Put(rand_column_family, rand_key, value_base, true /* pending */); Status s; if (FLAGS_use_merge) { if (!FLAGS_use_txn) { s = db_->Merge(write_opts, cfh, key, v); } else { #ifndef ROCKSDB_LITE Transaction* txn; s = NewTxn(write_opts, &txn); if (s.ok()) { s = txn->Merge(cfh, key, v); if (s.ok()) { s = CommitTxn(txn); } } #endif } } else { if (!FLAGS_use_txn) { s = db_->Put(write_opts, cfh, key, v); } else { #ifndef ROCKSDB_LITE Transaction* txn; s = NewTxn(write_opts, &txn); if (s.ok()) { s = txn->Put(cfh, key, v); if (s.ok()) { s = CommitTxn(txn); } } #endif } } shared->Put(rand_column_family, rand_key, value_base, false /* pending */); if (!s.ok()) { fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str()); std::terminate(); } thread->stats.AddBytesForWrites(1, sz); PrintKeyValue(rand_column_family, static_cast(rand_key), value, sz); return s; } Status TestDelete(ThreadState* thread, WriteOptions& write_opts, const std::vector& rand_column_families, const std::vector& rand_keys, std::unique_ptr& lock) override { int64_t rand_key = rand_keys[0]; int rand_column_family = rand_column_families[0]; auto shared = thread->shared; int64_t max_key = 
shared->GetMaxKey(); // OPERATION delete // If the chosen key does not allow overwrite and it does not exist, // choose another key. while (!shared->AllowsOverwrite(rand_key) && !shared->Exists(rand_column_family, rand_key)) { lock.reset(); rand_key = thread->rand.Next() % max_key; rand_column_family = thread->rand.Next() % FLAGS_column_families; lock.reset( new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key))); } std::string key_str = Key(rand_key); Slice key = key_str; auto cfh = column_families_[rand_column_family]; // Use delete if the key may be overwritten and a single deletion // otherwise. Status s; if (shared->AllowsOverwrite(rand_key)) { shared->Delete(rand_column_family, rand_key, true /* pending */); if (!FLAGS_use_txn) { s = db_->Delete(write_opts, cfh, key); } else { #ifndef ROCKSDB_LITE Transaction* txn; s = NewTxn(write_opts, &txn); if (s.ok()) { s = txn->Delete(cfh, key); if (s.ok()) { s = CommitTxn(txn); } } #endif } shared->Delete(rand_column_family, rand_key, false /* pending */); thread->stats.AddDeletes(1); if (!s.ok()) { fprintf(stderr, "delete error: %s\n", s.ToString().c_str()); std::terminate(); } } else { shared->SingleDelete(rand_column_family, rand_key, true /* pending */); if (!FLAGS_use_txn) { s = db_->SingleDelete(write_opts, cfh, key); } else { #ifndef ROCKSDB_LITE Transaction* txn; s = NewTxn(write_opts, &txn); if (s.ok()) { s = txn->SingleDelete(cfh, key); if (s.ok()) { s = CommitTxn(txn); } } #endif } shared->SingleDelete(rand_column_family, rand_key, false /* pending */); thread->stats.AddSingleDeletes(1); if (!s.ok()) { fprintf(stderr, "single delete error: %s\n", s.ToString().c_str()); std::terminate(); } } return s; } Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, const std::vector& rand_column_families, const std::vector& rand_keys, std::unique_ptr& lock) override { // OPERATION delete range std::vector> range_locks; // delete range does not respect disallowed overwrites. 
the keys for // which overwrites are disallowed are randomly distributed so it // could be expensive to find a range where each key allows // overwrites. int64_t rand_key = rand_keys[0]; int rand_column_family = rand_column_families[0]; auto shared = thread->shared; int64_t max_key = shared->GetMaxKey(); if (rand_key > max_key - FLAGS_range_deletion_width) { lock.reset(); rand_key = thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1); range_locks.emplace_back( new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key))); } else { range_locks.emplace_back(std::move(lock)); } for (int j = 1; j < FLAGS_range_deletion_width; ++j) { if (((rand_key + j) & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) { range_locks.emplace_back(new MutexLock( shared->GetMutexForKey(rand_column_family, rand_key + j))); } } shared->DeleteRange(rand_column_family, rand_key, rand_key + FLAGS_range_deletion_width, true /* pending */); std::string keystr = Key(rand_key); Slice key = keystr; auto cfh = column_families_[rand_column_family]; std::string end_keystr = Key(rand_key + FLAGS_range_deletion_width); Slice end_key = end_keystr; Status s = db_->DeleteRange(write_opts, cfh, key, end_key); if (!s.ok()) { fprintf(stderr, "delete range error: %s\n", s.ToString().c_str()); std::terminate(); } int covered = shared->DeleteRange(rand_column_family, rand_key, rand_key + FLAGS_range_deletion_width, false /* pending */); thread->stats.AddRangeDeletions(1); thread->stats.AddCoveredByRangeDeletions(covered); return s; } #ifdef ROCKSDB_LITE void TestIngestExternalFile( ThreadState* /* thread */, const std::vector& /* rand_column_families */, const std::vector& /* rand_keys */, std::unique_ptr& /* lock */) override { assert(false); fprintf(stderr, "RocksDB lite does not support " "TestIngestExternalFile\n"); std::terminate(); } #else void TestIngestExternalFile(ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys, std::unique_ptr& lock) override { 
const std::string sst_filename = FLAGS_db + "/." + ToString(thread->tid) + ".sst"; Status s; if (db_stress_env->FileExists(sst_filename).ok()) { // Maybe we terminated abnormally before, so cleanup to give this file // ingestion a clean slate s = db_stress_env->DeleteFile(sst_filename); } SstFileWriter sst_file_writer(EnvOptions(options_), options_); if (s.ok()) { s = sst_file_writer.Open(sst_filename); } int64_t key_base = rand_keys[0]; int column_family = rand_column_families[0]; std::vector> range_locks; std::vector values; SharedState* shared = thread->shared; // Grab locks, set pending state on expected values, and add keys for (int64_t key = key_base; s.ok() && key < std::min(key_base + FLAGS_ingest_external_file_width, shared->GetMaxKey()); ++key) { if (key == key_base) { range_locks.emplace_back(std::move(lock)); } else if ((key & ((1 << FLAGS_log2_keys_per_lock) - 1)) == 0) { range_locks.emplace_back( new MutexLock(shared->GetMutexForKey(column_family, key))); } uint32_t value_base = thread->rand.Next() % shared->UNKNOWN_SENTINEL; values.push_back(value_base); shared->Put(column_family, key, value_base, true /* pending */); char value[100]; size_t value_len = GenerateValue(value_base, value, sizeof(value)); auto key_str = Key(key); s = sst_file_writer.Put(Slice(key_str), Slice(value, value_len)); } if (s.ok()) { s = sst_file_writer.Finish(); } if (s.ok()) { s = db_->IngestExternalFile(column_families_[column_family], {sst_filename}, IngestExternalFileOptions()); } if (!s.ok()) { fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str()); std::terminate(); } int64_t key = key_base; for (int32_t value : values) { shared->Put(column_family, key, value, false /* pending */); ++key; } } #endif // ROCKSDB_LITE bool VerifyValue(int cf, int64_t key, const ReadOptions& /*opts*/, SharedState* shared, const std::string& value_from_db, const Status& s, bool strict = false) const { if (shared->HasVerificationFailedYet()) { return false; } // compare 
value_from_db with the value in the shared state char value[kValueMaxLen]; uint32_t value_base = shared->Get(cf, key); if (value_base == SharedState::UNKNOWN_SENTINEL) { return true; } if (value_base == SharedState::DELETION_SENTINEL && !strict) { return true; } if (s.ok()) { if (value_base == SharedState::DELETION_SENTINEL) { VerificationAbort(shared, "Unexpected value found", cf, key); return false; } size_t sz = GenerateValue(value_base, value, sizeof(value)); if (value_from_db.length() != sz) { VerificationAbort(shared, "Length of value read is not equal", cf, key); return false; } if (memcmp(value_from_db.data(), value, sz) != 0) { VerificationAbort(shared, "Contents of value read don't match", cf, key); return false; } } else { if (value_base != SharedState::DELETION_SENTINEL) { VerificationAbort(shared, "Value not found: " + s.ToString(), cf, key); return false; } } return true; } }; StressTest* CreateNonBatchedOpsStressTest() { return new NonBatchedOpsStressTest(); } } // namespace ROCKSDB_NAMESPACE #endif // GFLAGS rocksdb-6.11.4/defs.bzl000066400000000000000000000035501370372246700147020ustar00rootroot00000000000000# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#
# defs.bzl - Definitions for Facebook-specific buck build integration
# in TARGETS

load("@fbcode_macros//build_defs:coverage.bzl", "coverage")
load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary")
load("@fbcode_macros//build_defs:custom_unittest.bzl", "custom_unittest")

def test_binary(
        test_name,
        test_cc,
        parallelism,
        rocksdb_arch_preprocessor_flags,
        rocksdb_os_preprocessor_flags,
        rocksdb_compiler_flags,
        rocksdb_preprocessor_flags,
        rocksdb_external_deps,
        rocksdb_os_deps,
        extra_deps,
        extra_compiler_flags):
    """Declares a test binary plus the custom_unittest rule that runs it.

    Two targets are created: a cpp_binary named `<test_name>_bin` built from
    `test_cc`, and a custom_unittest named `test_name` that invokes the
    package's rocks_test_runner.sh script on that binary.

    `parallelism` selects the unittest type: "parallel" maps to "gtest",
    anything else to "simple". The remaining arguments are forwarded to
    cpp_binary as flags and dependencies.
    """
    package = native.package_name()
    runner_script = package + "/buckifier/rocks_test_runner.sh"

    if parallelism == "parallel":
        unittest_type = "gtest"
    else:
        unittest_type = "simple"

    binary_target = test_name + "_bin"
    cpp_binary(
        name = binary_target,
        srcs = [test_cc],
        arch_preprocessor_flags = rocksdb_arch_preprocessor_flags,
        os_preprocessor_flags = rocksdb_os_preprocessor_flags,
        compiler_flags = rocksdb_compiler_flags + extra_compiler_flags,
        preprocessor_flags = rocksdb_preprocessor_flags,
        deps = [":rocksdb_test_lib"] + extra_deps,
        os_deps = rocksdb_os_deps,
        external_deps = rocksdb_external_deps,
    )

    binary_location = "$(location :{})".format(binary_target)

    # This tag instructs testpilot to use the lower-memory coverage runner
    # (e.g. it tells testpilot that the binary is actually instrumented
    # with coverage info)
    test_tags = ["coverage"] if coverage.is_coverage_enabled(package) else []

    custom_unittest(
        name = test_name,
        command = [runner_script, binary_location],
        type = unittest_type,
        env = {"BUCK_BASE_BINARY": binary_location},
        tags = test_tags,
    )
rocksdb-6.11.4/docs/000077500000000000000000000000001370372246700141755ustar00rootroot00000000000000rocksdb-6.11.4/docs/.gitignore000066400000000000000000000000701370372246700161620ustar00rootroot00000000000000.DS_STORE _site/ *.swo *.swp _site .sass-cache *.psd *~ rocksdb-6.11.4/docs/CNAME000066400000000000000000000000131370372246700147350ustar00rootroot00000000000000rocksdb.orgrocksdb-6.11.4/docs/CONTRIBUTING.md000066400000000000000000000112501370372246700164250ustar00rootroot00000000000000This provides guidance on how to contribute various content to `rocksdb.org`. ## Getting started You should only have to do these one time. - Rename this file to `CONTRIBUTING.md`. - Rename `EXAMPLE-README-FOR-RUNNING-DOCS.md` to `README.md` (replacing the existing `README.md` that came with the template). - Rename `EXAMPLE-LICENSE` to `LICENSE`. - Review the [template information](./TEMPLATE-INFORMATION.md). - Review `./_config.yml`. - Make sure you update `title`, `description`, `tagline` and `gacode` (Google Analytics) in `./_config.yml`. ## Basic Structure Most content is written in markdown. You name the file `something.md`, then have a header that looks like this: ``` --- docid: getting-started title: Getting started with ProjectName layout: docs permalink: /docs/getting-started.html --- ``` Customize these values for each document, blog post, etc. > The filename of the `.md` file doesn't actually matter; what is important is the `docid` being unique and the `permalink` correct and unique too). ## Landing page Modify `index.md` with your new or updated content. 
If you want a `GridBlock` as part of your content, you can do so directly with HTML: ``` ``` or with a combination of changing `./_data/features.yml` and adding some Liquid to `index.md`, such as: ``` {% include content/gridblocks.html data_source=site.data.features imagealign="bottom"%} ``` ## Blog To modify a blog post, edit the appropriate markdown file in `./_posts/`. Adding a new blog post is a four-step process. > Some posts have a `permalink` and `comments` in the blog post YAML header. You will not need these for new blog posts. These are an artifact of migrating the blog from WordPress to gh-pages. 1. Create your blog post in `./_posts/` in markdown (file extension `.md` or `.markdown`). See current posts in that folder or `./doc-type-examples/2016-04-07-blog-post-example.md` for an example of the YAML format. **If the `./_posts` directory does not exist, create it**. - You can add a `<!--truncate-->` tag in the middle of your post such that you show only the excerpt above that tag in the main `/blog` index on your page. 1. If you have not authored a blog post before, modify the `./_data/authors.yml` file with the `author` id you used in your blog post, along with your full name and Facebook ID to get your profile picture. 1. [Run the site locally](./README.md) to test your changes. It will be at `http://127.0.0.1/blog/your-new-blog-post-title.html` 1. Push your changes to GitHub. ## Docs To modify docs, edit the appropriate markdown file in `./_docs/`. To add docs to the site.... 1. Add your markdown file to the `./_docs/` folder. See `./doc-type-examples/docs-hello-world.md` for an example of the YAML header format. **If the `./_docs/` directory does not exist, create it**. - You can use folders in the `./_docs/` directory to organize your content if you want. 1. Update `_data/nav_docs.yml` to add your new document to the navigation bar. Use the `docid` you put in your doc markdown as the `id` in the `_data/nav_docs.yml` file. 1. 
[Run the site locally](./README.md) to test your changes. It will be at `http://127.0.0.1/docs/your-new-doc-permalink.html` 1. Push your changes to GitHub. ## Header Bar To modify the header bar, change `./_data/nav.yml`. ## Top Level Page To modify a top-level page, edit the appropriate markdown file in `./top-level/` If you want a top-level page (e.g., http://your-site.com/top-level.html) -- not in `/blog/` or `/docs/`.... 1. Create a markdown file in the root `./top-level/`. See `./doc-type-examples/top-level-example.md` for more information. 1. If you want a visible link to that file, update `_data/nav.yml` to add a link to your new top-level document in the header bar. > This is not necessary if you just want to have a page that is linked to from another page, but not exposed as direct link to the user. 1. [Run the site locally](./README.md) to test your changes. It will be at `http://127.0.0.1/your-top-level-page-permalink.html` 1. Push your changes to GitHub. ## Other Changes - CSS: `./css/main.css` or `./_sass/*.scss`. 
- Images: `./static/images/[docs | posts]/....` - Main Blog post HTML: `./_includes/post.html` - Main Docs HTML: `./_includes/doc.html` rocksdb-6.11.4/docs/Gemfile000066400000000000000000000000731370372246700154700ustar00rootroot00000000000000source 'https://rubygems.org' gem 'github-pages', '~> 104' rocksdb-6.11.4/docs/Gemfile.lock000066400000000000000000000070351370372246700164240ustar00rootroot00000000000000GEM remote: https://rubygems.org/ specs: activesupport (4.2.7) i18n (~> 0.7) json (~> 1.7, >= 1.7.7) minitest (~> 5.1) thread_safe (~> 0.3, >= 0.3.4) tzinfo (~> 1.1) addressable (2.4.0) coffee-script (2.4.1) coffee-script-source execjs coffee-script-source (1.12.2) colorator (1.1.0) concurrent-ruby (1.0.5) ethon (0.11.0) ffi (>= 1.3.0) execjs (2.7.0) faraday (0.15.2) multipart-post (>= 1.2, < 3) ffi (1.9.25) forwardable-extended (2.6.0) gemoji (2.1.0) github-pages (104) activesupport (= 4.2.7) github-pages-health-check (= 1.2.0) jekyll (>= 3.8.4) jekyll-avatar (= 0.4.2) jekyll-coffeescript (= 1.0.1) jekyll-feed (= 0.8.0) jekyll-gist (= 1.4.0) jekyll-github-metadata (= 2.2.0) jekyll-mentions (= 1.2.0) jekyll-paginate (= 1.1.0) jekyll-redirect-from (= 0.11.0) jekyll-sass-converter (= 1.3.0) jekyll-seo-tag (= 2.1.0) jekyll-sitemap (= 0.12.0) jekyll-swiss (= 0.4.0) jemoji (= 0.7.0) kramdown (= 1.11.1) liquid (= 3.0.6) listen (= 3.0.6) mercenary (~> 0.3) minima (= 2.0.0) rouge (= 1.11.1) terminal-table (~> 1.4) github-pages-health-check (1.2.0) addressable (~> 2.3) net-dns (~> 0.8) octokit (~> 4.0) public_suffix (~> 1.4) typhoeus (~> 0.7) html-pipeline (2.4.2) activesupport (>= 2) nokogiri (~> 1.8.2) i18n (0.7.0) jekyll (3.8.4) addressable (~> 2.4) colorator (~> 1.0) jekyll-sass-converter (~> 1.0) jekyll-watch (~> 1.1) kramdown (~> 1.3) liquid (~> 3.0) mercenary (~> 0.3.3) pathutil (~> 0.9) rouge (~> 1.7) safe_yaml (~> 1.0) jekyll-avatar (0.4.2) jekyll (~> 3.0) jekyll-coffeescript (1.0.1) coffee-script (~> 2.2) jekyll-feed (0.8.0) jekyll (~> 3.3) jekyll-gist 
(1.4.0) octokit (~> 4.2) jekyll-github-metadata (2.2.0) jekyll (~> 3.1) octokit (~> 4.0, != 4.4.0) jekyll-mentions (1.2.0) activesupport (~> 4.0) html-pipeline (~> 2.3) jekyll (~> 3.0) jekyll-paginate (1.1.0) jekyll-redirect-from (0.11.0) jekyll (>= 2.0) jekyll-sass-converter (1.3.0) sass (~> 3.2) jekyll-seo-tag (2.1.0) jekyll (~> 3.3) jekyll-sitemap (0.12.0) jekyll (~> 3.3) jekyll-swiss (0.4.0) jekyll-watch (1.5.0) listen (~> 3.0, < 3.1) jemoji (0.7.0) activesupport (~> 4.0) gemoji (~> 2.0) html-pipeline (~> 2.2) jekyll (>= 3.0) json (1.8.3) kramdown (1.11.1) liquid (3.0.6) listen (3.0.6) rb-fsevent (>= 0.9.3) rb-inotify (>= 0.9.7) mercenary (0.3.6) mini_portile2 (2.3.0) minima (2.0.0) minitest (5.9.1) multipart-post (2.0.0) net-dns (0.8.0) nokogiri (~> 1.8.2) mini_portile2 (~> 2.3.0) octokit (4.4.1) sawyer (~> 0.7.0, >= 0.5.3) pathutil (0.14.0) forwardable-extended (~> 2.6) public_suffix (1.5.3) rb-fsevent (0.9.8) rb-inotify (0.9.7) ffi (>= 0.5.0) rouge (1.11.1) safe_yaml (1.0.4) sass (3.4.22) sawyer (0.7.0) addressable (>= 2.3.5, < 2.5) faraday (~> 0.8, < 0.10) terminal-table (1.7.3) unicode-display_width (~> 1.1.1) thread_safe (0.3.5) typhoeus (0.8.0) ethon (>= 0.8.0) tzinfo (1.2.2) thread_safe (~> 0.1) unicode-display_width (1.1.1) PLATFORMS ruby DEPENDENCIES github-pages (~> 104) BUNDLED WITH 1.13.1 rocksdb-6.11.4/docs/LICENSE-DOCUMENTATION000066400000000000000000000432711370372246700172200ustar00rootroot00000000000000Attribution 4.0 International ======================================================================= Creative Commons Corporation ("Creative Commons") is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an "as-is" basis. 
Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible. Using Creative Commons Public Licenses Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses. Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC- licensed material, or material used under an exception or limitation to copyright. More considerations for licensors: wiki.creativecommons.org/Considerations_for_licensors Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor's permission is not necessary for any reason--for example, because of any applicable exception or limitation to copyright--then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. 
Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. More_considerations for the public: wiki.creativecommons.org/Considerations_for_licensees ======================================================================= Creative Commons Attribution 4.0 International Public License By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. Section 1 -- Definitions. a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. c. 
Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. d. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. e. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. f. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. g. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. h. Licensor means the individual(s) or entity(ies) granting rights under this Public License. i. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. j. 
Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. k. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. Section 2 -- Scope. a. License grant. 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: a. reproduce and Share the Licensed Material, in whole or in part; and b. produce, reproduce, and Share Adapted Material. 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 3. Term. The term of this Public License is specified in Section 6(a). 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a) (4) never produces Adapted Material. 5. Downstream recipients. a. Offer from the Licensor -- Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. b. 
No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). b. Other rights. 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 2. Patent and trademark rights are not licensed under this Public License. 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. Section 3 -- License Conditions. Your exercise of the Licensed Rights is expressly made subject to the following conditions. a. Attribution. 1. If You Share the Licensed Material (including in modified form), You must: a. retain the following if it is supplied by the Licensor with the Licensed Material: i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); ii. a copyright notice; iii. 
a notice that refers to this Public License; iv. a notice that refers to the disclaimer of warranties; v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; b. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and c. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. Section 4 -- Sui Generis Database Rights. Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 
Section 5 -- Disclaimer of Warranties and Limitation of Liability. a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. Section 6 -- Term and Termination. a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 2. 
upon express reinstatement by the Licensor. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. Section 7 -- Other Terms and Conditions. a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. Section 8 -- Interpretation. a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. ======================================================================= Creative Commons is not a party to its public licenses. 
Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the "Licensor." Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark "Creative Commons" or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. Creative Commons may be contacted at creativecommons.org. rocksdb-6.11.4/docs/README.md000066400000000000000000000061261370372246700154610ustar00rootroot00000000000000## User Documentation for rocksdb.org This directory will contain the user and feature documentation for RocksDB. The documentation will be hosted on GitHub pages. ### Contributing See [CONTRIBUTING.md](./CONTRIBUTING.md) for details on how to add or modify content. ### Run the Site Locally The requirements for running a GitHub pages site locally are described in [GitHub help](https://help.github.com/articles/setting-up-your-github-pages-site-locally-with-jekyll/#requirements). The steps below summarize those requirements. > If you have run the site before, you can start with step 1 and then move on to step 5. 1. Ensure that you are in the `/docs` directory in your local RocksDB clone (i.e., the same directory where this `README.md` exists). The below RubyGems commands, etc. must be run from there. 1. Make sure you have Ruby and [RubyGems](https://rubygems.org/) installed. > Ruby >= 2.2 is required for the gems. On the latest versions of Mac OS X, Ruby 2.0 is the > default. 
Use `brew install ruby` (or your preferred upgrade mechanism) to install a newer > version of Ruby for your Mac OS X system. 1. Make sure you have [Bundler](http://bundler.io/) installed. ``` # may require sudo gem install bundler ``` 1. Install the project's dependencies ``` # run this in the 'docs' directory bundle install ``` > If you get an error when installing `nokogiri`, you may be running into the problem described > in [this nokogiri issue](https://github.com/sparklemotion/nokogiri/issues/1483). You can > either `brew uninstall xz` (and then `brew install xz` after the bundle is installed) or > `xcode-select --install` (although this may not work if you have already installed command > line tools). 1. Run Jekyll's server. - On first runs or for structural changes to the documentation (e.g., new sidebar menu item), do a full build. ``` bundle exec jekyll serve ``` - For content changes only, you can use `--incremental` for faster builds. ``` bundle exec jekyll serve --incremental ``` > We use `bundle exec` instead of running straight `jekyll` because `bundle exec` will always use the version of Jekyll from our `Gemfile`. Just running `jekyll` will use the system version and may not necessarily be compatible. - To run using an actual IP address, you can use `--host=0.0.0.0` ``` bundle exec jekyll serve --host=0.0.0.0 ``` This will allow you to use the IP address associated with your machine in the URL. That way you could share it with other people. e.g., on a Mac, you can find your IP address with something like `ifconfig | grep "inet " | grep -v 127.0.0.1`. 1. Either of the commands in the previous step will serve up the site on your local device at http://127.0.0.1:4000/ or http://localhost:4000. ### Updating the Bundle The site depends on GitHub Pages and the installed bundle is based on the `github-pages` gem. Occasionally that gem might get updated with new or changed functionality. 
If that is the case, you can run: ``` bundle update ``` to get the latest packages for the installation. rocksdb-6.11.4/docs/TEMPLATE-INFORMATION.md000066400000000000000000000047231370372246700175230ustar00rootroot00000000000000## Template Details First, go through `_config.yml` and adjust the available settings to your project's standard. When you make changes here, you'll have to kill the `jekyll serve` instance and restart it to see those changes, but that's only the case with the config file. Next, update some image assets - you'll want to update `favicon.png`, `logo.svg`, and `og_image.png` (used for Like button stories and Shares on Facebook) in the `static` folder with your own logos. Next, if you're going to have docs on your site, keep the `_docs` and `docs` folders, if not, you can safely remove them (or you can safely leave them and not include them in your navigation - Jekyll renders all of this before a client views the site anyway, so there's no performance hit from just leaving it there for a future expansion). Same thing with a blog section, either keep or delete the `_posts` and `blog` folders. You can customize your homepage in three parts - the first in the homepage header, which is mostly automatically derived from the elements you insert into your config file. However, you can also specify a series of 'promotional' elements in `_data/promo.yml`. You can read that file for more information. The second place for your homepage is in `index.md` which contains the bulk of the main content below the header. This is all markdown if you want, but you can use HTML and Jekyll's template tags (called Liquid) in there too. Check out this folder's index.md for an example of one common template tag that we use on our sites called gridblocks. The third and last place is in the `_data/powered_by.yml` and `_data/powered_by_highlight.yml` files. 
Both these files combine to create a section on the homepage that is intended to show a list of companies or apps that are using your project. The `powered_by_highlight` file is a list of curated companies/apps that you want to show as a highlight at the top of this section, including their logos in whatever format you want. The `powered_by` file is a more open list that is just text links to the companies/apps and can be updated via Pull Request by the community. If you don't want these sections on your homepage, just empty out both files and leave them blank. The last thing you'll want to do is setup your top level navigation bar. You can do this by editing `nav.yml` and keeping the existing title/href/category structure used there. Although the nav is responsive and fairly flexible design-wise, no more than 5 or 6 nav items is recommended. rocksdb-6.11.4/docs/_config.yml000066400000000000000000000055001370372246700163240ustar00rootroot00000000000000# Site settings permalink: /blog/:year/:month/:day/:title.html title: RocksDB tagline: A persistent key-value store for fast storage environments description: > RocksDB is an embeddable persistent key-value store for fast storage. fbappid: "1615782811974223" gacode: "UA-49459723-1" # baseurl determines the subpath of your site. 
For example if you're using an # organisation.github.io/reponame/ basic site URL, then baseurl would be set # as "/reponame" but leave blank if you have a top-level domain URL as it is # now set to "" by default as discussed in: # http://jekyllrb.com/news/2016/10/06/jekyll-3-3-is-here/ baseurl: "" # the base hostname & protocol for your site # If baseurl is set, then the absolute url for your site would be url/baseurl # This will also be set to the right thing automatically for local development # https://github.com/blog/2277-what-s-new-in-github-pages-with-jekyll-3-3 # http://jekyllrb.com/news/2016/10/06/jekyll-3-3-is-here/ url: "http://rocksdb.org" # Note: There are new filters in Jekyll 3.3 to help with absolute and relative urls # absolute_url # relative_url # So you will see these used throughout the Jekyll code in this template. # no more need for | prepend: site.url | prepend: site.baseurl # http://jekyllrb.com/news/2016/10/06/jekyll-3-3-is-here/ #https://github.com/blog/2277-what-s-new-in-github-pages-with-jekyll-3-3 # The GitHub repo for your project ghrepo: "facebook/rocksdb" # Use these color settings to determine your colour scheme for the site. color: # primary should be a vivid color that reflects the project's brand primary: "#2a2a2a" # secondary should be a subtle light or dark color used on page backgrounds secondary: "#f9f9f9" # Use the following to specify whether the previous two colours are 'light' # or 'dark' and therefore what colors can be overlaid on them primary-overlay: "dark" secondary-overlay: "light" #Uncomment this if you want to enable Algolia doc search with your own values #searchconfig: # apikey: "" # indexname: "" # Blog posts are builtin to Jekyll by default, with the `_posts` directory. # Here you can specify other types of documentation. The names here are `docs` # and `top-level`. This means their content will be in `_docs` and `_top-level`. # The permalink format is also given. 
# http://ben.balter.com/2015/02/20/jekyll-collections/ collections: docs: output: true permalink: /docs/:name/ top-level: output: true permalink: :name.html # DO NOT ADJUST BELOW THIS LINE UNLESS YOU KNOW WHAT YOU ARE CHANGING markdown: kramdown kramdown: input: GFM syntax_highlighter: rouge syntax_highlighter_opts: css_class: 'rougeHighlight' span: line_numbers: false block: line_numbers: true start_line: 1 sass: style: :compressed redcarpet: extensions: [with_toc_data] gems: - jekyll-redirect-from rocksdb-6.11.4/docs/_data/000077500000000000000000000000001370372246700152455ustar00rootroot00000000000000rocksdb-6.11.4/docs/_data/authors.yml000066400000000000000000000017631370372246700174640ustar00rootroot00000000000000icanadi: full_name: Igor Canadi fbid: 706165749 xjin: full_name: Xing Jin fbid: 100000739847320 leijin: full_name: Lei Jin fbid: 634570164 yhciang: full_name: Yueh-Hsuan Chiang fbid: 1619020986 radheshyam: full_name: Radheshyam Balasundaram fbid: 800837305 zagfox: full_name: Feng Zhu fbid: 100006493823622 lgalanis: full_name: Leonidas Galanis fbid: 8649950 sdong: full_name: Siying Dong fbid: 9805119 dmitrism: full_name: Dmitri Smirnov rven2: full_name: Venkatesh Radhakrishnan fbid: 100008352697325 yiwu: full_name: Yi Wu fbid: 100000476362039 maysamyabandeh: full_name: Maysam Yabandeh fbid: 100003482360101 IslamAbdelRahman: full_name: Islam AbdelRahman fbid: 642759407 ajkr: full_name: Andrew Kryczka fbid: 568694102 abhimadan: full_name: Abhishek Madan fbid: 1850247869 sagar0: full_name: Sagar Vemuri fbid: 2419111 lightmark: full_name: Aaron Gao fbid: 1351549072 fgwu: full_name: Fenggang Wu fbid: 100002297362180 rocksdb-6.11.4/docs/_data/features.yml000066400000000000000000000022051370372246700176050ustar00rootroot00000000000000- title: High Performance text: | RocksDB uses a log structured database engine, written entirely in C++, for maximum performance. Keys and values are just arbitrarily-sized byte streams. 
image: images/promo-performance.svg - title: Optimized for Fast Storage text: | RocksDB is optimized for fast, low latency storage such as flash drives and high-speed disk drives. RocksDB exploits the full potential of high read/write rates offered by flash or RAM. image: images/promo-flash.svg - title: Adaptable text: | RocksDB is adaptable to different workloads. From database storage engines such as [MyRocks](https://github.com/facebook/mysql-5.6) to [application data caching](http://techblog.netflix.com/2016/05/application-data-caching-using-ssds.html) to embedded workloads, RocksDB can be used for a variety of data needs. image: images/promo-adapt.svg - title: Basic and Advanced Database Operations text: | RocksDB provides basic operations such as opening and closing a database, reading and writing to more advanced operations such as merging and compaction filters. image: images/promo-operations.svg rocksdb-6.11.4/docs/_data/nav.yml000066400000000000000000000013531370372246700165560ustar00rootroot00000000000000- title: Docs href: /docs/ category: docs - title: GitHub href: https://github.com/facebook/rocksdb/ category: external - title: API (C++) href: https://github.com/facebook/rocksdb/tree/master/include/rocksdb category: external - title: API (Java) href: https://github.com/facebook/rocksdb/tree/master/java/src/main/java/org/rocksdb category: external - title: Support href: /support.html category: support - title: Blog href: /blog/ category: blog - title: Facebook href: https://www.facebook.com/groups/rocksdb.dev/ category: external # Use external for external links not associated with the paths of the current site. # If a category is external, site urls, for example, are not prepended to the href, etc.. 
rocksdb-6.11.4/docs/_data/nav_docs.yml000066400000000000000000000000661370372246700175660ustar00rootroot00000000000000- title: Quick Start items: - id: getting-started rocksdb-6.11.4/docs/_data/powered_by.yml000066400000000000000000000000331370372246700201230ustar00rootroot00000000000000# Fill in later if desired rocksdb-6.11.4/docs/_data/powered_by_highlight.yml000066400000000000000000000000331370372246700221520ustar00rootroot00000000000000# Fill in later if desired rocksdb-6.11.4/docs/_data/promo.yml000066400000000000000000000003161370372246700171240ustar00rootroot00000000000000# This file determines the list of promotional elements added to the header of \ # your site's homepage. Full list of plugins are shown - type: button href: docs/getting-started.html text: Get Started rocksdb-6.11.4/docs/_docs/000077500000000000000000000000001370372246700152645ustar00rootroot00000000000000rocksdb-6.11.4/docs/_docs/faq.md000066400000000000000000000115611370372246700163610ustar00rootroot00000000000000--- docid: support-faq title: FAQ layout: docs permalink: /docs/support/faq.html --- Here is an ever-growing list of frequently asked questions around RocksDB ## What is RocksDB? RocksDB is an embeddable persistent key-value store for fast storage. RocksDB can also be the foundation for a client-server database but our current focus is on embedded workloads. RocksDB builds on [LevelDB](https://code.google.com/p/leveldb/) to be scalable to run on servers with many CPU cores, to efficiently use fast storage, to support IO-bound, in-memory and write-once workloads, and to be flexible to allow for innovation. For the latest details, watch [Mark Callaghan’s and Igor Canadi’s talk at CMU on 10/2015](https://scs.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=f4e0eb37-ae18-468f-9248-cb73edad3e56). 
[Dhruba Borthakur’s introductory talk](https://github.com/facebook/rocksdb/blob/gh-pages-old/intro.pdf?raw=true) from the Data @ Scale 2013 conference provides some perspective about how RocksDB has evolved. ## How does performance compare? We benchmarked LevelDB and found that it was unsuitable for our server workloads. The [benchmark results](http://leveldb.googlecode.com/svn/trunk/doc/benchmark.html) look awesome at first sight, but we quickly realized that those results were for a database whose size was smaller than the size of RAM on the test machine – where the entire database could fit in the OS page cache. When we performed the same benchmarks on a database that was at least 5 times larger than main memory, the performance results were dismal. By contrast, we’ve published the [RocksDB benchmark results](https://github.com/facebook/rocksdb/wiki/Performance-Benchmarks) for server side workloads on Flash. We also measured the performance of LevelDB on these server-workload benchmarks and found that RocksDB solidly outperforms LevelDB for these IO bound workloads. We found that LevelDB’s single-threaded compaction process was insufficient to drive server workloads. We saw frequent write-stalls with LevelDB that caused 99-percentile latency to be tremendously large. We found that mmap-ing a file into the OS cache introduced performance bottlenecks for reads. We could not make LevelDB consume all the IOs offered by the underlying Flash storage. ## What is RocksDB suitable for? RocksDB can be used by applications that need low latency database accesses. Possibilities include: * A user-facing application that stores the viewing history and state of users of a website. * A spam detection application that needs fast access to big data sets. * A graph-search query that needs to scan a data set in realtime. * A cache for data from Hadoop, thereby allowing applications to query Hadoop data in realtime. * A message-queue that supports a high number of inserts and deletes. 
## How big is RocksDB adoption? RocksDB is an embedded storage engine that is used in a number of backend systems at Facebook. In the Facebook newsfeed’s backend, it replaced another internal storage engine called Centrifuge and is one of the many components used. ZippyDB, a distributed key value store service used by Facebook products relies on RocksDB. Details on ZippyDB are in [Muthu Annamalai’s talk at Data@Scale in Seattle](https://youtu.be/DfiN7pG0D0k). Dragon, a distributed graph query engine part of the social graph infrastructure, is using RocksDB to store data. Parse has been running [MongoDB on RocksDB in production](http://blog.parse.com/announcements/mongodb-rocksdb-parse/) since early 2015. RocksDB is proving to be a useful component for a lot of other groups in the industry. For a list of projects currently using RocksDB, take a look at our USERS.md list on GitHub. ## How good is RocksDB as a database storage engine? Our engineering team at Facebook firmly believes that RocksDB has great potential as a storage engine for databases. It has been proven in production with MongoDB: [MongoRocks](https://github.com/mongodb-partners/mongo-rocks) is the RocksDB based storage engine for MongoDB. [MyRocks](https://code.facebook.com/posts/190251048047090/myrocks-a-space-and-write-optimized-mysql-database/) is the RocksDB based storage engine for MySQL. Using RocksDB we have managed to achieve 2x better compression and 10x less write amplification for our benchmarks compared to our existing MySQL setup. Given our current results, work is currently underway to develop MyRocks into a production ready solution for web-scale MySQL workloads. Follow along on [GitHub](https://github.com/facebook/mysql-5.6)! ## Why is RocksDB open sourced? We are open sourcing this project on [GitHub](http://github.com/facebook/rocksdb) because we think it will be useful beyond Facebook. 
We are hoping that software programmers and database developers will use, enhance, and customize RocksDB for their use-cases. We would also like to engage with the academic community on topics related to efficiency for modern database algorithms. rocksdb-6.11.4/docs/_docs/getting-started.md000066400000000000000000000054651370372246700207250ustar00rootroot00000000000000--- docid: getting-started title: Getting started layout: docs permalink: /docs/getting-started.html --- ## Overview The RocksDB library provides a persistent key value store. Keys and values are arbitrary byte arrays. The keys are ordered within the key value store according to a user-specified comparator function. The library is maintained by the Facebook Database Engineering Team, and is based on [LevelDB](https://github.com/google/leveldb), by Sanjay Ghemawat and Jeff Dean at Google. This overview gives some simple examples of how RocksDB is used. For the story of why RocksDB was created in the first place, see [Dhruba Borthakur’s introductory talk](https://github.com/facebook/rocksdb/blob/gh-pages-old/intro.pdf?raw=true) from the Data @ Scale 2013 conference. ## Opening A Database A rocksdb database has a name which corresponds to a file system directory. All of the contents of database are stored in this directory. The following example shows how to open a database, creating it if necessary: ```c++ #include #include "rocksdb/db.h" rocksdb::DB* db; rocksdb::Options options; options.create_if_missing = true; rocksdb::Status status = rocksdb::DB::Open(options, "/tmp/testdb", &db); assert(status.ok()); ... ``` If you want to raise an error if the database already exists, add the following line before the rocksdb::DB::Open call: ```c++ options.error_if_exists = true; ``` ## Status You may have noticed the `rocksdb::Status` type above. Values of this type are returned by most functions in RocksDB that may encounter an error. 
You can check if such a result is ok, and also print an associated error message: ```c++ rocksdb::Status s = ...; if (!s.ok()) cerr << s.ToString() << endl; ``` ## Closing A Database When you are done with a database, just delete the database object. For example: ```c++ /* open the db as described above */ /* do something with db */ delete db; ``` ## Reads And Writes The database provides Put, Delete, and Get methods to modify/query the database. For example, the following code moves the value stored under `key1` to `key2`. ```c++ std::string value; rocksdb::Status s = db->Get(rocksdb::ReadOptions(), key1, &value); if (s.ok()) s = db->Put(rocksdb::WriteOptions(), key2, value); if (s.ok()) s = db->Delete(rocksdb::WriteOptions(), key1); ``` ## Further documentation These are just simple examples of how RocksDB is used. The full documentation is currently on the [GitHub wiki](https://github.com/facebook/rocksdb/wiki). Here are some specific details about the RocksDB implementation: - [Architecture Guide](https://github.com/facebook/rocksdb/wiki/Rocksdb-Architecture-Guide) - [Format of an immutable Table file](https://github.com/facebook/rocksdb/wiki/Rocksdb-Table-Format) - [Format of a log file](https://github.com/facebook/rocksdb/wiki/Write-Ahead-Log-File-Format) rocksdb-6.11.4/docs/_includes/000077500000000000000000000000001370372246700161425ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/blog_pagination.html000066400000000000000000000015211370372246700221630ustar00rootroot00000000000000 {% if paginator.total_pages > 1 %}

{% endif %} rocksdb-6.11.4/docs/_includes/content/000077500000000000000000000000001370372246700176145ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/content/gridblocks.html000066400000000000000000000003241370372246700226240ustar00rootroot00000000000000
{% for item in {{include.data_source}} %} {% include content/items/gridblock.html item=item layout=include.layout imagealign=include.imagealign align=include.align %} {% endfor %}
rocksdb-6.11.4/docs/_includes/content/items/000077500000000000000000000000001370372246700207355ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/content/items/gridblock.html000066400000000000000000000017061370372246700235670ustar00rootroot00000000000000{% if include.layout == "fourColumn" %} {% assign layout = "fourByGridBlock" %} {% else %} {% assign layout = "twoByGridBlock" %} {% endif %} {% if include.imagealign == "side" %} {% assign imagealign = "imageAlignSide" %} {% else %} {% if item.image %} {% assign imagealign = "imageAlignTop" %} {% else %} {% assign imagealign = "" %} {% endif %} {% endif %} {% if include.align == "right" %} {% assign align = "alignRight" %} {% elsif include.align == "center" %} {% assign align = "alignCenter" %} {% else %} {% assign align = "alignLeft" %} {% endif %}
{% if item.image %}
{{ item.title }}
{% endif %}

{{ item.title }}

{% if item.text %} {{ item.text | markdownify }} {% endif %}
rocksdb-6.11.4/docs/_includes/doc.html000066400000000000000000000015241370372246700175770ustar00rootroot00000000000000

{% if include.truncate %}{{ page.title }}{% else %}{{ page.title }}{% endif %}

{% if include.truncate %} {% if page.content contains '' %} {{ page.content | split:'' | first }} {% else %} {{ page.content }} {% endif %} {% else %} {{ content }}

Edit on GitHub

{% endif %}
{% include doc_paging.html %}
rocksdb-6.11.4/docs/_includes/doc_paging.html000066400000000000000000000000001370372246700211100ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/footer.html000066400000000000000000000067551370372246700203430ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/head.html000066400000000000000000000026731370372246700177410ustar00rootroot00000000000000 {% if site.searchconfig %} {% endif %} {% if page.title %}{{ page.title }} | {{ site.title }}{% else %}{{ site.title }}{% endif %} rocksdb-6.11.4/docs/_includes/header.html000066400000000000000000000014601370372246700202610ustar00rootroot00000000000000

{{ site.title }}

{{ site.tagline }}

{% if page.excerpt %}{{ page.excerpt | strip_html }}{% else %}{{ site.description }}{% endif %}

{% for promo in site.data.promo %} {% include plugins/{{promo.type}}.html button_href=promo.href button_text=promo.text %}
{% endfor %}
rocksdb-6.11.4/docs/_includes/hero.html000066400000000000000000000000001370372246700177530ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/home_header.html000066400000000000000000000014471370372246700212760ustar00rootroot00000000000000

{{ site.tagline }}

{% if page.excerpt %}{{ page.excerpt | strip_html }}{% else %}{{ site.description }}{% endif %}

{% for promo in site.data.promo %}
{% include plugins/{{promo.type}}.html href=promo.href text=promo.text children=promo.children %}
{% endfor %}
rocksdb-6.11.4/docs/_includes/katex_import.html000066400000000000000000000003531370372246700215370ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/katex_render.html000066400000000000000000000141361370372246700215100ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/nav.html000066400000000000000000000030071370372246700176140ustar00rootroot00000000000000

{{ site.title }}

rocksdb-6.11.4/docs/_includes/nav/000077500000000000000000000000001370372246700167265ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/nav/collection_nav.html000066400000000000000000000042021370372246700226110ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/nav/collection_nav_group.html000066400000000000000000000012251370372246700240270ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/nav/collection_nav_group_item.html000066400000000000000000000001641370372246700250460ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/nav/header_nav.html000066400000000000000000000020501370372246700217050ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/nav_search.html000066400000000000000000000006711370372246700211450ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/plugins/000077500000000000000000000000001370372246700176235ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/plugins/all_share.html000066400000000000000000000002461370372246700224450ustar00rootroot00000000000000
{% include plugins/like_button.html %}{% include plugins/twitter_share.html %}{% include plugins/google_share.html %}
rocksdb-6.11.4/docs/_includes/plugins/ascii_cinema.html000066400000000000000000000003621370372246700231160ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/plugins/button.html000066400000000000000000000002001370372246700220140ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/plugins/github_star.html000066400000000000000000000007141370372246700230260ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/plugins/github_watch.html000066400000000000000000000007131370372246700231620ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/plugins/google_share.html000066400000000000000000000002401370372246700231430ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/plugins/iframe.html000066400000000000000000000003301370372246700217500ustar00rootroot00000000000000
{% include plugins/button.html href=include.href text=include.text %}
rocksdb-6.11.4/docs/_includes/plugins/like_button.html000066400000000000000000000011251370372246700230270ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/plugins/plugin_row.html000066400000000000000000000002401370372246700226720ustar00rootroot00000000000000
{% for child in include.children %} {% include plugins/{{child.type}}.html href=child.href text=child.text %} {% endfor %}
rocksdb-6.11.4/docs/_includes/plugins/post_social_plugins.html000066400000000000000000000024121370372246700245700ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/plugins/slideshow.html000066400000000000000000000050461370372246700225170ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/plugins/twitter_follow.html000066400000000000000000000007611370372246700236010ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/plugins/twitter_share.html000066400000000000000000000007551370372246700234040ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_includes/post.html000066400000000000000000000030131370372246700200120ustar00rootroot00000000000000
{% for author_idx in page.author %}
{% assign author = site.data.authors[author_idx] %} {% if author.fbid %}
{{ author.fullname }}
{% endif %} {% if author.full_name %} {% endif %}
{% endfor %}

{% if include.truncate %}{{ page.title }}{% else %}{{ page.title }}{% endif %}

{% if include.truncate %} {% if page.content contains '' %} {{ page.content | split:'' | first | markdownify }} {% else %} {{ page.content | markdownify }} {% endif %} {% else %} {{ content }} {% endif %} {% unless include.truncate %} {% include plugins/like_button.html %} {% endunless %}
rocksdb-6.11.4/docs/_includes/powered_by.html000066400000000000000000000023171370372246700211720ustar00rootroot00000000000000{% if site.data.powered_by.first.items or site.data.powered_by_highlight.first.items %}
{% if site.data.powered_by_highlight.first.title %}

{{ site.data.powered_by_highlight.first.title }}

{% else %}

{{ site.data.powered_by.first.title }}

{% endif %} {% if site.data.powered_by_highlight.first.items %}
{% for item in site.data.powered_by_highlight.first.items %}
{{ item.name }}
{% endfor %}
{% endif %}
{% for item in site.data.powered_by.first.items %} {% endfor %}
Does your app use {{ site.title }}? Add it to this list with a pull request!
{% endif %} rocksdb-6.11.4/docs/_includes/social_plugins.html000066400000000000000000000020211370372246700220360ustar00rootroot00000000000000
rocksdb-6.11.4/docs/_includes/ui/000077500000000000000000000000001370372246700165575ustar00rootroot00000000000000rocksdb-6.11.4/docs/_includes/ui/button.html000066400000000000000000000003531370372246700207610ustar00rootroot00000000000000{{ include.button_text }}rocksdb-6.11.4/docs/_layouts/000077500000000000000000000000001370372246700160345ustar00rootroot00000000000000rocksdb-6.11.4/docs/_layouts/basic.html000066400000000000000000000003311370372246700200000ustar00rootroot00000000000000--- layout: doc_default ---
{{ content }}
rocksdb-6.11.4/docs/_layouts/blog.html000066400000000000000000000002731370372246700176470ustar00rootroot00000000000000--- category: blog layout: blog_default ---
{{ content }}
rocksdb-6.11.4/docs/_layouts/blog_default.html000066400000000000000000000006231370372246700213520ustar00rootroot00000000000000 {% include head.html %} {% include nav.html alwayson=true %} rocksdb-6.11.4/docs/_layouts/default.html000066400000000000000000000003211370372246700203420ustar00rootroot00000000000000 {% include head.html %} {% include nav.html alwayson=true %} rocksdb-6.11.4/docs/_layouts/doc_default.html000066400000000000000000000006651370372246700212020ustar00rootroot00000000000000 {% include head.html %} {% include nav.html alwayson=true %} rocksdb-6.11.4/docs/_layouts/doc_page.html000066400000000000000000000002571370372246700204670ustar00rootroot00000000000000--- layout: doc_default ---
{{ content }}
rocksdb-6.11.4/docs/_layouts/docs.html000066400000000000000000000000601370372246700176460ustar00rootroot00000000000000--- layout: doc_page --- {% include doc.html %}rocksdb-6.11.4/docs/_layouts/home.html000066400000000000000000000006261370372246700176560ustar00rootroot00000000000000 {% include head.html %} {% include nav.html alwayson=true %} rocksdb-6.11.4/docs/_layouts/page.html000066400000000000000000000000251370372246700176330ustar00rootroot00000000000000--- layout: blog --- rocksdb-6.11.4/docs/_layouts/plain.html000066400000000000000000000002471370372246700200300ustar00rootroot00000000000000--- layout: default ---
{{ content }}
rocksdb-6.11.4/docs/_layouts/post.html000066400000000000000000000001341370372246700177050ustar00rootroot00000000000000--- collection: blog layout: blog ---
{% include post.html %}
rocksdb-6.11.4/docs/_layouts/redirect.html000066400000000000000000000001561370372246700205250ustar00rootroot00000000000000 rocksdb-6.11.4/docs/_layouts/top-level.html000066400000000000000000000002471370372246700206340ustar00rootroot00000000000000--- layout: default ---
{{ content }}
rocksdb-6.11.4/docs/_posts/000077500000000000000000000000001370372246700155045ustar00rootroot00000000000000rocksdb-6.11.4/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown000066400000000000000000000171451370372246700244170ustar00rootroot00000000000000--- title: How to backup RocksDB? layout: post author: icanadi category: blog redirect_from: - /blog/191/how-to-backup-rocksdb/ --- In RocksDB, we have implemented an easy way to backup your DB. Here is a simple example: #include "rocksdb/db.h" #include "utilities/backupable_db.h" using namespace rocksdb; DB* db; DB::Open(Options(), "/tmp/rocksdb", &db); BackupableDB* backupable_db = new BackupableDB(db, BackupableDBOptions("/tmp/rocksdb_backup")); backupable_db->Put(...); // do your thing backupable_db->CreateNewBackup(); delete backupable_db; // no need to also delete db This simple example will create a backup of your DB in "/tmp/rocksdb_backup". Creating new BackupableDB consumes DB* and you should be calling all the DB methods on object `backupable_db` going forward. Restoring is also easy: RestoreBackupableDB* restore = new RestoreBackupableDB(Env::Default(), BackupableDBOptions("/tmp/rocksdb_backup")); restore->RestoreDBFromLatestBackup("/tmp/rocksdb", "/tmp/rocksdb"); delete restore; This code will restore the backup back to "/tmp/rocksdb". The second parameter is the location of log files (In some DBs they are different from DB directory, but usually they are the same. See Options::wal_dir for more info). 
An alternative API for backups is to use BackupEngine directly: #include "rocksdb/db.h" #include "utilities/backupable_db.h" using namespace rocksdb; DB* db; DB::Open(Options(), "/tmp/rocksdb", &db); db->Put(...); // do your thing BackupEngine* backup_engine = BackupEngine::NewBackupEngine(Env::Default(), BackupableDBOptions("/tmp/rocksdb_backup")); backup_engine->CreateNewBackup(db); delete db; delete backup_engine; Restoring with BackupEngine is similar to RestoreBackupableDB: BackupEngine* backup_engine = BackupEngine::NewBackupEngine(Env::Default(), BackupableDBOptions("/tmp/rocksdb_backup")); backup_engine->RestoreDBFromLatestBackup("/tmp/rocksdb", "/tmp/rocksdb"); delete backup_engine; Backups are incremental. You can create a new backup with `CreateNewBackup()` and only the new data will be copied to backup directory (for more details on what gets copied, see "Under the hood"). Checksum is always calculated for any backuped file (including sst, log, and etc). It is used to make sure files are kept sound in the file system. Checksum is also verified for files from the previous backups even though they do not need to be copied. A checksum mismatch aborts the current backup (see "Under the hood" for more details). Once you have more backups saved, you can issue `GetBackupInfo()` call to get a list of all backups together with information on timestamp of the backup and the size (please note that sum of all backups' sizes is bigger than the actual size of the backup directory because some data is shared by multiple backups). Backups are identified by their always-increasing IDs. `GetBackupInfo()` is available both in `BackupableDB` and `RestoreBackupableDB`. You probably want to keep around only small number of backups. To delete old backups, just call `PurgeOldBackups(N)`, where N is how many backups you'd like to keep. All backups except the N newest ones will be deleted. You can also choose to delete arbitrary backup with call `DeleteBackup(id)`. 
`RestoreDBFromLatestBackup()` will restore the DB from the latest consistent backup. An alternative is `RestoreDBFromBackup()` which takes a backup ID and restores that particular backup. Checksum is calculated for any restored file and compared against the one stored during the backup time. If a checksum mismatch is detected, the restore process is aborted and `Status::Corruption` is returned. Very important thing to note here: Let's say you have backups 1, 2, 3, 4. If you restore from backup 2 and start writing more data to your database, newly created backup will delete old backups 3 and 4 and create new backup 3 on top of 2. ## Advanced usage Let's say you want to backup your DB to HDFS. There is an option in `BackupableDBOptions` to set `backup_env`, which will be used for all file I/O related to backup dir (writes when backuping, reads when restoring). If you set it to HDFS Env, all the backups will be stored in HDFS. `BackupableDBOptions::info_log` is a Logger object that is used to print out LOG messages if not-nullptr. If `BackupableDBOptions::sync` is true, we will sync data to disk after every file write, guaranteeing that backups will be consistent after a reboot or if machine crashes. Setting it to false will speed things up a bit, but some (newer) backups might be inconsistent. In most cases, everything should be fine, though. If you set `BackupableDBOptions::destroy_old_data` to true, creating new `BackupableDB` will delete all the old backups in the backup directory. `BackupableDB::CreateNewBackup()` method takes a parameter `flush_before_backup`, which is false by default. When `flush_before_backup` is true, `BackupableDB` will first issue a memtable flush and only then copy the DB files to the backup directory. Doing so will prevent log files from being copied to the backup directory (since flush will delete them). If `flush_before_backup` is false, backup will not issue flush before starting the backup. 
In that case, the backup will also include log files corresponding to live memtables. Backup will be consistent with current state of the database regardless of `flush_before_backup` parameter. ## Under the hood `BackupableDB` implements `DB` interface and adds four methods to it: `CreateNewBackup()`, `GetBackupInfo()`, `PurgeOldBackups()`, `DeleteBackup()`. Any `DB` interface calls will get forwarded to underlying `DB` object. When you call `BackupableDB::CreateNewBackup()`, it does the following: 1. Disable file deletions 2. Get live files (this includes table files, current and manifest file). 3. Copy live files to the backup directory. Since table files are immutable and filenames unique, we don't copy a table file that is already present in the backup directory. For example, if there is a file `00050.sst` already backed up and `GetLiveFiles()` returns `00050.sst`, we will not copy that file to the backup directory. However, checksum is calculated for all files regardless if a file needs to be copied or not. If a file is already present, the calculated checksum is compared against previously calculated checksum to make sure nothing crazy happened between backups. If a mismatch is detected, backup is aborted and the system is restored back to the state before `BackupableDB::CreateNewBackup()` is called. One thing to note is that a backup abortion could mean a corruption from a file in backup directory or the corresponding live file in current DB. Both manifest and current files are copied, since they are not immutable. 4. If `flush_before_backup` was set to false, we also need to copy log files to the backup directory. We call `GetSortedWalFiles()` and copy all live files to the backup directory. 5. Enable file deletions Backup IDs are always increasing and we have a file `LATEST_BACKUP` that contains the ID of the latest backup. 
If we crash in middle of backing up, on a restart we will detect that there are newer backup files than `LATEST_BACKUP` claims there are. In that case, we will delete any backup newer than `LATEST_BACKUP` and clean up all the files since some of the table files might be corrupted. Having corrupted table files in the backup directory is dangerous because of our deduplication strategy. ## Further reading For the API details, see `include/utilities/backupable_db.h`. For the implementation, see `utilities/backupable/backupable_db.cc`. rocksdb-6.11.4/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown000066400000000000000000000066401370372246700303150ustar00rootroot00000000000000--- title: How to persist in-memory RocksDB database? layout: post author: icanadi category: blog redirect_from: - /blog/245/how-to-persist-in-memory-rocksdb-database/ --- In recent months, we have focused on optimizing RocksDB for in-memory workloads. With growing RAM sizes and strict low-latency requirements, lots of applications decide to keep their entire data in memory. Running in-memory database with RocksDB is easy -- just mount your RocksDB directory on tmpfs or ramfs [1]. Even if the process crashes, RocksDB can recover all of your data from in-memory filesystem. However, what happens if the machine reboots? In this article we will explain how you can recover your in-memory RocksDB database even after a machine reboot. Every update to RocksDB is written to two places - one is an in-memory data structure called memtable and second is write-ahead log. Write-ahead log can be used to completely recover the data in memtable. By default, when we flush the memtable to table file, we also delete the current log, since we don't need it anymore for recovery (the data from the log is "persisted" in the table file -- we say that the log file is obsolete). 
However, if your table file is stored in an in-memory file system, you may need the obsolete write-ahead log to recover the data after the machine reboots. Here's how you can do that. Options::wal_dir is the directory where RocksDB stores write-ahead log files. If you configure this directory to be on flash or disk, you will not lose the current log file on machine reboot. Options::WAL_ttl_seconds is the timeout when we delete the archived log files. If the timeout is non-zero, obsolete log files will be moved to `archive/` directory under Options::wal_dir. Those archived log files will only be deleted after the specified timeout. Let's assume Options::wal_dir is a directory on persistent storage and Options::WAL_ttl_seconds is set to one day. To fully recover the DB, we also need to backup the current snapshot of the database (containing table and metadata files) with a frequency of less than one day. RocksDB provides a utility that enables you to easily backup the snapshot of your database. You can learn more about it here: [How to backup RocksDB?](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F) You should configure the backup process to avoid backing up log files, since they are already stored in persistent storage. To do that, set BackupableDBOptions::backup_log_files to false. Restore process by default cleans up entire DB and WAL directory. Since we didn't include log files in the backup, we need to make sure that restoring the database doesn't delete log files in WAL directory. When restoring, configure RestoreOptions::keep_log_file to true. That option will also move any archived log files back to WAL directory, enabling RocksDB to replay all archived log files and rebuild the in-memory database state. 
To reiterate, here's what you have to do: * Set DB directory to tmpfs or ramfs mounted drive * Set Options::wal_dir to a directory on persistent storage * Set Options::WAL_ttl_seconds to T seconds * Backup RocksDB every T/2 seconds, with BackupableDBOptions::backup_log_files = false * When you lose data, restore from backup with RestoreOptions::keep_log_file = true [1] You might also want to consider using [PlainTable format](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) for table files rocksdb-6.11.4/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown000066400000000000000000000041111370372246700310160ustar00rootroot00000000000000--- title: The 1st RocksDB Local Meetup Held on March 27, 2014 layout: post author: xjin category: blog redirect_from: - /blog/323/the-1st-rocksdb-local-meetup-held-on-march-27-2014/ --- On Mar 27, 2014, RocksDB team @ Facebook held the 1st RocksDB local meetup in FB HQ (Menlo Park, California). We invited around 80 guests from 20+ local companies, including LinkedIn, Twitter, Dropbox, Square, Pinterest, MapR, Microsoft and IBM. Finally around 50 guests showed up, totaling around 60% show-up rate. 
[![Resize of 20140327_200754](/static/images/Resize-of-20140327_200754-300x225.jpg)](/static/images/Resize-of-20140327_200754-300x225.jpg) RocksDB team @ Facebook gave four talks about the latest progress and experience on RocksDB: * [Supporting a 1PB In-Memory Workload](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Haobo-RocksDB-In-Memory.pdf) * [Column Families in RocksDB](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Igor-Column-Families.pdf) * ["Lockless" Get() in RocksDB?](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf) * [Prefix Hashing in RocksDB](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Siying-Prefix-Hash.pdf) A very interesting question asked by a massive number of guests is: does RocksDB plan to provide replication functionality? Obviously, many applications need a resilient and distributed storage solution, not just single-node storage. We are considering how to approach this issue. When will be the next meetup? We haven't decided yet. We will see whether the community is interested in it and how it can help RocksDB grow. If you have any questions or feedback for the meetup or RocksDB, please let us know in [our Facebook group](https://www.facebook.com/groups/rocksdb.dev/). ### Comments **[Rajiv](geetasen@gmail.com)** Have any of these talks been recorded and if so will they be published? **[Igor Canadi](icanadi@fb.com)** Yes, I think we plan to publish them soon. rocksdb-6.11.4/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown000066400000000000000000000031401370372246700236500ustar00rootroot00000000000000--- title: RocksDB 2.8 release layout: post author: icanadi category: blog redirect_from: - /blog/371/rocksdb-2-8-release/ --- Check out the new RocksDB 2.8 release on [Github](https://github.com/facebook/rocksdb/releases/tag/2.8.fb). RocksDB 2.8. 
is mostly focused on improving performance for in-memory workloads. We are seeing read QPS as high as 5M (we will write a separate blog post on this). Here is the summary of new features: * Added a new table format called PlainTable, which is optimized for RAM storage (ramfs or tmpfs). You can read more details about it on [our wiki](https://github.com/facebook/rocksdb/wiki/PlainTable-Format). * New prefixed memtable format HashLinkedList, which is optimized for cases where there are only a few keys for each prefix. * Merge operator supports a new function PartialMergeMulti() that allows users to do partial merges against multiple operands. This function enables big speedups for workloads that use merge operators. * Added a V2 compaction filter interface. It buffers the kv-pairs sharing the same key prefix, processes them in batches, and returns the batched results back to DB. * Geo-spatial support for locations and radial-search. * Improved read performance using thread local cache for frequently accessed data. * Stability improvements -- we're now ignoring partially written tailing record to MANIFEST or WAL files. We have also introduced small incompatible API changes (mostly for advanced users). You can see full release notes in our [HISTORY.md](https://github.com/facebook/rocksdb/blob/2.8.fb/HISTORY.md) file. rocksdb-6.11.4/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown000066400000000000000000000074561370372246700317020ustar00rootroot00000000000000--- title: Indexing SST Files for Better Lookup Performance layout: post author: leijin category: blog redirect_from: - /blog/431/indexing-sst-files-for-better-lookup-performance/ --- For a `Get()` request, RocksDB goes through mutable memtable, list of immutable memtables, and SST files to look up the target key. SST files are organized in levels. On level 0, files are sorted based on the time they are flushed. 
Their key ranges (as defined by FileMetaData.smallest and FileMetaData.largest) mostly overlap with each other. So it needs to look up every L0 file. Compaction is scheduled periodically to pick up files from an upper level and merges them with files from lower level. As a result, key/values are moved from L0 down the LSM tree gradually. Compaction sorts key/values and splits them into files. From level 1 and below, SST files are sorted based on key. Their key ranges are mutually exclusive. Instead of scanning through each SST file and checking if a key falls into its range, RocksDB performs a binary search based on FileMetaData.largest to locate a candidate file that can potentially contain the target key. This reduces complexity from O(N) to O(log(N)). However, log(N) can still be large for bottom levels. For a fan-out ratio of 10, level 3 can have 1000 files. That requires 10 comparisons to locate a candidate file. This is a significant cost for an in-memory database when you can do [several million gets per second](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks). One observation to this problem is that: after the LSM tree is built, an SST file's position in its level is fixed. Furthermore, its order relative to files from the next level is also fixed. Based on this idea, we can perform [fractional cascading](http://en.wikipedia.org/wiki/Fractional_cascading) kind of optimization to narrow down the binary search range. Here is an example: [![tree_example](/static/images/tree_example1.png)](/static/images/tree_example1.png) Level 1 has 2 files and level 2 has 8 files. Now, we want to look up key 80. A binary search based on FileMetaData.largest tells you file 1 is the candidate. Then key 80 is compared with its FileMetaData.smallest and FileMetaData.largest to decide if it falls into the range. The comparison shows 80 is less than FileMetaData.smallest (100), so file 1 does not possibly contain key 80. 
We need to proceed to check level 2. Usually, we need to do binary search among all 8 files on level 2. But since we already know target key 80 is less than 100 and only file 1 to file 3 can contain key less than 100, we can safely exclude other files from the search. As a result we cut down the search space from 8 files to 3 files. Let's look at another example. We want to get key 230. A binary search on level 1 locates to file 2 (this also implies key 230 is larger than file 1's FileMetaData.largest 200). A comparison with file 2's range shows the target key is smaller than file 2's FileMetaData.smallest 300. Even though we couldn't find the key on level 1, we have derived hints that target key is in range between 200 and 300. Any files on level 2 that cannot overlap with [200, 300] can be safely excluded. As a result, we only need to look at file 5 and file 6 on level 2. Inspired by this concept, we pre-build pointers at compaction time on level 1 files that point to a range of files on level 2. For example, file 1 on level 1 points to file 3 (on level 2) on the left and file 4 on the right. File 2 will point to level 2 files 6 and 7. At query time, these pointers are used to determine the actual binary search range based on comparison result. Our benchmark shows that this optimization improves lookup QPS by ~5% for similar setup mentioned [here](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks). rocksdb-6.11.4/docs/_posts/2014-05-14-lock.markdown000066400000000000000000000225241370372246700212350ustar00rootroot00000000000000--- title: Reducing Lock Contention in RocksDB layout: post author: sdong category: blog redirect_from: - /blog/521/lock/ --- In this post, we briefly introduce the recent improvements we made to RocksDB to address the issue of lock contention costs.
RocksDB has a simple thread synchronization mechanism (See [RocksDB Architecture Guide](https://github.com/facebook/rocksdb/wiki/Rocksdb-Architecture-Guide) to understand terms used below, like SST tables or mem tables). SST tables are immutable after being written and mem tables are lock-free data structures supporting single writer and multiple readers. There is only one single major lock, the DB mutex (DBImpl.mutex_) protecting all the meta operations, including: * Increase or decrease reference counters of mem tables and SST tables * Change and check meta data structures, before and after finishing compactions, flushes and new mem table creations * Coordinating writers This DB mutex used to be a scalability bottleneck preventing us from scaling to more than 16 threads. To address the issue, we improved RocksDB in several ways. 1. Consolidate reference counters and introduce "super version". For every read operation, mutex was acquired, and reference counters for each mem table and each SST table were increased. One such operation is not expensive but if you are building a high throughput server with lots of reads, the lock contention will become the bottleneck. This is especially true if you store all your data in RAM. To solve this problem, we created a meta-meta data structure called “[super version](https://reviews.facebook.net/rROCKSDB1fdb3f7dc60e96394e3e5b69a46ede5d67fb976c)”, which holds reference counters to all those mem tables and SST tables, so that readers only need to increase the reference counters for this single data structure. In RocksDB, list of live mem tables and SST tables only changes infrequently, which would happen when new mem tables are created or flush/compaction happens. Now, at those times, a new super version is created with their reference counters increased. A super version lists live mem tables and SST tables so a reader only needs to acquire the lock in order to find the latest super version and increase its reference counter.
From the super version, the reader can find all the mem and SST tables which are safely accessible as long as the reader holds the reference count for the super version. 2. We replaced some reference counters with std::atomic objects, so that decreasing reference count of an object usually doesn’t need to be inside the mutex any more. 3. Make fetching super version and reference counting lock-free in read queries. After consolidating reference counting to one single super version and removing the locking for decreasing reference counts, in read case, we only acquire mutex for one thing: fetch the latest super version and increase the reference count for that (dereference the counter is done in an atomic decrease). We designed and implemented a (mostly) lock-free approach to do it. See [details](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf). We will write a separate blog post for that. 4. Avoid disk I/O inside the mutex. As we know, each disk I/O to hard drives takes several milliseconds. It can be even longer if file system journal is involved or I/Os are queued. Even occasional disk I/O within mutex can cause huge performance outliers. We identified two situations in which we might do disk I/O inside mutex, and we removed them: (1) Opening and closing transactional log files. We moved those operations out of the mutex. (2) Information logging. In multiple places we write to logs within mutex. There is a chance that file write will wait for disk I/O to finish before finishing, even if fsync() is not issued, especially in EXT systems. We occasionally see 100+ milliseconds write() latency on EXT. Instead of removing those logging, we came up with a solution of delay logging. When inside mutex, instead of directly writing to the log file, we write to a log buffer, with the timing information. As soon as mutex is released, we flush the log buffer to log files. 5. Reduce object creation inside the mutex.
Object creation can be slow because it involves malloc (in our case). Malloc sometimes is slow because it needs to lock some shared data structures. Allocating can also be slow because we sometimes do expensive operations in some of our classes' constructors. For these reasons, we try to reduce object creations inside the mutex. Here are two examples: (1) std::vector uses malloc inside. We introduced “[autovector](https://reviews.facebook.net/rROCKSDBc01676e46d3be08c3c140361ef1f5884f47d3b3c)” data structure, in which memory for the first few elements is pre-allocated as members of the autovector class. When an autovector is used as a stack variable, no malloc will be needed unless the pre-allocated buffer is used up. This autovector is quite useful for manipulating those meta data structures. Those meta operations are often locked inside DB mutex. (2) When building an iterator, we used to create an iterator for every live mem table and SST table within the mutex and a merging iterator on top of them. Besides malloc, some of those iterators can be quite expensive to create, like sorting. Now, instead of doing that, we simply increase the reference counters of them, and release the mutex before creating any iterator. 6. Deal with mutexes in LRU caches. When I said there was only one single major lock, I was lying. In RocksDB, all LRU caches had exclusive mutexes within to protect writes to the LRU lists, which are done in both read and write operations. LRU caches are used in block cache and table cache. Both of them are accessed more frequently than DB data structures. Lock contention of these two locks is as intense as the DB mutex. Even if LRU cache is sharded into ShardedLRUCache, we can still see lock contentions, especially table caches. We further address this issue in two ways: (1) Bypassing table caches. A table cache maintains list of SST table’s read handlers.
Those handlers contain SST files’ descriptors, table metadata, and possibly data indexes, as well as bloom filters. When the table handler needs to be evicted based on LRU, that information is cleared. When the SST table needs to be read and its table handler is not in LRU cache, the table is opened and that metadata is loaded. In some cases, users want to tune the system in a way that table handler evictions should never happen. It is common for high-throughput, low-latency servers. We introduce a mode where table cache is bypassed in read queries. In this mode, all table handlers are cached and accessed directly, so there is no need to query and adjust table caches for reading the database. It is the users’ responsibility to reserve enough resource for it. This mode can be turned on by setting options.max_open_files=-1. (2) [New PlainTable format](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) (optimized for SST in ramfs/tmpfs) does not organize data by blocks. Data are located by memory addresses so no block cache is needed. With all of those improvements, lock contention is not a bottleneck anymore, which is shown in our [memory-only benchmark](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks). Furthermore, lock contentions are not causing some huge (50 milliseconds+) latency outliers they used to cause. ### Comments **[Lee Hounshell](lee@apsalar.com)** Please post an example of reading the same rocksdb concurrently. We are using the latest 3.0 rocksdb; however, when two separate processes try and open the same rocksdb for reading, only one of the open requests succeed. The other open always fails with “db/LOCK: Resource temporarily unavailable” So far we have not found an option that allows sharing the rocksdb for reads. An example would be most appreciated. **[Siying Dong](siying.d@fb.com)** Sorry for the delay. We don’t have feature support for this scenario yet.
Here is an example you can work around this problem. You can build a snapshot of the DB by doing this: 1. create a separate directory on the same host for a snapshot of the DB. 1. call `DB::DisableFileDeletions()` 1. call `DB::GetLiveFiles()` to get a full list of the files. 1. for all the files except manifest, add a hardlink file in your new directory pointing to the original file 1. copy the manifest file and truncate the size (you can read the comments of `DB::GetLiveFiles()` for more information) 1. call `DB::EnableFileDeletions()` 1. now you can open the snapshot directory in another process to access those files. Please remember to delete the directory after reading the data to allow those files to be recycled. By the way, the best way to ask those questions is in our [facebook group](https://www.facebook.com/groups/rocksdb.dev/). Let us know if you need any further help. **[Darshan](darshan.ghumare@gmail.com)** Will this consistency problem of RocksDB all occurs in case of single put/write? What all ACID properties is supported by RocksDB, only durability irrespective of single or batch write? **[Siying Dong](siying.d@fb.com)** We recently [introduced optimistic transaction](https://reviews.facebook.net/D33435) which can help you ensure all of ACID. This blog post is mainly about optimizations in implementation. The RocksDB consistency semantic is not changed. rocksdb-6.11.4/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown000066400000000000000000000012261370372246700236500ustar00rootroot00000000000000--- title: RocksDB 3.0 release layout: post author: icanadi category: blog redirect_from: - /blog/557/rocksdb-3-0-release/ --- Check out new RocksDB release on [Github](https://github.com/facebook/rocksdb/releases/tag/3.0.fb)! 
New features in RocksDB 3.0: * [Column Family support](https://github.com/facebook/rocksdb/wiki/Column-Families) * [Ability to choose different checksum function](https://github.com/facebook/rocksdb/commit/0afc8bc29a5800e3212388c327c750d32e31f3d6) * Deprecated ReadOptions::prefix_seek and ReadOptions::prefix Check out the full [change log](https://github.com/facebook/rocksdb/blob/3.0.fb/HISTORY.md). rocksdb-6.11.4/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown000066400000000000000000000011011370372246700236330ustar00rootroot00000000000000--- title: RocksDB 3.1 release layout: post author: icanadi category: blog redirect_from: - /blog/575/rocksdb-3-1-release/ --- Check out the new release on [Github](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.1)! New features in RocksDB 3.1: * [Materialized hash index](https://github.com/facebook/rocksdb/commit/0b3d03d026a7248e438341264b4c6df339edc1d7) * [FIFO compaction style](https://github.com/facebook/rocksdb/wiki/FIFO-compaction-style) We released 3.1 so fast after 3.0 because one of our internal customers needed materialized hash index. rocksdb-6.11.4/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown000066400000000000000000000057211370372246700256310ustar00rootroot00000000000000--- title: PlainTable — A New File Format layout: post author: sdong category: blog redirect_from: - /blog/599/plaintable-a-new-file-format/ --- In this post, we are introducing "PlainTable" -- a file format we designed for RocksDB, initially to satisfy a production use case at Facebook. Design goals: 1. All data stored in memory, in files stored in tmpfs/ramfs. Support DBs larger than 100GB (may be sharded across multiple RocksDB instances). 1. Optimize for [prefix hashing](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Siying-Prefix-Hash.pdf) 1. Less than or around 1 micro-second average latency for single Get() or Seek(). 1. Minimize memory consumption. 1.
Queries efficiently return empty results Notice that our priority was not to maximize query performance, but to strike a balance between query performance and memory consumption. PlainTable query performance is not as good as you would see with a nicely-designed hash table, but they are of the same order of magnitude, while keeping memory overhead to a minimum. Since we are targeting micro-second latency, it is on the level of the number of CPU cache misses (if they cannot be parallelized, which is usually the case for index look-ups). On our target hardware with Intel CPUs of multiple sockets with NUMA, we can only allow 4-5 CPU cache misses (including costs of data TLB). To meet our requirements, given that only hash prefix iterating is needed, we made two decisions: 1. to use a hash index, which is 1. directly addressed to rows, with no block structure. Having addressed our latency goal, the next task was to design a very compact hash index to minimize memory consumption. Some tricks we used to meet this goal: 1. We only use 32-bit integers for data and index offsets. The first bit serves as a flag, so we can avoid using 8-byte pointers. 1. We never copy keys or parts of keys to index search structures. We store only offsets from which keys can be retrieved, to make comparisons with search keys. 1. Since our file is immutable, we can accurately estimate the number of hash buckets needed. To make sure the format works efficiently with empty queries, we added a bloom filter check before the query. This adds only one cache miss for non-empty cases [1], but avoids multiple cache misses for most empty results queries. This is a good trade-off for use cases with a large percentage of empty results. These are the design goals and basic ideas of PlainTable file format. For detailed information, see [this wiki page](https://github.com/facebook/rocksdb/wiki/PlainTable-Format). [1] Bloom filter checks typically require multiple memory accesses.
However, because they are independent, they usually do not make the CPU pipeline stall. In any case, we improved the bloom filter to improve data locality - we may cover this further in a future blog post. ### Comments **[Siying Dong](siying.d@fb.com)** Does [http://rocksdb.org/feed/](http://rocksdb.org/feed/) work? rocksdb-6.11.4/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown000066400000000000000000000113021370372246700257020ustar00rootroot00000000000000--- title: Avoid Expensive Locks in Get() layout: post author: leijin category: blog redirect_from: - /blog/677/avoid-expensive-locks-in-get/ --- As promised in the previous [blog post](blog/2014/05/14/lock.html)! RocksDB employs a multiversion concurrency control strategy. Before reading data, it needs to grab the current version, which is encapsulated in a data structure called [SuperVersion](https://reviews.facebook.net/rROCKSDB1fdb3f7dc60e96394e3e5b69a46ede5d67fb976c). At the beginning of `GetImpl()`, it used to do this: mutex_.Lock(); auto* s = super_version_->Ref(); mutex_.Unlock(); The lock is necessary because pointer super_version_ may be updated and the corresponding SuperVersion may be deleted while Ref() is in progress. `Ref()` simply increases the reference counter and returns “this” pointer. However, this simple operation posed big challenges for in-memory workload and stopped RocksDB from scaling read throughput beyond 8 cores. Running 32 read threads on a 32-core CPU leads to [70% system CPU usage](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf). This is outrageous! Luckily, we found a way to circumvent this problem by using [thread local storage](http://en.wikipedia.org/wiki/Thread-local_storage). Version change is a rare event compared to millions of read requests. On the very first Get() request, each thread pays the mutex cost to acquire a reference to the new super version.
Instead of releasing the reference after use, the reference is cached in the thread’s local storage. An atomic variable is used to track global super version number. Subsequent reads simply compare the local super version number against the global super version number. If they are the same, the cached super version reference may be used directly, at no cost. If a version change is detected, mutex must be acquired to update the reference. The cost of mutex lock is amortized among millions of reads and becomes negligible. The code looks something like this: SuperVersion* s = thread_local_->Get(); if (s->version_number != super_version_number_.load()) { // slow path, cleanup of current super version is omitted mutex_.Lock(); s = super_version_->Ref(); mutex_.Unlock(); } The result is quite amazing. RocksDB can nicely [scale to 32 cores](https://github.com/facebook/rocksdb/raw/gh-pages/talks/2014-03-27-RocksDB-Meetup-Lei-Lockless-Get.pdf) and most CPU time is spent in user land. Daryl Grove gives a pretty good [comparison between mutex and atomic](https://blogs.oracle.com/d/entry/the_cost_of_mutexes). However, the real cost difference lies beyond what is shown in the assembly code. Mutex can keep threads spinning on CPU or even trigger thread context switches in which all readers compete to access the critical area. Our approach prevents mutual competition by directing threads to check against a global version which does not change at high frequency, and is therefore much more cache-friendly. The new approach entails one issue: a thread can visit GetImpl() once but can never come back again. SuperVersion is referenced and cached in its thread local storage. All resources (e.g., memtables, files) which belong to that version are frozen. A “supervisor” is required to visit each thread’s local storage and free its resources without incurring a lock. We designed a lockless sweep using CAS (compare-and-swap instruction).
Here is how it works: (1) A reader thread uses CAS to acquire SuperVersion from its local storage and to put in a special flag (SuperVersion::kSVInUse). (2) Upon completion of GetImpl(), the reader thread tries to return SuperVersion to local storage by CAS, expecting the special flag (SuperVersion::kSVInUse) in its local storage. If it does not see SuperVersion::kSVInUse, that means a “sweep” was done and the reader thread is responsible for cleanup (this is expensive, but does not happen often on the hot path). (3) After any flush/compaction, the background thread performs a sweep (CAS) across all threads’ local storage and frees encountered SuperVersion. A reader thread must re-acquire a new SuperVersion reference on its next visit. ### Comments **[David Barbour](dmbarbour@gmail.com)** Please post an example of reading the same rocksdb concurrently. We are using the latest 3.0 rocksdb; however, when two separate processes try and open the same rocksdb for reading, only one of the open requests succeed. The other open always fails with “db/LOCK: Resource temporarily unavailable” So far we have not found an option that allows sharing the rocksdb for reads. An example would be most appreciated. rocksdb-6.11.4/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown000066400000000000000000000020141370372246700236460ustar00rootroot00000000000000--- title: RocksDB 3.2 release layout: post author: leijin category: blog redirect_from: - /blog/647/rocksdb-3-2-release/ --- Check out new RocksDB release on [GitHub](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.2)! New Features in RocksDB 3.2: * PlainTable now supports a new key encoding: for keys of the same prefix, the prefix is only written once. It can be enabled through encoding_type parameter of NewPlainTableFactory() * Add AdaptiveTableFactory, which is used to convert from a DB of PlainTable to BlockBasedTable, or vice versa.
It can be created using NewAdaptiveTableFactory() Public API changes: * We removed seek compaction as a concept from RocksDB * Add two parameters to NewHashLinkListRepFactory() for logging on too many entries in a hash bucket when flushing * Added new option BlockBasedTableOptions::hash_index_allow_collision. When enabled, prefix hash index for block-based table will not store prefix and allow hash collision, reducing memory consumption rocksdb-6.11.4/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown000066400000000000000000000031271370372246700236600ustar00rootroot00000000000000--- title: RocksDB 3.3 Release layout: post author: yhciang category: blog redirect_from: - /blog/1301/rocksdb-3-3-release/ --- Check out new RocksDB release on [GitHub](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.3)! New Features in RocksDB 3.3: * **JSON API prototype**. * **Performance improvement on HashLinkList**: We addressed performance outlier of HashLinkList caused by skewed bucket by switching data in the bucket from linked list to skip list. Add parameter threshold_use_skiplist in NewHashLinkListRepFactory(). * **More effective on storage space reclaim**: RocksDB is now able to reclaim storage space more effectively during the compaction process. This is done by compensating the size of each deletion entry by the 2X average value size, which makes compaction more easily triggered by deletion entries. * **TimeOut API to write**: Now WriteOptions have a variable called timeout_hint_us. With timeout_hint_us set to non-zero, any write associated with this timeout_hint_us may be aborted when it runs longer than the specified timeout_hint_us, and it is guaranteed that any write that completes earlier than the specified time-out will not be aborted due to the time-out condition. * **rate_limiter option**: We added an option that controls total throughput of flush and compaction. The throughput is specified in bytes/sec.
Flush always has precedence over compaction when available bandwidth is constrained. Public API changes: * Removed NewTotalOrderPlainTableFactory because it is not used and implemented semantically incorrect. rocksdb-6.11.4/docs/_posts/2014-09-12-cuckoo.markdown000066400000000000000000000106321370372246700215670ustar00rootroot00000000000000--- title: Cuckoo Hashing Table Format layout: post author: radheshyam category: blog redirect_from: - /blog/1427/new-bloom-filter-format/ --- ## Introduction We recently introduced a new [Cuckoo Hashing](http://en.wikipedia.org/wiki/Cuckoo_hashing) based SST file format which is optimized for fast point lookups. The new format was built for applications which require very high point lookup rates (~4Mqps) in read only mode but do not use operations like range scan, merge operator, etc. But, the existing RocksDB file formats were built to support range scan and other operations and the current best point lookup in RocksDB is 1.2 Mqps given by [PlainTable](https://github.com/facebook/rocksdb/wiki/PlainTable-Format)[ format](https://github.com/facebook/rocksdb/wiki/PlainTable-Format). This prompted a hashing based file format, which we present here. The new table format uses a cache friendly version of Cuckoo Hashing algorithm with only 1 or 2 memory accesses per lookup. Goals: * Reduce memory accesses per lookup to 1 or 2 * Get an end to end point lookup rate of at least 4 Mqps * Minimize database size Assumptions: * Key length and value length are fixed * The database is operated in read only mode Non-goals: * While optimizing the performance of Get() operation was our primary goal, compaction and build times were secondary. We may work on improving them in future. Details for setting up the table format can be found in [GitHub](https://github.com/facebook/rocksdb/wiki/CuckooTable-Format). 
## Cuckoo Hashing Algorithm In order to achieve high lookup speeds, we did multiple optimizations, including a cache friendly cuckoo hash algorithm. Cuckoo Hashing uses multiple hash functions, _h1, ..., __hn._ ### Original Cuckoo Hashing To insert any new key _k_, we compute hashes of the key _h1(k), ..., __hn__(k)_. We insert the key in the first hash location that is free. If all the locations are blocked, we try to move one of the colliding keys to a different location by trying to re-insert it. Finding smallest set of keys to displace in order to accommodate the new key is naturally a shortest path problem in a directed graph where nodes are buckets of hash table and there is an edge from bucket _A_ to bucket _B_ if the element stored in bucket _A_ can be accommodated in bucket _B_ using one of the hash functions. The source nodes are the possible hash locations for the given key _k_ and destination is any one of the empty buckets. We use this algorithm to handle collision. To retrieve a key _k_, we compute hashes, _h1(k), ..., __hn__(k)_ and the key must be present in one of these locations. Our goal is to minimize average (and maximum) number of hash functions required and hence the number of memory accesses. In our experiments, with a hash utilization of 90%, we found that the average number of lookups is 1.8 and maximum is 3. Around 44% of keys are accommodated in first hash location and 33% in second location. ### Cache Friendly Cuckoo Hashing We noticed the following two sub-optimal properties in original Cuckoo implementation: * If the key is not present in first hash location, we jump to second hash location which may not be in cache. This results in many cache misses. * Because only 44% of keys are located in first cuckoo block, we couldn't have an optimal prefetching strategy - prefetching all hash locations for a key is wasteful. But prefetching only the first hash location helps only 44% of cases. 
The solution is to insert more keys near first location. In case of collision in the first hash location - _h1(k)_, we try to insert it in next few buckets, _h1(k)+1, h1(k)+2, ..., h1(k)+t-1_. If all of these _t_ locations are occupied, we skip over to next hash function _h2_ and repeat the process. We call the set of _t_ buckets a _Cuckoo Block_. We chose _t_ such that size of a block is not bigger than a cache line and we prefetch the first cuckoo block. With the new algorithm, for 90% hash utilization, we found that 85% of keys are accommodated in first Cuckoo Block. Prefetching the first cuckoo block yields best results. For a database of 100 million keys with key length 8 and value length 4, the hash algorithm alone can achieve 9.6 Mqps and we are working on improving it further. End to end RocksDB performance results can be found [here](https://github.com/facebook/rocksdb/wiki/CuckooTable-Format). rocksdb-6.11.4/docs/_posts/2014-09-12-new-bloom-filter-format.markdown000066400000000000000000000062031370372246700247530ustar00rootroot00000000000000--- title: New Bloom Filter Format layout: post author: zagfox category: blog redirect_from: - /blog/1367/cuckoo/ --- ## Introduction In this post, we are introducing "full filter block" --- a new bloom filter format for [block based table](https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format). This could bring about 40% of improvement for key query under an in-memory workload (all data stored in memory, files stored in tmpfs/ramfs; see an [example](https://github.com/facebook/rocksdb/wiki/RocksDB-In-Memory-Workload-Performance-Benchmarks)). The main idea behind it is to generate a big filter that covers all the keys in SST file to avoid lots of unnecessary memory look ups. ## What is Bloom Filter In brief, [bloom filter](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter) is a bit array generated for a set of keys that could tell if an arbitrary key may exist in that set.
In RocksDB, we generate such a bloom filter for each SST file. When we conduct a query for a key, we first go to the bloom filter block of SST file. If key may exist in filter, we go into data block in SST file to search for the key. If not, we would return directly. So it could help speed up point look up operation a lot. ## Original Bloom Filter Format Original bloom filter creates filters for each individual data block in SST file. It has complex structure (ref [here](https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format#filter-meta-block)) which results in a lot of non-adjacent memory look ups. Here's the work flow for checking original bloom filter in block based table: 1. Given the target key, we go to the index block to get the "data block ID" where this key may reside. 1. Using the "data block ID", we go to the filter block and get the correct "offset of filter". 1. Using the "offset of filter", we go to the actual filter and do the checking. ## New Bloom Filter Format New bloom filter creates filter for all keys in SST file and we name it "full filter". The data structure of full filter is very simple: there is just one big filter:     [ full filter ] In this way, the work flow of bloom filter checking is much simplified. (1) Given the target key, we go directly to the filter block and conduct the filter checking. To be specific, there would be no checking for index block and no address jumping inside of filter block. Though it is a big filter, the total filter size would be the same as the original filter. One little drawback is that the new bloom filter introduces more memory consumption when building SST file because we need to buffer keys (or their hashes) before generating filter. Original filter just creates a bunch of small filters so it just buffers a small amount of keys. For full filter, we buffer hashes of all keys, which would take more memory when SST file size increases.
## Usage & Customization You can refer to the document here for [usage](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#usage-of-new-bloom-filter) and [customization](https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#customize-your-own-filterpolicy). rocksdb-6.11.4/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown000066400000000000000000000024201370372246700236520ustar00rootroot00000000000000--- title: RocksDB 3.5 Release! layout: post author: leijin category: blog redirect_from: - /blog/1547/rocksdb-3-5-release/ --- New RocksDB release - 3.5! **New Features** 1. Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it. 2. new ReadOptions.total_order_seek to force total order seek when block-based table is built with hash index. **Public API changes** 1. The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key. 2. Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include: no_block_cache, block_cache, block_cache_compressed, block_size, block_size_deviation, block_restart_interval, filter_policy, whole_key_filtering. filter_policy is changed to shared_ptr from a raw pointer. 3. Remove deprecated options: disable_seek_compaction and db_stats_log_interval 4. OptimizeForPointLookup() takes one parameter for block cache size. It now builds hash index, bloom filter, and block cache. 
[https://github.com/facebook/rocksdb/releases/tag/v3.5](https://github.com/facebook/rocksdb/releases/tag/rocksdb-3.5) rocksdb-6.11.4/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown000066400000000000000000000061071370372246700270440ustar00rootroot00000000000000--- title: Migrating from LevelDB to RocksDB layout: post author: lgalanis category: blog redirect_from: - /blog/1811/migrating-from-leveldb-to-rocksdb-2/ --- If you have an existing application that uses LevelDB and would like to migrate to using RocksDB, one problem you need to overcome is to map the options for LevelDB to proper options for RocksDB. As of release 3.9 this can be automatically done by using our option conversion utility found in rocksdb/utilities/leveldb_options.h. What is needed, is to first replace `leveldb::Options` with `rocksdb::LevelDBOptions`. Then, use `rocksdb::ConvertOptions( )` to convert the `LevelDBOptions` struct into appropriate RocksDB options. Here is an example: LevelDB code: ```c++ #include #include "leveldb/db.h" using namespace leveldb; int main(int argc, char** argv) { DB *db; Options opt; opt.create_if_missing = true; opt.max_open_files = 1000; opt.block_size = 4096; Status s = DB::Open(opt, "/tmp/mydb", &db); delete db; } ``` RocksDB code: ```c++ #include #include "rocksdb/db.h" #include "rocksdb/utilities/leveldb_options.h" using namespace rocksdb; int main(int argc, char** argv) { DB *db; LevelDBOptions opt; opt.create_if_missing = true; opt.max_open_files = 1000; opt.block_size = 4096; Options rocksdb_options = ConvertOptions(opt); // add rocksdb specific options here Status s = DB::Open(rocksdb_options, "/tmp/mydb_rocks", &db); delete db; } ``` The difference is: ```diff -#include "leveldb/db.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/leveldb_options.h" -using namespace leveldb; +using namespace rocksdb; - Options opt; + LevelDBOptions opt; - Status s = DB::Open(opt, "/tmp/mydb", &db); + Options rocksdb_options = 
ConvertOptions(opt); + // add rockdb specific options here + + Status s = DB::Open(rocksdb_options, "/tmp/mydb_rocks", &db); ``` Once you get up and running with RocksDB you can then focus on tuning RocksDB further by modifying the converted options struct. The reason why ConvertOptions is handy is because a lot of individual options in RocksDB have moved to other structures in different components. For example, block_size is not available in struct rocksdb::Options. It resides in struct rocksdb::BlockBasedTableOptions, which is used to create a TableFactory object that RocksDB uses internally to create the proper TableBuilder objects. If you were to write your application from scratch it would look like this: RocksDB code from scratch: ```c++ #include #include "rocksdb/db.h" #include "rocksdb/table.h" using namespace rocksdb; int main(int argc, char** argv) { DB *db; Options opt; opt.create_if_missing = true; opt.max_open_files = 1000; BlockBasedTableOptions topt; topt.block_size = 4096; opt.table_factory.reset(NewBlockBasedTableFactory(topt)); Status s = DB::Open(opt, "/tmp/mydb_rocks", &db); delete db; } ``` The LevelDBOptions utility can ease migration to RocksDB from LevelDB and allows us to break down the various options across classes as it is needed. rocksdb-6.11.4/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown000066400000000000000000000033761370372246700271330ustar00rootroot00000000000000--- title: Reading RocksDB options from a file layout: post author: lgalanis category: blog redirect_from: - /blog/1883/reading-rocksdb-options-from-a-file/ --- RocksDB options can be provided using a file or any string to RocksDB. The format is straightforward: `write_buffer_size=1024;max_write_buffer_number=2`. Any whitespace around `=` and `;` is OK. Moreover, options can be nested as necessary. 
For example `BlockBasedTableOptions` can be nested as follows: `write_buffer_size=1024; max_write_buffer_number=2; block_based_table_factory={block_size=4k};`. Similarly any white space around `{` or `}` is ok. Here is what it looks like in code: ```c++ #include #include "rocksdb/db.h" #include "rocksdb/table.h" #include "rocksdb/utilities/convenience.h" using namespace rocksdb; int main(int argc, char** argv) { DB *db; Options opt; std::string options_string = "create_if_missing=true;max_open_files=1000;" "block_based_table_factory={block_size=4096}"; Status s = GetDBOptionsFromString(opt, options_string, &opt); s = DB::Open(opt, "/tmp/mydb_rocks", &db); // use db delete db; } ``` Using `GetDBOptionsFromString` is a convenient way of changing options for your RocksDB application without needing to resort to recompilation or tedious command line parsing. rocksdb-6.11.4/docs/_posts/2015-02-27-write-batch-with-index.markdown000066400000000000000000000074421370372246700246000ustar00rootroot00000000000000--- title: 'WriteBatchWithIndex: Utility for Implementing Read-Your-Own-Writes' layout: post author: sdong category: blog redirect_from: - /blog/1901/write-batch-with-index/ --- RocksDB can be used as a storage engine of a higher level database. In fact, we are currently plugging RocksDB into MySQL and MongoDB as one of their storage engines. RocksDB can help with guaranteeing some of the ACID properties: durability is guaranteed by RocksDB by design; while consistency and isolation need to be enforced by concurrency controls on top of RocksDB; Atomicity can be implemented by committing a transaction's writes with one write batch to RocksDB in the end. However, if we enforce atomicity by only committing all writes in the end of the transaction in one batch, you cannot get the updated value from RocksDB previously written by the same transaction (read-your-own-write). 
To read the updated value, the databases on top of RocksDB need to maintain an internal buffer for all the written keys, and when a read happens they need to merge the result from RocksDB and from this buffer. This is a problem we faced when building the RocksDB storage engine in MongoDB. We solved it by creating a utility class, WriteBatchWithIndex (a write batch with a searchable index) and made it part of public API so that the community can also benefit from it. Before talking about the index part, let me introduce write batch first. The write batch class, `WriteBatch`, is a RocksDB data structure for atomic writes of multiple keys. Users can buffer their updates to a `WriteBatch` by calling `write_batch.Put("key1", "value1")` or `write_batch.Delete("key2")`, similar as calling RocksDB's functions of the same names. In the end, they call `db->Write(write_batch)` to atomically update all those batched operations to the DB. It is how a database can guarantee atomicity, as shown above. Adding a searchable index to `WriteBatch`, we now have `WriteBatchWithIndex`. Users can put updates to WriteBatchIndex in the same way as to `WriteBatch`. In the end, users can get a `WriteBatch` object from it and issue `db->Write()`. Additionally, users can create an iterator of a WriteBatchWithIndex, seek to any key location and iterate from there. To implement read-your-own-write using `WriteBatchWithIndex`, every time the user creates a transaction, we create a `WriteBatchWithIndex` attached to it. All the writes of the transaction go to the `WriteBatchWithIndex` first. When we commit the transaction, we atomically write the batch to RocksDB. When the user wants to call `Get()`, we first check if the value exists in the `WriteBatchWithIndex` and return the value if existing, by seeking and reading from an iterator of the write batch, before checking data in RocksDB. 
For example, here is how we implement it in MongoDB's RocksDB storage engine: [link](https://github.com/mongodb/mongo/blob/a31cc114a89a3645e97645805ba77db32c433dce/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp#L245-L260). If a range query comes in, we pass a DB's iterator to `WriteBatchWithIndex`, which creates a super iterator that combines the results from the DB iterator with the batch's iterator. Using this super iterator, we can iterate the DB with the transaction's own writes. Here is the iterator creation code in MongoDB's RocksDB storage engine: [link](https://github.com/mongodb/mongo/blob/a31cc114a89a3645e97645805ba77db32c433dce/src/mongo/db/storage/rocks/rocks_recovery_unit.cpp#L266-L269). In this way, the database can solve the read-your-own-write problem by using RocksDB to handle a transaction's uncommitted writes. Using `WriteBatchWithIndex`, we successfully implemented read-your-own-writes in the RocksDB storage engine of MongoDB. If you also have a read-your-own-write problem, `WriteBatchWithIndex` can help you implement it quickly and correctly. rocksdb-6.11.4/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown000066400000000000000000000016061370372246700267750ustar00rootroot00000000000000--- title: Integrating RocksDB with MongoDB layout: post author: icanadi category: blog redirect_from: - /blog/1967/integrating-rocksdb-with-mongodb-2/ --- Over the last couple of years, we have been busy integrating RocksDB with various services here at Facebook that needed to store key-value pairs locally. We have also seen other companies using RocksDB as local storage components of their distributed systems. The next big challenge for us is to bring the RocksDB storage engine to general-purpose databases. Today we have an exciting milestone to share with our community! We're running MongoDB with RocksDB in production and seeing great results!
You can read more about it here: [http://blog.parse.com/announcements/mongodb-rocksdb-parse/](http://blog.parse.com/announcements/mongodb-rocksdb-parse/) Stay tuned for benchmarks and more stability and performance improvements. rocksdb-6.11.4/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown000066400000000000000000000007661370372246700240510ustar00rootroot00000000000000--- title: RocksDB in osquery layout: post author: icanadi category: lgalanis redirect_from: - /blog/1997/rocksdb-in-osquery/ --- Check out [this](https://code.facebook.com/posts/1411870269134471/how-rocksdb-is-used-in-osquery/) blog post by [Mike Arpaia](https://www.facebook.com/mike.arpaia) and [Ted Reed](https://www.facebook.com/treeded) about how osquery leverages RocksDB to build an embedded pub-sub system. This article is a great read and contains insights on how to properly use RocksDB. rocksdb-6.11.4/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown000066400000000000000000000057271370372246700242630ustar00rootroot00000000000000--- title: RocksDB 2015 H2 roadmap layout: post author: icanadi category: blog redirect_from: - /blog/2015/rocksdb-2015-h2-roadmap/ --- Every 6 months, the RocksDB team gets together to prioritize the work ahead of us. We just went through this exercise and we wanted to share the results with the community. Here's what the RocksDB team will be focusing on for the next 6 months: **MyRocks** As you might know, we're working hard to integrate RocksDB as a storage engine for MySQL. This project is pretty important for us because we're heavy users of MySQL. We're already getting pretty good performance results, but there is more work to be done. We need to focus on both performance and stability. The highest-priority items on our list are: 1. Reduce CPU costs of RocksDB as a MySQL storage engine 2. Implement pessimistic concurrency control to support repeatable read isolation level in MyRocks 3.
Reduce P99 read latency, which is high mostly because of lingering tombstones 4. Port ZSTD compression **MongoRocks** Another database that we're working on is MongoDB. The project of integrating MongoDB with the RocksDB storage engine is called MongoRocks. It's already running in production at Parse [1] and we're seeing surprisingly few issues. Our plans for the next half: 1. Keep improving performance and stability, possibly reuse work done on MyRocks (workloads are pretty similar). 2. Increase internal and external adoption. 3. Support new MongoDB 3.2. **RocksDB on cheaper storage media** Up to now, our mission was to build the best key-value store "for fast storage" (flash and in-memory). However, there are some use-cases at Facebook that don't need expensive high-end storage. In the next six months, we plan to deploy RocksDB on cheaper storage media. We will optimize the performance of RocksDB on either or both: 1. Hard drive storage array. 2. Tiered Storage. **Quality of Service** When talking to our customers, there are a couple of issues that keep recurring. We need to fix them to make our customers happy. We will improve RocksDB to provide better assurance of performance and resource usage. A non-exhaustive list includes: 1. Iterate P99 can be high due to the presence of tombstones. 2. Write stalls can happen during high write loads. 3. Better control of memory and disk usage. 4. Service quality and performance of backup engine. **Operation's user experience** As we increase deployment of RocksDB, engineers are spending more time on debugging RocksDB issues. We plan to improve user experience when running RocksDB. The goal is to reduce TTD (time-to-debug). The work includes monitoring, visualizations and documentation. [1] [http://blog.parse.com/announcements/mongodb-rocksdb-parse/](http://blog.parse.com/announcements/mongodb-rocksdb-parse/) ### Comments **[Mike](allspace2012@outlook.com)** What's the status of this roadmap?
"RocksDB on cheaper storage media", has this been implemented? rocksdb-6.11.4/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown000066400000000000000000000115011370372246700255750ustar00rootroot00000000000000--- title: Spatial indexing in RocksDB layout: post author: icanadi category: blog redirect_from: - /blog/2039/spatial-indexing-in-rocksdb/ --- About a year ago, there was a need to develop a spatial database at Facebook. We needed to store and index Earth's map data. Before building our own, we looked at the existing spatial databases. They were all very good technology, but also general purpose. We could sacrifice a general-purpose API, so we thought we could build a more performant database, since it would be specifically designed for our use-case. Furthermore, we decided to build the spatial database on top of RocksDB, because we have a lot of operational experience with running and tuning RocksDB at a large scale. When we started looking at this project, the first thing that surprised us was that our planet is not that big. Earth's entire map data can fit in memory on a reasonably high-end machine. Thus, we also decided to build a spatial database optimized for memory-resident dataset. The first use-case of our spatial database was an experimental map renderer. As part of our project, we successfully loaded [Open Street Maps](https://www.openstreetmap.org/) dataset and hooked it up with [Mapnik](http://mapnik.org/), a map rendering engine. The usual Mapnik workflow is to load the map data into a SQL-based database and then define map layers with SQL statements. To render a tile, Mapnik needs to execute a couple of SQL queries. The benefit of this approach is that you don't need to reload your database when you change your map style. You can just change your SQL query and Mapnik picks it up. In our model, we decided to precompute the features we need for each tile. We need to know the map style before we create the database.
However, when rendering the map tile, we only fetch the features that we need to render. We haven't open sourced the RocksDB Mapnik plugin or the database loading pipeline. However, the spatial indexing is available in RocksDB under a name [SpatialDB](https://github.com/facebook/rocksdb/blob/master/include/rocksdb/utilities/spatial_db.h). The API is focused on map rendering use-case, but we hope that it can also be used for other spatial-based applications. Let's take a tour of the API. When you create a spatial database, you specify the spatial indexes that need to be built. Each spatial index is defined by a bounding box and granularity. For map rendering, we create a spatial index for each zoom levels. Higher zoom levels have more granularity. SpatialDB::Create( SpatialDBOptions(), "/data/map", { SpatialIndexOptions("zoom10", BoundingBox(0, 0, 100, 100), 10), SpatialIndexOptions("zoom16", BoundingBox(0, 0, 100, 100), 16) } ); When you insert a feature (building, street, country border) into SpatialDB, you need to specify the list of spatial indexes that will index the feature. In the loading phase we process the map style to determine the list of zoom levels on which we'll render the feature. For example, we will not render the building on zoom level that shows an entire country. Building will only be indexed on higher zoom level's index. Country borders will be indexes on all zoom levels. FeatureSet feature; feature.Set("type", "building"); feature.Set("height", 6); db->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), well_known_binary_blob, feature, {"zoom16"}); The indexing part is pretty simple. For each feature, we first find a list of index tiles that it intersects. Then, we add a link from the tile's [quad key](https://msdn.microsoft.com/en-us/library/bb259689.aspx) to the feature's primary key. Using quad keys improves data locality, i.e. features closer together geographically will have similar quad keys. 
Even though we're optimizing for a memory-resident dataset, data locality is still very important due to different caching effects. After you're done inserting all the features, you can call an API Compact() that will compact the dataset and speed up read queries. db->Compact(); SpatialDB's query specifies: 1) bounding box we're interested in, and 2) a zoom level. We find all tiles that intersect with the query's bounding box and return all features in those tiles. Cursor* c = db_->Query(ReadOptions(), BoundingBox(1, 1, 7, 7), "zoom16"); for (c->Valid(); c->Next()) { Render(c->blob(), c->feature_set()); } Note: `Render()` function is not part of RocksDB. You will need to use one of many open source map renderers, for example check out [Mapnik](http://mapnik.org/). TL;DR If you need an embedded spatial database, check out RocksDB's SpatialDB. [Let us know](https://www.facebook.com/groups/rocksdb.dev/) how we can make it better. If you're interested in learning more, check out this [talk](https://www.youtube.com/watch?v=T1jWsDMONM8). rocksdb-6.11.4/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown000066400000000000000000000020311370372246700307730ustar00rootroot00000000000000--- title: RocksDB is now available in Windows Platform layout: post author: dmitrism category: blog redirect_from: - /blog/2033/rocksdb-is-now-available-in-windows-platform/ --- Over the past 6 months we have seen a number of use cases where RocksDB is successfully used by the community and various companies to achieve high throughput and volume in a modern server environment. We at Microsoft Bing could not be left behind. As a result we are happy to [announce](http://bit.ly/1OmWBT9) the availability of the Windows Port created here at Microsoft which we intend to use as a storage option for one of our key/value data stores. We are happy to make this available for the community. Keep tuned for more announcements to come. 
### Comments **[Siying Dong](siying.d@fb.com)** Appreciate your contributions to RocksDB project! I believe it will benefits many users! **[empresas sevilla](oxofkx@gmail.com)** Magnifico artículo|, un placer leer el blog **[jak usunac](tomogedac@o2.pl)** I believe it will benefits too rocksdb-6.11.4/docs/_posts/2015-07-23-dynamic-level.markdown000066400000000000000000000105161370372246700230370ustar00rootroot00000000000000--- title: Dynamic Level Size for Level-Based Compaction layout: post author: sdong category: blog redirect_from: - /blog/2207/dynamic-level/ --- In this article, we follow up on the first part of an answer to one of the questions in our [AMA](https://www.reddit.com/r/IAmA/comments/3de3cv/we_are_rocksdb_engineering_team_ask_us_anything/ct4a8tb), the dynamic level size in level-based compaction. Level-based compaction is the original LevelDB compaction style and one of the two major compaction styles in RocksDB (See [our wiki](https://github.com/facebook/rocksdb/wiki/RocksDB-Basics#multi-threaded-compactions)). In RocksDB we introduced parallelism and more configurable options to it but the main algorithm stayed the same, until we recently introduced the dynamic level size mode. In level-based compaction, we organize data into different sorted runs, called levels. Each level has a target size. Usually the target size of levels increases by the same size multiplier. For example, you can set the target size of level 1 to be 1GB, and the size multiplier to be 10, and the target sizes of levels 1, 2, 3, 4 will be 1GB, 10GB, 100GB and 1000GB. Before level 1, there will be some staging files flushed from mem tables, called Level 0 files, which will later be merged to level 1. Compactions will be triggered as soon as the actual size of a level exceeds its target size. We will merge a subset of data of that level to the next level, to reduce the size of the level. More compactions will be triggered until the sizes of all the levels are lower than their target sizes.
In a steady state, the size of each level will be around the same as its target size. Level-based compaction's advantage is its good space efficiency. We usually use the metric space amplification to measure the space efficiency. In this article we ignore the effects of data compression, so space amplification = size_on_file_system / size_of_user_data. How do we estimate space amplification of level-based compaction? We focus specifically on the databases in steady state, which means database size is stable or grows slowly over time. This means updates will add roughly the same or a little more data than what is removed by deletes. Given that, if we compact all the data to the last level, the size of that level will be equal to the size of the last level before the compaction. On the other hand, the size of user data will be approximately the size of the DB if we compact all the levels down to the last level. So the size of the last level will be a good estimation of user data size. So the total size of the DB divided by the size of the last level will be a good estimation of space amplification. Applying the equation, if we have four non-zero levels, and their sizes are 1GB, 10GB, 100GB, 1000GB, the size amplification will be approximately (1000GB + 100GB + 10GB + 1GB) / 1000GB = 1.111, which is a very good number. However, there is a catch here: how to make sure the last level's size is 1000GB, the same as the level's size target? A user has to fine tune level sizes to achieve this number and will need to re-tune if DB size changes. The theoretical number 1.111 is hard to achieve in practice. In a worse case, if you have the target size of the last level set to 1000GB but the user data is only 200GB, then the actual space amplification will be (200GB + 100GB + 10GB + 1GB) / 200GB = 1.555, a much worse number. To solve this problem, my colleague Igor Kabiljo came up with a solution of dynamic level size target mode.
You can enable it by setting `options.level_compaction_dynamic_level_bytes=true`. In this mode, the size targets of levels are changed dynamically based on the size of the last level. Suppose the level size multiplier is 10, and the DB size is 200GB. The target size of the last level is automatically set to be the actual size of the level, which is 200GB, the second to last level's size target will be automatically set to be size_last_level / 10 = 20GB, the third last level's will be size_last_level/100 = 2GB, and the next level's will be size_last_level/1000 = 200MB. We stop here because 200MB is within the range of the first level. In this way, we can achieve the 1.111 space amplification, without fine tuning of the level size targets. More details can be found in [code comments of the option](https://github.com/facebook/rocksdb/blob/v3.11/include/rocksdb/options.h#L366-L423) in the header file. rocksdb-6.11.4/docs/_posts/2015-10-27-getthreadlist.markdown000066400000000000000000000120421370372246700231430ustar00rootroot00000000000000--- title: GetThreadList layout: post author: yhciang category: blog redirect_from: - /blog/2261/getthreadlist/ --- We recently added a new API, called `GetThreadList()`, that exposes the RocksDB background thread activity. With this feature, developers will be able to obtain the real-time information about the currently running compactions and flushes such as the input / output size, elapsed time, the number of bytes it has written. Below is an example output of `GetThreadList`. To better illustrate the example, we have put a sample output of `GetThreadList` into a table where each column represents a thread status:
ThreadID 140716395198208 140716416169728
DB db1 db2
CF default picachu
ThreadType High Pri Low Pri
Operation Flush Compaction
ElapsedTime 143.459 ms 607.538 ms
Stage FlushJob::WriteLevel0Table CompactionJob::Install
OperationProperties BytesMemtables 4092938 BytesWritten 1050701 BaseInputLevel 1 BytesRead 4876417 BytesWritten 4140109 IsDeletion 0 IsManual 0 IsTrivialMove 0 JobID 146 OutputLevel 2 TotalInputBytes 4883044
In the above output, we can see `GetThreadList()` reports the activity of two threads: one thread running a flush job (middle column) and the other thread running a compaction job (right-most column). In each thread status, it shows basic information about the thread such as thread id, its target db / column family, and the job it is currently doing and the current status of the job. For instance, we can see thread 140716416169728 is doing compaction on the `picachu` column family in database `db2`. In addition, we can see the compaction has been running for about 600 ms, and it has read 4876417 bytes out of 4883044 bytes. This indicates the compaction is about to complete. The stage property indicates which code block the thread is currently executing. For instance, thread 140716416169728 is currently running `CompactionJob::Install`, which further indicates the compaction job is almost done. Below we briefly describe its API. ## How to Enable it? To enable thread-tracking of a rocksdb instance, simply set `enable_thread_tracking` to true in its DBOptions: ```c++ // If true, then the status of the threads involved in this DB will // be tracked and available via GetThreadList() API. // // Default: false bool enable_thread_tracking; ``` ## The API The GetThreadList API is defined in [include/rocksdb/env.h](https://github.com/facebook/rocksdb/blob/master/include/rocksdb/env.h#L317-L318), which is an Env function: ```c++ virtual Status GetThreadList(std::vector<ThreadStatus>* thread_list) ``` Since an Env can be shared across multiple rocksdb instances, the output of `GetThreadList()` includes the background activity of all the rocksdb instances that use the same Env. The `GetThreadList()` API simply returns a vector of `ThreadStatus`, each of which describes the current status of a thread.
The `ThreadStatus` structure, defined in [include/rocksdb/thread_status.h](https://github.com/facebook/rocksdb/blob/master/include/rocksdb/thread_status.h), contains the following information: ```c++ // An unique ID for the thread. const uint64_t thread_id; // The type of the thread, it could be HIGH_PRIORITY, // LOW_PRIORITY, and USER const ThreadType thread_type; // The name of the DB instance where the thread is currently // involved with. It would be set to empty string if the thread // does not involve in any DB operation. const std::string db_name; // The name of the column family where the thread is currently // It would be set to empty string if the thread does not involve // in any column family. const std::string cf_name; // The operation (high-level action) that the current thread is involved. const OperationType operation_type; // The elapsed time in micros of the current thread operation. const uint64_t op_elapsed_micros; // An integer showing the current stage where the thread is involved // in the current operation. const OperationStage operation_stage; // A list of properties that describe some details about the current // operation. Same field in op_properties[] might have different // meanings for different operations. uint64_t op_properties[kNumOperationProperties]; // The state (lower-level action) that the current thread is involved. 
const StateType state_type; ``` If you are interested in the background thread activity of your RocksDB application, please feel free to give `GetThreadList()` a try :) rocksdb-6.11.4/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown000066400000000000000000000040501370372246700301330ustar00rootroot00000000000000--- title: Use Checkpoints for Efficient Snapshots layout: post author: rven2 category: blog redirect_from: - /blog/2609/use-checkpoints-for-efficient-snapshots/ --- **Checkpoint** is a feature in RocksDB which provides the ability to take a snapshot of a running RocksDB database in a separate directory. Checkpoints can be used as a point in time snapshot, which can be opened Read-only to query rows as of the point in time or as a Writeable snapshot by opening it Read-Write. Checkpoints can be used for both full and incremental backups. The Checkpoint feature enables RocksDB to create a consistent snapshot of a given RocksDB database in the specified directory. If the snapshot is on the same filesystem as the original database, the SST files will be hard-linked, otherwise SST files will be copied. The manifest and CURRENT files will be copied. In addition, if there are multiple column families, log files will be copied for the period covering the start and end of the checkpoint, in order to provide a consistent snapshot across column families. A Checkpoint object needs to be created for a database before checkpoints are created. The API is as follows: `Status Create(DB* db, Checkpoint** checkpoint_ptr);` Given a checkpoint object and a directory, the CreateCheckpoint function creates a consistent snapshot of the database in the given directory. `Status CreateCheckpoint(const std::string& checkpoint_dir);` The directory should not already exist and will be created by this API. The directory will be an absolute path. The checkpoint can be used as a ​read-only copy of the DB or can be opened as a standalone DB. 
When opened read/write, the SST files continue to be hard links and these links are removed when the files are obsoleted. When the user is done with the snapshot, the user can delete the directory to remove the snapshot. Checkpoints are used for online backup in ​MyRocks. which is MySQL using RocksDB as the storage engine . ([MySQL on RocksDB](https://github.com/facebook/mysql-5.6)) ​ rocksdb-6.11.4/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown000066400000000000000000000300761370372246700271310ustar00rootroot00000000000000--- title: Analysis File Read Latency by Level layout: post author: sdong category: blog redirect_from: - /blog/2537/analysis-file-read-latency-by-level/ --- In many use cases of RocksDB, people rely on OS page cache for caching compressed data. With this approach, verifying effective of the OS page caching is challenging, because file system is a black box to users. As an example, a user can tune the DB as following: use level-based compaction, with L1 - L4 sizes to be 1GB, 10GB, 100GB and 1TB. And they reserve about 20GB memory as OS page cache, expecting level 0, 1 and 2 are mostly cached in memory, leaving only reads from level 3 and 4 requiring disk I/Os. However, in practice, it's not easy to verify whether OS page cache does exactly what we expect. For example, if we end up with doing 4 instead of 2 I/Os per query, it's not easy for users to figure out whether the it's because of efficiency of OS page cache or reading multiple blocks for a level. Analysis like it is especially important if users run RocksDB on hard drive disks, for the gap of latency between hard drives and memory is much higher than flash-based SSDs. In order to make tuning easier, we added new instrumentation to help users analysis latency distribution of file reads in different levels. If users turn DB statistics on, we always keep track of distribution of file read latency for each level. 
Users can retrieve the information by querying DB property “rocksdb.stats” ( [https://github.com/facebook/rocksdb/blob/v3.13.1/include/rocksdb/db.h#L315-L316](https://github.com/facebook/rocksdb/blob/v3.13.1/include/rocksdb/db.h#L315-L316) ). It will also be printed out as a part of compaction summary in info logs periodically. The output looks like this: ``` ** Level 0 read latency histogram (micros): Count: 696 Average: 489.8118 StdDev: 222.40 Min: 3.0000 Median: 452.3077 Max: 1896.0000 Percentiles: P50: 452.31 P75: 641.30 P99: 1068.00 P99.9: 1860.80 P99.99: 1896.00 ------------------------------------------------------ [ 2, 3 ) 1 0.144% 0.144% [ 18, 20 ) 1 0.144% 0.287% [ 45, 50 ) 5 0.718% 1.006% [ 50, 60 ) 26 3.736% 4.741% # [ 60, 70 ) 6 0.862% 5.603% [ 90, 100 ) 1 0.144% 5.747% [ 120, 140 ) 2 0.287% 6.034% [ 140, 160 ) 1 0.144% 6.178% [ 160, 180 ) 1 0.144% 6.322% [ 200, 250 ) 9 1.293% 7.615% [ 250, 300 ) 45 6.466% 14.080% # [ 300, 350 ) 88 12.644% 26.724% ### [ 350, 400 ) 88 12.644% 39.368% ### [ 400, 450 ) 71 10.201% 49.569% ## [ 450, 500 ) 65 9.339% 58.908% ## [ 500, 600 ) 74 10.632% 69.540% ## [ 600, 700 ) 92 13.218% 82.759% ### [ 700, 800 ) 64 9.195% 91.954% ## [ 800, 900 ) 35 5.029% 96.983% # [ 900, 1000 ) 12 1.724% 98.707% [ 1000, 1200 ) 6 0.862% 99.569% [ 1200, 1400 ) 2 0.287% 99.856% [ 1800, 2000 ) 1 0.144% 100.000% ** Level 1 read latency histogram (micros): (......not pasted.....) ** Level 2 read latency histogram (micros): (......not pasted.....) ** Level 3 read latency histogram (micros): (......not pasted.....) ** Level 4 read latency histogram (micros): (......not pasted.....) 
** Level 5 read latency histogram (micros): Count: 25583746 Average: 421.1326 StdDev: 385.11 Min: 1.0000 Median: 376.0011 Max: 202444.0000 Percentiles: P50: 376.00 P75: 438.00 P99: 1421.68 P99.9: 4164.43 P99.99: 9056.52 ------------------------------------------------------ [ 0, 1 ) 2351 0.009% 0.009% [ 1, 2 ) 6077 0.024% 0.033% [ 2, 3 ) 8471 0.033% 0.066% [ 3, 4 ) 788 0.003% 0.069% [ 4, 5 ) 393 0.002% 0.071% [ 5, 6 ) 786 0.003% 0.074% [ 6, 7 ) 1709 0.007% 0.080% [ 7, 8 ) 1769 0.007% 0.087% [ 8, 9 ) 1573 0.006% 0.093% [ 9, 10 ) 1495 0.006% 0.099% [ 10, 12 ) 3043 0.012% 0.111% [ 12, 14 ) 2259 0.009% 0.120% [ 14, 16 ) 1233 0.005% 0.125% [ 16, 18 ) 762 0.003% 0.128% [ 18, 20 ) 451 0.002% 0.130% [ 20, 25 ) 794 0.003% 0.133% [ 25, 30 ) 1279 0.005% 0.138% [ 30, 35 ) 1172 0.005% 0.142% [ 35, 40 ) 1363 0.005% 0.148% [ 40, 45 ) 409 0.002% 0.149% [ 45, 50 ) 105 0.000% 0.150% [ 50, 60 ) 80 0.000% 0.150% [ 60, 70 ) 280 0.001% 0.151% [ 70, 80 ) 1583 0.006% 0.157% [ 80, 90 ) 4245 0.017% 0.174% [ 90, 100 ) 6572 0.026% 0.200% [ 100, 120 ) 9724 0.038% 0.238% [ 120, 140 ) 3713 0.015% 0.252% [ 140, 160 ) 2383 0.009% 0.261% [ 160, 180 ) 18344 0.072% 0.333% [ 180, 200 ) 51873 0.203% 0.536% [ 200, 250 ) 631722 2.469% 3.005% [ 250, 300 ) 2721970 10.639% 13.644% ## [ 300, 350 ) 5909249 23.098% 36.742% ##### [ 350, 400 ) 6522507 25.495% 62.237% ##### [ 400, 450 ) 4296332 16.793% 79.030% ### [ 450, 500 ) 2130323 8.327% 87.357% ## [ 500, 600 ) 1553208 6.071% 93.428% # [ 600, 700 ) 642129 2.510% 95.938% # [ 700, 800 ) 372428 1.456% 97.394% [ 800, 900 ) 187561 0.733% 98.127% [ 900, 1000 ) 85858 0.336% 98.462% [ 1000, 1200 ) 82730 0.323% 98.786% [ 1200, 1400 ) 50691 0.198% 98.984% [ 1400, 1600 ) 38026 0.149% 99.133% [ 1600, 1800 ) 32991 0.129% 99.261% [ 1800, 2000 ) 30200 0.118% 99.380% [ 2000, 2500 ) 62195 0.243% 99.623% [ 2500, 3000 ) 36684 0.143% 99.766% [ 3000, 3500 ) 21317 0.083% 99.849% [ 3500, 4000 ) 10216 0.040% 99.889% [ 4000, 4500 ) 8351 0.033% 99.922% [ 4500, 5000 ) 4152 0.016% 
99.938% [ 5000, 6000 ) 6328 0.025% 99.963% [ 6000, 7000 ) 3253 0.013% 99.976% [ 7000, 8000 ) 2082 0.008% 99.984% [ 8000, 9000 ) 1546 0.006% 99.990% [ 9000, 10000 ) 1055 0.004% 99.994% [ 10000, 12000 ) 1566 0.006% 100.000% [ 12000, 14000 ) 761 0.003% 100.003% [ 14000, 16000 ) 462 0.002% 100.005% [ 16000, 18000 ) 226 0.001% 100.006% [ 18000, 20000 ) 126 0.000% 100.006% [ 20000, 25000 ) 107 0.000% 100.007% [ 25000, 30000 ) 43 0.000% 100.007% [ 30000, 35000 ) 15 0.000% 100.007% [ 35000, 40000 ) 14 0.000% 100.007% [ 40000, 45000 ) 16 0.000% 100.007% [ 45000, 50000 ) 1 0.000% 100.007% [ 50000, 60000 ) 22 0.000% 100.007% [ 60000, 70000 ) 10 0.000% 100.007% [ 70000, 80000 ) 5 0.000% 100.007% [ 80000, 90000 ) 14 0.000% 100.007% [ 90000, 100000 ) 11 0.000% 100.007% [ 100000, 120000 ) 33 0.000% 100.007% [ 120000, 140000 ) 6 0.000% 100.007% [ 140000, 160000 ) 3 0.000% 100.007% [ 160000, 180000 ) 7 0.000% 100.007% [ 200000, 250000 ) 2 0.000% 100.007% ``` In this example, you can see we only issued 696 reads from level 0 while issued 25 million reads from level 5. The latency distribution is also clearly shown among those reads. This will be helpful for users to analysis OS page cache efficiency. Currently the read latency per level includes reads from data blocks, index blocks, as well as bloom filter blocks. We are also working on a feature to break down those three type of blocks. ### Comments **[Tao Feng](fengtao04@gmail.com)** Is this feature also included in RocksJava? **[Siying Dong](siying.d@fb.com)** Should be. As long as you enable statistics, you should be able to get the value from `RocksDB.getProperty()` with property `rocksdb.dbstats`. Let me know if you can’t find it. **[chiddu](cnbscience@gmail.com)** > In this example, you can see we only issued 696 reads from level 0 while issued 256K reads from level 5. Isn’t it 2.5 M of reads instead of 256K ? . Also could anyone please provide more description on the histogram ? 
especially > Count: 25583746 Average: 421.1326 StdDev: 385.11 > Min: 1.0000 Median: 376.0011 Max: 202444.0000 > Percentiles: P50: 376.00 P75: 438.00 P99: 1421.68 P99.9: 4164.43 P99.99: 9056.52 and > [ 0, 1 ) 2351 0.009% 0.009% > [ 1, 2 ) 6077 0.024% 0.033% > [ 2, 3 ) 8471 0.033% 0.066% > [ 3, 4 ) 788 0.003% 0.069%” thanks in advance **[Siying Dong](siying.d@fb.com)** Thank you for pointing out the mistake. I fixed it now. In this output, there are 2.5 million samples, average latency is 421 micro seconds, with standard deviation 385. Median is 376, max value is 202 milliseconds. 0.009% has value of 1, 0.024% has value of 1, 0.033% has value of 2. Accumulated value from 0 to 2 is 0.066%. Hope it helps. **[chiddu](cnbscience@gmail.com)** Thank you Siying for the quick reply, I was running couple of benchmark testing to check the performance of rocksdb on SSD. One of the test is similar to what is mentioned in the wiki, TEST 4 : Random read , except the key_size is 10 and value_size is 20. I am inserting 1 billion hashes and reading 1 billion hashes with 32 threads. The histogram shows something like this ``` Level 5 read latency histogram (micros): Count: 7133903059 Average: 480.4357 StdDev: 309.18 Min: 0.0000 Median: 551.1491 Max: 224142.0000 Percentiles: P50: 551.15 P75: 651.44 P99: 996.52 P99.9: 2073.07 P99.99: 3196.32 —————————————————— [ 0, 1 ) 28587385 0.401% 0.401% [ 1, 2 ) 686572516 9.624% 10.025% ## [ 2, 3 ) 567317522 7.952% 17.977% ## [ 3, 4 ) 44979472 0.631% 18.608% [ 4, 5 ) 50379685 0.706% 19.314% [ 5, 6 ) 64930061 0.910% 20.224% [ 6, 7 ) 22613561 0.317% 20.541% …………more…………. ``` If I understand your previous comment correctly, 1. How is it that the count is around 7 billion when I have only inserted 1 billion hashes ? is the stat broken ? 1. What does the percentiles and the numbers signify ? 1. 0, 1 ) 28587385 0.401% 0.401% what does this “28587385” stand for in the histogram row ? 
**[Siying Dong](siying.d@fb.com)** If I remember correctly, with db_bench, if you specify –num=1000000000 –threads=32, it is every thread reading one billion keys, total of 32 billions. Is it the case you ran into? 28,587,385 means that number of data points take the value [0,1) 28,587,385 / 7,133,903,058 = 0.401% provides percentage. **[chiddu](cnbscience@gmail.com)** I do have `num=1000000000` and `t=32`. The script says reading 1 billion hashes and not 32 billion hashes. this is the script on which I have used ``` echo “Load 1B keys sequentially into database…..†bpl=10485760;overlap=10;mcz=2;del=300000000;levels=6;ctrig=4; delay=8; stop=12; wbn=3; mbc=20; mb=67108864;wbs=134217728; dds=1; sync=0; r=1000000000; t=1; vs=20; bs=4096; cs=1048576; of=500000; si=1000000; ./db_bench –benchmarks=fillseq –disable_seek_compaction=1 –mmap_read=0 –statistics=1 –histogram=1 –num=$r –threads=$t –value_size=$vs –block_size=$bs –cache_size=$cs –bloom_bits=10 –cache_numshardbits=6 –open_files=$of –verify_checksum=1 –db=/data/mysql/leveldb/test –sync=$sync –disable_wal=1 –compression_type=none –stats_interval=$si –compression_ratio=0.5 –disable_data_sync=$dds –write_buffer_size=$wbs –target_file_size_base=$mb –max_write_buffer_number=$wbn –max_background_compactions=$mbc –level0_file_num_compaction_trigger=$ctrig –level0_slowdown_writes_trigger=$delay –level0_stop_writes_trigger=$stop –num_levels=$levels –delete_obsolete_files_period_micros=$del –min_level_to_compress=$mcz –max_grandparent_overlap_factor=$overlap –stats_per_interval=1 –max_bytes_for_level_base=$bpl –use_existing_db=0 –key_size=10 echo “Reading 1B keys in database in random order….†bpl=10485760;overlap=10;mcz=2;del=300000000;levels=6;ctrig=4; delay=8; stop=12; wbn=3; mbc=20; mb=67108864;wbs=134217728; dds=0; sync=0; r=1000000000; t=32; vs=20; bs=4096; cs=1048576; of=500000; si=1000000; ./db_bench –benchmarks=readrandom –disable_seek_compaction=1 –mmap_read=0 –statistics=1 –histogram=1 –num=$r –threads=$t 
–value_size=$vs –block_size=$bs –cache_size=$cs –bloom_bits=10 –cache_numshardbits=6 –open_files=$of –verify_checksum=1 –db=/some_data_base –sync=$sync –disable_wal=1 –compression_type=none –stats_interval=$si –compression_ratio=0.5 –disable_data_sync=$dds –write_buffer_size=$wbs –target_file_size_base=$mb –max_write_buffer_number=$wbn –max_background_compactions=$mbc –level0_file_num_compaction_trigger=$ctrig –level0_slowdown_writes_trigger=$delay –level0_stop_writes_trigger=$stop –num_levels=$levels –delete_obsolete_files_period_micros=$del –min_level_to_compress=$mcz –max_grandparent_overlap_factor=$overlap –stats_per_interval=1 –max_bytes_for_level_base=$bpl –use_existing_db=1 –key_size=10 ``` After running this script, there were no issues wrt to loading billion hashes , but when it came to reading part, its been almost 4 days and still I have only read 7 billion hashes and have read 200 million hashes in 2 and half days. Is there something which is missing in db_bench or something which I am missing ? **[Siying Dong](siying.d@fb.com)** It’s a printing error then. If you have `num=1000000000` and `t=32`, it will be 32 threads, and each reads 1 billion keys. rocksdb-6.11.4/docs/_posts/2016-01-29-compaction_pri.markdown000066400000000000000000000156201370372246700233160ustar00rootroot00000000000000--- title: Option of Compaction Priority layout: post author: sdong category: blog redirect_from: - /blog/2921/compaction_pri/ --- The most popular compaction style of RocksDB is level-based compaction, which is an improved version of LevelDB's compaction algorithm. Page 9- 16 of this [slides](https://github.com/facebook/rocksdb/blob/gh-pages/talks/2015-09-29-HPTS-Siying-RocksDB.pdf) gives an illustrated introduction of this compaction style. The basic idea that: data is organized by multiple levels with exponential increasing target size. Except a special level 0, every level is key-range partitioned into many files. 
When the size of a level exceeds its target size, we pick one or more of its files, and merge them into the next level. Which file to pick to compact is an interesting question. LevelDB only uses one thread for compaction and it always picks files in round robin manner. We implemented multi-thread compaction in RocksDB by picking multiple files from the same level and compacting them in parallel. We had to move away from LevelDB's file picking approach. Recently, we created an option [options.compaction_pri](https://github.com/facebook/rocksdb/blob/d6c838f1e130d8860407bc771fa6d4ac238859ba/include/rocksdb/options.h#L83-L93), which offers three different algorithms to pick files to compact. Why do we need multiple algorithms to choose from? Because there are different factors to consider when picking the files, and we don't yet know how to balance them automatically, so we expose the choice to users. Here are factors to consider: **Write amplification** When we estimate write amplification, we usually simplify the problem by assuming keys are uniformly distributed inside each level. In reality, it is not the case, even if user updates are uniformly distributed across the whole key range. For instance, when we compact one file of a level to the next level, it creates a hole. Over time, incoming compaction will fill data to the hole, but the density will still be lower for a while. Picking a file with keys least densely populated is more expensive to get the file to the next level, because there will be more overlapping files in the next level so we need to rewrite more data. For example, assume a file is 100MB, if an L2 file overlaps with 8 L3 files, we need to rewrite about 800MB of data to get the file to L3. If the file overlaps with 12 L3 files, we'll need to rewrite about 1200MB to get a file of the same size out of L2. It uses 50% more writes. 
(This analysis ignores the key density of the next level, because the range covers N times of files in that level so one hole only impacts write amplification by 1/N) If all the updates are uniformly distributed, LevelDB's approach optimizes write amplification, because a file being picked covers a range whose last compaction time to the next level is the oldest, so the range will have accumulated keys from incoming compactions for the longest and the density is the highest. We created a compaction priority **kOldestSmallestSeqFirst** for the same effect. With this mode, we always pick the file that covers the oldest updates in the level, which usually contains the densest key range. If you have a use case where writes are uniformly distributed across the key space and you want to reduce write amplification, you should set options.compaction_pri=kOldestSmallestSeqFirst. **Optimize for small working set** We are assuming updates are uniformly distributed across the whole key space in previous analysis. However, in many use cases, there is a subset of keys that are frequently updated while other key ranges are very cold. In this case, keeping hot key ranges from compacting to deeper levels will benefit write amplification, as well as space amplification. For example, suppose that in a DB only keys 150-160 are updated and other keys are seldom updated. If level 1 contains 20 keys, we want keys 150-160 to all stay in level 1. This is because when the next level 0 -> 1 compaction comes, it will simply overwrite existing keys, so the size of level 1 doesn't increase and there is no need to schedule further compaction for level 1->2. On the other hand, if we compact keys 150-155 to level 2, when a new level 0->1 compaction comes, it increases the size of level 1, making the size of level 1 exceed its target size, so more compactions will be needed, which generates more writes. The compaction priority **kOldestLargestSeqFirst** optimizes this use case. In this mode, we will pick a file whose latest update is the oldest. 
It means there has been no incoming data for the range for the longest time. Usually it is the coldest range. By compacting the coldest range first, we leave the hot ranges in the level. If your use case is to overwrite existing keys in a small range, try options.compaction_pri=kOldestLargestSeqFirst. **Drop delete marker sooner** If one file contains a lot of delete markers, it may slow down iterating over this area, because we still need to iterate those deleted keys just to ignore them. Furthermore, the sooner we compact delete keys into the last level, the sooner the disk space is reclaimed, so it is good for space efficiency. Our default compaction priority **kByCompensatedSize** considers the case. If the number of deletes in a file exceeds the number of inserts, it is more likely to be picked for compaction. The more the number of deletes exceeds the number of inserts, the more likely the file is to be compacted. The optimization is added to avoid the worst performance of space efficiency and query performance when a large percentage of the DB is deleted. **Efficiency of compaction filter** Usually people use [compaction filters](https://github.com/facebook/rocksdb/blob/v4.1/include/rocksdb/options.h#L201-L226) to clean up old data to free up space. Picking files to compact may impact space efficiency. We don't yet have a compaction priority to optimize this case. In some of our use cases, we solved the problem in a different way: we have an external service checking the modify time of all SST files. If any of the files is too old, we force compaction of that single file by calling DB::CompactFiles() using the single file. In this way, we can provide a time bound of data passing through compaction filters. In all, there are three choices of compaction priority modes optimizing different scenarios. If you have a new use case, we suggest you start with `options.compaction_pri=kOldestSmallestSeqFirst` (note it is not the default one for backward-compatibility reasons). 
If you want to further optimize your use case, you can try other two use cases if your use cases apply. If you have good ideas about better compaction picker approach, you are welcome to implement and benchmark it. We'll be glad to review and merge your a pull requests. ### Comments **[Mark Callaghan](mdcallag@gmail.com)** Performance results for compaction_pri values and linkbench are explained at [http://smalldatum.blogspot.com/2016/02/compaction-priority-in-rocksdb.html](http://smalldatum.blogspot.com/2016/02/compaction-priority-in-rocksdb.html) rocksdb-6.11.4/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown000066400000000000000000000024751370372246700236550ustar00rootroot00000000000000--- title: RocksDB 4.2 Release! layout: post author: sdong category: blog redirect_from: - /blog/3017/rocksdb-4-2-release/ --- New RocksDB release - 4.2! **New Features** 1. Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions. 2. Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families. 3. Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list rocksdb instances. **Public API changes** 1. CompactionFilter::Context includes information of Column Family ID 2. The need-compaction hint given by TablePropertiesCollector::NeedCompact() will be persistent and recoverable after DB recovery. This introduces a breaking format change. If you use this experimental feature, including NewCompactOnDeletionCollectorFactory() in the new version, you may not be able to directly downgrade the DB back to version 4.0 or lower. 3. TablePropertiesCollectorFactory::CreateTablePropertiesCollector() now takes an option Context, containing the information of column family ID for the file being written. 4. Remove DefaultCompactionFilterFactory. 
[https://github.com/facebook/rocksdb/releases/tag/v4.2](https://github.com/facebook/rocksdb/releases/tag/v4.2) rocksdb-6.11.4/docs/_posts/2016-02-25-rocksdb-ama.markdown000066400000000000000000000013331370372246700224640ustar00rootroot00000000000000--- title: RocksDB AMA layout: post author: yhchiang category: blog redirect_from: - /blog/3065/rocksdb-ama/ --- RocksDB developers are doing a Reddit Ask-Me-Anything now at 10AM – 11AM PDT! We welcome you to stop by and ask any RocksDB related questions, including existing / upcoming features, tuning tips, or database design. Here are some enhancements that we'd like to focus on over the next six months: * 2-Phase Commit * Lua support in some custom functions * Backup and repair tools * Direct I/O to bypass OS cache * RocksDB Java API [https://www.reddit.com/r/IAmA/comments/47k1si/we_are_rocksdb_developers_ask_us_anything/](https://www.reddit.com/r/IAmA/comments/47k1si/we_are_rocksdb_developers_ask_us_anything/) rocksdb-6.11.4/docs/_posts/2016-03-07-rocksdb-options-file.markdown000066400000000000000000000022661370372246700243450ustar00rootroot00000000000000--- title: RocksDB Options File layout: post author: yhciang category: blog redirect_from: - /blog/3089/rocksdb-options-file/ --- In RocksDB 4.3, we added a new set of features that makes managing RocksDB options easier. Specifically: * **Persisting Options Automatically**: Each RocksDB database will now automatically persist its current set of options into an INI file on every successful call of DB::Open(), SetOptions(), and CreateColumnFamily() / DropColumnFamily(). * **Load Options from File**: We added [LoadLatestOptions() / LoadOptionsFromFile()](https://github.com/facebook/rocksdb/blob/4.3.fb/include/rocksdb/utilities/options_util.h#L48-L58) that enables developers to construct RocksDB options object from an options file. 
* **Sanity Check Options**: We added [CheckOptionsCompatibility](https://github.com/facebook/rocksdb/blob/4.3.fb/include/rocksdb/utilities/options_util.h#L64-L77) that performs compatibility check on two sets of RocksDB options. Want to know more about how to use this new features? Check out the [RocksDB Options File wiki page](https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File) and start using this new feature today! rocksdb-6.11.4/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown000066400000000000000000000055031370372246700241610ustar00rootroot00000000000000--- title: RocksDB 4.5.1 Released! layout: post author: sdong category: blog redirect_from: - /blog/3179/rocksdb-4-5-1-released/ --- ## 4.5.1 (3/25/2016) ### Bug Fixes *  Fix failures caused by the destorying order of singleton objects.
## 4.5.0 (2/5/2016) ### Public API Changes * Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes. * Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. * DBOptions::delete_scheduler and NewDeleteScheduler() are removed; please use DBOptions::sst_file_manager and NewSstFileManager() instead. ### New Features * ldb tool now supports operations to non-default column families. * Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persisted data and skip mem-tables if writes were done with disableWAL = true. * Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate.
## 4.4.0 (1/14/2016) ### Public API Changes * Change names in CompactionPri and add a new one. * Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit. * If options.max_write_buffer_number > 3, writes will be slowed down when writing to the last write buffer to delay a full stop. * Introduce CompactionJobInfo::compaction_reason; this field includes the reason to trigger the compaction. * After slow down is triggered, if estimated pending compaction bytes keep increasing, slow down more. * Increase default options.delayed_write_rate to 2MB/s. * Added a new parameter --path to ldb tool. --path accepts the name of either MANIFEST, SST or a WAL file. Either --db or --path can be used when calling ldb.
## 4.3.0 (12/8/2015) ### New Features * CompactionFilter has new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key. * RocksDB will now persist options under the same directory as the RocksDB database on successful DB::Open, CreateColumnFamily, DropColumnFamily, and SetOptions. * Introduce LoadLatestOptions() in rocksdb/utilities/options_util.h. This function can construct the latest DBOptions / ColumnFamilyOptions used by the specified RocksDB intance. * Introduce CheckOptionsCompatibility() in rocksdb/utilities/options_util.h. This function checks whether the input set of options is able to open the specified DB successfully. ### Public API Changes * When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. rocksdb-6.11.4/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown000066400000000000000000000067771370372246700240470ustar00rootroot00000000000000--- title: RocksDB 4.8 Released! layout: post author: yiwu category: blog redirect_from: - /blog/3239/rocksdb-4-8-released/ --- ## 4.8.0 (5/2/2016) ### [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#public-api-change-1)Public API Change * Allow preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes. * Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see [https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F) * Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN". 
* Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status. ### [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#new-features-2)New Features * Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size.
## [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#470-482016)4.7.0 (4/8/2016) ### [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#public-api-change-2)Public API Change * rename options compaction_measure_io_stats to report_bg_io_stats and include flush too. * Change some default options. Now default options will optimize for server-workloads. Also enable slowdown and full stop triggers for pending compaction bytes. These changes may cause sub-optimal performance or significant increase of resource usage. To avoid these risks, users can open existing RocksDB with options extracted from RocksDB option files. See [https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File](https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File) for how to use RocksDB option files. Or you can call Options.OldDefaults() to recover old defaults. DEFAULT_OPTIONS_HISTORY.md will track change history of default options.
## [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#460-3102016)4.6.0 (3/10/2016) ### [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#public-api-changes-1)Public API Changes * Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier * Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, insert to cache will fail if no enough capacity can be free. Signature of Cache::Insert() is updated accordingly. * Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. The are updated when the Iterator is deleted. * Add monotonically increasing counter (DB property "rocksdb.current-super-version-number") that increments upon any change to the LSM tree. ### [](https://github.com/facebook/rocksdb/blob/master/HISTORY.md#new-features-3)New Features * Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. * Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" rocksdb-6.11.4/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown000066400000000000000000000042751370372246700242530ustar00rootroot00000000000000--- title: RocksDB 4.11.2 Released! layout: post author: sdong category: blog --- We abandoned release candidates 4.10.x and directly go to 4.11.2 from 4.9, to make sure the latest release is stable. In 4.11.2, we fixed several data corruption related bugs introduced in 4.9.0. ## 4.11.2 (9/15/2016) ### Bug fixes * Segfault when failing to open an SST file for read-ahead iterators. * WAL without data for all CFs is not deleted after recovery. ## 4.11.1 (8/30/2016) ### Bug Fixes * Mitigate the regression bug of deadlock condition during recovery when options.max_successive_merges hits. 
* Fix data race condition related to hash index in block based table when putting indexes in the block cache. ## 4.11.0 (8/1/2016) ### Public API Change * options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge page for memtable too, rather than just memtable bloom filter. ### New Features * A tool to migrate DB after options change. See include/rocksdb/utilities/option_change_migration.h. * Add ReadOptions.background_purge_on_iterator_cleanup. If true, we avoid file deletion when destorying iterators. ## 4.10.0 (7/5/2016) ### Public API Change * options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes * enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one. * Deprecate options.filter_deletes. ### New Features * Add avoid_flush_during_recovery option. * Add a read option background_purge_on_iterator_cleanup to avoid deleting files in foreground when destroying iterators. Instead, a job is scheduled in high priority queue and would be executed in a separate background thread. * RepairDB support for column families. RepairDB now associates data with non-default column families using information embedded in the SST/WAL files (4.7 or later). For data written by 4.6 or earlier, RepairDB associates it with the default column family. * Add options.write_buffer_manager which allows users to control total memtable sizes across multiple DB instances. rocksdb-6.11.4/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown000066400000000000000000000040211370372246700241430ustar00rootroot00000000000000--- title: RocksDB 5.0.1 Released! layout: post author: yiwu category: blog --- ### Public API Change * Options::max_bytes_for_level_multiplier is now a double along with all getters and setters. 
* Support dynamically changing `delayed_write_rate` and `max_total_wal_size` options via SetDBOptions(). * Introduce DB::DeleteRange for optimized deletion of large ranges of contiguous keys. * Support dynamically changing `delayed_write_rate` option via SetDBOptions(). * Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default. * Remove Tickers::SEQUENCE_NUMBER to avoid confusion if statistics object is shared among RocksDB instances. Alternatively DB::GetLatestSequenceNumber() can be used to get the same value. * Options.level0_stop_writes_trigger default value changes from 24 to 32. * New compaction filter API: CompactionFilter::FilterV2(). Allows to drop ranges of keys. * Removed flashcache support. * DB::AddFile() is deprecated and is replaced with DB::IngestExternalFile(). DB::IngestExternalFile() removes all the restrictions that existed for DB::AddFile. ### New Features * Add avoid_flush_during_shutdown option, which speeds up DB shutdown by not flushing unpersisted data (i.e. with disableWAL = true). Unpersisted data will be lost. The option is dynamically changeable via SetDBOptions(). * Add memtable_insert_with_hint_prefix_extractor option. The option is meant to reduce CPU usage for inserting keys into memtable, if keys can be grouped by prefix and inserts for each prefix are sequential or almost sequential. See include/rocksdb/options.h for more details. * Add LuaCompactionFilter in utilities. This allows developers to write compaction filters in Lua. To use this feature, LUA_PATH needs to be set to the root directory of Lua. * No longer populate "LATEST_BACKUP" file in backup directory, which formerly contained the number of the latest backup. The latest backup can be determined by finding the highest numbered file in the "meta/" subdirectory.
rocksdb-6.11.4/docs/_posts/2017-02-07-rocksdb-5-1-2-released.markdown000066400000000000000000000012261370372246700241530ustar00rootroot00000000000000--- title: RocksDB 5.1.2 Released! layout: post author: maysamyabandeh category: blog --- ### Public API Change * Support dynamically change `delete_obsolete_files_period_micros` option via SetDBOptions(). * Added EventListener::OnExternalFileIngested which will be called when IngestExternalFile() add a file successfully. * BackupEngine::Open and BackupEngineReadOnly::Open now always return error statuses matching those of the backup Env. ### Bug Fixes * Fix the bug that if 2PC is enabled, checkpoints may loss some recent transactions. * When file copying is needed when creating checkpoints or bulk loading files, fsync the file after the file copying. rocksdb-6.11.4/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown000066400000000000000000000044111370372246700247370ustar00rootroot00000000000000--- title: Bulkloading by ingesting external SST files layout: post author: IslamAbdelRahman category: blog --- ## Introduction One of the basic operations of RocksDB is writing to RocksDB, Writes happen when user call (DB::Put, DB::Write, DB::Delete ... ), but what happens when you write to RocksDB ? .. this is a brief description of what happens. - User insert a new key/value by calling DB::Put() (or DB::Write()) - We create a new entry for the new key/value in our in-memory structure (memtable / SkipList by default) and we assign it a new sequence number. - When the memtable exceeds a specific size (64 MB for example), we convert this memtable to a SST file, and put this file in level 0 of our LSM-Tree - Later, compaction will kick in and move data from level 0 to level 1, and then from level 1 to level 2 .. and so on But what if we can skip these steps and add data to the lowest possible level directly ? 
This is what bulk-loading does ## Bulkloading - Write all of our keys and values into SST file outside of the DB - Add the SST file into the LSM directly This is bulk-loading, and in specific use-cases it allow users to achieve faster data loading and better write-amplification. and doing it is as simple as ```cpp Options options; SstFileWriter sst_file_writer(EnvOptions(), options, options.comparator); Status s = sst_file_writer.Open(file_path); assert(s.ok()); // Insert rows into the SST file, note that inserted keys must be // strictly increasing (based on options.comparator) for (...) { s = sst_file_writer.Add(key, value); assert(s.ok()); } // Ingest the external SST file into the DB s = db_->IngestExternalFile({"/home/usr/file1.sst"}, IngestExternalFileOptions()); assert(s.ok()); ``` You can find more details about how to generate SST files and ingesting them into RocksDB in this [wiki page](https://github.com/facebook/rocksdb/wiki/Creating-and-Ingesting-SST-files) ## Use cases There are multiple use cases where bulkloading could be useful, for example - Generating SST files in offline jobs in Hadoop, then downloading and ingesting the SST files into RocksDB - Migrating shards between machines by dumping key-range in SST File and loading the file in a different machine - Migrating from a different storage (InnoDB to RocksDB migration in MyRocks) rocksdb-6.11.4/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown000066400000000000000000000024211370372246700241450ustar00rootroot00000000000000--- title: RocksDB 5.2.1 Released! layout: post author: sdong category: blog --- ### Public API Change * NewLRUCache() will determine number of shard bits automatically based on capacity, if the user doesn't pass one. This also impacts the default block cache when the user doesn't explict provide one. * Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files. 
### New Features * Added new overloaded function GetApproximateSizes that allows to specify if memtable stats should be computed only without computing SST files' stats approximations. * Added new function GetApproximateMemTableStats that approximates both number of records and size of memtables. * (Experimental) Two-level indexing that partition the index and creates a 2nd level index on the partitions. The feature can be enabled by setting kTwoLevelIndexSearch as IndexType and configuring index_per_partition. ### Bug Fixes * RangeSync() should work if ROCKSDB_FALLOCATE_PRESENT is not set * Fix wrong results in a data race case in Get() * Some fixes related to 2PC. * Fix several bugs in Direct I/O supports. * Fix a regression bug which can cause Seek() to miss some keys if the return key has been updated many times after the snapshot which is used by the iterator. rocksdb-6.11.4/docs/_posts/2017-05-12-partitioned-index-filter.markdown000066400000000000000000000071111370372246700252130ustar00rootroot00000000000000--- title: Partitioned Index/Filters layout: post author: maysamyabandeh category: blog --- As DB/mem ratio gets larger, the memory footprint of filter/index blocks becomes non-trivial. Although `cache_index_and_filter_blocks` allows storing only a subset of them in block cache, their relatively large size negatively affects the performance by i) occupying the block cache space that could otherwise be used for caching data, ii) increasing the load on the disk storage by loading them into the cache after a miss. Here we illustrate these problems in more detail and explain how partitioning index/filters alleviates the overhead. ### How large are the index/filter blocks? RocksDB has by default one index/filter block per SST file. The size of the index/filter varies based on the configuration but for a SST of size 256MB the index/filter block of size 0.5/5MB is typical, which is much larger than the typical data block size of 4-32KB. 
That is fine when all index/filters fit perfectly into memory and hence are read once per SST lifetime, not so much when they compete with data blocks for the block cache space and are also likely to be re-read many times from the disk. ### What is the big deal with large index/filter blocks? When index/filter blocks are stored in block cache they are effectively competing with data blocks (as well as with each other) on this scarce resource. A filter of size 5MB is occupying the space that could otherwise be used to cache 1000s of data blocks (of size 4KB). This would result in more cache misses for data blocks. The large index/filters also kick each other out of the block cache more often and exacerbate their own cache miss rate too. This is while only a small part of the index/filter block might have been actually used during its lifetime in the cache. After the cache miss of an index/filter, it has to be reloaded from the disk, and its large size is not helping in reducing the IO cost. While a simple point lookup might need at most a couple of data block reads (of size 4KB) one from each layer of LSM, it might end up also loading multiple megabytes of index/filter blocks. If that happens often then the disk is spending more time serving index/filters rather than the actual data blocks. ## What is partitioned index/filters? With partitioning, the index/filter of a SST file is partitioned into smaller blocks with an additional top-level index on them. When reading an index/filter, only top-level index is loaded into memory. The partitioned index/filter then uses the top-level index to load on demand into the block cache the partitions that are required to perform the index/filter query. The top-level index, which has much smaller memory footprint, can be stored in heap or block cache depending on the `cache_index_and_filter_blocks` setting. 
### Success stories #### HDD, 100TB DB In this example we have a DB of size 86G on HDD and emulate the small memory that is present to a node with 100TB of data by using direct IO (skipping OS file cache) and a very small block cache of size 60MB. Partitioning improves throughput by 11x from 5 op/s to 55 op/s. #### SSD, Linkbench In this example we have a DB of size 300G on SSD and emulate the small memory that would be available in presence of other DBs on the same node by by using direct IO (skipping OS file cache) and block cache of size 6G and 2G. Without partitioning the linkbench throughput drops from 38k tps to 23k when reducing block cache size from 6G to 2G. With partitioning the throughput drops from 38k to only 30k. Learn more [here](https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters). rocksdb-6.11.4/docs/_posts/2017-05-14-core-local-stats.markdown000066400000000000000000000125631370372246700234660ustar00rootroot00000000000000--- title: Core-local Statistics layout: post author: ajkr category: blog --- ## Origins: Global Atomics Until RocksDB 4.12, ticker/histogram statistics were implemented with std::atomic values shared across the entire program. A ticker consists of a single atomic, while a histogram consists of several atomics to represent things like min/max/per-bucket counters. These statistics could be updated by all user/background threads. For concurrent/high-throughput workloads, cache line bouncing of atomics caused high CPU utilization. For example, we have tickers that count block cache hits and misses. Almost every user read increments these tickers a few times. Many concurrent user reads would cause the cache lines containing these atomics to bounce between cores. ### Performance Here are perf results for 32 reader threads where most reads (99%+) are served by uncompressed block cache. Such a scenario stresses the statistics code heavily. 
Benchmark command: `TEST_TMPDIR=/dev/shm/ perf record -g ./db_bench -statistics -use_existing_db=true -benchmarks=readrandom -threads=32 -cache_size=1048576000 -num=1000000 -reads=1000000 && perf report -g --children` Perf snippet for "cycles" event: ``` Children Self Command Shared Object Symbol + 30.33% 30.17% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick + 3.65% 0.98% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime ``` Perf snippet for "cache-misses" event: ``` Children Self Command Shared Object Symbol + 19.54% 19.50% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick + 3.44% 0.57% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime ``` The high CPU overhead for updating tickers and histograms corresponds well to the high cache misses. ## Thread-locals: Faster Updates Since RocksDB 4.12, ticker/histogram statistics use thread-local storage. Each thread has a local set of atomic values that no other thread can update. This prevents the cache line bouncing problem described above. Even though updates to a given value are always made by the same thread, atomics are still useful to synchronize with aggregations for querying statistics. Implementing this approach involved a couple challenges. First, each query for a statistic's global value must aggregate all threads' local values. This adds some overhead, which may pass unnoticed if statistics are queried infrequently. Second, exited threads' local values are still needed to provide accurate statistics. We handle this by merging a thread's local values into process-wide variables upon thread exit. ### Performance Update benchmark setup is same as before. CPU overhead improved 7.8x compared to global atomics, corresponding to a 17.8x reduction in cache-misses overhead. Perf snippet for "cycles" event: ``` Children Self Command Shared Object Symbol + 2.96% 0.87% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick + 1.37% 0.10% db_bench db_bench [.] 
rocksdb::StatisticsImpl::measureTime ``` Perf snippet for "cache-misses" event: ``` Children Self Command Shared Object Symbol + 1.21% 0.65% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick 0.08% 0.00% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime ``` To measure statistics query latency, we ran sysbench with 4K OLTP clients concurrently with one client that queries statistics repeatedly. Times shown are in milliseconds. ``` min: 18.45 avg: 27.91 max: 231.65 95th percentile: 55.82 ``` ## Core-locals: Faster Querying The thread-local approach is working well for applications calling RocksDB from only a few threads, or polling statistics infrequently. Eventually, though, we found use cases where those assumptions do not hold. For example, one application has per-connection threads and typically runs into performance issues when connection count grows very high. For debugging such issues, they want high-frequency statistics polling to correlate issues in their application with changes in RocksDB's state. Once [PR #2258](https://github.com/facebook/rocksdb/pull/2258) lands, ticker/histogram statistics will be local to each CPU core. Similarly to thread-local, each core updates only its local values, thus avoiding cache line bouncing. Local values are still atomics to make aggregation possible. With this change, query work depends only on number of cores, not the number of threads. So, applications with many more threads than cores can no longer impact statistics query latency. ### Performance Update benchmark setup is same as before. CPU overhead worsened ~23% compared to thread-local, while cache performance was unchanged. Perf snippet for "cycles" event: ``` Children Self Command Shared Object Symbol + 2.96% 0.87% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick + 1.37% 0.10% db_bench db_bench [.] 
rocksdb::StatisticsImpl::measureTime ``` Perf snippet for "cache-misses" event: ``` Children Self Command Shared Object Symbol + 1.21% 0.65% db_bench db_bench [.] rocksdb::StatisticsImpl::recordTick 0.08% 0.00% db_bench db_bench [.] rocksdb::StatisticsImpl::measureTime ``` Query latency is measured same as before with times in milliseconds. Average latency improved by 6.3x compared to thread-local. ``` min: 2.47 avg: 4.45 max: 91.13 95th percentile: 7.56 ``` rocksdb-6.11.4/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown000066400000000000000000000046411370372246700241710ustar00rootroot00000000000000--- title: RocksDB 5.4.5 Released! layout: post author: sagar0 category: blog --- ### Public API Change * Support dynamically changing `stats_dump_period_sec` option via SetDBOptions(). * Added ReadOptions::max_skippable_internal_keys to set a threshold to fail a request as incomplete when too many keys are being skipped while using iterators. * DB::Get in place of std::string accepts PinnableSlice, which avoids the extra memcpy of value to std::string in most of cases. * PinnableSlice releases the pinned resources that contain the value when it is destructed or when ::Reset() is called on it. * The old API that accepts std::string, although discouraged, is still supported. * Replace Options::use_direct_writes with Options::use_direct_io_for_flush_and_compaction. See Direct IO wiki for details. ### New Features * Memtable flush can be avoided during checkpoint creation if total log file size is smaller than a threshold specified by the user. * Introduce level-based L0->L0 compactions to reduce file count, so write delays are incurred less often. * (Experimental) Partitioning filters which creates an index on the partitions. The feature can be enabled by setting partition_filters when using kFullFilter. Currently the feature also requires two-level indexing to be enabled. 
Number of partitions is the same as the number of partitions for indexes, which is controlled by metadata_block_size. * DB::ResetStats() to reset internal stats. * Added CompactionEventListener and EventListener::OnFlushBegin interfaces. * Added DB::CreateColumnFamilie() and DB::DropColumnFamilies() to bulk create/drop column families. * Facility for cross-building RocksJava using Docker. ### Bug Fixes * Fix WriteBatchWithIndex address use after scope error. * Fix WritableFile buffer size in direct IO. * Add prefetch to PosixRandomAccessFile in buffered io. * Fix PinnableSlice access invalid address when row cache is enabled. * Fix huge fallocate calls fail and make XFS unhappy. * Fix memory alignment with logical sector size. * Fix alignment in ReadaheadRandomAccessFile. * Fix bias with read amplification stats (READ_AMP_ESTIMATE_USEFUL_BYTES and READ_AMP_TOTAL_READ_BYTES). * Fix a manual / auto compaction data race. * Fix CentOS 5 cross-building of RocksJava. * Build and link with ZStd when creating the static RocksJava build. * Fix snprintf's usage to be cross-platform. * Fix build errors with blob DB. * Fix readamp test type inconsistency. rocksdb-6.11.4/docs/_posts/2017-06-26-17-level-based-changes.markdown000066400000000000000000000131211370372246700243230ustar00rootroot00000000000000--- title: Level-based Compaction Changes layout: post author: ajkr category: blog --- ### Introduction RocksDB provides an option to limit the number of L0 files, which bounds read-amplification. Since L0 files (unlike files at lower levels) can span the entire key-range, a key might be in any file, thus reads need to check them one-by-one. Users often wish to configure a low limit to improve their read latency. Although, the mechanism with which we enforce L0's file count limit may be unappealing. When the limit is reached, RocksDB intentionally delays user writes. This slows down accumulation of files in L0, and frees up resources for compacting files down to lower levels. 
But adding delays will significantly increase user-visible write latency jitter. Also, due to how L0 files can span the entire key-range, compaction parallelization is limited. Files at L0 or L1 may be locked due to involvement in pending L0->L1 or L1->L2 compactions. We can only schedule a parallel L0->L1 compaction if it does not require any of the locked files, which is typically not the case. To handle these constraints better, we added a new type of compaction, L0->L0. It quickly reduces file count in L0 and can be scheduled even when L1 files are locked, unlike L0->L1. We also changed the L0->L1 picking algorithm to increase opportunities for parallelism. ### Old L0->L1 Picking Logic Previously, our logic for picking which L0 file to compact was the same as every other level: pick the largest file in the level. One special property of L0->L1 compaction is that files can overlap in the input level, so those overlapping files must be pulled in as well. For example, a compaction may look like this: ![full-range.png](/static/images/compaction/full-range.png) This compaction pulls in every L0 and L1 file. This happens regardless of which L0 file is initially chosen as each file overlaps with every other file. Users may insert their data less uniformly in the key-range. For example, a database may look like this during L0->L1 compaction: ![part-range-old.png](/static/images/compaction/part-range-old.png) Let's say the third file from the top is the largest, and let's say the top two files are created after the compaction started. When the compaction is picked, the fourth L0 file and six rightmost L1 files are pulled in due to overlap. Notice this leaves the database in a state where we might not be able to schedule parallel compactions. For example, if the sixth file from the top is the next largest, we can't compact it because it overlaps with the top two files, which overlap with the locked L0 files. 
We can now see the high-level problems with this approach more clearly. First, locked files in L0 or L1 prevent us from parallelizing compactions. When locked files block L0->L1 compaction, there is nothing we can do to eliminate L0 files. Second, L0->L1 compactions are relatively slow. As we saw, when keys are uniformly distributed, L0->L1 compacts two entire levels. While this is happening, new files are being flushed to L0, advancing towards the file count limit. ### New L0->L0 Algorithm We introduced compaction within L0 to improve both parallelization and speed of reducing L0 file count. An L0->L0 compaction may look like this: ![l1-l2-contend.png](/static/images/compaction/l1-l2-contend.png) Say the L1->L2 compaction started first. Now L0->L1 is prevented by the locked L1 file. In this case, we compact files within L0. This allows us to start the work for eliminating L0 files earlier. It also lets us do less work since we don't pull in any L1 files, whereas L0->L1 compaction would've pulled in all of them. This lets us quickly reduce L0 file count to keep read-amp low while sustaining large bursts of writes (i.e., fast accumulation of L0 files). The tradeoff is that this increases total compaction work, as we're now compacting files without contributing towards our eventual goal of moving them towards lower levels. Our benchmarks, though, consistently show fewer compaction stalls and improved write throughput. One justification is that L0 file data is highly likely to be in page cache and/or block cache due to it being recently written and frequently accessed. So, this type of compaction is relatively cheap compared to compactions at lower levels. This feature has been available since RocksDB 5.4. ### New L0->L1 Picking Logic Recall how the old L0->L1 picking algorithm chose the largest L0 file for compaction. This didn't fit well with L0->L0 compaction, which operates on a span of files.
That span begins at the newest L0 file, and expands towards older files as long as they're not being compacted. Since the largest file may be anywhere, the old L0->L1 picking logic could arbitrarily prevent us from getting a long span of files. See the second illustration in this post for a scenario where this would happen. So, we changed the L0->L1 picking algorithm to start from the oldest file and expand towards newer files as long as they're not being compacted. For example: ![l0-l1-contend.png](/static/images/compaction/l0-l1-contend.png) Now, there can never be L0 files unreachable for L0->L0 due to L0->L1 selecting files in the middle. When longer spans of files are available for L0->L0, we perform less compaction work per deleted L0 file, thus improving efficiency. This feature will be available in RocksDB 5.7. ### Performance Changes Mark Callaghan did the most extensive benchmarking of this feature's impact on MyRocks. See his results [here](http://smalldatum.blogspot.com/2017/05/innodb-myrocks-and-tokudb-on-insert.html). Note the primary change between his March 17 and April 14 builds is the latter performs L0->L0 compaction. rocksdb-6.11.4/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown000066400000000000000000000020461370372246700241670ustar00rootroot00000000000000--- title: RocksDB 5.5.1 Released! layout: post author: lightmark category: blog --- ### New Features * FIFO compaction to support Intra L0 compaction too with CompactionOptionsFIFO.allow_compaction=true. * Statistics::Reset() to reset user stats. * ldb add option --try_load_options, which will open DB with its own option file. * Introduce WriteBatch::PopSavePoint to pop the most recent save point explicitly. * Support dynamically change `max_open_files` option via SetDBOptions() * Added DB::CreateColumnFamilie() and DB::DropColumnFamilies() to bulk create/drop column families. * Add debugging function `GetAllKeyVersions` to see internal versions of a range of keys. 
* Support file ingestion with universal compaction style * Support file ingestion behind with option `allow_ingest_behind` * New option enable_pipelined_write which may improve write throughput in case writing from multiple threads and WAL enabled. ### Bug Fixes * Fix the bug that Direct I/O uses direct reads for non-SST file * Fix the bug that flush doesn't respond to fsync result rocksdb-6.11.4/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown000066400000000000000000000037131370372246700241670ustar00rootroot00000000000000--- title: RocksDB 5.6.1 Released! layout: post author: yiwu category: blog --- ### Public API Change * Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads. See https://github.com/facebook/rocksdb/wiki/Thread-Pool for more details. * Replace `Options::max_background_flushes`, `Options::max_background_compactions`, and `Options::base_background_compactions` all with `Options::max_background_jobs`, which automatically decides how many threads to allocate towards flush/compaction. * options.delayed_write_rate by default take the value of options.rate_limiter rate. * Replace global variable `IOStatsContext iostats_context` with `IOStatsContext* get_iostats_context()`; replace global variable `PerfContext perf_context` with `PerfContext* get_perf_context()`. ### New Features * Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads. See http://rocksdb.org/blog/2017/05/14/core-local-stats.html for more details. * Users can pass a cache object to write buffer manager, so that they can cap memory usage for memtable and block cache using one single limit. 
* Flush will be triggered when 7/8 of the limit introduced by write_buffer_manager or db_write_buffer_size is triggered, so that the hard threshold is hard to hit. See https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager for more details. * Introduce WriteOptions.low_pri. If it is true, low priority writes will be throttled if the compaction is behind. See https://github.com/facebook/rocksdb/wiki/Low-Priority-Write for more details. * `DB::IngestExternalFile()` now supports ingesting files into a database containing range deletions. ### Bug Fixes * Shouldn't ignore return value of fsync() in flush. rocksdb-6.11.4/docs/_posts/2017-08-24-pinnableslice.markdown000066400000000000000000000042511370372246700231210ustar00rootroot00000000000000--- title: PinnableSlice; less memcpy with point lookups layout: post author: maysamyabandeh category: blog --- The classic API for [DB::Get](https://github.com/facebook/rocksdb/blob/9e583711144f580390ce21a49a8ceacca338fcd5/include/rocksdb/db.h#L310) receives a std::string as argument to which it will copy the value. The memcpy overhead could be non-trivial when the value is large. The [new API](https://github.com/facebook/rocksdb/blob/9e583711144f580390ce21a49a8ceacca338fcd5/include/rocksdb/db.h#L322) receives a PinnableSlice instead, which avoids memcpy in most of the cases. ### What is PinnableSlice? Similarly to Slice, PinnableSlice refers to some in-memory data so it does not incur the memcpy cost. To ensure that the data will not be erased while it is being processed by the user, PinnableSlice, as its name suggests, has the data pinned in memory. The pinned data are released when PinnableSlice object is destructed or when ::Reset is invoked explicitly on it. ### How good is it? Here are the improvements in throughput for an [in-memory benchmark](https://github.com/facebook/rocksdb/pull/1756#issuecomment-286201693): * value 1k byte: 14% * value 10k byte: 34% ### Any limitations? 
PinnableSlice tries to avoid memcpy as much as possible. The primary gain is when reading large values from the block cache. There are however cases that it would still have to copy the data into its internal buffer. The reason is mainly the complexity of implementation and if there is enough motivation on the application side. the scope of PinnableSlice could be extended to such cases too. These include: * Merged values * Reads from memtables ### How to use it? ```cpp PinnableSlice pinnable_val; while (!stopped) { auto s = db->Get(opt, cf, key, &pinnable_val); // ... use it pinnable_val.Reset(); // then release it immediately } ``` You can also [initialize the internal buffer](https://github.com/facebook/rocksdb/blob/9e583711144f580390ce21a49a8ceacca338fcd5/include/rocksdb/db.h#L314) of PinnableSlice by passing your own string in the constructor. [simple_example.cc](https://github.com/facebook/rocksdb/blob/master/examples/simple_example.cc) demonstrates that with more examples. rocksdb-6.11.4/docs/_posts/2017-08-25-flushwal.markdown000066400000000000000000000055641370372246700221470ustar00rootroot00000000000000--- title: FlushWAL; less fwrite, faster writes layout: post author: maysamyabandeh category: blog --- When `DB::Put` is called, the data is written to both memtable (to be flushed to SST files later) and the WAL (write-ahead log) if it is enabled. In the case of a crash, RocksDB can recover as much as the memtable state that is reflected into the WAL. By default RocksDB automatically flushes the WAL from the application memory to the OS buffer after each `::Put`. It however can be configured to perform the flush manually after an explicit call to `::FlushWAL`. Not doing fwrite syscall after each `::Put` offers a tradeoff between reliability and write latency for the general case. As we explain below, some applications such as MyRocks benefit from this API to gain higher write throughput with however no compromise in reliability. ### How much is the gain? 
Using `::FlushWAL` API along with setting `DBOptions.concurrent_prepare`, MyRocks achieves 40% higher throughput in Sysbench's [update-nonindex](https://github.com/akopytov/sysbench/blob/master/src/lua/oltp_update_non_index.lua) benchmark. ### Write, Flush, and Sync The write to the WAL is first written to the application memory buffer. The buffer in the next step is "flushed" to OS buffer by calling fwrite syscall. The OS buffer is later "synced" to the persistent storage. The data in the OS buffer, although not persisted yet, will survive the application crash. By default, the flush occurs automatically upon each call to `DB::Put` or `DB::Write`. The user can additionally request sync after each write by setting `WriteOptions::sync`. ### FlushWAL API The user can turn off the automatic flush of the WAL by setting `DBOptions::manual_wal_flush`. In that case, the WAL buffer is flushed when it is either full or `DB::FlushWAL` is called by the user. The API also accepts a boolean argument should we want to sync right after the flush: `::FlushWAL(true)`. ### Success story: MyRocks Some applications that use RocksDB already have other mechanisms in place to provide reliability. MySQL for example uses 2PC (two-phase commit) to write to both binlog as well as the storage engine such as InnoDB and MyRocks. The group commit logic in MySQL allows the 1st phase (Prepare) to be run in parallel but after a commit group is formed performs the 2nd phase (Commit) in a serial manner. This makes low commit latency in the storage engine essential for achieving high throughput. The commit in MyRocks includes writing to the RocksDB WAL, which, as explained above, by default incurs the latency of flushing the new WAL appends to the OS buffer. Since binlog helps in recovering from some failure scenarios, MySQL can provide reliability without however needing a storage WAL flush after each individual commit.
MyRocks benefits from this property, disables automatic WAL flush in RocksDB, and manually calls `::FlushWAL` when requested by MySQL. rocksdb-6.11.4/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown000066400000000000000000000035651370372246700240450ustar00rootroot00000000000000--- title: RocksDB 5.8 Released! layout: post author: maysamyabandeh category: blog --- ### Public API Change * Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints. * `Slice::compare` and BytewiseComparator `Compare` no longer accept `Slice`s containing nullptr. * `Transaction::Get` and `Transaction::GetForUpdate` variants with `PinnableSlice` added. ### New Features * Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators. * Replace dynamic_cast<> (except unit test) so people can choose to build with RTTI off. With make, release mode is by default built with -fno-rtti and debug mode is built without it. Users can override it by setting USE_RTTI=0 or 1. * Universal compactions including the bottom level can be executed in a dedicated thread pool. This alleviates head-of-line blocking in the compaction queue, which cause write stalling, particularly in multi-instance use cases. Users can enable this feature via `Env::SetBackgroundThreads(N, Env::Priority::BOTTOM)`, where `N > 0`. * Allow merge operator to be called even with a single merge operand during compactions, by appropriately overriding `MergeOperator::AllowSingleOperand`. * Add `DB::VerifyChecksum()`, which verifies the checksums in all SST files in a running DB. * Block-based table support for disabling checksums by setting `BlockBasedTableOptions::checksum = kNoChecksum`. ### Bug Fixes * Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`. * Fix incorrect dropping of deletions during intra-L0 compaction. 
* Fix transient reappearance of keys covered by range deletions when memtable prefix bloom filter is enabled. * Fix potentially wrong file smallest key when range deletions separated by snapshot are written together. rocksdb-6.11.4/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown000066400000000000000000000045311370372246700252160ustar00rootroot00000000000000--- title: Auto-tuned Rate Limiter layout: post author: ajkr category: blog --- ### Introduction Our rate limiter has been hard to configure since users need to pick a value that is low enough to prevent background I/O spikes, which can impact user-visible read/write latencies. Meanwhile, picking too low a value can cause memtables and L0 files to pile up, eventually leading to writes stalling. Tuning the rate limiter has been especially difficult for users whose DB instances have different workloads, or have workloads that vary over time, or commonly both. To address this, in RocksDB 5.9 we released a dynamic rate limiter that adjusts itself over time according to demand for background I/O. It can be enabled simply by passing `auto_tuned=true` in the `NewGenericRateLimiter()` call. In this case `rate_bytes_per_sec` will indicate the upper-bound of the window within which a rate limit will be picked dynamically. The chosen rate limit will be much lower unless absolutely necessary, so setting this to the device's maximum throughput is a reasonable choice on dedicated hosts. ### Algorithm We use a simple multiplicative-increase, multiplicative-decrease algorithm. We measure demand for background I/O as the ratio of intervals where the rate limiter is drained. There are low and high watermarks for this ratio, which will trigger a change in rate limit when breached. The rate limit can move within a window bounded by the user-specified upper-bound, and a lower-bound that we derive internally. 
Users can expect this lower bound to be 1-2 orders of magnitude less than the provided upper-bound (so don't provide INT64_MAX as your upper-bound), although it's subject to change. ### Benchmark Results Data is ingested at 10MB/s and the rate limiter was created with 1000MB/s as its upper bound. The dynamically chosen rate limit hovers around 125MB/s. The other clustering of points at 50MB/s is due to number of compaction threads being reduced to one when there's no compaction pressure. ![](/static/images/rate-limiter/write-KBps-series.png) ![](/static/images/rate-limiter/auto-tuned-write-KBps-series.png) The following graph summarizes the above two time series graphs in CDF form. In particular, notice the p90 - p100 for background write rate are significantly lower with auto-tuned rate limiter enabled. ![](/static/images/rate-limiter/write-KBps-cdf.png) rocksdb-6.11.4/docs/_posts/2017-12-19-write-prepared-txn.markdown000066400000000000000000000131331370372246700240500ustar00rootroot00000000000000--- title: WritePrepared Transactions layout: post author: maysamyabandeh category: blog --- RocksDB supports both optimistic and pessimistic concurrency controls. The pessimistic transactions make use of locks to provide isolation between the transactions. The default write policy in pessimistic transactions is _WriteCommitted_, which means that the data is written to the DB, i.e., the memtable, only after the transaction is committed. This policy simplified the implementation but came with some limitations in throughput, transaction size, and variety in supported isolation levels. In the below, we explain these in detail and present the other write policies, _WritePrepared_ and _WriteUnprepared_. We then dive into the design of _WritePrepared_ transactions. ### WriteCommitted, Pros and Cons With _WriteCommitted_ write policy, the data is written to the memtable only after the transaction commits. 
This greatly simplifies the read path as any data that is read by other transactions can be assumed to be committed. This write policy, however, implies that the writes are buffered in memory in the meantime. This makes memory a bottleneck for large transactions. The delay of the commit phase in 2PC (two-phase commit) also becomes noticeable since most of the work, i.e., writing to memtable, is done at the commit phase. When the commits of multiple transactions are done in a serial fashion, such as in the 2PC implementation of MySQL, the lengthy commit latency becomes a major contributor to lower throughput. Moreover this write policy cannot provide weaker isolation levels, such as READ UNCOMMITTED, that could potentially provide higher throughput for some applications. ### Alternatives: _WritePrepared_ and _WriteUnprepared_ To tackle the lengthy commit issue, we should do memtable writes at earlier phases of 2PC so that the commit phase becomes lightweight and fast. 2PC is composed of the write stage, where the transaction `::Put` is invoked, the prepare phase, where `::Prepare` is invoked (upon which the DB promises to commit the transaction if later requested), and the commit phase, where `::Commit` is invoked and the transaction writes become visible to all readers. To make the commit phase lightweight, the memtable write could be done at either `::Prepare` or `::Put` stages, resulting in _WritePrepared_ and _WriteUnprepared_ write policies respectively. The downside is that when another transaction is reading data, it would need a way to tell which data is committed, and if it is, whether it was committed before the transaction's start, i.e., in the read snapshot of the transaction. _WritePrepared_ would still have the issue of buffering the data, which makes the memory the bottleneck for large transactions. It however provides a good milestone for transitioning from _WriteCommitted_ to _WriteUnprepared_ write policy.
Here we explain the design of _WritePrepared_ policy. We will cover the changes that make the design also support _WriteUnprepared_ in an upcoming post. ### _WritePrepared_ in a nutshell These are the primary design questions that need to be addressed: 1) How do we identify the key/values in the DB with transactions that wrote them? 2) How do we figure out whether a key/value written by transaction Txn_w is in the read snapshot of the reading transaction Txn_r? 3) How do we roll back the data written by aborted transactions? With _WritePrepared_, a transaction still buffers the writes in a write batch object in memory. When 2PC `::Prepare` is called, it writes the in-memory write batch to the WAL (write-ahead log) as well as to the memtable(s) (one memtable per column family). We reuse the existing notion of sequence numbers in RocksDB to tag all the key/values in the same write batch with the same sequence number, `prepare_seq`, which is also used as the identifier for the transaction. At commit time, it writes a commit marker to the WAL, whose sequence number, `commit_seq`, will be used as the commit timestamp of the transaction. Before releasing the commit sequence number to the readers, it stores a mapping from `prepare_seq` to `commit_seq` in an in-memory data structure that we call _CommitCache_. When a transaction reads values from the DB (tagged with `prepare_seq`), it makes use of the _CommitCache_ to figure out whether the `commit_seq` of the value is in its read snapshot. To roll back an aborted transaction, we apply the status before the transaction by making another write that cancels out the writes of the aborted transaction. The _CommitCache_ is a lock-free data structure that caches the recent commit entries. Looking up the entries in the cache must be enough for almost all the transactions that commit in a timely manner.
When evicting the older entries from the cache, it still maintains some other data structures to cover the corner cases for transactions that takes abnormally too long to finish. We will cover them in the design details below. ### Benchmark Results Here we presents the improvements observed in MyRocks with sysbench and linkbench: * benchmark...........tps.........p95 latency....cpu/query * insert...................68% * update-noindex...30%......38% * update-index.......61%.......28% * read-write............6%........3.5% * read-only...........-1.2%.....-1.8% * linkbench.............1.9%......+overall........0.6% Here are also the detailed results for [In-Memory Sysbench](https://gist.github.com/maysamyabandeh/bdb868091b2929a6d938615fdcf58424) and [SSD Sysbench](https://gist.github.com/maysamyabandeh/ff94f378ab48925025c34c47eff99306) curtesy of [@mdcallag](https://github.com/mdcallag). Learn more [here](https://github.com/facebook/rocksdb/wiki/WritePrepared-Transactions). rocksdb-6.11.4/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown000066400000000000000000000027751370372246700242440ustar00rootroot00000000000000--- title: RocksDB 5.10.2 Released! layout: post author: siying category: blog --- ### Public API Change * When running `make` with environment variable `USE_SSE` set and `PORTABLE` unset, will use all machine features available locally. Previously this combination only compiled SSE-related features. ### New Features * CRC32C is now using the 3-way pipelined SSE algorithm `crc32c_3way` on supported platforms to improve performance. The system will choose to use this algorithm on supported platforms automatically whenever possible. If PCLMULQDQ is not supported it will fall back to the old Fast_CRC32 algorithm. * Provide lifetime hints when writing files on Linux. This reduces hardware write-amp on storage devices supporting multiple streams. 
* Add a DB stat, `NUMBER_ITER_SKIP`, which returns how many internal keys were skipped during iterations (e.g., due to being tombstones or duplicate versions of a key). * Add PerfContext counters, `key_lock_wait_count` and `key_lock_wait_time`, which measure the number of times transactions wait on key locks and total amount of time waiting. ### Bug Fixes * Fix IOError on WAL write doesn't propagate to write group follower * Make iterator invalid on merge error. * Fix performance issue in `IngestExternalFile()` affecting databases with large number of SST files. * Fix possible corruption to LSM structure when `DeleteFilesInRange()` deletes a subset of files spanned by a `DeleteRange()` marker. * Fix DB::Flush() keep waiting after flush finish under certain condition. rocksdb-6.11.4/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown000066400000000000000000000054661370372246700247140ustar00rootroot00000000000000--- title: Rocksdb Tuning Advisor layout: post author: poojam23 category: blog --- The performance of Rocksdb is contingent on its tuning. However, because of the complexity of its underlying technology and a large number of configurable parameters, a good configuration is sometimes hard to obtain. The aim of the python command-line tool, Rocksdb Advisor, is to automate the process of suggesting improvements in the configuration based on advice from Rocksdb experts. ### Overview Experts share their wisdom as rules comprising of conditions and suggestions in the INI format (refer [rules.ini](https://github.com/facebook/rocksdb/blob/master/tools/advisor/advisor/rules.ini)). Users provide the Rocksdb configuration that they want to improve upon (as the familiar Rocksdb OPTIONS file — [example](https://github.com/facebook/rocksdb/blob/master/examples/rocksdb_option_file_example.ini)) and the path of the file which contains Rocksdb logs and statistics. 
The [Advisor](https://github.com/facebook/rocksdb/blob/master/tools/advisor/advisor/rule_parser_example.py) creates appropriate DataSource objects (for Rocksdb [logs](https://github.com/facebook/rocksdb/blob/master/tools/advisor/advisor/db_log_parser.py), [options](https://github.com/facebook/rocksdb/blob/master/tools/advisor/advisor/db_options_parser.py), [statistics](https://github.com/facebook/rocksdb/blob/master/tools/advisor/advisor/db_stats_fetcher.py) etc.) and provides them to the [Rules Engine](https://github.com/facebook/rocksdb/blob/master/tools/advisor/advisor/rule_parser.py). The Rules Engine uses rules from experts to parse data-sources and trigger appropriate rules. The Advisor's output gives information about which rules were triggered, why they were triggered and what each of them suggests. Each suggestion provided by a triggered rule advises some action on a Rocksdb configuration option, for example, increase CFOptions.write_buffer_size, set bloom_bits to 2 etc. ### Usage An example command to run the tool: ```shell cd rocksdb/tools/advisor python3 -m advisor.rule_parser_example --rules_spec=advisor/rules.ini --rocksdb_options=test/input_files/OPTIONS-000005 --log_files_path_prefix=test/input_files/LOG-0 --stats_dump_period_sec=20 ``` Sample output where a Rocksdb log-based rule has been triggered: ```shell Rule: stall-too-many-memtables LogCondition: stall-too-many-memtables regex: Stopping writes because we have \d+ immutable memtables \(waiting for flush\), max_write_buffer_number is set to \d+ Suggestion: inc-bg-flush option : DBOptions.max_background_flushes action : increase suggested_values : ['2'] Suggestion: inc-write-buffer option : CFOptions.max_write_buffer_number action : increase scope: col_fam: {'default'} ``` ### Read more For more information, refer to [advisor](https://github.com/facebook/rocksdb/tree/master/tools/advisor/README.md).
rocksdb-6.11.4/docs/_posts/2018-08-23-data-block-hash-index.markdown000066400000000000000000000217241370372246700243440ustar00rootroot00000000000000--- title: Improving Point-Lookup Using Data Block Hash Index layout: post author: fgwu category: blog --- We've designed and implemented a _data block hash index_ in RocksDB that has the benefit of both reducing the CPU util and increasing the throughput for point lookup queries with a reasonable and tunable space overhead. Specifially, we append a compact hash table to the end of the data block for efficient indexing. It is backward compatible with the data base created without this feature. After turned on the hash index feature, existing data will be gradually converted to the hash index format. Benchmarks with `db_bench` show the CPU utilization of one of the main functions in the point lookup code path, `DataBlockIter::Seek()`, is reduced by 21.8%, and the overall RocksDB throughput is increased by 10% under purely cached workloads, at an overhead of 4.6% more space. Shadow testing with Facebook production traffic shows good CPU improvements too. ### How to use it Two new options are added as part of this feature: `BlockBasedTableOptions::data_block_index_type` and `BlockBasedTableOptions::data_block_hash_table_util_ratio`. The hash index is disabled by default unless `BlockBasedTableOptions::data_block_index_type` is set to `data_block_index_type = kDataBlockBinaryAndHash`. The hash table utilization ratio is adjustable using `BlockBasedTableOptions::data_block_hash_table_util_ratio`, which is valid only if `data_block_index_type = kDataBlockBinaryAndHash`. ``` // the definitions can be found in include/rocksdb/table.h // The index type that will be used for the data block. 
enum DataBlockIndexType : char { kDataBlockBinarySearch = 0, // traditional block type kDataBlockBinaryAndHash = 1, // additional hash index }; // Set to kDataBlockBinaryAndHash to enable hash index DataBlockIndexType data_block_index_type = kDataBlockBinarySearch; // #entries/#buckets. It is valid only when data_block_index_type is // kDataBlockBinaryAndHash. double data_block_hash_table_util_ratio = 0.75; ``` ### Data Block Hash Index Design Current data block format groups adjacent keys together as a restart interval. One block consists of multiple restart intervals. The byte offset of the beginning of each restart interval, i.e. a restart point, is stored in an array called restart interval index or binary seek index. RocksDB does a binary search when performing point lookup for keys in data blocks to find the right restart interval in which the key may reside. We will use binary seek and binary search interchangeably in this post. In order to find the right location where the key may reside using binary search, multiple key parsings and comparisons are needed. Each binary search branching triggers a CPU cache miss, causing high CPU utilization. We have seen that this binary search takes up considerable CPU in production use-cases. ![](/static/images/data-block-hash-index/block-format-binary-seek.png) We implemented a hash map at the end of the block to index the key to reduce the CPU overhead of the binary search. The hash index is just an array of pointers pointing into the binary seek index. ![](/static/images/data-block-hash-index/block-format-hash-index.png) Each array element is considered as a hash bucket when storing the location of a key (or more precisely, the restart index of the restart interval where the key resides). When multiple keys happen to hash into the same bucket (hash collision), we just mark the bucket as “collision”.
That way, when that key is queried later, the hash table lookup knows that a hash collision happened, so it can fall back to the traditional binary search to find the location of the key. We define hash table utilization ratio as the #keys/#buckets. If a utilization ratio is 0.5 and there are 100 buckets, 50 keys are stored in the buckets. The lower the util ratio, the fewer hash collisions, and the lower the chance that a point lookup falls back to binary seek (fall back ratio) due to a collision. So a small util ratio has more benefit to reduce the CPU time but introduces more space overhead. Space overhead depends on the util ratio. Each bucket is a `uint8_t` (i.e. one byte). For a util ratio of 1, the space overhead is 1 byte per key, and the fall back ratio observed is ~52%. ![](/static/images/data-block-hash-index/hash-index-data-structure.png) ### Things that Need Attention **Customized Comparator** Hash index will hash different keys (keys with different content, or byte sequence) into different hash values. This assumes the comparator will not treat different keys as equal if they have different content. The default bytewise comparator orders the keys in alphabetical order and works well with hash index, as different keys will never be regarded as equal. However, some specially crafted comparators will regard different keys as equal. For example, say, a `StringToIntComparator` can convert a string into an integer, and use the integer to perform the comparison. Key strings “16” and “0x10” are equal to each other as seen by this `StringToIntComparator`, but they probably hash to different values. Later queries to one form of the key will not be able to find the existing key that has been stored in the other format. We add a new function member to the comparator interface: ``` virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } ``` Every comparator implementation should override this function and specify the behavior of the comparator.
If a comparator can regard different keys equal, the function returns true, and as a result the hash index feature will not be enabled, and vice versa. NOTE: to use the hash index feature, one should 1) have a comparator that can never treat different keys as equal; and 2) override the `CanKeysWithDifferentByteContentsBeEqual()` function to return `false`, so the hash index can be enabled. **Util Ratio's Impact on Data Block Cache** Adding the hash index to the end of the data block essentially takes up the data block cache space, making the effective data block cache size smaller and increasing the data block cache miss ratio. Therefore, a very small util ratio will result in a large data block cache miss ratio, and the extra I/O may drag down the throughput gain achieved by the hash index lookup. Besides, when compression is enabled, cache miss also incurs data block decompression, which is CPU-consuming. Therefore the CPU may even increase if using a too small util ratio. The best util ratio depends on workloads, cache to data ratio, disk bandwidth/latency etc. In our experiment, we found util ratio = 0.5 ~ 1 is a good range to explore that brings both CPU and throughput gains. ### Limitations As we use `uint8_t` to store binary seek index, i.e. restart interval index, the total number of restart intervals cannot be more than 253 (we reserved 255 and 254 as special flags). For blocks having a larger number of restart intervals, the hash index will not be created and the point lookup will be done by traditional binary seek. Data block hash index only supports point lookup. We do not support range lookup. Range lookup request will fall back to BinarySeek. RocksDB supports many types of records, such as `Put`, `Delete`, `Merge`, etc (visit [here](https://github.com/facebook/rocksdb/wiki/rocksdb-basics) for more information). Currently we only support `Put` and `Delete`, but not `Merge`. 
Internally we have a limited set of supported record types: ``` kPutRecord, <=== supported kDeleteRecord, <=== supported kSingleDeleteRecord, <=== supported kTypeBlobIndex, <=== supported ``` For records not supported, the searching process will fall back to the traditional binary seek. ### Evaluation To evaluate the CPU util reduction and isolate other factors such as disk I/O and block decompression, we first evaluate the hash index in a purely cached workload. We observe that the CPU utilization of one of the main functions in the point lookup code path, DataBlockIter::Seek(), is reduced by 21.8% and the overall throughput is increased by 10% at an overhead of 4.6% more space. However, general workloads are not always purely cached. So we also evaluate the performance under different cache space pressure. In the following test, we use `db_bench` with RocksDB deployed on SSDs. The total DB size is 5~6GB, and it is about 14GB if decompressed. Different block cache sizes are used, ranging from 14GB down to 2GB, with an increasing cache miss ratio. Orange bars represent our hash index performance. We use a hash util ratio of 1.0 in this test. Block size is set to 16KiB with the restart interval as 16. ![](/static/images/data-block-hash-index/perf-throughput.png) ![](/static/images/data-block-hash-index/perf-cache-miss.png) We can see that if cache size is greater than 8GB, hash index can bring throughput gain. Cache size greater than 8GB can be translated to a cache miss ratio smaller than 40%. So if the workload has a cache miss ratio smaller than 40%, hash index is able to increase the throughput. Besides, shadow testing with Facebook production traffic shows good CPU improvements too.
rocksdb-6.11.4/docs/_posts/2018-11-21-delete-range.markdown000066400000000000000000000370311370372246700226370ustar00rootroot00000000000000--- title: "DeleteRange: A New Native RocksDB Operation" layout: post author: - abhimadan - ajkr category: blog --- ## Motivation ### Deletion patterns in LSM Deleting a range of keys is a common pattern in RocksDB. Most systems built on top of RocksDB have multi-component key schemas, where keys sharing a common prefix are logically related. Here are some examples. MyRocks is a MySQL fork using RocksDB as its storage engine. Each key's first four bytes identify the table or index to which that key belongs. Thus dropping a table or index involves deleting all the keys with that prefix. Rockssandra is a Cassandra variant that uses RocksDB as its storage engine. One of its admin tool commands, `nodetool cleanup`, removes key-ranges that have been migrated to other nodes in the cluster. Marketplace uses RocksDB to store product data. Its key begins with product ID, and it stores various data associated with the product in separate keys. When a product is removed, all these keys must be deleted. When we decide what to improve, we try to find a use case that's common across users, since we want to build a generally useful system, not one that has many one-off features for individual users. The range deletion pattern is common as illustrated above, so from this perspective it's a good target for optimization. ### Existing mechanisms: challenges and opportunities The most common pattern we see is scan-and-delete, i.e., advance an iterator through the to-be-deleted range, and issue a `Delete` for each key. This is slow (involves read I/O) so cannot be done in any critical path. Additionally, it creates many tombstones, which slows down iterators and doesn't offer a deadline for space reclamation. Another common pattern is using a custom compaction filter that drops keys in the deleted range(s). 
This deletes the range asynchronously, so cannot be used in cases where readers must not see keys in deleted ranges. Further, it has the disadvantage of outputting tombstones to all but the bottom level. That's because compaction cannot detect whether dropping a key would cause an older version at a lower level to reappear. If space reclamation time is important, or it is important that the deleted range not affect iterators, the user can trigger `CompactRange` on the deleted range. This can involve arbitrarily long waits in the compaction queue, and increases write-amp. By the time it's finished, however, the range is completely gone from the LSM. `DeleteFilesInRange` can be used prior to compacting the deleted range as long as snapshot readers do not need to access them. It drops files that are completely contained in the deleted range. That saves write-amp because, in `CompactRange`, the file data would have to be rewritten several times before it reaches the bottom of the LSM, where tombstones can finally be dropped. In addition to the above approaches having various drawbacks, they are quite complicated to reason about and implement. In an ideal world, deleting a range of keys would be (1) simple, i.e., a single API call; (2) synchronous, i.e., when the call finishes, the keys are guaranteed to be wiped from the DB; (3) low latency so it can be used in critical paths; and (4) a first-class operation with all the guarantees of any other write, like atomicity, crash-recovery, etc. ## v1: Getting it to work ### Where to persist them? The first place we thought about storing them is inline with the data blocks. We could not think of a good way to do it, however, since the start of a range tombstone covering a key could be anywhere, making binary search impossible. So, we decided to investigate segregated storage. A second solution we considered is appending to the manifest. 
This file is append-only, periodically compacted, and stores metadata like the level to which each SST belongs. This is tempting because it leverages an existing file, which is maintained in the background and fully read when the DB is opened. However, it conceptually violates the manifest's purpose, which is to store metadata. It also has no way to detect when a range tombstone no longer covers anything and is droppable. Further, it'd be possible for keys above a range tombstone to disappear when they have their seqnums zeroed upon compaction to the bottommost level. A third candidate is using a separate column family. This has similar problems to the manifest approach. That is, we cannot easily detect when a range tombstone is obsolete, and seqnum zeroing can cause a key to go from above a range tombstone to below, i.e., disappearing. The upside is we can reuse logic for memory buffering, consistent reads/writes, etc. The problems with the second and third solutions indicate a need for range tombstones to be aware of flush/compaction. An easy way to achieve this is put them in the SST files themselves - but not in the data blocks, as explained for the first solution. So, we introduced a separate meta-block for range tombstones. This resolved the problem of when to obsolete range tombstones, as it's simple: when they're compacted to the bottom level. We also reused the LSM invariants that newer versions of a key are always in a higher level to prevent the seqnum zeroing problem. This approach has the side benefit of constraining the range tombstones seen during reads to ones in a similar key-range. 
![](/static/images/delrange/delrange_sst_blocks.png) {: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} *When there are range tombstones in an SST, they are segregated in a separate meta-block* {: style="text-align: center"} ![](/static/images/delrange/delrange_key_schema.png) {: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} *Logical range tombstones (left) and their corresponding physical key-value representation (right)* {: style="text-align: center"} ### Write path `WriteBatch` stores range tombstones in its buffer which are logged to the WAL and then applied to a dedicated range tombstone memtable during `Write`. Later in the background the range tombstone memtable and its corresponding data memtable are flushed together into a single SST with a range tombstone meta-block. SSTs periodically undergo compaction which rewrites SSTs with point data and range tombstones dropped or merged wherever possible. We chose to use a dedicated memtable for range tombstones. The memtable representation is always skiplist in order to minimize overhead in the usual case, which is the memtable contains zero or a small number of range tombstones. The range tombstones are segregated to a separate memtable for the same reason we segregated range tombstones in SSTs. That is, we did not know how to interleave the range tombstone with point data in a way that we would be able to find it for arbitrary keys that it covers. ![](/static/images/delrange/delrange_write_path.png) {: style="display: block; margin-left: auto; margin-right: auto; width: 70%"} *Lifetime of point keys and range tombstones in RocksDB* {: style="text-align: center"} During flush and compaction, we chose to write out all non-obsolete range tombstones unsorted. Sorting by a single dimension is easy to implement, but doesn't bring asymptotic improvement to queries over range data. 
Ideally, we want to store skylines (see “Read Path” subsection below) computed over our ranges so we can binary search. However, a couple of concerns cause doing this in flush and compaction to feel unsatisfactory: (1) we need to store multiple skylines, one for each snapshot, which further complicates the range tombstone meta-block encoding; and (2) even if we implement this, the range tombstone memtable still needs to be linearly scanned. Given these concerns we decided to defer collapsing work to the read side, hoping a good caching strategy could optimize this at some future point. ### Read path In point lookups, we aggregate range tombstones in an unordered vector as we search through live memtable, immutable memtables, and then SSTs. When a key is found that matches the lookup key, we do a scan through the vector, checking whether the key is deleted. In iterators, we aggregate range tombstones into a skyline as we visit live memtable, immutable memtables, and SSTs. The skyline is expensive to construct but fast to determine whether a key is covered. The skyline keeps track of the most recent range tombstone found to optimize `Next` and `Prev`. |![](/static/images/delrange/delrange_uncollapsed.png) |![](/static/images/delrange/delrange_collapsed.png) | *([Image source: Leetcode](https://leetcode.com/problems/the-skyline-problem/description/)) The skyline problem involves taking building location/height data in the unsearchable form of A and converting it to the form of B, which is binary-searchable. With overlapping range tombstones, to achieve efficient searching we need to solve an analogous problem, where the x-axis is the key-space and the y-axis is the sequence number.* {: style="text-align: center"} ### Performance characteristics For the v1 implementation, writes are much faster compared to the scan and delete (optionally within a transaction) pattern. `DeleteRange` only logs to WAL and applies to memtable. 
Logging to WAL always `fflush`es, and optionally `fsync`s or `fdatasync`s. Applying to memtable is always an in-memory operation. Since range tombstones have a dedicated skiplist memtable, the complexity of inserting is O(log(T)), where T is the number of existing buffered range tombstones. Reading in the presence of v1 range tombstones, however, is much slower than reads in a database where scan-and-delete has happened, due to the linear scan over range tombstone memtables/meta-blocks. Iterating in a database with v1 range tombstones is usually slower than in a scan-and-delete database, although the gap lessens as iterations grow longer. When an iterator is first created and seeked, we construct a skyline over its tombstones. This operation is O(T\*log(T)) where T is the number of tombstones found across live memtable, immutable memtable, L0 files, and one file from each of the L1+ levels. However, moving the iterator forwards or backwards is simply a constant-time operation (excluding edge cases, e.g., many range tombstones between consecutive point keys). ## v2: Making it fast `DeleteRange`’s negative impact on read perf is a barrier to its adoption. The root cause is range tombstones are not stored or cached in a format that can be efficiently searched. We needed to design DeleteRange so that we could maintain write performance while making read performance competitive with workarounds used in production (e.g., scan-and-delete). ### Representations The key idea of the redesign is that, instead of globally collapsing range tombstones, we can locally “fragment” them for each SST file and memtable to guarantee that: * no range tombstones overlap; and * range tombstones are ordered by start key. Combined, these properties make range tombstones binary searchable. This fragmentation will happen on the read path, but unlike the previous design, we can easily cache many of these range tombstone fragments on the read path. ### Write path The write path remains unchanged. 
### Read path When an SST file is opened, its range tombstones are fragmented and cached. For point lookups, we binary search each file's fragmented range tombstones for one that covers the lookup key. Unlike the old design, once we find a tombstone, we no longer need to search for the key in lower levels, since we know that any keys on those levels will be covered (though we do still check the current level since there may be keys written after the range tombstone). For range scans, we create iterators over all the fragmented range tombstones and store them in a list, seeking each one to cover the start key of the range scan (if possible), and query each encountered key in this structure as in the old design, advancing range tombstone iterators as necessary. In effect, we implicitly create a skyline. This requires significantly less work on iterator creation, but since each memtable/SST has its own range tombstone iterator, querying range tombstones requires key comparisons (and possibly iterator increments) for several iterators (as opposed to v1, where we had a global collapsed representation of all range tombstones). As a result, very long range scans may become slower than before, but short range scans, which are the more common class of range scan, are an order of magnitude faster. ## Benchmarks To understand the performance of this new design, we used `db_bench` to compare point lookup, short range scan, and long range scan performance across: * the v1 DeleteRange design, * the scan-and-delete workaround, and * the v2 DeleteRange design. In these benchmarks, we used a database with 5 million data keys, and 10000 range tombstones (ignoring those dropped during compaction) that were written at regular intervals after 4.5 million data keys were written. 
Writing the range tombstones ensures that most of them are not compacted away, and we have more tombstones in higher levels that cover keys in lower levels, which allows the benchmarks to exercise more interesting behavior when reading deleted keys. Point lookup benchmarks read 100000 keys from a database using `readwhilewriting`. Range scan benchmarks used `seekrandomwhilewriting` and seeked 100000 times, and advanced up to 10 keys away from the seek position for short range scans, and advanced up to 1000 keys away from the seek position for long range scans. The results are summarized in the tables below, averaged over 10 runs (note the different SHAs for v1 benchmarks are due to a new `db_bench` flag that was added in order to compare performance with databases with no tombstones; for brevity, those results are not reported here). Also note that the block cache was large enough to hold the entire db, so the large throughput is due to limited I/Os and little time spent on decompression. The range tombstone blocks are always pinned uncompressed in memory. We believe these setup details should not affect relative performance between versions. 
### Point Lookups |Name |SHA |avg micros/op |avg ops/sec | |v1 |35cd754a6 |1.3179 |759,830.90 | |scan-del |7528130e3 |0.6036 |1,667,237.70 | |v2 |7528130e3 |0.6128 |1,634,633.40 | ### Short Range Scans |Name |SHA |avg micros/op |avg ops/sec | |v1 |0ed738fdd |6.23 |176,562.00 | |scan-del |PR 4677 |2.6844 |377,313.00 | |v2 |PR 4677 |2.8226 |361,249.70 | ### Long Range scans |Name |SHA |avg micros/op |avg ops/sec | |v1 |0ed738fdd |52.7066 |19,074.00 | |scan-del |PR 4677 |38.0325 |26,648.60 | |v2 |PR 4677 |41.2882 |24,714.70 | ## Future Work Note that memtable range tombstones are fragmented every read; for now this is acceptable, since we expect there to be relatively few range tombstones in memtables (and users can enforce this by keeping track of the number of memtable range deletions and manually flushing after it passes a threshold). In the future, a specialized data structure can be used for storing range tombstones in memory to avoid this work. Another future optimization is to create a new format version that requires range tombstones to be stored in a fragmented form. This would save time when opening SST files, and when `max_open_files` is not -1 (i.e., files may be opened several times). ## Acknowledgements Special thanks to Peter Mattis and Nikhil Benesch from Cockroach Labs, who were early users of DeleteRange v1 in production, contributed the cleanest/most efficient v1 aggregation implementation, found and fixed bugs, and provided initial DeleteRange v2 design and continued help. Thanks to Huachao Huang and Jinpeng Zhang from PingCAP for early DeleteRange v1 adoption, bug reports, and fixes. 
rocksdb-6.11.4/docs/_posts/2019-03-08-format-version-4.markdown000066400000000000000000000053061370372246700234260ustar00rootroot00000000000000--- title: format_version 4 layout: post author: maysamyabandeh category: blog --- The data blocks in RocksDB consist of a sequence of key/values pairs sorted by key, where the pairs are grouped into _restart intervals_ specified by `block_restart_interval`. Up to RocksDB version 5.14, where the latest and default value of `BlockBasedTableOptions::format_version` is 2, the format of index and data blocks are the same: index blocks use the same key format of <`user_key`,`seq`> and encode pointers to data blocks, <`offset`,`size`>, to a byte string and use them as values. The only difference is that the index blocks use `index_block_restart_interval` for the size of _restart intervals_. `format_version=`3,4 offer more optimized, backward-compatible, yet forward-incompatible format for index blocks. ### Pros Using `format_version`=4 significantly reduces the index block size, in some cases around 4-5x. This frees more space in block cache, which would result in higher hit rate for data and filter blocks, or offer the same performance with a smaller block cache size. ### Cons Being _forward-incompatible_ means that if you enable `format_version=`4 you cannot downgrade to a RocksDB version lower than 5.16. ### How to use it? - `BlockBasedTableOptions::format_version` = 4 - `BlockBasedTableOptions::index_block_restart_interval` = 16 ### What is format_version 3? (Since RocksDB 5.15) In most cases, the sequence number `seq` is not necessary for keys in the index blocks. In such cases, `format_version`=3 skips encoding the sequence number and sets `index_key_is_user_key` in TableProperties, which is used by the reader to know how to decode the index block. ### What is format_version 4? (Since RocksDB 5.16) Changes the format of index blocks by delta encoding the index values, which are the block handles. 
This saves the encoding of `BlockHandle::offset` of the non-head index entries in each restart interval. If used, `TableProperties::index_value_is_delta_encoded` is set, which is used by the reader to know how to decode the index block. The format of each key is (shared_size, non_shared_size, shared, non_shared). The format of each value, i.e., block handle, is (offset, size) whenever the shared_size is 0, which included the first entry in each restart point. Otherwise the format is delta-size = block handle size - size of last block handle. The index format in `format_version=4` would be as follows: restart_point 0: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) restart_point 1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) ... restart_point n-1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) where, k is key, v is value, and its encoding is in parenthesis. rocksdb-6.11.4/docs/_posts/2019-08-15-unordered-write.markdown000066400000000000000000000053751370372246700234420ustar00rootroot00000000000000--- title: Higher write throughput with `unordered_write` feature layout: post author: maysamyabandeh category: blog --- Since RocksDB 6.3, The `unordered_write=`true option together with WritePrepared transactions offers 34-42% higher write throughput compared to vanilla RocksDB. If the application can handle more relaxed ordering guarantees, the gain in throughput would increase to 63-131%. ### Background Currently RocksDB API delivers the following powerful guarantees: - Atomic reads: Either all of a write batch is visible to reads or none of it. - Read-your-own writes: When a write thread returns to the user, a subsequent read by the same thread will be able to see its own writes. - Immutable Snapshots: The reads visible to the snapshot are immutable in the sense that it will not be affected by any in-flight or future writes. ### `unordered_write` The `unordered_write` feature, when turned on, relaxes the default guarantees of RocksDB. 
While it still gives read-your-own-write property, neither atomic reads nor the immutable snapshot properties are provided any longer. However, RocksDB users could still get read-your-own-write and immutable snapshots when using this feature in conjunction with TransactionDB configured with WritePrepared transactions and `two_write_queues`. You can read [here](https://github.com/facebook/rocksdb/wiki/unordered_write) to learn about the design of `unordered_write` and [here](https://github.com/facebook/rocksdb/wiki/WritePrepared-Transactions) to learn more about WritePrepared transactions. ### How to use it? To get the same guarantees as vanilla RocksdB: DBOptions db_options; db_options.unordered_write = true; db_options.two_write_queues = true; DB* db; { TransactionDBOptions txn_db_options; txn_db_options.write_policy = TxnDBWritePolicy::WRITE_PREPARED; txn_db_options.skip_concurrency_control = true; TransactionDB* txn_db; TransactionDB::Open(options, txn_db_options, kDBPath, &txn_db); db = txn_db; } db->Write(...); To get relaxed guarantees: DBOptions db_options; db_options.unordered_write = true; DB* db; DB::Open(db_options, kDBPath, &db); db->Write(...); # Benchmarks TEST_TMPDIR=/dev/shm/ ~/db_bench --benchmarks=fillrandom --threads=32 --num=10000000 -max_write_buffer_number=16 --max_background_jobs=64 --batch_size=8 --writes=3000000 -level0_file_num_compaction_trigger=99999 --level0_slowdown_writes_trigger=99999 --level0_stop_writes_trigger=99999 -enable_pipelined_write=false -disable_auto_compactions --transaction_db=true --unordered_write=1 --disable_wal=0 Throughput with `unordered_write`=true and using WritePrepared transaction: - WAL: +42% - No-WAL: +34% Throughput with `unordered_write`=true - WAL: +63% - NoWAL: +131% rocksdb-6.11.4/docs/_sass/000077500000000000000000000000001370372246700153055ustar00rootroot00000000000000rocksdb-6.11.4/docs/_sass/_base.scss000066400000000000000000000171011370372246700172530ustar00rootroot00000000000000body { background: 
$secondary-bg; color: $text; font: normal #{$base-font-size}/#{$base-line-height} $base-font-family; height: 100vh; text-align: left; text-rendering: optimizeLegibility; } img { max-width: 100%; } article { p { img { max-width: 100%; display:block; margin-left: auto; margin-right: auto; } } } a { border-bottom: 1px dotted $primary-bg; color: $text; text-decoration: none; -webkit-transition: background 0.3s, color 0.3s; transition: background 0.3s, color 0.3s; } blockquote { padding: 15px 30px 15px 15px; margin: 20px 0 0 10px; background-color: rgba(204, 122, 111, 0.1); border-left: 10px solid rgba(191, 87, 73, 0.2); } #fb_oss a { border: 0; } h1, h2, h3, h4 { font-family: $header-font-family; font-weight: 900; } .navPusher { border-top: $header-height + $header-ptop + $header-pbot solid $primary-bg; height: 100%; left: 0; position: relative; z-index: 99; } .homeContainer { background: $primary-bg; color: $primary-overlay; a { color: $primary-overlay; } .homeSplashFade { color: white; } .homeWrapper { padding: 2em 10px; text-align: left; .wrapper { margin: 0px auto; max-width: $content-width; padding: 0 20px; } .projectLogo { img { height: 100px; margin-bottom: 0px; } } h1#project_title { font-family: $header-font-family; font-size: 300%; letter-spacing: -0.08em; line-height: 1em; margin-bottom: 80px; } h2#project_tagline { font-family: $header-font-family; font-size: 200%; letter-spacing: -0.04em; line-height: 1em; } } } .wrapper { margin: 0px auto; max-width: $content-width; padding: 0 10px; } .projectLogo { display: none; img { height: 100px; margin-bottom: 0px; } } section#intro { margin: 40px 0; } .fbossFontLight { font-family: $base-font-family; font-weight: 300; font-style: normal; } .fb-like { display: block; margin-bottom: 20px; width: 100%; } .center { display: block; text-align: center; } .mainContainer { background: $secondary-bg; overflow: auto; .mainWrapper { padding: 4vh 10px; text-align: left; .allShareBlock { padding: 10px 0; .pluginBlock { margin: 
12px 0; padding: 0; } } a { &:hover, &:focus { background: $primary-bg; color: $primary-overlay; } } em, i { font-style: italic; } strong, b { font-weight: bold; } h1 { font-size: 300%; line-height: 1em; padding: 1.4em 0 1em; text-align: left; } h2 { font-size: 250%; line-height: 1em; margin-bottom: 20px; padding: 1.4em 0 20px; text-align: left; & { border-bottom: 1px solid darken($primary-bg, 10%); color: darken($primary-bg, 10%); font-size: 22px; padding: 10px 0; } &.blockHeader { border-bottom: 1px solid white; color: white; font-size: 22px; margin-bottom: 20px; padding: 10px 0; } } h3 { font-size: 150%; line-height: 1.2em; padding: 1em 0 0.8em; } h4 { font-size: 130%; line-height: 1.2em; padding: 1em 0 0.8em; } p { padding: 0.8em 0; } ul { list-style: disc; } ol, ul { padding-left: 24px; li { padding-bottom: 4px; padding-left: 6px; } } strong { font-weight: bold; } .post { position: relative; .katex { font-weight: 700; } &.basicPost { margin-top: 30px; } a { color: $primary-bg; &:hover, &:focus { color: #fff; } } h2 { border-bottom: 4px solid $primary-bg; font-size: 130%; } h3 { border-bottom: 1px solid $primary-bg; font-size: 110%; } ol { list-style: decimal outside none; } .post-header { padding: 1em 0; h1 { font-size: 150%; line-height: 1em; padding: 0.4em 0 0; a { border: none; } } .post-meta { color: $primary-bg; font-family: $header-font-family; text-align: center; } } .postSocialPlugins { padding-top: 1em; } .docPagination { background: $primary-bg; bottom: 0px; left: 0px; position: absolute; right: 0px; .pager { display: inline-block; width: 50%; } .pagingNext { float: right; text-align: right; } a { border: none; color: $primary-overlay; display: block; padding: 4px 12px; &:hover { background-color: $secondary-bg; color: $text; } .pagerLabel { display: inline; } .pagerTitle { display: none; } } } } .posts { .post { margin-bottom: 6vh; } } } } #integrations_title { font-size: 250%; margin: 80px 0; } .ytVideo { height: 0; overflow: hidden; 
padding-bottom: 53.4%; /* 16:9 */ padding-top: 25px; position: relative; } .ytVideo iframe, .ytVideo object, .ytVideo embed { height: 100%; left: 0; position: absolute; top: 0; width: 100%; } @media only screen and (min-width: 480px) { h1#project_title { font-size: 500%; } h2#project_tagline { font-size: 250%; } .projectLogo { img { margin-bottom: 10px; height: 200px; } } .homeContainer .homeWrapper { padding-left: 10px; padding-right: 10px; } .mainContainer { .mainWrapper { .post { h2 { font-size: 180%; } h3 { font-size: 120%; } .docPagination { a { .pagerLabel { display: none; } .pagerTitle { display: inline; } } } } } } } @media only screen and (min-width: 900px) { .homeContainer { .homeWrapper { position: relative; #inner { box-sizing: border-box; max-width: 600px; padding-right: 40px; } .projectLogo { align-items: center; bottom: 0; display: flex; justify-content: flex-end; left: 0; padding: 2em 20px 4em; position: absolute; right: 20px; top: 0; img { height: 100%; max-height: 250px; } } } } } @media only screen and (min-width: 1024px) { .mainContainer { .mainWrapper { .post { box-sizing: border-box; display: block; .post-header { h1 { font-size: 250%; } } } .posts { .post { margin-bottom: 4vh; width: 100%; } } } } } @media only screen and (min-width: 1200px) { .homeContainer { .homeWrapper { #inner { max-width: 750px; } } } .wrapper { max-width: 1100px; } } @media only screen and (min-width: 1500px) { .homeContainer { .homeWrapper { #inner { max-width: 1100px; padding-bottom: 40px; padding-top: 40px; } } } .wrapper { max-width: 1400px; } } rocksdb-6.11.4/docs/_sass/_blog.scss000066400000000000000000000013541370372246700172670ustar00rootroot00000000000000.blogContainer { .posts { margin-top: 60px; .post { border: 1px solid $primary-bg; border-radius: 3px; padding: 10px 20px 20px; } } .lonePost { margin-top: 60px; .post { padding: 10px 0px 0px; } } .post-header { h1 { text-align: center; } .post-authorName { color: rgba($text, 0.7); font-size: 14px; 
font-weight: 900; margin-top: 0; padding: 0; text-align: center; } .authorPhoto { border-radius: 50%; height: 50px; left: 50%; margin-left: auto; margin-right: auto; display: inline-block; overflow: hidden; position: static; top: -25px; width: 50px; } } } rocksdb-6.11.4/docs/_sass/_buttons.scss000066400000000000000000000014131370372246700200360ustar00rootroot00000000000000.button { border: 1px solid $primary-bg; border-radius: 3px; color: $primary-bg; display: inline-block; font-size: 14px; font-weight: 900; line-height: 1.2em; padding: 10px; text-transform: uppercase; transition: background 0.3s, color 0.3s; &:hover { background: $primary-bg; color: $primary-overlay; } } .homeContainer { .button { border-color: $primary-overlay; border-width: 1px; color: $primary-overlay; &:hover { background: $primary-overlay; color: $primary-bg; } } } .blockButton { display: block; } .edit-page-link { float: right; font-size: 14px; font-weight: normal; line-height: 20px; opacity: 0.6; transition: opacity 0.5s; } .edit-page-link:hover { opacity: 1; } rocksdb-6.11.4/docs/_sass/_footer.scss000066400000000000000000000026041370372246700176410ustar00rootroot00000000000000.footerContainer { background: $secondary-bg; color: $primary-bg; overflow: hidden; padding: 0 10px; text-align: left; .footerWrapper { border-top: 1px solid $primary-bg; padding: 0; .footerBlocks { align-items: center; align-content: center; display: flex; flex-flow: row wrap; margin: 0 -20px; padding: 10px 0; } .footerSection { box-sizing: border-box; flex: 1 1 25%; font-size: 14px; min-width: 275px; padding: 0px 20px; a { border: 0; color: inherit; display: inline-block; line-height: 1.2em; } .footerLink { padding-right: 20px; } } .fbOpenSourceFooter { align-items: center; display: flex; flex-flow: row nowrap; max-width: 25%; .facebookOSSLogoSvg { flex: 0 0 31px; height: 30px; margin-right: 10px; width: 31px; path { fill: $primary-bg; } .middleRing { opacity: 0.7; } .innerRing { opacity: 0.45; } } h2 { display: 
block; font-weight: 900; line-height: 1em; } } } } @media only screen and (min-width: 900px) { .footerSection { &.rightAlign { margin-left: auto; max-width: 25%; text-align: right; } } }rocksdb-6.11.4/docs/_sass/_gridBlock.scss000066400000000000000000000035511370372246700202450ustar00rootroot00000000000000.gridBlock { margin: -5px 0; padding: 0; padding-bottom: 20px; .blockElement { padding: 5px 0; img { max-width: 100%; } h3 { border-bottom: 1px solid rgba($primary-bg, 0.5); color: $primary-bg; font-size: 18px; margin: 0; padding: 10px 0; } } .gridClear { clear: both; } } .gridBlock .alignCenter { text-align: center; } .gridBlock .alignRight { text-align: right; } .gridBlock .imageAlignSide { align-items: center; display: flex; flex-flow: row wrap; } .blockImage { max-width: 150px; width: 50%; } .imageAlignTop .blockImage { margin-bottom: 20px; } .imageAlignTop.alignCenter .blockImage { margin-left: auto; margin-right: auto; } .imageAlignSide .blockImage { flex: 0 1 100px; margin-right: 20px; } .imageAlignSide .blockContent { flex: 1 1; } @media only screen and (max-width: 1023px) { .responsiveList .blockContent { position: relative; } .responsiveList .blockContent > div { padding-left: 20px; } .responsiveList .blockContent::before { content: "\2022"; position: absolute; } } @media only screen and (min-width: 1024px) { .gridBlock { display: flex; flex-direction: row; flex-wrap: wrap; margin: -10px -10px 10px -10px; .twoByGridBlock { box-sizing: border-box; flex: 1 0 50%; padding: 10px; } .fourByGridBlock { box-sizing: border-box; flex: 1 0 25%; padding: 10px; } } h2 + .gridBlock { padding-top: 20px; } } @media only screen and (min-width: 1400px) { .gridBlock { display: flex; flex-direction: row; flex-wrap: wrap; margin: -10px -20px 10px -20px; .twoByGridBlock { box-sizing: border-box; flex: 1 0 50%; padding: 10px 20px; } .fourByGridBlock { box-sizing: border-box; flex: 1 0 25%; padding: 10px 20px; } } 
}rocksdb-6.11.4/docs/_sass/_header.scss000066400000000000000000000043441370372246700175760ustar00rootroot00000000000000.fixedHeaderContainer { background: $primary-bg; color: $primary-overlay; height: $header-height; padding: $header-ptop 0 $header-pbot; position: fixed; width: 100%; z-index: 9999; a { align-items: center; border: 0; color: $primary-overlay; display: flex; flex-flow: row nowrap; height: $header-height; } header { display: flex; flex-flow: row nowrap; position: relative; text-align: left; img { height: 24px; margin-right: 10px; } h2 { display: block; font-family: $header-font-family; font-weight: 900; line-height: 18px; position: relative; } } } .navigationFull { height: 34px; margin-left: auto; nav { position: relative; ul { display: flex; flex-flow: row nowrap; margin: 0 -10px; li { padding: 0 10px; display: block; a { border: 0; color: $primary-overlay-special; font-size: 16px; font-weight: 400; line-height: 1.2em; &:hover { border-bottom: 2px solid $primary-overlay; color: $primary-overlay; } } &.navItemActive { a { color: $primary-overlay; } } } } } } /* 900px .fixedHeaderContainer { .navigationWrapper { nav { padding: 0 1em; position: relative; top: -9px; ul { margin: 0 -0.4em; li { display: inline-block; a { padding: 14px 0.4em; border: 0; color: $primary-overlay-special; display: inline-block; &:hover { color: $primary-overlay; } } &.navItemActive { a { color: $primary-overlay; } } } } } &.navigationFull { display: inline-block; } &.navigationSlider { display: none; } } } 1200px .fixedHeaderContainer { header { max-width: 1100px; } } 1500px .fixedHeaderContainer { header { max-width: 1400px; } } */rocksdb-6.11.4/docs/_sass/_poweredby.scss000066400000000000000000000020221370372246700203350ustar00rootroot00000000000000.poweredByContainer { background: $primary-bg; color: $primary-overlay; margin-bottom: 20px; a { color: $primary-overlay; } .poweredByWrapper { h2 { border-color: $primary-overlay-special; color: $primary-overlay-special; } } 
.poweredByMessage { color: $primary-overlay-special; font-size: 14px; padding-top: 20px; } } .poweredByItems { display: flex; flex-flow: row wrap; margin: 0 -10px; } .poweredByItem { box-sizing: border-box; flex: 1 0 50%; line-height: 1.1em; padding: 5px 10px; &.itemLarge { flex-basis: 100%; padding: 10px; text-align: center; &:nth-child(4) { padding-bottom: 20px; } img { max-height: 30px; } } } @media only screen and (min-width: 480px) { .itemLarge { flex-basis: 50%; max-width: 50%; } } @media only screen and (min-width: 1024px) { .poweredByItem { flex-basis: 25%; max-width: 25%; &.itemLarge { padding-bottom: 20px; text-align: left; } } } rocksdb-6.11.4/docs/_sass/_promo.scss000066400000000000000000000014231370372246700174750ustar00rootroot00000000000000.promoSection { display: flex; flex-flow: column wrap; font-size: 125%; line-height: 1.6em; margin: -10px 0; position: relative; z-index: 99; .promoRow { padding: 10px 0; .pluginWrapper { display: block; &.ghWatchWrapper, &.ghStarWrapper { height: 28px; } } .pluginRowBlock { display: flex; flex-flow: row wrap; margin: 0 -2px; .pluginWrapper { padding: 0 2px; } } } } iframe.pluginIframe { height: 500px; margin-top: 20px; width: 100%; } .iframeContent { display: none; } .iframePreview { display: inline-block; margin-top: 20px; } @media only screen and (min-width: 1024px) { .iframeContent { display: block; } .iframePreview { display: none; } }rocksdb-6.11.4/docs/_sass/_react_docs_nav.scss000066400000000000000000000126431370372246700213210ustar00rootroot00000000000000.docsNavContainer { background: $sidenav; height: 35px; left: 0; position: fixed; width: 100%; z-index: 100; } .docMainWrapper { .wrapper { &.mainWrapper { padding-left: 0; padding-right: 0; padding-top: 10px; } } } .docsSliderActive { .docsNavContainer { box-sizing: border-box; height: 100%; overflow-y: auto; -webkit-overflow-scrolling: touch; padding-bottom: 50px; } .mainContainer { display: none; } } .navBreadcrumb { box-sizing: border-box; display: 
flex; flex-flow: row nowrap; font-size: 12px; height: 35px; overflow: hidden; padding: 5px 10px; a, span { border: 0; color: $sidenav-text; } i { padding: 0 3px; } } nav.toc { position: relative; section { padding: 0px; position: relative; .navGroups { display: none; padding: 40px 10px 10px; } } .toggleNav { background: $sidenav; color: $sidenav-text; position: relative; transition: background-color 0.3s, color 0.3s; .navToggle { cursor: pointer; height: 24px; margin-right: 10px; position: relative; text-align: left; width: 18px; &::before, &::after { content: ""; position: absolute; top: 50%; left: 0; left: 8px; width: 3px; height: 6px; border: 5px solid $sidenav-text; border-width: 5px 0; margin-top: -8px; transform: rotate(45deg); z-index: 1; } &::after { transform: rotate(-45deg); } i { &::before, &::after { content: ""; position: absolute; top: 50%; left: 2px; background: transparent; border-width: 0 5px 5px; border-style: solid; border-color: transparent $sidenav-text; height: 0; margin-top: -7px; opacity: 1; width: 5px; z-index: 10; } &::after { border-width: 5px 5px 0; margin-top: 2px; } } } .navGroup { background: $sidenav-overlay; margin: 1px 0; ul { display: none; } h3 { background: $sidenav-overlay; color: $sidenav-text; cursor: pointer; font-size: 14px; font-weight: 400; line-height: 1.2em; padding: 10px; transition: color 0.2s; i:not(:empty) { width: 16px; height: 16px; display: inline-block; box-sizing: border-box; text-align: center; color: rgba($sidenav-text, 0.5); margin-right: 10px; transition: color 0.2s; } &:hover { color: $primary-bg; i:not(:empty) { color: $primary-bg; } } } &.navGroupActive { background: $sidenav-active; color: $sidenav-text; ul { display: block; padding-bottom: 10px; padding-top: 10px; } h3 { background: $primary-bg; color: $primary-overlay; i { display: none; } } } } ul { padding-left: 0; padding-right: 24px; li { list-style-type: none; padding-bottom: 0; padding-left: 0; a { border: none; color: $sidenav-text; display: 
inline-block; font-size: 14px; line-height: 1.1em; margin: 2px 10px 5px; padding: 5px 0 2px; transition: color 0.3s; &:hover, &:focus { color: $primary-bg; } &.navItemActive { color: $primary-bg; font-weight: 900; } } } } } .toggleNavActive { .navBreadcrumb { background: $sidenav; margin-bottom: 20px; position: fixed; width: 100%; } section { .navGroups { display: block; } } .navToggle { &::before, &::after { border-width: 6px 0; height: 0px; margin-top: -6px; } i { opacity: 0; } } } } .docsNavVisible { .navPusher { .mainContainer { padding-top: 35px; } } } @media only screen and (min-width: 900px) { .navBreadcrumb { padding: 5px 0; } nav.toc { section { .navGroups { padding: 40px 0 0; } } } } @media only screen and (min-width: 1024px) { .navToggle { display: none; } .docsSliderActive { .mainContainer { display: block; } } .docsNavVisible { .navPusher { .mainContainer { padding-top: 0; } } } .docsNavContainer { background: none; box-sizing: border-box; height: auto; margin: 40px 40px 0 0; overflow-y: auto; position: relative; width: 300px; } nav.toc { section { .navGroups { display: block; padding-top: 0px; } } .toggleNavActive { .navBreadcrumb { margin-bottom: 0; position: relative; } } } .docMainWrapper { display: flex; flex-flow: row nowrap; margin-bottom: 40px; .wrapper { padding-left: 0; padding-right: 0; &.mainWrapper { padding-top: 0; } } } .navBreadcrumb { display: none; h2 { padding: 0 10px; } } }rocksdb-6.11.4/docs/_sass/_react_header_nav.scss000066400000000000000000000051511370372246700216150ustar00rootroot00000000000000.navigationFull { display: none; } .navigationSlider { position: absolute; right: 0px; .navSlideout { cursor: pointer; padding-top: 4px; position: absolute; right: 10px; top: 0; transition: top 0.3s; z-index: 101; } .slidingNav { background: $secondary-bg; box-sizing: border-box; height: 0px; overflow-x: hidden; padding: 0; position: absolute; right: 0px; top: 0; transition: height 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55), width 0.3s 
cubic-bezier(0.68, -0.55, 0.265, 1.55); width: 0; ul { flex-flow: column nowrap; list-style: none; padding: 10px; li { margin: 0; padding: 2px 0; a { color: $primary-bg; display: inline; margin: 3px 5px; padding: 2px 0px; transition: background-color 0.3s; &:focus, &:hover { border-bottom: 2px solid $primary-bg; } } } } } .navSlideoutActive { .slidingNav { height: auto; padding-top: $header-height + $header-pbot; width: 300px; } .navSlideout { top: -2px; .menuExpand { span:nth-child(1) { background-color: $text; top: 16px; transform: rotate(45deg); } span:nth-child(2) { opacity: 0; } span:nth-child(3) { background-color: $text; transform: rotate(-45deg); } } } } } .menuExpand { display: flex; flex-flow: column nowrap; height: 20px; justify-content: space-between; span { background: $primary-overlay; border-radius: 3px; display: block; flex: 0 0 4px; height: 4px; position: relative; top: 0; transition: background-color 0.3s, top 0.3s, opacity 0.3s, transform 0.3s; width: 20px; } } .navPusher { border-top: $header-height + $header-ptop + $header-pbot solid $primary-bg; position: relative; left: 0; z-index: 99; height: 100%; &::after { position: absolute; top: 0; right: 0; width: 0; height: 0; background: rgba(0,0,0,0.4); content: ''; opacity: 0; -webkit-transition: opacity 0.5s, width 0.1s 0.5s, height 0.1s 0.5s; transition: opacity 0.5s, width 0.1s 0.5s, height 0.1s 0.5s; } .sliderActive &::after { width: 100%; height: 100%; opacity: 1; -webkit-transition: opacity 0.5s; transition: opacity 0.5s; z-index: 100; } } @media only screen and (min-width: 1024px) { .navigationFull { display: block; } .navigationSlider { display: none; } }rocksdb-6.11.4/docs/_sass/_reset.scss000066400000000000000000000017341370372246700174700ustar00rootroot00000000000000html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, 
i, center, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video { margin: 0; padding: 0; border: 0; font-size: 100%; font: inherit; vertical-align: baseline; } /* HTML5 display-role reset for older browsers */ article, aside, details, figcaption, figure, footer, header, hgroup, menu, nav, section { display: block; } body { line-height: 1; } ol, ul { list-style: none; } blockquote, q { quotes: none; } blockquote:before, blockquote:after, q:before, q:after { content: ''; content: none; } table { border-collapse: collapse; border-spacing: 0; } rocksdb-6.11.4/docs/_sass/_search.scss000066400000000000000000000060761370372246700176170ustar00rootroot00000000000000input[type="search"] { -moz-appearance: none; -webkit-appearance: none; } .navSearchWrapper { align-self: center; position: relative; &::before { border: 3px solid $primary-overlay-special; border-radius: 50%; content: " "; display: block; height: 6px; left: 15px; width: 6px; position: absolute; top: 4px; z-index: 1; } &::after { background: $primary-overlay-special; content: " "; height: 7px; left: 24px; position: absolute; transform: rotate(-45deg); top: 12px; width: 3px; z-index: 1; } .aa-dropdown-menu { background: $secondary-bg; border: 3px solid rgba($text, 0.25); color: $text; font-size: 14px; left: auto !important; line-height: 1.2em; right: 0 !important; .algolia-docsearch-suggestion--category-header { background: $primary-overlay-special; color: $primary-bg; .algolia-docsearch-suggestion--highlight { background-color: $primary-bg; color: $primary-overlay; } } .algolia-docsearch-suggestion--title .algolia-docsearch-suggestion--highlight, .algolia-docsearch-suggestion--subcategory-column .algolia-docsearch-suggestion--highlight { color: $primary-bg; } .algolia-docsearch-suggestion__secondary, 
.algolia-docsearch-suggestion--subcategory-column { border-color: rgba($text, 0.3); } } } input#search_input { padding-left: 25px; font-size: 14px; line-height: 20px; border-radius: 20px; background-color: rgba($primary-overlay-special, 0.25); border: none; color: rgba($primary-overlay-special, 0); outline: none; position: relative; transition: background-color .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), width .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), color .2s ease; width: 60px; &:focus, &:active { background-color: $secondary-bg; color: $text; width: 240px; } } .navigationSlider { .navSearchWrapper { &::before { left: 6px; top: 6px; } &::after { left: 15px; top: 14px; } } input#search_input_react { box-sizing: border-box; padding-left: 25px; font-size: 14px; line-height: 20px; border-radius: 20px; background-color: rgba($primary-overlay-special, 0.25); border: none; color: $text; outline: none; position: relative; transition: background-color .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), width .2s cubic-bezier(0.68, -0.55, 0.265, 1.55), color .2s ease; width: 100%; &:focus, &:active { background-color: $primary-bg; color: $primary-overlay; } } .algolia-docsearch-suggestion--subcategory-inline { display: none; } & > span { width: 100%; } .aa-dropdown-menu { background: $secondary-bg; border: 0px solid $secondary-bg; color: $text; font-size: 12px; line-height: 2em; max-height: 140px; min-width: auto; overflow-y: scroll; -webkit-overflow-scrolling: touch; padding: 0; border-radius: 0; position: relative !important; width: 100%; } }rocksdb-6.11.4/docs/_sass/_slideshow.scss000066400000000000000000000014431370372246700203440ustar00rootroot00000000000000.slideshow { position: relative; .slide { display: none; img { display: block; margin: 0 auto; } &.slideActive { display: block; } a { border: none; display: block; } } .pagination { display: block; margin: -10px; padding: 1em 0; text-align: center; width: 100%; .pager { background: transparent; border: 2px solid 
rgba(255, 255, 255, 0.5); border-radius: 50%; cursor: pointer; display: inline-block; height: 12px; margin: 10px; transition: background-color 0.3s, border-color 0.3s; width: 12px; &.pagerActive { background: rgba(255, 255, 255, 0.5); border-width: 4px; height: 8px; width: 8px; } } } } rocksdb-6.11.4/docs/_sass/_syntax-highlighting.scss000066400000000000000000000122471370372246700223400ustar00rootroot00000000000000 .rougeHighlight { background-color: $code-bg; color: #93a1a1 } .rougeHighlight .c { color: #586e75 } /* Comment */ .rougeHighlight .err { color: #93a1a1 } /* Error */ .rougeHighlight .g { color: #93a1a1 } /* Generic */ .rougeHighlight .k { color: #859900 } /* Keyword */ .rougeHighlight .l { color: #93a1a1 } /* Literal */ .rougeHighlight .n { color: #93a1a1 } /* Name */ .rougeHighlight .o { color: #859900 } /* Operator */ .rougeHighlight .x { color: #cb4b16 } /* Other */ .rougeHighlight .p { color: #93a1a1 } /* Punctuation */ .rougeHighlight .cm { color: #586e75 } /* Comment.Multiline */ .rougeHighlight .cp { color: #859900 } /* Comment.Preproc */ .rougeHighlight .c1 { color: #72c02c; } /* Comment.Single */ .rougeHighlight .cs { color: #859900 } /* Comment.Special */ .rougeHighlight .gd { color: #2aa198 } /* Generic.Deleted */ .rougeHighlight .ge { color: #93a1a1; font-style: italic } /* Generic.Emph */ .rougeHighlight .gr { color: #dc322f } /* Generic.Error */ .rougeHighlight .gh { color: #cb4b16 } /* Generic.Heading */ .rougeHighlight .gi { color: #859900 } /* Generic.Inserted */ .rougeHighlight .go { color: #93a1a1 } /* Generic.Output */ .rougeHighlight .gp { color: #93a1a1 } /* Generic.Prompt */ .rougeHighlight .gs { color: #93a1a1; font-weight: bold } /* Generic.Strong */ .rougeHighlight .gu { color: #cb4b16 } /* Generic.Subheading */ .rougeHighlight .gt { color: #93a1a1 } /* Generic.Traceback */ .rougeHighlight .kc { color: #cb4b16 } /* Keyword.Constant */ .rougeHighlight .kd { color: #268bd2 } /* Keyword.Declaration */ .rougeHighlight .kn { color: 
#859900 } /* Keyword.Namespace */ .rougeHighlight .kp { color: #859900 } /* Keyword.Pseudo */ .rougeHighlight .kr { color: #268bd2 } /* Keyword.Reserved */ .rougeHighlight .kt { color: #dc322f } /* Keyword.Type */ .rougeHighlight .ld { color: #93a1a1 } /* Literal.Date */ .rougeHighlight .m { color: #2aa198 } /* Literal.Number */ .rougeHighlight .s { color: #2aa198 } /* Literal.String */ .rougeHighlight .na { color: #93a1a1 } /* Name.Attribute */ .rougeHighlight .nb { color: #B58900 } /* Name.Builtin */ .rougeHighlight .nc { color: #268bd2 } /* Name.Class */ .rougeHighlight .no { color: #cb4b16 } /* Name.Constant */ .rougeHighlight .nd { color: #268bd2 } /* Name.Decorator */ .rougeHighlight .ni { color: #cb4b16 } /* Name.Entity */ .rougeHighlight .ne { color: #cb4b16 } /* Name.Exception */ .rougeHighlight .nf { color: #268bd2 } /* Name.Function */ .rougeHighlight .nl { color: #93a1a1 } /* Name.Label */ .rougeHighlight .nn { color: #93a1a1 } /* Name.Namespace */ .rougeHighlight .nx { color: #93a1a1 } /* Name.Other */ .rougeHighlight .py { color: #93a1a1 } /* Name.Property */ .rougeHighlight .nt { color: #268bd2 } /* Name.Tag */ .rougeHighlight .nv { color: #268bd2 } /* Name.Variable */ .rougeHighlight .ow { color: #859900 } /* Operator.Word */ .rougeHighlight .w { color: #93a1a1 } /* Text.Whitespace */ .rougeHighlight .mf { color: #2aa198 } /* Literal.Number.Float */ .rougeHighlight .mh { color: #2aa198 } /* Literal.Number.Hex */ .rougeHighlight .mi { color: #2aa198 } /* Literal.Number.Integer */ .rougeHighlight .mo { color: #2aa198 } /* Literal.Number.Oct */ .rougeHighlight .sb { color: #586e75 } /* Literal.String.Backtick */ .rougeHighlight .sc { color: #2aa198 } /* Literal.String.Char */ .rougeHighlight .sd { color: #93a1a1 } /* Literal.String.Doc */ .rougeHighlight .s2 { color: #2aa198 } /* Literal.String.Double */ .rougeHighlight .se { color: #cb4b16 } /* Literal.String.Escape */ .rougeHighlight .sh { color: #93a1a1 } /* Literal.String.Heredoc */ .rougeHighlight 
.si { color: #2aa198 } /* Literal.String.Interpol */ .rougeHighlight .sx { color: #2aa198 } /* Literal.String.Other */ .rougeHighlight .sr { color: #dc322f } /* Literal.String.Regex */ .rougeHighlight .s1 { color: #2aa198 } /* Literal.String.Single */ .rougeHighlight .ss { color: #2aa198 } /* Literal.String.Symbol */ .rougeHighlight .bp { color: #268bd2 } /* Name.Builtin.Pseudo */ .rougeHighlight .vc { color: #268bd2 } /* Name.Variable.Class */ .rougeHighlight .vg { color: #268bd2 } /* Name.Variable.Global */ .rougeHighlight .vi { color: #268bd2 } /* Name.Variable.Instance */ .rougeHighlight .il { color: #2aa198 } /* Literal.Number.Integer.Long */ .highlighter-rouge { color: darken(#72c02c, 8%); font: 800 12px/1.5em Hack, monospace; max-width: 100%; .rougeHighlight { border-radius: 3px; margin: 20px 0; padding: 0px; overflow-x: scroll; -webkit-overflow-scrolling: touch; table { background: none; border: none; tbody { tr { background: none; display: flex; flex-flow: row nowrap; td { display: block; flex: 1 1; &.gutter { border-right: 1px solid lighten($code-bg, 10%); color: lighten($code-bg, 15%); margin-right: 10px; max-width: 40px; padding-right: 10px; pre { max-width: 20px; } } } } } } } } p > .highlighter-rouge, li > .highlighter-rouge, a > .highlighter-rouge { font-size: 16px; font-weight: 400; line-height: inherit; } a:hover { .highlighter-rouge { color: white; } }rocksdb-6.11.4/docs/_sass/_tables.scss000066400000000000000000000015061370372246700176150ustar00rootroot00000000000000table { background: $lightergrey; border: 1px solid $lightgrey; border-collapse: collapse; display:table; margin: 20px 0; thead { border-bottom: 1px solid $lightgrey; display: table-header-group; } tbody { display: table-row-group; } tr { display: table-row; &:nth-of-type(odd) { background: $greyish; } th, td { border-right: 1px dotted $lightgrey; display: table-cell; font-size: 14px; line-height: 1.3em; padding: 10px; text-align: left; &:last-of-type { border-right: 0; } code { 
color: $green; display: inline-block; font-size: 12px; } } th { color: #000000; font-weight: bold; font-family: $header-font-family; text-transform: uppercase; } } }rocksdb-6.11.4/docs/_top-level/000077500000000000000000000000001370372246700162435ustar00rootroot00000000000000rocksdb-6.11.4/docs/_top-level/support.md000066400000000000000000000010551370372246700203020ustar00rootroot00000000000000--- layout: top-level title: Support id: support category: support --- ## Need help? Do not hesitate to ask questions if you are having trouble with RocksDB. ### GitHub issues Use [GitHub issues](https://github.com/facebook/rocksdb/issues) to report bugs, issues and feature requests for the RocksDB codebase. ### Facebook Group Use the [RocksDB Facebook group](https://www.facebook.com/groups/rocksdb.dev/) for general questions and discussion about RocksDB. ### FAQ Check out a list of [commonly asked questions](/docs/support/faq) about RocksDB. rocksdb-6.11.4/docs/blog/000077500000000000000000000000001370372246700151205ustar00rootroot00000000000000rocksdb-6.11.4/docs/blog/all.html000066400000000000000000000006501370372246700165570ustar00rootroot00000000000000--- id: all layout: blog category: blog ---

All Posts

{% for post in site.posts %} {% assign author = site.data.authors[post.author] %}

{{ post.title }} on {{ post.date | date: "%B %e, %Y" }} by {{ author.display_name }}

{% endfor %}
rocksdb-6.11.4/docs/blog/index.html000066400000000000000000000002571370372246700171210ustar00rootroot00000000000000--- id: blog title: Blog layout: blog category: blog ---
{% for page in site.posts %} {% include post.html truncate=true %} {% endfor %}
rocksdb-6.11.4/docs/css/000077500000000000000000000000001370372246700147655ustar00rootroot00000000000000rocksdb-6.11.4/docs/css/main.scss000066400000000000000000000071361370372246700166150ustar00rootroot00000000000000--- # Only the main Sass file needs front matter (the dashes are enough) --- @charset "utf-8"; @font-face { font-family: 'Lato'; src: url("{{ '/static/fonts/LatoLatin-Italic.woff2' }}") format('woff2'), url("{{ '/static/fonts/LatoLatin-Italic.woff' }}") format('woff'); font-weight: normal; font-style: italic; } @font-face { font-family: 'Lato'; src: url("{{ '/static/fonts/LatoLatin-Black.woff2' }}") format('woff2'), url("{{ '/static/fonts/LatoLatin-Black.woff' }}") format('woff'); font-weight: 900; font-style: normal; } @font-face { font-family: 'Lato'; src: url("{{ '/static/fonts/LatoLatin-BlackItalic.woff2' }}") format('woff2'), url("{{ '/static/fonts/LatoLatin-BlackItalic.woff' }}") format('woff'); font-weight: 900; font-style: italic; } @font-face { font-family: 'Lato'; src: url("{{ '/static/fonts/LatoLatin-Light.woff2' }}") format('woff2'), url("{{ '/static/fonts/LatoLatin-Light.woff' }}") format('woff'); font-weight: 300; font-style: normal; } @font-face { font-family: 'Lato'; src: url("{{ '/static/fonts/LatoLatin-Regular.woff2' }}") format('woff2'), url("{{ '/static/fonts/LatoLatin-Regular.woff' }}") format('woff'); font-weight: normal; font-style: normal; } // Our variables $base-font-family: 'Lato', Calibri, Arial, sans-serif; $header-font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif; $base-font-size: 18px; $small-font-size: $base-font-size * 0.875; $base-line-height: 1.4em; $spacing-unit: 12px; // Two configured colors (see _config.yml) $primary-bg: {{ site.color.primary }}; $secondary-bg: {{ site.color.secondary }}; // $primary-bg overlays {% if site.color.primary-overlay == 'light' %} $primary-overlay: darken($primary-bg, 70%); $primary-overlay-special: darken($primary-bg, 40%); {% else %} $primary-overlay: #fff; 
$primary-overlay-special: lighten($primary-bg, 30%); {% endif %} // $secondary-bg overlays {% if site.color.secondary-overlay == 'light' %} $text: #393939; $sidenav: darken($secondary-bg, 20%); $sidenav-text: $text; $sidenav-overlay: darken($sidenav, 10%); $sidenav-active: lighten($sidenav, 10%); {% else %} $text: #fff; $sidenav: lighten($secondary-bg, 20%); $sidenav-text: $text; $sidenav-overlay: lighten($sidenav, 10%); $sidenav-active: darken($sidenav, 10%); {% endif %} $code-bg: #002b36; $header-height: 34px; $header-ptop: 10px; $header-pbot: 8px; // Width of the content area $content-width: 900px; // Table setting variables $lightergrey: #F8F8F8; $greyish: #E8E8E8; $lightgrey: #B0B0B0; $green: #2db04b; // Using media queries with like this: // @include media-query($on-palm) { // .wrapper { // padding-right: $spacing-unit / 2; // padding-left: $spacing-unit / 2; // } // } @mixin media-query($device) { @media screen and (max-width: $device) { @content; } } // Import partials from `sass_dir` (defaults to `_sass`) @import "reset", "base", "header", "search", "syntax-highlighting", "promo", "buttons", "gridBlock", "poweredby", "footer", "react_header_nav", "react_docs_nav", "tables", "blog" ; // Anchor links // http://ben.balter.com/2014/03/13/pages-anchor-links/ .header-link { position: absolute; margin-left: 0.2em; opacity: 0; -webkit-transition: opacity 0.2s ease-in-out 0.1s; -moz-transition: opacity 0.2s ease-in-out 0.1s; -ms-transition: opacity 0.2s ease-in-out 0.1s; } h2:hover .header-link, h3:hover .header-link, h4:hover .header-link, h5:hover .header-link, h6:hover .header-link { opacity: 1; } rocksdb-6.11.4/docs/doc-type-examples/000077500000000000000000000000001370372246700175355ustar00rootroot00000000000000rocksdb-6.11.4/docs/doc-type-examples/2016-04-07-blog-post-example.md000066400000000000000000000011741370372246700244540ustar00rootroot00000000000000--- title: Blog Post Example layout: post author: exampleauthor category: blog --- Any local blog posts 
would go in the `_posts` directory. This is an example blog post introduction, try to keep it short and about a paragraph long, to encourage people to click through to read the entire post. Everything below the `` tag will only show on the actual blog post page, not on the `/blog/` index. Author is defined in `_data/authors.yml` ## No posts? If you have no blog for your site, you can remove the entire `_posts` folder. Otherwise add markdown files in here. See CONTRIBUTING.md for details. rocksdb-6.11.4/docs/doc-type-examples/docs-hello-world.md000066400000000000000000000005061370372246700232360ustar00rootroot00000000000000--- docid: hello-world title: Hello, World! layout: docs permalink: /docs/hello-world.html --- Any local docs would go in the `_docs` directory. ## No documentation? If you have no documentation for your site, you can remove the entire `_docs` folder. Otherwise add markdown files in here. See CONTRIBUTING.md for details. rocksdb-6.11.4/docs/doc-type-examples/top-level-example.md000066400000000000000000000003501370372246700234150ustar00rootroot00000000000000--- layout: top-level title: Support Example id: top-level-example category: top-level --- This is a static page disconnected from the blog or docs collections that can be added at a top-level (i.e., the same level as `index.md`). 
rocksdb-6.11.4/docs/docs/000077500000000000000000000000001370372246700151255ustar00rootroot00000000000000rocksdb-6.11.4/docs/docs/index.html000066400000000000000000000001201370372246700171130ustar00rootroot00000000000000--- id: docs title: Docs layout: redirect destination: getting-started.html --- rocksdb-6.11.4/docs/feed.xml000066400000000000000000000022571370372246700156300ustar00rootroot00000000000000--- layout: null --- {{ site.title | xml_escape }} {{ site.description | xml_escape }} https://rocksdb.org/feed.xml {{ site.time | date_to_rfc822 }} {{ site.time | date_to_rfc822 }} Jekyll v{{ jekyll.version }} {% for post in site.posts limit:10 %} {{ post.title | xml_escape }} {{ post.content | xml_escape }} {{ post.date | date_to_rfc822 }} {{ post.url | absolute_url }} {{ post.url | absolute_url }} {% for tag in post.tags %} {{ tag | xml_escape }} {% endfor %} {% for cat in post.categories %} {{ cat | xml_escape }} {% endfor %} {% endfor %} rocksdb-6.11.4/docs/index.md000066400000000000000000000002561370372246700156310ustar00rootroot00000000000000--- layout: home title: RocksDB | A persistent key-value store id: home --- ## Features {% include content/gridblocks.html data_source=site.data.features align="center" %} rocksdb-6.11.4/docs/static/000077500000000000000000000000001370372246700154645ustar00rootroot00000000000000rocksdb-6.11.4/docs/static/favicon.png000066400000000000000000000075271370372246700176320ustar00rootroot00000000000000‰PNG  IHDR szzô AiCCPICC ProfileH –wTSهϽ7½Ð" %ôz Ò;HQ‰I€P†„&vDF)VdTÀG‡"cE ƒ‚b× òPÆÁQDEåÝŒk ï­5óÞšýÇYßÙç·×Ùgï}׺Pü‚ÂtX€4¡XîëÁ\ËÄ÷XÀáffGøDÔü½=™™¨HƳöî.€d»Û,¿P&sÖÿ‘"7C$ EÕ6<~&å”S³Å2ÿÊô•)2†12¡ ¢¬"ãįlö§æ+»É˜—&ä¡Yμ4žŒ»PÞš%ᣌ¡\˜%àg£|e½TIšå÷(ÓÓøœL0™_Ìç&¡l‰2Eî‰ò”Ä9¼r‹ù9hžx¦g䊉Ib¦טiåèÈfúñ³Sùb1+”ÃMáˆxLÏô´ Ž0€¯o–E%Ym™h‘í­ííYÖæhù¿Ùß~Sý=ÈzûUñ&ìÏžAŒžYßlì¬/½ö$Z›³¾•U´m@åá¬Oï ò´Þœó†l^’Äâ ' ‹ììlsŸk.+è7ûŸ‚oÊ¿†9÷™ËîûV;¦?#I3eE妧¦KDÌÌ —Ïdý÷ÿãÀ9iÍÉÃ,œŸÀñ…èUQè” „‰h»…Ø A1ØvƒjpÔzÐN‚6p\WÀ p €G@ †ÁK0Þi‚ð¢Aª¤™BÖZyCAP8ÅC‰’@ùÐ&¨*ƒª¡CP=ô#tº]ƒú Ð 4ý}„˜Óa 
ض€Ù°;GÂËàDxœÀÛáJ¸>·Âáð,…_“@ÈÑFXñDBX$!k‘"¤©Eš¤¹H‘q䇡a˜Æã‡YŒábVaÖbJ0Õ˜c˜VLæ6f3ù‚¥bÕ±¦X'¬?v 6›-ÄV``[°—±Øaì;ÇÀâp~¸\2n5®·׌»€ëà á&ñx¼*Þï‚Ásðb|!¾ ߯¿' Zk‚!– $l$Tçý„Â4Q¨Ot"†yÄ\b)±ŽØA¼I&N“I†$R$)™´TIj"]&=&½!“É:dGrY@^O®$Ÿ _%’?P”(&OJEBÙN9J¹@y@yC¥R ¨nÔXª˜ºZO½D}J}/G“3—ó—ãÉ­“«‘k•ë—{%O”×—w—_.Ÿ'_!Jþ¦ü¸QÁ@ÁS£°V¡Fá´Â=…IEš¢•bˆbšb‰bƒâ5ÅQ%¼’’·O©@é°Ò%¥!BÓ¥yÒ¸´M´:ÚeÚ0G7¤ûÓ“éÅôè½ô e%e[å(ååå³ÊRÂ0`ø3R¥Œ“Œ»Œó4æ¹ÏãÏÛ6¯i^ÿ¼)•ù*n*|•"•f••ªLUoÕÕªmªOÔ0j&jajÙjûÕ.«Ï§ÏwžÏ_4ÿäü‡ê°º‰z¸újõÃê=ꓚ¾U—4Æ5šnšÉšåšç4Ç´hZ µZåZçµ^0•™îÌTf%³‹9¡­®í§-Ñ>¤Ý«=­c¨³Xg£N³Î]’.[7A·\·SwBOK/X/_¯Qï¡>QŸ­Ÿ¤¿G¿[ÊÀÐ Ú`‹A›Á¨¡Š¡¿aža£ác#ª‘«Ñ*£Z£;Æ8c¶qŠñ>ã[&°‰I’IÉMSØÔÞT`ºÏ´Ï kæh&4«5»Ç¢°ÜYY¬FÖ 9Ã<È|£y›ù+ =‹X‹Ý_,í,S-ë,Y)YXm´ê°úÃÚÄšk]c}džjãc³Î¦Ýæµ­©-ßv¿í};š]°Ý»N»Ïöö"û&û1=‡x‡½÷Øtv(»„}Õëèá¸ÎñŒã'{'±ÓI§ßYÎ)ΠΣ ðÔ-rÑqá¸r‘.d.Œ_xp¡ÔUÛ•ãZëúÌM×çvÄmÄÝØ=Ùý¸û+K‘G‹Ç”§“çÏ ^ˆ—¯W‘W¯·’÷bïjï§>:>‰>>¾v¾«}/øaýývúÝó×ðçú×ûO8¬ è ¤FV> 2 uÃÁÁ»‚/Ò_$\ÔBüCv…< 5 ]ús.,4¬&ìy¸Ux~xw-bEDCÄ»HÈÒÈG‹KwFÉGÅEÕGME{E—EK—X,Y³äFŒZŒ ¦={$vr©÷ÒÝK‡ãìâ ãî.3\–³ìÚrµå©ËÏ®_ÁYq*ßÿ‰©åL®ô_¹wåד»‡û’çÆ+çñ]øeü‘—„²„ÑD—Ä]‰cI®IIãOAµàu²_òä©””£)3©Ñ©Íi„´ø´ÓB%aа+]3='½/Ã4£0CºÊiÕîU¢@Ñ‘L(sYf»˜ŽþLõHŒ$›%ƒY ³j²ÞgGeŸÊQÌæôäšänËÉóÉû~5f5wug¾vþ†üÁ5îk­…Ö®\Û¹Nw]Áºáõ¾ëm mHÙðËFËeßnŠÞÔQ Q°¾`h³ïæÆB¹BQá½-Î[lÅllíÝf³­jÛ—"^ÑõbËâŠâO%Ü’ëßY}WùÝÌö„í½¥ö¥ûwàvwÜÝéºóX™bY^ÙЮà]­åÌò¢ò·»Wì¾Va[q`id´2¨²½J¯jGÕ§ê¤êšæ½ê{·íÚÇÛ׿ßmÓÅ>¼È÷Pk­AmÅaÜá¬ÃÏë¢êº¿g_DíHñ‘ÏG…G¥ÇÂuÕ;Ô×7¨7”6’ƱãqÇoýàõC{«éP3£¹ø8!9ñâÇøïž <ÙyŠ}ªé'ýŸö¶ÐZŠZ¡ÖÜÖ‰¶¤6i{L{ßé€ÓÎ-?›ÿ|ôŒö™š³ÊgKϑΜ›9Ÿw~òBÆ…ñ‹‰‡:Wt>º´äÒ®°®ÞË—¯^ñ¹r©Û½ûüU—«g®9];}}½í†ýÖ»ž–_ì~iéµïm½ép³ý–ã­Ž¾}çú]û/Þöº}åŽÿ‹úî.¾{ÿ^Ü=é}ÞýÑ©^?Ìz8ýhýcìã¢' O*žª?­ýÕø×f©½ôì ×`ϳˆg†¸C/ÿ•ù¯OÃÏ©Ï+F´FêG­GÏŒùŒÝz±ôÅðËŒ—Óã…¿)þ¶÷•Ñ«Ÿ~wû½gbÉÄðkÑë™?JÞ¨¾9úÖömçdèäÓwi獵ŠÞ«¾?öý¡ûcôÇ‘éìOøO•Ÿ?w| üòx&mfæß÷„óû2:Y~ pHYs  šœ¼IDATX í–lSUÇ?[i×­ÛŠŒý†lP²dæA!b”) Fb0šøcš ¢F\PCˆHŒ?f†€FBt( 1bP1  ƤllÎ1Vºnëó\n_»ÒΆ!áNòzï»÷Üs¾çÜï9¯1€!Ïu“ØëæÙïø€¸æˆ‰ÂòAQö¼ítÀãó`f¡¯ÇMu­ÁªMáæÀÿ½Ø$¬ƒåpK8™LîˆÎŒ[ ê”.†‘iáû[™8Vœ–AÊ`no,GÇÛm0ïåÈ'¢8r † |øòÕÓ`× nŸ“Í ´tØ(ß÷½Ç/×ÖïQÔ»à¡Ló&æ—Àa7°Û|d8»~“‡-»aÏÁÈÎÕjT§ÏAOج‘X-°t.l|ޫʦ׃3¾›£.]=±Xý4·ˆ§iyða1<[´Õ/ÅâUÂÑO WhÚÕ<¤f1’’ùwj²•=îî8žžuw—…³íqä´SU“ÊÜIððL±³ÍÖvö µ¥ª 
ðÄY1Š‹0š¶aÔmÆxä qØWºSó0ö½‹áÛ±¦$çÒ¸åÍLÃ[c|õvªÑ]…áù£µrÑ,væbLjÃô)qË0Þ?¬ƒQ™°z³¤«<^ÑðKÞhxã1ƒüZï$+å"éÉ^¾ü-{'4Ñæ¶rNˆ7ÄÞÉœP²Šî‚ArM.B£\§·þk>ÕFý7¤_ЦBÎ0È]Mm¦[”¥‹5?úþf¹çS⸠G\ï%¥ñÃ.HÉYø®f(ù#]Ì(šzXø:¤:![ºØm`J.ttmǧR»IÐÒÍ~çCåðÚ'áHde AiéÝ —Æ'$ €·vf3&£›¥—Ö&ùOhç¦Ýsb¯ú8,€T>Û;ªÍ]]]†ùª’¶c?œn†%s„€dJ²NùÄìv¶Vgò`‹ãM ŒNssü |ñ”m‡:)Ùþ$> oÕ•› ¼¦5C8 –&‰Òò…” Ñû ¾^+e•Èm#ÏÓÐj§¾ÑCñûp¦T„ý‰Y·k§bWqLIó?á/ŨÍ_jáþWÕLKR¼Ç >ÉÓáÆ$Z;¬üyÚÃï'LÈãÂéÒ˜VH£ ¹äpÝ(ÛBaíÑŸ4—Ƥw2el;÷‡껢ÎÇ/Dw®Î„e ¯¡Á‰’; 쯳“&Ìß{ÈÇöŸƒZªÓ¡;¥ê–iƒ¡bÒŽƒ:ÿ5‹@u9iFôßyJ/Èù› »`Y™¾Ó¨êr*ÚJ鄬—:N—Hš„,½>i&+õ¥C¸ÐWÐgæ÷]¹òy€Ã®›Jwæxî}…D]¹/²auת;^”KÔêÇÚϵé ÑÊ ¡æ³DgúýÌ–ˆ¨Ú7%@µMSTé¨o‚ãjÕ«êj6,‡Óô—ÐÔȨ®Õ”sAc‡ëÆ: $ÙÛJuÔzåê~7~<¯²hÅæ²êùŠ`ª ”d'®ä¡>ùW¹¶!¸@pûÚÏ¢vÂk á_=~È!ÁljIEND®B`‚rocksdb-6.11.4/docs/static/fonts/000077500000000000000000000000001370372246700166155ustar00rootroot00000000000000rocksdb-6.11.4/docs/static/fonts/LatoLatin-Black.woff000066400000000000000000002114741370372246700224120ustar00rootroot00000000000000wOFF<3dGPOS€4üb …UÞPGSUB6|Îܨd“”OS/28L\`›ý£‚cmap8¨€æÿ‹ cvt <È3:?(¬fpgm<ü§ —ØÛðgaspB¤glyfB¬J*¯è·headŒÀ66 D8ßhheaŒø!$ûõhmtx«Ld_i¼kernÈo­þÔølocax‘¨È*ìZmaxp 0name,ƒÅž'Ä7post ° ÆEC•prepÀ{ŠöŒý.xœíœytTÅÖö«2B „) FQ "a’y™dP/“†!ˆWE#èÅ ƒ ‚ˆ(Åš@2tš&:-ÐF b+­ï]ßúÞ[ßïTb@pXëÞ÷w}œõôpºNí]UûÙÏ®ôi„B„Š‘b¥èÕgàHÑdê‚´Y¢õ£iÓgŠö³ž7Gt´J £í¯¯e×~5^û‹€™ÓÓæˆ&ú1^?¶ÖwÒÄèã·F_~Ú“&‰;ïZÖy\ïËý¢‡;Çy’WÈĉºÿ7e7Yêçç×ËoŽß3[8zù•úûùôçÿ˜ÿ¸ g'848™c8g8‚ñë|%¤nÈîKµ‚8Âj¬sÇè:—LJ¯ˆH‰x,buÄ'gëp´¨{g½Üz—xÿI䩈³g£êF 瘵0jUÔ•ú³êï®_Ê;Ž~ ê6Hkà‰n=Æw&j^ôJŽ ïs|¤¯ýYt0ãÓSÑ—|çýŸÆñ†Ýˆ³G7žÕx5Ƕ¦­£ßoúXôû‘§¢?£7ó¹ñè˜n1cbæÇ¼³1æHLIÌ嘟bCbãââ¦Ä½ÛlK³"ñ¡ñu5¢[m9±U:Ç’VëE+ᯋ¨ö‰(•/¢Õy‘ ¾­@"çîP;Å4õ¾ÈPEb©:#–«Sb£Ê•ñê°lZ€– ´­Ám —Ê—ãÕ—rxÌT;å,0Ug¤ùà¸:%O€P¢F‰ÚÂZ©Å"E}+2Õz±R½ ªL‘§\â˜Z#JT…°RµDÔ½®õd±©úнŒïC`\™+"Ežè",âžßôÊ•_‰IÊ#ÒÔUù’H–«ÕÏr¸W¾,ZÉWÀ«¢|Mô$~SÔO|Ò˜wÍõu—Åq»8&ÂE)mÖˆD®éÉ5CäZÑ–ëÒ2™…ªïE] Zág Ö¶aõ€:-r™ó<æ×¢²-Ÿ×®ög²h÷‡>E2þS\±O¸êWœ¤ïéb»º(ÞW°ñz¸lôЗÚÓC{<¬K/-èe¨ÌV_èÑyù4œOëwN¹F)>çE°º"B@8h b@,ˆ·«oD[åí@HÆò]Ê&Úƒ»á=  GÑ‘s@guUt÷‚®à>РܺƒTÐô½@oÐ<ú‚~ ?‚A`0†‚a`8F‚Q`4xŒcU±‡ŸãÁðr‹‰`˜ S8?LÓÁ#àQðøc˜f‚Y¼Ÿ æ0ÆÇy~‚qÎi¼~[ó`ÎS`>x¤ƒ`!Xž‹Á®ydˆ¦b)+» ,ÍÄ õ• Pe$¨O<4@cÐ41 ŽKTWäí  ¸¤€Ž 
è ‚A`0†‚a`8UßÈÇÀßÀ À8dxÌOùàiU Á"ð X –€gAX –å`ÈÄÏ•à9°JËçÁßÁ àEå–/ÕÀˆ¼—y~¬¯"Qf1¯ƒu`=x¼ þ6Ðÿ[àmðx¼6‚M`3m²Á°ß·íà}°ƒL´ì»Á`Ø >ûÀG`?øŸ€OÁAp9ôõø_€\ð%ÈGE¤´€c Íä P¬¬óIp ”;š1“EX5ƒô#F(—ožøô5Q_Ôâ“ωȯÈmåä¶r2R#x? 67„ë àú0B¶È£åwdɲd²ããâÈ¿®’ï!ÞMä-È¡­@"¹åvµ(¼îªÝjŒØªEæÕSÅ)5Q‚Rõ6¹=¹=¹=µ—ˆÚKDí%¢ö¢…hA!ZP¨3ã2ã2ã,~‡•É^FÖr+ÈŒ%ÂP‹¶FþÁ·|Zì'×Õ£ÕZmaµ˜…â6ÍŠ¢YQ³#Œâ}^bŒâ‚h£,Œd“¸“YÚ¦ìbØ öÂáÁu{¨›u³¢nVÔÍŠºYQ7+êfEݬ¨ÛFx‰^b„—áFx^`„á&F¸‰n’sXËÇÁجsËÑîQhU”9êЪ&hU}sÔ¡äº ¤¨O¼Ã'Wy£®-$kÜHÜÁì|Éç#ȡϓ+Ÿ'>ÏL%/ÃÉEû±´ UxUØ€¥%¢HÅ*ŽÏ€ßÏÀïIð{1ü¾~×ýáõ`8ŸSásoøÜÞ>o{]£ˆÁýøñ(+Ð/úi‰DZüz>¶„‹÷ÀÃð® ¾í‚o{‰ñ,áÔÞú£Û XU¡.U¨KêR…ºT¡.U¨KÊ2e„² BUÆ¢*cQ•ñ¨ÊxTe¼qGÎuU6ª’ªd£*Ù¨J6ª’­g¤;Ï© è zÞÀ˜©xî úþ`Á` ŒÎó0Œ£Áƒ` «&UÏøT•†b¤¡i(FŠ1–•‹bŒE1ÒP‹·Q‹4Ôbj1C¯Ð“ô1Ùy ÌOƒt°,‹À3`1X¢2P‹ j³…ÔfKÅ2õõÙÔb¾¹Ò‡YéCbŸzN|¤^ûyþ˜çÔF‡Ô‘®E@}•ªd£*Ù¨J6ª’ªd£*Ù¨JŒX #ÈÅ0b1ŒX #ÈÅ0b1ªS…êT¡:U¨NªS…êT¡:U¨NªS…êT¡:U¨NªS…êT¡:U¨N•ŽÀ45è¦Q˜Îùjlu4>Ãk#"—ð\3*—ñ~9X2ñ{%x\‹Ö¿Óï à¢v½JCUÒP•4T%MGñ[ôiDò;<ÑüÏÁ&°™6Ftoáy+ã¸åïóz‡ÊBU²P•,T% UÉBU²P•,T% UÉBU²P•,T% UÉBU²P•,T% UÉBU²P• T%UÉ@U2P• T%UÉ@U2¨©—RS/¥¦^JM½”šz5õjê¨Ê|ƒq¨Ê|T%|’¢ÊˆP£>© R.¥ÌH3RhDÍì§>¢ÕãÄr+Z®¥å>Z.$¦¬ÄÔ ®˜+ŠêêÆäªxrUbé3béX ýéĺ¾ÇZ®`3Y»LÖíÖëÖ#ux«¹X½‡uèÀ: cú±OàÅ}2›út›ZÄ\~ƒÿV,…RgÖÑ E=‰wQx·Ñôî=¼sáïÖâMÞ^4!W~ƒµ9Xû kIXk޵®XKÆÚ¬¥°‚?aé5tŠú˜^¿‡«Vz®b€Ù[`ÎÞFÆ7©xsÞÌ%rŸ b(½“íÆ,F0‹èYÌQê™4ÆXÁì5Àãoñö2½=@o?è½ÍËpḰ–™{UÜ©÷:µPþJ”ÿœØÊîâ sNMƒVG› QÿÓ¨~;¢Éâ‰[´üoZþWuËZ”Ñ¢€Ù´°Ñ"ÇhQJ‹ô•‰.®D·¢äð÷ Ê‹Å<Þ[ÔQZÒ2ªˆlC«ó´8dZË×~•â…cjÉîÓ…Éâ¤èÌ:t÷‚®à>РܺƒT‘(z€ž è úpþžûòÜôÀ@0 CÀP0 #ÀH0 Œ‚1`>ŒÀCDÄD0 Lƒ)œŸ¦ƒÙ <Éux¶Ttb_Й}Ao‘I$­;ű•£X쟊Ýâ‚ø€ç=<"6wJäÊZ"OÖÊ^õ´8%¾… TÆÊqRF‚ú¬zV¿Ïynš‚ɹ•´šO«—i•A«u´ŒW)xNë'ô:Ô1סÚ,Ú3C홡öôó)³ÑÞœöÌF{f#œÙhÏl´gás kàzÙŒMØéÏÔf&Ú3홉ö¬A?ì=¦×  XˆÀÂÓÌHCf¤¡9# Í5ˆ0­F`5Â\ƒ¬F0[ ÝçælÁò*,oÂòkXÞÁ8&c9ËXŽ`´ým ¬¯øÓkðÞMÖ`{õÌ¥å³Õ:Z­£ÕS´Zf¶­×àQZí2ÿb–N«µ´ZF«7|1#—m'¦K[ Œ³Â—R¸rŽ|ÃÙ¦Â_QŸTg1?Úü@§~åæU‰fR4ï'‹îúU ìݬ+ˆšWfÒ"•Y¿HÖO"&¡[IèVº•„n%¡[IèV­RÑ­Tt+ÝJE!RÑ­$t+ÝJB·’Э$t+ ÝJB·’Э$t+ ÝJB·’Э$t+‰8LB·’˜‹$t+‰™KbÕ/²ê™›‹¬ÁYÖà,kpÿϲgYõ‹¬úEVý"«~‘™¹ˆn%‰yÔ7k} m+ÆU´6ê­<óïÒvÎ^Ô}q»´eÚ²ËÔ¾“´°ñ "ˆëì\w€O .XÌ*ŒÝîFÃÝh¸ w£án4܆»Ñp7îFÃÝh¸ w£án4܆»Ñp7îFÃÝh¸ w£án4܆»Ñp7îFÃÝh¸ 
w£án4܆»Ñp7;óÙiÿÌNûgôÜÍ.»’]v%»ìJvÙ•ì²+ÙeW²Ë®d—]É.»’]v%ºÿ»ëËhÚoÌŠñ7¢r´ÞÖ»Ñz7ZïFëÝh½­w£õn´ÞÖ»Ù}þH½þ#»ÏÙ}þÈñgêñŸ©Çf‡ø3;ÄJvˆ•ì+Ù!V²C¬d‡Xɱ’b%;ÄJvˆ•ì+Ù!V²C¬d‡Xɱ’b%;ÄJvˆ•ì/³¼Ìð2;ÀËÔÔÔd͇D;ØÚÎ̘íَ̌·˜3ƒßM„A„o&Âá«ÈrcYÙWÑïd1Vg°q<ÀCd‰`˜ S8? L³AXŽd¢ß+É,[ÈH[É »¸j7Lÿ€ç=<B¶:(Ø-ˆmÔ+{¨WÞ‡Ç3àñsðø9j—mðx,< WCe†ÁPF0ÔÁP³¦Ê†2‚¡:OgÑþu°l³àQ<Š€GÄëd|ý5Ö=Äëw:Öƒ‰õ"Æk|“r˜Xßaƺ±¯8©?=£¿¥1þŠxýú—Ê2ýÍÉ z<¯Õ Øü~¦€¶hû>m*Ð5U€Þ»„%yôdQgñi7ìܪ—`æ0óe˜ù2+wý¬ÁM£ò|ÕìãmŠ9»{ûõÈØKjÖ¦Š`ØÂASbAþf€™jšœfƒ£ª”è/%úK‰þR¢Ñ¿‡èßÃ:'¨·éAFxœ:ᬿõ·±þ6=;éÙIÏNzvÒózÞBÏ[ôÕ¸úP«·rõ®ÞÀÕnrõV®ÞÊÕ[ÿp–Óóò+°°Æ bìÿáXÎ(–3Šåÿ†°³vVÀΠد[ã›Ò\²Æ8±Q­„±ò>ÑFv÷ÁÞÑ07¶&ÃÖDö*F%5ž}È^ÓÆd‰"u–Öá´îBëIæw¬}áyk®l͕ŽVê5ýø9Ã_eЍ¦°¬@+P!¢Ô:V¡‚U¨`*X… ¬FPeÕmÕvÑ$»Xóöàn"çФ¨}¢#ç:Î0± ¸t÷nà~Ф‚ 'èzƒ>àÐôýÁ0 ƒÁ0 ÃÁ0Œ£Áƒ` KdNeN‚ÇÀßðk˜ fñÞ÷÷å\Toª·Õ+ÿ ßDæ‹gÁ22Ê â~£zQl"ïl¦:ȦÏÿŠEËQÀr°,GËQÀr°\Æ©|¯2esд  h n‰¨Óí  ¸¤€Ž è z©uDp\AWÁDp\AWÁr&ø,0ÌUÛexÌOùàiÀÕ…`x,KÀ³ ,ËÀr°d2Ž•à9°Šlð<ø;x¼Dö {ɵì_wÉõ*W¾Þÿ¸þ-ð6x¼ ÞÁ&°™6Ùê´ÜÂóVµƒŠ`Á*‚ÿÆo"óågàsp|rÁ—%“Vu\ž§@9³5ò‡{­áX8œŠF¯ƒQ“š‚ âA‚ZÛ^ {ÙÉ^v²×\²×Z²×øa‡vøa‡vøa‡vøa‡(èz‚^ 7è}A?Ð Á 0 CÁ00Œ#Á(0<Æ€'Á<â÷)0< ÒÁ°,πŠn,%¦—«½dÙ-dØ-dØ dØ Ä¸·ãvbÜNŒÛ‰q;1n'~mįøµ¿6â×FüÚˆ_ñk#vmĮص»6b×FìÚˆ]±k#û.!û.!û.!ûÚɾv²¯ìk'ûÚɾv²¯ìk'ûÚɾkɾkɾkɾkɾȾȾˆU;±j'VíÄO>ñ“Oüä?ùÄO>ñ“Oüä?ùÄO>ñ“Oüä?ùÄO>ñ“Oüä?ùÄO>ñ“OFÏ%£ç’ÑsÉè¹dô½dô½dô½:Ïî¡V™ÆÎk:{Õ ^ë8·—È)#rʈœ2"§ŒÈ)#rʈœ22nc2nc2n9·œŒ[NƵq-dÜdÜdÜdÜB2®…Œk!¢\D”‹ˆrQ."ÊED¹ˆ(å"¢\D”‹ˆrQ."ÊED¹ˆ(å"¢\D”‹ˆrQ."ÊED¹ˆ(å"¢\D”‹ˆrQ."ÊED¹ˆ(å‚©ï§Æ½ŸLk!ÓZÈ´²ì 2ìy2ìy¢ÎEÔ9ˆ:Qç êDƒ¨su¢ÎAÔ9ˆ:ÖA†u}gˆ¾ "ÍE¤¹ˆ4‘æ"Ò\Dš‹Hs‘MD[ÑVF´•meD[ÑVF´•meèc WFÄ•qeD\WFÄ•qeD\Ù²1Ù²1Ù²1Ù²œlYN¶,'[–“-ËÉ–ådËr²e9ÙÒB¶´--dK ÙÒB¶´--dK ÙÒB¶´--dK è"]D ËüëJ}tÚø a’ùíZwrÊýdF ™ÑBf´-dF ™ÑBf´-dÃódÃódÃódÃóD³ƒhvÍ¢ÙA4;ˆfÑì šD³ƒhvÍ¢ÙA4;ˆfÑì šD³ƒhv dCÙÐA6t dCÙÐA6tíD{Ñ^A´W ›?cÏ+VÁ¾$›]Ì6±ƒÈ.v'/ˆ}ìXÖ°c9$²DÇ:qD|!Ö ’§ïH üm >|:$$äMq†šÄIMâ¤&±ÂŽbØQ ;ŠaG1ì(†Ű£vÄÃŽxØ‘;òaG>ìÈ9°ãì8;ŽÀŽã°#väÀì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Áì°Á޵°c ìH]RD]RD]RD]R[r`KlÉ¡.)‚1G¨KŠ`Í1Xs ÖØ`ÖXaÖXaÖXaÖXaÖXaM¬)€5ù°¦„úäyû,5J)5Êyj”óÔ(%Ô('a” FÙ`” FÙ`” FÙ`” FPŸ8©OœÔ'Nê'õ‰“úÄI}â¤>1î¤*†qÅ0®ÆøbW 
ãŠa\15ìX7YÞ/î•ÝQÍœƒ!`(†ƒ`&uî,0Ì%¿¦':ûòa_>ìˇ}ù°/öåÀ¾Ø—ûr`_ìË}9°/öåÀ¾Ø—ûl°Ïûl°oÌ3þ&|ÌkóúÀ¼Ô(EÔ(EÔ(EÔ(E01&æÀĘ˜s`bL̉9Ô(EÔ(nj”"Xy Vƒ•Ç`å1Xi…•VXi…•VXi…•VXi…•VXi…•VXi…•VXi…•VXi…•VXi…•VXY+ `e¬,€•°²VÀÊXY+K`e ¬,AƒÎ¢AgÑ ³Ô/¥Ô/¥Ô/Fí/ `V1̲Ã,;̲Ã,;̲Ã,;Ì¢BÓw€¶èJ;îBùÛƒ»©léȹN 39º ¸t÷nà~Ф‚ 'èzƒ>àÐôýÁ0 ƒÁ0 ÃÁ0Œ£Áƒ` /^„Y}`V˜å€Y˜å€Y˜• ³raV.ÌrÀ¬£0˳ `VÌrÀ,̲Á,̲Á,̲Á,̲Á,̲Á¬"˜U³N¬2Xå„Ue°É›l°É›°É›°É›°É›Š`‹¶Øa‹¶Øa‹¶Øa‹¶ØaËØ2¶t€-ñ°Å[ì°Å[ì°Å[ì°Å[ì°¥ li[šÀ–BØR[ aK!l)„-…°¥¶–\Ø’ [raK.lÉ…-¹°%¶ä–\Ø’ [raK.lqÀlqÀ–iæ=¯w™÷ì:Õ¶8`‹¶8`‹¶ä–\Ø’ [raK.lÉ•ÆÝЛó[°Å[ `Kl)€-°Å[l°Å[l°Å[l°Å[l°Å[l°Å[l°Å[l°Å[l°Å[l°¥¶Á–"ØR[Š`Kl)‚-E°¥ ¶”Á–2ØR[œ°Å [œ°¥ ¶”Á–2­mУ5èÑX3†}ñ³°Â+\°Â+¬°Â +òaE>¬0î»Ë‡VXaeÿYν9hÕ÷IÌsøüÚ¯ٻū5ä×5ä×5ä×5ä×5ä×5ä×5ä×5ìïžee]¬¬‹•u±².VÖÅʺXY+ëbe­¬¬••µ²²VVÖÊÊZYY++kee­¬¬••µ²²VöiåìÓÊÙ§•³O+7ïgèÄêöauÌ;²›³šVVÓÊjZYM+«ie5­¬¦•Õ´Þxw¨¾cÂʨ^ÛTkq@½%Šèݸ«z†…vXhgîã°0V$°—M%0ZêûTZ²_ZGÕ¼—}R6û¤Ã¢èÇ^)SÜ)š‰eâ^ö§Ùsœ–ñb´lZ€– ´­Ám —hÉÞa{‡uìÖ±/8̾à0û‚Ãì ³/Èd_ɾ SÎÍäãà l„âATàÁ9,ï§— z© vÒ¼Ÿf«ýT÷ÁâKqT§D¡(ÅB¿¯«šd1C¤‹ýëú÷áè¿[Ìx"`a@Qв õ7óðy‚ãn8úšÇh}LK‚_3^ßð­™ñ×·ø¼Ÿ?cÖŠñ{~¯Àïø½‚Ñ3úbF_ÌèQY>Ÿf3Û¯£¨A%‹…ó\—çhД×1 ă5+ ÙËZØËZ°¶kë±6^œƒçàÅ9x‘/ònàÅð"^wpt!Sw†yð"^䙼8/Αdc'ÙØI6v’dc'ÙØI6v’dc'Ù¸„l\B6¶‘¿§”a ȰdØ2llK¦Iá¹#è:ƒ` †€¡`F€ñj*37•™›ÊžÓžÓžÓžÓžÓžÓžÓžÓžÓÂì®gv×3»ë™ÝõÌîfw ³;»çàî9¸{»çàî9¸{îæÁÝ<¸›wóànÜ̓»yp7îæÁÝ<¸›w;GºÀËþ—ÄDàû¾ç’þNÞàUœh%çŠ#ô:VÎSåSô4_,O‹·eº¸S.„w‹Ä[r±È”Ï¢¼â5¹…\&ºÉå´Q\’/É7ðh²$ƒË—å`ùŠœ%×ÊiòUù< ”oˆUòMòî‘.ߦ}ïßÊ÷ðp#ù+[&ÊúÿœþÐ÷ôKß'D Y fÉ9±Æ·å ô«_¿i’æ7MÆ·!NÆÒöúoƒ˜ß·A{˜­ÃÌV³UÄlýB|}"³O •â,Ö¯ÖÏ…õsaû‡°ý(lO‡áîáWaøU~v{`·‡ZÐC-è¡ôP z¨=Ô‚jAµ ‡ZÐC-è¡ôP z¨=Ô‚jAµ ‡ZÐC-è¡ôP z¨=Ô‚jAµ ‡ZÐC-è¡ôP z¨=Ô‚jAµ ‡¬â!«xÈ*²ÊUê;™ÂK¦0îOú–šíkfå8±v–zÍC½æ¡^óP¯y¨×<Ôkê5ÙÄK&˜K&˜K&˜ ËÂò£°ü(,? 
ËÓay:,O‡ÅXìÅXìÅXìÅXìÅXìÅXì¡¶òP[y¨­<°Ù›=°Ù›=°Ù›=°Ù›=°Î ë¼°Î ë¼°Î ë¼°Î ëŒû¤¾¦~ùšúåë[|'ý×~+öŸúÅÖ_ûÙ­~cåûmG®h*òÄÖÇóp‡ß‹¿9vûÿúú°èãŸ×q]ÆôŽ7EàŒÀtóXƱ*(‘¬Cs»„¹MÿüÎæúµÿ«¿ƒùßø‹?¾/ýæðÿ?÷µ»´kÞYý-~¡Å¿hñO>ýçÜw]óS—ùé|ú­qÜŸ¸ÿÛ¸£´#URGª¤ŽŒ­#URG³JêH•ÔѼ§°#™­#u@cþ/ótÆšÆX¿d¬‘TH©:R!udö¦ÈíÌÅI8["Ö±nÉ¢+Vºb¥+Vºb¥«i¥+VºšVºb¥+V"±òßÌj],ÆÒ,¥c鸟Xꊥ®XꊥǰÔK±Xzë&wÖ¼°9~¤âG*~¤âG*~¤âG*~¤âG*~¤ŠL8°…}ã.bt7{ÌxÞÃó'<Ï'×kµÂ¢V*¥"ƒ1¤2†TÃ~¦âg*~¦âç,üì…Ÿ-DÜ wñv0ïâíPã.ÞxÑT¬xÒRl%—ìóñ¦ÞÌÇ›xoþ6¤5^ÜŽSñb*^$Wßìó¢ú_}_~–£°¥YýV&‚I`2xLáü40Ìi S$ááIO¾Õ7}«oõñ-+GMß&bi5–¶céM,íÅÒ#øVßêã[}Ö`(kÈ„áÁßõô³éaWæê%\½ž«Wqõ;\=š+ºqE4­Ó‰){ÊŒÿïÅ¿‰Þ‘4…ýÉ(¹2A¾)øuó›uݱÌo½y|¬R¿¯9þ©£/Lç#vî4£ïžÿA¥7þ¾“Œ¥d,%cé–’MKÉXJÆR–’±”LôE3ÇMˆ<‰¥"ó×(ïa©/‘W KÉXJÆR2‘÷Ñ‘Wów0¯²º ¬n‚¹º âw0 Œoª¹ºÍ{?Åò:,¯eŒï_÷;˜¬ý¹_N¿ §u8¾…sEú-~núŽoá¦oáø~“߇ûû÷üþù»|]ú.ßßÞ÷ûǪäGlï—§ H‰ïî„ õS‚vï&Ú[‹ë?<Ê¥*Ùm uIyô¹oÔOúÙ8÷“ú‘ã{å¤ÍWêÅñÞíkyÓË» ­×|…ñï/³ÛõùÖ|þæwüÚŽƒŸµô¥Î©³×úä—³?©ô~Jû¸¿¾ÅÆê¼:Ɉ.ÜįoôXM«ê‚ÑÆ9{Gß™¯u¿·Y¾JÕeýø™ù>›ý¹PUÊ£6óÅë·ÔJõžÚ§&b'ƒºA¨Ij–šÎ³MgÆUvíÛœ/ßœÿdŒ˜gc–ÁïÔÏê*«`ô|õwürk¿\Æ£¯ýø=WeúU¾3^éçï}VoÚ×·zLnu麾®ê™ûÿ‰”óDÍiú®Àê¿®í }e«×y4î©÷½?¡Žð¸^íPŸüÑ\ëëlú9ËcÔV5BÝ­ö2C)j´Z€Ÿå¼zP=mŒíûúB­ÔÏ›ÍØÿùÚœ3’šñõ³Ž¯“Œ´PÇ×_ÌL8Åh/Öè³Ô˜“ßX:SýÊ¡¿c.ßäñªW=›ƒÕ45캫>Ä–U!+{FGÂαy†U<ÄõWÍuð˜1c¬Eu4®ÕÏçŒõ&J y.2¹}-¦l†ŒÒù×´ªb¤'T1½õ;³7_çñ5cú}#Íí1Ì~Qj䋯*MµQ¨jσ±°L=¯–óÉL°²F_UšÕîk3鳫ϹŒ¹ç¹„)c]>Ã7ÏÖÿÌogš–ÈÏB}®Ö™ï§êµ=ÅÚ5Ï`[MW/1Çùt=g0š…<ïS¯p¥£º¯ÃÆkâd»ù~¥žG;,fý¿çõ2"7†ÏÀÃEj söŽZªÞå“|µ«ªûÚ¨ÞÒ=1ß®ýz[m£NÕãõúøzcæÁ‡âï2UkÔ ó}œŽáW8ÓÐ<Ó^=¬â™û»ÔjÖaŠž‡:ç,UãÇÑ꾎k? 
ÕÓïu<eÊwÆ­ãë¦l6ÿù 44ŽWñâN*›Dù„¸³m8ˆbqß0÷ÖÕ¡´®Mu êpR „óy•r0û¤(ZÕ×um@åJmÕ„óMÙÖ¦ Šåª8Ž0,Ås¦9GöF-é#vªx»×DŽzâvŽH¼h£wðwÐë ðï”õNÙ–Š±ì dŠì,‚dÙCS•ö!²·ì-jÉ>ò*ûÊœ(GˆÚr¤-êÈ%õŽ/b1Q>Buñ¨œA9SRUË9Œ¿cküÿ‡úWžìPŒñ×Ñ£­£ÇÙŒQÅòÚ‰o ­ôBõüôüõjë1Ô×cz ÍÙë&q}²Þ‘Ü%Ú3Ú»Eæ¥#ûËz¢3ut0öÇâÇ8*¤Hj²‡±6EïN¦r4¢3þªòˆxŒ×3¹jG#ª3c×2‡#L<ÎÑHâh!ös´s´‡8ô½– ú¾„ ý=G”þn#J·,Éãdk/o£¶6V·Ž^Ñ:²kÙL¯œoÍZé5 Õkæ§×¬¶^3©×¬¹œ+ÓÙM/+D=™)Ÿ£çUò"æE*ÉX™åÛ›’ëå?Øl›h¹YnáõVù>uìy˜xÊ‘yøsTæÇe”Vê¸]Ç5~u:.pJÐǬjüï0ëOþóé8uQ‘º“œy†CŸÊ«ÈœT´ä*œ¶l¾E¾J Ÿ~ܬŸ¿–Ô½5ÚQ%ݬòøõ¶®Ô8oÔgW®½¾…íïµb|EnmHúŽ\z•Zbž pAµÖ*wƒºÝô_wýØ\?Þ⮳³–j¡J¡Œû&þ«Ç ;ê°~½Ãüä[5°F»Ë¿ÍÏúüO¿¾ª9?èÞ¾JÎWcÜ꟩z/êÇ|ýxØüä;5¡F»bÔYWFµxÍ'õ ^»‰€Dfñ;õ*óW®ž ¦5Ö½TEÿþ¼'ß³Ng¨rêÚb µÜIæÿ¼ú€÷.4ݨ ŠP¹K×]÷ËozzAÏŸÏ¿SzÆ­<^«ª×õJ{X‡½×÷`V0ÉXúÖÐ7­Uß3¢«¬ÿ]Õ­~¼ÎÚoæÓ¬éŽëÇO®õ¬FýžÏ5{V}uü¾§_¯Õ3û³0Øls‹ø­ÑË+:šíÕ㋘öª1æ§ ÔvzsrR‡„±Ïr_ÏHµFGŒ¯N4ªµmzOð ùéj*ï}ôžáG_-¯?óÕ‘«õcɯ³ ×£fü¡ù¾õ¹I}§þ¡Wçœ~}ZW(ðD=l~ú»šr§úXíF½XY½‹òÍ_šönŸ~½Õüäµùóy#nܹø¡I¨—@½F¢.¢O­P¦¹èÚÑ Eº_ ǃø‚cðÌPÑ ­¢µ´Š£¡SùÔPÎZ(ç {&Gꉒhå ×ÊY[+gc˜O?OsŠt±€rÔ‹8Å3hi°ÖÒðßhi=­¥‘ZKU¼F<³PGÏB8³ð j¶Õ @µVšª¤U+X«VˆÖ«ZZ¯Bµ^ÕÖz†^í¤Ÿ]¨V°V­ðª©U+JÏi-æo€YÏõ\ˆ®gü9_‡g£ªñ×5\ˆ®m kMxmT8þäàf¼6ê]±…èj'TW;õjTlaºÚ©¥«p]íéj'BW;qzêUl¬W±¡®s¤^¡X]Ã4Ô³ßXÏ{3]«H]«Dê™­£gÖOÏl}=³Á¾™ÕUJS]¥ÔÖUJ€®Rjë*%@W)Mt•ÒH×'te£+“]™굑º‘2VÆ^WøSkvàŒQ“øSeöäµQ™4 ÊÀk£>ñוeˆ®RBu•R¯FeYKW)AºJ‰ÐUJœ®Rê*EêõŽÕ«ÛP¯k3]‡DêµôÓkì[KFþkkÑCWã7Ëaedü¯¯í…Ðÿ¿÷fƱƒu3jÚ^‡èÊÞ¸ %HãÚ?¿ÿ/Àø¿¾åeÃ#¿¿¾ôÓâ¯ùSÿØñ¡69êSÕÕŠEÑÊyðoéû]½ûžŽ®¶V ì/ª=Æþñ/õáSø¥æ»™úqß^åÓ…/Ìw;Sÿ á8UÁ$aüê•Ëêÿ>ÎëÇwÌw>%0wã¦z~nì6«ØrQYžbo;X×d‹„ñ÷›¿òwH ÇwÃ)ƒQû¨÷÷Sç‚E‡üÈ÷¤fÔmü4ëüõÞ @sÏ_ï 45}{Úö×ùÉ_ÇfÎOáæaä¤Z:…ë<äÛ-Ö×¹§Î= uî©£sO#{êꬬ³N€Î:ÕÚ1†ldd©÷RÍ´4Ó(@é<¨U ™ÎF~:Åél£³}ÓÙ¾^S´ÎIaÛfr¥‘™BtfŠ×³¥3S¼ž£(=GazŽjë9ŠÕsT[ÏQ¬ž£P=G¡zŽ"u–ŠÓY*NkGS¥üu– ×ù)\g¦Z:'ùv¸õuj óP‡êê ¬3P€Î@q:I­2Ít Ð*ÓLg#?b´¾4Õ9©žÎIÑ:'5ÖúÒŒš 3Ó<±×Q6¤¨ÏØ=Pc•î­êkÉ Œ£=/{ÈÆ²‰|Nï㌌eœ—z7,õþRŸóøœÎ¬F+©ÿA#¶ÂtlÕÒ ¨#,H+`Í¿bêh Ð ¨c.H+` Ž¼ ­€:þüuüIµuüùéø Õñ¢µ/XÇA]õtÔÑq®ã ŽŽƒp‘z#´úêu Óë¤Õ'°úo=ym¬q€VŸ@½ÒAZ}õzûëõ–z½ýôz‡hÝ þb3Š2xœe’9hTQFÏ7 A‚ c ""©Rˆ8·ãhbÔ8.f Š A,FŒظ4™™,¨XY‰àãÄÂB¬¬¬¬¬¬¬¬lâ÷þLgqÏáÞûîòý÷! 
™mœ&Õ×?˜¥e¬T>KgaÅye‡ÖÐZIÕ-ÆÎœ*Ñì ®õ÷žu .öR4Ñ»Šeuw¹uº-©Ïfh MŽíîçbu¥6¸×¬¬ç Ç)ú¦çç·¨q—ûñ…o|ç?ùÅoþð×4i©ZÕ®ÕZ§.uk£zÔ«Œ†”eKœÖíì‰G8Îû”ĆÃE-—ȆG}‡ÄeŽ&V#{ú®°/\e \c0<ÉþðCái6;W7=fš­æýfž=fCfQmf‰Ãæ¨Vš'9f–Õižç)Ÿ¿ÃœÐu³ÂN³J¯Yc—9IŸ9ÅnsZíæŒnš·ÕaÞÑ*ó+|MÞ'ó_…+ÌpÏ5~Àcæ\å|æ+— ì ¯Â9®„ó\ ô:±ïô&\Ñ|¸ª÷ášÞ†'õ.<¥ái.$µÐ“xýñ¨ÅŨťÈü4r>Krj6É©IN=Orj.É©—IÎøÛüBÿÃrpÉxœc`aYÈÜÂÀÊÀÀZÁ*ÂÀÀ(¡™w1$1™300ñ³211±031/``XÀ àÅ%•> ~³°yý«bœÀ¾q•Ãd /ëF ¥ÀÀ |X²xœÕ”‡sTUÅç%„^J /%B'€‚ŠŠŠ‚¤„"½w[èQÄFDD (½X@ÅFï!”PE#›Ua”AÄo_3ŒøxgνûfÎÞ·ßù _N-DhU¶“üs„S$ô…Èr“¥ªÿõä†å°œC|ÃI$“ƒlc;{ø‘•|Á>á² ò-ß©ß+‚Ó¬f¿sŽ?XÁa»S)LQŠS‚’”&’²”£<¨D.ÑxT'†ZÔ¦±ÔeŸ³“ýlæyêOÑŒºÒô¦ÂÀ@3ŒŒdãHb2S˜Ê4¦³‹OÙÍ{ËêªI!Õ×ýê¨>£Ñ«ñ§‰4T’žÓ$MÑËJÑl½¢zK •¦ÅZ¤ÍÚ¤-ÚÊ^Žó_³-d0†£á3IåW Ø?þ7çù“¿ÈÒa.q•+,ã23HV&õ¢’*Ärl ¿é³µ8rUM¨ÂP6²‰÷ù€wy_ÈU††ó_ò!ÑšN¤³•u¬W)yŠW´*N­t›Z«¥ÔCŠT{õRoõ¤³˜Ã\æ±ù¤±€E¤ØÃÃ;ÛžHÿ¾Ø’ì­SJN3g“îœ ›¶*lcØÖ¨n)·¬[Ñr=7Æw[¸mÝ¥î wmtd´ç9^„WÒ+ã•ó*zQ^¬×Îëíõ¯±ïbîåðÜ\?/Åmž‹Í½¼¹ï6÷“×Ý'›{¤[ޭ캾{‚¹/¹‰{is¯pݽŸï޹KeB?;®Ö+¦9—rÎæœàšk &ƒ¶/3%c³Ïd¯®ÎN\4ƒ@Ž)+X˜ YM •å#]dŸLÓ±<åpÂp‚Îiç¬>›§r2Où§#þžù¿'ȱü†ò‚,>3 R-ÿ3,ÇìöG-mÇ9¯Â*ÂLˆe5MEIfƒŠ©¸JXú]ˬgiqXÀHŒ3ã-Å-óRL#³½ŸäêÈÏTVO?Ï}4”ª„xcÄŽ3f'O Ø$ŸÙ£Ö˜¥©Q»Ð¸M3fACŒÛÍ!r•Îp£©†&(FONœjé)ÕÖÓŠÕ³ª£gT—ƪG5ÒT5Ö45ÑtŠÑ\ 4YMõ‚ši¦šëE%h–ZÐR$êvÍQ¥ê½JzêNzé.½¤¶š«»õ}uæé^½Î­ôWËì-ôS;Í×zS÷é ¡"ƒô°ÞÖ#zGjI¨ô˜–ª³–«“–éq­P­¤£ÕU«ÔCkÕñÔ`‚žP”új›úi»úk‡h'5™¤ÁÚ£aÚ§á¡ý¥ƒ©ŒU7­V¢ÖX7¤ä7Ã"k‰¨]¤Ý¢½~…úgßDë­óÒ­í¬$9Öi?Ù\sü–kåwR;¬Qþ#$x(xœc`@ŒŒ Ïž±nd``½ÉÂËÀðo;û¶a¬ÿ`½Ï"ûÿÿ٠># xœ­VùsÓF–|$NBŽ’ƒõX±qšÚ+“RL’eÜùZ J+ÅNzÐ2Ãßà¿æÉ´3ô7þ´~oe›@’v†i&£÷íî§}÷“ÉP‚Œ½À…h=3f¶Z4¶s/ Ë­†Ñ¡èí”)ÆŒ‚ÑéÈ}˶ÉÉðd½o˜†¹™ŠDtèPF‰® çmÊ­Ü믚“žßñ·ï¶´­^ ¨ÝlÚ -AUFÕ0IJŠ»´Š­ÁJП¯1óy;0¢ šlvŸM2Zg´YQ†™å0”d´ƒƒ0t(«îÉc”÷Úå¥KcÒ…ù!™‘C9%a—è&ù}WðIªœŸ8÷;”-ÙØ÷DOôpw²–/­­ j[ñvȧ›;Ž,vj Ù¡¼¢q¯Ü72ihư”®Dˆ¥SfÿÌî§|É¡q%ØÈ)¯ó,gì ¾6£)Q]YPýñ)ÃóÝ’= ö„z9ø“é-f&xð8~OÆœ)Ãâh’°`äÐJÊe\OULò:-ã-ÃzáÚÑ—Î(íPj2ë¶%í°d;4­’LƧn\whF(ñîòëÒ išWÛXMcåÐ,®™Ó!ˆ@ziÆ‹D/4ƒ 94§Z»A’ëÖÃeš>OzCµ¶‚ÖNºiÙØŸ×ûgUbÌz{A2;‹üÅ.Í–¹HQºnr†Óx¹„Ld‹í áàÁ[·‡ô²Ú’-ñÚ[é9¿‚Úçž4a»/§ê”&†1/-Œ¾iš:WóÊHŒŒ¿Ьt…OS(ÊI(Ž\Aý_ss¦1c¸n/JÎŽ•éqÙº€0-À·ù²C‹*1Y.!Î,Ï©$ËòM•äX¾¥’<Ëó*ci©dœåÛ*)°|G%,?P¢Bæ‡JÀ6sì0‡Á.sì1ç6ÀÌað%sÌa2ǸÇ÷™Ãà+æ0xÀœÀ×Ìað 
sDÌa3ÇØgƒst™Ãà@ÑõQ˜yA@ßjt è;]OXlbñ½¢#ö¼Ðì5böO1õgEµõ^hê¯1õ7˜úPÑÍõ/4õw˜ú‡FL}¬žNä2ÃOn™ ”]n?~Sœ+qMcÿÿxœ½}|SG¶·æÞ+ÉE–¬.«Y]¶¬b[Vqï wƒ 6Å`›Þ{¯¡w!@IH ÙÒ l éeI6ÙeÙ$›¶Ù$›ÍæeÙ”}Y°¯¿™¹W²dd ï}¿’þ9gæœ3gfÎÌåPœ4‡Êçžá˜9N>§˜Ã‘o¶žË„”˜í¶"Ê—c3›„„Ùd—æì—ð£›F±1ÿ~UttzãÒV‡ÚîqšxšKŠ8©-+C§¶©I†¬HåÌ3ó2RR2òŒæ<§Š~œ’^û¯rnß>j]fqZÍ”¢@sžÃb’wγ2Ì6gª25!)ÁÒ?˜ëP©¹Fö•Úrµ©™+æp8ÄÀW÷!È{GÁáÒ ìBÀ'Ñdó#_ÌF`$÷l™Ä"“h3ù}Õé¶÷_nOÌuÐ?9 “p=¶©…ÓlôÂþ?¤hZë‚!í/èÜç1mw$mHÔdóåøæÈJ€Yä2…75’Ü®ò‚yZþ̹uI×UœSah4"‰ Ž+çG²Ñ÷°\7ªÔ;Åâæj“¼Ø¦Ò$)…|}Êáð!_\Ž ñ…™’åF)ì2Ę<Ð$M—½B´¥ƒwé&!í»˜Fëæ&—º8ÄEj"á"vLvÒ%Ä:úÑ«ô ºTÓ†tîºCrvAø Ö!ठé­P,_Ž›È)„—}oö±jÀ׬ F1ñvy[¦DîUòíé`g±Qí™{ç¥Í…Ú‚ e{·×ì¸x[ÿù“¿ÂœVàò)úPrZ•/§Ò!éNÙß™5¶©Le>ºjü½KËQNÁÀ®Šû"'~ÈV¤e6Yl°~‰ß‚ÔP‰Ä5¹)¤r™3(T@•¸y ýßO¤ÿs¶{Æ ¸¯ußšé õ‰Á’‘ßäµcr púÎfw}Yž^ Ëãw-\<ÿu ¾÷~ ~mŽÎ[ã8,T&+5Ê䢥ÍwjiQ¼Ü¬IHB|ÑG1_ °m„@ô$c6{Àï͆jà‡&c¸‰›² K+¹ª†¿Ü‘YlM¶Wõ¤WÚ´<­H+W+ kÝû¾¼gÔ­ð/‚¿?ya’U`°»SÌÁ4y\Üx _’¨%n­zpXñ‰dS¨ï¹gÙ¾çÛjV6¥p± ˆçܳ³_£ÿþpÿ[„ÿa |m6}28ýФÏ?ŸthzøàýÓ˳¹gæ¼Dÿt÷¬SËKûF—.?…lªÒ¾i'sŒ¨GCZ%öÊÍb)ªBÎÖgöyÅÄûˆ¦ûãŸãjØJOž¤8Ѧ·Ÿ£7Qáºp½ÄÚsÄ ØGH‡°.(Cû3À_À[´Ÿhïk'¦cu«=ú }ÿÆ<ø›HÃN´þHRã" ÙÍÚ{ÿÔòaóËZv¸>–Çx\ß ‡ßÓBúQL`}­ÿM¦8,Ÿ í¡ê~0Š3C­Áî&5Ìð!vÉZ‚7¯û“ô¿_0ëM ;=öÞ-óœÝÊZGZõˆVoס™y%‹ïï)ìñì5Xˆú·Ž´}Nãs²ZeV$”¯xdú¢'W•ªd›Ô,ÿÜMá6ÿán¢í?M? FÍÈ}ÓúqÏôßCL‚ú•Äáðtð7$G‰¤­fög°Ó}ð3üë¥ ûOÍà³ ¡;)0j'•pþüµ#zTÂó‡ÿq8,à/˜&'2þ‚êF.~7>AÄÃ÷øaÐyéY%ÏvÜL0*'¢?é?…»,Ò Cº²+Ä×.yBÆí{H±‘øºpåù5¹s $¤Å(Î6gÕfª@+‘¼â=Má«jC¢¦xJùP¨O@ZzøÁ›Žð gU±ÌMØdàŸú댠FWž lt³Ûg+Ò€sôïuézEý´$ài¥OV²Ç>²éúŸ(Ô³ÓúÇÅ%+EàšVÙ€mXy !l¿‘×úöƒV ¤¿çž¹6—! 
nêa1S†ïË)F¯V¹ŒÏSú¾ý§,ÛáOÓèÑïÀÚ¾°‹ª‰Gû¿9ñ‰GtˆÞ00ÀÖógމ#‡4IÎ0>T?ïH;¡ ;vM€Màh‚8Bê ß+‹l ÒËû¾LRôjI¡Ým/”Ыá‡Ë€PÕò°TfC¾À’¹Ú¨@EVé*ÌýR³Åb&þi©Ôõçž!äÒ~­5Ã9ÊF|&•'aÝÁ:Œ´Qld[lxTçÓvŸº¿…"éË+ô*…xŠK¡zRRÉÉöZC”…J”‰ûîp4™I’¥ ÇD×è²á£!Ø4 AŒz2ºÏÖy335¦³"^˜,–•ô·_WïÞ±Gæsĉ·™$E6·È®lÈ_ðÌÐ_H9ö/ð¬K‘xfìb¦¿~}öì×êáïÞ}yãÃÝ!åÍ ÞÉÇŽä7,|ƒ¾r÷¬Ç–•”,{lÖìÇ–/{Œƒû¡6 $><šÄ'3ˆû ÅÞ:!+kÂÖÑÛº²²º¶‘v¬YÓþñ¦#מèî~âÚ‘M·“èò•§gÏ:½²¾Îš _É_³cÅÆöÁVmo<=?“© .%à1ExØ”²!ö Äž‹ô…ãKNŸ>òñžŠª½ßyš>E|µf~vk¾Î&~[»îÔäÙOo¨¥S ¥HššØ4‘ ò˜T¡Ù_»ž.EÿéæÑÿä-Áí‡F)V¯BmÈ„æ¼!ZˆÃHp ¤\!e£>§¿)èJª­{ˆ>ÚkgV°Z6›Õºk§:~+¢‘;´!{0®ñay-¡ØÃå…ÙÉ{ i•ÄÔÊIEÚÕ·m%Ûéwڬߋöv(ù‡Ç>ÝUb1»Ú˜§$)sÑÈ̉³èæ¾¾ð &¥ÿKî™ÒEÇÆW¯™9J#öš±Ð\¡Ô°= Ùñ5êO~½S€ÍÄó±S,%ù¸È¨VúÊÛ]ãƒc¯X<6XµéåU»6.ÊèØ•ú†ފqS‚c­X46X¶îü²òµ zìÔ6•JgѵT:ó挼Îõc'Ÿ¾­Në­Ê8&VÚܶ–1Þ‡Áž;~ǔ֣+jå鹆Oú(æsL,>‡qÉ_À¾ê¡I‰1ã\WÑʽö_$UÇݫۓ1ÂݼԬRÛ „hœ¥€ºÀÇqþC=@O¡OÑS¨ÍÔ‰kc©@Ë¡¹ñ)XNCú˜2à¤/Á_\‚¯øWô™sýÝıs  èq›®„ñZü} «qðwl§ã·z‚ø¯Š™#ì uôñ¯÷íÿúx;hµ˜ ]£–7]}÷mþìxgçñÏ6Ã÷Ç–·¹0M8/E43Ùq…;@u“lL8Âí*_sv¡%kH šW¼ñûq­ˆû¿utºT^`‰Y3'8â»ï½¼6ÄÃçÂsp ‡'ÂFè Ã7ðrê?G¿Ó$à 0PËlb¡YTpŒÆOî™tCßÞåee˽¤@‹å£8œ¸F&6@ô˜y7KX ûÒ"Ð'ª-WÄ5ÒEô;ôwqB>_Gß­3?þxI¼Dým|¾£ÖÿZ'Ž#®WNn‘{µZ¯¼o9÷Lš±?ÎÒæžPKü¬…óîþ¸ªW“™øYçÄ2þeœ yÒ¢ø…<>jic<#.Ë…pçÒ§Œéú£¸úŠÐ¦§_¤ A«1ULܤ8Z˜ªtÀ ´"™N“™ˆïˆ—Å2Zž¤JO*dýu¸¾/a}£ÙØÍB²ãŠH¯‘¬¦Oõ_§WüŽÚ‘,²«/òe³8 GÿeÂAnI7Ðc²G;2Fg‡’Ôò$&&üSY}–A:£à éÿA±¨ñôµ£ÉBâTÿ,ªŒPg+úä)H¬|~ÜÙÈ}ŠñºˆÄJ}A ¼xd³ëÆ/«Òä :Ýý¿ŸðåmîœÜít‰öš†Ñ¾êE-Îàöo¯« Tr,§œ¤T·Æ—áðSG¶Î>ØÑ¿LeË5f›ÅÖš™UÁ…]%”sl‹J¯2(0opþ–€y«‹âmÐ*À0¼yÂüGÍ Û±ÁT5ÀÀ}8~ƒ¬Ló‡ú_D¦”3ÕÅUëb‹0)$ãàDƒäX` —eJ‚ÚÐ1(4j+Ty|%RaóDzÙÑ‹-¦„Èd xÄFfaÂK|v/íŒÅä‡ÿNâ=›A[± ­uF‘ qJš®c8-[ÿšE/n¯wÛŸ=÷ ˆ'䩜{εK™9G`/ÀÄÇ‘ Á1‘o·ù‹Ñàè&༠¥‚Ç'¼®‘%i¤nf›dPgÖƒÖÙ««¡nÍ>¿µ¡|å£Ó:wåÊÚë/åI’õnYšBf”$Zf¼Ôv¶H,R_26m™R<ÒcÚÒÞrùÉTúÏ/üû}#Ë×[Üyb͈€·uMöª‰:o²’—Äã)MýTqIÖÌnu¶²¡ÔÛ8Êc¡îɹ‰@|'ͨÊÓ›L‰ÖMö7z±ÑÌ]¼Ø7û…=#«7=·tê#++yÉ9q›µ*`šÜõä†Åjƒš±›êïnŸv~gkÉŠ3ó°ç€<À1€á¡b†˜nʘ©¡eTº9sH•_Üý«Õ57âÕž›–—˜#cª.G|óÎW×E±m¨Ä§Ü·9Ç+¶»A!ðZ9^ÜÓ´^út…9cîÌ#ݶ3•ê¤ô’ Å'ƒÂ8“qE~ÇÞ)Áø„;“åI¼äôroÖ¥¾7qÁñ„Ÿ ç㹜°º˜Ž”Q>¦c0®÷Š£}1µ/¦›57Ø|my©?ü~ 1Z¸ñw·çŒ0£Bí¢É{®w¿¡ÄR2Ú› 9f…ÉÇ—¸ø8p­4Òu‡ä‰ÊÓÂéŽÕb¬(ÆþoAfsEKÕÚD–µ5˜5(ÕÍÚ€>Æ-"9#š½cß”@|ü†”ä´r¯>,iÿk7n˜@ß›Ñã«;Ü¥ÜÏÐ:ËÍZbˆ6!‘cËCÅ8±Ùo¢u,« o|>ì'ß 
Ö¡¥?j]øæÆ:Gí Þ6¶í¶N7ýVU…·ÃA¿5¬~‘gæçOß×Ö?›¸½¾-ÃÚ¯½‘B1q1ä-þÈ[ý­hPX†g¸¶D\º&/¨»4Ã5-ÃrB\ÿ7”/€Íœ‘‰ZÄý<¶ É4ÈkéÖ?ìwM4'nõ´­M©Ä~ÈÉ)–Ñ„WÖÄÇß’l(žPȽöSË`&vŽýC„#û‘T´ö(·ž6;”Míó*4 }¶ïvýû¹×ñ“TÙDš‹Ææ±DVŽìêöî%8wϺ^"¹Œ;"T/Þ9Šð²Ԉet¶­jÑg©Ìr‰•oÁ;“¸fI¼‘Erù`aëÞrÛØ,^ÜýJqaí †#jo¢x$O'ä ‘ƒâ ^WºÄ1söuØ#Uãxá^ˆ£µ×ªÐÁààÇòÜP"òUµ+ßdÈÍP«3r ¦|—ÌIq曌yΔgžÑ”ïL¹v[L¹"C0]¥JBûïC>_}wx‰YùÄX¾ƒò±¸ã#ä†:ÇßG NÜÐ ·­fV­×m²•×^C¯Ûrc #¤³Hn¢Eḛ̀mÅú8ÁÍdÆ ‡÷1MsxGÓZ£üäñ3´9XñMGM)6©¿ÜË Œý^ÇpÅöK{ýMþÔ¸£ÈÞÇ&6L)TûCl 7rÀqBEýw©r\Jv]VŵŸX=ãÿûËÄiÆvµƒú[HÿøïFà{©¯BvÇ¿ÈÚÝ„[±»[kƒHíàM¸¹5Ò 7l¢(½º½aã ÑÆpû<Ñ>Pû°øóøAÜnØnù5Pw›¡îÞÀ’Jb˜÷Ä_¤å1->Ê./qü·®ó‘ý}uêË_ØQ¨ÝïÂchæ ýeä3³<Œ›lÛ1ÕV •ÅâÖ$ë ¨]?)¿\&Ð{ÓÈ\v<§ïÂþ«æêÀõÜý‚®¿Žå[ïô륈ös0¦+* ŠàЪ 4j—ÝÞŠ åžÅ–8jsëô¢W$n ÐÒ?Úr”61°Ð_›³Å¯õú‚ˆ .*ÈLˆë{F,–”zO¶AìÒÒÉJ˜›"¢}‘[8ýäyÔ­Å¡·*Å-¤·*ÚÍãÓ[’vÐ.©­Ü÷8©×É›’Ñ9³ZÔýXÇ)ú¢·KßÕóÙe–KÓ‹«Wó©Ÿîô^Ë¥°Í íÀ­è²7º¶á ±Ðy¼åM–…5æ*D²Ž›®p çø…x‡užŠTvÑÓ=±“5cÀÚFNÝn6Š•bQ‚P?¡à`óˆù iôžhž²]E‚àqÈ@¿ÅÃç' ow¹2šæUª°«ºnÅ` “~ï‡å7®òÛö‹æ•!VÉxËSÎÿô@l¡‡ë‚Ø"ÅlÖoôÇ *•“µ²“Ý·1[¡ZN²£ñŽY.„oðÒRhq6j[—ÙnÀ‹ðÌ‹7[ ~l¨¬lh¨[@Q6ã¶RUšÙRTÞî•l¹ŽÇ/dŒÊ­0Ç-^±x\°bã…U+Î,6רœÉæKË„²…GÇnœB6×Ôë³­òþ»­F!É“e¥*,jQ²ÖN9æ«õqb…;¥qÕhIÉÕje<™¨Î›TWµyZ±»®ÛW9N«¬ÉUz³œâŒ]Wödã\)hrî(['ÖÝî:qþóí`œ1°Ñâ[aœÑ‰ÛuçÀl¼·õ9ÄÑÚ°.vl˜ÜŽ ­kófàG©FäN\c{ÿbVÃ(Xc9Õ{î¹çÎ1k0~¯q?eóËÐlphÄw²Ý®Ï©´Ûʼ:·Ìf¯ÌÑSI×~ ’€ÖZš¥Óe•ZmeÙZmvC³Š>¾‚4™œ†!K˜Òá³Ê@\d°Ê³l–Ùdà×FD<Ôâˆ*mˆ…«ÝlƒøŠÊ;›ÿyJB¹nXºé·Þ9ÄÀiú.,;“ïsݰ|}K {c6Ë¿¢YJ³ÀÀX7âK³æ‡Ôƒ.«SYݸáèî¹É€Åý`øqèNH×§B~ÿ÷uQIô¦aꢒp]f¨DÇ`½hßU.EæÓÇÒÖÓoµ,~J{Ê#Ê駈…O¤‰²‰)4ãuÈ'éAûx–äìáüâŸWMé8éÈ'£Ð»ˆÄÛ{dؤÓ4­éò$bu°´Î5a¥A]7zRfÇöñ™ô[Û‹G" sÔ©¤œ Æe”NjɬpÈ|“olú¾‘襣Æfè—„|¬å4DT²_¤õ[ô%µmÙQ¶ +'ïÖf¥Ñ®VÉähÞ…iº#hÆÈŽ®wïÑëÒæbÖ{jì]óJ"çˆ%|hÏ ù¢ˆD,#‹‘Ž ¢zÉK;v¼´déË»v½¼tæäÉ3çL™BéFluåª×wÔÕíx}ÕÊW·è{æÎ#GË(˜¹\¸Èœ³BHt—U!¶Ì–ˆ3²¼ªy÷•V!V6Íp‰4z»æÐ_d÷iÌ?&½÷H}>AV>¿ñ•¥J1Êâÿ\ya)âÎS¤ÐKâ_PvÞ¯ ì‰ZɱFH †ßh‘ûR‘[˾Þqãz'O¾±¯ì­Í…Üt³¯Ü=¶ÅP|aÕÔ3FÔm|zæ’óEùO® í$_µ8ÞFoûÈåîöq¡îÕ³­‘Ýgý òuU=—‘‡g_Oã™H)^¬H/´K™éUÅL+Žª“¥Î*ïµWØÉcê+ÍÍCc>Ït`_‚bs.>BS˜1*Ó‰ÂXœdfz’ª_nè:sAky°.òÎqXÄæ×w‡rNIäz©×ÍþsÝ„¬˜ýË"ù@9Œ×O¶–çÖ†¹Y{îbôDê/¬üÜ»˜¹-šáŘúEòs3’\@Ï»¾î0w{cízǘنf+úM-}‚+ÀñrËÍâú›È÷Sîáâ~ºqxÁˆ¯oqN0Œ°Æ>(lÄBhX©· .X™³17„z N§oÈäjgûÿ>X_I,ý¿ñŠÀLcí Vn¿‘Ñ 
3å2%¶âŠ Þvâ»èų›s{}C¢ô;.½C¿é+:CÄi>ש˱:dø.´ˆ ða˜ó?°[l†hóà=7Ä<γæ1îþŸ´ q)ån87n©HsÂ:Â]6¼-Ýbƒð; ü…ýåR ýî‚~N»ñ¼-2ÉåØS‰0Ê•„gUÀçPhq!§@çS¨{Û‰ýýsК ñWL“>çЙQgëbÑ6FåÕRSÙOŒQ!ðÇ:‹…¨•´Ñç¦yñ`.z8eÜKåÑsè¯è9€0ºâöJu‰ ûÁ8'»ð“ª¤=¹³ü³æ‚÷”©Øpáüº‹¢Ÿx$š=tœ—p"m8!›ÍFôòºèçûdÚ¸‰b>_œxPä½B?ÿ3¬uZ’,.I'Þ«Ý?®éq á– •–àš+Ìæ*sÃ’¯';»Ç~¯L…ŸÜ¾^ov¯sþÊZˬ_àœt||0)òQÌœ#¤jéËÆtÁ3\aÜ!”“þ-G?ÆTÁ3 ·¨¯[:ÒIÛ+[Z**ZZ*™¶3P{‰ ÷E|~˜Í¿±Pæq‹q¦ ÝÇ}1U÷“T øLþ–'"Úùuè .`wõÑéR¤4i”Iï}.Q+åú7ü:2^’Lx‰‰êç*µ—Ô2õ0}{RK÷)æó[Ån÷Eú?JéO:¶O¿à‰È¹L=°< u¥ˆô’séç •Füùs¥Æ¬ä×ÑMð L–Æøw2X€9K#‡‡ÄÈÖ¹>'Ñõ+øÖáè(>“äh³¹ñÏÍÖæH.ÿÔ›96-Rð ýÑ7—%ÆøQ¼$ôÃQñF ÷ ý)0&Æå‰âãDü±ÃV^‚J¥Œç[éo€ð…I|$5L’ˆÏæ¬ýLÍ' `ì/Ô.æð$!H–Å“|e†YœmŸSÆýs\’§Ï›To”‚ø·Aê(ñ ÷©ÐÉ?/«™HÍ¡µáŸUúgÍE£²¬%—X¥}ÞR4*ÓZêwqg¦¤O®¶)µJ |Sc‡oMw€TñMŒ¶Ã)ì Q8£ Q% ½4þY}^{0ØT˜­F‹OÏëóÛ‚Áæ‚lÞ‹bMª00¥É£·ê%|Ûœ ß2±ºšZBò¸§BgYo’Õ1îº ‡‡fhÜ4ë‚©WÈýŽ”ðu·Xo¢®8RR=ª \Lqä ¹a^y þt¥2ÝoH ¢M‚©C>3ýÛI!^ãÎEëüèzNbVÿîÜçñwÍ<ñ.ïùÐwÍĺþu¼çÆßÕP[ˆ÷¡ÎÊÙhÎ:†:ì°¦Þð~|J†ÙäL‰ßP¿†Ú’Ù2«PãÐ…ðŸÂY-™Lý^"ño¾ŸÿfèàÅ †JÓm]¾o}••ðŸ®Ûx‰ÆÊµY~Vé´Z;j³£T±™'a|ßÐ;c2vÕªÛÑ´²1™Y•ii•Y™cÊÒNÚÓ-‡%ÝÎõg·ä¥¦æµdg5SSƒMYf—Ëls:ý_sÿ‹˜Ï_}sú¿¶6;Ý­ÅVkq«ÛÙ\hÝ¢Ï{Ü]çÓé|ungmŽV›S댠¿ÚýìïP+’!gd}é%ôoÈ\æ5‹D™¢»Áý§¤)F%r„)bè±?*’ã½Ðþÿí?ïŽðÃô¸¡w„çw*‡iÿzq‡˜Eÿ”’m>ù”²È÷þC¦üPkø‡\ñ¡ÚÄôËej QÏý S¢}DýÐí®ÃZ–¥Õf•Y­åhϦœ¡ñ ·(ã·Å¤QKYÍÅ™Zmf±ýŠwÒTäÖjÝE& ƒ24> Ë9NäC¸Ã,jyÁc.R•>šc¹é9†îkô b G€v︑ EÊpIhC9Bí÷†bÅ™¼ŸÀl· bz€G¯"ÒA~’å'ÁÏÈOR"} ò“9mÈMÆë³«³¡›ô0¿P‰îãx¯Í(õ2úç/Qö)œ=qwàûP+†**ðN8Š_À$âqúG1º›@L|“vu­T®€N@ç Ë+ç;û»»&ŽE†rîá9 Îåã="œ‘‘GÏ•b#šEàóè¤@g§™Í>Š9…Þþ/ò¦5y gìimÙ3£ÐÓ4-¯ÿ oáã¹KsÛw÷æ¼  /Ï LÞÕA 8éÊóãê–jïÙ7Ñힸ¯§ýÐ’º8zO®ëdbœqÞþ_OÚþýãS§>þýöI¿Þ?Ï—È®óÂXð"ŒÙ ±[Ë}¨§ìz ÃùÃ7ÔEufe†£©,'9{eËÄRaü‘ÒjïWt¨ßë~o©Ô¢«2k³G9]Ýï•Pö?Ø0¡¶ŒXÞÌÏeê-ø7õ%À÷È`®çæƒÈ°*`r™Õ,¼òmÆJm¤>ÊrÍü$©:Uš£Ï.·ÔΨ4%“¹ôÏ”Òæ¯îðI3¬*¹)èÐð^é*ƒ\g l}iS£C°TeMIÊ·®Ù=~öÆQÒO-*Ká')Doäoºtx­Øš*ƒº°`à{ð{| ÞM„a¬]è@½-ëi…ä÷…Iã)±YK25šÌ«­Ä£ážéeÜS&<5Ž(¸ú­¥È­Ñ¸‹,–b·Zí.Fþ£pà{r&Ô +'ÝÏEÞòý\…Öѽ Ë'Ü9#×QÒPéåk/)â,Åž¦J_µ×&ÍO¹Žg*޲ÇßÖ\?·µÀ›ºó¾¬Ú|_a]NAµB£°^ûçP¡îÒ€»@ Ê©Ubc-"‡§áÏTƒ†o ¦)T®B›«Ì)GY\ÿ-Q'(ãfG¶–Ôºò͆@º*ÕWiËž¿`Žk`€Ó i¿BþÀ1qJGkœp¬Ä#ï€mþ ÆQ3¸šxëÈ÷àÈ ´ÊÀÖ(*ëÝCEtáï¹ÿ„¿‡t¡Î™9PLžä#DÖA?¹ìàe#úR7:ú«ÔC'Å 
tk¿ßfœÜÞ”âùb»Dœ¦ëÉ}mgÏ/Ë‹“É¥|I]ïÒbsv"ÎÅ,%çƒ<1¤~Æ7/c Ò'þé›8ázSBe‰>XÖÌ–…¿ëÐo Ovú1° 4âÑ scÇœ¡)« ú"äjJÃU¢Z‘D©4/ÐØþ4â‹/‡| Áòºä ÖéŸCz&ÎÌÃĬ§…³ü‹|“ ?(x"|J/<¾B >–%Ù§ÐWoŠ=I¶L"‘ŠÚ\(vƒýv°ß-.2ÿIŸ,Öc:S8+ KG ³ÅÀ€a::ÓFð諘‚X*‘,C4‰;=Ç-.4¿¯'ëÿd.»!²Ò0qžÀ<>¦áv‚´É9ãO"Ÿç¼›ÜÁýÝUdES€æí #Ñœ‘¥ÚHe­þC+äþÁëGåÒ‚Í<9ޱ¼bóÁ+WxòŸïdbÂÃôY° Çg¾•ÿ‡¯\AeúÞþ¹ïíèú¤Èi㥠»w£¿UF7ª²2, ÜüÞ«.¡Ö¡A{)à¯ÄFâãPL7ÐöA||óþ0ø‰ü„x3ôÝÃdNßEâM&7k.ø‰ûÇÁïær§\=ú®‰þ=Ù3p}Ç…Ój²§ïÛY/½ÄøÒ6Î\ÊKå`=b'm›Ÿœ2õÉ-[žœ:åÉÍDsïéõõO÷öœÞP_¿á4²3°’¤;Öï‹Í…-ÏÈB³¹p¤ÇÓRh&F¹šóM¦üf—« ½6ÁßçÑO$Ôs4_@ÃÒ[`¼´)‚Ç’ Jé*NO¢åIÛk§•¥¦–M«Ýž$O¤’Ò‹]JbAÏ/=3JeÕÊâšç}ýϟ׭ûùŸ_ÏoŽ“i­ªQϼü–ÓC?~†õhŸ°e…DœL@3!¯úVé*IcjÙ·~ý>¦†´¦†—C5|N›8ômþK¨†—ÞÀzò,Ñu!‰É#Qà|~4€ƒCsÌ.º\p™¹£®¦;õKùKRˆ§ úYðö7(ZÈÌ¡üƒ.q™çÀ‚ ô³KõN©K˜uXOüME(fÃiÌu%8üÂÑ‘žÐ…·zѰàcw Ñç|@eЏâüø„xYzÝ^.ƒoó‹ã2aÎŒC=žDa¢»@Ÿ®J„o<=‡fäp÷$+„üÆÊ@÷Ø1.[U¾S påW[]cÆv*ù0ÀÝìíÚØâ×Ùé,¨}LfÎÒ9;;ǹ[6Mð2z–ù.fùä9ƒMkð‡˜cÇ <”18SocÅîG7— Q•®'$µ #»¼›#d°Vç»g~•-J† ›tYfÃw+é!9ÜqÆô¨pæÏ·Ûì°{Íh€Cº ƒ]|¡@@©P¢È v:é–’˜#³ÉÒlûkö§©û¹¼$ƒ¾·Û¤vi4FIL¦âT£—ŽV¨-é¿~Lž&*Û–¶)ãIqâèec"áöe˜¯Lúj ÷(õy»TÎ̹êµ²Ñ7¾¹\ñž8QTжµË«Ô)Ão0Mr3˜„ïw‚vwòa@!“´ñnŸ+;QË=C~ÌŸ›ë?ô(N€å‰Áò&Î;8NxìÅc{þÀî‹0ÖND§ @(ÙMè ‡0˜IÙÿ|ž}0ë_ƒ¢ ´œýQÀ­ó×¹ËewÐï(-©Úä⮿¬ýà†æôº)E™ŽÜÙ÷Lt`Jÿ _в0Ûœc–Ä+¬Z0Mcâ’Í+ÕÄf÷ôüÌÆ`ªÙ@Â(Äee5su—-™<2µvgYãŽéE¹ãW”¤ ü9~µ#¨W{²rRéMù·Uj ûèéLÌÒ4 äMâ.…zã¡ÐÝkx>] …ŠÌUŠÈò+˜T/|]We¥¢æ{î -ü;UJö¸ÊtWº"Öšð]ZÕ0®ÖŒØöâ’õï=é‘lkÚ4k”ÞàItd/ßu|ܳô+[œ¶ÿÉß/šổ½ÁÒÅ÷u |IêJ»ËŒº¼Že‡Ç;kŠ:RFf¶Î.ÌÏýiFUºØÛ¹¢fÕsëÊ&>øéÚ{¿?Þ"Ö˜Äb¡v–1èP.þø/Lëñƒ³ œ£VÜõ›3ŸÛÕš¤ó˜Ê‚®æ9¥¹KfOªvðR““W3ð=Ý}•ΩäŒe×Ò£SÛø‘®·u°=(t±k`ðª3|É+ù³@7Åÿë…“ï_PT³ú¡I Oz[ãT2™ÎâsNžªÉ,Oó,ê¾^ߤ­'Ÿ›:õ…Û&ù|“¶xaêÔçNnä;Ùqô+VþñXgzÝŒ [Wüáh¡ÄO¯k¨]ûp÷”GVW6ÌŽ“K’M ÆÄ2ɳSŸ?±QÙzâù©SÂºö£ûÇ¿ÿ£µmG·Ìm̘pâ“h¹œBì Ã7²ÊvŠ·Õ2ûz1.Jm€”Ôg´|¾Ê“6 ü§®}y$΢7?ÈÇ%¦¨ ô`bï¾ñNYz©KgÓ—¯oÜ2Ä€H›¡QÛ5Â?Óþ–Ë«Ã"¬EBµÖÚÿ&Á0w“¥¢·ÄÑPš#fÈ×7Ó4Œ-Ô“õÜøþ/;ŽšÍК­b³/€v÷LvR ¡ð*½¯ö×úà¥\÷yáE¶>[±Lpþ\’T”7!!9N(맨¹g~EW€gÕWßú{§ÄÍê#&|AÂr‡Â•öŒ½c×Kô“IÜ—™5åë×/Ê„ÑìÞÔа¹'ìÙÜа©;xY‘0iŠÐ+Õ5bÃÄ`pâ†5ë'ææN\_eÌÍP©2r¦|´’êìäúàÜâò/®¢OÊúÕþfZ%ÒlÊ‚Œ\—M)ÑÅ“9²£«gî6Õ;‹ Â¯Ñ¸/&·m~rî¬çv·óvjÜWx§‹%0 Œë˜§ÑÈ„‰P`|A¡u3´j†fYãµÍΞ÷bUˆ Þ½ì'_Y̯wTÞÒ'V´Ü6µQ·ÊÑd´9[šÀª$%Ô™]6W§ü[jÊÒ÷U 
ÝÚóÄciùïw=²¡!½¶'·QÁ5Ûmóg|°D¤Q$-˜äHH\«p[•çyê´ õ;´ûOÈßaƒpàôMÎó1¼Ñ·¾AÖHÉðmƒ—è·˜×ÿÐËáô O*R´ŠA ˆµ'Æœøøcø™m7Ñ£}Nggøº]ì[Sëñ]Éèô™™ïeö#ø°kDÀŒC÷€ÔLâ][èŠ7`ÆÑ Nt ½RÔ€|’xàdúI‚§Š/,+NŸåIKL~­j¯Üš’ÀÍ<‘ÅÕX]ªíUcüxúë¨PEN|mæîª“UÛUv€Ê!›¿¯¬ñd|(›H/–™œ) ™þ‹.]-; ôí·X=>z)Ø–¬±*èïiW¨‰÷ÙF_ª×ÝgKMlÝ÷ú¢E¯îmiÙûê"”®L_°7§§7Z­…MééÍÅ6âƒè¯Ïuvžª^ªsçè¯_¸óÅ9éés^¼óÐ…¹éés/0~i2l; ÷q´lE…n.†­‡&p¶Ciú^*‚8  =¿¼8Ÿ}öñ©6W·]ÌçK6€Õ×Ü8‡t  Nk„v Ðz?>à „ô‹¼ï5¿›×ÇQéÁþæõȶÊ.‰¼¢øä±[NõÎzlE©«qjý×À,7P\OöêÊ4àAwW—oôøZÀ’ ›}|–¯xÞ]ã›·®\˜O$–àylφ©ý¡EÚ@KÎ÷£vŸ<íøš.óvÿ8™krENƒWT°|ÖØy…²ÔñQ8m[}åôŸ¤í§E.Èuu˜”Ë,#ëÌ*‹§Î¯»¬Íªr´3œ£{ã¯Áö‹ï«Æ7"âí9Ú@±Šóaëg¹\z-©IÕuÒß÷?­óð ÄqFøÁ*s‹³ÿ‹SJ}4Y4‡Éí}›Bû W(ô¸纛¢²±¡Ä|»”õ àÙôzËÂÓË Gx{EõúùíEiJ½¯xTQј@ʲ9Ý.¬"¾‘eTeѧãxUËï¿øÅ zo¹mLn´Ò«»ü3æZsˆÖrìrf­ÊØy‘1û?|ìKÑLˆ'ÓT“Z@óä %ŸqJ®­0E*àžÙW­ºö®£€’:*S¥)IqN ¤åƒý*Äçdì2 2 hÄ^ô%\ì]–`ñ[=3SnO­Wó³P8nqõî…5µ[ž]´äüúÊöú5y›ŸþœüñÃ¥=¾¿Ö˜×DóĆä,ï´c³zNol¨ÙðĬ=Ÿ÷Òeº4(:PSE|‡Öx‘­xÅ!K xÅD]°—-Ò«eÜŠïÏoݺ8üÖD./‰ßùzÿfÿþ¾—ø7óü ÖFŠ#l„Œ´ê>Bm¶÷¿–,{MfV‹djyŠ N¢LU¬ŠÏn˜œ_2©Ô’î×N6–¨_²¦®©”¹¥/É<—Te•)’$‰%-“«µµ¸Ò+ŠòôS1i?ã y6½0ÀÚˆÿ:!íyñÙ#ºüE]Åw ˜›ÕØ•3º&W1ÑV$Ú$zS¢.Ëš–¥‰“T)ñqb¹^AæÜ*sÑhµ´*·lBVìnJ(Tªef]Š>™»3A¢•¨Í‰H–(Øßl«y°­û@Î„ÏØ‡ÜìcìÃ+ÓHîIpR!I;G§3p% æ2°ÏV¿š)RÝqGŠtxFm€nô^eîõ·‡×ÊoO6Zªd³¾Þ¿±-Å(ã6y¼ìZòý”—º ?¯,@×è)/½»Þ æõD½ìþì¤1¶·šÈ¢IQhÝz5¹»o)ú„É}àœ£2©Åx­>ØãA (㹓>OŸ{ÔÓo>à}€~ Œ8ªèswƒz¸›þ-ðÞ dz(þjãìƒ<ýŸÍ`êC"mñÐuoò“>(ºK4ô3äüaÈÎQÿ¸ë¶í`¯¬Ë¡Œ¯±ûøá¤ð/1ú¢áSÜœ#T"µŸÉ€Eøæì¸lÆ€—ï•_ÖƒtÊâÿ”ÔÓ‹0ó£™gˆ¿íýäÓO?ÙûéÅ‹ŸÂº¶ƒäU²“ŠOÉ É»’YˆCfÏ·3ª+'Õ¦b¯…g’‹«L¾™S&:Ì%> Ï!Ï-¯µáÏ7ùž<–,Wð^ÐØT‰:‹N¤PÅÓ;4øžÞ-’+ø4Vø~£Œ;§CߘÙu©ÝœÈsÔ¼„ö‘¸h ö.²2b†·´Ô ´eßeÒ¡Ô¿M‰RŽ$Ù•.ýIi"ïBWYr¬ÿVÀf·¢CäÕ¾ËJ Ð"Jô%ñérºþ0é¢uXiá¾jã4C}ùö•ˆÕù *cÍ ? 
«³ûŗô‹ øSÐLŸÆ¿?Îi&/Rÿ€¿—³¿ÇGI#Å9N:ú.ƒò«-¥:dW#¡Rìˆ_.Š“–';”ï€uËÁêw”"HóYHó«(š¬tЋ³â=‹h¢¿j;HÒ•Zªóéïí)ˆ¦~9½î¥#y¹@'’/e(ßAmµ¨) ÚÛ*¢è«ýËÈÝ@ ”Ÿ01bËÀßyJîsºk °CtÁ¡ëÜŠXR©Åöúyµ Z\ÄÄþ{s&íS¸`Ú„ô“žeÇÞX}ŠþúµÙã7|yöª‹÷.ivP×hò‰¦Õ£Ý™k2è{š·öU¾Q… k4¥Ï|$ŸœþâƒÛ&ùò¦î~ð7“0O9?RÇàœY€¢y€tx­]B rÃùd«ktµ|Ìí3ò}¯9ZÒ„‹vï¬[gíy²ï°XçP s2„€“:mëck–wø(]®Žúk횇'YGOèõ:}ý'âĉ=/ÿúàœ"ÏÈy¥=§î\—Ó³n„Ö¨0æ·íè ä,úÍ–8Ÿ]Wá\æN!ßÄqjSôl›¯'ø‚ÏðsZ"sCíÑ7d*‹Hr@é0Je† …Ú™f‘H,iÎþ߆ߪá—2™ÑA,¨ZÓ v­©òwÖiÔEucýadl]‘ZST× Þrl¶ƒPnÉÖé²-òÐk'@°wGëÈ퓃jwÙ\àV'oÙº£7¨v¡Ý<—šÑŒ›z¸ÏsÄ(’µFŠ£ Cc$½€’ß-ÍY›'‰×¹KÅcó´#Ÿr´wÍ)ŸøÀ²Š†£ßÞÓu|IyëžçŒ]ç2ä6¹%¹™±Qò¾yä¶É&èeÝ#UNžC7xÍÒ†ýïÞv‰~øÊ†¢Ç'/yóöQþ,ËÈU-i2•LªÃ9¯×èÓäßy“ñŒ!´”€™å“8+ØŽò¹ð9ÉcIÕ³v·Ù:Ò“ žX§Ê­qʸk…U,ø!ŒzgúáÉ™bÙ‘29^æ(u]›?ý䈃ŽñaG}Ä;òíã1´%Î Å ÊÔGýŸÐWÿZ«t¤`žÑ—‡rG$0&ΩçøÏXb^’Îmœ˜ªü[f×Þä ¦£ÜoN}–ÛÎ>wgðé9FnûAú}”¾p›L=}pÐßkŒŸ­„¿á²¹¬È°yÖñy#—Û¯ïÒ™ÄW×JDb"§ÿ¢XD] žîxLÿ¯®NUxdÄ‹Ä9Y¦b*?#ãÝ Ž¾O|zn÷è ú0ýpö¤ìì‰Ù`tßG¤…´0yÊKèÅÄï¸o³7ŒžÅ‰u&<ú:â͈s6±Î…G\8uŠæ‹¡GÃÉÒÈ[Q˜6„1 5fð™8xºÎÜYNò;ÀÓô  ¿¡=z”Ü@=C‘¾—OÐ?ÄÀp‚4ŒÆ\›ã½3¼SsñÁçõ™}Ã.Z·ÓÁªL[ŒeˆsçÎeÚºc­[Œ£Ë´ ]ŠXvb|‡Î3cèÂÅLÆ—!=ÞŽ×JEè Ž,Œr±ÑâB3ŠC-€oêß´çøô7 …—š?¦ÐB\X+¨Ûta½…~iÙ2нåÕÕq«‰Çžê¬ç@wV•KNŸ|lv.]LÎ8ŽgíerÌà?÷p?ßsqºÔáª"§úŸN¼—åºþ-RdLçËa§D”eŽ,0 Úr<¹µ¿•WÕÞtdE]ýŽW–®xiKõIP£2ºùKKq[vpLÁXÔî÷·‰£êœY'æM9»­¹aû……úüFçèïcÚ§òRÏ®IÔÅà&b5áÒP®äu›HP ˆŠœú,¥ÌÝ›Ûà–)³ê|ž’t‰ÐV–“SfJÒK¿,Ÿ¯­ÛvaÑ¢ Ûê´ùãK{º«Ö<6uêckªº?4ê2ç;±>]¥JÓ&'kÓTªt½˜8'rÚ?¿ö´ÈeøšûÚM‹SÍÍ»rrº66Ü2Ñ相e¤·<-Ýþ“YbKN¶•€±îêJi hoÞ„ n Ìf f³¡^ͦς·)“Ç'Eë ^ô¦¡ó†=8~vèÄ€>KˆÆ==~üÓãúÿEý†X؉Úst‘‰©¼Pk¾âäFœ¯”A®"º0ö·0– ËÕñ@ìK—Å 9îÜÇ9fÎ߉%83íKN<ñ ,ó5iGªËÁ2Ù2-l™\æ+ÒÀ–)ƒeàžÇe6á2ã¨Y:ø?l‡JúAr!¥gò ¹á˜„*^¥ñ_p3¤eÈ…¶ö¡ ‡|”pènªÌ©Á ‡´h³½‚3;Ö5×Ï‘ít¥î¸×_žYšÅ$’Ï\?µ‚Þêi|f”žÎæ¬ÿÿ©«Öÿs]&?ü?T÷Ì›™ÃÀÀÀEèûKðZñßÁJVÐ}4mÐ ‡î£y€¬ò"¦Ãà;aðᇗ±n3ø>~x·ƒßÎâpúL¬ÄíÆàw°øá‡û°¼ ~'ƒs¶a>G†ñ»¡¹`;ŒÅ¶,ÆrI‰…— \!_Á|JÙþ=Å”‡ís——±íãµy Ë+gä}˜*Ÿéò÷rîŠhOT>…µý©CÚ9…µ…•¸)Œï áŸM˜~ ËÏ‚Pÿò̘¾†õ9»bâÛ£pkß î‹ÀMa|_® ã·Gâ¸_4¬Ï‰,o ãwFáƒõÞÅâø¹˜O#kwŠÀ­a|7çávðáv0²í Ä8zΣÓ1±tãjLÇÄöïk1ñÝ!Žjl&¶™ø?³—·°åÿÈà°ü˜ [þxÜÓÓG1næpŒüaè…ÖŽˆ³I1êe7£GÒSb×Û‡ž‘Hÿxm,²AôœC,»ÕZÆ6‡àÛ£pk߉ž ÆMa|_® ã·GâXl¬D–·„ñ;£ðÕaü0èŠÉÏ]lyü,BÌ:#±1&¾= 
·†ñÄ¡\Æogpæ9˜N«3ý¸5Œïâ\Æogpæ¹~˜Ž“¥s)·†ñÝœ¯#ð‘aünÎר'ãgûaÝsaß²e.ã[†âÛ¢ð3a|GþTß…Ÿ ãû£ð¹aüÀ0ôF∳çó5ôQbâhw 4öÒÈáoTžy…±Ÿý˜Õ3%ö•ƒ#ÌŽ¸X|Ü­>4/Ö5„XØ?aÛ^ŽÚ(‰_qBø_pÛ-gthç¾o‹ÀŸÂmÇà;"ð}XWúŠ|.Ö¦üüv¬s ¾7_Aÿà Î_ƒù_ÍðÏÍã30Ÿ«þñ¸Æ|[^„é3øŽ\ùgè+"èÿŒùgʈ(¯Åü3øÞ;?i‡óÿ6DˆÓ×Rè½¥_<õÐÑè¨yÑéAÏþQþu ñ} xœc`d``ßö/Œ‹ý_àß#œ€"È€ñ2˜Ìxœ}Ô[lUðoÎef[Á‡¦©uYk… mжBë í.[¤•½iˆHBk¥&[¢nx°”Ø“¥f‰O—> úÀƒˆñÁ Jð¡‰\¼E›h´5¤hLLð–ØPÂñ;ÓÙfhÜnòËÿÌœ³gÎìžó©~Ôƒç @7¿è—Íœ·ÇÌê6ôÝHúþBÁõÌ õ ¾,ûÞ·‚÷°Z £@CòiTê—Ø¥„­Þ‹SØÇïP¤I5Ï1×xïI´êul?Š˜ï3Ô¨iÔë0âãš"=PzÀü­“æ*ûcê6¬µÜ'W£|îe4Ë/ÑÌvB¡£ØÌv“J`“%~DÜ;„x¹‡¸Î±=ˆxÙM~÷j`®öy£#%Œ†ÆurÞ_yý'ÛÇø¬3ˆGæuÖ«†TÎŒÑQ….Ú)ªñ¬¨2ïŠjsFíc?‰°Nxrß¶ãã|·qÜéþÄz”×ßU,ðÝGPk©\¢¾2Äôëˆê$kë D#)D½‹ˆyå¼û} sÀVíæ_ÕbæåI4Фe ”µœ»Ö\d½NsTRÎm3Ϻ™ª(J·SuQ†h’NSŽž¡iš¡ïiJÖ:.­Ò(w³Ø®¦0¤W!-2xNžFÊùÛE#Dw¸.·ig Ú&ïE‚¶9 Žë­{ÑMƒòºiP~ŠfÄX‹×üöï8¨ò8åvà|l?&Úù¬Mh¥&1ƱcØ ²ü ²Ø({ÐÃ1÷[Ò`·«¸oóè ^ç}ôÊôêÑæž@§{KÿƒGd;ÿ§<úè.ª£~Z£òæk9ÕÓS”\œß,°¦É®)-&ÑB TO©¾Ùm×J£Á¼Å¶5NÇé«àþ›t–»}èVŸ E¿ƒŒzÈ+|Î7<£“Ø©÷b‡}¾øÖfm¥{¨RD¸·òØjû\¾«H!Ë5ÜGh‹žEJ­ú7<$OaGÙ0Õæ’ÜÈ=Û€®âYöÅ´ŽþX<ö…ÙüÃæsj„õ§i¾ãýcÌ›Â- eײ +ÅfYÑR¦…³-°”o+°¹fó¯ÈÏÁ¢ Kñó1¤˜‹%Í-f›ýÏüì KÚì2'Å~v½Jg3t‰^ñs,‚Œ¿—rË,fYI6ÓÄuîýP¶-±¹æ`než­Àû|eKÙ÷žÇ~›‡a6‹l>é¤Ü1ìfÆNè3û¸«¢^xœ,ÝipT÷ÿûÓcü7Ä“2ÒÌErd¤VKÝíHÊ(C2CFÌÐ>ÝAèk³µ´$@É&€vg;ÄkÇf›E·„A<2‹X-aâ$“eòàÞª[÷Á}p«þU·î£ùÝÏ÷ýÿ?àU]­Öéïr¾¿sN7êöî.ɵîžLsÿ)sÜ¢ c?¹ òYw^Ž»‡rÂý‡œrKò¤»éýY(˶z×c6æ`s1‚[ìC­ö,!ì·ç à Þ¶ç ÝÁ»xïÛ³‡>ÃøÈŽǼ?ÓöSüåKîùC÷ª¼ê^’wÝkò+7ê­à‘+xä ¯Ý;%íñ+xü oÖ Ë9´ß]áÝò¾%ïxeÒ¶³‚í<î»?ʬÀJìÃ$öã^Æ3´€ûñÄQÃqœÀIœÂi<äÉ×¼¿•¯+æÇCoàÛÞ?ÈÃ<æÅcxOàI<…3xAþ7í!_ɘ{ [ÜŸ¼ÿ¦é¶zdâËÜÿ#|_•Ox¾—/-ë'Èú ²~BqæÉ×Ûг^¾éÈ·•Å<û<ûJ/CÛYéebÚ6W²Í•ls%Û\ʳŸ†ò1Š1Ü„X‰U¸ k±ë±± ›Ñb^IÌ+‰y%1¯$æ•ļ’˜ÿ\óµ$Wàãî¢\ë¾9î2ÏÝ—ùî÷2ê>“1wRÎjúþÜ›Ãy÷y“Û·4qîÝq¿õþ\S¦mjÊÌõ˜9Æ\Œà{^M™žWSfØcÏêÅ>Lb¿E¢Îšƒr¹¬"—U䲊\V‘Ë*rYE.«Èe¹¬"—U䲊\V‘Ë*rYE.«Èe¹¬"—U䲊\V‘Ë*rYE.«Èe¹¬"—U䲊\V‘Ë*rYE.«Èe¹¬"—ÕÌéjæt5sºš9]Íœ®fNW3§«Ù»V³w­fïZÍœ®fNW3§«™ÓÕÌéjæt5sºš9]Íœ®fNW3§«™ÓÕÌéjæt5sºš9]Íœ®fNW3§«™ÓÕÌéjæt5sºš9]Íœ®f¯[Í^·†ìÖݲ[Cvk´²m”}ÜNb?îÅa´×ãr\CŽkÈq 9®!Ç5丆×ãr\CŽkÈq 9®!Ç5丆×ãr\CŽkÈq 9®!Ç5丆×ãr\CŽ_ÓŠ½$s?”+ðq÷Œ\ëd—±‹ÝiY‚¥î}Y†å¸Éý\VðÓJ¬ÒZ÷5¯7ãVŒc[ÜÙææe—Ž_óº±{±­%±¸'Ï»‹Û»Ý 
9„Ïkßþš·‡íïÅaÁQ÷©ÃiwMžuÏɼ€³®MÎá%·,?v¿’—¹}…Ûóî=ïk¡§l;š>UOÓg®ÇlÌÁ0æb·X…CCVÕÐó¸÷â0Žà ø"î³ÜµG™ûñÄQÃqœÀIœÂi|Ùj®ã‹ù ¾Š‡¼oËŸXvÚß¶Ê×½ù¾í5Èw¬Ú¡_à»øfËGð(ÃãxOâ)<Ío½ïnȸ}Æz:‹çð<ÎX×´Çš TûS¼†×ñÞÄE¼…Ÿ[C_à>Òs=©uõ×Þ“ZÓÌ”áÙÑíëÚóÍXˆÅªÉ׵盥Z¿®=ß,Çÿù[ü´[Ü—²ÛIìÇ<~7½:ûúºöOsG¹ÌÔÞeî³ßUßÍýxâ(Žá8Nà$Ná4¾lQ©ïæ+ø*æ§Gð(ÃãxOâ)œ±8Õ‘ÿðÖjÅø´ ¬UÖ¿–»ulZ«ì¤žQ÷èÍWðU<ÃOÏâ9‰ë031 s\B†1Ïm—ùnŸŒ¹ïÊ*7+«q3nÅ8&pîugä0Žà¸û¾œp/È)×,_r›ä]¹üÛñ#Lá%­„¥}ƼêvÉOt¤ø+í9æõÿúò®+“K®T.ãW¦ö"e¤½ÈŒb 7aVbnÃZ¬ÃzlÀFlÂflµ*é,Ñ °Çj¥³D³“ØoÕÓŠjâŒÕDÝ7o[MÔMó.ÞÃûV%uÓ| ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÑÍ4º™F7ÓèfÝL£›it3n¦ÓÍtº™N7Óéf:ÝL§›ét3n¦«›ïÈ0æé5]Ýœ•1}¥ÓÍtº™N7Óéf:ÝL§›ét3n¦ÓÍtº™N7Óéf:ÝL§›ét3n¦«›ÛåGÜNqÛº™N7ÓÕÍýÒº™N7ÓéfººÙ%—tþœ®nš_¹£^:ÝL§›ét3n¦ÓÍtº™N7Óéf:ÝL§›ét3n¦ÓÍtº™N7Óéfºº©*©›f€=–»ºiöaû­zê¦9ˆÖÍtº™N7Óéf:ÝL§›ét3n¦ÓÍ¿æXÿ7¸N×Ër>îRr­úµNüO™çH»ž]§ëÙ/dLç~ëtæü[yË=’öJÔ:?k :6×c6æ`s1‚[ìY”»žE¹›ÚÕë:®^×qõºŽ«×u!»jX§ÜÍA™AäDžAäDžAäDžAäDžAäº.ø“œCË"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2È"ƒ,2¹JÍä*5“«ÔL®R3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>Íäú4“ëÓL®O3¹>}JëÆ=ù˜{W®ÀÇݹÖ=+cº"xJø­,ÁRuá)šå¸É]–ü´û0‰ý¸“GžÕ¾ý”®åœuurçÝÞSê¯bPÍõ˜9Æ\Œà‹S]Sœêš9ˆûìyUys?Àƒ8Šc8Ž8‰S8¯yÿ,_÷räÛžâWµíþ#xáq<'ñ”ü¦jû¹\kuÆøMöüojŸ¿#cî”<«3öoª2æœÕÙì7UsÞýÁû¦*£-¨2æzÌÆ c.Fp‹=—*£í«2æ Ìb*³˜Ê,¦2‹©Ìb*³ˆ-‹©Ìb*³˜Ê,&1‹éËbú²˜¾,¦/‹éËbú²˜¾,¦/‹éËbú²˜¾,¦/‹éËbú²˜¾,¦/‹éËbú²˜¾§Ïç2_ûÏÓŠdN»Od –òÓ2,ÇMîßd?­D[óŸÖ±ÌÜŒ[1Ž ìã‘vù´öX»½“­íá§{yöaÁQÃqœÒ5ÚÓ:Š™—M]}è~ejöb&±ßrQ¦æ î³çÕkîÇxGq Çq'q §ñ0Á£x ã <‰§p†x.à~Š×ð:ÞÀ›¸ˆ·ð>™~†äzíQŸÉ˜öØõªüCY›q+Æ1{Ð^AZÏ+Hëyi½gWÙë½I];¯Wm—½õª˜¶©Š™ƒh¯Z¬çU‹õŠÄó>ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’M$ÙD’£5vQ>æž—+ðq÷Ïr­û‘,pQ¹ ±Ø­•%XêþZ–a9nÒ*š£=Ü~Z‰UîmY›ÙæVnÇ1-îo¤¯9^›k’]®FvcöbÛLb?pÿNž}·wç>¯õ?GÕ³gÙëFä0Žà¨Ž&9šs\gz9:3ü¾œtË”k•ÓZÿs´NÊs:£ËñλÃrÆMÉ 8ËOçð’k“»~y™ÛW¸=ïF¥7æpÞ˜ã=twå—nÁËÑ<*­`ª¿V0s=fc†1#hïÑäðMïÑäðMïÑäðMïÑäðMŽV<ÕŸwjrx§&‡wjrx§&‡wjrx§&‡wjrx§&'4ä’òy«gh; 
÷Z^¡awPޏoË\½|‘Çì³¾h­øïr¿UOk…ÝsÐÊQnéj"GkÅ÷ä„û9ÉýS8/ëº)'ô#÷÷òÛCB¯â!/.bÕ°G¾îý“|óg|ÓË“oyß’o{¶…wl¯ýßÅ÷ð°Ë–Gx®£ºËÑZd·ëZ Gk‘Ý>‰§ð4¿õ¾ûü€ÛgÈ÷¬ûPžÓ9vNè<÷ÌØÞ¥‰3èì§x ¯ã ¼‰‹x oÛ¨ópó.ÞÃû¶7jrÍø9µýÂýT.qû‘¢ 3ÑaMôA{¯m¢ÃLtX]+7b!»Y‚¥ÚrXm–£MtXm?­Ä*÷¾¬ÆÍls+·ã˜Àí!a&:ÌD‡5ÑÏËnìÁ^ìc›IìÇîß©Y k¢íöníaom¢Ãšh{–½Ê=¬‰6GpÔË1w?nBNºÝrJWµaMôˆ´‰k¢ÿ]ž×±&ÌD‡™è0f¢Ãšè•k k¢íönÏ»Wå':K k¢M›è0ÖD+M´ê¯‰6×c6æ`s1‚6¡a&.¬‰Sw˜¸0fâÂL\˜‰ kâì1û¬nL\˜‰ kâ웸°&ÎnÛÄ…™¸0ÖÄÙýS86qa&.¬‰³g|mâÂL\˜‰ 3qa&.ÌÄ…™¸0ÖÄ©kš8ó]|mâš8{.›¸°&ÎnÛÄ…5qvû$žÂÓü–M\Xg·ÏP›¸0ÖÄÙ=3Ö}Mœ¹@å?ÅkxoàM\Ä[xÛöMœyïá}Û[4qæüÜö&.¬‰³ÛU®fí÷r>î>•ku’«Yû­Üˆ…¸I¹äj:É.mæj"ÌìÅÜ…»yüNsϼŽã¹Ú¯ô\Ú¯Ìõ˜9Æ\Œà‹Gû•bÐ~eîÁ½8Œ#ø¾ˆ/[œÚÌWðU|Ç"QÍwñ=<ïãxÆ"W¿Ìsx?ç§_à>’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*¡’*™§£ÀÇÒÞÛÍã½Ý<Õ3aÿOD×PyÞJ|×jOÎóÖq;31 ×+þ¶™Ä~àþ<û.nïvÏÉ!|Þ]’{x–½n^㎺ŸË1×µažŽËrÒÝ—SêWžöœwäYYòtìø\ž×™|žŽvÏœÕZ‘§c‡yɽ!?æy/sû ·çÝü„Hð¡öö<;îJ{Ÿ(÷‰ò¼¯¼J/OGEÅ»Ìy¼ËœÇ»Ìy¼ËœÇ»Ìy¼ËœÇ»Ìy¼Ëœ²ëå<šQŒá&¬ÀJ¬Â-¶'èüÐn×bÖc6b6ã÷mßmÇÜö^|žfD½ÐŒ˜{qØMËn¿à^’/rÛŽby:Š™û¹ÿ·ºn9Êí1×11OG1»=‰S8/Ûþ£Y{Q¾ÂíWñ÷cùwS¾æ•É×½rù†×(ßÔ-OG±ÝòmïÛòÛ4›æ»øæYŽàQ<†ÇñžÄSxšßúÏØ~¨)Þ/Ïs{Æö=¹Ìºü)^Ãëxoâ"ÞÂÛ¶êÈeÞÅ{xßöU¹Ìø¹í·Z%´Çj•°ÛO¾Ö‡wä |\GÒ|­°ÿ†OâZ óµ>Øí ÌÄ,,P.ùZÌB,ÖYJ¾Ö³Tûa¾Ö³7¹=²‚ŸVb•»!«q3nÅ8&°EgbùZ NÊ.wDvcöb[Kb?pÿNžw·wëˆŸï ¡­ùZ lû{u˜¯ÕÀÁQwNŽá¸ûµ´Wáòµ<’SÜžvHû ùZŽË 8ëŽÉ9¼ä.ÈÝ=y™ÛW¸=¯•-_+ÀÈ|¨iÊ× ðG/_ó®çÕ¼«/šws=fc†1#˜gѼ›QŒá&¬ÀJ¬Â-ÖYͻݮÅ:¬ÇlÄ&lÆ!ë²æÚ܃{qGð|÷Y/4׿~<€qÇp'p§p_¶}@sm¾‚¯â!o§ü‰ÖÏ|Íu¡|ë–ozEò-¯Y¾ím’ïØ> ‰6ßÅ÷ð0Û?‚GñÇxOái~ëÊQ>ÊQ>Êñ=Êñ=Êñ=Êñ=ªiýP–`©ŽDQM«YŽ›ÜYÁO+±JE5­æfÜŠqL`Ljµ‰jíöN¶¶‡ŸîÕG5k掺ër ÇuÝÕ¬•Säbï¾Ey÷-ªyùR.à#÷'ù+Só¢ßåxåxåxåxåxåxåxåxåHåHåHåHåHåHåHåHÕž¬\´'›ûñÄQÃqœÀIœÂi<ä É·¼„<Ì=Gð(ÃãxOâ)œ±Ši¿2ÈúS¼†×ñÞÄE¼…·­ªÚ¯Ì»xï[µ_™dLg3ò1×!Wàã®X>áNË•ø$®ÕQ#¦½Ëng`&fázw[fcû¹ ±Ø¥Ë,uOÉ2,ÇMîŸe?­Ä*÷š¬ÆÍ¸ã˜ÀW"ÛÜVi¯"Æx1Æ«ˆ1^EŒiÿ´­%±¸'Ï»‹Û»]–ÂçÝOä¶¿Wg&1í½æŽê<¦½××9^L{¯=r’ßšÒÕbLGŠNyÖ=+Ï©æ17ž‘öcüŸÃ˜Ž]r/¹òc­Ï1íùvû ·ç/íUį"ÆtÔ°:éÞ•Ü‚üÚycLs¡¨tQu1×c6æ`s1‚y¶MÍ‘Ån ¬Ä*Übû€¦Én×bÖc6b6ã÷m¯Ðy£Ù‚;pHûvLÇí!:¾˜{]¹vÃrÄ5Ë,w_ì§û¬ƒšJÓÎcšJ»mç1M¥ÝC;oŒi*«ä$÷LÙ>¦©´Û/Ûž£ãË>ù ·_E;oŒéøbQ½á}W¾©óØæwHÚ«‹1SvËw¹ýÚYbLsm[>ŠÇð¸Û Opû¤û;yŠÛ§ù­ðŒíu:¦ØíóÜž!¯ 
¸àzä§tö^ÇxñÞ¶½QSoÞÅ{xßöLM½ù?·½”³Ä˜Ž&vÛ^QÜ 9ý½,Á ¬Ä*mnÐ<š›q+Æ1}<2‰ý¸‡û÷êªjƒæÈÁq=ïβ6hjxÔY=^5÷ã<ˆ£8†ã8“8…ÓxÈS:ÛÏ“oxß’oé96¨GöÓ#xáq<'ñÎXœê‚yKÇ ©7p¤ÞÀ‘zGê ª­Åÿ>ÞuîTàq_ȫڗ ¼›ÚBwK[+ðì¾À»ë~#¿Òã7zíºÙÈomä·6ò[ù­{äŒb®Ò™ž9‹s8ïîxU:CÓt†f®ÇlÌÁ0æb·Øs…úíYB8ˆ‡¼nùº× ßðºä›Ú[ª´×µÈ·½ˆ|ä•êúØâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þjâ¯&þj"{×Ý/½8{{œ½=Á= îIpO‹öŸm2Œyº–iÑþóï2ævÉ«š…u_?U÷Í{ì1ê¾Ù‡Iì·ßR÷ÍA¹CÛO“aÌÕ޵Ë`žÎwè¹Þ’1—'ϸ%ù¡ÖÛÚKMÛKwhÿüLÞѱi‡öÏßÉ%­º;´š_Ùo)Bm_š>cÏjÃvìÀNìÂgñ9ìÆ‹Gy™}˜Ä~‹Py™ƒ²•¼ZÉ«•¼ZÉ«•¼ZÉ«•¼Z5ƒ’–]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’]+Ùµ’Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“Ov>Ùùdç“OvÏè¸ù/st…Ø©Í<í½Ê람¹Ëb:u&oÚÿ–ìäKvò¿%;5ïŸË ~Z‰}˜Ä~ÜÉ#/©:‹0¯*ÎNÕä^§²Öó*k3À{vÅoöaû-Åoâ>Û¾ÎöÍýxâ(Žá8Nà$Ná4Æ#xáq<'ñ”ì¢b]T¬‹ŠuQ±.*ÖEź¨Xë¢b]T¬‹ŠuQ±.*ÖEź¨Xë¢b]T¬‹ŠuQ±.*ÖEź¨Xë¢b]T¬‹ŠuQ±.*ÖEź¨Xë¢b]T¬‹ŠuQ±.*ÖEź¨Xë¢b]T¬‹ŠuQ±.*ÖEź¨Xë¢b]T¬‹Š=«cÇii«î³¬ºÏ²ê>§3½ïÊ"ïÿU:'yNÇs3nÅ8&plEï÷òlÃvìÀNó,>‡ƒø<îa›ãÞv9áÕÉI¯^NéžÓ9áå½›òoAžñþOù¡ÎéžÓ9áÿ'?âvŠÛ¿ÔùàsÞÕÐ_IËô9~SÞ åÊ;¡,y×û¿å’÷_r¿ •zÏéÌPñèÌÐÜï½%lñëèfþ­nÏéHg·ßÀ7ñ-¤ž:?´-ü Ž¿ðFå9ïÿ•·-G+šwñÞ·|CŸáüBÛìfõëfõëfõëfõëfõëfõëfõëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝëfÝë!¯òê!¯òê!¯òê!¯Võ²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë!»²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»^²ë%»>s>’9n­ £í÷)¯QuI;çïó6híóÆu ÛçM*‹>oŠ{~¨©OY_“º/äEîÿˆÛ)n_u‡åM©ö© òŽ»)ïjõîSì‘Ëø•»íõéìTQ©ŠGÕ0챨”—Ù‡I´«ƒ>®ú¸:è ÙšßÚ…»ñ>~†d’Ü“äž$÷$¹'É=IîIrO’{’Ü“äž$÷$Y'É7I¦I2M’i’L“dš$Ó$Ù%É.IvI²K’]’ì’d—$»$Ù%É.IvI²K’]’ì’d—$»$Ù%É®Ÿ+¸~®àúÉ«Ÿ+¸~2êç ®Ÿ+¸~®àú¹‚ëg_íç ®Ÿ+¸~®àú¹‚ëç ®Ÿ+¸~®àú¹‚ëç ®Ÿ+¸~®àú¹‚ëç ®Ÿ+¸~²è'‹~²àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©^•àU©A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²$»A²Û©¼êds5;µ*šyº®Ü©OȘÛ*¯*†|RÊNÅùko§"Ño)3ÀgìwµÊ™íØØ…ÏâsØ=ö,ŠßìÃ$öÛó*~sPîÒùÒoåcîgr®u?°ÿ›¬¹Ø¥,Lû;Ç]ü-É.uçMs¯Ê šÜ]:Ï´-œu)9£+è]º7gqç݇ò&[°¿žÞ¥|åíÒµ¹žQ׿æzÌÆ c.Fp‹Å¦*)*UÉ 
ÐþZd-²‹¿ÙÅ_‹ìR¾ŠSùšƒhŸ²Ký2wã#ûùÊ·X†1ÏMÈ|÷SiÿWa·ò-’gÔ£ÝêÚ¼¼©©ß­\îH{er·ö¨ßy»›¶ ØÌ{l;ŠÍìÃ$öÛ6›9(‡ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆ†ˆaˆž×–ÿ oéè¼Gg¼å3hŸ-³GçŠæ!ïŽ|Mç³{t–xQ¾áÝoz§å[:;Ý£³Ä—åû^‹|¤ó̽Ê..ا³Ö½Êi²öêyÿäíUäú©"7ì±Ç(r³“Øo¿¥ÈÍA9Ìö‡Ùþ0ÛfûÃlXgþ-r¶¢× ŸÁ6lÇìä1Ïâs8ˆÏãK:n ë¼½Y~àý£<£J ë¼½M^Ô9þ°ÎÛívŠÛ¿ô¾'¯z'åMלּ¥Ÿ «šçå]¶¶äý«\Ưì1:c×séŒÝ´ú SŸaê3L}†©Ï0õ¦>ÃÔg˜ú ÿÏú¨wÊH½3_Ã×ñ |ß·ñ§<ãÏðçxNG…êæÎɸÖí“9n» £­óãŠù×2êŽÈ˜{Cnàžb=ã¸W‚›´†{Ü®Ä>Lb?žuÿ.gÜ'òÎâλiG„qÞßWµM›ŽæzÌÆ c.Fp‹e¡z*~ÕÓ °ÇbVeÌ>Lb¿e¤Ê˜ƒ¸“ûwánÜgñ‡~€ûñÄQÃqœÀIœÂi<ŒGð(ÃãxOâ)9ÁëÞêÑ ¹׺ŸÈ÷}Æ<­óêÑoeÔ•1÷š´¿„à/a'øKØ UÛœÃyiOð>Ë„êü;oBuÖöUgs=fc†1#¸Å"QƒêlØc‘¨Îf&±ß¢RÍA9ÉkY“¼–5ɧÉM*‹G2¦-Ojïz(KÐ^˚䵬I^ËšÔþö™¬à§•؇IìÇ<ò[¾ŒWUÕI^˚䵬I^˚䵬IE®gWäf&±ßâQäæ î³ík0÷ã<ˆ£8†ã8“8…ÓxàQ<†ÇñžÄSrŠÏ@›ÒþpZ®ÀµnXÚÿ™ò˜§k“)Ïþ_å”ö‡£2æÞ“Ôå)þŠ¿ÁŸRUÿ$˰íÿžMñ·ùSümþ›?ÅßæOñ·ùSªª=ò¬Î7¦´w]–pÖ]‘sx‰G^Æyw^Þ$ž[lß®ë§Bö®¦´¿™ë1s0Œ¹Á-–©º¦Õ53ÀÛ²ºföaû-kuÍÄV͵¹íS¦øT)>U`ŠO˜âS¦øT)>U`ŠO˜âS¦øT)>U`ŠO˜âs¦øÜ€)>7`ŠÏ ˜âs¦øÜ€)>7`ŠÏ ˜æ1Í1bZý:#óÝ%sÏË«šèiÕê×òŽ® ¦9:Lst˜æè0²Oœ›õb&±ß¶ÀAyR×D‡e ¯ºCò®û…\r¯ËeyJ÷ÿQgjg´çœÖã'SxÕ½%­k§É’¼«k«Óú]{ä²|_½þOy¯êþ}=~YÚñ÷¬2”aÌÓÊ|V™—1÷¬wW儳óš)]/œÕu\¦¼ˆa —t½|VÏ(U mMÕ0ì±mªf&±ßžEÕ0ñ¶=WèÞÅ{xßž=ô>çùQV„çXÎ)æ¼szvݯg7ìçþ”çõ»‡eXǵóüîyýîï¼~W÷ëwÍû¹åŒ~Ws¥ß5ótu3CÅf¨˜ŽÚæŒ*öki›¡b3Tl†ŠÍP±*6CÅfô¼Úšž× Ð*6CÅf¨Ø ›¡b3Tl†ŠÍ¨bz®Ð¼‹÷Ð*6CÅf¨Ø‡Z=nÊV{Ñ;©3·µJdz†¾íEåßyß–‡¼„|Ëþ—ŒÎ*ótNü˜r¼¨ÕÌ|Ü}G®u£²@kïEo#b±[%XêþM–a9Ú»µvÙO+±E{ìEÅS*û¸'‰ý¸“ßÚ­µè¢7„íE­iæœu9r/ñøË8¯k¥‹Þ'šš‹Þ>Ôœ^Ôš¦,´¦™ë1s0Œ¹Á-–ihȲ ={p/ã¾€/â>‹_«–¹àAÅ1Ç œÄ)œÆ—­>¡á+ø*ò¾%_ÓuÊE>óö¢®¶Ê7½\ù¾íõÈÃlçÅcxOàI<…g¬Î¡³xÏËØs>bÏùˆ=ç#zñ{ÎGì9±ç|Äžó{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœѦØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž“bÏI±ç¤ØsRì9)öœ{NŠ='Åž3Ëñe–ãË,«å,«å¬VË礭–³_fY-gµNæÈÚ 9Ë 9Ë1e–cÊ,Ç”YVÈYVÈYVÈYVÈY­Ú²VHsí˜2Ë1e–cÊ,Ç”YVÈYVÈYVÈ9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢#Ú9¢½¤hûeؽ-íÓà/ñ^ó%E;á]R ú©b0´÷‚/ñ^ð%Þ ¾Ä{Á—ƒ=~åÇšÖßʵºŽøXÏÒ&Ãî_ež®¿>Ö³\Q­ 
ë¹vÈqs~Ìß6~¬Ê|%/ê<äcUÆ\âþeSs¡m*6mS±™öØ6›Ù‡Iì·í+6sïÛöUó¼L.S‡ËÔá2u¸L.S‡ËÔá2u¸L.S‡ËÔá2u¸L.S‡ËÔá u¸B®P‡+Ôá u¸B®P‡+Ôá u¸B®P‡+Ôá u¸B®P‡+Ôá u¸B®P‡+Ôá u¸B®P‡+Ôá u¸B®P‡+Ôá u˜×zû’\»N¹Öý\èº{^ë­YˆÅ:·™×zk–êi^ë­YŽ›TÃy­·öÓJ´Ošç“…æùd¡y>YhžOšç“…æµ& »Ty¯{°ûØNûq€ûwòŒ»¸½ÛÍÈ!ÜÃ6íóˆæù<¢y>hÞU§æ½1´ë÷yoZõœ×QéÇò”Îxç½Óî¶|ŸmÎÿW¦7zʯ5\•Ñn®ÇlÌÁ0æb·Xõ´†«bZÃÍ=¸‡q_ÀqŸe§5ÜÜð ŽâŽãNâNãËVO­áæ+ø*Òqo^ku±|Çr ýßÅ÷ð0[8‚GñÇxOái~ë}«^ènŸ±.h=7Ïáy´ÏPšç3”æC TõS¼†×ñÞÄE¼…Ÿ[§B_à>Ô~ûKûßaÞUï ]›\õV⓸30³0GÇÖ«šÄWe®Îu¯zÌsC2ß½)cΗUÜ_›q+Æ1{p¯"¼ª=ÍÁqå{U«ýM9åfåEÕÿªfÙ\RÝ®j–e(Ï¢ åcc¸ +°«pÖbÖc6b6c«å«U ð‹9Ô†íØØ…ÏâsØ=V­-f&±ßj¥µÅÄ«€ºlÞ¶ „îà]¼‡÷­&ZÌò­9ÿ)׺cÒþŠüõHçRêË5iïF}¢ÕòKi¯<¢ùÒ#•Ýö1@{oèÞú„÷†>ὡOxåù^yþ„WžxÆžqg\àxÆžqg\àxÆžqg\àxÆžqg\àxÆžqg¼î=ë~)Ÿ3C+ð/ñgøsyC{òïäJ|×afbh•»¡•Ù,ÄbwG– }õ >‰úŸD}C+ó²¬à§•ØêUÉvo³ìÀ>îOb?îäwí“´oðIÚ7<û|‰Þ0Žà(Ž¡}ÎÛ ­™º­}[qjß6£ÃMX•X…Û°ë°°›°‡,k­®æÜ‹Ã8‚/à‹¸Ï2ÒêjîÇxGq Çq'q §ñçË7¼BiŸ qC+êwäÛVC­Ÿö˜#xáq<'ñÚ_CÜàÈoð ä7øòš#«á\ÀOñ^Çxñ–\ÔÑüOr>®+ EíW_Ê•ø$®Ã ÌÄ,,Ðä.j¿2 Ñ®°¹ÂZÔ~u]–a9nÒj¼ÈÖ"WX‹ž­T‹Z9Í͸ã˜ÀVïMÙîm“Ø¥sþEýÍìÅ>¶™Ä~àþ<û.nïvwåîaû{ÉzGpÔ=c8N^ºnZÔ™À¤ý•֢ξ’ö*ý¢w Oó˜÷ÝÞ¢öjý®ŽïªªömÕJû¶Ån ¬Ä*ü¶×)ÿN/†6{+¿£#×¢öyûi-Öa=6`#6a3YäÚçÍ=¸‡q_ÀÑ® ¹*\äªp‘«ÂE® ¹*\äªp‘«ÂE® ¹*\äªp‘«ÂEíóIù†ÎµÏo”oyÿ(ß¶Þé\B]й„ù.¾‡‡ùÝ#xáq<'ñžæ·>À3ÖMÍ…yÏãŒuSsa.БOñ^ÇxñÞ¶nêxdÞÅ{xߺ¯ã‘ù?·=Açæ’¼­9úR>¡£ámÍ‘ù$®Ã ÌÄ,,Ðz{[sdb±Ž}·5Gf©Î3okŽÌr´W*nkŽì§•X¥Y¾­927ãVŒc[½Ëvõâ¶æÈìâþnìÁ^ìc›IìÇîßɳïâönUé¶æÈÜÃ={5M·5GæÚ; ·½1×\ÜÖýNNãi~ë}{¤æEѤ¨¡|Œb 7aVb~[«ëmMʹÙ+—Û¸¿ë°°›°‡¬ÚšsîÅaÁðEÜgÕÐŒ˜ûñÄQÃqœÀIœÂi<ä=+mFnóR·9.ÜÖŒ¨/šUF3b¾‹ïáa~÷ÅcxOàI<…TXçÛæhÿ{í¶&Å<‡çq†.\Àzñ)^Ãëxoâ"ÞÂÛÖÍм‹÷ðsü—ä]k^“+ðq·]®uc²Xûó]ííf©Ö¢»ÚÛÍrÜ„ü´[tŒ¾«½Ôn'±wòÈ“ºžº««0=‹®ÂÌõ˜9Æ\Œà‹DýÕvÔ_s?Àƒ8Šc8Ž8‰S8/[„ºª2_ÁWñuûË.õýå›^Ž<ÌãàQ<†ÇñžÄSr‰OÇZâÓ±–TÃ:¹Öí—ÅnI– Õp‰.QÃ%VŒ%ÕÐ~Z‰}˜Ä~ÜÉ#OºCò¬Ë•3xgqíï–ø»€%oÞ‘•ÝŸõ´Äg=-ñYOK|ÖÓŸõ´Äg=-ñYOK|ÖÓ’j®ÈUs=»jnîÇxGq Çq'q §Ñ¾ 
h)ôš×(_×qgIu6ßöZäasâ1<Ž'ð$ž’ËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/Sçeê¼L—©ó2u^¦ÎËÔy™:/SçeêüÐ{ÌKÈø¸Öó‡ÞZ/[æhŠzöWxuõ¾´¿†{èE½’1÷’gïíÿ­´w!ò.äÃP–m'ô4®ÇlÌÁ0æb·ØséJMÏ¥+53@û«º‡üUÝCþªî!U÷PWjz^]©™ƒòKÏ>¿èKÅsÙûR?5pÐ]­°¿Â­°¿›o{±ÐJå5-ÃîŒÌsŸJʘûyh¥"ÑO‰`ýT‘˜}˜Ä~?€ƒò/ìósäJ\å~+Ÿäö×¹†ë¸'31 s\— »È\wGF0O1ü…"|GÆ\§,p±‹Ý-Y‚šY†å¸ÉÝü´Û½oɯJöqOûq'¿µ›-á°û“ÁQ÷HŽá¸û•œp ý…Ît}–ŽÌÇ(ưÀr·OÔ‘X‰U¸ k±ë±± ›±Õj¥™>cµ µa;v`'vá³øvcUU5û0‰ýVguÖDê ³sîÅaÁðEÜg•Ô¤›ûñÄQÃqœÀIœÂi|Û:¥‰¶ÛGð(ÃãxOâ)lÕzúgîßeØ”/iÿÌ²ïÆ•WÝKò®{M~åFµæÚ#Ÿæ‘OóȧyäÓ<òiù4\Ï#×óÈõ!ÓŒb 7aVbnÃZ¬ÃzlÀFlÂf|Í>…ÓÞ· ÙçLÖ‡ìs& Bö “E!ûìG‹á‚Ì'—|rÉ'—|rÉ'—|rÉ'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹|²È'‹(YDÉ"JQ²ˆ’E”,¢t$J.Qr‰’K”\¢ä%—(¹DÉ%J.Qr‰’K”\¢ä%—(¹DÉ%J.Qr‰’K”\¢ä%—(¹DÉ%J.Qr‰’KŒ\bä#—¹ÄÈ%F.1r‰‘KŒ\bä#—¹ÄÈ%F.1r‰‘KŒ\bä#—¹ÄÈ%F.1r‰‘KŒ\bä#—¹ÄÈ%F.1r‰‘KŒ\bä²Á¾wX®ÀÇÝE¹Ö}!st4Ù`ÿŸJæk5Þ`ß;,cÕú¶Á¾wXλßțܶUtƒ}ïphƒ}ï°|×c6æ`s1‚[ìyí{‡¥öس۷®È>Lb¿Ebß;,e¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹K¹ËFû¿£²+°û0‰ý¸‡Ñö®ö?-åp?Àƒ8Šc8Ž8‰S8‡´·l´ïo’¯{aù¾­ýj£ý?Lyâ1<Ž'ð$žÂ´½®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ì É®ìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®ˆìŠÈ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìŠÉ®˜ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®„ìJÈ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìJÉ®”ìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®ŒìÊÈ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œìÊÉ®œì6Ù÷n‡6Ù÷nË”Ü_ÁýÜ_Áý•Ü_Éý•Ü_ÉýUÜ_ÅýUÜ_Åýß²o¥ ý-Vã·ÿ—·¼½òŽ·3ôwܳùyKmÖýß}‡{¾£{6Ê;:³ú.÷ü=þƒ}ϵüŸ·[ô\ÿ`ïÎË!Ó¾çZþ_ÁWñ ?=‹çð¼ÜÂÖ¶°µ-lm [ÛÂÖ¶x7í»šɨ¼ã½ÚÂö·°ý-l ÛßÂö·°ý-l Ûÿž}¶\‰Oâ:ÌÀLÌÂb÷KY‚¥:_úžýïYŽ›Ü5YÁO+±Êݑո·bØÇ#ûq'ÛÙÃý{ÝûrGpTgPß³ïÑ–ãºÖûž}¶œr?“—ø­Ëø‰û•\ÀGÚ—¾gßúž}¶Ì³\ì{´ec¸ +°«pÖbÖc6b6ã>ËžG[îÇxGq Çq'q §ñ7 
ßòåaî9‚GñÇxOáŒU̾G[.õ§x ¯ã ¼‰‹x o[Uí{´å]¼‡÷­Îö=ÚòüG^úGû®^¹ŸÄu˜™˜…9.!Øç¶Ë|·OÆÜwe•›•Õ¸·b¸÷º3rGpÜ}_N¸ä”k–/¹Mò‡®\~Èí‹ø¦ð’»!/ãU·K~â~(ðúýyוÉ%W*—ñ+Ó¾«Wæcc¸ +°«pÖbÖc6b6c«UɾyYØcµ²o^–}˜Ä~«ž}ó²Ä«‰}W¯¼m5±ïê•wñÞ·*Ùwõʲ†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèf ݬ¡›5t³†nÖÐͺYC7kèæ6ûvT¹w)¹VסÛìÛQ¥½b¿+ßmöí¨2æNËy]Ûn³¿K•wÜ„¶Ù7¥Ê§q=fc†1#¸ÅžÅ¾)Uú ]çnã:w×¹Û¸ÎÝfß”*pPÖy-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y-‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y‘×y=‘×y=‘×y=‘×y=‘×y½}õœCË¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,êÉ¢ž,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢,È¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,É¢‘,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢‰,šÈ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,šÉ¢™,þž{ZæëÙÿž{Z»Od –òÓ2,ÇMîßd?­D;êý‹ý…ˆÜŒ[1Ž ìã‘Iuÿ‹}÷´ÜÉÖöðÓ½<û0Žà(Žá8NéJç_ì»§åeÓ¾{Zö`/öaû-ûîi9ˆûìyí»§å~<€qÇp'p§pã<ŠÇð8žÀ“x gˆç.à§x ¯ã ¼‰‹x ï“égø@ÆéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~ÅéWœ~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~%èW‚~}ß¾Q]ÆÜd•{(«q3nÅ8&pîu_ÉaAûŸ0ß·oT—SZ«¿oߨ.pgì‘öêò>ù ÈíD²H¶Év"ÙN$Û‰d;‘l'’íD²H¶Év"ÙN$Û‰d;‘l'’íD²H¶Év"ÙN$Û‰d;‘´I ‘´I ‘´I ‘´I ‘´I ‘´I ‘´I ‘´I ‘´I ‘´I ‘´I ‘´É"ÙA$;ˆd‘ì ’D²ƒHvÉ"ÙA$;ˆd‘ì ’D²ƒHvÉ"ÙA$;ˆd‘ì ’D²ƒHZíÛ±åcîy¹wÿ,íÝÞVÞímåÝÞVû¾l¹ŽÛ˜‰YXà¢r#b±[+K°Ôýµ,Ãrܤ+¸V­öÓJ¬roËjÜÌsmåvØâþFú^Žls:ï±ï´’ÝØƒ½ØÇ6“ØÜ¿“gßÅíÝÄ9„Ï»Ërϲ×ÈaÁQ·CŽá¸Û.'týÞªÊûrʵÊi§ëûmyΕçÝa9ã¦äœå§sxɵÉ]¿¼Ìí+Üžw£Ò®ë[¹®oµïÑ–_º…P«V3Å ó+uJçWæzÌÆ 
c.FÐÞyoå÷VÞyoå÷VÞyoå÷VÞyoå÷VûÞm¹ÛµX‡õ؀؄Í8ä’òy«§}O·ÜkyÙ÷tË÷mù‚ÓõOèE³ÏúbßÓ-÷[õ´ÒÚ=]¡åö˜û®wß“îä$÷Oá4¾ìvɹ¿—¯Øzyqùë¬}O·|Ýû'ù†gÏø¦—'ßò¾%ß¶Ox´ï®’¿Àwñ=<ì²åžë¨+“Ǹ}Ü•ËÜ>‰§ð4¿õ¾ûü€ÛgÈ÷¬ûPžs³ò<÷ÌØÞ¥Ù4èì§x ¯ã ¼‰‹x oÛºƒwñÞ·½Q3n>ÀÏ©íî§r‰Û•Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾Ïìû̾ÏìûÌ~ÀìÌ~ÀìÌ~À¤LtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀDLtÀ„L\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄL\ÀÄ=£Yû½\»OåZ]»=ãÙ_<£Y3 q“ryFÓñHÚÿ²~ÆëÆìÅÜ…»yüNsϼÎöŸÑ~¥çÒ~e®ÇlÌÁ0æb·X<Ú¯ƒö+sîÅaÁðE|ÙâÔþ`¾‚¯â;‰úk¾‹ïái|?À3¹úežÃóø9?ý—ð‘l£’mT²J¶QÉ6*ÙF%Û¨d•l£’mZaþ$»ØB7ö`/à.ÜÍo á4÷X=Û¨gõl£žmÔ³z¶QÏ6êÙF=Û¨gõl£žmÔ³z¶QÏ6êÙF=Û¨gõl£žmÔ³z¶QÏ6êÙF=Û¨gõl£žmÔ³z¶QÏ6êÙF=Û¨gõl£žmÔ³z¶SÏvêÙN=Û©g;õl§žíÔ³z¶SÉv*ÙN%Û©d;•l§’íT²J¶SÉv*ÙN%Û©d;•l§’íT²J¶SÉv*ÙN%Û©d;•l§’íT²J¶SÉv*ÙN%Û©d;•l§’íT²J¶SÉv*ÙN%Û©d;•l§’íT²J¶SÉv*ÙN%Û©d;•l§’T²ƒJvPÉ*ÙA%;¨d•ì ’T²ƒJvPÉ*ÙA%;¨d•ì ’T²ƒJvPÉ*ÙA%;¨d•ì ’T²ƒJvPÉ*ÙA%;¨d•ì ’T²ƒJvPÉ*ÙA%;¨d•ì ’T²ƒJvPÉ*ÙA%;¨d•ì ’T²ƒJvPÉ*ÙA%;¨d•줒T²“JvRÉN*ÙI%;©d'•줒T²“JvRÉN*ÙI%;©d'•줒T²“JvRÉN*ÙI%;©d'•줒T²“JvRÉN*ÙI%;©d'•줒T²“JvRÉN*ÙI%;©dçÿßÑ>ÇužWoÄN%ŸòaT5SS“Ly$Q¦-;ñKòÄ{2ª)g&Îær/ÊÂ^xïŽì7I$„Ý*“´%K¶d v!ÊòÇ)%Ø¡Ð-B ¬žÊ¿r繿/§º€‹÷=ç<}úòJx9y†“g8y†“g8y†“g8y†“g8y†“g8y†“g8y†“g8y†“uNÖ9Yçd“uNÖ9Yçd“uNÖµeŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~ÖùYçgŸu~6øÙàgƒŸ ~6øÙàgƒŸ ~6øÙàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ N68Ùàdƒ“ 
N68Ùàdƒ“MN69Ùäd““MN69Ùäd““MN6½3›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÉÏ&?›ülò³ÅÏ?[ülñ³ÅÏ?[ülñ³ÅÏ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œlq²ÅÉ'[œ<N¾ø~øóqŸu¶ö åÏþ|  ü/^ÿ2üøøÑòjà¯Â_ƒû²³q]á'ãNðlÜAWø)øXy!ðqß}~ºÜ ü ü,ü<üü#øå¸÷<‰X ¬—³-x&0µZaîëçìÛöºSŽ> Ï—?¼`ý‹åß^‚—ápy'pŽ–ÿ83:÷Î'^O–«kåbàåBà]øÃr>ðGðÇåÝÀ¿,âõ_yýÓò•À¿‰÷ÉÙÚü»²ø÷åÿ8÷˱o¼ëb.ñ®«ðAø|ž‚ÀÂÓÕD>? …ÁÇáðÓðÉj²¿ãõàïÂ߃¿ÿþ!ü"|ºšr¼«+¼/ÂKð2|> Ÿ«f÷ã^…Cðy8 Gà(ƒãpNÂoTïHG…×á ø­êL¦¸ß |©:Ÿ>îÄ?øJu¾{܉1ð»µÇ_¯Þ‘£ §á ¼iýY8çá\„KpÞòS«ðvõ^ŠÄUx§ì®{ýFõ.Š{ð 7LsnÁm¸wá܇ïTï®.<€‡ðö`¾[½#×Ó`•Èu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'rÈu"׉\'µõø-œHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw"݉t'ÒHw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒJw*Ý©t§ÒÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu*ש\§rÊu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grÉu&×™\grù­Iw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒIw&Ý™tgÒI÷`íçÊ7ßWþEàûáÏ—ü…òVà/Â_‚Dö#ãÕë_†¿?,ß |~´üÏ¿  ~¼üOŸ€Ÿ,ÿkà¯ÃOÁÇÊÿø¸ï>?]¾øøYøyø%øGðËå'ÿ¬ü|`õäzГëAO®=¹Œ¼W«epæ¾~ξm¯;埆çË/XÿbùõÀKð2.¿8GË—Ç\9î§&ÊW'Ë3ke#ðN¹¸^Þ|£lÞ…?,ë?‚?.¿ø—‘¯ÁH}õú¯¼þiùµÀêÉõ '׃‘úÊç¿/§OÊÀ€÷kO F«è˜`ô@…‡àÃð|~ž®ÖŒ¨ðÃðQø|>? 
Ÿ¬ÞÑÕë/Àß…¿þüCøEøÇÕ»bàOà—áWàÓÑÕƒÑñ‰~¨ðbù©ÀKå¥ÀË埩´G?Tß}®š`ôC…W«‰D?T¯Ÿ/[Ã^ÀÑr(p¬ütฯLTï±è‡êõ7ªwNôÃs×½¾¿Uûfà‹X½\ûŸ¯ÔþGàwªss=1ŒNèN{=o–_œµòœ‡ åG½^*3pÙë[~jÞ®ÞuÑ Õëu¯ß ë.Ü(Ïnšì܆;pîÁ}øNõnŒ6¨ð£êmPa¾[½K£ Þ <öºzJžkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6ȵA® rmkƒ\äÚ ×¹6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBÚ Ð…6(´A¡ mPhƒBœ‹œþ[à'àãð øéò½ÀÏÀÏÂÏÃ/Á?‚©+38/øúÅò(ð¼ Gcßsî£ÏEjúçb²q}L¶Â«p>‡á…cpNÀIø­Z¨øvítà˵ÿøÚGoúî,œƒóp.Â%¸ ߨxÆ*¬î¿Î¹ÿ:çþëœû¯sájżûmîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîµ¹×æ^›{mîu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^‡{îu¸×á^'àß7á܆;pîÁ}X¹Ýáv‡Ûnw¸Ýáv‡ÛO×^(x»¼øv4ÞÓµjµ§kû±ïÓµn¸÷tí ü§Àûqýyןwýyןwýyןwýyןwý×_pý×_pý×_pý×_pýE×_týE×_týE×_týE×_tý%×_rý%×_rå%W^rå%W^våeW^våeW^våeW^vå3®|ƕϸòW>ãÊg\ùŒ+Ÿuå³®|֕ϺòYW>ëÊg]ù\íáò7OÁÓåµÀê·ísµGË߬þšÖsÕIë?©pà«Õ•_ƒOÁ³Õõ La«rX^±×{]±×{]±×{]±×{] æ¸_V̯çŸ \Áá Wp¸‚îàp‡+8\Áá Wq¸ŠÃU®âp‡«8\Åá*WíuÕ^WíuÕ^WíuÕ^WíuÕ^WíuÕ^Cö²×½†ì5d¯!{ ÙkÈ^CôÑ;Dï½C8 á0„ÃC8 á0„ÃC8 áð|px$ð<ŸßžWßõÏÇûä_oÇ;áùÚ›ñ®x¾ö|»\ Ü-ÿ!p¿Üì–åýÀãò߃÷ËÞÀóÁ0Ö†>ÏV»à S˜ÁêSîóÁ°Â"pÃa ‡1ÆpÃa ‡1ÆpÃa ‡1ÆpÃa ‡1ÆpÃa ‡1ÆpÃa ‡1ÆpÃa ‡1Ápà G0Ápà G0Ápà G0Ápà G0Ápà G0Ápà G0Ápà G1ÅpÃQ G1ÅpÃQ G1ÅpÃQ G1ÅpÃQ G1ÅpÃQ G1ÅpÃQ G1ÅpÃ1 Ç0Ãp Ã1 Ç0Ãp Ã1 Ç0Ãp Ã1 Ç0Ãp Ã1 Ç0Ãp Ã1 Ç0Ãp Ã1 Ç0ÃpÃq Ç1ÇpÃq Ç1ÇpÃq Ç1ÇpÃq Ç1ÇpÃq Ç1ÇpÃq Ç1ÇpÃq Ç1œÀpà '0œÀpà '0œÀpà '0œÀpà '0œÀpà '0œÀpà '0œÀpà '1œÄpÃI '1œÄpÃI '1œÄpÃI '1œÄpÃI '1œÄpÃI '1œÄpÃI '1œÄpÃoÃß <O—o~¨üÛÀGËvàÛåÉÀ7býøn¬_áSðluMubK` 
38XýT¬_axÍú׬Íú׬Íú׬Íú׬Íú׬Íú׬Íú׬Íú׬Ýú×­Ýú×­Ýú×­Ýú×­Ýú×­Ýú×­Ýú×­Ýú×­Ãú7¬Ãú7¬Ãú7¬Ãú7¬Ãú7¬Ãú7¬Ãú7¬Ãú7¬ÿbícqßúbí+ð«ðkð lÀ&,ày¸Zûpà›ñyõÅÚ÷k_|Ëëxý×^¿]»¸[»¸_{=°[{%ð¸öçïÁûµÑÞ_­9ðà«ð5ø=¸RËïÔžx)ØVõõ+ð«ðkð lÀ&,àyøBìòRp~(ðvíáÀ7k—¿_{0ð-¯àõní‘ê¼²êDãZ=ð=x¿ö±—‚g¬<+|¾¿W«þ í×kÕßï~!vùvìò¿߬U'+|¿ö¹À·¼þ×o×¾¸[kîײÀ®×µß<®ýVà{ð~íLÜO—À•ß |´<x»<|3>Ÿ¿¿­*¬~[½þô»åßÄïµ×ã÷ÔÏ߃÷«Ÿ ÷býp¯Â§àŸV» üüsøð ¬ÃlÂ<[ñ Ï+La+†áy…Eà]StMÑ5E×]StMÑ5E×]StMÑ5E×]StMÑ5E×]StMÑ5E×]StMÑ5E×]StMÑ5E×]StMÑ5E×]StMÑ5E×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M×4]ÓtMÓ5M× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C× ]3tÍÐ5C×ÍÚñ9áf¨{ ð¬> Þ EÃ.ß ¬>Þ¬}$úçfm4î…oÖÆCÅÍÚ„¯¼Y¾ª·ß,ï~ß×ßòú^¿]Þ Ü-»ûåF`·Ü <ˆÞ»>TW¾ï—ï Üx²bnŸp£Â§àÙŠUèª0…¬>=Þôéñ¦O7ª¶¼9Іx{°8Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,í³´ÏÒ>Kû,ís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Gûís´ÏÑ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<íó´ÏÓ>Oû<í ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/о@ûí ´/оHû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"í‹´/Ò¾Hû"íK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾDûíK´/ѾLû2íË´/Ó¾Lû2íË´/Ó¾Lû2íË´/Ó¾Lû2íË´/Ó¾LûrhïîÂýò«^¦}™öeÚ—i_¦}™öeÚ—i_¦}™öeÚ—i_¦}™öeÚ—i_¦}™öeÚ—i_¦ýVíçÊ|_ùZàûáå•À‡Ã[áF…§ã³Ç-/ôV¸ñJà£åÀó[qoR­°Vþ ðò‡wááàOË7ߎû¸[q§öDà~ívã.ïVÜ=}vàÖÀªÝþ|>†§à#ðƒðÉŠgø ß Ÿ‚Õ_½å¯ƒÞò×Aoù렷ŸàþTXÀêï‘Þ *ìÀ“¸Ç_áÉ OVx²Â“ž¬ðd…'+¬òa•«|XåÃ*Vù°Ê‡U>¬òa•«|XåÃ*Vù°Ê‡U>¬òávèýxà)xº üPùj`õ߽߽ ¼é¾yÿ÷ÛÁ!® >ÏVׇ S˜ÁÁêgƒC…Eàš½Öìµf¯5{­ÙkÍ^köZ‹½~¸Ÿí×·n`Õ-kÑ$ÿ:°†Ãk8¬á°†Ãk8¬á°†Ãwp¸ƒÃîàp‡;8ÜÁáNpx/°âpÇîwì~Çîwì~Çîwì~Çîwì~Çîwì~Çîwì¾n÷u»¯Û}Ýîëv_·ûºÝ×9°n÷u¬ã°ŽÃ:ë8¬ã°ŽÃ:ë8¬ã°ŽÃ:ë8¼¾x ž.ÿ:ðCåQà£å oÄÊñõX¹Â§àÙê»±r…)Ìà ësXÞµò]+ßµò]+ßµò]+ßµò]+ßµò]+ßµò]+ßµò]+ßµò†³ƒ7œ¼áìà go8;xÃÙÁÎÞpvð†“"7œ¼áìà go8;xÃÙÁÎÞpvð†³ƒ7œ¼áìà go8;xÃÙÁÎÞpvð†³ƒ7œ¼áìà go8;xÛMl7±ÝÄvÛMl7±ÝÄvÛMl7±ÝÄvÛMl7±ÝÄvÛMl7±ÝÄvÛMl7±ÝÄvÛMl7±ÝÄvÛ-l·°ÝÂv Û-l·°ÝÂv Û-l·°ÝÂv Û-l·°ÝÂv Û-l·°ÝÂv Û-l·°ÝÂv 
Û-l·°ÝÂv Û-l·±ÝÆvÛml·±ÝÆvÛml·±ÝÆvÛml·±ÝÆvÛml·±ÝÆvÛml·±ÝÆvÛml·±ÝÆvÛml·±ÝÁvÛlw°ÝÁvÛlw°ÝÁvÛlw°ÝÁvÛlw°ÝÁvÛlw°ÝÁvÛlw°ÝÁvÛlw°ÝÁvÛ]lw±ÝÅvÛ]lw±ÝÅvÛ]lw±ÝÅvÛ]lw±ÝÅvÛ]lw±ÝÅvÛ]lw±ÝÅvÛ]lw±ÝÅvÛ=l÷°ÝÃvÛ=l÷°ÝÃvÛ=l÷°ÝÃvÛ=l÷°ÝÃvÛ=l÷°ÝÃvÛ=l÷°ÝÃvÛ=l÷°ÝÃvÛ=l÷±ÝÇvÛ}l÷±ÝÇvÛý`ûOÏ}<÷ñÜÇsÏ}<÷ñÜÇsÏ}<÷ñÜÇsÏ}<÷ñÜÇsÏ}<÷ñÜÇóø\ô/ï+wß(_ |¸üãÀSðtü&z'8ÿsà‡ËµÀGË—ׂó;ñY¨Â»ð‡ðGð§qgôŽ“8Þ uÿ:ðN| ŠõãSP…‡àÃð|~>Y1 O‚CxRáSðlÅ$ÔU˜Â V¬B]…E`—º.u]êºÔu©ëR×¥®K]—º.u]êºÔu©ëR×¥®K]—º.u]êºÔu©ëR×¥®K]—º.u]êºÔu©ëR×¥®K]—º.u]êºÔu©; î€ºê¨; î€ºê¨; î€ºê¨; î€ºê¨; î€ºê¨; î€ºê¨; î€ºê¨; î€ºê¨; î€ºê¨; îºCê©;¤îºCê©;¤îºCê©;¤îºCê©;¤îºCê©;¤îºCê©;¤îºCê©;¤îºCê©;¤îºCê©;¤îº£Pwø¾òVàûáå¥Àê¿??ª‚§£ŽjÕÿ—zêæ-g?œœFzä4Ò£Ú'ã³úQí×á§àce?ðqß}¦0ƒƒðœ+×â“çQxõ“À»ð‡å_þþØ•??-×wñÙ·~õ¯ýGÕÿ×pîUø |> OÁGàá“•Òp/4†{>ÏV+‡{¦0ƒƒ•êp¯Âž«ˆ»› ;°:_õÈùªGÎW=r¾ê‘óUœ¯zä|Õ#ç«9_õÈùªGÎW=r¾ê‘Tœ zäÕ#'¨9AõÈ ªGNP=r‚jÏ{æØ3Çž9ö̱gŽ=sì™cÏ{æØ3Çž9ö̱gj=Së™ZÏÔz¦Ö3µžyõÌ«g^=óê™WϼzæÕ3¯žyõÌ«g^=óê™WϼzæÕ3¯žyõÌ«g^=óê™WϼzæÕ3¯žyõÌ«g^=óê™WϼzæÕ3¯žyõÌ«g^=óê™WϼzæÕ3¯žyõÌ«g^=óê™WϼzæÕ3¯žyõÌ«g^=óê™Wß¼úæÕ7¯¾yõÍ«o^}óê›Wß¼úæÕ7¯¾yõÍ«/w}¹ëË]ßû&Ø7Á¾ öM°o‚}¹ë›cßûæØ7Ǿ9öͱoŽ}sì›cßûæØ7Ǿ9öͱoŽ}sì›cßûæØ7Ǿ9öͱoŽ}sì›cßûæØ7Ǿ9öͱoŽ}sì›cßûæØ7Ǿ9öͱoŽ}sì›cßûæØ7Ǿ9öͱoŽ}sì›cßûæØ7Ǿ9¾ëóÕ»>_½óºø¡òÇ–ç«{ùwÃ¥x×gªw}¦z×gªwCo\z+La«Ÿ ½÷ìrÏ.÷ìrÏ.÷ìrÏ.÷ü[è={ݳ×={ݳ×={ݳ×={ݳ×={ݳ×={ÛëØ^Çö:¶×±½Žíul¯c{ÛëØ^Çö:¶×±½Žíul¯c{ÛëØ^'žöžxÚ{âi½'žöžxÚ{âi½'žöžxÚ{âi½'Þ‡'žöžxÚ{âi½'žöžxÚ{âi½'žöžxÚ{âi½'žöžxÚ{âi½'žöžxÚ{âi½'žöžxÚ{R=íýÿÛY«èxœÂDc}ðµ­µÖ®¶Õ½­µ[ÛÛzïµ­µÚZÛzïÿþÿÿ{o’œ$ùd’œLæL’$I’œIòI’““$“ä“ä$ÉIN’9“$99IΙL’ïóùýd2YïÿR²l¦à}Á²\-Êòùü‡Âª)úIÅŠâHñ¬Ô*½ÊˆrR™V>ú g / _TŒjXµ]¤,+š+Ú*ʪmêVõ‚úTýTl*(>×´kRšÓWI¢d´äHKiãÚmþMß›»Ò@éTéC™µ¬«lµ,­Óé,:F7ª[×åõú˜>®Ðç JÃá°\Už(ÿV*ûo og á!BDœX ²ÄÑiŒÇ;Æsã•Éd¦Ó^¥²r°r½òÖL™;ÍUÖªñªªëª{ c™°ü~Ǽ›¶Ê¬‹Ö\µµz :U}e«°A[Ÿ-m»¶=Ù v—=j´ÏÙwìWögÒIv‘óäwò’üåÐ:\ŽÇ'ÇŽ#ëxvZœ­Î¤sιâ<®1×t×|©¹ HªƒŠQqj–Z¢¨*CÝÒš #tŒŽÓÃtŠ^¦Óô1}AßÐô caX&ɬ2ûL¦ÖW›ªÍºÌ®QWÎM¸#îîq÷¬;ç™ò¼ÖÕí×å¼mÞt½¹þcýA}Þ×í[ôíû^:&Žî=íó·~Ÿô³þ6¿å?òÿ À@`:°8 ä›4M]M›As0L/ƒ¹fss[óbó^Hjõ„VÃêðLx7|> gÂwá|D‰µ”¶˜Zœ-ó-l€M² ìgv“ÝeØ3ö’½ûÏö•ÓpGr^.ÂE¹N®—äF¸)n;æÎ¹+î—2 &à>À‚VÐú@Œ‚i0VÀø‚ï ~‚ßà…Wó¼÷ð!^ä;øç‡ùI>Å/ñkü6ÿ•?á3ü 
ÿIè……°Â8çà2\‡;ðžÂð>Âg¤BdE.D½G=h ¡ ô -¢/h í£oè]£{ô„帛1…1Àm¸÷ãx Ïàüoâ]|„Ïð%¾Ãð« ¼BDˆ B¯0(ŒSœ°,¬ ;Âp*ÚDE±CŒ‰qqXœSâ’¸&n‹_Å1#Þˆb^RJ:É"1R@‚R»ô—ôAJJãÒ¬ô·´*¥¥=é&ŒE'ÿÎC;6Ó‚q*:w–xœíWÍrÜÆžµ%R¤l9vÇÉA5a.d™ îR\Ë%ž(¹¤bKRQ*û’CfY`j <3XxyµKw?€ ç<‚ß ·ÜrJŽy…t÷ v±¤(š‰©T¸ÅEcfúïë¯XÆï½f=æÿ~„/÷ØpçåwØ:ûKßeûko°_÷Öƒ|“ôîy}Ð; ò:ôDo±Ozßyƒ=èý)È›7lïoA¾Í>Y{ä÷ØoÖ¦A~Ÿý¸ö]ï°ß®?…Hz76 Ð×ë"È=vïÖ?‚ü»³q3Èï²?lü2È7X´¡ƒ|“e?yÝÛlϯ³?nþ>È·Ø`óÛ o°róÏAÞ¼õ÷Û¿ òm6¸»ä÷ØþÝA~¿÷ú® òvð«ùì£[û²ÍGu57*Íߎwøþ`8ìÃ׈çÜÍ_è\” ?©§Âžñ/ΔLÎTÌ·3窇{{MÓDn^Ñ¡(ÖÅÞo”Ëø©´ÒÌdŸèÒñg¢|ëD8½ñËÒÂV]&Òp—Iþòø„?¯déO‡»üKi¬Ò%FÃ…CU9Y•GÚ¤{ÏŸœì€M0 ÿªärOOeZçÂìGÆ!äpr¸8Ч‡˜àá«ÓƒþA4ˆö£ÑèÁ𜠌´AxcϨ?ø¼?øìw0ÈAyÁ[áœ7¼åÊrÁ‰,„™r=¹Üè²óE ¤QðW0‰Ž3ŽZ´Sš?SS»8“³¥{+W£Ðu%'"–|" •Ïy"­JK¨ dO©‹jÉȤüÆI¨Ööï#M¾ÆŠ¦?XÖ`Ë\äŒg/$„Á·¼-4 é*›A@¤¤åq,ŒœÔ9D7€k5ШÒŠS7b]NrÐ)¯ŒÒF9P~È•ã6Óužp+eÁ¿®•þø¥­Àjé¶x“ÝjëSë¸IòqíxCš‰²U.æÞ)XNU)r, rvUH’BºVIñWOœ kU ç+£+m ¥ÝåàÛ©I±Ï”Uã\¶ˆÖU%M,ö©šIZË¥sÒL´)¬/”†gºœû²ä2Ed"~äHÃb£9U€ ÓæRN—UϵžòBL_9S à@J@„£6â°ø€D€äLâ½®Ó 1N4X*µã纠$µ1d¾†•…ê]é„Ê-ñ‘¢õ9“{Á'RæXQ8´.\¶ x«ÜcbѰ—:vµ‘ˆ0æÉ:1ˆœ‡3Öµ-¥…š—DUk»!gâ=æš)È ŽoS_á©Læ†quícõ xp0|ÊÓ|^e&®”ª”¼‘8b­oZåðÖáK@ÎBù&–Q×ßMÀü´©ôG°õ&uªçw9$ÒÈ<ÇëSƒUÇ(_ñ*Ó¥tä ‹AÓ¤âÐ \Ì y8-Ì5ÎÎW g8®‹ËàeO‡KþžA—o²ÇL³ŠÍ™aŠ¥,cŽq¶Íb¶×}6`Cøôƒ4‚µ1œåpjÎ^€fÎ+Y+'¬fS¸³ì î¾€oÅ$ìà5&›hÛ¯‡l> }"²Tu,EpZ³N` h;Ðäì¬Yø7lFv9{çJŠ÷è°ÊÙÄ!`MƒQTè]Â9´jò‚v8YF­—ìNrö"Á³]Û«vaåKÒ¶°®éì< ß¡=Dµ‚5 gP#‡«†ÕöŸƒ—ÈÒÇé£öWEv&1 Š¹§9Þ¨EªqêpÒE ýŽ…ÃEÙ+°w»d'"{#ø<€ý·ÇÁÏEr‰nd|á±×Ïéû3Z3òày·&¼=·v—ÃŽ…oA•5pÅ:)ÆŽ¶¯ÏÜèÚWuÂ’ÓG°ŠÌåP“†âLÀOL{­¯G°âaä¹Ë‰£SzáMÙ[ÒöÝ¢Y#·'„r~Bž‘‘ØÏ uN€2ô‰¯ý²Cj8]„¾ñ•t¢”ì8+Coý4ýû‹iòåŽQ7PÙ1aç­¬Æe~ÒœéÎN>=8ºql-¢ôÕEü²ÐÇÔûŽð“¤ë1Ž)RÄ{Àc‡ñznxd-±¥Šj'‰ÏŠNy˜8>!Ÿq`tJç UÚC¼ç‡¥#»ì¡ß„î$±›³¯aM‘ÌÐ3ó­B¬è}‹x—…éV/ðõùÁn溯$®Õtm:>¨¢lç+™êðü@{˜gÛŠ:ùm¾ý$ICu±Ög”wD]áÙÓ)K»q°XiBØ„þÀœwiG„µx1©<î3Š_An9Ù]åh šÅ¢5ÌiFûí9Ôu„”¡ie»ÒQŠ|⤴‹±ÍWº%§9Úr&¢Yà:>ì≆y!–MKú’¦ÚÅ^ÏÁŸ¦=œ}ÓÀ_IÙ'KO~"´+"<óÚ™ÒWχv"ø HB¬ÝGþ¤4Óˆ³•8á`‚Z†p-¨CÃØ—ýð†³?ê9€›r3nÎ-ð(·äVÜšÛp[nÇí¹wäNÜ™»p wå ænÜ{pOÁ FàpîËýÐÀý±GðŒdÞ§Š#q5ŒÂu¸£ñÆ  GãwÌbÍÔa”1ÆYÉyÆX‚qcp,&à*\Šó8xÞe«ñ*k0‡µL°¯sÞäp¼Íz¼Áx ïðp6ðŽd#®@ƒ0;c'l†[q?À½¸ã ,Ç#¸·`s´à ŦXŒ{P³x$Bè‹Cq&â8ôÃéX‰™8U0PŽ2ôÁ`œƒÙ8g`Bgb.†‚-°%¶ÂÖØÛb{ìÈQÍ1lÂÃè¯ñ Ä@œ± 
vŸa!f3æãR\‰p9.Ã,ÂÏËqhÄvØ›`¾Ã÷ø?âü„ëñ$ÇóË œÈGƒ­L2E“mL³·ã<…;ÐJ‹“ÄœŒ¿˜a³XO3‡gÙÉãi3_X`‘SðVã9váyNå4Nç <‘'ñdžÂSyÚx:ÖàE¼„;‘æ<“38“gñlÎâl|Àsx.Ïã¬Ã+XËóÑιœÇù¼€ò"^ÌKð9/ÅC¼ ñrL‚…Éè@†WðJdñ'¯"x5ŽG'l^ƒ<¯åu¼ž ¸7p~弉7£ˆ¦ðtñVÞÆÛyïä]¼›‹1•÷`:¦áœ„y/ïãý|€r —ò!|ȇ¹ŒËùå ®äc|œOðI>ŧù Ÿå*®æs|§àdœŠÓø_äK|™kø ×r¾àz,å|ÌWù®Á%|=XÌZÎ/X•Kç²æä`ÂÃ[ ;XïM”†¤aç²½Ùt`Œ¬7ùÁ†¿Ÿr÷ÁÎ… é§h©ˆÄ*{·µ´9QU@Ð"Q˜€ÄF+ˆU¨e«Å’ǰ€PbB‰ %&”˜PbB‰ %.ùã’?.Œ¸îfÈÈ£DÇã¡ö¤e'‹msj ]bù´p¬ïä´mšÙŒ‘MYI%ãÚ̦Ë2¹l:¯Ü…@ÁaZTÕ)¬jª€&Ð"Áâ?:¨á¨€Ó­ªK¼.¡º„ê˜Òa$%³ó,I#’/"ûÙ8Ý«Q!FU·Óp¥îU•ÞX]Ó¿Ó´­\*if ¦m¦‰¢ ´hš“N W¨Z3_°:Œ‚™¦¦ªá@Ʋ ¥ÓÌ›Ã]ÓÔH “ÊdR¯«sx"¹&’k"¹&’k"¹&’k"¹&’kq=Ðà0•bÁʤLÿnÕ«¼ã)U­¶9Å Õô¸P^ã„Ii[©õ.Q­{‰”„; &|jÂ¥–'ºÃCÃJ² ë^vk:^„†÷l+õ^;õ~®z/W}7©WýˆÐˆžx/G8ª4ÉbÁô§ÕÊHwªŒô:sWõH°ÑÏÚèfíר^̦ »Ø‘1Še”GUB‰D•Ñîjhô¿Jjj\Ó©©U#ª)MÞ šüZMn­²&Ûʦû5mT±9mSL¥Ù+ÑÜS¢OsÊr®FÞʇƕh3Þ ,ߣ á1<·’%:'{Ü2½FM?Øô45»-á¸sM•j(]’&ýo»Â½¬IŠåÑò3Z^ùIÿñ%¦dJ}©Q²žÈÙR_¢ÁœŸ'çù’ÛH%ۣإ¾Ä”¼'Zþ?¾Túcµoˆs×uyµtyµtyµô°& a] "ˆ Ä„!]†|tù èºó:ûÝ=g‹®³Åzîòœíòšì*q¶«ÛÙi%ÎN÷œÞ-vY­™)¡¬Ñ™Ëì\g»ù7Ž–µxœcðÞÁp"(b##c_äÆ  ÉØœ6I02h›¹99 ,Q60‹Ýi3#'Íá´‹ÁÂfNqÙ¨ÂØ±Á¡#ÌSñvq400²8t$‡€#`3/#ÖÆÿ­Xz721¸ldMqš%“rocksdb-6.11.4/docs/static/fonts/LatoLatin-Black.woff2000066400000000000000000001247001370372246700224670ustar00rootroot00000000000000wOF2©À3d©Y×Ä …\`: — ‚šì"6$ŽL„€ ‡( ¯E“F [³Ò£ q×ìÈ c&éLíEÄù"¿ž+h“¹?õî2 JñÒ û5ºé”¡”…mªùíâ±½ tŽmrð ´—efÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ}'_¤ûï““&mg|õD-Àï՗ɸ§Š>I’”ŒbôÁ(³(ÎEc…•Uê V›Ø¨f¾‘G ,©³H1Ë|(ÈbÈ(R3‰,IB Ï[žG##·Í蘸èŠ'ÌÝEÚC½ ìë/g57VƒÞЂD ±Ø w‘Qƒò®nÝ4«‘oxäQYyª¦ö(ʨ—*cU­Ä½(xhبªÆ4ÞšÕâÜi†'[–¡)“uJdMs™¡i6ÚËg`’áLêt’|ÌѼ±tšƒ2aŠÆQ`“¨[OÄ‘*êc‹°5 VQ  Ž4D%œš Km²åIZáMØLá„_@ÞC4¨‡ 4a§®xMAPBƒ&èÈÚ9ZÖÎ2ùZ€ ¯ëžkuÓßÄ)7nܶØEɶÏšý+” Ñή›ˆžCÆö€9èÛ^²-ï3%å.M˜q“Ì÷½À›³’:f¬dý‡¬Á^8CGì˜y8M)2ï¡9hddø„'ëŠOÕ¯²3¾zÎ2î[°ZF`0˜|¸9O›à·P[ã8ŽÈkL–.ÍxêºvQÐè¬G@ŠJAšÓgΗó+ª»Ú¡§k¼Ä™i‚ëÚ«áÀPŠ3Á vSàürÜPÁ,[7Ѥ €•Ë[4~2|ŒÚÚ n»ÎÝ6Ю»²ˆS ^pOûÔ¨  n›¢ûú¢Úej›ò‡ú±ÇÊŸ´qp­v©>Uß¿f7‘OžI2Ïì£úoIÝŒ¸Li>WÿBr$©(Å=b/» _–& :_)1'Mü FFƒ¬t8RÉë!jÎð7oÙËôÓV»øá;öž5pšÛGâ¯ô­Yÿ7úY·ÚqÁ·n ·½òj;‚ï¾—õ¨üAí3ÁÀ,j ’u]óöõ84ýõÒ?©lSò»êuÛ+}¦ÿDÿ‘â=øå€LÖsñŒ°X÷èÏG´üñ£ 3˜»¼¤ßGÇ®¢ËŠ–Ä?©ÿYÿ/úÕÿ›þßõÿ¡¹óü‰:î}nšð Ó÷šŽñükÈXá 
„æâ]æÒ¿õÿ£ÿ¹Ú§„‰€<5E¶DCÌSÏì-Á÷™‹’ ÿ ·ÿ¥³ÎÞ Ž[@Å@èÌUÙìC]ºŸü_ù‡oï¿Ûgæ¾OmÍ¡ª‚(t­2 Kª5E¸ ²>e†§mþ{y´DŠUŒ)êÖX™Ë?]-:Wº´]þª‚¹u”9rÁ+Ƶ$lMl¬Øˆ‘£Zh©P1°0A0ƒ( £@ûý(ý0 €R^¦(v·Ï›Ñ«Ò”ðU}«©Ê‡é$ÞÝ»«fªûü÷_t‰–Ë¢@RÎ:ÿ0ÿߦý~üWÒ¬ˆ§O´íéIÅS kž}ïDªÄ¬¡¡¡Ð†8}J=è,øˆ23ÌÌe€1H òܦßv6}f7ùE˜œEæJ©á_iÏd³¡0„[}cÁþÌîìÞxS‹$BâÑ!‰hH„F(D¼Š—FöÍoÆwó«ØôOÛ~¥gTM^ $œ}Áe„Ú6ô» ûÙÿÿ›ÖLCÖŒQy7½W‰’¬ùø_;&«$S¡öÙÈöëc,l†=/«9² ¾ÖˆÌKCITn*^Û÷þ¤N{ eùœÍ™/›­þ³ARSb “ÍI ÿÛ·Ñ:™Bfƨ@›HùV ÷Y=Œe×Ç"ÜýÃNL0ÞÎ03“7 èýܫݚ³¡ÞÛmº,T…Mò@·ó%%ˆ‰ôpLóI>ZÂÓm)ñŸÿàEúÓX‚+ý{_ä°™& ´™BN!‚ão à¿;86<­1Íý1õ{o”õˆ'*uyqóèÙ Ó½äLôØúÀ¹`ŒÖ‰ÿ÷$rÜ2»AÑ5€ù ޵o*[[Um*ü§S9ÍÝ›¾vgú “íØL’5yø}\ÿ ™ ¤È²Œ!,8ç„Çÿ¢nï?DÓhMÀc뱕!AÿóƒÐ1ê@Ø€æjøéÊL™Ó2Ëã+ù»xiû“T, ç߆0°#zz¥ø¶"ü?ÚúYQ‚OsÃCì¥ÃË* šÙÝÿ‹š¢ž®:|ضs.]Ž f §ÖÊdŒ‹S©›ü œá&¸»’ á]a˜ˆ¹CT– 1~2Õ5«°, Zòü™då—¤ÜnàÇÅð ¶–u…MQ·˜QJØ3ó¤s3s_`dlTp-“^û„4$6fNMÈÁjKû @4š=˜×t_ÛÁ>PŸÔ]ê¶ ¢RÚ$¤)¢Ü‘´|¨UXü¼n8¨¼ÿU»öÖŽÖ/˜§æþ%¬ÍŠ¿óœ‰#ÝâÂò²G˜ŒÖ(i)¬%ÃBTc±>䛇…çØhQ˜j·(˜¿wd7våËùøƒ°6S‘PÀÉ7´…w÷b‰ä6 |õ'¡ › % {º]Ò;I‰¤¨h§xO´yÓ”]\^ÁcjÁmÕæ–YZ[õXUk›·~Ü ¼µ‡‰ЧSSXÅs<ÔƒD¼LÍÐÄSJMQ•ÔTTEW„…Xª¦®æ\\úñ¡f¢âbd–æfuzÆg€V¨h–šJ7«ï/° ãK§N럧=u²;q¯BP¨ñKÑ%¦j“ 8Bar>¿cSÔUu Çé¾nÄÅqñ¼ÈJÜàv;¯–ííBß$ *+ÙŹÁ¹‹j· )Ê\j 5‰ Sr‰Ba>Ïó¿¬fëg »Ÿc¡B’¶ºº7ü™Ý¬BV|r‘Ë.‡Fù"÷Õäv,©µžùW5]IQÈQ×\š’Nû1µìwJ¯k†½á$?Š(’,Ò) ¯‘ÒÙ¨»ˆWI„ÙtRw)]nM®QgǤävÅEiõÎSz+[¦Œ3uÊ‹ß »}Þw›>ÿ×°éKj…úפÐ#<âûcoFp,f4^æeµèÏW‡¾hÄŒœ¬¥õç6]í£U~²`gOÌ&oÿŽŒ±µññ“Us“1 ê&*Ö¯–r²ÉŠn•Áî¶µzbm(c´5ÉfÊæMûÎ(dÁê‰'m­måó%¶•¶m¶¿ÛåØ Ù-¶ ~Ô^ßÞÚ¾È^ ?8à.¹½]‡>»ÌœvjéWnÇ@ÃÁå§.oL7þ‚nžÙ·=W¨ PÓǧáž.³ðÚ¼yêry¥#ú?€fKŒˆA”"6# &w¿"þv6vF"±H rŸË€Ëmzeв;`¤˜DÌŒùÜ…lÖ'Y‰ö¿©xº3ásc4¸*ŽuÁ Áâð¥ è×$ ÐDŽ‘Ëd±ãDsyæMäÛ#žo„ ¨!›÷ um¸>ÙÊ–˜m9v¬Ö+–Í·ºo¶‚l…NoîA¯å÷FAygôÅÆÝ”YÅàFë4kÚdß*°#vWWAÖ&QâˆÉt1[²œ{ó…u5/ –cèjcÛJUü´"³‚ü&"Äa ¹æ»R3LqoEü ù÷{dÎÌåÀÝʋϋ ’¾­åÌ[sqÏwŽHz á­žš–õTCÿ¦ª­XŸ-bGÖ®ñ/u†r3,v¸k w7þ¿ƒÁŠ•{s8@J)I¡è–[¬6P"I²YQ5Ýh2[¬6ƒÊxA”dEÕtôÊGâ9ùòÎgêëçZ͸¨‰aZÙEàŠ &o€¤RuJËZÂaÁá D™BÐK£äꋪa¾±ôšÄûÿõ [HLBJF^*ÃÎï&”q!•6Ö‹Ô#æFÅÄ[èzx¨ …H"Sú>¹Ù}ú3XRVIuªFoÆÿ ̬lÎâI8ztò œä !8„¢N“´\<¢”Ìy™·Ýq×=÷=ðÐ#=ñÔ3Ͻð2¯‚¯½ñÖ;ï}ð1ŸDýÙ_óMúßýÈÏñ¿ü®ÎMÿ'ùËâõÌK°ö»(óó†·äfú8ÄdÄ ˆ» ºŒ·êª„5È–øm±;&2ÎË È×f- Œ)5qe‡>¶è#ðÃ6‡N.µªE1Ì¡rÜ03cª»ßXð¹[ÊE}÷èî;H„eTL< 
Ød—ZVE¿û5{[°:T¬Ê"†Ä¬0weýMËÙtX‘sL C:Ÿ³k*· Üàe4—÷ån^ ¬PBŸbÃJÄÝÃx(å9Ê‹ù·¿Ä¶ƒwˆN]ºõèÕW®qÝsÿ¾ƒD$FÅÄ“0<)•´øL—ývBmÏ‹î$«€»ôNIÓ‚|ô öŒewÔdË6óÑMƒóÜ*!ôMƒ qA¯š˜[[’CŠÜlÿOÝ×ç½·jÝæwÎRQõELMÜŸö§íÊØ—ó^Á–âí¥ÌmkFi³¶¶ØÔKn¶ÜW¯túÕμ¾Þo4÷—>òñ' ¾Äwc²[–ê5þgÚÙ´æFþB0‚báö C”bãË Á¶ 8’Ä(+ª¦Ñh›‹Õ8 QjØH`B/ˆÒÍV+šn˜Ö5=½à‹h7­ iH—‘LŲdOsTÈÓÏJÜùËäLiF Öpx‘D¦P£©§ÿC¥z ,e)59d¶¹i/óJ·×éÍô­6Mù¬G“߯ ê5hÔ¤¹lU¿]Ͷ/Å5ƒÎ/&”q!•6֫ѲO¾'8´W9ŸZ +1ãZ>3Íe™å8« ?ÙøÔÙJm«¿¨Y%ŶΠ—Oà1-VqæY>‘³$« Ç¦BÙÙéb_¢ðœ*Á–5%tØöIPHXDTL\BRJZFV.yõ•”UTé16d²Øq*syT®¯=ýb»¬Fnëm…g’ñnf“WcJòÐ-‰µÜ«o´/hÄ ²R×êOý&µV¦Jß‚5„xÊñ^nÐþüŽt  ›üѳ'|Òú!ƒ1Í -™ø p¾/®^z¶æp=ÔP³©vËĶÄ®–/‰øêª×Îý–˜©ŸDˆ¸ANˆÏ¶$CÈ ’;« ;GðÁHzÄ\ ¶ÞÑP¾)sKó¶ŠS» /];3$$È-k–0Ê¢ À¥þþ]•¶†\¿Ü’¹-}Çl¤ôÙ-É©XÐéžo1â³·>5VS ®±'ûWŠ*ˆ9W”ÐÊØŽ¤¿«Ü½TÉÜa 5eléØÖ²ƒüt©Ï)tÛo?ÿÏ/ z!ƒãW`±ÜÞpÌrö%™ðRMßúôX³/SÁ–ÆmI;JÆœß2&¹fµ7: чœÀô`ÞˆV©¯Uùïä£îÆæµaîºÑI¹=¿—6B*+á`4—+†aA.^Ønò1 ‚6.~€¦Ð†pDÄ"IHÉÈ)DQR‰“87©Å'ÁŒDI’¥Dã¦T:¹Š¶-U‡­žŸþñ5›Ó⸇|îŽAç yå„ÞnTŸ1+`*0“µ¹LÁ¼üä––‹9‡!¸dj‡ö áh…Q›OµhmfêB÷Õº!ëFÕcÓâNÅ7ÛŠÞÌò‹9ó®ä*~\sýâM3Y°Ãa 2'øò<‚EÞ)ðïœb , Ÿ€P¨0á"ˆˆE’’‘Sˆ¢¤-FñauÑÐq½¢©^µ~¬ ÊüÀõ§ãH@®P‰IHÉÈõêÓŸW~÷#?W¯êßOº§Ë1;v9ôõÛáü;æÜò™bé6{wYy…EFÕ™©²Hͪ"›¨ìòrðjÅù´ØQY”@B-»æ¶Ç^ —„6ŠÑ°4‰·Z!òCA[è’ò<¾T•›˜mÇ sY<æ®#iX™¨2‡C‹W²:”ÍÁìsÄ‚£Brб=MKhA-¦|Q0\h­"BÅm ¨”»—Ê1OÅÄ1‡™ft¥€Ó²õÛÇVÌL+ÆÄ }Šm†º 8 YÑ(¶ê´yš‡5B“Q™G“æ3ÊÒoº1h µd”ŸÙÞ,3Ùjo­MGv[r8Ü‚( …ª2šË,:‹µYY²Ù’ÝXɤoÙ‚‚$Ö!1]4.$]’.b.Ä™È3‡B‹…¬d³?»ø1à(]NÆî<-®çaQ%Qbc'666Qº&×€ýæ”bGDˆ§9sšNkjrF[SH¿…1cp¿5©¡VAz38,S®GÃüí—ñƒuŸ"F€Þ°±¦Í,Áâš›¸7œ˜Xw*؃õ½ÓÆÜJÕdÝÙ>HÆÂMF@¥¯{XÆ)7ÖͱùxÍŸ·€ˆ)H°¬ÊöÊ¿ x¦Ð†pDÄ"IHÉÈ)DQR‰“8¯©Å'Á‰’$K‰Æk©tr•ż•@Y£T™r*U©VsûêĨ©D#µfÏ´˜2à’CÞtËÿwœÀð'GÓì`¤6ôcmͬ·¹µ7ßßNú/Ò2ÞèÇÀÍj¦@ìÁ¼¯W µMšÓ¦sSò ­êzôU‹sÝcsVpÓsrþ4íkþ“1·?ˆGO‚BÂ"¢bâ’RÒ2²r1\Ød±ã¸ºË»T÷w7FFpÒó±ƒÀ Lz½h±³EÛ£®&-‹P\9äI=ÃyFÒžK^Xíóó¦)žÉ¶9œê é©êÜmƒ28éëÀ¦|«ú>µ™½ƒ£“³‹«›;à „2^%YQ5Ý0­R VÍ{@-¥0f±8‚íÊV =½’·ÒòËrJòú—ÕÄMŤaMlÃö3XlÍÖèl5»à‡[GOÓÅÿ¬ÆQy.3`ÓOÃË)J¶‘Æjä æ&4³64¤¬ù©É8„OtjÁ*ì¶1Ô=÷S®[f£¨§êÐëÅh®ÃÐU¡K"ñ;ãIJ¤®S/»¨!bÂ;$îõ§ø°‚½ÿ–Æï÷4Ù3á%rر ) £Û!%jó-Í=U4Ô¶gêˆ®Þ ¡W‹öì'Û „2.¤ÒÆz©i‹š–è„2.¤‘9°l'÷[ÂáF#ÉbÇ9•Ûy[y9¢¸QcJÔ—$@d …J d 
uxéHÿÊifzú×Ó333sô5½²é••¦ïdFiÒôv7a0˜W3Ôt;ÈÕ®tO+Ó3É:1‚EU¬‡>—,»¨¦F_‹&εÆí]ÈRÀɤùªPxøO§Ó¸îþ;=gÁJ3Ã(V4Ù.ƒP{¦…³Pl~×½âF 1DÑÓ1úX®ô¬½˜D”UÓ&³Åj8ˆ0¡ŒDIVTM7L«|41÷È—¾òuze•+wA£eñÞwµHœB ô¶ÑIj‚îiw_Ä^A¼ D"‘H$Ç-‘ Ð,¿˜a&”µë¤g ¤ÒÆz‰ð§¬ÿìK£C‘X"•ÉèCFP 'HŠÎÉâSg)6-Uêu õûGæM| õzôôôôôôôô ñž¿½;e\é¼HR¯'õ2·zâévÏ×b ¡ xfFæà¢±(ÆÂ¹ƒ9ìØmÕÂ3›ØÙ;8:9»¸º¹"L(ãQ’UÓ Ó¢e:Œ‡'Id u½aœ b³xÂ䙼pBïtOT¯A£&Í1è܈0¡Œ ©´±^˜Õ&Ç›œí~úå·?þúçdê*-Ч!8“˜¦Lç^ ž…„EDÅÄ%$¥¤edåb„2Yì8-¹·¬±_Ê9w ªo»ß×ùÂø„`Ųë³c8‘0DñÑ1B,Wz˜À'&‘eEÕt£Él±Ú€"L(ãQ’UÓ Ó*_‰•ô‚¶Éäëøz%&„ !KšŒÁbqx‘D¦PiôyÇ ƒ¢^ƒFMšËVì~üƒ†L˜4eº-„Ø“Õ ¿€ pAOJ"†!P Å—Æ`qx‘D¦P“+Ï+¬+b†áàÉòÉšöq›ÄG à ’¢ã¥ JÉlÑI‡‡œ0lÄI§œvÆYçŒ:oÌ]rÙ¸ “¦L›ÉlÇš3ïÊ»wÁè_ïÞÐxK¿F‡ÓjаuNµÞ˜ ¶¸ä?Ôk¤môŒgĈ_ÝÀiT®Ãû㇘îðˆÁÊÃ(VnX½rGö@FP,»XÇpÂÅNÇp±\év1 %+ª¦Mf‹Õ$paB/ˆ’¬¨šn˜VùbûKT½¥nמý°à)‡qÁ³üHJ…cqx‘D¦P£ Ó77˜H-ÊA;"9–zkjz@Q¯A£&Íe«ðv`‡N]ºõèÕ—~ă†L˜4ezý|šJ~±À—Âï·úàN"ÔI©@aG C±§1Xž@$‘)ÔäRó ïiÀæ8x“Ême;zw {à±S 'HŠžܦ\6ïˆR2\tèz—”’–‘•K^¸‚¢’²Š*=F2“ÅŽ3šËK-]]cÝ›cýQ‘Àû @0‚bÙõÕ1F¢@t /Ë•,€Ä$¬¬¨šn4™-VÀA„ e¼ J²¢jºaZåËÈ_´²èH-Ö®=û9à)‚…ñÂi„0’’ÆbPXž@$‘)Ôhpú¼aâÙ‡—Þº œ ¢^ƒFMšËVø~̃†L˜4ez³ùâ_(îÅh/…Úƒé>~A!á$‚”  C 08 ”Æ`qx‘D¦P“ËÎ+\ÑlË]OVÊu‡5îu°Ð 8¢N½¸‹º¢”L—”’–‘•K^AQIYE•#ŸÉbÇ ryµ6®u4†¶6>[‚ŠÀ|Òq¢:ãdb2°lrò äÁJÓËD×:žbʵµYÑëômU$(4‹Ã—‚ðÕä¡H,‘Êäà‚à ’¢S,.Uê•”ÁI¬{ìºn•ã-V›ýVÂÐ¥Xâðœ}:ÒÃ9ܯ:×½n/QPÝ#ãuÒëJ°"™ÆÂ2*†q`$Qù·0°­…k B¹· QnAâ¢A …qËžPTá  Ï­äÁ„ºµÅKHzÛÊ)+ÙÝRŽÜäq‰ü® ` °ƒ›'´À£o¦îXñ"¡º9{œs(rŸ'=°L›5oÁ-·Ý¡·úhG³¥;d)wvX]¢1$kÌÊy{5¬m*ÍžÛF¦Ð*§êõk÷æûåMÌÊÁçjfæ@ÐjÖ¬@ hi×Úêúø:Ó•‡,5ÖÍr¯ˆà Õ°ý N¾}ÜXOèfî­ãÑ_{¶ÀKxÇ·=È·( nžÞ8Y€þððÜë!ô"x'`µÖYoƒ^²IWº‘6¿·eë(e\1ü\ jÿ‡œU Œ ©´±^jžoÆû:¡Œ idê–íä~Mx±•³Q ‰%R™<€'„`Åp‚¤è6 ØXµp¼Åj³ç÷Â?þú—Ý@= ‹ˆŠ‰KHJIËÈÊ%¬ ¨¤¬¢JQÌduöy/ ¤§u~&&Z±\a³‚+œG¨Ä{!$nW¢‰a>¯ëšE¿ï¢(¢ÑŽví†ßŠûNìíèÎö^¼xá×ʦýìâèN`ÖƒoŒ’ZÚ·MŒŠ=N{ÒÞÓ*ÎÚsifÔîIͳªþÒ¶p´UeÛ¬hûÒ­9ⓌíȤ4s‘,%³ÖÈV%{ƒ9J´Ã@$í²¾=Xí%nRNSÛ'»ƒy»¢·k¤]—ÚuÅÍ}Ý#9-T<)Wö·ÏL™‚  ÞQ[“ ¨†wÙ›žûmr¾ƒ-¸„È"a×Â#Ög}çº=¯£³JFKŒ¬åd¢$+ª¦Mf‹ÕpaB/ˆ’¬¨šn˜õ*Ê*mVÖbÛVå¬ñëšy¶]‰œ-„˜„”Œ¼TÊ-P~ßA"¼£bâI8;)u<*ÀY#(†$E;qêŒa9ÞbµÙs~îÂ¥+×nܺËóÌ+[^õÌ£ \û£\Ö2pyL×z¹Ö±¶›U Üm%îÊ3ÚðÈ ®»Ý†šÁê5‰0A„õJ:¶ÀØ•‰·Á¡ ÃSFIÍ 9~;9œ| …óÞuJ<åœ\À¼àä.Œ[œÉ05Þà3 b ÖŒµæ”¯Ø¨¤ÝИ¿8–AR”¤ÿ 
ˆ¿ÿB”rמeDÕÒúŸÇ›òXr½ÌA’¥Íoêœô» åƒ!.¨ÅÓTF¨ä„áºBæ4ÿ‰?”+2 ‚ÑŸý4qD@Üäwõ‚ñÁ§Ôâiª0ä¹}ïx¥ËŒÂÕ>çf§µ¨Sþ›*ÜZvk~?.#ÿë¶c,Ç+Hg(:ÆÜ³¥NI?× ÇOò¾BWE×Äw%×›[›©r&ç¾-ÏF—tYkÙ)ÿñ•Žn­ºuhð¦Ù'Ÿú[ÙÎ)éäèâB<|˜9UÖ¶2ªGÚ'Û'׈™A_ÜÛ•ä*ñq‡ ÄHÉ*-ºJ†¢cÌ÷,3Ù,¬DY†Úh™Ð.‘!>,1¤o`ï¬XhIôo•Y@cD81°«*«V&Ve×ù%SgÅFÁ(V°×Ñmƒ]Ök²^“N+¬ŠÀYAUˆ@ª”ô. EeEe …WW0ƒ¥‚mÇ.†§ŽdÿP½hx™d©t®¬Ec²$3KgÍ*jFkF[i»½¶ëÇ`l·c¯"¡è³¿¥’mLI#G×qž~ ö\V'j‘¶Ë9Äú6”Ue(~e$¹«(n_ ²ÊåZ$³aêg…u§ &oòT V<û>†æŠæ‰×H–Éä1&K…”ë›îdJš¢c®aÕ «Úª¸ÆÉW“PðÖoÞoì{ çÆ„M.=VÚü ,xY¹ÊÒ“®^Rg’!ÂåÆœ³ õè.žMS20dƒi Slôôô®$===}W_GòôÕ›¨3G@‹×²šÎæpvÉ9bÃQúœaìºiĈµÎÉgîv ˆµºBg4 §‚4ð@ŽƒD'âU2¯Vqڸݶå¹êdéº2‡ì¶[•6'îҸ ŽW#³ÄŠWrNvì’»8'™}G1“N¨&ÆöuPj3¼{¼âyMNIÖmÜ€½²­àÒ§á4 ) š¯¾J¼\7s7Ñõ±bµmŒÎ.xe|Vÿ‘y5³Þ”Hþÿ–à&ŸÅÐÒÒ:—´´!`ÒÒîØ%iæ ´x¾ˆbªˆŽFKû`mÓbZ–uˬÀ°sOîgÔ{ªhöÉ~]0ƶî,žŽqK]-R~(¸¨Ðʬ†$™Häi‘ÍàRÊ2@"˾3“†®lhŽ\kdÒr r_ƒ¾¶’ßÒªô!é)Ða ½q°×þSvî9pªÏºIÂÏ^Áýeîš:Ö®º°ÌKñéw³©*ÍUF,Óq¹‹V‚¦¥ 7Üü–ï¦%‡$ˆýÍ!ŒÂI%7{KèJ_ø“ç×?o«¥Hy3G“êX#á>6]W˜½ šo“-EË=QÈhùL.¾1VWmìrGÍÅožÎ¾Sï?ÂӆĴ̹Òû#WÇÊйç/îÓøŠø"os¡.莿cEv1HÙòÕã-J™¼Yµâ‡âpb£—]¾ØÁ½•3è¾úßÝIÚ²=Ø ™ŽÙ¦À]Õ&ˆŽ1áRBohÍ˽s,ÓBêµ¾9[Áä±÷M-k@×Û87¢Ö0‡äˆuËŸžå—Ñ˾ÒHÏ»UÄÒ1à¶véI¤& †0³-ÚS.”˜ç>OW£»Â¶)úøè¶¿{ä% GŒÏ‹‹üÏÆ6ÉÏ“x§ÚÄÅ9maÉê86Ù BŽæKÓÙ<žÌDŽåÕ–ÛI8kæY ±Ågàrã~·ëÑWgÂÛD¹y%¬»Îò²0´­Îá>Z˜V£™ìš¿ù uë÷.ywÕÙýHàDîâ"íã\…qbâû¾+Œ*t.H«ªºNϽÍ8y¼¥FT÷Þ¨gõË]]P^K¦D ûvÏ×pÐpV*¶ô¨ †\,€‘Ë·³£ø€2…œðc(òÃ0 pJÀ¥üIóÖš„/Ü%†’ ²º?ÓС˜0,$Ž<™‚‡À—T‚?щЉIÛëeäHŒR2©%ƒk×X4ÜÜy`óä…Ã[$—äñ•|~RÀŸ…×m(~!ù -PHXzáQZ±#­ˆŒZTýc´¢ÅD/VÂÔJ%ʈQ¦ìXåȬ@QìŠUÇ©F}¼4%¨YkBˆ<(lÁ3' ÝÜXÆ ñmVküy C àÛ®‘õœÿ³ˆ×b!˜B…á —‹ç¯å+ä“"eʱžŠßb••Ÿ«Õ«Ó€«bMxš1µ¼Šµj#ÒŽ§ã„N]0Ý6£Ú"ضâØFj;\ϫܜyjGŽ¢9†tü•æ„“”NqsšÆnÎÒ¸”»Ì2wIB«fñçxøƒØ–÷[J2ü«¨©yå"l,1/YEêI*ÑÖäâÁ˪u»Öf}²6èÁ­·~¹˜&l$E£châø·“¸Oë‹„ªÏ‰ÃÍ5×eXWºÍ¸;Ýg݃1êqO!2œÅiòŽót¼{ñ:K@MFx òü„ø·_Ln=—øÍðÞwctâ“ÍfSSí iSv‡sÊ…Ý}¦þ¡ì4SGÂù!­^Ùƒ{3ê4!PUVŸåÙq‰gæ-_XõãHÅWî<‰¯~ 6í‹Øê~ñdoïºÅP·\Äïá]û%(EºMeüfŸó÷ÔäXòfâ1mí=‰›Ù\Mqd¾ÐµR·ªv•…€6ýÓšFb(Mt!ã1ýpzªðõ—K—¼<[\ö½ä‚¾‘³%/Ïå"ór…PûM +>ÿËX”¯K³ðdþ£Z²?aÚ¸#±1(ÛÍ0 A¬¼Uä†L‹«4Sé½)fó5* —7³gv♫Թ¤òÜA*M‚.ï#x“@}v þƒ7ŒšÕk¥ûŽõh7añëR²—¡.ôf”;^xåOpç·L󺃳¤Ìè§R ™W‡a¤Œc­Ý—Áa!†±êUùÈÃSP! 
¤h^¤}øàbñ=Â@¸F$œåñÕMqj_Ñ=xëͬèf(æÈ`-jÞ[í'ÊYxaöÿåã ,AB÷ÈDÈQÁCµ6:z¬ðù‘òµ¸´d䥻ým2Û OVªh¼§ ÝuæðCa‚<,At`KÌÇ-±´ìö¸åV Y)!C&ª¹XŠ‚6ˆ› Ôд—õèô ÓˆÄøœ©ÓZ,ë˜[oÂÆY–—ÞÆmÒ…ˆÑÅI\¼_ÂnˆWî¬Aþ}SH]z`õÖX(ˆ¨Q…†Ž1˜/ç±:bÚ„íŒÉ™„,³Mñ¨å Ek¢&šÐü†:„7RÔtó6JI…šqBÓ*ú4 ‰;é¤\ˆHˆÀù%óI8O•SÀx¬bA&ÄjB!ÀaÀHg“Ä©„œ<#Y|!¦ðß“ÈÛhþB¢æ‘—hKDÔ•]ÉKaQXÙ›„¦JTôi.Jì„BI¦6‘¤©2H<éOVSX"¸Ÿ!‰§Ö¹uð/êÞØxøLns<ò¬QÁ%'Ô”"«Î2DÉ7ÿç#}ÃÇ"±4Êo1‹Ž_¹ý»ÿuògÔ.ÖÕƒ÷ßð»é_õ²Æ“O]ùØ ÷Í øùqyÖŽu—¯MØVØœø‹ ]CÜRƒí¥ÍýÚzÒÝ^霆S—s»|ëè÷å1È/W|tSÛG—‡é¨ãfZï”ÓÎ8ë’+ö1}tÛ14  `P|‚?†ˆ÷¦P]F){äëàGs¢HZ ‰°ˆã8v€ÇÆ”Á1*U ÜU© €¹F*8UˆQ&wúiÙŸÜ¡‰Ë•ÄÏÕ•P©ñ©ñ1cðCAŽÄ¡(”ÙI²ô¦HWŠæGkÃ-!V÷AÀ(ƒC£ q¢äÑΆèp ë)lþ à-» øV{3o8Ø_u—kèS¶~†.Kö›»- JHõp… h`ZPY`91V°T?hqC¿¼ÌhÍŽòÛ8 %<÷  =jÛ‹È\,$2^z³j]m”øÿa9æl³—>/¶“ðÀ2©:4® V]÷Sb|Ù7ï’me‡CžyæQxnÁ¤)ó~vØ%sÎy×’e/¼ô*||â¨?úÝßO&ª šf6 cL)Ù¸Jðƒž„ŒáÊî6sÞ|øòãï–1·ÝpY¿Z‹,*\ž|EJ%WEµu5kѪS·-¶Úf»wŒºë&Е¼¢$^ªríÚtX£Óútuë²ÙÖL3gm¢ÝíjO“ííjWºVIå¾Çž¸fÜ#E^v×¢v6ß3êföO}ò§½ðÅßõû[³¾žóVRÁo¾Àÿ=ëLº2IÍMJ .»â˜´tÄQßZiM}éa³ÓVÊòÀ„'ãÐÐr¥h™åVZ*I2¾D%JWb˜…ƒÓ.ãö˜°—°mÙû¬âº³“t·nØ“ðdä|^Íçcó82.މk›Qµ@-U+Õµ§Z«^¢ŽD@FÍ»ò]5®ž®½Ð— /×5RR㧉E—¢«pÿøÿŸ++x–(ûªÃÉÜä#u'æ«Åj¹Z-ü´ö»lR2¨dÏzz€Îg©þÿ#vðÒ·þ3wøSàÇ¡ZY á“r¾Å|Ÿì±üiìú‹±å xŽ•'À÷‡ŸÅ?#K*¾¹ËØNyð÷Eåó¶–/’?ù|@ìR¾È7ùÓžý ª[m>µcîþýë~»xD}=ãÜ¿ÇžÊ qÞC=C7;ã7¼l³WËDƒt0Xøº {b€>gcB$›X5Wn%ˆð‚K«–ÎÒ Ž‰^¼Ä,¾ý©}Aä²¢\C*¥ŠÚ ê,8Ä]º‘±xzЮ‚öhxÒ·² ¯¾¿^ö@nò°–§uaó¶ülâë%þPH‹m+ØöBô` k‘-é j°0C…nI8«Ç›ÄÒ*¶V³ã)ÎPIFŠä(j`´©ÑœÅ#R•/•Å/ÁÎâì\ _[š©Òí+ÃþYð:Q¦e;T–ƒå8\®inÚÊ3S‘ù ZÃÃÚÊ(U¸Q¥›U¹UµÛÜl®Î½=¬I³ÇµzZ‹'ŠË7[¡9ÃŒ*Äíe1Q_ubÜ­Þ} =Ì …”'ÝwÕxâ‚ÿùe1ß„c˜^¬lIÞ×$$«Û½+z7ÔãiÀÙÛgϪìL-Oçé;÷\_‡Ý¿N£"°“Ezú¶»§è>e:§Þê¡ÆW„ÿ ©^´!€0eó LÜšB¢:mÅ¿²ù4òЛśÏ9XèÆý+Öos`ü­ýÇÖþ·¢F! 
[ ·Ô•  &Ë?’q¿W˜ns½£dÚt,ëYz5µD)ŒR ¬çƒR É(Aú°J¯2Ê:K¦ÇX‰Ù¤Ô¶©I‘T ßâaYlÐÆÔ ]þÇh¾»dö:×WÝð¦d+ÂøC^ÐC”ÖU9 ôº2A„é.7Æ’ ¤Ó€˜Â),¨±£G.fÖæŸL /¥Ü_磵˜ô«‰»H—C {0xÖ$a4†)¤W-Æt’a$§º¦mÐ܆9|>f^žŒ@„ÝJZõn°3Ó4™Wºm…r98Všè´\×¹c ‹žðê‡JKë•ØÖ›;–÷¬Õ9kØT5ž_a³dŽƒ§ïr“³y9‹un?'O_ç§oØ]Xjî]4Øõ>Ù>›‡5a¦žšåUX=x`ò!ëUnóžÆØ’Ê;ÆÁ`=ŸëªˆÞ¾áâ¯lçáqz•èµØ€ÉæP`tbÎëmàf'¢}—±[Ò³%LKR…õ·ck7Y&NÏÿ] ÁOtgŠã¡Á}ÅñÒX€øQƒJ¢‚ê Œzˆ ¢h¬¡ˆ"ürJSøÕÊð÷QÇØùIaŸˆ‹˜HЈ“ N§‰3ÄYâqž¸@ÉÕk’gP½¡â(š©W³X0d¡R×ÞkjP ªc§Ž´R‚6Ü´.L%È£f§3P7•÷¹ ‚Îe4QÑ( µÐ5[OC›×*ØŠ!)ß/ìß{rÏn¶VÞ¿ìtP¯•­ïK`j¸ét‰'z§G'AmhG飉äpNQ+®Õó¿sÖ…A¹K/±@KS/çý£—~€éÏD¹)F]9k&õ£œ´2¿â<"¤%†€8oëíÇïòOqµÕÍõ4û'%%iL3Ò–©fÔZ££7ª[CÓ°-“„†îL×þG•=~0 ÙØ2}ȤKK„¿z­šÐ6DFDµP>¼¨RiÐÆ oì«.AO°8ƒþ{M¨fcýMgè‘„ÁÙ³ÁD1Ãá‚ÆShÆc;Gõº;×%„›×ùUÆãO^2Jšµ—J °¯ÉÑ^­¸Ðõ£YÕ=W#£˜"b—¶|“Õ—ØjyïŠ3öh¦WYƵ«úP6 1*…™,Õ­©H-œ©ÚʼnÈRfÝnêí&ÔÇ=M3ô<åᱪyd:õ-voÝdz©N Å>s&‰¾ãlj°i‚IsÞ†éLhƒßåH¤3væI÷/ÐÀ½#tÏ…‹Ð÷K÷÷0E-G fÎ2¹h³´¹n… ÈA¬"aMñŽ×¡lHàæM$l)ò¶;¼‹„=EÞ7à@"áH‘ 8‘ÀO‘p¦Èç\HàäK$\)òµQ~ƒÏYÇæí›ÑOíNŒ©û¾¤¯Å8<\‚4ÔÓ'õ è^B†Í×{¨· õÀGȈùyoõµ! ¾;~øiìu$§zÒ sÖÉéä#{ª3b»;ú^3í½^°÷j+ÖW5^Íy–ñÞ²®%zÒ~U‹êܸ8r:*º?Öª,¿Ž]Ägñ¢ƒÀ$q-2¯{G‹J)Ê•v“t[£†1óçà‡ì¥»&zxTúl–‘HPjNn"ÖÛ“HùûqóIìzv¤[C¢c6—*ºgyܰT@þÈŠ*Ž?#å)ͺá‘!š¥¥eÇ4]óJzlyBФ^¥$è‰ÈvÎ[ =‰Á  áÿTî¯9¹‡Óþê•…w /Ž=ÊYÀjV0IktÁ»> ·œUBä¥dÕAòpô9s³Õ–†É…Å]òÆì¢…- ¡â¢Iλ×B}×0 E×½ðËÒ‘S“ ÷ÆîðR§=¥«"PºªŒqÏÖï͔Ͱ)vwCb0ƒX‚g¼\¿Êz‹žåö˜Jã©ÕšÂ•òÎ÷Ã9ÄV°|¬P°Ð•¶l®æ#Ô<Æ~*y۪ƌñ_@ ÛªU+¨¶Èöm€×æÂؘmÞ$:ïD¤ÜqJŽg#5SÌ…jeäzQ¤ëìŒYÉ2A¼ð³ß•<žm,ˆ³[š;² !Fr”ñˆeêŸtZK¥™ýnNƒ‹ð‹hqͲ»´2Ah‡Å®°ºgd WéÄžfö Ï –Ž„F$[(›…¡—)Ó=âŒc®l™¼ÊÔ ‰P¹¸ü. 
ên^ÞÅuhB6¡<E ˜]²¾E5W±4¸€Á&nóÚN×§M&‹)ü*§|³:AÅ?)JEÍÿk+oP‘ÓždÊIîl(\8k3(õq>ÞÔ–‘je6 IH‘½Ó$3ù}À[Ñ®Nq\"zܬ“C–ᡱŸ"ªë†ž:#5ˈ\+#WwP‡ðîR\ßöË(L?mÁ=¼é/Î{ %•²›(êjûÞHš)˜fdu‚ŠMjÆTöKQ½qáÑÁ#Ë7dFëHfñfÏD}—ç…ßùåúì¾ï¥ê7\×769€+™Úèª Ž9 Ü¡=W½S*ÔÕˆ£!É+c=Òž‘žêµAEÕò@/{f÷à´íY±ÐЬ+xŒ—’<­Þ¡0ŽGë÷"†8¶ŒÑS´o”ÜRŒhriZ£Àò‚›o )Ì&²qYÜ6Fpr8&p.„¹ÂóñœE;%ò¤ m\­añÑ13Rž¡H1”—Š&!¹…×"éïÒá±V3â M"Ëditž"…9î;He`$­¾‡_ç­ì/ •Ü"Í)GPÞ¸šS”]9Ì?ÊTþIƒøD³§&Ž ˆ#º›¼Z0ù£·ôUXd ¤ñ©¥ë,í¥‘tüÍP±OxÇ&\xò`ë®…¦s6¤P£ @MŠÔað¨¤Íª· iZ9H¿XÀ¿Ä¶xؽ; yvÖ×N=µ‹xµ )ý&}ÈšM–gÎfµ(lú¤Â²ÌC`-t…º€|uUéQÈD·¹dIî8>©8ûQ¾27¼œÂøæ/“Û³G9F«ˆÍ5Õ,Bx-SÊaæ@"½›1j*¡tÃgFÖèm~†¬)Ø:ø$WªºsJ¹œ~­N’=”æªÑ©k‹;í„C`ˆÃœ2e~lOéÕËa÷™8öi•aìÇ'ˆ’h÷¬ÉTŽñ´b7I™uʘ) óTñª È~ßÅZH…yª…å%IòB¥º×{= )Dµ¶ö^±ÑÐvn7s‘"æ#5VÓí$ëIì!Ö„F8j6£=Ý·³)0wK Ê:ʤÍ×ø%Y:Q·™ ®~–õÒs3½ÅcLÖ¹ve‰]Ý¥@+‰”>Ámâ¿ywi£u\Zpè½ì&¤¦O˜¦˜ÞuN4+lJp¡7²°Ù[ž˜ªºksQ j¯î<ãHHÖCÞ—6/(w–äi¸Éc¦Šxuˆ€†Xf5aLÞ ¦%±ÏÛ·ü2¼[Ð…7PÚª§Ù±úüD#Ï#`6›íàqÄ30tÛkÈdÃ)´V¡{ ër™Œ†Ì *ëD¡èzÏÌa;VÈZ¸n$ŸV‘®}&¨‘qε5-³ Å»"è2Ï#‰º¬«¶i©£2±ÖèA—G1ý''1.z[JlGyò†!ßÊ¡,îöÌÔçþ1ÔŸ ÄAš_ûŒçwC×r{íêÜg­±ã"9VÞD§Î‹Qf­“á? à•Ê[â¤!%ÔfߎòÛÖÖ*n«o ÂXN…=޳1ÞrEÆzĽT¡ÀÏ:ØŒ>èžTEÈŸç¿’φ>#åVòã?ë2Eq—„ªÁ0.xFš¶‘0œs=ûºËKä#%ïCc¯žØó)u}ßÔ7Z„:Ó˜6Zg©Ì&réõ/ÈÞd’D³Sßc^ ö) JˆÀrÓ39MìgÌ2º1¨Œ,ž# Ë)]X&f߀!)­,.åC¦>#]¹È£†>Æ 1¦8 *Ù=Ë€=®ŽAI‹¶{kûÝÛ`ßÒëËêv{d®Oz!aÕ]A,ÌM:ëH³†4ëššmºd>‘ÊÍŠó3žFÞ8<ò±FzØ=½ÐÚzˆ‘U”"À5<žQGǘ4†`QHÚtDv è홓;5“¯-„ü€™jxª8HýE)…+PÿîËSVÔ~&ÚA‘Ô¸—î÷Ì©9<‰›Z·FÙóB§lØó}žT<²H3ÔAõƒ‰à\è¼[A쎣]GÞ*Y˜N–Vy<Ž÷\oÂð¯Šì‚²ìØsư1­º†Q •¸ÆŒêG(ß\c }p`qthèbŽÙ€ºÍÐ>Ë»Ÿ„‘]û뤠óÓ£e¢£×d¯ rJFrÛc˜ ôߊ]–!´» /?é\EáÓ/À7û·ñHǵ=ŒHHm„.²é”îû¼Og4‘º¦ÇX»ò1ÒyzÚË{ä¼ÃÒ7šËnª%0 ê»EY&„Üʦ‘ºÜ‘}HG|sÙp˜œCÓUì܉=NæÙ•Õ‚cÄEþÂn$—æ8È }©±9Ä@fþUC $>—W\›D@îH9‡ÞÇ50Í?ßê…5ëÊHÅ€%%ŽVaµVIJYãXÖ¸ ºæš\ó‚*ss:@aYšP,t¥•˜›Š»)Í7B/3RnRDש'wX«W âYìˆcÓïu˜+?¢æ"¸ê¼G_._²m–4xÄÛßœYkÑ&äs]ò¨yóCEB&0„aAÁISj˜ Æ79Ì Ï6†”Ȉxјa(L„˜IP¤@’½{ŸY•Q#Œ£|˜t>è-邉Å PýÈœMçêêëF]&^"sx7XVîzïà¨cOð¹½æBHk¶ï#Ò…òŠ(]éìøqNÄŽ)ã“äW2³Y±ÐG(‰ÀÃ"cJ‚Ç‹ÿÎu*äú(…7*žÑSQ× ô° W1u«¨Gé n¤f.Ũ÷ÜM©uÌ6Ó‘‘¤ÅÞí ûá^Ÿ¥0OŸQŸIƒÑ#©ù 
Ò燴ItštÄïgÞfhx…=ÑÂg£ýüìÎ:‹Ð/öASôÜf݇Ëp“Öx@¼#§¤…¿çjòý?Ûâ=U4JfN±Ê;1À.«ìÙíúv˜¯ÙÝÊU=>4ˆ„>NáØŽi0Õmª¼nß¡­w}ô0+–áÒauÖ&3s˜5õlÞ`ù¦jÏ´bÅjAVæšuÑã=9¦³S9Gé^I?ê;;™c»+‡1˜QÞËJyuäUÊZ8£œ1½¹%Ë”~ýTe‰äñî–S¸v`t (í£®}¹¦VîzÔð&3>VÝ=óRëÝÎØ¦ðo£¿C£. R{>®Jo‘} ë8/¸3WÞa¶õ•YDî°z¯s %_x>e³&Ôj{™f/3¡$õr²*üf–„œ*©:ÀÒi_?߀ò…å\$*CšU¬'Eš"M¥¥Ëç;MÅ™v^îüõAú"v‚— ûT‚Lö¼e¡fÝVñ¹Uå²ùÊîó‘kýóÌ.Ù̸‡¸ÚÀ0¾¡°ŽºˆÃr ø¨¥GX ßžå Èø¢gá w;èåܺ¬Úöžæ-Ï‘KTÊÁÊK˼£t°_NÇøç`Ô·àvÁq¹ƒ¸oÉ{-®w-h¡9Q*Qà¬ÏÕ5ü ™vór›†4üŽ1à)DâåÕwÐÛÎë)}EÃÊLͰ þšåÞv=ç‡v ÝCü¨£ë‡ª-/æ¤Ã½JÅmEv˜A¤›Åg­‚âöÄ‚mÁ¹ÎØÏÞ¹6ôàLî~$[rVJ”(âÁ$N^CΪV³W u,œ»’cµçz»¹yÇJ‡¢ÙyX¿ÚÏ(ë IÑl ÙAÆñœÜQMŸ`[°&¥7øî $³ðš=2÷`Pþ¦J"ÆM(JéyFjí-/%ñ:ÜÕO¬Åû4ÍÙ®ÑÕ¹)<ü¬Q ¡õŸÁ»ÑðãÀ§îGt+'¡ÒçDnNÆŽÓFǶq¯§rr£Fñ=u"K·Ü°Y)~¦lÞñ¥ðä•"/=9ß^³ÍÏ”JñæCÐý¶©¸¦ì,»ïÂ^8qmdzÐîZî .—f}}¹¾wø}*ŸèþÂ2ïÛ3É›a}¹AúôDbUÌDdûékQ ²¯ÛîDüñÐÞ¬z®/ÀòKÑÏárŸäêìÆ¶ašÖ¬”ü5`ºÍM×÷ÏÜ“=K¬ÎâÝ B\MXVŽB=°Ûþ~5ÌQÄÁ(Œ¤x)ŸTç^‚® –øÇÅ2êÔ:þ‰¦†©ðj^'¼‘\Ì?ªÝ=œæˆœyjq¼x4†Rç^ìR/ö‹‹gt¤ÉÙ;k+·–s×Ú4~ZÙŸXÚ®´ê^e²Õ±Nðs”Ôx¿Yè£eé´ß.ž×¿£_žKÌ[€ïwWB%í‘ïyË’»Ã6¿šòêIa |¹8IžÔ¢ZÞ¿ÿ}ìÆžû[ª\nI’'ŠñåaQY$ üg$i çjÍγEª¶Gûp>þ ï¦lÜ+¾_+v Oú,ÈoÁŽL~ÉÖl<² ÍAs2®ÜšmÁÖ”“mÍ?ã¢AÓÛnäfŠi-¹›}·ÄFÛJ^–„ ª›ÿ¯Ñ&ªË&úËëŽS…‡øð”oSmÙðÇÿ[ª/êOîºþ æûš½VïÉÌÅߦ¦ä®/N¹þ,š¯þ~êþkGÞ•Ó€ZD$ʉ®­DÁêæ¡îªÏ?ãU®Êñï#Ù“@˜ŽÓ{Kœ¾sÖ0u›ã·»Ä׋› 9`s¼A™ˆ„Óvø1cÝ—<Óäo¤‰[™9Œ-±mݤù·1ÉB_ZeMc¥B,£¨=ñCȈI.å3øŽ¹ ,jü¾¤úöà ^H¨ü4ióçã›É6¨ôÓ[õÖ±xÃ2)ßmŒ¾Ù(tb8Çq6Nã¬E¹,7ï˜Ä<ó xʦ´,;žéè!„yiÿæð ñ–TŠÖNM’ò­VrWg’ÊìXûwvù1Ö´k>pF Õá±úw†%Ü(⺴²h³˜ÈógåÅïæÕ±ÎbÔ¶Ûƒ=PœÌ…H CQÃ+ã"rH<ÒÃ_á»þæh7/&f½P×D¿P²6äfIÏ~©Æ;Ìæ¶gåÁZr=¦:Åñö&'Gþ“û^þrwšZ ÜR´WGÿy8#Ææý›ÉËÃç[Ò’Ž ÚDóuœ©†î Ymå9qX8Xzû€ZÎT¹Å8œÞÚÄÞT¯6©e—"³™y¶¿¯²Õ&àù¿xÒÓCèêXá2¥Ü1ŸÈ»„Cç7å^t¯“ßêÈœoZwQV–~4¤²‚7\€Q>bð–vùIÝN-د˜žæ­" ñCŠñaIõ:áLø×rgý¸nû]+{cýÖgé¶34øÿݶZ-æl±p_1í9ß‹ ÌvoñÙàiQ˜½HÅ_ šüQÉ?¹»±hµ‰·:šIpb<Ó̱¸Y7Xé‚ÌÅæ¾RŸõ†ÌZÙ–¥†â̆¼"ñªæÉ˜E¬mP+óãÈØ Ï ÊMôM"~CÜü†Ž–aA¹ð1W§Ò ‰/ÅÐX·‚—¡UâªÖÊ»ÂÏ$c‚É.§eXF«Kd02!ȨnjÎ3€¯î•\CÎlý4ŒéI‹ŒTêçk¸A°¸€ª4º”SX6ÐYù“PîeÊ”y4DÅUùÈFeûQöÞˆ°,ŽŸ2° 0.WÞŒŽ¾©É ‚<ø€Â¦Ò Õ{åÕz¸åºýh }øÚö\ªuôÚµw´åÚ>¶Í}x¬]xßG‰Næ…ûÑÄÊÉTËHY¾¤¥†4™¦„dpê(=3Gãnh×Ý£o G®Nø´z§C‚ÞïÄ©ÍÄUN“Ðp_^нÅó qÑùhÕ„½4y£’°Ò“ÑIÉFݦ˜Y„¹×E%w0Òñ¬Ý;ØvÅXìÓÁÿHëUišêÍü35ÆËùöëiŒÓ#,¬ -$9Þ 
ó‡ÆÁ|:ïÑ’y½±Þ›­BÅèÏÕX ekvÉ!A‰æ¸YìQ`4KýÇ5Ñ™¤ÉHžX|`›qûM¥àÁ4¡Ûý$QX/W›4ãw¦–æ] =·¤áDQ¤[>ß©0;t~ì]°]A?Æýxë-”ª„èÙ âãPÌ…ð„b:÷o­…!H=jª½ÇFeZ4è³ði–‹æ³Ô2˜«´ò Ûb‰…°›náÖÊ\h$«ª@µÝ+Æé®Â+U— ô‰ÚŸ‘!à67¦WW{–„3Rh|‰ZWyýÿFúH‰ ‰‘ìê"ÕN/u“Î@?NV@ÊÜ'ôugìªrè9b¾"ê=µe´e~E½d9j `©t ¥ çÌòá!Üðl ÁÂálW6ïÆƒû:³§iM‹‡ß·Ùš=V¤Ã•!µû¤%Àrò°3°*dÕ…t$ôŸÇý y  Ÿ,ªÒl€pƒò^[GùFß¼“yŠ\æ›Fÿê,Ö [½£š… ‡x&V&»H÷†sUîɶ>±íÔøTïžÊËöw„Èå‘ѱ|!޳ú8ï~º²m‹‚[bÔJ>rµçmþÎçM¶ÒŒEcKœK'Ã:ÚE3UÕ¢¹ŽŽÉðÒ²©ðÎŽSdu•h¦³}*luqLvêÖ;R·dçé¶ìbt[Áò‹ƒÌìò½;"ŸïîñË«¸–=IÑgGò/•åpÉ­9>ŽÆû Nûž°xüñ2LËš3½n¾¼“ߨ÷SÕ¹:Ô[RuñÇ¿•)Û®‡­… ]uÆÕy«Rìâ<¤œïÁÊquuúpx}Gر¾çÂgÛÏ„e—ž RHN%T\.­S­Ží[ c²vȳÍ;¯YÚ-Ùxu7:­ª#}ˆk]èíáieí|m‹ßô>ÚkÙñja5š® %®“O»yú±#Z-èFÞV¿Å¶]± N§r½ÕI2Á°ík~¾ñÆ“Sæ¼Táx)ÝbÔñ=ëxOOž°w8‰ûÛ=ðW‚s3¡„ƺ(‰Ü`Blˆ˜QŸ—p€’EË´Õ¸*¼ž O œá‡àG ùxÙÆ¥—‹P9ź¨H\ž»VÈ!—êb·SÓý2lÕ˜ŸWoC\÷bEle ÔìˆÓú$1ÝÇ!•û|u×Ýl8–—ëSEñÃHbŸ(é0į…)4šÁS+Þ}~Å8!‘}.Ìolþ8í\y-Ä «eŽšß§ëËrÐÎØ 0’óêÈýmà8omÿ' ÝÅüÚÓOfy ÒõÕ`‡kEfŽ\ïgc3¦ÚùÓ)¸RrɤˆI®å²ë´Šþ¿— Æ"öcÑ Jõ 5{°N[ˆƒfø³É¢™žëU=ô‘ì×@Pè^Á˜Áö@· XÑgÅZÀž'Vßu¡tým"»ß>Ÿ·åvée$Ð}²°ÃZ|œ«ÚdxÀš«ÑXólÑMù3ý¿1M¿=c«)œDh ëï¬PÇÝHUwÞ ´' íµ÷sÑù0¢½S>Äz^Ý«·»;µ£øñGÉ2ÈþÆw/2„øàa-Ðý°¥jÒOó¡¯OB°ùÍG.Wÿq"éòö‹-Kp@ŸÞˆRC†o½£®µõ÷;Í™|‡V¶TæËö™; áûJÃÒúÖ{)“p\.6ŽÖ˜üãê˜vç)J¦ŽB!yÒ€·}k¯dW¤ú·¯PÇǃj?Qã+'—FˆÈ届R„h-I­8›-Š —ªåk="Q|x2™—âç͕ɼy@€›ô^2׺¿‹1X¬F8B§Üºmñ ²A>ÆCßÙIƒUi6ÌXë'ó%o@ÿ/‚ƒS¯Ùoóê¢Å¸æ°«#ð™1²7È7Íê„9ì+¬£Ó× |ÅÃLȃö0à3;’„!@L[qÊ…ŒôËi{ˆfsðMÇ}´ÇzU{a’¼6wW@Œ “FëøGïZ[šlP³O­ž«ŒsëˆÉèÀsËcgUs¶Ó#÷‹!èÔ ç‹û7{ÎÆiÌýr&5H¨ÀûàXê9€š›t¸Mì¶¼ÈpÖùG*½üü"gœaw`µ–—ƒ•’(ïÓ*q>í ìg Ùûzd³]Zªyt«+2m[3Ê¥u{:Òµ÷|ëu$ªy[ ÕrZÑðG40êÏϹœ³^«.×­PÃÖÄï©KvÐ#x{Ç }©£þ(È%;BZã  «Ì$ÛP‰<¸ž G!Ø8‚ƒb#ðn<„3¸ãÐ7-82æ—ï—ÐØË+‹;8Üj,úŠõSa]>à:ú)1Ä9JϽfƒ^¿?{„Â$襳 ~»¥P¼€+o«q…l $ë4;o‚-²æXó°fv»%&'ÄXƒ"$ÉÇÅp{(ßo>ày…‚(ìM\ãÕ«Ò27d&u{Êe‰oü\gZ•WoäšôðÞBåŠçÀIÿRÓV%ª+_»‘>”Ò‘û×ÔˆiÚ®]&ipÿü:RèC©;ó•¨ÖRS š¸Sf»éÏÚƒéÈö¦„þå+®TÈ’YÏŒSüîì` B˜Oè±b~ eWЯ¼€xº·Ê3 ô¯š4o‚ˆ·ÿbK-sZtâ˜cÃ2}xj×Z©—I<+‡Ýh¨ søŽ;M+°cœh¶¢5VWìÑ•îM°{FÛfjmÞ,m‚{¸n„oÚ&áîÀ×O¨ñL}œvð·/xÜ—ïXìÐ÷Ï,þóoCÀà>)REÓŠ¸‰[}3ãý¤"÷¼™(Gv̓¯[´ßW€ ýYLå2½Â©ÌHŸ&°'*m=x«R¹¹}ÅÌúW3}°íÂߟO³!UÕqTé•y¼'yÚŽ‡'»&äÞÁ»²uéTš`6Ò¢pgQéû•ìÿùM† izøp­uéZ9û pÓ”¸3¨aÝþ¨¸°¹ÍÕÛmÎAÃEÒ#‡ß<ƒ’o$ë<6º ÿª5ù&¹ì³ 
ÊQþÀЇ(çßlòM˜Îƒ³£îÇ,œš0Í÷È2½ ˜¤ ÿDfZ´­ÏLÊŸÿƒûJ{ÜD—ÑÈ)ºÞädqÌH†ø‚G®P© ïŠ*¼:è>9SvS J’*>ÑÜx*"-½X-ÇJÕ²ZêYéi¬æz€`…'ZrMêêí-w½v]ç鉎¼&¿ÒAF‚¨B­Ã­Uv†·dæÕ‹·å•wvöÅ<¾¯VªÍ²à°´ÊßQOÿ¿.UÞ¯ª®Åô5kÉ™8ô1C ~·¯Û åeÞœœºÏõвˆ¶"‹E™¸™l9¸îøÒÓOg †³#?Žl½¸xq³üÅÝØ7‹Â. µIV—­6ËþgL7jáúîzE£͟וŸÃ¹ØØ5¡¬1²XùÊäð*ºš+—d ot²:ÿCš¨ì︦p„Üì°ã"%êQ’ÿÑÒú“¢JY!-T¬¹R%Þi¨¯z•$‹,Èz\]juM÷«-~“”{5ÖÔëßZO»Uȱ1£«Ïá~¾èÚÏý¤R†žÓŸ“>¯³ÏNÅd}’êNÿá(ḏðXÅÁꉆ€È•~0šv¬é¾Æ ùÚÌïRS6;é.¿cƒ=·~g¨·U¿Rt8ìºS¿#&¦¨“„_ækº®sxlw(Õ§›ß60®Z‡SþÑÓ4 ôË7œ›‹-)™‰]Nê8>žøŽš‰))™‹ù´2Ç˽¸‰à\ ÁÝ=’ÀÌÅNônÃŽ3ó"ÿÒ‚ópã€=üâEð §e„¢¿]: º›™R?[ @žy«çiêìÈ~rp@)I¡$BiÑòx%?1Ô^ÀÅ|8ˆ°ÊÉßß’=HMÍ9\‘ÏÚ‘ %µŠ+ÓŠv 0ÉÙ6^À¨É&þ‡ã!fqz`›ˆÏ ‹¤Tß¿¢þ)Që,<ï§ËØOªIL܄ֵ3¢Ô¶’<5œG¡ñP>ÖÛ<üL9"ç:>aË'ªŠþç ÎÒª1†Ò„:8|Èñ¥sœÄRïü:ÎeÓA¨²éáÖ#lñ ~Y9ó@ÂùĦu7D•Â:çrG—TûºÄÉz§p Ž&ÐN‰ô¬²¼k„]ä¢táîmè‰–Æ ¢’ÒóåU¡žw2g½ò¥PëIåFG~kÛL[Qi÷ó⦀û×ÜXLbR]Öðí¾“g')^{~µu¢ÂìÏTÛd½TTA:}£E~©ð@V2†ä"õtwâvÅ^JÕ™|‘³„ja~‰pzp"ú'>&ØS•YäÇ÷çø$ˆSà¾T•µÐËO’˜„Kx~ö±‚„8Coâ: &û« 02Ê©¾~­AØs¨ÁçÂbMî…èß±4•ëxx \•ÍG£ŸL4·™×,˾9|ûñ4LJäý‡*r ‡:¦ ‚Ë¿Gß3ÇáÛSLo¼DyžÞ½ŽY. ùR°\ ý&SžÄ%¢óôê>%ƒesݼ¼e»ãyúk;òÀ¥ØëÈ‘ŠºÞÛóÍ˱lË>‹’æÇÀbÃV9¶Ñõÿ…PqJ‚¯ŽPå{†ñ ½¥1¤±GrR²”Òš¬í`Ä:¦SýE^²ÂÝ‘ó„š!Ǭ٩L«L߉ø¦¶þä)jnõuÿ½ikº$îcÉ 6½öL UÕŽØ%Gc{T™ þZ i3债Wj]i¡Y:3T¼e]îþ ¸°<¯p96Ÿ­Õ žÚúµvEk]‹Û CMщÜÛ+}_TzG«ö ]ZN‰)¦Ž´Ì¯›!]qÜm…Áã)+mØéb_·ø4í3ÃP^™ÕîB]ÃHÔ2:ä F{bj C@…†{>=Ðót^¥ƒ“¦2ÚrFG¢¶ˆ›%=ë9ðÌC,@zÈÑ¡rÿ6›Ôú›ÏY0RHœ§¿^Ì×ÉsÍ¢Šö*ɉè\7ù ¨l#÷Vã¢H¹ó_ê÷&íZSÈÙ¯+•]éî™Pä©kiX…K•Ÿ4ÒÍËõcëGˆ>@ß,˜gDoËNíòVâc¬­j”*«+NþBÛó¶,ÑêOf‹Œ¨í h:šLcÏ÷ƒ¤¹~4¾Õ«ü0Ç]ß<]“ ,ƒ[÷ge«‘§ÙDž·oÊÜÝO™zv+0ZYõÜT¾¿Ñί2€4êì»[ʘ[ªw_&Ò~7 LPÎüá>Èœ¥}ZµsÌàn„*|wÚe!*yÿ~פúItû”o H[ìЋðÀmˆ=½E€î¤Ÿ5<;íÐïðß~»ßÕ¿K¯Áø“W'TÇ8¯¤b%æ§¾|´4a4Öù^“5ñ¾YÙÕàÛãL¥}·‹ Ùá’*EòGÛaÇjÍRãw7åŒûÕc'ƒ³ÊûËhZg™‚«µ]3£çÍG¾ûsEöCE_ˆr¡z‹Q].J:ªL¡Œvò¡*ÉvJEvx_AZÄÙŽ¶iIY夨¥]0^ü°ÿ¹p,GåîgwÂpS#\V9„{xÛŠˆþþå¢×áz{8„¯ WDáÞßë\Ðx aÛ•JuQ´Wà‰v©c9¿yÇrˆçDÃ35á{Û ÆƒJJÄŽ³DÏøtñ\–ƒ†”²§@ÉßP“yœ™žÞ¢E»TYðÎðÐÀûTêߘ”¸–µoòøDJ£TAi‰OZK‰ê¨ñI”–ßäkÎÃõNNµç¬8‹}VÜZ'§õ÷­Îi^\t{Åëo~uõS|ŠJêß„á¾4Í@ž¤3Ý¡VñÂ36ñwmÞ®a¼•ïvè·ÏxÜçïOXìà÷/,þËoƒ€á3“¨´ï8¼3cüjï.3$LäËó Y:†Ý«Ã^Š \oO–ïj*UøXÏÿ)ŸòÁ´Ï‡V[ÛìÃ(¿†³IOYq¿[?U*íKñ±îË  
Åœ̱‚|j;lá¯6åøõ!Þƒfò'b{ß¶Ð>8¹ÖSëÒ¨"i=Ÿ”™iÖÿvÔ¸ï@Næg’ž„|tûÍê±Je_¾²ðns(±˜½nR^* ¼ô.:àú‹yöŠs”àØ¿l+%¯ÁÿËÓs“¶ß€ïéBú·óýàÇîU¾ûürâÓß§KÕõxPû6¹ƒÿ6+Øç…Ô#^R?½ àrBÀd®(àr‹wëéW°Œ„€!FÍì˜Ñ)^.ÕeŸÃÛçxTËöÙÌÌiËß~dý}J˜hö3ØNL©|òôuŽ9cnêÜùåR‚MíÜæWœÃ®$è%GÀæ4´ÁMŒ4 Òf¦DmÄ6"ÞèԶéȰÆNÛÄ…6вï41ïõvoµ`|¥g8Þ2ÑŸšnû¦Cf²™îôn¢lÍMïÁlê¶ÏUÅ#|,Φnd³cÖŽéFÊD¥¿£ö¥‹žñÎ!äS›7~évìêÜ‘#l7M1{=ŠÐp á?’­hÇQã… Ë»íEŽÝ4E»w®ÿA£1—‚!F8àá?âc4¡Gq,Ú]à°³%=‰6Ï‘ò·EWÕ®­›Œô¾1Ngô¢ÀîÖó¹ó/¾÷ê x7ÅëýXùûùýGá×WÀÀøZOÑ;SÝy§X hµˆÙ”c¦íØmì¸årn¦ýÆÉ†÷aXgÆÌSÏ›ÅöZœKȘêűuÙ ù×"ï+ò[a·ó`¾ŠH2U#vÌIÚWšb´ä_5Âs4eúªä1ª3I?…#V‘  zë¤` šq€ê«Ÿä=˜¬á´´²ÞwV4gßB‡ª8WŸmuGW)ufIôí ˜F#’:3ÍŒÎô P‡w0†.wžº.OÛØû¬Ø™rÜUáä ]@øÉX{&ÊëÖe`X¹‚g*v I(ØZ ¶&7¾š¯³ù× ~Ûª€±j+'“7ÓŽ»@^rüȪÛ0@î÷kw×èmÔ5®bÌ ®µ·ï¯ßøÊyö¹úøº—Uq&E ÚÖÚ ®¿0¨h ‡¡ËÁVIòbmtEóÍÑ8XiDg¦+Ôfn€‡:V“Àì[‘‹÷ÕˆªÎTR,’7 ‡¡ËqÔÆaÉ[c;8C‘;.Óí;w ¨h Né Ì­\ `8¹º3c`#Øl(KþÑM¥éRM¢ª‘m_ óÓT5‚ eËGÛ°@¥–RíAÞluaGÀŸwN‡QØ‹mO’Ž2~­å6q±&殢uŽ 5Ò¶òêŒ#§†8Â+¿a?¾ttg¶)Ê|èÂéõX¨¢{€q…½ gÓ¾&56œ± ØOçÐkgtL7l;ƒ eË?"|òƒ¢V"‹æ`2_§+TgFœBz”ð/…ù¡ÓLåü¨`â?õÛ¿¬Éi5»Ã™Ùac yò;6Õ¶þ¨v¯uº³ê‘Äcê6uÿ?õ€Øý`ZÓ²M’Ë„ U‘?eäÜój’+ ØHôP?>ßž°‘è!^À(EÀ“ƒÂsæEÇ€9¾lè Þ›@:—X1ð}àëG*Ôåøo ö#í_sÿµkG þûÁ×~Ð.pZýãžoæÕW&r'qojÆ»TwÏìÖõýc";¶ ÔU.˜¿Gú[×Z½òmVêR–\©úùo9¾kYèçéÂ,s¤³«f]& úºf×!l PÂr€ì Åœÿ›åÙJP£ h¤)Èù¡ž¨Ràú˜+gh”ÚÙbvj.‚˜fMëXñ°³F渉2nPú¬FXˈ÷¤#ãöòìηŸVóU‚%Í~üQ7ßâ~y¦ëv î—êô$YÓúý…¥Ý^#ü! 3oøY"s–tC8ô½ ¦COmE[¶™ˆÊ1˜\Í·l)Ú®E€®çŠÖŽ!m.–_eŽ*(Œ8—dd5ìàJoµ¬˜i ôd?EíëûšÐ,‘„«Jïÿ¬TF†¬~4©ýxôñÊ%!‰1PPÅÞá _î>o^i×LðÕ`,Å‘ E´Ë %²<°äq7·«n“¹ÑV¦«rqçœÌ2!ªhœ/pÓKjBÄÆu:³¼<sèaú‹Ù1Êm ŸÌh²¤q²¾ýÐe”DØšhÛï×ý*c¶9ºxòÇI¹ LÖå\ ° ÞY¿¬Ìåõrß!'Ÿm|&cˆ—!`ÖÊã1EœÞ¼ï­ ×A’¥ªfëÑ`¡Ì6q™mßéô.SºLßgûß*d_¢oä.TCDxW_Ïé,ã¹àdÏ£÷éÐ}ò¶É7)]qdój ;ÑÆ«Ëø¾»ìjAEãŸýD.öé]\ÛHa—Æ “ñ`s¾¶ySÉÍ]<#óøsÿZTwלqÿ˜‡7-VnL_™‹H:õ{Ð{x°Cîí17Ãu1•xSù8)°Ô¡*MdÏlæõ×ÛTmogæÎîfÿc>¨½žèò« s«ŒJ‹úžE®õ¦o[Uȼ²Oè(ô,¬Dׯ1±o\£÷Õu+ZãLö·a8æ:6Û2Á¸d˜ƒÐg´PɰcÕ Ï^È/ˆ®õ‡8­àß•ç(öÕ¾{IÆŠÉÂ2Q …oÖWA«ž¸˜¶²cÛßèôˆL<. 
ù 胕‹Ý4ÃŒX`Mh)âW0<‰™èsci𪸠èíèü»éZÃ5wôóyÙý¿ž-0!„B!„°ÈL ómFL ó îÍK±æ¿}¶,˲ll,Ûw³G¯Ä’µ“ãñŠ¤Í‘Êù‚à ’¢ÓðI'0=œ!3n,Ç[¬¶Þ9¬ãEsDö’Ó§1}æK[]R­º‰ÿÞ‰ˆQ‡ =n©&IÏlæÓÇê›ü1ˆÁjÇ1fÌø·±ÁFÏ=y cðãOD,’„”Œœbë2f̘1cÆ“]b!Éþˆ‘Bp0t4ÒPØ¡XÿdÇËzPâ/]A~`-qSCÓ Ó²ë0Ÿ0“GPV.1ae›Ã²Ý¢’$I’$I’$I’$I’”É€<¢_æx£G5ÙQ}ÔÈ&Ð$ƒüÍH}ã&0†pcdšv¦2 ‡Ä'$&‘ï¶ûå©ôŒÌ­NÛrvÓþȃ"¼ÿ(‰ß<‰Ò²rÅË—,Ç­twâ2©+×·ÜÜû6©ÛÝu¿ó g*næD(ãBV+÷¥‚)$K¤2yÜÁŠáIÑUÔ=•Tê4‚ªMöWÐôÈ=ÈL/+ÁMº6…áIÑmvĺەòû6!‰ReLQÓè F“ÙRÏÞÎË÷ï¿Uö±„ ôÉçõe1o6Ú_8ÌŽ³à,8pà,8 Î29;0lÞd+Âæà¶ŠÀ%Åu–ä]lU‰?~[À7CŒ N]…Ðþ^ç#^¼à¶m*mq¼Åj³×Añ?}ñkÎvú;2&!é}û^}Û¦R÷ïJÿáïú÷•Ù—À ‚‹r‡Žb¹xÎ:JâS(2eº•¦®èÙ½Ybìå°’Uˆµht©áJ;;Ú•>ÊÙ">¶aŸïœ&”q!•6Ö«€f‚à ’¢ÓÈÉDÖfÕÇsJ]¹¬)ªb½Áh2[j-t6vGÏeÝ}ì¡>>SO«=Ø%8g¨`… Áñ¸SÆÃ–ø³` JI¥8%ÌÖ(Mc©w&ŠÒlsZ4©vé]½8ÜÜp¥ékõÃþ~"½{ù{wáò³»(=â¿öÙ „2.¤ÒÆzÕ,¡ A¨¸¦”•7AÖ§ŽÁ¶¢VÌÆî¨³ ¹¬»/2g°ÀѤs8¡d!Ì \8:šÇ[#â船¨]€ÚúV¾«¯–·D÷Û‚Pv¿1BnÝ}¾_$ IFŒ†”‚ᣑ"a ñòø+W°vFâžJÓ Ó²ëž0s^¹B~¶†‘´ÏÐÖZ’$éÎ àäN I’$I’$Ij¡ícS    {ûIE‡ÉCàö¡…¡“ÁÊ&‡“Wˆìf+]BQVéÙ¦˜ŠÒt°[bÙǾ<¦3ÁŸI(4‹Ã— øÎ\Á›!FP 'HŠN'"=lfŠ6[mq¼Åj[{—.óLz–ž¥çUÿ™¾ðå †³ºïØÉ¶žÔ³Ôà;cB²ç¼FÏ[¬½¹!š›°¹ñ€ÏÙ'pÊg\׃™†7Qi*´»ÍhV8"€'Im5íëÜãÞAT:ÜY–+”¦YkebCêcª3áQÓÆ yÓØ$Ì:x„MHT´¢1•±âtãûM(ƒ”ú÷ðãO}„S át æ¬~³qænͼ ,<â¢\üÌ’TºtyÅ8eÓ˜fÑç\ãG 5Ü2~«eY–é¸1ä]Vv¥ë¤n²w[Üu¿ö~•Q–º:ò÷ÿ ü¯SÅ5gB²ŽNÎ.®nG«þõ«4"""òŒ†‡”D°7CTh¦Ç˜áþaÿÈz”uÑvLq×ì*Î{ãK‚-‰·'Ý69MÉÔŠK+Ýz”×if}Wöd8-´l9O—;÷ƒÂÁ€àÈK£ûx¼$ È© gvdù¹˜Ÿµ0Y´,­Ê§WòꥮÑŒ&³¥õ˜ÍîèüÄåÞs®½š}åÜèï_Èonæ@k€&€@÷ÁäZ Äú DÀÉ´Õ„8ÞT0Ý˾°VÁW|=ã^9¨¼»[Õ÷æJç•<5<1$ì4Þrf¥«êY«¦²U Õ•Õ lÍi[àlvÓ;g×í¹…'õ´µoêý@WoW ÞçxÊßÞðÄp¾+&€€pxÐìL í͇ €ÀU`—=öE@Ž8D@ÌÎùtˆ€c–"€€›Æ{È/ꯀ"%ÊT¨¢×Ø€‰…]§¯µ§=±û·Bª„h’Œ$GD\Ʊ&LFÆç22Ÿ‘|é+_»k®ç[[Õ5—‡>œ™ ƒÅáKPyÞ6·Ó¡S—nøgë¼â\ࢶÒa®rM#S™¥g„Qc W]dÃvŠ©Zâs€_× XGácwÂÚc¢Ó‚ \:v ;™¢ã¾a‚I/6À ›— ••ÚÀ`Pð.§˜bZ aÕ°Pð_  zÐ$ϱ~í«JXoC•U Åö¡¡0 „‘0¬ÃBð¾Y+²(Ù9ºöxäÜâIžòõ±Rmò·E´}‹3aêü„^¿Â¬µÖZëgŸë „².|^¤ÒÆzEîÖGÿù¿À`¡oDâÍK•i‘ÚŽˆïѱˆ_=aEWüEågWY­FgíPj=ZY£°&Í‚rè!xdÀ( 'HŠžÙ1çðGÁG¨²TŽÜ®«¼Fo0šÌ–VÌfwt’.w®ÞW~ÿw”ðVÿsk¨épmt¬iX×zôm6Rel¦¡‰ŠvbLe¬8nO ’’7´û@ÊÝtsV§Ù8sÓü‘ JáºE ß3‚¬¶^\|=Â*V-‹–DãˆLèÑšÐZ)K?;ÇDæ†'yÔõqûA·Mâ¹ù˜ ƒD¡1X>?!&”uIó´ •6Ö+,*¡H,‘Êä5 
#(†$EWܨT©käÕî¨ãÝ€Ùȧ‰¢=7^‚GŠáÄ$ŠžÙ1'_/CK¥l[±z‘kô£Éliemv‡Óåî1y}ïïÔïðìÞŽ'}eL†l9Ny…d·V¬¬¤×Ù¼å]§L™žÒT¼JÙl§-FK†¶œ­´Êe-M_U#3æÈáž«>Âm Høø}6ßë^è»àwàƒ3ÌŒ NFa&ª6«¶8ž3‚¤èÊeMQÓè F“ÙRk¡³±;röºÖteJ³óc«Ój®í¥Žµ %ë–õèÛl›×N#ùÆ&aòàžE¢¢íS+Ž Àˆ¤ääv‡¦ÒÌYfãÌMóG.(…Q‹Rñî%QºyyE±²éÆÍš6ßÑbKKM«Ôwtè@Ÿn¤ZD[ãÖ×ÁÝTþ‰G>ƒ¨«1ýA}ÀÑîRýûßAèL»7s&”q!˰ƒaBY®x{é •6Ö‹–·xmÛ±ÈP`ù  ‚9\Q¹g²J^ÓAÕ«Œ)vˆxˆÙE<îwæf·Û»í”(Ï.R¬P-µµÔAÖ« ð7|>³ˆç=ÄÐf‡ïÞ6ÇîÏ Ûìh—ãŸïR発DêÝ.µ{£Élɪ²Ù95.w=GÐ×{ˆË÷ÿ+L ‰oG‘3¿ø7†)D(%Ug6uÙ],%ì5”¦®á}ë¾;ôð¬Î#(†$E§!LA@&¤6XŽï…Õf¯ƒâ|”»iì i3[Ûê‹ Fè&à@sÍ _ŠTuºÏÙ!âÕ ª “g_]ƒ&m‘ºpFŸGÀoˆbÔ-ºb¡³=K€’ ³‚J+Èœ‘…ÿN'¦;Ä“ è›P­‘ÜóvÆD÷ƒÜwÖ1©s¥H‡ÐÝ¢n8Áö›ðfKï(®˜çŒbGâ ƒP1FPV.4AÖ%7·õ|Íp!Á…>¬ûI®wœgÜð {ïøwïü}Åï4+ˆ„ŠƒFPV=aeÏ’‘,°sÏ›_þhR'¨ôóqSµøÌH”$YÊ4õ¥¢“«¨:ìêùÕÀ¿FüšÍÕâxuÈoÈ«(†Õט•¦2i&ëæ2mÞƒü´[”¹s>aÈŸ€¨Nщ.ºwú»7”ÞHM¯S™Ø²¯·†ÞšÍ11Ǽ+u5ÓÄ5®ÿzÓ šy„k‘÷kؽC›þBº±9;¹\ùþ[Ý›ÅI’$I’$I’$Iípfffffffffffff;¼Ò‰{äÌ磺Y#ˆºOñ!}FmÕÔñãpéêv?wNîkF(ãBs¾®·°wÿ÷zõþõyöýö»Þèÿ[àè$ $I’$I’¨«Á¡[|¦0ý~¥[Zù>2ØžlOÆó™-ˆÒëéîÁ &ƒPñ|eåŽ +¿¨_‰Ø’B * u9Ìë©CÓ&êî#è_(Õå›=ƒ1Óe˜-ÛîÞø$Iª·»·PºGˆtl×'háž9Òë~BDD”¾®;¯)I’rPG£ÇéçP7Y)À¯A•3q2“%ãõu`çnØÚ(/üõ¸$6"wˆ/Îu…šˆ £ˆDŒ~â^ºmUŽsâž0ceåÑd}1?ëß;5g%àÿÿÿ¯2dØC `5O@ù¨P©Ò:Å+º3[€ƒt «j:©”É™PÆ…¬VÜsÁ‰%R™¼@²‚`Åp‚¤è*ªœK*uêL|Ëyo n²») 'HŠn£lqã”ß Y”*ãõû“Ö¿}Û]o0šÌ–zvih«Ó€ø{Iû®«¥â€ Òp8‡Ãáp8‡Ã[¼wýN™2m1K–­D3ž*(K:Lz ö>œLhaBRic½ªa²Î…Z sG(ãB5K™Û©[ÓüøBǽ€‡!&‹§…»¼¯~IRDý!Z¦‚œ¡¸Hˆb“3ai&––Pr{}]H_ÿ¡ûî=oÃ%I’$I’$u©C¨Ø»§A¿Â‚PJ)x[, ‚b¿ÔA¸DÜùÎsÞqà€ʸJëU8ƒÏ0"±D*“È„`Åp‚¤è4â3ðΈ «æç{6Ó†ã-V›½~?èëããn¸ñsã¡}¬s.]¹vcŸ±‚gF4PJ)-Ú°¶e:ýf̶TOë‰l´·kÏþpû Ù6xx‰à™Fǘ‰Ç.M˜>c¦Ld![4ÝiÇv«~» 9×ê;×WècýˆdÀ !&M™n>M %·X`Káç ^>~Á… ¨ Âàd(v4‹Ãˆ$2… ˆ¥2¶²8zËÝ·§×Á@Ü$5…áIÑmpñþD)Vº^F7zƒÑd¶Ôº“³±;êÜù¹¸ë9Ìk}_eñö€÷AFPŒ‘8Œ0¤@t†—åòà…Ê¢V5Ò3ýf̶Tgë ll‹Ýí±¿ƒ~ÀC <\„ðÈ4:bPcáÒàô H&ž³[ŽÛK¿­õ±~L0hÈ„IS¦›/¾…âZŒ6^>þ ÁÀ( Ž@FÐ,O ’Èê1«Ùöä×;çEBº%t„à‘}£0œ )úõt?ÝÁåÂ(-_ÀŠÕkX£7MfK+oÓÉ’ñ^þ@&Bc<­©ÝéÞétŒ1™Œqè©F4f’$©ã,R!ˆ A0‚b8ARtr³¤R§ç— #l›Är¼¥W¶Cgrø›±GŪfP²ÖGH˜Pߜ̎B™ŠÅ%¼;j1CS‹ÅÞ/ vøôÒÊß Ïð›ˆßI?ƒ 3#(†$E§”Ž,=/Cf±mêmq¼Åj[{ÚqJèWðÙÄ-B’]‹ØÉÚK1”-R§Ã´DïÍ„"èLDÌ$ƒÅáKà<g P2^?QgÃÆÚd$í3?›!áÍÜT)‚þê…ß>ý#MõÉýäó·G4¯æAŽ¡ˆÃ]ùgжJ¥R©T*•[6¶@È<ÂÑ(Ôè÷}ó;÷3?8 
ªÿ½çÇ%­ãB*m¬Wyá™^³·ýä€Ü ÌåT`¦F½¹Ë•Ù<ãñ«‚|Þ¡¾7‚Q>€®ßôüÄœ½ù…óˆE¯¹Ó»""""â¹"öu EÚ(¤F bvÈLJûªûÜEÇwIÀ6ùúÔ@¼"°n Æ7tÖµáZ ©^ݱÚÚÜj"""bfff """ªªªjfffÎ9çœëo:ÖtõômÊÊ©¾›3"x 3¤gÇy¹ÞîçëýùNûÏšçØÖ,Íò|å~ã￘c,ÄI’y$I’$I’$I’$%Iª$I’$I’$I’´³äptÈÀÒÍþ¢Ïœbbàx¼>¿SÙ#ÿ#!A1œ )ú15|tõp fÜM}‹·åx‹ÕfwXÇ>'’I’$I’$I’$I’$I ªªªªªªªªªÜÀÿfGÌÿžõvÛÒ4M×u])¥zyr½†­áÍŸ]ç_ÿÿó7t×ÉW=í£³ë,üÿî±Â)2{}Ä5¿EL1Àw:%I’lþÃ’úoW—Þ"""ò¼Á‰ B0‚b)wèŒËå9*‹J|ª¥S¦LOiZÅf{l1ZbÜrXib-®ªñ Wæì˜«>Êm~lÃþ|ç“c†€ʸJëU 3ÁŠáIÑiäÄûÚp¾2PŒ88‰8üyb1B’ƒ£!¥àpèh¤HXC¼<þʬQàâž+M7LË®Cx3BÅœ×Ǹ.i…a$í3<³?´mÛ¶mÛ¶mÛ¶mÛ(wìTUUUUUUU…©T•ªªªªªÒ䟻Á|ÆA\‡™Q‰4 ínŒX8âÀø 'e5±ßË)ùJ¢tpYæ9r…Ò4‹®·'´Ù¦¥Ösc¯s† A1œ )ša9ÞbµÙë`ó‘s.]¹vãÖ]?OÙ/Ökßä® Q€Ï)JBÂyî-ï:eÊ´Å,¡-[‰†ŠŽ¸ ØÆÍ\÷Kó&UôYï˺ûÙsçTUUUUUaîŒ-æ1™°m—îþëX raݲ}›¼ilf<Â&$*ZјÊXqºñc&”‰AJí¸Ýò”B:ƒ9«ßlœ¹[ó/( ¸(?³$•.]F^1NÙ4¦Ytóøê–‘­:ÜÓÑ®Žì²²+]'u“½Ûâ®ûµ=·ìU¿ï·?ë· ˆ*0¡Œ Y†D˜PÖUš[Jë¥AHI°7CTh¦×粺½e{ËŸ{“õ(ë¢íбoì*Î{ãK‚-‰·'Ý69¦djÅ¥•n­<_%ÍLú›ö2 œZ¶‚§Ë@á`@pä¥Ñˆ}<^äTÐ3;²ü\ÌÏZ˜,Z–VåÓ+yõR×è F“ÙÒzÌfwt~ârï9×^;òý˜ý…÷“o{0ÈMÞÍ €ÜœÝlôøL[ ÑLH’#".ãX&#ãs™ÏH¾ô•¯Ý5×ó­­êšËCŸ ÎL Áâð%¨ƒÕ‚`Åp‚¤è4„é2È„ÔËñ½°ÚìuPœê¯½|õu}ÝÒý’Ök¿iìä´’$I’$I’$I’$IÒ*I’$Iwò@AQIYE•¾‹–$é³-8 ÒpÈ 5N_’Ö”ý%YàÖ:$Œ1ÆãœsÎ9B!„€Õ4c¬µµÖÌˆ¨ªªªŸI€àBÖÔŸÇyöpÏ9YµWçár×㞯õ5cÀ~!þÝÒ÷¿×x I’Ì1$I’ôþБ$GªY%I’$9Žƒý§ëVU‘#ѱãdã2~B[¢jâÝ$II2*¿Hæ¸è°:aӪͲ¸.ï…€ˆ.üÿÿÿC@UUUUUU­ªªªªªª¶mÛ¶mÛ¶mÛ¶9ØÛã'•¾žN³‚É T<AY¹cÂÊþX×ûÕ’$I’$I¶mÛ¶mÛu|ÂXO ÒŠŽ1g]I?× —_¬,tUtM|Wr]¦–;ȹ ²_H”(‘HäÍ¯ŽŸ0qúŒwl&Û9%]\ˆ§#ÿ\ ê‘öÉö•ç ̵¸xâysßÅ6A>&’$I’$Ì-%zIBMž2uK7Û†Û™ž¹÷Tò uÞÑè¯Z"åÊîÑ•;ˆúü]@’`I¿¤+É$=Šm^9æp1󋶆ò‰—!Y*ûJ–Ô£yf*ˆÐJÑvñÞ’³ô/è~2†òF¶³öa 1~ÂÄé3Vܱ™lc\ÇçË„åaG¼6Ò†6ÒFÞþz3âÓ *€i—A,+‡[~2¸b?¥RÉ”•Wi¶‰#” G«õò g‘9K€’ɳ‚J+¨2#p¢ç>ú 4d؈QcÆM8pèȱ§ÎÖyoýE,[¶z{eÛög'bc€¤ÐFƒÊB]ózêд‰ºû‚êŽ/”ž!>!AT±ãÔ8ÑSB­F˜ŽPÆ…4jªÌb;uåæY_ÒfÊ¥ÈÀÿÿÿÿÏÀCóA"bÝR jI~9ï<Æp4ž±²ŽPçÔ†ÔÙª[ÖÃé øøì!¸°©(‹ºbÞ3 ¡' BÅAY¹Üë=Gïï÷bÎ<¾~ëß_ö˜;ß Àÿÿÿÿÿ¨ ‚=üÿÿÿÿ?p€ÛÓ¾ˆRÑ 8˜ìs$'q|q%R1€Ñh+©Ûf,œ¼›9¨y­VÌšÍëà ”ø>xOìe÷SÈSiºaZvÅ' BÅAY¹îY—úÜ^Êó'ަªR‡jþdóE怪ªªªÒwG»#WB½Î@’$I’$I’¤ñëˆ2j¥‰Dó~ꘋ§YÁÒ T¼:‚²rß„•ýžzŒmÛë÷¶D$ñþ>[¹/røoÛ¶m? 
I’´zÛèK’$IÚôàæŽ¦sÍÓ^ü Lí;î °údÓêÝÚdqß®€ ¸îú¿“¤Þ5´…N…ņÿü Ç9Àò¾ DIVTÍW0ƒiÕfYÜ”$)%I’$I’¤”RR’Ę$I’$IcŒI¶mϳmÛ¶mÛö<ϳÁ @UUUUUUUû^’$iõf¶¢$I’$I’¤Ï7PX“Ä!âÎÇ@0št$¬*fª©ÅbÇ›Œ¿rÏ(Q¢§Òtôì:O„Š1‚²r¡ +û8Y®îÌ€ªªªªªªªªªœÙ Ñbp‚NÑW!ú¨ÇCÃËËÊå+…/V.I©\åÙ&¦ÛšnÞÂr+{Ç.ž±GÙï-þ àg”,êS“¦¦e×Iù„A¨#(+Oë¹¼³e—ÌE?ûß´®ŽÓ¤Ô@•^À2L³õz\ðOˆó«6ꓹ‘?Ð{ç*¡GŒ]WZÄ9(&v¡º¿«’¨X±³œ1ИÑΨL(ôYƒ‰$îþøîdÏlu©õ¿1ܩҵ_aÇæ¢¦f*+ “É_PÚRXy7ÿëãE€/…¥£Ó(mß¹ÌFö“æú\ìþñµÔvó½ZÄwÓæû/-G34KsÝWSœÃÔe DWCËxû|ùÿQÊ)¬Ú‚PÓ/?Š;„QælC±6¯å¡ÚˆDy”v¦›;YÖT!ðNu4D1“M[¤Ê‡j%Rw6Q{Ù¯”!íÒNìR:gðÈÜw6Gî4²Á=WÑvŽ˜ìÉœî7Ÿ|GÉÉaõ…LþR1uGEœ~†® â5…¥xa¡ d¼ÎfÙØ|jÔug”ÜCèá:©öµI 饉ܱ¿`ÕÙ2yÇÏ”`T„éïügP•ÕJwGæk>!Œ?J¨m > ÔH‘N-zû!á‚¥§d… (ö6•A¹Kb5/M‡Zˆv¿‚^)$ÍáÚ‰_qykŒ¦RŽÁ–(çC‘|¿ª”#ØVµÝ£´*!CL¶k?ch\P -ÐŽ5L²K€&ó–ðv©ê†Œ6mÁÉÜ9!¥ƒŸ¢ 6y]€YŒ°ƒ¯7Yg©à@Ù‘k ÔÖôÒOÐi ÞØŒÄȻܞ:³'6à i^÷Ýî±î/¡ZA2·d‚r¤]*êT•†‡„aJb£ ý@~Û†Þ¤I„z+×éÝsŠéy »´:§é/Q硟áȘâì®ßLµMÁ>|m˜bûª¸@;ªl#–l@ÔÎQwÊ\:Cq÷ÊzƒUð &É*y ×;0ö¼ZmŠ‘ÝÙ’LSêeRa¿‚û8Ü=MŸên1‡âÖ ßÐ-Áûú+ʪCŒ/ÑK b r4ªƒ(€~úhÍå”:f@ÛGÝ;?]Ÿ ú¨ÿÜþè,™€§ßuœ&¥ªô–aÖp®õnæ‚BôöUõŒÉÜÈèI‹pÞ˜gGŒÝmŠY@fK.T÷wÝǧ$*Vìld,Ú±–2£Q™Pèо¢Ë%qoóòÂØ 6 Öÿù¨‹`Kšývær\ÀÔ"AÎ01ZŒpÒ–ÂJsõü¯ŒŒ˜7$:|XÚh[™§gí'Mý” ÿ8$š¾¹WñnþÜ|ÿ¥´4¨ÜmžäÕİnJYˆP9è†Öür¨ÇË^ hÚUWÖ›®‚!hÉÁçÀÂÄgÞQÚØšU»²KõP&ôlí|á±¹côYÓ¾ô±ŽÒ…jÜdC¨x4Äl'„ýà•/c¼ß¾K;ÁÒÕ.¥[„eÌ1˜»³eXÔ¦pû¡l«Zí$ØÓ>Ðé~óŨ%˜–oÙЙx î! 
{V¼ZqÊ Aº+øEi¼êŸy›òIÌ*P±W3Je„pT}9Õ¾öÓ¤f”Z˜ÈÔktÅ B¤Uí‰|V¶ÇÏpZšP2ìêh»@Þ×ü¨BD:ÅÜšª~0ˆ¶O’Œj†šù^_Âû²‹h–:<(c1+´~  :+¡ìªYú{3SFÔ‰K1çøÅîå­A–èÓË1®5©Ï®@Ç‘Ô\žrÄ ÖÞØéf‚ô3Äv\íÓ(ç½`ñfÀ; 3ò®W Ào>½1o6÷©ê~–é¦]0âšµta73¥¸?baHo7`öÁØ’£ëu–ªá©@ñX®gYˆ†Yï ±Af¦©—ÕDm;ëh“›šÓ}(-HŒ¦{BÀ¶KÁ–JìɪD%Ó«¶UȇÀÃ-|‚É3& ѯH¢^1Eo j‡bD^¨}ЫժÙU’T‘Ä׋¤̘*ï%<¼Y*¡~¦ &æ=š…Šp¹Á6^ÑÀ‰`Z…”cä²€ 9N°mYJ%¯mº»k®7E㳓˜CÉÜ3JQ,±ß_^&!Î^ÁmÐnž&Ë¥ªM!¯®b(Û\xôÞ×_µ“Õ©ÎNW☆ƒ<¥ˆJ=láÈ 5ì#’*u/-Þ8o©™¡ ©9ýÕ:6Ô þhª½)1×vœ+/±žíèåFÕß@ ÜC…O•¼|‰6)¤fà P"d„QŽÕq‹}‹*„ÞßÑ@Xt>á´š–Y.2˜ÅŸó$ïøJh$ˆ‹ 9¸xø]?®B.D…ŽHëó2r Jª¹ é¿Ux/?#±éóÔ/a’z8â^'.¨ä´}Ü¢R¥¥+Ý”2|'jÒZÝûG{‹Ë)X½PaÂ-9^¤âr[>ï$ú®ôëWd·="Z‘±•½Ñª>l5Co‘©HQ¢{»˜Þ-¶÷‹ëâ{Ï‹¦“TÜNØßä«&%®9g›7XÏn›uÙ53fñ”øBÎ4ª úKL_ö «~뵦X¶¹©Å€ÕÊЭ)7ÆŒØîJiêõèål3^Êv!šXߟä¥Ë)«Ëæ}ïO£Ðg±³ŠìF ROs²·ËAv¸àßË•7Wþ.ÈPí÷“þòOþ?ïãó5Ì߯\ÿÞº sWâ”2åÚ©uÓ¦Ï0èªyC43gÑúZyæÎsΕ¥Þ‡˜Ó’ëÿ›¾•ºå†›ÏXxvÅÊU«'·ôpÝziÜ› oá é‹ :b¶°­¶ïðfÛÎã]ªÖ‹îWɸá3Λ>Û˜ŸWͿԹp»w¯É ZÈ]¶;ï’çÇàBŠîU|ßý<¸oÿŸ_?üˆ5J”>ªìÀÚôf!‡ž8|äIåO©T¡Jê§Ÿyö¹£Ï{ÁÛ¶3ºCrà^ööõ  ‡#Qµótšº·â‰d*ñkÖØGÞçGíÕû)Þpry8¶¯e¶‹´o<û~q+Œ|Rï]Äâ„>°o÷~ž`CòmS=©ÒýýÝÉ<ÛTÂÝðzõúÞǃ¢¢¡c`b‰Ïmã#à|ädr„†Ž‰EôXNaˆJ+·LØ\NfŽ›«SÑPÖqá?ÄNqB eþ’H™·?N,‘€©ÇÀri±¥ŒâÆ!äÁÝD¿†ëF "Ñò|J ²LÆPT¸mT$zeJF‚TýL€ŽcŒD[Áʶ‘WwêÜCÉTöx3VŽÐ—<¸[ÚãpnDuúeh²’qjñ p¤\ K–Ùè>VáYÁn…ï×óˆP¸ÑB -´ÐB -°ì1ÙÞŽRȹ^Á?àÚç°í+ÿ¸í&Ànº´“S°—AZÏÅþUªïó] Ôkú´év(¡vXÔ—OCjʆÑ3E=9¯Ö»uó¾Ú¯eIü4C&$ãK>2Ë­’UEŠùœK¤ù·ý<.["-ëBÅpE/WK²]×®ÕÝR”l˜¦ór¨l­w[ÚÞë¶OÙR¿ÕÖ¹”~É^ìÿ»ÞÂÊYr{Ä%>Q¥šzJI·ãZ:ŒÌHe”ÄÆVºw»úµ¡wˆA\Åj_ü¹Ž¿¾ýNI”´é–òæ˜E¦À"ó„gS vÚ…žh,rÜfEtÀ¡W$:&1£¡¡¡ù#uJK5é:™ÔS¥OùT|¢âÿùþCþ«s=¶µ99ö?Mkýøò%m‰ø\yæøÔë¾~ñFÕ¼6Vq>C@<\ú˜a¬g<Rî_Íù  €ÉŒ#ë>ƒÔ»FÜaÛ=ÿ[Dn[î=¯£ÖÑ3üéVDÍ9vß½S,c=sâz+Í4º>¬Þ‡xê]rocksdb-6.11.4/docs/static/fonts/LatoLatin-BlackItalic.woff000066400000000000000000002152641370372246700235410ustar00rootroot00000000000000wOFF´LÀGPOS€/VYÂÐ[GSUB0ØÍܧÿ“3OS/22¨\`›ý£cmap3#€ä‰7cvt 7(1:O(¢fpgm7\§ —ØÛðgasp=glyf= YF®°-YôBhead–T66 {8ãhhea–Œ$$7hmtx–°Ð@é»Yžkernš€miO?‚jloca쟢¿˜“$maxp Œ =name ¬Šñ^»post8ý ±ïloíprep8{ŠöŒý.xœÝœwxTU÷ï×½“$$„J !h, MìØQ”.M €Š(ÅBMÐ(‚‚‚tDƒ"‚a„‰1™I&„ˆ%ÀØþ¸÷>゚³sBQ_÷ý=Ͻ7çùÎ9sfïµ÷^ë»Êž9 
JDêÊ@I“êýo0Pšž9u’´bêØé6éñiS¤—T§h-VۋתÊuH•ëjR=eìÔ)Òܼ¶2¯íÍk"M,W¾Z²B8×”°v‹·_7©oØ-ÛEµëoä.W=Õ¾ y$dZÈ<Žu„xB~«Ö­ZÏjý@Ïš‡kzk ‡u‡£ÖgµJC©Ý¶ömµÏÖ‰­Ó“£_¬½8F6ð6Œox[ÃÓê6jÕè¦F#¥slo´¯ñÊÆ»xSè:î ý%¬Ǩ°ñaSÃÜaÿ#¼Uøm¼ãŸþRxaD›ˆeŸ—ß qšãˆ?šÔæˆ5=û4IlÒ pXŸ6éÙä‘òûMû7bÆÙôݦۚ~Ãq.rR“Ú‘ÛšÔ]y:l”9ÿ6ªé»‘¢êFµêÕ?jxÔSQލíQΨo¢ÿˆé;5v%8Õ2¾e’Áµ­Îµþ&®Çq£¤•TÓ‡¥:¨¡H¸>*MA[Þ·ñú3IÐÛeŒþTæè<™Þ×NÕBV± %hZƒ6 ÐWõÕGÕ£œƒÁx½]MSç©lpäéQÒXB¹®þEƒ¦ Y$U¿"iúY«‹d§.–Lý†ì×'ä ÷rµ[¾yz²êDû&W•0R6]"e«´“A…´½-û¥¿€¹W“Z )…2Bÿ¬Iõª´W¯IS•.mÕ‰½ÉúWî†ò®¦Ôæ]‰ •khÿ;w;Ѷm¨×¥-ZJ=æwžùg~çÑí zü&ë´_öÒs?:?À¼òôwŒ}ÞÈ3c³Š{¯:~#Öì£ÕrF­CË­È+ëõ1Ù¡?PѺ„^Éj±þ†žéKÏDõ†Ô w>”¬áN<*YŸ”“RË4Ñ ´­@¼¾ ÑH‚þI:3¯$Ð\£ÏJWÎÝ@w4|-¸X¯çÞ àFúÝzpÝ“óÍ ×½9÷}A?p èn·ÛÁàNp¸ ÷€{Á}à~ðx<"÷aÎpÄùQÎéã2˜yCÁ0ýƒ #ÀHð8ÅýÑú{X~VÆ‚q\?ƃ ȘRÀ$ÞOæó)\?ÉõSú;yLe¬g¸7 k> žÓÁ 0ÌσÀ‹à%}Jfƒ9%s¥©Ì—HY OϪ ¶É*D€&V«f JŸRíôyÕtñ ;¸\’Áà.p7î÷‚ûÀý` rÆ‚qà 0^ÿ„wþ„wþ¤X›z<žSkSÓÀ³à90Ì3Á,ð;ÀNð È»À§à3°|Ž=ö€/À^°ì_‚à >ûȇ€S"U68 \ú´ÊG@|²bÊ)üø4>[l›Lô¡ƒðï Ñ©˜èD„†xöXtfùo<º»Ô'6(U÷&Þõ&Þ•çŽçæÊž¹‰o=° »‰17KkÓ£­ÞA$ÚŸÆ¿½øwÀ–2))›ô٬爃óÎåR×ÚR‰KÏ“pQú>"ÿ"ÿ"ÿ¸æ…k^¸æ…k^¸€k¸¸"&~Çè¿Û1ñ81ÑGL<†Ä“&&6º¢u‚ÞôW=kcmb¬MDÔrW²>D¼lH!ÄÉÑóµÄÆÄÄNO&ô =dÁ,ts1¢›ƒèæ”tÒ{o©$ßæ—×ÑnƒÎ—`«þZ>û™Ckk¡=dGÙÑCvô=dGÙÑCvô³ÐÑAttDG§ÐÑ)tt bÞK™÷Ræ½T¥è"5 Lfܪ¯­úWfðk¹ȇÐV…ÞjÑ+H¯ ½‚ôØŸJø$ˆÏý/Q°(’* DçòÙpâòh¨¼K!®¥ }>YeÚzUŽÐÆ­³É.ïúFãÿƒðý;ðý‘øþp|ünøùüÌB»÷£ÝŒp'y°9\m¦oÅOÄ/sñÇ×ðÇõø@†73¨¦Ó±@ú_f£Î01 t]™a7Ð]O ûL ûL0«¸ž{7€µƒìã û8È>²ƒìã0«ì£3É>™dŸL²O&Ù'“ì“Iöq}2É>²ƒìã û8È>²ƒìã û8È>²ƒìã û8È>²ƒìã û8È>²ÏÔJmŽÖ©d–T2K*™%•Ì’‚%RÈ,)d–T²Ê²J*Ye:YeºÑþ3Ș†vžÏé`˜ fçÁ àEð’^BVYB–*sõbX:•z-Ì2Û¶â² „5·sþØXõÙ¥?”ÏÀ<Dzn#í û8È>²ƒìã û8È>²Ï¬¿†§ÃðtžÃÓax: O‡áé0<ýÿ8C¡ ²Ð²Ð²Ð²Ð²Ð²Ð²Ð²PJ%grm±ðyÎU™8›÷sÀ\0Og’…d!‡ahšžJšJšú·l}K§’uRÉ:©dT²N Y'…¬“BÖI1l~Ÿó*°|@‹Ýk9¯ÓÓ+Y¾‘ëM:ƒ¬“AÖÉ ëdu2È:d ²NY'ƒ¬“AÖÉ ëdu2È:d ²NY'ƒ¬“AÖYBÖYBÖYBÖYBÖYBÖYBÖYBÖY¢êÅê+§N£æN£æN#ë̶<ެ3›¬“jjÍï`¨•y¬ê±˜œ¬ÑÈ÷h¤69ø_ÒßÜO«Y6—ß¡åLøô5­çJ¦„Ú1§>ÊCߥ:Ó#Øg¶y›¼‰ž µ©ÐsgŽMì9v6«[Ï„UcaÒ°¨o¥&>‡M‡É_–6ê0³"fÁ̾GÒH:¤fHê¤P$5“älkwcå"¯½£ð³?ÉÎäjŸÔ!×ÈõGùÔç»É ~ZäÒ"Ÿüþ5­²ÉÓ©0#wkɆ;È1;A¦ÞEëZ;ií¢u6­¿4|*¥õYZÒÊI‹ohñ--¾£…Gª1ÿh¬…uÉõÌí †€¡`˜¼*ÃÁ0R–ËãœGq  &ƒ9Ní܈Ú9TRe¶¤É$Y#e­,dß7]6ÓÏÁy ç² 
»}ÉŽïsl÷¶Û#e™¸dµä€<)-¯¨`¡¼jf·˜³5Ã×8§ƒ×Á€«7i·,yÿȇ€KdƒÃ ‡Ödfõ­%£´»dÝ=ÈNA²Sì$;ÉNA²Sì$;ÉNA²Sì$;ÉNA²Sì$;ÉNA²Sì$;ÉNA²Sì$;ÉNA²Sì$;ÉNA²Sì$;áv $cœ#['[œ#[üUa³BÃ’ƒÔžÖ®´õ5Y!HV’‚d… Y!Ht ]ƒD× ‘æ‘æ¼=G¤ùîþA¤ùk… -v­ƒY;`ÜNvþ™Ò«„a•X¥ z 5^½H棧Öhÿ~´ÿ$ÚïöïEoMUžÜü_–Öi£í¨Ói iÝ`l*;Ö4%£öô*eìÝôLgìÅŒÄØÇàQ";ˆƒóÎ.ÝuÁxCoà ¼!€7ð†ûìlöÙÙì³³Ùgg³ÏÎfŸÍ>;›}v6ûìlöÙÙì³³Ùgg³ÏÎfŸÍ>;›}v¶zT/PÁ`ˆþZ ÃÀp0ŒƒQ`4£ßUcÁ8ð¯Ç© `"˜Ç|æƒ`{«ÍÀ¶€­àC° |¶ƒµÛÜ >™`ø|vƒƒÚ‰':ñD'žèÄ7à‰ðÄ Ô€mÙ¶ƒãñìN;Âæ½ žE{gÑÞ ´w‚®d…+YáJf_Ä싘}³/bö«˜ý*f¿JÃ9œsÁ9œsÁ9œsÁ9#¥2R*œË‡sùŒ8•Wü_ƹ8WçJà\ œ+s%p®ΕÀ¹8Wç¶À¹l8·ñ ÎUøî¿Ç=ÜsÁ=ÜsÁ=ÜsÁ=ÜsÁ=ÜsÁ=ÜsÁ=ÜsÁ=Üsa™T,“ŠeRá^>ܡ{ùp/îåý|¸—÷òá^>Ö[õV`½XoÅ_p¯î•À½¸W÷Jà^ Ü+{%p¯î•À½¸W÷Jà^ Ü+{%p¯î•À½l¸— ÷²á^6ÜÛ÷6½æ»ù=ÔEÈû0`n¯n’ªu= †¨ß‚¨F%d}çXVƒh³¬ëôµlÁç¬wøìûÀ~ð%`Ê.ú(»è£ì¢bÅ0¹h”‡ ©õ¢‡(´‹Š 7•H'ð~/žïÅã ðø¼2¯ÌÃ+óð8çÁã žÓÁ 0ÌσÀ‹à%Æœ æp=—*!Jû±š«ù±š«ù±š«ù±š«ù±š«ù±š«ù±š«ù±š«ù‰ ¹D\"H.$—’KÉ%‚äAr‰ 9D"H$‡’CÉ!‚äArˆ 9D"H$‡ˆñ(£+£ #Î|k³œû+@X ÞïƒU`5ؤ ±v!Ö.ÄÚ…X»kbíB¬]ˆµ ±v!Ö.ÄÚ…X»kbíB¬]ˆµ ±v!ÅODñQüD?ÅODñQüD?l(† Ű¡6UÂaûRY "ïËì‡×Éy™ûöôÛØ¡¤³CÙ%Ëä3Ž·e»õwDÕŠ0Ï]Õ’ÈEÕÏÖ ãèYóGvö5¨ËkᛵAC b@ ÐÊòYFL –u¦öM]@Wü±èN¦»\’aÅõÜ» ‘4˜Ó æô"çøÈ9>rŽœãƒIY0) &e‘s|°ÉIÎñ‘sòÈ9y0˳<0˳<0˳<0˳<0˳<0+fåì\˜•O~òãß‘Ÿ ÉOß´|˜æ…i^˜æ…i^˜æ…i^˜æ…i^*©îS=åZu³DÃr—Üå#wù`e¬Ì‚•Y°2 VfÁÊ,X™+³È]>r—Üå#wå‘»òÈ]yä®<ë±ë±ë±ë±ë±ë±ë±ë±ë±›cóal>ŒÍ‡±ù06ÆæÃØ|›cóal>ŒÍ''úɉ~r¢öÆÀÅĘZä´Ú !ˆ1 hâÙw'°§îL5Ÿº€® èNü¹\Êc• ƹ`Üw#Œ»ÆøbW ãŠaœ ƹ`œ ÆÃ8Œ+†qçq>çƒq>çƒq>çƒq>çƒq>çƒq^ç…qç…iÇ`ZLó´"˜VÓŠ`ZL+‚iE0­¦Á´0íN˜–ÓšÀ´"˜VÓŠ`ZL+‚iE0­¦±ß‘ú0­>L«Ó\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓ\0ÍÓFUaZ{˜v#L+†iÅ0­¦Ã4LsÁ4LsÁ4LsÁ4LsÁ´b˜V ÓŠaš¦y`š¦y`š¦ù`š¦ù`š¦ù`š¦ù`š¦ù`š¦ù`š¦ù`š¦ù`š¦ù`š¦yaš¦yaš¦yaš¦yaš¦yaš¦yaW¤ÜE­œF­œË¢V~e¢LX” ‹œ°È ‹öÁ¢}°h,:‹œ°ÈIœg~7ÀõD¦Ðï)bÊÓà}ý<õnõnõnõnõnõnõnõnõîsX,‹eb±L,–‰Å2±X&ËÄb™X̉ŜX̉ŜX̉ŜX̉ŜX̉ŜX̉ŜԸyÔ¸yÔ¸yÔ¸yæûüòÃíX1 k9±–k9±–k9•õìç*°¬#Ó¬ÀF'Í̾è+šÊ¥ û“•bU­‹ä:¤÷Ezk¤w×ã‹Õä>²Ç}RC S×­¥ÙC=·‰z.[:É jºÅ’(qò˜´—yØa3qy pé#ÔjGT âl,h ZÖ  ˆmA_iL=¶–zl-õØZj­lj­lj­lj­lj­ÅÔZ‹©µ«²ô$0°ÓWiàeð c×evÌ®€Ù}ˬv µ©H-@ʤì@ÊvWµdŸ|)Å%9rDÜ¢j¿b²¤õ»ÃÙ2/äÇ©j=ehµmÕ²ªý«Ö˜Z³j¾ô¨]×>9_v̰yWù  3]ÈL·0ÓÝèñ(³}‹Ù.d¶ ™íBtpEGÑÁQfÿ³‹Ù¿%ÏVVÞuMæ.Æœ›š 
~y>QæQ…çR…ç2Ú"FÛÈhñ޼#ïÈÁ;Üx‡û²zЉw¸ñ7ñµ ñµ ÞáÆ;Üx‡ïÈÁ;ŽãÇ«ÔzEvwiuÞÉdàZ¡O@ÐÆ*ô\*ô\*ô\*ô\*ô\*ô\*ô\*ô\*ô\4¶mDcÑØF46MDcñʼ2¯ÌÁ+sðʼ2¯ÌÁ+sðJ7^éÆ+Ýx¥¯tã•n¼ÒWºñJ7^éÆ+Ýx¥vÁóÜxžÏsãyn<Ïç¹ñ<7žçÆóŽãyÇñ¼ãxÞñ¿«÷¤©y¶jžþQÒØ]­E»ë°Ö|k#Ø£7sßÁy ç­ÜûìПH&±«ü—9 ­'þ ñ=?¾g}ìµµ~ì$ƒÁ0T>’QœËIë$“–NªX,m/ûp¡ý  _½ÉçKÁ2°´~Œ‘aäöá üÛöuæw:‹ó3d š%¨©¶^¢Çi¥Ê;j‘º‡Q"¥ˆQΪtÕŒQþ§zC…1Jžz›¼´œ|ùyjìWŸKwµÿ9Ì~6OÉŸü†ÖðjwíßI­§Ynb5çV3¦üi"Ož<(EèÞƒ·ŒÇ[Æã-çñë‰ê×ð–L³gí¬ƒxKo Ê5úw<¦ )Ãc‚xL â-exK™ÜˆGÜz€žàfÐ ô}@_ÐÜúƒ[Ámàvp¸ÜîÀ=à^p¸<àað5ORÿ.CÀP0 #ÀHð8Fë÷ı`xŒ˜ÿD&ñ~2˜ÂÚžä<ÙÏ€i:@Õ j P5¨šTMª¦US€ª)@Õ *”ʈ 9TM~öñ.I%Ö¥‘{×b¯Mèv3QÆÁy çLòñ.}V>ì…áð§pøS¬˜É^÷wöº¿«FxRc"@Ð4QºŒ¨1ž¨1ž¨1ž¨qž¨qž¨qž¨qž¨qž¨qž¨qž¨qž¨a=eIÔÈ$jd52/Ù׳n¢F¨$j‰A¢F¨$j‰eD2¢FQ£Œ¨QFÔ(#j”5ʈeD2¢FQ£LÍcŽóÁóTõïjXlýKðH¯ƒ7Àð&X –·ÀÛàð.X޼ ¬ï÷Á*°|@›5`-ؤTeª²UY€ª,@U * P•¨ÊTeª²UY€ª,@U * P•¨ÊTe"X¬ŒVF+#‚•Áʈ`eD°2ª2?U™ŸªÌOUæWNªÅlpäÙ¿(]ùkXÅ/»ñ·/ñ7?þ¶úo~!ðc3?6óc3?6[ÍVc³Õÿ§òŸþW"ý/T.ÿ7Ö_sˆª¦ª¹ëªº¢—«©Fª±TWá*Jjªh- T u=QOÕ[ij—TGmÐ\%šë^j€š¨–q„؇ó²ã7ë ³Hs¬¸âø¢ü¨ÖÐ[Íá©v\<þU½Uõ~`ÌeÇ¢êÇjt«Ñß>äx´Æ¾šmk¾_«­çj}Y;™O»Õ¾Ã>&^r,«½¯öÏuš×éYgpàmŽ­uvÕù²nlÝu¬Qox½Wë½m뮺yÝ)É~yXȽ•wzsç6îô­¼Îó|à¥ÏóתxG›çd/;Òý2vOÿ·?íÿÏž³ÿãyñ¿jõ?ÿüèÿ¿OˆVÜõ_r÷ò'9Kùôw>-³Ÿä<ýo<ëyy k„’KZü{Ï F5#©1#©1#Ye$5f¤]cFRcFÙ"‰eÖss¬:‘U?Áªï0σ½Éý¥`XÃ7OžÄB×H,c‘‹ÄX$ÆÚc‘‹ÄX$Æ"Ѫ%»#q ïEâHŒEb,­:²§Ú€§æá×WîÈ®öüUc‡2v(c‡2v(c‡2v(c‡2v¨lf‡¼ì”0ê~âÅ}°j8¬ÄÎ2‰½elº—9†2GëI¼æÊœB™S(sêbF‰a”F‰1^0 ŒcÁdÆ5D¬Mì°7³wtpÞÂy§ÜÈ6œ‘î`¤;$Ïè7†‘bì‘b)†‘bÐÓ0bÒp0ŒdÔÇ9§Ò; Ik¥3Ò#‘‡ôH¤Ç!½'kº5õaMwšèy]¶ ä€<¹Z¾>Z¬O-_MÖGÿõÍȯs~,aŒŽ¬/œõ…³¾póËè0¢êp0Œ”ë˜I<+ gÍá¬9œ5‡3³Hfv3»™ `V˜Q/fÔ‰udF Ì(‘=ÆŒz1£^Ìh8ëgýáÌ,ž™Å3³xfo?ËaGŒxf~ÂÑO8ú gžiÒ–ÑZ1Z,£Å2Z3F‹°ÿjkFkÍhm¥“aGsoÂ<ѳ=;ѳ lIï&h°%n@Â-—=1Ô¶ÊCýŒ¤h( Å£¡x4di#mÄ£x´Ï(ÍåIXÐ’‘1Ò8FŠa¤qŒÃH·1R#E˜§˜]ð1äó5+m`¯þMÎKÁ2d^ù¬âß?aûÏŸ¿«ÚòðU[Ö»âI»°/Kìeöûaæöæ©»«µnJë6´nNë8ZGÓº•iýï?ÅWÆ'¿òÉÿâ“ßí§ø~ú›gü*>ýÙþô<Ÿ~_Ù÷ê{Ô;Öw1Ïæ64;óf—íÌ;Á¬ð?Ý/2;ó^ôœ ›ÃæŒKvåkˆ¦ÈË9ÒˆÎûÇOþV} zQ•§ /òw¼y^7ÉÉHNFòn<ûn|ðnû{€Çìçu“íÑ’-™Ñ’-µjŠõnFÍbT£3ꇌº‚Q?gÔdFMfT+L7Oâ>‰ä0óoB^•×è5Ÿ^oØOãZ»úù´¼¯ò©ähæmÏ'Þ~ 0šÞ»ì§ï¬òTò‹UžJNF‡ ô¼yâz ½›Ñk==zÐcœý¬÷$zÌ GWz´3Ïz[ömħ*žFç|ƒÚiý¯!é¦J"®¾)?«ájÊ 
i|ÙÑÞ>ú˜c°9&™,ØM·AÓm˜O´ÚÆÖj´Ú­¶A«m˜c‰ýýðÓö³ãKÑb´Ø-¶až}XY˜yÞÚbFRã‡ÔMH³¥Æ!5©qHCW-Xùq$l?û<Ïþýà[¤Ç!=éq0#É# 3’œd? ³KÜ‚%n±-ñ€ÍŒ${´$FKb´$FKÂB·°Ži¶…ö0êF-`ÔõŒº„Q?fÔ$FMbÔ$FœRùŒý@z¤Òãyû{Ë?†ÒãyZÝn²x s g.áö\ZÙ¬§÷6{Ì~H˜Œ„wð,æ"átgõŒ‘òÔ_HùØ–r;R¦"e%Rf"eRnFJWSƒ\}7üçO€þ“g?ÿ>’‡ÀÔOÔh™g~= ilþ7ÆÃÃÉñòßð§‹ôoú'v+ÖõYvÚb]ë ú+"¯uïøU~ÕÅœÏèoõ)züRÞç y¿èR}‚hk]ÿ@ôµÎ~pŽ^%œKõÏDÞò¶'ìö»ß¯*ïóûÝúŒsù,Ýà„~§¼=»ëõ™ß9æö‹þTûÃz”÷þ ù•¿ãþIsULõlt`^O`#ëÓ3X^*d—Ë7ïÊ*¯.‘¨—鯩ǭ«#ìC­³é¯Wm~‡V×]ü —êWõ{zŒþBogv›õãz<»Ñùp¢BšŸON–ÀŒÎWÞϳWzÝÿji‹ã2ÓÇXó…r»]Uƒ§Ñù9ûê|•ûgÌë9d7bi÷7ýçŸÌÝóÖû?‘gY¿|.¥dÖËåYs+µå}‹œhùFþ=œÒeÖX—ÉËÕëØõZWYT!Öù yué/y]©ú“«Îã—Ê+gå•WÇö[õHÝ“wŸ ÍÞz˜ž£çë»™Å݇OfP-X¼ûõi¿]|O tñþW¼¬äßׯª+*îš×Ãf½ç`Ö/ú3]Èò˜œ¼d¿Îºêjò‰Û¸Œ}£™³z¢}÷W=HÕƒ/®ÛØÉ 3ü–g™YYžVlX²Ucy!ì8Ãþvس¿`÷¼PqUÁûºÄhÑðû]ѦÒw¬ù6\øšõZ¯Â­KVÖQÏÖ)æ*CO5çWÌë5–÷áãu’áá9Ý;õ×éqzwFóî ÊÕ$=³Š¼X¸Ü÷K­q+ïŸ2¯–ÏÊ׋NO²þChà$v ëŠ-í\6¿1ú ¥{>=¤ß5w ßôd¢³¥ƒ=În9RÏÕOë‡õ½–wïèçôLkfz•ÞSEÞËÚmùšñèæÎ׿u‰ñ³ÓÌf¡Ýr!ì^¡ŸDƒô׸~Y¯äjÅÐJyžJÿpR­[gà ì~ØøÇfýùUõ~Ñ?_r¿)Qh¥ñ¯•z°¹3×¼&h*"½\¿«ì–ˆP]õu:]OæÝ#°o²¥ =Q§W‘ÀGÊ=ö‹Uâ©ß¿d{5&³”³ÐŠçUbG¥¬“zqåµíúf°U?RuEög"î…‹«gn“ Ná¡§­ØÆ»òú®¾Þpøwÿàj¯iý‡à +F"OÕÍUù'>lvÎ’^Î6s¯³vè^øâ<ÖšÁ3Ëc•z8w•O®ºš¿‘tQ£¿Ú3mÖu’]Qù§>Ý›×7uk-ÜvÆûõ¯¤^2‹~—çO¡(‘ë¤oy¦°Û†ÚW‡.‘áÑë/f6sç{Øzƾ®°ï /ø©"ãèÿ4—*´k¿^Må¹î¼üiF¬Òö×J¦”Çñ³DoÿemƾÅîÇÊuÇùË?K÷öU¡žebí)¼õ‚áŸOwæu£yý{œú;+Û|6Õ­ÇýâW§Œ6²¤%ºµ"Ç1S ˜ JËç[õóË9mÅ!ûΡŠZ™çÆ?\pQ{È}ݾúà’™m"n¶²´ØÑ{õâœíV·šØÖÏxÍ›­¥öü®b!Û~†d´Õyf>ÈòW"﹊ޗ·¸BbI¥¼“zÉÅUÙç;Á6_þž/ãKEïüŠj±²Íƒ†çË/æ}´ò}Þ^56èè/³<ïWFœ#—ÇEý¸}ˆÁëÏsÚaÎeX¯Êœé»”jª aØÁ«ò¹D/–?*ùw‘ÏbåÃËZëû-o$—¯¼ƒÕÌùqñl•–[õ hºÊzi±ár­[9‰Œ2ÖUèä‚üíßEíØïs+æ+rEûGú­?¹¿¶òj‹}þB?ƒ?ö­Òf†nE¿Œ+î˜ZRo¨|ÿ‘}Þ«§‘óûÙï¾§’š©;põa… ˆ°ÏˆUãmÖ]ìèØQ/°$šé5¤ºÔä°ž¾³žc¯#u¥šÔ“úÜoÀQCrÔd&a´ç¨-u¤‰yj1’yÕ’h‰¡O ŽêÌ7–Þ­8êIkŽÒFâÑVÚ!¥½t ‚ˆç•ŽaÒ‰Ãz¦=©‰M¤³$ISéÂÑœõ^Ã8]¥#uG QháîßÈ-7I>íÉ-7K/i)½åAfñ dË#ô$šç 3¿è ¥Í0yœ6£d´ùÕj,mÆÉxîO‰Ì:…£¹L’ÉHžÂÑVžäh-O¡ý6hÿ9¤M爖29³8âäyŽh,ò"w^’ÙôšÃÑNærXO`ÎcÝó9â±ÞÖmÙÏúubëÞÌ‘(ë—«-\o•ÑÂ6Ž$ùˆ£‹lçHÂÚsmÙûcïkŒ½»{w3ön½ÃyPM¸ÓÛÇÛ·Åöñ¢:ªNRM%¨nR]uWÉRS]¯zK-ÕGõ•ÚªŸê'uÔ-ª¿ÔU·ª;¸§º_ê©ÔCÒ@ Tƒ¤¡zT –Fjˆ-Õõ„„©ñj¢D¨5Yš¨)êYiªžS3%RÍRs¥¹š§æ3ŸjóIU¯HŒZhžóz³ü—@µŒûo©w¥¥Z®VK+õZ+­Õ:µQÚ¨Mj3ów¨Ý´ÿ\`Õ!i¯œê°Ä+lîh¾+k 
sÇXÿ/.ü°8m8ÝÞpº1\mÀµÅãÆ†Çí [÷æ\Gc±Æ0¡%×kÖ¶7¬­eX[ϰVÖ†Ö60¬mbXÛа6{ua_›¾F¾62|€­2‹…‘†…M ­_NGq=š£¥áb³J.¦ÐkGK˜8™6«.¶4\l.SådNƒsÕ çêÎ…Î…ÎÅÎÕ0œka8×A6š5Ì 5Ì«k˜j˜W×0¯•a^[üv†ym óÚæÕ7Ì«o˜WÓ0¯™y¦™á_moj«H‰,æÕQ-T[‰2ük ÿºqßb^c˜×‡ë¾0¯5Ì»ƒk‹m ÛÚ¶Õ2l«gئ Û¶54l‹3l«ÛæÂ¿y0¬¶aX¤aX¬aXSðfUaÖÒ0¬9 Û'Xܪc¸n¸UÃp«E9·þ7¦ €qxœe’;HÕQÇ?¿¿!!bb–àt‡ˆnÚËÌniVv{ Ü—H4\É^ÕݧèMM!jeö²%ÂÉ©©©©©©©åö=?ïÖp>Î9ÿóøþΚ8ÈU¢þ¡$-¹üu¶³AãÔj¬Ï[rdp'mõ‘¨n£aâÚ•íÎNç.}¯y_gÎõ^D#-¾«ÑZw·Z§ÚÆúl‚â¤8¤~ÊWDZhzM6Ng¸DV7½Á÷yL•§‰ÿ*\f–gªñ YV•?ó•Uî(A^–Ý)îºÓÜsgìm°îôÎ]°÷¸KöÁ]¶îŠ}rW¹jaóþúS^‹[^‹ÛžyÁs.†œö2ä´¥Ó^…œö:ä´7!§ÿmz¡Þœrxœc`aIbna`e``­`a``”€ÐÌ»’˜Ì˜øY™˜˜X˜™˜00¬`Pðb€‚’ʆF†ß,l^ÿª'°oc\¥ÀÀ0$ÇÂ˺H)0p]V4xœÕ”‡sU…¿³ ‚tЄ°Y ”PB/¢ ¢‚‚^‚tB¯”¢Òˆ(   `A%tBB#м‹XÀ’Ê»ÁððÎ6÷¤^ì&i²æ;1N3'ÛÙçœ [¶6lKØŽ¸in·‚íÆ¹ž›à&¹-ݶîJ7Ý]_>Þ‹Oˆ¯ç9^„WÚ+çUô¢½8/ÑkçõñTß1ÿRx~¾Ÿ—’vŸËÌ=ÊܳÌýÄU÷)æ^Þrc]×woaîi×q/kî•®º÷÷Ý1w©dhƒü!ðwm¸œmŠÌ;Ÿw:ïk®¾ØàÔàI›—™ºrƒ¹ëƒé¹™ßr³g- L9´À,8™T¬4Wéª ÷£¦ã* Çp‚Îi猿>S r¢@…«cþ|ôOcù åÿrxŸ=–ûC–î –éMÆÂ<£ãœÂUĈ8l|d°DLg£Šª˜Š[ú+[f«XÚC1ź–âæ)&ÒÈlï'¹ƒ:ò±êáç9Yƒ©Âãu¤;Ú˜o<54b'úÌÎ2jYšµ‹ÛTcv©”bÜf„ÈUÃ&OcUUã§žªk‚ô˜jê ÕÐãªEcÕ¦‰’Œ‡š¢†šÊ 4W==­Fš¦Æš®&š¡¦z^ÍèAzêfÍÕ-zQ·jåè­Ö$«fê6½¤Û5Ÿ~ºC/«­pe™½‘þºS¯¨é.-Ô¢¬ûôºî×z@ËB= µ\+Mi…ÑJuÒ*ª2ZµZݵV]Ou&¨·*©¶éQmW_íP?í¤“5P™¢l¥h¨öi¸h˜ö3V]”®nZcÝ0§°R­%ª¿vi€vköøêŸu~­·ÎË´¶ 5P—­Ó¾µ{½à·\+¿“Úaò6²tfxœc`@ŒfŒf Ïž±nd``½ÉÂËÀðo;û¶a¬ÿ`½Ï"ýÿ„¥xœ­VùsÓF–|$NBŽ’ƒõX±qšÚ+“RL’eÜùZ J+ÅNzÐ2Ãßà¿æÉ´3ô7þ´~oe›@’v†i&£÷íî§}÷“ÉP‚Œ½À…h=3f¶Z4¶s/ Ë­†Ñ¡èí”)ÆŒ‚ÑéÈ}˶ÉÉðd½o˜†¹™ŠDtèPF‰® çmÊ­Ü믚“žßñ·ï¶´­^ ¨ÝlÚ -AUFÕ0IJŠ»´Š­ÁJП¯1óy;0¢ šlvŸM2Zg´YQ†™å0”d´ƒƒ0t(«îÉc”÷Úå¥KcÒ…ù!™‘C9%a—è&ù}WðIªœŸ8÷;”-ÙØ÷DOôpw²–/­­ j[ñvȧ›;Ž,vj Ù¡¼¢q¯Ü72ihư”®Dˆ¥SfÿÌî§|É¡q%ØÈ)¯ó,gì ¾6£)Q]YPýñ)ÃóÝ’= ö„z9ø“é-f&xð8~OÆœ)Ãâh’°`äÐJÊe\OULò:-ã-ÃzáÚÑ—Î(íPj2ë¶%í°d;4­’LƧn\whF(ñîòëÒ išWÛXMcåÐ,®™Ó!ˆ@ziÆ‹D/4ƒ 94§Z»A’ëÖÃeš>OzCµ¶‚ÖNºiÙØŸ×ûgUbÌz{A2;‹üÅ.Í–¹HQºnr†Óx¹„Ld‹í áàÁ[·‡ô²Ú’-ñÚ[é9¿‚Úçž4a»/§ê”&†1/-Œ¾iš:WóÊHŒŒ¿Ьt…OS(ÊI(Ž\Aý_ss¦1c¸n/JÎŽ•éqÙº€0-À·ù²C‹*1Y.!Î,Ï©$ËòM•äX¾¥’<Ëó*ci©dœåÛ*)°|G%,?P¢Bæ‡JÀ6sì0‡Á.sì1ç6ÀÌað%sÌa2ǸÇ÷™Ãà+æ0xÀœÀ×Ìað sDÌa3ÇØgƒst™Ãà@ÑõQ˜yA@ßjt è;]OXlbñ½¢#ö¼Ðì5böO1õgEµõ^hê¯1õ7˜úPÑÍõ/4õw˜ú‡FL}¬žNä2ÃOn™ 
”]n?~Sœ+qMcÿÿxœ¼}|TUöðÜW¦d&Ó[¦—ÌL2™’LMï é¤7B iÔЫtA°`Á‚"b[±û· «®b]uÝÕÕ] ‹e’—ïÞ÷&•PÜÝß·n˜™÷fî)÷œsÏ9÷ÜóX+ŽÅ"ÒȇYVV˜•ÆÊb±d™Àï3` ¹«ÃžIv«EˆY-Y ‹Ü„=0KÌaø÷UæþK«‡×eakßUreö¤½&Öä4j¥ òLµ+ÕjNMˆ‰IH5[S]jê!;ÿ]9>·ÿzbmbV\QOf¸2ÕkQ4Ïs„¬v—QeŒŠŽŠ(1§8ÕjgŠ9òJl:7¹’”°X,lðˆûyˆ;—¥d±î!ààÀj±;p3.V30ã%“¸2žÚ+¹7ö;½<7%™-Ìp ²°ÇA–9fefÍŠ£¦ ¼ç06uNë‚#±©Ã$—Û3zl!=…ÇV!PÈ•~bjH—µ'¹­uR}ÜxÈ Úä KäC*ñÑxô߯З¥†ç$ sn²*×)Wc8.£é¾›Åbÿq#X*ˆ›„ ñ’˜9f梔€™ƒOŽó|-G Ô"ð…i Šª¹Z’Oá>Âd°«»+¨Dl+uà,µ÷<(g ‡ãã¬È×§áø8¾‹Å²A²‚D—á~úƒÝŽˆÇhi0˰…éÕ~•$>/éÏÏþpc–¥®É°æÑ…AcÞŒÒ%óÊw½½aàóã?[³j}æMAmÂ_¥6+ƒ9Uy?&6­*KŸÛZ¦5mêí>º¾B¬œÁ3d4ùËOã@ø}J#`Hð‚ôÑ€©8hB<’O…Ü€a†«RãL:±XA.h£Îßµå½]¥É=;|eNõ¦9õ¦“F¢ÓÇú ¼-çº+f÷K4F„ϹvþâŽ#ßlZõùÉ=3SŒá2Ï¡R¬Ò©Ä×ÿqíÔ]ÝA„µŸÆ§‚Å"=øÅpð¡*"`À]±;Â!xÑï Aæ90ï²ÇWå\¯¬Q„T¾{µÈî7DÇwgØ3­qO =üu˜þuäw£•Ú8¢ùㇳRdSDߎ1¬½s#ª=3‚+o&=´5üÕ®x U+Lt ´pÅJ!ÆÒ«vÒü@ðpšCüZSÿ[ÇAဠú¡øÕÐò7øOø½Gà÷$Ì÷8a(cðÛ6›Câø#ð7YépÛ u 0tl;õ“€±?Š–`G¾ÝÚ#ÜNÍb±"öÿ3ù!+š×6àSÒk Z´4ÑŠØÚP’ÜgŸ9óP%òC%Ÿ“Ü]k–€žó £ðCã±?ƒøÅ1øe‚0@£Áyh7ËhEèV&îgvœú¢‡p©E–ÝeO ²wQŸÿU¡WÊø€Å—rE98 ¥ß @óŒWjs¬b³ÅbÆ~´dkûï#&¢äâ`Ì1›sŒØ XE áCëí¡ q€“8ŒDiÉ»WAá8Á„«¿Rq61 ŠÑãR=ý’ã|¹¸¥5[‡‡i{µÊÇCh}"Œ±#¢–ÆP‹à˜qÓpp~ZïͽZ¯+A$ŒUˆÕêô>AI´Qyä>šb }lÆÍ úõ°¼yïÜL2JÌ_mQDU5Zð9Ö}ÿ'äCD”BÜ¿Ù^jÂ5ço§ç٦סmR±"kÉ(ã„”QK$F¾îÞqì­…Èʇ»w{kÑŽ/ÖQÇj–•u˜›æ¬ZVŽÖ<´äƒ'÷ÌJë>úݵK>¤ß=ôýÎæf¤ÔìþÓÚæ]ÓSªw¿Ã¢ý–èÁP ÿí©œ•1P2ΊxÍ;½èÀy …Læ ~JU8OXJv=³À>%„°pà KDkweeÅw¹×þ¹ºÁ·õö|—Lo´iî*+Dbí´Ò#Z=Âêªf<»á³µ âVS¥Çbš¾ßÊ ïŠOVèe\ȯ¼Á3œo ¿d¬Ä‘õqˆGëW˜v/à=i(–F®wOÙÕ¶ôæM½¾NÇKàzd º]˜ÞÔÿ¼¤þåkO]WÚ÷à¸êd® Ï4ý³aÿëwÍHk¸õÃõ »f¤â¯3®ßæ¢ÊÎûÿ±éúòê©UÎ3:IÚð`‹ ÛÒ4ªÄbß¾´Ÿ×?±4¹x÷W·Þ’ öy|éôÌ„¢€ž²`ogͽ¹aéë7ÖR äWÓcßB­¿ÈØÌzÝ44úþÔ_Ö=±ŒýV#mÊ àG@¡_GÙ±7²æÜÔ°ìõk(òá16%‡úž–C+ É!#oC¼ìÑRI›hQ¡^î’¸! 
O•W•ÎÄæµ“•ú)¥ÑŒ­‹ˆÞÚˆ(öS¢1"ºyî©c7%S qCvޏ™¦Û1!ÝȲËC†«)œ–¡Uûˋ¯^óô²pBõ’2ƒOò.õQ|Xp@[œÙƒ„¢ö`|žWCåa¥t][Yyýò©I’u`ЬGµêÏ‘…G¶ûûÁ3„Ž|Ey€ªˆ°±‡:ƂЮÄŒÄ{*üI©E­ò¤”ø<•»´µ³­Ô]¶ýõ5YkwY ãº`v]æ”Nwqë´Öbwñæç—g.î©ÕשT†XƒÆ“i‹ Å[â}å}Ï__§ó&ì‘(^Gg‡;5Λ4yamÇ#[ª5ž,+µŸÆµyB\/æX‡=øï"!gùƒs„ºÖþ®ærÝ郎ûÑë¦FÇMài_˵xÓ­'° 7D< 'ŠCûuPdfâjûÔvQÛL„‡8z¾Š8 0´¦,„ß}~W4Êo Ó?Áqw(©j¨?TSß+ÔIÿÚB¥î~d`*vÇ#  `E¯MÐ^÷Àqb‰Dk³ wlMmÏè.KâÍ…3%³îûbÓ¦SW%Óá'ž<ÜXÞ†•åø´þ?»­¹ù¶Ï6¢÷å+¼ôØÓ¨;鱑ÂQ9þ‰,å§ {f k×?8ƒ¸ÀíÊ,¾ê‰%^ñô 3òáþ ^Ü7S9Öó÷x¹í‡N¯ˆÁ”Æ«®ÇGÿŒw34¨²g€<: > ö¶ÓÇÞ­ä*½PËVlr´“Çûÿœ¸8+kq"nÖÈô˜,WÇôÐcFrhÇ žÆÂ⊤>¸†€k²,ŠÚO}ô-`9ýdÎÆEF1p²•†Xy™3‰7E5`"TF;¾GÐëªþéäÃSÿ¶Z§.— Œ †?XkÜîjøY¨‘  .Ý iî†øéhüAø¡Ôa@ˆñ/ñu •úÄdå½OpIê¤ß_ˆh´¦) _þoBcQäŽòCÂ2|hE²©T1!v°lUs’XkSZ²S%¶¼¬[[“§W&ö>Aí)ÙS²ú.½CÅËòßÒè+Lû—äã@wc·{Êuí× œœ'×ôKDšX„­¤ã¿Ò°3ÆÁÖ‚‹¨Èã°ÂEèö T¤¬Ù3Ëñö?>.n™ï‹ò™KÇb|©Á_ m¸“0Âõ*†ÕNÇ2 92Ÿ“‰Ck‰oèÑzšB‘D¡Ý hOT5’K„ë[$1¢ÂR¦ÌÏVÕÛò‚6.8ç*ð›y틲q 7k}[wÒíûT¡æ'–âQÉ龜ßbpð¯Ìï]kʺ£;oqC’4±zí£Ë‰©uñéîXs¬J¤ãƒ“¤Ñ“fjѤNÍw˜Óëƒ<£Ý¥¶ú…&ûÕ S½ù.Å5KkKRbqÍÔÚ}ŒAÙZ]U”“Uè*ëN‰-”å3Kg •j¥P$@k· ®› £0²C‚E’¢œ‘t)6âUšA—¾Èš’kKM4OÊM‘y =³<¡fÛc]勜¸Ðå;r$–Z^´xwEg³¿aq.X^Ûæn¿©wóÏOÎIKŒr: HÚ¾ ìn ´,7ô‡#~Z*8c–FŒ‘#(,e¾–isó¦ìêM®¹éÔŠé7¥uñu¹Öä±yòãÕÎdSbšË®… ;.ó(2ÜÞ½æo÷wO*[È—Hd’`×®©r¿FkÒŠø¨ý4Eã×ã! 
PzñK,ËÃrlº&• •µÍÁ‰Wã¬üÙ¥qRpÅ…8¶&z´„}‚•÷:qñœõ qh.up.ëé¹D‘ÓP` dD›æí(Í£ÅU†ÒOŠL¦© 'Eî}’ÖþI÷w 9µç·Î_—ÓãÈÅ…j=M‰µósðÿ;ç@ ¡uœÕéÍ÷í^ÕäsÆbQt¬ñ WÐq¯•:6ò•]Zdåpd\6“Š»BÇ„ÎÁ+D˜áÛ šoÁ ùf!=Õ(GžPǽ¢Ï7ŽFïÇeÛFã‡ô"«Û®ššîæ ^QƒDõ2ì;¥4}úÖnXõùý£pÛüÍýÓâ,zÃåÏ?Ïøüj¨#yKÂÊdb©ðP"LI}ØXÜàÍqØaœlwx@J„UJ6|â(Ï Š¢ô®T{°6ÍÔN”Uîþ`sËÞ¾ŒÒ LoÙà!éŸ&G %Ó“²}b…§ëáÒg||¶î KõÖé¦p¼Ê]13sæ|üOõ_ì|úöÙž´¾ƒ3ŸÙQ›“Ù¸%¼®Céàg(âtg‹¤ 4«MîÑøë!þ÷AýŠ¢3{ö(;.ZeH³ÊéÚ—åªâ€“1ûæfØ ºVÝT[³®Ñ“¼õŸÇ°¦†㈗\5Kdq¸4ÏÀd³ÞÞYs`ëâZoby{â¤Å-ÙÒ™Ó† Ä ®[ NEcpœ‹9sW€hÅîÆ œ»šÍ¡+ż¼M>ÎÝëör çïk¹()Ð~e FáFò – ù$ŒšÑ3ï‡qÿ°%e‹€ŸÎL É‹òW¯ËÐÇ’â«9ü²%·4[Ó :Ò£*Ÿ"`¿£ÃÃýçLÉ!´˜vaí4òï¶ÑüÝ@ýßeWÔüQfˆ2C§F›¡ÌË/°cŒ`Ý@φèãÜ éËD•$ÿ …­ÿŽÌ!“–<¡I»rb/0tŒÎ#šÐúPÉØ:¢~E×Gl]Ïï´u¿‹[ì¥W`úpðÊLäïdè0—âáN¢|øúŽQ×w¡ëH¨Ã´=hœØ\¤¾Åúßš ×5¥ÿ帠ÚâÊLIÄ–¼HëÚ¡iÿÍúOôˆ¡!“º…h…ó`öR†‹J˜-›KÑA´®ä…Û67¹2ái̦˜ì¤>ŽÌDB~¢Å¢úcñºŽdï…\`ðÇáÙ »“i„ÇMODÖö±_‚8VN,kÁú?’°Î‰húð?—®³cI͹Ébb¥ß`Ì‘5a>ié’Ÿ‘º1»»Ãq8s±pHÖ«Ð%§ån3PRŸ: Cô[ÔYs‚`'?FgJ¦Þ¼Í^daBª±nÉppÔÿ„Dbcb¨½Ä££Ä*=˜#¢‚Lœ[ün$vjtL;])î¿3†b‚¼ËRec¨úý¡Ô|%áKÝKþ‘©œ€4ÿDxuMà2*wÁ³ën¬xéõî¾øJ[MxyÜ Ùº|p.ƒøžäuõÚßeô¦‘o ÒJ8B[ˆÉ‡—¥1’'v¤G “.僖F‰ÿû7š¯ý$×êÅ ±'²ó3·¥·80œÃQÏ2ô¬OAôlŸ²#øƒ‰ÔÛoPÓâ½1< wpxœhá n·9«1™#·eøÀQFÙÃÑHÙÓÆ¤Eh‰â¼J~ŽöyYö{hú}«L;èh¢‰ÕþýâÄ©'Çç…ª'Ê~¥FÏ q–H½ùÕ}Q^ÜÏLqÒÅg:P5šCøà“Ô­D,¡g‰XFzï%òÒÃNgßeÈà3i=”‹ç@A¦·:ÐÖ‘b8½+UÐù.ub’ô`ƒ§"l=;ãJÚ±2J†/Ò¶VÈKjd[V­I 7<{U÷¡¹é¹³·ìÜĵöžÒ´Ù5¾´Å,LŸíÙ­NH·±¤Óºµþ¶ÕžQæÈ)²¦–Åç–‚³)rƒ)6»¹²6%¾ ¨2P¹cV–³¨3¥tö$‡ˆ¥Ö„s5Gåòšòjµ´<¨X,‚$?¤ë&&Ì2™ÁSM¡‹ä¹>Dµý¿¡±ÙgXRz Oج¢}¦¬@v€faœôó~àzù ‰M˜Lr2Ь¿l37ê/°“pApðÿÅpbèð­e,½ö!èL¥®Ÿup¢ð ´„z­„ë5æg‡¥™Ôk›¯Ñ\­!Õ€˜'ÞZQè†ànÄf×åÙ :Ò)‚˜öø3ÏÌŠ§ó7&ø—ü,RŒªÆGo®¤Þ\ f›yVkvôã³­±¹~#}þG"èl9Iz}RŽÍ–ëÓë|¹ˆ·ïP‡é1µ,/‹%»¸ FqAàößJïNídˆ ŽÂ¿°2ø3ß>ÈS´ñ&j“iêhjÜé3Ô-à<ÄUF׋÷Ÿ.`Æ«/ð‰VƒÖ xó¯1Î!ºQ€†ýÁTä^y"HŒ‹äFå¦Ê ªô»¤G/çè%8±™ãQÛ8ðwTOƒ4½÷+¿ó‹†µ¯;ðò à;1fÓöüé9GD/ɡ݅Òì—Aƒ©Su1ÎD ¿U­>”ÊÝUÙ¸H\pgoÎô|-´oV¸H'3¦øâ¼áO©ðdªÛŸÑX·`ÎÂÙ#Â"2g5…^aÞ[²§$¼AÛF5œ6ÄIw1ÛT€}¥ÇÚ²gMŠX&\jRZJò’Å)Ôk+!rxsWÛhPÎò9¹ƒñ1ÔÔ>† ÎèèÈÙ‡‹ƒMç Òf졵©4ì7Z(¤$Gñr.ŽÌ¿Š[ööe’ÃÞaÑ ”6”ažqèat­Æ?èµpøÌËÐÚ§¥°hÊÁé@uŠ1£€þ7¾áæ¹YSö½>¯a÷Üløz:kÎõÕó;²f爵ÛùUêÜ;fnùû=­èuóßïm£ë?|pé,.”£$6epáEõDŽfYô2B—:£ói¨ ìÑZ9¶f ý‚µÆÆ!à³c§¸æmg>Ìli 6ÍžwDi³oi—½:û– 
øKps-Â!Ánz.¸©¡• ×I¹L ¶vð ÍJ@œŸ}tÈØm»clIÃÈŽâè½ãǹi“Ò¦dš£õI±©u©fðÕ䥪ÎÕ%ºðÒ»ÞM·øK_\X·aŠ×=yöÚíÅS·¸s˜3iq'|íßr[ªFö‘±¶Y‹s—î²f'êSfÜJV.9¼j£g`Ú¯n¨jÌœ±¥|þÉ=‹Ê…¹ÙEqE]…‹¦äHg·ï&#ûüõ¡ª'³]²&hHúºÇ–h¨ŠèÚâÌîu$RïÜÅmPÙNLª]êÚû†6(VjYdŒM×b8/ [ˆÓûã`{ n›2 v,&3&Ä<ÿ)ð¿°ºuøOdV­„caê-”Ôc‘š¶+ª§ ”¯™éòµ¦ô]u£*Õî9¥U°±‘z5_Ãâ<`(,©)M;÷ŠÜd“°†|0Jè'A€«pû(5¿Å^?¥-19¥³&OL½…ìÎkr”Ô_ÔDöÚ.Ê1&`ð3t^Ò‘6ª®‰}Ù’h(–Ÿ Õr¹Ãb+H3‡ Š BæÌE‡{“Úë •eB›6&Ö™êÍÍ6‡ò‹òCæàŒýÓ§ßÄýR±T% :µvCŒ1.µ2\±µ7CnñjÈUj:-ΧWélÉÕ©E›gd˜ºíϨý4Žõáø{K¡'D=qÊú*ñÄuÐöIY~éï ¨`egA´s‚J¬ÍlµÑ¡¸8¡€•2E'_gU£SÈøPå $”¸p2èzYŽ!Éjýl˜¹ó‘éó÷´8©ù©{rn²½dÅ-UñÕe¹1µB‡6Ænw›$­8!»,;Á•S–ãšTšÚeçv1‘dÚ‚!8Mw¬*Íí\”Óœí‰w4N—¯pjv÷Í]>4k}2¥Ú –šÝÚØ€Û“Q6©cR0œíM),˜{]QÿsôN”c2-¢7Aú úFbö1aâ·üIóon‰M5éH—Êv¨Ù+™‚”R„Ó¢°igw˜ÏÛ#”»‹çAP¼ `Ï`ÿ SÑ^l˜‰EQ~É u +k:RxkŠ?¿Ö|íFs†âÏÏ™ñ£a¹i†ÔïR3. #ûñÀ·Ñ>NÚxçJѧv„N^)’c‡q®eÀA¾|KŽð>9‰böÑxrÐß„¸®€Ø «U&\|x,’⨿\‚›ó-úåk†±[óxûDE²Ã%3i\MÉ¥²uŒÎWŒÏ4\ý©6’qePŸæÑþØ´Ü•ª×Oó/ùÞhý±gzô$ù‰æŠÔðbì/´Þ‚)Dþ°²É\…~ß•(êI Íïˆîrª¡îö±ÖCTuñ­Äÿ!ñq ÎÞΟ +³®‡Úù¿ã%¶oá83À…»‘—T³+g,xÿKÁbt“ǺY¶âw²”½:ý?æ×ÿŠŒŸŸí¤‘ˆ¦O¢ÂHb¨Æ|Ì©1¸ú@_éýSsf¢Œ¡Œ‚QeB‰Ç<”RëÉmmFÉFl×@Ÿ3Õ*²åµ§a¶Èøp Bãû.ÚÀ>!L3Ž/¹@ø*¦¦] Z=Vj’ø%âuþG„\ˆxˆ—”9S6ê(ŠðˆxŠKë,“Ìç ©3æߥeò‡FåM›Ÿ’2/¼­2±`¤ÃæBæ|š•É…ÑílØt‡Q‡³FŸQCpØ|ªù¬TE–á$Æ•ð^V¸¿>ò½ÁA8˜4Nñ Ï£½ÆäÀÓ¹Rh”îâºi Ô:ŒT&¨T.ÕÀ@€A§§=±ÓÞQáZRâ´ƒÛàmN33¿#gÓ˜¶: Ñ¡,ò›±' þf°òÖ‘öã"»žúÜô д¼…ÑÒÍš8 Áüo˜” ƒ/ãQÿ[0øX,§~Vkþäd!©zŒpA8 ‡ÜÐñ,u ´›²¿þŽl³™Ý̎£5Âg5}îË4pþIIåèSõ†4=x"OÇfqè\8.ÊrÆNÃçŠqƒ„Ê{tý¿Š1­¼H-ÑÑ+Miò¿Z-KÁò¾‚¶tž‹ÝE¾ÄâÓg+ÐI+º]Äpn‹Nÿ³Â²@ä$3ÝMàYgÀÀ—©T5y]¹¿&EC&vé5|®Ê™àÕ—×è'oéð(…Lk5Uª.‘E+´"©H¦³‚Þ¸¼f?µJ¬ÖÉô2^EalE®—Š *Q´H Ò~ÚB<û|%c‘/?ÖËPpÉäxÀgÙÓKâÂÊrf”Æ'wnP¤y m.5‡|ÁQÔ“Uwm»ßQ4=§vs«Ÿ²öN›¥”¸gùy‘؉m'Ÿ£û  ×n?U]½M>ÇPqàwog‹°÷8¥0® 1QÉhëÀk £Zuû[µ\EýÌ)ÅyR1u°ù|å Ì$vâW30x̶ ä.~5uVoºoÌ›@>Gý¦’ý¬gæ¬c‹ðó ø}”D^~žz,Z¡ŠÿñT´ZcRpJ©¿òl Å2FÛ“ÝNU¤?F¤> õЏU‘â*n4{¸¼j']+Ų#ÕR€õcÁp œ6ãHC³Dà ~¥~<ñ’DNJωæ4ðLòaê3`æs§ð¥\!g ;š±mOûñòQæ(ìgR¤(°2á56Gi àºrªÝ–äD»X¥%rܹÕ.KŠ×NΊ‰ÏêždWéTZø¦Èß0ã>Oâwq,Œ4@c8<0 ñ†F¾Knó±ØŽ¬Ú€¿ èT£Ïà(;.«Ö( 8ÙÏI´Fa¸g²×`3Hµ&ø¶2¾Eòö8ù ¾cŽì}\®"ìœÆf1¥$h4 )&Kš[ŸÒ,æTWLŒ+ÕlIsŰכ’ãÕêød“9]M1ûÌÐõñ0ž@ÎE{22¨Æg°4ÕÀËäÜ“èÞ¯l9ÞÄ>Iï×À{ +“<Â>yÞûœØ„çÂùR L\'üC«d 
ø±~nº”ó8.w˜ŒÎ!þ2'¾r9±)±jv†Ö© á?³«á8ß±ùø&N,=ïÌ8´€£`†¢Å•±ÜIgvvˆü“Þ¾šÍ7Ì,N …’r¦;žцýÀ–Fô”™ˆH^<Ì …²±l`g»óë\îÉ):6ÛSPïq•‡u £Ýœ@V³ÝA†|U©Fcj•/ir²Ñ˜<9Éêv[í.„1•ü'9«. c™Yç,Ĥ3«Ò7ɳAX̶xö1OiP¯–z\Å.Pì†ÁúÊÿ¿i]UÐ;Pm5Gô÷c{O+\¦E›$‰ ˜’VáM'£j“)qŒ*1­ËB¥˜‹ø"f‹ˆŽ Ž'c¬tdE­¿ Þ‡l)9&ôG­ˆ£°åAG_]ŠÏ7û‹|fðB]EìÆN’Çh}³ÊüXß;!vʽ@4“GY\ˆ'Ë6ràþ׆eìâJ¸r›[}n-ö9ˆû {ÇT˜TcêÃŸÐøÀ_‘ä3ôjkFaŸYFoJ íT¨p3Êý˜™m vxWÿ(’“9>Ê™Ÿ:£Ò»f]ÝšZ§)¥Ü•K9}>¶Š zóÍ=OlÿÆ·w”ú»oìøm®¦Š µïèܸ;yÆîÖœiùñÜoB®Û–ÆÔ/Ø\ºñ‡ã3jw<=cÚ‰½óÑöåà/ÐW)‚>E,¤Ž3²QâP²Et?/ä‡FJál?Q$ þBçäŲà⢊'cöådq÷€P ÏN©ÒV‚PCôä·—Êbõ’)m%“­ SãßÎN'>¸»¼5' Û<¼Áü…x† ^%´ª¡7`i€3rè8l’è}³lEÚh& ÃîY\O¢ùD¸5˜gMoHÑÊŒvÙ§1ÔœÒÆ€ÌiQb1rsj0IuLŒ)Ì.-þ)¾ÖÎxx]is¢h©ÚWÜ“‘6sùM=÷S.ÊáD+E¯¤m|wï ”hƒg1Ý/MÏØŽÕ™€ëJ‰± ù.ýklLPxBëͶ۲µÚÄl›=Û«%x~Ê£mmNÁ2Ï}›éÑj=™±±YÆ“…Öžíƒgñ>(vt*wTr~T²™öPÆÝîh˜Ö—Y·¶Î™[Už)r½éT=ÉÊõæùìR•’/½§1¯J¬š•^·¸6#5׳ý¶@U~zV±'”­Òª$²óbˆäWF}f"¤û*F9‡ÎWGä4Miu»¢,ÉqJµ;ÃîÎu)LY­ÿ–ªÅqþì8\çN³šÂñjc°À Ï=8ãùÁgø, ëN;ÚÇ»‹µ»Š< æ@XÐV…Ç=žž[/Àõd:K>MÿÞ2Áãǰ]æó0°¸Ë_Gìe>#ž®ÀŸß°=¨/ˆÌƒbeÚ=es„˜ ~ >*¼´©Ž%E'è0#_oÀŸª½ç¶›rB¤,J(—ˆ²šçç¨âù¨{:~#xŸÍÔßϺ—x®Ñ†ãã>8¾…u½Oz(fE¾‹›àw­‘ïÂ߀³è7PŸ¡ÞOãd@Çnh7ÎÄì(øçãøãqù§ÝÙ NbQö0Nh,l)Ëá°h(Å´,¥±®ßàϳÄð›ƒMhdð0€äs@¹\!Ž®ÉÓŸgƒØ–XB•ÄbýÇÖt‰ ì².qº…ÇÇùSd 72`è Ý_Ô‹ìOõIK,©ºF.—FU Žé˜ÏFõ¹$éÖu‰þcKºØÅœ€øà8Ž…õ 烠”¾ÇÇ ôõ£ôõ£è:äчƒ·à‡ÉGPïArÓ b:܉ʵØM]p5G¯’ÇCσÔc`[Aûx~‰õà™3lů·3ùèðÞvú‡¹«€ΜAßéã×þ7?x ö„ÇCß@5jÈì^œ$ù«½UBò‘P⹊0ZZXeà+¬û`ÈŸœ ~1Q샳ôšþ¿Š½8tï=ì'Í€{ñ t¯ü›œ>ro6ѯ9O0÷ÿJ½‹½=¸î­ #Mìmõ€S½àÉ'éß±æ‰DéêøN½Õ×ïé=¾©¢bÓñÞžã×T`•]m(+ÛðPWçCëËÊÖ?„ê^ÁU`÷Lôû kF•×[“aµfÔx½UV¬Ö]™f±¤UºÝ“Ñëdøûu«‡2Ž ñw@ü³áÜDd¡Ì¡«RèoÈm°B&Gÿg*S ~Úµx¯q-ÄÊ ±ºå+ߌ;æ%J¤†;L ¡ugÚ³2Ô:õ0-lYÂä¹yY©Dʘݞ³!´V¤rªŠBM‹rfvòìÙsÓB9ìh…h=ä÷:Ùëɳt¯*he¡D–Ö¡”C‡Ca¿CϢ饗•=Ý›ƒ"r¶C'·[–U¯kù¦Ü¦$ÄB³l{h{¼]+YQ;| -ð:ñ0_D°cšæ7ðÑ›üá£ñ±ÑJ~ÃÂ5‡ñ›5G)„»ü!x]¨Œj\DËà|ê ÑN~ÍŠŽº òÿÜ¡)Ý—1%`Ø?͹ÙišØ$C. 
683­U.W©~‹“Ú À•öplLJz–Qj¦¯,ŽXÈSX\êvêPÿÛ2©T†{A{»ÚeQD Ålð‚D.Ô]˜!¬Ñxà3’SÈ3œAHÙHO¥ú¨ãQûv jvXÍ ä_‘%fÐlÉ]`;µþÒ(¿}¦]í±(€IHe"ÜÁsÑ& °xÔíg€÷B"  9¸®O³D¬†ˆ¥Ê¨ô²Çö¶!0TE5¾„`5 7˜mŽìJ§^m‡Ñ®^~êä ‡;d³úÍ’œi ¦åÔ¶lé ÔÑP¬®Úô16W¦/³¾9¥ÙhÍ>8#ÐV•£Â^3%ÅÛò€ÁœgSÈ“ÀW2±L-“â”_R¨¸kRãM^SM¨z[oªÌâÑΕËQUEÈ•êñôTLÙá´6goèN“[“tô¼õƱ͑ž\¨Ã.úŠ;ÍlðöQêìÑá?Ým{g§•]ûÌ"üqâÜrû¹x(¥kseûÊá8ÛpÑ Ç‘å·èà‹Ip?Ñü7­U°Rª‹ŠÒJ–ñm†3`ýk8K'ÿΔª×§š¾“ë°‡i|*À:"„#¹© ?ÂbTØbŠ…?H œs×^UœZ™§õ;œ™^·¹Í¯Ò«†ß 1¯Ç7ƒ'`9·Aã'OÄzD¿ËArcÉ£Ôápà00þü ca½Jû¯‚y´ŒÏBðY´Uˆ4TÓÝw½£:ÝŒ`ÛÃÕ~ÐÿA&E¦Ïdz¥ñ¹Þ`§+¡úcýqf©¤éå™Ýûg„¼5óótêÌEwMŸ²©9·Ûø–zK0V¦5qXMêõïÊzʰu«òâó“´63õ7žÜ¨R§g;·Ö—mœßbN_jØ¿¤ yê²+Gjši :âýÔÞ9kµ–]ƒLŸåÜA!ù)¹ˆ%DAIÃeA ‡JÈhÁ#¥\™ðŽ‘éAÆ4¡¡µatñ šïT([K<™2‡Í,üZ%0¨CxÆÞŽ«?9Ø\¸îÄüâåmr]œNB*}‹7í­;@=Ò‘>gß“ï/ZùáíS‹V™¦ ¹-¼¬µÁÉ ZVßÚêÌMIÒà*<ܲ<§EhII0&9%y ÷5öœ“<ï%ê—}^Ý\(V룅„ FXeNvªzžÜÛW~óÆ}+òýeëªOîh‹6x̹a{ns0cݲY•¶@"`jϲäsжÐç F¶Í.è³Åâ!›`ÚÉzˆðH›Qº<Á(¹=¡£ »ïXY´êžŽ…wù«¹j¹\tu÷jóâ¼áXMW°có]O÷ö>søÚŽ`°ãÚÃÏôö>}׿Žà]Mûÿ´âª÷4Ç—Î\¿»zÅŸö7aoFiyñš#Ózî[UP™>‡«*¥¤·dš$rézOÞŒFÙ|ødoÏðû§{×|rÇÔ©w|²¦nÿ¦¹ ­‡?K·€•*0Fº’*mGµ´G<@ºž$f‰yTÏR0ŠHpObXÇᨽqµà·Òúä9‹^­ø0¬háêÔ&ê'ÐÞuýT—<>Ç­wH¨Ór€æ 6(Ò%h5­ðc*TuzÕ0 kQÕÅŽ¿K…Ô;øÆØü®lgyN@"L2á[/dÃÆÏ¿/#—BçÐ}Šq«Ì6slð5ŒGzEãø Iõpâ÷ƒ¯Ã¯…®Q‚”8·ä¸I³]hWëTfî$Üp¬äX´DÈ碹­Œ ›Íá8åÐ+ÑV²¾=9¹}}IѺö””öu…攵:!ÅlICáHC0ÛY Äf3<2ûÃý‡ÌRÎq„™¨ÓV1 xXÅabcVbJFOåH6¡t‚:> úv;§ø›ûöíQŨ”·ìßß?Çí¥?+öaxŒ+Õdaró|¥ž¹Q©VÊwïÞíœêOjŽßs#ü¬’ßù·‡ñ;ăg‰cpÍ.@qWÄ¡~ð›Ã¤ñ ­¥3cÐFï[Ã~Ô[ó9V~qvá’öÍ2¡N-Vª4ö£÷67øs™d§ºpÛ˜Hm”R ·æLÚc„Þ¢šï¶/~hy.g±¼ #‰NüØòç6ëC¥ž¹Ñ2‘B£m»¦YíV%Å4¢6œ7ÜdM°²ù‹ç-ªºþ•¥cÕLÍö/ƒ Äö"zå0fEŠF—éÂ%Ã)åÑN£ 8U2&Á“EG·Hí"ÉÕuu„âC%yAøHöÚî¬Òm¯­ÑabµAÜçNžÕ^cìƫԞp¾»>{uWfÉÖ—WÆ>s_ẨõòXµÈî ¿Íhf)±Í“;ÙÌW§É}å󪻎_×Ì[#ü‘gñ¦Z6J”›]6Ë’*—77ß»±–=_—ð‚¼10+F!R©yŒ~ÕB³‰X\º³MšŠDÝ™:ô!Rð±¤àhVUØì¬–4]8„ñ\í=~;ÉÜç._Û˜¼ûÃ-&\¬ÒKú|¥‹›…Z•Pg´©7{²}qJ»Oçg´®.«[¢/LHWët®7E¥‹öM½êôíìÑ1òè¿Iòü=Q”XÂØÏÖhåÌ~ÖàßX,â\ëé`(=híТ¦kéîàCŽ,.󨻹¾×3¢rDÆuÚ“:1¶WÛ|ºþ|uþ$ža—ˆ¾­Ô]÷ü²Æ Ór•ë¥z©Ñê6Ö/ŽÖ‚{°ÝH\gu&ð£—)]±ªãÂòÒŸ#|ÿÜvߺ²¸ÂÎÔr9aQšä¼Ås>CºP ñý7í³ 
Z@3ó_˜ùÏO«èȾvÀ¹ƒrêm>ðÈ©?é©Ó:êM%ðò©·äõɹøDÞJ‘ŒÆi¿¥Æ÷c«5ü꫃‡pŸÃDûëÎxL—0t6íQ§û˜Z9~f/…C‡ÃÖHç`+Sr>T=É„½4~³½tÉXJœ0'«°x¡Û,Õ½ºt›Ü,#¸5Ôs·êšåSÑåGŠ_³‹²sJ›DeÞ-Ëï_~2VÉá4á5?ƒ—¹?rZ¨…2s‚(©uq1|p](«åçåîxàvû©…à:Ô2zÉdõ࢒|Ú:A´”2FŽó}¢²íÎOVMÝ×—ÑzûÇ«§Þ27“z3&.¨Kl+õ¨~);=`WâK¿¤ø|nñ®wý Ôþ½¹º}M©1{?õÛÿµ­.5h3{n>…æìA¨ü§¿Ù'ËžL¹‡bJàÇñSº~J7ЂóÈCª«†]þôï=|ðö‹&¨Ý*ær¤Q«ÀÊóŽáó‡Yí«ÐçHé–Âtrš Gõ‚áÄ5Æ¡7¯IL"•ý¥ÂhÇrZRµé¹éÍ*maÚ-úé[ï™Ú²£3hZšG½’¾8D\´ë²µàHöìÞéi…“+R‚±|íìÛfÛ*§Ü¼iU.uJªdêRÏi—ŒÑÙrŒ£3P6qxý V@˜:ŽÒð§nkM4f´d<=å¦g{ZwÎ*TlÓúâ su0©"¤“…§MÖÔöeÅd0X§R„L¨eE~åòÆ4Yù뫞^“íißÕŠbóTÒÊ:礶 %9^ùGCZ}JaMm›Œw äC,’~"ý|3-Åè%Š Jc·'Q\j&ö¡·j‹TüÀ?´ñ˜ûèû0?ءϴüx¯ÊD=j,ÚñmýWC^¸ /2#Ïd ×#ÎèÈ¡‘âuÈŽCFdйÍmÔ›“ì…¦ö‰ÍÞ_™»¨³Ê4ÛâѹƒÅɹSÂê©õM¼A>tUfy?‡>ãºÊµïßÒ`ðåÚVÚMS°¾/¥išÍ‡¡óüK }Û Ò¡z:0ƒdÒ[ZÄ61×"8®©Ìü«×†Œ'5±x ùЪäõùãSs|ž>.æ%šW3Ñ™:8·"úXrå¨s&ˆqìÑ—Œ¨Ñu;(«€“¦o,ñ”õ„šè¦éi Z’«v¾´¸sߌ>TžHݯjPßâÿWßb/hO™Ò7©“ŠEw󦿾—n¬Ïœ»oJåuëWdRiZ¤+"D`ߢü2£?CÚcóã!¥ªåë-‚T±^-#B€{¼ú 6èŒ"ÙNãKGÿŽQŽýÄ<Ó*¢7Y£ô÷Žjj(BïÁ?ÍR8ðZ¦P|“`–ÉÕ*ƒ(J¦6(™,NmHÑü¹Öå–Í“ýUy2·ä%¥Ï—¨P™Tªhi†Ë–Í3…Šâ½Þü:ø+VñH¥3có !%­1¡Qy=C‡OœQÕL©KÑrÝîä†9@m®7zš=Ç‹UźÂyÒ¸ Q Ðkõ‚(©J/ÇéK“?ǜۨ‘dùŠzóŒRÏäL{t´*F튷«wreF•Ò —‹dQ‡vþ ȧJÈkFGqá0:±†ñã L@ðy'Á!®’&ÈŽI©oµzŒ 6¼¡^m¦õ¯Ë…ê›oŽ‘õ€¨z¥à"‚sàú¹àÎÕ?%µ9œêøcÖêЊº³ŒK–{™>P“Y‡‰IÄ^ú¹Bfè£&÷~ÜL$P[“~òQÛt`ïlXŠ©y`çi°Z@ÿn$t8(¾ ߢë_†óO£{©ÐçÍ'¢1y€Ãó€ ûX¢>§¶mK¨oRв»:~3(¢NÜ ÊAÚêÚCÇM¬›ˆBâ>úlrŒóØÇoÎÄ?ÑõÇ‚Æ`aœ99Ne ÚÑþ\ãø Mð”#߯WŇŽü$Êhwùî` áßìU0¹2¯D”.)Á7kú—`u§±œ÷™=}ÖüßÄNz/Ýêw½KììÂqÖƒJB€ÿÂÒÅh£¸šÇÁè~±ë™æì •xmA°wZ‹Óš²sä¾¥ø 7gÃOÌ½Ž–KnÈÁ‘&î(EßÄ÷ŠäJÎc!!Ö VªyÇRýz«žÚ!R(9Üð½H¡â=”„÷ÿÿzÖ38El¢kc]#ÙèT€@Æ)-®ÓSï„òr<€#Åu|>Alz_åî‰böE;T§è±ö°ž!Øp,ÚFÒòmC™<‡ íZü<Ñ›ŽÜ¡EOüµxõ‘[Ck—ß8Ì•ð;_8º»/Ó[3/§óÁ›×:×–è¬á|sZÝÖ®p`ÑS›¸“« °N“üU(ÿVV)Ä6Äò˜'ôùB‘v×6ŒÜGJ²‘ʬHà’‰"Jm‘ºËí1V%³•+F}R$_eÁ–æÎ+·FEö¬ÎõMÎJ’ë²Kê}¹sËеîU¹¾Ê,Ÿ\—UR‡UÚ&ayªÞgSMiÃïìè:’»¶V×léNFÏýDÏÿLîÞRS½µkø3#÷ÑÐo*'OBÚÒÇôîæ°•x¤ÂŒqØ#ç˜Â¡Q%°/ZK‹óu„Ôt„ªÓb¹‚äpBcÛ윦ëÛbÍ‚¿½±éúްg÷ÓÝ•³­x”0.;NŸN”JŒÂ¬Õ×tY³užšEÝ}ÄT£?VV¾ë­«ß¥ŽœYã/yõ†Zo‚ªfeUœ\-—陘ñ6ê!|»›®P:? 
=Ü1gš£çGOš½½ÎÞ/ÆØ½:¥È%'× #×gèñ挽݉ùj‘JÌ“;sÜççÏØ/(Ð.¼€œ0ä¿’mì»Q}/seºã¡­tÑ0Ù6 èßS­rh£ è39H{tð5ßk`ßý[¶H s›Z Ê_Û|ör'±2RÏÌšN=Jž`Öh0ò$>3¹gÏIêÔÓÔøïÉ}R@œüíßlúûÍSÔcdTd_À ¶E`Ÿ‚GÓÓ©RªÌˆ‰ ÁñþOðXµÇ”P‹q>ùú°ÿ5›iM5ÅàpÝâÛÓtlÎÇš·¦pøY³öõê¼ ÌƒÕè†é9|h¡»Vm &5îìóx»Ä )‰/ ßPeŒz¶Žã•mØ·t¿f.„Ðÿ~ÎSÌ„ùL»ô„¬qài•bäÔ×êqË!ùÁ5ø qœ:yþ•ƒÔ >H°ÎCz¿†ãÜN>ÃÊC¹³ðÅs4„‹$@ì× vòé\F¦pâ\F4¸WN5 À½‘äÙr¢ŒˆèzlYÊEϽ0»1ÿ}Kà› Yퟟ~&íQ¸ÆÐuq¨žÙÌ‘™=È´"grˆ1(=ôr~[x­†zNí‚fL¸!3{p…|Ê®7®ÉL¦ÞœU#¯ñƒëžX’¼»¯iæ `𝨫¤nëÜß—J¥à}Áíáé7µŠ™X ’"ß Ž«˜v¨ÿƪv+ÆZ"úÉcz=ÈÆF~ôFó GLé©É²·Õ¸'§[«6óì¹+§¥ÕÞôƲžƒs’µÁÉÐÓQ”ÿÏÎ’žŒ¶9q¥3sgc‡ù|ÏÔëZ¾±·)wÉÓ Ö®Z™žÑÇ œ£×‡ˆKÆj– yš½c!u@]–ãö°`$‚ICå>•(6-aF‡$6åžÄÜx©@—dóä&È…†¤[{›ªv¼¼¸ó¶yúÜee¥åéiÜÙ›2ÿí¸Âöd{¦K½f­-Û«%ö¬Z·!ťͪ÷ÂN %vÞØÕ÷Èê‚P×Öú•=1ñ§´ß±4/Ô¶®tíãд±â¿gGA?A×97+‡î!1®F–3Ìeº•.ø¡£ÜK~Ê™qM!: Úµ¹"{FiÜW V­Ë ‰·j\&)vÄmÿè|³Ämú”ÜvɯWOÝ3;Í]½ `êÞ¾twÍ¢¢¢´˜pcVAH¨9bݹ«$1`KäKù­ûúÒ=Õ‹‹Š“ÕðKùae°ÊNõ(øˆÐ3µŒZärø•l|•ËwéO±1Aö‘ñUnÔ£˜¤åÑÖÖG[~ ž¾ ¤mpUƒoÀžb/ƒÒñV¾¾Ö‹¯Äž ¯}Áš¹v/üÞ|ö©«[Úœo„m½Þ>Ú³õêˆÝðÓr¥§yC=py>üÔ¾5åÄÇ?}ÿQ}ýòæ‹ yѶûûöQ¯S_ž­‡kb6ÝöHgý+nHËæ kŸ°yk¤­n:r•6MÐ8÷‚n´£z³^Ø/—éEûN¤»öÐzöÂ֌ش‰¨ð+—íœ7Ytžý½Í1֓ļž|ð ÏJõ_pVªyüY)R4|6*í§šÇ}þ_ƒÁ‘y´åþ^Iºà ÿ‘dMØìüJem\sæ¹?äÇtÏ“$Ö¸ž'—m#K><ªÙIÿ?¯¤§ìØGyµ^¦Á,´SÔJº¯­ué„öi?xŸéN­¤{¶ZXLOöýØdšÔËõšžºñôü÷­]É—FüÕÿ¢Ïë†P¾ÿ¼é+:qŒg-è ²éX[¯ÓçÑÉÇàõÕ4®[‚Ñ<Œ< Þ¥ÏOÜ¥eÎOôb÷Áû» ~μbý|÷ýÜýŸè'\§î€ë:¿jA‘ üÛÌz~Âë[XßÑu Ìu‹ËR2U1‘#âÌa\3s"Û¶ÒÕo£kþÌxÉ$®Œ§öJîÃýN/ÏMIf 3ƒ,ìqPñÖ›<¹Je'88É#C†$ÉKŽ%€CŽY™Y³â¨iï9L€MӺȇ[1žTRÁpàÿ+ÌÑ@9 gË0ÎÛY?Žºn¾¾ƒõ÷‘ëì“Ãׯg}3êû±Ã×o`ýkÔõUÃ×ob®ÓÏ3×Ѽâ0¼¢yÎy4ä•=ÍÆô©3ö˜Ç§DÊi&àöh¸*5Τ‹ä‚6êü][ÞÛUšÜóÿz»ö¨¨«<þ{Ì€ÃÃ30"2¤ˆ@”š„O@D1ÈCÐTJÀ03,å%ø@ÃGíëd Ò2«ÝͭΖÊÝ­Sí¶X»îikµN»Ýð›½÷{sç7ü. 
wÿP˜Ïùñ»ß×ýÞïï7÷~?m=¿Ýº¼ik¡y»ŸÙ¨3EXgd%­9µc^bî!ƒmÏA1ʨóõv<¼cÙ¿7íºöË®ÍéQ©Ùö½þ¡“!0÷à¯í(OÑêUVê3Ôúà¦É䵓¶GC™mNNúçïëì‰/1x¶O˜!t¨UñŠ—¾æC'‹¼'yWM ðæ]>›ä]D}vŒÏuJè'®ÊK‘Žþ\8ù6Ó_c·ZBÄ3˜~¸RÚÒYem½W«‡þu^ðk»~ºPê‹Ï«Í~³æÍ²Óݳ–%¿8ë‘^î¬LÓön8óU;Z½Ó³–v|¨©WI ±„ÏqT£<èî âàW=òQ{,(~µ!<‚ÙÉa|;Ÿ*½“Äb~r,âHÄÜ#a‡rÐXOàQ¥]}ÒNÍKHˆ}D aOŸð'ç6Mĵ¯œQ¼KïçNp.îòkø~g½KNABrЏÛ-îÈÃ’ß#]ìã^I(ÆRç3 Ö:2f+×èƒâ’àm2Že¹ qIðƒÎ:zð 9óÐÙHï/”€OÈõÇœ;Á'éÈWÅPï\/€OœŸ£z'é û¦º|™¥ºðzÀ²½/צ”Ï #Ìa÷Xó*uÛùù_Œ}¾;ßnÍñóu²Åww»ØÃ¬¸t3/˜Ä ÷ÕÓ> =÷»ô”D­ß®»”êb _?Y^Óƒ=b/ô5Ñs7tÅg;P5ΫV‡çÏÄ&…H¢èU&];6$"4ȇwŠ^š’VúÁ‘89BÜ{·}°GÛ+ú6ÄÌ5‰©|:QY£©¬­øÐ‚l‹ ÛdyþÑùvt 'q̧2ñýpvR”qìb˜+¯‰Šd¤Uì†Õ‘5 ¯¤–·õ}XסÔòö¾kÚ®ŸZ!õ%ä×e—¬Ê˜šW—ƒW­¶ïÅYµŸ\êz0£üÜ Gí§ðÛ 7ÛWwnJÏ?òûÇWwT¦/?ò;qÃ9ç Ø&\¶Í…®Š·ñ›Ü8Ì‚ä×+®·R¼Ó·Qü¨lKœ×--òy~bü(ºÞÕùw+÷ÒÓ¶„ªNäê*~ɽáaÉ93kRßyêõºÔ„åµÙ‘3t>‹Oõ;égµ.Ê”^ëK™uŸ°°8%>3É(e /¤—9–-;X¿6R7=fÈi‰àÏ…‡ ]Óöª#Ä“æ(Ä“Ež[oA]|Íý- k4™[“w7!]çànž'¦nÝ6ͤ8ÖJv)(:Ì`O_<Þe7$.YWº~Ibvë•Ýsvï(‹©ñ1¥Ì]1ûÒÄEëJÖ-J\´ïrýì "G¨,Z †Hk¤Ñ>Û6eV|ô”™¹Ûr¶]>¸Â”Ç6ÈÈDz®Ðuù˜uU•)ãVTˬfÆ£¦Ĭ{œ7Q ˜´¿¢¾î¾—Ô§xÏ/Òß›3B½‚d´$WLÓ)µ’^ëšÍ{+SØsyšs¼ÀJuð½ÿÄ •Œ¿š¼Ë«º.‹ã=B³“$—æB4#OÏìzr_&–NÛ+Iy¬þ'±tD– hîo„¸•ëã™ø~^Y nCÙØ.ï¡Ò» ;¹góé¯õÞòìé7óó7ë|þ˦¦÷St•èÓÄàÔ"!Ú<—´²!G,<ùdÿ3«W?Óÿ$þ=§ae’6€¹nÙ¢©Ì­ü$n¡xopã'cå<©¼ÞJñN|Å{à6Š•m…–á Ȇv¼×g@Wóe‹;8àñ1YÙìkBÀPôÉSÈ4¾z©[úì<75Y»Qô¢ù©^¡‘Öàì©Ó'>äå£ñ7úñf!*6xð’g0¼‹]†™3 ƒ•ÚÞ8óà·¶»}…UÔáo¡§D ~“Ÿ˜¸}ö7êýøã=ÅÆ¯‹HŠS“<Û]‘am˨.6¤Kæ˜t±1"{œZIÏŽ\ÀE=F)ÊúY~ópͱhx›—vãÔv(ôö}ËÆªùÕÛ´9Cóó3U@Kë—Áÿ·qzèçyR,ÊÆÉ–£Eý|éR–äࣧ&ù6GèíÆÿèÒâyéúµ~ÒÐU!A|*Î,-(ÌŽ]šÈ¿bLTû‹Êi¡r¶qoÃ:u­GÏüÓHMò(Y§ nãàTŒg#FÆzõ´¨÷2 •æïüЯÂC¯˜Åï¶0«ö¥ÁVÝÀÅè‡fÒ»V—Ç/’åÂÏû§á¹*Äùô2‹Æ{DÞ„Ï\S¢À\?üÄx ¼šuäm™RÔm…S.®…hàZÈœsb‚kᱟGÄ&òϨ{ºé’WÕfòQCå«ÊÝô ƒ:|œFc÷ìW(롽@uiáÞ¥:ÎÕö¸pÞÄ}Lq^[E¯oWàV¨ÞÁ}éÂÅ{Qü÷à˜kåO`CòþÀQDrÆÏ£û$â.6wó*“¼n»Â)u¯ÆìkJª2è¨HË?üÞÎÊÃe¾&]p¸Ùn³¯¼?z¼+cZlx€É÷¢Ú’/¥fÛCðîÁÝ=[¾ »ÚW§ÓëRÊ:ÖÎÌI6†›Ã|Å•±Ü¿ý‰>͹Õs¯¶Ÿâ]E@øó *àM÷»ñ/¸ƒðÅ~Ý߇K†Síd,îç¾a,jÓsrjLÅuó Sø©ÀÀ²æxÕ½¶¬²]‡ ò÷¬²§ø¦O(Zéf]©`DÚâG ˆŽ›f4Ú‡j1óÊœêgKóOØQtWNñ] v¬™´¹Df\«‡µÈ”u¼ ¶":6/së~bpÞT¡´U½¾}wß§£@‰÷S¼k-G9Ê×(8Êü§”ƒ÷-¯C3|çæœàùŠ|ç¶_@u4§!œyh\™WqYóRUþº#ŠŸXL|%=r0¾~­û ˜ïqýæÉz’› 7£?þw8çÂó5úïgH‘ô¥´èmjS›0‘Ñçz37z(‰ÆBÕ²ð 
Èd¬gÈàð3Ç€ó:Dz=qL®o—ßUbü'`+‚w8a;ærÎAß§fsc(Àâw‚˜ú ç|k6M!w£çº,ØSBžë29ºgf Ì §ünuøðÏRì·táÝθxÅÀ'¤Ÿ…c§À{)Þì_ xKµŒ#y0?• ï.pçØ¯áþ›H.­`㎠’K Þ <_)·eúR›ùVÜ_|—ÚÜ·æÓœWD¾ Tî–íJ}z\8oªVâUôúv¼—ân\³Xa—#ùn¿–^Eìuͯ€û¼ù/dÎ~,5Á±¿ÃǼLmpÒÅ!óna¼Ý'Ÿžc“:/Ž‹Ã)±˜•Öé[Õzz=ö!ú6ûûô#û$A¼¼ûbÏ©x~¢¹‚JÞ—Ì’º…o>”ü?#ÓGñ;n˸“üp†?óeKfVîí¸‚l Þ±Œ×0‡ŒÛA?ÌGÂCή‡ü$Ìsáš× .êIÎìôúW¸CH»`¼Ù5‰»olL+êp#÷ÊuF ŒŽŽe#b=μ%öÐ …ß ó–èÝî¶“ö0Ì‚·(ð½gïpãÞ…`ï]ÄÞš£×]w{k]ø„@îд¸ìíýº!WyçÙ*Ô>ùðW|>V¿žÒb¸ce;òàWbwæyj÷ËàWbßv…?H^'x‹¿~%x‡|²_é º_©iÇÀñó'ñÁ{¹pü¬5"ãcrÜ‚ƒã+v5Ħå^DPY{\:ð¦Z¥UT·öG”x/Å;”8 þŽà½œ›6F¦8uhûŒ‘{ã8ÛÌ£§ã`ÕoD§—®¼‰Ïø4Àd}Î%ÏÞǽðãFbÚ`ψÜ›X:1é8qCdéqÉÈ›äçÌÙq dßKduãû nÞ¬À« 7¼KþnèzjÛÊîCÞ3š>äAèï׫ùåÿGr.Ù¯~ô}ÈëïHrä7oT»ÿTz"WÀ·¬Ü/L¡è×Ô{zŒ_<õ ÐÑè¨yÑéAÓþ+þu \} ¶þV cþ+þ \ÐöÿµÐxœ}Ô hÕUðïÿ<þ·×]æj»=tj¶®Y®¹¹qµ;Ýr²6u-" jWr-å$lePÝ‚8'fLÁGPŠ+e D!D*I†sù¨,÷¨`m¥÷ô=ÿûÿ?ÁÝ…¿sÎ=û?îùí«Rˆƒg+ +ÌNË=av«3H¨£¨¶ô[Hè­ª„kVœóe\ßÃÚ‡¥b%b¥—Õæ”ž‡„¥“4F4È}3è1އX§P#½Äy7uÑ!Î_ÔMf»~ר <Ã^sYÌA¹lÇ;Îä‹R$œ³ˆºeˆ:Ÿ Ã9€‘A‡¸„gŸ9,Æh‰™(–mX$ײ>k~%ø@Ü…í²•Ï¹÷«4éϱJv˜³¯µHŠÙ(·šoÅjþö«M¯h@±¨7}ò&läžçh¦ìƒÖöa›¨ÞÙ‰N™Ný¦»y¨qŸÄÖ~ ]² mÜó8MQi¾K²žc­c·÷¥©TDwÒóÔ*¶á±Íœ§1:ãÄ2s¹Þ@h¡½¿ý.z•:i‹½½@ë݇ð¨z³u ÊÙ7Q7=ê8Šô{hQš#vè6×±æñ¹~e-³}¦æ¢‘ãZ7q‘«ø<´†öên´é4–ó7%›ÍÅ+#.OšMrûó¬`ß/±}Ï3=hÏÙÎmï[¶÷-öWÂö™Íã=ðn3¹¹“3rð2)lO6—²ù4¡4<×ë˜UáÜ:æçUX6·ròò+äX8Ï|~¦MÊf]˜—y›}›“2q‚Ÿ…¹YçŸÍ«0 ˆYuµŸ[¿±F)/;Ïf˜Í/ÛOΨé÷r+Ìæ×$lŽe™ïƒ< xyö*l†M8ç ­ÙL›Pí ­y—‹îÇü0/ ÌÄ€¾`õÛ˜ª~Áíj‘ÿ)äÎxœ,Ýù[Tù™ÿÿSÓ:!júšëꤘ¹„–’¥€R'Ì8&5ŸoËh½‰BQ€-k\Y´µéu²t»ÓÑF¬[èÑ£¸¶)»ã$uòO|ÿ‚y_÷óúþò¸ŽUuêÜ÷ý~ßï:ç…çy>Ïsÿëù fï{Þw¶þ¿Þßy/¸§r®väKîÒ9<ž‹·$ Ýu¹Ûý_9éžËcxÁ=ñþΗn¯ô½Œ031 ³‘wöm³£øv²ÁvŒÚQ|½Ã'vßoð)>w»¼¼¿SÌ/xÇÝIy–—Ü_åîoò†ûP>äñ¯ÝWò·øÜ x«Øw•÷†7+íVñ«x‡UÞœ—+çÑÞmï¶Ê»ç…åï_åC^o*w^Å;¯öÊÜŸe+Ü_d%ÛU¸Eï¶Ú«Æ­ø ¶a/¯‰añø0ލb«½QÛŠvµ·dúÆìõ¾Ÿà~<€ñÆ#8Ž8‰SxÊ+‘ïzyò4œÁ³xÏ㼈Ó8kQù®Ê¿×Œúƒ (¶¿×ÌùRF”Åßk|µ­ñ5cxœÇOà;xR~K•ü‹ a%Va§W*{ÙŽaÞtÿ#—LUC¨æ~<€ñÆ#8Ž8‰Sø®W ßó~ ß÷êäšßReìÙ3xÏáy¼€qZ¦K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹¤K ¹|[ëÆä*\­ÕàÛZ7l;G]ðmøYàËB÷±<ª¹ñmï²rù¶7ƒWpNóäÛÞ<ÞÃ÷ok%Ñ»i%17`fbfcnÃvt­$f;FíèšifL®!ò5D¾†È×ù"_Cäkˆ| ‘¯!ò5D¾†È×ù"_Cäkˆ| ‘¯!ò5D¾†È×ù"_Cäkˆ| 
‘¯!ò5D¾†È×ù"_Cäkˆ|-«ÊZV•µ¬*kYUÖ²ª¬eUY˪²–Ue-«ÊZ¯Û°—Wư‡x|mmYËÚ²–µe-kËZÖ–µ¬-kY[Ö²¶¬emYËÚ²–µe-kËZÖ–µ¬-kY[Ö²¶¬emYËÚ²–µe-kËZÖ–µ¬-kY[Ö²¶¬emYËÚ²–µe-kËZÖ–uTcÕXG5ÖQuTcÕXG5ÖQuTcÕXG5Öiåo½¼>†}8ijÃ8±Fq ­&ë¨É:j²Žš¬£&ë¨É:j²Žš¬£&ë¨É:j²Žš¬£&ë¨É:j²Žš¬£&ë¨É:j²Žš¬£&ë¨É:j²Žš¬£&ë¨Éwô‰ögù‚{W®ÂÕn·|ÉEe±û… b –¹Ë2„›Ü´ÜŒåXáNËJž­Â-ƭø ¶bFÜÏåënNv¹«²{0н¼[ û°ŸÇ÷Õ[lïsçå ñÎÃ8â~+Gq Çufòoºeù¶Î¾£žÝ#gð ι7å<^s¿“Ÿ»oäu¶Ø^tŸÊ›TrÉôý“½³úZõT_›031 ³1·YÍ}dä{ ÷á á0Žà(ŽY54£Ìýxâ!<ŒGp'p§ð¸‚>OÍwð$žò*äÏÜËw½bù¾W$i5÷ý ?ÆOð4ïvÏâ9<ð"Nã§ìõk¼„—m}Ÿá ^ÁY;Í[ó¾Ëxïâ=¼půחÄgø\ÇzQ«÷ÿÈB¾¨õÓìŘüÍÿ¿È –`†p“:ú4ÿÍr¬äñ*lÃ^ŒaîÅaÑÑÿA3Ó3}<«Ñ7÷á !{iôÍQßà~<€ñÆ#8Ž8‰SxÏàY<‡çñ^Äiœ%Ú«ò%­“}*½¤YdžÀwð¤ü®÷-w_¦à‹¸Ó03°Ø}-ƒX‚eî?e7¹¹˱Bãû]UÞž­Â-ƭø ¶böòÊ>ÜËQ†x|G43¿«q1Çp\W+ßÕŠaNºsrÊÅå1wQ^ãñëxÓ=”Køµ*ó]çK­ zÖ—k9úò0¸ 7c9Vàv¬ÁZ¬ÃzlÀFlBrÑü1÷á á0Žà(ŽY4Ìýxâ!<ŒGp'p§ð”·G~èm’§yä žÅsx/àEœÆY«°æ’y‹*ÝÆe¼ƒwñÞÇøÐFÁ÷%>ÂÇøÄÆEWæSù=}Ò=•ßÒ'Â÷4Íq=¦a:f`Ž{Uæbžû“ÌÇ€{MntÇd¡«•e<ÂJ¬Â-Êî{š‡æV|[± {ye ûpˆÇ‡qDŸ‰ßÓ<4Çp\÷=ÍCsÒ½!§Ü~yŒíãn«<³îßäU¶ãl'ؾæîÊëxÃMÉ›nR.áíÿýJ>tß—+®Z&ñk÷_ò·øÜý‹÷=ÍgÅ ù¬Zi>›ùÀM¸˱·c ÖbÖc6bî´úë,×lÇ«’¯_Å×ðu|ßÄ]Ø…»m¼|{°{0j#¨5ߌá¯ÿ îÇxáa<‚ã8“8…§ñ žÅsx/àEœÆY_uy‹ÚÞÆe¼ƒwñÞÇøÐ怺À|„ñ‰Í uùTúé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé?]à§ ütŸ.ðÓ~ºÀOøé‚Tº •.H¥ Ré‚Tº •.H¥ Ré‚TuÁ¢ÌEë‚Tº Uóß,ÃVbÚÌOeæ§2óS™ù©ÌüTf~*3?•™ŸÊÌOeæ§2óS™ù©ÌüTf~*3?•™ŸÊÌOeæ§2óS5ó÷È®Eκ×åUÍçTÍ|ÛN°m3?•™Ÿª™IÞäñ%´™Ÿª™J®¸£2‰6óS™ù©šùÚ—™ŸÊÌOeæ§2óS™ù©ÌüTf~*3?•™ŸÊÌOeæ§2óS™ù©ÌüTf~*3?•™Ÿª™¯škæ›íh3?•™ŸÊÌOeæ§2óS™ù©ÌüTf~*3?U3ßÜƒÝØƒcøÜð ÂÃxÇq'q Oã<‹çð<^À‹86ÏS™ç©ÌóTæy*ó<•yžÊÿ²ÆÑ\íbò%­N/sÅý²FpEºßÉb]s½¬+>³ËxM7ñž›±+ÜcYɳUØ‹1ìý¼Ïe×!g\»¼‚sn‡œÇE·à½¬Y¡5+Ì ˜™˜…Ù˜ƒÛ, ÍE®9`Æ#êÊË܇ƒ8„Ã8‚£h×û/kÔÌýxâ!<ŒGp'p§ð]o³üÈ+§yä žÅsx/àEœ–4F¿—«ð%EµA]öPæb ÜYè~.‹4ß6¨ªöúe½AU5çÔTU©zêYÕÓÜ€˜‰Y˜9¸ÍŽ«.Ó±Ôef;F툪°Ã>;º¯dkEkEkEkEkEkEkEë[kEkEkEkE+C+C+C+C+C+C+C+C+C+C+C+C«Y«Y«Y¦"ù›ÌÅ€®X3Éœ,VdjΛ%XæÈnrÿ-7c9V¸Û²’g«°ûp/ï`w“2¹›”ÉݤLÎ29[ÈÔçþ_åu“ÏîLů¨|lǨŦøÍòÎšÕæ>Ä!ÆÅ1‹G³ÚÜð 
ÂÃxÇq'q Oã<‹çð<^À‹8³dtoám\Æ;xïá}´ÏÓ,ÍœßÊBU&Kkõd5nÅW°Ûp‡qDŸ/Yª¹9†“š!YÌ´,•}ãe©ªzgUՌᬽRÑšOxÍoð©Ì&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉ&žlâÉÑzþµ|AW 9ê5sµÎ0sÔkÊb·Q±Ë\P†p“+•›±+t嘣™lÏVáwVVãVÞó¶[± #ºËñ:½\ùºë“]®KvcF±—÷Œaöóø^b{‹í}.,qˆwæ(#nBŽâŽ»!9“.*§ð¨ë–ÇØ÷m}ÊähU±Ê|æ.Êw\^Á9ŸÇk®_~îÈël/°½èÞ•7ÝI¹„ϨùWî7^ŽºUG÷­wçeº½V*sf`&fa6æà6«§ÜÕÅæ>Ä!ÆÅ1«›ºx›ÜoSÛ#õ¹™£.¶íÃîyDç™9êb©ºØŸÄ)<®kÉß ;ß;xOyš?¾ŸiåÏÑçW“|ϳÇß÷¶Ë¼—¿ä¸¿Âñ<Í;ŸÁ³xÏ»fyí‹8Ÿ²×¯­ž¾Kl_¶q÷}æÞ“3î3y…GfmÄÕæ-j~—ñÞÅ{xàC›:'7ác|b3DÝd>Å›-¾¤Í7ß3¶Ÿ+ª\º,W]ö¾ýüZ—åÒe¹ê²×eK°Lï–«.37¹^¹ËѺ,W]fÏVá—Õ¸•÷|…íVlÈrÉ¥Ëré²\uÙqÙ=Å^žaöóø^b{‹í}î'r‡xÍ0GQî¹ê2s ÇÝGr'ÙwÊýL%žcî„|[ã’K—åªË–å ¯¼‚s<>×t¶«.»(¯³½Àö¢ûXÞtö:Khý•«þÒqÕ_·eºU^ýenÀ ÌÄ,ÌÆ´þÊõ‘µúË܇ƒ8„Ã8‚£8fÕ ¿ré¯\õ—=bý•«þ²më¯\ú+—þÊUÙã“8…Ö_¹ê/UIýežDë¯\ú+—þÊ¥¿ré¯\ú+Wý¥º©¿Ìñ<Í;ŸÁ³x­¿rÕ_¶}§ñSö²þÊUÙöeqú+—þÊUÙ#³6Öê/ó5¿Ëxïâ=¼ð¡Íõ—ùã›ê/ó)®Ø<¡¿rÕ_¶ý\Q婛˗tž“§yþ;9ÃöœÓç`žæ¹hhdÍãöJUØ|OÊ|Þ-ŸwËçÝòy·|Þ-ŸwËçÝòy·|Þ-ŸwËçÝòy·|Þ- Þ¿'_pSr®v­ò[:Cx)ø"¾¤Ï‹€·ží4LÇ ÌÒ^6»Ý2ˆ%X¦Ï¬€V s“›±+ÜAYɳU¸E9´b˜[ñlÅ6Œh~´V|,»°{0н¼[ û°ŸÇ÷Õ[lïsƒr‡xçaѵ@@k…9†ãî9“:ç h­øFUÿ´VüQ¾í.ÉËÔê3M4:¶}çܘœÇkZZ+lßël/°½È(Ütÿ)—ð™Kʯt6ðž{Å^@ë†bðÙ=Ã€Ö ½^놹30³0s0×ÆË—‡ùÀM¸˱·Yä¾íl×`-Öa=6`#6ám&øZ± w •×zeîÃAÂaÁQ³QÓzeîw¿Ø>¨s•€Ö+Û>ŒGÜÈq¶'p§ð¸ÍÍó<‰§¼·åÏtŽÐz•/ßS•Z¯~,?ðªä‡ZÓZµ4[´j™ã'xš÷?ƒgñžÇ x§ñSöº„—mÖi½2gtÐzeÛ³6ß´^™·ëÛ¸Œwð.ÞÃûøÚœÔze>ÂÇøÄæ§Ö+sÅf©Vª¯ä3¶Ÿ+žZ žÉÜ9¹ W»·äKîYŒA,Á2÷SÂMZ-7ª¯Ír¬Ð'ÔFõµ=[…[476ª¯Í­ø ¶bFxåë.!»ÜŒìÆŒb/¯‰aöóø^¢z‹í}º Þ¨¾6‡xçaQÖÕ׿Ž»E9GÕ³ÕÅ·åe*0ƒWpŽw›Çk¼æsÚFõ¯m/°½è¦åMÞg ŸiæoTÏêýÕ­zu«¹30³0sp›eä#/u¹q‡qGqÌj¢®1÷ã<ˆ‡ð0ÁqœÀIœÂãxßÁ“xÊÛ'ßõ åûÞ+òuÍFuÊVùK«¿:Åü?ÁÓì{Ïâ9<ð"Nã§ìu /[ÍÕ)挮»7ªSl{ÖFPbޢ·qïà]¼‡÷ñ®Ø¨© Ìgø\Ç*ÐgßSûÉS|×c¦ckF¨#Ì,s·d7¹çr3–c…»#+y¶ ·¸'²·â+ØŠmØË+ûp/Gâña´Ÿ)à'F ø‰‘Íg‹mí'F ô9uUÚOŒhÞÚã×ñ¦ÎÓ 4KMû‰‘~b¤@sUÏúr1ó1€›p3–cnǬÅ:¬ÇlÄ&$ÍjsâãŽâ˜U@³ÚÜð ÂÃxÇq'q OyòCe[ 9iœÁ³xÏ㼈Óh?1RÀOŒh¾Y·qïà]¼‡÷ñ>´QÐÊl>ÂÇh?1RÀOŒð#…Z‡ÿK¾àÞ”«pµ+—ßr_È|_ÒT¡æ§m§a:f`–[‘ÙXìü2ˆ%Xæ e7áf,Ç j®ÚvnÑ\*Ô\5·â+ØŠmqùòu]Mjõn“ÝØƒQìåÝb؇ý<¾—¨Þb{Ÿ+“ƒ8¤Ï‘BÍv{ÿg«Î(Žá¸k—8éNË)}j%_c¯·©’ýŒe¡ÎÊ®É}jU7çx|¯¹˜üœ}¯³½Àö"ïpSgP…êó™ÖB•Åå×ú„-Tï˜v†V¨R<:CÓ±´ækµæ›031 
³1smìÔwf>pnÆr¬Àm›ºÏ¶k°ë°°›ðÇ6+t†f¶ádÔ•æ>Ä!ÆÅ1Au¥igh…êJÛ¶3´Bu¥mF;C+TW¾*'xdRפ…êJÛ>n3GŸ5QùÛ'ÑÎÐ u†vKÚgM¡>k¬ÂvVV¨ÏÍ}Ö³ý ž¶ÑT_Û;ŸÅsxÞýH^`û¢®‚ Õ×¶ý){]ÂË6ëôYcÚYY¡>kl{Öæ›zß¼¥+ÐBõ¾ò2ÞÁ»xïã|hsR½o>ÂÇøÄæ§zß|êfå £içf…úT²í犪ˆÿë/âÿú‹Ô›••lWá]©7Í­ø ¶böòÊöáãˆûRŽâÚ=ü"îᩳîË)Í"õÔCy“ìÿ‹¸«_Äÿhñ?ÚEüvÿ£]Äÿhñ?ÚEüvÿ£]Äÿhñ?ÚEüvÿ£]¤ß.ß÷¾/OóÈ<‹çð<^À‹8³¹ÆÈ¼ET·qïà]¼‡÷ñ>´ì4Fæ#|ŒO,_‘ùT{9î_d.ÜÏäF7g³R#Uì]Ògh±wCç~ÅÞ=EUÌo[{Ïu”bßNÛËÁvÜmûúö`7ö ­–ž^ŒÉ Ç rÜ Ç rÜ Ç zox¥ÒŽäèAŽäèAŽäèAŽäèAŽäèAŽäèAŽäèAŽ^¢£o•¹˜§+¦/®KnTw—(’LyBW%Šä÷ÒÎÐJ¼«îÏ2Îv‚íúŒ+ñlDJç×òû|¨ÈK¼ÍÉ/‰ÏÝo¼E®ã*r³;ìè¾N|_Ã×ñ |waî¶8•¯Ù=µÈ•¯“¥úlýZæ¨÷K•µpýr£:·”ÿ,U¾•²H×M¥üOG©rÿ­¼¤yRªÜ˫ʢT¹Ûv‚í£ëúRÏ~²´T¹¯È‡Ê´T¹?•I|®¾+Õ ¯HTÅ  ˜í¸Û"Q.f7ö`Ô¢R.f û,B_?È2åõ™‹y‘2¦йw™rŒËB÷¯ò’ò*SÌOä=Å_¦hÿ Ÿk|Ë•ÞAQ™íØaï£q1_Å×ðu|ßÄ]Ø…»íˆÊÅìÆŒZ ÊŌɑ‡ˆ:—67`fbfcn³#úvÛ±|{°{0jG÷õb OyÇä{^“|ß{K~à…å‡^ƒ|îý‹WM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕäUM^ÕÊk«|ßë’vo³Zy)fåòZ½ÛîÿzmÑ\ý‘ÌÅ€®R#šŸ¿‘…î-/¢¹¤Ç5—ÌvÜmÏjV˜Ý؃Q^ß‹1Ù®w.–¹h]ÐN´ë(åFwEòì Õª]]ðµ´.h§ Úé‚vº OÒvÍÿß˪d;ó¿]óÿ¹LâsU¬]1ë=³ÙŽ6ÿÛ™ÿíÌÿvæ;ó¿ùßÎüogþ·3ÿÛ•©"T¦f7ö`”÷ìŘì Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í Ó2í ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2í$ÓN2Ý¥L÷È\´o™Ø¥ìþ uî±Ë+V¿ïò‚X‚eÊw—®ÚÌ e±KWm¶]…½Ã>ÜË^×´nïò®£]gíâ:k—òÕq•¯ÙŽ»íèŠÜìÆŒZ<¾^Œ!ï¬ëzsâãŽâ˜Å£«9s?ÀƒxãÇ œÄ)÷6i}²:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜE»¨suî¢Î]Ô¹‹:wQç.êÜEwë³ì¾´5ÎNíwéÚ±;ñUï}ù¾Žoxgå›lïâ5»qàqõíÑÙé^ùk=¶Gg§ïÊYï ¼ê“q¶láý—¼¡óÐ=ÿ®4û¸ÒìãJ³yÞÇ<ïcž÷1Ïû˜ç}Ìó>æyó¼yÞÇ•fWš}\iöq¥ÙÇ•fWš}\iös§®Ÿ;uýÜ©ëçN]?wêú¹S×Ϻ~îÔõs§®Ÿ;uýŒo?wêú¹S×Ϻ~îÔõÓÝýŒo?ãÛÏøös§®Ÿ;uýÜ©ëçN]?wêú¹S×Ϻ~îÔõs§®Ÿ;uýÜ©ëçN]?wêú¹S×Ϻ~îÔõs§®Ÿ;ud7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd7@vd·Wym—¹М߫¼®I»§½WþNÚoQíå®Ñ^]¯ÔÑÍvÜm¯×ÑÍnìA»ƒ½—;Ø{¹ƒý–®Ù?•«ð%7%s\Tæb@•yKõü@º·e‘²~K×ïW䌮‚ßÒõ»9çfå<.ª;ÞR„ßÈ:{|K×òz]Ë›031 ³1·Y$ÊK1(/³£ƒâ7cØgñ¨zæ>·Ÿ`Sÿ.s1àÞµŸÎp3²Ðý@^Rõö©’Ϥuñ>î…îS´’ÏíY]ûêèf;î¶wPUÍnìÁ¨½§¢2cr£rôAŽ>ÈÑ9ú Gäèƒ}£rôAŽ>ÈÑ9ú 
Gäèƒ}£rôAŽ>ÈÑ9úw‡ô>õ†u¦š'_Ã7t~;¬ó@mûNyä»:¿ÖÙàò}ïsùÎR‡u6h|äE䯽ïËçÞ„7¢¼vÈ\ ¸i¹Q½6¢¼ÈÊe„£(~½Fñ›í¸Û^©øÍnìÁ¨í«øÍ˜å(£e”£Œr”QŽ2ª³âJyÃû¼£ØFu>üïò÷/ò¹·Íåè£}”£rôQŽ>ÊÑG9ú(Gåè£}Œ£qô1Ž>ÆÑÇ8ú9Ž‘ãGã(ceŒ£Œq”1Ž2ÆQÆ8ÊGã(ãôø8=>NÓããôø8=>NÓããôø8=>NÓããôø8=>NÓããôø8=>Á±&8ÖÇšàXkB³b£´#NpÄ Ž8Á'8âGœàˆq‚#NpÄ Ž8Á'8⤮¡þ(_p×å*´µeR‘4Ë\ 舓Z[>•…î´,b¯bÅ0©+Y³í·8'u}jÛ—Ý 9ƒWpÎ}!çqQ×Å“œyN* ½^+Ž®•ÇÜ€˜‰Y˜9h+ϤrW„ÊÝlǨE¨ÍöY´ZyÌ$B]çšûp‡pGpTNq¿tŠû¥SªÏÏ¥ý´Ã”êcÚïnO©>qYèÞ—Eš{SªÀ×r¯à¯œÇEgÿ·pO½?ÅÔ)î NquŠ;¨SÜAâêwP§¸ƒ:¥Üƒr×Ñ•»ÙŽQ;ºr7ch¿×9¥ÜÍy”»G¹;q”ßU<ª˜OÉbž b –©VG½Ú}‰£^%ÛUØ‹1ìýìe÷%Žr_â(÷%Žr_â(÷%Žr_â(÷%Ž*ZWÑš1ä¸ÿp”ûG¹ÿp”ûG¹ÿp”ûG¹ÿpÔg¿Õ{Ô÷Üð ÂÃxÇq'q Oã<‹çð<^À‹8-©k¾‘/hÄiV˜/¹Ã2Ç5É\ ¸?È­KÇTás²ˆ}‹1ˆ%XÁ»íeû²úô˜æŒyçÜœœÇEóãóú˜gÿïL3GG×Ì17`fbfcn³85ŠPca¶cÔ"ÔX˜1´1=F×£kŽùˆP£cîÃAÂaAûë·Uv™‹}¾­ÕìKû‰7$ígÞV$zV‘˜í¸Û^£µËìÆŒÚ^ŠÐŒÉ |:_Йá°Là ÷SySG¿ Yg>toÉ“\ÐÙ`Ô›Ö^ßÈ«iíeÞйдâù|À#öÓÓÚëk™”Ÿ²Þ^ö¾¥sËË^ ¾ˆë1 Ó1stÎsY¹›yê˺Š1nPÚÙïeÕ¡[Žh ¿ìâNê<í²7¥sÚ˺¦›‘³.O^uù2Îv‚íW"“¦/׎ëËÃ| à&ÜŒåXÛ±k±ë±± wZ.)³;,#]™¯âkø:¾oâ.ìB;ß¾Ìùöeη/s¾}Yã«jøz1†³V ßU|hÕð}‰ð1>±úèúÑ|*?óìg?Så—dÀÝ“µþ|¦j_ô>SzVY˜í¸ÛžUÆ'VµùTÎj¶o•ÆÕn›|Éí—Åî—2ˆ%X¦:Ïê³ÌÜä.ÉÍXŽö{â³út³g«0â–e‡÷#ÙË#1ìý¼ç>E5ë ¢ý®Ä¬Vï<9ƒWL­ÏŠMë³¹30³0sðû^Pn³,|Eë­¹q‡qGqÌbÓ§¡¹àA<„‡ñŽãNâ·Ü}'ð<‰§ìÿ2õYi¯9ƒgñžÇ x§ñ²ÕÇ÷ÎàyUc7"Wáj] ]å·´®jìÎÊ –`™û\†p“züªÆÎ,Gû^‹«;{¶ #:“¿ª± Ê^‰aîå=÷iõ¸ª±3/«¿®jÔÌ+8ç6Êy¼ææåç껫:«±í¶Uó«êÊ3r ŸéªðªF\iÄÍ ˜™˜…Ù˜ƒö;JW}D¥±6÷á á0Žà(ŽY.ks?ÀƒxãÇ œÄ)%¿]ªÛ¢×=Å^^Ã>ìçñ½ë-¶÷©&‹ú 5ljv߯ îrZ}´è}ªz.z‹ÿ»Ã[ôý“=«OIe¤OIsf`&fa6æ ýÌ碣ëSÒ܇ƒ8„Ã8‚£hŸ’‹|J.ò)¹È§ä"Ÿ’‹|J.ò)¹È§ä"Ÿ’‹|J.ò)¹È§ä¢>%U+}JšïàI<åµÊ÷í7Ÿ}â/­2¾_áÇø žæ}ÎàY<‡çñ^Äiü”½~—ð²ÕYŸ¡æ ^Á[Ôó6.㼋÷ð>>ÀLâ3ù…úâ¹ÌEû»*_¨/~/ 5o¿ðÞð¶Ê7qÒýUÚ7V}¡Ž0WÜ72iªôê³wÛû¨ÌnìÁ¨½³zÁŒáGzÿúTúF¦à‹¸Ó030ǽ#sÕÅ7ønØ|7ì Åoÿc´Ñ—…®KÚï¼ßàwÞoð;ï7”Å‚œrä1íÜðìwZo(#sEóí†2’úœÒ±ô9eæc7áf,Ç ÜŽ5X‹uX ؈M¸Ó²PÅÌv´ïY½Á÷¬Þà{Voð=«7øžÕ|Ïê ¾gõß³zƒïY½¡:+_ÕÙìÆ´ï溡:›1´Ÿs¸ÁïŒßÐg–* Ï,ó>Æ'V­BæSyS+ÏWò%wZZåoªòqp×äF÷µ,ÐçÝMÕü°,RT7ù©’›Zþ&¯º_Ë®¸%™4Õ×zOêp“:ܤ7•‘ÞS™Ý؃Q{edÆÐ~›ï¦VQs@.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.íÑ.ímþFÛmo:Ó÷ü9þB.«/þG¦à‹¸Ó03ÐþŽÃ2Ça™¿ã°¬Ï…¤ á&ÙŒåX¡µeYŸ ölvxùÒ~ŸnY]oöòx ûp/ûÚ_ÄXöFq lja'ydJõ\æw$—ÕMfæc7áf,Ç ÜŽ5X‹uX ؈MHTü†eþ 
Ã2…a™¿Â°Ì_aXæ¯0,óW–õé \ôé`îÇxáa<‚ã8“8…§¼ò}¯L~dÓjoŸÁ³xÏ㼈Ó8kµRWš·Èå6.㼋÷ð>>À‡TøK|„å]¾Ïð®fΟd ¾ˆë1 Ó1‹uEvW3Ç,Á2­w5sÌMºÊ¾«™c–c…jxW3Çž­Âï„|CWŽw5sLû±»:»0{0н¼>†}ØÏã{9î[lÛ÷^ÞõѾ á®7Šc8î~''p’H¦Ø~[5¹ëÙ_K¹«³Ž?xw5÷ô¸æžrôåa>pnÆr¬Àöšå÷uý{×Wím–Ûy¼k±ë±± ÉE³Ñ܇ƒ8„Ã8‚£8fuÐl4÷ã<ˆ‡ð0ÁqœÀIœÂS^|ß«’zEò# ™¨ª:31?ÆOð4{Á³xÏã¼ˆÓø){ý/áe#™˜3xgm¤4«Í[ŒÂm\Æ;xïá}|€ñK|„qÅÆWg/æ3y_óü¿å·tfr_óÜ|×c¦cë¼ý¾æ¹Y‚eÂMêÇûšçf9Vðþ•<[…Þ¤|ÃÛ"ßÄ^aîEûfûÞ ÚÏÜ÷Fq íwÀï{8Ék¦ØþÔâÔ¼Õ¶æ­mça>pnÆr¬Àöêä÷½í²Ú Êí<^ƒµX‡õ؀؄D®ykîÃAÂaÁQßà~<€ñÆ#8Ž8‰SxÊÛ%mÞÞ׼͓Ym5oÿ&…ã'xš½ÎàY<‡çñ^Äiü”½~—оQù¾æ­9ƒWpÖFÇwo1 ·qïà]¼‡÷ñ>Ä/ñ>–uwB®Bû†É‡:?“ÅÎ~ê?ˆ%X¦Õà¡f£¹ÉÝ•›±+t~þP³Ñž­B;ß~È·G>Ô<´Gb؇{yOû›V½A¼àyuý¥HtýenÀ ÌÄ,ÌÆÜfÑúx7Í sâãŽâ˜Å ¹aîÇxáa<‚ã8“8…Ç-G]™ïàI´;“5gþyšWžÁ³xÏ㼈ÓxÙª¡Ñ7gðŠ\ÑHMÈU¸ÚÕË—tM±¢‘úOÄ´‘Za¤V4Rwäf,G»/½ÂH­0R+©od/Û1ìý¼Û>ÍÉ‘yÁ ÉË®DÎàœÃy¼æÊë¸èÎÉ›ú\ñ–ЮìV4ÊÊB£lnÀ ÌÄ,ÌÆÜf™úˆG£lîÃAÂaÁQ´Q^a”WåFy…Q^a”WåFy…Q^a”WåFyE£¬úøNà;xOyåò]o›|ß¾A+C¥<Í^gð,žÃóx/â4^¶ÚjÄÍ´ïL2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ'ñ$#ždÄ“Œx’O2âIF<Ɉ?ó^ÐyÝ3o®Ö'æ3]'^’¹î¶ ¸Yi¿óûÌ+ðvÈB÷3ѧÒ3ï¨W#¯ªæÏt…h®¸g2iúÒí=}/ãÌÀLÌÂlÌÁv\]Kšíh¿MüŒß&~Æo?ã·‰ŸéZR1èZÒŒ¡}¿ê3ß |Oʯ”Ë72í§È¾Rü‹ÞW:–ñE°£<Þ‹1éì·Ï|/è~*s]BÜÉî²ÐýÒ÷‚ÞGÏê}ÌvÜmÏ*f³{0Êë{1&ר™‚kÜŸä‹lÿÛ~\Ï#i˜Ž˜ãze®;*óÜ×2î”Üè®øìoÇd±KÊ –`™ûJ†p{mÆr¬pOd%ÏVá^‰|{y$†}¸—÷ÔÕ®œÀI÷W߻Ε¹­Ý5’ùÀËÎîÉÍXޏk°ë°°›p§UC£`¶c‡åâëÄWñ5|ßÀ7qván«›ýF•ìÆŒZ%5vf ÉZ«„¹q‡qGqÌj¥UÂÜð ÂÃxÇq'q ?²QPïÛö<‹çð<^À‹8·‹Û¸Œwð.ÞÃû¨ëhŸýMÞ¯å%En‘ö”Ïþ"íÏ|öiÍY÷Ç9™ÇE÷Ÿò†{.ï°ï=| ¾ã=t‘+¼Ÿkü£Žõ{™ã¢2î=¹ÑÝ…®KNê}þQWÿ#¹y\=õÞ wWÎòÊ«š·ÿ¨¨l;Áö ÷@>täŠzùu\󹽯~ÏBF°wóøìÆŒZ vçPÆð¡E¢3[ó>Æ'›ï7øTþ“ýºÌÅ€ûHnt×d¡k“7Üïä=÷gùÀýÍ÷Oöóç2‚í¸Û^o?.»±£ööóç2&ío*“¹Z£ì/+Ý—•ýe¥iŸýå#=k?­ç³¿|dî¶gõÎf7ö`”×÷bL¦kŒÈãî¤<áËKêñtï Åœ®,>”yükuº÷[|î|/³ïËìû2û¾Ì¾/³ïËìû2û¾Ì¾/³ïËì»}7°ïöÝÀ¾Øwûn`ß ì»}7°oûf°oûf°oûf°oûf°oûf°o&ûf²o&ûf²o&ûf²o&ûf²o&ûf²oûf±oûf±oûf±oûf±oûf±o6ûf³o6ûf³o6ûf³o6ûf³o6ûf³oûæ°oûæ°oûæ°oûæ°oûæ°¯}“ÿ_|öMþf%Va§Wê³oÚ·íöáMÍû{iÿ×gß$oîÇxáa<‚ã8“8…ïz>û–øøì[âëä^®Ï¾ Þž=ƒgñžÇ 
x§e¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K¹ä‘K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K>¹ä“K€\ä —¹È%@.r K€\ä —¹È%@.r K€\ä —¹È%@.r K€\ä —¹È%@.r K€\äbߌýŸ}3¶¹ZŸ)öÍØ¶£žÚhÿo( ÜcYè>öÙ·IÿÙgß#ý?>ûió ÎéS̾GÚ¼‡ö)`ß­wÓ•‹¹30³0spî´£k…7Û1jG×JnÆd‘y‘y‘y‘y‘y‘y‘y‘y‘y‘y‘y‘y‘y‘y‘y‘Û·ÔþÙgßRkVhì;im» ·èèö´æV|[± {ye ûpˆÇ‡qDYØ7ÊšcxSë§}Ë«´¿pç³o75÷ã<ˆ‡ð0ÁqœÀIœÂS:+´¿pç³ï&µGÎàY<‡çñ^Äiœµ¨ì/ÜùЍFÕ(¢ET£ˆjQ"ªQD5ЍFÕ(¢ET£ˆjQ"ªQD5ЍFÕ(¢|ϧïùôQ"ªQD5ЍFÕ(¢ET£ˆjQ"ªQD5ЍFÕ(¢ET£ˆjQ"ªQD5ЍFÕ(¢ET£ˆjSbªQL5Š©F1Õ(¦ÅT£˜jSbªQL5Š©F1Õ(¦ÅT£˜jSbªQL5Š©F1Õ(¦ÅT£˜jSbªQL5Š©F1Õ(¦ÅT£˜jSbªQL5Š©F1Õ(¦ÅT£˜jSbªQL5Š©F1ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕR ÕRªQB5J¨F Õ(¡%T£„j”P*PBJ¨@ (¡%T „ ”P*PBJ¨@ (¡%T „ ”P*PBJ¨@ (¡%T „ ”P*PBJ¨@ (¡%T „ ”P*PBJ¨@)(¥¥T ” ”RR*PJJ™¥Ì‡RªQJ5J©F)Õ(¥¥T ” ”RR*PJJ©@)(¥¥T ” ”RR*PJJ©@)(¥¥T ” ”RR*PJJ©@)(¥¥T ” ”RR*PJʨ@(£eT Œ ”Q2*PFʨ@(£eT Œ ”Q2æCó¡Œj”Q2ªQF5ʨFÕ(£eT£Œj”Q2ªQF5ʨFÕ(£eT£Œj”Q2ªQF5ʨFÕ(£eT£Œj”Q2ªQF5ʨFˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆj„¨Fˆjl²¿+ Ýsß&u˜½“›yv3ÏnæÙÍ<»™gËy¶œgËy¶œgËy¶‚g+x¶‚g+x¶‚gÿÙûÂk‘÷¼VùÀkðUë‘­òž÷äo›ïßì'6eDçHÿæ;Ž'ð<)·ñšm¼f›ÞaLÞ±¿Ö£÷ù‰|à ú¶±ï6öÝÆ¾ÛØ÷Þ·Ü}™‚/âzLÃtÌÀb÷µ b –¹ÿ”!ÜäVäf,Ç ·$+y¶ ·¸G²·â+ØŠmØË+ûp/Gâñaq—å(Žá¸{"'pÒ“S..¹‹ò_Ç›î¡\¯U±Ø_¸ðýÀ÷Oö¬/×rôåa>pnÆr¬ÀíXƒµX‡õ؀؄äb?#÷á á0Žà(ŽYÔ#æ~<€ñÆ#8Ž8‰Shßeö߇Þ&yšGÎàY<‡çñ^Äiœµ Û_¸·¨Òm\Æ;xïá}|€m|_â#|ŒOl\ì/\ȧò‡Þß¹§ò[î™LÁq=¦a:f`Ž{Uæbžû“ÌÇ€{MntÇd¡«•e<ÂJ¬Â-Ê‡æV|[± {ye ûpˆÇ‡qÄÍÉQC»ûýCî~ÿPóð 9åöËclw[å œuÿ&¯²g;Áö5wW^ÇnJÞt“r oÿïWò¡û¾\qÕ2‰_»ÿ’¿Åçî_|?ä®û5ŸU+Íg3¸ 7c9Vàv¬ÁZ¬ÃzlÀFlÂV]¯™íØaU²ŸÕ”¯âkø:¾oâ.ìÂÝ6^¾=Ø=µÔjiÆpŒ×ÿ÷ã<ˆ‡ð0ÁqœÀIœÂÓxÏâ9<ð"N㬯ºÀ¼Emoã2ÞÁ»xïã|hs@]`>ÂÇøÄf…ºÀ|*ÃtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtA˜.Óaº L„é‚0]¦ ÂtÁvû[êr®ÖùÃvû[ê2ÇýVÜ—²@²Ýþ–º<ªs×íÜ›Úν©íÜ›ÚîÍ)æíÞ<Úÿ­l·¿®._Æ 
˜™˜…Ù˜ƒÛØk§×þººlǨ×þººŒÉb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æb®!æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æZb®%æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æ:b®#æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æzb®'æbn æbn æbn æbn æbn æbn æbn æbn æbn æbn æbn æbn æbn æbn æbn æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æFbn$æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ&bn"æ)¶¿É\ è<ÿGŠmNëzöGº64K°Ì=!Üäþ[nÆr¬p·e%ÏVa/öá^ÞaDÙýHgæŽã^s•×M>ë¤ø•â7Û1j±)~3†¼³®ÂÌ}8ˆC8Œ#8Šc>mÍýxâ!<ŒGp'p§ð4žÁ³xÏ㼈Ó8KFWñÞÆe¼ƒwñÞGû´mfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£fƨ™1jfŒš£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1jaŒZ£ƨ…1ú±ýÆ,Te~¬k¥?ÈjÜŠ¯`+¶áãˆVã«ææNj¥ý1køu5ôïǪªÞYU5c8k¯T´æ^ó|*[‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xZ‰§•xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xÚˆ§xvÏâÙA<;ˆgñì žijƒxvÏâÙA<;ˆgñì žijƒxvÏâÙA<;ˆgñì žijƒxvòt;õ)L®ÂÕNÝ¥Oùe±Û(ƒX‚e.(C¸É•ÊÍXŽº®ß©N·g«p‹;+«q+ïù ۭ؆])ï´ß†¯»>Ùåºd7ö`{yÏöa?ï%¶·ØÞçtÅë âï6ÌQFÜ„Å1wCr']TNáQ×-±ïÛ®C^¦2Ÿ¹‹rÆ—WpŽÇçñšë—Ÿ»ò:Û l/ºwåMwR.á3jþ•ûo§V3Ý·Þ—éö>:#27`fbfcn³zúÈ]«œ¹q‡qGqÌê¦Un›ÜoÓ*gt;ä!¶»Wä]íîÔ*§1Õ*gOâוþNß 
;ß;xOyš?¾Ÿ¹Gò]Ogl¾÷<{ü}OWª¾¼—¿ä¸¿Âñ<Í;ŸÁ³xÏ;}*kå´í‹8Ÿ²×¯­ž¾Kl_¶q÷}æÞ“3î3y…GfmÄÕæ-j~—ñÞÅ{xàC›¾/ñ>Æ'6CÔMæS\±ÙâKÚ|ó=cû¹¢ŠÐeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðeº,B—Eè²]¡Ë"tY„.‹Ðe‘ÿ¯£;}®ë¾ï;~Q;Ó>iŸh¦OÒÔÚhS–ew{âÄ•;ií6ëÔKqÄ<çðÜ+ì@ìIDÒ›l+ÆBìD@”e;‘F‡$‚t/Á8·‰ë4iÿ˜Ûïy=ùÌø~~ïߟ^Wʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥¬[ʺ¥,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²DÊ)K¤,‘²—"SG¡OÄ×\/ÅGû?„n{¼Cߎs¾E•¾SýIl¶ÒW«·Œ®ô½zÊ´S¦2í”i§L;eÚ)ÓN™vÊ´S¦2í”i§L;mÚiÓN›vÚ´Ó¦6í´i§M;mÚiÓN›vÚ´Ó¦1íŒigL;cÎsΘsÆœ3æœ1çŒ9gÌ9cÎYsΚsÖœ³NuÖ´³¦5í¬igM;kÚYÓΚvÖ´s¦3íœiçL;gÚ9ÓΙvδs¦3íœiçL;gÚyÓΛvÞ´ó¦7í¼içM;oÚyÓΛvÞ´ó¦7í‚iL»`ÚÓ.˜vÁ´ ¦]0í‚iL»`ÚÓ.˜vÑ´‹æ\4ç¢9͹hÎEs.šsÑœ‹æ\4ç¢9ÕkÆ=ý@g%ôƒô·:½¡Ot®…¾@?N?A?Ùùvè§è§;? ý ý,ý\4Õ¥ø¼Pýíè;ÿú%úeúUú úMÚí-Ïv~z±³šÑœ^¦…·)i­ûó—ª×ã¾ÎRh?0yuþ.t˜¾B§:ï„NÓ?¯:/Eÿß }à lÓú¶i?¥?ó6?¯/EóWÿÆãw:¡¿0ç—ôqÜÿ¥èü˜ms¢í+ý}’>EŸ¦ÏÐgé‹Q®hûJûh? 
ƒtˆÓWª;‰¶¯t”ŽÑ+tœNÐI:E§é ¥¯Ò«ô½N¿S½ŠLôüó¡¯Õ¾úƒÚÉÐÖ¾:WÝô|¥ t‘Þð¾Kt™®ÐUºF×é½é½¶èÕGÃWºÝÙÝñøÍjƒÕo| ½ã†ïÒ{t—Þ§{ôݧª­EoWú˜¶Ã+•‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚T R)H¥ •‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L 2)Ȥ “‚L r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)È¥ —‚\ r)¸)øÛÐt·~þV糡ÿºónè¿¡ÿŽ>Ñi„þ¶Çÿþ}’>Ýyú }¡óïC?N?A?Ùy>ôSôÓô3ô³ôso„~Þã/Ð/ÆÇÃåÈN¥_¦_¥ß ß¤Ý„ží|+ôb盡ÍéeZ˜VÒZ÷ç/;U¯Ç}O†öÓHñåÈN5¨ó½Ðaú ê$¡Ót¦s#t¶³úç×BÿÂ{ý¥[z£“†Þêü,t»“…îзýùOéÏ:eèϽï_{ü7¿cÂ/:³¡¿¤cƒ—k×y+ôW_‡þ=m×^èºY‹ótýv剋 Fâ*ý}’>EŸ¦ÏÐgé‰jw]¦¡'é§égègéçè‹ÕÙº¾æñ£ÿþ!ý#úÇôOèŸÒ¯W]ߠߤߢ¶I¯´öÓ:H‡è0}¥Ú`$½ÒÑÎCÇ<¾Ò¹:îñì¼:Õ9:íOf:gCg=~µúȉ¤_½æñuúêÕ¶º¾Û¹Z%ýr$½ºáÖþGè\õÑI ]ðx‘Þ¨¶I¯&/ÓºÚù³Ð5×;/†nx|Ó{mÑ7ªºHz¥ÛѨ—#éÕã7«·Hz¥w:=¡wmùÝ¥÷é}@÷éÃêc²ë€Ò#ú~õñÙÕ¤­Î›¡ló8úärtBõ¸§*tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ð …N(tB¡ Pè„B':¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô 
¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„R'”:¡Ô ¥N(uB©JPê„?¯Ôãç•z"›ÿúy¿@¿Øù_¡_¢_¦_¥ß ß¤…·,iðçƒt¨s:L_¡Õ÷Cöø~ÈžHÖƒÐÙøé‰L= ý…3T?¹Üã;${ü NŸÁéñ38=~§ÇÏàôøœ?ƒÓãgpzü NŸÁéñ38=~§'6þµÐ×j¿zß,ÑeºBWé]§ôÍêä±£Jï8Õ]zîÒût> ûôaE;ªôÑ÷+ÞØQ¥­Ðº]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢nu»¨ÛEÝ.êvQ·‹º]Ôí¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »x¹ölç÷BOГï†>×y;´ú~/×¶:ÿ/ô½Î߇îÅ©^ö›‘^®µÃåå®ÿY½WW7Mè¥ê}»RšÑœVÿŠxÙï÷{Ùï÷ëåÛË·—o/ß^¾½|{Ã÷ÿ†V¾½|{ùöòíåÛË·—o/ß^¾½|{ùöòíåÛÇ·oß>¾}|ûøöáíãÛÇ·oß>¾}|ûøöñíãÛÇ·oß>¾ý|ûùöóíçÛÏ·Ÿo?ß~¾ý|ûùöóíçÛÏ·Ÿo?ß~¾ý|ûùöóíç;Àw€ïß¾|øðà;Àw€ïß¾|øðà;Àw€ïß¾ƒ|ùòä;Èwï ßA¾ƒ|ùòä;Èwï ßA¾ƒ|ùòä;Äwˆïß!¾C|‡øñâ;Äwˆïß!¾C|‡øñâ;Äwˆïß!¾Ã|‡ùóæ;Ìw˜ï0ßa¾Ã|‡ùóæ;Ìw˜ï0ßa¾Ã|‡ùóæûJø~%ôýpçÿ„~„žì|;ô¹ø·î+q†ßÝêüs×+áo^•&ô¥êí»NÑÓô =KÏÑóô½H/U“ã„•f4§—+¯8a¥e舎8ሎ8ሎ8ሎ8áHÜÒû¡{Ñä#Õ«®‡¶;ÿØ5âä#N>âä#N>âä#N>âä#N>âä#N>âä#N>âä#N>âä#N>âä#N>âä£N>êä£N>êä£N>êä£N>êä£újÔ™GyÔ™GyÔ™GyÔ™GyÔ™GyÔ™GyÔ™GyÔ™GyÔ™GyÔ™Gyԙǜy̙ǜy̙ǜy̙ǜyÌ™ÇÜö˜Ûsò1'sò1'sò1'sò1'sò1'sò1'sò1'sò1'sò1'sò1'sò+qò/‡ž ޳]‰“Wz²s1ô¹øJäJœü©Ð«ß„nÅ9¯Ôªÿó¿R»^Wjoyüß‹¯Ç¯ÔªÏ’W‚îW¡û}¼R{¼WjÇ´Ýiv] Þð ÞJúR引ž¦gèYzŽž§èEz©:gðVšÑœ^®N¼•–¡ãxÇñŽãÇ;Žwï8Þq¼ãxÇñŽãÇ;Žwï8Þq¼ãxÇñŽãÇ;Žwï8Þq¼ãxÇñŽãÇ;Žwï8Þq¼ãxÇñŽãÇ;Žwï8Þq¼x'ðNàÀ;wïÞ ¼x'ðNàÀ;wïÞ ¼x'ðNàÀ;wïÞ ¼x'ðNàÀ;wïÞ ¼x'ðNàÀ;wïÞ ¼“x'ñNâÄ;‰wï$ÞI¼“x'ñNâÄ;‰wï$ÞI¼“x'ñNâÄ;‰wï$ÞI¼“x'ñNâÄ;‰wï$ÞI¼“x'ñNâÄ;‰wï$ÞI¼Sx§ðNáÂ;…w ïÞ)¼Sx§ðNáÂ;…w ïÞ)¼Sx§ðNáÂ;…w ïÞ)¼Sx§ðNáÂ;…w ïÞ)¼Sx§ðNáÂ;…w ïÞ)¼Óx§ñNãÆ;wï4Þi¼Óx§ñNãÆ;wï4Þi¼Óx§ñNãÆ;wï4Þi¼Óx§ñNãÆ;wï4Þi¼Óx§ñNãÆ;wï4Þi¼3xgðÎàÁ;ƒwï Þ¼3xgðÎàÁ;ƒwï Þ¼3xgðÎàÁ;ƒwï Þ¼3xgðÎàÁ;ƒwï Þ¼3xgðÎàÁ;ƒwï Þ¼³xgñÎâÅ;‹wï,ÞY¼³xgñÎâÅ;‹wï,ÞY¼³xgñÎâÅ;‹wï,ÞY¼³xgñÎâÅ;‹wï,ÞY¼³xgñÎâÅ;‹wï,ÞY¼¯ž ';·CŸ‹“¼Œ½]¯ÆyâÏã<•&ôRõ·ÕëY„f4§—½}AËЫ&_5ùªÉWM¾jòU“¯š|Õä«&_5ùªÉWM¾jòU“¯š|Íäk&_3ùšÉ×L¾fò5“¯™|Íäk&_3ùšÉ×L¾fò5“¯›|Ýäë&_7ùºÉ×M¾nòu“¯›|Ýäë&_7ùºÉ×M¾nòwk·kχþ„¾[ûzè{µ? 
Ý­ýIè^íLè~í[¡j=¦íÚw}·k³VýÍ«µ ¡oÖªŸêy‹¾[ûbè£ÚWCC¿_Ûª½úníK¡ïÕþkèní?…îÕ¾ZýF¾ï×ÖªßëýˆÓví¿t½V;]«~£ôz–ž«}6ô¼ÇWkº§}-Îð‡¡oÑwk¿ú^í£¡»µçB÷‚ñµðúXèÃZõsG¼Í1mék]ß®fv}‡~—~~Ÿþ€þþ¨Vý†ëvÌÿa¸Wúní ¡ïÕþsèníwC÷jº_û½Ðví+^µñdhB_¢§èz‰¦´A¯ÖzC߬e¡oÑwãN~'ÿzè1m×N„vª·ïú·ôuúWôÇ¡¯ÇÇOz‚žìü2ô¹ø×õëññóíТ^¯}œ~‚~²óëÐOÑÏE'¼^û¼Ç_ -i}Ù{ý,ãõÚ_Óê±^÷¿X¯ÇÇgøÆÇg¥ ½T¹ÇÇg¥Íéåê<]-©É]½´öÓ:H‡è0}¥:O×¥cô §t’NÑi:Cgé ºD—é ]¥ktnÐvíÓqãÕëÚü˜ÎÅ¿z‚V=§±çâþ¯„V¯#3÷_ýíÕ˜0ý«Ðª±ç4öœÆžÓØsÑØ¿Ý‹VŸ‹®þMhÕÕsÑÕíÐcÚîuÍÅmÇ̸íJZuõœ®žÓÕsºzNWÏéê9]=§«çtõœWl™óŠ-s^±eÎ+¶ÌÅŽª™-Cç‘Î#G:té<Òy¤óHç‘Î#G:té<Òy¤óHç‘Î#G:té<Òy¤óHç‘Î#G:té<Òy¤óHç‘Î#G:té<Òy¤ H. ]@º€téÒ¤ H. ]@º€téÒ¤ H. ]@º€téÒ¤ H. ]@º€téÒ¤ H. ]@º€téÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]DºˆtéÚñ/Ÿxoཌiès…~´s?ôùÎÓ¡ ÆÕ+8‡þE¸Ü¨½Úù§Ð«qòqCßìÜ ½ŸmoÄ Tâñ{Ÿ‡V¯Ýs£VýVÏqBšÿ¨ó ô˜¶;÷ºnt½XÊmÜp7ÜÆàŠSW¥Íéåê„ÁUiI{ªÓvÕiƒ¾_¹«I[¡KØ—°/a_¾„} ûö%ìKØ—°/a_¾„} ûö%ìKØ—°/a_¾„} ûö%ìKØ—°/a_¾„} ûö%ìKØ—°/a_¾„} ûö%ìKØ—°/c_ƾŒ}û2öeìËØ—±/c_ƾŒ}û2öeìËØ—±/c_ƾŒ}û2öeìËØ—±/c_ƾŒ}û2öeìËØ—±/c_ƾŒ}û2öeìËØ—±/c_ƾ‚}û öì+ØW°¯`_Á¾‚}û öì+ØW°¯`_Á¾‚}û öì+ØW°¯`_Á¾‚}û öì+ØW°¯`_Á¾‚}û öì+ØW°¯`_Á¾‚}û*öUì«ØW±¯b_žŠ}û*öUì«ØW±¯b_žŠ}û*öUì«ØW±¯b_žŠ}û*öUì«ØW±¯b_žŠ}û*öUì«ØW±¯b_žŠ}û*öUìkØ×°¯a_þ†} ûö5ìkØ×°¯a_þ†} ûö5ìkØ×°¯a_þ†} ûö5ìkØ×°¯a_þ†} ûö5ìkØ×°¯a_þ†} ûö5ìkØ×°¯c_ǾŽ}û:öuìëØ×±¯c_ǾŽ}û:öuìëØ×±¯c_ǾŽ}û:öuìëØ×±¯c_ǾŽ}û:öuìëØ×±¯c_ǾŽ}û:öuìëØ×±¯c_Ǿ}ûö ìØ7°o`ßÀ¾}ûö ìØ7°o`ßÀ¾}#Ø[¡ûö ìØ7°o`ßÀ¾}ûö ìØ7°o`ßÀ¾}ûö ìØ7°o`ßÀ¾ýf훡¤OtfC«WÄ»éñnÆ=…~´óƒÐç;ú±8áÍÚñ¯…›µíøZûfm‡¾Ýy3ô§ôÎ߆îÆW^7㫼B÷ã+Ö›]¿S¹týGú!ú$}Š>MŸ¡ÏÒ«óx½¼›^/ï¦×Ë»¤q’ ­´¤=Õ©‚´ÒmÇWv›è6Ñm¢ÛD·‰nÝ&ºMt›è6Ñm¢ÛD·‰nÝ&ºM,›X6±lbÙIJ‰eË&–M,›X6±lbÙIJ‰eË&–M,›X6±laÙ²…e Ë–-,[X¶°laÙ²…e Ë–-,[X¶â£ñסûñÕè®-\[¸¶pmáÚµ…k ×®-\[¸¶pmáÚµ…k ×®-\[¸ÞŠ?=AOv¾ú\g;ôùÎWB·â_ÈoÄüøÛ˜_iB/Uo)¨4£9½\½WøVZ†Þ2ÿ–ù·Ì¿eþ-óo™+²ù8´ú¿[ž_»·ñO¡íêo¹ßâ~‹û-î·¸ßâ~‹û-î·¸ßâ¾Í}›û6÷mîÛÜ·¹oóÝæ²Íe›Ë6—m.Û\¶¹lsÙæ²Íe‡Ë—.;\v¸ìpÙÁ¸ÃkãÆŒ;Üw¸ïpßá¾Ã}‡û÷î;Üw¸¿îß =AOv6BŸëì†>ß ­ži}3ÿ¹ëÍp‰· 
—Jz©zËp©4£9½\½o¸TZ†Þær›Ëm.·¹Üær›Ëm.·¹Üær›Ëm.·¹Üær›Ëm.·¹ÜærÇëfÞñº™w¼n毛yÇ÷Üñº™wü¾Ó;¾+àŽ×ͼãu3ïxÝÌ;^7óŽ×ͼãu3ïxÝÌ;¾àŽï¸ãûîòºËë.¯»¼îòºËë.¯»¼îòºËë.¯»¼îòºËë.¯»¼îòºËë¯{¼îñºÇë¯{¼îñºÇë¯{¼îñºÇë¯{¼îñºÇë¯{¼vyíòÚåµËk—×.¯]^»¼vyíòÚåµËk—×.¯]^»¼vyíòºÏë>¯û¼îóºÏë>¯û¼îóºÏë>¯û¼îóºÏë>¯û¼îóºÏë>¯=^{¼öxíñÚãµÇkׯ=^{¼öxíñÚãµÇkׯ=^{¼ðzÀ민ðzÀ민ðzÀ민ðzÀ민ðzÀkŸ×>¯}^û¼öyíóÚçµÏkŸ×>¯}^û¼öyíóÚçµÏkŸ×>¯‡ñYì(ôƒô‰Î_…Vß•ý0Ü+­~OþÃø,öVèó×B?¹~ŸÅ~ºMwèÛÞò§ôø7ÕÃ8á¿t=ŒÏ_19>Uú!ú$}Š>MŸ¡ÏÒ«3Q¸Q¥ ½\¹ÇÉ+-iOu’øüUi#ôË–,X°`9Àr€åË–,X°`9Àr€åË–,X°`9Àr€åË–,X°`9Àr€åË–C,‡X±b9ÄrˆåË!–C,‡X±b9ÄrˆåË!–C,‡X±b9ÄrˆåË!–C,‡X±b9ÄrˆåË!–C,‡Xްa9Âr„åË–#,GXްa9Âr„åË–#,GXްa9Âr„åË–#,GXްa9Âr„åË–#,GXްay¿ö¯âßfïÑ[¡¤Ot&BŸíüiè z²ó¿C?Ÿß¢øª>ˆ~úý8ýýœi/{üF$ôýà­t‡¾Ýy;ô§ôÎ/B÷¼}õ}ªïu¸u¥¢OÒ§èÓôú,}±:gPÇ ƒºÒ„^®NÔ•–´§:OPWÚ NØÕKûh? ƒtˆV¯öÕtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtKM·ÔtK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ÔrK-·ô(n# =AOÆ¿xÅgÃÐç;¡Õ÷o?Š“Ä߯I*Mè¥êmâ3`¥Íéåê½â„•–¡Çæ›lþ±ùÇæ›lþ±ùÇæ›lþ±ùÇæ›lþ±ùÍlþcó›ÿØüÇæ?6ÿ±ùÍlþcó›ÿØüÇæ?6ÿ±ùmÿOÒöÿ$mÏ}´=÷ÑöÜGÛsmÏ}´=÷ÑöÜGÛsmÏ}´=÷ÑöÜGÛsmÏ}´=÷ÑöÜGÛsmÏ}´=÷Ñö?!mÿÒö?!mÏ}´=÷ÑöÜGÛsmÏ}´=÷ÑöÜGÛsmÏ}´=÷ÑöÜGÛsmÏ}´=÷ÑöÜG»zîãÿ•ü«xœ H" PÓÉÌÊ´ÚÖÌ>k~¦Ó>¦¦fægfuœÇ)""ŽˆX–EBdYB$–ˆXB$b‰ˆˆ%âˆc ‰8"B$ŽC$"Žˆ8"""äXâŽã=‡3ù¿$§X&,#ÊÖ¸®Ž;ËMp¸ž‚çà½ç}âíðrH0 ì—åêò™ò#¾oæ‡ùü— _ÅIE±âU D›‚§J¨’®ŒTþ*¬® /ªª¢U™ê†êéê“QM¤fWÔ(Ú­å×þR{#VˆçÄGâ¬Ä"A$´ä»ä¶¯;¬7Ô;êÿ¬ÿ«þ¡áCÃÅå›õFNãRãé[ðíªT!MKÏšêš|MóM™¦œL(ÓɦeiÙ¡ìªloÞi¾’ƒòuy±EÞ2ѲÐrÑŠ´þÖzצmK´åÛíí™wÀ»O ¹â¢ƒìXèÈu<)ʈò›òwUÊ©SETIÕŽ*«zPËÔ¸zQýU½©>Ußj¤\óQ“Òj.AØ úÀq0f;ùlç—Î"@b¨J@»Ðt CÏ0VÂÌÀSp^„Sðœ³p¾Õòµjí„vU»§ÍvA]ñ®¼Nª‹éžõR½M?£_Ð'õÏÝñîמXÏIÏk/Óû£ê‹÷ †ˆáÀPìoïöo÷ßEFÆ8o<4 MFa ›¢¦´éÒôdæ˜s̼i>5?( Ÿò§eÝR° ¬J+aYÏ­O6ƶbÛ°Ý bƒ™Á‡Á’k¯¶·ÚWí[öç¡ÔÐæÐžCæ;®‡åÃöáíá'×)r&)çÖö÷Î’ pÑ®IWØ5–]i7×=뎸Ýÿxøž:áùâùêùæùîù(bA$Ž,#iä É#×È=RBTŒ2è:‡FÑzŽÑ[ô }Å&°Y,‚-`I,…mpGD#²‘ÀŸ'ñ%| ßÄñŸ^·ÁÛîÕzÍÞ à‹øJ~Àïóù§ý[þ}ÆF(a!‚&&‰0qJ\WÄßÄK`"0ˆÉ@*°ØdH‚'gÈägr‰\#ÉY oÈGª›²QÅPSÔ¥Ô µNÝS% Å´œií }ô}BÿA_ÒwA_p,8ü̯ƒ÷Á0bF΀Ì1“c Ì óÈü Bth2͇â¡åP:´ÍŠY9 
²ïٻȮ²ìѨhû£^@ Ѓ(8w“xœíWÍnÇîµeJ¤$Û EI€ A s‡»ÔÒ2ÄC@ɱ!)’"Ÿ¸w¦w¦±3Óãîž/¯”[^ §ÖÕ¨4s|'Þåƒá°_‡|²ànñ\ç¢LøI=ö”~ªdrªb¾“9W=Üßoš&r‹ŠE±.öwy£\Æ_H+Í\&ü‰.* É·O„ÓÛ?Q±,-lÕe" w™ä/Ç'üY%K:Øã¯¥±J—| —mlTåldUi“î?{r² 6Á4ü«’?ÊE<;‘«ø ‚,Ž …“£å~¿³„Y½z1ê¢At>¾eˆû“m Þ"BtØ|Ö|zÄ;8ä ;…,q®K\äÊrÁ‰,„™q=½æè¢·ËA˜'¢à¯agµi§4ªf:wq&ç+÷VÂF¡ëJNE,ùT*_ðDZ•–P€€JST RIù“P·söï#a¾ÆŠ¦?Xà`Ëœeç/$„Á·½m4 é*›AEÇ RÒrˆ8FN뢛@‚µUiÅ)±.§€/ÀòÊ(m”å‡\9n3]ç ·RüëZ9`"€_Ú ¬–n›7¯¶>¥‰N€õ$ŸÔŽ7¤™([åbá‚åT•"Ç*g×µsaRHתSi#þ à‰sa­Šá|et¥ Ô£´{|;×pbŸ+«&¹l­«JšX`¤j.i-—ÎI3Õ¦°¾PJž SèráË’Ë‘‰ø±# ‹-çTV0L›K9[U=×zÆ 1|å\%€)PŽŠ7žás‰÷ºN3Ä8Ñ`©ÔŽOužëB‚’ÔÆaøN<VªotMJ¤*·ÄÿeŠÖçLîŸJ™cEáкpÙà­r‰uFÃ\êØÕF"˜ $ëÄ rÌX×¶”j2.‰ª8àF{!gâ=æš)ÈKŽïP_á©Læ†qyícõxp0|ÂÓ|Qe&®”ª”¼‘8l­oZEsÁáK@ÎBù&–Q×ßMÀü´©ôaë1ÌìTßÞåH#ó¯_¬:F9~~Ì«L—Ò‘3,aLóŠC'p1‡jäá´0×8C:_6¦á¸..ƒ='.Tøx]¼É3Í*¶`†)–²Œ9ÆÙ‹Ù.\Ø€ áÓÒ!¬Mà,‡S ö4s&XÉX9a5›Áe§p÷9|+&a¯1ÙDÛ|=dûðiè‘¥ªc)‚Óšp#h@Û&g/Àš…Ãæd—³'p®¤xŸ‚n«œmCÖ4HE…Þ%œ³A«&/h‡“eÔzÉÆp’³g žíÚ^·°+¯IÛº¦³Cð4<'C zˆjkΠFW «)ì?/'¥ÓGí¯Šì>"Lb@u kî0’8ïkqªpÒYýþúGËZ±W€êNŽÈfD¶áóö¿;"¾fómDº1ò¥¿>\?£ïOiý|>äÁï4TÀvñîY¶'9ÜYøTmW¬}WCY Ÿ«³9º²Æeݱâù1¬"›9T§¡8ðÓ^ëë¬8B¹¯À2Fâ蔄þ8/{KÚ¾ƒÔ2käû”ðÃ>˜’gd)öxB‡S¡ ½ãY°êšN¡—|•(%ûÎÊÐoï¦9a¾¤Ü1ꪨ¢lk™êðLA{˜gÛŠºú»|çeª‹µ>¥¼#ê Ïž˜NYÚƒ}ÄJÂ&ôæ¼G;"¬Å9ÞyÜç¿‚Ür²»ÎÑ4+Š# Dk˜ÓœöÛs¨ë)C“Ëv­£ùÄ©'hc[¬u ÚH—œ‰h¸Ž»|ÊaEˆ¥EÓ’¾¤©v¶×sð§igß,ðWRöIàÃÊ“ŸíŠÏÁv¦4„Äåó¡¾’k÷‘?)Í4Ïcœq>¦’úƒŠ9ÅÝ”|—Ô“éDl„érÃR(˜†<­&PBuTÛ™ÿg«h×ê¼Ê^Pl’êÕö¨·Ò„Ê¢µ½ÀoXµª¡£˜¼ž¿« ëš2i9ÜÖņSøéYäÖXÓÖXS•¤aô*;Sµ}ƒQdÝ:¯æ}[×”"Á|ýúÙ9¾Óy^µ¶2B¤Z¢ñŸxŽívf¬¦J`݇Lðó ÅšÓS, ÕlÏ”TIYÉå›­Ÿ:mäªó¾`;¾ œ³¡ûý4AÝáÒo÷ ¤ûlò3?MAóËsÿ0h=ïÙyðz™.iÛ|yÿUÈ÷z‹åžXÇÄŸŒø%ô[Ÿš+¯Þ¯xx&àuzC,çá4øjë‡ –†ùøï¼M{ëšXrõ·Á«þž¸º‡ÿÿ6úoømô¯hžý¥pÁo—ÒV¤xœmSxe~_H/Ð*¨ˆDWï.É¥´#…b¡Ô ŽzM®éAšÔKB€[Ü¢(nqï½(Šóqï½÷ãÀûrgÄØûaÔ2„+G²Ë~ì/0ŽPÏ͸9·à@ÂcÜ’[qkæ6Âm¹·çÊaܑùGpgî‘ܕ£ð8wãhîŽñ8ˆ{pO4p/¼† ÜY©b¦†É¸·c ¾ÁT4áü‰EÔÑÌ0#ŒÒ`Œ•܇ûr?Êý1 ÓqǸ —£†cpÀëxoð@V±š5x“µx›q¼Ë:¼Å±xïqë9ž±—"€°†b ¬À=¸wán<…•xâ܈hÁg8›ã܉jœÊ ¡Æà@‰£ÐóñNÁBTÁD9ÊÐ#pãt,À܈ NÂɸ aKl…­1Û`[lÏFNâÁœŒ‡0ßâ;܇q®Ã0 Çœ‡óq5§p*ÎÅEXŠkp .Æý¸¿²‰‡ C°úà*ü€ñ=~ÆOø˱ŠÍ<”Ó8‡ñpÁ#Ù£h²• &iaœ‰›ñ Vã´²)$ðÛ±6gp&žƒ…§™Æ³ì`†Yvâ7M‡9¼€5XË<žg³ØÅٜù<†Çò8ÎCÇz¬Ã‹¸)žÀ9Ÿ xOæ)<q!q1Oëx¯ðt´ó 
žÉ³x6Ïá¹\Âóð%Ïǃ¼ŸðBÌ€™è@šñbdð7/!x)ŽF'.EŽ—ñr.ã¼’Ëy~çÕ¼†×¢€ÂGùç|’Oq%Wq5Ÿæ3|–ÏáxÌà 8‘k¸–Ïó®ã‹|‰ëñ_Æ|ŸòU¾†e¸¯ »Âý«²©lÆšŒ{c`\«éë½…Ò0l¦w<“ L•ý&?Øô¿'‹ßÝÁÉšù åK´TDb•½ÛZÚÜITШ€! ±ÑJŒ µÌeµØ2Õ„bÅŠ!C(†PbB‰‰~Lôcˆ…‹ i™Jt,jOØN¢ÐÑ–¶fÚå ¶邦ýf¦ËʤÍLÒN(iïÔV&U–ÎfR9%_Üä]f EUÝĪV!  hº@X ,üÛU ¸Õªa‰ ëJ‡™)w0D/"RQ‰'bKÓ+ÃÞXUéÕ5:-ÇÎ&V&o9V2/8Ù@‹&¹5-"0úZ¹¼Ýaæ­¤05UÕiÛ1•N+gåÍ➦Féd6/‹ºX]Ë3TM@Ui¾&Íפùš4_s›ßà2•BÞN'-ÿ2Õ«¼ã)U­Ž5Ë Õô´½¼Æ 7R¶RëÝšÚâ­QâÅU0îSãEjy¼;<4¶Del÷v1§ÛüиžÏJ½WN½¯UïiÕw“zÕï‰÷4ô¨Ò`& yË_V+‹Ke¢WYq7 6úªEÕþí…LÊt i³W&{”É%”HT™RÜ MùOJM)S{"5µÊcD5¥É;A“Ÿ«©˜«¬É±3©þMelN9æ,KiöR4÷¤èÛœ´Ý«‘³s¡i%½™^ ,ŸÞÓAÓObzn%JúœèqËò µü`Ëë©ÕhÑcQC J•Ȥþk—ÞËž¡ØÞm_ÑöÒÏØÄCI—úR£d¼&gJ}‰³¾NÖó%»Q—â”úb(9¯i¹M|©ôÇjß÷„åA…5C &à>×°^!  h§‡"ÂÐ…!o?¬ #,Œ°,øÕßš9CvÝ›L€iŸ©ê%Ò€êÖíï墎u‚ÃõÜÊ•KŸdÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿß÷2-ÿ“4Àú‰Xû\ü+S NÉ1õ>„(¢˜¢(‰£$ÍÊ£"¦@™+)KB™*(x‡´TR®Ug\xg¬Šjõàyh°fæ#*¨…Ú.è‘ë‡,ΨÛNbÐtÁ?ˆ<ð(§n¿‘cCjxÄÚ ò0WãD¡3ÉÆŠ3šðHyFek¨æ â +¥6ŠiÇF ©YŒæ4°Ò‚-yÙ Ã9rÔCÞ$ZQÁGõ¹ò–”6˜‡¹BMúÈAõ f^7)È&Ô™ŽÉ1ç””<0%ç˜ žùÍ,q^iPv¡y¾EÉQH¹R±m˦Pëí|à©Î>Þ\ÔAìäYÅÆ®5w÷ž½[à>¹íg|€å²‚|ÛL1jÔñ u¼‡T“²eŸ%‡ ÚXÈ`®P “véˆ*8z츞€Ë„2Ý`{p@……›E¿| ¡2(Ý×Åó;ª`ËYP7®ZÖ*Ø—]´“Hº{O|rAduÖà «ÕÝ—ÿ ¥ðá«Íxªè¡ÁR–‹ô[õž°§h%/á™üç/Ä¿4¨î2Ž-Ýà•ü×òw[‹$m¿1²êE¼ï.ˆK»VøÐ|V˜õñ;J¶ÔÊ¢…âªaÒïMvÊóð†AÙN*á˜ÉºŒ¼±ZyÎ^ W:°Á*ÌÓ+æ,ÆæûÊp=ÓåQÛÛ #£˜J@ .±F±qÕNtÍ´&ÿªü{!ü ür}ûJ•èíA:Â|ëÃ1ðÔ  g±G§-¼GlÍàÑgV6H¤æ…Øwg #™„±4³}Æðë7]n®ñç䟓Ž©¬®…ïòÈ#q›é[Áð êKêxøÓ–·ësÃ_gUú9É ºû[•¥z3 3¯*6AÛÙ–XØÕƱ½ÕÜð¯¿ÅÿC§ÕNÕîÃåÿgm³Õÿ Žƒ[(±$$÷ïDçm ·åg’{à¼>òÝ0!‡”œ<Ç–àtúUÈ#ãz$ÒJOº–ڰμÄçÏgõçÜ[U© B·L÷È,¨vÁéÎŽÓ³òÀào?3@Ûì8±‹ 1°pFĦÎA‹è6£f&SÌeâ¬bç’1XÂâýá¢þØïŸ¶öýoÄ5YK‚¹—è!2 …*¢¡Ï<ÿÛêçãÿE}m:[ÉFéfë;F¬ &™ ×AeL´k¡Åh”AB¸xá÷r¡­@™¯=+ÿÏ~|û½îøC·¥Á,†.b¬Y´Èëáøfëµ¢Lœæ’1èâc-*…Ýý¿¨SÔÓU‡‰À¹­+s *""²d "S7[¦"ÈRAd¸æN-WÙ´r5¦¥íùõÕ÷í¯¾±ÿwÿe?šPóºÛœúªä!_¿{m»ñ!€`4 ¬D‡4[™œð„¿öïY˜»úT-_÷Rã› CñÀF°ª€™^çë0•@÷OF®«!€òöÿsÀö"ñcÁ@§oL°9@{' ·ÇšfËõcØòI±§¨Ch‘¼ꓺËßTýi}ç©K=u²;q¯€D>*1îgh]Õ>æ‚WâžðjEù)d‡0K”êýðÛz¡ò*ø7™jZ]Z2O¢Ñ$ôs…3…3åļIÛŸ¶~Ãä-fïKrÉÊ+¢Ñêw˜x}›££¤÷É"KÍìÔ´Pµ@sf",áþÊ Ñ8U ”ÿ¾´üÖŒ¹fº2]¸$zËKv÷ÿ.ä#°`L¯–øçÿáOvŸL·"$‚5´ àSMû0iX‰ð° [ÖÅö¬r¯eL£¾PÒ‘ÒxäF¦P¥5\z¨ýôf¯6±~DùÛ+¤å·'íK°Ù4USEƒC˜¦‹C'ä>Gh¤i›¤7ßHÀ  63´42»UôÕ|ÂÎ 
†×ôl3j2^Xèí68‡ ïá—!(&·<ZW¡ÔËiaŒpx6°6hnà˜ÝÛ¦‰ȨE?H"1ÒÞÖˆÏóÍ­hjw°±Ts1vûû‚ÆæŸS_BýÏW˜Á7L6””±ÞvBKx©²ýbv.Ü€ýPÿW›µ¢%0¢v¸¯œö´ïwý ÕIg’aÅ)ø¸V_#dšñ؈{CϪª#’‘ÒÁ v‹ª—¾MƇÍÄH6½Ð2’'ßÀb,oñ½§neQ¤÷{ÁCǼiwÜ{ººZ\‚%þ»Êê ¨8¥*!ÎÐ2Ysª¸¼ÎÁ[˜ô”x@‹7ñu÷éñj5L6Çeq¿‡P å©.–ÒR-?çµµt©¬QÑN1Èš íö/u™Ý®Êä}!dù”«TÕÓÀJ›IõPrs~¸ßZ¡_ï6¯àqóHkdÜ›¯jÙoU>¤`ÆùW•_wÑ1-–D¤<ëd,ìkB=Em«Ù'ë@! [£"~âò¾ùߪ:†ãˆšq½ˆ‹ãây‘•ºÁívœŠú …dXhH§&¦âí ‹¸(ÙÙøˆUŠá™äÖ.…GYsuy}AB]N0<ÑŠ÷‹´ÈÃh[+ ÊÏb>”ÑB©›ÄÜ-dz '‡joqi‰í12™]ÆÛk<Ê ,=8 /ÉC›ŒmqF,<0¼6õú@£PŒ{Ð|Ò-Ö‚­úü’4”5MKOCµê&¤ê *²B«´¶"-Ôê¯öø*Ð0-+Gó3³.1+S³79;=å'hh!'ÿïR¥>Éceüt ³®ˆ®Èe* ‚ê¸HÊZêZc¢» ½ ƒ†y¨…‰a»iaQi*ÛZí;¤=r[àšúùf$ý™›F²âŒ%/ë‘d'c;/"ëD!Ç›§À"{‰=f9dÉÙ=;Yt–¼ìÝCÀê¸#¨®h¯¼¦»Š -·nˆ‡ÿ_ößlíéUÈ'Ü;òÉn¢ô·j5áü¢æÑä¦Ié’kr‘“Ž’Uä&7¹‰Ñ8P›•Ej©RÓ6Ë»³UrÞmŽd‚xI‚C‚•È5M Àÿ÷÷¼ê?³ š Ûw•vVšy@™bÙ ,ýøÊ¢ªq&‘ @v¶ºÐ¼Dª‡vt_ضhm¡@Çóÿ¤6Ê\k&”¹YhBÿ,àR¹Ž,T4¦v’W!lî´¬- WHJ×÷[ŤFJ"5þ›!âHh»í0ÉÁÿÿß®Aqù2kÌe¯wÇûã6ÿœV\<–`` <Žð}ëDÅx>:¡â¾gKù€xnŽ\ª,e{@ô©,²ÊN‹ÀÿÛò CV)°FW/åèi°õ\°5é«Ei»}6w€ÂÐWP`€h: ‰%Þ2›Ð…òº PMdîÖç$0¾Ò$ÈeYÜÒ}•ÿRá¿™Vz¿f³zÓo¦0BaÜɘ¢×€9ßÏê²?»×”²„PBÁa„bÂgr÷ïAl†Æ>~c¬tn‘#ºD.ÕþŽSPLûó·¬P•OH^L°•vÿn™ß%Áôáä“B0Æ!¼Æ4Mx4Cæ÷m²{ˆg ¦6ªáºW`n%]Ä¢M(”Ér«¨ÙËJÏÿÞ¡\x&Ç?K)!ˆHp˜:•O- ¹±‰ÿuÑÐÄh6 t,ˆíð·pÙKùìsR€~WKy!@|_}nàõᢊb 4Z·é/‹,‰Ðz+/Я`D¼W@{PŸžœà="áeñ‘Ùl7 î¹öT¯äí ¯=Îs~ã䪼9}в}R}ÄÏòT¿K~wý¡$£ÿaÿ×ÓM¤yÀÏ ,(5m!NÏÆ|7$&DòÁC’!È¢$ß 9º*ô`’¡ãsÉ}&”º3¬4¬>ìjØ&/ ­Â`°{áÑáËÃÎHšð—à <€Ã6IBxœý …ÆÀSá&Ï"ªGʈQÄâºUü3¢1Ñ>Ðñˆ—aÎ üÚgÄhÄ_¨@ •Ž’¢ì¨ZÔvÔnÔ7¨ë袤Øzì*¶bk¼"USùš»]©CLíV—;31ëMïfœ8ÒPÖéㆅ3íÊ–Œ Æ©ûÐYïQ’•¾m¶!Ó”,0]Y|ú›ïÇþ­\. 
pNzK3I,Ï£-¡sM„û™x,ÉxÐ:È¡˜×Kƒ\æ¡Ã«í²4‹ææíP'LéQþÖR2QäB:¦¶Ëàn¿ˆG b*Ë4N™°4vê¤iq¡…_n`¡â.CºÔ‰¨I ,.ñô¹vÄä™åy„EÐvfhé6ÈF’/Ú%ߥ3ö£ãI}¾12mXt˜ÙЄ,d~ca¶ÑPiÉÚ¸±­®4÷Ž@2=ÛÂß‹™ohìŸÁÀ±;NÚ1òÒ(Ûã6†{âÑ¥n›ç^á=–ß¡[õc á¿´„ºðþp%š“$E3,Ç ¢$+ªF«Ó×1c^_kØnƒ ÚÑ ÓÊíÕ³…§¥þMƒGÈ1òdaEÛVœ%ª¥‹*ÓzùÒÖŒQ1NeR¥ZZuê5hŒ¦Å5ŸÖÂF+Kí¬u,IÝ— S7ÒýBÂ"¢bÝÄâ ;JäŒÄ›_4ñ$J’,Eª´,ýJW¹:®ÙÞÕòåy[E\€ÒµhݶtšŸ2}&© 3¹ Ñ©®ùÜœæB@:¼8—ÙPÖÅu¬¶ínx#)&‡˜Z—- ^BñÞ'A”dEí6îÄ•Itâ{ƒ1 «²˜°ç&çUa5ŒàK%í¼ŸxêŒþhþðÜ8Á•î1Êïnæ³ ¿AìAÒ€?dâz)€À NVÍÒ‹O×-FÌ´ØÛá½g(ªˆå¥>È”¶÷þxäꇻ]zäí~â'›:¼™^ÿÕûÃÝúñÚïúÿp)¡M’ˆDª„RXàªAªD¡‡ TV5Ž(Hà€Œ ¬lC]ÉN\zQ’U£Õé F“Ùò¬»I.d-“©¢¦²j uÙ £)¡[5\g2\ø¥ÍÄ1È5ä¹ëžûxXJ)e¯|?!™¦f-m :êò°kwt1ãÔÐyŸÈ„¸ÛBÆÖÏþ7ýœ¯|íßún£¸Çš×–Jë|'zksŠ/ÔVu:N§Ó-ŸËÐ=B§Wµªµ SšàäÏAõ%A ß&Pë-Y||ãWMA±ä#eR-;Ó|Vvð,×j×­Ñêô£Él±Úì§Ë@B0‚bxˆÍ%EÇSž¡5tÎù§)Wç¶]Æž¬ÏšÎÐ9žÂí|jÞÖ^:™û .Ù§›gRÈËëALãæià‹ù\æVjþ5ë®nM5®3‘ŽzÆì圊”&£K2¤·Îw]}ç“nÔó‹U ‚ÎE°ÇÆ MåÀ’æ7ê‚fSj‰"¥õ,´%•]ˤ ¡™Û{Ö·{xÎnþ6ùÛj¤Ï)Âôµ0\8·Œ¶o›Ã;±<‚)_GWmòk…24;7¹4ÂåÞ ¾õäkZÑ–•N,þå+õ±à7º ÿV›'½m»öå=ËVážGWýü J¤.,ÔO¶U$¾žíc•&0i+š–.6Cš¨ËÄgÏÑRAÝ]êšY‚päðý2éΩ²Áä P ÏKßÌB„›¥x.ó “ôqh‹¤ó‘ Hq£<Jk›„Û&ºâW#F¾ Å¢àá4’`RØd2VDÕ…Q˜XÁÖ‹ƒ»äÙ&[/þ‚Ì¥èüæZ›bªS3¡Bûåë6ªÚØ]r|˜8Obä$©£dN‘;Fa#¥½TÙ…]§6Ÿá¡[/³”s`—ªìoûÃβtµ©Øæ|è£ø8Ó>ñéßÿy­´Ì2ð><Aò|œÿN$B0‚b8AR4Ãr¼ J²¢j´:½Áh2[löËw<Ó;Ô¶°öñù:vt!²1€K]H«q㤿„„EDÅLš2ïð9çC¹{?o¾ø 2ŽGghó#I˜€ I™‘Á‘³ `êèÿv$íõ²O•V¿’D5(¥u,û¦`"¡SNàˆH„¯åEÇÌb„#ˆ#}_Ùpò‘ð~D’Âp™GQ¹b4fe™Ã!€peiÔH8_FLñ´>Gê×߀ž¹gú·õ\³Cõ/ûBߴо£rðX'®W@ªÜú 4 ¨Aµ­C­ŽØ7åÓuÔÊDôV©Ô…)楜à p%i´F‹e2¬Cªâ0²‡/‘•ÊQàÇfŠ‚ÒRZ JP¨õèAðÄa}DÜüƒ‚‚Âéb%³y964Nÿ,\β!¡“@õ@¤Hmݧ…VçÛ“ ú¦:XTd ˜]–Il!8[›_à2dÎÈA¦Öõº]‰B’T2í¨Köꙵ9»T ½ ¤iWK( CÍE½/NOd{½ŠCõ‰O?ÿçµÓ33Špžöö™I¶ÚTk³z}·èŽâ|hÄ"SàR:¤1t C&½thzóívˆZÄ’ G¢mé¦Ï€æoÄCãDÙ^Ò¸ü‡Î„`Åp‚¤h†åxA”dEÕhuzƒÑd¶ÔõØVöõ°éÈÉÙÅÕÍÝÃsñ¾áãÀÏ‚û ?éïú¿ÍI`¥´ÖZk)©yjïq¿Âç<Ý´ÌCîýÃî:zƒÑd¶Xmv‡Óåñ@Rt<å0Ûx®9ßa‚ˆ³ É;ŠÛÓr ˜Ñiýž}S3CÛ—bfÆØ™¨ó$ü@"H’‰&LÁÀg¨â ÷숎ö½Ü{&c58y$¿n¼ÚBN–&6Ò»p6Á¨ØC"T†#bèr§EG.t.öA)Û¼&¼8#çvŒgÛeÓ‹šíB“[éðS|…bßSNLå¥ç—ñ‚a t…Ï"–aÝ´ç¥SŒ``áØ…Ón#qçã¤ýˆ¼~”‰ø/!Ù„`Åp‚¤h†åxA”dEÕhuzƒÑd¶ØìãÀ;:9»¸º¹{xžönü|龿²,ƒq~JqeN d)¨bppñð)©¨iheɦ VŠÛU €D¤¦e;Áú…²p¢ñøã7þøÿ,ÙhuzƒÑd¶Xmv‡ÓåϺ¯æ}–và)*š…a7ÀrF|°íS\þñûJ¢Ê$:ìÞ` 
©uR0˜Íìäéëº}÷%ÔìÄ!ó”c).!)%M芴{¥×7è¾YEQPitF˜¦‚c±9Üðžë „÷ˆ­x,ó•ݳ’É {ªÃåñBQÄX"Ì:W(‚Ã}|ù^û‡·âZ†q<Çi±èVB·ãíôÀÎ#ž¡¼p¹‰°uezßA\…¿æºѳ¸o0˜«ÿ‘—LDDd±Ú<ÂBJ[”µäÐú²F@—c(ÇãxÎU+† *8 nðÜuÏ}<ôÈãã¡Mnm6Ïh¼ˆËMJ[Wf³rò ŠJʪjêšZÚcçâiÔXDH*"Ý0-Û 6$”…«†¨ÖhuzƒÑd¶Xmv‡ÓåÑ-):žm_Íû󺃹“u¼W‰GV4/ñ?6?uÜkgAÚ¢¬%‡ƒ°ÀùI®IAw–wXí¸™·…ÌG¡v50Ãå¹ëžûxè‘ÇáãÖeŸÛÍ•“WPTRVUS×ÐÔÒÖéŸzWB¦PitF˜|ÁбØî׊üü1|ßß8êØTìspy|PSk´:½Áh2[¬6»Ãérà #(†‡ÈKоñï¥ –ôW"Ó8kŸvC†­5j‚È4‰~žOßì˜ã¶@œ À!~H§çwfqlâîŸí¤é'˜GY(i‹²–ZÏ• é­P°À¹Aò Ð]ŒåÆuW{Ò­Nu;ÊhU5õ¹)ÔeMÐG5 LáÀ¹zû™… E”ñªjê!)Ôlli©jÀ™šËs×=÷=ðÐ#ÃG j×´þ…/±9ÔLd–œ¼‚¢’òiÇ;÷‰N2¸S}UìZ¨khjiëD¯ï¬oеYEQPit!A1›ÃýÀȇÌ1ë±mÛˆ%  (°mÛvQ¸X[ð³”Ð ë)_˸µ,p"H«(t—áÄp‡42MyM%©]ÍÃì®û)¯aL\BRJšðѵ7ß•)T& !A1›Ã o´/^þÖ4ødÿãÍËÂÒÊ:7ekg>òØ“Ã~h‰¾ŠÇ:‰®§Ì¨(ÖczCï=rxc˜%R¢‹%kŸêÜÛ_å-S¬r VxåûYˆf':Z©cµ:Ó3°.7ùm]Q]×i^%YQ»­Ýu&Ññî Æ€æÊbJ>[úïòw_sÓÜvÚšBÐëF3›9ÈR0¶Xë¡ "àð¶“”T.¸qþüEB±É¿IÆÿÎoš‡— XgÓg§/·+îëîϧG=è1GïmnkÞS›ÿŽOëäGÚ2òH16zºÉûFF„S®JÕØSÜ„«ºÇoý´‹|ÃøƒŸ÷›Uà*ý»C?¯‚ò KâÍhAh¹ÖíLyM5µª†0Ó鼉ÕxoF™?ü|ß®ê\¥J•`0 ƒÁ`zzzzUªvU¹+5™B¥Ñi&B0‚b,v:¸uÛ|ä‚¥•µ­]<<ü‘Çž¼-Zï ~ÀªNkÙõDÄ×|ºîèiÖÏúTг½|>¼ÈÃûÎÌûjðîÉØÄÙr'*Â?Å://oÜæåå‘¿-R;Ïttt”|¢\¶`±mÁiÚiÏ¢÷FMÕµ´Æ–Õ”K°K´%QÉzX§¸[ŸçõgE÷+#3&¸Íü«qï ÛêÞb:Ê»·Øôi¡}ˆÏ’Ma"RnÈY€Õ’+€,»Œ’°>í¹ ÿåˆíw8ªxÏ’Ve vµžs¢A³Û’ÑvP'»ÛìÑÏ¡‰±p @FP 'HŠfXŽDIVTV§7MfKì2ˆtôlÊxÄAjïGÙÕñDGµa£˜&Ì+ÂIÑa°ç;|FÁuì ÙµU7B=Zðè™—å;g¶ûÒp¸<¾@(Š8“H£JÔ­No0šÌ«ÍîpºÜ #(†‡ÈIŠŽgðå=ý‡Io!{T8àèÎL’ÌHžÕ+Qù®*©CqJÈòަDâ54žÍëæZHB§@ó¡?d!‰´JEV–QÌ^tÀùv$õöåËm•ocìã¼Ïšƒùl8ðCŽ?Â'$§pÓçç é ƒŸ!–È-»˜ñЗí+%ô™,¾'ýH~kü¥þþ§øW8Ù 9`¹|yÀÉ€ßA‘ÿë%äšçµÙ ::ˆ lü¶ƒ€‡/7£ÙÑ>ÀœŒÀ kŒÈ äÃEŒOLùÚC»ñ°’¨"E"SM® úG‘ؤâu«ßetôªr@.$â$6‰ÌЃM2ÏsQöø ‰?•£PYc4˜Xø[}ɺhÔ½âkI®×Ç5ÍÊÄQ°\¿—vª«£õM%,PÙb&þ6Û¢Q÷ŠQÒ‘÷#7);ƒÉ–-Û‡Ô“Œú¦*3öéÕÜ•)FcbáÈUc(&Ö%2@ÒH¥’½YÊeQr§Ò…¥P $ô¸Ëi§ê¡Îc°OPz\ˆo?s£èÆ{v˜X?sJqTFøÒJOéÀÂ.±l±8žNÔ Ùã´愌-q3ÊÚGãaâkIÄ’ #ÿᬠDʤêÂbêfD¦P‡¦^idcvt¡2ÇÀÄ 6nåñ•­ò™«èx'kBÉÄRJ2§K„€¸"“¡s¥@§ÜK•.ŒC-”"R™²eìêc4'e1ÏΦz”YNÄ쌲5÷•°AŸ²/î©àæò`œc†ÇB'c³ -q1ʼñaâ3“X‰…né4Fd™ãP‘0sd]ˆ¢(Š¢( V§”ØOÌØÓòŒÁ&>ŸD ˆ)`®JTõº0µÎ„g±›êRÂ1Å:âÚ(Ó» ¥z=±>²[¼„#ˆ)7òû°K²1KÐa6vo'ÀçÀ©?ÛÌû²Çû5§Ýwt½µÞ[aO'ð3?z°ÜÑ»‹™ò͹1%¢ã]wòµº¸†¡û<ã|ÿ¶¡ß7з½Y)NÅ¢¢a 
a¢a9‚-§·À†þŒËÐv„4j…,ôDÓÐÂiJÿ˜²}~´õhÙ­zø¦j„ûÒbÌ1Û’âŒæ†ã‡ñxÑ¢Ï Ü†‚1$ ë|ƒ¸qI Æì¹5 ù¿Ä5(ƒ%ƒzåé‰Ï[<‰ÃL¯¾õ_-@ÅæÒ;ÊPSJåŒØ÷½~³÷Œq+(Jø©ª ˆ ñHŠé ,~ÍM~© ™+ý)ÞQ—¿AÔÒõümk5ú^+ÅHëÐ~cH*öàfaYJQLFRм(“¼ªò÷VèÖKo¬‚55˜po¬XÏw³è zRŒW¨º¡ôŸ[PÞºöªù”zÊcç0°ÿc·dÖ²Ï|aÛZä®-BÕ6*gš[Ó0&V‘qPH±“€:ž ±¡Æ‹iþ`¼x^‡y®˜ˆr±4ÜtÙb%Ôàdܺ¤=~êÏ ¿™ž¶îÁF¹_F¦Êtàð'Ød4ùÃÞРõ JÎÓ7I WoøNS‰É’ì›QTÓpÊš«äŒÁ–F{ïèÙÍØ)©–AÂoë¬Õ“>MšSfH.ñž! §mjµªl1Þ­­ú%¼¾o('­ì¶1cBYæV;jÛ@GÍ0p0BBGxÙ6 ¾S+„YAò!Ýê)9á’ [Ô7q<]¸=+&>}Âûf(n0ƒrþ#N¨Àp¬\B5î‰Xiï~œ¶9n^…2ÑX‚|>€sqµàbžøÔµ¸ÆÄˆ¹­ÜX§Û—ø±TÕ°¾±CëFJB _¦ íw¡›ƒô¨4§¤së.m?%j1æÈ;¶Ò®…¾×Tbƒj¾…í¯õU>Ää\T¢E{Òèu™û ñ˜–_(¡qÓ±àgný…¬ ›„þH¼TeHië=_™C&Ê_h¾®0º'nŸ eaÃÖš>SÈj…¡ ;ðv¨†oðfTÞt `öNgþóI©ðÊ„:Å;5’§zµcS€2ÌÚç^Ñ›¢gϤΆñ¹ñ»/÷Ê>^ðŸI<¸™RIц©¹²í•€œÜÞ–GJgaBÿ6á.#Äø©`´?ò6Õ¢¯VPŒ÷6Õonsµ¿ŒLjC…´xß§u — ¥;br9ýXtÛ0IÙdiÆ—ýÇFÀ~.`ä6*ÐÅj¾úªâêô‚7t8šlwBÝÁ†êxêUÍM®Tÿ~½7ÎzãIiå>¬,ò|¡ù)CU¡C½ó™fŒšÓh!×iyv±¹s7µ9–½{¬þlêÒ†¾h£³ ›»i‚Áõpr¡Ü<BÀòZŽO¶:«®X|CÓ–‚¥/¤,ò³¾Ð3ÔÌ‘‘VÆÚ93A L‘1ã‚9 (®Ö‚w–<@£ö¬xòbÍ›/~–@ó‹ • ÁBà…Z'][™–ó’etßX ,xlÙpør«:——<«à­–ÏN#Z¡"VNJ*{¥Ê8¨PÅQuXŒµpêpT_a4h„Ó¤™“–€ ¢6œµCÒ‘dñ­.Š1\C1a]Mšâfw3x˜ÅÝ\zX×Tªçð² -BXäÂĨX޶NÝé ¬KwõÙˆB¬_ÑŠ©Z)Aþyf7„4D,—ÚHC[(pù[Pâ äꊜ¥Yg•R5Zt´Õ^£fvÒyWÜò¯‡ž{볟ž‹ ŠËjÕmPÁ„3¤È(°’¡ ™‘!9Òkò2ÁlÈ·+,oTÛt*©v”% "UI¨IF]ÊA† AA@:è° ãbNX TɈK¥]*Е²€îÈ*Ð2VŸx  x’áßá“߈NFÓEÿBFæ™u³³¬še`O/pÿ%ÂÄ©;Ôo£íÓаIóÓ§N8M¥ž†;ø÷êŒl ÈÑØÈMr*Q±'{;Œ}-`‡p #+U•{ÖøVÕèÐjÕÉ5!ñs²7Õ›íM÷fÒœBϵçŠT@ž.»tÐìçoQxýÜ$ý þá@.cɇ¶JèyÀK/»ç€—ÕpÇÒ’€VÀNÜæÿxð¼…ÆoÀ0—°¯š}_ ¹?€4 ¸žVëp ÝpÃÑÆ›8ò÷nZºíz |AéøßÁ¹»ž²`ؼ;îºçÿú/~~¦^{á ±‹4pœÜ„߸̡X°fÖ{œ‘¹ puDƆG¬å†Ê“7?þèØxøÔ®²Òj…Š•(U¡I‹VmÚu8IBî<ïКN½6HÆU¢X©re*óê~u4éIToými{ÛÚÑ®vv¬£Oê”+n¸éŒ£Î*wÙ%°î°½¤)í³·ÞyïV—ûâ‡ïöùª“°KyÈO)üà›_^u±YsF€eäm §À!‡›0jÌs?ûT~]3mF¨L§k«9}Ìé“5 GÙ2J6Kd„›°°1ãêÑgÐz[=³øÍvèsÚG…q}h x¨fëÌ)žOŽòüwÙx6vvüL»Þ?u²Ÿ”Ñ1çÈ ‘dÜdMåæ+ßù­@©JtS©„­ïùɹCëjZ0k.mäÑùÆ7#+'ïA¬9ážiRŸ%€}Òkƒxˆ·Ÿ$¹g°ÊV®>­gå+QªL¹ÊuçÛ ‡yz$/2Pªßƒ¼Íäc6_sÑÌç“ V Ñ/Ó a0Ã)4naº ·®ëåiCQ]Q¿ÉÍšQ‚¢m*ÎÖblmU)v—jOiöF€²¥ö•NÜ_”öw†Êt€£²–Ža¬U"¨ŠÃFʼn²Éâ;)ÇÏÞ6Íå9mµsòP*rA!UkÊ2ݨ½¡°øúlN@!×)+‰=\^ÞB¾»8ç¼£ù'ÿó/-î߀ã&âèÄâXÎdžÂ oÖý×Ùè/Ö`€sc[ø¯*ïû>Y«_xÙB¯oFžò¢HOÿÁÜÝS¸‡O™Ñm:ßê¡Æ}¯kÿùR¢áÆÖÎ{à  Í'šòy ÐV¼ 
¦ù4uʛśÏÙ›wãÁvÞfÏÊ7'JˤÒ[Q/ Al[:‹LNƒwV±À»X(¤z€ë6×›¯g]»¼(˜kQ6ŒR ¸Í¥(‘cm+X¾Ê°šà4‰®,`˜LÞÚvvTK3F+%˜‹ Ô}])5¦üe‰>Ø5•Ô^çúʵ7%[iV>ä'Ö)Äpm-À¯LÐ2 æ²+£ÉhjÁ!8ÖHÀjnnþ1e b+³4Ûê<íˆÇä˜.\¤ËÞÐõ8axÞІ„±ªÅØR@YÉ Û–Èo ¨½÷ƒšµ9ýTÜ]-´ÌÛj½R¦Ÿ&÷Tû*{g¾\ö´®¸¢Î^€)áá“®pëÑTrÄ=!ø|~s`i‡v!´ºç­/òvÏè S—ØÀ¯öLëCðJ9õÀÆ:å&t•#ŒÄÍ ¸[”°¶0E-G ˜ëV„¢…-o²m˜Ä €µÛ«³¹ ä #Ð|/)˜`]„APTHÄ%S¬Še…DÎQ1Àš(PUHÔ5S¬u…DÏÑ0ÀêÐTHÌœ£¥íß8×,ÎZûع³5»v”zÙózažÉS]‹¥º%û¤û€‘WOõÌ–ê•,é=`æçÕS}³¥ú%K—þ½?y$ùÌé1.÷›:½4·“{;¸s> ¿)m_•š†¿ipóŸÎð_°¯~-æYä-é)_T×À<¬Ê$çÆ)p–ºÉ7;G÷»!CrÐ4^^—èxŽx¹z¶äà:¢DpVÿr0’µdn°òÕéí\7+[ÞB„SEˆÖ—¨J5ªTˆ­mÂTÅñhÐT%‡êÙ€å™n’¨b­,Bûìr·8()1Þ/"x;Kéìw1ÞÄ0r²qmÕݘ9b¤¢„`ã–­¹ D*j{$»|ÌHC¢/08[ñQù½ì(ÇqÒå’ÌnrY~utan'%Ö&gFR^6Ç5a|mœrŸL GI/9¨í˜³¿÷Ù74sc¿q|ºíöá÷áêùˆått%0ÑëÀŽ•±\Ö ¦¥¥æ(ªb²¢ª/Toiïó?Vãí« Ôôý¿õé4q©}±J ÑUY|1E%22ÅÌ®  ‚uk'æ0¼«`­1¤‰¡oÌÕªR—Ç^ý#}Å䱓ùî»TÎ\ØDìfó"ˆ¢ù^v8¥ÑoF¤+¯~»=B€VSsH ˆ•&_¶ä±‹ƒnAò·ñ̳ ËÈuÖñ»/®,Â…´€r‡õÊs°YW˜«ñ9¡£1’U—} uÓ‘hk¢Â®A£EÌ:G_&\—?“ ×'ó¹ÐÓc-†?BT"Ó35 Ió¬S‡¹¸äcà!ª¥~KPò’?Yýˆ¾(í:i±Á(«±­â(_éaÄ1Ð,¯¹<|ëÄ6½^lÕ‘ö’pWgI ê‘ÍR*=^×µ=€å!‚¢‘ðf k-K4)€°}†< Ê*YX@ áÂJ&Ï–ÆL»Õ;»ô_A„ÔøüÀ<¼1ÕDm™²"ȶ!ýl@ÈæYAΣ„ìÂÂeS*’»#iË Ç~›ÅV¨Ë„IâáµâYÄâȽùË“–Qá !‘Î5U+ô"¸Ê`ãËH?í ƒÆhÝŠµ¤ ŸL# +¨…¬‘ù1ÀM´=l ZûÙF©þ†v$FM¥I)/Ëœ|¬ÚYkª]‰ýlX!ž¾ ÷ÆŠ£Gb}µ ÷£YÑ!FŒ#BöañHÙn½^O6ý¶ºQ?Û-r¦Œú"Ü‚:Ñ)µ¿›SzXèu~7!wû?úßa,.„22–CA3‹$èŸzåJÒðJò@=º'UN%ÄY¨ùÌ¢¥_¸éÁ\Sö¡7M—ÊÓOw ÛT_4_D»Ú(¶ IæýHÓ[Z3ú!RÒHûW)>Ñš¡Vº•…$Ìâ¡!¥ÉrÚæãIëb*ƒ,OÂy˜ÇöÐxF¡k²¢*ÃÉÊ”VqµÑ¤WQE#M¥ØŸ¼•$´B_ád~`Ì@¦è £\_K0- D’ë«¡èÚgQ›™›=Ë£k³°h U(ƒ )t^‰·e ‘0~袷óo™+ˆ.)•Š–øÊ/‘Ðäâ? 
¦ÝCM½j¢è`)åÞ›½ðÂ;+:om˜xR«S „ñ8èÆ+€ØQ‘Íýßb‹cjI~&>©‡9¸‰I ¯Â%í ’ô%£&k jFöÑoå!Eªf=aSޱ¸| ÍRÆõŒM÷cE=#Á6ÓFº%,#εÄîÑšM¹ÝQÚÆóhãš™šÚ‘…ýFïëDOo‚jºñÅâ“ç§±«lôŽ=r§D®\hí廩O{” tV>|‹´ÏÔ‰Ê,œƒúñdmùf´íwk Ê< 8PX¯èmŒ¬sÁY0!ŸöpPQ¡#äôâ×'AÇÚÚ Y„9ø›:mµO˜|wìK„èív_(K2Ͷ¡R'U­”Hƒ9]ÎHЦ™bv’eBÝ©¢f1zeâݘ0†2Ô_ÛðDÆD5”t vø?jUm=Ž;ú-}&á‡cÎ'ȰÿÚÍùq‰ŸümÖ–Ü#˜;¥ÉñŸ‰¯eUÏ´ÿD³¡Ê„©vR3ÃPùÌ î¤û YÀàÑ|±L­8ÅùÂÂàÅÙ.QKW¡ «/ ~£Ñ³wêÃOÛn;£&eÙ ¨É‚Û¦>5 =èÁib'Üì·GŠrnùBäaåÌ.46=çÝ]ÓÉ·Üû£m\‹íxmÁ{®93Õ0_:ASó-›–s$+4ªia2®Æ«8ìЄWÇ¢£ïöh€Ko¿©‚Œîývª1Ü„~RÍë?פÄÌ[†bÆñÂ4„ðäfóOcƒÓ£òÜÏ«œ/“Žã\;´Êôv,¨TÓÙûËgÚtp5_o–5S%^íh”긫0çð8;KknÉ …™TäZMëä¼».¡™‘ÔÕBXP‚á‚*ãPÑûÍ5/eP‡ð0ÙÃUü$zn—¸eöS¿ïžhÞŒÿ ÕǤۛ‡d‘e@¬MÀ$‹÷”æˆQ¹i»iEC¢Ì(ŒÀó9/jú\L¹ í3º¯‘öë)ZV%îçÓ µ·õùöUÑF Yr¦4`x†3³Ÿ's!\äQg´È<È55K!Œ<ºÏǨ¿X²´d CnÞùëˆHÚ?”õ;%%hÙm-Î-þarr¾!¡TÕxé ?Ɇ63'BBu¡êD… ¹­mHÇë-ÿu¤ÉÌËM·N)Àl@‘šó3]AJ‚w‘ûUC¿ÏJW%ÉtÙ8”î"öO§/×J²H% ¹dh±¿ºg-êÌýz ìª1qwA»7yh5|æ[–|ð$1Ê>«M˜Â‡žÏw˨>E”Ÿ$ÿô¸Õlšo]þÞ£hM<’ 2=¸f,)%ëX  »Î]qmªTg%ÃbšÜV€\,m€ˆ%ÉÙƒW·X«Ö·¤˜ï¼²£–?C–ÒzÝT‘ç¦Ôî“îð9–»„Ò²¢˜nsí>ë.h; 7¡RšðLˆäÁl¦«‡áI¡™d2"‰è ]5k8t¯ºb²DpÞÛéCw¹ð jaÙ{¿n†ƒ…1[ƒ 9tñ¼äeÌébuÌø9îSÇäz’Aì$¢iP .c ÜŒ3ÒŠ@Qv2O_… ‚ìh&+èØ@‹9È p¨kL¥ªóÀ¨„e£¼@"P' Sbà`!“dˆœ\ /w3d0Í0aî÷DBã• µÒ%y²2½,«‘š“Ió¤ PÙe¿ìƒæFZoÃk)yÓ7]Á!×ðc¿o ‹å¤ 4!F¶Í%æn!GŽ4‡¿ì‡¾{`šìš¹ôÐúTÈÝ+_ÛV¾°ƒYî¿}uÂÛØOô׋ã|ßTø>¸Bb{÷dµ¿iÙçrIž¨ÄB›:Y ¯4²bNÞƒæ‹èáþáAÏæ8}Ñ÷ml7ÕÍÐþô}Ý€s;æï%ÏqþIÞ5Ç7ðc×Î yÿãS üsjN4a`|{Äá 7Íþ®oM«føK‚Y”“+¶bÞvpÛ5©Øj±­T­Šœ‰«é+ñh&g_WõàÁ²=Àþ]èXÇÿ»CCUòº¹öÚ ›n;·©~îŸ%xâ9±hdWÙÅÄ*}¶’oT®ë>ñ>zꛄ_°FÛ°Dt­Ë7¨uWp&¸)øPYûfáý²ø $»ðV휟óân~Y¤ˆu#Ò(?P©Q!&2»˜¼ùM¼Û®ƒ‹–î.¹.ðirË-~éX»Íø“v±5f‰Î3S 9ï/9vÞ½6«f0å{ ñ§{MVõÎ4§©YÆœDø³î>¸„¬<÷§¹‰kYk»†‘{–kg÷Azá*Üß+âJq¸ÅŒ°Û!j8'®Úí1 ñ§evž%|8%OV¥çÒ?^«ÍMÉsÈK úþÇÎ5»LÌoá?ðY44Qömbm˜Ð”hS Êæ,Í 1Soþ#Œ+\­«[šþ]»ƒ9j,m(S Ã÷¢ÿhÇþX =tG+4!f»RuqKDC!ÈS!OuÌliÅYðC³0*‚ Y½†w³`ò Ò…µµÃÅ`ކ´d_è^xPÓÈsƒmÈ£1§ñGI9%¡‹ý‰rëµ±äÖç䑬Èoõ¿¨0ñÎÆè(ó$¼êÙ×7tµ¦F ¢‰ƒ3p>,a½å é. 
%!÷ Ÿþµ`Fß§j—*s'ä"g%úŽÍ^˜°HáÝG¿ƒ,[xn–J)/óθÓÙZõÅ™±~.]2XXàð%ŽR·û‘z2è~cð_Ûÿàç¬ÄɯTXû” L?ñå·åÜzÍn§ü¬ÁÝê¡Ùü$zÌA°~6¶÷®'º0Å(tözÔåbb 5y´¸z8­ýòÈd+kÖ .<´%"U¨ 4'àÎU…ͦú¾kÅy»w°ZͤI¼¤>C•áû"Ðc $üFþî#‹Fö”Ÿì]òÒ½v"÷÷yü{î#‹Ë#߆k&sÿÐ.Σ/Õ—6%jÜ«õÕCÉ7»òhƒº’¦D­{MNõPÊ ñÖth§Ë¡ªÛ–—ÅG|eÚ(ÖDƒPfR>îb–)w0úMzöB™Ñõ0"ÇvAlµ}}¬$æéêóÌÈÍÀæ_óMž•üæX¢¥¥ÅÉÛ2“gÿÞsô ¬´$‰»ñ ½hæï•±Å܇ôÐ~cæì–®+Î;T¿ôï«â}8gMÛ‰‘.luzÜ÷_Ţܓ¬B•¤Ç"2ß)¢Ø+’M]Ÿs8½±Z0lnR\Zöʰzeê¯Ñ‘X>b6%{«@³W%ÝuöØIS¥`¹¹yKýêÕØÏg(Çüô['Q—õà?]XTMˆÞÞ¥¹5ñW¬{:—g ¢%™1þ3M^DOÍŒ,äÑb¦Åmà¿4Ùw<±ÉÅŸ«ÓÑ—ä4Ï,Pc®‡N¿PhŽã«'Ž HJ1Nüot¦Œ/D "º•6SŠzTE޹ Š±œT;G¥«ÒV«Ï,\r9·3£ÝÀvÉèÞ‘ë½ð(ù—\Jz¾ÓYÄøVAµa‹ˆ6–"+®>·H±·Þ™œ–ô”‡aH?ýìîo^°« 2臩õudê\ª‹`ejRöòî­Òg²$”‚H“ ¸iL€}{’nÂ÷Bõ¤9ÂÚF³TfA˜ô} ¾‚©6±Ú4~òEB{Êo_÷ÌÚßÅ:g÷qh_w¢ µñM&`ÊÝ“}M ®3œ›=¶«¬>c„zSqÒ< f«ÿá™k™w qi\•@Þ—=qöÄ `˜t¤ÔMüÞÐag6¬…Kˆe·Ôá@×û=dòž˜×(ß·“8ŒŒRB.ìè”l?”äk`üÍ}¶Z`½•®¡û/ò÷êAúÔ‡¨œ}aûAžVöoì) ðb_©#ˆõ¢ùžÒ¬jv;RU°Ö”PÀ4Jì±îð„ c½M±3Ï`Uö$ 6N§ †T¶0¢ &[µ‰fsä\ÔÕ•tfâ„ny$ ±&5ËD‚ªâå9é ûñÍ>>eFÚ‰ ß4$ääÀ¥öax~^¦¨4”åJ²²r¶È”Ø&b6K ìù²8M!}yÉ\ýÔü#+ÝZoòZÚýƒìí`Ï}À=}‘¯.bT‡KòÓŠëë=úËw\q·–.—'«]õÛwCÃ×½½Nfš¸WÌ ‹Ô,mÁ³¶ŸÊCCNnØ>ðÖ×älÛd¶ÌÀ°FJî ÕW6}R¡PUR™˜7Ä ¹(SoÉ^/4$Ö„‘Ë.‚7Ýwz%³Ü¶£¼HÓw‘Ò<òvÉ­½Ö2+A"òÐ\"©9×Í/À—é¾jν¾ âøy)¢ÒÄÓù6§9༻Ûsõ?Ïßq°Èá¦æ öÛR¯õ5—gX~¹m~¼HÕÄ9Ü’µß¦ð qÉZ\Yz¹©ãº%5ªŸ-™È† úÁ3·`4¹ª*u«\u²{Ñ^CiA·ðÑ 3Þð£.ãÂÝÀ’Tß©ÜG%#ߺ[ªâ,O°oÌ? ß©´ÞVtyaHAD©ˆŒœE›²Ÿ¸céÕõ©ë]Òj逫¡Ý°œ™kèŠ7k…¬LAÃ@þ¯{ ÓÊ,Eæ( KíBÔj†ìfØäÜW?½¿ÌQË+`ôUÎGà{Ë—·+1S(íg~w1Aié.´¨Ïô¬ä·ÞG¯T>AjórÀÝMô-@¾¦|(\7Ðû´YB´Ô4(î¢n]õ}ç ÿ/7a¿=;dV_€”%jªß&«–÷ùµ”û/ -¿?P)æ¹IƬÞú^äÕ«~>àɾï PîS2—³ú}NvHC¯2Т\âõ ó-Ä­÷ЫOäË'õ.1H-ÚÁ‡µ#GF^½îû"ˆAú n|*ãòW¿× Ù ù‰è’´'ÌÝäâ^™vµ2WTÎÉta–‚íÚ¥|¿¯¬L‚}Aâ$¤t ß)&ñAï³Yáõtaîv€²¯CU ý;*ýé™–ÏB‹/À{ü_‚Y~.A¬ï-Á·/Mh÷ÏÊ¾ÉØ@›n|aë5Ù<º5à¿Ì°òÒÙ‰ÍN¥ØªÜØI¶Î _–ÀeîRšäs”ó‚JA:þl‹µ™¥Ž 7ûøÄVwLmÀ‹ß~†. 
ÇÃh$¡Ÿ ¶úãæâìŽ:}p=E¿¢kG¶þ ô¿ø` Ø74ªÈ-ÅF‘ÜEQXg1ù•dÅÎTLE‹9@_€ÅòŽyÖ3"ÓR€-—c3Ûh<6Àê±ÔñŸ;´t‹¹ìsúO¹cý\逃!Qâ/¾ §,ò(°`%Î÷‡•ú‹›”œ¥“D¹oÍ•¯Ë>FjÜÔÛÕ0++ÿ,oQ6º/åq×~µ§Uåž–=ûŸPð»ìŽ~5îÞxßé2~lj·™~!YêÑÄ]ðýˆf­¬\þçCXHD5RË\Î0â'˜Ÿ7ùÔç1ñA¨]–*|tåû7!„ÍÇWo]…\fÂ1æÌà裉v–è:ŠŠv ÒGp5¯ë t \`þál%3A]ý™½ZaMÃlÆ„Êÿ» yh“,}¸0N‘Zˆ—¦SÌHÿpReoBèäµH‚©ôœ„yrd@‘eýgnÂ/‘Wb¯Hçîk0ÿŒ„x}òœž“âJéÎLþ¸{ˆ©E–ÿFö šá ]4ÍÙŽæ¨8dOj˜ÚOÄËL$;“ jš˜§êü;Ø1×ÇŒ)$ £­´ìhC-RŠËŒÌæÙÑ‹ºËO¥·yÖÒª3SœIjQE…|eqH¿Ÿ’)ˆY=uJEŸÐlÐ&/m¨ß)ó¦VD>ÐûÎŒàÚÇ"| Aø¡%ìéǧqž>‡ªïÔŠ‹9’a±ñjg_´Ë.Ó®Q¡å¹K•|OÌ%Šã) gH;A§}äô8ûP‡º'ù¨}†”hã@ÔËàñ³ׯMÆ Yâ("ñ€’Éá‘$B°"ÔœzüH:&æû7 $É…`oû¿„L1°2U›uévrüûûÃ}zpï} ª'˜ÙŠ´»ptíœlÖÎËÛîìCÀŒ`€Ôýv"¾£!Þ|‡­*ëå짦wp´=¿è Ä߹؈›î;³« ý_¯cpÒDæÁ\Å[©”Öo¬(•p$®HWìiÏNÂp]Lšå± ÿŒ§åý+a¨y*×{¡°œ#wÿô "z*ÅA%D”8oPÄ% êgf)! p~òhiÕhZº‹+÷ å, œ¾% T S€ZÈ ÕśܜôÄJì}]ªCvèKvP_“êŒyeNª®NßP>÷zlËe[­y(2ˆ¹Ym½lFnLDsÍÉÂ Ò Ìz~ÕS^Ò4EPÛS.µ?gYµf:/W»ð4—Ú— tŸ•›é/VÞÈŒÙýË“2SòDi3ñãnâ÷ß‘’s“JñË^)³ÛÍÎY%ýd.¹ð·í@XWä°M*Ý9³×Ø–WÊœ§­‹;‘5RáíÔ,¤(módq"aÆÎ̪̪>“áJ“ó½Â\ªpê_‘_˜s zî¿ßÈ/ªÞ¤x)KhH¬ƒ ¹™…;«\¿øZÓœñ½Ì@É|‘v~zõÒœu¶üW ‘ýë:ÔÙàŸ¢Ù׺v@ø;Ñßp… –#)ÊAƒñJÖmØ›xÃÉÎ Pûd±±0k÷{_Äö;}ôâN¤Kp,€ñŽs£3<¼è›ÙdÙ8ík“Fdb¥T+>[•6à-O-I(„ådÌðɯ E3 xç¼–¶à'µÁÍ«p¦OÇã§ágÔ­Ÿ†xµjÁŸ<ÞK$ÒÿP|‹¯®àZ=òï8HóBH…«Òñ+òÐÏàB:(|G^½áûlžyÁÉŸý›Ñ"¯¥†xG°–¯ Óò+ŽçðH¯;µøØ1œ$‰ O Gÿ‚GcÊIî(¾=_”¨¨ÒÇ;ý¼)ݼÖÅš/Ô]þSPzÚºÓ†_Ù~®”ZIqEý]ÿ²¬: Ð?D@ô9xúmyMu±XeÛaµ‰Ê8òS·Rí ߪJZØŸH¼„”!æžå5h4ßE›¨­+HW‘c>'%±1×ì9 ûuk—%—\×&FÐj渾ù$p Žt¿ –¨Õ3È*D#¸ïÂt ãçÀñà?±ÕrÌu?Ï´BÕ)Þu£ï1º_MI¹Q.>ÁÁ¯7^CGØ; ßÛ•ªÚk9‘ƒ [žðóƒ(ìdw}àETÿͱ˜H@|:˜î‹þ üù÷M¡°Ý~ˆÑz²`,î»O?ø8àOCÚâøœJE¿ €Ì Û¦8'Ø/Ç@ÔŒ´ÁÔÛÛ!aL$Òéž^~{SËÑSÅ(«8~޳vK›sÕ†Ÿ1Z½VNŠÒf‰æ1ª¦C!˜á,:îñoHdÿ,óþœ°„¢Ì¤z2Œü¹ŽÂn¶FTJQeÆ”ˆrýN[7;üa ³ØY!'÷ú)•@Ôû3¿Ü—×ýì5üy3aíúàëÐZ^àŠ@,È;ìW<¿cd*îÏX ãZuYí™úý•àïëGÿ†¼ð%¥¤5}g|üÈ{Ö¿cÀ_€-‘ä ì)ù®Ø”×û PîT î)vuĺ…ò2#ºñ£)¹]1YçeæPâ¢û~3ôâ3áד'0»LOz–ÉåH1ÒÕá7’'ƒ'MTOz¶!žÃ•a¸«Üæ9ø‘K[¢ñý[Gð„ÑsñÑ[¯Œ®Åãûo‰&ŒîÇæ%Do=6€øÖ¤ N$¡wOj`Û´ŽÛoªŽOÑA+*/ÒiÉUxÜêûôo*c*²uÍñá5XрϖËü÷…CB¡QxßO˜RR¨˜EÂ<üœ8$)ÇÈÂÎCërtŸ{N*0b™ql :k ‹ý;…°“-‹%P*e¯–®º­‰Š¡+¢¸*­š¦x‘¦ÇÈÑÄÅüX¼žñÐò|€6t掟b7¾¡o*èKÂøŸ)öŸ˜Z-ëü0üÉ÷~õ6âûß]›úkÏ{üØÉû-gbØ÷´Ë‹å1û6‘q"–M$~p¢† ; 
“ì–óÈ=¾øq¸_^†¶U­_»ü]ùZú5ì“2Ÿ±ÄÔê\&@y9ün1z`ÿž6x£34§“EÒÍúÓ·`úNÆ)g|°uj!}W‚+åe2¹z;†f;†RÚ]„+J”hb‰wÙð|Ø:yH£4¢Ø(©Ã÷«{=Ù7­tUïuâEªãeNö÷è²ZG m¹Ò}+NJÏnÈ ³s• YŠ,vùôEÄõ¢7m ž|¥%@ô ’tÂíÙý|ÓÅ o6 ºÞ%-fT´ìž­°B: ìF*I!"¦š¨‰ŠHOZJX¶™¼<Ìó8bçsÒ~¡W%2GpaÃQFlÄÅp nëbt/¤#T²ýÀûî‘¥æÅî²_öµOÛλ¿&{ÂI»þø@!ø–ÚÙû …º*çÁ uMI,øÝ/½üÑEß´wm.×€þ‰ühÛˬûºÎ_ÿ%½t=û:xrÞ“pÙÞŽxÿ6PvûÜßSÀSS¨«« àÔ§ŠŸ€+eÈ#wÝ$“êÖNövïS—„¹¯û˜Ý.Ì}³1—cîM'@ª}YGˆ×.e FtÖeý;¡°,@°] Z¶½(¶úåú‹ 5TCâaÒ è óªJ>n)¶l¨M½%:ÏûoryáSéòWlîV>Ûºßç²ó‡…êí"„·lN.À-ÃiZ4;©ÿ7©Ìñ|U/¬ClàMO‹yÚàÚñÃ8:©e| Ö½(Yã uãÍ™Òý§'¥ädû8¦çM7idhù•+ù¿Þyö+. Q‰®@T¥ðO±¦ȃ‡°N–µ‹®Ï¢T‰…¨\ªBÀP0à›ÉOÖ ÁÓÈÓ‚ê&J›6¦ÕÁ&˜¥¯ Z’A`6ç3b°×ÚD8jÒjYÐÎ÷ÛLÝOš™ùÇüW©eÂDdºYÂÞÈ‚–ß,³˜©=ÉcD“%§¶Åôa|'‹ ƒï^ŸH|÷З¾ÐÄ«¯lúM°ôÀ‹ÁÛbý|‹ƒO6´âÝ#8m¡™ç˜û?äóÓ‘¤7VRÁ…í´BŸñÑ:h\œ z6`c<­°\HÍÏq“PóN¼y= èÌëÏv`žï&‘™˜7ˆ„­ ¾÷ä „<ÿ´(h6Ž…”·ú¬`Þ™€Ú•xÇÙäåç¼X‚î§lR¿ð-yÿêæ©p¸ó¢žLÄû#¢… E¬:^‘BèÊ´–Û™OHz¾>e~}ñ–¤ªG˜‰ `´ªoI0¨Ìc?°ÓeÉü8aR£ŽY$z²òóYiÏÉÖt%¯»Â½&¹Œk‡é¢…¬ À:x#ð½²`Ïo¾&ò3ð G+\Éý:‡zçÜ†½€L ¼ÄÄîkûwdÕªm¦ß7ª±ÍÞ}øaˆ•–úÁ×o°·²³xˆì(1]î@M1ŒÕ/øœ¸b‰ÿêó· àd×-f«„•t‰"ãyNÒu%z)Æ‚ð·¤\H/÷¬ä–:y& ³G_[›/ÆC6òÍi+½37IKóûÖz ”D›D“„2St'LF‰4O‡ó+‘ª3˜Ûaƒž/êb Y‚ÃY6¾2ŸÑ,*H‡U‰ü„õe¹Ž¤JÅœz̘¥„¿¦´rTPÔýcÍšóVЊÙ¥®þ²:e3ÄŽ”F¥žð ÐÂu¡ ‚>Ú(¸æ³gïQ‡ÿ†ö”Ÿ+¯Žm?ÒrWÒÐ~Ï´~mÁÓù³2œµa›ÝUð¦+]ÔÉ—ñ²ÈÊÊKÃu¥7ÙÊge¯bØé"ßùÀJ@؆ Êoö”òT¯gƒ¢®#m›³8s{{ûi‘wSvlNeSCxøž(ÎSÜSŸ™ÿYŒ‹‰wp“N/ážpŒ÷÷orç–”œäŽ;æö;Ʊ–ב!Œ;N&[çnrôy;¾©¸ÓòãÊ 7 ZÛn¬Þ`ý±Ó³RÕWV=[;D×·Þ´­·<ØÏ¿Þt¬YÜúGLîÿ}«ÒïÌÂ2v}ÖÚp)G«¸/ç¿®;gõ:ÜKˆ’X—ÓÄ…œÓß9 aç‹[¢NÓ¶7ˆê2ïT|Ã=ßù¬®Jç~œ/ªtaÈ-cDÒ×+÷Ÿ^„ñÝ«tî žvgGçÙ…–îÌs5Ïæýö  hÝ.T fö´Ú^³ðt¢Rºœ…½‰ZmObQá¸óîÈ–Ö¡¤ÕBˆÎk-%J[ó%ˆo^Ù‚nYٌǗÎo‰Æ·Ì/­-ÊCrX¸kl(s*‹(à$ ¬–#+— -ëͯj„wtøí̱`†do¥yÖéñþsŠJų̀BžRµº©úáÞˆP»{,×N-Y7?®÷ÈšDYEF,›£~ñþF/mXneE«ô¨ßoƒ¾æw“ºæ%¢‹~3akÜÙûØâ,5ó¤éƘ~“÷P€ãI%%Û’ßu&¸5é5z·jßüßZ;г£þ«ÿ1´:îZs9¡©OzVÕs½qß-!_1ö¿Åre§5çå)·ô\0´ùFÔª# Õô³e°EæwÆòDnÄ~7óg€r.õ×üÔdz–ÆÈŸY ;Z˜âŒñÀ‰è®|0¡:î;MH§ÊæåŒÙÚ²î ­¿ç˜ç‹[÷Ÿë*´:á…*´AüÖê¹> 9òFØÔïºó¤A¹ $… ëüÂ` ï 9óA,?H¹M‚ˆ:ŽÜ' –kT˜©¥! 
Q?{F>Ûs·/è:£n¶tS‹d"§u¶f™Êç ü ñÎÔÐTùx+Àš3ëëßÎÅš™°5Œ¨Wtô+6|Möƒ[û%LÂþ@ûgqÛÁçaFå}ªiý³ƒ¿­ }\ †Ê©]‰ç¨M…´ ¹bÊËð Ñùß6.X>ŸÜ¤‡ï2’Ú~*½Œìk^`cøbÑùûS—ÉÍ¿ÿ'R[â~W³ãšìô±‘nu¹-t@¦ÑôÊ®ßøäÇêY¶1EÕ`ꃥ/m†õOTwØV(ªÓ$ÌqDtPUYQ,{†j%Ãïb!rEňÂ8Áñ‡áW€ÔÓ÷Ð?LÏÿŸóßÓzTkj¶P­Úö›mÿ‚zî‰EÁ,5|çpaÊ›{¶ 3ò+’Š#KRä)YâT«¶t\Fܾ"2ª¢dÝ@Å®­`ßéàôéò9Ö¾Æv±HÀÙÓgSÁöç!Û|ž-?¾EªÃ6§šó“Ï×\™•w|yÇ©ŒbûbFy¡l0Ü5Ÿ· >÷n>;'—— ¥ñäh;/šZZÏF]Ë­ÂüaL’§j¹ÿmï(°ž¿(uuýPÐýð­”x´’¦Ÿ£G gG‚—1–-Àø½˜Hžì†´Î²j Ùò&™/Þ±8ÅQ¿@×e½»zÅ]S»Ø‹q&èDSmpfÔ÷r.QœÇå‘UIÖ‚"“ÕLÚ›põ 8³ÒÞDþ@:Ÿ:š˜þ€ì&lå‰G b"ÎèsMêü¬-™'*LÊ7ª˜J*Wå\}÷ÉškFiJ]\¾UЗW­;¿xÙyãL×XZi5o,_µ€7[ÓòÙp¾ÜDá†>CС ØÐqÿØÿçgÆòyfÿóû•Fà×#¨&¸•‡i„OÎ÷®ŸGfD ï¾^Eh¹!ö†á›¶ÕÌL6ázÑ™1LŒš-¥Å7W¡„‘@øã„I—è@ó’rqs¢„1Ç,B¾ÆVŠMâJ,—k€Š̬ïŠÈ;ûæ ¤c4J‰º8&óùI$B‹AŸÛ·&¬ì4Pû‚‹“wÍ9ñ|Mp3£$,6lx/t8})ÌßÓ}öïp47Bè‰hM™©eæ°ù­€xž7“µx*aqúN}·¬…MˆøùÊf.ðro´zr—éUΞ©%OŽ@ù¼y#€ßFðCô‡-G©X‰XChl.ÆQq M‰nl4š·V @[è P ç¢îlî’Û1çÞÞ8/Æ_$ÆFÐÁ÷s2^1¦ˆ˜¡E£¹¶µˆ­æGEçGM¯%|É5ƒ½›‡c‰r”ÛŽ…ä—B'ÿ_ÄU“ÝB@ÆËþÈ»”ƒß1F-âaȽÀ± úîÔæÀ¼4 coo:ïï-èÆ‘WãÆzõS};htµ%òîû-‹ò}÷T’…¾Ó@•¼µ¹Õ¥4Ã[ú }þ õåÁ"tÃ+y0õƱ¯5ÙIÔw:ÓÁVy=êb¬•‰/°ià<Þ`j¦_Ö¢šCâ࡯vúýàÒÖc?E#¢î }Ù”vµ©†¡”ÖÅë,q³´±ŽB‡ŽÓïp ,iuȮͰ`S=3³©nuÖŘYÈò gË ›ËNlî™»Éq–[Õ~%imñÌ…Ù›iGOª¿sÞ=/v…i–|[¸ÃLZj*³%¹8qË9òGt¡­ÁΔ³– ïÐüªõiyÊj¦JOª¹œ»ö ìå¸B;£”+#ñgùtÉõ¯Ë²¹%ý®ü|E3Çâåõé»8ÿ@¨Ðñ–ubÂõøàK.¦}cè-Ê”‹'ÛXƒiÄ%+z”ÒËæyܳbvVį Öîÿ•tc[\-P»£çô Ù€ÛÕÞ² üò$78w›¨ŠVäHzÑ.6¢¦òsÉz¢×_ v ª|lf¶X&².ÑØ)ßCæ nÕw½ùmörë’4G}=Ø<ÓsihéqƒGWÁ¤S@úà6ªîß̾’waÐçpéµ Rùh…{a¼‘b í†Ìš —"’2aG`Bÿ”\*»=›„C.Þ-ZÀøuK¼|˧¯Y±ãj8¿_%#ÿ»Öß>íZÂö´oþ X;NŽ‚_ƒ¸à» º Y½_ùKw²†.É5‚ͯMKqÿ;ƒŠÉöhÙmou‘O_œçùã3ûÛ¾ž“cûí;Éinæœ<´)!ÝKкÜ“h¶ž¦¹Mˆ£ÿE ÆOå7€)ë¶¢e÷¨«×ó~® ´¦–W¥•D½ÜöO™%œ½9=(U˜hÛ§åÿ¶j9BwÊòýTÑ Ü‡ãQ°5ˆÂúuÝÕZÿ)¨ÃMƒ`Ç’@kåìý–\­§ ªÝ[»ìØfá4Ì”¥èïëŽAà¿_ï\ ª,y6¶(eZ„¢ÚOOßeo…9óVV]MêK|ÃöÛWm€­1°××Ï>ÌŸY"XníðônÁ2õ´t%Ù“Vh¯40²¨)ªè2±EWÑ?þ«8"nc$/°{ W—^µ4¯UÎlpÊ";¹íYm¡ãä •xÕ0½Î×PdˆÂDÊ)[bË]dHú@`œ%"/‚ØÃÇÉLqfdcïÞ­zSL³,sWëö.šuA:ÓµŽWãJ0É£ÚDžR­:B£êÉwh6ô4î[óŠí‰]ê®ÚCk¼Û&)­èM– Kâ”9Ô:™-iÔUÕŸ’%,Sé©ÞW™ŠíDôỡæ Y/1òÀ;¨q‹r…ò@åé.Ù=¶œÀÞDjOÛœ¡Ê¡6ŒPnÉ;¤‘ö)9R‰BóƒSØ®úÚÌù7#<ŽŸ8Ü¢¯œL%çéŒHoÎm×| ¦ÐEgfë’M¯_þÊ™*¿ˆZ…L×uÐLZFµ,;¶V¢×%«Ø×Ç.¸º!žžùQÉý—.ŠM›G‹Ã`çÒžÁ$'Or±M£Kê!–ÔS:0ãõÔ 
?ä&Î9Xø£ðùÏÐFx#ÅÊ‘<;¶ÙæwÔ®qçE^û{®ñÔ• %–ú‹ÀîÀêàéŽN†ÖŠë6ŹšÀ®wÓ?½0¾Ðõ d¼£þ¹o2Á› ¤µ[ì^Ñüòê ¸—`ÇÞxNÔ=Gù'eýKû²–û€‹íœh”üÅM<ôN'aŒ»ªÆ¶7»_ÎÓ^‚^ø¡šôŸ'€Õñu¦¼¦}¥„ÓðkÐ{ÿÇYøä¤4óªŽb]€9ïí–rìRª½BX¦Nëu«¬úE…öÞý dkYÕ6j³«e—“H>µOm HËÞyÇË 9*_IQ÷ÌɳÑðZn³UëäãPl®Õ›ÐÞê4[ÑÂ]HÌÜC˜VäuNôÓêœp‰¤v)7*b/dlÌ{³a¹E+xóUž95˜ÿç¯Í6þùå:f@шe‘­ÄùÅ¢‚-fÔ;ŽÎÄ7Œ ¶‘ÁÝíUZåoFöóz›ÈeLÞóÄÖ)´¸sb ¸ZÀÑÕ³®2»ÐYÏ‚?›8Ê h4{R-}³}Åˤ(mÙèùe7s‰¢<ì['øi­7Ä ÆÊ¤Ào‚Ò’)8(ÿR Ïo©12ß&·h JíIÅñ=ÜZ½ƒ4^=eñÆ)™Z©¡{?ý¤òc‚†çNå`tyîÓÅ@‹SZH÷&áyÓlÚðÅa†¾JóY¥ªõÅg4üQÉÔð{E#Ž#ò÷%§ÍŸŽû¥y|dêÉCžüÐcåŸË€fê"ÿdON.dšÓÝöUé#,òO>gòM ÔÏ ÙðMù„•ÇD§Ì?-í@[ãõVóf\·­Oö#ö¶Ýàig”ÃÚ}ŸÈ©Ó§è5ñÆZ?/¥ÁkòzVAÓÚ¸ôÉ~š²V¿D¨;«\iýR¡jöÞ p“s‡öAÁ‹9•ªºt·ü gÙ&°êôk‰oÈÊKå¢~Öܳˆo«S^*ýŽe¼•ëÕÖâ[‘»‹â)2Q:ЊÞì{A‹¯Nyn%{€ªô;7<·÷O¼¸·8ußÜU ‡qÜh­Lñ÷’öáé= V^ß2«¨Ÿ’§7«­¹÷ÁŠuê'C¥?=¿k¥{”ˆË)ÿ,¯þGJ}Ã*©Ó?&Ñ߯ª-xnOåÔÜÜáΉǒêg¿4Z‚ª¨% oNÒÜÜ©O}Õnžóƒ†‚ª´U%3½sïø»‡õ¹¡Ò_–ñ9±[•ÕÎmݠ掼(8±èS¥«öËÝöÒÜUz¨Ïw­XÅY,?ò!N^w÷¿[½×Ú̧óŸ >¿ó}ë¸õ÷Òû¶£çïo\|üùñàß;KÍïŸ?W¯Krs¢&Žg$#»Çi3ð³Iç¹'Ë;×xãt{ø>¶x f™Xì~@m?ïOÓ%ûí³œEÁ±ý±Aw-äL2¸c>6j¬tÃ|$ycüxÙ(UÐM™åèÑn/@®©ý_!ïÎ]ig—±À2ήü\’øBæ$Ü÷Áå˜ûqn/ïªÈ6µh4!ôŒ“Ò$nŸ%ü¾oÔ¦^¬Mçdeèb‘²§gœ@;!é-žDB%Jk\yiíñÖ•Ûâ´Û¡^}—±+©Ëÿ!Gæ$õhé¶—€EžœËîʽJ&jœžòh}e ›ŒFBi”(5I’¼UëÈ–ö¾ÕžJí}kJ°9¥dò 9ENñ6Pb_B”ÚI´q*Á>€^Ý"˜jß«yf„àV:‹ûÞÿæL6NÄä 3Ó4K“²§öÅ“Å{Hï´W&ß“Eoeès<›=OndbI—@çäâ ,çÌPÚÕYk›Ù¿öí?ŽôNgD'hÍýšôsªögËÅÔi¬scu-“'úÀ5p>sªX]7g‘?‹CÑÛ®¢^žqÐ7nÇü¿4HS¯ŸÓÿ…kkÂüŸt"ÀãòboŠ+þéß}æÛÕ…ôÛûq@ ¿æõf×íŽä¡¤Ü$J-V$©+ø0Ê ÄÈ*Þѱù R-%©ö‡Eý¯LeߊÝéžoKZŸ¨eð#ÿ²ª5ÁqŽ$S’²yÜ’‹«Ñ·µj+#äm…ìèŒ_š}{œ«¨îÊ9§°WC E´û5JVž+E K¨vÌ÷#˜N´MÆø~¸„4Œf}N1£Ò5ݲ~Ý4¨’èÃ0$˰ôõþö! 
&X/ÛÓ¯ºž_Ί!ꕤ²jk´GËødSãbŸ¡~WrˆH"DÕ‰8ÿˆ­´eEÖÑYN6˜þ³„0Qö]‚Ÿz5ýi^ey§¬²-þ2¦7{ü—õSÎëÍ_4ï/¿eKíXÌ/Y<{á7âûÇÿwÌo±\ßÚ­B¯‰ºï<°A3•ƒÙÃrÄY‡=ðŸ§0àÍ„6‹?E@»Æ1Ž­ÒÔ–Ìîn‹ ¦¦_s¨D3j¯þyaú*–ç…`Î[–xPëÌ+Õ_÷é·kf¦2}uã"hw•@W–zmï*Þ9¹9ëü#fš¡I—c=Aïº>Z;Õ+Z°HFE,,7Ù¸{H©ˆÊªÔh¦K]+ >Á—ýëêÄ l?kÄmÞl“Ù®àwåY‘TÛiÄ!nLG€Pøiså)oüØe_& %Á‰ÓxY®RÑ'¾×‡ñÿ”ìl&À•Œh̃EFs.¢eåÖr¬¬Eª˜O²RR÷å\—)ÍÇBóEm«)¢és–Ãs‰‰µuÙ2×)ÙÉðL”ô϶nSÕ.êŽp™ß×O§ÄøA{M[t;³™pÏ»ôÔÑ[üê@‰}gÖ~#im xÿùSK#†e:rxÑш#Bu4’d4x–ÍÖ£‘U»UDR‚Gø}>ƒÀâZëäd(Ô"܉yÏäB0‚b,6‡ÛP¾‘÷;×ؼ²–VÖ6¶Ýê ÆzÊŠ ¦“YjnQ ÎXž8}ñè·ÉÝAÅ$¤VMòßÎiö¢$+ªV<3¤À¬,ëútêúÉã–I’¤¿…4¹i ÔY°?I’$I’$I’$IšIÀ—[rrë”Bûs=÷µ•Ð’’¥\*Cª„…Ó "¢Ð1ÆVÆQOP$’*éê&seFU5õÉNS ®¡ÓM· C3u–ëlr \^>‹ –»uL×%ÔÓo Ì°‘@cÄ&jÚÌØ¢)žXÞã\ÿo¼bâ’RÒ„#Î{æN2…J£3>~h #(Æbs¸Ç[À@X„ép¬!´±ÝÚ`#BÃN# NÔtÇŒgZÞ5c±Ãe9ŽË#ˆ’¬¨ç^íõàýý_#-]÷D ¿­§ÿ|#ÂàˆH, =vo$2…Jcçà¸ß¼­r'A¦P_w: &B0‚b,6‡knaiemck·§I<ó=S ÿ, þ6/x„ß¿×<‡i‹²– ƒ°póˆH Ã2¶2Ž Üx‚"‘4¹i*Óf ;‡f®ðò‚BªšLµl´· E\âvñßöÞã*ïeƻˊœ¼‚¢’²ªšº†¦–¶ÎkŒ; 2…J£32Í #(Æbs¸E*M‘CoF@1œ ©Œ†›Ð<‚(ÉŠzÚîŽÍ~Ž•Ü'k«zßeìeZ¶ã*4/+:†r¼œâVÜ ññð½`qÌã ŠDÒä¦)PgÁÎaž /'(TZÓµ­Ï¼¹'ôÔ[{†ç^4²º }£¾ÈoâÏ?Bpß¿}}¿M¯¥­v-Û{î{ýÏqùQ†_VN^ã÷‚¢’²ªšº†¦–¶Î ñ2û«¢(ˆtôlçðª.²ãË»¹¦ØÒº„ ’¢–ãQ’UÓ³˜1³ssG%YQkoè6{Çæ™É㻀Mp`½¨9ø¿:õé?w=IMs¥éR®´Å¾2^+LŦ"i›M…¹wûIØ€Sv[Ø)#‘HÄ£,V„ ’¢–ãQ’UÓÏ=ð«Ç=CÉXÂÑQk±Öã(ã1ip‚ø&[[}÷·ó›½¢$+j·Ž¢È™…˜•ey!„Â…¯ÁB!„B!„Â…÷SÞŽîÝtª„B!„Ðî›M} ·û!åñÈ~‚§ç<)""""""""""""º•MAEã*ªŠ . 
Ì Tp+U›¯o-úm Ë­ðlU­í&n¹½¡Ö((È_¤$Oûì̼wéîA”dE=m€÷H}Wa¯°û[;{ÇÓ”Zô® LͰߣ»oãÂfIVTíÑ?V+DîÍ$ð§þ8^%YQOÓ>‘¿ÒeÈAØUØ™‚ï̃˜uLH’´Ž6IZ‡‚$I’¤uI¾?.¥ „9 ÂÂ9‘0:†uledê´îÜ\x|A#ì·ÉëmûãtÝíãàôrÝõ1x¹ÞI)TqL½€Œ ‹Íá²FP 'Hjf˜Y8^%YyÔï®R®lAÚ­ÈY »Ž€bñ¯&tjºã ³¼G;ÓÊÿïܪE»usŒÜ¶ÇíDIVÔZ±n³w„kµ7ÞžwwÊëaÎØ³¼öç¿À6 m6YÀ€–‚¥Õ ÒJPÐ2ÀJÔÔ^h+ Ò)‘(€•¡Jz¬DEm@Ch¹èÃu¦²êì€8¹Ò¼ø-’¦f­,ikHGÝ>`z—€풎؉1=3}'2±qñÑP…Ô*mCŒ8 ’¤H#È€–@BFAECÇH« `€  0À½u ®2 „€•á6w¸]w!,Y)´Ý »½×¡PÓë0ÀM:H_fÏÊ~™÷~Åo~÷‡?ýåïžoöÂK¯z½Áo{·îóÑz€Oûø×Žx+Sq·~/È"%V{ºÍ½q®šî\ûá:©ªªÎO˜{ICÏ•P/WRJù¶y¸é2¤[´e±äPG°Ž†º‰‘jQè7ÇVÆQOP$’*)í¦>eU5õÉNS ®¡ÓM· ?ÎÔY®³Éáâòòõ k"j±ÖFÖ¡^wNm@Ö³õ¥k†ŒlÜ·‰˜n¶€"ó‹µm¾v`ÏýÐ÷¸ø³óŸ¹ñ­w1—”’&”ËÊÉ+(*)1õ]UÔÔ54µ´×‘.b]rÙ•×Ôï$È*Îøø!uYX2JLç,fÃáæ]Ê‹t:œùÏ¡ãšõëöãšLpþ1uuú÷[÷É)( 4ì=ŠáIMwl¤éGs µÕ2®YæÖ< J²¢ÖºÍ~Ž{ó>é®î íµþ탋•?~ýk¥•%µ”ïËh„‚°prD$ £cleàx‚"‘´RS7W¦[5õÉõ)P×`Z¢édPÎÔY®³Éqyùfµ uX×~=ô7pÚêQ›˜i3£‹<ësûçý/w1—”’&q˜m¤;IÈ*Îøø!€Œ ‹Íáòøa£!¤±b¹¦„n Ž¡á¾P 'Hjã™æ}ÏÒâÜjÇ,ï¶r·G%YQÏÍýàýý_7v™–í¸ ;P8 ·‚ˆ„¢Ð1lc+ã¨À‰¤ÉMS™0KØ94s…7?× ©j2Ö6²ÑÞ&q‹??[Dþ‘ÿãŠ?|ª²rò ŠJʪjêšZÚ:Ïw!S¨4:c¿+‚c±9Üô¤«Ò¼¸Ù…°dÅš [v J²c`_ ¯yÃ[ÞñžpKá3_‹÷Ý2SÆhy™ÚdÎf;ãjwÔ† 3 YáŸJUóW‰BÇøv,ã¨@…'(I•duS^e¹ªjꓦ@]coZ¢é–á™™š¥c¶wŽ–¹ðòå ÔÔ_KÛÚëL¨.a&ë­/ìmH#U÷mb¦›u©_¬m›m‡©]„½ç~è{\–÷öbþo¼²—”’&”1'+'¯ ¨¤|Ä\wU[MÔ54µ´uºtÉeW^ ¸“ S¨4:wú€Ogƒh2ˆÄ"ƒ´Hܤ%¢¥YR1‚ ‹2HWø63HQ>‘ Ò\©T43’°pÄu,þå„&³©éŽÒôŒ¹…Ú*™–YrÕó ^@”dE­í×möŽ*gWúºîEâð¹lFË£óc½´ƒõÑ~Œ“gA ˆˆ$Àpom9ŽpñŒ:¤ ÔÅ·î(¾bžýöÑlà×§3ÿßn¼iŠáI•I=[ÊñœÄÀ®6ØÜ‹Í^Ǩóɺ~^ËF€D5‡˜ÃrcñQÈ쎛«rß®cö"ôÍÂ%N³²¬»Ã0Ru§ž<ÁªŠ¢¨ ‚ {QzÈ£ ªé »/2~žÎö~i½µ|–±À‰$$ë¢Wõ~!®DŽŠV<¤¦e;Ň@Yyê¬üÁ̧5àÏ€ šþó‘É&د_L&H,WN+³•×wIÕ&SWƒšþP•š<ƒë3vϤƛ³ØG6+Ù­åürÝï|úŸó˜[àˆYn*ΘÕ4ÕÿïóÓ ÐÃ‹ì™ø<ŒÆQB@€€@@@‚±Ò¨’$I’$I’$I’$I’$i{Ýf_SZŒgqÚL§´œëd+¸B,ÁµÏùÞyÜ=‚(ÉŠzZØû.æÞ‹Áx|WXŠ´¹Ô˜ÞćM]€þÞ©‹È$ /¼$I’$IÇã%¥6|iYÊÉ=îõüÄW*f³¥òSjÙ°s¼Ì»Æµï÷æþÉ¡ 2„[Z[!cìÁé >lÖ›R½XI )Lc»7ѯ‘áyä¤÷þ¬¨U•7Øs³ x3Úuưɷ±Þu÷€'hò „Ï«ÄVì?œ´ü`ºØÄ%ã5’1’°hÊYr‡Œðâ"Dz‘®ÈŠÌØ€0¢hk5v7àD#¦ª‰'ËÕ‰5Ç»>5Õ/¾ .y—+ò¬£‚÷®½&aÌÏ.*ÂGÒ]…:é½ãqÞyï‡ÇwÆöf°¼9,§ï4ñ÷2M𔚨ÁnÅ¥þp9D!“’‹Ò¨@úœý=]Óô"´Ná’îçôšö ïÅŒ%WELÇðÄjs;9GúF1‰A›Îò~ û?ö㼇G%YQ«¥øƒC ñCa5e3–ÿËt?å]:FKtqωEGùϼ÷‹Ù/¥jåëú¦¡ {"—øMeYWa à+0œ&6ÖíÙ˜û× ÜcŒ$‰é³ýN6Hæ †ØÂ¤QÜ 
þ{»úô"4Má’¾©,ëIBD`ç89_^7ïŸÏ©ë'>Á“ ""J)³ÕxL¦RÊÌL©1òñVUUUuŒ‰ø^‚ ýnÂB »³êD.‡™â}WØ™gÕ1¯Ó#­IdWÿè;û>&jÛ¶mÛö¼!{nÚ‡­ƒ (Š€œ º$I’XxNž’(U¦œ®‘$I’$I’$c$I’$I’$IÈÇ¥'d'åJUëËû•$ÉD]¬W_÷¡c”àˆ‰“Ç \rè¬aØïž¥m*Š¢ETG[t«‹nQÚ1‚®† …U÷Sè‡ufê¹Ùr4|Yù­¼ïsïÇÀÐÈØÄÔÌÜÂrË#‰³Ë½Àì#Ô.Ï;V%IÒêþ×Û—¼ü¬aÕä™ÿÛmæ®<½ê}ó'ëûó¹k(sëëók4gËösnPï0;Ïñqþà“Giýó¿ê¸ç¬ý»ÁB¸äèѰªÏ™{Ö£»hpUUUUUUU}í>ÏæZ$ç: Þ£Q´l–þż‡…f‘,K*ú®O8`ÔwK-Їù0°ßà’Õc–ŽÖI“Hîòl>ò‘<ìCAƒƒ‹‡OIEMC+K6Ýs¡ç °¼ìh¦VE˜ )ša9¾vé¶Ý«ã|§uÃÞ¢H²¢jú±@5§[3ÁŠáÓê¡Wû^†_5Û<Ši‹²– €°pnD$ ýƒ©¿þ['­ö¤[r»2ZUM}rë¨kè´Èéš©9p./_@°0ÅšVט痂ïq{ÿ· ʲEN^AQI¹ã]éD;µª¬†º†¦–¶ÎkŒ_ö ™B¥Ñ™ÜÁбØn‘Zc\S'Îl ¿îÖEÁ€ÉšÝ1./гéÉÖssÊS-éÖÀ…@0‚b8ARtŸœ¯Ç·p‹am²PEˆ¼r]²C{”‚‡JË€3sÐ\^>J°°¾¦/3¾çøí̲EN^AQI¹ãëD§;\uo5Ô54µ´u^©ïÈŠªé† „2Ëv\UŸÈïÙYJTr¿¬WùÂÀÄŠQ#‘H$‰›éG6ï[ eJ£N)@…ÁHTòþŽ%4ô»KοzMº– ÒUPЩS¬DõY{ÿõ#Ÿ%o<¿¥¶æÓ&׺Ÿ¾ùs q I)iB‰I{§Y2…J£3b‚c±9ÜòÆ›}Âs*¹þõ~¢çM½ÀÒÊÚÆÖ®Nb~„ÇëÉÏf u’Iù4WƒšagI6׿kƒBï×ã`sm¾„–­¨­9ÅÀ·IÖIÒõ”ž¾aF\&™Q%‹¯mä7?ÀC\BRJšPâ}–Y2…J£3΂õ‚’$I’Ë—À½avÆOe&8Ë&I’Ë—gÙý…—{7¢€F>ò}j[ö#¥N[3 c^˜÷•V]…D"„ر¼ J²¢99¤º<“Ä"+÷ïog°ººÊ¼Ž›uûó(BŸ_íkÍ~i79¤ñUk–õ߯äïJ.=ž±œI^«çiÇÍãœsÎ9gætnÙ%—$I’$I’$I’$Iòk­n ÀôºëºîVÔ´\·í¢~KùëB É­ëÖ¥|ÍFó)t¿ú(B%fÙ{…ßWjáσPkd!VQÇ÷£¯F`;d“Õšr€-Ƨœ%wyiEN­¿»˜Û‹0Ø‚ð‘øUÿª+ü,¶—xµŠºÂ +¬°Â×ãÃ!Ed2–› ÐÏÈ¿»‡náÀ£7á#OßQ‹=J†*`9È­ltï*UIIIIIIIi–ú?Ûe§)“Ÿc±ùˆêðöòòŸw­-ã¦o EDDDD)¥”RJ)­µÖZk­1ÆcŒ±ÖZk­µÖ9çœsÎ9Î@ eÂ1X\Ñ­ur²R¨E¸óÞä!A1›Ãm(ßÈûkl^ÙK+kÛÇn«c° UUUUUUUUEH’$I’$I’$I’$Iö+'&˜Nf©¹Eb8cyâôÅ£ß&w“Zý5ÉsN3 J²¢jEÁ3†˜"eY{jxà€óކ¨¯ƒk㨀lj¤ÉMS Î‚£æÂ+b¿Åï[ï¦Ô ÂIÑ ËñÝÚzÔ. 
J²¢®¡Ÿ_¢?׫Î&ÀØ @UUUUUUUUUUUì3< ªî½ ª‹ªªªªªª*º'I’$IZH’$I’$I2$I’3 ÀhàëƒÜòdô·§ëqo*Æc­µÖ9çÜOØï…Koñö!íL\ÈŒ,)WæÍJ¯²ŸëÃßΘ5÷˜ß«{}oìGL}Ós²}õyž÷õ·ÉÁf;…ÈL¬T5?o!ï%(t ËXÅUàA‘HšÜ4•i³„C3WxùA!UM¦ÚF6ÚÛ„".q»øo{ï¹ÊûÌx÷¬„•ËŠ¥rµVo4[íN«û;¡#v*T¨P¡â¨lͯìVAFPŒÅæpÏ{Ì~¿¡Uþ…ÇꀔþЀ>zpŸÅÐ K8: c-Öze<& NÿK·¶úÓÝßÎoö"ˆ’¬¨Ý:Š"gR`V–õ6WÞyªªªªªªªŠI’$I’$I’$I’lÛ¶mÛ¶mÛ¶mÛÔ ¡¨¢ªªªªB+]È9sªT5ïä "†BÇ°Ž­ŒƒLÖƒ› /h„ý6y½ÍÿP’%ÊUUUUUÕªªªªªªÚaöÜ×~”n¥d)_—ÑêîID¤§QèÅc+ã¨`?ÁoE›(¤J^m-ÐMÕ•U¨ª©O–)Pר›–kºexg¦Îrš£n®ðòI…ÄšµˆÖî«£`Ý©´a¤Ùú²ÁÞ†4RÓ˜t3m¦¥HÓb¶=ïçï{.øõòàvç=î±".!)%M8â,÷Ø¿ü)d •Fg|üòäëUů{û«;Ú„åXÔ1¥ÇZçÝÞ›S¸yœÿ«a‘«înèÒ{„ÆN7óѹ¥×x@@è`.Åâ_MØ$ÔtÇfyG=ÓÊ_/Q~Û_usŒ^[ŽãzQ’µV¬Ûì«®î=÷Úù-<`è€vHe#îÖïY¤ÄjO·¹EŽ×tûápb$I’$%-I’$Û;‘mÛ¶/$§$I3I’$I’™™™™™™X æü 5%¨$Kù¾ŒF( 'GDR Ð1:ÆVÆQŽ'(I+5uÓxeºUQSŸ\Ÿu ¦%šNåLå:›——oVP R‡uÝá×C§ Ù¨Þµ‰™63ºÈc±>·±ï9ï÷¼Üc".!)%MHÔø)­×‚AD"yÉI¸@FPŒÅæpy|°ˆÑÒX±‰\SB·¶’¡á¾P 'Hjc“r3-έ–f™›î<‚(ÉŠÚÝ:Åãý_ÝÓÀ™€ÄÅO9á~Šd^Pc›7gAºŒÚ-Ú²Xr¨#@GCý„…{ D¡c|;–qT Â‰¤J²º)¯²\U5õÉNS ®±7-ÑtËðÌLÍÒ1Û;GË\xùò…jꯥmíŽu&T—° “õˆÖ— ö6¤‘ªû61Óͺ‹Ô/ֶͶÃÔ.ÂÞs?ô=—åuÙ ïÍ+Û;&â’RÒ„2ædåä•”˜ë^µÕD]CSK[§‹@—\våµ€_þ„L¡ÒèŒ_»‚º‡,¬ %¦3KÑìMÎ%s%ï¬üÅ‚…ét¸VV>ò*Fç¡ÜHÉ)Qs„qƒ…ç9_†š“Zìˆ%çHZNK¨©:ÊOX*J²¢Ööë6{G•³«a÷"{B{mÞõs/܀õ×*ùƒMì¸R†'(I“›¦@;'4^þ® ÐQ¬íZb^Ÿ¹_£ªªî%Û¬¾^›Õ_‹£ª>¶61‹$ÉÙ:ž¼Ò >ÉŽ+Ö0C ÝDØë€© æsXn,> I@uÜ\•û¸ŽáE(èÂ%NCYÖWÜ"%I«7fJ’$I’mÛ{TÛ¶mÛ6 PUUUUER’$I’$I’˜´öæȆÖå!SB–÷Æe™ïÒ4W´‚ÖAºaZ¶Sì: ”•ÇÌÁʾÕHì(€%âU‹OjPU]_ÎÉ«ªªªªªGÔ“ºõa¡yÈo“#ëÅ®L1Œ…çŠC÷b|ÔòyƒÜ àÄ@’ŒuÑ«z¿¿Dέ xFn˜–ígœÊÊSÏÁʾÓ5Ž €Oÿÿÿÿ§àÿÿÿÿZðÿÿÿÿÿ Xý~ !ïÅß±7‰ªQ™‰e÷iíËzÝu¢ê«ªª¸”$I’Äu’P’$IRO†Ca¢“#‘HZ¤R#É\V”Ü›j…¥ŒËÌú‘Bˆ/£èmœäú5J)΢zôs#ùÝAפTRaIvOä—GqQf”ÚT‹Ql$Ó¸áñ•mJ>Äýʨ$ãJraR «»*)rF†Bþ;¯–eŠƹêöÛ¯‹v£ÝNí¶Cƒv£_ ÖË .$××ZH‘ ŒLtä‚ÁÛZT$Õˆ‰…O WQB%Ê3>ɲNyA¬SI0ïçeëëÚ¯6{·gvû—Ý´L¸åXZ !Űp"2ÖÆA¦P§u4‚mÂç –›?oּ틃s5øv ×Ý8½,·>/×›™B¥ÑÇÔ„`ÅXl×ÜÂÒÊÚÆÖîŸÿ!÷¼áù™6{ñý͸6w´a]42®4•²™tzXÑÔoN(žHzLOEm)oÕs³0³Ñtw©ìÑo/™{r†H ’$ å@’ªTUU•¤ªªªÔ‘®P‘Fm­ÆîïÜ€hÄ´C5ñd¹:±æx·À§¦úÅWÁ%ï²cEžõUá]%zM~.*ÂGÒ]…:é½ãqÞyï‡ÇwÇö¹UEQUUQEUQEUUô$I’$I’$I’$I’$I’¤$I’$I’$I’$I’$I’dÛ¶mÛ¶mÛ¶mÛ¶mÛ¶mc$I’$I’´z9H’$I’$I’¤ßUÁŒ¬Œ«OÓž¬öœÛå5ôF±Ä ­³Üt}ÅsâØÛ\ J²¢ž–åWº 
9Hqv>*›è>øç·Á$I’$IZcH’$I’$Iž50€/B)Ÿ‘}Øþú3ÑT°74ޏ,}§Êœ±ÔZŸv³2•Žy¬ßyKªÚÑþüÊ%įZy‚',IUUÌ˜È ÌÌÀ¶sÞãÙ¶m;çœÔDiÓ¼HG¥6©ÔQÜ[À\÷êK%äÞèR!øCún*c›ÂN)¥”RJ)¥”RºýúÐ! ™”\ Íúù¯kš¡ u"—t?Ÿ à·sÌŒ“eY–eY–eY–eY–}b@B I šifª™™ZÚ A$‰¢(I’$%IÂ0L’$Ifff8Ž›™™™Yg àïŠ*£cü±¦¸¶ä\X;5uK:ßÕ³_kعŽG%YQ«Åÿ¡aH!xVSŠ™Y_;¯ƒU’$I’$I’$I’$I23333333333333333€^ªB¨„ˆû9å :#˜5š£*G*”ª­8mh5&D,1kë,®o=“o.øò*ˆ’¬¨§eü] 9Hw)ìL\3–×ß´Él±Ú™ ±º\Vù´›2t™kY’ëîK•Éêê³Ç`,tLeYóßOüüôÓO?Á/ °¾€óqUUUUUUUUõˆ‡ø"þ“c^;mÿ>ø:-Qíš­:­È7{}Váƒj ù¦ ‚ó=jÔŒh¬Ñšq"íç0ØõxñNq*ö×év/¹ÅÏ=Æ#qîœÛ½’çíJžON€y^Pöüž |¨k.À`ô^0ÕØ^^—fpº/Œq¨\Ž7xá…’û¦ ¯&\çÿõ1”ÛàÕÃWgš½zŽ×赆k‹q~f×Bg`}mºäÏVçeRj Joà9®ÛZö"rA]ÿj£‘1Yùz¹šJ¨¦óͶܛº&Â-»Qí¯50+v¶“3:3êŒÊ„B_,˜HâáÕ»Ó#³AÕZëßþÄ|òÀd;¾ÃŽÍE\Ëe–•L&3|“Ò¶ÂÊëüç?op.\d”N£tüÔ$m%=¨þ|*™|¶í¿µÆ'ñ&Úh`9–g…á«K…‡{­4 $¥-çÍé/Ÿxƒ ° v‚!#„g‡*Þƒ;0”QæNv ØMœÒV*KÛ»‹v¦ŽÍ“,kª xƒ:¢˜É¡-R哵·¤álÂvÅ'ï”1ui"v)ƒ3øp‹gõΦ.çл€ìsðÈm¬,*{2§çƒ20I,rè8¨ÓpTÈÙzPUA¼z3P&ìP JÆûj’åF¨ÑH ©Qjð¡ÊN©Ž½I ¥‰š¤OìoÀ²êj’¹ãgI0*Êx “¬ë„0•)Ô®ŽŒh¾ ôYCä‰ÏFCê)2¨9oN˜½ +lòÁ¾Á¦2h#wI¸ê¥éT Ñáwà+†¤•@¹«þŠË{ckzë9Ù¿;‘“Ë­ Ž`ÛÕº;©5!CJÖu\.±7.VqoÐŽWVö7AI@"žF6æÃãS Õøƒ’yt°Ç”¶6ŠÞtÂ0ä±€‚ og'çWYW©àHÙ‘k ÖÖô <.ÐeF®øl‰’»<žº‚Ç7â iû:õx×—bí…37oÂfl­õ¦J³‡(ô®½VŠel]¤ö¢½K“0 Ò¶%]>Ø}4ÂnEAS_*摞RbLqÐÅ7WRè“´µMê8¼47˜ŠêÉßQÍ$íšÕQòÎe0 ´¼÷XŽO H*y ×'¤ë-j-¬KFô@R4Ê¢Â~‡°Zê^&­[ÈÏjÀím¬Có8’°ôïw(«FÈ”}©©ª9`cà“¤Q â˜éi¥5”ë4FCÔj¿¯2é1 ù$šØð>ý ó2)5P¥7ð÷Û[»}! 
jõwÿj£‘1Yùz7E8oHpûÒˆùf6£óùjåþgF¢bÅÎvB61Ð!ëį̈3* ÝaIԔă¬dèÛ`Ó€Jp±þí±`>ñz°Äå;Âpi¿…kíD{D\Ȥèí ¯ýRÚVX9µç?ÿyã(ÆšJ:zü,¿5W_Y6ø§÷þ<œ=Ë#kã¹§7ÿÓêD‡E{/<}Í«Ëf‹<¨F_†Z´þãoéÉËÞ\Ôô]ýÚaXH ý Ü¡,žÏÜÉ|ëbùÃnųœÑA<Ú9lžKÖ´ouJGCé&fr° >ϧpäI¹'‹à2y×.Ëó]Ú•Ú.eøFts±ú,ÉàÚaxÈ>;{m³Ùö²Ÿwz>ˆn"àôþ;½HêI‰Qá_ÊØ‡âÕŸW(Bª;ô…²ló¾šnW„°ªHxÛŒRI)Sq›êØï›RŠ™œžUycHPFÕšä vÁÏ:T ú‡Ã'Ï!cÃáÀÖ zH4ǼLL[Ö\›zDú0…n/‘âK¡º¡æ¾ï—ðº†^Œ-R>$³9Bà@ ðOÔ»U×Í1<®%™(óE-Íùå^ä½QàYôœ£'øâr8¬¤)8âÆ^›.¯ã&¤^ é¼¶—S7.¸°Å@ôY6¸P²øüöãùðyªû3:Ð… ÷ŒB§ŠÂÀ6N-Ðâ£ÂÌh°0ú0µì(tu†ðU¤|"×Í.¦qö§BrÈÍ2x“Â¥‡Ò!ê>iŽ 3jþ·nqG8˜l•ùSnLMÙïh»Õ±à£3Z±tf‰0|Í÷‚+zhÀ˜½¨Åòˆ£&œÆìwo¬zw¢Þ,=ÆRµ7áuµUMzÞ´©Äê-¬¾Å+jBÓà;o4xÒ˜W#í˜ÜXPæ®vëM¶æ½Bíáî9n.¯ ȪÙë>zmIqß,‹Šïžðnò=9Í:ç0£knk†æÈç¦cAÿ~‡Î®^ ºÀì Üé­í Í~ú"Sz\Ç,u^^¢qµNZÄ( ´D¹ŒêÏè%¢|øú¢¨ï Ó%DÔ·´!,¿ÿ¨¢]/üY@µFÁ@@$àìR ÝO ½qзx<sÒ½•!½\ÚoÝy—•1EÒOÇŠ‚u€¹2±°í²[d!õÚÜ%WÂ"û'ÀËh±˜þ-¶ÿÝ/ü×B; 53–¥Hª.8ä ÈigŒðôؘqf¸ž ebžD„µ%Ë4Ë…ý‡¾š¤ Ô%\6=,§=Dzµ“–R*Oë´™IÐaÉJ©©†m‡4K-ëxEÌ>|¸n1`¾¥ 6Úd¨t&¿žìtÀvÛ1ì[û,/•-˜“Ž%}òÁŸ}µßÙ^atÒ1p˜“÷O6¾¹áô›¢tÎ4~yV–ãOפž*å+è‚\ç+LU;ÐÖ÷”¯ vÑ¥*\®2„j5AûskÔT^Í]wÅU3VÖR«FÅ΄­õ,µEõ¸å†›ñÍpZœ¤t“àU ޵ÙÓ¶È¿pAÑÊ·µmŠýíñü¶2¥ÊÛiM»ÚÝžö6êš‹Ø.v a•*T5¢Š ㎜h²©¦›QÓ¬:µê5jh®ù$ j¡Cö¤#¨AñD²>v¹‚N9팳Î9OIåBÍ”B©‹.¹5lÊW]s½×Éêh7{Ö-·ÛcKwþïn”ë³µ²ÚE^fb¿Þ¶ˆ28¡Õ–ùÜÏ3lê†l‹"nËàÚÏöÈÞËá8p›µ´ V]tÒ4œq Ëp¢Ó;d¥Ð ²n— )]k“›ÑsþtÝ÷’™.m’ÙŤ…¾fî_y¶¤â—I׉]Z¢O_îØ7Æ’q.O†×j´xfçó~’%ú’$EC´2Ûäý"j¥c+¤ÿQ(Tû¶‰É.Ô5yGÓEíä$r7ö"z]ØÏõÄ~7è÷5}T›†ña…_ÉðÖ>jêF$ŠF0ÅÑžú¢¯gA¾mœªK/›—ªRó&kÍaþÙÛ<­sf«ÓÈÕq…ή^UÛ:ÝÖìö„Ë6XÃOì×¶•žnaûѰ}êœo[nç÷‰ñ®öô™ý?_ö‘ŠIÑBG»È’Õœ¬§1À6QÇi72!5CÖ×°Æa¸ûêe”aí WÍ‹#×rà»Ó·O¦ÛrW~¤WA wAž®gMÞYêÙèË/±ÝƒÞ„©À–¨Í0œ$ECdØ$Ä›ô¥%Ý´WÝšbX7ìòÿ;,ôØèL9^“‡µÏ¯?Aø#8ÿÊÿÏ!B{ÃËèê;äãs õôO¡{ýuë ¿¦#ÜRí§ÿQ3#:«M ®s­8ÿŠƒ³+çf´}ÒQ¾âôÝuùÞôèíç$‚zºÿP-ûaú^°öEVprocksdb-6.11.4/docs/static/fonts/LatoLatin-Italic.woff000066400000000000000000002217241370372246700226020ustar00rootroot00000000000000wOFF#ÔXXGPOS€6fZ)¾aGSUB8 Íܧÿ“3OS/29ð\`™à¢¸cmap:L#€ä‰7cvt >p.:$'Qfpgm> § —ØÛðgaspDHglyfDP[®4gq¦headŸT66 7hheaŸŒ$$ÇùhmtxŸ°§@;lkern£XmÛo”–¡loca4ž¢!ëBmaxpÔ  8nameôem4ÁÀ post\û ±ïgoäprep#X{ŠöŒý.xœåtÕúö÷„ôBBR€PR¥Q¬4é ¤wAׂР5 Bè $’C 
”$@‚½ÉI;Ác½ûûÍ›!†z½ÿ¿ßZßZŸ³žÌ)3{ïy÷ó>ϻϙƒÊPJy«j±r}ðáÞTÑs¦OQÆO;Yµ™òÔŒiª«r奵2ýó±Qé±K¥ÇU”ëä±Ó§©ò·®üm$›sˆÙÆͶ\Øû¨Zº´ö¼'²Û«½÷ö¾0èË­F$Œþ·2þ[zXatvâãr¥J*]Ø¢ªDñxI•-U®î®¾ÀÝcžÇÝl¹æ3WwÏiž»\ñÌôòõ¶ù(ŸÚl|ìþϰ­ 0¿Z‡jcªÍ«öqµ¤j¿Uû-°F`ã ÷ º<ÿ8¸¯%ÏN`Ë . vVïW}~õuÕðŒ­ziˆkHTÈúС÷•¿œ:‰mÛ2¶mrfBèÐo¹™ï Í-=ìÇp%ý&…7 0|Û¢š.¡Ëj>º,¸Q͹´fîçç†7­¹¸æÚš 5ÏÕ¼P+ V£Z#k«õb­eµ¿­X¯{½y‚#l?Ö;#øµÁ§‘¯4t¨GT¬\›>¨‚ôq¢óT¤þN5Ô6ÕXïUMô/ª™Þ©ZêsjŒþJÍÐ%j&˜¥óÕóìgƒ9`.x¼^óik.T Ù¯Ö‡ŒlÔu@]PÔ @$è¦CôwÆP0 ŒÑ¿cÁ80LÐ;‰`X¯KŒ l›Àf°lÛÀv°ì_‚]`7ˆ{ÀW A‰à[ðHb ‡@2HÕU rÑñÊ[;UWêyªƒ>¯¢õRµX/Rkt†Ú© Ô.½L}£Ïªx]¤ôKê¨NSÇô÷*UÏ0šr~È-[©ž¸¦µM*XmW[ݯüÕ7*HÅ«€›¶ìA+GÔý“ñºªn,QÆ›ÊÃxKùï/žôWJ¥UÂóa1ç^4–èïp¾'ïä¨áªmüÆù¡œÉùMŒ·U0mT£oý;íÓN1ãþ3.ªµúÚ¯3i363ió,ãù]Ú“ñpen:¦b‘ÎQ©áŒk„^§¦Ã·µº¯ú\ŸQ;t¬QSŸáÌPã ý#gr¶7gKõ¿hÁO¹qö¢v‘ÞDå7ã- ðê9uFy˜£UAMP D€º 1hÂU7Ó—T F}'h ZíÖôÞ´eÛöÀl±#¯uwqLgp·ÎR]Øßºòø^ö÷nà~ðx<€î è zÞàQÐ<ú‚ÇA?Ð  Ýì±Ì~û¡°cŸÃÁ“dWF‚§À(^ Æ€±`&€‰\Ã$0LáùT0kšý3´ÿ,˜ÎãçØÏÐÙdy6YžM–g“åÙdy6YžM–g“åÙdy¶š‡Z¼ æý…`‘þ™/0ü€?A T! „ë<£¡þÝhî̋ѴíAÐô½@oð(è}ÁíD!œ(„…p¢—PˆK(Ä%ƒë2žÏ€gÁtð˜f‚Y0ìy0ÌsÁ àEð˜^óÁð c^ha,ÿ¯‚×PŽ×Á` x¼ÞKÁ;à]Î{¼>ËÀr°|Hû+ÁGàc° ¬Ÿ€OÁgÖ€µ:Ëø¬_€õ:%ÌF ³QÂl”0%ÌF ³QÂl”0%ÌF ³QÂl”0%ÌF ³QÂl”0%Ì6ö2_ƒ}`?8¾A}ƒÎÅãV xi*ÌÒº =)£‚8Mqš â4Äi*ˆÓT§© NSAœ¦‚Øf à¸` †ðÚP0 ŒÑvæÔΜڙS;q^EœWçUÆd}Ò˜¦ÆŽv\A;® WÐŽ+hÇ´ã Úq…|/#ßËàrŒ/#ßËÈ÷2ò½Œ|/#ßËÈ÷2ò½Œ|/#ßËÈ÷2ò½Œ|/#ŸËÈç2ò¹Œ\ºB.]!—®KexÛ­¸µ‰:j3(gÌ%"t¹‚cVeAL–LÒß›Õãû]¹óNï”ñNéU×$ ]©¸]ÈÑz¾¶?[Ž—½‰g½‹½‡-ǃ–À¸ªÌÿ ¼a #™…K¯„yïªÃpç•áQrëHUUÑù%èü|úÚ}}m½]m®Þ®vCC— ¡ÐƒÄ0ö6ƒ½uÑÔUц0˜ÜìLœ6¡wQèÝâ4SýĈÓ`øR¾§ÏÅésqú\œ>§ÏÅésqú\XÝV÷‡Õýqö(œ= g†³ÃÙ‡ÉUwäµNà.KÖâì±8{,‘X‹³ÇJ4îCͺûÁàAðx˜caßãz€ž è }Àc /xœcû±ïÏ~ûz5΋³Çâì±8ûÄŠ(ֳɨÙdÔl2j6®ÅlFáÚQd×l2«?™5ÇžŒcO–YyŽ6fè;ÇŽÁ±cpì;ÇŽÁ±cpì;Ç^€c/ >•ú|):‡ýU²tœ5»{Ôý–Úª?QÛØog¿C¯P»Ùï‡ÉÈòYw2ëNf½ N¿§Åé·Â€Õ0`5N‹ÓÇâô `CÙ»”ì]Jö.%{—’½KÉÞ¥dïR²w)Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.Õ@.YÛŸ¬íOÖö'kû“µýÉÚþdm²¶?ŽUÁÈ9<6YùûÊÌ|™çóÁðŠÞ…ãÇÂÖÕÂÖÅz"Ž?ÇŸø™ûžMÆÏ&ãg“ñ³qø(> ‡Âᣄ٫Ù>ŸqŒÉô5ì×êÉŒÿ‚Çëu ƒÃÇàð18| ƒÃÇàð18| ƒÃÇàð18| ƒÃÇàð18| ƒÃ/Àáàð pø8ü~¿‡_ÀZh)k¡¥¬…–²ZÊZèUÖB¯²z•gf*5•šÇÿéH™°ö<Ì9 K ÐÅ‹DæWtlj^T²îé _ƒßÁ¹Š#§À±_8úµKy±¶ñamã¯NÀ+³R¨_Ò™§¥ÌÑëÌÍÛÄûZ=O¼«ïâÝ‚x×&Þ-ˆw±Ë 
^_1Ö,Æyˆž\O²l8þ;‚a-LÿOÙ¡·ÒS>½øÑ‹ƒ–»Ðr®å Õ¬™ ¡åúæÚ‡–ÓiyŸ¬²¾!„÷§áýiÎl-+š¥*Tµ ¿ý•b2• _Gßžô=Ÿ,%K?b O1†¯üJWîÁxö[•K˹Œ­>Y´Ž,ZÇ¿"ω)Db:,CÁÊu0òI+2Ç»5ö0‰Ê2=Ö …iÁ²‡¹Ž®c9,x–(­!J›Í•X…/¤2Z7F›C‹wÐâï´èK‹uiÑ‹}™¡5²"=†{Ç}NáÏÅâϬôjÜò]³,ÃuÔfg9êG°VŽ6ŽLåÈÃy”ñD³ÚZŒ«¬ÑID©ˆª­ˆ£÷pt2G'rt GÇstñë@³Ž™«æ]²upÔŽúž£~à(œ‚ëñgvý˜MOÕQµR=Ô0ðžT½UFªþê)ö£x}  ¦‚ù\ÃÎ^HŸÑê!µXuV1ê.µFõTëÕ}jçŲßÈ~§šÀÜ.`Õ>ùÈüŽU ª¯JQƒ” ¤ªåJ«†xMõ–ѽÁÞá›ìßoƒ¥€ïrÜ{à}ÀóDð-øjr/jr/æf†ƒV쩬Ü+_w7ΉÃ9q8'çÄáœ8œ‡sâpNΉÃ9q8'çÄáœ8œ‡sâpNΉÃ9q8'çÄáœ8œ‡sâpNΉÃ9q8'ç„ÿNΉÃ9q8'üwâRN\ç2Ž“ã\Æq~cîJ¨Ô‚jAµ ƒ¹t0—Ö' éÌåÏÌåO¸Œ—qâ2NòÃI~8q'.ãD­¨µ“¼p¢\—Q®Ëðü2Êõ\ÿ åúM‘¥°æ™x~g¶¼˜­ªÌ–7³ÀlùÐÛ¿EÌÌ*ÿ졳҅YiȬ4%ž¾F*+‘ÿikõi­½¥^‘´fÐZM“YäËb²b ìÝ%•°ùyÍQ"feÈaŽ}Ëü4•–<äQ|ƒÏað9 >¿ wÃà®ëÂàn\ ƒkapÌüæ#úŽ3¥ÿþôNÿïÁ±08ÇÂŒÏaNªêÄȇ«öd„9òaœY‡3‡pæ8Μʙ£8³†C«©d‚Gw²>ëéÄ‘C9r0GãÈ–Ù–£‚䨶Õ„£úY#™ÆQÏpTŽê)q5{nKÏæzqGFpäƒÙ#‡päãJÏÕ8º½Ä«"¡Ä"”X„ÒÆbjÅ"”X„‹PbJ,‚9ë}Újå[/˹—‹PbJ,B‰EÄ"Öº¢UQ j4Z©ª£\•QŽ¢EƒÓâ´8Ÿ¦Å1´XŸ‘Fë”;-ÝÏš·ãžÃióI®8 ”·ÚØl•q‡XãaÜ!Œ;„q‡Ð[=zëlõEoséíyz›Ko½èm(ãaü!Œ?DÆnƼg5ãŒG8cgŒåŒqœÑƒ3ºqTCø:\µâ¨þUƒ£îå¨îÕÏÒ¡ê\EUŽl‹¾t`^¾B+„——àåϼS‹ÕÄ×ú møU¨Žyì/p9ÓZáš™Œó²:7?1ÈæÖ ú‚°Ú‡UíHœÜ|äNf%‹k^ßâÝįñëEüz1æ^Äo ñHürþpâ7øõ"~½ˆ_/â× ýöA‘½Ðo\L;1` Z¿žYÙ  ß÷£ßÈáéäðëfÏäñóäñtòx ª: Œ†&Ÿ?cNz1'½ŒÙÌúkj Œò öå:>ëHü¿–Ž÷b^z1/½ŒeÄxqM€¹‰à[ðHÂÃd`S0+ÑøçbÆôÃòO0v  )–Úˆç^"˜Pqä·9ò;9ÒtqÓSÓxç{˧/ÊœxõLùìv­Þˆž™®{ÆÒà f8Ÿ_ƒãîB“Ê× ¦6äü,ÎÏ`îbv;ƒ»Apè î÷nà~ðx<€î è zÞàQÐ<ú‚ÇA?Ð Á 0 CYëNgÿ˜6ϳÀó`6˜æ‚À‹à%0ŸŠcÁ?ð¤søÐ<è‚pó¢Ï•&àG©:ÿÉÇòñŸ|ü'ÿÉÇòñŸ|ü'ÿÉÇòY-üNmö;«…ßY-üN_@_@_@_@_@_@_@_@_@_`˜õóNð%Øvƒ8°|tzzz>wŸ;‡ÏÃmžÄE¢@¹’Ôµô©&ÌüÐRŒ0³Ì3»ÃÌ`fk2»³Û_ùI¶®KWc°SÕ&Ƀºä?yàK,.äA YeHîV“¿R†Ü’_ðëù<}-õø3¿$ÆåÕ¢ùîÞ-àÝ#â¦ûñÃo¨·ã©]yç8Ï äû•T¼Ü<þ$¯fqüv«µ¢Šï"Ìï~eæ+éÖ7´û²†W6è\á ֹŬq‹9ò]Ö·gYßžåŒ3Ògká`†ú”>Ý,o^o]›éËIdÔÚÞv›=àê¨ßPúKå ª˜C­¯jò¸ˆuA$í7¨ŠïõXÕD³²Ô£È(å £d”ƒŒrQ2ÊAF9È(å £d”ƒŒrQ2ÊAF9È(å £d”ƒŒrQ2ÊAF9È(å £d”ƒŒrQ2ÊAF9È&Ùäø/²iÙ”D&m§ªkGœÛPÕµ#ÖmˆeWbÙ•,rE²ÈA9È"Yä ‹FCý¥Ñ܃¶ h:€ 'èzƒGAðè †èYÆP0 <¡ÃÁ“ Œ#ÁS` Æè%ÆX0Œô(c"˜^a< Á"ð÷epœD'‘ÁIdðv2x;¼ú(R/‡ »aÀ16Á‹Dï"Ñ+"zE\ár®p9W¸œÑç3ú|FŸÏèóý&F¿‰ÑoRo½8¸÷âà^.Ü3?#‹ƒ{qp/îÅÑã zœ÷à^=£g*^ùLìÿî½B¸W÷ á^!Ü+„{…p¯î½ÂJÜ‹½{»õ‡d懑ƒqp0ÆÁÁ88ãà`Œƒƒqp0ÆÁÁ88ãà`Œc†f0C3˜¡p0&ÀÁ8˜à`L€ƒ 
p0Y|“Y|“Y|“Y|“YŒb£˜Å¨ë8X á`!,„ƒ…p°ÂÁB8X á`!,„ƒ…p°ÂÁB8X oÂÁX8 cñ€ô+˜UÞª¹Z­'«ÏäèvFgê³»Y3˜Ÿ|,‘O>po˜À! k”'T8u˜7g´QŸ£­;ôëê0ê}D7jÂàשbË?7‹À5[Ÿ›yËçf¦²»T(¥7Wñ¼¡~ƒ18XQWÑŸ*Wà¦ç© ÞkLÕÒ ´Ð_¨;AKÐZïTm@[½YµíA}HuäµN`(•Íh´{&€‰¼> LSx>vžfÿ ¾ò,˜‡¢¿ ^AÕé<¢² G8Á¨~U;þ=Ý×?úS£6¨ê‚z >h"A7½Ê˜@%8Lôa< žÏ‚éà90̳ôNãy0ÌsÁ àEð˜^óÁ­÷Puì¡êØCÕ±‡8ßI|C¬ÏoBŒtš± ,+À‡œ³|>«Àjð ø|Æ11` XKþ9X¾{¹Þ¯Á>°¸žqăǪ;Uwsz™uH$ß4¦&m¾™Ž§Z¨O•Rÿ¿Èð3R«mЩdu*Ùü=Ùü=wŠŒ;EÆ"›N‘M§È¦SdÓ)²)žlŠ'›âe5ö÷eÑ5µ¬÷ öUAMP D€º ?oHÑîÜëh¢°(Ìäês¸ú®>‡«Ïáês¸ú®>‡«Ïáês¸ú®þ+®þ0ú–@–åD`Xfe YhVš•fe YhVš•fe YhVš•fe YhVš•A—Á%Dp š•‡få¡YyhVš•‡få¡YyhVš•G”7å DyQÞ@”gå™Dy&Î!Â9D8‡çá"œC„sˆpÎ!Â9D8‡çá"œC„sˆpÎ!Â9Dø0>L„áÃèT:•€N%°~4•aµþ‰ ,A!ÜÀ"¥ÔjVf ÕZµNýƒ•ÝNÖq»ÔnµLÞ_ÊýaÞT´ÍÕÃ.‹ÜyFz½ÅvÐû•ëZsgåyP¦û÷Àû<û¶sÜ‚õР%h ËÛ€¶Ô²í@{`V°y­xBU—ÏLŸ£és&€‰¼? LSx>öžfÿ xÌ` 6ÌσÙ`˜ ^/‚—À<êÜ—Á|tjû…°6\§ÿ¯¹ÂxЭ£èÖQtë(ºuÝ:ŠnE·Ž¢[)èV º•‚n¥ [)èV º•‚n¥ [)èV º•‚n¥ S­Ð©è”:$Ÿ+} KЪ´ª­*A«RЪ´*­JA«RЪ´*­JA«JЪ´ª­*A«JЪ´ª>Úá£>Úá£>Úá£>Úá£>Úá£>Úá£>Úá£>Úá£>Úá£ÍKGóÒѼt4/ÍKGóÒѼt4/¾¦Ã×tøš_Óák!|-„¯…hàõ%îU„{á^©0é$L: “NVÜYU“ǵ@¨ êóZÐÞ6-XËß Z‚Ö  h Úö ëüŽì;'TwV†Õ»‰Û%°D–h¹]¢åvGaØQ–ÃÒ`X Kƒai0, †¥Á°4–ÃÒ`Ø1v †aÇqÇXf~ߟA6eµUb¹äI˜w ‡,Â!‹pÈ"²‡,Â!‹pÈ"²fž„™'aæI˜yfž„™'aæI˜y’ú£9õG3£‹ªeÜCýу×z‚^ 7xô¾`€.6‚A`0˜@•1L\3,N„ʼn°8'ÂâDXœ‹aq",N„ʼn°8'ÂâDXœ‹aq",N„ʼn°8w‚ŵaq€õYb½›8n",N„ʼn°8'ÂâDXœ‹¯sÜ£°ø(,> ‹Ââ4Xœ‹Ó`q,NƒÅi°8 §Áâ4Xœ‹Ó`q,NƒÅi°8 §Áâ4Xœ‹Áâc°ø,>‹Áâc°ø,>‹Ãâã°ø8,>‹óaq>,ÎÇÕ3põ \=F;Ñ×?}üm¼zÿÂõYF4Y¿7Ci[ÀØ;AKдmA;Дk¦ FÛ`ôC0ºŒ®£Óat:ŒN‡Ñé0Ú£m0Ú£Óa´ F§Ãèïaô÷0:FgÀè £3`tŒÎ€Ñ0:FgÀè`ô0: FŸ‚Ñæý«E0ºFÁæ Øœ›ø l¾½ÎvVM`ó°96ûüGÝ€– ƒÀ`0A¹ÃfwØì›m°Ù›m°Ù›m°Ù›m°Ù›m°Ù›m°Ù›m°Ù›m°Ù›m°Ù›m°Ù›ÛWbsuØ\6§ÃætØœ›Óa³ 6Û`³ 6Û`³ 6Û`³ 6Û`sº|{º¬¥Jû¬_€õ\Ï 6‚M`3ض‚m`;Øv‚/Á.°Ä=à+°—yøìûÁð 8âA`"ø|’˜—C ¤ 6p¤ê?Ô0Ø ›£aó£² ÒÓd-ÔBo…¥[aéVXKãai,ƒ¥q°4–ÆÃÒxVGä›îÑú ½C/ÁÐK04†ÆÃÐxz †n…¡—`è zv½l~Æ«¢aU4¬Š†UѰ*VEêhXÍ*b³½•ÙÞÊloe¶·2Û[™í­ÌöVf{+³ÏlÇ3ÛñÌv<³ÏlÇ3ÛñÌv<³ÏlÇ3ÛñÌv<+‡#¬ްr8ÂÊáˆ|£R¾:kü˜ùKÌü%fþ3‰™gæã™ùxf>ž™gæã™ùxf>ž™¿ÄÌ_bæ/1ó'˜ùÌü fþ„‘ʪΗè^"²—äÓæÕz>kÅ®¬Wš÷À½FЀ2óªÚª5Z3*¸fTzM䛤HªçòÏJ¶QIVMUKªé•TwAj(ÕÕ|ó3]tf¡¬öQ-ï£Z>Hµ|ЈÀCjƒ: .¨êƒ tãú‡PÕÃÀªÏ±`&è•T»+©vW“©X¦€© šLY þ^•ØÆ5G2‚† 
±Îe¤{åϬë‡è}ô´žöÑò^ZÞKË{‰“·Ú£¾VûÔuP%¨eS‡ÕeøH-¢:àñ‹]÷¸?ÌÖÛ}êàæÑÆãŸÎ>|b®ÝnòíŒ?#ŠfDŸ3¢db˜Ç¨Ö3šhFÍh¢¹î<®;ëÎãºóÝzF·žÑ­7|É–ódËy²å¬rç =ÐLOàÅco´ÓǼjàªòº?ûfªû@Äz:˜}uÂë¡ìÃ@8¨jrN-ÐY…Îâ!i ÅG ð‘®`$W0—uU*ëªT®dW²]5e\Íx½¹Ü–Dö&‘½IÖ]ÞÉdp2œL'“ÁÉð0‘ N&ƒ“¯»Ãº@E`$Ü{ Üþëd²<™,O®t‡u’u‡u™ž†ãEÅxQ1^TŒãEÅxQ1^TŒãEÅxÑ)¼èLNÇ‹Šñ";l.‘Ï®ÞaM¼ð¢S¨ÆyTã<ªqÕ8jœG5ΣçQóxQ^”†¥áEiFÎkÊyÍØ7-À %hZƒ6 -ǶíAБ×:»@gp7èî]Á½à>ÐMÿhÜÏþð x< ÝAÚê zÞàQÐ<ú‚Ç9¦èpÁ 0 Ñ#aèH:’µj*kÕTÖª©¬USY«¦²VMe­šÊZ5•µj*,Þ‹·Ãâí°x;,ž ‹çÂâ¹ÆdkLSåþ±$T5 UMBU“PÕ$T5 UMBU“PÕdT5UMFU“QÕdT5UMFU“QÕdT5UMFU“×än¨ã °¼ Þoƒ¥àåöî OFe“QÙdT6•MFe“QÙdT6ùº»ÂÓPÙ4T6 •MÃ_‹ñ×büµ-Æ_‹ñ×büµ-Æ_‹ñ×büµ-Æ_‹ñ×büµ-Æ_‹ñ×büõþz =…¿žÂ_Oᯧð×Søë)üµ-Æ_‹ñ×b¯Å+AñJ®û ˆœ‘ïîÂi;ƒ»Apè î÷nà~ðx<€î è zÞàQÐ<ú‚ÇA?Ð Á 0 ÓÁs`˜ fçÁl0Ì/€ÁKà)[•ß¿”Š[}¬ÖéÔ`½þ%OQ±ì7²ß„m–ûc¿V»ä[¬CêÔ žuUUi ë%H%CüõFÁ :¡à°,ëÁ 6‚M`3ض‚m`;Øv‚/Á.°Ä=à+PiF¬;Iþ›{&‚pŽpåiÖ==­uΛ7½g¢–zRõSQ üûÈ¡rgƒéA³U e6^ó8c¾ªf¼¦úáM‡éÅô§×ée½l¦—8zÙK/ eÔ R¬2ï÷S㽬÷s|²ªO¯ßænÿ[½cÝabÞSX‡þ¦ôסüžBª€Tê‰ ðöîfÇÝì8™g²ãLvœÉŽ#Ùq¡a¸Ð0\è,.tÖúto 4÷¹€û\À}.à>¥¸O>î“û”à>%¸O Γóä“YäF–õû¢,r#Ëú}Q¹‘End‘YäF¹‘End‘YäF¹‘End‘YäF¹‘End‘YäF¹‘End‘YäF–õû¢,ë÷EYòû¢aŒï `þ*ëI%¿Î*U#ÁS` Ì;3Æ‚q`<˜&2þI`2˜Âó©`×ü´Üû˜eý¦(“œË$ç2ɹLr.“œË$ç2ɹLr.“œËÄùì8ŸçÛŒóÀõ6“{È¿òï49·€œ{•œ[@νÊÌnS»õÏjHÐo‘cï‘cï©£´q ¤êe8f)ŽYJÎe‘sYä\9—EÎe‘sYä\NjÇ)í8¥§´ã”vÐŽÚq@;hǹì8—ç²ã\vœËŽsÙq.;ÎedžáNÃp§a¸ÓYÜé,îtw:‹;ÅÎâNgq§³¸ÓYÜi î´wÚ‚;mÁ&âNq§‰8ÒéŽtGº€#]À‘.àHp¤ 8R>Ž”#åãHù8R>Ž”#åãHù8R>Ž”#åãHæ= YhJš’…;•âN¥¸S)îTŠ;•âN¥¸S)îTj¼Þï÷Á`XV€io%ø| VÕàð)øŒcbÀ°^g¢_™èW&ú•‰~e¢_™èW&ú•‰~e¢_™èW&ú•‰~e¢_™èW&ú•‰~e¢_™¸‘7²ãFvÜÈŽÙq#;ndÇì¸ÑÜènt7:€mÆ6ãF›q ÒÛÜûb~÷ºŒÞEî±¾{Ýx“ï^—1·Ë˜Ûe7ùîu#ó¶‘yû;5j*´6ªÞ†ŸáoAF°QÝ1B0#ܨiDµ:F]£žQßhÀÚÅp&kƒîj¥µíc³_»žÖV[¶HÐÁè.‡ŒÅÆ6¶ÚÖöóµ›K€lwZ[wÙJ¯ßªDZÛ,ssmîÚüoÿ•éíõz»ß‡ºÊߪ…úFÝ©âY9^}¥>¯4╯xWÜ£}íoÜ<®>ã˜ûÔ~féöñêžÿë¿€ûï~{öÿÏï‚þú/þþßüÿp÷¿ cCcŠùo1ø4¡’Õe#@]þs3š}Yàãë¶$—0—i.ó­í-¶÷Q‰‘U2]ç»îp½â6w§¹-±¶×lgÜÝÝÛ¹r_äã~d±9=\<|=&y|ëÙÁs¤g¬gºçÏž¹^]½Vx{{·òžå½ïºíGkûÍÜ|ÍíoûõÃÕw~¸áë¡nýBáGe[¿P¸ñ¨›ÿŽáú£Ò¬£N\sÔ_ÿ­E0•¿•¿•¿WïGåïgUþ~Tþ~Tþ~TþæýßÓ‰FÑèD4î =©øý¨øý¨øýäþæTÕEU§ÅZ  ÅZ  Å«ÅZ  ÅZ  Å™´X‹ï¶*î>´@‹´hVÞuu¨e*šzãçV7»w8œ¾½èÛ‹¾Í;ž½èÛ‹¾½èÛ‹¾½ÔÖ2©û»T34»Ø– 
l©R8ßRñÍ|ûÉå>ŒÉ‹1y1&/Æ.½øÓ‹?½ø‹zŒcÀX0,VíU ³°ž#70ÖXöÙïTu”y—nŠºƒžî0†žüéÉßêÉŸžüéÉ_ÕfåäÎÊÉ s7ïÀgå䮢9{1-­Qa´îGëAʼ_g#ûª.×ÔˆkjÀ5Ý!–À|¤HE^SîDÑ–;‘t'þîÒóÛì—‚wè£ £õæú¼¹>o¹‡çIœ- Œ#U# áš½¹fo®Ù›köfd~Œ¬)#kÆÈš0ª&Œ¨# µœ1”…1¢VŒ¨#ª§Ì;ɵ¢ª¯¡¨¯ƒ7ÀPþû.KiCYññ&>ÞÄÇ›\\Ì5¯Q é-€ÞèÍ—Þ|¬)#Þé-ˆ^B…ŒÝ¼óÛ“3=83”3}ˆ`5Îö!‚Õ¬;]#­;]ÒB=iáÏ;]ÛJK×Ý¿bÝ«Rq¯~´Œ¯ ,¨FOžôÔ‘žüé©#=ùÓS#zò¦'ù¥N |´ó^Ú«÷ù—3¢â>þÛÜsÿ×~5òßßG^ùȃ7=Ò熻Åw¯]\Ñ~2ôRÉfóÎñ›í[~o±þmÿ‹¶ÿÅÑÕäè¿~'ú9ÞÉå_xçëNôœÿpŸúÕw³­w³¬»}ËϽõ§.–'„ËoN<åó“Ðë>? …iÞ·ü åuõ"mÔãÌn°»5ìpÍg'1¨ë:ê›hßÃ7|:3ï?ü¢åCZŸnýºçqë×=ïÜâÓ™Vpyx §Í'Uc2½±õiM+ëw(VoôAoôAÆ6æZ ×Æô:‰^_¢×Yôú¢õKž1ôA¯ô!:nêI3:Ö݃½9ëiΚÄY“9ë1Îz„#›UüÚÆŸñø[ã ±îf÷¯ôk›;¬ß§…i´0ºÒ¯m<ä×6æ/‰:p¶/g 区Ö/‰Ìø ¶~Idþ†)X~Ãdί9«Õ­ÏÆL? $FÄÈS ñ´âH<‰G ñ¤õç­ïËî¡åær5ïòú{à}£0&o“9§A´D«A´ú ­Y­Ñj­ÑjWÀ˜—ÐòHë×8[ßÅ.¢õ Z¢õ æÔŸ–ÛÉœ†Ór¸uæHbI #­6·æ4Üê-œÞÂé-œÞ‰m$×q¯Û±ô:ƒ^§ÓësâÏï0Šw9î=ð>HUwWüê«gô±æs2gL±æó~Žj„" W‹7cñ¶ÆRÍšO“ QVŸ‘´ð8-DÑÂXZf~ógú.­t•VºÜ¦•‘V+h¥'­  •(Z,wbÄ0Ï& oý Àíð?ùµY1O5Ë¿^ÖF*fó›Ï¿ø;?ùU ‹û ó<÷-[àM#õ7ÿ§Òu®Î“Ç¿¢õŠQ)ý»>JLÌ×òô%}AÛu¡¾ ~Ðú¬>­³ô/<+¸®-'GæêŸåq‘0÷çÀeªš}åêŸ*ŽÎ”ţ߮kë8m•ârŠ¿iù7'£ù—vXïÒÅšXÓ[6¿c|yºˆGTºfÏ­ý¨Ó˜MóÑIæÐÜç[¯ÿ,}b¾¿\ƒ´oWV~5<ʪhëmý=ŒPú¼>¢·É+™òwsh¶˜ÅºÏi?°J/Qz^ÃU|®ê‡ä˜ò1äðÚ9ÜÈ|œ-±ÿCŸ)qþåêh¸â"bpQÿ“¸3?E7Ìä9æÏnµë¨ôzŽüµ#óZÍö.â{ò:ñ/àÙÅÚ:ÃŒ9ÊgÉœ§Š×³åo1Q2[(aL%ú£É ï_õÏŒ²€«;{M[Gô>ý™<ú^–ý>ù{ˆ•¶ÒŸêí¬{+_ZñÈiíÉßÃz8ÜÛaÅð;z/§ë׈ô)ý•¾_Òsõ®ò(Z‘üýj›åÏycñ3M"QVÁ¯K¯b+g—Xü²ñÊ!á×9þ&Ñ}±>Èh~®g~å+°z>)sšTñjªNaþF3z-Q/¯–±¾\‡ÓŠöï:(·æ,—Ú¤¼%“ÛßˈÏÓ[û å0ÇjõPRñè´µÿ…öO¡*J8WX1–ŸÍëfŒ±”Vðë-ÿ“þÓ9ï÷«ó_éºè÷ôXyô±(û7äoc3³ô=Iß!£XZê…ºƒ Çëþ¼ÖS?§g²Ÿ¦çXmeÑ Ö5ž¯ÔÇiëJþ09ÄŽ€Ãs‚±ž#žŒí;*ÓÊãšÉ»é™4‹gûåï‹ÂÑt4áyyþxN¯Ð¯èqúk\éÕŒo*ûXýÕÖ ²7O¢‘­¿.›üýDr;‘|X~X ¿Ðóè=…g;¹îw„—yV[?è8½ÆŠs9ïS,楷²*/g`Ùu‘¾`í+®SG2Æ%n›õye¾üí`Ž\Du–çÄWwæÊïÑ=ôûz.ÏÆë:J¢ô‘ÕVzwÐâÄÉJQÏ4µåLÑ©åYÿ,+½ª$ã\¨ìÝX1)VKÍqÖ†ÆTV.¬ÝØ\qYwÌìãñ;oŽöÁù\Y'šÿr_U6w*;ó÷‚Alž¬|‚9®:õ¬7ëļ^“jÆ\mEpVm6_zªË+õØüT}Ö·U©kÒJ#z nmL5Є-Q4¥ÕflÁŒ®9SC£1Õ]£©ªb43ðb£­Ñ5oGã^*ÅûŒnÔ}÷÷³ÊÀxjâ!£;¯÷0ú²{Üè¯üŒÆ`êò!kã c4•Ùc<•ÝcÕãd®¿:×:Äü·M¿…Zrýrµr¡\UÍ+ kðkð‘k0ä\äüäªË5T•khÀ ±%­·’õgkê‰`VÐí¹þŽTóÁT¯Cèm(›YšëÒ'å>ÉQ²>ÍV—ªÏü7Æ© <ž¨&sÖ¶ºTæºušùëõ4[]jÒgëtõmÎP/qMóÔËÌÐ|¶@µ€-H½Â¡²¹©ElµÕ:õWµž­+Þ 
Ìe,[5V½y¼Imfî¶°Eª­l Õ6¶Hµ­¡ÚÍæ«ö°ùÊ}NîÔ¥ò/È¿]ào0CAF(Ãpæ)ˆÞ5f&̹ 0º1‹¡2g2[2[>2[†Ì–ŸÌVU™­Æ4c&l˜e,`þ^1Ñr´ñªY˽Žï–¯¿å_DøÀXÁÊáCãSŽüÌXÃãµÆª¦±Þø &í5âO‚ñ•x’‘¬ÜŒj¾ÚF*õ[]åâ`Vpn£Üwïºêý~õµ~W¡¢Ú¡÷ã÷‡pÆçQâ\¿¯IG«JPq|…•ìõçÿ„Î÷À‰¾i¢keœ3_·7]Ïôp©Òñ…Ô&Î]}.•ƒS_1ý•0+;ûj’[Œ?mlÍø>¡ êL*‡RTtý—×/TD(½ój- ¯Ü¤‘z™Šåô²Þ„7jé1Tø'¼³¯Ë1«<^ûí5"s–‘_` feUˆ§÷ºÅx¥jý³2ë^bkÖ —åùE³B¾úì/\ÿ"ð’jJÿÓzíW ßâø<㘠ëù?©bïb¿Q‘ê UÖõÊ[cEcŽ™ùúªÚ:¿„¾Z‚©¾âº¦‡Óxc#\¯”ȧ0??3£Ì½:ÂëÚøX?K¿Å}Št¢T£”º¾–Ô˜åÓÔQÿÄ_Nãb¥2ÞªžË<+ã5;óRÄ»ïëŽfÍoWÑÎ×ÀÕžÓ/°O¤ 3†¿ŠÏÇè¾·ˆŸ¹zÉ4{µžSûS‰9õn™y“¿f]ñ¾nsóóoÚæ‡ä[¡pô Z9F>ü .çÆ5ÇíÐ{˜í³Ö(w±î+ý½”Úæ™ÅËÌg)ØçFédêŸ 2b³J;,=šùù®ðH˜/¯Ç¹¿ÅXäêOÀöÌJ¯­Ô3Ì:^Æ_HߥDðe­‹*·™1ÿ“Uqùza»9~©=Íjê„•ŸæJîÇu¿Eÿ…\i‘¹.“§.x[K|Ká[ð¿Á8SC<éYm.ÒU´þ1´>H=n~Ï­ú‰â@©ªhô»j4Š|X=+ »ž;ÑbKTÖôFC¼ÑE¼±Šx£+ÞxÕYÝÍ»]ØÜÔ=ª+zq¯êÇú3OÆ1„‘™þé.þé%þé{Žæ]Ó3½ðÌIøØd6W|s*­™žYU<ÓG<Ó—k˜E;ϳ¹©Ùj-ÌeóS/°¹©ñRñÒªâ¥þâ¥â¥ÕÄKÅKƒ$ ‰‚ŸD¡*Q˜C%3×rŵò®éZîâZâZžâW^âWÞâW>âW¾øÕډŵ<ĵªŠkˆkŠkIL½ˆ_/©d¤’ó’JÆ`ïÌzÆêÍKªW|­ÍÚÆ  ¨Ãc³Â1¤Vó’:Ç]ê?©s|¤Îñ•:ÇMêO©sªHã-uN ™Åp™ÅZ2‹áRáË ùKõ.ѯ!q¯#µJ°Ô*!Ù ‰lu‰l D¶ªD6@ª”ÚR¥xH•â"UЇT).R¥„J•RSꓚR™DHe!•I5™›`©F‚0#̪F¤1¨2ÛðŠY“Ô—÷ñجL\©/»óجO ©)½¤Jq—*ÅOª©RܤJ©"UŠ·T)5¤J —*%XæÛ_f7\济Ô!!2—Õe.«Ê\p5íäS§Îê~©Ão–?±H«äÝæ¿Úÿ©s[Ǽ™Õ¬YÉ›ð`3ë{žsõ_ê¿ÝUØ\\<͹DºŒ‘Ú÷oûý»Ä ý7=Ç[YÉuÙÚàu¦î>ËësP¨×ÙwüK­9Ê뜊ç{nrÌ[z >rHwŹféx´~ü_ká5Ï~-×sþÚôlÙ¸ÍÑ•æÏQIw¥Æ>/›&+ºÕ(ïÉŠcÊ?å³ÖÛ¾~µ÷«¯êÀ‹àV¯›z‡Æ{äóù|Ь€þ!ÕU¢îG%ÚÕ:û°| ºÞ6æç È73Û¶°ØÆ`7fæ—‹ùïϰ’‘†ã"YEÖ ®’—UdÝà*Ùé&Ùéf­\TÑ.7ÙʵËWTËWôÊ_”ª|=é+êT_Ô©–¨S„¨SQ§Q§z¢Na¢NuE¼EÎ9ÿóøþΚ8ÈU¢þ¡$-¹üu¶³AãÔj¬Ï[rdp'mõ‘¨n£aâÚ•íÎNç.}¯y_gÎõ^D#-¾«ÑZw·Z§ÚÆúl‚â¤8¤~ÊWDZhzM6Ng¸DV7½Á÷yL•§‰ÿ*\f–gªñ YV•?ó•Uî(A^–Ý)îºÓÜsgìm°îôÎ]°÷¸KöÁ]¶îŠ}rW¹jaóþúS^‹[^‹ÛžyÁs.†œö2ä´¥Ó^…œö:ä´7!§ÿmz¡Þœrxœc`a1gœÀÀÊÀÀZÁ*ÂÀÀ(¡™w1D2©100ñ³211±031/``XÀ àÅ%•> ¿YؼþU1N`߯¸Ja2HŽù+ëL ¥ÀÀ QÙÈxœÕ”‡sU…¿³ ‚tЄ°Y ”PB/¢ ¢‚‚^‚tB¯”¢Òˆ(   `A%tBB#м‹XÀ’Ê»ÁððÎ6÷¤^ì&i²æ;1N3'ÛÙçœ [¶6lKØŽ¸in·‚íÆ¹ž›à&¹-ݶîJ7Ý]_>Þ‹Oˆ¯ç9^„WÚ+çUô¢½8/ÑkçõñTß1ÿRx~¾Ÿ—’vŸËÌ=ÊܳÌýÄU÷)æ^Þrc]×woaîi×q/kî•®º÷÷Ý1w©dhƒü!ðwm¸œmŠÌ;Ÿw:ïk®¾ØàÔàI›—™ºrƒ¹ëƒé¹™ßr³g- L9´À,8™T¬4Wéª ÷£¦ã* Çp‚Îi猿>S r¢@…«cþ|ôOcù åÿrxŸ=–ûC–î 
–éMÆÂ<£ãœÂUĈ8l|d°DLg£Šª˜Š[ú+[f«XÚC1ź–âæ)&ÒÈlï'¹ƒ:ò±êáç9Yƒ©Âãu¤;Ú˜o<54b'úÌÎ2jYšµ‹ÛTcv©”bÜf„ÈUÃ&OcUUã§žªk‚ô˜jê ÕÐãªEcÕ¦‰’Œ‡š¢†šÊ 4W==­Fš¦Æš®&š¡¦z^ÍèAzêfÍÕ-zQ·jåè­Ö$«fê6½¤Û5Ÿ~ºC/«­pe™½‘þºS¯¨é.-Ô¢¬ûôºî×z@ËB= µ\+Mi…ÑJuÒ*ª2ZµZݵV]Ou&¨·*©¶éQmW_íP?í¤“5P™¢l¥h¨öi¸h˜ö3V]”®nZcÝ0§°R­%ª¿vi€vköøêŸu~­·ÎË´¶ 5P—­Ó¾µ{½à·\+¿“Úaò6²tfxœc`À›€°›¡›u&ë.æ¯ ÿV²oûƺòÿÖ],Ìÿ?Aø,'”xœ­VùsÓF–|$NBŽ’ƒõX±qšÚ+“RL’eÜùZ J+ÅNzÐ2Ãßà¿æÉ´3ô7þ´~oe›@’v†i&£÷íî§}÷“ÉP‚Œ½À…h=3f¶Z4¶s/ Ë­†Ñ¡èí”)ÆŒ‚ÑéÈ}˶ÉÉðd½o˜†¹™ŠDtèPF‰® çmÊ­Ü믚“žßñ·ï¶´­^ ¨ÝlÚ -AUFÕ0IJŠ»´Š­ÁJП¯1óy;0¢ šlvŸM2Zg´YQ†™å0”d´ƒƒ0t(«îÉc”÷Úå¥KcÒ…ù!™‘C9%a—è&ù}WðIªœŸ8÷;”-ÙØ÷DOôpw²–/­­ j[ñvȧ›;Ž,vj Ù¡¼¢q¯Ü72ihư”®Dˆ¥SfÿÌî§|É¡q%ØÈ)¯ó,gì ¾6£)Q]YPýñ)ÃóÝ’= ö„z9ø“é-f&xð8~OÆœ)Ãâh’°`äÐJÊe\OULò:-ã-ÃzáÚÑ—Î(íPj2ë¶%í°d;4­’LƧn\whF(ñîòëÒ išWÛXMcåÐ,®™Ó!ˆ@ziÆ‹D/4ƒ 94§Z»A’ëÖÃeš>OzCµ¶‚ÖNºiÙØŸ×ûgUbÌz{A2;‹üÅ.Í–¹HQºnr†Óx¹„Ld‹í áàÁ[·‡ô²Ú’-ñÚ[é9¿‚Úçž4a»/§ê”&†1/-Œ¾iš:WóÊHŒŒ¿Ьt…OS(ÊI(Ž\Aý_ss¦1c¸n/JÎŽ•éqÙº€0-À·ù²C‹*1Y.!Î,Ï©$ËòM•äX¾¥’<Ëó*ci©dœåÛ*)°|G%,?P¢Bæ‡JÀ6sì0‡Á.sì1ç6ÀÌað%sÌa2ǸÇ÷™Ãà+æ0xÀœÀ×Ìað sDÌa3ÇØgƒst™Ãà@ÑõQ˜yA@ßjt è;]OXlbñ½¢#ö¼Ðì5böO1õgEµõ^hê¯1õ7˜úPÑÍõ/4õw˜ú‡FL}¬žNä2ÃOn™ ”]n?~Sœ+qMcÿÿxœ¼½x[×Ù8®sAÌhË’lI–dز$33CÌ”˜’ØqÀ'q˜±¡†›4m’6IÛP¹IqÐvÐn_»ß¶Â ëºuÝ éJ±uý?ç^Ù±ú}{þíKºº:/œ÷¼tÞó^Á²³Xøy˜eayX‰¬TK‘†»ã ˜J)&$Àl³¦žx«Ù$ÆÌ&›"Þø~²£Ìèƒÿ®'æ”;ÒgFì^—™-Z»ÏV˜™¡ ;Ò\ê½L¦P»Ò¨ð7†ãâðŽácÄbµU/3g4%ø*’£­ÍìÅ&wt„#)Â"WËÕÃCÎ ™!R£‰0ÈÒ‰S×gzÉ6ò"‹E8sYj à`@”l¸ƒhÜxÇzRLJ BêsShŒŠÓ…Y9<®ˆÅBø½Àb±ß†ø‘¬ ˆŸŒFNfä™ ^FÞ*6ȨýX=Æ’Sr,JéO¢¸¿Ø‚öàÍøsrößázÿ½Ø¿)Ý>JrlgB°pV;äo„Ág©XQ,–’‡dÀˆ+pwà½/  ¥Á¨À *––G¬^Gý=mGêß+—UF¬X'›÷À¬8Ká¼¢µkó7½¼ŒŠ¼xoTÙÜܶIôS¢èÒ9™3gî³W¦/n/Ñèw/è8½4 B¬ö‘¯ˆïȳ,' ŸpÇ©CéIp@ðro8C ‡¹€„S¥4`Øé¬î2_¤NÀ™­ßž\ûëÅ‹~ äÇKVtW‡gp•yDn_Ýâ×ï­IšyÏu˜-LÍ!+»4Ÿþdëàý`P—]ÆE8#Zûî𼋃Yê(K‹E:ð[ÂwßÀS|Òâ£×Œ;Î ùfê?»:çÖXÕŒ#¡öýÝA%¹Æ”*—5+$ˆ#g‹äZEDT~¦˜×qòÝu·Àx”¬ÖyÜ4Yº#<)B%ûäǪ7éyP†Ðü’iËBV0$O‰± 8Fˆ$'RcÄp’É´¯Qÿ~’__nx{ÅËÞ™÷¶P¿ÝÜ¿¼§}šzëKÈÃ'ÿºcÎùe™Cšù-†c±¥,ã¨ä0crŒ8 …†gö¹Øü¥¿¸§Ì×NÍ"êT:x+_ð*õÅ”¬êqÑã•áÀGŠ;×?Lbâ8vt.¶›™(¯HjhÅ"@ðŸÂÈW’Á‚b§b‹Ão^Ä] ’Eé÷QâC´ £y=GÿÎrãwãtèU?y¸ì «XXìþuãŒ[ºY•= 7€/o®™> 
Rú?+¹;ä?€~@¯2ÊŸÞ—8~Ó*°9ðqBˆùm^Žæx®¾§jÝÛ*þhNOÛ<¯Á’"‰T«=%=Ń¿ÝW™<çPK|cEAxÄkr%†÷¿oK²)ZþǶgûý庸œˆ2_,Š´„´žÿîà‚'Ve U¡J£’°FyOæòÒÿ¹q£‚Ì»tµÂŸ[qõâÕrìJ9ReØ=þ~ò°%¶ÉIüÒ³8Kƒ¨â-æÀÏ‘d !ܸ›èöç°+`á¡5Ìh«w‹ö˜¯š±+æçÍ ñ? ]CÃâù „úúÀ ü–›e„èÃÕ W‘v‡—G¾§ÎÑßéáwSÏ5"€)'Øyáê…)§õúL4ö{#_a‡c#ÍŒf ®ý4À,~4)nl¤xàþo‹S„Ë•‚`c´>¾¹Ð-Xr¼;ðôÊ ±¸O¢&9¤©lU3þÊ þª|áP^Hɰ¡ÞÕÐ8 ÆGL«²ÌvøJå0Üïÿy„5È"°ÔIDteås®à&… ‘ûïuÈøF¿A¥ÀR‚Uþ¹4shXx7m[Xš¼[á—\¼rìøqòðIßÃ…÷l„÷Ș{8>o:€wZ8l‰ãáý9ÏK1íéú+¥`ž„ ¦Î¥?ŽÅ´­ Öboúw¼´2Tð¥fô¾€|™%b)àÐqjÚ  ð52G. Õ¤´¬gP!_–ðù†ÄÖBµä\?€;{âefä0 ø; A‰ø§Ö@¥¬A_ÀIa\:¹ç±ý¿×:BÔFõÀ°µ§/n V‹A‡Oò•B,^¦ FsK˜•.íð`´Z…o r*‡Þ!‹ÃÊ•*B‰oT Ç­ \­ˆNä?€Ñ€f«…X/†ck™œ(Pš¥C…Ê¡g5QrB˜ê™KbY¿Ç ”[ÈùÉ@Ü8tŒxîÚe•ö<›Ž« -¦’fÇØêS§kDXÀñU§Î9¢*7µÅËäÅj‘dö‘¤±+‡¾#A^ vªâú4G#_“; îQ°l£¶b¼ö¡-Æ8ÕCîXð:4ÞÚ_Þÿ+ x`Ã[÷–¿”0sË’_ì*÷ͺ·…¶qOSo¼?Øúð§[Ÿh;ó÷}—Vä´]üñHßÅÙ­~¤ýÇÈ>òyòÏP·hXÉ~¸N(õ TzR›9C Âè©5¾4®–Ÿ¹ \³©À½®a‰oÉ-¤Qq*»/89¤ø­Ò*ãYrr…Z:ý]« br¤~_R£#á6ãx*~eÚhìãœ[õÁ€Wó4 ÉÞù"¨+h¹î¹Æùšæ™kœ piÌBùÆÜz>5Ð`úž\_àl\WÝ÷ÔúBWãz<%ü £8)Ø|ÅðgºÆï,¹º>oÖ3C{þ19 ×È·¶?`犜Ž'©£½­È&î¯ W5Ÿüëæg÷£U÷®øaf½ ´ÝAëÁ jpæhE‚eÔ¬«w˜rfåþ®ÄôŸéóí|sãﲞ»€oY[’ÔœiöŸÁ>÷vî¨]ðü–Rÿ§äèØ‰´Ï86¶uòÐn CƒoÙñ»B8ôÃs½E÷¼±ñw&¨šŒà4|Ñzÿ#Ø?¼Ûk<¿µ„ú’ôd|Fb)Õxü¹û¨z9! 
†‡bøsïxï1x¯dœÿ‡\LÔŸx@¤ˆúcósMÔ["`§­ †©º¹þìosÁÙcÇÐ[F¾$ÍpŒ`Fà ÕÏ8†èC‹*7ug z¯ôJ:ï{cpù³Kœ’ž+½]ÑÀ¾´¾CøìჃo­¯?úÖ |¨ñÐÜ4z\ê4=nÂMŒs¦ö‘x ãÙæ^qùà©ö)œ¦â”¥O-tŒ‚®d"콺¿K9Ù™JîŽÄŽþ~ù(F ¦¯>h_ ^r FúŒÂ¼aÎÉÂYêÀ`=þWLÂà|Ƶ!ÓJÖ*†Ya¥QQ¥aK¦ƒc­d±8/À±¢è±1>|3qÐd0çÿg_ÁH袰)Ç™í 6`G1¨àQnxPt°ž|œdã<9ì##iàD¹ÔªTZ¥C—ÈÃÁÊa¾65̘¢Å¿“éèOIaaIô'Æ7½ åâß/R_l@&F-<öX~F©fSïqT…(Tþ–H]<«”s‹ä³ÁJò2NAcß/ûSJ1?vŸXìÿ„'çƒï$bF{!¼Ax Ægb˜ª#;‘%ŸÚt:XÇù·B ÔËÀ 1è(–êÿ–Œ·Â`å•D·Ö£ÇÒd!4þŸÂñÖäq94Z Ë0wx?ö‘rØÄÅ„\¼ÏïU௒-bµhhΡd±ZHìGv$näþCÿ2ŽB›yB¥¤Ål,ò¡Ãl ò…<Ø×êhUÊüöŠÄ°#±­5ºàðê–™¾”ö\«½zÑ®S™ƒ)ËšÍÁø2·åÍX´>÷ëÔ¾ý Ô¿3fF8›6Ö6>rhu‹[¥Ž ×";ã¦ÎáÒðó&ÃWLµàuèoø&¡†]›„VýK#(&9\&TLð$lýO^'ÆT_œŠ«Wüöf2 ß¤€ñâR‚ ýj-k:‹ðö¨ƒ9i8Ô´ Æ—¤ÿ"*Ò7åƒÞã0j˜¶fl%3MÖ»ô¡™ö²Ìø–âxx'¥½0N¸îlGDñ÷îÝó2f°…5töÄnܱûŸp%% )5é.µ\ÍÕÔ]šÕ·Lòؼ”îâ(ßÖ^Ëç¸J’íÖP)_Êá)Uq¹–¢yEà{õ’BN¸;#<1Çï‹/‰ êïO«KXš¿¶#•LYºÌi”‹}•Ók¦µZ²܉sj|8_!8zT©7ë•<.šÏd(O¿‡1†œ3êAŽ&49tªEãœG7À­½Þøæ’ä(UM™«½¹\o´ÖMŸå]·Û”Þз&¯r]´˜Úzùü_<ö˜â®…ƒ)+Rz÷ÕiZg¾íê“U§ö¯móæ¤gjµ.ZgI‡P¦c‚BÛ´›3Ht"1.¾µµ+©~s“³tÛ‹ º¶¸828$&Ó»¼Ù£K¨ôf7¥E«’§Kìò”¾Ã̓ŸŸ•Ÿ“Í凯»ÜÖ•o‹pFÈøÐ#øù“m16&Ìøm,2>f#oFŒ³¶¦!nj;œP´ Ô ÷Ü„ 7¾À®ÁÓ§0º«ùÙ½÷Lcc|@ßÈ5‚ y*EšûFÀH6-ȾQy¦U³­½¾òU ®šrW{S¹˜Ÿl1)¬½>z>ç®Î+ŒÎŃ¼Ñ­ûŸiX™Ò»·¿x==\û‡¬ž’È矨:u`m«7Ù‹9XtLq EtÌj`%LŒZwƒñu ¦5Å´Pw‡ß„ ×_wwÈ2üZ@óË}3¿Ì±£'e·ÓÛçæÀ@ÒÚëÚš;ÇáV¼ñ‰ÙEË£ G,Á$÷Ï!çC6àìøèú—ha3‡¶Œ¡µâŸOÏKô|æx÷]„WÂÈ7øëähw2ÞËüΆF ZÐ@slVîÂ7ŽÂxŸF mã/2çÕ§Iù¶”i ÅUѦÔj°±²5oËk몷u&ä®"Ï£L3À™%Hˇۇ»†Ã6!©’@× lïr ¢­¹›:å*Œ·[(ÈìX]”/#æèÂWà—á3†_z3ÜFÔ‰H¥ª ½ns‹KÙ¬ NhÌlYyY5Âf·’`%³Ê ´) ³¼0ó͸¨5n|¢íÁ?¥U9U‘ÒQää¾—\›é€8çmíPª1î>?gÁÑö™OçHM6PØn¶oGíO“ÇQùcã¿!¯°"FóSÎИ¨ý˜oÙ…jUÿvB›#(Zv¶Wn³™ErL¦ª^>-–‡4ŠOg´"3 -Gæ¦â$‡l„è`ás‹Y¬‘´ð‘O²LÈÄ>Òf°Åèñ¼Fï¥O…s‚Ù z@/A™{èˆP]ÙÃp¡º_¢<¾F%b:MF1t[çò¼šóu Mÿç‡8¿Œp®!œWW¦³©”Tª³(U6 X¤z‹JeEW¬*•E/õcw¦–ä©ÂC¤Òp•Úª“ÉtVõ¤ÏÔ+·¦>@ïÅqôîFô®Ÿw}/x€‘-è? 
ÙÊb±È‰;d·—2 âȘç€7Œm åDÜäq /Ô7¼†·ÆvÚî$ŽžáÐ^¨~†4 ¼³Þ•ÄÝFÀFu© ò,wt?ô')æ©-6èy´‡Z1¦gîlŽ3Ì”EØÌBÍÌӧ䕼sø>øï*¹õvÆa"Cwï÷¬•¶‹– f}㯮oö5x=Uþ©ÇuD«bÒÒ¥™jÖi?giµ*J"ÀtAEb)¤ú«ÉªáÂMä ]¹ØÇ+ †r/vâÕì:cæu†üï¹E¾%£•­Nêdò•ïÿ ¼³ºÉý_p4 OŸãánüÝ19û÷¸ë{ñGõ½¦Úh)»Õ®ýÿi©MµßŸcþ/.À)êþ×k’Ñk'híýŸVÞe¥ý¯V=¯÷⿆óšÀì50›WÚÙöÝÑÂà¿^˵—ÌÎ6ÇÉq¾D#ËË,¤Ö0^æVÿ[ü£¡·Sg—Ùùüé<Š*ð Ì<´ÒSX×}´ NCvd ºþ·›hpn-3ý¼žlsìxòVÞµ0âÝ,7Ã)=㩾xgéó1ÙÅÐ×Μ2Φ€Û•¦ÜÀÎ[E’ƒJ+â0Íqj™!Tªá×aµžGQB­Î”|ñ¡¨šÈÑX†êSEÃÇ2P'SP Íra¨’J”à~•ˆÒ݆Çèáî…ô´Ž›ËdpW4Ü]8ÁÀqäõi÷Îl­6áw¦®.2ùFpAs)w+5»‹™æ.%Í®)¦ùC&à²ïHüÝ où8SG3eƒa{ü|ÒÎo/‡ŸÕwhFÁÚô¥—{ÀÞ³x”ýŤÙòMƒø ÁõF2œàïÛì[¯3cl>ÔÉàœ»åù^p?Ä`î¡¥;rÚ/=vÂÞ½d„bë!&•\'®MG g×äpæhÂP4Š`ÂD¶5 §b£‰ëäy:^0*Ñ;»7+Ú2º£ô\*!UÒÖhÀSÿI%Ù8pFj#ôJ®„cq­LŸ18›N53 as;"gïXZ kô2:±…’% êäJŠgâîX‰\"«‰Žíéb+“œ`„YýÍ´ê d½øP?0Éš.N&ù,«û–t©§$ì'f@þ ž )ŽB)õm¢ØÅPÌå‘H®gîMÔœ@äMÓ¾gÒTæÑ¶êˆx”#ãÒ$‚È‘WQÂáòƱÄÏLuÓmf¼aîMi>ò´goK e5U…²î Þj³Žåß 6aS) „j,aŒ£l7òg&fÀÁÓÞêDCC™·*Q_4|V¦,›—ÕW ¦qHój…ÆaÒ(ôf½d-?ßW·«Ëçm^¾uw.O P[[S’ºJb<ÝÚ½í6ðº³tzl}cLQ«»±•2„Ø•*«$¦ ÄœTF(“Ã…êøô¸Šœ”ätKH¬ÓåM·æ¯nO ÏjIÈhÏw¨_š¥Õ[‹ælïËÒji½š ™ðù2]Ñ9UNÆ J¦L­\¾>Û_;V|®ƒÙ[缊ÿƒ°°ø,%+BS•û@jÆŠ˜M)TzC# ñÑÒžžPZ8Ø+f%”-oˆÕbs¬Ñ‰áêd}öƈèŠíÏÏ]ÑlT†…5Þo/ÝøÄL{qØt{ |Ó… zvÔ!tRc ÁOzî©_óñ¹öt»N qK¹F^#H¨L¬ºÀ®µw\UïÝU0n•©±™`ÊǪj8[¶¤ÚÉ!äŽOÙÊz§ÒQ„&wWø„{žšeUD¤P‡—õ¡­Tk(ÚJkGÕ5XsßXW¹¼2²f½»ÍéÏ,Íœ]¡IhÞñªey«.ödÏ)*ÙñóeÎÚ¬\ä¬_])KÊ4BM¨ â]Q³¨¿oYr׆‚SþǺ›šºÍé žÈ’ì¤X‹ͽŒ:‹ÿνY¿Iµ3¾ [„aÇ­=>´ù©®-£«TÖÆÎÞ„u»˜­¾âåÑÞÇN×§Œ+œIîÙ[7”Õ[9ºA—ì¥òÈRFöhøwU»ƒÕîL@ BoböGŒrâ6Ü÷/|ཱ—׳·HR;ó­ãpؤµr‚qT&g9ÆÕƒ©oQæÀ|téW*]úU^âjV ³˜+Ú=)íyV{õbTáµ<åä|yˆÀnÔy-]†¶©÷“ÛrÂÇ×s]ÿƒH¥3º{ â!¡wg5+{TÃ$F´tÏI[÷jZÂy¸¶±wkVæK‚‰âs*­b022òö-œÇÄÉõ?ÄÔ5ÃĨ¬b¯q¥B±Ñ™ã²yôi­sZÓ|3÷4ÎØïŒá©dÒ0WaBl¹ùÂ=}kmÓö8¼Z(P‡h‚c’΄HkRcAÞšI¹Þ‘XgÖⲭ¯¾0c Ù—O× ŽPGiü¦±X·@„s3Ú·-!޾zö›rÕ¯.¿EqùòÈ S£1™È̶tîTUL+¸™È§`Ãø0“|ŒU5á„íí瀭o猦~Ô´m8ÕR¿ºÂêÌ­ÊuFggD§ÍÞYÖ¸-ÖÅÓÈd&G†+*ß­säTä8b‹j‹b•ŽÂø´¾š$~/1õ©4ÿ ùá5ÅÞê.gÎô¯;ÁjvFF§ÖgÖnmsçz³„bˆ}¨·0ʕ/HH«ÏMʨHÐ%» Ù‹× ¢Cz©Tä³ôaCúÂô©Çwã#ÁAÚôUEa‰ÁR:HÊõ„s×0Aì„Ö1"»ns³KÀ¯Gi+mbSÖÐ_'€‚° ¡þÓpúâõ³ï&Ðw•÷šâ«m7ËÕ…Rf½ð¤Ô÷S.lš”4ªnJ:_¼ 
÷;&³nþftqNÁ567Pí5>97~ÉÝ2yw‹1ðB^™”š„«I€tÆp÷躼M–³îîDœ‡¯2ÈÛ—ˆ¬‰k¯eªu+»=—n†ÕÌ:f_„ëx.kä×ÜfÝþWø7YW°gHzÉ)3™½Tâ“Xî¤Ò€÷ƒ©2=íÿÖ‚·¦H1k“=®Íjú<÷m™z7ŠdT&?}ô.˜uª†æR9ýÿ.04WøûMü[:· O !L»b¢þæòúLIÅë=í©j!Ê/$–;ÚËué(‘–ñìºîö{š± þ‹âš×W`(ލ¦ÎÑãÆÝòü»u*XnO›BàZ ôS"piùdÉZ%‰š¥aCo‹…¿ñšú|þEÜK]ß­ "°¹¸@+‰\œIª%þEÖ"«¥È†íªàXe,ù4þ=êmƒÎQÙÐ /6•–Üáˆù4U·9ˆ=ˆá˜@D½'Ywy#Ù‚@ ©“)ÔX?ÉÅy*!¤^†P0ad¡2£Ñ_e¿UÊü‹ÌYaaYfl·D©”øç›3èÛÈËÎSNíè/Øuç6<[Gže è3  ¤³•üJíOu*Éþ“Õ”¨ sä‡S_x—æÄ:lg’LÂ!ƒ.OhF•¥îž‚´zÍ®p½D,!‰rL¥5¬f0+ÎaI¯¶Së²XU˜Z›QœbŬ‰9r•\£ó ðìOä蟧ðÏèèrt3ç/I-fWÍÒ‚äÖL³«n¿±>##.7Z™Iž±äÎH©ÚÐgÉmO©„¯þ¡­‹‹k¬Ù­ ÅÓ¬±›yÀ££†PF ÀÓ¿¯E¡3zõñ&.%sè}¸\ “Šü_’9‹áü›ØÜÉ>>öc ý#ìã³áwb=רŸ²îÑþ1à›¼f·„½˜`‡ÄÙl¾p±ˆmÉî&vD¶ûŒ‰MD¢Ñ×^ ÇÑB¾–qd´Î0YÃxá(3¬_+YEð Þ¨êbî*ñýlîºÍ–ÄUeÑêmh=‚|¢{…ü äYhÀK…"pÊ™M« ¤Ø „°&äZ,™5IZò­–t»z™H’öÈèÒ•—™ã qåDÆ$$ÄØ Œä5ì#NÕa¬!Ã=™¦ðÔh%A˜½YáæähU`ÄDE'°°e9 Ï’e³e × Û N¸&OÑkR1~UÇÞaëß…(sŽŠÃ àžaqˆ¢ì%™!-U©.UzŪéž) ®Y?G‹3696ÖÖ+ËGÀ+ÔˆT¯hø…<ÒÀÑ~+}¢Ò0GuÅ“Øz:½I"±õ“wI÷ûŠ<ÍÖbp‚¦ã‘ÉpìCSÁz‚:Š©áJvûVI=7Ø8e’.¿³dŠ¬ÛƒqÏŠ%¢Çpb÷†ÃÞue´ÎLkãM5ǦŒD·J4©¦ø/Ô xfäŽ;>ësT.à*ÍZAXPZåt)_`ò–yÕb`|z˜Ø„m"OÐkͬpc‰{f›:sLŸû~Œ<Èâ .J>cÀ]1÷/qüS¶€Ôj0Ù{ûìü M‚1Ë9t Þ X&¿„|år,F”"…®->¾×ˆ7¢äŽqì gà˜ÞV °=åòLÛ5;Å3csU×ù5Q%=ñ”S¯Ç`µªd :¦çÙë»–¿vOYæÀCÝÇg„˜À%yÑÒ3ó·µ¹²Ö\]^º¬!IL•›Bf<®ž¶dß´=×^²åùùý??ÒÌÌ­’:K¨ Ï`¡û×Ð~%½!bS³ic?êdzoÏJ´åxŒxæþúÕ)€$MõV yìñrnE}ÕFwõå&^jòŠ-(¶Ä]?½ÚVa¢>‹'Yþ53·×cúõe4lÞÈDþ9âmÞЬ£J/Î I0²1f±§£Ý ÷™QúÈHtàFÝ&BÊ™5ž×ڬЇ˨§ƒ3Ókp„JƒcóìׄR¡Õ“ r½Î,µÍm°Ïhk´Ò5Åp¼£ø;p­œ²¢=ƇX}ØN<ä»ÂQ0ùžñÔÆO&eîMˆâè÷dý{Ú›™<†åŸo‚aS\1ŸdÓMx„Üá3âçÃøàò;D§Â’Ò¨°Ø1¦F'h_ÅMNW ¶k†#Qâ?än]¿,>”+抅Úüšniòñþ8B~)”uø†®×†cCûüäízÿö ÐÐ×á½Ø%x¯9p/ü ø%ú Œ!þN}Áú<Àw:bj£ƒÎ4Ìj³z}7ãó¯qèç×Òè@h ƒã˜  ACËO9k-x ?Á’BzÙÔÏ‚-¸h €óT*Ìç‹4)Oã@¯"d˜G#S?j"!`£lؘqRá80ãÀ@Çæ0.óA×C`Ôç.¹$‘ˆxóùM¦SSƒ!‚ à …A€£± B/ >Ø/à8&ÖyÏó@BóŽ]¢¯_ ¯_Rš?ŸœÆ{ÈGO®¡ÃB¥–&…bÄ…‡™ÈWõ&Ç«ÔYGú‘MQ¸eæ«Û¶‘þeôÊ•±ï8Ì·2øïÊvøéþ”úhøoŒ~>=r;ËÀcÚˆ¡õº0,š fåëâ¸ä#&ýõD:>Ú‰±bŒ¶a è/®¯Ë)vb/úîuL‚?ƒíýîO؇¶c%ó; Yxã»Õį¤C>滑¿QŸ`¯Ô£ïHMb¯KüÑâ3‹¡ß½Áú–ÝIëçäÎÍ%¥[º’“»¶”–lîLƪG?•ÂOð[øûÙày°ûÇT¿Ÿ­‰ôB"‚‚"B žH V9öÉ‹l/ü} õ>æfÓþƒYS$JÈ’j¡ÅIŒaÿ‡D…@3Q•l•‡ÕÜ»8_$Tõ—E‚g‹î?q$ÛÆwÔ>ýÌÕ;аBtòÃôýýŠB´¿K½ž€ã£Zmúˆ»Ú(>Ʊ90´(äÀ ÖG†äÒêÈd‹«ÔþøZˆ2›ñ{þŽ 
üÎHWXÐy4è™X¨³\Ì :ôóCj5ÆõÔGÙÊv‚V™àeÁßõÀߥz¨&6‡­DÃýb¡÷…r/’±Ê¦§¬9/´§‚õð°P¾¬x:I`ÇCìÉFoºÁlÈN“ ‚1~\NwŽ)Ì–àâ8îhX_K—*¥ {œ uÕŽÒ¤ž´9=¥áâô¼¹¢4"¿=)kéÂÉñY³V¨=>zÆ¡Ù>zÍýõbð-Cî!ÇÄFþÓ¾Œ&EñÖxè"*Oè¦_ˆ%Ÿätg›B­¡^¯¨°ƒÕo¨‰`:!CŽéìIFo4|Ùi2a0Î#‡ŒúŒVª%”*ä {µ"dÍéÉ !¹µRe)äí|ö-ùªâCÚÓB¡ …#⥕JèZ>#!Õ ½=”«A¶+‘›d²PÉž¸=’ RדNv6ìŒÂ¬´„îqÓ× xãó…øñ,_ˆaAwA'_)Ö[_ý¥M'Rò; » Wuvñ5“åÕ_*2±^‡8>K! äKL­d@Øllsb¦ð¢žO…æ…F™uÀ[ tk£e‹|ÔîˆJÛ•n‹w‡ãÄ®SŸDå _Àÿp6¸š¤w‚`x‹Ž‹è>‰„ ÂtÑ5S¾LÚ+†ÜÁi—Sù@Ëïd$ž-8N£°?"ô° {TÚî|#DÆcª(8˜eŠ׉°²G¥ïÊ7GºVœ`€zœúY’Á‰q(‡@ ÿà œ†$ J'àˆ±¬#ë¡nÆÂµ4–HÚ¡3L×±'Ö7°9ã¡ÉýмcuOj –œç­™›—ÔØ<ÄèÌr9‹½úœîîœúéávo÷¾Ö‡]¨8@nŒ-LŒ+÷é#³jêk²"gt[ñÓ·×5íp‚7ôµžØ¯Ko®ˆwdûÜ8´ž!#ÍäÈLH(ê,hÚi¬I,Z;Ý—ãÍ u&ޙ⊈r¥W%Ôï´™êÓ2—4zÒ¼ŒÞéñ‘»ý¥F»»ŽÆ‡FrÕ{OÝ÷$õù“Çf7èI,ØðÌ|<Šd]÷‘¯_÷á-q«J-¢õÉq¡¹‘w äs=¸ Í:Y÷ç"9Wü27H¹ÌìÇJ1˜®‰VE:¨b%¶‰Ág8N( N¤;z}î€-@G?Í£1ë™`%·;ÃÓÕVkÉ,»-¹]™ñÝmµXWM^öüŠ˜0›qZ>|c§3;tœ‰ `D†S8\Ø| ° ÜÄ5`ð†[ȃÔGCM¦Ð£ýÚøì$ü‰õ:mÿ_qž-_oB]@Ÿ$`„ƒN"LàF-Ù˜ 0 `[ÇZÓm²B Ú7.4§N‹m;› ž¸W¤¶µým]Õ–v¯§eeÍ’8ûÞÆÂÅ‘ K£a“AQ)‘îªä° +Ò'"äŠIs>öRÚã= u)¡µyÔg¤P)^^×Û´®²|çâæÐ¬5éU{zÒœUó2¤X­K°ë­ÙMñ VS³²væÉÕ ÿ2ÒT?"#¯¥PîQ7ªÑ*’çtˆåhWOZü6Jp@ºm´Ýè%*꘺«Ì‘…ëf$ƹCR’4›CÊ]üŒbGÎò³]›¿yzNÅ®_.Ùù§"i°Œ‹z©¡{¿z´-¥÷À¿é[üæÑÆÜ%÷ÕåUé› \ùNuXzÓ’ÝÓÒ›#ð^~ÿ®ÂÄ¥».[R:ëÈ[}qö¢‹KRÿõá#;ßÙW’îäá)Ï]PÕöøÈ‰ÕŸ½~zqFù?m[ýÇc Å%åyæ”*WʪųÊìJ%œãþ‘kly†ÅÊ£ÏyCÇÍç-ÚAYÒ ¦ÒÍAø z`êà˜ûˆ‰ 5ñ¡…í.LÏ]þp{ßÁD)ŠCcK“ò”GG—ô¤Åçy¢‡ÓãÛ6=ø\Wו7µÅ£÷WººžCï;cªúwX`PR—ƒC,¢Žã+†^š@Î&†D²ùfâ¦@™?’ItNÇJ{½Hÿ™-ðgúã£{™Ž{(õþ8xIt*eiò)b~]ö†L/+‰AÌÞSÐ-sû¹$)â>/JɃ]”|Öå¿ ú ©ÇBÄ"PžK݃•qDÔo5:q¸EZŸìÅm¸ž<Çä'{Á¾I}T÷ºë–ääÔ»Ýõ¹9KêÜ+ Þ¨¨B¯aô•˜•½¤..®nIvÖâ:·»nq&¼ŠêÄ¢Š|¡¡¾"s ëC‚CL‡0Qæer¯V£ 96sÒÅæÓ0§R}Úè±90ÓœZåpV§†‡§V;U©f,÷PÌ̤Įè#  &£þP#ý¹;æP#ó;á¨D·W:œUifsZ¥<„¾8Üt(fVbbwôáÆC¡&£ápÓÁ˜Yp ˜ƒŒî–Ž|M< mrŠF ׯV!‡I«AUʼatÕ¸‡ØÌ>·×gNÚixàó7^NÎæÊÄRcRKIéæùÓ´˜‹Ä AþÙ ¼ÃÁB5T'çá=UóŠìrŸYýþ= HÃ÷7ãÂõøÚÕ¿¹§¤£.‹'°:¬ñÝû§ûd!æ«W¦ûÍí^;ÁÍéZPµÿÖt1}ì#2¢€ÝM?§¡,°›†’~LІöû8´_aÃQXj;UÓ£>…FÁdÓ™† zóM¡ž‚Èࢅ‘¦¤²˜ÃÝ»Ü.µVlñŶd¬êHËÛpu± SfD\IOåiÅò˜Œ–œî´%­‰Y+.Ï Á ª Rζg×:£Òc-R2'®bVJL¦Ë,ÁMª¤ ½ÖdäP*UlÑœò¶G6Vs.Ëu Ú‹Ëbk¬“‹é±Åóªª.-&w+ôHžG ý'>eqYÞÑ=Cº—÷h>GóÆI^ 
eYx\~|fo¥›kÆöËô#¬âž(6§P¢Ïç‡:³À‘'×^ëéåyÖ¬&OŒPëbMÊèòù¹Ôw(6‡8?Eû!ôg¬-úLÞ.Çž^!uNjá߈GmÔ9!¨¡¿^êW»ƒôäÛ|!Ζ6)h/6¿cvõ"È„/¸M£ôÿÌ” Ó%±TÝk3ÂÞaã´ž0ãn&Zd"3“63¥ÝL‘âÿP:ò¡æàØÈPïøJ͵Zîé}*DÍ—~¹¥\«ô-¸ô¥Ûî Q $¿íýLɵYX·ë˶]ßV¢ç/Áû©'Ákù:-õdX„ª-Ô‘p8c´ù@1üôêèzpƬ§A|» ®H¡÷‡™^èxõBuaÒ€œHi½ÿíÅõûz“[îkIݽ½)—¬Y q…ÛçdY3ëã wôeá³?¢.¿?§`×wÊÞï+Øõ§Í Nt9+§>zsÁ‰NGùã ÎÓoPÝyÚ7µpP·`8%¤8€=ì?%ó'Ê@ðrŒCŒ°ÄZ•”ˆOí<±~ýzüe”ú=>›°‹Á‚ëgjÉ -¤ƒîN®fÚÀùÛ:€³àcéaŒCïÌnÒé áð¦T€÷²çE„z‹bbkó½I”¹>¸eÕáÚ²µ­ñ¾–Á\j^Ô4ëI£vV²fJ|LüÀ×ögƒÕ±¶‚îôùó«¶>Ñ>ëžæxQY˜‡Çׯ×ç.,2ä-©—\ªgާ5ºeg‹³nÿ/æ&Ϫ/ IUéÑ©5©¥«\¡émé±s’g=O*wQëDDðê7ö–GzõEA¡–ФY»jš3ÃåŠPì(c+Ÿ†´öC|d£õtæõÑ›LD?Trýå—u{*Ô쯕©01y¨4,Z2T¢5§cÍZÙ"ij§ižã ,†R=Á=€ïŠ ª{qO^EtXJµ;k~¥di" ëò²×»K7=5»n[›[ï-£Ö‹ã ÇñŸ-XãmZ𙨒£IÊ¡sGÙ]Më«{žÙRž<{O]Ѫ,Ši m“/±wÐókÐúr+|P;; ƒÁ—|j±à^/™"”öŸÙe---à‰˜fŸç}Þ¿þƒD¹°‘V[OéôzB[ÇÌjÌih‚½nðW¥ãQ8Š®‡z#4Þ(g¦\¨ÕfëtOŸêÎ ¿êŽêWÉ‹£„:Á{! ‰I»+.Z.Â…³{:«í%ÙéaçÄôœ(F~Àì¶oܺ¹Õª±2‹&€”­Rì)¨s9‹ãu[|®²ŽøÄ Ÿ‰—¨¶°q6ÆlòZ”yµü¦jOLl†L„‰â+âC´ö´ðvY®/¥%Ý›ï âreü0ƒZmqicc°Ô´»+R)ÄésœÃðO2ö§ÀúðÝX³ot}€NÆåPŸ9˜D4MÝ£¦~+“ì]š”_(ìAÔs…É¥KRaø-…ÎÈQHž‡¶ƒÇbÙÆòäŸhìú|ü>e\X¾'TM6éCiu³.nb'ýŒšÑfënÜH˜©™žj¶ô숇±Vª\X£ªéßíÇZñÈÛ@ÞúÞ$~û}²ÚXï1D“‡žÁeãh°³À×M½JÍ­{¨ßͱ`}%v?u¬tRGZ@7hl¡Nƒ†Úëd½DÄ«à;í£Œ ºuâ6 uþ‚t8 T[âtAz©Æêq%×ßø§s%CÁ]œU-7Dhô±áª4÷„´ÞZž#äø¶ÀþóÀ$ãn¼A:üp^/~K]~·žæÃW¬ðÓÄJzïÚ sÄJÿ«+ˆ•è¦ý4X‡¿Â ¡mx½¹~Ã}¶1 Kq‹ë¸C—ಠKR<mMvC¢Ë—EdÅw¶6è;¾ bqj|G[3ó4rI–§³­ ¯)ƒs\S„I¬Ò g»c‘Fê‚H$èsÚF‘*X8‹¾Ê讇YÿÄ?…ó¥ è.’NžjHtá*÷áŸJ0¥’2©œÚàh8-c2¿ 0¢nµ,˜ÿ _J²%‚¿òµÒ5`¯OAyŽùSNï!Ù¶ Ì«Í‚Nºü3løu)¦P÷Um\8Õ$aìØ=rjî©–ÿW„-P~Ý¥ÕÔ<4'¬ "ƒ¸À¢Oyù4pnˆ«Í}6$¥ºí;ìTtîp¾µûd=uè_©pŒp ±Ò4ŽRu€ÐtU± ¦ ¡b4q!^8$Á$þHhˆð|œ” »—Ku‚7yR¨ø½@']r…ÜÿÒfT¿ÇªÀÿÇ—Ñ•Aª½£Dóèâñ¿q†!èÀÿ8¼‘ÔŒ`¨ö°??AŽ“ ÔFà÷ Pþ&ŸÚŒÆ¬Pg¢õÆœ ðñ/Ã÷†û%x#\›‚°u´ì.ù‚c6!ªðáF2:Ò -ýpëØA‘ôe¯¸Ø×{yMþòpzá¶WWÞw­i†kðÈÕùQŸý|öœŸñƒý?;¾|šªÂgvœ]‘W¸öÂLÑWmo›60˜—ÛW±àç@pø(ýrA|ÛÆûŸšp‰ù ÿÆ×Bú‰cíÕé$9¡z ~JÃÔ¤¢üö]ÊÀÌ2ùœËkó“üDT¥Mè-ïZ¶¥(¥!Q×rö«}@añ†‡çe&jöO›¿µ$sV-,ÓD[|wUÔÌ%ës³óýù\… çµKÐÉŒ.˜áí¸thMœX+ž ÑjJoH/\7#Á³ìç÷„ò$\(Y,Šø?Aû°¨k[à +‡±äf̶7Š¡lš¨@è’†Íli¸7Ô˜œà ‘[bÜÆ°d—Ño€Ÿ½!òð˜xú3›-µx°cI¹¡<^zãü$wyzœ*4§¬ÞÔž†®ÍMrW0×ê°rW3ÈàɵÙr=† gÁ®qï]è;L[¿¢¤dEƒË˜XY”ht5 
Ïõ±£ŸÑœDŽ|C¨h?=uBiDRÀHrØï9-gk\¡é¡¤5ax³ÐS<Øœ$àÊìy¶¢²j×´5•zipßCÙZ±¦&\(ž¾û™™WóH.Ô9×–íñåÜÕ¾ùgÆTgØÒúö×evçYˆçB¢t⢿Zó+êÂ7›{µ-üÕ½µ'M©š_þZ¬ÊY`ä!êA|»•Ž,FÓÐpôä7Î4îÞ$ÌêX[l. à<…18¹65œ»^˜Õ¯O¦Mªƒ×ˆw»÷¶Ú…²b¡ˆ…$µd åvïm³‹¤%ðInÉB:ÌÃb‘ ìK¨Ó‰¹Ò‘‰úT£UÑ ~b÷Ù‹!á$”©‚‰. EkâìK?–c‡aô¤SMaYÆŸž¬¤k°·RgÈUÐ> Æåþ¡Å'ûžíÒ«ñڥ׮ʈ?‘ä'Ù-èß^ȃßQg‰á@m¨ñFy1 Õb$þ9ü<¸Äñ?ˆgʇÎÃðX€£Âx$ yªH–?9ÞŸ|8S¬à6,Ò‹3ÿ|ÇÈV( × FU! GŸçÉÄ´bô?6PÿàŠÙ¤˜Gý (yb’-悳ԲÔr°iö<–§±ÊåVÿ9ÿæ–;ü<÷@œÔbì5ò1K)çh&´ŠºéÔ>s òS>:™/!c¬Qù>t†'¡†»ø|OýŠJ{ž5=¼+Ü\Ò,u0  ‰œºMÍ.e­ò5fµ®~£ªr#ý/•H£·2ž«ïˆ(›¼ËÁ ª^Ê÷¸ô)zìdõ®í……ÓÏmüƒk¾~ÈJÀ^„÷üC]fÑ=0Ö#¾ Üà ܓDßóöËÀ=‹á8ò~úž4úžYíqÞc1=…®SG±-'tÅy£y]"AŽË$.a[ŒÅu 9ó*cœ…ueI<Ë"WPçÔÚSŒ¾ê› ˆ÷Åä¾?èãmA¶‚™™• «S<Þð£gìYI ٵɑéÑjC¸AE(&s“ƒd–‹OÙ>ÖÚÿ¦ÜZþ—kx 3bÑEøßýª;-‘‘‘‹PÉýHç?)ùA=‰þõ´‰5BÛŒ‡ÀEl=|­É'ß! ý£Xú<:#']о¼Œ9Áo=2›:+*7Ž*jQ†®èl¶¢fýôL«Lm‹&·f‡çm{s+xé¼Ûà “)ãúÓ¦¯) ë\Ú °óÛàRæÎ¶,úNòŤ§ÞûÏ—ïÕÔ 4昄¾EÛx@ȵZõî¾x§¶vYc®I˜°hÇ£sP¿¦þv­v¬÷ê“Îìwh¾:©I¶wªÖ««=d;Q;¾{™¦Ùt“ØtÔ4û掫㚎Žï Íëµú$½§õ¿ë¬:¹Õ-ç-UÇZßξsëÛû%ÊûW+£¤L¯É(¡.ÜÔRõ§4ÁEç9qg“ƒwy.èã›Î…O>Dò4èS„F…î‹ÒLúüß:û!¢ûÆ»˜Î?EЦhöÿÓ¥ŠwSóÿ»³ O Ÿ}Búè>(=áDÇ[ ’!㺕 ¿}ýP'>ÌéÄ훣2}±©Aº'«‰UCë¦ûÀ™±ë¨ç(Ý‘ ]Çl¬@_v2‹¦§v2=ÿ×Ö¤¤EöÿÕvíqQUyüžû˜†£`¼AÁ@ž‚ˆqÒÌç€ó+«5ˆÇƒÇ ŽV_K‡ ô~_HåÔý…ÊÙÉüzX~ÝŠïgÞRàG)~€yO¡¯‰âÝÌ» ¼†â‡É癕xßr ì£'öAiàg‚[™T™oŒW\à ¤Áž—¯Ê\ì‰âU•9# ¾þ|QÃÕÇv¾Ý1gÓÛ(øèÜí«ÄúXƒ“Êšo~óТ¼ÕlŠÊ‚B££BõÂüU­u'þl?‡²o—>3¹Ò/ )5©þÔ5dž[±°m¤A•òA~õÛµcT`áw½ç·" ËYžåtüýÆ(Ãè4ÐíÅ»â@éˆàÃó>BO°ŽQø'ŒúÇ–b”îô¬Ÿ?c…ž‡@ç »q¨N£¹hœ¦ H¥PÐú ñ«ãbðúz÷û‡«_Í^}¨^|gÊ(Ÿ‹ÕòsS^/ý¦Mp,}ì³}ëOo+ìYöôU~†¼ØîF,WÎ#AÒŠî‚Éq y=œ*°·¾ÑY™³J¿ÕˆÏ@ïa™ªZ/ŠÿxT4Õœ 8;ÿ¬OVôiq ,–b/Äöµ°]ÄF8¶‹!† òÜ?FñJÞïܰòç­Tœö˜£å7é•ñ‹ÎMCƒfQ7Ýlzo€K3W©BT`ÄÈnÑØ+8ÈO‡t¬ #ÇÒpz°µ;£±V/1# 9à çŒc ü(Å0O*ìg¢x÷<ŒâæŒÂÜâ°NБ6š¶ôSÌ*zµÖMw,]LQ§®õ•fß}_gÍï÷Ì¿ûmvbáž Kâó'††fÍ];ÇöN÷üië{ë3—VÏŠKúE°ïÚÔYö´ó“ļDsýÉ¿´¿„2>¹'"½4©2ÄÏ01~|ÃékGZŸ»·Ä?ä * ä.zæ^YI#Ñå1ÃxŨgRŒ‹6!NxŠâý¨ÀWÂÌá¸"¸\´9Q¥¸0óÌùjgYõùóU칪(uP9¾[Ív:ïÎì.Õzï‰übÛDbÞUæögPàí®&mþñ–Mzn*u„ÑXÚЄóÏœÿPCÑaœøûÞ‚õŠŒÓ)ƒñå?±Gðý2Žåb÷Cìü€kÅ_Û¼Ûµ–â¾`{‚;\ ÄöØ'‘P§ÜÌfÃ9ÅÇ®o¸·°~áÀsª8Bv †Âlkàœ-ÇîÈ®O à‚-ktrdf]E†¡íÜg*+ ¶¼°£Ðhl Ŧˆ©¼·Ž{MË.Rl„€|Ìê—Uà—Ù/[ÜŸWÔ],Ó¿žQÄÒ4ŒÞ±$=c‘zw“û¯Ò£²ñˆöŸÀÑTàìaYvç‰Û­¡þì:–cwŽîK!”)˜Ÿe‰ üXpø[_ ›Ì[ÐqE¿ 
÷¸áxÜ$/ãšµo„k$0o‚ܧÎf²<º/°@¹šx;š¿ n…:=×]?(³¸ç—zmöµ¾‰+‰Ý—WÝõ2?ºûÒ¡ªŸN]}¸¾íýU9kÕKÅ\ÕLâÒ_ßýÄÖpòsû (ãÓíËŸü¢£ùÌöÒåÿy¸y`{IÃ3ÿáÚ4r4–ò Ñ£“æh ¿Bñýh¡?Jñ¨LaÅ»Q…Gñ#(®e`?âçöíäùÁ­Œöw¶ûf~Bòpß1=¼:ÜB\£´¼uÑK&ÛíJ‘ù®“ÍY™+ÛG… ¬'2Ìâ•€˜øÝ³_~†ýz±L[¼¥¼ùnçGì‹iKî«\ÜÓ¶Àšè|=‰å­çIÁîWO%’kø2È5Ñrn: x®1ý@Ç¢c#ÉAç>À:N‡§Þ#Je×­dÚkõRÌ`1š¦-œQØX—VÙ´¦©2mæ®—7ÝÿfQ±O¸ÑœRÚ8§lüIi•k+Ó ·žZß2®Šþœ10vblRY]VfyFâÄôÙkf¯úñÞê¦Û–Œ“s&§T6çT¤Ç'¤Í^_U{|ûì…ˆñÈÔJݪǪ›Fþ»Aåxµso@;6ÐÓÛ’Oñºòp’ú´¿„ø´ÇíCXo½ÔMË}³KC#~«¸ôèËýbm0oxQCÌ¡AžzD#ÐðØ-8΂8‹#c/"g”-bàR†”ž®ê½5.‘„â2c Ò"5;˜äÇK’ ŽGy­V&v,–c¯ëk!â:AÞ| ñ3WAN‚[ñŽ*Y¾ÃV¾Ã!uÚÐÜànšÿàª"úsëûßµÝóR[jàÚsë ³·° ªØí.hî]ÊÝ9tÄv©¯¶¶ï’ ÿ»wioK°E£–y ç%È9ÏG_¡ø~dTàG)~@ùyÈy rÎSâ5?,ãS|ÅHöÁø†Ñ_»M”ט¯5ñv–Ô·âŠ,ŸÜ” g,$Ø®ÓR.^eeýçsO½†-©7êÄ[Ÿü{·uÛ‡£Ñb@?ò‰ O¶F gç싺…‰ábšfAÍW%X, Aƒg‡Õ2ä7nzTtþ8îš)þÊ‹ŠÊƒ¿< ;·Ž`s¢ã~6ðux>ýtO–c© t'¸´¾Æ“½~o TV+Éþ°ü\¹øà k„þ+³Á?Ò„¶¢ÃûÄoÔžéΟ±Ó¸†p‹óµÜŒqY‘li¼z2€ÌÉò>°΂¾Àø s ‚|¿…<ëûã;±ÌðfÃÈfuš9bà3t˜ý£e(Ƈõ÷ášÙfîb·FÍWo \ß;ÍêÏö²>ÉãK5мß섚?Ýu…{ö[“ß#×ÔþOngÞ‡ßþ ¼ûš 7ÌÜè½4 OÈRwfc¿ MɇŽôOiX4+· ~uNþʲx­mi‘-[]¬…¨:¸ 7¨ÿvzóá%âW…k*’”ýé‡RâÆñ <Û¼Éú¼-Ø©>Ì)Š_lnE0?¡ø;B)ý|—ÿò/ÁJoBP;~JñCÌEÀó0~?Øq Ä®ýv²VHøZü=)Òn3žÚÍÛk—éaf•!¹ô̆†¦¼Ú=ËR絿ÒÚ´7cªÞd0ŽO)Ê(»§.+bêüì’eÉ!æÀF•%ŸÏ;98¿ÙQgûÓé5å¥%>~Ö[¬sxvUASybRj’‰;áaE"ó&yÆheÖh©7j¡[Ô½÷¾Wjþ”gs>Ù·Ãó âƒÞ õÍFðMà{ò†ñÝ ÜÞDêRòycÄUýÅíÊ[Y³2ðåÕ¢ q3ohøŽ-˜±¶&/èá—£‚b§^ÉXÙ´¡pqûŠÌèË6wÌŸ·qVlv×ÐÏÑæ6‰o#>*Ș­ž »2—ÙÊÍSr‹ã‚£BœS³æ¤‡å­ëm¨ìÛ·uqzJiM|ñ†%ÅœÉì 4Ü1Ϲ!ëµYÖkîØôÒ¨¼Ç¬X…ÚÏ7 RÝ–Qöå^˜ßàK±BéûRêã®åJÜAñƒK•øYŠ÷ʸij÷¤‚«ÚŽì”;öß îX;÷¡Œë„ݰ>,’÷ŒŸâß<ájÃã®¶fæf²µå¨¢ï¦ò·õ¨óÿÁèöªGàâÅËm³é&•ä«BFŸ¥xï %þCŠ?"Ï÷RŒï„Ï×»$Ÿ¬@ï.ÀvCXNÖFW²gç(p»+¾Ï„”byàPªKâÕ†—xÅÐÔS:÷«t”*v¬[d¬WàS1>âpd’ñTŒÏƒ¸&ŸïrM§2Wƒ­~ÐUÂȼfì\艵”Ç#s-4øÆP‡†B* ²‘Úa™rñ>ê5¸ã²’ìeRz÷å2Ì—|ž7ü÷/ÑwÝx¿Ë ŸÞ+ðÇj’¯kÈKp‡üÇ#–:…]‡$+S«¼g¡ßkæ#"“ÊÚQKtž†m!ql¹ñ~y¾+ÐíN²FÕjãöZ’Ë î~¬¬ïeÈR»÷zœY¨RåæëÓhñ« •ÛNåîX­ÔÇæÆQD«/¥Ÿï;(~P_SØ¥§†qÛ|±×Ç4FؽbO&#Èð 7B§4âÁƒðH#p,_mÀŸDä‘bÙ1îr}ÃÅ‚œ‰œ<Ù?H¸Ôs†TcbMR§ÿàQŠQÏð±1+‰ž…Ñ)tš;V´ ±+•­QyŒM+V§®7].ìÛ«0—‰û_'1Xq‰'(FºA*á¿$g7³Å>ö8ÆËGËt=®©}£âì™Ü’¦yú“[Xþý|C‰©Ât£¡]¦FÐSâ.¹ÖÓm°6£o(ž±½¬§èWnGÆ0nGûH8Âñ‘J˜¾GÍÌ¢ø±rµ|®ŽöQ³·œQå™â6°ëboÞìÆõ ¸ÏtÛ[?ÿ¦é¦2[¨ýñàºø­†ËnûųÉNEàObo?â…àOb×.>âàO‚w ûG? 
üIðƒò÷{aOÑ{a{ªMÜ~YÃî g«ÞØ9ÔNðÎ×±P³>ÕbðPÕtD›[F±\){)•½«I‰;(~pþ)Å ã<§°AüýóFxð.rŽåÒÄíò,‚;˜ÉÒYÆØX9Õáüü˜y:DMš¹Cew¢Í­'Š@“@àÞýw‘˜)$1Cp¹¡ç…•C­èux:‚5ÒfîPÉNd±¹eDd_Fø=@öÝDvÒ£ZÆÝ5›ó‡‹ƒ&{Fcõ÷ AkÕndü³T®^Y®«x#Y8Âø0þî¾é Ê9î²Î X lÐ`Ü9=7::/ Z§ã{A¨¹¡ÞéÛ®Û;}o@÷†ÒÞé;+‹Ún´wz‰ø/ñ­$xôNOðÚ<½Ô‡/žšX–Íö,.ª› uO_’Ç}¶J¨Z\³;cÁÙe~yÃÝÓ'%%V¹Û§·­Üx{-û˜Ü>z3¼¿ÜÇù6þŸØò«1ÿWáXü×à _0_<õ ÐÑè¨yÑé@7þtþu£R ¶þVòþtþ[£Ðöÿ¶Ðxœ…Ô}hUeðïy^ÎTšè$R£ÍiæZÖš¶5¦w¾Ü‘óLd¶bËËL†”Kg˜ZXa®1-ÿâ?sI%—p/9ð-–™IRj(Qþ!,Är…ßçœ{æx×àÃó²ß=÷\žßóUO üs.jÐôYîiÓ«ºW×zµ`¢*àÚÊáÚ£f:„‘‰¸È4Ã2Ë\Q— º„ ;º=É:ëGßÓ‘ßP«jùœuÁžú”õÿà!KEñ–Ú€]º z-ÕM¼yï©~Ó­š±QUš?ääÓsú;LQY(–{ñ‚¥²ñü¨ÇP({¸OâxîQTd ÁS8ÿ^Æ-Îs}÷ùçn¢zTP0Þ[‡{AÝÛTÌç~ÄuŒó½ðôLx‘8,—«Ì ø MâO´ŠkÎ q ·9î£#œÿBÃ4HÿÒ ñÙ4‰¥e´“æS4ùÙ뺒~åºCÓ­›­Ö`ªzœÛ(“¥8èœ@®ˆÀc?èŸQà@‡ÓÆwéD‡8†V§ÅÜGÈE¹P(‘™(—S9FÍM¡Ñͽã²í¢õªguêå6ó;çç„@kÖe.‹(¢¢Ôœ…X,ŠÌ9 Ÿ³ækŠË6ÌÑ›°™óKÔàìæ{'Ð¥ÙW®ƒµîûØlé:´Ë´°¦‘¦<ÚA ûý9´ž¨˜^£…TI K´¡ŠMmæ?ç‘; ܃öS ÕÒ.ºEíÔA4—vRµº±•=ñ”^*•'ež9Ì»»@×àÞéó¬iÕæŽY±Ïµ}ÆÞÝÆù}ÕÎOx–ïQOÒi½-ü|ƒÞÍg?cF2êP-?3â2ûsV…}Ïótì9ûw&°½oÉÞoö™Ÿ1a Ïÿj˜3i]¿—=÷ÍŸ4üLJÕls /ù„WéuJæU@Wò¼Ss+‘’Wcs+-›_¡0ÇÆäY’ŸiÿÃf]*?óB¯˜6ÿR20½d&Žê_˜uöìl^qÑ ³|“i6Òü0Ãl~Ù~rúÍðhn¥æ×8lŽÌP˜g!?ËN1¯¶ÏfšÛ›FJÞ¥£Ûñr*? 
CÌÄ^oþÖx÷z°„ß[r»hëÿxœ$½ùwTç•î_կג½îZÄUÝ ’jF‰ºÝ$ÕÝ6£Z Ã9ÂÂJh£0¶ã$îÌ63¶±ŠÂ \h5!ÀPØ&›9þ#¾A¿ßgî/ŸuÖ©:çÝ{?ûyÏPU§@0pÿV]˜ ž{ñÿ ü]à)÷P\Ÿvsâ"÷[±ÌÝË]A¬t7Åj÷kq—û/qœ÷§Ýç¿ FlÁ¥p,Ëá X ËàZÛ6¸ÝF zЇ6J° fáÞù9|Ÿ¸\à©Àß¹»b¹ºŸˆo»·ÄOÜ_ÄëîWâ¼;$~é~'~åþ[|↠ØvÛ.¼hm ØÃö°€=,` ØÃö°€=<ˆ¹?Šq˜€)÷…˜†«YSkaëëa\ã~/6ÂáKp Ü »xgvÃÝìm€Wáb{:0 Gà(ëÇ )õt`ÂýI4½žþƒ­²ŸàØûá„CpŽØèÁïýpÜÀƒð…cpNÀcð8<OÂSð4<Ï –Qð"¼Iä·à ¼ gá8ïÂyË:øxÞ‡¨ÃçÐ:ê©óÿ&–¹¿Šå°Ú})zîÿ¥.Õu©Ñ‡öªºÔ˜…GyçÛðø®øLðýÀªÀ³ð徸H½÷ õŸb¹Þó ùë·b•|ô {^<Ìús¬™”¿8/»;bÞV$ßÜQe¾¸kËò”ö¬hµgEkôa§íSѳâB"YH$ ‰d!‘,$’…D²HÉB"YH$ ‰d!‘,$’…D²HÉB"YH$ ‰d!‘,$’…D²HŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨á£"|T„ŠðQ>*ÂGEø¨£f1j£f1j£f1j£f1j£f1j£f1j£f1j£f1j£f±fì•bïÏÂn¸›}ðžAhš£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£i1š£és:VþY|JGÃçtž`|Ú½*.rÍbÌý‡‡ ˜r“b®v§ÄX ëÜ ±žWàåòœ7¾_‚[àVè±ík¼¿Í}*¶ÃØ »x5 »aëwÕ–ûØO?ÐøœúÄö?¤#ûsêãuWÅ18.•ŸÓLhû<Âò[îñœk'Ý¿ˆçáe·EÌÃ+¼ó3Uû¹ÀU–¯±<å.‰74O>˜6ªë4ŠÎ T[A—Á¸®€¥° ®µúÉN½jìƒýpÂ!8 G, õªq/Ü÷Ãð <Gá‡ð¨ÕPÇ;ã;ð]øCË4øK«vðWðø!<ƶÇá xž‚§áx~ÄVÃOà9FùNÂóð‚©&/oRÏ[pÞ†³ðœƒwábþ>„Lß`>†O4úóê„¿‰Gt~ø¼¶2~Šÿ[^ø«‡ ˜r_‹iX ÇûwÛ{‚ÿ{`ì‡pÁaxÔ¶UåïÀwÅEÚçWbËýðœ¢]¤žü£˜7j[½GÛßïÂsö~UÕ8 Ï‹ß <ã>Ÿ…ÏÃÅp ŒÀ“¾©Ü ˜ÒÙû7•»qµÎ¾©yÀX ëÜu±žWài÷MÍÆáKp Ü »x§q}S·åÝŒ5À«ƒpÈ͈ÃpŽºÇâwgÅ wM<â>¯ÏUxƒ÷OÃ/5~Sçü¢ªmƒå–ip%¬€•p5¬µ°¾ ×ÁõpÜ7ÁͰ ’‹:ÁØûá„CpŽXäeã^¸î‡àAxŽÂ18'à{zñËÇá xž‚§áx^°*ÉqÆ›Ô眷á,¼çà]8oõ×ÑÇxÞ‡LyÊøP|AGŸ{â3ºê|A}h|.†K`–À2ͽ/èlÖ¸Rc½¨€•n‡¸Êý@¬ÖÌù‚ú󺘆«¥ò êOc-¬coõ¼Ú×èxñ‚úÓø"| n[aïÌÂn8ÀúA8ä¦Äa8GÕÉ/¨3ãn«8áö‹Gˆÿ¨û'ñmxÁ}G¼è^/±œcùŠûX¼ ¯»#â 7 NÃ[ÿó•8ïÄG:B½(À/5»¾ ÞþT|â¾xA®ÔáÊZn¬€•p5¬µú¨Ãëàz¸n„›àfØ·[^º"0ú°ÕÔ î€¯ÀWákðuøÜ Ûà.Ó1ø&l‡°Ó”Õõ…1 GLyĸîƒûáx‚£p ŽÃ x ‡'àIx 
ž†gàYxÁô•;Œ7©í-8oÃYxÎÁ»pÞz@î0Þƒ÷á*ö9|(†pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„pGw„qGw„qGw„qGw„qGXî8"–CsGw„åŽsâ*wC¬fÙÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜÆaÜ–;šÅ·ÝKâ×"^t;ÅK,çX6w„qG˜»aÜÆaÜ–;ì\·X€æŽ0îË0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îËÒEî0úÐÜÆaÜÆaÜÆaÜÆaÜ–;¤£Üal‡°Ó”•;ŒYhîãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îãŽ0îX¬«øâ"÷.æàbuþÅJ÷;±ÊÄj]«.Ö•ïŸÄsî¡8©.ÖÕ®ñ²›óð¶®5ì>Ãbî.Öõ©öÌ=ÀÅÜ\Ì=ÀŪ³ö©:³â"YB$Kˆd ‘,!’%D²D‘\-’%D²„H–É"YB$Kˆd ‘,!’%D²„H–É"Y¢H´OEbÌŠî_E¸áþU„ûWî_E¸áþU„ûWî_E¸áþU„ûWî_E¸áþU„ûWî_E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\E¸sáÎU„;Wî\-Õüÿ•ø”¼¼4°>í:ÄEšÓ–ª¯‰1]Ë,•âÆL¹'b®Vg.•âÆZXçn‰õ¼Ú»`vÃÝìçœ['NêØà ‚ÃpÄFW÷Â}p?<ÂCpŽÁq8Áãð< OÁÓð < o’Ñ-8oÃYxÎÁ»â uŸÅ5R…f{ã‹ð%¸n…p©+VhV7ŽÀqÍQ+Ô-¶·#:‚¯ÐܨõšXó9|(–2n)ã–2n)ã–2n)ã–2n)ã–2n)ã–2n)ã–2n©Ž _‹‡Ù§^Ê襌^Ê襌^ÊèešQOéì«L^6>í¾'>£~.Ó9¶ñy¸ÈýH\Ìò%0æJÅ8LÀ”«Ópµ«k`-¬sb=¯6À5î¤Ø_t?_by ãneÙs ñ5—Ú4—Úaì„]ì- »aëwÕ–û\Ø\8Èþ‡ÜÏÅa8GufU&¿ÇÝ.qÂeÅÃ:ÂoŠoé ¼L3Èø©;.Nºañ<¼ìÅ<¼âv‹Ÿ¹}âU–¯±<å~&Þp?§ác¹¯,ð…fø2yS£»ňi¤¹Ñ¸ –Àåp,…e°ÜÔÑ9¼±VÂÕ°ÖÂ:¸Öª­3y[^×à p#Ü7Ã&Hm5'û`?€ƒpÃÓEsBJÜë¾+îcÍ~÷â–ºZñ®ýË4'¬ÇX?'àQ]é—ß¶®¾ß…ïbâÝ]ñGjñý€½ÿ§rñ—ÖÁ_Áà‡ðû<OÀ“ð”û–xšå3ð,üˆ­>v—ÅOX>güÔ½*Nš²Áó¬¹`½$o¢æ-8oÃYxÎÁ»pÞúMgGÆ{ð>|`½'/ÂGÖ‡Á‚u]ð1ËOU¹<þ;ñ)÷q4—ãèr9·EŒÃL)þr9׸Úùb ¬…æÜr9×^m€kt,+—sæÜr9×–·¸ÓâV–=×%šsËåܽb;쀰‹½ea7ìaýn¢ÚÃrŸ®XËå\£9·\εýéš·\Î5ŽÀQ]ɖ˹Æq÷®8AK¹œkkÞrí¢9·\Î͹å8·ç–ãÜr9÷}ñ3wF¼Êò5–§äÊr9×–§¡y¶\žÕ¸òìŠWž5.ƒ%p9\Ka4–ÉZž2öÁ~8á†#V1ÏÂØÊ„¬CðT¹÷Å~xŽõ“ìó<¼¬s­•ê=Qý Q‚l¥~0öÁ~8á†GmŸÒÑø|ž³UOã$-+•ã!1&M+•£1Sîûb®vÅX ëÜÄz^m€k4¯VjÞ3¾_‚[àVèiö¨Ô¼wJlÓ|X©yÏØ;a{ËÂnØÃúÝDµ‡å>wXì‡ìy‘é0£î#q Žë<°RóÞÄúF¨Ô¼gkÞÒµj¥4ê']¯x^ÖYM¥42^ѼZ©ï¾x•åk,O¹ŸŠ7Ümq>fÜ/t†Y©yOãjÞÓ(š÷TsÍ{Æe°.‡+`),ƒkM— Y«+Œ}°ÀA8‡áˆULóžq¯\\©yÏ–÷»÷Ä,„‡Üˆ£,Áq8šRê.ã;ð]ø^àßÄ÷5ûUiŠh®3~?„ÇØÏqxž„§àixž…±ÕÇêíJÍu¶|ÎôU''uí\©N¶å 
VaÍuÆ›Tøœ·á,¼çà]8oêk®3Þƒ÷á4ú>²~Ð,÷¹ø˜å'Šg•ôX|ÊM‰ àÓnT\$½VÉAÆ8LÀ”;&¦áj WÉAÆZXÇ>ëyµz:[^%wÜÛtÜ\%w;`'ìâYØ {X¿›÷°Üç΋ýpT}¾Jýo<¬Š­R·?Ï©7V©Û*ž‡—uœ]¥n7^Qo¯RŸ§Ügâ ]g­R‡‹êjíMý¬ ¨ŸË` \WÀRX×Z„AâT?û`?€ƒpÃËQýlÜ ÷Áýð<ÁQ8ÇáV/­R'Ûò9ÓBlœt3ây–oRÉ[pÞ†³ðœƒwá#ÓNi| ŸhÿUº>}$> Ÿ‡‹á%0¦n¯R—0åfÅ4\­Y´J]j¬…uîŽXÏ« pŽ_Ušç/—à¸vñάº¥J]jË»k€WáΫ4WG }¥Šï±Ti®þXœPïUi–þD¼ÂVWá r™†_êø[øÊÈ÷Xªt•§õºÊ3VÀJ¸ÖÀZX_†ëàz¸n„›àfØÉE=lìƒýpÂ!8 G¬êaã^¸î‡àAxŽÂ18'à{”xŒåãð< OÁÓð < /Xm5[oRŸ[pÞ†³ðœƒwá¼Õ_³¥ñ¼˜"š-ÅjͷŧtnP­¹Ñø´{Y|FsHµ:Óø<\¤³Öjõ§-/XWhþ¯”˜+ã0S:O®V¯WëZ²Z½j¬…u:jW«WíÕ¸ÆåÅFø"| nq“âV–=Þùš®ãª5ëvˆí°vÂ.Þ“…ݰ‡õ»‰jË}º²¨Ö¬kp'ÄAö?¤³…jõ¹qŽ2Öçî¬xØýB<âNŠo¹ýâ9÷†ø©Ž}Õš«_ÏÃËî51¯è¸_­3ÛÏU–¯±<¥cSµœ2.NÃÇhdWdÕrŠFÚ'Å՚ɥ—frã2X—ð–ÁrSJþ2VÀJ¸ÖÀZXךâr™-¯ƒëá¸n‚›aüžõ@p Ü ·Aj.÷û`?€ƒpÃÓKî3îÕ9LµÜgËûu¾W-÷ÙòAxÈ ˆv«Zî³5ö¹sµÜgËGáÛîñ–ß…vFT­+Á¯D;ŽTs^T­£‰ºBG騣‰-9O<ζ'àIxÊmO³|Æý«x–åØêcÍ<Õ:šØò9ë1MŒ“îñ<ˬ»ätãM×/ÞBåxÎÂ;pÞ…óÖrºñ¼X7ÊéFœ®ceT°ÞÖ1È–Ÿ(ª(Ÿ+Eù\)*oþU¬µ°Žõõ°Ú=Þ(÷x£Üãr7Ê=Þ(÷x£|öå³§(Ÿ=E¹ëå®oTÎú­8 G }†å3ĨœuGœPÝ¢òÔœxCLj¨¼ òyb”O‹¢|ZåÓ¢(ŸEù´(ʧEQ>-ŠòiQ”O‹¢|ZåÓ¢(ŸEÕâ1–Ãð$<OÃ3ð,¼`1K/ãMâ¹gàm8 ïÀ9xÎ[^ÒËxÞ‡,Sée|(Æ×Õ™±À ÜÕUI,ðÄ}ˆ³>Îú8ëã¬OÊ\­XWêŒ1¨€•nD\¥s×D Ú¥ÄOXÁ})^Ô‘.¸ÄrŽåë:ú$øíLB£Ä»Ê:˜×‘%x$/'øÄ¶ n·ƒôa«í?¸¾_…¯Á×áp'lƒ»,Âà›°vÀN‹9سbRG¢±ÌUŠå°RW"Iåø±X¥Ù2©L¿+Fu-äs„dàmÕ?©ÜˆöM†¤r¿%^b9Çòu7-ÞvÅ;î7â]©“TîwÅGªCR¹ŸØ~4C*U@1¨Fî²H”‹±vÀN‹J¹³°Û" öÀ^1…Ž)tLIÇ'b¬Ô± ¥Ï‹Õ:F§ø=`*`ŸY§”…ñ:{¸ã¾ïºÿíº)¥hõ*J¥P*…R))¥=K)ã+ðUø|¾wÂ6¸ËbPvÆvØ;-*egÌŠirI“Kš\Òä’&—4¹¤É%M.irI“Kš\Òä’&—4¹¤É%M.irI“Kš\Òä’&—4¹¤É%M.irI“Kš\Òä’&—4¹¤É%M.irI“K47ú°6ˆ¯Â×àëñ –wòž]ðMØ þE|;ð-ñãÀñ“À:ñBàŸÅ‹ìáË9–è¯&ÄÛÝâ@—xW{­ ÌšÄG­b> | :+øüÅü!ü1|þü üïü9ü,þ5P'GüE´_ÒÕI‹?êQ¶eë™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mê™mêѽÝëѽžÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦žÙ¦LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´LÈ´L×è*à?Åp‘û•hŸà¯áü5|‚¿Fç_‰»Y>§8×èüö ñ<¼Ì«y8¥#Ô£jo:G5.ƒ%p9\Ka\kãòéü>_çókøt~ 
ŸÎ¯áÓù5|:¿†Oç×p.·F]­+uu£øö‰ŽºZW¼äÕH^äÕ°ÏnÉ®‘ìÉ®Qç?_‹5°Z¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛH¾äÛ¨Lk[·t¶°zêÕõb9¬dMµûAÀS/izÉèÃNÖwÁ¬èkÛåb9¬tGÅUîªXíÖˆo«o}uø‹”»¯ÿ½x‰åË×ÝMѾûç«·'Zoûêí?‰øÄÖ(¢xŒ>Üec©Kí°vÚèŠÓ˜[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8[‰³•8wçâÜAœ;ˆsqî ÎĹƒ8wçâÜAœ;ˆsqî ÎĹƒ8wçâÜAœ;ˆsqî ÎĹƒ8wçâÜAœ;ˆs§®ô¿Ÿ…ÏÃÅp ,sÛÄrXéæÅj÷K1¦xvÊ;ÆLéLi§®#Œ«YSka»'Öójì‚YØ wóN»_´“ûE;uæoœ†ö­ªÊT‘(S£;-edÌBö#7û`?€ƒpÃ]WƽpÜÀƒð…cpNÀcð8<OÂSð4<ÏŠmÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Ú¶QÛ6jÛFmÛ¨mµm£¶mÔ¶Úî üÎáw1+¾©3·õ¢[áøŠfÏ7uæf| ¾®WÞÔ™›-ïä=»à›°ÕÑ›:sû¶ø1üDçNoêÌí;âÅÀ÷ÄK,çXþu`@¼xOüñÜ|_¼Ãú»q^ѽ©ó·mb> |$:Qçoƽ:j½©³8Ŧ³8ãáûð?àOàÏxÿÏá/à¤E¨s¹ ´3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/µ3/ugqvgqvgqvgqvgqvgqvgqvgqvgqvgqvgqvgqvgqvgqvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvg'qvéÌg^,s+År¸ÒýM¬€•nP\åNŠUêÒ.ÅŸ£rSOÛè Øïs»¸‚îR.ÿ%^dŸ—Xα|]WÓ]:˵­ì ºKÄyåØxÄú|bï×ùq»Å£¼Œ>lµ¨t®k|¾ _ƒ¯Ã7àNØwYüª†±vÀNËEÕ0f¡ÍŠ]ºî6öŠYê“¥>Yê“¥>Yê“¥>Yê“¥>Yê“¥>Yê“ ¼ ­>Yê“¥>Yê“¥>Yê“¥>Yê“¥>YÕç¾øˆõhõÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸ,õÉRŸn®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë nz¾›ë n®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë n®ƒº¹êæ:¨›ë n®ƒz¸¿ÔÃý¥î/õp©‡ûK=Ü_êáþR÷—z¸¿ÔÃý¥ÔïáþR÷—z¸¿ÔÃý¥î/õp©õ{¸¿ÔÃý¥î/õp©‡ûK=Ü_êáþR÷—z¸¿ÔÃý¥î/õp©‡ûK=Ü_êáþR÷—z¸¿ÔÃý¥î/õ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’]/Ùõ’Ýnå•Ëa¥û…h¿Û­Œt~©'ÞÑl¼›{§»5ºÞ©Ñ>´ßaíæwX»ùÖn~‡µ[£kݘ÷èã¯âSê¢=ºº4.rýb¯–ÃJ²GU=!VËk{TÕ߈1uþã0%‡îÑ9žq5{®µÐ~ѳGçxöjì‚YØ w³Ÿs.'NºKâyxÙ]ópJíáŽúî¨ï‘^_‹vogjò—À]Ã*]×Á¸®€¥° ®µ¬UIå«J}ØiùªbÆ,ì¶Ü¥—±³Î'}°ÀA8‡áˆÅ¬óIã^¸î‡àAxŽÂ18'à1xž€'á)xžgÅ>iZ/–ÕR¤O³ŸÑz¬O=vI¬vMâÞ ^× ÖÇï)ú8¾÷ìS€>õÿ_}ª˜ö¦Š}ØjûÔÌf|¾ _ƒ¯Ã7àNØwÙèêXc;ì€êoÌŠýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýdÑOýd1 þ$Úïƒïމ? 
Ô‰?lß×¹ü Î‡_ø¥øÓ@Jü8°R|h ©Mb9\©ùmHu0VjžR~#V»âÅ6¤+&ãuhŸé qtH¹kÊÝèÃVÖÀWákðuøÜ Ûà.Q¹Ûaì´”»1+ù0‘ù0‘ù0‘ù0‘ù0qç0qç0qç0qç0qç0qç0qç0qç0qç0qŽçqŽçqŽçqŽçqŽç¡Â#Tx„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈGˆ|„ÈG9‚ŒrÅ£AF9‚Œrå2Êd”#È(GQŽ £AF9‚Œrå2Êd”#È(G1Æc¬1Æc¬1Æc¬1Æc¬1Æc¬1Æc¬1Æc¬1Æc¬1Æc¬q­þ >¥ù\G+ã"÷ÇÿŠå°’÷T¹óbµ;&FuÎ?®cÊ qÒ]ÏÃË:îë˜bœÒùð8ŸHŽ縎 Ú¿Ž Æe°.‡+`),ƒk-e§”чC° fa·Å£#ˆ±WœPFÅp‘û‰Xæ6‰åÐ~Á7¡Œ®Švwe‚_ðM(£/ÄI¶:/ë¼eBí^î„2úc`B¹èUåb\Kàr¸–Â2¸ÖbP.]¹}h÷R&¸—2Á½” ~Ç7Áïø&øßaEþïb9¬twÄj÷3Ñžàt˜'8æ N‡u¶ðG1 í7/‡un`Ë ° fa7ÜÍVWt&|X^3Þ`=éë°¢ÕˆŠÖèÃNWѳ=ð<¨Ã<ê0σ:Ìó ó<¨Ã<ê0σ:¬#µÆÕ‘Ú¸îƒûáx‚£p ŽÃ x ‡'àIx ž†gàYñˆúü÷âSêÒ#ê ã"7$–¹Ë¡ýJñ}~D>.FÕ'GÔ×ÄIx^vy1­Ïp¦t„_pQohÿê ã2X—ð–Áµ‰ª­Tm£;-UÛ˜…ö­‰#ê c¯ø–âI,‡•ÚÛ[š+þ¯Xí‰w”Ñ[Ú³^Õž>ÜeïÑü`l‡°Ó¶ÒˆÆ¬xZçö/*ÂÓ:Ÿ7^—î§uö>">‚ñ,w3ÎìûgõNãuwA¼Ãz»ƒqV[=±\?ÂMçϸÿ#> Ÿ‡‹á%°LcS¾Æ•ºf<§#ˆ±Ò½)®r?mž<w—Å x.pDªÓÙKxQ3ä9ÃØrŽåGîŸÄ‚1Xn£W X WÃX ëàËp\7ÀpÜ ›àv‹\Z}ØjñëÈe|¾ _ƒ¯Ã7àNØwY¦RÐØ; Íðç˜áÏ1ß ZÍÏïÁûðÕ$ø9|(~ªÚ~ –kVÿTõüJ¬V ?UÌZ¯˜>ìd}ÌŠ“Rð?Ågáóp1\#°–©‹&5Šq¥›+`¥kW¹‹ÕîUqœ=›‚“Rð’h N¢à$ N¢à¤f­câ445'QsRjjD©i¬€•p5¬µ°¾ ×ÁõpÜ7ÁͰ n·,T£[-©i|¾ _ƒ¯Ã7àNØwYÖRÓØ;`§Õ!سÐÔœDÍIÔœDÍI©©úHMãCñ÷ÿ/ZQñ´ëÏ©bøÞéEÍ„âø´[+ÚUêESNˆq˜€)÷™˜†«Ý‡b ¬…u:º]ÔQÆ^m€žŽPu¬±å,솻Ù[Ÿ¢½è‡ç¤ÝEÍ«Æóð²®.j^5^qiN»¨c“-_cyŠånŸ8 k¶¹øÂ¨¹Wiî5.ƒ%p9\Ka´«Ô‹AbÓQÌØûá„CpŽXF:Š÷Â}p?<ÂCpŽÁq8Z­‚oÃwà»Ð~ƒvQ×,•¢ýí¢Žwöþãð< OÁÓð < ÏY…ƒŸÂIx^¼DW\¢+.Ñ—èŠKtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GWäèŠ]‘£+rtEŽ®ÈÑ9º"GW\Öì=-> Ÿ‡‹á%ÐŽ¿—9þ^Öì}W¬€•®C\åÞíø{Y³÷qÂ}.ÑÁeÍÛõbÚ,}™Yú²fiíY³´±VÂÕ°ÖÂ:ø2\×à p#Ü7Ã&hÇÜËs/s̽¬YZ1k–6¾_…¯Á×áp'lƒ»,;ÍÒÆvØí˜{™cîe޹—5K+kÍÒÆ{ð>|`uÐ,m|(æ©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©|žÊç©üÕs¯XîN‰•:§ºÂUÕÕª 挊_ïQüFÚЮ€®pô™fÈi±ÌíËÝÅJwZ\¥}~¦«‰ß‹Õ¼zXg¼ŸI‘¿Šu]/GÂG:—øLŠˆšy´7«÷k\£wÙÞTc;쀼Ú³ðí_™ŠWÉô*™^%Ó«dz•L¯’éU2½J¦WÉô*™^%Ó«dzL¯‘é52½F¦×Èô™^#ÓkdzL¯‘é52½F¦×Èô™^#ÓkdzL¯‘é52½F¦×Èô™^#ÓkdzL¯‘é52Òñ문گù¦ó)1¦kº)¿Œ hÏûây¿S:~åÅX 
ëÔ3S<ïwŠçýNéø5-¶)¯©@;쀰‹÷da7ìaýnÆÚÃrŸ®§tŒ3ÚUíT` ž&³ºþš LýÏ@`*ø¶^Ç,e¡c–q,Ëá X ËàZÛOuÌ2öÁ~8á†öTÞ)žÊ;ÅSy§x*ïOå⩼S<•wЧòNñTÞ)žÊ;ÅSy§x*YªŽYÆwà»ð½@\ü¥U ø+øücÛãð< OÁÓð < ?b«á'ðœÕSG4ã$<oRÃ[pÞ†³ðœƒwÅ_««ÿ$–Cû}Ù¯ÕÕ¶\­ùçךQ¿/êŠû×êdã#]ÕþZ,ª‡õNõ°Ñ‡»à›°vÀNÛ›zؘ¯kþÿ‹ø,|.†K`–À2]ß]W„?í×áE±Ú ‹ãî¿ì{À:ïº.ßÍ‹u¥¦3#øHge×­¨9_{Óœo¬€•p5¬µ°¾ ×ÁõpÜ7ÁͰ n·8U £wYœª†±vÀN‹\Õ0f¡}Ú{]³·ñ¼XFrºñ¡xCîþ½¸HÕ¸¡Ê¼'Úüv#`ǯªÌÄ*m{Cõ9(FÝßÄÃluQcÝPeŒÜOÅ‚QÒÞ¿ö¦ø>Üe{SüÆvØ;mϊߘ…ö‰Û`짉sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8§‰sš8ovÉ5·oBg >!ÎЫ3ôê ½:C¯ÎЫ3ôê ½:£ùöK1о«6ÃwÕf4ß~!ÖÀZh÷*gø®Ú ßU›á»j3|Wm†ïªÍh.µýŒº?‹cp\sæŒz^Ëš3ÖÛ3ôö ½=CoÏÐÛ3ôö ½=CoÏÐÛ3ôö ½=CoÏÐÛ3ôö ½=CoωA³«±öÃ8‡à0´ï¹Íð=·¾ç6Ã÷ÜføžÛ ßs›á{n3|Ïm†ï¹Íð=·¾ç6Ã÷Üf4‹&Åc,‡'àIx ž†gàYx“šÜ‚3ð6œ…w༠çáoà=x_œU?ŸQOÎJ}ãóp1\#°ÆÜoÄ8LÀLÃÕ°ÖÂ:ÌJ}[n€­—D;ÚÎr´åh;ËÑvV½aïÉÂnØÃúÝ,ïaÙÔ™ ôÃ!Ö Ã8ê¾Ç =rV]ô[ñ-©<˱x6ð‘墾Ò{‚å,¯„°®†5°ÖÁoéúz6øm]¯ÍªÇlÍ:¸n€á&¸6A²Pû`?€ƒpÃø}¸îƒûáx‚£p ŽÃ ø^ A´#õ,GêYŽÔ³©gÕ{öžãð< OÁÓð < ?b«á'о'?«#µqž‡xõ"¼IµoÁxÎÂ;pÞ…ó¦š:ÖxÞ‡ö}¡Ù`>çÔÿŸaùYø<\ —À,öMŒ9¾‰1Ç71æÔÃÆ4\ k`-¬cÿõ,7ÀÖÀ¿ˆö9ãœzØØ;aïÉÂnØÃúÝ,ïaÙ¾ó?è‡CD5 Gà¨æö9õ°q\՛㠫sê^{õ#£ºW¯2+Î1+Î1+Î1+Î1+Î1+Î1+Î1+Ω{W‰ßTˆ/³f\7ÀpÜ › ñóÍ9¾¹1Ç77æøæÆßܘã›s|scNÝkü>Ü ÷Áýð<ÁQ8Çá|/P+þÒê¦î5~?„ÇxÏqxž„§àixž…±ÕÇðxÎTP÷'áyx¬/›Tûœ·á,¼çà]hφãÙ°s<vŽgÃÎë*樸>í6Š‹t½3¯žüLŒÃLér^=i´{ÚóX ët¦4¯ž´W §¹n>ÐÅrvÃÝìÍžq4Ï3ŽæuU2˜×5ˆbÐ5ˆq,Ëá X ËàZ‹3ÈÞÔÆ>Øà ‚ÃpÄbP÷Â}p?<ÂCpŽÁq8Zvº1¾ß…ïVŠÇxÏqxž„§àixž…öä¥yž¼4Ï“—æyòÒ#©óޏ>íšÅE®OŒ¹10¥9ê‘Ô1ÚQïG½GõñÝ­GRÇ^m€öŸ}¤Ž-ga7ÜÍÞN»qñœû'qž‡—a^q÷Ä«pÊo¸ â4´g=’¦Š\š—Á¸®€¥° ®µì‚Ä M}°ÀA8‡áˆE.M{á>¸€á!8 Çà8œ€G­&Á·á;ð]xŒWÃð$<OÃ3ð¬X@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µªP­€jT+ ZÕ ¨V@µÇ§ì9ði•ëê¢hŸQ>Ö5ÔUq•®žì3 Ñ>µlß’í¿~ëêÉøH×/uõ$#¶·àR¸ –Àåp,…ep»¨ë,£wÙˆºÎ2¶ÃhŸ×?æóúÇ|^ÿ…¢Ëaµ»øB{Ó²öfôa'ë»`Vtö{àSÚöhÐþ1öS±Ò=«Ý…àSÚƒÖkFv²¾ fÅ…:Ïùø,\èþ$>ÏòÿvƒöOšC,/fý%°Ì½´ÿÙÜ/®tOÄ Xé>W¹kAûÏM{5æˆq˜€)E»P2®†5°ֹ߈õ,7À.˜…Ýp7ûé#’~8JcpÜý9¸Pç6ZÖ¹^Õ¹±VÂ*^]Ír ¬…up­U@ç6¶¼®‡àF¸ n†Mp»ÕD•7ú°Õjb¿._¯Â×àë𠸶Á]V=õ±vÀN«§t4f!u}°ÀA8‡áˆUOþ5î…ûà~x„‡à(ƒãpƒÇá xž‚§áxž£>ŸÂIxÞD…[pÞ†³ðœƒwEû÷Æ?í¿ ÿ*~¢ZÙ¿ž 
Ú¿žÚ¿/¸{âE÷H¼ÄrŽåËîxÐþÍÐ8ån‰×ÝoÅÛîsñŽâ|.p×})λ‚øÈ}%àõÃßkô/Ä2ç‹å°Rþú{uû±Úu‰CŠíïunã./N¸YñËGÝ´ø¶ûXüÄýM¼à&Ä‹î]ñË9–¯»«â¼{_|ä~.à÷«à߫߃úÍèÃ]‰:ÇØ;`§Å¦Î1fá‹Mç¥Æy‹Mç™Æ{ð>|`Ñ?‡Å°oŠå°ÒýB\ånˆÕn§x]šþƒ}ŸG4½þÁ¾y(zЇ»ìýöÍC±vÀNÛƒ}R#fEû_í_<®í_<¾Úÿwä‚öZ¯}}ØÉú.˜#öôK±ÜŠGÝOÄ·Ý[â'î/âum±ÿ¿TÌû/lñ‰ .eÛ¥l»”m—²íR¶]ʶKÙv)Û.eÛ¥l»Œm—±í2¶]Æ¶ËØvÛ.cÛel»Œm—±m Û–°m Û–°m Û–°m Û–°m Û–°ír¶]ζËÙv9Û.gÛål»œm—³ír¶]ζ+ØvÛ®`Ûl»‚mW°í ¶]Á¶+ØvÛ–²m)Û–²m)Û–²m)Û–²m)Û–²m)Û–±mÛ–±mÛ–±mÛ–±mÛ–±mÛÚÓwWW X í ÷ƒö„Æ»b™ûO±\3Æ*û'h±JN_eÿ´§#Úús¬™”ßí¹ˆÆËîNО‹h¼-w¬²oî‰wmÙþ ZÜn{¶7}Øiû´‚³b‘TI‘TI‘TI‘TI‘TI‘TI‘TI‘TI‘TI‘TI‘TI‘ØsÕþ´çª0%7ÙsÕŒ«YSkaëëa\ã~´çª_„/Á-p+ìâYØ w³·^„CŠßžf£¬ƒãªŒ=íOA{*š–í©MA{˜öcÿ´'€ûá„CpŽØèö/0A{˜qÜÀƒð…cpNÀcð8<OÂSð4<Ï –‘ݧ Úó¸,ò[pÞ†³ðœƒwá¼emWúA{—ñ>|@>‡Å(jFQ3ŠšQÔŒ¢f5£¨EÍ(jFQ3ŠšQÔŒ¢f5£¨EÍ(jFQ3ŠšQÔŒ¢f5£¨EÍ(jFQ“çqyW0ŠšQÔŒ¢&ÏàÙjFQ3ŠšQÔŒ¢f5£¨EÍ(jFQ3ŠšQÔŒ¢f5£¨EÍ(jFQ3ŠšQÔŒ¢f5£¨EÍ(jFQ3ŠšQÔŒ¢&Oë ò´® Oë ò´® Oë ò´® Oë ò´®`5£¨EÍ(jFQ3ŠšQÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆP3†š1ÔŒ¡f 5c¨CÍjÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5ã¨GÍ8jÆQ3ŽšqÔŒ£f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3š ÔL f5¨™@Íj&P3šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍ$j&Q3‰šIÔL¢f5“¨™DÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f 5S¨™BÍj¦P3…š)ÔL¡f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Q3šiÔL£f5Ó¨™FÍ4j¦Qsµªô7ñˆ®âWëUãçð¡Xë5¼Zë5¼Zëµ¼ZË«µº’ýZì‚Xó9´wÖéÚùkÑÞ_ÇûëxOï©ã=ß 
ü:ïââÝ@U°QË+ƒÿðÜWbŸÞóÏ~xNûùg]qÿQ̃Gí=Á·á;ð]xÎÞüNÂóâZö¹–}®eŸkÙçZö¹–}®U<ÿ,Þwÿ\Ë(ke-£¬e”µŒ²–QÖ2ÊZFùNà÷™ø,|.†K`–À˜ÿޝÏUxƒ÷OÃ/Õoß |e´'ÿ‹å–ip%¬€•p5¬µ°¾ ×ÁõpÜ7ÁͰ ’‹|mìƒýpÂ!8 G¬ö]Yq/Ü÷Ãð <Gá‡ð½@½xŒåãð< OÁÓð < /X•änãMês ÎÀÛpÞsð.œ·úËÝÆ{ð>|`ŠÈSƇâwí_ÈÅg܉ÏÂçáb¸F` ,s[Är¸Rc}×þ…\¬t;ÄUîbµkSêÀïÚ¿‹«¥òwí_ÈÅZXÇÞêyµ®q÷ÅFø"| n[aïÌÂn8ÀúA8ä¦Äa8GÕÉßµ!ÇÝVqÂíÿQ÷OâÛð‚ûŽxѽ(^b9Çò÷±x^wGÄn@œ†·þç+qÞ5ˆÜ¿ˆø¥›¿rŸŠOÜ·ƒßµ!Ë-kûr±VÂÕ°ÖBêcÿB.®ƒëá¸n‚›aÜny=èÃVSÇþ…\|¾ _ƒ¯Ã7àNØw™ŽÁ7a;쀦¬æpcŽ˜ ö/äâ^¸î‡àAxŽÂ18'à1xž€'á)xžgáÓ×þ…\¼ImoÁxÎÂ;pÞ…óÖö/äâ=x> bŸÃ‡bwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGwdpGw¼lÿ7-.Ò‘ýe>yÙþoZ´Oä_¶ÿ›«Ý%ñ°ºúeû¿iqR5|Ùž¶!^vsbÞÖùÕËöÔÑ>yÙþ{Z´ÏD^æ3‘—ùLäeûïi± fÅuD²ŽHÖÉ:"YG$ëˆd‘¬#’uD²ŽHÖÉ:"YG$ëˆd‘¬#’uD²ŽHÖÉ:"YG$ëˆd‘¬'’õD²žHÖÉz"YO$ë‰d=‘¬'’õD²žHÖÉz"YO$ë‰d=‘¬'’õD²žHÖÉz"YO$ë‰d‘l ’ D²H6É"Ù@$ˆd‘l ’ D²H6É"Ù@$ˆd‘l ’ D²H6É"Ù@$‰d#‘l$’D²‘H6ÉF"ÙH$‰d#‘l$’D²‘H6ÉF"ÙH$‰d#‘l$’D²‘H6ÉF"ÙD$›ˆd‘l"’MD²‰H6É&"ÙD$›ˆd‘l"’MD²‰H6É&"ÙD$›ˆd‘l"’MD²‰H6Éf"ÙL$›‰d3‘l&’ÍD²™H6Éf"ÙL$›‰d3‘l&’ÍD²™H6Éf"ÙL$›‰d3‘l&’ÍD²™Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰Hšˆ¤‰HšˆäßíäÅrXéþ,VëÈõïö„v10åæÅ4\ÍšX ëÜ-±žW`Ìòj7Ë»YU´ÿnÿ /^Wöòâv‹ÄþA^ôa§Åcÿ /f!û±g¶‹}°ÀA8‡áˆnÿ /î…ûà~x„‡à(ƒãpƒÇá xž‚§áxÞ$£[pÞ†³ðœƒwÅf´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹f´hF‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹´hA‹ïi¶ü³¸F3ä÷ì߯ÄáKp Ü à ÒÌù={Gà¸ûJœp_‹‡Ùç}}OgnzUgnƬù>·0úFßÂè[} £oaô-Œ¾…Ñ·0úFßÂè[} £oaÜ-Œ»…q·0îÆÝ¸[w+ãneÜ­Œ»•q·2îVÆÝʸ[w+ãneÜ­Œ»•q·2îVÆÝʸ[w+ãnÕ™ç×âoà=x>àŸC‹gñl#žmijx¶Ï6âÙF<Ûˆgñl#žmijx¶Ï6âÙF<ÛˆgñlcÜmŒ»q·ëzÐø”®t¶À§Ý÷ÄgÜoÅgáóp‘û‘¸˜å%0K`Ì•Šq˜€)W!¦ájW)ÖÀZXç:Åz^m€kÜI±¾è~,¾ÄòÆÝʲçâkº²Ûhs: ´ÃØ »Ø[vÃÖï&ª=,÷¹:±¸qý¹Ÿ‹ÃpŽêº`»æ 
ã¸Û%N¸¬xØuˆGÜ›â[N3ˆýO½ø©;.Nºañ<¼ìÅ<¼âv‹Ÿ¹}âU–¯±<å~&Þp?§ácÍBÛí¿ƒÛ5Giôàb÷¡1‚Ká2X—ð–ÁrSGW Æ X WÃX ëàZ«¶®Cmy\7ÀpÜ › µÕÜhìƒýpÂ!8 GLÍ)q¯û®¸5ûÝ?ŠX>èjÅCº6ß®¹q­8Æúq8êJ|{ðmëŠà;ð]ø^ &þÐÝ¨ßØû(i=üü~±Ïãð< O¹o‰§Y>ÏÂØêcwYü„åsÖQÁOÝ«â¤)<Ïš ÖKò ñ&jÞ‚3ð6œ…w༠ç­ß4‡ïÁûðõž¼l|Y ÖuÁÇ,?QT÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáq{xÜÃã÷ð¸‡Ç=<îáqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqûxÜÇã>÷ñ¸Ç}<îãqÿÿ%ÝÛw\÷yÞq vVr•]&®%K´e+¶jëät¹«Ë7MÒ´I.šƒ!œÁ½7öæŒ,Šç4"Y;um7À9kFùG]òrH‚ @€È¤Hê4µÿ/»ïþäæY³F˜ý{¿ÏûÌÞññññññññññññññ9žÑÿ;ô±òoB?_þ<ô ôYú\ü6÷r<7+}¾XþSè×ÊG¡¯ø™WËŸ†^¦ï¸Ç5wé÷ã÷Ü—#á¡‘Ÿ8eØ£"•¾J/Ó×èú:}ƒ¾Y]3öXé z“¾SXý?‘Cwènè(¢QD£ˆF"E4ŠhÑ(¢QD£ˆF"E4ŠhÑ(¢QD£ˆF"E4ŠhÑ(¢QD£ˆF"E4ŠhÑ(¢QD£ˆÆ!C4†h Ñ¢1DcˆÆ!C4†h Ñ¢1DcˆÆ!C4†h Ñ¢1DcˆÆ!C4†h Ñ¢1DcˆÆ!C4ŽhÑ8¢qDãˆÆ#G4ŽhÑ8¢qDãˆÆ#G4ŽhÑ8¢qDãˆÆ#G4ŽhÑ8¢qDãˆÆ#G4ŽhÑ8¢ Dˆ&M š@4hÑ¢ Dˆ&M š@4hÑ¢ Dˆ&M š@4hÑ¢ Dˆ&M š@4hÑ¢ Dˆ&M"šD4‰hÑ$¢ID“ˆ&M"šD4‰hÑ$¢ID“ˆ&M"šD4‰hÑ$¢ID“ˆ&M"šD4‰hÑ$¢ID“ˆ&M"šD4‰h Ñ¢)DSˆ¦M!šB4…h Ñ¢)DSˆ¦M!šB4…h Ñ¢)DSˆ¦M!šB4…h Ñ¢)DSˆ¦M!šB4…h Ñ¢)DÓˆ¦M#šF4hÑ4¢iDÓˆ¦M#šF4hÑ4¢iDÓˆ¦M#šF4hÑ4¢iDÓˆ¦M#šF4hÑ4¢iDÓˆ¦M#šA4ƒhÑ ¢D3ˆfÍ šA4ƒhÑ ¢D3ˆfÍ šA4ƒhÑ ¢D3ˆfÍ šA4ƒhÑ ¢D3ˆfÍ šA4ƒhQõíŠB?V¾úqúKåJècñšíb0Vúú,ýby+ôKô¹²ú<}¾XvB_ò_¿L¿¯*/ÆëÞ‡ÎÄk‹ñº·ÒŒÎÒÜOô­¹ÿ'~ÝíWËÝÐËt¥¼ºJÿÅŸ•…¾S~;t§üNè.ý~¼&¼~VúÃpõb¼v­ôýò/C\þcèO*WªqµxÄkÔJ?I§OÐOÑ'éSô«Õ„ÃæŒUú*½L_£Wèëô 
zµbŒ×•ÎÓz.Ò%ºLWè*]£ëôÍÊÛØu¥7èMú¡¯„V¯/Æ+Æp/^1VºI·è-WhÐ&mÑ6íÐ.íÑ·=ª_þ,tàö;Õ."Q•î”wCwݾÍÉ;ô.ݧ÷è½OéGÕîâµ_¥èy\?‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL$3‘ÌD2ÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT2SÉL%3•ÌT23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌL23ÉÌ$3“ÌÙHæ~èÇÊ¥ÐÓ_*'ô—cƒ³C¿B•>V.„þšÛ¿N?A§ŸŠW¹³COÒÏ—O…~>K¿XþÛÐ/ÑçÊCŸ§/ÐËzèKþë—éo–?ý ý÷ô·é–;¡äö×üäD9:Sf¡)Íè,ÍýLA/Ñšû_1Õ×Ý~µüw¡—éke3ôŠë¿^¾ú½JWœµJ×üäzÙ ýÓò/BÿkÙ ý³òZè;åTèvùס;åxè.ý~9úúÃòпt¹ýWn¿Í0ÏšµÐŸÐGvTý/·³ñ ŠÓ‡«¿‘žçQì+žG•~’>NŸ Ÿ¢OÒ§è…jSߦŸ¡OÓçèóôú"ýjµñáßqû?Òߥÿ‰þgú{ô÷éÐÿRe`øéÑ?¦<ço¥¯ÒËô5z…¾Nß W«}Åó·ÒùòJè‚Û×ÊZè¢ÛKt¹|-´úßÕgãù[ÝSýÅõl<«ÛoÒëåXè ·oÒo ýVè7ã÷»YÏâÙáo}>ô­*ñ\Ž=Æs¹º½Eo•_ mxl“¶h»üýÐŽÛÝò?„öÜ~Û£úñ\›çruû*cñ\®t§Ü ÝuûÝ*]Ã{ôvy9ôŽ-ߥûô= ÷é!}P%pøˆÓúa•ÆáSzVB?Bô°Êv4@uû<¦Ê5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r k€\ä ×¹È5@®r Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€B Ð…(4@¡ Ph€BàR<7ÏC¿DŸ+ÿ)ôyú}Ñý/Ñ/Óê¯C/ùëÐKþ:ô’¿½ä¯C/ùëÐKñ¼«~² —èkî¿B_/ÿ.ô z•VŸpɧ\ŠgÖAèzøv)žS÷C\þsèO*õI—"qÍÈF¥ót^£‹t‰.ÓºJ×è:ýFìýRl¶ºÝ 
MÚ¢mÚ¡]Ú£ïV3Ǿ*½mž;ô.ݧ÷è½O郊+öUé1=¡V¤±¯JÏBköR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥f/5{©ÙKÍ^jöR³—š½Ôì¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^êöR·—º½Ôí¥n/u{©ÛKÝ^^ú šü•ê{ÒC«OU}eè¼üÅð×ãþÿZÝÿu÷Ýý¯úùWÝÿªû_uÿe÷_vÿe÷_vÿkîÍý¯¹ÿ5÷_qÿ÷_qÿ÷¿îþ×Ýÿºû_wÿîÃýo¸ÿ ÷_zª|!ô­>)úªOо:ôt¼¸:ôÙr7ô™xmvÕ'^ª>¥äêÐ{ôƒòg¡‚ôê¿\møOªë ŽÐê3–¯úŒå«>cùªÏX¾ê3–¯úŒå«>cùªÏX¾ê3–¯_¬ÎNhJ3:[M2œÓ"tÎüsæŸ3ÿœùçÌ?gþ9óÏ™Îüs柋ùOCÊ¿ =,ÿ>´b™ –ø¯Xæ°Ìa™Ã2‡eË–9,sXæ°Ìa™Ã2‡eË–9,sXæ°Ìa™Ç2eË<–y,óXæ±Ìc™Ç2eÞ§ûÎËOC+–y,óXæ±Ìc™Ç2eË<–y,óXæ±Ìc™Ç2eË<–y,óXæ±Ìc™Ç²€eË–, X°,`YÀ²€eË‚½,ØË–, X°,`YÀ²€eË–, X°,`YÀ²€eË–, X°,`YÀr Ë5,ׂ姡Ÿ¡O—WC?[vCŸ)¿:pÿ»å߆îÅÌׂ¨ºý=·?(ï…VŸ{-è†Æ$ׂîQèGáõ¡‡ô¼zÒkH¯!½¤qý ­tŒŽÓ :I§è4¡« ƒ´Ò”ft¶š9H+-B‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤‹H‘."]Dºˆté"ÒE¤KH—.!]Bº„t éÒ%¤KH—.!]Bº„t éÒ%¤KH—.!]Bº„t éÒ%¤KH—.!]Bº„t éÒ%¤KH—.!]Bº„t éÒe¤ËH—‘.#]FºŒté2Òe¤ËH—‘.#]FºŒté2Òe¤ËH—‘.#]FºŒté2Òe¤ËH—‘.#]FºŒté2Òe¤ËH—‘.#]FºŒté Ò¤+HW® ]Aº‚té Ò¤+HW® ]Aº‚té Ò¤+HW® ]Aº‚té Ò¤+HW® ]Aº‚té Ò¤+HW® ]Aº‚té*ÒU¤«HW‘®"]EºŠté*ÒU¤«HW‘®"]EºŠté*ÒU¤«HW‘®"]EºŠté*ÒU¤«HW‘®"]EºŠté*ÒU¤«HW‘®"]Eº†t éÒ5¤kH×®!]Cº†t éÒ5¤kH×®!]Cº†t éÒ5¤kH×®!]Cº†t éÒ5¤kH×®!]Cº†t éÒ5¤kH×®!]GºŽté:Òu¤ëHב®#]GºŽté:Òu¤ëHב®#]GºŽté:Òu¤ëHב®#]GºŽté:Òu¤ëHב®#]GºŽté:Òu¤ëHב¾¤¿z>¯¢ß ®ÿ6üfL÷Ä$•ŽÐY÷ç´½î±×=öºÇ^÷Øë{Ýc¯{ìu½î±×=ö†ÇÞðØ{Ãcoxì ½á±7<ö†ÇÞðØ›{ÓcozìM½é±7=ö¦ÇÞôØ›{Óc¿é“î¾Y}×kè>=z.ôpèÙÐó¡Ï ÿùÐõ¡Bßú7¡ïÑâ·ž?zúß㱟=pû<®ö­øùÏ…âjߊG}6ô=ú û­ø™ç†ÿGü× ÃߎGUZÝóxÔWBßz)ô=ú¿âÊß©¾ÿ*ôaèw‡~9^ÉwèWè¯Ò_£¿N?A§O•z>]>}¦|+ôóñ:í»C_ ÏÒ/ÆoOß­¾?4ô9÷?zVßiòV´ÁBŸ)3ôz<ÓߊNøûÐwã5ó[Ñ ÿúžÛßsûƒòvhõé@oEüŸÐª ÞŠ6ø¡éyuOx§„‡•ŽÐ‹ÕYñ¼®4¥­No+-B7̹aÎ sn˜sÜæÜ0ç†97̹aÎ sn˜sÜæÜ0ç†97̹aÎ sn˜sÜæÜ0ç†97̹aÎ sn˜sÓœ›æÜ4ç¦97͹iÎMsnšsÓœ›æÜ4ç¦97͹iÎMsnšsÓœ›æÜ4ç¦97͹iÎMsnšsÓœ›æÜ4ç¦9·Ì¹eÎ-sn™sËœ[æÜ2ç–9·Ì¹eÎ-sn™sËœ[æÜ2ç–9·Ì¹eÎ-sn™sËœ[æÜ2ç–9·Ì¹eÎ-sn™óÖÐcñÜ¿Ó~:ôýtùÏ¡Ÿ¡O—WB?[¶B?y¾ó?ú‘ó[¾_ãÖЛÑ'·†ªOÚ¼D†¾[þuèž+¿çö÷Üþ |?tßcÊ£ÐÃø7îVpÝýÈýéyõóÃ_¥RMt•ŽÐ—«Ùâ߸JÇè8 
“tŠNÓz±¢O*MiFg+¢ð¤Ò‚V­uk¸Fë¡ .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¸ÔàRƒK .5¹ÔäR“KM.5¹ÔäR“KM.5¹ÔäR“KMþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ4ùÓäO“?Mþ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´øÓâO‹?-þ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþ´ùÓæO›?mþtøÓáO‡?þtøÓáO‡?þtøÓáO‡?-ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR‡K.u¸ÔáR—K].u¹ÔåR—K].u¹ÔåR—K].u¹ÔåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO—?]þtùÓåO?=þôøÓãO?=þôøÓãO?=þôøÓ“¢—z\êq©Ç¥—z\ê…KÕö=¶r©Ç¥—z\êq©Ç¥—z\êq©Ç¥—z\êq©Ç¥—z\êq©Ç¥—z\êq©Ç¥—z\êq©Ç¥—z\êqéí¡¯ÐÞúXüüöÐÇécååЧÊéÐ ôéx öv¸Ô }&®üv¸túù`|»úóÐgécž·ãw¨JŸsåçé ´úÔý·ãw¨ê¿~™æ´ —è+®óNù½Ðò½Ð]úýr/ôôýò'¡?öóÕ{yo¢š|ø_ÓOÒÇéôSôIúýjÅþ]ø_é­è±J z©" Ç*­SVß?ú*½L_£Wèëô zÕ„stž.Ðkt‘.ÑeºBWé]§·hƒ6i‹¶i‡vi/´o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Íöm¶o³}›íÛlßfû6Û·Ù¾Ílv`³›ØìÀf6;°ÙÍlv`³›ØìÀf6;°ÙÍlv`³›ØìÀf6;°ÙÍlv`³›ØìÀf6;°ÙÍlvmù‹Ðê Ñ–?ØõÀ®v=°ë]ìz`×»ØõÀ®v=°ë]ìz`×»ØõÀ®v=°ë]ìz`×»ØõÀ®v=°ë]ìz`×»ØõÀ®v=°ë]ìz`×»ØõÀ®v=°ë]ìúØéK¡è§c#ïÄ¿_•VßúNüûõ^è3儾ëçß«4<‰ŸO*¡/WŠY*£ãt‚NÒ):MgèÅêúñ/K¥)Íèlub8\iºmÎmsn›sÛœÛæÜ6ç¶9·Í¹mÎíøwóoC÷Ý>p»úÛ†íøWòçÃÛ(¶Ql£ØF±bÅ6ŠmÛ(¶Ql£ØF±bÅ6ŠmÛ(¶Ql£ØA±ƒbÅŠ;(vPì ØA±cþÓî˜vÇ´;¦Ý1íŽiwL»cÚÓî˜vÇ´;¦Ý1íŽiwL»cÚÓî˜vÇ´»¦Ý5í®iwM»kÚ]Óîšv×´»¦Ýåù®™wy¾Ëó]žï¢ØE±‹bÅ.Š]»(vQì¢ØE±‹bÅ.Š]»(vQì¢ØE±‹âÝ ¨ôýtù³ÐÏЧË~ègãÙúnP,†þ0®üîÐè´ú«˜wcþøù˜<®“W:B_vÿ(£ãt‚NÒ):MgèÅêĘ¼Ò”ft¶š!&¯´Ý3ùžÉ÷L¾gò=“ï™|Ïä{&ß3ùžÉ÷L¾gò=“ï™|Ïä{&ß3ùžÉ÷L¾gò=“ï™|Ïä{&ß3ùžÉ÷L¾gò=“ï™|Ïä·} 
ómßÂ|[ZnûæÛ¾…ù¶¿Ó¸í[˜oû;œÛ¾…ù¶oa¾í[˜oûæÛ¾…ù¶oa¾í[˜oûæÛ¾…ù¶oa¾ã¬;κã¬;κã¬;κã¬;κã¬;κã¬;κã¬;κã¬;κã¬;κ묻κ묻κ묻κ묻κ묻κ묻κ묻κ묻κ묻ÎÚwÖ¾³öµï¬}gí;kßYûÎÚwÖ¾³öµï¬}gí;kßYûÎÚwÖ¾³î9ëž³î9ëž³î9ëž³î9ëž³î9ëž³î9ëž³î9ëž³î9ëž³î9ëž³œuà¬g8ëÀYÎ:pÖ³œuà¬g8ëÀYÎ:pÖ³œuà¬ûκï¬ûκï¬ûκï¬ûκï¬ûκï¬ûκï¬ûκï¬ûκï¬ûκï¬Cg:ëÐY‡Î:tÖ¡³uè¬Cg:ëÐY‡Î:tÖ¡³uè¬Cg:ëA¼6{úqúXùíЧÊß ½@Ÿ.ÏC?Wþ(´z7êA¼6û‡ÐwâµÖŸsþÀçœ?ˆWPg¡? ï—zPþãðƒx¥ÿ5^)UúIú8}‚~Š>IŸ¢_­f¢8=ˆ*¡Õ{O¼÷ôÀ{Oâ•RL¯”*­‡a9Âr„åË–#,GXްa9Âr„åË–#,GXްa9Âr„åË–#,GXްa9Âr„åË–#,GXްa9ÂrŒåË1–c,ÇXޱc9ÆrŒåË1–c,ÇXޱc9ÆrŒåË1–c,ÇXޱc9ÆrŒåË1–c,ÇXޱc9ÆrŒåË –,'XN°œ`9Ár‚åË –,'XN°œ`9Ár‚åË –,'XN°œ`9Ár‚åË –,'XN°œ`9Ár‚åË –,Æo4•~¬ü ôãô±òõЧÊß ½@«Ï!ÿ0ˆvCŸ)¡¿ŒÆo¿}§ü«ÐºK¿_þ ôôýò~hõ»Æ‡¾#àà‹S‚®ÒOÒÇéôSôIúýj5OÐÅ$AWé­& ºJ Zý•ò‡AWi=ôÝ)ºSt§èNÑ¢;EwŠîÝ)ºSt§èNÑ¢;EwŠîÝ)ºSt§èNÑ¢;EwŠîÝ)ºSt§èNÑ¢;EwŠîÝ)ºSt§èNÑ¡;Cw†î ݺ3tgèÎС;Cw¿þ"ô ôYZ‘ž!=Cz†ô éÒ3¤gHÏž!=Cz†ô éÒ3¤gHÏž!=Cz†ô éÒ3¤gHÏžý épµ‹³øñÃÝo‡^ OÇ•?ŠM~úL¹z¼Å)ñ_ã”JGèÅêgâ_JSšÑÙêQqz¥EèC×èú]ÿ¡ë?tý‡®ÿÐõºþC×èú]ÿ¡ë?tý‡®ÿÐõºþ#×äú\ÿ‘ë?rýG®ÿÈõ¹þ#×äú\ÿ‘ë?rýG®ÿÈõ¹þ¹÷…Ͻ/|î}ásï Ÿ{_øÜûÂçÞ>÷¾ð¹÷…Ͻ/|î}ásï Ÿû åÜûÂçÞ>÷¾ð¹÷…Ͻ/|î}ásï Ÿ{_øÜûÂçÞ>÷¾ð¹÷…Ͻ/|î}áóê}áÿÔôžlxœÂaHZ `S33gfÍ©9ç™>íiÎԞÏ÷žú|½÷|>Ý‘#ä9D"â!qć#"d„ŒLjˆˆ!ãqDDý!q?"""†„DÜq|Ç›üßï´MÒF¶ýÖÖâ[øSü×ü]þ±@/ä¿j‚#¡H¨ú…Yáºð¨]ÞNµloŠŒ"N´*:í°u¬wìv|‹Åœx^¼'~è:#K—’‚dSrÛ•êz×u-…¥kÒëG™G™PVíæw§»rƒÜ.-#oöð{d=…ž …B§x§Xï…{ñ^¶÷[Ÿ¿¯òXñx^)V"Êõ'ò'5 bTÕê»zLMªß¨ÿPÿ£Ñh`MF³¦9è—÷ãý­LKi—µ'ÚÛ§ÈÓ-Q7©[ÓÝ=#žmëaý¢þþ‡køËp7€äv.J£×ø«q×xd¼0>˜”&¯)mZ4m™nL€p°l§ÀƒYoFÌyóŠyËü§ùÌâ°ä-_,ÍÁÈ`a°8¸:xö:½`œß‚ŸÀÏà>x^YyV¥ÕhuZVÊš·–­uë­Mfm%Ûé8”Ú¶Ûí¬}ÎþÞþÙ¾ÿÜù|Ûávl8n†Áá…á3'묹x.¿ëw׉«åFÝU÷áˆrYy?Ò€PšÊÐ*T‡î= Þ3ëYóìy.¼*/îôV½w/r/¾Â<Øãp®Â—£²Ñ™ÑO£õ1ÉX~ìÊgðÙ|/àc}Yßœoß?ï_ô/ûïljñ/ã÷] 8œ.ƒ¹à\°ôŸrð øwðA ™D~BfãBBåÐ~è$Ô@•(ƒ¦Ñit]@/Ðï“bFa“X +c+X ÛÄêØ.Ã58€;ñ<¾–„‰p*<Þ ï…ÃgáËp3¤#Ó‘ÙÈy”f¢ËÑÕèI´½"ÜD€ ‰WD–(Eb‰¨­˜(¦ˆq±c?Ç6b;±¯±#ÒFzI”dÈ49MÎ’‡à4LP»”…rSФ^QYª@i%m í4Lã4Kgè%ºBWé z‡n2|FÆh€)0Ef‰©0U憹‹ã}q}Ü÷Ægã ñ·ñ&;ÅæÙyöœ½f[ Q"—˜K”åÄJ¢–ØLÔ9–Ëp9nŽ+qen…kpWÜ]R˜”'µIKÒ\M~Hn%[)QJ‘bS™T1uøRöRó/wf9jЃ|&6wxœíWOoÜÆŸMYRœ ùS í¡è$!ZjW–âÀ:)NlX؆ãÄ·¦³ä,9ÐÃÌ —^Ý›k?A?MŽí©è­‡ ôÖÐCѾ÷f¸Ëµ¬*j{(Šj¡åã̼¿÷{\ÆØÏ°ð÷=üyÀ¶à.Èo° öÛ(¿ÉØï¢|‹ýh0ˆò[ìhð^”7`ý‹(ßf£ÁWQÞd 
~å-voðë(oßrƒßGùmöÑÆ£(ßa?Ùø&Êï°ï7^Fù]öÓÛ'ÉàÖúÝí¯¢<`ïoþ!Êo°;›‰ò›ìç›ò-Æ·^Dù-VlµQÞ€õ?Fù6ûÅÖߣ¼ÉFÛßDy‹UÛ¿Šòö柷ÿå·Ùèƒ?Eù;üðƒ(¿3øîÃ/¢ü.;úño˜zaU^x¾›îñÃÑx<„¯c>]p¿xj´¨2>iÎ…»àŸ_(™]¨”ïÞ×÷Ú¶Mü¢¦CIjʃ=Þ*_ðgÒI;—h*Ï‹Rò‰ðf'á•ÊÊÁVSeÒr_HþåÙ„?©eNÇûüki2'ã¥C—ZU{—8¥cóƒ''{`LÿªÎ¼Ð*=L þ~r²Ü†­Líäù³£áQ2J“ãã{ãåÎt~ƒDäx8út8úä„÷ÒÖ 5ƒxåýª¼çÊqÁ½™,…=çfv% ÉU¯Oèžf¢äÏ[a3“µ>3^þXíÓBÎWî,­B×µœ‰Tò™(•^ðL:•WPÈ›ŠÐ”%Ô²‘IùÒK¨Ðköï"5^ëD;œ ,e´e/ó$T—Âà;ÁÆš„t•+ vg RÒqˆ8VÎ ÑM Á:Ô© „â•ÀÔT3ÀÎym•±Êƒò}®FÝ0;B7óóFäÐ?†­05¨¾ºË!‘Vj×G«ŽQž==åua*éɃ0¦QÅ¡¸˜C5òpZ˜kZ ¯ÈpÜ”WOÀ«žW*üæ[ÇXÑûñ®ˆS¬YâòŸ‚Ý,ÎïPI\kèÚö|f„QMÙ.Ö25ñ9ö0Ï®õò?ó­)Ê ŠCq5ŒÇµ¸ð5&¢‡ãœE- 3Â( ÆXÅ}¹Ga÷w}™‚#p$ŽÂ•¸ µ<óy 6à=¼ÎƒXÍÖâ Öá-Æñëñ&Gãm¼Ë1làXÌF\ކ¡?†` ÜŽåX¥X†'ñVâÜŠÛ°%Zñ)ÄæXŒ%¨Á<!ôÁ8Sq4úâd<ŽÓq&ªa¢åèqÎÆ<œ‚‡9AœŠÓpl…­1Û`[l‡AØM<”‡q<B?|ƒoq†â\ÜŒ±ÎǸ7p'â<\‚+p#.Ã¥¸7á6óp4a ¶Çf¸ßã|‡Ÿð#~Æuxš-œÄÉœÂ#x$âT¶òhšœÆ“´0 p'žÃ3¸ ÓØÆø“íØH›Ó9/À³Lãyv0Ã,;ñ+¡ÃÖ`V3YàLvqgsåq<ž' s±k±w#ÅyOæ)<•§ñtžy&ÏâÙ<¯âe¼Âyhç|.à¹\ÈE<çó|Á ñ /ÂǼÓac:æ%¼üÅË^ŽcÐ ‡W Ç+y¯æ5¼–×ñzüÆx#oByÌäÍèâ-¼•·ñvÞÁ;yïÆ,Þƒ9˜cq<Žã½\Ì%\Êe\μñ~>ÀùæJ>ÂGùç|’Oñi>ÃgùŸç ˜‹p"Nâ*®æ‹\õ\Ç—¸_òe<ÀWð _åk¸sC°±+Ý_°:›Êf¬Á¸7ÆL3`ƒ·P¦“Í”Å3©ÀDÙoöƒMÿ{²øÝœ¬™Z¾Dke$&PUÖÖÚæN¢ª€&ˆD ‰V¹`T ¨å.«Õ–©. C(†P ¡B1„JLôc¢F,\THËT¢c±P{Âv…޶´5+Ð.±ýƒˆ šnô™‘r,+“63I;¡¤½S[™Ty:›Iå”|q#w™VUu«Z¥€*  èaH°ðªp«UÃÖ•3!Rî<"`ˆ^D¤"¢NÄ(–¦W…½±ºÊkjûuZŽM&¬LÞr¬d ^p²VMrkZD *`ô¶ry»ÃÌ[IajªªÒ¶c*VÎÊ›Å=MÒÉl^õ±úz—g¨š€. 
ªb¾&ækb¾&æk®ù.S)äítÒò/S]°Ú;žR=ͱfZ¡ÚÛ+jÝp3!e+uÞ­©+Þ%^\ã>5^¤VÄ»ÃC£KTFwosºæ‡Æô|V¼r|­O«¡›Ô«alhlO¼§¡G•F3QÈ[þ²FW\*ã¼ÊŠ»áH°ÉWm*ªömj/dR¦SèH›…¼2Þ£Œ/¡D¢Ê„ânh¿RjjL™Ø©©Õ#ª)ÍÞ šý\ÍÅ\åÍŽIõmÞ$cKÊ1gZJ‹—¢¥'Eï–¤í^œ M.ñfJ1°bJƒ¦ŸÄôº•(ñ9ÑÓ-Ë+Ôòƒ-ÏS«; ЪǢ†@,”*‘Iý»]z/{ºb{g´}EÛK?ý?}1”ti_j•Œgr¦´/Ñ`Ö×Éz}Énâ’ãQœÒ¾JÎ3-÷Ÿ¾Tùcß÷„åA…5C &à>×°^)  h§‡"ÂÐ…!o?¬ #,Œ°,øÕ¼ÎŠ-lRs—×Ù.¯È®’ÎvuwvvIgçxÓmvy•Λ¡ŒÙ™Íålg»õ7c¿±‘xœcðÞÁp"(b##c_äÆ  ÉØœ6I02h›¹99 ,Q60‹Ýi3#'Íá´‹ÁÂfNqÙ¨ÂØ±Á¡#ÌSñvq400²8t$‡€#`3/#ÖÆÿ­Xz721¸ldMqš%“rocksdb-6.11.4/docs/static/fonts/LatoLatin-Italic.woff2000066400000000000000000001305141370372246700226600ustar00rootroot00000000000000wOF2±LXX°å×ÌZ…\`: — ‚Ü4‚£{6$Ž@„€ ‡" ®m“1 [M7Ò²?ü{Δ“)bÌìYÙ¯¾*œÅе€|^­Ýî6+ j‰¬=ê öŸ€U‡Œl0νZi.È6§ˆë¹æPK •ýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿí&ñ÷ÿ5mµ¾ÂÃ7A†Ü ³è‰¥YJ‘·ÌÜÞè`QÆ‹YE=iK-…Có«rrÚD£Íxn,Ç•¡2\Å*œÓ–­ÛhlÜÙL‹P6/Cli¥¹p(Ø8¥¸ì¤VQCr!õ’5Úµ[Râ,Gf޼€&ÚÞ–­|$›©Ø–l+XG n`fÐÈTía£•Õµlo6ЬƒcÕ8BKds6†ûö334֔Д÷a5í‘õ¶P;ë#wh6C P+8(8Ô³fA9ŠºÃÞ….DGGÛÉDi€I€úYv€Uš7[°-¡94C.™Ó.ãGX²ÕØI¼€¥dˆ,@ާp=±½¼T¯éˆê vƒšŸÚBUÍsÏÊ-,©‚™ëVSrîØ¹qãf¬£HŽO‡9Â3œ‡ê‡1çÅëÊ«K3Ú9ØJr4ÃK3h&ß:ß`-<«ÿœòóÔÒVj^ÓÏ΢½ú.âÍ´ºWm‡&’5¾EP+Hð%v9èjØíè »Ê".í¸†J/‚3ì:ÛËMÈ}48ukñ åñU4¨¤û*GîgTøæuÓ>®®€+¨ÁA°Â·ôÓtì89sA>=oð¶²äŽà¢Ú»úï LVÂLżKõ}°Qÿý¹þ‡lªÝÈ´™òG¡J œÒcƒÛô]Fû%4 5AûõÐÿœÁ³² ÞôŠàθ¥ø@ÐsçñS4`Ïø>ùHã&ªä1ä ?×wÒeý‡ô/È”îÄCz4jy©ÿ•þ¡þ×ú*£Ÿ½U?}§¿R»Gz+›ÃiØsÒ÷ú?èç'õßÑÿQù)ý§ô¿ÓRÿ!ŧà§PmØÃ;…ŸõßÔBÿmý_ôÕÿMÿwý?ôÿÔÿQ?ýRÜG¿Cõîöá´Oÿyý®ŸŽèÿ«ÿ¦Öã‡ØaæôOh*¾àŸÿ…‘¹üQ°üQ¶‘&´dÏ’äìö_w×!:v<ÿÍ”¼a0®÷AZQZTêÁÆÁ 9Þ|­ý¿§«»föîû€–€$’QÑ€B2( ÍB†U²=ÛìˆVBE QtbåܬE… ˆ`Îx§½ÍƘ‹òç~ïdsN†®ƒ•°`•®>ÖÍNßôÄC¢© l0æc÷ôÖ®·eÊ”ËÕ2fÞþ¿Íþ¹0Þ «•hëÁCQÁlôµ×F-Í…+׺n•«·Ýv‘»ØîúŸi¶\?€-߃§Mç*£íƒp¨>©»Àý¦oïÕèÛ(ƒð6 Hb}ü»u…ÿÿ¿õ X˜@¦ö´áÖé¶W<ŽƒsÚÙ 7Åÿ2p*aÙvDiG`o ¡Òç‰oÎþîãÿéç™2P1¤*ÔOswT ¢à­sT)*OIËÅ0‰]Éf³ë ݲ$SÅE1}šáû0ÕeA&9ò¦8êú´ªº¾MýGÅ©‰LÔë39©Eʃg@¬uÒe"’vfŽÓÓ¾_µãÿ\•¬½fL¸®&fÆÂ)5qã]÷ãG¯mWƒöOЏ÷ó¹õ 3³À0n†pUÆ¿éÐ?êJ*š»yÚ ¹ ûHKG)(ïQѦ¨ÝU#‡¡¦÷çèFàŒX?•³Øj¥.§Öÿñ‡6Ää„jTg “ÍqYÜï!-xª‹í+‚žk’î㌠¯s]´7%ftdžG*b¿‡ì}ï°Àòˆ 0À`\|t\|ô•¿ò©EST¬‰{„Æè–WFv6—åU€<ü›tk¯®2øLPi€Ý}#0mKO÷³—•O a w7þó? 
þÖV¨¨¶J ” •Hˆg3kŠËÿDïO§Îtv >¸§úvÊ¥zhì:qW"šÅ²eµÛ,™’EÓÓÄ"GÙDC_.Ca=lÇTc·Çª8ü=A a»%QX3%'ÔÙtÂé–@oìß`<ËæJ~n’ð ±úÐÐßÛÆ¹êXqfFåáZ&¬kÐV¸Ã¬¡;‘:FŽ´ÇÖ¿9¿Ðùª€úîêbÞÀ¨“€:Õš/”šežØæ_ bÁgéF¶ä (h³¡PØŽçrÂl#Îæuî?²ÿ}ꇗxBýà7€°6síW"TéÚ£ŸiŸá€Í·'Góm!á¢D¢VÏž9AÌÓÄJRté²C?Z}ÇÞ‡œ›; ÷­}uÞ˜ƒ–”ïЉÄñíø ümü;|i޾k='ˆÞ{ù6±žx´¿&þHüƒ”Aê$Í& I9­s8Í‘idîÎJ¹ïŇØEòuòeòMŠ“¿ÖÊ(«T ¦ò6ÕŸ*¤rÍ^ç5ä^ga»ˆÞÎ-F0bLõØùªÕnõ–t5®.WyÕÊŠŽ‡ÜnÒÝ¥îí„ziÕý$û’¹ŸÃB oãÁÒƒ»¡E¤@AÍ…™+#NCÜ›ÃMÎ;Iƒ* ªrru5ŒjAEz VZ5n ÑœO+ÆP>®‹úL(ãR3#C³¨1Ï]¬¶hò݈Ÿ` à ’¢–ã1ξ‹«[Üù“„° mºüL…˽Ïâ¡ ÍÊbOØb™Íæ0÷ÖcHp"ZA,ATèc… ,l ©ÚPºèmÊÖ£+¢•&ÂnGŽ‚v½¬"„uªHÃÚɳOÔ!k‘¯Í¤ˆ=¬ q¬–Õhï2–‚Ýú„´¦-@ßbÜ{B÷ ”€çÀ gÚÝ…kçEf9ÞMžŽã~*þZ³Ä#BtB›a®öjr»éôy€Ñd¶Xmv#(†$Ec’Ev‘J$MÏ(gY‘­' $’b\RyòD!S¤8–ë””RR¦”ƒP&¸ÈvŠq!•¦1¹!—Æhq¦iiž9oKDdÖ(¨lšv¹²ZY¬d~%ÛhÔD0Ï| ,´ÈbK,Mø›m±5¶YºÝŽù‘ªb¨ª¶@bMš·äݪm{t™«Ä¨%R™\¡T­»F«ÓŒ'›-V›Ýá|u·çËç—£ÙÜ^ˆ NͰ/ˆ"L(Ç/­j»ùa^w?¹¬å낲ˆÝÓ*lÞÄn]/a¼¢ "]ÌG2A š?Ä Ž,4ap™Hæ7—7I¦|GÿëÛèB.ÖÁÔ.Ù™y‘Y9æÔν>ÜYpl¶w¬è8e)FP- ÖUÆ.1ö%…‡¦ËŸ?$úJåÉí «¢\($5A­±™ýÞ ™Ÿ47iöà6Ò…UmÏÝaþþÙ†ejkÙ@û'¥ÍÏØÓÐ1VÊÀbÁ«INµ(Ff®Ì²9'÷Ü<Ä­‘g¯Þš¼ƒWTPG˜PÆ ÓRÙo!{io•‰–<Ú¦åù ¤2T¿acE3”¹Œ UÙÛ}6ŸÊKMW>Ž)+sÑ|Dd&”q©™%?¢?}ùÎqÌ…³ûbµEkÜØ±‡sþÜÕDõ+€|u±hH"C0‚b*ƒ“‹Û7Þöö;ïO>1–´*{CìËY,íX~Tà;ž ˢ̧{óåV EE_̉\%hP Ó„V-¢§g÷B\e*—´ß<¥BYðÚ:¥¢ËÊ2XñëésaÎ'w!Ÿ‹Ù4Á/±Â”_Í¡š5b´ãa Z÷˜íÛÖâŸÞÞÆ×¤”{p‡Ð‡¹À²ÃZ_›ë½9˜ ¯ÜïÊô4¬ÈpÊ Ï#zxUÃA PAÿ‚Á¶.¼€`,Š”íªÉµÜ,mÛˆÒë5¤Ñd¶”U»-ËNäÁŠáD‘Ú)YÖK.{9x{¬ir\´†Ù,v8¢Ëû°e‰½DÛ‘qv¬ªÔÔ54µ´ut£7ìOÃÕ¨9^o˜ú_—·í}<â¹þæ¾5g?ÖŽãñS jV\©f©õ×;#Íévsw²¿¡ò&”q%!Î>} Dê€`ÅpbþTßL@F’’—±à‚ Þ{‘Ö¯r÷’¨ÅuÁkþ"mË™05‹€ ‰[Rçtr&çVs9$ÏKo_¹Döœ£ÛUv­Eˆ NͰ/ˆ!x€Êñ”DIŽâIÕ¬àìüìüàñßÏ©r`ÅhåÂf Ï»»p©ùhŸFÓ<ÃØSY¬ k4ÐeƒÍ¯”†Î(¤÷Yœ‡„¡t^FÔZ%ë=»Jñnñ“šÙƒ2„:Ìk„ÓhécˆÓ£5xºÉÆt¬_¯Y@`/e!h«Ï@% Oì\&–Ë( Ÿ”óÔEœŠ»6$M:ÔˆÂê\Q„ ±.Æîæ³\4eà ´Ñv$»"¥NYÐÚJ#9¾¤Õè/ƒ ïî¥q”mt˜p'îTº.±Î”ÖïÐàZé»ü˜›;ùd§Z΋eýÞ”jøZ¸ºYÛ†¼g:Ëëéç}9Ðîz†CU?Òojk¬N†Ç$ B[³»¹Mj%b1Œ>3ÊB¯Ó†ÐEÒ«ÈóÎ ¥ý+«‡%Õ^€,yšÆT#n”Mô¥‚cÍAFžÖxuëÓ4`*=mjº ô)™®b¹É +fV£DñzšéÙD¢‡A„É,Ç‚¿„žùø-SVd[D@x½ˆœ3El>I’*ùÓ4Á:í «X2¡ö!·€B¥$#¨~5µͦ”Ò*¤#¥·ŒA*£¦ÉŸÌöŒ_‰U–¥l«ËHÆ ó§L1*Z~¶ÏÏyáåüëoï5oö~'’í›?CbR W"‘/©Ô­No0šÌ«Íî€`Åp‚¤h†åxÁéÿÓͶ¬%»Ö™nk‘¼ EJF¨hÞ:m*¬£è›fj*aŒK‚DI’¥8í K¼jùkoâm‹ßUïÿ‚È y2B 
ã¼'°üˆtpžõžÚ¡Zظ9ÖÇ‹\ qí˜ 3Cœr Õ®ügÅg¦`APÇà à &)cˆd<¶ø-³di$ž€ØœKTæÝã;É‚ñ—›,‘4ÊnÌæmb;í©Î²ž!N³¢ªñ9K—§„ø™ì§£Ì£Y°Í#XÆ2mäÖÃQÞcüf gðãÞ'V¡T¹¤nAKOáÖ×iÅ©ÔÉhCb†?ý“飯ØLGš!  ;1j1à¨ÅÓo£ÖO¢oÕŒ™Õ0R6¼Ö^ߢEÈDøAj d2ÎÌa#ÊFIƸNG›r†¹ñPñÉ\fÉÞ–.¯ã¼W'jͨAe–ŸŸ<‘ˠГÔ«Qoá0c êÔ¢µu·íˆ+^©<óÍy vÞ ¤Éì‡Ì6o$ôL”éð3øAšPg|–žkº‰#¸G)v³dœ6o5GÃò=6Ä<ݤYp¶&S!BW¸wkGüãC!Ò æ‹ 'àYˆß2eE¶Â"’%gŠØB¤JÆMpNÁ8clG&ÌÓr‹(Ì¥”lÕÕ*htŸ:0¥†v?­³œŸAc5Mv3Û7þ¦ãL”Ra–fÁáI"%AÉB “i'ûù,Ú3˜=o/¼ŒWp{íÍÿËš•,X æÅGd¦2È £¡Óª¬#ÅgšVÀ[9ͨÚÜ…ûñ!´€ÛöC½6.¸½•Ÿ‰nB…âÍM¤?[^‚™Ó„J]ô([¥ÖhuzƒÑd¶Xmv#(†$E3,Ç ÍhåäqÅ/¼SÆ…TšnLo¾tÍqZâµ®¢,D¥¥åzm$],}¦< ‘Y£ ²Ñ¢U[´·ÒÆ®+Îç­4Be¯}Fí7æ€qjiL˜tÈáÐÇ=é”Óq&dzÕtQ6‚‹ôtnâP÷ ´„Ð<[ùÜ”=CËA}»öú›©9Ѱ2…N‹`”…goÙpÌÆ^™XmGúÚY!àý£R ªpä>ó¿ÖÑö.@±}…•·…4áG8áµ:Ô-æÂLöÒB§¹9D;XÍÓ<ê©iÇ8o rEYUçÑÓ<¦©©ÌD6‡VÚ¬ç}Y —ƒz„,÷¯®k!]-`¬b¢CÇ+®ª|ó f‡•¡¡Gé{Ë‚ÄÞ˜Ïé¯sª…~Sˆ•K*µF«ÓŒ&³Åj³; A1œ )ša9^pºî¤Ab†7M­EêÀÚeœ£ñîöç]HüžmÐÅr2¹VŒô„Œ NͰ¡ÜTŒî8aB7ÌXzBªØÄÇ ^uºÀ3,/ᥥ¥MRÒ_MŠŒY5V}8ÁÜóàö†²0•-Cã^ÓUÏÀ O©<ùQàͪ¨a-TýÐðẢšVv„†Ì2°‚ŠÛÙœÎsï®ö?vêI’$Ø¡/«ç@?çœs.I€™‘$IÒK¤J\â ’$I Iàpìñ´9K±…Pµ€wv&Oý]²Û|>{íñˆ›ÀÎô˜É{Ç`% gàF¶“±ŸÞ,Î`~ã=s.Äùq7{Ë ­æÌ°-«Ÿ†²d(ªq¿®¨çvP V3xDw‘Ï Z…¨JhïðIÎ7üêæ/ ˜NPrÁÍŒä†Ê“¯ R”‹åå¢4¡²RžÂbu„Æçß’M[.ý5Ú•Åîæÿ_» ›A 0 ƒ7‰GÙºèÒüñU’$I‚Áâð!,Id •Fg0Yìp ¸<þ-÷ ~#'ëfèt =ajy(K¤2yÌ¡–±\±;œ"L(ÇGçæÔâù- ƌĊÌyËî3æ¦$â*'ï¤áfr”™¨l23î59Ÿ„Tžü( \¨¨Ub/ºÃF2¡¢¡ žG4ôif…X•açâë9p¯ ·L•ͪ ÕjÔªS¯ÁJ«Ö75¨9@k*ʪ©bËP ã^YÑÙÜpøñ Ð| ,´ÈbK,µ,×ôŒîaB7ÌX”!UlUÇ¿gÿå5€ŠáIÑ Ëñ‚gàâêwÕƒc÷´ŒÄþ¦^ä ¨àÜP à¯vk5`HµnzA®½í Kv­lžâË©ŸÂkðÖÙ‰k[ )‘ŠÄ9GZ˜9-«˜ÖqëûËŸEñò2 ïð‘ð ?9-¼– ‰`«Ì(viιfyòD¡D‘âñr³R/e“Ëù#*5´MRiºe•ÍâØúñ-ܵ4™?Kutt e‹ÃaC±Id •Fg0Yì§Ú/ôho¹ÁR*cÁ‚R5ÃP$–HeòùO¶}ãî!Ç©,rw ‚b8AR4Ãr¼ † @!„r¼ Jr¾ªM-Þ¢·‚ÅG‘ * þ¸«*;¬ì˜hèÙõ˜‚«"ªR›Å›_~ü”ÀŒ‚gšGÄEŽ«5u­®Õ $WV®¢ZZuê5XiU¬ÖøðMšC´äÕ:ë?ŸÖŸÍ΂9…µ N‹ë”œÑ¹pÅÀ VX67~üÜ,”ç&àn"ÇMBØ<ØòŒ N„ñ·JŒ]tiþu«I’$ ‹Ãˆ$2…J£3˜,ö£íýFno–s%ñ¾ÆbKMMMMMM}(5B9Z$€ŠáIÑ Ëñ‚ÂŒÊñ Q’£ðT-:z˜_¸ðÔd¾'‹ǽ WäÇe×ïðÊÃ1Ð×:ºµÖ°°J‘$I’Z;yø¹2¼Ü§*4‘ïÇ&æ˜(X̹‰çykÖ¬à@6ºµÖXk•RŠ$I’$I’ÔZk­µ6Æ×uEäEÿ,á ºCh¹ªƒ–qSRêׯŸŠòCJ׃Sîòü%ÝÖÓi¥—^å×û¢.önÊh‹yÃÇïÙKƒX`B—Þ™Õwõ^•$I’`°8|+‹$2…J£3˜,v8À.ßNSÊŒ÷kl©H¥*jÊ¡H,‘Êä1·ts¾Àp–ƒ† O™@D‰m 
5Z£yº¿ûJ4^s—ß6cˆÕðtj섦¼VeS¤·Öe“L”ä(‚yûgþw9:û)%U¬–¥Rk´:½Áh2[¬6»‚à ’¢–ãxÏ ’¢dXŽVÑu1ÃöÖû¥@½KS&qCZŸÑ8ŸP”Qõ¡º®ñÆ®†í‹Ž~ Òõ°›žxÿsfJªjêšfær€ŠáIÑ Ëñ‚xi7޹Hî4¿„ªU=­'‡_Ùï(["/f‘ËdF’ŸÕÚ¼ö\¹ì¿v<›Dš(qй ’&ëwò·}&Ù´u½œça·>Äéò÷ãžä~gßp¡Ûip¿ŠÓž=æ.Aá,„Ç«ý„Ûa0íÙ{hÕ@ <ŽhèÁ䱂­â¶0öDCßËbx7y>Z¾ágæ/ ‚´‚#BÑ"Ô@MÍŒÂ÷È„,Ù„Drˆå’Ê“¯ µŠÇr’•Z”ù{âDDDDDDšªén6Ö7k7 Ó*ÇRx ôdÄQ5$¦â¦Tc\øébÓä4?E(PzãÞƒGOžãÕý݇O_(‘: A1œˆPnø£»O˜PÆ 3–Ñ*¶ÂãÎ{Wy&4lORiºaZ¶P=|AD$‘!A1 •ÆÁÉÅžv¶Xmv‡3®GçóÊm€ŠáIÑ Ëñ‚k÷ÆÖÎÞÁÑ)ÎcW·¸w?ß+ßÒ¹†j‰¶#=ôÙªTMø:´å9„–(ïTS×OW—ºgs<ïÞx÷õâÑ6öŒ‚fh-¾N(ÊU4O-‹hYn-—§»Èݶ5»ã­Õë±d¾aø˜¾gõ| Ÿ;®N0S¸ÃlŸÖƒj†ÓÒD1 ¥5}GñXZÏ¥QØ YçúY&ÕØ\²ªb;,x„Û¨¢°œŠ¶NG›!?dÊ"^CL*ó\’¨â؇Áv^"I¿;ú|Ð3DYVWõ Óð`¬o4ë´Éf»ãª¦š ®©i-?:Þ ì£iâÁ,?àö”ÉjÒV:»À­· °Öy­F¦A2Æ/F_i ™m¿*«]½›]gŠs²:ª ø¬²$i[úMOC´Ý)yzoTsWìÈtbiMí‚=:ZÆ%ÉOTh 7u¸ik ÷bu‰0Ç)žæ©ª,vv­¢41X”"u­Ú9¥ŽVÝ€dIøN‹+¨Å6s]ÅN¬ kÖÜl㪄db šÛƒ«uçD‘ !·ûÚ¦E Á°GFKtðLˆòhލ«÷ÇAŸ)wîÑœ¬<÷5$g]ôU¨PV?F¾õNHu<û¥ËÌU¹TLó6n%ëŒ=…´ÊÖSÊkàds°óš£3Ê<`©®ØÐcG™½=ž×—]¹“t5:o˜´¼”êJD4\Yc’««n7\…¨4 ÆÅ©6g¥ŽŒÐ]†È*I›Ë¦ׯ­ÇäÈ ,š[ÝJllø€x²+­êŒ^>'Òгº¥±÷oñÖÓ¤^³Í·Áæf³ÛìS!/©þ è­Þ¦zB1‘XÇ1¦2ÎÀô8Y?`¹‘ç"Ô°9ú<-Êã«Ò˜¯ìµŠ&¨¹|b?\€ E<±fæðÉFEµ à¾eZ"m%®£RZªI4Äz?ú¤¡elB›:Rs0æbÍÙoÖÚ _t~û·€…ñ‰-j‰ø¸xãû´p¢‰Hµ'“V¼t™‰ÁäÔ²SÓ0¤e+Í-SQ†:̓Ê(Xuõé‹N_ñß-]úÓõãb€2òçÀ‹xkOÈ×%|í,³‚²ÊjΧãÍ¿ð·šô.x$ïª6õšÿh\ÿ÷>¿é¶Æ÷8÷ñÔx€ÜC<þÒx쉎§ž <÷!ô‘…>qŸb}F×çø¾x¬½ú¾ï5©ˆüHê§ÇÈÏ¿ôüÊÐoŒüÎÐŒüKâÿKZ+‡Wi¶V¿Fw ‚d£ýœÛžíësûwŠõN¯ zÖu"0ásäí &-Û9¶ }žíʇ&ß¶Îð†9ìüƒïát0{¶ÓÈ{µiˆ½Ý9N3¶7g(‚‹ÇÖÐٮ¦G#lØ#ZM‘5KÇ&Ûל¢8&— Iißܽ¾Sì³H("ZŠæ'¾o¥ÊtP¹’Oès®ãØOVñÞZIF;N®öÞ\p±‚šðˆ¹ó",²¬C‘¹<támÛ¾U5­÷µ®~^íÖoŠ™ŸÕ,ð;´èø©›ËCÕ‚ø(¨•V?rðŠtõj–{Qâ>„¹ÕbåR R/È<•Ó¹(ø3uL¾³©Hø€ñǪI>)ä@Ö ^?Vjrãë ‘L<³õCÛ€•îKvÞáûS¹…pÕ©¼e•7ø……Gâ¨!ƒKAˀͥK“­l´€‰’H¤VÙ+Ùr‚¢{Ùmwର÷izgß­^¿‡[Ud—ÛÇÎKü¼´JnT. 
s"µ6ô$¯¯ëº)Nž qm^jeóÑ3‘¶ ·›9ue¸Ø*,î宲R }žË0&‡/z€Pµb ÍÆ&[F!öm‚dƸ£uÖ&0,l…Zu2ž¨ùêN ìÑDþ+ên·xU+0Òµ@¤l–ÑdÊc^zí=[öñàBPÙÞàåçpóW#õ]‡œ±TÊGšâ]á-£‡-€}ð0bïÿ؆´Ì÷Bý­E7#Vc›zq‚œ;¼¡sõz;ò qÄÉ>¶4ðºf‘¤ÐI´Á×Zä“)ú0·€¢”(4,«b”“bËlÑ[pñÆø\M¿Šg|&©ïˆw¤Ág›ù9û9údúçU†çvJezVB°k¡?ìc°ª«v̈Ä;ÅÅŸý è3àq(۾ܧåÕ ½7j¿1L84ZSó=QÑHÃÐ<Íy¿âOÔAÞÊ¿¤Æ*áªKhÉfÒWéš‘ëikD¸‡ÒJÈź0ÝÃ5V‚«Š„‚Èc,ZKÁ©_¨—>É^bY¶U ËH¥•LCyI'~ ‡•°WŠ·JK_¥+×ÖˆpˆyïbrD+Ôè§·§‰Oäþoä¶³ß]röÖ@ݳzwý¥Éiù9á¤RnÖiW¯œ(ö\ðÝÇ` ¼væÉ2Þµ„zL©xA‚"æÊè˜G&ŽÍÃõ ¥ ¡|ȇ Œe¤`ÙB()º©8ÜRŒœ½ˆÞïÎ* gåW>µ›g 6¡á_` 8"PÑŒMèëd™t"ÊÕjÒiƒmvQsÈ Ó.¹ážÇžzí£ïÜ•\¿ToåVùj繉-º³Å[YØIŽ'rßR,ƒP"D‹EŽd#¶–ŠP)µÛlOßàZ!Y`dp+²ØbCXÑýú¿¨PPµÂAj½8P­âAµIÕ¾ C¨Î%‚Ú, ÔÞ¥€Ú'Ԙ͠•m Hæ ›   ¶6ÔQÐF2çgço‹2¯¹—ël±}¾²×øü§ãN1j³WÈnîȶ«º…»Õ7¸ƒ¦¼C@›Þ$ÈWS5 ÆíbçMm¨ÎÔ_×\ºâ2¸7n_ÜXÜhÜ~ƒwØvÊ•àS’\ñû;Ø­{ç?dÅÀqSpÀûƒ¿¿ ýe~ý¯£fà?—ùÿôª‰Ü À_ÎÛX EÀù¹àx»*ïÃ/Sä²™ßíëé 3ºéê à=ƒc¦èýkÀ„!ãn¹íŽ»î¹?þ5ø<ƒ^zæ¹ó`B„ Éyƒ‹‡Ì…-;œÐ¹Hïï_6w¸<§¡uÖa¼ðøð$D¾l"bf7å+´\©2å*­Ö¤Y‹VmNPÓ™Ô #¼Œ“.èùé ƒÆ^¥ÐÞÿðOŽNŽHؑ۔“.¹æºÓŽ8£ÂE˜tÍÑsswx?zí·nì¢O¾ùj·ÏÚÉv·¢Epà/_üðbçGÏ0è²–»…Ñ›tȈëOAé©ï{³¢þq‡MWå\ã^çÝàÿ¾›Un9ϰèg÷¤]î?¾}¶Î¨]×nL©Šè›\V:Gmóñ·^¿íd/ûÐÂ%ûN¶ûþ]p;›p÷£ò‡÷<>ðä ‚õÔC?UŽöÀ¡ ÝöÇ·µgOœêø\Ï{jöXÜà7YG£ºi‡§ ™úï8Ÿ×_ž¼=Í?{ØË,ä}Åó›Q:æ'¿øÆÌ~a86œNp©lè}­â¤í#ù °½†šØø+ÜkùHÁÌï[±õwd¾ 'â8I[ÿˆ¼Û‚=|Š,?fú–/Õx…Zû?Hu;jÎlV99Ùf£ž@bMyé¡Ð¹x,>õSYâøl|ø9:sUwú‚÷ѱù­«ƒúªZý–T\í9ÓsêD‹7‹¡°7H ø|×^ÝÆ|Á4þLôeX‰­ÌËIz*ÀS2Èáx‘’ǃpè@kÏ…Ïýˆ¯¦uß”„ñä©óâ×A¾$aøæ ”ã3¼¹ó+¤:+Ÿ÷²xh"iļ_µ‡)Z 7?Ä{¦ˆ¢šÈãÎ=¡jAìx}4˜ ƒÐBëK Jü¹‡C Ö‹yöS­Øn6—)Ø‘G«Åèùrâ8–8*Ï0F+Ò+àqu#‘0¿ÔÇKý]e.=wB{™ëÕÈ#” 7£6?÷4$ÄI8ÄA‚Ðê0Io Í?C2«»|Ô`­ãX°¥‡ˆÎ~xRlÄ­£+¥r-¶Ñ Å“15ߪ€òÙµP’× v=“ sû±(n´Ð™V$ÍÕÏx&ðjÏ–¤°)ï6õ¹ÄLW1Þ»…k#€=ˆ<ý‹¹‚æãˆÒ 1±"Ê  T¿õÆ|ZK7¡»üoFÎ ¯ÉãPˆ}áÉPà9)ZôÐ&¢ ’ rT ÐexϨ¾7<¶çA.ʶúarñ£ ïÛš6I>dØ]¨½f˜µaÌCæ‹ò‡ ßÚƒ*À¥aðÆíp~tÜɳVþ©¿ð¼¬yëYwÍE Á?g$Š]¯.È<¥M_r<ÖjÌUô@ä§nžÙI³Š—4RPë¥ðWìÙ@=—farÙ¬*mÆŠ0Õ[Ñä0ªIuY(;/<›®±e3dððùo«¡b+XãQ˜R§AV‹Y³Ð=Û¬‹Ö,LYuMj{‹ê‚½%XH‡?S˃¬Ã¢…oÌ}ø'uKü1;AËnØYàlKÉ-.¾‹Ë2¦Ð“÷ìöó݃B`ç&+~\{ÁªwlŸ/ÉǦZ“Žj'ìGZäÙ÷¬8€zæOV†‰1GaIX³Øcwv$œ3Ø™qqvM¸ f²;ãáì™ðÌjoÆÇÙ7á7ºØÄ„ ×!&IÛRϱÐVxç¼Gʉȵ%?Ìö‚QjRˆBáTXf.B‚¯‚/‡ ¤#<—>Lð2¼þèˆÈ| ^ áX•§‡¾âôÄ\îZú®Ç¼±öåÉ-^ 
f3Pàîñ‰Òz¯TC<\ôOç5Ÿï_ŠÚG^Áås¥õ¯U‚ÌMÍ=OÄ÷±”áæZó H;éÝÀÑΣ•- ›°­*§“2ÂVƒ½9ø¦NÀJ? "6ó ¶:ÕyõMŠUÿè$9;bz]e—Vvª9»ÍåÄ£C b=%SSR2TÝJªk8–ù4îRÌÀ%6ŒšÃÅQËKC;kyÖ¤ù¸Ì0Ý8P¯Ç¾–‘=G#†2œ2ÇÙâ}B¤i2Ž–'è°.jf$æç’dèºBÅ©«âbã™@—Ëì¼®Ó¦4+¤ØiË»ÓRfÒ[¨æ©V‚ƒææç›kMzæ®Ýºå†n²W~5¯¾; ß®m\]»ÖеM»®vèܪZKœ[.Ð.ާVØÂ–¶ÒkOk ä‚Õ{ÝÞÅÇíåxEß…–N¦NF÷-çsíÃ]ÿÎU•ÄWWK7q¥£ÝÓ4B(÷ÿ¸.µ”›6-í/Ñ»œ©âdƒ>2¯j‹¼]3p= ·Äñšpáú.¯í’-,]lL£¦«wa” yc0 AUe“ rÑKœ›º¹Óvb5[b¶3^ ã <äDAÒŸø´Jn—í%‡¼X¢åPî) ¥Yl—Â*¼Ô$vyAS˜<ÊmÞäÒ9v£Ùh;,Í<㌶rŒR«æ™ «B]áLiµ¿ÏZ³Öî^lއ‹ Më¬Ç+þ'_´zä‰Å™Wô™è\¾úpéU{®<¹¢¶êZûîžW€}´æ®ù¡y óèÜy°ïy ¾Ù!: 9c«nË”âf°ï0-§9‚(tíâç–Á¾U«. ¶NÍàÔÒÒ  ódž†Ì$týÄ ßâ¦5ö0æP„[‹š‚‡«ÌA&¦bÔ¨:²!¼Z‡ þp‚¬þ“Ed$)—I¢Õ½ô±åµ`=ÆÃS`ßåÕU°oY<Ðy>S«¡Í!lJG¸ú­èÍ÷I2= ™¯Xu­ªÐΖ_’g°H\p×ê*¯‚ »£ygV÷UX£EÙ&`=ô éÑk.µ=ì )ý£ª»R…,¨GЂ_'„Õ>\–³Û‘ÓÞ3¥-qTQ¨¹Hk¯Gq°½ ¶êj#ªÔµ˜PÈ¡€«tâ-@‘ÀOG´ezleöùð2ë#‘ññÎb°=†!U4Sš•ªûAÅ4Õã‡eӵâ¶Ç!×£T+vQ,ÝCÿ×q]¾olNÑ qÕš_Vµ7µiWâï±j-4A³|'õÈ’kãH¬tL*fÆÆÈe§’ ^µÒ†@À¾‹ õÔ‡'š=kQùé‡.á/š=ßÅžeÍvÐÆ+¿|5ƒLx´òŒcÆ..ÝK¹Upêò>ÀÑPèZßòòT‘ÐÕUHÁÆ}GÄ¥üþÓ!»‹V£1Ü!ð±0“Ë:÷ Z_§£Íãƒ6r*wgÁMˆuaBÔªµFE-³Gfé¯Q¡PH˜èK@sÄêNJ¯õ÷£°‰ÔüJ‰–YW%ðIðÅ=Ѧ±GM`ŸÕPtö¶e§Ò1=^Òf€‰AC8´â$éö Ûð!eèá}"·œ±1Ú„Kñö½xìY¡Ž[èA4$uÌ4 Xöæ”=B1Ì`Yvß"kÜY3!FNƒ›ì…$¼µ„GŽë±0ÞGn èòòÝ£ ód¶$¾°Ô V®­¸¨Á¾!šÇÜùó7‡OÒè“Ìùáõc9¡f'H¹—6:syƒPމB­lÔöÁà>Ë‹ùޥèyñÈ>çlyûÇ/»ÏÏMP0Kù£G—&„‹‡mÁ¿­¡z‰Ð!jâEI„&Ì߃Ì®¨éÐþ-ð±!;Ñ‚@§$n¨$ab9×%»ù2q 2¿ ï»ö= žXxžéê˜ã”ŠŽ@Ø/ìÈ8VL>6÷[ Zw æÅ˜Wa´ò†Óæ²Ä)í“ÉsÕØ„1ôõ5‚o;è¶ØìˆB(Y1ËðÅŠÔ˜¾cÎv8Š®¾œ(yxä'÷ƒ>p âäéA’û$—Zomv"òºw1è0)½c!¤ÃMëvßïaŸóÁb=¶È•–üÇ!J÷ëTp«{'j¶Œ-Ãfs¸FcNaúPiæ¢eÍ\v Ú4ÃSl¥AqÝ/Š…A5¨Q-¨AÔs`Õè†Õ ¾þ!fR§U槆Ð|jC’ZÙÇÞÕÓå²KGª;_ «c$Qˆe³‹êr#JxÙi•(d™¬ Ç¿zƒíÛæ@ÜIJ ö“Ãè¤Ù•©êF*,ˆ¶«ªO>[ak ?^DWº7hnsÈãU±ú^©@ bûM˜òlE©¿ŸšHA¢ò^Q¢++¼´|Ž/Þ4ݱFa‚=ð™rRA&4n]Ø9~ܧH¥ÔhJw.ç#U£yÆs¨µ ›I­lÜ¢ŽŽ¸6GbGë5éÇ'ëñ°‹z¨Äõ¦©F¿ÏáÄC£¾c%þÈwä—î%©Jß°dM-o~!Áñ:#hçHo7Gn<Ì®Ùb$¹ ‚ƒ@:<5ãŽL¹¥¢gräÅŒxQÁ¾åÀ‘)oU‹¿V4|ÒOâI}tr<|ÏÞž¯º%Íž¡ŽºPŸCûŸHÉ ÃC¾Q»`Ä: „A“ —åQ©UsçA=ò‚Д1 ¤Æêñ°CS\hT"EÊS®¨â½þ”×þsí„AšC2¨ùÎâQr+êF`ßÍ󇈪*ˆ5Ë¡)_ðÌ-¿’ñÊ›%µLȹbB*)dx®;6fÌ©AhDÀ ;Qx NùMÏ[1äüÑpŒxŽEÉ#LÐôs x´(+ÒZ÷Ì)‹±9>_ýhšÞª6JÇ s¸ZÎek#¦W¹•§V,e¤§ÕÔrºoµâð¾ût9p~åZ½[¸Y^U¸CŽ­^¿Š–R}ÙÊ%CSöÉ]6êøà£‘"4í³• ã‹eÒt=^ήv/¥B¹oÒÙò¿;n†–¿V²é4=I{ 
³³nv59®‹t&¶såÎ]ÆÓ¡GWÂJ! %ŒÅ;"k-Òoa1cãòNJqI‹ Ûwˆ lA¡–y›‘Ó» sClH¯ ±¬†SZ<öÄÃz™®îñBXxÃUÆAÀ2³Á¨5ØúRI ¸<›£ D Öç¶“A(dZ*Ò­~nÉ÷› t—Eã–—pφéšU EÓņQPÄt¡šÝøé®K]kù 1 L ´˜¥ß÷Æ2Û|]¹h[ím+Îc,/êŠË¥ VT-Rã}‡ò&Œé°m™ËœŸ7ênAJÀ5 ó¬ò.Šç`­Ê A°Îd©Û“Ÿ5õ‰‰SæXãW¡ÑHÅé©àšL»¶ eùŽaz9ÙSè4Æ™D4@˜ûØAŠºŠL•—ÎÔVtYâ5£±k½=KÔÙqÚE£PpÖbq6¯¹+‡ñE‹€@vc±*ÄÆš¶hÑDP£……4}l8ÌÊ–]JµÆ1 L}KË‹[cÏþ³êÄÐ[ ¤ˆ ZU}a 4AöwÂ}z•8Ô„Ð6DfùºG DpI ”Hlž è–[b§õ@+-I|(áL%;À@ؾÏ¢P•âØ¶Q'{ÛDÑç'o­"#*èO¡÷DÆYýKsD{{uœˆ”Ú”NG”I4œÐ„˜¥Š‹GÂPhl Yem„„ŸÒ/ÈÄ Ê¶#«ÁÛ‘e?Õ°aƒÀEáœXè!MôÍŽŽrM½ŸýÞ3Ò¡¸ªÛu¿èß?¿fIãbüjNBT5 ò Ô©ú‘-ÙÆæQµS觤wSõ£I†™Øï #} Ýir¬Xú«h‘˜¾†°z>3E'™ÔuÉÉ»¾=þ²xÍþMíªÈÅ&ÂåB?ðïPÒûQǤ«15. %ƒxÖ÷Üz °~xñ¦´ ùþ €ßôãÏ„áÛæ¯³”Rv²oÁ’ÝiM~ŠŸ ‘¾½º›ÿ—í©ZÎ/_%˜üÓ’}ÊxEha·Ã®ªýEa²DÏzi^e’̩ӥs{A<µ¡é*¯Ù­¦vð H)ôÂíg »³Ü4]Á»êÉ®ûšþ)ùë-GëØ÷?Å·ìMF©aìˆ*¥™cïX# .÷Aîjw…ßVÅ/lÙÊ2°õ¶çóÛ >êŽEIù†Pê>©ÜnÓ–ä Lö´;—N õæ9Ì7æ9Zߦ |]=Ä.\[ éuعêÀ;«Ž `(Þö^Í8˜úùUórö|¡i7WDz½YÊjº•½õ°â02ÝüòNy7^9_/þw-¦B›°ÖåÓšx¢âóÍÈÖñö*¾áÀÛô±LÅBÚvÀYÄ>þÛj×_œŸ‡Ãˆtê8âMlmÉ’ÙÐcãUmL»÷VÇŒîÚÁ™´¯ú׆ßÿÄ©yÞè\¦HâRÞlÉÖJ“ =mçEL>‹@¤h¯ãžŸ9ö\´[þËNÞãK9ö´ñ)%òxqQ]å\«¢Oy&î€r  +ŠýuLé™ïÉ­À–͈,ôô°“¶ž æö,_7Œ;;­7’&7ظ‘m‰ìi˜Õé£jŸ“6†…Q LÁ†µãTõõ®#Ÿ¸æ=%%8Òm‰Ì¿¤ò¨šïÓ'?¼-Ñí–Ù¦…oPæ˜XéA"‡»O ¶ûÖÍ’ÒÝÎÑ"\ mLq ’ó¶Ìäå;Ä–ºp‹öü†Ýé,÷ÍÃLfôPÙ¢V6J*6×n¹,ðtB!SÃŽ§„½‹|a)²"y¡èt÷¦ ©õw\%OÝÒ†#D“D•É)~åü¨iJ˜Á³ÈËüÊ^–ªN<Є••WÏŒ–¢¿¼rŽÖo˜‹mç˜]_aw,7&……µ•ôþ3c÷æŸ#v7ÿQà' `VÌ?s~˜ ?Í›‹ü7AÛÄš»ªÃqP·+}C|A9w̲^(Y< ×bx>¾ý‚¶œGÆ‚ŸãZ¬'=íb0ËyXVÑd°Ga^À°¥ŠÅ–IcÁ½Œâä盬ΨkÇ‚žâšQ4K':€7Œfrš¢Þó°_4b“eº&Û»mª“(úÅ(ó֨㹔."Vô®ž©s¸[£›©ì~£rèû“ÑnüVG  Þ@¬Ï²¬íÔ-Ôý?´\3i]O–¯Ÿ#ª´ÌšÜùã7Þ:^Œþ'uüÑµí¿—tgVÙûc)ö×€}1gQKžÌ™¦ž3|ï{ðÂM ¿¤üHu®·Z#O¼" Úâz\4:ŽÜBÈÍðA§Ê;O”š]Gïý<¦ãÀå’g5Å11£H+-ô‰âä ³½zUÙ‡3†ëÚ·i'óy-Ä 6»—µÌÂXw qUð ©{ëÙ6KLJTþ?PúiÓºifa\`z¸4EÈ‹ÐzÊe¬J[Ù‡¿„çJ3 i—õ3?¼Ã3F.øí.#7© ´’Ì-ÞWã]ó¯Zeˆ*¡„mØÞh’$vnÏÿ†sxŠkJ2eq`Æ×?QÓÛ Ui'R‹+Lz—yTWÅ)ð”óüÔ¬˜È²Íð7‘'רpãÍcå¡I„£×µ¯À?š€5ÿƒK##4IòX£_-±…Ó£ŠÐ¼8M©b—‹Ša¯Sôâ"ïÈîí‹g2Ü'Ôå9lqz—à–ÛÜLϼtÇ‹~6Z—¤ 4âcÕã²ËƒG´hò«-û.Õl‡ ¢øEvnî2—Â5Ë5]' œQâ4ïHA¶Ï§³ÂK |3à…äÌ¥T;9¶gßç6jµÌ®šŸ\ê½fBšKQ–úG¥¶éÔ!粚àÖTQ±llçÀY' å5³Žæ¸ìÅË*ÉO*õêÞ¾tZjlpû ]ûÅ%µÄN ƒ€TRõºý'”ÀÙ¿yëË 4Ú:³ÔŸÚ†¨ûCNâx†: 
7Ø.›V¶UþMïÄBe‰¼9zÊõ·1¯í©mÒ>:–ßê½à¦‰WŽ”Ç²oóN»”éÔ—^ìߟ.p/áH3¼)°(-S÷Óô´·éÚÒîÌ-цҨ!}¹ðjëöy¸$o<;Dcµ)éf’ðò–?ZKf;s6åxµxúðb0Q>Þ- ´Ozf—°Ò³cZìÅ£ŽB Ñtž§ão!õ?ßaæÉa•N\dKŒ¾”5œ£`Œ%˜³›êUýaé)UGºŸ]¹]ý7ïZm¬ïo •Û¨˜ü«Cº k ps:'¾¨Œ•œSmŽÕ—†¡>§g$÷e§9Öë>™užu&7øñä·ÈÖˆ0Äh¢­d”ª¸óµí!òx–ƒ†Ì Rš¼³l¸ ö_‡ Þê]ŽY½`Áù†çûÊ’ƒË­ù ò;®ñë¼ RïðŶê|("‘`#ß "v°`°ºjŒ›_t/°rÀô¾Î°GÈË'ž:(‹ßc«íìÛÖZøÁÕÚ!ëLÞÑÐõf0¬$‚x7˜ ª7æjÛB¤ L­Ÿ%ý*Þ_LÊôTÙp!õ 4£ÇÊ_ŸeºÞôðtãŒ>µ¤…ÄË_Aö­ë¬ Áç¨× -¶W“ —lE»í2è¥g{Fƒ™À”'‰!°…i° 9@BiòzY9 >š¤h¸j2òòÂ9y¤m`vRï©$Ç5ÅÔ¥éCÕYŠ×éäŸí‡2±Æ<“ þ3¥4åaÂQ¶Íª¤xÿ+Þñì /:bÇ„ÀÏWbÆxßÌÉ4½|€\‚ÑzÒxô0©×Ú ä`ç ¥ìˆ½Ü¦âk3N;\’éÉíøc®•L®€µY¬ñ¸ýbÐAÚýÜ•ñö­fô6Фé:ö®zi')-È•Ÿïß.²Gé fJý×31®€£~eKôl ,NJ)0©Íx½ïÈ«\¼X SmXû;­ë$,ÒåC”²|˜ÒÃÍ׆9†–$¥”P›ÝìëÑëÉP®æ¶CG°ž4¸‘Æ^¦ªžþb³wcÿóW—îùuÔéqdïé Ø×'°òA¸” tçSjî˽νbD®×Ùd¬÷€!ÃTÑ nS.‡GŒcÚ†#.UDY¹ vÚeòË«†¹yÒNè¥ òšIÌ:»Àª»ì7ª~”ÂÍx¹/ÆPñÀ)fŒ0"Ö§T+qȾm=s4?“µ#Ö‚²EÎÖC¬ÜU,Û=Ò»A¨¹¯N.ŒUðrüµ ¶Ãù$M‚Ö²úÑÚ¯%ØÚçzµ†&æU•X­×03rþ?etŠ@Y½6ò¢íª9R?ȃ~ñâ7¿<½0ŽÀcè0ömPüÓÛÎm7I·­þ™«$ç0jŽŠz³ódzÜ;M¢ÓBê1l7*u­³w#3‚£“]õaêŠû›ü‘éÑ2+[¥szå.Œü4ÝGcÛÓß“•ôÁk[wüpƒq¨ÉßÔñ삼õTg~Q^¼o©·¤»ìélRï‰ä^l£]R쟚!l¡¥Zq½N$d²F¯º¤Puˆ"[Ð휞`ß+vè´ 6/çæ&7vÃöQ«ÍgiôŠÎ1V§È´ÏáêçÅØ¥Øe°6¨t-¬ŒÀ Z\¦è%’3“š<ó/³KÊ‘ 0NµÊSáÞ y}e±¼Q‘Ùóã®Ø†ôu,Z5nìL}eV,åUœ¦:6¨Oy)HèoM‘µ$«{CŠ‚0ò+<}«BÞœt§‘E啵Àþf¤#â@”Kù­YœPÝœ« ÷t½ÿVugô;ØR^e«&÷‰‘ Ì™ ¢Œ¶àãÖ%kùø² ä$?¶¢5 1ßh!(ƒx2ŸˆÝØ8ip*Ù¥íÀŸ +ŠñâºSü«O>¯«è ¦k_ózcn´ýu^5ÂòG¦îôæØ4£ot þgçÑËê²²íÙ Ùç L¼bi‚šíç”Å×ĘC/y¶aZv* Œ’⌾­ ƒÒ›]Ò²Wÿòâe•ÚóDwoë²õ’Ô;;0*SLÔõšO•8°8,µLéŽÓ¶ýÚ÷Š|\xKì`p³_[9ÖB9Þë•5~KL Sr¨..³T²ËYåé៖£ä1Ùœ¼`Ñ–¹kÞhÍ­vl½ç Dßñµ­â2\ýpëöBÄ·QËǬ÷K4€ÀÖLô/ ~ý·§Ð %é¾{ˆEç[楋‰Ï’Š#; swÄdç ¿ô§àþþe¯st˜Â5Ï…ïÍúXçSæ¡‹ŒGdÜðF½fs¤JÓÇ!þ£P1>¡zyñB‘œö…MOÒ}‘Ý °iænìwæ¤É™é阔ëÔíìÔ /cˆ0X/”¤³…á_y3ý!÷ÿÇá©â7çô‡+²·ׇÌ1Wàñ.Ïþáù‹Ý¼œ¥þ¡Ÿ ‚R}¬ ItB("*cd‰¡©Ñ.±ŸI]#Êùp]̈ÜWK—b× º#4„[‹ûwP¿Q¯i ‘3ú¶Ø#TKÞ5¼¿*efÛðl×¹Bï\¯.Ó·è»lM`”äÄÿUƒ[C@‰Ç‰IÑe+ÒUßÕ¸NˆóÎëÕ×¹òY‚ݦh/¼ë¦²–ömaÇ}z "n7ÏOÚÿ„;Éâ‘Ø—!QàªÿSË8dîÚ—®³k&?˜½Ö<°cj!Á½F•ö+Á(¡öÒi€îZdô×é雼ª>-Pª;¹êX?ás\›õd¾}ÒUY­+£Ðt6zv=.ÀùKµKHrÙuEVVð}íò?äF᯸7ò1Ûçã†ÓàQõNL/°å² Òñä8ý"]uÞì84Ká×ø?a@sš_{#Ô†À$lÕ¶n˜†&b‘zlJe1ÇV/‘|($¼4ËQXÈãÖ+ïé'û+3>o²œ|9¹ÀQÒMizƒŸîZ€ó¡~OÍãâΧvÝí-âÖôjl…4I='SˆmŒæ/8/,K£èvNMÀ‘ 
;ÿùÚj‰ßcè$¹)Àý“ïóß—-†÷;ˆ ƒf¥ÌkŠ*³Ž^ÓqÚÐÔ½§í~¿‰’ÞrÌ!ØaÿUæü3çnˆùi±.Tkv\˳?Nå='nW,àå„L·ší|.c;‘w 0§ÞıÎz?ÑPyi°÷$¡j¾Îq|„ñË‹Ö4Ý!íôò›à¿iûØ{ÿ[Ì©N=ï"Ö]Øí‘z°M˜Eì¥ $¶w\·ü£ßÛ-t\3/ùðWB÷=[æ×Ç >ýÎÁ ÐcRyVÙøm£4=Œ`±W8¾(½)H&õüÔ1CÜ‹ÿ6—ÛûÙûtnºPèë)WÅmõ*±bÏóD2ޏfûQ ¸þ;ªõLzär2Bz`mS˜4NçÉzäqÒCzTZS˜ÝºldsCR"Ì zùó ºè$n&à\uÈíôPùÏ:Bù± V€}ÇíÕ“¤F¢qþÿn@اC¿ZÍLóþü%Ü Äæs䮨 Äú_ñ¤?Ý€Ðk;‡c&›YûÂpÍèÒ=nS ùÅnò, Ø:ÂÞMF°5AHiTÐä½ÉÏú))Le((ÿ÷tç¼u¿Ï$‡þ×ø¹›Ãgúú=|J%á HNOuá­ÿKï§€ëD™üo'Ù©ú2™By[ãDîúOP²ò~èl¡óW¶ÿ¬ËæwÓ[ à€:`êñ€ãç½Tl8û÷‹5þáR\žª!T=ëäãSåZ¯òÌãK ü÷ÿl{:»pÛ#6[`ÿLyš¬Û¾w @<ÍŒñÔi^^\Ú3’@&õtˆiÞ^ß>! r}“fý6%%ßñ h(Èb&/…Kõð¼7OæEF†èxTOæ¡ùT•wÒ¹˜´sþׇoÀ¡VOŽX Ç~^ýè´ío«™£dÅ•Ÿ×_§þ5´Þ½ê±Vx½¼Ñé:ðîHκùšx@ŒY-›.ý'FZ3Ó›Ô´8ñ#ðáóŒw±û¾æ†ÃY˜áÇÿŒéƒP Û#epú_JBš'ÀܲqW#µ¸5ëf¹T^¥;ÑðshøEßÛæÒj\ŸFœÈÄo“7ÅÍVgGlÉÍÙ’Q0Ïëè,¶¤ø™’Ô%1ÿ²NäOýö–;I[7¥,U×ro4ŽN—P^Z«Ä›çã# sI\d@¯îˆË°^—ÊswR{Kv+À‚¢Ã¦0µ^Ñ›Žd8•ý¹m GˆYÃ>€‹ì¨£Á¾Ê¶ß(T!d/[I È-Oyë» = “a÷¹ìóÖ8Jÿ3#ØW{Æó‡±LÜõ 2އªöJkj¾R¼ó]¸±.¾!cTÑ£ÄR‚ Áþî¬<>á#ÒF=â”å·­«ïÁlÞ‚ø…úÚu% YV* Ã'ˆ–|o­OÐ$r\kˆÂKðMVçöxó‚«AŸÕhÌñ_H|? îMà”÷H‡;Ôänv0¬$+º-%[2ÑSwŽ_íðêÝøº¶úÛ´úó[¯êÛꈛ 7Ô¿KxíxV S ¸l#=Ç[ÉðâI3ñŸ1ò" ÷€Lf2ÜÇÚŸ¹ÑòÜÖŒy2€ÇW*âkO•F{¨dº…éç”BÑ‚Óé *¸”Óœ£zSб)4/›Ý•žѯ­oÕHܼyž‰Ñ;ŠKvÇgËêƒSÓ}+íŽÆ eRÓÄ’+Ý+ þÅ—Q“ÛÎöåEöcDa$B®Vó?±H GûúɺĹ5ÑÛd1a–ó®(•#޶ÌeŸr‰ÈÉÛƒŽ—Ô Ž77ÌÅlú¥þì³2ú‘­©nÛt+©q«%AüG·õ>ß륒uéûw½Í9r*çÿû;â¾.½;8°Pÿ ±¢á±bl0ËÒ×(\m˜W#âtùڤő4Tìñ)=”CoK¨Ï.jHÙ'ju€ðÚG/˜¢yÓÒºü‚Úæ„Ycp®Ù4Ãϯâo âŠþÆà¿“b%ÕŠLÿ¦¤SRz‰B„È^|„ãðÿÖÅœƒzè/° .²ëQäd],0úàM¾°{€uÁ`Ì ˜LßVSgÎ8¨V ìϨ­Kß8™µ1ÿÛã_{{¿0:–÷-À‚¾‰J&~%óö"OŠñ½¡=³óS­-9%Ý%°mòMÊ û×÷‚Ú&hiùã­Ž§¹x"7Ò;ªë7hYŒ,é^@ì?¯bŒSq_›b¿žšŸV N) ¬q>Ê]I¸øð…•0°¦T’Rè°ÚZÞ˜”r§u)üèo€g ¼A¥jŠÃUxƒ€^¨ÃE†p•*¼¾•ëï*UJÝüý»Pé*]E®2ÕÉʨ\e@á¾`´‡éÌà æCfW-3ýQ†GHpºÇ#táwÃ^*t12ŽŽŽ¨³‡†?v/ЉaÜÞ–•5<|Ù_,`\ZYÁ‡¬üíÊ0>îÖNvme´7>Ø¿öfRõ†“ù±*>Ïâ{´^É)YU™T}¨˜ñÅ&ÃZBú¨Àá;ò„õƒ™;+e쇎v»® ÃøIõ¶š£éñÏÖtì„ò7­‘AâÀ¯ÊQ²¾4PÌwW†øÿÈ•xn§Ü”Y׆¥¦úqݹûR<.Ûíö(€¶¥ûý§-q²tÃZûí»U¡ù²ø²T½èpOïbF½µûüßD®ÿ«Fðb^‚œ“Ò·ç¨ùI,°)p~Þª“GWfdòÆÐ¦“òÊu´õŸ¸€_R{ÖúÍBmCÈACoÝþ"¾®# lMxÐ÷ÎâÙìäŒ ök¶Û«1‚Ó:”/º&/·vã?¤¡3Ýv£P_1–Ûž¼º³ÿ‰¦g»:ü!Âm=r|Ðá{òœÛ¶›ì’?W†œæm¤.÷ áoaªb¶íuæë˜A“6"üwä†-¤SLu5Ud¶x!¼˜l¢<`|r=J"x¥þiŠð*!ÃîCà\U$”¯Â«Þ¦ñáõ檎Gj$Áೃr[ð‚³ó\Àøf 
‡³ŸJÙ„|¨ý»Tþ÷Û³$1šåµ­T~ÖY?üÛÛŽáTÚØñíßµ“ÿº¦l™pN›ùòQ©ü‚-ñ>ÐP:ñÄU±eö¯¬SŠóõ‰ª"@p£ä•.eï'/ÊÔ»$¥ÛbWw<Ò>ð¼¼Ù­+ÎrBVãµgT‡ËúÒÊNñÜžÜíÎ5 A;˜å5ب‡î;|¥ ÚL~F(aÿ÷±ƒ1±ägª·‚ë© Z4ér5*ª/ÅG…|ü˜Ï+«—Õû˜$l7,£†)éâvêÛWC,GÉѶŒ°TŠÎ`šÕž®ÖnliòwlËÄzì€Sø6Ø”•Ú Kón‘h â_U¹W UT¤ÛܲÀÏ/Þ­1²6a)nÿ$m£*X2„%‘ÕF+4~uœ`W…(¦¡Æ¡Ç×ÎNla¹¦ðeÔÀ0XÄo«F/àÚY–Ó.Mè•(Ÿ;î n j¬‘æ¾6„^€wk#g%F •µ%ëÀ€ìþh­1xSj‡êÑÀÆ›ŠÊ¸" â+eåÈ$.„c7a·ø”¶uQÈ6e´›&›ÃL +Ã,r[$^#¶É©±íÄÓßüÂFŒÓ/=ãe͸ߊN¹7jê;JÅ5ìÔlÿ&^ ]ÇÏ`²øUaiYm¼geµ¾Í]bA}’ÞYš|næ3ŠdKlNqøDîÒÆnT4†ñZÿ~JuÛ ü¨µÓšAí{aܤ üFýþב’”Vàó¢'ÞVþo–óâý?ï>GL%nAzÿníäÄ.>ä\0XR˜[fÈ rK çóùi˜©–ŠÖ,qTÎ%ÏþÍÕ‘Â(Nˆ6%!Ÿ“JÉuoÛ„HÒüŽï9ÃôP ia¢P•:Û*ƒ“˜öŸíOs-` ¸þàØZä$ºb-¥ýˆ…I'J³Ö,<JÙùÐuxQyËUsKÃqcH~xÎhı­µ„aû®áˆ£ý¬££Àägš×Gi60·72¿@mØYªÝQ’U¨ ÍîV§iºÏ.º. º¤‹H.”šG¿Û¸æfz:‡Î#£ç©D—ž€íœ,û'jÜ‘@x[[s÷•V ü âÒú«ú0XÖÑÄaç»çød»¹m}'w(în åG:ßlÆÚ4—ÖwÈXÞ^‘ÏÈñÑ0‚ÜúßIÇ틺BùldD ×îó¿×²·íÇü§>§~Hwèx¾3ÕÊ$lÚ†gÕì²ï ûÉ>ù\þé =kê5õ®®}yˆ¼r™Ð}‰×ýæ* ¬éÏẇƒ[ú×àÖïÀ5Ó“¿x\ì6¥Éß÷´å;:®yµÅÑÝ}ˆ×;ÈÖ‘Ië‹Xî»»)7E\çÞOyŸÃ¤ï §OŸÌº"E]Ø_ e'¥’cEØk‡ŸôÒS[¶ÜÎl›ø¹1»%W"l)MZô©žwª¾q£/Ã\¿…­>X{µ'§¹7soà«Üc@ˆÔÓ¨+<ò¾­Ìïdagƒ¸[$ˆÿ}~8ͱ*aßQœ0î2•nŠ•sÔþÑçÜHƒöÄÜðÛŽ]aX¾ÚqÐ3Kéé)rç&D«‹v1)ÒÌ®ªÜÍQɼ¢‰ÉûÆ6ç?¸82]OÕÛKºû|}¤pÆÉSx:È1û†äÕ¶sœ1MñHÖ³B2a›*DÜPÛ$oV𥮠‰e)_‡É¤_ d)у›"PÓ·G*B³¤þNQ1€áöØêHC|‚z7tþ H>† x;óuILô”ô{YQÔ´{L­ Ï6âè.=/§9æ30ɹŸý-VE»7Ü€rèdÈí“æFN8'JT Ò†xãTÕ:ž{‹÷Q¯3éË@µ²þf£î$¦Q÷χµ¯ÿ%‡Ši@pâ¦Aw“‡ɰáº><ìþbÝg÷$»Zs3€è¹—ȢƕtÞÊF²ˆ ܸÂK_iœRê"Z)†ˆÄa·8£ÐN¿a|E+…SŠOyû‹É…=Û{ò@]¼Ìp»t»´ˆ^$½µetš¼>5ž‘ [†©²f±ó¡¥zš€ûÇÿtuãŸkV¥gi”gÈR\÷¡ü7ºô@â’&ñ;&øÉ5„ñ’x—Îæ£¢·g$.ÎÚlj0ùâw|\Àø˜V*IÞY,Á«Å!]¿y¤zL±…öøçNå•ÇR½Øv?­!_'Ôr;‚R²è=ª@CÐGMá×5?ÙN„£i}”ø«ÏöªT$Ó&Ü8§ŽgN©„sãb\pÙ=ÛOÒ0ž}`q(9ƒMôÀ¼ç°fÒ.|"öa{w î~ÇÜ™—ŠaD7–]ß=Ö…×ìH¦þ¸Çÿ©L'Øc¼ÉÔO;ô#u û;D3 Ý1[¸…ô…1ÇHøõÜ9rS3”×ð—1>óÉÏù«—ËÙò?)yÙ@UL^¡¸¡é'ЉÞ[Ð~Â×Pý_¸Í·(O¯Ç­fÜ?G/ñæ?qÛqMЭý‚!b‰Dt >{û½ò$ë˜(Ò¹‘¹\a§ @á&©›˜Ç^ßáˆðÛfÝVj¿dv| ð»åØÇÕ,0~|‹z`ú’ Cn×,;bן¿ÍdcIoøsüMûfåñëN6ªQãVÛ˜;ô”ø-¸åäÿ„HœhÌZœ¼¡›q]çôr6į̈ãTW<—‰ÚÓ¸N/Lâ·ïŽ÷(hRÞ/lO²!õ÷´{hÒ¯l]»T–À¯oÇãáÌ Ú¿(NkÁ4Ú˜7óË+Ò¼ rüZÃ>IÒ$6ɺYùþ†EñåƒWÕ‘™'Õ;HŸîè"¶”:†õ©â”æFGß$>yé­ë( 
‰-É-wÿ—³ÄˆÄ°×Ës¥k»>åvEpo¹»Ù薟Ćå™å_œPÌ/™è°œ/èÈÌ}Ó–ö9þEÈZÌçÀG!-ñ‹91™b^ýµu-.ó±jŽ{uËû:ÃOK§vrÃʳØÃÚIË·V…ܙȵh!9ô|x"2¨Gx¿;:²^§xúB–wËî à”b˺:ôþx²-E AÎGðÉ48Y2g‹åêæÃ5¶q©´ÿ¼\Ÿz©a]UïÛÔÜûªbæ?ö…/ÜŠøŸÀñМ ŸezVw0‹ËNÜ{á¿!,ÈAnþ€è­òø»irXhy‰ýJ4sÑ5†ì«]¦)=KÁç¡9ûŽà ~cjv@Aög:ÄïDËüºЯ¡@ù"¹õsK.® ây[?‡ÜŒÍ(ö^â÷‹¦¯‰GÙ}DNí;= c{ß%\ì{†Ú0fqÑ ´ÃˆoAï“ÏÏj`I½ýßæ÷Ã{)@ŸšÞþ©BïS$Åô½h™¿Ð|Фbœu¨l]ü'ÊÍпT¹wAsïgª2ÿùêçâ÷'óDö]aâž×.nczL÷Yd ì*óàR"á… &^ü‹„— zL/<Ê"Q²ôK‹ÀþîÖ!þ1Zæ×´€~I%©gîjsOcY"âË„ ú–¹rÛZmᥥHþÛ¦y×SÎý}÷÷ÁYì%µ>ù·»ÿæï'fë&UåÑo]¼’Ïÿy^Û•Ýnüš\½âÇ—Ñ7ôÑÙë²njT¾´&fÖqó˜i4cð &"µ½€áÕ·wÿ•wèÍG±ÝίÒû3Ru@G/l¿C¡à%XJ³Nç."€Ðw0·Kî?)Ï6õ -¹R;:‹äYCÀ&€aˆ>*õÚUÛxgKÈo%ýE4c [C¹Y ßïE È<¼®$õû4y Ñý.6æ' ¶Ïpdާæ÷¦þÌIª™ˆ¸ßÖg(`6n@ÍM'j v™;Þ›ú3'‰±aÁL" ukhd%‘€xVÊ'ŽYq³·o‡Re%™?à㜻ð¯pÁ1»·Ý—Û5Ch®© äX-å©´a·Ó šÄà"ë¾Ný4P+f_—H·µtKý¦Á®¹jõ‹v“›Kn ˆÍVb·bEŠ»Ž7à½Z옣²Ì}„ôx¥·n¤ó¿Ö'WØŽ=é-Ã9ie¬ºæ9ož,8ïêPÑ.GÝU7Ô1¦5öqŸ-òôƒ¼1EÓ3«ðm2àûC**8˜ãív$ úm­°Úíje¦1ÖÁ6h;ýpŒ„GtñŽ¿Ü—?FK2FCÁLŠÜd̨Œ9üœ÷´MIæs c˜I+X÷‹ž€²y›ö0%Q³¡_2 ã&Pˆ@@°›¿!Â_Âÿ™àP—ðÃSEø³¨ ßm«Zä3(À(@ð;ݹËû<óP—øÜ}ä^—ÊíA`¨>q P¯{üsv¶Çð âz´¤¹ãÁ7‡û#ƘiNÀ{pBQköJº´µ®!ÎÉpÞ½Œ©¸¯&š5Ÿ ) âÊy\¯Ö#ίX¦%Àc(­|šÌ§Óé¡»^P2«YUúÁŠ+­ÿËßøFï~¾¹\˵„.û•+ASœÎ)Ÿ ¤oÔLÂ%qyOB]Y•å-rÓm‰»­£„Q–r±–Žö9×jä°GjÍ ªZ»Ë´]S<ÊéÖƒ°FÛâÆyb_׿ýcI{äABRý šƒoè‰lйÀ?”À çR.–‡L3Xª½„%½?,{ÁŸf¡_ršùMÌúÎÂjتgöš ,'öÕ² $ýÒyœ8±$wµ2µÌËeýZûèÖzõ0c›oò:•r7w|ñÚß«÷»Ã½x+y÷1÷™ì¦jæÃe6ó»q­¬« ä 3–þÐÁ $Ä5ü};6Œm Çz5ưÔôƦ^2¦‘`©à "),Mëzþ1ýBpLpp4#ÔqÊ«BïÌÚþwýi.B¡ P"¸ù®bˆQl_T0sÒhRQT'‡s,ΰ]F…|Ó²2ÚŸùä¤ìßÔÒÇj*Ò¤ L6U'ÈKО™H&K ´ë&_¥ðA+òD¿ Û­U'ya+!‰&o 6†c¥î_½wS•f0ßIðË÷ôêN|D€…tÕëFDãr U²¥Ø;‹K­é|_B7ðÑÊ[K9‹XÁ¦»ËJ·?ÖºÁ#[îRˆ 7´„Þ%jx°Œòn= HáÒ~†3G@¸òÌöLÑ÷Ì U¥¨Ÿ˜ûŠ€“pœë}Á£ ^KÖ2¾WjàFס»ŒFpžaÁñràqÖ‘qœÿ]’§ñ ‹‰{îCÚR-íOÞ§¦ˆ®z"¾†ÕÑWO|öÏþ}0ót-¸ˆÌ³BhDFµ!‰Üvìwkÿ¸nÂé…ÀŠá¤‘1a)¡Ž†¥côZÔ/þ¾þÿZgpÜé‹|×ç•H1ÿ2ó0̃Ì<Ìã[YWä™™ÈCòÕ¥”²/lbá5âGæ]ô#7‡—pˆcÕkÔ¬U›´ˆ,…:È$MYÇ<-hIŸ)Ùÿ~ª½ªg}uÛ3" %Åp‚¤¢Éa9^%YQ5Ý0-[®0µ(B˜PŽOÜH’+{]M8âA„ù.™ÍÀ’ÛОÑ×'$}³ à‚΂¢¨öEÉбuÔ¥vÿGpþ× @˜OÇÈ)JAÿû5ÄN"ìf®xbj,‹õ8-£ Ãµ9\ÛÝ®Õ*‰·jqºà]Cø}† pqÊ3O nÖç+ó´%%T…¤)!ÇăCHê„’Ö+§>¾“@HMZ´ë¬ëÁRŸè¨zô“:ÉPHÝQÃÖhDã8¡i:ÚsóÊB]ÑRVs¥M YÛ®œè:‰—¿ÈcXA8qCdˆQ™jV LÀZ0@À  `àƒX}¶‚†…ƒ?¼;!"!£ ¢¡c`baëhÄÅÃ÷áX¦Ì 
Ö""&!%#oŽEK­6Hàç¸[Ö,ÁŠÃWö[E=Û50¿.Û¢C6»[èsÅþ‡rϧܣܣœrÊ=ù›rrOßæ;—ÚÿÇ ~8skÞæ¿ :àF­1VñáåìŸ.ÃëÑõ›mûú´41²ä)P¤$‡$‡’¦ƒ•ÓßI ÈLMZ´“Êõj«Oÿ{ýïO?|ƒ… SgFšWjt Íš¤ZZò÷R °ñùq3/ÌÍ‹!SÛKQVQUS×ÐÒÖÑÕÓ70¼Q_F NÔÖËdXŽDIVö‹©mÝ0-‚[®•5p6ƒ&”ãV_‘$§¤QUÈ?Úlâgÿ¥É¿>+ýøþ¬Ldå•‚„´P®â·ªž†:©Ä`Ç ß™Du M-íI™Œ²I1mF¶9‹4=éLy³öÞs‰ûS¶w(iGÍ»|­ t*¿ÐMÑó¿¾Ügë³Î튋®uv£ã~kËŸ@ Ò§¤¬¢ª¦®¡¥­£«§o Ô~Ò"€»;‡0¡ŒfV†¤ÊNµwÿÒð@†åxA”dEÕtôll¹²-+€BÊñ;È%yÊiMµÚô}Ϙ½kÇzj\>½0×.®O´€q[qq;qœ×Wð¼Ðù€éPÜêýµ…ñï—À`®*®ãä «FaI+Eèih!cDÔ Y¬/dÕ M-›`îÇÓ·n;ôÃl¶ãVkÄk’¤é($I’$iÚq«6ÏKW» /ZîÌ5œ²]’eª2…ˆj aäpD$*šƒ3<¾3‰L™:"`¶¤9r%óÚÂnÑ[…€œR !P‚\‹Hd •Fg0YlP$–HeǹÌpö¡œ=œ=Þ³ LsVœqÆÙƒùßYñªºÓûe»~Hó'ÓßѯL(ãR2­EMcöÎÈÎï‡uN?š[J„D&÷%‰ÈÊ “›IˆFw$ÑÏ †È® 6J³1f8b“T¼‰z2 ì¹~×átFid£4êÖfGÿ¼lÙ%êàÌ¥R7Ï=¢à ’жa9^%YQ5Ý0-¦eòyp˜yÈ#=ñÔ³8þãùî–B%"Ô54µ´ML›!Í-2À[º[ ÷»½óûÙ›ÉÞúx·AtKx¨=ÉÀè¾àÿÈõO¸„|pÉ”r¢4DF6HáòmŠJa>ƒ#"}…Šö5'm|g‚D ê5¼X³¬µåÚêˆéS=±Ý¢§?)ä)6˜›º£ [#ë7.M\Þtt3図Hs’žál¦‹-¥f9×Êñ€¬Öê²err¥óò]íXî‚âäùqÿ„ =¥KØýŠü}ü÷GÕ!P¼"DIYEUM]ã }ÞjkGGWOßÀðu®ç$ƒÅá¾¾’xlÒ;&ORN˜ÚÒ^—þöŒ·Íœ›UØ96æŠáñÛ뀲ÃÐ÷F»ÒØþM˜š-·H\bóÒ¶Œ|ó-¼Ù2­¬oMplR=ßNÚ)ŸTl\ä„r|ÁEQ’rÝ*­îø1pší‹´j†Â &ÑÞo¼ÍKù“˜B i¾×ºrØVn¤\:ƒ gÖX±Ït®xçÏ]¸£‹±wIÙ»vXïòàª55zÃô,nxlFÍO˜˜{×þ-BƒKåÖÝ[<×ùÞßÞ¯ƒÍµœTnÐ]@…Á{ÿÅçÀ?H(4‹Ãljz?aè¦Ú `èÞ³*»e8_†k015‹ú½0"qËÐö²Èß|Em¨Åy ÝÄ„rÙ:8é!„ åøâ¹â‰ªÕÍ|þy?} ÏöðÏ{¥À%††#¢±l¦`j¿ËÆæf›“\š›÷ž ìçdPh ‡?¿s¬hçk6r8Ìdà¤ýå€×òH”jaÍÈ„#"e£¢ÝÆ`ã<ß™ ª®¡©¥]Gz4öÄK]=ýIƒ&£lP¦îhÃÖÈÃË&C7µÙ!7g‘¦…žÊ„·4¾3A"X]CSK{Ò¢dÄ”6u›MbÚLÿæÕ" @ÏКIœ¥m~ ±(]co7¯_ÂÒ}SÛ¹USVQUS×ÐÒÖÑÕÓ70ücÓ‚Bc°8|Ü"‰L¡Òè &‹GzA{!‘X"•É¿»µþ|ß÷>¿„EMMMMMmj'Ž—^y×ÆÛˆCýoZÂmhIÕeÊ>Ó4dÈ É-ß* HÉ»hî‰ùBÒf(iëa`å\‰ï$PÔMM[:k«³ÈÒ|"µnèÑOê'Ci uG 9’и8Qßt´3¢æX¤m“.c‘¦f—*´4¸’'ð—kƒÚ9tý§/³ã7îº_”.¯ç:mèc,-e(»]2!P|i””UTÕÔ5B¤Ý[mÚÑÑÕÓ70ÜßLûÖw¾¿Ï»“Ph ‡w¾Ü,"Òé'7”/G= ú7÷㭪ݔ§Ô¶s‚ë÷xñ÷ÕPqÞçC¿M4¨©)óÔ¶:´Y ü\¸®¨›8R3»Ë[óâ‚¡%WZW7ªÛrGÔð0K(/8#(oØO$‘-Ôû½|í‡qÌ7ÛÏ*²ÿSJؼ%¦ÂÆÍ]†¼¼ ÈO;yÁWe¡†¡kÄš‰–3,?¢û§ãÃïþ‰"àñ ÝZwÖÁÀú8ÐÎý} /-@   Ôõ«Áâ*ˆVõ‰JyŸæà–y?CÎZÿˆŸÆ?£÷oÁBEÝÔsõ骇CÃ'8ó KhBRÈàN+HkêE5¸O-Æ!‰…º>‚gÈp¾A‡«s¸ %u„Àkm¢ººðÜ(Dä¬n¡û™¬@O#kEÁ„2.%~Š*Ô´¨ÕÙc×Áyï·¾Ž@1œ ©h€ Ëñ‚(ÉŠªé†iùÜ›ÿ‚“lC&Á^_ó^‘2YÙåK+i¡Ú†è°ÝÓ³vJI›°¹ýÚáQ’O*¹ÝÈTéeíY$TUUUTUUU(=쨠ôÖ•gÅ >ÊE–ˆ 
%æIm•ñQÇþjHR‰Y2,ÙQ¸É©U÷@}C}+~`Øp£³M/XŸ‹ÀŠá æX´Hjlþ:¶ÛR­]o¥d&“µ=J2333333 ÊÛ¤iæé’´M’$I’´Ûûº£Šõžˆ¸PÜc}ö€•$I’¤l@­íO¦ìydç •lÁn0Ý7$”Ôº÷™ö–±–ÑøK¡ —âw?Bï…#(†“F–øKA¿tŒ±«,‡í|š°P)ÕL†$’J×0ãÁÒe¬×Ó\ãv™˜°³A1œ42ÿS,u4òý2 Æ›™¿…óY;Z@¬U&†‡D@ÉAâ[$d”Ôú, '#ÂôìNK¾g.fož{u à ’жȰ/ˆ’¬¨šn˜–ωùv²žbO‘Л$ `{‚Xù쬷ñ±¬KiRJÅŽSv‹·8ë[IcÁXMìèÕª¤¬¢ª¦—áó7-Òo(›yì^V-¢Ð×*Ø^+z­²ˆº³oQó¸³NŽa=˜ÉëÌ–÷n2Ð^è…çE§ÊâNçÌŽúQ!yÇ©W·ÝÚ­ÚºóÕ‹þb^¿IÀX 0( @< à “d‘Óh Y È?0ءÆ`Åp‚¤h†åø¨ºCÁô£w*L(㆙õ'{”Tçß¡¸Îy_¾|ûÉïyô¿Zÿ‡·áÓš5ëzÖÇ[ë¤K…’ÕÈ1¹'Ààž„ÀÓËðY’oÉ*:IÆn¶jh‚à ’¢–ãã2H¤q*šZÚ:ºzú±^5c“8™¼`³ÙcŸˆ¿ˆwðÒÄÊ~m¿.ݵ¯_rz–p“d)Ûž'Ôë0L=8"’­ƒýâÊ÷a|gBêŸÔ&£LÔˆvcqlN® ¼|Ï:Ö» 8±)”»H—ÛÕ[}aÎA 0 ƒ?O¨¯}“Ph ‡OXL$‘)TÁd±gMç.¿Ãº‘o·óZ°t!‘X"•É[YmžÝñqîÞ"´Ü9ç$IH’$ I’$I égš[ÚYyE%ö¹7î¸*¥\¦*‹,Æõ¥N)á ­ò®4wÜB0‚b8AR4Ãr|\á`œ„2n˜±[5©âÄ7n-§#JrœŒ¿2§.5P…g äDÔ¨6v¨?ª)úKA ÷÷~øö|Oˆàªè¹ÚK1CXÔñfÔÛ‹+ªØßÿ1LÙéS,uÌX¬lµNc¼¼—_Û_ëï9$Y^œŒô#¹J2ÆI’$I’$I’Œ9§fÝ…2^Ϥ !„Bˆw·3Y¡B‡1„B!„B!„ךI¦™†B!„B!„ ìIO{6žSôJ5jÔó»R/ÔEíT»©ý B!„ÐhBÆBãit`š DÙ^&˜S»F­Ì„a€EUVË;ÎÕdâqñ8u‚à ’¢–ã“eC± 53jaµ¥IWgä¦^wžy…©_„ó÷üØr ¥ ðl¶ytN€Üf½›g^…þÒõ‘C•¢HQjfÔÂjKs­ÎÈN¨Ã:g“µGDD~ͱ<Æ+O]ÝÌù¾¦ÀA  ®©Õ˜¿qÌ­F»÷ŸœµŠ°óýZµ-cemckgŸÃ¡ï+n\ð×ÜÖ¹·‡§—7c8ÁB˜PŽD‰ÜÈ í!„ì‚Þfk† „;“ÒÀÿ§wQ@Mˆ+ôEö<üý¯cœÎþ´`!PÜËö½ M‰Bg`qøû¿y4•(†$EȰ/ˆ’¬ì—çYš°`9ý¹ÇÃÓË›±Ëbw8ç*ìÀIaB9>!±B$É»£5Ë21j¦W111qÄGñ%ŒâbA·#PEQD½£-ZÿþÚýŠñ7®+6ÒÓŸÔŸŒ²©Í°óÆÙlG›³HƒÓ3 ™È½ê¯ñ^ÀëF‹Åb±XüQ5³2›ÃWø¡èµ÷_Êõð‹Ãˆ$2…J£3˜,vû±£LLÍô‰%R™|ó[°¿E$2…J£3˜,vœ`íâñ÷¼'èOH$–Heòý±‚û]²³ƒ½ÎKxÉR޹fŽˆ£¢©1Ø8àøÎu0“Úd”©ƒzá~@âl./ß„ ºB¯¨‰r©þ$5Ð …ÁC›Û7ƒÅá#H·ˆD¦Pit“ÅŽ³Žvñøt&$K¤²•Ïnð¦o}BÖ:–HͰp™t‰úÔ$ÿ4ˆl5Ž_Û¸4V£1šFgC0‚b8i m)¨cƲ`e|ÿT=㯧ÀŸOaÙÉÐm)¨µ¹ºmü¿ÿW|ˆ:Ôô;y5!°  ÖÔmãeñ£( %ã‡È²Í@ÁçåS!) 
ûSDž{L”­¨±JÚ£ !I +G 9EêˆF»hœ´¤Û Ô,Û,€Î*À€À€@að±g$Âߦ˜•0¿,¬#gðfÞ#A—”–•‹ÑÇÙxV¯Þ¦'`.ÆYXÙØ9¶s\ÑÎò†127˜Ö“™OX*W*뺚ò×°±Õ)zdg´‡¾zâ3 ~Õ -Ô]$—o%QEµMºm6VkÙn@™VåX¨ô„TjHÔc imyCí¾jÖû¯¦°£âM5‘H$ßò{¬•=þuσšH$‰Gq ªUhôIR´¨b¾žÕ·ªïZÍ ;ê½÷÷ƒHIy+çÃçÏÕÛ~½€*Q¯¸“×÷Ô¨½ôÒk`Ô^ÆË'|ÎÝõ¾Ä‡¿hm•Ýÿ¢þ3ÿø ØÇè±ÝˆB!„0ÆcŒ1^„ˆ!å8Žr¥ÌÀÀ€0Æ8cŒ3Æ9ðuS73S7 Œ£ôhÁOñ;þh P´‹A«ªªªª*—1§aˆÀ‘@ÔèÉØqsãaêšZÚ“MF™:h“˜6ƒN•y,¦…žÉDV{+ìÍ‹¬(4‹ÃoaH"S¨4:ƒÉb „"±Dj{‰±å S+€"„ åø„À$yÊLµÚ\äüRŽÂ#áã0Â#£—GÌÀº[èÿužðöáa‰D"“/ùÑV.>‡Èä¿”ÐLYJ™Ò§P.£÷aµmøuVíOÚ?÷þ¢Ø×¨<°¹ˆ›÷æàäâñÕèiÍæc;Œ€‡Þ÷íý¬û¯ à²J“‘•WPT ‘ä¨h: 6NïøÎ‰ÌêšZÚ“%ƒ¦´©ÛlÓfF6¯iz†æLÒ,-ó÷²ÀÂ¥‹t¼Ý¼æÞŠ!¥®R”UTÕÔ5´´utõô ïcÔ—(†$µõr–ãQ’•ýbj›F7L‹ Æ–kAµÎf„r|ÂêIrJ­V‹+Fÿùó0@í¤°j¿©=U;j‰"«mWKRO‹  –jÕvùU€º·@4µÉj]"M^òk‰R˜"Nœ}Ϙ=¹bòXÒ&Eèih!cDÔ Y¬/dÕ M-›`wǼ¿®¯ŸiÇqæì¸ÕñØ…®½€ªªªªªZUUUUUýÿÿÿÿÿ‰ªªªªª à5 ©‚ ¥ c†#"©x5z26îã;&›Ò¨i³Ó£*ÿ¼lÙ%êàÌ¥R7Ï=¢à ’жa9^%YQ5Ýp™~y3¶Ã B˜Pnù¹†Ù }î æ[ i‚pf3R©`…•}“*ª½î1ÞšÖrÜn½æÕY6•IÕÖjãÔJ’$IÒ´¥$Iü‹(ÉGlÛ¶mÛ¶WÞ±¿í†â|ÉRN”†ÈÈ)\¾­@Q)ÌgpD¤¯PѾÆ`ã¤ïL¨A½†k–µ¶\[1=pª'ö¯[ôô'…<ÅsSw´akdýÆ¥‰Ë›Žn¦|siNÒ3œÍt±¥Ô,çZ9ÕZ]¶LN®t^¾«Ë]Pœ´QVèù"]ÂîWäoòëéËÿŒòÇÀ€À( ^¢¤¬¢ª¦®q‹>µÚÚÑÑÕÓ70|ë/O Ð,ÿðë«×³¤ßïÈ“”¦¶´×¥¿=ãm3çfUv޹bxüöÃ: ì0ô}£Ñn‡4¶¦f Æ-$—ؼ´-#ß|E o¶L+ë[´‘»í¤òIÙ–ÁE!L(Ç\%¹âœªÕoÈÜl¿—ýp³ëû÷¼v­äc¾ŸK#CƒCHca`å0v©}G,æ²ÇåD—æÃ›BV,d  a`áà‡ã·¸ððÇŠ¶¬±9º›ÍaxLPUUUUUUUU«ªªªªªªªªÿÿÿÿÿÿÿÿ)üÿÿÿÔD1~í(Ë”êÝÚLèF3°:2§«;ÝÓÙ«¯®¿/f'×ÕÓŸ´h2ʦn¶¡F„—4Ãô-Њ! 
$[é¹ yù4‚6Ä…ˆ[ôVpú@vÀ( qkÔC¢Ð,Ÿp-‘D¦Pit“ÅÎQÇåñÛó|ÍxýcLÎÄÔlAŠ“HeòÍW´@m™V‚ ·§cdgœâ0ÊñuÃü½È—¿UònX>½Üͱ; Ž*oK²ÜG-M##$wù¶E¥î¢Ý ó9i3*Úz 6Εñ u«khÖÒ¹¶uõ@ZO¤®=ýIýÉ(˜º£ 5’°qi¢~ÓÑ͈6g1m›éf,ÊÔÜRE–†Vòþjm0›`Ž\ðòeîxĂ椻…î/Ê.¯ç¿¯Èi¯Wtåg¸}  Âài””UTÕÔ5"íS«­]=}þ™ö­ï|ÿ:ï_ž@¡1Xþá÷æ#¢¤Ó'·”/S§IÿfÆ'gN²œš9rýÎã·ßÏ€ŸwðF¯yË¿GÆR^û–ï‘iÛ¬øyáºEÄ%Ž,-evË7O †-µ’°ui£z[íˆ6LXAÁð‚G0¡_Ø/JreB­M®¦1¿Ù~¯¢Ï[¿÷ÏKbkè˸ˑLÁ`ÇÍGHD¨khjiOZ4e¦Í ›³HƒÑöqf’å Ë[ôúC.›¨àÓ7ßýðÓ/¿ýñ×?ÿ÷s+îž^ÞcØré–ÀýGësÏÄ"É)!ßå”Ëeæ3W2Û¾† ‚ëÇ#– 9òòÖÕÕgî«=áñ –†€‘ˆˆ™YDDU•è²Të lÅ$H»Ÿ!Œì+¢ËQe–ªÃ_ƒ>~×LG'˜PÆ 3‹ÙƒTÙýXwÞRö€[CÄÕÆ(ØøÎPUUŒ¹¥´Xl¡ªªªªªR \ 8ð-IPf)›”±j÷J~eK…£e#ÎDÐÓ7Ô&¾~âsHÇ™s–p¼ J²â‰k¢éF\,+â½Óô|HþÿÿÿÿHWUUUUUUUUU¤ÿÿÿÿÿò·19|€9¥ØÝÓÙØ¬&¬ì¹›ÖA€$I’”$I’5M¨L×*‚ eÜ0³T*;Lîþ2xÖ;_ ':s6:t˜À‚$I’$I’$I’$I’$I’$I0Þ‚`9Óü6Ww0Õ š’rXJ*Ȳ^Ù Tc êè¡Ç»ß¸?×íÔš™2uDÀël¢9ruñÚXèý¡¥›Tí …ÁCÄ{þCÿ‘Ÿöè]Êåý WïÝÕ×(†$5ÚȰ/ˆ’¬¨šn˜–oýó5~©3Ž>a<ÔZò¿;_×Là]% ضÂÒ*2D‘äåKïÒ·Áæžh‡DIV<©ÞÝèF&­—µ‡Ø÷hÛ¶}Ùö5m#Û}ÿ¼÷lÛ¶mÛ¶m0¸<¼w* H’$©Î[’$I’$I’$y0iÉwk°¤$([•jå-|cR廋ýÖÓmÂÎ=7/ˆ’¬xRiÝèF&¿—µ‡»FDDDDDDDDDDDDDDÄžŸq‚{¥ÌÏ>e ¢mÑ-æ?G~ë¡6ØÔÑnt#sº—µçr›Ùìÿ?r ªªªªUUUUõÿÿÿÿ1[UUUUÛ¶mÛ¶m[¤®ßÀ"¥$p¦™ I"•­aÅàÒËTë5ýBîB¼3vNʸtdþ§8êÑȯŸvrûÿÍ.q:Êǹ_Œc0 °ï(ÿgšëµHÉìiT!a¤®5So­X*•Uwkú¥HлcÞ-x$i‚ e\JVyŽ.Ô4‰îŒìB}Xç44p°©ýÞV\ ¥ü«ÿ üÝ1‡_y}©ö‚j½ÉÃß)Sw–µ¿:c̉µl»ÝãÑÐu÷¦.Êvå˜2Ä~% øÿÿí8øà6‹K´‚+Ëý÷¾€¿2LA €g„ɃɜˆÒÿë+ükr`f$8oIàÍKb@,þABC3ÇZ&àEŠe¶ÑnÙ øÛ²ÂWÄ5˰ñ–}Àn-ûÁIdmÅ`?Ã?@Èf Y Àj¹k ô+ –o8 ïÒ,P°«AH2?° €hg¶ oZ]\º®‡‹þoýeÁÞÑ× à{Œ`±„ÈŽeÝxàüßßÿ©ó2)57(K¥üvx]‹ð æúWŒù:È?0øÎÝSBʘ¯…‚UÙf® 0´›ÐþE¬DÅ–í䌑ŽÀ’:ÙSè ¹$~ý’‘Ù`j£Ýob5y@R‹¿Ã‘ÍEJ¤DL&“¾¡´-²òÅêç?oøZpÐñ[P“ô5áuîÇÿùæË=¶éoåìÿÿ6_%ER&µZ£nM“DÁta¾ØÌœõˆŒt q×Á”Y•<}å`0F™;ÙhWfLÛUµcR³}t3uìŸdYSEŒ—¨§!Š¥Ú"U>I{" g²«>y§Äð¨K»à²Kœ¡†*Vïl’²EtçR~9Œ©¤ ™ÓóÁÉÿRxZ0!¼LÃáÄg^ÐQEñšlzhÀ˜°Ã(ï«q60NFb vF¬Á#„®.QèØ›¤Q›h¬¬û+¨V]-™;~—Ã1FMŒ5G¼€­L~ââ(¸æ F@/ð°±j»â³ÑE`­(2¨¯ ©_“6 |°o°¯ ÑÈ]¬vl:ÕBtøèºBÒòßÓþ‰Ë{cmz&s²%Jçe9ù2‘æ¶]­»‘ |²®ã2‰³qÑJÙÝxb0c‰F:Èâ©°1 ~­z  ƒÚ§dœ¥OE¶9~Xr[€©0ï”'Ÿn°®R¡®²#Ô€§­él\Ë )xã²& wy|u¥o—ƒ¤¹íg‹Ç¹¼i¹%&FVÐFQï`«4}¸0LýÁ»ÇJ±Œµ å@~^‹Þ¥I¨JO—v߀Žt ÝXA5'/aóPNùcLñ“mßl*OÁ¿Öm²fþJ»Áa„žl¨Õ0 ŸvÍê¸ð e0,»TÎ3,«ŠÈ•2êhz^-„)Å#‹ 
2'ijeQa¿CíºÜ¿LZ4Õ¿õÀ–[ûÆ–àÃ8ôßïW„ !ýnª70c°“¤‘ "/”S5-‡åŒhµ~!U¡¯¯rB ‚O?è¼LJ ¤Ä ÊÅÒ¿‚ÿ¼‡ò¯E5àLøÛ¿ÚhdÌ×AþKÂùb²[äGÌׂ_±ÜLVÏó›Ðþ…ûï¬DÅ–í´<‰‘ÔŸ,©3œ9…®$@'%ñXKÉÃÙ°.Œv¿Í «9W±­YNw8.ºËxŒ‘†pŽ)Ññ çÅ(m‹¬LgW?ÿyC0®®ŒßœšóÖ×${uRþ¹6Ì·ÏøF¬wtÿÿ·y &$fÃÚôT£d±µQ+ ì™ÿp!°~7iiíÙæÎzª(6úÊÁ`Ì-ÏÜÉÏX·ݦ§’½¤­wó#aÿDLYÓ½ùxOSUv9 O󜸈BD ܼ§ÄxíÒ®‘I/¹FÀºT‚ÑêÕ׼ؑŸÅõ™Áj¬4O±Ï;=<7Ô Ýߺ¤Rs‚’x !¾‚ìžx­Ü (îB¾gy_-·-NZ U¢²g±&iú¬Ð±« Yfi”CP¸òaéU=™ßÈb'ü®!:Òï Ÿ˜>"&-‡Î@Û íqÍÝ ŠaÏuåÖn>œAÛWHà²XíP+ßó%Ü—P…lBùˆÌ­‡b‚>P².bjº²jøðFdÆœ–52çz’÷fY£gJæüS’Ïï‡çÈfNjŽxr÷fW*ÅÏá_ìXí«(‹.Üz ûˆ[˼tW€þüzc>jž´ê}èP—2¸² %´}ˆöšõ m SvƒµÒ§LËF®ëx0…·\ÅñP? YÏ„PeòB&´KE b\*ä«}Bô ¹wÁ¶3.'Ã"³²zÂ@–j`ÝÕ–UâÇÀÁ Þ¸ræ aúùĽ ÔaŒ/µ>Ó¨ˆèî S¬d¬BU—ºÞ˜öÜ8*î: *ªÌâíñPÇ. ÔÛhŒŸWüôCÐÂy`œ.4^¸1‹ƒbŸ(~ü¢8ü4oÑ`žüìã¿'@à(=Q3SE+^ÉJ-<îòUX`á*WeѪݴx5–¬Ö“ùX&aõív@’×’’Ëú©k~ÒÖ¸¦ 39ÿЄHé›Dd3ÿ¶EŸë½ÉÖn¶[Ù²9Ó6}3º»˜î/¶‹ëÞâ›ñpó€^ý._!™”De¤ƒ4N9íóÈ )°"ôÔlDãÔæêØb1mådQ2éø§éÉtš#©]`n=ZMmIÕµh× é”¨lô´ZŽ[²¥[¶å+hïC‡ê€õ†9ÇÛl‹­–´•P¿Áì0h»>‡ ù²U[ÝRËঠy½÷Á;Ÿ|ôÙÿAûtõ¬Ùâ@”“›+wÞü†Æh¹n:ÿHY¸H®_#fXºÌ9çL¿ÌøŠEÐák”uL@P‰g¶ØÔÔ$Jp`mÛåípÂL,uçfiÚ¶ú²f?Ùå$_(íÖë®¿áÆ›¤•ñ=+ ï5¿ÚÅw+¹ÑŽìsÕï07”«8!ýžýwÞu÷=åVWÉT©ú¬lB)èëRuï}Gï—ó„Úžç¾^²ùÐ"íýL‡ÇÂŒ.x¯U’4'Ÿ+èuÈ~]ý»óöN—Ï*÷“b±ÃÄ:ŠbÔ“~g®LŠÅ8»¶{„g:bçûw®õ[½û^‹ïËÿä Ѧ¤;Àúý‹°AÁ õ¬%†¼°ÐÉbesVÈÄÚ¥×Ñç5â $c)W¢ƒ¹#å…Ž=Z Mó'çŠ%\9×5¹¢¼ØZàÄGÏ9?ºî}Œ¢)xXCA agŽ^Otñ¤Wê®úÊÚô! 
!Ö»OîÜ´áM.à9R;Zýüxž8µü• LŠÅžmþr”“¦÷€”„ÿh7q÷\0°é«¼¬û U<ËwÏ*,†+<Å"ÞÐ'Nÿ½:EAÕº¹èÅ\‚õU{ 7BÇ b•%“"In¬eཞ[¯ùŠ{ÿ xöcª{|©++ùê©—7?I2®L÷ܦÂ^­HEéH}Sßž2e>ä2ñ·×Kf‹^Ùa":A2˜‹-hîiørª8¥ªDU¢¦«TªIÖ4Ô$ëT}C½E½!bî_^™ƒ+½»¼l léåR°ðS2˜~ ëîkl_/O ȹï·xÜUÄhú|Qs?÷€ †Î Qña’K ¥ó¦ô±Í§c×84ÿ×’ƒ2f`j†Xx| 1·ÿ^k߈yÏ—\ rocksdb-6.11.4/docs/static/fonts/LatoLatin-Light.woff000066400000000000000000002156341370372246700224470ustar00rootroot00000000000000wOFFœQ0GPOS€9ík~þbD[GSUB;pÎܨ&“ZOS/2=@\`™R¢ðcmap=œ €åG‰Tcvt A¼/:O&lfpgmAì§ —ØÛðgaspG”glyfGœNÉ¡ˆü2Îhead–h66 º6 hhea– !$q£hmtx–Ä¿Då•Ãkernš„nWjurLlocaÜš¤z ¤$maxp x  7name ˜€Å6_post ¸†Io)prep {ŠöŒý.xœíy|SÕÖ÷÷é<¤#PFËŒ(*(XEpÀYPDeA‘ÑPx®38O  ‚¢`ËL )%”’#×E+ o iŒæîç{Vµ Š÷ý<ÏûÏûz>¿œôœ}ö´ÖoýÖN²QJ©4u‹š­z]yÝ-ªÉÝSÆß¯ÚÝ;~ø(uÞýwM£º«Ê(­•Yö÷F÷quÞÇ«„QÃÇQMäµ…¼¶“׳(bÖqâ«YWçtÕ¬ýM]—÷¸²KŸ¤> ˆöèðìç)£}’´ð¦q™Š;+n|Ü[q«9ösŒÏ‹ïÿ\ü‚øeñ ’Û$$ß’<•ã}®p¤4Ii7>åꔉiÃÒÞHÛÈQ‘¾ «ÇuYUÙ7eÏ̹:gfÎ✊œ#¹ù—å^Sooý8þ®¨$‡£AAƒÉo4XÐ`Y^ý¼òÜyþâhؾaAÃ×å5êßèÑØ•o4Zʱ…ÃÍá“'y¶Ñþ:wSb×›æ7íe¶›s¤éÔ¦/5]Åá>­_#÷i¯5r×?"å­sÓ©ù#óÿ‘ÿA¾=wþ‘æIÍ;5ïѼó‰-n±ºE¸å¿[µ7Ñj$Ç­& ¦·]ÝÖqú·?©³U¼^¯@¢þ\ÕÓv•§kTÎmõ6Õ^¯Vgh·ê WªaºH=¤Pu•š®ËÕ ½[=®]jÞhäëõFsд­@kд=´Ý¸Üîõ۸Ü î#ôJc$>Ô?%z·a›Á°U» p‚r]©2T=ÎÓ!z9CЫYú]5[¿ª õ˪X{•]ïS›õU®Ãj'¨Ð³TÎ O Q­jŸ\®RÕ'À¬a#>U¬¡–_N¨%'·ªÁúk5^ûŒ§U‚ñŒϪ$ã9eσ¹:j¼ RT²Ìe[ý%OüB}_Qß^ãY0^ÐGðè}H  ¾Áôh3ì¨P‰Ô“F=9ÔSߘ£Ò¹Ôû‚²Q:Mÿ¦²AOµÕ»xú'õ^§Vê ÚÈœc/;sÙ2éµ}B½§êg.óRɯ¨ú0O}À%Ô}‘Z¤U‹÷J½„R¨þÓ«ç¨åy0‡æª8jÉ0ÞÓ‹™µj(ÖßÏéñ /¸Ü úÛ@p»Þ£î Ÿw‚` ƒ!à.0Ü †áàp/¸Œ` #Á(p?cßXÎ0ãÀxÞ?H[´úa _M“Á0LƒGÀ£úGõ˜®ÿK«™Xðq<î ý½§6²A®ÞoÔy !hƒ& ™þÑh§3NíÁ  8€ À5àZp¸Ün7›Ápáác¬þÊ`<ã1ƃÁð˜&é¯É` ˜ ¦‡Á#àQð˜f€™àqúù˜fë=Æ€'ÁSŒïið 0=ï9Îσ9`.À9¿^¯€WÁkàuðõ¿ ÞóÀ|ð6XÞïRæ=ð>XHß?‹Àbð¡ö%`)ø,ËÁ'àS°|>+Á*ð(E`5Xƒ Ö‚u`=Ø6‚M ”èÃDÈÃDÈÃDÈÃDÈ#DÈ#DÈ#F)vÞÊ@9¾“ Œ¢kjy˜Ì•J|ô ,ß»ÃÌÕÁÚ»©Ü]W®!îí$îU™’àÓç‰RsU}âVœÞLÉDÐóˆ ç9Rëÿ½›ˆÙ‘'7óäf"fGuš”l£{·z£)#`ú4åÞcž^ª{¨e PO¶j¹X•ékÔvðGm× #½Ñ‘ÞèHo¼l^6/›ƒ—ÍAGîEGîEGî=!bºiíg+b†ˆ˜»‰SeÊT™XÄŒÓNцý6LŽƒ?Д^HÉ}Ê¥«$:›ÑxªéE5½(æ|Õ ~µm(Ù´×ÿd„‡Ô™z9£\®Î¢æ³©ùÐtBnç|ç;Á0 ƒ–"nÔ7jˆ!âF q£†¸"n„ˆ!bEˆXQCœjà|·ázžÿªêïèýêC¢òG`¹^«>+éÛFPŒUíú{ft?1¡†˜PïÃ(¹%÷¢ä^”Ü‹’{Qr/JîEɽ(ù|ãVÊõ·þàv½‹,Ä" ±È!,r‹Â"‡°Èr,²‹,7î×Nc4è7ñ D<Băñ 
D<Bp:§Cp:§CpºN×০×Àé8]§kàt ~[§kàt œ®Ó!8‚Ó!8‚³!8‚³!¸†[a¸†[a¸†[a¸†[aøó+üùþü BdæIË™åO@Ì£¾bV}權G¥òT”§¢<å)O­U[¹e !ú†›y¼`ÁbžcDª3žhçÊ]hØ4ìaôj :4퇯Dñ‘ ±ù3ôà>úúz0 -S;È^Ê9ï¨rC=ŽØ>ŽùnÍ\·g®[2×í™ëFÌu;bnbnñ6“X›K¬­G¬­G\‡ bÌoÀ¼âýMPþ\+ ¤Z†fÄËtbeq2‹9O<ìL<Ä^¡¾a4Û`Ë Ø2õw£þnÔßú»Q7êïFýÝ0¤' é Cz¢ú=Qýž¨~OT¿'ªßSfä΂®zª?ÕŸ‡êÏCõç¡ú󬛋ê/Bõ¡ú‹PýE¨þ"™Å«8_M¹Þàp-¸\n7‚›€9Û}8÷·€[©³çÛ@@ ªµÄݺ/Ìì 3ûÂ̾(zO¬ÜEï Kû¢æ·ÀÒ¾¨ùͨùÍb¹©c‘ã!0L“Á0LƒGÀ£ú˜}jþj>†AͧÂòî–|¬TŸêÇÔ ý¢úŒóçœWÒç"°üáûñŒýx†A°ˆ `À"¼d.^2— `À<"ÁD‚D‚D‚D‚D‚D‚D‚D‚dn27‚› ÁM†à&Cp“!¸ÉÜdn27‚› ÁM†à&Cp“!¸ÉÜâ‘èž'õʉ\ŸŽzçTÞ›ú0çº^:¿g€™àqÆñ}ŸÅ8Žzî?Ј'Á©<øÝ—hÑ—hÑ—hÑW<úMê4½zgӳ߿¼¼Þ¥Œééïs^¨o®õøÅ¼ÿùX–‚Á2°|>+Àgàs°¬_€BPVƒ5Ø`-XÖƒ `#ØŠA‰žN0 `:Àt2€©dSɦÁº›ì#‚u'‚õ%Å¢Œ ݇îÃs¶š+fe±èbQ@µƒ«k(¤ä,ü[SúEJ¯¦ô@üì0~à©§T!qk£JRŬìœ]p|µ—¿v‚ •‰}gbÓiØs6œŽý&c·G°ËìÑš–Y¡ž±ÕN3ìÑ {˜Qä3ætsúãX¯šÐ37ñ4B<ÐÃ=|“µS"=\IôðIz¦‡5ôp)=:Lj$sé@þif/ϰ²|VÙh1…³hÑF‹ ¬Õ‹“ÖV¡í¬ƒëpx55bL>Æä£¦djiF-Õ<ÂÓ)æÓê\ú¶’>xr<‘âMž¾ Í>ÌÓѯÛé×&úeæ“O2s…ôs1­l¥‡Ì\C¬ßŒ%Û¯;2‹—ÂŒf²'3Ù›™¼œ™¼ àýáñó™Õ›¬YuZã˦‡Yôð <{Ý/îŒ_ÀX_`v»1ÞaxÙpfø•iyHvÿ•|Cï¿§WûèÕ>jN­µ¦öxÿ\V”/(ßš…=f3× É) ™½X~QŒ–’Åí&ÇÀv¬D›ýIÉ溵¶d %¾£D9%VRb'%6Pb %\”(§®YºŠR>JÙÍì•’Ë(i§äjJn¦¤™+n£®¢ÝBZ,ÄG6òD1wìÜÙLî[ÁZ)Heéj¸ŠãµúÛ»\.ÝÀ% ;¸\.§=@OÐ \®äúUœ¯æÜ\®×ëÁ àFp¸ô}Á-àVÐÜúƒ;èÃ`ˆ ƒÁpÊõa`8 ƃynºÊV3°äLð¸ÊQ³(;[uPï«3ÕBÕE-Q稥êBõ1çeœW©3ðÇaøãµøc/ü±‡Ú¬ÎWeŒ~;¨P“Œ8ÕÙȹDËzp)sCÎ@cÐ<ε'8ÏO©sñ–sg€9³Ïq~Ìs3m¼H}/—A þn›Á°UåàÛè?ÞËš%*žžÇÓÃ4zf«µ[7²(Ù@”l J6%ˆ’ DÉ¢dQ²(Ù@”l J6%ˆ’ DÉ¢dQ²(Ù@”l J6%ˆ’ DÉ¢dQ²(Ù@”l J6…ãQ²(Ù@”l ŠšGQçßQæïQæßPæ>·_>™)$+܈Wû¹’’µw墼Q¸…ëQ”7ŠòFQ±(*…ÓQ"úoDô߈è¿Ñð4@D`å8ý5ž½Šè⤕µX0 &bÁæ& 4f~R˜ýúÌ~cf?‡ÙÏc¾ÒŒròÙ¿[CÂIjH’Rêðx#Üc1Þd–›Qî’Ϙ©É¬…w擪1üÊÀ¿3ðï üûr|9ÌÀ—3ðå |9ŸËÀç2Œq´5›ža$Ϫ1ôåjj:ƒ¾\@_êQãøR¾”/ep­­hN+ñ´O+ñ´r,J„E¨îC¹7 £ÍOÉh)dƒ§èïÓª#-›ŸÈ5 åi¹;-?DËi¹!-ÇÓr<-Çï‘S,¢—åªs;@µ¡ÅZL¡ÅZL¡ÅjM¡ÖFÖxÚSëµÔzµÞcqãLjÈ#¢@gžf¶ŸåêsêAJÝF©§)u¥:Sªž?»<­:P*‡R)Õ†RwPª7¥êÑ« zOéfb‡xˉª3dc†lÌz.c6lٰ̆16k6l̆Mìð êw¬ h'ײ™°16l[kƒZH …ZÈgF’˜‘$f$‰IbF’h5Vh5V,ë'Ðj³e~æÐ‰–“h9¯Ž &Òò ZnDË ´œ@Ë ulÐò?²AO˃¨µÍ 6hH©JM¦ÔJÍ¡Ô`J]Xkƒ&– r)u¡eƒÁ”ºRõéU&½J$Á¸B‰±Õ÷×ð¤Š;Éxßz]RÑâ(÷;åjäÝO¼;»!(ºÉ¨D¸[F~=æ™Y”Èfλr؈Ý6ÔËFl´¡^6ÔˆzÙ°³ Îeç³Q¯lb~6ê•zÙP¯lÔˆzÙP/š`C½l¨— õ²¡^6ÔˆzÙP/êeC½l¨— 
õ²¡^6ÔËôª®Ø¼+3Ó `,P@ÿ °@6ïŠÍ»b󮨼+6ïŠzÙÔ,ô˜&É` ˜ ¦‡Á#`:Ö™¡ê¡rõQ¹†Œü4¢*×èŠNtA庣]P¹î¨Ü9¨Üs¶ˆrWåzå®FG.Fé.Eé.%âMÀïºâw]Qºl”ΆÒe£t6”ΆÒÙP:›1EµEí²Q;jgÛ °~ÞT`©]6*°ØQ`©]W|´+>ÚÕxU5Ç.1>Äû>KÀRð1X–ƒOÀ§`ø |V‚Uà PŠÀjPËí`3ضâ¯àÛ˜k37 [™ÒVüh ‘ºï+ÂûJˆÔ䛸R†Uê ”ZB,ÿœ¬tyáv`>•Di3ó*£T¥¶Pªœ;ßa£Ø÷ KP—e¨ËÇÖ§mæ÷ø{*¿P´±ÄúæÂTŽ2Z¨âù=ت+y÷EàbÐ \ºƒKÁeàrÐô½ÀàJp¸ô×€kÁuàzp¸Ün}@_p ¸ô·þàvø÷ 'ê~ˆµz5kõjÖêÕ¬Õ«Y«W³V¯f­^ÍZ½šµz5kõj²€½¬Ñ$8L&ðµp¸˜Úõ·¨þ!TÿªÕ?„êBõ¡ú‡PýC¨þ!Tÿk×ßÉægíú;k×ßY_FÉÖ£dëQÖ—QÖ—Õ¬/«Y_V³¾¬f}YÍú²šõe5ëËjÖ—Õ¬/«Y_V³¾¬f}YÍú²šõe5ëËjÖ—Õ¬/«Y?þÈúñGÖ?²~ü‘lãk²¯É6¾& $¦ ƒÁpžüxZÅÛÓñöx{¼½Þ~Þ^ˆ··"Þåaásà^'xjƱ;8ß €pv †€»ÀP®ÃÁh0<ŽÌBEfcß'“]ˆ†/Aµ–¢¯s^Æy× U{8}&œ> NŸ §›ÀéVpºœî§óàt¼£÷õé}}z_ŸÞ×·2sÝYßü<[¢õ‹”} ¼ ±bÜJäv'ýI¦Çóy–­ŸOÆç˯Ÿ_…ϯ¦ÄJØ)±]îîæîÜuqwGFTÊçó.ËWvŠ.˜åwXµ}ËÏPŒbhrÌ,ïUˆ•ßMFü®|æ»_-e-· êW¹»—»ßÀ«aî«0÷Už]ÀÞ%‹Œµ·×Ì®ÉÖ õëü夼ƒòf?E§>…0µZŽŽør•¬7ª š‚fà4дÑ#U[ÐŽœ÷tÐ^VgègT=†ÂrØQH¤éL;m:ÓçÎô±}ìf´ÓÓA{pèÎàp ¸\®7€ÁMàfp»iÜît©1 ƒÁp îÃÀpýŒq¸ÜFèÆH0 ”èrXRKÊaI9,)„%…°¤Pe1Ò—é*F`„5Œp­¿Lë/ÓúËÔ\CÍ5Ô\CÍ5Ô¼š7Póyz.O^çéu<=—§çòôÜ“<½Ž§×ñô:±€ 8±€ 8±€ 8±€“šGPó,° 줅a´ð<-ôÃë°@%øâ/,àÄN,àÄN,àÄN,àÄN,àÄN,àÄN,àÄN,àÄNF1‚QŒ`#°ÀN,° ìÄ;±ÀN,° ìÄ;±ÀNFú<#}ž‘>ÏHŸg¤ýi?FÚ TbJ,P‰*±ÀXà ,ð…|_!Ñe|ÞÙ¸˜N7•%Ÿø=+ŸRØÌU¬xöQÚc­<âÍÒ0ëåÒ_ªĽn²êÉä©FÖ7ð¹Ä†Tj03¬&ÊPy0À`^_ ×j«Uïéo•KÅã Q?€MÂØ$ŒMªž~»„±K»„±K˜h•Àz\kZ€– h à§Ñ´Óaü9Œ?‡ñç0þÆŸÃøs=ô“øtŸãÓa|:ŒO‡ñé0>Æ§ÃÆt`$ÆêÏÀ80<&€‡ÀD0I/1&ƒ)`*˜€GÁc`:˜f‚ÇÇ`˜­—£ÿËÑÿåèÿrYƒ˜ë¶9ø§éͯè2ãUðx¼Áóo‚·À<0¼ €wóh¼ÇÞç¼P¯!—XC.±†\b ¹Dˆ\"D."—‘K„È%Bä!r‰¹Dˆ\"D."—‘K„È%Bä!r‰¹Dˆ\ÂüFm‡±¬ëÁ€þ›@1¨óé…áÒA£\ÿSV穲*5W¤³áNîáNÞáMÞáMx6Æ=C< ςij'‰gK‰g“áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~áG~ñõ ¾À×øz_àë|=€¯ðõ¾À×ÄÞÕÄÞÄÞõÄÝ7ˆ¹osçsçããA|<ˆññ >Äǃøxÿ â¿Aü7ˆÿñß þăøoß â»A|7ˆïñÝ ¾Äwƒønx<ƒx<ƒx<ƒx$‰ÇAâqx$‰ÇAâqx$/%/%/%/%O&O&OÆWƒøj_ â?ü'€ÿðŸþÀøOÿ à?ü'€ÿðŸþÀøOÿ à?ü'@ŒßAŒßAŒßAŒßAŒ_OŒ_OŒ_/±v‘à +Ža¬†³êÙà%ð2×âÌŒI=¡”Z Þ#{ü@}¨þ‹Œq•zš±H½ªÖp¼®Ös¼¡ Û0ùÍ`Šjß>郴³ÒïàxÆv‘*ùËo(ÛÓƒàlò²s@Gp.­Ÿ:±»€óå7Û‰Þ%Dï¼Ó‡wúðNÞéÃ;}x§ïôá>¼Ó‡wúðNÞéÃ;}x§ïôá>¼Ó‡wúðNÞéÃ;}x§ïôá>¼Ó‡wúðNÞéÃ;}x§ïôá>8–i~r@¦M¯"ŠWÅ«ˆâUDñ¢x Q¼„(^EßL¯ªýÝу¼Ú‡Wûðj^íë}DîŠS~‹ØM%Ÿò›Ä*‰ÈœDdN"2{‰Ì^"³—Èì%2{‰Ì^"³—Èì%2—™KˆÌ%Dæ"s 
‘¹„È\Bd.!2—™KˆÌ%Dæ¼Ý‡·ûðvŸõiP"Ùù‰¦Íú1›ø•MT®"*W•«ˆÊUDå¢r Q¹„¨\BT.!*—•KˆÊ%†97ï÷Á‰¿!Ú«öÀª=°j¬Ú«öÀª=°j¬Ú«öÀª=°j¬Ú«öÀª=°j¬Ú«ö•+ˆÊDå ¢rQ¹‚¨\AT® *WœäW–~Xç‡u~"ôOÊOn³›Üf7¹ f”ÁŒ2˜QF&ä‡e°£ v”ÁV]¬%Zƒöðª8›Uý9 #8WÁ’"X²–¬†%«aÉFXRKŠ`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`‰ –¸`IXR–Ôƒ%[`ÉX²–l%E°¤–Á’-°d5,ÙKŠaI1,qÁ’RXR KJaI),)…%¥°¤–”Â’RXR KJ`I ,±Ã’mä<;aÊòž òžJò¬ÙJ´+ƒ9.˜ã‚9.˜ã‚9.˜ã‚9.˜SBγ›œg79ÏnržÝä<»Éyv“óì&çÙ ³Ê`VÌ*ƒYe0« f•Á¬2˜UF¦Üv52.![îŽ_õkÁuàzp¸Ün·ê½F?pèF]£ÀX] ëŠa]1¬+†uŰ®ÖúbXWëŠ`]¬+‚uE°®ÖÁº"XWëŠ`]¬+‚u.Xç‚u.X×ÄúÎÑfý"6G>Ý~Eoq[`Ü·ÆÁ¸"WãŠ`\Œ+‚qE0®ÆmqÛ`ÜW ãŠa\1Œ+†q¥0®ƕ¸RW ãJa\)Œ+…q¥0®ƕ¸RW ãJa\)Œ+…q¥0®Æ•À¸Wb˜ŸÎnÁ&P Jhß6ƒ-`+ZçNÉ‘*È‘*È‘*`ßWêØçƒ}>Ø·ã]ªÇ:?uýñúôê”:a Úa Úa š¿1rÂ@; ´Ã@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ ôÀ@ 4We­a` Ìå0°–ÃÀrh‡vh‡å0ÐËa :` ºa ºa ºa ºa ºa n‡ÛaàvYÏÔ_ÃÀÝ0°ú`` ,‡»` z` z` z` n‡>èƒ>èƒ>èƒ>è;¥¶]ŒžtSy00¦° ÙJ­;‘…uµÏ °Ð °Ð °Ð °Ð í°Ð í°Ð í°Ð í°Ð í°Ð í°Ð =°Ð#ßËBS÷r`a9,,‡…å°°Úa¡Úa¡Úa¡Úa¡–ÃÂrXX °Ð °Ð ݰРݰРݰРݰРݰРݰРݰРݰРݰР·ÃÂí°p;,Ü ·ÃÂí°p;,Ü~’OvÃÂݰp7,¬„…•°°~£®……saá\XØæ†U»`Õ.Xµ V9`•Vm€U`ÕXU «°ÊÁÚ¹B @ Gð~$ÆP† Ïõâq/àEsñ¢¹xÑ\¼h.^4/š‹ÍÅ‹æâ£±ì.,» Ëî²»°ì.,» Ëî²»°¬Ë:°¬Ë:°¬Ë:°¬Ë:°¬Ë:°¬Ë:XcV°Æ¬`YÁ³ÂÚ‰us¬Ï<Ì])XÓ5XÓ5XÓ5XÓ5XЋ½XЋ½òë™/Õ«êÝM­deã"y…‰}âšN éÖJ6™òˆSñªÙxÖ€ QßTÖzïñ/f·œ5žC©rYçÍWg™ßýÝfª,É.—¢½Ë@ó¹]o0òU£9hZ‚V 5hÚ‚Œëvýë wX½ÃÇÁÇÁÇÁÇÁg>kœù¬qæ÷3öÑ` ¨“©4z·„Þ-¡wæçuÅÔ¸„—Pãj(¦†bj(V½Ý 6±¢Ø¬ÊÔvåR;Xw\'뎪‹ê¡¦'MÚÏQ|“ê’’›Ò,eLúï¶¼cã¾4?3œDû/Óþ.fç}XA&чIôa£:¨Ž0ª#Œê}ZAŸVЧÆ…ø²_vàËv•Äüègˆ©¼OÃWÓ9Û@0?WÌâœÍõι ¹O}Î @ž^§rnƒ& îçùüݘŸG¶äÜ Ïh b¿¿›µ¼“µ¼“‘¼ÂH–©3áM=E…ÚMÿÎA'¢û¹z-<[ ÏÖóõðl=žVÏÖ³µ¨—õò£^~ÔËzùQ/?êåG½ü¨—õò£^~ÔËzùQ/?êåG½ü¨—õò£^~ÔËzùQ/?êåG½ü¨—õò£^~ÔËzùQ/?êåG½üêSÁÀ05ƒ!ú;uÊõ»ù{çáàÞß î#èÿH0 ÜÏߣ¹?†ñåýzqb“ϵig*õ˜&É` ˜ ¦‡Á#àQæò1[™*ç…3{Q97kgkgêæGÝü¨›uó£n~Ôͺù¦úŸ(\)±ÉAlr›Ä&±ÉAlr›Ä&Ç ŸŸÉsˆ'gq>œ:‚Nà\pè Žÿ<ùB®u‹A7p è.—ËA½Úèɹ¸\ ®WƒÞàTŸK÷¡L_p ¸U‰’~‰’~‰’~y̾‚”Á`¸ wƒa`¸^Ó–Á´e0mL›Ó¦À´)Æýäs£Á0Õ{ŒãÁƒ`xL“ôZb÷Zb÷Zb÷Zb÷Zb÷Zb÷Zb÷Zb÷Zb÷Zb÷Zb÷ZTÙ*ûQe¿ñŠõ4x< žσ9`.xAg¼ˆ­_/ƒW¸ö*x ¼Þ ¾7Á[`˜Þ À;à]ʼÞ 
õ&bý&bý&bý&Ôºµ®D­+QëJÔºµ®D­+QëJÔºµ®D­+QëJÔºµ®D­+QëJÔºµ®D­KQëRÔºµ.E­KQëRÔºµ.=‰Zï%ï%ïE­Ý¨µµv£Ö»ÉãXQuÕ¢A„h!Dˆ¢A„h!Dˆ¢A„h!Dˆ¢A„h!Dˆ¢A„h!Dˆ¢A„h!Dˆ¢A„h!Dˆ¢A„h!D`j¦†aj¦†aj¦†aj¦†aj¦†aêÑýÕlý³"B7?VêOÕG` yèRV—s^Æy¹~C}ÌocßAšßÎo²~uU‰~…~eþ®vG`wvG`wvG`wvGð Áƒ"X1ŒÃX1ŒÃX1ŒÃX1ŒÃX1ŒÃX1ŒƹWhø‚"°ZöLıÄAë—_æoŒÒÕàNùŒù[¯t5L~i™®Fƒñäq ŒS‰Ö/}Æ’'ô¦†öÖï¼ÌÕÞ8ãEʼ^p­\~K6PuUƒÀ`0ÜL½l†^ŽU¨5ÏOæú µMPùÆCêtc"5LæþÞOS-GhõQÕÖxŒrÓÉTfp~Ju%k¹‡5²~õô=O–Ò£wåWO¯ªVÆk<ûõ¾EF3÷óiïmê_@¾ñžºÆXCý븾ž:7P÷FÎr §jÊn«óKŸúòîdßs—±¤×ù»‡õöYôëë÷&ãé×½ôëÚmKÝÕ>ùÍt² ðÔ<€:Pçê@•(q7”¸J|%>`)ñjQáN:„êV¡ºU¨îAT÷ ª{Å­Bq«à˜ŽyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyᘎyQÜŠBqC(nÅ É>Ú!à.0ÄöN…PÜÐ1{§FÐÿ‘`ˆí£ ¡¸­½T!øë…¿ð·þVÀß ø[+àoü­€¿ð·¥Ý‡ÒîCi—¡´Tv©b½Ÿ·ÃçoàðkpøY8ü~V~-¹Y¿W×ÃÕõpu)ŠB‘CpÖ g½pÖ g½pÖ g½pÖ‹ïCm¨mµ  ¶T4€ŠPÑ*@ý¨_õ  ~Ô/€úP¿ê@ạpÝP¸n(Üî w…;€Â@á pP¸(Ün5 ·…[­>Fá&é*T« ÕªBµªP­*T« ÕªBµªP­*T« ÕªBµªˆ9^bŽ—˜ãEµB¨VÕ É>¨Ø^áªBµBÖþ̪BµB'ìåzƒúÞoy`>x,¬ßÙëõ!ÙÈG` X >ËÀrð ø¬ŸÏÁJ° | AX Ö0ÿkÁ:°lÁ&P JȆì`3ضbWpsÙŸÿZêÿl/»A„‰3’Œ #ÛÈ5êyFC£‘ÑØhb45šùFs£…ÑÒhe´6Ú°ú1öËÊ£·zË:J9ÂÇ”Ž]äèãåý?Œ ·áˆÝë׿¸£“­ã)óˆïÂñ¨uì7„eæñ¿ºþïïÕ?ÕÎõ$ÙC»Q%«buš²¿ÞI”;Å*‡«Y'\MçjÚßÜw›~ü]µJv‹¤ROsêÉÿ_Ý¥ûŸí—ýÿ»ÍÝo_Õÿ½ÝKÿ¯ïEŠƒÑíŒ1æ¿“îˆ7Hm9îøÉÈ4:>ÇŒ¯âzYq¬OÜŽ¡qûã‡&d'¬OT‰= ¹Û+±Â:ŽÔ=’š$]–45©0髤prèÂÑ=ùêä~ÉSÚ¥¼•RœÚ+u|êÃÆW©O¥þ”64íéIé]Ž9nH¿Ï:fËñ~úªôUsoUÝ]N»)ñõûòJk—ÓcTÝV‰²c%Vbßߨ—efä™dü™ò­ó0 ÃÁh0ŠÙæþ@ù½Uwëw˜ùÖþ?sDÙ~&Ù~&Ù~&ùl#cçm²ó¤lꤲh%‹V²h%‹V²h%‹V²h%‹V²h%‹V²h%V.µö²4 ¥æ´”OKçÉ^–)óxÙÜuHX„¯nSÉ´túI~•û-~l}Ó’~dÓlúaî7ȦÙô#›~dÓlú‘­fÁ«÷‰ÂK¨a©ÊSs^ÆyçBÕ þµ‚íà_Ѿ2ÖMÛA…jÁ²ƒüêŸ~fÓÏlú™M?›ÒOsTŠÊ?nGMÆIvÔdЋd5[µ '©j!X‚†-åé9/ã¼ ZZcî¯*Ö۹ÏêènœX/jwÛ¨´œHˉ´œ(Ñl ­ ƒå·²É¬^’éM"½I¤7‰ô&‘Þ$Ò½1{’COÒèI6=I£'Ùô¢½hÌœ4aNÓ›,z“Mo²éMz“HoY ™»1“­})ÉØ6Ùú•T²õÙr2½M¤·‰ô6Qz[w?J½M£·d²71Þ¦l? 
½M¥·¹ô¶½Í¥·Éô6—Þ&ÓÛTz›m©½Í«ÓÛFuö²¤ÑÛ4z›fe6iVoMI“}Yuö²ˆß$Ðj&­žA«6ZM¢U­&Ñj­fÒj6­fÓj&­&+óW»ø±éÙ–ÍͽsÔL éÔ¬–‰çåPC}jÈ£†ú–ÍmÔ`£†ÖRÃiÇ­ßÓO¶~§ó·[ùxV2-%ÓRS«¯M¥¯«¤¯É´B i¡!-¤Õ®ûc^U»¶ÿ‹=2§Þqö÷v;_j¥UÊŒe[kKe³‡a)£\̽ …ÜÛ¨«yâOü¬ÌÛAÌ4÷6d÷dÈ[,+N£¥»­}'Û‹Úò„ý‰]ëìO¼ñ$ûÿì3Š£wvZwJkïüõΖ¿¿Å-ûXNÜÙbfôc2ù— Ï‘ŒÞÀcþÆîcž+WqI¯™Ï$-K6s®vêô?½Ÿþ™¯~²c¥¿åÝœh—Žðú‹”Ù§«õ/ºJÔ^ÞíÔ>½["‡ÿ³:«Ì{<ý3kî?®ÈÍ•þZÿ€Þ˜”×ÈŸÔq€yVô*ÌjZéß©Ë|ú ^ªC¼Æúõ+ø—þPoÖßë½”°Ó³ïA¥.e6ÿ¨+lw`3¥ËµE¦?V¯\z/çÍÚ\-šW¾±þ\[Ã÷µï~µÎ»PwÅ3Ÿè…œ’k?ÑjÕ_Ñï¥úÞ½£çèÅú&ý¹Á¹‡îÀµJîo“Ù˜õꙌõgÙQ ãýë]³Î/™¯Ã–¥"<À¡X{ÇÌ×/Öù[±ß>ìóËÑ«Øî›Ø(èá^yç3-ŽÐöþê²ÆM‹Õb‰ïaFíœè/å\­ƒôÍ#ã9Ä<쥴›Ú~‚O'³§]¿.e]Ç\]§çñúóóiíµà¯òn«¼®—×^zƒ¾–™ì¦/Ê«¸rèªgòºBÄ»Kõ8=ÿø‘Ôþ=F*=\<‰éõ¯–U“ ™W?ÿú×Q[3ß‹õVfàJlÁF¦ybT[kÌO¸öï€éÛGÇ¡7Õ^/Ñ·Q“KÏçu e…ÝÎô¹¿Ržõг½}¤ÝâQGðϯۤOûè¡Ô*}?k³¶•¯jßýK8ó£® Ö)¬s@|Ãì[”j¤Ì6½‡šÃôÓGɽXpÏÀm³%9?§¯çõY]_gÖ¶}H§ã)}£¾A'뼋Óé\}¾B×Ó]ôT¹v—¾Ÿ™ú/Ó¯¬º$¾ðêÈU“½rþ…UêÂÚoéå6椌¾~Mì=ÉÚŒ—æë[ú…:s?Y¸½Î?¤âÝxîO‡‹vý þTOÒÄ£v8f¯3ÆodÞöéÕÄ{¥“k¿é·Í£·ô›ÒÆëz¥~_?Fïߦ ôó\Û„ý~3-WÛ¯ z¯Nî¨Ó/s·Â³VùÍ+‡ÿàjí“ÛäuGº–K‹ô`=¤N]g‹'Ï¡Wgë{yמ§¯æïöŒÿ>Ýk£õkÌÜ´:ñ«\¯“™)•xwD®áo3r¯ÅÓ1 ÕöëHm/ªäõGT-ž<)Ÿ?Z¨³Phó3«Ó¹z¦ìïO@o“P0sÍg~ÒJétT/l*ƒû™I¬3r)Q#…\}Ê5 kL#ßjÂõ¦èy:™Ñi<•Ïa£¥\iÉ‘Anך:Ú)e¡Š§“··çÈ!ï>ƒZÏä¨ÇйµžÅÑ€þA~w¦Ñüâ,£3ùBãò‘ ËÈ-/7zoô4z’=õ2® ƒ¼Òèmþn߸™<°q ÷­F2Û;ɬŒáÆ}ä$#ŒQd!÷3þŒõvó߈&£3g¡©Œ?GF›#ãÌ“Œ=GF’#ch#cÈ1ÄËd ™2††2†8CKu¶ê(™¢ùéѹê<Úë¬Îgä¨ yßUõ§µÛ9‘­™Ÿ* $33ÈÌÌO—îæH#C33Ö{Ô}¼¡FñÔýidlæ§Nc8²ÕXŽ4õ€‡Æ›ŸD©9RÔõczT=†¦sÔS38ê«™ÍÔãIê Ž|õ¡úˆñ-±>kˆ­ÚcŸ7˜+÷åêrÊO9Z©­Õg­Ôç­UG[ÙëÒV~u–lÄÅ2kÉ­s |ÅÈ3šðÚL¾#Ï7Ú’õšÖÍ‹æ=°ežX.GlÖFl–!6‹›eŠÍâÄf-ÉÍ&ªÓŒIÆL¼äqc5Ï6ž$#}Êü<Öx1öùšd믯ãoïPò]ã}Þ/4ãû«Éù×Åô§ÄØ‚7l5œøV)Y_¾Qn˜ÿÞS\b¶™Ç%MZÅœµ8YûÏþÓ‡‰ÜAbËWúb¢Ò·zØ®§ ÁŸÁ`N‚Ñ?¡ÒŸ×HÇÕÓø~¢è_˜º§çXw<ºyr?“ûý+ÆýcžÿQ^ƒ±“x”¨tˆšYe¢G£ÎqÏþƤ÷ßêVh_¿@=gÍ<Σ³Dß‚D®Ã§˜ŠlÑ߉‰¼‡uuÛItþ’Z*‰o5Ô¾ú,ÄRú©Ù=¢µ…f]´ú½>»¶TDèX”<&ce±òîÆYýG½Œÿ¨žüÉøåÞNy}K^cYZ±uç{}ErÉB¶Æ¢ríµÃÔüóV£ÏgþªÑ‡ƒŒr¶2gßÿoý×óF©#ôs—þ'Êafo‚(å~<ÆÉlÅSâ ç•俨ä15- ‰í¿mÜÀ^oݽ]?OÆäBkÖÒϯ¸»âh~Gýfît•Ø|1ÏT£d‡ñˆi7×*óóÙàŸ´ÿмÆ2úrëZP÷þë§äżJæõ…é±úeÉ™~àÈúëgåžXÖä 3ËgÌ óC2Û_ôV™{™ÏýXi¿Œå 
k†õè¼”xS™«‡¦‡™ý'7qÒÿ¬»`ù#1ÿÆocn~·îùkŸš«%ÎvëαþSIå•>Iÿ_?0j³ÿnðö;¬ûZe†OXý?D?—aG+ß´2ÀÇ$’x¤ýuÖ½ú²:íü„gV›ùgíqh^GÔK¡^·ÀØÛЧ¶(Ó8tm*Ò]býÄúô /ýH¡ý险¢I¢¢©¢¢æ§Tws×TÎT”s$}Gê9šÚLåÌåLå4ÿ- ‰Ô3‰#QMVS¨a*G†šÆ‘¨FK“EK3EK³DK³EKsDKsEKëÉ,$Ë,$Ë,dÈ,d2 SQ°i¨Vªõ„ùé!ª•$ª•,ª•"z•*z•&z•.zeC¯>¢ž%¨V²¨V¦¨V¶¨V®¨V=™ÓTæïZÉgò$ŸK‘|&žë濞ff5ñ’Ã¥HnÓ]C—%É' hÎ{3ω—Œ-E²4Évrêdl6ÉvR%ÛÉ”l'I²,ÉvšŠŠ‹Jžcˆ…šIÓPf¿±Ì{sÉU ÉUref3dfãdfëËÌ&ÇfV²”|ÉRÒ%KI,%]²”ÉRN“,¥‘ä'$3i"™IÉLÅ6†d#†ÑÔhÊŒ›ÙHžd#ñäš¹bæ$ñd™—›»cÈLeöæ½™ŸÄKf™"YJšd)9u2ËTÉR’$KÉ’,¥©d) %K1ÄÞÍĺ Å®Í%É[Ɖ-“c¶dô]äs§‹TÉÆOÉ<¬5«ê¬üÌÿÊ»ÌãCìfæ´)²/ÀÌìÍýÊI‚£ÿÅ[øëÿÌÿŠ1{—×zZŸò™¿ùQ®’x¹‰u9zÓYÕO²’[¬Ïÿª>³ô1+H§î†êNE;éÉóÙÚ¹µ”êuë¯ûåuó)Ÿ—OÓôÛµo’Ìd´¬æÖÈšðkV³U¬“ÝGUð$µ|+¯O×þ-¹¡¹6äõU=[Îvâõôä8½@SÂú%Æ\¡û“¡uÓcäêNÝíT}¯óŸ!ß.~}JöÿYœ2'ߤ.¥ŒÉACÊÄ ãe¥ LŒ—•B‚ð1Qø˜(O›¾xD«xñÔ$‰VY§²$B¥JlÊ’¨”%‘¨D¢<‰DꬻšH$Ê‘HÔX"Q²D¢‰D‰µzÒŸˆcF"CÖW-DZHTJ•U ‰M‰¢ -$BÅI„j.*_à4‰SÙ§r묦Jœj ß%všÑ*E¢UK™£ú­ZÊLÕ—™Ê™²ÉL¥ËLÙd¦Òe¦Òd¦Òd¦bªÒ\"WsÑ“Ó$rÅKäÊ’˜•%Ñ*UâT–Ħ›òꬠr$65–Ø”,±)AbSs‰M†èO ‰P ¢?-$NÅIœÊå9M¢U®D«†­šŠò´`žbÿÂÁÃê}Þ7ø<쨷ÉI‚檄¬%Êë¯üõãÑU‰å‹iêVÕÏ\³Ê4}8KVzqò/Úöã¾¹^6dj˜÷y%±×,eþV§£ø›Mü-[42]¼.S42]|/S42]<0M42]ü0S42]¼1S42]42E<3U<3±Î'Éâ™Iâ™–Fš¾‘#¾QO|#^|#N|#^|#N|£¾øF®xE®ø@L±ÒÅî6±{¦(VºX?S+]| M+]g×â<çœÿ\Þïüh¢‡q¢þ¡Í“ùÂÚØ`㬭±>¯ÔÈàNbõ‘¨nÑ09q9ÏVg»s—}oó¾NÎõ^D#;«Ø\w§µvkë³IH¦×úi_@Ñë5é"œâ9»éU¦¸Í}ªä$_qŠÙÆ× çV±’_9Ëo,ç í)‚‚¦(Å)AIJQ†²”£<¨D4.1xT#–šÔ¢6u¨ËV>a{Hgõh@CÓœxºÒ$Ò[ýÀ †2œŒd ˜Äd¦ðSÙÎGì`¯ÒSu ©žÚ©£5R#4J£•¤q4Òx=£g5QÉš¥ÙzQ õº)EK´XéÚ ÚÄ.ŽðÇÙÍF2IâYf&sù™öÇÿâ¿ó't äp‰TþfÓu€ú VqEpž‹\áíçÖ³GÑŠQ*3„ÏØÀ;¼ËÛ¬á'.+SCøžÏy÷iM'2ØÄZÖ©„\ũЍ®ZêݪxݧûUJíÕC=•@/f1‡yÌg Ha!‹I¶ ïlsmüýb·0ÁÎ<Ï©ä4w2œLçTØü°´°ô°-ÑÓÜH·¬[Ñv=7Ömà¶tÛºËܕ21^LlLœçx^I¯´WΫèE{u¼v^¢×¯úž ¹—Âssý¼·û\jîåÍ}—¹¿æ>ÉÜ˸åÝ(×õÝãÍ=õî¥Ì½Â5÷¾¾;æ.E†>;®Ô†Ë™¦b9r¾Ë9 Á´« N lN5u ÖÊ>½.¸*;#ð§é,.šNVfÃÉÆ…Js•‘.*kqÏ2ÍS>ÇL'è|ëœñ×gòô/PŽç)uØŸ³þ÷9–ßPþr‚‚¹–ÿi–ãó¶ûC–¶#œSAb¦å?Ó²š¢ÂLg½Š¨¨ŠYú£-³®¥=Äa#±®±g)ŽÏK1EŒÌö~’;¨#?¥?ωD’Œ×‘Fl’1;ÎxjdÄŽ÷™eÔ³43j·)Æì#h°q›"W 3šªjŒªi,áÄ)VO¨†žT-=­šzJµi¢:4UCMR#MVcM¡-T_ÔDÓÕT3ÔLÏ«¹fª݉$A­4G·i®Zë%JÓSmè¥Ûõ‚îÐ<Ý©—飶š¯»ô 7ÓO•,³7ÑWwkîÕkºG¯*‹Š ÔzCj©Ò›¡ÐÃzKj™Qª:i¹:kU¥Ç´RݵZ]KuÆ©—¢ô¸6«·¶¨¶ª¯¶Qƒ‰ 
¬Ý¢¡Ú£áÚ§aÚËhuÑ*uSšuCr~3,¶–X¨~Ú®þÚ¡ÚåwP¨ÖúM´Î:/ÃÚ.Ô@¹†F?ؽ^ô[®•ßIí°Fù€¿v¶xœc`ÀI@èÍàÍÚÎÀÀºù1ÿìÛþ…±NÿÿÈÿüÿó¿~áÙxœ­VùsÓF–|$NBŽ’ƒõX±qšÚ+“RL’eÜùZ J+ÅNzÐ2Ãßà¿æÉ´3ô7þ´~oe›@’v†i&£÷íî§}÷“ÉP‚Œ½À…h=3f¶Z4¶s/ Ë­†Ñ¡èí”)ÆŒ‚ÑéÈ}˶ÉÉðd½o˜†¹™ŠDtèPF‰® çmÊ­Ü믚“žßñ·ï¶´­^ ¨ÝlÚ -AUFÕ0IJŠ»´Š­ÁJП¯1óy;0¢ šlvŸM2Zg´YQ†™å0”d´ƒƒ0t(«îÉc”÷Úå¥KcÒ…ù!™‘C9%a—è&ù}WðIªœŸ8÷;”-ÙØ÷DOôpw²–/­­ j[ñvȧ›;Ž,vj Ù¡¼¢q¯Ü72ihư”®Dˆ¥SfÿÌî§|É¡q%ØÈ)¯ó,gì ¾6£)Q]YPýñ)ÃóÝ’= ö„z9ø“é-f&xð8~OÆœ)Ãâh’°`äÐJÊe\OULò:-ã-ÃzáÚÑ—Î(íPj2ë¶%í°d;4­’LƧn\whF(ñîòëÒ išWÛXMcåÐ,®™Ó!ˆ@ziÆ‹D/4ƒ 94§Z»A’ëÖÃeš>OzCµ¶‚ÖNºiÙØŸ×ûgUbÌz{A2;‹üÅ.Í–¹HQºnr†Óx¹„Ld‹í áàÁ[·‡ô²Ú’-ñÚ[é9¿‚Úçž4a»/§ê”&†1/-Œ¾iš:WóÊHŒŒ¿Ьt…OS(ÊI(Ž\Aý_ss¦1c¸n/JÎŽ•éqÙº€0-À·ù²C‹*1Y.!Î,Ï©$ËòM•äX¾¥’<Ëó*ci©dœåÛ*)°|G%,?P¢Bæ‡JÀ6sì0‡Á.sì1ç6ÀÌað%sÌa2ǸÇ÷™Ãà+æ0xÀœÀ×Ìað sDÌa3ÇØgƒst™Ãà@ÑõQ˜yA@ßjt è;]OXlbñ½¢#ö¼Ðì5böO1õgEµõ^hê¯1õ7˜úPÑÍõ/4õw˜ú‡FL}¬žNä2ÃOn™ ”]n?~Sœ+qMcÿÿxœ½½ `[ÅÑ8®}‡ë|:}Û’lɶ,_²,ß·ãÛñ߉íÄNlçpNçNIˆs'ä$ „\„;@Ê™hZ …ô†ÒZ>(†òAb=ÿw÷I²$Ë9øúÿ•:’FO»3³³³3»³3<Š—Ìã‘—èž™gã9x9<È'mi‘„F-£äÀh6åSöô$ÂhÁ?³*=Ö¦Õ¨ùFƒ è½þ¾Æ–[ž1£Ì¬OͶÅAE|":5'Íž\`L*N Èkõщ‰ÑúDö yzt†‘ì}€RÇ„3Q9M鎩9Ö8sÈ’ÕQÉ ö’ä˜L³F¡¾” ÿ¨ã×VÅÐ<‚ÊãÑ4ÄWÀÓðx"`2Ë€@ :fROÊ€ùû¡\Æ— ÙÉóSeJú¬³ò,¥–Õ%³ ‚xÓ˜lQ:3œ¬\ޱ³Å ØüÁ aÏÑC¸íd¯¶e¨u{z>áðêe¤F­µ¥å¶4ù¦§G*<¼9©v†Î”ªzw\¢P'Ç…¦…›B´öBdô£àˆêâ¨Ì4 CéÜ][%6XQ)v±D,„¸ñxü¿@ÜhžŽ‰1è!>|ˆŽ^£W¡?òÉA¡ZÌ.¢+34ÓùØeiÄTRFþlôÝJbÐy²š HGh’ÎÇ65ƒgÙÞz¤ÙùZ“ó<¢Ÿä½ËãQ«`A· <^lZz}¡‡tÚÓMF;'„  £ž!Ž]xÙÖ³³½¤ÍÚÑžðÌÅUç—eš+ç•=ø“ÂÕç³öÆ“ûJæUÇ9£>qþ4­qÀ±gCÊŒ-Í™mELðã»:ööÙÑÞðØ7´€>ÌKB}Gi4$د2@©Ó  Ôq’¨QGˆõŽ|@ý&H—Ø¿fÛþÿ<9³ïéÑ{îþpz”‘dôìéÛð‡#-­÷üveÍŠ¾–:è+%¹ÚÑHΜA/v4Z%‰öĶ3_î?ôåéVu\¶™¢/ .ì1ŒK5ÄEFÊ‹`(gŽ 4?lsÌŽ$â&Bù  ³†·5wÎK-‹¯ âÑLX\Dzfhr‘9v÷ÿ<4ãÆ¨ÓA‚­•éQdÞì\ìž9PîRгÉ’¯Œ$tPÀÌI¤|1zƒ)Ð9s¿9ôûíÅÙ½/þ™†mËç$‡ËÂ噳îØþ÷ûš›O~¶·`ÞÌÎdöŠXNldEÕõxœâ‹ýZKA|b°”Ÿ˜bBBsð«³mŠKX'eºyMk8^„;ü³ÁiMÿèöþ~r9ÒVÄYg=âœOàti üÍ›ð7$ÖWh™0º~ŠýþKÝN.!çQCO=›ê?O)ØË âú7¨AJQ2Ù7П ‰Ûäé! 
³¿~¼.öx¾‹òW„¤Ïx‚=ã©é—L>ˆ¾ª¶:ö ±¶Šf §òžÙÜ Ø±:{èÞYÖé©€$±LbHЬYÝbþ¼âÙµ2¹,\&’ „‚”ÞÃsȧÆeøØ®~Àƒ òA€$@õªÃkH"-ˆåÖiªpJLäÈ#ÔÎCª¥ÆÀ;šBubç#b•.*¸³¿Ÿ¸F åb¢š‘8ïRküµ³M*#úÄ 1ílÁ¼‚}’Äë/VƒüãèõþÖVHö;'87ȯà÷*î{==£ÀÇbá¢/ ò«Ñë¯ÄOöƒ“B1»¤µ•¨ì+Ké.âgõ_ŠÅrú’óï¼±1W?OAÚ4xmÙÅûµ›f~¹k¬ aé0ƒh5ˆL¨CV™ 9aã—·“ÎßÉB¡2çkdG¹L!„ä‹á2KX„Œ õlhĨ14”ü{dØõ*z„/ VDÉÈ?Iù>²«Âý!–Ž÷d²CN‘2ÂÑA²Ã&“‹Èƒ$jU$¦Þ‚íµÓ#ÉuGduÚ­?Žà¶Ðx1îÕk~¸äÿ– eƒåñe¦ R*¥[»çŒ.ðzÕ4«\a– ”«V•Q¯O耺à }êÖXXz-ˆnµí¥ è Hwâté!þ^épSÓáKëÝ:ü?ûáìŸ9 gÿ@ÿKìNøòLkë™/üâLKË™/àx †}¿缎—e!K(´’AàúDcåRëd9Lã‹¡ ó«SéCý¶Ô¥Tüd.¤«…lž½!ºôõ†MR=cyaþ „ïªwšŠU*Çy›âp𞯭Y¹«ÿi¤¤ÝìçObÔó²ždä?ô€ˆç0þ%2N笻*LÇüJ÷Zƒ\üA«„¯Eõ©Gõ/½8RU5rqé’—Fª«G^¢_£?·õôŒ‘¿Ÿho?ñ÷‘§·Î‹%˸e ²õô—|ù@[Ûkp™•4ƒºÆ;Û“ç\Ac“;66‚Úñõ¯¿”ÛWH'ºL*´&!åâ­>hDŒ4~Ê`míðÔø¼¾;¶ÜÑ——·ì¡ù;].QˆÂã2«zò¦mîLq}—1pdÖÐÔûL¬Õ”XÓç¨è*¶&Úë‡[Ûï[VÒÛ’(6ƃmm«Êªg%&¤7¬ê,ß<;oJ‘=†qì „ã Ì×ÛCßÖ»wú¤VlÐíÖ¸oqUPc`“ö†ãq‰€ò0ˆý5daÀ±·ë©AVÞÏJ¨:åÚ;tJ+~úZ¿å^ö—]þ4Ä6ÐÂ>ÒÏ>Ú¸_±†ÂѯHe!ø[k+úíËc_S_Ã߆¸4 œM˜‘IÜÛH‚øÓœ“‹ræTíx}íÚ×wTõç,W›š¦¥‰\{Pk^홸öLf¾7;¾¾çß>¯°%ÓÙ.!Ÿ/dgt²ÅÍB0[,‚žØ » G}ïˆiZDƒÙ¤†[ÿß ¹žN„]޳…ZÕÔ±â3¬J á>qøÄAxâ†lG¤/ >hÅq¤f¸l/êéÙ2ÍSAl•@.é!x¬n¶LJƒs|¸Ÿ–ÍbuŒØ%—;ï)ˆ?OI„΃r9Q#’8›a?jØÏ>Øš³oü™ˆ-sœßvÈ”|pPª ÕRpVÉ:ˆrç D9Y(•8—ë,EGlË`{e°½.Øž YäéC‹Ì¨wÉFדۮ% ÉÍÐŽ£ûTÚë¡Í±bFH}Þ õe3Ô[Iä·Pî3‘eLzœÀ-Ð\1$sn6…éS§&ˆww¿}W¡ÞQaΞÙTat>W÷Ø‚ú•SãL‘Y|CÛœ<[Oµµâ蕇W WÌž’(ËhðmÌm²§U&kC¬ù1Īºz[÷H“seø†¸ÂÄàÈü™¥óëS¨íãRâ‚„X§7³çȇ0޾8ª¼ö]&A2ÙC‡·™ï‡ùŸð4 ‰·‡‰èÉ0g\Äùø~ä°|4‰¢sl‰ Í&(ÂM°ÿ@òÚ O÷¤1βnH¥k•{‘kœÑäÞ|ãõ®ol®“9ð@ÑÛªæäU,mLåwB ô¹©ÅËZÓœÏÕ>8oý^®¬caQõæY™b™s ¥mmMy_©)H& ÖUuÌNßøR²ºyþ*¤#©âè7^^¦J¬¶|ë/î˜÷̦*Hé VÁá×—šÉq™ÌXºUUYeõubl°Ÿ¥Ò'+ow]‚!T 1Xì‘ÞvUzR<ãGà9 Q[è#hŸÃƘMyÀæ5Í9ñ@«9ó÷á%Ö/**! 
i°)$µ)WJØŸ‘kD¤Ö¼¾ù΋8( =tDaõšòÑMˆ_ޱ«üíôÏá,)Fg65¨wF_ lŒþ%ž¨^“ç$vÜ15áÑLî?½¬ìŽJ…·N&3üÔî\‘Ä6}]uÄý¯ýЂ<\OWÕ<îÂ/ îSy]7äŽ{HssSõŒï¢­•<çô2cqQiŒb"Í)s,˜æbögÝÎ1êMb1A‹Ñ`‰“DŒê!L!-à;GnÀ‹u£›¼+N6¨Ïèãh_å¦ÔODê$Äq `HCbCS¡q¤L‚ü&?ÑÚàBÛËG~£´qéBÛu˜·ðͤ‹|¸ì©¡Æu =6Í|¸¸gY"s¦õ¥ÏÚÞêÜL¬Ýyo›S=©qø_€øÔݦÄLŽô¤l—_:|åãf±ó†Ôql†tA ï¢ÏN®'øœoË|±Í8Ð]B’àØÐãåÝl‡HóôýýqPŠ èSgGsq>íC+ `³YÜz±Þœ*½Ï!a„~·ŒFË>x”>ªùJôqú´«]¯.Ñ|†}PÓéS¼ptjèC…{H€½'#³íÚ=1¥=9q!¡-)aErÙQ±ø(_.,ùK3×Í‘¦5õfAº˜Xy×®{ênÙY©ô,_ød!´Pÿm°#o7xk˜rl; x>„« ÍYìÞ»_)hÏgl‘ NŠeºH¡“JuŒ"R'6™B"µÒƒ"R+==‘šRDh¥Rm|ý2’q6¤ÂO×¾˜Œ@M³0MûÆirÁ‹0ü€­pœáúùçœËÑd èdò%òq‰®”ÔPù¤rçåû<‡*þòŽd冒H8&a–§]ú3HÿméTþø4àºL¬_\•µiP¬ 2útë˃) 6…Œ(ïaÃ&Ÿþ§!‡%àe`O>F.QŒ…k¦š\Àq,÷„?:Kâ¹×RëdÇ3àä=¥‹*ì¯0SzØðnL¦ûy\®¹"ºH†ÎkØà'žÏüáeðJy<=3ASæª×ª¼ç9½3¬'0{cJ»sá G¶´åôý.Œ&¡ÀWÆU7Gø þ^=Ö»©·>àÆúÀŒ,Éõ÷üÊé^™6J¡à…B¥•tñ¨a¿¾U†L®@\Ÿ¡ ¹.¹xó†oöQqn½ÂÐ ~Á9½퀳¼æÛŸW7Ð5·3ß„“¨ æ¶æá¤ji|„sÉü’ø[òÛ0—nc÷à5<ù†º}ü@ÂöÉTzdN{nD’–&ƒ˜PuCÝ$ŠüúÅô¶¢Ø qŠˆ$é¶rÔeB\°ž­¼my€ßíˆAdn{N¤UÇ'ƒÚ·<øH_7[íÊš•’X°ôtïÀ™,âÅ’—–#”ãM ù3¥¿\ðNIZ¦B<Ç®òð™R$¯aüTIà¿(ǺÎ×}Äëøê¸ŠÙÜa;ÛÚ¸¦)>¾iMcÓÚÆøøÆµÔa|¢>òŠ5þµ³î]½àxïÞŸ[Ô¥Ý++ª7ö8‚dÎù辤·,^,ް;Gýù\ŒNÉíÛqŸ{(Žl-ž[ïuâ¯Âsé {‚üÚ î\ô:1cÙs'x€Š(˜25Ñs J?5ôÜÛ®“P$x±äÄ*\G´UžÓÙ 6Œ. 
q÷uûögŸÐŸoÅçpÍÙŸÑÓãÓU}Ûl'†U äÜ™/ìëH[à³gÆçäÙ‘a#«3v{=wÃÞ¸ƒçþCJJ›*ÝgÏÄG×.ª¶ŽºÚš…!.@< û Eó˜t_èÀ:HàŠìÓåÔvgGåê„´L’—Ý^n×öØA\Íë.6ŠeÅ ™qJ‰s-yYÆp±ƒ𣹓ÆzÅ‘{E߯ åbCÖ´ÜŒöCJm__mJRûÆiƒ÷äji©X›ßYRÐS¨O©ëè­KIj^]Ó¹+›…D…D¥—™’ó££ÍÙÓr‹5¦”d„ÂèØhcnƒ5­8):ڔݔoŸU•˜žÁãb1Ží·;˜n{KÓªºIc§oͼ=²ò4¦‘S†5,£‹ÒnHï™±o( ú$U­IM>"¤És¯ùÑÕ¼¹#)¹¼±bkèKËŸ^žéÈŽI‰ÔÇç6å”.i´zF)¯1¹¤!Ý”Z]Z‘Ÿ]Ú”UY`iÛÖ5B~>ûäÂÎ&¶A ¬ú;O·øöŽ B(B£KmÌÑ#/*æüš!´b"‹ª¯ÿœs€ð5”×Ýt!šq4ò´Œèz‰c_ <={ñ±ì‘IÜ+ò$âX è›è7A»ÀûúŽõXæäî°nŸ‘¸â`´Ùì~h{„Æ)Ç|0AwF&8{LjóozáófÛïãÉã<h äm· 3o”&9§%:;ŠDê/m p³Çñ  õ¤=GÕazÙàF6¿1ÚÀÔø:䣼‚iÉ5]:sj¨Òù2&Ï,à È0HßœE{ŲG!}AVâ„@†|†P9DÞØg§š¦'í³wëòoP>bñ= ›ù·çÏn0'nÈu—<`…æÄ·)&Ÿ.DØ 6%þqÉ4Ù6„€Ã[°Êq²}& &¸)Ú9êz¡ÿPˆ»¾?E9A¤º“„œŒÇnNðŸÆsbÙ™7Ÿ(ç _á7_öy¼h•ówç‹ó³Í¡pîe'ÿ6ÜïSéF“‹ã—ÿãd‡_e¾xÒ9v«RÂ/éqþþG ÄÎ5ö€wꇇ óÊí¨Œ‡¢€zl=2äCÈ‘„N´Ý]Ú’=Ày]ÄZçfäi_b^úÕÛKõ¹_h ЮÍ'™2àÑ6§…IHßÞz]?̲Y¾r~*ܽW!> ¯X}­oØÈWÙéÓØéÕ2% *å"‘TÑŒ„Ûo‘‹F¿N´XIF,Eí¤A}ö:ùO´ÎŠð•hÐñèBnSå [ÏÈiîväßÒ¯³O—3|öZHÒAì—|e)ûtìð:?ˆäK„ H 1k@(ДHऔ¸ '”GFŽþYáÂŹ“ÁZíè?u M‚#>äzH_xàX}w(= ×w¢`ýK”˜ýP —´€¹ì™N™Œ1Ô _&ne`„à%ÃÎ…œp‚Ï$B¶S¨;E6óúoÀ¾´ˆx'Ñÿîù†ó5¾JYúÛ9U¹:•/¢h1TÓJ1yùÚ2%¸‹| tM¥PDÉI>â.j»Ò‘ Û–â}Wð>ŠÝ'Seλ‰>g¼œXì3Õ¢aïK™[2{ ZD…¤Úóbô¹Iá 3 –VZrâÂÄ Šê¤Ë2ÃÌ!bYˆA S¬Ã³Ø­Bq©.&XªŒÍ2Ç!S¸fª6D+¤!nËÉâ<}êÕ¨@{œ/ç8X2Xe6W –”ÌC¯óJáÿæÎKL˜º°¬láÔ÷«óÃ;—,Þ¸qñ’;9¾½Bmßчñ}iO$ˆ ãð¤?Ó‡eʯANgÍçEñIW,“W „ŸÑŒt+Å›x¯ù/% ñ½$Ÿ€­Ü‹òD|÷ø!®‡ÐõlÐHm6µó_…ã¤úçr‘@ª¤y´~J.sšsebÄõ»ú)qˆDþ€šÌæUä×]+'A$ZˆUl†QŸa M°‚<µ)Ã`ȶÛhž#9¦ )ToÖgY]o¸vWóeÄ“ü—9iÕxÃ3OËò`S¨,GmÎŒ5çe‡ëbÃd O—c.ÎËä×+#ÍcAJ¸1ÞˆÞ R"à[n/‘zšØG·â¯›Fx¥MˆâêñâºIÄš›ü4â ÿw·Ö_jÝÝjŒ °BRô(H„§"B#•jз¸?…û³÷xwPŸ'èX´wŽR|ÜAè§c‹0o÷ók q°ÕýÝ~ð+àoÍÅß öw@9Ò¸¬›û¢ °·¸3+´Œ‰/JI)ŠcÊB²:©½æÒŽ´Øâ´ˆˆ´âØ´ŽR37~Ûøâ €Ä2 >¯fø`Ïæ5ªÄòôôòDuõ|ÁÚ ÖòÔÐÐÔrëº;ÐØ|B-%ré œ®PyÙ_ˆG¼drêâ“°ä|cLArXXrAŒ1?9,»À–VX˜f+ µ1¹–KnLLNbppbN ‚Ã?Ôþçôw„C~óö?³æ ùÖ°0k¾Ák ³»Ûç4ä$'äŒÙè5ÛèÕþ?¨m¤ë •·¶Ð{Þçë¥$á¨7@Ó?¡ I~Vªõ¨ô/#‡í¼Îç“ù%lGéÛßÕÌ?ßF¿=!އ¿RjÓ_”… ”×EÂ/ùuþÃé.õ5øœ¾cã,ò¹ÿ1=%ЙÈâg‚ÏøŸlã3ø[†á~Å0°-þ.&J'“é¢W[\{Œ¨æIÐy=éæ ìŸd³')³+à¾Í$o߱爨½ ¢}†tËŠ0¸Ã€Â­™©ã–ÔU.´Ç—‘ÈHÇm&Œs»\;‚ô"í£‹ø2éÅä~¨c2§ÚeAÎ=ÐE­†üÞÏ­Œ*x¡³.‚ðÞ»‰Ç£¾‡óKˆ8éÐsº^u5>ÍvÛ>ò߬I(äBb-_x=IJ$Ìn¢V«¬¡¡VõõŠhEtD”žNàæÙˆ×E¸æç ¼ 
hÓÙžOzïQë48ч®qîC®T<È"»¨fXªçÔpq×½o/\øö½]ÅçzX’QƒÞЬðž‡×WÌîß##ÿ~n~Åú‡{³BÛÒêUêÅþaåÝÝ_]½ÿ¯w¯üË!H‘NÓa«žøpãýן>ýñë÷oüð‰Ua4p<Íž vA{&Á…#”3wùJƒוUl £wÔ®xsBMn]üÔܾMFŠLQëz¢ g>”ÛšTl.4K5Á¶©= j+,m1ìóZ5âÜ?eQ]üÂ>âu§ØZlQþÇ®Q]äïQ P[\7çr ðq¬tèƒ8aÉC·€ÒaDJXOuTæü„ˆZ[Mz¨’`kÈ”Êv+® -ÉM$ÿ©V“Ÿg£rú»Z-ûÏwGš ú†­ýí ø;Æžþ}§H\ÜþÈúÈ, áîÍœ»ÚáØK¹84Ft_—ôQA&D|²þ©²½Ze‘ËC *³q¾f­ošž™9½©ÞJä^û¥û;µ>DfG}t]!F¡î1£>ÕsrùôÚVÚГY¶n†#¹bZ•ñ‰¤Ši•© ]¶â™Å&\ð”:’°4³.¡nñ”š¡¦G¦áÌ£ …Ù¹5³J3šs¢Œ£äú /‚€w× 2x ½r:Úqä“>ž«®CÁ^šŠ¢ôa”\›œb¶$kâ2 _QBšd¶›è4³1-'%¾¹±&Ç ‡m¶’¯ó ¼³‘h?ðaCF@ø~Èï ˜ÏÁùdÄa„O8ÀµÉáË„]¾Öú±¶ K‡ÁßÂ6á˜y“Àg[ƒÿÁóq=ùˆ¦ßEv˜* ¹:-_ #´:­à‚1„ÉI t´A  U„ïäíØ~GŠ’¯ æÏM sqéÉÏ€‘þlõ Ik0 ¶KXa»Þ£¸¿GÔý,‘ Ÿ5ºž…¿ ú ²ØwA4ïÂÅ‰ÒØMŽ|Âd69>5„0¹©aB@GùºEÈ%T`Ûð÷„þÞ€NâqËRÜî"Þp/¹Ý-ŽÕÊiWAêH¾º?ÛT‘W®DF¨ž“‰d¿ÐËå`¬–Ëõìû¡@ãÒY[ÀcÜب2PŽ4SáßÊDAÒç`K„LÊÞ LPbasˆfØ?q þÞÀ{ ãõo ó¶K<Šáç1ü<‚ÃþjÆ#ÑíNhlë°9=ÓDŽь™’›ã­jý7d”ñ~ÅÞÔô¿°]dcŒ¿ª®¦ÿõý“ÜÜ}‹=tø;÷-Šƒx«º=3úûËÑÿåù÷¥BçfHËÕÄ›åÄ™‰ÒOÄ^ƒÒŸÛNÄÄj· ¶¼Ë&«pˆ(òmbŽû» ¤jô_Ä.^Eß;þÝš¹öµû»Lök²f̆¾£¡ËHÖŒ~ùvi)·öFTr\‹×o?‹D%ÕææÍ¯OJªŸŸ—;XŸDL˛ߔÔÀ}‚ßr´í_‚Vâ9Ô†¿Õ´K£Ö˜ÂŠp“F¦ ê=ŸbÃаXøû!öÁEú;|v®V‰!ÜšäÐEŸxeÕÊ ‘0¡v°("¢h°6A( Z¹ŠXþ!;ûeFC‡.ÿàý¿ðþËCi ó2;áTÛ<ĵ Ò |k’»Q)0'G†nô#pÌÝèÃÃu7 ŽÁ6Í~OáXK¹8 -ŠBÕ£… „ýFư¯€|FF?öÃ.…4†ÿŸ)r¹xUì÷`ÄõŠŸ¸M-=a_UÈ~ó™ä±ßçùKáï0?a?þ¦ÙÈÜrÍ%ÉÃp êÝmlaåmwB#ZÇ V¯[ÊmJ­2$Þao#lå½Há(¬0©´*m|–јߨLƒÅ…ˆ~,2³>5kþà =6?)Š/ˆJʵΟ™ZŸ)'Xjò2zG¼6O_3ój- b Æ¿Ò<: Àô{á麬Œ>Z·Ôrâ 0ꋚþ–È!¦ùã„Êœ?—#HÀ真ˆ ŒÿAòNâEú(~ q1 † ‡9àèDÏ‘’’O¸ZA¼Q6+äT£ q6C¼ò’\-Ò›N Åù‹!/ÊÂTêPÙK¡•L(„` ‡¾(e”!r*Þ:$b  eþø&”rF´ qH($)¡hÈ:¤fÔð+e¸ŠQ‰Xˆ‚R âæß)öùýŽ'cT|´†3øÚD„Ì@Ç._$0Ä T®“²›uººsR]x˜NÚx®NL®e'ô+‰ÿ_G3ᄜ—¥R6‚”áL´WV¨Û±ÌC¶p]A‚±@îRO ‚uÁ\o¨kÔ›¬ˆ»é¥h& vÌFH¥à2ì8Œ‰&^ € Ö»ËÆÖ‘Óp>¡.œOˆÒå;†þgÎ|—SᾜáuaþWæ’¶ž^ky~L^Cû KGQ!‹YM¹™…Æ´ÆiMM%Éí›ïÉÕѱԔ?½4¿§ØÞ²`ñ‚–ôΰ$½Š;gÎ!þ_dOL°„DwçY§8¬ñf©|/ ‰ ‰²O1'•:’RSfä7®1šz˼OÖ ¹ É¶šœ””ô²ö¼Ú5±J‹-/6cfu¢Ýî^OŠ©÷]yÈÜ™ðܾ—žzÿ£öùæþ3Kóò–žé'~OÛ¯Ié«×¤äªÌYwÕÖoœŽ®aò'Þ$ßçr´¸Â¼8§Ã•öôýR1C³Ÿ‹d4-²ÿ¢Ù°2‡xSDˆ”б‹V:ÿW('^ÄøìçÉ1â‹ï˲ؼ]]tn[¦mÞà,‹)'Aj£s[3Óç Ì"a)ÅfG×”8sŠÙë-nÿ(y–÷/œ«‰§bpfS2`þ%"dŒLAˆèö¯G•Š9bðšŸ'ä8·Ú[xÍ 
°ÜÌû†n†r£@7?½óƒˆ€ÿ.¨;ÂçI4y%r;ÆhÃñê‹âyUq9]K3Ë×ÍŸËÚÉîv¥Nþvâüõ{§Vï[ZžÞ±¶¢bŠ­kSCÉÊv;‘Äд>·-'wÖó´6ð²5]HH$¢8Ÿø8¬®g(§vI­):kjxÕ/”ý*½µ0&m殎ŠM ZµLYÑ––Tן)&IJ°ÒÜøØ’éŽÆC-leűJ©¼ëõNHãÊ1%_NÛà¸ÎðŠsáè,|ø£Áð¥–B.V†6 ð)ß ‘îyãËDåÛ ûû³Ôq9q1µåùšš`"&Ϙ5PŸ<óÁ¿­ÛöŸ,~ûü§O¢¦å‹Ø«g`?þýêåoű-ß]\œ={km[;‘¹«1³1#¬hݳËì3bÉZ rùþ긲´ˆà‚yMüéK 4¹ Ï:ðÁ¾Š¹/±?Ü÷{õ™¥EB¾8¶ª~ðW öÅAä¥M|{rÁÓwVÌhëj2—N·7?º­EÉàq_1öo~ôƒ-¼rÌŒ(ŸÄxlý³d@CˆB¬¿£Üã$Q󃆜ËzN,-,Yq¶7½µª(j~‘>§³´iëL»½ûÎG}¡}Ԝ޵ùÄó}½/œØÜ•ŽÞ¿ÐÛ÷L/ µÏÚÙÚ±s¦MbI[âzØqõ™äÖU±ã곃ƒÏ^åñ(íB^$/ÝÃxË œtæ®yÏ…wzшsPxñE­¦U+æ­8uê轆5J1U,ã³W„ÁáCÍ@âK "o\‰wgvNŸÆç–.ïÛ „³{ùA [,eW‘Ã×ñ¡b¢‹^0‘^®m(ë=d‰sv™°å‹t¡1þã q"HR犴R8õ€?½^þ+ê¥|ú}!#f„§ ²‚D‚¿P4t°©?Т z$Ç9JP9ΟƒŸ&²› °ÆÂ–”= ” 2!˜/¤>YD–oÒ÷p1“÷ }æ±(¹a¨ `aCrr‚¡†äŠ˜¼i))ÓòcbòÑk^ 5”¿YØ òóç£Çæç[S0 õy?ïù9U ûDŠ.=׊ÀŒX†Ãìй(êðBÈè€ÝRÙ›™Ù[e±T¡×J ±ñˆu¨ õXêјø¸˜£©GÑç!ë‘Ô#±ññ1G‰û3gUZ,•³Ü?cc¸ÓŽZ Â7ðAã‘4WC®¼ºª±+T/}i[•Ûvp%¦¡ðÒ€m =:àÑqoq 5ž+Äl´Ûð†€QuFѱý¹¡Âù3b#i‘P¨5¤–¶gw\R%':ä çË$Ÿ‘³QҊŇ:ò»+l±!¢ alÌ´Ù+Ë??2Mvê>Èãd÷æKûêt‰E#-ÐDÆFj V?½¥Þ»¾ž|ûhÊB»OI{–»®æ{9 ö.(H¨ž“{ïýSVŸ¹êä@¶Ò¤0 i&ÚV;¿ªc¤ÓßsßÒé;»RÒ¦-Ê+YÒš-ækÕTÂÈÚ±¥­lá´,yÄsÛZÎÏ+¸³P!à«%¶ü¢ CE‹’ËkÝü¢ê¡ ƒ‹(ñ—ô6¢)àŒFy á¤åò²¸A!®@ñ¼—|V*ûç4g¯\AŠˆ×›ÁÇħLœfôíthU̇¤âdò±Ñvì§Œ}Cþδïï3ÂË5Ä'Õf§$@FÉŠŒ…-ΪÝr¾/{ÞŒ©‘"•4µvî”ÆÝ9ÖÎ]Y*ò†ˆua,leä‹ï¾è©µÅcR°YiïÛ?£fC§M©VKãÕaX¬ƒôY\çûh]æ¯ >™¡,BÚ¹R¦-&vK•£ï Ãy "ºv]¡¦y¡…8Í•MP^¯à¹’Æ›p;†ÊÄ×Úåd²HnÞÒ™’Ö±¦rêÆéÉ­k€02]“}¶«uÿüÜÒ•ÌêºoqÎZÏî’Z‰3Ù]ùµ€´ŽuÕuëZ“¦[ªcŠÊ³æœÞylIQáðé>G_ß`®óC·îoÈ™ˆßà½`8lŒC€¦‘Ž˜‰ÝrœÚ¤¢µ¿ËNMM%Âc× i~ÔóÎ^ø»Nø;;ñ;³›ÆMŸÏôás³Ç3lr@ÌšøÂ/ÂSc5:szx~Œ–+L²µ3ë²ô! 
™‘¿U™ÕÅÁ ÁB…à_á™Y¹ÑÆtS–%üž9¡©•)qe9‘—höß:ö=aƒýgß½ñÚ@pOÒkº€qÜÌC²5–šÌèòÜʚآæ”܇A) ¡IyL~Z|^¼FTeïlÓšmáµO!í™’\b)nU2ÍÅ)µŽÈH[IŒ”¦¤Â¸D}XBzˆ>=VJÀ¥9M¯(sH…ܧtøO ñ&œjÈ)÷ܰѮ7à™Žûx_¹ ½ ¥ø §äÁTi¸Œ]£xõUE4ø3›ˆÆŠ}¶UÇÕ0»ÂRrÀïÅaL¢NˆRJé>F‰Çu!ï*”Ú‚s÷»ßÚ=Ê6Ô±à\x‚XÍæ€_T‚WÙ B¨nŠ*ˆâú\ÄWY†ñþ0D>:Ú VÚÍ*cˆÜa6ÛÇßAœÀˆÙ-;»=o!¿Ï‚ÉÉÙ®}~Wšï³dîèkèH¬›Pø?ïAÞïÈËÔ"î„>BUEø§²‘z ñè8Á.¯XξšØðÁpù2 #þ®b¯yÕ×U@Ìþ§ ñëà ù yœ‚î^¢¹íйS:ºnãDâkYÀ¤44Õ¢«"û2m³{g&e´j4½¹ö9³º“&ý†LhBeÙ–ä˜ÄkD¦5¾²¯q°$ }‡è<Dä½”ë.à·ìœ"kSÕ&=/—8•ÄW9ñ‚PB‹öF\6ƃ;‹Ä ãô%lç=ØÚ,×n±è“BH¾çTJdDnÌùÇPâéxv}‘˜€Ü–iŠ Ÿ†xõpüï„c#Ggçpô5㢉Ǜ|Ô-žG*‰c•ìØW ±s9é ^=ù,5þ^Ãý^5¢7ˆ¯œÊG“Æ¢'LŒQ%e`[»û ô# ÀÊ8°<_¬¢6/Á6æÝ¦›:t yÜyàjýI¡NÔ[4&ÆùžRÝÇn-€~)P(D´DäB¥8Éh"?óˆÆ{~<ò“F§†ÌMÀ\ û\íò÷Å8K©{×Ój”Œ;ơ㻻†8—5059yê@–£³®$*ª´®Óá)Š*©ëQÙ-™Ž–ì(Sa“Õ:­Ðd*œfµ6š Ü‘ áD˜¥~IyùÒzK\IkrrkIœ¥~iyù’ñÏœŒŽ}K‰è#¾â ¹FI·aHømá!óD‚~ظkÑTu¥bvÙ¬£ó²â*çcJJÊãZ·u§•ïþýΆ»zì•›_Xlïm)Õ’‡v§VgÆ Ätaù]/¯´õÖ¥TÞñx_õòF å 6jÅ[±þUöüw;2æÜÝ «%,«%'"ÛnU¼÷?š` ˆ[?$ìò ¿{®{t2àºGGrw-ù ó8•$ÆÊ)ú"€kŒ¡¹=åIAw!X™¾(X@J4K¢>íÝÕ‘ ’™E|*º|qÃu“ïg®o(½Ô»üç¸x^#¢wm'qÒÔ»ÎKSÙÿ|¦Ðl¹’ü­PDòƒÀ?iFÂî‡Jâ¼J)r~ªµhcÍô§îXÞ)ì jÌUg¼’ž»4ËùĬKÔ ´ý‡yüƒè¯ï÷Ã篸â8‘.ábEHMê©+׿AÅ(ÅõN1qÒÙ#¡ í£_†Ž~Ñ£”‘©d’TÃÉÁÈØ Пñ¤h…òÔ$ÃÄ@úDÐ9ÍþT% %"öEP$¥E*p’í+fûÀ½ÅÄYâžx“Ú¬rö;çkÔ qÄÁÑŸ‘…d!¾ÿ¾œ]F AK p‡Œ»2ñ3¾WöFÀL“ø2sºÌ,²Y»ûwF<èFG®×æÕ䛸âÀ~o²Ë¶öTåàŠé5àaqþT²Ïõò qNs¾×Ì^Èa 2r(-®›=vª êÒ"—¯ŽÜ߯A@©ŒøT3ÀQ$™¬acÀ_•R¶¤"Å£«têQ»±05â7Þ`¤à-‡áÖ|­Y"QY ЖÃÔ8ðWâ_F‰™WÛV%zm"ü%m:\àîQ—@œ»ñ^¨ï1 é z´½‹ ÌaÇ s¥›?tîè±ÓÎ_þs¯>GFl§KV=>ôÜÌãÇAðWÿn#Ö¥4¯¨€b°þþ“ì–ŽƒÙÎwÈŸgƒ¿ü%'Ãäôž< Òú‰k («ÁÛ ýN>ŸJ™X;•›óu¾¸¥1%-Ûg¦Ûfl¨kÛÑ“–Ú±±)*]“sª§ó育5õ÷ž.œÖ62†iÝ›j6u¦æÍ\[0uËÌ R_aÌ+Ë]p¬§çÔòâ•/hzw#!Ô!Þ¬„ø(\{ S`ä¿‹ê›~Ùá#G”B]—^:wCn¥éÝ[¾ˆ­[ÑØ°¢.6¡a¸ºzyCø]éâÆ¤ÒUÏ™óðªÒ¤†Å¥ù=E†¬¹‡»»ÏË2÷äZ›–•ÏX[UµvFù²&+y%knSjjÓܬ¼ÁºÄĺAv•<·cu]ÇÞ9sövÔ­îÈ•+ó[sQòõÌY›«s›QΚ×7ö5ým)ÐÓo›‹é¿câÊðåâ&Ï¥4ÎÏÍ_€Â=¸Méc^ƒÕZŸk4æÖ[­ yFb6Ÿ‘•Œ ¥ÚRê½”Ÿ§“jÐYkzš{¥l­wµY­mwµ¶mAy¶´Õ•ëõåCµµ ¦èõS€= k;” ~Âs Ñs ÑsÑÑS¸ô{‘W¹OÚõsØàØ1¤'n *œç еŸ"öØÍn«ž=A˜ÆƒæœïQÿ‹|µúcc¼éäf"ƒß %ç 7ã3Å6r-‘ŽaŸz`[às&~#„]†bÆÁúá 4Ñ;xFÞ?ˆZ vyì#â"|æ3ÂÉãb¹ éI'»ž1¸žù?ó qÍõÌløÌz/~f?ó)OƵCâ2þNö ñ¤ÇõÑ[„*6ßCvçð ï1?xûRʧU¥‘(°/¹¼¹‚ ìë)6 eBöoàÛ‡(°/Dͼڬ´tãéG,yv{YW‰+°OJMõ 
ãAmåS8â#ŸóQ|êMFcÿ_Ë0ýÿNÌkoò=”h&‘;ðï?@žKvPŽ”cPs9RPb3q|­³ÓïÑ/ðt¼T”sÈh¼³/@ .•-1ºîžéÜ—ÑÌö¸ÚÈ|š ÄFš1ðÄÚæM=E&FkN’ät•ÄLÙþÛmàgçlvQ4£N[’ß³¡.ºoe Îm‡OÆ)¸'»‹ñ“ôãÙÏþõÛ¯ÿÚܼ¢£Ô q,ÛñØ¢£ÐÛ ûw‹3}Ä”È?ªµI›[@bÒ{’oG¯we=ûþ·_½×Ò²ª£Ì É\¶ó±¡£ì›ì§ÿnñä4<…<ŸÉòÀN’ÐÐ;-ì$¹ ½“(LdèÎcx Ù ô$™ o#ÿ#ýY ô„· òŽ€© o!7$Ò%?¡ž&à»/æÝEqE÷·NÈLªõÏLzÓ|£ÿ­{â?Î8!ÛÞ%à†)¤'ŠÃÍJ¯ìÒ®|ËoáÍ<®ÅU._*»ç"tÃï1ÜÁ®Áùù (;#‚;9øØ7tn‡ƒï"‡yãuG%Ôo m¾‘ýSâ×Ä4ÿ51å5üËž±8ŽA(ÉÕ%†4¡ºÄñ¼I+Cî{qƒBÅì ~HOR¹˜úØËÿÇuŒ¹ùO±|Š]óù«€ðíîªh„¼#ûÙ»ò±>DŸJÈDŸð—F¦Ÿò°›ôá;~]øñ|¼ þ‹~lvÓ‡ç½Ø5ï¯zÁ·zàû½áxÞ‹]óÞûùËø1ÞåæI¤!eí¸JÊ*?6ÞVeåSþºñ6*-“›}ÇÁ5_r0Í2Žfp~Ži–¹æÑScc×T;Ú ?úÝãIkÜãé]ÁÙìO¬»¢óq?B|+<ûéö±÷`«K°¬r1|[Ç–p÷yÐ9•|;‚óˆ±oá?áY¨’³ÿúƒ*;2ŸnQ¿W`;f¼¦píï[áÿFù ±lqð»98²» Ëßï‚#¤$˜Ïü  ŽòVNÇ|æàÇÆ–c>¿;ö õ9^³9¸ŠXˆù ×ìé®PtCn²JÑþ´NZ9Ú¡ \IÚ}—]ãª)­æd€÷1¦å$²3ðXh\cá²? ãmçsÏ+(3)âZ7—áyÅÁp䌼€•ý–š –û©‰À—Á¶ëî›ÿ’§=`7PÌ!.¹vÍ“+ôLs'—`s@øvÇ5œÁö´ãÖ"Yüµî-T`fÿæ¯%n¥$3ù'¯vÓ€e?ÌÅ‹mã´aÙçàwûÀ·zàû½áXöÃ\:Æ®ôÀ¸à¸/æÞµ.ůœÁµyÓy7¯Î«ò]Jn¡Xï!ÿ5ýfÅ{©ÇÉœ'±¬è]òþ9†ã¿˜.ƒ‹®§ðœæà /WË»­ê¿þºõGVÎó–QØé»°ühò›Û?–$¿ü$BŸ¡EãõØÏðÚÇÝ8>³ep`ìÿèÆ# üíwÑ`öŠ¿râªƒí¾’û‹€r4ˆå(Æ%GÏà=öö†£Üd‚É«.ãâÃz†è Ÿä’|BFHøªü&ˆTëµw ¸>1–[“KÇcyö‡oGpÈÎp–vàŠÆþ:-@…c°ÆO,'-yL_ôÕ[¸¬·L.½U>Ž/Ö[&—Þò†oõÀ÷{ñÞ2¹ô–7<Ý? 
*¼àJüˆëy\§ó)žãѾÁ!ÿ88[ɸåÊÆ±~ ½ÝJÇÎ#̱[)}|‡/ï1î˜ÇMw£]jÇ5‹1­—î{ÓÊÁøà \ÍØŸ²Iª³ò_ò–;ö·epÿ_‹ËGx ã‹k"c|]øþãËÁhýFòü«%û{ÀþÅ“?õŸù>Å”ý€l&ºüŽKØÁ5—±âjÖÔpëŽ?|[ wÈÁGpæ*ÞíVifèöë6å2Ýn)gÐæ“ÉÈCï ‡ÞÝ>|hõÀ÷úÀc=ð}>ðü€ü‚~ÈÇu1ŸS1·ñŽðܹ²`;‰hßäÖ*Oàí­×¶NàèÍ«C'Õ'”7Îy·Š³ʨÛ@Ù?·ÔÍ1&¾Ù§Æù¿‡ÿGÿ]ð§ñ¸80|d87^¤ >‚kÓón½:íÄA»õzµ &ŒÚ-V°%Vú%òòÅ¿è6ðŸ0€·¾ÿÞ"öä÷0zƤÕ3&{}Æ*ÖßçñÀøÀï÷Àݲ€öÔ3p­1®Nã6ð O ²{¼jm£Ìžç/a}ßì²e)®Ö ŽY@µV–OŒ§þ¿V[qL¦ÿ~ý•‚‰Ê÷ÿÏŠ,ïyK*ä­›‡”ýé×ÅÜ<æà÷{àG}àû=ðû]ð-ì2Ãgà1êŸb8:#*Çó»››÷c ¾Â_ñ‚oCp1ö„ŠÏF5œÍ;q ä= c¢‚Å).Ïù­Cc—a+ÿÀë×ÏnWÿÐ/aÇp æàcï@ø,ãÜóû\ð+þæ?àÂÕ¹Œe¥™xb}¿³B€˜ ¥FÎù©HÃèó º#Þ,Îç™ÍÁ—Á1h¾¸Î’·q6ð.v ŽÿqÃÕqí Zx,ú]:˜;#áà#Ðsò®lá[%cB’‰e3ˆŒ KK Bà¢ÏjâÁkЃ×n„—‹nToà ?Î{Уo®b:]k̉€ðmîÒÑWñØáhŒ›UÐ`lݬ¦èž8´·Pfƒjòl ƒvûÐÖêïõÇzàû|à#øIxt Žk"`8·Ç¹MÅùÕ÷cø/giŸ¤ZÂDᘬ|xbÂ⨠xÕ_F6³kpÍ?$Óeîý‚£‡»£¶ÍÌsï}ˆþ\œOàv*2L4-~\†è‰"qÛU€ÐßV§kò=IèšfôÇ‘5a’ß>UìD{²ŽïU<×¹q<®âà6ßAºj@x4ï¿d°Çpý‚B/Ô ŠWÜ ¸ŽØm˜5iµŠÖžÉªÔšRéÀU)†… çOR Ñ“1ö „×ÛUx=Òzž®Ãr¼ŠÓi.¸ Â÷pp\;a‰ùƒžï?9n-;þDç¦ùòÿ4Q‚o”Aÿmßá¸ë,ŽVµ­?ÇcÊѺϋ֧±îãhÝhu=ë2îùãíð‡0/×q¼¤îòÀ×`ž­ãxé‚Ãöùœ®XÇñ’Úèæ¥ ¶_Ì›ö΂>‘Íÿ‡¼è¼•¸i¦ô@ãÂÿ7Žoêq¾ bñ¸p|Û7Î7A0Žo»ß\ÏSx\¸ç¸žçbZ{bZF\1-þðmùÏÁ‘ýÏ›,“»y¢øNÌíÐZœ˜í=€ÅáÐêÁm¯αø>øˆ~À~Á?ä²eüi¿Çõ<ÎÍŽáwr¼‹·N/rð¸çÝ^6÷‰êÈïÎn Èá[Lù~ÎÏ'áhiõи×E;ÎÕŽißä’nžrðžÙ©“dq '7Èë>€œÀ™ÞýqçpiõทÇ͉bø! 
ã¾`{¤˜³GpŽxœK4’ç—%~‚õä›6žX2aÅõI$ÎøÛJ\_H/næ|ºB´ßCPºsÙò½SÙžs¥²•28“­UæÊd›šçJdƒÙâ{Äô«?:í«j†%'ä±¥5˜õ_Ìcû:{‚zê6óØ>õßÈc Ç ŽÁ߸<Ÿc¿¦5ûýÞÿ3X›×<4e_<õÐÑè¨yÑé?ÚþÍþtë4 xœc`d``ßö/Œ3äßÙꯀ"È€ñ"ž&xœ…Ô}h•Uðïy{î¶ò¯é¬fÎ,\´y¯·²a¡¶u§rI|ml´M°Qî¥$•9ï–v ¼x­|9 b‚[蚬—éÁ2p(QaŽ1êíö=gϳníÁ‡ßï<;Ïsï¹Ï9_-P þÉBÀ¢À $m£”¢Ïè#¤_é.]åýŠ’äx„µŒª¸ßæz/c§Nã¨>…¸åý›P%Îc·Áañ Râ&’æ’â ¶‹4Zù>·Ë“hØ!ÏÓl4ÈǰS AÍf­AL à[q}¶—ƒØÃ}{Ñ+Â^µ‹t‡(ƒHJ-´\Æq*ä Ä©’¿ç~ÎÿÂR×±•ï1ƾ‹>íHóóÓ¦›½÷ÑâíC³e¾Ç&õ"8çåS;§Å´žVQ9õÑ{´…ž¶Ï¤i—<„m 5T*fŽ}ÇëïøÏ9èßs‚NÒU:K¿û×¯Ñ û?¯•ïeËÍHèwQ§Æð°þ/™¥XfV»µ´ËD¦ÉÿNËèU»çX;¨Å{Õ¢sa#­µßËÇÛ&…zó5jT rn¡Z¥'/p¯Îçsü3Àw{ξkwŽªÇÏ‚=–žÅsÎ=góÆeÍt‚ì™*ƒ¦àr)sGæf~bFf¦û4ÄqºÍfÖ¿Ùõ¹Í­ ³&g×T\†‚,ËÎ4ßD®MÃæ]6—{æ_ ÈÁ)ù¹8‘ùÿ£ÔϳšñÌú£.³ò™Y{™Yt›cÍú#Ý·9fó‹ûh+%'²+;æa³L^ÆÖ¶ Ó.Ϙk6Ç?Ó&m®9~ÆM™÷@8`s0›ËÃ@ ^±Ùhñ -ñÊy¶þâlBù?€’îxœ,½ùwçöÝ۱ɞœÁé~ÎÅ©‘@nõ¦uFïÃDIŸÇcöH¨Bxiƒ¤n-$6mØŒ=“É$“‰c³c‡EXn„<Z ÆF“m’<ÄûÌý^ßϼ?ð9uºJuëºîë®…Z@0pÿ®º8<¿úÿ |+ð”»%> ŸqÓâbwW,‡÷•XéþS\é~'V¹kâ[îsqÔ=ÇÜÄ#î‘xÆÍ¾üž­-ø,…ËàrXËa6[[ÁmЃ>Üi-wÁØ »¬†`æàœÕ¼ çá=xߪ ~ÀÇî÷§ßR»OiëÆÄwÜGâQ÷+ñºû¥xÇýIœw?Ÿ¸wO³üÓ,ÿ4Ë?ÍòO³üÓ,ù4K>H¸/Ä$L©Åg5°Ö¹ûb=s`£4|&ÐWÃWàfØ»X2 s°—µíeî€ûZ„Cp˜eFਛÇÜ7âû%¸îƒ?†ûáx‚‡!k ²¶à(ƒãð<OÀ“ð< ÏÀ³pÂ* ^‚3ürÞ„·`ÎÂÛðœ³­ËÆyxÞgîðømõÞGbÄý—X¥žðíÀ6yúíÀn-ÿí@?ç÷;îÏo«ïiyõ=£»l®ú˜1Ú‚ÿß…ïÁs¶¶àyx^ÿJiº+.vb¹›#î·â*9òWªê†ø²û‹xÞÝ'Ü—â%xY½ô¯SpÞQÿ*pWýÿ¯”­S5kªÙèÃ.[§j6æ`·­?Ø{ÅETµˆªQÕ"ªZDU‹¨jU-¢ªETµˆªQÕ"ªZDU‹¨jU-¢ªETµˆªQÕ"ªZDU‹¨jU-¢ªETUDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠHM©)"5E¤¦ˆÔ‘š"RSDjŠq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šq¡Šqáyík~/>¥}ÊóÚ/ŸqÅÅ®Z\®Ä=(ƒQ×%VÃL(qÏËAcJy|^ka{_¬gnlT ÏËAãjø Ü Ûà6wV|Í3 vÀNØÅÚ²0»ù½—vû˜Þí.Šýp›÷²þ?Ï«‡à°›Gà¨Æçç5VO‹G˜~›5œw+N¸*ñ¼ìšÅ)x ?cù«LO3}Í}x^½HkÖQŠ´ÕQб.ƒËa,‡Ølú7›òÁ6¸n…}¦p7ì‡{à^8áÜg*©÷Ãð <Ãa8Gá‡GÍ핌ïÂ÷àû¦yðWðø!<Æ_‡'àIx ž†gàYøõ±iü5Óç˜>/À‹p¼S:Œ3(|Þ„·`ÎÂÛð¼oî(ÆðKÖù|«õÔþK<âþxAeü>¿£DüY¬†1¸Mû—ï¨ïÙt¿QNù%ø?¿ì{á„Cð¨­A ß…ïÁsÌ=/À‹âbåôbTûÄŪă U¾XÙ4¦` ¬…uꥋ•M›n€Ûø¥‹é,ÌÁ^hG2‹9’Y¬,ü%°XýSíjëÔ–¶ÎØ÷À½pÂ!¸ÏÖ¦~hÜÀƒð< ‡á…cpµj¥’ñ]ø<ÆÜãð< OÁÓð < íhj1GS‹9šZÌÑÔ‹gÝIñ9ø\—ÂX 
„)ü(µ°Î}&Ö3·6ªÎ5WÃWàfØ»X2ëfÄÓ½¬m/stLõ¢F0ãÖ>îE`ÆQwLÓö¢úí‡âíI_Ô¸dü\g/fàc¶ô÷$ð¢Æ(ýmp…mQ°V•°ÖÂ:X_…kàZ¸®‡àF¸ î³m‘ûÆýð<ÁÃpŽÀQ8Çá1xž€'á)xžgá„©¤±Â8Öހ7á-X€³ð6¼çLIíIóð¼oÚj40>¿«=é¼ø¬»$>_€KàRXKa¹k#p…+ˆ°Ò­Wºq±ÊýPL¸Sb¦Ô£¾«>f¬…uîŒXÏÜب=ûwÕÇŒ«á+p3lƒ],™…9ØËÚö2w@càwÕÇŒCpØýF£nµ8æúÅ#®I|Ç%Å£ð¶hR{Ìï>e:Ïôí×¾«½ä7âU¦§™¾îÅÏÝNqÞúï?ˆó.&>t/.ÀÇ(ü |bsÕoUú­ôT¿5V•°ÖÂ:X_…kàZ¸®‡àF¸ n³­ÐY†Ñ‡ÛÍ©`;Ü_ƒ¯Ã7à›0ß‚;ÍÓà.Ø;a—¹¬scî3G”ã~x„‡àa8 Gà(ƒãð<OÀ“ð< ÏÀ³p¼VRŒ3h{Þ„·`ÎÂÛðœ³þ ¤çá=xßzˆ’b| †HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJˆ¤„HJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJXIù1-)a’VRºÄ•êKa%¥C´¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJ˜¤„IJXIi?e:Ïô¶È’&)a’VR~,ZRÂ$%LRÂJÊ߉Ý«â|ŒÂßÀ'ö I “”0I “”0I “”0I “”0I “”0I “”0I “”0I “”0I +)òHI1úÐ’&)a’&)a’&)a’&)a’VR´ÕJбvÂ.sYI1æ %%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$%LRÂ$eIàwO\¬V–(ŸŠq, ¬Òqïõù[¢]c\¢³ø/ÄKð²Îb–è,Þ8ëþ ÞqE»¶¿Dç8Z›œÒÚä”ч]¶6ik̉Ki})­/¥õ¥´¾”Ö—ÒúRZ_JëKi})­/¥õ¥´¾”Ö—ÒúRZ_JëKi})­/¥õ¥´¾”ÖK¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’V•´®¤•p%­„+i%\I+áJZ WÒJ¸’ö=ÕħÔç¿x>ãÚÅÅn‹˜psb¦Üm±ÖÂ:wZ¬gn´ÿ#øž\°é,ÌÁ^–?ïêÄ ·B¼/ó˼âþ,^…×Ü9ñswMœ1¿gµ_‚¥p\Ë`9ŒÀf«_Þ©uygÜÀƒð< ‡á…cpµíÒ™»ñ]ø<ÆÜãð< OÁÓð <+¾$ÿ$> «Î—”»sb®r—Å*ø²|yIŠý‡hcÑKKð²»!NÁ;: ~IÊhmRÆX —Áå° –Ãl¶v•Pµ¨„}Øeí*¡Æì¶‚=°W,eÄ(eÄ(eÄ(eÄ(eÄ(eÄ(eÄ(eÄ(eÄ(Õ(ñç@)#C)#C)#C)#C)#C)#C)#Ã2­y^ŒHáeZçÅ„úÏ2õCcÊ}%ÖÀZXçfÄzæ6À.˜UUËÔmº—%‡5F-SÞÖß–Ñß–)éúEµ©]Õfôa—µ®ÚŒ9¸ÏÖ¦eÜÀƒð< ‡á…cpƒÇá xž‚§áxÎPá xÞ‚8 oÃ;âr–¿›àjø Ü Ûà^8 ^´\£¢qŽJ«å ÿ"ŽküY®ñðËÀrHš«É8gs5Âçá=xŸ%¿€Ä2*)£’2*)£’2*)£’2*)£’2*)£’2*)£’2*)£’2*)£’2*)£’2*)£’2*)£’2*)×XwW|Jç#åÊ ñ;•ëHõSñ9ø\¬#Ïr¯ÚôRXKaÔ-«a &ܳb¦Üsb ¬…uî ±ž¹ 
°Ñý‹ØW»ýâ+LoÖqB¹4±émî[âk®A̸Ä]°vÂ.Ö–…9ØÍï½´ÛÇôn÷7b?Üã~"îeýî§â ‚ÃîEqŽº â˜[/Ž»´xÄ­ßv5ây×)^pï‹Ý¿‰(v ^vÄ)x…mÿÌu‹W™žfúš{GüÜý£8é(¢<ðµû¥ø˜éoŒÊ¦êÑ('¿4ÊKá2¸–Ár+Ì) +áJXka¬‡ÍV¹Ž‡mz \ ×ÁõpÜ7Á>W&î¶>ìw•â§ñ+¸—é÷¿ÄAíéʃC,³Ï<Òèñ]q¿ ‰øå 9¥Ñæ›S=lù8Êïcpu:kÒþ¨X|×ú‰öGÆŸ¸ëâû¬íWðø!<æŠÄã¬á„ûkñ$Ó§˜>Íôx~Ä_}ìöˆ¿fúµw)ñ‚Û!^ä— ë?Ê£q¿nÀ›ð,ÀYxÞsÖÇ”_ã<¼ï[S~à—Ö÷‚_Yo >dú±ªŠ(×_‰O¹1ñih¹Ž(׿Ÿƒ/@ËuD¹¶é¥°–¨ûX c0“0k`-´\G”k›n€î¤Ø-×åÚ¦7»Ÿ‹mLÛ9T„\G”ëq쀰‹µeavó{¯kû˜Þí6‰ýÐrQ®mýÚ¿G”kãvž8GÝaqÌŠã®W<¢œF”ëÿ-Z®#ÊõˆuÜ!×r!×rQ®GÄÏÜ/Ä«LO3}-ýÎ@Ëu„\G”eÕ ,Ë#eÙX —Áå° –Ã\aî(ËÆJ¸ÖÀZXë¡e9¢,Ûô¸®ƒëá¸n‚–刲,µÉr„,GÈr„,GÈrDY¶eöAËr„,G”eûŲQ–mÚ²!˲Q–í÷18-˲Q–Õ7”e£e9¢,ËkeÙøüZ–#ʲ­Á²Q–múÓ§™>ÏÂø+ËrDY¶ésÖsÈr„,G”eûeÂúŒ²lœÁ¯ð&¼ pÞ†wàœõ+eÙ8ïÁûÖg”eãø¥õ7²Q–mú±ªZ¡ä~%.v³bT=g…òhŒÁ„û½˜„)XkaŽW(6Ý·é˜a…ÒdÓY˜ƒ½p7mõ÷ṵBýÿ¾hG+vt±B}I•¨o¨uõ c?Ü÷Â8‡à>[¿ú†q?<ÂCð0†#pŽÁqxÔêWß0¾ ߃ǘ{ž€'á)xžgá9ÛFym¼/Â/m«å‚ñ¡Xþè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_þè_©}ÙŒø”;!> ŸÑhV)GŨÛ+VÃLð{¦ÜÅX ët”U)Glnl”ª•ÚCWÃWàfØ·i”¨ÔêbFGw•ÚC;`'ìbmY˜ƒÝüÞK»}LïVö+å¯qûOq/뀃p³¥#pÔ}#޹Ç⸎Ø+µ‡ú­ø¶»"žwíâ„{K¼/»}â¼âΊŸñWW™žfúšû'ñs¦gà#é_©½Ò£@¥öJjW{%ãK°.ƒËa,‡Øl[­ž9.î6/Ô3{à^wP`zé!¦÷ñW?†ûMõL›>hº©gÚôa8¬=u¥z¦MÂ18šGê™ûÅw™~¾oÊkaü~ñWÇá xž‚§áx~Ä_}쮊¿fúÕž·¥>,mÕ‡íKk%çb•Ú_˜27àMx à,¼ ïÀ9sYû ã<¼ï›ãÚ_À/Í}%å–øéǪj¥òò;ñ)«¬T^ŒÏèhd¥òbs£je¥òbŒÁ„;.&aÊk`-¬ã÷zØ·¹Å×XsÆM‹»`ì„],™…9ØÍャ³éÝî×b?ÜãˆÃJÄJõyãÛê‡+Õ·ß'Ü¿Š—àewFœ‚WÜmñ*¼æ&+Õoõ·ê·ªMýÖX —Áå° –Ãl6eÔo¥†ú­±î{á„CpŸm—ú­q?<ÂCð0†#pŽÁqxÔôT¿5¾ ߃ï›Jê·Æà‡ðuž€'á)xžgáGüÕǦ°ú­MŸ3ÍÕoÜoÄ‹LÏ Þ xÞ‚8 oÃ;ðKóH=Ðø>ÖúWé,cN|¾—À¥°–„ûLL”zò*õ=c-¬cmõÌm€ÚŠU««á+p3lƒ],™U…«Ô÷lº—µíeî€+ˆƒpÚU©U8ê.ŠcüÕwA¼¢Qq•ú˜ñsíéVi´ÕÓ´¼ŽÃµ:7V•°ÖÂ:X_…kàZ¸®‡àF¸ î³JÔÇŒûáx‚‡á0£­P3ƒÇá xž‚§áxN˜2ÁŒ3lé xÞ‚8 oÃ;pŽ5Ü…óð¼ozj3>«4R]ŸÒ˜Y¥‘ÊøŒ{E|V~U©ÿ_€‹]‡¸„饰–¨ÎGª4¦c0á¢b¦` ¬…uÚ/T©_Ùtl¤ž&¸¾7+AUêW6½MÇäU÷Þ3îuq쀰‹µeavó{/Ó}LïvI±îq—Ľ¬À]áv¯‰#pÔýL£žqxDãa•ÆÉ‹çÝ6ñ‚ô¯ÒYêMqµ‰—àeÃViÌ4^q=âgîñ*ÓÓL_c=Ÿk¿\¥n|¤³­*\ë(´*ðû"P¥ž¯z4ÆÊ/±ÆR¸ .‡e°Fà sJI1V•°ÖÂ:X›ÍqåŦ×Àµp\7Àpìs¯Š»­pÆZ¥1<&îu«ÅW'êì¯Jc¸ÍÝg¾(_Æý:ǬR¾lú û¾xˆéÃpØýq„5Œò˘û[qœé£Ö74†oßeú=ø9UÁŸkÿT¥ñ\îk<ω0ý!<Æ’ÇYÏ xžÒyh•²iÓg\½x–éø«µÏªÒxnÓç¬/iÛõ| Fçt¶ 
\מ4¸ãîˆwµ†hÀžw‹žèX±šeªY¦ZË|-Þåw[¦šebû¿³X  –»¿#p…û£X+Ý€¸R=A™sËÅsÚºXàæNº?‹Ÿ2gúºÆÿX`VÞÅÔnA´-Š©ÝGâûÛàfk1Ø·À­p›µô ·Ûšƒíp| ¾߀o | î´jƒ»`ì„]V0 sb\û i±Ü}[Œ@Ûö8Û×¶ïí>Ûx`•ÎâRàïÄ—µEñ€Ýí—wÅOÜ‚8©>—6gúºË‹³,cÇWq©1/ÎÇR,X€Ot4שz¤ƒ*‘Fšqtˆ£CâèG‡8:ÄÑ!Žqé Ê¥ƒ±vÂ.Û é`ÌÁnÛ¢`ìÒä%1M“š$¤ÉÏÄ•î?Ä*í‰ÚûüI¼ ¯k«ÚÒyñ®¶(¡mÑ´-FÚ¶$Ø–Û’`[lK‚mI°- ¶%Á¶$´-jQÛb쀰Ëjжsb’Ê“Tž¤ò$•'©ÜisÕ«°v±|æD/PþßNŒÀJ÷ޏRGøžÖ\+Ú±œ§œ~&κ¿ˆvÔ籟ò”Ó߉OÜo^p›­!èAî´õ¨cì„]¶fÕ`̉>5øÔàSƒO >5øÔàSƒO >5øÔàSƒO >5øÔàSƒO >5øÔàSƒO >5øÔ°¶SÃvjØN Û©a;5l§†íÔ°¶SÃvjØN Û©a;5l§†íÔ°¶SÃvjØN Û©a;5¼xV½ôÍÀsð¸.…%°–»´•ªäMÕyBL¸Y1 S:Ë~SÉ2ÖÂ:õœ7•,›Û»`æ`/K^W¡íßTŽˆ3Ðî‹{SçàªDçàÆJ¸ÖÀZXëá«p \ ×ÁõpÜ7Ám¶uêÕFvÙ6J1c•Mã~x„‡àa8 Gà(ƒãð<OÀ“ð< ÏÀ³b§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœÊàT§28•Á© Nep*ƒSœz+ð-÷P´Qt§ŽrEúp;l×qÞNå_ƒ¯Ã7à›,óÜ {àøŽŽÕvê(7.þZç™;u”k¿Â1ñdà‹Ÿ2gú7:âÝ©£Ü×Åÿ©j6ðŠx'ðâ]¦ç'> ¬à“À@`§ŽuµfëíÞ—:âU…:â5þþþ+üü9üõïð—ðBàÿ¿ÒÜ]Œ~»ýv1úíbôÛÅè·‹Ño£ß.F¿]Œ~»ýv1úíbôÛÅè·‹Ño£ß.F¿]Œ~»ýv1úíbôÛÅ跋ѯƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡ƒ:¨¡“:©¡“:©¡“:©¡“:©¡“:©¡“:©¡“:©¡“:©¡“:©¡“:©¡“:©¡KÇH×Är÷”+Ôn—ޱ•nP\鎋«tÎÞ¥Ú¢¢ÝÏÜg™s ºtÔý8©zºtÔmÓy¦¯»OÄYõ½.Õÿ¹x—éy÷…øDyéÒ‘jP®Õºrmôáv[¿Ž±;àkðuø|fà[p§U«m7vÀNh×·»‚Y˜ƒvGtwDwqGt5²¨‘E,jdQ#‹YÔÈ¢F5²¨‘E,jd¥Æ_DS#+5l:Ï´©‘E,jdQ#‹YÔÈ¢F5²¨‘E,jdQ#‹YÔÈ¢F5²¨‘E,jdQ#‹YÔÈ¢F5²¨‘E,jdQ#‹9ÎÂrœ…å8 Ëq–ã,,ÇYX޳°ga9ÎÂrœ…å8 Ëq–ã,,ÇYX޳°ga9ÎÂrœ…å8 Ëq–ã,,ÇYX޳°ga9ÎÂrœ…å8 Ëq–ã,,ÇYX޳°ga9ÎÂrœ…å8 Ëq–ã,,ÇYX޳°ga9ÎÂrœ…å8 ëæºY7×ͺ¹nÖͶw³íÝ\7ëæºY7×ͺ¹nÖÍu³n®›usݬ›ëfÝ\7ëæºY7×ͺ¹nÖÍu³n®›usݬ›ëfÝ\7ëæºY7×ͺ¹nÖÍu³n®›usݬºÑ¡ºÑ¡ºÑ¡ºÑ¡º¹nÖÍu³n®›usݬ›ëfÝ\7ëæºY7×ͺ¹nÖÍu³”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”éA™”é%)½$¥W:üR´kq½R@ǻڮ‡â–´;%zéÕ½ôê^zu/WÒz¹’ÖË•´^®¤õªu­A­sboÎéÓ9ᅧOÃÅn›XîÖ‰¸Ê}(V¹‹âËJ\Ïjõñ¬VŸŽ¿k`-¬ÓÕdzZ}<«ÕØ&ûx«§±úx«OÇ„¶üy9Õ§óâÏÄKð2-NÁ+ÊuŸŽ×ܧâç:SîӣўtèãZ}\ûêÓY³¶BgÍÆR¸ .‡e°F`³m¯4Ô–JC£»l{¥•1»mÛå”±Ú³`}< Ödz`}< Ödz`}< Ödz`}< Ödz`}< Ödz`}< ÖQm©êÑ–}Øeõh‹Œ9Ømµiì2Ú{xÆ´ŸÒÑæ˜¶Ñ¸ØËÝz1­?Œi¯ˆU:êãHuL[÷HœÎcÚ:ãeÀcÚ:ã5m×#ó#óŽŒiëÔŠ¶ÎX —Áå° 
–Ãl¶z´uªD[gô¡Ý_7Ì´#Ï1Ž<Ç8òç:Æ8×1ƵwE«œ;–ǹcy\{®?‰5°ÚËãܱ<ÎËãÜ«<νÊãÜ«<®}–-yEÛ8®½’ñŽzÂ8Wƹª0ÎU…qê§Îqêç>äqîCç>äqîCç>äqîCç>äqîCç>äqîCç>äqîCç~ãqî7ç~ãqî7ç~ãqî7ç~ãqî7>"¯¿ŸÒqÔym\ìvŠå®EŒ@{’∼¾$V¹_ˆ¶¯?B>"ǧŠísÈqãeíµÈqã5~™Õþ÷HÀž8>BÞÈqµ%Ç¥p\Ë`9ŒÀf«JJª)iôa—Õ#%9hûâ#ì‹°/~[Ûò÷bVª‡¿Í5ç·µ-#¢yô6Ϥ¿­õk­ßèC»þü6×ŸßæúóÛ\~[íêoÕ®1'žÑq濉yxÝýJœW=gt¼oš¿€Ä Òö—bD#üiX\PµúEÕ}ØÅïY˜/ê¯~.Ú¸}Qu3pQ¥_ôWFvñ{æÄ 9þ@|¾—À¥°–Âr×%F ]ATÀJ÷–¸RǺüo¾öW¬ÙÆ“ 9þ¢9>ã8>ã8>ãr\­Èqc%\ k`-¬ƒõðU¸®…ëàz¸n„›à6«\j}¸Ýê—ãÆð5ø:|¾ 3ð-¸Ó¶TŽ;`'´»&¸‹`‚»&ä¸Þ…óð¼ošÈqãñOÄ3ê9Ÿì ©I¥‡Å§á3îûâb×'FÕë&Õ0íYõIžUŸÔ'/ÖÀZX§ña’gÕ'yV}’gd&yb}Rûcö²ünU8è‡ÿSɼ/»Zq ^Ññê¤Æã5U>©ó¦÷ÄøHgГŸU¿Ægc)\—Ã2X#°Ù¶1ØgÛÜ ûá¸ÀA8íùIž‘ŸäùIž‘ŸäùIž‘ŸäùIž‘ŸäùIž‘ŸäùIž‘Ÿäé›Iž¾™äé›Iž¾™äÙùIžŸäÙùIžŸäÙùIžŸäÙùIžŸ ÚuÑÉàyx^?ÅåOqùS´Íãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9ÿÿW2/As9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—ó¸œÇå<.çq9Ëy\Îãr—/kDýƒø|.Ka ,…åîÇâJwJ¬ÒètY£è§¢Ýy™»å/kü¬óÐFÎËŒœ—5rjm9•p%¬µ°ÖÃWḮƒëá¸n‚Û¬BœFî´:5;`'ì²Ê5spÎê×hœ‡÷ Ý¯~™ûÕ/s¿úºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡ÛºM¡Û©Ñ-FtÄxEjÌ‹U:¼¢Öõ»Z7ú°‹ß³0'~¦Ñæ´Xî^#nX¬tgÅ•êáŸéXý¡XåzÄq«|x[ÇðŸIÏ1òû‚Q¹ÖÚÔ®Ö¦v>ÜikÓV;`'ì²5«c~ië~ŠWÙº«lÝU¶î*[w•­»ÊÖ]e뮲uWÙº«lÝ4[7ÍÖM³uÓlÝ4[7ÍÖM³uÓlÝ4[7ÍÖM³uÓlÝ4[7ÍÖM³uÓlÝ4[7ÍÖM³uÓlÝ4[7ÍÖM³uÓlÝ4[7ÍÖM³u×4þÿR|>ãv‹‹5®^Óø?-VÃL¸OÄ$Li4¾¦ñßX ë¤Ì5ÿ6·ns—ÅŒ¶ñZ`쀰‹e²0»ù½—µõ1½[ \Ó>Â8¬òZ`žq̵ÿÞ¸\j¿k/ ­Ð^ÀX —Áå° –Ãl¶- ÚÓj×´0öÃ=p/€ƒpî³jµ0î‡àAx†ÃpŽÂ18š>Ú ß…ïÁ÷m»‚¿‚Àá1þê8<OÂSð4<ÏBô ~¬#ÆkÁ_3}ΔÔþÂx^„3¨wÞ„·`ÎÂÛÐî¿°7\Wþ‰]›“†×ÕŸ'Å*„×5úý§8Æ’GÜ-qÒóð¡F›ëêÏ¢z²Ö£žlôáN[z²±vÂ.[³z²1çlýŒóð¼o-jt2>?Wþg±ÜííŸsµäs¥ïk±Ê'uVû¹*Õ+´¼jÓòªÍèÃ.[R5sâ kžaÍ3¬y†5ϰæÖ<ÚgXó kžaÍ3¬y†5ϰæÖ<ÚoÞrsâNcð[ð;ðßá/Åïú(ð®ïú(ð–où(ð–où((Ë«a &XC¦”Ü‚²l¬…u:[/ð–³o9+ð~³ï7+ð~³‚rjKÚûÒ ¼A½páÖÑfA™5ŽJ“_*(­ú…w\xÇEw\xÇEw\xÇEw\xÇE·[x»E·[x»E·[x»E·[x»EA¹Ö–*ׯ~¸î…pA{sZ7§xsZ7§xsZ7§xsZ7§xsZ7§xsZ7§xCZ7¤xCZ7¤xCZ7¤xCZ7¤xc|7Æxc|7Æ‚¦aðœA±ð&¼ pÞ†wà:ß…óо|t[£½ñiøŒ{$>«‘á¶z‘ñ¸.…%°FݱÆ`Â}.&aJ©¿­^d¬…vmä¶z‘Ím€´ØWÃWàfØ3 
vÀNØÅ²0»ù½—¶ú˜ÞMUýp/k³cøÛA8Í»Û8ª´ÞVŸüJ×8y[ãÕ×âÛÒðvÀ®.ÞœÕyÍmíkî‹›A{Êõ¶ö5ÒPûc)\—Ã2X#p[+áJXka¬‡¯Â5p-\×à p#Üûl«ƒ(Dõvã^8áÜgª·÷Ãð <Ãa8Gá‡ï›òÚ[?€ÂcÌ=OÀ“ð< ÏÀ³ð#þêcóB{+›>GçáxÚ·‡n/Á¼¸oÂ[°gámxαþ»pÞƒ÷Íwí5Œà—Öt¤d|+•sJÍ×¢½sŒ½sŒ½sŒ½sŒ½sŒ½sŒ½sJͱÆ`B½nN©1¦` ¬…u,SÏtlTœSjŒ«á+p3lƒÎ)5ÆØ »XCæ`7¿÷2ÝÇôn÷…Ø÷²¶*„CpX{„9¥Æ8 íÚÚœòòñm¦?bm»ßæ”-Ã?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?Ç?§>/­ÔçýpÜ à ‚ûl«Õçûáx‚‡á0£p ŽÃ÷mKÕçÀá1æ‡'àIx ž†gàYˆbêóÆ_Ãsæ‚ú¼ñ¼'ØÞKpmoÀ›ð,ÀYxÞv pŽk€s\œãàœú¼¼SŸ7>€_òûWð¡8¯ýÅÏŧá3îGâbGÌ«WÿV¬†1h÷KÌs¿Ä¼zuA¬µ°Niç~‰yî—˜×ÙÁ±‹é,ÌÁ^–·½ö| žÑ™×¼Æ[Õ ñÖX —Áå° –Ãl¶:ÕCT›zˆ±î{á„CÐîv˜çn‡yîv˜çn‡yîv˜çn‡yîv˜çn‡yîv˜çn‡yîv˜çn‡yÛkëtlo|¾1÷8<OÂSð4<ÏÂs¦€úƒñ¼(>”/ÿ.> Ÿq[ÅÅ®WL©•‡RÞX ë܉½LŸÑQèÀ=Gü00/ÁËp ^Ñ~ðaà*¼¦£ô‡G:y(ýÕ–ô7–Âep9,ƒå0›UÏu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pçu.Pç#ß#:÷|¤cøËâJéÿHÇð§ÄIíwéÞøPýó‘Î_Ä }둎ä>Üi¥ócì„öÆõGÁ,´7®­?#°JkþZkÓ´Öfôa¿gaÎÝ ~ËîOÚw ÷í;†¿+Ýoƒö5ÃÏ‚ö½Bý®5}ØÅïY˜í b_í bÆgÜ]ñY÷@|.rOĘþÓ‹ÝŠ!wS\ÂïKa ,…ËÝíëcÆr÷bÄeÄÌ­€•î}q¥Ë‹«Ü—b•Û/¾ì¾Ú7ˬ’jƒq÷Ç }¹ìFо\fL¹[Aûr™±ֹσöå2›Û=þʇÛa»ûƒ¸¾ßä÷ Óo1½îâ—Ø »Xgæ`7¿÷°|/5ôñËnw;h_13îaî€û}о_f‚Ãlõe«Çø}Üý)hß/û]ðyëéou®g\jKª?Ë)õgc)\—Ã2X#̓hnÿ‡%®„Uæ¦ýO–X ë`=l6—íÿ³Ä5p-\×à p#Ü7›ãÁ6¸n…Û¬Ø}5¢·³|;Ü_ƒ¯Ã7à›0ß‚;­·(MÆØ »¬ç¨Ws°Ûô öÀ^Øg½H#¿±î{á„CpŸù«‘߸€á!xÃ8 Çà8ü'ëÁ†?ÿ ÿþ þþ¯ÿþ¾Ïï¿‚Àá1Öž€'á)xžgáGüÕÇð×ðœõOíMŒàE8aýPGÆzÝ xÞ‚8 oÃ;pŽ¿º çá=ø%ýù+ø>V ß |Ë}#ê8G<ï>'ÜñüÄ='Ý#ñS¦óL_vÇÄ)xM}õ;v7”¨#|ñ޶â;vÏ¿h5|Çî}Ä%ö¾¬ }k )hß0ÚX´DcÑ—0 -Ñ1öŸÄ&¸¾7Ã6¸(GKìMnâUŸ\¢,!q“â;òqIà¨[?qˆ“î_ÄO™Î3}]£ñUû¡øP#ÉUk|bkP‚T§dôáN«VY0vÀNhYXB–…%öv2ñœ³ªäŽqÞƒ÷iå ø@´¯!üuо†`¬Ôže©Ý{´o"¼)šÚöÕƒ{AûêÁã }ï@KÚ½gAûÞq§-o÷ž‰°vÙìÞ³ }áÍ ½ÿ–qcâ;î#ñ¨û•x]í~Ͼ³,λŸ‰OÜ;Á—Xþ%–‰å_bù—Xþ%–‰å_bùR–/eùR–/eùR–/eùR–/eùe,¿Œå—±ü2–_ÆòËX~Ë/cùå,¿œå—³ür–_ÎòËY~9Ë/gù2–/cù2–/cù2–/cù2–/cùr–/gùr–/gùr–/gùr–/gùËGX>Âò–°|„å#,aùUì÷WÙ×~År¹¿Ê¾ö+®rób•FUöµ_ñ¼öû«ìk¿â%xYùZe_ûgáån•½Q$¸Ê¾ö+n³uÚ×~EÚžt•}íWÌÁn[¿}íW쫨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ªŠªª¨ÊÞróEÐÞrcLi,²·Üka»´·ÜØÜبtØ[nŒ«á+p3lƒ],™…9ØËÚö2w@û/{Ëq³Ì•ö–›o‚ö–ýbßìÚ[nŒ?†ûáx‚‡!k ²6;s Ú[nŒãð<OÀ“ð< ÏÀ³pÂ*´oví-7öË xÞ‚8 oÃ;pζ¾Ù´·ÜïÁûÌý>£¸Å…(.Dq!Š Q\ˆâB¢¸Å…(.Dq!Š Q\ˆâB¢¸Å…(.Dq!Š Q\ˆâB¢¸Å…(.Dq!Š Q\ˆâB¢¸Å…(.Dq!Š 
Q\ˆâB¢¸Å…(.Dq!Š Q\ˆâB¢¸Å…(.Dq!Š Q\ˆâB¢¸Å…(.Dq!Š Q\ˆâB¢¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P Õ¸P 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆáB b¸Ã….Äp!† 1\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãBâ¸Ç…8.Äq!Ž q\ˆãB¸À….$p! \HàB¸À….$p! \HàB¸À….$p! \HàB¸À….$p! \HàB¸À….$p! \HàB¸À….$p! \HàB¸À….$p! \HàB¸À….$p! \HàB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Ä…$.$q!‰ I\HâB’¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â….¤p!… )\HáB R¸Â…û½xÄý1Xc_¢¿€ÄZæÖ2·–¹µÌ­ensë˜[ÇÜ:æÖ1·ž¹õÌ­gn=së™û}û¾¼ÕùÎ÷íûòb &ôWß·7;‰)XkaΕ¾oovà6~éb: s°îÖVß¾//ž×ùÑ÷íûòbŸµeß—ûá¸ÀA8÷ÙÚì Nâ~x„‡àa8 Gà(ƒãð¨Ukß—ß…ïÁcÌ=OÀ“ð< ÏÀ³ðœm‘}_^¼/ŠÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨ÚŒªÍ¨úƒÀ³î¤ø|.Ka ,… wEL”Îú µ°Î}&Ö3·6ªÎhL3®†¯ÀͰ v±dÖ͈9¦{YÛ^æÚ•ááÖˆ÷iÆQwLsÓâ÷¡xEãêìm?âçî–8³¥ß¸'ÁhÜÓßWØ+`%\ k`-¬ƒõðU¸®…ëàz¸n„›à>Û¹oÜÀƒð< ‡á…cpƒÇá xž‚§áxN˜JW3lé xÞ‚8 oÃ;pΔԸjœ‡÷à}ÓVã›ñøCû²ø¬»$>_€KàRXKa¹k#p…+ˆ°Ò­Wºq±ÊýPL¸Sb¦Ô£~h_Xka;#Ö3·6jœÿ¡}aY\ _›aìbÉ,ÌÁ^Ö¶—¹î¢8‡à°û8GÝjqÌõ‹G\“øŽKŠGá'lѤû[ñS¦óL_q??S¯û¡}aYœfúº?w;Åxë¿ÿ λ˜øÐý½¸£ð7ð‰Íµ/,‹+LOû²X WÂX ë`=|®ká:¸n€á&¸Í¶Â¾E.úp»9e_XwÀ×àëð ø&ÌÀ·àNóÔ¾E.vÀNØe.Û·ÈÅÜgŽØ–Åýð<ÁÃpŽÀQ8Çá1xž€'á)xžgá„ym_XgÐö¼ oÁœ…·á8gýÁ¾°,ÎÃ{ð¾õû²ø@L“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”4II“”WíÛ¾âbµòª}ÛWŒè8êUû¶¯X¥ýø«ü_ëöm_ñ¼¬c³Wíkàâ¬ûƒxÇ=ïÚ´}óWÜfk³oþŠ>ì²µÙ7Åœ¸†Ö×ÐúZ_Cëkh} 
­¯¡õ5´¾†Ö×ÐúZ_Cëkh} ­¯¡õ5´¾†Ö×ÐúZ_Cëkh}-­¯¥õµ´¾–Ö×ÒúZZ_Këki}-­¯¥õµ´¾–Ö×ÒúZZ_Këki}-­¯¥õµ´¾–Ö×Òú:Z_Gëëh}­¯£õu´¾ŽÖ×Ñú:Z_Gëëh}­¯£õu´¾ŽÖ×Ñú:Z_Gëëh}­¯£õõ´¾žÖ×ÓúzZ_Oëëi}=­¯§õõ´¾žÖ×ÓúzZ_Oëëi}=­¯§õõ´¾žÖ×ÓúzZ_Oëh}­o õ ´¾Ö7ÐúZß@ëh}­o õ ´¾Ö7ÐúZß@ëh}­o õ ´¾Ö7ÒúFZßHëi}#­o¤õ´¾‘Ö7ÒúFZßHëi}#­o¤õ´¾‘Ö7ÒúFZßHëi}#­o¢õM´¾‰Ö7Ñú&ZßDë›h}­o¢õM´¾‰Ö7Ñú&ZßDë›h}­o¢õM´¾‰Ö7Ñú&Zÿ‘}e[ŒèÈóGö•m1án‹I˜r_‰5°ÖéáGöL«Ø»`VÕþÈžf{YrX•üȾ²-^q¯í+Ûâ6k×¾²-ú°ËZ·¯l‹9¸ÏÖfÏ–Šûáx‚‡á0£p ŽÃcð8<OÂSð4<ÏÂ*¼oÂ[°gámxGlAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lAÃ4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4lEÃV4ÜlßX›àjø Ü Ûà^hwìlæŽÍܱ³Ù¾±.޹¿ˆãîkñˆû2¸™;g6sçÌfûƺxÎÃ{ð>K~ˆmTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mTÒF%mT²…J¶PÉ*ÙB%[¨d •l¡’-T²…J¶PÉ*ÙB%[¨d •l¡’-T²…J¶PÉ*ÙB%[¨d •l¡’­T²•J¶RÉV*ÙJ%[©d+•l¥’­T²•J¶RÉV*ÙJ%[©d+•l¥’­T²•J¶RÉV*ÙJ%[©d+•lÓÙÐ]ñ)Ûo < Ÿq:>Óùѧâsð¸Øu‰K˜^ K`)ŒºÅb5ŒÁ„{VL”{N¬µ°Î½!Ö3·6º›àj·_|…éÍ::Ý&Mlz›û–øšk3îq쀰‹µeavó{/íö1½ÛýØ÷¸Ÿˆ{Yÿ€û©8‡à°{Q£NÇR^Ç1R^çR^GT·]xÞuŠÜûâE÷oâŠ]‚—Ýq ^aÛ?sÝâU¦§™¾æÞ?wÿ(ÎÀGÚo³/e‹™þƨñJõ¿g~_‚¥p\Ë`9ŒÀæ”οŒ•p%¬µ°ÖÃf«\ga6½®…ëàz¸n„›`Ÿ+w[°o‰{Ü2q/Ón…8Ä2ûÌ#ûj¶¸ß…ÄürМ҈jÓ‡Í)ûj¶8Gù} ŽÃ£.,þ£+ßµ~|þÄ]ßgm¿‚Àá1W$g 
'Ü_‹'™>Åôi¦ÏÀ³ð#þêc·Gü5Óç¨í¼K‰Üñ"¿LXÿQ3øuÞ„·`ÎÂÛðœ³>¦üçá=xßú›òk|¿´¾g_Í2ýXUyäÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵG®=rí‘k\{äÚ#×¹öȵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\ûäÚ'×>¹öɵO®}rí“kŸ\oWŠ¿»Y1ª>³]Ù4Æ`Âý^L¬µ°NgÊÛí¥bܦó‚íöæR1 s°~ø¶Î ¶+ ÷E;ƒØ°3ˆíêKªD}C­«oûá¸ÀA8÷Ùúíí¦â~x„‡àa8 Gà(ƒãð¨Õooß…ïÁcÌ=OÀ“ð< ÏÀ³ðœm£¼6^€á—¶ÕrÁøPlGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôoGÿvôßþ;Ðúï@ÿè¿ýw ÿôßþ;Ðúï@ÿè¿ýw ÿôßþ;Ðúï@ÿè¿ýw ÿôßþ;ÐúÿÝÛs$÷yÞq ’˾òExiÙ¢(®DŠ–U’xSª(a*eY'Û•²š æÈîÞnΈçÅqNN¸»U®²&\`猙Àµ,Š‘˜¢²',îZ‚Å)%ú[Òùõçæ©) §ß÷ý¾Ïƒ™Ù&§—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð_ ÿ%ü—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/á_¿„ ÿþ%üKø—ð/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ/ã_Æ¿Œÿ2þeüËø—ñ¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯à_Á¿‚ÿ þü+øWð¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯â_Å¿Šÿ*þUü«øWñ¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† ÿþ5ükø×ð¯á_ÿ† 
ÿþ5ükø×ð¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñ¯ã_Ç¿Žÿ:þuüëø×ñÁÿÿÂÂÇò÷ƒ~œþFÞúPþŸƒ~>ýúúżôKôËù8è“ô)ú´Ÿ?C¿BŸË·‚–œ¹–ÿ8hDcšÐóŽLiF_ôó¦s~Ïã—òyЗé+ùiÐVþ?‚¶éNx!lórÐwò¿ z¾›O‚þ¾—ß ú#ú~þýÅ?Q½KèGÅŽ‚÷ }@ÏÂù#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀˆ#Œ80âÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90æÀ˜cŒ90áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„L80áÀ„<ø~Ðå §¿‘=èoæ·ƒþýmúPý?A—>L?Ÿ?ôèèóÏýý2}’>EŸÎ_ úŒÇ_¡˜ïý*ýý:ýó@æüÂw=~.ÿçAKy%h-/hLzÞÙRšÑý¼éñ÷<~)ÿRЗé+ùõ ¯:ÿkù»A_§oÐV^ Ú¦üoƒvõÓ£|~>ø9èNþ\Ðâ{´Î/ìå·‚¾“7èuúnžý!}/oýoùfÐyücßwžŸäkAoПæýY¾ô,|f9¿ðóüdñ|ÈNè'd'ì+d§Ð‡é§è#ôÓôQzŽ~¦ØÔâgécôqú$}Š>MŸ¡Ï_ü†ÇߤߢߦߡBÿ”þý^þ /p}í|Èæ‚¾š-èkùÓA_ÏÿUÐ7üöB±—ÍBWò?ºêñZþ/ƒ®{¼A[ùm;CÇOºù? 
ÚóøÍÂ!›ô’Ç—é_‡4_üO }»Ø~ÈitÓã-zÕ‘}çÐ!å_:öx’?têñ¶gÍÂߢó!§Åãk…—BN¿t7¯Ýó“w -^§ÅÿEr>d¶ØÝ-z›Þ¡ûô.= ÷ -Ò#zL?,\·xBOéG…sBº‹~x|ºJ¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;•îTºSéN¥;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îLº3éΤ;“îCOƒ~‰~9ÿyÐ'éSôéüAÐgüö+ôýü«ôkôëôÏéwéyG¦4£M?•¾–Ÿ}¾A‹ï(~Ñw¿¸P|·í‹!AÇAÿ*däEßHübðϯƒ^(ΜPè ]¥ktnÐmÓíÒ½Jût@‡tDÇtB§ô¢Ï°£Boèç&½EoÓ;tŸÞ¥ô^1KØQ¡Gô˜~hºz´a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.vѰ‹†]4ì¢a »hØEÃ.áïÀ¯ƒ>MŸ¡ß ß¤ß¢ß¦ß¡ÅÖ¶Ö°µ†­5l­ak [kØZÃÖ¶Ö°µ†­5l­ak [kØZÃÖ¶Ö°µ†­5l­ak [kØZÃÖ¶Ö°µ†­5l­ak [kØZÃÖ¶Ö°µ†­5l­ikM[kÚZÓÖš¶Ö´µ¦­5m­ikM[kÚZÓÖš¶Ö´µ¦­5m­ikM[kÚZÓÖš¶Ö´µ¦­5m­ikM[kÚZÓÖšÔ  -6Ò´‘¦4m¤i#MiÚHÓFš6Ò´‘¦4m¤i#MiÚHÓFš6Ò´‘¦4m¤i#MiÚHÓFš6Ò´‘¦4m¤i#MiÚHÓFš6Ò´‘¦4m¤i#ß[¸–ÿ¯ ä?z= gøÞÂQþA‘ÿrñ%Ǽ䘗ó’c^rÌKŽyÙ1/;æeǼ옗ó²c^qÌ+ŽyÅ1¯8æǼâ˜Wóªc^uÌ«ŽyÕ1¯:æ5Ǽæ˜×óšc^sÌkŽyÝ1¯;æuǼî˜×óºcÞpÌŽyÃ1o8æ Ç¼á˜ æŸ zŽ~&ÿßA?K ï^.øæ Ѿ—ÿ*èèá Â9´øæí ‹ÏgXüKú<-¾ýþÂâ¿§K´DË´B«´Fë´øç ¾Ãù‚ïp¾à;œ/wšÒ,è²Î—u¾¬óe/ë|YçË:_Öù²Î—Cç§Aò£ Å÷u/ë|YçË:_Öù²Î—u¾¬óe/ë|YçË:_Öù²Î—u¾¬óe/ë|YçË:_ÑùŠÎWt¾¢ó¯è|Eç+:_ÑùJèüAòŸ=,ë|Eç+:_ÑùŠÎWt¾¢ó¯è|Eç+:_ÑùŠÎWt¾¢ó¯è|Eç+:_ÕùªÎWu¾ªóU¯ê|Uç«:_Õù*櫘¯b¾ªóU¯ê|Uç«:_ÕùªÎWu¾ªóU¯ê|Uç«:_ÕùªÎWu¾ªóU¯ê|má‘ÐÉÚ§iñMãk¾i|Ík¦X S¼ôñð¾h-LñHÐká¯ÓÚÂßùí÷óÿôï=þÇ„OCk ûáïðZ˜îNÐâ/ÒZÈ×Oƒþ¢xîbñýík‹ß¥Aÿ--¾½|Í·—¯ùöò5ÖXC` 5ÖXC` 5ÖÐm PhLz¾è?(4 ºŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ë¬#°ŽÀ:ël °Àl °Àl °Àl °Àl °Àl °Àl °Àl °Àl °Àl °Àl °À-Z´h!ÐB …@ -Z´h!ÐB …@ -Z´h!ÐB …@ -Z´h!ÐB …@ -Z´h!ÐB …@ -Z´h#ÐF @6mÚ´h#ÐF @6mÚ´h#ÐF 
@6mÚ´h#ÐF @6mÚ´h#ÐF @6mÚ´h#ÐF @:tè ÐA ƒ@:tè ÐA ƒ@:tè ÐA ƒ@:tè ÐA ƒ@:tè ÐA ƒ@ºtè"ÐE ‹@.]ºtè"ÐE ‹@.]ºtè"ÐE ‹@.]ºtè"ÐE ‹@.]ºtè"ÐE ‹@.]ºtè"ÐC ‡@=zôè!ÐC ‡@=zôè!ÐC ‡@=zôè!ÐC ‡@=zôè!ÐC ‡@=zôè!ÐC ‡@7ÃÔŸzŽ>–ôñp†7ä«‹o†~ÂÏC?…>O‹;4½Î\hLzÞñ)Í‚^tæ‹Î|Ñ™/:óEg¾èÌù¢3_tæ‹Î|Ñ™/:óEg¾èÌù’3_ræKÎ|É™/9ó%g¾äÌ—œù’3_ræKÎ|É™/9ó%g¾äÌ—ù²3_væËÎ|Ù™/;óeg¾ìÌ—ù²3_væËÎ|Ù™/;óegþë…ƒ…Ï,þíµ…ÇßZøÍà·~‹þ6ýú ú»ôaúhþ¯ƒž£…O.o…NA¿˜ïýýrþaÐ'éSôéÐù[áóuñÛ¯Ðó4¥m:ò=ú#úApï[ ? ïQßZ¸A‹Ï5o-~¦èdñ³ô1ú8}’>EŸ¦ÏÐoÐoÒoÑoÓïÐ?¡JÿŒ>WL¨ú<=_ÌèšÑ EÏá³y¡+t•®ÑuºA[´M;´K{ô*íÓÒÓ ý/îòö_éÛ þ¿<è9úXþƒ>ž¿ô‰ü© ÅgÌ·ÝKèíýÿ´ø¤ù¶÷ùo‡ìÿ2è/òÿ¹øöâsÅÿ’>O_(Î\ThLz¾8sàPhôŠ®èኮèኮèኮèኮèኮèኮèኮèኮèኮèኮèaS›zØÔæ6õ°©‡M=lêaS›zØÔæ6õ°©‡M=lêaS›zØÔæ6õ°©‡-=léaK[zØÒÖ¶ô°¥‡-=léaK[zØÒÖ¶ô°¥‡-=léaK[zØÒÖ®.<þÎ_ ýXÐsô3¡îÕðªQècùëAÏûA?—ÿ8èá/ÆÕ…ßɺºÐs̵»«áuäçA¿ú¹^GŠÇ?ðøƒüï‚÷ÕºúÿIÐC‹»G] ý?X¼ºølÑCHP¨Tèóôß篅.Ñ-Ó ­Ò­ÓŠnÃì…Æ4¡Åµ¡«‹)Íè‹Å‹ Ú ÚG£F>}4úhôÑè£ÑG£F>}4úhôÑè£ÑG£F>}4úhôÑè£ÑG£F>}4úhôÑè£ÑG£F>}4úhôÑè£ÑG£F>}4h Р1@c€Æ4h Р1@c€Æ4h Р1@c€Æ`áAþë ÿ@ 2dÈ  3@f€Ì™2dÈ  3@f€Ì™2dÈ  3@f€Ì™!2Cd†È ‘"3DfˆÌ™!2Cd†È ‘"3DfˆÌ™!2Cd†È ‘"3DcˆÆ!C4†h Ñ¢1DcˆÆ!C4†h Ñ¢1DcˆÆ!C4†h Ñ¢1Bc„Æ#4FhŒÐ¡1Bc„Æ#4FhŒÐ¡1Bc„Æ#4FhŒÐ¡1Bc„Æ#4FhŒÐ¡1Bc„Æ#4FhŒÐ¡1Bc„Æ#4ÆhŒÑ£1FcŒÆ1c4ÆhŒÑ£1FcŒÆ1c4ÆhŒÑ£1FcŒÆ1c4ÆhŒÑ£1FcŒÆ1c4ÆhŒÑ£1FcŒÆ1c4ÆhŒÑ£1FcŒÆ 4&hLИ 1Ac‚Æ 4&hLИ 1Ac‚Æ 4&hLИ 1Ac‚Æ 4&hLИ 1Ac‚Æ 4&hLИ 1Ac‚Æ 4&hLИ¢1EcŠÆ)S4¦hLј¢1EcŠÆ)S4¦hLјúÔ6EcêSÛ4Ð(Ž/hLј¢1EcŠÆ)S4¦hLј¢1EcŠÆ)S4¦hLј¢1EcŠÆ)S4¦hl/ü“p†í…åôãô¡ü¹ æßzŽ~.ß úD¾ô÷ÃDÛîK²í¾$ÛáSÃGAŸ¤Oѧƒ¶Ý—dÛ}I¶ž Ÿ¶ÝydÛG¶Ýyd;|v(ŽßÉôð.k{á:}WÅÒ÷§ÝíðÉ¢Ð÷ó¿ú“üfÐ….þ^Ñóâ'éÃôSôúiú(=GŸ-¦ äÃ\|¡ÏÓóÅtU¡}±˜4°*´I‹»œl»Ëɶ»œl»Ëɶ»œl»Ëɶ»œl»Ëɶ»œl»Ëɶ»œl»ËÉöâ›Å‹ô½L¯úmŸèŽè˜Nhq÷“™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnfw3»›ÙÝÌîfv7³»™ÝÍìnnws»›ÛÝÜîæv7·»¹ÝÍínnws»›ÛÝÜîæv7·»¹ÝÍínnws»›ÛÝÜîæv7·»¹ÝÍínnws»›ÛÝÜîæv7·»¹ÝÍínî;{çîÈ9wíln›sÛœÛæÜ6ç¶9·Í¹mÎmsn›sÛœÛæÜ6ç¶9·Í¹mÎmsn›sÛœÛæÜ6ç¶9·Í¹mÎmsn›sÛœÛæÜ6ç¶9·Í¹mÎmsn›sÛœÛæÜ6ç¶9·Í¹mÎmsn›sÛ¼¶öOƒž£Å«É5¯&×\Å»^Mö‚>‘ÿQÐòºŸÿ*hñÇ×л8„ç…>O‹×‚k^ ®y-¸æµàš×‚k^ ®y-¸æµàš×‚káµ Ô 
¯…Æ4¡ç‹êj¡YÐ=ïèyGÏ;zÞÑóŽžwô¼z¾tßo‹Ï¿;¡çA>ïè|Gç;:ßÑùŽÎwt¾£óïè|Gç;:ßÑùŽÎwt¾£óïè|Gç;:ßÕù®Îwu¾«ó]ïê|Wç»®“îê|w¡¸ëÓ®Îwu¾«ó]ïê|Wç»:ßÕù®Îwu¾«ó]ïê|Wç»:ßÕù®Îwu¾«ó]ïê|Oç{:ßÓùžÎ÷t¾§ó=ïa¾§ó=Ì÷t¾§ó=ïé|Oç{:ßÓùžÎ÷t¾§ó=ïé|Oç{:ßÓùžÎ÷t¾§ó=ïé|Oçï„Îÿ8è9úX~ô‰üíÅwÜþ÷…Ç}áß Ï-~žÒ,èuϽî¹×=÷ºç^÷Üëž{Ýs¯{îuϽî¹7ü[ú ÿ–~Ã=¬o¸&~Ã=¬o¸‡õ ÷°¾áÖ7ü»÷ ÿî}ÿ{ßpEû†+Ú7\ѾáŠö ÷°¾áÖ7ÜÃú¦Z7Õº©ÖMµnªuS­›jÝTë¦Z7Õº©ÖMµnªuS­›jÝTë¦Z7Õº¥Ö-µn©uK­[jÝRë–Z·Ôº¥Ö-µn©uK­[jÝRë–Z·Ôº¥Ö-µn«u[­ÛjÝVë¶Z·Õº­Ömµn«u[­ÛjÝVë¶Z·Õº­Ömµn«u[­;jÝQëŽZwÔº£Öµî¨uG­;jÝQëŽZwÔº£Öµî¨uG­;jÝQk_­}µöÕÚWk_­}µöÕÚWk_­}µöÕÚWk_­}µöÕÚWk_­}µîªuW­»jÝUë®ZwÕº«Ö]µîªuW­»jÝUë®ZwÕº«Ö]µîªuW­µÔ:Pë@­µB­Ÿ-þ«­µÔ:Pë@­µÔ:Pë@­µÔ:Pë^xòA?>¡Ü ïC }(ßúhþ çh‘Ù{áÝÈ{AŸŸ,îù4t/¼øiÐwÂ_’{áýC¡ï†÷÷Âû‡BßÏtßùü¤øïšî…÷ ¡JxŸPèÃôSôúiú(=GŸ-ú 3†NÂŒ…>O‹ÿçñÞbJ3Z|º¹çÓÍ=ŸnMwhºCÓšîÐt‡¦;4Ý¡éMwhºCÓšîÐt‡¦;4Ý¡éMwhºCÓšîÐt‡¦;4Ý¡éMwhºCÓšîÐt‡¦;4Ý¡éMwhºCÓšîÐtG¦;2Ý‘éŽLwdº#Ó™îÈtG¦;2Ý‘éŽLwdº#Ó™îÈtG¦;2Ý‘éŽLwdº#Ó™îÈtG¦;2Ý‘éŽLwdº#Ó™îÈtG¦;2Ý‘éŽLwdºcÓ›îØtǦ;6ݱéŽMwlºcÓ›îØtǦ;6ݱéŽMwlºcÓ›îØtǦ;6ݱéŽMwlºcÓ›îØtǦ;6ݱéŽMwlºcÓ›îØtǦ;6݇aºŸýXþ£ §å/}4ÿ7AÏÑÇò_ý\~=èù[A‹O†÷ù¿ º“ÿ8è;áÝø‡aÆBß ïç? 3ú¾Ÿì‡÷–zç󡿆C­0c¡ÓOÑGè§é£ô}¶è*Ìú 3ú<=_ôf,4£Å»ô½Kÿлô3ž˜ñÄŒ'f<1ã‰OÌxbÆ3ž˜ñÄŒ'f<1ã‰OÌxbÆ3ž˜ñÄŒ'f<1ã‰OÌxbÆ3ž˜ñÄŒ'f<1ã‰OÌxbÆ3ž˜ñÄŒ'f<1ã‰OÍxjÆS3žšñÔŒ§f<5ã©OÍxjÆS3žšñÔŒ§f<5ã©OÍxjÆS3žšñÔŒ§f<5ã©OÍxjÆS3žšñÔŒ§f<5ã©OÍxjÆS3žšñÔŒ§fü(Ìò/‚ž£å·‚ײ? ³´ƒ„é>rß–ÂùÃ1áü…>O‹ëÚ¹®ý‘ëÚ¹®ýQ¨žêš½¯Ê}Uî«r_•ûªÜU~´¨r_•ûªÜWå¾*÷U¹¯Ê}Uî«r_•ûª5£þEýgßL_¹ï{N#Öd5eÍ•æEËhWµ‡ÚÏ:PÇèRº‚®¤Û×ëî ²B)è=t Ý@÷z™Þ¢OêKú#ý7ý³Ae  K†’¡b8P L|hÀj˜„§à$\„·àCø>ƒ¿À?à¤ñ! 
’@²HÙB>"‡ÈoÈg¤‰ü4ªŒ„1oÜ4î rÓƒ'ƒ÷&‡©fúfúgÈ.‚ âÄ,1O,«Äñžx qC’š Í…*¡j¨ª“fÒEdœœ%çÉ%òbÒ8é˜Ä&#“”RPe¡<IMQIšC‹i6ÐV¥)z.Ò›ôý‘¾¡ÿ¦Ÿ.#aXf†™c™wÌWæ–¹gžÃ@XV‡Sá\¸nEâ‘ÙÈ|ä,r¹ŽÜE=Q2:MF³ÑåèZ´ÄòY9ÛÏšX'`Yv›­°U¶ÆÖÙÛd[1&–ˆ¥cÕX-V=Źqc|%~?û‘»4RÑ{(8wŽxœíWÍrÇØ)R¶œÅqrPM˜ Y&–EX.ñDË%«P’ŠRÙ—2ØìNawg=3‹5xMJ÷<@ ç<‚ß ·ÜrJŽy…t÷Ì RÍćT*DÛ;3ý÷õ×½ Æï½a=æÿ¾ƒ/÷ØGpçå÷Ø&ûKßgûko±Ÿ÷6ƒ|›õyƒ}Ô; ò&ôDï°Ozò{ÔûS·oÙÞß‚|—}²ñ*Ȱ_lÌ‚ü!ûnã÷A¾Ç~¹ù "éÝÚ‚@ßlŠ ÷؃;ÿò{ìÞÖí ¿Ï~³õÓ ßbÑ–òm–mý1ÈìÁv{~“ývû×A¾ÃÛ¿ ò+·ÿäí;¿û« ßeƒû›Aþ€Þä{oî« ßcG?ûgÄ~òqkÿÇlûãÑ]-ŒJ3Çwã=~8ûð5â“w‹—:eÂÇõLØsþ幒ɹŠùnæ\õøà išÈ-*:ź8Øãr?“Vš¹LøS]:þ\’Ó;«X–¶ê2‘†»LòW§cþ¢’¥?ì󯤱J—| —mlTåldUi“¼x:Þ›`þU ÊÈ™Lë\˜ÃÒ8†ÆÇË}:pŒ ¿>;êEƒè0 /ØàÁH„7†ðŒúƒÏûƒÏŽyƒ”§¼%.xÃ[®,Ü‘ÈB˜×Ó+Á®Ú¸XBú$Ý“è8ã¨õ…vJóçj¦sgr¾roàjº®äTÄ’OE¡òO¤Ui 5ì© uQ@m Ù™”ß: ÕzËþC¤É×ÂXÑô'Ël™Ëœñ à…„0øŽ·±ƒ&!]e3¨ã©ˆ”´"Ž…‘Ó:‡è&Ѐ`­UBqJàF¬Ëi®b:å•QÚ(ʹrÜfºÎn¥,ø7µrÀ?¿´X-Ýo2 [m}J×!I>©oH3Q¶ÊÅÂ;Ë©*EŽTήkIRHתsi#þà‰sa­Šá|et¥ Ô£´û|;#© ö¹²j’ËѺª¤‰Ò>UsIk¹tNš©6…õ…RÂðL˜B— _–\¦ˆLÄOiXl4§ °‚aÚ\ÊÙªê¹Ö3^ˆà+ç*H ˆ€‚pÔF¼ö€œK¼×uš!ƉK¥v|ªó\7”¤6† ÃwâѰ²P}£k"P"P¹%þ/S´>gr/øTÊ+ G€Ö…Ëöo•{L¬3öàRÇ®6Æ\ Y'&‘óÐ`ƺ¶¥´P“Ó’¨Šcíh?äL¼Ç\S#…yÉñ]ê+<•ɼÂ0®ï±=b¬žƒOyš/ª ÒÄ•R•’7GõM«œÞZ"| ÈY(?ÐÄò!êúÑá» ˜ŸÖ"•€þ¶žÀ¤ÎAõâ.‡D™çx}f°êåéË^eº”Žœa1cšT:‹9TC § …¹ÆÒùºá Çuqõ¼êép¥ÂÿÀ3èêMö„iV±3L±”eÌ1ÎvYÌöàzÈlŸ~F°6³N-ØKÐÌ™`%K`eÌj6ƒ;ËÎáîKøVLÂ^c²‰¶øzÌàÓÐ'"KUÇR§5+àFЀ¶MÎÎÀš…Ãæd—³§p®¤xŸƒn«œí@Ö4HE…Þ%œ³A«&/h‡“eÔzÅNá$g/ <Ûµ½naV¾"m ëšÎÁÓð-ZÐCT+X³p5r¸jXMaÿxC–>Nµ¿*²;^VsO!rÄÈ@-¢PãP‡1H—-ô;Ž—\?§ïÏhýí<ȃçi@Þv˜ðîÜÚ]+¾UÖÀë\P¤3XCÛ7gntcë:aÅéXEær¨ICq&à'¦½Ö×°âaä¹Ë‰£SzámÙ[ÒöÝ¢–Y#·§°ç§ä‰ýœPw!¢eè_ûU‡Ôpº}ã+;èD)Ù·pV†Þú~ú—ÓäkÊ£n ²ÂÎ[YË|¯9Óœ|z4p:tãØYFé«‹øe¡O‰íŽð“¤ë1Ž)RÄ{Àc‡ñznxd-±¥Šj'‰ÏŠNy˜8>%Ÿq`tJç UÚC¼çÇ¥#»ì¡ß„î$±›³o`M‘ÌÐ3ó­B¬è}‡x—…éV/ñõùOÀn溯$®Õtm:>¨¢lk™êðü@{˜gÛŠ:ù]¾ý$ICu±Öç”wD]áÙÓ)K»q°XiBØ„þÀœ÷iG„µx9©<îsŠ_An9Ù]çh šÅ¢5ÌiNûí9Ôu„”¡ie»ÖQŠ|⤴‹±-Öº%§9Úr&¢Yà:>ìò‰†y!–MKú’¦Úå^ÏÁŸ¦=œ}³À_IÙ'+O~"´+"<óÚ™Ò×χv"ø HB¬ÝGþ¤4Óâ½ÜÈM¼÷ó>ȇø0á£|Œó >ɧø4Ÿá³˜Ó1gð9>ÏÍÜ­|/ò%|É—q/_Á'|•¯a®àëþBÆ,³E6•ÍÓüq÷í9E·üµî@©KèV6Ó5žIù&È|ƒ¬{ëIgÝ~YY=ï7¼Meá˜@y׿¦fû#P4°@D * ±‘r¢eÁ›ÕdÊgH@(Q¡D…JT(Q¡Ä„“ü1ÉFLs2¤åS¢c±@K´…Öæ´1Ã×"1½ˆ j(ÚcZÊ2ŒLZÏ$Í„’vwmdR%él&•Sò΄/o3}MÁ ]8¨– 
T€&öþÕ!ŠØÝ5 Õ$J +­zBòÙßQÉ–õ°¬‡Ã —;ý…Ê5÷]á+«zµ–™M&ŒLÞ°Œ¤/^°²¾&U Äìœj¨¬»‘Ë›­zÞH S C¾´iéJ›‘3òº3§þt2›—AM¬¦Ææ‰äªH®ŠäªH®ŠäªH®ŠäªH®Ú’×ÙL¥7ÓIÃ;QÕþ w{JÅ˘nª:µ/­²Ãõ„´­T»G§Ú9:JÜùã5îPKãáEYFtL;5m#;—•Z·Z/W­›«¶ƒÔ¥vT`Tg¼›#QêôD!oxÃJeŒ3TƸ9³ZØ_ïe­w²ö¬o)dRºUhMë…¼2Î¥Œ+¢„#Êxg60þ?%Õ`L™Ð©+\FDUÜ4xµœZ% –™IõlئbcÊÒ§J£[¢±³D÷Ƥiœ™ L*Òf²X:¹SAÝ+¢»n%ŠtNtºe¸^°ájjtøšBr×B±˜@y U”&õ_»B]Ì©ŠéîÑô2šnù©ÛùUÒžT)WäL±/ÖË“u}Én£’åR¬b_¢JÎ-·/åÞ»Ò3ľ•š\-M®–&WK • T iaˆ€0äîk!ahÂMSý¯Û‚ëlÁq¶°MÏí®³ín“íEζw8;³ÈÙY®³³:Ä.©6Òy=ÑÛ²¹¼•mk1þìv±LxœcðÞÁp"(b##c_äÆ  ÉØœ6I02h›¹99 ,Q60‹Ýi3#'Íá´‹ÁÂfNqÙ¨ÂØ±Á¡#ÌSñvq400²8t$‡€#`3/#ÖÆÿ­Xz721¸ldMqš%“rocksdb-6.11.4/docs/static/fonts/LatoLatin-Light.woff2000066400000000000000000001247141370372246700225270ustar00rootroot00000000000000wOF2©ÌQ0©e×Ö~…\`: — ‚ÂŽ56$ŽD„€ ‡$ ¯E“8 [2Ò5<¾¥K_ Ì؉âLž°kØZÚ­ÕSiÎDÖm%H«úê/² æˆœÅÆ÷xZ8ÔTÇ-!@ÝìwpíÚ=·6ýìÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿü!M’»ªÍÛm_D…ŒÜ(¦Qæ”(RF‘Ì#ÅÜ(c1<%3#7òbBÎJea¥Q¹–é© º¤B56pSÞ‚mСn¯?`cÍþ8r‡‘'3Á3š0­¶ÍzF3å!õÉX­†ÊVEîÐ º¤‚"µa¢&2˜Ye8ÊrXÍÖçxÁLP5 ¹÷#ò*40ϬŠê`¡R7¤d0¢+ËB%YI>I e %2©Ée Å/aÙö·‘5ÊÈÉɲÉ\ –²FœÆQ™°–-_4ϲ!è’ šÈZŽÁç´š²Òf¶T]Ú Ci-ÕyeÝú*X®¨`€l­Ñ nÈ:3à¬NÁä&jUYm†6ƒr°Š™4Qœ¡ŠñÅp~ hQkZ¥v4Tñ„G*ó6ìoÕTns¾}M}Šlr'­Þµxzw"íA{×ìCûÐAVꉊ‚5l.ü Àø!:<‰«]%1­{ŽÐFt´ÍìD  -FF-d’ârê´ù1X<ćaK(J3vˆÂd°.¢Hžhdàød)­aíþZT‹¡-À–U­Ê"EòX9‘í`¾Û=uÈ#,tv$Á²“íïÑvÚ¹ ÒXNmÚÇš´6*¨¸±/r:@Ù)VÛTž¢M¼]!'_ì,ϹsãvzUiH- ?ýÅÎÒâÝCÞD5*܃LÒàP3-ßlZvÓ„A3hCãƒÁö`+–ó<ßЊ”ÌlÊn aÒ´ÅR$3¾–\ÐÂçP¢e°c žßߌðB S’4ÆšLpTùyý;/ÂY}š¥t‰W)Gk­›¥ÑN°rb/ÑÎËt…UÄkås’M+ØXÙ%Ê5ŽP\e&ˆÐ.¥Žäšòëx öÄr2vƒÕÑAv V¡ .â+Z2œË-àÙMÀ‹êoª/ a•Ú „' Esx6E:~TÏ Koé¿­þ8¿ªv3¸£ÿ®þ{úéþÙµŽzô€õa½‹§(¦S³`ÏÊùh!(ãŠÀÂÞâ‚{à9°©¶ÕYSœihÊ"¼£¯„ü1ÈùnònÀJ¢Eøè¯†yÑ|7lÒñTíP²…×Rè¾hwgÊ=×ÿBÿKýÛ•>Æ›Ù+¾H\ »^ UU³N¿¾%J”I-Èzq@\ÐÿLÿµ'…uAIÁ ÈKÝoô×÷‹3oõ¿ÓÿVÿ{ýôÔHû õõïÿdŠÆéýÂIcÒ;·?Ë’¦µð•Æ­ûe_h¿)ß«ÿ«þoÉB5õô?Tÿ5Línð#øÂ@d?õÿÒEõ)ð›ÿ S{Uú÷Ÿ©½ÎÀš¦·tî•ì¿þ“úÙZ¤}³™v3íÿéÅ샿ޛôk°G Æõf@ÈQ²¦ÕD‡¼xœë'zþÿ^úês‘Œ•y¢ù'g Ïo³÷ƒE±°°£¦¢¨+×á¶ißéfäÒ¡­¸©Ø±rN7ÝpC¶[TºH‡¯³¾'˜¸R-§Á•†FÈ´Ôéϸ±T†¼íáwO»ÃÃ6û7g£FÍÅDEª‘*$Ĩ\š[ë\ºtåns»pQ‹¾[œÛõvU»ÿ1¢Ù&{ý¨Á4J)þÿ¹×ÄÞgÞ¸jhÌÜìNBáVû  xI jkÌÃoó¯Ê½¨r/å’çqî-¿‹,{¡ûŸ·¿ÑFq.±`20." 
\À{áÊå¥t<û?ÀàªÎëµz Sgp6)Èéì›jŸçT¹è\=;ïÌ®«•C¾ŠŸ„DŒ(‚€Òi)€¹Ý‡P¦˜/P…GGVþùaýƒÑ78/çÁsf™¦#Rqš‰ž¨ÿüuaÑqÞs.?]–‚)ÞH$áM`L‰K“,ùêÃÅ–Ø–B(þf?6¶ÿÑ“\ÎÛÊdRì‘@–Bêm&ípÛuÿÎHߨ ~ò=N!±•¶ø§ÝÛ4̰Tvóͽ*“7Y§gÛê!†`¬‹»ãM„= àÿ#¿ëçÞã >™Ê¯D„/ìîKx¹pÆö„ÍÌ]@ÿc›¡4T"Üw¶½T'ž’eÀÄ?Ñp+±8Sâž2È NœJ„NÜÈÇ×åZþT¨N`![§ÁU¿¾ã]Nr(KJ>ÁÕ°©ýPdûµÆäÊKS„GîÓ\ê0¥-‹KXá ºCÈ—©¹rþdËÉA’{@e*dU`‘BW$P@u…Õ×Í%‡Öf bµ.ÆAȯ§öˆÍ$ñòôP~WШ×ÿ¢£¯ñÆÏìçº@o—»tZ ßÿ×”’t9¥ªu!‚]4ÀUñQ ½ •eÍCÃh¸ÎùçÝ4¢Ãû93›Ô>i=ñ3¼%Y˪ŠEÎÄi38§Vˆ;y¡_9<™6%wB¦ªïó•¢ÀucM耒 pç ²¦£ã‡¿i 4N¶ÕÒÖØÆu0èç=‚T¨D“ÈD¹åú–ë¶95X«u1\ÀžÐ îü\‚>Ì~js.cÄnÂ?¢*’딂vpÅAó@ðmw¹ßç¼êÆgvù®`ɯð…$ºäƒ,²°Ÿ[V0¿¥Ù{{XÚ|ï+ ÜwjoydHqÒ¼>~”ôš¾ËµR›J€ý±!œðÿÌöùÿnQnœæ’1¦‹OŒoQ(¬uŠš®Éaâ¨Â!Ln¡›–·Q ¡³ªõžÎ5.IHl,V-¤×”·Ýé5Ûû0“ fÒ« °‰­Ôh›ˆù‘{ |“N)@™ÅÆÇéÔÄÔÄÉáèÑš3ÚçàEÇ@•‰„“€€V²4ïE?¼ßÿÚïÍÄlÎlxsCÆ.KV[=b#$±I…iNE;JnI"ͨùá jÍ% ¼ˆŠ¶Ÿ ¢¤T•Ugb^j,åéo¦KÙÁ±¤EZZde2a[qh±lÑ–¦£pUj "ÜÀ(¨¶ôK¿?@Þvñ.](w· lLNB3‰)]HþÓš(ކh —x ˜š¡¡žÓ‹tq[hEWxX„…X­±…[ñõ``ÆfpFgxFh…ŠÆ(ÁÕ(ôˆP¹w¶{}ñÿ‡ªùç$§D‹âê~í¦éªÃ^,P‡tZµÛ ìCò˜³ìžEÈó-„BÒ,°H*yz\¤ÒèÛK¡ g®e>Ó0¥ì"r~oaU³ 7\8.g²fV£är=”e÷]f°À"×ÿ“ÞÿïJè¼OSUãªqŒŠˆ±åmÛy#÷÷Iø'Ù˜ï_°@¼l00ǘð8«>yDÙ®>éPÆa„1¦izö?3IgoŠ@ÿËaykQ…) ‘#•Õ²,ƒˆX|F§Ib˜´Ç} NB»p[ü¥þLVÒÞéK)ÍI8[!„Æ;®¿·Ã#ˆCâjz¸u #ÝÒ½>¦—`Ñ`¸dËÊWL±@н ±ÒÎW Á?¬IÆÏˆ'TÔ¨–°”PB9qòÄQVÎ `k£¡—>{÷ä³í´ú䛯ñúä§E“ÝÁb¨W¯ÅíÇcZ„'ÈÑo›2w±zRj+àŸ(ÌXaÀÊd=×Xìž ¶þBy(ìaᤒ.a nýâÌË׿ÖZyšÀ‹L Üvq¯îmeZÑVÌÕÆU\{*X[Z¶„õÒuÊ6l[ºŽbíNõAŽVôìàµNáRw µði£ˆ‘é‰üðRÓNÚÚ´/¨Éz«iµiÛ"Rdyˆ>ƒV˜1ÍŠcÆ|‹xSï…æ-à¸JîyÁ‡ ›¿x2ù“…Ø f£Öp_ÉL¥¬^ÍNXõÕ\Üœ*ªØ8íÊ/·UFT Ù¨lF£®EMÛ2ÛsD³ ºBCÜŸ³ûnÿ?j£‘ãJ'µªºÆ.ú}õˆù™ú»Pb]Î@øBÂi?ê`éR¢1Ý×+à@ØÅä*)ÈÏú äZt ‹Â\~©ˆÂ”…²8æíÿoToݹO÷²W¡k›Æ9Š<GWÚÇ!÷½×üjÕµ£-ÆžLšŠ/®w=^ÇAõê?$ëÈÑ{^62hÔ•ŽU1‰°$4Ž% ƒÄómªáÒØéÔ˜7±¤Öhsî£tcÀ„ØãÆ ia‚â.)Í0ÎØÙ O ãÈè†1y©r~>f8j¤Ôè¸ùr"ìfŸ²Çn4xÔruiû£rþX^ þ˜†I½Û볟ѰSF÷Ä™¾SH̘´æñrð0$&cíãîÂ=á'å•»ö¶WÇ?Ô€-Ôn€m„Á׈~¶ƒl@ ݉Áâð"‰L¡Òè &‹Íáòø¡H,‘Êä ¥ªmÔiöiiëèê雘š©3o°°ÌªÏºoáÊ^ÔVªÊt›B`Åp‚¤¢{2‡[¡œH^² ZÍf0ã³àõLt&fl"‘ÊÚÅ=ÖžÀ „2.¤Òƺ|PŒ¿ûðù†/í¿P,6;qEJ]¥Ö¤õù¤+î’¤¬{<¨HÒé ÆL†\®PªÔš´¾NŸA;š¬²6‚à ’¢–ã…Ì@IÑ «Ö¤ýÜé §'s–ÎÒvgóÜWï0âL”`áü|»›ªNïdì~Bç‡Ðî{Ra*bz+È–û º?!x!¦»´ìŸky+×óïÛ¿}X¯êÓ¼m:õ¶lÿjÇQ]´ÃžýÈrÇnoØ]xz¥3¯´»2ÈŽP¡Pwx¬òj¶¤¼×'NEv¯êæ… 
,¼;¡É÷Æ‹÷ÇýiΉÚë¡‘²¸Tvf™x7. ´Zã"£V"Èzˆ¾Ì(=ç p)×hÝ ä^qX©W úf(#yò0­?Jg ±tkÅÙFÙUÑ\Š•wgrL”«ª»žÌäa^¬ÚX×Y»¡»Û(݉<ÄîQžÓ÷iÿ¤o¶þ; fD»xÒÒ£k±(̵¿QÈ%'RYr# ÊâhCæøÂåúÌg &”qa\(UjMÚl‘ÀL(ã‚(¥‹÷#GVÖ6%Y ͰjM–Ç/ï¯Wœ/‰z{Yu؉–ñn[÷B¨Ô))PïS¢¥TD¼’\c7)£>z¾~k‚—³ö.©Â¥uqu×¶úãíª³¥J1ÕÌqð} 1LøÒ©å¬ wªѰãí½oü i¶øFa»TêrˆfÂTî­¥_*àª(¶¸_T’4—ŽB9íNMXŒ3±êJ'–3¦’Aø½x ¯ùÈì*GJä?©;³¤zý⤆ ½Û¸¼‹·`½W.û±#10AØt¾mÂÊå8ðÿ.#&Æh—˜¬òM£©Ú¯…ra!h¬0ïÁ3˜ÎÙ”Ù+lä;µ^Ha[«Çë–æøDIÓƒBt·¸›%KÆñù•ª¿—€ç@qØlâ2zsõ{¹ 4ɺë:±Y`þ_kÃÉǹ؅3]:×ÕD7f¸YŽ;°úài¢EŽ.³ãXٵѴ˜r èÊñäÔÇ~nˆß¦SîqÜ uQÚÂÀ™ÄﯡÍÛÅ |¿òŸÛª®Ü@f¥ÂHîƒÆB´åå¶ã6ª+Ð;¨X„„û¥ð19ÕMÞ]#«Fë¦m#’Ó…ƒ/æ3¤Û †),O ’"+ •Fïc6«l./ŠÄ©L®PªÒìv@)ÃFŒ3ž‰nS¦P^~NÏÁAr”¾¬8.›¡\Þ¸\ÕrQÇW3-´Òv\gÂT¬pÍB7„¸ÉZ I­Vj`àÒÜ!ư!cVæX°¬ì«rävïÉ“Zâ.I’’­ßË;Qð‰’‚œ¥ŸQcÎå<ÃŒ›8ä"ox\uPÍ­ñþc´ôr¶;t3A0‚b8AR4Ãr¼ J²¢jºaZ¶ãz>ÿ¿Jç=rRV›ìr†Õu6e~ü@­áJ#%F#´u·”””””Ôyj–rE­XÛôíðÕþŽ4x¦® A½Ù•ù;]CÝ@<ƒ#”&""Ìÿo¤\2 r*¦÷£‘»ÙG‹–™tÅ>åó‡ÁHP8Àúrôzƒtҵ¿è%ËLtM1íÄ›ÊH`g!ùãíÇÂ…“ODädÄÖF¢‰T?ŸÜé(˜Q†‚R]]ÒÈT¨¶ŸmI¾aÔˆQcÆ3›2íĪ|i.@#Ô!'ۉŤ^ˆÌpä’p”ZÃSOD#Ω¤¶aÂH5¶?‰à¶P*6hŸ–Åþf‰¤’¡ÑÊKì¹Hì‰ÔgeöG®›F˜V’&ÔBÄ%Ž.QKºù«È¬‹œ¯†™z8¹Ü‹O)°˜eyY–5­Dð¶øLÈÓ¯ï^Á`ÜïQ3Š " Ã0 A‰dZ¨Á€ìCÉRìb…>}âìï]`×#Ó˜aJ³ƒûþ€{3Œî À¶š ý»Ñ·`1«‡ 6•7òGqªølö|³&­Ä^Ñ›N1}k ¸·”O¹×¯‡ý =¿À‡'I‘½¡Piô2FLoXl—ǯ€P$–Her…r[ = ÒAèêédˆ66±dm¥GáÇ ¢$+êoèS";ÊXÙÌäòÇ奖§:,Í|µ˜£µµ½k8‰Z0l©k–¹a~Sµ¹&G«•°Bš!Ř^Ș•9,çéê’”œ¹rE€¡üÐaN̘˜ÀÂÚÇJÛÔ›Fø4 ÍZ´jÓ®C§.ÝzôêÓoÀ ¡œeœQcÎå<³Œ›¸yS—MI•T"Úðõâß–!:ÜíîÍ¥ÓZwtìê1iô(lêìåÒpÙÔ Ñ’¯Þ¶Â%¼¸ª¨z«£ÑFú|$:pêdqâZØ/3VvÚÒáÚ3#(†$E3,Ç ¢$+ª¦¦e;®ç{þïdÒS•¾ö‡ÀŠáI-[æO^¤0¶¯Z:KhßFEâ46‘He‚Z“ö¤NošëS&Ñd•µ€Œ NͰ/äì¹xñš752¾šhì¤íÂÆYí>‰  Å'¿ÿ<"vs„W¤œ#39jj£áÁ—2‡•Amvd§À.l»³GB˜pÿêž<á¸l¤\™q…«å®N~ÍRk¯•¶ê[[иa.á¶Qó1–S£nr´ZÁÝ,ÈݶKX+ Ú¨šes¦Zµ]SÏ0yÝûýó „2.¤ÒƺT&w Q ÉZ£Õé FSf·„²¸!sã~|ýòÿ1}WŽbNwÃÓäײyìgXç»nT)ešÛé©?ôºþh]ˆm$ƈ¬\  µ€1FDDDDœs®iš”RÆz4Žá]Ìb+\àìÌ.òÁƒfŸU?ÓÆ¨xbµ,M¬êȈã³zßzTírëx~þñ³¦Ôá;Õվ̠ȼ èˆÈòe5G]‚\Eª¥Ÿ; O¿»ží¢`Í.Œ¡¹Æx$h Þ…Èm¨Ð ‡'Id •Fg0Yl—ÇEb‰T&W(Um#ͨÌÔLù6 ËùªÌqJ¹^ì«Ô‚‰àxFP 'H*:œ¹é%ªæ\RÝåýWȺ*ØóMˆ›H¤²6&VE„ ]m®r2™1€Êú3ì]A*m¬Ë·pó~û ` „2.^[%YQ5Ý0­üJÔš´hÇõÆúc¬ÙnNm¿t"wP ±$é¶L®PªÔšõûpu®è»ûÆ4Ye=@FP 'HŠfXŽ2‡7¶vöŽN¯l\<ï7ö¯m}e¾•éXîrá*Ƴ@­!RÌ2²JÑëÇŠAÅþĺιڹVÁК1Œ 
NͰ/ˆ’¬¨šn˜–í¸žO2€ÀጠNÔ~‹•ÝS߃¸ö“ܯëDcbc‰T¶~›Y 6Úd³-ë³;v-UQ„RÄ:½Áh2[¬aíBY|ÒƒØIc ˬÄ`Åp‚¤h†åx!gX3£ÆœËù2¾š¸Á³è†V|áN#Q® ÖÛ…s\àjdSiÄӬЀAEŽO§%N½±´ÛnXÖ,;cQmÝv¶ùòów=œì X®•>ó¡ œ Zƒ·Ä!d>*t#‹Ãˆ$2…J£3˜,6‡Ëã „"±D*“+”ª‹–WMƒ3µ›š©3Ï·°¬Õ²o ‘Êû©ž£9h Ö…Œ NTt$sÛ‚»—L]a5Š›H¤²ÏoqäñóÛ)"öݰcRlâkJBpH>ŠáIÑ Ëñ jͦdYBÇfædr8îî’$IîŸèýC`ffÒ'ªÃ˜O»RA«ÅW2xú\&V]¬Ù…ZfaÀs{ è30ð^HÐ,¼%ö#÷¢B‡0Xž@$‘)TÁd±9\_ ‰%R™\¡Tµ-§úÞdtÐÔLù^ ËZ-û& ‰<{q 2[ –„`Åp‚¤¢ó3„` "¨:’$IÒZ[/_¯Ÿ?<’$É-.’$éSKÖ’̯L,ÝòÉÜ ð’ìØhŠˆˆˆˆˆ8ç>^‚Qò ˆˆˆˆPU%I’$I’$­µÖÂ'ÒL°¶61§¿rV(W¾ô¿N¶Õ0ââòå×2K¿Äæ[ ±ƒ‹¹.¹j}\C£öbX&”ñ6>¹|¸´aBODIVTM7L+r\oó `Âv‡æn»!äxšNÿæ ®ÒΙßL_²ZuÕPUè-²K¥‹A³•î¤[ÅΜÌÂZ8†c:hÕŒ^Œ¢q.1F\‹ÌíØçÝÊ,‹²ì-5 -¯©‹kòºØ0-;ö¨ÐÐÛ2[«9ÛlOx‰!RT¢‹œ±ù‡È¿â^©ˆfhÕ†nËhÕЦmèHÌ*öWÁ;팳ƨMºä2hweͱt_!Þž{–Ó¿ãÝj­žµÑš²ŽÛù½K>]u(8¶ å«2,Š._¾|QQQQtô9=Cëý1%€-}û°tæN(„n—ž"Š~ Š´ÆQ7.e*äâ0c^X^jA–M²ív²ÏN-5‡Úô€Ÿï¯唇3ªšsQ¹”„k¶̽ú €-gC‚`ðNìEBuº ‹Ãˆ$2…J£3˜,6‡Ëã „"±D*“+”ª4cZÚéTéê¥ßjPCF}3QÓ¹fÔ™g[XÖjÌ×1›äyö"¯2—«X ) Œ NTtIfS‰Gü«4˜Á,ÈDMbc‰T¶¼¨ÞâK‹®, sW¶ˆçßÀêß o»>$C W¶’;¥OÃ`qx‘Dî «?JCg0Yì©%Ýý”,ymRO¤²KŸRÐ pM£Õé FSfT¡,®0dÎÁkJaBDIVTM7L+¿Lãz®H«ˆ0¡¬ukÀ¢”NÕŒ™¦²ìL®PªÔš´U>CÞh²Ê: @FP 'HŠfXŽ2—llíìî Ôúø=pþ –ןCÀq‚à ’¢–ãQ’UÓ Ó²×óI+8‚à òUêS5énÜ>uÅT;.ï¼b>S®%¤µµõöõ?2,nŽñ'^ÇÕ.J]kr´Ú¦wÇ'õ+·îÜ{ðèÙÂ2ë$!A1œ )ša9^ÈçìÞ­öOv}ýE0éó¦Óåã{Óíñ+½€îkXúNƒv6ŽÝ£T¢œÇ+Þà;'’ D{’$J²†¤ÐIÛªôy9SUb9¤.‡÷ô# r´¿—+BÉgÞñ$G‚–ÇÌE¨)†MéMêåTÌö*‘SE7ÕLScy޳ÏÉ5í‹ü±OÎp¿±ãð™`Ð.ø!áÑ8Ü8bâéãk:„à6-ï5‘ ²<îòÝéÑè§Pÿû¥ûãá'å•»öæt†³œw¹Øˆð–%Àshn‡dÐnÙg(æ ¥J­¹ä¼ú@FP 'HŠfXŽ2?KŠfXµ&íx§7dl™Ì´xþ&[”š Žýy7ÿfB0„…ˆ‰cdfÌY°d…†-;ö¨ÐЃã€u~ð÷vá¿p'§/)uGþŒü¨r5ÿ¤kwŸ†æ\O§9ûܷЇ/&Œ£?¹¾ƒœ~»ÔºÀVëTÁ]“ï†ì¦“žùkÜn°7€žÊܦÕ;þ_H Aá77ºhe”`ª~JäÒÏuéà q¸3Ž`Ä­·þ@!¯J·>õI¹Õ¸„1`Ç@{Ú‹n¤íÔôRs`cÊ©ÍÎ~`IyKÆ2°ÜZþZ……w}êçœKW›WÃë„«|=²án6rk3²y³µlù;X]ß׿«w¢œÜjwú:ÂUÅ'`KÖj¹„¦»³KoL#ݵ˜,cºb–ýÅæ(™0eÚ‰Uù$0Ò6¾j5-®`N‘ýÓY‹µk¥FJ}o/ª“y}™ŠÙö—J&L™vbU> ŒôÞñH„–8¡‚èî®®`GIÁªFF=ÇE< …üM s·QÅáp8‡s¨Ó˜s”Ú¬Ds´ùö‹þ+ÕUêݯ©ï†Ús¯?ê¡>ƒƒTøß‰‚CIÁ[jdÕÛ¨Ï$DšæÑÌ}¦pŠ;G:Ÿóùf sm¾~aÁ’t“ò@º‡^÷Œ!q6xZÚÌ´TŒÚ?PÊ0nĨ1ã5ÁTMsBù-0|x.Å¡ö9ýJXEºk2ÝèùM¶œfûL-ж‰9q“‚O¼ 5ñgå≢[zÐK¡(ŠzTµFò„<œ‡““sò—롉N9ŠšÔ¥fR¢>Fµ#s8ñ!ËyMÍ0Ìbææ±ÀX Ç·ö`NN¿ê<Ó n–6‡›:®@b Â`pYRósºüÝ}Wµ…PŠ:—àü”Ùc:³Î_Xq7 
${XÓžQw~n Ü÷úd}L;µYH‡¸ÿ óˆ1¯(óZ~!z5:ü«qM+åw­\n”ÏÌCÌ3ŽkHgÿ0^`n@™0RÃÍü’nÌ%ÍuLBÒ“ª(Û¡Ç;·5G|½…3FÄb·}ܹÉ"ñWQiY]$A(H=³™³—ÑhR‚»g9à4‘N=‰€fi »v†dÇ;„=¤>ÞTÓ©Ù’ÑEP?Ok²D#•T 4,:›.öj$;ß}R_-3¹!ÉbZ÷B<éN»ÀJð:˜ÚY:Kjê©\ô‚¤zâÇ’%ÔFÄ'6D¢žÔXd¶Dþ¼…íÝah\¼Ø{Ò:½é wϲ¤ži/nHºƒ1ÖÔ $@î $HKRÒËó‡cW‚~ðŠ4ÜAç`\‡š”L:q#íì xMšLÂ%}•MÜéà®mIêy¢l“^çKÚ¡Z:Ȱ8• 25 öÈ·Û†<†YÏÖ@K1þÑØÛVmÕ· ­l9©2£‰%£;³#.qÕ<¨XøQª›æâS0R Ðà öòGÁÅ'žU¸3¾'âκ2n>´ “ä“}†º[ûxÂa²#´¥¹ žž”•JBW%™\L91CÂi ˆ9B?)6-#¡c³9ªØvÒX®Ó¦ßòɇx[HË0Z&ígМr]Æ£‰qù_@눔élá¶x`4U?%q2;`DQsì§qm&'ûÅ]Å(Ž Ö{( X ¼Ê&Tä ˆš˜IkS?—³#?TN)Â#µê “1ñ¸Ip“šyŠ›Â¤¼µ™os9“vz ûœ h+L33éŠX‚HU ûHŠgDŠg²…Å-ܹ°0þqJ«5B%—,ØT·„•”N|r¬QK73€Mâ‡õúRëºs5·¿êvì/w°öš/åÙÜoÍüN–â÷>1ñ±q±fSMl%(py¹†cQý«Œ’¾¾…ldt 6‚X‰²ž ‰¼“‚°ž­ —›çê²—mÁÝ:¨+•¶ds-(ž­ên=Õ}P;4êÇvàšáÏáECp¶÷P+c³³GŠ"±$–*áå Qµ7Y«S¶ÙdLOfÌùе±ÅÞ>本yç.®n„öÀV–e'vo#õ6F µÎ‚»7kML¬$šñ©*m:T¢Mo±Áº„ºõrô´WhÄ`Êþ†ÒºÎ84dF•±ZÝ{zX™ ‡²Æ¶Áq*¬|:“QÆöõ,A&ɨCÞ" ŒÅDÙ`åì´Ñ¨‰æ6CuÅ^Ýj°ÖÕ®ƒ­N¨ºpÐ Uþi.Nï8ÂîN¨êªi£QŒáCŽåÝ)ÇãS6¡³È¬fÓ¢>$;%²æn¾0jMl¡±Û¯J|R䓚,¦ÉN.|¸ÉcAä (Zk¬R7¦!¥ùÏI9–ÚÇ­S—–iÌLŽg„NÎæ“œÏl.DÍ&š´@Oè–;Ïö1ag¶±þöEÆÇ‚´þÖ¨ÆÛ@C {==óbô±’ƒâ^Þ×¼ ¿î!›=1_¸Ôùh„Ê>fžÎ5<1ø¨‹ p ÃÌ’bœ83teÒ:VkÔ׿gB ¯`.ø! 
ÆLO… ê'ÙY=Ÿ°*Æ‹6¯x玽[Í¥ÀR ¼ÈU„ &L)œf“î0-1±j•S°,žœê·PvÝR’Lì—SEQi :g”Íž6™.J?Ñ5˜ûkšê`’8û÷ühÝ ð}‰?¦Mth:L/]m`ÒMQÈõ32‚PÏ=Åî%…©}ÛdÌìG!$"¯›ÖåN^}Ö¶.ûEÏM$Ø=´OÊéÚ†{’•oLq7úFŽºLC½M»îÄ^Mª §æ%9El¶{æ¨+ìól±{ U2ǵÓýØ3›U?B®8-³&EZr¯}ÞúpF§Dg½Üž$ž=ë$¼¾btï[ë Jk ^*ÚD7Tì°¯»Êä¶×ä+Ä*¥£G@:Lã˜hïJ…X‰º2­¹Û<2G9øé¾8ƒ{ÄÔJaKV½B×'áœ{š-}Åùxø9$g{CäÉp~q¡&Õ‰pF—$ï›Ãó`M€l]G€ßz¶¿&“š›ÂÂÞFm?»R šžWÛ&TA`È@—˜ÇüwŒÙæÐ1×üUlÇí°“°Ç^¦€bäà“„hysÔÞ³oÄq²èï†ç½Èôæ©—½‚{ufzí˜zݸŒ²Ø./?8ÎÖoP2“…$;9¸`axBªHŠÆ®‰™fà,?‘¯6h™ZŽøÃ$MX¿#üÈ$ÔÍ1UHhœ°Z#¨E—A²°l¥SÖHJK‚åôKÉÄ—Nyæ%• ¦|£PD4B#ò¥´I)U"#ÍË€[Ë`¬reÖJDâÇ…OBìp̬í.´ ˰Ö&aUIP‹.‰e+YI°\fKæ´‚ÉË#DÖų†ôD0Ôr%žhžþÿª¦ zúü†®êeuzV ÁܨCÙ,a´˜\ÿi@ù‰èÈÙŒ« ÿBH€G¿4oêË–<óÓ_ß2åÙ¡19Aï?:>:xƳ/Zÿã üÜiåEŠ®;çN6B_·Þ¦5á©’ÖòûÞÓXƒ}cø.â2L`ýF'ï͵ÓÒº8°©ùŸ³"؛Ĵ&©0°ü:^yËÖèÕS r9e4“ɃÒÊö¸'=íY¯{[LdpˆªÞ“'Ë W Œ èA62U˜¤1dsT6;%y)›‘²YH(qšª¹d¸Òq TJ–PÖVÅÒðwDÎŽ‹QqˆŠØ_žsêUR’oÙ>¿VÍT„**3#I4ƒ¦¨sA©ÀZ¤¤<àl¾Êfª$Geó’ð4Q=q¥qD!(=bD²E¢#Ï“”1÷Å .µ¯; ›¡ÂάH bÁ©;[•{,OÃlÊ.»M™ÜyHr«<a[24Éà€|&'5tCHÕÈ1rä‘fÑ,š¢Y4‹&ÈSÚ À±{kZpù0C‚g-¸WY³*•ªÕªÑPzÚ´j×i­›n+S‹¤^²Fl}8€Î†Qã˜pÎmrˆë¤ÞzÅ7!aš—Kš4á¾ð¸úML¾Y0A”ùOª;Màœ(–¤H±Î9®õ4T³¯¥rWl_C5Ïsf®M®´ëHZN† Œ‚&‹X8‘|ò2%&/—¡ •+V!0Æõ_øíõ%8À¡:Í'“9Wgbî.%Ñ®÷ZoÁ$aI¥'#Ù”c¹u­z-7®]=ª×ÝÊnn³É.u¥›»Çr~’yk7>4À›?qϾĒ‹€£]«˜Ÿ8ÀæÔ ÐÁ†}þ?q[›Þïý~öû1¿x#´_<æñŽ_m¬oô¢éþÊóã(÷EÏþ8ÿÀÏþs%?|öɳ}÷nðãs㔇ñ1Ú7àúw•Û÷ý³ö =i/Öâ×V¶€_‘oÝ·£xT8XêßÿU77ÿtGÈŒ×mZ¯)DW¬TxÖò"@€òô{òjv²{ióé—>g…“Eਹ‰KJäöOY*Ŭ¤h UªR­F­†RÍJ«¶rÅ£„¸~ F'÷Ú1ì饎¨“]ç R}®<’ J“$-5KÎ7)ù%M—?u›©'=IÕ`Àlƒæ2×°y}`}DµÞb-éÓÒ®˜™b±Ü\«i¬\2Mf&¼5`‘…Å7%ÛY¦¿dxßQIí²ÎŒõf9àu"…Ý6Ø#ÇœöÚd_ö§µßvó¶V/^J& ½oŸìõ¡0õƒÃ9\ãšXånˆsË¿nv>¶9˜P>+ ]A¬8-‹pI”­_ÿ î!÷,ýà}X3þ‡Ÿ\–|1Éࣣõ³;›qq¥§Ûwýƒø»ó¶¿+(»ê>#—[8L«,=ùvŸÀö?&zfi<-‡j»N@Vÿ‹à/Fåží:ï§ÚrXb‚8; h_T$ÉÇ©Q€«½ÕëOÉ›wmï[w’gé©u§YB«û4ÖùŠ"Š;>ŠLTËÓ§u µ“JCˆÑ‚ztôQ}%ÓŽzÎØI90¤Â±º/(_&¨F˜FÛ̥͘HíwZD!9ÍèÖ ÌV{àèQÕTKãæÞ‹í]Ÿ¬^¥òÒÕ×9Y&(K÷Óà<â®.@çå 4‚ÙËÑh44VPq,48da ͽ$²4ËxYn­K`⽫xÉz1•‚Á €+Zµš¼Ø(ùÀl$âY ËvÍêR~W4¯L­ÀB`Ö¯˜#Ê@ÞÜF“îi¦É;óù’˜è¨‘2•/€™„ÁǼð¡VcƬ€=`Fl'Xǥɧ‹ÚØ–F 8xò6uR-óÌÚ¾\Ýh*UiI·™§>!Ç\ 0±ÿØjK$/´v•Æ“2Xå×°®D£F@Œ…zó¥»é'ZÚ~ãöɶc4!¨V£y&m] œGuE@_è-˾Y÷«4ÄêB‰?qõÍÕ$Hbkÿa×Ö {g,±è³` [).¹µºÛ‚!2Ä:3ñe„kã&˜Cô=âh¢¢l¼XpICh,÷¬ZBž" ‡ užgÓ ҠѼ+ü&è²Ä 
w³“У¯¼ú(â5¼‚ðµ9·9.™ò¥YØ?:ŽÃ‹aûÞúuÒ˜‡Þ.ÔåÀ’K7¼ŒXª ×_½ ¸Å)ËÁ ·VñDAHä=´{`…¸(³xbÍ5솭{{%ºhYœ†ÿÅFx¸ë8¶T¶|°¾®ó_fÕ@Pêy«ë¼ÝÌþ>kÂšÆøÌ,Á;Ù艵e'סÛ¡÷°´ô¿kÃI0±2 Z7ciÙˆ©ØeÃãŽM˜f"ôG¨q¸0Å‚æ “–•¡VOÚ‰´¯é€:zÒI¤s…JÔÕ“n"Ý+lz žžôé]!ÓµyÒ¶š+ÚðPûâìߺ­Ó„¶‡©¾BGQ»þ0ßD~?ÆìÒs ‘_;ò÷ÿ0ú4óoGNcÜ0‡b=}R…yî7N,w=ŽB»ÆË_Oži¨¦¼mJ¾™ öWý%_Îþm.}Q6‡ó!”åí5#FqGŸ%Ü“ýª6 é9ðªbÈ‚5ÈaeMiŠÀÔ”US°`S‚ˆÂ€¸HjÔ)aJe°¤‚ut¼ú–9Ð2Ø!àŠÁª³ç +“I€Ná…c<Œá¸,C`‚µé1p¹³4u¹R‘~Nð -år¡h'€ .`{¸Ó„Πė¢õâ6DRØxª¦ç•ƒLâÀ­ûæwª©3(Š×Ç3£]Ó³ç˜DÃC|[ï]Ôd؇p799è£G=<BÔ509by±9ûâð$´a©˜e;–Œ?‹Ð<ž2kÏiÞÁ}dg¿/ù`qÎ7£«?þ¿‰·qQDöº›†„ SV+nâ]ȹÃ\æ :út¾ 4¡Œé¨@[ØÊˆâàé;,×ù…}Ñxî¿,ò°†y»êÂ0ZëÍc§v˜íÖ]7‰+³8iš®§R° ¥)áÁC~õ›^ÝÛdžðÆ$òš[tÝn²â¢ˆ¹#°£„> S-×”a Ú`7^•^Ïrì±À[ÛÙÊf–Á”¦S{®ªÛvôdÛÀk§´†Ô# ŒC]ÎfŠ‘—­ Ïè ¯FtC1-("À6éfüfÇëË+ÏÕâ+ IØ\_os–VrD<͙Ƅ ^×@¹ÔDZ'R ¤qNÍ c(3×ûÎ qDab•*2)Yw3±iÎvƒ¼ªPeo‚_µZŠHk®EÕ‹”ƂӔi ›Ÿû ƒÐNд;g‡f›ÇˆÚGuüqƒn2!‰¼kzû‹»b»o4?y2r™dá“R™¤kn!Ødxoõˆ0õÅfWȹS;ñ>.Ëy¤Kš»Rµ‚¯¾xÈZªe¿ ¬¤J¥/,úŠh$pµHt˜†2à à»$†)o dñÝB±Oˆ!¢Þºþƒÿ O¢a™¹§?YÖyXñÀ2„ÙuÙˆï†àðTg¦Ù~+S¬J=éFÂѪ¼Pæ`§dé«r>ßFCZÒQ•ƒ°WlÑs]Û©”/MŒê˺‚_ËÌ fêÊ^Vô‘?b’‹jób³®³(™€"ÊDÕРnZ.ó.Âh¨ˆ%Ö¶´Þ"k–;⫝̸™âqº8 Õ,Û¼' :J‚{ëa~Žf[í¨©—8b[Q38L‹´ˆ)H^6í!¿î‰8Øj»•ž{ûMÌ×¥ ý¾ÈÊ4ÛAì^77·ø ä(õ¥ozh°à„5'7guHTÑœÍ\§mZèá!jeR½>ooZivú.ièbn¹‡ývfaÎîhšË¢;ΦÛJ*:˜$9Â|“ó(1UíéSùd³y<ªiȵ‹%X#CùÍÛÅtÞc &W:ƒ—{ÞÚÑToRæ?5Ç®“S©Œê¼ÊIíõYƒÍ^DçW¼l–eËñò¤˜ q‘“Â` ”¤<‚ÂOcÖM,±.Ëc!ñ(‚ð‚Xev˜>¢`Üñ0W£³5¾þà,±/\RL1Õ \pWC_Úå}Ǭ?¹ ¼ôžª[\JL§Uvf¡bƒpÁØxV‘/· º@*“  ðŒ49‚oâ7£o™ÝJ·›«¦U6ié«o€ê&àÖ³=dbÁM`0ø³¯Œ~’¥%£¤K ƒ¬øË)ÝgŽÄ”‹Kü±î,0Øœ ìÙÆÚ,nyH@–æR¹¬’@>ÑϘ¦º”6HÌlTóøYúKªÃxý–$•¤í:$‡æäÝq[°m€±Âa.¼,ÊÛ/iȮѶŠ-”âepZÁÊ8†_ЧÈãáMOí2Eéöã Œ0Á„ª.ÀpѧÕü¥bÈ4U¤²î¿(f»iì+kòH¥¦þÎBl&oU†QñÀƒ¡ÔH¬ê°S3Äa¾XtÌvÖÃ!¥Åe¸jÉkVÜŒ™ô¾w„o3WÍתÂÞ÷fÕ²üÙó1åŠÜPˆ$9¤*›þR;ý¸¾£étr„+SºŸ¥Ú ódÆz!ºg;Èü©UàúNBØó9oJ×Ç\§[&Rzù .&°ë /"84Éâ5m&¥:C¿]Kws®óÁlaÏÓLÚ¡7…xÕ÷'Wss“è *øöx"šlîk$u@®!Ì¿}ÍiÒF2•€Rrµ¦F£:Ï~,ÇuY]#|û1Cd‡¯9&š8 ÓQðŒW×´9½™š—ÙÆÑW]²5@ýX:V·H%)Ï#8 B’ìî!™4ÀÑR(ŽêÓÌp*éŽÖ§ÒV´\!´í,sê³ ‹oTšC¸,^ÃpÝ£óæ3d…R&Îhr#J\¢—!žÑ*CÀc¢öîó×(UAƒ °u¸yD§ ÑæˆêÑ•`gËÆ¾"b)Å!te»£j²³ ÷Ì©‹¸ë9" Y!Í–ß#µpÑЪdVæ@’Ü1 ƒ´ÙkŽ@.æ\••IW;ìË€°âˆ% K®ç)ÇpŠæÃÐ#1 ®ä¯0%®6k=¨‡5¿<(®w•dË}ù­µÖ@ž?×ðMv3¹&¡¡ÞFÏ˾×G0T¥èæºÊ„ >@೑÷ 
¿ÀÑo“2¹Ä„³”’4‚¢x£¤j€#ñTÍOû¼0ã3Ø(hÚ ×ì Iñréðjï8õZ Ö¾~#…úìü˜Éd7c{ #Ï-ƒ„2¹jF‘cMH²´"Âðuõ§P›ÊƒÝºø‡ô ‹o7Ò=ŒãZQX¥yÍMº¦/±â&Rè..ŸyD¥›DŠ©ÚˆÉ‚:“VÞI}ƒ ßf€g jÓ½Už8o|ù.Õ “´ü^ ¹AòóìZ¬Ò+\ÉLÉΰ"Bp./Dù^cr4ðO›š$ÓˆAB”$éFÃQÔ†Õ—@ìÐ¥ªAÒ³Éü#ÙcøFíÔsðÕú N&-Äаÿƒ«H 4PÇ@ÈD=›j2 ò#~‡‡*Ry8š -‡•7úzO.M<…eowd;r·ÒPR÷™5’]Ÿ¤‚tæi×°„j±‰‚wîÅchq‹ºÑ”wÄ·±I¡—½íxWhíwÞÜ™sóëN"&¥ñyfU¯:Û¬.Äní/äM û±1¯‘í½})ƒBëœl¾¡=dÖ¯œÅ@^ž´sýYWÁˆü(>ȲÚÄ’w6ø9ç˓ң²‚ìVíŸã–Â€Øø®e¢2aæL’à„µ ÊÊÈ&[ÎezeV»C€½HÝ„‘\T( :ÝËBÌþ€XÙÔW]£íT-_lÁ ºföͪÊ'°Í·,!ò\æe~0›Â¸²Ÿ09(ºT¢+B u¿*¡ì¬ÜTÑy+@™äãl“—¯,ëŠL£{gd¯Õ3¶œLôÇÊX én¤áAÑ sgZ„ŠðÄAÄÌî<$·¡TÓIG®uð ÊÊ·–,~"¢8^í^p­ÑY4ªðn÷˜ 4Rý{é±xø¸ÌzU®ïb^F±ðcï4 ¤H»¦ïLÁsålN‘TÈ1›l Ï‘²SU ¥m•,™@íç7 ¬~ò\P› ¢óæ^]_Hz°×ø™2dzºóg;Êãáñ3Yƒ½ýsI9Ë|´“¯‚Ïx’Í.[éC/Œ5Fp‰Z³…ÐŽ k•ä”R6RÚ©,ÛN§š6e5Í´‰*Wßnc¥ ›CÐßbTóáøug̹xÒ½søà >‰ä^ /NjéqñU“¡&á¯ÜkÄcôem³êy$ †pö¾F%Úg)‚&ù ýD¤0ÿ6Cƒæ}_°g¹ã£ët €>DÈüÎ쀩½-Îᤞ}IHPÕgƒµ‰Õ,ÔÑ*mŒb)ˆh1â[GQ '/œB'Qdgý¢¼½ª»´Ë(ðÆ:¥†÷[]›£µd"rÐÉ›åæ×:·ÄrmËÆ&Ü_+¢þC`7¨§:|s0r¼ámÙˆYÌÅmºôÇ®¸³iÅàŠ¨Ÿ›4gƒbü ©Üp ˆS ª7¾¢l÷@Yqã@— ‰š5ÓB¬óëDwkW¤É„¢C˜mñ,šþ?¹g˜lJ{»‹ãL 'jçïó´/œ QºÑÈóZ:½TâéiT«á‚/GE Á5¿e×ÎõUÏ@â¥8—;éÆsOû[gLTËLòQ84ÈöqH.wöædÝýÛNQb@ŒØœ1ï˜(l,À0.‰¿Sˆ/È1óúöhéWádÕÅlHXgRÛ7ÙtZÉÞ7ÇMJwµÐ%.”-uZ¸>çJÇŸiìô" j’;=pžàL40SÒÄ.Y.0K%^AG6jÛ>9•8žNH¦%’Ÿ§œ•¶úõ„+&›¨ ”'¸ò åÌÂÁcx¸Çã29°áφ[Uvõ ÙÂÙknŸ/H|¢æv~9‡¿àÿ!M™ûÅÒ×£¸ÿèëù* CS)L Cд{Ã$[|8WñùvRÈYÉý ?U {Z€žú…¡”TŽMSTµ\Ÿá€æå{IýSeËðsêL¼}ìT5ü‚ô‰þI‡Ð÷t€óAT|€ËQ²m˜Ó½^Ñ-Ñàˆ4á olb¥‚ÛŽ÷ãg }~Þ•ô÷$u%öß¿Qkƾîî¤4BÈŽŸ3…Ôg”ˆwXh(Á%˜·ŠL7 Þ¦Cšî[ÌyL:óî›É:‹üÈÝzõ÷![ôì;ÏK°Fâp'—qþöw¢•Y‘Tw†Eq»b4@ û“•UD U¯†“%`]÷×1œ`¬ Ç\TKNPÁ_+\úùÎÿ/%îÍä¯ÿ¹AõY{­óxÄÅ\¾É•Ž‚‚B5óFpc‡Ú“ÂÕ¾¬¦42pØLüzýÚk ñ,fúPIÉfOË\ÂÁ@Eýÿ“]õ¦Lƒƒ]l-÷k¿òÝClšsU÷ù/uü[º‘3ôÞ@7­…ÿ®˜È/QfûÞ¢‰5Å\4ä¸5䃎°ß”qUZ|ùiÙ,ð„=ä ïÍl•i~ü%/TQî˜Èä˜K–²[hѼo™(ùvVY& Y ·6{Œ re¸ߊŸñšØoÐ(QSTJŒL ¢´Å­°TvŠºCÃ:¤ßÕ°¤JtØ•ÞÆ¡ÒCµ:ÿHß‹@‰PÄ Yœ$µA;,B‚³µé¹Üa l¹N§0Y¢e߉?q›ø3¤ÛRU¨wƒ—jð„Á‹7DÃ䕌a{ñ Ò±½$#¾ã¿ØÇ‰¿g3ègÌVÁAœçÃ’3ò¢¸ä!3&×{;Šêµ¾[³Íÿ¼cÙ©Èů^!°¬àÏ÷yèÇàNRc.Ĉ¹µb/ŸÚ9Ü®Í!ŒÀm‰P?r‰¾ñågÚ'ÕÑ×)q.kô£Ä{·.ÚCf@WÓˆƒv@=ý²–£UXIžˆí‡oT6°Áëg>Œü²m*÷[z„aô ~  ¸ÀXEf3m߆‡ªMG€ê«ÌÓª/·_‹”â^~ NRJ”&‘l•Ÿ»83ÆI/Ï’Q”Þ¦zÅK·Œ¤éäIŽÌ §Æ.È›Rkó2ûÕ¤‡P[ÊäÂö…œ8¼ÎÀØÐl] 
TßôæßÀ‚œo½‹2.‰Zcºñß"Ž-™!{f­^'/þµyÓs½QþC+Üg;›°>ÅÖuO»yNýýø”æûéÝŸhšr¬(uÖgȪ–¶ ëðCQr—‰šL‰:8°Ä$IðØ6ƒ·¡¹%’жéÓÇF«Yçú²tÿRžÛ¬žÝ@;viO¡ÛÂâ_»§Ü:PáVø0£ßB·’cò¯'wÝVvî|¹³ó0̬]/˜À“0‘µk‡€ÁdÂî¦ÿä!sj˜;À´kë#棭€S¯R­Uù&ËÔßR˕幀 ì¿Ô㘣ü~•jÛYë€þte¿ºÄÁò"”'zþŒý(:‰8±dƒìÙcñ>»äQ+ ªO¶Ó¿9ÿpéukaêÆøâ‘gÚísªïÇ'Uofö¾Önd”ú™)†äïÚý o½H@®”WëØ×°Ïq?ÄDÃ#a ‘ ¡'`m&Øcä 5\«'‰ae°/ÂqÔÂDZ›_zš6fv¶n£[0F_(Ñ52ôT‡ò *§9UmAO-Ì¿U'Øy²èÅÍ@EI âr0/tµ`¿2¼üˆÚ:ŸZú*Ô8¯³ŽfÖjÝÌh¿ú‹W;ûŒ•)¯=–ÿÊqzæÒQ»îv,°Õ{dG&w÷‹Çá°ðLjåç˜5g ˆë €þ~qòÑ÷ÇLÏÚãN>lsƒv¿œ/>ƒUw ³f?MC<ôÙh–êú—8‡ûŒhÇ6q•ÅhAj*Ä\ñôòƘìMͼ,ÂUzÙ¸{, æ½°ôWÄÔÔŸ K…Þ{æªFRQФjøýäÜmMg‡ú™šS|?1©ønj£ÓÖ89'ÿŒ,ðÒl/×:¹YýV"'  ”2¯}&ÿ7µœ¨òµíª>nêzÔ^¡ÊH=Ç>€ÂSÎþkjˆX4škó§l½bâ& ¼Ö.ivg?9¡f{–´µ5´E60>ÜÝ4yL âå"å¥]ƒºÝØüÁgö]K:¯=s:o¿’gŽÁüÝØA}i7R.áMçQÓGóÇñEÆŒ6ÑÜÕcW=æÝþx÷X’ÏÛÖ]õ½Îƒ<|=½’‚:ºKQ'戟/‡"Ýd¨Þjäu5¨! ïÛÑ5¹á½-çÊ’—¤;kÚh€¾Y0.RëEÓ—6]Xû¥ƒ—î¼Lkñž z¯ËàãKûeiQžÙ_l›`äyw0ÉõvÜ7‡‡.Ž—HêRrʼnÖL!5ßÊê@÷§t¬RjVOÝ^[»Ÿ]¾öºE¬>æÕ_Ó”g ¡©zÓ ÚÔ&ž˜j¶2;Rú“:ðVµÉ( ö:l›©–Š£<@þÛ¢"}“¥jC¶Ñ+Dþ¸ ¼­Ðø“¸ŸgšU6~Óæ€ÄIÜ3 Sãïð×1uÏ9kÁaÑ ¿²®ßmÝ6𠝂‰},ºŽHŠÉ]‡¤±kS4ª´V©ð^ŽdŸ´¬cCWpeW§§UIJĽ”š̃•ÁÇ ÅU©µÕ­Y9k 'Aæ M?èÌÆë‘¶ BÏ…aìP@Žª¤ŒÕ‘É(‹’²0Žl¦ j‹úGâÕ‹ SS‰Ž³w«ÕYê®VßÇÐ5¤,ýdîÓ‰ÒvºAÆÔ±ÕÖ±,­øSµÍYW*ÖîÊq”NȈR÷F(¸ÑÌ€PÀð$XîÒ„Ó5}Tc5vÖ^‚›ÕWtfå ͱ_§ùè¥Kœ<¼6¾(dÚέê¡_h®¡œ¯Ü'*/œ¤ÕŒ®®ý;ÄV°ø¦Ðë «'e=”÷„»×­}d€pÒ7@=ÂÓëzº:ôn˳ݟmµÌÚiF·ßZ}!<Ë«œ¥?VRqöæÿs’–œ#3Òûxv†0Wl#W–ŠÒÑu5è}îæë®ØDû÷mé'†QÇËÛ@³´–:¤sÃ3Þÿ†(ÀüÅüë»=ßmÑt]?7¯;í=—ÄOY6´>-G×GÑ• ×+ò36XÛÐ'SoŠ[rv$}X?BzQ=sHSkÜÉlhâ.7g¬è˜:ˆ¥á’Zû¯ÇV—)N‹Þlß³ ’n²¿Ãy¼“ =o5ø!çâ¦"îËñK Ý!×Va¹’æTžØî)YP…?hªÌR‰1¢ÄãÞÄèªö\¼YD*ÚÏod^ê,£žªéÙ—]ªÚURA- ~ªæ-š0Ì•†í£,ˆê¢Þ™N6·¸Ê_Ë×pÏ[G=»‚Q:Ô)]Rue27ú»aLè¥*UXš"âý˜"q+GÀläñŒ‰9ȱvÚБžkD´‰m<’Yò{…M,*ßM­ãuÀ„ö`ê»Ýȯ.øv$o9èW¸*qØß,W&–ƒAš]¢¾ç§öyÉÔ¾ª}åñÇ6¤ôFê›1ž£]Üð4yy—l­Ï«(\åqk…¢ //LÖ¶™ÛÖvîì-<`ai0oÍ…t£NsâpT—FÆMGsg˜bøÞÃŒ…jnøÎóû‚K¦ö×íõzÊktû›Ìÿû›Ö/O­Rížk÷š¹ç ŒmÉPv­¶(-©)’8@€È"5úÆþ-ôÒNmbù}±eïáÃÊIýµ'çÆïj€;íJúÜþ9 ~Ë~kÕ|\t >kоH‡m¬na‚AuÜOqã”i¾ Þ’ÞÞâJçõU#õãÏ+yoô³Ð^œ&T…ÇùäÄWÖ>EŠ­5ÏõªçP2ñÏúsxÞ¥wþÌáKiè½?1ï¿ùoÉÍ, 0Jå]|ðJÿçí§èÌÀb79¨8J›$P7Òè–àk™B qõ0X4 öƒæ <ìB[oAu¸Âƒ‚œÏEF»î9ªåp¾ç6KíØ'šB¤–ÌÔ!©‘ ÒÄ1e \¢•¿›4PŠ(Q×KMù¿°½Jû|9©FrÝò+²ƒjÄByo×·lkóW™ 
ÿi;rëw%´¸böý¦ïÿ`ûaþˆNíFé¾cÄÐ|‹ofïßmiX²(–?esŽÓ ¹=™¸Mú¼v~ƒGš¸+Ó%·ç²bQíBQ)iuNÐ̼nZH­Ëǹá-¡ Úç’Di2Ú£9=±fZ†Ú½|jÚ»72?S˜^—§èÁHÕ¼°i3&åcK`‡[¯û!C š*ãÐiÕuÚ½GÉŽäR!Ñ”!uTé£aT&)è¿wa~bî ‘¸ _y ÈØÑKÆÑƒÐ¤ý˜Õà.Àçw½Ç lÙùÞ<Ï“ûÊ[zT9 㪬ÎìkL5[õYö[û{iÚ¾±ïžéA°f6­ jüß6þçïÿ_ã³%~öz"/ѺòMð/±²„…°Õ´îEì׋y…rçÞ,­í oðIçû¸ö[Œwl½`Ú]¶•C…^ö'o§CâzN*Ñf÷e=>¼ô,ZP˜I`Ï 1Œä)ÉRLÚ œ=íÁ•L½’lYCÕã䦔QC›©m¬}s=¡ ^(J®ÿÑ`SÍ–Jù°‰¨úØ?®ú”£’-¨ÞøÙVóbD_YŠ$ñ¼Nù´´§ˆú‰[ùµì€°¶ŸºT_O=ß5eeª¨ë¿ðsÁR]NB÷“§õ}T]uʌ͚2««é£é Ól…çǬՆžÑU÷Q´[ÃÔÐôÕ˜Y› µ¥iÃõ†±~€n¨Áì¶cçþÚtžÂ¦^h¨ÿŠåý‰jYùñ(xÂÀ)6‡¹£n™n}j»ºö: ²ÙjÔ€ ÏæLšV¯Ï *ª»a8«¨i¡)”­4yMâÞgäÕ­Ôì*¶“8àà‡“èM‰#ËÛ',ÛåýYƒ©ÈZj»;„ÖlN `qJ ĉBq7ÅT“árn± ª4r,1ÜÄ.ˆcuÐÕêßñ¹~èÃʹÏì#ÿêl»È¿xË¡¢+¢àõlp°&I&ËX&ô,çâ­wg%ŠÆ$Dñ›€ÇtŽ5÷üë„rÁ ª2;€p‡Ó’»ã c2'CìYþ.'ä° âÈÑ眬5P"ƒ¬‹ãóEAì>ó þ  k‹Ûý æë•¡Ô ]ÔJ,7!õ¿œktQ´5!'+UIea*uòîTUÉ4û´\pâ»æª!šÃ0Åý#ä§²$K´#Ú/ÉJUЉ˜BUvsJŽq-~²Þ·5zr¹BÞ”&q ÉõI k®ƒÌêÆGÿz‹º7]/ÃXÓéX#Ÿ/Ïäg|Vn¤WR£ pÓªõòv´Ä¾qd/ð]4ƒbŒû!FžÊ!Ë´x{¼G1ÒRBš2KÈIU¢×ßÚ“²mÉùZâPÁצoí¬™ºŠÝd}—­gðwÄ|gŒ’&$3ÏËzK'ýtÇÅ·rbfÕ1'x Ûn9é›$i‹::[vn•íiª*.¦i=Âv9»tJ*ßI‘š“_Pè]ÕÎj©€+ º‡ÑU:6:ç)ïÆ3—<È·Ûö ï·ñmß¹y;gævf7»0V÷Ö(çNÂÀ<ä}ì1wÍ]÷ÃúYÖ¡£O…®à¥qTk‡( “̇ð*D´ŸW¹ö’»z<ÏÛpÀÜŽ‹¾{Wôƒšå©P+KR¶jž $6u67YÂgNÇ]¬y¥³F*~%œ³¢º?14*1pþýÈÈ¿&sp‘È«] \^ÀUðÕŒÜv¬dœ“¨òç~ÇÂrãá× ×lEªÎ5ÀæBíA¨`;¬7´è=.a¸Èï.œá:ŒáÚR˜³i3ѓ㦉~g|m„> ö„t a]º§í¶ç ¸wq‘ëÏ=Ú®¼(õëÀ+å¯î»Ä éÑ‚À“îË)€XI ú,áW‘Mü0¾‚ ˜Ä ú7ã~×B.iM‚giHl»ç,ÇRüÿhpìŽ:KS ¤HäÅá®^¢FÆ.Ü;‹B&1óÒÆ#‹Wás8¹Ù){›öBðE¦žä'eç&Tðxbºwq‡Sú™¹×š]RÜUS]Ü]RZÒ]]SÒ௧CÛgCÏ…ëå:pñm Ëw5/5  ‚¦½kv÷am3Û 0WÓQƒ>ó¡À ƒ¡ ‰ë\žNàÅ9˜5I¼ÁLJyMØü¶ýqê€ ¦_4ça "hÁO죻çU˜jÄÛÏ.bâz_‰•è±51áÉ×|hHz¾>.n#?Äü! 
•ïxDAÐ+[ãXqׯ"BXG¸Òâr;×"Q_޶ÄvÅd†ºaŸ«>·})ÕÆ°5ð'Àé–íV/nÅØŸba·Š>¸=– ¼æ ýü7H»JÙ†'LŸÎ‹‡±º™8œ†W cÅóÒCõ$Ž!„þá‘ÓÝÚ•ƒñçç‡6NÇøq{W€fjàHä¡Wjí›ïÑhó¯H‡‡ßÏ+f½ÇÏúÿŸwŸ‚îþóžKmšè¦R^Ÿ§:œÀ⨋-!Ù}Ø~wúÜdô‘>.ÏgƒOùg–y¾÷mm›Ç©ÿI[éy¾üµ|®f> ~ó,˜pþûÏ.¦.q’o¼Ú±)ZÖðþ¦áÜÿYã8<óPy¾þ<]÷õ$û‹~¸¥Ë­D[à\ƒKn¸˜û›Äë… ÖL´%¡4°IñKäž–šJJ~¨E]*7uÂbîDF·7ÙŒŸé0<ü]³)`¶ªnë^Ó¦ü§ÛÆn‰*+BL3÷VÚÔĺW·o¶ÊÐü#XÃûN™Â4µ<ïÙUöÏ™Ã?otÆÿC·«Gð¼ýè&Sø©¾p üŒ‰òÍA%‹CN ry,ÓñòR[ONœ€ìJâíÀí#³“]ɘÙ%û+(Ö«¦ß1a¬'¬à­ì‹Fö†" | I÷_H(ø"92!N‚Çàeaƒ=ð>ærÁJžhʤ²b G+Ä2Ì’ÆaŠ¥Ò2LcrÁÔ:¾põ ß›çš™~û%(ømˆ½FÀ˜ŸÞ÷ÅË[S/Js’¶¬¬º³ôIîv}=eóÝvøóõ%I}ÒÔ‹i[Áüû>dwvéiΆïmØ{Í2 Ò.°¿X­¯>à®fCI[ ©™á\Pî;ëº8ŠÊD / ªá5ñlÝðÙÏ’Œ /šÃ—$8BŠ2"ob½Ò—°‹“Íœøò/šùÏ24CMÚè¯é館I|¼CSœ¬d^C’³`$oPô|ƒPõî̺5F—pç÷Ïç7¸ã P@¸Ï®Zu”ùÍùòÑó·ûãðÓ>-¶/9½ŸÕ¶íëþâšãìÏöêÿ›-«Œ¾Žãm?ú|[¾&¢|çðc[ ¼,D²êtìó¨}|vàÏr?c"”J2DýÇCaÓ8ò ¦<Õ‘!ç¥åÑŠEœšmá±°¿_Ê ‹·+ûª1šK7Ų(±²T<^¬&—§V¥–ãÕ\¼Ž&1´8³O\Ðñ³JôKаHN_Ldó5Xú¢³©Ù1—YSHí“ȨÝ5ƹ4Ç=0ûÄM.ÄgÙóès&óÓjO¨þâÔó€û23þÆ®`*73:ŸÂÓ¢HøÒ„âÔ„”„ù¶‰îJC€#3€¢ƒHÏbØ8ô©ÿn€ß½d…—J²M('EB®ÎÏ©‰' Ú¢ËHÆ\Ɉ¿<ÜêïxsëÚÉ2M;cB*Þ”Vcæ¯5ñ=úòVµ \ ãMjøé ùÚž49§£“ŠB ›¼W-áœ×&R¨…H;Á˜©PNjÕÏ ø­K)}üW휅fœICBœaLXTO'p«'"sBøtZi Bìb\U=ç"ËA‘V`wX&ÌŸïÛô… }ø·è³ÊÝw,Tù|xÊe‡|â·Væ"Qôª2ÛGZ»7²øîµfäÒþúÈ›·ýÊ~_¿´§ê×YgÖI[× @¢Óå­Òúç¯U(Hò"'ÜHT3çÛ†N«ëˆyp&à8¿Ñæ?¿‹WgÞŬ©gì-²±÷64ìeY h6…™øÄ\ õÒi3„q•ƒs!aTkða˜»Òs2¥Ð¿Bh›°VüÙü]½½»òñ6ëY<˜»¿ü]ø³V~UqòÅÍoÄ.R¬£E¿œ<õSäHÞA츦©I3ŽÏuürêôÏQ##?E<]ô è²Ünn0óJ•»ËglÝVÓ`ëì¬ó[ETJ!÷õЂŒ¶ˆŠAu=ì’F…4jæÐzîuÚ•jóãE ¸¿À{H¹ê*@¯‹_Pôóâ'­pˆaç³Ñy#Ë;Ûñ¨,SœŸy¯LÐë <ž›ƒ-—Éœ£#“aËétl™LVe0œØj¦2 ‡P~¬DàpÒ¾@(q'®gÔ'a'êëBk¢R$V“m¥ )R²ÅžnRh§,ß¼£]2èåËG©¿,é¨rnËpŒd¢æ£¢Ñéé‰[v‰ücd¦i_¶þöð¦3’ fYp^ŠŒ)º“þþc†ßhíºÀZH6LЫ›³OB“·EAÀXØô wkãüF|Z—œ¯jŸdè!d2wЋBSŠl„ÆAõ¢d`RùÈwd7eÇj¬iZêN cJ][™j]J‡fT2Ê“ODm¯®ò‹âMDzŠVL· kVøÁ£éN)§Þ !M;kvòì®ÞZŸ—P$äãa·¥Bt_ÆÌKf,ƒ§ «‰iãv§Ûd´ŸÐe³ÓL.*èG"ÍkÜæ>-z+ß÷á¼0"™® qÐ貞a®Jßeµ¤nÕV´3$å\œéÞä5¥Áî¶ÛQu7m±¶Šq¾eà„¸f•—ÚçËmã;Ç ™V—Wì?kÂë\N–ŠVÙíÙ •P§ýà{òñ»Šê¶²‹rj‘MmR5~”~ã u85·/ÍCõHž£ À—‡Ó š¾SÎ…$úešÜ`Eü!!ð¯žÃ@®h¨P¨ÃÜQñØŸ!Ü-[s@¿¹w™É:QÛ¹ˆ0е õî@²¼/yô¦& :‹Ü^Â~ާ¹íJ„£8ªõɪ1â&xò´yþ2˜úTÞ÷¿M¸¤êñ¸ÿ¤¼÷’¯r‚áÕç—K,©ßðÂYMIÉ¢&ô\¿sçr«`œí_êg<Ìúµbë„@¡Tn½5‡°~ñfã|…b‚_±ßÀb™HôDÄk‹‹Ï 
|zºéêæb1pÃzŒæï°)¢üò›%{µ¨6©™—‹¯QgW#hœªDd;dw¦yèm[¶®ºp2]+ÃV°Ö›e¸!e{EÃÇç–ÿäz €— ÿ"º]®§÷ëxI"¥tq[ÄÎ3ÃZÞDsÙ^jaó ÓáÈêŠ6ý_+vùo3‡X+2 P yèš Vb¥I¶[Ë3®EçKRËXâ4§RPKãT[Beý?²:? ü4bƒ£8ø3ˆúÍ”jj‡ßo—õèÚÃêý Àªeà•ûéUU¤myU9Çû[X6ri`^\.æá•õ¾Ä•%‘|Å:‚ÃJËÏAw #éYpAÀGΚö¤ÀùH8=˜o&ö»® f'¾8È °sêþ,C~jU’Ö©7´¤gK»Óòôø6q.¶M£iÁxl¿…q²˜{ùÚ1RY e"ÏÈÙÚX½‡i)ØÎ¨ª£ï¶e[ÑVFñhMôWqZKkIÛAØÐ>äÜY$ï¬ÐÐä'¡žì”ï~8ï±tE ÂZˆ‹ K¸Dßü¬ö5eÚóÑ’°®!.1”»³å‹On#mÀéãËΰSÄ¡Ñì6_³ „‰Ul2‡™¡ŠÄQe‘–XöÜç§zsP_S>KOº1Ì ‹ÅßË ¦Vi—3âäæÔF÷AÎ×·AÉk|×DÆõ?EÂüË´e~0Ãdzȹ®D͆'¦»…I»“kw˱˹—f]Î]>ö|D˦ÊÌIµõõøºúÊJ^e•³‚çtÖÔçG‡²5 ø¨ìЗ( Å­ÁÞªˆÚ{$Ÿ\ãx;#ú *¬j‚H=žD_Ôy ½CDw79RëÕšôÄŠ%ámKŠÍРs·,©T¢Q˜R€ÀPÈ»ˆÉà hÏþéJÐÄЯŽCæ¿íNzÓÅ‚¼ÿ5˜½ûTÒ'Š9þ )ôÌ”²#ˆœ›7‘OlË>6–;ÖÝÑOåN«›ßf¿=¸Ö}ûÂ~t?Ü0„ñ¦1ñM=rÆÜFW¼šöax?ƒ®Ñ=j¶™è,¥õ}¤þàVãj”×¶L>Ôôõ<ÖíÜ¢ÿ¤ÎˆÀ.AÂ:Q³r­bk;æ•kjåöa§?«½FØgêéÓïž6o-õøà#ïŠÿv»Å ž·½[%“à‡˜Ãï/­<'{ç'mLº\µ®_²Y©â(îý=- (óvzß,äm,s4’˜„ì,+H…µiOÌmùª}66½)pw’íi²db´>' Éo6ç7dq)útj]ôÒH°ê̇ »º8%'j8FMÊ”gøí'-Šg1(ŠÏ?§(YŒ_QYJ‡¢9-ŽÚ£Î$…gKôw$<Ë‘œ+ÅV Øš\iô“•i@IØlÍÛ·„mb& Ôì¤pœì¿%à9C±–_Ù—{1 q,Ù"ØRy–-ÂÒÒa›U/”Û“]|ÿËdDZƒøÞ‰Ú!AÁ`Ú¥Že¾dÝïWn¬•÷MvÂFcðZïäYu5Ñ®&qÔt–«éÉä·ëÓD\oU6ˆ%¼†Xf¯ÍÓ7&ñc™žÎˆÄ½Aˆpª)šÏÇTyÉðª[;€Gµov:(`G$Á‹q_-J½¾‚Y‚£È~6IŸ„×Ä«¦¢ú_ÿ•g’þN¼°U–kI~ŸñŽâ= "•áÏ ˜b/ÒÿHr@Zòj¥ä^@NßÇN´sÎôp6³Æ'®s†Q3aËLöº[,ðùc·MnÚô^J9¹yxÛ?ŠšZnŽzŽ£‚°Óé )ê~6%;ÑñŠò*ø_5 |ôæóýŸGCH¾'—u‚¦§œñ HbéX“›,ôŸ^ë||rmÿµhˆ±éœ¸ºêîÕþUïEì7ŽÜÉýعÏÁ÷‹Æ‘T“ŠÐà*ðçvdºUž™Y^L»Ý2âº5d¼©øpVÅ"®ðÃäkCß~uÙãò0€Ý?vØjõ›®&š ð=yzOž¡—«¡šÌi­ )®_oî&ºùÄ~ƒ°ÏrÍÆ-”ŠrúV³™;ÛRˆå°Î1ê‡Jåg¦în,U8yÚ¤À× ò9­%ÙQTjXß’Óš/À×(úÑÐîŸYȉ¢§36ïšEK*–IêRÄâúæVw8Ъ ¹Óœkàlh² •¥åk ¹˜f™¾ŠÂÞ6јêØbª˜Ô[R8•i*ȶÒB¯Sc‹Y*¤Ôᵩ}rmšJ/GKµ˜>­Ó/ÕÅméô2´L‡ík%qN*_ƒÁHc(ü(‘HâÊ)¼ FCåe[¯õ‚.˜Û_ÿa0W2+#PVy2X'fbÅ!sUÊ¢ £)-À¢)³j òw`:c‚çô7õ^ýF¾Â øà/Ó?=¹$æådTrRKù9JRvZ@œ÷1[çÁðøAÀÅås6¦ÇÂz€¼Gú3&‡o:„òEuùˆ*«ç´é)ó×ø·°§juPC‚Ïyy~_~-{q¨j+0þÉÏCê÷oÕ$¤½>ÀÆ÷ªƒèæNŒDÝ«A[1O!ÖÏ.¿ÛüIH&–þÔ|m‚7¶å/¢a*¹{âyÊêŽúŸT§§(ï±W*<ø­²Dkúã€Ã+_D<‚Ñ€(ðÔFÐbM\ ZxNbô›Ò"s†\}ô£Ó¡ysu´‡›Sµñÿ:·×u9A6\0o/uS\9˜Í ìU?¸åý˜l¦…ûò­9N c;0;iá D#i!Š B”êê¹þ~»=ê8zS²ì±Ór72ânU®ßÿeÞ ŒeÄÄú;ðà{LÈÀŸÊoÛ=À©Ä4K6P®+Ýžµ"±xи3’òÁÊÅÀ—JÑå–’þÿZú[©:ñ 
»5ªÆÅláuý‹yµ¼Ü™¦¨šášCt0ÈmTÅÚ©Ýíé\©N3Ô%»HíÔ¡š×Ÿ€zÄÓ¥á1pìày[õŠ®Ç5ÒNºgíÏzëy£n‘5{Á?AQê)ýîÏ: ÐW3;ùxnojŨ6%ç3°R•ù.B‡¶ü"QO–îҤü¿ƒ9±"CHÌ÷Ó €%Ñ  X Š—¤¸«,e5ź/¦wR÷=$e>£ \™áH_¤Ë .K綪L$Æü:$%•2T`+«¼W8ã2‰+=ÊHÖ{ùq•ÚÀéJ“˜UR’ô¥ÊøUÌOPåçe¨ÀYÂöñUP•^2u{¶“Í­;°‡æB½ì¸C?Ä¢ Ë_”¡ ’EYÊ%ÛäóÛ^  •ÝæW:m”Ì(»Á¿ WF›·Ý ï*à¢çéb]ªËu%¦Þ(ÊSÂÊRòGæ¡å¨GK⼌ñpœ7œ˜E òÕnÝ­wχ^JUé΋âä©`‡µþ ó@<,&ÃÉ¥Òî¡–w[V,Ô ã*k 821~±A±ªTÏÑ õ«y(F)ÊQ)‹m<-f»äwH™vߦwz³Ry˜èådNþN: Ç´Ù›jñ2ÿ/Êk6œÐå ®¡´ƒ#×dà YNh‚rgÁH䙿¡å¨ä pñ¶=jÂì3˜'¥6Û„²0®AKg瀆\êâ`(½§ú¿nò4‹<3Xs;€·½ücGþýÈ_Wùÿ³ÿ¼iýûô4ëϯ{ž»6~E=€oý{³SLEFß3 7˜Çè[¹.¹þò!ú àõz+ÐN:¢ûT°]g]YÏ‘¡ý§Ö…Í{£ÃûS–´öù°•u¾#@³½Ò®jž=§1—P{ûæøC;.ëæµ§P 0Ï„g»Ý”"ýÉ“·§ûñlÇÆWš³ö3{ÿ¬æo« cJ´ ÎÕk\ìïž’ ö†-̉ó6ëcOÓW\üõqHµ¯dòÄíe ûØ0N]ÆåàâCÜš<Â.ðŒæ YTZ ÷:j¯hW[yWh§_NWhÇÝT1h½Ðv‹ÛÜ„š²/'ÏÔ~æxÒÂ1¢=À÷ŠæôyÄÊkñ-§¶NþX‘¡ÚÚõ—ÓôŸÝ+1²â+C3 é[+؇—ÔÖ}x±V8¢‹OW`¼µúŠ‹q }F<Õ§ŽÄÒM€ñ¬%k °ÜŒ…¶<¶é,«ò˜E £éPšNó|HTEzÔ,rVI=¶ èPšNóšVaã|¥"¾kßZy[µ/BÆ&ïDW•Vàîo@(÷o äò³ˆúTaYŠ?£‘âÍÐ’ÿyÒ+¤còÔ¾ÍÕ³]ÐNsÓºh¿5ÄeÓZ ¿{Yí¢v÷Ï{“>ï½¥£éPšîåY½ho´QÎ~«¯OçXh[n›2| ç<}¡AÀT-Õf³$›Šg(>³aÓž½IõäU.q>ë-™ë-Ÿ×[>­·¢¯·2Ÿ´BK7]ÑB‡Òô2—ü¡ŸÌÏêa¿_r`/ú¹î>ÐÏ9«,ëÏæs õëþuï“_ðßÁš•à›8ŽkóGÆ­ÿMþMZ¿^ù6`à?kôÃ0iÀæ:”SÜÕìvÈ7<ÚIÀ<‰å´a±š‹0>¸=—zá†Ñx ¾)PÑò<µž½Gk:ý4]°«Ïf9¾ºÝ§#8ˆÀi ­ÌM¹À±Íâ)¹ÁºTêÈQ9²ßËãÔ¬zØj³qYF³Íé<ÏÕZ}’:îa “ÑlQ· }.sm«ëí¢Np£kîq·#ÃXT‰ÖÈ™.Øl”§ð‰Z~XèÌwἤªÀ4RẠ¼u»è;õ¯‡/:ËÚ¬r,û’±Ò¹±²¸YüoNâf È)í9YÜœŠmú€Ú>š¯ i]–»ò(Úáðئ-jÑo9}•£íìþÀzû@ážkDÀj­˜­‰OëÆš,Ì!à3D˜Ó1 V]†å=ŸåÓZ`…ï¢ÏþšÓü!½7q} ØS-k÷ev†”ngU˜@Êm‚| ȵ¨Ž¼2Ét˜Bµã­à¹Èê¹)Žz^ãVÞ~ÀbaÝ À7¦-f¢ÞVÊí¶äÂu"§BÌÞW:°¾Ï¢+Ú€ÖÅå&ø´ÿ`·ÓÝg´®as€f%8,L[ÀÌo·ô‡²#ÅL#$q¹þ™©¦3£-•¥3ÑDH —BœoŸ-¦ØJÏ2¹6©Fø¢¡ŒqœÍ$M6˜:.„<ô½†(ƒDJ¡\,_Êrá\ñ-[“„qB˜6Ð,%ÐÒ’'B„¯® IâˆØâšlÃŒuµUmçpFC(knÅ$Œ| ΄kLY Þ„ å^´å˜@.„Bšæ4ËYÄ)™›Þ¸,óáøîêH±î”ý‰ºUgzÃð4pÀÅÿ\ x½‰CaB5ƒKAIb9¢Ä0ëuOîJ’{ÿÒ@P!ÊxbLe57†}]‡È–˜$#‰2|çǯÿŸ¥ ½JW!Œ@ ± !äÕ{B[AAAAC›@ÿûÄ0v7Ø"gàÛ¤J2* 3KC`‡ƒòD?(H!DÆNöuÖUœp¨#\_ ‰Ÿ ?¾+‚ÃS¦kÅ%)šaÕš´´½aFLk~Gl/²)õfT{@³‡FžÝÖräÖ*¢¦Uªcäuw½Á[ïò9¨¾à+âøöúPi®‰öè+k|÷ɰ-ÓÉq$€ ’kÁQH÷M>Ÿ»„º›œ-aìµô}O ­Dv’$I’$I’$IRî€ö6@ÙÿS„.RD¨Š©ušŸ2Ãë¤Ü°ï(†2Úaå´À#(ªNª$£BІŽéH³·=rì!Sf̳‚B`·ÐðÈ–®ƒ’«®ðóDßpCOˆŒi]c”j›nÿ´Àè¿¢´L­¯IkÚkÀô6ÚŽzÀZ „2žXýåJ]Å5×SJ@„ e\< 
ˆ’¬¨šn˜Vöfzp½4PÒ1 'HŠöÆ9?AD)yoUHGUô£ÉlÙ§Öåž'n^|¿·‡fPSß)ìß _xëèòe²tÙºl]¾®•.¿àp—íãr¯Êæ0fo½óÞóµ»}ßý˜-vöëîM‚Ù}JÃÍPcqx¡#ùÈÌèÀ‚ÆÞ…ÒÝ|ÝùŒ¼ „2.>¨Q’UÓ ÓÊNڃ뤀‚Œ ží4ÏGR(Ôû³U6Žªšº†¦ÖÖ1:cq8ñ Dô¥Ê2hjZzFæV¤oÛÇA{Â'8(!¨H¬^‚!Uç)Q™ âSR~Œœœ¾Ó -/A!„2.¤Òƺ8ãÎÑ|P$–HerB0‚bxÊuE-¡h†UkÒ.±;ô†Œkí‰5íðù‹°½?_e+U5u M­.²m>à.7íøÛ®«Òþ’A†•C‡GPt7©’Œ 1*:fVSìpPžè£ÄP‘±%&ΚÊð?yØÓxdAœcŸ"øŒ‚ŒÜ­\wVÈz}¸>·çó^g}Xn¡5Ÿ*''ËéD[^‚B„ e\H¥u©%˜ !h\Þ`4™-ÖÍ®ÊânÜ?9Ex|P$–HerB0‚bø.5ÕˆµŽ„¢V­Ùí~Þ°Ç`Óšgï¶w‰ïΦ¯eØaE3)‹tá”—].”x@í˵Ûìï^"êóñP»ÌaA€Z)BÙl$!1Ò:{|Tº¸5dy‡ØFÀ¿½¸*ÀhËÊõnáò#`Kt]´2ólC¶mÛ¶mÛ¶7Ñ,”_32ÛÉ6Û¬~M-¼Úut5Ý«ÜÇô…IWû­¸9„ï–‡Š6Íx(Ŭ¡Wv‚\ A˜PÆÙ¯ùDIVTM7L+{¦\oDºªÌ[;{‡õ~¾^Ñ|µùjï¯#º=4l—›ëþz }‚-±ám…5aBOŒ¼Ê­ ¦Ñ:FfãÛœµ˜-˜‡Àçýåy(@3ÏÙmBH&ƒ+G  ¨/©’ –ˆ(ÛÃM†TE²n5j]û¾–s8ŽlgêD€ì˜t't{ ¾S’SYiiizÃŒM3±4 )œ€nk/ÙúŸÓí”Ì”Äò•VvP¥&åhu¯MZö½Ã€¡Œ—`å¼  hoR%·QÕ¦«†³˜Ž<ëmóKk 3æYA!°[œ…G¶toçª+·òDß †Þ%2¶ÖÄ×2M­vc–:bð±jÄIJ”ŠÀÔÜNœH;S ¼©keâºT·¢¤¬9¥,ƒ¹ „2¾Ofäˆ0¡,/uÝ'D*m¬£Ú¤ÊcñË?Zý*€ð‡¢ß‡ë’/I¿'û~ü ˜–”­¶tëÖLÉžåÀÛþÀîëöŸeê¾=5) KêF‘5€'™t¦C;!'ù\†>Æ»Jsq¥Ôíª =#3‹ë6vÏÕ\Ü>³Îñß÷Füùùÿš¦Á.”‚ët:°ƒ{º`¤¨…‚øD'hMã@0Ä`qxB(ˆŸ,­h¤F "Óå Ì;tH13Ÿö-ÎÛpKoÇús%÷tA¢§É ŠLoaúèЧ@÷b(ènðf@A_]âòÉ”·+Šø…œñ»wi¸‹Â\¿¼¾"L(Ë}\‹OT*m¬ eYXœ©Ÿ+PC(ˆ rvyQD@Áfó#l7ný éñPŽüVÑ‚èìzä0KFºª$v•…Æ`©ilKè苘˜÷r«½ïãËç+ÇÁÊÏÁ(ýì“5( e °pðŠ $’qÎ"6¸ÒŽi„ãuòEë#‹¹øñ>)*~AQ&”qñÑ ˆ’¬¨šn˜Vvö\o B0‚bx¶óõNìÝe™¸¬ã´X½—z}ﲩžKó_Ü%ËJɵï<†1g±È)€GP„Oª$£AECÇÀt䜷U8FdÊŒyVPìÙÒu”äª+¬<Á'€NY$Æ&ä® L »L§ÜN…€R>¨Òܤ#íL5n–‰ p,™OiäB0‚b¸qéç ®ŠÎC°¢8U —ï(ˆQIIv^Ù]¨ªÍ¥: ˜,·s…]zØ” G:¢Àà$ A»;çù¤Å&ÜLg\k§C²<Ÿ‡ý÷ÝR¦½Áh2[²n¸7쎜Ûî wžÍÿýHôùW#ê%ŸR]dª¤–¯DUì74€¡ V® !¤dù»ê8×f̳šB`· ×)W] +Dn¢É]W(”î‚*<éÛh?¥¾äj(Êxb)W©A„ e\< ˆ’¬¨šn˜Vö*{p½´. 
!<¤c(N=†mOŒóøuwÐ÷M’é7Uô£ÉlɳŸ~ñýÞÌÅp¸J¾ªšº†¦ÖV1è±8@|ÂbÐReY(ÀÔ´ôŒÌ-°¶}Ì^…{P"B¼"1ÊI‡ÔcOIϰSö*N§“ Z^‚@„ e\H¥uqÎMÑ|P$–HevB0‚bxÊ¥tE-¡h†Uk¦íž½!ã*{bÍÝW/!ÿÄ— dù +ªÔ¤®î5 I˾0”‘  è\ɤdI ])SYìm{ÛÓf̳‚B`·8·tæŠ+­yÑúh@ í&2&Âzd|c{Ø"e+ÛjŒg‚£° P:ªœt¢6íL3¦ŠËÄ…ŸK;WZ¤{­;¥,ƒ% „2¾O2aBYÞ¶ë>!RicÓþˆL\ñGJ«¿ .ž#Š W$Ó£ÈfÇUq2…zV¦;{Ð]g··Åz¡ý³zoZÜ·™&Î9 r„cC&Òˆ \Ñ'éªþèŒhËqú³ ƒÃJóöŠ®oœÏêÞêa`dbfq%±±sx–qqûôféóm{Àî:Ó+çá1©’L ¨i c`f5…Àƒ§o €ºlÁhÕtÄî ~:g§ÑžðB‘X"•ÉÁŠá)ÓuE-¡h†UkÒNÐzñÆ™šsõ÷/?}eß׫%ÓÖ³nó«QGÌyYåÙÄcoC[ÁtP‚ÁŠ *¢æk›ëï–îým÷YÓÌyЩ¾Óñ°vRœ: nJ mzƒÑd¶XÃI›@Y<{ èù:m¼®¹ÁW¡y5röT³ÐA—É&½ÕÞõÝ;ú¥^_[MS5Š_ ßGÁ§6Ë·àvá½Až>­‚bÛ'8[Bîóiî.—/9ð¸Öò"˜W7Í&Ÿg^õÞPGÂ'è¾C.í¾©ª>·± ÍÆý¬Î¼ÔvÌàà10ÊÛX¾´ 2e®pK}ÙsVs?lùÙóqøoïq.Á Á Á Á Á ÁB°"Øû¸'ù»v@A±BkÒµód8{ḋóëRèQeC ™€L™a”§ Èäd¯¸¸J¨Ç0Õ±¸h|qפÊd²BÙÃMþ$*4€Ä l)…à+;Áø† L(㉤Ï6Öæ¸ äI +SUUUUUUUUUUUU;Q5¯Ž­Sy]tPÑ£ÂSô gwÚ\¹ (–·Œ|ÁA^EÓ̶eýa_ ¾ÞGMG:*~B÷6GÜÛzFvO°¥G’$I’$ vµ¾IÈä ç`¬ v HkÑ+G ¨'©’ ;ÜÔSÈÖûÃçÞv@%(þ¼ò}¯]Š–— aBRic]NÞÁdEP|¾rÿtÌÚ$/ŠÄ©L®@FPlðNel šúÇçÚòÖ?}ÃÅÍã{rÔW€ 3…YêÙÀŽ“ûqÀ­ËÀ[>»±»±»±»}öD÷ƒa¿ îIžËøÝ¦» Œ X¡£Éÿ¿v*zðY’"I’IR¤¨Óèx$ùê¿0að\ˆ 1‘K‘ÌLºpK]kÝzwq÷Þ?êBüK£Sì.<[½B£¢ºCo0šÌk8i ”ÅS÷Y–OðüfhEš̙û±S@PŸô[f@ÐKšÏŠø.?xUkW·´}nÉzÕt½)|CƒÁá7ÑÀ`ðÀçåÓ*#[þ©9cyÅ.ep$smE¬ŽòÔa(ʸÄ5µHj×ÑÕv¯rÚ/üKºÚoÅQ ôå° ”ftH±Ëä *y &h„2ž˜õ*·‚šFë™ÍÝæXäÎÏs`@ ÁwÐëÕ´7ê­w³ªÌ[;{‡õþã¢"ð6 4&JaYüî.ç®õ„îFA)MÏû I’tá—ÛØ’f×ÇkåˇŒR»ºáº¶p]×%jñâù9ËdZ¹¥vµí×yFúÏ4ãO;ä­úý->þÌÏ FôL·ëøÂ¡¯ôzãÇ'ê;ÔÂ@ @‰(‚ Ò嶇5F’DëèÊοæ õ5%>ÇKÝŽ®Ãk¡Rèi‘!XÆ™—†\—(=5X欖< úŒ¹C A1|G¢oå»u·,wƞ庛3=\IÉtwŽ®Kk ëV˜z:eH–r (IA ¸<:{XÛøñ (¯aA˜PÆY®r+¨i)ZÇ8±|ÔéÁÃ…oa+\Aɾa‹¦9«¡­3 ™T&êÂ¥«ó›ZOó²üó†‚PxaF¡pC‹Ž3wïÝððÑã'OŸ=_­7ÛÝþ€4ŠÖ»?å´ë`9hž¢˜ö´6S‘÷óÝÊ-£Y60KÏ:Úêr¯o4bÝÖ(NÒ,¶ToË¢•ÆxVgßžñˆ¥õÇ#˜Ì•µŸyʧ\¿+P÷¦÷[ÏŒÏÝúÎV™óõkOˆ¼øªÆ7U3¿æúsUÓ'/**ô¼êhõ:ß\TkgÛ˜÷s¨Ü3^< ‰a†a˜Õ¬†aVÃÍj¸a¸a†…•½ûUUUUUUUUUU¢Wâb1ü=Ƹ±ž BùQÑÈ’EF–ø{‚q’_O¡ü¨häëJû2XKG!†Aˆá2ÃeÄe„Yeï@Á¡ªªªªªªªªªJ¡•™>¤Üö ?_RˆôPk™Ôiû ex°rTxE,é$9Gˬ ;üªŽ•Ûo·(:"@i6¨BAkÜ2u½B²{a:Aö†!L(ã‰ù®–íj™Â„2.²éóûxffffæñ/o˜ùåT&ÅÑÉÙÅÕm6ñƾÿ™‹ØÆ„p¸w‹5Žã8°û¤UYQ5ÝÀx4™wËcÝœã®cwÕûCøC“gO?GVÏÞ}é°Z¾m& 
g¬DYvÔÓµPÑáA«ûgw”W4òõl”&Ûö.ÛÑlÏC2i']“»¡ôHÇV~Î8µAWÐ0¦I”ÅC[$óÝ^Z¯“}3eY–eY–eyžçyžçyžçyžçyžçyžçyž·ÞÉÚB;щNúŸ'ëc8Ù`g(£ VA‘0é$9GhŽÉ˜2cžu »EEÑSAõ5Þ‚¹ó—êþt^ hUÿoJg'˜ áŽ0¡Œ'¦¼J†0¡Œ‹OZ!Q’UÓ ÓÚÍq½xýz¦v“[?\#(†?OóOëõlæÞVg·Ù»ñp#£h†U¯æÃÎÜ®­t#à–ŽëºÚÑ ÄX'/IŽÔœÓdŠ_g'ˆh Ó$Êâ¡-’¹‰n·¹sŸ‡aí±õô±åænûgÂîìCð¦ï#5¯ãSÛz Cñ¦­œâ­ …B¡P( …B¡P¼Ÿï>>¯ÉÏÞ9t½þÍþ3öö³ÿÀ‡½½}Û§ Ç„À#ؾ⠄½Áø%ín|º‚ˆR$WÃPšÃóy {{{{{{ûOùÚ…TÚX—‚”…;„2.ž ‰’¬¨šn˜/‚ó]Üó±xé»ÊõµjDÐdͰjÍ´O<]ú5¬"×-¹ Xä&+Ýdi›¬a¬]ÔÞ–iwöÁûg ›põËþIJ”_~PFq’fù;¹öò FÔnBäEYÕÁPNÊ^\„0¡ŒÛ^¯ J²¼‘$ ânðÁCð8†$E¹»‡vsü'좔¼Ã.¥£kzƒÑd¶teë»Ûbçð„¹¸}^¾ÿmZì^jDDÄE¿CWÁ$CX8x„Äs$dXjh蘎ə2cžu9v‹ž,uˆs·¼7åߺ`ƒ""6šRµ™ydæ¾íqêÏ_w ð4ã oxË;ÞóÖAžÇEb‰T&WäzÊÞäÖÏ'ƒ`Åð籺ÎfoGvöî¦L5âYGnͰjMÚÍ<ÿþƒåÏ5fæ%YŽG7 KÏ$€Xím(¯_UUUUUUU¥@aYQã¤ulزcÏ÷ûóôw_âëøöšm÷JKoÑuÊ„6»ÔùÓ_jä2HPEÖ  ë's‡YaB?ħNι#° Y6§B*òÅ»ë^è( …)‰²xDE+§È9-ÍÑü[ zh÷#”'¾Ûݪ/]2’ÜTÙÛZ^&.€Ó+^ñjí›^ñ {åž•{VîQîQîQî}fåž tyîZ–/p®ÝØkÖrºÚ²ýœ|3Õº¨IÐ õÀ÷oñ †nÁ0ر8@|QïÒXîo/ü]}ïª~ìö÷ÆãÙý'ÛÓíYÍ·Þ7*ûÅø§WÇ_ÿ2ÞîþjãCŠ~㣎;9²uMñ÷˜òLGt:IM¿&(î„!L(ãÏs›~L_8»ÏGæÂœ‹¹ärbÃÏÚ4×7'€Õi`›€Ö° <éN?ÿ”ýÚ{’ß¿þylgÅõŸ)סРøHî¨çÉ‹R}˜ËW¶½ìüèþÄWy¾)]ã§§µãì{ݦmÀz úb±@S… OB1F5!6Õ~=`úÐÿ©õÈíMŽÝ¢ûv§ÎÚŸº?N3ÞÎ6Î÷ƒ¤ï5LV«V­Zµµ2Ñ­Úwë¹Ã­ƒž¡q@c~5µM쨲«[Ú¾µè髃ǛòÓ¦Ûœï­ë·] è)ä5¥Å‡W«Õ333³ˆˆˆˆªªª*€™™™™[ÔÌÌ̬{Ÿ“÷þ¦_@90 ”ÐI’$I’$I’$¥òßhín°8Ÿ@l'U&Cq©i陳š†Èš'ü`DR$v®ÏY“I8Ô./ŠÄO_|~¡@FP O™®#”¤h†UkÒNÐNô†1­ynË7pþ®ÀÀÐ 0Àà°À}Ì ßÙ/{ÿóï.ƒ?*B!¥”R)¥Tî¿[þ.@¿fÌ@}Y(Àû»Í·1U5u M­­ctÆâpâˆèK•e(ÐÔ´ôŒÌ­&Hß¶ƒö„OpP"BP‘X½CªÎS¢2;@Ŧ¤îþ95}§-/A!„2.¤Òƺ8ãÎQ|P$–HerB0‚bxÊUE%¡h†UkÒ.±:ô†Œk­‰5çÑSøÛ•ï>Àœ,À±9=æpÌyïØÌ`hŽäã‹ä .ËO‰  ÕƒãñËãyÔ¶Å]¢ª  Rªªªªªª P•$H ªªªª€ªªª’TUUUUUUU’Òù»ìƒqQ(%à8q@ÂB¹ÂB@ ?"è’«ˆšFuÖ˜Ç û $I’$I’$‰¼ªªªªªÿÿÿÿÿzTUUUU­ªªªªª©g£×&$€.z¦ ¬)<‚¢¾¤J2X"¢l7R‰Oi+øÊNÍá# ŠnŒÝ òà „"±D*“+‚ÃG¤ E3¬z5ólë³$I’$I’$I’$I’$I’$I’$iw–\¯c·Àä5¥@ŒÁdumtxÿ¦ ç—ÓRvŽÎ¦.ÆÝÞóÿÿ?gÿÿÿì»ÍÂ.pqn%å+UvT­†rtu_ƒšµô½Ó !ÆK±8/Ç'íT™ ÅíTšÂéÊp6Óȳn›g,mSš5Ÿ4DögÃ#·tŽ·çª•[ó„ïÎ"B•Xc-SjµYþtDbÕĉe …Ss›81íMx©kËÄZª·BRÖ)Ë/0·„ e|ŸÌÈaBY^êæ>!Ric]7˜dùo,¾cùG«_þPôûp]ò%é÷dßÓ’¢úvº5S²g9ð¶?°ûºýg™ºoOM‚‚€À’ºQd à G¦éÐNÈI>—¡ñ®ÒÁ\\)u»ªBÏÀÈÄÌ⺆Ãs5·Ï,ßÌ>ÿji/μ§ýµ8É~­S. 
¥_Ÿ¬AC 46.>š$“1Ïbˆìðœ1s±Qz:ÕEñøÏ<•øþê“â  „2n{¨ J²¢jºaZÙÙÛÁõÆP#(†g;_í°w—eâ:°ŽóJ¿zÿ9 G=zô‡¶Èø>ÿ™UR¾‚"ê74hˆ!Åâšâ&žM"ÙȿۡƹMiÖ|VÓÙ[0·%i®Z…•"›è@¸ë„²€t7¤ðTBßFû‘ú~ÌB&”ñÄR®R ‚0¡Œ‹§DIVTM7LkouŽëíF= é †$E!v¹u¾-~íAÅ}'îw{3½Áh2[fäõãëÑ›§GÀ(a΂“• d+ôVT©I9\Ýk@“–}?2`(#Aѹ$’Qñ ])G{Û^Çö4…ó¬ Ø-ÎÀ-]ÇŸ¹âJk^´>ZC»‰ŒJ²´ÆT™¡-ÛjŒg‚£° P:ªœt¢6íL3¦ŠËÄ…ŸK;WÚc­ûEº[XB0‚b¸'2 #(æãÈ¥€¤h†åü™Å)­þ&¸DxŽ(.\‘L"›WÅeÈêY™îìAwAÛ‹í®«÷¦â¾Í7áo r„cC„‰4E#b®è“TUÿ´gT[ŽÓŸelVóöJYOüó§î­^™˜Y\Ilìže\Ü>½Yúü«íùæÐ;Û_Œc¤ò»ëL?±8Ÿ@ÄI•ÉL!u陳š†È&o:±dX5Î@™;¨?ñû¹ûÿ7¨<ø¡H,‘Êä „`Åð”«"‚çÏ©uþ¬?ï2׫§s6Ùœ³.f¶5f•¨Ò«O¿ƒ7´1À€¨»çc«7¬m¦5qu‹µþ"890ÊT¥T÷}""æÏÌ,òEDT¿ªªÙ73sÿîî?""á·/„ª›Ùäyî•DÜ™OÐÝ”Ñ:ô£Él±†‡=~³Ÿ~²~î¦ÄÌ}ÈJ P³Êü—¨ª «Úˬõíü¤ªªªªúÿ?, ¦|ÝPÿÿÿÿW<¢ÿ» ͲL×`àº`^DóúšÍu[ñnéÞyÐßvo³q˜]}ŸŽ‡ÝN9Û0øv }O€‹(QÛžýœ%sö1îüëÿÿÿÿI ªªªªªªªªªª"€ÿÿÿÿÿC®µVùï]+ Ü¢g+ì6E'œ5‚•(o`å|ej5"""""""ªªªªªªªjfffffff?ÓöêêçûëvQ¥"U5u M­ÍiË t  5`ôÇâˆâˆz–*Ëd…²ÇM?¥Ì–ÖX)Åý*rìRT^aBRic]NÞÁdEP|¾2?óÖQ¾@(K¤2¹!A±ÖïòoxëŠ×uåÀA^EÓ̶åËMûjðõ1þt¤£â'tßhsÄ]@±í¡÷`dß,ï Û¶mÛ¶mÛ¶ªªªªª’ü%bÂ<"hLäR$3“nÜR×Z·Þ]ܽ÷ÀºÿÒè$Ü…g«WhT”Bwè F“Ùb 'm²xêdÎ'èïôXÍ"Î}ð0]z²H8€$¨ªªªªªªªªªª Àcm)~Ûj"´üý»žÚ½lqtm•X ÕS'C±Œkƒ˜¦-Útìj«W©Ú.¼K¸Êo•£*Ð’ÃBT ´F‡)vå^Ù¼E&”ñÄdWnAM£#³);Çb—µ'DIVTM7L+{íG(jº2ƒe;®·þìÍŽ¤ýy½ÃÍ®åÇÕ&Núߥ\¿‹nÔAúˆÒ,«_ÿÎ綘Åý‚¾ô-ÝÏû ¬N”²¯þLdæ¶¼ 6[m;—WÚò.ðµYŸÔ.àÿ¶mÛ¶ýÜk´A·Ë± I’¹n’$I’$I’$yÒ5»oaËŸ—éÏß oÌÜ{võßöYk­µÖÚº ‚³šÖÇmíûŽã•íyËÛ¹ù>­Ï7³«û\þH™¹Æøúªªªªð€ªªªªVUUUÕÿÿÿÿƒpaLÿØÉ| ù*våp]B¬‰:$çéÄ”8ˆ€$iÈ€—GÏÄ¥¨0 ° L(ã‰,—\5-EuŒÌ=ŸÙøÓýN&óÕI’$I’$I@rà ªªªªªªªªª f2xì);Èr@MÉZYãÚ@—Ç °5Üä«+î.¸Æ „2žHtÉUPÓ²TÇÈ,×óº¿íßt>omÛ¶mÛ¶mÛ¶mÛ¶mÛ.¥€–ž–•P_m3î#ŸÏQ¿OX{7½¿¥ÍãÍ}úÎV_ "]²°¼$I’ä_{Œ1ÆcŒ1ÆcúCþ‰ïülÀoz÷Z5éï¢ñ66ÐKøÒ¶â§ @Œû?ïÐ®ßÆË`ŸDÚÑ?1¬½?!’p¨6;ëqE» )2R¿Fj$æ-ul`H±L×—¾cÉœKç¡À\íV èz±6XicŒhÅ£o*6ݺ¸±­¡xg—\'·ÝŠZ?Û‰¥ÎÐõô3Ã=<–žÏ¿·“÷…ŸbÛM¹ð+CúÚO“¥ú9”2û6Œ"¨æ|¥•ZÆb¬äw îÂY™aD°mfh2go0¾“Öíû$°DÁŽm猑¶ÀŠ6†µ Ð/,$qóË÷·ÌSö·ÿ÷Ö›&_¹î+œØ\´A Ùš-J&=|"©ÓÛ–ï×=»ÀA»p+K{½0¿áeT²ŸDŠë·“ž=­tÞ|Os¥ï¦Ã›_²"'ŠÅ²4—pSÖÎ’WUg±R=Ku W*³¶N¡ˆçCäp`Œ2od+ÞóÀviº/áX¤›iÃá Ëš Þ©ž†(V²jTxo;’¨9›´{í;Ï”>mRO¸ì²4ΈÚhÔŽmcë)A{gÑ{ÜrŸ"g2§ã¦È_x˜NL.œ÷÷O¦æ°Ì…/ìÁ¾*âŇv¦c£Ðd<Šìê)ªÔƒtc|4¸…ÐÅ~µ¶¹J i >&Z FÄþQ€ŠŽšA̾C‚as‰;`Ã_Feìý1(Í'L£ 
F€»â½ÒÙ"­]¤Q½UH¸¡ýÙÂ’;û‡ÂЕÜ%éZL»Zˆ6¿‚[-$ qï÷/.•ÙŒŒ{gK4åCì\\݃#ØfµÍGiuB†lÓvŽI òÂJh‚n<4Ë#êb/ªÌ«Ç›=TW³Ž¥ ¸ÊŽ—|­U8í„fVa&f&Zorûé˜<Ñ.Iué»û':]¶í>sõ&¬ÆÜÓýE¹©|h¦åð=b¤Æì"ØÏÑ£T‰aAd:¼±û”"mš¢%­ª¡ËJÑÔcŠƒ[Þ$Rè%rí]hgx‘‹XêΆ<²u‚zöâ(D¾¤1˜—ÑglDT%¡2^z€Æ^V«¤·lpdù ¶ ZدP¼¬÷O“z]Ü—p̵澱y!Ì2}õ+>EGKh$X~Ù(Õc8;I% bßÑôé2•Ûµ0æK©Ûâ—ÔW†äÿ¬ÿú“dÅ8<ù]ûi²”€}r|•²;«" N0ÙÈ^i¥–a1Vò; >……sNœUÑدN&ÈN{ë‰Öí{ç׌% ìØÙvÈhiÃjgEƒeA¡;"EKâfίT6¤(û×aXw½Ø8Æá+p2T]êuÄ69•£z_Y“ÔÉÛ–´éú¯gŒŸ[bËȼðF¢Ëàgl?™'T÷ÁÚ³]ñÒ[~ê½›u‡7¿Ènj­˜þ¤LbxëNZ¨28#û·ÜJ¥² $ ‡Sͬ×.‚)hyÍ¿!rpÀù˜ e¬€±hßwÒž*rŽn^8KÖTîn¢§tQ‰‡¬(¯y'Z d!¼Ðy.‰±»»I=ÁΦZˆ˜ßÁ±5¶W ójÇÞ{qXÍÀ4ç# gÚ‹ŽåÚéícZá¸3&>‚…!Ð<{ ^BqgBœ ü¢QÏò7ÞvÉÂX…{‘ᣌ$•Iû¬Ö6»S3‹.æå”)œò€€Œ¢.É'°° |G­Å>ýAóŽ3ZЄý0nº‡ŠÒü(ãÅTdä5»Ãå‡Ntx’4¼,P{Tƒï÷îÖ¢ˆϦ'î”™„ƒÖÀk” G”]YhDóØl” =RÆ9ìE+d‡³†,@ö†‰×Ä”‡YP„ܤPè“òòFí±¿ÇƒC÷qƒ8.ØWž öjZé*m‰ þ#ð˜‰0xfpùºOä;ô( aÆý‹†M[. ¦ª£ºR£°«VŽ\ï9]FÇj[=^'¤Êäû^퉘»„Õ~E³pÖ ¯¤÷I0•Lús^y#ʬ†.²iqmíì®ÝD…oL"Û+[Š£%KÆ÷g—AK&®@ÑC}=ìšœ§ò²§5×5ãìX™{ËôÕ¯Ô•ÆL×Á˜ 7ô” iÑ^™¸Pþk<§Ò쥅禚-0J«¿™Ö 3þ¨š¹WOeÐ8Sâßüv1?´ý“‚ܨ|³ii´¬&Eç( ÊK?ÆuhjƒÆ[ܹóàéQÜiŒñ4‹LoÍâ‡é—¹ño¬11f 6Û ª-JŸ lÁ^YÔ²ür0ø(ýNl¸‰ 6§œ"$@IüÎiÙ1t*ˉ§ÛÓʰ]·ºÔV‡ëqïÖ’—Þz³:mðÖ†aô]ñ;×3³Ðˆ2d"Y-LmK´Ñ¯=ÿŒòäۡ҂色¤FÎëñù­ÏrÜž^Ô÷ü’>¿´gÌ— i-KÛ ¨ Ø ©sèf}úõè5JpzÐ|­°}c ÒðÃ!³f´ŸsÌSÀÌ×GðŽÈ?øæ+ä C‡!|ÙNf-šæÈU;ê6•ÌläŸV”ÈÈ)([2ú{A=(–¨g; ‚RÚl nL±öaŸTÙ©|ËnÚ¤#sSñjÆ™v]Bb¢âî9w =ãøÙ°izn²°²±+êˆ "T{r(Îê¿éÿÊ”wÓA×þÛqãjó÷×tÝêú”ÌÐä>™¢Q“f-l»z:+ª«+äh"ëvuH…ð-¯ùOæ¹1æ¶kØ5JhQQ­W† QÅx³¿^ØñÔôlq1"K9ŸmmQ_œÿåû¦A/i¿*5í(±šÜÕìžûô#ç=ò¸du:PêÈáùٮ͓ÒZ(C³VíÚ:á)¦.ÝzôöôШÝ4®~ß.ö=¸9ç¼ÿ™­uÁE?Ï;V¸‰¢ËK.Ó˜¢uÅÕ¼ÿUŽ×ݨIQþÿoŽ©ìæC6(¹(•/þ ⨀ZÜTôƒh¯,éw‡º.ÑU¼/³BúÇg`Ê]ôÛÖ<†ûó£¤('HŠ3\WW^å.ûl NmÍéÒ¢P»Ÿj7Îã>×ê§>§(Ûôù¦^ôÍÆÀ jõY³1ëïW%âFXý™n+š+~g]øŒ¶ Ü˼ ë… >ûç`zo©B»`Û±vŠPõC®Ä’¢0‚öÁ6kãyó>ýÜ^lRRdu6N@š(Z+:Ì‹v½ûê2‘s2úõ bÏMŒðâÊNÅšJL§âGKŸ·ÿmn8pàÀC8$íé þ.cb1éo´oW!x¿x…xï.QÓB±Ëß®%ÄÓ†ð"’“å@®#²º$MÃ(cw9öƒ“W-¡è !Êijwñ»’wÆüuË¥QlÎÇŠ#{ ªBÉ‘Ç6ÉüãÆükšÉÚ4RstšëÎAär“§Vn{ÏTzyZ›*·öaêòÑKŸüv¾‰æï¬3ÌF/Ú×Âÿ^„IظJ]ż¤I£`™°Çu T‡«–ã²v 
ûNrßúÕe_¹å1ÌJŽù·xÎÑÆDZºaÇÜãLOÎ4?½Z|2ªÖÖôí8àhÎ}ŠOñÀÓTëíùåÉ™ÑØ·gyšW¸œ8édoŠáIѦãG®ûGJÿb·€¯â‡8ô£ìÐOq/»ê“?úôi«»þùìϽðMmø¿–®‚Ä7<ýÓÑ>qN€½Ô»Ì!àòqòn]<„¿©2Å>Æ]êC¡³.ÁWƒyŸGÂ.ÓIƒø×ÔuÌÚ²Î8^Âÿ‹{Uë©­›¿ë&â!=—@3øw ¢wÂ`á;1a—rocksdb-6.11.4/docs/static/fonts/LatoLatin-Regular.woff000066400000000000000000002154101370372246700227710ustar00rootroot00000000000000wOFFD<GPOS€:Æk„<’ýGSUB\`™à£8cmap>t€æÿ‹ cvt B”1:'VfpgmBȧ —ØÛðgaspHpglyfHxMi”´®Ä­ohead•ä66 ã7hhea–!$šÅhmtx–@ÄL‰¡‡kernšn[ÙTFloca`˜¨ý™$maxp ø -name hyÌÌÄpost€ ÆE>• prepŒ{ŠöŒý.xœíy|TŶïkgžÈ„tBB aAEQQTÂ<Ï£ÈL@@Ï9ÅD‚È$“ˆ H‘4:Û¤“N‚!ÚDIwCÕØÉ=õ¾»²‰õÈ}÷Þ÷þ¹ìÏ/»³»vÕªZë÷[«:½UhBˆ ÑO,>]ëÑOÄŒ˜3}¢h2fú¨ âî‰ÃfN…m„”BoûÛk­Æk¯¯½…Ï„QÓ'‹õ3Aýl¢~¶¤‰ÞÇïê}yqõ› n›×¥ßcõ‰îÓ#¹ÎÈ£ë*´fuÔ©ÚCÚÏ^-½¦{­ñ:ÈQÊ1Ý;Ê»«÷kÞë½wy¯÷OòoïßÏ.ÇF®pÄÄxMè07hlÐûAG9²ƒ×‡5áèöcxïð%Ý"–GìˆÈ‹(lÄÑ5²gí’:~üžW§<‚£n§ºó9Þ¯»±îž¨è¨UQùQÿâ7Žz-ëuª·&:&:9zIÕ•ºïGïæ°päs8Ôó£/D_ú¡¿[iЬºÛ,¶§>nDyìÒØ÷crœ·!:¿Nyôz3αKãgǯŠßŸ_ÒÀ«Aôn0ºÁÒ„¿'œlè×(¤Q{…Ù -Ux­ñÉÆyM/sxÄÂ[>ÀWîµe†ˆ’E’4‹Æò¤h&ˆ;䢅Ü#FÊOÅ òŠ˜%/‹i‹¥]¼$³Äù¥– k A#’@cÐ4]e†6Pšµd0Œ‘?hcÁ80L{´‰`Ø.¯hÇ¥]Ë'ÀIpJfi ²åFQKxaq”ô`eŠh/KÅ2ù¾X.ßåëÂ,¿²Lœàu6ç`“/‰ˆßÝ5”ø¾qç'¢ŽØôŽùf"28ßÚKw~#†È1]þ¨ýSÔÕ^“?i¯‹Xm…ÓÞo ?ímÑ@øc•rǬrbÕZj¼ëKD·§ÇdE_qB›æÝ(úI¢ŸÚ›"š¾BhCë ù³QÜÕ˜êwo•™b?ý“ùôšþsµ;i\mãPfôWvF².ÙÜñŽH–ÿÁ];¸ã}wáëm`¿ÜDqôp†é¡=ÔÃB_z ¥—¦Ú&bć^®cÉ%m…”´ð…óíåYíuY®½-+E‘ðǺ bA}Àòš¸“™µwÖ̬,wƒ{äÑ´ô(îåZÐQ^À}à~ðè .àaðè ÇA7ðxt=ÀS 'èzƒ§AÐôÏ€þ`x< žÇÎ  ’.1 CÁ00œë#ÀH0 ŒcÀX0Ž9ŒÀD~Ÿ&3Ç)œ§²ÓÀt^Ï`¬™¼~Ì/‚Ù`˜ æù`X(‹@ ê¹å]‚‚¾„/–ÊÍG:µHP‡H¬ ¢ Ä€XPÄ/ÍäÏÚ 9hÚƒ{AÐôOž è ž}@_0F^ƒá×`ø5~McÚt00yhÌCcóÐæÈm.˜æƒ`!XRÀb°¼–‚eع¼ ^A!þþþ^•.íŸà5 GÞ Îo€7Á[€HÔV²«À;`5x¼RÁú_ ÞëÀzðØ>i³ l[°}+ølÛÁ°ìƒÝà°| ö‚}à3°Ÿƒƒ ‡ñÁð8 ¾Ç@:0ƒãð+œ'Á)˜h™à+ü|dlbÇG1f(­nðП+ùÄh),wÂîK¬U)ïþLö¼»Ÿ¨´ {yè^ÊT þw‚Ñ!p>ηD½Èíå!´- zÊ9Cý×÷(fîü–;¿E1Ûˆ8Õ2IŽE·Æ’SVÀôµä”7ݽ ½Ù Ê9F/ÝE–ì#¾¿õ6ž<2–<2–<2–([K”­%ÊÖekÉ#+È#+È#+nRÌKŒ\Äh— Åt¡˜èT–гŒ®˜!ØxJéu{¹Uöfu®Óº ­7ëŠ&¬ò²¡}JïÄ<2§Ìi#knd…Hd>Æ:Ì,ó˜¥K4'ƒ¶ÛDKöNzoî­Ásày0$ƒA`0ÐóÁP0 #ÀH0 ŒcÀX0LSÀt°ñ%òW¸^)¶¨ù¦‹íòs±|"‹=€˜cþ.fegþ—XÕ+è‚]ðÀý 
²¹ln#›ÛÈæ6²¹ln#›ÛÈæ6²ùvmížÏçÁ@²|2ÆÀ±±`&Èmxe^Ù¦M–ÙÚ0Lc<ìF†!›$›Tž›A3e*Ù<•lžJ6O%›§’ÍSÉæ©dóT²y*Ù<VƒÕ㨹ç“ÍÃî)ÔÝsax_#>'ö‹Oå±W¾-öqþŒó~êÖ4tö0ø-2®WTdÔ‘›©ÖSl&JÞ%JÞ¥XO°‡ ¤ )¨@ *‚ ¤ )¨@ *B…PD…PD…PD…PD…PD…PD…PD…PD…PD…PD…PD…PD…PD…PD…PD…P¤"rº|ü£r6×çÈÕÑ9Ÿ×z„.ä\3J—ðûK`)XÆ<–cûËÌãFäþ]öA9úüe¯–CPŠ!(Å”bˆŠèµô©Gõ:ÎzdÀyøl¤é›9o‘«#~¯·ËT*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€T*€Tj 5…‡BC¡Æ¡PãP¨q(Ô8*€ÅT‹©S,f4—=Ò\öHsQ¯¾:ûP¯¾¨×2z•Êä±ß"'›)eUFÞ×D¸zÀÈ ˆïPZ¿Eë´Gœ•gç¸+EDÙŽ¡‰föT´³²oü-Ìfï˜l"ÿ¾†OâÏEøp þ›†ßà—Ùø£#çàúø#´Äwâû±¤!kkgM³¦‡˜G:û/v¡AìÂAh/ÿ…AX¸ bá›Xx ˰pÃ"Ý’P´ö#ÞLjvF41b¤1Œx'#Æ1b£Óíå>zuÃá/鹌9eNgé)œ^ —‹ÜÉÝÜÝJ´Á¶O°©œ;G «¸{ùú:w¯Ã®Áص»öa×$V.ð–•Û2J:£˜ÕÊ™äjظšœ3Ú°Šý`ÆÃ¬ä Vr(+Ù‹•î%ú»ñ«Yաƪd~X˜ˆ… ±ðQ"{Ý(nMwb®é¬îCÌ÷¢l4+ü‘«ŽdrPÕî¶ôƾ—žÑó5V/Ðè=–ރ轫ʜ–É«Tq¨9²©àv’ ‹ÈhÇÉ„_QÉQA² b7zÿŸ´üž–Žê–´øŽY´ø˜Ù´ø’'ha¥E}-#"–³SÚÂÊíÇ΃TYLjC35e#Ÿ ž³‘{˜•“Via¦ÅYZdÐâ4-®Ñâ¬ðb^µ™G=1Šê·žh-¦ˆŽø¨¸ÜÁƒà!Ð…}ûÃàÐ< ãú㜻q~< ºƒà)Ðô½ÁÓ è úg@0< žÇ†  ãÅ`0 ÃÀp®£À$0Ìྑ(‹xö| Øó5Ëıœ6Óã1Mìä®]b®ø˜ónÎÄsÄäzbr%1ù1¹Bœ3E–X ¾6±_óS´HPŬËN š³‰s ˆõÁ2®-çü2xUŒ'bÆk¯}eWp~¼ Þ¬´¶’þVwÀqb>œ'Á)ј½Lcö2µÓÌ•h€—[8ï~XQXfªöÛýT*‡ŠÀCEà¡"ðPx¨Gvþ™ì|˜»¢>9H}ŒÈ®ú„ÆFÌ]'ûzȾ²¯¾{໇ìë!ûzÈd2™^{PõŸQõŸQõŸQõëpõ:ª~]„£:ú'!ÇP˜¯å( ÀƒÁxPÿ$ÉÜËúD°ú-Xýv¬~cV¿5ëUWËõo»‡€?è!Dõ ³´ìºšÇ÷iÂ[éÆês¯ZÂ¿âˆÆ8â;ŽøžJ,DZG,ÇËÄ\1§Í@¿_Á–טÉëâ#lYHOÏcË0l¹ƒwKqÄR±§mͰ­#Ñßš8y Épv3 †0ÒPÎÃ8罪‘ýÙ‘5Föcd?¢8˜ÍÈ#·b䑌<Š‘3ò`„F0|Š„+bø À5€Q 0j«ÂìÆ«Õš‘G>Ð×a>#·eäF`äfÛŒÙF2úã·íƒô:ÑðÁ zíy“ åÝ´J«¥´šA«å´šH«‘´j¥|ÐVƒhÕ”VOÓª­&Òj­Z`Uâ0 ¼fƒ9`.˜æƒ …Y.ÍÉu-Èum˜ù‹dŠqäºÑd‹ùd‹©äºådŒ©äºå云äºÍúj¡u©hÝ:´n Ùdùîo仿¡{Gˆ¾yDß<ò]"ùÎD¾K$ß™Èw&ò‰|gÒæáãe\_Îë—Á«b10‹˜šeä¼Yøhq0‹8˜eä¼yDê<"užö®èJL,Ó¶£;ÀN° | vƒOÀð)Ø öÏÀ~p|‚4p‡ëà8 N¡ N³’z…v ½.@¯ÍDÓ&ôÚL ¦ƒéèµ…8´ ÎU­®©V;åâcðG­³¨Ø¾ú]~´Öë¯S´ºL+ ­²yçlöRŸy"Ç|LŽÑ³„_Ñ"‡¨¯-ü¹WÏGxÇÆ;f#\æþB"¥#•x'p¸<:ƒÁC  x<º‚GÁcàqÐ <žÝAðè zÞàiÐôýÀ3 ?žÏQ×O…aÓTŽw³kw³kw³kw³kw³kw³kw³kw³kw³kw³kwS |ÏnýõÀeê³Ìð³Ë×ÿ.Eîw“ûÝä~7¹ßMîw“ûÝä~7¹ßMîw“ûÝìbË©ëËÙÅ–³‹-g§é¡n÷P·{ØizØiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙiºÙI^Ðô¿L'Á)y–šã,5ÇYTtŒ ªÔd(¨‰hÃPÐG‰öÁDûX¢ý9¢ýs¢ý T¯5~F´ƒ¥­•š=Ïy Hƒàì`0 ÃÀp®£À$0¼D -CÉ–“í6³CÝ‚‚í÷ÀëÇàõ=ðú1xÝ^€Óáô8 §;Âé'àôpz4œn §[Ã[_-EÍ 3haÌ 
…Qãèska|*¯­ákk¸Ú NÀ©8@Üío‰û3x·HŽ?qoeÎ6âþq¿×ØÃTí;ôwí¼{žw¿á]KÍO£Õ§õ_ÑëE#C¸Uû¯Þô]ä>Úçòî)Þµªö%´ÿ‘+9\Égo´Q}ú{Eìb7ºdo¦Ç pB®„½+aïJîÝÀ®o£Ê4U _î³ñÛ;-´ÿ >evûÀAv…iÔŸ‡Õßù¾ÖÿâHñgV Ä‚ú $€$9C4M°®)h&lj;$ë+‡À’ÏaI6 Ù‡Ú4ÇÞ(Nslnm±±­ÖLžÖîÍA ÐÜ :€Ž x ô½@oð4èú‚r†– Á¨Î0 ÃÁ0Œ£Áù¶6ŒãÁ9D›&ã2¦dÔl˜’ SöÁ”}0eÈg„IŒ°’âCx ÷/<²ñ@6ÈÆÙx dãl<²ñ@6ÈÆÙx dãì[<òñ@>ÈÇùx äã|<ÏLW2Ó•Ìt%3]ÉL2ÓÌt ÈÕô¿rŸ'AM)æ ¤Ø ÿWšku;‹ªO°_WŸ×ÄÀì(µ÷)SŸv d“ Ÿ·2_ýS+LüFº¸+€»p×=Æß㛢 ué¡.=t@ £`€Æº¾E½ÕX.›è1KxÓ‡ð•ðI>)Ã'e¢¶| ¿”á—2üR†_ÊD3ÆmAf¾Sî­À]  ¹önpÜ(Ú‚v =Ùñ^®uáh'p¸<:ƒÁC  x<º‚GÁcàqÐ <žÝAðè zÞàiÐôýÀ3 ?žÏÉb„<)Fƒ1`,‡]ãÁ0‘ß'cóÎSñÉ40ƒûfRþfÁl0ÌóÀ|°,D1%ܳ&loŠ©6¢D›d±Øÿ¯.äÏ‹äÏ‹äÏ‹äÏ‹äÏ‹äÏ‹äÏ‹Z<ŒO¯k A#’@cÐ4Í` #žËˆç2⹌x.#žËˆç2­«|˜.#¦Ëˆé2bºŒ˜.#¦Ëˆé2böQÏO“À4¹[›f€™à0 ¼fƒ9r“6ÌóÁ°,)`1X^KÁ2æ±¼ ^‘;¨vPì Ø¡v„o°3}“§Í«åIí]ðHk¸-x¬ëÁ`øl¤Í&Tn3ç-ð†ŒB=±zbõ„‹zÂE=ᢞpQO¸¨'\Ô.ê õ„‹zÂE=ᢞpQO¸¨'\Ô.ê õ„‹zÂ¥ÆGÀà(øéÀ ¾’í4ÈVYªeSyÃĺêÓ†·ÙÙ,‡;¥p§î”›RxS oJáM)z–ã^EÏœè™=KAÏ6 g“à‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~8á‡~èßÓpëbÝA¬;ˆu±î ÖĺƒXwëbÝöîE{-hïçèî:4wš»Í]KŒ;‰q'1î$ÆÄ¸“wãNâ·”ø-%~K‰ßRâ·”ø-%~K‰ßRb·”Ø-%vK‰ÝRb·”Ø-%vK‰ÝRô8=NASÐc'zìDè±=v¢ÇNô؉;Ñc'z¼=Þ€o@7 Ç“ÐãIèñ$bÕI¬:‰U'ñã ~ăøq?âÇAü8ˆñã ~ăøq?âÇAü8ˆñã ~h¼· ñ4Þ‚ÆŽÆŽÆ®´v-JpMÿä†}Ü(v¾> ¬ïpÍK¯˜ÄR!ı‰ r«Ø.^¦j< þA˜&Þ‡9ÞG9ÞZÈHõ ÂãÝÌosPËàç9^ é$Ò‰Â<¢0(Ì# óˆÂ<¢0(ÌC½k¡ÞµPïBÔ»õ.D½Í¨·õ>zŸ@½O ÞY¨·õ6ÅDg1ÑYLtÅDg1ÑYLtÅDg1ÑYLtÅDg1ÑYLtÅDg1ÑYLtÅDg1ÑYLtÅDg1ÑYLtÅDg1ÑYLtñÔ­¨¶¡â—Qñ˨øeTü2*nFÅͨ¸¿ŒŠŸ@Å/£âgPñ3Dv1‘]HdÙ…Dv!‘]HdÙ…Dv!‘]Hd¢â6TÜF„gáDøü2Q]LTÕÅDu1Q]LTÕÅ(·ÈÎ#²óˆì<";ÈÎ#²óˆì<";¬FtçÝyDwÑGtçÝyDwч2×B™k¡ÌµPæB”¹e.D™ QæB”¹e.D™ Qf3ÊlF™Í(³e6£Ìf”ÙŒ2›Qf3ÊlF™Í(³™h/&Ú‹‰öbãs¡`ªý³M“ñ÷ÄDô«ª|U¾Œ*_F•/£ÊfTÙŒ*›Qe3ªlF•ͨ²U6kúÚl›ÁyU>ƒ*ŸA•ÏÀªBXU« aU!¬*„U…°ªVªBXU« aU!¬*„U…°ªVªBXUˆ*ÛPeªlC•m¨² U¶¡Ê6TÙë `]¬+€u°î¬»ë. ÐÿÅÔ6Ô6Ô6ìaˆê ¢ðn,çú $€F\KÍD,‰€%fXb†%fX’KÒ`É!Xr–‚%é°$ –¤Á+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â+,±Â’^°äXr,±À ,±À ,Iƒ%i°$ –X`É!Xb%fXb†%VX’ K2aI&,É„%™°$–dÂ’LX’ K2aI,É€%°$‹šG¯Ãs¨{r¨{ò©{ `M&jwæXaŽæXaŽæXaŽæXaN5O5O5O5O5O5O5O5OÌÊ‚YY0+ feÁ¬,˜•³²`V•r;ØÕV{P$h‘‰{pí)Ðô½ÁÓ è Ï‚çÀó`ÕõD0 °°Î ḛ̈Πḛ̈Πḛ̈ΠëÒ`]¬Kƒui°. 
Ö¥Áº4X—ëÒ`]¬Kƒui°Î 묰Πë:Â8ýSìg‚qM`Ü0Îã,0Îã,0. Æ¥Á¸4—ãÒ`\ŒKƒqi0ÎãraœÆ™aœÆ™aœÆe¸L— ã2a\&ŒË„q™0.Æe¸L— ã2a\&ŒË„q™0.Æe¸L—ã2`\ŒË€q0.ÆeÀ¸ —ã²`\ŒË‚q90.ÆåP#åP#åP#åÀ¾ŸÄ÷°Ï ûÜ°Ï ûra_.ìËegqE}g;–×õAH¸–š‰P 3a`& Ì„é00‡Çaàqhé00Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa Úa ö€M``˜s`` Ìé00¦ÃÀxæÀÀS0ð ´Ã@ ´Á@ ´Á@ ´Á@ ´Á@ ´ÁÀ,˜O«ï³/a}_‚uKy½×’ÿ6Ò·^1בvh‡vh‡vh‡v˜Ý0Ð Ý0Ð Ý0Ð Ý0Ð sa`. Ì…¹00æÂÀ\˜ ï†m`` Œ`ræÂÂ\X˜ saa.,Ì……¹°0^„…aáEXx†ÂÂPX 3aa&,Ì„…™°0fÂÂLX˜ Óaa:,L‡…é°0¦ÃÂtX˜ Óaa:,L‡…é°Ð í°Ð ÛÃ…ÑFÞk s`a,Ì…9°0¦ÃÂtX˜ Óaa:,L‡…é°0ÇØäÀÂS°ð,< OÁB,´ÁB,´ÁB,´ÁB,´ÁB,´ÁB,´ÁB,´ÁB,´ÁB,´ÁÂ,X˜ ³`a,Ì‚…Y°0 fýÁ³°°ÀB;,´ÃB;,üEt‡…+`á XØæÍ„Uù°*VåÃ* ¬²Àªc°ê¬:«NÁ* ¬²°wÎÉ"’¶¨o×NPùãXu…µA¾M­ ŠVE+ˆ¢DÑ ¢hQ´‚(ZATÌijùx6ÏæãÙ|<›góñl>žÍdzïmÑRÿûê¶D$¨ê’ʈýS!û§“ìŸNj ¢•Ö4‰ 4M@SЕy dŸ• 1DãX€º°ÇIgó6{œ·Ùã¼­M†SÀTP£*AX÷Ö}u°ê0=~A_Ðãôp˜ÓÃa¡±»ø’ýÄqqBd‰¯…U|þ£·ÚwÔmÅÃ"Åw¸_)G™oÑ6 2 ~ÀôsóqËßõÏ ç3þÆÏduÎcÃVl˜ ó±a>³:ϬÎ3«óÌê<6mŦ­Ø´Uë@,Ÿ%–ÏËg„Ÿt‘U,d‹äuêÌ9Ô¡\ãÎõΑ 6Y¦çº ŠÝB=ÎÑÀb@,÷Ôq žß€^7ä܈}|¢ÚË÷eÙËŸd/¯?qô:3Ù$š³Ûh!§Š–ì“ïD[»@kfÙ†¿Ü̳Cðì xöÅÿ÷gBžÇÆ  ’?ŠÁ`Êžu‘=œë#ø}$çQ`4¯Ç€±`öÀD~ŸÄûºFLáõT¼Œª‹é\«z$Ÿ,—O–Ë'Ëå“åòÉrùd¹|²\>Y.Ÿ,—_ý™ZŠü†,—O–+UœY*`ï|ž½óùûÀ‡`#m6Í` ÄVðØ¶“wv€`øìŸ€=àS°ìŸýàøiàø«ÏÓ&œ'AÍ]êWÄói²áË0á%¿B *QƒJÔ 5¨D *QƒJÔ 5¨D *QƒJÔ 5¨D *QƒJÔ 5¨D *QƒJÔ 5¨D *QƒJÔ 5¨D *QƒJÔ 5¨D *QƒJÔ 5¨„©•0µ¦VÀÔ ˜ZS+`jL­€©0µ¦ÞxrãšX®¾=v’¼¹Ql—›Å°Sž#Ë9ÄÇœwsþD¾+öý¯‹Uƒ<.ÌdÈ â³Ï’N2¡SØäw°»vWÂîJØ] »+aw%ì®$‚*‰ J"¨/VàÅ ¼X+ðb^¬À‹x±/VàÅ ¼X+ðb^¬À‹x±/VàEýé‰kx➸f|Lÿ¶‘þµ½‰*h²ιê{?&1 L§ð‘`†Ö^£ ùíÛFýo黽íÚJÚ¬¢±–-Ú«¿)σAÕ_ªç©oéù2^ÔѦ‰±ôÚZ›É>÷z›%ºh/Šg´ÙT¯sɧóx½@tÓQ9§ˆÞÚb*Ü%T*/ÑþU1ª¥‹ÚbÑ—X”E9Xt ‹*°è¸ö®xB{‹×ÐïûìO×ñz½¸Kû 7PolëµÃôÿý¥ï/é›Ì¥Y¨«2E'fp¸Æw~êªW¿ý\3þF®éï2—è[þŽýšñwì!Æ÷N^À¶UضÛF0voúŸ,ÎÁ‡²|9Y¾œŒ^N†.'C—“¡ËÉÌådãÞdãÞdã«dã«dãÈÆGÈÄsÔK¨7™×Eæ½Hæ½Hæ½HÖu‘u]ðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏðÌÏÿ¥'©Æaÿx0Üxªj2s»ñdÕ úŸ)Kàp .Ã%p¸—Àá8\‡Kàp ÙÖE¶u‘m“m‹È´éb{¹ü^} y§< Âã£ðø Mƒ¯_Â×|øš_T?U‡ŒQDˆ± > ¢"ã–“qËɸådÜr2i9™´œLZN&-'–“ËÉ€ådÀr2`9°œ XN,'Ëõ&Ëõ&Ëõ&Ë]%Ë]%Ë]%Ë]%Ë]%Ë]%Ë]%Ë]%Ë]%Ë!Ë!Ë!Ë!ËÍ!ËÍ!ËÍ!s¹È\.2—‹Ìå"s¹È\.2—‹Ìå"s¹È\.2—‹ÌåBwèŽÝqü—ŸìZCkÁû`X>À‡ æ“_Ûe W‚Æ• q%h\ W‚Æ• q%h\ W‚Æ• q%h\ W‚Æ• q%h\ 
WB¦r‘©\d*™ÊE¦r‘©\d*™ÊE¦*"S‘©ŠÈTEdªt2U:™*]=Qöçßšú¿{º]u5-@ Ó"µ:Z]-Z3i1Z¬V_‹Óâµ­¡ÖHKÔ’´ÆZv@šO©Ú}™ +4­™6Uÿ/ÉŸTŠ7˜ÙÝ|\ÐBµÖ Ï-ÇL­Ð««¡c}¼’9†{•z÷ ÷9â+|ö=À»]}³ãzÍÃ/Æï!¿¹~ü ýÊý£@[ŽÎþÝüûû h°&àX`×ÀéóµÂÀ¿^ôJ°_pÛ›ŽžÁcc¹:6üÙm>iUó™§ïhá¦Å9Zœ3žyºxOEÕl‘o´Ð¿áxÆhñÃm<¥¥?ëOÕOÕÿâ©úãª?žª?Þø¶F e¤û)”ÈÒm g„HFhÃm!Š•‰fe¢¨ŠfæÑÌ<úß<1ó×ÏŸÝÞS·¶:d´:i|ÿ¹ªUèMÏ2ìb–»þLÃAVûÕ’™™e€,Ú~ lä&ý‡°[îôãN?ãÎHîôçN_îôåÎî P,ÛD¼êwßþ39†JðN¡ñ …ã/ž°¸ñn¦ñî77½[ã)$"4HîY=Ek‘ÈÈSyŒñ$Põì^ bÔ¶Jâ®vÜ1‚;^6žú›±2chu7º×š({ TO=‚máØnØnØlØŒmÁ†mÁØŒmáÆ“¹šñÍæ Œ4‘^a¤%Œt/¶c[0¶ãƒø øaAw5ê}ô0Œ»›sw?îîÃÝS¹{2wëŸs5äýùÐ.*fü”rè1mÄL}õY` óˆa1Æ“‡1†Í1؃Í~؃Í1x2{Ck|8Àø,°©ñäa öÆ`o ^lÂÈ÷*/Ö|ú³+U›•ªm¬TíÛxú³¶òÇÍÏŽd䙌܋‘“ozú3»õyÁeÜ1Óx^pRõó‚·>Ùå¿á©ÈqŒ4Ž‘–1Ò¢ûTäŸ=­8éOžVü³Ï(n¼“c¼“]ýοºåöŸe)Pϲüþé½¢Ÿ¦e©ÿ6d+UÑk¬Ùm<‹Ì}ÙÂËoµ~ß.ÿ]d÷&â¿õŸt3¾?H³ò¿»8_à÷kü¼¢Ú”ÊŸäy™÷ÎpÍ. 9_–ÅÚ§MòGÚxj\½ÌªY,ìôßêçµ?éã‚ÜÃϹs™ºV&·èóÁŠäGò1^­“¯ÊM²¿ÌÓå'ò)ù0×rXÁb5J¹¾†ò<í Õ]—ôq±ÈÃ;¿TŽUnã•K^Ç'—iåùÍžj»®çsúºâ2ýŠá¿+äÔ*?^Ò­Ö½ ·¿ÒÒõ»¾ŒÞñÔee_‰êóÆ‹ ozä·ÊϹø#×nÖý,6þÎ6Õþ€\Íϯ‰îšW©Õ{—µÙ[}íWõ³:rØ›é?3ÔÏGäW²·Ü*•÷É/ôwdÐU.ÒÛÉòƒì&gÈýÕ÷^7ý“ªuŸ®"‰±¸Òˆ¯+zTrõè-ñuø:ÅJ2ãÃò{_…ò›šÑÏJ8nÄŒúýv Æ xÏR}=S&³šv¹†ŸÉºÇÁƒòÞj.d«x>…5gXɳÊvݬø†q¯1û«ŠCçå7ÖʈÕ+Õ£ä×°ãœâa•—«·/)«.öe1’Gg™šÛxÓŠ 5ØscŽÆy™ìËÏò.™t#Rè+”zWÈ>òi"gñ*XΔ±²»ì)ëËÎrœÁµ¡r"£,×5ÇèË¥fsž‘õõ«R… :Ô:xXçS¼²2_+ ròó[^çßj›jR­žE¾'WÔXû©zkî³ÊñÊ® òm9K£·Ipu¶œÄµT4å{yìFä)†:”Nì•Û9ÿ¬®ý,WªÁVùº|U­Á6¹J¾ˆ•¯‹ëä*®}λ?Ý{å‡ü4Ë/ÕÚWÅö¯´'žåF¹Kî¾§FÌýEUqo­Ñ×:ù?ß—ÏÊ5æØTEòØ¢ÿ· …ìÈÚ· ®VÉFrŠ­"m46ÚKnè—þß(ÒW&[iá »ò”2#Ϊ´°´:¾~­¶¢ê÷kd5oêÿx~I`÷£>³jÊÕæ\õ%cùqhdÏõ9o­ƒÉz>ì6jñ~(‡ûŒHZÔV{†:ìÝëÑ6šŠÊ_ÄRySÅqWóV> U>óR>kHm6›ýÔm)Q²L{™ž_ÑþAõÿªþé·¶’Ê4L[E]¢­ÖR‰Œ5Ú‡´Ü¨mæõm±¿];$|µÃš{Žk'‰†SZ&±õU_¼–M$¼|Ãõ:Îw¸ßg¬Y©Øîé:œµÂÚvh÷9´­„ ;¥Ü‹®ŸA•¯¡Íùä´’?íÃ-âçÓà'ù±^Éï|+›×hW†Î]ýM~»®~zT•ñ³®•êõ¯z>­®#<7Tç–{iOË"Žxeÿß@JíBå]Ø­òÛ5l¼úK‘(ô*/Rib#~Ó8såfÕ#U¶ü,õºÿÆ{Uª;K­ÄiYÎùS½/Ú—«[yT%T]Ôèûbõ«+Ìå§êßt«=ÕwÿáüÕ{võsU×GwÎËž5Ú ÃþÍnuí2vºÈHdkZ_$Ÿ\Äk ù2×ÎÈ€¿nÜ{Ÿæ²æITŸß“ÑÄÏ¢hçˆ×/ ¿¬?7ªé›zZ«"Ä¥ü¥×²ïñ*dãÝÑòMê˜ïéÑŽeN¹zïÆÚ\Tsgî>'?àžŸäß±ê;oñF›ë5w 5÷Õ×V¨ŸUUî‰ê»j®ßX]5¾úù¬òß.¿o«*Ä=Mÿ|Äêû•gU^.e†?Wel<‘ÁÊÞ˜ÿd¹›5-S™ÖE»½T’nêe•ª¢jýôûw‘—=:+Õ»KåqU)]©Z |“«GªzïûêûeŘeU¥,ôê½æüsˆž|½âüCûW)Ë~Pöç‚Mª 
}Öh3AîÄþ‹Ê~jH¼w°Š÷ÕÕÀ\uÿ)5þÞ–ÉGk®³ŠÊ+¿]!§Õ"›µãÕ½¢yrù©1™iym&Y¤³Òú^h}4{þX¿Rü~(õrñ ½RŒ@‘³Ä4¥°[é±=Þ…ÊêRSÒKeHo•!}ÈÉ`Ä}¼{?‡¯x@t&›=(ú`A_ìÀý¿f§gQ?•EUõ'‡Žà]=s’9Ç£è8|Èž“èMÏœ¡*s«ÌÂfÑÏ‹¾b¶˜Cs9j‰y¾b>¹Ô_åÒP•KÃT. W¹4BåÒH•Kk«UðW«à¯V¡–Z…PVa>lYˇ¬µœwõ¬å§²–¿ÊZ*_ª|¤òU°ÊW!ä«ô³“¬å¯²V¨ÊZá*kEª¬U[­i ë×]Õ3Qªž PõŒ7×kqÖ«oUèڦ.y-†×z…ãMЀ×zã­*¶Uí©j'¢FÅ¢ª@U턪jÇOU;aªÚ‰U^¬§¼hR^¬§êMy¨¾ªaê©Õ7©uo jMÕ*‘jek©•õR+[G­¬Õʪ*%^U)ÁªJñQUJ°ªR|T•§ª”hUŸD«Ê$FU&1ª2ñU¾ÑT5¢iqZ+®W#Qªñ¦Ölǽ&ñ¦Ê|„×zeR—*³;¯õúÄ[U–ªJ RUJDÊ2PU)~ªJ SUJ¬ªRê©*ESþ®¯¼[OùµªC"•/½”/ý«|ÉìÛªÏ:‰‡U5þGJ–~üXµûRÿôÿ+ÊFú±¿é5m€z6@¯ìõg–ýnüó6ðïÿùèÿ‡Í£[äåÕŸ~ÿòžÛü‡6æÈùT<{äC(^ ù‚üeýXÞõßÔÿ:VéS9†úª-{àÔÛäœÛ¼·ú“#S­0~› ~¹½ûåŽêß?WYw´Ðwñf9DϽìf/S)Üü9ËM½|§~¾Uý»Ú­ê{C½öÐ÷çjxºò­ù‚œx‰¤ñúñº­¾¯W£·ý+ÛküÓ?ûÜÃt~}Jõ¿ª? NéŒòBt&jŠƒšjã¥8è­v >ЉÞj§à£øè«øè«îÖc±‡R+o©~J­Â”N…)… TÚ¦T)L)Q]¥DQJ‰¢kì»b”E(%2)%òWJ䣔ȷ:Ÿ<‹âèJ¤©ýU‚Ê J•|ÔÎ*Ai“¯Ê J¡¼”B5P ¯2@œÒ©p¥S‘5vSõ”NÅÂÀh§®VJ­ª5ª£Ôª¡Z©:j¥j©• Q+¬V*D­T°Z© µRAj¥ª²J¥\ T>‰SÊå­”+LiV˜R«@¥SaJ›ê*mŠª±ƒŠPÚdRÚ䯴ÉGiS¥MšÊ? J¡|TþIP:å¥t*^ež8¥V‘J­ê)µŠU™'uªú¯Ìgö^øé?ý êU‘¾ËÐ?3½¢ÖWõ¹hX ψþú®˜]¦þí×ÕNO×4ýº¦öËšÚjúûü|Yi¯ÞJÿEÝ¥â-DÅ[¸Ê‘Á*êBUŽ V±ªrd°ŠÀ •#ƒU†ª¬¢1TåÈ`•#TdªÈô­ñ‰€¿ŠL?™µŒ©ÇF„ŠÚ*6¼Ulx©ØðV±á¥b£ŽŠH‘*ª2V°ò{ˆò{¨ÊXÁÊû¡*c«R+XEB¨ÊXÁ*c¨¨TQá«¢Â_EE­ªŒõûÿ•Yxœe’9hTQFÏ7 A‚ c ""©Rˆ8·ãhbÔ8.f Š A,FŒظ4™™,¨XY‰àãÄÂB¬¬¬¬¬¬¬¬lâ÷þLgqÏáÞûîòý÷! 
™mœ&Õ×?˜¥e¬T>KgaÅye‡ÖÐZIÕ-ÆÎœ*Ñì ®õ÷žu .öR4Ñ»Šeuw¹uº-©Ïfh MŽíîçbu¥6¸×¬¬ç Ç)ú¦çç·¨q—ûñ…o|ç?ùÅoþð×4i©ZÕ®ÕZ§.uk£zÔ«Œ†”eKœÖíì‰G8Îû”ĆÃE-—ȆG}‡ÄeŽ&V#{ú®°/\e \c0<ÉþðCái6;W7=fš­æýfž=fCfQmf‰Ãæ¨Vš'9f–Õižç)Ÿ¿ÃœÐu³ÂN³J¯Yc—9IŸ9ÅnsZíæŒnš·ÕaÞÑ*ó+|MÞ'ó_…+ÌpÏ5~Àcæ\å|æ+— ì ¯Â9®„ó\ ô:±ïô&\Ñ|¸ª÷ášÞ†'õ.<¥ái.$µÐ“xýñ¨ÅŨťÈü4r>Krj6É©IN=Orj.É©—IÎøÛüBÿÃrpÉxœc`a©`œÀÀÊÀÀZÁ*ÂÀÀ(¡™w1D2©100ñ³211±031/``XÀ àÅ%•> ~³°yý«bœÀ¾q•ÃdóWÖ™@Jq•HxœÕ”‡sTUÅç%„^J /%B'€‚ŠŠŠ‚¤„"½w[èQÄFDD (½X@ÅFï!”PE#›Ua”AÄo_3ŒøxgνûfÎÞ·ßù _N-DhU¶“üs„S$ô…Èr“¥ªÿõä†å°œC|ÃI$“ƒlc;{ø‘•|Á>á² ò-ß©ß+‚Ó¬f¿sŽ?XÁa»S)LQŠS‚’”&’²”£<¨D.ÑxT'†ZÔ¦±ÔeŸ³“ýlæyêOÑŒºÒô¦ÂÀ@3ŒŒdãHb2S˜Ê4¦³‹OÙÍ{ËêªI!Õ×ýê¨>£Ñ«ñ§‰4T’žÓ$MÑËJÑl½¢zK •¦ÅZ¤ÍÚ¤-ÚÊ^Žó_³-d0†£á3IåW Ø?þ7çù“¿ÈÒa.q•+,ã23HV&õ¢’*Ärl ¿é³µ8rUM¨ÂP6²‰÷ù€wy_ÈU††ó_ò!ÑšN¤³•u¬W)yŠW´*N­t›Z«¥ÔCŠT{õRoõ¤³˜Ã\æ±ù¤±€E¤ØÃÃ;ÛžHÿ¾Ø’ì­SJN3g“îœ ›¶*lcØÖ¨n)·¬[Ñr=7Æw[¸mÝ¥î wmtd´ç9^„WÒ+ã•ó*zQ^¬×Îëíõ¯±ïbîåðÜ\?/Åmž‹Í½¼¹ï6÷“×Ý'›{¤[ޭ캾{‚¹/¹‰{is¯pݽŸï޹KeB?;®Ö+¦9—rÎæœàšk &ƒ¶/3%c³Ïd¯®ÎN\4ƒ@Ž)+X˜ YM •å#]dŸLÓ±<åpÂp‚Îiç¬>›§r2Où§#þžù¿'ȱü†ò‚,>3 R-ÿ3,ÇìöG-mÇ9¯Â*ÂLˆe5MEIfƒŠ©¸JXú]ˬgiqXÀHŒ3ã-Å-óRL#³½ŸäêÈÏTVO?Ï}4”ª„xcÄŽ3f'O Ø$ŸÙ£Ö˜¥©Q»Ð¸M3fACŒÛÍ!r•Îp£©†&(FONœjé)ÕÖÓŠÕ³ª£gT—ƪG5ÒT5Ö45ÑtŠÑ\ 4YMõ‚ši¦šëE%h–ZÐR$êvÍQ¥ê½JzêNzé.½¤¶š«»õ}uæé^½Î­ôWËì-ôS;Í×zS÷é ¡"ƒô°ÞÖ#zGjI¨ô˜–ª³–«“–éq­P­¤£ÕU«ÔCkÕñÔ`‚žP”új›úi»úk‡h'5™¤ÁÚ£aÚ§á¡ý¥ƒ©ŒU7­V¢ÖX7¤ä7Ã"k‰¨]¤Ý¢½~…úgßDë­óÒ­í¬$9Öi?Ù\sü–kåwR;¬Qþ#$x(xœc`ÀÛ°›¡›u&ë.æ¯ ÿV²oûƺòÿÖ],¬ÿ?ý›â-L‹xœ­VùsÓF–|$NBŽ’ƒõX±qšÚ+“RL’eÜùZ J+ÅNzÐ2Ãßà¿æÉ´3ô7þ´~oe›@’v†i&£÷íî§}÷“ÉP‚Œ½À…h=3f¶Z4¶s/ Ë­†Ñ¡èí”)ÆŒ‚ÑéÈ}˶ÉÉðd½o˜†¹™ŠDtèPF‰® çmÊ­Ü믚“žßñ·ï¶´­^ ¨ÝlÚ -AUFÕ0IJŠ»´Š­ÁJП¯1óy;0¢ šlvŸM2Zg´YQ†™å0”d´ƒƒ0t(«îÉc”÷Úå¥KcÒ…ù!™‘C9%a—è&ù}WðIªœŸ8÷;”-ÙØ÷DOôpw²–/­­ j[ñvȧ›;Ž,vj Ù¡¼¢q¯Ü72ihư”®Dˆ¥SfÿÌî§|É¡q%ØÈ)¯ó,gì ¾6£)Q]YPýñ)ÃóÝ’= ö„z9ø“é-f&xð8~OÆœ)Ãâh’°`äÐJÊe\OULò:-ã-ÃzáÚÑ—Î(íPj2ë¶%í°d;4­’LƧn\whF(ñîòëÒ išWÛXMcåÐ,®™Ó!ˆ@ziÆ‹D/4ƒ 94§Z»A’ëÖÃeš>OzCµ¶‚ÖNºiÙØŸ×ûgUbÌz{A2;‹üÅ.Í–¹HQºnr†Óx¹„Ld‹í áàÁ[·‡ô²Ú’-ñÚ[é9¿‚Úçž4a»/§ê”&†1/-Œ¾iš:WóÊHŒŒ¿Ьt…OS(ÊI(Ž\Aý_ss¦1c¸n/JÎŽ•éqÙº€0-À·ù²C‹*1Y.!Î,Ï©$ËòM•äX¾¥’<Ëó*ci©dœåÛ*)°|G%,?P¢Bæ‡JÀ6sì0‡Á.sì1ç6ÀÌað%sÌa2ǸÇ÷™Ãà+æ0xÀœÀ×Ìað 
sDÌa3ÇØgƒst™Ãà@ÑõQ˜yA@ßjt è;]OXlbñ½¢#ö¼Ðì5böO1õgEµõ^hê¯1õ7˜úPÑÍõ/4õw˜ú‡FL}¬žNä2ÃOn™ ”]n?~Sœ+qMcÿÿxœ½½`[ÕÕ8®û††µ¬mÉ–mI¶ä%[¶,É{¯xÛñŽ“8ñHœé Èp²÷NHp†IH$@€$¬Fm¶´”ޝÐJ)m¡H¬çÿ½÷I²lËß×ÿÖ‘ÞÑ{gÝsÏ=çŽó8ÇÊá7éN4ÇÁIçds8ŠÒžN¨”J LsåH5›ŒÂd´(Ržßà•dü»¡M/ªIÌ5)6Üê´™(ÙJ¥³ºl‘“ÒcÓb„ZQgŽM£—Éô1j[s’|{(%…ì:NÝ£6ëe¦¼)i®ÚÌxs´¦÷£=>&1#Æž%WËÕC7“òdá±ML¸,—:}c–“Nà.‡CAžy‡ÈD`áòH9²Râ°“Oý%H¤Rÿ…|ì.’¿Ù½z3ר=Dd¿N(‰R]±¥Îé¾ìþ§N Ú˜3ò0ˆ ⥙sô.Œ7ɯav¤:]þøIBª”j{JaOQkÈ'½ä¾ Ÿjoé®ÉC4;JgT'Çi•bš' ü¸zK£¯)Hï10sc–0Â’¤3Çó|>ͼ ßàp¸ÿ‚¼ÑœÄ›ð€gðòcPè<3]&a^£u`h(ÙýÛKAfÝâß$ãVf‰î¿fAºÿŽ+‰tc¶‚­ÌÃJ-=0ÈD=Àh8$ç9‡: iA½Æq8ÑP8Ôö-að|uxZŸ°dÄùÉË«¢™·§~Eù_éÏ0t >X¸îÊr†{Ä\<=}á:FLõ0š¨¼)ÎûîHíØÑì˜ÑP¨ xó”Có2‘þgúð×´„>ËI€)Xñ‰Ò•;£’y@ [T)à ¤{—“z8XTÕró‘¡ 3{^ı/Zó¸Š`yjÚö…Ïo*¯ÚôLoÞ¬æŠhˆOîY<÷*<ô\›;³¡5Hl±Yªöÿfãæßì«”F&¥˜æ(æ£ê@¤À#,l¶{Š*ÃâJ$nÜ=ECKê?”M­L¯µÅ–Gȸ2n°&BßP»çó‡[oó,XôÆô>~–¢Ä™¥I\rž€gÔ™#‚ú‹_ÔщE#›†mJïƒêq´P4%Á¥`£R(µœ—šHàï9½¯ç2ssÐýwB3èË=?+Ûxe)óh\zecñ‹“ÌWfѳ^b~8¹ñ×{+ož¬Üókˆ›„¸OBÜÁƒ×ZX”*ƒ *=“Ã.#>\‘AÄK b#óhÔåHxhÐýù믇87"ìLðb†¤†05L™8²˜Ø…ÚÙg<¤'„Ôx,ôGŃ¡]äw ´ðß ú šƒŸaÎQÙð3¼ðõä‘g ¼Ñ™Êöô_w›#qLÝy™<=ÖKjpLwõðé à¦d‚ÿ‚D•ûâ5r u“º€Ï£{§A»/„ö†¼Ž 1eöV8,Ë’HŽ–Ó%ƒbdºpÞwûŸ`¾øÉœY/ƒàG÷¯š“èÇjCrZWL^ñúöòª=¿¸¯ëhù‡ ñ´û½ê*lù§äêmRa\‹R(•ÄDëj¼³~ËïƀLuðït«g€ø†vø/wmèæµk$±Ó½”p¯#îCòœPøÌ?à3$*ù£h“çQÔøèqø/µiè&IƒžÓÑ©SÕµÓ”äßÿ¾y¡£$K@ó9úóé¬À89ÈXqí¼œ™Ì9ðKø-Ü¿M£ÛüÒÓ–Žk¡6à8W ñ‡ M< ñ#IpÇ·äW€8j‡ÌN<œ¹äÑù³.A¨ÔòÌ”‚Ù%Ñ×ÀWË_ÞP¦O“©ä1 ÚÈó>9è$ˆ/^ ö9 @ΡÕ`¯a‰Ð‹#5ƒå“̉ ðgeBäi÷ÊhUh‚ŠÈxTª”ò¹21©åêU2¶h˜Né>$0…¹[i¡4ˆHתÜ}X?ù)+8Ñ*¤xòÓ¡w¯ž:EÜ Bü@O@ø»ŒýçHÍð®h•’GË(0ôî‹!§®‚uaÌÑS§ˆØé›Ã„MÄ/Ü;_ß`^fLP¤áa×8F4ÃÚH¯¬Üí·µØ0€Œ V!ùP³@©¡>¡ìÜíçH‚Qj’Â’4L÷Ü]˜|%Ò©Åà3‘^‡Ú‡Ò«lÚ¡µV+¹Ña½ù =¤Ú’ê!i‚üìUƒiB7<6¦»T­šî M„tè5‚8×%–K$|2› ò`•¦µ©†àÉÝ|S—¬&¿Ç~ãƉü†Ìëý=6àÌ(D×ô7Ùª\µXgnlúÙXj ê7w8dŠjµX2k®ƒ² ŒûÒ´áoèO ?Pp,^Ÿíç¼®Ûo¤¡?Áþ{BPƒ#^|É•Mee›®,ñzòo¯Í{ ˆOžâ׿Í{ùöäÆßì«®Þ÷›ßÙWUµïØ–Ó†@úïÁ8@ÍIƒ6š}ª9 JÊ’…­èõîP/<ƒ(Ì#ƒ¡ 8¼ô¢šŸÕ’u°ñQQL¶ÌêìνRÛ¥ÊÔW¿Ò×^òæä:eddׇ%…kÞ,«‹‹GLmywùÓ™ÌaÎS’¶†k5D^Äüg8†jXu _ç'`ÙüÆ"Žàhá‰&p+jÞ!€øföÙeyyËÎÎî=»iüôŒ=%ÂÏ¢ðÈWÍ–H£Ui‹«,qUó‹ò\Ñrµ(ÅÐ0cŽkú©åE¹+ž^2÷T.™/iÃµŽ®Ý-í{fØ#̲üØ ‹¼dÓµU‹^Ý-Ó‡c3¢_|»y$ÒËA€Y${Іܖ~†}kl±áÁì諎‹k¨Œm>07ËÃTÒÜï.-ççkWó«6½°”e7oœ<°­š þÞ…ª”p¢¡8(öçz§ 
Ðu$y”êe^FR––ízÀXБݲÁuaŽ9(8ÿÌüæÝ]ά¾S=S7¥’T¡ã­;wM‰«D¹‘Ý_ÝQ¸þÚš¾«»ërÓˆd˜WBšp@y¥Í+I!¶›fh7³1/Ö±¼Sf&‡ÝaWÙI£?O ä â*ï®þõ¯ ‰/ ¯¾Ø¹½ÉR;‰l¿¹-zUMGÞš—×ô½¼½*7ͽè„<Ô mw3Œ‘ بÀ52/ÂI•7@ð°Ç?žÅìÌfK"ÈEƒ€K£æòKé’úD2ØQ13§bq¥%±¶(³:KÍ9ýÏ-ŸyvMYÎÒ‡gw?§\:ýyƒˆ/ŸÕºX&VvÁ6ãhÅl伈M~t‰ÒŠd»U:É–bìÙ´eÏÓ¯D믋¥é{<¸ýéâ~ éP¯ÐÏÁ<)Î_´”…¼ðˆ(,f‹Íï*ìX²ÃÛ z]i+RÖ2¡FØ/àÉV©Bú?<ã!µéÌcA3ƒùó6³PûøÂ|¡b£ðáøè?é›8{Á÷˜£R øqGs¢…Þyù‘áŠs nÁ99Y‰×”åòð&6RÊå‹ è3B>”:NZà{$.B.ˆÓŒ¹¾ñÑ"r>ã–¼9k ÜCŒft„¿ÅjÝCcøûÙ{Ÿ—µHŽ£N»U ÷‹™œÀþþüK* È¥‡€.cZál?qæ¥Â6IáäߎË1Í4QÇ«ÍQ- ÕH?°¯æylSíýÞþÀ‹†¼9µ¸ßï¥zû?OâéÿmwÓÿïPjúuÔ£Õ1¨'ǨQï~|íŽtqÇ~âöêáøtbðÓÉAj±®ôƒBpÖwpŸý§þ.ûÏ-œÉ÷+Y`c»‹î6‘Çí}ß½uß‘9ßMû`}Äc³í–¾Ú·t@ %DÇ„ÚP<¿Òàå©VQ]0‘c¾ùÛìÞjkPP‡@@€IuäcÝ0±ï,½»¶ÏÜ·ø8†ï°¡H0¢SägŸ‚1böø¸†„#«è\Ï"ºÙ·0‚eãÀDe¨äI"t'™¡r½ì|Pc1¥úpûD!- ‡Ž e Y¦`–¢%#’*Áƒ*16:´emS`|7ÜYl{§‚LØ©‹/­«¸CáJ‘p™¨yîNÚ‘Ø—âÓ—ÐÜÿXÛ·{dà²"`öí³Í®ÚS¸âiçüØõ;a<¬8÷ˆÀV:ŽàÞ¿ÙéÕ'²ƒÕ¿óìy Á‰ÈDåžÂ•œócÖï¼U€+o0ü5] óþt¼ž3ÖkމÈGq«@Í-3¨(AŽM®ä „ÜÔÄõ%ËÖ1ñ°û—J%kQd‡ûD^ŽT.å ã“7¯!æùÂ÷Ñó Sª`N­óø¦á¯y/A>kî&'õ²hðI6J° LqÌõG±$â;šy?'9,6\Å—9’Ö/‡bNdåðH-šu$'‡9@~Jq9R=Oõ­{fAj"e‘ï4ªB¥”5;¿À‚¹#óuNW"ð¬2xm |é¬O [4wÞ"}Z=XW:­w^JZ©©l~yÙ⚸gâ–Ó»vÖ—æF3;ú×õwdæ¯|záÂ3 \Óê­Ž`ƒ-²¢!½gWCÁâ$ðU¸³Â:uÁ‚©Ö g¸û½èDËåéjÆÌx­ÎV@%Ò‚ÚòM‰2¥UL Ô)-%ù+§§'”ÏpTÌŽP¦«“’â‚ãv´—¯hLÔ£µ¡]P ¤¯â]°’d;Èø^$ŒËf¯rˆáGž–#áDAL(üCëhO‡]/µ^dÝ#Ý#3ˆVu"h n@¹K.ߥéPc’0ü-¤ù?ð«>0ïìì-´@›w¢!¿Ð;"¿päÈdRdsQƒ«{W“{/Ñ·ø¾ŠwµiÑϾgPð_°¾èÙ‹±)ÆF‡“.üÊ”¤ÓYJ¥ÑªÓ%™TäwCBò;£³š”J†ÁO4½ÇÉfÎG!>Š8£&I%„â;оô' žðCɤ££$ê?’:ÄÂXÏ5ÄרU$À9 ¥*'¿CûâðcùE–y´Oøæ X€yµŒÚ¹ä×Çk"²d~e¤ÞÁhôþƒÊ”8¢—D¤—›¯Ž§©¼ñJ×!]Ä“"UPî‹ø˜J¤{þ—Ç&n92‚¼[vôÕ‰‡°«¬_Ä´ ¿ÿgZäwLï´P‹ Zñ°aª!a´¹!ˆÕ»çލ=yáí± ä×ÝaéÐçÄjzCÅ%ÄL÷G8Æ?ŸÝŠÖCÅ®‡p¶yù§ÿAÑì, òÏžECÒ¿+Iü;Õ®¹f8mg{Ï}aêâ¦)µ«›/Ξ™P“eº8sZA_ù]ôìâ–Å3\µ©!ö™{ÚQ—]Ö‘5% }[¹¼4Û-öéî6ÚÕ4š ëR4Àa(Ÿ<Õéç qrftV¼f´3€4=í}ùYy|8Çn»sLD#¦~M“­ o½Å›š’d7ßUiÄÒÙs‰òZ'¤t¶ïZôMC³½Dá™rÆÛÞˆ´…Ö®¹°°ï"ú¼ØwèàÁC•‹*¢ï§èò­W—-uë¤I[_]¾ìêÖò¡åo^zæ­Ô™{Ú.½‰Û®bøå¢&q’cd_L¶Yô¿B¼§Îo<Â[êÀÖ’9KjbR'MZX¹´&&“˜–S€¸©Üàt)³õ…û JS5[ÒŠäCë£6Gמ¶ÃMiή}m'f«Š_Æ,#ËÃC^/~­ñi6M S#þ ¸ë ‚8:NÕÈÚ4 xW%ÕÀo!†]Œµðæ=>Â5ÑŸDd5£åèX“±c«±hvqéìbã6¢Ž²Võæg·æYå*‘]~~Õ帶­ëô=yyËéÙð²S[6uqAýÎY™ÁÚ‘h&æÍKm{f¦²:½hÙ/Ì 
•¦¥Ëúú½ª_1?µ1ËÀ®M³ë»˜³”â±k™Þ•vÅÉ2zíÁc]yÉKËv?`*èÈnÞàä=:¯­.B+†vdZhæÿÅÎÝh)seMGá´Š¹«.7É£+£ýhâ5KÓXšŽñ4GSÌ{¨×KpÚFûÅ‹~ä6•·l¸ŠÉç2v^¢ÕÄœ%ßõÊ7n­V6º\N;i°Ýë·V{!÷t¯wI´s»Cd&é<—w­–xïÆ%ðeùïZmInu4ëˆ7!ÍÏ3J˜nccæy7û:m;»#35bnˆÄi´Ud&).öèÕ7û‰t“VæªKu¯%¿S‡²~ pøkR m0 ^p'ÞzGŒÝµF¼/ÒJÍ®ÊTk™=ÔZ>­sZ¹5eæî©sÏŠã«eò˜ôúìÔêT­µ|zçôr«­}sS׃dµZªÑÄ8",©f½!&«9»dYsJ±³H, 3†…&fã\}dlΔüŒ¹u¶\æ‘9ŠylÄãl¹»#æã'÷×Lpn¤m¿óîdÊ_Øäâe:CÂÏ·ÝBTÔÆÃ_Se0(óyF JJn Dù\rbé¬C3“âó*òâµSké[«g<ˆE.qV8ãKRti iÖšB«6­5¯hAmü…âµ—èóYSسš RlöHKª5©¤§ºze£u¤uò¢ÓJìq“ 5¹Î¬R[d~ª!®aMÙühö¹•l~J<ù‡æ¥ð¦9„ohd¢w’ÛÖÁÌ…$!‘ŠÄr{^/¡ q6ÒmZ[þÍkä4ÊYØ1}økâ:4Ê‹ð‘ WàÄ ¼¾|ÝÊ`õ™± ‘Dó¾`”Ô(”¥*>6ÝAã,ìQx?ÄR†Ýgé'BúW ýX(—Ý1†º*`êfÊÛ=Š `aþèÏ LÍv—÷ãåñEÌK~ɨáxe§'A¦q8Y BþÜZð$ûñÊ#œ}‹Zcôª§/õõ&#<Ž_:'8qÌ9ê×Cvo¯ßŽ÷1á<ùg-÷j=ñüɈv{kWuV²û?¹6¿wÙzŸ8Äê§ÄâPŸëP?€˜j†Ä=)ʶ•Ûß„ÏÆÜN*ý‚û?iœ¸r ÂÞö.„´²Ùú-ç &îö™áÍ1±·è§ü»Œ‡_Þo }NöìûòŸD» “ãU7ƌߵ/¾%óÅþ–Ý¡óG,vœö'l V|ÖäY;çI°/¸S;Wß­Ücºw²nsÅ“ž”ÖÎslaqx:‹ö*c¢NÁ*c ñ¨äÎÓàq³_·ÑҘ㱠îyhÇ9õ™;Ô ÷ü&èn[žxônÛó¼úÿ·a"é9ÛäÛæ†6‰«ña@òí9æ IÁé9“ïkŠggIÐt@ôüI­®®ÝMÄf÷Ê%k* ˆ/X|è<$Ä—ìvÎ-Ì*žŒÌ6F»&8\GDZ0f*òå"¿üÞƒN~Ãtž`:÷©ÃhDr>_.7LÁÎw¨¥î¥Ñ%&SI4±3XÅŽMp<æä÷hg›!Ù]è$ÚF‹²hc-ÛžèŠ%…f¹¸óü.]ÐGü šâ*ˆØÌ<¿S¥§¯É‚ áïi­¼Z©£ßÊù´TðSšåÁH<©IÒh’BÜõF–Ÿ²S‘ÁPåaŽ•õ—PÖ0ÏttŠ›Ù„®p²|ò—©µüaJÀÿgFù èd{L­äÿÈ—½'PIO3i$`—$TÈ<"Ñ|ð/‰˜Ù¨y†è¨`.ð=¤£D:Àóx¬^Õ'«ÕïÝ?„†m|™vDJi°P x\½œüîÆGʰŸ¼¬‘¸×Ç%&Å«ó­w“e'šÃPrGmD'ËÄB÷ûDœ›‰ˆ÷‡ÄWäYE”ØmÞQ.6îDùI™K´Óç8B”ùz7;;ïh':øsIGV¨#Æ^nb±¯œälÈÏ’Ëù¢¤ät£9ÏjnÝ;'=ÝlÖKä\ªš”…šTq0³’Y vfƒPž¬ŠÔ•±Y±ÆB§‘ˆÎÍ’«dÁ|ÞŽÌ"~†Ï>ELœÿsQê{°d:󰨤tq]BBÝâ҆ɓZêëé³ñµKJË–ÖÅÇ×--+]Rïv÷vuÍžÝÕÕËÚåj iFöïXH`ˆ”ÜߣÒI¾ Ïª4×ÅR@À{¸A„‘—…îUxöXÀ£ìÓµj­”yƒ—¤f¾¤% ãFˆû¢7¶'•ÿ. 
UÍ9(1DÒgF*¾®QÁ{ƒ¹A¤ŠÅí± d*æ²4L¹öo2“—Å|,á¹:ó=ÚG\Ãg¼9Þh+3Þ@\ó¬úÓ|Š]÷÷,çÓA» 8 P–wðÜò˜™eâÑ“Èþ“Æ~tåH&‰G.ÜYs‘É×><óa¿HÍ]Kóhøÿµ<µd°÷n¥QFˆ~}‚EñΗg¾„,ˆVŠD+ÄOË|¤\Ùa¹üœ‹õšD­Ã0öã6ÃGÌØÞôZÈ“GjƒôÚœtú5•(ÈÙV –€R¶­Rç‰gémzÎé²{¬Éd䙼ӮL”ÌÓ&åǘ²6M±Pg+°ÀïI´5.!©&-BoÔ·ØjÒÑ'‡Õ#Ÿø#÷sÖBa²5‚f2^´„Dc ÍI*J°–f;Ã4Fh¡6^•e9¹÷ÉôQòøšì(ƒÅ G_kñWv®bu„x…î÷ž9½ÍƒÊ·Yí Z=v³Äm·A°të¸jâWÜëwH× ©¨U„Ç¢RC*à!ˆQêƒÈ f5¤À]¯¶ +‹ÚË͘k¶Q¿"ž¤‹Ð:ªœ±ˆÈq_£‹îÁ¿­à:ˆG¸ƒÞßV “ŸÜÁ^üÛj'ñ´K•'â²{j¡{«f燭‘ÓããÓ’5aù½ÔÎøI3]†t(vLºÁ5sR2¹C%•©ÐEFÃgr¨Í„„>ׯL ;øÓàôEÔf óÖ‘H†ý‹4é2x†¼NíöOÉÅÌr8ìC„Ä¡øÆBùú1ò}E‚µB?Ô ÏOw9³©¯XÂ^Kÿ~í£ŒÆÇ9¤ÿÑpÊ€"|:¯AúçO’Î8peΚ+krsá?s®˜!ýÈÖŧ#–zkñ¡ïŸêèxêûC‹ß:µ,‚æ³kg©‡aleöðˆ*Bà6²à*( ñ ËáêáÜô¸ªÜÄ ¬# Í<ºÕ{aÎÜ)Û/Ö¬*ÓXBìU޶N›5®1Šù{LÍq]¼if+qÒ­¯fÏ —ÿ@í!ÿ÷/(áhŽ—Ä©Là¡­ÃÀ%؈,i9@î2a¨=zý@ð¥!‘1Zs¡=³5CÏ "™ÝdˆÙž]fé4exNª™~_ ×Ê(y8Ä>3eÙc÷–™Åeúˆ˜†MS­½kOyœùáêúj%VTÁ¯U5J´ƒÝÃ߀­ÐÄl% Ô &t6–ã’@'Åp—Ã?*^a´†z;g¨Õ¨ ܯ[›š§¤¦Nin²Y7~§KD¿&â_áÝhüþ†´@dA¨ök*¸øÕhÊu­³²j¶ÌL³וÙ)ùJWT_j3fÔX³›³¢EZÉA…y@–¨.Ñ  µúÔMbÃòŠÊ“³NÃÀkafzIkN|I².<*\sóÄX&q{Í…ÑÉTÎ?ÑÙg ZrºrH×È&p>›]χŠŠˆ å’<25?ßž¬M.±^ç‰< W•œiÅΤµÅn1½Í<<ÌÙ q¯'ßã9Ñãc-ñ%„¯†m° Ó°p ñ%Šo ¼ò‚zè¥,+ð‚±Ò@¼ðyz|â…²™8QìÙÈÇË:“|”Ó?¢ú$ŠD«ácSB>ÎÅ…‡Tæ:¥$_¤Œ ä³¥ÇìJ3éù‘í½}©*ÚG>›|ÔB$çiŽˆú¯¥B¼ÄJ3ržÀôž6 ‡÷k!Üä¹>²Ð3—væ(\ÌK8:yd1CÕfô1äÇ‹P…xÜÒÁ#»]&^9ÅÇ ÄA,…8Œ?c·±1gxŽ|ÅÖÑj®…&åª<˜¬•4?þ˜¡H%ð¿Ï5±Â0°I6… c5ÌM­Pq¬‚8~q 8YEZq2‹–Htò üŠEÀóáÿJB¯aú @‹\Z!4ÂŒôù!>„¸Œœ‹˜Ï‹ ëÒ ¾ÇðK~ ÄxÖjÎÅôU”Khp2”½~%vé+ˆVQrd¥Êd¤ÿiÄ÷>Üy´ÇTv™é™;h÷gØðyæ,(Ä¿ñØ_ÑFŠçwì@÷ }Æüe诜±ô8¡E.²°22YD´VèÓDôUcä ‹Šà¬&DÄ\â”7†[ ¾däÄ©ÃÏ%BLþ–Øãýí9ô±g=þm-!¦/Œü¶–N¹ñ¶÷·|æ²qx ®{Ó_²qèŸ/_Îòöç#ò9ò Ô'Æ:Šw2»¶TVméÎÌìÞRU¹¥+“˜\µ]m­ª‚WðWÔ§ÀK`€ø<Ðó 5±Žðˆ´˜˜´ˆpG¬ºï•ÛNø|ó>xŸ+Es ¥\ƒÌ‰Ï\Ȥ¸Äwó{ƒƒ¥5½ùááù½5ÒààÞùÄ=¿gæ=a²-ùøÓ¿­\ñù'/M2„?ËÌC2åBœW NTi$•° ®˜ë9çȳ$N—S ó{ùWÌÁÌs)>Æ >+ÕÈÄtòÒ?ù|ÅÊ¿}úñ-–…HŸþg™ 3lw1»C6¢Ð*ϪC™½ /TMoýñ—aÊ^x‰±ÁÉa>ç=Ï ˆLÀNÑÀyfo¨ú,|ô1–(øöPe n›"H'>“‡bm¼,ÊÖ°Ã’ÀøMé} ¤l;¼R‹.Ù!ü©ºE"—¨c]ƒ+V ¿¶T+EaκþɱÁòà’‚‚ø;¹¿Î&¢·v7¥ÍŸ-D¥æG§ÌìœÖÜ%WÖ&5,-Ní„?æÖÍ­ÍK™9³+µxiCbRŽy-€¼ªÇñ°¨#\yGâb(H|¯½ hŽP0£*0.Ô0N¤×åBî:IµJyW³—õ( ÚËzS·\Y“ˆXïš93%¯óÚON'þ@ŒvEA¿}Ò"šÐ Œ[ñ¡}dÄ!T#Z B»lˆ?$ .Ãexc8¤N¢íá$t•¨é‰m!1GL³,&¡ød {Qs²8!†lc:†¾×hH8‘a:ø\£aBx:`‹Èò£NÒbÛ…j`IA)±a&i 
¶mc©mã9`–‚¥Ì„h4às–2Ñ€äK»‡7+éGàx…OIÀ78Ø­cת¹<¿$cì´Ó/$Y2cŽ+)<µ¤¦!&ßE%©ÅU™j«t†%×ôôöÔ$W7¤d§ÌØÓî[oÏlȵ×8ÃRê{ç÷Ö§4MMɵµoiê|0ƒø½½4Ý¡«HŠJK4›LÊðIàhÉZ—aNOŠOt•¶g×ÝgŽžY<²o ÓZsÌÖ{\‚½dj^ͪ(ó슌¹µ¶vºdØE§Àxå_ÞÊsÞÌ@§¼œùÇñÙgîÉͽçÌl2ŽæÜpÑoÞp‘S³{w×5íêNC86ÏS[Ÿ3[¼3´ÀNQëeZÞg: O.ü«Sn—Ï+¤àEG‚ƦcŠ%Jâ/ìX° Rj8H„Žÿáú&€ˆ›ù“ÀÞèâž¼ÔžéMQìEw>º ºK ûj"-††RøÅꙣœûȠõ =ãUs]0ª§ƒé惣‘&SäQ`Dã6¼Ÿ˜ï7rÞÆãöÛ  ÛHËð×ô}0æâU}ïYtÞhö?Gï3T…ëW„PpA¼LOæÌµUw¥ö],ß=¡Rí›öÝÚ•åI ‹‹²3]{ZkÖOsQ Á¼ˆäâÄ”ºôð`“Ó Ž™y„R>@t-íÄ´üÎ"S}1ó¹D½Ê\笃SKVÏi¯ÞW=iít—½aa–šoÑ¥%†G¦WYMyEffjÑI õ’?Ï€2Í–q“莔3 Êäݯ…ì9p}†í±~9Þ‹ãrÂ\ØW[-‡Bżýaô~ &½«Ûìù¹™)²íJBŸ‘ÜRhi9úîÊÍ_œïžûÂ=ü½QÎÓÍc¾zü<óû÷W»ºv>þ³yíOßÐáÈž»¿¡²S’¾µåJùk_^]Øc%‚ÜžÅeÍÊ”¦üàÊéNEZÏžæí¿ÞQÜsé?ûÎ1Ãç[ ì"¼ ¬~Á¯@ÂËóßzr߬ô˜ê%÷?7wÞs›++ªjKb'ue4>±£E¥„íÛ=|«ƒ¹n§„ÓµàŒS!@y‰èÁ¡&)—W„ò&>•+e¥?¼tæÉ%¹E+™9ÿpz",”D¤Te”öÕÆÅWö椖8â‡rS;6=t¹»ûŇ6OOM¾ù¡»»/?´©#µ;¡nÉŽ‡ZZNíZRoí|ìó-íXR—Þ¢¼‚IE+íê>»¢°0-Ÿ+‚NÀ S£ªõñÁš0¦ûÅSðyˆ÷Ô‹Ý]/>Ä~‡4ZOïY:Ùj¼tÏéÖÍÿx²;qò½{Îp(ŸüAœHN§ ÷>$®¦fšMG‹BìÞN?AQú«ðS²Ò´°M_šºaÃÏ™#Ó ?½ß }K ÕÕñcbÅ/?æ™ø´®2:-JÎü<ºzå´!Âp%sA Åf\uóÕ1Ò´²2Òíã¥gµ‚û19‰ÎÇu°¢p´Šüž)þã"qÍERãÙ…¥@+?h¢¿|viõ3ÔßçóŸEÈ#ÄÏ ΑI…gÅ<¾ølTFô2Zð·^÷S`~9ótX¨)gµ|1óN¨6,$‰ùlÌÞJZˆ¿ÒçØyÛñó„£{\kJ˽ÅÅËZìö–eÅÅ÷¶¤¬ wNŠ‹›ä ÷~R³ ïmNIi¾·°àžf»½ùž|ˆ@¿–»""\åˆæFÎGEu@š.Îøuž)žÅÅæÇ—†Ý”æÒàŽËÓ€NSN]RÒä쨨ìÉIIu9&¢ä„YéÝñGZ‡ ú)‡fg¤÷$<Ðz8^?@œH¬C·×%&Õç˜L9õŒòöÆf§§÷Ä´=€odìN8ÌújÍð7Ô,8OB¹ ;­0Òý¸<* ë ö;rI¨+ª½ÖbrØÍ©x_s@9}×só Ϩ7åðdYtNGÅþ«‘„)TËÐþ÷.Ìêæ„½²ßYŸ© DüŠÒ_ï^zy[SðîÍð÷-ä†5?ßS©³•$ ÄæDó¯ºäzeŒ*8\š&Ó¿ý¶*2:RÅå—t/®9ðî†ix°2ù”™Ãr*Œ; ׄ®†y*;{f!Ù*Mžt€]ÔñÛá¦Q°j.Î Q¿ã±óZDwAW±):¯)¹0zN¸1«6iAÙξâ¬%Ì0UtÅÏÊœü‰ÂV:£(ªdËÜüô'fPuxö@f¼R'‰M¯JÞÆµ–LMK)sDû¹1Y±¶RgL°I“ZßßÞ6¸¼‚÷¤BÿÑŒŠz¡ÄlKä UjÓÚ“vÌ+¦ö*Â"®Ï,×#ùЮv « ]}Æác»ÂS¢\°‡j¬ žæ!Q«ª5ĢʥՖ-+ÀY/Ì:uApû±E¹k^X¯T…Áõ‰|©DiÍKÍmÈ4 å4¹’›Þ¹­ñäU¨³úQPÍÆg/|q{uA©ÿ»0/9G(”…„ðÜ\*Â*àA›²q8T3;—©À™ÏÈàlöŽÎle`ƒm»­Òæ¼¶áû†½žDö1—Á›úPæ²!6i)Ãd 9PR®=áF¦ œ1ë™ã,ßYÐOTúÕz]¦4Œ«õJUÖí|eþ‚—vÖÖî|iÁüWvÖ½àšº² `e»ËÕŽ>§ºˆ_\aþu©¥åP\¹”Ï´´<ÃüóÊé·ffnýðôéÐç°Nàð}ÌSØa¥º ÒPÒì2ò†›ìä°X§VÐià±}[·n%ßI°¾/¤)¯ ,ºñO¼> õ®ƒü㪫jöà-lXß;D˜|Óªï?x_o* –Ÿ§O+ˆŠ)žætµ—Ú‚D±ÑS%uýƒíS—Øêçf1›cêÍg¡ó²12ðËŒ¹óææNωˆL¯N2ièà™‡{0PœV»mÕ½YÌ¡òÓ4ü5ù)ðŸÍõÕ«Gý†}v 
;æÑ1ŠWËzImÿƒíSŽ,Ì1å·§ïÜU±þ‰ÎÅõ•i«ÂÓÅbKQo]Å¢ sxýž¾öuuÑ–¢©NçTÈ»ØM©?©['Í­M“G=¹¡spafz×¶ÚH±(4¬º,%¹º#)££$f0´§å´g…{$v z¬Á5›Õ(ºÇ[+Thbõ–Ã&!Þ£(÷Èe å…cîJ=%'^8CÐD.'ÒÞ¬f¶€Ùòøž¿éƒº ÎD úϾ5%ïðÉåY¬ãsú¦õ=¾$³fÇ•ù¹K»› h˜H)›QX±¼)ÑRsoóžì¾%Ä#IéL¹*¤`éàÔÅÏo(ÑÆ9Ã+uæ[몪Ây•qr¥¬6£mFÈ •îYËGŽ x& ]xé…J—òÜçʼnæµD›8,Ø­”Á§Ö˜ ½9YM j㡲¸¯ôB Ö8’¤Z¦VVÅ‹ÃDï§ÌqÚ]¶8¹„Í™î¬JŠ)ÎɘËÇ´‹‡ !í4¿>ÃSOÜaF˜²´ñò’íõ™‘÷nŽ+žbKoÈŽ—ºÔ>¬·#’"ƒe–ü¤æÒ ›-]¤ÖbG#To/ŠYk^éjH×k“Ë’BYPD¸J£ “4ƒ¬Ô”Ô¤¥˜BíÇ¢ƒø#ŒðѪ»#ÕE²]Cerx»ØÂ½~"MžÇ¼­P’A ¹¿‰±1Ïå ¥O=,J¿eR8¹]b[—ß⛳~Q‘ ¯$S䉑UŽ5Ý®Äzéæœ§¢¨Ý¸¾·x¬]f ¢˜Ùû˜9àÐ>ðÑÎÔƒó[ÀcL#k‡ˆvò:ñ‰wþøÙ>tšød þm=ç}ŠGµà÷€E\ òQf9:t1L­ ¹ƒ9 fþt2G;@hí`Á0fåð]œ—(µŸmŒgi€1Ñ·lÌuùÊP>˜9vulì5䜳Zãž<~1ŠàÏ‘×É]žùzOm,)–6g øý>§ø"ù-µ†]‡·ðL.¨BügçÙUÄkÓÁ&eÙºeÌ· r:Ó¾X¼nñ7$÷-CC[ÞØòÍ7[Ø1òI°†üùN(ŽÏRðºµ/‘õßQL'åzg¢Ib\”:«{º5Ü•dèM‹ ³»¦'Üâ7²M¢ .NJ4ÄÄ*­h¾=Ùk`΋!t‘-~—x 1žù™‡8_PÍØÏ±sœ4Z·Öë]SZu0êW•Áè5ÁJâÕ `š+~¤ ^ ö—ƒ}÷É´A,®¿@\ŸA\h.J…ó™hèàÌ–htbülèf0BdÕÚ-L®2˜øi9³`m°.è¡”+T~¤•yÞËÐÍ©…ör¶•­§CkQ˜r7ùòPù2Ó»Ÿ™ îßÿ‡-Ä[Ñ´ûV°œÙÊ>ÿ§–|…Ú ŸW{ž/Ù $wè0hl¡¡Éjð‚N>tƒäÊuÝ«)0èaXðJÐ_V¬ cë9¾ ñþjÞ)aÅ|áErxAjÓ0¥:9Äû³rfBõN”KK¿BøY½™ä×°ÿÑxíNüú°;“l‡½Ü°Ò3ü%WKŸáˆFjâíá´gLƒV¼ýÞ¿Ai ´µpÕSóç^XWF^Ê™´ýg«_Ÿ2ÃÖäJßÌß_Ÿ3ï5 yhñk®œl%nÖ“³:Ï®*™´îÉY⯧_ÜÞ°¬¿¤x~eÌ¢×pà(ÿtQêôM>;é ŽÏ˜C‹P• €lÅ"f<¹FûF0®ÒWWã]ŽGÓ’AYËfUËç]XWšá¦âê,"gM÷Š­åY­aSÏ}}(¢QQ¥ùéšý!}Û*óg—Y"ó´aÚC﯉›uï†âÂRw)_!œóÆÓæfÆ—Ípv>ýÀÚ‰NÒw¨ÍlÌmM‹š´~FšcÅë»#R>l©†n OàXµÆ³Í:…“†11ýãѵ%59$¥‰ÊM KÊŠÊqÙµZ»+Çý¼ïkTT®-,Ì–KÎYÔ˜’Ò¸('mJe¾^Ÿ_ÙîÊékJIiêËqµ³)`oHR™ÍG‘pG±ÅRì÷~záDpr˪ÊÊU­6cfe\\e¦F)ðº%Ù{ÍÚzãð·T̯h‡ø˜úž¤w`$Æwþ;Uÿ]poG©2(:»9³a}«ÕÕ8ÇR^Ýdo;8;½üÀŸ6í›Q¹íJßì#6ìè/M‹ åÂû\ Ï-¶NγäÌ¿¿9¿§$šº&)ßùÖÚ·˜'¿Ý’>÷éKÞ:ØÔÒU ûéÏ•Z¥HŽêw3‘_r§áÌMFyÌ0dOfºrH¶Žòg¢‚啦òð R ŒÔf6gGñ×Cغ ¦ˆÔe4gGó©÷{öO³Šd"1—͘Zp³¸gÿt«8¸@hæÔÔÇ`ö;îS(fàÜʳF1²q›úû‡n< j4aôýB1M ùài:LÁ}êÇZâ0LˆÔR¦ÛP` uééRvq s–6xÞ;3òömxñyFøü‹Ôû4çdzÜfô‡ßUΜ¥~ôÄ•Èxö$ rêÇ¡n"Ûýyôf_HÜëÞ.äSOÒwfª;s _¢‘áë%ùì¶QáôuŽ„Ý5Ín;¼[ )€YËbæ†DÁãÉ%Ìàè%|•œgºV1]àØ*â%B«ÏŒÏÔ»?sW™IȆ~A¦’ì—†ïeî!Òçq,î&eüçQE:ˆ=Þr&æc¿cÎø #~E8FŽ“¬«ßÜ‘*“W£sÎsÈ}¢‘ú Þ8“Êõ¬qøÞ‚ľ¤Ç ï7Cóó×ÞxƒL¢n29ƒÌ׳˜?¨Y”¿«ƈÔ4ØG =¹}ÀɆl€ê7˜ðš¥cü$”œœÅLÏé5ÌêôýèY‡k×®éÕ{œóO/¨^9µ8¤T—¨TÄÚ ­¹y‘à9â¯Ûœ ý†±3O÷ 
ð¦*Žp¼õûp~%fß$át´ÀHÂÐ$!qž…•€ëò€ïŽJ˜tð&W»¼a ñã&AéêK÷¬ynÏàXý|>w#±o×}Ì¿ÁâÂ΂Hfÿô9éPuÇ{À!GÏìXtþƒö+xë4_ ¯ÞÈEÎ<èû“ñzUòø÷• ƒþ„Ù%ìtIAÙ=“­ u}…h·yLÕBƒÂl_YÖ|dIQÙºgæÏ{ª¿pò0O’9¸1±¾¯ dQM\òäù™Å‹ë­ÄPˆUm2§Ï°§óÜš²âûžé›ö»àïr-ÖÑ È“Å3Q:ž+…ß,‚+F’jÿõB|ø›(*YX›X l¤ùôØÿNLÅ;Àþ:ü ñ ¼çsU˜@÷´Á{j=÷¤zîùßó)ð܃ð¬¡Oà{–â{þÆQxð„sØ1}2sŒŒúÀûûh_Bå»FW}Oë±iQÑ2;«f³oƒß ï¿Ìjk6.E:³h솷ߢ ~ õ÷VTΩtÚ’ GIÊOµeÕg%³ü¨àñ9Ù,ÿQ6;Ÿ³î¿i·ÑÿÏíšœoÉ-ZÒd³5Á[XŒù#·äÿ7M?ÿv]cxxø:trïà9⿃F ¡ú*ŸB—ì­¯òè'6ÀÏÚáRú=úç0ÆIFk8@Bã…oZ Ð\tžÝ‘«ñžœ³8ráˆh'sh¼BâB+#A€Ë#V5nèÈ7ËÔ–ÔQæ´Â¨’í¿Ú^=ow"eÊ”Å9k«#»–wâüvxg^L0{çô|'ýJƳú÷Wjl\ÖVd¹–îx¼ïL¼B¯7¹S™sæ@þ^©NlÛØßý{èæ´ôgßÿ÷—ï55­h+6ŠÒ–î||ÁæçÌ_¯7ùj>‡æ&ª×:A¡Â‘"‰U(©¨8¡·6ás㫙ߢnã]&¼ƒ·¯é8q1B‚³:B¿ã³1Ýlua|6†­.œ†O¿xÏÆÄÝáÙ˜ÿƹéœz ·wÆÝ´ø­«;7€ŒÛÔz ÞÑ¥_=õL¡ÏGµ9ì»êX8Ók zá'†'1ý¸ŸUâÀðGXøð×ôÓO³Ï¿wøÞYx ìqÿÂñ[z„¯Áð½œA|?~?/ ÂI ß ‚·‚k}ð@9ç~îƒï¿îƒôÁï÷‡ód>øáQðz|¨0½WTŒùä ³|îõƒk}ðàh@øîQp¹~Þt˜}·,¾_Àaï×c›ûú^X–*Ë)dÛ‰ó1'|û(¸ÖßÉùW@øn8n'~`|п™~x\îƒçüë¿ÿÓ•xìæ ŸÞ 1‰§½ýárü8xœÅûz¬~´zéÒy¿Ôƒ¿Á‡‡~å!¬«·áµø~h%Äý~ðí,|øCoÇxd¸wîàøà]ؾYø>üOð¢ ë…`á4©Ÿ‚õÆÂï÷Üÿ¼(Áò²ðÞû¡±¼,üøöaüùÔƒ硞7Ä|[+zo,y<Ð[顦€ïl%Ð;`ÉMPV ~h@¼®ÑçY¾ö£Ñ‡\¼"v‘ß±WÔV° “°RzÚÕÁä»p›¨0Û·³pxÿ(Ÿv‚sƯÍ3!\ëñQ }}6âƒï+GàÜ¿ùàû¼pÈOƯõð³ÔcƒßП`~B=¾qK@øöQp­¾ìcÛ õÐõ‡_÷ÁŒ‚úà÷ûñí„zúŠ?\çƒõÀñ{%1ŸoxÁ®õÁws^÷ê¿‡Ò ?Á¹‰áø]‘уç‚\ëƒïä\ ßí…Ãvü÷]#nß÷°í‹ßuˆïòÜ…C~Öb~¢<ü\ÃsáÌQ Guax"ÄãèÕ§›T&ÚÐK[ #…Û¨ÿœ:u3ñ€ß‘ˆe7{l ˜¾}\ëƒïµ#plf øÃ¯ûàFÁ}ðûýáØÌð‡×ûà Î®óÁzîÇï1ÄüDzr}áÛGÁµ>øN¢Žùgáîãø=ØöØõ½-­¬í…oCpø Àï,çÜõ›Ç×ø_¼jØ:¶öÀ]¿|tù+ðÉ»Í'ïÞQzè÷Á÷‚ùàGÁ|ðC£àøà´±püž:¬çd¬ßmœcøw>[+›s‡o°«Ú;¥]ÄX}Þþw xtµ‡Y|²ìB²øà—}ðc8~–Ý…á[Ðh¾ Áá' Àï.ËáÜáÛËÆiå.^g–4V/wò~3òÚ(Íøäðɳk”œý>øþQð"üà(øžC£à—}ðc8š÷>‡ßÁ¾GgzËŒçÝQ~ïŽØF-òå`VìS=>.~Rl­mÜçQ­íœÿnµmרú/–ßîç(þ©Çýúø¾àÕ™Í@#?p?ü²~lü„~Ò_Ä$çÑ!|*n“ÀàxùsØ ÷à>2çe[†»ñý0â"BX8Û¶Î!†_ƒÿXp¼ìy;mô8µOºös¸ØÒ±qò ÿâ Åþ’¥¿—¥ø"lÓÔøï!\‰mš½ÿ þ „ó°žXø!¿/0 #^#‰â¨*€û1e¦ÁÎñ²Œ+<=F0¨¿]0Cu¨œ™l,¶’m‡Nx1õ Ï\áöáɬ¾™~¼Îã…ŸžÆâá úì¨fy|Øi?ø€¾k|›¾× ‡ü ÚÈ^ø ×-Æø{=¾ól@ø6çø®sŒÎìݪÒñøAzâÒÇàçãêݲ2¥ 8n@Þ¶ùxÞ;J–~|ÿ(x‘~p|À?4NŽxฆ-†/dcËÏbxñ{Œgá0®å«dáë™~\ó–½µ5›c㺵O‹ÇÆæ‘,|€“…+*ßMEÛq£Öÿ®Ä­l\¸ëš·€ß^^¹¼òî²yàÐV•ØVYø 5 …z{ŽÎá°uL!ÜS× 
9J\fkKÞú¹`â«â²eNFàb¹qí3—^­Œ¶òrä7k‚×ÏX••ÕÝìÆ¾íI%†hòÁ{pû¯`û¢;ü5Ícá¸~ë6¢ëÕÔ ÁyZêUð7BÞ¦¤ç‡ãš}âŸO‰ƒ‘,ßâ>Çʨ‘‘¦qŸce<è'cŒ{‘ŒÀÑ ÊT¤§Í%*‹EãkV«–~¿¯ý£ø-òÁŽ‚øà‡FÁ?ðÁðÄËþ±ŸÜG<÷㺒¾ŽÕؾ ÁÙ=Ë\´÷g)wW‰r\\s×¥)™·i÷Ž‹UãXYú}2î÷ÈŽëLbÙ7xld7–…À(3z‚ ”–q2aIJÝxQÕ¨Ë3ËC¿·ýˆ7Ï„“0ϧ+X¿Žë[b;aÛoW¹?ü²~¬œ×ó`žÔ„Ï^ÊáÈeÀ²ø—©T§J£jDÄ#~eª2[¬å£«T4RÒ_ý¯kT}­w_·³'³nÛ3]Ï U;ëÜߣUѨFÕâ¼ùW†öì¹yeÞÿ¥FÕ%æ,õË»ªQõËÜ ¶FÕªQÅE5ª.öÎmß–xÁ¿FU’5Ö¿FUgñ [£Š?£l]žáרo¡Ögµsþ?õòÅ×˦]_<õÐÑè¨yÑé@3þþu DR xœc`d``ßö/ŒsÖ¿¹ÿ9]€"È€ñ2‘!rxœ}Ôh”uð÷óýu“Æ*ÏÚ,!¶ÛºCÕ–s?W‘?֚縹놿6ý#6j-¶ÂI‚©±¢Œþ˜!ÁìÒY‚ešTYn-ÉTÒB)èzŸ{žãAÜ/>Ÿïsß»çû}ö|ßê!T‚tF·B‡šsÿè;‘Õµèrý‰¬q Õ²®üì0’ú¯,ÕÈŠRòi,ÐUü\Rîµ5ÔÀ¹7òL’VÑëX§>áœÝk]ê"û:,wíÇýê(ªô—øÐÅ5…QªKp·š‚PX*CƒeâhRë°SN¢MCûvW7žS›°ŠkXm‰Ï -FtÞ¢º†})û=ˆªÄ\ý·©^o®ÒeDͯV¯/Ì›äï.áøÄBż×Þë ÇŨWØ©:sÑug [©Î™Æ>g*węΕ7¹DœsØm6Ûùž-ÜÛfàï”Ð|ö×h†}9Õ맰Ь@Fso§÷áM9„•ÎIlw`Ôù ƒ|.½úÛìÿÌÍ®žfWîók;ë ú!?ÎýD36ÏlŽñ=zž¶2,˜es°™FÛhØÏ6Ÿ›k§PióÌõÊ }`jö4¢¼ÐÆ…ì»FlÙ\ôÉxÔf¤¥÷`™I¢Oß… •Eéÿ*ê9xœ$½ûW\×}þ¦–kG·«Ng>k \, ÌÀÀЪõ§-θ«â.ˆ$Î1¶Çæ2ÃÈÂØŠ$®Š—c7‰çnÝe[y„,)•‹n!Y8ŽÛ¤ÉOŸÿàûtŸ÷«¿¼ÖYgΜýìgïgŸ³÷8žç…<ÏýÚ|ö–ç}ý™ÿÏû3ï·(®ƒºOÄ'Ý=±ÂÝ£î¾Xé~-V»ñewYœv_ˆ3î÷âœ[O¹ÛÞŸ…Êì<¡§áFX+`n‚1Øje…ú­¬˜³²B£0—­¬Ð=¸ïÃVzè3ø~é>ðñþLçDú§Åï¸câ›îGâu÷q~åæ¼u¹Ž#×y/xÛE;~ǯãøu¿Žãõâªû£^6¸ÿa6¹ßŠÍ|š‚-ìß ŸÏÁ^ØG82 sp7û÷Â}î+qNÀI÷Ÿâœv_Šs¶?ô”í í·3„¾ Àƒðœ„SpÎÀY8Ã#ð(<Ãð$<OÃy4\€‹(¹oÂ[ð6¼—à]ø€o}Š®ÞøG±Zµûso§û“¸G-þçÞœµ=ê'úTýʇo±ÿ»ðmøâþM\u}â“î›b­{MÜë`ܰÁa6¹Ÿ‰Í|š‚-œ+|>{aÜ鎋qçÅA8‡ágËÂeÿnÊ}•í=îCq ¾¦ÑãëjY;ÿ>j:'ऻ"NÁY]Ó¿îv—ijî_Åy÷Oâxɵ‰ ð²Fž¯{Ÿhþºw…í«l_³Õ+t6<òP#q#,‡0 7Ál5ŸCcæmè5¸îƒãp¾߀ûÍõ=ãx‚“p NÃ8 çàaø–9¯«ñmøü¾®A_ýØ‹‰?7oC¿€¿„ïÃ#œá(<Ãð$<OÃøÖ‡E¿úˆí3øvžƒç¡õð¯Óÿ®n®Þ€7á-xÞKð.üÜÚ1ô¸ ¿TYO(¹ôžPÿ7~Š©þüßb6¨Ö©þlLÂfö§àNµõ_ªÚvæŒj ã·áx‚“p NÃ8 çàaø–_ma|¾ðéQx ‡'àIx žŸTfÿ nu° îÔð¤2ò_â˜Q½NûÕëŒ{á>8'àëð ø–A oÃwàÎvžƒçÅoxÉùoxÃ'àXKaŒëNþjcƒ»#6Â$lÒÕùj û4[Üš¸>Ÿƒ½°ŽpdÖ}&æØÞÍÙöòé>;¡1Á8'uÿð Æi÷ qFýçê9?/k<ù†RoüT#Æ7¼Eø%5ýQ#€¾ª´…ªàfX ›`3LÁ¸ ¶ÁvØ;aì†=p¿ÕEýÍx„‡à$œ‚ÓpÎÂ9x~Ïû[ñ=¯F<ž£ð<OÀ“ð< çÍ1%Ô¸H­oÀ›ð¼ ïÀ%x.›«šWà}øÀ|V6Å¿Öµé®ø˜æ;­^d|n€%°–Á ×*Fá&÷¹ƒ•®S¬rSbµûg1®õ×êoƵé_«¿“°Éý‡ØÌ§)ØânŠ[á3ð9Ø ûàGfaîæl{ùtŸ;+ŽÃ 8ɧSpÚ='θ½âœ{VüŽkßt â¯\J¼?†xÙ¯Àë|÷S]þZ=ÓxëþŸ¸âââªû;q ~e{ÔWU®úª|S_5n†Õ° 6ÃlÛ`l‡°vÁnØû­-tGm 
àóÖ"¡ø|¾3ðeø „CÖv¡a8³0g­©ûscî7ç•ãx‚“p NÃ8 çàax…Çàqxž„§ài8omªDñö¼ oÁÛð\‚wá²µa\÷áë J„ñ¡&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&a&!!!!%bXŒBKD„DD”ˆwÅ*wT¬fÛ!!!!!!!!!!!!!!!%â_ÅÙ.°m‰ˆˆˆ±_´DDHD„DD”;rÕu‹kð+—õ"$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"¢D¨-”c-!!!!%Bm§DG`æ¬5•cZ""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B""$"B"6h.ÿGq|To`mmƒzþ-±Rwò¼ÍîX­{¹ ¬­mМ÷·âx‰ï.À;šSlðîºUÑV³7h†«ýšá7ÂrX£pŒA[[Û VVéjecíž|ƒÚŘKP^‚ò”— ¼å%(/Ay ÊKP^‚ò”— ¼å%(/Ay ÊKP^‚ò”— ¼å%(/Ay ÊKP^‚ò”— ¼å%(/Ay)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊšF)k¥¬i”²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊ\¾”¹|)«U¥¬V•²ZUÊjU)«U¥¬V•²ZUÊjU)«U¥¬V•²Zõ”Fûâ#>ê|ñIWxJ­`lмï)µ‚1 ›”»§Ô öi ÚŒþ)ùlÛY˜ƒ»9þ¬ûGqÞ5‹à%÷Œ¸¯¹ÄOåóSKEõ.éQï2n„å°Fá&ƒ­¦Yí¢Õ.Æð <'ᜆ3pÎÁÃÐVžbõà)Vžbõà)ÖpžR«Ù1Gá1xž€'á)xZü9ü•¸>éæEûEìo¼(´õù¿Q-ÚÜöo¼9ð7rlIœwÿ.^€—ܲ¸ïJÏßÈ%S.7ÂrX£pŒÁV+]T¹Ê 1€9+74 óp·i½ ÷ˆeŒ'eŒ'eŒ'eŒ'eŒ'eŒ'eŒ'eŒ'eŒ'eŒ'eŒ'eŒeŒeŒeŒeŒeŒeŒeŒeŒeŒeŒeŒeŒeŒeŒOKÃoŨœ|ZJþ$VKÉÓêÃ7ÅlÐuíiõac6¹ëb3Ÿ¦àÌòiŽíÝlOÊí§5/«¦Oë@Ôh =R¨Ò¥ÐÀœ•.…Æ<ÜogSÏ4€á!8 §à4œ³p†GàQx ‡'àIx ž†‹(¼oÂ[ð6¼— õ¨jÙÿmy#ëÌYgÞÈ:óFÖ™7²Î¼‘Õà9'ŽÃ 8­kÄFÏÆº3?÷6†ì—¦¡ ð{>ƒÅrJ,§ÄrJ,§ÄrJ,§ÄrJ,§ÄrJ,§ÄrJ,§ÄrJ,§ÄrJ,§ÄrJ,§ÄrJ¬à.·BýyŸ¸>ª{¶ Ý÷Þ‡OÀ'Ý„¸íX Ë`­ÓÕÔÛë`Ü‹ Øàža6iŒªP³OS°EwPªµñÍ8*TkÛîu“bÛ;]‘8àþI̸ qÁa8ÂÙ²0GÙ¿›r_e{Ó]‹7_Ó˜Y!?íüûÜwÄq8']8§u[¡ûä.qÖµ‹s®G<ìþA<«ûÞ ïœû…xÞýTœ×}T…òn¼ä‰ ð²{AüÄŠWؾÊö57'~êÞáº^Tx¿uzÊ—4hÜPiÜ0n„å°Fá&ƒ•Ö:ºÇ6n†Õ° 6Ãl­ÖʺӶí6Ø;`'ì‚ݰŽéÊUzÍÚ=´W³‰ŠÐ>×"Ž»'Ü&ñu×$¾Á1û­]4T‰\µx=‡¬E4Øö”µ”F€˜8ÃfÙ?÷(å»N£ž®Mêº6¿çm¿¯1³"ô/%¾ëÙyÞó¬¬{[ÅŸsþ_À_Â÷á÷ ñ(ç?æJÅãlŸpO‰'Ù>OÃøÖ‡î—âGlŸ¡gÝKâ9뽡óì™·¥$iÍð&¼oÃ;p Þ…ËÖët—n\÷áëJ´ñ!üÜzcè7îeq•í/¥*ª¤ßqÓâ:hI’ë¨ò›·À:wÛÄl€0 -¿Qå×¶S°Å·BËoTùµí^%"ªüÚöN%(J~£Êï€8‡à0álY˜ƒ£ìßíöˆ¯²½Çõ‹cÐòU~íüûÜûâ8œ€“nDœ‚Óî 8ãfÄY÷ª8ç¾-–oQòU~ÿ]<¯ž%¿Qò%¿QòU~gÅOÜÅ+l_eûšûø©æ†Qå×hù’ߨò+ ʯœW~a9¬€Q¸ Æ å1J¾¢Ê—Ú…|EÉW”|EÉW”|E•/;f¿ùF¾¢ä+ª|ÙËWTù²mËW”|EÉWTù²ýsð0|‹R,_QåKm§|-_Qò%_Qò%_QòU¾ÔRÊ—ñ—ð}hùŠ*_v~ËWTù²mËWTù²íSð4ü€oY¾¢Ê—mŸ±v'_QòU¾lϼµ¸òe\Äíð&¼oÃ;p 
Þ…ËÖ+”/ã ¼XQ¾ŒáçÖ[ÈWTù²í/¥j“ÒtO|Rw8›”¦/Ä-°Æu·Ii26ÀF˜„Mº:oRšl;wò­¶³0wÃ=|:ÏêN`“úêÄ Fõiñ©úŒq/ÜÇá|¾÷Û™ÕgŒàAxNÂ)8 gà,œƒ‡á[œí»ðmø<§Gá1xž€'á)xžá[gá9x^Œás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃç>Çð9†Ï1|Žás ŸcøÃçJ]/~->¢‘­R× ã£îEñ1Õ½Rw†Æ' ]A*ughÛ%°–ÁZ—·À:wY14ªWªŒIؤ1¶RmdŸ¦`‹FÑJ]YŒÏÀç`/ìƒ;]Npï‰ÝËUêÊb‚Ãp„³ea޲7å¾Êöê2_ýœŸ®ž•º²'à¤F¤J]YŒÓ86í©J]Y¾»3âY0•ê9V÷ ð’{C\€—5^UêjrC¼ÂöU¶¯iü¯ÔÕ䎸¿àÌ¿Õ\²RW•««‰ÚEWãFX+`n‚1h3ÓJÝ 7ÃjØ›a ¶@»úTênжÛ`;쀰 vÃ8æ¾/¾f5U"Œûà¸ÆêJ%¶_×B¥aÛû­-”ã]O+•Û>äÆÄI¶§à´õ %¶gá< ß²> DÈU%¶ßßó¶‰ßWö+C?ôjÄyµâ{ºNUê:¥v×uÊøKø><Â9Âcð8<OÂSð4ü€o}¨™c¥®S¶}÷Îê:R©dý‡xž=óÖt2.ÒŽ7àMx Þ†w༠—­é:e\÷áëoºNÂÏ­ïé:µ$®²ý¥TU)ÑçÅuðQ÷ºø¤û7±Vw/UÊ©±Æa6¸Ÿˆ0 ›Ü;b3Ÿ¦ ­=V±öXÅÚckU¬=V±öX¥œš†wQ̸yqÁa8ÂÙ²0GÙ¿›r_e{fXUÊ©ñ5ÍAªX½¬bF\ÅŒ¸Šq•gÏ-TySÐFÑ*ÏVª”ʉóœá¼äŽ‹ ð²î«”Gã5wÊ«Rât†=UR¥Ä7ÂrX£pŒÁVóV‰P-”ã^¸ŽÃ ø:|î‡ß†àAxNÂ)8 gà,œƒ‡á[æ¶a|¾¿çýø}õÕ*¥ Vü¹¹ª ߇G8ÏQx ‡'àIx ž†ð­Ý‹±}ƼU Œç4²U…γmëU¬KT)æí xÞ‚·á¸ïÂÏ­Õ·«ðK•µYצ{âãð ¸–ÀRXã® &`ƒÎ°Y}Û˜„MîŠØÌ§)ØâîŠ[á3ð9Ø ûàGfÝm1Çönζ—O÷IùfõLãœäÈ)8í>g”‹Íºvœí¹ Í<´YW/ÄE£ú¡Ž×H®Zh$7n†Õ° 6ÃlÛ`l‡°vÁnØ÷›~õ@ãx‚“p NÃ8 çàaø=ïïÅ÷4_ج¾d{ŽÂcð8<OÂSð4œ7—ÔCŒ‹Ôú¼ oÁÛð\‚wá²9©qÒ¸ïÃæ­ÆIãC±Z÷9—D[«f¬Z£â3âc*·Z}Éø|Ò ‹Ø.¥° ÚšsµWkÝFq ¬ƒq°As®jõ7c6¹ŒØÌ§)Øâމ[á3ð9Ø«ìT«¿ÙöNÍΪ5–~KÌÀA8‡ágËÂeÿnÊ}•í=š!Vk,5¾¦Ñ¯Z=Öοíq8'Ýq N;û[ôÌÂ9öÖ̺Z£ëóâ9Í«5›¾"Î;_¼/¹Aq^†Ÿhô¨Vo·í«l_Ó½Aµzþ”¸¿P¯¨ÖýµÚ—º®U{¿S²ª•éae¬š•±jVƪY«fe¬š•±jVƪY«V‚ÔjJq3¬†M°¦` lµÖWŽl» ¶ÃØ »`7ìß²þÚwÂ]pÌŠ¯YßÐÁ¸ÏÅÅq·]œpÿ$¾îñ >Ýom§<¸ălrâ$ÛSpZsájå±YœeÏœûGñ0Û6£¯ÖÁÎð6Ûï@»GªÖáºøž÷¬øsëº"¨tE°í÷á÷œx”³ƒÇá W/ždû”û[ñ4Ûð­­½tE°í3ÖÓtEø¿â9÷3ñ<{æ­)ïÆEë Ê»µìMx Þ†w༠—­*ïÆx>°>©¼ÂÏ­êÚqN\eûK©ªá—Ö~i­á—Ö~i­á—ÖfŽ5üÒZÃ/­5J¨q+|>{aY˜ƒ»9Û^¶÷¹ßˆãpNêέFù2Nëž¿Fùú\œ“{5êçÚÏï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZÃï­5üÞZ£Ö‘BµŽq%7àMx Þ†w༠—­jã ¼P¯ÏàC±Ö;£ãk½ëîWâ]·,Ú·j½ùVë}å~«+É ºoß‘[8r ŸnáÓ:¯Âý1 7¹?‰1X©»ô:¯Ê}W¬vO‰gܪø+}·Îû^w‹¦¿Î³ëHJ_íN»Nç×ñ¡~;sȇ|Þ΀/ÀáK0_†¯ÀA8dJBÃpfaδ…Fa^¬×uç–XឣÐêUO½êU¯=b•æwõžcJ¡KŠ5R[¯‘ùâ›yF׫¾kâEÕ¨^µ¶íÛ×u¿Z¯ºßïj¶R¯º/‹+î¸*ê½5ø•f.õ¥JnHÜ0ÐܨÇzܨÇzܨÇzܨÇzܨ—Ò/7Œ#0 sV¹aÌÃÝV¯Ð«p—3ºŸ“3ÆJ÷=±Jí¨ÑÔu‰—uÏוÅx]׋¸jwO¼§ž—~}Kú²ïJ‰qfaÎÎ&%Ƽ˜ Üå&(7A¹ 
ÊMPn‚r”› Üå&(7A¹ ÊMPn‚r”› Üå&(7A¹jWÅ]°ú0ð²âóp¾_„/qÌË𘇯Á9[ß÷¾ãåÄ7u¦Fï#•ÐèñÆÄ_é{ÞEïÛâÇlØþµ·S¼îïx?ïzï‰÷¼wÅoŸ¸ê×àWÞ5¯1´ÎJ ýü7Ó¦«ñûð]øCø#øü Çÿþ >0Í¡ÏàCøÙ$·/ÞQ_mâo÷šäùW^3#C3#C3 j&AÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ ÍŒ Íd¡™,4“…f²ÐLšÉB3Yh& Íd¡™‘¡™‘¡™‘¡™‘¡™‘¡™‘¡™‘!ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(ERÔ(EZt7þ'ñ÷‘¸>é¾-6éjØ¢»Ê5q^š[t'i¼¤LµèNÒxM#R‹îõ-Ý7ÂrX£pŒÁV;¿z`³ø®Ýy«ï%ÅÙoZê{õºÚ˜ª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª­¨ÚŠª^ï–f—}p§zB‹…•î²X¥ž¿S­?áíT;j¿ÚÑÀ!ûT-bY˜ãøQ˜wéÌ‹Qh}l}l—J9$ViÞºK¥Ô‹vEÞÅy×î]Œu»¸:ïâê¼KJt)1ÐzÔ.zÔ.zÔ.zÔ.zÔ.zÔ.zÔ.zÔ.zÔ.éW¹ÒoY˜3%ÒoÌ‹ýèïG?úûÑßþ~ô÷£¿ýýèïG?úûÑßþ~ô÷£¿ýýèïG?úûÑßþ~ô÷£¿ýýèïG?úûÑßþ~ô÷£¿ý>ú}ôûè÷Ñï£ßG¿~ý>ú}ôûè÷Ñï£ßG¿~ý>ú}ôûè÷Ñï£ßG¿~ý>ú}ôûè÷Ñï£ßG¿~ý/iý{ñqøÜK`),ƒ®]ŒB{ná%ÕÎöWëNþ%ÝŸ¯ˆ Ø z½¤ûsc6q|3Ÿ¦àÌÂÜÍ‘—u÷ò’®ËÆëº;z‰gí^âY»—<{>ð%Í UnˆÒ574VÃ&Ø S°nƒm°vÀNØ»aì·:ªŒbÿ0Y˜³ºËOcî·ºèþßx„‡à$œ‚ÓpÎÂ9xGá1xž€'á)xZÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ í˜¡3´c†vÌÐŽÚ1C;fhÇ—u-ûÑ®¯è*r@Üû¡oB|Àà‹ð%Žy¾óð58­³¾âÍx›ÅY]¹^ѽe\üŽ÷ºø¦7"~ä Šg¼ƒâ¯¼=âE}òŠî-m»Àö¯UÂ+º·üHü_µw¼÷Å»ÞqñžwD\ñ¦ÄUï»âüʻὢ;L)Ѧñ€×!þ›)×õÔø}ø.ü!ü|þ„oýþ þÂkÏ©¯„–­^¡{pÞ‡¬Žº5>„¿ÑÙ½½½½½½½½½½½½½½½½½½½½½½½½½½½½½½½½‡Ð?„þ!ô¡ýCèBÿú‡Ð?„þ!ô¡ýCèBÿú‡Ð?„þ!ô¡ýCèBÿú‡Ð?„þ!ô¡ýCèBÿú‡Ñ?Œþaô£ýÃèFÿ0ú‡Ñ?Œþaô£ýÃèFÿ0ú‡Ñ?Œþaô£ýÃèFÿ0ú‡Ñ?Œþaô£ýÃèFÿ0úGtŸy[¬p_£Ðþ»Èˆƒ•.'V¹wÅÍnA¬vbîHG4÷ÿø&GžÑ t„¹ÿˆæþÿ)~ÌvíëîCñÇÜuŸŠ÷Ü’¸â>mî?ÂÜD>hîN¥JnHÜ0ðy+Kn_€/—`¾ _ƒÐ~£ Ø…9«KhæánÓ¬¹¿q˜ÅŸ,þdñ'‹?YüÉâO²ø“ÅŸ,þdñ'‹3Y<ÉâF7²¸‘Å,ndq#‹YÈâ@²8Å,dq ‹YÈâ@²8Å,dq ‹YÈâ@²8Å,dq Ç0Ç0GÏÑÃsÌsÌsÌsÌsÌsÌsÌsÌsÌsÌsÌsÌsÌsÌsÌsôù}>GŸÏÑçsôù}>GŸÏÑçsôùsÀsÀsÀsÀsÀsÀsÀQÖ»FYïe½k”zR¯QÖ»FYïe½k”õ®QÖ»FYïe½k”õ®Qúü(ë]£ôùQÖ»FYïe½k”õ®QÖ»FYï¥ÏÒçGYïe½k”õ®QÖ»FYïÅQÜÅQÜÅQÜÅQÜÅQÖ»FYïe½k”õ®QÖ»FYïe½k”õ®QÖ»FYïÊãOòø“ÇŸ<þäñ'?yüÉãOòø“ÇŸ<þäñ'?yüÉãOòø“ÇŸ<þäñ'?yüÉãOòø“ÇŸ<þäñ'?yüÉãOòø“ÇŸ<þäñ'?yüÉãOòø“ÇŸ<þäñ'?yüÙ-gbbVê[»åÆE±Úí¯KÿnÕè3ñžzþniÖ‘Òl à¯Ò#0 sv•nÌ‹¯ênê¿ÄGÜÄuðI7(V¸n1 +ÝÅÍÎV­ªÝQ±FcË«ºþ½˜€ ^Õ°1 
íï@_õšù4G`æànŽ<«¿êÍ»KâxÉà5]Ñ^Õ½±¿ïÀ»îsñžÆÀWCe¦<ô4ÜËaŒÂM0[­ŽòMµ“oÆæ¬vòǘ‡»­¦jã¸ßJ×}¯ñ<ÁI8§á œ…sð0<Âcð8<OÂSð´¸G-ò”…ö—5{¼¬to‹Uî¤XíþA¼înˆwÜWâ]iÞÃÀõs¯Zë ªµ1€Ï³¾_„/Á |¾ᕨ>fY˜3 òИÇP>†ò1”¡| åc(CùÊÇP>†ò1”¡| åc(CùÊÇP>†ò1”¡| åc(CùÊÇP>†ò1”¡| åc(CùkÒðo¯îº(þÀ{V|W³Š½ºëäíßóþ]ü±÷/⇚;ì }éõzû˜Eîc¹O5µµdûëé}̹ö1çÚÇœk‡»¿ÃÝÇßáŽóÝq¾;ÎwÇùî¸Î^/î‚ýЇýÎí=à ðEøǼ _yøüŽ×"¾éÕŠy›Ä3Þ?ˆ¿òÄ‹Þ7ÅÙ.°ýk{B]óšo‰w¤b\3šñž´ŒkFcg[•ãšÑ¿ÒÝÿ¸f4*K3£90Žã80Žã80þ¿h¾#årÞøø.ü!ü|þþ„3ÿþ žóþV3sr''pr''Ð0† 4L a h˜dždždždždždždždždždždždždždždždždždž¢¬)Êš¢¬)Êš¢¬)µf“h%N©Ä?yS”5EYS”5EYS”5EYS”5EYS”5EYÓó¿qq|ÒímÌŸfÌŸ–;f³îê§¥á‡b®¤ÓÕ¿Ϻ_‹ónQ¼/éJ=­Ûx Ú]Ù4¿M“âiÒ*K£´q#,‡0 7Ál5UŒÒÓŒÒÓŒÒÓª‘ô¨FÆ<Üm¥h”6îgTÇß‹èŠ?£:Ÿt‡Å ×%F¡ÕQó|¹4ÃÚÎ ­9£ÚýVœ×½ôŒjg¼¤ûŠÕÎxM->ÃßPÏð7Ô3ü õŒj§RT;ãFX+`n‚1ØjzT;)Q팴•–VZfXi™áï7gøûÍþ~s–^=K¯žõìIªYõ–?Új…ŽŸÕuùwb6h›ÕuÙ˜„MºnÎêºlŸ¦àÌÂÜÍ‘—Õj³ÞhëQ³¤f–ÔÌ’šYõ:•«^gYhµ˜¥³ÔbV×MY×Mãx‚“p NÃ8 çàax…Çàqxž„§àiqN=aM|Dwsê Æ'5«š“‡;ìWCX©ÚÍ©'|(š‡sê DëísêWÄyw]¼/é.eNýÁxOïp†»|ËžnSPYêưVÀ(Üc°ÕTÉaé‘ÃÆš“s89‡“sê*EýÁ¸G<¬º<+F¡=tXýÁöW»o‹wåÃa©úƒwXç×1:¿1€Cv¤ZÐ8³0gßU¹Æ¼xJ÷Ø? ðº{O\q?WášøîÉ¿/ª¬t¤Ñ~Gþ€ß‘?з~&Ú˜ùôWÔ:è öéšø¡Žÿ“xÞÕ§òŸKÏz©žõ‡OÀ °–Â2XáöŠQ¸Iu?«û c¥mfqVÎ ˆÓîßÅeê¬7§v<«ÙÄfñ¢«?f»Àöªk׌¡J+%T7ÃjØ›a ¶Àm° ¶ÃØ »`7ìý¦\md àó¦_÷9Æà‹ð%˜/ÃWà ´9ÈYæ g™ƒœerV-«º‡Fa.›¡{pއ̓Ðgð¡xNÞþ\Œjü?'¯zç¤V{¤ÖÀûGa^<¯oýTŒºkbµÎs^ßÒ}ËÀûGa^œW‹)>Ÿ€` ,…eÐZ|žŸW‹ÿAŒÁJ—«Ô3çUn¿8­YɼZü?Å9J±Ÿ§ÅçiñyZ|žŸ§ÅçÕâ*E-nÜ «al†)Ø·Á6Ø;`'ì‚ݰZ‹ÏÓâó´ø¼Z\úÕâÆà‹ð%˜/ÃWà ²šªÅ#0 sVwykÌÃes t®ÀûÐZ|žŸ§ÅÅõÂ~QŒ‹§tÇþ+‡›½_ñ7t5®×ÁGÝÿŸtv×X«ž|ÑÛë`\ó닺*܂ؓОO»è5ói îTö/êÚdÛY˜ƒ»9~ÇÁ³j¯‹Ÿà%W'.ÀËš]^Ôxb´¿p¼¨¹ä9q~¡ëøEÕÒ¯±Ú¸–à …›` ¶ZCcV¯Ðkp/ÜÇá|¾÷›~]àAxNÂ)8 gà,œƒ‡á[æLè»ðmøüîÒ/ê9&þØK‰G8þ(<Ãð$<OÃ3ægè,<Ï‹Ó>¦|Lø˜>ð1} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @(Ð ô} @¸¤Ñøâãð ¸–ÀRX+Ü!1 +Ý~±Ê«Ý8í.‹3ºsÖÝ“;/^T]Ò¨k´Q÷£î¥­;] UÁͰ6Áf˜‚-plƒí°vÂ.Ø {`¿)Ô¨k àéÔøiY˜3å?y¸lú5~Wà}øÀj¤ñÓøP\À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|[À·|»,7rbT÷™—åÆ Ñfî—Uºö«tcsì…6sÿDãϱB÷6Ÿè 
3b¥{_¬ÒýÞ'ºÏ¿/V»AqV©ü„ÿ¬»fŸÈÕ»b®JÛ'rUTÞuN•®sªtc‡ìœª»qfaÎÎ/UÆ<´ÿ±ü ÿcùþÇò'¡Ï­ÄÐoàªx…Z_¡ÖW¨õj}…Z_¡ÖW¨õj}…Z_¡ÖW©õUj}•Z_¥ÖW©õUj}•Z_¥ÖW©õUj}•Z_¥ÖW©õUj}•Z_¥ÖW©õUj}•Z_¥ÖW©õUj}•Z_¥ÖW©õUj}•Z_¥ÖW©õUj}•Z_Óäâ:ø¨jwMuù¾Xë4^ë b¬ƒqÍÑ®é blpØó˜0 ›äØ5]AìÓÜ)ý×¼Œê~Í„CpŽpLæà(ûws¶WÙÞ£» kºÊí?!_ã?!_ÓuðGâšÅ\ó®ýÏ?z×ø/Ç×tQ-t1n„å°Fá&ƒ­VS]GT;]GŒ{á>8'àëð ¸ßÔê:b<ÂCpNÁi8gá< ß2t1¾ ßöòµÐÏÍÐ/à/áûðß= Áãð< OÁÓð¾õ¡®#×B±}Æü …çày¸ˆ‡7àMx Þ†wà´'c¯{ö'®{öß㯫·Ûv•ÓìTý|`%ªçŠŸªW¿'V¸7ŨÊýTšïˆ›Ýbµ›/ê*ü©Šê:^Út¼´˜³#¥Á˜9ó"g^äÌ‹œy‘3/ræEμș9ó"g^äÌ‹œy‘3/ræEÎ|Ë{ÙÝ_1†ÖÁ¿‚?…?oë÷_âãð ¸–ÀRXk5ÜV¢u0®1ç¶m´ÿ&z›ÿ&z›ÿ&z[‰^›ù4G`æànŽÜãþ$ŽÁ}œyNÀI÷•8íÞöì¿ÍßVfµ'dÿ·í¶®˜ÆÍ°6Áf˜‚-plƒí°vÂ.Ø {à˜iPº{! C( ¡Pé6¾÷[½”nãx‚“p NÃ8 çàahÏôßVZmû(<Ãð$<OÃ3桲i<ÏÃy´]€‹øvÞ„·àmx.Á»p·ïÁx_\â}7K¼ïfI#ÿoÄÇÔâKêKÆ'àXKa¬•Ú%õ%cŒ»OÅlÐLI}ɘ„MnIlæÓ´¿Zâo –ø¨%þj‰¿Zâo –4ëé_ðªÄaFé^ÒÕÄ8‡áçÌÂeÿnJ•í=èƒ{9ÿ>÷P‡pÒݧà4œ¡¾³š¡/y¶ºä†§4ë_òNãÞ|÷CóP½Zßâ]?K¼ëg‰wý,ñ®Ÿ%Þõ³Ä»~–x×ÏïúYR TŠR`Ü «al†)ØÿÞKŠÿà5ŠÛØÓÛaì„]°ö@P"Œ{á>8'àëð ¸ßüT"ŒàAxNÂ)8 gà,œƒ‡á÷¼ñ=o“øckM]ãÔ.ºÆ ߇G8þ(<Ãð$<OÃøÖ‡ÖFºÆÙö”Ÿ…çày8o-¥i©ð&¼oÃ;p ޅ˸w®Àûðõ]kŒáçð7pU\V¾¾“ÂeåËøÜK`),ƒµR¸¬|ë`\ ZV¾Œ nYl„IؤZ/+_öi ÚS©ËÊ—ñøì…}°ßÛ&¾ YV¾ŒÔÂ!8 G8gæà(ûwSú«lÛzø²òeÜËùíÿÁ.óÿ`—ù°ËÊת8§•—eÞ·ìÙ;G–½ÃlÀÙìŽeYiÒ‘!û`Yé0n†Õ° 6Ãl¯±wYéˆ‹ÛØÓÛaì„]°öÀ1s[é0î…ûà8œ€¯Ã7à~sCé0€á!8 §à4œ³p†ßóšÄ[+(V÷_À_Â÷áŽ< Áãð< OÁÓ'CÿëäGðŒµŽra<ÏCû¸ËüGÜeåÂ<¿oÂ[ð6¼—à]ho\歂˼Up™· .+jSåÂø~ÎþßÀUqE× wÅuðQ·]|Òíí}1+¼/f…÷Ŭ¨ÿ&& ½e…·±¬ð6–õÿb3Ÿ¦àN]ýWÔom; sp7ÇÛË w,+Éì­hÄ–ØÆ°VÀ(Üc°Õtò¶šÞV³ÂÛjVx[Í o«Yám5+¼­f…·Õ¬¨·Hƒz‹ñ<ÁI8§á œ…sð0|Ëj§¹ƒñmøü¡úÏŠú‰sƒÇá xž‚§¡Ýu¬p×±Â]Ç w«Ì W™®Ò:«j½¢=}´ÊÓG«j‹U±&¡­%®òôÑ*O­òôÑ*O­òôѪ·›#OéNxÕ;ëì/ýæáx .ÀËy^sǼUfy«ÌòV™å­2Ë[e–·Ê,o•YÞ*³¼Uµ”4óìÐ*Ï­òìÐ*Ï­òìÐ*Ï­òìÐ*Ï­òìÐ*Ï­òìÐ*Ï­†~àÕˆö³«æ>‡ëÝWâlÿ%ÛOºÿÃnEÜÀþX Ë`…ë£nTÜäî‹1Xé~ V¹Åj·G¬u¿·À:W-þÂþ›´¨‘\l„IØäþClæÓY˜ƒ»9rêþÃuÅq8')k N»ßŠ3¶ßVŠÄJ«…î(Œ›a5¬5t_aÛÍ0[`«9£; Ûnƒí°vÂ.Ø {`¿y¥v1ðyó*4_€/—`¾ _ƒpÈ\Uï2ŽÀ,Ì™ÏjecŽQÇ×à^¸ŽÃ ø:|î7Wí¿j‹àAxNÂ)8 gà,œƒ‡áxƒÇá xž‚§ák;]wŒçày8OK]€‹(¼oÂ[ð6¼—à]¸Ì·îÁx_ü+ïÏÜçâ9ùW!?çÝGâxÉýB\€×Ü'âu÷@¼ã–Å»nI¼'m¥:ÏïÄ ×)Fa¥;)V¹bµÛ²7,‡ìMKâ¯Ü-ñ¢».~ÌvíëîSqÅÝW¥°T£ñ+;úŒÎ¬>c àû‡áÌœ•¨Ö7æá+×þ3FÈÞP ©=á&Fa¥û±Xå.ŠÕn‡h5}Êž ï¹/COÙn¢8dÇÛnâÌœÁžpób™½ 
ZŒºiñ;î˜ø¦û‘x]#C™½ZüÊÍiœµ#ŸæÈ§9òiŽ|š#ŸæÈ§9r#Gnäȹ‘#7räFŽÜÈ‘åYΑåYΑåYΑåYÁ‘YÁ‘YÁ‘YÁ‘QŽŒrd”#£åÈ(GF9rGnâÈM¹‰#7qä&ŽÜÄ‘1ŽŒqdŒ#cãÈGÆ8Òþë`u¨ n†Õ°ÆVoÄuðQõ¥{€X¡^QcoC7k ¯±·‹–²{[±x^R«±·‹w %¨Æžù ÕØ †ø4ÜËaŒÂM0[­t{[±èÃæ¬t{[±˜kQ^‹òZ”×¢¼åµ(¯Ey-ÊkQ^‹òZ”×¢¼åµ(¯Ey-ÊkQ^‹òZ”×¢¼åµ(¯Ey-ÊkQ^‹òZ”×¢¼åµ(¯Eùû??b6Hçû??b6é*³ÅþϘ‚-ìß ŸÏÁ^ØG82 sp7û÷BÍ Åq8'UÇ-öFqZiÝboT m±7ªˆûí ö~Äð <'ᜆ3pÎÁÃð< Áãð< OÁÓp à"JnÀ›ð¼ ïÀ%x>à[ŸÁ‡b>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás>×ás=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãs=>×ãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>Çñ9ŽÏq|ŽãsŸãøÇç8>'ð9Ï |NàsŸøœÀç>'ð9Ï |NàsŸøœÀç>'ð9Ï |NàsŸøœÀç>'ð9Ï |NàsŸøœÀç>'ð9Ï |NàsŸøœÀç>'ð9Ï |NàsŸøœÀç>'ð9Ï |NàsŸøœÀç>'ð9Ï |Nàs>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às>7às#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âs#>7âsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>'ñ9‰ÏI|NâsŸ“øœÄç$>7ÙSO¡&{*Iü >›ÙßÌþfö7³?ÅþûSìO±¿…ý-ìoa ûÿÞ»ë=#Þóþ.ôÏö ‚¸ÖÁ&¸SÚþÙÞ8/Žm _| î…ûà8œ€¯Ã7à[v{ã¼ø6|žálgá9x^lEI+JZQÒŠ’V”´¢¤%­Òß*Þóþ1ÔŠªVTµ¢ªU­¨jEU+ªZQÕŠªVTµ¢ªU­¨jEU+ªZQõ¬÷˜;/>Ÿ€` ,…e0î>ÐÖëže½îYÖëžU]®‰Í|š‚-nMÜ ŸÏÁ^ØG82ë>slïæl{ùtŸûµ8' ­æ=ËjÞ³JÇ/ÄwIœs?/k¶ø¬ý¿kñSwO\„_RÓßY÷{6Ti5 UÁͰ6Áf˜‚-plƒí°vÂ.Ø {à~«‹rj<ÂCpNÁi8gá< ¿çý­øžW#aÏQx ‡'àIx ž†óæ˜2k\¤Ö7àMx Þ†w༠—ÍÕÐ=¸ïÃæ³²f|(~ÓÞþ&>æÅÇáp,¥° V¸V1 7¹ÏŬtb•›«Ý?‹qõ¨oÚ;ŽÅµé7í©$1 m}ø›öŽc1[ÜMq+|>{aáÈ,ÌÁÝœm/ŸîsgÅq8'ùt N»çÄ·WœsÏŠßqâ›®Aü•K‰áǰ/»Óâxï~ê^á­ÿù⊋‹«îïÄ5ø•í±w‹•曽ãXÜ «al†)Ø·Á6Ø;`'ì‚ݰö[[Ø[¿Å>o-bï8_€/—`¾ _ƒpÈÚÎÞú-ŽÀ,ÌYkÚ[¿Å<ÜoÎÛ;ŽÅð <'ᜆ3pÎÁÃð< Áãð< OÁÓpÞÚÔÞq,.âí 
xÞ‚·á¸ïÂek;{DZ¸ïÃÖìÇâC1M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"Ò$"M"¶Ù»nÅuðQ9¼•Þmö®[±R÷-Ûì]·bµ®\ÛXéÝfïº/ÀK|wÞqÿ-Þu«â=÷ûÐ6{®ø4ÜËaŒÂM0m¥w›½ Wôaíd›½ WÌ‹m(oCyÊÛPÞ†ò6”·¡¼ åm(oCyÊÛPÞ†ò6”·¡¼ åm(oCyÊÛPÞ†ò6”·¡¼ åm(oCyÊÛPÞ†ò6”·¡¼åí(oGy;ÊÛQÞŽòv”·£¼åí(oGy;ÊÛQÞŽòv”·£¼åí(oGy;ÊÛQÞŽòv”·£¼åí(oGy;ÊÛQÞŽòv”w ¼å(ï@yÊ;PÞò”w ¼å(ï@yÊ;PÞò”w ¼å(ï@yÊ;PÞò”w ¼å(ï@yÊ;PÞòN”w¢¼å(ïDy'Ê;QÞ‰òN”w¢¼å(ïDy'Ê;QÞ‰òN”w¢¼å(ïDy'Ê;QÞ‰òN”w¢¼å(ïDy'Ê;QÞ…ò.”w¡¼ å](ïByÊ»PÞ…ò.”w¡¼ å](ïByÊ»PÞ…ò.”w¡¼ å](ïByÊ»PÞ…ò.”w¡¼ å](ïByÊ»QÞòn”w£¼åÝ(ïFy7Ê»QÞòn”w£¼åÝ(ïFy7Ê»QÞòn”w£¼åÝ(ïFy7Ê»QÞòn”w£¼åÝ(ïFyÊ{PÞƒò”÷ ¼å=(ïAyÊ{PÞƒò”÷ ¼å=(ïAyÊ{PÞƒò”÷ ¼å=(ïAyÊ{PÞƒò”÷ ¼å=(ßnOm·÷ƒ‹ÕÒ¹':¶óDÇv{?¸Ø“°É]›ù4G`–Oslïf{Rú·ÛûÁÅËòa»½<´ÝÞ.ö[éö~p1€9+ÝÞ.æ¡=ɰ'¶ó$ÃvždØÎ“ Ûy’a;O2lçI†í<ɰ'¶ó$ÃvždØÎ“ Ûy’a;O2lçI†í<ɰ'¶ó$ÃvždØnïoÀ›ð¼ ïÀ%xWìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ^œìÅÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œìÃÉ>œü–½A^lq_Š[á3ð9Ø ûà^¸ÏýN‡pZ£Ð·ì òâœÜû–½©M¼°ç3øPÜA‰;(q%î Ä”¸ƒwPâJÜA‰;(q%î Ä”¸ƒwPâJÜA‰;(q%î¤Ä”¸“wRâNJÜI‰;)q'%î¤Ä”¸“wRâNJÜI‰;)q'%î¤Ä”¸“wQâ.JÜE‰»(q%î¢Ä]”¸‹wQâ.JÜE‰»(q%î¢Ä]”¸‹wQâ.JÜE‰ýÌCûí­dâ:ø¨ûWñ1êýš™Ÿ€Oº qÛ%°–ÁZW"nu0îŠÅlpOˆ0 ›œ/6ói ¶¸wÅ­ðÍ€úUkÛîu“bÛ;]‘8àþI̸ qÁa8ÂÙ²0GÙ¿›r_e{Û ŽÁ×Üq/çßç¾#ŽÃ 8éêÄ)8ítÍÖLV÷rX÷©rX×Bï°ûñ¬f¦ýÞ9÷ ñ¼û©8¯™N¿½×[¼ä‰ ð²{AüÄŠWؾÊö57'~êÞáî3ñ·îÃP¿Æi°·‰OðVÀ(Üc°ÒZG³`ãfX ›`3LÁØj­¬¹°m·ÁvØ;aì†=pÌ=#¾fínï ÷¹qÜý‹8á6‰¯»&ñ ŽÙoíbï ¸jñ {Y‹h”³í)k){o¸8ÃfÙ?÷(å»îiñmë¡wà÷¼-â÷u¯Òoï ßõì<ïyVÖ½­âÏ9ÿ/à/áûðˆû†x”ós¥âq¶O¸§Ä“lŸ‚§á|ëC÷Kñ#¶ÏP‹³î%ñœõÞÐyöÌ[R‹´æ xÞ‚·á¸ïÂeëušGWà}øÀz m|?·Þhï 
WÙþRª|’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷IºOÒ}’î“tŸ¤û$Ý'é>I÷Iz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éIHz@Ò’ô€¤$= éI^¹¾'>©™ãóJëâXãîb6ÀF˜„Mš<¯´Úv îä[#lgaî†{øt žÕìãyÏþòâyÏþòâyõ1iñ©úŒq/ÜÇá|¾÷Û™ÕgŒàAxNÂ)8 gà,œƒ‡á[œí»ðmø<§Gá1xž€'á)xžá[gá9x^Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|Àç|þÿ;ºÓçHîû¾ãK)+Oü„ORe[%K\ñ’l]¼lUd§¨*U"K–lÇväçFw£3¼©]w¹´»ÑJ €ss†v%ŠRV¨ì…Ű² ¦d;Lçׯ'Ÿšzúûý¼¿Ÿùî ìyç§q~ç§q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç2ÎeœË8—q.ã\ƹŒsç Μ+8Wp®à\Á¹‚sç Μ+8Wp®à\Á¹‚sç Μ+8Wp®à\Á¹‚sç Μ+8Wp®à\Á¹‚sç Μ+8Wp®à\Á¹‚sç Μ+8Wp®à\Á¹‚sç Μ+8Wp®à\Á¹‚sç Μ+8Wp®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsç*ÎUœ«8Wq®â\ŹŠsçÎ5œk8×p®á\ù†s çÎ5œk8×p®á\ù†s çÎ5œk8×p®á\ù†s çÎ5œk8×p®á\ù†s çÎ5œk8×p®á\ù†s çÎ5œk8×p®á\ù†s çÎ5œk8×p®á\ù†s çÎ5œk8×p®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsç:Îuœë8×q®ã\ǹŽsçÎ œ87pnàÜÀ¹sçÎ œ87pnàÜÀ¹sçÎ œ87pnàÜÀ¹sçÎ œ87pnàÜÀ¹sçÎ œ87pnàÜÀ¹sçÎ œ87pnàÜÀ¹sçÎ œ87pnàÜÀ¹sçÎ œ87pnâÜʼnsç&ÎMœ›87qnâÜʼnsç&ÎMœ›87qnâÜʼnsç&ÎMœ›87qnâÜʼnsç&ÎMœ›87qnâÜʼnsç&ÎMœ›87qnâÜʼnsç&ÎMœ›87qnâÜʼnsç&ÎMœ›87qnâÜʼnsçÙÒûòÝ ï§ÿ.)è}ùúñü;Aÿ˜þ ý$ýýtþjÐÏÐGécù¥ ûí´øëúY]?ë¯ëgýuý¬¿®Ÿõ×õ³a.ß útþ“ µüJÐ&¥-¡s4õóyu[·ó}†>>#ÏúûüYWaf]…™uf¶Tüíîli‰´gKÅU¹Ù0÷ï½â Wé[y/èÏèÏóÿôôí|83;óÁâ 3Å_VÏÎüý0ý½Ÿž¡¥Ч ¶!KÁEÈR¡ÏÑçé ôEú}™~ƒž£çé}….Ò%ºLWè*]£ëô›í¥B/ÒKô[¥? 
úíü·APúxÐËÕði½ÐMºE_sžíÒíÓÒ}ݳÆù¿xüFÁ6ä°ÐüzÐ]‹ka³®…͆OëÛëô½IoÑ=z›îÓw‹ †Ïà…žÐÓP+’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶#ÙŽd;’íH¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;–íX¶cÙŽe;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶ÙNd;‘íD¶çJ¿“¿´¸8ç à\Høç‚þnxÖ\éô÷è}yô÷=þú‡ôCôÃù{A?B?ž8èÓ?¡ŸÌÏýýtþ@ÐÏÐGécy-èã~û}2ïý,ýýýÛÀa.¼.ŠÇÿ?ôéüï‚Öh“ÎÒˆÆÎ–Ð9šúù¼º-Ûù#AŸ¡Ï†$Ï…×Eqþç=~¾Hó¿ºD—óï]ÑÏ*]ó“õ¼t;ÿzÐüí »ù/‚^Éÿ1èUúVÞ ú3úsú?CæÂk§xü¿<~;Ï‚þ2_ zþsþÓ ¿ÎGAOóÛA“ïÏÌ…WYèÇ•Á9Wç\œsepΕÁ9Wç\œsepnæÁbj3чé#ô1ú8}‚>IŸ*¦?óEÿ’~‰~™þý ý*ýkúwEfþžþýoô™< úl‘ðê.ôùü“A_Èÿ&è‹ù úR~6èË~ûbváÕ]èù¼tÁãWò/]ôx‰.ç_ º’?tÕOÖò¿ºîqqÅp.¼º‹3\ôøýVé‹A¿¿ô¥Ï½\d#¼ºÃ«»x¼E_Ë¿´ãl]Ú£ýüAó? :òøuÏó ¯îâñEÒ«ûσîä? 
ºë'WŠŒ…Ww¡×Š$„Ww1Ùô&½E÷èmºOï œ™ÒzH9¢ÇôÝ"Ÿaì=ñø4t•Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ú©=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=Ù™=0ïÞ5óî]3ïÞ5óî]3ïÞ5ó®hÌ»wͼ{×Ìß»ô³ôsô ôoé¥1Mèw¶ç<.î%;_z¾HóÓ Kt9ß º’¿´¸ê|Èyø¹;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ¼;ØÌ»ƒÍ|˜Nè0L§Ðk:¹NoЛôÝ£·é>½S¸Ó)ô€Ò»|Ñâ«-Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh™BËZ¦Ð2…–)´L¡e -Sh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›BÛÚ¦Ð6…¶)´M¡m mSh›Â3¥7ÂñÏ”ÞÉt?¿´xÖ3¥ƒÀí™Ò{ù¯gžu̳ŽyÖ1Ï:æYÇ<ë˜çóœcžsÌsŽyÎ1Ï9æyÇ<ï˜çó¼cžwÌóŽyÁ1/8æǼà˜ó‚c^tÌ‹ŽyÑ1/:æEǼ蘗ó’c^rÌKŽyÉ1/9æeǼ옗ó²c^vÌËŽùFéþüá gèƒù·‚>”¿ô‘ü+Ažÿ[Ð_:óµâ˜™¤gélqäLDcšÐ¹â¹3)Í‚žSåœ*çT9§Ê9UΩrN•s¡Ûý ûù4è4d㜺çÔ=§î9uÏ©{NÝsêžS÷œºçÔ=¯îyuÏ«{^ÝóêžW÷¼ºçCÝ ºŸÿ¿ ÓBÕ=¯îyuÏ«{^ÝóêžW÷¼ºçÕ=¯î‚º ê.¨» î‚º ê.¨»Àï¿ ü.¨» î‚º ê.¨» î‚º ê.¨» î+¡îz†>˜'AÊ/}$ÿ`Ð7ò“ ?Ó¥ô&}'œç•Rñúz%ôp+è4?z^¡¯„„„ãC?ál¡ŸBÏÒÙ✡ŸBcšÐ¹¢Jè§Ð,è¢~õ³¨ŸEý,êgQ?‹úYÔÏ¢~õ³¨ŸEý,êgQ?‹úYÔÏ¢~gžóZœ)Ó ­Ò­ÓmÒ¢ÿEý/êQÿ‹ú_Ôÿ¢þ—ô¿¤ÿ¥ÒGÃs—JÐÂË/K¼,ñ²ÄË/K¼,ñ²ÄË/K¼,ñ²ÄË/K¼,Í|½8?GK-q´ÄÑGK-q´ÄÑGK-q´ÄÑGK-q´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË-s´ÌÑ2GË­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G+­p´ÂÑ G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑ*G«­r´ÊÑGk­q´ÆÑGk­q´ÆÑGk­q´ÆÑGk­q´ÆÑGk­q´ÆÑGk­q´ÆÑGk­q´ÆÑGk­q´ÆÑGk­q´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë­s´ÎÑ:Gë}38z2èú`þó …¾\¼8óÍÐOøyè§Ð³t¶øm8s¡1MèœãSš½àÌœù‚3_pæ Î|Á™/8óg¾àÌœù‚3_pæ 
Î|Á™/8óEg¾èÌù¢3_tæ‹Î|Ñ™/:óEg¾èÌù¢3_tæ‹Î|Ñ™/9ó%g¾äÌ—œù’3_ræKÎ|É™/9ó%g¾äÌ—œù’3_ræKÎüíÒOJŸúSúNé?Ý+ý— û¥¯–¾ô¤ô™ ¿¢ï•>?óí™qéOg¾Sú§ðø;¥—þ<è›ô¤ôèLñ NÎ|7œíƒî• º_z2èÔã÷JŸù^xnñý o„c¾Îð@Ð7éAèä{á˜ÏÌ|ßoÎð± ûáç?g(¿ýá ôÇ¥? ú&=)}2诂¾ZúÝü·A?@þ>ýú‡ôCôþüKAÏÐâÿ5{5°-~þHþàŸ ¯…WÃç¦B?¾>7ú(}Ìñûí4¦ £óŽüy~ô´xw÷jé—á÷jé-î ûễEÝÕg¦ÐÇèãô ú$ý"ýKú%úeúWô+ô«ô¯é× !'…ž¥³~ј&t®ðrRhF¿Qx ŸË =Oè+t‘.ÑeºBWé]§¯ÑíÒíÓÒQÐú^§ÑËaRz†[î²-w9Lí• å¯}$ÿDÐâ³ÆeŸ5.û¬qÙ»ÙË>k\öYãr Îhz–;í²vÙN»l§]¶Ó.Ûi—í´ËvÚe;ír`ꆅÆ4¡sE'a¡YÐ ýoèCÿúßÐÿ†þ7ô¿¡ÿ ýoèCÿúßÐÿ†þ7ô¿¡ÿ ýoèCÿúßÐÿ†þ7ô¿¡ÿ ýoèCÿúßÐÿ†þ7ô¿¡ÿMýoêSÿ›úßÔÿ¦þ7õ¿©ÿMýoêSÿ›úßÔÿ¦þ7õ¿©ÿMýoêSÿ›úßÔÿ¦þ7õ¿©ÿMýoêSÿ›úßÔÿ¦þ7õ¿©ÿ-ýoéKÿ[úßÒÿ–þ·ô¿¥ÿ-ýoéKÿ[úßÒÿ–þ·ô¿¥ÿ-ýoéKÿ[úßÒÿ–þ·ô¿¥ÿ-ýoéKÿ[úßÒÿ–þ·ô¿¥ÿ×J÷…_ .þ}Ð3´ø®–×JÐó¹ åß úpþ³ ä÷ýXþ« «ùo‚þ“#ߟò^ ÿ$lž×Â;âñO=~'ÝsÌ~þË ÓüvЃü(èI ðZéWô½â'3O]¡Ÿ@£Ð³ôëE­@£Ð2­Ð*­Ñ:mÐ&-ú4 iBç /3)Íè|ÑóL‹¶ƒvðéàÓÁ§ƒOŸ>|:øtðéàÓÁ§ƒOŸ>|:øtðéàÓÁ§ƒOŸ>|:øtðéàÓÁ§ƒOŸ>|:øtðéàÓÁ§ƒOŸ>|:øtðéàÓÁ§ƒOŸ>|:øtñéâÓŧ‹OŸ.>]|ºøtñéâÓŧ‹OŸ.>]|ºøtñéâÓŧ‹OŸ.>]|ºøtñéâÓŧ‹OŸ.>]|ºøtñéâÓŧ‹OŸ.>]|ºøtñéâÓŧ‹OŸ.>]|ºøôðéáÓç‡OŸ>=|zøôðéáÓç‡OŸ>=|zøôðéáÓç‡OŸ>=|zøôðéáÓç‡OŸ>=|zøôðéáÓç‡OŸ>=|zøôðéáÓç‡OŸ>=|zøôñéãÓǧOŸ>>}|úøôñéãÓǧOŸ>>}|úøôñéãÓǧOŸ>>}|úøôñéãÓǧOŸ>>}|úøôñéãÓǧOŸ>>}|úøôñéãÓǧOŸ>>}|úø ðà3Àg€ÏŸ>|ø ðà3Àg€ÏŸ>|ø ðà3Àg€ÏŸ>|ø ðà3Àg€ÏŸ>|ø ðà3Àg€ÏŸ>|ø ðà3Àg€ÏŸ>|ø ñâ3ÄgˆÏŸ!>C|†ø ñâ3ÄgˆÏŸ!>C|†ø ñâ3ÄgˆÏŸ!>C|†ø ñâ3ÄgˆÏŸ!>C|†ø ñâ3ÄgˆÏŸ!>C|†ø ñâ3ÄgˆÏŸ!>C|†øŒðá3Âg„ÏŸ>#|FøŒðá3Âg„ÏŸ>#|FøŒð>ÿ'èžcöÃç‘Qàsô€|FøŒðá3Âg„ÏŸ>#|FøŒðá3Âg„ÏŸ>#|FøŒðá3Âg„ÏŸ>#|Fø¼^úÐÕë¥÷åßú~z_Þ zþÕ gèƒù¿}8ÿQÐGòNЧ¯ß#ôSôÓ¡“×Ãç¸B¥Å}Â_/¾G8è4¦ £óŽÜÎß z%+èUúV> ú3úvxÇøzødW_Ücðõ™}Îüý0ý½Ÿž¡¥Ч Gvðhz–Î^ŸB3:_ø | mÓo¨xŽž§ ôºH—è2]¡«t®Ó×h‡viöé€é(èØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹ŒÍel.cs›ËØ\Ææ26—±¹LÌeb.s™˜ËÄ\&æ21—‰¹LÌeb.s™˜ËÄ\&æ21—‰¹LÌeb.s™˜ËÄ\&æ21—‰¹LÌeb.s™˜ËÄ\&æ2)íÑýüÝ Ó°Ó&&51©‰IMLjbR“š˜ÔĤ&&51©‰IMLjbR“š˜ÔĤ&&51©‰IMLjbR“š˜ÔĤ&&51©‰IMLjbR“š˜ÔĤ&&51©‰IMLjbR“z#LäƒAÏÐâþÃo” æƒ>”‚>’ÿÇ ï&o>ÿtZœ†g§…ž¥_÷ó§i™Vh•Öh6h“ÎUÂf.4¦ +ên…fA·u»­ÛmÝnëv[·ÛºÝÖívèözнü½ 
ûð¶ÏÑÛá_–p¼Î·u¾­ómoë|[çÛ:ßÖù¶Î·u¾­ómoë|[çÛ:ßÖù¶Î·u¾­óïè|Gç;:ßÑùŽÎwt¾:?Zt¾:ÿuТóïè|Gç;:ßÑùŽÎwt¾£óïè|Gç;:ßÑùŽÎwt¾£óïè|Gç;:ßÕù®Îwu¾«ó]ïê|W绘ïê|ó]ïê|Wç»:ßÕù®Îwu¾«ó]ïê|Wç»:ßÕù®Îwu¾«ó]ïê|Wç»:ßÕùÿ}õŠÿ¾z%t{ô‘ð/þÿ5òŠÿyżž[ü<¥YЫž{Õs¯zîUϽê¹W=÷ªç^õÜ«ž{Õs¯ùfçk¾Ùùšov¾æ›¯ùfçk¾Ùùšov¾æ›¯ùfçk¾Ùùšov¾æ›¯ùfçk¾Ùùšov¾æ›¯ùfçk¾ÙùºZ×Õº®Öuµ®«u]­ëj]WëºZ×Õº®Öuµ®«u]­ëj]WëºZ×Õº¡Ö µn¨uC­jÝPë†Z7Ôº¡Ö µn¨uC­jÝPë†Z7Ôº¡Ö µnªuS­›jÝTë¦Z7Õº©ÖMµnªuS­›jÝTë¦Z7Õº©ÖMµnªuS­[jÝRë–Z·Ôº¥Ö-µn©uK­[jÝRë–Z·Ôº¥Ö-µn©uK­[jÝRkO­=µöÔÚSkO­=µöÔÚSkO­=µöÔÚSkO­=µöÔÚSkO­=µn«u[­ÛjÝVë¶Z·Õº­Ömµn«u[­ÛjÝVë¶Z·Õº­Ömµn«u[­}µöÕÚWk_­}µöýÛ±_*þfi_­}µöÕÚWk_­}µöÕÚWk_­}µöÕºÞ]ü6èûò;AßOïË׃ޟ%èú`þÏAg¸ãzÊðîâ4èvبw»‚ÿô*}+¼Ã¿Þúv qÇÝûï¸{ÿwï¿Þ „*á@¡¦¡÷Ó3ô£ôúTÑOð:  =K‹«w\ݸãêÆðN tÞ Ú:ånÊÝ”»)wSî¦ÜM¹›r7ånÊÝ”»)wSî¦ÜM¹›r7ånÊÝ”»)wSî¦ÜM¹›r7ånÊÝ”»)wSî¦ÜM¹›r7ånÊÝ”»)wSî¸;àî¸;àî¸;àî¸;àî¸;àî¸;àî¸;àî¸;àî¸;àî¸;äî»Cî¹;äî»Cî¹;äî»Cî¹;äî»Cî¹;äî»Cî¹;äî»Cî¹;äî»Cî¹;äî»Cî¹;äî»Cî¹;äînp÷« ï ïœïw…ޗϽ?¼*ïw…>:¿܃îîwwƒ>ÞÜ ôJþNЫô­ðÎünðXèÛ~»ç ûž5 ¬î¡VðXè‡éGèýô ý(}€>Ut<†~‚ÇBÏÒÂã]ïòx7x U‚ÇBÛAx<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñˆÇ#x<âñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcy<æñ˜Çcß ^>ô -þïŒwÿGÅÏÉÏ->żºú—™wÃùÃ1áü…ž¥³Å‘á_¢BcšÐ¹â¹¡n¡YÐ{ªÜSåž*÷T¹§Ê½På7A‹*÷T¹§Ê=Uî©rO•{ªÜSåž*÷T¹§Ê‰*'ªœ¨r¢Ê‰*'ªœ¨r¢Ê‰*'ªœ¨r¢Ê‰*'ªœ¨r¢Ê‰*§®QžºFyêå©k”§®QžºFyêå©k”§®QžºFyê}þ©k”§®QžºFyêå©k”§®QžºFyêå©k”§®QžºFyêå©k”§®QžºFyêå©k”§®QžºFyêåiqòÿ|[xœÂ}H}`S3g¥æœšÙu~tg·2S³´ûøÝyÞ"òâCFD„Œˆñ2$$bĈ1"$"BFˆŒˆˆˆ!#$F ‘2^BbÄ;ÆóˆD¢è_)Ñe¬†«I×TÄxTœïˆ$ ‰M2(‰JÒ’]IYò(5IíÒWÒEéi­¢6^›­½–IeC²´ì¶.Z7U·X—«{’#òiù–¼ ¿Æ<û¢èWL)²õÊz¡~¤~¥þ±lXo8oìlÜl¬(ýÊ å¥J®T#ªœêPu¦†ÕaõªºÜ$44]4›Êš¨fCSzy¾¥…µCÚ5måEìEQ'ÖẠݲ¤·é9ýœ~W_ÐߌÖ4ä›eÍÓÍÙæ;#dŒ[Ô-‰–µ–BËÉhš0Ý´jZ'Z‹Ð T€ªm m¼-ßV†µ°NÂ9øþWÍj³Ó1'ÌKæmó¥Ej–9KÚ²jÙ·\[åÖ~kÌš´f¬'Ö_6Øæ· ¶„íÈö»=Ò>ß~‰ˆ%bDüH™Df$òÙE[T†jPõ  :ŠN¢oÑtÍ Çè]i'ícö7ö¯PÇTÇzG ‹c)l»Âî±Ç—ñ—?:_w»L]£];µ#é¸êƺã݇ÝONØ9îPëÔõ…:¦ Ô UÀ€0`DÁkðü Ò`l€,Ø'àA<ÐbZIé½L¢·é<}DŸ3vÆÃ&ÂŒ19æ9c®™;¦ÂüÈÚ@6°8 \Šrà…XŒõ² ;ȦÙvƒÍ²{ì {ÁÙ2ûdƒCÁ‚ãÁéà|p1¸Ì?s&ÎÎy8À­rî3·Ï+xo἟çøa>Æø¾Äÿ'ˆ„Á Œ q!!Ì Âa]Ø iBP M†fCïBïCk¡£°6<ŽþK‚;HÓ‚t$4w’xœíWOoÜÆŸýsb ‰ 
¤=´$DKíÊRX‡BqàTèÂv#ANÍ,9K–ä03ÃeV襗ý =õ³äÚ[í­·=ýí{o†\®dUQÛCQT -gæýû½ß{ä2Æ~4(Ù€ù¿oáß˶ w^~ƒm²ßù±ßù.ûÞàNßdǃAÞ€õŸy“_y‹½7øu·Ù£Áoƒ¼s×þä{콟ù-öƒ8Èo³o7~äû쇛?Hw·!Ðo6¿ò€½³õ§ ¿Áîoý-ÈwØ—[ò]¶»ý‹ ¿É²í_yÖÿäMöåν o±ÑÎ<ÈÛ¬ÜùMw¶þ|o;È÷ØèÝ¿ù-vôàÇA~{ð̓Ÿù>;þþžèjiTš9¾ïó£Ñx<„¯>]r·|¡sQ&|RÏ…½à_(™\¨˜ïeÎU›¦‰Ü²¢CQ¬‹Ã}Þ(—ñ—ÒJ³ ªKÇŸ‰Bò݉pz7âËÒÂV]&Òp—Iþéù„?¯déO‡ü3i¬Ò%GãΡªœ¬Ê#mÒÃçO'û`Lÿ*_Ê´Î…9Š Sˆ~rÚm ÃÞ)&wúêåñð8EGÑÉÉ£qwˆ‡C­koA9Ž>Ž>8å½ÌsP›AÈ–R¿â ¸²\pgD" aæ\Ï®5ºnã2ø„ðY" þª&ÑqÆQë#í”æÏÔ\ç.ÎäbåÞ ÀÓ(t]É™ˆ%Ÿ‰BåKžH«Òj™S!ꢀš@º#2)¿vªôšý‡HÏ…±¢N–3Ø2W¹â+Ï að]ocMBºÊfP¿s)i9D #guÑM ÁZ ô©4„â”ÀX—³\ÅuÊ+£´Q”så¸Ít'ÜJYð¯jå€w~i+°Zº]Þd@³Úú”¦:ŽC’|Z;Þf¢l•‹¥w –SUŠ ¨œ]׆ §®UÒFüÀçÂZÃùÊèJ¨Gi8øv*FF@ì eÕ4—-¢uUI ¤{ª’Öréœ43m ë ¥„á™0….—¾,¹L™ˆŸ9Ò°Ø`N`ô¹”óUÕs­ç¼sÀW.T8á¨}x#ì>  ¹x¯ë4CŒ –JíøLç¹n $(Im †ïÄ£ae¡†F×D D:¡rKüïR´>gr/øLÊ+ G€Ö…Ëo•{L¬3öàRÇ®6Æ\ Y'¦‘óÐ`ƺ¶¥´P“󒨊ãìø äL¼Ç\S#…¹ãøõžÊd^a7÷Ø>1V/€G£Ñû<Í—UiâJ©Jɉ£Õú¦UNo-¾ä,”hbùuýððÝÌOk‘J@ÿ¶žÀ„ÎAõò.‡D™çxýÄ`Õ1Êóg¼Êt)9ÃbÆ4«8t ¨†@Î@ s3¤óMCŽëâúxÝSáZ…ÿgÏõ›ì Ó¬bKf˜b)˘cœí±˜íÃõ^YÆðéÖ¦p–é%{š9¬d ¬LXÍæpgÙÜ} ߊIØÁkL6Ѷ_Ù!|úDd©êYŠà´fœÀÐv ÉÙK°fáß°Ùåì)œ+)Þg [À*g»‡€5 RDQ¡w çlÐªÉ Úádµ>eçp’³ç žíÛ^·p+Ÿ‘¶…uMgÇàiüš -è!ª¬Y8ƒ9\5¬¦°ÿ¼L K§Ú_ØÅ|Sˆq1€*p°Ÿ€tUkxIï´«Ü){{Çpâ˜lEdó>`ÿª%~ÉÒå¬ûñÎË®Ò÷´þúšçÁÛ, l{U¿9§ö‡] ß‚*iàŠu-èÌÖÐþí™ÝZã&æ¯8|«ÈTµh(ÎüÄ´×úúV¡Œ¼V`#qtJ÷_—½%mߪ˹<ƒ˜8>#ÏÈ@ìß„º ;¾ }ák¾êˆN¡O|uG½(%ûÎÊÐKßMÿa7=>§Ü1êª;%켕õ¸Ìwš+ýžçäÓ£Ó Çn¥¯.â—…þ;§^w„Ÿ$]qL‘"ŽÈ>Æë¹á‘µÄ\”*ª$N+:å5bâùŒ|ÆÕ)7TiC ñžS”Žìf°‡~º“Änξ‚5E~0CÏ|Ì· ±¢÷]â]¦YÝáëóŸ‚Ý$Ìq_I\«éÚô|&„QEÙ.×2Õáyö0϶uó?óí{8 ÕÅZ_PÞu…gOL§,íÆÁ>b¥ aús> ÖânFxÜ¿‚Ür²»ÎÑ4+Š# Dk˜Ó‚öÛs¨ë)CËv­£ùÄi'hc[®uKN¬åLD³Àõ|Øî †y!–MKú’¦ÚÕ^ÏÁŸ¦=œ}óÀ_IÙ'+O~"´+"<ãÚ™Ò7χv"ø HB¬ÝGþ¤4Ó¾‡š_žû'AëIx‡Îƒ×›ty¨HCØæÝý'T!ßë-–çðÄ:#þdÄ/¦ÝúÔ\ñxõ^ÅÃ3¯‹Ð¢›‡³à«­k&Xæã¿ó¦ì­kbÉíßoû[áöþÿ»ç¿áwÏ¿ ù¬¯CqxœmTtÛT½7udweïMY%’åv†³IIBê­8j;ÈvÓ–M²¡¥eϲ7в÷Þ”½÷Þ›ÃÔ“Dâ|Žï×ÿÿÝ÷Þ¿÷K(ƒûûk&ã~\áüË0[qÎÄÖØ)l‹Ï0žl‡í±˱#ì„ Øï3ˆJ¨\ æâ„ñtDÅïˆq$â¬@vÁ®Ø »£Ž!\Šê‘CGq4>G#Ç ‰c¹:×àš‡û¸׿:\—ëq}nÀ ¹7æ&Ü”›qsnÁ-¹·æ6Ü–ãq?·ãöÜÍØ›;r'´p^Á>Ü­¬ÄT1ûRC;.Ã"tàkLB'öÃ8‰atQg„QÆgwá®Ü ûswG—n€q.ÂŨå8{âUî…÷XͼÆZœÎ:&X7Ø€·ØˆwØ„7ÙŒ·ñ.÷f ÷a+ÛpØc± 6Ƹ·a–âV<„‡qîŵ¸k¢ŸbO¬ŽÅX‚ÌáD„0 {`/ŒC0Ç✀Q 
(ÇHl‰Sp2NÅlÜÍ}Äq8gCÁ8¬…µ±ÖÅzرœÄNÜ…1øßb96ø›bsœ…ùX€+¸»0çâB\‰óqnÇUø…ûs2Ú°>6Äj¸ßã|‡Ÿð#~ÆB<ÂnÀyf¡Á)L2E“½L³7âq<Š›0…§"‰?9 3Ã~fñ$L<Æžà¥Í<~eENdzx OsÏpgrãá<‚Gò(ÍcÐËc±ÏáyÜŒ4gó8Ï8‡'ò$žŒy Oåi</ãE¼Ä3ÐÇ39—óxçsÏæ9ø‚çâNž‡y>¦ÂÂ4ô#à x!²ø‹¼‡b6/Až—ò2.ä异Wò*üÆ«y ¯ELçuäõ¼7ò&ÞÌ[¸ˆ‹1ƒK0 3qŽÀá\Ê[y—q9oç¼ñ.ÞÍ{x/ïãý|€ò!>ÌGø(ãã|‚Oò)>ÍgpŽÄÑ8†Ïò9>ϸ‚/ò%¾Œ/ù îà«ø„¯ñu\‚søF°˜µ*_°:—ÎeÍiÁ„7§v°É›(-IÃÎeG$²éÀ$Yïôƒ ?åî;ƒ3 AÓOÑS‰ Tèíéu¢ª€&  D¢1‰V9«PËV%a¡Ä„JL(1¡Ä„J\òÇ%\qÝÍ‘G‰ŽÇC}IËNû{3æŒ@ŸÄò"*háØ¨iiÛ4³#›²’JÆ;µ™M—grÙt^)¸ ‚à ô¨ªSXÕ*TM ,  D‚ÅuPÃQ§[U—x]Bu Õ#1¥ßHJfçY’F$_Dö#²qºW£BŒªn§á*Ý««¼±¦vÌ€i[¹TÒÌLÛLE;èÑ4'®P´‘f¾`õ3%LMUÃŒeÊ€™7 †»¦©‘@&•+Ȥ>^_ïðDrM$×DrM$×DrM$×DrM$×âz Åa*Å‚•I™þݪ V{ÇSª§Øæt3T;ìBE­n$¥m¥Î»Duî%Rî,˜ð© —Z‘ 5”diZvk:^„‡·•&¯&?W“—«iˆTÖÔjŽ÷r„£J‹‘,LZ£´ºS¥ÕëÌ]Õ#Á6?k››ut[_1›6ìbÆ(”vÒ^B‰D•w5Ô±JIM+“†#5µÚcD5¥Ó;A§_«Ó­UÞi[ÙôèΕ*v¥mcº©ty%º†KŒìJYÎÕÈ[ùÐämºÝÀŠîa ¿ˆá¹•,Ñ99ì–é5júÁ¦§©9è Çk®ªPCé’4éUí —YSË;£åg´¼òSÿãKLÉ”úR«d=‘³¥¾Dƒ9?OÎó%·’J¶G±K}‰)yO´ü|©òÇßç®ëòjéòjéòjéaM@6º@D *ˆ C>º. ù èòÐuçuö»-zÎ]g‹+õ<è9;è59Xâìà³3Kœå9;kHìò:3S0BYc —/ع>óRµxœcðÞÁp"(b##c_äÆ  ÉØœ6I02h›¹99 ,Q60‹Ýi3#'Íá´‹ÁÂfNqÙ¨ÂØ±Á¡#ÌSñvq400²8t$‡€#`3/#ÖÆÿ­Xz721¸ldMqš%“rocksdb-6.11.4/docs/static/fonts/LatoLatin-Regular.woff2000066400000000000000000001253601370372246700230570ustar00rootroot00000000000000wOF2ªðD<ª‹××…\`: — ‚©4ù~6$ŽL„€ ‡( ®y“F [³'Ò*Dø]}W6rRÔÓ 2L”ÒØ®ú|ÁÃÇ; ÚTÚÜ[ƒE³ïV!×ý´®Ps¬Öi¼Áxgªìÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿo<©H³²•Xη¼{ ð ôD•g’g,T’”IYx&¬¼f%l¤.м)˜)zÛ,ï{•„5Brp8JŽÆîŠdGqʼnÐÓT¼ªg’S÷#UQ9(‡ù"CÎ%rG­+ª\\ˆWàê°V|&(×{B•;rœ¤Âd)T¥Ýnc7Y•M›ûLf'*kТñb‘!g†ŒU1…Ô”¹ä[ᢄ‹¬E^C†4ó ]§ªmj»pGÝ éÓà(UÊq’jç®±Êd·Ú°^%Ü´ÈÅQîºÄ»–TEð(<Ú[ ­`Ÿ¬Ý¦vi¯`GHÆÛ_¡hŸìÝ.;€ å€Z—ԈÃ<æ’ÁN«ÆÕìˆÙµ[Ù!G©MÌ1‰w4 Y? 
émp_ 3ÎV’w’ç’‰KbÒ§—Ç)©%p9îÜ÷õ‘OPíÜ‚ÜgÈCªZN€=hP"²„ :Yž"<ÙëÓª¡±l}—Ác›,´aŸEv³±Æóƒ‘ýVïÐŒ°—6Öœ”ÍzC@»ÞN#QuÎ:÷êíLB}ASSsje$GñÅ8Çõ¶Ø)v§W5CnaXâdôKø2:6]}ìõr][½¾~Uß`»…Ë d7k)uºí°Þ«NÞÔ·•ï‡;2âºu¼>†Ë½æ{û21~Çþ)¹Kp²hï©>âÎݾs„]j4µyß¾$ã7À:psgamjGpn_êGiö ~j÷™ýƒGžÓö¹B°N¿™­ZÔCýríï³þJV¹Ýëp¡pÎí¿¶ÿƾ¼5¿j_¿‡dÔXêåáƒày@¼7€n¯T4þàþ W á@íáUÄnßËšª$›¨–#H‡ Y{]ÞìwåU©;Íðû?’¡Oò™ðSœ&>ɈqÇ1>Ö'¯7øjŒbDØ@²Œ`Âãð"æ:ÉúÐórÑüY«/C¾¼¯êÖPå¸ý¯ìŸ·ÿµýoŒnTÁ·ê;Ý·˜qfÁÇÙbÿ…Ù1Þy=°Šµ!δúûÊòøâ\³}Õ¾:)?ØÿÑþ÷ö²ÃøÏö¯Ž­þbõ]¬Áä5ë7ì_ûÅܦð_Íÿö;­ÿ®þ í?íÿeÿoûÿ˜¼ü¯áð¿È["í‚ ý?¥Î,]–C;÷Ú—ËåÔÚŸá…ügíb´Ÿƒ6aè&ËT›ÀAèü)”Ñ!Êa:~’˜ÔÚjµÝä ©Kï#üä¡Ú¯Ç?Ó3=³»/À*¬UT$±æò‰ŒŒ ;"a£H V¿þíQ]é@‰òHS 7 í¸Q8 ØûŽSHõVµw%ÌëuØxê°þ4·wÛ`УoDרPGEŽŠñ‘hG%ZÁm•±!p[WæÚÈR¦€ EPq±EdÊt#Cq£8QsÌ\™–¥•åh[_šµ¿ÔÆ×÷»ïgý¨ÿ]ß¿õ¿{þŸºg÷¼ÝQhˆ‰Å?ü¯=û}æ ª«?HÚ(jZÅJ¤DÉ%¸7IùràH§J ß…x*ÐHÃÒd*Á¿^V€ÈÊ-öÚ_I¦*€$ÃÑZeWó.Ñì1Ã3É H6.BE©<'².ò %ÿ~° Þ7#øø“Ý«ƒ8,ß™øæìï¯ç_j•¹òAE 8©Ó3*@ÍÖï<)¤¡´¥J( bÉ žÀ.‘ »Ù¬XBBÄ6¶IЄ{ئƒhöT§âÿhMc%§¨ªT]xùˆRãªÈŽþ« ‡Æ-dÖs5²+«nôŠ*hÆñ#äWx9E¯ª@ÿL ÂÜ4ð4à]ñ@ZB-…K¡íÄŒ™SÉæ¬>8ó— 0ANé8Ïå’Tò=+€€ø/€ÿ_›~IiÕSjðJvbãßÒß?wOSZ’)ý˜:ûO‡©HbÈÇHü­Èrn°%ùp9VÈ­p? ?Ú‚ð…Mtøî*‰,¹ Iž ZkÒëÚ‹ŸtÃñÖ_4£>OÁ½¾ð³ÝoTƒÃ.ÿÉŽñ~o ð¿jcÖ­5òs½oY颃Þš˜Öù(1ÚM÷$tyo©€AöçPl¿>ÞÔÛ8’3vy¾Ï·S0p0þšóUvc%öæR’¶’€Þxp¥!}¨ñý|êªÊ 08 NhB»Oo&H#'Îöýb&­¥75î’ í“+«–R;Ç$òP3ÞÂxçßXYZ[¦tªt×é5I [;Æ^–­€kg# ,#ɲ9I:µ%…²±#"†g~kïæ>òrb3B—¦äHöW»"§´Ó·HáæÞ½`5¡Ømx—ËwT‹ž8Åùù=<–dYu µ”öƒóJ9ZE Uq˜hr‚iA{1H"Ò.òƒ§a}¬6iý^úÇ—´›™_ ¬xÐæíNYPÁ ! 
N8q`Òz¦pD’êó}§ÿZv¦dgü÷ӱVÞ%1•xOἈü‘°¯±ðÿÌÖkE™8Í%/bLŸkQ(ìîÿE¢¦«¯'¨®š¢â¹™ª¯/ίäjË#Nº^ÉvÏÝ&!5E°hUN¸¸Óh©1²—Ú|é?Íž“Ãä²z˜ÐHûSÒ³šœ’¡#Â-,âþ‡¬¦»Z3¿–I?:¢¤^`¢«º¼B¦ØÚêæ¬­x®²ffp…Y …Z¬E\ V)*ïî_ЬöÝ‚€¡EôÎã²x+©K6Š7SÚåq A¶°¸DLÌM*'/36Ø]¤ŽÂ*¾ ,,Ê"-Þ:QqÑQin¦æ&gw¾Á Ïú­PÑÒ0Å‚ T ·~þU5W€$T\Þ“¯ú.M|¾'¥•ÝÖ]Ÿ’akuÊ„ÿ?êã"AJ–A¸‘òJ.)PW@ÔQ¤t.ò幦øR_×]O#(7ÚNBH×d§Ù—^·Òû0&c«Ó–e+}Ë:fÌ:$<ÿý²2ÚG¨L ~À$iuªdž*RµZ–éG­+ù”¯Yãq€‹m®¯õµ¾cQ‡Ð¢!t1[TátµbÿÕ²Ùß”C¹ÛCêÒì9b’!; ©)ôáÈBSü¿œž‡0‡ ](ä ‰uŸUìcº¥°Œ–÷*Í5;²edîœw…‡øCÿaMÜ—ŽW²ã~ü³d¨Â¯Ë_äßç¿èÿÀÿO¼hù²À'—)òò'‰£„YA­Ö3A AKP8tú1ôÕØ&Œ cÁöÁ‘ðB7ÞÍ)&¸R®Ã¿à à„bâ…cå©ÂE‚%OâRÉaÓd:Æ2N]ÈÖA#ôá•&ÖÊ¿¶Â¨)€Á`Á8äFÚ®ÏïëKñŸc7&±\†ü?ÉkÑëÂîV<µãšd@C-1÷ó¨\Œ‘ÓNXº™,ÜV49¹x¬ßßdÅà±hœ(A¬=ŸìHÛ-µ‚b8ARôÆ«Ï?QGC3-]-D)™|Q3š=|™'B ƒ N²¢j¦fæ–VÖ6¶ˆ~úò €̰\ªT­Ñêôc²½3³ßZ.I3Hý#¾e=Pœ†¯Œv Ðx…qã¨Þîúý »ãñ ²ì6—|bM§àžZÚ¯Õmß²ÏkÜ.wúbKô’¯qñC§}¤ÜSRM7¬¬“Ù0hSZ!`mŠ·_î-+ß±é]‹ß³ù}Ó?°àcßt¥òñSžq,ß½Åý²)øeÈ7TµIdÄÀ(xŒi:ar•ï¬ýtò³éûÇþT/žùÅÿIèI]%Ó( hÙ9@àxUŠE¡ÃnO Žf”ì¥hÔ @FÖÝÚ-çU’¢G+öîžxDIVTm4º:­©Vß~Ýå*Tµçë37¡GÍ‹8ñI¸M¢¤yòl©OJ =ý>Œ®Íñ'È(Npy|0Ñ£Jÿž2…Ê]“9¡ÒeU²6_ےʵënHܺ»ù=‘g^õC!v%6Zü$¢.iø'‰V=öÕ,Ç ¢$§Èd±9\_€b8ARôãWϼq +?÷|ómƒYEq„³J|o@«"­nÓóëzcYÕ¯'¿Ë‡¶ÿÌü‹Okx;Èàæsºm ‹ûuIJ}n²‡¼4}ÞïöJ±ãòø¡H,‘Êä ¥J­I{t­Ñêôc¦×6»#çËwÇÓyVðžxùGoûšÉ¦Üaþ>\rz§ L“?ê|æ¿V\­Ï?E̳Rôür+ŽúˆÛS.MWµbÒÄfˆKÛ­uFÂ6íFrî–ç(þÀ•®ë‹:ºhðbèH(sŒ€FŒñŽ›œÐ»4}æµã;Ÿ°%áJÐ Ó‡ƒû I-y=zŠOD=ÑôÌ—õ³XÒn£Dhb¨ák1ëãæ³ñ‚–fëÝÔá” º~úˆ 6›öf!ý ­CüGRÆ1†q¾üI41ƒûÙôRɰõúÇ*úˆÊSùÁšòý\W×4z¸ö6÷’õò2ä½kö{¾}ßšè:tÈΣ÷»ðÄÕ½²ªË­;úßjßÌ0Ê=¦wÜÞŠóWnšœõé%ÓÀ™«êe—ç̼¤ŽýÃöQÿŸ¼¿4Ô74ëjÌ\Ü2A[Wq¿Šÿ…Ê0-uX}8ýø(ÌÀRšŠ8nv*T†hiÀÈ2«G‡‡r‡Á <Š,ÊMÈiuêc¸¸. 
,¬İaiq`¼!]– ëž`Ž‚¿Í±¦Õ§É匠Xx-HŠîF[íN{Q’UÓ Ó²×K½CÌ4™¸ë’RÒɘ–EÉ¡/÷EnKœ–¹¬ð(¸¡(®$¦lVUVÍCu׸ت‹Z>Ã|€}÷6þ¶öœ­Ð˜îÌM5ijE[µ£CgMÍMózt=M-âEÃO»¼S<]B僳>ùœ/ž¾úötv„«Ô3€*…:oäMë–íNŠžB0‚b8AR4Ãr¼ J²¢jºaZ¶ãz죻õ×,µÌeňdž”Åf^ÛBcP‡3KÁÐFÉMªU›v:M™æå‹‘_}âï58ƒší Or2–^&ML@lª¸C$+å ±a†ùb† ñŒÃ&ô,ÏQ}l€S Z(²Û«áDñ(qÒ{øn9A“5,¨˜ÑDŒtfÃÝY…sœASf#ÏÁÓ/ïNæ|Ê!†aOÖl>*“ŽšP]hR¨C3—Í–¢˜ ZR›eZ‰ž¥xm”ÁW]±­bÕ ËÄ|²(Kò‚+ŽM L3,o;Òˆen [¯v“û®EŒƒB 3ë’ûÎÝDyX]‘šù‡‘‹k1³¼6×RåySEªD†õ8u,¶xˆ»’0¼4 ˜6“[0(¯©²°‹]åtˆ‰‰‘ìì—a Ñ +¯Ù\ÔŘTÜà„çë`Ÿ4ê•3–×°uý'ÉAÒ}ÐI’$éî²a{´À½ª‚$IyÜ{§×ìpäòŽ#šUXóÌ¥Žl])QgY®q£Ž*”dj9Þn… v•ó¨ù§xW†¯KøÀ‚×C}7{; »j³"ßñßNz%7˜pR(Û`²:öªªúõeÐA“cccc cc C`#”C„‘ÝMg+ J²¢jºaZ¶ãz©W‰É#‰»+!)%Œ¼,J]·B¿Íàîºç¾Ê{dîð"Ì’aËP+ü …Ç]SrYÙ#U3jŠê®ÑWeµN;u$ó!öSÜGøíWt¬~Èwæîš¬µmÕŽÝ=~*gš×£»!Ðúyçî _#/¿b¹¸§.l[_Åáòø¡H,‘Êä ¥J­ÉʵÙ95\îó×Ç_ÎD±@2 ˆÏ"+p&b ¯%i c…C ¬ÀÇÕ*"3È?fŠÁ²].'I<ÖîHìô„W¡õ¿ÿC©²9êFL uæœ{æ ƒ„`Åp‚¤h†åxA”dEÕtôlÇõÞNkýXöd޽Ÿ Npy|ðpÂ!’JÙ<Ý,¿1>"E‹!qàÐQŽÇœ8u–ó¤ —®rìÆ­;÷|úòí'Ý¡zúùmëïä¿©Ћ³J¬é9XY‡wì•pÕE/n98•Ÿ† #ɪ‹"FdâP I)éd`²(9ôÅV ¢Š¼Jú•mST³O[åÑrC`“8Õ>¨ƒV¸ÜXÎÖ•7ºóÐP½NhCà -v%GªÕã}úzòí¢”‚ç¼®Ìre»„ÖIQ¿¬kŽñ(óçAèûõíÇas؉/îQR"6(ܼð-ƒÓÐÁïnḬ̀3æ(Uá95în;ßñöŽÔ4®µF0:ˆ0•í႘›°;8,[Ë~‹cÈòŠ2…9É;`€ÁJÊ*©:£–:\ãâMGµH ²%Ú aRJ‡f:’æØ¼ôÄkÎ!œÏŠªé†ie“eqÙáäûóOÉl®¼u²}!"î¡Z1N\„ð¿è^+\¼È Jó:òRÔ¯þS¥%½½ŠÒ㹫ÇçÛ|Ý »=†ÁÈw×>{‚a0 ƒÁÇqǹ³ÃÌÌÌÌn·Ûír¹œsw¼Mîš¿aÍŽTÞþT1óîÌÛÓáõÕlmLöˆü|!IV‘vXº»†µŽ¯pMÁÿŠ7 GÏ¡HâL†ˆó(%‘k×|½ÛNJDf­\.! 
íÝþe÷r/„Ö•W#ŸŸL A•BÂ-®Û™H¢Â @FP 'HŠfXŽDIVTM7LËv\¯el}­a½ ‰¥ŒJ+=íæJ–v»’£-Fò(†\_ L_<Vpš)1°Y²»8d¸¨ #m2\º(Ëñ‚(É)#™,6‡Ëã÷™ÿÖ( †$Eg‡d)ÉØ#(†dYͰ/ˆ’NoÈhøØ$yí¢jã'ƒ¯\‘Öð¤ƒÜ>¯b$!AÉsE_ÅkÞu¥“›ïpy|P$–Her…R¥Ö¤…wzƒÑd¶ä±BΙٟ?Ç?"8hXÔSò eç—Wyü­_À€Ô`Úy•¯¾ !A1œ )ša9^%YQ5Ý0-Ûq=˜pH'Šá—Ç'R\›3]È­_¾o( €ŒÄèÏd±9\_€b8ARtZ¯&&á¤À®ž¾¡‘±I¤g…²x~8ù9pê(Ç;Ÿ8uæ\LB‘X"•ÉJ•Z“5t³;rn»Nîûï³I|Ñæª/jšñJ@çI×ér-âb“À‡†U3©Ö´õ˜uYþraí¸nw´-ì ½«Ì‘ƒÆQ ,í>ÆÌ1ì, ¼ÄI$<°>‹£E;ºÒ‹~é0l a\ “˜–Ë‚à ’¢–ãQ’UÓ Ó²×kÿ0ºG®ûyX¨£«=H¥•žv«ŽåÝnáh‹Û‘Šá—Ç%ÄÇ%0)V.dAi/*ÁH›Ld¤2Yl—ÇïÓ.pO¤( 'HŠÎ"ÉR’qÀÈaäôÞÀ8XÞP#Æ&kMfBä˜kN]•hܘ™€™9çI’$I’$)"""¢ªêýò[íÜÿý›KfÈ×—!h»IˆÝË\}ór“Á ¡ñùŒÆ¯C AB£"$ å2„`Åp‚¤h†åxA”dEÕtôlÇõÚÿÌ©v?C&$r”TZéts'Ë»äh‹£H#Šá—ÇÕŠ%Û"{‚Ÿá Ž›a̱Œ–à"Ñ¢€Œ´ÉÚÒÙ2²rò ŠJ1"™,6‡Ëã7¥ÃJ›5ŒB˜{$*NÚPu Í,"I–rÚƒ'ÚÀ¦-ÛvìÚËþކiÙ§Ëíñúò£]ôm¿mÿÉ´±ûrãaM¤ÔÎT‹eÊC Fê #(ù°UÉTÂdÅ6v¸<¾@(K¤2¹B©RkÒjwzƒÑd¶dÜ쎜ñ—Ûãõ-ýoµÃþO*H&­J‰Оw‹éÚeåP¸úæå&ƒiCãeä3?@ A}B£"$Šæ2„`Åp‚¤h†åxA”dEÕtôlÇõÚÿÀrÂ)u?ã&$r”•V:ÝÜÇòn7p´Åi$b8ÁåñÂDñyVÙ—3A9V6‹o™ºçiYî‘tQ€Œ´ÉHéÁ2²rò ŠJ1’™,6‡Ëã?¥ÉJƒµ»(HìÇÉh‡áIÑ© W×ÐÌ"^ÉR’ÑÈçĊᨖãQÒé ¥M^;³õR^€Sky¦àp8`Â+8@ @ @ ä‹!Ï3Àb‹õh4úÚ¨HL¼ô-ÈY(6ÂC€¡ô¢1-ˀឆò[遼Y»1­ã±^¹šß]ò…øKï•×Þxë÷-8abÅp‚ŒÕ Ëñ‚(éô†ÊPÔjøC ÚÒÐ+S¯…çìúw¡{88:9» „\š*-èq4ÐÄÊ—µ±š7_\ÖÈ.NkH˜2Á*óC¯C6Á1l#8誂bÕPAiÕ¨Q¦^÷ý$TX±R$SŸôõ½²QÒÎÔgb8ARtkB£Š–‹ ¢”L²¨™té©Ù…hæt„Å2"tEê¶ã*O«à¤þÕvGâ€þèÏþН]§µÖ߇kŽ¥O¡vytE¾þ ÿ‹ÿïÃUçtžÛ7P1µ]¯óB…2ò©V¨P!ŸjÕóê&IÙÈlYÎB§¤ºbT^jJC´ ºè e$ÝH[ôÄDÇÌRVغÄa 8ñ\«Þm?÷/K==Ñ12ÃìP'qò‚ùÔh‹ññS \çŽé¤þÿVsÛ‡Pdâ{%$ç)¼4VÂrz ¯©µbmHt5㯌½Î¾x¨»öÇFO}öõ`ðŸ‚0333ócÎÓΘ9ažòß—ßÝþìUµ“©I’$81䧆6 ‡'#!S¨ÑFg0Yl—Ço2ä#±D*“§€J•Z£ÕékPcMdö/V[ö¬„®ª‹ž¾¡‘±I$Y¡,~2œÜœ-!ÿ=#(†duͰ/ˆ’NoØ#0}Ú{†¹…¥•µM»‘úôe°á¼y1’€Œ ä(Þ*-º±ÁdÅnY&!A1œ )ša9^ø´rÃ3²uoAígÏ ÷¡¿×}Ûgÿ B0‚b8AR4Ãr¼ J²¢jºaZ¶ãz¤‰„Na8AR4Ã>æÐÆyÂX[Òó*·ª¶u¾ã:-ìQù!c±¸®õ–ŒÖ^E@©ífp1±ÔhuzƒÑdÎ2€Œ NͰ/äœxÜè_çv¦#ƒÀ`g©"}­üÀ`0Øê{Ûãª1Ñh𽚃QÜÕÉh‰VR;hhV–—J¨šâ³§S]†Õ7*ßMùÔ”XŽk‰öÚr®=ÞÊ¢Ö™Q]ÉÖ]úªŽÖHwû7ƒ¶·ž¸KÝér%}®­ßµw¢ò\šŽ§Þlš‰½ì ¦ÝlXsiu¨Á–sîZkÝZF­&ÛSëÝîn½™Yï·¬ûá—¡gélî©Ä*l¦Q SN,'G§?'ªCQ{¿T¡V§4vj´ì p»d'GÜÀÏôÔn¨KX©ûñ$(,$œ1¼H¢œ @FP 'HŠfXŽDIVTM7LËv\ﳆ¨:Ízž íAPõ¼5Œªæ^'Ò*†\_ ¬ˆŠùk}Rr€’Ï”C@žŸÒ8ÓîÜ],?éðÙã‹ñ­Ïÿï†X=ÒY„`¤Mæ3Ž1Yl—Ç 
NÖ]‰&œïêé›Dê.”Å/…“w¾ÇOûàÉÕj%“+Rî—ôx£¶_G[Á” ,9ëb1d)•)–É’žõx0ý‚é”ôê3gýµŒ!±Ôrë'F…ÕàÌÝÖê¨Boȧ;¥;Å `¤$'l€vžêN~†øÄ„4vâìËqâŠ;ƒñŽæ¤·EÀ¨ ö+v!d@ ¬V^§°ß3ôÄ„~Ý_ø¼+úö ÝLOñÂ%Ã_¨f†7ãã½_N3Oæ6˜ðAû\q1Oц0=]A@N}^ªÝZò‡H;ÂpøRòš^ù)ËWæ0ŒJÌ-i·8½ÄÚ×À©=h´A£­V“r'š:ôÊ“ª…FûÝZÇ7œzÇé­Øº”Û…ÿŽy#E´=µÆònGž—ÞD©í̸J¶xËÝ¥†¡k\6ihŒ6´§Æ£Ã1–bLÝÞ ßÃ4ÆØÔ¦þàüô, ឆMø×á|h„8üüVûÊ—PAå@n6® "ïk EiÑãg‹‘Q>!2x5–lé#ÆâdÓØHMƒjnCjâ‹Ð2‹ÃqÞ‡WîA!~oÕŒ —²#o£` Og®¹›'„Ù:Fñ+”±e¯Ð¹iã„v¾w×Ù†·0YIèÆ>0t$4Dý½†6CM5ÖÞ=˜¿ã” Ä«êú³£;öæ3ñ6)55ø)"¡î0ÆÖYHR½¯)qBWð‡?0woÏ%DÆå_7ò$‡ŽGSçÖUÒÝôä@E¤åÛ‰ÑLMwý{‚‡‘Áð†£ø§(cmë™”-"¹ñÎd( ïæ‰ þü¡!Z’Çä©‘ã®ÈgÒñ7©Çòí,gþàO$ûb¦ãGþ'eë_ôÓ§…zy[’½kòÈù…2×qÉyzÃÇÐâ]á\ßÅ„¹É-UG»ðÇ’x"©ýÝ®B+ÒGI3®í»ƒ²…®>|ä½×§òCg•ðÊź¨)–›`ùªe Î~l<™:õƒŽâΦ¹Ý›§÷„ýZd(¾áé”KH„÷j{eÁ©™ÇXÎ’47fó²ÂÀg›lц'°¢Rq}JOþçØAÔ¤ÏÖ+¯qBɵg‰É¬`™hÆ vì°!u7µj7ês·+!ôùŽ®3V«ú¥‰½wðwÌjêíø¡>¾±{¤;Þ=Å.Ñ©ú½R¢±G©èÝÜ7®wýï›’Çàsã¹)I~^‡þ6x8sp×7¾D& ÜRîºh,ü1.š¶Ö6¿Þˆ@—çfXû©âMÇõ…3r7†ÿon3”ª†P­)¬MÙvœ«påY ç‡Ñ¸ñG\š ý¿LU°Ætô:l¶§mçtÝûð‡:¥CÔŒ tÁA¦A#ÐôoL³ŠV„IS¤ÙW²û8¸Rñi“4ÍA™¨Ý=uÀ¤ýª»rƒá&2·Ü&Ü w3÷Ü'l”Ébs¸ù"@ŒÁü[\Û*´ÞjɤUQ¨´AŸ§Ádsùö”Kƒ*SPåÈ|2H¡‰è9>EI¹yL$¥HʼnÈI=ìÐ)L<ò¼ô¢íË¿4ÏóWbR+â Ø=wsít ¤Íp•°#1ðl°[ày»øçÁš´XR<Çÿö¥§=v3õ¶•êZí!«»â¨ˆ¦×²ž÷M—ÁF+ŸW>è>ÿ²´FúæÛï¾ÿå·bR1µ³cP~ŽT (‚bؖÆHÉÒ8²•E§.e)‹B‚»ášK”#’zʬdTP[åYÃϾ)¼>âa¼0¤JÒ_‚½ëVRB÷1ïeŠ”æ—æg¤G‰F§Q©C)E­RVRH¯²H•¨,. 
ì«'ŽJÖÃCya_Òt$¸Bbƒ·y…ƒ?öžw±€þüm2{ûYa¬Ú¥S¬Í·i‰¾ïªi3ñeøGÖ_å}Âø¯ÎA8áa €?ƒð©üÃ`¥¹f”R9{$’!˜èˆŽh´»e3@­:NXå…Ñ$ðïVÑEú7f#‰tÂÒIÓ–õ¥Ï¦»ÇÖ²}/.ç}ÿ­Fòíµ£¿L,ø¡ƒ~¹ŽÀ]ý—¡3“8#LÞa«‡LvQlxïSéð_ÜVƒ€ þ!ÍŽ©Öг?‚@˜B&ƒPœE§ŒÊ_¸xéò•¡k¶Ç 1,X³é‚kîxä…w¾ø +‘vÄkáÐ"Ñ0%ã‘Qg¾Ò²« Ú¥PT…ñTxAŠà­Hþ¹l¾µ¶€‹FÔh4Wß®c#]@;Ú¡ñvþ¦‰Ì tX@™²„ äÊ *ŒTxP¼ "C„Šá•]H¨>Ðð@9£Aµ7Þ3ºÑ”-hÎŽ–ìiÍ6b´«¥™.â>ùºÔw› pŽîf£vÝasÄ”¹ðÔŠãN£ÁÉfÚÊ…B)Tq.¬sµFÌkÔ‚¶[Ô˜£Ú©qK×'ʃ2ídâ=?Và…‡ë§1³ÿ¹æ™en´ŸâP¡xÞlø9ߘ¹$™ŸB ‰]íL¡‚€úöñ87¾¸°×ƒîõÒëTw¸þg³¹n…š×‘6q6xп&8hÔmBÒu:Ó³²¡œhðËtþü7iÿ–ðÂyw܈æl[·á”WÆ,š4ï®{î{à¡GàglðË„qÁ;ŸÀ¨‹p{ÌX+è5VB„‰Áˆ“ Š,¹ðgI‰ UjÔi8†é¸³VÔÒ¤C—>c¦äÈg`lx8¯D© UªÙÔÙªC¹ •ªÈ…ž°à¤s ÝÂ"¦MLÁ¨V »zušÒk«6í:5jФE·>½ú °bÙª’~Kºê¦[N[uF¦Ë.¹¢N«÷$üôÙ_ÝÎÜ/ÿü5ì·j5]H[b¾¾ùÀŸÎKŒ)h2BÉ“”dɲýÝž¦Íx‹Õ¶ü^º®ñÅïö 8’3~–z5NkR½X?,‹Ä¢±x, Kò°|ìövˆÃãH¸hüf¼ Þ€Â#ñh|^„7à-Ä»²þ‰ÅZx ¤Åè[ÕÓwˆ…cC°X:ñØæiyäŸjê4ÓÊ'0®Ãÿ{à¿Gãv7þù;áÿ×¾ßÖG¾ßø>x†o9ë³ßsßcoþ.ÿf°uîÈ+ðõßçñɆڪ¿ŠÀ§ƒDHå!ŸìFžž;×kn§·HüºÁ¹çŸú~ÂÔ·NïÁOÈ˳ßïÏ‹GE#æþ=_sÛQ§µ¯vØ7‹.kwUÅpB]ù3ÖôÇ5ŽÄð ¼Ãó3ö1-T4±’28!Î\I²¸¡œ¢7$P.Ç•'­TqmÀukãÐ6[qC3î ¸ßÅ{+âJxsg(¥7MÓ€¤1<­Â5‹à¥U¤ä ¢d]Ý õÄͤŒ 0¶‰A¦†˜„_p©ÆPŒ{\íž„Æ-,žaövrh5•šädw¢bzÌÖ¾‹ˆNd7{¹˜r)²ø”fyšãež¶R[àë0‡ø9ÂßQÿ¶•Q¶cB, *‡¢ÜŠ ™¬úÅš_­Ëq#e¥•º-Ï]ùªÜsÒÛNx«¬rìDõ¨°çͺ‹sÓ·ó {È,…âçlÚ²úE‚ ðÏkOþŽXðèùêñê‡ædm‚Éì_Á¦×‘Í÷y·Å¤d+¾‰éKpì/ƒ ÐSZdéé?4«| ø”¨Ú"ï­ªM¿‚­þ—Áތʭ÷ 7m>!,1ÀËÚŠ?ghúiÊ“’‹ÍâÍçäÍ»6·ÏØmò´½9Q׺為·¬Ž©[òŠ|Ne“ e¨?±$нy÷QoS)‰XTŸŽª&P²Ûq€çl Ûts€@,lP«[Áø*Á4Â5Ê®Ï@ 0•âµí¬ˆrœáUd‹ $zÐLª…öKô^6˜ÔÉêu*¯\}““å¥í!-D§/t[¯L(± æÙ}Ñhi¬àâ( ’0Iãæ …ˆ£´e*óÑÎ$&1¸ˆ—½¥W£Àò>•ÁîÀ D¨-Zfîä¤ Úr¤‚·%¡0í1«súÑpe²VB[z¥¬f¶t׿—&m²w‘¼3Ÿ/û€Ž“ ¶òyÀIè|‚[?Î4fŒœÝ›q1`AØö¥È?ÃdQ[ÛÇÔhOߥN«y>‹QÛ€ƒ§¯ÓÓ·Åùeoº‹Žæxï¬ý6G,Ï ‚-výbÆ)°j2aíUê–;…Zl¾už×¢7 /º:Gï`ÛD6"VªÇ ðˆŸžâ½w; ÁæAGˆ¬ÿ;Kï–F-Ð4¢*E ÿmÏïöÌ‚³ØÖþÝá<×îÎä€-] Ðá® X!ˆÂPE`ŠÂC(ŽTå•|h¾Sz õ÷¬“رÁÕ³”íšç£HðÈðÈôÈòÈöÈñÈõÈó…¢<’$ÎT|®`gšñÏ3_h…Œ¢Tû¸)ÅNq ”BZ)Q«7­˼ÄÑÜ@™o¬`•ÒØ™¬,RµëWî”4°Ž 蚪W ;ºà°aÀ9CP+ZýæãçâžÝìR]TA£Vª~"Xoª1œ5h€ÔÝÔ¢ã°zNƒV Mê3·7±†R•út›Ð8ÕHïÊÑË:€V¡vBpK^9h%*MN=,¤aÞ)¨Æa±ÖÕ—o»K?ñþþˆÛ{H÷9ÇhBR-³á0mY lG Šïé-0ËÖ ¬[X³º·X>?¦ÍõÕ$hjËðacY‚í† Y4mˆ ˆË=³z6EÌ 1Ä$Wß«œAL4Ìo–φ¥Ão«"Hàæ% i²”žw¦=¦A£yW–€’ÁÞ2·\¿Ç_^ÎKi“— Pov Yx 
ý"–ŸfSý?v¶À48ìù-ß$yè%‚+Öªé^åik˜¾þÂÛASÔt€zo\a¤EÓŒvå@‰:“Kp,Œ¬%M£»IëêÝ….ûãð·b•xPvøi‘Gë?Œ¡nóY=±Û‚鎞6oSÞ¯¾t¯o¥e'ÔŸ)Gªb ¤ïGG9Bõ;2joÔ ­.Lƒ–M~B,Ê n4ØÐõ ³Šª3–¢&µS@GGõcH§‰š3Æ’´NµgŒ#éœ&.êÎOÒ;D|ÔŸ1z 9[ȧY\Û¸)ΔөϞKËÖTªÇØ*µaÒ;Å2)…HøDUV'‹UÏ©^ˆØ@TMuŠXûŒûB¤´T=ÙÔ,sàÑoN;ટ&Æ|Inm*Ñ» @À`MzÃ%Ø.çš>2šÖ .øàŠËÁÁ˜‰Ä•hU¡{¿1¦%ãsµÄ ùÍͱ’I@+³&‰fì˰&Ö$oÚEJ’,AOZY¬=¿VÐÔ”NsFÅ´zû™N2wcxJ雦ieËè·zÍ4S‘FS®eMŠéý-%)vë/6/‹&ÿ!š¬­Yµ1!e‘ ¤ó’•k³§ àˆ2)¡Å@.iPc4yãu~À3$ƒ•”†ï"Ã÷¦œ¹ÉgÒ eµxch"5]®(1ÜŽ¦~¬¤à´’ew²˜;Þó±²nÎ]jæB7‹WVc‡±\L‡C[úQon>¦G£âAÜÙ_W‡Íõ÷ÊÕ›–Oo{ïÅÆ^më! ŸËahÈÍó}.þTt¡X©¦0ÉŽ×6˜ qÞ(8¸À@Àuy”ðLÏÔ¼( ü«4Œ[a’ä€f3RS¥ö˜¬¨>V̺cáyæ°¤eX}ëSF"EY“ð®…’F>­X‰ýÆó_÷_–ÌŠ½³iƒEA®ô¬Åσ™t¥øBD†ôX šÎfâ‰$ÐŒxÑ C¡ý3_1H¢BlT”í¼3¹è÷±¦I%“”`E&Ò>Ç[É_P¢ß¬`¥XØ&ˆ 3§ùÏú0»˜[˜[ÛÊ”K0¡ŠéÅX¢ˆ )èZ& âdz, æ"Ez/‘qK §‘ˆQ¡]:aŸy-@«/÷oÕ¡|ö*3«Úâ$¨(ÚM}›5À<5P`s-WnÔeG?œÑÎŒá¬yšC‘:,&Ø~ÞõÀTµ&šš!;¾‰5nês-=– øUZÌØúÊóiuÑ9¢dƒ½;B,¯÷•1 þbvAÙc¦">}Šó,”ÕÆÐœ¤bj w&m×^0ÈÔ ÍÄa¤— 4Å’zx¥ç»A…í~í?_€ùÌâ©Vd´yù>v‡€´€tWí ,Õµ†Þ~‘¤–‹­„êjÃ!…XtÆeZ»š”üj8Þ˜Í Ãçï¦àv‘(!f…G‘–™ÕŽiÅäÍ„šç=U&!q³Á)Ð\îߦ‡8N²}SêÂy‘<]K_H)jCÞ™nX=޾ƒ$ü2÷‰k€ùÉ 0Å\máFɬwkq–å·Þ3·ŠBÍJ.V99:áE61œÇð½ëq)q Åä:V J1’Ã33^./A¯ôõÛD„-}¡ˆYÖhé2fõ |Gâ2*š´xÜÏ—]¿Ý#lWY£ˆy¤lÓæž»¶µZ<Åæ´ZßÉ…Ç£~"§¨,¹”ìZt)<Íl‹¦-Ðí¢ÄÉÞZi>tB£jç7!¥`ÛBÁw¬Š{ºÑLÌù X8~Ã1h”W™ÇLÊ•&¬Ä$¡{ýæñöªÂu‚–ø®°(iÆgƈ[e‰ àibI ÁóÃi-c}VV¥K¾,§·Ÿ"•Ò³pvT(>9SŒœXV?îºD¬žIšÌ¥zGæQ°I>È}_~—Ú²Ò"¥¬;lVƯÁ¸•Ñv#Ó³/OÐõùÍgÈ6Z kï¥%­&•yÀ3.éR|í& XIB×s ÑqަC_–üú»Èd6Æ @-Ó’)q,Ü’N3pÓšRZŽ\CšË²X/ê!µØ(2«½O™oõÏ ëÍÀkÚ:7%5šdV@îMZ>­­hÿÒrg—>ãT‹¯t2 O†ô¢œ}w7Ÿ‚ª÷“«é ÒPo²©ñmË/FâƒÅ²_NÖw)NÑü~ɹÀÀc=¥"f ¹e5²Ó‘L·Z¶# ۙϩ&ñ¼JöÌ|@P–Ð"YµAḱôAéÚgÅQ×é‘¥0u”R¬Ó.'³¶e­_*©§±O2öBeQk ßç[¢ OiLL^éfá ákqÔF» ËiеvG—ê‹,W2ò˜ün‚4So¿E^Ã„áæ Êóô^Ð#ŽöêS’ötý§ÂpdØ©"ǼÊE7ϹZÉ)ÀÒ“ýtHÑ4ÇV]·%NÖï2HåÖ¤.Ò­(à½UBìøqÚ¼U†RèlËj¸›l¢QèpbsXØq˜'>÷¨-r¼:CD°“¥‹áÍ tÉéIê㸢5]çç/=†L7°Ëãù¯²®ØŒ:âû§Vd/óòñ‚%à¾F!âNXÖyC¹^ülFRÀ3yÀ7sTr^…Žj´4É3ÏñCž#ð/©«ú—³RúѬâgqîµUÛºg<Ÿ½WÆ729°Ñ¬ÒÁ‡49ï‹Ã‚¨–M Ü“ì(J56}L˜ÎjÞ&„²6@G7ÆÆ÷¦T:m²” 7©ýqHk|Lño@Iló/ÔÕµš,&üšøÁË.Bq«5'’wù¢? 
»îžY£î;e*Á ; 5þLÝ<ßç$ÔF«~ݪQX‡™^†Ô (t4÷6 Þ^Nl;›f'Ï[f J†È Ù)ŠªI‹ ×giSgR)WŸÀZŸ½a{Ì[ÔÙz› nO×%C/¯ú,ºk˜åú•Õ5¿Pïx̶Ûdd—¼Ê£7ÀˆÜ Áþ=‚û°R Ý¦™ wÙÍù.Þ+fÃ=RÎæY Ýã<âÅ`^¿v¹œPãbi8ÇÇÀ« y ¬³ÊŸõYâ_žß<Û홲©@ÖìØØÍ©ïÊDS˜"òÝ*ƒ'y”U(˜kÛ¤äÂ™ßøðvĉ@0ô!ç?áÄ=À>^KßYÓ5@\ÆfÐòènã‚ 44!ƒI|Œ‰—Èb–<Ã4ñ|­4w0ßïÆæX7‹ÚsÔyÁÙJafµ8KiË G”Þ ;:¼ÂÒxíPOQ’Ó§&Æ5À5ñ|õq„^¸$Ñ<.~¿É¹» ¹þoä¬êÇó?å"t_™~í¬0ôúå-Pˇ¡È‚Ã17a¹/ƒß$%ìïÅ A êUÚÆMl™âµôWžÒ¹h§šLSÆn,= 3ÂÉ !lº:³¡ ±è!=šüïUa׉#Çã—Âá{-åôõǽ à OÝhe3Ûÿn­ãí8>·#M63£nàpñ 5‹õ9ÀvÇÔZEµV²Ÿ›ØÏ²O€€ôðò†­XàyCAU Ï’¢òb‰ÀÈà°×]@,ÊÓ¸”"¾™Ør@LP‘´^IhÁ¾˜ã…žÄï´'À¸}h‹»âš×ä9¿€H‹;ÄN¯ä„Ü ]ÞCÒìÁ,Añ§—ñ˘ÿ‹ãŸºªª{4¾ZH~üfÝbíðnvTûšâCÔfå&îf»¯Ge Ù¼œ 1‘7‰^»OhíÎöQ|eÙŽf‰ÌåKÄkWÍ Ÿ-üË4ÓPB‹õ¨c+¥¥ÍÍך Z·¢“ëCÑÀçᔣ` ÿ÷›Žþ¦`žâr¢¹Uâ{cYnù¬ÿ8à3æ÷µq@Kލèv õpˆ&ôšASãÇqÞ’ÙSÑ¡@CƒݹgâfË Ð)e TçñŽÀ¢äíÆ… <´ïºËÝÿìr3£•=-%Á³Û§I¼ß€Ø Ãëý´Îl“†$ã§3sD˜(æO±sòl–°Â0 ¶ ת™·OК[5Ú‚ˆ94ìw[’œlˆ%ø/äÞV·â‡ë¼ övð9îIBkô²ÛMu‹w+ ôM‰BÔÏôz“éFç´lG´“µ°”­—á½e÷WyÄØf_]/r³.¥ eF5§j-` ³_Ýj×óÝjì˜¢Š¦cÝèw^0¬¼xDƒø´¢–DW‡ÖÝ%:»yGg´Ð³U¾Ð»8„®–廸=9n7è ‡‹u³žRcmWo®bÐdÙ5æRºïÉzn]uíÆF euÎò]Ížùöõòü§ö¶57Kûs`!Ñ}Fi•g@"­ÊÃ’íQlS®1†ÃZ4bèÎnüþ²•ø"—êïï4F–( VL]†ñö1mI9­CëWEe¶s $š‰k8ýC$YC¤Ò+W ¡®ÜÁÑ~ÊF­Ù%q\^BˆëY¾ü šCTo;¥–¨Ú£ÎÝñ|M¯ôùy]ò½ ´ÉyÝ'¸7§: ãK,³<8£³Ð m!IZ†{ùá.ìwÅèSëÀiL°JŸ@ŠÞ)Ÿ}­ò”Ý1Ÿ® }áïf,='šÑï‡3!Œ¤p±2“™™Y‘;…ɘ¤f¶Ìæ>ää$©ÑÜiUz%)åù®?,þ—ºj& {E"XJ{Ô`«×½†å÷âqåðL©;r}8ÁdYD9y¯aš-C¨@wûp; ^¬ ­ï÷‹øF…¿«M™B»Ó/ef)øûù†ØžÇòªŸ=*K»$Gà6¶ðGFDÏÑ:c`<ñ}˨ªììòßïŽzÁë~“yþ…ÄÂNÝ‹|'ßS»žquåßIãÿâåÜã¯A»bt¨U~…Òìi=VMÊ­¦<7u•ÄÀå’è|RÀÞ@µ?’?vrR.6§3hó ì`C°‰; 8óÚaÏN°¨ivà?Jàü% €Î ÁؽŒâi._þÝ Ôû[˜o‚›¾Õd†SMª,çy+„³—†D¢r6Ò@_ÿÛ–Éß±¤µ`½ÑùÕ÷öÚ…Èü¢ÓŠñ¢´ðœ„x”ünÎK¦ŸÕáY3~·ä¿Ÿ*tQRbDMýOlÅEß™ú j{Ë s}+J{ôbh¦Þ2grUœ´ò¥p*ˆ? 
Qh¾°– “È¿²z³»ó=Q#&C?œºÌ~£.ü£™¦b>È ¡ëÍ2n¢Ì²ÒŒßu ¬h’ä {pÛÙµXFHÁÆ–¸à‹d“r¶ìÇëô“—s&ôÈ ª`êUÿÖgÑ{‚“¾Ó ¯F¢„ZX½¿£ßh#öà™*K#æ0ÛwלªZÑw&êXGeæò ûQ3 5AÜ]I—YMëôJ®á~W$Y:ËßKÓ¿Ï\/òž›‹ñ¾°º©lßÔË"½*MŸMö `ª¨m¤¼êó’ž.ïõô¹ó#Àc‡ÜJÙÅ>ZŒ˜ÆñŠox|]½ß•+8@)`b7*wÐ"2üžº(ý]~ÿ6(rIR¡%îáŽQ·ç ×üöò(ëïwÜ›µ¸^¼ŒI€Í<\¼ãzk°Çéîí‹nušBMÝ tÞóÁ½^ûSêŒh×<—«Åÿ'¬þ·‡CfÙP8køôÚ#ÀöÀ ôe±|ì3gîDï‰þØ]ć.÷0xóK /(l½a;ÿùºcFj]CzSÞzgHåÒîã'…Ën\Oðž[ˆõ¾zÝ·ô¨f¼%º”û S·>8Zü‘žÐ„P5¯Ë¶»¼ÛÕï~tçi»yšœ–ôE Å¯ ¨=ÅfçÚ°óîµûÚÛÈÛÇù !fcˆ·Jç3oŸ‡¯ÁÏê(h°8++9a”Yˬð›È»&ÖÕÚ®#•¦âà}°0Ë- ¨]9§Ö€h71ƒô>éóàÄeìÎ/Ë8ºä–xø¨àëYéÓ„„ûTýØ*ÝøU`ÅžŸ×|F~6ÿt”— úBsš{«ÙëݸKæ‹Aü°&¶ŽKQáèõŽ jµcêQSPÇ«nÌvòÅ[èÌó¦éiïÛhS͹;HÃKÁ~ùZÿ~ôÒ¶ÍÞ>/ ßG`¡À¿>Œgze…Ž8-Mê· Õäy{÷‘4Úâr}'D.Ћ¦b^W·”ÍÌà/®Ž8›¯[s*à“š¦U÷÷ºÇwÔÙ+Ð36 –šßíy‹îö: tÙ€ïQ–`ßt‡=[Ùo$¥ë'ÔtMwÜïú>_Äè¾U\TtBÐM(¿#6õK¢d¯;¥90Ë"+µ¿‘˜ÅPT±~—1ƒçmñG¥ ãiï÷ôTƒ†‰£;­¥Jã;Ïb³qü莶Øü‹ªñ¿ªÿ_–¹ž+=Ë^ެý4:û·Æ«ñ—¼™Ói|PBfU[£´²¨Jî¶£Œ©g²o¾¥¿‘ÚmgÏ[Ì‚C}y·¸]ßÂ=¸i dÉá·µU7@xÓùz,õ~£€4$Ø­“jˆ½Ö=h³–ðé¾êvA4à=Rw„kÅQ%iGÊÇ® ˜øÇ‹Œûÿb1¡å÷c »è'˜%)ã¹jw޵+#+€˜ò/ŽîôõnÀ 8F<7 § &³d†xGxG”ƒiP±š³ ÒÍK‚ÚáÇf…&>×QåãÐð1:H¼ œ(æ‡&qXY…ñ-¤öðF¡†ÕQ(aØ*8åM72Åé>P4q“¦úqi¾—?»|6kêuº%͒þÿû…oëß.î3‚¨ßö9Ö¿^}ä\#š/U<î3²‡’ å±»rJVKZeŒO_ÖõôT`bz¾Ì­%«Å9±» åÃÉ@9<WÃ^sì>QíYõMüs0#˜Óo¼{L^¾¦ÍéNÐДë;M¸þl: v˜"ôNÍœqÏÛBð6ÖÅä{÷ ËWô *Ú«§Ä_2fDÎ äÝ<Þ„z_pªPör/ ûIÙ[YÆgÛ‚Ÿ áÒÿá rzR÷“}¹4`â ' þ]ékkÏn«§í/†œýù‹ «A)­õ)æ žm ß‹Ÿ¾.˜%­%a?(8Žüx>ðù©SK¨#¹ãìp2TâRcÏÚé3±¿­±÷3ppæ”Ãi]CãE†Oݸ.O•ðC¿úªZ¥X•7hºã³M”Í€yº)°°.3AE*€°’J°Ù,HæTMhž2v5XšŒ\=–þ%,`|„á q˜ŸP:£äë³Ï)›e-:é5XÚT=ÏŽøúïXoèpêÂí»·Þ`öÍë:äLQoZÆA]iö(ÏÔwÞÞ’tÇ6:»ÓbÇòÝrëIvà¼z<æ8þáå3‘Jà ™5qiDŸ1*1TÄò9*´E=ÃvS%H¥þñ…s‚ÆÞjkM굦ÁÓ;ìåsügÀdlÔDx(#ßÓòè·û=üýhäŽ÷,Ó=¼Uë:.)ëIºP‡=¸_¸Ûz[b¸ö8ÑÈî­Ë&Ü0'§0"EßežâI8¬ÃH$íô-dKVêF¸¦NÆêV[Òõê‘9jq~Oj| ˆÅ?Ã¥ôê·q4…¤Nˆ)ogãähZšì°;ÍÔÆ¸zÌÕêmsîï ÿ½ÍxúÉÜ';Rkz-CYI™ßœú·BœZDHͬ‰ÉÔ’øJÊV¹ÆJöçËN[„ÓÄ UÛS;œói2å´9ÒÏØ RÛÛ2¬–Óê4m`¿ýJû|?ûl ²8]v}½¸žRès6YA=Ô~Ðæè±¥©øb®¸0<¥cg;©ûâ-×\–•X_zZØ1àt§»Œ}¾¹ï5÷ªÜéd[mÚÑŠÈ`Ä¡ÍaÑ#ª{´ˆg@ BŸù;Í”Ê}¨M|pˆãäÇY6Ÿµ nuÒzbØQ»D*ïȪˆ®ðíñNHµ’qШ£õåäµSün'˜SD´¦ Ñqò hCƒ/`ªúTMÕÙÔŽ(¹‹ÂEçÂ(ˆÓÝð÷æúDJ*Éé2\%G5.ÔïFúF)œôi¯çHú/Ö rÛ î-••«ð&ìÓbe™©¾&Ulþ~e‰j+SmŽÑö.WgY·–Ë2Ù?.úÌÏ®™Î™®YÎÏ…9À 
ÞãR«Þù:IÈOœ¶W/¦Ô’äîwØòÜo{€ ¥­íú毖 ‹µëš à5ñkLоG}ýø¶ñ&pÎùeÄO7TŸOa¸×ޑΉ:-w£ßmd:¯°¢ù–Óoìhô,9îèîOÜÜÒÍ…· ~{½~c@ÔgY2óäžýɇ'K9¹!µü¸sa„ù0ý˜©Ž,©Þ˱JG=mA389„He…sD×`{‘kçæOâ,¦šÎÀ!¡®¥)á]CÞ^ï»Í``?¤¡FßKusÕÓÕŸvÒ„½%–¿ˆÚ¯ÂX©\ÿœ/c¶nÊ5V:ýƒ0…£¿ gE+ÄN¢51!Û’¹ù6Ç©ì¤ š&È:»xúÎH‰ªSòm•åw»õ.—O ×+Q Dâ~!"ßÃê\î mÊøÈôÈ ¸XTƒQ#8žj2·§ÖÇQˆšLš!JVú—0ÂB¡¯Ã©Ã­0¨0†¥rØÍͧª«EUf0 ŒÌü%z=Ž›}Åû“%†ªÁ<Ô÷‹z˜¢æÖ„ü„œªÖ •“ztyqÙÃgfíȉÈ]üÌç Ø7‰r&Á ¿4ÞUqvƒa­mÉVKUÜrq"çþRowÆô›Kokˆ2ñòëÜ×ë–H þ1¹¼êRÖ‡ó/>OÇD$Ä:Î×­:85?ŸZ™ùõàWÊ”ŒU0Ñ&b³]¿ÙŠ›îú€“‡26[š€ŸBq^x•£ççeÈ«î%?Á?è_ú謬4xx¯ƒq F¾&‘z¼ââùCº¼â­5Á-_®¡`±ŠÖXme4¯•­òJF² }Ÿ“ùüPXÔXÀ­þ¼*ã–Œ¤¬È<(³O ÝôŸ½Ya½Ê¶8-Mâ· Ñæy{ÍÔÚ’ò)Ä]È÷8§Éhëç^olâ^kÍçjÛ’(Z­©‘{ýÀ/¹¸:Ñ'Û6Ï®´mMJltqa4çìÎMP;ê@-ˬ“™ÂFsvÓ EQ|µÄÎxÚÙXÎA–!¿%…ϹJð¹Kºp…ÌŒ”Û(ÞØ:‡/Xk¡le“÷ky9Áθl»Õ’º*I®Õûèð]Ýü¨¾óln”WTB¤—Â/ÂÞs`SauÊ&«w;IÑ£7¶‚NDzŸÊ]³3âªWÛÌó¢ºþ´†ìGÚõ†þÃ.ÅÝWóÝé꼤,7k(­¸™¥,"(1â¨4?âÐéÜšÓdG +ôª«k’mX».¢„ƒá‘šUô#œWÕ×ë ºùÑo)ìðeÍëœÿÏ!WùÌ'T‡»ô,5c>§Téú?0Ó6ÌSº“M‘Ô’¡: öd•Wî“‹§‰ƒG+B;–¿Æ¢4,@Y–­ |5e™Ð8J:4l¶žAÌ1bóÃ2’ÈÚaLC‘v$&¿ì%‚ךïÐÒ© N‰“"S†ð‘·2H!FLAxF 9‡Ã¢–å*z©ZóLêä«0NdU–ÆÁ”¹xÙÎ/¹l×ì‚_ŠÊ‚S%»’Šò¢+éÉ‘š4Ž8æÿ9/Œ''çEËq™,¡Ù¨î'ËŠf¢^ÓJq03J€D &Ä T±ÆUb|—A–qSe nYHÃÂÙv=–ͯŒ«"ÛÔŸ.}iåko^æoomptŒÅ%ÈýãŒç‚Ñð°àlNêRAµçº¢‚c|«ÁõwøeAÒA¶ŒÏàêµu¶<:±µÅ°š0Ú\®¨|¤‡o„'›•3c#²†&Š§Æ¥Ü_A¬ãgĵðëˆwjÐGßÕf›héå< ì§´5®½±³”CƒÂãÛ®|5/ODFÐnÁÄ%¼ kè 2àR¿‡«lÚÒóÔ†ëk}Êi ¸Ì¥ìÓÖJ(>wCšØ ˆ1XÙF ÏèRVª0ÝðRF™2“GTh–¼ä‚­çæýÊg³»F—=sá¿3¤9 ,¿üjŠÑz'sDóú²-¢NcŠìÑ·T“em[¿óýèÚè››;!׿ô©«6-ð%öÁËÁ°X‹ÈŠ " jke|7œ† σüK¡î— ä_œj˜ gŸì”î{<»…oÊV‡üÒtðæ>Ü.ëA&dïnŸ#ñlln÷Í!9ªå[“о¶W»-?é¦ ]Õ?Ÿ€þqû“Öêñ‰õÛwAýXÛ_Êk°ÝG$z.´C/M‹–ú´ý5}ï/¶¢Â ®û˜àú"á“È¡\½—®)? ÿ$ïË› ý.Í GrÂéD¼¾ÛLÉ`ªp{Q*z[r²ætÝ/.ƒxÒÆIU<‘«glÇ•n"¦ %’püë~>=@&ÉcF|-÷’~í*¿ýw飸F1 DW^7ã¹Ûü]UÝ·¯wmUÝuòu ný¸Öþk¬®øø>G’qçǤhÏÓx4ˆbû~Ó£© ²‰_¯á± F—,@=]t|ëïd‚8—ßL¿¥ •ê âT:›÷Üâ•­nïç—òÙ.bÎöâ©Ïa^å]^„Í“gfów­:{ê›u‘/Îp$&ÎÙõ"5A0ßP¥žT¸ì;6:<ôžò_QÑ&]5 !ûÓ=?™oÛaØÖå€Ê(˜ìg^(™"ÄÞR„Àü+–ª£àòÏ_öLÇžSц`ÔVAp¬­•ƒ€]PèÓhá! 
ƒ?, ‚v>…ÏB½ûCf{è²·‡d–®AüÔúÔóÆ59£U [†¥%‘lQpQüVÜǰ¾¨¨>ØÇ¸­ñÁEl‘5 \Ü÷—×þ§è¦»OÉ*ăíû¶woh$†W”Ž !k0¼Èˆª ]„ŒOGVcx¦p²ŽJ)!çK‡BÕoÒlšCá!HaD šöKŠFâ!(¿èId Š¢)ØCÇþ7‡o‰·ýzDwú·.=xíò¯_¯Êl<òÞ‹»SAš‰Ç÷sq?üùåí!Ã…Ç[&±ûÒ )ÁI²Óùï{xX’ß:¾ ÔረMËë*Y÷O']×ÙËeÌZæ}¥ûò0ƒ‡fr½ÆpIp< ¶àÅaóžÚío$4@Ýè› EÔ€Ç=µðîkS"â‹¢âÅHÓââ:QiñÌ ÿ—š>o‰E»}Iõ4'^.Ûª¸s%^íL–¦cü&¬Ý›!è ý(ÑwËÆm7ϾÀ!Š1•LqÒ8kÝrTMªÅ>ìeAIÞ¼…¤ø¯÷S·dp²ÿš½é¬ù“sâg1]V M–÷QŠTñ™r%?K‘œÎ Òü÷8<%Gpy,e‚P@Õ§Fác2%E‹Z–z!¸ó Hʦºœ#Xÿ>ùþ¦ï™ssÝ]4ÓŸOP±®Nl>“Êæ0Ñ~ûIŠ7Ð Ù7Aj Þ8=æÏ© Qû`S™Ãß) 7 äº[«ß ÿ’éý F¾ÍúžÜtÛ+dù£ÏÐËöþY%š;ïIx°Ë[w®Ã¶D´m­¾n­¥!H…àÐZ*“¸ë“k›d$ÀSó_=°çT€«÷)C olÛ ]¤®ªo.í¼]Œ9öèRþd ûüÍ© ™G¨æË-Qì8 ¦€¤Á8"¦Ô™É¶8ð?4%ç;-ŸC¯Éù·|½ž_v €C5Jy&|œmIÉå¿=‰JŒíÉy‘8¡ØÃh.ΘÈF·ªJkTY©™­©Ù–Ønˆ£å`ÏÆhüØÛ|37ÜŒÅÄèâ¹Ò¨nP÷Îôà%¾£ïËÉåè—-‰àÂôO¿í{^pÔ±I!8åé~ÊÅSª•Ÿ¾]õz%óŽéÔPódì­„Š¼×ÂöI»j wÆ+¯H>WÖ»ÃM-1н% ò~W„R©Y¡÷BÑÔPå[Yx^ üC¼RðÓîÝ˱8V¢»8µÇ?gr÷m\Ib"ÖrwÏžÀl‘WâÒÝì•Ý]5“¯YþÅÚßÜ= ˆ½{B°­àîðè· ^ Â@U ¢RxV/’àa!1süúmék¡ñ‹ˆpìÂ~hŸLüïÁ&ƒßzý­q¦rm¥sn „æwjFë.ÊÆ}Þ÷ííó‘òƒ¸9Õ‚h:ÇÙDæ ðy ±„ô>_î¶sAØg(q '\‘‡ýR‰O‡Ñ«•|G^NÒþÚÆÃâ2Ï™ÍKA\Ÿ/ó=o…§‹JG›OàKGªÂs @úèev{QFÂ`eù~±“{Óæi(×ëFœ×åñHN Òg Óã ™Îö©‘4 ™í[k‚ÞöL^—ÓþÓ°Êõý"È}¯ÌÖºR[͹ԶíµÁÉs6p_Ï÷{î4öCÇ>^N=ï¤ÿõØ =—ãç êä†dŸs(c¡Ò1'¬’5“òyül{:eß:3r$–HI+ˆH“b*ù®DoÁz6Ü*BÎNJ’ø^ ¸aG|õ›?·–ÓãŠ5¥šMÒRÒöóF\‡˜߉t³CïE´žCp6OÄ. 
9oûƒ"øƒ¹é%–ˆIƒlÁ2ªC?ðTuXéû8+ÌþÃѨ~„R¨Ktè›î±ß{eH2I3é–üY´Buè¬LŽr–ˆŒ€¿qoI0´¼âWYuE×Ëí ¸reWàÐÙ™WUùºŽâÔÀ —‡Œåõ7³¸Ü äöëÂ…£‡¹Ñ@^?ïRÀúàèúï‘ɯnnWE ã%Çm;RN“ô®¸’øÜ=[—_Æ)­JK +à§ä£™ñ¹aU:³TcŸç롘tXöÇååîgØ‹9 ꀲ¥¬þrs§CéEn„ ï"y6=óV:E¯*LéWññelyn|çüð©Ê(ñþÎÔE–©v6¡Ú”>—K0ŸÝ}SIÊK·Æ(3m4±”XHe¡ •‚Ñ0[g.èPTp¹ ©¤\^RŠÉ̫ɿáÖŠù༙’æ~+¯ØYjÞ×Ü ¹EóωšØ?Ò"ó­Õ°Û N­µ?¿Vv~°bÑ¡2Í7GhÞÌL ‚N'мÌFªFOi‘òHå²Ìú9³Î½[»ÿÈNÒ—‰kÀ®cŠŠÿ©ŠH8yç¾RqafóâooHÓþÍÀù…=©L”a%rDUÑ™Ò0K%ñK™ôsdjû';ö·9zú$l+-‰S£V°j8œ¿?ûáÜBÎwA ›Û%cÇ©1teȆÞQOî%8â#Tü„œZ¢ ~=2ýY‰ÞjËxuî¸æñ«ïÂhr¶7U§Bó¸Ø mô|[ð¨áü¨Rø«óMŽç#nÕéŠÓ±iQ{´´¼ ˜Ñ+( ˜òV ¸¶y{NµÌtÖvN¶LîŽGyýy€Õñ°Èh6šLzýfkªV[Xø>**¦G×úW#i …Nư˒~p) Èt—‚ÛÛû·þ)Ï™<µ’«Û‰þW5ÏË9ØkA74•?lVƒ¶QR⊱æ°,]ñ>R¯¡q%XSx6J‰Ê™lÃ8§¯X•€ÕÕÄwvîÚf]ÿÚô/þšr\\;Úýÿ4©Æ[âè¼þ¿Lÿ¼²ÌÞî4]lú'©˜¯½Þ¿bþõôÝéã¶]ùãMàÂ5—~™kÖWCüÞösЊZÄßìð ž›o#:ÆÛâøPâÂ…lŸC ÿâ/¼ …Á¿Ã§ìÆÝlöý¬Ë%&îÑ––וågDN§J¼×,°ÏÃDKÛêNªÚöÿš›ã0¨|GÅTæå0ÛI¨m}=Ácjo’×Ú Û\í}¦ ¹'k†¼²)üÐ9„KnÈ/ž•~êßVq¦¸£^Ø%à% ÿY袔y–2nËN›Ç›ÊúYâd}dbz°iÊ;}¬ë}ûxŒ7Òǰ‹­!„8vj‚¾dG(L¤î¬6 ÄK8%qéM¤õQÌŸàdŒ¾{&žN£U‘gß)šcù}}²¶ù¡è×|á»WùÅ_=ïôµ²ÑÔº˜‰Dw¡ÖÛ¨© 9¸L;óÎÑf²rqñ©Õã•Ë{Ä]5-5>÷/Fl”pë™…UÂc5ûYïhIIêÎ}y§Žƒµ$ Ô—…K‰Ò`7=g"£d(áNÇÝ ÅøowjvúŽ%””%î.´gÞ˜Xô±Y§tTu ào Þm‰6Mxs‰Ï)ógžÎc~;Yšáù.‡8j˜ñ+–¤EÞ Íså¸._§t3§z}Á‘9•øœÌª½KØW õ=Ÿ<Ýù³sR¢#"Q[‘íœä¶áˆ·¥ÛútYÂà2°é.„ÒËoÅ.OÝ»:@ŸÔìÚoãÓúG¦e7×ÿ;UäÃy ã¯:‹}©öÚøÜ ñ¿ï7§Âþå¤b=®5dí2­ ü¥ww~h'ºÝMæ O9 êÝOÒ´•í°ë=ÃHÆý³ñ·ÿ/?£„AX'¼Qò†uÕ~s~Û¼Qà=Xw÷ŠÙô Í™µâµmk¹ tûžÍ÷РǼ#Ò8ð®ÂWåôyøÒÜ×·›§ä¶Wí-Ü£œÓˆQoý3¥×-T¸§µt9¡~ëæóû›b·O¶sXØ»#{€fÕQ‰çÂw°©ÕQ²”äj®0ʦUGðDvš"+²"IV#QV‘±ÿ(“_Ê5Eº©„Ú ÞL±)ýXGñÙôÊŠ×øŽ­Ü³¶‚¬û9Måß¿GÄλbaI" 7&‹iÖ,N9:)IôYšUœn”¤ÛÙbžeˆlVœ›"4F ƒ")ÂàܘX¬ŽÍa—Ãô^âö3õ¡ÂÉ­¥gÊËéÖ^*˜,zWP­h‹¨=\'OÙVc™I4Hlvn™¼®ö0Ð$¤©é=ÚœZ*›g§ªs¨=j-µW+ßš\*ÚæÃ Îõà ˆÄ¿~‰çr9Á9þ1sànÁ9=­{gE;9½­ÃׄºUÊ,5½›Ê~-ÝMõ±$ñyuTu.­/˜ç¾M6â/r—ƒÌþ“¿ë²Á_ÅImñqäÒ¹EÔ”·ú‹`²ÿšÌ\óO^3Sü×i¼îO±÷§èÂJõF)ãëï¡”Ÿ©`¤S±c›jö<œÁæÑ•TaU™LÆñJŸ¨õ‹<Áÿó³™6*ÆhœT >hší`¿)‚°›×Ô‰]+aŸ±7Ÿù}¦Ñ5²é¼¯.±sCOئqÛoÿJ÷¹öæ)-)Šáþü0°&(ÕÐN‘fcºµdåÓFOÓúg†84ç@ÈêÓ´ å§¾Ÿøjƒšž(~ý‘P;g¨ºJ\zHÌÜq ¥ü9‘.Ts€®høÎ‰ø”ÕÄú gÂF_86þc¢Â_Á9JâÇÿjêo,_¤»(öU-d¾Rh=àοûûYwkm”?Åðn+Ú—¦‡ÜüyƒôŸq ÉjY¯]îˆÐe~mîI× 6r†åÀ³RN›°úm*éyŽº™KÉ“ÞCÿé#Úr} • @^xë£×jሷ9ü3ÚZt8¯o 
:ŽÃôŽâp<˜=`×?ª[„f\‰´_éS¸(wÏÁ2΢ΞÔtÖ1ää`ˆLs1ð+¬ó&$ªYŽ=÷§§–)ë]æ )Ù–n@7–„…¦p–ôמ’=.¸ìÒÜw`â$ßí`;ïtKÔš¸yNÃlŸ3“Îñ¶¸&u¬gÿÏxŠccËt6=›˜Ü˜ðÖV<›œ YŸP<‹{þLÎ4n:‚:•…ñ,h–fÝÒÁDL…©C´í¶Û®ÏŸAUáýH_6ZTŽûïõD'AV~|4‡ûœML+­éÿ¨ÄýyÂÅ~ãT ͨ !EŽ1*]lÂì¬æ~3eü¥ÕÖ]“'IË…é{„¡y‹9ƨ\Õ›•…· 4]WTqµMé¤_ ¥)öëªò0½÷ŠSÎyÌÕÃê»õÊ—ÐÚ˰鹀ðÝp‰‘I°M¶ig\OLÔ ‘º¤eCâ±ßjÁÐŒZ+Rä£Re‘ÝVÿ(&r\'~‹\‘?‡Yâèy@ò2€s)dƒ ¨D”®ÓwÃ2>¤È1Fe›‡”ó¤û‘†f4ÅE½þuÊøP\4Šü‹˜ÈWu{û_kÞí0ñ€θàr¬p‡Õ%èª ›žA "CRUÈÉO­æ3)FÆ=V‡!–\‰÷räzbC&}²ý «}¯~ýPc) –ï!¼GDè(™E¯÷Æó!V¨Kê ×'H"Q‰$tÛ$ÈP T¥,m€ì©S¡¶à)¹ªtÚ“¥x—·’Ó~I¨P¸ø“@GDÃ×ë)ð [.oE(‚È$Ȭ¸y06µL“:á8Y„"(€”i %}÷û ¡@)…Ñ®#¥<•Ç^«ŲJB(v]Õö†ÉŽN¯§+àW­Ún î °ƒ §ñÿÔ®åÉOÿ’äwî+a¥ŠõcäqýþÊ*þþw>¯´äß=~ýIã[ñäô;×c:ç™còRddi¡XzMLµ„®@úöѨè·—ÐyTšLõ³ø~¸Qó§È~$ ¤µQV>xÛÀÙ ÎêŒû}¯"èïöJ;#â’ÁµW…ËŒ‘Þ‹ÂAp)å g=)Ù-H*0T’ƒýb2ΩëI®Ó]b±§»¹—H8D{s‡œ¡]BÎæ¸kÿè1T–з³€´_îÂ&ÏÌ'WÿäER<´Ä<5œ6…ĩlIòP/1¡ lòü$õÈ@+É®%¶ËqצÚåø¥QÕ‚¶·¾D‡¦ì ‚Nîèù¢¬8PëñB¯QEA8ºAkÇó­’µ“¡ô'C­W\kÉÜ]b¡ù¼”¦àÐ-'{\Øäù‚ù$õ†DÈ*Ýê‰ãغ¤[É`[Bçâ| œ“þqŽ“£,¥¤,nY2 Wt¹Ô$¸0pÌ'©?0$Æóú‘IƒõcYàÂtóÀ¸—ãpaÚÎâ`°ÇN€M =<Ç™:†ÆØ3Œû¾Ç»â½.ÇÆ&/|CËgÏ’Lº`LìeŸ‚öJÍeÁjã[ÌÁ0+”±µ÷äáÇ«bm•î‚'Ué˜Ôp¾·Ä5È{)e;))¸Ào_·Òò–mJd›”·EJ,¡í ‹^Otè+êkâ /Eg×¾:Ü~[x<#ž+èM}e½“ ú‹n„ˆ@s[G´sîÔÄ¿Wn÷Ô2èruýž˜, WÑ0¿íQOKf3ÿY'~–² gÉ»ü»¥€, F¤z:t²0$tòEºäž½âàÌ¡Ë6±­Š*¦è¨Ë~{§‡ËŽÚºxûÌT6ôQrCê¨àÓ?cÃ?¦‘q„éK,꣉1TÚ°³Š&Ò vü9ÜsÄ´£o?Ó®ØÉ¬r‰N ̳×K§Ú‹º²Qñ¾.j›j¨û€›`˜·Žîý£Sth˜Ç®…ãy“ñxÿ&BM9” ›RÐð»ùЩÚÍ“ËNYkàk*z™sÅ<¿p ^n™ºŒâ˜g)\ã—m™2/ˆ},Ì FLÌ ¾|®Át!n}°ßC üÈjøˆimQ´KlcØQí†(-¿R8–éy@g¶TpMbšûqbî6Õã´m9¸f;¼Þ/÷ÒŽT da D{s>Bì5;Pf([ :4æI›­z jŸ¯Á±Ö,V—žµ»*§ÑF€Qú˜Ö„æÝÒÚÝòñYÏkâq§`8ARô11ïãÝÂ̽ žUîkX·q6ƒï[Øþûÿ W ¦áž™b¸·ánîy)þ?Æ ø.øÄÃIj[záááá$ÀÖB`[i{QvØÙ®b8×ψ“ S¨Ó»fÀœ/-@X„Å%÷Ikï†0|A`Åp‚¤h†åxA”䕸UÖR¬c4™-VÛî=<Þo!óZ…¹XK6šsF»—Oë³ø²\[AÕÎÃÀõˆ§Çóÿ­¨R<»Öç·SYXvÇt„¨"ºc¨\ºçu°SO?ñu*>‚"ŒXžäkâz§`8ARô1ÁïãÝÂÌ® §6 gê\@J ¤”RÊÐ^I±ìÿËœ‚ÉÂð€ÅúØ¥¹Å÷A6„: ƒGx‰Œò‰U6®1ž Þ!‘)Ô4k«í¹Ž®Þt§0ׯ͊ÜÀ²Éæh.Ý&ïûWîWqÍu7ÜtËmwÜuÏýçƒUUÕUßü\ý}{ÔGŸ|öÅ×Ç·oN‚RÑ…þ휹ϙcŽ9]/sÌ1ÇsXª?9¿;-N}Lðûx·p'GÞ ê™Äß§°ÝIp'M `íº = X&†€À²1Å4{% ·4 ÈÍæoa@Úí#Ä]'È«beYµèÕh´:½ÁØôšÙbm{Ùî(Ïz^[ÁÓ>ÿ>;øÕ+¬Ý †P( C¢Ð˜Xø8Ï´:ƒÉšÝ˜#MÆÊfÞf¯!~Ë ‚b8Aåz×0,Ç 
¢¤Ó–QÔ5¶ÃÞÁÑÉÙÅu{Oç­¨­¸¤p"{N½çÇ'¯ÒUÍ®I?VÄ€Ï÷?7ƒm%QV •ƒG8†Œr‰U4®1ž‘D¦PÓäÖVÚ-b]½éN3`®¯Ï÷*æÓ>ŸsÌ)s§ŸçO¿@„Eî‹—,kCÑ–Â{ÑÄÑesoF§.:+÷ ¹î-̲•«­õnÞ¹~KÎ)~â¢%Ö¡7”çž²Ð{•5iæ¡y¼•…ÖÛ@zƒ<â7 †$EÏ dé9™_\áZ±siW¡_YVáj4ZÞ`lŠš-Ö¶˜ÝQþ¢3°Ëþ÷[ÅZ]%çZ‚üJƒ#Qî¢1±þ×OŠH"S¨Óâk«ï–šuÐÕ›ƒ¹~mX6霒K3Ï;¾a…¸®1u%ˆ7£S•k³øXyö:àu{e÷Žèêé…µ¾îÅp‚¤h†åxA”tzC¹XQµfö›§ÛX Ô€<â1 †$EÏe¹ã×z)v/Ù)æ$~j­¸ë¢ÑêôãɇwŸ¸þù½&÷¹^‘+)«DhDFQ£1±”ããIId uzW†ž™6ËižäS,(ÂE,¦DeCõ¥ž—ÓY´†¾m^½¯Áw(Œ NͰÂì÷±D*“+”*µ¦#Ÿt›€d7 †$EW(”«&^F«ÓŒgZâ=c±žm­÷Îã˜l< ò‡_ÒJH H¸ `߈VµR‘ׇ…ú 0 eëçqñ$xD$‘SH§’fqm?ì:ºzÓfÀ\_gEn`ÙÒæh®Çód¾,a‘Ýâw6̲Kwhâ`Sª(ËìߌNå8“7'Ú‚-­•üÖ®l#=Ó¸çÖ ÀÇKàë ËŠA 9Ì ;«‚ÃsG¤–j ¦¤Ä<!xÌž:–çÄWL™íÜ·.¬qé@V CÓaìP3µ“,9ï³|aªõ«ÓȾ›Èξä ìrÚyAŒ—‚ @G,’M¡NaÀ³"Uµ3 âЧVË’vS«œÔ‡šZ:zF§RfV·wØ9ÌOx~™ËþÿAq~üEVˆ»ÆÃVD$dÔ €a3Ï­…Ðè‰Ø¤w˜¯t¸ µ×—Z €æÃ9¥jZŠt™˜YXÙjŸ pråž)ãM&ÐgU ºåFøíPç—¯˜aí'î.°` °7^ÕCÍ: ›ŽÙò‡ˆÏ|„¼oKƒíFwö¹ß‡Lµ:uh½÷åÚ$p%ïÎP¨4:ƒÉª¢+ÏÕÁ8ÏYjÑH¼ Ý3±7öÎIóHèR(Ìw â “ƒ.\0B³4¤¹9­Z§FŸk TJ¹6ô÷&Ñcì-¸Z=Èk$Ir\w_vAèHšës,[©5™©Ÿ2fÏFÄL¨Ì?r ®p$f@5íeÀ÷8Ð[påÇú‘TÙ©_‹bLäðµ%3M 6î: ’¤H'ÓN(rЮðXpâ¸%1Ëf­ÊZó0u£áª‹¶|&óì{roËß¶=——s«ûKXÿAýÎÕÿÀ_þóÊïùìÍ|×–·ïpœsßD†f>¥δ­™°žrS€´¦à~¿ ð{?KÕb‹\¶«WIF`DK|¥4÷d §Àà$ ]Æû¿?¨ÇÇ ŸÔñit^.>{ƭ¼”a†aFU•±m­Š› y^ȈÅP™9øÎß°Ö,¸ò ’’{SõùKÄ«Pk,ÔeŠä*6¥÷{cJï·G'› }iy´ ^Ûéu1Ñh=rÙ… 牚Ï~²lÛšmÛ¶mÛ¶mÛ¶@è[6UDVRVYŠªv¨Õ¤Y«BA7’(03¶w\c¼ÐÌYRN¿cZÃl>.¯Aä¾#à ’¢–ãO:¾æ»¼]›@Ùñ¹¯À©DÁì!A1œ )ša9^¥!·mv„ÀÑÓë„ýöUÝCÁä—¤qwI$E:™v²@‘ƒvæ’a—¡®ð[pÏ¢k–\¶ì‘U3§vêLÂBm¡¶PÛŸ©¤2Ê~*÷‘ü‚íWÒ)—ðëÝNÑtW”ºC@( žx ”Å=êB}˜©„.³Ö6_ïp|¾~û\%¢fý6}ãVªƒ „B!„@೫ªªªªªªªÀ+çKåýž‰x ­†UÉçjN˜ÚS3øºúÜ¢VˆÜÒÈ2²åí´Ž  5¨µ²œÍîÅ$z‡7!HðI\EÕtôÎÎ~ ”Ÿã œsÓëÎã±}j5l)[Ç6½µ‘ºÿ ê=êºUUUUUUUUUUUuÖa/#f´Ód$rõ »ÍÔ ËÀnØÕ,¸ò½-@$Un…QóêÞ¯²”ÀɑȄÊpºÊ*ÜÚ@Ó´…¥Mgÿèxµs+} ó²KOƒ¾¦yFlª.G½ùrµþËÀo$u£Àà$ ]f¾l\åÉZZ“)ÛfuÁ )aBRic1¹3ò3ÃvUŸQ³Ž£aÓŸ¶<±ÂsÝþð¸? 
˜õª÷~m¸S œTK†B¥ÑLVí°& Xy| ÆÉq"Sc3333//ù™Ã/6éVÐ;¶"uæ"Ü „ƒ¤°í¼³®î 7ÂU;O!ñïý'ÎazWjw•\;cº÷’sÂp¸\DDDDDD–EDDDDDDL.X’ä,[ZMðÄÅ(õe(0‚b8Qìš„Úê/K‘/""·`3Ã03Ã0 €ÀC @áTêò–·( ÅS¬›/sÄ}OõvOL´Ý éZt:Y«'ÛEèiÕו Q“šÃ¢öüe2‰é©Âà$ ]&Ñë̈́ɘ¬»,»PçzÀ²uÎÏÓ*¦–>l‘›Ð=ߘžzóH̓ÑcÖG1›‚̱jH™˜Ç¤§Àà$ ]&Ëcì-¸Ê)^Y˜fú£›L™fvA3³¯µfqÜÙLpš¶0´éØÕŸ^#} ó²Ë8 —F¾®fÌfùÈ>@$í 8ŠÃ,àÚ&”q!•6Öù“’‘óÉTLe†¥[$Wb7T†Ô%Ûë›n1ì }Σ4O;‚yš'm$ޏ ôžÄô´Àà$ ]&Ñcì-¸½DæÌgMú)cöü±*£¯)\VÃ/ƒ¹ÇvoW¾«I•Çú¨ÏÏ…¤H3¥®Ý¥e¾ž3ß4Ä ]áHæ)³o—ãš)Eã”’ê\îLú²Ýs±ù¥¥¿W¼ ¼Ò>ÔV[“š´IÕtñì'I’$I’$I’$I’$I’dKŒéMÓÊö y4ÕŸÅð»µ+ò¾ªö«=þ ÿæµÖv?PظOµûL¾|³Š0@`J”©xØdÉÆ Ñåëîêöé:u$!#####K6LÄ@FP 'HŠfXŽ/2ÙdÇðÌs/¼ôÊko¼Í~ÊUõÁÇÜ>>û’#ùëŸÿeÊd;|_ñuدÓ]~LÌüÃÏ}uU!×*Xä\( AFiFcbáããIcšn3`ÎêeÏ¡‰4™Y§Ò•k°hEÔÚK›gØ×íì÷ºzú†F'H`¾wè (†äQzi–ãQÒé '/å*¨Z ë­L·µ³]ïö88:9»¸ö÷èéq1õ`¿ZÉR@’$@$I’$ÀïDpÒy·Ýq¤*»$¬8ìdîôç ãÈ}ÃåñáÐâ:,<ø( Ž@¢ÐsÝnÙJ§f3ö̳P‹©{ –«›½{ÐgP¿Óôã?­¬$UU‰¼¯n‘KªT¦ûÄ÷Xë-¸dRõyR+‘-uMæ\›.½BŽ3°sœŸ°†@¤äSyoücoÁ•§I•½ûµ6^¿^™Cßù8|WÆ^’‰ÝO’$I’$I”RJ)¥”RJ)¥”RJ)¥”RJ)µV[z¾ŽK·¥ÛÒ“uü1m[£´b·™f-•Ú:uu¯×ÐÏ×öp½'žz¶~{ ÷œ½tÚ+z½ro´Ñ°qŸ•ôôýÿk¼é/8þ)Lp§ŠáIÑ'€0å;taà ò¨¤—†a9^%Þpòj®‚ª=¿ùó€rÓßyÄIc€Œ ä)¤âÓì諼Êd­]ß:ô£ÉüPßø–E\}þâøÏ1‹{–ß„¢)¨¹¶B'©PñV¾Æ±»…;Þ㥓]ïõ¬zß×°nãl†Ý·xl; )¸UÓ› MèK(eí-óú­÷ýëNàã]÷àq¦¹Òh4chÆÐh4¶Öy,žÖöÊ þÝ^ã$óÒÿÓe}&VRV ÅÁà@d ×ÁœÕ•Í2‡ù® !Ä%°!öu°{ rÝ¡@0‚b8AR4ÃrüÇß'˜þ!}_bU‰%R™\¡T©5!aÉ!AÉG_ßeÀdÍÎ5‡µ+[‡Þ`4™-[½ ûãØöOÝØ>¶ÿ×)+ÙV„Ø+)«„aðÈ(hL,`\c< P"‰L¡ök?·_Ënttõ¦ûÍ€¹¾Î ÞÀfgœSæi>ã„E´bɆ³6B–Â7¦»˜Ý[Üî}<¿üáÏüØÚÚÚÚÚÚ~mƒ‚ûŠéz’¢–ãÿ0ý}"¦fæ–V…:Db‰T&W(UjMG„.Ôσ|wñˆ“z!AÉSRÉi6=õ e²²œµ‡´½Áh2[¶.nvGgÜåöxß· ®­ˆÁJÊ*¡Œ0x¥ùëç]\c¼$ ‰$2…Ú¯ýÐ~u7ž2ÞÕAWoºß ˜ë×fo`ÏGáÛâvÛvÛvÛvÛ»"ˆ]q)oØUŠÝØvßé<Ð×þË „`Åð®"»†˜€¤hfŽoÌhbjfnaiU‰%R™\¡T©5aÏxwñˆ“t‚”ÜÄk‰ßŠFéÓ 0±ÊæõËpÇnYv¡ŸF&f×4;‡g6I—»§ÖËðužY1¹ð&låI’$I’$I’$I’$YNX_¿‘·Ga8AR4k% ï~MÍÙ«&[r0Ãÿxãã}»ÌÍ<‰ »ËÁ;2ÐÛQ—û¿¶¾Ã‘!Ëû®¿m˜ý³«–]DvQ0œ ©AG™U† 0¯ô—­3Iæcé³VéŽ]§æjõϯOnÝyâÑS`p….Œé-¸òô"©rd_õùo„z ]÷h½—\Ká›úŸÇýõÕS‡ˆBReŒ—Pý:§ÆœÚrjû8u8×RʨO8tŒÅ Nz‘óÆEÒkt¹-¹#ÜKdIåY£§.#’“)œtÌ»6¨i†õ%0ótš»ú ®¯ïe#Þø~†|Ì(&cœ0×´ö8<÷óáù¡>¼‹,¹2‘ÉÆg;Çd–²ž× Š@ ‚øÄ@ @ @ (Q @ @ ŸàpÔHA1œ )zòÇvù¥ŠCª¤*]5­No06 4c‘7ÖçÊÙôz*õU)'^?ŸZÌÌïSüÒlüÓgUÆ%ï‚Ê~*¿[ñ¥3Ùyæ¦ CíYÒL‡ë 
&%µñxñИ›'"Q€iñ‹é­T1ƒÐÊHäêvï)§tØ »šW¾wsÛ"iY(ËOät‘ºê¯y ÄŒˆÂz»qã-Áì2MÓ4M˲,˲´ÖZkmÛ¶mÛ¶ã8Žã8®ëº®ë"~þtþ² ,²TËn–e™§ƒˆJ¢[†Ò¥ShÞWØÔ‡œÈ$&ßWϼ òF\G Qè2ÁØ,¸Ê¼^E@nsm‘ºXR<ð°‘–‹1‹á(®OÄ$ÈjzWÌ,x£ ŽÉ=¥µWþ„ÁŠáIÑ Ëñ‚(É•ø*µuŒ&³Åjëèax»3ÇŠþÿÿÿÿÿÿ-Z´h©€6ÿwóXWÌ™p|QU53û×ÔÏÚÀ ûÃXUØ4d6êÍùÅ߯îÅp‚3A¢±Ä˜cÕË ïI­§Àà$ ]&ø1ö\eú»zŒ‘:ߪªªªªªª*Ô@UUUUUUU…¨ªªªªªªª¨M’$I’$I’$I’¤ûÎô? þŸh€].- `Þgœމ$2…Êë‘þ3¡Éµ8ÎùÇ|+µVɽËÃxóHHXé@UUUU­ªªªªÖªËW±vúµÖUgǵ ¶V¤ƒœl+¡¬ê<›È(o£1±ökŒ'˜˜äÊdMQLMóFmkºeê:èêMwšsýÚ¬È ,ÛÇs4×Ãy’ïÖ„EÒÅ%2ÊhdG©¬ÆV4qº©òHËœ1ëT‘³r‡šËÛB-[¹ÑÚímèèã9Ùï¦×~VGWOßÀÐ(PB0‚bxן`ŠfXŽoleŸ4µ©™ÅÍmnazK ¶jz³È7ÅÒ–ìJ}¼LËM­xÆÊáUººh¶ÐÞ_¶ÿ; #XÇ«:Á¿Ny¼.Äôƒ¸hÞƒ Á‘wF‰}:®‰Ï&mJé™ìý9™Ÿ ðS±xiWqkeYu«Ñhuzƒ±i†Ùbm›ew”¯èüË.˯ÛÞ§>^ö*¬®  Ö k'ˆ! ƒGà(4&>NÆ3m€Î`²f7æH“±²™·ÙkˆßrÇ‚ N)Ͱ/ˆ’NoXvZAÕ¶ÃÞÁÑÉÙÅuOô{ÄÓãy+®ãfüÿÿÿÿÿÿ?Úí9ðÿÿÿÿÿÿÿϵhõùÊïºJ~\«àB¿C‰`p2Ê]4&Ö_㟀š‰L¡¦×Vß-5:ºzÓfÀ\¿6+rË&£¹4ó$¾a…ø¹èÏÆÔM”,ÞŒN=vV®ÍâcåÙë€ßÝ.¿wÐÕÓ704:! k}Ý1ŠáIÑ Ëñ‚(éô†r±¢jÍì7O·±<¨!xÄc 'HŠž!ËryéÅî%«ÀW²j­«Ñhuzƒ±\àtmï5{“.k^N`€éÕžÿfлÆD‰g€Þ'\ßò[UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUÕŠûë×ÖÀ†²d…"2 ªÑº[MPžˆMz‡ùJ‡ÛP»±†@ap…Æ`qx‘D®’®UªÑ:ŒLÌ,¬lµOÐ8¹rÏ”‡ñæµáª!—½ÙzC¦­/!¡lo˜¿¥ª}BJ)¥´ÖZkcŒ1DDD¬µÖZçœsîÁ#fBƒ’ùG®A!ÃîSrô\ù®>@$Uë £fÐFÀ4ÕÚËÀ¯È ªN³ª‚Q…&‘$I’$%IRÍ*I’$IÛÉOl ]¢®N'¥Aô9ø&YÒÖŒîkkÚëq sˆ꽿œz¸¶_/ Xy@ŒdÉ=,ÆÌ¢¨šn˜Ö*;€3ã»ÛÞ¯æÀXLïõ4 *®ªªªªªªªwI’$I’$I’$I’$I€mÛ¶mÛ¶mÛ¶mÛ¶m$K)P”R@)Àø;4LeZäàK8ÃZ³àÊ7Z€HJîMaÔ|`q=QH’$I’¤$I’$IúkC FÃ[É–=%w<í 1.aïëÐ~¥3k¬ÚA=í—Šr:>Hë5f†yycaÿÀpáîKñFNù•ؤúê{R/òX„Pq7ægfd{5ÃÙnñá³s­š nÛ?:ÏX9 ‹A`J”©XµƒšMš´D„T‚.$R`h˜°½á$¼ c`Æsú,ØÙM×06×òD¦à …ÁHƒÅá+_ó5ήE@±ò¹[À©£0˜£! 
L(ãB*m¬ó!&‘‘#ÿ¤S¾yjäuÄ«Pk,ÔeŠä*6e÷}ø‡“Ùà‹åÑ&xm§×í ë‘Ë.9×-–­s†ìÊa ü€€þþÿÿ"x¯ê¯A­a©Ëäʉu5+Lu‹É<]þfQ3DfiH3²ämµ À5È•²Ôfw3‰°S \‰–¡Pit“U;{P¬|Žpê¦í“bŒg5ö_UO=Àÿÿÿÿÿ?õÿÿÿÿÿÿÐÛÉi·êv‚iÑ[ýß« h)ÁɑȄÊpó”™§Â< hš¶°´éØÕ™^#} ó²KOƒ¾¦yFlª.G½õ;7’±Q`p….3Í6 ®ò­@­ÉLm ³Å@X ¹#(†$E3,Ç ¢4ä¾{Žp€…|ÔGû&Y˜3öSÕgÔ¬ãhØô§-OE¬ðî¶·ïÖ:‘ÀIµd(TÁdÕhŠ•G¶`œœ'“Ðô8ð ÐÇp½ê}¥.¢»ûgQUµÍ´I»G’I’$I’$I²n CãŸ*Ó]BüzÉË;sj¹Œ—1™Œ1™LƆ&~ž óHÈÕ+ì6cPƒ.»aW³àÊ÷¶‘Ty¸FÍ/15UUUU­ªªªªÿÿÿÿ—a­µÖZk­µÖZk­õÞ{ï½÷Þ{ï½÷>ƃ&ãýÆ›- ƒÁ`0 Æ{µ š–>l‘›PTW½‚Ô-˜³>’Àˆ‰ØdŽUCÊð@G Qè2Y°Yp•S¼¾°Áê{ÕÄïü°ñ˜Ìl¬]k°vw­ÅU¨ªªªªªªªªjù:º*êru»qGêd$ŸAÓj®‹Ð£Ô×N`¤‰8i#ùpôÀe 7ÓhÁHºL¢lÜž?ÑègÎÀIiÛ¦MÚ¶MÒ¶mÛ¶m“ÿæEZÀ ÷fÌlÍ8öÌœd–õö² ^§«dݱ^Œ—ÀfsÌÌypRcÛ¶1ÆØ¶ÖšOšHkMD¤õôã÷*ƒ~yЍªOwt6ËþISU¸ú?.Ðg©rBït½3&ÛÅ%«ú <ˆÿ,¸½LÉq3Œ ¥‡¢ÌÊÉÉÃTÁĈs¶x’k_<£qÄQZI²@½Í…ÀÂç1`+è©Ç²i-: ¦Ò,Ë9w7—{ø0`ñâ|XWb,ðŠ÷Í:ðŽWLJ…ùÞðL-:‚{Ž#xRîyàä…ÎÚ÷Tt% zWþ%_'žÅ†Åg,ŠÃl‡ºçÙ(iÂýLÐ&… ¹8¡tUXCèìl ¬0)˜€ÃBmŠn‡“#Pé ²%c¬Ü¥“ÔFvÆ©ƒ\X’e\¤Ï€ 4H”¨‡•`bE$Î »'›ÀˆÖªúhvâG0 `W ¢žž5G9¦'ŒÞ[›ÚX:* |$màC pUÙ5µÎE²cI:±cW‡@í¥¬¼‹äÌøš3‡±ÞÑñÊÛŠdF¥•Ò«—¶ÅX `é%> GêœÒÑÀ¥­Em]ŠqÏʸ…F¢}À—‹"{èqOO {JáÝ#„Öv6j*À(Ò‡é@ùý–Ú`/¤Ä •eL#¸Z UpõyGôRêyV¤FO¢'CŽšú^ý|j.â‡ÐÅ`84=“…) £;"jEÀàWÕBÎ…f`-öÀG6»@cµR5‘jL‘§°/k[OÍE)ÍŒu…ÐÞÞô&'] î£b–!ËîEN“µ»¨ÀÙÂ{?‚.Ž9(”â'‚úUŸ¶Ø86Œì”Ò$@}/K©—d\ÏÁu$ˈ©«Ø‘Ná¼:v=Žx¦û$@…b¼ïKÀ¡‚#lÞ\B’È8AO ÷¢ÏT)ÀB€äˆ)ˆ[hfÐ…XΪG¨k(}ÿ—Ñ!3äŸ÷ß !€·îZòX¸ 'îV‹eu¿’f÷Vç°ÁòøÞHÄäa[÷¨G¸º˜üDŸU0ËÏæWÎ4,ÓæÞ“~Þ|l[€w¤Tœ‰p'IªWØL¶hrOA‰ì(Õuy¨›{ëFEBÅöûÍ!ÀëLKñfT‡)|ª4åõ.,››u¦ÎÐn6Ý|®)Ýþàë»í©D,“qÒj]µ¤TVb>ŸÛ^ßÞçTœ5;øç®•ÿðn=»qNqŸ)scؼ¹Ì…ÐJDº b&r8¡T;p<”…G²]GSJG/vlœ›$Ë y¬XÓ:!^ü@5O>kÝ»‹ê‡þÁý À Ô«  pr‚fŠ”DSö’J[~®j·9ƒF. 
I‹¶GMiû™èiq€¾· 2ò›qî`’˜ªîF²`É>Oä°"ÑéÙñc‚Wœó—|~þ¶FÄ] ‰›Aáux{‰•+7 "å¾ß]·#j;£õ¸ÙQy›§ÿÍòòŽ7mtÔ¥Ÿ_q˜â%HÄrWuä_Šßêõ$Xˆ>ýìJ­!ën´ª‡­ÆífkkÌž€°Ûmìn›ºßæî´¥{´•eÕQüÛ·ˆ ±kÒó*·±wŠUã&ó‚5~¤[—­Ú¨bE +¡Âñ¸™·PƒÎFÎ^’ƒ¼zÿÙ­R-»*Õ:''ÇREˆêíH”EEMÓ^ï}°ÐÜMFIǵi×Z5V]­öè5¬ÃQ#¾ç/û¿ØG´Ï'Ÿ}ôÕßÐ+íõ=³6ù,\ÜÄÌ¢X kÓ¶œ2#²ReEù[R¬ŠJUc³åžœ +Ô¸q wÖ¹.´ÝEÍZ´"±zw(/¶®.ûªã÷‹«[K¯VmrÿzÒ_v»±ÆØ5W\MT|cZ´jӮü6òëîµ£gÝ/q›R)¯¶öê-Õ¿úr³ é2;PVýo¯}† ég£ÆŒ—#[nÇËÓëñŠœ±ß³æÊo¹BhŠuÖaGÌ[pÓbOë”gEzô*J+:ÚˆêèeÀg6êC*õnuv*±7Ž@¢z³=Å5 «Ûÿ°JòèÏÛÐKI룿½÷„‡©ßdêpë’Öå~›|¹>¶y¬szpë¡+™§Í¬9ƒß‹èËé|Úßý£M 1œ )Úf¸VÁAÐ~äÕp„$E;:Ý6†˜¯:íÄÍôjjüVŸÁ‰†<÷ÿ!hÑÇî€.Æúí;e^þDx £Á¢.Ê’´B#:…ßKÌeØÀ ¾Z]°¶d­‡}a8+é10­ßN± Œ!a5œ+CÉ ŽßG¯ùÑýt™TÖx1VŽp[9…ß+›ñ0èf_R“™Œ#Ñ»O€{d%Ìt°QyÒk~µÌîûõ¾$noV¬X±bÅŠ•Veßn¯b5™"§•þ3º½„—;å“ëë”êµ¥ ó7io"ˆnÖñ6·éoñèv»ºE ë{Ã÷5q•0çl7²È†M²Þ Ü6/êË:ùs¦l1d㬾åUùÔ•ª.<ô5Czýgû:ä%‘æy¢lø¿;‹”÷£~õ{~†–ö Wãó4ù¨¼úH?t‹,©ý(lþs§tfŸé³Ïÿ.?{aåCŽú«[ì7¤ úæõó1ÝÑêßK¢ “íw2JI°;t­Kbp>l¼;Ö=ˆŒ.ÐÃðkZ. ¿_Ò·inB½Í¤<ð§Ëp±Œ.KtyèzÕ+™Ðzäô ÔÕ[V»š<ÜlFÏHÅJ )'úõ©¸Ô˜Ã4Ç9ÂŽÛ‡5YãiIÉ>½jÉÉ çÛÒ“8 úÓC¾·+ Û¸à;ÕˆÈb23銈0'€Xžâ´4“7ðM¡*ÇoM+•K}ë>ý™ügâ`’µˆÒ-Ïü¶ÔXÆÁÐ ôŸ„ü¤/ÙÿáƒÃRÝZþûÌöf#êkßô?è$HΛ«Z\³–1 ß'ŸÒº¡ËÜÉÉö>}ÔÿdMFÛN€éºíµÝÚ¦&Šæ‰Y¿Ùq¸ŽÃ~#¥yÇŠþ øŸÂ­)ºÑç{tûL æÅ\¯OÇöåþ©g¥Âe¼º†Ò/ïÍ Aúלø£öˆð‡‡£”CtڥŽ#·)?ï?,ÖÅn%)\Z².v;ÄÔ4›+ù­æ¸dxzW¬ü[øÉkãûf·¶Ð4ý)Œ›ÍÌ( Ãc±qŽ? 
ò’hÉ*âEôqýEO2[Õõ47¨ãÞ©Ýiuô­-Å•¼Ò:l.ñ‚HôÏ^¡‚nÄ€ÇõéùÔŸi 2Çb¹šÕâÞ¬]máýð¿ð~¿s¤Êœ›gbð8ô Ÿñ¯søeû{ Šßâ…s¤þ_ö„q#ûÃõȯk±Éã g5 ·I4m±¬‘‘†F‚+¡b[V¨®sÊŠnñÑŸ¤¾ø‹áïX%懫[j0°÷RGÔuÑù€Žµù1sáÛÔ¾ð¦©sá›õ}þe£¶Â3Ц@ü«Ù¼ûnxçÁ­§Œ´_øI,í 8bP=J÷ü¿¸û9ü.Þ§4½µ>—>Žý«|H`ð5¯†-Ü‹¿]­“<­²ƒ%Ã}<´(ë ¯ÆÿGqwãråm%1Ï}<¨Áp ™öÿ ýñWÆKŸŒÿôÝKÃêÖÚm–žmá”Ev<‡tìTú…‰xþï^k°»øá/:_k ×®£&;Q´A¸Œ’z÷©©]R—*Ô× BS4Õ›>*3oã·µR½G¹eùTg{R-ïm„Ž™b¤Žö!Œ©Ô®?^•óKMOn}†-Œl2S“ßÔÂÕ—"9d_lçùÔÐO¹Ã)ǘ ŒÏaÒƒ7¢9íkÃÃXd޲вÿ_ƥѭ®tKu„[¼±ŽÙ·=ì:Þ+Œû”†9:~4í¥‚ïrºê±|Ô–þÜg˜â¬Áz’ŒÅ:7°9¦Xö)‘ÚFÏ–E-ž )ËBŸ‰üP|-…õݿì­Ê»1Âî€<íÎ|Ž+µµñ;ßjks¬ÛÉylNZ(œ)'·P0?òÏŠãMÐÑXåõ{däû“ý+¶Û"€ÁÇ0âºTœ"­Ô纔µ;;ßiúäâÞXOÓâc‰#;™Oðår>´ÝFïÂöVÊ–Û®¯¶|––G'ë#g Ÿ~}®7ϸB§Êz•<Õ˜n’>]g©ÆM?kuª&É=¡lgpï9U ÙG’znêO¥]ÙHùFJÏŠî7ÀWÎ=óVãœãå"FÇAYnm{r‡%O­N’¦ppùíÖ¨®çÿXHÏð¡ÇëVaEUôÅRî+‰&‘krwyKþ쑟ʯ@‹l»"D\qÀÅ6ÚÙî%Hã îÇ ª2O°ë^ ý™¸æ¨E¸.¤dg¹â¶ìuË«b¬· ãN+”mHCs*Ê¢ÑA3›=@xÇëUgñLñiOcŒ Ó­‚ö=Qñ®§ª•k»Éî ŒnšBÜ~5‹5ü®NæÜ={×#Œ­ØbUxÏû#"´ Ö-®@ò¥V'øKsúÕ-æ„× “ÉZ®Ó0>½i¾qÏ®GjÆö<`v=ª¸Å33©ùŽG¥Br„;sèiþ^Éœ°¦àG¹©¸ÈwÆXH\·À>¼V¶•¢Ýë‹}:Îkɺùvñ™äzû7xûÄJ?\ZÆzµéX?F ŸÀSI±h(1ÎrHô¡ bqžçšúkBý‰µÛ¯©ëvZxî+LÃð!GëKâس\²V}W´Õõs©·“è9eüȪ䕄Üz3Ⱦ iFïÄSÍ· <±éÉ×Ð:·ˆ4«iãŠ-kJP‘€Ê÷±‚O~•ó'¼âL-µÍ>ïKóIÒ©)v·Fü ®WíL8;N8ôüëžQÔÑJÛG¤ZÜ b3J&„€Ù ×ò®¶ÖÚÚXÇÉœãï-bé6’=¥© ÊD¿tãì…Pi<*èĪÿ2>§ø•«YÜ Â“ÆeþÙ¶r›†à£vN=:~uè‘F$@ÊF¥x…¼Ká½n8eÔgÀr“ ’@bIãÖ½¿Áö‘Ýéý•${#þ®s&ÀW° ö¬?v݇u} U¶$Ô㊚;cŽ™çÒ­-£,¬G¸ÏøÕˆaXÁ0úâ±pkr´3M’>7F§ð§G§)dÿu°+im£lìqž¼ÔÉn‘Œ²ä²idGo<_vRßï/øTÊÓ+`¦GªžMmÅ>”cšãÈsü.§úf´#ÒìîGî/-œžŠd #Íh•Ñ-´aéZõƉ} í¤·pÑÜ@Ì’!Æ2yõ þÔÞ<Ðv§ü$Ú1ƒþ¯PE˜ŸøùÿZàåðÌÊ2b%Op3T'ÐÈlòòOàsYúŽ£5ŸÚÏ0žÈü3ü¨»è5ætú‰5 ]‹Í.úãO¹ÿžÖÒ˜ØBA{W·øGöÁñvˆ!T[mnÝx&eÙ)ï/r | þ!¾™ÊÃl±ã°š»k«j$*Ífî¼ßÏ4Ôœu@â™õƱûmëó—vafH^VªÒ¼ÿÄ?µµ°TkÏf‡$-¢$$~ gõ¯[‚ø-ÆÝÁÁÇåHÓo­K©.ࢶ±¯âOêž*”K«j7z”£¤—S4¥~…‰¬7µÙÝ"û vðwã±ê^ñ÷Ãk=&uo‡Izƒ q­s~:íÜp~”—½Ô§¡å|<ñœ&lo-J–ó2ã‚0j’-Ü3¼7s‰£8eŽTb? ׊|:ý£u\ÆÖ:”rZÌš}ÞZ =xþî¸>õïÞý¤>ø»RûV¥¥jS8SÎÔ²«K§_F§«bGæ3]T  Y&Im•†à×òF=Y@¯uøSñßöq°µ3k{ÐBÆQÅÖ¾ŒÐ¾1ü7×-”iÞ1Ò%ˆŒk🆊޾­äŸCóSâ‡Àˆ~$øYô‹MbÊÎf•%©Y:g¡ç5à×ÿ°l¦±Ô´ËÀÇœŽ¿¦Öûñ'á¿„>'éÄZk:P¼\´L‚~ nýkäoüñ„56Š8&ì$–û‚‘ëÔÜåK ´gÀ:§ìñJ$®™muŒÿ©ºAÿ¡b¹GàO4ÒÂ_ Þ¹L(%ÿÐs_§~ø9ãoL‘¤—b"yw¸m£ð ױ韱µÄ–¬uKÁ ¨ŠÖ9}rA?¥i’—@²? 
í|2Ú‡b¹“@eÔ`ŒnŠîÝ£mÝôõõ¯¡>]]j4›©l˜etXcP°ÈäžÝëíO~Ìz¯ƒ4ɯ­u+}^8†vK§Ê„þ1³ãêExµÏ‰¬4–0_èH ðZ ½¸ÿ€²çXTii#Dû|~› ž*_-ëu—MòV4üdÖž¯á_„÷rO¥ø×U·™GËkw¥yŽÞÛ•‚þf°cñ?ƒ¯A ¡lþÑG"ÿ㯟Ң»³ðÝòŸ³êi¼ôI`t?ªãõ©S‹ÐM³“¸X#¸‘a,cìw]¤ŽÄŒœÄÔK SËç³þ«wáøÆ|§F_ö²_@ÄXtÁ4⺦.`vWB%Š9N~îv‚3ÀïÚ³Ú6V?ñ/(¼™€­DÑ®O sõþTó¢ßŜĤüñ­îˆæK©•=ýƤÞê5ôBڜƶ©8É9ççVµ®ž_eÕÍä8ëö›`ÿª–?¥} ¡kþÓt‹›-kÀÖúܳDÑ}¤ßO ®A° ^?ݯÎÚ¶â}fÈ!f†æX° V#ù Ú4à×b\»j}‰¦þÐÞÕŠí'ûÌbcÿ}í®¯Mñö“ª.è™dø q üÅ~yÃ7—"Æ:t5ÒøædñV’ÑJ±ºÝÄÁž@Š>aœ±8éRè.Œj]Ñ÷ü:Ε9ÁŸÊ'Œ8­K->Æù€Žêfè €OçR·ÀŸ^éÚº,/jrAÂÄ |þ•ç·ºmö‡u$”qYH™Ý¹Z6þuƒƒîS³=Aü,±ïSÐŽEeÝø.h>ê:‘Ó®;J×JÇæZjq¾)'×­lÃãŸ@>MwÍ—h%?øöhä“%¤O.™{h>YhìÃ5öõ¶Nê2 ZåüýTך^\N‘íŠyWtŽÎD„’ÀìçðAZ®TJœ™ôï‹t¯ÙòÎÔÇo©k‚d~ÅI,}ĪåŠð¶ƒ¥·A»½º²#­ü)ƒžÛ]ã¿ÁË¢äͽžLŸ™É9ÍXXæqó;/¸â²K›E£U§@xÏqÍD×$© F1È㊯£1åÉ>»¹¥ûŸ¿Èÿk‘YÜÒé Žæ8#XÑ£€ €)ŸmV<>㟭H!´@2Ã=\ …îlc71qØ05Wl/®‚ý«~pÌOcŠ`Ô$€¬ß—øÓ[U°SÌùö ¡:ÖŸžN}Âi]ÛB“±ñjGlã½áŸÞxRây¬–"ó( &BÜ{Ykdy îþUb;gÇ˸ úšôÝšÔ唓Œµ4µ?]ë:œ—Òí†W ‘TpÏ^+Ù¾j7]ç™4’…˜¹‰ÀÚ?Ƽ,[LÒ"’ø.«Éõ#Š÷?€Ö.‹~Lgã°ÿdVrµ¬+r«­ç»)˰çÔÓ㺼µ9Šîx±Ð£‘Êœ–ìC¥Jm¦=kžÄÜè¼=ñ“Ç~eþÍñ^«cþY‹§)ÿ|“Ò½Eý³>%骫6¯ úŽ×6±“ù…õ¯xp3Ž=*´äD '£^Œ.}&¿·¯Ž ]¯¦è·`Œ$.3ù8¯.ñ¿Ç¤ñÅË\]ø>ÆÒáòYìddÿºÛ«Ì¾Ö‘¸gÞ¼ly9ÏZM7£͸бúøiYÐpÄU;ÝÏË}JÑË™Z¹ô‚¾ ø‡ÆßÙ4ۈІùžÚå¶ê~b+è}öBºŽÜ ÿÌîG(–°¸Ï¦J_~4ñ¦³¢xÆî;=BXDev#åùA®›ÃŸµÏÄÿ D©eã RnœÈÒ¶£ßsôOã?Á+ï‡VKqní¨ÀÇdºpTÿ¾ÒbAú¥|§û6èþ-’}A|,$ÎÏ$±_Í9'“†Èë^Y¨~Þ®-žÞo^\@ã î%Sø>k!?mŠIl¶ñëqÇà*Y¿¨J©)ý–/CÐ5OÙ+ÃPXkÖMÿN÷Ì£ðlåî?f=O¹óm|C©ÙÔ\iÌÜgÕx®Nïö™ø‹ªϬ3“è€,UŒ>3šÞIeÕ%‚ßy±üêS©Ü­¢î¯tø— ¨ >v)ÿ¡b©‰"˜þêå?Ý`kægøÕâWR%½–E=¼çÖ¢²ø•us{oÌç“¿ÿ­G+¶¨HúsÌïÜ$j=H/Û¬¡=ü\zH?¥x$>'¸•[‚#<^ßJ²šµÛg/<÷¬[±ªô3i·›Äzeí¬¢e{a•õVo輋~É_GaÏ­{d·]€%އý4@Øüê•Æ‰gzÍæXÛê*ÿ!]®’µˆp»û;x¦ÛL×µn‘æY­ÁEE 7|ž:ƽoÆ^=Ót-0Ý˦³«|±+¾Ð팧 y›áëMçϳ‹È˜¥ž‡þU.³¤¾¯l‘¼Í…lÃ#8ÇJÆsŒ¥w±¢‹Hí<ñoÃúÞ¿–«Ú_è ̪_?uþé^3ÿׯmø£ xcÂ~ðýÿ…¼hºû]nKÈã`cyö‡Sšù+CðtZvµk¨\l¹û;‡ò1ò¸Íz7> jþ8†u9bx-åy`†+h¡HC*.Ð#EÈ5드úÕÊtÔ_(Fm6tøÎæ×¥ä/žz)ªrü@”p×xöGü+ÏD#jí¦˜±‘Á®[£K%±Ø]üE@áf¿p¹ç2öüêµßÄ- HŸÊ¿%ÁÆe‘@þuâž=e¬)ýسÇûƹåu²’ ã<ý+¶4“I˜¹t>гñ¼ð‡Šu•FC¸~b¦ÿ„º z»×¼ïÁðãA¶qÎüœãÞ¶Õ àžrÉÙÙ#x¤ÑÔCãu¶f•¢i† Úê08õ¥oˆr“òÚ[(÷Sþ5Ⱥâ,n@Îúôàºþ&¥M­ŠåèŽ:-,‘ž;šõ¯éá[$Ö"ñ%Õ¥£JÐîÇíJUKùƒî0\‚¼ñÛ®1X3éšuÕΗm¤¼òKqQÎ/6 ,pU[¦ÌíÁ>¼ÖµŸÃírgh¡Ñïf“¡ÀÏ϶5Õ&Ù–Îç+â}:Öoݽ€O°6 
Æ•_,Éò€; cŠú[öYðHÔ¼%ªJÑ‹í ûyj­xž­àkF‚îô{ûHRdÝ$öΊ¿0HÅ}™ûi1Ü|=Ö$`›.OýqŽ…+ÙÏcI~DÊ“úS$ø_Ë~éþ/ñ×…~ÞÛZk—­g5Ä~la`y2¹Æ~P{×)¦~Ð߯ïww·9c²3§aoÜr $àԌ¿PåðtrF~A‚1Ò¾ý¿>ÞxƒÄqÐ’;ó¢Û»jPÂཾò¬›—¨Ü2G¯¢’j/R ›zÇã]zùu‹Ð:s;úš½oñ7ÄÖÄmÕ¦`?¼¿˜¬©¼5©§Þ±¸_,Õ94›¸¾ý´«õC]/S´³øËâx_þ?NŽ%þ˜¯ªþ-ÇÄÏC¬Å[íF;+ÔƒÅ|O‡‹g6²ùr¶Ô;zœôö¯ÔØÃáeî›ðCþÔ°šÆòw¸• ¹ˆ£€fp¤‚3Ê€G¨"³šV%Ý#Ïfø{~¿uw\VU÷‚u$V>Y5öLžM¿êÇåYWߣ*G–?*ç±lü™ø¡¤ÏŽ5XÜë.ÓŸ`+’ÆT?_A|lðÆ~/ø‚Õ oÚ!Ç|]‡í?ðCß<=áë½Mk‹éæI›+‡TT9Ø÷nتöªö:T•×Cä ¡|€}x­­Bk¹T•ã5¿£hP]x‡JŠà/÷Q¬›þîÝÀûWÒž2ð×…À×Z&“kgw¥´³Ä 3þîBvàò¹çé^N33úµjTyçvºÙz— jI¶Ï ±ðœBÄx5Š´³Ðo¥Wúý+Õ´ ûRþÒÉæT„Kpá#BÇv=g“ØV_Æ Káß kq½ÅÉ·¸û!’Öu‘ƒ.YqÕOfé]›”¬)-4>_u ã5oCˆÉ«ÀéžDðdç¯øJÑ®5è” ãôþµè7h™mûK¦h—wiñG¸ngµq+ñ/US·ýðƽKâÖÔg+‘æàW€ÇËÜÖtRš»Cm£´OˆºËp¸ÿ€W£xZâ}[C·»¹ÃM&âÛü«Å!$ã¶+ßü¦J<'¦‡-Ãñ$Ò¯hÇD8Ë]G}˜ŒSŰÏ9ÏÔÖ¯öd¡—å=sÒœt÷îŸÂ¸õ1^ØOÍÇ1ÉmG–Änàg–=+e,çåêyâ’[ÃaJžg±hÂû0dSHmÀtcé[’i¬IH? ºyTvÇ@O4“w°[CÂþ"ÈO‰yù"qõþµ‡l>I‰8ù1ÓÜë]G‹FÿÞ>V ϲTRÀô<¨{1vHäêz†m4=v‚|‘õçŸëZ© ‚xœŠ“O´òôëXñ€‘(ý*È·Àõ=Íy²níI=,Rx‹ŽO4† À}+@[îÛÈ4ñkê+=^Å­u0á´,±`ÍÆÞ¹í|פÚ~ÌŸ4ý³Gàýv̘6$‰¿"A¼¿­|9©ÚÝ Vâñ-åYZ5±xR ó;ô¯·ï¿ik–6pÜj+¥jSFŒ-.Æ×ùºsÐþ­Õ[-Iqv?:>#øgâ^‰¢yà×íYUí“KåœôfÚÇ€qϯjõÿÞ¼Ò¼=«xz=KÄ3\k:}Äi÷s©±¸žHcíÆÏQrÙR=GgûPxÖ cK´ÓtÈä½´Šío'ÔáV‘ȃ 2Ì¿¼98ÀÊ|ø·aáß›}ZQ«[ÇcæZj|»@[aÞÄŒŸà ;æ³sªê%‡¥J•«NsÜá<ðCâ^‹&¿§øB{K6 /žF1Gj-ãŒ;ºH1à0ÈRNxìkCá_Ø Ô¯eÔB[ßù³KʉQ p ¹ävcë]“ü-ø£ø3@¾ÑŸG´MVÝu&y'2Ù´£q…”¯r^#¥rbV"M:-%çÜèÂ:¹¥U;¾ÇÓ~Ñâ×4ËËí+Á‘kPC*ÇÖÑÆ¡ºoR»ƒd {VŒ|eðGÁ~¹Ô¼}áoAžÚak*K¦2 ‹’ 9¬À“€$*Ïìï¬ø«á…ޱckæ]I¿TÒH‡Ýûè œžp«’W€›_· x'â§ìï®Cy¬ZÅ}¦Ëþ²‰žuȪÌY”‘Ó9ÁÅkF¬ÜR­¿[uãOÚ~ïfyÏìóႾ$ø‡ÅºT°]YÌ‹ýŸ¤KlÆêÜ…Ë™”¼í;H8í׺ÿß¶]‡Áh¼5§ø_ÂWZåæ®$M{¶P.ÖBè îÉÆÓœñž þ Ùãx3Á¾-Ô'¶kW’x-bQ i$›بáwÒÔÿLóäÿ´ǹ¿jŽú…¨h’xOðûO¹»íÈ­±‹J¤…äW Þ~b9®ˆ·fÒ%Â.ª„¥uÜ÷ï~ßÚç‰4Ý.ûÁÓhªÖñ½ì÷Jè’9!Dl¼Àq9äŒq“ê_ÿhßü!‹ÃGSƒQÔeñó-!Ó-Ä­åàí’pÀÎN¡¯üiðçÄgƺN§àw‡_ðÔÖVÖV×Ìгcýär»¾J³ïÁ\2€x½CâçÂóãO‹:gŠ­üLÖò=«\hÓ)ytÛxÔ)E]¸e,eÜ;¸À9ÉÔ”¤ìÎøà)Öp„tïÀòïþ¶×¢Öuë&þÛXÔµ‰¦¶½’h^$„±Â`R«Ü @È5ñÞ¹ðûUðÀÕnoínõÑ (–çíƉ•%GLfŒ1'8ãXЩ:\ö»ÜÇ:x,CÃÕ|½5ßúó"Ñ~iø›PÒ¼G¬[ésÇaö‹R'FHÀÆFC)giÏo\Mká[½ µ›mOIR€Ç¶ÌïrpüãîóØzr:×aeáßxÛÄžÖ®tÁ®é–)^Çmp¶&TóäfØñ» óˑʃÛ|Òõ/Š_<5ðûÄ-ªÇá®îþÉåÎ[$1¼¢6rctXÉ–8Œl°¾íêÊÚ_ÐáxªS—îct›WîyâØÏdªe?BGñ¦Áá›k:6‡ªÜ›=;Q¿‚ 
‰ƒ„*…†pH 03Ü×é^“û)øÀ‘ßëvk¹–;c¾=@Å4m±WæÛå‚.:ž9¯•m¸l|=ã+- FӬ앴պÃkrJÆGYÜ6 c¡³\žÎ2©³JNís#â_Ž¿ôO|O¼Ðü+xu 3lFyÖV Ê2¥€¯?B9=jø ÿDñŒ0ê1ƾd,Q’Uuc•ÈÈ'žk¡ñÿ‰âð‡DzÎgâ;ËÇe‡ZºµT™CƬÄ8q±&üà|Ë´ž+üðœÿu6ßE C6OvL‘»t)aþÏJÞ»‡¾þfŠŸ¶›TÖçcñßGŽ/†ï§ÀÑÿhÝ´~L€wl ùÚ{…Œ§v #¥~“~ÓŸ¼àøâm-þ!I§Gz³iöÞM³î@xN×pÛrÒ9 dŸ„Ÿ¾|gñ5¥½”SxSÇÖ°¥Ìé$ ö[‡Ly»Pôf$’»TÏ84Q—"³ØÂQÒýO‘õÿÙïÅÚ›_Gg©§,k)º±™$Ž@\î8õ~µô߇>ßèÐ-u=6çOº:|bº…¢~Ps‚ëšúÚ÷ᆕð7K{]SÅzuŶª‚HÖiE³­Êò‚G¸)EÚÌIÚr+€ñÇ‹&ñÕÜqê~©=êZi’ÜÛÜ^ZêAÃdÂàí p§ª…²:ä©*ó´\~gl©áùy¡={(| ¦x8⢛ÂqÛÄòKµ#A–wáTw&½£XøEâ½.;IRïK¾3†>W‘$D@뽉Î};WâƒÓx•dñήÞÒ‡ìö–îZ+Õ!·ØWVÈt8¶»Ö ¾ÚX…¿îFGlþ©ð÷@ñMî²gÑ´Óª$k$’ÛÅ.¹|üñ À†á5Ð|;ø'¢-­­Ö©¬¥Ñ¹;­T´hIp 'Ú½·Çž ´³ð‚\ø&8tÿØÈ°Ãuei2Çq ¹!3†`[©N0 'Š…ùb´g|0•©GÚ7ª#·ðO‹µ–²|DmšŠâY’'sOÍÏ#21Ÿoø%£ižð†¥¤Æ-áºyüû˜-ÈÚ“ ÀÀà:g½~~øcâg‰O‹-_VÖïdÓ^A ×”ûLòpg¨ÁÈõ®ãâGŒµ ÝøOYøc¨_Çk«5Ä·–åK¶wF¤Ë·çY0OAŒ`V’FÚ“V8¥*. A>n§è<¾´ŸNŠs¹d‰$ç­r×Á§E40ÅqÈr뎧ֹ7ö…Óìlç²Õí®l%´‘á,#2«…b€ž@ã8¨> xæÏJC#Gú:Üíò™[ià“øW“7b¡JRÑ#±µ×.5»K—ºeg‰Â+ é_3ÞøÎÊë㈴+æVs:¬-0Þ¹Ú¤.?»øýú7Yø÷g¡^®Ÿ¯[èïc¨,Ü]ÛJêÅ–\ ¸,O¡ÈW‹x—M>"ñ¶Ÿ­YëÖ×·7ÄLílÂ*„¯˜m\Œø‰ª¯Š¥ø›ãMON¼žââÚÛG¾°´Í½Â,e² ʡݻæá‡¥zWÄŒ?Xäá5XÚªjŸ#¿CŸ-ÂKêFJÚ«™ÃüHøëâxÛÅZˆüO¬_XZ_´6ßbŒGå,Ü Ë:´“ñîx5ä¿üwcñCÆa«êƒMÒíÙn'cº;fy%'c¾Fâ8' 3Ü Áø¯à¿ˆz¦›7Œõ½S½½šÊï]¬î#v;„qËÀ'<ñŸ†eÖ½â7‚(˘,^/;‘84„Ÿl×¹á>dºioÃ_Äú ®QÑÖýu¿ã¡ôo~éÚ·Â{étíÄ7Ú„:†‹glH’唃>å#j6Íù@H<‘ž‚‡ÂÝ%|#§Ía‰[µF–L9’'å‰) qž˜¯ ¿e øSÁÞ I‚øŠõÙå‹REÛUR=Ãjå@'hÉ$çŒW§jßtŸOö™KZ<047m ä,HÜ¬Ê åN\6TŽw šåÄGÛ¿uèua1RÂÝÉ^èüÓø¿ûXjŸõ¨//,íBéÖâÂÊ-Dvà’¸ŽIÏ语¿cOÚƒÅþ?ûv“}àM/Q0ÚL4½i4È Ûp;GP[`†$œñùÕu០øòëK¾‘AÓï%µ˜•ÈWG(xôÈí_X|7Óïn¬ôÏ 7Šfž-j7‚É´W6ÑXÎTˆÚFÚ Äû¼g¡º•U;F"¥‡uã)ɘÿ ôâÄ8õߨÇâ Kûf¸i.§1†lpQA · äv¯¢4ï Cñ+ã&áUþ ]{'H@d³•þu¨ß(ÁÇð‚¡}û,?ÁÏiòiº¬—Z”L¢î32]Jßë%ˆŽ€±$†Ç^½3•áí/Çß ücmâ=OOŽ ɾÙn¢•CÌ»"yp ŒîuR$*œx&(NRÄJRzlq£,,cM{׿á©ô¦•w£üø{o¤C©Üê©¡ZËr×ú–Éæò”´Ñ@ÈÈÀ±_2Ž:ïí¯é^!³Ðlî-ôÛÕ”[½Ò dŒ)ûÁð¤w äqéZú׋-¾,kM¥éP ›©´k˜Å,ŠÙY!`’xžW ÕÈÏá±ðkG»±ÕlayiÄÈng(1Èá—œôÈëÒ½<Âu(B1¡÷êqe´èÖ›ö³³õ±ã´§Â}cÁ>9]jêæm59æ–Õ£o-áŽ96ÆŠŠ9\co 3ï ¯à›¶cÃú¶·¤x§WŸ^m>wQöT^9‰°Œ0Ý‘üGœjOˆ_'ø‹§¶±¡jÖš…”vQ´Ö*ªRÞM£sùÀÁ>§98éŠû?áζú¿€¼=wpžMËÙEæÇ´¦Ö 6’Húk–5ÔéFkçÿ МU7 ²ƒÕxÓâÛ|0øiá_ H<@šš_›èµ»r¢•Èl“Ééõéá:_ˆµ›û¶é·I~` ˆ$r’*¯ 
#s–iÆr0IÀ¯¹ÿm_x{ÆÚo†µ-KÄIáÿ±K-³\y+.ñ"‚uà'¯s_øCá:|7ñ¥¯ˆl>%øCQµ·º[¤µ¹¸h•ö¶à$BŒ㑸Šúj¬¦¦4*E)nïßÖß½ X•R×£:TÔôŽztÿ ž‰´iúfžˆ©4lÙ•ðÌǩȒ:×Iaûêz†£7‹ç³¸²Ò"šI"ð½ðU¹¹?0,I|Œð@#‘× &÷ˆµ¯‡8øË¡ë–~%Ò<=â{ë‹t]Ãq½Õ¼ø` £b£>1Œp[9ä×Õv¾#OÌñ~ØXí1ƒÑÜg£‘_;Œ©Nî/…íÝy_˸Ô%J¯µO^«t|[{áK:Vª¾½Ó¼9¢Ý6c´‚X£îé!p¸%²ÜÛ€Î:ù×ňž2¹ð̾Ó/$:w*ÃPŠQÉðÅJu`§fpx‘Þ¾ßñ7Ã+Gñ5öâ8VãKñ=¹„Ë"ÝÄÊwE&W†”l¾9Ú2Hjñ‰Ÿ ô ÃÚ_…a†çQñÔ<¡"ÙËÝD\®ã!pP¨b9<×·€ÂåøªQ„¢ÕH®Žéÿyßk=ü7ŒÇ©ÎQ’å•´ÚÝ,Œ¯†ß¾+hÞÓ—XºÓ㾉v\ÅsÜ2|ä.éq¸ãàãð®’/‡_sè(8ÜZXÎÝçùÔþ3ý¯m¾xöÓÃZ¶‘ht½BÖÔÉ4«{å1„K©ÊÒ!AaÇCÞ´´=?KÖ¢±Ömm£H (ÙÑâ•IãÜ}=«Þÿnßxsá?ÃKÒl®®ïu›†Ý}¨ÝË;G`¸ bYyÙÓ5ó€5û{/%ˆ”4Á‹‘èOùÇ%R¦;k~ Z*òŽñk¯s识~>:EòGqšØ:»E¸áñÜ­{o<§Ýéúýí”Òiú\š$¢K%‘dýÓ¨å€å°FÆÞ¼×ÄVZÞ¡mÚå¦Ü6ùí^ÛðËâ íͅ࿾µ‚XpápA;³ŽäqÒ¹]ÇÞŠ½Eãi¸¸Ë­Ïœ|Oq#OnºM¬Œw­¶Y³ÇP­dÇâë¨0Š…’0ãHö5ôï‹>"x?Qðö±§k—©,—–r¥œÞAyc›iòÝ[—æ ϦsÁÅxfá6âJk§Óõ(.’Ö?1Ñãvž%*HƒW9äc?Jö¨ÕöªòVGÊÔ¤¡nW©SDñn§¯ë#Çyª^]HÌñCq6çvÉ$*ž¹=1ŠúKOð¥â+FöOø…þ×)lu+#ìÛƒ²I˜ÎG#v½Sþ ÙàðePH'’âñ!Iö€Ì‰q‘îÆ¾‚ñ^–Ñ#’kÏÄÔMòÃOëÐÞŒœ½¯­ÿÌù¦ÃDñJØÇmyá« Kxåˆ<ö²3(|6ὺ!n˜< w¯ñÏÃ_‰Ÿ‚ôÝ&ÂÇMˆÛÜÉr¸"–7$žx[ÌsŽÄ~Pëå–V]ßsr.âBô®h×?1Î*£¿-½—t_üXðŽ›%Þ«§ÃwfŒVêX.îHÖMÀœqÉúW[Ä-o]¶O]ØI¢êz¤›kÄ–7Gc€A%@b¸ÁìO5ë¾"…§øeã[e-Dè …`ÄsÇ!Hükå§ñ-Ö•â};ÄòÆ…ôéa–Wl|±7ȇ댜z½l>"ubÛ8+B*J) ý¡þøáV½àýâ èé,°ËpHùžœ„S&Õ°ûÞµ™ð«BÒ4=v_´”Ôm»ÄÍW¾ÓÔú©|a¯x£ö“ñÝ狼EÍݲÇ*ÇX¤h£¢˜ó’NrI5ÆÜ¾¡áÝNvF1yg$7Ý"±­z¿ºO^§~¤ayÚög×–ú7t v]ËÆÆˆ,„Ní1—Ë~p‡¢’p†+ÇGÍŸ>éÿþ$]iv» ‚8åŽb0¯¸r®¿òͲ:r:pkgAø­áÝOÁú¾›âMçÌ–Ñ¡ûnž‘¼ˆäª¬¤>>ïûÜœ3‘ã°†ßV›ì7Sê¶A€K›¸|™êÛ÷ѪÂP)9IЧ^*1GܱçÂ{Œú&³§k¥í¥ž‡-½ÄÖ …Éc6TÈÊØQ“€>fèzýì±ðÏI„Â4+‰”HÒ†}Jå ³rHòÝ@þ•ð?ìñWøWñG{+Léú´ñi÷ög»…±½IÊ‘ƒž:ê¶½åùͰñYâ\á+'£Ôጜ­.§žê¼'¬ima5…ë[ùkƯy¹UF ”àÿ:ñk¿Ù;Ã_¼cqªèŽ«h/mÈXä˜8Eb7¨ ÉZúF-^W"CŽÜ×ñÆh^Ô5Ea¾ÎÖR‡?ÄGÊ?ï¢+–“”]£¥Â¤¥$¹ÏœôïŽÁ,?b–êÿF¶˜Ú‹«…òÝŠ1ƒ)8<xŠú\þ×> 5[«¢ÚN¡%£ÁÕ“¤»‘€vÍž›½ò@ɯÎßO«Y_ZÜM 7š6¡o7O$¡UIϘD6U‰<ߊâ±ÑΓªø“C´¾BÑ˦_j0¬ÍfVFF`OàŽv×äíâ [à?ÄíCÃþñ”©¥Ïf³[êšEáMepÒ'uÀÝ´ŽF26ž3óœÒ Ó#;M,îwIêka­Ó'ží3ôÁ.¬þþÖzÄ·“[ÇáßÚ ¿ë~NñómüÀN\mØ$ÿ+êHm¼9ñ{F{ Yๆp–¦+œ}ÓÓïÈ$úƒùEðÿÁþ8ñF´wšÆ…mãz„é;^JÉ”o¾øÓáψ-›E:¬°™dH K2ð¦A, ÊAÝcÓ8¹ã϶¡à½bëÂ:€ð׉,ížf±áíçt%¤’÷‡8¯%ø7ûZëZÇÅÿxKU±šÎK›Ÿ&tf”m%CžÀÜgǰõ 
¹ºžI­­ÞI¸‘Œ+—ã7ñÇÒYÓ¢¹yw3Š{_có•'×þ.As¥ø¦bm?W¶x^KˆÈH$)$`”äc=9Îëó.¯û1|QÑ5?ìý?Ljº7Ó­dœ1¹ÎzcŸn?i…´GF_ì‹YV1Û¢6£}Á¯Ÿ?f;]_Áß|}á-SXÔ¯…¼d@º…ãÏæB$)q8%[æÆ9ÅyЯ]Jë——Ñßó=ðîž¼Üުߑð'ƒ¿e/Žº/‰tmlxZˆÙÞCs¹‚«©G ¥·qJýHÔþê/½æ„­cu+o?hùcEìŒr}ù qÈW¦‡+& ®[ãN™ªëüYm¡_ÜišÉÓå–ÊêÔâE™õ }Êíüj§?nÒjÇ$$éìsº•ߊ¬4if×¼1k«ZØH#Cy†qŒH¬åY@ AlûØã¯“xº?Cws«xGâE†…<Íæ?‡ü@VæÔ7p“ VQ謇¯Þ¯†m>2üGÔ­¤–ø“Y…8xnu Ú1ߘձùŠåµïŽ2Üf+Éî/®‚ þ9¯cB„ˆîÑ•JµàïkŸ nµmâ+éî>)/ÂÝzsn±C©Œ×*w}ßž |Ù Ÿ^+ˆøµâ¯ iÞ'Šm ú[O²¡…ü43dË–ÉP€€IÜHœœ‘ÎOÄ:F¥sâI™ï5‰4˜3Ä{L¯Žç¨þué–.ü k›ÓêPíGr"nSÇ*Žœ×bÀP›ºm|Í#™×¢¬Ò#Èþ)üFÕþ0ø¦ç_×î<û©@H¢NÇ ˆ;úœ“É5ç·q\i’¬Ðöþ5àþ=«BN™mp÷ø¶Á(K“þ!“ü'ëÝ•cÆ1QF-»Ü]ÇÚì:±†H[Ê`Á$YN;œŽ:ÕS–ÐopmÐ|¾X'å^¸=ù&³ßN‚ÚÚY š=£ƒü´ú ͱ՚Þ\0–É'œzà×-JvvGM:ŽQ¼ŽÖvÊæP÷;æ‘hdfåBŽ€}•Y’óE&^&v†0²®6rp:òk’ŽÂ%‰¦I¢“qÈ%²T}=k¤ðnwâkõ²Ó,ÖI‚4n6çrÎì{×4¹Tnö7Š”šH÷ÏÙwöÖ>øÊ U…ÿ‡o\ÈSf&EäoSÜŽàž€×ßž2Ѽgáøu}"í.ì§RUÔ`ƒÜyzWæçÃï…'ñWˆæ²°®tÝ%<Ë«–*ªõD~äãZö¿ë×®kžOˆìáʸÌHüdäkØuOc[Hc†ÚQ•(Ù!ñî9ΪtõSKm>E¨´¹ãèý:}ÏógŒêWÑ\}•m û3|¢Ažñרdt÷¯GýŸfïþÑ>#¾°Ðå†ÊÚÅ÷ÜdE\I9ëÒ¼ºüDÛÌyTYÛgÐÇéŠý&ÿ‚q_Ætûh×j™tÉ ä\‘ýk µ8]ngs_ömý‰t¯„>$ÄšÜm­YÖ¾{îHŸûà Ž8ãó_R\k7Hp·è®kcZ„#¶:W#0RrkÈ”å-[.Öy«Þ!ùŽOª2x5óÅ-Q³[´ùpù–l/ ò&VÈ“'^ÀñÐ{*¤’Lr¥$®Ö…8ãŠ{3·™Î*+ ާ§MorØÀ‡Î6ŸÏùšv‹,RĤά£Ÿ)—}e lÝ´kr‚6›ýFîzñ‘]k™®~ТY™² NxíZvQ;¸÷j£ut‹ V8ÔÅ;ÈÓ0ÄŸ0Q´žê6‚=Éõ®ò+6ãH–XÐùw—ç9ïƒ\µªr¸z>Úúì{OÁàûm9µ‹øãµ·`<˜nO"ÿw“Æ}zw¯Ò߃Þ5¹ñ—€t½böÍ,$”“)!qåò|Ä ñŽqÖ¿,~ëvÚuí’¾is r)òå„1ã½}sðÓã ðþ™,º±K™å:}ª‘uC‚C±çª®1Æßzð]•FãOjµ7:)9lxí—ü Ÿ¶ŒúM¨UK³¢³r±Hþböä„eï_¦÷wÉ2¬ŠAÜ ñ_ xþã@ñ<úGÅM=^×Ĩ¡æÒ§·,Ò£y,"|n×€@;H*ÛA<û}ŸÆMEìmRÓG»Õæ½RÖqéèdó£ƒTbŽßg»<“²_ð5è¥tKÐØÐôé|Em6ûMÝÀ(«Ÿ¼ê êp÷5϶›u%¯Ú㵗졊y»ÍÞ™éŸjÑðö³}áMZµE7¶WI|EÔu½6|ÚÏoä%³º †9¾+¹Ôþè¾>Ùî«<(Ì1¿È~ u¯•Åã&J)µéðXPRv¼Ž·áçíá jþ/ x/HûUü»•c·‡Ëˆ¹9ãž•ãþ8“TÕ|]«\ë»_Uk†Žu …ЃÙ@}+¿°ðׄþ^Ò^ >G@Ó게Ï¡ÇËÓ ëß5ÀþÐßâÑ ½›Sm¥`}þ\Ö¼?ö¤ñ¢x‹JÓdù“^Lez:~l+©ñß-üKáèp+Ю—0@åHe=Ôí?ËëóOµIo¯ã‰Ø²À»TÿU{8Zsv•HÙ£åq1„'jræ]ÌHò_жfqò€ùÎFES€#Œž™«0H%vÈù|תp^´š}~'‚ÖK§¶•f1D3§?€Î?:õïÏw«øzW½˜éëùŸe„eÁòríÖ¼¿Ã¡ÓoþШ«» ”0±Œ¯óé·w–~¿“Nsqp"9SÃGÇVpôÎOrk¢¬ÑÙI®V$³•/’ò9HIÕ̋Ǣ¾ÿÿ‚~kÖ–¶š40ÁsÌzUÏÚ&™”Ç)7¹S#`îÇ#Ž+ó¢+–ŽïÍ`yûÃÔµö7ì³=î…à­nãF˜.»y¢ÝKa4®JC%¼é+ásŒy"vÁÏ* yx˜sE#n×G鯧­Gtptæ¹}Zö5Fc ¯´ÏŽZ÷‡tNEŽane BîYd‰† 
ãæŒ2x#šÉ¹ø»©ë2\À»mŒe~emÙ >Sí’ÀkÇpíc~XïsÒ¼_ã[}/q7 1ï^YâïŠ>u¢iŠ’Kw«¬––`åUå)ò©aÊî$G¯jº»7oçNÞs™šC’1×é_;üZñ– þ*ѵˆêï*ÊhƤCûÃÇñnÆ=”õíµNOÞ¤£±Ô|/Ðõ_]麤"ÓMÑÙ]¯¥²€E:ʇ’%;›î1$Ž×Â|~Ñ®%d·†Õ¦žI*ýé6ùÛÕŒb6?Q_Ex~ÓMÑnõÓAc©Ä5˜¢Ý•FvÌ‹õÄÑ zD}+‰øµ ®·¤kZÈ‚âÞÝ/r¤neŠB1þüˆk¢œ# ûªÅNµJªÕ%t|}as(šˆå°ucï^šÚW†ìíb]NÔ\ݹäF*ªÞ€ŽH¿¥yõŽ‹u¦j­ÌF6ǘAùGåZƒÌH?1]ÀÜðÀ1^Ý;ny³¾ÈÏñN—ÝIudòIFYY1Ç'½;Ã:âBÍktå‡É'÷OlÔ¿Ù$f|'ÈÀM¿‘ªWÚ)ŽFh*88ì{~¢¥8ÍXºuÎßÂþ,Ôc¼a²{¸•Ь„„VaÎXØ\üHÓtOßÜãRP†HdÜÓJÉ!ÏUWÌgq%”OS_l^´+%Ó´È"Ótèr±ÚZ Ž5' êrs“ÉÍ|_ð£_Ѽgðò×ÅvSC-ô1¥ÄÐ ¶‘\u à÷öž VÿO†é0VXÃgð¯—Ââg[ž3+‹±êf!FPtåÍ+œÔ÷Î[®MU–G ½IȨîŸd¿ãJ“+!õ®ÓÉ<ö³Õî ×m ¾:}ø·I#ŸqSà:‚:1Éïè y,:¥¼Wj†(Ãr‹‚ }ÏJí~üO—áOÄ‹mzXZçHºÞú4ùLFHô`@aêF;Ò©R.©¥9ºsS]«µGpIÆBSç«Ç Í|êÊ_5¢ïùŸMO9QK™[ò<úóWœËuy«E¦é7;³¯˜È9;F'׌z×Î~"½¸ÔP§û¾–øû&x[Æš…¬WâXÔê³ê&hI<écŠ!! `ýÆ8Æ =8¯[ ‚XH4ß¼úN?1xÉ(¥î¯ÄüÀø‰¥>(¡hŠÜ"ùÑcwθ€'¨8éØÓ5á¾%Ñ,.õH·Grv‰ãûÑ·©Ä>¼û×Þ?¾<:Þ¯àÝX´:®™3­þ«:»"ûFGõÁ¯‰|o¤Oá½q­®"ÐÎU‘¿‡‚?.ü+zR½âÏ%¾Ç˜ê\šTÁ_æÁ) 0ÿjl1¸€áP‡½­lµm0CrÑC‡½¤ÇçñÒ½ÿáO¼3¡f°´Ó¤’ K…vwÏ&±ÅMÐ\ÑΚU]¤ì|£ •õ¿îšŒ¯ð¼DÔV£Í ’¯ Ü¥EÀÁúQ¤þÑ_ÙöæyBˆ”vç>€ýsQÝügÒüYucw­hº\Œò¢ÛG¦GsqÇñg“ÿP0y5ä,Í­éþ?ðGêK¤Ì¶ðú¼¤#3º!ÄEIgÇ¡Õô×ìQ¡jø×LŸRK˜t[+—Fñ’Ñùs%HÛ³ Œr ú'£éÞñ6wrivW·–›^ÚêïN‰^ìce\¯àkÐü5 éöz,W-¿º‘Cù—h×<€¨xèqÎzv¢‡¶vQ·Ì™aáIsJZŸkž ÔõëÍcLð‡…æ¹µÓ5™4È]á‚ÞX  xU ¹$ã2™´¼9û9xÁ¤¾m\é$SÚyh·¢VIDdlB$ÆÍ^ßë:WÖ:½Äò;ab"ýpÒ¹=D0¡ÔÖé­ö<óölûE”–÷ž9[mêUšÃIiø9è^Xý}+‡ö:ð͆›¦Ø·u¹á²ºk¥'F„nbNsþ}Oç_Aß’MbÜ)Îy¨Uå‚ÇšÙûÃÐ&˜"ñ†¤Æ­ŸGV›þ_–épG˜Ø8?Ãé\õÇìÙik¬™´OCqi=¬ÖÓZëvÒÄø’6BIJ꙯_–ÝZ˺¡“w"—ÖfŠQ¹òŸíáàýá•‘²Ó­.-ô‰,¦},î‚áÑÔ´™À%Ùœ³üÃŒb¾]ºnt?¼VÈúò:ûÇöÕðÐÕ> ézâ 2éWë–þìr©FüÙb¯ƒ‰,ŽùÇÍœ^þ\Ô“8ê+6Ry#6þ[6Ö…þ_VCȆk©6Âx’öÂþÖF 8äŽü¿LW~‡Ï* œ€@XÒu½C@Ÿu´¥6Ÿ™rÔëR³³"Ç¡Ùøa5ø˜Y•ââÙ?Ú\óƒ“ùý+ÐtßYÏðÅ| ®E*jZ]ÛÝèzŠA¼Km7úëir¡\oS‚2Òg¼³Cñ…¼7‹tðË¥^c‰mÎapz‚‡±î+nûÅš Ÿz—é"ðѱôõηN6¹ž·-|&ñŒþÖ#Ó£»’žSl¼ØŒ×ìýyuφz<å·3ZBI=~à¯Ã(àÖÛ÷èžT¥Œ¬p1ž£ë_¦ÿ²ŸÇý3ÄŸ-a ö½,-ÂIÁ$U‡¨#¿¨#µy8Õx©öÐꢮùQïÚÍÇ–ÎÀ‘Žk‘¾ñX²lÀ®kñÆ»(¶B·G$ϲ0î»v&¾lÖÿi}sÄÇGÓâòå„ò1GÞ%¹Èüñëƒé_(ÞxæâÛXÓõ7»’[xeѱM ü²+ÁÈ=ûfºÿ‡³dø£VðkNd´Ô#WÓå—£Ht}ÙSî[ÒºãFñ×sÔO¥uÿ‹qÈ-æSþãf¼ÓÄ¿õ ;ý)˜+X]\‹iå݃pDmî7àLŠòøNn-îÖâÞH&Š´dt 
óÚ´æÖ`×t‰íçGÈ…rxÇ¡àóšŸf–áÏØéõ[­KWÕ4‰î&”ê@]iÒÎra¹þYƒØÛßµ¡_Ÿi±Þ†œ˜îc˜í+*ðß™çñ¯>µñQñ>‹o|…áÔ-ÑnVI0¹uêÝxÝÏÝÒ¯ëz׈®ÓXÑnž ]B5¸’8í¤,§†E r:{ûÖ«•+H¨BSÕŸ²G˜ÁŒ’ÀãÓš»¥è×z”汀 ÿ¶@÷úÔ¶p æX‹mùIc·ŠèÒhÒÝ£äDé¾ÊþDW«mNg¡4^`ÎXOIíNec‘ŠŒ¸=óéíNŠ[Â…"½Š~˜Þv8úÿoÙj6^$4ßD¶³Çþ®åò¬ ìzô={ÚM-–†jýYöìYñ*÷Sðýçƒu$ycµÍÞš‰;Æ¥¿xªWøA!±ÛsàW»ü7øAâOˆ~<Ô(ðµÍÍ–™§¼W±I—Só¤A0òúÈeÊç} +Mu§Y]\q4ά@èxª—ÎÚdV ª]ÚC87*ü°Sòïá˜ìµõYVh-ö¡@$Åe9¹­Ec„øïðnÃâ?ƒõ)mí‘‘¨¨Ò€»º”ÅÌD6sŸ•8=ñžüö¯'š)ÛÆú}•Ü’Íc,ˆ 2¶B±8üFqùר^øGáßhmd—"×QÚ KÆÔ>›{×Îb£4¹e©íÑ”«G›cGá<úå½óÝ]_ÜÍu>ÕÍ!Ø •‰¸úWè]&8»*¨•~{xBGÓÕïå²¼\N:ñ³+^[\4»âI#eeU¤yÁS:6äà8`?¯ã[Þ"Ö­]íæ†$Žû8š$%‘†;g¥Mk{c4e|…hfmª= ízöybÞ¬æ»ìaD“^¦ÈnU¶ž"+†€ëøfƒ¢^ÏôÏàùD1P9lÇ£Új·’Ûù3XÝÇÏ–Fþùïø~UzÎÏTÒ¤Ʊk0Æ0@ÏšƒÓ?x}:{P¡ÌÖ8ÉO9«¶A…"N3þ{W¯|3¹Ôle’ÖÛQ—KFY­oŽU/Ê9\dàû㊫4zOŽ`Sl ž§Úð;ä'Õ[¡ú.kOᮨø?ÅÖÚm˲iÚùþ̸’US‡eÃ窔pÿÈ<ÍJWƒ¶¨pž§Iáôñ&“o©É4—Z¾?Ùçq2:òð¹c×+¹OýsÍC®h’kþ›ÑmsvýÛ•âaÿb$?õҬ隤^Ô5]áoàÔæ+[‹`Uбf Û¾VR£ ‘Åz€¾ëÞ2°hÿ²¯4»F˜È÷zˆXâb¢LoÉçŒkÈ­ˆ†­.TzhUÄIFœ[g™ZÈî¦;¸Œ‘Æk¹Ò%þ×ðœ¬7&kÃn¨¹y°Êå‰v³7^¾`•ë¶_´[EW½¿ººaÕc ÔÔv¿ 4‹Jqg%Ä0\BöòÆÒnVR1É#9Aõ×ÏÿnaTì›õ·ôÏz9&%«É/KëþGA¡xÀ^.Huÿø®ÿDþÓÅÁa¦ -FûÑ‹…,7ž)Œç­{‡‚?e‡VŸm‹W½ñ5»‡†î#þ8äM|ãá{O|&µ¹F’×]³îh6ˆ‹€1†^„3–nœ ó[ö¤ñ€¼r÷ßð†\ø`²©”iû­°ã«Ç…iჼ‘]ëO¯E©®ÉÙþ'ÁJ‹µTàûµuý}çß?²·†¼¶Þ³‚ÁeR­cu“êUAý röhñ^€Íg¦é0}•NWìs,qþ ÅHü«€øKÿ7b4yìu¦êlu5[;¿¢J>F>ì{å·üGáu¼*šÜZ®…¨¿gqk’=Õ²/£+á0•¤Úr„ºê×õò:þ±ŽÃÅ&”ãèšüÄÍÆTµ-Xdâ9øÇ|ʯFª„€€’zƒŒÿž+SP&+K{O9.D ±dO¸ËØŽ™¬Õ™pW€}1ùWÖÓmǘù‰¤¥bÜQÛJ6Ë ŒZØ‹K·º³òäQ,G…ÿ#Ûð¬TJáSæ÷' õ­‹XãÙ“^¤òMtGc „|I®xèyLotå;‚±ÉǡֿO¿aOGâ/ÞiË&b´O1ù£Y3•ú§ó÷¯ÊûxVÓý¥Ñc#sçÈõ¯±¿àœ^3ü{­éW‰î¬± r Ȥ•õÀ$þu”à¾(;Ÿ¦šLÊmòê>AR8"¢Ó¥MB˜+Å1W_L1üêºÝ[ªýŠ;ˆÍÊ -a¹A=H¦Cjtð÷ ¸«ƒ¹É>˜ý:ÎÄÛS¤n¶1_*ÁE~/Ž~Ûø®)Y/<.]ŒAr%‚g‰ß*Uè¾¢µ¸Ia §‚ƒŒçš¥âË^ðö«¥k¯Òïm^Þá.$UVFR“íßµM¬Ë?tŽ×:7Áßü9živßßÛÞZíÉV!€•[Ð7Ö¹;K(g€›ˆPÝw ¯ZÓ¿g Ãã>¯|g¥YøkJ¾’'°»Iä¿\†VR›„kƒÉnAÈ‚GÝ0~Îÿ üUðím׺kXËòní” ñŽg_œž‡9®Iã#ƒvqzž• ¾X˜ss%ùŸ•—Z.·a¨¬'Ø\Z¯Y6’W'ëÇJö¯jײ_Ú½»½³ŸÞ²·U%s´ýëXÿ> Ü|ñ®³§Ï{%ý°·:mä§÷ þ¥[rç¾Ü÷­…oüš|å·5ÂóÜ©ÿò1óoÞEXjs¤Ý9=QÞ|>¿›VøÏ¸"FC! 
™ü±ï€zç?|hWƒq~zü ’Gøª’$edõñý+î +Z1ù¸ÅpÃF“–‡w¨Þ/–y®Y¹ÍÎioüQ)Çç\Í˃[9œ Û•ÝœŠÇ¼€*{~ZÃÔ|C'‘ù×4¤‘× leĦ3{õª2êEIÅdj^&†<î‘}¹ë^â/Œ~Ñ­Ö§}ÁLqï’qÈ\ãñÅd”¤ýÕs£‘E{Ú—.©åÅ!Î8Í~WøÒ6µñn¾«.È’þávû‰¾èÕþ _·ˆ´0ê!ðýÕÄqN È’…g#,Ø Iç·Ä_ü9/„þ(xŸG“{5¶¡0 ä³2–,¤“É$úL 9RO›vy8†¤ï’º)2£p$œ÷Íiïr„¨‡;Aç$ÛL²áœ&ÜóŽ9©,bmû¾ïlƒÍz¾§é£ÔÚ8d–gš8FÔ.ÛŒcÓÔ Ù°Ö.âÉso,Ƚ.-Ïï‡Ö–Öò{2­q3ÆNCîÚMléïlÎ$ku—æE17ËÏ;N0F}»VÑVzöÔµa©éÚ»«Ü¨»Ç"òÕBÜGþüÄ?Îk¿Ò_XGÞG©XÉ*é8h\}ÜŽ ñìkDðBxŸV¶±]9u ùä šq)pìqŒ(äžkïoÙ¯þ —x."Öþ"êÚØ8š4XRÂi{`·ºÖÓŸ*³2Q»Ð_ƒ_¤ø§¦Xê¾Ò¬o5gÚš…ÌÑÆ&†QÃ;’Nc9\õéšû À³¦áûq6¼W^Ô]~a >Dyê{ýOä+Ò<)á Àú,Nƒ¦ÛéZt olWê{’{“’k^¾n9fVuä®ß}l{o1Ä:*Œ]’í¥ýO ñ÷ì©¢káçÐn¤Ñnzˆ_2ÀOãó/æ~•ó¯~ x«ÀR¾Ó™íAÿËoÞE÷‡Oø÷ý# ‘J°‚zäÅd¸\EÜW+òÿ#¯ œâpúIó/?ó?;4Ý2IPyƒ&¶dÑ-nm½Ý´w0ÁŽD ?ZúçÅ_<7â=òÃn4«³ÿ-m Oºtü±^/ã/‚¾!ðÊÉ4PÿiÙ¯>u¨%€÷N£õõðxÜ“„|ðW]×OÔúì6o…Å®I;7ÑÿV>GñïìàÏ\]jpŸM¾™xŒJ| Þ£ºŸ®áí_:j^ ñƒ¯çÒ-|Mq6ÎPE>X¯°# q_\|HøÑ¢øÚXVt»Õ)¬'-»ž¾•ó§ªI­ßÍ{w;5ÄÌYö9Ú¾&©Œ¯±:Åm}Ï'4#Oùd÷K·¡æ÷ßòI4_úÿ“ÿDÇ\+ÿ¯?QE÷¸ÕŸSã6¼9÷ÏÐÖÍ×úÑþíW|¾6%ßü‚cÿ¯ÿ¡ ÷¿Ù þN#ÁßõÜ覢ŠÅl‰‡ÆÏÓŸ‡Ÿò×ëñÿô ë|Aþº/¡þTQY}¢‹RÿǽûßÐ׿ŸíOÿ%STÿy¿V´~1ôgÎÿÅ}þñþUú3ûɵèöõÿ£Š+Ì΂_(þ$œ࡟ñý¥Ø2ãÿB¯ø÷ôßøò4Q_=?÷SÓŸûÌJø ÿ% ¿ÞJúÚßø¨¢¦ Ï-ÌKøëž—ïóÚŠ+î%ð²­ÏÜ5Ïj½ýÚ(®w±Ð¶G|]ÿ‘7Qÿ=«æ!ÿ-ôj(¯s ð3ƒº>‚ñ'ü’O×ðþM^û]ÉÁø¯þºÁÿ¤ñQEzëâùdvùž-Þjiÿñò¿ïVæks~ßþAý­{äHøoÿ`éšÑEtÇâ‰CÖÿàšòwÖ¿öºÿÙködQEaWâ4}Š(¬„QE¥7¹¢ŠLGáÇ_ù9ØVïÿFµsÍ÷cÿq•W™Gà=Lñ>KòGÿÙrocksdb-6.11.4/docs/static/images/binaryseek.png000066400000000000000000002064341370372246700216040ustar00rootroot00000000000000‰PNG  IHDRÒ&>ÁçsRGB®Îé pHYsgŸÒR@IDATxìÝ `eýÿñç™Ý¤÷!Pî³Dñ*¨Eh›zl~*m“ (‚ *"JUNAPQT’–‚€ØM[Mz@Q¬¢hQPDK)G/Ú$»óü?³Él6iîn’=ÞéÜÏ<Ïkfg’ý>Ï3Æ0 € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € 
u›õI@@@@@@nŽ:ꨒÝö?∈µG[ã½³¡¾ö¬n6Í©ÅùšïœB$3ä‘@4òJV@@@@@@ò\`÷ýŽ8.‰Ü÷M£¼¤çk¾óür!û ›€7lGæÀ € € € € € € €ä ô<)d @@@@@@@`ø¤Ÿ=GF@@@@@@ÈAé9xRÈ € € € € € € €Àð H>{ŽŒ € € € € € € ƒÒsð¤%@@@@@@@á >|ö@@@@@@@ ¤çàI!K € € € € € € €Ã'@ }øì92 € € € € € € €@ Ds0Od @@@@@(X“f}bÒHÉÁÎF¶Æëœ÷ŒoÌÓÍæ§oØ‘‚Ÿ0óŒGy¥Ç˜ˆÝKiOpIó˜õì?ãuÏ(]·#iûvú;š¿BÜóB<«”)¤çÃY" € € € € € ÷3æT¿ß³ö kÍqAalªDžÑ¼ ºŽš‰¦"V½NñîÅ[-?ýÕò[_MmÒ‡ÊçÌŸf¬w¾R=^éµÅ”v¤uçŠXÍf¥ûÃW̆KïÇ·ö!É› vúÖÍLEeõgì©í«ÝÕKê~Ú>Ÿ›SÍ÷Žš—Ï©¹NÖ‡R*¾ûgc}ÝÇ"T>§ú»Æ³ öµÎÿUC|áç’û otížogŒü"€ € € € € €@¾ ØòXõíÏþ6 ¢w[kR@ü†1ÑÒÿ–ÏšûÿºÝ®mŴؼIå•Õ Ö‹¬²Ö~¤=ˆÞiOkÆ*bÿ…IvâßËæTOí´¶ÛÙÁN¿ÛwZ£ƽTuŽ~Œïò%ˆÞß|gËÜYó»ÐK½|¬lö܃:±ö:;õÔ¹Œ?+LÇ8{_¯;±"@ ½@N$Å@@@@@@ÜP‹Þ¯+Èn ìœ{Z?7ß¿X­Ä¿®é;ÔçúŸ3of” )±õå‡3æ·›œ>«j¿ù½5¶<½Ò¹Mƹß9ã¾£t/Ñò;•î¿Óë™b=³º/AÕÁN?#O=Nª¢€œì¥í¹¯«uõ×Úçssj ùΦù‹O=¸B×ÀK¡ŽµÑ¹át_Ç㢑ÿÓõ52Ø^×ÔË/<ýׯ¾îËvä»]»çû$ÿ € € € € € ³³ªŽUËà  œoÎm´n4ñx2\Ž­˜?î-##§yÎ}R­Çÿµzõê¦p]çñÔ©S£Ñˆ·JËn[ç+íë[^¸¤‹ýlyeÕçŒó®T‹õA`ÔFJ~¬ýZ»ýæ;ý.Ùå"åûBå÷òp¥sþ¥ñ…ßçsu<|gÛ|ݺu-{Ly×ÍjMþ唓µgjüUýø}vóìÜp[u«Kf8ÏB Ez¡Ÿaʇ € € € € €Ã'5'«5ºb™Š^:óýÆúÚº ¢ëïkX´iÅ’Ú5ÄëŽÜèÞ¨ –u7Œ4å3zÉúÛSëÕ =黩Jû ]уM\ã’…×[çÒ­âµìøòXUæ|*©ðŸÁN?×ïšp› …}~Ñ–ïÁ0O67/”›:96ÿ­ªÄðžôvÉ–ºô4ô"8É@@@@@†GÀ9{Lxdϸ†pº·ñÚx|swÛ”—lÏØázEIÏ_Y_÷»p¾»qC}Ý2…T“±þ#ÓéÉÁN?} &*bÕg+ˆûÝô&ê¿1^—n™ž^žcÍ÷`™¯þù-OªǯÒLÖ›Ÿžîe"j#A öÔ\7+–.Î|E@¸Š1+@ ½`O-C@@@@@áPsr˜uKžj™ÎxþuÑ^îï'í÷Â龌}—x<ÜNZ†ïÜìôÃcw7.›S}†*ܤŸ¶îðÝ—Ô^ÝÝö¹²|Gò=ØæO¹7~®ëð•ÀJª#JwZonjY‚6Þ;ØNø×7¯oYÚÛ>¬G Ð¤Ú¥< € € € € € ;ÎÞfÆYóéò9ÕŸ ç2öFŽú œ‘Ô¾ÎlÝòjKfWí½&™ð[ž7RPuô 3ÏèÐ:y°ÓÝÕXïDÿ˜gÍ"­k_ùîÂñºô;Ò»Ú'–íh¾Ûü‘x¼Ù8{Khe­›Nw;¶æÌöuî¶µkokŸg âˆG1)% € € € € € 0ôþÖ-·EFþŒ‚ßoMµ²¶æúŠXÍGŸ¼æ¿Ï<Ô°nݺ–~åÊzhßÞ=;v—’Ù•Õj¤îôc­úãÖØk×´þ·&XçyšÖØÚ·´ïoLòõg7dΛÁN¿ÃÁÚgÊgÏŸ­`ï­jŸª$à|w~c}ÝwÚ·ÈÍ©¬ä{Ì“ISõÌ…)Ekœ«yçªxíߺR6í´ñºTf¶ö  kÄ%èÖ½+(–¼ô‚?Å@@@@@†K`åÊ;^¯ˆU•©ì?*˜¾s*Öc#‘ú½&ùÆž¹BAð%/>ó·»úTWkâ}Ò½±+8¯Ü·óAOè­ÏÖqkÏèê]ÿ+‚QzœÊ„ºìvk×&ÚfS£ÁN?óXáôŒØü 5²ÿ©æSq+½÷ý<ÑûÕe}˜ÖP޳•ï¡0_µ¼öq½Ã}­®‹©QÄ¥Z¥Ÿ×•Wt쨘.™ÑÁ:]#¬Š/þ{WÛ± B k÷B?Ô@@@@@†U !¾ð‰­Mþ;ÔÊú6&õÊé¶ÁÚ‰Ö³Ÿð"^Þ“|©,VsÉ3Oß5\ÝÕXï5ŸÔÕò.K¿¿=ܰӓ;3"b#õš/i]æno\’ûAtUŒÈZ¾‡ÎÜ¥[–+P~Ú¡±Xiú³æàN›æÔl6ò=”æ«ã7ÿS]éß×fñ\I:p®ŠÔòýSëœùûª¥7ÿ)5Í?©ô"=ñ@@@@@rK`åòºG1 Ò¹²æ=é鶉„µ¿ 
—-·ÇLš<=œÏÆx°Óï)+WÞñz"iN6Æ=l§æÑ“"Q·¦S«éž’–u;šï¡6w¾¹) åÙ¹á´^žVïé–ëázÆ›ôb;ã”@@@@@rV ©9¹&9kÑt‡XΪxíCÆ™†Ûè}êßÖûÖKÃùvú½åoõ²ºç“-椰›{Ó÷ó̈5'Ūvêmßá\¿#ùjóW톸z>x£ÕË\›܉'ž>FÖ³ƒej±¾Íߺå¶áôäØä‚@‡›o.dˆ< € € € € € €Å*PZêí‘.»3ÿÒ´ŸžoPca÷Ùp™‚Ÿ`&œÎga<Øé÷šÅÖ–ù~™2òfjckÞ>Âx«‚`o¯;ã;ï!5OuGol:Pn]d^éø³5c>µL_´²FJ@NHωÓ@&@@@@@@ › ,ð\óÃ]/¿¦öW]{ÓIÙL»?i•Ïž{È´i§ïë>Öy ·UÀüþp:sܯ»W-¶.SïߨˆUŸÎ÷e\;cßî¶ìô»;næòñE÷«iôÿ)œH-·æ˜F.ËfëûÌãekz ùjsÏwí]·[Si=“¾~|ßÔe˃tÈgh>gž¼#€ € € € € €@q |ö³ß±Û~Þî%vÄIçïîYo½W|µÌÞÝ·‡³f`w—ÊnZÖ±æÊ+oÜéË_þôз²”\V26zBEeÕ÷ÝÖ7¿ÛØøÓõÝœ1¯¼²ê<µ>?\ï×Nw;ÓüyëFÌZ[cGj|cyeÍ þ–-5=´&öÊb5§¨•å… Ò-ß'wN7œìôÃãô4n¬¯]1£²ê“ã- ¶S…'» ·>bÌÇ5Û¹¥~OI éºæ{(Í–Ö=\«ù“®›cÚ®¡÷Hª¸ðïõuk‡Œƒ!£º3 €Ý ”Íž{±Ñ÷ê«£µU¤¡¾.]+­û½ { &¹q~ËfÎ?”DŽÖGGë·»çã ¿‘9# € € € €;* Öä#GŽÛý­ÎD‚w„¿ÍóÌ!ÎÙCô]Ð>Цöõ]ÙŠ š×yQAã“Æ?ó+çòÅÍ[öZOO1Ö+Ïã‚ý”!uUîîwÆ>jÿ¨¦Ÿ0ž·³Vd­w†6™¦ï|s½²„ó]+æTÏrž¹=H7p®ÙYû¥ùý<¨7¬6Î*}{*ª iºëx·uó¤ûf°ÒŸ1kþñ‘häÞ Ë:7Û—Ô ³ßÕ¸bNÕ—åtE¸N­ñ¨ÜŸç‡j<ù,ó®ŒTIa~XI!\¯ÏÌEKj¯ç#PÌ´H/æ³OÙ胀ç•,Õ/V‡§6uæ}Ø¥à7Á$7N±Fþ ?‚FoìÑ?É\‘ @@@@þ œÝu£&™±G«1ÏûñÞ£o{Þ®`Þ~ üz™­ÕYCðkÑú—5ñ¢È ’û/:ß¾LûšŽXï¥×ôâ³E_ºé¦O¶{ ×°O˘=]‰Y¯\§é*Cð}Ö š?Á¨/íôÐZ¸ô¬Å+í†/¤t3¡†OËÊcó?¢ w*‰½R›Y[ªôߥà}ðÓºgÛ(ŦåÜ&ìÓ] y3Øé‡YémÜP¿ðÊòÊê=tŽSï†×µqvùœêõõu_ëmßá\?|¥yËÍ?ó&޼^—Gøê»Í-N3Ž@. HÏ¥³A^@@@@@(pu±þW=6œ[Ï{¿Zi¥xoi{±. 
‚åÎ=®e:ëþ¥Àù£¾Zp'|ûÌ‚ ?ùª–kQîwý|ñ3Êå ú–«ZÀyš>¾Ç\;ówy\¬ñŠ·ËXÙ_ôûòòòƒÍ¨Ý>/”ÏHo·ŒÕÛO:÷˜ø¾ÿj“»å¾†E›¶ß ã’ÁN¿ãѺŸS«õÏ)˜¾›ÊWl%ϯj~½–¯û½†Í@ò=TækÖüdKEeu£4OKI©GCC]PI…$ÖAÊ[Œ©§Î8¶$z’ª'«8ûë'xÊîªe5FÑ×õÐxM…{ÙoìÖ4ßß¿õY-c@ /**ªwóKÍÛúšYß%·FŒÛÐìî^v[ðÀÛ¡÷Äè)ëN‘n‘Þ¯=¶¯y)ÔírÕäñ؄‘n‘¡{S³ÿø= ‹þÎÚ¸l:X› ¦¹­¨¬ù·2žz¥€^'P¼ß}° Bºä›@Þ¶H¯ˆU««s‘.P EåÈ|æ¶M[³«¦vÕIÑ;T¼¶v’2Â(ö'=4îx³¥ùö_-¿5¨¹Æ€@î Œ4ñúÑmµ|4Œ0%F¿@l±Î<¨Ù?&îVý‚ôÔJþ)HÒäø#¼¨ýuX¸##gkúGá›k×®-¸^ Âò1F` yHW+ô uå ¢G:VïJyÊ:»ÖxþU“íeßz ë«§®Qœ™¢šmSÂ.€ÓûZ3VÝÈ\ìFŒº[Ë~›^Î* Àú±ªˆò@yeÕ—,¼¾@‹I±@@@@@`®¼þÆÉž+] .ÄOSè\m¯äsè;úÛÛÌϾò•šàU” dU qéŸ+Áà‡¡Ó¦6Þ9«×-´î”0ÉÅýØM( | ¤{åsj®ÕC÷ü̳¢‡ñ½)ä»~Òܶry]o5‚ly¬z½wäSž3ŸTZRi9÷üªe —™.Ó„•Õ·¨¶di0¯wƒÜž ïQ>ÎmjItûKÁˆ¨ã›’ý"žÛO5<÷Óµ^© #†eÒCQŸ{ﺲʚ­+–ÔÒíwâp_OúÉ5;RtÝ_TW¡ØrñyYìç„ò#€ € €C) è{ëÅo_Õ1çéûȽç¼Yßß×*H÷/_PóÄPæ…c!€@ߢcFÍO7@uâ‹ÿÞ·=Ù âÈ‹@zyeÍ ªóéÌÓ✿¸9ÙtñÝËn{1syÓ®1^÷´ÖéØŠù—í\ê¥úpçiþNý讨W9sš~ñKõ€à¬ÿÇœ°°þ«k~qós½äå_éõ±Ø×Êü‰õ<÷5U 88\n»~ú¬ªÕªHòŸpc(.Õ|eýÍ(®RSÚAÈÅçå ”D@@@2®¸âû;{£F\¢eêMÖŽÐXÝ·»›~Ó×/¹ð¾wÌÄb\ˆÅ‚¸Ç¹a–œ±ß §#€@»@ÎÒõNçu ¢ûªÉv~c|á€?Ô÷5, Þ½òí£Ž:ê{ãö9|L;S´ L:5Úá5Ökj_›GSñxr…1·Ÿ8óô{FDG>¨f{¹W7ï##QûEM~&JCV@rL `ž—9æJv@@@\¸êÚg"ÞOõ=ãÞÊ«âçæNßù—^üù³Ïõ¼“?Š]`†³ž= pÐg÷…§Ìw» åG +œ¤—Ïž{ˆÞ{~“"~éAAôO4.©ûizÁL¬[·®Å¬[÷Æ$Á®,PZºOPƒ²}ð]~ÒÛJ°fùOþW›3.òÛÖîÝS+ÞÓ^@¦@è¿@¡=/û/À € € €@Ñ Ø«®«ý’JýMÑ£êîõ÷É„ûÌW¾PópÑIP`òP`ZlÞ¤ˆñ¾›Îºõ¿óÈ’xszž H xé©\œð¢×(ˆ>6Ìš‚è7d+ˆ¦Éîì„Òt½8¯éA9WÄݯ.ÝJ—Ù™CS- Ó ˜@@ …ø¼ìŸ[#€ € €Å#°àÚïrÕõu+­µWê'¢fèWmÛðüñы砤ù#P>§úsÓcsÏÌqY쌽¢&ºL±·]ƒåú ¿”Ø´í¦Ìm˜Fvœm‘^6sþz—…YUýO¹ †óÅ0ž6í´ñ%£Gíß°´®«š|ÞŒÙó÷÷¬90áì+æÍmO®^}ûÆÞ\#ÍîŽËŒ-Ý»ÄDöò¿9¹¹é‘¾ä±»ô‚å½äߨ»þ’=ö?|Ï‹îŸ4æ5oÛË666¾ÙSšÝ­ó›ÜH3²}­zGØÖ>—ÇSÎ>¨‡äQA Ô2}Ę·L9ʵÿªûÑ;MÙËXwëy»ç¿nœ}éoÓóÄ·Ö[p}ì¶ß;÷Œ»‡³f'ãû/$·5=½£×åPÙ æq†ò³Õ×r¼75®yì.›J7¯¿?ßÚ×ýºÝ`ܳš—bÙo(î½\Ûƒñ,µÓN=sr´ÄÛÛ÷£#}/ñ\tÛúgúó,š‹çOÜÏ7foç™—Ýæç]½zuÖ*s[1ÜN%þ^¾óöÐó xÖS[^}êÙµk×&ríìór ìƒ € €°@ª+wÏ»SÈî¥èÛzç»3.ºð¬Õ\dІ@Þ ­ÎõY½6jK"å±ê§ xLñ€}vÑ÷Aã‚‚©K÷„Þ¥\É÷ãy{šÉøäl Ý–xw,¿ýæ`Ù:Ϙ©SçŽ?©äªpyÒ$ï]_ÔÎ÷u<#6¿"b" ·ßøJËEk×.î6([VYsµu¦B·±·êæv¿ö;6Ü÷„™gì<:2â+Æsg©ÛœÔûÝK‚•ãF›ŠXõßT{èGëíÆ[;#Í0O™ã3>þ;zÔÖxstS~ŸÖ¥z=ðô=½§<ê†ýå[A[÷݆xÝ=™ûö4ÝSþƒýÊæTOU‹ñ3•ölE‡S 2àFîæ**kž2¾¹·ÉúÝ_øZOÇ 
ÖÆ£¼’÷yÖžÔq[;£,V³ÇeçÜ›[n^¹òŽ×;.͵9ÿ‰¶Ó’ʘØ]† ‡^ÅœªÙÎØ™º.¦éMLçÁ¶vŒ1ÙLØ2¹²æý¾üŧÿúÓÔ«²± P7f—)s«j¿ˆg¯4Ö}ÔèH¦ÎçJ] é<Ý ­¸äçTAæU™Tкó†½-½>«÷¬ `>9V½QAÔË‚#÷ŠÉfü–GŒùFúˆ½MX/èBéø¶Í|ßùïïm—|[?”÷ƒî®íl?KOšõ‰=J#£®Ö¥xšÎG× kKu¥~ÜEJ*ËçT×¹‚DyeM¥ž÷×h›}»º¤õ¡ÐïköP¥y…™XáÏž{úŠ¥‹ÿÝÛù*ÛeJ•µÞg”îá­ië“ÔÕŽŠ¬kñþ*AMÔ•|Lû«6½’øvWÿŠåyÙË@@@b¸êÚ›¦é¯Æ[Töˆ¾¹fÛÆ.^°`Á€z5+F?ÊŒÀpèËÅÇ=ëVè»oÅNÔ=spæŸjŒþéñ›“¹˜iØ^ ë/|·ßnH—”úîx07ë\Ý@Z_ i¦ù`m_ü7êÛoÑ»¬1®ŽßüJ÷[´¯ÉVšÓ¦M¡Öðõ:a7éøÛÑ,l?jû”âegŽ0Þ£êwJûÒ¾O)HúÙRQ ³sËñ®ÓÐC]‰ÛŸ+˜~JW[(J±·‚‡tDïj‡.–ù¥›{í^¿‹Ý†x‘;6}@çÞ¸{Ùm/¦ç³;á•WV_k½È*…m뜴*È(†ì¶t^Ìëh#‘zõpƒfÓ÷‚®¶ífYpì况{c:ˆÞ͆Û-^Ö›«Æ l·n̘3ï}Š…é—ö º~™™Ó]ÅŽ8ÔïšíÏVOR¥…Ó#ï1}6?ÖÏÏÞþºP‚À_¿‡Áºgmz剫U÷éÁ0Cºö¾2cfuk%Žpa7ãòXÍ\­ ž}©AŸï­¬¿ùá|Œ‡ó~æËÖs/L0¨SùW}®Oײ^ïUÚ.ªWZü`FeÕü   wÝ«ôìü™Öí¦ÛãØšc¬ýcЛBOÛ=µŒÛõÀ‡u¼éw‡Ã{Úv»uÖŒÕçñ²q»D¯ÞnÏó²«Ò³ @@(.«¾}ÓûLÄÖë» wõ—.¨ù"Aôâº(m~ ¬\¶è×ñºòW·%wN8s¤ºpŸå'ÝŒ–ææⵇ7DÏÏK®‡\@ß…æàày'dæÊùö×™óÅ6];c/ÏØåú"|TkÙÝd¹^.ñ“ö¿‘HrŠ3‘£ÙJ3芶ÄN\©c¶€RñQs«õܯ“-fÝÊèÆGOrã'”ÿpç"SܼH¦‘©|ª•i´¤äÇšþp_òl£²GÕzï <|:½ssÖÝn|÷p2yÆxÉñшwˆº?NÛÕh»t€C»o+vWç÷ÍêA²!â›Ej;^Á° Ê£ÞãmÞCÅz@Ó›Ãù®Æ¿ŒÇ7uµwËœ³«l2ñŸ„ïoÐ=b?gíÁÖ7kÝ4í£;6 æ=+x¿ôôØþs£&ú]OêÑÄêsµÊiв¼Ûë·õ}EîZ]u©BÉà ³íå¯ ¬„9¸×0Þ25²õÜ ÓÔýä0=Ž~©³–ºg¤Î›1õÆù¿Õ³ù9?ª7¤ûöm.¢ç³3ŸÓém{†ëäì5峿ýÍØè"íD˜¦nÑýávmû°5îY“4»éÞø6=·tl¯4¦ûäN‘Ñc.Õ~ŸKïÛiBŸ“J݃;TäÐgüeë[Ýwü»ZœyÂxþó6}‹~På%Û/T~Òâ”ÆgfĪo]¯[—™|1»¦ÞEÜúì°`NP˹3VÔ×ÝÌ÷6hû£µMð„¦þäÿÜñ…A»ó\7·•ſߢ.ñ¦#íÓ¶Á”èØ½Î×ôU™;¬\^÷¨æ«Âem-Î?qfãÒÅÿJÏçÙDÙ칩cº[{Eõ^ßÔ’¸z0ŠQ¦wE+“D÷ºLñ¯ëx~æ1ßÔüÃÁχc±ïvêtQóÔ ó=K-6¿Ü¸¤îŠpYOcuc|™ŽÝ!ˆ® åÖ¦ MŸ^³æ'[À¿ª´þ¦×ÖÅø:æ™É{Âå;2V°ì]ºÖõ0©ÏfD7.9seý¢ °žSÃ`}¶º*dEeÕ™ZÞ9ˆ~Ù¶åã?]ßiŸ'5oÛ² €ÿ˜zE†=¸Óv½Îö=+x@EeõeÊÈ7‚ÌèÜ«®¼ÏîÜ•wfFxÿ¶6Ü9X¦`¬K&]ÕªÖÏEæfy;=œ÷ƒL´l=÷Â4uŸ¯óÔØh|ÿ‹:ÇA×üší0üSsKUYⶨ‰ÔëzH¶õYÛIö¿hwýŸžI$ü3W-[øÛ¶ùpô„&îÓO]YeõgT©î{šn­fͧ§Ï¬¹qÕòÚàw€íÝt±öì`….«”ÇoéuË5Ûá¬ù › UzµNí«ìÆÄô£Ü©oç‚ùà™›.[¡?/ƒ¢3 € € €Å.påõ7N6λ[˾Eßg-]÷Ç5Ÿ*vÊ P|­_ÆæV¹ÕKï´nÔ*ôÁ •_8_lc}kýN}‘=3UnÑêkÏêDO“N}yßx¶Ò Þ?­@‡Vá‰ÍoÞ9ˆžÎ`ÛDãÒÚGZœ?G¿€¥ÏkÐJ\«û|=û:ßœ«.HÞÛM=}ØñE÷«õÜÒ ‚ k[ƒvšqVï_μhSÆ\>MÚà½Ä ¢ß¥ó•ji¨ *Û|—œ;ïæ®ˆU¬×1¤k§*€óŠ¢8'7Æk/ZçNGµèßЯ‹iûOë§½uøFù¬¹ÿ¯ÃÆ]ÌÇÖ¹ýbz•3›óÏhXRwfAôôfá„‚XÿQWëçmÛ°m¯lØTÌ®~‡‰DîQž&ÇÜ­ñOmÌÁ 
zh0Ÿ­3«sλ1<¦Æº.Ü×UÑâä.‚蛥&ýõµ·7¸ o<;¯ìi~¨îYÿ÷Ä•º†Ó44½*x§tWy›1{þ‡Uy,è<5èZ¿±‹`j¸:ïÆÃy?ÈÄÊÖs/3ÍÔ´3+ w¨‚è?Ö¼Óõ W®üShRAíp ]© ºóÝm-›Þ<¢·ó¾bIÝtÍׇûk\âEÝ)ó&››Ýouÿ}IôJ}¶ŽQ}©6èñü±Ñ½1?µ_˜šµïV%£}ÃÙ.Ç…ó¼ì²x,D@@ŠMàòËkwóLÉý麇þÚýÕ O¿yZ<O›åE@ ÏË¡¢Rk¨·(è §î|ÿNãX_³n+÷/7®ÿw{Ðz0²”¦*#ºöÝzz2áNï.Èß9»wÅ=¦ˆCÐÊ-5^\&‡ó=ŽUm‹éõµÁû³» \d¦±"^·Dˆ„Ëè=(œîn¬c”f®s¦¹9s>—§ƒwç–ÍœDYeMºFÿ{ê=åm]º ìuß7'®Œ/j„2Xg½«ÂQ¾ÏPp¼_­»µýȹ2¿àž‰~-=ßõ„böêÌc«¬ŸmŒ/üI×›w¿´/A÷î÷n]£žU—Ì¿T~v –¤‚¾¾JC|á]½í;lë‡è³)±×f܇Úó/Qe‡*wA¾.ÁoN½÷}²{VP©ÉOøó”µ–TöÔ£H4j2+¤O:w¤çy? ‹ kþém›¾ÎÀx8ïø2®·¬=KUYâ/ªÌU¶zYÝóÖÍLðÞ)UTù}æjUòú¼*ïôùÙ™t‰ ·ƒô ruÛ+Ã= ‹þûZ“pCýÂxz‡>L¬Ç7«ÛúÇñ»êíˆ0I]gkÈPàØ6½|µÛYú¥}Ï =]oe:g‡]g‡égŽgÄæ—k›w¤—9÷7µÂ¼5=?„'Çæ¿UM~¥ÈmëýÌ™­¾ïW¬\ºè—C˜jÐ?[­= ´¿ëYÁÈç7®O^?ÌögŸ¡¾g­X¾è!uñ~…®ÜKƒ|êZ(/¯¬©l\R»$Ì÷¸]¢_Õ5› P*ˆ®Û¨©ÎF%Ž0ý¾íÛgÌš|ß¶mßÊ‹8? ·/Ù~j8ïÛçFT/Ù|–ÚT®ºWy;.ÌŸõý§Â龌ƒûŸ®£õºÏ¦zÑ“­ÇJamïÀêKÒ¶Ñ+ZþäEÒugmÐSЯ:l”1“»ÏËŒL2‰ € € Ð'+¯¯«Ö†'닊W·µ4W,Xð™Í}Ú‘@(@œk‘^bL‡@ºZ¤} ]]Aÿ 1žÝÀ玦1žœé¡E­Þ~žëãÄÖD"x'kû`Û‚ÜíK²<å:“‘ƒ{:@çv‰„7 IO‡Ð:ÅÅ÷RPîm]ý(\wp‡ zÛT{ôQ—LLS„/fûZêPˆˆŸ1ï'MË…óýš Þ®–ó* Dm¤²»D<ë¿ä§½88vZ,§÷Ý‘ õ¬p`©õîÕùÙ-HGÁ»7}ç—çA}ŠÝÏ–=7ó@êEà«k×.Þ–¹l0¦‡ãžõ“½\t>Ô^÷½ ·ˆ`>èÞ^¯´øBzµ7©âGdÒAü¯D¢‘{ûûcl¤÷ 2Ãx?è qGŸ{]¥ÙïeήÃ>;ªÃ|fÔ£J: k{ï]¥In·I“Ÿx:s¡ßHÏ\Ôa:WŸ—2É  € € €@¯W]õã}UyûÛ© ΂/~¦ÛFH½&Æ €€@ÎÒmR½3g jE¬ØzÎlP3è˳*°£iÆbA3µ†yRCÊûÒêíWËo}U'ûõ05Ù Z¼ Ú séàCpˆçÆôx0g:¾#ݼ–#-Ò{Ìu—+SA÷hɱê•Ï©þÆ´i§ïrÃXxÔQG•èÓ;5LBÁû•ݵ·ém¼¢¾îµ°1½ó>”žÎ˜˜:uj4óØŠ^¯[Q¿°ÛÖ“»fuRÝè Š †¶¶¢OѓɲáÈKV ÖKbýølYõ ñ‘ŒäžlŒ×Þš1?8“ÃtÏZ·n]K"éÏÓu T®ðÆŒþµ^ÔÞ¤qêù¦õϾ¶-ÙT6Îóa8ï]Òíès¯ËDû¿Ðw^‡ç¾œž§ý2žeδ÷>ÒÏTzÚ<õ|NµàoÝÊ3n÷ž¶×}º`ž—=–“• € € €@¡ ”FnÔߪãõ]ÅÒ‹.8ëÎB/.åC@ 7œëÚ½9âÖ—fæÚs;gÎáô#wǾ–årïPšåÉ1ï2Q3!#Owª¦¾òŠÝ´i“Ý6y²Ýwóf»uën¶iһ˶m¶©i¢mž°Í¶4³ã[šlËØf›h£Fl.¡Àc*)uí¾%#ͬOªUð–ˆ^µÝ¡Ã¥¸zõêœh‘®€Eƒ‚Ôÿì¶Ö¾E®»+h·»¶Û]-N÷Ѷ­•f¬}«¸¿;ê4u±ýÑÆe‹ÿÜm:ý\±×~ï<ºCkxßtÙ{?“õ}|Dûìì§ŠG¿7u<¾53ñ;M9&óØ*w6Žyˆ^§Ëcgì+ë Eqà­¼:]ÏÉ+—õÜýu¯ çÁ}ýlUÌ®>\1äTKý”‘Þ1­ñ ÷0œ÷¬ÕKþU•W®ÔõùÕ Ìú\Î+ÕDõ9|o0 êyá¬TFjÝ;7ÿÎûA7";ôÜë&Í~/ö¼¤î ýzmw Ý[6ë:Úny6¤*Bd|6õ|î퀹ù¼Ì& i!€ € €.pÅu7}PýÍÐ÷oýmož]àÅ¥x €ôI çé-‰ÄúÒ’ö/™­³ÅHïÓ‰Ò"‘C2§àá•fâ•f׉fü®mkÆM4%©vrQǵ#ƒýüoFŽV3ÌÑÆtøÚ]Ë[Ÿ™î°O{Zص9ö<¥2áÿLï¿£¯y™6«zïˆgçª 
ày:W“ƒýRãhÉ}esjæ­¨¯½½¯iõ´ïÙ)­Ñú¶­¬ûWOÛ÷yïÓ z?l¯_棻¸ AËȧ;ì5fΫuzÇ×d®„鲨{Y;"¢ïŸJÞ™Íɤ›±jÙÍ¿„Ãåm’êúð׈± Ia†ùžõ”ÝpÙ7ñTÅ>U‘ uŸž.·o­\Z{wz~ˆ'ô „/&Mreë%"=Þ‡õ~ÐßÂéöÓ¦MaGïqxÄÚ£Œç©'ÜŠ–ïTÀÉ.A­>Óäèó²ÏùgC@@@±Þ•)ç_sñÅç¾  €µ+αA݉¾^QY´PLÅ[œ5ûåX‹>;Îx“úþízß¹t‡¼qϹóÒ¡~r£5zÏîríêeuÏkÅeú¹¼bNÕE ˜\Ѷa‰õÌ'ÍúĽw/»­½ûô.Sé}¡õœªQ´_вe%Hªwh?Öžjª¥÷$å¦C ]ÇêtMQ€V Ží™Ò_irJJIAtg’ÓV-[ôûÔ<ÿ¤¬çëi¥«›°!©ð0Ü÷¬GâñæýcÕóœ *B?:•Á2ýè·°Ö±Ö[œc5NÕ–ém5m›ÿÔå‡k¡óGèMÆ­GÏxWìpe' Çu õ ¯¬¨¬Ö»lí¥Az:‹ãGDG}W“•;š~ç`åÖDË“;šf°¿µþ¿3ƒ¯:#;uN·ó±Ö>Õy›A›·nO]Ï£ÓŸë^× ž´ãåqÂÏ“õ[†$ž ÷¬•ñºu±šu:}Ç„§PϹ?ürIa>Û:Ÿë¡¼„¾ŒÛŽ­˜?nç‘Þ糟W]–±ík2¦œ{^êêÁð„îiOzÖ\¥µ%[t?YxÏËîËÊ@@@ ôÅ7ÚŠõÍk¿ð½–Œ@\ ¤+[î× Ÿ¥éšñFúŽÕx•~rC ™™ ç'Nm\º8;Ýxg&<ÜÓÎ*žÎDÞ¶HO— m¢aIÝ‚òXõLUdxG°H•æLÅÆ®Ç7wÞ¶?óêN}[†—zñ/UëcójÒèjÛ¤3#íçA¯7›:og}פîßÓ‹#.{HU yÈY¹îY×´Ðî£^Ž×œ«zÿÝñ…¯ I&òä :OН¶Ÿ'ñT©cH†a¿g•ǪN×ç#Do+u¬lvõôKë îù6œ÷ƒ!¹¢òè ÓgÏw4âýB÷¨=3ïѺ÷ÿGóFãûk|/ùçUñÅ/eK?.Óö} ¤æøÀ£ @IDATó2Óƒi@@@ P®¼¾öCú›ñú;ñÅm_¨-ÔrR.@"ÐÖÜv »Þ>jÅ|ofꊻLÏœgz˜œéð.sçEæ Òá3º¼5¦i2,É*Èõ»ðÀ ¨ÛÑþØCÃùŽÕñ@‡w'yž{ë@ÓÊÜOïw+Õ¤û6ù¿Ìõ©i¯ã5©îë;ì³ÝöY^иdáµÆù×¥“µæí#Œ·êÄO“^Æ„ÃÚ+œó–a¾g}¤b¾˜^ÐóCëàÚ_a={SÐëJ¸ªPÆÃz?(Ä,”cƬùÇG¼ÈoRAô¶ôôÅȽªAubc¼nÿñÚÏ®Xº°±s½ÿ‡.Üçeÿ-Ø@@È/ÏØóZsìn\°`AÁ4&ʯ³@n@rU 'évÛ–µú’·½¡µgN=uîÄ\E,º|Y×!©Š…H÷\FkY[P¿D&ëðþîˆõßÑëØY¯Ãu¡ÖÆY ¤+Ô!ä›-/wΫïw¼&;ïÓyûÁ˜oˆ/¼PíûoO§­ÖÇ#&Œ\vh,ÖþâôÊâœð¬ß!®nÉa¾géÕªþ[ReuæI—<^móS•sd°×(;¡àÞ=6œ÷ƒ!¹¦òà êc§HÔ»MרèÖkÏ%ïJôâu÷dµü¼Ìª‰!€ € €9&ðÍoýxŠ*\—é;­&Û¶çXöÈ €Ã.“ôÆÆŸ®W«ôú´Ž5cÇ•–|2=?LzxNz 5‡Ÿ°Í<¦º†-Ì@ºÉhagÕux º”#ŠcÍ›æ0“hn^×q·ì´HW¥štërgÜ?V¯¾}cÇ㣮ßÿÒa™ËÒ±;$ÚëŒ{áÉ¿ÎS€tM¸¥X'N¶~¢yîBhIš‡C›ÔØÚ!i‘>œ÷¬²9Õó¬±ÓÃr+’yÞÊú›ÿ ëä²p™ÖÏ+Ÿ3Z8_ãἂ_6Ê <çth‰nÌ%õ éK‘Â}^fã\ € € «%%Þ9ê¬ÒÓ÷w\|ñ¹z›ÌÕ<“/@†R gƒ;Ic¯Ê„°Î]4}VÕ~™Ë{zëØf?ójQ¸{æ|±N¯\^÷¨j)þ7]~ç>¼c;=_(Î/Ø麖ßÙá4ùÞCæ0³úç·<©Ýž wÕ1N*//om .ìç¸lö܃:TÔðm—­(Õü ýÂÿB{òö½e±3öjŸš©uëÖµl2oÌVð?ØW´Rï¤ÿþÐä ·²z里3Ò¹¢{ÇpݳNžµéÖæª²jeýÂ;ƒò¿øôƒWËâïi ©-¤.Þ‡ó~6-ö k?¨u³Mææp>ëã~^fÝŠ@@@8묗èõŸ²“´>ß]åÈy! 
€¹%³ôUñÚ¿)0¶:ÍeíÄHÄ»ÍÄb[Ò¦7ÈþÄÚx|³¾{~-²3û¦§‹}·w…ªµ8iœ™ð•p¾pÆí­¶õKeKá”Km=wtX}Κ6½úïÇÂùû.}]¤ZBŽÜUÝ|°‘’o©Uw4‚Mü"=½ÝDû±ƒ®Œ­+½|»M†`Apßh1Éérýwx8}Fή¨¬úf8_Äc_={ü2,pïo'œÎêxîY#J£uÆš ©r9³Ù¸æ³Ã2•.|ãWk>UaK×láuñ>¬÷ƒPºhÇžq=>XûpCCÝv¯ÅÈžNá>/³gDJ € € €@n L>$r²±våê_9ÿ“æVîÈ  €¹!³ô€Çÿ\uñžîÆY†ãÊÍ„e0]™x¼ýTÙSÙj_Ч)ÔöîÓ†y´Q2é_´pkϲ=Æìù“Ûç `ʺôûœuã  D©"”ÇjæêÓõty¬ydíÚµ‰ôüL4[ÿ;Ú½½'k¿xÒ¬Oì1$ÕÅÔªýÔp_]n÷7ÆoþM8ßyì'Üw:\“Öœ1#V}Tçí†b~uüæW|?y²òóRûñ¼KÊ+«ÏmŸ/Î)¿Å_ ûɰôêqàÂfž±s8?X㡾g•UÖÔè™ubXÍ/iŒßúl8ŒWÄ>à|óÝpY¡uñ>œ÷ƒÐ´XÇ'žxú(õ“®x¨^}·2X>/‹õú¡Ü € € P,¤¾s·K‰)' €ýÈé@zÐ]³õS-öÒåR ƪr;aù{c1}I¼cCE¬ú#åsª‚Ý äµÒ­Ù£,V]ÙíÆVL:5Z1§æ&ëÙuZ•÷³+—/ü‡‚>aA0áyÞMÕÅ»3­÷줰¬ù<˺¦oÌ,ƒu~Öºû½+¾(hÙ¾4L_×ȘÒè¨ÅôºÞ¾Óbó&E¢Þ÷2÷°Æ}#s¾ótÛ5¹*\®{…UéGAZᲡ¯\ºè)½¢bZfe uåýŠÊêT—YC™—\:VpžÔ*=}Íéz?¦¤ôºà~ÙŸ|NÍ=\÷’¾î3”÷¬à5$z¹Ø·Ã¼©BÅ šßÎgŽ›6nûªæŸI/+ .Þ‡ó~ö,Ò‰5k~²EŸt¯japÄÔ©sGö•£¬²ú3zAŸ·×½­àž—}µb;@@@ ,¸~¢þn,×w~Ò$îÈÇ2g@†B §é@Cý¸qþu™ ΕO²ÿ® öœÌå}ž>{þ»Õ2ôgúbyñìÌžöSl]æzý‚qi_‚ÅÁ1ÆM:ð>㙚Ìý iZ=‚V>¯‡eRÜò„qfâƒå³æ½+\ÖÛ8x‡°Îŵ¹Øš]ííÓûv©˜UuloåÉÕõ3fV¦÷tß®ÀòŸÔËBºŠ~Y~dã+Oþ(›ùnI¸ t]¬Ó”Ýì)f¦zæ”pYOãsªß5Ñ¿j¿#ÂíЩSÅšŒnãÃ5ÇÍ--稵óé¥Ö¾»ÄDRÙ?”^ÖË„ïo*ÀuÔQ}Òv—dðŠ åg¦~šƒm‚à¾ÞT|sÙìêéÝíS ˛۾¦sº¥½¬ö Ý/ï9qæé»¶/ëz*¸—Í©¹D×Hðú~£!ºgÙHÄ.ÔÉ×V‚ýAšî½s©‚€§þdýd¸\Ϙ‚êâ}8ï¡i±Žu¯ùSFÙKÆí½2c¾ËÉ Ò“žÉµž±ßîWéôºŒôt…ô¼ì¢x,B@@ N`ĸ1³ôMÕìÞK.8û…‚+ B@ Kýj˜¥cö;Ð>¯/v_U‹§Ë2¾ØâYW€ìdîp¾k Z€v“¸­˜]}¸ï¹ã=ã}LÄcÂí¬³G†Ó]7ý/qÓø]K‚wøî¬WÿmzøÃesªç¯¨¯[, ‡ À3n—ÉÑ ¨ÏT>cZ®w”º¤º.þ·¾Ž>$Ü®PÆA7Ååsªç©uéò𼨜™HäþŠ9U L³]ÔÕ;Y?‹MeÆ©5©÷)}Ñÿú¦>‰x“KwçoXÈÔùôÕ¥» Ëf"Þ·NŠU•ß_øZç -½=gÞßX_tm>ȃ=®Cq->Cy= çý ¯á²‘yg–ècrz˜”®«Ï陽©icÓÕAŽpy8N=Ǭù®¶;V•¬ôh7›Ã !š~§¶ îç~¸}æ8wŸ—™¹d@@@ ÐweÁ´þø‹‡Ë#€ €ÀöyH²Ý¸¤î ÈžT0}‘Qé–Qzè­/}VØí; Ò=«Çÿ úâ÷em·I»í¢î wU î-ÛI±±í´>ìþ2ßÐÅJÜÊgÏ¿PM ëÃõ:æÊïÕ5ü&EYÿeùŸ‚oû(õà8a+DeÅlö}óžç¦*@RpôÀCã_̘SõqÙ.–IP‹QEµ¥ú¹ÂŒ4—ÉèaýBö\p>tžö—ÓAòKwµ­}RƒÎÑÑš¸­u.7þ]¹¼îQå©rÓÚó5ïáìCe±šŸ{ÆLeÜE­ðTónú`ðfµæ»ñ‘x<Õúx°J!¿³u<ýt=´šf\ëZ1—ÚI1’µ--‰ywý¼ã;›»N±ÿKƒÀ >¯ÿ§ëâV<Õ^y¯é›ÇO*¹¡"Vó] Ïêºx]¹›¤ ST®÷ëHjaÜž[ùþÆsË̵kïØ.ðÞ]®VÄëê” ’‹ZÓKmé)h{¡Z1_8n’I¨Î?u¼êœí©¿Ñ!Ó-¡ÓZ^4è`‡éÁÑ—Ô.©ˆUíªJ6­Ý{&»B|>ذ´îáT‹ìŸvÃeånüþ ŒÏMÝÚ½#Æ®·ëAë+býI×HP#:¢Kxo] 
ÿOçl§p[ÍÿKÓj]¿žcƒyÏJõ¬áÙoeäñß›Ö'¾Î÷4ÞÒÒ|Á˜èˆ“Ó×bkï‡v÷lê)­\[7œ÷ƒ\³Êü4Ö×®Ðóêûž5ç„ÇU%ž¯Ž˜8â,UÆZ­ß]wžÝ¢Ð¸î¿æ}z¦éYÖ68wŽžÝúÊ~½mÉÝ7ïÑ>«Z|ó³ ‚D¸i0ÎÕçef™F@@hX°`A©þÞûp0ç5›U¸ € €@÷­­E»_ŸSk ûYsrëÆw?R%Ñ9sú"x_ZÞ«0Ü©š>]?ÓôÅðQ™Á—Ì}L\æüÄa½*—.Zª€Î A ­Ìý•ö8ëÿ)ð1CÇxG0®×–¿w¾yÏŠ¥uÿËÈÊú…wúÎ}De2,ÛXqTûNýbV.ŸËé}§ƒèémÙê|û¿ô|.Møæ›a·Ü©l)З J¤¢öÒT¹RAt­U‚ýÍ„Ãs)ûó¢Køßwgª•ô‡‚ÖÛ×gs>h'üÄñú,´¿§78€5cÛ>3g+¨s±Ú8ÖÈñCZ£ zë|¾ƒÏÜSæW®¼#ýú€p}oã†%u·%É“TÞ§:o«ûBTÇ{‡–T™ù€ò’¢‡Ûꃾ1` da¬ž5¾¯ÖÉW¤“²v¢Ú§ß•‹¯5Hçq0'âñdc|áüÀ$8ß™‡Ò}u—¶kä,«*§“4Nуû°î­×«gƒ 7‘UZ¤{–õ¼È"ÝïÆ„eQNÏêÜûA¸®óøWËo}5éüóÂåºN ª‹÷ἄ¦Å8Nn~þB•û×™e×5º[ª‹ç]¡éïê>|®ž_­Atç6¥žõ oÔø¶ B`¸oê>íÙkKŒÛ/\Öa\`ÏËec@@( ö ¾Ó÷Øîá/}©¦CEé*&EA@ +yHJt ÜP_w¶5þÛ‚`Š‚œõKBÛ+ s©Þ»~‚‰³—.Z5ö:¨Eü¹¾ïŸ¨€Z½w?(÷‚ÊmŒ×¾¿qií#ÝoYXkVÖ×ýîI÷ÆÛ;_Á£ÿô¥t¥¼.Ú’hÚgE}íe}Ùg¨·IµöUY™»fò¥²üÙKª'ÿá|}.Þ΃2ó ÎÉ-ºÞ?›lñ×5Œ^Ipkkv?£«–Þü§ÿ>õà¡ÊPÐ"ò™Þލk¢Iy¯7~ËáÁgnGZ÷¯\¶è×O™ ºO¤®É§{;v°^fO(€ÞkÛ’{¯XR[Û—}ú³ZCE­?ƒ–ò­ƒºD"k¦Çæî.*²± Lü„¤ìïÒ HöT~›“<^-m/èk€º»ô²}ÏÒëG>«à÷Ãã©< ;¿þ#\×Ý8ðëF²2\¯ gÐÅû´p>ßÇÃy?Èw»æõêÕM KjOPï8ŸÐý5èá¡ë!øì9·¼¹%ñ޶g„ ^—£ÏÜ)ZþjæNÍ·>s>œÎÓçe˜}Æ € € €@Ѩ·±éAaõ}]ú;ˆ¢)<E@ Ÿjø—ÿCТS]¯Ÿ`}·jÓí¦–U»ª`cU²×H|M´Xg›ÝŸ»zgwN<ñô1%ãJÞ«~¢Sš‡«Kê=uLÕܳOø-®!èÞ´szêr~®ºH}›M&hJ¸Öüâæç:oShóe3çáEìIêú~‹Ýõžø‘zoõ|›zúÓ²xª1²ù!£V©ùPöiÓ¦ˆŒÙëãz÷aÎxoS™öÓyYA†gtÝýKÓËÔâø‰|(ËpæQãÃ==Y~´]AëÝ—õùxÑóÝŸ6Ú kÖÆãéÙÌkùìšC­u'ûÖîï·»ÎÙ8Ý‚×<©ñ¾>ëâµ阚egž¾kiÉÈSô™:LAéàÞ:Îøöߘ§M2Ù¸bù¢àü ÊPh÷¬AAÊr¢Ãy?ÈrQò"¹à÷—ãK> ž0Õï,o×k-‚û *úª`زdEüÖ.íÁóϳçLõÆr°~¿ÿÚ6ÿë÷5, ^ŸÓåÀó²K"€ € €9#pÕuµ©×±ƒõ¥ìqŸ_}_ÎdŒŒ € ƒŠe1 € € € € € €@! \y]ݪ(ýoõ>öÚ_î_³k³97rE.@È}鹎È! 
€ € € € €ˆX{l°£sæ¾%ÀN €©ô"=ñ@@@@@ Â÷£;ËûÑ‹àtSD@ì HÏž%)!€ € € € € 3 ,ðô~ô÷jJl¥EzΜ2‚ Òóá,‘G@@@@@ú)0rÜî‡ëýèÔ¯ûÓ ¾ôÙÿösw6G@ ¨¤õé§ð € € € € €…*à¬×ú~tÃûÑ õS.@Á >x¶¤Œ € € € € €À° x¼}Øì90 €@þ HÏÿsH @@@@@ØNÀ“j‘žô[x?úv:,@@ gÛójÖ"€ € € € € €@¾ \~yínÑÑö%Ó7^t~õDå_“  € ÐWZ¤÷UŠí@@@@@ÈÈhÿÝ­YujL=OÎÙDÈé¹s.È  € € € € €Ùp‘£R ùö/ÙIT@(.éÅu¾)- € € € € €@Q¸T‹tgô¢8ß@ ÛÒ³-Jz € € € € € ¿@k×îÖ®þ¬@òO€@zþ3rŒ € € € € €@· ®¾aOkíÚà/_Pýd·²@º Þ- +@@@@@È?‘%£ZßîLÐÝå_ È1 €Ã/@ }øÏ9@@@@@@ ‹~[·î†÷£gQ•¤@ŠK€@zqoJ‹ € € € € PàÖØT Ý÷S-Ò ¼´@Á >8®¤Š € € € € €À°8kR]»'|G‹ôa9@ l!‚2 € € € € € €€1W_]»·)µÏç^ûÒ5;c‚ € EúÀÜØ @@@@@œp%­­ÑÕ*}]ÎeŽ !€ GÒóèd‘U@@@@@zpï Öë=éö¼k@@ 'é=é°@@@@@<P=HwÎ'žGç¬"€äžôÜ;'ä@@@@@ ض@ºñ¤L½@H X@@@@@@ ÿ\óÃ]GEK^vÆl¼èüê‰*‘&@@` ´Hˆû € € € € € c#"%­ïGwî¯ÊAô;?d@ ¿¤ç×ù"· € € € € €t)`k}?º5A @v@€@úà±+ € € € € €¹#Ðú~tß8Þž;'…œ €ä©ô<=qd@@@@@L°Eºç ôL¦@€ô ±  € € € € €¹$°`Áõµç¶®»ÿžåRÞÈ  €ù(@ =ÏyF@@@@@ C dÜè#ƒYgÌCñx<™±ŠI@€ô ±  € € € € €¹$àYïÝA~¬±tëžK'†¼ €ä­ô¼=ud@@@@@VkÌÑm`‚ €;.@ }Ç I@@@@@á°môG }xÏGG(h”ƒb € € € € € P”—_^»›ºtßWïG߸më %…F@ Ë´HÏ2(É!€ €ÀÿgïNà$)ëÃqWuÏìÁ± r xD.¯ñ6/p]9vˆ'Â1ƒ‰õ—¬Ñ$`¢Fñ » (¢áЖE¼0Æ[Ä‹¨HD‚\ ì5Ó]ÿoõl÷TÏNÏÎÑs?ýa¦ß®zÏç­îYú[o @€ @`2Ê Óþ˺gٷ׬YS̶µE€f«€@úlYã"@€ @€ @€¹"àþèse¦“&M@ }Ò¨5D€ @€ @€Æ'pîû.8#jHÕR ¤g‰û£rñ’ŒY@ }Ìt  @€ @€ @€ÉH³ôÈsß{Á¹M­¦Ù“óו¤òͦí^ @€cH3‚ @€ @€ @`’²ä›i©ôæsßÁëó–ßõž™&龑¼ýç¼öדÜÍ @€Y+Ð1kGf` @€ @€ @`– T{{¿]*Ï‹«¸§ÿöÏï½àö¤”,Ø9Äâjô4V­?=KÓgnßüëó׬Y³m–1˜pÁ÷Q™ð5@€ @€ @€Œ]àÜ÷­ýIš¦GeY²-M²o%iú'I5y[5é½.MÊ/NJé‹“,Yœ$Ù3ßzΫn{KJ @€¹+`EúÜ{#'@€ @€ @€™)¯>?*MóÕèDG–fo)¥ÿTKgÉŽJVyáÛßôjAôăŒAÀ=ÒÇ€¦ @€ @€˜24+^ƽÖX¡+Ðw>²leÑ¿X陽€@úèÍ” @€ @€ @€S&PíÛ5ÞèL5{Ç[ß´úã× @€À˜ÒÇĦ @€ @€˜;~¹ý»q1÷Þ]Z¯fëÞò¦Õÿ¸Ëv @€Q ¤šL @€ @€ 0uçŸÿ†íqSôï5õ Ë®ÛúÀ¯_Û´Í  @`Ìéc¦S @€ @€L™@ã>éY–Ü´uóŽ®5kÖôMYo4L€f™€@ú,›PÃ!@€ @€ @€9!P ¤gIöËJZ9iÍš×=8'Fm @`’Ò' Z3 @€ @€ @ m½YH¿/ɲßþW¯¾£mõªˆ¨ ¤; @€ @€ @€À غõ×?©$ÉÉo=çU7ϰ®ë. 
@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€˜dt’ÛÓ @€ @€ @`\˺V½*K“§%Yöóžîõÿ0®Ê&@€Ct ±Í& @€ @ ¾èm#¦ªÚ*àØl+§Ê @`rÎO“t^–%mW³ÇŸúŠç—æ=£oËí›â±½]õª‡f¦€@úÌœ7½&@€ @€™%à‹Þ™5_s©·m?6çž±ÎL'ÌÌyÓk-°lÙªƒ²ŽôišÌïÜëУ½?žè6ÕO€Ó[@ }zÏÞ @€ @€]|Ñ» ‰ €HF£%ïÄ tu•ON¿¦”$O©fé÷®é^û¾‰mPí­²yÉÓò z¾?K’cž¹lÅÞ_Ùpá­òÛN€³_ þ>{ @€ @€3I`¨/zgRÿõ•ÌDü²ßKO_yÊ’%KjÁÖvŒaé¶móKIú¡$IÏŒ/ëÕŽ:Õ16úú¾÷[¿·¿túYAô±9*E€Ù$ >›fÓX @€ @`Nø¢wNL³A @€À4ȯ3¿cÁ/Óréê¸ì÷W¦Q×t¥M7|êâû²mw>*Ë*ÒÓ½ö…mªV5 0ƒÒgðäé: @€ÌM_ôÎÍy7j˜:Wƒ™:ûÉl¹§§gKO÷…ÿmÆÕÝ= @`® ¸Gú\?ŒŸ @€f¤@þEot<ÿ¢×ƒ˜`üj0‹:;îMÒt¿¸ »Ë~O°·ê  @€Àt°"}:Ì‚> @€ @€ @€À´p5˜i;5:F€&LÀŠô £U1 @€ @€ÌWƒ™3¹ä”W^îè|fZJŸ‘dÙaIšü8©fß¿'Ý|é׺»·7Š¥ËW½(‹ri’Ýßsùº.¯} 0ûÒgÿ! 
@€LC“Nzé¾¥=ö|QÜó‰Iší_ò~?˪ßzðîêM7Üpñ¶Ýuy$_ôžÜµjyš¥O/ƒ³áŠuï­×yb×YPÎ:^œ¦éïÇ @DnÎÛÿYrß'nîîÞQÏ×êù¸ãŽëØsÿ#ž^.%Ï˲äq‘ï ¸Ôí}I5¹-+%·ün[åÒ¯l¸ðVåóíCõíé]] ÷O¯Žzžý9(úþó¤Tý¯ÍwU.[¸_ùñ¥lÏ´’f®Zÿ•áê.î;ì}@éóm}½•;6}ê£ÿ[Ü?Âtzòé+ONÓä™avT˜-оý¢š$·ÅôßyðÞ[¯¿á†úFR×ñ/:û‘ó;˧§¥$ú”eî‰yÿaVMnºæÊõãuT;òG»ë®åÚq“”çy*•ôÿ®½jý/†Ë?xß²®U¯y}Zl¿mÃåëÖäûOî:óÐRÒyj®qæßSݘV“om(ÝsÒÝ]Éóìî1Úci¸÷×q]]{íQYtL¹”Çô1iš˜UÓ$¥ì;I¥ï¦ž+/þñîú3Ôþg.[±÷âŽôèrGrlRÍëN®CiÌÿ÷þï5“| ÷˜où¡˜l#@€À¸âïÔÃËiÇú4IOlT¼ãñ‚¤”&d‹ÿúÄÓÏ~åµW^ôÆþA‰È~q”Ø'þ&]1h—— 0Òçà¤2 @€L@„Þkÿ#Þßç¾-‚ióâ'µ_·+'‹,÷.=cõÿë¹|í{bGÄl‡~Œä‹Þ¸ŸÛ¥I)™AÚæµœÐµòaóÒÒÑÚé;›¬7|¾ÿ°ø‚ùѧ¯8ëÚ+/üvþzðã¸SÎÚgѼŽ7ÆöÕ;ƒÀÑçB®h0ù°¥5K—¯|GÏë×ö6%÷méi«"Àš^å>÷?ju—Vì½òøøR|¯$-½.µFÞ§÷\µîëM¶x±÷å®4-},ßÝ1¯ôÒxU =ïWLˇb ÇÖ›¨u+~õw3Mö>ðˆ;—uñ± ÝëÞ\Ï3Äsiiת¿bï¬Mt!Cô¯+ÚH–u­þj¥/[µñêu?*ìn•lw}­Ú©m_ÚµòÑÏ‹ãE)˲ï—{zÞ°†Þùo1ÇqÞcz¼å‡ØLØ6Q'YÌ„±ë#™ °lùªÓ⤬ÿˆ?%ûG{ãoÕ-ñ×åŽ8áïñúˆøCÓŸÃ-—;®?þÔW9šÏñ™0~}$@€‰HŸWµ @€ @€]" ÷˜˜}-¾È­­ŽÕÌ•,M~áò=âËÞÃvèŒ/Ϡ볪[¶œ¹qã'~·KEcØõ=7‚‹ºvcÕyú“ˆzß_0?:‚|qÙÓ´}xbG©ôÙ%]gµ©û¢»‹Í,;}ÕÆjóÏE¹Û³äþHßcÈƒŠ‡¦YrTÔ¹8BÅʯµ'w­¾÷šîµW7ò·H,=íìc"xù™èÛ¢¼oñEøÃdKôëñѯý¢_Ÿ¯öUï*•ʯ˫ˆKµ®ˆ§Ò#÷Yy™øBý·}þúª<=ÒG~iØè×µ‘ß¼L}¿¿~•E—Ò#cÜGEŸ÷ÍÇûžÒªÞeËV”,Hó€ðókyc¬~?ê98ÉÒ£ÃíÀøyF©#½)VÔ=ùÚî‹0Yõµj§¾ý¤å+_c¼(^Gœ9ûî–¾Ïÿ|Ï'ï­ïõsš<>YpÐ÷bn¿üÑõÞÏûǶ<àq,$é%Ë–¯þÓÍ÷ô¾a¸Uäyþúc$ÇR=oýùy§ž¹ßžó>.¯µ;²$»3~åóý@¼o÷É‘ù¤‡ÃÒùóW„XÝsźO×ëê9¯wÎùyPçôF½Y²=Žë˜ÛôÎ8fßùÞ‹jbÜiúî¾¾Eª®vCã=¦Ç[~¨±Í„mu’ÅLûtêãPWŸ¨ –¥—â~j|@ïÕmñ ý?I¥÷¢k®¼ø§ƒû_{_–;ó÷ú³âoÉ¡ñ·áŽxï}¿7«|êºî 28ÿà×#¹Ìà2ùë,[qÈüÎ8!+®tQJÒGÅg̯ã3à¶J5ùlÒ{ço‡*cÛ(Jé©‘û•i–=¥Ù9ÉÖ-—ôô|òžz ùÕ[Ìïøh¼~Nþ÷e~çüŒtœèA€†HÞÇ^ @€ ÐFôOóa<º#†þÁdÛÝß©¯>àõ嬼¤”¦±r5‚ÑizryÏ=>yŸ]+1Ž_Q×’øÒþÌÚ=_Üÿeè/)è#Èþ˜Øwq¢é>YùÝÑÜ«‹Mî¨ômžWî¬Ñ?W©TÎûÍÏ¿÷¥o¼±·ž¯võý;ÞÁ‰·Ç¶R5>«8?{Cw÷ƒõKK¯N²êgí«½|bW×¼H|)~›oˆ Í$÷¿é†ËLŽ=öØÎC;úïcl›¦Éüެsmd}Füìr™÷v×—÷i¸ÇÒÓWÄÕ J—`9:Óö${þç¯þظ‚>ùæm†ÅeÕx7=v®ÿ؉§®þzGG¶!¾èÀkú§'u­X¶±û M™Gû"V;Ç—Æß¨T+/Ýxå…?\¼§{ÝmqÙö-êìŒ/˜k+Ê#8‘¼&~_0_÷©‹ž_Ö:©V?ÔsÅ…×®#½3úw‘ï"8;Æðˆ=³½Ÿ»Z÷¿”œyD ñ¢žË×­To5îí~UaÛ‘~Jþ…y¶àÀXå›\RØ·K2ËæŸÑü8o»ë\ñu~ìÅ… >Û:"ê}ceË–|¦ WHˆ¹¾3NxõP+º?sÕÇïˆöþ#®€ðµ8aâꘃüj ËOZ~ö36^qÑW‹ýÛ%=ºc©VÞcz¼åk¼ž˜“,fÀÀ§}ãJñwfcüí8qg_ó«Z|?Þs}q¥¸HíĬRœÎu^ÛïȪ•¯§IgäOŽÌÿ"ÄI<¿‰€ø-q5ùõüùß¡(ÿ¾¸Ãïzº×^<^ƒÚ 
R9zM\%¿•D|lÔÕøüûI´›_ÅåˆèÉ!ñY’ÿzZÿn¿Ç*súùî¾õ…C}~×ëÌ?Ç㸹8ÀßÇÁÂ8Ùî‘=Ýûe}¿g 0”@ýøPûl#@€ @€Ú(_ôn*ˆ^lâÚ«×Þ’Uúò@víQJÊç%±ºµþz,ÏùÌ |ÖPAôz}7|êâû"}qíõÎ/˜ëûêÏè^Ö*ˆ^Ï“?W²¾÷×_Ç ûÃë類kÁ‹,ûöíÿ{Ó«‡Ú_Ü+¹/‹Èù¶¸_÷ÙÅ}C¥#fð]ùÆ¢ÿôöŸ}ge‹ zS¹¯l¸°vl7¶sÎÇ{L·|q\3!=ÄIÏÎ?Ÿ÷=ÎÅgãâx=->l¶Ö£i~’…ï]cµéu|¶ï÷Ã{cÒW}Öæ»zmè^÷Çñ~Úwýt¿¬š½+þîõŸ–_Ý¢Ôñõ(sdäÿF–õ'¯²áòuÏÎó÷d÷=,ީÕNˆ‰âÿxüñ¯Øs¼]=ô°c®Ž•òo‹zJÑ“_V²êÉ÷n«ìÇÊ"P}Xûˆø›¶(«&oˆ~å'yŒG K~µ»ÏïZõÕì§õf²¬óÈzÚ3h%àt­dl'@€ @€S$ÐsÕÅqoæ¬voñøòÿqË’ÅÏWWFøs–UA¢ñ|Áü»ä¡[ú[ŠUwÃ?²¤ò׃.w=düñqïÛOä;#Fr\~Iú!3ÆÆ|s-péj’å—JÕ£ZIooèÈžÞH&1?yS[óKÖ';vô¾ywEóû¹Fäçy¾,I—üí®o—ú7œtúŠçÇJΫâl…yáüÍ¢¿`ç‰-JLÌæW¬ûr 6åµGÐìiùœî®¥‘Ky= ç—ÞXŸŸ(÷¦ØÔ¸ÃpílÎ6Ÿ.¿Éó¤¥Ò9ƒóÎ+-#ºï¿ßÆß«—E üä W­ÿJýÖyçó`jœ¨ówñ¡‘ŸOÉþñ³8>Kþ!?¡¬§û¢/Åæ÷zww%‚êk"ø~Yž?J2Ñüç÷§Çö;®(rF4|Réì3[ú¶³±{ýÆÁ'ËäÓz®X{~eËCO[KJ^ ýu½Lü]Ù«žöL€Z ¤·’± @€L­À‡ëÍgYú¸zzbŸÛóó׺»·F”bsÞ×`°›>qg`c7ÙúwW³þ xmÕg’žÕªP9éèß+Ø{ïÛ±3@Ò*÷®Ûk+OãRüùž4+ýÃI§¯>a×\Ão ƒ|U{Dl²å—Æ>wco¾4{dcËÎD»ë\þúä嫎+—ÊúWw'ߨšÜüTÑ}Ë*ç×Ó±ªtwÁ¦QKáù¤¼îÊ}4Ç`üŠI½pg¿óô®®…õ>ÖžÓ¬vI÷¨øWµìM;G÷¢s>Þcz¼åG7ò©Í=Q'YLí¨fQëYÒÓÓ½¾vRUËQeÕÆgG\JýÂXþ÷íXÎÒBþR6æ•Êù-Ò$ë¿*K¬4϶ÞyÚç¯þؽ-û;J¥¶·ß¾ö Ä ½í«MM 0Ü#}.̲1 @€ @€ÀŒض£òãóûÿ·=îázÔd ÿ‚9ߣ~äƒ}敞÷÷Îûùð4KâÒÖIsp±E­± ð—-v ¹yc÷ºcµßw¢Ÿ¤Î^™Þ?µKòÖ ì lž‘¿Ž@ä'¯¿þ’‡êûFõœUÿ"V†_Aû½ÊåäºXÿù¨ï#;îß¾i$uF0ú µö²´#úü‘¬ÆUƒã¿RÌhT<Î_G+q?÷¨¿?Xœ¥ù¥ö›í®¯©òx±´kųâ̇k"¹0‚Ë_ë{pë ?·©»vBÄ༓ö:-ý¸ÞV =¶žêy´ÇRxÖVý‡üÿ UßpÛb®~´siŸê¢üD—› ùkssYÏSØ5ºdÛç|œÇt2Þò£þ”厹{RþY8–“,–._ua¼òKz×N²ÈO,š²Ìᆷ÷õÞ² sç]QJÙüÝQlíë»eÏzþ,s ýaóÓ¿ÊWµ÷·—­Émv×7û  @€©HŸ:{- @€ @€–×ú¢_-=cÕ¶¤-Ø]±e%¸ã¤SWþ~©œ¾$‚§ÄŠëÇÇó@ ~ 5!=ˆ ×ÚhâÃÑä£ã2äÏ{¿®ØÐþéâSâõâÚ¶Jߺâ¾Ñ¤7\±¾{Yת}#èýÞŒíí=/Ú}Þ‚Å ¶ÆöÕ$ýÄ5É}’¸4ðàz;å¬}¢L~BADÌãÞ¼IüäDµÿúŸÒ;_çyóµGö“z*nw}źóttíb]䵑Ú3¢ÿv{õ„¯lºôÁù&ûuOrÿ/—e‹wDçÅ< HMßžwê™ûÕç§ZÍÁú‘Ö‘õõý$íè¬e/§¥ü„‰Z =¯7Ž“‡ç;⬈ƭj“™¼@IDATGùk"æ|<ÇtÞýñ–%Á”eŸÀ“,¦lLs­á믾äîeg¬ÎWwÆ}Èw{UÖ|ÕøÒ®ÕÛã³p~¼‡k·ä“Yš>5/Ÿ£wÇç×ú1Õ¡ 0mÒ§ÍTè @€hˆxqrüD =Yдg _,9mÕ#:;Ò÷E`yvÆ}Ó|Õò7#Ðys5MnN²ÊKIé3üÜ{"ºZy`Ë'Ò½þk»ö,¥¥ÑFS =:õʼÝü^í~óãèĆîuœpÚË{:Ë ßVJ²—Ęòl¬¶O—GdfùÒdñ³å«^Íën(6³°\~Lãu–lˆû´ÿw)KªÕ|zÜÜ7ÿKÓ«±:½šÆõê³R©ZŠíõýi5ùV£|$Ú]_±î<VNç峺ߢùY~ÌMy 
=?I!ëZõÛèWœÞ/~Úò˜Wê|T½¢¸’Bíþõ×#yÎÊÕÛs«Ú#Mª'‹õƺÞz=ùóDÍùXéz߯[¾^Ït}ž¨“,¦ëxgq¿â¯¤/>¯ûÏxÁ@ã=ÞçÇ'tþ÷wl,®tQûpˆ+] q’ÕØ*UŠ˜*ô©’×. @€Fฮ®½"PÜ ‹õ0Y'mW¾ ½\N¯Îc襸6 {ðî¾OßpÃÅÛŠYÖµºérëÅ}ãMoÚtéæeËW_k WÄÏ©ùÊÝú}¼Oî:óп o#ú·v¼måå?sÕÇ×{ì±çò˜cNˆšó“N`I>G¿±ñÏ-]¾âÄž+.ÌmjŽ[þ/Y¸×ÎWÕ[¯é^ÿ/;_Œé©ÝõíÒ‰,ùvœñ÷I¹|yøÕ‘v\—ì?î+.œÒ`úÒ¥K÷ˆ¾ö¿ÒdÔ+ÇwçÎ ½¥Í?ëLö©½ŠSk•¯ÕöR¥ó÷’ߪÅù«l­Tnë,÷/~cäðVåG²}"ç|,Çt±Ïã-_¬kº¥‹'C´ó$‹é6Nýi¿À’%/[k'éŒ÷Šíï  @€±ôÿË~,%•!@€ @€&L`dŸ#•§Ù”Ò#ˆÜYê(uGðøàž÷ÅÚéÕ=—¯;éšîu— ¢7ú=‰¾¬ï‚¼úd/XÔÙñ’zSi:ÿ呎ÅãÉÖwô]ZßÞŽço¼±·çе×ô\±î¬Í½½Œ[³¿;Ö’çw;/Ç Î/;¡kåÃêíôô|òžèCmErdøƒúö±>·»¾¡úÑså…WÆõˆÿ"ß *ÙoAùÓK–,Ùí½…‡ª«]Û²ŽŽŒ€tm}g¼Ø®z?×Ý}ŒõWµúJÃß{}¨6³ršß½ö(%½?¨§ó:âøMíu–ŽëRô“1ç£9¦ëc,>·|±®é’ÎO²¨÷eÌ'Y쬠x’E½NϳX`'Ï”²ÿ›Å#54 0gÒçÌT( @€Ì$X ùäz+•©¤òè£pfð0ËÞÓsÅú1ß{¼>®ñ<_{åE߈€å÷ó:²4={ ®¬~Y÷+ê«Ôöµ/•×½áòõÿ/ÿËZ­iºÏ¼¬ôŒ¦òËÜÇ#‚ý”tu•›öåE»ë¢qÙîÇeßµs×s:÷:ô“méûmdSÖ™[Ï—Æ-êé¶<ïôŒ9ŒûÃÇÉ£y¤é1yö¸þóæžîý²X4âþ?ªíK“Ç?½«+n0ŽÇ$Ìy½w#:¦ë™‡xoù!ªœ’Mu’Å” F£“*ÐÙ›Þ^o0N Š«£x @€3]`tÿ“0ÓG«ÿ @€ @`ä«¿ãÚoÍ»ÁâßV·n¿qª»–’gÖû÷ô¾¶žžÊ縸|íÒí±\ùÉùeçOîZù”Z?>ïS¥R”@ßC[>š¯Ð¯9N~¨½Î²oïÜ~à²dñ«jéñüjw}-ú+îÿ.©&ý—ÅOÓS—eûÔVÿ·È>a›;RVê$ÙC[*}m}dÕ¤V_~)æ¥]« 'c ?¤ã_tö#ã²Íµüq ~mpAíþöq\.Ú/ÛçMƒ÷êõ$Íy±OÃÓÅŒ-Òã-ߢÚÉÝüèWŪÖÃòJâËøwä÷W…í(œî¼OuÔU)EX}˜G~"@ìÿ ìaÚÈwmî­|<¿„{ž.w$gÇåÕk«Ñ#ñ“k¯Zÿ_ùöÉxD05¦)_ŸÜQl/ÛöÐ{bÅòïvn{×I'½tßâþѦÛ]ßpíoHï{mœÄñéZž¸ýÉg¬>o¸ü±o¯\Aî#óºãäøüÕ»·íìH³÷ÆüÜS«?Ißuüñ¯Øs$õÏŸ×ññÆ\X;¢’¼yp™­½ÛóW–xË §½üàÁyFúz2ç¼Ø§VÇt1Ïpéñ–®îÉØ7Q'YLFßµ1ÕéOòdIö„©¼šÇT+hŸÌaÿÇw¶ Ò8 @€ @€À´HÓóU¶Ãõå¤3V®ˆ¨ì¿ÖòdÙMq©íÿ.ÿdí‹Ë§÷¯®Ž#8ظ'ùàö?õrØ1_ˆ@ã^ù¾6ÖžçkÇëÚ¥ÛÓ¬»ÖN’¾"žkýŠÁëÛPiÙ«^¾ó¤€–Õ•÷Zpv~ôZ†ÞJÓêäÚ=®«ÕwÔö¥é~¥=÷üÜɧŸU ·ª0¿'ùI§­xÎPûÛ]ßPm4¶uwWîIîIÌßçÛâ ¤7ŸÜµêoûÇ“Áû`i×ê³"ûþ¼™Hýè×·}·–O³ƒË~¦{ýo«Iõ-µíirðüÅ ®[rÚªG ÎW ˺V¾7‚ûù±‡xöþ W®«Ý^ ž'ÎþiVý»Ú¶xÌ+/üÂɧ®È/ßòq\W×^ËN[Ù¸êC=c›ç|¼ÇôxËׇ5íŸ'ê$‹i?p¿@üÝÎ+‰«£tRº¨ÿä®ñת @`ІýŸ÷)ê“f  @€ @€À¬ˆÜ’½8âGËÎ8üÝÕ¬÷s×tì×ù@—.]ºGuþAÇ–J±ª:é¿ßw¬”ÝÄ×Äîˆ Oý£/ëûbgR¾7‚ÆûÅjù×.;cå½ÛîÛqîõ×_òP­wqð““}–•Òì|=0V3ßùˆh“óä7ÞØ;!£èË.H:Ò3kmõ7л£oûGÇÛÖÒÓV=%B!—ÄIÿxÈáG¿7©¤ŸÿY龟ÞÜݽ#¯;?a`Aç¼×Gˆùmùë﮹úÂäéâ#î%ÿ‘¸løË£O$ŽIKßY¶|廲jú_ü¶ï»7Üpñ¶Z]é¼#«¥ÒsÒ4{]äËWôï_¬§žnw}õz‡zþZw÷ÖXE¿¬´Ç_Žþ?±”¦ï‰=îÝxùú ‡Ê?Òm»{”“ô¬8nV䡨üŠ•Jöš‰:~b,Å 
ìJÿ4úõ¬Îròݘ¯·dÕ¾ÿºæÊ‹o1eKºÎ> \-?9N—ø‘ïiýãÌnÙvÿöw¶ó†dó‡—f‹_nO‰z—t”¿qòòUÿïë/<”ÞÓ ÝÝ.]ú’ýÓÎ=›t$/Ì’Ò벎ä—Qß“×Ù®9ï1=ÞòƒÇ5_ç'Yıþ–rRZ‡aý$‹—lºjݯ†êwþwèaO:7ò{’ÅPem›]qe’÷Ä­YVÄßÊyñY¶&NŽºtÓ¦MÛg×(†Ìô¹3×FJ€ @€Ó@ kGD0îâR:?YÖµúÁ¬Ý+[+×W5ç}Ì’oT«•—n¼òŸMƒ.׺°©û¢»OêZùÊR–ôÄ"ÂYzG¬à=gYתŸÄxvDðà¨Ø¸oÑ“T²ã’rv|lglÛÿÐÃþßCsÌ¿ô\±öüvgÃU뿎ÿ¬'äu‡ç†ë¯¾ä®q·Ó‘<7¯#Fú¨øýüBõ‡g‹+‡u­º-NvØ3NrxxmoÞf–ý"ÙöПEr¨“ªÜÝ÷ÜEtžeÞt{%ié¼üâø{ÐÙ~[£‘½ó¶bSGîȤö‡é²ìˆ™½{óÝ?}Öt ¢×û»±{ýÆj¥ú¼ˆV×V^Gß÷ˆ¾Á§F 0¢©¯’<¹çªu_ß~ÿŽ÷ÆXoè/›>2-eûÕëiûsVýx½Îj%Y[Oç¹çòuçÆX—EŸkÔóT Ç ÕxN# º-î¥ü¯;’ì˜VA’¼l¾ê|C÷Ú7&Y5Vg? —ˆÏFˆ%µÈ;ƒèùëˆÈÇj÷ìÒ¾JßÓZÑkÙÚ]_­Òa~å«p+½É ÑíßÖŽÕRé?cuõqÃÙͮ얨kà’èƒÞaôPV­ž÷¿ÉýOÛxõº‰ ¢×ú™{ÆIçì<¶ÿ§>?y »Dß9š˜µŸfÕì”86VlÚtéæÝ ²6ï=W¬ûË|Þã=ó?‘¿~¢E©D¯Gœ¯öU"ˆÞèãx¡ñÓã-¿;¯i¸¿v’E¼Çß_;&ú¯Æ±®Tî¼%N„ø]Ü~àiÇ]¥rº1N®©]© æò¢Þ¶>¹Õs>sS»ìþp-ï¼RÄ^‘ÿ“‘ÿ¥Cå>ü.>Çö‰ù¾"þÞ´ ´¾`ÙŠCÌ/_G˳šêɲ{ãónÏø›µ ßÇN_\•æýQçßÔòeÉj'Í4ò‚˜*]þá7UÑ. @€ @`´'t­|ؼ¤t{+æGôüŸ¯¹|mížå£­Gþ‰mlâ{¤£˜¨“,F׋¹›{´Ÿ!SHÏgé¸ãŽëØ{ÿÃßç^¼º|4uqi’ï%}½«“Þ{oN>¼ÿV郔¼$@€S+ >µþZ'@€ @€Æ!÷¢ý‹¸çøùy;²Êã®ë¾0îÙî1F›ŽcÐ'ŒA =ñÔÕGvtfO‰['~Ý‘U*ßè¹ê¢ïŒ¡.E @€Iè˜Ä¶4E€ @€h«@\ý5ù*¸äò×ÑÛJ«2h@víÕko‰ªò @` ”fP_u• @€ ÐXº|ÅñqÉÜ'æ²jú¡Æ  @€ 0N+ÒÇ ¨8 @€ 0%¥¤Tú»¼å,Ë~ñà=·^6%½Ð( @€ÌJôY9­E€ @€fÀ –­8ä³.¼½8¢eg¬z{’¤Ï¬mK“÷Ýpà }ÅýÒ @`<ÏxÉG¾· yX^ǃ›³gý`ãŸÿn<õ)K€Ì<ô™7gzL€ @€æ”ÀÂù¥k–u­ž_M’/ÄÍÐïLÓ,èé kYöåî¾õßçˆÁ 0ႨN<ýÈ²ÇÆíCÈ;Ú±G¯ïѧÿŒ5õÐ{¸‰Ã £€ŒN1 @€ @`â–œòÊÓ4=:o©”$OHÒ @€f¼€@úŒŸB @€ @€³W`Ó¦K7ÇèòK·»|û ŸæžË×=k†A÷  @€˜CqU4 @€ @€ @€ @ . 
^—ðL€ @€ @€ @€B@ Ýa@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ^À$@€ @€ @€ @€éŽ @€ @€ @€ PH/`H @€ @€ @€ @€tÇ @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ^À$@€ @€ @€ @€éŽ @€ @€ @€ PH/`H @€ @€ @€ @€tÇ @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ÐQHK @€ @€ @€9%ðôøè$ËŽ(:K“ùéÎ Êó–=ýÏþ}sc5¹ëkÝþ¥Æk  @`V ¤ÏÊi5( @€ @€‰@–$‹K¥Òå­ò¦i².I w-%¯‹¼é­Àl'@€³D ~RÝ,Ža @€ @€ @`äC­FNJéº4IåµdY²*ɪV#œtFæ|úK>|CÌù³w×ùºÿêîûvqë¦7lß]^û'GÀ{xrœµB€¹(`Eú\œuc&@€ @€ @ &`5²!ˆ&ÖÄÊó/îN#V¦ý³ úî”&w¿÷ðäzksI p=š¹4lc%@€ @€ @€@’|ý?_{C–d#ºLw¾ù®ûv¬ç6ûFr˜ÿé9ï#™»zÏÍa]Â3ŒD@ }$Jò @€ @€ 0kòÕÈ#œÕÈ#Qš¹yvw˜ÿé;·»›»zÏÍa]Â3ŒD@ }$Jò @€ @€ 0kF²¢ÕJÖY;ý w˜ÿÓ´L 7wõ›Ãº„g©€@úH¥ä#@€ @€ @`Ö ìnE«•¬³vê›Öê80ÿMLÓòE«¹«wÖÖ%< @€ÀHÒG*% @€ @€³V`¸­V²ÎÚiße`Cæ¦i¹a¨¹«wÔÖ%< @€ÀhÒG£%/ @€ @€³V ÕŠV+Ygí”9°ÁÇù’iZnÖÊêŽ!7ÚI ãŒ,Ûµßì?/+Œ³Uóö‹ãI³ DÓöBûÅöjý/ô§Z(ßt,ÆXìˈÛ®|‹qûÙÔ—ÂXòö“‚s>™µmµíé,”êÛ‹FýÙ û å›…Bž‚ñ.å ãŒl…6ÊGª±½Ö§B›Iáø/Ž%+lZ›Ê7õ³Å8‹m.ßÒ¹0Îèb£Íb_úMö%…c)aùVuGƒm- cì÷h?-8ÇEu5õ±VÁ@;AÛÈ×tÌ·Çå‹ÇS4Ó(ßÜ—¡·÷7_h¿P¾É¹°½éÉ+(MÇB¡ŸM},Œ¥V¼o´ÇB­ÿóüÓ4ßV{êmu,äùŠãly,ÆXìc^>-Íþ®MŸ…Åc!oýlêcqŒƒÊ797ÏÙ¾ñ²#ï¯3K Ò—­ºã—[?~þùoØ>³z®· @€ÀPþQ>”Šm @€$±ºg]D8èr Ä:ršBè#ÕÐJËdS"M"$_|^– åg«)¥…õ;Ÿ 1ñ¦=M%JM¯Š-FT~ XS?†ù /²÷§ »šÊ7Õ[È”—*¼,d‹í¯ Yš 䯚wæ5Öé qÖ·ÛË·5§PWÓöFá¡Ú(ÔÜäÀöV}qûUzÒŸl5Îæ6*Hí¬ªàÜTy!c“Ea{ž¿i_±‚B¾¦<…í»”/ì+$›Ð›¶Û«U6ôÞbLnp‘âñЪŸC×Ú_SKç…†íKsgvéjÿ[T;[ÕÝ\b˜W…Á4åjý"?†|4Yrì²½P¾l=ç…>æÕ6•)¶3(_cW«íƒ*kêg¡‘Brg•[š«nµ½Ñ“Z¢Õœ5l ªæÂñªUùÖÛW1Pù@*¯x _+‹<Çpûê5´êËpå Í烬WåyW¬¾)NXh¤Ã¼‘nÞ§eqbEãui¤›¶Çéõ"q‚M#o‹ó(¯#ÙH7o/”)ô±V¾ØÏB;Qk£®8w£‘Ž&é¼|«}Å~6õ¥ÐÆ.íë.Œ³©|aŒµòMãès±ŸqNT£ÏEãZÿ ãl=gå‹s™—7`£îü¬–Ú¶Ú¯§VõÖŠÊ7ÍyaœMÛùµtÞY>>žTîH×|p²!Ú»{ R @€ÀLHŸ©3§ß @€&I Voˆ/B·›‹ïø_^6}áØôEä@žøª³‘?¯§é‹È®Å/¥ã+ÑF™â—¢µ~4}ù[ø"µ°½Øfñ‹ç¼|t¿Qw±Íæ/ 
í7}Y[[‘8P¾©Í¾´c­ÿ§èJ£®¤7Ò‘¥‘.×ú_(Ùù"ÀÜH·ÇX+_OS?‹Û‹õÚËËûôB›ã/~y^ìˈÛ/´¹Kùãln³Ð—ÂXjý/8— ûŠNMÇBdÊËÕiVpnÑϦò…æû#[#_œHÒH7o/ô±¿V¾Ðf\¢Q>†ÕH7õ±Ð¯¼|ð5ò…fÿöc(üµö õÅ)ûâêtVÌS°¨•/Œ'N²i”‰+W4ÒÅ6«Qq^®þˆf¯‹ÎqåŠÂö<¥Aå«åâ¾2ÕR¹Q¾Z¨kpùlåK…òÕò@½ùšöò• íWŠÛ•/ö­Rè9V7ª–¶ÛË÷W+ùÊ…|•Bù¦í•ºòòåBŠ})ê­ë­Týª•ïèh¼®êî(lïë(ÓÑ9ÎË÷õu6Êwôõ5Ò•B¾ŽBž¾Îÿ>—†Îµ<Ì,sß¿î®8 怙Õk½%@€†HNÇ> @€’lû–Uû¶7XUãX @€ @€˜3× œ3C6P @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€( ¬Y³¦ãŸß{ÁéÅmÃ¥Ïû×µ/Xó¾u.} @€ @€f²€@úLž=}'@€ Ф÷%Iúç¾íuç·ö­ª\³æƒ‹Î}ßÚuI9]½æœU¿m•Ïv @€ @€3]@ }¦Ï þ @€hƒ@Ú[}š¤OÍæ¥7Ÿ÷¾µ+Wyî¿^pÂÂE{ü0MÓ}}½ÿ0x¿× @€ @€f“€@úlšMc!@€ 0F·¾õÕ÷W«ÙÒ$Y”¤éº|uzš% òêJ þ{Z.]—¤É#ãåoÿ›×þpŒÍ(F€ @€ @`F¤ÏˆiÒI @€/°ýÿ-Z¹/o)V§Ÿ¿ö®¥ÓtyþÌjô~¿  @€ @€f·€@úìž_£#@€ 0b5kþê¾|Uú0¬FÇ. 
@€ @€Ù# >{æÒH @€Œ[ ¿WzTR[•>¨2«ÑxI€ @€ 0{ÒgïÜ @€Q Ôï•>DA«Ñ‡@±‰ @€ @`v ¤ÏÎy5* @€cbUºÕècÖT @€ @`& ¤ÏÄYÓg @€(0Īt«Ñ'Ð[Õ @€ @€ÓO@ }ú͉ @€˜rªt«Ñ§|6t€ @€ @`²Ò'[\{ @€f€@aUºÕè3`¾t‘ @€ @ ½i{«Sö dñhoj#@€©À¶mÛ“‡¶lMö{Ø>#-" @ ­i<ÚU¡ï™Ú%©ž¡ÎÿK“­[·%ñª—&{ì±p¨,¶Ív~.å$>›fÉaÓR Ýï×i9HšPŽ ­]å @€ÌX æ'ù @€ @€¹&ж3:çœñNŽ@~6Þ=¿{(9ýõ—NNƒZ!°¾ciòG;8yÇû¯O¾|ãÏw“Ûîv |à¢ä?ß÷âX¹#9ñU·³ju˜#oÿdÝ»OKî¼÷Á䌿üÄ„´¡R-°bù±É+O96¹â3?HοäkÝœú§±À1O84yÿÛNJnýå½ÉÊ·]9{ªksEÀ19ò™f5r+9§—@÷^š¸ß^I;W’ÕW}>ï•ë’¾JuÊ\ÛÊ·_™Üú‹{§¬ÿòæ%ÉSþð‘É»þý‹Éç¾úÓ)ëÇë^ö´äŒ%˜\²á¦dÝåßš²~œøìÇ&oYýìä¿ãûž·Ç÷>£}<ÿJÉüÎ4ùìw+ÉŽ¾Ñ–Èÿ„#J>ü÷/J~}çýÉKßtÙÀŽIN-\Ø™\·öìZ«Þ3ýøõ÷L;?—òšóϦ©þ<è¡ßfÀK—þQòê?{j[ÿ1{tŒd4V¤FKÞ)¨T]á}Êð5<¤@5î:ฒfÂ6V Ÿì'ŒYÅmp̶SUS&P¿ÉNþì³wʦaZ4œÿÛ§þp,Ô%`!E€ @€ @€ @€t @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ^À$@€ @€ @€ @€éŽ @€ @€ @€ PH/`H @€ @€ @€ @€tÇ @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ŽBZ’ @€ @€¥ÀoîË’Îr–T«£,(; 0mÒ§íÔè @€ @€ÌïÝ&‚>æI  @€Àh\Ú}4Zò @€ @€ @€ @€À¬HŸõSl€ @€ @€ @€ 0ôÑhÉK€ @€ @€ @€³^@ }ÖO± @€ @€ @€ @€ÀhÒG£%/ @€ @€ @€ÌzôY?ÅH€ @€ @€ @€£H–¼ @€ @€ @€ 0ëÒgý  @€ @€ @€ŒF@ }4Zò @€ @€ @€ @€À¬HŸõSl€ @€ @€ @€ 0ôÑhÉK€ @€ @€ @€³^@ }ÖO± @€ @€ @€ @€ÀhÒG£%/ @€ @€ @€ÌzôY?ÅH€ @€ @€ @€£H–¼ @€ @€ @€ 0ëÒgý  @€ @€þÿöî¾âlüøÌJò‘Ã7„rh^$Üôý7-gHìBˆx m$6zr†p½æåBJ¡\vB¥¼ÄᨠÐ7=x)m–B)åniZŽ@.âKÚù?³²YZ9r¼²%姉vggçø®$"=3; € €ôG€@z´È‹ € € € € € € PòÒKþÓA@@@@@@@þHïy@@@@@@@J^€@zÉ_b:ˆ € € € € € € ÐéýÑ"/ € € € € € € €@É H/ùKL@@@@@@@ú#@ ½?ZäE@@@@@@(yé%‰é  € € € € € € €@¤÷G‹¼ € € € € € € €%/@ ½ä/1D@@@@@@èôþh‘@@@@@@@ äÂ%ßC:Xôe‘úw/ú~ÐÒ¨Qîud¿1Û+S]*š^l]]éµ5Ò|&ÍUÛ²:zû* œÿmÙ/„"ïý®;òz`_Ïü{¬È/æ›?f—­½†WFx- Ð’Óƒà5™»#V¹[‘³°ìïAùz|iÜn*îÝ·údߨo´Úq»‘ùêæ&ËU•øžýé=¶Um]›ÌŸ¯ ;ïøî´ËŽÕCúwßÖëâÖ£† i;’ÿ¯(i;ÊË6¾yÏ$^ýÉ÷L>Þ Cýy>Q&C)°ûÎ[ eõÔ]Bº„úBWJPÀÈ£»E—@@@@rÐòÈ![NYø)'&2!€À&‚ü\²UñÙ´ p#0 ß¯h §©·v/Ò G³@@@@@@@ò#@ =?®”Š € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € 
€ € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € €Ó £5@IDAT € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € ©À5×ßõÅÙ³oÛjHAåø8>i$!€ € € € € €‘Àœ9û^3÷ίQ“i*žÀÇzÍkº¢¬eö¼Æ«njÚ E@JCh € € € € € €@ÿ®šsÛ§#‘ÈðÙ­mí?¿ÚÐÐÐÙ¿ÈÀÐ ÌžwÛžŽ‰ü^)1Êü¸Ýuç6\0ãáo-Ø’¤oÉWŸ¾#€ € € € €Àf \{í¶Ó‘Êýg]X¿b³ à$È«À5óîë˜ÐÚYçÕ½ž×І°ð«®¿cï²pør¥Õ©Ê¨ÚºÚÆ5ÌüîÊ!lU#0 Ùóî:ÊQÎcRHH^ÓŸ(enk‹Çnl¸ðì÷T0'#°™Ò7ŽÓ@@@@@ !0{îíÄMü£Ë.øÎ;¥nbè¡ŠÊ %pu´épÇ_|ñŒ5¥Þgú‡@1 D£ÑиÃYfŒú·Ìp½º”êvæ®Ve—K€ç[rmBlì2q÷«_0ã7Åx­h3©³oj:×Ñê¦dšÌNß ïá;Ú»:nh¸èÛò~æÀà HóœW ®¡4@@@(Ù76ŽÓŽþÖªÂÞjX¹êžNW]{ù…õo[÷|è^\e®™un½ÊŠD@>›NqBú§)Í-Š€úœ9Ÿ2eê™ 6]>WËRÚŸØ4æù¶µÿî² ¾óŽßqÒ(ën¼óK:äü"ýN ©}“ctjev¹×ò~H•a;éA(RFÁxôaú"Y¦êìîzwÛLÃÌsë¯,˜†Ò@@@@ „®»©é‡2{òû©]”Y1Ù¿§Pê}Ðy­mͺƒÎ]Ú/¶@ 8¢ÑhèÀÃŽ}\n}¤O‹ " ~ͼ;w ™ÐLiߌÄ=|ZšHŠ»Ê=nÖ¹g>™5¶Üå¼üòËzÿý÷×+Wî¤Gþ—^µjkÞ#æT¯®×_«G|2Lo¨\¯·6£œöö º¼¼\w”—é²öv‰ºNg$¢#aÝéÔ‘ˆ»q­ÂaÇ‘s¼<²oŸu\Þaå8²othšÖúÔM_ÐÄÀ=£»®uî9on:?9Ø´ôM‘£²нƷÌ<·îÙ’Az<@@@@ò- ?ê‡+ªwþ¹Ü-p|z]^@]ëE2Ln‘>ô?tç@—•Ìz7?ä’ ÎúKzØGâ¸öÚ[·q*+äVÑj÷,­’€ºÏV³4/‘,Ÿ£ÿg´ºG»Z‚Œ®óHäQ²¾©ÝO¤m$©k“½ƒF6lг{WÒí 2´ÀQŽ=æíÊ5{Äæ•t{<‘Ï;Ë;?™×ž¢¤|©Þ‘ÏI¯,/¯ÍjÏ— iò\)FÒdOâ©Òù߃ÝõJ2’õØT©Ï&K2 YÚå=ÛT›¯§¯Þ¹’Á–k{à•m·l½©yåx¢ 6Õ¶©;¯œëÕâ•/=•³l†ž¼¶\ÏUR¼ÆË1›×ª¤ìÛ2¼~z…{gKF[—mwZ^¯Lk")+ù,ɉ~zé‰síA¯\9èµ*‘YÒymª”oÿðHë“רMŸ¦” ð&'µ@à ·o_ _$ÿK:[þ6̧=¯¶­Ù #…¿·ÖçI € € € €y°j§²Ò®mº«_ÞÝCPÏ%€ÞÝn£âfÊÌ êòëi P\×\×Cýt–ß““´€úus›fHû¦M´'Ù.žÈÿŸÊè ç—è¼ %HÄè%üo”k‡ب~â?y¶#l^;šÁûKþ–)éÞÙɼöX"…±ñ\9nËLTe‡,xgyçÛ! 
¶& þ§¤{C:íñš%y¤.¯9×KJ9´ÉÆÿ,Ág1N¼Ôò1e£6æiŸÒ´HåÊür;)¼ûˆlÈç±Û½Û“WB›ÝÁQ™0nç‹Û#6¯¤§bíyö|{¬»&y’ò¥¯^;¯]&\wW(õÛ­ìÛ$[«¸îº®L‰7Þ®7¹ZÊ”|‰Ó$£H$'ú%§WÒl/\[Žô5y®+™íq%w·ÞÙw$£²åJŠä¶ûFöÙöÒB!oÛ 9!¯Ov;îȾœè†ì9¶ëÚË~HöãòJÉyºKÒ¼ý¸ …Ã^º-3‰›X,b±˜‰ËvX¶e¢¹Û‰™XWÌDÊÊL—<Ëõp˺ºLgY¹)ë”çŠ £Ö®s+*†™öö³¡²Í kaÖß`âRqõÚµFnÓn¶Ùæ##·m7ü¤+·p7rçëfÿ”ôã»ßýQù§ö&ÿŽPöÕQ‰Ê¬]5wUì£]?s溾òr þHïy‡\ÀÐ+œÈ…2BïùŸ˜ß ôdíÿÉO¼ø¼úŸ%xF@@Ø"|ë3ÒíZ’é"vmÉô4»o×›LO·ëO¦§Ùõ(ÓÓì¾üø™‘¾¦ª*#Í®céw¾]Û2=Ý®u™žf×¾LO³ûÃÚ*3Ò+*Ê3Òìš™~çÛu4ÓÓíºšéi‰u6ÓS•ê,‹dæíìÈH³ësfž­”]³3=ݮᙞf×ôLO³ûvÏôô°OÞˆ¬šžÏ;_ÖMO·ë…¦§ÙõCÓÓì¾·¦hÚX8óüp–óíZ¤i§Ër¢™iv­Òô|vß[¿4í@Ü'¯üÏ÷Ö4í]€]µwн±¯]ûÔ÷q²®ð=’’(¿åÄ$@ó“®®øÕù ¨Û™Ÿ<›+?ÈO©šM@`Sq /ß4ó¼ú‹6•qsÛ€úØÃþº¤/“€úgú,ǨuªË2sfý+}æã E.pݼ¦ùò‹i}tcµ ™˜×¾vù3qJÚllÿ¸Ýì9|ô#€ž¨^F ˤ[“m±7+In§>kú–ºo·»×”é•lÇØõJèÞ‘léŽO^ù‡OF>[„Œ˜ËH· Ô¤×eᥧÙ}ß¼>ç˸;ßóíj8éåúõÕÏÄžgÇ%¦Ÿo+¦§eë¿ÿù™mõk“­Ã·.Ÿ6É%Éh“=ß®dŸSrzFš½›Ljžä¶\–Œt¿¾ÊÉŒ|¶ ¿¼þ&þõÛ›Ú$Û’|ö5ñé“Íïßþ̺ì¸Õdù©Ï~.þef:yõû¸æÚ'{¾Ÿ•ŸiüýÛŸi’h¿O¿|^SöBÛüŸ×½ÿQz>?›'×kíç”õü_Sö|¿ríð]{,õ!”3ÒìqIÍL÷³Êr¾Ÿ«üè–Q¦´(#ÍÖïß~Ÿó}L²µß¯.¿×”W¿4À>§>äýi~¯i{Ž__í§JjyÞ¶O=^zŽuù×ckʬË/¯4(³M¶ý>mõ˧uÀï3Ùfñ{]ù¾¦|Þg^~.~&>í´çû¾W}òú½Oíù~ïk¿¾úõÓ«ßÇß/¯foó§?r½~öB§Ÿëíû¼×¤¦Œ¼~ï3{¾Ÿ‹¯‰©=?÷ÿÿd¾Ï¼úý®µO_ý^Óö|Û‚ÄsÊß9¾¦ggžïW—ßu²çûçÍôÏúùãóÿÿ×_f™^ý>}õ»Ö~×Ùžo?¼ç”¿üúšì§hm+æ‘”ìl"€lòÿ;"`^>Vvíöʪ¿%ÿGºTþ3&Nù÷ß+òÏ—rÉK(ù¾´«|Î’Kk%ïò¹têøêÛãVÑò9åTT>Y>£l@}ÿ>ÚøZÛšu²¬é¹«ûÈÃ!ŠV`öMwåhçv¿Èûr­Ü¥þ‡ªSÖB¿xÆ¿<¤!„@ÆAJ)À1AjR € € Pòò»ZæC~ÏH—§3ÒüòÙÒd`‡OÞÌó¥ÈŒ|ö|à’‘.Ef¤É0 Œ4¯~Ÿó%)#¯ nÉHóê÷+7Gï|Ÿ¶JMuÉàœŒ´Dû3Ûê—Wºä{¾Ÿ‹__¥I¾çç|­ýœ¤¾u Ð߯¯2Þ(Kû3û%)yýúé]?W¿¼Ù^ÿtÎ¨ËŽŽ³e÷~d¶³ûø§¤¾êÞyý÷¤ÐA Xõ' .mZeÚ»œ5ëì·ý[M*“@CÃGTT•?+ÿo߯¯vÖçQmгç6M‘ÿµ^.Ÿ¡ŸóÍgÌcmkÿ9Q>Ó\ßã$"P¤×ÞpÇaN(¼BÞ§e½º wcõ~¤Ú»æÎšuÎÇ½Ž±ƒ@¤ç•"ƒ°_lÊGŽþ¦ücÁ®³W.¥Ë×¶¿H¾“y³ÿ‘ùEÐÿ‡€Ì|¶lœñ¥Ñï‡ ¿/·Þù~?0ø|¹µS¯’}éõì“׿ýþçË÷àŒrýÚï÷ƒmGÎ?døü¸àïÛþ̶úÕcÏ÷ó—K’Ñ'éffšW€¬É“öðý!Ã÷ÇûCZf¹RUf™v퟇_Þ¼øû´É6Ç¿ý™þv]$Ÿæ kf^ÿ23M¼ú}Î÷»ÖÙ_™åú™ê~øû·?³Ÿ‰öûÔ/ 8¥[É 
Li6O®×Ú®•^¦Ý·ëK¥§û•ég’8?³_~çûÕcÏ÷k—]ë*£MYüý\ìúVéçË32Òlÿ¼>&>ezçû¸ÊE2êòë§=ßÏÅ®áe¥>ìš^©ûÉíÜß?™}²ekF¹²tWFš]?,Ygê³],ußnûµ_nÍ™‘ÏæÍÕß®Ufó§?üÛŸY—4Ó÷|oMµ´Bí:hiI¾¯S›Ç®–ž7±vZïÔÄÚi½Óìžk×OK{صÔÒ’|ë±yìºl™y3Ï÷«Çžç×.WÖiË¥L›Ç¯­ÞšpiÄ}úd³Ø5áÒ²ú–i×–KÏç/Øéévý¹ô4¿~Ú<²]F^»^]úùv»ô4»ï›WÖºKÏë×O›Ç×ϧMvͼô2í~<ÏH·ëê¥çµë不Ùý°OÞX,³L»>Ÿßùv;ôt»†_zš]Ó/=ÍîÛuþÒÓc²æ_zZ$V–‘fóØõÓóÚõÓÓìú6íWÔ‡wÝ5£+ý8û €Å#pÁ 7 ß.¼Õo¥ÅŸí³ÕÆ|(ƒ+nø0öñcÆg²-Éßä®ô—É¿ôû˜¡nþضÆ=¢¡aƆä¹<#€@Q èëæ5.‘ß'gk½üCtPôd«ß']Ϲ±ñDR—ËçÔ}ŽÏ™ynÝÅ>é$!P” ×ÿxÇÊHùsÒøÑÉÈøÁõò+õ­íJßÐp^ÝGÉtžÈ·ô| S~`v˜;ö›òioiÓw@ݘ\ÝuЬsÏy3°P € € € €@¿æÌkZ,'D³ž$t¨yã*wÍ­ƒ@OoOnusÿÌsëOM?—}(ënºK~[v®òkqÐ3šyÝM_“ÙE—Ë@€q©qO¹ø¼3ÿ'5mŠQàÌ3ïŒì¹oè¼ü’×~£>‘) ·µ¹îõ Ìø°ûD›‹[€@zq_¿-²õÉ€ºüƒAÖ²R{gG0/~[}øP~ ËÞ6Ž € € € €¥- Ÿ‹e2Älß^H=½m› ¨KÐÿ‚YçÕÏM?}(|kç6ïhÕ*ŸKNjk‹!€žÚ^»}íÜ»&É4®€úAv_&Ÿmˆw™#.½èÌ?Ú}«€¬‹~‹¬‹þ¹j›ôá¶¶xìú† Ï~¿XûC»‹_€@zñ_Ã-¶6 >îÐc¾¡½[oùÔ厰‹/>¯þ?·X$:Ž € € € Àu7Þu¬rô£é+ùaüC ZÍm_ÛykC÷×AÓrª²€z\¹±cgžÖS9D&(«®¿cï²HøwÒ˜QÉc=Ùöä³|ÖNÏÚ+ä³öP‰¦¿ÓæÆdÖnR‡çb¸nnãi²鲦âí]s.úö¿‹­´·ô¤—Þ5Ýâzdê~ô©²¬½åû>鲎åųÎ?sNz:û € € € €Á ÌžwÛžZEþ 3%·J–.«UD¿±ÐèÉö&Ÿýê¶/¦½ëÀY³Î~;™g(\††¨¨*VkµŸme)ÐÓµgßtç1Žr®0Zuµ¯ùçÑòÙKÏÃ>…,píwì'3ÑO;懗ž;ã_…ÜVÚ¶e Hß²®wI÷6[@]f¥»Ê5Ç_|Á™—4C@@@†Xà‚n¾mhÔ32Ùás¶)6è¬]5·m]Ç-…<}Sl™uóǶ5î 36lê\Ž#€À èëæ5.‘=“K1€ž.;gîG*'´ÕÌsë—¤cB°ÿŸeH!_¡-·mÒ·Ük_²=ï¾åû)²†úåÉê²FÌÇ]]îA—_4ã’í8C@@@†X@ÖE@~9¹Tè霽êêw¬:5=û P8sæ6^jý}™luêøêÛn¼ðÂO §u´@ Ð¤ú¢}›-`êc?úërK›Ë¥OË?–þüa|õaüci³I9@@@È*pݼ¦‹´13%ˆ>÷£®n¹~æÌuY3ùd@] ||ñyõ?+òîÐ|JRàê›î80¤B_–úü&\’—˜N!€y žwb*jùbã”W>¥; þâÌsëNê6Q? 
€ € € PJ³ol§´9êãØÇ·•rÝçšÙßWeì(@ÞŸxQh PLÒ‹éjÑÖ Ø€zEÕ§¾îºñ —\0ã‘ÆÉ € € € €©¬R5ØF@@@@@@@@@@@@@@@@@àÖî%zaé € € € €(U­;Óhu¨2æíÖæùÿ …"ÀkÓçJD£¡Iºú,G©ƒ]£ÿ´´¹ñ&Ÿ\$!P0ÇGÏØÑ¸¡:Ý®¿?õð¢UÓ0‚„)…B@@@@@¸E+]fŒº'¨æsâ·¶/wÊmX¹\A•K9[œ@à¯Íl‚Åòš­io/ו£nµý`úÍÙú³¥¥çcÐE>ÊŠë2ý7n\dç1\j”þžÌVÝJ…”Š„ÊUíÉuÿP®¹½eÉüÙCaA ¼ôàM)@@ fä¢D@@¨­­ÛÁ„õ;Z«òȈŸ“æX`M¤9ôà5Û‹£Xwò1è"e…ï öcüøñá‘Ûïý´tô Ì[>ë]Œ£¾8Ô‰ùA]<@@|;ÓÀQZfèÓ䥻~ ¨@@þ ˜2u¨ ¢Û3Rc¨6²ÿ¥pƒ'Àkv𬩩ôFn;æL  ”è©ù•kÔdë:Ø(sª1æãªe¥¯@Ør˜‘¾å\kzŠ € € € €X‹ý²*^¥´ÞF…þüé–ëX$§#W^³yå¥ð-K@V Ñ ¶Ë4qå›/õÜsÏuuü^žïïÞæ JD€é%r!é € € € €ä_`Å# W›ö÷v5&þ­ÍÇå¿Fj@``¼fæÇÙ$&L®ÛYk½Ý—Yéw¤Ñ“YxF`Fz‰]Pºƒ € € € €ùhmmÝ 5ü&¿µP:Á ðš Î’’¶\G¹{+òbqór)JLœ2õpG‡¾íõ­ý“ï·¶Þÿa)ö“>!«ô\¥È‡ € PÔ|,êËGã@@@Rí•l€2ï%·KéY«Ð§eÖý©¶O‘Ð¥Ô7ú‚Àæpk÷ÍQã@@¢H~´_;#¡HÑu€#€ € € €À håŽHVQNgr»¤ž]ÞÓŸPyGÏ6l¡ÌHßB/<ÝF@¶8¾ nq—œ#€ €Ù&N\+³¦uqÓµÉôÔçIѺ)ÚèC$Ï¿[–4ÍM;>zÆçB&üuàùY£T…´áe[ÿ›jõO_nnÞd`füøñááÛîuXÈQG£>#åî ´^­\õ–qÔß>nß÷tË‚uÉúüžýÚvX4Z¹­®®—r•öì m[9î¯Ö¾ r›Ð¾aÇ ×qmZšÿ´_™~iãÇŸQ1r;ç@{,Öÿ×òGîyÃ/ß&Òô¤“¦OÒZ!fûˆY•´íW©·”1ϯ_õú+V¬ˆm¢ ïð1_›ºKy$t’v”´I–Ä庿d\õÂÒç/“})6÷GÐåõU³÷ºQNµÍë<úÐüwúÊŸ~l(_³ém9ºvÚèòˆ'¯Û±ŽÒ»eþ)×õ­¸«~®ºÞû(=.û›s-jjNÙVUTÚ÷r]µrÙƒ Þì«®qãÆEFïù…Clž¸ ­´¹ñ}åçXI $îë^’]ëî”qË•|HÚ‡YÓI ½›…§-W€@ú–{íé9 € °e ðep˺Þô@ð°AèÛîu©£Õ%,.“?òðþ’˜^HUmêª9¹þòÖÅ×ˉíù?$¨»PÎ%9–øçPJÂ÷É_e¤½Çæ96:}ë2íÜ%çÔ]e²úcìñ=Mõ»4íŒG\ð»Ÿþ£ªÊÂ?ôúî °´9%—Thw·®pj¦L¿¬uÉü¦”£½6ÓÛV3¹îP9{‰œ¿³m¸÷ðÊv¦ÜVí«•!•o+ù5YòÖúPÓo{˜egäv¡¨ÖÎ"{8\æØ[÷+nÛ%—åVéè¸d^³ä¯D3µ¹ý^ïÕF÷ZÔÒÜtQ2ϳS­›%§]é]è” Ò¾¨Ô¡j£õÿ™ºe7½’r8ÛfÐåe«ÇK¯‰Nÿ–´s¡ì8ƘC]ŸÙç >‡â5›ÞŒÚÉÓPa½@^kû¤“×—÷ÚÕZ††„wx6õXÛ›}-ÖUÄÚ«tè^©c÷PȬ<*ÝïÉææ5ÙêÜi±3Åñ*{ Vã”kËV;%Ìhì²nÕKs0“­›>)¶òÜ#Y¾¼÷<.:­,¹Ÿ|þ‡Z÷V_ƒ¢òeD¹¶ŽÑ{{oHéP¨:¼Ûñ'Ög TÚ`ºÞ_ñÈÂÕÉ>û=oÎà–Ôrú;à+—Ap©å³@®Òs•" € €@¿‚ši0ÐY7ý28Ðúû Ç  € € È»=ª¶Ûë þx3¤e6sÜhõŠÇÉLç=»+ŒH°õ: º~Éݰá´eË~úq ‘ò¾*ÁÃERöÎR¯Ì:ׯJ â= î&3Ö÷´Á]iÃþaÇùù„èÔ}–7ßýAj½µ'Õ}^f›?)çmדn” ø½,}x_žwÖFí#eVKHråèÆIÑúUK›îÉŸe£fòÔ±Ü|\ÚVeÛf´~IL6H»ö•vm#ízʹï;NèÛ¶íèiò”S ]rŸaÏ‘€ÓG±õÿ|Ènçú˜pÂéc¤]Jþ­ì92ªá÷ò×_¥0i’Þ[ú½´y+Û_9vp¶rkkëvPú'rü(/OOÝ¥œ”шÛöòçp'¬_@ÛA6/üó`•—­ždúÄ)Ó¿.}¼[öéû7Ä:zªõþUÉãùzèk6µ]ÞLî=h×åÅ^?]™‰þªøÛ×ï^r…GÛ +Û2¨#·Ç@¯íŠææõ“¦L¯“j.FWªj;€f†_í“N:coí˜Kå5-—A=ÑÚÜ4ß/ß`¤5 
¨¿m4¥þò"¼UÞ—£6ž+¯NGk]”¬ Vsr]kGWGÝßk¯kNšhý¢ú#ù¼‘@µ<œîÓ¤ÈĦ£ä³ûÝÚ)cn1ïßÚÚÚº¡;G¿ž6w@Š}/HSn”Ê>/dØMâá8ΓQt94ÆTíý²R¯'ó¥>çËp åNœ<í+¡cïœòyûÿ£d›#:ü²òYo„Šœ)y“ùÒž7{pKj9rí{ FÛÔ€/9×~¾ð@ pé“R  € €@P3 :ëf _Z?¯@@BÐÿÏÆzäÑ,1ô©öžOd$x½]È„&8ZK0G‚ÑZO öˆäý²wÆþ’²&HÀð4 ­—€ï÷%@oj€^‚4{ȱ…ˆ’öéQaºZªëÌëŒÇÖ–…"É ú“ñx|οßþÓ/Ÿ{î¹®dÓ¼Û¨o¾DÝìS޼~$3nƒ…É<>Ïe*ºGXªäX³êPßmmi|¯;ŸS;¥î„µÆ~mgúI;'}9XúÿYSSóƒ¤O™^RMô´]eã+vGBm÷._¾¼_·G"÷J»¶³åzM\Ú¼à¯à9µÑéÇíÌPÆýùÆä[ûG£6ÎõKùói›*ý»Ö©5ç¯X¼Ñ$q»îþKú6K¸åa±Á¡ÃåOÆmÞƒ.϶©¯GÍIÓäîν’ƿСÌQO=¼h³n}ÞW=éÇ‚xͦ–¹óžc–>L´i„þ»«ÜsVw˜_¥.A`gÓŽtGM•k0Kòî”z¾ßvP×bé’ùOÉ](î’:ì{®þøÉÓï“Ûæÿ*½Ní„ï×±,Å`dÙ„N¹+ÄÐ<†b@Б'ž¶ÍðpÙr]¦xséº|¦½'ÙÁ-ë¤Mãäºî-¯¹|º¦<\þ’,%Pߺ¤ég})Ùr‡EÊï”÷ùI=åÕ!Ÿ‡2E¿'ïý1²mÉÝCô§äÏÕ±XÕ}•™íؤ&u­ìŒô-¯‘Þ|qmdÑ81ϽƬíÝÃÄÞ@·ø•iÓrð•í\Ò¨ô r> € €@@3 ‚˜u3/ƒAÔßà € €ÀÐ õ¾qÝ­ηò^îà‹ä¶¿ ‡M‹M$ðªÿßÄè´ÚeÍ ZzeîïŽÌv–[?wã§ú­Á,3[ß’œ_«ŠDÞ–@•Qþ-©â,ù#q¤Äã±G¾mgzÊBη¶.YðD2=õ¹û¶¶WH¾OISmài¸ùUÉ“½ýŽ:QòVHPìnYë}Zjy²íÊÚÈm°ñ` zU™Ší%ð¥ì-±³>Œ)?M¢ù’]֓޹Yo3ïWÀ„ ߨ’œ·µôyÖÒ%At{šÛÒ<ÿ1y¶|{¨êï'®¥]ÛL[º¤éîôŒv0‚ü¹LfðW;Z}GZ|ˆV§Èíý§ç º¼ôòS÷íkO&†Þ/ia Þ>ß°áèǺCBj=¾Û¼f“åŠåɲíÑå%ý¸Ì¨ÿ† ȘQß=àã¹]ùOBÇor°@×â£öø…[—‡&Èkn×pX5N˜0áó©?jOž~º|Ø÷’Üu\_´tñ¢¿'û7øÏƒ> È‘@ícò>꾓‡ú¿XLM}ôᦿ¥ö]n‹_-3ú¯’Ï“ïŠãvò™ò°Ì ><ë2Ñhh¸*³`¼à­ä—»ƒ¸g®|óOϦ’ãvvóxñŸ)¯Ÿ²åËïó শ%}{ Rän7Æ";Ý] m%ŸÓeàÀ¹¶Ðt´£ÜwÓëkmYØëŽ"rÉèwPƒ[|êËiÀ—Ïy$!ˆô@)@@À 9Ó ˆY7ù2Dý¼*@@ G@f#/÷ ¢§¶ðчÿV3ùŒoépäw6ÝQ¡9*]¦š›ã©ùú³-õ>µîƒ×ëk}_»Ö¬¬Ñ½PÊ•À¯ª”ÙÜ»´6÷ÖI »6—zã&6/¬#Sm^™a?¦¯sl]‚%Xùæ v6nŸ™Éý@•©ž'µ‘²^·-¿Ï@ºLM=Í(²ß.{xþK}žv04¼r¬$9^²«ÿ”v8§];3RnYwæf¿ zjAº}ý•ªbøÙÒ¿,Õmƒ†½éA——ZwúvÍ”i´rš%=b¯ÏÚ®ØÑ+–ý´ÏõˆÓËÈ~P¯Y»Ä–Ì(ž'APûBø—ioòS›¸-·ã¬“; ï³ùA_ ;3¾6ZW'í”A*zŸðˆ/—x¯ššS¶•ÛÏÏõº`Ì/–67ÝÙgãò}pÉ]ÎI ¢ß¼öƒ×.ðû,ë^[þ{2 åå{Ÿb+MXýX8’?ww¨ÑÕßëDÝÈgIû{ge¹Ë…+ƒ~!eü"±Fyÿ€ƒÒ=¨b¥Ô¼R*½í½žeGníþFkó‚·6Õ¢|Y®7xá¹çìíøß—;‘¬µ¯wy´õgÍû ·xµ'ÿê߀¯äY<#˜@â#GA € €[ª€7Ó ûv}‰™c—5Ï_–z»>kcg´.i¼%¾á“ýû²²³näG·VãÆmYÜx´ÌÞy2mdº²³nävqWH¾{¼²ì¬åͺñvm~»6ÛÒ¾&kF&G®{_íÂô?©·½ ¢þ¾úÇ1@@Âh}h¡Ü®Øxk‹K0ø3µªú«j©QïúžÒË4Æí™áiLdïôã¹î¬>y}c^g¯Ûþ[FÅ/Hÿw¶_Nûoe LÿÔ“@ëx{Kz¿|6m┩‡‹×W{«ô~=ܸ¶A«Ä#lKnöë¹\/± *{Ngg×E›:·µõþåïØ|FéÏf亼Œ  Ošv”Dè’àe™8ÿÎ ¢Ë@‹,Ùó“Ðkvër-3wõèD#MC–@iÿû‡kÑÒÜôsù^Ùd#Aà‹Žžñ9o»r¸Ñõ6rìÙ—`»¼<†ð!¯‰œ™xÌÞÙÂ{t 
%÷s}>ºvÚhY:á/¿Ü¡¥¹ñÜM}–-mn”Ûø»v­m‹[3eº½»F¯Ç±“¿)·î×WÙD¹%ük+ß|~z.¯ôßzê³c¤„zH9*uY ŸSOÊ—a¾ÊMÈ­ôíCkÔ’ÛÃn‘÷Mÿ.Éš¶ôîKY+êðõƦ|e-„ @€@úð8@@ !°q¦ìÛ™m2ÓÀçv}©^‰™©)™ÛvÖM¶[W¦æ¶³n’ûÙgÝôÿË`°õ'[È3 € €@Üžl£1ú3Éíü>ë&Ë—ÀÁˆävŸŸinn“HŸ7ˆTYÉuÕ³ó¿­Íwÿ2ÛÁôt×$‚â0‘¢õéÇ“û!N“õ¤»Vw>LÏõÙÞ@¾WØÙ‘dqþ{âIõÇæzn2ŸØYíRŒyÅ’M¦oâ9‘O›]Òó]^zùvÒ”ºñ!'Ô’©gÛÔšcìÝ üòFÚ&^³Ý·ç—àï-jÍü Úœ¯k[×v¾¼bþ!íŒÈk¸I±ÇÈ+л³‚ÜÓ}–]†!¨>仜 U–;?7º7E܈MnƒÖšµsäšÿÛöQ;Îyé}-s*ÏÛX®š™Ë@žô26µ_R¤‘ù2ÌW¹ž«cÊí³|vvzû¹ü•‡Á-©Õæ:à+õ¶J€[»%I9 € €À,·™9šÚY7Û©Qݹ³Ìºñ¾ Êûóe0Èús,‹l € €C/ÐÞÿkEyâ§S£Õ>ƒÑ"™™Ù%Á¥~?ì ÖQeÎe}oÛÎåNL2ÛSUæRqÍßsɗ̳¬¹é9¹ÕóÒN R›Ó%ýJùÓë¶Í‡E£¶n».¶ºÝÿÄ÷Ú™¼ý÷;23ü‰Ù…Ôc2þ))ïŽÎ5Ës)S‚Ñûy•–6ß!S_µ¬U/qy¹¢RðÆm»/µÈzîR~â®YFg¯ƒ./¤&:íK2…w©¤WJò™Øú¶ãž\Þì ˆHÏ[(û›|Í™Ùィõ_²y=¾ØŸ7öî5Sêȵ¿DNßÃ~&Ø>=k#·t—’yWn•ïÝý£çXv@Š °o‘¢dé o@ʱ+i^@Ñý."_†ù*7ÑÁÄ$ù@ÌyFº´g¬÷ZéÿÀ¥=å“7càRt¿|¥Ë. >`B @@Ÿ¡± v¦Ak€3 r•µ_ÊåDZµòÅ­Jþd™uÓÿ/ƒÁÖŸkiäC@j'~v÷»²n»,QÖOu{ÒëŸxâôÏ:!}ŠûN¢ûʳa$·’)>KÀ¤Qª¸]ªÜMf}~Õ.Á”ZÁ¶ºúÙ¯öÒâ1ïVÙ©ÇsÝnY2¿YÖ­ÞJav}êRß‘Rï‘Õm’¾ÌUú§KÕê¿íøÎ%çØòUÅ»ÅüÞvÃÆîäšzÏ·»÷mޞƙW{6e#èòR˶ÛÒ´/ÈŠËÊÖpDÿ¨Ã=öéå÷­KÏWLû&|£Júµ«×?£z–-hò}-–=Øøxí”úÊQÓäQ)¯? ›iÒnyé×c ‚ä½â-q ソô·ç20å•îsœQn•½«Ç )exV4™'åÐÀ6 m@J¾ óU®§oT¹ý0”ë“óŒtiO —R_ýð•z.Û!@ =EÊ@@¶t<Í4ÈÆºY³n6ãË` õg+Œt@@B°A3;ƒQé2«±@&×}*Ö7I°uŠ·ºã¾ÚÎZþD=^vµzY™øKŽr—èìÈ|4;¾nÃOõÈÊ%p2ÜÑŽ 0ö ¤K£N·õ àŸ¼ÛK ²nõ]²žrk$Ty‰£Ì)Ò§m¼à¦ÒS¥¦Ô¨ê—Ì”ºï.]Ò´"µšÊPhž}£Zdöß8F¹®‡.‹ÑÛ¿djº+³Ó]-÷«7Žã:’ž<®]õûžóe#èòR˶Ûb‘ z™½ rU·©*7ö5WÔt5¬bŒí›÷p¼Û¥'÷ôœïkaçjóW¹AA²®ê’`zñ=2 èÈO³ï5o0Šëš¿ö·÷&{U‡åe-vlÕ ¤Ûrå£kG›.3ž`á•W`Ròi˜kc ½‡¶·v—Ox•ÛŒô|nI6‹g†J€@úPÉS/ € P"ùšiÎ3àY7ýü2xýé² € P°ã£ÑFØÁk ¨ ¡¡ößá~BÚ²“¡ËLôGåÖÚ‹ÖûÙŠ ÛSÛX­ïu»õÔcݶ·¿–»x3vu¢ ¢$×ñž=mg Àmëö5´.{þãýä_òôÝqãÆ7z±ÇJÉvÁIå±×è³RÖ²>¾uÉkã=Âþ¡*“ËÌ»¯/mžCòØæ<]^FŒúƒ €ø/ -¿}Â:ü˜ ÿtË‚¢ ¦GºôJJôTî¤/¯‹`ù¾ÇE§}Z‚èWÚÖÊ {Óÿjù*i_ËÇÓƒA-e³•9ïn¶µ²lÄûýmµ ¹+{†"h•ø,•BRË•Ñ,ý.·¯vÚ€”Ô¾æË0Èr7Ú&îæ'žÓ²o Æà–ìµsü HÏ¿15 € €¥-§™I´àfÝôïË`ðõ'Kä@@ Ð†©Q{õ´Q›!¤K9â„f; Q‚ç1eܳ[—ÌßìÛ¦÷ôm37b&vWX…§I »¢*>EйÝ¥uù7åI&«¶õ]±ûlZPçž{®KþØ5Ä—JðþUe¡ó%Ìy©:CZ9>æñæùÙúZ[ïÿP¼/^ÛK÷smCÐåùµ§õÁÊ-ë¿#ˆ·Irì6¡ŸMÇòåË;üòzZKKÓ{ÒŸÕÒŸQÒŸ}‚jo^¯E4Šhg¡´UÖ©W¯ÉŸ«eÌÊ=òg‚¬»=Uî|pwPýŒr2 ¨ËYûfDòš)ï³=ûÛ^'Ù]uGŸäf=K%´ÅãoEBr? 
yÈ{wã] ú[_þ’/Ã|•ÛCÚ}7?ùüÌé³'߃[zÚÅC$øÄ¢Ê©@@ ø¼™ÝÝr¦-Òꉄôïd3*_²åÎoæQ¹óâ×׽ߵCËâÆ£[š°tqcãÒæÏÈ×ð¾gÝØ/ƒö‘ã—A›5Ðúm<@@ŠB@fù”lh<>ôôÑ»0N‚yva;Möú¡ ¢Û&<úàÝÏÊdݽæh=Õ>'ætû,3—$g©wôɖݲxþåÊèï{K°¶Ì8‡÷ªÄÞæ^ìÿ‚’i¯c›³ty>m[Ùß.7ž¿ªûÐW"#v¾?¶ûÔ58IÚ  Ê÷¸ýíGž®E­©:_^/‡Zy}Ÿ)óEòbn±ûŽÖów\°{ÅñÈ€ '››×»^OóéþöØ„tâóJNtTן“çÛ÷®ØþÛÛ7ºßå&ËÉöl¤H»¿c§ HIüí¤<¥çË0_ånd0e‰mÓŒt;¸EÞ'ÞÝ‚¸´±l!PÒ ã:Ð @@ hìLù¢ºÚv È™é³ndÅÂúÖÅM—67=~ëÊÜðú÷e0øúsk%¹@@†VÀþ;P–оضB>¹mÏ m‹äßÙŽ:"ÙYÓûÑäöP>ËÍå½[·Ëw€ƒìÔIÑéKr_Û¦xÜ”Ùò±O6ÜãÍ畦 ~°»rñþà=ˬôZU}¦·=¿‚./K[Z—4]¡\åÙÊ”Ýkͨ»²d-üdc¼u±åu±ÃD]å ²¤Ñy¸¢S÷3ÚùoÛ>yßÏ— ú »íªŽsd`ÈZy}UkåÝâÝ&ÅcÀ‚º,È€•/H‡ûKÒz¬E²v­Í‹þž &cä_ñŽiµïaÑheê± ¶ j@Jž åõ˜¸SJÀ×&ᯌÉiFºwNž·ñz  *п¿ÖÆù € €%*üLƒàgÝôïË`ðõ—襧[ € €@‰ ì4æ€3%г§í–‰/³k‚ySÖŽ;VïãaÈáÏÀî£{hmWü'%k³Û¡°šªµ“”óê£Íÿ•MŒ‡ôå2IÀN«¥ÖgÚ?¹^‚xw§]5qâ©[¥ïïvÐåõU‹^}¶sæåqÔ´I'×Ïé+¡“kr½D¥½Y­!¥äNõÌ üZÈ d]ú…rׇr™=ÿž»aÃ…Iӥ͋þ)¯£™v?y‹÷ä±B~b@q•7ˆHú½kM´nj®ý=ækSw‘ÛÉyùåýùLúyr+¹ßÛ4yãVmcFŸ~<ˆýB’/Ã|•kíeíúÄMùêÊùZäapKÎu“< ôù®<×Mñ € €¥"‡™AϺéï—Á ë/•KM?@@¢ÐzûñãÇw¯Úëß‹‰'O—u¿ÕÞQù7®Ìl¼Ó?çভ³«¥Z™ezJ¶Ú9ñ[ÛÞsì/$B5Âæ‘ÙÚÞs¶üI÷nÝ®M³WÒß’g¯]$›?r»ÏujO®ûf÷ €¬Å…FTL•èfbÐ@W¼WÀλݰë^æ¬õ6ÎðáON:錽³&l wâäi_ñËty~uô¤57Ç?TkN‘ë÷›&?â_4)Z×ÜíÉWà­ÍMoÉ«ðÖD3õ.á;/«©9eÛlÍ>>zÆŽªrÇû²O¦}-jtõLyßÔÝÎï-[öÓä /I–»SÞL¿¶;År‹÷ uj3W|hû-w¸ê˜c¾5ÜnoêQ^¾FN°ëÌÇT\]”ž¿­«£g‹|žÍA€@ú S € Pêy™iü¬›þ} ¾þRÐ?@@  ìlÒ‘ÛíõJíÉÓOO]븦¦fØÄ)uÿQsrÝ‚ræKÀ¨B‚Gk]eÎ’I\xè1û_ ä­²-‘ÙògKzµdFí¤hý‰‘rp?Hf3àµZnw¾©`´—osÿŠï¶ãÒ¦í$ig|wuÆ:îÙÜâ’çÕL®;Xzz¯ x]®Ë÷jNªßÿh´{Ý^¥ì€k “óo²çH±ôá=ë0'Ë‘µäïc^€]Ú7Öq"Ï×N™~‘”èøñgTØ|^Y“§1iJýeè};r¼ÁÉ2RŸƒ./µìôígš›ÛÜ ŸÔJû½[8K÷z;Ð#=_¡ïÇ7´]-}ðÖÄ–×É‘ªbÄsvÄÄ“¦íiÛn_ŸÇŸ4í@yýžRá—å:KŸ‚º¼ÿœÌžþ/[§´siëâÆÅ>õË´z·^à ®Öºlèn·?ˆ‚ožÿ‘«\o6¾ô{§òêŠÇ&L®û”—d¯emtú\ù¬µkä3¯åÁ¦Óó?õð¢UÚ¸Wxé2è§,Tù‹I'N³·Ïúލ•÷iÖ ~ `@J¾ óU®ÇhÔ{öYÞ¯ŸµO^Ú&þ zpË&ªã0ƒ*@ }P¹© @(M|Ì4|ÖM?¿ ^i^zz… €•€ö’9¥ ]þnm´~]M´þoºb‡µ!GÿJèS½Îõ¬°´yþï ¥sË›ïþ ®Ìéè“¿}8—IPëýÚhÝóÒßÖèQÈÌ·ähDÅÍøä,`‰€l»ó˜Þ¨™Rÿ]ï´€ÿjyhþÓr{÷¿$‹•Ƶ<ñð½ï'÷7û9¬¾jϵ·”–ër³Ì9iŒªÞ ·—~Mú»²<\.YÇÎ6w„äÕþÉʶߠwݱ¯JoöììL}Ç™£Ãú™‘ÛEÖ‰ßÚŠHÅ{*ìüÆqÔUòúØQ*ÝæøÉÓw³õû<‚.ϧŠIvft,®Ž“ëù›2ú®š“¦çhÞXÊÐnÙ>´w¸ãäµéÍ®·×T®ì½¡Pè ñÿpô˜Ö†C¡ßËëw…Ð ‘IDAT®¤WI4û†[<àkaïPÒ‘…rÍËä5²¾£3~N¶ºk^ðª\‡«ìqyM?iJî·:ÏVææ¤ö€ 
e‹çß-ýþ•×o­¾ ©?Êûpz÷ݼ «¬/¿Ý¤“ꎗkù+¥óý2k_Óqe¶>¶¨µ·Ë{ÒûŒ•>}F‡CÏŠéÙAM6hnϳw/°Ás;hf¤õwv~œ­¼lé…0 %_†ù*WÞÉÿ÷‘G½î„a¯IÍ”éçw»§›5¸%½\öjéC}¨@( g>릟_¯¿D®3Ý@@ŠRÀ¨5fmLn¼>HPU8{KÐ qkpcÖI,öêµ¼ö¥e.x³Ðú¹¬yþ27î)?oæµ´}˜´ý‰ê"Ѭ­$PùK ºÔúPÓo;ÖtΕ¾®HôAMÞúcÜŸ$Ëvãª1¹=çÖÅM×I_k¥Œ'{Ê‘ëä „™±ò,“ˆM»¬|c§2c½Ù={o¬X±°½¥¹ñʸÇÉ9¯xuÉ"~aùkã­‹½µ¼Í}±xìPYãýÞ¥lÜ º¼%ûo-¨éÝx—:VÚý‘÷Zuœÿ‘€ãxÿÜ…™úó–+×}ðÚWŒëΑ@ùêžVÊ {»/ƒ0þ¤â]‡›ö÷zŽobc ×bävc.‘÷ÎX[¼.yâgw{²U»òÍ®—ëàͰÊ[¼{ïƒÁdÖ¾;VÞkó¼÷޽fZ79¡ÈßdPËÇ2鈿ï„ô2¹–‡z–ÊÜݵ®í 'ž¸÷“l–Jf‹Ë —/×üÈ–+ïÇrÇÑ vPS•µFÊ^­+G|`¹ØA3ö3NóÙ\o/ŸZo HÉ¡¼lóqmbë6Ü.ïÇÄò2ðH¼üCþüJP¼-×ä=í8?.+ }9Õ¸{{Àƒ[|Ê$ !Ï € €#ptí´Ñå¡äKð—z•(·¡”Û¿ïù‘DÖJ³·y“‚£›e†ˆ÷ãV¯“”š>ÑQºÕþPfÉä<™  ;¥¼}ì—iùÎýoYwíD™žqŒÖΕ‰"Ì?Œ«oh]ÒxK²È ¾Q9ìm{Ž—fÌ»òü–|AÜUŠßE¶e©{FËâù÷$Ï ²þd™<#€ € ®À¤“ëÏZoVÿdEsóz[sMô´]*§µ»»2zùóžkÔßÜ ï®X¾|yÇà¶n3j“Û¸×Ä«rÃf?ÇÕ»G¯4]îo–=<ÿ¥ôÒŽ?iê!Ž}Ú1ê~·XNÏ¿9ûrëõäßù'Ë?Öß•5åw“2üf†oNÑÞ9ÞºÙñÐ>d#ÿnß]+Wn[¯_íRñ?Ú™úý-Ø~/Е_”€ÝX£´ùú–Òñ7͆ö¿¦¯KÙA——KEžGbýÞáˆ9X‚³ûÈ7½™xüÙÖ‡î~~ ý*åk!LùlÒm2â`±Ñæ ò^å|v@67¯}ÿ+W¬XóÉÑ“ä•)³ñ%˜}Oë’¦3zdÙ˜8yÚWäN·Ê€”}¥~ùjÝû!ß×_“Ï ¥¬Ÿõ>Ò÷^Í”iò]>$¿¨ÏHΌɟvÐŒ¤ß/Ÿs7ËJ/-×~L<±n_'¬~#MßZÚÚ!¿%·tIÓŠôòúÚ·KMÈçÝÍ6œ¿gâî|}ÑûX¾ ƒ.·&:õËJ…ÉU–;Hô~Øßeä÷˜,]ܘuà”½¦r÷Êëõ3~¯Áë”ëÝ‹ÇoyôÁ»Ÿí]Cb/×ëêw.i)ñadá”… € °å ØÛãÜvÌÕòei†ÊG¥ ;Ó ÖU¯ºV½¬+wLŒPÏH·çv!¼Y¾d}.£,oÖù¦¥aG¦—W—/•zÇ'ò™+[75¤ž³9_ƒ¬?µ-l#€ € PìÇF§o]¦œ•l)—èùl ¬\Rì}¢ý¢@! 
:*­®T#¾¨LèY\!,ƒ‘žw7lxas¢¤ZÛ[º7#>¯Ð2ûÜÞEC¾›××w¹¯¬xdáÆ;¤žT¤Ûù2 ²\[V…ª>ÎÃetÃŽ<W~ÏyµsMÇò>ï6rMJypKJ7Ù,qé%~é € 0„ÁÍ4pÖÍf} °þ!¼T € €@ µÑéß‘5‘½»@ušøgëHZ…!€ €C&@ }Èè©@@@@(^Y3÷%¹#Ôþ2kô·²®ùaÅÛZŽ €d d¬7‘™…@@@@@6 $Ö5ÖûÛãê[7a @( pitƒ^ € € € € € ’€£ç [—1æõ¾þÀ ÕK5 € €À  0#}Ш©@@@@(.£k§NoqíÉu—j¥ðÒµºiÅŠ±ô<ì#€ €Å.ÀŒôb¿‚´@@@@È“@e¹³´6Z_î*õ eÔ{Z  ëã¼êŒùõº^¿-OUS, € 0¤Ò‡”ŸÊ@@@@@˜pÂéc”ÖØÖÉ­M÷SÚnyÙ·c*v2³Ñ-@(EP)vŠ>!€ € € € € L`ï=>w"¡w¥”  ÛIY¥Í«23}AÇšŽ¯?ÖºèãÕÀÙ € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € PœÿèÊ`0ª{IEND®B`‚rocksdb-6.11.4/docs/static/images/bloom_fp_vs_bpk.png000066400000000000000000001453241370372246700226110ustar00rootroot00000000000000‰PNG  IHDRÝq´t ÈiCCPICC ProfileH‰•—TSنϽéEJèM^¥„@A:ØI ¡ÄšØÁQTD@ÑAÇÈXQlƒbTt‚ "ê8X°€Ê»À#̼·Þ{ëí¬³îw÷Ýgï}Î:g­?Pð,¡0V M! 
ó÷¢ÇÄÆÑq¿ÀÅ ¡¡Á±ÙçßíCØ]‹©\ÿþý¿š"‡+f…"œÀ³Ó>…Œ×l¡(ÔAįŸ•!œâ«+‹îŸâ¤â„iF£§c"¼VOf±DI ?=“„ä!û l%àð#ïÀÍcqFê‚ùii«¦XаIÂ_ò$ý-g‚,'‹•$㙵LÞ‡/¦²rþÏíøß––*™­a„ 2O†0À8Wà‰tB@ˆ+ð@,6€BP ¶‚ ìÀap œÍà,¸®€à6¸)/Á(ø& ÂAˆ ©C:!dÙ@N;ä CaP,%AHåA¡b¨ ª„öCuÐÏÐè"t ê†@Ðô‡Q0V†µ`#xì3à 8^'Áép.\o+àø(Ü_„oÀ÷a)üC ECé¢,PN(oT*•ˆ¡Ö¢ŠPå¨TªÕ‰º‹’¢^¡>£±h*šŽ¶@»¢Бh6:½]‚®DF7¡;ÐwÑèQô7 £‰1Ǹ`˜˜L& Sˆ)ÇÔbNc.cîc†0°X, kŒuÄ`c±ÉØÕØìl#¶ ÛÄŽáp8uœ9Î ‚cá2p…¸Ý¸£¸ ¸;¸!Ü'< ¯ƒ·Áûáãð|>¾?ŒŸ ( .„‡C(%$´n†DE¢1ÑAL&n Vˆ—‰ýÄw$IäLZBâ“Ö“*HÇIWI¤Ïd%²Ù›¼Œ,!o!"·‘ßQ(#Š'%Ž’AÙB©£\¢<¡|’£ÊYÊ1å8rëäªäšäîȽ–'ÈÊ3äWÈçÊ—ËŸ”¿%ÿJ `¤à­ÀRX«P¥pF¡WaL‘ªh­¢˜¦X¢xDñšâs%œ’‘’¯G©@é€Ò%¥A*ŠªOõ¦²©©©—©CÊXece¦r²r±ò1å.åQ%;•(•l•*•s*RŠfDcÒRi¥´´Ú¸ª–*C•«ºYµAõŽêGµyjžj\µ"µFµûjãêtu_õõmêÍê5ÐfK4²4öj\Öx5Oyžë<ö¼¢y'æ=Ô„5Í4Ã4Wkм©9¦¥­å¯%ÔÚ­uIë•6MÛS;Y{‡öyíªŽ»_g‡Ît:ƒžJ¯ wÐGu5ut%ºûu»t'ôŒõ"õòõõëõôõwè·ëè,2È3¨7xhH0t2äî2ì4ühdlm´É¨Ùè¹±š1Ó8׸޸߄bâa’nRcrÏkêdšbºÇô¶lfoÆ3«2»e›;˜óÍ÷˜wÏÇÌwž/˜_3¿×‚lÁ°È´¨·°¤Y[æ[6[¾^`° nÁ¶ ¾YÙ[¥Z´zd­dhoÝjýÖÆÌ†mSesÏ–bëg»Î¶Åö¹×n¯]Ÿ=Õ~‘ý&ûvû¯Ž"‡‡GÇxÇjÇ^'e§P§§«Îg/çuÎg?»8¸d¸œpùÓÕÂ5Åõˆëó…Æ ¹ .tÓsc¹íw“ºÓÝãÝt—zèz°þ>E>]¾J¾‘¾•¾Oüôü’üêýFýíýWû·`‚¶ô2µ˜lfs4Ð1pM`G9(<¨2èi°Y°(¸u¼(pÑöEý‹  7‡€fÈöǡơ顿,Á. 
]RµäY˜uX^Xg85|eø‘ð^¥"M"%‘íQòQË¢ê¢>FûD—EKcĬ‰¹«Ëm‰ÃÅEÅÕÆ-õ]ºséÐ2ûe…Ëz–/Ï^~m…ÆŠÔçVʯd­<‰Ž?ÿ…ªa%0ªFÙÞì]ì—OÎÎ×[ÆNtK,K|žä–´=i„çÁ+ç½â{ó+ùo’’÷%L I9”2™Ú˜†O‹O;#P¤:Vi¯Ê^Õ-4 ¥é.é;ÓGEA¢Z1$^.nÉPFÄÑM‰‰ä;É@¦{fU槬¨¬“يقì›9f9›s†sýrZ^Í^Ýž§›·!o` cÍþµÐÚ„µíëô׬Zï¿þð↔ ¿æ[å—å¿ß½±µ@«`}ÁàwþßÕÊŠ {7¹nÚ÷=ú{þ÷]›m7ïÞü­ˆSt½Øª¸¼øK »äúÖ?Tü0¹%qKW©CéÞ­Ø­‚­=Û<¶.S,Ë-ܾh{ÓúŽ¢ïw®Üy­Ü®|ß.â.É.iEpEËnƒÝ[w©äUÞ¯òªj¬Ö¬Þ\ýqgϽž{öií+Þ7þ#ÿǾýþû›jŒjÊ`dxv0ê`çON?ÕÕjÔ×~=$8$=v¸£Î±®îˆæ‘Òz¸^R?rtÙÑÛÇ|޵4X4ìo¤5Ç%Ç_üÿsω í'N6œ2d¾ü]üû—¡‚g”gåÃ:ÃuÏmžŸñ¹ýbé‹¡——¯ ÿPü£úµÉëSzþys4ftèèÍäÛ’wêï½·{ß>:öäCÚ‡‰EŸÔ?þìô¹s9ùÑ8S¾öNNN4ON~­Eš}ˆè˜3Z~ÊŽà9î`eÜSôü«ýì› ¨æóTiTXtXML:com.adobe.xmp 793 477 Фɉ@IDATxìœTÕÙÿ;[f{¯,eé ]ŠT)VìåEcoI4‰æŸæ›þ&&¾¾éÆÄhÔMŒÆ®(*(*"Š4 "H_ØÞ{™ýŸß!v˜ÙÝ™ù>ÃÌÞ{î)ßsî½ÏsžçœÖfD@D@D@D@D@DÀG\>JGɈ€ˆ€ˆ€ˆ€ˆ€ˆ€% %CAD@D@D@D@DÀ§¤dø§’¡> " " " " "àSR2|ŠS‰‰€ˆ€ˆ€ˆ€ˆ€HÉPð))>Å©ÄD@D@D@D@D@¤d¨ˆ€ˆ€ˆ€ˆ€ˆ€ø”€” ŸâTb" " " " " R2ÔD@D@D@D@D@|J@J†Oq*1)ê" " " " " >% %ç8•˜ˆ€ˆ€ˆ€ˆ€ˆ€” õŸ’áSœJLD@D@D@D@D@J†ú€ˆ€ˆ€ˆ€ˆ€ˆ€O HÉð)N%&" " " " " %C}@D@D@D@D@DÀ§¤dø§’¡> " " " " "àSR2|ŠS‰‰€8ÚÚÚPUU…††{¨¹©ÕÕÕhjjr¢è»—hiiAmm-{YÉ:/ûËÌþÌ¡¾¾ÞÞO¬ckk«­s]]]0W¹××Íãñ€íÂþ§ "О€”ŒöLtDNˆÀßÿþwœ{î¹xýõ×A¡Í _üâñðÜã'òýöÛoã /ÄðáÃígÒ¤IøÒ—¾„5kÖØ¼_zé%Üyçøä“ON$›ãºvýúõxüñÇñùçŸã½÷ÞÃUW_…éÓ§ãÕW_í6BÀòåË-žRnû×cøõ¯­[·G\Ä6ùÑ~„SO=õÀçþûïGee%þýïãé§Ÿ>¤ïú£ '’&…ì÷ß7ß|3Øç§L™bïö7*²¼ï|ðAüð‡?´uòÎkïÞ½øío‹?ýéOÞ‡QQQßýîwî¥Q£FáÌ3ÏÄ}÷݇’’Øëî¾ûîC®ëŽ?vïÞgžy¼ßwìØÿøÇ˜ú(úõë‡!C†Ø¢óÅLA×—#®¦âââñ‹_ü'Ÿ|²U`þñXá>!!ÍÍÍàh'…²î ]^¸p!Ægëÿ·¿ýÍ~ï{ßÃØ±cÖ-Åa[,Z´È Ž£Gî–<ÏdöœÙh¨o@vNöá§Žùïüü|ìܹ}úôAÿþýárß8ÓaŸ¸á†l¿aA222œœ rúàƒ@Ž|o +W®´ {ZZšþrûäbÕ»«ðÜsÏY>T´y¯±~¿yZ=öøáì×Ì;ßüæ7a°ÿðÜÅ_l-<áááÝŠ„÷0Ûcû¶í¸è¢‹°ní:ÔÔÔØ{žJÐáõðWáø¬Ù¸q#/^b…혘eÕiºƒ Âõ×_¬¬¬Nãï ¶qii)6mÚ„œœœÏm¦G‹,ïò>ÿüómqqqHLLÄ€зo_¼òÊ+VÑ=Þüu#)ÁتªS pMÁ€£èÞ’’’)SQQ‘Ž–,Yb”„8+ì?=ô}‰M›6 ¯¾òªœ®¼òJœtÒI¸çž{¬ÒrÚi§’žÛ…ÌÌL+tòE9fÌ+îòBeƒåáh/• ÓO?W_}µM“åáK’#Ø´xü¼óγ/×gŸ}ûöíÃÇŒÈÈH\{íµøôÓOñÆo`âĉ …fðàÁ‡ÔïÍ7ß´£É|ùÒrÁt íèu×]góg9öìÙƒ9sæ(G’9j˸dÀ|)ì1_vÙevä–‚â¯|å+v›£»Tê˜Ë?þ|Ë”Â(?,kll¬Éûðà”×Qa]™Ö)§œ‚ììl›6-Sééé6]¶ÁâÅ‹ÁQL² ÐÃc]çñ””°žH‡¼Ï8ã +¤² 
ž|òI”••Ùòü×ý(ø¾øÒ‹Ø¶m¶oßnëÏã—_~¹í?Ny)Pr4›ý†ÊÚ¾ð«È¹Ýnp„~éÒ¥v´Ù‰ÏoŽÈÏ;ÏN•òŽŽÆÀAe455Õö)*#GŽ´uxíµ×l¿#·Þ8ªL,÷׿þu«p‘ËEÙÙ¶g{³ý¼™ÒBÃ6äuTØ¿: <ÏÁGˆæ}L w`£ÀýØcaݺuV1c{S)£%eÕªUxâ‰'¬õŠ÷ï¦K¦«W¯¶Â,Û‹ýý‹ecŸ¹é¦›¬ï-`´D2ØZúËCx÷ÝwÁ{–÷#b–ƒ}oĈ¶ßѲüxOÄÇÇ[E„ý€}‘ŠËÀú3Þ /¼`-5W\q,X`ïù_|ѦÉ~2aÂ|ùË_FTTyä['j|ç;ß±ýÆ»¬üM.¼žõæýþÏ{‰Z]™ëË¿/¸à[^2{ùå—ñá‡Ú{€×Q‘~ØX~©dÑ2LÅ‘Ö&¦Í{ý—õaZTˆY–“<ùüY¶l™íŒÃôØWØßTžØ?xOòþ!§oûÛ‡D£RG„÷0ï*|®³ßñ›ý呇ÁìÙ³ísé‹õ‡„0)!Üøª:PøæR.|Í¥E]‘8yú^z b²rŽŸÂµ#¬ñ›nBNàòïÿ{+LSHæK˜7… P}ô‘}Ñ­|g¥™9s¦}‘Q°àKòp!™£ltaà šn¨),p¤›n1Nà‹—.3f̰Ï¿þõ/+8P8⋚Â…Úââbüõ¯µ _¸Ì—‚þUW]e•ºÙP8 rÁëèšÅ¼(ð8£\)|s¤ý–/¹äìÚµËZ9hyáÈ Ó ÐGAŒÊ…‚yóæYÁ€B ,æÍrÑJ¯+VXa‚¬JÈ~ðËÛr§0òÓŸþÔ -, 'Ö¨ôQ¢ KŒiSàb ð²~ýÓ'vYF¼†‚*Ù9#§,7Ó£°DŠ-C9º;lØ0PÙp„c§žl—ëPË\ UdO!‹BØ­·ÞŠ©S§Ú¾A^l' ²yyyNRÇôÝÔրťá“Êgìu®°HŒMºg¥}ñ@:ŒóZé_ñQåSâŒIZ€³Ó¾| Îá?Ø.TÐÈ‚mïXÅ(“û#û‘Ÿue¿ºæšk¬Æ6ìLÉ`ߥå û† ¬ úÕ¯~ÕIÎ~öÙg6MæÉ~AáôÏ÷ÿßýÞw-7ö?«ÜÍ™‹¾€?þñV!æuˆy°¯°ßÒÕ‘>?ÿüóÖ Ê» Ù¯©äP 2ÖYgÙ2°_PѦ2Ã>tÛm·Ù¾E70 Á¬7ËÏ{€ý“éð><çœsì½öÔSOÙ¿YöMÞ'¡ÀM‹ï9ös2PYá}Ïû’ý‰Ê-óè(ðùD%ŒŠ ûÛ‚ŠØ¬Y³ì½C…ˆ×“ãðž ²Ãçïu$°¾¾ñoØgÊÏþsðþKeóÒK/µÏ;¦Å¾þýïÿ@‘Yg2糎߼'ùË_Z…ñðç• ¶±£¸ôÍ틯Ýú5p €÷ûaBbv˜Á>D@ö’¡žÒÚZšÑZ[Oí¡#•Añ4_ä¶£û@s„‹£oæ(8äææH’/@¾L)`sä“<_þtƒ¡ÀAŠ¿)0¦A‡;G÷ß;PÉ 0As>…SZ(:®—B_’¼) R¡¡àK%„ÂçLð%Ë‘F üTtø¡…„×ÒMƒî"üÍ<ËN¡¾Ñ,… êN  À/Gý¨lPXáH.óåHß¾ý¬¢Â8¬/y°Ü¼YŽèRˆ¤E÷–™<(¬Ì1– /¸Ð F|©³Î,wBB¢a”i…HæE!ŽB…!æËòuæêB! 1T"h¡ÀÃüYW*Hù!?úúoٲŖ•q©ð°y ËÃ|Y^ ,…9¶@²fS¢pÂ:ÓÏŠ¨ÇÓj…8 `ôØÈÆ;P ¦D“Š•NZ¿(DQØ¢ÀÈ6ò¬³wŸ¡H¸9’Î:>ðÀVa£€ÄôÉe  z¼JëÜä©CKëþÉÉaahj;Ô—ßÆi;,ŽçÐ8Þuáoº²ŽfÉCÞ¬/G¼Àû‚}Ê5Ù² xt—Š5•/¶'Y0=öUöQZJxŒB0-J, ûUÞ Œ¯7ŠËÓÐh”-Óç)óZö5žcaãßì+¬ ïÑ}{÷aÐàAŠWQ^aÛ–æÃ¾Ìû‰Ï–•ý“ƒ´8°¬T¨$³ÍÙר ±¿Pq`ÿee¹Ø‡©(ÓjÉû÷ëËòp. 
û$Óf½yœÏ"æÍû‰}Ï»O(¬ùÁû†ýò 3ØÁ>Ï~Äo–—.”ì§|~Ô×ÕÛ{—ÏÞ?,-7Þx£åHk¯£Â{Ê ËÁ>Ã>Ëç(ûÛ€lØo9xÃÁrf¿eÝ—œÈß;°ý¨4PÁà@ 9°ÝF°¯±®Tîø dý¨(ñÙDå×P)$ö5)Þ”õ;Ô *­„: Õ?4 pQ?,L×…eŽ®ó…Ke€‚_Ž¿ùÍoìK‘Âÿæ‹™#‰ŒKw*ä)hRáè2…‹Ãýѯ½îZ; Í—+¯çG¥ùBf`"8ºÊ‘o¾°)€0]ŽÜÒ%_ô 0ø¦‹ëÁ(¿)äp4ÕIƒJ…'^ËBŽüq¤’ÖÏùPc9(T°.,­,… À–œ—=ób¹x-ËÈ—|LlŒ-óf<–óÏþ•ŦðNÖdÇÀ¼ŸÎó`ëëC>mž6+r¢ðþðûwcS#òŒÃôx !@(ýßÿýŸ=GҤ¨ŒQa»Ò}ŽåbÝ)°:¼È™ ¹²Þ\t˜û…I*(ž)8’'˜SÀôìTj8ÊÏÀ6¡¥‹i“?û •LÇU‹(8ñ¸Óg½ÓëéßdG6쟬9:}ˆu"cö3ö²q¬mìC¼—: ´rÔžý÷ÍÃÆeç¥_: D3¶%9’)ó£òÏ~KÁ}›}ƒec¾TÖyß3.û²#˜RðfYxœù±^­FÙtêšjÛŽRå sÙt9ˆ@%€u㳃iðÞ`ÝYnæÉkøÍþɲðo–“߬û¿ùqXRi¥ÅŒ )¯eŸpX3Αî%ö]*3ì£Tš(xSÉa»±¼´˜ÐÊÇôøÜáv¶¯cÙY*n¼Ÿ8¿Œ–>/hisÊÇû…dÌúñxNN ÌhïA2!c~Èõfö¬?-H¬-]t颲ÁA>œÀ´¨ÀñÙé0¢å‘÷VZHyœåçóCADà )Yè—øœÀI#O²£Útg¢@HŽ òåÇIø²¢pÀ&… tiâK˜îD|¹ÓJA׺eð¥zx3®0¼Ž/9¾ ùâä Ïy)3?çr¾tù§@Ë—-… ”|!ó¥JÁ€/Z¦Éòñƒ#`8é:ÇìÉÿüçŽrÛô)1]ïÀ|ùB¦õÂËÅQFã9æå-ÄxÿÍãÞy3>ËJkËKåŠßô§ÓÕÀ4É›éÐç›Â?•)ÖÊ'HÓÍ„£šäƶb›²~,²¢`vË-·XáŠn&ÞÂ,Yózr¦pE®¬;1½Ã9wTvŽ sn-:Yýö·¾mG½)œR˜¦;IžQ€¼Ë@AÒ ÌŸý‹yR fùÉŠýƒé0P‘apÚÄþÑKþc¥`IKG§Ù> l #S Áv!cGd¿#+ö½Ž9ð¶?T ¨Sd #ž§ KÅå!SP¸¦•„éSÙf\ üŒKe”–+öu'8÷ÿ>¼_óËÂ6`û°MXo'8åàqÞOTl˜ëç<'ضνËëöÿax´¸—ôøMÅs³he¡²L«ÿ¦µ¡«÷ëÁ>Ç~È{„e¦ÅŒJ• *Btóã=CN|ñ~XVæÃo û´ ÒÊK«Ý9/ƒ\È‘åå=Êç_uuŠKŠ->çl/ôúyPIä=K%œJŸTÞï¸ã;àÃèlÇ>üò±µ„ñ>eYYNöö=r§ÕVADà )YèWȘ>©͈ÿa£\¡p™LDLû‰ÃÆwÙQcŽê97…úÏsŒ– @|©ÑÕ…s(¨P@cÜ‚èÓÌQE 0íHèãK®v. 
:8*È—8_ÂÌ“BGÎyŽþÐL“æ}º&¼a, Tjx-"¾àw¾T»LT¾¼Y_ @ŽÀ㔃õ[¿n½d˜?}§ùr>Þ4ëOA…£ŸTXŽÊs²¼#dQ@¤@’•™*e*gtm¢N Ö‚…¶Û‚œé*AåÐ;° )Q äÜ޼2–æDDîÌ:£Ý´0}2¥Ér²íŽÆ™‚%çÍpT—Ê*ËÆ:2Pˆ¥Òpøˆ­ÃÀF2ÿ±QØe_ ÄÌ‘sº×±.œ›Á@¡–uf?þøãN³ÑºÔPé(°Ÿ°ÿ“5Ý éRC¶´€0PÈ$' ·œçD—îUT8x/ñþ¢pÍs̃TVø›Âû±çÞà}I¡ß Ì—VLö=öyrás†Jû..: Gº•9È@a™÷=ŸMLƒuã}Àóäy*rñqfB¹y¾uø|á½ÍöásŒn…lƒ¼¼<+ÄÓâÃ<8'…ó?œÀþK%šóbè EåŸm·Ã¸J9J3Ó¢ •–•sØgYwZšŽv/1-Æ'7^sã 7Úç/ŸÞÏY¶wSs“Í‹lcº¿ñÞã½ËÀç.ÛŸuV8H@JÆAú‚£cÀ?_L4Ãsš/>~è®Bw)Ž ñ%ÇQiúó寗˜3Y™9(Žº:n‡—•4Ý;82GÁˆÂ Ý´hâ§ Ã)ó¤Ï1' ~ë[ß²‚G½91’£‹´’pe' °Lƒ®>ô›¦<#™¨%Ç; Ö™eæh&• Ü,Gž: fÞixÿ¦€@áúŸÿü§/á¸58£Ë+ Ý.X/Z$:œ9²Ï•¹( PYqF)9·æÞ?Þk-¼–Ê]8ÂËÀòQ¡àËùôû§Ê‘ØSgŸj…4Æczlƒ{ï½×úóÆ¡rI!´+ÁºÃüGQb»z–ƒBàÑûÉ<3—.ÕÊ6bÙie£àÄ6§ Ê´:cu´:‹ï§q½qE£ÂGA”#øìs¼¯èfÃ{Ši3û3ÞK.¹ØÖ‰mÝY ²Âû†÷ÝÑ8…,œ¾Èö§NWº¨QøçÊF¼‡Yžã}Æ6¢;ÏuvÿvVç8• –ƒ£óLË ,ËÉO¶ý’#ðœ@!—Jï)Æg`¼®æÅºP`Ÿc”ƒ~ýú+Ù¦á \°opïŸþô§à½ÕQ ZÉŽÊ?•4[^£0ppƒî}TÈ‹ Hð^pزò™Äû˜ý“J ËÄÕ¬œþÍø|~QYáž!¼–?­Tæ5ðú(³JI¶“÷½Åß|ðM+"ïÖ+Pñž¦’G¥œÏ¶½‚ˆÀAaft¢ë>¯Ó/NPHãh…ZçeÅBŽvQˆã‡ç9ñ±¼¢Ü D"œø´PH¥àÉ—)Géy›òÅͰw Û ãòEçGÙàµ<ÏsÌ“/RÆåH-3O¾$ŸqêLy*LyX aüf¾¼†×3=–ÍI×±NL‹.RÞ YD(DSÀsÊÇtȈå`ð.ëÊtòòo2dYȌױ‡—…|),3Ö‰×P áuMMÍF™©6¿£Íè«™,܉%ƒeaúdÆë˜ƒ3º[]]cÊyÀ‡ñX&–ÕaèŒh2*—l3^ïÄczN0Gs£M¹(ó8Óqêêý7¯óepX’Û˜ŸÓ¿8ŠL·1*»T¨Ø¾½1°­Ù^TTɘ ºsÏ8}õä1~³/Q`%cÖ•íËo'CcC£¨íc:ŒÇ6áoæÇïýýj¿‹aScââãlþŒË¼L:UU•¶\ì,¯ãýÃ|ø7ËÏ¿þÎß´|¹£Ýö˜SŽs%8ZŸhñd|ö=§ßñ7\öç^â9¦Çs¼œ¼¿Y†CÊÒbÊbFëyÏ0.•6öeòaZ ,3¯ã9~''%wjÉ`|¶ y9¼yŒl˜.Ó`ý©|x³=ü^w ¨ü° ,·Sw¦ïÔi±ŒÌϲüOŸpêêô›È1þç¤ÁþÃþÆ6eyÈ‹JW®bÿâêUä§ "°Ÿ€” õèA|yu÷K‰y2t”ï‘Î & O>ñ¤õ‘¦%ÀÛÍÃIÇWyu5=®üCßz'_ç:~sä—£ãŽbá}Îùí\×7'¿»¯+q¼Óô×ïÃËÁvãjUT¹Ê]ûz{8¼G*ï±Ä=R:ιý·RÇ÷°¯òb:tý¢Û3 u £ÀxGë›]×Ù±#¥ç¸‘9uôNƒÖº¬y+pÞçùÛ¹îhåíR<ó83Î\>­ûáåõþÛ› ³mî¿ÿ~üä'?±nWÞqõ[BÀÑmë¡NHõ?8ÚKÖY)Ï#;–²pDòìùgÛXø»£à«¼œ´–Gy;sÿqF{´:ú>ZúÎ5]‰×•8Nzþü>¼¥¥{ -Y±ògyŽ'íÃëp¤4Ž%î‘ÒqÎí´îxäÚWy1º9#ôNÞ‡û*?'Ý#¥G¢³þA«Xg÷|WÒvâðûHe8Ïàw\ãóãï2ÑbB Ýéä*åGèJ:` È’°M§‚‹@ï&ÀQ>º(„»ÌjQLíΰ,©ï(P€ã'ÔÛŒn,¤Ä£õ3ZÏ¥mÙ>tÑééÀûˆ÷SG Fo(cGeóõ1¶Yðs$+¨¯óUz"(¤dJK©œ" " " " " :öc«˜" " " " " ½€”ŒÞ×&*‘ˆ€ˆ€ˆ€ˆ€4)Ý|*¼ˆ€ˆ€ˆ€ˆ€ô>R2z_›¨D" " " " "Фdtó©ð" " " " "ÐûHÉè}m¢‰€ˆ€ˆ€ˆ€ˆ@@’Ðͧ‹€ˆ€ˆ€ˆ€ˆ@ï# %£÷µ‰J$" " " " M@JF@7Ÿ /" " " " ½€”ŒÞ×&*‘ˆ€ˆ€ˆ€ˆ€4)Ý|*¼ˆ€ˆ€ˆ€ˆ€ô>R2z_›¨D" " 
" " "Фdtó©ð" " " " "ÐûHÉ8J›´µµ%†N‹€ˆ€ˆ€ˆ€ˆ€x’áMÃë÷êÕ«qÍ5×`ðàÁ¸ú꫱sçNx<¯ûþéOÂìÙ³1eÊû¹ì²ËPZZÚ.žˆ€ˆ€ˆ€ˆ€ˆ@¨3#õª?¬µ‹ŠŠpÎ9çàÚk¯Å”ÉSðÀƒÀívã—¿ü%’’’‰}É%—`ôèј5k¢¢¢ƒñãÇÛ߇DÔ" " " " ""dÉè ¡W®\‰ððp\tÑE˜4ynºé&,Z´ÕÕÕ‡ÄniiÁºuë0cÆ ¤¦¦bذaR0!¤?D@D@D@D@B‘€”ŒZ}×®]èÓ§âããAƒ¡©© ‡¸LíØ±õõõøÊW¾‚óÏ?“'OÆóÏ?†††RÕ! R2:hg*T0\®ýxbcc†ºº:x{—ÕÖÖZ+ÆÿøG,^¼\pzè/¨ªªê UÐ Õ<¶Z&&$Zk„£PÐZÁß)))¦8räH<ðÀHLL´Z4Î=÷\|þùçÖ}ŠVïPRRÎ÷hmmõ>¬ß" " " " "Ðͼ¼<¿ä{¨ì—,/Ѭì,lß¾œsÁ°{÷n;ñ;..ÎZ4œmܸk׮ŕW^i'zGFF¢¹¹´|8V'.¿©¤P! ưuËVäö͵߃±~ªS{»wíFBb’““ÛŸÔ‘ #PXXh딕•tuS…:&@aZæû÷ïßq *Pݳg†TõRe:'ÀUS?ýôÓÎ#œà)œ>}:öíÛ‡'žxº@=øàƒ˜7ožUøÐ}ë­·ìjR øÍo~cçoL›6 ÿüç?1|øpð%Ü‘’ÁÉäücs…JWØR ®p—µà©ÍC£½g—Ú;4Ú›µ¤5žï2µyh´9VÕÞ¡ÑÖN-©d8^;Î1_~kNF4ÓÓÓñ?ÿó?V˜9c&>ûì3|ýë_·ó4òóóqûí·ëÆk^ºþúëqÇw`Ò¤IXøÂB|ë[ß²‹’Õ! ²dtÐÌÔä¹ßé§ŸŽššPéÈÌ̲£:4#.Y²ÙÙÙvtçÖ[oµqËÊʬrA+†F}:€ªC" " " " !C@JF'MÍù\º–f$o×'*Þþ©œcÁOß¾}‰×I²:," " " " AO@JÆš˜ËÖòÓ•à­ˆt%¾âˆ€ˆ€ˆ€ˆ€ˆ@°МŒ`mYÕKD@D@D@D@zˆ€”Œ¯lE@D@D@D@D X HÉÖ–U½D@D@D@D@D ‡HÉè!ðÊVD@D@D@D@‚•€”Œ`mYÕKD@D@D@D@zˆ€”Œ¯lE@D@D@D@D X HÉÖ–U½D@D@D@D@D ‡HÉè!ðÊVD@D@D@D@‚•€”Œ`mYÕKD@D@D@D@zˆ€”Œ¯lE@D@D@D@D X HÉÖ–U½D@D@D@D@D ‡HÉè!ðÊVD@D@D@D@‚•€”Œ`mYÕKD@D@D@D@zˆ€”Œ¯lE@D@D@D@D X HÉÖ–U½D@D@D@D@D ‡HÉè!ðÊVD@D@D@D@‚•€”Œ`mYÕKD@D@D@D@zˆ€”Œ¯lE@D@D@D@D X HÉÖ–U½D@D@D@D@D ‡HÉè!ðÊVD@D@D@D@‚•@D°V¬·×kOãgX_½›åŠÅ€˜I8%ñ|D›ß " " " " "Ȥdô@ë}R» ¯•üõ-Eðxš°0ì«[ƒ= qAÆwžÒ¥R–" " " " "àr—ò ÇcJeeÅ?PÛ”ÖÖ´µµ¢ÍÓ‚–ÖZìªYa¬ËÐè©?¦ôYD@D@D@D@z)ÝÜ{·¡¢q+ÚÐÚ.g§ ÛêÞES[C»s: " " " " B@JF7·Tmk%©£" " " " "¤dts#Å…'apÜép…Eã°ÐXîAÌŸ?@颗ÑR_wØYý)" " " " A@«Kus;…Æi©7%#[ª£©µÂ¸Nµ!1z0ròÓ]° ¥ ÿæ½{Ð÷²–aWŸêæb*;8nR2ŽÝñ_˜‘†y)×c’Ù£´9ß쇤ˆ ÄÍBCßmØû؃¨\½EèíW?`0ÂÂåBuüÄu¥ˆ€ˆ€ˆ€ˆ@w’Ñ´½òJŒH?ÙQyƪ—ù‡( nB:¢S3°û±P¿åcl»÷.ôYpÒ&N…+Êí•‚~Š€ˆ€ˆ€ˆ€ˆ@ï$ 9=Ü.fn†U0þSWdâ ÅÀ/ɳÏAKIö<òGìYø$Zjkz¸´Ê^D@D@D@D@ŽN@JÆÑu{Œ0— 1Ù¹èwé5Ⱦâf´µ¶¢äå§°í/¿GCqÃÑí…R†" " " " "ÐER2ºª'¢E¥¤"kö™pËw‘œ†êµ+ñù½¿@ågŸ ­Å쵡 " " " " ½€”Œ^Ø(ÞEŠˆCÊØ‰øµï#vè4lÛ„÷ÿÅï®@kc£wTý^A@JF¯h†#‰øC÷Åÿ‡”¹ç¢¥¼ùÿ¼{ž Í5UG¾XgE@D@D@D@D › HÉèfàÇ›3O£ïÅW#çʯJ?‹mýõ{5OãxÁê:ŸÐ¶>Gêß9O#sÖip›MúòÍ~5ëVáó²ô½òKH:®5©[@©‹€ˆ€ˆ€ˆ€€,G#Ô 
ÏÛyã&"ï«ßEì°1hܱ;ÿü+¯|ÝÌÓh8¤Äž¶V5íBuk9<柂ˆ€ˆ€ˆ€ˆ€ø›€†½ýMØOés?„AÃ0à¦ÿ‡‚WžEùŠ—ÍNáf¿üÈ=o"“ðVÅSø¨úe4´–!ÌìÆ‘=³’¯F÷ „‡©éýÔ4JVD@D@D@Bž€$Í›“‹Ü‹¯DtŸþ(|ö”-}ùØr^6G.CCK©™¯±ß‚±³¥UÍùX}'Ò"s¸æ*ºˆ€ˆ€ˆ€ˆ@o& w©ÞÜ:],›;% ™§žŽþ_¾)éÈßü6Ö–=gŒ’ “òxšPÖð 6Ö,Gƒ§¶‹©+šˆ€ˆ€ˆ€ˆ€)ÇÆ«×Æ>0Oã–ÿFÕ„~ðD™=4ÚÚÚ•·ÍÌÑØÝ°Þ(uíÎ逈€ˆ€ˆ€ˆ€ø‚€Ü¥|A±—¤açi Ž1—âÓúíhmëÌZfæh„õ’R«" " " " ÁF@–Œ kQÎÓžs*¢#S€°Ž‰\÷hD¹b‚¬æªŽˆ€ˆ€ˆ€ˆ@o! %£·´„Ëž€1‰!ÜÝ.Õ¸æþ=n)íØè€ˆ€ˆ€ˆ€ˆ€oÈ]Ê7{]*§$]€Øðdl®}eŸ£fO9Â6Ö"o[šf¼‹Ö3sàŠOèuåVD@D@D@D@Ÿ€”ŒÀoÃkoŒ“ÎÀ˜“Qï©A[JJ·¼ˆÖâ(_ôŒù.FÿËoDT²q«R’áC˜½-)ºDeDõÝ_,ã9•u~ cû äÕ§QùÎRìljDî¥× &;œË¡ " " " " ¾ ÉÒ$èŒ,äÌ¿}oøp¹cPõþ›Øvß/P¾a <ÍMR SD@D@D@D ·’ÑÛ[ÈÇå‹JJAú)³{ímˆLËBöO±ëï÷`ÏÂ'Ñ\]éãÜ”œˆ€ˆ€ˆ€ˆ@(»T¶zxt Ò&OCtf&ö¾ðoÔlX’—ŸBýŽ­è»à:ÄöíoܧÂCŒª," " " "à ²dø‚b¦îŽF‘põ-ȼä:„…‡£fý;Øvÿ¯PòÞ;hmlÀZ©È" " " " ½€”ŒÞÐ =TNöŽÉÉEÎç¡ß—o‡»ÿ4îÚŠüþ »Ÿ} Må@[NÙŠ€ˆ€ˆ€ˆ€,¹K¥éÚÚÚÌÆÙïœí})ãñã ÀUš"’:n¢Ó3Q¸d!*V¾†²×žGÃîmÈý¯ë?`µtx×W¿E@D@D@D@D 3R2:!³qãF<úè£X·n&L˜€o|ãÈÉÉéPáhmmÅoû[¤¤¤àÊ+¯D\\\'©öÞîÈH«LD\rbEáÂÇQûáûØQZ„Ìó.Cúäà\8¹Ku@¨¬¬ ßûÞ÷¬Òð¥/} ûöíý÷Þ‹šššv±·mÛ†ýèGöüæÍ›ÑÒÒÒ.NÀ0Z32fÌCÞ-ÿØ‘ãј¿û»žx¥%Æ}JþSÓž*¨ˆ€ˆ€ˆ€ô)€_½ú=⢋.Â\€/|á xúé§Q]]Ý.öÃ?Œ¦¦&Œ3QQQíÎâˆØX$þ×} éç^OSÊW¼Œmü•›?†§¥9«¥2‹€ˆ€ˆ€ˆ€t)€þüó­ÈÍÍEVV¢££­QYY‰ŠŠ x<žC®X°`n½õV <8 çcR¯?¸ÚT\nô™±Ù¼ï›ˆ0{jÔmZ‡¹…¯¿Š–ºZ¯Øú)" " " "  hNÆA~Õ××#11ñ€ÒŸ`çbÔÖÖÚÉÝ"š£F²ñ""":œ¯á7G%§"m÷ÔÈBá«Ï¡jÍ[(xæ¨Û¾}.üb2³a*ˆUS™E@D@D@D@üD@JF`9q»¹¹ù€BÑÔÜd'$ìW6¼/9–Õ¤ÊËËQZZÚÎâ^¯ýÝæBýøh‰2“Úß{ï,EmþNṪ¢Ü y¶!a_²ÛNBÏD"º×VEó ††P!/)1su‚žÝB¹‚^GsÓ‚¾ò!ZAÎ1ä»ð³Ï> Q¡Umzjð¹®ö­v÷gm¥dt@7ÓŒÚïÚµëÀ$îüü|;ß‚JƱ(‡'Må…€ }úÀ3|š&MAÑÂcmÓ:lÛ€ðãBÑŒòöxVbpÌY˜™t"R¶ª*øÑ pAöiZý‚ŸHÒÒÒ‚¿²ª¡%ÀyˆT*¹²¢BðhllDAAÚ;ø›ú@ ©Xr#…–xý…˜2e2¨X¼üòË8óÌ3ñ¯ý Ó¦MCLL¬ü½fÍLš4 ñññÇä"ʼná?9Ü(Zm©FÈHC~ÁíK2ö…l Oxvx^ÆÈÈ)ÈŒËFDXpL†?XCýrЂ*ß ÁOÀYøBíümíÔV Žl«Í"Áýnæbò£öîvö®ÝáóŒ½Ïùâ·&~w@±±¿ùæ›q÷Ýwƒ»¹WÆ-·Übn¼xìÝ»?þñ­B×'8Ž®lÜç\¨ßaÆSšÙ€p3ÃÕšÚÜRƒí kQïÑäð@mc•[D@D@D@N„@"â‰$×Ò¥‰ûcL:õ€©˜KÔFš ë¸êÔ]wÝe¿½ *%T4bÍò¯¡*[ŠŒvç{‚T·” ¥MK݆B_PE@D@D@DàpR2'òŸ¿3331oÞ„Çøõ*ˆ€ˆ€ˆ€ˆ€„)¡ÓÖ>­i´+g¤~ýâf7±ƒ“»ãÝýqjòuÈsõGç±û‘{Q¾aÙ5¼Ñ§ù+1ÞK s—Þ[f•¬—èãŒ3Ó¾‚„ùød÷ÈÍÈCŸ¸ÁÈÎì‹Æk§cß³ÿBý–°çŸ÷¡éü/ cúDÄš}6D@D@D@D@‚š€”Œ n^ÿW.+j2£ú¡ ‰; 1\Î4 ±'%#*1…‹_@ÅÊ×Pøì?аorÏ_î"® " " 
" "¼ä.¼mÛm5 ƒ Qf‡ïpPg ³ù†™µ¶ã BîEW kÁ hó´¢ü—±ýïDÍÎÏÑf&Ô+ˆ€ˆ€ˆ€ˆ€'Y2‚³]{M­¢3³‘5çl¸S3°ï™GP³î]ì,/EöEW!yôx„G¹{MYUß%Ã7•ÊDÆ' mÒ4ô»áëˆ: ;>ÞGïGÁÒ—Ñ\S}„+uJD@D@D@D  È’ˆ­€ev‹EòȱˆŒODá3Oã¥(Zø òÑ缈ÎÈ4žVû]­°z*²ˆ€ˆ€ˆ€ˆ€Y2¼`è§ ØyyƒÑÇÌÓȼøZ˜‰¨xk1v˜en«¶}fæit¾ƒ¸K¦ÔE@D@D@D@|I@– _ÒTZ]"“™ƒìyóáNË@Ás¢öÃ÷±«¢ Ù\”q“Ý¥tID@D@D@D w’Ñ;Û%èK™„´É3™”‚}Ï?†ºÏ>Dþ㢡0ßNLL zª ˆ€ˆ€ˆ€+)ÁÚ²P¯pw4’O‡È„D˜ý4*W-Cñ¢'÷ÏÓ8ÿrDgç˜iòè €¦TE@D@D@Dà’àÁ¡?º›ÀþyCì~™]cæiÀL _†›yŸm‚§¥¹»‹¤üD@D@D@D@N€,'P—û†@LV3OãD¥¤£à…¡öã°«ªYç^†–1¹ØM(iÞ‰XW2ÅœŒ¾Ñæ=6|C_©ˆ€ˆ€ˆ€ˆ€o HÉð-O¥v8#ý”™ˆLæ<¡~ËÇxsÕŸŸ–„*´xêá ‹Ä'Õ/a\⥘”xâÂO G]*" " " "àR2üAUi7ð褌ž`çil]ú8>ê³®ðf„7ÜC£©¥k*þ>îa;.óOAD@D@D@D ÷tfÚ¢­ÍLèBèj¼.$¥(G Ày ƒ†¡ùŒQˆƒðȃ †sYCK)¶7¬G}ksHß" " " " ½„@H[2Š‹‹ñè£båÊ•ÈÍÍÅ—¾ô%Œ9áFÈõ---xì±Ç°lÙ2{î²—aî¼¹p»Ýxæ™gìñÆÆF{INNn¿ýv$%i Vo†Çó»!¥®2 Ã=úŒbXÙ¼Ímû¹OúºFD@D@D@DÀ?BÖ’A«Äo~ólÙ²çœs\.þçþeeFª=,P‘X´hf̘áÇãÎÿ½›6mBkk+^xá³Ìj&OžŒéÓ§cüøñˆŠŠ:,ýy<"ÒÍŒÎõàX$!ÂüSè]BVB+((°Vˆ{ï½³fͲ Äܹs­ò’’‚ˆˆƒh~øaçÒK/µŠÅë¯¿Ž·ß~ƒÆš5kpçw"//HNNF´v¬öI/3k¢¢¸¥Ê¸´µ’f}a+Z—¯Cã¬éˆW¤»CéèA!kÉØ»w¯µZŒ3±±±6l233‘ŸŸo §Mè*E«ã%$$Ø8£F®]»ì‡–ûî»ßùÎwpóÍ7ã½÷ÞCs³övpøÈwBx*f$_‡Œ˜±sEHÊ–Žþ³òþ.ìýç(\ñZjªœ×ž%pp¸¾gËÑí¹×ÕÕÙ9tm¢»Cjj*jkk™N%ƒó-¨`Ð¥Šó-öíÛ‡ÒÒRŒ;Öº[¥§§ã¹çžÃ_þòë2•––fãê¿#04v¢Y¦6{?Eyó^Äš%k³"!õô(Tcªß …Ï=Іü]Èž1b2²`ôÄ2ÕÕ" " " " 'D d•Œ˜˜k±ð^1ŠÊD\\Ü¥ƒd###íß´N8q›ššo]¤îºë. 
2Ä^çLß¹s§u›:|yEEÊËËáñxN¨ÑzãÅd·gÏžv“æ}SÖ(¤µ@œ§ŸÝ€/ʃºˆ04›ŽWZ>x åo¾‚Ú¢}ˆšq:Â3r`4Bßd­T:%@E½±¡ÑöéN#éDÐhhh°u©¯¯š:©"G&À÷ßwŸþù‘#êlPàÅct¯¢ûSÉ %¤³@ņVGYé,^ §àA WÜê¶`¬EžAƒÑ4vJ^~õ­AmA>RO;)㧠¸Á)ø­|q±qˆOˆ÷_&J¹×pÅ ÅW!4ÔÔÔØwš,ó¡ÑÞT0øQ{‡F{³–”G«ªªüVáU2¨8Œ1O>ù¤]ºvùòåvÂvž™ÀÍIß«V­Â€ÓN; ¯¼ü ¦L™b•’Õ«WãŠ+®°ñ¸B'Š7ή@Åk¨Àx+*NëQïV!Üɸ¾é>F—2ÎoéÖ`&Ú{  .yÕëV¡nñ3ˆª©@öiçÁjÜÖä>å—&¡‚k­v~É@‰ö*´\1pq …Ð À8Z2Ôæ¡ÑÞ¼Ç9˜ ööf-ýíY²JW€ºõÖ[qÏ=÷X…‚®_üâ­RAkÃÏ~ö3;‘{þüspõÕWãûßÿ>n»í6ûÀ={¶wA¡šKÖþîw¿³ Å97Þx£pæy„NWí¹šre©ÄáÆš”ˆÒ¾y(yí”-]ˆÆ½»‘}ÞeHÈŒ0¯ÕÂz®¤ÊYD@D@D@Bƒ@È*lÞùóç[+GáiÙ8ùä“A—&* tƒŠˆ·ßÜCcëÖ­v¤ž“½éVEWªo~󛨼y3¸±-Ü/CKØZ„Ýú•ºØÜþˆ8ý\¸³rQ´èIT¯M¥EÈœ)R'÷©˜n¶²t+e&" " " ½‡@H+œCpÖYgYDïU¦Ø<çž{îV¢21mÚ4Lœ8ѺHy»B 4´s:œIâ.Ôn'•˜ŒôÉÓ­›TÑÒQ½öì{òo¨ß½Y§ƒhãV%÷©noe(" " "bBZÉpÚº«ó$:ÛÉ›£èsòÐw÷p™e‰“FŒF¤Q8Jû Bé’çP¶l!öîDÎù—#>o\rŸê¾QN" " " !G@JFÈ5yˆTغOõ3ŠÆ|¸ÍÞtŸªÙ°»ËJ‘1ÿ¹O…H7P5E@D@D@z†€”Œžá®\»‰@dB’qŸšaܧÒQ´ì%T°r¿ûÔ®mÈ2ó7¢Ó2­ûÔŽ†OðyÝ”·ä› ÿR0(zòbÆ Ú¥yÝÔTÊFD@D@D ˆHÉ¢ÆTU:&pˆû”Y}ª”«O- ûö˜Õ§`Kú.|Pû,*›¶£ÕSo–ŽÄ–še—x &KH¼Ùq\AD@D@D@D ë¤dt•b2Ç}*ÁLþÎÌA¡Ù¼¯Ú¸OíkÎÇšyhL*2›Ò´Ú¶zÑÜRƒµ•£oôH އ0óOAD@D@D@D k¤dt“b ÈÄ$¤™Õ§¢¬ûÔ"¬¯\ŒòÆJÄ´…·«a}s!v5|„\÷ĸ´«u;@: " " " ’Ñ ^ܼ/‰›÷…#v[܉«Le[ÚW¸­ Õ-Åh2– )íñ興€ˆ€ˆ€tFÀÕÙ  &@÷©>ý7b&¢c;·R$Dd Êåjªœˆ€ˆ€ˆ€øš€” _UzE`dòL¤D1ûóµ7êÅE墯{$Üaûw¨Š©°" " " "Ѓ¤dô |eÝó’¥bFòµÈŽ›dV•вj5žSµ[[ñz,’÷…#ÌÓóåT D@D@D@D ´¾ ¤Ò«¬"àCc'"Î,S»7~‹™ƒQWU+j?{韔¢$ÿ_ˆ8ï2$‹p·Ü¦|€[Iˆ€ˆ€ˆ€„)!ÐȪ①D„E¢ô³\í04{àŠmCͼ (l|µ›Ö#ÿñ‡Ð0ïŽÆ¢½È>óBÄdfÛ]ÂCª*" " " ÇD@JÆ1áRäP!ær!aàDÆÅÃm”Š’%Ï¡üÍWÑRQŽŒÓÎEâápEÉ}*Túƒê)" " "pl¤d/Å1ÑFÁÈœ}&¢Ò2PÄ]ÂׯBãÞ]H™y:2fÎCTRJˆQuE@D@D@Dàè¤d‘b„8Èø¤Mœ†È¤d¾ü,j6¼‹â—ž@ÃÞÝÈ:ëBÄåöGXxûÃC›ª/" " "¤d„pã«ê]'àŠŒDòˆ1ˆˆ‰CIv®qÚ?W£©¸Yó/FÒIãÌêSf2‡‚ˆ€ˆ€ˆ€ˆ¤d¨ˆ@W ˜]Âãó›¦ÓÅKžGݦuȯ,Eý¬3ûÔiˆJNíjjŠ'" " " AK@JFÐ6­*æ/n3?#cú\¸3²P¼üeT¯[…âEOíwŸ2«OÅõË“û”¿à+]€ %# šI…ìm££Í2·ã­å¢¬ï@”¾þ*ÞYºOeοÉ£ä>ÕÛÚLåè>R2ºµr 2aa.kµà„pwv/~u›7boUêgžL®>•’v Ö›ëÖ`kÝ{¨j-D¬+c&`HÌɈ O8G?D@D@D@D HɆVTz”@Tb2Ò§Ì„;5 Å+– êý7QüÊShÜgVŸúûÔûµ‹±¶òiT6mG+wwEaGÝÛ(M¼“ÏA+[$?@IDATB¸–ÂíÑFTæ" " " >% %ç8•X¨àêSIfõ©H3ñ;:wJ—½ˆÊUËûT!šÏžˆ÷S_BeË´µµZD­­õ¨iÝ… •O¡_ô($ÄHÉÕ¾£z‹€ˆ€ˆ@0’Œ­ª:õ ³úT¬Yu*rîÙˆÎÌAÑ’P÷Ù‡ø ýc”žR†ðØý †wáj› 
߸ýÜÃàvÅzŸÒoXR2¶éTðÞJÀnÞ7i*¢ŒûTÉÛËPçy-ÍM‡«}‘Û<¨m-Gs[Ü’ÑŽˆ€ˆ€ˆ€")Øj*s¯'Ä¡#•”‚¡{ê°!jÚPß¾ÜfòxrD–±bÄ´?§#" " " " :Z К¨Ø"Ð DgfcÊðË‘™8 aaíuúd÷万"2ÌÝ K¯"‰€ˆ€ˆ€ˆÀñ’q|Üt•t™@FÜÌL»¹ñÓo® Cc•M[\];™a¹]NKE@D@D@D ´Z „R«Œ"`†ÅMD|D2öÅnEEu>ŠV­@̆½HX»ÕÓû jò D&&X­T\蘀”ŒŽ¹è¨ø”€ËLûîkVâ§%ºUãÇ£¤üUÔlXÂâÇP¿g2çƒØÜ~s…û4o%&" " " ÝM@JFwW~!O ƒ”±Ínàé(íÓåo¼‚ò¯˜Íûö ãŒó‘ž+í(ˆ@hw»‘2z¢’SQjv /[ñ*ªV¿¦Â|¤Í=i“¦#2!1´!©ö" " " EÀçJÆòå˱xñbÄÅÅaذaÈÈÈ@RRšššPVVf?«V­Â[o½…n¸ãƃ˥•tª×¨°¾'`¬{qýòŒ2‘wV.J–/BýÖQXYކ‚|dÎ91Ù9Æ ¨{Å÷𕢈€ˆ€ˆ€¯ ø\É(//Çx³rΘ1c——wˆµ¢µµ•••øôÓÍØ°a½U8ÚÚÚ|]'¥'KÀºOM=ÜįäÍ%¨4²e/¢©¸À¸O]€„!ÃûTTÀÖO >W2¦NŠììl„‡\†3??)))hii±ßÓ§OÄ ãQUU%+Fhô3Õò¸Ìü¥¤£™”wv_”. ÕkÞBsY12N;©¦ ">¡Ãw7nÆŽúQÝZ‚øðT Œ‡l·±hGñyé ˆ€ˆ€ˆ€ø\ÉÈÍ=¸{1]¢–.]j,Ÿâ‡?ü!þð‡?X©iÓ¦!&&Æ~üS-¥*O 6'‘§wz&Š^[ˆ†­Ÿ  ªÂºOeÌ>1Y‡¬>µ¡æ |PõÊ· ÅSpW46Eæbb⌊Ÿ‰W\àCQ D@D@D@‚€ß¼÷îÝ‹;î¸Ã®$µqãFë&ßþö·hll 8*¤ô4ȸx¤M™¾—Ý€äYg¡Å(¥K_Àž§AÕÖOáii¶EÜc‹÷+ŸDAÝûhh.AKk-›KQb¬T=…¢¦=]å/" " "Bü¦d|øá‡à*SœÜÍ¥ké>õ­o} k×®µËÚj.Fõ2Uõ„¸""‘4r ú\p2/¼ ®ØxT½·{þýW”®y-uµØZ·åMŸ£ÍÓrh^fÎSYçØKëF[Ó¡çô—ˆ€ˆ€ˆ€ø‰€ÏÝ¥¼ËÉ¥èåZ0ª««?õ-"p b²s‘5ïlã>•…⥠íêSûªÌêS…û°oÄGhèØBèñ4™9¥h6JFD˜&rE8N~S2ÆŽk—®½ýöÛ휌x\yŠ{dhC¾ãl-]ò"ã>y:¢RÓPòÖkfõ©fó¾gQYÝŒæ1MpÔé° Gœ+ÙLþ–‚qŠ~ˆ€ˆ€ˆ€ø•€ß”ŒœœüøÇ?ÆK/½„I“&Y‹Æ¨Q£0oÞ<ÄÆÆúµRJ\‚™@XD’†B”Y}*Ú®>µk¶cwš±táð­4Üý‘å$+F0w ÕMD@D@zŸ+›Í…E…àœ ·ÙÉxÊ”)4hU, €ÚÚZ»”m”Y¦SADàø Äd÷AÖÜùps•©×¢hý»ØÝR w^8"ÜaFÙˆ@BTLLúr¢FºRD@D@D@Ž‘€Ï•Œ÷Þ«W¯†ÇãiW”õë×Ûc&L€”Œvxt@Ž™@D\Ò&MGdr*ÂßëƒÖ,ÆîE@n"M< Ã2ç``Ü8Ä…'sÚº@D@D@D@Ž—€Ï• ÎÅHMMµ–ŒŽ åX8::§c" ÇN Ì¬Üæ¸O%§ôCᛋP¿»ÉÅuÈš÷hs›w0WãØsÒ" " " "Ð5>W2Æ~V­Z…Í›7£¹¹Ù*´npÿŒÙ³g[Wª®Q±D@ºBÀºOÍ3îS™Ùfõ©—PÿɇØWRŠúü¹È˜1ÑfS?³qMW’R8!>W2œÒìÙ³ÇnÆÇ=2V¬X9sæàã?F„™´Êý2D@|O "Ö¸OMœ†¨³úÔÊå¨|÷u”,~ {w#sÞ9H< ®H͇ò=y¥(" " "àMÀo›ñQ¡(**ÂW\a­T2¸1÷Êp¹ü–­wÝô[B’ݧ‡ŽDŸs/EÖEW#"!ÅnÞ—ÿÄßPôÖ24™]ÃD@D@D@DÀŸü&í×××£_¿~˜5k&NœhÝ£®ºê*TUU¡¤¤¤Ó9þ¬¬ÒP"‘ÌÙg¢Ïå7"nôD4ì0+¿½ðòŸ5»¶£­µ5”p¨®" " " ÝHÀoîRÆ ÃöíÛ±hÑ"»OÆ¿ÿýo[­ºº:­,Õ ¬¬B›@DL,R'L±îS¥9ýP±r)Ê_f—ðŒÓÎAÒȱ`ð%¿)ÜcÁ‚àÜŒ³Ï>|ðžþy\xá…ÚñÛ—-¨´Dà(ÂŒ{b ¡ˆJN±“ÂK—/Bí‡ï¡¹¬õÓæ™IásáNË8J*O7xêPÜ´;<›ÝÚqž¸;Újüà%ú%" " "bÂj‚?êÌ ÞÜ|¯´´ôÀjReeeøò—¿Œ¬¬,³ÈV¹ñ÷žJ“.pÉÉɈŒŒì©"(ߣ Å"6w¢²ú ¥±õ[7¡~Ç4–—!")‘‰If¿ð#¦²³aVV<õÕ ±«i5ò=QÚZˆäˆ,³GÒ¯ÕÉÀ&P]]m+ØQé»L€nÏ 
HIIéò5Џ¸hee%22º>踵UÉI€ÛJ"''Ç/@üfÉhjl¯ýkäææÚ‚9ÒºN-[¶ C† Ñäo¿4§#ŽFÊøIÆ}*%Y}­ûTÅÊ×Ð\Q†Ô™§#eÌɈLèxã¾’æ|¬2 ÆŽÚ×ÑÚZo3ªoª*·£¥­ 3“/GZ¤TG®•Ί€ˆ€ˆ€ô6~›ø½éÓMvŸŒ)SNA¸Yí&;;§Ÿ~:|ðA”——kâwoë *OÈ s!>oræ_„ìK¯…»ß`Ô~²Ïþù/>‰ÚÝ;:œ¾¥n òë×P0`-­5ØZ³{·8‡ô-" " "âü¦dpùÚ˜˜̘1ÝZ-’’’pÖ™ga÷îݨ©©é]ØÛºXœ#ģɉ› *ˆ@ p›½42gÎCîe7 qÊl´T”¢lÙBìyâï([·-uµ‡Te_ãghöTrÌù£¡¥ ¥ÆÒA‹†‚ˆ€ˆ€ˆ€øÍ]jðàÁ ïï~÷;Œ;MMMX³f§‘˜˜Ø+ædТòòË/cÆ ÈÌÌÄÅ_Œ¼¼BKë¡Ö6:*cÎAvÔÀ@«¼ÊÛEšøÝEPAM¿ƒ¨1»PMüî¤ ‹°¿¹ºÔµ×^‹áÇÛÕ ¸²­sæÌÏõt Pü·¿ý ?ùÉOpÖYgaóæÍ¸ä’Kpýõ×Û}=¨9ᡇÂСCqË-·Ø•6X¯•+WZ«Ì3Ï<înNë…v2wˆé; põ©´‰S™²~QnöÔ¨^ó&šŠö¢nÚô=yúÅLŶÖ׌¢Qw Š‘ ™p.úº‡8¦" " " ¡MÀoÒ~KK þú׿¢oß¾8餓p×]w£"œ“qÓM7õ¸¢‘ŸŸ}ûöaêÔ© ûÖ¤I“Ày#;wî´–GÉà¨.­´ÀÐâ… &Xë µ~ºWño*½Ay íî¬ÚŸ0°0Äõˆ¨ÔtD÷釲·—¢æ£P¼È¸îž‚‘SG".;…m[PYS„8w%LÆø„ÓÍ2¶Zöð„ù+~S2¶nÝŠGywÜq.\ˆO>ùÄZ ~ÿûßãÒK.µòÁ½Ûïxiú§ÂÀÉéܳƒ® Íã49Êw)§âìíAeƒÛ9cþüùÖMŠ ç¼síáßL›Ÿ`œ Î97d"EëðVÜ¿Ûúä¡í”yhŠCËGï£â¥ˆ)ÈG¿±cD™à9O«<î­,8 uÉçœ÷N»³ßŒK!Ü;­ÎâÚq2cý´_ µÜ‘Ë5l$’rû¡ù¤1¨Y½õŸ}ˆ–½»1d4ÒFŒôáýëîx_#§¬³FÀqÕ=h-wü奵žÏvµùñ3 ¤+)›¨½©ÅN¼¬þ–Gý¦dЉ»}s’ôÆqÏïïÁ’%K¬E >>þÄÉœ` ééév‚+w·äojðœÄÍy#ÎË”YP)HOKGqqñ ]­8GãX ¦kv\6Ÿ` œs“––´õ Æ6ëz²ÐÖª DÙ{o¡b•ÙŒï“5h)-„+Ò…”©§"*)¹ëÉ)f@àà CVVV@–_…>v|®SQ›;»@¼‚^Üb@툭w|e¦ešSüü¶OƘ1c0nÜ8,_¾W_}5†j—‹å¤é”””C¬þªÜ‘ÒåM”gæS¼òÊ+àƒ”+HñaÚ¿«<|øá‡().±ŠÅŒ™3°bÅ ØIÞ¬ùÀNh×èΑë\03ÊvâБèsΥȾäZDŽð‚¨xåiì}é)ÔîÚÞá~' ÁS‹*³ÿF³§ñD’ѵ" " " =@Ào– Îoøö·¿íÛ·Û‰ß\Òö /´ûKô¿ý¸¸8ÜxãvùY®µwï^\vÙeèÓ§µdüêW¿²ÊÑܹsqÕUWY—¯Ÿýìg _2¦“O>ù²ãbÕ“.`=Дeˆˆâ~³Ì^0Ù¹ØýÚK¨7{j”-{{>w>’FŽW©:‘PÚ¼ŸÕ½‡= ¡ÁS¸ð4 ˆ¡±‘žz"IëZn"à7%ƒåçòµü8áÜsÏu~öŠï XEîOÜ»ƒKÙ:®\œK’““cn¼wóÍ7[+'ŠÏ>uöeÄ©Èyçg]«ÜÑnç¾E ( ¸"£|ÒX”64!"3-ëW¡fýj4—¡~Ú<¤O…ètãRc|¹5T¶”àʧ±¥f1šKu±Õl]õ&ÖË01álćË5ëX¹*¾ˆ€ˆ€t70ã"tp)¥îν—äÇ¥u¹Ï…cè¬X\iƒ.RÇ:£³ô‚éø¦M›ì¾!Á:ç$˜ÚÊWu¡•2Þ¬ÐæÚ· %o.Aí‡ï#̃„ S‘1çl$  —9,áƒê%x«ô>Ô6ímwYJÌœ‘ö cÑ8¹Ý9ð?Z{híU t%®¨¨À AƒB£Â!^KÎÉà2þ#GŽ q¡S}ÎÉØ°aƒÝŠÁµö«%ÃöGš´Nt%PQ8H ÜÜ;©f?wzJsû£bå2TšMüšŠö!mÖH™pÊ1M ßÕð!Ì<ŒŽBeãv7ïÂLèÑ•é:*›Ž‰€ˆ€ˆ€JÀoJG½¸šÔákªså&º$Mœ8ÑN¼>šõàÐâê/^GÀ¸EÅç ATJ:ÜY}PúÆ«¨ßò ÊŠQŸ¿ ³NG¬Q@ÂÌÇG Mž:xÐÒa4O[3ZÚš:<§ƒ" " " ½‹€ß” N¾ûî»í.Ù4½Ñ Ç¥l“““1dȼ¸ðEÜù¿wÚÁ¥hô®N¡ÒˆÀñà2¶™3æ!:# %o-CÕš·P¶ô3)<ßL ?I#̤ð£,᜙‹W¬YQªª]ÜIf>Fª¬íÈ耈€ˆ€ô>~[–k-ósÍ5×€ËÖ^wÝuàähZ8ø½k÷.¬^½ÚîUÑû°¨D" 
ÇCÀeæ,%>^ŽÌ ®@„™^c&†ï{ú®XŒÆ²’#&;"v:ÒÜ#ídoïˆ.W$rc¦¢{¨÷aýè¥üfÉ(,,´ï]qÅpæã|»ÔméÛËPûÑ/zMÅȘ}&âú0îSí?}£‡bzòÕXž„â†{T¢\‰ÈŽI‰ç#=R“ŽO Yt©ˆ€ˆ€töoyeݯ_?pÕ¦üãvŸ ºKqc¾aÆKÆ~ðÁvÿ ¹Jù¸’^F Üì—‘6i:¢Í<²~yv§ðò·Œ5ÃL çêSI'3îSq‡”Ú…pŒŒ;)‘YØÛ¸Åì“QcöÉHA?÷$GdÂæ7ãë!åÐ" " " 'FÀoJç]ÐMêÍ7ßÄ»ï¾ ·ÛmwÔþïÿþoë25tèP̘1CËÁžXûéjèÕ¸ÐÀAp§¦#&wJV¼ŠºOÖao‰ÙSc:÷Ô8ÕÎá8|Oì¨<ð£ " " "˜ü¦dP©¸í¶ÛðÊ+¯`ýúõÈÊÊÂÌ™31zôhìÙ³ÇîÞ·oߣîM˜XUjo‘ ‰V¡ˆJÏDéÛËQùþ›(yåéý;…Ï>ë¸öÔðNÿX7›Uªö6nÅžÆÍhôÔ"Áì*ž3iÆËeþ)ˆ€ˆ€ˆ€œ¿),]¢Þ~ûmYëÅîÝ»qÓ7aØðav'í+º®$´j$ ; î”4Dçä¢ÔX5*ß}…fOSÍžã§ÓžÇ[÷F³L7ðaõ+¨hú­mf“MW¶FÃ䤋10z ÂÃüúh<Þ¢ë:~{“râ÷wÞ‰œœ;£¥¥ÅZ4~ù«_âþûï·;l %TDÀg¸Äm–YÒ6Ê|sOZã>UPV„†}{n–ÀíÓ¯K{jov˜ åT>…²†M@›Ç&ÓŒìh1+_qϳLnvÔ€ãM^׉€ˆ€ˆ€~S2>úÈlÆUP€{î¹ç€’Á­ËÏ?ÿ|k u?aáÑ1H;Ùìnæj”d÷Eåªå(5{jPѰ“ÂGŒ'Žû#l©{UMÛ(NO3öÖ¯ÁÞØYR2(úã$à7%#ÚH³f>-öÃßÍÍÍhjÒ®½ÇÙ^ºL‚†Ý§ CTrÜ™Ù(}ýÔ¬[…æRcÕ˜qÒ&Ï„;-ÝZ|Wé6Tµ ÅSßa’M-•¨m-ï𜊀ˆ€ˆ€t€ß”ŒQ£GYÅâç?ÿ¹ìMÅ‚{cŒ?Ü/CK×v½‘S‚™€;5 Yfï wZ¦Y}j±ÝS£èÅ'ö»O™¹ñƒ›üù*D›ù.3çÂÓÖÜ.ÉpW´Ù—#¦Ýq86~S2’““ñï|Ï=÷ÊËË­ƒ–üàˆ?¶R*¶ˆ@P7–Ï´“O1ŠF†qŸÊ5{j,Gù›¯¢É,u›6ët$È„$0Cÿ˜qØU· µMùíÒKqA––ÎmÇED@D@DàX ø\ÉØ¶mJJÌÊ6 ÿþ8ûì³ÁIà´\pƒ¾„„x<íq¬-¥ø"ì8é:o°qŸJ1îS9võ©ÚOÖš]Â÷¡n×v¤OŸã“IáCbNFqÂÙØ\möìh)4s¿[ÌóȄȓp®Q2;iÕOD@D@üNÀçJÆêÕ«ñþûï[EâðÒs§onÌwÒI'Ùù‡Ÿ×ß" "•œŠ,ã&åÎØ¿§FÍúwQºä94îÝe—ºM1qÇo MŽÈÀÔ¤‹i”‰üÆMhl­A|D:ò̶¢OBŒëÐ]ÈÕ"" " " ÇNÀçJFnn®ÝÙ»­Í˜2: t›bhll´;wM‡E@B”€+* ©ã&™ÝÀÍ„ðÜþ¨xg9ª×½ƒ¦¢}¨Ÿ6iSf"ÚLçäñã )Y˜˜p&ÆÆÏ6ûd´ 2Ì­½1ޤ®Nø\ÉØ»w/2331iÒ$$'%›•aÚç¼sçN¬\¹˜7ožß]§Ö­[g-(•••vâùܹs;Tn–˜ŠŠ oΜ9v?ÖÖV¼ñÆàùððpÌš5 ãÆ³Ön6È͹jCjj*ü×ÿgï=À㾪ôÿW}F£Þ­bÙ–»åÞ{o‡Â’À>@¨Ùý-,,°K 5° ! 
‰IHwìȽÛror—Ü$«÷^þç\gdÉ–K3#My¯Ÿ±f¾å–ÏrÏ=í„Ú\~ózš‰ÈÈHüçþ'^{ý5Lš< aAWíºW­Ze®ûæ7¿‰èèh|÷»ßÅ믿ŽI“&áÙgŸ…š}õ«_Emm-|ðA#x$''c×®]øÊW¾büLT‹¡‘³l6Út»rNY·P§ðô!&yŸU̧J¶d¡öøJô©z£ÕX,NãC¡fVîR.6œÆÑšM(h8Žæ¶z0ÄïÃ:ÃC§AýBXH€H€HÀ› 8]ÈP¡AM‰Ô¹ûøñã(,,4‹÷`ùñ×d|úÐ(SvÃÕpÕÑ\M·RSSŠùóçã¹çžk7o²·¯×©@¡×© 0oÞ<üñD}}½$~ò“ŸM† S§N5cËÏχ:³«ÉÔ¶mÛŒÆæÞ{ï…†ïuµðdï7ÿ’€/ @üÌù°$%£dûFTîÞ$¡nW¡A2…ÇÌYŒèñSMtªþfr¹1Û+^ÁéªÐÜRs¥;"(]¬Û…ÚÖJL•(V¶€ˆþî&Û'  —pºaï©:wgffÚ_öÛ_ÍÑ¡š û¢_Ÿë15kêXô˜& ¼ö:uNW©Ž‚ƒj,ŠŠŠPPP€ôôts &*l赚€P¯g!p>?ñ‹Š:–¸¨V£Tµ'™P·õ3m.lr¼?µÇj¶#·fãUC1H0ŒÚƯÎBjÈ( µNp>ÖH$@$@nBÀeB†›Œï:oz¥þ ×f×c‹ý:»Æ¥c´,}®šÍ\þµ¯}Íøw¨/ÇÛï¼m̬{ì1£Õ° ,özU©®®¾NÀ±Ÿ÷ä¿êï¢>7öÈaž<öýæÔÕÕ™Hr*ˆ÷Wi– ù×bCý‰ƒ(Éz ugOÁ’9 ACGÁ?BƒOô­D3š[Í]b©h8‡ÓE‡` Jìò¼»Ôï.-ªÁeñ úWm>çÜ7æ[ÇõÁùöùÖQv\ÛºbÔ^/d¨ã¹jìš u.Á!Á˜8q¢Ñ’èqW§Ç´Ž®Î¹brûºN—·Ž­¯YzJ{ý=çF«!y3lRÑ|2uv¡&g*N†ŒBؤ™6 졯Š|Ê%,n£ä#í:Œw«„Ìmnmò¸ÏŠý³mÿÛW<ÙNÿ°¾9çý7}Ù²}žíû²m¶Õ?\=×.2t‡S3€çää˜Ý~ý;}út¶¯pNž</¿ü24z”š9mP´™™cÍŽû¹sçà€É0yÒd¼ôòKæ:ÕPèucÆŒ1 D¥¦P3g΄îììÙ³K—.Eee%~ö³Ÿáÿý¿ÿ‡aÆ™0¹–Wï¿VˆÑñªI•>¼±hx`»ú½°øý|k0ý\õ{‘ƒÖ¡ÃQ5r ÊvoCõ‘}h<}¨«B@K#¢§HtªøÄ^çÕèéø’K†£¸|˜KÕ^w«-8é±#lK¾îœ;ÐðäZt“†Å7”––šîœsߘo l£ηoÌ·ŽR7¾uSÝUÅeBFcc#ž~úi£X¿~=~ñ‹_GêÓ§O㡇ê3Çï%K–`åÊ•øùÏ«ÕjÔ€?þ¸ÑPüóŸÿ4‹âx‹/ÂʬÄSO=eŽ©C÷£>j¢ûî»?þññÓŸþÔ˜;¥$§'q Í«fW}JµšGC£kébûZs,WM ë%¸BÀ_üÀ"‡Aè€4TeNDyö6TîÝv%¯Æù³ˆµèJ^>ÐjŒ…ü†#¸\»G¾Ä¯äÐÑ^„aPèlñÉæ”ikh­CAãYä7žFS«}µ*Í2šl…H€H€ú“@ÀIqE4²”†ƒ]¾|¹ ÷zë­·íÁïÿ{è¢]ü}QÔ[ýjî¤þwÝuÌ_€ à œ?ÞìĪBs`èuºs£ ÷î¼óN,d|ö¨Xêk uèu=üF…ððpãø]UUev{ÆŽ‹ûï¿ß˜Oùù÷­x_°¼Qjj¦¬é“q#JÞuN“Vj.™¾ú,ß,½1q´Jô)ëÀÁâ“Æ’BÔÛº yhn–ìÞâ§¡¹7\¹‹P YÛà'ÆSò °!8õf‡Óíu5-Ø'Nä;+þŽ“ò÷¼D®ºPe-¥aCûàÌ¢ßsZô{Å7Ø}2tÅû ¨?†Ý*ÁûGË*5—R7upEñ“º6v°µ7ß|ÿó?ÿÕbè‚ý{ßûÆMЗm|\ù#m÷U³¢í}Ô"¸ýºÀ ë²•ë® *„t,ŠPãtÁÕ—cêØ‡þ~~ìØ1#pÑ\ª¿g¢ïÚWD·1—êfØ­M¨8v%’-¼jÿNc.6~bg‹VC"T¸X«QÚT Z†3’Œ¯V´ qH lòuÓÝ>T½ Jʆ3&r•ýæ€+FFÈfJôƒNÍÇAs);aßùk7—2dˆï Ú‡Gª¦Rš,Y7QY|ƒ€šK8pÀø»bÄ.3—RGj}ÃnܸјåææâÔ©Sf×óÚHN®صuªpp3åF×u·c«‚Ewçn¦M^C$àþAÁˆ7 –„D”¤¤£lÛZTìX†‹çP;sb$[¸+}5b‚’ gÍ$~¦nªÏw0´––º+Z &ýs6vÖG$@$Ð.24ßìÙ³ñüóÏgjõ‹PsšÏ}îsW̉ú8¬d˜ðR /#`MJÁ€¥wH^4ɾÕw£ð­—Qg|5ö‰VÙHëÅ¡¼ª¹Pü=»¬¶¡¥•Í%]žëÍÁÆÖz”·JæòFD¶„1‘`o ò ð1.2T[¡ù"¶nÝ Uµª“´†x5NÑ>æ¯àcï)—Ü’@€ÅŠXÕ\$ @IrÊwl@Ŷ,Ñjä¡nÖâ+Z G-6nÙÿŽ 
òF°¿DróóMFçÄ¢z]€_‚ý,oéõó‹ §q´fòšÐ¼gJ‡`°u †XÇ!<€¶ú½ËI€HÀË 8]ÈPß5“R{m{´%5'Ò‡ú;¨oÆ÷¿ÿ}F`òò7‡GîJÀ–6!1q°ˆù”É.Ná…o]F}þyÄÍ]‚°ô h¤*w.„ŒÄ¹À­hh*íÜU<¢ƒ3â¸ý%ñ÷ØZ.¡½«×µg//)ß‹sµ;QõiLŽXæ4a¦ó øŠH€HÀÓ 8]ÈPG±_ÿú×FÈ8räŽ=Úî(­ÈêÕ«MVlOÇþ“ x.@[âgÌ3Q¨J¶ DÅîM(Û´ —/"vîRDˆ`ÍîÆeDèt”4Ç©ê$»x™D i•ü·uŽþÆ7¾aÂÀvd8BbãE«1¥›?@mÎA4 þÒ9ÄΘP1«Ò¬âîXDˆ˜yâ‚Ò%'GÛê$lm"†Z§b5Ab2åHѹ…'EÀ¨î¢š6ñù8ort8*d4Šûùúc’UØpBò¥7É8’Åk†‡Naù¡x» ‘ ¸€€Ó… Í#ñÃþšÌNý1†jâðj˜W}”––™ ƒ. «$ ŽŒF¢˜IY$·FÉÖõ¨’~Å«ÿ)‚ÆÄ-¸Uü–P·}“×§§ONƒ>4)_‹èÔ#ÐÏ9¦^ªÉhmkî¶KzNÁ-çëcSÙŸPP›ÝîÈ^è·—êöʸª1=ò‡&GûÈûI€H€zNÀéB†vÁ_v5~þÞ½{±nÝ:“²\}\¸pÁ䙘9s†É)Ñóîò p>?IÔ5jœ„³MB©újlÍBÕ¾mh*_9K3I¾³Dãá®Ná!þ΂Â"E‹,ßçÁí‹ÿŽä­’l0&бNõ’?äxí6ÉŒ¾·sbúUÛ˜ã5‘: ‚÷/éØw>' p=—¼æää@ÃÖjd)ÕhhÞ ,¥BXH€HÀÝXâ´x9’W<Œˆ)sÑ(>êöÒÛ¯¢òôqhr?_)!½*CÌ•¢B†‰lÕÙd,(0é’‡c€ƒÎåÍÅ(n< ‘{}iCus> r¯?Å#$@$@nOÀ%š µj-4WÆ_}YYY˜7o†ŽÛo¿Ý$ç ïä¯áö¤ØA Ÿ à,þd¦^ u+¾åÛ×£|ó8.!fž8…gªSx¤O°Ð0µ QàHõÕž4æSa–8 ¿IáËà˜s|›x`¨ÃzwÅœC0  Ï#à2!CµÈ9žƒI“&cÍš5P“©²²24Ô7x)ö˜HÀ§„&§"øÖI¿tIà—…š£ûÐhœÂÏ‹Sø<„Hu[§pgM”Õ? ÃJ©‘8R°ši|Hì($Bh@„ÃÍDŠÉUp:.ùïìl.õaͶ€Ä9%ËáN²  ^p™¡ZŒ´´4lذ‹/ÂücäççcàÀˆ‰íuªW=çM$@$àbÖPÄM›mœÂ‹·¦¢bçF”¬yCœÂ%§Æü¥ˆ§p½Æ›‹Ÿ„‘ JÁ°€+‘“­ÉN® 1Ã$ï¥úC(©?"y?t4×…Ñ:SÛ#d´JèÝÊ–5]4™h‰Ä”$ŽòÁN+" ¸JÀeB†ú]LŸ>ÝD—š={6V¬XÓ§Oã¾ûîCXXØÕð ¸9°ƒ}h5$Ô­h5ªî­Fþ§pÍ".¾,½#0È2ÍQã°äÝ(m:m€ÐÀxÉ*> ãÂÁNíµ­U8]»9’÷£Ä´Ñr%L®8£m³%/‡8õ;©hT. ÿÛ*ÿÔyÞÑPÂNê«! 
>'à2!#çXŽ$Þ[ƒÇ{ÌäÌxüñÇÑÒÒÒž˜¯ÏGÊI€HÀAáH˜½Hø¥¢xëZTfoAáÛGCþEɾ¶CÜ>S¸ÃwÙ­q0Ï ›#&Y£LÞ ]¤Ç%›‡³ÂñæÕÁæ²çP&árE]bÆR旃†C†·Iò|ÌáпZÏåÆ<œ¨Ý…¢Æ3h_’1õ*Îói!#®¿ãÔIhßÒ¦i£ÍHŽúÆt¬›ÏI€HÀY\&d´´¶`ÿþ}xã7žžnB×Ú;=vìX9'–»½Nþ% WÐÄ|ÃG!$Nø‰O†&ð+Û² ;™8…O@P¸o8…;›u„$ÝÓ‡³‹f?Y»åõ'Û Ó†ämªk*Â)ÑnŒ†£fYù g°©ü¯¸P³µ=Cz@€ëb–8ÏìðÐêZkp®þ¨FåR!#Vüc†‹YÙ`Ë8§h}î$+   ¸LÈHHP¶ö…^0alí™ÀµÝï~ç»2ø$ðX!1qHZ´–Ädo\ê#{ÑXRˆú‚KÆ)Ü*‰ýüဥÿ ”5_©³Ū‹Ä‚"hÔ´ârSžCB†šc­ÙŒóÕ›:…ãmi©G~ínìˆfF·‰ùTo‹ú”äÖÆÆÒgP*Òíã)ðß|ñgiŽ~™búÅB$@îBÀeB††¨ýò—¿,C®OØ9溻À`?H€Hàf ØC݆H’¾"6*vlLᯡ¡ðâE«6xBBn¶:^ןÚk\M—.5î$`Øklmm”\ ')•†îm©j)CNífq?ÚI#ÓÚÚ$BÇQ¬Z!âß⌨_½í£»Ý§ÑÐJšòQÑ\$á Œ ž:û³ ô — ®ö‘Gé›Q° 袭°‰SxpÔ £Õ(Ùø>*wm§ðË}J̧ÆMBpdtôŒm~H *0q’1\wüuAÞ©øù#<0I!ƒ;îé ]Ì6·u•PðJM-â¯Ñ ÙÍ)¥Í(j8ÑIÀ°×§ã*mMQÓ¤Œ¶vèo£$H,o-D¥üklMF°¿Å¡ú®½¹¡µ…MçQ.š&{³„ 4ø_“øñÚûnöµ e‡«7ãTíVT ;#dÅ &0"t ü\¶ü¹Ù.öèºæ¶FT6—ßžˆÀ˜Ý{3«OQqËE\j;ްzñ' š7ÇÃTßLÛ¼Æ{ xÖ§Ì{ç##ð`A’œ/aî„Ä'ŠùÔTÚ‚þU̧."væhÎ ?‰¸ÇÒ÷4“ú,¨IQIýávAC³˜Û‚S0Â6Ï„èu¤gáÑI*…]U"‚¨Å?Ñî «¹TkW&_¶Ù*íºPt´èâÿbÃIœªËÆ¥šãh¦¨üˆ0œŽt‹s˜Êæ¬Þ Nò›P-š5-ŒAp”m¡=[DXª¨GyߊD†Óòžºh[€Å1Ÿ°önôê¯ÎÇ~ ½]|}ê›J ƒ@mõi\öß#Y.lq_#­Wu_SE#SÝRŽ`¿Ì£®¿ÄÁ#:ùg¥2îÃ$™gŠC¾P×v§MBS«F)¿á”‡ ‰|ÁÑyèØŽŽáHÍ6\h8"BF©ô?©!£1Ò6Ýpëx­»?w‰Q\\Œ3gÎÀf³!##‹sÕ¬î•ý#ð]¡ÉiH^~÷§ð ï£jÿ4•£nÖ"}ÊŸD­F¿=4ׯhÙ!O·ŒA±˜鸆ÉÕŒãÎ(ºË8R´%µ²ëx²f=jŲ±Hý­ÓeѼÌá¶šË##t†hdöÊî|‰˜|…Š–dXè\‡Ç£&FGj6¢°n_'³,u`¿T»»eQ˜2ġܥ"F 3¢‰©i¼ ÂA–ì Ïqh1¥‹Ø2Xº*­bÚVÚxʘ¯Yüm]]rÓÇTësA„–\Õ2H’Ç@Y8'Šƒ¿FúJ”LöŽ–Šæbì­|åub&g/¬ ©¹ ¹5p(8 ¢ï·ŸéÕßry¯f—¿|EÀèPƒšàÔíÁ@n‰y¤Ã™Þ=Ué„hÇòD+SÝR$‘ÐÂä½4Êhdœ±@×Å¿Žå@ÕzÑüìÏ` BÂe>Fb|Ø-&Dvïz~õ.Ýœ¸ Z¾2'…âU/Ÿ—Ñ”ÆKS"î’±Œ¿zq/ŸéwÓžª5¢…{Õ ç®Ô"‚å)IJZ-‡†ÛvfÑuÍhtf•êrº¡ÆSO=…sçÎábÔ¨QxôÑGAÛ¾Näù‚HÀk ÚdmÖ‚+æSæ¶jß\~ý/¨9u ÑSf™Lá uÛ÷Ó¯6æ6'ù,\Ûû˜ DÌŽú„$œ(ù>4©`‹ì’ÈaNÙIÕ¤~j·QÌgŽË³Æ2mWŒm.ÆÚæ_Û¥¿.ö˲{jÏ%Ò±4Ô'¤\´ñÁ©OõèyM.ž®¹S¶ÍP-;Å;hwë̯z«™ŸëƒÒ\Ó¾T?œÓuû%ÿÊM´/uðW-à y3£Ñ›]ôVò'+eQY(‹ò®JsKÎHhfG„ M©¦kÕ]ÏG‹Œé¼ œx¤«.Üô10×lÁÖÒ? 
²!Wî“ÉáøT`.ØcYì¿Êgı5¢š•m¯x‡Ê_‘ÒWýŸ eŽJåývWü·Nº©ml*{yUkÛÇ®Âr¥„¯V¿ŸØÄï9Ô†.ø‹EXÝRú²‘PÐÞ†¨°P%s´C¡ÖÉNÑ,©0s´f»lZœD½¿jا_mωϜ.dlܸÿøÇ?ðÀ ®®?ýéO1iÒ$ÌŸ?šœ…H€|€æÔˆ™i̧JÓ£|×fTîÞŒºÓÇQ3u¢'Í€--]L¨Ê[ÞVÙÍRúpEQ“©‘w‰ÖbšD«Ê•%I›±Ë׎š|iÕ¼Yvç»+êÒÐÖýùîîëx\ý/üĬ¨Ë"çüõŸƒÎßaшáîbSχ ˆÖ)VÀCe—Û‘R!‹ó}U¶ƒ ‹ÁÆæ œ•Ýÿh (°$æ!štŽ2-²@ïªèÎ}£„Nv¨H[%id÷Eßa7:ßýϨ†lGù ²?{õ°yjZx²z•æ“EÛ·èê¹>ÓQh±#•ot0´5‘Ë—™9"͈¸½‡5_½\…ÒBVÎK„·kK›œ+að„˜¶M‹ø—kOßôkݘ8+Z±N†ýn™«ú¦bÇv‡… m'»ê}1Yü ªD[’䋽§ÿuúªÇŽX°`ž|òIüà?ÀèÑ£±sçN“íÛé½g…$@$àæ,â >`ÉíHýÔÿ‡èù·¢­YBš¾ÿ.¼ú'nY‡úBÉÜÜE¨o7»×O4 z¢ãÂæ‰È|c–ã C‡cs¨ðÀä.G¦~¡bþ¥f[Žõ_‰Ç宊ÿ‡@„Ø ;R¢%¢Ø¨°Å°K¸Z\ìÅß?±b271¼÷ A{]•bÊT`vùíG®þÕô\1ÙÑÅ\ï‹ThµŠ†¬«â/ïƒhÑ”9RT˜‹ ˆ7æv]Õã/;ÜÉÙ‘¢ J›ó%æ©.«imm¿“ëî]^ÜÍAÝ•/lÌCcSY—Whòêövyîf¶ˆQÑÅúãhméÚ´H‹ ^Ú¢uT‹ðÚ}†¼ï)*6jT·-¥¿#WªêVåçH3í÷v³Ð~¾ÇOªªª0xð`-"@vòÆŠŠŠ×ÃH€HÀ[ø)«D™*2e;6¢öø!\8‹Ñl¨ Uø°‘ sÌdÀ[xqýC :(CÄï£Pr~tZ°ÉBÝ'ÎÒ³vÖU!e”8-‰#yµ˜†èN³]Ð&ˆMûÄðÛD“áØþ§úZŒ±Í1‹üµ[Ä>¿ØhG¢‚b¬Øç;#²”j}šÄiº«¢‹Åq×#%*(Ã¥¿‡*^‘Ú« ?á&&kãÃnu¤zso¤hÇÆFÞ½e5Z{…~þ"^½×0h]j’¥Ñ«ŒÓº½ò•Q½ø29^nÄZZM€#EEÕ 㬑¿)jƯ‚£ Æ]õWŽ'v¤ ©¶U„º¨o¼tÁ¡ ?âf§ š|¯¾¾%%%”VUªàQR\‚ à ÓèèhšN}ÄÄð4 €÷P!"aÖBØÒ3Pºk+*²·Hª,ÔžÎA¤1¡š[ŠšP1 •÷;ûHwÎLjG}k5N‰x˜g´µ´"ÒšŠ Y´_G‹ú– jý9’%½²ù¢Té/!~"S´Î´¹hzä’}ž±s×¼êìï¨í¿}ü!þ¡¢ÙI@UK®ýPû_i+"0Åá\ayº8ú6Š‰Ú…ºÝhh‘VZ·äÉmÌHÛ´ö6{ûD£#M¸Ódgª·£²¶a¶(D¥c¢˜þ¨O‘#%@µ%"XØÄ”éz¡L£‰Å‰»#EçVýŸ‚ƒ¢D8.¿®*e68tòuÇ{r@ÛH \HPÌuäT(ÁVCe;RDxÔHx‘\A£‰u*ÒF„C‡ u•Í×›vjˉ/œ.dhß¶lÙ‚ÆÆF#H¨©Tss3~ù«_B5Z¾ýíoC3‚³ €Ï [j:¬eÊ6dJ·o@õá½(~ïUÔŠcxÔôy& UHl‚Ù¤ñ9>p¿P‡ë¹QŸ¿’É8[~uµµÈŒ›! 
¬¡N‹Õ¯þãÄÔk„ø–”É‚G5š4QÚ:³¨G¸øgèÃÙ%ZB¡è@óß:iÔ¡YMœF‡-q¸I5ƒÓ°Ä·Æ~§kgˆÙÑ%ad‘¨LÃ%ZÒH‡ë·W ÙÂè0,pŽUÀð¸QÆ×Ç&‘“-êe˜„t™ï3•kd'ýŠæJë½¢‘QárCÍh¡*3âã¢õYi¢o™ å»VÍÊ,$h ‡ÛÐp¸#ïÇþŠ¿ÿ{!âÀ>ضHœ²'8Ô†1Âñ¼˜/ˆËßP%š¾fÑ–Š!&’Ó"ï_Ÿ‡Ú¸òžJw¨ŽžÜìt!cΜ9æ‡Q£LiQŸ -ªÙ°Õv° €/ð AÌ„©M„²¡#cxíñƒ¨?µgfª01­ ㆌ/¿Oúcìjn4Dœ×£êR%|j¹ì®qI74´p’B½º¤sQ©fÝž*ŽÄubês^ò£4Š–AýVÂe!:¶D4(³?¢†›?­¦L ëëÊ¢¾> é¨K «c¹P®í§jfE~ÊhKŠÄo¡±¥b!Ú« áwÈûk̵·ôøµ¾gçDÝ+âFÎ×ï1ZŸ Ñ6©ÿϤðå‡wÖiÖ{Àƒå}{V5Kâx"‰#Ó,jæ·ÔhçzÜñknPIfØ\šR%4òÑ:‰VLòðˆoŒ£Z%mJ»a¢ Ù’Šš† ×´îü—~b§æ˜¡Ú5}:pà‚EÕŸ>0]z?£ÅP †šMÙ‹Õjµ?å_/!pìØ1¤§§#44ÔKFÄa|³gÏ"22111u)϶–T=…RñըܷMÅ—’2Q3æ#fò,X¤ô{nK—.™Q$'wíüCäi$PZZŠòòr â!Ñ\×å.NÖî-Ó'CMj4K¶£²®k¨ÔŠÖ*//šzÀÙEÍt4ɩڽ’Ï¢@œÚÑ"Ú˜4ÑÊ8³´ˆ¦$1>IDAT“·†­ Y¨hb’%“¼³çÂø‘H8[Íc&m¸"© 3™\[—úÉì­ú»%?J„­Ž«žGÆýçµ—9åµÓ5ï¾û.RSS1|øpcõôÓOãÞ{ïElll'AÃ)½g%$@$à4ÜmÄÐŒ2É ^ºcjîGá›CÝ…<ÄÏ_†ð ùNµpƒÆ ¦›Cð"ºK?!Üq_/BÒåPÔL'\|LÍæÞe媈F_Ó‡«ŠŽÅ*šk°cÉ]Õ¿ªWÍ'‹æ%Vü†r롦ҩº†NÍ;]ÈÈÍÍ5ßvÉ7¾ñ Ì+ª2XH€H€º'h GüÌùâ>ÅÛÖ£ÂäÖØ„ÆÂ|ÄŠ -æU!Ñü.íž Ï |”[3‘2ò|Ôå½>ït!£×=á$@$@†@hrRn¿¶ÁÃP"¹4jŽîÅå7^D}þÄŠc¸Mü8Šo  w&à!Cµö‡¾ãs}ÝÑ?C_³ t& ¦Q±“gÂ*GÉÉ.&T¥ëÞFݹӈ™½Qc&ˆVCüa:ø»u®¯H€H€H ÿ¸DÈØµkž}öYÂVÃ×®\¹[·nm.~øa:÷ßœ³e ": !Ëï†5uJ6­AMΈF£îÜ<ÄÎP­Æ`j5ÙU ðN24ÃwVVÖ¬Yc._¾‡2;Tug"; þ% P­FÜÑj$%£xë:ñÕØ„Òµo¡îüYÄÎ^ŒÈLÑjDQ«qcŠxµgN"fÎbD$‚H¼(5üû©‡l–H€HÀW PÈðÕ™ç¸I€¼‚€`¢F‡U¢P•J^²íëQw6çQ›w 1Óç#\ŽX™-Ü+&œƒ  !@!ÃC&ŠÝ$ ÐLàI‹–K¶ð É.Z};P¶þ]Ôåj^ ÑjŒ› KBÔÔŠ…H€H€\M€B†« ³~ è#~ˆ>Ê„º-4 ¥"lÔ:‚‚×D«‘+&T3D«!¾ê<ÎB$@$@®$@!ÕtY7 ô ˆH$Ì[‚Ðô!(‘¼•{·¡\œÃërO!jÖBÄLœ.‚H T(a!  W á ª¬“H€ú™€‰@5x(¬b"U6d¸8†¯E­äÕ(|ãE#lÄÌZ„ˆa£μEýe¹ëSW’øm–p·9Ð$¨ª._DiCq ŽŠvçQ°o$@$@ýL€BF?O›' w$p%Üíl8™$~¥;6¢FÂÝ^¼x5’Ä/fò•P¸â@ÎB$@$@× q-‘^¾nkk3wúÉ. x «øi¤H¸Û–ø’Äo-Ú.œÇð•¨=•ƒ˜9K9z¼8†Çß}Þ2ã 8‡€O§NBNNjjj‘‘ñãÇ#((è:‚§OŸÆ±cÇÌuC† 1×KøÆÖÖV8pzÞßß™™™¦ž€€S‡ Û·o‡ÅbÁرc»¬ûºÆx€H€<„€†» Ή˜è8„\ÊE¹h5êNAþÅ\ÔN›˜ésG 5ÔCFÄn’ ¸š€× xúé§Q^^Žêêj¨Ððõ¯ãƃ]HPÈzÝï~÷;”––¶_÷ÄOAãðáÃxæ™gPUU…††ÄÅÅáßÿýß‘ššŠÊÊJìÝ»?øÁ0mÚ4#|DFFºzÞX? 
ô9˜8 ‰p‰@U¼EÃ÷í@éúwP{æ¸ÑjDK|"4Z €oð÷öá¿ÿþûFPá©§žBKK ^yåÔ××wúêÕ«±gÏ|å+_1שvâÕW_E]]>ôâýèGøîw¿k´ª¹hjlš5kð Á¥K—ŒÆ£S¥|A$@^FÀO´¹á’)4UUzÙ¨9  žðz!cÆ ˜={¶Ñ0¤¥¥aùòåÈÎÎFccc'V7nĬY³Ú¯»õÖ[p¢Âˆž[¼x1’’’Œ9”ÖwðàA446 ¤¤wÞyæÏŸo´$*å  ðRš3#~æ< |àóˆ»u‚âQ±s.¾ô,.¯[…šó¹hmnòÒÑsX$@$@EÀë…ŒââbcÞ¤¾ZÔÔIõ³èXôºØØXãsa¿NM§ÔŠO|ü°Ùlt|ì”ÏI€|‚€5IÃo_ÔO‘3 ¥¶…o¿„ +_@éÞh,/õ $ @g^ï“¡~vC‡®Ï;¾¶ãèî:{Ä”Ž÷èó+ÇýxóU+¢æWöHTö¶½áoss³¼®5Có†±q ]Pm S`ñ æ³-Aôtó¥Ë’”†°¥w£)6 U»7£âà.ÔŸ?ƒêé’ÜoÜøÇ&À¯‹€]ÖŃnA@?ßú9ïvÎÝ¢—섳è\ëo9çÛYDÝ¿W¯Go~…ìþ¬ºìabb"ÊÊÊÚ5ª±HIIéäô­7vu]rr2¬V+ÂÃÃ;ÕQXXˆH=SÙµ"×jQºì¸‡T_uŒï‰ÐåaCdw¯!`79Ô%ï'`ß@¨¨¨¸Á`ý`™4A’[£éÈ^4݇âÕ¯£@4ã¦Á6|´„»‡úu°¸?ýŒ755áÆsîþã`oŽ€~—ëo9çûæxyÃU2œE )›••e¢GEEEaÇŽ1b„Y „šBiXÚ>øÀ\§¯wî܉aÆ™p´“&M2Náê‹ÑÜÔlÂÙªÿFWapoÔ]:å­‘§4ô¯ o¡¡ ay£÷€7;{ö¬y?ëç…Åû hp -ºùò‘eHZÆO4Nà%š1üø!øm_Ks=bg‰f#uB,Y /è_º£­‘Ü¿aë}B ¶¶yyyœï>¡íØS4¸ª7^¯ÉXºt©‰&õ‡?ü8tè¾óï D⾯\¹Ò䶸ûî»qË-·˜hRÏ=÷œY8©c÷üǘóŸúÔ§ð›ßüÆÜ¯Z5•š¼Å"ÚG;"FEHL¬î`"¿æ $@$Ð(dôt6I$@$p= e5zœ8JvDéÖµ¨=u sQ3cbDÛ–6šƒƒ…H€HÀ½ PÈpïùaïH€HÀç¨FÒâ‘hSÃQ´q ªìDiÖ["pä F´Q™a‰K€_@€Ï±á€I€HÀSPÈð”™b?I€HÀ‡øù \œÃ­‰É(6¥›V‹Sx ^y5§ç vÖB„I‚¿ °p¢Â¡’ €ç á9sÅž’ €Ï!"qÎ"qÏ@ÑæP¹g*¶¬ã8bæÞ‚hɯaAÄ??g>÷æà€I€Üš¿•ÝzzØ9  ñö†mà X>ñÊD«Q¼á}Ô<ŒË¯¿ ZÄŠ°¡ZàÈ(Â" p2Üd"Ø   °X7m¶ƒQ$Ná;7¡jÏÔåžDôœ[3eB%›¸pð+âY  — árÄl€H€HÀ™4xê÷"lè(¯_%Yàèí—Q{ò˜„»]*Iü2Õ€° ô ýí’ 8@À?(1ã§À–šŽâ›P¶mjŽíEýùÓ¨™µ1ÓçI’¿tIâgq ÞJ$@$Ð[2zKŽ÷‘ ô;Øx$/» a#Q´aªe£dõk¨U_…Ë9jBbã˜Ä¯ßgŠ ð52|mÆ9^ ð2~Y*räX¤|îV¢OÉÁ¥Ky¨¹±³‰VC’ø…„xÙÈ9 p_2ÜwnØ3  ÐèRI oEØà¡(\· Uûw tí[¨Ë;8Ñj_¸xh  × áZ¾¬H€H  øùû#\2…[â“P"&TšÄ¯þìq\¼pUSçI¿°IÎ &ñëÃIaS$@>I€B†ON;M$@ÞM (<‰óoAØa⫱•{·¡|óûê(b,—$~Sa‰M`¸[ï~pt$@ýHÀk„Œ’’œ>cÇŽ…Õj½­ž?uꊋ‹Íu™™™µ†J¨Càüùó8sæ êêê––†áÇ#((­­­8qâÎ;Ù%6lRSS`êºpáZZZL[¡¡¡˜! p–„$¤Þq"ÇNDñæ, w»å[?@õὈœµHL¨f 4%þî1]ì €‡ð÷ðþ›îïÚµ ÉÉɘ5k–1úä'?i´ UUUhkkkâî]»öA¯KHHÀŠ+°oß>èuk׮Ŝ9s0bÄ 4wÜqvïÞmL§²²²°lÙ2ÓÆ¤I“0oÞ<ìÛ¿ÅEÅ8}ú´Ñ‚lÙ²;wîl7›jo”OH€H€܆€†»U©ôû>‡Ô‡G„8ƒ·µ¶ ä½•8÷§_ãòú÷Qw-õõnÓgv„H€<‘€Wh2Ô¿"11ј+é$¨&ÂÏÏÏ,þUøÐçZŠK:_§&QêcQQQË—/3'}­Eý:ÔC5z>&&¦½~mK5çΟCtt4V¯^mü3TàxðÁñÄOP›a(ò? 
pO b&LExÆ”îÛ…Ò­kQŸ{—_ûªìBÌœ%‘ Kœ8‡‹o @Ïx…¡‚A ìNÙ‹:dké¨ÅÐ××^g(ôºkÏÙëÐû´èk?õ—¢×ê=ù‘zä‘Gð±}ÌhHÔ/DM°TÐ knÌMòŸ ,ú¸¶_öóžüWß«««ÑÔÔäÉÃ`ß{@@çZƒ$¨Îâýô»K‹7ηeüTĤ¤£dç&TïÝŽªGP'aoÃÆMGäô¹–sþ­JlxÿDw¡~¾õsîsÞa˜|ú!ýŒëo9çÛwÞ®^^]™{0SÕH”––¶/ÞUË ‚€jìZ žj8ô:¥¥ãuª(//o?§Z uìV'r›Íf>t­mWîS͉š[i»jr¥‘¨TÈYºt)¾ÿýï­ÈÀ; >Úž~awl_yKinn6<¯μe|ÇõêÅœDç]ß×,ÞO@ç[‹=’ž÷ØASæ"rð4Þƒ–“‡Qµw+ÊîBÛ°ñ5ÖÔtEDvú]ñ>WGÔÔØ„¦æ&]=Èg^K@?ÛúÎùöÚ)¾n`2®Crý¡C‡߈¼¼âMkŒSxó‘]¼œ‡°Ù‹Å9|:¬RæÝdtº)¦›oC† ñú±r€@mm­Y×èZ‰Å7覻®o]UþKŠ«*ï«zÕâ7Þ@nn®ñÃШP긭ÞªuÐдãÆEzz:Þ|óMèâHÃÔêu·Ýv›¹N5wÒ]Ù'NJ´©Õxøá‡#¸ ¯½öšÎæÍ›qøða<þøãHJJÂoû[cUXX8F;ï¼³Ë}Å£?ÚQÎ*@i^ß  ‹5ì*oð­Qj€ -¾=ÏÏ?ÀøbDŽ FRª87 ©(_Ž=¨>{­OÄßbE@ˆþ²å­EUƒ¥¾‡,ÞOÀn§k*ß  š õIVËW¯2ta«’÷|`4ª•øâ¿hBÊêâ÷å—_Æ’%KŒy“^§Ñ¢4r” *,èâX?TZž;zô¨>î¹ç³€RáDwñ4•j:¾ô¥/™°·ºƒ«fV[·nņ ÌñW¾ò£Õ°û{¸bÒܱN î8+®í… ×òu·Ú}IȰ³W‡ïДˆaÃ?:Íõuh¼˜‹ªƒ»Q{ñ<‚À°pJo,2¼qV»…ŒîÙxëW ~ÒÀÕ¯NQÍœôKQ}(:úb\;¬]§»6z¯fì¾¶ý‘Õ]ÛŽNæZ·"T§gm×ׄ ;[5—RaŒæRv"Þÿ—æRÞ?ÇGè;æRGÝùy}a$ó[‡Šì-hÌ?/ÚŒPD͹ñón5q€D¡ î|ƒ‡¿¢¹”‡O`»o7—Ò„Â,¾AÀn.5qâD— Ø«ô¼jÖ¤Q>ªÜè:5ÿè®tg& ÂHw纫‹ÇI€H€<‹€Iæw罈ž0 ÅÛÖI¨ÛÝ(Íz uy§‘°ì.D  BÅB$@$¯28Ÿ$@$@$àJ~º™5x(B%ÒTyæ$¾ÿjOÂÅÂK¨[|â¦ÍAˆäÖðû0ç’+ûºI€HÀ PÈpçÙaßH€H€Ü’€úkÄŒŸÕn\ÎzÛסðÍ¿¡þâ9$Þr;l©ƒà/f·,$@$à«(døêÌsÜ$@$@Š´»ï794Š>x ;×£ñò%Ä/¹aÃÏŒáSf$@žH€B†'ÎûL$@$à64ÂTâÜ%°ˆó÷åUÿDÍѽ8ÿÇ_ |ÂtDϘ/æUà 5µb! _!@!ÃWfšã$ p?É—%¡nCbâP¸aµD ÚŠÊìͨ>´Sæ!fÆ<Ø–ŒáQô×pÙ,°b w"@!Ãfƒ}! ðhÖ¤¤}âADIfð¢«QsdÊ·¬–Ü»5s¢§ÌBhr*‚Â$ •D&d! o%@!Ã[g–ã" èê9b ÂÒ‡ "ç0Š7­AíñC(~ÿ¨Ü³ Ñs– JÂàjn@ÛG‡]ï—A°Q p… òv  èŠ@€ÅŠ˜ S>t$Êd£dó¨Ï=Â7^4æTÑsoATæDXâ¡×² €7 áM³É± ¸ q O˜½QcÆ£${ʶo@ý¹Ó(øûP1t4bD³1r,,q‰ŠaoÝnÙ! 
^ Ñ+l¼‰H€H€zF 8*–܆hɯQ´m*voAýÙã¸x&c&#V"T… Ž`‰Då/Žä,$@$àÉø-æÉ³Ç¾“ x?1JBêŸDŒ8oÎB徨>œšœŸ4 ±3À&þÁQщŠao=l‚Ù]  PÈà[H€H€ú˜€Ÿ¿?l)ºâ!TOc"QUÚƒÊë%ìm6¢f,@´•k‚#"‰ªç‡Í‘ 8N€B†ã Y ôŠ€æ×—Ìà¶´tTžÊA±äب>v¥kßBåÞmˆš½Ähš‹ƒ…H€Ü™… wžöH€HÀ' …G"qÞɱ1—³ÞAÅ®M(Yý:êrO!ñÖ#bø(&òóÉwMžC€B†çÌ{J$@$àKüüŒãwÚ=Ö1EkÞDmÎ~œ/¸€„;>…ø™ó)høÒûc%#@!ÃÃ&ŒÝ% ð-¡6$ÌYdÂÚæ¿÷*³· P|5Úš›;u¶äÕˆa¨[ßzKp´$àü=¢—ì$ ø8[Ú ¼÷³ˆ[öq´TV àÆ…×ÿ†ÊÇÐ\Såãt8| w#@M†»ÍûC$@$@ÝÐ}ñâ«ÑÚPò­ lã{¨9¶1‹nC̤™°ÄÅC#U± @ Ñß3ÀöI€H€H ¬ z÷ý°I¸Û¢uï¡þìq\~õyTÈFÜâÛÄ)|´ u« ÿXH€H ¿PÈè/òl—H€H€zI@ý4âg-@ø°Q(Úœ…rÉ^{ü .äž@ÄŒ…ˆ›»X2ЧÓ1¼—|y €ã(d8Î5 @¿°Ä'"õ®{5nŠ u[}pÊ6¼‹š£û+&TÑ“fÀ› &TÁýÒ?6J$à»(døîÜsä$@$@^@À/ ÐäÓ°¦¤¡ìà4¯}u’À¯àÕ?¢òÀnÄ/¹Ãh<‚#£% M¨¼`Ê9ð2€· á­3Ëq‘ ø,¿€„Kô©Ðÿe¦¢(ëmÔªWžCåÁl$Ü"&T#E*Ÿ}—pà$àZ2\Ë—µ“ @¿°X'YÁÃ3F¢hë:”mÉB­äÕ8'ao#g.F¼D¡²*‰VÅB$@Î$@!Ù4Y ¸!˜X¤ÜöqDd¢PUíÛŽ²uo¡æÈ1¡ºÑ¢íŽŽE@ˆÅ {Ï.‘ x" ž8kì3 ô€ŸÂeÀúÀçQ>q ?ªSG‘ÿ÷gQ¾gbÅ1Ü–ž ð†…÷°v^N$@ PÈè̃¯H€H€HÀ« ¨ Uìä™2ÅÛÖ£tóÔ8Œs‡v#0:a™“1v2š£bÑêç¶ÖVúmxõ;‚ƒ#נᮬ•H€H€Üš@Ht ’—ß-&T…jý{¨>²Í¥¨Ø±[Ö -2ÖñÓQe –зi }ëÖ³ÉΑ€û á~s‘ @ŸÐä|¶ƒ0øáÇÑXQ†òC{Qq [’ùGseê¶®ÁÙ‡·äNÄL™‰è8fï“™a#$àù(dxþr$@$@$à0Íž0G"NÍZˆ†ÒbìÞ†ÂíÑšŸ‹‚•’=|ÿ.Ä/½ ÃFIèÛHšP9Lœ€w áÝóËÑ‘ @¨v×€ˆéóÐ2‘Åù(^÷.jOÆù<Í.‚ȼ[$ôm Cßöˆ,/&ß"@!÷曣%  ›&¤±ã9z.¯}»6¢lÃ;’=|ân¹Ñ“fˆ U¬dºé:y! 
€oð÷ar”$@$@$@½"àçkR ~òa¤á››8 -UåúöÈ}þ×(?ŽÆŠr ­­WÕó& ï$@M†wÎ+GE$@$@N%àŒ¨Qã`KŒ’ìí(^û6j4ÙãæÜbL¨, h uj»¬ŒHÀ3 PÈðÌyc¯I€H€H _I¢¾¤ù·ˆ ÕXŠ UùŽ (]ûªf#Nã'NGpT ü¹Äè— b£$à&h.å&Án €ÇP*ÑZ¤­xƒûÂÅ„Jslä¿ô òþü[T;ˆ¦Ê šPyÌ„²£$à|Üfp>SÖH$@$@>A@¾#GfŠ Õ ”ìÙ⬷Psd/rOç zîRÄKH\KB4Ë8 €o á[óÍÑ’ €Ó ÚÂ8o "GÅåu«P¾}JÖüUv#~Ù݈?š‡Ã&TNgÏ IÀ] Ð\Ê]g†ý"  #`‰ODÚ'Àà/>‰ˆÉsÐ\V„K/>Ü~‡ŠœÃhªª¤ •‡Í)»K½%@MFoÉñ>   ë¨Ã·fMˆÒ}»P´æMÔÊFî©£ˆž¿q³šd4¡º€W áUÓÉÁ €{ µ!A õÙ¸¼~ʶd¡øý rßÄK"¿¨ñSŒ ù¹Ç|±$àl2œM”õ‘ ´‰GÚÝ÷›Ð¶«ÔOc§1¡*Ïž€„eCøáP„þíÈø„¼‚… ¯˜F‚H€H€Ü—€_@ Â3FÀúÙCéž©(\ýOÔ݇¼39ˆš½Q¦óª@ÉÁâ¾aÏH€nš…Œ›FÅ I€H€H€!`L¨æ.FĈ1È_ý*vnD©„½-Ûðlc&#zú<97AQ"lXiŠ÷’ ô3 ý<lžH€H€|€æÎH¿÷³¨¡¢xëzTÙƒêÃÙ¨>°–!#»`¹„Ã'ÂF$5dž$ÿc!ð,2ቪ¾è2Š·¬CùîMh8wÿøŠR!fÞ2ã cü6(lxÅÔs>B€B†L4‡I$@$@îHÀO´VÑl¤}ü~$.\†¢í›L2¿†KçPðòÿ¡äƒ&{xô䙉‰ƒ&þóógš/wœKö‰: Ñ‘Ÿ“ ôàèX¤üËÝH˜»%ÙÛQºe-êÏÂå×_@ÉÚ·5k b¦Í%1 A¶pøô[_Ù0 À Pȸ1ž%  ècAá‘HZx+â¦ÏEÙ¡½(Ùð>êNCñªWQ¶ñ=DL_€ØY : ‘J² ¸~*Ýk>Ø    h4ªx4b&LEeήµ9Q¾á]TlÍBø¤™ˆ³¶ƒõó`!p2ÜcØ    nh8Ûèñ“9z,ªÏœDáº÷$ÕTJÜÊÝ›%üí$ÄÎ[jœÈƒL® †¿í%“@Ÿ Ñg¨Ù €#üƒ$"•äØÐÄ~5rEØX%Äw¡FBàÖH\KÆ(ÄοQ£Ç›ð·þ–…H _PÈèìl”H€H€H ·üÄ#lÐPØù"ê îFáÆ5¨ÌÞŒú³'páä¥AüÒ»+NâªÙ`!è{2úž9[$  p ešœŠA÷} Ëî4ÂFùöõhÌÏÃ¥—žACa’o_AAà ¬Y ô”M÷”¯'  p;šC#íîû1ü[?BÌ’¡­©%YoàÒ;+ÑT]‰¶ÖV·ë3;DÞL€š ožÝ>[€Ä*ׄJ,¾CÀ_võÁâ8×¾1ÏG©s®ßížVBbãöÂ_œÅK³ÞDùŽ Ðçñs— 8"ê×ÁÌá×Ϫþ†{â|_?¹Y®žs¿6)7Û^GÝ(--EDD«¼;D^w¼²¢Á!Á°XÅÅë&·‹ÕÔÔ˜£6›­‹³<äêëëÑÐЀÈÈH^KC=jòÎH4ª¦ÿþânK[ò@:BÞ{sýk1-ú×ϲ–¶Ö6—›ï|ýüÇÆÆš÷„9éÀ2€Ç[Í›7ã?øª««Ût~ûÛßbÒ¤Ií¯Éɳ è“þØìÙ³O>ù$þõ_ÿŸþô§ÍBäìÙ³xá…ðÞ{ï™AÞwß}ÐGJJŠgÚÇ{¯‹ ÿö·¿aÿþýxðÁ±`Áè{áïÿ;ž}öYó\ó(Œ3>ú(fÍšåãÔó¿üå/ñÿ÷Ô^yè”ë‚óüùófA7æÎ ÷ÿøÿÀìÙ³Íwºn }ûÛ߯øñãÍw¹j:X<—€jžW­Z…à­·Þ2ÃÓ¦MÃôéÓ¡ ?úÑŒà™••…Çãž{î1GS¦LqhÐ2ÂÇ›=j„ »¹Ì°aÃh*åeo ]€;vÌh(: j:¥óÿñÜìjª`©»Ü§N2*YW%÷ñ2¼n9Ýââb3Š7ß|jÇï½÷â«_ýªùqj¿O<Š€ öM5}ݳg¯Y\ê·~Ç«ßÝ?Cí¸U»a·ëö¨²³†€né÷º>ôw\µï¾û.l~»7mÚý¬éK_2¿ë*üä'?1¦TDè™Ô¯FŸU ÐÍÀ_ýêWF˜˜7ožùW¡CÏkÑkÕ"EߎF—r” ߯Îbú#ô³Ÿý̨×ÕfwçÎf×[,ÞA@t~ǣ„ Mãôµ}g¤óÕ|åItN;Ϋ½ïº6þ|c2³cÇcN¡ µçgñ|ªzÿý÷ñꫯS‰E‹™÷}AG¨ßj«»Þ,žM@ ÕHë¢síÚµøÆ7¾a4*D~ñ‹_4Ç6nÜ}è.wÇ/ž=rßî½~~Çg4ÓÛ¶mCkK«¢› ö¢Ïuçh¡á(A¾_ß„O<ñ„1©ÐÝ-UÅ 8кÆâÝtwK (õ¯>Ô”FMgh>ãs?tèP|ö³ŸÅ-·Üb̤ÔYTMèÔlŽÅ³ èw¶:x?ýôÓÆDB¿ÛÃÂÂÙ”šÅé‚T‹j)u±iJãÙ£öÝÞë|ªVòü#ÔÿFµÕj‰ 
¿ëú¹~òÛO"55Õìh«I•šÌªé,‹g°ÿ>«à Úgõ©RSgý¼[¬³q`×Nêµ…*99ÙáÁRÈp¡ïV ª´Ÿþô§8|è°ÙÕÒ)µßŽ‹‹£Ó¯¼-t·KC›ê•†AÔ/%}®»Ýú`ñ>j£¯ 8¥ M*UȈ‰‰ñ¾ÁúЈTpP‡O]p¨3èמøšÙ(2¤.L.\¸`üp ò Œ AgÏ~ƒè÷µZ¨©ów¿û],[¶ÌüŽ«6KµÏþáY3ÏMÆ~_¿Óõ;ŸÅ3 ¨™”óÐïo}®ßßê{¡šhÕNjuøÖãú8räˆ1Ÿst´tüv” ߯?2'OžÄózÞ8k¸;•„5òŒÝ~ׇñxýÐU{¥‘)ÔlFm9Õ)X+ê ÊH$^4ýÌäÔAx÷îÝ&ä¥j°Ô´Fw½233½hÀ¾7”ãÇ›PÔºÀ˜:u*²÷d3)ÝÉV‡`5¯xþùç‰Å¦Í›L »‡ïÑòü«©›nüú׿6¾Wú{½~ýz£µÒà-ºqðç?ÿú\µÒꯡáNuS‰Å3 ¨Ÿ…n©ÆJÃO« ¡šË»ï¾ÛR?ãÇÇÁƒ‘+Ô_ÃÑB!ÃQ‚>|¿þð|ýë_‡†/}衇à€o|óæMjlø}˜7]môuçCýÂÒ¢jWu Õ0ÝåÒh%ºH±Ÿ÷F¾4&ݽÔ&.´è‚S”4t±~æÓRÓðØã™_ââmc½té’Ù0RÍÔ×¾ö53<]xêÂãË_þ2n»ív<õÔÏíþ­·ÞŠÏ|æ3Wü´¼ „ŒG˜jâ¨ÂÄêÕ«ÍC‡®Båücó®ßíÒV¯Uó©û·ó:Þ9LýýÖ`ÙÙÙWÖkòZçXûM^ õϸÿþûÍo¹:†ëûÁÑâ'»PmŽVÂûI@CØê‚D¿´X|‹€~…¨©œ:‡vtó- ¾5ZÝñVM†n4tt ö- ¾5ZÕRêœs7Ûwæ]5j­Zkï! >VºÜÕﵚÑiˆò®ô†…ŒÞPã=$@$@$@$@$@Ý ãw·hx‚H€H€H€H€H 7(dô†ï!    è–…ŒnÑð @oPÈè 5ÞC$@$@$@$@$Ð- Ý¢á      Þ Ñj¼‡H€H€H€H€H [2ºEÃ$@$@$@$@$@½!@!£7Ôx @·(dt‹†'H€H€H€H€H€zC€BFo¨ñ     n PÈè O ô†…ŒÞPã=$@$@$@$@$@Ý Ñ-ž     è  ½¡Æ{H€H€H€H€H€º%@!£[4ýéOãsŸû\'AB/¼xñ"ZZZ0|øp#0\ÛmWëüÛßþ†qãÆá¾ûîæM›ðÛßþgΜÁôéÓñØc!&&+W®ÄàÁƒñÈ# ¦º/üå£QˆÍf»¶j¾& ¸I·‡nò&^F$@$@E@µê#qÏ=÷áB…  ***PXXˆ¡C‡báÂ…X¶l™@Nœ8“'Oâ¿ø…1µÒ×*t\[Ô´Jëñÿ·w¯”ïaÇSÎ-—¢HVÜ1gFl8$%)¥ØX)gÁÒ?ÀFò(–;™)³³˜k) ’²’ˆ¹ E ‘Ý\ŸçvÊqîÎ1³ø~ëýÛ¿óûý¾ç;¯oóô<Ïׇ//yB™‘ùùyÏN zùÔââ¢577{ù–²KKK~]ÁŠÍéÛé7;>>¶ªª*ŒYÞ@  È(Ì‹»@òP9’‚e/†††<(8<<ôò) ¡LAEE…UVVz锚¸UZ¥lCKK‹MOOÛÀÀ@Ö§©¤I×Õø]__ŸuM'êñP¢ûÆÆÆ¬ººÚöööìúúÚÚÛÛ-™Lz¹Vqq±g,T¶¥²*Ý£ Fã¶µµåŒË €… P.U˜w#€ä) ]¥ÔwQVVæYý¬R'õFü×Ñßßoççç677çFoo¯g}é¿»»ó,HII‰g^Ž£FðËËK»½½õ¾ ú¼õ  ïêßYww·544ØÖÖ–mllx Ö›¢7Þ/òr\Î@  “Q˜w#€ä)ðððàeH*mRy”J’ÔÜ­r¤Ì¡l‡¾ô«lI ×###þ¥vvÖNOOmyyÙ›¸3÷«á[Ï455eÞÊzíìì´™™ï÷PSùãã£ÕÔÔX:ö¾ e4¾~ýÛ>~úèeZʆ(“¢Þ‘‹‹ Kµ¦<(Ê”@‚2 &ã@| ìïïÛææ¦­¯¯ÛÍÍg Ô‘9´«Óýý½ùNSSSSvppàýµµµ–Hüû·42÷ë>•7%“ï3oe½j<•Z©ÏãääÄ´­m¦AüË—¿ìèèè)Sò§MNNúu•h©dêêêÊÏ)•ÊâäxµAÆ«éx@àÿ´-¬voR“¶vˆš˜˜ð]¢´}­ú!´E¬‚ íì¤àcaaÁ3 Ê: [__Ÿg8Òé!¿OŸ£Œ‡Ê©”yûöœÖ¸*ÑÒkOO¥R)nÔûÑÑÑaŸ?/[WW—­­­y£ykk«¡> Êj466æŒË €… =ý§ý½ðÇx@üT6¥_5¿—“pNVÎq @€ @€ @€&ÐÕi ² @€ @€ 0yôùišô7;>þ•ÅíÑ/ Ú I#'é9Yš¾²Ù‚²<»6ú·5«ÑG`²îÁÉÊ9Ž @€ @€ @ Ó²N[õ @€ @€ @€ @€ @€ @€ @€:I@´“®¦µ @€ @€ @€ @€ @€ @€ @€' Úq—Ô‚ @€ @€ @€ @€ @€ @€ @€Ní¤«i- @€ @€ @€ @€ @€ 
@€ @€@Ç ƒvÜ%µ  @€ @€ @€ @€ @€ @€ @ “„A;éjZ  @€ @€ @€ @€ @€ @€ Ðq wI-ˆ @€ @€ @€ @€ @€ @€è$aÐNºšÖB€ @€ @€ @€ @€ @€ @€tœ@WÇ­È‚ @€ @€ @€h‹@^«}3/¥Ûl°¼–~¹Y¿> @€ @€ @€˜º€0èÔ @€ @€ @€ @ #6W.ûZ,¬þÇ‹ @€ @€ @€¢@vÏíÔ @€ @€ @€ @€ @€ @€ @€ @º  @€ @€ @€ @€ @€ @€ @€S@ô`ê;7 @€ @€ @€ @€ @€ @€ @ @@´H7 @€ @€ @€ @€ @€ @€ @à` ƒL}ç&@€ @€ @€ @€ @€ @€ @€ƒé&@€ @€ @€ @€ @€ @€ @€LaЃ©ïÜ @€ @€ @`zÒéö :ÓŸq(]Ci.ÓuCÍôûe´K'­eôºÔûN¹ßÝꎙyçé”{|æÉ›1 @€ @€ @à t„s:% @€ @€ 0EùóçÏ9ö§/JÓüÉ‘yp’&NóäÁy’ž’¦I–'ùÑ~C’ÇŸ4½.©Õ>ö³üîO\S©ì˜â©§åðùåò‘Çf=Ï(%éIžŸž§ééIšŸ'‹ÏÉɱ–»£íæ4ÉoΓäWñó+ÕZö…<ÛyÍëÖýxZ&5ÉA/^|l6çèWƼŸó>'æ{F\Ÿúµ¸6†¬ÿùv:2òñ/ûÞ$OQtXº¨żî®ä‚<ÉΉs?0¼NJòôI’Ÿ÷ÈH ð‹¸wnŒ÷7ÞsÔ®Ivå^óó¢Ávÿ%gæÝ¥gÅz÷úâç q¯ë8!æv|ÜïGÆ=´#O“[Ò<­ß/7‡Á-ÑkN¿°»Zý-•5ß9ØëØ{þ… /~À¬ž®'ÕÒìÉY’?1æúàu×Íù§¯ºjõ{Ç:vг0ë[ºìIyž=)ËÒ'Ƶ~tÜ 'ÄsýØ{îéäÖ¸¾?ûú§q¯ÿ4¾Ë_ÚQÛQ¹²Rùåáxݳ5wô3ý0»––K€ @€ @€˜’@ü7^ @€ @€ @€@] oéÊF¸¬¿@cûÆu«žPPÓ°{ñ’•—Òü "àõÎáõCïjПõ-|f„VÜ3Ïôèuã6ß*»ª–ç7ç;.O*•긅°1B‹OèîN_ᦋ÷ž&qî<Ï¿¿ðø×êÎÛVmÞ¼ù–I q¿Cz—¬xu–¦¯º_ǽ µ$ý¦õ«×îîsWqÄqÙŸeiÇ^›¸ ɦ$yëðúµ_=ÎdÞG ®ë¸“N]”&é Âñ‚ð8i¢ã„cämÓ¯DÐðCñv}¬ïÛc:ê/(—Ožö\÷üüp?Oêyâ{aéü?jymËÍ7þdý¶mÛêÙöºðÂKŽ›sd÷@Ìc0Öó”)œxw|w¾$µOìI7oÙ8ôõ)Œ5î¡}å•‹Ò,ÿ‡q;ïm¬%éßlZ¿j¨YÍØ>ÏÂ{D•—?¼«”®Œ{{E|2Ö©ÙçøžŽÄ÷ýqý/»é†Wâ>¾{l}}üî,ûÐØö½Ÿ#~óðúUOÚûùPüÙ»tù²,-­i6·°xu“¿;›:nßâò%-•ºÖÄ÷kö¸£Ó䮑juù•µßÝ<•÷‡ú3}QÿàãâØP´ÆÝ#ùÅ[6®þrQ];ûûV~(Íó‡73îùÛnºñº'èç~£ùh'@€ @€ @€LDÀΠÑRK€ @€ @€¦(&µc’4;«ñ0y}wÃû½.ê_ñäRW:a½sî×ÙbC„ÎæFéâRš.îOzþ4Y²â7nXý±o[Ù‚ËŽž{tײØÍñ÷"õ¸=O៯ “scŒÊfó–¾¥ƒ¨í¼í/¦ M³Øm2ixb·¾cFƒ,Z4pJ÷Ü´¸Šõ´´˜ÈšÖƒÇ]ý}ƒWW“Úë¯X·æó£Çlõ}ïÒ}"ûÛ©ÜõsÅñõ‰Ï‹±æÅÛ×G8úÒ‘;ÿjË–u?ku.í¬ëë[ö¤»ôº˜ÙïÅÌŠY8ù=aÙtIÌ–òéoìXñ–ë¾÷­¡íÛ·ïžÀ0.]P.ŸØ“õüMœÿwâ6™ÕÒÒü,Ý1Î3’${FWWþ×ýK7%#é_mܸê›Ík½7ϪG¥i©áw¡>R¯öû>´2úáþ,¬?3ºzf¿9îíÁðÊZ1[ßÓúïzŸ‡??îã×÷-Yù’á «>=º.BϳâËÝäúå±Û¬×h:>&+%²­ï:<ºg¼÷ùy-YØÎ èLx¦Gðükñ¼‰Í¼Ó‡Ž§²·­«kÏý}À •—=2.Ù…1¯½S¸ÿÏ<ùwAÐû³h!@€ @€ @€f†À¤~±83–f– @€ @€ @`æ Ôw‹Ý@ßAÐÏFðgÒAÐûK¤ç%YöÑs|¤Jºÿô´Dðñ’¹G—~›wGÐæž 
h›NÙ¹aôÊlÎ1ßì]:xQ›†-¦níꙡÚÉ­'—çgµô1…'SpÑÀò§Åõû\–fÛ{oì9Q=¬úâ«}/î¿7Î/—súiûxÑÀÀ©êý—¤»ëû±®?ˆy´5:vâ:3M²;ýìó¾×[|êØþv}î_ºòs³¹ß‹5½,‚J³Ú5îÞqbܸ•Ò¾¤;ùzy×,8co_'üì¤gaý^¨·âн(®M[~_÷ñÙ‘ÊÝÖ?°ò=õgR'\󃱆b>ª«;ùd|™Æý‡öŸS~[^«>oxÃЧöoŸÜ§öLó®4M–Ö¿»…um*èJKõpuÓW5­®jZ “ @€ @€ p ´å—‹‡ðúL @€ @€ÌX…å=áäÓ>›fé_GÎkzÂiº ‚ŒŸêë[þ é„ÚäZºò#`³6kÓ,ŒPÔ)‘dÜÂ÷Æš¦ûw!i„[×ÅõyÔdýò<Ù¹ëÎêú‰_hv%¥Ï…åÓ&rÜDk#¬67î¿×Ÿõüg_ß%§Oôø‰Ö÷/YqA)Ÿ}mx¾rºC cç÷æéY)½º¯<øÂ±}SüœF8ó÷ Óýw”ⸯy—eɬ/Æ.ynT4“Ú;éYXÿîÖï…úw«Ý× ¾7ñøK^ÞsL×Wë¡êvßéãíÙU2MëAЭ5žÛ·T«ÉooXû¹¢ÚVúgâ3=ß]]•Ç«ÙúÂò'<ðÔç6«ic_–gɲfãÅt»p¦Y> @€ @€ @€À¡,0ÝÿÄ¡¼vs#@€ @€ @€‡¬@oïà ÝYúñØmòÉÓ=ÉÈ“Ì*]}A¹|òtœ«·wÅ?ù´ODLé¦cüFcƺ^!¼¡¤\.5ª™j{Nÿ ™ ¦2N¤·¶|øÃ—ÝÜÊóæÍëî[ºbU= ÙJ}»jÂòÜ´»û }K—?¾]cާo`ÅŠô®è¡9£þþ‹Q*u]]ÿÞÍ0Oò_Œäço® ýgQmQÿL~¦_v]x]]´Æ$K—Ö´¡`ñÀ²gÅ|Òt¨4YÕ´_' @€ @€ @à=Ä/é @€ @€ @€Àá'p^¹<+›•nŠÞÃÔê#<ôˆ9¥žµí>_ßÒeOLg§Û# ñÌvÝÊxÂ[Ö_ê¹,jc‰í}õ÷_rv úÖ©Žš×ª-…SžS.súÙç}4M³Á©žsRǧÉÉi’ýG}÷ÎIßä Þ•¯IòtUÜóÓ³n“síŠ9Äm“¼cÑ¢SÆöMôs_©çÏc°‰×¶ú49¾»+ûDïÒç¶mÌ8P§< ëëè_ºr]нü€ñ¥éY³²äê4ëšÖ]ŸØz¦ñD½KÏÉ»»#šœTtš‚þ¢Z­>kËÆ¡¯ÕõwÂ3½–T/-Zgž§Í/—§uGðúb[Ц7îÙÅt×ÈPÑ|õ @€ @€ @€eƒþ õCÇÜ @€ @€ @€ÀÁxx6ç}]|F ç®E¸á[`ùYž¤¿L’¼Ç¡ºÓãýC&¬‹ÀæùŠyÁ¦õCjáÜ…%‹ÎH“Ò§"‡yDañ¨‚Û\—äÉ–X×wcE7äIrCìÖxlšçLÒìÜ$Ís­ïvØâ?z™.‰¼4÷Š }þDxã;ûãxŽÇqùâ¾øiý~ˆüÉi–<8ž‰{ôEÏÖx>߃÷zNd†Q»¨¼ü¬púT†fã:ü,ݽûÙWl¼üÚ©®¾Sžé7ÔvŸRê¹-î±xîŽÿŠ{uîqÙÜ¾è¶ æsŸ»âˆ¸ŽýãÏ`_ë¶ún¦û>yC€ @€ @€˜ 3ð¢™2 @€ @€t°@žþnNm´Â=;[¥Éæ´V[{G¾óS«Tn¯vÁ‚eGÏ9*{Q–¦¯ŒðØÃÇ«¯-1oKÊå+“J¥:^ÿDÚ²dö{¢¾Õ h-BOÿ^­Ž¼{s岯58ϾjïÒå¿;d¾£ÕGc]oÝ?±iýêo7{BÍ1Þ ÂuÊ;dÆõ\»mÛ¶‘¢“Ÿ“õ¼-5-A#H¶yd¤öæ-W¹hìyóæuŸzÖyÏ)%éÿŽÌØü¢ú¸?»’R~ù…^ò¸ø²›‹ê›õ×QI©ôÍjööí¹÷“|udžÿßð†µŸöÈfµöêï_y^Þ•¿,î—ÁXã1­µ/\Ô?ø˜Øðë­Ô©IK¥|U¤ ÁÖ-I^}Óðúµ_3ÆèŸý¡¯ï’Ó“îî? 
ï‹â>œ5ºo¼÷qï,ŒïSÛàïmoëgaïÒ•/ŠçEëAÐ<¿"Ï«_t/\¸pn÷Ç-Œ{ì¯cüG4òûø„F}õö4»ã0}ÅîÎg楬-ܸΩUÏß²ñòï·ƒ«Sžé×T*;úb×Û¸‹^ÚÌ%ú—Gÿ´…A<>‰°ióx"þÑŒ4›£> @€ @€ @€ÀL(üôLX„9 @€ @€ @€NˆÀDã h’oªÖªyEeí·ŠÖ{å•kc§®äñç]}+V&yöÞ{vÑqô;·7ëY°)Iö/‹Ž¯?šËZ@åùçcWÇW ¯_ó•ñƯmÓú5ÿíóûʃ/L²ôíÍÜö;xFèéýñþ©ã7á¶4]ù½ò›cGÓ÷äi¾½V«][MJ·v'ùYB<'˲ÇÇn‘Ë#ÀwdýèêH²ªÑ({ÛûÊ+ë;wþáÞÏMæÉ7òdäÅÁÂý†Ø¾}ûîøse4^ÁµÁ,IþoÌõÄýŠÆ|ˆ€Ùé³êú÷hŽÎä_ç”z^÷ÉÙ…#äùcÌNä=æÆ«¾Ÿ_}A¹ü–9¥¹ÿç¼pt£÷]¥ä…Ñ÷õFýÚ{Ë+žÁ¤óõ×ÛëáÖ'½8vâ½´YÝx}÷în÷²…åÁ7ÏÊâj%Ä[Jþ6Æš?Þx‡b[³ïthgijð9åò1išÿ]Ü …ı¦ŸÄ ñq?Ôw-|mݺõ®(ZêMÈæ¼6vMþËø<§ð@{ê;W'yWAÓ‡’Äó§–î:KeÝ k[(è¼gúH<úš‡Aóüü¾¾å^óóˆ&^’ï ú7>.Ïï¸óæ|cã= @€ @€ @€™!¿Ï÷"@€ @€ @€eݱ±ß^7Ô×JtÌZòáu«?PM«çGØècúÆý±¥‹Æíh±±·wð„,im·ÇˆÄ­ùõ×?s²!¿áÊÐc]ω="o-š^„ËžÒ·d0BzÓóŠk4’Ôò·Vï¾íÌá C»ׯׇ*C?ÝTYý›6¬~߯u«^qkmÇCbÔׯ¼·í:yÑÀÀ©i–´Ìóá_×îzÚD‚ c%6­_5”ï®þF¬¥pÕQõö.YñûcÇhõó¢E§dyR°5}Å\¾»{Ǯߜì=2zðT*7 ¯[Uò¾wt{Ã÷éž0hÃîFi–¾¸Qß¾ö<ÿÓðníÚî;hÿ7[+C×WwÞÚ[7Ú¿çþŸâz=3vB,ÞÞÿÐC¦e¦= ÊzþO¸?°0ÿæÝÕOl5:z¼oV*»6®_ýæZ5??|nÝçýø¢>-Ëg*Bù§_q_kž$ß‹gâom^מ h'>Óãïœ/„Óµ÷©ó.MKiwvñ8=SnzAyðÁñYœßl ˜_媫VßÙ¬F @€ @€ @`&ƒÎ„«dŽ @€ @€ p8 Ü]©?¼~èߦ‚pź5ŸOjÕ¾E&¢ù+ÂKõ°\ñVv †Igåo/ÚY²~hLåŸ7®ܶmÛHƒ¡ZjŽÕwkI¾¼¥µ¥Ék[tâEµXóʆþtóæÍMYŸ¨TnÝ´nÕÿpoaè¶”Ï~gŒ{|Ñtâªþ¿ë‡^¸­R¹£¨¶¨¿¾sÛÈŽÝšÍë»i6}eYú– –Ý´¨AgwO÷KöîÚ ¤~“ìªÕF–nÙ²îg k&Ñq×­#¯‹/Bá˜õr#@yæDN±pá¹q\ÓSãzmÞ°úm·Qmý~KGF^è›ÕìmÏ»ºš¦öÖ¢?gÔ³°wéŠGÅC´…°tþÕ]wîž_*OÅ}Seèš‘‘„&ù¯¦2N§[cÆnºÛâ8£h­ñwÊ·“]Õg¯ýIQm«ýúLûî…i²¼°fÝ¥=ã6ýïòZ¾jC;„ @€ @€ pÈ 4ý¥È!7["@€ @€ @€‡™@-¯¾äŠ«¿ØŽeoXû¹gMáXirò¢þó ëÆ)è-þfšfƒãtí×A¼GÀõUÑXNÝïÀ"ú¡M¾¡A÷¾æ8Ùó#¤u6½©Õj/pçemnÏ0õ]#°´¨hÌá\}Ó×ÕCgm±¬ŸoË–Ëo¬í¬Ë’›Ÿ?=zîÑ¥—4¯¿7O²Â0l„&ߺ¹rÙׯaò­W^¹ö¶´V}u+#äY6¡û¥kî±õðèœæcç[š÷O¬wãÆË¾WËkÿ§è¨z?»¨æPíŸiϰ~g„‚»šzF€7âÎnÝzy[œ±ÓðW«I­þ½ŠÍ‡½Æ Ôw,å³ê;‚>tlßý?ç_Ïwåóëáøû÷M®¥“ŸéÕ»v­Žð~µ¹Lú¸‹ÊËÙ¼fâ½Yž6ýÿñ÷ØÿÄîÜŸžøÈŽ @€ @€ @€z ‡Þ51# @€ @€ p¯@¾jÓú5kÛÉ1²cן¶²ƒf©+{ôdΛfÅ;EÀïöÝÕ|RÂfsŠ0æ›bmßmVá¬Èh¥…Ëfcܯ/O¶mÚ°ú}÷kŸbCìâøG1DÓßåDÈåg;oyáTwWoª›6­þE&_>^ßè¶gÓ¦¡_×¶^ÑéÏô=µäÒb‘ô’¨‰KÑžWì0Ý|WÐxðí΋ÿA{fc @€ @€ @€Àô ƒN¿±3 @€ @€ @€‰ Än“WTÖ~câ¶tÄ犪ò$}@QÍØþEý+æEÂã”±íûÎoþõ×µÙÿ¨V?廫ködßšÛƒýæSËåž&%-wŹþkSeÍÇ[> ÅÂ#J=/3ç6+u\»)¿ûýÍjÚÑ—×’w“eù«ŠjF÷ÇÜ_0úóxïG’êúñÚÛÙÁÍ—§ÙE5ûõ§ÙÎý>÷!Ož4^óTÛâ~üÇøó®$Ïÿ¨–ç 
kyí‘7ÝxýÜž:¼~è™ñçw§;`;Õ5Üïøø,Lóì¢û­cLC^Ëß6¦©m¯¼òʵ<¶ñÛ6Ñ0PoïŠÎÉz®Žpùà O—'ŸÙ}×-¿½yóæ[ k'XÐéÏô:Çw’["xÞ4D[ÿÇú–¬|ÆùÆ-Ÿ?~W–ì —ŽÛ¿§1M>½µ²ú‡ ô @€ @€ @€f–€0è̺^fK€ @€ @€‡‰@¹>0]K çç‹ÇÎ)®Ù¿¢«;)@åɺm۶ݽÿ‘íû4<|Ùu1Ú¶û˜çwÄN|_‰ ÜeiR{ËÉ#=Mƒ–÷;¾ACš'm‚ÖO¡ÚÞ§Ü×»‚¾'©Tªû¦éÍpeÕ‡"àóófÇÃSç—ËG6«Ý7\Ûqa5ÙùÐj2òœhY\›·Å9>»~5þÜ;XÞpź5_}Ì´¼Ïóÿ)7Ëó‰Ý+µZñ˜iþúz©èÜíß´~õº|¾zãú¡wlZ?ô¡øüíéü¾Mt~“©ŸiÏÂEåågÅø‘ÍÖº'È]úl³š©öÝP»«ß«¶‡§:¯yü‚rùÄlvzuš¦ç7ž?Wß~smÁ–-[n/ªL§?Óë&߬Tvųû²"Ÿ4Ë—Õ´ÒÜI§=?¾k'6¯¾ÿ/Õü¼z  @€ @€ @€Ó#Ðö_2OÏ4J€ @€ @€34ÿÏéZq‘~Z8všN,ÆŽ_‹ ÇMª—×L±¢Vû‡<Ëþ;‚Ÿßɳڵٮä;ÃÃkëkŽ¥·÷UKókÚ;b’<÷¹+Žˆ×ccgЦ¯‘<ÝÜ´ }µ˜Ê'c¸Æž4-[šõ䨩׿"Ä“ÿQÖÿÜA´ÇÆÓüJÓÂàf,¾{"³¨•vÿO–ÌnzH=œvüI§­™7oÞŠíÛ·ïnZ|¸wΰgaw–>Ó<ßt_Ök*•ý+‡â<¯žîsŠã÷öžeiýÙr^Ñüâ/†þ¼º£ïš«*;Šj'ÓX<Óï…I.íîNŠvŠ~á‚ ^UßÁv2ž{‰]A÷¾ÿg~çMÕ»ãð"@€ @€ @€tŽ€A;çZZ  @€ @€tŠ@ìb;ú}gº–“%#·;=Î)ªÝ¿x`àŒˆƒ>ftÛýÞÇnÃë×~é~ímn®¬ùp}wÄMVÿËæuk?AПÄ)Ú­O;ÝÝþ0èÇ$Oް`Ñ?è¹}keèú6Ó5.vÍ»_`slq)Ïž>¶m²Ÿ¯ºjõ“=v"Çåiµ0üAØ ý>íŠuë"x»›¼â/=ýìó®^\|RAéáÛ=Ÿ…ñ )Ü!y¤Vý踨µjmø@œçP;ÇóÊåã³Yé'â_(xtÑÜ"x¿uÇ­¿X\ÏÕN¶ÿpz¦oÙ8ôÕø‡þ«¹Uz\Ï‘'^м¦yïâÅ‹ïZQðúƒÛ*•;𤗠@€ @€ 0³&ôË뙵4³%@€ @€ @€3S _‰™OÛ®ˆÕZRT‹ó—&¢—%ÝÏ.®Ï¿Z\3s*"Dô£áá5?o÷ŒÓRZªŒ°Í—Û}ÞfãÅZ[8_ñ¼›ã ôå¥Y-œwBß…¯VË“K[·¾›îo–Jéû–~xñ’Áåõ[+Ç.53íYøœrù˜|ó;¯¨¬½ö@\ÃêÎ[¿ÏŠ`<|^^xÉqGds?_®Ç­:Bî¼îûÿÝ?Õ*‹Îs¸=Óã;PøüK³&;MFiÎ1Kb÷ì¦[0çµdU C)!@€ @€ @€Ì(aÐu¹L– @€ @€8|:×Yª¶? 
šÔÒ‡Í9Oó¯Ǫ̃þtz™±+ëo9äi:­÷ÈØóßP»ûE¡²‡Å»bœ–ݱKè¥,]}dÖó‹¾¥+?Û¿tÅ_,.¯œcD®êð}Í´gáQÙì3’4mŽdf=ßò½1•«¿eË–Ûc>Ó¶ËôTæ6ÇÖøsŽêº*î›ÇŸßvÇMù‹¶oß¾»¸vj‡Û3}×]»×†XS×xF^XßÝs²²qlv재aÃжf5ú @€ @€ @€3Qàÿ…üLD5g @€ @€ 05üÖ©ßüèZíÎ;šW$I„ 'BK“Ž™ï A•͘þØÕñÆé˜lÞB)«UhôšJeG„Êšî‚wÌQ‹]|ât˜LaÌlQyùYýK–÷÷/üûzÐòŒ³u[VÊ>¹Õ×ÄM~æÆnxèæõ«~äùÖ†:"HŽOOÒìM¥Ròå¾Á_ô ^Ö7°âE‹ œÒè°ÎmŸYÏÂ,OO.ºRÛ^TÓÎþ4™žÐz;çØ–±òô˜£K=WE~ú ­—}Ô±éë[«ZÕáöLߺõò_=ÿâ97;uTy2²ýý—œ¹ç9Ùäà]E÷aµ+n] @€ @€ @€@ tuÐZ,… @€ @€t„@ì˜uÛt.dçÎ#jsç´û éIÅ#æß-®™9yR»yf—ôøÂq³ìý}+ß[X×΂-!Ùf'^¸ðâ”ætŸó{X)­ß¡‡G(ì¼Âm={m>û½Uíù¹{Ç®Wv÷ÌzBœÿÁ“1î…ıÇÏ‹»çÎNú—®üF¤›®Jóü£×æ;>ýÍJe×dÇž ÇÍ´ga-IO.ü—xÓ¤i¨ºÝ×%¯%?L 'ÕñÒä/ã{2{BgN“?¹¨Ŧ+6®þℎ›XñaùL­o/Û®¯Uü›Ë£ÿ}ÍjÆëËK]+š=Ëë»Xg##«Æ;V @€ @€ @`¦ ƒÎô+hþ @€ @€ ÐqižLkt:À"˜Q¸3èHmzC®Ó±®fcæIvS³þÉô-^¼ø˜8®…èVz\³0ÌdÎ]xLA´~|-Í+g’óËå#Kz›¤É9i–Ÿ•æÙYIšŸ•çéY1µ£î6‹’½¯ûÞím9P?·lY÷³Eýƒ »»óÏD0õˆ¶œ7M+ztLÿøáùÜÛ±tðãyšoÝQ½ûÃWV*¿lË9¡AfÚ³0K“òÕ¦w·ÓûŸ¿~¾ƒ÷=¸ÿ|¦§%ž ‚Ö§;ñ–º’KçÏŸÿømÛ¶Ý=3;\Ÿé7ßxýGO8éôâÖk¶[î3úú.9}xø²ë&dŸ&+šÕG(ø³7^ö?Íjô @€ @€ @€fª€0èL½ræM€ @€ @€+;ÿ͸0hžä'E£é5éé¬0h: ;ƒV»:®ÔTñÐî,U“¶…AçÍ›×}ú™Zdùâz=)’[çÆêï ÊÆ½¶çv‹»®ùmwPÁ¶lúj_yå%^]Ý·Ci;&ëŽlÚ}s³žjìº-¾‡•|WòÁM›†~ÝŽsì1fÚ³0vC,Ü4Íðs0M# êÕH v§<÷¸“N{côÿïF5Si?\Ÿé®é]:¸:¢ù¯mäöiÞÕ}Iôÿm£š±í}K–=={ØØöÑŸkyþÑŸ½'@€ @€ @€t’@ ÿ²t'-×Z @€ @€ @€À¡/'iõП娦…;ƒ^ý7g\Èuì*÷û\mÿΠ¥$;~¿sÌ´¥tÊóï]ºâܾ¥ƒÿrúÙçý<-%WDðçw"ôx^PÌÈßk WVm©&é£ó<ÿÔ´]ÎØá0±Ï«÷f³’Ÿõ ®ï[ºì)Óv¾4ðL{F.¹Ù.ˆ÷¨Uk4œáz¾tk´õ4ñ`ù_Óõ}9œŸéµZõE*‚òËŠjöëO»÷û<æCž'wUï¾¥2¦ÙG @€ @€ @€@ÇÌÈ_šwŒ¾… @€ @€ @€X°`ÁìØ¥ðØ‚¥Ü½}ûöÝ53ª;Oª»Ú=áRš·mgÍvÏ­•ñòÚäw]P.Ÿ!ÐwÇN—_Pã+ãç ­œs&Ôl^¿êÃë‡Î@èïE8ïWÓ:ç4áÙ%IRú|ßÒ«.(—‹ŠÓ:¡Ãgð¸gO*Zm-+Ðç`–'wÍ©“ûãûvMáúö„©».?þœÂÚ ÎÏô+*k¿þÿÙœ,=oQÿàãš×ÜÓ[ÿÿñ.žmÍ^ùÆ-[¶ÜÞ¬B @€ @€ @`& ƒÎä«gî @€ @€ @à¨yä…Óˆ\a‚$”ŽšÑ YìK9‰×â%+/ž[êù~„@_º&1Dë‡äy5JW׃™µâø“NSQÝDû×gú^§¼–_º÷}£Ÿ¥®dy£¾ÑísŽ~À¢¢x¢–VW>Æ{ @€ @€ @€@§ ƒvÚµ @€ @€ p€vþò—·ž2M{b×µé ùNâÐ/ˆ ËÌÞѬV›ðn©}KVþY–ækc7Ë£§å Eø3É“oD ìŸky¾øÖÚŽ" öìz03Ëk7OË9› ºmÛ¶»7mZsxZ¾k÷‘þßûRÌošÓ{\ÿµéÊW4™–®väùMEÃÔÒÒôÜç Nß­z¾Ó8ð͵ü­×­úÃ8q¾»–¼"¾]…ÏÖxþ¾æ¢åOkçdÇgúh¿|×íëâóÝ£ÛÆ¾Ôú@R.—ƶýœ%éàØ¶ý>çùõ›×­½z¿6 @€ @€ @€&à?ºè° j9 @€ @€ @à@ D¸m¤oéÊÛ#ôÒtÇ»#Ž8¥ÞÀÃwÚc*çËÓÚMiÒ<‰Áï%ÕüuS9Ït›Õªÿ5‘±cÇÊwDý«#:‘ÃÖî 
|¥ù·c´oG¶òkI^ûÒ·d_»êª¡;t;†‡/».Nÿ÷õ?”Ëo«= @IDAT'ÏN{.ÈÒäÂàxNÛñiòÏ}ƒ·Fõ²ƒ¸äN?ua4Kl83¾ ‡_4¯½aã†ÕoÜ{³m­ ]ß»dÅŸÅN¹ÿ¼·­ÁϬ+ÏÞÿÔrùq×T*;ÔL¨ùp{¦ÅÙ¼yó-ýƒ›âyvñؾ½Ÿã=¥7™}~}|oÛØŸ Êåãï¾çGmÃWžæCÑ9M¡ú†§ÕA€ @€ @€8  ”ÛÉ @€ @€ @€@g ¤I!Ï´i4›Û% Zpùw×Ò›g5ς֣.s†+C‘›™Ù¯Þ¥+ʱ‚‚Nü¡Ïiš+Žüzx|½V«}cw’}ëC•¡ŸNl´|ÖÄꧯú#•Ê 1úûëê»èwâiOLKõPhòì4Ožš¤éTç9ÄdÕEåe_»¢²¶nçÕfù!3Y+v?}Wìw\KkȓόTG3¼~Õ¼ëW¿qxýš¯´#ºçÜy~|Ñ"@yП¶uëÖ»"PõÑ0ø“áõCÍwœ»|¾2Òoõôb3ÄÖ_ÈZÖ×·lFß?­¯öÀVfµÚwŠÎ;ÚN*È[4n“þÇ5é똮Ø=÷mñÝhݳÎ/¿6åÓ¢³$-½ÿ©årO µMK—gzS„ú3*­­jVÏ¥£æõ€gW³hÑÀ)ñ¼{âx}÷µå¸ï½w @€ @€ @€Î8迼î\Z+#@€ @€ @€‡‘@šÿ¼hµYrÀCPES:ûó @€ @€ @€@§Œ÷Ë”NY›u @€ @€ @€H ZK®):U6X4bS4n¬í¹E5‡jì ú;…sË“vÜöË?)¬›bAšäg Ûú͈ߧ _v]:²ûÙ±ä EkŠþÂu·0†’qÒÏÛ<ª±Ô,õqÚÞÆyfìsbÚPbà-W¹–$ooáYWž½áÂ…s[¨mXÒéÏô† Û‘æ—Žmý9þ^;)·OÝöÜç®8"þ>öè¶±ïóZuÕØ6Ÿ  @€ @€ @€*ÐÕ© ³. @€ @€ @àÀ üø¾ùÕ3Î:oG„/{ž5Ï~ÑÀÀ©W¬[×Òî… Ç)è¨ïü;ºÑ¼,¿6©TªÍkNouwí3YwAþ0M½hÑÀ)õ]!Ä,/^|liÎÑïÊ“üÇi’ý(©Õ®Ûç?ºí—?¹nÛ¶mw·:‡E‹÷ÈC‹êc‡Ëw^yå•;‹ê¦ÒÿÔr¹'æò[EcLvgÐ ÌžsôÎNké#’Rrn„4ç:vxýÐ…EçœlÿÆ—}/vÃ}s„§ÞÕlŒ4ÏOþúM™8¯¶ ¤µÏí‹›™&é‹¢ÿ­ÍjÚÑÁëØ…·#uÞ7Ôv¼á”¬§7žg5]]ôÏ:âø¿š?lZפ³“ŸéM–}¿®›ªwð„¬ç]a~äý:ïmÈòlq¼Ý¨>â¸äyñ÷ùìFõõÝ7UÖ|²Q¿v @€ @€ @€@§ ü—¶\ë!@€ @€ @€¦C`ûöí»#€öåfcÇiW>kY³švôåÝÍwÛsŽZò¹vœk:Ƹí×?þZ’çwÝ5wÖK‹jÚÕŸÍ::‚Péò‚þYŒù¯I–}´»Tºöø“N»«¾ËjÿÀàú–®»g·Ægíî>î´Æ½£zÒ‘OŒú4-oOIç<+žS4x„º‹jöö÷ þy„1·†Å÷ç}â±í7ÒRZ‰ðßãö¿$þ\Ðß¿ò¼½õÓñs÷HòéÂqÓtV_ßò“ ëLX v÷í›ãž¹«Ù‘Ï|ÄEý+žÜ¬fª}½åÁߌû휩ŽÓ©Ç_S©ìˆëôÒ<^EkŒ’Wõ-YYo4N'?Ó­y¼öm•Ê]¯oo[ܳõ0è¾W–¦‹ö}ïMž¯Žf¡öñl´ @€ @€ @€) Ú‘—Õ¢ @€ @€ @€ÀÈ÷ìˆWtÞtEQÅTû³,/ œÖÒ$vï;4_±ÓæHÌlßÎhf²WïÙi³QA›Ú,Xvtš¿+^wbÓ¶$B…é“ãÝYW]µúÎf§­vå-…AwÞ‘¿Ù8mé˲ ZgV‹uIìÄ8/v½{AX<,~–Æ=®+_8n{›·lúF’äM¯C=÷óÒÎ[ÚtJÃŒؼysÝõòQMã¾-ue4nG›Ó,ÖñÛ4̓:Ìð†¡OÅìߊ&±ç1—æï/ »7§“ŸéÖܨ½šÖ.mÔWo¿OÎÞ˜/—Kmº“rž&«š§ @€ @€ Ði vE­‡ @€ @€$‘j¾ª2kzú4yd}Ǻ¦5Sèìí÷¼–¼¯éöt¦ÇuÍ9æÅuS«è9¦ôÑ9®h”¼V+ À¥Iíø¢qêý³fuMëï° œwê`+s‰tRËaиù·µ0æ´†A”ˈëuD³yDàêÇõ›Õ蛊@õÝEGGq ¯¼¼iЭhŒFýýK–÷Çøýúµß'pkuÇkã{û³ûZÆWxy\úwã÷·vê3½xåûW\±nÍgcçëæaÿ®ÚâúQ½IÏScWåxžÿÊ“üšM뇾;~¯V @€ @€ @€@g Lë/Ò;“̪ @€ @€ @€ñ¶TÖ|'Ú¯¯ot[–¥ïž7o^÷è¶v½Og¥;26ï¥ÉåW^¹ö¶vs:Æ® G®öEcÇîuQyÙ#‹ê&ÛïNx¯):~Ox¤¶¾¨.ËÓ_ÕÔû³îü ­ÔM²&6µ*ÂrGµv|Áý4jj­ºmÔÇñߦéS.(—O¿sê­=IOaØ:ÂoTS§n8Âðú5_‰ Ú6,¸·#Ͳ}N¹|LQÝDú# 
|b’–Þ3‘cçÚOT*·¦µêï·hðû‹–Ïo±v¿²N}¦ï·ÈÖ>Äã'ÿ@ÓÒ4ÝÍÒä¢fum>N³ƒõ @€ @€ @€f¨€0è ½p¦M€ @€ @€EØòŸ ç•&>ýaz]aÝ úû/9;~…¡žÝ»ó™”ªÅòß^LQÊJœ_.Y\;áŠô¨ãÓi¶3Û}#¦^û“û>ÿ®–æ7Œß³kždOÜ¿¥}ŸzVþ¯Øåï·Z1všÓj핵ÿ!À"‡lNÖósZ~O—gÉ3‹æë¯·½¦S š¾¹pø4}ðÑYϦv}/¼ð’ãz²¹[b7Û Ï­`ŸÀÆ k6'yRÙ×ÐàM|oÒ,)ýû½!ùU ›;ò™ÞpµM:FÒÝCÑ]÷÷»6Ïëë[ö¶¨¶Yÿ¢EŸ4種mˆJ³:}ã ìÞ±ëU½iüÞûZÓ$9ó¨ã³·Þ×Òú»N|¦·¾úû*¯X·îÇñ\ýä}-û¿‹ïDšt—^?ÎÙ¿gÔ§4Ù\ßÕuT‹· @€ @€ @€ÃB@ô°¸ÌI€ @€ @€œ@^Mþºèlõ˜eÙ½K/*ª-ꯇ # ¹!‚‹+ªMjù k‘‚k*•y-ÿ›–¦“¦pÒiŸ\¸ðâ´T_PÔ·då_ÆÆ•¯)(ÛÓçùÚØó­Ô~øÃ—Ý;º}¡¨6îÇŸqö#'¸j0vÚ70øú¬TúhýÞkP3~sš5~Çø­*{_ôÜ=~ï}­i’ýYïÒ}÷µLí]}'½,K‡‹×—yóúUWMílŽnE`ä®]áøÛŠjãš=%›}ô—'ï_²¼¿knw„ÜÓÇKÿø[¶\~c-Ïÿ×ø½û·Æ3ï•}KŸµkñ§N|¦¯züŠZ-¹tüž{Z#úÊfýñwùª¦ý:  @€ @€ @€* Ú¡Ö² @€ @€ @€ÀÁ® }0Pï/<šÎŠ]Ö>!½??¯\žUX?NAÿ’òéßLÒtÁ8ÝcšòË7nXý‘1‡ôÇMV¿3Ï“Í-M2MŸÖ=·û õÝ/[ª§¨¾;aßÒ•oM³¤¥jÌíöd÷ÈëǪaS-muWÊì5}åÁÞ†µØÑÛ;xBßÀÊÄŽ™oŒC&ñ»±ü˜Oµ§lëÖËa±5­ÐÕ*{âE‹&8;vïÒå¿‘vwmMÒää±}c?GXû-cÛ|ž-[Öý,¾#ÖÊè~{X„•¯ì_:¸ñ¢åψcšÞ«‚ïê_2øœ¨ÿX’•>÷÷CZ9šÆ›6 ­Š°zaP:®U¼’ß³#uãáÆíéÄgú¸ -h¼å×mŠïÆ-MÊßÿyþÓø»bÉŠ?Ù˜ßý‘¤R©6;¦ÞwÑÀÀ©]ù¬Šh_œ£ð•'ù¯wTwüaaá!XPÛyëï”fý±Ö‡M/<––JWõE@ld$ÿË-WÇnůç”ËÇöüQÿš¶~¬å¿;<|ÙuÅg¸¯b¤š\>«´g÷Ø9÷µ6x—%"œúþ‘;ÿO=X× jÜæØuóÜØ)ñ%ÚZ÷È ãµÐ˜æÉ“r¹ÔÊ}¹w¸µ>7›{QXž¸·m¼Ÿ1·¹q]ÿºkêXùw7Tïú—úîãՎ׶¸|Éc³R×_%y²8ÎUüUÈ“oEX»µpñx'Ô6aáõCï‰Àûóâò,jéàx¦u%¥¾þ¥+oˆçÖ‡â¢þ –'?«û븟OLÓüAIžž¤ùÂø<éûzï\ò4"y^ûªé®—•’Yÿ¶GìkïM<ÏzÞ]Íw°çØN{¦³Ä¦mÛ¶ÝÝ?°r]¾¼°xLA-IVGSüð"@€ @€ @€~ ‡ß5·b @€ @€ 0í[·n½kQÿàÒîîô‹q²ÂÐ_„ωPÜ–¾¤çWéÀà–Zž<©¥?©&µ’ÕÛÓ¹¥s²Zz^Ô=2BQçEê)ñsnK Éójš×^re¥ò˖걢͛7ß²¨Å’î®äsaÔÒªáô¼°^„Ю‹Ú‡òj¾­Zªý,ÍK7¦»j·§³j§×ò®³²,?+‚„gGðiaÈÒã½}Óž]`'rT’l­ ]ß7°âcWÌÂcõße½´»gÖŠÞ¥ƒïˆøÏ–ÉŽ~¤R¹aìY/¼ð’ãJsºN-u¥ó"@÷âXÏÓÇÖŒý;xF.ýIÔž:¶oßç4--Lz¼5I®ß×Vð¦~¯õ/YþŠ$-Å.¹Å¯¸F'Äœÿþ”lîGð31©ÆîºÿSKÒ–FvÿpG¶ûö®Úœ‡”º’Ӣïiy’=*Þß³ l¼)|åùyR}QÔ ÿbµµ ßqë/—ôóÀ­û®W+ÃÇ.¯q_¼¤^šíw}ãÞÏû5Ž;b„I7ÅE»ë†ïǼC7¯[÷£Þ%+þ<ËÒw´°Ä—/X¶q󺵟l¡v_I§=Ó÷-l‚oªÕüÒR)p4Ù½j‚§RN€ @€ @€èaÐŽ¹”B€ @€ @€--‡¾ÞW¼$-¥—ÅÌ ¡õÙGpéñãw³4ýݤ!¨úÿÌ?õWvÏ=ÿ[œƒÚSVûEÜïÅ›6¬™Ñ»!ÖwøŒpáÒ<)­ `k!ØÏÓãÇïÇ5øýØmðžÙì:dÈî ”µh¹ôÞÿ‰Ù–›o¼þu£Û&òþ¦êÝo9¾4·¾cç)-—¦=1ë?%üiO,¿é`ìž™þ0æqc¬ñ¤È7žAÙ#[ëÞ¢8öWI­ö¢$Ëc½·Ù±¥4ÂǃÖÇÚ¸áÿ³w?Àv–õÀŸ÷Ü›ÄàªXÀÊREmñ8€²ÛÅÅþ¡@€dÒ„€Á?,к3N븻m%aÝ:Ón;ÛÙm…]«ä&ÜÜæÞ\³R£‚©º0ƒ­ˆ€- 
ù#`.$¹çÙçææÀ¹çÏ=î{Þ÷sÌ=ïû>ïó<¿ÏïÜ“o¾¼›¶­X½vCšû’fs×]KÀt¼ò¹–dµŽ…°hQX…çfGwضý3![±}üºoÌÎàÕ vîÜù̹çž{þ¢Ã^¹3œe0ëÆÍ>ôÀxõ±ÍŸ4³g³ŸáYejëÆÿ™žH|aú¬­ýÜÏù•z™Uâè§–-[vâŽ;žœs`ƒ EûLoPbËSÛ'ƾžÂïw¦¿¨Žo9øà€øµmÛ6ßuðÐ  @€ @€ @€@Éýg%+]¹ @€ @€ @€@¿&'Ʀª3ñ=Ïïú½XƒùSPðÃS[ÇR oø¿R¸0ZgÞ•ë¸g!«IáÚ¿œ™^¾k×®óÝÇ®‰‰§Bœ97ÍÕQ€êàz)Z ¥,Ö»Ÿ uñËûfÂÉ“›þO2½ãà¼s¼HOf0ýø9é³ðKM†õäRzÏ]»mËØÅO>ùd˧~f! ƒ¾X½š—†÷½øRý™}ÝèÒÃÿ¼þl{GEúLo¯âªfá3/>;÷™j5^;÷UW @€ @€ @€Å-~UH€ @€ @€T`jbl÷™êi)XsïÀ6ÃOª±zá¶ñ ŸØšXhr|Óíûfâ/§°×íX®n‰Z¨0Vã™ßpi˜˜h2«»¹ÁA­–,ÆåéÒþ—ûu*e‰ÂU“Õé÷|vbìµEžz¬òé[µù‚q^aÐZ`v²º÷â$·±ùü}ºãSé½réö­6÷iÓv P{zää–±w§÷Ã!}Fupk›CãOcµú‡égôƒé†ê«^õªÖ¿ ΂0hÝmÛ6|+=qø¿6¸ÔèÔe+~{í{]hu®HŸé­jmtý™™½›ÒÏC»ÿa§ã¾'4\ߨç @€ @€ @€ƒhý ÀAîÆZ @€ @€ @€@!vLlº·º/œš‚iÿ«ƒàǼ,Ò·ÎdÏœ<5¾±¡‘ZˆqÿÞGÓB«OÛ§æ…ÔñMñ›±Þ9¹uìcßÚä†m[ÇnNOŽýÕÔ³ûš ëÉ¥ôDÕÿ›2G§No¸òÐ0ëM7müi æÝÚl‘ôÒ_vIJ¡~(­óèœãz|!™nÙ?½ïMéɸŸîñÔ¦ëN ¦÷Ã5ÓÕ½oNaíÝM5{wzâèŽô´Ûã'·nü‹t6½ÝÓ3ogŽÑøUŒ™0hcšpß½w~"ý}õOs\>x:}>d±>µlÙ²—<ÙÁ‹"}¦wPö³C?71ñPÙÎvîK½¸aûöí·3Ö @€ @€ @€@Q„A‹ÚYu @€ @€ @€œ LM=’žXwùjõ­)¸4ÕóíÅø@µZý½ô$Æ_ݾeË}=Ÿ?GÞxã{'·lü“½Õé7TcüË´µ§û²½NÌüÈ#=pJí ¯ýX£6ïôNJùµÚâž ±õtoJ!ÐÓ&·l8krüºo4š;Õø™Fç=7G>rèq‡¯c-”¹oï¾7¥ ¯Me¶û$¼—IŠ1~;=Áõ=égíÂ;¶ìéx7 D ‚Û¶elÕþýñ¤Úwk}ëxágŸü¯Õêo¦'ŽžwãÄØ‡Î±dÉ¥‡ÏñzzŽó¥?}Ûm·íŸ9/Mé#¢ùW²cvx-ˆ;¯¯"}¦w P-?ks¦&lèxn7 @€ @€ @€ &Ðò¿[°z•C€ @€ @€,°@zJèwÒ–/_uÑ©!Œ|( á¬eÇÌw[)DuGÈâ_<úÃïmÞµkWßBvóÝ_?ïÛ91ñpšÿÎY¹ö/U²Ë*YüôLÀ“{ìq'ü8}†ÙxijºÝ³½:=o×¹æuž @€ @€ 0léw†¾ @€ @€ @€ +°lÅÚ†ßJ»xw @böªôýˆt\©ÛY ?I±‡R jw û}±š=óŶlù^ݘ’œ}öšW.9lôÌýõdôÖ,†£RÈñ¨äùŠÒ¤§T>“‚´¦1ÿ³øõ¬¾^ݾ65µqAbË–-{Yeéá§§°äi¡’¥Ðp<)íñˆ†\Shõ@ªëáô ¯‡S­÷¥àÜÍéɳ7=8~a¹¹þYzÚñGçºî< @€ @€ @ , eé´:  @€ @€ @€Àð TÒ ‹³Ãg¦÷ÿä‰'|ÄçßÄSN9eѱǾåÈ#áå£3á‰Gžy¼ÙSç¿RÿD³Ã;¼/Ý[þñç'&K«¥hñ¾jýzíkOxùÑøò,Ä‘ýOÅ=ÿÕâ«¢¾œ·úâw†‘/7›Ók²µ§„gÙèî¦Êû÷½eÛ¶Íw5ã" @€ @€ @  %h²  @€ @€ @€(—ÀŠß¾xE¨Œü]³ªc¨^:¹eãß6ã~ ,_µö¯³,ûݹÖHOý‡ôTÐÓæºî< @€ @€ @ L•2«V @€ @€ @€”A †‘7¶ª3«f÷·ã:~ œ°råâ,d«›ÍcõÓÍ®»F€ @€ @€(“Àh™ŠU+ @€ @€ @€ú)žtøé…{ÃL¸û@5ÜýpeúžÝÓý\³áÜ•øoCÚH³¯§âôíÍ®»F ŸoΖüVz‹þÜ\k¤§‚î}úÉêø\×'@€ @€ @€”M@´lW/ @€ @€ @€}È.HÌÃÃH‹ÒŸ£ãÒ¸|Õ%ßOçîŽ!Þ] ÙöíãnêãÂ9+דž¸xN³5RÐî¾ÏOL<ÚlŒkú*U>Ôbþ­;w^÷D‹1. 
@€ @€ @€J# ZšV+” @€ @€ @€~ d!>–žÈyøÏÖÉÒWzýšÚŸÐ<³ÿK¯û]œ…ßMk,Jš}ÝÖì¢kú)°|ùE¿æ?«Ù3ªW7»î @€ @€ @ l•²¬^ @€ @€ @€ôM ËZ{Í+¿t$B+—¤Ý©Ýl5ư7Ý?U alûø†›ÓëôÒÁ ,_}ÉÝé×Öª1ì ±úçÛ¶nܙƧ ³/ @€ @€ @€v„AÛQ2† @€ @€ @€}8÷Ü 9lñÛ+1œ*ñí鸿˜âq‡‡½"†øò,ËFS^=éûžt}OŒYú÷ÄjxàÀ3ß¼cÇŽ'û°5Sh[àüóÏ?<[ò²÷T²Ê¯¥€ò/‡,“Þ«G¥×O§ï÷¥÷î?§”òwgÄM;¶mümOl  @€ @€ @€ @€ @€ @€È»À +W.Îûí@C•+Gžw’ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @`Àï¼èêõ^Òr @€r#PÉÍNl„ @€ @€ @€ @€ @€Ì%P  Çy @€¢ ƒ½Ãê#@€ @€ @€ @€ @€ @€@QB‹ÒIu @€t( Ú!˜á @€ @€ @€ @€ @€ °€¡ ˆoi @€…](yë @€ @€ @€ @€ @€ @€ÀüBççæ. @€¡ÚÖÙ8 @€ @€ @€ @€ @€J, Zâæ+ P>aÐòõ\Å @€ @€ @€ @€ @€(†€@h1ú¨  @€– -‰ @€ @€ @€ @€ @€ @€Ü „æ¶56F€ Ð;aÐÞYš‰ @€ @€ @€ @€ @€B@ t!Ô­I€ 0@aÐb[Š @€ @€ @€ @€ @€ú$ Ú'XÓ @€äA@4]° @€ @€ @€ @€ @€è^@ ´{C3 @€äR@4—m±) @€ @€ @€ @€ @€˜—€@è¼ØÜD€ oaÐ|÷Çî @€ @€ @€ @€ @€ @ SÐNÅŒ'@€ȹ€0hÎd{ @€ @€ @€ @€ @€ 0Ðy ¹… @ ¯ yíŒ} @€ @€ @€ @€ @€ @€@w¡Ýù¹› @ 7 ¹i… @€ @€ @€ @€ @€ @€@ÏB{NjB @€Á ƒÞÜŠ @€ @€ @€ @€ @€ 0HÐAj[‹ @  }@5% @€ @€ @€ @€ @€äL@ 4g ± @€N„A;Ñ2– @€ @€ @€ @€ @€†W@ tx{gç @€’ ƒ–ü  | @€ @€ @€ @€ @€”J@ ´TíV, @ ( Eé¤: @€ @€ @€ @€ @€ @ ==}Í5ëÚl @€…]øØ @€ @€ @€ @€ @€ X Æx¥@è€Ñ-G€ 0oaÐyÓ¹‘ @€ @€ @€ @€ @€†Y@ t˜»gï @€r ƒ–«ßª%@€ @€ @€ @€ @€ @€CBÁð’ @ · ¹m @€ @€ @€ @€ @€ @€À B¡l  @€n²nnv/ ?Ó×\³.»²# @€ @€ @€ @€ Ð@-°ÙÝ mÜ] WÝzÝý_§­B€ @àPaÐC5¼&@€xç…WÇ”¡ @€ @€ @€ @€ ° Y–­¿åúË×-Èâ%@€ 0‡@eŽóN @€ @€ @€ @€ @€ @€Ò Ôž@zúškÖ•®p @€äZ@4×í±9 @€ @€ @€ @€ @€´€@è Å­G€ ÐJ@´•ë @€ @€ @€ @€ @€ P:ÐÒµ\Á @€\ ƒæº=6G€ @€ @€ @€ @€ @€ % ºPòÖ%@€x¡€0è E @€ @€ @€ @€ @€ @€çB½ @€ò š‡.Ø @€ @€ @€ @€ @€äV@ 4·­±1 @€@i„AKÓj… @€ @€ @€ @€ @€ @€À|Bç+ç> @€^ƒöBÑ @€ @€ @€ @€ @€ PxзX @€Ü ƒæ¶56F€ @€ @€ @€ @€ @€yÍ[Gì‡ PaÐrôY• @€ @€ @€ @€ @€ Ð#ÐAš† @ maж© $@€ @€ @€ @€ @€ @€Ï „z' @€ R@tÚÖ"@€ @€ @€ @€ @€ @€Â„¦• !@€ä^@4÷-²A @€ @€ @€ @€ @€È«€@h^;c_ @€b ƒ«Ÿª!@€ @€ @€ @€ @€ @€ „Ür @€ ƒ–°éJ&@€ @€ @€ @€ @€ @€Þ „öÖÓl @€õ õŽ @€ @€ @€ @€ @€ @€À¼BçÅæ& @€6„AÛ@2„ @€ @€ @€ @€ @€´# ÚŽ’1 @€ ƒv*f< @€ @€ @€ @€ @€h" ÚÇ% @€y ƒÎ‹ÍM @€ @€ @€ @€ @€ @`nйm\!@€è\@´s3w @€ @€ @€ @€ @€ @€–¡-‰ @€hS@´M(à @€ @€ @€ @€ @€ @€@§¡ŠO€ ÐH@´‘Šs @€ @€ @€ @€ @€ @ 
G¡=‚4  @ Ä %n¾Ò  @€ @€ @€ @€ @€ @`0¡ƒq¶  @ ¨ Eí¬º @€ @€ @€ @€ @€ @ W¡¹j‡Í @€†J@t¨Úe³ @€ @€ @€ @€ @€ 0ÌY̪ü{'@€XaÐ…q·* @€ @€ @€ @€ @€”L *W~eóeW•¬lå @€ô@@´ˆ¦ @€ @€ @€ @€ @€ @€ÍA›é¸F€ ÐJ@´•ë @€ @€ @€ @€ @€ @  AÐ.ðÜJ€ ð¬€0¨7 @€ @€ @€ @€ @€è“€ hŸ`MK€(™€0hÉ®\ @€ @€ @€ @€ @€Œ€ è`œ­B€(ƒ€0hº¬F @€ @€ @€ @€ @€¨€ è@¹-F€(¼€0há[¬@ @€ @€ @€ @€ @€¤€ è µ­E€(‡€0h9ú¬J @€ @€ @€ @€ @€€€ è-A€(¡€0h ›®d @€ @€ @€ @€ @€è½€ hïMÍH€ ðœ€0¨w @€ @€ @€ @€ @€èR@´K@· @€4mÊã"Èo@IDAT @€ @€ @€ @€ @€h. ÚÜÇU @€î„A»74 @€ @€ @€ @€ @€”T@´¤W6 @`À · @€ @€ @€ @€ @€C@´}T @`„A‡¡KöH€ @€ @€ @€ @€ @€¹ÍU;l† PxaзX @€ @€ @€ @€ @€ ÐKAÐ^jš‹ @ aÐv”Œ!@€ @€ @€ @€ @€ @€I ˲u_Ù|ÙU0 @€ R@tÚÖ"@€ @€ @€ @€ @€ @€¡¨Ao¹þòõC[€ @€ ­€0èÐ¶ÎÆ  @€ @€ @€ @€ @€ @`P‚ ƒ’¶ @€@#aÐF*Î @€ @€ @€ @€ @€ @€çA½ @€Z@t¡;`} @€ @€ @€ @€ @€È­€ hn[cc @€R ƒ–ªÝŠ%@€ @€ @€ @€ @€ @€vAÛ•2Ž @ ß ý6? @€ @€ @€ @€ @€ € èӗ  @€…-t{G€ @€ @€ @€ @€ @€ ‚v*f< @€@¿„Aû-l~ @€ @€ @€ @€ @€AСi• @€J%0ZªjK€J þO¨õ%(S‰ @€ @€ @€ @€ P2ã•ý.¹*W~åúË®ê÷:æ'@€ Щ@Öé Æ @€ @€ @€ @€ @€ @€A ¼ó«c?×ôDÐ~êš› @ [J·¸Ÿ @€ @€ @€ @€ @€ ³€ è0wÏÞ  @€å-GŸUI€ @€ @€ @€ @€ @€ A 8E€ ;aÐܵĆ @€ @€ @€ @€ @€ @`‚ ƒP¶ @€@/„A{¡h @€ @€ @€ @€ @€*AСj—Í @€J/ Zú· @€ @€ @€ @€ @€”K@´\ýV- @  Eè¢ @€ @€ @€ @€ @€ @ -Aж˜ "@€È™€0hÎb; @€ @€ @€ @€ @€ ÐAÐþ¸š• @ ÿ ý7¶ @€ @€ @€ @€ @€,°€ è7Àò @€] ƒvÅçf @€ @€ @€ @€ @€È»€ hÞ;d @€­„A[ ¹N€ @€ @€ @€ @€ @€C+ :´­³q @€C„AÁð’ @€ @€ @€ @€ @€Š# Zœ^ª„ Pvaв¿ÔO€ @€ @€ @€ @€ @€ ‚°©J"@€”X@´ÄÍW: @€ @€ @€ @€ @€Š( ZÄ®ª‰ PnaÐr÷_õ @€ @€ @€ @€ @€(”€ h¡Ú© @€ç„A½ @€ @€ @€ @€ @€ @ ‚ …h£" @€ƒ6@qŠ @€ @€ @€ @€ @€†K@t¸úe· @€ ƒvæe4 @€ @€ @€ @€ @€äL@4g ± @€ž ƒöœÔ„ @€ @€ @€ @€ @€ 0(AÐAI[‡ @`!„ARßÚ @€ @€ @€ @€ @€ 0oAÐyÓ¹‘ @`È„A‡¬a¶K€ @€ @€ @€ @€ @€!‚z @€”I@´LÝV+ @€ @€ @€ @€ @€ Z€&* @ #aÐŽ¸ &@€ @€ @€ @€ @€ @€…]H}k @€,”€0èBÉ[— @€ @€ @€ @€ @€:HAÐõ·\ùúŽn2˜ @€ @€ @€ @€ @€ @€ @ ÿ§¯¹æÊþ¯b @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ 
@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€š@†(–À;Vr]–eW«ªùWc\ÿÕ-¿³nþ3äçN½­ï…ÞÖ{éHo‹ÔÍúZô¶Þ£HGz[¤nÖ×¢·õE:ÒÛ"u³¾½­÷(Ò‘Þ©›õµèm½G‘Žô¶Hݬ¯Eoë=Št¤·Eêf}-z[ïQ¤#½-R7ëkÑÛz"ém‘ºY_‹ÞÖ{éHo‹ÔÍúZô¶Þ£HGz[¤n†pëæ+ü{ÿbµT5 @€®*]Ýíf @€ @€ @€ @€ @€ @€ @€¾ ƒö•×ä @€ @€ @€ @€ @€ @€ @€î„A»ós7 @€ @€ @€ @€ @€ @€ @ ¯ }å59 @€ @€ @€ @€ @€ @€ @ ;aÐîüÜM€ @€ @€ @€ @€ @€ @€è«€0h_yMN€ @€ @€ @€ @€ @€ @€èN@´;?w @€ @€ @€ @€ @€ @€ @€ú*õuv“ @€xÿG>ù™{¾Ÿ½à çtÁ3NŒ»>þþ°+§Ûëh[|m8c×7³3:º©Àƒõ¶¸ÍÕ[½ŸÉõ]òs[ïQ¤#½-R7ëkÑÛz"ém‘ºY_‹ÞÖ{éHo‹ÔÍúZô¶Þ£HGz[¤nÖ×¢·õE:ÒÛ"u³¾½­÷(Ò‘Þ©›õµèm½G‘Žô¶Hݬ¯Eoë=Št¤·Eêf}-z[ï1ìG·n¾¢ÔÿÞÿô5×\c\7ì}¬í?˲u·\ùú"Ô¢ @`áFni+ @€ý8æçÂëîù~?fÎ9_t8#déO¾R-a×7 PHJÐÛAæp½ÍaSz´%½íd§ÑÛ6¥G[ÒÛAæp½ÍaSz´%½íd§ÑÛ6¥G[ÒÛAæp½ÍaSz´%½íd§ÑÛ6¥G[ÒÛAæp½ÍaSz´%½íd§ÑÛ6¥G[ÒÛAæp½ÍaSz´%½íd§ÑÛ6Å– @€ôH Ò£yLC€ @€ @€ @€ @€ @€ @€ ÐaÐ> š’ @€ @€ @€ @€ @€ @€ Ð+aÐ^Iš‡ @€ @€ @€ @€ @€ @€ ÐaÐ> š’ @€ @€ @€ @€ @€ @€ Ð+aÐ^Iš‡ @€ @€ @€ @€ @€ @€ ÐaÐ> š’ @€ @€ @€ @€ @€ @€ Ð+aÐ^Iš‡ @€ @€ @€ @€ @€ @€ Ьsš’X@øà'¯ YX·€[°4 @€ @€ @€ @€(µÀé¿ßý?Õ¿uóÝO2Ä]8}Í5WÆ× q ·žeÙº[®¿|ýÁ^ @€æ!àÉ ó@s  @€ @€ @€ @€ @€ @€ @`P ƒ’¶ @€ @€ @€ @€ @€ @€ @`£ó¸Ç- @€ @€ @€ @€ @€ @€Àø8Ç•çNÿ˃a×ï_j:ÈE @€‡ƒ‚á% @€ @€ @€ @€ @€èVàC¿Ùb†veGÿÎú£\&@€ pP rð• @€ @€ @€ @€ @€ @€ @€¹Í]Klˆ @€ @€ @€ @€ @€ @€ 0+ :ká @€ @€ @€ @€ @€ @€ @ w ¹k‰  @€ @€ @€ @€ @€ @€ @€f„Ag-¼"@€ @€ @€ @€ @€ @€ @€äN@4w-±! 
@€ @€ @€ @€ @€ @€ @€À¬€0謅W @€ @€ @€ @€ @€ @€ @€Ü ƒæ®%6D€ @€ @€ @€ @€ @€ @€˜µðŠ @€ @€ @€ @€ @€ @€ ;aÐܵĆ @€ @€ @€ @€ @€ @€ @€³ ³^ @€ @€ @€ @€ @€ @€ @€r' š»–Ø @€ @€ @€ @€ @€ @€ @`V@tÖÂ+ @€ @€ @€ @€ @€ @€ @€@î„As×"@€ @€ @€ @€ @€ @€ @€Ì ƒÎZxE€ @€ @€ @€ @€ @€ @€È€0hîZbC @€ @€ @€ @€ @€ @€ @€YaÐY ¯ @€ @€ @€ @€ @€ @€ @€¹ÍÝŽlˆ °7ÿË™ °ª%  @€ @€ @€ @€ @ Ÿï}ýú9½¹  @€ LÀ“AFm! @€ @€ @€ @€ @€ @€ @€@ç ›¹ƒ @€ @€ @€ @€ @€ @€ 00aÐQ[ˆ @€ @€ @€ @€ @€ @€ й€0hçfî @€ @€ @€ @€ @€ @€ @€ L@t`Ô"@€ @€ @€ @€ @€ @€ @€t. Ú¹™; @€ @€ @€ @€ @€ @€ @€µ… @€ @€ @€ @€ @€ @€ @€ ƒvnæ @€ @€ @€ @€ @€ @€ @€ÀÀ„AFm! @€ @€ @€ @€ @€ @€ @€@ç ›¹ƒ @€ @€ @€ @€ @€ @€ 00aÐQ[ˆ @€ @€ @€ @€ @€ @€ й€0hçfî @€ @€ @€ @€ @€ @€ @€ L@t`Ô"@€ @€ @€ @€ @€ @€ @€t. Ú¹™; @€ @€ @€ @€ @€ @€ @€µ… @€ @€ @€ @€ @€ @€ @€ ƒvnæ @€ @€ @€ @€ @€ @€ @€ÀÀF¶’… @€L`&VÃíûî »Ÿ¹3|{ÿáÁ™GÂÞøLˆé¾ @€ @€ @€ @€(Ÿ@²pX¶$=rDxˢ׆ӖÞ¾ø¸0’yÞPùÞ *&@€aÐaè’= @€æ)c ŸúaÓS_?¬>6ÏYÜF€øÿìÝÍ‹WúðS·[Û14Bœ^D&t•‘q@7qAÿ½™æ éL ‹0‹0¸ÊË¢Yd—M\È8‹‰ Á˜,ta$`¾ ¾µmwý<Î/ÞνöM×­:UçSÒ¤nßêsžçóô"Èù^  @€ @€ @€ Ð5øAò·Ê»á›ûß?üúìΩ°­·%Ìl:nØŠ¢èZËú!@€ ÐjaÐVOñ £À¾ÃÇf‡õýÆÇá•§‡=‘CÃß÷n7n,ß »>¾Z¼Ø†tA€ @€ @€ @€ @€ÀXâοwãÓpâΗáÍÍ3áÙÞ3cÝÏâ @€¬^@tõVž$@€I<ø´µ·†ròëâ×°ëÈ¡rØÛÞë€Àw÷ ¯_ÿ0ü°tµÝh @€ @€ @€ @€:Î>øú×®¾ÞÝüjxaòù:·¶ @€Oè=áû¾M€ ÐRø/‚ ‚¶txÊ&@€ @€ @€ @€ ˆ@ü úx)žGr @€ м€0hó3P @ 2²,Ã;×çü‹ •‰Zˆ @€ @€ @€ @€@¾1Ï#ÅsI. @€šmÖßî @€JŽß=Î.^¬tM‹ @€ @€ @€ @€ ¯@<Ï%¹ @€hV@´Y» @€*X*—ÃÜÍ•­g! 
@€ @€ @€ @€Dx.)žOr @€ М€0hsöv&@€T*pæÞ…pyùZ¥kZŒ @€ @€ @€ @€ñ\R<Ÿä"@€ @ 9aÐæìíL€¨TàÔ¹J׳ @€ @€ @€ @€~p>é' ÿ%@€ ÐŒ€0h3îv%@€T.p~ñRåkZ @€ @€ @€ @€QÀù$¿ @€šmÖßî @€Êæ—®T¶–… @€ @€ @€ @€ °RÀù¤•î  @€Ô/ Z¿¹  @€c¸].Œe]‹ @€ @€ @€ @€ @Àù$¿ @€šmÖßî @€ÊÊPV¶–… @€ @€ @€ @€ °RÀù¤•î  @€Ô/ Z¿¹  @€ @€ @€ @€ @€ @€ @€«˜\õ“$@€ @€ @€ @€ @€ @€ž*ðÁçÃùv>ìßwøX1ì©/>9:;ì}ï @€ä% š×¼uK€ @€ @€ @€ @€ @€cøèøÐœgÜ}Q„ýO)cö)ï{› @ #^F½j• @€ @€ @€ @€ @€ @€ Ð:aÐÖLÁ @€ @€ @€ @€ @€ @€ @€@N 9M[¯ @€ @€ @€ @€ @€ @€ @€@ë„A[72 @€ @€ @€ @€ @€ @€ @€9 ƒæ4m½ @€ @€ @€ @€ @€ @€ @€­mÝÈL€ @€ @€ @€ @€ @€ @€ä$ šÓ´õJ€ @€ @€ @€ @€ @€ @€´N@´u#S0 @€ @€ @€ @€ @€ @€ “€0hNÓÖ+ @€ @€ @€ @€ @€ @€ Ð:aÐÖLÁ @€ @€ @€ @€ @€ @€ @€@N 9M[¯ @€ @€ @€ @€ @€ @€ @€@ë„A[72 @€ @€ @€ @€ @€ @€ @€9 ƒæ4m½ @€ @€ @€ @€ @€ @€ @€­mÝÈL€ @€ @€ @€ @€ @€ @€ä$ šÓ´õJ€ @€ @€ @€ @€ @€ @€´N@´u#S0 @€ @€ @€ @€ @€ @€ “€0hNÓÖ+ @€ @€ @€ @€ @€ @€ Ð:aÐÖLÁ @€ @€ @€ @€ @€ @€ @€@N 9M[¯ @€ @€ @€ @€ @€ @€ @€@ë„A[72 @€ @€ @€ @€ @€ @€ @€9 ƒæ4m½ @€ @€ @€ @€ @€ @€ @€­mÝÈL€ @€ @€ @€ @€ @€ @€ä$ šÓ´õJ€ @€ @€ @€ @€ @€ @€´N@´u#S0 @€ @€ @€ @€ @€ @€ “€0hNÓÖ+ @€ @€ @€ @€ @€ @€ Ð:aÐÖLÁ @€ @€ @€ @€ @€ @€ @€@N 9M[¯ @€ @€ @€ @€ @€ @€ @€@ë„A[72 @€ @€ @€ @€ @€ @€ @€9 ƒæ4m½ @€ @€ @€ @€ @€ @€ @€­mÝÈL€ @€ @€ @€ @€ @€ @€ä$ šÓ´õJ€ @€ @€ @€ @€ @€ @€´N@´u#S0 @€ @€ @€ @€ @€ @€ “€0hNÓÖ+ @€ @€ @€ @€ @€ @€ Ð:aÐÖLÁ @€ @€ @€ @€ @€ @€ @€@N 9M[¯ @€ @€ @€ @€ @€ @€ @€@ë„A[72 @€ @€ @€ @€ @€ @€ @€9 ƒæ4m½ @€ @€ @€ @€ @€ @€ @€­mÝÈL€ @€ @€ @€ @€ @€ @€ä$ šÓ´õJ€ @€ @€ @€ @€ @€ @€´N@´u#S0 @€ @€ @€ @€ @€ @€ “€0hNÓÖ+ @€ @€ @€ @€ @€ @€ Ð:aÐÖLÁ @€ @€ @€ @€ @€ @€ @€@N 9M[¯ @€ @€ @€ @€ @€ @€ @€@ë„A[72 @€ @€ @€ @€ @€ @€ @€9 ƒæ4m½ @€ @€ @€ @€ @€ @€ @€­mÝÈL€ @€ @€ @€ @€ @€ @€ä$ šÓ´õJ€tZ E§ûÓ @€ @€ @€ @€Í 8ŸÔœ½  @€DaP¿ @€Žl,¦:Ò‰6 @€ @€ @€ @€HMÀù¤Ô&¢ @€Ü„As›¸~  @€Î LOlílo#@€ @€ @€ @€ @ Y瓚õ·; @€aP¿ @€Žì\·½#hƒ @€ @€ @€ @€ÔœOJm"ê!@€ÈM@4·‰ë—è¬ÀÞ©]íMc @€ @€ @€ @€4+à|R³þv'@€ êw€ ÐÝëw„m½-éF @€ @€ @€ @€¤"Ï%ÅóI. @€šmÎÞÎ @€J&Š^˜Ùt Ò5-F€ @€ @€ @€ @€x.)žOr @€ МÀds[Û™ø%eYÎû¹¿+^ùͶòÞñ^wnØNÜù2œ]¼ØÝ&uF€ @€ @€ @€ @€@m/¯{)ÄsI. 
@€šmÖßî @`d/>9úö°úÏ?޽Š : ©ÃïEÞÜ<^»ú~øaéj‡;Õ @€ @€ @€ @€ãøõÄsÏ#ÅsI. @€šè5»½Ý  @€ªx¶÷Lxwó«!þE¬‹ @€ @€ @€ @€¿D ž?Šçây$ @€Í ƒ6? @€*xaòùðÏçþ^^÷Råk[ @€ @€ @€ @€n ÄsGñüQ<‡ä"@€ @ É4ÊP @€@Õñùþ¾å/áøÝÓaîæ‰pyùZÕ[X @€ @€ @€ @€ lëm 3›„ƒö„¢(:Ô™V @€´_@´ý3Ô @à‰ñ/dýêáO~ÎÜ»N-œ ç/…ù¥+áv¹Ê\ @€ @€ @€ @€ä'P„"l,¦ÂôÄÖ°sÝö°¥Í»v@IDATwjWؽ~G˜(zùaè˜ @€@ „A[0$% @€Ö*ÿ‚vÏÔo~­u-?O€ @€ @€ @€ @€ @€Ô+àc[êõ¶ @€ @€ @€ @€ @€ @€ @`$aБ¸¾Z¼øä‡¼C€ @€ @€ @€ @€ÿˆ8ÿÞOÉ;_†77Ï„g{ϰ!@€ @ ^"u(ƒ @ Bïîÿ^»ú¾ h…¦–"@€ @€ @€ @€ ‹ÀÙ@ÏÅsH. @€ÒMcª @€T&ÿEÐׯ~XºZÙš"@€ @€ @€ @€ @ /xþ(žCŠç‘\ @€4/ Úü T@€¨L ,ËðÎõ9AÐÊD-D€ @€ @€ @€ @ _ç‘â¹$ @€Í ƒ6ëow @€@¥Çïžg/Vº¦Å @€ @€ @€ @€ÈW žGŠç’\ @€4+ Ú¬¿Ý  @€• ,•Ëaîæ‰ÊÖ³ @€ @€ @€ @€¢@<—Ï'¹ @€hN@´9{; @€*8sïB¸¼|­Ò5-F€ @€ @€ @€ @€x.)žOr @€ М€0hsöv&@€T*pjá\¥ëYŒ @€ @€ @€ @€? 8Ÿô“„ÿ @€hF@´w» @€*8¿x©ò5-H€ @€ @€ @€ @€(à|’ß @€Í ƒ6ëow @€@eóKW*[ËB @€ @€ @€ @€X)à|ÒJ ÷ @€ê­ßÜŽ @€±Ü.Ʋ®E  @€ @€ @€ @€ à|’ß @€Í ƒ6ëow @€@ee(+[ËB @€ @€ @€ @€X)à|ÒJ ÷ @€ê­ßÜŽ @€ @€ @€ @€ @€ @€ @€U ƒ®šÊƒ @€ @€ @€ @€ @€ @€ @€ú„Aë7·# @€ @€ @€ @€ @€ @€ @`Õ «¦ò  @€ @€ @€ @€ @€ @€ @ ~aÐúÍíH€ @€ @€ @€ @€ @€ @€Xµ€0誩i€~œ @€À„A×èÇ  @€©ì\·=•RÔA€ @€ @€ @€ @€@ÇœOêØ@µC€ Ð:aÐÖLÁ @€Ç ìÚõø7|— @€ @€ @€ @€kp>i€~œ @€À„A×èÇ  @€©ì^¿#lëmI¥u @€ @€ @€ @€ Ðx.)žOr @€ М€0hsöv&@€T*0QôÂ̦•®i1 @€ @€ @€ @€ÄsIñ|’‹ @€æüysöv&@€T.ppÞðòº—*_ׂ @€ @€ @€ @€ä)Ï#ÅsI. @€šmÖßî @€JŠ¢onž ¿žx®Òu-F€ @€ @€ @€ @€@~ñR<Ï%¹ @€hV@´Y» @€*x¶÷Lxwó«¡•ËZ @€ @€ @€ @€@>1Ï!ÅóH. 
@€šm~* @€T.ðÂäóáŸÏý5¼¼î¥Ê×¶  @€ @€ @€ @€ÝˆçŽâù£xÉE€ @€@“i”¡  @€ªâ'òý}Ë_Âñ»§ÃÜÍáòòµª·° @€ @€ @€ @€ØÖÛf67ì EQt¨3­ @€h¿€0hûg¨ @€Àâ_ÈúÕŸ6ü>œ¹w!œZ8Î/^ óKWÂír!”þ¸ @€ @€ @€ @€ÈO EØXL…鉭açºíaïÔ®°{ýŽ0QôòÃÐ1 @€ƒ¶`HJ$@€¬U þíž©ß>üZëZ~ž @€ @€ @€ @€ @€¨WÀǶÔëm7 @€ @€ @€ @€ @€ @€ @€ÀH #qy˜ @€ @€ @€ @€ @€ @€ P¯€0h½Þv#@€ @€ @€ @€ @€ @€ @€Œ$ :—‡  @€ @€ @€ @€ @€ @€ @€õ ƒÖëm7 @€ @€ @€ @€ @€ @€ @€ÀH #qy˜ @€ @€ @€ @€ @€ @€ P¯€0h½Þv#@€ @€ @€ @€ @€ @€ @€Œ$ :—‡  @€ @€ @€ @€ @€ @€ @€õ ƒÖëm7 @€ @€ @€ @€ @€ @€ @€ÀH #qy˜ @€ @€ @€ @€ @€ @€ P¯€0h½Þv#@€ @€ @€ @€ @€ @€ @€Œ$ :—‡  @€ @€ @€ @€ @€ @€ @€õ ƒÖëm7 @€ @€ @€ @€ @€ @€ @€ÀH #qy˜ @€ @€ @€ @€ @€ @€ P¯€0h½Þv#@€ @€ @€ @€ @€ @€ @€Œ$ :—‡  @€ @€ @€ @€ @€ @€ @€õ ƒÖëm7 @€ @€ @€ @€ @€ @€ @€ÀH“#=ía @€N ,•Ëá̽ áÔ¹p~ñR˜_ºn— ¡|ðÇE€ @€ @€ @€ @€@žE(ÂÆb*LOl ;×m{§v…Ýëw„‰Â¿E”ço„®  @€RMa j @€#”e9;ìñ¿+^ùͶòÞñ¿GáøÝÓaîæ‰pyù @€ @€ @€ @€<ˆ&«¼¾¹ÿýïÏîœ Ûz[Â̦áà†=¡(ŠGϺ!@€ @ aÐzœíB€*øâ“£o[ì?ÿ8öÖƒe†”ù{7–o…w®Ï…³‹3—Ð> @€ @€ @€ @€«ˆ:ÿÞOÉ;_†77Ï„g{ϬöG=G€ @€@½ Ö° @€@K¾»ÿcxíêû‚ -™—2  @€ @€ @€ @€¤&?„>žAŠg‘\ @€Ô' ZŸµ @€ Äôõ놖®6Z‡Í  @€ @€ @€ @€h·@<ƒÏ"Å3I. @€ê­ÇÙ. 
@€Fʲ ï\Ÿmt 6'@€ @€ @€ @€ Ðg’âÙ$ @€ã¿± @€ ¿{:œ]¼Øx @€ @€ @€ @€ @ ;ñLR<›ä"@€ @`ü ã7¶ @ Q¥r9ÌÝ<Ñh 6'@€ @€ @€ @€ @ ›ñlR<£ä"@€ @`¼ ãõµ: @ q3÷.„ËËׯC @€ @€ @€ @€tO žMŠg”\ @€ŒW@t¼¾V'@€4.pjá\ã5(€ @€ @€ @€ @€î 8£ÔÝÙêŒ @ aÐtf¡ @€ÀXÎ/^˺%@€ @€ @€ @€ @€@pFÉï @€ñ ƒŽßØ @€Fæ—®4º¿Í  @€ @€ @€ @€è¶€3JÝž¯î @€ÒMcª @€ŒMàv¹0¶µ-L€ @€ @€ @€ @€g”ü @€¿€0èøí@€hT  e£ûÛœ @€ @€ @€ @€n 8£ÔíùêŽ @ É4ÊP @€ @€ @€ @€ @€è†ÀŸÿÿoçÃÉ}þÝnuA€ P‡€0hÊö @€ @€ @€ @€ @€ @€lŽzJ«e8YL}û)Oy› @€À#Þ£;7 @€ @€ @€ @€ @€ @€ @€É ƒ&7 @€ @€ @€ @€ @€ @€ @€ú } w @€ @€ @€ @€ @€ @€ @€ä„A“‰‚ @€ @€ @€ @€ @€ @€ @€}aо…; @€ @€ @€ @€ @€ @€ @€@r ÉDA @€ @€ @€ @€ @€ @€ @€¾€0hß @€ @€ @€ @€ @€ @€ @ 9aÐäF¢  @€ @€ @€ @€ @€ @€ @€@_@´oᎠ@€ @€ @€ @€ @€ @€ œ€0hr#Q @€ @€ @€ @€ @€ @€ @ / Ú·pG€ @€ @€ @€ @€ @€ @€HN@4¹‘(ˆ @€ @€ @€ @€ @€ @€ Ðí[¸#@€ @€ @€ @€ @€ @€ @€$' šÜHD€ @€ @€ @€ @€ @€ @€è ƒö-Ü @€ @€ @€ @€ @€ @€ @€’Mn$ "@€ @€ @€ @€ @€ @€ @€ô„Aûî @€ @€ @€ @€ @€ @€ @€É ƒ&7 @€ @€ @€ @€ @€ @€ @€ú } w @€ @€ @€ @€ @€ @€ @€ä„A“‰‚ @€ @€ @€ @€ @€ @€ @€}aо…; @€ @€ @€ @€ @€ @€ @€@r ÉDA @€ @€ @€ @€ @€ @€ @€¾€0hß @€ @€ @€ @€ @€ @€ @ 9aÐäF¢  @€ @€ @€ @€ @€ @€ @€@_@´oᎠ@€ @€ @€ @€ @€ @€ œ€0hr#Q @€ @€ @€ @€ @€ @€ @ / Ú·pG€ @€ @€ @€ @€ @€ @€HN@4¹‘(ˆ @€ @€ @€ @€ @€ @€ Ðí[¸#@€ @€ @€ @€ @€ @€ @€$' šÜHD€ @€ @€ @€ @€ @€ @€è ƒö-Ü @€ @€ @€ @€ @€ @€ @€’Mn$ "@€ @€ @€ @€ @€ @€ @€ô„Aûî @€ @€ @€ @€ @€ @€ @€É ƒ&7 @€ @€ @€ @€ @€ @€ @€ú } w @€ @€ @€ @€ @€ @€ @€ä„A“‰‚ @€ @€ @€ @€ @€ @€ @€}Éþ­; @  û›Vç‡W^œöDG ß» @€ @€ @€ @€ @€ @€éƒ¦3 • @€U EñÖ°O~Büv9T{Û{ @€ @€ @€ @€ @€ @€ ôªE) @€ @€ @€ @€ @€ @€ @€  ^ @€ @€ @€ @€ @€ @€ @€RMij!@€ @€ @€ @€ @€ @€ @€ ƒ€xI€ @€ @€ @€ @€ @€ @€HI@4¥i¨… @€ @€ @€ @€ @€ @€ 0 :â% @€ @€ @€ @€ @€ @€ @ %aД¦¡ @€ @€ @€ @€ @€ @€ @€À€€0舗 @€ @€ @€ @€ @€ @€ @€”„ASš†Z @€ @€ @€ @€ @€ @€ @€  ^ @€º&P„¢k-é‡ @€ @€ @€ @€„œQJhJ!@€謀0hgG«1 @€À6S( @€ @€ @€ @€ 06g”ÆFka @€&ݹ!@€è¤ÀôÄÖðÍýï;Ù›¦ @€ @€ @€ @€h^ žQrý\àƒÏþzðÕ·óaÿ¾ÃÇŠÁï¯|ýÅ'GgW¾vO€ ·€0hÞó×= ÀÎuÛ…A3˜³  @€ @€ @€ @€4%Ï(¹~.ðÑñ¡9Ïøðþ¢ûþSÿójö¾ã @€Ù ô²í\ã @€LöNíʤSm @€ @€ @€ @€ Є€3JM¨Û“ @ 7aÐÜ&®_ @ ;Ýëw„m½-Ùõ­a @€ @€ @€ @€Æ/Ï&Å3J. 
@€Æ+ :^_« @€˜(zafÓÆëP @€ @€ @€ @€ÿÇÞÀÇUÖûž33i“–®B Ø–J•€ n(д™ašÐfQÁ õïu»rÁ /Ü«WqÁÌÒ6éI›e7z•â…"ÊeQà*ei‹@¡@Û43çùÿNhK’fÎs&™If’ÏñU;sžç<ËûœYÊë|ç{Þ½IÞ=Jl € € _¾uç×—Ö@@‚8kâÉêí‘# b, @@@@@@@@Ɔ€wO’wo € €ä_€0hþé@u­µºlZ­šš1êca € € € € € € € € PüÞ½HÞ=IÞ½Il € €  ù7¦@@  ¦8“Ô•Ó>J ´ ΃@@@@@@@@@ x¼ ¨w/’wO € €ŒŒaБq¦@@  æ„R?šq‰z{äÈ‚ƒ@@@@@@@@@âðî=òîAòîEbC@@‘\Wô„ € PÞ¯ñ]=ýãêÖ]÷ª–WïT[Ým…0,Æ€ € € € € € € € €,0Ë™®j'Ÿ®Îšx²ÒZðH € €cS€0èØ<¯Ì @ððþcìÙ¥ïTšx’º÷cjC÷ÃꑞMjsúµÃt+#ÿcC@@@@@@@@ñ) •Vez‚:84SÍÌU§L8^Xr´ ig|‚0k@@ @€0hœ†€ €Œ–€÷gOžplïŸÑý"€ € € € € € € € € € € €€¿?ÍâïC) € € € € € € € € € € € € € 0ª„AG•ŸÎ@@@@@@@@@@@@@@ þ>”"€ € € € € € € € € € € € € €£*@tTùé@@@@@@@@@@@@@ð êïC) € € € € € € € € € € € € € 0ª„AG•ŸÎ@@@@@@@@@@@@@@ þ>”"€ € € € € € € € € € € € € €£*@tTùé@@@@@@@@@@@@@ð êïC) € € € € € € € € € € € € € 0ª„AG•ŸÎ@@@@@@@@@@@@@@ þ>”"€ € € € € € € € € € € € € €£*@tTùé@@@@@@@@@@@@@ð êïC) € € € € € € € € € € € € € 0ª„AG•ŸÎ@@@@@@@@@@@@@@ þ>”"€ € € € € € € € € € € € € €£*@tTùé@@@@@@@@@@@@@ð êïC) € € € € € € € € € € € € € 0ª„AG•ŸÎ@@@@@@@@@@@@@@ þ>”"€ € € € € € € € € € € € € €£*ÕÞé@@`TÒÆU÷ï~´FHñ@IDATLmè~X=Ò³ImN¿ v˜neäl € € € € € € € € 0>´ÒªLOP‡fªù‘¹ê” Ç«KŽV!ÍZDãóŠ`Ö € €… @´Îc@@FXÀ£nÝu¯jyõNµÕÝ6½Ó € € € € € € € €²€÷cò¯™]êñÔ3½ÖíÜ f9ÓUíäÓÕYOVZëB>cC@@`L “§•I!€ €™¶»¯©o¼Ô¢èy"s%J@@@@@@@@@>ÞÎgû êÎT—M«USœI}Jyˆ € €ùpòÝí#€ €ŽÀS©çÔ§_üAÐÂ9%Œ@@@@@@@@¢ð~„޻ɻ‰ @@FN€0èÈYÓ € 0ªÞŠ _yé:µ%ý⨎ƒÎ@@@@@@@@@ ¸¼{¼{‘¼{’Ø@@@`dƒŽŒ3½ € €£*`ŒQßx©… 訞:G@@@@@@@@`ìxPïž$ïÞ$6@@È¿aÐüÓ € 0ê·îºW=ÐóĨƒ € € € € € € € € €ÀØðîIòîMbC@@ü Í¿1= € €£*6®jyõÎQ#€ € € € € € € € €ÀØðîMòîQbC@@ü ͯ/­#€ €£.pÿîÇÔVwÛ¨ƒ € € € € € € € € €ÀØðîMòîQbC@@ü ͯ/­#€ €£.°¡ûáQ@@@@@@@@@±+À=Jc÷Ü23@@ Z8ç‚‘ € €yx¤gS^Ú¥Q@@@@@@@@@Àà%®@@ò/@4ÿÆô€ €ŒªÀæô £Ú?#€ € € € € € € € €ÀØà¥±}~™ € €@a-ŒóÀ(@@¼ ì0Ýyk›†@@@@@@@@@îQâ@@@ ÿ„AóoL € €À¨ eFµ:G@@@@@@@@±-À=Jcûü2;@@ ZçQ € € € € € € € € € € € € € €ƒ ”… € € € € € € € € € € € € € €@a-ŒóÀ(@@@@@@@@@@@@@@AƒÊÂN@@@@@@@@@@@@@@ 0ƒÆy` € € € € € € € € € € € € € €À „Aea' € € € € € € € € € € € € € P„A ã<0 @@@@@@@@@@@@@@`P ƒ²°@@@@@@@@@@@@@(  …q € € € € € € € € € 
€ € € € 0¨aÐAY؉ € € € € € € € € € € € € €†@¸0†Á(@@ ¨€1æ ¿º§¿Cà°Yæƒ~u(C@@@@@@@@@(.huýE2âwøÚhµ©£µé*¿:”!€cE Z½üd¥Bö›«ÝÄšÖ–.¿:”!P,„A‹åL1N@öÜÝú©¯ùaüþ¿®½\iEÔ‰2@@@@@@@@@@ èôÙZ«˜ß°µR¥œ0¨e 0fŒÒÇ8Z_ì7!Ç8Jy—_Ê(§XÊ8@@@@@@@@@@@@@@ñ(@t<žuæŒ € € € € € € € € € € € € €E#@´hNE@@@@@@@@@@@@@ñ(@t<žuæŒ € € € € € € € € € € € € €E#@´hNE@@@@@@@@@@@@@ñ(@t<žuæŒ € € € € € € € € € € € € €E#@´hNE@@@@@@@@@@@@@ñ(@t<žuæŒ € € € € € € € € € € € € €E#.š‘2P@@@@@@@@@@@@@DÀ¸îC&¤¿í‡a\}Ÿ_9e“aÐb:[Œ@@@@@@@@@@@@@@­I¬|@¼?lŒ g\Ì’I"€ € € € € € € € € € € € € €E*ÀÊ Ezâ6 € € € € € € € € € € €@a |ä,ã;°¿mV]¿þ“úo% @@è#@´@@@@@@@@@@@®À…g[Z0ªKü©¯YjQŒ € €À>gß# € € € € € € € € € € € € € €œaЂ;% @@@@@@@@@@@@@xC€0èUHsZ²dÉ4g┋eÜï‘q#ã=LÎw.•qzÑ©Ôííí+ËÓ¸ue¬®<Vçå#}$^³”Ñ)e”k$%ý>'×ÎVy¼õõkÄÝ v››“É–ÍySΚŖa"¡Ê|N”kýMò÷L¹VfÈ™³¾§'þMg¢å/9Ð0Z´è‚7•”†ßéjç]Ž2'ËX5FO—s6Ý›‹1j‡tñ‚œ¯çåñ Jéçåõý”Ìë×;¶™ßÞv[ókÃBQ>†Þ hõòwã¼ÓqôÉr®ß*×ÂLy_Ÿöú5­^–õŒ\×ÏȵþŒ¼–ïÝéîL¬O$þQÔ'°/ïÛl € 0–Ìæk/—/VWŒ¥9Ä\îøÛé#Ñͨôñ¡­_•~é@@@@@@@@ÆÀí³®.ÈÉžqø9. J\¡þÔ× sp…1ª÷.ûéåÆ˜+ c4Ã…¼®øÝÊO û|G«npYÌ2ší­'Yêd,^²´á‚6_ÏTA^×$Ûš~¡Ü‰.­ÿ€ÜÇZ÷ú8õ” õݽ'Tv›kLû³s•J$ÒƒVÁZ<)Ñ—×ì½§!ô-×ñ#Þùiº{{ãš5k^BûRµ´îRGëKö+سÃU沎¶æÖ¾ågžY7iÒtçËŽ6rœõÜÈiPJ¥®J¶­¸·o;Cy,¸ðôYs*µÒç‰ã9â1+ÛvÄQò¶ú~ Þ(Ûd~dÛF>êŸÏž KÏ‘k¾B\+äï9ÃíG^g–6¿qÛ¹mëÓm]]]^@vĶsÏ]6}âäHŒ£^æóîatÜ#¯{”rïèIé5íMF[ƒ7TjÇ|gÐÂ=;]¥¿ÑÑÖØäWg`ï…¯‹TÆk ‡tƒ\Ûuòú}ó@'¿çò:MÉëý9ÿ+_ÜòTB®ã]ë{íGçÆû÷>—@ø¶d[ã;÷>çoVå@@@@@@@@@@@@­Ü©J;GeÆ0Þê†ûm‹cuï …u“„õŽÙ¯0à •IÕ%!­—ÄTé—ÔÒºj_Ý|kÀÃsVmáÂåSʦ„—ËjŽ‘wô6,éÅ¡nb2_Žýž3aê•Ñêú_¹ÝÛÿuØ¡PíÈj“*ãy’Õú¦öoeeÍ!‘2í®d>&#YS/xŽEkêïJ+÷²µ­-w÷m3èãªêº¨„Ⱦ=œkÃëKŽ÷^.m•ËÃË$}}jg÷¿uv¶>t,¹¬.³Š„¾(#û˜ŒlB.Û~=,«—::´tÆìy_ÖÔ]ùäc7mܸ±'—ý lka<~`©Sú éÿ#r™”ºR6ÒÿyDÚyŸRÎûÂasE¬º¾C¥ô¿µ·7>Ô¿ÚП'}€Ö¡Œ¯¯emL¿×CÞÆû{¡÷ž.ð-¹¶ëÅË b6°Ž¼Nòïl9ül¹Ž/‹.m¸(¹ºñ·}ëIè¹D^Ü>çÏÈj³l}†t2ú6Àc@@@@@@@@@@@@ñ'à­ø(«^.AÐßIðgÈAÐýåôå8·Hxìf/”´y~öHðqYÙ”Ð3`û±DÈ^‚æ¨+ U•‰ÑÅÎÄ©UU×/ÎQ³Öf¼pk¸´DBµC›„/Os\}‚µ£×Ôž*çï÷ŽvÚs{môvä…U/”°Úcrý}½"Ÿ< û¼=]\S3GB½?R‘ðã2¯Ïä::pàÈÌfȲՓeéóç”2OË2êòý´qÕ3ºÇíêèh–ýl € € € € € € € € € € €@a ,Š×^Ò«$dø®¼Lë…düu4Z[‘L¶lÎW?^kƬyÿ!÷y.ØÂ™C‰ôqˆäâÖH†å§É¶¦‹¥%wè­YÔnm•{Ôßb­™¡‚1ª{÷ké¶ Åƒîöš’ü»,ï–Þj²²Jè 
Sz~4ºla2¹òÉA”£±¥uçã$$$W–£&7#žótHÝ××%M7>Ð^QK8ó»r®>g¯š“^w¹£J>´8¾üŒµ‰æ¤ÕQld,½z¯]¹.“÷©œo½`¥>Y:5|¦„ª+Ö¶¶>•óNÆAƒ„Aûd}¶¼¡È2Ö>›V÷IéÕ>5(B !Ç|P¾è,Cqïþ™<3aЪªú™²àûEÚ¨Ê× ¿t#_ɼ™kù•ù¿}˜òÛ ¦D§ä—Pnq•iÚµýùÎõë×w÷"ñ € € € € € € € € € € €À( x÷IkGÝ.÷@™ïax÷a›’Ð]çÄã¼9‘Ø’ëþªªêÒôj¹—û¹nÛ¯=™×'$„79éîhP‰DÚ¯îPË$pú K.êñÞqâÒyÓM+·i£¼¼<2ï¨ã!s«R?Wu¤¿ù*¹'Z]{n²­åþ\µÛ·hM]1ú:Éfk¢¬Ó¸ºªºáÂŽ¶ÆëûŽoHãñPÔ)k‘9Õ éøa$鉃ÂN¸«2VwVg{³—Õ*Êm ½:²jçä$|2ß'BÞSŽ›’»Î‹×W(•—·¾|OaTÛÏÉR­£::G P`a<~`¬¦þgN‰zÊQúß½€d;L9&,_¼Ïs´³ºlꛫªë¿7’Ë×g;^ê#€ € € € € € € € € € €ÀØX—8%ºCîwÎ{t¯¦„‡Ž›*]±÷y®þŽV/?Y‚ %˜6¢Aнã÷VHŒ…JWÊs™bn·XlÙÑÒèUÃmÕ¸éÆ mœOwô‚[´vF4ºolZÍÖÊù·zç¾}9zPUÓðyetcï=þ9js¨ÍÈä²Q߯¬¬9d¨mì=.*ýÊhA÷ö/WýŒHع£ªºnþ¾}Eô`¬¼zóˆU7´ }Þƒ ûN¯ÖG•8ê.í„Þ·F3h€TBŠM@–]?_;úÇ2îs÷•\Ow´úì §ô¼Å±ºåkÛ›ÿPl.Œ@@@@@@@@@@Š_àXgâÏå>é÷˜‰kŒyX‚kÏ¥ÿ¡”q帹ª›'ßœm°N›§É;çu´5Ý ok•%55‡iúµä0'Y+÷©`”yRÕ)óú«Ìh‹Qj‹¬Ö8Ms¼ÒÎ|¥Íñ2Ö“ä€ ¸é¥²‚çï’mM?èÓͰšpø{b\:œ†dn[_üÇÓ·ÚÚðÂdSœ²ÛÅäd[ݾåÒþ³Z™[ŒQ•«žK‡Ü­ÒFH¹¡ƒå"×Ë ÂX)Åi}ËøXëÉÒæÚªxýû;M2ÖË¢ ¯=WÆô]Yè)ðQrl–ëüJ®ÏÚ¨íF¹¯ÊBQ)W›)2§©Ž2³ŒVåÒèÉr½Ì ܸT”¡.+ù¾<Œgs\ߺKâ å2¾ã`›1›d.¿‘y=b´û¸³gNÞÁ® Ï•‹ý0¹öçÉàŽ&+dwÄ+³nZMÕÆñB˕ֺVaŒ¼êcCek…öìlxå5û”¼Ëqæ ¹.žñ®yƒŸ-+F*ï'È5zží½UÞŸŽ‘×„áƒ^„ÙŒpìÖ% :vÏ-3C‘è]"½ôçòô‘|u-m «ßŪë®hokþ¶ôãæ«/ÚE@@@@@@@@@@ú ýQ¹§yN¿}}žHøSrŸjvݯšî_ßšH¼Ø§xßÃ… —O™x€óaGë‹%fbƒVÈáÎÞ/§J3V]ÿž¶nª òå:‡ÝÓ € € € € € € € € € €ŒS¹_:sT™Ž´›þêÚÄŠ‡m<ëׯØ.u®‘??ˆÖÔ5(ãüDÚž`;N_å”.ìPj_ðÒvÌ`åÖ\8eÌݲªã%ɶ–ûkk°}m-’ýÑxýùÊÑßõsë=^Vð”ÐÓuòø”ÁÚËzŸÖ²*_¦Íl“M¯5Últ]÷Ñ´ ½Qæ( !ã8Ή²Zd­ø&{G§Sª1S+{÷Gã ÞÊŸÝûÜ÷o£4*u¡%Xد‰7öÈŸõ²s½×êeÊÿ”±دҀ'0›7á€ð/ewt@QVO •~^®“£­ó7Yçéül®‘¾m¶·7>$Ï/='¿rb¨ìÒç¹}Ë3=‡ÔùR–u´*^÷A‰Ù-ÈÔ®·ß ·J²ûBY‰÷z¿zƒ•%“+Ÿ”ýŸX¯ÿV‰#×PoHy‹eU Ö^!îó{MK€¶(Þ ÏˆÇ§jmþ]®+±Ìéi¹ >#׃·Š¨u[·nÝ©Ô&êŽãœ‰ÿ"«&UžO´H«€¼²!€ G ¢¢"<ïè·tȇyÞƒ ýÆ©õ³æ~¿ß>ž € € € € € € € € € € €#( ¡ûX²µ)$:`h&ÙÚü«´NŸ&a£ç” úTbK‹-¸³ªª~¦£‚­ö(‘¸YÀçC ù%M7ȼÎ5"_¶ OîGwti½„ôò³É9J)×\•ÞµýˆäꦕUH“Þùº1ÑôLG¢ù7«›ÞÞÚø©—Ýo–ePÿEÆÝi[urqMÍí¨`aAc’/¸;NÍ&:P¢£­±Éô¤ß&s±® *Ъª¥uŸØFÐç••5‡8Fy6ßMÆòמ»ß;Ôk¤oã7'[’­^÷'}÷g|¬{à‹3hG_˜©lß~c¾$ÞÁÎí¾ƒú?X—hÚ”î~¹Ê3ê_²ÿ39_ˆÅ–Ùƒ·ûZ0{Ší½ð§ôkâ~Ð<´+½óä Aоí=”Hìnokþ–›6§‰ÏK}Ëx<4 
Csã(@`ŸÀôYs¯”Púå=¹òeþ¿åƒì—òùkòk*Ÿ•ÇWËœ6£Ô£ûð@~ÙæâhuýŪR@@@@@@@@@@ȵÀ®tÊ=-ÙÖô‹á4¼¶µån妣rOµÜRí¿IxÉ ËÙ—²ËÐŒ.1ßµ­,é*Cùa{kS}WWW*CSvK€ê¯rym ¹iõ/;’+snh_Ýô¥5kÖø²îH$^îhmüO ÷ZC·!3áiw†m8rVÖÞÖt~W"ñª­®­<™lÙœÚÙ#¡Yã­¦é»9޾ráÂåS|+e(Œ”F.Ú»Bj†*ÞE²ÛuSÕ­Ïf¬3„‚/§¾(/k›ÞJ¹ <"›.-ZT&Çù®˜*çk]ruóÕÙ´›©®w½éTê<ÉP¼˜©ÎÞý&>mïã"ü»¨Þ «ªëÞ"o¢ÂÒæ»_ë©ð‚ÊÃ9'‰¦ ©”B•y~8íp¬R„A¹ @aÄ–Ö-’ÀÒ„|h=g\õ³;5¯½­ñýɶƋä òò *×Èã/Ê?€jäW<æ7õ^ù¢ß,möiWê|IÍòÓÖ¥ € € € € € € € € € € פ/ZÛÞü‡\4–\½â÷ÒN‹µ-­fWÆêÊ­õ©P¯¯ÖNý EývIïv¹¿ûÙi §ö;0à „Þ(¡ÉË3ïÛ--!­ùûväè,`ôI¹w}eŽšëmÆ[ÅQV3­´µ)÷ÑßõâÖ'½ÐYN,½þ:;Wmu»½`™Úêß¿žR6%t‘ÁKr¬aX M^µ&±òÁ[úÞõëWl×núÒ -ÇÉêz —Mó£ýÛ6þåÙ•¶·¯|Ì5î×lGIлhsÅö^(Ö×H(8ì{N$À+qçs×­[•“§¬4üÇ´r½×•,>Ì6T C•ã8÷••ÌRŽn”@Ƀúoò%ó&ÓmÞš\ÝøídrÅÓ~µ½ÄÈ?êS=îûäËáßýêzeÞ°£B++âñɶº”#€ € € € € € € € € € €¹0m-+rÓÖë­¤vîþR4Caç­CéW;ªÖvœÜÃýJOÚ )@è×¶„1¿)sû«_Þ{Óµ¶,ýÚØ¯Ì¨.YÀèçûíæYÅñsÒ„o.Iû•ÔùÃ]]u°¡vt4?'ÉOVÖwŸÑêR‡úî³=^¯Ÿ+)}ëój÷k©ÿò­3ŒÂöÕ-í¶ë¥·ùžM7ŽQsmõeÙ_Ûêd[®SnÒöÚ6Ú|PÚµæ3²í;ÿõ‹ë½0Z½üÝò^ãYûnr>.ñVâõ­”eáë«@›ÿÈò0ª÷ð}ÓíS‡ €Â¥‘¯É÷Œévï÷Ô(÷›²âçyÞ—Íý }vx¿ãv¿üùÂs³OµÞ"ùU†ƒfè2ïË4 € € € € € € € € € € wT:ý\wÒÙÙú¬´ù¨­]G¹slu)—Û®õ¢Aö÷Û%I´/®K4mê·37O$—hšlM9FÈV'›r×M_™Mý uÏŠÇgȽô¶Õ5®{ÅM7­Üf«7Ôr L®‘õFïô;^Nú¼%º,æWg`Y‰c_ñT®¥ëò97oLr-ú†‡{Çíª™Çï÷\–d´†AýŽjYï¢ZZÝ;ðx/ *«Ç>-ÿÿ[9—ëÎ=wÙ´u ýyѽjÇ8—“².׫ ï=;^ùÇå²PðC{Ÿówv„A³ó¢6 Ð+°8¾üxùbeýµùBr}²µù²¡²­Y³æ¥·nŠ)cî¶µ!¿<òÏòÅÇNµµC9 € € € € € € € € € € à+ «M®M¬xзÎÐ o;Ô(ý&[啱ºr¹üûû?7Û^Øúäõý÷åî™éI·ôfß|š”Õ4ß{J<^êS%p‘ôõ§ŽDËíXqR¨ô“rÿz™_u™Ç£f×u~urQf\u­Ç1—Øêô-—±Ÿ×÷ù`S*Ý6Øþ\î“àæ¶öŒv&Ùêô+×Nw¿çƒ=1êƒíî>¹ÿKþü@òŸsYä÷xÉK”IèpN²­éòç£ùØwû_„ï…Ú8‹÷›Ç€Æ5WØ•³§ëׯïvÎ[û9h6D´@O ÃBÂ9á—_òð_*^œO>þÐ'†;“®®®]»wô,–/=þ_ä´šZ29ü…áöÇñ € € € € € € € € € € €€Ÿ€¹~åW>œ2 zZÒ‘Uå¦fÛG8¢ì(£Z½û·³m;hýdrå“R·k¿úƼ*+ñÝ/÷Œ¯ÔʽrvªÔ7h¹ßñvh£rõº’PmU†.÷í–UA¯U‰Dzߎ<=H&o”•%7û5/§TÄã“ýêô-Kº;ÏM«îÃÓ*u†ìÿ„œ›«¥äºû£üÙ.+XnYÛÚrOßcòòؘÿ³µë“ݵâºö6µ¹¬¢¢"lë;Ûò޶æV |^ÚÞÖôý޶¦åù#ù|½e;¾¡Ô/¶÷ÂÊxíQò>Þo®½AîDÓïüê ·l‹»#!¯«—†ÛÎx<>ç/ÌñˆÈœ@`| ÄbËŽ/ròKòÖgs]õÅ7öøT \´nݪç£ÕµK• mô;H–°¿´²ò‚ïuv®ÚêW2@@@@@@@@@@@`ÈÚüϵ(A¤g,UäVn]N”{­+­íªôõö:ìáºß1Žó¿üü‹qÜGÝê/Éä oÎ2õÜn®6rÛ¢RgžY7IB\o—•A}·”Ñk|+ä®Ð•¡Ü)ÍÕflR‚š*y—”{õ웄Xeð—ŠÞŸýŽñ 
d¿,Ü™çMkkpS&Éfn¨çÿ5Á÷­õü³æ¶”——×å*áÛa1Ù{aÄq¬ïƒÚ˜Ÿçû”lH$vÆjš¤ŸKóÝ×XkŸ0èX;£Ìò.à†ÃsäÛoGFÝÙ‘ã_BH¶µÜ«®_+ÿpÉø‹42ª²ðÄ’ÊØ¾í;> @@@@@@@@@@@¡È*–²¢ß_†rhc•Ú®”ÜEVzœ¤­½u–ÔÔ&qÐö>ôoYí1Ù¶âÞAËr¸3™h¹Išóþä}Ó=¹ƒNšªÞ%·ÓûŸ ¥6®K4mÊû÷t ‹=Ý)aßÌaP©2Î{ä¯ý‚Cãm·5¿6”ã²=ÆèôNëkA)'›v×¶¶>«©—טžâwœœãêyG/8tÎ þiM¢)oáo¿1|Y¾Jâ|±F©”›¾e$ìÝ´›tBaÐ,±³zÁgÙ6Õ@1' ¿l‘/5±MÌuͶ:C)7Êýºí8호­å € € € € € € € € € € €ÀP$Lt¿—·UÓ®’ šu Ykô©à¨Èé}žfxhþ˜¡ (wËêO&[6çzð:¤½P¥ï&«žÞç[!Ç…2×ýÙÇãa ¿9* ÐHV¯iOâ*Ð ¸°}o(¤ÿ­®¿iÉÒúÚ³âñÆ3nªÛ{áñøT ‚Z^¿æµµ‰ŽÄILw¿|¿¼W#[6„A³Ñ¢.Œ{¹G-8K>üfùAȯŠü.׫‚îíÏ[T>é,¿£ß±¤ºáȽÇð7 ÿ@@@@@@@@È›÷(å–†(H­ÕãùX(û0¨rõ›mc6Ú<`«STå:?LY•õ½6£u^¯‘ýoqw=a •¥O‘ã >Gå-`U¯;-VS÷]Gëœk.ž‹Å¤ÀnYP뜣›';¥ÏE«~«®û×%ñ†ric\ߤ\lï…8SZû†‡%¯â…â_ù;;;_‘ñäm•éጭµ-Ë\Ècgl €Àˆ È7•³lÊ—[KXÓÖ‚¹ü G«|‘:ׯ–Öæ|)¿Ê¯e 0~ÊôõšÙ5~&ÌL@@@@@@@@@`D¼{”Ø@`< ˜—ó9[×}íÕšêÛ…„ ³ ¡iuoƒR(ëÓ©•A%¨¿Õ6ç¡”­N´á;nzDà‰ÑšoÔC2ÍI®˜*+/8°³sU^\2õkÙïTÆkˆhõ6É©¾[®ëSdœ^Èr¢wœÍÙÒvÆâ5mOĪë×IoqÆJƒHPÆ$+K:ï …Ô7£5õÏËóÛeA­ÛR;znëìl}v°ÃÆî¾âz/tŒžm»¨ä}pãHž/­¼Ðº>n$û,ö¾ƒûÝñ{Ÿ+,Ç;ºç H JÁëœi«Ú“2wØê §ÜI¥»T‰ÿÛ·ü\ aÐá s,cLààÐLõxê™16+¦ƒ € € € € € € € €…"àݣĆãG@VÜžÏÙvwOrËz£p¹ìEϲ·fþj¯S<5Œr·åa´’UÔ3¬í:ÎuÎü‰µ^.+3S¾-†JÃÞØG< ºhÑo MŒ!ã;2¤Ý#ä5t¬àHn¾ì+Ý;hÿÑï­•›¿{vî¾8RZr’ôèP[”káMrìò÷‘² *VÝð „ŒnÓÆÜò¨ÙùÛ‡‰ÝCm»Ž+¶÷BWéÙÖ¥qµòBÕ#¶WýM[5bÃ)ŠŽüÓDE1™g]«+„Õ9F9ÇHü ù°™¥Œ–_Å0ÊWJúNV!Ü*·J4t‹ÒîµÛÜœL¶Œè@ž ½yÎÓœ¡htÙ<ù™cü»2Û:Û›ï÷¯3¼ÒdrÅÓÑêú'd,GfnIŸ´¤¦æ°5­­Ï\‡/ó#s ƒŽ—“Í<@@@@@@@@ï%6?Ú¨¼†Aó!)!;ëÊ )7¿!×|Ì˯MÉ€¼èW>”²%K–L•ãD·ôô‘ 6öÎÅõê¸ÚLï­›‡ÿ«ˆÇ'OW¥o—ÜÍ1Ú1Gi㥴9Ê}” í€7ºtú,Î8âJû†á­âY«_‰˜ÿ–`ê¤}Ãy Õ[eFo•€é?kÊ^9®ºþv£Íºé]7­O$þ1œ¦ ñØb{/t´:ØêèæwµÓýû÷ú½×Áþã)ü=„A ÿø+**ÂÓgÍ©”dþyò|޼¤zã—–<ê}"5” ÉçÈŽ9{vÉSçB1&VÓp¿,õ|£1¦­£­ù‘\M$º´á«>mðkO‚©w%Û?áW'×eUÕõ‹åÍú¶«õÍí­Ÿõ­°°ÐÏSÀiU5‰œÖ{ûŒZ¾¨Ý%Å®O•\ýZò ƒÊ«ÒLx¿Ôù{®:¤(^S&¯ÖíÜP¼`ä € € € € € € € € PÐÞ=Jl 0~då¿¢ ƒJ¶aVoÂç4…Sc+ ªó°2h:rÀt/DR¬[(­r-//Ì;â- •c–HØórÿ|qÙ”•äÁžM€Œê¨qv¶7ý1oX&áÕÖ¾+”æb@¯`uT^wÑ2§4-«†vÉë0av«::š^ÈE£ÝF±½JÐź2¨vFø}Pk ƒ²e# ŸMsÔ-vªêºèŒYsr´Ó.«~D>{,…¾ÿ¬åXïc«\Þ´/—¶þ7ZÝð‹ÊÊšCö¯™ýטÛäCF~!ÁçR ò‹Ó²o}èGÈ‹é"ß1Éxeì^PpØ[1œ§aO²‹úÛ°¤Î¶:¹(7Êí²µ#«øžl«C9ŒKŽV³œœýÛu| 1K@@@@@@@@$àÝ›äݣĆãG@oJßlµueÐM›*º«ïyHç~eÐrføöYè…!=ìñKžc~´ºþGóŽ^°Y‡ÔZ/{#ÉÏ2õ¢Ìh%i¥ß* Áy 
VågÓ"¥Õébõ§D=«©o‹V/w~:¹V‹í½Pò.³­:iwDÙÑþ¬ó/‚ EùFS®E7ÄÅ5µ§Æªë¿'zLŽ'àH4ôÂpé„Ç¢Kë¿î-}=œö×$šþGõ°_Òßg”óýê䲬ªªî IôŸíצ¼A=·éñ‡nö«c++¦ód›Kq–›·ØÆíºæA[Ü”ë?ÙÛÑ'ÙëPƃ@H;ªvòéãaªÌ@@@@@@@@FXÀ»7É»G‰ (T… NŒm±©]7nì)Ô9 e\F¥wå8¿cBÚõê$ÆúÊ  ãñ%úcY4íÏj¼XþžégULekÚŸH¶5&ÐIöåù¼Ž]ë Ï.U*tw´º®ñœxÜPÌë€ÆOãrÍZ tЈ¾:F½6~Î@nfÊ·îÜ8u+^@3¬B¿—U-OÍçDäËS™,|Ù §ô¢Ñeó†Ó—«ÕõÖ㵪µÖÉQ]¢«åÃ<ìל¬Ò¸b8_‹ñ<ùye™VÞ¯uønò©÷7ß 9*ܵÝÝ )ëJ¦Ú  Œ³&ž¬Þ9rŒÌ†i € € € € € € € € €@!x÷$y÷&±!€…,àNž<É:>IÁYëPA‰ÒEÍàȺ”CØ–,m¸ ,Tú¸äF>eËŽ ¡ùþ‡“–@æ]^0Ó5ª¡a~ŸI ô/nÙ4GÉú°ŒaC>{GÙœúÒPé_b5 Ïg_´ýº€ÑÊô ™ôˆ¾ÆÓÊÖ‚ƒãñÜg}ÏœËËË#^ŠÞ hŽ$ƒ¼[ÏבÈ=ÑêÚ‡ÚozÇîfù`KùoÔû×ÔÌñ­“£B ºÖÛšJ¥]{€uFŠù< 2¢ÝuV<>C~á`ß ³ûÆDÓfß:9*\¿~ÅvY!×w9l/€]Yyõ—r4$šAÏ_uÙ´Z5;4£ÀGÊð@@@@@@@@@ ¼{‘¼{’¼{“Ø@BèþÇ?¶[ǧuiEE…ïQÖ6ÆAyË¥¨§éºY¯–]ÚðeG›²šå”¼Ì]Ÿ’ xP22?tYò²»sf²µét/˜éw[^úôi´««kWÇê¦FéfwÏa’þ‚„€ï•ñå)0ÝëúÓXuç|†EQ.ŒyÑÖŒ«Cù¹Î3t,¯­í/Ã0Šj7TEuºr7Ø3âñ©SB¥I ·–»V³hI«ÙÚ8¿‰-­«n_Ý|sGöVíì\µ5ZSïW™éXù‡¥vTÉ2)¿*S\쯪®›/ê'YÚÚ¸6±âAKýŠ‹ý<í7¡"Þ1Ñx´ Y'à­ÖéZk宂×ß[ýšÓ‘ðaR¾Õ¯e 0~¦8“Ô•Ó>ª¾òÒujKÚú]~üÀ0S@@@@@@@@ÈJÀ ‚z÷"y÷$±!€…. á¶T´ºá 2ú®x7iÒ!^ùˆ‡ï Ý¯ïøŒv_Ô–ë%1ø˜J›/ö=®P;núOÙŒEV¬ü¾Ô¿T2#Ù–±®Ä)_QÚ<"­="ÙÊ”qï}õ%çÛnkz-ãA£XL®|RºÿïÏ9ñøì ºôG«s…㌜‡cµú¡ä„^–êÊQœòXïÚz¹£F6œ)¯…)c=×ó# škÑ"hoA<^2Å)»]^0'g3\ù@~V+s‹|øl”¸Ûsé»UÚ)7t°£ÜCäÍü¥œJù‚4-P»ZO–6×VÅëßß‘hÊzùhÇuÒW¥__Ú¨Z)ÏkT-?iä7 IºÙ¯ :VΓ¿Lñ”:a=Û6Z£õßlurY.Kto’KÏ7 ê„ÕáÒçrÙ/m!€@q Ì ¤~4ãõ—ZÔ=O÷d= € € € € € € € €Œ¸ÀÛ#Gö®JtÄéé†! 
Y yjß0¨S& j1îqõ¶Û"KFML&š:,M|±,—A^:”JÞ¥[kó°ûgYõóÏ’)y°G9ߘhz&»öLIvõóWûæDb‹´~÷Ç[EwúsOÖ!/ªN—ìÎ)²TøpÇ*9DÕ¸8¾üYŒÍ³c˱€QZÂÜþ›äTF4œéõg“ÿˆÇ_)aÐñwÎÕ1NéÕ¢ •¡5©”û­Îöæûl\ååå‘9G-8#¤ô$ Ya«/‹w†UȬ:÷Üeï¸é¦•Yý‚ƦǺyÞÑ ž“ÕMÊÔ´ÿ–ªêÚ·u´µdõë ™Úd¿tÑ8¤èõ]Þ‡ø³kUÆ  ÆÊyÊ0½¢Ûíh=Ë6hùÂßmurY.xOÙÚ“/U‡ÚêPŽãOÀû°WOÿ¸ºu×½ªåÕ;ÕV7«àñÆŒ@@@@@@@@@@Ír¦«Úɧ«³&ž,ynÛç’@â…¬¶É;×\¿Q—¸þaQ¿cÇKÙNµëÅUæ;]¹¯þ/,è­Èê[±€ Ï‹×*+`þ4›!Jvä~­ÜµF™“n÷ŸT"‘ÎæøÁêíL/ÄOÜ=çÖ[ÎûóE‹•…'N}¿vB5gÈ÷„· 6Û>9.rÂÿ"õ>b«Kù´zÁz”ÑGXëä°‚\ßsrØÜ¸hŠ0è¸8ÍoL2oðVîüì{|õ Q© “m+îõ©Õ¯hãÆ=òg½ì\_UÝPï(õŸ =°_¥O$Ì9oÂá_Êîè€"ß§^_sZÐ"óù¾•³\Êó­Š×½_þ5ëû…P~Íaí­‰Ä‹þcì_:–ÎSÿ™ï3ùB6K®Uß Woò­ãBÓSÖ1É—¿wKs pj͵Wø ã«¿R8ü`¿J]x¶9¥c_Àû±g—¾S}hâIêþÝ© Ý«Gz6©ÍéÔÓ-?¬#ÿÙƒ @@@@@@@@Æ¥€wb™ž ÍTó#sÕ)ŽW'–­BZîfCŠP@Þ׬+g¸as¸Lí¡"œÞˆ ùŽDb{¬º>-9ŠÌëƒJٴٳß,ƒúûˆ ,ÇÉê§?•dƒÝ‹oÔ§ÜÔ§e5Ës< ÉUšbíÛ¬Ü<êÎëÖ­Û!ƒ¼eÏ.³Ž„ɪU2úÓeà1Êt—Ëñ—%“+žö8…Y 8®ûåøŸ É@ )È›õ`Þ8ào<äQ A”ÆHÅ55sd•ÀëMǘä îΆ®DâÕ@õ©ÔÑÖØÖÞ®"Îòá2*ûvÉ«ªª¥uŸîXÝü£};=02í•·©eÒÔ—ä¨É,*9!]o«î¦Ý`æ{›çɦTøårÚWÕæ•‘œ‰qÍ«Z~nÄo“_U öÔ¯Ê N@ÞS/÷Tן•òþømžmüŠ)GÞœ=y±½ÆÑ´™* € € € € € €¹â@IDAT € € €Œ'm6KöÀwÆŽê AÝè[‰Bc´Þ*’‡øQètø8)ÿ»_B-óV•ìM¥åré¾qÕeÉÕßÌ×\$Ç0ÃÖ¶¬HêŸî³5‡ò=AÎk¥ék++k‰”F.’a~^‚žÓt1‘Ч¥Þ—Ô¥J¯š]÷L2¥òöIµ ¼¼<â-à—EÓCªº /‘õ‹y­ ©ƒ1zPÁ½àǨsAL+d&\#/ ?kok:8AнN&[6§vö|P~ŽÀú뎣¯\¸pù”½Çù»£­ùåƒËåR­•<+‚´—MSâñR©¾ï1Æ<Ó¡ºo÷­3 p,ž§S,ʧÞÊ Ök½ÓZ'‡ÇÙekN>¢ ƒÚ(G@@@@@@@@@@±-`Ô¬ÔúkUˆVׯˆÖ4üUVÙ¼Uÿ¤ª¦þ‹UÕuñÊXÝIUUõ3sÔMžš1VKí„ÎÈSçyoVVý¨ïʧ{F`Œùa>ƒ ^7’Ú³æd%Gÿ”sÞÅü;èìl}¶½­ù몧çí’ÿyÊ¿ö륒U,R:Ù ÜšH¼(G<ê{”Ö%‡¶ÀwA@ßã³(<Ò-• ¨.É⪊aÐqrÄbËŽ–PX¥mºx»ëÅ­Oz úœ-×Ù¹j«ÛmN“·ú÷¯§”M ]ä_gÿRùÔº~ÿ½ý÷Èꉵý÷ ÿÙÁ¡RñÔSüZ’¥H›U"‘ö«Ó·l,Ÿ§¾ó,ÊÇF•ÙÆm”;¢aP×(kÔ]šÞ69Ê@@@@@@@@@@@"H»jƒmèrïõ ¶:¹+×§Jâh B)Á·O8Jÿ»£Õ‘ˆs¯3A?/AÑ-*宿ܵ$NÿmkMæv¦­N¡–˪ ±ŽÍ¨-;·ÿ㟭õ†YA+s˜­ Yªµ(²aÉäÊ'uªçtI+m±ÍIÊ­óÐUÐwº»ÏÎPD-ìó4o¥Ÿ¢}ŸÈJ€†‹â`T±˜pøsRÅ÷|KXóÙîWRçwuu¥,Íe]ÜÑÑüœvÓŸ´(¿Gpi¶_XÒÝ/¯’v}Cq„UTTL´õŸ]¹® PßTíÛÆX>O}çYŒeyõ Öq§GvePùU ßëÞ¯|ÍñuoU  € € € € € € € € € € PPOýßCTÆø/þc̱‹kjæä{àÞÊŸ’q8Ì¿óh6 Sù·•ÛÒtk ƒ*­ÞZYYsHn{ÎÜÚ’%K¦Åjꛣ5uWÆj>[ZwVe¼öØls$•••H@÷ðÌ=½^"÷ò_³~ýún[½á”Ÿ—ÊXÞokc¨+ƒ.\¸p‚¬Fû–h¼þühMýe½«ÕV×ßdëo8åíí+“µë¾ekC3Oêøf lmPžA@»¿ËP²o·äg>¼ïIHÞå£yl~Ì6Í cÌžÚ7&vV<>C"a~cÏàŒë^qÓM+· ^:ü½í«[ÖH‚ÿN¿–ä 
cÞ]ó«3°lÍš5/ɇAÇÀýýŸë)ÓgÍYÔßПUUÕ$_ÏòmÁ˜»;Úšþê[§OáX?O}¦Z”6ö0è¯ êƒJÈ;R”à @@@@@@@@@@È‘ÀÆ{$WqŸ_s²B§›’å~urQf"útk;®ú½µÎ(UØþÂSHžâU[÷á²’ÛêäªÜ)™òY9¿µZ9_–6ªç–H(ôèŒYswx«¬JPô ;¶žyfÝ$¿>#‘ésýÊ÷•éÔûçéÁ!zâ¥iëâPL œÐçWbÕõëÄâñ²)¾&«Ñ>¨C:!Yž¯Ëå¿Lþœ‹5,ÈÓ”z›íI©ßZÛ׺$­e­G…¬Ü]¯¬‘kf‡ßZ©ãÇêÞåWg¸eUñú÷ÊõvÌpÛÇg}R¨ô“ò«e~S•ÀØ£f×u~urQf\u­Ç1—ØêìWî*ëØåM¢v¿ã†¸Ã)ÑH{a¿ÃÅôz¿òeãâ< œt1=7ö•Ae¹{ÿ_ŠÉñ|]Ǿ2¨, ø‹]އGs € € € € € € € € € € €@Á˜+âI °.ß–Ì„5pêje]½/ßãÌÔ~WWWJÊîÎT¾w¿Ê.í]isïŽ<ý½páò)ÚQÝ“܇Äi”„ õ»äÑQ·ÝÖüÚþµÞØ“›@aÐîWÍão•§GŽsNÀ–KÖ“x*—ÕFÏ‹#åïРDžMÎb¬ýÎö¦eA8ßó`dÛê–…ãØr-ðú‚|j•­ÝPØùœ­Îpʵ£óÚþpÆVèÇ-ô3”ƒñÉW•­YôÚ‘XB<™h¼Ñ(³Ùo<òárJE<>Ù¯ÎÀ²öÕMwÉ/Kl¸¿ïsù¥‚…Þrê}÷ ù±öÿ‚ç¥äwnO¯Î¦ýñpž²ñ(´º²tz€•AG6 jtÚ>•ëŠ0h¡]LŒ@@@@@@@@@@F\ •6^ÈÌ·c­Ž÷V¬ó­3ŒB/Ó ™‰³}›0fg÷ötÁ® ê] û¹ïz õôðÄ©Ÿ²×^Ò©¡ÏHØsº­ÉÍXpZ¹3líxå%%á¼æ±–ÔÔ&Wj}±(­‡Aåâï Ðf^à ãñ7Éùò]¡UrOmH$¬y‰s¡Ê éº»ÏNIQ×DãµçöÙ•³‡±¥µ1i?–³ÇYCy}óg–9]o kùx»mp)£×Øêä¨\~ BÝéÛ–üºÀ´PÉ»|ëì_èÊ<÷ßÝoODEL¼ßž!i«›6ªÓV‡òá $òúí°µ"×Âg8e·/Y²dš­®_yeå³&î’@ü»ýêQf j7*êò!`_>YëÇGr’[Ü]OØVð4JŸ"cÊúú”O\ß9åMHG´³|¨ó]RSûyã™ãw¼ÑªIÊåó4ø6ÞÎSp™Bª©­aP'ü5r13I&—ÚÚ‘ðu·­å € € € € € € € € € € €Àx0iu…m®’˜à8ÎÚªêúŶº¶r/)aÈÕ±{‡­®rÍåÖ:RaC"±Ó¸æ†£õ♳æÞ¹hÑVñ´·]ÚðU‰œ|Þ^SÂƬ1 R÷¦›Vn“î±Õ•ëãÄÃŽ>þ*[½,Êu´¦þ2'ºÅ»ö²8N.+u@6õ{vîþ¹Ôße;F+çËUÕuQ[½ åÑèò7;ŽNÚçgî[ÓÖx[Ðv©7tÔŽÝŸ‘WÈv[ rÎÞíL˜rßPò±¥µ±pYDBîú[_”Û²ÛÙ›¤F! 
H0ñDÛx7=¢aPï_~&`³ß¸äâI}g½ìojÇ‹7Høí¿¶%b:ä0¨cœz¿¶½««vÿʯÎ`eãí< fPøûì+ƒj²†3s9OyØEk ¹D§-@@@@@@@@@@(jd¢é @]g„Ö%Z©$¤÷•ñx‰µþ bKëΙ1{ÞC’¡X8Hñ€]fUûêæ›ì,è§«›¯‘ Çš@ƒÔúÔHYäoõË@õ©ä­N­n¸J;*Pµ7_Ò“ºl¦2îruÐU)ÏGãõU XPUU?3ZÓp³¬˜ùu9d9/35`W½ÕÖ­[õ¼D_Z‚#ÐæèÒúË+++³ œl»ªºöm:^'ÁÕÙË>—°ö•÷ñšL®|ÒÞÃ5Riµª$¤®=öÅ›µZ©ץvvíÿ³w/pvUõ¡ø×:g&É ¨<P°ÕµÒZ+Š dÒ!òP±>®ÖÖÛV{ûW‹mõ¶Þ[½Ö¿Õz«HpÌ0ŠÚÆúÀGƒ¯¿Ø Vò¯ ¯df¯ÿÚ!`2dÎ>3sf2ïîg:{ïõ[¯ï:çLð³g•‰u¿j¥ú,ïºyB™û’JVä×È¡Õ5öSxJèé©·òº|¬…mŶ¿˜Y›yf¶lº‰[Û̼®çwÌœý‡ —¬øÛ;úþø® âk¨â÷Y=ç<¿Vïx_Há¬ÜWõ[!…ër²vkÉÅ}+nM wÍÊOä„÷Sóò,h©FþLëõîE‹WÜ‘?·®Ì‹ú³"…ÛóêÞ“_ÏOŽ1R<>'³ÌÏ×Ã~]?6–”“b;÷ûQÉ “ø•¿ñ üW•+–CâœêOÔ6Cå¿UGÓœª˜}•ç­9?ëaÐdвN­–æ_CJ=¼6ãÌl5k_}>~/¦ ?oñdª®S‹<ã(¬Ü´ùë6¿¬«ÿ±×ÆåÈVö—w­Ü¾½CÒ @€ @€ @€ @€ @€@¸à‹ÍŸ;Í!óò³§ó*°Î¯(WL€À0ʨ,Z¾¸³3~;7Qù\v~vûY9)n}wèº;.Y¾¾HéKy7¡_ô‡âް½ÿ8³þ¬ZOÊq'æÇÎOÊÉP/É¿g¶4¼”úc*Þ¸±Ñ¸«¥øqtùå—ß·`Ѳ³;;Â7²QK;¨f§S³ý©9 í¦œ„veêO›úëÅm1Õ·ÄÅqZqL‘:Ž«ÕÒq9‘ðøü,ÿüü™9{(SÏkôáu»vJ­64VÞܽdÙGò®˜•;&æy”yYoêìš¶láâå EX¿=lûùç;özúéçÌ©Ïè8ªÞçæ¿çåùüöÀ˜×yÏœ‘cXöøuŒõù¡ë©B¸ùñ{'åkmÑÙKßb=ï’[}ä5:4ùY›ù'9 ðkyP?Ï»ëþWâÏë};¾­¶óŽbÆÓêáèwtïÑ)Ôž›ÏÝ6ŸT)=˜Bÿër\nÞ1†iÛ/ï:»ëà§lx|½Zé<ïòš_»r·j{­o¾Øu½×Í}¶˜“I×å6ªv×­L¾ßgã“ø¦dÐI¼¸ýÍ©OàùÕûð’Aóëkò6Âÿ™ÿñìÁ¦Ÿ?R–äo>øÓ¡|óAþ€Y6X{ÞO÷ï|è¾µÍcžX:U×é‰ãûNþ×ăUŠò°Êÿhç,sâ󌼽}ó#•ß®à @€ @€ @€ @€ @€ @€=Ö¯]ùÃîžåçÄz¼$ßoéYðœWð¤û†ZŒo9a£Vþ¿™»37ö|¶»êáóÝ)“ýrºßyë.[5¡wC,wøÌÉ…‹S¨_œ[K‚ÍÙó˜üëmy Þ–w|4‰lz ™ewæÿ×¢ånÒ]¿r’Ùú­[n~÷ž÷†r~oÿ#<¤>³Ü±óÈ–êÅØ•Gýçy Þ•§ŸsZ¶åÿÝ[l{sØ´iØ; nj4Ìs™Ÿç²)'·4äaæäÐ\çļ¾'>Zwh©‘9Cøßváœ+«ní^|nNôlžþ•7¶zEî粡Žóž;n~ã!‡3=ÏqÉPë¶+¾L‡.R\qùe¶”ßÓ®~µ³·À®Ý’,8£cæìüþ¿»wi{¯ò{ô³½kVž7wîÜú¡ezq“#†üÀ»c/ÿRß‹cr]ÔcÖΚãE!ç¬ {üy[ð•91¯éîœÜ·´Õ¹vtu¼v÷ö݃VéŃ6)˜ÊëÔ„eÅ«•b*“†Çìˆi×·j4í/ÿá“ ÚTH!É/P~]Ô¶}'¬¸ûïÂ_Ü÷é°aÛ5ᆾ[ÃC鉠“ùÍ @€ @€ @€ @€À åɗϕϕϕϕϕϕÏ9 0UÖ5V^Ó×_œœ?ün³9§ðË"¯]»æÂOŒYŸcÐQïšU×îèO/ÎF®ƒîöê¢L*ÌY…Ý»æÂ7†F£i>É^¹(çSêÎÅ; ÛE‘Â_ådÖß»²±òÖ²ƒ·Ö~”Í;Ke2èM›6õõ/Ír ¹r;*¤ô`~­¼1'‚^ÚŽæ´12õë×?лzåËóëá-ùŸ‰¿YkûªJEñ§ù=ú†\Z<å)O©ÎkŒA2èÊj´\Nü8ôoOÓ«Õ¦ w8½½«nO1~¡Yýü½ ó6Æ-n?^[Ö¬­ü¾ÿ¼bõªo6¤p*¯Ó $ãòvŠ·W,ÕŽ®Œic@~ WöS!´æš"0Ñî/ ¶õSáïïÿ\ØRlhÃ7^ @€ @€ @€ @€ÆX 
|Ψ|Þè]ù¹£òù#¦ŠÀúƪŠá%ùùþOåD¨¾Ñœwîãýqûó×­¹hõhö³¿Ú.“w>|oÞ!´ø›œkQ¹)S{Æ™~˜7$ûíÞËV¾¯=í=ÚÊÚËV~¹èO¿›×ìÆv¶»¯¶ò×0|!¿ô^²nÍ…¹g2ëÕW_ôPNÌûƾ꜇öȨ /…»Šþìž;nž[îð:}”ín»¿ïy9-mUNnÿÖÙ)]“@Oî]}ái½k.þî¾æçØµE€ÀÄ(¿‘ï/îûL¸£̾œgbÀ% @€ @€ @€ @€- ”Ï•Ï!}ü? ³j´\O &º@Þ%ô?󺻟û’œbw^~fû´ãS‡;¯œDõƒÓßß»å–K7mÚ4ª»ŽwŒ£Uoc£qWnûOÎèYþáÎZ|S-¦Sóž€/̞͓]š(ç—¦/…"üóMÿõã+6oÞ¼³yøÈK7n¼øþÜʲ=Kÿ¦£V{{~M¼.ÏáÀa·œƒs>ÀÕý±¸àŠ5«¾YÕÎÖâáÆ¡õ®…ýƒœ7*[°hÑ9ǯ]{ÉõUí V¾aÃ¥wç²×/\¸ìÝiz<·–ÂòœNóüÁâ[½Ÿ“«oʹ=WýÅ[ïºåߦÚû U§ñ·~íÊæq•?ï[¸xÙ 9¿êÌüš86¿È¿ßõ;„'§îËï‹[Sˆ·åí<‘×üê;ŠG6^Óh šÌY; U'ƒÆä›I¼0ªòˆ„OîËîÅ+>—?ü5Ÿeú÷œÙü¢æ1ã£t~ϲgL«×þ«ÙhrÆþ-9›¾rwÁfmŒç²…‹—ÿŸZŒ4Øó?úÒŽôÔuë.ìá–_7æ×ÅQƒ¶‘·â.¿a°òªûSi-Yþ©ü?hfR¤þ¼Ýýª%7k$eÝ‹—_—“AO¬òõÔ[l›±ç–ìƒÅŽôþI==Óž]ëz$gðÏñ~¹vÍ…³GÚ—úãOà·_ûÉüoÿ‘_ÿȈ›ÙÆaí/ÿüãpTCRþ, ïÚú©P~3Ÿƒ @€ @€ @€ @€#x~ç3Çæ¼)ç¼ þÈâHûÍú§<ã+£Ùü m¿ô#÷úÆ¥oy#ƒŽpü¼ôœúËüLÜùã¤Õ#ÌïŸó¿~É›ß_)b¼ ,X´ü×;;Âkòø^ž žR|Jþ}h¾®í5æü wÞAòŽœuM~Z÷_‹¸ý_¯X½ú–½b¦øÅé§Ÿ3gúÌŽWätÐWe£çÆžœó žœ=H“‹ÝžiïÍ1?Ê›7}'á;ÅÎðí&9 ›•ë TëšýÒœ,yr¨Åœ4œž—ÇxèÀ$×2Ç Ïë®üa~Wžë9qîËyçÙ«w'ÊØÚÝhN€=®£^››×âùy¿‘ž›çY¾ög<¡¯”¶å×þ–Ó–<×;²ËµEHWŒ×‘'Œß1Ø•\k×5ë4¿‡þ±wÍÊ·5‹™jeƒîp7Õ &ã|·…Gîf6Z éÈyóæuLÞ¬úâ‚üMƒ&ƒæTw„iaqFÊßðÄcaϲyÍAËEQ½Ýö[þÕëô+‹qÃóM-_OÝýÓŽè á£=—çO õæÿ«JþÇnÓ?Š£=Fí °¾øÈw%‚îz½ @€ @€ @€ @€˜”åÓ—Ï%½ºëÅ“r~&E€VöØïo÷ˆ¯åŸ¦ÅÙýÛvþòþûo¿gòæfì1ëž^uÕ%[sŸÛýóxksçÎí<昞ÔW³:úÃý·×·ß×lWÁÇ+õë×?»Ý¸ûçñ”I¢qæÌÙµ4­ëábÛÝ_l4ʹæ<Љ{äÄÕòèËŸ5{΢\¯£>iV_Gš•s“ê;LwîÞAuÏ0ç(bzÒÞÙôûMñö}ÜÒ·$ƒNâåÿr£qÿ¢ÅËû~³À^SÎ[kÏ>üð§å{7îu’\”ßwö¼6§Ì½`Ð)ÅðÚ\¶ÏdÐX«-´^Y½Û¸óЦ1…Ö©h§óΠꨟ#F=4ÔwõÓt0y¨ÿ_Ó…L:þT„UîŸo±›t˜&D€ @€ @€ @€ @€ÀãåsI¯œñ¼Eå#û×qB€) PìÞ¡òÎ)0×QŸâæÍ›wæŸ2ñkÂ&íN-E'ý±{½îÉ-†,ÐQ„§ Ø[ù‰mÔŠ ûyðÄɴ玷Çq¼¶’s×â–ªÁÅþŽçTÅLäòü ywÐ&G /éî^zÄÀˆ¼cꌼ5u÷Àû{]Çp鯷ïuoèÖièfû¥FÞּܴêxYU@;ÊS-üne;­·²Lkw\¶å9 @€ @€ @€ @€ Ð>ò¹¤òù$ @€v¤PVU;±ˆ7UÅLµr;ƒNúOß!.l6ÍX«Ÿ’Ë¿Ð,f"—=Xl»ä€ÚÌÿwtœ¾¯yÄ|„ÎúY¹ì{–ú”§žífíyoàyhžh:°Â ×ÖiPšqTcq]¨øÚürš—‡üþÑö®~švÓ_į7 PH€À¤¸f{þ˜r @€ @€ @€ @€ @`Êç“^4ý٣в&  @€ÆJ {ñòÏ„nýá§}Eøé]µm×_Óhl«þï§–^”ó¶¿Ü×ɃiÛµûº?•ïÙt’¯~ ñkUSÌo›WUÅLäò/6÷æ>¯h6‡ÃfcíœfuB ?º¼qáæ¦1-Z§¡ösغ⑟äu¿«é0bxI¹«lÓ˜Îëé90#ÿÑüH!Ý_ŸßiB/ŸÁ @€ˆ±Ümóƒs¤ßÍMOîéé µðæÁûß=̰k“´ª°)W^™O4åD&Ù„óN›}yJ߬šVNË|Ç‚ ªŠiy¹ãf¬…œ úÄ£Ì$Ïã8,g±ýf>;®]‰ 
»{*ò‰+ŸØëžwâÂǯRìyü|_')}~ݺ‹îÜWÑpîY§á¨íŸ:½iÛ¿¥¶4ë=ÅøæSzzn3ܲ… —š?¸ÏkZ?¥;껢iŒB&¥@Þu{RÎˤ @€ @€ @€ @€ØÿžOÚÿk` @€‘ ¤Ÿ5k£LïªÏè<½YÌHÊŽ¨wýiÞìïЊ6ÊMü.«ˆ™’Å’A§À²§"üßêiÆ93~kuÜÈ"º®¿='{Ωj%Å¥U1C-ïKÅMë¤xêüùógÎëéÉ»¨ÆW4‹-BhÞV³Êƒ”Y§A`ÆÛíF£?¤´¶Ù°rVóìYõ®w4‹nYì ÿ½j§ßœ ¶nÆKïnê @€ @€ @€ @€ @€ @€L>¢Ÿ¯šU-Æ÷,XðÚ¼á_{îÅËßœAÿª²Õ]›ø­¼§2n H‹ÞÛXÙ›Ró¬í’!ÖâûÎì9÷ÄÑ"yÕ«–Û~gUûy¬)ôkªâ†Z¾¾±ê†¼aÚ׫—øfvΘsêœÚŒÓòùôÁâò·Ýyó ?®üà¬þ`÷­Ó`2ãï~*ª¿] ïrûÎvï¶{úéçÌɉ X%’Ç÷©ªå @€ @€ @€ @€ @€ @€SKà‘´­Ì‰Êûä59b<º£kÚåóæÍ›Ñ$jHE /=7o:úU•rZY_>X7UË%ƒN•/ß ®žj< ^«îÑ1«£‡:$~©E‹Î9¾{ñòOάϼ9'‚¾'·ÕQU?ç¾]³¶xäUqS¹¼q*ãL¦¹_Óhl[xv΢®ÅVÎ+Æ3=ìè¯ÌŸÿÚ37l¸´2´ª½î³W¼'Ǽ³*®,ÏŸ#_ѸøG­Ä'fS£ñ`þ iÄ^¿ïúqNN¸kvlÍñY§fôã«lÝš‹Vç×Ò›ò£¦ÌrÆýßuöŠ›/¿ìˆ;ƒ…‹W,Ï/Ë¿ªªŸÿ k$ýTµ¯œ @€ @€ @€ @€ @€ @`b ô®¹ø[9'æÓyole&9§å•¡_™ëü4Äx}N»©üI1ÜŠpgеÃj¡8*—•cóïptÞTðyù¼õ,óæEˆËB£Ñßʘ¦jŒdÐ!®|Ny>|Ñâåÿ}ˆÕF-üÆ~ü±Í›7ïl¥ƒu—]ôÝ‹W¼<';žUãouÎìüÖž¥o]×Xõ¥Êø}œuÖY³kÓþ¹¿vP,AÃag_¹«á¨ý±¸ #Ô_?œNŠ¢¸`8õ†RÇ: EkÿÆöýo¯×ê?¨øv‚Z-¦Õ /ÿ­[n~ï¦M›úZõܹs;Ÿ~ü‰—ã[J¦©¯-»¶:>q @€ @€ @€ @€ @€ @€O í¦…SrÞ×Ó[}ΟyVŽ}VNú̹ž1ç|æ£þXíÖó>«ñøï”ú‹P¼åò5«~öø='û ºO–Áoæ—éÓò+õïÛ’ƒŽ=öS¡ÅdÐrdÅö_¾¾>}ÖóòîU#ÍoÐgÆzýꜵýž¾ôžõk/ú÷ª:eù)==ÏŠ]œë¿3[ÜJ]1EzCoï%7µ?ÌÀ+V¯úZÞžøúüsüPšÈɪÛw<ÔÉPê 7Ö: Wnlëå]b¯Ë Öån»Ò¬çü^(ÿÌýù!‡ýÒ…=Ëßµ®±òšfñeÙ™K–þN=Õ?”O_Rûhyñ‘ÞË.þFk±¢ @€ @€ @€ @€ @€ @€¦ªÀºu+ï9£gùK§Õã—sÎËsö—CÞ¸ñ¶T¤sòf†_Ý_c˜HýJH«Õ†±^~ùå÷-X´ììÎŽðœ:­•&sÛ©ñÔî%ËoÊɰW¦þ´©¿^ÜS}KÜQ<§Ç©ã¸Z-R™`ççÌ·Ù­´ýXL‘Ò‡s‚Üç»ýßé³yœJ?1¦+®ºê’­C©3ÜXë4\¹±¯×·mëû;fÎ^¼+Q¼¢ûóÒXßÌ Ö?H)}&_ÿhgJ7Þ¹ãÈ¢óˆ¢VzÞFô×ò²7æ²ç>ú æâœ¨ü­›n¸îÝÕ‘" @€ @€ @€ @€ @€ @€„pecå­§õô¼¬«6ó 9ìcm’sk¾¸­Ø¶lc£q×X÷=Qû“ :QWnã.wø\töÒÅ)Ô/ÎoÔ™­6•“ӎɱo‹õø¶Žrßœö¦—[øÖB½YþÌŸÿÚ'ÕgN{A-…¹¡–^CxfÞÏsvñàÒ¬cÎOL÷§oË¿oËåy÷Ϙ§ÛrþØÍ}Ûïûòúõëé˜ÔT@2¨WBذaÃÙὧõôüÃŒZן×büoùzFÛiR¸«ˆáC[ï¸ùÿlÚ´©¯íí±Á7nï^¼ü’œ´÷ö¦USúæº5+Ú4f §ê:m[»¸bõê[rƒÝÝ=KOµÚÇr‚õ3ÚÖAù Å;ò7 ¬n[›"@€ @€ @€ @€ @€ @€ ° .½;ß¾z÷Ï"Nêé™öãFcÇ ÜÉ £Â:1ݽËàŸœÑ³üõø¦ZL§æLíæd¶úfT¤”¾ŠðÏ7ýׯؼyóδ5 U‹ B¨7MM!ä˜ñsLÍu?þ­Ž¤·±êª“{zþåÈZ×›ò·ôÄNÎuk­Öß+.…ïç7ÒǶÞyó%9‘ú‘½Ê\ @€ @€ @€ @€ @€ @€ØAÇ=ï¼ê 0¸Àé§Ÿ3gúÌŽWätÐWå¤ÈçÆžœ·ñ}rˆáàµR ÛCL÷春˜¾‹ðbgøöºuÝ90Öu{¬S{=G£µ×ôô>=Î8+o=?'X›­Ï ¢y[콜<Ê-±óûì›y{ìoìì ÿ¶~íÊïíåjª üök?™?’Gv|ý##nbd‡µ¿üóWŒÃQ mH¯Üò®¡UM€ @€ 
@€ @€ @€!|é° !z|„žòŒ¯ì—¼ô#Tÿ—¾eäì—Ù·§Ó—žóO™­=¿=­íßVò3Äçý’7¿ÿŽBï @€ÀD°3èD_ÁQÿUW]²5wñ¹Ý?÷6wîÜÎcŽ9áI}õ0«£?Ü{}û}×4Ûp2¦ÖiL¹‡ÕÙç;rÅOîþÙÕÆ¼yófÌ>üðÃCÿôCc-=Ð÷ðÎ{7ÌèÛþau¢ @€ @€ @€ @€ @€ @€“R@2è¤\ÖÑŸÔæÍ›wæŸÛsOåcœ X§qº0»‡µiÓ¦Gòé»vßõ‹ @€ @€ @€ @€ @€ @€{ Ôö¾tE€ @€ @€ @€ @€ @€ @€ 0ž$ƒŽ§Õ0 @€ @€ @€ @€ @€ @€ @€ÀÉ @\ @€ @€ @€ @€ @€ @€ @€Æ“€dÐñ´ÆB€ @€ @€ @€ @€ @€ @€ tˆK @€ @€ @€ @€ @€ @€ @€Àx :žVÃX @€ @€ @€ @€ @€ @€ @€$ƒqI€ @€ @€ @€ @€ @€ @€O’AÇÓj  @€ @€ @€ @€ @€ @€ @`€€dÐ .  @€ @€ @€ @€ @€ @€ @€ãI@2èxZ c!@€ @€ @€ @€ @€ @€ @€  :Ä% @€ @€ @€ @€ @€ @€ @`< HO«a, @€ @€ @€ @€ @€ @€ @€’A€¸$@€ @€ @€ @€ @€ @€ @€Œ'É ãi5Œ… @€ @€ @€ @€ @€ @€ 0@@2è— @€ @€ @€ @€ @€ @€ @€ñ$ t<­†± @€ @€ @€ @€ @€ @€ @€Hâ’ @€ @€ @€ @€ @€ @€ 0ž$ƒŽ§Õ0 @€ @€ @€ @€ @€ @€ @€ÀÉ @\ @€ @€ @€ @€ @€ @€ @€Æ“€dÐñ´ÆB€ @€ @€ @€ @€ @€ @€ tˆK @€ @€ @€ @€ @€ @€ @€ÀxèOƒ1 @ Z ¥t~³¨WüFüݧ–^Þ,F @€ @€ @€ @€ @€ @€ÀÄ :qÖÊH  @€À.o®~ëû›Q|ã#ŸøËƒdÐfHÊ @€ @€ @€ @€ @€ @€H 6Æj¨ @€ @€ @€ @€ @€ @€ @€)' tÊ-¹  @€ @€ @€ @€ @€ @€ @€I@2èDZ-c%@€sŸÄÂ@IDAT @€ @€ @€ @€ @€ @€¦œ€dÐ)·ä&L€ @€ @€ @€ @€ @€ @€L$É iµŒ• @€ @€ @€ @€ @€ @€˜r’A§Ü’›0 @€ @€ @€ @€ @€ @€ 0‘$ƒN¤Õ2V @€ @€ @€ @€ @€ @€ @`Ê HrKn @€ @€ @€ @€ @€ @€ @€ÀD :‘VËX  @€ @€ @€ @€ @€ @€ @€)' tÊ-¹  @€“U †8Y§f^ @€ @€ @€ @€ìgÏ'íçÐ= @€À” :å_ @€É"03NŸ,S1 @€ @€ @€ @€Æ™€ç“ÆÙ‚ @€À”è˜r36a @€À$8¢~h¸¡ïÖI:;Ó"@€ @€ @€ @€ @` ”Ï'9Zxý©©iðÏo›þõá«Mƒ @€ØC@2èN  @€Yà„Σ%ƒNä4v @€ @€ @€ @€ãX |>ÉѺÀy¯®ˆMaS<â­ï¯ˆRL€ @àqÚãgN @€&´ÀÉÓOœÐã7x @€ @€ @€ @€Ư€ç“ÆïÚ @€ÀÔ :5ÖÙ,  @€) ð‚iLJÃjs¦ÀLM‘ @€ @€ @€ @€±(ŸK*ŸOr @€ °ÿ$ƒî?{= @€Ú*Pµ°ôÀW´µM @€ @€ @€ @€ @ |.©|>ÉA€ @€Àþð/òýg¯g @€@ÛNñ¢ðüÎg¶½]  @€ @€ @€ @€ 05Êç‘Êç’ @€ì_É û×_ï @€¶ ÄÃ{g/ ‡×ik»#@€ @€ @€ @€ @`ê ”Ï!•Ï#•Ï%9 @€Ø¿’A÷¯¿Þ  @€m˜U; |pö$„¶]Vƒ @€ @€ @€ @€¦Ž@™Z>‡T>ä @€ @`ÿ HÝÿk` @€¶ Õñ”ðñCþ0<¿ó™mo[ƒ @€ @€ @€ @€Lnò¹£òù£ò9$ @€ãC c| Ã( @€Ú-P~#߇æ¼)|ñ‘ï†U~%l)¶¶» í @€ @€ @€ @€ 0‰«Í K|E8uÆ‹BŒqÍÌT @€L|É  Í€ 0¨@ù?ȾºëÅá•3^®Ýq}¸fûuá';o·÷ßNÛCÊÿç @€ @€ @€ @€ @`ê ÄÃÌ8=Q?4œÐyt8yú‰áÓŽõX›zfL€ @`H‹dˆ @€‘ ”ÿ틦?{×ÏHÛRŸ @€ @€ @€ @€ @€[_Û2¶Þz#@€ @€ @€ @€ @€ @€ @€ I@2è¸ @€ @€ @€ @€ @€ @€ @€ÆV@2èØzë @€ @€ @€ @€ @€ @€ 0$É 
CâL€ @€ @€ @€ @€ @€ @€[É cë­7 @€ @€ @€ @€ @€ @€ @€À$ƒ‰K0 @€ @€ @€ @€ @€ @€ @`l$ƒŽ­·Þ @€ @€ @€ @€ @€ @€L&ɘŸ4ç´¯×\Ü×M÷ŸãsXFE€ @€ @€ @€ @€ @€ @€ÀþX´èœcSgýå!ÅÄž”b:$¤phÛ!!ÆCJÛR ÷Å·¦¶†|žïÝcüÖÎþþ¯®o¬úÏý=}ô_ë^|î‹Sª½¸V‹/J)üZ÷¡1…ÙÎ)ü2×¹5ÏëÖ<×[CHßÝVlkll4îÚG[ãùV\°hÙÜÎŽðšjÏÊs|J é°¼ŽOÉszr ±/>¯UÚ’Ï·ä¹ÞbqMØ‘>ßÛ»êöñ<1c#0Õ$ƒNµ7_ @€ @€ @€ @€ @€ @€ƒ¼¦§çðé±ë59ñs^q^N»£WäA/‹!>íÑa–SØ=œ_ý>8ß98_ž˜ç•øº®Z×Gó\¾BqɽwÜÒÈóxdw­Ç•íwÖjW>~cÀIN¸ÜÚ»æÂ¸ÝÖËyóæuÌ9ì¨y~g䉽&ÿ°²ƒÇ¦¶ëìÑ9•÷ê¹è¨|ou®—:SZ´dŵ)¤+SJkÖ­¹è'm¤Æ²€dÐ!“©@€ @€ @€ @€ @€ @€ @`r twŸû´ÐYwN üƒœ9½³{41ž]‹õ³9ü˜¿ê^²ìƒ7]ÝÊÍ›7ïlg?ÍÚZ°`É‘]Ó?ç¶<ÇÕšÅV–w:-s±^«¿:Ïã½Ýg¯xcïeþÛžñ9éuZNÀÄ´)ïžù‰ÓN;wÖhŒ{(mž¹déo-Z¼üµX[›×qD‰ ûè·–íÎ+ »Ï^þWózzÜGŒ[Œ²@[>¼FyŒš'@€ @€ @€ @€ @€ @€ @ Í‹Î^öšzšþ9yð¿vèÀ¡ç'©Õã¿t÷,ÿýeí¼.“C ÿ˜ç7³í–me·r÷Ì·tÜñ½2©¶Ýí·Ú^9ÇŽPÿFÐoµZg8q¥a¬Å÷RëúNw÷9Ç § u¾€dÐáÛ©I€ @€ @€ @€ @€ @€ @`B t/Y¶,ÅxÅh$IdFÞ§ó²…‹W¼~uZ ­•»v–É‹­Vn\¹ÛiGšö/gô,êpÛN½¹sçvv/^váXÌqÏñåØbgç·º/}Áž÷ 0º’AG×Wë @€ @€ @€ @€ @€ @€Æ•ÀÂ%+ÞR¼0'õuìïå1”Ûk~tÁ‚%G¶k,'õôL[´xÅêÜÞ[ÚÕfe;17­þ%Ö:ލŒmCÀ)==süI_ˆ±¶¼ Í ½‰¡öÕrwÙ¡WVƒáì÷ìá Z @€ @€ @€ @€ @€ @€†.Ðݳôôœ|ùáP¦`¶x¤nÏÉ£ß)]—BøYLáþŠk±ÖWÄ4+„ÚÁµK1ÌÍM¾(†xL‹Mï ËC9¨cæ´æ‹ž¡Ô$6>»>óŠ\öêAÊ÷y;¥pK)×K?Ës¼µ–çX„xx¬…§†~=ñŒâû¬¼ûfNk}Vv¸$Ç5 qY™ì:«6óK¹— ¥±<¯ÛbH_ÈsÝŠpg½Ø’Û¨‡¢~D-Gæaÿz^Ëy®³[j7Æs›W,ìYþ²u•×´TGà :l:  @€ @€ @€ @€ @€ @€L2‰0ÔëieÄ)91ò¢ŠOõ^vñ7s|ÝÚ±hÑŠ“RGzsN ]ž n¥VŽýý‹–ÿúúµ+ØJü`1 ¯x]Npl=4¥+Rêÿ_Usœ?þÌÎæÌωžççöŸ3Xÿy‡VVÞiY¸ƒ4ô¬Zׇr+-'‚敼¼¯¯øÀúµýû M>~{îܹGwÒ)õß•×nÞãƒœäØŽPO—ž~ú9¿qÕU—l$ÌmÚ ´ ˆš @€ @€ @€ @€ @€ @€ 0ÞžUïzgNd<¾rœ)ý<„â÷{׬º¶2vk×^øã|û¯ééùàŒúÌÎ}ž¾°'ÜꨇßÏ7‡ zJOÏÁ1¦¿megμÛé/r’äÛ×­YYî"Zylذáá´&'Ô®{NmÆŸ…X{O¾žQY±ÍÝ=+Ê;ÿ¨¥fSøQ }çõ®¹ø»-Åç Í›7ïÌ?óéÆœX»¼ÂÿÎI¡OnV¿Ü vúAŸÎ1ÝÍâ” 02É #óS› 0!úS®Ýq}¸fûuá';o·÷ßNÛó×3µüMbžI€ @€ @€ @€ @€@k9i#ÌŒÓÃõCà G‡“§Ÿ^0íøP9åÃA€“R`Á‚%GÖRxOþÐôÈ‚þ´oÛŽ—¯_¿ú¶¦-~¾Ñ¸#‡±hÉŠOäßo©¬w%ƒ¾¯2n€ƒj]ïÏãž2Hñ·ÓéßvÊîñíq¿úôÇÆŽœéú…=Ëÿ%ÖâçsbæìêZí‰8sÉ’£b ´ÔZJ½÷ÛVlj4l)~AëÖ\¸²»{é—Bgí+yÐöòø­ì¾páÙË޶>þøM'´UÀ¿ÔÛÊ©1 @€ÀøÈÿ1¾°í;aÅÝþâ¾O‡ Û® 7ôÝJH_Ke4 @€ @€ @€ @€ÆT 
ü"ùò9¢òy¢ò¹¢òù¢ò9£òy£ò¹#L>ήÎ7†l:³”vEßâv$‚îÙÏÿì{wþëR™\Z&.Ztα{Ömõ|áâeÏÍy®o«ŽOßÛñÐÎyÃIݳíu•×ôõ¥ßËSïÞóþhž×ÓôÈɼ‡Tõ‘ÿ”jíš•¿?’DÐÇúèí]u{ß¶/!•»½6=jµøÁÓN;wVÓ … [@2è°éT$@€Œoû‹‡ÂŸmýTøûû?¶[Ç÷`Ž @€ @€ @€ @€ý.P>gT>oô®üÜQùü‘ƒ&—@ µ3«f”“ÿîòÆ%߯ŠjùÆß‹þw´R/ÕjMw ¬¼3å?ädÒŽÁÊwÝOáÞœîzú† —¶%sýÚ•ßëEéZ4í· …9Iöø¼ 邪¦rrê¿Ü»å¦2)¶mßî°~ý¥[ŠíeâkØÒ¼ÿ8kæ¬ú›Ç(%@`¸’A‡+§ @` ÜÒwgxÛ½ ?Øù³qT>‡ä @€É!0¿gùÑ9‘ðMg“ÒƒÛêûHÓ˜®½lÕÚ¼ûôO+›¨ÇÃ+ct/>÷%94ï^ÙüH1ýa¹Óe󨡕^±zÕ7C‘þ×Ðj =:utüq®Õ4¬Ü}uû}¿¿iÓ¦¾¡÷мƺuÝ™zßÒ<*g ÆðŽÐÓS¯ŠSN€ÀК~ ½95 @€ö·@ù|qßgÂý÷îï¡èŸ @€ @€ @€ @€ *P>T>‡d‡Ð º€†M€ÓjÕ;J†?sÕU—lPµ­—1„êdÐ":äNc­zÇÌ6ô®^yÉÛn¡ÂÃÜõ—9 òÇ-„+äÔžžCBˆ¯«ªœŠâüÑ\ÜÐ{yÞoô+ÍÆ‘wh=æ¬8sQ³e O@2èðÜÔ"@€ŒKümIá¯ï[%t\®ŽA @€ @€ @€ @€˜XeBhùj)5MzXAÏÒãB '¼¿çõ®DÉÆÊ¯ïy¯Ýçw7²ë}ín·l/墨°ªÝ¼+è'B£Ñ_7ÒòÞÆ…Wæ×ÎíÍÚ‰)œ<¯§çÀf1ʺ@ÇЫ¨A€ 0úó×ì¬zð+ãqhÆD€ @€ @€ @€ @€À(ŸKz匆z´Ñ^FC'@`ª ä$ÁËC¸13”?OxàôU¯ZVîÆ™7îå#ÆÊdÐ"†Î¡Œ¢³V[PSú¿U1#-¿¦ÑضhÉŠ•¹wŒ´­=ë—k““LŸŸwmzô¥˜—xL޼D»^CKí-Æúìú´ßÌåOx­ ZG•þ5^I$€ 01®Ýq}ØRlƒ5J @€ @€ @€ @€&Œ@ù\Rù|’ƒ&¯ÀÕW_ôÐXÌ.ÅþmUýäDÃ!å;å]?Ϭj³¯èÿBUL;Ê‹þ¢·íìÙÆ‡ßŒ1Vm¸yCcåÍ{ÖÍó¼3he’g=Õ~{4Ç mSQ êƒ`*š˜3׿µäç7à{>~÷G4‹á¼W7/W:1®Ù~ÝĸQ @€ @€ @€ @€ 0îÊç“^4ýÙã~œH€ã\ Õ§…Š.ó ê­Î┞žƒssI‡é¡+ÿG«mŽ$®û/¯]³SNÞ¬že‹Åz¬˜_)¥o±¹¶„åJÿ½z†ÕãnË`4B` HB‹mª 09òüe³™lúaåO³ã¼Wçï¿qL:Ÿì³/ó™tv&D€ @€ @€ @€ @€@sÏ'5÷QJ€ƒ Ì;·óècOüZ=œ‘B¬ÜÅsð–žXrPmúÓCŒM“Gó“ÓßË5‹'Önÿõë×?нdÅæ–ŸÓ®Öc /­J ÍÙ§7´«¿VÚ¹£xägGÔºš&½æµ>9·Uîò:&ö­Œ[ ‰. t¢¯ ñ @€v ÜÞ  @€ @€ @€ @€ŒŠ€ç“F…U£˜Œµ=KíŒáy9ð%9[ð伃äÜ<ÑådÛ¶]æn¹ZЇW5šw±Ü¼;|L~ÅPîÒÛ– šbxA•[­èÓdÐkm9éõö zä`¨yÝZ°àµO^¿þÒ-ƒÅ¸O€ÀÐ$ƒÍK4 @`Ü <œ¶Û± @€ @€ @€ @€[ÀóI{ýŒží˜?ÿµOªÏè<6ïÊùÌz,ŽÍ»@>;'@ž”!OÈ÷ºë¯*‰ñ±¸áþ.B<¼Üz²éC™´8fG*ÂÏcå ZNΩŒ‡TF×jŸÉÉ™Ÿ¬Œkg@J‡æµnÚb½«£»dЦJ ´. ´u+‘ @€q-B×ã38 @€ @€ @€ @€&®€ç“&îÚ9†+0¯§çÀ9¡ëùyçÍgÅZ:.¦Úq!¦ãRŠÇ•»>þªÝÚ›s6OüUöœÕb8¢²¥"ý²2¦­eíq8묳ÎCk!µ4ÎiOC€¨H-[*bš3„… P! 
´H1 @€ @€ @€ @€ @€ @€É.0wîÜÎcŽ}îi¡–ÎÊy~/Î 'ä9ïNDÌ©†»² ó>•cžu8¸|BåΠ±ï¼…Q(‰±mɧýÍ©ÂǪÉz :VØú™’A§Ä2›$ @€ @€ @€ @€ @€ @€' ,\¼ì„œâùöœì¹8çyÚ®]-ŸØSûïäñ^ÙjѶäÌʾr@ÞMû—Ù³•Ðʘz¨R4žêqb<ÛÛ” :%—ݤ  @€ @€ @€ @€ @€ @€¦²Ài==Oîªu½?üAŒqBæå¤Ëêְ¨ÕwVÅ´³¼–ÂCmÊ õ˜òΚíI,mç[m+vmÕJV&äu+C€ @€ @€ @€ @€ @€ @€ÀÎ:{ÅkëµôÉœh8뉥m¾“RŠá«yËÌKSˆ;j1\Ø®r»;«R%ë©ÿ võ×J;ý¡v`½•ÀbR Uͯ…fö_H­6mÿu®g“O@2èä[S3"@€ @€ @€ @€ @€ @€ °Oî³WüÓr"èèäæäÏÜöu)¤¯¦¾ü@±mÓ—_–ƒYtö²ù!Öö9®aÝLéÞªi±>ú ¯{ ¾Sî¯=´y…Ø£é‰wZ;&Þ ˜Àø :~ׯÈ @€ @€ @€ @€ @€ @€´M`Ñ’ͽ£]ÉŠyçÊrbéOrêãORJß©øîƒ÷Õ¾õÕ+jÛ ›7toóâj¡Lλ#[´­¿‹{ch¾ÏhN¸½>ô§wÝ [ï©Vôÿ õh‘T H­RN€ @€ @€ @€ @€ @€ @`‚ ,\¼¬'O!'‚ýÈIŸÛcL×åš? )ü°(Ší µë®l¬¼uh­¥iC‹oBÌÉ’ÍÛ—œÙ¼§GKËþªÆÔJ;eÌÎ"nÖ<4äõ˜ÑÛX¹®Õ6Å 0q$ƒNܵ3r @€ @€ @€ @€ @€ @€•gô,j-†ª Ü# '€^CqE éÊÞbûB£Ñ¿Gñ°NS¬ÍiW¢ä®ÄpOå@R<¶2¦y~Gµ«¹má‘{§…™M›‹!9oÞ¼ŽM›6õ5 TH€À„ :á—Ð @€ @€ @€ @€ @€ @`< |ú ÍGóóÛüßZò‰¦ùPß\ýÖó›·¢”­ äÝ%?BœÓR¾ÖWô½íŠÆÅ?j)~(A)bÓ¿\k­6Y+Šÿ µæáyGÓçµÚ^›â~£Mí„/7÷/Z¼¼?£ ¾?h.›}øáOË}ÞØ®~µC€Àø :>×Ũ @€ @€ @€ @€ @€ @`‚ \ðÅæ‰NyZór.Ô¼Šé_Q®˜- ”»‚Æ„ʦRÞÛ{Ù…ÓRÃÊ!RU-ïHÚ<»sL|ë€Ô•réà³K᤹sçvnÞ¼yçUGåô¤žži!÷׊u‹Hyr[òäŽlû;ž“Ëol£Œ‰/Ðò‡ãÄŸª @€ @€ @€ @€ @€ @€˜ZyWÐ74ÝYr7GJéÿÍDв›œØX¹;iÞÉsðÄÎK÷ÅFãÞ|ë?ÜÞû2ÆiO}úI'ì}st®žYtåDÐ8­½­§oWµkõSªb” 0ñ$ƒNü54 @€ @€ @€ @€ @€ @€ûÈ»‚¾~Ÿ{ÞLáŽm÷ßõ§{ÞóÒÓ«ÚÍ[a1ß)~³ªÍzg8­*¦幟Wµ£=ÛH!~mÏë}çìÙ¶÷»¯~Ü#@`ÿ tìßîõN€ @€ @€ @€ @€ @€ @€Àh,X°à ¼Så3ªÚN)üÃÆ·WŤüäžž®<–—Uµ1”Awµ‹¯‡P;¯Y»1Ä×åò¿kÓŽ²œx›wamGK¿j£gñµZgE~l ¿¶`Á’#ׯ_}Û¯jŽÞÙYg5»>cÖÇRH·ÄP»1ÅM;Sºñþ»~qÓ¦M›½žµL`j HÚëoö @€ @€ @€ @€ @€ @€“T ³sÎÑ-M-ö}¹¥¸g¼Úd­¶kÑ9ìèÔ½dÅyØó:ÞøàÖtÞÕW_ôP»úÕ©.P‘>ÕyÌŸ @€ @€ @€ @€ @€ @€ÀÄèïH-%ƒn0Ý0ê3¬Õ^ÓbÓZŒÛvùå—ß—O.­ªSï¨ýqUÌHÊc-ŽJûy§Í¾<®oV-§e¾c×N°U#,?í´sgÅZÈÉ OpO€ÀôƒN£Q @€ @€ @€ @€ @€ @€”JàŽ;n:büf³¢R ñü—-;÷ïšõ;…ölíÆÍWWªÕ/¥±ûNás)o^pJýGt:züOBˆOŽx4æmà¬ôñíV]5f‡&×]±i]ÏüÞo§bÿm“®ÓÖ¼¿VˆõxÝ„̲Ë-^z×êÕoÀ[<›¸öŠ- ¡ò¾æ=“~Œ7~¡vã÷'ÒWNM@ôÔ¼ô&@€ @€ @€ @€ @€ @€ Ð1õ,ìšX±•÷­]¿ùò‰õ¿×å—o^´vã–/f!ûpê5‰ìR|áø£·ìÚuóÃ1†ÿRÜëÙÖôÍsRXuϺ ›o»lã¦ßOO kMoÚìYwÅæ7¤þ{C¥zkZßoLdžéì³ã–þ{ZßÎ ™e¯íßûÍüí¬ê?F§üí©k7lù»¬&BMµ=N ]=ÆP 0 =Ó0†! 
@€ @€ @€ @€ @€ @€(¡ÀÐpøüœj¸6•6·iy•pK ÿ}zh`ðCy°²iÿ.ßpåòô¦Ìw¦·nnÉBX4¢é”n³Î ë×WC­6|J|®óömý×§·’®LaÍ5ú|–­í Õµë6ly$†x{ªýÁz ÓJOëyq–Å%!fËBW§ï'½®çk‰YŠLNáªþòíÕ¾3^²ìå͆ɯYµzçÚ`ŠÜuÛ ém¦Í¯7¬_ÿÂ3²yš>ÿ¾ä0ñpn=¾cûö›~Ú|=˜Œ€0èdÔ|† @€ @€ @€ @€ @€ @€@ì®õX»ñÊOd¡Òô™)ü—gÞÕ;oΕ—oØüßB=ì ?þb­öÈè¥^zéÛVçöœ]íÉ.HÊ«RôwG÷ý}LW Tþ,õ={tÛÉﳬº:ÌûõÝ!8ùìÔnâÀ/»bÞ ÏÚêšø[1³ð’ }g>U%}°q¥ožùþW6šGÜ¥0éŽ4F³·«N*äúü4;wî<¼fÝ•Wôö„¯§@èœçŸý™öueoo¶2…dšê»=Ç}ÃÕúÃY¬>š¯?•Í©¿´{~«R‰¿bX–¼:íÑ‚¢1G·ÕcüøŽZÿ­£Ÿûžé>K# @€ @€ @€ @€ @€ @€(ÀÃÇ>rfu~þÆÎ_›PqY6¯ÂûC5¼^˜ÖmØ<‚?NaÇGS˜pqqi "ž>¡±žë”>û‹P¯ÿa¨T~#õEŸ­fÙE©}²aаgÏžÁÕ«W¿¥wþÂ=)ùEsM_[üüxÇ¢—¼4Y\1,hPSþ†ÏuWlÚCõÆÚœ?¡¥Niï^šþxOVÍÞ“Þ†š?¡/ít¨„êó׿™×ÔÿW¯´·»=zà/õ©ï˜nü­. @€ @€ @€ @€ @€ @€f©À¾ZíHˆÃ«Ó{9ŸšÔS84ÏMÁÊ×çžr4Æÿ}|8œ·½¶õކ¿Û¬†ôfÎÍú4kß½{÷Ñ¡ÃoNAů5ë;ÕöäúÙÛnîßôÔSO5}ëgâ”ày½·Ý²ug²üýôªÕ‡§ZÿT>ŸÖþ©íÃk÷íÛ74•q|–æ Íô @€ @€ @€ @€ @€ @€ ÐÑÛ·m½7‹qmZĉ\H½Ç·×þýíµþ‡òyª|?ýQ/®!N9 š¿k×®§¶ßÜÿúã‡~Y<çdZãÓ±^ÿóíÛ>÷ŽôéúYgÕ<«•…i ƒæÕæ{z|8¾&2ïLõSùL2±¯Kkg¨Õš†`§2—Ï ð¬@óÁ"@€ @€ @€ @€ @€ @€ @ ãn»¥ÿ+õáøºüI«“ÞXù¥‡.Ú±ís׌ Þyç O§`æ×‹æOo =ç²›^[ÔçÚâömýÿs ~ôwBˆ·œÂç »¦7ŽîJo;=wû-7|,uLËMïé~QOá‡òŽ1›¶0h>W²=qô‰ô†ÐúߤÁ4›zÚã÷b=üîö[úÿzzÆ3 ˆ’> @€ @€ @€ @€ @€ @€fÀŽZÿþ'‡^™b‰[ó—;Nû’b¼3…@/Þ~óçÞ´}Ûw5~z-ègÆz>òY5Vÿbä÷S½ÿb­öÈm7÷o8q"¾2£eZúý§ýS‘ @€ @€ @€ @€ @€ @€v ìÙsã“iî+׬ßô7=•ÊŸd!üaȲÓ']OŒRªôÎá¬þ™/lÛúf㪭-ªÎûdÙiãõͲ°fݺ·-»í¶›¯Ïdžïº­ÿ{ésù×__¾áÊå•]²ð›éÝžKÒŸ/yæÏ^³p8¹<CöpzãæÏÒ›@ï|¤~lÏþZmÜ0gå´Ø< šÅŸO¦î‰|fO­öXê÷go^¿ù㽕ì]•,®LëyuÚÛêD>?NŸ”/_õð¿~úÿ~ð…{î¹çÄ8ý<&@ Å -6< @€ @€ @€ @€ @€ @€2 ìªmý§T×{׬Yóʼ¿W‰áâPÉ.Jo }eâÑ! JÉÇRHò±þüI ~e¨^¿ó¹q&¼Ä}µÚ‘ÔyòáÓ ÏTÜqǶò·ƒžúBǶ{OO>ÅW ‹;L½õöZÿCi”kò¯K/}Û¾ù=+Rô?¤=û×i__œB¸/Nu¾pôLé=±ƒ!‹O¤>ßYüVVߪŸÿgÇŽZ`]ƒï _@t|- @€ @€ @€ @€ @€ @€f½À®]»žJ‹ÜóÜ×Éõ¦è ²ùóTâœyGë¿Ø[«J)SèK žÅUÆjù,f-ƒŽœîŽ;nÊ÷ìÖç¾N6]pÁ½/}éò UÃ=ÃáɃÕÁÃEo==ùA7´M@´mô&&@€ @€ @€ @€ @€ @€ P^çB¢yPÔ5žz8+4KƒVê3¯ì{î¹çDúÊk)E=ãÕé9 fÿziôtG€ @€ @€ @€ @€ @€ @€c ÄP}Ř #fõì§#¾uK€ x3è„©t$@€ @€ @€ @€ @€ @€ @ lk7lþtÈÂÂpøáP=üð±ÊÀûkµ¯³/ ©¢ëH¸·¨]ÆOÆs @€ @€ @€ @€ @€ @€:@ »Si[R÷çé ¤‹šŒq¢~<ÞÒ¤fŒ+ :. 
@€ @€ @€ @€ @€ @€Ê.P¯‡/6«±’e\³æ­‹›õ;Õöµ6ÿQ ‚~¸éçbüâŽý7í§ÆÆc @€ @€ @€ @€ @€ @€Ê/p,äaÐza¥Y¶´gÞœ—\rÉÜÂ~§Ðxù†Mÿ1½tô4ûHŒqh¸>Ò¬Ÿv ƒéh#@€ @€ @€ @€ @€ @€ @ Ô{jµÇB _oVd–…‹Î\¼t÷[Öo¹ Yߢö7­_ÿⵯ¼. •Ϧ~MóYYˆ×í¬õ«hLmh&ÐÓ¬ƒv @€ @€ @€ @€ @€ @€”Y †¡ÿbõéMYQ©ù ÕjøöÚ[¾êñ¿n¿¥ÿ«EýG¶­[÷¶e±§çÏÒ[Òó¹¡p¦g?CÜ¿½~ìoGŽãž“ŒšÏ @€ @€ @€ @€ @€ @€ PíÛnüæÚ ›?• zçDŠJ9Î7†JöÆô™†,{ ÄøÓü+fá@¨‡ŸÇ¬²¸êg§¶³SßôgXBöÊtßôM 'çñH=dW†Zmøä37˜¤€0è$á|Œ @€ @€ @€ @€ @€ @€òÄãáýaNxCz7èË&ZUzËç+RßW¤ÐgÊzfϾì³úü§'žû|þ'ÿŒq¸ê¼sÛÖO>sC€)LáßHS˜ÕG  @€ @€ @€ @€ @€ @€ 0;vô?~¼/†ð§qØS*Íÿp½WìØ¶õÆSþ° @`aÐq`<&@€ @€ @€ @€ @€ @€ @ ³n¯õ?40|ôb ÷¶£òãÞ4ÿy;j7|­ó›“Ù+ :{÷ÖÊ @€ @€ @€ @€ @€ @€tÀžZí±ª½8Öëb<2#1Çÿjû¶þ7åóÏÈœ&!@ «„A»j»-– @€ @€ @€ @€ @€ @€ÀìøA­v|û-7|t°~'ÆzzSè`+VCx8ýÑzˆÿfûÍýIs¤G.L¿@ÏôiD @€ @€ @€ @€ @€ @€´_àöZÿC©Š-—^ú¶?sZuC–U¶dY¸h*•¥ðçÑôùõúwnûÜWÒ}ºu @ µ ­õ5: @`Ʋ¥_#ä ͸‰ @€ @€ @€ @€t‘@~>ÉE€:YàŽ;n:”êÿ‡ükõê·¾¨:Îù•.•x~ú¹sÒ1Ü!d/LçqÏȲ,e®â“1d§?NíéíŸYú3>ëáÀÐàá¯ìÚµë©NöP;' Úy{¦b @€À˜ó³¾ðt<6f›‡ @€ @€ @€ @€˜Š@~>ÉE€f‹ÀîÝŸÿEZËÏ}ý‹eý«õëçü V;þ/< @€@*mœÛÔ @€Ó(°¤ºhG3 @€ @€ @€ @€Î'5,Ü @€Àìý{l…:Q@´wMÍ @€1–÷.ã©G @€ @€ @€ @€˜º€óIS74 @€©ƒNEÏg  @€%¸¸ïÜU£ @€ @€ @€ @€f“€óI³i7­… @ „A;q×ÔL€Càü9ËÂâÊÂ1Z<"@€ @€ @€ @€ @€ÀäòsIùù$ @€ímŸ½™  @€Ó*PÍ*aÓé+¦uLƒ @€ @€ @€ @€ @ ?—”ŸOr @€ Ð>ÿEÞ>{3 @€¦]`åÜ Ãy½çLû¸$@€ @€ @€ @€ @ ;òóHù¹$ @€ím¯¿Ù  @“¶›ê@IDAT€Ó*eY¸zÁ¦ð’ê™Ó:®Á @€ @€ @€ @€è>üR~)?—ä"@€ @ ½ íõ7; @`ÚΨœ>²à¡Ó.k@ @€ @€ @€ @€Ý#AósHùy$ @€ímÿ¨€ 0íg÷œþþÌ÷†ózÏ™ö± H€ @€ @€ @€ @€ÀìÈÏåçòsH. @€Ê!ÐSŽ2TA€ 0ÝùoäûèÂw…½Çî[Ü­šî)ŒG€ @€ @€ @€ @€À,X\Y6¾"¬œ{aȲl­ÌR @€t¾€0hçï¡ @€ÆÈÿBvռׄ7Î}u¸÷øaÿà}áþÂÁáÇÃÑ8búÇE€ @€ @€ @€ @€@÷ d! 
ó³¾°¤º(,ï].î;7œ?gY¨f•îðb @€ Ú›¤D @€ÀTò¿ ½°ï·ŸùšêX>O€ @€ @€ @€ @€ @€̬€0èÌz›LY ÆxmÑ +^•½îe‹ãë‹úh#@€ @€ @€ @€ @€ @€èaÐÎÙ+• @€g¾qó»?TDñõO\MÈ‚0h’6 @€ @€ @€ @€ @€ @€@ T:¨V¥ @€ @€ @€ @€ @€ @€ @€ºN@´ë¶Ü‚  @€ @€ @€ @€ @€ @€ @€Ní¤ÝR+ @€ @€ @€ @€ @€ @€ Ðu ]·åL€ @€ @€ @€ @€ @€ @€t’€0h'í–Z  @€ @€ @€ @€ @€ @€ @€®íº-·` @€ @€ @€ @€ @€ @€ @ “„A;i·ÔJ€ @€ @€ @€ @€ @€ @€t€0h×m¹ @€ @€ @€ @€ @€ @€ @€$ ÚI»¥V @€ @€ @€ @€ @€ @€ @ ë„A»nË-˜ @€ @€ @€ @€ @€ @€è$aÐNÚ-µ @€ @€ @€ @€ @€ @€ @€]'ÐÓu+¶` @€ @€ @€ @€ @€ ÐB·¯Œ…£ÿø`Ø÷ÕvÒH€ @`„€0è · @€ @€ @€ @€ @€ @`ªW­j2B û²%ïþP“^š  @€œ¨œ¼sC€ @€ @€ @€ @€ @€ @€ P:aÐÒm‰‚ @€ @€ @€ @€ @€ @€ @€ žÆ­; @€nŽõpïñÂþÁûÂý'„ƒÃ‡£q0Äô‹ @€ @€ @€ @€îÈBæg}aIuQXÞ»4\Üwn8βPͼ‹¨;"¬š @   eØ5 @€fX Æö»;l=rWx´~h†g7 @€ @€ @€ @€eÈ™üÓñXøÑÐCÏ|íØW†M§¯+ç^²,+sùj#@€ 0+„Agå¶Z @`|'ëO‡ëo ß9ñàø´ @€ @€ @€ @€ @`„@þKç?öä­á® W/ØΨœ6¢Õ- @€­¨´zã @€”GàŸ‡~ÞóÄ'A˳%*!@€ @€ @€ @€ ÐQù/¡ÏÏ åg‘\ @€Ìœ€0èÌY›‰ ÐVü 8üéðÈðm­Ãä @€ @€ @€ @€t¶@~)?‹”ŸIr @€ 03 3ãl @€@[bŒáºÃ[AÛº &'@€ @€ @€ @€ 0{ò@h~&)?›ä"@€ @ õ ­76 @ í{ݾsâÁ¶×¡ @€ @€ @€ @€f@~&)?›ä"@€ @ õ ­76 @ ­ñ¶¹«­5˜œ @€ @€ @€ @€Ù)ŸMÊÏ(¹ @€h­€0hk}N€h»À½ÇÖµ½ @€ @€ @€ @€ 0ûò³Iù% @€­m­¯Ñ  @€mØ?x_ÛkP @€ @€ @€ @€³WÀ¥Ù»·VF€ PaÐòì…J @€-¸ÿÄ–ŒkP @€ @€ @€ @€äÎ(ù9 @€ ÐzaÐÖ› ÐVƒÃ·u~“ @€ @€ @€ @€ 0»œQšÝûku @€å-Ç>¨‚ Ð2£q°ec˜ @€ @€ @€ @€Î(ù @€ ÐzaÐÖ› ÐVb[ç79 @€ @€ @€ @€³[À¥Ù½¿VG€ PaÐrìƒ* @€ @€ @€ @€ @€ @€ @€c ƒŽÉâ! @€ @€ @€ @€ @€ @€ @  åØU @€ @€ @€ @€ @€ @€ @€Æ“ÅC @€ @€ @€ @€ @€ @€ @€@9„A˱ª @€ @€ @€ @€ @€ @€ @€Œ) :&‹‡ @€ @€ @€ @€ @€ @€ @€rƒ–cTA€ @€ @€ @€ @€ @€ @€S@tL  @€ @€ @€ @€ @€ @€ @€å-Ç>¨‚ @€ @€ @€ @€ @€ @€ 0¦€0è˜, @€ @€ @€ @€ @€ @€ @€Ê! ZŽ}P @€ @€ @€ @€ @€ @€ @`LaÐ1Y<$@€ @€ @€ @€ @€ @€ @€”C@´û   @€ @€ @€ @€ @€ @€ @€À˜ c²xH€ @€ @€ @€ @€ @€ @€(‡€0h9öA @€ @€ @€ @€ @€ @€ @€1„AÇdñ @€ @€ @€ @€ @€ @€ PaÐrìƒ* @€ @€ @€ @€ @€ @€ @€c ƒŽÉâ! 
@€ @€ @€ @€ @€ @€ @  åØU @€ @€ @€ @€ @€ @€ @€Æ“ÅC @€ @€ @€ @€ @€ @€ @€@9zÊQ†* @€‰ ¼vãõ×õýàgÃë^¾¤¨GW­*n×J€ @€ @€ @€ @€ @€ PaÐòì…J @€À„²,»¦¨ã¾ï…]W­ŠEÍÚ @€ @€ @€ @€ @€ @€J$P)Q-J!@€ @€ @€ @€ @€ @€ @€% : Ä· @€ @€ @€ @€ @€ @€ @€2 ƒ–i7ÔB€ @€ @€ @€ @€ @€ @€% : Ä· @€ @€ @€ @€ @€ @€ @€2 ƒ–i7ÔB€ @€ @€ @€ @€ @€ @€% : Ä· @€ @€ @€ @€ @€ @€ @€2 ƒ–i7ÔB€ @€ @€ @€ @€ @€ @€% : Ä· @€ @€ @€ @€ @€ @€ @€2 ƒ–i7ÔB€ @€ @€ @€ @€ @€ @€% : Ä· @€ @€ @€ @€ @€ @€ @€2 ƒ–i7ÔB€ @€ @€ @€ @€ @€ @€%Ð3ê{ß @€ @€ @€ @€ @€ @€SøÔ—Š?üãƒá’×n¼>+êõ›ß}mQ»6 @€îí®ý¶Z @€ @€ @€ @€ @€h±Àgöæ<óÙ/ɲpI“2®mÒ®™ @ ‹*]´VK%@€ @€ @€ @€ @€ @€ @€tœ€0hÇm™‚  @€ @€ @€ @€ @€ @€ @€ní¦Ý¶V @€ @€ @€ @€ @€ @€ @ ã„A;nËL€ @€ @€ @€ @€ @€ @€t“€0h7í¶µ @€ @€ @€ @€ @€ @€ @€' Úq[¦` @€ @€ @€ @€ @€ @€ @ ›„A»i·­• @€ @€ @€ @€ @€ @€è8aÐŽÛ2 @€ @€ @€ @€ @€ @€ @€Ý$ ÚM»m­ @€ @€ @€ @€ @€ @€ @€@Ç ƒvÜ–)˜ @€ @€ @€ @€ @€ @€è&aÐnÚmk%@€ @€ @€ @€ @€ @€ @€:N@´ã¶LÁ @€ @€ @€ @€ @€ @€ @€@7 ƒvÓn[+ @€ @€ @€ @€ @€ @€ Ðq ·e &@€ @€ @€ @€ @€ @€ @€ºI@´›vÛZ  @€®ÈBÖ•ë¶h @€ @€ @€ @€fFÀ¥™q6  @€@w ƒv÷þ[= Ðó³¾.X¥% @€ @€ @€ @€ Ð.g”Ú%o^ @€ní¦Ý¶V @ +–Tuåº-š @€ @€ @€ @€™pFifœÍB€ ÐÝ ݽÿVO€tÀòÞ¥]°JK$@€ @€ @€ @€ @ ]Î(µKÞ¼ @€Ý$ ÚM»m­ @€@W \ÜwnW®Û¢  @€ @€ @€ @€˜g”fÆÙ, @€Ý- ÚÝûoõ @€@œ?gYX\YØ+µD @€ @€ @€ @€fZ ?›”ŸQr @€ ÐZaÐÖú ÐvjV ›N_Ñö:@€ @€ @€ @€ @€ÀìÈÏ&åg”\ @€´VÀu·Ö×è @€R¬œ{a8¯÷œRÔ¢ @€ @€ @€ @€f‡@~&)?›ä"@€ @ õ ­76 @ íY–…«l /©žÙöZ@€ @€ @€ @€ @€@ç äg‘ò3IùÙ$ @€­m½± @€¥8£rZøÈ‚w„–b7A€ @€ @€ @€ @ sò h~)?“ä"@€ @`f„AgÆÙ, @€RœÝsVøû3ßÎë=§õ(‚ @€ @€ @€ @€ÎÈÏågò³H. @€fN gæ¦2 @€@ò߯÷Ñ…ï {ݶ¹+?ƒ”ŸEr @€ 0s 3gm& @€@[ò7‚~àð§Ã#ÃO´µ“ @€ @€ @€ @€ ÐÙù¤ü,R~&ÉE€ @€À̃Ό³Y @€mˆ1†ëomë.˜œ @€ @€ @€ @€ÀìÈ¡ù™¤ül’‹ @€Ö ô´~ 3 @€Ó)þâìÚ¢ñV¼*{ÝËÇ×õÑÖ}{ݾsâÁî[¸ @€ @€ @€ @€ Ð2üLR~6iÕ¼×´l @€ ð¬€0¨Ÿ Ða߸ùÝ**ù럸þšaÐ"¤.kŽõ°õÈ]]¶jË%@€ @€ @€ @€ @`&ò³IoœûêPÍ*319 @€t­€ÿâîÚ­·p @ [î=þ@x´~¨[–k @€ @€ @€ @€Ì @~6)?£ä"@€ @ µ ­õ5: @ íûïk{ @€ @€ @€ @€ @`ö 8£4{÷ÖÊ @€Ê# Zž½P  @ %÷Ÿ8Ð’q J€ @€ @€ @€ @€\À%? @€Z/ Úzc3 @€Ú*ppøñ¶Îor @€ @€ @€ @€f·€3J³{­Ž @  åØU @€Z&p4¶ll @€ @€ @€ @€ @À%? @€Z/ Úzc3 @€Ú*Clëü&'@€ @€ @€ @€ @`v 8£4»÷×ê @€Ê! 
ZŽ}P @€ @€ @€ @€ @€ @€ @`LaÐ1Y<$@€ @€ @€ @€ @€ @€ @€”C@´û   @€ @€ @€ @€ @€ @€ @€À˜ c²xH€ @€ @€ @€ @€ @€ @€(‡€0h9öA @€ @€ @€ @€ @€ @€ @€1zÆ|ê! @€ @€ @€ @€ @€ 0)·¯Œ…ŸûñÁ°ï«ß _+중 @€ÀaÐn  @€ @€ @€ @€ @€ @€ÀT®ZÕd„öeKÞý¡&½4 @€8)P9yç† @€ @€ @€ @€ @€ @€ @ t ¥Û @€ @€ @€ @€ @€ @€ @€  w @€ @€ @€ @€ @€ @€ @€Ò ƒ–nKD€ @€ @€ @€ @€ @€ @€hƒ6,Ü @€ @€ @€ @€ @€ @€ @€J' Zº-Q @€ @€ @€ @€ @€ @€ @ ! Ú°pG€ @€ @€ @€ @€ @€ @€(€0hé¶DA @€ @€ @€ @€ @€ @€ @€†€0hà@€ @€ @€ @€ @€ @€ @ t ¥Û @€ @€ @€ @€ @€ @€ @€  w @€ @€ @€ @€ @€ @€ @€Ò ƒ–nKD€ @€ @€ @€ @€ @€ @€hƒ6,Ü @€ @€ @€ @€ @€ @€ @€J' Zº-Q @€ @€ @€ @€ @€ @€ @ ! Ú°pG€ @€ @€ @€ @€ @€ @€(€0hé¶DA @€ @€ @€ @€ @€ @€ @€†€0hà@€ @€ @€ @€ @€ @€ @ t ¥Û @€ @€ @€ @€ @€ @€ @€  w @€ @€ @€ @€ @€ @€ @€Ò ƒ–nKD€ @€ @€ @€ @€ @€ @€hƒ6,Ü @€ @€ @€ @€ @€ @€ @€J' Zº-Q @€ @€ @€ @€ @€ @€ @ ! Ú°pG€ @€ @€ @€ @€ @€ @€(€0hé¶DA @€ @€ @€ @€ @€ @€ @€†€0hà@€ @€ @€ @€ @€ @€ @ t ¥Û @€ @€ @€ @€ @€ @€ @€  w @€ @€ @€ @€ @€ @€ @€Ò ƒ–nKD€ @€ @€ @€ @€ @€ @€hƒ6,Ü @€ @€ @€ @€ @€ @€ @€J' Zº-Q @€ @€ @€ @€ @€ @€ @ ! 
Ú°pG€ @€ @€ @€ @€ @€ @€(€0hé¶DA @€ @€ @€ @€ @€ @€ @€†€0hà@€ @€ @€ @€ @€ @€ @ t ¥Û @€ @€ @€ @€ @€ @€ @€  w @€ @€ @€ @€ @€ @€ @€Ò ƒ–nKD€ @€ @€ @€ @€ @€ @€hô4nÝ @€ ðÚ×_[Tç?^÷ò%E=B¸jUq»V @€ @€ @€ @€ @€ @€òƒ–g/TB€&$eÙ5E÷}/„ü«èºjU,jÖF€ @€ @€ @€ @€ @€ P"J‰jQ  @€ @€ @€ @€ @€ @€ @€À(aÐQ ¾%@€ @€ @€ @€ @€ @€ @€”I@´L»¡ @€ @€ @€ @€ @€ @€ @€À(aÐQ ¾%@€ @€ @€ @€ @€ @€ @€”I@´L»¡ @€@ ²µ`TC @€ @€ @€ @€ @àYg”ü$ @€h½€0hëÍ@€h«Àü¬¯­ó›œ @€ @€ @€ @€Ù-àŒÒìÞ_«#@€(‡€0h9öA @€– ,©.jÙØ&@€ @€ @€ @€ @€€3J~ @€´^@´õÆf @€´U`yïÒ¶Îor @€ @€ @€ @€f·€3J³{­Ž @  åØU @€ÀÿgïN -+ëÑß¹·™D†DâÐ1ð2HLZàiù Á ¨ÁKPÃS\‰Äîû­$«»UŠ$í[¤“¬¤³Ôî§%E1ÞPU€ŠbHÊ$H·I™6BÛILƒv!”¹†{öÛ‡°ñž[÷îsö9gŸaïßqUöÙû›ÿûµàþÝ(Màõ+~¤´¹ML€ @€ @€ @€ @€¿£äg€ @€@ùŠAË7¶ @`¤¯]þªpbãØ‘îÁâ @€ @€ @€ @€TS õ»I­ßQò!@€ @ \Å åúš 0r©ØsO€ P_Å õ½“ @€5ˆ1†³1¼÷Ñß ßž{´F'wT @€ @€ @€ @€Ê8iê¸ç~'©õ»I>‡ lû\G—U)ݪCG¶=ÙÚvç† @ ÖZŸÞá  @€58ªqDøð1ï ­ëC€ @€ @€ @€ @€^Z¿ƒÔú]¤Öï$ù @€ 0Å Ãq¶  @`,N™>!üþqÿ*üø²WŒÅ~l‚ @€ @€ @€ @€ÉhýîQëwZ¿‹äC€ @€À𦇷”• @€ÆA õ¿Æ÷ëÇþLøÜ³vì§Â¿Xù“áËûÿ6ܳøfxhîÃÓɾ¤ÿñ!@€ @€ @€ @€ @ ž1Äpx\NžzqxͲ—…ׯø‘ðÚå¯ S±QO§&@€ 0ŠAÇ ¶@€•@ëξnÅ?{îϨö`] @€ @€ @€ @€ @€ @ _Àÿ4K¾V @€ @€ @€ @€ @€ @€ @€ÀHƒŽ”ßâ @€ @€ @€ @€ @€ @€ @€|Å ù>Z  @€ @€ @€ @€ @€ @€ @€#P :R~‹ @€ @€ @€ @€ @€ @€ @€òƒæûh%@€ @€ @€ @€ @€ @€ @€ŒT@1èHù-N€ @€ @€ @€ @€ @€ @€ÈP šï£• @€ @€ @€ @€ @€ @€ 0RÅ #å·8 @€ @€ @€ @€ @€ @€ @ _@1h¾V @€ @€ @€ @€ @€ @€ @€ÀHƒŽ”ßâ @€ @€ @€ @€ @€ @€ @€|Å ù>Z  @€ @€ @€ @€ @€ A É@IDAT @€ @€#P :R~‹ @€ @€ @€ @€ @€ @€ @€òƒæûh%@€ @€ @€ @€ @€ @€ @€ŒT@1èHù-N€ @€ @€ @€ @€ @€ @€ÈP šï£• @€ @€ @€ @€ @€ @€ 0RÅ #å·8 @€ @€ @€ @€ @€ @€ @ _@1h¾V @€ @€ @€ @€ @€ @€ @€ÀH¦GººÅ  @€c"pÞiwÉNlƒ @€ @€ @€ @€ @€ Ð.àÍ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 
@€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c% t¬Âa3 @€ @€ @€ @€ @€ @€ @€vÅ íî @€ @€ @€ @€ @€ @€ @€c%0=V»± @€ÀˆþðþsG´²e  @€ @€ @€ @€ @ ,óN»«¬©ÍK€ @`¨Þ :Tn‹ @€ @€ @€ @€ @€ @€ @€Š (-æ¥7 @€ @€ @€ @€ @€ @€ @`¨ŠA‡Êm1 @€ @€ @€ @€ @€ @€ @€@1Šżô&@€ @€ @€ @€ @€ @€ @€ U@1èP¹-F€ @€ @€ @€ @€ @€ @€(& ´˜—Þ @€ @€ @€ @€ @€ @€ @€¡ Lu5‹ @€} $I²5o’s"¾ñå'&oÊë£ @€ @€ @€ @€ @€ @`rƒNN¬ì”<'ðů¼:âîßþÈU!Å yHÚ @€ @€ @€ @€ @€ @€$И ½Ú* @€ @€ @€ @€ @€ @€ @ vŠAkr&@€ @€ @€ @€ @€ @€ @€&I@1è$EË^  @€ @€ @€ @€ @€ @€ @€Ú (­]Ș @€ @€ @€ @€ @€ @€˜$Å “-{%@€ @€ @€ @€ @€ @€ @€j' ´v!w` @€ @€ @€ @€ @€ @€ @`’ƒNR´ì• @€ @€ @€ @€ @€ @€¨€bÐÚ…Ü  @€ @€ @€ @€ @€ @€ @€I˜ž¤ÍÚ+ @€ÀàîÝÿ@¸{߽ᾄ~'<™<šé|X\ áȸ2¼túøpú²—‡³WœÎXþòÅ;ð©\ ¦©j!0ª\Ípål&áJ ³À(óU®vŽ2¹šI¸o¹:Þñ±;™À(sµµÎ"áJ ³À(óU®vŽ2Qæj¶9›I¸èN`Ty+W»‹^2Qåj¶¾+ @€‡ ÄCyB€L²@òÐG® 1lä3Œbïxÿ¹£Xv¤kÞýì½áš§î ÷üöH÷aqU8mú¤°åˆóÃÙ+ÏøqäêÀIMXc2s5c•³™„+þÊÎW¹Ú_|Œ& ÈÕL•Àx ÈÕñŽÝÈÊÎÕÖ:þœi»èO ì|•«ýÅÇh™@Ù¹š­#g3 Wý ”™·rµÿø˜@&Pf®fk”q=ﴻʘ¶ãœç¼¿ÿ_Õ¿û†÷ô?IÇŽo‡s.ûØUI’lßv¿³ãÖ?»þg¯î~„ž @€Cjý—ÃC9ÔëÆ #Ð*²~ðÑ¿vÌ»ÂñSGçôÌo’«ù>Z ô+0¨\Íö!g3 Wƒd¾ÊÕÁÇÇŒ2¹šI¸o¹:Þñ±;™À sµ5§¿g²®/0È|•«ƒ dƒÌÕlN9›I¸(G`Py+WˉY dƒÊÕl¾ª^ßyA’{´û {þø+á ¹4 @€˜'àÍ ó0Ö®ßüîôö'æ=:ôk’|cçÍ×þú¡ ž ÐI`õ%[.mÄ䜼~û›áßšÝþ`^Ÿª·­Û°å?%!yûÂs&sáÒ]³ÛïYøÜ=…Þ ºP¤»û:¼´õ¿xù þ¾BÐî~$ô"ЗÀ+¦O¿sÜ{{zC¨\í‹Þ`…úÉÕl!9›I¸(W ß|•«åÆÇì2¹šI¸o¹:Þñ±;™@¿¹ÚšÇ߃3MWå ô›¯rµÜø˜@&Ðo®fóÈÙL•@ùýä­\-?>V ô“«ÙøŽêÍ Ï–„­ñä+¯îدƼ´ÆÁwt @`QÆ¢Okû0¾9Æøs¹q¦¶<N O©Fò¦ÜüJóoY£y|ŸËLôðµë/]’$?C<õ?1¬œèÃÙ<#øïݬtäQ°º´Þ¾Ûʹ^>rµ5cô&ÐO®f+ÊÙL•@¹ýæ«\-7>f' ÈÕL•Àx ÈÕñŽÝÈúÍÕÖ<þœiº(W ß|•«åÆÇì2~s5›GÎf®Êè'oåjùñ±L Ÿ\Íæp%@€ @ {Å Ý[éI€RV­Z5âÔJ f½µ¹Ti“¨§ÀÝÏÞþdß_ÕóðNM`D­œkå^‘\-¢¥/Áô’«ÙÊr6“p%0^óU®'>V! 
ÈÕL•Àx ÈÕñŽÝÈzÍÕÖxÎ] G ×|•«Ã‰Ud½æj6^Îf®†'ÐKÞÊÕáÅÇJ2^r5ëJ€ @€@1Šżô&@€@iÇð²«Ó·þTi ˜˜Z \óÔµ>¿Ã•@ÑÜ+ÚTç².ª ôš{½Ž«šŸó¦@/yעažÉZª(ÐKÞõ2¦ŠvÎD`˜½ä]/c†y&k¨¢@¯y×ë¸*:a ô’w½ŒÖy¬C ªýä]?c«êé\†!P4÷ŠöƬA r¯QvF @€qP :Q°j/°ffã¿1ü›ÚC @ {÷?î?øíRæ6)ù­Ükå`7¹Ú’>Ê(’«Ùäl&áJ`¸EóU®7>V# ÈÕL•Àx ÈÕñŽÝÈŠæjkœ¿gz®†+P4_åêpãc5™@Ñ\ÍÆÉÙL•ÀðŠä­\~|¬H (’«ÙW @€Š (-nf*°nÝ–ÓS›búèÄ&#@€Àówï»—#è6»í7£Xš@¥Šæ`Ñþ•Æs8C(’Eúù–#Py"ùW¤oåáÀŠä_‘¾C>†åT^ hþí_y@$0D"ùW¤ï`)µè%ÿzS L‡$0$ns°Û~CÚ¶eÔN@Ö.äL€ 0é¬iIx^àí3›_š4Â1Äc¡ @ ,ûŒN Ûì¶ßèNbeÕ(šƒEûW[Ïé W Hþé;ÜSX@õŠä_‘¾Õ—sBÃ(’Eú÷V#P}¢ùW´õÀðŠä_‘¾Ã;•ÔC —üëeL=4’ÀpºÍÁnû g×V!P?9X¿˜;1 @€ÀðÃ_ÒŠ¨¯ÀÚ™ÍïhÄðç1D… õý1pr @€ @€ @€ @€ @€ @€…¼´—ÎèMàÌ3Ï\ö²Wžþ1Æ÷õ6ƒQ @€ @€ @€ @€ @€ @€ÔU@1h]#ïÜ Màâ N™JVÜcøçC[ÔB @€ @€ @€ @€ @€ @€TF Q™“8ÆP`ÍÌÆ7O…å©t ƒcK @€ @€ @€ @€ @€ @€&DÀ›A'$P¶I€Àd \xá¥Ç/?bùÖ$I~.†´Ô‡ @€ @€ @€ @€ @€ @€= (íÎ0,&°jÕª•ÇpÊÏÇØø·iûÑQèbLž @€ @€ @€ @€ @€ @€ P@@1h,]  #×nØ|iÚþáôM §æô;¤)IÂÝ!$“޾óF @€ @€ @€ @€ @€ @€¨½€bÐÚÿ @ _53›ÞØ˜Š¿BüÉâs%Ÿøëæ3Wþp<ìWB,>Ú @€ @€ @€ @€ @€ @€ª/ ´ú1.ó„­Òµ¤ÌÌ=qãâ“\tцhL5ö,Þšó4Iæš1þâ®·ÿv«×_²9§³& @€ @€ @€ @€ @€ @€ê, ´ÎÑïîìñ¢u›Î\6Þš„Æ«Ó7žCrbHâ ièK’¦Ó<còpúýá´4ôÛ!6ï û“Ïìܹã¡î–Ðkâ4Ä^¦˜žžj—$É×’f¸b×ì5÷«? 
@€ @€ @€ @€ @€ @€õP Z¿˜w<ñªU«¦=ñ”‹bˆo1¾5}­ä‰­A­×KþÓ'ýöÜMÚ#„©ôÙ)éƒSž”Þ6®H–%ɺ [¾œ„äSiáÛM»nºökÏîû²ö’-H‹O·äM”¦þÑΛ®ùÙ¼>ƒn[³~óÅiUàoæÎãgn¹ñš÷åöé²qÜãÔå1êÖí@šÿá™Ç¿ókwÜqǾºÞy  @€ @€ @€ @€ @€ @€èM@1hon•µfý¦µi‰çÿc|u?‡LÇ·jCÏLç:3ýúÁµë·l;øÌ¾ÝvÛßêgÞÖØf’Ü9Õˆ¿š;ONY½zõ/ïÞ½û»¹ýؘ‚¾;-ž}eÞ”éÞÿ(¯½Û¶IˆS·g©Q¿½ÍdîŠ]7íøJÎì¨ @€ @€ @€ @€ @€ @€ @ ­_ó!ÂÅ6žµnýæ»±qK¿… ‹x6ÒÒÐ+¦[ñ·k/Ùü+«ffŽ\¤O×vÏnÿRHÂÿÈ®·¢±â¨wäõdÛš5›NHBxsÞœé[Rùæ×ïûL^ŸNm“§Ng©K{úsñ­f³ù3·Ì=ýÓ Aëuç$@€ @€ @€ @€ @€ @€ 0XÅ ƒõœÈÙZšÓaêîô­–g•y€´@óð؈<®qØ—Ö®½ìÔ~Öjư­ãø6vì3 qy\ŸÑæ¾i7†äº½{÷èuÉIŒS¯g­Æ¸äñ´øßxêÑWíºùÚÿfgçªq.§ @€ @€ @€ @€ @€ @€ @`ØŠA‡->Fëyæ™ËÖ®ßtM«@s˜ÛJ‹&_—-û¯k×o|m¯ëÎ=½ÿÚ$IæŽOÂ.Þ°á”Ü>jL ]7wšêà\³së"“Lrœ9N %O%IøýOxÅηøöÛoº‡vD @€ @€ @€ @€ @€ @€JP Z"î8O}ÞÌÌѧ¾êôÏÆØèXÄXÊ9b8)†ÆÖ]²é­½ÌÛm7<bøLÞØ´è46ÂòËòú ¢mÍúM¯ !þd‡¹öÞ:{ÝW;ô9¤yÒãtÈ*ü } è7B’üÒ³OœÎ1šS'7BóÒâÌ ¡qQú–Ìcºš7Æ#Ó9o]3³ù »f·ßÓÕ˜yÒ}|"]ë¢yù“°1}øiàƒ˜ÄéÙs?Ífñ·‚V%N¹0hl6Ÿzrªyä;v&ûv‡ÙÙ¹ É @€ @€ @€ @€ @€ @€3Å calçÕÃ~=-¢ìº4-ºÜ}ð`óßßv˵Ñigžyæ²S^yúyS!þRZ ¹ªSÿôåÓa*¹ámo»ì'>ýéëëÔ~û7¿~ßgÒ·›>C «§GtK€ @€ @€ @€ @€ @€ @€ƒÀšä®ëÖ]öªÃE΄ä}ø­7t&úvÛ~Ûm7<ÜÜ—ü_é„ç‰G~ÔÔ»óûÚCç·ÆFÜxèÈþžœŸŸ¾uô-kÖl~ñüg=qSÞØ$ O?óøÜÍy}¶Õ!N Ïìž @€ @€ @€ @€ @€ @€: (íl4Ñ=Î?Óiaâw:ÄÁ$îîÔg@íÍ´èñ®Ü¹bœ:fjùOçö9´±™žóšC·=Y–%3mOz¸¹xæòI‡™?4¹åŽ;®{<¿Ï÷[k§ïÚ7 @€ @€ @€ @€ @€ @€èJ`º«^:M¬ÀG‡ŸŽ1vŠóÞÛg·ç¾Usé›AïJßÒ¹1oΩ¤qvÚž_4º`‚8wð“Iœþ@zÞ´ÞtñOŒÖº]¼µ»§SSÏÍ‘ß9 Ûò;´·Ö)Ní'wG€@/gmøÈÖ¼qødxãi'çõáŠ7ç·k%@€ @€ @€ @€ @€ @€NE‚ã³S;éI NÅVQeî'I’¿Èí0àÆô ž±t¹f¶Xç}g=³ë-·\ÿ¿Ö­ßò…ô~UöìÐkrÖ…3›N»}öÚûmëêI IZ ºd¹i:G’Ü¿óæí{ºšíùNuŠS} X\ ­y¿jñ–zºç¯BhýÉû\ñæ$¯Y @€ @€ @€ @€ @€ @€À 4Æh/¶R‚@LÂ9¦Mbüz§>ƒlÿvóÙ¿K Ps«’_Ÿ®Yøç³òßÈÙzkè²Ø¸¼×ó¬Þ°ñi!ë)yã“¶§í¹ç[8¾nqZx~÷ @€ @€ @€ @€ @€ @€ °´@áb»¥§Ò2Žiaâk;í«Ñœj1è=³³Ï„ÊÛWZpù¢‹.ºô%y}k;øô£–™>±XÛ Ï¡çbÐFÒØüÂ<‹|i¹6ÃþO.Ò”û¨nqÊÅÐH€ @€ @€ @€ @€ @€ @€mÓmwnª&ÖTÆã:ªÑøÄÚ [>Ú±ß ;$ɋӂÐܧ›níýáÜN o¿ýö§×®ß|SñÝ š^¸MWýáÕ3[ÎÜ={ÍÞvñåõ33‡¥[^סëžÝ7Þø@‡> ›k§…î  @€ @€ @€ @€ @€ @€ @`iÅ KÛL|ËêÕ«NÑÅÛ_ã±ùe™%Pt(m­ØŒÉ±½¬œ¾šóq*,Y Úš³ÑÓK¡bГ+/N‹LÊÝSL¶å¶/ÒX×8-Bá @€ @€ @€ @€ @€ @€,"ÐE¡à"£<š¹e/꩘r\75zÚÿ®Ùí÷„$ùë¼s¤Å¯ÂÌÌT^Ÿ…mé[V7-|Ö~Ÿ<~à©ïÞÒþ¬ó]]ãÔYF @€ @€ @€ @€ @€ @€h 
(­ðÏÁTh7ÑÇ›Š=ï¿Cþ:c8iuãðs»õ¹è¢KOLûžŸ×?IÂÍ·ß~ûÓy}k«sœóðŒ @€ @€ @€ @€ @€ @€Úƒ¶{Tên*&=½Ys\’foomí?îonOß:—w–FH6æµÏo›>lúÒãôüg ¿ÏÅf~êÂÏß×9NKxL€ @€ @€ @€ @€ @€ @€óƒÎèÚ×ôM•/šè35Ë{ÝÿÎ;JbülÞøš /¼ðð¼>ßoklúþ÷E¾%É_ßzãŽ/.ÒÒñQãÔG @€ @€ @€ @€ @€ @€ŠA+üCcxb¢×lîïgÿ±9—ÿ¦Î\vı«;­±nÝ–ÓSË׿õK’xM^{^[Ýã”g£ @€ @€ @€ @€ @€ @€B˜†P]$6a*÷€Iæ’_Îí4¢ÆFsî+ý,ý?“}·ÿ³pØwbˆÇ/9O.OÛ®_²=mhN'!}èRŸ$™ÛŸ„íK5wz^÷8uòÑN€ @€ @€ @€ @€ @€ @€º (­ðOÀf|ly~-hIX¹svû®*2Ü7;»ÿÕë7_—¾yó}9ç;ÍšM'ìÚuí#Kôi¤Å¤­‚Ñ%?IŒŸÿÔì5.Ù¡CCÝãÔG3 @€ @€ @€ @€ @€ @€j/ ´Â?Ï„g]Ï=a ɬZµjzÏž=s;Nlcs[SKƒÆ§Ãò°>=Þï-vÄ53›V¥Å¤§,Ö–=k6CºFïqêÝÎH @€ @€ @€ @€ @€ã(ðñÏæïêþ‡Âª³6|$æõúâWnÍk×F€ P/Å Ž÷ÎÎ>¾nýæ¹ãÒïMÛŽ9é¤L¨"Å®›v|eíú-_N :_»äùb¸4m[´46›–×jH£ûžxäÖÜ>Å©f @€ @€ @€ @€ @€&°ís¹už­Ó´^Z³ªÃ±¶vh×L€ P#FÎZÇ£&IŒw:xœ›þáN}&¹=}ûiþ›;“ðÏ×®ÝxòÂ3¦oL]c²váó¶ûn¸ãŽ;öµ=+~#NÅÍŒ @€ @€ @€ @€ @€ @€ @€@mƒV>ÔÉëtÄØ˜:¯SŸIn²ùÌõI–,ØŒ­Ï²©Õ Ïøâ~ðm!Ä£>Ÿ?7ò MçwÎý.N¹<  @€ @€ @€ @€ @€ @€ PcÅ ~âŸv:b áüN}&¹ýs³³¦oø¼5ï I kiËy6ÿA¾º{öš½óõú]œz•3Ž @€ @€ @€ @€ @€ @€Õ˜®þë}¹Í?m,ëPóÃÿqÑE~à¶ÛnüÖ0´V¯^}ÌÔÊ£~/ ÉÿŽ¡ñ@h6¿q Ixüþþ{öìy¶Œ=4çšÛSS—,5wZ»êmo»ìØOúúÇZ}Þò–Ë 1¾u©þÿô<ùd~{÷­âÔ½•ž @€ @€ @€ @€ @€ @€¨›€bЊGüñüßÿýÅ'¾ìÉ´°ñȼ£N¾ügÒö­y}ÕÖX~ÔûBˆcHK0[ŸF#,K/Çø²dí†-Ä<$á'K®¸óÎkŸz®OŸÿg×ìŽ;×nØü÷éš?¸ÄTËV¼hêíiÛµ­öÃŽn¬M/+[ß—øhîOv,ÑVø±8&3€ @€ @€ @€ @€ @€ @€µèðÊÈÚ8Tö é›6¦‡ûb§¦e™?ÑE½¨S¿~Û[oÜŒƒú‰­O'¦…¢?~{å  AŸ_©šaû¡«Î×¼p—Ä™¾/ö%I>³k×µ,ÖÔË3qêEÍ @€ @€ @€ @€ @€ @€ÔCÀ›Akç¤þKœ çç5;½òè+Ó>¿žß¯¿ÖÃŽžú—i±ç±fIšÍ:õ)Ú~0in[¦þí’ã’xÁ…^xø+W¦EÒñÜ%û¥ Ͷåµ÷Ò&N½¨C€ @€ @€ @€ @€ @€À°ξü£W§¿Tý¡2×M’¤Ìé‡:wz–­g_úÑ­e.š¾iëŸ]ÿ³W—¹†¹  @€Ñ x3èhý‡²úÎÙí;Ó¿<þ]§Åb#~èâ™Ë¤S¿^ÛÏ?ÓéØ÷wŸî5 ›7uêW´ý¶Ù_IøÓ¥Æ¥¯%=|ÙÊc/8¶±ò-é÷KõKBòÈ7¿~ßg–jïõ¹8õ*g @€ @€ @€ @€ @€Ã¸ûº÷\áW†¹¦µrÒX(ÍñÑD€*" ´"ìpŒôE–á·:ôI›ãS©?X53sdç¾…{Ä?†x|ç‘ñS;w^÷÷ûïÑñž°&†ÆÅy3§¥ª;öîÝ{ ¯OmâÔ#œa /ÐHÿ‰›£è6»í7º“X™@µŠæ`ÑþÕÖs:Ã(’Eú÷V#P}"ùW¤oõåœÀpŠä_‘¾Ã=…ÕT_ hþí_}A'$0<"ùW¤ïðN`%õè%ÿzSM§$0ns°Û~ÃÙµUÔO@Ö/æ­+“¸§… ÏÅbL¶c @€@y*ʳ«™j>³-}£åw:m*}5ük^Ü8lǪU«Vvê[¤}݆Mÿ1-6ÝÒŘæ\óà¿ë¢_O]k>=’äÉ¥§o}{Úö¶¥Ú[ϛ͹Oæµ÷Ó&NýèK€K ú_ëK-ã9Kt›ƒÝö[b èS híßçö '@`ž@‘ü+ÒwÞ¾ 0"ùW¤ï¶f æ É¿"}ç-á+(šEû`‹¦ @ày"ùW¤/`+ÐKþõ2f°»6z t›ƒÝö«·¦Ó(O@–g;î3+q„‚Ž8–'@€ÃP 
:\ï‘­vÏìì3I3ùÕ®6ãÅ/>ñew]xá¥]¼Å³óŒk/ÙòïïÜ3¤ušÉu·Î^÷ÕnúöÒgÏìì“I³KǦ¡Ç,Ýö–¹?qÊ‘×D€= ¼tz ÿ•Þóú¨»@·9Øm¿º{:?²Šæ`ÑþeíÛ¼ê(P$ÿŠô­£¥3(S Hþé[æžÍM ŽEò¯Hß:Z:32Šæ_ÑþeîÝÜê&P$ÿŠô­›£ó([ —üëeLÙç0?: t›ƒÝö«“³¦€¦öø­¥ tD1Q:"xË @€Ñ LnéÉ\9-$T­“éi‚Ži`l‹”+ ´ o ñC ÿ±à°Òº¿è‡~è?‡.‹A[›hîûÞ;§Võc!ÆÓ:m*ÆøŠ85uçÚõ›?wð`òÛn¹ö/:iµŸ73sôQñ°_HÇ¿?µ:º›1Ïõi&ïÚ¹óúotݿǎ·Þ¸ãO×nØò·1„W™"-VÝ·ÿ©¹ë‹Œéµ¯8õ*g,&pöŠ3Â<ý'‹5yF€ÀZ9ØÍG®v£¤òºÍÕlr6“p%0|"ù*W‡+Èäj&áJ`¼äêxÇÇîdErµ5Æßƒ39WÃ(’¯ruøñ±"L H®Îãß¹f®†/ÐmÞúï×áÇÆŠæ t›«óÇø^=¡CŠ©BÐ!A[†ŒŸ€bÐñ‹I©;Ú½{÷w/Z·é’eÓáî´ ty7‹¥E,[/X»aó7ÒbØO%sÉž¹©æ·b2õpÜß|".ožÚL¦_Ùh$¯ I«À2^˜¾ ô˜næÎú4“ä·vÍnÿƒì¾ükòÉtŸÿ¾È:1&·~úÓ×?VdL¯}Å©W9ã @`13–¿<œ6}R¸ÿà·köŒZ¹×ÊÁn>rµ%}”#P$W³ÈÙL•ÀpŠæ«\n|¬F «™„+ñ«ã»# ÍÕÖ8Îô\ W h¾ÊÕáÆÇj2¢¹š“³™„+á É[¹:üøX‘@&P$W³1®ÕPZrl‚– lz 0Þñޞݕ!ðÜ>“æúôM—O™?-=5íÿÞ8g§ÃÔÝS1|½±¢ñpŒÓ_šj„ëÓö_I G7-MBrÛcó—‹ì¥ï¾ûç¶§s4‹ÌÓœkn+Ò¿ß¾âÔ¯ ñ 0_`ËçÏ¿õ! ͽ¢ý‡t ˨¼@¯¹×ë¸Êƒ: zÉ»^Æ”xS¨…@/yעZ`:$zÉ»^Æ”xS¨…@¯y×ë¸Z :$’zÉ»^Æ”´}Ó¨@?y×ÏØÚ;(Šæ^Ñþ%lÙ”j) ÷jöÜC· BCZ´˜ÛIcq… ÅÍŒ @€P Z±€v{œ[nÞ±;„¹ÿ3 á[ÝŽ)£_ZúñsϬݳgÏÁ2æ_jÎ;¯ûûôìw.Õ~Èó$ypWØ÷ùCž—ü îq*™×ô¨•ÀÙ+ÏoXñ£µ:³Ãµ@+çZ¹Wä#W‹héK`0½äj¶²œÍ$\ G ×|•«Ã‰Udr5“p%0Þru¼ãcw2^sµ5Þ߃3EWÃè5_åêpâc™@¯¹š—³™„+á ô’·ruxñ±L —\ÍÆºV[@Aè€ã«tÀ ¦#@€“) t2ã6]ï¼iÇ—÷Ï%?•d~y ˜$i}šÉ¯î¼éšw‡ÙÙ¹CÖµÙ ]¿é3‰ÉöQí³îqXÀMD€á¾$¼búd A •k­œëå#W{Q3†@oýäj¶¢œÍ$\ ”+Ðo¾ÊÕrãcv™€\Í$\ Œ·€\ïøØL ß\mÍãïÁ™¦+rúÍW¹Zn|ÌN è7W³yäl&áJ |~òV®–+ÈúÉÕl×j (P|‚Ò4 @`òƒN~ û:Á§f·?xàéGÓ7„6-$É“}MÖõà䯒f8{çÍÛ?Ôõ:î{â‘[Cífê$‰Ÿì¦_Y}ê§²LÍK€: ¬ŒËïó.¡u ¾3U õ/:Z¹Öʹ^>rµ5cè7W³ål&áJ f& ÈÕL•Àx ÈÕñŽÝÈ‘«­¹ü=8u%PžÀ òU®–3È‘«Ù\r6“p%P®@¿y+WËÙ dýæj6kõ„öc… }N€ª% ´Zñìé4·ß~ûÓ;o¼öƒO7Ÿù¡f’üV:ɳ=MÔiPþ¡™þÐþã·¿yæ®Ùí÷tê^vûwܱ/ Éõ×I’/îºiûßtìWr‡ºÆ©dVÓ @ vÇO~ç¸÷†7¬øÑÚÝ C •[­kåZ?¹Úž±: *W³•äl&áJ`ðƒÌW¹:øø˜‘@& W3 Wã- WÇ;>vG d®¶æô÷àLÖ•Àà™¯ruðñ1#L`¹šÍ)g3 Wå *oåj9ñ1+L`P¹šÍçZ}¡=ÆX!hp† @€ê LW÷hNVTàŽÙÙHÇü?oŸÙü[Ëñg1¹ }sæO†§ŠÎ5¯Z_š|>4Ãÿ÷ÿuß­{÷î=0¯m ¾6·…0õ/ó6’„öŸO=ã4>þvB€*´þ0?xÌÆp÷³÷†kžº3ÜðÛU8–3©ÀiÓ'…-GœÎ^yÆÀö!WFi"/”‘«Ùär6“p%0²òU®&>f!\v6©@IDAT ÈÕL•Àx ÈÕñŽÝÈÊÊÕÖüþœ)»Œ@Yù*W³ÈÊÊÕl~9›I¸œ@y+W3ÈÊÈÕln×ê ´ BϾü£!ý½òUÿ´8¡BÐ š‚TO VïHN4H·½í²cW>}nZz~ZyFLÂKBˆ/ 1òª£$ ûBLMû|5‰É—b3|©y 
ü·]»®}d{2סât¨‰'ª,pö¥Mÿ_rŸ?ûí¾§èoc8úï?w w5œ-Ý»ÿp÷¾{Ã}üNx2y6ýçméû¼}XT áȸ2¼túøpú²—‡³WœÎXþòEûò¡\¤¦¹ê 0ª\Ílål&áJ ³À(óU®vŽ2¹šI¸o¹:Þñ±;™À(sµµÎ"áJ ³À(óU®vŽ2Qæj¶9›I¸èN`Ty+W»‹^2Qåj¶þ ¯çv× §ëz®sÞßÿ¯êß}Ã{úŸ¤ë§cZzµ‚ÐÖ A;i&@€õ¨Ü_ëÊážüÌ3Ï\vꩯ9þàT8jz.<þÐÔ¾ïÞ3;ûÌpwaµNâÔIH;ÉP ZNÜê\ ZލY  @€ @€ @€ @€ 0zÅ £ÁœsÙǶ&IrÕÂçîS… ~  @€éœ6M–Ø»wïôÏCi‡ÖŸ1§1 Œm @€ @€ @€ @€ @€¨©ÀŸ]ÿ³[ӂР tÁ€BÐ n  @€……Ü @€ @€ @€ @€ @€ @€Êh„Ư.kþ‰›W!èą̆  @€À(ƒŽBÝš @€ @€ @€ @€ @€ @ Æ BŸ¾BÐg£ @€bŠA‹yéM€ @€ @€ @€ @€ @€ @ ö¡ AðSd  PÅ õ‰µ“ @€ @€ @€ @€ @€ @€±¨mA¨Bбú9´ 0 ŠA'!JöH€ @€ @€ @€ @€ @€**P»‚P… ýIv, P®€bÐr}ÍN€ @€ @€ @€ @€ @€t¨MA¨BÐ? š  @€¥ƒ.%ã9 @€ @€ @€ @€ @€ 04ª„Ư¾ûº÷\54P  @€•P Z©p:  @€ @€ @€ @€ @€˜\ª„¶ A[g›ÜÈØ9 0jÅ £Ž€õ  @€ @€ @€ @€ @€ @€ªVªô…ÐúB€ô! ´›'@€ @€ @€ @€ @€ @€õ·‚P… õúùsZ 0*Å £’·. @€ @€ @€ @€ @€ ГÀ¸„*í)| @€=(íÍ @€ @€ @€ @€ @€ @`´£.U:Úø[ÔM@1hÝ"î¼ @€ @€ @€ @€ @€ @ "£*UZ‘ Ç @€$ t‚‚e« @€ @€ @€ @€ @€ @€@»À° B‚¶û»#@€†# t8ÎV!@€ @€ @€ @€ @€ @€JVA¨BÐ’hZ @ £€bÐŽD: @€ @€ @€ @€ @€ @€ã.PvA¨BÐqÿ °? PmÅ ÕŽ¯Ó @€ @€ @€ @€ @€ @€Ú”Uª´6?BJ€ÆV@1èØ†ÆÆ @€ @€ @€ @€ @€ @€¢ƒ.UZ4ú @€e(-CÕœ @€ @€ @€ @€ @€ @€ÀÈUªtd!´0 °@`zÁ½[ @€ÀÈ>þÙ¶}.Žlýq[ø$áŠ7Û®zÛØ¶»‰m»G•îĶJÑl?‹Ø¶{TéNl«Íö³ˆm»G•îĶJÑl?‹Ø¶{TéNl«Íö³ˆm»G•îĶJÑl?‹Ø¶{TéNl«Íö³¼+ýwïòï ÚQ*r'¶ ä"ÇÛEP*òHl+ÈEŽ!¶‹ T䑨V$‹ClA©È£ªÅ¶"a™øc´ BϹìc!I’«z9ŒBÐ^ÔŒ!@€ÊðfвdÍK€ @€ @€ @€ @€ @€ŒT ×7„*iØ,N€," t @€ @€ @€ @€ @€ @€j-UZ¸;¨š€bЪEÔy @€ @€ @€ @€ @€ @€6n B‚¶±¹!@€ÆH`zŒöb+ @€@I’lÍëvîOÄ7¾üÄäMy}´ @€ @€ @€ @€ @€ê&Ð*=ç²¥¿Š¹øïb*­ÛO„ó @€ÉP :Yñ²[ ¾xã•Wç1ÜýÛ¹*Ä 4I @€ @€ @€ @€ PK´ ôê´ 4,,UZˇ&@€%И¨ÝÚ, @€ @€ @€ @€ @€ @ VAhZü¹5›B!h&áJ€Œ³€7ƒŽstì @€ @€ @€ @€ @€¸@ö†Ðtâ˜~ß:ðLH€°€bЃšŽŒ^ Ù’ïÿ¯U~?Ýïàþ‡Âª´wëO*z¤±LÿTà#¶íAÛv*݉m•¢Ù~±m÷¨ÒØV)šígÛv*݉m•¢Ù~±m÷¨ÒØV)šígÛv*݉m•¢Ù~±m÷¨ÒØV)šígyàáøÇ!I¾Ðþt2ïÒŸÓUéÎ[|R±­îØŠí$øÿÉíQ’·íUºÛ*E³ý,bÛîQ¥;±­R4'ç,­‚ÐÉÙ­ @€uP Z÷Ÿç'@€Ê Ä“®õ ùKgmøHŒÑ/d?”ü•ð…xò••øMb›EõŸ®bÛîQ¥;±­R4ÛÏ"¶íUºÛ*E³ý,bÛîQ¥;±­R4ÛÏ"¶íUºÛ*E³ý,bÛîQ¥;±­R4ÛÏ"¶íUºÛ*E³ý,wýeò…_ýeÿ® ]¥wb[8.v ±]L¥ÏĶq\ìb»˜J5ž‰m5â¸Ø)Äv1•j<ÛjÄÑ) @€Êh”7µ™  @€ @€ @€ @€ @€ @€ @€úP Ú¯ ñ @€ @€ @€ @€ @€ @€ @€ƒ–ˆkj @€ @€ @€ @€ @€ @€ @€@¿ŠAû4ž @€ @€ @€ @€ @€ÿŸ½{²«*¾÷¹7!‰4Ï"j@_ 
½˜àèj[­îQ‘"©¢R$8 vÛê¸Ô^­3mëàÒ¦gÙ-š Ñ„;© ‰˜ÑÖ6£¢íjÓ¶£àcpäåEPžJ=»hQ ©{êys«îï.Kî=û;ßþöïTýu×— @€Ì¡€fÐ9Ä•š @€ @€ @€ @€ @€ @€ 0SÍ 3t? @€ @€ @€ @€ @€ @€ @`4ƒÎ!®Ô @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€#;æ¤J€ @€ @€˜G=ýƒçårOnZrJ7_¹ñýMcØbw÷ÙG×–ÖŸCñ‡ù‹®ùxG„˜ŽH!Røyé–ÂCŒ·¤2ü8Ž–»FF6æë^ UÀßÊÌžlïÀúKSH¯xt–4Îi }íÑ×}&@€ @€ @€ @àÀÔ̶v%@€ @€ @€h._cèmÃ7òú‚o]½zðÈpP8/¦ðêãS÷5‰áÁ5†B~÷Û µÈ­¡‹ãÞÞþÁÿU†4´çÎÛ¶ïܹóþ}ïwe~ ø[™îóëé?÷9)¥ òßÕoÿl~›+¦%¿}ï  @€ @€ @€\@3è @€ @€ 
@€ØŸÀi}}]V[ú7yâçÚÜä¹ôw]žû‹ÞÿµÜã6þ}Ø+Š_±ìÐÇÞ±ºpèŽò¾wîj4îÞÿ®è ®®®zˆµKsûô¾ Aà” @€ @€ @€æ•@þ÷½ @€ @€ @€´—@OßàYËŠe׿&Ð׿^µ¥³S]<¼ˆñÏ(–þ뙽ëž;;9e!0?Ž8jÅ…¹ôßÏÏêUM€ @€ @€ @ ó4ƒvÞ3wb @€ @€ @€@û ôõÕzú/µØÈ“@;…æi¡O®ÕãWzû×ýeÎïû²¹@–³­V÷­}Iþûú‹¶.Rq @€ @€ @€ÆÓŽ8zŇyÑ' Sà}ƒOõ°3„xøÂ<¡S @€ @€ @€X¸þ•ã…ûlŒ @€ @€ 0o?zÅ{ó˜ÂÓ§PpR¸&…ðƒÓ-©L·ÇX<>„tlˆñr®§O6WxAOÿàµÃ[†þûdïG`¾ ¼¼¯oùAEü\ˆá‰ó­võ @€ @€ @€ ‚fP¿ @€ @€ @€T wͺ3rSç['SD é硌‡½{7oºe¢{zÖœûük¯Ëž9fÑDq»þáUç~ÛæM_xØ5o ,3žX ÿnŸ° ä @€ @€ @€:P èÀ3;2 @€ @€ @€@›twŸ}t(â†Ü´™‡y6å†Ñ«ÓýéYÃWnx_³FÐñ,ÃWnº&OúÜ;Z¾ ¥pCóÌ!õ"Ô>ÙÕ×wpU¬uóI`uÿàSkañ—ó˜FÐùôàÔJ€ @€ @€ @àQšAâ# @€ @€ @€@ëêK]˜[1¯Ú1…ò=Û7¼bddãÏ«b¾~ÕÖ_/ïÿõÉ)¥Ï<üúþÞÇ:".{ÓþÖ\#0zúÏ*bøçü»}ì|¬_Í @€ @€ @€üN@3èï,¼#@€ @€ @€h¡À™}ç>3O+<¯jËÜÈùñáÍßU7Ñú¶mÛ~uû­7õ†”¾:QÌC×ó|Ò·ž~ú9•Í©Åû/vX¹rå¢<ôâX‹Ül}H;Ö¨& @€ @€ @€¦& tj^¢  @€ @€ @€fI VÔÿkˆ±Ö4]nà¼ñúk_×4f‹»víÚóÀ½£gæÆÒ6 áÐÅ×ßÖ4¦ÍÏì]÷ÜîîcÚ¼LåÍ‘À™O<ö)'}©ˆñÏçh i  @€ @€ @€8šAº-  @€ @€ @€.ÐÛ{Îñ!¤WT9”exûîÝ»G«â&³¾cǧn ¡\SC|cw÷ÙGWŵëzQ«êKߨ;0¸å̵/h×:Õ5û«ûÖ¾¬3O¸}Þìg—‘ @€ @€ @€) ô@êÛ› @€ @€ Сe½þÚ˜_MŸÂFC_i3ÅÅá-WüKHéªf·åª–Õ—,~u³˜v_Ë´õâšz¨})7…~kõšu¯=ãŒ3–µ{ÝꛞÀgœýû½ë/Eñ™ÜÌ|äô²¸‹ @€ @€ @€vÐ ÚÎOGm @€ @€ @€(°råÊE¹YñUUG+ËôWU1ÓYO¡|wÕ}±H}U1óg=>»(Š.~Ìá·¬îüàªþõOž?µ«´™@WW×’ž5ëÞ¶xÙâësÜ*¬›%³F€ @€ @€ @€@[ hmëÇ£8 @€ @€ @€ÀÂXñ”_šG‚Ýìd)¤¯ÌöTЇöŸšB¸ú¡Ïûÿo7Þ^vg¹çãŸo4~=OÏÓQe¯î[÷¢¢?ûxO™úÁÓåß/ï;ÿéq黵O]Ï @€ @€ @€¤€É RßÞ @€ @€ @€Î8µêØ£{Óç«bf²^ìÛUuþ"m¼ta¿b|JŒÅ‡­-ýqžú?Îì;÷Y ûÀóûtÝÝǵb×”AS+CxËÖÍC¯¹¶Ñx`~+¨ž @€ @€ @€@g híÌçîÔ @€ @€ @€"ÐÓsαy*åS›ožîؾuã¿4™Ùêðð¦[RJ?lž%ž²j`à¸æ1 e5>&?—×ÕkõÿÓ30¸«§o𬮮®úB9ÝB9G½^›ò÷»ù÷ü»e^0²yÇŠƒs @€ @€ @€èD)Y؉HÎL€ @€ @€ 0;iÑ¢?©Ê”RüÇ“Îùë‹U;ÄtÐ «bÚz ñE±G,_ñ£ž5ëß¹zõº£Ú;ä<£)”ï¹ïÎ_œ<ÒúZ‡œÙ1  @€ @€ @€,XÍ  öÑ: @€ @€ @ ýbÏ®ª*Ç|¡*f6Ös£Ü®ª<1¤çTÅ´ßzÚ”Rÿ¹&µå¦Ð'Ä"üu±8ÞÜ;0¸ñÌÞuÏI>÷¶T`w™Æž3¼yã»vîÜ9£ßƒ–Vm3 @€ @€ @€&¨O¸b @€ @€ @€À¬ ¤“BÈížM^e™¾Ýdy—â·ª“ÅSªcÚ+bdËÆïäŠÖžqÆÙoZ´´þêãëCŒOšv•1.ÏW_׿¦Ðo”e¸tÏ]¿Ø¬ÉpÚ¢svc á'©,ÿj$í¹<4cs¶‘Ä @€ @€ @€´\@3hËÉmH€ @€ @€è`N¬:ýh?ªŠ™õ=w–7-;´¨JU9É´*ÁZß±ãS·å½ßŸ>°ªð¥EˆçÇNÏŸ+=qÍñ”¢ŸXzèc?°ºðï÷–áow4†nš8ÞJkÒ¹ô¢Ñ{î¸xÇŽ÷¶fO» @€ @€ @€ ÐJ|ÑÛÊ2íE€ @€ @€ 0ß^Ú×wD ñqMϑҟn ý´iÌ,-îܹéί›¥Ë͓˺»Ï>ºYÌ÷NW Ý“R¸ø{FŸ<¼yè½A§ëè> 
@€ @€ @€í/`2hû?# @€ @€ @€„À’rÉ ¡Vy”ñ)“eeÔìŒï÷¬féâ¢úqy}FÍ“Íò·rí7S<ß¹råÊ =áÄž¼÷ù¹±óEÓ®!ÆZ aU-ÔWõô~7¥tÙiφ]ÆÝÓÎéÆJÒ1…Ëöܽ÷ﯾú“wTÞ € @€ @€ @€y/ tÞ?B @€ @€ @€Ì¢—WUšbüQUÌl®§nÊÍŒM›A‹zxRÞóë³¹ïε{÷îÑü³%×±å̾sŸY+j¯ÏM¡ƒ!†C§[[ŒñùçÒ#ÃÒ÷æÆÐ {Ëò²í+¾?Ý|îÛW ,ï¹»V|Öpº[h4Æöp… @€ @€ @€…* t¡>Yç"@€ @€ @€´™@ãÑU%Ån¨Š™ÍõÜzsU¾<ññU1óyýªÆ¦ërýo<õÔuñ˜ÃÂ9EÏ!ž<ý3ÅCb V/Š?íXÿù0.nløtÎ×ʉ¯Ó/¿ïܶmÛ¯ry[Û¸D¥ @€ @€ @€ 0GšAçVZ @€ @€ @€G ¤ŽÎÓ'yñQŸRozÔ¥9ý˜kº¹²¦X>§E´IòÏ}nã=¹”¿ÿ9³wÝsë‹l íÏŸ—L§Ä<%tüa¿$ÔÂKzú×ßRù·i4~lddè—ÓÉç @€ @€ @€t²@Ñɇwv @€ @€ @€Ö ä¦ËêÉ 1ÝÕºŠBîOLwWí—§•vD3èîںñë[7½òî±{ŸÊò­!¥ë¾>Õ÷¹-ô¸XÅ[zú/ïé_û連C< @€ @€ @€:YÀdÐN~úÎN€ @€ @€h¡Àd&ƒ†ïkaI¡(Š=UûåFÆŽk}È䳯íùýóÏ[Ý·öűVœSèÎÏ©öPÌÿ»$ }UµWõö~u,ÅKoþáwþçîÝ»G§˜G8 @€ @€ @€:JÀdÐŽzÜK€ @€ @€8€),«Ú=…²¥Í e •Í )ÄŽm}ØóJ#+þaxóPO;.»½;7÷þôaëSãÖŠðÉãžrÒM«û×_ØÝ=pÌÔ“¸ƒ @€ @€ @€! ´3ž³S @€ @€ @€¸@ ñ Ê"ÆZ;4M¢4OÂ\RYw oºedˆÿrûÏnZʱ³B _˜ÑñcX^ÄðŸëKߨÓ?øw3Êåf @€ @€ @€,Pú=—c @€ @€ @€´™@Šé ÜZQUk'ƒ“hM!,ª(º#—wíÚµ7|ëøOwßÚ§Õ‹ÚëóûWÆ›HŒ±R:u:÷º‡ @€ @€ @€ ]ÀdÐ…þ„ @€ @€ Ð.©z2h ­ ZaO%OLšA+¶7®øþð– oþiyï1e ¯ÎWÿ¹âË @€ @€ @€ 0“A§€%” @€ @€ @`ú1O •“A[Û šâØ}!Ôš*Ï2Õ ÚTèw‹_k4²gøøøOwïºS-Šç‡Î1.ý]”w @€ @€ @€ 0U“A§*&ž @€ @€ @`ZyêçâªÇÊILê¬J2…õ8VÜ_ž¢`µi߀í[7~cëæ¡×ŒÝç1yõ²}#\!@€ @€ @€ @€É øâz²Râ @€ @€ @€f$SnôÌc6›½êõ±–~•jeÞ¯ùdÐÓÞf5[›@ ¯¯Ö–ŠtAJéÔ+þi\&@€ @€ @€ @€Zúe:p @€ @€ @€ÎH1Ý+ºAÇb\ÒJ¡b,ïWÑ šëy •5Í÷½V¯^wT<(ž—›_—÷ŠPñÌçûyÕO€ @€ @€ @€Vhm…²= @€ @€ @€²@¼»Š¡ -m!-­ª)¥pUŒõzÖœûükäùŸgåg½X¨ß  @€ @€ @€Ìž€fÐÙ³”‰ @€ @€ @ ©@ÊÍ ¹U°É+†Zesf“Û§¼ã$šOcÔ :ì©§®{ÌÁ‡ÇµÙñ‚ülŸ=Aؤ/§~<é` @€ @€ @€è Í ô°• @€ @€ p@Rž Ú¼4”“iΜÅCŒïWTä‹)U6ƒö®|qÓ!©feyÏåçwîÜtç¬$›f’ÕýëžC‘B¸tdó†ì/Æ5 @€ @€ @€ø7Í ~ @€ @€ @€Z"›þî® Š7ƒ¦2,‰U£ASøeK€Út“îîcêK¿6?œÿ˜Ÿß13-3oü¼tï}¿úÄöíÛsC¨ @€ @€ @€T h­²N€ @€ @€ 0+E¨ž RZ:+›M6IQTî—B¼u²éR\ÏšÁ?ŸRX•'Îè{ÅÜš{ÃÎT–—Œ4®øl~?þÙ‹ @€ @€ @€&)0£/m'¹‡0 @€ @€ @€Y Þ]Åb:º*f6×c ËCŸÒRÇ4ƒžvÚ¹‡,;¤6˜;5/È  Ïxк§ùóHw–)||¬,/ÝÞ¸âúæ±V  @€ @€ @€ @`"Í ɸN€ @€ @€ 0«c)þ´VÕX˜Š³ºiE²\Nå~1• ¾´»wðÙõúxhX››vSõ˜*XÇÇ~~/O½ôŽ´gîF£² ¸*Ÿu @€ @€ @€tº€fÐNÿ p~ @€ @€ @€@‹jcáÚP4ßl2Í™Í3Lq5¦Ü Ú¼õ±LÅ‚l=±¯oñ qYoÃrèó§(·¿ð2¤ô™±/Ù¶eÃçöà @€ @€ @€LO@3èôÜÜE€ @€ @€ 0EnøÎŽ}ʉ{cŒ~G•lΜbâ„ç 
–+š·‚æVѱ±f°EÛÝzFßàŠE1¼.7æž—Ï~ÔŒ Lá×!–—¥â²m[†~8ã| @€ @€ @€ @€À>~ѾO¤  @€ @€ @€f °{÷îÑÜ úsŠgL˜&…LJ¾¾Zh4Æ&Œ™¥…ñɘ!…åMƒæFÇááM·ÌÒ–2M\Õ¿þ%ELoˆ)œb¬Í¸˜®Ë9.½ëŽrèsŸÛxÏŒóI@€ @€ @€ @ ßÒ@IDAT€ hÆ @€ @€ @€À¬ ÄpmÎ9a3èøÔО±ÅaÎ0Ÿ^.zB¨Å¦ƒAó¤Òñ†Çɼ>šB9ó)›“Øiì¾½¿˜D؃!§Ÿ~ÎáK®¿*8?7½>%Ï9}ð“½?qe éÓ± —l½rèóûYw‰ @€ @€ @€æ@@3è JI€ @€ @€ °”âuÍÛ/ó}õÚ ùÿç¼4ÔÜgÿ…þæjnüNÓ€ß,oºd2q­Šéî]wJ½/Èõä) KgºoJáW9ÇÇFËò²?ši>÷ @€ @€ @€ @€ÀÔ4ƒNÍK4 @€ @€ @€À rSáµãÃ)+^/Ìë_¬ˆ™ñr*‹*K¯wž½zûß‘@ß7;e§kó3»dôÞ;6îØ±ãÞÙÉ)  @€ @€ @€LU@3èTÅÄ @€ @€ @€L[ ÆòºЦ÷Ç»rÀ…MƒfeñÁ}šf+ãWš´áb¡ͅ+ŠNi,…¸#•å%#ÿXm™ @€ @€ @€Z  ´ȶ @€ @€ @€ø7‘rÏw{‹e¿ÈÓA;¡I ÏëêêZ²k×®=ÆÌp¡«¯ïàÂsšM)M!ݶ­1ôÍn5nOáö2¤Åѽ— òÆùS¸J  @€ @€ @€ °ð4ƒ.ügì„ @€ @€ @€öh4ÆÂÀú­¹ ×7)jÉaËŸð¼¼¾«IÌŒ–ŽŒKžblþ]Y _È›äA› ü•·ËT^ò³´çН5÷-ðÓ: @€ @€ @€æ¥@ó/¸çå‘M€ @€ @€ ÐΩLWÆ"6k µ²xY>î¹:GãiEUò˜þ¡*dÞ®§4–k¿ª,ÓGFÿ÷¼=‡Â  @€ @€ @€ Ð!•ßqwˆƒc @€ @€ @€´H`8Ý÷¥­ͶK1¾îÅ}}‡6‹™îÚêÕƒGæ/É^Óôþ”½gïUMcæáb é—¡L=P†ã·nêÕ:¢’  @€ @€ @€èHÍ ùØš @€ @€ p±ÒÖfÄ;¤¶ôÍb¦»…·„nvnVÙ±ãS·5‹™Ok)¥o¥PžwûÏnzÂÖ+‡Þ±£1tÓ|ª_­ @€ @€ @€ètz§8? @€ @€ @€@ëR™®ŒµxA³coîîî¾xûöíw5‹›ÊÚé§Ÿsxný³ª{r}­Ši÷õܺ7İm,”¹jË_n÷zÕG€ @€ @€ @€ ˜ :± @€ @€ @€9ilürJáææéãá‹–öÁæ1S[=èàE—ä©£¿×ì®ÜDùƒ\ߛŴóZLé¶<ô}atìIÛ‡ú®Ú¬´Ÿ—Ú @€ @€ @€ 0“A'£$† @€ @€ @`¶Êܰø¶ŠO5O_»z`ýF6oø@ó¸êÕžÁwåi£çVE–)¾+Ǥª¸v]¾rã¬6жë9ÕE€ @€ @€ @€N0´“ž¶³ @€ @€ @€ÚH`dËÆÍy gåÎü…ÖE«Ö¬_3“ÒW÷¯Œ!¾»*GžVúOÛ®ÜpeUœu @€ @€ @€ ÐJ“A[©m/ @€ @€ @€À, äÑ•Ë{ûß2‹)g”ê†ë¯½d÷îÝ£SI2VŽýi­¨}+ÆØì{«¢ˆióêþÁ“ï¸õ¦wíÚµkïd÷X¹rå¢ãNxæE9þÍ“º'í}ë¤âÍ+…ð·2¯ÀK€ @€ @€ @€À¬ 4ûR}Ö7“ @€ @€ @`öò¤Ë'„>8{g–é÷Ž?þ£aŠÍ W56]×Ó¿þÃyçÿÔl÷Ü,cï8bùŠ?ZÝ7ø¶‘ÆÐךů9°öµT{~û¼ªØ[/?4|å¦k&+j> ,„¿•ùä­V @€ @€ @€f_@3èì›ÊH€ @€ @€ 0½÷Ýqa}Ùaý6ìUÜ—cþ(ÖÂW{ú¿•Rº<þöhJ7Üöüì˜rÑãÊ¢v\#ú¬< ò¼¼vRn–Ô+¥ðO7^ÝÛ',ˆ @€ @€ @€´X@3h‹ÁmG€ @€ @€ ðHíÛ·ßµºÝ©!Æ/æÞÍ£¹ºÿOyNèäŸñ‰¢aqîø<&, ¡Bñ›ðIö€>Go-Sÿî)N5ýÍVþC€ @€ @€ @€æ\à¡ïÃç|# @€ @€ @€˜H`dËÆïŽíý“ÒÏ'Š™‹ë¹ô'qô®¡›æ"¿œ @€ @€ @€ @`64ƒÎ†¢ @€ @€ @€3¸ª±éº8[ךÒMe /ܺõSß›qñ @€ @€ @€ @€9Ð :‡¸R @€ @€ @€LM`ëÖ ×Ž…NÉBG¦vçÔ¢S [ï-ï;eÛ– ?œÚ¢  @€ @€ @€ @€@ëê­ßÒŽ @€ @€ @€&¸jóæ›ójOOßÚÓcQ\b|ÒÄÑS\Iáe(ß8²eãæ)Þ)œ @€ @€ @€0“A½  @€ @€ @€š 7®¸ú'å}'†”Þ”'y^“cËfñM×Rø×²L¯ùå­7®ÐÚTÊ" @€ @€ @€´¡@lÚ”D€ @€ 
@€ @`—÷õ-?(.Yc<#O =>¤°<ÆpØ£S~å/Á~BüjéšÑ½áKÛ·}óÑq> @€ @€ @€ @€ù" t¾<)u @€ @€ @€ì#ÐÕÕµä°åË—‡±ƒŽŒEºkï½£·ïX²÷ŽÐhŒíì @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€m&Û¬å @€ @€ @€ @€À ôôŸûœj¯l–¾ŒecÛæ+v5‹±F€ @€ @€ @€@kê­ÝÎn @€ @€ @€ p RˆO-b¼ ÙþE*¾—×w5‹±F€ @€ @€ @€@kŠÖng7 @€ @€ @€˜Š@oï9Ç÷ônšÊ=b  @€ @€ @€XX&ƒ.¬çé4 @€ @€ @€,®®®%Gµâí¡ˆïˆ!=°@Žå @€ @€ @€LC@3è4ÐÜB€ @€ @€ @€¹è]³îå)ÆÄŸü›}4ƒÎ%¸Ü @€ @€ @€Ú\@3h›? 
å @€ @€ @€ Ð9==ç-º8İ*vα” @€ @€ @€ Í @–  @€ @€ @€ 0×'öõ-~Zmé[CŠcX6×ûÉO€ @€ @€ @€ÀüÐ :¿ž—j  @€ @€ @€X€O+–n!¾4Oõ"@€ @€ @€ @€À>Å>W\ @€ @€ @€ @€– äi G¶tC› @€ @€ ðÿÛ»óøº®òPØ{mIŽí $q€@˜B™\Zh)˜¡q"ÅÎ`™Rzé-.½ô–¶Ð¹å^ÚRúb;‰ma91CH‹i¡@!Œ S[ LÈh’8Ž¥½î{œØ±déì#éÈ’ŽŸóû9:{­wMÏÞ{ùåõ"@€˜W’AçÕí2Y @€ @€ @€ @€ @€ @€ @àpè>Ül½ @€ @€ @€8\rU]—»ÒŸ5[®ÒçšÕ«#@€ @€ @€ @àÐ H=ôæF$@€ @€ @€ @€À¬\1xÙcàÆ @€ @€ @€æ‘@9æjª @€ @€ @€ @€ @€ @€ @€ÃN@2èawË-˜ @€ @€ @€ @€ @€ @€˜O’AçÓÝ2W @€ @€ @€ @€ @€ @€ @à° zØÝr &@€ @€ @€ @€ @€ @€ @€æ“€dÐùt·Ì• @€ @€ @€ @€ @€ @€8ì$ƒv·Ü‚  @€ @€ @€ @€ @€ @€ @€ù$ t>Ý-s%@€ @€ @€óO S~Ñ)ëhå J­Í@Ìl;KÙÛe'?3Ö6SO~Ç tò³6v­ëNÛÇ[£2 @€ @€ @€) tO±f @€ @€ @€½Ëúû:¶\ôs]E:½Èù´œÒiEʧEe\'Ff˽Qv{*òí¹(n‰ŸŸ©ÊOçr÷§®Ü¸ñ{{;™ÿi¬ã¸âˆg§²ëy‘ŽsVÌ󄜋R‘NÈE>&¥âžXÏÍQwsÎéæ”òUU|räÞ=Þ¶íò›æÀ&5…óV­}t™«•©,•s>5ÖwZ¬«ñsa¬÷‡ÑÙ÷b½ßM¹ø^•ÒWo¬îüÔàà®I 2Aðyýk—–]ùœ"dzRijÏL<'§ì ÏÅ­ñó¶øsCNùUQí¸ãÆïzÇŽ÷NÐݬŸ}öš#<®\Y¦âYñ¼œ“99ž‘“c=ë=ñ3žûÏñµ*ÿžªâ߯ÿïë>qíµ×î™Õ‰7|åÊ ÏÈ=ñäôôxNˆŸKâ>>ž‡%Ñìø¸_GÅ»¾+§âŽ”Sã½¾=Ö|G”Å{‘>½gdäãÛ7|£É³ZÕ){Ö¬"¶qðÃmï]¶lÙÂcvÚŠø;ä§c_¸¿ˆ½#é¤xßÊØ/nŠòã}»1޵닪úÈó½×´kÿmã­Ó @€ @€ @€À!ˆß#ù @€ @€ @€&/°b嚟ìéIÿ#V.Ø›6ù."w,-~Yñw#»w^rÅWÜ1….¦Õä…ýý9ºkÑ/ÇÎu<#Ù&ýiÆ".}!þ³½(F·nÚð¥iMª¦ñy篽 +å?œ(,(ÿ|hãúwW¿téÒžÓÎ8ëÜ¢,^õ/ˆõ¶þ»¢\ÜVùÝ9Ý÷Ž+6nüÎxý7+Û›ð•^P–ecì¥Íb®ËwW¹ø»´§ú¿CCn8¸~j%-Xn Ë߯÷sW_üsÝEùŠH íţNj™¨,’½n‰§æÒ‘áâ=Û¶¬ûòDq‡ªü%ýý'‘½$Ö±,žåeñóþÄÜiL Þ‡Hh˯rµíö›¾¿)’y‡§Ñ][šÎ…=«oÕEÏHE×eã-(’O û#Æ«{ ,^Áü­&õ‘·[|qhãºþf1}ýkW¤2¿µYLU¤?Úºé’uÍb¦Sב{oQüÍЦuoŸÀ¥ì;๑4½&îñÊxÏŽ™ nÜâØ/««œ·\‘w]^ ŽŒ¨ @€ @€ @ £ZÿG3X @€ @€ ЊÀòå³ø˜î‹â”À_Šd–§µÒ¦•˜û]òûªÝ;ßx(’B_Ö?pò©øõ8±ôÕ‘˜3©D¾fëi$†ÆI¢ïßSüÞLŒØ{þš_Ž„ÊwN4\U¯Ú¼þ D¯¾Õk^QåŸÇ/‡>QÛË#©Ø|×íÕ«®¾zýÝ-´I}ç¯ùÍ"•ojƒõ½¹Ê9”wý~;’¡j-sñ÷C›.i$¯îÿ¼¨¿ÿø#ËÅœ·¿pZ_òæ‘â¾7L%ÁvZÃFã¾¾‹Yôt½!Þå_Šõ4KBœÖP‘ ŒÕŸ^ÿŸ_]w¨ODk{ÖýIÄ]ÿ2-Ð&ãÝüt<³ÏjRô®ºø¢2umh[Ùëš$66kÚ´®£÷Þ"ÿA$â¾y,À¹+×ütWwZ¹÷[7µë|]Qåÿ¹eóúL­½V @€ @€ @€óU œ¯7o @€ @€ @àÐ ô­¸pñ1]?ˆ“ÍÞÑÎDÐÆ*"mq$ʼ¦\øëzW œ;S+kœŒýÿÙ]éÛE™^߆äÄQSÝ{Òf*ú»Ëò?úV üC#ñiTÀ,\4Näì[µvC*Ê÷¶!´±‚2ÜVu|úçsιà„fKzéK/<.ƾ2•åÿm“õÂT¦ßé+]Õè»ÙØ3Q×Hä‹DÐ/ÆZÚ”Ú˜e:¿ÌG|½wõÚ×ÏÄœÇëóÜÕ«O‰çóÿ/zºÿ+žÙ_‰õÌX"èÞÅñüýãigžõŸ½ýMÇ›ïTË:aÏšêÚçZ»Ãrï]¶¬;N}S$‚~¢}‰ ;›Î*ÊòÃ+W |hÅŠÕ'͵{m> @€ @€ 
@€3' tælõL€ @€ @€:B`Y#¡eÕÚ¿Š//¬Í£frQ‘¬xRd^‰jïŠqÚú{Œ•+/<óô3Ïú·èÿ·£ïž]GJÝ‘üóª#ÊôåÞþ5Ïɱšõ½båÀÓŽïZüùHö»¨YÜTêâyø©‹{>qNÿÀ©ãµoœ†wÄÑÝ_ˆ±Ï¯~:eaû¢…Gõ|¢‘è:~&Ó6¯~¥;—‹õœ2™v­Ä6’1ãaÿ?‘¨Ü8Ñ5^ƒ™û¬<ÍKº"ù4 _ÓwæF:¸çxfN+»Ò?÷õ¼üàÚö•tÊžÕ>‘ÙíépÜ{Ïé_ó¨%'žú‰H^s¼kÝ3rRZÞ½hÁÇúú.~ÄŒô¯S @€ @€ @€9'ÐÖÿ‰bέ΄ @€ @€ @€i ôö®yØñ'žzM$ýú´:šdãHžyu$ ®+úû»&ÙtÜðÞUkâÄ/DžÝOŽ0S…©8¾ì*?Ú{þÀ+gjˆ‰ú=·ÿ¢Ÿèé.þ%2 Ïœ(fÚå)=®§+}lùò壒 'hvw§;6í1&ê O<¾\ÜHžñO#y1ÅßD2t[žÇ‰&‰Ê¿Ïý{Úõܧoõš59¥+ã}^<¶î^/Œ4ïÍñNþÂLŒÙ){ÖLØÌFŸ‡ãÞÛÛ;°¤§L¿o~z¦ÍãïÊÇ ºþù%ýý'ÎôXú'@€ @€ @€˜}É ³Ì€ @€ @€ÌI¾U=#‘®„¾Y9Ù2Ö.ZÙµè²À™ÖI‰½«.¾(ù}ÑÍ‘³ÝS–éÝqêã[büCò»™åýýí*»·ÍôI® ϸ9g,>桯ÝgÛ8¯«(‡bìûÊfêgã‰ÇWÌTÿ~#±õgRWZIW­>‡U.ò 9wLe^1Ê+V–‹¶Äé– §Ò~¢6½«×þF‘Ó%±Œ™9¥p¢Ç)oXæÛV¬X}Ò8ÕS.ê”=kÊs¬áá¸÷žÕß¿ \¶ÆþèCu;â]zü®E—ªñŒC€ @€ @€ 0{³þËÞÙ[º‘  @€ @€ @€‰Î[½úôTt}l² ”‘w}‘‹m‘œòÍ\7ÆiŠ7Fúã±)ç'©|B‘ò#¹´q:g‹I‘éü¾UŸÚ´îí͵YùÊó^I‰ï˜Vùîï.çoG»UEñ"Ü‹®¥¢zH¬íQþÈXà "›íéÍÆ[§>þVßù{†6¯{ãØº¶^—éˆÅyñ–XñéuýFÂâ=E‘¿8ÿ÷êîˆ?+¨kuÒg]?Ñæ/ìï÷âÝ»«8)ôq}BM›=1öWbìc¬E’ââÿôx>ÎÛ%5mGU§œ~? ÖÅŸ¸]íý„Ë£#±u[ôÚ413ÖòÙœ«÷”##W½ÜóýëïkÌdYÿQÇ•G<:Ã^^äòâXëéòÚOJçÿðSñxQü™öºúú/~i¬å/ãÙ­z_@#¡5îËgŠœ¿÷æ¿S.v梺«Låp•ò1ñ ?¤,òÃs*–F›gÄ};m_ÛV~ÆTŽî^¼àmÛßJ|]Ì|ٳʑ2¶“bÏxëÉ9wߟ';^íþ²qÛî¯-Š‘¾ÏÚ×Ãrï íÇ• ÿ!öÞŸk¾ŠûýÕx#˜‹ô£Ø «hwj¼sñåGÆs0©ßãÇû÷üø^¶uÓº¶0¶ @€ @€ @€y*Ðúo|çéM› @€ @€˜¼@ßêµÛã—/n±e$«åwŒ ¿ãŠÁ˾X×&N‹{JJåÛ"yå¹u±{ësÞUyéÖMë¿ÖRüAçõ_øÔ²ìù—FÒY‹íöDvÎ;F†‹K¶mY÷…º60{R¹:úC$ñ<¤.¾QÉ?‘7X¬˜NÂNïùk~¹,ËwN8^.n‹ù?q}ÞI— ¿ýÊÁK¿:6.N¤ì>îá§œYéìˆûó¨oš¹¯},ì-1îYñܼt_ÙA?sþ`ÜË÷Þ»³ºfûöKwTY¤½ý¿(uuýZ`½¨…ĸ½]Ä};/L¯§¿¦Eµ–M[Ge.¾:<\½òÊ-ë?Sõ©wÕÚ5eÊ_k!>º¯^5´qý»[‰(¦qRáãºÿGÜ—3'ŠÙW~ÿã™×G^Úßm¾ôߢ<×Ö>+W®=+wçWÇ{=ÐêûÐèyÏžü”xß¾ÜÚ(GužµrõÀgãÙh$ËOðÉ;·l\×Ò^3A{‹'v–©kC³˜x^7Õ$üÃuï=ð{ñ÷Á)¹î}¿RqEªªKïÊ»?ö‘ÁÁÛÆ‹]¾ü¢c]¾"þ×Äü¸ñbÆ+‹þ¿6Tíú‰bppN$7Ge @€ @€ @€Ó :=?­  @€ @€ @€@Ç ´’(´Ñ9ÿ[$¬ýêЦ Ÿß_Öâ—¾þ—eúËfÉ3ûºŠ$›OmºäYû®ë~®X±ú¤8uðs‘˜öˆºØF}$Ñ\“†÷üê–-—½•øcz{×<¬<"½%æ8ÐJòbÄÝc-ݲå²oØO«ß§‘ÀXå*ÿIÞS¼mëÖu·¶2ÞŠ•Oëé.â”Ñô¨Vâ›Ä\;\Œ¼îÊ †-}â”»sãþ]ÏÇâºû‰k?_7¶~–ñÌoݵóæ7nß¾}÷Ø~›]÷õ]üˆÔS¾/LÏnר‹Ó9oÞuçÈ™$ÎÖ5ß[ß»zà ‘ØÛHêmþ‰Ópã]~ùTÞå;~Iÿ‰ »ÿcÓ¤àÄ3ùGqZîïP4鯰g5Ý É öÞñßx—·ŽŒŒüîx øã·Ø[šúV¯Y§ ¿+öÁ–NkŽÄøs¦ó 4™‹* @€ @€ @€9 PÎ9˜ @€ @€ 0Gz{–”E×_µ6¼áÖ›¾ûÜ©& 
®{$ȼ02Þî¬/ažÙwþÀóêâöÕ÷,^ðæ–A«üæF"áTAãmݺþæ8­ïqâq¹gß&úk9¶èé~ÿòåË[JŸI•Çéªy$¿¼‘t×j"h£ÿÆ ©÷Þ5¼4'ìûàüòå·Þxý³'“ÚhÛ8é3NÝŒÓcóx'ˆ>ØýÞoéç'`Ž)œ±Ëœ«ßˆää×O6´1¡¡¡ 7|½ÚuNd“~°n‚ñ ?lÑ1ÝSN”l$敹øÝºq"™ö›{vÝ÷쩾Ëöÿ¡ÁÁ‡6^ò²({×å~OÅË'¬k¡¢Sö¬–:/Bì½£oSì÷ÄûõKC×õM2´ÑQŽ“ß7’FžßH ÝóøW‘„}îø5J  @€ @€ @€:A@2h'ÜEk @€ @€ @€mH ò_FRãC뺋䖿È;v ×Å6«„¿oVE¾8úËÍâu‘Dù[u1ú•+/<#zû…ºØÆ˜UUýZ$HþA]l+õ‘쳩¨ªó"ÉoW}|zÚ£O¨c}?õ‘Dtëpª^É·[ë£ޏêªËnOy䮩/ âŒçäÂxNî­>8bÛ–õŸ‹“#ÿðàšÑ%SóS-|ÚèÒ™¹Š‡æ C›Öÿõtz¿npð¾H]ý‡ëú‰µ½nEÿÅ«‹¯¾gQÏ«âÅ9j¼ºýe9ßWUë¶mÛøÃýemørÏÃoˆõÕö§é>¡ñÎNuÈNس¦ºö¹ÖÎÞ{йwd¸z~ücÿxPÍ$ ö&ÒW#}¿³êšEy#;rB} @€ @€ @€:Q@2h'ÞUk"@€ @€ @€SèíxvJå@]ÓÈFùh$·üjÄÕ&¦ÔõÕ¨„ÐFêÊ›êbc°÷®Zó„Ú¸î®7E‚Yw]\‘Óïoݼþojã&°eóúE2èªVš¤²ü­¢¿¿«•ØéÄTUzÍdOå;ޖͶ…ÿ·Æ–7½ÎÅo»é»çdZŸoä{ßÞÊØeWñÓÓ¨…Æ‘‹õÞ¡ÍëßÒBhmH#!4ïÎÇ[ô£šàžž²lñ´ÞÑ=墬=%0ÒËþâŠÁ˾8ºåô¯¶o¿tgªF^×JO¹,kßëñúé”=k¼µÍDzlïuÛª<òª+·¬ÿ̨Â)^ m¾ô“ÑtCmóTœ¸b嚥µq @€ @€ @€楀dÐyyÛLš @€ @€´_ •ÅÅu½FâØ÷ŒäWÕÅM¶>NÕüãH´ûf³v‘à‡ž¥ÍbV®¼àñtQ³˜ûëòg†ò=V7ùˆHý@äÉþC]Ë8ºíŒóÒâóëâ¦S§‚~êŠÍ—lžN´­RÎ-'Îî=Án¸¸xª'‚8ßFÒdXm?°l¼ïñxüÄxåm+ËÅ»ïþŸmë/:ÚºuÝ­#¹øµÚ>SZÞJ"ôýœÓ?pj¼1O?°ì ï9ßµûîá)%šÔ×8‘D¼¥î½ÞÛ¬+8NóÚ¢Nسj9Oì½coT¾dë¦ —Ž-Îõð®û~{ïÞZÓIWw9³{aÍøª  @€ @€ @€fN@2èÌÙê™ @€ @€Ì'F¢å9uޤ¼7|`pÝwëâ¦PŸS‘×Õµ+súùf1¹»ûÇ:šŸ¶™ó®*ÅààH³¾¦SwëÈ®ßlé4Ë”ß0qêÚæ‘¢mÉ‹»«âýuãí¯Oé#[¶\rÝþëi~‰ûõïõ]¤ãêc¦QÕk¯ºê²Û§ÞÃø-#Y÷òHðºfüÚKË"½ìÁ«úo Ê¢iâôÞRzÏL¬éÀÙÅžÑ4É{olU,9°M‹ß;bÏjq­s>ÌÞ;ú Œ¼utÉô¯¶mÛøÃèåëu=•EuJ]Œz @€ @€ @€ù) t~Þ7³&@€ @€ @€mX±rÍÒHÚ:©y§ùö[oºþ½Íc¦^›÷Œl¨;õ,,Ÿý¬þþEŒÒH{Éuû‹£÷nÝ´®>Am‹ÉÙ18xW®òÖµŒÓ,Ÿrîê‹®.n*õíƒë>5•¶ãµùàຊé¥-|RU½½…°I„TŸ­ Žç÷غ˜iÔ_»uÓú¡i´oÞ´*ÞÑ< Κ-ŠI%ƒ¶?\ŒlªwºõUQüw]9•GÖÅŒ­ï=kì²æëµ½÷À;—‹W^ú•‹Úøý“u}å"P£ž @€ @€ @`~ HŸ÷ͬ  @€ @€ @€@[º{Šsë:̹ظcÇŽ{ëâ¦Z?4tÙõÑvÇAís¾+Æþ|$7^–ŠêOO^´ø ˜(è]uñ“##©6 &Õß×¾Ýe7æ]›#…¯ö$É®ª|~»Ç~ ¿mmî7òúŠkûŒ“Wo½ù{WׯM" ïþñ µáiæ’AãÙ{OíøÓ¸þ[×}0ù–f]D²ëϾô¥¶|úéPµë¥#ÅîGÃ/Œ~_ïÐ[bŒ8Ý5!þìŒìүܸáÓÍÆlK]Îߪë§ÌyÜwºY»Nسš­o>ÕÙ{Gß­*ç÷.ißU$zþ[}où!õ1" @€ @€ @€æ£@÷|œ´9 @€ @€ @€íˆ$Êõ=޼·>fšUõÖ\–ÿÉwßÈeõõò¾âCC—þ zƒk>©«6©2’áþ}ë¦ _ªé©-ÕŸܵrõš÷Eú¦¦â¹Më§X™†÷|tŠM'l–SñƒHJlþIiÒÉÕÍ;TK€ @€ @€ 0W$ƒÎ•;a @€ @€ @`–Î[½úôHX|rÓáãÁ¡M—~¶iL*‡7\Ý4þLúIŠ/¨m”‹ µ1m ˆÅ 
)M“AcÞÏ<«¿Áuƒƒ÷µmè8s˖˿ٶþì¨6)n?ó`x{¾5’KûV­Ý–GLØcŠÚ™øäâW\qÅ3Ñõ¨>«ê#EYNœ Á]e>'~´– :ªóƒ/®¾zýÝ—¶¿$§‘]EÑüW’qãÊÉŒÜ){ÖdÖ<—cí½Ü8Ézë¦õß8 ¤­_Ëbxgíû”‹…mTg @€ @€ @€sF`R¿X3³6 @€ @€ @ meÑSŸDYä/´mÀèhÙ²eÝq$ås꺮ª¢í‰ŠÍƼí¦ï¹ˆÄÌf1EJ‹[,ú©¦1“­LÅuѤí§>FÒ×-µSÉùëµ1S Hù®©4›n›8µòSÓí£•öy¸…w,§µÒלŠÉ] Z˜OW 1ûC:aÏÚ¿˜yþÅÞ;úÆÉŸ’¶ï½ûF©ŠH­ýLê}ªíM @€ @€ @€Àœ :gn…‰ @€ @€ @€Y¨Ò#ëFÎ)±.f6ëYrêã\È£kæ°g÷]?úRML[«'Zæ"5’ƒš~ʲxVÓ€IV朮Ÿd“–Âsn!É)3uŠfó¤Ú–V0ù T×N¾Õä[ m¸!NU½¹iËT¿¬¿ÿ¨¦1s réÒ¥=½ýkž¿rõš¿,SúÛ¶O©ö¬¶›ÌR‡öÞÑðñ÷Ð.iïU׈dÐöŠê @€ @€ 0¿ºç×tÍ– @€ @€h»@*V×g$Îé“A{ºê×Pù?¶oß¾»n­3PÿÙèóg›õÇÈØ¬~òuù¦É·iO‹\”w¶§§1½DFrG“ÊOŽÏ®ºDèÆ{–Ц§[.~xÌÊ)©Ø—+ú/>£'O)Šò™9¥gEBÜÒˆ]؈Ÿ‘[Ö{Ö–ó®ØÞ;ö–å™Ùÿ¦ªî¾««xÈØAG]Ç;8#¯Ý¨A\ @€ @€ @€ÌŠ€dÐYa7( @€ @€˜K©‘\VóÉ߬ ˜Õê\Kê²_"¯ï+³3É|]]J\™Š%íœ[*f/´¨òL ÚN¢VûúÖöí—îl5xúqõ'º–y¸‘8üßÓkr=œsÎ't-ì9£HéÑ]©:#2sÏõYñ¬=!Êíë­î=Ü7½ŸóÏšÞúçNk{ïè{ïÅŒî»wY-Þ›f=z\W @€ @€ @€‡‡€dÐÃã>[% @€ @€˜P ’·jO®f6ÁeÂɵZ‘#™²6 ­¼­ÕîÚ—ru{‘ºšv™s:¾iÀ$+ã$×»'Ù¤áÃ÷µ±³Yí*©CzÂjÊyg$V6]sNe ÉÛM»˜°rYÿQÇ‹žïÒcS™“rù˜"åÇÄóù˜˜ÖÑ6,xÝšÏ÷Á~P¦2jIDAT6íû#Îÿ=«}³Û“½w”œ_<£É £sA€ @€ @€ pØ H=ìn¹ @€ @€ @€Ñ¹ÈO¤v®½ÿª{xŽ'ƒ¶r²fÊwŽ·¶™/+kÇÓÛ{2hJ»g~]‡Åµ÷® qÊâκÔʲh_2èÒ¥K{N;ãIË‹2ŸÉž?'}>!ÖSÞ¿¦˜ÉÞÉÄîP7©v"´ÐWGìY-¬s^„Ø{Gݦ\Hâ‚ @€ @€ @ ­’AÛÊ©3 @€ @€ 0Rí){ßýîusú´³8 sI]ÂZ.Ò!MìÛ÷$ìÉwô”ͳérjïÉ UÎ’A÷Ý€iü<Ô‰]{Olþ¨9åiŸ Ú»jÍ"ÅóW"ÙsU ‰È5ƒNÃpfšÎÿ=kf\}¯öÞÑæñ÷ÌÈèW @€ @€ @€Ú' ´}–z"@€ @€ @€óN`ùòåGDå±5¿÷Úk¯ÝS3«Õ‘kY{²f*ªYIhíê®?U5å|t[S9ÜÖþÛÎmq$zÞUwJo‘‹‡Nõv,ïïè¢rÑDû_J)ÍËßvÊž5Õ{8×ÚÙ{çÚ1 @€ @€ @€N˜—¿äíäbm @€ @€ @àP TGudíx9Î~›ûŸ®¦8+'¶•é§è©Ý®Ú‡\ ùÞ—\ SÙ|™qzè›GŒ_{Þùk/è*ó»âÐcÆhciÎ#9ÄÕËã¤Äû"að’võÞA{V»Hf»{ïlßã @€ @€ @€65¿N>l,” @€ @€–»ô£úÓ2SZ´lÙ²9ýLFºêíu70åôº˜™¨î©7÷ÌÄØúœž@œÒyÔôz˜\ëT”µ‰šqzè“ëµ(úÎ_û¿Ë”/±DÐHþŒÄϯDÞøßV9ŸwgµkÉÐÆu/Ú´îË\Õ¾›“YO§ìY“Yó\޵÷Îå»cn @€ @€ @€&0§ÿÇNö @€ @€ 0×vìØ1Ü·jíS*Žn6·#<©QßÖ¤®fãMº.åÛ"Ñ­¦Y®Oʬéa*ÕåHqLQsv^ÎI2èTpg¸Mw§”¿-¿§~~¹ªª¯ì)ʯ~ppÝ&×[^0¹øúèNسêW9?"ì½óã>™% @€ @€ @€@gHíŒûh @€ @€ @`ÊqÒàíqÊß©Í:XP5OmÖöPÔE¢çíÍÏSŒóSqì¡˜ËØ1"YêøºDÕH¶ûÑØv®ç€@:Ä'ƒùøº<»öŒ´t2èËúNŽgþï&£  ŸOEue$Á~p¨Úý¥bppd2íNjͩ<®îÌÞñÚ5+ë„=«ÙúæS½w>Ý-s%@€ @€ @€˜ï’Açû4 @€ @€ 
0M‰”u]TÝùQs]]ÜìÕçÛê’èrNOœùEbÝO„qó¡SþVóµ³$pÊ!7§'Õ=*w–Ã7´2§]ÅÛã8®•ØHFþ×ájøµW^ú•–â'”#Á55þSJu¹Ü£FìŒ=kÔ’æñ…½wß ’yÎ9œp(F^¾|ù‘8ü„š±î½fpðΚ˜¢q*hÊÅŠº¸F}®ŠßÛ²é’çÌH"hô†qÚióOœH:¹ßYvÀžÕ\dþÔÚ{çϽ2S @€ @€ @€ù/pÈÿ‡‡ùOf @€ @€ @ Ãrñ™Èغ éªRzrÓú6Vö­¸4N|FÊùÛ¹(¾Sñí"ço ßîªÒ··n]wëØán»éû_^râ©wGêÙ‘cëö_§´è˜%§6Nýòþ²þròiªM¤Í•“AgøVL¹û‹zž¯™r-6Œ‡ó¬8!³ùïîrñõVº‹SA_ïBW]lÎùo‡6¯û㺸éÔ甎k~.hÌ4Å[>™OìY“Yî\޵÷Îå»cn @€ @€ @€&0¹e·ÓVo= @€ @€ @€@1RŸªcÈÅ¡KÔ°Ÿ‰Ì°3#CììHŽ{uY¤?/S¹¹§§ülyDº¥oõÚ‹þþQ‰n;vìŽÓ5?]·ŽîžüŒº˜vÖûГžý-¬ëswqïWëbÔÏŽ@U¦F2è!ø”µ'ÈFòæ­L$Ný…Ú¸\ܸkç^_7Í€8‘÷ôº.rN“úe'ìYu&ó¥ÞÞ;_î”y @€ @€ @€t‚À¤~±Ú ¶ @€ @€ @`´À÷¾uÝâäÍ]£KÇ\åü¸sW¯>eLiÛ/{{–¤TœÞ¼ãüõbppdlLUŸ[6ö: <lÙL^—eOóWcð8ýôë¼q&ç¡ï© ”9ÿüÔ[·Þ2 û뢇GŠ­u1+V¬8:©U—sñ7Û·oß]7úgõ÷/й<§®Éž Ú){VË|©·÷Η;ež @€ @€ @€ó]@2è|¿ƒæO€ @€ @€¦)píµ×î‰Ó8?׬›8¡3uç5‹iG]îI/¨í§š é³ùD]ÛH¼üùsú×Ô&ÊÕõÓJýÒ¥K{ŠT¬­Íùcµ1fO ¥öõ]ôÈ™œÀËúNޤÉ6#žÝomÛ²îËÍbu==ÇZ³·> _ÓRÜ4‚NJ ŸÍkOÆÄÔžÉ Ó1{Öd=—cí½sùî˜ @€ @€ @€@ Hí ›i) @€ @€ @`ª9Uµ‰”‘0ºfªý·Ú®,smÂi•Šqç:²{ç§ã„ÓûšÕHjíIéUÍbÚUwÚg½,N{|xmUñϵ1fS ,t ÌäŽ(‹Æ»Õü÷v¹þTÐÆGºsKÉ »ïÊÿ5“kÚÛwY¾¤Å1´·?¬ö¬ý‹yðËA'?XßrÍ32*øÐ]Ø{µ‘ @€ @€ @€oæ¿T>¼m¬ž @€ @€6Ã#ù’Ÿ¦ NÅ{ûžÝ4f•½½KR.^Ü´‹œwíÞ9òÉñb¶mÛöã8‰óòñêF••Å«{{×rù®v®£ÑçŠþ‹—RùÁøZ³–Ftú‡­›Ö}³ñÍgî DrcwYvoZ±bõIí˜mïª5Oˆ“v£ß8râO$ é¶›¿÷¶‰#F×ÜwWõÝ(©OMEïLœòïÔÏGrçžUó«8ùô˜æã×vÊžuÀêî8àû¸_{v·5!yÜA¦Xhï"œf @€ @€ @€Z Ú"”0 @€ @€ p8ä‘âÍuëŒÔµ#ʲ¼²wÕÀ¹u±uõË–-[¸¤\´9#ŸV[TùMµ1ÐH‹ºÕÅÆ:—]]Û")¯¿.¶•úÆ©‘Ý]忤"=².>æ÷µ=÷Üöuqêç–@äm>¶gÑ‚÷õ]T{›ÍüÜþ‹žIÃ+Rqb³¸¨«F†ó«wìØ1\·¿úª«.»½ÈùÓû &øÏÿÓO?ó‰1AõTŠSßêß‹wêÃ=bR¤âèIÅÜ {Ö˹í€ïã~Íeùë±ov[9Ë…öÞY¾†'@€ @€ @€èxÉ ‹- @€ @€´.04¸îýq:è{j[¤´ Ž3|$ýÎYýý jãÇ Xyþš—âi×)-§zLQ¾|ËæõS8áå½Õ® ã´Á›& x°¢§LåÆHl}ëŠ+¦–ÖßßµrÕÀ¯tuu<Aö`×ã˹؋ê‚|à÷Œ¡tN ¤ô˜bA×'Î[}Ñ ¦2Ïxg.ìêꊤáâáuí#iø]WnYÿ™º¸±õU*¶-ÿºü¾þÞñëZ/ííXÒ·zí‡âùÿÃh5…ß?懴>ÚèÈNÙ³ö®*7^ÝÁWñܼxɉ§þÝLœêzðh“/±÷NÞL  @€ @€ @€­ ÌÉ5¶ÕÉ‹#@€ @€ @€Ú/pßÝ·ÿjÏ‘ÇÿL$=¾YïqJbã÷ ò¸rÑÚÇŸ¿æõ[ò½*GšµiÔ»zõ)ÝyÁ_Gh_ŒQûÉE¾u×È®_« < àCƒƒ7öö¯¹0u•âºä´²Lé7Óâc/è[½æw¼úêõwÐÝø_# ô¼rñ ºŠâ/â„ǧŽtpiÎÕÿÚºy×®Q2_"éñ´®¢ûš¾UkßWìþ½¡¡K¿_7÷•+מ•»ó[¢íKêbõñÜß°³Úõ;­ÄŽ)._ÐU¼9ÊŽ­;èº,6Ç:Þ3¼k÷lÛ¶ñ‡Õ7)ˆSuŸ§ú¾*N]ïò’&¡M«R.VÄûÔÊþ1^G°g5Ö{æ;Jí'½òôÇ<éœÓó¤ŠÐãÏHJù˜H4?9îÇÈЦKVÔv1CöÞ‚Õ- @€ @€ 
@€ ê1 @€ @€ @€F 4N¬\±r`UOOjœHX›LI¡ÄÎm}Å¢[ÒêmUÎ-ªôý‘¢º±Ø=òã´¸ë±e•Ί¸'FÒäYENÏŒŸ‹G :ÑEÎ#)W¯Ú>8ø£‰B&*ß:¸þŸãÆ7?pZáDaûË#î‘HõÞ£ËïˆvŽÔ¬«ª¢úN)n¨î­nîZ¼à!9œTætr$>?Öq^¬ã¡û;háKœVzÕÖÍëÿ¦…P!ó@ ’ _‘{ºÖö­ø×8Åssdp~m¤È?ØSì¾ma¹ðáE•Ïý“S*/ˆgå©ñŒµ¶ªûŸû×\38xgk FG}`pÝw#±ù¯RQþïÑ5_=Ôý?z-X'ä¾­¨Šm»‹]ßn$õ~éK/<®ka÷)]Ýii¬äcý?;6fìu¸ÄcŸ¾±§Œ­Ûç‹Nþ@Q|wÙ$¾tÊž¦_kyÙ±÷Düêãã銂ànœzíµ×îy°îÐ~³÷Zo£ @€ @€ @€>’AŸ{m¥ @€ @€ @ em[Ö}¹¯ NÖL—E£Ú„ÐFÇ‘ŠtBüxe$J¾²ˆÃíÊÆ?pÊÝgsFÂR+ŸFY¤‘ýbœ¢yE+ñãÅ m\÷'‘¨wb$¼½f¼úqËRZSìõv5m,áÈ}‘q±oþû~ùqÞýã=kjÂTÏ9|{Üôã&šV<['á9ñã9˜Æ£Þ³/×ùÇ¿QÞú'ß]E’_<÷l½ÍÁ‘·Üû§Çw-nœØyÒÁµã”ÄssÿíxÞ{QÌåª]±îoÇ ¥7Å»ýðH3<5² §å„EÑö–¢ª^Q”å#£¯wM])’ħ˜ Úè·ö¬¡M¾‰è·<°—6–5éO<‡Ý<ãñgF2èW'ݸ ì½mÄÔ @€ @€ @€ü_/  @€ @€ @€ö ®ÛZäçïMèÚ_zè¾DBÔk·n^wÉ4G¬†6­{md•¾.ŽÌ™f_Skã†á‡6^ò’«®º, }æ•@NoŒgçêC1çxNnȹzÎÖM릕Ú˜ëŽÁÁ»Š¯ñsÒ‰ 9ÿË}#ÅS‡7\U#_ª›C™ŠÔÅÔÕwÀžyÀ¹iÒlA£¾«èzb+q3cïa`Ý @€ @€ @€~’A¿{nÅ @€ @€ @ e­ƒë>5[è'ù⇪]Ïÿààº4âﺽüJüÊfŸ<ídÐFïó}ÏÚuç-ÏÚ´NõLež É {o¶½·Ù3¯Ž @€ @€ @€Àä$ƒNÎK4 @€ @€8ì¶ nø¯ê¾â™qÂàßÇ ›Ã3 c|r$í~êÖMë7¶{œ­ƒ>IjψÄÖ+ÛÝ÷xý…Õ5yw~êÖÁõ¯^ÙüøÔàà®V÷Ä)›ùñ§íI½ÑáU÷ì~ö•7~¯Ý*[6¯»&Nø}nÌú;íî{l±ŽÇñÌ­›.yS18¸ÿ$Þ«¯^w$8~rlü×qé£Ï]}ñÏX6ÕïóyÏÚ¾}ûî‘jÏEayÓTן‹4g’Ak°÷NõNjG€ @€ @€ @`´€dÐÑ® @€ @€ @€q¶n]wëЦK^=\UOŠÓ.·Ž2½¢œ¿'wþÚPuÏs¯Ø¸ñ;ÓëlâÖ[7­ûæ–Më΋„µgENßÇ&ŽœbMÎ÷E¿›ª‘êq"ÞÙ[·®¿yŠ=i6Ç ¡qO_S¥êù‘¨÷­¶L/N-ªê¥C/yÙöí—ÎØ©µÓ2wí~JN»!žÏ˜~›?9_Ýx§bˇ6]úÙñzcAß;^ùe]¹ë·¼žÎ÷ù¼g]1xÙ‡ïÙýôFrüT ÒKm¬ÁÞ;•;©  @€ @€ @€ÑÝ£/] @€ @€ @€&ˆ÷¾µ}}«.zfQtýb*ŠåEJ'OÜ¢yMä¥}©Hù­·Ýô½ËwìØ1£§Ž8“HXût\?åù/,ÊââÈŽ;;¨q`̤¾çüXË?ìÙ5|É>pù-“j+x^ \±qÃŽ³Ï^óä£K¯)ŠôŠ"“;…1çÆ‰™WçªxÏÐມøy’3ÿy ÙtÍŠþ‹ÿ¸»,%Þݘ{:jÊ#Gw¼7W¤ê½WnÚðouýÜ^Ý3¸¤kÑÛÃìȉbS*V¬\yá™[¶\öŸÅL¶|¾îYÛ¶müáÒ¥KŸwÚ™gõÇ©ªźώÓS[úÝnÜ—S"¾ñ’gk2÷ÄÞ;-± @€ @€ @€F Äïy} @€ @€ @€SX±ràÉ=ÝÅK¢‡çEbÜÉEN‹ŸK⺑Œôà'wÆÉ„7æ"}*’•>V¥Ý»rãÆï=0»ßzW]ü”"•/N¹x^$†ž”Sñ°”ó ‘0×õàÌòÎXßõ±Žë£üú"çë‡Sõé+7nø×c|›/½ç¯ùå²,ßÙl¾U.^¹uÓ%ï(&£Ÿ9zkã™øÉHÚ{dÄ8ꙉäÏœÒF]$ çO{ªmCCn˜¨¿CU¾bÅŠ£ËEÇ>»ÌųŠ2Erw~J<ûKFÍ=&IÎÃñ>ÿ(~©ø£xo¿ë¸&N¾ú$ËC5ݶŽ3÷¬åýý]T,zn*‹sÊ/rù°@9&¥ê†8A´‘˜ûÝ\•×ï.îùîöÁÁFBzͽw~Ü'³$@€ @€ @€˜}É ³Ì€ @€ @€t¢@ÙÛ»æ„bA:vdמ;wî¼áÖCyògASoïÀñe9|ü½÷·\uÕe··±o]Ͳ@;’AÇ.aÙ²eÝ9ñÄGtï){FFî¹íŠ+®ˆ$èù“˜×HM‹[æ‹î©vÝò‘ÁÁÆ3?o ÇÞI\wÊž5‰%ÏéP{=&G€ @€ @€ 0’AgCݘ 
@€ @€ @€³.0É ³¾( @€ @€ @€ @€Ž(;rUE€ @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLÉ y_­Š @€ @€ @€ @€ @€ @€èÉ r#-ƒ @€ @€ @€ @€ @€ @€èLîÎ\–U @€ @€ @€¨HùÇ9ßiUÝÕ¼^- @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ÀÜø|µ9+OdûEIEND®B`‚rocksdb-6.11.4/docs/static/images/compaction/l0-l1-contend.png000066400000000000000000006160641370372246700240650ustar00rootroot00000000000000‰PNG  IHDR8";mßsRGB®Îé 
pHYsgŸÒR@IDATxìÝ |\W}/ð{ïHŽåì !!…B€f§€ p‚Kbl%¶e ”­…–×òè¥-ô•–¥@-Á²Ø’0.[ ` P l…„-aÉž8¶5sߜؑeÍÜ‘<²µ|çó1š9çÏ=ç{ï\óù(?Ÿ$ñ"@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ÌRt–®Û²  @€ @€ @`†t-ë}I,å1–“§Éõ›Ö÷½­Q>Ì®eËŸ$¥6ZO5­öo^¿v[£}&*àœ¨œã @€ @€ @€³O mö-ÙŠ  @€ @€ @`f ¤ÏKÓ¤»Ñšâ_ûÛýŽô 0cò$=7KÓW6ZP–g×Fÿ¶F5úLTÀ=8Q9Ç @€ @€ @€fŸ@6û–lÅ @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,pœ…Ý’  @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,pœ…Ý’  @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,pœ…Ý’  @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,h›…k¶d @€ @€ @€3V ¯V¿—Ò¿k´À¼š~µQ¿> @€ @€ @€ ÇC¡ì @€ @€ @€‘ÀæþË¿§ªýñ"@€ @€ @€ @€À”ȦôìLŽ @€ @€ @€ @€ @€ @€˜‘Ž3ò²Z @€ @€ @€ @€ @€ @€˜ÚŽSûú˜ @€ @€ @€ @€ @€ @€˜‘Ž3ò²Z @€ @€ @€ @€ @€ @€˜ÚŽSûú˜ @€ @€ @€ @€ @€ @€˜‘Ž3ò²Z @€ @€ @€ @€ @€ @€˜ÚŽSûú˜ @€ @€Ìt†-uºÿb*]©4—ɺM§ûý2Òe&­eäºÕû™r¿»Õ3ýÎ3Sîñé'oÆ @€ @€ 
@€À”h›’³2) @€ @€ 0,X0÷¸ûŸ±8Mó'FºáIš<0Í“æIzjš&Yžä7Fû IÒôº¤ZýÄ/ò»?uMÿŽ©È± \>긬ãi¥$=3Éó3ò4=#Ió3b®ñ99%Örw´Ýœ&ùÍy’ü&~~­R;”g;¯¹rýúŸN¥5-Y²ä¸lî1¯Œy?5æ}nÌ÷̸>µkqm̳öç»éðð'.ÿÁ$Í;]ܽr~{[raždçÆ¹ï^''yzÿ$ÉOŠ{d8Îû«¸wnŒ÷7ÞsT¯Ivå\ûËIšSˆíî¾ô¬¼½ôÌXÏcã^¿_ü<1î•b'ÆINˆûý¨¸‡väirKš§µûåæ0¸%ÚbÍé—vW*ÿ1Ô¿ö{-›ÐA´hÑ%÷›ÓÑö„jš=1KòÇÇ\˜çéñqÍŽ¯­%Ï“»â¿ëõ›xÿÛ$IßïŸÆº>s×Íùg¯ºjÍ9…i}ø zf]Ë–?!ϳ'dYúø¸ÖŒ{áÄx®wÏ=Üêçq_ÿ<îõŸÇwù+;ª;ú·ö÷ÿzZ_@“oF`F?Ó›PC€ @€ @€hV ~GïE€ @€ @€¦¯@ײUŽÀTwÁ ¶¬_ý¸‚šºÝK–®º¤”æoªW¡¥w nè{wþ¬kiï3"ô²òžy¦ÇÔ©³ùÞ ÔUÕ<Øœï¸"é﯌Yx#ˆ÷¸ööô¥عdOˆgçÎóü»ñKŠ­ì¼mõæÍ›o™ÀÒ¹t嫳4}Õ÷6T“ü ›6¬Y?²ÿ9ÏYyä‘Çg‘¥yWxmâ2$›’dømƒÖ}eä8y!¯¶ãO>mqš¤ÏÇ ÃãäñŽŽ‘!M¿á¹ÄÛ ±¾ïŽwŒÉ¨¿°\>刴ã¸ç„ë‚øyÚÁž'¾gÎÿ£šW‡n¾ñg¶mÛV }²×E]züÜ£Ú{b½±ž'ĉwÇwçKIRýÔîátóÐ@ß7b¬1í*¯Zœfù?ŽÙyoc5IÿvÓ†Õ}jF÷yÞ#²¸¼â¡m¥tUÜÛ+ãûû ÑN>Ç÷t8¾ïŸŠëùM7ü´?îã»G×ׯoϲŒnßû9BÎ7nXý„½Ÿ§âÏÎe+–giim£¹…Å«üÝÙèÐ1û–”/}t©Ô¶6¾_GŒY0²1Mî®TV\Ù¿î[#›æýT¦/îî}L„è7­q÷p~ÉÐÀš¯Õµ²¿«gÕGÒ<h½1ãž¿í¦¯{â¡~î×›v @€ @€ @ µvpl­§Ñ @€ @€ @` ¤IõØ$Íή¿´¼¶ ݯ‹»W>±Ô–öEíÜ:›lˆ Õ¼(]RJÓ%ÝIÇŸ'KWþéÀÆ5Ÿhòð–•-\¸ü˜yÇ´-]÷þ B=Ù3ðAü3Šar^ŒñÏÙǾµkY;oû«ƒ:¦Yì ˜Ô½N±«Ú±#A/î9µ}^Z ÅzšZLä'kaڶޫ+Iõ W®_ûÅ‘c6û¾sÙÊ®FýÝÁܵsÅñµ‰Ï±æÇÛ7Dà÷²á;ÿzhhý/šK+뺺–?(i/½.fö1³âÑ8N~O4]¡©¥'œrÆ›ºzV¾õº|§oûöí»Ç1̸K–Ë'udçÿý¸Mæ4u§4>K{Œó´$ÉžÖÖ–ÿM÷²ÞMÉpú׫¿Ýø°æ{ó¬rtš–ê~j#E˜h¿ïC3£Ïögaí™ÑÖqÄ[âÞî ¯¬³Ñ5ñ=­ý~öyqøóâ>~C×ÒU/ܸú³#ë"È;'¾Ü ®_»‚zˆðÞ£²RÁÑÚî°#{ÆzŸß™W“E­ 7N‡gz„©¿Ï›Øt9}ðX*{ÛÚÚöÜ߇,àxqyùÃã’]óÚ;…æÉ¿ 7È¢… @€ @€Ì ýâm¦,Þ: @€ @€ @€ÀdÔvqŠ]ßáÆÏG˜eÂáÆç–žŸdÙÇ# ð±ZÐæÀþÉi‰0ߥóŽ)ý³ûÄ>M³gzl@›¨p¥i²¬öÝ-¬kQA[Zª†¾*ieuà @€ @€ @€À´hÉ/ߦµ€É @€ @€ @€ ,*¯|ð‰§œþù4Kÿ&²K“HÓ…ÎûLW׊´pê µ'œ´lÕ?Ehd]„°&5,AŸS#·9Brï‰Löï/Òl®ëóˆÝdCž';wÝYÙÐdùž²Zè°-)}!,Ÿ2žãÆ[¬yqÿ½á„¬ã?»º.=c¼Ç·¾{éÊ Kù׆ç+';Ø8znqož‘•Ò«»Ê½/ÝwŸÓþÓžPoºÿΟ9n½ÃkáÔåY2ç˱›Ù#ëM§ö™ô,¬}wk÷Bí»Õêkß›xü%/ï8¶íëµ p«ÇŸéãíÙý/MkáÆû­5žÛ·T*Éï n\÷…¢Úfú§ã3=ß]YÇ«ÑúÂòþ'Þÿ´ç4ªia_–gÉòFãÅt»%®Q> @€ @€ @`z Lö 0½uÌž @€ @€ŒC ³³÷Äö,ýdì øÄq6¡ÒÈÄœ›Ì)]}a¹|Ê„(8¨³såýO8åôOEôæO J[ÚëzYËú’r¹ÔÒG !Ê?ŠáÂMã~‰¤¡~ôò››9pþüùí]ËV®®…›©oUMXž—¶·©kيǶjÌÑãtõ¬\™§é•“ü}®ŸçF$vcç²U¿ß ¦ù®¸÷â¼üPßûµ Ö‚EmYÛ¶ÅÝ+×ü„§^å zfµÝÅw7ž)gµås®~~¹÷SïŠNÍuw_ò°R©íêÚ÷¦h†y’ÿvx8¿`sßÕõOçgúààå×…×ÕEkL²tEaM –ô,fÌçA ‡J“Õ ûu @€ @€ @€Ó^@ÀqÚ_B @€ @€ @€© 
p~¹<'›“nŠPÙCÕ|"ó°¹¥Žu­>_ײåOH·Gèà­»™ñ"X¶¼»ÔqyÔÆ[ûêî¾ôœôm;j^­4¸xv¹|ìçœÿñ4Ízöœ:>MNI“ì?j»,Nèøuö¬zM’§«ãžŸœJœ{tWÌ!n›ä‹÷œ:ºo¼Ÿ»JƒõŒ÷¸–Õ§É ímÙ§:—­<¯ecÂfʳ°¶Žîe«ÖÝË_šž='K®N³¶IÝ÷­gOÔ¹¬÷ܼ½=ÂÉÉE§‰pã¯*•Ê3‡ú¾^T[Ô?žéÕ¤rYÑ:ó<½xA¹<©;7׿Û76ü»qÏn“»†ûŠæ«Ÿ @€ @€˜Þ‡ýÎÓ›Ïì  @€ @€ @€À=Íæ~ âxOk£ÿÁþw"”ñ‹ÒĹ K–ôôœ™&¥ÏD¶ðÈÂâ ¹.É“¡X×÷cE7äIrCìªw\šçOÒì¼$Ís­íJ×ä?¾˜.??¸¡ïÝ#NsÐoó¶¶㎃(ÖvãM¿þÙ'ŠÆ¨¤ŽÉæ}2L_T;²?ÆÿEšäÏód{RM~U)UoŒ1JIµô€,©ž÷Ë£‚qq„ðŽy\Ý÷izTŒyeg¹÷é›úû®©[7ŽŽ®òŠ‹bNïˆ0›>*î‘_Æ}þå$îÿ˜ÏÒ<¹-Oªwdi6\MócbMÇfI~rž&ócÐÇÇýrFÓƒGaLåè¶ysÞoËã9ndí’òªù1¿¿ãæ^y~}¬å?b]ßÍÓê³{×T;¸š´7û™q;+†\Ííµ¾ÂWš›æY-ˆ»¸°vŠÌgaúÐÒ¼+ƒöyãáïìOã9Çå?Šûâçµû!ð§¤YòÀx>>*îÑç=[ãùtn|"àÝìM8žÎŒÚÅåg‡ÓgB¨0×áéîÝϺràŠkvõ3å™~Cuçà©¥ŽÛâ‹çîØ¯¸WçŸÍëŠÞI >ç9+ŒëØ=ö öµn«í:¹ï“7 @€ @€ @€ÀŒpœ‘—Õ¢ @€ @€ @à äé‹â?Ò?­Þ9÷ì@”&›ÓjuÝùÎÏ|¢¿ÿ¦±j.\~ÌÜ£³fiúÊD=t¬š±Ú"äñö¤\Þšô÷WÆêO[–ñ¾¨o6ÜX Ï¿W*ÃïÝÜù7êœg_ð²sي߉ ßÙìα®·Å.vŸÚ´aÍwëŒ=®æïùázÐ;Æõ\·m۶ᢓŸ›u¼=B"M‡#µyx¸ú–¡5_-{þüùí§}þ³KIú¿#µ ¨>î϶¤”_qÑE—>棽üæ¢úFýµOR*ýS£š½}{îý$_9Þÿ7¸qÝ£=òFͽº»WŸ·å/‹û¥7Öxl3GEí w÷>*vjûf3õ£jÒR)_¡ŸÂߟEXs(É+oܰî+£Æùñ #?tu]zFÒÞþ—^}a܇sFöõ>îEJ}r«B©c£åm3äYعlÕ ãyÑ|¸1ϯÌóÊ?Ýã‹-š×~äñ‹âû›ÿaõüã>>±^_­=Íã¯Ø…÷¬¼”ÕÂ…»µÖ§ÃÕÊCWü°\3å™~MÿŽ®Ø46r‰þÑ?iÇ£NH"@ÙøSˆâCæ¨ @€ @€˜…¿ Ë´  @€ @€ 0y¨nLòM•jåõWö¯ûNÑ ¶n];*%ïŠ?ïîêY¹*ɳ÷ÇØGáµó:³Ž…›’d_˜°è˜±ú#€¸¼éPOž1vß{Õà†µ_k¬±Ú6mXû_Ѿ «Üû‚$KßÑÈmÏñ±Óby>ïŸ<ÖxãnKÓØ=­Þ+¿9vž|_žæÛ«Õ굕¤tk{’ŸÁºs³,{lìê·"BiGÕŽ® '«ë²·½«¼ª¶ÃâïýÜðgž|+O†_\–ÛoˆíÛ·ïŽ?[£qk„±z³$ù¿1דö+õ!BSgqtÛ¿Gs„J&þ:·Ôñš¸OÎ)!Ïû¾`<÷ÈÈ1V;>¿úÂrù­sKóþ-ÎyÑÈþzïÛJÉ ¢ï›õúëµw–W>3Â6ç×믵×›¸yqì˜zY£º±úîÝ…ìe‹Ê½o™“Å=ÔL0µ”ü]Œµ`¬ñ¦b[£ït„B§Å³ðÙåò±išÿ}Ü …ı¦ŸÅ ñGq?Ôv{,|mÙ²å®(Ú!áMËæ¾6v·}}|ž[x ‚=µ†“¼-Âéƒ IâùSMw]0Ô¿þ'…µM̼gúp<ÃÚóü‚®®\ûË&ˆÆ_’ï ¯×?.Ïï¸óæ| ~ @€ @€ @`¦Ä﻽ @€ @€ @€V Dè守BýÁàú¾®f£Ο®_ó¡JZ¹ 4¿Õ7æÇˆâ\0°~õ+n­îxPlWùÚ˜÷PÑî€÷ôœ–fIs¸<ümõ®§Œ'Ü8ZbÓ†Õ}ùîÊïÄZ wºŒ`PgçÒ•8zŒf?/^Üsj–'µPVÃWÌåû»wìú݉Þ##ÿXÿ ƒëWשïÙ^÷}º'àX·»^Gš¥/®×·¯=Ïÿ<¼›»¶ûÚÿÍ–þ¾ë+;oí¬íßs৸^ψëŠÃ¤:eZ¦Û³ðè¬ãÿ„ûý‹óoß]ÙñøfÃ#Çûvÿ® kÞR­ä„Ï-#û¼[ ‚Á§gùŸ‰ ùécWÜך'Éâ™øôÍë[nœ‰Ïôø;çKátí}jc¼KÓRÚž]2FÏA7=¿ÜûÀøÇ.h4P̯ÿª«ÖÜÙ¨F @€ @€ 03gÆu´  @€ @€˜ZwW†« nèû·ƒ™Ö•ë×~1©Vº"ÿãWrj°â-Çê “ÎÉßQ´`íИÊ{Ö÷õnÛ¶m¸ÎPM5G(èûÕ$_ÑÔÚÒäµM 
:þ¢j¬yÕÀƾ?ß¼ysÃѧúûoÝ´~õÿÀja´”ñ®÷„¢éÄUýú^°­¿ÿŽ¢Ú¢þÚ[Ã;vG4¯ízØð•eé[.\~Lâ:íí/Ù»“e’ÚM²«Z^64´þuk&Ðq׭ï‹/BᘵM#xÖxN±hÑ¢yq\Ã-ãzmܸæíã·^mí~K‡‡Ÿ!ß›êÕìmÏÛÚ†€öÖMÑŸÓêYعlå#â!ÚD8ÿú®;w/¨…oÆ}Sß5ÃÃrLòßÌ83ýØZÀ0v=Ý÷3‹Ö§|7ÙUyÆàຟÕ6Û?SŸéqß}¨Ð MVÖL  ½´g܆ÿ­B^ÍWO`h‡ @€ @€ @€ÓP á/ ¦ázL™ @€ @€vj^yÉ•k¾ÜЉ n\÷…gmáXirÊâî•ó ëÆ(è,÷þnšf½ctí×á²OFhóUÑX¸ÜïÀ:"äø‘¾±N÷¾æ8Ùó"xtÞ¾†½©V«/Àâå-nÏ0µÝö"„³¸hÌ–\}Ó×Õ‚T-±¬oh芫;ka©äÆÆçO™wLé%kÆîÍ“¬0àAÀ·mî¿üc0ñÖ­[×Ý–V+¯nf„<ËÆu¿´Í;®ˆœÛxì|¨qÿøz.ÿA5¯þŸ¢£"¼ü¬¢š©Ú?Ýž…aý®º¶5ôŒPjDx/ڲ劖„cGدW’jí{›Äz¨íôWÊçÔvn|ðè¾?çßÌwå jïû&Ö2“Ÿé•»v­‰@z¥±Lú˜‹ËËÞ¸fü½Yž6üÿñ÷ØÿÄ.ÊŸÿÈŽ @€ @€ @€¦£€€ãt¼jæL€ @€ @€SX _½iÃÚu­œàðŽ]ÞÌN‡¥¶ì‘9ošïСµÛwWò …âÍ)†o޵}¿QMŽ"w”†q@_žlÛ´qÍh?ȆØmïObˆ†¿‰àÆ/vÞ>ü‚ƒÝs¬©nÚ´æW|ùX}#Ûò4yuR.—F¶½_Tî==®ÄcÖåù;ïþ§†5Ñ9°qí@Ñý²gøRzÊxN“åÉéEõ±ÝçgŠjÆÛŸW‹¾Ûyš?3ÆMÇ;öᯟ^Ï®eËŸššuÃW\Wµ2@W;Ù=»õæÿÐðij°³«kÅæ”ÒÏÄuyHËß^Ý™\P{6QÛtÉL~¦×vÙ¿>Q„Q*e-ÝűkÙŠÇÆ­qh²š÷żbz^ @€ @€ @€Àlhø öÙ` @€ @€ @ •ÕÊ?¶r¼ÚXµBü¸¶hÜ,©žVT3F-<¸hŒöýš"]õº-ý}×ïרš‘µÛdh8Zìöô{ ÆÙY­VÞ:ÎC ËŸ[.Ÿ9´æÕêß|ô£—ß\T7ÑþnŽXȧýŒ%é¼îF5£ûædÅ;SƽôÁÉ\[mNq/6 Äî™w59qôü}Ž­ó ŽŽŸhßàຟł¾2úøZè1vùüYüïgãZn¹è¢K]3Õ?O»gaš†¨ã¢liõ®¯{¯ã]·ÿú‘çúöÞϳýç…åò)i{ö™ø¾ŸSl‘¹r÷­ÏÞ´©ï·ÅµÍWÌôgú‰jrY±HziÔÄ¥hÍ+vn¼{c<øvçÅÿ¿ 5³1  @€ @€ 0§ÂU0 @€ @€˜±+à•ýë¾5I‹ùBѸy’Þ¯¨ftÿâî•ó#µpêèöý?ç7ÿöÆëšAìT³ŸòÝ•µ{ò\ ˆmœ~÷ÉårGƒ’¦»â\ÿµ©í'›> ÉÂ#K/ç5*u\»)¿ûƒjZÑ—W“w“eù«ŠjFöÇÜŸ?òóXʆ±Ú[ÙaÄ—§Ù‘E5ûõ§ÙÎý>õ!Ož0VóÁ¶ÅýøOñçÝIžÿI5ÏUóêÃoºñúy¤;mpCß3âÏ‹&;4z°k8àøiø,LóìâÖ1ª!¯æoÕÔ²[·nÝYÍÓI¿e=uv®¼ÿܬãêL?´ðtyò¹ÝwÝò{›7o¾¥°vœ3ý™^ãø^²c(ÂÔ ƒ¡µ@|×ÒUO'ߘå ,hË’=É1û÷4¦Ég·ô¯ùqý= @€ @€ @€ÀLpœiWÔz @€ @€ @à° D8éC“uò/~±xìüØâšý+ÚÚ“âPOž¬ß¶mÛÝûÙºOƒƒ—_£m;`Ä<¿#vLûZ„¿.O“ê[Oîh<àø: iž´<ÜX;UE;ëœr_sìÞø¾¤¿¿²¯a’Þ ö¯þH„V~Ùhøpxò‚rù¨F5#û«;.ª$;\I†Ÿí/‹kóö8LJc×¹¯ÇŸÛb§Á®\¿öK#™”÷yþ?Eãfy>¾{¥Z-3ÍßP ç{¼ý›6¬Y!ÆWlè{ç¦ }‰ÏßÌïÛxç7‘úéö,\\^qv|Þh­{ÂÉý}ŸoTs°}7TïêïU˃z;¯CyüÂrù¤ìˆôê4MÏ+:o<®¾ýæê¡¡¡Û‹j'Ò?ÓŸé5“o÷÷ïŠg÷åE>i–¯(ªi¦ÿø“O^|×Nj\;yÿ_ªñyõ @€ @€ @€‡K å¿„=\ q^ @€ @€ pØÒü?'k®ùyáØi:¾PW ;3-.7©\V\sÕê?æYößfü^žU¯Ív%ß\W[s,½µ¯jš_ÓÚ“ä9ÏYyd“;86| ç鿆­ë¬ÆT>ÃÕ¥¤ié¸Òœ'FM­®øÁ̘üO¢°öç€cjÑ,Nò+M ȱøöñÌ¢ZÚý?YrDÃCj«N>}íüùóWnß¾}wÃâÙÞ9Íž…íYVøLóü“}Y¯éïßÑݳª/ÎóêÉ>×T¿³³÷Ä,KkÏ–ó‹æ1|ü—•]×\Õ¿£¨v"ý³â™~/ÌðprY{{R´£ï 
.\øªÚN£ñÜ{LìÞØ»÷ýØ?ó;oªÜÁy/ @€ @€ @`6 ØÁq6]mk%@€ @€ @€ÉˆÝcçµïMÖ ²dø¶¢±cG¾¹E5#û—ôôœÇGl;à}ìÊ7¸aÝWhoqÃ`ÿÚÖv±Û´qÍ¿l^¿îÓnüYœ¢åáÆÚ´ÓÝ­8ylòÄÀýÃ’Û·ô÷]ßbººÃÅîf„G—òì©£Û&úùª«ÖÜ9ÑcÇs\žV Mî×ïÀ®\¿>´± eÁ+®ñ²3Î9ÿê%åÞ'”ÎÞîiø,ŒMáN¶ÃÕÊÇÅE­Vªƒ‡â4Ð÷õøÇþ«±Uz|ÇQ']ظ¦qï’%KŽ‹ïZQ˜øÃÛúûïh<’^ @€ @€ @`¦ Œë—»3mñÖC€ @€ @€Z%ÿÑþ×b¬IÛ½®RM ÃWqþÒxÖ“%íÏ*®Ï¿^\3}*"ó“ÁÁµ¿lõŒÓRZŒÉW[}ÞFãÅZ›8_ñ¼ã°ôå¥9Mœw\ß…¯Z͓˚·¶ëéï–Jé—»–õ~tÉÒÞµ`V3ÇÍ–šéö,|v¹|lb ¾¿ùWö¯»öP\ÃÊÎ[¿ÏŠ`œ=¯‹.ºôø#³yŸŒ/×£‹VÁí_÷Ãÿî>Ø‹Î3Ûžéñ(|þ¥Yƒ‹@£¿4÷Ø¥±Ëqírój²º‰¡” @€ @€ @€3L@Àq†]PË!@€ @€ @€Ã#ÿÑþ'óÌ¥JëŽI5}PÑœó4ÿFQÍ´êO''d»gþn‘Cž¦“zŒ>ÿ Õ»T”Ê“ôÉqÜ”ÿ}ÑüùóÛ;Ë+/èîYùŽ,Mß3z­­øïŽqš)ÇnŽ–²tÍQYǯº–­ú|÷²•µ¤¼j~ŒY¡ÙûšnÏ£³#ÎLÒ´a 6Ò†µ wÓ÷ÆÁ\ý¡¡¡Ûc>“¶ðÁÌm2Ž­LçÝvUÜ7-?¿íŽ›ònß¾}wqíÁU̶gú®»v¯ ±†®ñŒ¼¨¶ ãDe#¶ÛÛèØ=ÿÁƾmjô @€ @€ @€3S`ÊÿÂzf²[ @€ @€Ì<üÖÉ\SµzçEãG€n|Áª4¹á˜ùž`OQÙ´éÝ÷nœŒÉæM„s²jå¯éïßA©†»UÆsôâÅ—œ4&1f¶¸¼âìî¥+º»—öþC-½ž…YžžRt-"xµ½¨¦•ýi29AìVα%cåé±Ç”:®ŠLðãš/=æèãÒ74W{pU³í™¾eË¿)zþÅsîˆtÎÑå‰Èvw_zÖžçdƒƒ#HÜݳj÷Òº @€ @€ @€À¬h›U«µX @€ @€ 0I±³Ñm“4ôžawî<²:on«Ïž\ØÕ³êý…u­,Èó#äØpÄRG[mî“ültâE‹.¹_inûY1¿‡”ÒêYñzhÎp×yÑÖ±÷ØÆ³ß[ÕšŸ»wìze{ÇœÇÅù8Ñã^¸_{Iü¼¤}ÞI÷²UߊÄÎUižüÚ|Çg¿Ýß¿k¢cO‡ã¦Û³°š¤§þ‹°iÒ0(Üêë’W“§…“jõYÃxiòúøž1®3§ÉŸ]ܽrÓ•k¾<®ãÆW<+Ÿé±EéeqÛu5¢ŠGaEô QÍX}y©me£gym·álxxõXÇj#@€ @€ @€f¾€€ãÌ¿ÆVH€ @€ @€‡@ Í“I 8NÆ"lP¸ƒãpurƒ›“±®FcæIvS£þ‰ô-Y²äØ8®‰8Rz|£€ÇDÎ]xLA¸±v|5Í/g‚ Ê壎O:¤É¹i–ŸæÙÙIšŸçéÙ1µ£ï6‹’½¯ûÞím9T?‡†Öÿbqwï¢ööüs¶<²%çM“GÆŠ¡É?}h>ïö‡-ëýdžæ[vTîþèÖþþ_·äShéö,ÌÒä…|ÕÉÝ•òÀó×Îwø¾ÎgrZâ0¾pcm±cj©-¹lÁ‚ݶmÛÝ“1³ÙúL¿ùÆë?~âÉgÜ·^£]MŸÖÕu郃—_7.û4YÙ¨>‚®Ÿ¸üÕè#@€ @€ @€f®€€ã̽¶VF€ @€ @€‡P vh›vÇ<ÉOŽPAC¥¶á™pL'aÇJûÑÇ—*NíÎR%iYÀqþüùígœõˆ…I–/‰ðÒ"t^¬þÞðgÜk{n·¸ëßv‡lh ïë]åU—F s}LtßN’­˜T¬;BiWtÍË:*±»ã¶øöç»’oÚÔ÷ÛVœãp1Ýž…±k]áŽivˆŸƒiG¯z±‹àyÇŸ|ú›¢ÿ׫9˜öÙúLÀèpç²Þ57m=¿°Oó¶öK£ÿïêÕŒnïZºü©qØCF·ü\Íóüì= @€ @€ 0»šø×„gˆÕ @€ @€ @€‰äIZ™Èq‡÷˜´pÇë¯ÿö´ n64­´~ÇR’ÐðœS½³”ôü;—­<¯kYᅵqÎù¿LKÉ•fùýòKŸ–¿‹ì_=TIÒGæyþ™I»|±]>ŸVïÏæ$¿èîéÝеlù“&í|‡hàéö,Œ¬m£ÝêîQ«Tià0B¯‡ô|‡èÖhéiâÁò¿&ëû2›ŸéÕjåCE*ÂßË‹jöëOÛz÷û<êCž'wUî¾¥T³ @€ @€ @€À,˜–¿TžE×ÇR  @€ @€ @€À¤,\¸ðˆØMÁïÞ¾}ûiÕ'•]­žp)Í[¶b«çÖÌxyuâ;8.,—OŠ`ã{cGÂoFPï•ñóÄfÎ9j6oXý£Á 
}DÈñ"pö›IsšÎ‰@èÒ$)}±kÙÊÕ–ËÅ¡»IÐì<îÙ“‹V[ÍJ‡ô9˜åÉEsšÉýñ}»¦p}{Âm—-X°`naí8 fó3ýÊþuß ÿÿlL–ž¿¸»÷1kîé­ýx϶F¯|`hhèöFú @€ @€ @€™- à8³¯¯Õ @€ @€ @€1ªGuä˜##Ù5ò£÷c „ÒÑc÷L“Ö,öœÀkÉÒU—Ì+uü0‚¯ˆ?m¢ùCò¼¡›«kaÃjž¬jþÀƒ¯Œã¿ÝtÃõ§U«ù › ^Ä)Ã1^YoG©ã{Ý=«^zC9´I>ß=²môûHb÷$åritûèÏY’öŽnÛïsž_¿yýº«÷kó @€ @€˜uþ£„YwÉ-˜ @€ @€$I¶†»–­º=‚ w&;òÈSký‡Óä=]=½·F°òòøä™~ê€c–ÚÀa|f_À1¯¾q`ãš7í½Ù¶ô÷]ß¹tå_ÄŽ¦ïÙÛVçgÖ–g|r¹ü˜kúûwÔ©Wól{¦ÆÙ¼yó-Ý=½›âyvÉè¾½Ÿã=µ39â‚(úäÞ¶Ñ?–Ë'Åß}{œÙ-@IDATϋں¯<Íû¢s’‚âuO«ƒ @€ @€˜bŽS삘 @€ @€8Ti’Gp1mpÌæµ 8\ÝÕôæ9óµøÆÜÁþ¾È‚LïW粕åXA„ÇÿŠ ãÎ4Í¿G~3<¾Y­V¿µ;ɾó‘þ¾Ÿo´|Îøê'¯úcýý7Äè¬ý©ívzüI§?>-Õ‚ŽÉ³Ò¿3ÖzŠÚj¡RÖöÚ¨ûý¢ZýH“ß•§gÖ´° îïÓZ8Ü”*vl­íÜ÷G›6ô½·Î$ótx×Kò¶9߈ãujö4giú']K—n\÷…FuÍôÍ–gz#‹Mýk?áÒŸ†{ƒ{1ë\¸páK·nݺs¬±"¹|¬ö½mqõ¿0Ô¿ö‡{?ûI€ @€ @€Ì^lö.ÝÊ  @€ @€ @€ÀìH“4vplüª¶ån\¡÷Sýý·%yÞ8¸—¦¥ãN9åAÓY+v©|wìÙu|SkÈ“Ï W†5¸aõü kÞ4¸aí×ZnÜsî­ïŸæW{h+³jõ{EgŒG'N-·Aÿcô͘®ØåôíñݨnܳÎ+®ôß6±è,IK|r¹ÜÑDmÃ’ÙòLoˆP{F¥ÕÕjâ¹tô¼£ï÷¬±j/î95žw«ï¾¶üC÷½÷Ž @€ @€˜Í‡ý—»³ßÚ  @€ @€ @€ÀaHó_?Ky°§hJS±?ÏÓôÆ¢‰¥•¶‡ÕLÕþÚîFZÜÌüòjò† «Ÿ~eÿºo5S?Þšæcg°)÷;°ÁÁu? —÷ ®_ýœÝw팰cõ1Ï[š\{Þ^úÃ&k•CàŽüî/Ý»“`ý£òäüùóç·×/h]Ïùåòœ†ßº§îHyšü¼™Ùýä‡ßy{\£ÿ*ª`ó¹Èæ½µ¨®‰þÿLo ©æÙ‡ ë²tÉX5msÅõˆÍHë¼ò|ÇŽÛ*ëôj&@€ @€ @€f™À”ûåî,ó·\ @€ @€ pøòäË…'OÓGÖ´¨ kYﺮžUßï^Öû‰xÿþΞÞ×u.[Y^ܽòq½'¶è4“4L^h™f¥gOÒÉ'}ØØ½ñEIìBYt¢!½gpãê7ÕL„I w‘Œ÷êkæä-:vhhý/j;[&»w?:BŽ?mfØÈ Ío¦NÍø>ÑßSqmãÒtÎÏ<ÿ¼†5-ê|Hµãüø®ÍiÑp3b˜íÛ·ïÎ_R¸Sn¬6"u¯¾¸gÅÓ~á3û™ÞŒÏæ «ÏôÏ6ªÍ“´|ë¿9hˆã·n]w[£±õ @€ @€ @€³G`¬_6ÌžÕ[) @€ @€˜Å•jrMÑò#„pÈŽMyJ¤ÒΉ„Ês"Ìõ²,Iÿ>K³ííÙW²#ÒßDøñ†¤\. 
Ù­i2úÃésEãÆÚžST3Uûc÷Æß/œ[žÜ°ã¶_ÿYaÝA¤I~fѱýÚ´øØààåץûŸ;öÝP´¦è/\wc(S ýâ˜Í#KíÉÂ'ímœgÚ>'& %XóÕj’¼£‰sdmyöÁE‹Ík¢¶nÉL¦×]øèŽ4¿ltÓÈÏñ÷ÚÉ(}Òȶçynü}~D½úÚ®µ›ú×~º^¿v @€ @€ @`ö ü¦}öX1 @€ @€˜-Û·oß¡ª¯6Zo줘¶ås–7ªiE_ÞÞx·§=ç¨&_hŹ&cŒÛ~ûÓo$y~GÑØmóæ¼´¨¦UýÙœc"Ü“®ˆpã_ĘÿšdÙÇÛK¥kO8ùô»j»av÷ô~©kYïú{vÛªÖööãO¯ß;¢'þÔˆO“òöÔtî3cà¹EƒG€¦½¨foWOï_FÀpKXüpÞ1'Ý»†~+-¥ýh{SÜþ—ÆŸ »»W¿·~2~îN>[8nšÎéêZqra‚q Tï¾}sÜ3w5:02‡»¸{åÕl_g¹÷wã~;÷`Ç™©Ç_Óß¿#®ÓKóx­1J^ÕµtUaºÞ83ù™^oÍcµoëï¿#°ûÇêÛÛ÷l-à¸ï•¥éâ}Æz“çk¢YP{,m @€ @€ @`– 8ÎÒ oÙ @€ @€ @ &ïÙ¹¬È"]YTq°ýY–†(«i»¬MÍWìˆ83Û·ƒU½YFHêÕ{vD¬WТö… —“fcï^a”Ø\+‰ \úÄxwöUW­¹³Ñi+mySÇwä?l4NKú²ìÂ&Ç™Ód];æÍÝÉž‰Ÿ¥1kËÙޢơ¾oÅ·±áu¨…º~YÚyK‹Ni˜›7o®¹^1¢iÌ·¥¶ìOÆìhQcš¥“:~‹¦yX‡ÜØ÷™x€ý[Ñ$ö<æÒüƒEîzãÌägz½5×k¯¤ÕËêõÕÚãï“sö…ÀËåR"îx›§ÉêFãé#@€ @€ @€fŸ€€ãì»æVL€ @€ @€ö WòÕµàÔ¾†±Þ¤ÉÃk;‹ÕÕŠ¶ÎÎÞ#dö¼†cåùŽ·U¦ì޵¹çÕä ×°§3=¾m(®;¸ŠŽcK±“ã‹FÉ«ÕÂPWšTO(§Ö?gNÛ¤þÞiIOÏ™q§ö63—HÜ4pŒ›[cNjÀqa¹|¿¸^G6šG„ˆ~ZÛÁ®Q¾ƒ¨¼·èèÖõt•W4 oQ¯¿{éŠî¿»^¿öûn­ìxm|oq_ËØïj¡å£ŽOÿ~ìÞâÖ™úL/^ùþW®_ûùØ¡¸q€½­º¤vTgÒñäØý6žgc¿ò$¿fÓ†¾ïÝ«• @€ @€˜­“ú‹æÙŠjÝ @€ @€ @`º õ¯ý^ÌukÑ|³,}ïüùóÛ‹ê&ÒŸÎI_;ç5¤¥É[·®»m"ãªcûû#+ú£¢óÅ.m}qyùËê&ÚïŽe¯):~O°u¸º¡¨.ËÓßÕÔú³öüqÍÔM°&6ùœ³:`G7w|Áý4bJµ²mÄDZߦé“.,—O»óà[;’ŽÂqº„‚žºîƒÖ~-ÂWÿY·àÞŽ4ËþõÙåò±Euãé€ëIIZzßxŽ™ÍµŸêï¿5­Vþ°Iƒ?\Ò³bA“µû•ÍÔgú~‹lîC<~ò5,MÓ=Ç,M.nTáÇÆã4:X @€ @€ 0cg쥵0 @€ @€ М@ìâ÷Ï…•iòÈ3òˆ×Ö³ »ûÒs"´VTÙ½;ŸáŸj,ÿÅé‘¥¬ôáåòQŵã®H>!ý—F;hÝ7bú‘ÁÁu?»ïóØïªi~ÃØ=û·æIöøý[Z÷©³gÕÿŠÝØžÞ숱#èÜfk¯ì_÷ßl+rÈæfŠ1'åwky–<£h¾±þZÙk2*é[ ‡OÓ“uljÕ÷÷¢‹.=¾#›7»ŽžTxnû6®ÝœäIÿ¾†:oâ{“fIéßï ~שªÛ<#ŸéuWÛ c8ÝÝÝ51_±»îü®®åÊÓdñ˜µÆØ‰ùÖÊ]…¡úºÇë @€ @€ @€f¬À¤üvÆjY @€ @€˜›ú×~2’ß.\Zš¼¾³¼ây…uMì µ·ÕÂ;Cæ_XóÕ&‡=¬e¿¬î¸,Âr…;Fææ¼³Žµ ,h:ˆ×̺{Vþc’¤«š¨­VªÃÕD]²ëŽêõQW7زoŒ4霌]>ãžû½,¾yßyšxÛÓDÙÞ’<©¦ïÝû¡ÞϸfÏíêY9®yÔkd{g¹÷ÉH}ùȶ±ÞWòdh¬vm­ì_=ßßME#ƽð̲yŸ\²dÉqEµú/¾ää¹G·m‹÷“Õé[`÷Ž]¯ŠãMc÷Þך&ÉYGŸ½í¾–æßÍÄgz󫿯òÊõëÏÕOßײÿ»øN¤I{éuñãÜý{F|J“͵Ý7G´xK€ @€ @€Ø# àèF @€ @€ @€’¼’üMCĎȲìÊÎe½Õõׂ}ðÛa¼ÇÕ&Õü…5S¤àšþþy5ÿÛ¦¦“¦ŸxòéŸ^´è’û5U_PÔµtÕëcƒÁ×”íéÎó|]ì\ø­fj?úÑËoŽ·¾TT÷ÇcÏ<çá Õ;íêé}CV*}¼vïÕ©»9MŽ»cìÖJ} zî»÷¾Ö4Éþ¢sÙÊ®ûZî]mdz,K‹×—uó†ÕWÜÙÝŒÀð]»þ(ß·ÕÆ5{RvÄ1_hè»{éŠî¶yíÜNUt.ýc ]qc5Ïÿר½û·Æ3ï•]K{Ÿ¹kñ§™øL/^õØÕjrÙØ=÷´F¸ñ•úãïòÕ ûu @€ @€ @€³V@ÀqÖ^z '@€ @€ @€÷ 
ö÷}8B=¼¯¥Î»4»a}8‚gy~¹<§NUÃæî¥+/<á”3¾¤é†…{:ó+6®ùXqÝÔ©Ø´qÍ»ò<ÙÜÔŒÒô)íóÚ¿TÛ¥°©ú1Šj»Èu-[õ¶4Kš VÆÜnOv¿aŒ¡ê6UÓfwÌ^ÓUîí¬;P“½'võ¬úXìlø¦8d¿ÏÊmòT{ʶl¹â7€ZÛÌ1r\A©7.^¼x\!ÊÑcw.[ñ;i{Û–$MNÝ7úsß:ºÍçÉZÿ‹øŽüE3£G ë!ÀÝÚ½¬wàâžO‹cÞ«ìnë^Úûì¨ÿD’•>÷÷ƒš9šú›6ö­Žvaø7®U¼’ß³spýáÆì™‰Ïô1ZÐx˯®Ûß[”Õ¿ÿóüçñwyìíE€ @€ @€8P íÀ&- @€ @€ @€ÀlØuçͯj?ò„§D€ñaÖ)‘ÚïÞòЬcÕÖ®ü³üî%ýý•FÇÔú.îé9­-ŸóÏlìŠs¾ò$ÿíŽÊŽ?.,œ‚Õ·þ~éˆc~'Öúà¢é…çCÒR骮= ç¯X»º¿ž].{LÚñ'qük"$×| ¯š¿hpðòëŠÏp_Åp%¹bNiÏ.Ÿsïk­ó.K6FàòƒÃ;vþŸZX¬N՘ͱ;ây±£ÝK"ˆ´*î‘Ç,j¢1Í“û'år©™ûrïp;ª;þr^6ïâ°ëE]z|inÛi¥¶t~„Â^ëyêèšÑŸc§Åw¥?‹ÚÓF÷íûœ¦¥EIÇ·$ÉõûÚ ÞÔîµî¥+^‘¤¥ØÍ´ø×èĘó?œšÍûÓ¶}.&õãØõªIúãÒðîïÈvßÞVû R[rzÔó==O²GÄû{vëŒ7…¯<¿#O*/Œ:¶B¬–ä;nýõÒŽcï¿eßõjføØ3î‹—ÔJ³ý®o|Øóy¿Æ1GŒ€ä¦£hÔÂ@ù˜ƒÏÐÆÍë×ÿ¤séʿ̲ôM,ñåKz–l^¿îÓMÔî+™iÏô} ç›J%¿¬TJÇp̆w¯ç©” @€ @€ @€³H@Àq]lK%@€ @€ @€EC}ßì*÷^š–ÒË£¶0äX/Â8÷‹/ÊÒôEI)‚=µÿ™j¯ìž{þ·8Û³§¬`‹Û‹7m\;­w­«íĹeyRZá¶æ‚!žgÄ?Œkð‡±+Ü=Á¨#j!»7$Õ¤åÐ{ÿ'‚SC7ßxýëF¶çýM•»ßzBi^mgÅS›:.M;bÖKøóŽX~÷²ÞØå0ýqÌãÆXãÉ‘Ù;=ŸG55Ö½Eqìo’jõ…I–=(Æz£cKijÇp¬5°qí@wOïê{U£±÷ë‹P[|.ßsIÒÚK’öö¤#iOîùp_õ8/ÛîJ’voÞ°î+÷àÝ¡غuëÎE‹-iŸwüÖí>ýМ7¿â¦®щ§œÑxGÐ<Ýyhæ3}βiãš÷Äα—ij¶ö½¯ûŠk™fyÛ¿-^¼øQCCC·×-£c¦=ÓÇXbaÓæþ¾ÿŒ@÷wâ/ª‡ï+È¿<0pŵû>zC€ @€ @€%0ò?+Õå# @àÿ³w?@–UõÀÏ}Ý3#u  1D!FÅ5 Š+Š»† ƒ 5þ9HdƒÆ­J¥’”»É º±*Y“M¥5„ù#ÌL;=¸²‰£%NŒ ’ˆÊŸdÀ0 È€ü™¦§ßÙóІyoº_w¿~¯ûÞs?ÏšzïÞ{î¹ç÷ùÝn«ÞðK€ @€¨£ÀèȺ­Í‰øögÂd‹ÂoÚºy] ™Uÿ•s)¤9ñ–ôø½]‹YM ŒþõèÄØÊíÛ·ïïuÛGFž qâÝi®9…‚ž½^ <¶B1)_ô¶gÂ1s 7ÆøÕ}áÄÑ‘ ÿ;™Þþì¼Ó|HOÐ;cšC]w?òà}—¦7v4àƒ­ˆïD3\¼mÓÚ›|)Ówh=ÕvÿØcïJ¿ ¿ÒeX_¥{îê-×]ôÄOÌøtÆ"Dǃ՛ÅþpiˆqßÁ‡Ú÷¤ä1Ç,ÿÃö½³ÛÊéwúì*>xT³W¼wú=Íf¼zú£Ž @€ @€ @€ÚÿÝd @€ @€ @€g¶Ž¬Û±¢yj ‹Ü½`$1ü°›çoÙ´ö“ v͸Ðè¦ ·í›ˆ?ŸL·-ÀåÚ.Ñ ÊÅfüè覵—†‘‘ƒSm'O±Ñª¥ˆqe:4>ÅáAíJù˜ð{£Í±·~dÝý­‹<ùhãŸÒ[³ûcOÇVt´¹ç¢$·¾ûü:ã“é^¹tÛæµ×è ¦ƒ@ë)£×½-ÝéwÔNåÐøTl63ýŒþr:¡ùâ¿xæ ¶ŽSènÙ²ö[éɰÿcŠCSíú•siÍ;¦:0Ó¾œ~§ÏTëTÇŸžØ³!ý<Ì6,¿7î{bQãSÕ` @€ @€ P.™ÿ‚¬\ëµ @€ @€ @`®Ùpws_8%…­þba†žV—®qËDñô‰[7­Ï2Ñ æïÙžäØüX >ÙÒœOŠÿ›áÍ£›×ýîœOír–Íë¾”žðùÖÔ³]†õåPzòåÿI9šS¶nZ{ùÍ›nZÿT ›ÝÒí"éI‘?wöy½©Û˜i¥0èè¦u§dåûÓuvO;®Ï’éÆñ±}¯NO0ýLŸ§6Ýübº>=ÖÜs\ ÐmžßTÏž y}z*ékG7¯ÿDÚ›n÷ôlÒ‰#†Ÿ1õ§ Ç©iÂλïøxúÿ«žæð³»Óï‡"6Â_­X±âùÏîœÃ‡œ~§Ï¡ìg†~adäÁŠgs^êÅuÛ¶m{l6c!@€ @€ @€ê+ àXßÞ«œ @€ 
@€Ì(°uëºGÒ“Å.Ûßl¾.…q¶ÎxÂ\Äx_³Ùüµôļ·nÛ¸qç\O¯Òøn¸aÏèÆõÙÓ{E3Æ?Jkß;õÇðpz´áo=òà}'·žÄ9ˆk´æ{|ÿ )“Õz’×3Á¬¾^'Æ›R°ñÔÑkÏÝôÙoN5wªñª©ö¸o(ýÖÛsü[AÃ}{ö½:Uxu*s¶O,›ãe’bŒßNOÚ|{úY;ÿúë7îšóNXV°kËÆu«ÇÇã ­'£¶ú6ç ?ó„ÎxMh6!=òìFÖÝwàË–í?äÀíi>M³¿ö»o½õÖñ‰ýñÒ‘~Et¡8zÉ¡Ë[áÒž^9ýNŸ3@3Îøû·5gjÂÚ9Ïí @€ @€ @ v3þ  µQ0 @€ @€ p@zšãwÓΕ+W_xJCï/B83ÅQ œåŽ º=ñ»¿ÿ½k·oß>°àØ,—³ Ãny8]ð7ÞµjÍ-i¿Ò(â;Ó³Ûþ]òšÇBRf2þMJ“üÕ½ÿò­ëZ!ŸyÌ5«So¼ñ³§ï]±ê¢ 7ÿ%ÝïK5üĬNžjP »¦¤äMEóªë6møúTCÜ÷hsÏÈáC‡üiz’Øaî?ðsQ„çž{Á±[¶\s×ûçòù†®ýAÉ9ç¼÷ÃqYqa#†5¡'ÎeŽ©Æ¦Àð½)`u]s¢yÝ£ï«uû9˜Ê¤*û®ß²îÓZ[~÷œÕï}M#g§{âéçø¥éý§žyáÈX„ÇÒÏÅý1»RŠõßRÏoz°¹÷Æ##ӇřŽE|¨*V‹±Î붬ÿƹç½÷OBhüúÌ×/þó{V_ü¹m›ÖÞ4óØ©Gäò;}êê¦Þ›þæóG{üÒï°#¦ñÌ#IwmkŽõì:ݼö @€ @€ @€ù ¤¿Só"@€ @€ @€Ì]`Źk^¿d8üb:óm)ÔsTˆÅ‹Óûái»Ñ6[ ?LQ‡SÈgG °}¹Y<ýåë6nü^Û˜šoœuÖ/Zvèð)âøŸ’Ñ늎LÁ½#“ç ;iÒÓŸNáÐÝiÌ?Å"þ}Ñ ßߨºuý¢†žV¬XñüÆ!ËOKÀSC£HAØxBZãáÁÍÄÜŸêz8ý%ÕéÖ) ö¥ô„Л~¢í,·”Û)ÔùÊá¡ÆÉ©'¦:NJ^—êlÝûÏ;hÁ1Ž¥{ÿûE¿Ÿj}0¹ÜÖ ñº­›6Ü~ÐX;j/ðL`²hÜÑ "ý ýùè¦uê6ƱÅÈáwúL‚ç®^sWú½÷Ê鯥ßžJûáéŽÛO€ @€ @€˜pœ”ðN€ @€ @€ôC ‘žtwDXZ,Ÿÿáã?ðˆ'ÓõÎzòÉ'/9úè×±(¼`x"<þÀÐÓu{ú[ïWÜ™­àcqè¡Ëqé!{šc?øâÈÈ£éj)ۘ߫կ—¿üøìŽ/(B2>ôã']æW¬Š"pöy½e8 }µÛä±>2ºyíǺq¬œ9üNoɶžæ\Ã;º*ï{Í–-×~§ë  @€ @€ @€I@ÀÑm@€ @€ @€ @€œûKCŸë¶”š—Žn\ÿ×ÝÆ8F`+W¯ù³¢(~uºk¤§7þ]zzã©Ó·Ÿ @€ @€ p @ãÀ Ÿ  @€ @€ @€ @`qbzÕLW.šÅ½3qœÀ Ž_µjiŠóºÍcó3ÝŽ;F€ @€ @€8P`øÀ Ÿ  @€ @€ @€ P7ôDºÏ„"Ü&Âû›á·cwí[p‡F|cH éöz2ŽÝÖí¸c)p\±ìÓ-ú“Ó]#=½qÏÞ'š›¦;n? 
@€ @€ @ S@À±SÄ6 @€ @€ @€5(ÎI±Âåa(„%éÏKã!qåê‹ÿ-í»3†xg3Û¶mZ{Ó QÞµjÍQéÉxïêvÛùÅ‘‘ÝÝÆ8F` Eãý3Ì¿ùÆ?ûø c&@€ @€ @€<+ àø,… @€ @€ @€ÔQ ñÑôäÄ哵é•>¿¬õ'…ÏhÄðïÓç—áWÓ5–¤?Ý^·v;èA ¬\yáϤùÏìv‰ýÍOu;î @€ @€ @ S Ñ¹Ã6 @€ @€ @€j%P3<1ž´bÅy?=(“SW­:$4Âe3ÍŸR—7Ï4ÆqƒˆK†>Š"=ãtêWŒñö붬ÿÆÔGí%@€ @€ @€L- à8µ‹½ @€ @€ @€ÔF ÞÓ­ÔÖ‡ž·ä¬ncæsì¥C‡üfzRäá3Ì1ÞÜ7Ï0ÆaX±âü—¤{ôCÝ&OÇ?ëvÜ1 @€ @€ @€ÀTŽS©ØG€ @€ @€ @€@mšÍð…™ŠmÅo…¼f7×ã+W¯¹,Ç~oÆóbüÂÖ­ë™qœú,°råE/]rè’›‹"<º©cˆ<÷n˜î¸ý @€ @€ @€é§“±Ÿ @€ @€ @€Zìc­€c³k±EñòáC–n;ýôÓŸ×uÜž³ú¢ ÓÃ!ÿ|¦SÒ“ñöO4ÃïÏ4ÎqýX¹ò‚£‹%¯†Pßmî‹Oíë6Æ1 @€ @€ @€ÀTCSí´ @€ @€ @€u¸ûŽ;ö¼öøßŠpt·šÓì~æÃ^xÊ«_{âß¹ãöºívìÌU«Ž|ýëOþoEhüÏpœñïl‹¯Üºyý5ÝætŒÀ\Î^uák_qüq¡uïOuÞ™gž¹ìõ'½iEÚœîÑŸjÌsûâSã{ÆÏ»óΞr®çÆùD€ @€ @€8X 8x—= @€ @€ @€¨—ÀÊÕžÂÐ×S˜kV‡Cø›ÐŒݼî˳•:÷Ü ŽÃÿ‘.qq:gVO‚Œ!î{K™˜íuŒ#0“ÀÊóÖŒ¡8'İ;„øt?·ˆá¡ÐGÆ~*=±ñMé'aùLó´ŽÇÐüøèÆõ¿=›±Æ @€ @€ @€:fõ—s'Ù&@€ @€ @€ @€@n+W¯ùË>¼t.uÅï EqWˆñÞÖŸX„ûB3<‹ÆK¡ù²tìeé/eÓ{xy ænÌzþŸœʼnÛ6­½gÖçH`ç¬^ó¿Eñk³ÚuH FÞõÀÄžvŒŒŒuè  @€ @€ @`áiöÛM€ @€ @€ @€Z Ä}ῆ¥áéÉuÇ̶ðˆ|UûªdLùÅôL¼Ö‰C“gÏ>Ë8yƳï1N4CóÛ6mn|Ň~ ¤;ó¾>Ì•nÑý—7öAÒ @€ @€ @ Æóøµ«) @€ @€ @€ì¶n]÷Ⱦf<-=•î;‹Y\ºþ®f3ž±uÓ†Ï.æ:\;cؼwþÕ5ÿdtógo™ÿ§^V`åyß™þƒcguÕ¶‡ØüÃ-›×ߘƧ\® @€ @€ @ ?Žýq4  @€ @€ @€5x÷»Ï?bèÐ¥ohÄprhÄ7¤¿tý¹ùZBñÂâ Š¢N°Çc(v¥÷]éø®‹ôwÅf¸oÿÓ}éú믢&\Ê,©À{ÞóžåŲ翽Q4þc Ýþ|(ÂQé^=2}Þ›Þw¦{÷_Ròöž‰ýqÃõ[Öÿß’–aY @€ @€ @€ @€ @€ @€ 0)püªUK'?{'P)U«†*µ^‹%@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€åxËñ¾ÓßwÕ1å^¥Õ @€ä&ÐÈ­ õ @€ @€ @€ @€ @€ @€ÀÜbˆGïÛ·]ÈqnnF @€ÌO@Àq~~Î&@€ @€ @€ @€ @€ @€@QÈ1Fª‚ PÇêôÊJ  @€ @€ @€ @€ @€ 0X!ÇÁúš @ M@À±Ã @€ @€ @€ @€ @€j. 
äXó@ù @€…p\8kW"@€ @€ @€ @€ @€ @€@5„«Ñ'«$@€T\@À±â ´| @€ @€ @€ @€ @€ D@Èq ¬&%@€xN@Àñ9 Ÿ @€ @€ @€ @€ @€ @à@!Ç5|&@€è³€€cŸAMG€ @€ @€ @€ @€ @€¬„³j§b @€ep,S7¬… @€ @€ @€ @€ @€er,cW¬‰ PyÇÊ·P @€ @€ @€ @€ @€X!Ç@v  @€@½ëÕoÕ @€ @€ @€ @€ @€ @ w!ÇÞíœI€ p€€ãA$v @€ @€ @€ @€ @€ @€À´BŽÓÒ8@€ 07ǹyM€ @€ @€ @€ @€ @€BŽî @€>8öÑ @€ @€ @€ @€ @€¨€cíZ®` @€@¿û-j> @€ @€ @€ @€ @€ÔE@ȱ.V' @` Ža5) @€ @€ @€ @€ @€j" äX“F+“ ÐÇþ›š‘ @€ @€ @€ @€ @€õr¬W¿UK€è“€€cŸ MC€ @€ @€ @€ @€ @€Z 9ÖºýŠ'@€ô" àØ‹šs @€ @€ @€ @€ @€ @à`!ǃMì!@€˜V@ÀqZ @€ @€ @€ @€ @€ @`ÎBŽs&s @ ®Žuí¼º  @€ @€ @€ @€ @€ 0(!ÇAÉš— •€€cVíT  @€ @€ @€ @€ @€J" äX’FX @ ¼Žåí• @€ @€ @€ @€ @€ @ ÚBŽÕîŸÕ @€, à8``Ó @€ @€ @€ @€ @€ @ ÖB޵n¿â  @€ÝŠn#@€ÚN;ÿS_nßc‹ @€ @€ @€ @€T_ †pLª¢õgp¯¢¸wÉÒ¥§o¿ú’ƒ»ˆ™  @€ª$ àX¥nY+,ºÀ›ÏÿTúÇ‹ @€ @€ @€ @€èI@ȱ'6' @€rhäZ˜º @€ @€ @€ @€ @€ @ d1=¾oßöÓßwÕ1%[™å @€,‚€€ã" »$ @€ @€ @€ @€ @€j+ äXÛÖ+œ Ð) àØ)b› @€ @€ @€ @€ @€+ ä8X_³ @€*" àX‘FY& @€ @€ @€ @€ @€²r̪Š!@€ô" àØ‹šs @€ @€ @€ @€ @€ @`þBŽó74 @ ÂŽnž¥ @€ @€ @€ @€ @€ @ òBŽ•o¡ @€½ 8ö*ç< @€ @€ @€ @€ @€è€cÍB€¨˜€€cÅf¹ @€ @€ @€ @€ @€ÈR@È1˶*Š ÐM@À±›Žc @€ @€ @€ @€ @€ °pBŽ gíJ @€8–  –@€ @€ @€ @€ @€ @€?rt+ @€j# àX›V+” @€ @€ @€ @€ @€r¬H£,“ 0?Çùù9› @€ @€ @€ @€ @€! ä8Us @€J% àXªvX  @€ @€ @€ @€ @€<+ äø,… @€s쪚 @€ @€ @€ @€ @€ ‹€c.T @à ǃHì @€ @€ @€ @€ @€ @€R 9–ªC€è—€€c¿$ÍC€ @€ @€ @€ @€ @€ƒrœ­™  @€‹$ à¸Hð.K€ @€ @€ @€ @€ @€srœ#˜á @€r 8–»?VG€ @€ @€ @€ @€ @€ 9¨á3 @ ÒŽ•nŸÅ @€ @€ @€ @€ @€ @ †BŽ5lº’  @€s쪚 @€ @€ @€ @€ @€ »€cîV PÇ4Y‰ @€ @€ @€ @€ @€ÈR@È1˶*Ѝ€€c}z­R @€ @€ @€ @€ @€ä' ä˜_OUD€ÔF@À±6­V( @€ @€ @€ @€ @€2rÌ´±Ê"@€rp̽Ãê#@€ @€ @€ @€ @€ @€@~rüLJU# @ Ç\:© @€ @€ @€ @€ @€Ô[`ç²0tI½ TO€¨–€€cµúeµ @€ @€ @€ @€ @€ p°ÀÎeÅðé{Í¥÷|È @€² 8–µ3ÖE€ @€ @€ @€ @€ @€³nœ’1 @€ 8–°)–D€ @€ @€ @€ @€ @€³nœ“A @€r 8–³/VE€ @€ @€ @€ @€ @€Ý„»û8J€(½€€cé[d @€ @€ @€ @€ @€ Ð! 
ÜØb“ PEÇ*vÍš  @€ @€ @€ @€ @€ P_áÆúö^å @€@fŽ™5T9 @€ @€ @€ @€ @€ÈX@¸1ãæ*¨Ÿ€€cýz®b @€ @€ @€ @€ @€TQ@¸±Š]³f @€@Ç.8 @€ @€ @€ @€ @€ @€@)„KÑ‹ @€ôW@À±¿žf#@€ @€ @€ @€ @€ @€þ 7ö×Ól @€Ò8–¦B€ @€ @€ @€ @€ @€Â 6  @€9 8æÔMµ @€ @€ @€ @€ @€ @ áÆ|z© @€À”ŽS²ØI€ @€ @€ @€ @€ @€‹( ܸˆø.M€X(Ç…’vÈ’è@IDAT @€ @€ @€ @€ @€˜€pãl”Œ!@€d à˜A•@€ @€ @€ @€ @€ @€L„3i¤2 @€³pœ’1 @€ @€ @€ @€ @€ 0háÆA ›Ÿ P2Ç’5Är @€ @€ @€ @€ @€ PCáÆ6]É @€G÷ @€ @€ @€ @€ @€,¦€pãbê»6 @`ߥ  @€ @€ @€ @€ @€ PsáÆšßÊ'@€ê- àXïþ«ž @€ @€ @€ @€ @€‹% ܸXò®K€(‰€€cIa @€ @€ @€ @€ @€¨‘€pcš­T @€ÀtŽÓÉØO€ @€ @€ @€ @€ @€ƒn„ª9  @€p¬`Ó,™ @€ @€ @€ @€ @€n¬hã,› 0áALjN «@Q_ɵ6u @€ @€ @€ @€ @€@}bÇ„°€pã€MO€¨š@Qµ[/ @€ @€ @€ @€ @€ôWà´ >}yŒñŠþÎÚ6›pc‡  @€–@ @€ @€ @€ @€ @€ €pãqMM€¨²€€c•»gí @€ @€ @€ @€ @€(·€pc¹ûcu @€Ep\T~'@€ @€ @€ @€ @€ @€@¶ÂÙ¶Va @€þ8öÇÑ, @€ @€ @€ @€ @€ ðœ€pãs> @€L# à8 ŒÝ @€ @€ @€ @€ @€ Г€pcOlN"@€ÔO@À±~=W1 @€ @€ @€ @€ @€% Ü8(Yó @€2p̰©J"@€ @€ @€ @€ @€ @€À"7.ºK @€ª, àXåîY; @€ @€ @€ @€ @€Ê! ÜXŽ>X @ RŽ•j—Å @€ @€ @€ @€ @€ @ tÂ¥k‰ @€ª! àX>Y% @€ @€ @€ @€ @€Ê( ÜXÆ®X @ "Ži”e @€ @€ @€ @€ @€ @ dÂ%kˆå @€ª& àXµŽY/ @€ @€ @€ @€ @€_@¸qñ{` @€Ê 8V¾… @€ @€ @€ @€ @€ @€À‚ 7.(·‹ @€òpÌ··*#@€ @€ @€ @€ @€ @€@¿„û-j> @€@kÜ|¥ @€ @€ @€ @€ @€ @`ÂsÀ2” @`fÇ™Œ @€ @€ @€ @€ @€ @€@Ý„ë~¨Ÿ 0Ç š’ @€ @€ @€ @€ @€ 7fÔL¥ @€Ê$0\¦ÅX  @€ @€ @€ @€ @€”H (î]†NÿÛk.½·D«² @€L<Á1“F*ƒ @€ @€ @€ @€ @€}øQ¸ñ­Â}U5 @€ÀŽ`øH€ @€ @€ @€ @€ @€I …—,]êÉn @€ 8”×ä @€ @€ @€ @€ @€¨˜ÀÃÛ¯¾dgÅVn¹ @€p¬XÃ,— @€ @€ @€ @€ @€n­‰  @€p<ØÄ @€ @€ @€ @€ @€ÔO@¸±~=W1 @`‘†ùú.O€ @€ @€ @€ @€ @€‹,‹°sÉ’¥§o¿ú’‹¼—'@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ 
@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€å(ʽ<«#@€õxÓyŸ¼¢(ŠËëUõôÕÆ¯üúÆ^1ýˆêÑÛö^ém»GN[z›S7ÛkÑÛvœ¶ô6§n¶×¢·í9mémNÝl¯EoÛ=rÚÒÛœºÙ^‹Þ¶{ä´¥·9u³½½m÷ÈiKosêf{-zÛî‘Ó–ÞæÔÍöZô¶Ý#§-½Í©›íµèm»GN[z›S7ÛkÑÛvœ¶ô¶{7o¹öþÛ÷îDŽ @€*)Шäª-šd*P„â­™–ÖSY9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍì8)'œjéhSO›9yäTKOÍt @€À¬†g=Ò@ @€ @€ @€ @€ @€ @`^§]ðéËÓ;¯˜×$%9¹(Š+¾vÍeW–d9–A€TPÀ+Ø4K&@€ @€ @€ @€ @€ @€ @€Up¬z­Ÿ @€ @€ @€ @€ @€ @€TP@À±‚M³d @€ @€ @€ @€ @€ @€ PuǪwÐú  @€ @€ @€ @€ @€ @€ @€@+Ø4K&@€ @€ @€ @€ @€ @€ @€U®zÖO€ª$ð¦ó>yE·õ.]Û7^tR«cG‰|òòŠ^ý±xÌý»õv²—z;)‘ß»Þæ×ÓÉŠôvR"¿w½Í¯§“éí¤D~ïz›_O'+ÒÛI‰üÞõ6¿žNV¤·“ù½ëm~=¬Ho'%ò{×Ûüz:Y‘ÞNJä÷®·ùõt²"½”Èï]oóëédEz;)‘ß»Þæ×ÓÉŠôvR"¿w½Í¯§*"@€˜Y@Àqf## @€@ߊ¢èÖÛ7Þ·Ke1Ñ‘/ Ç„"\‘C1©–pÿî*éO zÛÇ2΢·eìJÖ¤·ýq,ã,z[Æ®ôgMzÛÇ2΢·eìJÖ¤·ýq,ã,z[Æ®ôgMzÛÇ2΢·eìJÖ¤·ýq,ã,z[Æ®ôgMzÛÇ2΢·eìJÖ¤·ýq,ã,z[Æ®ôgMzÛÇ2΢·eìJÖ¤·ýq,ã,z[Æ®X @€À ƒ¾€ù  @€ @€ @€ @€ @€ @€ @€Ž"¶  @€ @€ @€ @€ @€ @€ @€ 8œØ @€ @€ @€ @€ @€ @€ @€NÇNÛ @€ @€ @€ @€ @€ @€ @€ÀÀNì @€ @€ @€ @€ @€ @€ @€@§€€c§ˆm @€ @€ @€ @€ @€ @€ @`àŽ'v @€ @€ @€ @€ @€ @€ @ S@À±SÄ6 P“^Yš¥Ì{!9Õ2oŒ4AN9Õ¢·íz›¯‡Þêm»@¾[9Ýë9ÕÒ;.'œjÑÛv½Í×Coõ¶] ß­œîõœjéÇ—“GNµèm»€Þæë¡·zÛ.ïVN÷zNµôãŽËÉ#§Zô¶]@oóõÐ[½mÈw+§{=§Zò½ãTF€(‡@QŽeX¨‡À›ÏÿTœo¥_ûãyO1ß%8Ÿ @€ @€ @€ @€ÌZà´_Ÿÿ¶~˵˜ÿ$³^ñ`žvÁ§/1^1Ø«,ÌìEQ\ñµk.»ra®æ* @€@Žžà˜cWÕD€ @€ @€ @€ @€ @€ @€J. àXòY @€ @€ @€ @€ @€ @€ÈQ@À1Ç®ª‰ @€ @€ @€ @€ @€ @€”\@À±ä ²< @€ @€ @€ @€ @€ @€ £€€cŽ]U @€ @€ @€ @€ @€ @€(¹€€cÉdy @€ @€ @€ @€ @€ @€ @ GÇ»ª& @€ @€ @€ @€ @€ @€ PrÇ’7Èò @€ @€ @€ @€ @€ @€ @€@ŽŽ9vUM @€ @€ @€ @€ @€ @€ @ äŽ%oå @€ @€ @€ @€ @€ @€ @€s쪚 @€ @€ @€ @€ @€ @€ @€@ÉKÞ Ë#@€ @€ @€ @€ @€ @€ @€9 8æØU5 @€ @€ @€ @€ @€ @€ @€’ 8–¼A–G€ @€ @€ @€ @€ @€ @€rp̱«j"@€ @€ @€ @€ @€ @€ @€%p,yƒ, @€ @€ @€ @€ @€ @€ä( à˜cWÕD€ @€ @€ @€ @€ @€ @€J. 
àXòY @€ @€ @€ @€ @€ @€ÈQ@À1Ç®ª‰ @€ @€ @€ @€ @€ @€”\@À±ä ²< @€ @€ @€ @€ @€ @€ £€€cŽ]U @€ @€ @€ @€ @€ @€(¹€€cÉdy @€ @€ @€ @€ @€ @€ @ Gá‹R 0{/ýë³l$ @€ @€ @€ @€TBà?{s%Öi‘ @€õðÇz÷_õ @€ @€ @€ @€ @€ @€ @`Q<ÁqQØ]”ê*c¼¢[ígœT¼õ˜—Ä·uã @€ @€ @€ @€ @€ @ Ǻ¨¨ŒÀ×7~ðÊn‹½å?yy(‚€c7$Ç @€ @€ @€ @€ @€ @€,YT¡ @€ @€ @€ @€ @€ @€ @ RŽ•j—Å @€ @€ @€ @€ @€ @€ @€<óè£* @€ @€ @€ @€ @€ @€ @€@¥+Õ.‹%@€ @€ @€ @€ @€ @€ @€y8æÑGU @€ @€ @€ @€ @€ @€ @€J 8Vª]K€ @€ @€ @€ @€ @€ @€òpÌ£ª @€ @€ @€ @€ @€ @€ @€•p¬T»,– @€ @€ @€ @€ @€ @€ä! à˜GUA€ @€ @€ @€ @€ @€ @€*% àX©vY, @€ @€ @€ @€ @€ @€ÈC@À1>ª‚ @€ @€ @€ @€ @€ @€TJ@À±Rí²X @€ @€ @€ @€ @€ @€ ‡Àpe¨‚ @€ @€ @€À LÄf¸mß]aÇÓw„oߘx$ì‰O‡˜þçE€ @€ P^"áÐbYøé¡ÃÃqK^N]öÚð†¥Ç†¡Â³cÊÛ5+#@€ÈU@À1×Ϊ‹ @€ @€ @` 1ÆðÅ½ß ž¼9|¿ùè@®aR @€ @€Á ´þª§âÞp×þûŸùsÃØŽð’Æ‹ÂE?qFxçóÞŠ¢ÜÅÍL€ @€@›€€c‡  @€ @€ @€Ó <Þ|*|ô± áÆï™~# @€ @€•hýCVŸxüsáæ±ÿ>²ü¢ð‚Æa•«Á‚  @€TQÀsÔ«Ø5k&@€ @€ @€Xpïí(|h÷Ÿ 7.¸¼  @€ @€N õ[µ¾j}äE€ @€Ààoì  @€ @€ @€h=¹ñ·ûLxpbwÅ+±| @€ @€™Zßµ¾ j}'äE€ @€À`ëkv @€ @€ @€Š ÄÃGÛ ÜXñ>Z> @€ @`.­cë;¡ÖwC^ @€ N@Àqp¶f&@€ @€ @€È@à‹{¿þaüž *Q @€ @€À\Zß µ¾ò"@€ @`pŽƒ³53 @€ @€ @€@Å&b3lxòæŠWaù @€ @€½ ´¾j}GäE€ @€À`†3­Y  @€ @€ @€T_à¶}w…ï7­~!* @€ @€èI õÝPë;¢7.{uOç;éG—¼3v¥ø×Âö/ß¾Òuƒ @€Y 8fÙVE @€ @€ @€ôC`ÇÓwôcs @€ @€TX õ‘€ãüøþ_˜áü¶/ýà•3Œr˜ @ CF†5)‰ @€ @€ @€@_¾=~__æ1  @€ @€@u|GTÝÞY9 @€@ùËß#+$@€ @€ @€X$&Y¤+», @€ @€@Y|GT–NX @€@ŽŽ9vUM @€ @€ @€}ØŸîË<&!@€ @€¨®€ïˆªÛ;+'@€(¿€€cù{d… @€ @€ @€‹$C\¤+», @€ @€@Y|GT–NX @€@ŽŽ9vUM @€ @€ @€ @€ @€ @€ @ äŽ%oå @€ @€ @€ @€ @€ @€ @€s쪚 @€ @€ @€ @€ @€ @€ @€@ÉKÞ Ë#@€ @€ @€ @€ @€ @€ @€9 8æØU5 @€ @€øÿìÝù]gyðçÌâ±=ŽÛmgq¹Yl•¢8Ä5¦µEU¥T"Õ*‚TH¢@ÄŠ*¤¶(R%6)?¤jZÑÒšÆi“8K³Bìǘ,Äp @€ @€ @€ @€ @€ @€ÈQ@À1Ç©©™ @€ @€ @€ @€ @€ @€d. à˜ù•O€ @€ @€ Ð=*ªî]Ü•  @€ @€²ðQcR$ @€@¦Ž™NÙ @€ @€ @€ÝXXt+ @€ @€$-à3¢¤Ç£8 @€Ì3 ò  @€ @€ @€º'°bpY÷.îÊ @€ @€Yœï3¢,æ¤H @€<󜛪  @€ @€ @€z °zxeV± @€ @€@ÊWøŒ(åñ¨ @ sÇ̨| @€ @€ @€î ¬YÓ½‹»2 @€ @€@>#ÊbLŠ$@€ÈT@À1ÓÁ)› @€ @€ @ ûWÏ»4–,éþBV @€ @€HR þl¨þŒÈF€ @€@w»ãêª @€ @€ @€ V±yц:Ñ @€ @€À\êφêψl @€tGÀÝvw\]• @€ @€ @ óׯUë éF @€ @€³¨?ª?² @€ Ð=ÇîÙº2 @€ @€ @€@UUÅgÇ6ÇyƒK èF  @€ @€³¨? 
ª?ª?² @€ Ð=ÇîÙº2 @€ @€ @€@!‹Fã‹cŸr,džÚ @€ @€´¨ÃõgAõgB6 @€º+ àØ]_W'@€ @€ @€(Dࢡsã+K?W ¯*¤#m @€ @€üº@ýÙOýPýY @€î u + @€ @€ @€(C þ­ý_ZòÉØ:ñxÜyð¾Øwü@é‚ @€ ÐpåKbó¢ ±qþÚ¨ªªáÚ'@€ Ð;ÇÞY[‰ @€ @€ @ úÜ6-¸6>0ÿšxòè®Ø>¹#vN½{§÷ÇáÖd´Nüg#@€ @€HW Š*V#±bpY¬^ëFÖÄÕó.Áj Ý¢UF€ @ PÇB«- @€ @€ @€î Ô?ð¶väò“_Ý]ÉÕ  @€ @€ @€ P¦€_3Ræ\uE€ @€ @€ @€ @€ @€ @€’pLz<Š#@€ @€ @€ @€ @€ @€ @€e 8–9W] @€ @€ @€ @€ @€ @€ @€¤“â @€ @€ @€ @€ @€ @€ @€@™ŽeÎUW @€ @€ @€ @€ @€ @€ @ iǤǣ8 @€ @€ @€ @€ @€ @€ P¦€€c™sÕ @€ @€ @€ @€ @€ @€HZ@À1éñ(Ž @€ @€ @€ @€ @€ @€”) àXæ\uE€ @€ @€ @€ @€ @€ @€’pLz<Š#@€ @€ @€ @€ @€ @€ @€e 8–9W] @€ @€ @€ @€ @€ @€ @€¤“â @€ @€ @€ @€ @€ @€ @€@™ŽeÎUW @€ @€ @€ @€ @€ @€ @ iǤǣ8 @€ @€ @€ @€ @€ @€ P¦€€c™sÕ @€ @€ @€ @€ @€ @€HZ@À1éñ(Ž @€ @€ @€ @€8YÞ@IDAT @€ @€”) àXæ\uE€ @€ @€ @€ @€ @€ @€’Jº:Å @€ @€ @€ mMÅ#“;ãÑ_/L½ûŽÇ‘Öd%´L€ @€íT#±|`,.¾0Þ3²:®Yó*?ÖÎÌ> @€ @€´|š•Ö2¹£á Ú'@€ @€HQ@À1Å©¨‰ @€ @€ŠxìDÀÑF€ @€z%ð¨çÐ^Q[‡ @€ @ Ç°J€ @€ @€ΖÀ S¯ž­K¹ @€˜QÀsèŒD @€ @€èƒÀPÖ´$h¬@«ÕÚÒ®ù ï®n¸xyëý펱 @€(C`ßññ2Ñ @€d!à94‹1)’ @€ Ð8ÇÆ\à ÐO‡ïºõövë?ôå;n‹*Û!ÙG€ @€ 8Òš,¤m @€ @€@žCs˜’  @€ @€Íh^Ë:&@€ @€ @€ @€ @€ @€ @€ú- àØï XŸ @€ @€ @€ @€ @€ @€4P@À±C×2 @€ @€ @€ @€ @€ @€è·€€c¿'`} @€ @€ @€ @€ @€ @€ Ð@Ç]Ë @€ @€ @€ @€ @€ @€ @ ßŽýž€õ  @€ @€ @€ @€ @€ @€ @€@8t- @€ @€ @€ @€ @€ @€ @€~ 8ö{Ö'@€ @€ @€ @€ @€ @€ @€ plàеL€ @€ @€ @€ @€ @€ @€ú- àØï XŸ @€ @€ @€ @€ @€ @€4P@À±C×2 @€ @€ @€ @€ @€ @€è·€€c¿'`} @€ @€ @€ @€ @€ @€ Ð@Ç]Ë @€ @€ @€ @€ @€ @€ @ ßŽýž€õ  @€ @€ @€ @€ @€ @€ @€@8t- @€ @€ @€ @€ @€ @€ @€~ 8ö{Ö'@€ @€ @€ @€ @€ @€ @€ plàеL€ @€ @€ @€ @€ @€ @€ú- àØï XŸ @€ @€ @€ @€ @€ @€4P@À±C×2 @€ @€ @€ @€ @€ @€è·€€c¿'`} @€ @€ @€ @€ @€ @€ Ð@¡ö¬e @€ @€ @€ @€ @€è‘ÀÇ7¶Ú®´gol»ÿ™x íAv @€) àXäX5E€ @€ @€ @€ @€ @€4nÙ4C­ØV­¸õö޲› @ @{Ò @€ @€ @€ @€ @€ @€ ¸€€câR @€ @€ @€ @€ @€ @€(Q@À±Ä©ê‰ @€ @€ @€ @€ @€ @€$. à˜ø€”G€ @€ @€ @€ @€ @€ @€Jp,qªz"@€ @€ @€ @€ @€ @€ @€‰ 8&> å @€ @€ @€ @€ @€ @€ @€Kœªž @€ @€ @€ @€ @€ @€ @€@âމHy @€ @€ @€ @€ @€ @€ @ Dǧª' @€ @€ @€ @€ @€ @€ ¸€€câR @€ @€ @€ @€ @€ @€(Q@À±Ä©ê‰ @€ @€ @€ @€ @€ @€$. 
à˜ø€”G€ @€ @€ @€ @€ @€ @€Jp,qªz"@€ @€ @€ @€ @€ @€ @€‰ 8&> å @€ @€ @€ @€ @€ @€ @€Kœªž @€ @€ @€ @€ @€ @€ @€@âމHy @€ @€ @€ @€ @€ @€ @ Dǧª' @€ @€ @€ @€ @€ @€ ¸€€câR @€ @€ @€ @€ @€ @€(Q@À±Ä©ê‰ @€ @€ @€ @€ @€ @€$. à˜ø€”G€ @€ @€ @€ @€ @€ @€Jp,qªz"@€ @€ @€ @€ @€ @€ @€‰ 8&> å @€ @€ @€ @€ @€ @€ @€Kœªž @€ @€ @€ @€ @€ @€ @€@âމHy @€ @€ @€ @€ @€ @€ @ Dǧª' @€ @€ @€ @€ @€ @€ ¸€€câR @€ @€ @€ @€ @€ @€(Q`¨Ä¦ôD€R¸þæ;¶´«í3߈.YÑ[6µßo/ @€ @€ @€ @€ @€ @ Ǧ¤F(F ªªÛÚ5³íÙˆú«Ýv˦V»Ýö @€ @€ @€ @€ @€ @€²È¢JE @€ @€ @€ÂT#…u¤ @€HYÀshÊÓQ @€ @ ¹ŽÍ½Î  @€ @€ @ ËÆú¸º¥  @€ @ ižC›6qý @€ @€òpÌcNª$@€ @€ @€Â.¾°°Ž´C€ @€) xMy:j#@€ @€4W@À±¹³×9 @€ @€ôQàÚ‘Õ}\ÝÒ @€ Ð4÷xmÚÈõK€ @€ÈB@À1‹1)’ @€ @€JXwâKǪÑÒÚÒ @€$(P?^7²&ÁÊ”D€ @€ ÐtǦ¿ôO€ @€ @€}˜W ÇM£ëû²¶E  @€ @ Yõóç¼j¨YMë– @€ @ Ç,ƤH @€ @€(QàC ß«†Î/±5= @€ @€@"õsgýüi#@€ @€ ¢€€cŠSQ @€ @€4B`¨ŒÏ},ƪÑFô«I @€è­@ý¼Y?wÖÏŸ6 @€ @€) 8¦85 @€ @€ @€@cÎ\½äBŽ™¸F  @€ Ð:ÜX?oÖÏ6 @€ @€© 8¦:u @€ @€ @€@c.¾(¾²ì/cÕÐùéY£ @€ Ð=úù²~άŸ7m @€ @€RJ¹8µ @€ @€ @€¦ÔQãï—~:î9ü`Ü}h[Œ·5¥u} @€ @€ÀY¨ÿjãM£ëãC ßCÕàYºªË @€ @€螀€c÷l]™ @€ @€t$PÿðéGFoˆ^Û'wÆc'¾^˜z5ö#­ÉŽ®å` @€(_`A5ËÆâ²á ã=#«ãº‘51¯ò#aåO^‡ @€ @€r|šUÎ,uB€ @€ @€…Ì«†ã†ùWžü*¤%m @€ @€ @€ @€N8í;¾A€ @€ @€ @€ @€ @€ @€è²€€c—]ž @€ @€ @€ @€ @€ @€8]@Àñtß!@€ @€ @€ @€ @€ @€ @€º, àØe`—'@€ @€ @€ @€ @€ @€ @€Np<ÝÄw @€ @€ @€ @€ @€ @€ @€. 
8vØå  @€ @€ @€ @€ @€ @€ @€ÓO7ñ @€ @€ @€ @€ @€ @€ @ ËC]¾¾Ë @€ @€ @€ @€ @€ @€@ƒ¾zoûæ÷ìõ×ß|GÕïºuK»ýö @€ä) à˜çÜTM€ @€ @€ @€ @€ @€,¾¾µmv±îa}UÅúšÙ2Ã~»  @€2Ȱf% @€ @€ @€ @€ @€ @€ @€™ 8f>@å @€ @€ @€ @€ @€ @€ @€sœšš  @€ @€ @€ @€ @€ @€ @€@掙Pù @€ @€ @€ @€ @€ @€ @ Gǧ¦f @€ @€ @€ @€ @€ @€ ¹€€cæT> @€ @€ @€ @€ @€ @€ÈQ@À1Ç©©™ @€ @€ @€ @€ @€ @€d.0”yýÊ'@€ @ SéÖñxòè®Ø>¹#vN½{§÷ÇáÖd´Nüg#@€ Š@U,¬FâüÁeqÅðÊX7²&®žwi V~`*3R @€ @€ @€ ¯€€c¾³S9 @€,Z­Vlx<î @€ @€ @€ @€ @€ @  2ÚÐ @€ @€ @€ @€ @€ @€ “€€cNÓR+ @€ @€ @€ @€ @€ @€(D@À±Ajƒ @€ @€ @€ @€ @€ @€ä$ à˜Ó´ÔJ€ @€ @€ @€ @€ @€ @€ p,dÚ @€ @€ @€ @€ @€ @€ @€9 8æ4-µ @€ @€ @€ @€ @€ @€ @€B ¤6 @€ @€ @€ @€ @€ @€ @€@NŽ9MK­ @€ @€ @€ @€ @€ @€ @ ÇB©  @€ @€ @€ @€ @€ @€ “€€cNÓR+ @€ @€ @€ @€ @€ @€(D@À±Ajƒ @€ @€ @€ @€ @€ @€ä$ à˜Ó´ÔJ€ @€ @€ @€ @€ @€ @€ p,dÚ @€ @€ @€ @€ @€ @€ @€9 8æ4-µ @€ @€ @€ @€ @€ @€ @€B ¤6 @€ @€ @€ @€ @€ @€ @€@NŽ9MK­ @€ @€ @€ @€ @€ @€ @ ÇB©  @€ @€ @€ @€ @€ @€ “€€cNÓR+ @€ @€ @€ @€ @€ @€(D@À±Ajƒ @€ @€ @€ @€ @€ @€ä$ à˜Ó´ÔJ€ @€ @€ @€ @€ @€ @€ *¤m @€ @€ @€ @€ @€ @€@‚ߨj[Õž½±íþgâ¶ÙI€ P¤€€c‘cÕ @€ @€ @€ @€ @€Ò¸eÓ u´b[µâÖÛg8Ên @€ ìIK @€ @€ @€ @€ @€ @€ @€@âމHy @€r¨¢Ê¹|µ @€èXÀ=pÇdN @€ @€ @€ @€, àØàák @€@·V#Ý^Âõ  @€$%à8©q(† @€ @€ @€H\@À1ñ) @€@Î+—å\¾Ú  @€t,p¾{àŽÍœ@€ @€ @€ @€4W@À±¹³×9 @€® ¬^Ùõ5,@€ @ %+ܧ4µ @€ @€ @€ @€‰ 8&> å @€ÈY`ÝÈšœËW; @€ŽÜwLæ @€ @€ @€ @ ÁŽ ¾Ö  @€t[àêy—Æò%Ý^Æõ  @€$!PßûÖ÷À6 @€ @€ @€ @€Ù 8ÎÎÉQ @€ÌA`°ˆÍ‹6ÌáL§ @€ÈO ¾÷­ïm @€ @€ @€ @€³ð“³sr @€sØ8m\5¼jŽg; @€@õ=o}ïk#@€ @€ @€ @€˜½€€ãì­I€ @€ÀªªŠÏŽmŽó—Îál§ @€H_ ¾×­ïyë{_ @€ @€ @€ @€ÀìgoåH @€æ(°x`4¾8ö !Ç9ú9 @ ]:ÜXßëÖ÷¼6 @€ @€ @€ @€Î;ór4 @€s¸hèÜøÊÒOÇUëæx§ @€HK ¾·­ïqë{] @€ @€ @€ @€@çCŸâ  @€ÌM þ«6_ZòÉØ:ñxÜyð¾ØwüÀÜ.ä, @€}X>°$6/Ú篪ªúX‰¥  @€ @€~ ìüÁSñ‹Ÿïk»ü¢sÞï¾f]ÛcJÛyøÐÁøéÞWO~|ë͘œ8'¾ŽM FctÑ9ÿÿµøäÿ½àÂ߉ GKcÐÏ)Þ+§`œòOï•S0ü“ @€Æ 86þ%€ @€@oêß´àÚøÀükâÉ£»bûäŽØ9õrìÞ‡[“Ñ:ñŸ @€@*UT±°‰ƒËbõðÊX7²&®žwi V©”¨ @€ @ /¿ô£ØóâómWþísW4"à8qäpì|îéxþ¹gâñ×ÏhòÆøé¿ü´þÿ®¼xU\¶úʸø’Ÿ¹ ù‘Æ3fºÃ{åWƒó^ù•… @€ @àTŸœªáß @€ôL þðµ#—ŸüêÙ¢"@€ @€ @€ @à¬9|({x[¼ðübzúØœ®Ùjµâ¥=/žüš72?._ýθvÝúž7oN×s¼WRœŠš @€HI@À1¥i¨… @€ @€ @€ 
@€$.°{×Îøïûïú/Ò­íèäD|ÿéÇO„wņMËÏ»àl]ÚuôMÀ{¥oô&@€ @ #ŒjU* @€ @€ @€ @€ Ð'ãÇÇýßûv|ï;ÿtVç¶óæãñÏw3žxìÁ¨ÿ£@ŽÞ+9NMÍ @€ôK@À±_òÖ%@€ @€ @€ @€ @€™LOOÇ÷þýžøáŽg»^ql||ûñùǨƒb69 x¯ä4-µ @€ ‚€€c SP @€ @€ @€ @€HT ný×oÅžŸïi…¯¼´;zà»=]Ób~ï•ßDϹ @€4U`¨©ë› @€ @€ @€ @€˜Yàчî—üâÌžrÄyç_cK–Æè¢Å1þü8tð­xëÍ7bÿ/~ãöŸrdû>÷ì±déoÅï½ëšöÚK   @€²pÌnd &@€ @€ @€ @€ @€½øñ^ˆgž|dV‹Í_°0®|÷µqÙïŒEç,>ã9{_{%v|ÿÉØýÂŽ¨ÿâÝL[ýWÇ–,‹ W^2Ó¡ö蛀÷Jßè-L€ @€@æ™×¯| @€ @€ @€ @€ @  ‡Œÿüî·guå•ÿn|tó'ãêµïmn¬/¶âÄ_wܰñƸñ#çœóޝßjµâ?îýç˜:ztÆc@ Þ+ýP·& @€¥8–2I} @€ @€ @€ @€ @€³(ð?üWœ˜ñŠu¨ñƒ7~4,ñØSX~Þñá?û‹XyñªS¿ý¶ÿž8r8ž}ú±·Ýç›ú-à½Òï XŸ @€œsžžÚ  @€ @€ @€ @€ @€@^ßÿóØùÜÓ3^ùò5WƵׯŸñ¸3022?þð>ËW\p¦C~ùýgžx$&'Žüòû¼WR˜‚ @€ÈY@À1çé© @€ @€ @€ @€tAàчîV«ÕöÊu(ñ÷ÿàƒm™ÍΡ¡¡ØôÇ7ÅâwŒµ=üèÑÉxê‰ímI}羟þ$|+õ2Õ×€÷JX%@€ @€ÀÛ8¾ Šo @€ @€ @€ @€ @€¦ ¼ùÆxiϮۿî½bpppÆãfsÀ‚ ãüÓýÁÿ²w'ðQ×wâÿ?Ÿïd \rŸ""—'¢ÕZG«ˆ’8$ mµÛkÛÝýïo·{Ô]·ç¶Ýv·ín»Ývu  Ä) àAëUÚªhÄE9”Bf¾Ÿÿû“ù~¿s%3“×w³™ï÷óþ~Žç÷ûùb¿ïùlx^5;ê—©ooÝ¢–ÜóSõØ#õj×Îí™ÚMúåS€kÅ'a € € €€‹@žKE € b+«~~·[•_ÿ?uÍÙÃÝ"”ºã&÷rJ@@@@@@@@d6½ú¢çî#GSÃGŽöŒ‹'`ðájÜø‰jÛ[¯ÇÜ-‰¨×6½¤.™ññ˜1™^àÌŒ¹õÍ­¯g QÓ.¼T{Þ4 3½ëô¯ƒ×JV@@@HpL]@HT@kýÏnû®yY)çå¶Üq“q+¦ @@@@@@@HX ª-›<^Ú/½üê„ÛpÛqúåW¹&8:û:ÉÙœàØvüû?Ø«þðÄ#êÙ§žT“¦\ ¦^p©ê×@ÛÞg¨×J†º… € €Y'`e]é0 € € € € € € € € €¤EàÝwÞRMÇ\ë6bTÊgo<Õ 3‹ã˜qN­vúwßû{Ô¡ƒ:-ËÖ'š«—_ü“ºÑÏÔ#+—©íÛÞTÎL,™+Àµ’¹Ç†ž!€ € €@v à˜]Ç‹Þ"€ € € € € € € € € 6Ûßò¬{ì¸s=c’ ˜0qŠçîo½¹Ù3&[¶oÛ*IŽu’ìøsIz|N5Kò#Kæ p­dÞ1¡G € € $8fçq£× € € € € € € € € €¤\`‡Ìà赌3Î+$©ò£ÆzîÿÖ¯yÆd{ÀáCÔ3x\-þÕOÔžxDíû`o¶)§úϵ’S‡“Á € € €@7 äucÛ4 € € € € € € € € €"päð!uèà~×ÞäôRƒ‡ wI¶°OßbUܯ¿:|è`̪Þß»K>|P÷“+‘H‹Úôê‹­¯á#Ǩi^ªÎ>ç„ € =Îk3ÇÕ‘­¯›Öª¡Öµ ÏõêÆ^3ºäгG‹ž"€ € € € €@Wìßçà8pЮèŠ:ë,ïvöJ‚c¶-e\×ß4W]ÙÔ( œ/©M¯¬SGJx¶m«7^{µõ5xÈðÖDÇ ’ðÈãñЄQ}ìȵâ‰@@@À§ÿ‚õ E €d¿Àaû˜úæÁ%jCËÖì #@@ÈIçG8~xø×ꉦÕ]ý¨b«wNŽ“A!€ € € € €@f Ø÷gÇúöë瓊€>}½ÛÙÿÁžT4Õ-u©‹/½B]4ýcjÇ;[ÕÆ—׫wÞ~#©¾¼¿w—úÝcªµ|\Mšv‘šzþtÕ·ØÛ1©F{èÎ\+=ôÀ3l@@HVô/@IDAT‹ Žia¥R@È4‘½êÞ£vG÷gZ×è € €g8?Êñåý?UßéÿY5:ÏûWÊϨ€  € € € € €@œÇ7©ÆÆ£®{Y–¥z÷î듪Âü‚•Ÿ_ NœhŽYe$QÇŽª¢Þ}bÆdzÖZ7¡õåÌä¸ùÕÕæTS㱄»îË /¬U/­{V=ûÜÖYG9;áúر½×J{Ö@@@d¬d+`@@ Óœ™InÌô£Dÿ@@: 8?ÎáÜÇ:÷³, € € € € 
€@ºôþÁ`gVE'!¯«–>}‹=›:rägL¶8³-^ve‰ZðÙ¯¨f–©á#Ç$ÕucŒÚöÖëꡆûÔ²ÚÿV¯¾ô‚j9q"©:ÙY)®Î@@@ µ$8¦Ö“Ú@@ ÜÿXÿ̓K˜¹1ÃŽ ÝA@ð'à$9:÷³Î}-  € € € € Ng&D¯¥¸_¯”–; •^Ë‘C½B²®<¨ §¨9·V«y þ¬uFg6Ëd–ƒö©§ÖüVÕþï[ÿ:ë,‰ p­$æÆ^ € € €@,¼XlG@rAà·ÇŸWZ¶æÂP € €@pîgûÚ› /ë¡ @@@@ºB ©ñ˜g3}}$zVG€Ÿ=GÙ:pÐ`uUÉêò_«ÞزQm|yÚ÷þž„âÌàèÌäè¼F9»5yrìÙçvéÌœ w>CväZÉA7@@@ gHpÌ™CÉ@@@ £@ÔØjÉÑ':nf@@¬pîk?ÙëRÐVÖõ#€ € € € €@vøIÚêSì=£b*GÛ§O±guÍÍÇ=cr! ÌWS¦]ÜúÚ³{gk¢ãÖ×7©h4šððÞÝþ¶r^Nâê” ¦«ÉS/T½ ‹®¯§ìȵÒSŽ4ãD@@® Á±«¤i@º\`ý‰7Ôû@—·Kƒ € €¤ZÀ¹¯uîogœ—ꪩ@@@@hhô1ƒ£“dוK0?èÙ\OIpl 1tØHå¼®¼ú“jËÆ—ÔÆW֫Ç>âÈ‘Cê¹§ŸT/<û{5Ἡ­³:2¼m“¼o#ÀµÒƒ· € € €@ HpL"U € €@f ¬mÞ”™£W € €$ àÜß’à˜» € € € € €€/?³Òååuí#‡€w{ÍÇ›|/ƒzõ*TNÿ˜ºà’Ë[gbÜ$‰ŽÛÞz]c®3ä–M/·¾†)‰Ž3Ôø “T H¨¾\݉k%W,ãB@@îðþ×wõŒv@@$6·lO²vG@@ s^ãþ6s=A@@@@ "‘ÏQååyϨèYI •=qÇŽ„Zk5zìøÖ×Ñ#‡ÕæW_”×ÕØx´c¨ïõ=»v*çõLQo5eÚÅjÊù—¨Þ}úúÞ?—¹Vrùè26@@è»C6@@ KvE÷uI;4‚ € €@W¼ÇýmW0Ó € € € €=VÀ™½Ïkñ“pèUG<å~fpŒF"ñT™ó±}ú«W\£¦_þ µmëµQfuܹc[Âãvf+\÷§§ÔúçŸVçM¹P•Ü0+áºreG®•\9’Œ@@2E€ÇL9ô@R.ÐhšS^'"€ € Ð]Üßv—<í"€ € € € Ð3¢QïDÁ<3*¦RËO{¶m§²Éœ©Ë²,5þÜÉ­¯ƒö©/¯W[6½¤NœHìY cŒzwû[9ã“Ì@¸V’Ñc_@@@àLÏ4a  €䈀Q&GFÂ0@@PrwËý-ç € € € €¤OÀϬtyyÁôu “šýÌàH‚c'p6õ0H}üšOªË?^¢ÞܲIfu\§Þß³«C«~¸VüJ‡ € €ø ÁÑŸQ € € € € € € € € €䬀Ÿ¤­@Îൣ9{LR=0'AuÒÔ [_{÷¼×:«ã›[6*?3¦º/Ù\×J6=úŽ € €™(@‚c&ú„ € € € € € € € € €@ ØQïDÁ¼@×>rhžƶ=c8S`ÈÐjÈ'G¨+¯¾Aýé™5’ì¸îÌ ¶t*ÀµÒ) ðøß߸‡¼½K•\Yõsíõ̲/ÞíVN € €@v tímÈN#z € € € € € € € € €@N |$/Ú]œLhlãi®-Ë3†€3œc¹}Û›êÕ—Ö©w·¿uf[b p­Ä¤¡W{ëš»èì[¢µ*q­D©»=Ê)F@ÈB³ð Ñe@@@@@@@@@R)ÌzVFž#;zägL*Ž9ìY]Q˜Áqßû{Z“_íU‰´xšxô0¨5©ñ¼É('É‘%>®•ø¼ˆF@@¼Hpô¢@@@@@@@@@8h°çøH8ô¬$Ž€£‡½* ‹¼3ãh2cB£Ñ¨zëÍ×ÔÆ—^P»w½›’~={‚$6ÎP£ÇŽOI}=µ®•žzä7 € €¤K€ÇtÉR/ € € € € € € € € €Y"Яÿ@¥µVƘ˜=ÎÄû÷‹Ùßl,8"I›^]¯6¿ºAojLzùùjÒÔ ÕÔ .UýúHº>*PâȵÂy€ € €¤R€ÇTjR € € € € € € € € €Y(Z·Ø³÷ÇŽQ¶m+˲bƤªÀ™Á°±ñ¨kuNò^Ÿ¾Å®1ÙPè$•¾»ýmõªÌÖ¸}Û›®I¦~Ç3`àY2[ã¥jâäóU0˜ïw7â|p­ø@"@@ˆC€Ç8°E@@@@@@@@@ W¬ÜD¼ÆcG»$©ðØÑÞÌNŸŸeÊ´‹USã1?¡IÇô*,ò]Góñ&õÚ¦—ÕÆ—שÇøÞÏ-pìÙçªó/š¡F9Û-Œ²$rõZI’…Ý@@@„HpLˆ@@@@@@@@@È-? 
ƒ‡îï’G§¯ÅI2ó³8 ™´ìÝó^kRã›[6©h4’tל™,'M»HM»àRUܯÒõQ·@®^+Þ#'@@ +Œmo4ý]·¶Œ­_p+§ l Á1›Ž}E@@@@@@@@@ M|$ îÚ¹]=.M=ø¨Ú÷ÞÝþÑJŒw~úc×nÛüâóϨçžù]JÚw’ì¦IòæÄIç«`0˜’:©ÄŸ€ŸskÅŸ%Q € €g ¬ß·A¶:/z„ Ž=â03H@@@@@@@@@ÜJœ×òÞ»ïx…¤¤ÜO;ÃGŒNI[]YIó‰ãI5§µVcÏ>W9³RvE¢iRÍá¹Vrøà24@@èr»œœ@@@@@@@@@È<þ’àØ«°HojŒÙ¹=»wªH$¢òòÒ÷øaˉjïž÷böÁ)èÕ«P5d˜kL.ôR“¦^¤¦]x©ê[Ü/—†–•cáZÉÊÃF§@@@ CÒ÷_2tÀt @@@@@@@@@ΰ,KŸ0Imzeý™…'·D£Qå$9Ž56fL²»wíPÆ×jœÙ Ù s}8h°š&³5Nœ4M’Jƒ¹>ܬ×JÖ*:Š € €Y @‚c$ºˆ € € € € € € € € €@Wœsîd×G§;¶mMk‚ãv©ßk5f¼WHÖ–;‰›ãÆOTçKbãˆ4&’f-P†tœk%CÝ@@@¬ Á1ë!@@@@@@@@@@ 5ÃGŽQ…E½US㱘:3<^<ãJUPÐ+fL¢Ç›Õæ\wwfÏwÎDטl,,èU¨&O½HM½`ºê[Ü/‡Ð£ú̵ң7ƒE@@4 Xi¬›ª@@@@@@@@@@ ‹œäÁñ&¹öøÄ‰fõê†ç]c-|éÅçT¤¥Åu÷³Ï9O¹ÆdSá ³†¨k®Ÿ¥ªïøªúØUבܘ%k%KÝD@@Œ Á1ãD@@@@@@@@@ ëÎ9w²gc/½ø'å$:¦ri>Þ$‰“/xV9åüKrŒêݧ¯:vôHÌ®žh>®ÖþáquÍ ³bÆÄ[ðԚߪ––®»õë?P5Ö5&“ {©‹/½RM½`ºêÓ·8“»Jß|p­ø@"È1’’’^ý‡Œ-ÕÚ\.<#•V#µQ#Ò#ä÷ ,£ÌÙ¾[yiý޲íß¾gŽ?¾6nÊDŠ’P¨O«ð¥Ç)cÆ­Ç*mœ›-YWÃd,ÇeÛ­Ì£Ôòw}Ô¶ž5VóÚ•Ë–íȤ1Í;·¿Õ«øKÒïK¿'JÇÉñqŽÅkÒOçµYG"-_~ßiê·.­¨žÌS7eM”¶‡ˆ×Peô¥Ì`9G"Òî^9wöÈû=ž#öZuÂõDëL„&Ni_ÇÚ–Í/«çŸýƒçC†TÉ´ãÙ$ Àµ’» €Ý  •ÝOikBì¦3 ÝËœŠêËyºV’Ú'žQèsƒ$Î8‰RsZÏ­P…§æUÿõòÿÖçî) ›9s~qQqÞ|™uïs’Ôsqkŧ’xhEL&ËnÿaôûŽ$;þŸÝ|ø“NtÔ–Ì ¨b'™UÍI6:½”–Vi'‰HÆãk0’?é$©æU”WÕ<Uö]+—-yæt…q¼)«¬.—Ĩï&sn8ÍÉþNǧK]Óåí]’€yo¤©ùŸV­Zö^ÝIYhyùüQ*øšôìsÒ³‚”U,É@%Tϳt`ÞÀac¿Q^UýwÞØT»nݺ–T¶Ó±®™¡Ð`IJû¦´ÿéD¾¯3¥c%í׃RÏ'$AõyyæîŠÊšÑÿ´|ù"ï_li_OÌ5cEûjˆy-8;jcÚ]1+kSÐÓ? 
ÏŒ¼Â‚o˹]#,æ·¶ññóV®Ó<‰»Iv¿IÎã»Êç-¼³þEíþA+‰¼ùrq»?#³‚²´pPY@@@@@@@@@@ …GÕKëŸKaÉU5íÂKU@fM‰g¹ªäF^úK%3ɸîöøêõÁû»ÕeW”(ËòÿŒh4UÏ=ý¤zùÅ?¹ÖªðŠO\ê-sH€kÅû`r­x¤ZÀ™™oà1ÿ(IL_?™Ì’¢&ôTeéßHBÔê–¦wvU›$óÝ.É¿ñôiM3KÑhœj$QÈ™åîKV¯~sË*k¾ÔPW»2…ÕǬÊIØÌ+ H¢¨ž3È¥@ ¯³l–¸çT-¸2ÏX?_éR}¢ENæ’€u›ÌúÃý¦éûkÂᣉVÏ~sªªFLþ߉ç©Nl쬒d8^Ä~5öÜ©w?õ¶†píÚÎâ’Ý&3-~Qêø®œûq'úi[>œ|ÉrTs%9õ~[7}ŲeÛüì› 1¹ôYèœ ò/Ûûð3+5úrðÏ5Ú¬‘YXÑx(òµÕ«—NMÍ=¯{Þ1gÄ € € € € € € € € €i8vôˆZûÇÇÓÜŠÿê§L»8îǃ«ó/¾L½ì#Qsà kÕî÷v¨]u½6|”gÇvíÜ®Ö>õ¤Ú»{§g¬pôcøˆÑ¾b Ê.®÷ãŵâîC) Ù¡ê³óú~Iôº<õ·Ö©õ̼Âüß•—/(©¯_²+]í´&' ûIÂùKIðJë"Õ\¯2›ã/êëj¿$ÙilP–I’aBÉN¿ä7<šO‹ÖÅÓGI:ü†$FÞ•vK'iTfsh o-/¿}f}ý}ïÄÓÏxc+æUßlŒNeâ—ß>ˆçXPO–‡jªëõ¿ö»Ÿ8- ‡?’cõ—>bSâ$§Î·Tþ'ç„æß°2¼ô•TTÚuäÒg¡síʹpW:>O&¹~¡°_Þ§$Q¸då²e;ºó¸ekÛ$8fë‘£ß € € € € € € € € €¤YàÒË?¡¶¾¾I9Ih^Ëî÷ÞU+X¤5DMšz¡8hˆê[Ü_õî­UGRû>Ø«6oÜ ì{ß«ºÓåC†T—üºÓë¼A ¸V2ñ¨Ð'@ ~²²šAÚRIrã9ñïß’3Ñäž¼9ºö‘pxw|{{G—•UÑúIè¹Æ;:u2®ÏKbYŸz»q¡ ‡£©«ù£š$‰òÏ%pæG[â'.«~ø¾~öœ>}zpì„)¿’±Õø‰OUŒ´7YƒÏ–W.˜U_·d}ªêm[OyUuµ1úIÎëÎü¢^ÊR”U.¼£¡nѽmû—ÐûP(Pn-‘1U%´;IÂæ<+oMiEõ«–/~!‰ªºu×ú,´dvÅÿÌ/¤T>SÆç™ü'o Õ”(•–¾t¡[ëïΠnxg˗ܲýâÎÊNo3æúÿô:o@À·ÀÜy o³´¹Êm‡¶úÎCáZ?ËæVQ–•–Þ64P˜÷q­¬+åËkŒtq Òf Qº2j¯üÈ»2åñN¹Ù~רj§n±×44,–í, € € € € € € € €t@~~º¥ìvµjùÕÔxÌW'œ$Ƨ/y)X ‹z«OÎ,‹{öÉ4MÄ%ÀµÁ €@F L …ò-K7È,_iOn< ÏOê(\*ë×ŸÚ–Š¿å•ógÈ3Éõ’lå=µv*ìP‡3“]E 0¸\µ&˜É#Ò©[**n?Wjû^²5;ºÈO7„BýŠ…Že÷üâ†Vô±~/³,V.`ñ#~úì7¦¬já_ÉT–?”DJ9»wiíƒQ?.-­úíªUËÞK¦7åÂuyrãé>k50˜g=^VY}ECÝâͧ·gÉ›\ù,tÆ1I]…=ÔeôZOÈ·Ì“¶ÊûJ—µ™# ‘àØî@ê›äc¹¢Ý¦Ž+Z9Ô$8vtaË\+¿fò9·Ð ý)Ï™Gç— TºSõY¹éšxæØåV×Ù¨•ÜhŸ|ï¬[’#•5¿±•©=~øƒU«W¯nvŠX@@@@@@@@èJÏR³Ëç·&9oj첦‹z÷im·oq¿.k“†HF€k%=öEº_à<«×/åqÞOøè‰mŒÙ$Ï¿'È´Ô2µ‰–ÉOŒ+ïGÉ3Ãqåi8‰se•5·4ÔÕ>ä£mϹUUã´ üNžMîíÜ&À(óŽLÚ²JÆõºŒh·d%î–gšûkc¦(mM–‰]¦H_/•]äIg?‹ž'“P=U_WûS?Ñ~cL^Þˆq¡ßøÎâdl{ö¿ÿîo;+k»ÍI*¶Š“m·{½—úßÓÊüƵNÙjo4`ï‘:Ê ·”=BΗ „±TòWú{ÕÕZ®u©seY¨æê†píZ_ûx•‡Ì’>ýHa="?*–sd—œçÏIRä&éÏVyFþ°QöQK[[›bS?K™¡F«é²× 9_Æ~´·÷;éJß¼¢üKd isC §KÿþIŒý-Æl—±ü^ƵÙhûMë䘜%ImŒœìãäÜ+Nã¥ÊÙtÊ<­úIbª“ˆ[ê›a9òY¨Ï ­Ú›âá•kv‡|ŽË~f«œ;óA>à‡É̾#åóñ9GoñúluòFä:¸ÏÉañ/×§ÿj‰Dz¶ÀÌPhpQ ðÛò%¶@¾˜ ùn:ù›[,¥o)ê7ø€üÃ¥ö€Ýôõ5áðÑž­Ëè@@@@@@@@ºZ`à Áª´bA—%9öé[,É T¿þºz¨´‡@R\+Iñ±3 Ð}F;“™ŒŽÕIh”\FµBÛöÒ£¦ùw¿ ‡÷w;sæüâ^}­O[ZI¢Îë,¦³m’ó} 
­Váp´³òx¶Yªàçï7¹Ñ–DžÿF#?[¾oCŒvN'^–U.¸PkëÇ’¸vMŒØv›e\ß“YìOÕ,vRß-âzs»FX‘ùtÍš5¯]'Z…ß—„&ßÉr–¬ˆDìo¯Z¾Ø™XËu™>}zpô„©7”þ[9·J\ƒ¥°õÙò€¹Ö¬Û/~øáûxÅ»•;‰›2Eú¿»Åœ*k=÷•Y,y¼ÿSÿÀÒgd»ä}ù[**N5yæór¾ÔÈ}ýj‰ÄÞZZQsÁªåµ/ûk¥]”Ì"ÑòÌ•’dÍUÊD¿U_·ôùv5´_yºíjyùícU0(³CšOËÉo[ÖÙ{9wfKRê©JJí¬”oˑϲʅŸ–Ï ÿÉÆ¬4&ú¯s|öìÙEÁÞfË9v·Ô?)–¿œÇƒb•9Ûµ‘³ƒ¥€çEÛ.š@OòPÍ­ÚÒ?“ÀÁr#–¢E°´ú‹Vá-s*ªç¯\¾ø¹UL5 € € € € € € € €øp·n½íõôïUooÝâkŸD‚Ξ0I]}íMª°Èïsù‰´Â>¤O€k%}¶ÔŒ¤K@RMb'7*Óµ£__^ºÉ«ýÕ«—–˜ŸÈë§åUÕ •±þ[ê.ðÚO’×&—Y…3”:LèµOgå’€8ßwR1ÏÈì{_©¯[²¾³º:ÛÖP·ä%Ù^âY@R" dõ{t@‡½n.mOþñrN O?UQYýRŸá‰B² € € € € € € € €@BÎÌŠ7Þr«šY:Oõ-ö5 ‹ïvz©nš«nœUAr£o53U€k%S ýBü HÒK£¤B}®~Ym¹ŸäÆ5›úe‹ÿ/ª£×IÍÞe®J*ΜN |n,+«d)³òIš×’}{¶_“hâZ}¸ö×2®d.¿C^Ý“„©•Ï«‘ijô,rŒ"Ê6ß‹?<¾þÚ”Ù"ëãõP¸vgCxñïXüËåË}ñÝ4J¦«üé÷*¯ÙçTUÖ–ò—gLý>»ñÊx’;J4Ô-ª5-Ñ e,›;–u\—$Dz²yÕ_î¸ÝïziiÕË(')Ëu‘¾¼ÞÒtâªDÏ‘¶•?ï®_¶ÈINýï¶Ûc¾×­ Ž1‹cÈ$EwÄ*;½Ý˜¿oÇöôNíß<®Ým>Tæµ/9sMŽ×5·{'“ž¹kÆlɶϾVῈûo@³ñx´i†ß䯶õm ‡O,¯[üm;j®ŸƒmËxŸ˜É1‰¹± ÐNÀ™"¼Ü*ª“ÄÏ´+HÃJëôâÚúVEeÍC%%%ÌÄ›cªD@@@@@@@p{ö¹ª²úóêÊ«?©† åìQ:謡ªä†YjÁg¿¢&œç:áŠGM#y\+™wLè àSàx4b_W_Wû+Ÿñ†­\¶äeGË%ÊtÐf£$ä8 `ÞSŽµÙ§í[o~äg’éÊ._V[³fÍšHÛýã}/IA¯ÛÊ,ð56­þ&Þú}ÆÛ2æ…˨ý»+V¸&=jX¶èß$aÕ3‘4` ~"õôêƒÕÿY^W{ëšpø¨W¬Wy}ý’]‘¦I5ά‡®‹eéïÌœ9¿Ø5(Fa°0xç©™,c„8Óž°íHåªUËÞ‹“@Aã¡È×äBð¬Ó™ÑT’ÇÇÓÄìÙ³‹d?×™-åx=XÿÀâïÇSo¬Xç|Ó‘È-’ä»?VÌ©í&/ïºSï³ðoV}–UVO“Q ÀæÅÇZJœäÛdŽIC¸vm$"IŽÊ|L=쫉1œ €@’N’áÀacä‹pV’UÅ·»Ö3ócÙÉÇp|U € € € € € € € [À™aE@Ì ª .¾¬õÕxì¨z{ëõÎÛo¨Ã‡*gýĉæN™z÷ŽÕš9bäuÖaƱ1û¸V><†\+Ù.3èy¶‰Þ¹rù’çR1òú–>]^Y³Dêªv­O«a¥ÕÓW-_ü‚k\'…e¡š«$¹«¦“¢v›$¹ì1IÚüJ»I¬H’ãCåU5ÿ,U|íi÷&I<š,³+zÎPèVOÇ2Û¶¿ 34Þ×q{2ë­³íiUêU‡$4=¹Ïvç9n^j–U«îßSVV}.Ð/˳éCcת‹‹ŠwJùbÇt^b”5Gêv]$ð{+Â÷mp J põꥇ+æ-øª²¿öÚÝXÖd‰yË+îTy^Q'!²×©õÎÿšUoOlëòå÷½!³iþ‹¥-ç™þ˜‹$/_/…¿ˆÁÙöY(Ö?iPÊÍT’RM‹=ëÁïOIR¢Ìû✪sòTàÒ¬åÖ4e±HpŒmC  àK`ÀÐ1߉3¹Q~¬D=-w²¯+mÞ5¶Ù¯µ5RîmÇʯa\(uMòÕ°É—ï—ä<å?ó»q € € € € € € € €¤Z ¨w5õ‚é­¯SuG"ÕØxT575©`~¾*èU¨ z)Ëâ™ÏSFüíy\+=ï˜3bÈF³¨¡nÉÒTö<Òtâïò óȳ¿®¹e<ë|i7îGm©^ý•¤µ#-¶q’âRºÈŒˆß’ç™±MŒU±3nyvÚIL]‚£Qk$¹ñ—±ÚLt»Ì¶÷—r\oXe,ï5‰Üšì,˜õ±¡añ^Iü‚$6tV~j›Ñê«*ú± ‡£§¶yýª#gà%®qÆm>ùwט$ —?°d¹œ/¯»/­Õt\¿‚b5ÆkþS™îówIt½Ó]uÄ®7AýÎ9Þi€l4Ú\+œr9u²iÉ®ÏÂòÊù“ÃàX».r<¾â̘êg¡3[ożš(K-Î] ?)àú¡‹ 
€€»@żêÙr§ñÿÜ£>,•_éØklõæDdìòºEW××-ºSnèï–ëŸÈû¯I’bUý²E“¹J¦j_,{µø©Wb~<·j¾ó«, € € € € € € € €dŒ@^^ž*.ú¤ ‹Hn̘£CG2I€k%“Ž}A”ŠD£?LµÃªUËÞ“:_óª×Röh¯˜NÊeÒ2=»“íí6É3Ï_{0\»½ÝÆÔ¬H®©õªÊ2ú“^1ñ”Ûvô;ñÄû‰½1(yhŸöŠ5¶}÷ÃßwÀ+.ÑrI\!©pO¸í/}ì\]×Ôòù–÷Ì”r.ݓα9c’sñu·±µ–ÙjgL›[I‚c7,õõKß•=ß±iÉœb™ ɘ?ȱ|pÖ¬ÛûwŒÉôõ¬û,Ô–÷Ì«r,$‡#¥³¾ž:ŽGÞÿgÉaÝxj¿ñ 0ƒc|^D#€§JKo*ö‹œœÞãüÔÂæÙ|¶¡¡voŒÓ›ièeåé9Õÿ%¿Â²Ljwº°“7ò+y– ÜW ³&>ÚI›@@@@@@@@@@@ÀK@f\^úŠWX‚åÎ3“Ýö5JŸåVÞYYiEõty˜yDgem3öíÙ~ïGë©}gZ¢KTPÓu;¥®º"*\7%Ûº$½Ô^òX²õtÜ¿w ð bYÔq{Ûuy.üµsüž¶ÛÒñ^&Öù‰(×Ip,Ë|EÚ~ÀoûÒ÷[¼|¨hßú“dÄ­^³µmõŽ«~m5{Æu™ÄlõŒ‹3@ÎGgÆË+Åv«36IrÛz`ÏŽ·e†ÏãqV•9áYøY¨5ÇkOc›ï§ yõêÕÍe• ¿oiµ(]mär½$8æòÑel V¼Âà¿ÈïG ðjÄ(û[õËßå×±|åòÅÏÍ;÷b« x©Üìßܱ¼íº$Y¨‹þR¶}«ívÞ#€ € € € € € € € € € €€?Û˜ÿó”$/># Pwºïiú¹—ŸYšTsÎÜÚ~‹L%·,ÉVõõ÷½S^Y³FZ½¶]ËÆ•qËl}æ5™åq˰ˆLé­TÒ ŽÚ¨”'7:ý–ãSÖ®ÿ¬Èì?Wáp´“¢”nª/z¨¼ªf—<'>GÚ8Gݸ²nɳ±ÚKÙvcÞòšcÈ2Æ9Wü/¶ý– ¸§MZÚÜURR–k!â¿bïȆºÅË$ÊyåÌ’mŸ…¥¡äžâvZ““õO¹Å$[¶Ûn ·Š~,“\e݌Ɏ=ÙýIpLVý@ G Ì ÍŸâý ¹7æÞúºø“O¡®X±â ÜDU :æ ¹‰»òÔöÎþÊ—àÿ“©«ÿ+ÝS‚wÖ6Û@@@@@@@@@@Èzmþ”®1HrÍNϺµŽ/©K*”¸RÏzUô^ï˜$#lû‡Æ²^•ç§·Ë~Í:¡¶Ô×/uÆ,COíbk³6µ5*õ©OU÷–„¿‹ä™l×%bô ×€ÔÚÒ•'¤º1«Ô:Ð?¹”;qÞ‹$fJç·I ó:cÇ@¶Ë$„i^´~Ë«|Ð+¦m¹hyËRm7ñ^&š ƒ–åù9¨ùåGLÏ;g†ÚŠª…µRûWÓÓBîÖJ‚cî[F†iXyÿ*ÿ¸6aÌ3ï¼¹ñó®1> _K™=û¶9Á¢à³rSåü:Fç‹Výòûäý­þ}çlE@@@@@@@@@@Nd¶A™‰mK§e)Øh©Èa¥ÜS8dF¾^ñ45·ªjœ¤8^àºQ»ëë–>ï“‚Âú𒇥ç•öE·¤>Á±w?u¹<«í~€”Z÷`¸v{Úx²£Ì’À;ÁQâÆú¸ü9#Y1‘>>úèâc‰ìï>FGeOwjIîtŸŽ±C£+—-ÛYQU#ט.îPÔnUŽqåØs§Ž=~ê_¯צ-¡¹]£Ù¶’…Ÿ…’E=Ç#7YEìèoºâPØQ»Þ X$8Ɖ×gÝ„#€9)PQqûxù!‘[¼gÛêk©úe‡¼ÿù1Œy^mÊ ìWKKoêG9 € € € € € € € € € €|$ 2ëe-m³×Em%ÉWž‹û,v·Tðú›:Y5/v²1k7É,‹Ûêë—ìJõt@;‰‚®‹ÌNù‚k@Š e¬>ÚóîwŠ»•|u&¸®©Ï¶ò5S©•J°úº%ë•1+Ýê”^åõÊÿ¬[ e € € € € € € € € € € Ð^@žÃ}³ý–Ô®¢©OpT¶åÕK£Í¯˜¬*×éI2”Ù3¯òr0Z§õéØþnûøV¯D)£ô²_ÆçMŸ>=Xª¾®¢ªúG–ÖÿÙq¬©X‹ŸJ=¾“”%%àæ€¥÷± ÷–W.|ª¢²úç†N—:ÜsRÑÙ ®#Û> ûZã”Ö® ±’mè$zû>7’9<«V­:"ýIÛlÀÉô-“÷uŸÓ5“{Nß@npn¬äFæ3^MËÏ?Üí“H¹Qö7´ ÌqÛW[&$åßu‹¡ @@@@@@@@@@Ú ˜Cm×RýÞ¶ ¨~®ÕJ]|‰UZ q­P e±œšÁQfßÛã5æDÊV—xá[v´K׆ÃMåU Ù*GÄ“œ1}KKo¼jÕýiq‰Õ®Çv«4´`|P« %÷òcr^_!ýt{9ûy9{Ô³xEÝ¢­•5Jr™ëóögT ÉqÒ'™Ðúx  ¾U^Uó¬?f”y4ÒØòèªUËÞ;cŸœÞ]Ÿ…–ÑüN*ù\ו‡L+'[OêÊ6³½-³ýÒèR1¦Þ(7+CÝ•™§R={ã©öœYå&õaéìSÛÎü«/ž[¹ðçíÌ2¶ € € € € € € € € € € 
€@G™ýípÇm©\onîmµ¦w¥²Víú\ó‡-™×SÙbw×%ÆHC$ÿNô¬×²î‘g¹ÿÛ3.•Æ ’„=×…yNß»<ÁqöìÛÎ ô Ž—þÐöx¹†Î“¤®©’Ü5Y¶žê´{ïOE¥æoKÓ‰/ ó/•öG&Z£œ gɾ·ÉßÛ‚Eª¢rá+2à£Ú˜ß¼fšþ°1>‘hÝÙ°_¶}ÚJóœÂT+'Q¸Ëc«·µg§º¬;YÑ ŽYq˜è$dŠ€Ü\ÝèÕ™žüa¯˜dÊevÈe2¶K‚£Üjs«´ñ½dÚaßôÈ4ñÿâVóµªkήJÜb(C@@@@@@@@@H­€<œÖÇÔööÃÚäÙfÏ#vz7Ó1.·:²ö»•'R6wîÜ~²Ÿt$= +“õZÇâ‘ÜèÄØÚ hMÃÿ+ …ú P…É yµe&hcMPÚL0FO®õý¨I«Í$z]®tºÎl‹¥5³ƒAóGyª¾÷é‚dÞhu¾Œè|yHÿ¯Ï3EG&UÖÇÅqò •LXyùícµÖÝë0V-_¼Þ=&¹Òúú¥ï–WÖl•¾œ»&}éܪªq+–-Û;†r_ H¨cæxî”"€ €ôçþ–@@@@@@@ =ò<|4=5§³Vç¹~÷eûöY—¸é:¢hêgp (k k›™^ÐI÷¿¬²z²$íý¹äŒTJîâ lŸ}®>¼hÕÜÊ…ç[ÆüRž»¿6-‡Pk'/özq»^盟TTÕ¬0&úïõuKŸMK{]Ti¶}Êù:Ì“&jwi¡$½’ó³[|$@‚ãG¼;)PRR’7`èèR¹˜n‘lõ›å’ê}tiɻ֕ÖËÍù@-FŸÜ$«Ö&hLEÕÂõrQ>dŒ©k¨[¼Ù©#Kù¼…_—„Ê…nuÉê“õu‹>ï“겲ʚ9ò³ÿæZ¯Ö,_¶è/\c|fúqò9Œ¬ 3Áàu]w]¦Ú~RJäGÒ¾üNZpIp”«Ò\-1ÛÒÞ@ ƒ†©7#;3¸‡t @@ÿ#äþ–@@@@@@@G`æÌ™2›\ãëÖ­kñˆÉªb£¢'RÝá€62¢×“â©n5uõ;ñg†Bƒ ­Â‘Þ|Ns*ÇhEÝ¢­2®ëdr¡;åð~W2`ÎJz‡š´Î—-ó” „Ê+«·í‘pxw‡(VÓ Çµ5çÉ­jÛ téç eÔ±,þHq£L[ÙÉibÓV?g™€dÝ—:f£¥­åòåô&(¹û@IDATùŠö¼Ð;¢ìë|»O—Š–º^-¯\ø«Ò񻁮»Í6æQI¼œàúRjáܹs½nÖâmÚ5^.&ùÒsï—ôÝI~KzɆã”ô 3°9©/ðê–Ä<ᓊr£ì5^õÈl«3¼b(G ×&Çäú € €@˜Äým:Ú @@@@@@p°ûôéí!¥2[‘g ¤úf5ƒe9Éuq/sç-¼­(Pø¦¤|Q^éMn”© e­'å”üœm”ë„WqÄc‡úºÚ_íß½}´m›OKÖz„'UìäÒhmÕ ·È¤a–TeììKÀh噼0Ñ.½Æ£Êêã«ó Áñ4EÏ~3§jÁ••5OŸLlœ˜b KÒïÈ+,x£|^Í7JB¡¤.ÔáÚ?)£6¹õQÚ+° Šou‹IeYYYõ¹ó»É­Nù"Ü»ý͸Åx•eÓqòKv–›i^ý–›žW¼bRS®_ò®G_êC¹-pEÁ”Ü £C@èQÜßö¨ÃÍ`@@@@@@@Wæ÷ß?ìàj]XRR’ÞÄ5ÏNd~€äÉü^ºôжãžÕ²|Þ¿·´Y*'I±K͉IB£ä}¼" ÿ)EÍ=d7 ª_V{½“lhû@â'¶çš5kŽ7[^¹`V}Ý’õ‰´m<±X¿ëšThÔÕsªªF¯\¶lG"mijÜzF¢ö½ñÔy*6›Ó©1äÂßC¡’<Üu,Æœx(\»Ë5&E…«W/=,¿æpHn¢úŪÒI*.-½mèªU÷ï‰Ãvr]  -µ ÏõꇇëCe| € €9.àÜ×:÷·, € € € € € € €œIx$qQ»&8ZEy$8ž‹ñ·ÅÖòÝó•ÌFØ«>\Û£Š¬Ù\VY’ÎJrcü‹$26km6Éž/‹Ç˶m¿Ò¢¬Mò ýÎøj3ùñŧ/ú‘px·Ô~órf;0xÌ pÕõÚ¨+d’¯dû*¹ujÑœÐü +ÃK;– ¥%AÙ}1ºkö¼úäÞãžWJ‚cÏ;æ­#¾!êW(¬—„­ëº…@«aÚX¿¯˜W]¹üÅÄÛ'aK²ØýJcí+ÉòØ_þíRþ½X1©Ø._ð“å¦ðRºÖÉ—Ñ+1ggûq:c@Y¼¡—Ýë\åpF·]^vÓiï|·öt0oœ”“àè†DYÎ ÜØk†z¢éEµ¡ekΕ"€ €ä¦ÀEÁs”s_Ë‚ € € € € € € €@[™Uð€$ÑŒi»­ãû|Û=²c|O\oRÇ÷ç«"סK2é'Ι9Ó50ƒ o ÕŒ”™ O%©q½VöJ£ÌCõvóK*ŽÆ³g±F[21ùëä±]+}v^ßœ={vQ^¯~Wk+ æIQ¹°³ñxms&ö Xy#qŸñŠ¥<­öyîeôxϘÈù=:…ÕõˆªHp쇹ý §†BùÅVÑcrÁÄõdœÜü¼'_Ê¿‘/¨u’µ7°÷He†[Ê!3É] ”U*³ÆõoßbŒ5­ûH+ËB5W7„k/€¸éÇ=ÒVÌG§2ɘ_ Òšà¨^à53³ü2Aܳ7æÊqŠë 
fp°•§‡yuÏhý¶WL*Ë%«»\ƒ® ŽVž:[Ú|.•íRÙ& ÿ(Pwõ_ ¾¼ÿ§jwt¶uŸþ"€ €ôpa­÷³Î}-  € € € € € € €mdÂ#™ÁÑ}±óŒó<ñF÷¨ž]úx8|¸¢²&*³õÅžÇQÊú6J¤¶e«–ÌRùSɲà«ÿFý1bG¾œÈDOžõ3P¬]Ãä9 Ë5  |ðÁFiæ7'_ª¼|þ(™€h¶<Ç_&½¿^¶ûî£ w¾ìW}ýÒw» ë=ª ˶·(ËýPÈÌ£ %§&yqûöÈ]Ipì‡}¢Uø}ùpôÜ(‰„+"ûÛ«–/~Á‹kúôéÁѦÞPúo%é¯Ä+ÞÉDWsÿ¬Y·_üðÃ÷yÞ\µ­oû›{îÔ½rS6¤íö¶ï¥þie• .l¨[òRÛí)|ï<_è$QÆ\į¹Ñ¿?f@Œ‚\9N1†—u›-­‡zuZ€·yŤ²\nŠvxÕ'I¾#½b(G '[½ÕwúVýÃÁ{Hrì œ1"€ €9"à$7:÷±Îý,  € € € € € € €gh³KÖÎØÜvƒ¥Z{j»÷g™ìÆ™jÄ%m6èhÞ$YÝÖfSÖ¼ufo”gËK=N—Öñ[ÝUÿÀ¢o¥kp’2ЫnÉÃpÏXóª å'“.Uÿ¼´´jD°0x§tó¯$§ÄÏ$aA |Yöýû4t­GWyÔ¶·)”KXŽD¬Å¨©N¾ÓºuëZb…¤j»3Ù™’öü\k©j3ê!Á1Žbc(-tfXü _»õŠQ‘;êë–>ï+^‚œ‹]^«åíê²Ê…5òòorQvÛ_¾œÆôÍû_‰)w‹ëXæ´5fÂÔ%2žÿ¯cYûuk¾¬§%Á±,T}µür€ë”Þ’é½ò·áp\S†åÒqj,²wM¦Ô*çªëŒ­·»¤¸Pú´Ã³O2}wŠ›¥:²V`tÞõ_¿¢¾yp‰Úв5kÇAÇ@@z†ÀEÁsZgn$¹±goF‰ € € € € € €$$`Ôsòˆóm®ûj}ky Ë+k–Êóõ3´1o¥Þ–™îÞVƼ¨·¶~»¡¡v_ ›KqUæ9I-s«T[¤Ü™Ñ/뙽ñ³2¾Ø3Tž‘1æ?ë¨M[r£ÓŒd¢ p2_zªåìÉàeÕªeïI÷¾Q^~û" þQòZF{uWòï¦{ÅP¿€“¯#Ÿ=¯Éž“cî­uþÈqS'KÒË1cRTpŽ]8Ut~Šªë1Õd\Fs‘ï†Î©ª-“ôÞë«icê÷ÙWÆ“ÜØ±Þ†ºEµ¦%z¡|ÁmîXÖq]’´ÊÊæU;Ùèq.ÆsÕ¹_uî[InÌ–£F?@@@@@@@îˆÚj­WËFu]‚£¤¥])YiçJvÚ§$™ëó–Òÿjië`ÐzÞ*ДW-Ü­B!Ï$;¯1¥£\œþèU¯ŒíS^1™Z.³7~ƳoFín:üþÿóŒK2@žqçU…L©™–¯vã-¯¯¿ïi¹^fìÛícßq>bIH@?ãµ[ ¨fzŤ¢\ÚÉÚωTŒ?Ñ:˜Á1Q¹,Ü/` ~"¿Îàg*ßÿ©¯«ý‚ Q~4!¹¥¾~É®ÒÒÛ® Ÿ›•©nµY–þÎÌ™ó¯^½ô°[\Û²†ºÅ¯–W.|^²Ýg´ÝÞî½Ö#e¦Å’†ðâ'ÛmOråŠP¨Pª¸Õµcv6¨æÇ\c:æâqê0Ĭ\õ3ƒ£Üˆ7uåà,Ë:îÕž\$8z!QÞãœÙÇo*¼L}²×¥jý‰7ÔÚæMjsËvµ+ºO5šfùòKúë¯Ç™2`@@ÄäÇtT‘.PÃÔäàuEÁuIþ¹ò#nYñßè4{!€ € € € € € €¤L`Ç[_7aj“<Çì<ÛÞùbÌyÎ$<+—-ÛÑy@j¶–•Õ ’G4ǹ×f^SápÔ=¦{J£-ö­ Çÿ^¯Õù¥¥U#NÎÞ—öŽÎ;· WñOåÙÖZYÛ”m¿Ób̶Ãï¿ûΚ5k<Ÿ%?ÕÁÒÒÒ¾rŽœ}j=Ö_cÔOV¯^Ý«<Û[ó0´¾Ú«®Dgpœ9sfA¯â³ÎÕ¶ž¤2£ŸQ“¤­þ’#3Ë«ÍDË—/¿ïŠÊšoKÎÌOÝê™MÇJ¹s’Ùnq”%  í§„ö·=å9OKù÷ÜbRQ&ÉÄŸ•G‚Xâ Á1N°l ¯¨¸]~A•zõ_¾øžÜ¿g»3“bʲ;V­ºOYYõuº@¿,רË,xº¸¨8p§´ý#¯~¶-—:ï•õØ ŽR¨-½@þ¤4Áqx P~d…UÐï¿$ɺ(V˜œ “æTT_¾rùâçbÅ$»½,Ts•|äNL¶žž¸¿GzyO$ÉÍ1›¼¼¿”‘¹où0¯ùHäVÉä¤Z¡¡añ^mGY!]£ÕWãr:Ú|è~©Ôõ×äCª¢¤¤ÄóKеsgêê36¹ÁI¾ô½äòqò¡rSXàÙµh×Îà(7m®ç½Ó_¹QKñyï©@ € € € € € € € € € €t‘€i¹Ì«1_Ͼ{UâZnYÆI¢t]l­d–µÌ\NæQ<ãÕ;I’új댈^I–Ïœ9¿XæòÇ3'iUú!“OéËåÝ·äFgïhžñ•àØ|Ô¼yfk)ÞbY7û¬1ßgœóÌütIj¼E,ÎiMnìlÇ<3»³Í©Ú¶jyí+2ÏXÌ$S§#Ë®@óÁTµI= ¬X±Âqur‹\—@žåäV¥m‘ÉÙÒZÚ:ž»&¼e@ÿèB n 
…Ê×§½ª2¶}÷ÃßwÀ+.Ñòå,Y!óB>á¶¿$‘«‹*Üb:–}øAd:no¿®‹ ²/$™‘rˆ|»ÜؾkÆ<ÓPWûz‡­1Wsý8Åx–m¼»xGËG‚£$.{ýD–º‰ € € € € € € € € € €"Q³ÈIœê¸½ÝºVSœ™ÅÚmKáJYYÍ I2»ÉµJcššGŸvéæBc«_zwAÈëÕï‹ÞqÉEö ü¹ä ðªEò@<“º´²%§Ä{ÉÏÏKkŽÑܪªqr¦Öx÷D"´òà('ÿu¦,Ÿ¤³¶f†BgI§{wVvj›$¥îX7Zçoª¢?óªQ2ƒ«ÊC fyÅ%R^1oA…39["û²ÇŒ~å†@ï@áä"‰9ͪ3Jù@­Á¿'Ý#–/üŸxµ!¿Üð¯˜3ÊmåÙwÉÆ_pÆ~ n°òõmR_žÛîbz¯[yDzqœ::›Ö÷ ŽFuí Ž¶å=ƒ£ü ŽÙtžÑW@@@@@@@@@@âX^²EÂW{íbYúgÓ§OO˳Å:_]fÎsOHÓêþÕ«—öêgw–ׇkë%Wt«Wd–¶šš?Å+.ÑòO}ªÚI”û+¯ý[[#vWœeô^1N¹4—ú‰K0FREòInK_û{œOm*‰ÚÑ5mV;«õÇn…†u^˜üÖBUè™@,9&¾'ÐJ¾G=¯†úº%ë2ò¹¶¬_Ü õóŠ‹§\\+øy<ûÛ^ ­ÙÕí›b­»$˻̫mÉÚÿ¹ ‡£^qɖׇ=$»Üê‘_n¸¢$êãÓ±lùµOÊŒŠÛ;no».³CÎt~¢í¶„ßk÷)ºåW›Gˆ§þžpœâñÈ´XíkÇ®Mp4:êùë r^¥å!™v|è €ÿ?{w'Uyçûÿwªzš†fivQ÷h$jD£ 4¶ ²LbþÉd’ÌLf¹3“›m&ËÜ™ûJfÉ?³Ý;QhiT"n¨qwDAdß—†î¦÷:çþž‚Öî¦êœSÕUÕµ|Î}Uºê<ÏyÎó¼OÕ)ÆÛßþ!€ € € € € € € € €䪀æþÙsí–\0fÜùåÙ/ÆsçÞ5^Ckßö:¬­ÍÉ„ð­ëø•×ZL¥¾` øH¬¹ïqÃ=¬~­ßhþA+zmÖc55ìñêe[ί>¦Ý‘À¥~úÅÓgÎüŦE¦¾è÷XÍ•ùíûhõhNÅË!P(¾OÇLJŽÊ ȵ^óÕõ›02[2BÖÏ=‡·¬¥âU‰úüÞrË]Š%k´êè`ÏsÓ!ª@R>˜QÏFCÊLr_ÃvS½NÜîX«½ú$¨ÝÖÀÕ3®cYV°,Xp¹kŸ3m]çýgîî²'_òª.{âxqú/-Ls?ÔYË_—È¡ëäΖƭZÑý/ŠèÜC¶ŠŠ \£ ´xç¸Wõ<ž € € € € € € € € € € Ö«ª—=¥ñ´Mž“´äsªÜäÙÏg‡p@(?ÏTô(Êâ¼¹fåÒ7}Û«ÝöÛM÷jXγ⡆Õ&–Š—MŸ>ÝwÏÏÂæÎ_øK P.öÑ×ÙíÿÓG?im°M1)Þtß,™“Œ*Ÿúžû²æ~ò®­Zí°´ë×WŽØÖ¿¹öÐF½f3*ç/Œi^cšö9U‹®Ô@êzõ 9²Æ«í=Тlkôó»Êk}/|i` ä©Ù³g—yõukŸ5ëΡEýò6hÈû ·~´y pô6Êè}úËåúÁËóXÄÆµÕKÌVJ6½Y¸uA'ð…X'c…Úï —Xv9в \š}5ƒ>Æpä^_ƒî”K×)—tê«ÿ jöšO^^Èë³æ5DLíNÐö>Ÿå´Ç4(@@@@@@@@@@@ ãœüÄkÒÂ) Ι·è6¯¾^í&ا¿»È«¯ØÎ=û¤I‡Wª«›Ûù©¯éXÖmåCG?3sæ>ª-zXyÇâhÁïy÷Ô8«ã< • ß÷Ó÷ñǬÕ^õê«ï‹Ï?é¼úÅÐnUÎ_ôÃ@0ø;óÞ‹á8}[I¿Xú·5µþíïù;ÿ–þfμ…•±ŒíÖ·²òî‘€Uã½>çÍÕËï_ï6m‰holýŽ~Bê¼FÓkvE °ôÍxCßsïX07¯$_ƒÛÖ…^ç¢Ý[€€£·QF÷°‚–gPP¿ØRú—´Ò¢óyÏ»û…Y¹òÁO5õþ|÷ý]_;WͬZxv×}1½²Äñ8:ÎöšK6Ä2j.]§X\Ò©¯c9 ^ó YVBÿú†×ù!_çkõ‡v@@@@@@@@@@Èlšê%h¨ç·ž«°¬Kä ž}rUUgÿæÞ±ð++ÆlÒ’x7Ghî¶Ëyh劥OtÛ™Ö/W­Xú¯š{Xík’–uU~Iþ«¦J¡¯þ:™*r•óÿƒ_ÁJ[½´µÿ0ÂPQwÙ–ßêïUV-šu Ÿ sæ,*¯œ¿ø ÍxüGvÉéïóTánk×>tD³1Ëü£!Ç¥•w,úñ¬Y³b Qv{μS¬ü¼µƬèÞÖýµÑ}¯“#°fÍÃûô3ò7~Fׂrã4€»nî¼E+o›¿à=Æõ½ªÁî¼¹w,ºAû?)à#úþéç<ôñð®þå==ÒX@«Î]­7K×ͱ¬O\;$¸ñ€Ý¼mX XO«yç(›#Ö•Údn Þe;¡ïÕƒ¦wÚÕå©9g¾eÝ­;Ö¥Áç‹Ùó\«“åÖݱd‰¶kEdÿ[®]'ÿ2éÔÓò 8B’Ò€£¦m‹½„ô‹¹Å«í € € € € € € € € € €@2¾:ÃýWk·ï— 
Ͻ+Ï'sŒ@.´ž¬ýn~ŸWéï¼Op[¯þZ½É’üü¼@ñâ w,ü‹•NóR]r;Æ´Ý6þ¨<§àŸ5ØX5 ÐiGœ£M¡¦?é´+cžÚ-'¾,,¢k=ÛkÒê9Î ×Wj詽ÝùÁš•K}…¹¡ªª©Uü§zü÷4÷á?Ðg;_«©yp§×¼:··‡ä¡‚ üD÷yÿÎ{@Vhàò·íM-kÂbÇñz®Õ'jE»¯kZd±¾GʽúGk׌Å©ª úy_vŒÑd7}¿$Pr›ZîØé§Î­D¯ëOòJʾ;gþâÿu ÔøS¹3RßHûfWÝ55Ìû‘¦Ffë¹¼? Ž|¨dÙH'd_Ì5Ë—ü»†¸gèå™åë`½§åI°rî¼Åô¾õ˜^Ôm¶#ûõêÕ÷ó`Ër†‰cË™©¯ã~_wÌE p¹ÿè£cý$à˜å[Ãv{Ý-v(¥Gsã×$þ~¥_¿0úÍšuçà5k:­O¤ýíÇÉ/øÿ›ã#µ‡÷$î€cÀ ,rûúÑÄ¿c[­÷E=w”†\»NQÒ|·©àèþi²$è8Lä"õ}îýKË"à˜HtÆB@@@@@@@@@˜î¹ÉãG6Xþõ·½hFµk×6Κ»h^~¾õšvõü]c Ö«A¯5•R|Äš¿hí8O‰mí ‰}@ZBõVIðÜ€mMÖ~“ôW©'kÀç ýYâ1SÍŽ²ûë몫ûêŸfV¯^}|ÖÜ…wäçÉKjä«Ò¥:ÍPû¬Ú©ÁªÇœ³!´÷YNð Õj×[öÛÉ;'pÎÑpÜxýýô™ú;áe±,]¯Ñ¯V…«uÆr”ÈÚê%»*ç/ü'­^èYÙN×a²FßÈ/.X8gÞ¢ѲUkZ¤iûÕÕºŸõ–[î,Ê̳¦éoÛߣëùB÷>Ý_›Ü…®}ö^|ʲ‚3¥xÄZ‘]ÝöÚ¼×æÞ±à[bµš©÷¦×¨\çü¿‡Jþ\ƒm/ꤶk­«Om±¶ÛÛ¶7Úêó좑Á<­ýFë|G;8_ŸŸªÖ©O<7Çip$ôÚO‡gK¡€ÓtâðÅý‡¬ýìzù9¹VãÔ÷Å×M×@—ë«/¯»ìŒ8¢$Wé^UP=åÏâ³øâêÒôþi ô\b ð[ þ‡g¿Dvpœr3;·-XœgæSÀÑüƒLÿêÁr]zø†i|=ë„ÙU‹§­®¾c¤öhû®¬ª*Ö)ÏÖ~zÿ†Õ?¼Ã£O÷朻NÝ2âµ£Ýß²úoyïÿ# ‘k5çÓŠ¥®›û 8º ш € € € € € € € € € €@ö¬Y¹ä½ÊªEwYAëA]•gÈѬ\sƒôÇ×–õ5 j°ÇüO‰>ÌÖù–=~ŸúÔšä orϪË2ºj©Ä¨¹yŽÐ_ú÷ìTõ£?¾­×àÛZÎì)4*6Ôÿñi©}¶ipjMíÁ]õÙŽŸ 5ÿb`°ÄTVŒZ¬ªË–U¬³þk]Â_ëòçÎ[¤U­í:ƒºÆ¡z¥Gk.¤o—c<^è±GĶÿ@‘:–kŽ%hi 6†€£9õÊËVοè~{±ÇT>oÖP›¾¨:uI´ì‘iÉÏ—bÉ—S/:wýü¹gm!±æ®^þÀ>úÒ%ÁëÖ­k™9sæìü’ë4´ûÅe8ç¡cv}­¼bŒ{EP‡BVÝ;Õtoãu† Ìž=Û”(öq­z#šÒÇ©D¿«°m9\;Ditlùm”¦Ïv²à³>ŸTŠnÓ/¹R×î–s¯k{„Æ\½N(Ò|—©àè¾é?Z}ýî£øoÕ/YÏóiuÐfÿ#Ò@@@@@@@@@@2] ¦zÉ*;ä\“õÂb4üöíU+–hÈ,ó7 ÌiH3t–ßÛ×›«Ñš‡ÿ]jªÜ°aC{¼óØP]Ý Nh¦ŽU×xÔ`æ$ý=ö/™Ÿ1‡ç…ÖL­©^ö¸š¾ë5­ w½WŸHíGìúº®ñáHm©Úg2¾![¯^~ÿúT“óœ)`Ѝµ7¿Uï…ÏŸÙšØ=úž»oåÃKÔ××{Vg´„BVÝõ}„ߺÂëLå÷‹+ ˜.ë †$®ùk¹åWôO>|ì¶ sΗªªÓR­ççmú¬…Ÿ¿ŠôÌ©k;y|e¤·}¹zÜLÒ±MÿAêpÔ@y¹6 ózŸÏ‘£‰<'c!€ € € € € € € € € € €@ú ˜ß«oÙWêïÖ’²Ù:rÂvì;W.¿ÿßSvΜ¨fù²·ZCÎe`z+§ërŠp1LÛùiÍòû¿.ÕÕžÁ©.GxaÖb9N¥6µEhNÖ.Ûväïjì¦ë«^²×œ¤¡6ð¾þ°ÝOèÄp4!лqÚ-u?I­ŽÓ ï•¯¯^qÿCI:ÃÆ °fÍšúš‡—|Iß(zŠáPŸ]“Žmÿ…~F¿¦ØC† ñÎêYÒâsðœéæ–3ٷРfôª‚VÜó·-q¯¤¨%„gJ|ÙÍšu§–O–Ý<õ h…Iw»õ‰Ô–Ë×)’Gºî ˆwGýÇqJçxžÏë`JçÄÉ@@@@@@@@@@@ -ÖT/ûÄn•+ôwÝÿKÃ=qWþó³=ÇK!«eêªåK{µržŸ¹ÆÓÇóÚi%Gûgú{ãžÅsâ9Ç™Ç8ïiQœ/Ô¬Xò£3Ûâß³rÅ’§µÂçµzÍvÄ?Š¿#µÐÐïô­wŪå÷ÿ¸s@sýú¥'5lö’Û(Z)rÜmó\åÖ'j›†Ak–/Y¬ÉÊ{ô<Ç¢öKpƒš>ÜÖÔzžV0ým‚‡f¸ž 
8ú~øÏ&»q‚ˆ³¢gC}~´V†\£UI'Õ¬XúKÝ«ow­M”÷yÈÏÇ"àØÆ­[^f@Ðr´¢Ö*ÌÐM¿ˆãªàh–kµÚK$?ðs-yµJ£ÖhW_å~óŠóîÔ/G×ÏKȲÝC•Q®C._§($iºÛòüG¨c9&›²Ír¤Âë#®¥‹ 8¦ìŠp¢téÊ[­[å•–esÛ.Ù:*ZÕ[ÿA™®Sf^ €ôH@«ÏK‰U(Ãå2!´\Y8I../A‹¿qÔ#XF@@@@@@@2P`Õª%GuÚßœUµàWyÁÀßëï–ÌIè2g—í8¿\å4ÿ¦s€-¡çH“ÁNDúáÍUUÿZ(þë€eý‘N­(áÓsä°}úÇÚ»þÙT#Løø: ©ðyóÍwO)éü†òîÖ¼DbÃ'Ž³Þ‘Ðk–?ðj´ùkùÆ{õ7š®‰Önöà_êxß³Ž Μyçšü’‚ÿ­ù³^¹·¹¸µi€x³þjò·5Œúœ[?ÚzWà‰êê:ƒy³æ.úy^Pn×_µ»]ßcš•©Ð)²F+¡.Ñ`ã“Ý-,l×BVÝwwÝÔ}G®¿v lå:N¦¯_¿dú%ö&Å"€ç':ÚŒjj–í¯œ¿øwºþ[¢õѶ93gÎ,ñWu1°0Ú8áýŽóñ£Ë—½ìÚ'Jc._§($i¹;äXûƒ^('0:•“×éxžÏrlŽ©¼(œ+­ôÿP'›ße ÏÈA»6­æÆd@H¦€ ñŸtšekûÞðcmÓ+240@ô½^f]ªÆë¶Éœc#€ € € € € € € €ô†€VsüXÏ[Y9ïî+46vþÉÍú‹$#â‹þžæ»b9¿ÑΩ¿ê4kîܻƯ\ùàÖh}¼ö¯]ûÐíóÕ9sþ•ShÝpd‘Û¦zçÕ®¿§µSÃÚ!ûÑÚû_ȵϗO:·¯Y¹ä=Ÿyühμ…bݦú9¦?+Â?E;–×ÏÅ^G¬}Z¹u^óõìæu¯TWG (ú8pôØ,çGœk&à˜Å—\oäõ½<[‹a÷`³ìнF 8š/Þü>fë)t;Íܹ‹'ë êb·>Zö~·v·¶\¿Nn6éÔ É&ñ(x£_\žÃ„®Érô|zV—Ív]|hÊ^:û¤üôø2y§m[ö.’•!€ ƒ€ ûÿ²îy¦émùaÙ) Dýo‚1ŒJW@@@@@@@@ ç5Ë�磸°jÅÒÿÐæÑkÛé0™û/ÿ¦`v§+ê…«êi³ óóä+zÚ/é¯%Ç¢?Ëõu×ßœvä„V¿; !ŸW4Àöœmµ<÷èò‡w'zº«–ßÿcÓ<2b{¬zÉ^hxηÜr׀’¼ë­ Ü¨Fç[Ž Ößõ¬žý»/Fkx´h8ô˜öyß±œ×-[^·ÛäµU«–öJèétøõ»³fÍú~ ¸ìj ^)Kƒ°Îcy÷à¦1Ûu]‡õÍ|X׺CÃ`O·ÛöúÓãt_nÔת«´1þ@eÔ‘#7œöý'mý' už£M§éµ˜ªë¸Hçë:Í{ÿÌŠœŽÓ¤ïýƒ–åÔµj@ç-[œGW-_önä3õ|¯Žý€Žb ݸžÉ¹jùÒͺ×<²œü¾úùpßÙïÞ!÷Z 8fñ5w,û˜%A×ê—ÉV 9åÚ©—v¨G7ûœ–µçIñMÄŠº-¥¬m®G;ÏY iì¨Ch ;ÔêÈ’èÜ[rý:¹ë¤OëŽlsÎäv·’ÔúË”õó;ÚåƳB¡é£ÈLHÀîöCòýã¿•¡c©9!gA@ ƒLøÿÛÇ~-¿(ûšŒÊ’A3gª € € € € € € € €‰èTÅìu; •îIUjj;QW·ÿ(•é:éDyúøãÖjÓ#§Ÿõš6mZþ˜1µ¥4/$uûƒ-Çݪ¿}v`/¦?HÉ\8 i"ðdóòNÛ¶4™ Ó@@ ýÌ÷¦ùþdC@@@@@@@@@ “*ç-úmåüE߯¬Ztû¬¹‹.¼²ªª¸WÖp.õ:oƒÓäžsò Û³¸r_^­Ø—¤y,ë †œ†»j…ò&hû·>™Ü¦U*µÂ¢uqÔ58rEeå‚a55˺”xÕÊ–E–åTê±QÕ¦‡Ö­[×½ƒ¯®“/¦Þï¤aÙMno‡Ó3ü¢þ|.Ù³ur­Ë;óÔéÍ|ÙÈc˲†grdµ,@Ä ˜ïÏ/]"A‹¿”8UFB@@@@@@@@H­€5G3eÉ×Ç0§Ø©œ·xîÛ∳Åkõêå÷¯Oæœn­Z4ÂëV·sh.eÇ“ÕÕÇÜúäb¿Á˜õWÝyÍk‰V xƒWŸLno°›Ô@Ô¢e¶üàìîk,2ò 7–vßßùu($žLÄÆuJ„b²Ç°,Û³‚£¾›¦'{§Æ÷>Oȶ~Ÿš¹pz_à­Ö­rЮíý‰0@2LÀ|šïQ6@@@@@@@@@2U@‹£uùeòpVÈ’QZäêz}þ- Ðý}²×V`Éé9ò=γѣ='› 8fùewÄzÑk‰šF¾Ñ«O&·›d³Vb|Ôm Ž%sÎh·w±¯óGÞ_]}Bn,\§Î°éû|•ݼY9ì:CK®0Õ?]ûô°qzUU_‡kÙbý GôýùvOÅádŒÀ+-žùãŒY E@ Õ|¦Zœó!€ € € € € € € € €$TÀ²<ª":Íš5xBÏÙi°+«ªŠ% ßì´+âSÍp=±!Çwæåøú³~ù¡6ûÅ@¾GŽÕ’ 
̇tÍš‡÷¥döìÙeÁ¢Ò_kk·%bÛ;ÛgGÝá=;7lØÐœŒ9Ø!ûÞ@0xG´±õ1ý–[îðøã†Û7ß|w©XÖW¢õ?µß¹Ï½Ý+×É¿U¯ö¬®ÉüÅ+uè2¢²Š‘Whû—>=j*·Š¾ ïO÷û·þÒ³{t"F ƒ6·íÊ Ù2U@ÒK€ïÑôºÌ@@@@@@@@@ Vg›ˆ5-ÚQ¦¢c°(ÿmÿ?Ñúôdÿ°`ñ_Xb•{ŒÑf·:+<úäd³{@&'I²kÑuGw¿S>ttƒ†¡úº­,¯¤àÚþ·>‰j ”þ‰Þ4è÷Ô@¸þêÀ¡£Êù‹iYØŽ#;j{Ö¯_z2ç]U½l}åüE{ôœ#£Œ—_Ø/x«¶-5íÅý•úí Ÿ¹©,3}±q¡˜š1ÛYa,·€£íÀM:› Éš‘mY7{Ä–õ#æ<•¬ó3.é(°?t4§Åœ@ȾG3â21I@@@@@@@@@(¶-Oh<)ja4sXÀ²~0kÖkÖ¬yè`”aâÚ]9oÑ75¯ôwž;ΫV-áß#@yfd"î Њˆí:Ý—½¦¬QÃ?ž5kV?¯~=m7•­€hÀñÌͤ¡uC5™u¹>;'QáÆÓg²Å–%gžµókÎg¯«ê³ç‘ž„o*KEjŠg×)µÞ9¦ÆizÁqý2s,ë›7TUõOÆ çÌYT®7î{\ÇvœÖ¶“íºö¡,htZ²lE,@Ô ð=š:k΄ € € € € € € € € xf§é ÕvÙ²F笞>}º[A4×!º7Ι·ànÍ@ý[÷ýÝ_;ŽÓ²åÝ÷óú”Çx'8¶Ÿò©Ö€¼¢þßJ6Gqÿàw4À8Àë<Žm?äÕ'ÖövǾ×õÇš1sæÌ’éUUZíÒºÞ­¯ÞñÜÇr;8J×) Lºí®®‰ã¬t›–&uËJƒÅìÖ'Þ6+_þÌ«"«0W­]ûБxÏÁqd¢€#úÎgC@¸ø‹ƒ@@@@@@@@@ÒD`]uõaý•ò—¼¦£y+½vvÕâi^}ÝÚo®ª\9áO- ܧý<óy–8?]]½äu·1s¹Í0—q²eí5ÕKj4é»Ík=VÀúÑmUwOòêoû7.ì£Ç~Ïëx«#íör¯~±¶¯©^ö‰Þ¬^ŒvœÞ¤Jò‹Ì(ºYŸFë§¿ü{h×'›L²;¡×)¡œI̱^'Ðj¤ßKtUÔ[n¹k€†¿ëunßyõ¡@@@@@@@@@@@È #ýp&ÉcAZqñ†`PÞ¬œ¿x}勾äѽKóܹw¯œ·è?J‚%»4Üø+¯K‡/4‡ôÊJ»ùçšØuZÀ©¬0%V¥ß¸¯Æê Ñ †—m¨®npïs«Õo ¥ç·yi=VSóÀï~±÷0•5Õ{MÔ#2Go0®Á__.Û¸qc[Ô1âoà:Åo—Ò#WU/}±rÞâÝ„ýÄÖ€ü’²_jû7¢÷‰­¥°oþ¯õœýÜŽÒ/ã-:¿çÜúІ € € € € € € € € € € €d“@Íò^Õðáëš¾îg]ZØê˰¾¬ÇlÑbT[Åqvš‡cÉ.±åc†Ä¥m£´¯þ”Ñš‹š¢Ï]sG]Îí8 ¶X ¥º:Ôe?/ºøír/2M`¿Ýt¯&~xÍ[“ÃËÅ˦OŸ^äÕ7–ö¹ójÐËZìã;d·ÿOýâêRk7VëÍ&jxSÃc·êÀ·¸ nÛ¡ûÜÚ{ÒÆuê‰^Jµ±ÿ‡÷­ÿoÎüÅáÝÏ»GåüE?Ô÷çÝ^=mÇú¡öq¼úÑŽ € € € € € € € € € € €d“€Ó*­…ÍvIJ&ÍR«¡Å[ôçYÀ?¬ÀC`à™`@4¯uÿw4äx›æ¢.Òqýgñ'd‹ý‡«—ß¿-–ùäb_ÿ¨¹¨“Ek~¥ººÉ±ŸúZ’~èʇŽ~fæÌ;}T[ô±òŽÅ?ÐÏï÷¼{j*Ëqx´ú÷ýô§©L©É¯êèÇZ4DV½]6&s~\'ù4kZµ|éÃú~õ¬”¨7Ù˜}Çâ;z2ý9ó/²Äú;¯1ôKøÕÕ+î_áÕv@@@@@@@@@@@@ ÛV­Zr´Õv®ÖìÐG½¹6=ÿ>Ûv®_µ|Ù½9L9w^¦L4]æ©o°Š¹óýYºÌgÇ'›~½qãÆ6?óYµbé¿VÎ[ü% ðÍöìoYWå—ä¿:§jÁ·VU/{ʳ„³gÏ. 
öÿ=ŸJw&Ü(õÒÖnªÏ%u Yö½yüj<'±mûÞxŽ‹å®S,Z½Û7d‡¾ ßÕ4¾Û½4°œ‡çÌ[tQíÁ]?ܰaC»ßYO›6-ÿ¬ñ“þAûû ‹Óžj‘~çG?@@@@@@@@@@@@ «^²÷檪/J~§™¦‹S=7-¦õd“Ý´p]uõáTŸ;SÏçÊÉÔ5%uÞZEm¤Xòˤž$†Áûû_â3àh†µ[N|5XX:EK£žíu m³‚Áõ•ó=ÙÞîü`ÍÊ¥ozcÚo¨ªê_jÿ©ÿ=µêïç˜pÛùZM̓;}÷³ã£/{±rþâ­Z>v|,Ch³¥õdèÁXމ·/×)^¹Ô§Õ¶ãüJC_t¼NþOç>çÏc9e9>þøƒµ±o_®S¼r©?®½©öoóJÊæ…Ãϧ×>W[AyYCÃïj"ÿ·úúý6ÇÙqDš ·ó‡ÙàYZîñýûº¶¯a_›†o_Ýùɇå«3@@@@@@@@@@@@,ØT]ݺIäo­Zô@AÀþ…H`žæ ½lÍ€ìÓ<Õ2GœûV=¼ts¢ÇÏ…ñ8æÂUî¶FS‰qî æ9|@?˜%Ýš£¾ÔÀÕmü¶´¾'AÍê«Â€þO@‚á –þÏ@Vç“èxMíÁ]© gµ†–HAÞOÓï<—çvȾץ9áM\§„“&eÀ5kÖÔÏ™·ðF ?§oÿ¡~N¢¡á)ú0•¥@?4ÃÍÇP?RæÓd¶X>FúEx°ÍvæmŒ¡’멳ð¿ € € € € € € € € €é$pèÀ>ùxó{®S;~¢Œi~“ @@ütD{®?ý8ã ÉUU›ª«[Ïh`GRI•A3FÀã9ää@IDAT”Ymk<¦•퟉ã4¤fâÎ{šVþBo†Í:[ê=ª!Ëc~Ö¬)ëûüôKVŸ\¾NÉ2MƸ+WÞ¿)$­—hzwU2ÆïSãÁ+í¦K´Œ1áÆ~"€ € € € € € € € €)hoo—7_}A–/ýOÙ¹}k ÏÌ©@@@ Ù„“-Üu|Ž]=ròÕÚµkk^úC LµçWŠÐœGÛ"yôÀ®i«ª—¼’”sÄ0èºuëZ4ˆö ç!ŽóòªåK¶xöKr‡\½NIfMøð>üð—T:¡Ð­ÞžÐ˜ÏcßY³üþÛ×UWNèØ † € € € € € € € € àK`çöOdŲÿ”7_{QB¡¯cè„ € € Y /ònöæ¢ÀéÀÔŸßZµèWùëË™¡/Ë öÀC3“ÎSbËÿÝùé¦G7nÜØÖƒ±’p¨}¯Hð;n;"Ú'}¶Ü¼Néãïw&5ÕË¿²ªêÙáâo8bUY–\©ÇÆ*wäý ýºöЮ7lØœ²ß…Ñ@@@@@@@@ÈQúºòÒóëeǧ½^3!G¯ËF@@l àØéªšªh^æìÓǪ—ìÕÅÿØÒkv\ôš™ë×P_'¡P{x=æÚôí×_úö-•åƒdÜø‰aƒD¬îðÁý²cûÖSï“Óç>ÙPº°¨XŠôaLïð‘cd¨¾_òòÒóWÛÚZÃŽ÷ï•“ õ§'륩ñ¤‚ÁðûÞ¼÷ ¤ïûá2dèððº‚Ú–®[݉ZÙ»{§9|àôç I? MÒ¢ó³½­íÔçZ×ÕñÙ.Ðçæóa®•¹feÊÓuy’-÷¬´Žqb¹vïmoo—Ÿn‘CNß3ô~¾wœlÑ? 
pêû¥¯Þû„ïÁæ;fäè³vÿñòÐ@@¬HÏÿª5¼,@@@@@@@@H½À¡ƒûdóûoËÖ-›Â§h30¹F n˜‡ÙöïÝõYW‚štÁÅrÞ¤ ÃA°ÏRô¤¥¥Y>|ÿ-Ù±MÃ&º·êYíímŸ…8;¦·yÓ;ᧃWÈè³ÆÉX <´£9)?·~¼IÞxåù¨c_tÉU2ñü©ÛC¡P8XcÖ¼w÷Žˆ}ÌNt4‘=Ÿõyùù§dÂä©2yÊ4)--ûl¿ß'&ÄdÞ+æÜGˆz˜ šGí±#á9n|íÅp°gÒÉ”iWHŸ>ý¢kƒ—å9çN’Ë®šqXó>þèÃ÷äÓ­›Å„#m¶zw¬çxíQÙ¾íãp7à?á|™0iŠ”'ùýi^Ý÷™Ï殟ÈÞ=;5à»3¶êÞ§ûëðçZÌ&ÄÜyûXMÌfBZÃGŒ–1cÏã:wë•çépÏ:t`Ÿ<ý»Õ×ßp:ä±QwšÏЃ÷ý[´æð~sÿ¹ñ–¹®}L¸îåžví3íò«å¼‰ºöéIc6Þ{/˜z©˜G¤Í|·˜Ï֖ޢ͵Œ¶u¥;Ú7½·1ð9z¬Œ=g‚œsÞä´ø¿^×ÀëXS%×kkÕp×8Æ-[6ß{Íç:ÒvP+5>ûä9qüX¤f_û:ª>špêÛo¾,W}ñ­<Î×±tB@@Sy' € € € € € € € € €.°õ£äùgŸp­ÖïMxÄ×L…»k¾t“œ=î¼x‡r=ÎT0|ãÕçÅîlÛvío£©è·ý“ÂÍK®¸Fúö-w¨„g*…½ ×ÍT+LÔ¶mˇRâ¸Ü|Û<)v TµhàçÙõkeçö­ 9µ¹~o¿ñ²>x@¾|ól)ÔJˆ©ÜLÕFS}ÏTXKÔf€Û5´tÙ•ÓeªV¨LÅf‚&$e*&:ØiþõuÇåùg—¯¿(7Ü}WÕ6Ö¹Ð@@L à˜IW‹¹"€ € € € € € € € €LH㕟Ñjs¯wÚ›œ§'äÉÇ‘Iç_$×\w³X–•°¯=&Ïh8íð¡ý 3Ú@ŽãÈG3AÇ·Þ.ÃGމÖ5©û: O­«ÑÊaÑ+>Æ;C÷Éê÷Ë­sî’~¥ýÏÆT-{ê‰1aºDo{v}*««—H弯J~wÉDœßT\4ÕF͵MôfB†¯þþi}9v8ÿ÷ï:žEþ¹}¿L¿jþ¿»þãâ凿õ“ÈG³@@ “8fòÕcî € € € € € € € € ³M'e½†ÔLåºTn~ð¶´µµÊ—nœ%@ Ç§þxó{ò⳿ÓPW[ÇŠe€––fylÕƒòE kN˜<5–C{Ü÷è‘C²ú‘%I©¸Ù1¹Ç…6ó~S‚yŸÿª y¿¬­y iU2ÍùM3S™òú›fwL'i?M•Å—ž_Ÿ´ñ;~ïí×¥¥¹Y®Õªl‰xßwŒÛñsËæ÷å¹§Ö&%¤Ùq¯Ÿ¦’Ÿ ¾¶Ýp«~&¦xu¹=[îY1/ ³æ.’>}“}ºŒÿÞ']³‹f Óõo'L÷XÌO<ÚiF@È@žÿ×¥ \4SF@@@@@@@@2YàÐ}òÈCÿòpc‡ÙÖ7…+.ö´bÞ–>çÖ¯My¸±c¦šÜ†§WÁìéZ:ÆôúiB^¿[³"©áÆŽ9Ô×—ÞÛØñ2ê1U8ͺ“½™÷ÈG¾›ÔÓØ·Gž}òјÎQRÒW c:¦£³ „­<ñß}ë5yvýš^ 7v¬Ñü4Ñ“ õwõøy¶Ü³z ‘&äâ½×x§÷¿T„;.óñÚ£ò´V'fC@@wÏÿ,“{?Z@@@@@@@@@Ò@ NCkkV.‹9Ø·_©œ5ö\é_60\MʽZµŠá1­¶w\¦êÞ¡ƒû|¯ÐTΫ>J.˜z©ïc:wܳk»lЊu±nýJûËè³Î‘~¥eáu—Hkk‹4ž¬—†úzÙ»{‡9| ¦aß}ëU rùU_Šé¸X;›€Í“«¯?áyhžV]ì×€ô×G^~¾Ô=,µ–±uŒX¶·^ÿ}¸Ÿ¥e±Ö­Y.ÍÍM®‡›ê„ˇ¨m)RÛööv1AI jñ8¶ûÀ_{QΛx¡˜s'z«;Q+ëÖ®cê¶ :L&Lš"£ÆŒ•>}K% †»·µ¶Ê ãS}oÕ ­ŸkbÜñé–°ã­sîJȺvnߪÛ§Ý–pF›ùì©.Ê…ßùØÌÏ/W–4Ÿ…Ö–1AÚÇö‡?Ó õugŒá¶ÃTh5!Ço™ëÖÍw[æÜ³¬¨Õ9ý„‚½*{’ð9ð}:uÌÅ{¯Yþóf?°ow'‰èO”–>ZuÑ|¿è]¿[ꤡî„44ÔÅDÞ·g§ìüt«Œ;>ú iA@@ Ç8æø€å#€ € € € € € € € €@f ¼øìº˜Â'O•ÉS¦É ÁÚ9tqäðÁp°iÿÞ]ûvßùê‘£Ï–uor}mˆ±T4Á¡ÉN ‡å ‰¼ŽÎ'4a¼O¶l’wÞ|%~ìÜíùÛo¼,#“B1AÊ •FÛ‚Á<9oÒ…rþ”K4d8øŒn&duâø1Ù½óSyí¥g=Ã}fs¾·ßxIŽ=b=cÐÓ;Æœ}Žž{ŠŒu¶žYáÐT¸4ç}ÿ×Ã?£Óy½‚L ðìqçuÞç&Èê¶™÷äô/ß*C+FDì–_P Ÿ‰¡áÇ¥W^+[>z_ßûO…C¿è´Óœû£MïÊÄó§vÚûSÎ|ù…§|xî„ 
dâIŰ‘1…+i8öÃ÷ß’-›ß÷ýyøô“ä¨ÞÊÕ¨§[¦Ü³†%ßøîßD\îʇ~ŒFlÔæýtÏ·þ2ZsÚìÏÕ{ïG›Þñ¬JjîSãÏ›,ÃG%EEů™ ¼ù]ùàÝá{qÄNv¾òûgdÔYã¢h#Â.@@rJ€€cN]n‹ € € € € € € € €™,°E+Í™™Ÿmè°rõô2xÈ0?ÝÃ}Làë¶ÛŠ©Îh‚W'ê] …Úå¹§“ÊyàÚ¯s£ó‰ÕËÅT‰ó³Ð°‰YG,!ÊÒþerñ¥_î4Áìò³=óä£rû]÷H©VMLÆæn¼ø²«åB­†iª&FÛLÐÓ8˜Çð£µä#Z]Ñ»ä;_6¤˜ÀèÕ×ÎÐjœ#£ö1 ¦ ãh è˜ÇömË3¿[®îèz6~ðî›I 8º÷‹/—˯œ.A­‚ég3k3•&MX÷¹õkÅT¸óÚ^{ù97~bÄ0¨×±íï½ýš†¤j;^Fýiª–šjб|–;f²æ3d>´ŠÝ®ŸtnŽú|›†{p̆{VT  kÈå{¯Ûw™ 6šs¤Py÷KlÂßL½LCè—jÐñ=yAÿà€ŸªºÇµúîîÛ’ ï>W^#€ € ILš,sE@@@@@@@@rU ¹©Ñwµ·ñÎנ⢸Q&¸5³òn)(8³’_wÿCöŠW5½ÎǼùê ÒØØÐyWÔç—\~Mx±„;V\ÒG®»q–Üpó_•³Z[[44¸RBíí‡IêsSµÑ„×.Ó€[¸±û$L0ñö;ï‰Z™³{ÿH¯Ï9w’Ì®Zìnì~¬ ÍÒ ¬©Zçµíß·ÛW¥I¯qü¶_õÅäªknðnìýäæYóÄT³ôÚÌçñÍ×_ôêµÝ®6¾þû¨í ýˆ¯Q¼áÆŽqÌÏ’>}å+·Í“I\ÜywÔçŸjй'[¶Ü³zbNÇrïíz5ò4}íõ_‘·Þî+ÜØùhŠž ogU.ð}ßÞ®ÕlÙ@@@ ²ÇÈ.ìE@@@@@@@@ÒJàåžòÚΟrI8Ôgªýõd+P.×ϸÍ×n;Pw¢V>úðÝλ¢>ÿµ_–K®øbÔöXLoÆ­Ub„^ÛÑÃ}ÏÑk,¯ö¢âp€sì9¼ºFl7ÇO»üêˆm^;'huKü4!Ÿx¶!C‡Ë%—]ãy¨©nväðÏ~‰èpù®“ /º¼GCƒA œÞ.£ÆŒõçƒwÞS™-žmó¦w¤½­ÍõPóþòWæHŸ¾ý\ûÅÚx…:™°£×fÖf>³ñnÙpÏŠwíév÷Þ®WÄ|ÎgÎ] Ï¿¨kCŒ¯Lå[ô³íܾUÇñÓ•> € €äœ@Ïþ VÎq±`@@@@@@@@@ õû÷î–-½ïy④ϖ«§ÏS]*Û˜±ãåR!ÃÝ;·Ií±#ž§|óÕ}<.Õj†L½Ìs¼X:˜ª|&,æg3MÛ¶ýtíQŸk¾tSÌÕ»Ÿð¬±çJ¿Ò²î»]_—~Ÿ¸vòÑxþÔK}ÛTùLövÞ¤ å¢K®LÈiLøÉ„{½*jš÷ÈKÏ?×9wló®æ6õ’«zT¡3ÚÄ åêkoŒÖÜe¿ŸÏu—N¿È–{V¤µeâ>î½]¯Úµ7Ü*C+FtÝç«aÃG‰©šìµ55ž”Çö{u£@@œ à˜“—E#€ € € € € € € € €@& lõnÌÏ/k¯¿%á˺ø²«¥Ù@Ïqw|êØ2A©­à9Ž© x‘»’±™0àD­\èµÕ×—m[7{uëQ» טʒ=ÝL˜õ Ʋ]ÓmqWnì|}ָλ">?zäpÄý‰ÚY\ÒG®ºæ†D Ç„¿à#è7ÜÛyrõu'<«ZæåçË”‹òí<‡±ã'úú\7ž<Ùù0ßϳážå{±iÞ‘{o× tîÄ ä\Ä®G¹¿2UQýlÇ’|/ô3ú € € ŽÓñª0'@@@@@@@@@à´€ã8²cûVO+®¾N«éõ÷ìk 3¯mÏ®í®]Þ~ãeÏêÁ`ž\7c–ÉûÕ¶«¾øe_Ußyóe×õô´ñÊ/&.7vüßÓ5f¬ ,컿WÇ!ýºHKK³gŸžt0•0 ‹Š{2DÄcÇŸ7YFŒ:+b[ç;}|>;÷÷ ›¾&MMÊš:Ï£l€wp¹¹¹©ó!¾žgË=Ë×b3 ÷Þ®iÊÅWtÝ‘€W}úö“²åž#54Ôyö¡ € €¹(¼ÿ ”‹š¬@@@@@@@@H°ÀáCû¥ñdƒë¨…ErÞ¤)®}zÒxîï€ãþ}»¥½½-âiLài׎O"¶uÞ9aò_!‘ÎÇÄú<¿ @.¹üÏÃŽ9$û÷îòìOš«62žC#Ó§O?1AT?ÛùSb«öè5¦©¸éµµ&1à8hH…Œ=ÇÀÓk®ÝÛ'_8­û®3^ïüÔ;€Üù Û½? 
‰¨îÙùœ‘ž—öiw—}ím­]^ûy‘ ÷,?ëÌ„>Ü{»^¥á#FKù !]w&èUÅðQž#575zö¡ € €¹(—‹‹fÍ € € € € € € € € €@¦ìضÅsª& •——¼_3•!‡#ûöìì2—¼ü|)++—²úÐêUímm:ü.}Ì ôS nâùql2vŒ;w¢¼ôÂSâ¼Û»{‡ Ó@L¢·³Æž›Ð!M¸±¤¤¯œh¨Ýk˜0Ê¡û܆‰¹ÍTÁLFu¾¢¢bϹ” äÙ'ž© äuŸÛЊÝw%åõ !Þ•EwkÇLÛlÛ-”zj5±³áž•i×1Ú|¹÷v•¬ß•ɸ÷vœ%¿ °ãiÔŸ¶[`8ê@4 € € e³ì‚²@@@@@@@@È“ už‹4Ø;|å9H;Ô=,mm­®g0Uià0•›9çà!ÞáЃö$tZ}Kû't¼ŽÁüw’Uí0ÏG%ÀŽy&òçà¡Ã9\Ô±úôé'EÅ%QÛMCKK³´µº¿Ï]HQ£©Hjªú½üÂSòâsO&ü¬ÙpÏJ8J/ Ƚ·+|iÙÀ®;üªÀGÀÑq쟕á@@È÷??•kd € € € € € € € € €@F 455zÎ{žç Iìàg ´ªa°Brƒ‡—ûÝŒ'ªSRÒ'¡ãÅ2˜ŸN,ãõvßT†bM¥Ô=»Ü«466ž”þ½ÍòÙùMõźµrôðA9x`¯Ü¿WÚ/&䘬ÍÏç=ÝïYɲIõ¸~®E.Ý{ ½+,öäå§Ñg¿'ëàX@@zC€€co¨sN@@@@@@@@@À‡€ Lymý“\•Êëü^íÍÍM^]¤¼|ˆgŸdtX>ÈsX?ó÷¤S‡âÞ 8&9àÓi™IZÚ¿L R¸ž~>*o666Hÿ²I_{÷˜ › 2Ö8þy¼ö¨Ô="µÇŽh˜±½{÷¤¾Î†{VRR8¸Ÿ{W.Ý{ó}TXìÉå±ÄêÉá‹ € €@N pÌéËÏâ@Èóýl € »ÿm±›q € € € € J&Çt¯Ê×ì£ eaQq*Y?;WaaÑgÏ£=iinŽÖ×þü¼ü¸ŽKÄAÁ`0äũи¨Îæçó/^[k«ÑJŒ'ŽÕG­œÐ@ã‰ãǤNŸ·µµÆ;lÂócî÷¬„£ôÒ€Ü{»Âó¾ëêÁ+@@ÒI€€c:] æ‚ €q ”X…rÒIìÿ‡R\á @2PÀ|²!€ € € € €¤¯€¿°PAú.@gÖ⣂c*+ñuÆ*ðplnnì|HŸ²(dØcŒ PPàNíÁðgê'åçózÆÀQv„B!Ù½s›lßö±:°/\1J×´ÚíÇÀOX4­•¡“áÞÛõÂñ¾ëêÁ+@@ÒI€€c:] æ‚ €q –Ë'í{ã:–ƒ@Èuáú=ʆ € € € € ¾~ÂBùéýÇìšý{i ~‚•‰®àÌã×öñ‰KuXÉÏç¬ÑGÅU¯µ×;"¼û¦|²åC_á`¯ñRÝž ÷¬T›%ë|Ü{»ÊZV ë^!€ € €@Úð_JÒæR0@ˆW`bþhŽñâq €@Î LÐïQ6@@@@@ôµ·Kkk‹ëä‚Z Ð<ÒyKë`e[kkBy„lâé§úfBNtzü|ïJ©ÍMñWû4ÁÀ7^}A6ð¶8Ž“È©§l¬l¹g¥ ,É'âÞ›d`†G@@„ pL%!€ €@o \Y8IÖ6½Ò[§ç¼ €d´€ùeC@@@@HO¶ö6³|ôéÝ.Žm{NÀ ôÎ:ls£â¢çåë•y)®„ µ{®ÓO2Ò [?Þ$/<û„$:Lé\–eɰ£eüy“% áèçÖ¯Ô-®}ÙrÏŠkñix÷Þ4¼(L @@" pŒÈÂN@È$‹ ÆËÐÀ9h×fÒ´™+ €½.`¾?Í÷( € € € € €@z ø¨.hBW&¤Òs:«Â¢bϹµ¶¸Wªô Î~Λ—Ÿçè–L¶¶ÄVÖôš«W5Us|IŸ>^ÜÑþÖ/Éë/o8c¢v˜@〃døÈ12bÔYៅ…Eááw|º%Q§ “-÷¬„¢ôâ`Ü{{ŸS#€ € €@Lcâ¢3 €¤£@Ð È‚¾×Ë/ëIÇé1'@ÒVÀ|šïQ6@@@@@ô0¡ESÎ+ÈÕÖÚâ+DØ[«ìS¹ßOxÌíøxÛüœ7/€c¼¾É<®µ5µG?ÕKJúÆ´äßoX/¼ûFLǸu6÷ f,X.ƒ•ÁC‡‡Æ[YÒí\‘Ú²åžim™¸{o&^5æŒ € €@n pÌÍëΪ@È:E—Ê3MoË;mÛ²nm,@dLÍ'æû“ @@@@@ ½ ‹Š<Ž&èå§RWo­ÔÏÜZ[š{ezþŽüš]¯\“ú©¾é1DLÍ~Þ+Å}ü·mÝw¸1 †ƒŒåƒ†Hù ¡2°|° ($}û–Æ´&;Š©¿ŸÎÙpÏò³ÎLèý7®sD@@#Àyá}€ €Y!`Y–ü°l|ûدå@èXV¬‰E € ,ŠàÀð÷¦ùþdC@@@@HoS«¡¾Îu’^]NA£Ÿ*b---)˜É™§hin:sg·=ÅÅ%Ýöð2LåÒTn~Þ+%%}|M©¡¡Nžæ _};: \!g;WÆœ}N8Ôhª%ötkIB°8îY=uM—ã¹÷¦Ë•` € € à%@ÀÑKˆv@ÈÒ@ùEÙ×äûÇKÈ1c®EHµ€ 7šïKó½É† € € € € 
þ~*õ'އ+¸¥ëjLE7¯­öد.Ii?zä縥ýxö¡CêLH0•Û±£‡=OWⳂãK֋ߪ¥ÃGÉ5_ºICC<Ïk‡æfïÊ©ŽãÄ4l6ܳbZpwæÞ›Æ‡©!€ € €@žÿ Ÿ.Ãñ@è]QyCä7¿+SóÇõîD8; €i(`¾Í÷¤ù¾dC@@@@È ?©#G¦õbúö+õœßÑÃŶmÏ~‰îpäÐÏ!Kû—yö¡CêN6ÔKSScJNjo¯n0?á>ÌÜñé_ó¾ôÊkevÕ¢¤„ÍüT¥Œ5à˜ ÷,_':qïÍ€‹Ä@@@ ,@GÞ € u¦"Õ?ø†<Ùü†,kxFÚµY·F„ €@,CdAßëeFÑ¥bYV,‡Ò@@@@@ —†TŒO¶|è:‹c>ªºCãÓ¿[-‡î“~¥eRª~þ3û•ö¿.*.9c´òAC%//_ÚÛÛÎhëØ i€L+ä•Ú±+é?M ò¨p(“~)â> ÆŽ}vÜÇû=ðرÃâô+0È×poz×s,3ÐùS.‘i—]íkÌx;µ´xWpÔÉÆ4|6ܳbZpwæÞ›Æ‡©!€ € €@Ž]8x €Ù"`Â7_&_.ºDÞjÝ*¯´|(›ÛvÉþÐQitZÄÑÿdž €@6 XbI‰U(Âå21´\Y8I../A+ËeM € € € € €@Ö 6ÂsGSp<¸oÔןÇ#ÿ¡Ùâ’>²ðž?–@àóÿ.mž›uìݽÃu-‡îOiÀñØÑC …\çd ô\óˆ 8’¢€£ŸJŸg;××ú>úð]Ï~æståÕ×{öëi‡úºãžCÄúûÙpÏòDÉÜ{3äB1M@@޼ @@ «L˜ãÒÂó¬^(‹C@@@@@@¬4¸B‚Á< âµG]ßñÚ£ÒP_'}û•F퓈†æ¦Æp¸Ñm¬²å]Â}+†ô 8nÛú¡L<jÇ!IÿùÉÇ›<ÏaÖSÒ§¯g?:ôŽÀž]ŸÊÔiW$ýäÛ¶nö<ÇÙãÎóìÓÚÚ"õu'<û]0õR æ%÷×;MEÕý{wyÎ%ÆŽ’-÷,O˜ éÀ½7C.ÓD@@ Ç>ÿ3Y9Áò@@@@@@@@@tƒ2xè0ÏimýèÏ>=í°Ç££¿bøÈˆ§©>*âþÎ;÷ìÚ.u'¼+Êu>&Þç¦rãÇ›ß÷<|øÈ1ž}èÐ{æ=c½ÉÜêÄœÇmëWZæ«ú¨ß¹Žu¶ÛéÒ¶w÷N_LmÛ»Êiç eË=«óš2ù9÷ÞL¾zÌ@@Ü à˜;ך•"€ € € € € € € € €@ óÜò‘wX¯§K÷¢Œ6סÃFD¬ìØ}N›7½Ý}WR^ïܾUšOzŽ=bÔYž}èл[|U{2훽ÃÃgŸã]½ÑÌ¡¡Þ»z£é׿l€ù‘Ôm׎O|ok8Ö-Ú} ó8é~ÏêxïMϱÏç/àØÜÜì9–éì°\]Ýqñ 5Ÿ—X·l¸gu_s àþë¶ÉzvŸG¬¯¹÷Æ*F@@è ÷ÿ‹«7fÄ9@@@@@@@@@>(Pî+h÷âsë$GµµÏNäòdãë/‰WÐÉ £Ž2åâ+¢¶u4´47ÉóÏ<Ññ2)?ß~óe9rè€çØ&¤•Ÿ_àÙ½+P¯a½7_1)“xí¥çädC½ëØÅ%}4Ø;ÒµOGcqqqÇSן‡íwmïI£ O>÷äikkõ5L<÷”l¹guò 8×x¬:Ÿ#YϹ÷&K–q@rD 37Ù¸¦HoG÷Ü‘Ž`ôš@^¯™#€ € € € € € € € €ø¸`êe²k‡{ÅcGË;_‘i—]íkL¿Ž×“M>ªØM¾ðb×!Ë ‘‘£ÿ{w&gU' ÿ¼ÕKÒYHH€°…%dQP#«AP@ ÄN"fq›Q?g¼ã7sïÌý®Ž8ÎÌ÷ÍÜgôQÇë8‹a‘Цa jÙT”]v [Ù“îz¿ó6:MºÞªîª^ïóÔSUïùŸÿ9çwš²âÓÿ>S³O?^2îÉÇé8]î°#ÞU2®'/½¸&ÜsWyÅpG=­'CèÓ¿ùÕáC{ì¹wÕFÏND½ÿ¾{ró½;î&Iy¿;Ý4jLn¾,à…5χÉÖæôÐßÞ{Wx~Õ3eÍ# joïÙé˜Cá3«3R!žP›weÚ£F—·Çy¹ªÙî³·ššr @€ÀP˜=ûÂ)iCÝi!MÞ¿ÒíŸ'†$Ò01®uBü¢7&¯½9Mº$MÖ¦!¬ ñu¼÷Büxçööö[–·\úðt)ÌšûñãÒ´p\¡ÿûâ¼'&iÿúšÂ«qÎÏÅu=×ú\Òܼ÷ˆ¤é#±˜qzÉôX7¹ãOUìø{;ÞìxŸ%IS|Ûc÷yóöëàâ“ ñûÚ¬y‹ÖÄ¢º[ŠiqùÚ5Ï.Y¹reÏþBD6gFóüwÔ×%‹â¤$!ÙÿõifKx#ù[Ïãâqñí‘q]ñJ>ÑThúV\ËÍ!/eõ3-q[ÞèõæS–¿¡P¸öÍ]^Ä"µ­K..ÿ r—þå¼>}zýî“&ψë;7.ì#qú“²~;–Öñêõ5e÷²/Ô“ã½Nû\øtÚ¦³ç-º7 éµñßK–-¹äÁrÆC€@í†ËѲµ”™ @€ @€ @€ @€} ð¾NÉ¥ØÞn¸æÊðÄc½?D¤­­-yµ†—㩇yW9sËrdEOûî`^º­ã±G«ó{¦/½¸:\Õ²8lܰ>w쬰ñ¤S>”'`` ¼ºî•põ/ 
Ö¿Ö«‰e'¡._ziؼicnžSNÿH(Êÿ5Ì#›Â¤}öËÍ›ý¼ÞuÛÏrãÊ È @³“K¯»êG!ûŒ¨äÊŠœ{z•ó¹0Ð?³v¬=Û»¼ë¾_ß]Vu^žZ´ûì­…ªœ 0˜fÍúøþ³æ.üÎȨ'㉆ÿO_\‹þbá[ï¯× ì’9…¤îÒ {øð¬y >=mÚ´†Þg.?ÃŒóö5wÑcÁåI(üϬ¸±üÞ¯GF“ú¸–³bÿÅq÷Íš³èmÿ…œ±¨pj÷tJ¥ãV?sî‚Y&p!),óýäëö•dxs­±k˜¾sý>Úý{fXy&=¨–@ùÿ²®Öˆò @€ @€ @€ @€ @€@ŇzDxÇ‘Çäö+‹áÆë–†{ï¾-´WXÌ´#ùSOü!,¹äûá™§Ûq«Û穇vvjã·þ4<ýd~AoF2jÔ˜pÜIÓ{¤3õ°£Â¯b±a9ÅÇYïáG½;¼ïø„ÑcÆV4ÞÚW^ þþ×ñ3àwaë–Íõí¼yóÆŽS +9©²sÿ¡ð™•­gÜø —Õíë‡ømxê‰GÃ~“êø9I IØ‹D7n\:*„³gÌé¶o­|öÖZX~h³ç,øHšZbñÚ¨¾ž[<ðÀ¤.ülVó­-‹\«ñ³‚¼„¯dÇVûŠ¡YÚÏ5«ÿp,~õWmüÙ¹Í §‡Ðþ¶öZݘ6mZÃSü÷øja­ÆØUÞ8ÞñËwΚ;ÿœÖ%—Þ»«÷¨¾@~XW52 @€ @€ @€ @€ @`ˆ LŒ' žO@üé W•Uä˜ql‰ÅOÙÉbÙ£Z×ô3Î òާ{ïq'‡M±Ðìþûî);G{{[Çé’O<öpÙ}Ê œ|à”pú™3Ê 3€c‘á¶­[JÎèùUÏ„ìQ«¾þõã§Ú«tï=öýÅ—›6n(+Oösÿ›_ÝÑñÈ:ÔÕÕ‡ÝÆï(îÍŠ5ׯ5´mß^V®AYÑói>¯£Ðó?[±ãö.Ÿ×ÄÂçž8f ‡ÂgÖ{îÝQ(ž}–öôÊŠE³¢ò ÷ìiŠªôóÙ[FI @` Ìœ·èK!Mÿ)ªÕøÌ¿|„Ž9¤á[3fÌûÉòåW¬Êï‘qTssãá…Q—ÆÈæüè*E$ÉÔÆBú³b¨ÿÓ*e,™æŒææq»Õ5µÆ“?X2°VIØ;I ·ÄS@ç.½ò’ëk5Œ¼¼% Àñ- ¯ @€ @€ @€ @€ 0(¦L=<Œš=?ܰüÊŽâžžôN;+¼ãÈ£{5löûÆYžñ»O ·ÿ×MñwÓ^åëIçlÇžpjxϱ'…ðûÏ=Y°îsüIÓ; ^Ÿ}ú‰š;Œ5&œ}þœ°ç^ûôz¬†ÆÆpöŒ9aù/ Û·o«8_V𘺚=zre§Zž~öaL<ÍuM'=>÷Ì“aj<³7×`ÿÌÊ>Ž|×{ý¿¼­7 {Öߎ>{{µ…: @€À˜Õ<ÿœXÕøøå¾ì™¦!}>¤É]ñ$Ä‘<–¤áµ47’B[1Iw ¡0®ÒIi¦Å¤ÇÆ¢»ËNãTÆÖjü’ÿFÿ@IDATV|Y‚Ääu£®Ž¹ÎªdñŸZñ/~¤±_úX\ãs…¸ÆbHöN a¿†£ãÏ3]*güqXt¸<Æ• ëu[VÀ¹[aÔMq”c+I×µ* é q­÷„bx¡½®¸&æ¨ Åº} ¡¸oœvülaF\ëø²ò&ɘ˜óê™Í OYÖ²øŽ²ú"@ Ç {L§# @€ @€ @€ @€úO`ï}ö3ç~"\wÕáµW×öÉDG„S>xv˜úŽ£ª6Þ»Þ}l7~B¸iEkؾ­òb¯žN$+X;#yí»E¿ŸÜÓáô«@v’áYç5‡±Ð7+«Օ¤}äüyaÌØøûíUº²BÉ3Ïýh¸þê+B±X¬RÖü4Óâɩӎÿ@( ÁöØ+·ÓsÏ<‘SNÀ`ÿÌÊì²Óc{ZXšõ¦o9Æ•Äøì­DK, ¬0.uýÍræÿ¸J¬]K/ iñ_[¯¼ìöØ'¾/ïš={ÑQi}úÙXè¸0Í+§WŒýèŒÙ ^¾tñ}åÄw3sî¢OÄ¢½ò‹Óôê4mÿßyk<ï¼óF5ŒÞý¼X¼xQÌxwãÇuLì®-»Ÿ¤T–v“è°BÓ?Æ,e7Ƽª­­øwË—^ò«nR¾y{Ú´i “§uF]HþGÜ»éo6tó"uÖ‡ºôGçœsá{®»îò¾ùGw7sq›ÀPxý_¨C}•ÖG€ @€ @€ @€ @€!(ΊEŽG¼ó=5?0+NúèÇ?SÕâÆ[rÀA‡„Ùó>šrØŽ[5}ÞoòA¡9®EqcM™û$y}}CÇiˆÙézµ¸8hj¸ yQU‹wÌsÿ3>º Œ[ÖïÅïèÖ£çÉN‰ŸŸ Çžxê›ÅY¢††Æ°w<ѱÔõÚ«ëÂêUÏ– )»m0fÕÕׇÓÏ:?4*y¸QI‹µ/¿X²½¯}ööµ¸ñ @ –‡Õ5})çš;FšÆ¿ÞP|_ë’KÅ¿ìx沋³ÜK—^|ë’Å_Ü\Ütxìx]îxoÔ×…–»«¸3š›Ç%Iúÿíª­ë½x*å³Å4½`é’Å”³Æk®¹fSë‹—<ܾé˜Xôùå˜oKל}ñ~Vó¢…$ùoe•†ß¥iÛq­K.žYNqc–óž{îÙ~Õ’Å+–.¹ø´bÅÏýr–Ø9blý”5'AôXÀ Ž=¦Ó‘T.ÿàË×Jõ:í˜pêÁûäÿER9´ @€ @€ @€ 
@€ÀðÙ4*œzúGÂ1ï=>ÜuÛÏ;N«¦@vj]–û¨£ß·SaT5ÇÈrß}bÇi|kž6ÜuûʰêÙ§ª:DvbÝÁ‡¼#d…pYac<£ªù%ë?¬È1;YôÃŽ +oº6¬m]¯'“Úxüû?{wHx&@€Á,0cƼ} iør<•¯ä÷‘¶ÍÛN[¾üŠU%Ëh¼¾¥eu ;wö¼EߋϟËí’t8þun\7c M_‹ÅvùÇo‡ôþ-í›Ïxc~ÝdÛõíû[Z¶ÝÂßÍl^ø³¤\ÿÉ4~בտ{þ¼y““4ü°¬ÌiÚúrqó¢•--ÊŠßEв%/ž5kþM¡¡ðÓøoÃ#vòæ­è>sæœ_Xvå%ß}ó¦TU@cU9%#@€¥n¿âó•Џí›ßûj9Gž—Ê¡ @€ @€ @€†§@V¤ræ¹ Yàƒ÷ÿ6<óäcaãÆõ=Ƙ¸Ç^±°ñ„޳âÀ¾º²B¬³ç‡gŸ~"<òÐï³O=6mêñï­†ìĸ#ã —‡ythŠÅ ®¡+°_,\3ÿÂý÷Ý~à¾Pi1YVôºÿSÂáG¦L=¼ÏŠ`GŒ§Ÿy~˜vÜÉá÷¿ýUxè߆¶íÛ{¼QYQrÇ:báâÞûv_ظc€C="ܺò'¡­­û1Ÿ|ü‘°ní+±yÂŽn½~¬ŸY£ÇŒ çÅϨÇ}0<úðïÃ3O=b±DYÖ¿Ö; ¬}ö–µ…‚ @`€ 445|&~ySrziº­Xl›[âÆÎãlzµí/›ÆÕψµ•ûv¾ßõuVD7{ö…S–.½üñ®myïgÎ]ðΘÿ yqñ0Ê_oÛ¸ýÃ×_ÓòR~l÷ËZß1cöÂÆ¿#rc,îÛ£ûÈêµÔ¥#¾‡:÷ËfüÚõ¯ñͬ ´¼/`%¦ØÚzéó3f|ì´†Q ? !9ªDhüc?Éߟ}öÇ/Y±â²×JÅi#@ g9õé=Kª Ð3ôùŽÇ‹zÖ»g½n~âôžuÔ‹ @€ @€ @€¼ÀË/® OÇBÇçž}2lܰ>lÞ¼)l‰®WcãˆÐ4jtÈN“Ûwòa¿ý Y‘Ô@¹^Šëx橸Žgž ›bÑæŽut.*jhl cÇŽ cwçþúó¤½÷ ûìwÀ@Y†yT )þâç7”ì1ýŒs;Š» zaõªððƒ÷…×<ßñóŸÊvþ™ÉŠÌ²Øìçd¯}ö M94Œ=¶»t}vÛ¶­'*®~þ¹ðÂêçBöó¿uËææžM&›SÓè0rÔ¨ŽŸýì´ÆÉNé8 µÏ&[åãgÖæMê瞎ŸMBö:û|Êöpôè1Ÿ£ŸGñ3)ûLÍN¯ˆÅÝm£ÏÞîdÏý3Žõä:ùK½ÿµõÛ~ô¹Þ'(~ÿ«ñ“. ÓéÕ4âçÚE·^þÙ¯õ*‰Îz)0kî¢{âW£÷–J“Ó¯·^¹¸Ç'(–Ê={ÎüÙ¡P÷ãR1Y[ÚÞ~nkË¥×åÅumŸ57ž¨˜$§u½¿Óû4¼’nogV´·Óý^¼9Þü“êCÝ/bŠòþâMœÃÒ%O¬tÈXøyhhhx(oœ4¤?{eõÓg®\¹²­Ò1JÅÏœ¹`¯dDá¾ø?2“JÅÅ/ä¾tÉâo”ŒÑH€@œàØ#6 @€è­À¶t{¸sëƒá®øxdû³aMq]ØœnímZý  @€T] )&ƇÃöÇ8"œ0âÈИø¿×«-! @€ÔD`âž“Böxϱ'½™?+ðÊŠ·nݲÂƬà¦/Oh|s"¼Ø#®!{¼ç};¯#+úÚ²eK,òj #F6UQèpØkï}CöØq‹ÅŽ"´b±½ãç%ûùˆÅfÙ¼8hjÇcÇܳç¬hnÛÖ­'-fÿÝŽ1r@οóœ+}=?³²âðìÌ¡xù슻jMzç5/< ¯¸1¦mغ±í›µZýÒ+/]‹‰ß-+9F]²wÉö]4Κûñr‹c¿4Iÿ´šÅÙT®¾âÒÛgÏYø¿C!ùË]L­j·Òúú?‹Å…%‹(ãq«¶®oûhµ‹³E,[vÉ ±Hõs±HuY©E¥IøbhnþVhii/§ÊüFåfz @€ Ð ¶´=,Ûtk¸rãʰ.ÝØ‹Lº @€èìq<Ù¾¦ãqã–{Âødt˜3zz˜9êäPŸÔõÍ$ŒB€ @€ª(te9Ùc0_Ù:²"¯ìá"PŽ@VÈ;N&-gÎc²ÂÇì1Ü®¡ò™5TöÍgïPÙIë @€ÀÐh,„¹«I’\wÝåksãz ô‰ÝK8Cŧ†¤»¾ø7l®i]²øò^L¿Û®›Ö¿øÕQãö<7ž~T·A½h8³¹yBÌý‰¼i±xQ-÷0©^5{†$œÞÝ\’xA2jöU!\Ù]ŒûôL d…sÏRêE€ @€] ¼Ð¾6üÉ+ß ÿºá:Å»&r— @`d¨#ûN›}·Í¾ãº @€ @€ @€èx²_,¾+}µ…ö%¥#zßZ á±¼,iR¨ø¯Ì$iáüܼÅôóbzÚ¾bÅŠ­Å4©YþÑuMŸ‹»¦ä_­‰{üвtËzº†rû¥Åðí¼ØB!ýÓ¼íT. 
À±r3= @€èÀÃÛŸ _xùÛá±¶U=è­  @€'}·Í¾ãfßu] @€ @€ @€ô½@kqó9íaëÁí¡íŒ8úgãi†ÿ˜†ôÇ!¤¿Ž×BV_}Å¥wÖ|fiúxÞ…4-YÈ×µÿŒæùS㉂Gv½ßù}Gñ_Ëâ[;ß«öëÕÅM-Ñu]µófùâÉ—3óòÆÓ¿ZZÚóâzÛÞÚrñµñgçùRy’4œ8½¹yL©mT.P_y= @€ P™@vªÍ—×þÀ©•±‰&@€ÙiŽÙwÝïNübØ«n÷A0cS$@€ @€ @€ 0„báÛU!<W”=~;]þð‚ìÔÄxÀb¯$É-p,&¡¡’Y4 3òâ“4ý·¼˜Þ¶ßÑÒ²yö¼E‹cž/ö6WçþÙÞÄÂÉwÇK^mi·¸O®¸E?Có»-IêÆ×5Ûßö³Öm ä 8Á1—H @€½hKÛÃ_¯»Xqcoõ%@€ÐY‘cö7ûîë"@€ @€ @€ @`àÜxã%ûb6iÒ¾9oœX‡#5 @€ Ð;e›n µ­ê]½  @€ pì;oöÝ×E€ @€ @€ @€À0HëËXu]1!g47‹‘9…t鯫[.{¨Üœ½‰kßúê½i¼z“£kߤ.ÉY_qÈ_uíWË÷q…eŒ—?ïZÎQnCQ@ãPÜUk"@€ 0@¶¥ÛÕWÙ˜ @€Ú dß}·¥mµDv @€ @€ @€„À´iÓf6/øàìy ¾QH’®æ¤ÆF’¤dAd¬6üu³XÍq»Ëµ|ùòõq>w×Þ“ûINÎë—&ÉòbªÙ¾º¸å±¼BÎ4$'Æ1ÕcU^®a/w”ë°@€ @€@ÏîØú`X—nìy=  @€ "ì»ï[§ŒyfZr!O<Vþü·á–’A  ° x àwBHvßEÓÛo¥ámŶ/\ÝrÙïÞÞØË;i:!$¥K“$)”;J¡X|8J‡Ç“')7_•âÞS¥<áæ––×fÏ]Øͺ?Ç1¶ß{ïýã˜OVk\y˜ 澘 @€ @€ @€ @€ @€†„À§ÏÊYFV&û|þk9Qš  °“@vzc’†¡t]aGŸ´¾ÒzåÅ»S‚*¾IB2!/]<9²tÅb§Ò-wŽN›ÒXÙýêÒpÔ´iÓî¹çžíºÖäåQÍÍ!ŽWŽu™HãâÖÄÅí[*>i¯?<¶?Y*Fƒ_ ìÇÁ¿T+ @€ @€ @€ @€ @€ @€ ñôÆO•<ðE¦iúϵ,n̆‰Åz¹§HÆ»/Vì²!?iiy%Þz¨Ëíß&Iã~uÄÎ7kóîbS,nL«›=½+/_R¨;#/F;ƒ_@ãàßC+ @€ @€ @€ @€ @€ @€ +xzã'sœ†Õ›_{ñ/rãz„ô ¼ñÈ kx’ÛórÖ5„³óbªÑÇùp5òtΑ†äßïêu¬­ú¸»Ç=úW ¾‡7: @€ @€ @€ @€ @€ @€òf̘16ž(xp^4 ß^±bÅÖ¼¸Þ´ŸØÜÜçrJ^ŽJNpìÈ•o ¡ðéRy“|"¶ÿC©˜j´ÅbÒxZf52½•£}{ñ…†œšÏ$¼kÆŒyû._~Ū·zÖîÕ\0¾nänßICúL O†bñ©íiúäk/>ûÔÊ•+·Ônd™ oŽÃ{ÿ­ž @€ @€ @€ @€ @€ 0¨v? 
¬ 'm7—׋ }“‘§Åî#óRÄbˆ¼˜ÎíÅ-ë¯*Œ÷Ý$ £:ßïü:Ö~þìÇ_½ô’»:߯æë™Í ON’ä°jæÌr½öò3¿™8é€ ±8tL©Üõ£ÿ8¶_T*¦Zm…ÆÝþ[ÉüX8úzÊB¡cÓ&L: 5oÑ ñ¤Î'ã>>¹amúéo¼dcµÆ•‡ÀpÈ)uî<ÖO€ @€ @€ @€ @€ @€ 0ÚëÓ² ·nHÿPóy )sŒÆ2ã:®ºêªuñÅòúÔÕþ,/¦7íI!©Iþx"b[œ×íys‹¥†_ì8±3/°—ígŸýñÝ’BˆŽo¿bg¬3 “bñãññÕTÅo7r‡@o8öFO_ @€ @€ @€ @€ @€ @€>HBqB966Ö×´næ‚yóŠ'ú-,g.ñPÀŠ _ÏÙþòrÇÊ»y³šçŸ“דöÙsæÏŽùg÷¤o9}Òbø·ü¸d÷ú‘ã>Ÿ×»ˆ¦qu wÏË’‹¹E§y9´ °³@M?¨wÊ; @€ @€ @€ @€ @€ @€½(¤ÉKåd(4¤ï+'®‡1…BÚxq,[^ÿ¤âÇÖ%—Þ›†ôî¼üI¡ðý3š›ÇåÅUÒ~vsóž!©û^%}*mmYÜš¦écyýâ)’}~óÇÌ‹ëiû‡?¼`tìû¥¼þq®ih+.É‹ÓN€@e +óM€ @€ @€ @€ @€ @€ ÐÅ$]]Îði([N\ObfÎ[ô'IrJ¹}“4Œ,7v§¸öäïvz¿«7I²ßn…¦eÓ››Ç쪹Ò{çœsáîM…QËã©“{VÚ·ÂøbŒÿF~Ÿdt]¡îÇÕZ_—ñ’±’ï&!Ù£Ëý]¼M®mm½ìÙ]4¸E€@/8öOW @€ @€ @€ @€ @€ @€¾ض¡øt1+Ž+}%aæ´iÓJUÞ:³yþ‡bÁâßVÒ3 a·JâwͶ\¼<žâ¸lÇûîžc±åi £nºà‚ ÆwSÎý3>6iäØú•ñdÊʉïmÌóÅÍ?ŒëË=‘3®ïˆ‰…¦K§OŸÞ³BÑn&:{Þ‚ !YÔMsçÛÅöbÛÿê|Ãkª# À±:޲ @€ @€ @€ @€ @€ @€ôÀu×]¾6¤éyCÅ"½÷tè‘ÿWA{2kÞ¯êênˆ¹GTÐ/ÖÐ…±Åw nÛ´íOBH_ëtk—/³¢ÄˆÝ~ 0ÏÚe@ÎÍÙsæÏ®Õð«8Ù£sB«Ö|GKËæ´˜~½¬„IrþÄIüô¼ó>VÆi‹ùgÍYôå _ÊŒúizÙÕ-—ý®œX1T& À±2/Ñ @€ @€ @€ @€ @€ @€ý,PLÂòò¦PøÒ¬æ…3Ë‹í>jæÌ…gÍ[t}’¿‰Q=¨ÇIÇuŸ½tËòåW¬JÓð?KG½ÞO:<$`®˜=wáÒóçÍÿ@¼[r®ñDÄúÙsžã u?ŽëÛ¿œqª³ìÊK¾×wUY9“䤆Q wf§h–¿‹ ì”ËYsýCReVƹ­ÛÛ¾²‹Tn Pú*ä‚ @€ @€ @€ @€ @€ @€@Ÿ ´µ‡5Ö…‹â€#s-„+cAÛÚ6oýZV,˜ß)`æÜGÄ ?OG\”„0±SSE/“4ìš›ëBKK{Eßn]²ø{ñôÈ3c⌲ú'ɬúP7köÜE«Ó^çþX1 ÏÇ“$_ŽëÙ3IÒ}Bš’ô¼ø¾ÇëÚ1—4‰e€½¸Š[_ýd݈ݎ Irp^š¬ˆ3©«»qV,ÊlkK¿¼|é%ñÔÉüëŒææq»%Mû):”_pZL?ÕÚzùSù#ˆ @ ' {¢¦ @€ @€ @€ @€ @€ @€@¿ \Ó²øéYó|3 …Ü“ cA[V?óÇ M fÎ]ø­P Ë·†ÍO\ßÒ²ºëÎ9çÂÝëFÖO®«O¦Å¢ÀOÇÂÆ÷wéú>W,|6ÆNîÚöæû$©;/4íwMO¿y¯²éæW_œÓ4n¯kâ¼Ê?½0 {Ç¢ÈÏdCbÇ·®ø¦ãýN7ßjîô*H.‹9òNÁìQáæŽa®ºêªu3f/˜ÓPn‹EŽ;î—zŽûzfCCrf,ü|*ÎïÚ´=]Ù^W\•¤uk’mÅõIcñÀbZ?µPH§†4|^Ü£ñ¥rvm+¦é7–µ,þq×ûÞ P=ŽÕ³”‰ @€ @€ @€ @€ @€ @ ^ißò÷êFe'+î[ÖIÒTá¯B]ø«¦0*Ìž»ps,z{"ð­‰r“BHˆÅucÊÊõFPìûR(? 
…ýc®)Õ·.INˆí=-p +V¬ØzÞyç]Ð0j÷±¸ï”RcU¯-ýÑ+«ŸþÔĽŒV%®4ÙZ¢µ¬¦ì$ÆÙsæÏMCÝe±qTYbPÜ»ãÓ’ºä ñÔÊìF#âN‡B¨ÛQÄ™_Çãw¾âÞ._»æé¿Üù®wT[ û¯ÕE€ @€ @€ @€ @€ @€ @`P ¬liÙÒöóâù‰ë{4ñXð‹áŽŒÅ‚§eÏ7¦émkïnm¹ôºÚ›7‡x‚âéy1yí×\sͦ¶ÍëÎÅw·äÅö¶=ºþçÒ+Ï_¿~}îéŒIH{]à˜Íwé•—^-?Ä\ÕÛù÷¦\û´¶ožµråʶÞäÑ—|ŽùF" @€ @€ @€ @€ @€ @€ @ë’KïMÒtVœÚö>œ^±˜†¿i-nþàµ-‹ŸËÆÝ°¶ð»øT,=‡´×ŽYþåË—¯o½bñiiš~.¤áÕÒcö¤5ݘ‹ѺäâOÅÞŽöÚ+¿þ( U)pÌf›íé¶öô¸XdxoOfß›>Ñ4M‹é×ãÚ?ZZr ;{3–¾¼.ÿCŠ @€ @€ @€ @€ @€ @€ÀXzå⛋í驱 îÉZO1ž,xCš¶°lÉÅ_í\wã—lŒÅ†·•?žyÈùóæŸT*¦‚¶´uÉâïo.n:<„ôÊ ú• 'C.§RÙzå%ÿãrãyŠí{ԗ씦IÕ ³±²ÂÑí›^‰'9ÿ6&ß7~uÚÓûÒbxë•‹ÿº:ùd!@ Žå(‰!@€ @€ @€ @€ @€ @€°ËZß±ùµ¶cb©Ý¥Ù!|UŸhšÞ Ol½ââ³[—\öË]åÇ7þpW÷;ß«Këþ{ç÷½}}}KËê¥W,ž»}{zLvò`\úƒçŒ„±ßå¡X<+ž yþ5-‹ŸîœcĈ¶¦Îï»y½¹›û=¾}Í5×lj½â’¯l*nžRLÓoÄD[zœ¬TÇ4¼÷î¿¿¼úéiÙÏQ©PmT_ ·‚ºúCÊH€ @€ @€ @€ @€ @€ @ º+V\öZ̸`Fóü¿­/þ$ á!IÆôx”4}:VJÞØžxõ’KoÏ˳¶¸©eb]ÓwBHFw›$aÆìÙºtéåvÓ“ûË—.¾/öË=sî‚# !9?$aJ<ƒqŸø¼wÇs{¦IX]žKC²*žŒøl<±ñÆÕÅ-+îhié¶@±0:Í/pLÒz2ïrú¬hiy1Æýù¹Í ¿ÑPHþ¸¤gÆõ¼/îm]9ý»‰‰5“éM¡þý©Çï¿úž{îÙÞMœÛÔX@c¥'@€ @€ @€ @€ @€ @€è;å-—>GûÓ3fü?…¦ñ'Òpb($'ÄÓIÒ0±ka\,tk‹E€/ÆÂ¿cA㓱xîæ¶bñÆ7ò”=ñ•--bpÏ *Ë©tà²%—d§8V~’c7i iØèSúJÃó¥zßzmËâçb–¯fsιp÷£êO%ŽŽ{öθ¯{ÆÂÒ=ã<Çu)žç¹5$é+1æwi’ÞÃÝÅíá®eË.©YQf×9xO€@÷ »·ÑB€ @€ @€ @€ @€ @€ 0H–/_¾>N}Å7W Ç&£F/¤M›Š›_úIKËÚØëä\»(&é…]5t¾—&5/pì<Üu×]žíÙßx¼Ù4mÚ´†²¹ ˆùt7O÷ xK ïãå­H¯ @€ @€ @€ @€ @€ @€Vi¨;,oÁI1y*/F;v%àÇ]©¸G€ @€ @€ @€ @€ @€ @ fÍ]øƒ„?„öðH[1<òbaó£w´´lîó)ÒcCœH©kCºùÞRíÚ ЀÇîdÜ'@€ @€ @€ @€ @€ @€ ÐoÉÌXV8>Ô…Ðû¤M鬹‹ž÷ICúH1$W]µäâk9½s›î—„äÜRc¤ixò'--¯”ŠÑF€î8v'ã> @€ @€ @€ @€ @€ @€~HBº6žœ8~ÇðI¼âëÉÙ#ž^HÃññuM “ðÅ1â£ÔuO©Fm(% À±”Ž6 Pe“æ}ï¢R)¿üŸáÔƒ÷)§Ï*Ý®• @€ @€ @€ @€ @€! 
$Ù©ˆw¿’ô=3fÌÛwùò+VuÓó–›››B!|6/C¬ºüi^Œvt' À±;÷  @€@ âMùj©´+ï !{”º>}VZªY @€ @€ @€ @€ @€CB },žà8­»¥d':Öl8'¶ÿ[w1½¹¿O]Ó_Ä“"'æäØ^Ü–^™£™Ý ºmÑ@€ @€ @€ @€ @€ @€ @€@¿‹áú¼ Iòå3>6)/®ÒöYs~67þMn¿4½~Ù²Å/çÆ @€@7 »q› @€ @€ @€ @€ @€ @€@ lI7gŽÅ’ã'ÉõMWMŸ>}dɸ gÎÿñx8äÿÉë’¦i[{1ü}^œv”PàXJG @€ @€ @€ @€ @€ @€~XÑÒòbHÃmyC'I8a¤®¹ yÑ´¼ØRíg77ï9kÞ‚¯'¡ðŸ1.·æ( éׯjY|w©œÚ 'PŸ  @€ @€ @€ @€ @€ @€¾HCÛÿiÝíñDŤÔè±ùŒººð«YóÝŠéÿÛzå⟗ŠïÜ6{ö…‡¦õõs,Š÷G†’#½Þ3 é­Å-×9×艀Çž¨éC€ @€ @€ @€ @€ @€ @ Æ­K.»sÖÜ…ÿ‡ùL9CÅÚÄ…Bò¡Øç‘$†4}*{¤Ix:à iR˜TÅɱmrŒÏဒcâëÜß?M7C² ´´´¿yÏ ôP@cát#@€ @€ @€ @€ @€ @€ Pkt[ø«ÐΈg8TîXñ4ÆÃbìa±1Ö/&¯ÊX·£wùµŒ;z¼ùœ¦íÅPüÜUK.}ìÍ{^ @ ½øDêŨº @€ @€ @€ @€ @€ @€ +°lÙâ—·Ó“ÓÊ ®a@U±˜ž¾lÉ¥—Õp© f ‡Ù†[. @€ @€ @€ @€ @€ @€Àึeñs›Û7’¦áÞþ˜yš¦?‰ã¿{YË%·ôÇøÆ$@`è (pº{ke @€ @€ @€ @€ @€ @€CD`EKË‹7˜‹ÒtCŸ,+MÛÓþ¯Ö%‹ÏÎÆï“1 B€À°Pà8¬¶Ûb  @€ @€ @€ @€ @€ @€«Àý--ÛZ¯¼ä·ÃáiZ\OtÜZ‹µ¤!¬Š¹ÿ±Òwµ^±øïãñ–‹Õ¨¯~J  @€ @€ @€ @€ @€ @€¨•Àµ-‹Ÿ‹¹sÎ…Ö8ºnn’%I8¡7ãÅ‚ÆM±ÿ²b‹¯ZrñÍñu|é"@€@m8ÖÖWv @€ @€ @€ @€ @€ @€5¸îºËׯÄÿ’=Î;ïc{Ôj|o! ÓB!}oÂ!ñÜÅñ!$ãÒî–$I¬#J_KC²*>¯Šíñ”Æ$>§«Òbxºm뺛—/_¾¾&•”Ý(pìÆm @€ @€ @€ @€ @€ @€ƒEàšk~ôRœëo<Þ6í£š›ïoiÙö¶7 Ð…~ÛÐ @€ @€ @€ @€ @€ @€ô€âÆ>@6 (p¬˜L @€ÊhJF”*Ž @€ÀðxHl£E @€ @€ @€ @€}$ À±  C€ @`8 L*ŒŽË¶f @€a,à;ð0Þ|K'@€ @€ @€ @€*PàX1™ @€”+pXÃþ冊#@€ 0$|Ûh @€ @€ @€ @€@ (pì#hà @€ŽÇ8b8.Ûš  @€†±Àñ¾ãÝ·t @€ @€ @€ @ RޕЉ'@€ @ lã/wOF—/ @€À`Ⱦûž0âÈÁ¼s'@€ @€ @€ @€ô©€Ç>å6 @€á%И4„9£§¯E[- @€À°Ⱦû6&õÃvýN€ @€ @€ @€¨TÀoZT*&ž @€ŠfŽ:9üt˯Ãcm«*ê'˜ @€À`8¤~ß}÷u @€ @€ @€oøÞ~¯ó'žÓOš÷½¤ó½®¯o¿âóu½ç= @€ÀàPà8ø÷Ð  @€ hú¤.üÍøEá /;¬K7蹚 @€žŒOFw|ç;ûº @€ @€ @€· üð'%k³Ó“$L{Ïî\´Ó;o @€†„@aH¬Â" @€Ð{Õíþv÷O…ì¿] @€†’@ö7û®›}çu @€ @€ @€ @€T& À±2/Ñ @€ôPà “Ãw'~1R¿o3èF€ @`` dßm³ï¸Ùw] @€ @€ @€ @€@åõ•wу @€=ÈNµùç –mº5\¹qeX—nìY"½ @€ô£@vjãœÑÓÃÌQ'‡ú¤®gbh @€ @€ @€ @€ÀàPà8¸÷Ïì  @€ :ìÀ›GŸÎuR¸sëƒá®øxdû³aMq]Øœntë1a @€¡/ДŒ“ ãÃa û‡ãGNqdhLüßëCç­ @€ @€ @€¨µ€ßÀ¨µ°ü @€ìR 1i§Œ<ºã±Ë7  @€ @€ @€ @€ @€Ò…!½:‹#@€ @€ @€ @€ @€ @€ @€¤€Ç¹-&E€ @€ @€ @€ @€ @€ @€†¶@ýÐ^žÕ @€¡'pò—’ª.ê¢/V5d @€ @€ @€ @€ 0ªý»f`I¦@€ 08ÁMµ$ P‰ÀÏï|<œv”Jº ØØl-·Üýä€__OìÔã²·}ÞGãÙÛ>‚î‡aìm? ÷Ñö¶ ûa{Ûè}4¤½í#è~ÆÞöz ioûº†±·ý€ÞGCÚÛ>‚î‡aìm? 
÷Ñö¶ ûa{Ûè}4¤½í#è~ÆÞöz ioûº†±·ý€ÞGCÚÛ>‚î‡aìm? ÷Ñö¶  C€ @ LB™q @€ @€ @€ @€ @€ @€ @€Up‚cÕ(%"@€ƒS ;ñp žz8”þZZütØÛþPï›1ímß8÷Ç(ö¶?ÔûfL{Û7Îý1нíõ¾ÓÞö³Q @€ @€ @€ @€ÞNpÞûoõ @€ @€ @€ @€ @€ @€ @ _8ö »A  @€ @€ @€ @€ @€ @€ @€ÀðPà8¼÷ßê  @€ @€ @€ @€ @€ @€ @€@¿(pìvƒ @€ @€ @€ @€ @€ @€ @€á- Àqxï¿Õ @€ @€ @€ @€ @€ @€ @€~PàØ/ì%@€ @€ @€ @€ @€ @€ @€Ã[@ãðÞ«'@€ @€ @€ @€ @€ @€ @€ý" À±_Ø J€ @€ @€ @€ @€ @€ @€†·€Çá½ÿVO€ @€ @€ @€ @€ @€ @€úE@c¿°” @€ @€ @€ @€ @€ @€ oúá½|«'@€èK6Þþ~ÍÊzòÆ©á€0µìxý'`oûϾÖ#ÛÛZ ÷_~{ÛöµÙÞÖZX~ @€ @€ @€ @€è­€{+¨? @€ @€ @€ @€ @€ @€ P±€ÇŠÉt @€ @€ @€ @€ @€ @€ @€z+ À±·‚ú @€ @€ @€ @€ @€ @€ @€ $÷Ðè±Àû?ö/i;ÃŽ§wP8í„)½^ùkÅá ¯|'¬n¥×¹zš`ò阮îŸÚÓîC®ßÓGý!<óÎ? ‰uÙÛ·ÑÞîì1”ÞÙÛ¡´›;¯ÅÞîì1”ÞÙÛ¡´›;¯¥Zß“wÎê @€ @€ @€ÀPøÐ‰‡ù]ñº©'_øý¯¦izÑ^EÓJ’ä¢[/ÿì×*ê$˜ ÐIÀ Ž0¼$@€†ž@ü?Â××]Ú¯ÅCOÕŠ @€ @€ @€ @€ @€ @€@ïê{ŸB @ /nýfu¼ù‰Óûrú}>ÖO¶ü2üfûc}>n×³Ó ‡Ê‰…]×6ÜßÛÛ¡û`oííк+óß­½º!üùn g57”—hm @€au’@IDAT @€ @€ @€ C'8ÃM·d 0\ÚÓb¸tÃO‡Ër­“ @` dßk³ï·. @€ @€ @€ @€ %ŽCi7­…ØIàÞm†5ŵ;Ýó† @€À`Ⱦ×fßo] @€ @€ @€ @€J ‡ÒnZ  °“À[Øé½7 @€³€ï·ƒy÷Ì @€ @€ @€ @€] (pÜ•Š{ @€ÀxpûÓCbA€ @ xÈ÷[? @€ @€ @€ @€ 1ŽClC-‡xKàùö—ßzã @€A.°Ê÷ÛA¾ƒ¦O€ @€ @€ @€ @€@WŽ]E¼'@€†ŒÀ¦tëY‹… @€ðýÖÏ @€ @€ @€ @€CM@ãPÛQë!@€ÞHCúæk/ @€ vßoûš? @€ @€ @€ @€]8vñž @€ @€ @€ @€ @€ @€¨¹€Çš€ @€ @€ @€ @€ @€ @€è* À±«ˆ÷ @€ @€ @€ @€ @€ @€ @€@Í8ÖœØ @€ @€ @€ @€ @€ @€ @€@WŽ]E¼'@€ @€ @€ @€ @€ @€ @€j. À±æÄ @€ @€ @€ @€ @€ @€ @€º (pì*â= @€ @€ @€ @€ @€ @€ PsŽ5'6 @€ @€ @€ @€ @€ @€ ÐU@cWï  @€ @€ @€ @€ @€ @€ @€š (p¬9± @€ @€ @€ @€ @€ @€ @€® »ŠxO€ @€ @€ @€ @€ @€ @€Ô\@c͉ @€ @€ @€ @€ @€ @€ @€tPàØUÄ{ @€ @€ @€ @€ @€ @€ @ æ kNl @€ @€ @€ @€ @€ @€ @ «@}×Þ @€µHÓôk¥²ŸvL8õà}ÂôR1Ú @€ @€ @€ @€ @€ @€CA@ãPØEk @€A#pûŸ¿¨Ôdoûæ÷¾Ž¥Œ´ @€ @€ @€ @€ @€ @€CC@ãÐØG« @€ @€ °“ÀÏï||§÷=ysÚ SzÒM @€ @€ @€ @€”% À±,&A @€ @€—À-w?Ùë +pì5¡ @€ @€ @€ @€”(”hÓD€ @€ @€ @€ @€ @€ @€¨‰€Çš°JJ€ @€ @€ @€ @€ @€ @€”PàXJG @€ @€ @€ @€ @€ @€ PŽ5a•” @€ @€ @€ @€ @€ @€(% À±”Ž6 @€ @€ @€ @€ @€ @€ @ & kÂ*) @€ @€ @€ @€ @€ @€ PJ@c)m @€ @€ @€ @€ @€þöîØ®º>øïwßËE+@Q±€ÕQa;Š´ $á‘ IØ×µuÔÝÙî¬++Ö.:vÚ±:ît§(! 
áIA¥´…Åîh:"hÛ\ѶÒ Sþ,…¼{öôHÞ{7çÞsß9÷Ý{îçf2çÞs~çûû}?¿÷•Kä›C€ @€jÐàX «  @€ @€ @€ @€ @€ @€ @€@‘€Ç"× @€ @€ @€ @€ @€ @€ @€Z48ÖÂ*( @€ @€ @€ @€ @€ @€ P$ Á±HÇ5 @€ @€ @€ @€ @€ @€ @  ޵° J€ @€ @€ @€ @€ @€ @€ hp,Òq @€ @€ @€ @€ @€ @€¨E@ƒc-¬‚ @€ @€ @€ @€ @€ @€ @€E‹t\#@€ @€ @€ @€ @€ @€ @€jÐàX «  @€ @€ @€ @€ @€ @€ @€@‘€Ç"× @€ @€ @€ @€ @€ @€ @€Z48ÖÂ*( @€ @€ @€ @€ @€ @€ P$ Á±HÇ5 @€ @€ @€ @€ @€ @€ @  ޵° J€ @€ @€ @€ @€ @€ @€ hp,Òq @€ @€ @€ @€ @€ @€¨E@ƒc-¬‚ @€ @€ @€ @€ @€ @€ @€E‹t\#@€ @€ @€ @€ @€ @€ @€jÐàX «  @€ @€ @€ @€ @€ @€ @€@‘€Ç"× @€ @€ @€ @€ @€ @€ @€Z48ÖÂ*( @€ @€ @€ @€ @€ @€ P$ Á±HÇ5 @€ @€ @€ @€ @€ @€ @  ޵° J€ @€ @€ @€ @€ @€ @€ hp,Òq @€ @€ @€ @€ @€ @€¨E@ƒc-¬‚ @€ @€ @€ @€ @€ @€ @€E‹t\#@€ @€ @€ @€ @€ @€ @€jÐàX «  @€À0ćaÖ@€ @ ßo+a„ @€ @€ @€ @€!Ðà8D›a) @€@µ+â²jŠF€ @`|¿]D|S @€ @€ @€ @€ P‹€ÇZX%@€†Aàĉã†aÖ@€ @ “|¿­ÄQ @€ @€ @€ @€†G@ƒãðì…• @€ œ±ääŠ# G€ @`ñN÷ývñðÍL€ @€ @€ @€ @€@-ka”s–9 ˰ @€•ø~[ £  @€ @€ @€ @€ ‘€Ç!Ú K!@€ª8kéiá„Ö±Õ @€À"dßk³ï·^ @€ @€ @€ @€h’€Ç&í¦\ @€Y±6uÁ¬s> @€Eì{möýÖ‹ @€ @€ @€ @€Mð_E5i7åB€Ì¸pùÙáÕK^6ï¼ @€FE û>›}¯õ"@€ @€ @€ @€ @€@Ó486mGåC€Ìˆ1†«Ù^2±rÖy @€Œ‚@ö=6û>›}¯õ"@€ @€ @€ @€ @€@Ó486mGåC€Ì8ºud¸ö˜wjrœ'ã @€À0 dÍÙ÷Øìû¬ @€ @€ @€ @€š( Á±‰»*' @`žÀªÉãçV¾/¼zÉËæ]s‚ @€À° dß[³ï¯Ù÷X/ @€ @€ @€ @€4U`²©‰É‹ 0W {òÍÇŽ}w¸cß=aÛÓw†=í'æñ™ @€À¢ œÐ:6l:ê‚páò³CŒqQ×br @€ @€ @€ @€Ô- Á±nañ  @€À!çnüô5‡|œ÷öCŸ çzâ¼Ó³N\uѬ>”Èþ#ñ‹ŽxmxÓòׄûž{0ìÞxàÀCá‘™ÇÂÞdHÒ_^ @€!C +â²pâÄqáŒ%'‡s–ÎZzZ˜ˆ­ALo @€ @€ @€ @€,º€ÇEß @€qH›ë>\”ïÝBö»èuÕEðŠ|z½–ýGãg/ûÅç÷zq @€ @€ @€ @€ @€ @€@uþ:øê,E"@€ @€ @€ @€ @€ @€ @€zÐàØ#”a @€ @€ @€ @€ @€ @€ @€@u«³‰ @€ @€ @€ @€ @€ @€èQ@ƒcP† @€ @€ @€ @€ @€ @€ @€Õ hp¬ÎR$ @€ @€ @€ @€ @€ @€ @ G Ž=BF€ @€ @€ @€ @€ @€ @€T' Á±:K‘ @€ @€ @€ @€ @€ @€ @€48öe @€ @€ @€ @€ @€ @€ P€Çê,E"@€ @€ @€ @€ @€ @€ @€zÐàØ#”a @€ @€ @€ @€ @€ @€ @€@u«³‰ @€ @€ @€ @€ @€ @€èQ@ƒcP† @€ @€ @€ @€ @€ @€ @€Õ hp¬ÎR$ @€ @€ @€ @€ @€ @€ @ G Ž=BF€ @€ @€ @€ @€ @€ @€T' Á±:K‘ @€ @€ @€ @€ @€ @€ @€48öe @€ @€ @€ @€ @€ @€ P€Çê,E"@€ @€ @€ @€ @€ @€ @€zÐàØ#”a @€ @€ @€ @€ @€ @€ @€@u“Õ…‰ @€hšÀLÒ÷=÷`ؽÿþðÀ‡Â#3…½Éþ¤¿¼X˜@ 1¬ˆËÂIÇ…Ó—œÎYvf8kéia"ú;É&ën @€ @€ @€ 
@€FE@ƒã¨ì”u @€ @€ $IîØwOØöôaOû‰Îl*ã#5 ?“ì üÑó¿o{vw8¡ulØtÔáÂåg‡ãø`È” @€ @€ @€ @€±Ðà8–Û.i @€ px§ÚÏ„>¹-|ûÀ?ÈjÈŠÿ©/„;ŸýV¸ú˜MáèÖ‘µÌ#( @€ @€ @€ @€†A@ƒã0ì‚5 @€ @€!øÇƒ†>y]øñÌãC²"Ë 0žYƒñ{ÿ£pí1ï «&ï á¼×þB_÷¹‰ @€ @€ @€ @€ƒÐà8(ió @€ @€!Ȟܨ¹qÈ7ÉòÆJ k4ÎjòS+ß×דÏýKÇÊK² @€ @€ @€ @€Œž@kô–lÅ @€ @€@ÕI’„>¹Í“«†À²&Ǭ6³õ"@€ @€ @€ @€ @€@Ó486mGåC€ @€ú¸cß=áÛ~ÐÇn!@ n¬6³õ"@€ @€ @€ @€ @€@Ó486mGåC€ @€J Ì$í°íé;KÞe8ƒÈj4«U/ @€ @€ @€ @€4I@ƒc“vS. @€ @ ûž{0ìi?ÑÇn!@`PYfµêE€ @€ @€ @€ @€& hplÒnÊ… @€ô!°{ÿý}Üå- V-n> @€ @€ @€ @€êÐàX·°ø @€ @`È8ðЯÐòȾ«Vý  @€ @€ @€ @€ Ð0 Ž ÛPé @€ @€²Ì&b+l:ê‚>ît ƒÈj4«U/ @€ @€ @€ @€4IÀդݔ  @€èSàÂåg‡W/yYŸw»:²ÚÌjÔ‹ @€ @€ @€ @€MÐàØ´• @€èC Æ®>fSxÉÄÊ>îv u d5™ÕfV£^ @€ @€ @€ @€hš€Ǧí¨| @€ @€@ŸG·Ž ×óNMŽ}ú¹@ÕYscV“Ymz @€ @€ @€ @€ @ ‰›¸«r"@€ @€} ¬š<>|jåû«—¼¬Ïn#@  ¬³ZÌjÒ‹ @€ @€ @€ @€M˜ljbò"@€ @€úÈž÷±cßîØwOØöôaOû‰þ¹‹Ò'´Ž ›Žº \¸üìc,}¿ @€ @€ @€ @€ 0JGi·¬• @€ H k¬ºèˆ×†7-M¸ï¹Ãîý÷‡<™y,ìMö‡$ýåE€ÀÂbˆaE\Nœ8.œ±ääpβ3ÃYKO ±µ°Àî&@€ @€ @€ @€ @€Àˆhp‘²L @€ °Y£ÕÙË~ñùß‹1¿9  @€ @€ @€ @€ @€æ øëà›»·2#@€ @€ @€ @€ @€ @€ @€C+ Áqh·ÆÂ @€ @€ @€ @€ @€ @€ @€@s486woeF€ @€ @€ @€ @€ @€ @€†V@ƒãÐn… @€ @€ @€ @€ @€ @€ @€æ hplîÞÊŒ @€ @€ @€ @€ @€ @€ ­€Ç¡Ý #@€ @€ @€ @€ @€ @€ @€ÍÐàØÜ½• @€ @€ @€ @€ @€ @€Z ŽC»5F€ @€ @€ @€ @€ @€ @€š+ Á±¹{+3 @€ @€ @€ @€ @€ @€ 0´‡vk,Œ @€ @€ @€ @€ @€ @€4W@ƒcs÷Vf @€ @€ @€ @€ @€ @€ @`h48íÖX @€ @€ @€ @€ @€ @€h®€Çæî­Ì @€ @€ @€ @€ @€ @€ @€ÀÐ hpÚ­±0 @€ @€ @€ @€ @€ @€ Ð\ ŽÍÝ[™ @€ @€ @€ @€ @€ @€ @€¡Ðà8´[ca @€ @€ @€ @€ @€ @€ @ ¹“ÍMMf @€@/o<õÎ^†C€ @€Àˆ \â‚W|Í'î _»ñ= ´à•@€ @€ @€ @€ @€@<Á±‰»*' @€ @€ @€ @€ @€ @€ 0ä‡|ƒ, @€ @€ @€ @€ @€ @€4Q@ƒcwUN @€ @€ @€ @€ @€ @€ @`È48ùY @€ @€ @€ @€ @€ @€h¢€Ç& @€ @€ @€ @€ @€ @€ @€À Lùú,4JàÜŸ¾¦(¡}6œwê‰E#B¸ê¢âë® @€ @€ @€ @€ @€ @€FA@ƒã(ì’5 @€@cbŒ.Jæî¿ !û]ôºê¢¤è²k @€ @€ @€ @€ @€ @€‘hÄ*-’ @€ @€ @€ @€ @€ @€h”€ÇFm§d @€ @€ @€ @€ @€ @€ @€Àhhp}²J @€ @€ @€ @€ @€ @€ Ð( ŽÚNÉ @€ @€ @€ @€ @€ @€ @€ÑÐà8ûd• @€ @€ @€ @€ @€ @€ @ Qµ’!@€ @€ @€ @€ @€ @€ @€£! 
Áq4öÉ*  @€ @€ @€ @€ @€ @€ @€@£486j;%C€ @€ @€ @€ @€ @€ @€FC@ƒãhì“U @€ @€ @€ @€ @€ @€ @€F hplÔvJ† @€ @€ @€ @€ @€ @€Œ†€ÇÑØ'«$@€ @€ @€ @€ @€ @€ @€ÐàØ¨í”  @€ @€ @€ @€ @€ @€  Ž£±OVI€ @€ @€ @€ @€ @€ @€% Á±QÛ) @€ @€ @€ @€ @€ @€ 0GcŸ¬’ @€ @€ @€ @€ @€ @€4J@ƒc£¶S2 @€ @€ @€ @€ @€ @€ @`4&Gc™VI€Ô%ðA]¡Å%@€ @`QîZðì×¼ÿWCxÿ÷’€ @€ @€ @`ào:çqà“š @€@IOp, f8 @€ @€ @€ @€ @€ @€ °p Ž 7 @€ @€ @€ @€ @€ @€() Á±$˜á @€ @€ @€ @€ @€ @€ @€ÀÂ48.ÜP @€ @€ @€ @€ @€ @€ @ ¤ÀdÉñ† @€ @€ 0ç½öF`•–H€ @€ @€ @€ @€À8 hpçÝ—; @€ @€@cÎýK››Ä @€ @€ @€ @€h†@«iÈ‚ @€ @€ @€ @€ @€ @€% Ž£´[ÖJ€ @€ @€ @€ @€ @€ @€" Á±!)  @€ @€ @€ @€ @€ @€ 0JGi·¬• @€ @€ @€ @€ @€ @€4D@ƒcC6R @€ @€Èt @IDAT @€ @€ @€ @€ @`”48ŽÒnY+ @€ @€ @€ @€ @€ @€hˆ€džl¤4 @€ @€ @€ @€ @€ @€ @€À( LŽÒbë^ëº [Þ•ÎñË…ó$ÉwÜ|ÃÇ Ç¸H€@GK/¿òŠVL~¥ãÅŸž|®®ýÒôÖiúµõ¯üd’·ÎÍ3™ WìœÞº{îyŸ  @€ @€ @€ @€ @€ @€Œ¢€ÇY»/Š1¬Ÿuj™žÒà8×Åg=L´’óCˆÿ¶hè’Ö̧×ǶÁq݆wœ$ÉoÆô5Ï)&Ëçs‚ @€ @€ @€ @€ @€ @€Àˆ hpѳlš'°fÍšÉ'>C‡æÆæ¥+# @€ @€ @€ @€ @€ @€1Ðà8æ?Ò'@`xVòGÒæÆ×ÏŠ¬„@³f’v¸ï¹Ãîý÷‡<™y,ìMö‡$ýåE€@gôŸSaE\Nš8.œ¾ääpβ3ÃYKO ±Õù† ÎªÕ …;ŨÕYÍæŽzX¬zU«½íQrµšK8nµ:ÜûcurŪÕl~߃ó]p$Л@^¯'¦|Æ€þ<8[™ZímŒ" ,V­æó«Ù\‘@o‹U³jµ·ý1Š@.°XµšÏïH€ @`48Žã®Ë™¡X;µéMiÏȺ…Y $IîØwOØöôaOû‰f(%õ d ÀÏ$ûƒôüïÛžÝNh6uA¸pùÙé3ˆce“«ÕÊ(CAÖjΫfs Gå]¯jµÜþM P«¹„#áP«Ã½?VG t­fóúœë;('×ë÷Ó?Î~×ùçÁÙÊÔj¹ý1š@.0èZýÙ¼þ?ל‘@)A׬¾–Úƒ üL`еú³‰½!@€ 0ÆÇxó¥N€Àp¬_å+ÃD²=¤m!ñ"« Ð\§ÚÏ„>¹-|ûÀš›¤Ì X kþý§¾î|ö[áêc6…£[G.xjuÁ„˜'PG­æ“¨Ù\‘@5uÕ«Z­fD! 
¨Õ\‘Àp ¨ÕáÞ«# ÔU«Y|߃seGÕÔU¯jµšý…@.PW­æñÕl.áH ºjV­V³?¢ÈêªÕ<¾# @€qh;€ü  °˜oÚòóÉd¸=mn£šÍ% Ô'PE½ªÕúöGd¹€ZÍ% ·€Zîý±:¹@µšÅò=8u$PŸ@õªVëÛ‘ äUÔjKÍæŽê¨¢fÕj}û#2\ ŠZÍc9 @€ ð Ž~ °—lܸjùÄŠ¿ 1œ¹Ó›’ÀX $I>úä6Ícµë’]Lìò³šËj¯ÌK­–Ñ2–ÀÂú­Õ|f5›K8¨_`!õªVëß3ÈÔj.áH`¸ÔêpïÕÈR«Y ߃sIGõ ,¤^Õjýûc¹ÀBj5¡fs Gõ ,¤fÕjýûc¹ÀBj5áH€ @€À _°ðŽX»aË+&ÂÒÿC8m š„À˜ ܱïžðí?sé¬@VsYí•y©Õ2ZƨF ŸZÍgV³¹„#Áô[¯ju0ûc¹€ZÍ% ·€Zîý±:¹@¿µšÝï{p®èH`0ýÖ«ZÌþ˜…@.Ðo­æ÷«Ù\‘À`ú­Yµ:˜ý1 \ ßZÍïw$@€ @à Ž/XxG€ÚÖMm¹¬Ã=1ÄSjŸÌ„™¤¶=}' A «½¬{y©Õ^”Œ!P@™ZÍW fs Gƒ([¯ju°ûc6¹€ZÍ% ·€Zîý±:¹@ÙZÍîó=8×s$0X²õªV»?f# ”­Õü>5›K8¬@ÙšU«ƒÝ³ÈÊÖj~Ÿ# @€³48Îöð‰µ¬^½zIúäÆ?Œq:„xt-“J€À<ûž{0ìi?1ï¼Ô/Õ^Vƒ½¼Ôj/JƨG L­æ+P³¹„#Á ”­Wµ:Øý1\@­æŽ†[@­÷þX\ l­f÷ùœë9¬@ÙzU«ƒÝ³ÈÊÖj~ŸšÍ% V lͪÕÁîÙäek5¿Ï‘ @€Ùg{øD€Ê.Ù¸qÕ)/ÿ¥¿lÅøÊƒ H€@¡Àîý÷^w‘zz­Á^ÇÕ»ZÑ Œ¯@Ù,;~|eeN z2õWflõ+‘Àx ”©¿2cÇ[Uöª(SeÆV¿R Œ·@Ùú+;~¼ueO Z2õWflµ«~ꯟ{H P@™ú+3¶šÕ‰B€@. þr G @€ý hpìßÎè*°vjÓEaé·b ¯ï:Ø*xàÀC•ÇÞ¾Ûc ªÕÞM$P‡@¯µšÏ­fs Gƒ(SeÆ>3h¶@™ú+3¶Ùj²#0x2õWfìà31#f ”­¿²ã›­';ƒ(SeÆ6 ³h¾@?õ×Ï=Í—”!Á”©¿2c³z³õ7>{-S @€ú&ë -2ÆWàmo»â_,=ré5I’üf i{£‹"ðÈÌc‹2¯I ø‰ÀÃ=Ö ZõC`qz­Õ|•j6—p$0x2õWfìà31#f ”©¿2c›­&;ƒ(SeÆ>3h¶@Ùú+;¾Ùz²#0X2õWfì`³0æ ôSýÜÓ|IŒ@™ú+3v0«7 ñPã³×2%@€¨O@ƒc}¶" 0†kÖ¬Y¾òøUï±õÁ4ýE½cøS åaØ›ì¦åX ±èµ{7v€&0 ²5Xvü€Ò0 ±(SeÆŽž$ P Lý•;ÀLE`,ÊÔ_™±c'I([eÇ0Sh¼@™ú+3¶ñp$0`~ꯟ{œ–é4V Lý•ÛX0‰X$õ·Hð¦%@€h”€ÇFm§dXD¸nã–+Òù¯MŸØxJ™u$IøZÉ÷ÒfÈSæ>c è.„¤û #¨M ×ìu\m ˜À˜ ”­Á²ãÇœWú*(SeÆVºHÁHÿM´÷-3-Õ ”©¿2c«]¥h”­¿²ã  P@™ú+3¶ºŠD€@&ÐOýõsmª(SeÆV³:QÈÔ_.áH€ @  ŽýÛ¹“Ï ¬Ú|^k"~<„øšò$Éuÿ·ýìoœøíËßí @€ @€ @€ @€ @€ @€£* ÁqTwn8ÖµcõþבÇšÇqö©Æ]¿øâ'µ&Zw—ž"IfÚ1þ§7mýƒìÞÓ/ßR:„ @€ @€ @€ @€ @€ @€Œ²€ÇQÞ½Á¬=^¼~óê%“á-Ih½"}ÂÜñ1$'„$Ÿö6¾8 ñ`ºŒGcLö¤ï÷¤íŽ?±½;<—|eÇŽm f‰fIìÓ"ýLNN´ÊN$ÉI;\µsúúÝeï5ž @€ @€ @€ @€ @€ @€@S486e'+ÌcÍš5“Çž°êââ[CŒoIÿwB>{ àO^é»ç?¤#B˜HÏ­JO¬úé©ôcëªdI’¬ßxå}IH¾”6smß¹ý†~zó‚ë.¿òCiCå•EÒfË»vl¿þß©úÚÚ [.I;Ý>^7ƯÜrÓõ(ÓãÅaß§Ó·a’Ðþ½gŸú§ß¹ýöÛ÷[òò%@€ @€ @€ @€ @€ @€ p¨€ÇC5¼k7l^—¶-þnŒñ áHïÏúW§±V§o¯^·áÊÏ|vÿ»õÖ›^HÜìÞv’üÙD+~´0NV]zé¥ÿy×®]OŽ«ðbÚÜø®´!ôåE!ÓµßUt½×k£°O½æ2Fãîm'3Wíܾí¯Ç(g© @€ @€ @€ @€ @€ @€8¬@Ú“åE „K6n:wý†-_kÅÖ- mnìàÙJÛ¯š}ðÜ:SM›WÄV¼zeëˆo¬[÷öS2W;†Ït½?†M]ÇT4 .ÒÆÐÂ'¢Æ|îÞ{ï=Ð¸OýæÚŒû’§Ò¦Öÿzà™ÇOÛyó 
ÿ3LOÏ4#/Y @€ @€ @€ @€ @€ @€¨F@ƒc5Ž#eõêÕKÖmØ|}Öt8ÈÒFÀ3â’%µnæ³úwfïs7$Ir°ðþ$¼á’WŽ©èbÚ¼¹¥[¨ƒ3íîM™‚Œò>uHg N%Ï$IøÃçž9ð²7m½ö¶ÛnÛ;IK‘ @€ @€ @€ @€ @€ @€@i Ž¥Éšqç¦^tÊi¯üÓ[]ójÉ8†—ÄÐúêúË7¿¥Ÿø·ÞzãžÃWŠîM)c+,}{ј*®­Ý°ùŒâkºÄº÷‹ÓŸûÛ.cæ]õ}š—PƒO¤OküaH’ßÚ÷ÏWíØ~ý¸í¶ÿ©ÁéJ @€ @€ @€ @€ @€ @€À‚&A€‘xåÔÔÒ£[+þ<†pv™Å'!<Cò§éÓéî íðèÌD{Oc"´'Nl…öIiÃá¿ ¡uqú4ÃczŠãQiÌ/®Úò†Ó[w÷tÏ!ƒÒu\—Îuñ!§æ½IØ”žü½y*<“¸)ͽðÕn—zcSö©¦ÛígžžhuÙŽdÿ®0==Ó€”¤@€ @€ @€ @€ @€ @€ @` Â<\“¼¢uÄÇÒÆÀž›ÓFÂ]¶ÿû­·ÜðÍn™¬^½zɪ—¿ò!þVÚô·¦Ûøô!‹“a"¹ñ×ýí¿üå/þ‰nã½þÐ÷¿ó•ô)”Æ?ôü¡ïÓø¿´væWíܾí¯=_áûtŠç›(2õÛ¿7ÙwãaæBSöé0é5æô®]»žL“¹¥1 I„ @€ @€ @€ @€ @€ @€À€48zX¦Y7ueö„Åô´ž$üm^µcûçîéi|:èÞ{ï=þ¾=}{ûÚ Wni…ðñ´ÑñÅE÷§ Ч,û¹É?IǬ+7÷Z6×É/å¶4Ÿÿ8÷ÚìÏ­w¤Ÿkip\;µù !Æ“gÏ7ûSŒÉ~|öÙâOMÚ§âL]%@€ @€ @€ @€ @€ @€ @`\Òþ3¯q¸dãÆU±>ÓS¾I²ã±öÞsË47λsûõ[“3¯J’ä¹×æ~N›×®½|ó{çžïþ9éšOúCþö4N-?ë­‰¸¥ÛÛ3í®k<4F3÷éÐ ½'@€ @€ @€ @€ @€ @€ @€@MM_`‡S`"Yö‰ôiŠ+»­.IÂß²}ëewOO?Ýml·ë;vl{äà³Î!ùN·±­V¼öÍo~ÇÑÝÆz}çöþOºÞâ'LÆøóé“×z_ïÏ™š:"sYa¬$ùÑΰÿÏ Ç̹ØÄ}š“¢ @€ @€ @€ @€ @€ @€¨ç©v\‡O`ýú·Ÿc¸¸ÛÊ’ÜõøžfORLºíõú­·Þ¸§½?ùÕ4àžâ{âÑ+ŽžxWñ˜ùWcèþTÊØŠ›æß¹°3'N‘zÆÂ†Ìv7„éé™^gjò>õj` @€ @€ @€ @€ @€ @€ã!Ð4e™LNþûT¡p¿Óć÷ÿóÁËî¾ûîƒU‹íÜyã±=óžnq“Þ¦¦&º;ôúÌþÿwcúyß¡çæ¾O›;ׯY³fùÜó û7÷pÿgzó³!MÞ§Ÿ%é  @€ @€ @€ @€ @€ @€R†7B͸pjjeú¤ÁÝ-›¤Ý¾æË_þüÝÆõ{ý–›·íJŸ ygÑý1ÄS.+Ö™{m×®]O¦œÜ9÷üìÏñècOXõ¶Ùçúÿ´víæãC’\X!I¾¾sûÖïŽ9äbÓ÷éT½%@€ @€ @€ @€ @€ @€ @€€Çqø8râˆ÷¤O0\Q”kúôÆïîLö]W4¦ŠkI;|¢[œV+y_·1ó®·C×µÇ7Í»¯Ï­¥ñŠ4ÞdÑí©i©§7ŽÅ>¹F€ @€ @€ @€ @€ @€ @€ÀX x‚ãlw am·4Ó§7~:LOÏt·Ðë;¦¯ÿR’GŠâÄ$œ³fjꨢ1s¯ÝróÖ»Ò'*>4÷ü¡ŸÓ§C¾yíÚ-Çz®ï÷1n.º7IÂÞgŸš¹¹hÌÜkã°Ossö™ @€ @€ @€ @€ @€ @€ñÐàØð½ÿµ_Û|dÚl÷êniLâ®nc*ºÞNùî,ŒãÄ1K_W8fþÅvšçõóOÏ:³$,I¦féãÃ%Sï83½muñ­É-·ßþ¹§ŠÇ¼puŒöé…¤½#@€ @€ @€ @€ @€ @€ @`¬486|û|Qx]Œq²Kš÷Þ6½µðé‡]î/u9}‚cqƒcm"iý«RAÓÁqæàg“ôUt_Œ­ME×{¹61ÑCŒ$|¦—Xù˜qÚ§uö¹RŸbHº48&Éßï¸yëÝe¢ŽÓ>•q1– @€ @€ @€ @€ @€ @€æ hplîÞ>ŸYL¯tK1‰ñûÝÆTyýÇí}?èö¤Å$ÄsÒ9Kÿ|¶Cñ“Ó§YÆ%±õŽ~ó¹tã¦ób «ŠîObØš^/|’äÜûÇmŸææï3 @€ @€ @€ @€ @€ @€ã'09~)WÆi³ÝY±KÊ­öÌ@wOO?»n㕤Ë:épKK›î⋯xñ­·Þ¸çpc:?¸÷ñ/,Y±ò“Ùý®?®²Çß9ìõ‚ ­¤µ%€f›íøÜg Bt¼4nûÔÁIc"þÏÄGŠR=ÿUá¼SO kŠÆ¸F€ @€ @€ @€ @€ 
@€h‚€Ç&ìâásHûüâÊÃ_þé•V뺴áðtWå€$9.¤«+zM1™­½Tƒãm·Ý¶w݆-ÛCˆï:\ìtÖÓ/ºrõ®éëï=ܘNçÏ™š:"]òúN×9w÷®›nú‡C>÷òvìö©c4UàëÿŸ½{—»ª~ÎÌÍS*ˆ >Àúì®Ek+vMW-’/ ܬ¯Utkk·ÛÇg}ûZ÷£ÕXµ]ż0aʽ@Ä(jM«èÚ6¶V±Õâ ˆEQAy&$÷ö?ÔÜ;ÿyßy|§3ó?¿sÎï|Ï= ±ùq¶_°±ÞÚ®{ïÞ‘R/¯£ @€ @€ @€ @€ @€ 0  ‡açXÃÊ•+Ê›Js4ô8.«_jxPh§>7V§ÉbZÖÊt) ‰å0gcuÌR)¬Ëßš*p<¾´øì¼pòȺ9ÅtiÝöYGuŸf¡ðˆ @€è À¾~rÛ£}÷Y7´=† @€ @€ @€ @€ 0—€ǹd†àùÌ‚G.+ð:Ê3¡¥ÇéÊæ/­^3ùÍü†ÈŸ›kùyAçÚ01ñÛ¡R™™+æÐçùm˜ë}öðïéÎý÷ü䊇?+þ6ªûT,#‚ @€hGàÄë8¶ã§/ @€ @€ @€ @€Ýhàv¿î'a†î”Cé˜îŒÜ£Q˱åü³êߤÃñ+KK_ÜèJV¬8÷¸<öôzñ)…ËwîÜyo½˜ÙÚFyŸfóðŒ @€ @€ @€ @€ @€ @€ÑPà8Äû\Ž©¥û…$e­ÝàXÍ?ÞŸm)Õ½±ÒºF×:¶dìÜcÝOgbV¿¨rŽÉFyŸæ ñ˜ @€ @€ @€ @€ @€ @€Pà8Ä›œß(øÈ^^©´°Õü§¦¶Þšbüd½þ1„UguÖÒz1µ•Ö?ôy–O)}óªí[¿8KKá£QÞ§B @€ @€ @€ @€ @€ @€ ­€Ç¡ÝÚüÃîèåeÙýíä³™ú7*ÆxÄ‚G,[Y4ÇêÕž™[þB½¸”â¦zíõÚF}ŸêÙh#@€ @€ @€ @€ @€ @€ @`xƆwiV–bv{ åº)„ 3éwëÍSc)›ùj;SÿsÚ·óçÂ’Å5ç8)œŸ·]6g{Þ¥u¥ß÷8×+¥™ûSØØ÷>¶k×®}{ÖüûÔ¼™ @€ @€ @€ @€ @€ @€ ¸€ÇßÀâôÓ—‹bb©ü’¢˜An¿;»ï²üÇ9‹cõµ ¼òÐ5û˜ÇŸB<òÐ矙 õ‹'®ûÙ>ÕåÑH€ @€ @€ @€ @€ @€ @€ÀÐ (pº-}ø‚RˆŸø“ÿÅN?üéð<ùT¥r{~ãUõV”bXuX{,wسƒ¤ðµ++›öü¨ÕÏö©U9ý @€ @€ @€ @€ @€ @€T±AM\Þ ÌìÏ>_ZPPÇóW¬X{ÂÕWoÿׯFm/jåÊ•G—yQ é»1”n YvÓþ”n¼ó‡·Ü´{÷î½í>{ïl&»´T.Ÿ3{k~OcËÏ<ó¼e×\sÙÕ˜—¿üü#CŒ¿6Wü¿=O­ßÞx«}jÜJ$ @€ @€ @€ @€ @€ @€Ã! 
Àq8öqÎUÜùãïþñÇxw^¬wÄœAyÃØÒ…ÿ9ÛX/¦Sm¥…GþF^R¸.VË «¯R),Èߎ9îÄ4¾vÃm1¤S 7Þ}Gz͵×n¹ç˜6ÿcº²õÚñµ“·äs>~Ž¡,zdùyÛ–jû’£JãùÛâêç9^û³ûÓÖ9Úš~lŸš&Ó @€ @€ @€ @€ @€ @€(¸ÚoÀW'ý߈x gøbE^jøæ+V<²(®Ýöê͈±òÇÃ_±ú Ḽøñùù§“;UÜø³™²…͇Ïzð“¸êÁo)N<øy¶)}bzzËm³5µòÌ>µ¢¦ @€ @€ @€ @€ @€ @€ƒ, Àqw¯ÁÜSþ¼84.[|ÔÅqíE,9ªü_òÆeE£¤,ûXQL³íRviÝ>)žqÖYg-]>1‘ßv_\/6 ¡þXõ:ÏÑfŸæ€ñ˜ @€ @€ @€ @€ @€ @€¡Pà8”ÛúðEMU6O¥”¾ý𧇋¥øö³'ÎÆá-yrúéë‘ô–¢Ñò\S8í(Šk¶ýêÊÖB ŸŸ«_~}äÒ‹—±¬´øåùçEsÅ¥n»ù†ë?1W{«ÏíS«rú @€ @€ @€ @€ @€ @€ 0ˆ qךÏ9¿p0üiq·øˆr©üÿvƒaqt“ñ‘ÇÄKbˆ*î?>5µí–â¸æ# o^,…U1”ή7r^~¹uÏž=ûëÅ´ØfŸZ„Ó @€ @€ @€ @€ @€ @€ÁPà8x{ÖRÆ·f÷]šß<ø£¢Î1Ƨ[Z²uùòå‹‹b›i_½vý{Bˆè“Ídþ{q-…Ü‘Ý[ )Ý=WçüæÆWämgÎÕ^}že3­×ÞN›}jGO_ @€ @€ @€ @€ @€ @€I@ã íV¹~©R¹/eé"Ƴ=îÄÏžuÖ¹ ܶX<âø9ÞBé-Å‘!¯=LÛ®ªlûZ#±­Äì®TîN!Tæî—åEŽGÏÝöt3?ûTG^ @€ @€ @€ @€ @€ @€C%06T«éÁbòâ¸ãW¯™ü­LÕÐ7ÞpýE{öìÙßHðôå[Þ?¾fïæ|+ ãc|Á‚¥ þ憎uLW¶~º0~–€•+W]ZtÔïçóýÎ,͇=J)ÜöxÛa ~0³KÇBùU­ ›eÙ¥­ôk¦}jFK, @€ @€ @€ @€ @€ @€ƒ* À±É‹!>>Äðž&»u-ü‘O~òŸ… «Idû~úªò¢#ÿ]ˆñIEIÅŸËåkÇ×L~êÀôÖ«¯ØòwE}ªí/™˜8êȸä7óþoÉ­Žj¤Ï1YzõÔÔe75ßbàUÛ·~~|톉!œÒÌyæ¾û¬™>­ÆÚ§Våô#@€ @€ @€ @€ @€ @€ @`P8ÊNu(Ï+¯¼ò'+V¯?gÁX¸./r\ØÈ°y¡â Ä3Æ×NÞ”x~<ͤÝ3åì_c*ÿ ÞŸÝf'eiìäR)Rµh0ž•ßÚxt#c×b²”þtº²ù/jß»ÿž>šçùGÍÌcºêšk.»£™>­ÆÚ§Våô#@€ @€ @€ @€ @€ @€ @`P8ÊNu0ÏêMŒ«ÏY·&…ò¶¼qi£CçÅ'å±oŠåø¦±PÎëóo‹Jù”B¹ú¹úà÷êçÆ_)¤«ïøÁÍ¿ÛxDÞ?³9,ûƒ|¤êze3Ù¥ v(È>uÒ0Œ´@þ{W^{ŸFÚÀâ ̧@õ 6òrVQC {žÕZÎlMÂ;Þ 4s^›‰íýJÌH`¸š9~_ëogµ¿÷GvjÍœÕj¿·Öä¼è½@3çÕYíýþ˜‘@M ™³zpÿ?ך†w½hæÌúýµ·{c6 4sVîçs œvþ/ Yx{7³Lixþ^[¾–§ûÁÝôÊ/ìÙø…Ë^a7ç06 0 wÍ_Šfî†À—o½2„™_ÉÿÑø_»1~£cæÿlþá©™ûÆwïÞ} Ñ>ˆ›šÚvK¾ök+¥ïM‡}Ÿn8¾C£¾Ob4 #,°4.áÕ[:ùhô 67ÿ+’áhö 6?œjVE`~š9ÍÄÎÏjÌJ`xš9Íį˜•˜fÎ_3±ó³³^fÏ_³ñÃ+gez/ÐÌùk&¶÷+1#áhåüµÒg¸­Ž@ïš9ÍÄönf"0ÎßpîóuÛÞðŽü •wçêpUù^(nÀ}“2hB@cXÃ:µcëWîŸI¿˜~¥×kËÿM)eé¦vlzm¨Tfz=u¾, —6:oŠió|å9êûÔè‰#@€Àl-;ÛcÏè‘À žAgµGbs4zVkÝÙš„w½hæ¼:«½ß3¨ 8«5 ïú[ÀYíïý‘š@3gµÚÇ?×ä¼è½@3çÕYíýþ˜‘@M ™³ZëãÌÖ$¼è½@3gÖYíýþ˜‘@M ™³Zëã}09öÉ>åÅìEŸ¤#  @ ; »ã:0£~¼²ù{ûï½=¿É1ûÃÒݽI<ýcÊÂiS—oîêÕíEkÙw×mW…n/Š«¶§?ÚH\·bFyŸºej\FCàé N…Z%>xZƒgÐYíÓ ”ÖÈ4zVk ÎlMÂ;Þ 4s^ÕÞï Ԝ՚„wý-à¬ö÷þÈŽ@M ™³Zí㟃krÞ ô^ ™óê¬ö~ÌH &ÐÌY­õqfkÞ ô^ ™3ë¬ö~ÌH &ÐÌY­õñ>8Šçy¯7Îó˜žôN@cï¬ûv¦;wÞ;µ}ËÛîÍî{r–ÒŸæ‰îíJ²)ü0 á¿ýøû7Ÿ:]Ùü¥®ÌÑÄ 
»víÚ—Bº¬°KJ_œÞ±ù[…q]Õ}ê2«á r_^ôŒ!_¡åèoFÏ`£qý½ZÙ\fÏ`³ñƒ+#sý'ÐÌùk&¶ÿV*#ƒ-ÐÌùk&v°UdO ÿš9ÍÄößJeD`°š=ÍÆ¶Žì ô—@3篙ØþZ¥l ¾@+篕>ƒ/eúC ™ó×Ll¬N†GÀùž½œk%Šç’éòsÅ]6<è/ŽýµóšÍ®Jå‡y!ßÝ7“NÎRxg~oá—ó« gÚL*¯™LŸJ3iâÆ¾þ¸éí›Þ½{÷îmŽÙÁîÙ¥Eƒ¥ cŠÆèdûhîS'E€À( üÂÂSÂq¥e£´dk%Ð7Õ³W=ƒ¼œÕF”ÄèŽ@3gµ–3[“ðN ·Íž×Fîí*ÌF`øZ9«Õ>^ôVÀYí­·Ù´*ÐìY­ÎãϬ­jëG =fÏ«³Úž·ÞZhö¬ÖæqfkÞ ôV Ù3ë¬övÌF &ÐìY­õó>xŠ{¼gŠ{ n: 0ÿqþSA? œyæyË-{q,‡ÓóB¿gÅB|tˆá¨CóN)ì 1ÝžÇ|-Åô71 “í_žžÞrÛ¡±¾wVÀ>uÖÓhæS Ýúwäÿ7vc/søÌw^ÜËéz>×'ïû›ðž;ÿ¢çóšÀ¨ ü×#_^¶äfpV¦H £ÍžÕÚäÎlMÂ;Þ ´r^7¾ÿ/ÛNðº5Ÿl{ %VΪßWGé'ÄZûEÀYí—ú­œÕêˆ~o­ïª•@7Z9¯Îj7v˜ê ´rVk#:³5 ïz'ÐÊ™uV{·?f"Phå¬Öúöêý¥¿üTW¼ƒØ§ÿÁ CÞÞÁ! u¨€âÆCE|'@€#!06«´È–®¹æ²;òÎÕª‡U†œzê© N:éé:PGŽÍ„;o-ïûÉ—*•ûZžHǶìS[|: 0äg,~^øì}þaÿ·‡|¥–G þý‚§„êÙkæå¬6£%–@gZ9«µ™Ùš„w½hç¼ö&C³ Phõ¬ú}ÕÏÞ 8«½õ6VZ=«ÕùüÞÚªº~Zhõ¼:«­yëE UVÏjm>g¶&á@oZ=³ÎjoöÇ,j­žÕZïƒ)P½É1/r Š»´Š»kX Ðÿþ­ý¿G2$@€pƒcw6ûÎìžð¦Û/ ߟ¹½;•Ž/.9æ×Ñ¥G<ø¬ÑÎj£Râ´/ÐÎY­ÍîÌÖ$¼è®@;çÕ ŽÝÝ£8X ³ZÇï«kúL {Îj÷lL “ížÕj.~oí䎋ÀÜížWgun[-:)ÐîY­åâÌÖ$¼è®@»gÖYíîþ@M Ý³Z§ïnp쎲›»àª¸± ¨†$@€ƒ#PœTeJ€hM ZhõÇG¿:TÿËE/º'P=cÕ³ÖJqc5+gµ{{cd ´{Vkc9³5 ïº'ЩóÚ½ L€@U gÕï«~–t_ÀYí¾±tB gµš‡ß[;±Æ P_ çÕY­o¬•@':qVky8³5 ïº'Љ3ë¬voŒL &Љ³ZËûà Tor yAÞà® Ï2WÜØg" Ð{޽77# 0O{Ì·ÊýûO™‡ÙMI`øªg«zscõ¬µórVÛÑÓ—@±@§Îjm&g¶&á@ç:}^;Ÿ¡ ¨ tò¬ú}ÕÏî 8«Ý³52N tò¬Vóò{k'wÇX.ÐÉóê¬>ÜÖ7èäY­ååÌÖ$¼è¼@'Ϭ³Úùý1"š@'ÏjmLïƒ+ È±C{§¸±C†!@€ƒ-;}Ù @€áH·~à!†½\Õg¾óâ^N7ïs¥”§öþmØz÷gò;æ= tãJ˺#^ÎXü¼cçþxá¬úO†üûM [gµ¶Ng¶&á@û<¯ßÿ—m'tÝšO¶=† £@'Ïê¡>~_=TÄw­ 8«­ÛéI —Ý<«Õuø½µ—»i®aèæyuV‡ý§Çúz)ÐͳZ[‡3[“ðN }nžYgµýý1š@7ÏjmŽn½¿ô—ŸÚ¹¿ÌÑ­$|ÜÓÎÿà…! oðeÌOúŠçÇݬ @ üCknŠ” @`t8önïgR¾rÿ¿„/íûFø§ý7‡[g~îMûBÊÿÇ‹Ùb^½4. 
-ž¾àÄðË‹ž~aá)¡»w1¼³:û^xJ žÀ|œÕZ>ÎlMÂ;ƺ}^86¶¢ tû¬Î5¿ßWç’ñœÀìÎêì.žè7ù:«U¿·öÛOƒ|ú]`¾Î«³Úï?òë7ù:«5g¶&á@cóufÕÆöGšÀ|ÕÚü~WàØiÑÙÇSä8»KݧŠëòh$@€£& ÀqÔvÜz  @ ¯8öõöHŽ @€À@ t¢Àqã›ÿã@­Y² @€ @€ @€ (p|È¢ÛŸ96!¬¸± ,¡ @`4ºwÕÊhøY% @€ @€ @€ @€ @€Œ°ÀuÛÞðŽî0AcKWÜØ˜“( 0b GlÃ-— @€ @€ @€ @€ @€:+P-rŒ1^ØÙQ‡h4ÅC´™–B€:+ À±³žF#@€ @€ @€ @€ @€ @€øÂe¯ß¨Èq–WÜ8 ŠG @€@M@cMÂ; @€ @€ @€ @€ @€hC@‘ã!xŠñ•8T@ã¡"¾ @€ @€ @€ @€ @€ @€9þ Nqc‹?Aº @€ÑPà8Zûmµ @€ @€ @€ @€ @€ Ðe‘/rTÜØåŸ0à @€áPà8<{i% @€ @€ @€ @€ @€ Ð'#[䨸±O~¥A€C`l0Ò”%¬ýÀÆz+yëGËžôØz!¼æeõÛµ @€ @€ @€ @€ @€ý!P-r|áy )¥wôGF]ÎBqc— O€†O@ãðí© @€@ Äëþ»ÿ1„ê¯z¯×¼,ÕkÖF€ @€ @€ @€ @€ô‘ÀÈ9*n죟:© @€Á( Nª2%@€ @€ @€ @€ @€ @€ƒ'P-rÌ/I¸pð2o,ãêÚ®Ûö†º—@46’( @`Ô8ŽÚŽ[/ @€ @€ @€ @€ @€ô\`X‹«ÅÕµõÔ„ @€ÀPÄ¡X…E @€8íܦvSýÂ{Û¢Ýô'@€ @`^ø–ÎüW×}ì h̤H€ @€ @€ @ /<ïCSJCqÛ¡âÆ^üĘƒ ·€‡{­Ž @€ @€ @€ @€ @€úH`XnrTÜØG?TR!@€, Àq€7Oê @€ @€ @€ @€ @€ 0xƒ^䨸qð~ædL€úU@c¿îŒ¼ @€ @€ @€ @€ @€ @`hµÈQqãÐþHZ˜ŽóÂnR @€ @€ @€ @€ @€uA+rTÜ8ê?±ÖO€:/ À±ó¦F$@€ @€ @€ @€ @€ @€ J‘£âƆ¶S Ф€Ç&Á„ @€ @€ @€ @€ @€ @€N ô{‘£âÆNî¶± @€ƒ8¬á3 @€ @€ @€ @€ @€˜~-rTÜ8? ¦$@€#$ Àq„6ÛR  @€ @€ @€ @€ @€ @ ú­ÈQqcÿþ¬ÈŒ ‹€ÇaÙIë @€ @€ @€ @€ @€ @€è—"GÅÿ£d @` 8Ä6I’ @€ @€ @€ @€ @€FE`¾‹7ŽÊOšu @€ùPà8ÿ{  @€ @€ @€ @€ @€ ð0ù*rTÜø°mð…è²€Ç.ž @€ @€ @€ @€ @€´"Ðë"GÅ­ì’> @€@; ÛÑÓ— @€ @€ @€ @€ @€tQ WEŽŠ»¸‰†&@€æPà8' @€ @€ @€ @€ @€ 0ÿÝ.rTÜ8ÿ{, 0ª Guç­› @€ @€ @€ @€ @€F [EŽŠæG@¢ @`(ƆrUE€†Xà…o‰ó¶ºW‘Âk^6oÓwtâ2„K?5–]L³·@ìÓ!ìmŸnLÒ²·@ìÓ!ìmŸnLÒ²·@ìÓ!†ioû”XZ @€ @€ @€üL ZäøÂó>RJïèŠâÆN(ƒhGÀ ŽíèéK€FL ZX-°¬þªz €½ž½÷ÕðWñ±\ØçiÎ™Þ Ö~ ÿS)pœ ÈÞΦ2Ïìípìãl«°·³© Ç3{;û8Û*ìíl*ÃñlÐ÷v8vÁ* @€ @€ @€¨9R @€ ‚@i’”# @€ @€ @€ @€ @€ @€ 0\ ‡k?­† @€ @€ @€ @€ @€ @€ „€ÇØ&I @€ @€ @€ @€ @€ @€ @€áPà8\ûi5 @€ @€ @€ @€ @€ @€ @` 8Ä6I’ @€ @€ @€ @€ @€ @€ —€ÇáÚO«!@€ @€ @€ @€ @€ @€ @€! 
Àq ¶I’ @€@§¾¸ý‚×}ì ±ú+¥ta§Ç7Þü ØÛù³ïöÌö¶ÛÂó7¾½?ûnÏlo»-<ãÛÛù³73 @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€úM ö[Bò!@€ 0 ãk&_›¯ã9u×’ÒMS—oùŸuc†¬qÅŠs+/;-†Ò ò?Œœ˜/ï˜Ó1)Ä#B ·…nI!|/ÄxKÊÂ÷âþl÷ôô–ü¹×° 8+ííìêµ.N!½âÐQÒL8wº²ùK‡>÷ @€ @€ @€ @€ ÐOcý”Œ\ @€ @|YŒauÝõÄðwyûÐ8®Z5ylX^SxuŒñ©‡›ÄðÀ¿y%†SBþéÁ K)/w\¬^3ùÉ,¤Í{ïüÑÕ»víÚwxO[ÀYiuÿÆ×œÿ¼”Òósõà±yp¬˜?øÙ @€ @€ @€ @€ @€@Ÿ (pìÓ‘ @€AxùÄÄ£—–—üQ~3㺼pqÉC•‹¯,¯Ûªþ™å¥_±ô¨Gß±jÍäæ;²ûÞº»R¹»ñQD>åË—…X¾8/ >¼¸qø–kE @€ @€ @€ @€ @€À äw¢x @€ @ ³ã“¯\ZZz}^Øøº¼þjIgFËJ1þÆ1¥%ÿpöêõÏï̘F!0˜Ç<æÄ óâÆ_ÌìeM€ @€ @€ @€ @€ø7Ž~ @€èœÀÄDy|ÍäGb9Vòݹ)¿Õñ)å±ø…ÕkÖÿ÷ü©?ÓÖíùŒO _V¯ÞðÌR¹T=g±_r’ @€ @€ @€ @€ @ UŽ­ÊéG€ @€ÀƒÕ"ÓNyÖt^rµúÁ‡½øãË9îÄÿÕ‹©ÌA`¾^11ù¸4v…—Íw.æ'@€ @€ @€ @€ @€tBÀM'P4 @€Xv܉œ_'wf YHặBL·¤,Ýcéq!¤“BŒÿ.ëiŽ•_d÷Æñ5“×OíØü¿í#ŽÀ  üÚÄÄñ‹JñÚÃ-wù @€ @€ @€ @€ @€æPà8—Œç @€4$°úœõgå…Š¿ÝHp é¶Å÷…¶LMm»e®>ãçœZˆå×çÅ‹kó˜sÅôü­\{þ7¯Ü¾í³=ó‘ÀPœ½víÊaQõgû”¡XE @€ @€ @€ @€ @€~&P"A€ @€VV¬8÷¸PŠ›òBÄüÒÅú¯¼òš´/={êòMR¯¸±:ÊÔåÛ®Ëodœ<°?û•”ÂõGù¥q¬Ê—-Ÿ˜8¢(V;AXµfò©å°ðóùSÜ8H'W @€ @€ @€ @€ @ !Ž 1 "@€ @`6±% .ÌË —ÍÖvð³²?œÚ¾éÓÓ[n;øyÑç«®ØòålßOŸ“RúDQl ñ1ÇÄ¥¿Y§À ŒOL¾²Ãßæ?Û' JÎò$@€ @€ @€ @€ @€4# À±-± @€<(pöÄùÏÈo•{íƒæø'^:µ}ËÛæh.||å•Wþäöܼ:¤ôÅ¢àüÉß>óÌó .‹ÆÑN`>N=õÔùÍï‹åXÉ ˆœÏ\ÌM€ @€ @€ @€ @€覀Çnê› @€À ”Kcÿ#ÄX®»Ä¼(ñ¦®}ݘwïÞ½÷þ{÷ŸK~»nx G-ᤓŸõ×¥£KS– @€ @€ @€ @€ Ð7 ûf+$B€ 
@`pV¯>ïÉ!¤Weœeáw÷ìÙ³¿(®‘ö;?ö£²sŠbcˆo^±âÜãŠâúµ½4WŽ-YxÓ굓;Î^»îWú5Oyu^`Õĺ—•Ã¿Ïo"ý¥ÎnD @€ @€ @€ @€ @€@ÿ (pì¿=‘ @€¾ÈÆÆ^óWÝDSøìteóêÆ4Ù8µcëWBJWÕë–gµtlñÂW׋é÷¶œv,„xÎX(ÿu^èøÕUç¬ÝYgµ´ßó–_kguî£V¯Ýpq,•>‘èÛÚ(z @€ @€ @€ @€ @€O@ãà홌  @€Ì«À©§žº /À{UQY–6Å´ÒžBö΢~±”&Šb§=þ|©Tú³…Xv˪5“ïY¹fÃS'w™ÖX¾|ùâñsÖÿÎÂ¥ oÈãÞTX4\o0m @€ @€ @€ @€ @€Pà8€›&e @€ó)pâÉÏ<#¿ºñ¸z9¤¾ÐéÛkóUoqL!\Sû>û{|ÎðÆe¥«÷Æ×L^³rÍäËóµ×¿EsvOç_ ޝ<ï˜ãOüçüÖÆwå»xT£)¥®K)]Úh¼8 @€ @€ @€ @€ @€@? Œõsrr#@€ @ ÿòŠº3вЩ¨±h„úíùíÛË¥xf½¨Ó+óöwÕ‹ж¼Î1þZ9„_[½fò†¼˜ô’;³½—~¦Rù逮g¤Ò^5±þE¥r|w^›úÜæž>òÍì¾ ž—¼Sikózz @€ @€ @€ @€ @€ý'àÇþÛ @€èwÓ‹Ü }¦(¦öÒ™ÝEýó?ìT ‡ûãÉ1–Þ{TyÉ÷ò[?xöÄùÏîöêV¬X{B©\ÚÝtqcJ3Y¿uÅöͯ¹¾R¹°dO€ @€ @€ @€ @€xH@ãC> @€ P 0>~ÞIùíO­–î¸úŠ-_©Ó^ëÔÔ¶[RJß®?J|îʵkŸX?fXZã#ò}yýXyìÇ×N|åòåËdžeuÃ²Ž±±rÓÏÎÿ)˯LoßôÞaq° @€ @€ @€ @€ @€@M é¿\Yëè @€ÑH üÇ¢U§ÿ2É/œëúësE3Ä´è?Å [{ ñE±+ÇâwÆÏÙðÖU«Ö?fØÖ8"ëÙŸBö‡÷ÝùÃçLW6iDÖl™ @€ @€ @€ @€ @€Àˆ (p± ·\ @€íÄ~¾¨óÙ¢˜N´çÅ_»‹Æ‰!=¯(¦ÿÚÓ¶”Bõ×¾vrË KáJ ãwW¯ÜröêõÏog<}{*°'K3ϛھåm»víjëç §Y›Œ @€ @€ @€ @€ ФÀX“ñ  @€iô¬òÆ:¯,K_«ÓÜÁ¦øÕâÁâs‹cú+bzÇ–¯ç­;ë¬ssÁ’±WÇßb|RËYƸ°:ÞØ‚¸./tü», ï½ë‡Ûε,ÚµŽ)„MY¶q:íýH¨Tfº6‘  @€ @€ @€ @€ @€}" À±O6B @€B †gå¹?„ïÅt¢}ïÙÍK*¼”¾ðÆÉNäÒ1vîüØòqÿgþëÝ+×LžQ ñ‚Ùù÷ÂEÏO|n©>ºä¨G¿{ÕšÉÿs ØYÙ|óÜñZz#î̋ߵÿž;Þ·sçÎ{{3§Y @€ @€ @€ @€ @€ó/ÐÆ_Œÿäe@€ @€@ïΘ˜8&†øØº3¦tÿÇ+›o­Ó¡Æ]»¶ÝRøi½áò‚À¥+Vœ{\½˜hˮܱy×ÔŽM+îŸIOÊRø£¼îíäïã£J1þÞÂRøãk6L¯\{þ‹ÛOßVÒ=)…÷ÝÏþ§LmßüÇŠÔ @€ @€ @€ @€T78êÎÉ› @€@g‹O åÂI«·f…Q ¨Î÷ìzÃÅcOÌÛÛ*¬7~/Û~vÛâ[O=õÔ O:å™ãùÜäÅŠ/j9‡Ë1„•å0¶r|Íä?¥”.¹#íÝ´»R¹»å1u,H!ÝS¸dïÝþÏ5×\vGa @€ @€ @€ @€ @€!Pà8¤kY @€:-P‹Ç™büNQL'ÛS 7çzu KcáIùœ_îä¼ó=Öž={öç¿väyì8{âüg”Kå7ä…Ž“!†£ZÍ-Æøôü×Ådž%œ;n:e—\]ÙúÍVÇÓïp,»çîrvÄ+§Ò¾+C¥2sx„' @€ @€ @€ @€ @€ÑPà8Zûmµ @€Z(Åx\QçÒE1lÏ‹¿[4^~SÞãŠb¹ýªÊ¶oäù¿ùôÓ×ÿþ#Žç•Jñ‚âsZ_S<2Æðëc¥Ò_»á3a&\•k1M¾/Î/v|UåW­^3ùÅ™/þî·¿þ{öìÙßä8  @€ @€ @€ @€ @€78ú! 
@€ @ 1–¦õ´À1K¡°À1…8²ŽíWš®lýôÔöÍãiÿÌs·wæ«·ÔÞüÇ_P.…Ëžxò³n^µfÃ…+V¬=¡ùAô @€ @€ @€ @€ @€eŽ£¼ûÖN€ @  â¢Âð™ÞÞà˜(pÌo,\\˜÷LMm»ezǦwÜþý›O ÙÌ+C Ÿmkù1_ŠáícKÞ4¾fòÏÛKg @€ @€ @€ @€ @€‘©ÕZ, @€- ¤˜åEŽý{{ƒc©Ç‚‚¤G²y÷îÝò…_QýµbbÝÏ•ÊoÈ?ÿ§ÃÑ­€ÄÇBJ§·ÒW @€ @€ @€ @€ @€Ñpƒãhî»U @€h^ ßà˜BoopÌJaoáBbRàX€tueë7§vlzË­Ù½'d)¼:¿óo ºh&@€ @€ @€ @€ @€ ж€Û&4 @€Ñˆù Ž¡ðÇÞ8¦8s_åºß9©À±®ÐC_ªTrÏpiõ׊Õ럻`A¼ ¤pnˆqÉCQ> @€ @€ @€ @€ @€茀;ãh @€C/ßθ°h‘3Y7* ÒD{œ)í+ Oѿإéð€«¯ØòwWlßüš™}wž·^rx„' @€ @€ @€ @€ @€Úð}ÛóÓ› @€ÀÈÄ”/æ×!Ö{ÍôôÏ©œåóÕ¿Á1Ät ^ÎÚæ˜˜(‡¥g†RzcJéô 6Ža<&@€ @€ @€ @€ @€ 0—@Oÿòñ\IxN€ @€@ÿ ¤˜îŽŽ31.îåJJ3ù|õy>÷÷2§AŸkÕªõ‰‹âkó‚Ö×çÛ}b(ØóA_¯ü  @€ @€ @€ @€ @€æO@ãüÙ›™ @€À€ Ä»‹.Í„ž8Æ–å”RØW£=„ñsÎ?-Äòó{_™5.T×è§‚ @€ @€ @€ @€ @ Û »-l| @€C#òǼü­Î+†raÁaîM7ÅØ@AeŒ ç=ýôõ8bY\—;¾1ßÛŸŸ#¬áÇ)†ï5, @€ @€ @€ @€ @`ä8Žü @€ ¤üÇúõ!k¤à°Áé «ÎW*Œ)8®>gò%YLG Õ‘æ½wfŸÙµkÛ¬ÅAV­Yÿôâyaㆼ°±­u§übüT̲‹¦.ß²«Å”t#@€ @€ @€ @€ @€AŽ#¸é–L€ @ 5npìqcŒqqÑZò[÷6óÞR,=«(®í‹ ÏÎÇùz'ÆjfŒåË—óèW†Rxcîö«Íô-6/k¼+„ôÑÂÅÓÛ7}k¶Ï @€ @€ @€ @€ @€ÔPàXOG @€ ä…lw\àJ=.pLYX‹®pLáÇ.b?¬X±ö„±Å _—oÎÎ÷ï„v ò «ÅŒ¸ï'½úê«ó"G/ @€ @€ @€ @€ @€­ (plÍM/ @€#'P Å78†”–ô¦T*œ/…øƒžæÔ'“Ÿ3ù«ÕÛC +óÛú³_^Ԙ׷†])Ë.š®lýTþ¹úÝ‹ @€ @€ @€ @€ @€@[mý%×¶fÖ™ @€ˆw%œb:®(¦“í1…ãCÁµ’1¤‘)p|ùËÏ?ré‘åɼúðyQãÓ°.ð©¿éÎ,…Kg²ìâ«+[o¨«• @€ @€ @€ @€ @€@s ›óM€ @`dfR¼µ\T,—J'ö(O§p¾˜²¡/p\±zòçÇÆªEa]ñEÛT´Gyä?ç·5^|GÚ»iw¥RXØZ4žv @€ @€ @€ @€ @€³ (pœMÅ3 @€(Ï„ëCé°Ç{ÐHÁáÃ:´û%¦¼À±~9_–JCYàøÌ‰‰…§Ä¥«K1¼)/l<­]ʼRúÄLˆ]¹cÓµÏ @€ @€ @€ @€ @€ê (p¬Ë£‘ @€šÀ7~ý['üÌ1Æ9ÿ‘(8¬õèþ{~Óà‰õËóòÇ™™»ŸIïf8kbòÄ1¼>/6}m¾öÇ´=s ? 
1ûÈL*]råŽÍßn{< @€ @€ @€ @€ @€hP`ο˜Ü`a @€ŒˆÀž={öçŽÿ’/÷és.9…Ç…‰‰r¨TfæŒéPCõÃÂñu/pÌ‹÷¦¦¶ÝÒ¡)çs˜¸r͆—–bzSLáÌc¹ídRøF>ÆÅwÝ‘m¾öÚ-÷´=ž @€ @€ @€ @€ @€4) À±I0á @€FZ †ëóõÏYàX½Ýq|fác§BèzQáÓ²åX÷ÇüFÉj_#¯?K!kÿ6Äfš¹ïÀ{ äÌ3Ï[¶øˆ±Wå_.È 9OÎï£|àí?K\–BúxÌÂEW\¾ù3³´{D€ @€ @€ @€ @€ @ g {Fm" @€ƒ/RüFý’Â|cåSòÿìzc(?0O]Ô¼ðëu~Ö8µcóEÄõ*fÅêõÏ‹oÌó_›ßÖ¸¤ÝyS ?ÉÇøðþ,»dgeËwÚO @€ @€ @€ @€ @€PàØ Ec @€¼Pîúê%‚¯ÿ·® ¦íæT /*L¥šï€½V¯™ü½¼¨ñO:“vº>ß³‹öß{Ç–;wÞÛ™1B€ @€ @€ @€ @€ @ 3 ;ãh @€#!cöJu×c\ž\X7¨#ÌSw¤™,~¡n@6f!U_¸ é”fRˆ;S–]4]Ùò—Ñš  @€ @€ @€ @€ @€Ì›€Çy£71 @€Á˜ÎöþÓêÒÒæ·8>zÎìcø¥åË—/Þ½{÷Þ9cÚlX>1qDHáyõn“L!ýèÊÊæ¿osªÁéžÂíYHŽû\25uÙMƒ“¸L  @€ @€ @€ @€ @€FU@ã¨î¼u @€hE R™ k7\‘w}Cî‹>þñ¿”·ï®ÓVÓ±qñi!ÆúžIá³ù$ù…ˆCþJákYÊ.ú~Ú»õK•Ê}C¾ZË#@€ @€ @€ @€ @€"ú!xˆj) @€tF eéòXŠõ C9+½,Ÿmwgf<|”,Æ——üð'1}úá†è[J3ùj®Ê²ôþéÊ–¿¢•Y  @€ @€ @€ @€ @€À þರT @€˜J÷ýu áõBSŒ¯ÉÄÄQõbZm[µjòØü2¯©Û?¥û÷ßs઺1ؘBúqÈÒ»îÏ“¯Ø±yµâÆÜD) @€ @€ @€ @€ @€<( ÀñA  @€hH R™ )]Q/6Æpô‘å%o®Ój[\~+ÄxD½þyæôÎûQ½˜AjK)}5…ìµ·ÿæÇ_qùæßÛYÙ|ó å/W @€ @€ @€ @€ @€³ ŒÍöÐ3 @€ÔHYº<–ãëÅÄÞ²bÅŠ÷]}õÕwÕ‹k¦íÌ3Ï[–7þzQŸ<¿?+Šé÷ö¼¨ñ@ˆáÊ™½ÿª[?ßïùÊ @€ @€ @€ @€ @€@³nplVL< @€aº²åó)…ïÖ§ˆË,=ú=õcšk]tÄ‚‹òÛ!Y¯W^ø­<¿ÏÕ‹é綘ÒòÛÿ$ìŸyÒÔöÍWmWÜØÏû%7 @€ @€ @€ @€ @€ÖÜàØºž @€FY Ë‹ð~'†ÒÇê#Ä×­Z»á[ÓÛ7½»~\qëøÚÉ·å·Bž_™¥ø¶<&ÅõkûÔå[:ZÚ¯ë” @€ @€ @€ÿŸ½;¯ë¬ïÄÞ#ɱ…$Á„„@ Z .SèP0Kĉä*vË ¥Cÿ´¥ËÐ)]h§Ói™¡-¥Jo‰mÕ²ã !SÜK( k–R ’}%‰ãX:ïü®'^¤{®¤+YºúÜçquïyç=ïû9美ûèËK€ @€°ƒ£g€ @€ lÛ¼~Sì–X»Sbü£ã/¸pÍ…ºÈ£'õ®\3Šôûu}Ä®’×nß²vK]v @€ @€ @€ @€ @€޼€ü=0 @€9*[ .^±rà×gÊô¿û­ëß{Ýu×íÏxFª‘_ê*»¾œRjöo‹²LySïÊÜ}ëM¿½k×®áV¯±dÉ’ž3ÎzÎGý¯µtN~kKuŠf•@'|Wf¸Á @€ @€ @€ @€ @€¦I Ù/!OÓ\† @€À܈ O-Rñî™2ûcÏ<ó¯‹q¯ÜxCßÊ5ï‰9ü—fóˆdJEñöŸö’ÞþÿºmpÝg›Õ7ÚÎ_uéOwå®wÅÛÕÕ>Ò^ýéЖŸn­VÕlè„ïÊlò6V @€ @€ @€ @€ @€Àt 8N—´ë @€èPáÝwÿ^÷ÂãWî ¡ÕÌ1j^’ºŠÏô­ørÎùƒñù«{sþîÅC·œRõ<¹*»Îˆí4vì{c´=7 -½r.®½ñ[7¼­¥bE @€ @€ @€ @€ @€ÌÇq ‚ @€ÀìرcÇ{W®>§Hé“‘G|R+3‰ýŸ;?ó"ÅxJ±°(ºŠ¢|ôäsûª# yëÞ*¯¼nœ»O>z)? 
@€ @€ @€ @€ @€8Bûø]Þe  @€èm›×mddø¹È·Mç|"Üøƒ´÷á¥W®»i:¯ëZ @€ @€ @€ @€ @€L^@Àqò†z @€ @ ®ÜxCÚ›¦/ä˜óMU.^ºuëå_w @€ @€ @€ @€ @€˜}޳ïž1 @€+°uëÚëGЇ"vrÜ6•ƒÌ¹Øú`µû'¶o^ûoSy} @€ @€ @€ @€ @€ 0uÝS×µž  @€˜‹WlÚô½˜w__ÿ¥¯MeùÞ"¥§µÍ!·WEõ–m›×oj[Ÿ:"@€ @€ @€ @€ @€ @àˆØÁñˆ°»( @€ÎÜpÕªÝg9ÿjì¸øé˜q5áYçâKU•îÎ[o*Í¿ ¥t^ìêxf‘‹Å)Ç:á¯ø‡ÊŠ"}¦(ò§÷ÿ¸cëº/Zç3 @€ @€ @€ @€ @€³[@Àqvß?£'@€ 0«–.]:ÿøÅ‹#G-Jeþáðƒ{ïºrþðÝÅààȬž˜Á @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€;],G@IDAT @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @`Ƥ3!@€ @€š ô­¼ä…EÑõúfEUª·oÚ°«Y6 @€ @€ @€ @€ @€ÀLèž ƒ0 @€ @ ^ é™eJonVYæòëѾ«Y6 @€ @€ @€ @€ @€ÀL(g Œ @€tªÀŠŸÙ·r`c§Îϼ @€ @€ @€ @€ @€°ƒãDåœG€ @€š,]ºtþ‰'Ÿö¶¢LoOE~¸I©& @€ @€ @€ @€ @€Àœpœ“·Ý¤  @€ @`*V\¸úܜҟ§”žþèu§\ß @€ @€ @€ @€ @€À¬pœ•·Í   @€ @`& ôõ]|zÑÓógE*.H3q€ÆD€ @€ @€ @€ @€˜AŽ3èf  @€ÌN³ûûç=«kÁ[‹œÞ‘R±pvΨ  @€ @€ @€ @€ @€Ó+ à8½Þ®F€ @€(ð¬rÁŽT¤WÇÎ^ @€ @€ @€ @€ @€- ”-Ö)#@€ 
@€Æˆ]Ñä0 @€ @€ @€ @€ @€ÀŽcÀ8L€ @€ @€ @€ @€ @€ @€L@÷Ôu­g @€ @ ¹ª®Ï]é6ë3W韛µk#@€ @€ @€ @€ @€ÌÇ™r'Œƒ @€Ôl¼ìKQÒøãE€ @€ @€ @€ @€˜õ嬟  @€ @€ @€ @€ @€ @€ @€³N@ÀqÖÝ2&@€ @€ @€ @€ @€ @€ @€³_@ÀqößC3 @€ @€ @€ @€ @€ @€ @€³N@ÀqÖÝ2&@€ @€ @€ @€ @€ @€ @€³_@ÀqößC3 @€ @€ @€ @€ @€ @€ @€³N@ÀqÖÝ2&@€ @€ @€ @€ @€ @€ @€³_@ÀqößC3 @€L¥@§ü÷b§Ì£•{Z)š‚š#uÝ)˜Ê¾.;ù™1·©zjô{¨@'?k‡Îµñ¹ÓÖÁÑæè @€ @€ @€ @€h«@w[{Ó @€À¬XÚßÌñ傟î*ÒEΧç”N/R>=&Ÿ‹Å‘Öx(ŽÝŠ|w.Š;âçFªòÚ\îùì›6}o¦L¸1Š£^’Ê®—GÄäìçI9'¥"”‹|\JŃ1ŸÛ¢í¶œÓm)å[ªªøôÈC{?¶cÇå·Î”y´:Ž V®yz™«©,Ÿ–s>-æwzÌ«ñs~Ì÷ÑÏ÷b¾7¥\|¯Jé†[ª?;8¸»Õþ›Õ]пfIÙ•Ï+r<+E<+ñÌÄsòÔ}çäâÎøyWü¹9§ü©ª¨vÝsË÷¯Ýµk×CÍú6Äâû!­üU®vÜ}ë÷7G@uxݵåÔ™°fõ­¼ä…©èºl´ EÀí©aÔhm‹¯`þv“öÈ¢_Ú´®¿YM_ÿšå©ÌïnVSé¿oÛ¼v]³šÉ´uäÚ[>´yÝ{Çp)û.xYWÇ=^ß³ãÆ¨õp¬FÃÕUÎ[·çÝ—ƒƒ#£:H€ @€ @€ @€ @€æ @ë¿=qL™ ÐiË–]rÜÂãº/‰ÝÜ~>/h×ü oäW{î{Çt_×?ð”£Rñ«±³ä›"l2®pZ³97ÂŽ±ããßí­F~{ªv°ë½põ/DHð/ÇG®ª·mYXx©oÕê×EùGñðOëÜGƦØrÿÝÕ¯¾zý-œ“ú.\ýëE*· Öå*ÿÉPÞý;íøÔZæâ¯‡6¯m2{½º¿ÿģ˅ˆ¹\ðØÁI½É[FЇß6‘Ðè¤.'÷õ]rjÑÓõ¶ø.ÿ|̧Y°nR—аc„òª?¼ñ_oX7Ý;Wδ5ë‘`l×?N ´ÉÉñݼ6žÙ7))zW^zI™º64«‰¥ì-MÂzÍNmÚÖÑko‘/Â¥ï<àü«²«;­‹<ù3m›Øç|}Qåÿ²uËúOì|g @€ @€ @€ @€ @€Î(;k:fC€ 0–@ߪ‹×õï±ÕûÚnl\/ÂU #üñærþ®ï]9pþXc˜ìñƆÑÿÿ<ª+}§(Ó[Û¸;hHûvDLEwYþKßÊ¿i„y*8;'ö­\³!å‡ÚnlÌ  ·UÇœ˜þþ¼ó.:©Ù”^ûÚ‹Oˆk_‘Êò·Éz~*Óoö• ®jôÝìÚSÑÖ§E¸ñK1—6…£L–ù¨¯÷®ZóÖ©óh}ž¿jÕSãùüÿ‹žîoÅ3ûK1Ÿ) 7î›aQœÏßßž~ÖÙÿÚÛ?Ð4|7Úx'z¬Ö¬‰Î}¦7'×Þ¥K»cׯßpã§ÚnlÜÙtvQ–[±rà£Ë—¯:e¦Ýkã!@€ @€ @€ @€ @€Ó- à8Ýâ®G€˜f¥ÆÊ5;nŒ$â1Syùàé¹í¾z\§­ÿ­¹bÅÅgqÖÙŸ‰þß}÷Lé‘#ÛÓ\\{Ïë_ý´E‹OûT²ßßµî)¹)-ë^0ï“}}—>yJú×) @€ @€ @€ @€˜%mý¥óY2gÃ$@€ÌÞÞÕ'Ÿ¸ø´k"õ«Ó9鄼)B•ëŠþþ®v\·wåšØ­î‹‘û‰vô×r©8±ì*?Ñ{áÀZ>§M…ç÷_ò£=ÝÅ?FZî¬6uyx7)=«§+}rÙ²eå;vw§¸v:ýð“Út$Ï9±\ØÂNù«ÈËEñçðmËó8Ö€#|ûëñܰ]Ïý¡×é[µzuNéŠø>/<´m?Ïèò–øNþ§©¸f§¬YSas$úœ‹kooïÀ¢ž2}"þ¾ùÉ©6¿+ŸYÌëúûsûûOõµôO€ @€ @€ @€ @€fª€€ãL½3ÆE€˜¤@ßÊK^˜ŽJ×EHíˆì@!¬KVt-¸,¦1©ízW^zI*ò‡£›£'I2ÑÓ{Ê2} vç{Wt0-ÿý¼¬¿ÿ‰]e÷Ž©Þq³7çÌ…Ç=ñ÷ã4v.ë*Ê¡¸ö¼ýǦêgã‰ÐÞ맪ÿF¿Öü©Ô•ÖG¨Õç°ÊE¾9ç➉Œ+®òúå‚­± áü‰œ?Ö9½«ÖüZ‘ÓÚ˜ÆÔì&7Ö…G9Þ° Ì÷,_¾ê”Qš'|¨SÖ¬ ̰çâÚ{vÿ¼r^ÚOøÓ§ëvÄwéGæw-Ø8]×s @€ @€ @€ @€ 0ÓŽø/ÇÎ4ã!@€t‚À«V‘Š®OŽ7Á®‹\ìˆÀÅ7sUÜ»ÞÝ‘¾ãSÎÏ)Rùì"åçD`²±‹b‹A¿taßÊO m^÷Þ‰¸®¸pàU´ûPœÛj8í‘Ëäü8ï£UQ|·È#7ç¢ëöTTOˆ¹=9ŽŸsxe$´~|Î 
ÛE5çÔœrú8°.þÄíjï+\žaÍÑkÓ°aÌåó9W,GF®þz¹÷û×>ÜÉÒþþcN(zz„~¶Èå¥1×3Çk_)â“N ÇâÕñgÒóêë¿ôµ1—?‰g·öÒû !͸/Ÿ+r¾!îÍ¿¥\Ü—‹êþ2•ÃUÊÇÅWø e‘Ÿ”S±$ÎyaÜ·Ó÷ŸÛÊÏÊ±Ý ç½'jû[©¯«™-kV9RÆrRìm>9çîG²Ÿ£µ>vlÔsk-Š‘Þ±·srí íg•óÿ&ÖÞŸn¾Šû}C|#‹t{¬…UœwZ|çâ{”Oç`\ÿ¿–øþ½"Bô¯Û¶yÝGZ¸¶ @€ @€ @€ @€ ÐQ­ÿ†lGMÛd @€-зjÍÎø½×´8Ë`匌 ¿oûàe_ª;'võz^Jå{"ñ²ºÚ}í9ﮊ¼dÛæõ_k©þÑ¢ ú/~~Yöüc#HÕây{#qò¾‘áb펭ë¾XwNc§ÂžT®ŠþßÁ”'ÔÕ7Ú#ÐY¸bùdB(½®þ…²,ÿrÌëåâ®ωc·çÝt[;<2üÞ+7Þph]ìØ}“žzVY¤s¢î¢½i¸oÿù1±wÅuÏŽçæµûö3çĽüÐC÷U×ìܹñ¾ÃÚ#ÙÛé«SWׯÖ«[{íë"îÛazÅ(ý5=TkÙôìhÌÅ ÃÃծغþsu¥ÑžzW®Y]¦ügñö„ê£ûêC›Ö •Ú±j;Ê=«ká¿Ä}9k¬šýÇy<óúÈZýõЖŸ‰ãñ¸¶öZ±bÍÙ¹;¿)¾×­~=ïÝ›Ÿß·¯´v•±«:aÍZ±jàóñl4àc¼ò}[7­ki­£ƒ}‡;+–©kC³šxÞ2Ñ`ù\]{c ü^ü}ðÔ±\÷}¿R±=UÕÆûóžO~|pð®Ñj—-»ä¸ùÇ–¯Pü›c ~Öh5£‹þ¿6TíþÑbppF„\G£c @€ @€ @€ @€ @`*§BUŸ @€#(ÐJøå±áåü™aýòÐæ _xìX‹oúú~¶(ÓŸ4 „ìï*‚#×m^ûâýŸë~._¾ê”ØîŸ#lõäºÚF{C®IÃ{yëÖË¿ÞJý5½½«O.JïŠ1´È‹º{âZK¶n½ìÛöÓêûI„òª\åÿ‘÷ïÙ¶mÝ­\oùŠôt±dzZ+õMj®.FÞrŦ Ð\K¯Øìü¸—Åó±°î„Æý‹0ÖÏÔÕÚ> ËxfŠwï¾ï¶wìܹsÏ¡ý6ûÜ×wé“SOùá0=§Y]£-vQ¼m÷½#g­;}_{覆EXµTmþŠ]Kã»ü³ù.Øñ¹ýý‹çw-üÛ¦A×Nˆgò¿Ç®¦¿sÀ¡q¿í„5«1éN8Z{G|㻼mddä·F •~ƾ£©oÕê5±ûëûcliWÝ{Ÿ7™}“±h"@€ @€ @€ @€ @€3V œ±#30 @€q ôö,*‹®?míļáÎ[ozÙDQCƒëþ.B¯Š×½u׋pÇ‹ú.xy]Ýþöž…óÞÙr¸±Êïl„ã&nl\oÛ¶õ·Å®j¯]ë.Š{÷a¬Ÿ1—㋞î¿[¶lYK•±ú×ñØ3äŸmÉZ 76úoìdùÐýÃK"Ì7îëããË—ßyË/O¸±qncGÆØ1vùÌ£íôøx÷ûÞ¥ŸnìTxÈÁ)û˜sõk¸}ëxà m¸ùëÕîó"!ù‘ºÆ3|ò‚ãº'þk„ÍÊ\üVÝu" úͽ»~ÉD¿ËöÿÑÁÁ[†6­}]{ÿÇÇ|ŸŠŸ³­…†NY³Z˜ê¬(±ö|›bí|0¾_??´i]ß8ÃŽrìàúá‘4òŠFØùàžGÿÁâóGoq” @€ @€ @€ @€t®€€cçÞ[3#@€æ @š—ÿ$‚zO¬›z6þ"B}»ví®«mÖ!¶oVE¾4úËÍêm üºšFûŠŸ½ý§ºÚÆ5«ªú•ýý^]m+í`Ù\TÕ\Û]_Ÿ^0ÿØ“jÇXßO}EcîNÕ«"Pº­¾úðŠ«®ºìî”Gþûá-õG‚øoã9¹8ž“‡ê«¯Ø±uý?Ç¿xËÁG»›=£šÿ‚ƒNͧxhÞ6´yýŸM¦÷ëŽãŠxè?V×OÌí-Ëû/}V]Ýhí= zÞ_œcFk{ìXÎWÕðÊ;6ýà±cmxóà½Ão‹ùÕö»ž>»ñè%;aÍšèÜgÚyÖÞÃîÈC#ÃÕ+"@ÿ·‡µŒãÀ¾px5Ò×ø;«î´E7ÂÅ‘sô"@€ @€ @€ @€ @€sG@ÀqîÜk3%@€:\ ·à%)•uÓŒ„Å'"°ñËQW¶¨ë«Ñ!ÇDãwëjãb¯é]¹úÙµuÝ]¿¡©îºº"§ßÙ¶eýŸ×Ö£`ë–õ€ãÊVNIeùEW+µ“©©ªôæñîžxèõ¶nÙ°#ü¿}èñ¦Ÿsñ¥»n½©ñœLêõüÐ{[¹vÙUüä¤.ÔÂÉ‘/úÐЖõïj¡´¶¤rÌ{ò¥ñ-º½¦¸§§,[ÜUõàžrQÖîæ‘©?Þ>xÙ—>sòŸvîÜx_ªFÞÒJO¹,k¿×£õÓ)kÖhs›Ç²µ÷ ÛVå‘7^±uýç:8ÁC[6~:NÝP{z*/_±zIm @€ @€ @€ @€ ÐAŽt3M…˜Û©,.­ˆ0Ô÷Žä7ÖÕ·=v?üƒ}³ÙyZŒÍ©Òòf5+V\ô#QtI³šGÚòç†òƒÿ³¾nür¼2²ŸSwfl±uæiá…uu“iÝ?»}ËÚ-“éãÑs«”sËaÐ}; 
—NtçÆÇÛ†ÕÎö>íxÛŽåâ–=÷ÿ—¶õmÛ¶îΑ\üJmŸ)-k%Ü{`?çõœߘ?ðØaïs¾Ïà OÖ×("»µî{½ï´®´x”ÓkušU;ÉYR`í=ôFåµÛ6oØxèÑÉ|ÞýðÛ÷­­5tu—S»Ö\_3 @€ @€ @€ @€˜nÇéw= @€ÀÔ4ƒçÕuA³·]9¸î¦ºº ´çTäuuç•9ýL³šÜÝýßbÍwEÌyw•‹bpp¤Y_“i»sd÷¯·´ë`Êo›ÌuêÎÍ#EÛy{ªâïê®÷X{Jߺuíõ}žä›¸_ÿTßE:¡¾fâUQýâUW]v÷Ä{ýÌ ^¡¥kFo}ühY¤×=þ©þݼ²hÞ×CJœŠ98ºX3š—÷ÕVÅ¢Ïiñ}G¬Y-ÎuÆ—Y{¾EÃ##ï>øÈä?íØ±éÑË×ëz*‹ê©u5Ú  @€ @€ @€ @€ @€@' 8vÒÝ4 @`Î ,_±zI‘Niï¾óÖ?Ô¼fâ­yïȆºÝ©"4ø’÷÷/ã*ÀÓ¹c´=v8úøÐ¶ÍëêCW1þ7»ïÏUþýº3c×Áç¿êÒŸ®«›H{#4·mpÝg'rîhç|dpÝÍEŽÈd ¯TUïm¡l%ÕçëŠãù=¾®fí×mÛ¼~hç7?µ*Þ×¼ ö-Šq[©.F6×]w²íUQü[]9•G×ÕÚÞ!kÖ¡Óš­Ÿ­½Þ¹\ìºbpãW<ÔÆ÷Ÿ®ë+餺í @€ @€ @€ @€ @ “;énš  0gº{Šóë&Ÿs±i×®]ÕÕM´}hè²ãÜ]‡Ÿóýqí/D`ï²TT¸xxÁÂÃjâ@ïÊK,R6µÁŽ\T=Úùí>vKÞ½%biµ;þuUå+Ú}íGûÛÑæ~#«VÜRÛgìyçmß»º¶nyÏo®-OSpŒgןDÁß¾þ#¹Èw4ë"œÿñµ¯½¸å]*‡ªÝ¯)öT_3ÉŠªzw.ˉ@Ù7rY}½|¸øÆÐÐÆ^cCºšWêª FÀ럶mÞðåšžÚÒüÙÁÁÝ+V­þpQ¤_kÚa*^Ö´}‚ixï'&x꘧åTü{íž2fA4Dç „nV3Þ¶{î¹çþE‹›gvâ™9v¼ý¶XÿPµç¾ËZ¬PÙu×]·÷Œ³žÛ¸Æ[Æì ¥®ž£{^í—Ys`ÃààÈö¢ønjüù?ñç ×9ç¬nìšØ­Ní+¥Ú€c•Šžñ¢#Ö¬ñNz¦Ö[{¾3)ÿÓÁÚ÷)þ"lü}Øü•Ò¸ÃÍ;ÔJ€ @€ @€ @€ @€f¶€€ã̾?FG€¨¸`Õª3"„÷cM c··¡Í?ß´¦ Cƒ®ŠnÆýŠàÝ+kOÊņښ6DènCJEÓ€cŒûEg÷÷Ï»~pðá¶]:vQܺõòo¶­¿Ç;ª ×Dˆôs—·ç]#0Ù·rÍž°«êãEYŽpŒâ®2Ÿ?Z 8Ôùᮾzý‡mÿ‘œFvEó6Æ+ÇsåNY³Æ3ç™\kí=àîÄŽÃÛ6¯ÿÆGÚú¶,†ï«ý>åb~[/ª3 @€ @€ @€ @€ 0ÃÆõ‹¨3|.†G€˜“eÑS ,òg2ÎÒ¥K»cëÀ—Ö±ªŠ¶‡ïš]ó®[¿ÿ•"†ÍjŠ”<³XðšÖŒ·1×Ç)mß/‚LwÔ%ç¯×ÖL¤ åû'rÚdωÝ?;Ù>Z9?·ðËéÕ­ô5£jr×¼ÆÓÕBÍc%°f=6™YþÆÚ{𠌿GÚ¾öî¿ÊHUDÀ±ö5®ïSmo  @€ @€ @€ @€ @€À pœá7Èð @€µU:µ®&§ü¥ºš#Ù~Ü¢Óžû÷[3†½{î¿ýË55mmnì<˜‹Ô¼4}•eñâ¦ãlÌ9Ý8ÎSZ*Ϲ…àN*¦j·ÃæAÑ–f0þ¢T×ÿ¬ñŸ14´áæØýò¶¦g¦âÄ¥ýýÇ4­™K–,ééí_ýŠ«VÿI™Ò_´}H°fµÝäuhí=>þúÖÁGÚû©kDÀ±½¢z#@€ @€ @€ @€ @€Nèî„I˜ @`N ¤âäºùG°mFïàØÓU?‡¢Èÿ²sçÎ=us‚öÏGŸÿ±Y¿±Ý×âfíãoË·Žÿœöœ‘‹òÞöôtH/‘²-b Éé|åxíþáÈô…{ß³T4Ý¥ñørá“Âàˆìf9†}¹¼ÿÒ3{Rñ¼¢(_”Szq„¼–DíüFý”ܲX³Æ°œu‡­½‡Þ²<5ëߣ—©ªîï*žpèEúßÁ)ùÚt @€ @€ @€ @€ @€À pœA7ÃP @€HÀTÍ+³¦àˆ6ç²XT—般ÚWÌ óõu1¯2‹Ú9¶T¹€cQå©ÚÁ±D­öõí;7Þ×jñäëêwÞ,óp# ûo“¿Öøz8+Nêšßsf‘ÒÓ»Ruf¤MŸÏõÙñ¬=;Ž-Øß[Ý÷pÝä~Îþ5króŸ9g[{¾ñ½˜ÒõbÏž£«…û¢Ã_×' @€ @€ @€ @€ 0—çòÝ7w @ #"T»ƒãp5µ¡ICæÖ&«Ê»&} truw‘ºšž™s:±iÁ8cÇÍÆyJˇncgG´«x¤¦u'Ì”ó}l:çœÊÉM»³qiÿ1' žߥg¦2?#åòEÊψçó1¬c?±<àëÖ|¼ŸÓ¾wqÅÙ¿fµãÈödí=È?ö™Ò€ãAó @€ @€ @€ @€Ø' àèA 
@€Ìr\ä'¥âJ£M§{x†[Ù1å{G›ÛÔ+k¯»àµwÇ”öLý¼æÄjï];b7¼ûêâ‚eѾ€ã’%KzN?ó¹ËŠ2_Æÿ;2>;æS>2§ɾÁÄêP7¨v"´ÐWG¬Y-ÌsV”X{ºM¹p<Ä @€ @€ @€ @€ 0 ŽÓ€ì @€©Hµ»¡ÝtÓõ3zWªØ±pQ]+iZÃjûïÙÞ‘|OOÙÜ3RÝT<8×Öçäb7Åc&×ÃøÎNEY>Œ]o_¯EÑwášÿV¦¼qÊÂhŒ0ãW# ýUÎÜ[í^4´iÝ+‡6¯ûÛ2WµßÍņ̃SÖ¬ñÌy&×Z{gòÝ16 @€ @€ @€ @€ 07fô/ºÏ[`– @€‰ ìÚµk¸o嚦TÛ¬—£>¥ÑÞÖ R³ë»-å»"¼UsZ®Öô0‘ær¤8®¨Ùã,ç$à8Ü)>'ž¨ÚÀa[‡PVñ=kþ¿!Û©Ž+à¸bÕš÷ÄßRÿýhm&hûa‘ò×ÂækhüR‘«ÏßOù¥«¯^÷@k=L®ªcÖ¬É1Ìœ³­½3ç^  @€ @€ @€ @€˜£ŽsôÆ›6 Ð9©È\LMŽåÂîpŒÝï®Ë7VÅ‘ÙÁ1u•õÁÊ”gàW*yZŽíÞÁ±wåêþ`pãø_dÜ“R¾!ÎüJìÎø•ªª¾º·(oøÈàº_oyÞøêë«;aͪŸå쨰öÎŽûd” @€ @€ @€ @€ @ “;ùîš 0'bG¸»c7¶ÓšMv^Õ<ÙìÜéh‹ðâÝÍ÷½‹}ñRqütŒåÐkDèĺðeÈn?ô<Ÿg€@šæ‹|bÝN‹]{GZÚÁñuýO‰gþ¯Æ£¡Æ/¤¢º"‚ªö|¹Ïù£ÕæTžP··êhç5;Ö kV³ùͦ6kïlº[ÆJ€ @€ @€ @€ @€:S@À±3ï«Y @€sH E8°nºUw~ZÔ\_WwäÚó]uÁ°œÓsŽÄø",ö£aÜüÒ)»yÖ#$ðÔi½nNÏ­{Tî-‡oneLóºŠ÷Æwâ„Vj#`û‡«á_¼bpãW[ªOQŽÐfjþü§”êòÉ]±3Ö¬ƒ¦4‹?X{gñÍ3t @€ @€ @€ @€ ÐãúEÔŽ˜±I @€:M åÚÀTYäçÍäiÇ®sß«_Š9,]ºtÚÿ:"Ûõãuc+rp¬Ešþ‚ÒzÞy4W^¶lÙQ†}v͵ºfpðÞšš¢±{cÊÅòººF{®ŠßÞºyíK§$Üý‡aìJÙü;GŽïß•°f5™=­ÖÞÙs¯Œ” @€ @€ @€ @€tªÀ´ÿ‚x§Bš @àˆ äâs‘Bº¨éõSú±¦ímlì[9°1v|{aÊù;¹(¾“Sñ"ço ßéªÒw¶m[wç¡—»ëÖïeÑâÓˆ8Õч¶=ö9¥Ç-:­±‹ãW;6Åo T¦Úph®ìà8Å·bÂÝÏ[Ðóü8ùš wÐâ‰ñpž;6ÿ÷U.¾ÞJw±{ãâ»ÐUW›sþ‹¡-ëþ ®n2í9¥šïß#Mñ-ϫ֬ñLw&×Z{gòÝ16 @€ @€ @€ @€ 07Æ·ÓÆÜ01K @€À¬©ŠÏÖ 8ÓpŒ¸ÓOEÚé¬H=¯7•Eú£2•[zzÊÏ—G¥;úV­¹¥èï?(¼µk×®áØñÚºyt÷äÖÕ´³ýø'žòÜèo~]Ÿ{Їn¨«Ñ~dª25ŽÓð*kwúŒ@âöV»7þ§Úº\ܲû¾ÛßZ[7É‚Ø9õŒº.rNãúwe'¬Yu&³¥ÝÚ;[î”q @€ @€ @€ @€ @€Î×/¢v.ƒ™ @€f¯À÷¾}ýc‡ÄÝMgó³Î_µê©MkÚÐØÛ;°(¥âŒæ]寃ƒ#‡ÖTEñéCú96Š»ðÐcSù¹,{šïŒ]*¿þÑÁÁ[¦rúž¸@™óÏLüìÖÏŒPo]õðH±­®fùòåÇF8øiuu9¾sçÎ=uu“iqÿ‚ËKëúï޲fչ̖vkïl¹SÆI€ @€ @€ @€ @€:S@À±3ï«Y @€sHàºë®Û»&þs³)ÇNŠ©;Ï»¤YM;ÚrOzem?ÕAÆjäSuçF˜ðgÎë_]þªë§•ö%K–ô©XS[›ó'kk9”^Õ×wÉ©S9€×õ<%‚€¯jvxv¿½c뺯4«i´õôœpZ];ö4|MKu“(:%Íyœ^»ƒi„-{Æs™ŽY³Æ3é™\kíÉwÇØ @€ @€ @€ @€ @€@Ç 8vü-6A @`.äTÕ†#¹zª-Ê2׆(«TŒ:Ö‘=÷];Q>ÜlŒ fOJolVÓ®¶ÓÏ<ûu±+ß“jû«Š¿¯­Qp$Êb^×ÀTਲh|·šÿÛ*×ïÞØãHwn)à¸çþü­©œÓ¾¾Ëòܯ1¯ÅºÇÊ:aÍzl2¿9lgÚÇ›â]®yF*ž¾ÖÞé³v% @€ @€ @€ @€ @àpæ¿„{x½# @€3P`x$¯Íñj:´T<§·à%Mk&ÑØÛ;°(åâ5M»Èy÷žûF>=ZÍŽ;~;&^>ZÛAÇÊâM½½«O>èX›?,]º´;u¥wÔv›‹{ï¿7שּׂSp„ÒÏ-[¶ì¨©ÄòåËÍ©øÅÚ¾óð¶Úš(HEub+uóæuOé¿å.XµêŒXQZ †¦bÜÇNX³½O9§áCòyÁ!ŸgÄGk A€ @€ @€ @€ @€æ¬À”þRìœU5q @€À4 
ìÜð¸dmЮ,Óû–,YÒ3ÃKóÒo)5:E€qçÎ÷uýá‘‘wÕ¶ÿx*Ò¢4¯ü«ýŸ§âç‰'ŸþöèwI]ßU‘?|õÕ먫Ó~db'Î3ûÄß™ŠQt/8þâ™<µYß‘<¾uhËÆÏ6«ÙßVætÇþ÷Í~–=ù'šµO²-6c·6¥âØÖú©ùÞÒI§¬YN-ž³½~>ì}J]g÷÷7_#;izX{§ÇÙU @€ @€ @€ @€ @€Ã7q„ 0+rUýYíÀSñ£§?ý¹o«­gÁŠŸa¨Ú]ìöîÍÙ¬ë+7~56¢üx³šF[\낾U«W×ÕM¤½wå¥ÏKeQ†{tÇÌ÷MäÎ9©ø¯ô_üüv^ùÑQÿ¿Ú>«êEMU[×(Jù–VêrQ¾°•º‰Ôô®Zóë)¥—¶znìÜ:¿ÕÚë:aÍ:h>EÞsàçÑÞ?}O÷ £?ÒǬ½Gú¸> @€ @€ @€ @€˜»Žs÷Þ›9 ÐaÛ7|¢(òõµÓJÅoõö_úšÚº –ö÷SôtoŽòš!ó?ïØºþŸëºÐSí.ŽûúÈåûÛ9FŸËû/}VJåGâmÍ\Õéo¶m^÷ÍÆ;¯™/½î²ìÞ¼|ùªSÚ1ÚÞ•«Ÿ;¢F¿±qߨ¯Â~ù®Û¾÷ž±+nyøþê¦8R†LEïTìÆß©Ÿ‰Àâ<ªæŸb‡ÊãšWŒÞÚ)kÖ³»ç€÷£¾í™ßÝÖí¨™àAkïáœF€ @€ @€ @€ @€LJ@ÀqR|N&@€Ì,YWóXXvu툠Y]m+íÝýº»ÊLE:µ®>Æ÷µ½ÞõkuuÚg–@dŸÙ³`Þ?ôõ]R{›üüþKžAØO©Xܬ.Úª‘áü¦]»v ×Ô=Ö|ÕU—Ý]ä|ícÆxÏÿŸqÖsþxŒæ‰N}«~;¾Sk¬ãê ÇŽ«þ€âNX³˜Î]¼õm.Ë_u³{ÔÆ#|ÐÚ{„o€Ë @€ @€ @€ @€ @€9* à8Go¼i @€)04¸îïbÇÖÎ.¥y±íÜßE é7ÏîïŸW[?JÁŠ WŸ{ââÓ¯/RZ6Jó!‡òå[·¬ÿè!ÇüøPµûâØîÖ1 oè)S¹)šï^¾|ùÄBVýý]+VüRWW÷?D¸ñäÇ»ý]ÎÅž\T]y啎^áèŒHéż®O]°ê’WNdœñ¹¸««+‚°Å“êÎ ìû¯Øºþsuu‡¶W©Øqè±Ñ?—¿Ö×?Ð;z[ëG{{õ­ZóÑxþ?ΚÀ¿óZ¿ÚÁ•²fí›U*n;xv‡Šçæ5‹ŸöWS±ûæáWÿkïøÍœA€ @€ @€ @€ @€LN`Fî1¹)9› 0·~àî_î9úÄŸŠ Í4“ˆÝìÿ-ø?žU.Xó#®~ëÖüÐG‹ÁÁ‘fç4ÚÎ_µê©ÝyÞŸE°±/®QûÊE¾s÷Èî_©-< à£ƒƒ·ôö¯¾8u•ŸˆÃu«²Lé×ÓÂã/ê[µú7ï¿«¼úêõÐÝèo#ØxA¹ð•]EñDZßóG/:ühÎÕݶe×oqd¶Dïô®¢ûš¾•k>\ìþí¡¡ß¯ûŠkÎÎÝù]qî¹uµöxîo¾¯Úý›­ÔZ3¯«xgŸhÛaŸËbKÌãƒÃ»÷üÞŽ›~pX{“±ûé³c÷Õ7ÆŽk⻼¨IiÓ¦”‹“‹ø>µ²~ŒÖQ'¬YyÅÚð¯)ÅŠRûJo8ãÏ=ïôg<÷ÿDé-ñg$¥|\„§Ÿ÷cdhóÚåµ]LQµwŠ`uK€ @€ @€ @€ @€Œ) à8& @€Àìhì,¸|ÅÀÊžžÔØ9®6 AÇgFXqG_±àŽ´j`G•ó'Š*}¤¨n)öŒü0-ìzfY¥³£î9<»ÈéEñsaK:9¤\½qçààí-ÕP´mpýßÇnyï|tW¹ZFuOŽpЇŽ=!¿/ÎûXÄ®ªŠê»y¤¸¹z¨º­ká¼'ä4rJ™ÓS"ùŠ˜Ç1'ŽÞÛèGcWÉ«¶mYÿ磷::Û"Ø÷úÜÓµ¦oåÀÿÝ·D*ñk#Eþ÷½Åž»æ—óŸTTùÔxî,¥ò¢xVžÏXkS|ä¹ó5ƒƒ÷¶vÂÁUW®»)ºšŠò¿Ürø§GƒÊÿ¹gÁ¼Õ±“é{ŠªØ±§ØýFPíÐê×¾öâºæw?µ«;-‰™ü\Ìÿ?Zsèçp‰Ç>}?jŸzhÛcŸ#Õw^±à)WÅMÇ›NY³Âôk-O;Öž¨_õx}<]q ¸‡»;^wÝu{o›ÞwÖÞéõv5 @€ @€ @€ @€ 0×çú`þ @€@G ìØºî+}ý±bº,&Xrl D¼æ¤øñ†ÿ½¡ˆMÈÊÆÿYøèndî¡!œV^`TD£~.v;ÜÞJýh5C›ÖýŸ-Ž×›GkõXJ bˆ½1£Þ®Ææ)½¿2>ìÿþŸû›j~FÊëc{~¸wuM™æ'~ÂXÊg«ñ$¼4~¼´QÓxÔ{öçw}üÇ[媮Åsÿ‘ÖÏ9¼ò®‘‡þðÄ®…O9¼u”#ñÜÇØßÏûÛÄøW¬ØóþNì$yk|·ŸѹÓ"AwÌ(gŽy(ν£¨ª×eyjôõþ1 £¡+Eðy‚ÇF¿°f mÞð¥WßñèZÚ˜Ö¸_ñvŸzæœÇÆ}rO°ö¶SW @€ @€ @€ @€ @€@SU½i¡F 
@€Ù%04¸n[5’_±/¤t†!Ÿ_ܶeÝÚI^ºÚ¼î#)ù–ØÚldºæG’IIDAT’}Mìô¸n¾chÓÚs¯ºê²ËyÍ*œÞÏÎÕÓ1æxNnιzé¶Íë&nlŒu×ààýE9/BÂ?œÐØ#ðaÞçD`îåŸã7æüÏÜpUQŒ|¹n e*^YWS×ÞkVd[sÓ hA£½«èzN+uS\cíb`Ý @€ @€ @€ @€ @€8z @€,°mpÝg‡GªGÀë[Ó6Í\Ü[åꢭ›×þe»®!Ç÷VUõºÝ×®>[é§X«ªüÊØÍì£>6qôšm‘8{èÕî âîýŸ){.¾:R<ü“±‹ßÚuF_)ç¾èoo»úl¡Ÿxä‹ßªv¿â#ƒëþ½QÿÝåWãGP6{åI½Ïö5k÷½wüA¶cêÕW¯ B{Ÿ>´þÀϱSäÓÏ_uéOxl¢ïgóšµsçÎ=#ÕÞKÂòÖ‰Î?iÆs°öNôN: @€ @€ @€ @€hE@À±%5 @€Y.°mÛº;‡6¯}ÓpU=7v%ÜÖöéä|Sì°ø+CÕƒ/Û¾iÓwÛÞÿ£nÛ¼î›[7¯» BX/ŽœÚ'Û~œŽ~7W#Õ+cç²s¶m[[Û¯¡Ã#"Ð9Æ=}s•ªWDøìÛmDìÚXTÕk‡6­}ÝΧlwÑÆ®†»ï~^l"º!žÏ~›_9_ÝøNÅ<– mÞøùÑzí?4Úñuå®ß8ðódÞÏæ5kûàe_~pÏ7ß1H3,àØ˜ƒµw"wÒ9 @€ @€ @€ @€ @€@+Ý­©!@€è Øí1“¾¾•—¼¨(º~.Ų"¥§Ltv‘µúr‘ò»ïºõ{—ïÚµkJw‡¿bÅ…¯*ÊâÒH|¡ 'X3®÷9#æò7{w¯½òÊËï×¹Šg•ÀöMvsÎê;ö„ôæ¢H¯/R1¾Ýòrnìlxu®Š ®Š÷‘ý›ú×£ÊÕËû/ýƒî²ü¥øîÆØÓ1¾r„’ã{sõHª>tÅæ Ÿ©ëçîêÁÁE] ÞfGU›R±|ÅŠ‹ÏÚºõ²«f¼ÇgëšµcǦ,Y²äå§Ÿuvì~yIÌûœØå²¥Ç}yjÔ7þLj¦åÙÏ=±öŽGK- @€ @€ @€ @€ Њ@ü^¬ @€À\X¾bàÇzº‹sÃàåözJ‘ÓÉñsQ|>x·ï\Ü;ÈÝ’‹ôÙà|²J{>yŦMß›)v½+/}^‘Êפ\¼<ÂŽ§äTœœr>)B`]1ßó»1æqc¿±ÈùÆáT]{Ŧ ÿ÷ñïf‹@ï…«¡,Ë¿l6Þ*oضyí‡Æª‰°ï #w¶&ž‰Ÿˆ Ú©Q·ø g&9¥¶Õæk‹½ÕŽ¡¡ 7Õßt_¾|ù±å‚ã_RæâÅE™"°œŸÏþ¢ƒÆƒ‰àîp|ŸoøÝßÛïÆ<®‰\¯~488]Ãmëufãšµ¬¿ÿ‰ Š/Ke±8§ü¤"—'Êq)U7ÇN°éM¹*oÜSÌ®eËŸ$¥6ZO5­öo^¿v[£}&*àœ¨œã @€ @€ @€³O mö-ÙŠ  @€ @€ @`f ¤ÏKÓ¤»Ñšâ_ûÛýŽô 0cò$=7KÓW6ZP–g×Fÿ¶F5úLTÀ=8Q9Ç @€ @€ @€fŸ@6û–lÅ @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,pœ…Ý’  @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,pœ…Ý’  @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,pœ…Ý’  @€ @€ @€ @€ @€ @€ @€Àáp<ÜWÀù  @€ @€ @€ @€ @€ @€ @€À,h›…k¶d @€ @€ @€3V ¯V¿—Ò¿k´À¼š~µQ¿> @€ @€ @€ ÇC¡ì @€ @€ @€‘ÀæþË¿§ªýñ"@€ @€ @€ @€À”ȦôìLŽ @€ @€ @€ @€ @€ @€˜‘Ž3ò²Z @€ @€ @€ @€ @€ @€˜ÚŽSûú˜ @€ @€ @€ @€ @€ @€˜‘Ž3ò²Z @€ @€ @€ @€ @€ @€˜ÚŽSûú˜ @€ @€ @€ @€ @€ @€˜‘Ž3ò²Z @€ @€ @€ @€ @€ @€˜ÚŽSûú˜ @€ @€Ìt†-uºÿb*]©4—ɺM§ûý2Òe&­eäºÕû™r¿»Õ3ýÎ3Sîñé'oÆ @€ @€ @€À”h›’³2) @€ @€ 0,X0÷¸ûŸ±8Mó'FºáIš<0Í“æIzjš&Yžä7Fû IÒôº¤ZýÄ/ò»?uMÿŽ©È± 
\>긬ãi¥$=3Éó3ò4=#Ió3b®ñ99%Örw´Ýœ&ùÍy’ü&~~­R;”g;¯¹rýúŸN¥5-Y²ä¸lî1¯Œy?5æ}nÌ÷̸>µkqm̳öç»éðð'.ÿÁ$Í;]ܽr~{[raždçÆ¹ï^''yzÿ$ÉOŠ{d8Îû«¸wnŒ÷7ÞsT¯Ivå\ûËIšSˆíî¾ô¬¼½ôÌXÏcã^¿_ü<1î•b'ÆINˆûý¨¸‡väirKš§µûåæ0¸%ÚbÍé—vW*ÿ1Ô¿ö{-›ÐA´hÑ%÷›ÓÑö„jš=1KòÇÇ\˜çéñqÍŽ¯­%Ï“»â¿ëõ›xÿÛ$IßïŸÆº>s×Íùg¯ºjÍ9…i}ø zf]Ë–?!ϳ'dYúø¸ÖŒ{áÄx®wÏ=Üêçq_ÿ<îõŸÇwù+;ª;ú·ö÷ÿzZ_@“oF`F?Ó›PC€ @€ @€hV ~GïE€ @€ @€¦¯@ײUŽÀTwÁ ¶¬_ý¸‚šºÝK–®º¤”æoªW¡¥w nè{wþ¬kiï3"ô²òžy¦ÇÔ©³ùÞ ÔUÕ<Øœï¸"é﯌Yx#ˆ÷¸ööô¥عdOˆgçÎóü»ñKŠ­ì¼mõæÍ›o™ÀÒ¹t嫳4}Õ÷6T“ü ›6¬Y?²ÿ9ÏYyä‘Çg‘¥yWxmâ2$›’dømƒÖ}eä8y!¯¶ãO>mqš¤ÏÇ ÃãäñŽŽ‘!M¿á¹ÄÛ ±¾ïŽwŒÉ¨¿°\>刴ã¸ç„ë‚øyÚÁž'¾gÎÿ£šW‡n¾ñg¶mÛV }²×E]züÜ£Ú{b½±ž'ĉwÇwçKIRýÔîátóÐ@ß7b¬1í*¯Zœfù?ŽÙyoc5IÿvÓ†Õ}jF÷yÞ#²¸¼â¡m¥tUÜÛ+ãûû ÑN>Ç÷t8¾ïŸŠëùM7ü´?îã»G×ׯoϲŒnßû9BÎ7nXý„½Ÿ§âÏÎe+–giim£¹…Å«üÝÙèÐ1û–”/}t©Ô¶6¾_GŒY0²1Mî®TV\Ù¿î[#›æýT¦/îî}L„è7­q÷p~ÉÐÀš¯Õµ²¿«gÕGÒ<h½1ãž¿í¦¯{â¡~î×›v @€ @€ @ µvpl­§Ñ @€ @€ @` ¤IõØ$Íή¿´¼¶ ݯ‹»W>±Ô–öEíÜ:›lˆ Õ¼(]RJÓ%ÝIÇŸ'KWþéÀÆ5Ÿhòð–•-\¸ü˜yÇ´-]÷þ B=Ù3ðAü3Šar^ŒñÏÙǾµkY;oû«ƒ:¦Yì ˜Ô½N±«Ú±#A/î9µ}^Z ÅzšZLä'kaڶޫ+Iõ W®_ûÅ‘c6û¾sÙÊ®FýÝÁܵsÅñµ‰Ï±æÇÛ7Dà÷²á;ÿzhhý/šK+뺺–?(i/½.fö1³âÑ8N~O4]¡©¥'œrÆ›ºzV¾õº|§oûöí»Ç1̸K–Ë'udçÿý¸Mæ4u§4>K{Œó´$ÉžÖÖ–ÿM÷²ÞMÉpú׫¿Ýø°æ{ó¬rtš–ê~j#E˜h¿ïC3£Ïögaí™ÑÖqÄ[âÞî ¯¬³Ñ5ñ=­ý~öyqøóâ>~C×ÒU/ܸú³#ë"È;'¾Ü ®_»‚zˆðÞ£²RÁÑÚî°#{ÆzŸß™W“E­ 7N‡gz„©¿Ï›Øt9}ðX*{ÛÚÚöÜ߇,àxqyùÃã’]óÚ;…æÉ¿ 7È¢… @€ @€Ì ýâm¦,Þ: @€ @€ @€ÀdÔvqŠ]ßáÆÏG˜eÂáÆç–žŸdÙÇ# ð±ZÐæÀþÉi‰0ߥóŽ)ý³ûÄ>M³gzl@›¨p¥i²¬öÝ-¬kQA[Zª†¾*ieuà @€ @€ @€À´hÉ/ߦµ€É @€ @€ @€ ,*¯|ð‰§œþù4Kÿ&²K“HÓ…ÎûLW׊´pê µ'œ´lÕ?Ehd]„°&5,AŸS#·9Brï‰Löï/Òl®ëóˆÝdCž';wÝYÙÐdùž²Zè°-)}!,Ÿ2žãÆ[¬yqÿ½á„¬ã?»º.=c¼Ç·¾{éÊ Kù׆ç+';Ø8znqož‘•Ò«»Ê½/ÝwŸÓþÓžPoºÿΟ9n½ÃkáÔåY2ç˱›Ù#ëM§ö™ô,¬}wk÷Bí»Õêkß›xü%/ï8¶íëµ p«ÇŸéãíÙý/MkáÆû­5žÛ·T*Éï n\÷…¢Úfú§ã3=ß]YÇ«ÑúÂòþ'Þÿ´ç4ªia_–gÉòFãÅt»%®Q> @€ @€ @`z Lö 0½uÌž @€ @€ŒC ³³÷Äö,ýdì øÄq6¡ÒÈÄœ›Ì)]}a¹|Ê„(8¨³såýO8åôOEôæO J[ÚëzYËú’r¹ÔÒG !Ê?ŠáÂMã~‰¤¡~ôò››9pþüùí]ËV®®…›©oUMXž—¶·©kيǶjÌÑãtõ¬\™§é•“ü}®ŸçF$vcç²U¿ß ¦ù®¸÷â¼üPßûµ Ö‚EmYÛ¶ÅÝ+×ü„§^å zfµÝÅw7ž)gµås®~~¹÷SïŠNÍuw_ò°R©íêÚ÷¦h†y’ÿvx8¿`sßÕõOçgúààå×…×ÕEkL²tEaM –ô,fÌçA ‡J“Õ ûu @€ @€ @€Ó^@ÀqÚ_B @€ @€ @€© 
p~¹<'›“nŠPÙCÕ|"ó°¹¥Žu­>_ײåOH·Gèà­»™ñ"X¶¼»ÔqyÔÆ[ûêî¾ôœôm;j^­4¸xv¹|ìçœÿñ4Ízöœ:>MNI“ì?j»,Nèøuö¬zM’§«ãžŸœJœ{tWÌ!n›ä‹÷œ:ºo¼Ÿ»JƒõŒ÷¸–Õ§É ímÙ§:—­<¯ecÂfʳ°¶Žîe«ÖÝË_šž='K®N³¶IÝ÷­gOÔ¹¬÷ܼ½=ÂÉÉE§‰pã¯*•Ê3‡ú¾^T[Ô?žéÕ¤rYÑ:ó<½xA¹<©;7׿Û76ü»qÏn“»†ûŠæ«Ÿ @€ @€˜Þ‡ýÎÓ›Ïì  @€ @€ @€À=Íæ~ âxOk£ÿÁþw"”ñ‹ÒĹ K–ôôœ™&¥ÏD¶ðÈÂâ ¹.É“¡X×÷cE7äIrCìªw\šçOÒì¼$Ís­íJ×ä?¾˜.??¸¡ïÝ#NsÐoó¶¶㎃(ÖvãM¿þÙ'ŠÆ¨¤ŽÉæ}2L_T;²?ÆÿEšäÏód{RM~U)UoŒ1JIµô€,©ž÷Ë£‚qq„ðŽy\Ý÷izTŒyeg¹÷é›úû®©[7ŽŽ®òŠ‹bNïˆ0›>*î‘_Æ}þå$îÿ˜ÏÒ<¹-Oªwdi6\MócbMÇfI~rž&ócÐÇÇýrFÓƒGaLåè¶ysÞoËã9ndí’òªù1¿¿ãæ^y~}¬å?b]ßÍÓê³{×T;¸š´7û™q;+†\Ííµ¾ÂWš›æY-ˆ»¸°vŠÌgaúÐÒ¼+ƒöyãáïìOã9Çå?Šûâçµû!ð§¤YòÀx>>*îÑç=[ãùtn|"àÝìM8žÎŒÚÅåg‡ÓgB¨0×áéîÝϺràŠkvõ3å™~Cuçà©¥ŽÛâ‹çîØ¯¸WçŸÍëŠÞI >ç9+ŒëØ=ö öµn«í:¹ï“7 @€ @€ @€ÀŒpœ‘—Õ¢ @€ @€ @à äé‹â?Ò?­Þ9÷ì@”&›ÓjuÝùÎÏ|¢¿ÿ¦±j.\~ÌÜ£³fiúÊD=t¬š±Ú"äñö¤\Þšô÷WÆêO[–ñ¾¨o6ÜX Ï¿W*ÃïÝÜù7êœg_ð²sي߉ ßÙìα®·Å.vŸÚ´aÍwëŒ=®æïùázÐ;Æõ\·m۶ᢓŸ›u¼=B"M‡#µyx¸ú–¡5_-{þüùí§}þ³KIú¿#µ ¨>î϶¤”_qÑE—>棽üæ¢úFýµOR*ýS£š½}{îý$_9Þÿ7¸qÝ£=òFͽº»WŸ·å/‹û¥7Öxl3GEí w÷>*vjûf3õ£jÒR)_¡ŸÂߟEXs(É+oܰî+£Æùñ #?tu]zFÒÞþ—^}a܇sFöõ>îEJ}r«B©c£åm3äYعlÕ ãyÑ|¸1ϯÌóÊ?Ýã‹-š×~äñ‹âû›ÿaõüã>>±^_­=Íã¯Ø…÷¬¼”ÕÂ…»µÖ§ÃÕÊCWü°\3å™~MÿŽ®Ø46r‰þÑ?iÇ£NH"@ÙøSˆâCæ¨ @€ @€˜…¿ Ë´  @€ @€ 0y¨nLòM•jåõWö¯ûNÑ ¶n];*%ïŠ?ïîêY¹*ɳ÷ÇØGáµó:³Ž…›’d_˜°è˜±ú#€¸¼éPOž1vß{Õà†µ_k¬±Ú6mXû_Ѿ «Üû‚$KßÑÈmÏñ±Óby>ïŸ<ÖxãnKÓØ=­Þ+¿9vž|_žæÛ«Õ굕¤tk{’ŸÁºs³,{lìê·"BiGÕŽ® '«ë²·½«¼ª¶ÃâïýÜðgž|+O†_\–ÛoˆíÛ·ïŽ?[£qk„±z³$ù¿1דö+õ!BSgqtÛ¿Gs„J&þ:·Ôñš¸OÎ)!Ïû¾`<÷ÈÈ1V;>¿úÂrù­sKóþ-ÎyÑÈþzïÛJÉ ¢ï›õúëµw–W>3Â6ç×믵×›¸yqì˜zY£º±úîÝ…ìe‹Ê½o™“Å=ÔL0µ”ü]Œµ`¬ñ¦b[£ït„B§Å³ðÙåò±išÿ}Ü …ı¦ŸÅ ñGq?Ôv{,|mÙ²å®(Ú!áMËæ¾6v·}}|ž[x ‚=µ†“¼-Âéƒ IâùSMw]0Ô¿þ'…µM̼gúp<ÃÚóü‚®®\ûË&ˆÆ_’ï ¯×?.Ïï¸óæ| ~ @€ @€ @`¦Ä﻽ @€ @€ @€V Dè守BýÁàú¾®f£Ο®_ó¡JZ¹ 4¿Õ7æÇˆâ\0°~õ+n­îxPlWùÚ˜÷PÑî€÷ôœ–fIs¸<ümõ®§Œ'Ü8ZbÓ†Õ}ùîÊïÄZ wºŒ`PgçÒ•8zŒf?/^Üsj–'µPVÃWÌåû»wìú݉Þ##ÿXÿ ƒëWשïÙ^÷}º'àX·»^Gš¥/®×·¯=Ïÿ<¼›»¶ûÚÿÍ–þ¾ë+;oí¬íßs৸^ψëŠÃ¤:eZ¦Û³ðè¬ãÿ„ûý‹óoß]ÙñøfÃ#Çûvÿ® kÞR­ä„Ï-#û¼[ ‚Á§gùŸ‰ ùécWÜך'Éâ™øôÍë[nœ‰Ïôø;çKátí}jc¼KÓRÚž]2FÏA7=¿ÜûÀøÇ.h4P̯ÿª«ÖÜÙ¨F @€ @€ 03gÆu´  @€ @€˜ZwW†« nèû·ƒ™Ö•ë×~1©Vº"ÿãWrj°â-Çê “ÎÉßQ´`íИÊ{Ö÷õnÛ¶m¸ÎPM5G(èûÕ$_ÑÔÚÒäµM 
:þ¢j¬yÕÀƾ?ß¼ysÃѧúûoÝ´~õÿÀja´”ñ®÷„¢éÄUýú^°­¿ÿŽ¢Ú¢þÚ[Ã;vG4¯ízØð•eé[.\~Lâ:íí/Ù»“e’ÚM²«Z^64´þuk&Ðq׭ï‹/BᘵM#xÖxN±hÑ¢yq\Ã-ãzmܸæíã·^mí~K‡‡Ÿ!ß›êÕìmÏÛÚ†€öÖMÑŸÓêYعlå#â!ÚD8ÿú®;w/¨…oÆ}Sß5ÃÃrLòßÌ83ýØZÀ0v=Ý÷3‹Ö§|7ÙUyÆàຟÕ6Û?SŸéqß}¨Ð MVÖL  ½´g܆ÿ­B^ÍWO`h‡ @€ @€ @€ÓP á/ ¦ázL™ @€ @€vj^yÉ•k¾ÜЉ n\÷…gmáXirÊâî•ó ëÆ(è,÷þnšf½ctí×á²OFhóUÑX¸ÜïÀ:"äø‘¾±N÷¾æ8Ùó"xtÞ¾†½©V«/Àâå-nÏ0µÝö"„³¸hÌ–\}Ó×Õ‚T-±¬oh芫;ka©äÆÆçO™wLé%kÆîÍ“¬0àAÀ·mî¿üc0ñÖ­[×Ý–V+¯nf„<ËÆu¿´Í;®ˆœÛxì|¨qÿøz.ÿA5¯þŸ¢£"¼ü¬¢š©Ú?Ýž…aý®º¶5ôŒPjDx/ڲ劖„cGدW’jí{›Äz¨íôWÊçÔvn|ðè¾?çßÌwå jïû&Ö2“Ÿé•»v­‰@z¥±Lú˜‹ËËÞ¸fü½Yž6üÿñ÷ØÿÄ.ÊŸÿÈŽ @€ @€ @€¦£€€ãt¼jæL€ @€ @€SX _½iÃÚu­œàðŽ]ÞÌN‡¥¶ì‘9ošïСµÛwWò …âÍ)†o޵}¿QMŽ"w”†q@_žlÛ´qÍh?ȆØmïObˆ†¿‰àÆ/vÞ>ü‚ƒÝs¬©nÚ´æW|ùX}#Ûò4yuR.—F¶½_Tî==®ÄcÖåù;ïþ§†5Ñ9°qí@Ñý²gøRzÊxN“åÉéEõ±ÝçgŠjÆÛŸW‹¾Ûyš?3ÆMÇ;öᯟ^Ï®eËŸššuÃW\Wµ2@W;Ù=»õæÿÐðij°³«kÅæ”ÒÏÄuyHËß^Ý™\P{6QÛtÉL~¦×vÙ¿>Q„Q*e-ÝűkÙŠÇÆ­qh²š÷żbz^ @€ @€ @€Àlhø öÙ` @€ @€ @ •ÕÊ?¶r¼ÚXµBü¸¶hÜ,©žVT3F-<¸hŒöýš"]õº-ý}×ïרš‘µÛdh8Zìöô{ ÆÙY­VÞ:ÎC ËŸ[.Ÿ9´æÕêß|ô£—ß\T7ÑþnŽXȧýŒ%é¼îF5£ûædÅ;SƽôÁÉ\[mNq/6 Äî™w59qôü}Ž­ó ŽŽŸhßàຟł¾2úøZè1vùüYüïgãZn¹è¢K]3Õ?O»gaš†¨ã¢liõ®¯{¯ã]·ÿú‘çúöÞϳýç…åò)i{ö™ø¾ŸSl‘¹r÷­ÏÞ´©ï·ÅµÍWÌôgú‰jrY±HziÔÄ¥hÍ+vn¼{c<øvçÅÿ¿ 5³1  @€ @€ 0§ÂU0 @€ @€˜±+à•ýë¾5I‹ùBѸy’Þ¯¨ftÿâî•ó#µpêèöý?ç7ÿöÆëšAìT³ŸòÝ•µ{ò\ ˆmœ~÷ÉårGƒ’¦»â\ÿµ©í'›> ÉÂ#K/ç5*u\»)¿ûƒjZÑ—W“w“eù«ŠjFöÇÜŸ?òóXʆ±Ú[ÙaÄ—§Ù‘E5ûõ§ÙÎý>õ!Ož0VóÁ¶ÅýøOñçÝIžÿI5ÏUóêÃoºñúy¤;mpCß3âÏ‹&;4z°k8àøiø,LóìâÖ1ª!¯æoÕÔ²[·nÝYÍÓI¿e=uv®¼ÿܬãêL?´ðtyò¹ÝwÝò{›7o¾¥°vœ3ý™^ãø^²c(ÂÔ ƒ¡µ@|×ÒUO'ߘå ,hË’=É1û÷4¦Ég·ô¯ùqý= @€ @€ @€ÀLpœiWÔz @€ @€ @à° D8éC“uò/~±xìüØâšý+ÚÚ“âPOž¬ß¶mÛÝûÙºOƒƒ—_£m;`Ä<¿#vLûZ„¿.O“ê[Oîh<àø: iž´<ÜX;UE;ëœr_sìÞø¾¤¿¿²¯a’Þ ö¯þH„V~Ùhøpxò‚rù¨F5#û«;.ª$;\I†Ÿí/‹kóö8LJc×¹¯ÇŸÛb§Á®\¿öK#™”÷yþ?Eãfy>¾{¥Z-3ÍßP ç{¼ý›6¬Y!ÆWlè{ç¦ }‰ÏßÌïÛxç7‘úéö,\\^qv|Þh­{ÂÉý}ŸoTs°}7TïêïU˃z;¯CyüÂrù¤ìˆôê4MÏ+:o<®¾ýæê¡¡¡Û‹j'Ò?ÓŸé5“o÷÷ïŠg÷åE>i–¯(ªi¦ÿø“O^|×Nj\;yÿ_ªñyõ @€ @€ @€‡K å¿„=\ q^ @€ @€ pØÒü?'k®ùyáØi:¾PW ;3-.7©\V\sÕê?æYößfü^žU¯Ív%ß\W[s,½µ¯jš_ÓÚ“ä9ÏYyd“;86| ç鿆­ë¬ÆT>ÃÕ¥¤ié¸Òœ'FM­®øÁ̘üO¢°öç€cjÑ,Nò+M ȱøöñÌ¢ZÚý?YrDÃCj«N>}íüùóWnß¾}wÃâÙÞ9Íž…íYVøLóü“}Y¯éïßÑݳª/ÎóêÉ>×T¿³³÷Ä,KkÏ–ó‹æ1|ü—•]×\Õ¿£¨v"ý³â™~/ÌðprY{{R´£ï 
.\øªÚN£ñÜ{LìÞØ»÷ýØ?ó;oªÜÁy/ @€ @€ @`6 ØÁq6]mk%@€ @€ @€ÉˆÝcçµïMÖ ²dø¶¢±cG¾¹E5#û—ôôœÇGl;à}ìÊ7¸aÝWhoqÃ`ÿÚÖv±Û´qÍ¿l^¿îÓnüYœ¢åáÆÚ´ÓÝ­8ylòÄÀýÃ’Û·ô÷]ßbººÃÅîf„G—òì©£Û&úùª«ÖÜ9ÑcÇs\žV Mî×ïÀ®\¿>´± eÁ+®ñ²3Î9ÿê%åÞ'”ÎÞîiø,ŒMáN¶ÃÕÊÇÅE­Vªƒ‡â4Ð÷õøÇþ«±Uz|ÇQ']ظ¦qï’%KŽ‹ïZQ˜øÃÛúûïh<’^ @€ @€ @`¦ Œë—»3mñÖC€ @€ @€Z%ÿÑþ×b¬IÛ½®RM ÃWqþÒxÖ“%íÏ*®Ï¿^\3}*"ó“ÁÁµ¿lõŒÓRZŒÉW[}ÞFãÅZ›8_ñ¼ã°ôå¥9Mœw\ß…¯Z͓˚·¶ëéï–Jé—»–õ~tÉÒÞµ`V3ÇÍ–šéö,|v¹|lb ¾¿ùWö¯»öP\ÃÊÎ[¿ÏŠ`œ=¯‹.ºôø#³yŸŒ/×£‹VÁí_÷Ãÿî>Ø‹Î3Ûžéñ(|þ¥Yƒ‹@£¿4÷Ø¥±Ëqírój²º‰¡” @€ @€ @€3L@Àq†]PË!@€ @€ @€Ã#ÿÑþ'óÌ¥JëŽI5}PÑœó4ÿFQÍ´êO''d»gþn‘Cž¦“zŒ>ÿ Õ»T”Ê“ôÉqÜ”ÿ}ÑüùóÛ;Ë+/èîYùŽ,Mß3z­­øïŽqš)ÇnŽ–²tÍQYǯº–­ú|÷²•µ¤¼j~ŒY¡ÙûšnÏ£³#ÎLÒ´a 6Ò†µ wÓ÷ÆÁ\ý¡¡¡Ûc>“¶ðÁÌm2Ž­LçÝvUÜ7-?¿íŽ›ònß¾}wqíÁU̶gú®»v¯ ±†®ñŒ¼¨¶ ãDe#¶ÛÛèØ=ÿÁƾmjô @€ @€ @€3S`ÊÿÂzf²[ @€ @€Ì<üÖÉ\SµzçEãG€n|Áª4¹á˜ùž`OQÙ´éÝ÷nœŒÉæM„s²jå¯éïßA©†»UÆsôâÅ—œ4&1f¶¸¼âìî¥+º»—öþC-½ž…YžžRt-"xµ½¨¦•ýi29AìVα%cåé±Ç”:®ŠLðãš/=æèãÒ74W{pU³í™¾eË¿)zþÅsîˆtÎÑå‰Èvw_zÖžçdƒƒ#HÜݳj÷Òº @€ @€ @€À¬h›U«µX @€ @€ 0I±³Ñm“4ôžawî<²:on«Ïž\ØÕ³êý…u­,Èó#äØpÄRG[mî“ültâE‹.¹_inûY1¿‡”ÒêYñzhÎp×yÑÖ±÷ØÆ³ß[ÕšŸ»wìze{ÇœÇÅù8Ñã^¸_{Iü¼¤}ÞI÷²UߊÄÎUižüÚ|Çg¿Ýß¿k¢cO‡ã¦Û³°š¤§þ‹°iÒ0(Üêë’W“§…“jõYÃxiòúøž1®3§ÉŸ]ܽrÓ•k¾<®ãÆW<+Ÿé±EéeqÛu5¢ŠGaEô QÍX}y©me£gym·álxxõXÇj#@€ @€ @€f¾€€ãÌ¿ÆVH€ @€ @€‡@ Í“I 8NÆ"lP¸ƒãpurƒ›“±®FcæIvS£þ‰ô-Y²äØ8®‰8Rz|£€ÇDÎ]xLA¸±v|5Í/g‚ Ê壎O:¤É¹i–ŸæÙÙIšŸçéÙ1µ£ï6‹’½¯ûÞím9T?‡†Öÿbqwï¢ööüs¶<²%çM“GÆŠ¡É?}h>ïö‡-ëýdžæ[vTîþèÖþþ_·äShéö,ÌÒä…|ÕÉÝ•òÀó×Îwø¾ÎgrZâ0¾pcm±cj©-¹lÁ‚ݶmÛÝ“1³ÙúL¿ùÆë?~âÉgÜ·^£]MŸÖÕu郃—_7.û4YÙ¨>‚®Ÿ¸üÕè#@€ @€ @€f®€€ã̽¶VF€ @€ @€‡P vh›vÇ<ÉOŽPAC¥¶á™pL'aÇJûÑÇ—*NíÎR%iYÀqþüùígœõˆ…I–/‰ðÒ"t^¬þÞðgÜk{n·¸ëßv‡lh ïë]åU—F s}LtßN’­˜T¬;BiWtÍË:*±»ã¶øöç»’oÚÔ÷ÛVœãp1Ýž…±k]áŽivˆŸƒiG¯z±‹àyÇŸ|ú›¢ÿ׫9˜öÙúLÀèpç²Þ57m=¿°Oó¶öK£ÿïêÕŒnïZºü©qØCF·ü\Íóüì= @€ @€ 0»šø×„gˆÕ @€ @€ @€‰äIZ™Èq‡÷˜´pÇë¯ÿö´ n64­´~ÇR’ÐðœS½³”ôü;—­<¯kYᅵqÎù¿LKÉ•fùýòKŸ–¿‹ì_=TIÒGæyþ™I»|±]>ŸVïÏæ$¿èîéÝеlù“&í|‡hàéö,Œ¬m£ÝêîQ«Tià0B¯‡ô|‡èÖhéiâÁò¿&ëû2›ŸéÕjåCE*ÂßË‹jöëOÛz÷û<êCž'wUî¾¥T³ @€ @€ @€À,˜–¿TžE×ÇR  @€ @€ @€À¤,\¸ðˆØMÁïÞ¾}ûiÕ'•]­žp)Í[¶b«çÖÌxyuâ;8.,—OŠ`ã{cGÂoFPï•ñóÄfÎ9j6oXý£Á 
}DÈñ"pö›IsšÎ‰@èÒ$)}±kÙÊÕ–ËÅ¡»IÐì<îÙ“‹V[ÍJ‡ô9˜åÉEsšÉýñ}»¦p}{Âm—-X°`naí8 fó3ýÊþuß ÿÿlL–ž¿¸»÷1kîé­ýx϶F¯|`hhèöFú @€ @€ @€™- à8³¯¯Õ @€ @€ @€1ªGuä˜##Ù5ò£÷c „ÒÑc÷L“Ö,öœÀkÉÒU—Ì+uü0‚¯ˆ?m¢ùCò¼¡›«kaÃjž¬jþÀƒ¯Œã¿ÝtÃõ§U«ù › ^Ä)Ã1^YoG©ã{Ý=«^zC9´I>ß=²môûHb÷$åritûèÏY’öŽnÛïsž_¿yýº«÷kó @€ @€˜uþ£„YwÉ-˜ @€ @€$I¶†»–­º=‚ w&;òÈSký‡Óä=]=½·F°òòøä™~ê€c–ÚÀa|f_À1¯¾q`ãš7í½Ù¶ô÷]ß¹tå_ÄŽ¦ïÙÛVçgÖ–g|r¹ü˜kúûwÔ©Wól{¦ÆÙ¼yó-Ý=½›âyvÉè¾½Ÿã=µ39â‚(úäÞ¶Ñ?–Ë'Åß}{œÙ-@IDATϋں¯<Íû¢s’‚âuO«ƒ @€ @€˜bŽS삘 @€ @€8Ti’Gp1mpÌæµ 8\ÝÕôæ9óµøÆÜÁþ¾È‚LïW粕åXA„ÇÿŠ ãÎ4Í¿G~3<¾Y­V¿µ;ɾó‘þ¾Ÿo´|Îøê'¯úcýý7Äè¬ý©ívzüI§?>-Õ‚ŽÉ³Ò¿3ÖzŠÚj¡RÖöÚ¨ûý¢ZýH“ß•§gÖ´° îïÓZ8Ü”*vl­íÜ÷G›6ô½·Î$ótx×Kò¶9߈ãujö4giú']K—n\÷…FuÍôÍ–gz#‹Mýk?áÒŸ†{ƒ{1ë\¸páK·nݺs¬±"¹|¬ö½mqõ¿0Ô¿ö‡{?ûI€ @€ @€Ì^lö.ÝÊ  @€ @€ @€ÀìH“4vplüª¶ån\¡÷Sýý·%yÞ8¸—¦¥ãN9åAÓY+v©|wìÙu|SkÈ“Ï W†5¸aõü kÞ4¸aí×ZnÜsî­ïŸæW{h+³jõ{EgŒG'N-·Aÿcô͘®ØåôíñݨnܳÎ+®ôß6±è,IK|r¹ÜÑDmÃ’ÙòLoˆP{F¥ÕÕjâ¹tô¼£ï÷¬±j/î95žw«ï¾¶üC÷½÷Ž @€ @€˜Í‡ý—»³ßÚ  @€ @€ @€ÀaHó_?Ky°§hJS±?ÏÓôÆ¢‰¥•¶‡ÕLÕþÚîFZÜÌüòjò† «Ÿ~eÿºo5S?Þšæcg°)÷;°ÁÁu? —÷ ®_ýœÝw팰cõ1Ï[š\{Þ^úÃ&k•CàŽüî/Ý»“`ý£òäüùóç·×/h]Ïùåòœ†ßº§îHyšü¼™Ùýä‡ßy{\£ÿ*ª`ó¹Èæ½µ¨®‰þÿLo ©æÙ‡ ë²tÉX5msÅõˆÍHë¼ò|ÇŽÛ*ëôj&@€ @€ @€f™À”ûåî,ó·\ @€ @€ pøòäË…'OÓGÖ´¨ kYﺮžUßï^Öû‰xÿþΞÞ×u.[Y^ܽòq½'¶è4“4L^h™f¥gOÒÉ'}ØØ½ñEIìBYt¢!½gpãê7ÕL„I w‘Œ÷êkæä-:vhhý/j;[&»w?:BŽ?mfØÈ Ío¦NÍø>ÑßSqmãÒtÎÏ<ÿ¼†5-ê|Hµãüø®ÍiÑp3b˜íÛ·ïÎ_R¸Sn¬6"u¯¾¸gÅÓ~á3û™ÞŒÏæ «ÏôÏ6ªÍ“´|ë¿9hˆã·n]w[£±õ @€ @€ @€³G`¬_6ÌžÕ[) @€ @€˜Å•jrMÑò#„pÈŽMyJ¤ÒΉ„Ês"Ìõ²,Iÿ>K³ííÙW²#ÒßDøñ†¤\. 
Ù­i2úÃésEãÆÚžST3Uûc÷Æß/œ[žÜ°ã¶_ÿYaÝA¤I~fѱýÚ´øØààåץûŸ;öÝP´¦è/\wc(S ýâ˜Í#KíÉÂ'ímœgÚ>'& %XóÕj’¼£‰sdmyöÁE‹Ík¢¶nÉL¦×]øèŽ4¿ltÓÈÏñ÷ÚÉ(}Òȶçynü}~D½úÚ®µ›ú×~º^¿v @€ @€ @`ö ü¦}öX1 @€ @€˜-Û·oß¡ª¯6Zo줘¶ås–7ªiE_ÞÞx·§=ç¨&_hŹ&cŒÛ~ûÓo$y~GÑØmóæ¼´¨¦UýÙœc"Ü“®ˆpã_ĘÿšdÙÇÛK¥kO8ùô»j»av÷ô~©kYïú{vÛªÖööãO¯ß;¢'þÔˆO“òöÔtî3cà¹EƒG€¦½¨foWOï_FÀpKXüpÞ1'Ý»†~+-¥ýh{SÜþ—ÆŸ »»W¿·~2~îN>[8nšÎéêZqra‚q Tï¾}sÜ3w5:02‡»¸{åÕl_g¹÷wã~;÷`Ç™©Ç_Óß¿#®ÓKóx­1J^ÕµtUaºÞ83ù™^oÍcµoëï¿#°ûÇêÛÛ÷l-à¸ï•¥éâ}Æz“çk¢YP{,m @€ @€ @`– 8ÎÒ oÙ @€ @€ @ &ïÙ¹¬È"]YTq°ýY–†(«i»¬MÍWìˆ83Û·ƒU½YFHêÕ{vD¬WТö… —“fcï^a”Ø\+‰ \úÄxwöUW­¹³Ñi+mySÇwä?l4NKú²ìÂ&Ç™Ód];æÍÝÉž‰Ÿ¥1kËÙޢơ¾oÅ·±áu¨…º~YÚyK‹Ni˜›7o®¹^1¢iÌ·¥¶ìOÆìhQcš¥“:~‹¦yX‡ÜØ÷™x€ý[Ñ$ö<æÒüƒEîzãÌägz½5×k¯¤ÕËêõÕÚãï“sö…ÀËåR"îx›§ÉêFãé#@€ @€ @€fŸ€€ãì»æVL€ @€ @€ö WòÕµàÔ¾†±Þ¤ÉÃk;‹ÕÕŠ¶ÎÎÞ#dö¼†cåùŽ·U¦ì޵¹çÕä ×°§3=¾m(®;¸ŠŽcK±“ã‹FÉ«ÕÂPWšTO(§Ö?gNÛ¤þÞiIOÏ™q§ö63—HÜ4pŒ›[cNjÀqa¹|¿¸^G6šG„ˆ~ZÛÁ®Q¾ƒ¨¼·èèÖõt•W4 oQ¯¿{éŠî¿»^¿öûn­ìxm|oq_ËØïj¡å£ŽOÿ~ìÞâÖ™úL/^ùþW®_ûùØ¡¸q€½­º¤vTgÒñäØý6žgc¿ò$¿fÓ†¾ïÝ«• @€ @€˜­“ú‹æÙŠjÝ @€ @€ @`º õ¯ý^ÌukÑ|³,}ïüùóÛ‹ê&ÒŸÎI_;ç5¤¥É[·®»m"ãªcûû#+ú£¢óÅ.m}qyùËê&ÚïŽe¯):~O°u¸º¡¨.ËÓßÕÔú³öüqÍÔM°&6ùœ³:`G7w|Áý4bJµ²mÄDZߦé“.,—O»óà[;’ŽÂqº„‚žºîƒÖ~-ÂWÿY·àÞŽ4ËþõÙåò±Euãé€ëIIZzßxŽ™ÍµŸêï¿5­Vþ°Iƒ?\Ò³bA“µû•ÍÔgú~‹lîC<~ò5,MÓ=Ç,M.nTáÇÆã4:X @€ @€ 0cg쥵0 @€ @€ М@ìâ÷Ï…•iòÈ3òˆ×Ö³ »ûÒs"´VTÙ½;ŸáŸj,ÿÅé‘¥¬ôáåòQŵã®H>!ý—F;hÝ7bú‘ÁÁu?»ïóØïªi~ÃØ=û·æIöøý[Z÷©³gÕÿŠÝØžÞ숱#èÜfk¯ì_÷ßl+rÈæfŠ1'åwky–<£h¾±þZÙk2*é[ ‡OÓ“uljÕ÷÷¢‹.=¾#›7»ŽžTxnû6®ÝœäIÿ¾†:oâ{“fIéßï ~שªÛ<#ŸéuWÛ c8ÝÝÝ51_±»îü®®åÊÓdñ˜µÆØ‰ùÖÊ]…¡úºÇë @€ @€ @€f¬À¤üvÆjY @€ @€˜›ú×~2’ß.\Zš¼¾³¼ây…uMì µ·ÕÂ;Cæ_XóÕ&‡=¬e¿¬î¸,Âr…;Fææ¼³Žµ ,h:ˆ×̺{Vþc’¤«š¨­VªÃÕD]²ëŽêõQW7زoŒ4霌]>ãžû½,¾yßyšxÛÓDÙÞ’<©¦ïÝû¡ÞϸfÏíêY9®yÔkd{g¹÷ÉH}ùȶ±ÞWòdh¬vm­ì_=ßßME#ƽð̲yŸ\²dÉqEµú/¾ää¹G·m‹÷“Õé[`÷Ž]¯ŠãMc÷Þך&ÉYGŸ½í¾–æßÍÄgz󫿯òÊõëÏÕOßײÿ»øN¤I{éuñãÜý{F|J“͵Ý7G´xK€ @€ @€Ø# àèF @€ @€ @€’¼’üMCĎȲìÊÎe½Õõׂ}ðÛa¼ÇÕ&Õü…5S¤àšþþy5ÿÛ¦¦“¦ŸxòéŸ^´è’û5U_PÔµtÕëcƒÁ×”íéÎó|]ì\ø­fj?úÑËoŽ·¾TT÷ÇcÏ<çá Õ;íêé}CV*}¼vïÕ©»9MŽ»cìÖJ} zî»÷¾Ö4Éþ¢sÙÊ®ûZî]mdz,K‹×—uó†ÕWÜÙÝŒÀð]»þ(ß·ÕÆ5{RvÄ1_hè»{éŠî¶yíÜNUt.ýc ]qc5Ïÿר½û·Æ3ï•]K{Ÿ¹kñ§™øL/^õØÕjrÙØ=÷´F¸ñ•úãïòÕ ûu @€ @€ @€³V@ÀqÖ^z '@€ @€ @€÷ 
ö÷}8B=¼¯¥Î»4»a}8‚gy~¹<§NUÃæî¥+/<á”3¾¤é†…{:ó+6®ùXqÝÔ©Ø´qÍ»ò<ÙÜÔŒÒô)íóÚ¿TÛ¥°©ú1Šj»Èu-[õ¶4Kš VÆÜnOv¿aŒ¡ê6UÓfwÌ^ÓUîí¬;P“½'võ¬úXìlø¦8d¿ÏÊmòT{ʶl¹â7€ZÛÌ1r\A©7.^¼x\!ÊÑcw.[ñ;i{Û–$MNÝ7úsß:ºÍçÉZÿ‹øŽüE3£G ë!ÀÝÚ½¬wàâžO‹cÞ«ìnë^Úûì¨ÿD’•>÷÷ƒš9šú›6ö­Žvaø7®U¼’ß³spýáÆì™‰Ïô1ZÐx˯®Ûß[”Õ¿ÿóüçñwyìíE€ @€ @€8P íÀ&- @€ @€ @€ÀlØuçͯj?ò„§D€ñaÖ)‘ÚïÞòЬcÕÖ®ü³üî%ýý•FÇÔú.îé9­-ŸóÏlìŠs¾ò$ÿíŽÊŽ?.,œ‚Õ·þ~éˆc~'Öúà¢é…çCÒR骮= ç¯X»º¿ž].{LÚñ'qük"$×| ¯š¿hpðòëŠÏp_Åp%¹bNiÏ.Ÿsïk­ó.K6FàòƒÃ;vþŸZX¬N՘ͱ;ây±£ÝK"ˆ´*î‘Ç,j¢1Í“û'år©™ûrïp;ª;þr^6ïâ°ëE]z|inÛi¥¶t~„Â^ëyêèšÑŸc§Åw¥?‹ÚÓF÷íûœ¦¥EIÇ·$ÉõûÚ ÞÔîµî¥+^‘¤¥ØÍ´ø×èĘó?œšÍûÓ¶}.&õãØõªIúãÒðîïÈvßÞVû R[rzÔó==O²GÄû{vëŒ7…¯<¿#O*/Œ:¶B¬–ä;nýõÒŽcï¿eßõjføØ3î‹—ÔJ³ý®o|Øóy¿Æ1GŒ€ä¦£hÔÂ@ù˜ƒÏÐÆÍë×ÿ¤séʿ̲ôM,ñåKz–l^¿îÓMÔî+™iÏô} ç›J%¿¬TJÇp̆w¯ç©” @€ @€ @€³H@Àq]lK%@€ @€ @€EC}ßì*÷^š–ÒË£¶0äX/Â8÷‹/ÊÒôEI)‚=µÿ™j¯ìž{þ·8Û³§¬`‹Û‹7m\;­w­«íĹeyRZá¶æ‚!žgÄ?Œkð‡±+Ü=Á¨#j!»7$Õ¤åÐ{ÿ'‚SC7ßxýëF¶çýM•»ßzBi^mgÅS›:.M;bÖKøóŽX~÷²ÞØå0ýqÌãÆXãÉ‘Ù;=ŸG55Ö½Eqìo’jõ…I–=(Æz£cKijÇp¬5°qí@wOïê{U£±÷ë‹P[|.ßsIÒÚK’öö¤#iOîùp_õ8/ÛîJ’voÞ°î+÷àÝ¡غuëÎE‹-iŸwüÖí>ýМ7¿â¦®щ§œÑxGÐ<Ýyhæ3}βiãš÷Äα—ij¶ö½¯ûŠk™fyÛ¿-^¼øQCCC·×-£c¦=ÓÇXbaÓæþ¾ÿŒ@÷wâ/ª‡ï+È¿<0pŵû>zC€ @€ @€%0ò?+Õå# @àÿ³w?@–UõÀÏ}Ý3#¨8.¢!¨ ÿa)Êšƒ+Š»JÀQ†š ÿ$°ÑÄ­J¥’”»I ÝXµYS›J%h6Qæ03íô àÊJ$ŽVtb\Ø`"*h2`Pd@þL3ÓïìyèÀ¼žî×ݯßíw﹟guõ»ÿÎ9¿Ïïv[õšï\ @€ @€@ÆÇÖmmOÆ·<&@ ¿½ëæu)dVÿW Ì¥æäÓã÷v ³šý«ñɉ•Û·o?Ðï:¶=âä;ÒXó =9_ ±QÀñpõvq \bÜwø¡î=)yÂèËÿ¨{ïܶrú>·Š?«]„O¾wæ=ív¼z棎 @€ @€ @€ºÿÝd @€ @€ @€'¶Ž­Ûq`²}Z ‹|wÑHbøq;¶/زiíU‹6ç"L4¾iíû&ãϧӭ‹0]× \lÇoZ{y›58Õuñ4ZŠW¦Cû§9\Ö®” 0ÞžxËgÇÖÝÓ™äÑ[ÿ˜¾µ{Oû 8vB ãí='¹õ½Ç/éhŒ¦{åòm›×^[Ò †‡@ç)ã×½9Ýï éwÔ<.ã©ñ±ØnÿVúýåtAû¹Ï}îìÿ@m§Ñݲeí7Ó“aÿÛ4‡¦Ûõ+çýÒš·Nw`¶}9ýNŸ­ÖéŽ?>¹gCúy˜kX~oÜ÷ÈPãÓÕ` @€ @€ P-Ùÿ@V­õZ  @€ @€ °H×møn{_85…­þba†¾V—æøÊdñøÉ[7­Ï2Ñ æíß³;=ɱý‘}´/¤y_¿ÛáÆ7¯ûýy_Úã‚-›×}!=áóM©g;{œ6CéÉ—ÿ'åhNݺií‡4oºiýc)lö•^“¤'EþÜ;Ï¿ø ½Î™ñX ƒŽoZwIJV^–æÙ=ãy>L7îŸØ÷²ôÓOxhÃ-L ¦ûáí=/OºÍ ê©«Ó“!¯OO%}åøæõK{ÓížžM:ùœÑ§Î˜þ]Œ…€ãô4açwoÿhúÿ«šáð“»Óï‡"¶Â_®X±â™OîœÇ›œ~§Ï£ì'NýÜØØ}!7ÎåºÔ‹ë¶mÛöÐ\Îu @€ @€ Ð\Çæö^å @€ @€ 
@`V­[×=ž,öÞíö«Rgë¬Ì÷„ïn·Û¿žž˜÷¦m7îœïåu:ÿ†nØ3¾qý÷´'^ÜŽñÓÚ÷–²þîO6üíî»û”Γ8˘£3îÄÃ^“2Y'y=Ìè<1Þ”‚§o\{Öø¦O}º±SŸšnÿ¡ûFâÈoº=Ï÷±4Ü·gßËR…W§2çúIJyN“cüVzÒæ[ÒÏÚ×_¿q×¼pÁ¢t‚][6®[½|Mçɨ¾Í{â'žÐ¯ íöÛÓ“!ßyÃØº»cÙ²Gº=Ãû‰ö7~÷-·Ü²ò@¼{ü‰'ý(ý{Îôg<ñHÒ]ÛÚ}»Î4®ý @€ @€ @€@~éoj^ @€ @€ @€ù ¬8oÍ«—Œ†_LW¾9…zŽ ±xnú~tÚnuÃSÔá¾òÙ‘l_lñº¿ßuNÃ7Î>ûÂg/;rôÌqüÉèUE ǤàÞ1ÉóYSiÒÓOáÐÝ霌Eüû¢þ¾½?|mëÖõC =­X±â™­#–Ÿž€§…V‘‚°ñ5iGO n¦ æT×ýéT÷§Zw¦0ØÒBoúiˆvj¹•ÜN¡Î—ŒŽ´NI½89ÕñÚôõªTgçÞÚa Žq"Ýû?(ŠøƒTë}ÉåÖvˆ×mÝ´á¶Ãε£ñO&‹Öí½ ÒÏПoZ÷þ^ç86\~§Ï&xÞê5w¦ß{/™é¼ôûñ¿§§Ò~`¦ãö @€ @€ @€ 8”ð @€ @€„@+=éî9ai±|rbÿ~øÞ<™®ÖSN9eÉñÇ¿â9FÂQ£“áá{G¨×ÓßúŸ©¼+;ÁÇâÈ#—·âÒ#ö´'~ôù±±Ól)ۘ߫ӯ¾ð¤£ŒÆ£ŠGö?øÓ']æW¬ŠJxçù¿q4Œ|¹×à±>8¾yíGzãX5røÞ‘í<͹(FwôTÞ¿ï[¶\ûížç8H€ @€ @€HŽn @€ @€ @€T@à¼_ºø¼ÐùL¯¥Äо||ãú¿êuŽcÊX¹zÍŸEñk3Í‘žÞøwéé§ÍtÜ~ @€ @€ @€À¡­C7¼'@€ @€ @€ @€áÄ0òÒÙf.ÚÅ]³ã8²NZµjiŠó{cû“½Ž;F€ @€ @€8T`ôÐ ï  @€ @€ @€ Ð4ôDºO†"|7L†;´Ã÷·&îÜ166±è­øúÒëõhœ¸µ×qÇ”)ðòbÙ/¦[ôßÌ4Gzzãž½´7ÍtÜ~ @€ @€ @€ÀTÇ©"¶  @€ @€ @€h˜@qnŠ.#!,I_ÇÆ#âÊÕ—ükÚwG ñŽv(¶mÛ´ö¦2QÎYµæ¸ôd¼szÍ‘Âc;??6¶»×9Ž(U h]6Ëø›o¼ñÓÏrŽÃ @€ @€ @€'Ÿ¤ð† @€ @€ @€& !>˜žœ¸ü`íEz¥÷/è|¥Ðá™­þ]z_jÀqi~-ͱ$}õzÝÒë cÊX¹ò¢ç§ñÏê5ÇäöÇ{wŒ @€ @€ 0U 5u‡m @€ @€ @€4J (fy*b|íŠçÿlY&§­ZuDh…÷Î6~J]Þ<Û9Ž(K .y(ŠôŒÓé_1ÆÛ®Û²þkÓµ— @€ @€ 0½€€ãô.ö @€ @€ @€ Ðø½^¥vžè8ò´%g÷:g!ÇŽ9â·Ò“"žeŒýí}qó,ç8L +.x^ºGßßkðpü³^Ç#@€ @€ @€L' à8Š} @€ @€ @€4F ÝŸ›­ØVQü^'ä5Ûyó=¾rõš÷¦àØÌz]ŒŸÛºuݳžçX¹òâc—¹äæ¢Ïœièâ÷Žf:n? 
@€ @€ @`&Ç™dì'@€ @€ @€ @ {ãD'àØîYlQ¼pôˆ¥ÛÎ8㌧õÛ%éÉx&Ûág;ÏqƒX¹òÂã‹%­/‡PœÔkì‹ï›èuŽc @€ @€ @€éF¦Ûi @€ @€ @€š"ðÝÛoßóÊ“N~k(Âñ½jNO°{þOÖ©/{åÉ·ûöÛîíun¯cg­ZuÌ«_}Ê-B뤀㬳-BüÐÖÍë¯é5¦cæ#ðÎU½òÅ'½‰âÍ Òy÷ÆJ·èK… i @€ @€ Ð`üE­ÁjJ'@€ @€ @€ @ ;­[×=°¯OOO¥ûö0‹Kóïj·ã™[7møô0×aîŒbû®…W×þ“ñÍŸþÊÂÇ1 @€ @€ ÐdÇ&w_í @€ @€ @€t |vlÝ=“{þ}ŒáÖ®‹´cü|šÿä­cë¿´HSš¦±ˆ z‚cúùø»]“{·tJ&@€ @€ @€, à8`Pà @€ @€ @€ PoÇÆîÿN{Ïi±Ýþ@ˆñÑE©&ÆÉâïŽoZwVgþE™Ó$80Ñî?àãû÷ì>sÇØØDcN€ @€ @€ L ØH"@€ @€ @€ @€@fç¬ZsÜÒVüÃZ«‹",ty1„]!† 1´¯Þºiý·=¾ñÌ P¬\}ÉÄüïé¸açßüå[n¹eÿ ãÚM€ @€ @€˜—€€ã¼¸œL€ @€ @€ @€@Î>ûÂg/}úH 9¶.I¡°SbcØ“®ßÚaݶMk¿Þ§·^W`åù—Ü‘þƒç4k ÛClÿÑ–ÍëoLç§\® @€ @€ @`0Žƒq4  @€ @€ @€ xÇ;.xÎÈ‘K_׊á”Њ¯Ktý¹ùZBñ¬âQEQŒ¦ ØÃ1»Ò÷]éø®‹ô=îŠíp÷ÇúÂõ×_ÿHC¸”YQw½ë]Ë‹eÏ|K«hý‡ºýùP„ãÒ½zLz¿7}ß™îÝNÉÛïMˆ®ß²þÿV´ Ë"@€ @€ @€ @€ @€ @€ @€ƒ'­Zµôà{ß ÔJ`Õª‘Z­×b  @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€@µÞxá_¼çŒ÷|ê„j¯Òê @€rhåVz @€ @€ @€ @€ @€ @`~1Äã÷ïÛ·]Èq~nÎ&@€X˜€€ãÂü\M€ @€ @€ @€ @€ @€<¢cT @ >Žõé•• @€ @€ @€ @€ @€ @ \!Çr}N€ Ð% àØÅaƒ @€ @€ @€ @€ @€ rlø  | @€Àâ 8.žµ™ @€ @€ @€ @€ @€ P!ÇzôÉ*  @€5p¬y-Ÿ @€ @€ @€ @€ @€¥9–ÂjP @€§Ÿ²ðŽ @€ @€ @€ @€ @€rIá  @ GÇ»ª& @€ @€ @€ @€ @€ä" ä˜K'ÕA€8L@Àñ0; @€ @€ @€ @€ @€ @ RBŽ•j‡Å @€% à8(Iã @€ @€ @€ @€ @€ @€@yBŽåÙ™ 0$Ç!Á›– @€ @€ @€ @€ @€æ) ä8O0§ @€ª- àXíþX @€ @€ @€ @€ @€* äx¨†÷ @€Z 8Öº}O€ @€ @€ @€ @€ @€ 96°éJ&@€rp̱«j"@€ @€ @€ @€ @€ @€@îB޹wX} @€@Ðd% @€ @€ @€ @€ @€ @ K!Ç,Ûª( @ 9ŽÍéµJ  @€ @€ @€ @€ @€ Ÿ€c~=U ÐÇÆ´Z¡ @€ @€ @€ @€ @€ÈT@È1ÓÆ*‹È]@À1÷« @€ @€ @€ @€ @€MøIÈñ“M(U @€\sé¤: @€ @€ @€ @€ @€ ÐlËÂÈ¥Í&P= @ ^Žõê—Õ @€ @€ @€ @€ @€ @€Àá;—£güÍ5—ßuø!{ @€ª* àXÕÎX @€ @€ @€ @€ @€ÌE@¸q.JÎ!@€TP@À±‚M±$ @€ @€ @€ @€ @€˜“€p㜘œD€¨¦€€c5ûbU @€ @€ @€ @€ @€ Ð[@¸±·£ @€Ê 8V¾EH€ @€ @€ @€ @€ @€S„§€Ø$@€ÔQ@À±Ž]³f @€ @€ @€ @€ @€4W@¸±¹½W9 ™€€cf U @€ @€ @€ @€ @€2n̸¹J#@€š' àØ¼ž«˜ @€ @€ @€ @€ @€un¬c׬™ ÐC@À±ŽC @€ @€ @€ @€ @€ P áÆJ´Á" @€ƒp¬§Ñ @€ @€ @€ @€ @€ @`°ƒõ4 @ 2Ž•i…… @€ @€ @€ @€ @€ @€ÀáÆ) 6  @€9 8æÔMµ @€ @€ @€ @€ @€ @ áÆ|z© @€À´ŽÓ²ØI€ @€ @€ @€ @€ @€Cn"¾©  @€‹% à¸XÒñO @IDATæ!@€ @€ @€ @€ @€ @€¹7ÎEÉ9 @€ 3h¢ @€ @€ @€ @€ @€ ‰€pc&T @`.ŽsQr @€ @€ @€ @€ @€”- 
ÜX¶°ñ  @€p¬XC,‡ @€ @€ @€ @€ @€ nl`Ó•L€pt @€ @€ @€ @€ @€ @€À0„‡©on @€À‡ˆoj @€ @€ @€ @€ @€4\@¸±á7€ò  @€f 86»ÿª'@€ @€ @€ @€ @€ @€À°„‡%o^ @€@E+ÒË @€ @€ @€ @€ @€ @€@ƒ„Ôl¥ @€fpœIÆ~ @€ @€ @€ @€ @€(C@¸± Uc @€j( àXæY2 @€ @€ @€ @€ @€j* ÜXÓÆY6 @  Ñ25&ÈU (Š/åZ›º @€ @€ @€ @€ @ ¹1„BŒÇ—, ÜX2°á  @€u(ê¶`ë%@€ @€ @€ @€ @€ @€Á œ~á'®ˆ1^9ØQ»Fnìâ°A€ Ðha @€ @€ @€ @€ @€ @€% 7–ˆkh @€@ëÜ=k'@€ @€ @€ @€ @€ @€@µ„«Ý«#@€ U@Àq¨ü&'@€ @€ @€ @€ @€ @€@¶ÂÙ¶Va @€Á8ÆÑ( @€ @€ @€ @€ @€ ð”€pãSÞ @€Ì à8ŒÝ @€ @€ @€ @€ @€ З€pc_l."@€4O@À±y=W1 @€ @€ @€ @€ @€Ên,KÖ¸ @€ 3lª’ @€ @€ @€ @€ @€ 0áÆ! ›’ PgÇ:wÏÚ  @€ @€ @€ @€ @€ P áÆjôÁ* @€µp¬U»,– @€ @€ @€ @€ @€•n¬\K,ˆ PÇzôÉ*  @€ @€ @€ @€ @€ PEáÆ*vÅš @€5p¬I£,“ @€ @€ @€ @€ @€n¬XC,‡ P7ǺuÌz  @€ @€ @€ @€ @€ 0|áÆá÷À  @€µp¬} @€ @€ @€ @€ @€ @€En\Tn“ @€òpÌ··*#@€ @€ @€ @€ @€ @€À „-j< @€@ƒÜ|¥ @€ @€ @€ @€ @€ @`ÂóÀr* @€Àì޳9ƒ @€ @€ @€ @€ @€Mnlú ~ @€@ Ž% ’ @€ @€ @€ @€ @€ 7fÔL¥ @€ª$0Z¥ÅX  @€ @€ @€ @€ @€TH (îZFÎø›k.¿«B«² @€L<Á1“F*ƒ @€ @€ @€ @€ @€øI¸ñMÂU5 @€À!އ`xK€ @€ @€ @€ @€ @€I …—,]êÉn @€RKå58 @€ @€ @€ @€ @€j&ðÓpãö«/ÝY³•[. 
@€@ÍkÖ0Ë%@€ @€ @€ @€ @€ @€@i¥ј @àpÇÃMì!@€ @€ @€ @€ @€ @€@ó„›×s @€†,0:äùMO€ @€ @€ @€ @€ @€CˆEعdÉÒ3¶_}éÎ!/Åô @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @ ÚEµ—gu @ Yo8ÿª+‹¢¸¢YUÏ\mŒñC_Ýø«WÎ|F}Žèmw¯ô¶Û#§-½Í©›Ýµèm·GN[z›S7»kÑÛnœ¶ô6§nv×¢·Ý9mémNÝì®Eo»=rÚÒÛœºÙ]‹Þv{ä´¥·9u³»½íöÈiKosêfw-zÛí‘Ó–ÞæÔÍîZô¶Û#§-½Í©›Ýµèm·GN[zÛ»›_¹ö}þÛ÷ÞDŽ @€j)Ъåª-šd*P„âM™–ÖWY9yäTK_ÍœrQN9Õ2¥M}mæä‘S-}5sÊE9yäTË”6õµ™“GNµôÕÌ)åä‘S-SÚÔ×fN9ÕÒW3§\”“GNµLiS_›9yäTK_ÍœrQN9Õ2¥M}mæä‘S-}5sÊE9yäTË”6õµ™“GNµôÕÌ)åä‘S-SÚÔ×fN9ÕÒW3§\”“GNµLiS_›9yäTK_ÍœrQN9Õ2¥M}mæä‘S-}5sÊE9yäTË”6õµ™“GNµôÕL @€ÌY`tÎg:‘ @€ @€ @€ @€ @€XÀé~âŠôÄÎ+4HE..ŠâÊ¿½æ½ªÈr,ƒ¨¡€'8Ö°i–L€ @€ @€ @€ @€ @€ @€ê. àX÷Z? 
@€ @€ @€ @€ @€ @€¨¡€€c ›fÉ @€ @€ @€ @€ @€ @€ @ îŽuï õ @€ @€ @€ @€ @€ @€ @€ 8Ö°i–L€ @€ @€ @€ @€ @€ @€ê.0Z÷¬Ÿä$pÔ3Âò‡Ë©¢…ÕrÜÑñ„xïUW,l”j\½ú#ñ„{vÕXLV¡·hBIKÐÛ’`+0¬ÞV  %-AoK‚­À°z[&”´½- ¶ÃêmšPÒô¶$Ø «·hBIKÐÛ’`+0¬ÞV  %-AoK‚­À°z[&”´½- ¶ÃêmšPÒô¶$Ø «·hBIKÐÛ’`+0¬ÞV  %-AoK‚5, @€@¥+Ý‹#@€¦ Y% @€ @€ @€ @€ @€ @€ÈJ@À1«v*† @€ @€ @€ @€ @€ @€ÔC@À±}²J @€ @€ @€ @€ @€ @€ •€€cVíT  @€ @€ @€ @€ @€ @€¨‡€€c=úd• @€ @€ @€ @€ @€ @€ @ +Ǭک @€ @€ @€ @€ @€ @€ PÇzôÉ*  @€ @€ @€ @€ @€ @€ @€@V£YU£ @€ @€ @€‹$0ÛáÖ}w†ß¾µÿîpïäaO|<Äô?/ @€ @€ê ¡GËÂÏŽ^¾ä…á´e¯ ¯[zb)<;¦º]³2 @€\sí¬º @€ @€ @€Jˆ1†ÏïýzØðèÍáíK™Ã  @€ @€å tþªÇâÞpç{žøºabGx^ëÙáâgœÞö´×‡¢(Ê›ÜÈ @€t 8vqØ @€ @€ @€ 0³ÀÃíLJÚþaÿ÷f>É @€ @€Ú tþ!«=ü™póÄÿ \~q8ªõôÚÕ`Á @€ê(à9êuìš5 @€ @€ @€,ºÀ÷ü0¼÷Ÿ 7.º¼  @€ @€‹'Ðù‡­:Ÿu> ò"@€ @ |ÇòÍ@€ @€ @€ PsΓç¡O†û&w×¼Ë'@€ @€˜M óPç³ ÎgB^ @€”+ àX®¯Ñ  @€ @€ @€j.c ~hƒpcÍûhù @€ @€ùtBŽÏ„:Ÿ y @€ Pž€€cy¶F&@€ @€ @€È@àó{¿þaÿ÷2¨D  @€ @€óè|&ÔùlÈ‹ @€ò˳52 @€ @€ @€@Í&c;lxôæšWaù @€ @€ý t>ê|FäE€ @€@9£å kT @€ @€ @€õ¸ußáíë_ˆ  @€ @€úè|6ÔùŒèõË^Ö×õ.ú‰À¥o‹=)þåÞ°ý‹·…/õ<ÉA @€,³l«¢ @€ @€ @€!°ãñÛ1Œ1 @€ @€j,ÐùŒHÀqa ¼ìí³\ÃöâØ_ýÐ,g9L€ ¡@+Ú”D€ @€ @€ @` ßÚ÷@Æ1 @€ @€@}|FTßÞY9 @€@õ«ß#+$@€ @€ @€’À½“ ifÓ @€ @€TEÀgDUé„u @€ä( à˜cWÕD€ @€ @€ 0=ññŒc @€ @€ú øŒ¨¾½³r @€ê 8V¿GVH€ @€ @€ 0$âf6- @€ @€@U|FT•NX @€@ŽŽ9vUM @€ @€ @€ @€ @€ @€ @ âŽoå @€ @€ @€ @€ @€ @€ @€s쪚 @€ @€ @€ @€ @€ @€ @€@Å+Þ Ë#@€ @€ @€ @€ @€ @€ @€9 8æØU5 @€ @€ÿŸ½û{µê<óþ¬}Îñ á!ÖAL£Ó 5‰! 
%½r1!Bi„B(™?`HÓ½( …™ Â@% äb¨R im&ýq¡L2!%ÍÔ±`•¢Æãñ¸Æ•¹)±Ýg›ì½×û>û³ä@ã^®õ<Ÿo.²VÏ÷H€ @€ @€ @€ @€(\@Á±ð€ŒG€ @€ @€ @€ @€ @€ @€2 (8fLÕN @€ @€ @€ @€ @€ @€ @ pÇÂ2 @€ @€ @€ @€ @€ @€È( à˜1U; @€ @€ @€ @€ @€ @€ @€Â Èx @€ @€ @€ @€ @€ @€ @ £€‚cÆTíD€ @€ @€ @€ @€ @€ @€ Pp,< ã @€ @€ @€ @€ @€ @€ @€Œ ŽSµ @€ @€ @€ @€ @€ @€(\@Á±ð€ŒG€ @€ @€ @€ @€ @€ @€2 (8fLÕN @€ @€ @€ @€ @€ @€ @ pÇÂ2 @€ @€ @€ @€ @€ @€È( à˜1U; @€ @€ @€ @€ @€ @€ @€Â Èx @€ @€ @€ @€ @€ @€ @ £€‚cÆTíD€ @€ @€ @€ @€ @€ @€ Pp,< ã @€ @€ @€ @€ @€ @€ @€Œ ŽSµ @€ @€ @€ @€ @€ @€(\@Á±ð€ŒG€ @€ @€ @€ @€ @€ @€2 (8fLÕN @€ @€ @€ @€ @€ @€ @ pÇÂ2 @€ @€ @€ @€ @€ @€È( à˜1U; @€ @€ @€ @€ @€ @€ @€Â Èx @€ @€ @€ @€ @€ @€ @ £€‚cÆTíD€ @€ @€ @€ @€ @€ @€ Pp,< ã @€ @€ @€ @€ @€ @€ @€Œ ŽSµ @€ @€ @€ @€ @€ @€(\@Á±ð€ŒG€ @€ @€ @€ @€ @€ @€2 (8fLÕN @€ @€ @€ @€ @€ @€ @ pÇÂ2 @€ @€ @€ @€ @€ @€È( à˜1U; @€ @€ @€ @€ @€ @€ @€Â Èx @€ @€ @€ @€ @€ @€ @ £€‚cÆTíD€ @€ @€ @€ @€ @€ @€ Pp,< ã @€ @€ @€ @€ @€ @€ @€Œ ŽSµ @€ @€ @€ @€ @€ @€(\@Á±ð€ŒG€ @€ @€ @€ @€ @€ @€2 (8fLÕN @€ @€ @€ @€ @€ @€ @ pÇÂ2 @€ @€ @€ @€ @€ @€È( à˜1U; @€ @€ @€ @€ @€ @€ @€Â Èx @€ @€ @€ @€ @€ @€ @ £À|Æ¥ìD€JhÛöåa³}î3±ï­±Ø9>#@€ @€ @€ @€ @€ @€dPpÌ¢ @ Ÿ|ÿ¹#Æýñ7¾‚ã0#Ÿ @€ @€ @€ @€ @€ @€9säh  @€ @€ @€ @€ @€)ð­ ëݳ±ÿñgŽ6ÃÎZë/ög}F€ P®€‚c¹Ù˜Œ @€ @€ @€ @€ @€Õ |çøÐîb·ßþ¦‰ýk,zdÏ}L€ P¡À Â™L€ @€ @€ @€ @€ @€ @€T. 
àXy€Æ'@€ @€ @€ @€ @€ @€ @€5 (8Ö˜š™  @€ @€ @€ @€ @€ @€ @€@å Ž•h| @€ @€ @€É 4ÑLîâ®L€ @€ P…€wDUÄdH @€J+ ÎØ @€ @€ @€“ØÐ,Nþ&î@€ @€ P´€wDEÇc8 @€Ê+Ðø @€ @€ @€“Ø:·yrwe @€ @€*îõŽ¨Šœ I€ P§€‚c¹™š @€ @€ @` ;¶Má.nA€ @€ P²ÀCÞ•Ù @€*Pp¬<@ã @€ @€ @€LN`ïâ®É]Ü•  @€ @€ªðŽ¨Š˜ I€ P©€‚c¥Á› @€ @€ @`ò»×íˆ-ƒM“¿‘; @€ @€)нêÞ9 @€˜Œ€‚ãd\]• @€ @€ @ À\3ˆCw>™`+ @€ @€|îÝP÷ŽÈA€ @€Àdü×öd\]• @€ @€ @ ‰Àõ{âá…íI¶± @€ @€À¨Ý;¡î݃ @€É (8NÎÖ•  @€ @€ @€4M/.ŠOÍÝ`+ @€ @€Œ"н êÞ uï† @€LN@Áqr¶®L€ @€ @€ Dà®ÁÆøúÒ³JŽIò´ @€ @`˜@WnìÞuï„ @€LV@Áq²¾®N€ @€ @€ Dàþù{â•»Ÿ‡¶'ÙÈ @€ @€èÞýtwA @€&/0?ù[¸ @€ @€ @€ÝOíÿƦ/Åñ«oıK'âÜ 9³ @€ @`ƶ 6Å¡;ŸŒë÷DÓ43®a} @€ÓPpœžµ; @€ @€ @€$è¾ÁíàÄç×6Þ¼övœZ>gVÞ‹³«çãJ»íÍ_ @€ @€ršhbC³[ç6ÇÎ…m±wqWì^·#æšA¹C›Œ @€@RǤÁZ‹ @€ @€ @`²Ý7¼íYüô‡_“½“« @€ @€ @€ @ §€3’3W[ @€ @€ @€ @€ @€ @€ @€¢‹ŽÇp @€ @€ @€ @€ @€ @€ @ §€‚cÎ\mE€ @€ @€ @€ @€ @€ @€ŠPp,:à @€ @€ @€ @€ @€ @€ @€œ Ž9sµ @€ @€ @€ @€ @€ @€(Z@Á±èx G€ @€ @€ @€ @€ @€ @€r (8æÌÕV @€ @€ @€ @€ @€ @€ @ hÇ¢ã1 @€ @€ @€ @€ @€ @€È) à˜3W[ @€ @€ @€ @€ @€ @€ @€¢‹ŽÇp @€ @€ @€ @€ @€ @€ @ §€‚cÎ\mE€ @€ @€ @€ @€ @€ @€ŠPp,:à @€ @€ @€ @€ @€ @€ @€œ Ž9sµ @€ @€ @€ @€ @€ @€(Z@Á±èx G€ @€ @€ @€ @€ @€ @€r (8æÌÕV @€ @€ @€ @€ @€ @€ @ hÇ¢ã1 @€ @€ @€ @€ @€ @€È ·Ãx@IDAT) à˜3W[ @€ @€ @€ @€ @€ @€ @€¢拞Îp @€ @€ @`®µ+ñÓå3ñ³›_o­¼çn\ŒÚå”°2 @€ ¸£YŒ-ƒ¥xpá¾xtqg<¶¸+Ö5¾%l˜™Ï @€ @€Êð6«¬ÛË3,au @€E ûa8¿Y=÷á×\ýÏXj6ÆÓ÷ÇSžˆùfn”K8‡ @€ @€@¯ ޽ò»9 @€ @€ø߯^ˆ¯\ü^¼sýwH @€ @€ÀÇè~Xο\ú÷8qõñÕ¥/Ä=s›>Öuü! @€ @€ÓLëFîC€ @€ @€üyÿZùm|ùü?*7þy¿K€ @€·)Ðýðœî9³{Þt @€ @€(Y@Á±ätÌF€ @€ @€éº¿¹ñ… ߎîoÙp @€ @€q tÏ™Ýóf÷Üé @€ @€ Pª€‚c©É˜‹ @€ @€Ò \oWã+¿§Ü˜>i  @€ @ ®äØ=wvÏŸ @€ @€% (8–˜Š™ @€ @€ @`&^»òz¼sýw3±«%  @€ @ î¹³{þt @€ @€(Q@Á±ÄTÌD€ @€ @€é®µ+ñêå“é÷´  @€ô/Ð=^k¯÷?ˆ  @€ @€|D@Áñ# þ‘ @€ @€LCàÔò™¸Ø^žÆ­Üƒ @€f\ {þüéòéW°> @€ @€@‰ Ž%¦b& @€ @€H/ðó›G @€˜–ÀÏ<‡N‹Ú} @€ @€nC@Áñ6°œJ€ @€ @€Æ%ðÖÊû㺔ë @€ @€5<‡®Iä @€ @€{@wK @€ @€ pîÆE @€ @`jžC§FíF @€ @€·!0ç:•ø„?sôȰK¼ðÝØ÷ÀÖagD>8üsŸ @€ @€u|Ð.×1¨)  @€ @ …€çÐ1Z‚ @€ N@Á1]¤"@€’š¦yiØ|'Ñ} ;l‡}ì3 @€ @€ @€ @€ @€ @€@ƒ*¦4$ @€ @€ @€ @€ @€ @€ J@Á1Uœ–!@€ @€ @€ @€ @€ @€ @€u(8Ö‘“)  @€ @€ @€ @€ @€ @€ @€@*ÇTqZ† @€ @€ @€ @€ @€ @€Ô! 
àXGN¦$@€ @€ @€ @€ @€ @€ @€©SÅi @€ @€ @€ @€ @€ @€ P‡€‚c9™’ @€ @€ @€ @€ @€ @€¤PpL§e @€ @€ @€ @€ @€ @€ @€@ ŽuädJ @€ @€ @€ @€ @€ @€ J@Á1Uœ–!@€ @€ @€ @€ @€ @€ @€u(8Ö‘“)  @€ @€ @€ @€ @€ @€ @€@*ÇTqZ† @€ @€ @€ @€ @€ @€Ô! àXGN¦$@€ @€ @€ @€ @€ @€ @€©SÅi @€ @€ @€ @€ @€ @€ P‡€‚c9™’ @€ @€ @€ @€ @€ @€¤PpL§e @€ @€ @€ @€ @€ @€ @€@ ŽuädJ @€ @€ @€ @€ @€ @€ J@Á1Uœ–!@€ @€ @€ @€ @€ @€ @€u(8Ö‘“)  @€ @€ @€ @€ @€ @€ @€@*ùTÛX† @€ @€ @€ @€ @€Šøâvè<ïž“?üUühèI>$@€H) à˜2VK @€ @€ @€ @€ @€ @  Ãט£“ÍÖç^^ã, @€$$ÜÉJ @€ @€ @€ @€ @€ @€ @€@á Ž…d< @€ @€ @€ @€ @€ @€ Q@Á1cªv"@€ @€ @€ @€ @€ @€ @€… (8ñ @€ @€ @€ @€ @€ @€ @€@FÇŒ©Ú‰ @€ @€ @€ @€ @€ @€. àXx@Æ#@€ @€ @€ @€ @€ @€ @€3¦j' @€ @€ @€ @€ @€ @€ P¸€‚cá @€ @€ @€ @€ @€ @€dPp̘ª @€ @€ @€ @€ @€ @€ @€@á Ž…d< @€ @€ @€ @€ @€ @€ Q@Á1cªv"@€ @€ @€ @€ @€ @€ @€… (8ñ @€ @€ @€ @€ @€ @€ @€@FÇŒ©Ú‰ @€ @€ @€ @€ @€ @€. àXx@Æ#@€ @€ @€ @€ @€ @€ @€3¦j' @€ @€ @€ @€ @€ @€ P¸€‚cá @€ @€ @€ @€ @€ @€dPp̘ª @€ @€ @€ @€ @€ @€ @€@á Ž…d< @€ @€ @€ @€ @€ @€ Q@Á1cªv"@€ @€ @€ @€ @€ @€ @€… (8ñ @€ @€ @€ @€ @€ @€ @€@FÇŒ©Ú‰ @€ @€ @€ @€ @€ @€. àXx@Æ#@€ @€ @€ @€ @€ @€ @€3¦j' @€ @€ @€ @€ @€ @€ P¸€‚cá @€ @€ @€ @€ @€ @€dPp̘ª @€ @€ @€ @€ @€ @€ @€@á Ž…d< @€ @€ @€ @€ @€ @€ Q@Á1cªv"@€ @€ @€ @€ @€ @€ @€… (8ñ @€ @€ @€ @€ @€ @€ @€@FÇŒ©Ú‰ @€ @€Š¸£Y,~F @€ @€@Ï¡y²´  @€ @ “€‚c¦4íB€ @€ @€Õl,U3«A  @€ @ ~Ï¡õgh @€ @€@FÇŒ©Ú‰ @€ @€Šxpá¾âg4  @€äðš'K› @€ @€2 (8fJÓ. 
@€ @€ PÀ#‹;«™Õ  @€ P¿À£žCëÑ @€ @€„ Ž Cµ @€ @€”/°÷æ7–.5ËÔ„ @€ P½@÷üùØâ®ê÷° @€ @€@>Ç|™Úˆ @€ @€*X×,ÄÓ÷W0©  @€ @ vîùs]3_ûæ'@€ @€H( à˜0T+ @€ @€ @€@Omx"¶Ïß[ǰ¦$@€ @€*ºçÎîùÓA€ @€ @ DÇS1 @€ @€Ì„À|3_]úB,5gb_K @€ @€ÀtºçÍî¹³{þt @€ @€(Q@Á±ÄTÌD€ @€ @€3#pÏܦøûMÏ*9ÎLâ%@€ @€Àtºrc÷¼Ù=w: @€ @€”* àXj2æ"@€ @€ @€™øôÂýñÊæ¿íó÷ÎÌÎ%@€ @€É tÏ—Ýsf÷¼é @€ @€ P²À|ÉÙ @€ @€ÌŠ@÷7jüóÝÏÇkW^W/ŸŒ‹íåYYÝž @€ 0&îom|zãþxjÃ1ßÌéª.C€ @€ @`r Ž“³ue @€ @€ p[Ý7ŸþÍÆ}ñ×SËgâç7¿ÞZy?Îݸ´Ë·u-' @€ @€@~;šÅØ2XŠî‹GwÆc‹»b]ã[Âò'oC @€ @€@o³òdi @€ @€H"°®Yˆ}ëÿêï$+Yƒ @€ @€ @€ p‹Àà–ßñ @€ @€ @€ @€ @€ @€ @€ (8NØå  @€ @€ @€ @€ @€ @€ @€[o5ñ; @€ @€ @€ @€ @€ @€ @€À„' ìò @€ @€ @€ @€ @€ @€ @€À­ Ž·šø @€ @€ @€ @€ @€ @€ @`Âó¾¾Ë @€"жíËò·üÏÏ}&ö=°5ößòß @€ @€ @€ @€ @€ @€$PpL¨u @ lŸ|ÿ¹#Ã&üñ7¾‚ã0#Ÿ @€ @€ @€ @€ @€ @€9säh  @€ @€ @€ @€ @€)ð­ ëݳ±ÿñgŽ6ÃÎZë/ög}F€ P®€‚c¹Ù˜Œ @€ @€ @€ @€ @€Õ |çøÐîb·ßþ¦‰ýk,zdÏ}L€ P¡À Â™L€ @€ @€ @€ @€ @€ @€T. àXy€Æ'@€ @€ @€ @€ @€ @€ @€5 (8Ö˜š™  @€ @€ @€ @€ @€ @€ @€@å Ž•h| @€ @€ @€ @€ @€ @€ P£€‚c©™™ @€ @€ @€ @€ @€ @€T. àXy€Æ'@€ @€ @€ @€ @€ @€ @€5 (8Ö˜š™  @€ @€ @€ @€ @€ @€ @€@åó•Ïo| @€*XmoÄ›×ÞŽS˧ãÌÊ{qvõ|\i—£½ùËA€ @€ O ‰&64‹±unsì\Ø{wÅîu;b®ñsyó¥m# @€ @€ @€Àh Ž£99‹ @€1 ´mǯ¾Ç.ˆs7.Œéª.C€ @€ Pº@÷ƒ­.·W㿯ÿχ_ÿöÁ©Ø2؇î|2¬ßMÓ”¾‚ù @€ @€ @€Æ, à8fP—#@€ @à/ üñÆåøÚÅcñË•wþòI>!@€ @€˜î`ýÃÿ5N|ð‹xqéPÜ5Ø83»[” @€ @€ @ b @€ÓøíõßÇ—ÿðOÊÓÀv @€ @€@eÝÄêÞuï @€ @€ @€³# à8;YÛ” @€@oÝßÜøw¿ÿ»ú‡Þfpc @€ @€²ºwGÝ;¤î]’ƒ @€ @€ @`6g#g[ @€èM mÛøÚÅcʽ%àÆ @€ @€zº’c÷.©{§ä @€ @€ @€È/ à˜?c @€èUàøÕ7â—+ïô:ƒ› @€ @€Ô#нKêÞ)9 @€ @€ @€ò (8æÏ؆ @€zXmoıK'z»¿ @€ @€Ô)нSêÞ-9 @€ @€ @€r (8æÎ×v @€zxóÚÛqîÆ…^gps @€ @€úºwJÝ»% @€ @€ @€@nÇÜùÚŽ @€@¯§–O÷z7'@€ @€¨WÀ»¥z³39 @€ @€ @`TÇQ¥œG€ @€Àm œYyï¶ÿŒ?@€ @€ @ ðnÉ¿ @€ @€ @€ü Žù3¶! 
@€ÞήžïíÞnL€ @€ P·€wKuçgz @€ @€ @€À( Ž£(9‡ @€%p¥]þXÎ"@€ @€ àÝ’ @€ @€ @€ùóglC @€½ ´Ñövo7&@€ @€¨[À»¥ºó3= @€ @€ @`ÇQ”œC€ @€ @€ @€ @€ @€ @€ŒU@Áq¬œ.F€ @€ @€ @€ @€ @€ @€Œ" à8Š’s @€ @€ @€ @€ @€ @€ @€± (8Ž•ÓÅ @€ @€ @€ @€ @€ @€ @€QGQr @€ @€ @€ @€ @€ @€ 0VDZrº @€ @€ @€ @€ @€ @€ 0Š€‚ã(JÎ!@€ @€ @€ @€ @€ @€ @€Æ* à8VN#@€ @€ @€ @€ @€ @€ @€FPpEÉ9 @€ @€ @€ @€ @€ @€ @€ÀXÇÊéb @€ @€ @€ @€ @€ @€ @€À( Ž£(9‡ @€ @€ @€ @€ @€ @€«€‚ãX9]Œ @€ @€ @€ @€ @€ @€E@Áq%ç @€ @€ @€ @€ @€ @€ @€cPp+§‹ @€ @€ @€ @€ @€ @€ @€£(8Ž¢ä @€ @€ @€ @€ @€ @€ @`¬ Žcåt1 @€ @€ @€ @€ @€ @€ @`ÇQ”œC€ @€ @€ @€ @€ @€ @€ŒU@Áq¬œ.F€ @€ @€ @€ @€ @€ @€Œ" à8Š’s @€ @€ @€ @€ @€ @€ @€± (8Ž•ÓÅ @€ @€ @€ @€ @€ @€ @€QGQr @€ @€ @€ @€ @€ @€ 0VDZrº @€ @€ @€ @€ @€ @€ 0Š€‚ã(JÎ!@€ @€ @€ @€ @€ @€ @€Æ* à8VN#@€ @€ @€ @€ @€ @€ @€FPpEÉ9 @€ @€ @€ @€ @€ @€ @€ÀXæÇz5#@€† <þÌÑ#ÃNxá»±ï­ÃΈ8|pøç>%@€ @€ @€ @€ @€ @€Ô  àXCJf$@€4MÓ¼4l™“¿Žè¾†‡¶Ã>ö @€ @€ @€ @€ @€ @  AS’ @€ @€ @€ @€ @€ @€H% à˜*NË @€ @€ @€ @€ @€ @€ @€:ëÈÉ” @€ @€ @€ @€ @€ @€ @ •€‚cª8-C€ @€ @€ @€ @€ @€ @€êPp¬#'S @€ @€ @€ @€ @€ @€ @€T Ž©â´  @€ @€ @€ @€ @€ @€¨C@Á±ŽœLI€ @€ @€ @€ @€ @€ @€R (8¦ŠÓ2 @€ @€ @€ @€ @€ @€ @ Ç:r2% @€ @€ @€ @€ @€ @€H% à˜*NË @€ @€ @€ @€ @€ @€ @€:ëÈÉ” @€ @€ @€ @€ @€ @€ @ •€‚cª8-C€ @€ @€ @€ @€ @€ @€êPp¬#'S @€ @€ @€ @€ @€ @€ @€T Ž©â´  @€ @€ @€ @€ @€ @€¨C@Á±ŽœLI€ @€ @€ @€ @€ @€ @€R (8¦ŠÓ2 @€ @€ @€ @€ @€ @€ @ Ç:r2% @€ @€ @€ @€ @€ @€H% à˜*NË @€ @€ @€ @€ @€ @€ @€:ëÈÉ” @€ @€ @€ @€ @€ @€ @ •À|ªm,C€ @€ @€ @€ @€ @€E |ñ@;tžwÏÆÉþ*~4ô$ @€¤PpL«¥ @€ @€ @€ @€ @€ P†ÀáƒkÌÑÆÉfës/¯q–  @€ îd% @€ @€ @€ @€ @€ @€ @ pÇÂ2 @€ššhjßì @€ @€= x·Ô#¾[ @€ @€ @€¦$ à8%h·!@€ 0‹šÅY\ÛÎ @€ @€cðni ˆ.A€ @€ @€(\@Á±ð€ŒG€ @ f­s›kßì @€ @€= ÜëÝRúnM€ @€ @€˜Ž€‚ãtœÝ… @€ÀL ì\Ø6“{[š @€ @à“ <äÝÒ'Gt @€ @€ @€@á Ž…d< @€5 ì]ÜUóøf'@€ @€èQÀ»¥ñÝš @€ @€ 0%Ç)A»  @€YؽnGllšÅÕíL€ @€ ð ºwJÝ»% @€ @€ @€@nÇÜùÚŽ @€@¯sÍ Ýùd¯3¸9 @€ @€@}Ý;¥îÝ’ƒ @€ @€ @ ·€ÿG(w¾¶#@€ лÀõ{âá…í½Ïa @€ @€:ºwIÝ;% @€ @€ @€@~ÇüÛ @€@¯MÓÄ‹K‡âSsw÷:‡› @€ @€”/нCêÞ%uï” @€ @€ @€ùóglC @€½ Ü5Ø__zVɱ÷$ @€ @€(W +7vïºwI @€ @€ @€ÙPpœœmI€ @ wûçï‰Wî~>^ØÞû, @€ @€(K {gÔ½;êÞ!9 @€ @€ @€fG`~vVµ) @€} 
t?}ÿ›¾ǯ¾Ç.ˆs7.ô=’û @€ @€ô(°e°)ÝùdX¿'š¦éq·&@€ @€ @€èC@Á±u÷$@€ 0ÃÝ7ª¼ã‘øüúÏÆ›×ÞŽS˧ãÌÊ{qvõ|\i—£½ùËA€ @€ O ‰&64‹±unsì\Ø{wÅîu;b®ä[ÖF @€ @€ @€# (8ŽÄä$ @€Æ-Ð}ãÚžÅOø5îk» @€ @€ @€ @€ @€å øQ˜ågdB @€ @€ @€ @€ @€ @€ N@Á1]¤"@€ @€ @€ @€ @€ @€ @€å (8–Ÿ‘  @€ @€ @€ @€ @€ @€ @€@:Çt‘Zˆ @€ @€ @€ @€ @€ @€”/ àX~F&$@€ @€ @€ @€ @€ @€ @€éÓEj! @€ @€ @€ @€ @€ @€ P¾€‚cù™ @€ @€ @€ @€ @€ @€¤PpL©… @€ @€ @€ @€ @€ @€ @€@ù ŽågdB @€ @€ @€ @€ @€ @€ N@Á1]¤"@€ @€ @€ @€ @€ @€ @€å (8–Ÿ‘  @€ @€ @€ @€ @€ @€ @€@:Çt‘Zˆ @€ @€ @€ @€ @€ @€”/ àX~F&$@€ @€ @€ @€ @€ @€ @€éÓEj! @€ @€ @€ @€ @€ @€ P¾€‚cù™ @€ @€ @€ @€ @€ @€¤PpL©… @€ @€ @€ @€ @€ @€ @€@ù ŽågdB @€ @€ @€ @€ @€ @€ N@Á1]¤"@€ @€ @€ @€ @€ @€ @€å Ì—?¢  @€ãXmoÄ›×ÞŽS˧ãÌÊ{qvõ|\i—£½ùËA€ @`M4±¡YŒ{ç6ÇC Ûbïâ®Ø½nGÌ5~&Ù4ü݃ @€ @€ @€ @€þûÏÀ @€ÀÚ¶ãW߈c—NĹÿcïNàÛ(ÏÄ¿ïHrb’Ž$”«„ôNiÒ6[!Ž-£ØI|P ÷µýww{l)´ºÐm»¥Ûm õ‘`Gµì8@¸›\KC),WáJ8!NbkÞÿ3&‡-KóJ²äHòo>ŸÔš÷}ç=¾3’†Ýyô¸›Gqd†B@@`¨€÷ãošíêñþçþ­î½KMs&«úýNSgŒ?^i­‡À € € € € € € € € Pb8–Ø e9 € Z`‹û¦úÞkmêþ¾'S7¢@@}(àýÇ·üNÝÖûWuáõj¢3aΆ¡@@@@@@@@@ü 8ùížÞ@@Âx¶ÿ%õùM?%¸±0N³@@°x?ÊáÝ¿z÷±l € € € € € € € € €@© àXªg–u!€ €À/sã7_»ZmˆoÚSÆ @@ ]À»õîc½ûY6@@@@@@@@@ p,ųʚ@@`€1F}ïµ6‚÷ˆð@@ ˜¼ Gï~Ö»¯eC@@@@@@@@JM€ÇR;£¬@†Ü´ý^uß“CÊØA@@ ˜¼ûYï¾– @@@@@@@@(5K팲@Ø#7®jÛzÛž}^ € € P¬Þ}­wˆ € € € € € € € €”’Ž¥t6Y  € ¸oçãj£»yH; € €£€w_ëÝß²!€ € € € € € € € €¥$@€c)MÖ‚ €CîÚñð}v@@(fîo‹ùì1w@@@@@@@@H&@€c2Ê@@ $é[_ë` € €xrË…€ € € € € € € € €%&@€c‰P–ƒ €{^Œ¿ºw‡W € €¹À Üßùdú € € € € € € € € (@€c¢û € P2ÛÌŽ’Y A@@€û[®@@@@@@@@(5`©-ˆõ € PÈƘïøÍï”÷¨½ý`5ϯ ué eÒoLK@@ \€ûÛ?AL@@@@@@@@2 À1c2@@ {;Û?{±ßÑwüøª‹”&ÀÑψ:@@@@@@@@@@@( Kã<² @@@@@@@@@@@  ~s£ÿ´žzQÍ;©î*í×Ê–`ÀïXê@@ W€ÇÂ=7Ì @@@@@@@@@@(zknò]ôÖ7Ok5ϲЋ-õT#€ €¡€S„sfÊ € € € € € € € € € € € € € Pä8ù dú € € € € € € € € € € € € €£ŽÅxÖ˜3 € € € € € € € € € € € € €E.,òù3}@@@@@@@@@@@(Rpmã2õ÷ùMßhµ¾«½år¿6Ô!€¥"®]v¼RsýÖãj7ÚÝÞ¶Ö¯ëKÅ@IDAT u‹ŽÅr¦˜' € € € € € € € € € € €%' ÏÔZÕø-K+µNê pôC¢JFÀ(}Œ£õçüäçQ©_ë׆:ŠEÀ)–‰2O@@@@@@@@@@@@@(Kç\²@@@@@@@@@@@@@ŠF€Ç¢9UL@@@@@@@@@@@@@Ò À±tÎ%+A@@@@@@@@@@@@@ hp,šSÅD@@@@@@@@@@@@@(Kç\²@@@@@@@@@@@@@ŠF€Ç¢9UL@@@@@@@@@@@@@Ò À±tÎ%+A@@@@@@@@@@@@@ 
h‚E3S&Š € € € € € € € € € € € €”°€q݇L@ßo‰ÆÕñ«§b À±˜ÎsE@@@@@@@@@@@@’莮¸_çýcC`L8cb•,@@@@@@@@@@@@@ J€Ç‚:L@@@@@@@@@@@@@±!@€ãØ8Ϭ@@@@@@@@@@@@@‚ À± N“A@@@@@@@@@@@@@`là86Î3«D@@@@@@@@@@@@@  p,¨ÓÁd@@@@@@@@@@@@@8ŽóÌ*@@@@@@@@@@@@ :]‹=Þ¤ÎG!Í%_S±_/ƒ]Ji-ƒ×5Z¯Kåzç:­+FÆ ŽâX … € € € € € € € € € € €¥À¼yóÆpЬJ­Í‡$‚çP¥Õ¡Ú¨CÒ‡h­£ÌF)ß ŒüÓúåº7½`¶ßzW4Ú[ˆ ž‰ìw€Sþ‘€Ò‡+cf­g)mfÉ\e_M—µl—²ÍZ™ÍF©Wäï}q×¹Û8;îZÕÞþl!­©ªªêgüÄÏɼ?,ó>Fæ{¸œï\<*óôþ=¢ûûoéì\ñxžæ­+k憂ê,£œcdìƒÄkš2ú ¥Ìrô˸/ɵ³Q^o|ëqïR;Í ±XÛ‹yšSκ­©Yz„ N‘õ¼_®õ·Éß©r­L‘uL•A¦Èõ¾Ÿ\C½F«×´ÑÞõ²Y ^“2Y³¾»/ÿCO´í±œMh„-\¸ämeåÁºÚù£Ìñ2×CÑ“åœMöÖbŒÚ&C¼*çëyýªRúy?+ëúý¶Íæ7ßÜúæ§PÔ‡—Ðg¡®]öAcœ:Ž>^Îõ»äZ˜*Ÿë¼uM«×åD=/×õór­?/ïå{{ÝÞèšhôå¢>8yùÜfC@B0/^u‘Ü]<šó¹õ©ÓFs¸QëŸ6þÛ¨ŽÇ` € €ä[à–iWä{úG@@@@@(Óß~[Á¬ä䯌ü±õ;®ýÌÈ;)‘“—þò"cÌÅ2MC—.þóŠOg$„k›~'S5–>Öu¶7ÀÒ&euÕâ¦%m¾›ª-]ëhùiŠz'¼¸ñcò|gÃ[óÔS´KZ¼+PêfטÎnÓ{­ŠFãIŽb¡â} ÒŸ’€%A ž»N^vÈúÉ´|´?+™>N—Ÿ%×üë»áº†Ëžyüá–uëÖõårœÄ¾æG"–;åß“ñ?!—IYZWJb'C÷CÒÏG”r> š‹kj»T¿þvggóCC›e¿gœøþZR¾¼žµ1CÞéŒ6Ö? ½ÏŒ`ù¸KåÚn/'³Ä6ò> JÙ™rø™r_^ÜtAleó·“@Þ2ysûœ?#YAÙ du2wÀk@@@@@@@@@@@@ T¼Ì|’µñ" nü³³dÜ8ÜCÏQŽs£DÝàÚ ¯ÏO‰ó-­˜x^‚²~!aQo7æh( ª£Ï9ã'=T]Û¸(GÝZ»ñ6ƒåe(šÝz$ ðTÇÕï¶”Ð`Q]ýIrþîp´Ó™Ûkc` /ó| Àz\®¿ï΋DöK>o»‹êêfH êÏU(ø„¬ë ¹nLœ¸¡•óëYGÏy¼:Òxbb}®ökj›>[áT<.kú´œ•åªßÝýH¿r)é° ©$8µ­ª®îðÝu¥ð·”> ½kÁ{oÉ;WÎMNâéä:>Z"M×ÖÔ5]å}&•Â9ßWkÈÉ ÙW“g\@@@@@@@@@@@@ W # oŸ:}柵£/–Ø%/SWî7­çKpÞïÃáúƒsßùÞ‚“j›~,XË%+¯ÁrèsˆDçuKÜË ò«¢%`³]ÎÏ;÷®6³Wƨ;ߌwdr”tT;Äò¤LŽË´­`UÈõwá§üÃᥳ2=>Óö5‹Î ˜qŠççòؘ87¹6g9}{8ÒxNbÝ÷µþx ¨WÍü9Â~Sî§.sTÙ=‹"ËÞ•ªQ1•—Òg¡÷Þõ®uës ï/)ègÊ'ÿê çºÿ±Ò_¾¿4ÆŠ#ëD@@@@@@@@@@@"¨®nœrô-’ðCù^†ÄģʷŸ‰LÏÇXÕÕ M™>óV ½ùç|ôŸªOY×§%°¬EE"TmFZ.A”_ Ãù#éG"’z®¿~Åætú˜;wn(\ÛÐì¦Ó>WmÄr¶…î×Ö¿?W}&ö®kh0Z¯ÊGàWâX>ûã%$veumÓ'|Ú¤_%מ\ƒ+FûÚ÷&(›àÚÊš†¤?áÂkYBŸ…Ž—]q4Þ»ò™rDДÝ~v¤ñÐÂ;£…?£üüš@á¯;é åKî©ðO·lÌ3±•­W$í€Bð¨ZÜ´ÄÑæd¿F;]uÙuÑ–çýÚk]eå’iòà‡%•öIòå5SÖ1EÒO1J~ Ũ—”2Ï¥ž—›í猫ž×}îÚ®®V)gC@@@@@@@@@@| ̉DÊGwI„Ò‘ùgpßòLñ±ãåË¥ì´Áå#}®]v¼<““`«ÃFÚW6ÇK`Ù²š@y¨S©:9^‘ÎÝVS³ôhéíò‘öhÜxs:}œ‰Lš(÷,OM§}ÎÛh5]ç’e±¶seë ¹ì¿º®é+ʘJ ¥\Šûv˜ƒQ?©¬¬»©§§ý…‘Ì&(ÿ¦,È»ööͦՔPйµº¶áÄ®ŽÖGöÍ$²µT> ½uëT´‰D${ Ôú¨2ÇÜîªà3Y;YÛÆM/?wS²ºÁe^€ÔD§â19~p¹íµôÿ‚VæFcÔ:媗âw£ôPnà`G¹‡Èõòna¬”ø•l} 
Ôk½Ÿô¹ª:ÒøÑ®hË]iciŽÔ/9ýHa--÷VË5ò¢\ç÷HPäÃ2Ÿ'åù-F¹[íô»ÚL”5Mr”™f´š+G/×ˬ½GÛ_ÉTöV”ýDZfViš+óû¶§·³^ÖòY×#F»O8»Öä,Aj3åb?\®ýYât„t9OŠC^uÓj’¦z¸•Ö¶Ö D> õ;«„öÌLxå=û¬|ŽËqæI¹.ž÷®ù€Ÿ®u¨|>¾[®Ñ³mŸ­^܈¼Vxñ!lé dôÅ™~·´DƶÀüHäÀŠ@ù¥ò%V/_LåÙ|7íú›³¥Ï®˜tàfù—–Ínï·ÖF£[Ƕ.«G@@@@@@@@@@ ‡F{ÉLf¤êQ%–Quk×]¾ÕìøýMÑè¦dmçÏ_6qüþιŽÖŸ“€¨w$k“¬LÂ`®P‘ÈÆ“ÕgRæ¨qWIûtƒ] äùM<Þÿ‹îèŠûSŒ³'𲺶þ=Z;?‘Àµ¥h;¤XÖu¹d±»5WY줿³Åõ¬!ƒd±#§sùÚµkûm‡ã”_!Mi7ÊUÒÝßï^ÚÓÙê%ÖòÝæÎšqÔœÓJÿ›\[ó|KåÀ³åsí‚Kßwýõ+6ÛÚûÕ{›*ø±_›Ýu×¾2­Çû?±•Ëï”r‰ûJo«©išc‚æÓr½4Ê'¥s”´=§²¦ñÝ=-¤Ó>¡L³hYc¥$X³G™ø%±Žå÷&ô1x÷ŽÁ;áðÒY*’ìæ\9!eƒë’½–kg¡¥ž˜« Ôdc伬D> «k›Î•Ï‹ôƒYeLü¶k|áÂ…¡ “Ê5v±ôl*¹Ž§¦ªóʵ‘«ƒmˆ€õM;¤5; €Vp¤ñíè_HÃåF,G›žìhõå)NùÙ‹j–­êl½'GÓ  € € € € € € € € € €cZ@BMR7*ÓwãßZ]þ° iÍšå[¤Í•òï§áº†&eœÿ–¾ÇÙŽ“àµÙÕNùü.¥öÚŽIV/ˆËÒê1æNɾ÷ÅXGÛ}ÉúJVÖÕÑö7)Ÿç=/­ý#?·ã%Ó¢ò\-¯OLÖ_ÆeZKö´T›Ù,™'¯2Ú¬s]÷Ѹ ¼Ræ(yžûÇqÞ/Yýê%(m?ïèx¿jNÕËîòp¤É˰øåÝû¾zШþó-ÁrCºX·n]Ÿü[#…k$«Q2þ§ÌõÀ!v$hjÖ¸ýƒ¿‘âpBUF»ÇÊ¿"×ÉÑÖƒŒyJòž“É52¸ÏÎÎæ‡dÿKgE"—TüZÆ\0¸>Õë`@#u8VGN‘б9©úõʽ€M‰V>_2¦^ã×.Y],¶â)ÿôÂHã¥eŽ\Cé¦Ô÷å˜yÉú+Ä2¿÷´…Ågáé‘È$­Íȵ`%–5='Ääzð²=Z·Õ«Wo“F$Üu¬3þ_%»í·d¼õ@Xä3 @ '‘H@Ò¨_­:j»¹Ìv<ù—#AýçšÚ†—>ø Ï’ã@@@@@@@@@@ð —m õÉX{K8àÆ„®L¬½õ·q?Uh^J¨Kº+¡8‹’V¤YX]Ý8ÕQéeå“0¯¶W7®ÿX¶k±hËïd]§K.¿×mÓ“€©‹%ð,?›œ£~åšËãÛ·[Ùòï’-2æ¯ë¢-ÏwE[ÿе²õWíÍŸ}Ýí=LÒUþ«Ì»Ç–pQ]Ý í¨ôàŒ‰½ên;)“àÆD‰®ŽæÓ¬å‘ĺÄ} r¬®^ÜðùÄòt÷++ëqŒò‚²|7™Ëßûzwžœí52¸ó¢Ñ ±öf/8õ¿—§|­SV§ª$E秪ÛSnÌ×Å;½s»ç ¡/VG[ÖÇw¼^í ­¾'çëc55KíÁ¤Ã-˜’bû,Üß)ÿޏd4m÷ŸnpãàþŠFwvv´^êÆÍ©âóÚà:^g'@pLvn… ðR„‡Š @üÄŠ<ì ¤×Î%5µ×Í›7L¼y0¦K@@@@@@@@@@1-°=ÞïžëhùõHVµ·Ý©ÜxX¡Œ­ ÈñÀì)ÇRt¤ËÌÒIÒ"SùYg{KãÚµkûSt•V±ýÝU¦>­µiõ¯iušy#WÖÜÔ¹²åëÝÝݾAF·F£¯wµ7ÿ§¬ZIfÜ•ÒïÛtä¬þOgGË9k£Ñ­¶¶¶úX¬íÅþÞ> 5^ÖCßÍqôeóç/›èÛ(Ee¨j­‰PåÎÈbl/xp¡í8yæùk«£-ëmí²¨—X;Ób;Î1úŸlm2©wÝøe™´O§í‘ȉC;×ÖÖ¸îÅ×_¿b³­]¶õØ- unó;^Nú¬*]Qã×&±®Ì±g¦”kéê|®Í›“\‹¾±óvÕÔÄùûíKêùÄH¤ü®h´7E“´‹%hìo]Ѷ[Ò> ͆åŸË ¿æ²ŽG»Ìö«ýÚä¢Në\©Ê7 Žã˜/ÊX+ÓOæ~¶íÁ÷~ïH·¿lÛI0â“¶lmF;2ê_;;¬íú ´yÒÚ.Ãr=z/OÛ'½µIÛ“›7>û”døÜžaW…Ó¼? 
µqÙòd×\‘/ä5kÖ쨮mºÂѪ9_c”r¿8–òÙem W`yè;òû“mƒå^ko½ÐÖ.±~Ugë=UUUïsÆM\.7ûg%ÖÞ— ˃¦è /ú%ƒËy € € € € € € € € € €é ¸Æü6½–™·’àÅ;%êÿ#Í$ÿúáµÁZ4¼th‰d-kÏg°U,¶â™pmãZõ”!#³UÖ-ÙúÌ£’åñ±éýå^ðàˆµQ9nôæ-ç§zÈü“ìHöÆ«T4OR•Ó¢X´ùºp]ã‹òœøÁ©:‡%IÎ~é&ɉ¹½ ªJŽ ¨à‘r]©´9BÖ}¤Œq¤2zÛªŽ¶»S—³rcþaË1äã]+éo®ûð›t´¹pÞ¼yQy/ô§ß±½eWGk»´òþ•ÌVlŸ…•‘ú£ä |œß N޶üÙ¯ÍHë6¸Û¢;?‘$WE—±s¤kéñ8ŽTã@`L ,Š,;Îþr+nÌ5±ŽÌƒw£vww¿&7Q5S§Í¼MnâNÚ]žì¯| þ‹¤®þy¾S‚'›2@@@@@@@@@@Š^@›ÿÍ×$¸æykßZgÔ%J\¥µ_¿ÆÞf„-\÷‡ÆqþOžŸ~Ì8î£ÎNõX,¶Ü[³,=·›«Í]¹íQ©¼a‚ü½WžÉöÝúîöm»JW¦r›tWŸ²K­Ê>$õ^;û&™2ù§¥¡÷oØ1ž”KÂdk3¸Þ ôýÃQã {-I‡fO™6³mîܹ ëÖ­ëÖ€‚½EöYrëç 6æW{˜ŸW^†Úšº¦éýKù¡t{%À±tÏ-+C< œàÈ |‡0æÎgžxèÓ¾mÒ¨ô~-eáÂ%‹B¡»å¦ÊûuŒä›V“Êö þ›T~#yJ@@@@@@@@@@@ ©€d”Ll%­ËA¡£ú·(åÂ!ùÆg2TU]ÝáâønßcŒÚëX~¯o›TÆ¢m×K7Þ¿¼oº/÷Ž&©ɳÚþ'H©u«£-ëó¾À]en“ÖÔŽÒ.`œËŸaÁŠÙÌñæ›[ßÌæ¸L1:.Y<ý©%¸Ó?c «ÚÛŸ¯©k”÷˜ž˜P5dWÎqí¬£ç:ãˆ9_펶ä- yȠŶS„Ÿ…E½È›¬úÝø£q*ܸsŽbgô†Ï°oš#€%)PS³ôù!‘³m‹s]õµ\ý²ÃêÕ×¾"?†±Ø6¦ÜÀ~©²rÉ4[;ê@@@@@@@@@@@`¯€ÈÜ'{yË^w•_Y7ÿ, ‡;*tZBQ’]ó×$…E[$YŸŽÅÚ^Ìõt@{‚¾›d§ü‹oƒWÊZÓÏ>ïOkäÝ™@Ydô^þ\ר´2•Ê3÷'úžpmãõU‹ëψD¦¤1Ÿ1Ó¤Ø> OD&Ip£åýkÞ\]þèhœÄøŽ×ï“Ï adËD€ÇL´h‹ˆ€ ~R~¹Á?Àߨۺ¢-Î%X¬£í>eÌ*¿>eVÁñeçùµ¡@@@@@@@@@@† Ès¸O -Éí^ žûGåêÃl³4ÚÜokSTõ:?A†’=ód›ƒÑ:¯×HâøÜíOÚ¥ŒÒ'Êq4wîÜPu¤áÔšº†9Zÿ,q­¹Ø‹ŸJ?i)KHÀYG·î甿®músMmÿWEšæJþ±¹˜l÷QlŸ…û;ãWZûÄJ´¡èöµ1’ÓÓÓÓó†Ì'oÙ€G2·B>Ö?§k!Ïœ¹!€û@À»±’™O؆–Ÿ¸ØÖ&›z£ÜïjXäw¬vLDê¿ï׆:@@@@@@@@@@,`^¼—ë×®ûæÖ€šäÛ­ÐeX¥ÕA¾J¥ä+© Ž’}o£mÍÙÔ­ÞoÃwÜø¨8Þö†ëš¼l•‡¤Z“\1ûWV.9°§çÚ¼¸¤×RîTFêiõ‰½v4Ç,ö±p,ö3Èü@`Tf5ç ¹Y™æ7¨ÜÈü9×Ùwçeq”›Ôëe v— ÿ«ßWUÛt¤wƒ6¼Ž@@@@@@@@@@Hìo[Ër¹¿cÇ·b ¼+—½jßçšßÉü=—#îë¾$aÌæ<ÌAâïôk¿Žsµ<ËýßÖv¹l`ÌT Øóí1Pôæ>êŽ .y[`|è™ß‘í!ï¡wHP× îš-eå»'í?ûÝ­ró·¯wççBåeñͶG¹Þ&Ç.‘¿KBãTMmÓƒ’ðfmÌšÞ?>î̶ïb8®Ø> ]¥§[S˜jå Úf\õ”¶NjÔ¦SàX§‰I"€@¡ÈÍÕ¶¹HzòëmmFR/Ù!Û%¶O€£ÜjsŽŒqùHÆáX@@@@@@@@@@ÆŠ€<œ×Ç|8ʳÍÖ Žýn~7ó±.¿>r6ùÕgSWUU5IŽK#IOÍ`½µX‚½6®6“ÚæáæE"ûMVåï• yÇhÇ¥s”Òæ(côQ2µý÷é J¢7êJ{¦áe[¬¬i\ ™?ÉSõöTŒä…Vï’½KÒÿê;LÅÇÖ6Þb´YÝß~ýšhôå‘t]ˆÇÛg¡£ÕÁVG7¿Y)‡ï·ïÞÃçSø%8þ9*äzï6 Dg+pÎSnOÐÇmÝõõ›[mmFRïôÇת2ÿo¹»&Àq$Èy<ö¤º«.öëþ[¿U{»åëü3ýz n°€ürŠ|QñU5Ø„× € €Å+àÝß²!€ € € € € € €äG@ž6,ºGyFršíÿ—ì/­G‡ ŽñÐþ“ù¹¬F¥×@\å,Àqîܹ¡YG¼s¾rL•0~P‚´fË"vÊs .ÈUWÀ0ôt¶ü5iZ*™í2Ñ=™$sq2Þ êÔaW8åqÉî¸VÞ‡Q³Sý®««åÕ\Œ±¯û(¶ÏBW)kGíŒòç 
ÖàÈ–‰€„L&=ѶTteMÃÜPP%¿tpŒ|$¡"Ó”ÑòKæ@I=Û/ I²Åm”×%†dƒÒî]j§¹!kÕ®¥zÒ\ç)M¨‘4 ‡—ÎÒZã߇ÙÜÓÙzŸ›‘ÕÆbËŸ ×6>)s92uOúUuu‡w··?º 5ûB@ÎÛE~ã®}@)ïŸßvþ™ìùù ®«ÐãÔ›fûà"^#€ € P´Þý- € € € € € € €@~äyøx~zÎg¯ÞsýþÛúõ]à¦ïŠâ¹ÏàPÎß1 ½2 G<ÿêÚ†Ù´÷‰©•ØÅ©Åž}.mmz—c̯äùíSòr µöâbO·Ót™¹²¦®±Û˜øcËïÎËx£Ôi±}Êõ:ÝJwG5àP‚^_—ëÂ:-ì Àq¯¯v Ì›7/8yÚŒJy3-ÑêgÉ[jšWµ÷­%¯vÞnÞò )˜±«HvóMȘšº¦ûäMy1¦£«£õ¯\láÅMß’€Ê&¿¾äõöXGó§ýÚ亮º¶q‘ü,Áúö«õ íÍ_öm“fe¡Ÿ§4—QTÍL(têÞ÷Aò©KªíÛ¥F~ ïÛïeŸGyWšq•6Oç}& €@ ˜ªžè¾€gÈÔ@@H_๿eC@@@@@@@À˜?þ8É&w€Ecûºuëú,mŠªÚ¨øÎ\O8 d@´=)žëQsןq³Ïà8?9°Ü)ÿŽÌæ“XR1FÝÍOʺN•äBÈéý¾DÀ¼-wê =i]&%‹• Dµ ­ÛÝí_»!ÝЊÝ<ÈyˆyòëÚu£ú9èõf¤øQæ­nWšØ¼õOÇE& Q÷á)Óf>äh§S¾œ>!_ÑÖ7z²%ʱ޷û\ù ¸Húú¿pmÓ¯++ëIÖ6Ó2ט›%ðò(ßJ5UUUÙnÖ2Ú·½¼™äKÏ^2w/ømÄ[1œ§/²;‹úݶiI›ÛlmrQo”»ÖÖd[=ÞÖ†zJ]`vhf©/‘õ!€ €Œ!c¹¿Cg›¥"€ € € € € € €þî~ûMðo!µ’­ÈÚ†¤ö/jÇñ‚ë2Þª7-©”?!៕ù n”Ô†’@ëv¹$?éå›ð*ã…Xˆu´üzÓ†õ3\ל+s¸ËÒ|DÕ^,ÖNcy ü1Iö©uÆÁi ­¬Á‹Õ÷x\9û¥5yí ÀqÅØ~±¨®þ¤šÚÆ;v6“c GÂÏ–{<¼¸ñ»ó"‘½Q»£-ÿ«ŒzØoŽ2Þ8gÜÄsüÚä²®ººá ¹ó;Ó¯Où"|iýÝàׯVWLçɶ–â¬7ï´Í[nz´µÉM½þ›½ý{Z PÚ'Ž;®´Èê@@Æ”÷·cêt³X@@@@@@@ÀW`ÇË/oñmàUj]>oÞ¼ü®Y'Qø $þàŸ¥Ï ]7㬖áÅMßp´Y.ÉDŸž³¯’€F‰ûxP&‰¢ª^w{§ÆÚ[Nó‚ ãnξãìŽ\»víö®•-Í2‡“ÌξÃ%ªõß$°õ^™_ž‚€\YSÛôÙìfÌQi ³ÉÖÖÕü\ç)–÷֍ޗbEUÌUQ®üLÖ :”L‹æ;ý©|éWÈ Ò…SLù9áðÒù±ØŠg²]‘«Õ5ûß㵪—ú_û¶ÉQ¥.Óµfïû~’lzËG’Þ»ÏSŽx §­æØ&#¡ÿOÙÚä¢~ûw}Å$kŒú»s1} PÌï/;ZMs&«£ÿßAÅÌÆÜ@@ PÀ»¯õîoÙ@@@@@@@ð$`«?\Ûô†<§ï›™l„C¼úQ(+¦³d´»I«€ï”% îq7_óm´*7žFòœ½““Ì‚?‘½/IpãÞ¼’Á7”6HoH¼àýʸ÷n}͹ÿæ›[ÞA·y;tW,‹òƒ³"‘éãtùYŽV „ãôœ|jõ³p]ãëX¹"o ¢ãM6GnÀ¡¼p´”„z߀¬„¶ì–˜ÀܹsC³Ž:îט×8šK“ñf«Pèîpmý‚XGÛ}ٌ߶³U—‡¾ïThÔGÕÕÍXÕÞþl6cdrŒÜZ ûãî5™ô¹»m1Ÿ§Ýk(…¿gD"S$ø`ßµ³óºhË‹¾mrT¹fÍò-òk¯ËMÔ¤T]zAÅ••K¦õô\»1UÊ(u€vTý~§©nù]©/•õ!€ €”¸€w_ëÝß²!€ € € € € € € °[@’ðHà¢ö pt*‚8îKñ·ÏÕ›Ëüã•d#‹¶t¥è¢hŠ«k"2Y nÌ|“@ÆZ›‡åÈÄã×uìSÎÃò ýó™õfÊ2kŸ¿Ö7D£¤÷«½^¶ÓÉÎ<^¼@Guš6êDIò5Ò¹Jlj^YvÿªèrÏŽ-ÇFi PöߌÝ€Co<Ûœüg<öj p{ç|`ŧG"“&Êc°uê>!Ðjº6Îj7Ôv®l½!Ó9x[ÅîW™êX ~”ÇþÊ–Jýå©Úä¢\¾àgËMá,}­“/£-m†Uûy¶ ".ïŽ?Úò£ÞêÖË?w—é÷.¿ñt(x¸Ôàè‡D]É œ1þxu[ï_Õý}O–üZY  € €@i ¼7t¤òîkÙ@@@@@@@, Y7KÍÌÁe‰¯Ë\ÿÈÄöcq¿WmßT¦*|—.Á¤‡xp^æL߆\yv¤ñPÉTøËL¦(A÷iå®2Ê\swüME£ñLŽOÖÖhgr!í:·wÉœ½ß[¸paEpü¤j' æt QyO²õØÊ¼Ä^'ø¯Òî¶¶Ôg! 
Õ«Ö£Œ>ÂÚ&‡ äúž‘ÃîÆDW8މÓÊ ì(÷É$÷n¥œJÉwÀÐSìi½Ÿô¹ª:ÒøÑ®h‹÷Ñ&ó¸ZÆJàèu&óõò'¯ŽÚèz[ffùe‚Œ³7–ÊyÊè¤pc'¨§Û¦g´~ÊÖ&—õÕ¿^Þƒ¾ŽNP½]Ƽ'—ãÒÅ& ÿQ .< ^}~ÓOÕ†ø¦b›>óE@ãÓSîg½ûZ6@@@@@@@ HÂ#Éà迹Aã=Oü«±]{k4º¥¦¶1.ÙúRçq”º¦O?L¤ž.V-ÉRùS‰²˜œÖüúS¿Ûÿùl=Yû7fŠXû6“ç$ߣP¹zõêm2Ì»þ©pxÙa’€h¡<Ç_-³?MÊÓž£,w™a,¶ü¹Q˜ú˜ÂqÝÇ”ã*$óhVÁ©#€|ߎ“‡à8Oû1Nùòá˜vp£v÷÷»—öt¶þÅÆ5wîÜÐŒ£æœPúß$èož­½‰®æÚ –¾ïúëWXo®÷·þ‰‡n˜uôœ—ä¦ì Áåƒ_Kÿאַ­OWGÛß—çðµ÷|¡D™r¿ÛÌökS6HQQ*ç)ÅòŠ®ØÑzšmÒü´­M.ëå¦èY[ä{¨­ õŒ‰ÎuÙç©o¾v5AŽcᄳF@@ D¼àFï>Ö»ŸeC@@@@@@@`˜€6/JÀÚ°âÁŽì¹np¯‡ Ivã%€:dXÍ +»O**š—^öFy¶¼Òr¹ ¬Ç¸êÂØÊæKòµ8‰™bë[â0ü#Ölä¡~WpâUÒõU••u‡„ÊCÈ4¿"1%é$ ™Pàórì7ò0µ1ÝåV³ýî ¦\ÞÂr&RmFÍñâÖ­[×—ªI®Ê½dgJÆKç½–«1K¡Ká,f°†p¤É˰øå´1êA£úÏu,¿7­öÒÈ{³Ë¿5òrMumS£|£ü§¼)ô;^¾œfÛ?øiök—Xç5ó¨9m²žÿ—X7tßY&ûy p¬Ž4|T~9À7¥·Dz¯º)Í(eX)§¡ç¢x÷$¥ö4¹V}`\½Þ·AŽ+eNÏZç$é»s<,Ý!P´3‚©ŸOù¢úÞkmêþ¾'‹vL@@`l¼7tä@æF‚ÇÆùf• € € € € € € €@VFÝ#8/ñ=VëwûÖç°2\Û¸\ž¯?^ó”Qê)Ét÷”2æñ~õTÀÕOuuµ¼šÃárÜ•¹G‚E«ý:ÕNàt©÷2úÝ&ÙÏ“õ¥ÎP¹kEƘŸÅV¶ä-¸ÑF"Ñ&û?™/3ÕrõðÖÓÓþ‚Lï»áðÒf ýIâZfئ+ñwsmm¨Ï\À‹×‘ÏžGåÈÙ)ÖºìÐÃçÌ–8¤R¶ÉQÅ‘nùÐe9ênÌtSpÍcF~,tQ]Ý IÒ{MZC{ÕÝvR&Á‰ývu4·˜¾ø{ä î‘ĺÄ} Òª®^ÜàE£g¸ëzä"_*æåZwºÑ6a7îZç8¸ÒÕ»_õî[ n,–³Æ<@@@@@@@}#wÕ]¶‘½G K;I¢ÒŽ–è´K0×§¥ÿÃÑÎÊPȹ×§_ ×5mP‘ˆ5Èζ¦|Ô‹ÓŸlýÊÚ>nkS¨õ’½ñÖ¹µ¡wËËÿbm7ÂòŒûá¶.$¥f^b@lãfZ‹­xF÷÷&û6¤qìái´¡IVúNÛašok“‹z§h?'r±þlû ƒc¶rEx\ÀŒ»R~!T¾ÿëhùŒ,Q~4ad[,Ööbeå’SB¡ÛäfeŽ_oŽ£/›?Yëš5Ë·øµ\×ÕÑúáÚ¦{%ÚýøÁåC^k}¨dZœ×m½}HùwNŒDÊ¥‹s|»1æù.µãß6 •¥xž–X”»édp”ñÞÑ\œã8ÛmãÉ{ƒGõcNÀË>~fùÕ?ÿ€ºoçãꮫGúÖ«㯪mf‡|ùøëoÌ™²`@@ìäÇtT…§LU³C3Õ‰ãŽSï/;Z~Ä­(þoôÙ-š£@@@@@@@r&ðì?úëáGÍé•瘽gÛ“oƼÃK³ª½ýÙä rSZ]Ý8UÑ<Ü¿7ó¨ŠFãþmöMm¼Ïý“²üÿëµzWeeÝ!»²÷å}¢UUUÆOü©<Ûú¬VÎÓÊuŸé3æé-/?÷ÌÚµk­Ï’ïž`eeåþr¼}÷~ª¿Æ¨+׬Y³#U}.Êâ0´þ¨­¯l38Ο?Üø‰o;Z»úXŒ~F+c 12 lcf[ßÙ¹âñšÚÆK%fæ§~}HfÓYRï]d®_;ê²Ðö|¿#å9s¥þr¿6¹¨“`âóä‘ ¶ p̬X›×Ô,•_AP•¶ùËßí›6®÷2)æ,º£§çÚÕÕ §êqúyúdÁÓ+&.±d›çàzéóÙOà(•ÚÑõò'§ŽÊÅSO<—Ä×ò­ÓšÉ X)Ÿ§D›¢Û7ªÂö%c”;ªŽ®QÛËŸü’ŽEw±1áÑð?~Ü;þÖ˜Œƒ € € € € € € € € €äJ`ݺu}‡õοHIÕ§$†ÐAS¶Lêÿ#U›\”›>ÍÚ«î°¶ÙG ¶¼úìýS§ÍÜ*€ûùM!XQö)©¿Ø¯M®êœ²‰_–˜…zï”6ÇQ!y1eÚL#Ù0_’LˆOKPâÓ[7›óo¾¹õÍTã†B“g¦ªR®ûo²Ÿ‡CôøS¤Ûñ¶®e]ÞRÓÚÂuß” ²%f¶H.ç0 Áom»èjjšætv6?”V‡Y4êëW Ùf¬uY8\?ÍK$–Åâ#àn£Û7éçd]‘ª™\ 
Ç.ªiøÐªÎÖ{Rµiyu¤ñdùÈ=f¤ýŒÅã-áåc‘¤4×l‚Á–•ùžoù0aÇýçH$®ººZ_ÒnÜË é»­¾”iÊéøŽ×¯•N}}@>¤jæÍ›gýôܰJÝ0¬hx|™öVÊç)m„m(7…ã¬S‹nG¹ió½î½ùÊZޝ{« @@@@@@@@@@@`”Ì@æ2Û`i=ûnëÄ·ÞqŒD黹ZI–µÂÜvÅQÜi›I}i #¢­áëçÏ_6QryH€ãðÍ Z•yHò)ý!yu”_p£wtˆLWbùÐ}=qò´9ûB’Œ”É·ËCÇHØ3æÎ®Ž–¿'”¦Ü-õó”ráERa´±8ŽrG'G \¶ýD‘œ¦‰ € € € € € € € € € €‰ýqÓìN%–Ù×ê8/³Ø²îTW7N• ³3}»4¦wÇ–ø¾möq¥qÕ¯ìSГƒã'}ÖÞnd-Ê'¾ q “m½Hˆ5¨K+WbJì[YY0¯1FUuu‡Ë•ÚhŸ‰´Ð*íG¹ø×¦ÑgÎâI’5?y›LzB²ºÝe”úì]Ñhïî}þæZ þ [\ŽÔ/°µË¦¾fq}—œ-›c9Æ’Ñ Ò˜(ÿŒ¼IR¦YõV)èv™íWç{Åò…¥m ùå†/ÚÚ «w•uî_?ì¸, œ2½Dú ú.¦×øÕ'Ö‰ó”¸èbÚ7ö ŽFnG×±gp”Ÿ£ À±˜®3æŠ € € € € € € € € € €@=ѶǤùÛœªÕš@IDAT!Ž£1wîܼ<[¬Ëô·$sž@šV×®Y³|‹mžû²>m‰I¬è“¶9H–¶o/Š,;ÎÖ.Ûú¼Á ”ûŠíøÀÖ~·ÃÖÎ1ú[¯Þ ™¤Ó.Ë6*RÖ,±-û§w¼åzÔIܯ´›ü¥Ö'œ‰LO^9òÒrUn –“´h|Fc¯‡XGÛ}F™ÿµ­\;Î/OD&ÙÚeR/®*¸*“ch;T ¯ÑÕC‡bo_ H”wµml‰Ú¿JE£q[»‘ÖÇ¢Í×ÉÆ‹~ýÈ/7œ8/ÙϯMb]çÊ–Û%£âúÄòÁû’r¾÷˃˲~­ýSt˯ lëÝ_™Iÿcá € € € € € € € € € €ŒU‰ø/ëÚµz׬#ßù5k» ÔÔ,=Z‚Ö>o;¬¯ÏCð+ëø‘m-^¦¾€ø]¦qö~Zèý§èŸKüƒd´múºXlùs¶V®6lm¼z£œãÓi—M›êº¦ÿ'I¦>šî±W2>ݶ«¢ËÿOâTlÎx§ü·Òg^⨌£>f›¯¬ß Ff˧@\_jí^ëC':å]¹zÿ.X°tr¹SÑ#YG´ŽMƒ”yyc¦ŠQð"÷%Øî½¶ûî¶µÉQ½+W·ùö¥uà€@Ù‡|Û ¯te͡”„TÈD†”d±³ë—æúj:3ùu‰1tžüÙ ¸V²3úÿ¢ˆÌ=Q1‡kÔqg‡µ;ãŸiÔz< @@@@@@@@@@@  º¢m·HxÚCÖIjõ­êHý™Övi6 ½ ‚–¤,æ/=­I³Û}ÚìE·÷ –³f<”`µÙSò¶yóæ¥ˆ—ÎÂjê~(”Mi´uãnÿ¿§ÑNíÜêzɤ¼àMÿM«ê|dù”kîŸ$`ñÿÁ‡ÖJ¶Ã‰CK|÷Œrõ/|[H¥œ³3Âu ÍÃÖ§W_iÁ¯uvíFEÝbÂ$õ!yã-‹X·:Úâ}aÊ&þŽ2‹€q>œédt¼ÿ·)–}ÔÚ©÷©N«*H££®I«³]ÆÒyÊÄ¥ÚÊ ÕvÛ|‚Á¸í½fë"£zpíãiÓŸQ§4F@@@@@@@@@@¢0qu±mÒ„3ÎqœUÕµ‹lmmõ^`Ÿø­”°±÷ÙÚ*×\dmS îŠF{k¾—Öt´^4uÚÌÛ.\’F¶E{áÅMß’ƒ_±·”pVc–KæÂÓi{ýõ+6ËwÛÚÊõñþÃ>îr[» êu¸®ñB'¸Ñ»ö28N.+µ&íûzwþJÚ[Ÿù×ÊùFumC8“¾ýÚ†ÃËs³¯Ïü¥»£ùf¿¾¨Ë@ÿ¶_wÈ[orÎNpÆMüK¶Aß5‹ëk‚! 
ÜÖï¶E½]€G»QQ·Ðm ”/¶Qý%É´˜Æxöy'ž˜ÎÎÿ¨÷?$–Ý7'-Œ4¼}hYF{ZK€£1OÅV¶¬Í¤×±tž2q)¤¶F›­¶ùĵÎé¯oØÆsâi·ÓÖõ € € € € € € € € € € PܱhËï$¨çjë*´.ÓJýNϾ9')³¶OÒ fqÃYS¦ÏzHRâÍORPd®í\ÙzCBaAïv­l½RâºÓš¤Ö'…*Bw{Y ÓjŸ¤‘—E.\Ût¹vTZ•2·7T_ÿ…IºJYäêt³:_ G«Sv”fEuuãÔp]Ó ãñ]9$‹Ø%3)Í¡š­^}í+Ó–Î1äØ^ÜxQeeeFA”‰}W×Ö¿G‡‚«%szb]â¾ _–XÆ~~zzÚ_÷È7Òé]Ê)¸kjj;ÕÕDŽñ½V%°;X³¸ñti“r¿“ëû°tÆ¡]ÀžýËÞ- X@²Î,–¾›Ñú ß9®Üànò`§\†•xç›QúD©ò>ìiõ!¯‘ƒæ *òÒ3¤õ2)¼dHEš;Uuõ“IÏðkn´j‘zɈœþ6ÖÎSú2…ÔR[¸ÕG‰¶-· Éó[ê@@@@@@@@@@| |â ÿGkŸzQ­ýýßÔò9úF`,ì|sóC¦œ$ϼë·^y¬Þ‹%¹ôNyÓ±‹þ¥Ól¿AE£q¿c¼ºEuu3‚¦ì¿$°1œ2`P'F™W{ã½_TT4/ݯ"0nâ{d­o·MZ<ÔÀÍa zêï7ßêélM#)”R§G"“&êò–ã¿"qéô¹æ¼XlÅ3¶y ®ï«kËêb)³?ópyuïŽïxÁbƒû±½–숳%£Ý-Ò$×ÈT[ûTõcqŠDé\—»ûèu{¿YáT,Ëw—%û+s«ózq°â€/V×5ýdžø¶Ÿ{™;“µMVVYú^'ü¶DTÉXö·‚QKrz³É¤,cXGËUÄ}†œžÊ´–Ï´  „kj›6ÈçÖurRŸtzQÎî«r=¨µ9X}´Òf¡ìg}]ïž‹$àò¿1ÚÝp ý%À±ÄO¶Û½ßöié¸ñQ pô>ø%ÿE¡?$¿|aì_Y¹äÀžžk7¦j“¬¼ۦ߅*¦üÌ;>Yý@™£²ptŒÓè÷õ#ÿÆÕ;›rìcí<¥`(ðb/ƒ£ÿ»I«€5à0—‹”ëÜ~s©5޹D§/@@@@@@@@@ÈXàü3-‡µVüÙïXZQÕ«Wo«¬i¬ …ô=ÒÔú¬±Ö#^=aUþŠ®kìq¹E¹ú¹¸r7¨ñ7tEàÇÕs¤Ýqò(õ ð9AþVX¦ñVµ1qmÜ ÖD£/§Õ¾Àuww¿VYÓ°8TwˆQZ™.Åé ±?C«ž‘ÀªëLܬÜ´ lÔ;Ý7t™;Ë5Á£Ç%ÁqGËóé å™ð2Yºœ£u dëÌä(¥VG[Ö‡ë~,Ù ­™íd^¬Ñ§Båe Õµ?‘´U=;TïS7D£G]°`éäÀøàŒ@PÏ•§íÏ—õ|8±Mâ¾w!kNÚ¦N>¥u`¡*?tµRëOµï]k5‹ë?«t@²™Ú79GSeÎ?8Ä©øª¶ýI&õ”äºú‡«ôSþ¾§z¾7‚îøÃA5SÚÍ”ùÎ4Êy§¼~+[§¼°nÆl5*~®´“îÙFQÀô¾þòâòI­Þs¾Ò\²qÊuq×Ôr~eg`HaÒ%@²Kú°eAµ”'í¼„ p,á“+K“ÏO=źDǹZÿÛÚ.— Œ™êÍÎo ”½¹gàèÝɯtÈÒ>P’õ/£[išÛm^—¬>UÙ‰‘H¹L¹&Uý®òµÝííO[Ú$V¹ó”PûF28ú_²r/oÿ€\®ÕO2–únØO€£¯• € € € € € € € € € €”Ž@OgËáHãRÐ+dUÖ Goåwð6ùsž£õy* =ÞÿTÈ?oüÀ²åyê·H®Mßµ²­¨³Öy™%`®Ö¨Àryè?½ÀNAÏYòçór>/Yá¼¥Æy";`(ÿ“¦¥´g“À©žÍ×mOA†/6Å·_6%PáeVL™¬jH—Z—ˬ¿.Køz¹,¿¦¶Q²ê§deÓäLÏ”¸ý†cÙ‘c_Q®{®rœÃ¤/ß8–€–€Ú ½¡;W¶uÖÔ56KßM–©ì­– 6Ù‰¼uJ$í‘W ©rRoí nº÷u¯úâJ×tw,¿7¶4ɱÀš5kv,\¸°*T1yí~4ÇݧèÎ\»iÃúó¦NŸåŸÔÈ*pðWMbûE.PUUå¥(NãëÉòA¯*ô8uH`ª:64S8î8õþ²£U@óG#‚å`@@@@@@@@ ººZ^•iº2Rÿ£`Àù¾<[RÓe³Þ5æ‡]fûϰåtŒélWB¤ çG"WŽwÊ¿îhý9™ÚøœOϨ—%éÓ›7¬ÿ//aÎû—½ Ÿóç/{OŤÀÏ%(o™ÄKä6øÄ˜›Š_ëX~wªùKúÆk䉦¤ª÷Ê&ð¯ò'ÛkÖx† .é U”ý@âÏêmq!~sñ«“âGäÑäÏK0êïýÚQ·onˆF7È j+k/ Ô9ò¨Ý9rMÌÎhV^†N¥z$j‹6Þ”xì¸qý’Ȫ,±8q¿7±`¬ïûluœb_¿|ÉìŸÛo˜Qqë;:ÕŒb±¶ÃuM7Êú¤j#uÕ .¬H/ë¢ÓªŸrc[ÕÑv§o›•cù<¥ )Èâ¸Ñ/lo(ãÌÍÉËt¬ãiãà8š'…± J@þCAÝ´ý^Õ¶õ6µÑÝ\Psc2 €äSÀ 
âÓlW÷??ðouï]jš3YÕïwš:cüñò;0¶Û|ÎŽ¾@@@@@@@@ö…€ds|LÆ ‡k— acçË$óåA’C³‹<§ù7¥Í7m|öÚ|áe;·|·&}YÆøêÙ‘Æ…ý)G›3䑸%hJcN'jnQ®úõ3ÿxhÕºuëúÒ8fDMÖ¬Y¾E:hà×K‚Žó¹&Ε5ì—u§ì*_7{É£Ò‰¯Øìn‹N ”ÿTR\MH5¦<êTYS³ôèÎΧjc+_½úÚW¤Í'ª«¾fÆéeŽQØö^Ûq¶zyNë ^åÆÝU›_~öcí}`ó)äúžÎ–d~Þ¿oW×6Ìv”^$×Äò>>XþNø«ÔF«×ä}ñ¼QúÉÜúœœó›7¸Û×ܦ Pt&˜ÿÏÞÀÉY•‰â>_Uw'ÈFXÂŽ(âJ\@' ²„¤c'²¸]G¯Ž3Þeœ™+3:ŽÞ;ÞÿèO½ŽÎ&’„%tB¢Wp ‹Ê"‹ìKXHBÖî:ÿS!`ÒõUwW/Õý|þÊ®úÎ{ÞsÎsººÚ™~sRcΕÅ'r"F\³Ça¼åéùú†^^9†Ý‡++w Ån +¼Í{Mœž†¸¨Ú03g.8.ý€:¾ZL:ö‚jíÕÚFú>U³JmÅ®p[È9ð&}påÖuMYLã¥Q«\åXPàXÅGÓðXW~.|þ™Åá–m÷ßEZè@¥ØÿŸ×]®ßts8oÂÜ0®ÐíÿM°Y… @€ @€ @€ @€ú.бä‚÷ö=Kõ K/]ô­Qy Úµ£˜¬úÿÀìvœ¨·ýT½tŠÙk››Â{Ò°ïL–|pˆÙþéë¤ôz׿œŽáÙtúÝã©Èç†TÀöÃr¶å‡W,¹ä¡zOwé’ þ>å¬<⺲´ð‘4Ñís>ãŒs&ŽÓtRV §$£Wg1ì—þÖ{¿ä9~÷Ť3<¶¤âÐ5)æ·1‹¿ÌÊá—åmáK—.”¢§ů>mÚ´¿-´Nx{*<1²T_—æ8i÷ÂÍTˆÙ™Öõdúf~2­õþT v]g¹|íŽ<»/·Û×+K¥ ©±÷•ÝfÞsÃ߯¤Ö¯¤¢Î£Ò‰¦SÒ^¼>­ã éñê´ÎÊ÷þKOäŒqSúÞ_equZk:0ÞTñŠ¥Kߺç‘ú~7å¾0e©<êzùYøRÎ¥KÝ‘îVu¹ ±yïôþ¨~ÅðXõ€‘תÀqïyÌÊk²P¬ºÂôarw芟®4H…rWŸ~Øß·¬xEh}*UÄïÛíÒQÊ©­jc¹)ÎMÕØÝ¦HUØ][cXØ}@õ–‘¾OÕu†Nëý÷ÿî®Ã:®³Ú‘Ôé—Ë-pLïßê|gnÇ˺ºî:ŠfB``ê|"üí3ßw­˜B€H Rüÿñ5__œðÁphÓþ 4sS%@€ @€ @€ @€¨·ÀN§˜ýŸrÒIwû†–lBצmÏ®[÷ØÓN¦ÛI§›§W]uÑÚÔtÙŽÇ‹QS¦Li>üðc÷í,†qM]aÝcÅ-ÏT;ýíÅŽƒðdùòå•C¶®Ùñxq©ðql6fÌ„BliÝXÞôÔ÷J¥ÊZÓŸ³71ïI³¯<–켊Ê~vØqã:›â¸,Äâ¶ ñ‰ÅÉ;‡yN [r÷ݵB|¡1SซÇÝ@†ÓËmålmKõúÆÊGÊèŽÒÂ¥ÃiÝ/¬å¶Rië1³ç_˜NHü‹îíáë)é—¯ý«üK…T Y)‚ìöŠYöý+KTþå…^]#}Ÿz…6*G|§ÇʱÖÇv;|LÿjI{{1”J]ÝÆÔ©á¸öö–ôþ\­ö6µ?ÛÑqáÃuR !P9¹QqcCl•I @€À Tþ€Êçå7öùs'9â>š @€ @€ @€ 0DÊ;þ¾~PN¢&½žVåïÐÓ£RÌÔ°M; +ÅÃþÚ±_O§…V.=h*‡ýw;÷¥9 å†ýyðÒÅÔçNnQh}†‘e06…͹GW¥Šòƒ¦N:Œ ]ËçW³¯œÆ—µd³»‹™Ñ>oj*<´»öÊýr9T£ZßJ›}ÊBíY¸­Úl*ßOm]-V‹©WÛ+Ë͇¤ñªà˜N”¼½^ãÉC Òq÷áóÏ,vrc#l–9 @€À  TŠ+Ÿ›•ÏO @€ @€ @€ @€ ÐwŠÇäeÉÊÙy1#­]ã0ÞñëJ¥ué¯U«Ÿ$—eÅ “'2\–.Y|kú{Ý›ª®/ ïë®=+æu×¶ý~ k¶¬⊪19ö)h5ǘå 6)óÇIÕ¿¹„ÀøÞæ_…[¶Ý;Dfc @`è T>7+ŸŸ. 
@€ @€ @€ @€ ÐÈm³ç»mÎü¿mkŸÿÞi3ç¿öÄööÖAYO!¾)oÜ qSõ:§¼ð}ŸÜ7 w«çKJõXÙêTätPµ®YWÓ+SûýÕb¹-R™NXÌŽïv 1œÐÖ6÷ÀŽŽÅ»ñšN¶e±-õí¶kjºøšk®ÙÒ}@M-ö©&¦ÁJŲ·UûvØ1Ãw¤¯?ìïÙÆBøÓ*ß™Ï_™¯‹ÀèŠå°xÃõ#dµ–I€ê'Pùü|÷è7†bæß?ªŸªL @€ @€ @€ @€ 0°ÙŒTc1!ChNckl›½àátï®â]å-[¶ä‚kûsNg¶Ï?8 Ù™ÕÆHu)÷¯TZS-f$¶ù Æa¿ëñyKÌ Å“ób¹}CyÓEé@·EˆYåj.Nß}“ö?äŒTÜ8n÷û;¿îê ©x²—}ª‡bçȲrî Žé»ijÏãùüùãt•³ŸÌ\ŒB`ðnÚzwX]^;ø1 Ð`•ÏÏÊ程 @€ @€ @€ @€4ª@:m—?&ß^+”…CÓ!W'¥çKtÿ»¿×Ö’…ÿšÆhÎgUNûˆlVà8Ì·=†ì'yKLÕȧäÅ4r{¥²9ÄxEµ5Ä,ÌxI{V8ç%÷v¾Ão—•.¨Ëû´3ìÐ}¾´¼ùŽÓUg˜…*§VécãÔöö½Ó<ª[œþ…§Ò÷çÍ}Jw #pÖÜúã†Y‹‰ @€ð9:ÐâÆ#@€ @€ @€ @€ @€º dYΩˆñ Ó¦Í9¨®cî”ìÄööÖP¶Ó­=>M5\×ï±a„ßláëöËïÚVþI¡9§Ž5 ¯©¼I—/¿äÑ™>}ú„âèq_OXe¡p(—Øãýëž|ø•+Wnî9”»ÊçŠÅYÝåN? ¦žqÆ9¯ºê¢íÛ§Ÿ~î¸eïé.þùûñ;ÕÛkoµOµ[ jd©Ôæ,¸<Íá£Uæ1zÂäCNHí+«Äô©iR6úméû³úÏï¸ýC¯Ü§t&Ð@wl{°fkª @`h øZûa6 @€ @€ @€ @€ ÐSxoÙ”îzUNt,Žn>#µÿ{w1}¹`±õd!›”“c[yk¼4'fD6W/‘$ÃkÑëž~è–I¶!Cí]meMcZ>’Ú?[-¦^m…–q‘~hÌMoÜçS ÛÏ_Ýç€ÃbÛœO¤caï1Ü¿amüе×.z®ã.--¾¶mÎü‡Ó˜‡t“¯yÔØâ™©mQ¥½u|¡-}©v _å‡ÊâJl=.ûTÅÉËñÒ¬U+p Årá´4›•ý5£r–žS¶œÞbñûý5¾¼†¢Àc]OÅi™hŸ£ ±M&I€ @€ @€ @€ @€Ý”ËáêTžÔíÁh•n…,ûÌ´iï[¾|ùÅ«»IÓ«Ûm³çÿYªWú‡ÜÎ1^½téBø¾¨Ü™=ôq«Ò‰ˆiº?Ï›r*5üä´iÓÆæÅõµ½r2bV©Àñ¥W¥:Íã€T™õ–ôì¨z7î©ÊaáKGÝùN6ãÅW1kñùžžlÿ¡²è‰=5õæž}êÚàô鈛~C¨úa³ìÏNnoß3œ1cþ¤ôƒûCUsǸuÛsWTÑH`˜ lŒ[†ÙŠ,‡ œ€Ïѳ6 @€ @€ @€ @€Ô_`sÜtuÊZ®š9ËkjmY6uêÔj¢UM±{ãŒÙsÏM5Pÿ²ûýÝ_Ç;»Êá‹»ß÷úyŽ#à;!–k9>5›Ø4züÇú›£u|ñ©€qbÞ8±\¾8/¦§í±|~Õ>1;õ¬³Î3µ½=v™T-6ýÄ«ž«ZçnÚìS70Cív©Ôb¼¼Ú´R¥î„qÅÖOV‹ém[Öþ[Þ‰¬©séŠ?ÕÛ1ô#Ј1¤ï| Ð+Ÿ£½bÓ‰ @€ @€ @€ @€†ˆÀ5¥Ò“éOÊ–7TïqÂ>¶bzû‚)y±ÕÚOoo߯mμÏg¡ð—[Ÿ—…øùe¥…¿¬–s$·åŽdœá²öŽÒÂŽTé{oÞz²Böwg·Ÿûª¼¸Þ¶Ÿrʼ½RßOåõOs¡³¼$/®§íËK‹ïI?¬~Ò]¿ôCjLó艧N,Œ>==Õ]\úãß'¼ç¶Jew]/ûTWÎ~MËñÒ¼Òi¤Ÿª÷©¨gœqÎÄTÜøçyc§ùý[^Œv @€ @€ @€ @€ @€ @€ÀpHÅHµ½&)gAéÄÅ“‹Åðë¶9 ®m›5ÿ9á»4ÏœyÎÑm³çkLq̃©¸ñ3)WÓ.{x‘ên¸¼¼ù {hrk‡@."©a!P9bõËéñê«Éö*Š—¥ ß¼²TÚP=¶Ç­ÙØ}²4~¶o~ÏìÊŽŽ ÎëyDåäÅTÕû'Ýö,„éLÕÂßT~¹xÕªUÛºÍÑûûÔ{»í¹´´è'm³<” aí~àlbó˜ ÿœÚ?Ò}LÏZFíÝüõ4æØj½Ò‡ñ]i~?¬£ @€ @€ @€ @€ @€ 0œ:–\xc*>üÏ´¦ײ®t°Õ»C!{wêsW:Œêîã•G̃¡žˆYá€B(šÚM±ék8,ÕE½.=¯Zw´ËØ1n(‡l^(•ºv¹ïÅ.µƒîÒÍ‹Fx¬¼éüTñûTÞ¼Så𱓠­‹§N::/¶'í3çÌK…^Ù‚ú”»Êÿ«†¸^…¬-o,¥6Ýo¦â±3Sâ3ª%/—»¾S­½/mö©/zÚ·Cù¯òGÌþËŒ9 þG~\~DÛœùç¥ïÏsó"Ë1;/Åļ8í @€ @€ @€ @€ @€ @€á$·†¿N›Ýß“5¥ZªcRÑâéëÍ …*d…‹ 
ÅÂõÅB¸¨ò:ÝÿD*r<;ÕE½!å­½/Æ®r(tÙ’ îíÉ|Fblí¨#Qg­ù†RiS,ÇÏ×´¤ô¦›tÀaןuÖûj8m1?cÛ¬ŸIïßOåG¦ª¬/¼¢táok‰íMLådÊTùUê¾o61‘Mè¾=¬êÏùÙ§*òC¬ié’E—¤ï×Ü“ÓÙš>kÁ¬¾LÆìó³ýC^Žô!|ã²K/¸4/N; @€ @€ @€ @€ @€ @`¸ ,]ºðé­åøöT;tç`®-ÿh¹OZºdñ…ƒ9F»©Q&:T晾Á&Ïœ=ÿ¿ •ùÜÏm__µjÕ¶Zæ³ôÒE_k›½à©€ozn|–½µyLó3Úç~liiñ÷sã÷0}úô …Qãÿ&WÃIw•âÆ°>l묜>ׯWWV>¿)?ЛAÊåòù½éד>ö©'ZƒÛUîúD±P¼5UãWûYZ(dñ’³ç¿aíêÏ[¹reg­³ž2eJóG¿êŸR|MÂ!vÖå´ÈZç'Ž @€ @€ @€ @€ @€ 0”®,-|äôööw´Æ|7Õ4?ÐsK‡i}oSyÓ¼kJ¥'zìF¯ZQN£®©_çNQ;$dáŸûu${ä‘ÿj,p¬¤-oyöÅQã^—ŽF}YÞ0©hëåY±xmÛìùßë쌟Y~ù¢_çõ©´ŸÜÞ>~\Öú—©ÿ§’ÕøZúl)Çvt\ô@Íñ½ ¼â’Å?i›³àît|ìÑ=I‘ 0·l}®ë¢žôém¬}ê­ÜÀöK§yÞžŠ†¿šFýïÕFNï…ô™þzŸÉ‡½}Fûü¿ZZZxCµøJÛÙsæþI1¿”žžû|{ù+—^ø³ÚbE @€ @€ @€ @€ @€ @€†§@¥¸ð¸öö_‘®Ô7—ê¨öî÷•ÆØ³ðwKþï4V:cÏU«€ÇZ¥†IܲeËž™6sÞ¬æ¦ð³ôæl©eYé|jssvjÛœù¤Ï+ÓÛmeW±üh‹«³­åõYKùðrl:ªPˆG¥·_*ÌÎJÕ\jÉýBL9Æ/§¢¯Ë^xÝÿ_ãwÒ<¿Ð“q²,^qÕU­íIŸÞÆÚ§ÞÊ |¿ÎMk?×4fÂìíÅÏ9瘷gÅðóT4|kªÈÿvzýÛm1ÞÿTØüøAåæË…âé¸ÇפO±§¶W§áš®T|{ã÷Üþéš‚ @€ @€ @€ @€ @€ @€†¹Àm¥ÒÖÛBøÒ™íó/l)”¿BavªwUïe§GS=Õââw–^²èŽzç ù8Ž„]Þm•“gΚ;;†â…é9f·æn_¦‚«ÃSãdzböñ¦PLõéÕ¨Bú¯B(n/ÄJÿUcAÖ΃¤7ðòµ«Øâ¬­] CKÓç·O~çÉTy^î*Ÿ_¥¹îMö©î¤ý’pùòåëgÌžwJ*þaúö? 
–ARÑðëÒ£ròchIošƒ*oÃô–ª¼›*WOÞFéƒpõ¶rœ½ª'¹>?Šÿ&@€ @€ @€ @€ @€ @€ o+K I+\pÆçüeË^ÅTäXXê©Nè˪ÓAUSÿ¥å.[rÁuéyzêê­À õ4½í¯_ƒ \~éâe!týÉö*áA\CzCÿgGצ¶•+Wvä4::.|8­ýښnjñ‘¥aË÷kޝSàHß§:1ö{š¥KÝÑÕÕù®T¬ûD¿¶Ó•÷o¶mëÔ¥…îtÛS @€ @€ @€ @€ @€ @€®ºê¢µK/]ô­Ž%œ¸õ¹­ûuÅpj,‡¿Mµ —…o1Þ—Nb\cÜQã×¥º;SûRûâTõ¥ó—±+¶unZ;9å™›Š+µIŠwrîÍS'8öFm˜ôéX²ø¦tÌê›[ ÙòTy|ü@.+½ÙÓû;ücÇ¥ ÿn ÇÝy¬r9œ_,„Óv¾×Ýó˜Å…¡Têꮽ?ïô}êOÛzæ¾¢táí3g.xWlŽ?H§î_ÏÜ{Ìãƒå½kÙåß»Çv7  @€ @€ @€ @€ @€ @€^"°bÅÅO¥›•âÄ=žv\{{Ëm¥ÒÖ—tt£_œàØ/¬“´rÌê¶kÒIŽåLUÄfæñ7©ÂùmƒYÜXYç–õO\Q©¬®eÍ1fß©%®¿bFò>õ—iä½üò në [ߘªw—öGþr¦òàË7–7½1Uú+n|ÅW @€ @€ @€ @€ @€ @€@7Ö±)8ök¸†®X±bcÇ%‹ÎKSG–cürZçæ~Yk O¦3WÿçÓ?8eiiá ý2F’^sÍ5[R!ÚE¹]büùÒ% ïÊë瀑ºOýÌZ÷ôW\rÉC—,l‹]]gn?ž¸ž#TÞC±ü¾tŒñ{¯)•ž¬gj¹ @€ @€ @€ @€ @€ @€ ´@Ó@h¼¡+°£`꿟Ù>ÿËÍ…ì#…,žšN8|cȲbfj&ã÷C9üǸíŠU«VmëC®~èZ>?„â'ª%Ž!¤˜¡sÌ}:þµÎ¤£´øªÛÛpP¡õ#1díYNL}{WTÃ-éôõµOV*Žéó£é}öóâ϶u†/¿|áÍ»Çyݸo{ß·ÒÓ¾]?ýJŸSì2ëî;i—×øâÝ«ÿª§mÎ @`È|ÿ€/ ™¹˜ @€ @€ @€ÔGàä—]_ŸD‘%†Ïf~ìs1ÔPãíçüëß§?ýìP˜K_çþ6ö³?½èÏFÌÞõÕK @à¥Np|©‰;; \uÕEkÓËËv<^l™2eJóᇻog1Œkê ë+nyæ†RiÓ‹ž ¨€}Pî^ vu©ôxêø­í9¦N:zÂäÉ“CרIY!®ïܸmÍŠÑkC©ÔÕ«At"@€ @€ @€ @€ @€ @€ Ð@ h³†ÒTW­Zµ-=Ksª<\CTÀ> ÑÙ1­•+WnNOïßñØq× @€ @€ @€ @€ @€ @€#C 02–i• @€ @€ @€ @€ @€ @€ @€ÀPPà8”vÃ\ @€ @€ @€ @€ @€ @€ @€ÀPà8B6Ú2  @€ @€ @€ @€ @€ @€ @€ÀPPà8”vÃ\ @€ @€ @€ @€ @€ @€ @€ÀPà8B6Ú2  @€ @€ @€ @€ @€ @€ @€ÀPPà8”vÃ\ @€ @€ @€ @€ @€ @€ @€ÀPà8B6Ú2  @€ @€ @€ @€ @€ @€ @€ÀPPà8”vÃ\ @€ @€ @€ @€ @€ @€ @€ÀPà8B6Ú2  @€ @€ @€ @€ @€ @€ @€ÀPPà8”vÃ\ @€ @€ @€ @€ @€ @€ @€ÀPà8B6Ú2  @€ @€ @€ @€ @€ @€ @€ÀPPà8”vÃ\ @€ @€ @€ @€ @€ @€ @€Àh!ë´L 0$bŒŸ«6‘w¾.üéË S«Åh#@€ @€ @€ @€ @€ @€ ŽÃa­Fàç—|ì³Õ&û³¯|óïC¦À±š‘6 @€ @€ @€ @€ @€ @`x(pûh @€ @€ @€ @€ @€’ÿùÝêÓºï±0õ­s¾™U‹Ê;` Z_m @€CW@ãÐÝ3#@€ @€ @€ @€ @€ @€@à œÿ½ªµ‹•õMͲ05g¡ŸÍi×L€ Ѐ…œ³) @€ @€ @€ @€ @€ @€ @€ . À±Á7Ðô  @€ @€ @€ @€ @€ @€ @€@# (plÄ]3g @€ @€ @€ @€ @€ @€ Ðà |MŸ @€ @€ @€ @€ @€ @€4¢€ÇFÜ5s&@€ @€ @€ @€ @€ @€ @€ . À±Á7Ðô  @€ @€ @€ @€ @€ @€ @€@# (plÄ]3g @€ @€ @€ @€ @€ @€ Ðà |MŸ @€ @€ @€ @€ @€ @€4¢€ÇFÜ5s&@€ @€ @€ @€ @€ @€ @€ . À±Á7Ðô  @€ @€ @€ @€ @€ @€ @€@# (plÄ]3g @€ @€ @€ @€ @€ @€ Ðà |MŸ @€ @€ @€ @€ @€ @€4¢€ÇFÜ5s&@€ @€ @€ @€ @€ @€ @€ . 
À±Á7Ðô  @€ @€ @€ @€ @€ @€ @€@# (plÄ]3g @€ @€ @€ @€ @€ @€ Ðà |MŸ @€ @€ @€ @€ @€ @€4¢€ÇFÜ5s&@€ @€ @€ @€ @€ @€ @€ . À±Á7Ðô  @€²ô Ð;Ÿ£½sÓ‹ @€ @€ @€ @€軀ǾÊ@€ ²À˜lÔ ÏÀð @ q|Ž6îÞ™9 @€ @€ @€ @€ht޾ƒæO€„‹“( @€½8Èçh/åt#@€ @€ @€ @€ @€ú* À±¯‚ú @€ƒ.plóaƒ> @€*ðJŸ£ºuæM€ @€ @€ @€ @€^@cÃo¡ @€'Žz @ —>G{ § @€ @€ @€ @€ ÐgŽ}&”€lã[Ž&ö4ŒO€N òùYùu @€ @€ @€ @€ @€C@ã`¨“¨«@1+„¹{ŸTל’ @€‘ Pùü¬|Žº @€ @€ @€ @€ @€ƒ!à¯Cݘ @€@ÝNý¦ðúæ—×=¯„ @`¸ T>7+ŸŸ. @€ @€ @€ @€ @€À` (p,yã @€uȲ,œ7an˜\ܧ®y%#@€ÃQ òyYùܬ|~º @€ @€ @€ @€ @€ƒ% Àq°äK€Ô]`\a¯ðÅ TäXwY  @`8 TŠ+Ÿ—•ÏM @€ @€ @€ @€ @`08¦¾±  @€º Ú´øÆ>^ßüòºç–4º@åó±ò9Yù¼t @€ @€ @€ @€ @€[ i°'`| @€@½*'R}iâGÂ÷6ÿ*,Þp}X]^[ï!ä#@€ %p@ab˜»÷IáÔÑo Y–5ÔÜM– @€ @€ @€ @€¾ ‡ïÞZÑ•âÓZßÞ=ú᦭w‡¶ÜîØö`x¬ëé°1n 1ýÇE€†£@²0&,N Ç6Nõªp|ËÑ¡˜†ãr­‰ @€ @€ @€ @€h`Ž ¼y¦N€ä TŠ9Þ4êÛùÑ" @€ @€ @€ @€ @€ @€p|Ã@I‡ @€ @€ @€ @€ @€ @€xQ@㋞ @€ @€ @€ @€ @€ @€ @€% Àq ¤C€ @€ @€ @€ @€ @€ @€¼( ÀñE O @€ @€ @€ @€ @€ @€lŽ57ÃqM{úvÍötÓ=†¦@ÓМ–Y @€ @€ @€ @€ @€ @€è™3Ï926ßbv|–…}Ó×I!‹û„&¥÷ Y¶wˆqSÌÂ3YÌÖÆÖ†ô<Ý{"˲·uuýhyiñïûgv}ÊZh›}î›c,¼¹PÈÞcxMš÷¤,† ϯ)<›²?’ÖõHZë#!Ä_m*o*]S*=Ù§Q¾s6mæ¼ Ú¡@IDAT)ÍMá=1ŽIkÜ? 
ñ€´û§5íCÖ™¦”ö*®NÏW§µ>²ò ak¼º£cñc?]# ЀÇîdÜ'@€ @€ @€ @€ @€ @€ïioŸ<*k}O*fœB65ĺý˜¿Îú{áÅ ¯+«Î²Öô²5Åøâí” ]h.CÛœ«SQÝʱ¼|íꇗ¬\¹²RT7(×´ö¹¯h*f Ò¤çe!;äùiV–°c:ü:>ÝŸ^¾*­+]Ùû[ ­_Mk¹.„òEk¨”Ö±yG¯¿Tò7 W¾xc·'©ˆpmÇ’ Þ¼Ûíº¾œ:ujÓÄ–ÖwfZØ{Òô¨ ðÂÒ¶?{~M•{ÅÔthº·Ó>>›cœ9gÁM1Ä+cŒK–.YtG]')=PàØc2 @€ @€ @€ @€ @€ @€A ­íÜCBsñÓ©Ðí¿¤b¿QõœóóvÙ¬BVœµÏäÃÿ¡mμ/>p÷í W­Zµ­žãTË5mÚœƒšZG}!­m~Š+T‹í®-HY©/:-u?-­ã¼¶Y >Üqé?Þ9>r¶¤¢Â£v¾·ëóøô®¯ëûjÆìym©°ñ§¹Ó—Ì©¥rJÊ5%==¯mö‚ó;7mù»åË/y´/yõ%@ ÷½úÁÕûáô$@€ @€ @€ @€ @€ @€ пgÏ™shÛìùßÍM÷¤B¶OÔ»¸q÷Ù§ª¹#³PøÃ>îîíóOܽ½?^Ïœ½àc©¸ñî´¶÷§üu©Jë8:dqe:åð›§Ÿ~î¸þ˜wOrž=gî[gΞÿ³BV¸<ícŸŠ÷0n!Ù}¨bØ6kþ?Lmoß{1n ÐÏuùáÕÏs”ž @€ @€ @€ @€ @€ @€@M3gÍ{O1Žº3Äý×þ.lÜ}BédÀà Åìmíóß»{[=_W òBþ%­oL=óVr%·Ê)‡mßts¥P´ÞùkÍWYcS(þ,Mè­µöéM\Å0+dçíShýe[Û9‡÷&‡>ô^@cïíô$@€ @€ @€ @€ @€ @€BmsæÍ‹YvEþõ`™£ÓyŠ—Î˜½à=èSkh¡rºb¥ ¯Ö½«œJÙ[~pfûüƒ{›£7ý¦L™ÒÜ6{Þ±Æç—ê:Íš›ol›=÷øï{N€@ÿ (pì__Ù  @€ @€ @€ @€ @€ @€@`ÆœŸ 1» ª5 ÀpU‡Hs¨ƒøÕiÓæT5°ǵ··Ìœ½à’Ôå£=èÖ·Ð,;ª¥~šì[¢ÚzŸÜÞ>þð£ûn–æ×Ö£ÎQY˜œ…Â*§€Ö9³tt#0è?°»™—Û @€ @€ @€ @€ @€ @€jhkŸ{F*(ür¨”ÖxÅK‘¿1ÞC¸7‹a] å …¬ÐYÎ⸠ã !³0%¥|S²ÃkL½=,MelÓ˜–¯¦í=é×MlöŠâ˜+RÛiÝ´ïñvŒá¡bêïMk|¤ÖXÙä¬1¼6Íñ̲½öØyÇÍTªyLr¸(ÅU ës[¥€s\aÌ÷Ó(oêI²´®G³¿›Öº*”Ã]ÅòꔣÊÅ ¡|PšökÓ^NKkPSÞ,Û;å¼bFûüw,--¼¡¦>‚èµ€Ç^ÓéH€ @€ @€ @€ @€ @€ 0Ø•¸P,~¥–yÄt¥b¿E!–ÿ­ãÒ žú¤×µ]3g.8.6Å?K…ŽóSÑÜøZz¥Ø÷N›9ÿµË/_ø›Z⻋™1{ÁûSÑ^íÅ1^c×ÿÍ[ãYg5¦y¯‰g¥âÅϦü¯ìnü´ŽIݵUîg±•¥Ý$:¦Ðú¥”¥æâÆ´“Ë:;Ë_X~ù¢_w“òÅÛS¦Li>ô¨ãN.†ì¯ÒÞM}±¡›'©¨³)ãÅgœqήºê¢µÝ„¹M€@8ÖQ  @€ @€ @€ @€ @€ @€Á8¦Øú©Tœwtîè1ÞBù½Kß”»‡€Ë/¿à¶tû“ïioÿâèâ˜ÿHcž±‡°—Üj*†÷¦›½.p<¹½}|–ÅÿSË ŠéTʇSáß'–.YX9í1÷Z±bÅÆ´$‰.}eaôÿ Yá3éõèÜŽuhk_P9añ/jJÃocèüPÇ’ US| ZµjÕ¶ô¸&=½&‹Î/„ðÿ¥BÇýªõ¯œØ9jlÓ¦˜¶jqÚè›@z?º @€ @€ @€ @€ @€ @€4žÀ´is*ÄP)Ê«z¥ƒïÚ¶iëÛ{[ܸsò«K¥Ç;.¹àÌtï[;ßïöy¶½À±Û漆±…ÖÏ¥b»ýóâÒa”·mîÚô¦Z‹wÎw[©´õò%‹¾PîŠïJ’ÏìÜÖßÏÏž3çЬίiœ;ž.o|kOŠwÏ»tÉ ã¶®×¥ï‰;voÛýurŸ1cÖ¼ï~ßkê'àÇúYÊD€rÞ:盟­ô™ï„?}ÙÕ"BøPíËWO¤• @€ @€ @€ @€ @€@ƒ 4·68dÙÞU—ãÖr¹söòå—Œy\µøB!ûâé§Ÿ»èšk.\W-N½PàØ;7½ @€@¯Òÿ8ùûjW¦ƒç+jׇNëóïãÕÒk#@€ @€ @€ @€ @€ Ð01ÎN€U¯T÷OËJÝR5¨•‚·™³æ~2Š—åu…±)¦ÇŽ©Èðkéo«×ÿİ&n+Ÿ±bÅÅ}*n|a Ë/_xóÙsæžÝŠ?I÷ /ÜﯩðóèTÜ8-/w ñkV?X)ô¬ÛS/_~ñê3æ½+•ý&}Ðý²qcÆ?œÚ¿Ü}Œz+Pý\o³êG€ @€­q[¸qËáéq×¶‡Ãêò3aSÜ’ÓK3 @€hÍF… 
Â1͇„·Œ:6œ0êU¡%çÿ8ð³4" @€ @€ @€‘'pVûüò,_uå1nØò\çWªÆô¡ñòK_Þ6{þ]©ñ˜ªiŠÙäªí{hl›}î )ï;÷д˭˜Å?¯œH¸ËÍ>¾¸â’Å?Ÿ9kþÿ …ìÓ}LUµ{ljúËT\Xµˆ2U4>ºe}ç{W®\ÙY5Y/—.]ôD*Rýh*R]Z­{ÌÂ'C{ûWC©ÔU-N=PàØs3= @€èƒ@gì K7þ4\úÜÊðL|®™t%@€ 00•ˆãþ®ÕÛ×n^&d{…Y{M 3Ƽ=4eÅ™„Q @€ @€ @€x‰@K!ÿä¿e߾ꪋ־¤so¤½»RºêŽå0©ÇCf…ü“ cXѱdáE=Î]C‡ëŸüû1ã÷;3„ì¸Â{rj{û>)÷ûó:Ærù³ý¹‡©HuÙÌÙ ®O'IžÔÝ\ÒIš‡OÏÆÌ\¥ݟO€@ïªV8÷.¥^ @€سÀ]kÃ'Ö|=üÛ†«7î™È] @€¨üC•ßi+¿ÛV~Çu @€ @€ @€ 08éd¿T|Wýê ]KªGô½µ½yYbVØ+/f÷ö,ÎÞýÞî¯c9~i÷{õz}Í5×l)ǬßòïUlýh:sLµù¦=¾siÜüíj1õh‹åðµ¼<…Büó¼íô\@cÏÍô @€ @ ¿ßöPøøÓ_ ÷v>Ú‹Þº @€z•ßm+¿ãV~×u @€ @€ @€ 0ðåMgt…-/ë '§Ñÿ,Æð¥âe!Ä›Óc]ˆáñ+.Y|c¿Ï,Æ?äQˆ±j!ßîý§µÏ=*(øªÝïïüz{ñ_iáOw¾Wïç—7–’ë3õÎ[É—N¾œ‘—7ÞøÍP*uåÅõµ½£tÁ•é{ç±jy²NœÚÞ¾wµmô\ ©ç]ô @€ @€@Ï*§Ú|fí·ÚØ36Ñ @€ P9ͱò»î7&}2ì_œØ36E @€ @€ @€ÃH ¾- áþ´¢ÊãúôØå:å”y•SÓ‹ý|eYnc9 Í=™Es¡0-/>‹ñßóbúÚ~C©´iæœ SžOö5×Îý+{“ '_ŸNp¬zuÆ,mñ€\i‹¶Íív´,+N(¶¼%µ¿ä{­Û>Èp‚c.‘ @€ú"лÂß=sâÆ¾ êK€ 0¤*EŽ•ßy+¿ûº @€ @€ @€:×^»è¹˜M̺6哊çzTÓNg<;/gg¹ë»y1õh/w•;ê‘gç{oɲ,ïà¶U+J ܹ_>O'8æ.cámý9¹ ŒDýp‰@ÖL€ @€@ß–nüi¸·óѾ%Ñ› @€À¨üÎ[ùÝ×E€ @€ @€ @€ÀˆÅ–V]¬!f{ÈÉííãSAdN!]|îŠÒ…wÖš³/q][ž½)¦«/9vœõ…†üõîýúóuZa ãåÏ»?ç(7á( Àq8îª5 @€"[ã¶pés+‡ÈlLƒ @€@ÿ T~÷Ý;ûwÙ  @€ @€ @€S¦LižÑ>ï]3çÌûr!Ëþ_='5¶0êˆeU "SµáÍiÌr=Çí.×òåË×§ùü¾»öÞÜÏbx{^¿˜e÷äÅÔ³ýñòæ{ó 9cÈNLcªÇª'¼\#^ ï(×€ @€Þ ܰåŽðL|®÷ ô$@€ Ð@•ß}oÜr{xÇè×6ЬM• @€ @€ @€ ÓÚçÙœ…×¥Ú¶Rá݉Y¦¤~£+}Ói‹u½ 1›œ—46¸ª®ƒæ$ËBå4Åì•9a57Ç,ŸçV(w hã ¥Ò¦¶9 K‹8¨»…¤};mÚûö[¾üâÕÝŸO€@Ï8öÌK4 @€=øe*pt @€I¿H¿+pI;n­ @€ @€ @€ÃIଳ޷oqtó‘é´Â—³ò‘é´¾W¤¢¾ãRqß±é^ë kÍ+Ì{!®·_Ë!›œ{D`*…xvÅr¸/ËTÍÓIu‚Ù>¹Ñ…·SÁá·rãê㤴×U3[›*sWàXUI#Ú8Ön%’ @€ ܵíáöN€ @ ±üÜØûgö @€ @€ @€Ã_`j{ûÞCëëÓ ‰Çd…xT G…,cvTåt¾? v:D±zÁÛûÔçY! æf*ÇgscêP¯>Ó§OŸ¦VC¹d6±>#ö"§¸±’©œÅ‰=È(”Ž9@š  @€è½Àêò3½ï¬' @€ð;pnš) @€ @€À¸ãw7‡§ž¬~ÆÞcLJ7¼ñÄ!9ÿþšÔÆç6„Ç{xûcÃúuaËæMasztnÛZ[÷ {í=vÇcÜö¯rxh³WMGÞ! 
ིçMð^Ù³‹» @`¤ L™2¥ùð#_}z(Äé©víÍ©HïØd±£¸.•Ïm¯ Kç x%]÷;R!÷Ǭ­ë>C?´dYÝ *»šÇN,öÃ*e±+(p(l㌎#b›-’ @€ÀàlŠ[g`£ @€$¿¼a  @€ @`Ø <øÀÂ}÷ÜYu]ûíàˆ(pܼic¸ã¶[·Ýž}fM·&Ï>³ö%mYú íÃŽxy8æØ×†#^vt(6ù“Á— 5ø ï•?n ÷Ê-<#@€ž˜1{Þ±©lñ©€qvª]œT¯ÓÂ7Íwrî8]åº掕bˆÏ&ÏZBscŠ¡°OnÐP(f=ÿ¡lkn#RÀÿµbDn»E @€ @€ @€ @€ @€ÀPØ´ñ¹ðËŸ¯ wÝù»ÐÕÕÙ«©ÆÃ÷ݳýÑ2jtxű¯ o>qjhniéU> Eï•¡¸+æD€WàôööýZ ­ŸK³ø/éýhȺ™THx@žb¹PÜ–SÏöB ÏÕ©¾1³˜N@¬O±d=×Xk®Xv‚c­VâÔ"Ð?¨kY˜ @€ @€ @€ @€ Ј÷Þ}GøÉ¿*'ÒÕëÚºesøí-¿JÅŽw‡“N›˜|p½RËC`мWÞÀ @`È LŸµà}ÅBüV*ž×±+fáGéhËcȶ²pA½ÆLy·å•ÿcרzWKž®PØ»XK` 1éßb›·¾Ò ^H¡à_<}#CŽÃpS-‰ @€ @€ @€ @€O \.‡]Uøýí¿é·É¯{ö™°ìÒ ÂOxG8þMo éD›~Kbý%à½Ò_²ò @€Æh›µàoB¿Šûç—ÜTИrßCüQ áºõåM+¯+•ž­¨Íœ5ï¬êãš¼e”³bÿqî´¢BÓxõ¡M;´~§Ô÷´\ÞÚx“6cCW@ãÐÝ3#@€ @€ @€ @€ @€"ÐÕÕ®ûî²pß=wöûŠc:.åW7ü(<þèÃáôi³B¡PÇ?Äî÷Ù`¤ x¯Œôïë'@€{˜9gÁWSË'ëU€—~e^ŸŠ%ïHå|w¤ßŸo ±ü« Ïn¹öÚ…Ïíyu¿»&/c!T îJu/få5Y¨~d*"½;tÅOÜ k©Pöh‘ä (pÌÒN€ @€ @€ @€ @€úQ rÝ÷®¼,]³Êm¿Y&î³oxõëÞXsKÀ{e°äK€†®À™íó.dá_{2ÃTÔxSÊWįì(o¹5”J]=鿧ؘ&Ö«øo{þ,äÿR³#÷4—þº—Öwh½ro ›×´„1UÓe!4uêÔ¦•+WvV ÔH€@à (plø-´ @€ @€ @€ @€hTûÿpW¸õ¦kšþèÖ1áµoxs8敯 {×mŸÇ}(Üþۛ½wÝ*'Þå]•S'Lœ9ìey¡Ú š€÷ʠј@]>pj¬šç¾ÇÂÊÞ~T5H#ö NüzÙÄ=4½ôV ?é,w~üŠÒ…¿}icïĸOȪ—8fYV¨u”B¹üûP¨žNž|]­ùê÷†:å וJëfΞߕ̺?Ç1µM˜<ù4æýõW†¦€Ç¡¹/fE€ @€ @€ @€ @€Ã\`ãs®]QÓ*;â¨ðÎwŸZÇì•`:ݱò¨œÊxÝÕKÃúõÏVíÓ6×}wY8÷ýÍ--Uc5 ï•ÁP7&ú |è´œ|1¬ÌüØçr¢4 @`ÊéY ÓBõºÂí}b9œ×qéÿ¸K‚:¾ÈB¶O^ºôkwõŠÅlˆ›oÜ+¶ÆTÙýêb8nÊ”)Í«V­Ú¶S×~yz\{{KHãÕb]ãbZÜê´¸ƒªÅg]M¯Lí÷W‹ÑF€@ã Ôüññ—j @€ @€ @€ @€ @`èüúƇ­[6çNèø7½-¼çìÙ57îœì€É‡÷žûápØ/ßùöŸoÞ´1üæ–_î±ÍMƒ-à½2Ø;`| 04Òé¬zàŽi§Ðãÿõgqce˜T¬—{Šd:q±ûbÅ݈¿W*­I·îÜíö®/³¬åà#Ž;v×›ýóêååÖTܘÕù_C‰¿È›mV(žœ£ÆPàØø{h @€ @€ @€ @€ Ð`kž~2ÜqÛ-¹³~Å«^ÞüÖ©¹qÝŒ5:œrÆ{ÃÜ]È‹÷o]ucزyÓ‹¯=!0¼W†Â.˜šéôÆäÎ,†Ç7­{òäÆõ1 ñˆ¼éÈÂÖðd?ÏËYl§çÅÔ£=sJ=òìœ#†ì';¿ÞÓóTZ÷q÷4Ž{ ®@8îdN€ @€ @€ @€ @€†ƒÀ/~öÃN’©º”JQâ;Þõžª1µ4655…ÓΚÆŸP5|ëÖ-áæU7Tê«$<·aýPŸ¦ùõ@À{¥XB  @€À˜6mÚØt¢àËò–œ~åþÚ5×\³%/®/í'¶··¦¹¼#/GONpÜž++ÿ47gÈÞŸSöTLúÁzäÙ9G×¶rncÈÂk¦M›sÐÎýúóùôéÓ'Ìœ3QÛœy_œ9gÁGfΚwê´ö¹¯˜:uêèþWn#]@ãHÿ°~ @€ @€ @€ @€PuÏ® Üwwî˜'¼í¤P,sãj hmÞýž¶ÜÐßÝò«°ñ¹ ¹qC5à¾{ûëáûWw„Çyp¨NÓ¼jð^©JÍÍ«iÙYçu5Åõ!è lô;S÷ܸTlÙÜ“aÊ›×/K}6Vë“N8|åÙ3ç½¥ZL_Ûf´Ï{–eÇô5Ïîý×=ýÐ-é_}ÉýMcZ>²{ßþz]h÷!ds³Pø›4Æ¿†Bá»ÍÅâûpØÆ¶9 
OÅ7¶ÍžÉ)§ÌÛ«¿æ /‘( Àq$îº5 @€ @€ @€ @€ @€À  Üþ»›sÇ>øÐ#šד€ýö?0qdõ¿Kîìì wÞ~kOÒ¹ØÊɘ÷Þ}G¸â²EáÒ ÿ=T¼·mÛ6äæiBùÞ+ùF" @€ÀHèjŠ58nÙïéw£B¡Öc×[z2—eË–=“â/ÎëSl*üe^L_Ú³BÖ/ùW®\Ù™æõó¼¹¥"ÎOn?±3/°í§Ÿ~RãK¯Tà™¥yŠß’žuíµ‹ž{i”;ôV@coåô#@€ @€ @€ @€ @€=èêê ¿¿ý7¹½Þø–wäÆô&`Ê[ÞžÛ­R8\®5O=~|ýÕaÑ~-üüÇßÏ>³v¸,mدÃ{eØo± @€> d¡¼O- ZZšúµnfúœ9G¤_c~-s YèQãó9»þ%/wª¼›ÓÖ>÷Œ¼¸Þ´Ïœ5wfÊ?³7}kéËáßó㲉M£Ç,?®o­ã‹ŸHŒó²Är9·è4/‡vvè×Ô»å @€ @€ @€ @€Ù?ð‡°icõÃ>&tHÝOo|A½rŠãaGõÂË=~}úÉÕîpë–Íá77ÿ2\|Á¿„«¯¸$^Ë|b(¼©–¸ÞÄ̘³à¿eYVóÑëY £{3NèʾÛ/ËWh]:µ½}ïÜØÎ8㜉­…1ËÓ©“ûÕÞ—rêüåüÙ^ÅBñ²z­o·ñ²±ûdßÈB¶ïn÷÷ð2»²£ã‡÷Ðà}PàØ<]  @€ @€ @€ @€ @€@OJ'8æ]vD^HŸÚ:äðÜþ¸ûÎܘFX÷ìÚðó_ýÇׯ¿:<ýÔ¾¤a5ï•aµC€ê.°uCùÁ”´RWýÊÂŒ)S¦4WêyëŒö¹ïN‹ÿØ“žéüðq=‰!¶£tÁòtŠãÒ^w÷5[¾sŸÂ˜ïOŸ>}Bw1µÜŸ6í}ŒÛ´2LyB-ñ}y¬¼éü´¾Ü9ÓúŽTh]ø%qnô€÷Jÿ¸ÊJ€†›@gW¸¸¥>›Ö5:wm…pi*hûvç¦-Ÿ« æÆï0cö¼cÓ‰†NÿÉ‚ôO‘LÚ©©GO³öííÅP*õêÕŽ% ¿™N<5 N«ià,kk Ŷ™³<C¼2ÍýÞr ¥“$ŸNëÙ/Ëâ!fG‡,ž•^÷z]/Ì%f© °Wy˳(Ž÷ºô/¾¼,/M¥ˆ3+¯mKE™ñ3Ë/_”NÌ¿Nno?.kýËÔÿSÉ¡ö‚Órü`GÇEä ‚Þ(pìš> @€ @€ @€ @€ @ ‡kžÎ/pÜgÒþ=ÌÚ»ð}÷Íç‰TàØh×>i]'6=¼uÓÆTÀyk¸ý·«ÂúuÏözår9Ü}çï¶?öÛÿÀí…ŽG¥‚Çb“?¿ì5j ½Wj@B€„¥…¶Í™÷•,rO6Lm•_à>ÒÜÚ2oÆìù_ å°|KØtßÕ¥Òã»SžqÆ9‹£›-6eSRQà‡RaãÛvÙýuLW*|8ź{Û‹¯³¬xVh=xE½=*=nzöÉY­ã÷_‘æUûé…Y˜œŠ"?\™G!uüã•^l½ËÍ?6ïô,H.M9òNÁìUáæ Ã,[¶ì™i3çÍjn ?KEŽ-/ܯö5íë©ÍÍÙ©©ðó4¿+cW\ÙU,?šÅâêlky}ÖR>¼›Ž*âQ!†£Ó‚ÏJ{4¡ZÎÝÛÊ1~yiiáe»ß÷šú ø_Øõ³”‰ @€ @€ @€ @€t+°ö駺m{¡aìøÚy¡Oo¾î=6œ5O­îMê!ѧµuLxÃO ¯ŸrBxè{Ãm¿¹)žN­¬ÜaTÚéPÅŠ8óë8Sü®WÚÛåkW?øé]ïzE€@½*ïV @€ @€ @€ @€ Л7o 7n¨:B¡P{í5¶jL½[F --£ª¦ëìì Ÿ«>çª †@cú£ïpØG…Ó§Í ç~àáø7½-´ŽÙ«O3«ìå-¿¾!\ôo„ï®(…‡¼¯OùtÞUÀ{eW¯ @€ê+K¥ !v•ÎO\_=²›ÖTð˜Šá^•~o|gåk‹cüñÖ®ðúŽÒâ«B躵›Q^¼NP<éŽ|²bÅŠ›ž93ßý¨—)jî–\¿sù% ç®_¿>÷tÆ,Ä>8V&vù¥‹—%Ë?ùÿÙ»(9«û@ô·zU«µï»@  Ä&v 6ؘ€q0Û3q&™L&ysNfÞËË™ÄÙ&sNΙ™÷&//™7YÆ;Ʋ€mÌff„Ђv¡]jI½ÕûnäVÓ]_uwUwUõïæÔéªïÞï.¿[ßÇg¥ÿýORbn/x¢%h˜¬ýïïë8ö…'Ÿ|²½Ýë’.»`xK€ @€ @€ @€ @€J!pðÀ¾ÔncVÅ7XeÔè1©C>|0µM¥4ˆÙ/¹âÚpׯýN¸þÆ[Ãô™s4õlòÏ›Þ[~xÿwÂwïþÛðæë¯„¶ÖÖõéä\+¾ @€@_î»÷žW3Ùì’óÚúzîÚwvfßÞ×yìS?\q÷¶ØÏ‘ý5o$?:ó÷™p€cìÿÁ<|ßwïþdòLú¯C6”à¡={4ÛÙùïï»÷›¿×4eÊ”ôø£L(J€c\_ÜÓÖŽì%É#÷«ñó`–Ä4›íÌþY²ö_+V¤væÜŒE ZÒo0Õºrë"@€ @€ @€ @€ @€ƒ$PH&Ä1cÇ Òl>&T¦•ä5©¸úÚÚÚpÖÙ‹ÃçoÿJø•»~#,]vqj6Ë´EØ¿7<óä#áî¿ÿ¿s?ãg¥®•þ¹9‹ 
w|ïîÇ;;²×Ä¿AQj‹$³àO²ÙöËî¿÷›ßè÷è£ß:š>›oüäšœùù/ÞuE¾6}¨ËÞwïÝÿãXg˲ßëÃyy›&™!L²R.¾ï{ßú/IÃd¹I>ÅŽIuyOŠ ³™¢8Ʊbàh[˾$“cçŸ'Rjùìêlg¸ò¾ïÝýGiëUO€@ñRo0ÅJO @€ @€ @€ @€ @`x k9šºðѦvÒ‡…dp˜d¼; l|¤û¹íM!4t?Üýó±îúù¡‡jIúøÃï¸ã¿¨iúýšLæß$ŸG ´ßŸ {:3á/÷ïÜò=ùä“í«w€’ p,)¯Î  @€ @€ @€ @€ @€@…m“žQ±˜–£FIíîĉã©mª¡A}}CX¼ô‚Ük×Îm¹@Ç ëÞJ2Õtô{y[·l ñWŸwQX´dYÑ4²ßý —]+Ãe§­“”Fàᇿ}(éù+·Üqן×ÕÔüÛ$Âñ_$mbT¿GËf·$vd:ÿñŸï½ç¹´~öw¶¬˜XÛôW!dš{k›„]ÞrÛm_Zðƒ|g}omúsüÁܽ:9/¾þèÖ;¿²¨&d>Ÿ;ÎOr0NO~NËý ar6$.Û²!³= lÜšdl|tgçñ‡Ÿ_±¢×Åšælà˜R2ÙÝ)-ú]ýðŠ{’“ïæ;¾ú_ëk2¿Q“ÉÞ¬çâdokûÝiÙlö±Ðþnó{kþyåÊ•mèË© @@€ãðœJ€ @€ @€ @€ @€ h) ƒc ²ÌRßPŸ:Üp pì 1uÚÌ_W|âÓá5¯‡5o¼ÜßµIŸÞ>|0¼øìOÃ+/<Î:gI.«ãä)ÓûÔÇpjìZN»m­ @ t®¸ç¤÷ß¹å–[þ ¦iÜU5Ùpy¨É\BvY&&vŒKÝÚ“ À=Iàßž$ qS<÷x{gç£õSðDŸ\±âHÒ¸ÿ•”¿áý÷~kmÒ"¾ŠRj²õ£Ÿü%väo0ðÚ®¸{[ÒË7â릛¾4¾qdÝuIˆãg’=[šìëä$°tr2Ïýå˜$£ç‰ÉîKÚ¼‘Íd_Êt†—:Û‹÷ßÿ­’e|µz 0|8Ÿ½¶R @€ @€ @€ @€"B²ÒÕÕ î¯ôÕÖ¦wâx¯I\†Hrð†1¢),»è²pÞ…—æ21¾•:nzo]’ä&ùõé~”˜ ò·Vç^S§ÏL—‡ùg- µµI:Ó‰”ù)®•2ß Ó#@€&ðàƒN¦üðG¯S³OGgFŽW“mhjé<öÁ#+VÄ¿hÑ¿½S½Vï›ÎLvRMÚò²™’8vÂ~ô¸gßÿèuªê¢‹.ªŸ;wѤöÚ0¦®#ÚQ{â@¾ì”§Nô†!Hÿ׉!›š  @€ @€ @€ @€ @€@u´··¥.¤®.=£bj'}hP[@@åpÌàØ0“É„Ùsçç^G kß|-y­ --19OÿÊ®ÛB|=7²9,^zAX|î…¡yÔèþuVeg¹VªlC-‡”©ÀG1øQ)@ ®3L iŽ5ƒàØÛ´W®\Ù–¼â\Êb>½ÍÓq~! 
ÀñÞ @€ @€ @€ @€ @€’Äì}i¥€Ã´>úR_HÇŽöö¾tYõmG–_~M¸èҫæ ï„5IVÇmïoê÷ºc¶Â•/=^}ùÙpÎâeáÚëoêw_Õr¢k¥ZvÒ: @€jȆڳ3) Êtf6§4QM€8öÈâ  @€ @€ @€ @€(ž@GGz `]‹7£ ¯³³³˜CVM_555aþ‚E¹×ý{Úկ†wÞz=´¶žè׳Ùlغå½~[m'¹VªmG­‡ˆÀîüê?„Lx7t„uíaÝžšcëŸ_±âØ@úì×¹5Ùå!™H¾r${ìÕ|õê Л€ÇÞd'@€ @€ @€ @€ @€E($+]]]}‘F+¬›B28 pL·7~b¸òšO‡K¯¼6¼ûÎ[IVÇ•aÏ®é'jÑ£€k¥G  @€a+¹5 +jC¨O^Ó³MÙ/Üùµ­É±uÙ]×2É91¥¶ÎÖì÷RÚ¨&@€@¯ƒûçžz†  @€ @€ @€ @€ P½õ õ©‹ëèhOmSÌ…ŒW[“žå±˜sªô¾Žµ kß\ÞzóÕpäð¡J_ÎÌßµ2$ì%@€ÊT ù»?NþæÈ¯ä›^M&óo¹åW|ðÁÚ•¯]_ë¾pçW3 nüÓÔó²ÙßÿÝ{SÛi@€^8öã0 @€ @€ @€ @€(–@}}CjWííƒàØÞÞ–:§ÚZŽ©HIƒÛßÏej|oýÚ0Ø™8 ™_%µq­TÒn™+ PjãÙc?Fv&ãôžZ=“™S×ÔðÀµ×^ûÉ'Ÿ|òx1ætëw}9Iùÿ¦õ•ÍfÛ“ Ì¿Hk§žù8æÓQG€ @€ @€ @€ @€Š PHÐV!‹0•S]tPY[ç× Ou{ÓÖÖÖ½ýf.°qß»»Õöýcsó辟T…g¸VªpS-‰è·ÀÃ+Vì¹íί=2áê|d2á² Sç<ôËw|í÷XñÍ•ùÚæ«»ñŽ;&7ÕŽøÝÍü~Ò®÷ Ê:É„ìŸ=°â[/åëSÒüËCšz @€ @€ @€ @€ 0@B‚¶=ƒcGGêª Éà¸uËÆÐÚz"µ¯b4˜5{^hhl,FWýîcÿ¾rAï¬]ÚZ[ûÝÏÉgÏ–.[æœqæÉCÃúg5_+Ãzc-žè·@6´ÿï![û\’Q1“¯“¤úú$û+_øâ× Ùÿ|ß÷îþY¾ö]ën»íK ²uu¿—ôñµäøˆ$ 2µdCöùû:ÿ§Ô† @ E@€c j @€ @€ @€ @€ 0Pú††Ô.Ê2ƒcmú¯>ûôcaÿÞ=©ë+Fƒ_ùò¿ §£«>õÑÙÙ6nx'ظ}ëæ>ÛSãÄwÎâó’ÀƋøñ{j2lUóµ2l7Õ  @€ Üwï·_øÂ_ýû¤“_/¤£$6ñÓ¡&óéäœu!“Y²ÙÍñ•Í„-¡3ìÎfj¦Ö„ÎÙIÝì¤mò3Ì !³,yŸš±ñÔøÙì‘ÎùJX±"ý¯¦œ:Éô,þ/=Ÿç( @€ @€ @€ @€ P @]}}jËŽööÔ6ÅlPH@åˆMŲâú:zäpXûæká­äÕrôÈ€ç?vÜ„\Pc nlhÚL”^L‰:p­”V· @€@E d[Ãp}’ÃñŒB’dc<;i{vȘÄ/f>LÊX{òìÂcOžqêg6ÛÑ:ÿõ÷Þ³áÔ1o 0ŽÀs* @€ @€ @€ @€(D fìK+íƒàXÈxM#›Ó¦]•õÛÞß”ËÖ³6f³Ù¯qÎg&ËÃì¹ó“ß-O~Á\éUÀµÒ+  @` ÜÿÝ{o¾ã«W5ÔfOž&Eòd¼=Û™ýÒý+îyj¨æ`\ªO@€cõí© @€ @€ @€ @€ @€@™ ´u¬åè Îº¥%=#áp pl=q"¼³vu.°ñÀþ½Þ‹ú††°pñ²\ÆÆ˜¹Q)LÀµR˜“V @€ÀðøáŠ»·ÝxÇŸhªù“äof\8ØÉþxäX籯<¼bÅžÁÛxT·€ÇêÞ_«#@€ @€ @€ @€ @€2Ù<*uGLmSÌGJínä0Èà¸wÏ®\Pãº·ß íím©&i ÆŸ˜ jy« @ ´Kë«w @€ @€ @€ @€ jkks[öïíUãè‘á³³3ÔÔÔôÚ¦X1ƒaKË‘¼ÝÅà½Q£ÇämS •1¨të–áÍ$[ã–Mïæ 2-t=ãý³ïf@IDAT'LJ²5^Î^tn¨¯o(ô4í p­€¤  @ ‹À~ôýÉÇ¿¯Ï}îW'ÕŽl¸°&. 
5Ù 3!œ™daBfl’‰qLòGW’8¢ì¡lÈlO~nOê·g³™ägv{¶3li?qàñ|ðp—î½%@€@É8–œØ @€ @€ @€ @€ @ „ '‡|Ž1¯åè‘A *Öë’;îhX³bEëÇ* @€À pB|C @€ @€ @€ @€ @€Àð($`ðà}ƒàÇI+1ȬþÊ©ìÞµ=Ôøî;o…ŽŽöO-f²\¸ôü°ô¼‹Ã˜±Iò¥äÕz­”Î @€Á)@ª ŽCÂnP @€ÃC )ÓŽeO ÅZ% @€D >+ @€ @€ÞÆ0¸cÛ–0sö½uQ´ãÛ·nIí«ù¦v2È ^{ù¹ðâs?+ʨ1Èni¼yöÂsC}}}QúÔIa…|÷\+…YjE€ @€ràXî;d~ @€*X`j͸°©cW¯ÀÔ  @€ôM >+ @€ @€Þ&$sieûÖÍiMŠR_È8ÓgÌ.ÊXƒÙɉÖã.“É„¹ó„˜•r0M4Ù*>ÙµRÅ›ki @€è& À±ˆ @€OàìúY‹Ç©' @€ ˆÏÀ  @€ @ 7qI€ãˆ¦‘áø±–Þš„];·…öööPWWº_ïkkm »wmïu±bĈ¦0iÊ´¼mª©²±qDX¸äü°tÙÅaô˜±Õ´´Š\‹k¥"·Í¤  @€ Ð/š~å$ @€ pIã¢ZiB€ @ z.õ \=›i% @€ @ 555aþY óöÜÑÑ‘ rÌÛh€•;w¼²ÙlÞ^bö˜ͰÚË„‰“Ã'®û¥ð•_ÿÝpùÕ× n,“ w­”ÉF˜ @€A tâi&o PiÉÿsàOòÍù“ËÂ5ó¦‡kóµQG€J¸<ùåîq™æp {´’¦m® @€ú%Ÿ}/k\ܯsD€ @€ 3, o½ñjÞ¿¿iC˜9knÞ6©Ü’ôŸVfÍ™ŸÖ¤bëcàæóÏçž¿<Ì(¡sÅ•ÉÄ]+e²¦A€ @€ p,1°î  @€@Wç¾û[Üõs÷÷Ïþ·¿ùFÈpìîâ3•+Щ¿Ò|møÿŽü¨raæ @€ ˆÏ¾ ÿì^ —f @€ @`Ø LŸ9'4lÇZzÿ#¡1ò‚åW„ÆÆEw:~¬%¬]³*o¿1{Þgž·M%V6Žh ‹–œ–œw‘L°®• Ø$S$@€ @€@ü¦EuA€ @€@ï·Ž¼*û* @€ @€BÎ\°(o€cìãõ×^ ç^pIhhh,¤Ë‚Úœ8~, œ|%µíâs/LmSî 2™Lˆšçž¿<ÄL€Je ¸V*sßÌš@OÿøHÞØÅxʵɭûÚžÎír컼÷– @ Jjªd–A€ @€@ L©þ|ü¯…ø‹ß  @€jˆÏ¸ñY7>ó* @€ @€BbÀ]ó¨Ñy›·ž8žúñ¼múZùÌ“„¶¶Ö¼§7!̘57o›r®l12\pñáËÿò߆ÏÜt›àÆrÞ¬ææZ)I @€T¸€Ç ß@Ó'@€ P)çÔÏ=ñwÙu3*eÊæI€ @ ¯@|¶Ï¸ñYW!@€ @€ôE f¼üªëROY»fUXµò…Ôv…4XùâÏÃúwÖ¤6]~ù5!ίR˲‹. —^ùÉ0jô˜J]‚ywp­tÁð– @€U* À±J7Ö² @€”£@ÌjóÿLøð£n’ͱ7Èœ @€ ˆYã3m|¶•¹± 2 @€ @€Î:gIA™_xæ‰ðzè¡ðCï¬]^~áéÔ¦L›Î:{qj;  ¦€ke0µE€ @€Á¨ü!H€ @€Àp¨ËÔ†;š¯ ŸyExáÄÚðbòZ×¶5ìê<ŽeO gk'@€(S¦Lc˜Z3.œ]?+\Ú¸(\Ö¸84düóz™n—i @€ @€À0hi9^õŲYñÒe‡ÚÚÚ>Íçªko+¾ý?C6›Í{Þãß>س3\rùµ¡¦¦ð¼áÅgV¿öRÞþOV^~uzVÉ“mý¬×Jú^¹VÒ´ @€ @€@©üF©dõK€ @€@^†L}øÄˆór¯¼ U @€ @€ @€èAàè‘ÃáùŸ?ÞCÍÐZ¼ô‚>8N˜89œ{Á%auš«^y>ìÜþ~¸ìªë´é³R¹cÛ–ðü3? 
»wnKmœ—ÌcúŒÙµÕ¨²\+ù÷˵’ßG- @€J- À±ÔÂú'@€ @€ @€ @€ @€½\|éÕaú·B BK+;·o |ï›aâ¤)aá’eaÂÄ)aô˜qadssh9z$>t0ìý`wX»fUØ¿wOZw§ê§L›.½òS§>{C \+å¸+æD€ @€ p¸¡ @€ @€ @€ @€ @€@¿ÃÍ·~)<øƒ{±–£õƒŸ}걂ڦ5jÙ>}ã­}Î>™Ö¯zÅp­[T @€(šò˜†Y @€ @€ @€ @€ @€á)0~¤ð¹/|9Œh9¨#›G…[n»+É9vPÇ5þ ¸Vú+ç< @€”¯€ÇòÝ3#@€ @€ @€ @€ @€a"0aâä\°á`9Ž=&|þö¯†0¦¨$×J%í–¹ @€ @ ]@€cº‘ @€ @€ @€ @€ @ ä1pëö_ýz˜wæ9%kÞY Ãm_üµ0vÜø’Ž£s¥p­”JV¿ @€|ºÁÒˆ @€ @€ @€ @€ @€@O1³â 7ß6o\žyò‘pøÐÁžšõëXÌyÕ5Ÿ g³¤_ç;‰@9 ¸VÊi7Ì… @€ýàØ;g @€ @€ @€ @€ @€’Ì· Ìœ}Fxë×Â{ë׆;¶ö{œ‰“¦†sÏ¿8 l\êêüÚ`¿!X–®•²Ü“"@€ @€@Áþ¥¢`*  @€ @€ @€ @€ @`0n¸é¶Á¦ìǨ««ç]pIîÕrôHظá\fÇC„ø¹µõDkhn¦Î˜¦MŸfÌœ&M™Öc;+_ÀµòáºV*ÿ»l @€ _ŽÃwï­œ @€ @€ @€ @€*D`dó¨°ä¼‹r¯“Snoo--G‰cÇB}CChÑG„ššš“Mü$0ì\+ÃnË-˜ @€ àXáhú @€ @€ @€ @€ 0<êêê˜1ãBˆ/…^\+½Ò¨ @€ @€À ø3MC¾&@€ @€ @€ @€ @€ @€ @€†Ÿ€Çá·çVL€ @€ @€ @€ @€ @€ @€†\@€ão  @€ @€ @€ @€ @€ @€ @€á' Àqøí¹ @€ @€ @€ @€ @€ @€ @€!à8ä[` @€ @€ @€ @€ @€ @€ @`ø p~{nÅ @€ @€ @€ @€ @€ @€ @`Èê†|&@€Žlgxµu}xþÄ[amÛ–°£cohÉžÙäÿ @€ƒ! ™02ÓfÔN ë無 „ÚŒ¿I6þÆ @€ @€ @€ @€ @`è8ý˜ 0ˆÙl6ìêÜÚ1 @€T¢@|®Ï·  @€ @€ @€ @€ªI@€c5í¦µ @€§ <â­Ó>û@€ @ ’<ßVòî™; @€ @€ @€ @€= pìIÅ1 @ *Ö¶m©ŠuX @€(ð¶ç[_ @€ @€ @€ @€ªL@€c•m¨å @€¿ØÑ±÷¼#@€ PáÛ=ßVøš> @€ @€ @€ @€Ý8vñ™¨–쉪Y‹… @€ð|ë;@€ @€ @€ @€ @€@µ p¬¶µ @à”@6dO½÷† @€@¥ x¾­ô4 @€ @€ @€ @€º pì.â3 @€ @€ @€ @€ @€ @€ PrŽ%'6 @€ @€ @€ @€ @€ @€ Ð]@€cwŸ  @€ @€ @€ @€ @€ @€ @€’ p,9± @€ @€ @€ @€ @€ @€ @€î»‹øL€ @€ @€ @€ @€ @€ @€”\@€cɉ @€ @€ @€ @€ @€ @€ @€tàØ]Äg @€ @€ @€ @€ @€ @€ @ äKNl @€ @€ @€ @€ @€ @€ @ »€Çî"> @€ @€ @€ @€ @€ @€ @€%àXrb @€ @€ @€ @€ @€ @€ @€Ý8vñ™ @€ @€ @€ @€ @€ @€(¹@]ÉG0 pJàŠ/þÍŸúÐÛÿø¿Â5ó¦÷PÑåÐ×?Û僷 @€ @€ @€ @€ @€ @€ àX¡gÚ P™™Læùfþäêâ+_ùúg³ùªÕ @€ @€ @€ @€ @€ @€*B ¦"fi’ @€ @€ @€ @€ @€ @€ @€@U p¬ªí´ @€ @€ @€ @€ @€ @€ P+cŸÌ’ @€ @€ @€ @€ @€ @€T•€ÇªÚN‹!@€ @€ @€ @€ @€ @€ @€•! À±2öÉ,  @€ @€ @€ @€ @€ @€ @€@U p¬ªí´ @€ @€ @€ @€ @€ @€ P+cŸÌ’ @€ @€ @€ @€ @€ @€T•€ÇªÚN‹!@€ @€ @€ @€ @€ @€ @€•! À±2öÉ,  @€ @€ @€ @€ @€ @€ @€@U p¬ªí´ @€ @€ @€ @€ @€ @€ P+cŸÌ’ @€ @€ @€ @€ @€ @€T•€ÇªÚN‹!@€ @€ @€ @€ @€ @€ @€•! 
À±2öÉ,  @€ @€ @€ @€ @€ @€ @€@U p¬ªí´ @€ @€ @€ @€ @€ @€ P+cŸÌ’ @€ @€ @€ @€ @€ @€T•€ÇªÚN‹!@€ @€ @€ @€ @€ @€ @€•! À±2öÉ,  @€ @€ @€ @€ @€ @€ @€@U p¬ªí´ @€ @€ @€ @€ @€ @€ P+cŸÌ’ @€ @€ @€ @€ @€ @€T•@]U­Æb @€ @€ @€ @€ @€ @ ¬þå Ù¼óÙ¸#<ù³×ÃSy©$@€¨JŽU¹­E€ @€ @€ @€ @€ @€òøúgSæ‘ Of¦ÿÖŸ¤´RM€ P…5U¸&K"@€ @€ @€ @€ @€ @€ @€Ê\@€c™oé @€ @€ @€ @€ @€ @€ @€jàX»jM @€ @€ @€ @€ @€ @€ @ ÌêÊ|~¦G€ @€ @€ @€ @€ @`À»wnï¬]·Ÿù …™³ææm£’ @€Š' À±x–z"@€ @€ @€ @€ @€(Sö…5«WæÝ¸ñ8æRI€ @€â Ô·;½ @€ @€ @€ @€ @€(ŽÀ¡ƒûÃã?y 8é… @€ÊN@DzÛ"@€ @€ @€ @€ @€ oööö°ê•çÂkÉ«¦¶vxcX= @€ªX@€co®¥ @€ @€ @€ @€ @€*M`óÆwóO=<›ºÇJÛAó%@€ @€@á ·Ò’ @€ @€ @€ @€ @ D‡L ›Þ[W¢tK€ @€å& À±ÜvÄ| @€ @€ @€ @€ @€ÃH ££#¼¾ò…ðêËÏ„öööa´rK%@€ @€޾ @€ @€ @€ @€ @€C&ððƒß [·¼7d㘠@€†N fè†62 @€ @€ @€ @€ @€Àp8qüØp'°~ @€ [ŽÃvë-œ @€ @€ @€ @€ @€ @€ @ÝÐ md @€ @€ @€ @€ @€ƒ#0aâ¤pÁÅWälòÔéyëU @€ @€@q8×So @€ @€ @€ @€ @€e(0iò´_  @€”@MùLÅL @€ÅÈ„Lq;Ô @€!ð|;„ø†&@€ @€ @€ @€ @ $KªS @ FfËaæ@€ @ (žo‹Â¨ @€ @€ @€ @€ÊH@€cm†© @€Å˜^;±¸ê @€À Ìð|;„ú†&@€ @€ @€ @€ @ K¡ªO @ ,ÕÏ)‹y˜ @€b,ô|[ F} @€ @€ @€ @€ PFËh3L…(®Àå‹‹Û¡Þ @€ ¡€çÛ!Ä74 @€ @€ @€ @€%àXV @€å paÂ0µf|9LÅ @€ H >ׯç[… @€ @€ @€ @€Õ$ À±švÓZ @€Ój35á®Q×vÌ @€•(Ÿkãó­B€ @€ PYÙl¶²&ÜËl«e½,ï´ÃCµÖ¡÷´ÅñCµ­§+µuÕð¾”Õü]ëÉm¸­·'Ç @€ Wºáºpë&@€†‡À #–‡'޽Vµm ¶J @€ª8¿þÌŸk @€ @€òhkm ;¶o ‡üðuø`8òÑû––#¡¶¶.4ŽG„M#Ãä)Ó”i3ôé³Â¨ÑcÊfQ®ãý°}린oïáø±–püxò:v,´¶žuuõ¡iäÈÐÔÔœül#“×´³Ãì¹óÃÈæQe³ŽB'rðÀþðÞ»o'{v Iö,îߑÇBGG{n=qoFFÆOœÎ\°(gPhÿùÚíÙµ#lÚ¸þÃïÉGc=r(wJ㈦0"yEÓè;cÖÜ05ù¾ÔÕ•ç¯úµµµæwíØŽ9üáëèáp¬åh¨©­Í}ïãwü„IÉ÷~F˜2uFn]µI]¹–C÷‡mïoìÙùÑup,¹Ž…É+þlokûðºNÖuòÚnHÞÇë#îUܳqã'–ëòBµÜ³Ê¸n÷Þööö°é½ua÷ÎîÉý"wï8z$„ä|øß—QÉ=°9wŽÿ™5g^Ñî¿}ÜÍ  @€$Ì c @ ¸òWÿvÀžó™ÿ6à.NÛ‹Ç7V†ÃCGÃoïû«°³cßik÷ @€@¹ L«þzÂï„15Íå>Uó#@€ @€ ;Ý»¶‡µo¼Ö¯[“ xê@ ‚Z|î…áœÅçåÁúÓÇ@Î9qâxxëWæ I°I²žþfÏš4yZ˜sÆ™a~8iòÔL)õÜõï¬ /?ÿT¯í.¸øŠ°héù=Öwttäkâš·½¿©Ç6½ŒAz —œ–,»(Œ3®·f½ALñ»Çþ`÷Î^ÛõTƒKŸ{AXvÑe¡¹ytOMúu,Íò¬³‡K®¸¶Ç¾wlÛÞ~kuxoýÚƒûRbç‚…KÃÂÅËÂÄ_ ™WKXµeÓ»aÛÖÍI€ïæ\°U!çåkƒ´fÌœæÎ?;DÇššš|Í¥®îY»wnÿä×{$ òíL®Ñ|eÌØñùªs÷ŸÏÜt[Þ61¸î¹§ÏÛæ¢K¯ ç,:/o›TVã½÷Üó—‡øê©Äÿ¶ÄkkÝÛoä¢ãý°/%xÏš3?Ì?ka8ëœ%eq=õeþCÝöúyO õ ?þ83ý·þ¤ð*»åU_úßH®?®ìU|8ûL&óÇÏ|ç7‡ÍÞUÞY(7òü³Nå¦d> @€@E 
Ä_ÿ‹q¿þàÀ?r¬è4y @€ÀðˆÁñ9VpãðÚw«%@€ @€Ê[ õĉ°þ7Ã[o¾öîÙ5àÉØ¿7 ´y,¼ôÜÏ’ Çe¹`²HWê‰Þxí¥$Øîµ>§õ4·˜é.¾^}ùÙ\ã%—_S² v­IPf̰×[‰õ=•·ßz=¼øìÏr™{ªO;’^õ…ÜëÌ$`íÚëo õõ i§å‚FW¿úbxåÅŸ÷Ûº½½-¬NökÍê•á¼ / Ë/»¦(>i–Ç’,žÝKÌ`øäc?ÌŠv¯+ôsìãU/ç^13æ¥W}ª_A£…Ž×[»˜±sÕÊçs×sZ`]o}ôvõo¿žúéû­1_ß1x#¯mÜðN¸ú“Ÿ óÎ<'_ó~×Å †/¿ðTˆwýî'߉1£ßÆwßÎl^|ÙÕaÔ¨1ùš—¼.f {:Ù·˜­°Xeú·Âჟ¿34å ¨:‘üüôÑ‡Âæë‹2tÜ¿×^~.ìÙµ3|úÆ_I&ÄÁ,1kc̾wôÈᢠ7&AK—\~m8?ÉP9%6Æ ©µkV¥f ,Æ|:žzâGaåK?×ßxk˜6}V1ºMí£îY©‹¬ÃñÞÿóêKÏ$ßûgú¸§íÝ¿ïƒð£¾› pŒæÍ£Š—Õ¶§ñ#@€ @`pjg£ @€†^ f¾ùËñ¿~oÌíajÍø¡Ÿ @€è&ŸSãój|n•¹±Ž @€ @€!ˆAÏ>õXxâ‘.Ipc×eµ=ùá÷ÃÓOü¸¨!qŒû÷…¾÷Ͱê•çKÜxr-Ùl6¼­¸ç†í[7Ÿ<<è??ؽ3|ÿŸþ®¨Á'±{×öœçáCO:ígÌZ¶â;W´àÆ®oÝò^x`ÅÝ!oV‰üÁ=E n<9÷˜=ñ…gžÈe3ßR–Íß ß½ûorÅÅÎÚ˜6ïXùPbƒ:KYªåžUJ£Áì{8Þ{%àñ¿71sm©®é÷7oøðžt´x׃ù½0 @€§ p<ÝÃ' @ Ê2™LølÓ%ᛓþðã¾>×ty8«nfhÎŒ™äÿ @€ƒ%Ÿ?ãsh|Ï¥ñù4>§ÆçÕøÜª @€ @€ ½À±–£á¡û¾ÞXõÒ Næ­7_ ?M*‹•eñµ«Ã÷“`»=»w ê:Nœ8~xÿwrÁŽƒ:p2ØÞv‡¾w8x`Ɇ>x`_.À¦#ÉÀÙµÄL‡ÿœ Æ€¶R•˜Å,f¦Œòž}êÑ’*\Ãê×^ O>öâ}ïOö{ò纵o„Ÿ<ô½3¦U‰™üûñ}É5ñzI¦P-÷¬’à A§ÃñÞ{üXKrßÿvˆAà¥.ñüоâP @€¨lºÊž¾Ù @€ú'P›© ËÏɽú׃³ @€ @€ @€¨fÝ;·‡G~ôý’d¬+Ämý;kr^×ßxë€þ Öº·ß ?{ô¡B†,I›¤ùäã? 
û÷í —]õ©­¥Ð Æ ¯Ÿ<˜²µµzJ¿Û>t ¼¹zeXv᥹>b沘…³XÁ©ù&¿#3çÌ /Ë×l@u;·oÍÛö¥“‘#G%A„m¡µõD_N˵a106~ïëêŠ÷믿úbxþç÷y>¥:!ŒÎž;?4]´!ªåžU4!îh8Þ{cïO’û_¼V9°oxü'„[n»k°†4 @€%(Þ¿”`rº$@€ @€ @€ @€ @€À` J‚ÖüÁ=¹ ­¾Œ=jô˜pÆü³ÃØqÂÈæQ!zµ&ÁZû’l{’W̺ח¬V1sÞ´³Ã¹ç/ïË4NµÝºec’¯ïÁ£ÇŒ sÎ8+Œ3.·Ž¦¦‘¹`µ–£‡“¬„‡Ã¶÷7…öì<5N!o^õ…PS[.½â“…4ïw›`óÈ~>˜ÚG  =v|›¼êêëÃþ½{Âþ$X¦3é£/åÕ—ž —,Ëo>üà½áøñcyO¯©© &NIl›ÃˆÄ6fŒ’1(èDʹÝ;^ùâÏÃ9‹Î+Ià衃ûÃÃIÆÃhš¯Lž:=dùaÀÞ˜P[[›kÞÖÚ&}¼—|×'¶…ìIg‹Ý¾usØüÞú0wþ‚‚ÆÖˆ @€òàX~{bF @€ @€ @€ @€ ¡ÀÏúpŸ‚-9?,YvQ˜4yZ³îtñÁž]¹À¦Û¶ôضûÁžùi˜•dé?aR÷ª¼Ÿcb_2 ÆÀ¡%ç]” –›4¥çut0ã½»nMXõÊógê{íåç´i³J„)cÀÞJmm]8gñya鲋“ ÃÉkƒ¬ØÞßü^xñÙŸ¦÷Åâx¯½ülØ·÷à ÖuúѹóÎJÆ^fÍž?Ö,›ÍæÆ}cÕK¹ŸkÐÃÃI@P œwæ9=ÔìP dÍWâwòÚOߦN›Ùc³ú††äš˜š{-¿üš°îí7’ïþc¹ ßOèr0Žýöš×â¥çw9Ú÷·18ó¹§+øÄ³ž{A˜6}VŸ‚+÷%Á±o½ñjX·ö‚¯‡÷Þ};ìMJ¹gMŸ9;üÆïüŸ=.÷ÿô¹€Ñ+“ƒñûôõßú½U—Íñázï}{ͪԌÇñ>µàœ%aÆì3ˆM=îY ~gíëáÍ×WæîÅ=6êáàóÏ<fŸqf¯´=œâ @€e$ À±Œ6ÃT @€ @€ @€ @€ZuI¦¹ÜVH™:}f¸êÚÂä)Ó ižk¾>ûWBÌίŽ9œ÷ÜŽŽöð³Ç~¾pç¿ÈÛ®keìóÇÜb–¸BÊÌ$Ø$®£/A”cÆŽ .¿2ÄàÎX» )O<òÏáö/}=ŒI²&–¢ä n¼ð’«ÂyI6̘5±·=£C|͘9'Éùý$»bz6ÈU+_è­ËF¯ºæ†$ç¬^ÛÄŠL’ÅlN _7¼žøÉ¹ìŽyOJ*ß|ý•’8æ÷¼ / —^~m¨M²`RâÚb¦É¬û³G 1Ã]Zyñ¹Ÿ…3,ê14íÜ“õ«_{1 ’Úòc¯?cÖÒ˜M±/×r×Îb°l¼†â5ñd’Ån˦w»V÷ú~Cä8ÐÇj¸gõ TaÃùÞ›ï¿e1°19÷TÞ}‹cð÷¹ç_’¡/OW‡§“?8PHVÝIöÝ÷7m(i}÷¹úL€ @€@ñjŠ×•ž @€ @€ @€ @€ P¹ǵœímÁÂ¥I âWû·>÷…/‡††gòë.¸{ç¶–M¯ë9¯¼ðthi9ÒõP¯ï/¾ôêÜ<úÜØµ³¦‘ÍáSŸ¹%\ã­eÎjm=‘ þ t´·wí¦¤ïcÖÆ¼vI`“/¸±û$b`âí¿úõ^3svoßÓç³Î^~ùޝ¥7v?7Ý’ÂÆ¬uieÇö÷ Ê4™ÖO¡õW|âúpÅÕ×ÜØµßææÑáÆ[î 1›eZ‰×ã+/ý<­Y¯õ1àjåKÏôZ²bì¸ ¹=êopãÉ~âϑͣÂ/}þΰøÜ »îõý{I ó@JµÜ³bPNçº÷ž¾uIô5×ýR¸áæÛ nìzv Š^˜d¼½å w|ßÞ˜d³U @€¨LŽ•¹ofM€ @€ @€ @€ @€@‘ž{úñ†ÒÊÒeç‚úb¶¿”qã'†ënø|A]äËصƒC÷‡·ßz½ë¡^ß_yͧÃÅ—}¢×ú¾TÄ@¾n¾#Ä`´²wÏ®‚ç˜ÖWZ}㈦\çü³¦5í±>žÑ¥WõX—vpa’Ý2~Æ Ÿþ”)Sg„‹/¹:õÔ˜Ýìƒ=;SÛ£Á¥W~*œwÁ¥ꪶ¶6 8½=Ìž;?µŸ7W½bf¶þ”µkV…ö¶¶¼§ÆkøÓ¿tkh5:o»¾V^–8Å`ǴׯÙþ–j¸gõwíåvž{ïé;¯óÏÝvWX´ô‚Ó+úø)f¾’…”Íׇl6[HSm @€(3ý [™-Æt @€ @€ @€ @€ ÐÛÞëÞ~#õÔYsæ…«®½!ÄìRÅ(sç/Ë 2|ó†°ß©C¾òÂÏ ðXžd3<÷üKRûëKƒ˜•/‹RbÀfggg!MÔæêO~¶ÏÙ»xÆü³Ãè1ãºÎûy⤩¹ïIÞFT.=yAcÇ,Ÿ¥.ç,>/\pñåE&?ÅàÞ´Œšñ;òìSõkÌMÒ³¹ñÊÐÙÛÄÃU×|¦·êÓŽr]ŸvÂGªåžÕÓÚ*ñ˜{ïé»vÍõ7‡©Ófž~°ŸŸ¦Ï˜bÖä´r¬åhس{GZ3õ @€”¡€Ç2ÜS"@€ @€ @€ @€ @`pÖÜX_ß®¹î¦¢OìÂK® cÇMHíwÓ{ù¶b ÔúwÞLí'f¼ 
ì*E‰Á€‹’Ì…iåð¡aÃúµiÍTƒkbfÉ–ÌznhØ—rÝg?ßïÌ]lj€sÎ8³ë¡ßïý`OÇ‹u°ids¸âêë‹Õ]®ŸÜxeA€…÷vÜáCS³ZÖÕׇe7È·ëæ/XTÐuÝrôh×Ó ~_ ÷¬‚[æ Ý{Oß ³Î. ñô³òŠYQ )ûJ|/,dÚ @€ ÐwŽ}7s @€ @€ @€ @€U$ÍfæëSWtÙUŸJ²éMm××1€.„¤•­[6æmòÚËÏ¥fo¬­­ Ÿºá–PSSº_»âŸ.(ëàªWžË»žV^þ‰âäÍ_°°àéÌž;?L˜8¹àöi §L›‘Ö$œ8q<µÍ@ÄL˜#šÒEç.8gI˜9ûŒëºÜ\ÀõÙµ}Z0pl»pñù%YS×yŒŸ¸|üø±®§ô¾ZîY-¶¹÷ž¾IË.¼ìôEøÔ&°g÷ŽÐrôÈÇŽw=ÐÐ8"œ³xY×CE}öÂôÇÛßíím=Žž¶lz·Çº®.YVPH×súú¾¾¡!\|éÕ©§íý`wرmKj»þ4ˆAsÓ¦ÏêÏ©=žÓÜ<:Ä@ÔBÊÒe}Ëö˜Ög̸™VZKà8iÊ´0ÿ¬Â<ÓæÚ½~Éyu?ô±Ï›ßK@îzÒæé×B1²{v³§÷cÆŽïéðiÇÚÛZOû\ȇj¸g²ÎJhãÞ{ú.͘9'Lœ4åôƒEú4mÆìÔžŽkIm£ @€å'PW~S2# @€ @€ @€ @€ žÀ¦ ëR‹ÁPuu¥ûu«˜rƬ¹aûÖͧͥ®¾>Œ71Œ›¼’ìUímmÉ<êOk?Ä`ÁB2Á-ZzÁÇÎ-Å3Ï^ž}ú±x·íýMazSìrÆü³‹Úe n9rT8zôpÞ~c†Ì˜Á±˜edó¨ÔîJ™Áqa {ãÂæÎ[F$Ù!ó}wîØN$™ Í"ùKŸ¿3ÄLn‡ì‡ž|?z¯¡©Óg¦º´Á˜±ãR»è-h9߉ÕpÏÊ·¾Jªsï=}·Jù‡bÐúÛkV>`·O­'Nt;â# @€• Pºq«„Õ›# @€ @€ @€ @€Ã^`ÓÆôìp¥òŠ›°ìÂKĉ“sŒ1˜1¾šG–90 ¦•˜ pÒä©iÍŠRÈ.>/¬~í¥¼ým/QÇYsæå·?•q/Ò''ÙkjjúÓ}¯çÔ×7ôZw²¢­µïYOž›ïgmmmX°pi¾&®‹cœuÎÒðæë/÷ÚW.KÞæ÷‚s–ôÚ¦kE܃1cÆå^!|ü»Ð–dM,4#g×~ûú~̘ô Ž}í6TÃ=«Ï‹.ÓÜ{Oߘ)ÓÒ3Ξ~FáŸâ=8­ô'`8­Oõ @€”^@€cé@€ @€ @€ @€ @€@™ :t ìK²æ+M#›C)ƒ6NŽ3ÙÅWJ!A6¥Të>ï8^Z€ã®ÛBGGGˆAnÅ*1‹b -vÉ×ìÊßk)¾'1X¯&ñéLœz/ÙÞ«P37É„ÙØ8b=vjÌz™/À1ö²ù½u8¦ZHÐhZ…Ô×õ5oö¥TË=«/k.ç¶î½¿Ø\ÆáÜ{OŽÐÐìÝÑÑ~²¹Ÿ @€T@qÿTT-ÜT  @€ @€ @€ @€ °m˦T„Iªúܸ@IDAT“§¥¶Ê1ÜŽ2!–"ø.ߺ'Nšš.æÿü1e÷Îíùºés]Ì‚YŠì|#F4¥ÎeÜ„I©múÓ`°òºÏm괙ݕäó¤)é™EßO28VZéìÌ”úájúàX ÷¬JÛÇÞæëÞ{ºÌää¿•¥¸÷ž¥¾¡ñäÛ^vvö-`¸×ŽT @€ 0¨•Û` @€ @€ @€ @€”“ÀÑ#‡R§3irzðUj'%l°ïžÐÖÖšw„˜pRp8˜%Ž9yJzpè®[‹:­QcƵ¿“¸Sªl‡ud<9Ïbþœ¹,\3oz¸6_uüÿìÝ ”å ø/²J· â¾±±  ¦mÙ`ƒÝ6X€ ¶žnúòôëîÝ·½og§×xz×ûÆÛózvzúy»÷u»iÛXÍÑ`6ÆÆæÆæ÷%H$$•¤Ê؈‚„:3"3#³2#Á+GFÄßñûçU*ç¿> @€ @€ @€ŠÈ—,4³¸ÛPÓPŽ;¹ßè!ÎÌ‘à¸}ûÖÑ·´üºR¢$Ö1Z¨`æÌìäÔªŸpkžä¨<óuBÅSœÏ>ýxxòñGÂK/¾0²ãE»êtƒ<É¢]5¨íŒgïØÀyßõpD€ @€@~ Žù­”$@€- üâ»_º¤^%·þõ׿" ŽõŒ\#@€:+0WÃÝ;Ö†Û† í|&¬Þ¶ÆC!Nþ³ К@”üð?7šöXŽœ±8yÖÑḙ‡…¨ÒZÅî&@€ @€hH O²ÐŒ™³ª³Ó…·çIpœ¦1äI¬,zÇA‹+â=Øéd¥<ólkŽW³ÆþêÆWÂý÷Ý{ôÁ'A-«¾N_/Ã3«Ófíjϳw¬läÿß âˆ @€Ü~““›JA @€ Ð?ÉÊÓá‡Ûï—m¹)¬¯¾Ú?7RH…߈·‡µ»žùºvÛmaieQX=ÿ´pÆìBEì¦ @€ @€ý)0¼kWرc¨îà’ÕÓ¯nÞº:É&GbåÎ; å­H²)Ä3Ïê›…4ôV%3fd¯”º}[ó«}¦‰wüò§á¡ûï éÿÒ‹[YžY½h?YŸ={'SqŽÀÔÿpÃÔ×Ò+O® +N¹àëuÿ²¬ê·à* @€@· HpìÖÈè 
@€˜&׫o„¿ÜtY¸wçãÓÔÍè_4¡ø?¿þ/á¦m÷„¿X¸:,¨Ìë_ #'@€ @€t@`ç®9Z©û9û÷·¿H\­f6U¦gÕ}³âbfø¦¥À`‡WÂÞ•9Î|ÿ;mzõ\Æ Œi³{ï3y"ãÞû,ÏìÕsÏ<^-{E¹ÌŠrHWn|ä¡ßd–Üg¿2Ë(0}é{¦Ýɽ[¶¼Òvêm»-X˜kõѼ}ÝoùAõš+äÚóÏ>kÓj5{•ÓÑ*Ë3kô˜zùµgo/GOß  @€è ŽÝ ý @€ @€Ó$‡xšZÖ,˜«h)K€ @€hL`YŽäÀGÎNÖk¬Õ‰¥ó$QNÕ×¥ËöteÇñ­<ôÀ=ãOµåøé'׆m[ßȬ{ßåf–Q`zÍ‘¨ÚJ×>”<|С٫7¦}ز9{õÆ´Üî ¥»¶nÏ<õX®ú«I2p£ÛTÏÑõtû3kt_Ó×Q?5æ8nt©Ë1w·ïÀ³·}¶j&@€ @ $8öO¬” @€ @€ @€ @€IŽ8úØIÎŽ=õêÆWºçŸ{²À£íÛ¶†gŸ~¼nƒaïe“¯Ô8sæ¬pèÇÔ½?½øàoîÉ•x˜YQÕj5Ü}Ç­uJ¼y)íóþ’YNéxè{Ãð®]méÄŽCáþ_ß™Y÷A‡äKpܾ}{f]iv'˽þú¦714/nexfs¥Rÿã¬ízŽïG£Çž½Š)O€ @€‰õÿE8±¼3 @€ @€ @€ @€(•ÀÂE‹s%ÚýìÇׇá&V[˃u×í·†¬D§4qæ¬YSV÷žã>0åµÚ…¡íÛÂOnº®vØ–ý=wþ"¼òÒ‹™u§IZ3fÌÌ,§Àô lN’õî¼ýgméįnýqxcËæºuÏ™;/Iìݯn™ÚÅ9sæÔ^ÖÝ¿üÒºº×[¹˜&Oþø‡×„;w䪦™gJYžY£²S×f¬F·Ñ®×ž½í’U/ @€ý" Á±_"mœ @€ @€ @€ @€L)ðî÷ž8åµÚ…^÷Þu[í°°ý¦W7†r¬bw̱ÇÕmsñžKÂ~û\·Lzñ©'ͽº\feã ¼òòúpׯò%ÃsìñãîvØ­÷Þy[xååì¤ÕFúŸ®ˆúÀ¯ïʼå½IânE™åÒsæÎÏUî¥õíKp¼ïî_…u/ä_íux¸¹Õ1Ëð̬J²BmÖ–&hwãæÙÛQÑ' @€zI@‚c/EK_  @€ @€ @€ @€h‹Àò‹öØ3³î»nÿyxæ©Ç3Ëå-°sÇŽpãõWf®Þ¸×’eaÉÒ}2«}Ïq'e–I üôæë GZç¦W7„믹ûôáöÛn©Ûöø‹;v ?•ë¸,ϬÚ`gÕY¶VfÃ+ëk/»nïÙÛu!Ñ! @€zH@‚cKW  @€ @€ @€ @€hŸÀû?ð¡ÌÊ«ÃÃá†k¿ž|ü‘̲YvíÚ•$y]6$«fmyú–Ö‘&=í³ßYÕ…´ít¯}(³lžéê~W¯¹4W\šØxʇ>–§ZeºHàµMÿþ˷–ͯ·Ô«t%Ôk®¸,lÛúFf=:í“¡RÉÿ1ÇY³ç„¥Ëöͬ7}¿þêÖ›3Ëå-&€¦+—þàêï„ôÑÈ–&97»åy.tû3«6ö4vYÛ¯ï¹=WuV=í¸îÙÛUu @€ Ð/ùÿåß/"ÆI€ @€ @€ @€ @€@_ rØQሣߓ9öjµ~ôƒ+ÂÝ·ßÚô*pO?ùX¸ü[ž}:{5ÈC?:pС™ýª8ýÌÏ„9sçէܧã¸1I°üÅOo Í®"—Öñ›{ïI|Û¾më”mÕ.TÂéŸXf̘Q;eßC¯¿öj’ÈúÍðÜ3O6Õëµß?ò^É“Üx̱LJ¥{g'+ŽïÈ>þÔ¤Çi²Ü=<éµFN¦ïûëþõ»áŽ_þ´‘ÛÞ.»c¨¹Ó ÊòÌJǒ癕®ù“›®kú¹›¶ÓÎͳ·ºê&@€ @ Ìƒeœ± @€ @€ @€ @€ @ SWœÖ¯{.lzuCÝÛÒÛn¿í–ðÈC¿'ÿÖiaÿ͵Ò\ºúÝ­?ùQî ÓUÍ>øá×íËø‹sçÍi¢ÍµWþóøK“§‰^=ò`8ñƒ+F¦f̘9i¹Ñ'ÓÄÆçŸ}*üòç7‡ ¯d¯@Y»÷äS?öÜkiíоÒ÷ð÷¯úv8â¨cà '8ÌßmAæ(ÒUùó›Â3Oe'ô¦•Í;?œxÊŠÌz'+pèáÇ„;“dÃá+)¦ ¾GóÞðþ“~+Ì›¿ÛdÕMyîÕ¯„‡î¿'yü& mß6e¹¬ Û¶½1²*a#+UŽ®³ Ϭt<»/Ücô°¦|ýȃ÷…§Ÿ\ö]~àÈû$ªDag’$úÆ›CUÂ'Îúì”÷¶û‚go»…ÕO€ @€@Y$8–5²ÆE€ @€ @€ @€ @€@ÃéÊ‚KV¼òòoäJzmÓÆpõkÂì$ñ€då¸åû”$J-sçÍKV)œÒëirWš •î_zñù°k×®\ýŠ¢(¬8ý·s­l6¾Â4ùç„|(÷ªr[·n ·Üøýð³›oH’5IØÜmÁîÉ8æ´Ÿ®2·5I zcËæ‘ÄÆ'Ÿx4äY±qt¿Ò$Ðw¿÷Äѧ¼îa4¹7ýZ¶ÏòpH²Êè¢=öIœ5kvHWhÜ’¼W6¼òR’<û@CI°éûþ·>zfHëifK߷Ǿï¤pϿȼ=MTN“M’ßý¾Bºúã‚ 
GÞ÷ãoN“ÓäΗ_z1<üÀ½áÅ$:Ï–&N¦ófª-íCz=íw3[YžY‹öXœ{øé³çñGœP>}契­ÉJ±ÓµyöN—¼v  @€èe ޽=}'@€ @€ @€ @€ @ pÅÉ ƒ§%+ ÞtÃÕ¹’ÓlO’ŸÒ•ÅÒ¯¢¶§*tÈMWw܉§†­I¢Ù¿¾+wÃûFV—|òñGrß“§àò§qVž¢Êt‘ÀÌ$ÉpÇÐöº=Z÷³!ý*b|3Áø€ƒk©ºãNøàHòåÖ7¶äª'}ßß{çm#_é ƒaÁî G’{ÓdÍÍ›_ »vîÌUW­Pšôü‘z$Ñóg7__;=é~}’øÜl‚cZažY{îµ÷H¢xú,mvK“EÓ¤ò=ïÕl…ÜçÙ[£J @€è#JÕP  @€ @€ @€ @€ KààC Ÿ>wõHÂM® .ô[93qô±-Õ:²^RÏ?üñ¾žŽ-m÷Ä“W„Ož}A˜•$|ÙzKà¤SV„ý’UI;±Í;?œ½êÂd%ÔÖ’ӾΘ93|â¬Ïެ¢ÚLßÓ„ÇtÕÕž{zdßhrcºªåy_ø½pÀA‡…=÷\’Ù…çŸ}*³LV^f¥ÏŠ£ß}\Ö03¯§q›îͳwº# } @€zM@‚c¯EL  @€ @€ @€ @€èˆÀÞËö +Ïÿ7ÉJn‹:Ò^ÚÈÌ™³ÂéÉê‘Ç{|am¾û½'$É^ç$}ViŽŠÒ„µOŸó…p܉œ¶ËÝT¤Ž@º’ᙟ^ö]~`R­_JWÜ;ç‚ß {-YÖzeoÕÖuƧΠ•Jg?&y|²rjš=þ‚‘žì‘+ÁñÉBÆÝëϬÔnÑ{¶dÑ ŽµxöÖ$ì  @€ P_ ³ÿr¯ßW  @€ @€ @€ @€ ÐU»/Ü#œ“$9õ®÷µ=I/MNJW};ôˆc 7ØÿÀC¹ün8ðàà ¯{² Ó„¸UÉXöÙï€É.;×Cƒƒ3FVC,bu½É†½ÿ‡†Ï¬úb˜¿Û› “•iö\ºúäYç]vÛm÷f«È}ßòNž¿N8ùÃc’*g̘öNVt¬·½þÚ¦ðâ ÏÕ+’ûZ/?³ÃigžæÌ—{¼ã ¾ºáåñ§¦õسwZù5N€ @€@ öH?u“ @€ @€ @€ @€Ó"0{ÎÜðáÓ>ÞsÜIáW·þ8<ùø#…ö#MìJë>æØ÷IŒ*´‘¤²…‹¬Æ·~ÝsáW¿¸%¼ðÜÓ…6‘®”wÐ!G„4.MlŒ¢¨ÐúU6}i’ã‡>ú‰pÈáG‡[nü~Øüú¦–;“®ÚxÒ?8èЖëªWAš8¼ê ¿~vË aíÃ÷×+ÚÔµ4‰ò„|(,MÚ™j;òè÷$ ŒÏNuyäü½wÝÎÜgUÝ2y/öò3kϽöç}îâpãuW†“gU£[7­àXë»goMž @€“ HpœÜÅY @€ @€ @€ @€ŒH“TÎøÔy!M|èû³O=Þxcó˜2,ÞsI’Øø‘ÓäÀNmi"ÖYç®Ï=ódxôáß„çž~2lݺ¥éæÓãŽNV¸<üècÜ$ÔV^}“ÄÕÏ®þýðÀ¯ï <øëÐh2Yšôºßþ‡#yO8øÐ#;–;sÖ¬pÚg‡ãO<5Üßááï »vîl:PiRòÈ8’ÄŽ÷™:±±ÖÀ!‡~~Ëî]S·ùÔ†M¯nL‘÷¨ÝÖò¾WŸYóæï><£žXûPXûÈýáÙ§Ÿqçòزùõ‘²Ý˜`íÙ›+„  @€ ЇþDVÝ  @ {âu_ÿrˆÂ%ìáò´N6§- @  >¶þÏ»°WºD€Àd7.ýÚd§#@€ @€˜F /¯Ï$‰ŽÏ?÷TxcËæ°mÛÖ°=ù¿Íœ9+Ì™;/¤«Éí³ü€°ï~†4Iª[¶W’q<ût2ŽgŸ[“¤ÍÚ8F'͘93ì¶Ûîa·»'}s¿tï}ò}÷ï–aèGi’âÏ~|CÝ;Vœþ©‘dÄ© ½ôâ ᑇ~^^¿näýŸ&ÊŽ~ϤIfilú>Y²lßpàÁ‡…yóv›ªºŽß±chdEÅ×=^zñù¾ÿ‡¶oÓ÷´3iÿçÌ™fÏ;òÞOWk\~ÀÁ#«¡v¬³7ԋϬm[ß/<ÿLòlÚÒ×éó)á¼yóGž£#ϣ䙔>SÓÕ+»1¹qª0zöN%ã|Q§tSQUµ\Ï©ÿ®õ­ßú?j½’–GRL§~þワ|ϼ¤˜Ú¦·–ä¹{ÉÏ¿ý‡_™Þ^hèe+8örôô @€ @€ @€ @€iX¼×Ò~½ï„SÞîGšà•&9 mibcšpÓÉßîH/öLÆ~½ïýcÇ‘&}mß¾=IòšfÍžÓ@ŠöƒÀ’½÷ éWm«V«#IhÕêðÈû%}ÿwc²YÚ¯ý¾ÕÝñÑ; @€ 0m±UþÆØW¢1‡=}0,¶câW¦äÕ1s@€ @€ @€ @€ @€.ìòþé P¸!„oü°DYÄåwΈÃÅgPÑ4W!¶ ¶MÊrFlËɉãÛ‰&ÎL˜Î$8ßo'Æ£È3Ñ4þ˜*¶#é™<Ѥ,gĶ,‘œ8±hR–3b[–HN‡ØN4)˱-K$'ŽCl'š”åŒØ–%’Ç!¶MÊrFlËɉãÛ‰&e9#¶e‰äÄqˆíD“²œÛ²Drâ8ÊÛ‰#s† @ +8¶¢ç^ @€ @ o¦3Á±o§i ~C6Mòš%@€ @€ @€ @€ @ ß||«ß߯O€ @€ä˜ÆUþruP¡¦*bÛ´  @€ @€ @€ @€ 
Њ€ÇVôÜK€ @€ô@$ ®´±¶:giCk` @€ @€ @€ @€t¹€Ç.î @€ @€Ý! ®;âÐŽ^XÁ±ªê$@€ @€ @€ @€ @€@¶€Çl#% @€ @€aÀoQJû.¼ZÚÐ @€ @€ @€ @€].à£y] Ý#@€ @€ºC Šº£zQ¼€‹7U# @€ @€ @€ @€òHpÌ£¤  @€ Ð÷Vp,ï[@‚cyckd @€ @€ @€ @€t·À`wwOï @€y³Ã¦…sãMoKÄáŸÞ~ÝC/ž\V$ÝM¿lo $&·„8ùêñMl'Pl'š”åŒØ–%’Ç!¶Mœ™(°iK[¶…0ÎÄkÎô¶Àk[Ŷ›"è™ÜMÑ(¶/b[¬g7Õ&¶Ýbû"¶ÅzvSmbÛMÑ(¶/b[¬g7Õ&¶Ýbû"¶ÅzvSmbÛMÑ(¶/b[¬g7Õ&¶Ýbû"¶ÅzvSmbÛMÑ(¶/b[¬g7ÕV?¶ñ-ÝÔW}!@€蜀ÇÎYk‰4%°e[üÿüè_º¤vó÷þ¶öª·ö§\ðõ(Š$8ŽŽÚï ?‰–}é+£Ïõâk±5±hR–3b[–HN‡ØN4qf¢À«[¢°}G,Áq"MÏŸÙ”ÄvÛØvK =“»%Å÷Cl‹7í–Ŷ["Q|?ĶxÓn©Ql»%Å÷Cl‹7í–Ŷ["Q|?ĶxÓn©Ql»%Å÷Cl‹7í–Ŷ["Q|?ĶxÓn©Ql»%Å÷Cl‹7í–ËÛnñÔ @€@Y$8–%’ÆA€ @€LX²0K†P“¯ê›ûø­ýȹä|í8Nˤǣö£_Ïš¬™šp¢$•JIb @€ @€ @€ @€è! Ž=,]%@€ @€øí“B¸øÌÆîQº7f Äa0ýÍÖ[I©Én$95Ýœ{ëø­Ã·Ï¥Çµë#¯“ÿIVkö @€ @€ @€ @€èœ@òñ- Ð)~îÿù¼u#íÅqü•_|÷K—4rO#eo¼íцûÔHýµ²?þåá'·?U;ìûý‡O<0|ä—ÂAldžQlÇz”éHlËͱcÛ±e:Û2EsìXÄv¬G™ŽÄ¶LÑ;±ëQ¦#±-S4ÇŽElÇz”éHlËͱcÛ±e:Û2EsìXÄv¬G™ŽÄ¶LÑ;±ëQ¦#±-S4ÇŽElÇz”éHlËͱcÛ±e;j÷gázÉëÔÏÿÝ—Kz©ÏSõ5Š¢K~þí?üÊT×'@€d T² ¸N€ @€ @€ @€ @€ @€ @€(Z@‚cÑ¢ê#@€ @€ @€ @€ @€ @€ @€2$8f)@€ @€ @€ @€ @€ @€ @€- Á±hQõ @€ @€ @€ @€-Èlv@IDAT @€ @€ @€™Qf  @€R ÜxÛ£q©hp @€zTàî^×ÜôpK½ßÑËÂÙ§ÕRn&@€ @€ @€èM|¸ÏŠwièNýüß}9ŽãKº´{ u+Š¢K~þí?üJC7)L€%`ÇQ^ @€ @€ @€ @€ @€ @€ @€àØg­ @€ @€ @€ @€ @€ @€ @€£$8ŽÂð’ @€ @€ @€ @€ @€ @€茀ÇÎ8k… @€ @€ @€ @€ @€ @€% Áq†— @€ @€ @€ @€ @€ @€ @€@g$8vÆY+ @€ @€ @€ @€ @€ @€ @€À( Ž£0¼$@€ @€ @€ @€ @€ @€ @€:# Á±3ÎZ!@€ @€ @€ @€ @€ @€ @€F ŽzÝ÷/Ï9ÿ¢ßKÞW"ŽŸ¾ò{ßúZÝ2. 
0©Àg>ûÅÏU¢øÔI/¾urG5|õûk.}¾^™²_;÷‚/þ·8ÄŸ?Îx8|îª5—Þ6þ¼c @€ @€ @€ @€ @€ @€½( ÁqLÔ¢3£(œ;æÔøƒ(Ü™œ’à8ÞÅ1•ø#!D¿_¯èŒÊðß'×û6Áñœó¿pBÇÿ6J¶ NQ<{Â9' @€ @€ @€ @€ @€ @€ô¨€Ç œn P>+V †hà¿Ea’äÆò ׈ @€ @€ @€ @€ @€ @€ú\@‚cŸ¿ ŸîØcÉþ_I’Oìžé r ÇÕp÷Žµá¶¡ÃC;Ÿ ë†7„­ñPˆ“ÿlL.|Ÿ s£YaŸÅáÈû‡“gŽ›yXˆ*“ßPÀYsµDUôÀtÌÕ²9[“°'O`ºæ«¹š/>J¨ ˜«5 {Ý-`®vw|ôŽ@M`ºæjÚ¾ŸƒkQ°'O 6_—%¿>ªC¿N{f®æ‹RjÓ5Wk훳5 {ù¦kΚ«ù⣚ÀtÍÕZûö @€úQ@‚c?Fݘ è:•«V,Éù_»®c:D „q‡n¿#\¶å¦°¾új GhHÚ'&¿okw=?òuí¶ÛÂÒÊ¢°zþiáŒÙ'$kG…5n®F©¢>èä\­ñš³5 { dÍׯjË.m®f)A`2¬¹ZäÏÁiûæêdQpŽ@¶€¹šm¤nèô\MÇì{k7D^zQ 6_K~œ~µó÷Á©¹Ú‹ï}îNÏÕÚ˜ÍÙš„=Æ:=gÍÕÆâ£4š@§çj­]{ @€ý, Á±Ÿ£oìt…À¹ç~ñ˜0_’´®èN(±ÀëÕ7Â_nº,Ü»óñÒÐtV MþϯÿK¸iÛ=á/® *óZÚ2¡ LhÇ\­5bÎÖ$ì #0z¾®Œ?^L¥I-æja”*"0"0z®õspZ±¹ê F XsµXOµh—@»æjÚ_ß[Û5õö«@»æ«¹Ú¯ï(ãn—@»æj­¿ælMž@1íš³æj1ñQ š@»æj­~{ @€ý.Péwã'@€Àt |jÕEûƃáú$¹qÑtöCÛúAàÙ]/…?Þø7’û!ØÆ8-iâp:ÇÒ¹ÖÊf®¶¢ç^ÙEÍÕZKælMž@ñé|½ì› ©Ø\-„Q%&(ò{«¹:)±“ 0W aT ¶ 9WÓÎúÞÚöi Šœ¯æj¿‘ ½íEÎÕZgÍÙš„=⊜³æjññQ#š@‘sµV§= @€!Hpô. @€À4 |rÕª½gU¢%ë6.Ÿ¦.h–@ߤ™ðßoúÇðâðƾ³˜tŽ¥s-sÍlæj3jî!и@«sµÖ¢9[“°'Ð>ךüž:ºG;â]~ â56ñ½Õ÷Õ6F•Æ ˜«ã@èR"æj:4ß[»4ÀºU*"櫹Zª·„Át©@sµ64s¶&aO }EÌYsµ}ñQ3š@sµV—= @€o HpôN @€À4œ}ÁËgÌýiˆÂÑÓм& ô•@Çá/7]&¹±¯¢n°Ó)þ"?séÜkd3WÑR–@ëÍÎÕZËælMž@÷ Ü»ã1? w˜ô°­|oõ}µoCèsµgB¥£}.ÐÊ\Mé|oíó7áwT •ùj®v4TësVæjΜ­IØh¿@+sÖ\m|´@ &ÐÊ\­ÕaO€ @€À;ß±ðŠXyþE‡„™?‹B8¬# j„@Ÿ üpûáÞ÷¹‚áè¬@:çÒ¹×Èf®6¢¥,b𙫵–ÍÙš„=îØXÝÜýÔC%hö{«ï«%yFϘ«=*ísfçjÊæ{kŸ¿y ¿ãÍÎWsµã¡Ò`Ÿ 4;WklælMž@gš³æjgâ£5fçjí~{ @€ÞàøŽ…Wh»À9«.:¯…;¢ÐöÆ4@€@Ž«á²-7‘ @`Ò¹—ÎÁ<›¹šGIíhd®Öz`ÎÖ$ì  @€ÀDF¿·ú¾:ÑÐ0W;¡¬ ­ 4:WÓ}omÝ] šht¾š«Í(»‡@ëÎÕZ‹ælMž@g³æjgã£55Fçjí>{ @€Æ Hpëáˆm8þøãg$+7þ—h ZB´ -¨” wïXÖW_pÞ Ú/νtæÙÌÕÓ¶ºUL€@¶ÀÃ9ç ¹šm©v 䫵>˜³5 { 0¹@#ß+);ykÎ Ь@#󯑲ÍöÇ}L.Ðèük´üä­:K€@3Ì¿FÊ6Ó÷ 0µ@3ó¯™{¦î+4"ÐÈük¤l#}P–ló/ÛH  @€YƒY\'@€Æ>ýéÏí9sÞÌKâ8þ·QHÒmL‹Àºá ÓÒ®F xSà…œsÐ\õŽ!0=‹žß+zÇ»BHþôÑ_þ" T¢PyëëÍו‘ã±ç+aÖνÃáÑž!®ÄaóâMáÅÃüA鉠V  @ [ùù¶‘²Ý:^ý"ЫÌ¿FÊöª‡~èVFç_£å»uÜúE ™”íE }&ÐÍÍÌ¿fîéf}#ÐKÌ¿FÊö’¾è󯢤 @€Ý. 
Á±Û#¤ô”ÀŠ+fï±dùŸFQåß'ß=’ÛØSñÓÙò l‡Ê7(#"ÐCyç`Þr=4t]%л’µÆ‡fôõµ°=wŸ†½Þ.U# ŽokxA€ÞhäçÛFÊò%@ XFæ_#e‹í¥Úhtþ5Zž0Å 42ÿ)[\ÕD€@*ÐÌükæÚ#ÐÈük¤l1½S 5ó¯&aO€ @ y ŽÍÛ¹“£¢s.¸èsɉ¯&+60úBÖë8·†?š$CþNVY× hL qc7(M€@¡yç`Þr…vNe„(N–nlqKWq´ @€cùù¶‘²c[qD€@«Ì¿FʶÚ/÷ 0V Ñù×hù±­9"@ Fæ_#e[é“{ ˜(ÐÌük枉-;C€@3Ì¿FÊ6Ó÷ 0µ€ù7µ+ @€ò HpÌ+¥¦X¹êÂW¢¿ !zÿE꜎ÿñ‘ê¶/Íù!ªSÌ% @€ ¤«/¶ºÅQµÕ*ÜO€ @€ @€ @€ @€ô±€Ç>~CO? k¹Ž Û\…8µø¬³.ا2P¹¥á&âx¸EÿóUß½ô¯Ó{üìE Wá @€­Dq ŽVpl%î%@€ @€ @€ @€ @€}/ Á±ïß™ÑYç^xüŒÁðÉ8TOV˜[…xiˆ£%Inã^qˆv%5¼Eñúäõú$ÝñÅUo ;â뮼ò²u™µ+P”€8%Ù`=ƒƒ•o q?WÃÅW­ùæmÞ«< @€¢ YÁQ‚cQáP @€ @€ @€ @€èK Ž}öúƒ^±bÅࢥËÏŠBô©EŸLÖôXšÞñÎÚÉ«‘ƒ¤DÉ¥åɉåoJ+Ç3âøÜ ¾xwâï'É\—_uù·Jë(b;ç³_üIBåëÕ•$[Þ|ååßüÃzeо¶òü‹ÎN2Ýþªn½QtÝßýæŸÕ-“ób·Ç)ç0ú­ØÎ8TÿÓ¶×_ù?®¿þú¡~¼ñ @€t—@7ü·:& ŽªÎ9A€ @€ @€ @€ @€È+ Á1¯TŸ”[yþ…ç$i‹ÿWE‡·2ääþ4ßñø¤®ã“—qÎù_üÆ®mCÿû5×|÷…VêMï­Æñ*Ñ_Ö­'Ë?ó™Ïü/W_}õ¦ºå ¼˜|4ø÷’„ÐCëU™ôýæz×ó^ë…8åK•»«_|Õå—Ý×Gc6T @€.°‚cG× @€ @€ @€ @€ @€@Ÿ´¾\CŸ@•}˜g_°ú”sÏ¿èÖJT¹¢ÕäÆI¬*IºãŃsf­=ç³ýÇ«VÍŸ¤LîSW¯¹ôö‡ëÝ´7«2kÁyõÊymåÊ —Ä!œY¯Îd5Ë—žyìëê•ɺÖKqÊK¿\OÞ/T«Õ?¸bxëI’û%êÆI€è (Nÿ.Mk[%?ñÚ @€ @€ @€ @€ @€M Hpl®L·¥I‡ƒaàÖdõÁSÚ9®$épnT‰þbÊœÛÏ9çó´ÒV5 ßȼ? «3ËT šŸ$†Ö]5 ñ?ßu×];›m²ãÔìXËq_üz’Ôú¿í|cãaW}ï[ÿ_X³f¸ã2  @€²DÕÖ%Wªeá0 @€ @€ @€ @€ @`Zÿ4ã4tZ“Åüñ3Î9ÿÂo¦I‡ÅÔ˜¯–$ð¨hÆŒ_žsþêãòÝ1±ÔðÖߊãx×Ä+£ÎÄáCg_pÁòQgÚö2IÞ¼(«ò]ÃÕì¤ÌI*éå8M2œ>8¿Çá¿ìxcç!W~÷Ò¯^{íµ[û`ІH€ Ѓ•ä/Ç´ºÅ+8¶jè~ @€ @€ @€ @€ ÐÏû4ú§¯Zµû‡sCU2óÚB…½£PùɹŸ½ð“ÍÔÍ5ßY¢p]½{“Dʨf~¾^™"®­<ÿ£BˆÞŸQ×]ÿºæŸ“QfÂå^Ó„•øD²ZãÓ!Žÿ|ûæ]˯¼ü›ÿîÚk¿óJ‰‡kh @€%ˆâ# Ž%x+ @€ @€ @€ @€˜6ÁikYÃÓ&p̪U3TæÞ˜|”õ„F:‘|lõ…(Ä7$«Óݪá¥áêú¤ŽPXV Õ}’„ÃcC¨œ•¬f¸0W½Q4?©ó_W®ºèCW­¹ô¶\÷Œ*”ôã“¶ÎujÂËä³¶«““ÿiÂ…O$ ^Œ½îV­6¾zcYâT¦«Õ7¶ TçŸweˆª­ÿÍ#+8öÉ›Å0  @€ @€ @€ @€ @€@›$8¶ ¶›«=¼2çkIb`îäÆ$‘ðê]»ªÿç5W|ëάqüñ3–zÌé!úó$éoEVùd‘ÅÁ0ç·ûóïûÁ¾ýjVùÑןyìë’U(_ŠB´dôùѯ“úßµòüÕï¹êòËî}¾À×I#I”SV™ø m·gÊS\(Kœ¦^iN_}õÕ›’Á\Qš @€@ß±‚c5ªö— @€ @€ @€ @€ @€Å Hp,Þ´«k©§-ïõÊ@tQV«ÃÕÌ>Ž®£œq=B¯  @€è¨Úú?“â(è @€ @€ @€ @€ У­š±GÞ݈gý×d5Å=²ÆÇáﯸüÒónY³fKVÙ¬ëW^yÙº]Ûv~$„ø¬²•JôÕO|⠲ʾ~Õåߺ?éoý&£hßd¥Å£ï+âõÉ«VÍIê9¯n]qüüUaèÆºeÆ],cœÆ Ñ! 
@€] ÅE¬àXí‚‘è @€ @€ @€ @€ Ы{5r öûÜs?X…³²n‹C|óÆõO§+)¶ Ç5×|g}u(þhRáúúíG æ.ø½úe&^M>’û‰gÇž‰*Ñê±gZ?Z60'ñŒê&d&õýVX³f8okeŽS^å @€èŒ@T- ÁÑ Ž –V @€ @€ @€ @€ @€@I$8–4°ã‡þɹºñN_Ú¼ë¼[n¹e×øû[=¾êªo½U‡ÿ(«žd‘? «V d•}}xèµï$ÇÛGŸÿ:IîÍ=·^™ñ×®¾úêMÉ‚“W??ö8Z°héòO=×üÑÊ•. q|FÝâøW]~é£uËŒºXö8ª— @€t@!+8Jpì‚Hê @€ @€ @€ @€è] ޽»Ü=Ÿ70ç’ çÖ»!Ysãá«âíÿX¯L×âjø¯YõT*ñŸd•™p½2ûEÑê ÷5y¢23ú\Rß`½ÛÓ†Voì‹8Õs @€Ž ’àU;Úg @€ @€ @€ @€ @€åàX®xN:š(„•“^u2Y½ñëaÍšáQ§ÚòòÊ5ßü~âuõ*âpòŠU«æ×+3þÚß»ôædEÅgÆŸ}œ¬ù‰•+/Z<ú\Ó¯£èÂz÷ÆqغíõáïÕ+3þZ?Äiü˜ @€ 0}Qœü+¤Å-¶‚c‹‚n'@€ @€ @€ @€ @€ý- Á±äñÿøÇ/œ—$Û½7k˜»âèê¬2]¯&¡½©n]Q4°p`æIuËL¼XMÆù͉§Çœ™fīƜiâàìU_8:¹íøú·ÆW\ý?¿^¿Ì;Wû(Nï Ú+ @€¦U ª¶þ+8ù 56 @€ @€ @€ @€ @€@³­š±Ù–Ý×y»‡“¢(Ìhì®k×\ZwõÃŒûºœ¬àX?Á1©m ®|°¡J“ÂÑ𮊓­Þ}QTY]ïzžk9êˆÃ7òÔU+ÓOqªÙž @€é(fÇêôBë @€ @€ @€ @€ @€@O Hpìéðew>ˆ2“œÀ;³k*®D’‚˜£½ì~ïÑW|û‰(D?~ìq|ʧW]xÐØs E!ÎHpŒã'¯üÞ¥·4Rk?Å©e  @€hŸ@TZ®<®Ôý3-ׯ @€ @€ @€ @€ @ ÜËßÅáÔ¬!ÆQôXV™"¯¿XÝþxÖJ‹qˆNNÚløý™¬RwåÄd5ËhFTùB³ãùÌ«?Eay½ûã(\š\oèS¾ý§z~® @€ ÐJÜð?¹&t,Nþ1c#@€ @€ @€ @€ @€4+Ðú§›mÙ}H’íŽËjèÿgïNåºêÃàŸ;óž-Éx/ÂâH Á%‰6J  6’Œ¼IYhBÊG“¶ióµIˆ I¾†&i¿Ð&$m0–dcùÅ’°³ª4Á|¤[f16`/’%½¹ç;WÖÈOOoæÎrgÿM"îÌÙÏïœ;ó$ÏÿZÞj€ãm ûC–ÝÙn\)ˆðäK.¹ìqíʬ”·¸ïž?O'D>°RÞÑ´Zè9À1}xËÑvVxRnæáàÛVÈj›4këÔC& @€C¨æÇôkf< @€ @€ @€ @€ @€= ÌõXOµÉHq‚Ù¥C­ÕÞºaóÖ·”–«²@Œg¦ Ƕ-ÖWÏc¿«m¡e™·Ür˾ ›¶ì !{õ²¬£/S¯ß¿nãÖ ÷,\{ûÑÄž\´qãê4äKKŠîÝsà _.)³<{æÖi9€× @€ _ K¿i¥ß‡ûTŸ @€ @€ @€ @€̶€Ç)^ÿuëÖš¦×Á)Ùéý­µKÈ’àÆ¢µ<‹§wÙêáâéÅ·fõÐ2À±(T«…+Ó¥«Çsj«^‘'O9ÜI«ÿÉâ5­²Z¥Ïê:µòN€ @€Àp²¼ƒ¿.– %¯Å’²  @€ @€ @€ @€ @€­úÿ6cë¶åŒX 1rO‚#öÑîëÐÓøw/l»-Äø¹£ ­ð$tn7ÖWÈj™”NüªeæáŒxÿ¡‡î»©}™ãsguŽ—B€ @€À0²¼ÿ_ukù0‡¬/ @€ @€ @€ @€ @`Ê8NÙ‚.N=ÔÎXúzâž×³žÇŸ¾§Ûþ$Å,œ³®¶æ…š\rÉeg§²/nW>Æpã-·Ü²¯]™•òfyVòF€ @€Àp²XA€cæÇᬖ^ @€ @€ @€ @€ @€Àt pœÎu=<«z{:q\HbÞÛ ŽÅø³ƒù¶tŠc£Ý\j!^Ù.iÞÜê¹Ë²,›[š¶üy#ËÛU.¯päõ,¯S É @€ A Ëûÿ'Xà8„¥Ò @€ @€ @€ @€˜Zþ¿Í8µ4“?±t¢àÉ=‹Zí„^Ç¿k׎;c–½§]ýtVÉú‹/¾xM»2æÕ®zôù ÏbüÜ;nØñárJ“fyJq @€ @``Õœà˜l|&@€ @€ @€ @€ @€¦_@€ã¯q–…&zzy~°Ÿñgy£ý‰ŠYö˜ù“N_WÖÇ¥—n}f²|N»r1f×¶Ëo—7ëëÔÎF @€ƒÈòôk_ú|8Á±O@Õ  @€ @€ @€ @€ @€ÀŒ ÌÍøü§zú1ËïÉB½íc_ø+m (³–7þ¶Ÿ®ÿ.¸åûÂêog!{lËvb¸"å]ß2?eäsñÊZhóÅßcØÖ®vy³¾Nílä @€ 08,ïÿwÅ,ý­Òƒ @€ @€ @€ @€ 
У€Çá&¡Ú¡<»÷„öñ!İj×¶ݓ0ŸnÇøé……ƒlÚr]:!ñÛÔ}ñúõWµ{÷ö»[”©¥É"²å#fÙûß¹pí×[(ɘõu*á‘M€ @€À€²Øæ¹tا;„RŒ @€ @€ @€ @€XQ@€ãŠ,Ó‘¸?<|Ï aMÛÉd!>~íÚµs{÷î]l[pb3ókB¨· p̲l.œ6¥é½y¥)®ßxÕÚ yÞJyÍ´<©ÞÖ©w;5  @€è] Ë+pÌòÞ &¥×<î=ç[Ç”{î‰ßwÌëV/þæÀçgí;õVE¤ @€ @€ @€ @€ @`äG¾ƒÀ_,,Üé¦-e­ÏqLy§sÎÒ(¾<¸‘Œ®åÝ;wüí†M[?‘‚ŸÓrY¸,å­à˜ÕjWµ¬WdÄpÏî~GÛ2%™Ö©H6 @€p‚ã@X5J RûÎùN(þ,}ü¿g¿§©üqí]×—R‚ @€ @€ @€ @€#¨¸ÝV Æ,»«¬‹¬1÷ýee&9?RÙþ„Å~dÆ+Ï]>Çt²åª,‹–§ó: o¿õÖ[“Öý ëÔ½™ @€ô#^ÌÒÿõóˆé7¾ôÙD?Ý«K€ @€ @€ @€ @€L€Ç)XÄöSˆmŸŸ¾Z«ÿxY™IÎ0ß}Œ¡ebV<æëë–Ïñ̳žðò¤sÊòô¥¯Ð>xriá¶Ï­S[™ @€T*Åþÿ9 ÖR€£ @€ @€ @€ @€ @ þ¿ÑØGçª^ †ì¯ÊzIgv¼¸¬Ì$ç¿waážtã;ÚÍ!faýqùYíòãÒ–&Äð©= ×Þ¾4©×çÖ©W9õ @€èE Ëû;½±è3ÖÒ1 @€ @€ @€ @€ @€>æú¨«ê4åU›/‰cÍÂ?¸ä’Í¿ùæ¾1Œ)­[·î´úªSÞCüjj_yþ•C1~ùþo}í+{÷î}xcÈù5µzýU­ÚN_í]ûò—_~ú»Þuý½E™—¾ôŠSB–½¬UùGÒãÛÚçwžk:·R’ @€þ²ô[^ú}ÄÌ ŽýªO€ @€ @€ @€ @€f]@€ã”ï€û¿óÕOžyöù¦`½Ç´›êÜš~.å_Ý®LUyµNùŲ+³pä µµZ˜OŸqöùqÃæ­wg!~9Æðåï?û¾÷m¨Š~w/ìx߆Í[¾–ú|B‹öæO<¹þ“)o{‘¿úÔÚ†tYUjyÉ/Âé íXàØ“" @€ @€ @€ @€ @€@þ¿ÑئqY£H'".¦Q|¸l$)Ôð_\rÉ%'—•ë7¿81«…àxü#+!œ‚8={jUÁGzÊC¶ßëÒ”lýÑW1ÛxôùJOb|÷îÝÛï^)«—4ëÔ‹š: @€ô*PÍ Žy¯Ý«G€ @€ @€ @€ @€8, Àq6BÌÃ+ŸfvúܪS¡¼\%VŸZÿ¿Rãée­Ä<{Y™nóc~MÛ:1{ÉÅ_¼fíÆé´Ëì…íʦ¯ñ¶o«]åyÖ©Œd @€*Èòôëeú|8Á±O@Õ  @€ @€ @€ @€ @€ Àq6Á®…m»bŒ_*›jVË^ÿŠW<£¬\¯ù/~ñU'¥ºÿ²¬~k ‹ùβrÝæß¼°ã‹!†¿jU/¹f~Õé/9½¶ê¥éù‰­ÊÅï¾ã‹Ÿ~w«ü^Ó­S¯rê @€ Э@'8æYì¶[å  @€ @€ @€ @€ @€Çp<†cj_¤Ã”Ï.;©^«ÿù#'–—î²DvòÙÍBöØòzÙ;wíºîkååº/Qzòb-¬ÏBííZNá—;n¿ýöCíÊô˜gz„S @€î²¼ÿˆµâ¯0 @€ @€ @€ @€ @€ÞúÿFcï}«9D;óýפ“¿]Öe–eO?³¶zÇÚµkW••í&ÿÒÍWý~ÙÖêä|ñW;(×S‘{ó} !Æ[UN'7þdÊ{y«ü"=Ïok—ßOžuêGO] @€:Èò¬Ó¢-ËÅš[âÈ @€ @€ @€ @€ @€:àØÓäºmaaÌã;šI–½â̳ÏÿÀÅ_ÖÁi‹å-nxÕÖ_ ¡ö/ËK†{¯{ÇÂuŸê¤l/eö.,<˜¾‚»Ðºnvz r<­u~¸}ã³Nmäe @€ P™@+pÌ8V¶ "@€ @€ @€ @€ @€3*07£óîyÚéë›ç\ºi˿깊+~ù‹Ÿ~óí·ß~¨“fw߸ý7lÚúc)€o]iù,{Þüšù¬ßxå/ì^ØñþÒò+X·nÝiµOýw©¿»BöqI1†¡Å_?.£â„F–_3ê?ÝK³yž_ÓK½nêX§n´”%@€ @ ,ïÿ÷9Á±yu @€ @€ @€ @€ @€¥—jtð< ÙB~¿ƒ¢C)rò“Ÿü§¡ÃÇb@ùïþtýÄSž²ì{˘eÙS²zý}6myïâbüµ›oÚþñ²:EþoÜxê)Ùê_Jõÿe²:µ“:‡Ëäñgvíºþ+—ï±à;nØñW6oýB:¯äiÝ4‘0|¨q}7uz-kz•S @€N²<ý¨ÏGÌò>[P @€ @€ @€ @€˜uŽ3¶öìÙsß%—^õªù¹ð¡äxB'ÓOŠ/™ŸÏ^²a󖯤ÏwÆFÜÛ¨çßÈbý®ì`þ@vBþÄ<Î=µV‹O ±Ì.N§6žÖIÛÍ2yŒ°{aÛŸ7_þß–ÆùÛÝô“eñïz×õ÷vS§×²Ö©W9õ @€èD ‹8Öb'])C€ @€ @€ @€ @€h) 
À±%Íôf'1^úª+7ÅP¿."®ét¦)¸ñ‰©ìk³zöÚ¹POñéÕ‰µô?µP?üÝØô?=|G6†xó½wÝñ+Ž£’rÛ so<<øÌù5­¤˜uª„Q#̸@úìJ±÷‚/f|˜þŠ{°“‡{µ%eT+åÅßåú{ÄÌgl‚jè^ ÓÏÖ¢eŸ¯ÝûªA *÷jU’Ú!0X÷ê`}µN *nîÕ¢O?W%¯Ý ts¿ºW»÷Uƒ@UÝÜ«Í>ݳM WÃèæžu¯}ôH )Ðͽڬã:þÏ¿â-oyxý Gãôü7÷4—«ŸÙ[®¤W:°ç꿾þçß0È>´M€ŒN ÿo4ŽnìzîCà¦wì ¡ñÓÆß裙¾«¦ŸÍÿlWcÿ†½{÷.öÝX ìÚuÝ×ÒÜß×q•¿¾;xÇå+*8ëëT£f˜a5Ù‰3<{S'0zNïÁNË~FF@`zª9Á1Ÿ3!0!Ý|fvSvB¦o˜&F ›û¯›²` &D ›û¯›²2}Ã$01ÝÞÝ–Ÿ%0ÝÜÝ”€©"‰èåþë¥ÎD¡,1èæþë¦ìOÙÐL¤€ûo"—­tкî5¿‘ÎùÍÒ‚ G ­…àÆáPë…ŒJ@€ã¨äÇ ß];w|â`#þP 2ü݇“~SGŒy|ã®×¾:,,4†ÝÑ_ž‡k:í7L²mTãœõuêt”#@€ÀJçÖÏ\)YCx|‡÷ {uH ¢K²¼³V—T9î©#‘@`à~¶ñù:ðåЖîÕ–42Œ•€{u¬–Ã`´èæ^-ñspKJ.ÐÍýê^ørè€@KnîÕf#îÙ¦„+á tsϺW‡¿>z$Ðèæ^mÖq AŽc²N)¸ñðZŒÉp ƒŒ€ÇÁ¸NL«ï\ØöõCûîI'9æ¿b|p8ÿ+æáù»nÜ6УÛËærà»ßb¸§¬\‘cö¶NÊ ªÌ,¯Ó LµK€Àl<}þüÙ˜¨YSïïðt¯ŽéÖT d±ÿˆµ8ÕF&G`:ýl-ÆîóuWИfEÀ½:++mž“.à^ô4þYèæ^-Lü<+;Ã<ÇQ ›ûÕ½:Ž+hL³"ÐͽÚ4qÏ6%\ _ ›{Ö½:üõÑ#¦@7÷j³ŽëärñZ nñèž O ÿo4o¬zÀ-·Ü²o× Û}_¾ÿÉyŒºyx ]Åð­<„óoÞqáî…m· ¤.½õÖ[į/­ã‡wïÜöùÒr.0«ë4`VÍ 0åøŒ)Ÿ¡éoNïÁNË÷lŽÀd Tr‚c-ý σ¡ tó™ÙMÙ¡NBgf@ ›û¯›²3@gІ*ÐÍý×MÙ¡NBgf@ Ûû¯Ûò3@hІ&ÐÍý×MÙ¡M@GfD —û¯—:3Âiš.ÐÍý×MÙ\fLÀý7ý .ÈqDk,¸qDðº%@€£à8÷±ìõÖ……o¥@¾} ŸšÇð›éܦ£ }6ÅLÆ÷ÆFÜøå/þïïÙ}õ¿·wïÞÅ>Û¬°z~MYcéL’Ò2emT™?›ëT¥ ¶˜%çœð´pvíôYš²¹âÞ+îÁNîÕN””!P­@-Ïún0fNpìQºèæ³µhÖçk¸Š¨PÀ½Z!¦¦ PÀ½:@\M¨P Û{µèÚÏÁ.€¦t!Ðíýê^íWQ t{¯6»vÏ6%\ W Û{Ö½:ÜõѦ@·÷j³žëä ròš n2¸î @€ÀèúÿFãèç`xùË/?ýÄ5s/ÌêáÅé««?¾¿ú¸²Ç…,œº¼ÛÃÅ{R™O¥/º~,ËÃÇòC᣻wo¿{yY¯«°NÕzjÀ¬ ¼ÿ¶ÏOutÂ{ö,üþý>kËj¾F.ð¯Oyeø‰Õ?Ôñ8Ü«S)H ³þþ{ÂÓ>öújëî'}=|á‡?ÕW* й@·Ÿ­EË>_;÷U’@UîÕª$µC`°îÕÁúj@U½Ü«Eß~®j´C s^îW÷jç¾J¨J —{µÙ·{¶)áJ`x½Ü³îÕá­ž4z¹W›u‡u}ÑEø®x…ØÏ¿â-oyx}…Mjj¹€àÆå"^ @€™˜›‰YšdÏïz×õ÷¦ÊETÈ1‘!^xáüŸøôÇ.ÖÃ)spÿõ÷ݶ°°¿çŽTìKÀ:õŧ2S.ð’UÏ Øÿÿ…OúÒ”ÏÔôŒÀ³çŸŠ{¯›‡{µ-e ô/Up‚ãéõÇô?- Б@/Ÿ­EÃ>_;âUˆ@eîÕÊ(5D` îÕòjœ@e½Þ«Åü\Ù2hˆ@G½Þ¯îÕŽx"P™@¯÷jsîÙ¦„+áôzϺW‡³>z!Ðèõ^mÖwLâ$Çä9hý7V³ @`üüVŽñ_##$@€˜ö ¼ûó‡ÂkïysøfãžZjœΩŸþë¯ §ÔNêšÃ½Ú5™ z¸àKß÷ñ'õ\¿¨øରû¾Óçk_Š*(èç³µhÝçk¹±ªp¯V¡¨ ƒp¯ÞXªè÷^-Æàçà*VBÊú½_Ý«åÆJ¨B ß{µ9÷lS•À`ú½gÝ«ƒ]­h ô{¯6ÛÆÕ ŽƒQv’ã\7U“ @`rj“3T#%@€ô&PZýÎi?s8ðª·Ô"@ âð‹{­—àÆ¢}÷j'ÊÊè_ ¸W/^uQß ­ªŸàóµoE h/ÐïgkѺÏ×öÆr T!à^­BQ/à^¼±T!PŽZŒÃÏÁU¬†6´¨â~u¯¶7–K  *îÕæ8ܳM Wƒ¨âžu¯n}´L )PŽÚlËurŠ“C ț܌ÙÈ7ŽÙ‚¾@}ø]ê‘'-¯~ÝÕã4žAåÔäøã«~0|þÐ×Â7ó{Õv 
̬À³çŸ~÷ôΪŸÞ—{µ/>• ” 4ïÕï~s1|éŽþN6>ïÜSÃ…ß{¾Ï×Ruô&м_ûýl-z÷ùÚÛ¨E ÷j'Jʽ€{uôk`:¨ò^-úósp'êÊèM ÊûÕ½ÚÛ¨E *ïÕfîÙ¦„+ꪼgݫկ 4ª¼W›múºýÏÞü†A÷1«íõSïÜ{þ³~²bøÑY5¨dÞ‚+aÔ˜tlÒ'`ü @€@ï¿íó±¿&«vŒ1¼÷á¿ ;ü@¸K ãd-žÑŽ¥ÀÙµÓÕyaxɪç†,«î¯îÕ±\nƒš`å÷ê_ü+á/>ü¥¾fôüçœ^ô‚§nÃ=ۥʎX~¿“Ùç ÷jŸ€ªX"à^]‚á)1p¯Žñâ%ƒ¼W‹nü¼ÛS} ò~u¯ö¹8ªX"0È{µÙ{¶)áJ AÞ³îÕþ×G šƒ¼W›} êú¢‹.¨î˃䄷ûü+Þò†‡×Oø4F3|Á£q×+C?´Žá¢¦À¬86m1Ÿ8ø…pÛτϺ#ÜÙøNؤ_¨5SñžMW d! k²ùõ3ÃÓçÏøŒðœžêY­£ú½r¯ö¢¦Î¬ tr¯þÏ}9|ð#ÿ§/ªüÜ'†^ô”cÚpÏÃáRNî×ÒFz(à^íM•™p¯Îôò›ü ¸W'h± u¦Fu¯è~žé­gò=Œê~u¯ö°XªÌ´À¨îÕ&º{¶)áJ 3QݳîÕÎÖG)MQÝ«Íþ«¾ p¬Ztåö9®ìÒ6Upc[™ @`Ö8ÎÚŠ›/X&0«ŽË¼$@€3%ð?Rpã_¦ Ç~?úCO ?ö#Oî§ u  @€ @€ @€ @`€ˆ»¬iAŽË@Ú½ÜØNG˜IÁµ2“œ&M€ @€Æ_ Ïû?±¸^óO ã¿ÒFH€ @€ @€ @€ÃøÐu¯ù÷†Ñ×D÷!¸q¢—Ïà  @€À |qP²Ú%@€ @€c*PE€c­–éì ‹ @€ @€ @€ _ r̲ì ÃïyBzÜ8! e˜ @`ø¾8|s= @€±xÿmŸïÿ§±š‘Á @€e‡áà¡F(éOqÍó|ÉóGÒ)푼¥åŠçy8ç±'‡³Î<©¬+ù @€ @€ @€ 0"]tïŠÀþ—ÿÉÕ1ÆßA×ãÛ¥àÆñ]##@€c 07c0 @€ @`ˆósõPüñ @€ @€ @€ @€ªøëëþêä9qÜXíÓ˜BÚÎÉ” @€ @€ @€ @€ @€ @€ÀHŠ Ç,ËÞ0’ÎÇ©SÁã´ÆB€ÆV@€ãØ. @€ @€ @€ @€ @€ @€À$ Ì|£àÆIܶÆL€F" Àq$ì:%@€ @€ @€ @€ @€ @€i˜Ù GÁÓ¼­ÍT. À±rR  @€ @€ @€ @€ @€ @€f.ÈQp£mO€t) À±K0Å  @€ @€ @€ @€ @€ @€@§3ä(¸±Ó-¡ °D@€ã O  @€ @€ @€ @€ @€ @€@ÕÓä˜eÙ>tÝk~£j7í @€Ó/ Àqúר  @€ @€ @€ @€ @€ @`ÄÓäX7s1¯î  @€ È&t܆M€ @€ P ~óî !;m€]hš*b¸#;÷žXESÚ @€ @€ @€+^—g@IDAT @`8/¸üO®Ž1NÅi‡‚‡³gôB€¦YÀ ŽÓ¼ºæF€ @€zˆa¾×ªê @€ @€ @€ @€­¦å$GÁ­×X й€Çέ”$@€ @€³$07K“5W @€ @€ @€ 0LIrÜ8ÌÝ¢/ 0ݧ{}ÍŽ @€ô&9Á±78µ @€ @€ @€ @€@g“ä(¸±³õUŠèL@€cgNJ @€ @€™ˆ!déÿý›Á̬¸‰ @€ @€ @€ @€À¨&-ÈQpã¨vŠ~  @€Àô ø²âô®­™ @€ @€Þ>}õ|oÕ"@€ @€ @€ @€º˜” GÁÝ®¬ò @€@';QR† @€Ì’À™' pœ¥õ6W @€ @€ @€¹À¸9 nù1 0µ§viMŒ @€ô(ðPc®Çšª @€ @€ @€ @€= Œk£àÆT5 @ #Ž1)D€ @€fH`õœgh¹M• @€ @€ @€ÆG`Ü‚7ŽÏÞ0 0­Nd˜Ö•5/ @€L‘Àó6ýÑOÇž”þÑü¢eCŒ‹iz‡Š?)íPÌãÁÅCY¬È§…Yˆcú“Åp ÖjcŒ§²1¤kã@-ÅúÃõÆâ÷7j©ÜÃÙþCs'<ܘË÷}ú[ßz8콺ègöëóaÕìMÛŒ  @€ @€ @€ @€À8AŽ/¸üOÒ×#âoŒr<‚G©¯o 0;ÙìLÕL  @€ @`Rž¿ùÿG l\;ŠñÇ%y´ß5™þ"}8%=I±”éuñ*=/Ê¥ì¼HÏBV¼Nטâ)³<_6R¹ô<4RÅôŠk£Ô<œ–Ö,Ò¥ÖS{ÅõPjçPâ¡ZzÚIÅ5¬…쓼q÷Gn|í-©Nåøõÿz^¨×a  ЕÀ·î áOßÂß}µuµFžüÊÝás-K¤ ï<æ¯ÿÈÎ~kË22 @€ @€ @€Käxõ¨‚7Žå–0( 0•NpœÊe5) 
@€L™@–ìï¯YŠ2\¢ùÈóô¿éÿë‡Óä¦réñhác_?RáHÑG›[ÖøÒöš½¦`ÆG“7óÈëZ½þ@Ê8åÑÆ*|¶˜Í™]…jŠn>šÂoý›ãÞ9–7sB*ñ–'}2kYíÏ»ì&Mê“<†?ûÈ ¿ðs“:þ¥ãNû›â`iZËç1<cöºÛv¾f[Ë22 @€ @€ @€¦R`T'9 nœÊídR @`ljc;2#@€ @€MGàØÂØ]ÓiŽS=ç=0\ ¾ÀáÇ¥Ã`'ýO8iøzê1ËÓñzdÙ)é0à“4Í @€ @€ @€Œ¹@äXk˜‚‡%­ @ ) À±)áJ€ @€ã+…ùñ܈F–ÅÅõ\ç=0[  @ Ÿƒ½¨©C€ @€ @€˜a9 nœš-c" @`¢8NÔr, @€fT ' ._úàØÈ”.÷öš£ˆ5Ÿƒ£ô×7 @€ @€ @` ä(¸q Ù @€ÀŒ pœÑ…7m @€L”€“«Ž_®,:>±¢”Z.¦"JÍ @ ½/W© @€ @€ @€Àd *ÈQpãdï £'@€“. ÀqÒWÐø  @€ 0 1ر|c6¸s'8.çöš#è?R~ @€ @€ @€ÆI ê GÁã´ºÆB€fSÀDgsÝÍš @€ˆwþÑkÓ”_“þ§þ-†,ÇÅây,‚䎤¥k4Wœ §yÞLË©S+êiñ‘rE;EZ¾´T¿–ú)ÒŠ²µtÍSýZQ§–Ò›i‹‹¡žò)m.å׃éõ‰ûÃúbX½x(WËçç²¹|.ÖÂ|–Ïͽÿ ÿØÚ,6BhäéÏ‘ëb#{äuJ[ûC¨ÕŽL"ËMói–2>/²Ó‹¢\3­Y÷pÖ‘Ä¥iõú#mYÍôÚ‘¸ÅæßN‹"«‹b'¹¦§'‡pÖé1Ü}_³£"ßãHê` jõæŠ ¦}­ @€@W™“Œ»òR˜ @€ @€ 0 ý9 nœ…]bŽ @`2|aq2ÖÉ(  @€ ÐV`ͪÕ_O±†kŠB«ŽþMïHaº¼aÇ‘àÂ6­¼ÿ?İúÄ6F˜Ufz+ð½çdOþÐôñGNÜL§e'q6OæÌÒ šqÅ´cOð<\®8U³8‘3ÕG®1^p8xõØ.½"@€ÀHžúøð´ßùÇ¿0ÒAtÙù'¾˜ŸñÁOfO»ûÞì̃l®‘Çz#]?sG8·›Ï¶,ĺìZq @€ @€ @€è5ÈQpã lS$@€$pôk¯4fC%@€ @€<ôp:4qL‹S'=ŽXuB\“‚/<œºôpËÃÏÓÿtœ¶¬lÑàÒ<íÖ+ŒLàÉç„ç¦÷¶çŽl=tüÅoÔž÷†ÜuK1ËŽyÜuU @€ @€ @€¦\ Û GÁS¾!LL À‘ã<&pä†L€ @€ daÕ£/z{¶ï@oõ†Q«áÇã˜çÊå<®Ž®@UïÕYÌü¢ºá.Þ @€ @€ @€%P9‹eƒÜX&$Ÿ…€ÇQ¨ë“ @€ ¤³¡b¿M<Ôo ƒ«¿èÇãpëþFœ‰Œ›@UïÕéCÞ Žã¶¸ÆC€ @€ @€3#AŽW·–àÆV2Ò  @€Q ø:ä¨W@ÿ @€¨B ïðÆ.V1Á´áÇã]ëNp<E ÆL º÷êèÇ1[[Ã!@€ @€ @€Œ£@ r|C d¼zùØ7.ñš'Žã´ÆB€ @€žbß!ŽãàèÇã7Æœ¿Ñ"…c&PÕ ŽY&ÀqÌ–Öp @€ @€ @€c+°<ÈQpãØ.• @€Güæo[ @€S ³,ÏúœÇ¡1=Á±ÝŒ±ßÙõ‰3†Õ«;l 'gH˜¹ŠNÛMŸƒóSBb @€ @€ @€C(‚_pùŸ=eéùÕCèR @€ž8öL§" @€ÆG ‹!†>cÇõÇ"ÀñÕ/á;÷‡ð…¯‡ÐÈCXl„×ô§¸iy*W<_úº¨{8ípä#e ˜LA“iù–>/Š´c}¢ÛX¥¯œàX)§Æ 0êNpÌü;î@VH£ @€ @€ @€é(‚§wvfF€L“€/ÆLÓjš  @€³+­›×¥Æ¸8Öj!üÔ‹»œLEÅcŠ€Üw „Âæà¡¤?ÅI—ŵ™v0[:òºÈ+Ò‹Ì¢ü¡"ïÈëây‘^ü9œ–‚1‹¼âšÒŠ ÍF‘äÚ|}øZ¤¥?E°f„ù}ç…ðŒó+š¤f @``U8¦ˆ|ÿŽ;°UÒ0 @€ @€ @€ @€À(|1f”úú&@€ @€@E)/Ïú%˜J‡cÞÉ™§ŽùWÞ\}…Ä’Rpû|ÕT!@€ @€ @€ @€ 0öÇ~‰  @€å)/í×ßC€c~j @ jSHÛ÷<¶êVµ7LªNpÌBæßq‡¹pú"@€ @€ @€ @€š@mh=éˆ @€&NvJÿßßC€c~j @€åU8†, p\Žë5 @€ @€ @€ @€ÀTpœŠe4  @€f^ :Áqæ÷;¹z5CJ¿Å çéA€ @€ @€ @€ @`ú8Nßšš @€3)ó~§}¨Ño ê @€Kª:Á1‹Á ŽKa='@€ @€ @€ @€˜ŽS³”&B€ @€À, dYÖwxâbß-Ìò ˜;8^ ^Ñ Ž!dç•B€ @€ @€ @€ 0§`M @€1Tp‚ã"G @ J¹ªþõ5‹«\m @€ @€ @€ @€Œ@U_±›  @€fQ ýŸàxÈ Ž³¸uÌ™(PÙ 
Ž1Ìp˜š&@€ @€ @€ @€ 02Ž#£×1 @€ªˆ±ÿ8V· Z"@€I ^Õ¿¾fÁ Žv @€ @€ @€ @€ÀT Tõ›©Ä1) @€LŽ@ÿ'8 pœœÕ6R˜ ¹zE㌙NJ(5C€ @€ @€ @€ 0^¾3^ëa4 @`èï¿íóqèê*¸é½ŸŸúÜ]}µÛXýÌðN_m¨L€<*pïw÷§·=šÐóÓNY~é§žwAøEwëO @€ @€ 0Ó/ºè‚l¦Lž @`"œà8Ëd @€h/PËúÿoy.æ½½²\ Ð@­æó¹;1¥  @€ @€ @€ @€fM@€ã¬­¸ù @€ 0•(¦rYMŠ&\Àçó„/ á @€ @€ @€ @€ \@€ãÀ‰u@€ @€Á Ô«8!*:Áqð+¥˜%޳´ÚæJ€ @€ @€ @€ Ћ€Ç^ÔÔ!@€ @€À˜ T@ÑÈÇlV†C€&[ ŠÏçFîóy²wÑ @€ @€ @€ @€´àØNG @€&D ŠŠÜ Ž²Ú†I€“"P˲¾‡šçNXîQ @€ @€ @€ @€c+ Àql—ÆÀ @€ й@­Öÿ_ïPtî­$èD ^Åç³_@Ð µ2 @€ @€ @€ @€*Ðÿ7`'tâ†M€ @€i¨äG'DMÓ–0J>ŸNpƒ¥4 @€ @€ @€ @€ p¬f  @€ 0LZ–õÝ]C€c߆ @€Kªp,£S—²zN€ @€ @€ @€ 0E§h1M… @€Ù¨"€"à8»ÈÌ  @``>£F«a @€ @€ @€ @€)à8‹h  @€¨$xÂéP6¨\À)Ë•“j @€ @€ @€ @`Š8NÑbš  @€³+P¯e}OÞ Ž}j€'PÉ/!pÊòq® @€ @€ @€ @€¦C@€ãt¬£Y @€ 0ãÕOä3®hú @ zJ~ S–«_- @€ @€ @€ @€Œ…€Ç±Xƒ @€ @€@µÌ Žý ªM€#PÍ/!ˆƒœV  @€ @€ @€ @€ŒX`nÄýU÷6myuжTŒ_Ùuãö7µ-#“Ö½jëeµ,¾`ÅÌ#‰óð;ï\Øöõve&4¯öŠW<³–ÕžŸeÙ3RüÁééð…3BºfÅ5d4¯ï¤¯+~'Ëâ=1fßÎcødþð¿¼ùæ¾1¡s6l @`ˆ‚'†ˆ­+ Ð…€Ïè.°%@€ @€ @€ @€˜9ŽÇ,yö)èèÒc’–¿ÈÂÇS’Çå.^è@ ^‹?–ùþY»¢óµÆŸ¦ü©p|éK¯8eÕɵŸJ/ËBö#)˜ñÔ¥s?zÈҒÖyšJ§'õâϚå›¶|1>îy~Óî…ïYÚ†ç @€¦@­Vk>íùš§ßÀàA€T+PÉgtžW;(­ @€ @€ @€ @€Žc²†A€Àô¬Û´õ)Yˆ¯KaŠ?“Oî{fYöÔëøÔ¬^u:iöoó˜ýîž¸ïÆ°°PœúèA€8,PÅéP†GÛ‰T-PÅgtžûŒ®z]´G€ @€À䌇ÂG|6|4ýùü¡¯…»òûÂþx`r&`¤c'°:;1œ];-\0ÿ„ðÃ'>=üȉÏ'dÕ…ÊÞ»¥Ÿø kïPöïÄo—±›À°ö¯½;vK?ñÖÞx( @€ Ч@õÿ:×ç€T'@€À¤ ¬]»vÕgŸÿ¦ÔøÚtReÿG(­‘Nƒ|V:Ùñú aÍo囯ø¹=7\÷ŠI"@ FÌÃ'~!Üvà3᳇îw6¾ö¥ÿÓÿyh ¤`õ°&ýGîÇ×Ï ß?~¸(ýîçœð´tÊnµoóöcSܵ@Ù~¬$xâÈ Žöd»•×(Û“ÍrU\íÉ*§» ûqº×wÒf·|? 
gö=…F‹Gï}ÓN}Ë÷ã þNÓ„´'›®­†¹'íÇV« ½)0ÌýØìÓ¾lJ¸¶æ¾´[­‚ô¦À0÷c³Ïå×ÅØ»÷ýu¸ñ¡½á¾øÐòl¯ ô,PÈ~¹q×á?ï{øöpZvRxÕIkÃú5/sY½çv›íݦ„kÕƒÞ»ÅxíߪWM{MAï_{·)íZµÀ ÷nÕãÕ @€IH‡‚y46lÚúç)0éÒæë•¯ñã7ݰí¹+çI%@ À¥›·üi üûgíÊä±ñìÝ;wüm»2㘷nãåÏ®×æ¯Kq0ÏâøÒ×ãÕ»nØö[©OYC„Ÿ¶®ÞÛçgjÿļóÞ‡ÿ&ìxðé7üÞ;mËi>C8»vz¸ò1/ /YõÜÏûêÑ~ì‹Oå$°t?þï/ÜnzϧûryæÓÎ '¯Ýç=²/ÅÙ®¼tOöûYHzŸœíýÔïìíÇ~Õ¯Jào}~XsÿÉ}5÷ WüP8ûÌÇmÃûãQ Oz¨úý±‚=ÙÃB¨rT ê=i?¥õ¤ª÷csöeSµª÷¥ýØË*¨Ó¨z?6Û]ézwãÞðúû® _ZüÆJÙÒ Dà)s¿yÚÖpVýôžÛ·w{¦S±*önѽýÛÇ"¨Ú³@û×Þí™_Å>ªØ»}tßUÕ]tA_néª7…  @€ô&à‡Ö%n—`xJ`Óà¸~ÓÖŸ®…ø–årÂØJ›L‘iïy¨±ïŠ÷.,ÜSZX+ÌR€ãýùCá÷íŸ<ô¥$$èNàÙóO ¿~Ú•á”ÚIÝUî‘ Iï}Qª¼D Š÷Ç¢9{r ª§} T±'íǾ–@å%UìÇfsöeSµ_*ö¥ýØï*¨ß¨b?6ÛZéú¹C_ ¿vï[Ú¸Ž´ §9þÖé?¾oþ¼®û²w»&S¡B~ön1 û·ÂÅÐT×ýì_{·kn*ègïV8ŒÒ¦8–)@€ 0µ1ƒ! @`b6lÜòÊZþû¨‚ ¸©þ'ÕÖ¼ë¢WO,¤‚ÀWﯽçÍ‚‡`=+]²Åž*öV·û±[1åËŠýøßö½«¬XiþwûJË(@ ~Þ#‹ö½Ov¢¬L§öc§RÊ J féWõùÈóGÚðþØ'¤êÇôûþX4fOCêEŸýîIû±ÏPý~÷c³1û²)áZ…@¿ûÒ~¬b´Ñèw?6ÛYéZœÀ$¸q%iø/>tx{±›‡½Û–²ƒèuïc±±"ÚìF ×ýkïv£¬ì zÝ»ƒ‹6  @€Lº€ÇI_Aã'@`dë6_ñ¬®Kèò½4ÞŸ¾–ø®ã‰y¼:Äøºôç߯Þc¾-¥w}´\–…9·¶f{÷cŸŽ U ø­Ôÿþ¾·†o6t:Tøè¬ØSÅÞ*öX§û±S)庸7>Ðm•ãÊg±ËkŽkAGzy,j{Ÿ|ÔгêìÇê,µÔƒ@­šGï=Ø«R*ÐëûcѰ=YÊ«@½îIû±lUJzÝ͆í˦„k•½îKû±ÊUÐVS ×ýج¿Òu16Âëï»ÖÉ+áHª@¬PìÅbOvò°w;QRfÝîÝbLöï0VFt»íÝNT•†@·{wcÒ @€IðíÕI\5c&@`äë6m}J=Ö÷tzrc Z\ !¾5oÄç}ç›wœ¹ë†kr×Îm¯Ûuã¶7Ü´sÛIþã®×þʮ۷¦ô§Æƒ‡žóüWÒW ïêt²)ÈñÒK_µåw;-¯YH÷_xã};7ÎÊ‚`žÅ—8Š=Vìµ²‡ýX&$¿*N‡Êòt6´ ºy,ºõ>Y!¾¦Ž°#‘0$<Ëûî©ÑÈý½¦oE ´èöý±hÇgv+MéUt»'íÇ*ÔµÑJ ÛýØlǾlJ¸B Û}i?b´Ùèv?6뵺îÞ÷×áK‹ßh•-ÀPнXìÉNön'JÊ K ›½[ŒÉþÖÊè§nö¯½Û‰¨2ÃèfïkLú!@€ 0i'mÅŒ—±¨eñRpãc:LŠw¹éP/¸é†m?»{aÛm{÷îMÁŽí»v]ÿ•]7nÓ}ßCüÕôŸKën±–ýòú[.jߺ\³%ðÞ‡ÿ&|òPףΒÙö-Pì±b¯•=ìÇ2!ùýÄZÿÁYàØÏ¨»²@§ï‘Emï“+J­NÀ~¬ÎRK Ä Npüèßõ÷šÎÉ•ìA ›÷Ç¢yŸÙ= «Ò•@7{Ò~ìŠVáºÙÍæí˦„ë ºÙ—öã VA»Mnöc³ÎJ׃ñP¸ñ¡½+eI#02bO,ùOõöîÈ–GÇm:Ù»Euû· ¢¬‘ t²íÝ‘-ŽÛt²wÛT—E€ @`æ8Îü@€@·éôÆg!»¤ƒzyq c:™ñ•·,lÿûÊWä¶……ý»nØö;!f/ìô4Ǭ~ÿ¸†$˜QFÌÃŽ?0£³7ía {­Øs­öc+éU 8Á±*Ií B ì=²èÓûä äµ¹’€ý¸’Š´ då'}—õÿû>YVD>¾:y,:ñ™Ý7µ:èdOÚb*Ö·@'û±Ù‰}Ù”p´@'ûÒ~ô*h¿)ÐÉ~l–mu½íÀgÃ}ñ¡VÙÒ ŒD Ø“9ð™¶}Û»mydŽH “½[ ÍþÑé¶­@'û×ÞmK(sDìÝ M· @€&B@€ãD,“A 0.k×®«…øŸ;O[ŠS;)[Vf××þÏÅFãGCˆ÷–•MÁ—­ßtÕÆ²rò Ì‚À'~!Ü•—Þ6³@aŽC(öZ±çZ=ìÇV2Ò«¨ât('8VµÚY.PöY”÷>¹\ÍëA ؃’Õn+¼‚¿Ûx°UóÒ 
T&ÐÉûcÑ™ÏìÊÈ5T"ÐÉž´KeW&ÐÉ~lvf_6%\-Ðɾ´½ Úo t²›e[]?–=Œ£ÀGKö¦½;Ž«fL…@ÙÞ-ÊØ¿…‚Ç8 ”í_{wW͘ ²½K‰ @€Ö[ÛÈ!@€ÀqgžuÞK³,{úqËòø{»wî¸nyr?¯o^Øñ¹¼×§6•µ“‚µ¬Œ|³ p[ÉoTs®@»=×.o¸£ÔÛ´ Tr‚côWÄiÝã0¯²÷Á²üq˜ƒ1L@Ù~+ËŸ 3†@ÌZŸòÝiÿ™ÏèN©”ëS “÷¿NÊô9 Õ (ÛoeùGò„@î·NËU0$MH§.•*Ö>!*ÊöcY_Ÿ?ôµ²"ò ŒD lo–ådÐ:%:Ù›”I`e{³,cÖ'BÀÞ´ @€ô.àÛ«½Û©I€À Ä,Û\:í>ySÜÿ—–ë¡Àî…ícøÃ²ª)óYë6o~RY9ù¦]ೇî˜ö)šß˜ ´ÛsíòÆl†3¡±VAðDžMèì {ÊÞËò'aŽÆ89Wòs¢ý89k9#ÍbßÃÌ|F÷m¨ÎÊÞ‹V¼Gvf©T5e{Ò~¬ÆY+ ”íÇf+öeSÂueûÒ~Æ*è£)P¶›åZ]ïÊïk•%ÀHÊöfYþH¯ó™èdovRf¦M~de{³,d×ñÌ Ø›3¿ @€ô! À±I¸³ñIŸ‚ñO˜@»=×.o¦i¸c*PÍ ŽÇty§bXeïƒeùS`c#ð’ŸíDZYª©H^« À1úŒžŠÍ0“({,¦à=rrІX¶'íÇ)Zì ˜JÙ~lNÁ¾lJ¸C l_ÚÃX}4Êöc³\«ëþx U–t#(Û›eù#¼ÎgZ “½ÙI™™F4ù‘ ”íͲü‘ \Ç3/`oÎü@€ Ї€Ç>ðT%@`¶Î «_²ì1mgÃgvïܾ»m™>3÷ìÙs_ÈÂŽ²fÒ)Ž—•‘O`ÚöùáÓ¾Äc7¿v{®]ÞØMÄ€&R V<át¨‰\ûItÙû`Yþ¤ÌÓ8'C l¿•åOÆ,r\|FËJG'¼ÿuR¦“¾”!Љ@Ù~+Ëï¤et*Ðé~ë´\§ý*G @Ù~+Ëo×¶<Ý ØoÝŠ)O€ @€ @€MŽM W” Ô²•‰Y¾+•éÿh†’Žañæ’"!A>»´Œ¦\ þvœrAÓëV Ýžk—×m?ÊXIÀ Ž+©H'²÷Á²üqš‹±L¾@Ù~+ËŸ|3ª@Öÿ?dÑ?ãuÍf¸³NÞÿ:)3Ħ^±@Ù~+˯x8š›qN÷[§åfœÓô+(Ûoeù C3 Øo6 @€ @€ô*à›1½Ê©G€Àì dñ¥“γ––© ÀW¿ðwÿ35“·m*†3×®];×¶ŒL @`jœ55Ki" 0ey­ý_ß;™næ”åN˜”!@€ @€ @€ @€˜@ޏhc4älŒÆb(­¬Sk›®r²˜µ pŒ1.Þs÷·uÕh…o¿ýöCé· ~»]õ,=N9圳ڕ‘G€L@:IºïÉ8ªoB  @€ãœ²|‰ @€ @€ @€ @€Gœìu”“Ù%—^uáü\xY µ BÎÊB<;Ä,MÅÇÅ-¦zwgY¼+=¿+ÄðÍå·…ƒñÝ»ví¸³E›’«°NÕ›Óâ%—\vvÚÿg“¸ìE²oïÝ»÷áeÉ{™.ï*îÉvÌ­ž;'å£]y @€ÀtT<át¨éØ fA€ã%žÇãǾ 5@€ @€ @€ @€ 0¦ÇtaF9¬µk×Î~öy—¤`­Ÿ Yö²tüßÙÅx=0=;ü"•¡ž²ÎK çIJ/k?çc¼tóÖO¤æÞ™NµÛ¹{çöÏmTñØðª­¿–*·¶k+[~p×Îk¾]™ªóÖoÚòŠt$êïµm7ËÞ}Ó ×þbÛ2fŽû:u8É)6?w^ÚÏw§ ÞÇ'#¶ø·Z¤$9gÍá»°Më‹‹µþ¿EÙ¦}Y @€ÀøÄ >öÓ/PŸ  ˜ŸÑS²¦A€ @€ @€ @€ 0ŽaÜF×oºjC ˜úRüÖýÌâHØ…©­ Óó_ß°ië5‹û¼þæ›oèû$¹<Æ÷ÕkÙÛŽ/†óÖ­[÷+{ö칯m¹ 3Spã«S@èSÛ5™ÆþÁvùæMÂ:u:—I)wóMÛ?žÆzö…^8ÿ„§=íœz¬O:Õôñµ=>–ôøø˜^§‡úÈÎ-뮞ǯ—•‘O€L‡@%'8 pœŽÍ` 0Vy–÷=ž,¦yò @€ @€ @€ @€ 0…§pQ{™Ò+6_ù¼¹Xû)@ïy½Ô/©SKçÝýìÜê/Ûðª-¿OÜÿ¦½ –Ôi™½gaÛÇ.Ý´õ3éøÈg´*”ú;±vâ)¯Lùÿ½U™*Óׯ¿ê¬tLÞO´;ï¦8ýïŽ/~úÝýô;IëÔÏ<ǹîí·ß~(ýùjcñgdŸÜ¸å{Ò>_S2€C»woê©’%ã‘M€ P ’Ó¡rÁ\"M @€À¬ TqÊrÞî_fÖ¼  @€ @€ @€ @€¦AÀ·W§aûœC :ü͹PÿЀ‚Ž®ÆÊjÙ¯ŸQ[ý± .âÑŒž¤ït]SZ- W––©¨@vB¶)TÙ6`8 ñº"8®×.'qz«zå'ÔÂKËJž™Ê¤Ø[ @`b%§C ž˜…½bŽ 0\§,×[o @€ @€ @€ @€“% 
Àq²Ö«ÒÑ^xá…ó6]umtXiÃ%¥@À§góóÙ°éÊç”m™ÝØwp{Œq±e"#†òŠÍ›Ïk[¦¢Ì¼¹¥¬©ÅF^”¹B#“¼N+LGReÙËË›ŠŸ)/£ 0-•O8jZ¶ƒy @€À äNp£Õ0 @€ @€ @€ @€qà8n+2¤ñüøÆ§>ñiÏ|O–ÕJó2¤,œ“…Ú_^úª«^ÖKû7ßüö»BÞÝ®n ¤Ìjá„ËÛ•©"oý¦«žBöJÚºý ×}ª¤ÌqÙ“¾NÇMHB%6\ñ„t"h齓…ì•t¨ @`2ÒßîbŸ‡7§Ÿœÿ<«m” 0IYì{´YîŸqûFÔ @€ @€ @€ @€ÀX øfÌX.Ë`õÌO8¥¶æýéËËÿ´›žÒW±¾‘¾íüÖtrâkc#n\ Òkäáò˜ç¿c¾-Æp_ÇmfÙcb–½cýÆ-u\gIÁÔ×[—¼\ñiúþØ•+fT˜˜Å¬´<ïþôÆiY§ ©5Õ˜Ÿû×!ËNh¾\éšîÓxhÿŽ+áH#@€S,àÇ)^\S#@€‰ˆUœàÓ/!ð @€ @€ @€ @€ 0…sS8'S*¸ ¶úMYž[Rìhv $ܳ¸˜ÿöÍ7mÿøÑÄO.¼ðÂùóžú̯‡ìߦÃ_Ö¶(v49²8êñí/ùå?ø®w]ïÑŒžÜñÅO¿;Byw Ô<«UñÔþ¬ßtå³vïÜñ·­Êô™žºhD™üì‹¿½Û~¦eº·òí^¶qã9©Äϵ/•ÎÍÂÇn¾ù†”ìA€Ì’ÀáŠF3N¿À£ï“ ûÚ @`ºò,ï{BÅç³ @€ @€ @€ @€ià8«ÚfN6n½$>ýb›"fÅð©v×ÎëþæÑÄöÏn¿ýöCéÏ­©Ô­ë7mÝ’Žý½èø¸vµR€âOJNp@1}ÃŒ @`¤•œà˜ pé"êœ @€ @€ @€ @``)þÌcV^±yóyY-\ÓÑ|cÜõ|ßóº n\Þîî×n‹‡ÏŠ1~vyÞò×)ÈqýúW]õÚåéå¯cé|Ò&¿<µ3½^«g[ÊÆ˜7òÒ1.mc:×ié =ïU`ý¦«6¤{å•eõcˆ_yøoÿIY9ù @€Àô ÄZ'DåùÑyú°Íˆt*ÅNK¶,çÇ–42 @€ @€ @€ @€&\À7W'|»~=žø‡é4Å3ÊêÄþô¦Û^¹waáÁ²²eù»ví¸sqÿ¡ !~º¬l­–ýÎK_zÅ)eå–æïÞ¹ý§ñ¶?a2˾'´¸vi½*ž_´qãêÔNû`³¿¾;x7ýMã:u3eWذáÊs³¬öG+ç›cþ«·ÞzëcS½"@€fAÀ ޳°ÊæH€“&û“¶dÆK€ @€ @€ @€ 0DŽCÄeW—^zùÓ²,\R6†tòÛï¹ë+ÅIŠýÿjù#Ý|óÛïÊÄš¼«}ÿÙ)kN©¿º}™ãs³P~*eVË®<¾f)çÖW'Ϭm@f:?g{XXhtÚÓ4¯S§Ê/°víÚUa¾þŽ´×Ï>>÷Ø”ðû‰Ý;w\lªW @€À¬Tà˜§Ÿ:< @€Ê*ù|Ž>Ÿ+[  @€ @€ @€ @€Œ•€Ç±ZŽÁ &ÎÍýRj½íz§Äox`ñ•{÷î]¬z$»wo¿;˯)k7}Wë_„ëeå–æ7|÷íéõÃKÓ–?OÁ—[žÑ×ë쪪_ÓA™£E¦yŽNÒ“®Î<çü·¦=üܲŠ1=BŒ¿œÊU \Ö§| @`Ìjýÿ  ³E5˜t|®žx Xýpxx;°ÿ1…}'?:õðài÷‡ÎønxÂ9§„'>þ´ð½O8=<åü3ÂÓžtfø¾'?6<ý) ß:ÿÎðÀ™÷M:ƒñ @€ @€ @€ @€XQ`nÅT‰S%ð’ÏH' þTÙ¤bž_ý®w]oY¹^óoºqÇžK7mý@È [µ‘…ì‰ë²5—î áÆVe–§ïÙ³ç¾K7oÙæxÙò¼G_g§œ~öy§× ¦õþlýú«ÎJd/ )ê¬å#ÆïÞ¹íó-ó—eLû:-›®— ¬ß´õ ©h›½ýhC)œáww߸í<šâ 0ky&ÀqÖÖÜ|  @`ü¾sÞ]¡øÓîñûg¯o™ýßïÚÞ2O @€ @€ @€ @€Ià8é+ØÁøOª¯~M Ã[Ó®húôß펿µ]™*òbþ0«·p,ú¨ÕâëÒ¥ãÇÃãÊÃ[Óù”mƒÀ²,»2•­$À±vB ¦Ì²¶÷O2½æðØ:üŸ™X§-{D`ë¶þ»CûúŽ7nO{ë—:µH{ýßì¾qû[:-¯ @€ @€ @€ @€ @€ @€qè:€lÜ&`<íR°ÝsÚ—HQ„yc¨Ž·-,ìYvg»q¥@¯“/¹ä²Çµ+³RÞâ¾{þ<…N>°RÞÑ´Zè9À±k[޶³Â“"p3ß¶BVÛ¤Y[§¶3–yñůyÒSŸùŽ´ç;Þ—)¸ñ_í¾áÚß›1*Ó%@€ @€ @€ @€ @€ @€˜2¹)›é+b¦²3ŽMZáU­öÖ ›·÷$¸ÏLAŽ+ æÑ¤úê¹bìw=šRþì–[nÙ·aÓ–!d¯nU:õúýë6n½pϵ··*³RúE7®NC¾t¥¼%i{÷Üp×—¼îäéÌ­S'(³PfÆ+ÏÍN¨Ýœöë?êd¾Emzüs'7v¢¥  @€ @€ @€ @€ @€ @€À¸ p÷êc|ëÖ­;5Uïà”Îìôö¡†} 
¢UÕ’àÆ¢ZžÅÓ[Uo—žŽP|kV-‹ºµZ¸2]º p<§¶ê)í”v}‡,^Ó6…ÌY]§(f*iÝÆËŸêµ[ÒžzBG±‘N'ýÙÜxmGå"@€ @€ @€ @€ @€ @€ 0æ¿ù ¯¥@cþäž[68äŒz#ô4þÝ Ûn 1~®ÝpS@çæ°qc½]™åyé4Ì«–§û:Þè¡ûnúÿÙ»09¯ò@ÔçTukón^; ’8la2( ÆH–Z´%ÛR+,!BØBf¸.KÂ}`€ä“dÂb,ÉŽåÂjYÂQ0p¯.q ˆÉ¸ ØŒ1–¥îþÏ=4HB]­Ýµ¼ÅÓOuýç;Û{>ž¦?£Ÿ•Õs*—Þˆ —mYS­Œ}$çT³Å‹ÖMß°Mqã𦅠@€ @€ @€ @€ @€ @€FN@ãy5TNèíUcÛë/bh|“b g­«¬zb³>k×^~f޽¨Q|¾]{÷ÞÛ(æxm£|NÇóög6N½4_!º;ÄxbS{Má΢OœÞ¹íýMÅ "@€ @€ @€ @€ @€ @€ 0 c²NËlC S¾1ßU8 ¯T´wƒc}»ñp±-ŒWþ(‘-xKc%¤Í9ô–fxÆVŽ]cløçe.‹*˜h”Ïi’¡|¼zõê±3Î:÷Osvþf³ÌE³_N¡xÊtmû§›í#Ž @€ @€ @€ @€ @€ @€À ¸ÁqPNªuæâ¨“ÚèÖ?]*•eí.f×®_K1þu£þ¹ôsýš5kV5Šùq[eË¿?Îw)}ö¦ëwüÝqZJò9•â IÀ%—\qÚégûVŠCHÿ8{ðÐc§w*n’4°  @€ @€ @€ @€ @€ @€c82Lc ßèýÅáNÖ‹¹Æ7*Æxâø §­+›cÆ­?›-©Q\JñšFíÚFýœÙ CÛÚÉÍç/?qìïcˆ¿Þì~R7{îàݳçúÛ›í#Ž @€ @€ @€ @€ @€ @€À  Œ Ú‚­·³ü'¤@IDATy‹;c¨6ì ©þ=Ì¥ÿÖ0h‰+ÅÜ?u2õgÒ¡½?V~+–ÝoÁqR¸2·]·`{n(ÆÒæJÈ÷=.ôJiîp Ûj.{>êçTæ3Èí—nÚü«c©º;§ÏéÍî#¥tÕ®âà‹C­6×lq @€ @€ @€ @€ @€ @€Q@ã žZ“kž)â]Ë×7†ÂŠ]µmÓM9PaÿZ«~ØÆ©kó ‰/j°ð‹Ö¯ßò€ééíw,SÉ’õ"È_)ƾ¿vÍW (iõs*áØæ‰É©õ9w®ËÅ+šÜD‘Rñ²];·ÿI“ñ @€ @€ @€ @€ @€ @€ ´@e Woñ †ûîlcHç¬^½zˆ ]‹«ÄÇⲸq¡˜õ“[VçÉ/Ô^^¡áúÖÛœS™ÐàµoØ´õ¹±jyåÍ7¦tON¤uŠשּׂ˜ @€ @€ @€ @€ @€ @ }ŽíÛõ}ÏÕjw‡”æ.4Æê©gõ †1Ü8½sÇ?¥>Ùp 1\¾P{¬T¶,Ôöýç)Üyè»wÜÔ0¦¤Ñ9• XóÄe[_‘—ü?Cþ³ÕÌÒSH_™+fõƶïm&^  @€ @€ @€ @€ @€ @€aPà8,'yü}¤ã7Žßôã§qnì?ÿøÓð}—o©l|Ãb ˜Ø|ö±;Ï7[®ˆ1Mûü¨Ï1üå¾}ûõ¬õΩu³¾ì1±iêUùæÆ7´°¸ápñèݵë>ÕB¡ @€ @€ @€ @€ @€ @€†B@ãPc£M¤5j­·ÅJõIe1ƒÜ~Oqðº|‹ã‚Eˆ±þ¯®;vg<àA—d“}~ä繹иxòÈà†ß;§†<и~ãÖׯ_×ìRsNî=ü½;ÿë®];¾Ölq @€ @€ @€ @€ @€ @€†I@ã0æqö’BüÈqõ(†pÑQ†ìÃjµ;óMŒ75ÚVŠaýO´ÇÊ?ñìÈ)üóîÚ5Ž|Ôî÷Ω]¹þè·aãÔË+1¼ºÙÕ¤”®ÚUÜ»~ïÞ½÷6ÛG @€ @€ @€ @€ @€ @€a¶ ÙÏÑs3ÅG*ã%u¬1üÜÚµ›ÎÙ³çúÛîÝ›OëÖ­;µºâä·§¾Cå‹¡(¾4“ÒïþæW¾´ÿþûz1k1W\]©V/[hì\ä¹ú’K®8íæ›¯»«sñÅWžb|ÚBñ?xžÞÛ¸½ùVçÔ¼U¿Eæ›§r.¿1çPé+6¦ãïOïÜöæÒ` @€ @€ @€ @€ @€ @€†\@ãðÝßþò§Î8óÜ{r±Þ‰¶:¶jÙssûkÅt«­²ìä…7ÇðÃ’°J%ŒçÁO?óÜ4±ië1¤/¦¾xÏ]éٷܲý{ݘwº¶ã–‰MS_És>hñÆ—ŸT}znÛ^o_yJe"¿­¨¿Àk¦8œv,ÐÖòcçÔ2Y_t˜¸lê×BHïŽùU¶ \Ú8bzÖôõÛ¾ŸceñÚ  @€ @€ @€ @€ @€ @€ »@ÉÕ~þýáß_¾q6ïòïÊvš«³~wíÚµ'•ÅuÚ^¿1VB.püÉW½H,¯ãÌ\üø˜üÝùÝ*nüáLE(¶ŸœõÈ'qý>¥8ù£ï÷MJ5=½ýŽã5µóÌ9µ£¶´}Ö¬¹ü~¡®Í¹ZZ(ž vå›J×íº~»âÆ¥=6³ @€ @€ @€ @€ @€ 
@€ô‘€Ç>:Œ^-%áåcÇÓÆVœò¼ò¸Î"VžRý\ÀxZÙ(©(þ²,¦ÕöÙT\ݰOŠOY³fͪՓ“ù¶ËøÄF±EÇjÔy6ç´LŸ>?aÙ{ó g—-¯^ÜSqé®ÚŽ›Ëbµ @€ @€ @€ @€ @€ @€%Ž#pÚ»jÛv¥”¾P¶ÕX‰¯¾tòʇ—ŵÛ~ÑE[NÈ}_RÖ?¯5…ÙbgY\«í{j;>RøÈBýòõ‘«ÆWœö”Ó*+.Îß/_(.…tÇmŸÿ׿Z¨½ÝçΩ]¹Åï·aãÔ‹òm£—47súoØþæbE @€ @€ @€ @€ @€ @€Ž£qÖùÂÁðÖò­Æª•êû~pƒayt‹ñ¤Óã;òw÷+ïß¿k×µ_)k=¢ôæÅJXCåÒF#çò˘iÓf›sjn1»­›¼â‘)Ä767gzÏ®ÛÞÕ\¬( @€ @€ @€ @€ @€ @€£% ÀqDÎûkÅÁ«ó̓ß*ÛnŒñ‚3*+w¬^½zEYl+í6myKqk}йböšˆk+ä®âÞZHéž…:盟žÛÞÌWsï]¨§ÏS§‚½ï_©ŒÿE£>çW a?ùí¯ßö‚ùÏÞ  @€ @€ @€ @€ @€ @€8Z@ãÑCûéÖZí`*Òë›Ú`Œ—žqæ¹^³æò&n[,qâ²­¯ ¡ò’òÈkÓµ7Õ®ýçfbÛ‰Ù_«Ý“B¨-Ü7ž–‹×N]¸=èåúœSù>hZ?¹ù©9?Uº”î,â¡ û÷￯4V @€ @€ @€ @€ @€ @€Ñ}·½í\wÖ†S/m{€.wüâçÿõí˜ifØé¶¿mbãÖ_ËZëJãcü•ñU㟠ºž7]ÛñÁÒøã¬[·îÔÊòS^‘çûýã4ÿÄ£|ãÝwÃÌì«~¢¡ËæbqõX¨>³a‹¢¸º~­ôqN­h-nl¬V^ÝÌŒù¿'>ZMË&òW4ÞÓ˜™¢¸yOmÇg{:‰Á  @€ @€ @€ @€ @€ @€´! À±E´âƒB oi±[ÏÂOúéŸþ‹Ðdc}Å¡ÿýÌêò“!ÄøŸÊc|H¬Vo™Ø8õÙÙôÊ=7nÿDYŸzû“&'O99®|qîÿ’luJ3}¾S¤gíÚuÝ—šŽo3ð¦ëw|dbÓÖ!<´•!ræ¡Ãß›»®•>íÆ:§våz×oݦ+Ÿ˜ÿü?®™rQïšbþZú×XµòÕ¼ ŽKV@€ @€ @€ @€ @€ @€ pŒ€Çc@†ýãîÝ»¿³vÃ–ËÆÇÂGs‘ã²fö› Ÿ2>Ÿ2±iêK¹Àëýi.ퟫ·ÇTýF<\|7.+Î+ÒØù•J:?¤zÑ`\“ ¼Nmfìù˜"¥·N×¶½oþsïßÓ{ó:ÿ¨•ybL7Ý|óuwµÒ§ÝXçÔ®\ïúUBµç·‹önõF&@€ @€ @€ @€@ÿ ¬ŒËÃÁt¨he#+PÏÍF/¹ÛHGÛR ”ån}mòw)OÈÜÊòWî6ÒÓ¶”e¹»”k37 @€~¨ôû­¯ûß¿‰1ó„÷¶2z.nö繌êªä㨞|î»,ë«ö÷Èþ<»a]UYNÊÇa=ùþÜWY>ίZ^ÎKx_ ²¼”‹q æ˜(ËÇù¸…Þ½ü‚…š<'°¤)ÉM¹»¤Çcòe¹[ï*jZR²ü•»Kz<&o P–» ºj"@€ 0ò G<Þ_ÛöÕ™{ïÌ79Rºgq8ÒÿJExü®¶½zqæ;þ,‡¾{ÇM!…;ßzôÓ”â{~²¸ŸFùœWúø³åLJ¿ÅSú]à‚ñsû}‰Ö7dÿ¹AÎÉÇ!;ìØN£|¬/_NÀ!Ùåäè€oG>øÙòåãè€o§,ëÛó÷È?ä[~YNÊÇ;Ð_nY>ÎoO^ÎKx_ ²¼”‹q æ˜(ËÇù¸…Þ—‹ÈN',Ôì9%¨çäc—?¼áÜr·!Æ%h&wëK“¿Kt@¦m(ÐLþÊ݆„—H ™Ü]¢¥™– @€À@(pˆcêí"÷îÝ{ï®ë·¿êÞâàO)½5Ïv_OfLá›E/ÿö×o»pº¶íÖžÌÑ ûöí;”Bº®´KJ7½sÛçJãz0ªçÔcÖ¦†Ï®§5(ˆ¾x\ÉÿáØw ¶ h”sÚ~ã6Зe9WÖÞ—›²¨(˹²öÞ¼Å÷@Y¾•µ÷݆,h Êò­¬} 7oñ}'ÐL¾5Ów³ (Ë·²öݸ…÷¥@³ùÖl\_nÒ¢N ,ßÊÚnÃÜ×æÛ²8.;au_ïÑâFO ž“ËâXÃË݆<—H ™Ü­/Mþ.Ñ™¶¡@3ù+wj\"frw‰–fZ @€! 
Àq Žiq¹¯Vûf.ä{Ù¡¹t~‘ÂëBHË·:Îu8{®™LHsiò‹Ÿÿ—N_Í›÷ïß?Ûá˜]ì^\]6X ¡4¦lŒn¶æ9uS°õ±b [gÓƒ@_üÒ²‡†3+þ÷ÅaŒÀ"ê¹VϹ…^òq!Ï{!P–õ9åd/ä¹€œ\HÆó¥K¡nÎ…äãB2ž/…@3ùX_—¿G.ÅéŒæœÍä¤|ÍÜXŠ]7“óë’—óÞ{-ÐL^ÊÇ^Ÿ‚ñçšÉÇùØFïëWý—ð±s…h#°hõ\¬çd3/¹ÛŒ’˜Åh%wëk’¿‹u2æiF •ü•»ÍˆŠY,Vrw±Öd @€ƒ&ëv¼,,pÉ%Wœ¶|ÕØc5\” ýS¸ñþ!†SŽí•R8bº3ÇüsŠéã±/fÂǦ§·ßql¬ÏÝpNÝõ4Qøà­ŸËÿ?¼¯¿>øñð–»ß7¼´³¾xÙÉÏO]ùè†ë‘ y4vQ ™|¬O''»ˆn¨†r²!ÆE‹ nº†ò±!ÆEh6ëËò÷ÈE>œ®Ùœ”#š ‹¼ífóq~Yòr^Â{/šÍKùØËS0ö¼@³ù8ßèý޹»Â ¾ý¶ðô½FaÚôTàÔxBxÇ¿PmþR•»==ƒ7)ÐNîÖ‡–¿M ë©@;ù+w{z$oR Ümrè®…=ùqó»â]Ó4 @€@¯ü¥µW²C>î…^8~ÞyÜo¶N› w­zè;·Öj‡|Û·=ç4pGfÁ–D`Ø óMÂá÷ïú‹ð©™/,‰¯IGCà‘ã o:í¹!ÆÆ½–£‘K½Ëfó±¾N9¹Ô§5óËÉÑ8çAÙ¥|”“uÊÇÑ8çAÙe+ùXß“¿GÊÉî:[ÉIù8¸ç<(+o%ç÷$/ç%¼÷J •¼”½:ãÎ ´’ó}ÊÞ?;óåðʻޣȱ J{OêE xÚ³ÂÏŒ?¸åñånËd:tQ “Ü­/Cþvñ0 Õ²@'ù+w[æÖ¡‹än—Q:”ÇR" @€} Ðø7°û`–@€ôV`Ø ëzwß /¸óíáëswöÓè#)pVõôðŽÓ_N®œÐÔþåcSL‚Úh5ëÓÈÉ6±ukJ@N6Å$h‘äã"A›¦)ùØ“ Eh'ëKó÷ÈE: œ¦œ”#˜(‹´åvòq~iòr^Â{·ÚÉKùØíS0Þ¼@;ù8ß·ì½~#Ó«¿sMøÂìíe¡Ú tMà!cç„׺µ¥›\î+âóbt#wë딿‹qZæ8V ù+wUõy1º‘»‹±Îú KÚ< @€T;é¬/ 0øSÏyákw°<. Zö°ð±ÃŸ ÷$7ÖÒÚŠ@ý—7Þpê³Zú?ºåc+Âb[h'ëãËÉV”Ŷ" '[ÑÛkùØkaã·" [ÑÛkvó±¾.ìõéŒæøíæ¤|Í|éõ®ÛÍÇùuÉËy ïÝh7/åc7OÁXóíæã|ÿ²÷*+ÃSW>*œW„/ÌÜî 3e]´h[ ~ûÒÔ‰…—<Nª¬j{œzG¹ÛŸÎ- t3wëSËß@xGÝÌ_¹ÛÑQèÜ¢@7s·Å©Ûßþî·¿¶íÎ: @€X$78.´i @€@¿ ŒÂ Žóöõ¥úõßÙ>5ó…ùGÞ ´-ðÈñ‡„Wº¹é›H>+âs'æc}n9ÙÉ è{¬€œ/¥@7ò±¾~\ÊS®¹»‘“òq¸rb)wÓ|œ_¿¼œ—ðÞ©@7òR>vz úÏ t#çÇjæýpš èÓácùës3_ ß(¾¦CÍtCà¸+ãòpfåÔð°ñ…Ç,¿ Ö5ý=r´sªÓÝw;'åc§'2Úý»óšòr^Â{;ÝÎKùØÎ)è3/Ðí|œ×; @€½PàØ;[# @€tO ³ßÈîÞ:ŒD€,‘À¨8Î3Ï¥"|òð¿‡[ý[øôÌmáksß÷æí7åÿx˜ˆ!†Uù_ñ=»zF¸`üÜð¸ü/øþÒ²‡†j¬Ì‡tå]>v…qèY¬|¬CÊÉ¡O§®lPNv…Ñ ]]‚4LWäcW Ò%ÅÌÇú’ý=²K7ÄÃ,fNÊÇ!N¤.mm1óq~Éòr^ÂûB‹™—òq¡Sð|^`1óq~Nï @€t_@c÷MH€ Ð}ŽÝ75"(Q-p¨C²X @€ @€ @€ @€´( À±E0á @€K"ÐÝ«g–d &%@€ @€ @€ @€ @€ @€ @€M@ã ˜õ @€ @€ @€ @€ @€ @€ @€!Pà8‡h  @€ @€ @€ @€ @€ @€ @`ÐÆmÁÖK€ @€ @€A8<3ž sE žŠ|ŸR˜›ËÏóûÛ~øý÷Ûr\~¿ÿé'„³ïÒ mÛZ  @€ @€ @€ @€” (p,%@€ @€ @€ö>ö©/‡ßúÿµ?@îù_.ˆ}º.Ë"@€ @€6¿éÏgC Õ6ºþ¨Ë;_’Âçþè£o @€n9ÂëvtáG±)]÷ÑëŸwe‡ËÑ @€ @€ @€ @€@߸Á±oŽÂB @€ Рί_<<Óuƒ˜¨vë§°1ŽÏé @€ @€ @€ @€À0tëWk†Á @€ 0ð)†Ôé&Ïv:‚þ @€À‘ÕŽîV>b¤”ÆŽøä[ @€ @€ @€ @€/ ÀqàÐ @€ ðc˜bñãOí}73×^?½ @€ã Œuë§°1*p<>±§ @€ @€ @€ @€*Э_­Ðí[6 @€†L  78ÎÌ ™‰í @€%pƒã€é  @€ @€ @€ 
@€úV@cß… @€ @ u”BÇ78žm}^= @€…ªÝú)l ã Ï¢… @€ @€ @€ @€Àà tëWkoçVL€ @€!ˆ]¸ÁQã&†- @€À’ Œuë§°)Œ-éFLN€ @€ @€ @€ @ ËÝúÕš./Ëp @€ ÐŽ@ ßà8;×ÎÌú @€ T« µ´øVG€ @€ÖRçŽ3 [3M€JªÝú)l ã%Si&@€ @€ @€ @€ 0PÝúÕšÚ´Å @€ @`xRÑéÞff;A @àH±ný6&78 ë{ @€ @€ @€ @€èÖ¯Ö <„  @€ @`(bnpTà8©` Ð?Õj—Ö’‚Ç.Q† @€ @€ @€ @ ?8öÇ9X @€º$ç:È Ž êO€Ž¨vë§°1Œ=²O @€ @€ @€ @€[ [¿Z3Ø VO€ @€!ˆ)ne¶ã:]þ @`¸ƺvƒctƒãp¥†Ý @€ @€ @€ @€F^À/ÄŒ| @€ @`˜Rs±Ã mÿP Û?Ôá ]è~Š>ðua %âyo áŸÿ£ÓSYâMtyzgÛeÐ>ÎÙöÑaty)ζˠ퓟ç¶k§ @€ @€ @€ @€@_ ¸Á±/Å¢ @€ Ю@œk·§~ @€@Ÿ ¤0Þç+´< @€ @€ @€ @€- (pl‰K0 @€ú[ ߨÀ±¿Èê @€@û1ºÁ±}==  @€ @€ @€ @€úP@cŠ% @€ @ ]’Çvñô#@€ý.’Ç~?#ë#@€ @€ @€ @€hI@cK\‚  @€ ÐçÑ Ž}~B–G€Úˆq¼ýÎz @€ @€ @€ @€è?Žýw&VD€ @€âlu%@€úYÀ Žý|:ÖF€ @€ @€ @€ І€Ç6Ðt!@€ @€@¿ ÄæúumÖE€t(ãX‡#èN€ @€ @€ @€ @ ¯8öÕqX  @€:HÉ Ž êM€úXÀ Ž}|8–F€ @€ @€ @€ ÐŽ€ñ»5} @€ Я1ÌöëÒZ]×Ìl¼/_HùÆVûõ[ü·î®>3¯éÜ~[×R®ÇÙ.¥~oçv¶½õ]ÊÑíRê1·Àð- @€ @€ @€ @€À0(p†S´ @€Ì ¤4bœÿ4Ðï‡f¡xö ^3Лȋü¦?{R>ŽG¤³=cȾu¶Cv GlÇÙ±Ôß®~ÍXØÿš¡ù –šÓü @€ @€ @€ @€K+PYÚéÍN€ @€]ˆq¦«ãŒè+ÇÝÿäñ¾ZÅ @€ @€ @€ @€è@@cxº @€ @ ïRr£Sߊ @€î TÍŒuo4# @€ @€ @€ @€XZŽKëov @€tY *p첨á @€@? Ü=w’Ç~:k!@€ @€ @€ @€èH@cG|: @€ @ ïfúnED€tMà¤eq¼kƒˆ @€ @€ @€ @€À (p\â0= @€º*ƒÇ®‚Œô—@\>ëÇþ:«!@€ @€ @€ @€è@@cxº @€ @ ßRPàØogb= @ ›÷Í » j, @€ @€ @€ @€%Pฤü&'@€ @€@wbJ³ÝÑh @€@? 
,¯Äñ~Zµ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ 0qi @€ @`&6N='ïëî-¥/íºaû›ÆŒhã†M[¯J!=ýØí§¹pùtmÛ­Ç>÷ypüY9îÙU.¼òg+±òøãÃc §¥Nù=ÖßCœË½¾òWŒéΔⷊ>UÜwèoöì¹þöãŽè! @€ @€ @€ @€Y`l‘ç3 @€ ð#øÔ\”´áG÷M ŸÈ8c3±ñÊG¥”žŸ »~òoŠiÅ1á>¼€?+õ#¼øâ+O^qRå7rÚ?-†øØ\ÌxÊ‘Gû£? 
Gü©øÁ·9:S­­Z6lœú|.|ÜŸŠâÆéÚŽ¿>r ß @€ @€ @€ @€ @`18.¦¶¹ @€ @€ŽV¯^=bõª”lu<œô½Àº[CzaÎùgåBÅ“:^pŒççZÇócµúœ|;æ?)¾qwº÷†P«Õo}ô"@€ @€ @€ @€ @€À¢ Tm& @€ @€º púÎ}m.ôzt†2¾Èż+râÛòÍ‹Ÿ«Äø¢®7³ã|ä/T+ẉêªÏ­Ûtåiö‘ @€ @€ @€ @€=PàØS^ƒ @€ @€ÝX?¹ùÉ!†WtsLcèGu“W<òŒ3Ï; _˜××óŸáåºÆn™Ø4õª<_þèE€ @€ @€ @€ @€Þ ôü—£z¿3 @€ @€£ °aÃÖŸ­T+;sÁ—â«Q8ðÞãú[ŸY­Œ},—>|‘*ùvÔ×MlÚúWO™œ<}‘ç6 @€ @€ @€ @€#( ÀqÝ–  @€ @€À  <}rêi,ìËË6hk·^­LLN=#W¾+ĸ¬•~ÝŒÍÄO=¡²êæÇMN®ìæ¸Æ"@€ @€ @€ @€ @€À±cÇ>ð™ @€ ÐOO›œÿÌ; @€ @€ @€ @€º) À±›šÆ"@€ @€º&°~ãÔÃr!Ö-ùë¼® j }*P‰é­ù¦Å›Y^JáÆ™"½|omû4_Ùµëº/å·7=nròígWW¾$ßôøÚ|«cùÏ+ñ÷ÖONíš®m»µÙ¹Ä @€ @€ @€ @€ @ Yò_bjv$q @€ @€. LLN=#ß÷î\ður—†4 ¾È·7^” y×6±À"Å+vݰýMMÄ7äÖZí`nxÃÄe[ÿßà yÞÒÛc5¼%÷ù•ãè! @€ @€ @€ @€:¨tÐWW @€ @€@W.¼ðÂñ|sãŸÄj¬)nì*­ÁúT`õêÕc•þ¤™åinª“âÆ#çØuÃ5;;7÷„Ò]G>?Þ÷1Äǭ߸eòxmž @€ @€ @€ @€ @ ŽèéK€ @€tMàÒM›|ÞùøÛJŒ/êÚ C:Ð¥¶Ôjseqí¶‡þ÷V—ŸòÌÃéÆˆÅx}­¥ÅÆÐF€ @€ @€ @€ @€#Üàx¤†ï  @€ @€EX½zõŠ‰Ë¶üþ²UË>Ÿ'|A.ÖSÜØ‘|üùJ¥òËN8í+ë7N½eÝÆ­éh8Eàì°ò’\×{bÃÉRø·éÛ§Ætظ{÷îïäâÆeÃä?¦kÊb´ @€ @€ @€ @€4jãÆ@IDAT @ Ž­h‰%@€ @€:ˆ›¦®8ý¬s?+•7梪Sš0¥ðÑ|ÓãÕÍÆf\<­ãK«1|nbãÔÍë6N]œök2Tâ“Ë––b±+Ǥ²¸NÛçÂìžÒ1bxdiŒ @€ @€ @€ @€´  À±,¡ @€ @€@ûë'·.‹¯Ú°iê†Ù™tÕM7nÿX»ãé×¼Àž·"GŸyá…Ž?è¡=«šªL¡rN%ÄsRLç„òç|£â¢¾âÙeÓU‹ôÕ²í @€ @€ @€ @€hV@c³Râ @€ @€ž ä+ánOEñšétß{B­6׳‰†dàéÛÿ%oeóš5—¿x|娳bŒ¿büOmo/ÆeõñÆÆãæ\èø‰¢WÝ÷Ýo^¿oß¾Cm©cS˜É__ÎÁõ¯%{=}rê¹X¶ìÇ™ééí‹z«ä’˜˜ @€ @€ @€ @€EPà¸(Ì&!@€ @€Ž/îÎÅoœùÞ]²wïÞ{ãéB{÷þå·rÛ›ò×›×mœzJ¾ýïy¹Hí’ü¹²PŸòçñ—+•ðÞ•§ÜÿÍë7N½k¶¶·¶í¶ò~"Y`Y%\\¶þ”Â×sLþ#ëE€ @€ @€ @€ @€î(p쎣Q @€ @€–Ò÷RŠïœ¹wæ~X¤×RoÁ?!PìÞ¹m_~ºoÍäÔ¹c•øÜ\èøœ™?Ùäƒâýòÿ}Y%½|bãÖ½Eœ½j÷õ×~¸ÉîÂN Ö cK^éßJ4 @€ @€ @€ @€ @ %Ž-q &@€ @€:H!})¦ðŽûî™}×Í7_wW'cé{|Þ¶øÊ /¼ðµç=ôg'rÔór±âŽÝÄÓ«¹Pr]5Œ­›Ø8õé”Ò;îJ÷]³¿V»§‰ÞB@`bâÊÅžB>鯜G75hÖD€ @€ @€ @€ @€–8¶L¦ @€ Ъ@Q|ïžjqâ3v¥C»C­6×jñ­ 8p`&íÌ=w^:yåë•êo絩\ÃvJë£ý GŒñ‚üuÕaår±ã5³EñŽ=µŸmw<ýúD`|ìe9/–5ZM.lM³+pl„¤ @€ @€ @€ @€–8¶L¦ @€ ЪÀîÝ»¿“ûÜØj?ñݸ©ví¿å‘~÷¢‹¶¼â„SÕJ|^¾­ïÛ=žcxáX¥ò;›¶~(Ì…«vÕ®y¯hL=—Bài““gåyŸ[6w>ïïÙsýíeqÚ  @€ @€ @€ @€ Њ€ÇV´Ä @€ @€Xà–[¶//ÿõ¯K7lyÌØø÷ 7æÏ+ÚÙV¾Í1æ~OÕðä‰[¿Rñgi&¾{zzÛ·ÛOŸÅXQ]ùö|ˆ«Êf.Št]YŒv @€ @€ @€ @€´*Piµƒx @€ 
@€Á¸éÆí»ñúm¿qÏܽLEñ{!¥Ïw²«\êøS±RyceyüÊÄÆ©÷LlÜüKŒ§oïÖoÜ2C|FÙL)¤/Ý÷Ýoýϲ8í @€ @€ @€ @€hUÀ ޭЉ'@€ @€ ‘Àjµ;óvÞ’¿Þº~ró“bµò¼˜ÂÚcµÍm®È;>3„ê37lœú»¹¯úòþå}˜is<Ýz 01±ùì+ÚÌÐ)°oß¾CÍÄŠ!@€ @€ @€ @€ @€@+nplEK, @€ @`xÒtmÇw]¿m"ÍÌýT‘ÂëòÍ}_ëh»1þJµ®û©óqÛú[_»ví¦s:O箬^½zE¯ÞC8³lÀ”Â'§w,N; @€ @€ @€ @€ÚPàØŽš> @€ @€!صëÚ¯Lï¼æÿ¼óë·йg„>ÜÑvc8«ëÇV.ûÒÄÆ©wv4–Î œqÖ¹ï‰1<ªl ”_!¥ßËq©,V; @€ @€ @€ @€Úk§“> @€ @€Ã/°ÿþÙ¼Ëë_k'7ÿÌX¥úÛùûßÈÅq§¶³ûãX.˜»¨¾útG ~“féòfFËUoœ¾aÛÿÓL¬ @€ @€ @€ @€´#àÇvÔô!@€ @€Œ˜ÀžÚŽÏîÚyÍK¾VÜ{N‘³òÝ~ÿ0b¿Ý‰Ë¶¾¢~“fSÉ·vN_ÙT¬  @€ @€ @€ @€´)àÇ6át#@€ @€Œ¢À­µÚÁ¼ï«ë_k7lùåññø¼ò€1®EAÙóúM[/†ð†fÖ›‹W¿|°¸÷òP«Í5/† @€ @€ @€ @€í ¸Á±]9ý @€ @€#.°çÆíŸ¸ñúmÏž;t÷9™â#ÎÑ·Ûß°qêEù‡€ÿ£©¦t¸(Ò3öÕjßl*^ @€ @€ @€ @€:pƒcxº @€ @€FZ`r²:V]*éù)¥‹bÌwzõ•ÀĦ©ÿ#„øGÍ,*Ÿa¾¼±xÖîÚŽ7/† @€ @€ @€ @€ (pìTP @€ @€Àˆ ¬_¿åqy|NLá·B çæºˆíƉ[ßšOæÅÍ®6—7>ú†×6/Ž @€ @€ @€ @€ (pìTP @€ @€ÀˆL\våãC¬>?Í=#5.SרŸá…ŽŸwþ#®Îj^Ùì ‹^>}Ãö?o6^ @€ @€ @€ @€º! À±ŠÆ @€ @€ ©ÀEm9áÄÓâæ\,÷ü\Ôøón3ÅðÕNÇÐa5kÖ¬Z¶ê´÷åâÓ‹Ž:º%7¾túúkþøè§> @€ @€ @€ @€ @ ÷ {ol @€ @`6\6õ¤"¦“c+÷Ý]|hß¾kï^Œ¹šcýÆ-ÄŸ— ·æÂÆŽöò+ÄøXoßuÃö} Íéyg›ÏŽË*{òyýr3#ÕÏ%¿žïæÆf´Ä @€ @€ @€ @€ Ð ޽P5& @€ 0tùæÁ?®ÄÊ#cc+N?—çù—ŘëÈ9V¯^=vúýÏ]*áù1Æ_;²­ïsYãwCHïM!\•oü\;cèÓœÀºÉ+ª•½¹¸ñAMõHi.ŸÏ³sqã5MÅ "@€ @€ @€ @€ @€@8öÕ @€ @€AX»vÓ9c+–ýf¨ÄçÆÎétíùVÀz1ãU³¿óÞ={öä"G¯^ l¸lËš|Cæu¹¸ñĦæIé`ÂeÓ7l{Sñ‚ @€ @€ @€ @€ Ð#Ž=‚5, @€ @ ß&.›úµúm!…uùÆÆŽ~N”‹óEa_*Š·O×v| _ÿìÕc §^š‹ÿGž¦ÒÔT)ÜYáéÓµm·6/ˆ @€ @€ @€ @€=èè×z¸.C @€ @€ô@à⋯X»aêçÇÆêEas.Š;¡£šÆ¼§\ ù™|[ãUw¥û®Ù_«ÝÓ'Û‰e\rɧ-?iì}ù½ù §œ=xøé{ö\{ó}D @€üÿìÝ ˜žW](ðsÞ™I“t¶,èb¡ \åªÙCIÚ™8Ítɹxñ‚ârñ‚"ˆËu»¯PDT¶f)MÆLÚ”R)\‰^ËjÙTEÛ²t£mšfæ=÷ÿ…¦Í2ó½ßÌ|³æ÷=„ùÞsþgû½ï{ò$Oþ= @€ @€ @€˜] ޳ë«w @€ @€À¼ œ;8¸ìœ¼r}•ÓË"±ñG»0‘:•òžñ”/»jûå×w¡?]LQ`Ýà¥ë­ªksÊï´i$£^{ëøÞ¡=»%¢vj&Ž @€ @€ @€ @€¹à87ÎF!@€ @€Ì™ÀÚÁá3úrzIªÒ‹ã¤ÆGÌxà’îH¹~ûx©þäªí›ÿuÆýé`Zç]ú㽥窔Ó)vPJyÓh½÷ÓÈÈx§mÄ @€ @€ @€ @€ @`®$8Ε´q @€ @€³+/ذéÙU./Ë%—rî™ñp%}&úxÓ·o«7_ý–»gÜŸ¦-008ܧ6^ÉË;ì¤.¥þï£Û·üq‡ñ @€ @€ @€ @€ 0ççœÜ€ @€ @€À"øó’ꙟ†ØÁâÇ÷Ž}£ƒ°!çwñÉËOèý鏸o‘üö¸ÿ×úß >uIåݹN—íܱùý3èGÓ. 
¬Úô_S)oŽî:KZ-守¿xtÇ–kº4Ý @€ @€ @€ @€ @`V$8Î «N  @€ @€¥&0º}óe iMëÖoü¡ÞÞüÒÈeŠÓWÌtn¥¤Û£·í¯ë?¹fdË—fÚŸöݸpÓ¯FO¿÷¸£#9õku=¶öª‘+>ÑQA @€ @€ @€ @€˜G Žóˆoh @€ @€ÀtÖo~U$¼ýÞtÚݦÜÉ—í¿ç¶-×\sÍ=G×+™/¡á×DZãoMaüÒ}õÚ«F¯¸i m„ @€ @€ @€ @€ @`Þ$8ν  @€ @€Ó¨SzH5½¦ßiUÊxIùšR×—íÙò×3éJÛÙèß°é7#¹ñµöIª×ì¿çÖ!IªŠ‰#@€ @€ @€ @€ @`!Hp\wÁ @€ @€s!PÒ­u*oËûÇþdtôŠ/ÏÅÆ˜º@œÐù+)O%¹±¼i´Þû‹éškƧ>š @€ @€ @€ @€˜? Žógod @€ @€ÀÜ”ôéºÔ—Ý\îÝú¡‘‘½s3¨Q¦#'7—Tþ Nolü”Ö'çÿ±kûæÿÝ,€ @€ @€ @€ @€ P@‚ã¼)¦D€ @€˜±@)­Óü®®ëòÆ]#[þfÆýé`Ö.þÉÔ:a3>MƒEjãXÊåE»®Ü¼¥)V= @€ @€ @€ @€ª€Ç…zgÌ‹ @€ 0 8ýï[¹No½¯¤7_3²ù+ÓèB“yX»ö¢‡¥*mË)7þ}])i_ªëõ£#[¯‡©’ @€ @€ @€ @€]hüS]IG @€ @€³&'ú}2Nô»ìÖ›¿ºmÏž=÷ÎÚ@:ž¾ã—½3Žmü®¦Î[ɹÔçïÙúÞ¦Xõ @€ @€ @€ @€Xèú2? @€ @€À$‘Ô8–rºj<Õo¼zûÖÿ7I˜â.°~Ãð/ÄÏëlšåçvîØ"¹±3,Q @€ @€ @€ @€,p Ž ü™ @€ @àH\Ê7K.¿—ö×oÝöµ#ë]/ /~JIùâôÆ>åí£Û7¿µƒ@! @€ @€ @€ @€XÅm2I @€ @€Àƒ£;¶¼þÁ+ß³@UõýyÎ鸦5”’>vë-_yYSœz @€ @€ @€ @€,&j1MÖ\  @€ @€,þÁKŸÉOm\OI·Öyßú={öÜÛ+€ @€ @€ @€ @€‹HÀ Ž‹èf™* @€{%¥ÓÖoþå…²òû—/»á†ö/”ù˜ƒ‹ñ]É=ÕkοÝÏXÛßõ”e±´ ›“ºýu}íî‘­ŸŸ“Á B€ @€ @€ @€ @€À’à¸äo± @€ @€ÀbÈ)?&åôú…²†Ï>ûϓDžr;ÌãÅö®\0tÉ3cÎO;d “~Sצ”ã×üz{ªYHpœÿ[a @€ @€ @€ @€–„@µ$Va @€ @€‘@•z^³ˆ¦kª @€ @€ @€ @€˜ Ž³ÂªS @€ @€ œ?4tzœÞø×*%@€ @€ @€ @€ @€À±# Áñع×VJ€ @€ °ªzÙ³À4L @€ @€ @€ @€ó. ÁqÞo  @€ @€K9§çKëµV @€ @€ @€ @€L& Áq2å @€ @€fA ? 
Ýê’ @€ @€ @€ @€‹N@‚㢻e&L€ @€ °˜JÉ'/æù›; @€ @€ @€ @€º% Á±[’ú!@€ @€ Ð@NI‚cNB @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€@§¹Ó@q @€ @€,| —<5¥ž¶›ië‘«®Üº§]Œ: @€ @€ @€ @€̶@ïl  @€ @€æN ¤üø*ç—¶±*Õç¢~O»u @€ @€ @€ @€˜mj¶Ð? @€ @€íÖ¯¿øì ÃÛÚG©%@€ @€ @€ @€ @€ÀÒp‚ãÒºŸVC€ @€ °ˆV¯^½ü”GœñÊTåWåTî[DS7U @€ @€ @€ @€ÌX@‚㌠u@€ @€ @`êë/Üøü’ósν¿µÇ©3jA€ @€ @€ @€ @€À"จož© @€ @€,>‹ÏL}}œrº /¾é›1 @€ @€ @€ @€º& Á±k”:"@€ @€ 0¹À¹ƒƒËžÐ³â©äWçœVN©† @€ @€ @€ @€dž€Çcã>[% @€ @€À< <¡Z±;§üÜ8¹Ñ‡ @€ @€ @€ @€B ¢@€ @€ @€Àì Ä©§Îþ(F @€ @€ @€ @€ @€Àâà¸xî•™ @€ @€ @€ @€ @€ @€ @€%#лdVb! 
@€ @€¤R×7–žü{í(Jÿ±]½: @€ @€ @€ @€Ì…€ǹP6 @€ @€9¸jäŠOÄP­_> @€ @€ @€ @€XÐÕ‚žÉ @€ @€ @€ @€ @€ @€ @€KR@‚ã’¼­E€ @€ @€ @€ @€ @€ @€¶€Ç…}ÌŽ @€ @€ @€ @€ @€ @€,I ŽKò¶Z @€ @€ @€ @€ @€ @€XØöý1; @€ @€ @€ @€ @€ @€ °$$8.ÉÛjQ @€ @€ @€ @€ @€ @€ @`a Hp\Ø÷Çì @€ °–ÊŸ-—Ê::yfr'A³3_ãÎÂRt¹”Ÿk›­§F¿G ,ågíȵ¶®—Ú>8Ñ• @€ @€ @€ @€èX ·ãH @€ pÌ ¬<á¡ÕŠïIù¬TÊ™%ç3S.gH\§Ó"[ãÞ(»-§r[Ié›ñócãuõáRíûÐÕW^ùÕ…×ZÇÉé¸ËUÏOFŠÉ¹1χ•’–S~XI夜Ó=±ž¯GÝ×KÉ_ϹÜ\×éïÆïÝÿW»w¿ë–…²ŽNçqÁ†M­J½>WÕw—RΈõëjý\ëýè竱ޯ䒾Zçü™›ë{F>42²·ÓþÛÅ]0¸iUÕSÖ¦ÏJŠg%ž™xNN?Ц¤oÅÏ[ã×M%—Ö©ÞsûÍ_ûðž={îm×ç|Õ=ç9?þäj}•ÓÓâyytÌãÑñŒ<:Öóð¸Þ?ã¹/ñü§ÏÖ9ýC®Ó?|ù_oüà 7ܰ¾æÜ4îúõŸ]úâ=(ùãyxXü<5ÞéSây85Úž÷ë„x×÷–œnÏ%·ÞëÛbÍ·GY¼ùÃûÇÇÿf÷ÈÖÏ73_õKeÏš/¿n{¬í½«W¯^þÐGœ¹.~ùáØ¾³_ÄÞQR~T¼oUì·DùÍñ¾ÝïÚ—S]¿÷?ʽïïÖþÛíû§? @€ @€ @€ @€³)ÿ–Ƈ @€L,°nýÆêëËÿ5’0.:ð4qXÛÒH¬ûlüáóÏÆ÷ÝyùUW]u{ÛàY¨|ÖààCNìYñ³1‡ócOä¬)ÿÇ~b ‘ß•?ÿw]Jã#»¶oýä,Lõ./¸pÓE=¹üÖG|‰¤Àß½rËÛŽ(>p¹jÕª¾3Ï>÷üT¥—DÁ3c½ÿÙ¿¤[ëTÞVò}o¾êÊ+ÿm¢þÛ•HbÊË/ªªª5öªv±Gו»ë’þ,ï¯ÿ÷èè֛ޮŸ^I–W†åk&êýü¡K¼7U/Œ¤ÐÁPD’Vù›ºÔ»o»åkÛ#AulÝu¥éBس6\òÔœz®˜hA‘àvzØ7QÝýeñ –/¶©\Ôô‰Ñ+7¶‹Ü´.Wåõíbê”{×öË7·‹™IÝ’Ü{SzãèöÍ—MâR \8ü‘¼1îñúxÏNš$nÂâØ/ëëRv^Uö¾+ŒŒO¨ @€ @€ @€ @€Àèü9.±…[ @€L,°fÍ%'­<©÷’8Ííg"Aã&Žšzéw’7Ê;ë}w¾z._08üèãrúÅ8Yò%‘l2¥ä´v«k%;Ɖ¹¿Íl`×áÆŸ$Á?l¥®_1ºcËQÉKC_˜Rõûñ‡ýGNÖ¶ÃòȱI;~ñõ×o¹»ƒ6y࿜rõ]°¾·ÔåFËÞ×v#Á§Ñ²¤?Ý~y+!óÏsO9¾Zù¶XËÎèKÙ1žî{åt’Fg4l4¸ä1©¯ç•ñ.ÿL¬§]bÝŒ†ŠdÇHÊ«÷Ëÿü™Ís}råBÛ³¾“Ûó·3mÓ8ÞÍÇ3û´6!©Ã¥—T¹gk»˜ØÊ^Þ&Y¯]Ó¶uKzïMå7#¹ôuGœ¿~ã÷ôæÍ‘Oþø#ë¦w]nLuùï;wlyïôÚkE€ @€ @€ @€ @`ñT‹gªfJ€ @€³-004|ñÊ“zþ=N zs7“[óŽäª•‘üñÒjùCnìß0|þl­¥u‚aôÿ{Çõä/¥*¿¢ w‡MõÀ‰ˆ9 öVÕ? 
lþ‹V2ÏaópÑ:9q`æ­9UïèBrckU¸ pJþëµk/zX»%wÞÅ'ÇØWçªúß]²^ž«ükÕŠk[}·{6êZÉi‘Üø‰XK—’[³ÌVå¸ÏõmzÅlÌy¢>Ï:=žÏ?I}½ÿÏìÏÅzf-¹ñÀ S:;ž¿·žyιÿÜ?8Ü6ùn¢ùN·l)ìYÓ]ûBkwLW÷Æ©¿Éì^rcëÎæsSUýÕú ÃïY·nèQ í^› @€ @€ @€ @€n Hp즦¾ @€ °HV·’46lú?q2á¶ÈD®ÊŸêÜø³9V»¾×­þSzV~,Ø.i7ºxþÓ²•}\;8|ÆDí[§–wbïÇcìµÕϤ,lŸ»ü„¾¶’7gÒÏTÚF2ÑÏõ–걞ӧҮ“ØV‚a<ìÿ+’o['oÆk0{Ÿõn|~O$T†áK[ãÎÞHG÷ÏÌ™UOþëÁáŸ:º¶{%KeÏêžÈüöt,î½k7~÷©§ñÁHÈ~]¼k½³rr^Ó»bÙ.ý®Yé_§ @€ @€ @€ @€€@Wÿ!éX) @€ @€Àúû7>â”ÓÎx$Býâ›Î(<B^I•›Óà`ÏŒ:º¿qÿ†MÃqZÝÇ#w쇺Ñ_Ç}ätJÕS½¯ÿÂáuܦKç^ò½}½éo#[îœ.uyt79?¡¯'`Íš5‡%ʵN:ìíÍ1v>óèF]*ÉéI§T+[‰°³þi%ä•”Þ ¾]y'›p$ßþr<÷oïÖsä8C7–œ¯Ž÷yå‘usx½ú<÷ÞþþáSûªü¾øýæ‡gÛ<~¯||ZÖó×Ï“Xpο2páðþÑ›_=•vSŽ­òq+ËʱⳚÚFÞ=)•/οƽº;âÏ ªï‰µv"cS?ÑæÕÏ|ÛÊ}ûê8Ññš¸~XC›ý1ö§cì›c¬oDâÝÊÿ¬x>ÎÛSÚVK~mlŽ_q»ºû —ÇF²æîèµm²a¬å£¥Ôo¯ÆÇ¯ÿ\µÿk7ŽŒÜךÉêÁÁN®Ž{l$;ýT*Õ¥±Ö³Z埜Ï?å‘g„cznüšñº/=/ÖòGñì6}0 •¤÷å#©”ÏĽù×\Ò%ÕwU¹«s9)žè‡T©<²ä´*Ú<5îÛ™Ûvò3¦rbïÊeoˆØÁNâ›bËžUW±¤ý­§”ÒûÜωj(›°íµ)ò}Þ¾“{oh?¡Zþ±÷þxðuÜïÏÄù%åoÄ^XG»3â‹÷¨<&žƒ)ý½l¼ψ$úìÚ¾ùÝŒ-„ @€ @€ @€ @€À¢èü_½-š%™( @€t"00´éºøCáó:‰˜HÀ*o{óU#W|¢©Mœêõý9Woˆ„ŒŸhŠ=P_ÊÞ:•U»¶oùlGñ÷]0xñSªªïo[‰T¶Û'oK—ïÞ¹ùãMmZ'öåj(úe$¦<¤)¾U -‘ —ÖÍ$ ¥ÿÂ?[UÕŸN:^I·Æ|N™¼¾ìD·ËÇÆÇ.»zdÛgŽŒ‹“{O~äéçT)?'â~?êÛ&÷l ûÃ÷ÜxnÎ;XvÔÏRÞ÷ò÷ÞY¿ÿºë¶ÝyT}dFö^úÜÜÓó õÜ’½t÷í‚0½z‚þÚ5Z¶m•%}fl¬~ÑÕ;·|¤)4êsÿ†M«\þ8¾žÜA|t_¿xôÊ-oë$v²˜Ö‰rOèYùOq_Ι,æ`ùwϲ%r­þ|tǶ¿òx\;û¬_¿éÜÒ[^ïõp§ïC«çýûË÷Çûö©ÎF™×Iü¡1ýýQ—ÿ0æ8ÜIB^ÄÝc­Ú¹óŠ/ÚO§ßg”W—ºüϲ?½a×®Íßêd¼u뇠¯7Åiù»;‰osÃXùÕWnm%Íuô‰ÓÈÎûwE<+›´î_$c=»)îÈúXÆ3“^¿÷ί¿úºë®Ûwd¿í®.ý®ÜW½3LŸÓ.®U§(~}ïãçL’ ÚÔü@}ÿÐð+#Yµ•¨Úþ§–Æ»üSÓy—íøùƒƒ§-ïYùÖ¶‰®‡4ˆgò·ãTÓ×R4å¯KaÏj-z)$8Ú{'~|ã]Þ5>>þë%•OÜâ@iÚ¸)N}K샪ÉÞkg’@ßf.ª @€ @€ @€ @€Ì‹@5/£” @€æM ¿øÔ*õüŸÎ&P¶~ë–¯üÄt¢FG6ÿe$}<+²¸îh/’;~dàÂáŸlŠ;Xß·rÙë:Nn¬ËëZÉqÓInl·k×–¯Ç©j/ŒSë.ŠËýç0ÙÏXËCS_ï_®Y³¦£„•Éú™Ryœ‚YÆËOµÉ:Mnlõß:ÉòÞ»ÆVE2ß”Xœ_y×·nþòM%¹±Õ¶u"cœŽ§|–‰Nz|°ûßò·N*<¢pÖ.K©)n_1ÕäÆÖ„FG·Þô¹zïÚÈ|wÓã~ÄŠ“z§ü×J6«Júõ¦q"Aô û÷Þ÷cÓ}—íÿ=##7^yù ¢ì-‡–Oú=§Ÿš´®ƒŠ¥²gu°ÔEbï=ü6ÅÞyO¼_?3zåæ)&7¶:*q‚ë;Çóø3ZÉ·÷<ñU$Ÿ?qR @€ @€ @€ @€‹S@‚ãâ¼ofM€ @€i äeå"QïáMDÂÆ›"©oxÏž=cM±íê#‰í u*—F¥]\«.¥)¦U¿~ýÅgGo?ÝÛ³®ë_ˆ¤¿ßlŠí¤>X¶§º¾ ×ö6ÇçX~âÃçØÜOsD$Æ|k,×ÏŠ„Ò]ÍÑGG\{í·å2þÛG×4—ñ[ã9¹8ž“{›£ŽØ½sË?Æ 
¿utÍá%­ÓÍW/ÿÃKgç*šWŽnßòÇ3éýÆ‘‘û"Éq}<ôÕÔO¬íåë/}BSÜDõ}+ú^/Î Õ=PVÊ}u=¶a÷î+ÿã².|¹çޱWÆúûŒSOŸØzg§;äRس¦»ö…ÖÎÞ{Ô¹w|¬~F$пõ¨š)H¯ÇZ¿g55‹¤èVrqä9ú @€ @€ @€ @€X—Æ}´  @€t$Ð?8üc9WÃMÁ‘añ¾HØøùˆkL¶hê«UIŽïŽtŒßhŠÁž×¿aããz{~#’¦z›âRɯݵcË㦰sÇ–÷D‚ã†Nšäªú•48ØÓIìLbê:¿tª§'9ÞÎ[w‡ÿ,o{]Ò'n½å+­çdFŸÏ—{/ëdìª'ýðŒê qä½ctÇ–?ì ´1¤•äXö•Kã-úFCp__Uuxªêá=•T5žæ)SpÕÈŸ8¼å̯®»nÛ¹y'=•ªj|¯'êg©ìY­m1–{ïa·­.ã/¾zç–V8Í‹ÑÛþ.šnmlžÓiëÖo\Õ'€ @€ @€ @€ @€À"à¸Hn”i @€ @ ¹J—6õÉPßÞ?^^Ü7Õú8ýðw"yì íÚEÒbN•×µ‹Y¿þ¢ï‰ KÚÅ|§®|d´Üó{ÍqSˆ$Çk"÷ó/šZÆ[g_W^Ø7“ú8½ñCWí¸|ÇLú¸¿mKé8ôÀIccéÒéžÜxè|[‰€auÝ¡e}Çã{'*ïZYI7ï»kì¿w­¿èh×®Íß/éûÌyM'ɽ‡ö³vpøŒxc~ðв£¾—r×¾»Ç¦•ç÷îÜyù\ÏðKܯhî"ŸÜ3ýˆ:Õ/»öÚ+n›~·ŒÔwEÒÒû'®}°´Jù^5[V¥¶ÉÀzÈùí³±¦Cg{FÛÄå±u:õÐ6~_{V‡k]ðaöÞÃoÑØøøë/™ùÕîÝWþGôò¹¦žªTŸÞ£ž @€ @€ @€ @€Àbà¸Xî”y @€ @`†ëÖo\‰HjßM¹í[·|ùíc¦_[öom:*’ìiƒƒ+&¥•ðôüIê(Ž>Þ±kûææ¤«ZLýËž‘‘»J]~«©eœ:øýç]úãMqÓ©o%ÍíÙü¡é´¨Í»G6ß”J¤LvðÉu}YaS©?ÚÏïC›bfPîí[Fgо}Ó:½¹}@œ šÒ”;‰KãÛ›Æi}Ò¿6õQru|SÌ‘õKdÏ:rY‹õÚÞ{è+iÏÕ#Û>}hQ¿ÿ]S_%å‡5Ũ'@€ @€ @€ @€ °X$8.–;ež @€˜¡@o_:¿©‹RÒ•{öì¹·)nºõ££W|9Úî9ª})wÅØ‹„½+rª÷´±+Љ‚þ —~_dÙ4&v”TÿùDí»]vsÙ»#ÒÒOüë©«gt{ìûûÛÝå~#W-ÝÜØgœù­¯õúƸ)”}ß¾©1<Ï^‚c<{oo_þâï.©|³]‘Àù£çwqǧTŽÖ{ÏOû¾{<=+ú}I¼CcÄ)œåãñëÎȘ¼ùê+·~¸Ý˜]©+å‹MýT¥LøN·k·ö¬vë[LuöÞÃïV]Ê;/éÞU$/þ}soå!Í1" @€ @€ @€ @€,ÞÅ1M³$@€ @€™ Dbàºæ>ÆßÑ3Ⱥ~}©ªŠ„²Ï—ªþ\u_úüèè¶^ã@º†OîiLŒ¯ص}ë'zêJõ‡FFö®ÚøÎ”ò/µí0§Ÿh[?ÍÊ<¶ÿ}Ól:i³’Ó¿G¢Ý£' ˆŠHÀùX$ÂŽµ‹™jÝí·ß~ש§µÏÙ‰gæÄ©öÛaü½õ¾;¯è0vZa7ÜpÃþ³ÎyrkŒ—OÚAÎ=}Ç÷=/êß5iÌ¡##ãW¥ôoQÔúõã×aŸçëú½©ª&OpŒàžª¬%8ÖùÑ×_¿åî£K»_RòøÞ”ÚÿSܸj*#/•=k*k^ȱöÞCîNœ8¼kû–ÏRÒÕ¯U»³ñ}*iyWÕ @€ @€ @€ @€y˜Ò?.›Çyš @€f P¥¾æÄÀT>>ƒ!f½éêÕ«{ãèÀ§7 TשëÉwíÆ¼õ–¯}*E²a»˜”óŠÇ§ÿ©mÌT+sº1štýt¾HdúfãTJù\cÌtr¹k:ÍfÚ&NüÐLûè¤}ëà+ù¹ôµ bJϲæÓÓAÌ!KaÏz`1‹ü‹½÷ð',~,Jº¾÷e¼N‘àØø™ÒûÔØ› @€ @€ @€ @€ó( Áqñ M€ @€9¨ócšÆ*¹|¢)f>ëO:õŒ'Åù}'6Ìaÿ¾»¾ñɆ˜®V·N,)·^Ú~ª*=­mÀ+KÉ_žb“ŽÂKé q'§Ù:í°}¢hG+˜zP®Ó So5õ££[oŠÓ/¿Þ¶eN§¬<¡m̨\µjU_ÿàÆg¬ÚøGUÎoêú”–ÀžÕu“yêÐÞ{8|ü>ô/‡—t÷ªg\‚cwEõF€ @€ @€ @€ °ÐzúÍ @€º Ó#šz‰Ä¶}‚c_OóR*ÿtÝu×íkZë,Ô4úüÑvýÆq_§µ«Ÿz]¹eêmºÓ¢¤êŽîôtD/‘e›âɹü”øìýöøÜ%÷¶Þ³œÚžÒøÐjå#Ã`^N³œÄ¾Z7xéÙ}9}JÕ”œŸI^«"vy+~VnÙس&±\tÅöÞ#oY™ýïþaêúî»zÒCŽô°ëxgåµ;l @€ @€ @€ @€æH@‚ãA† @€ó+[ S Ÿò…†€y­.U:µ)£#rÕ>=?“,76¥yU9Ú͹å4 
Ž©.³u‚c7‰:íë‹×]·íÎNƒg×|òfUÆZɰÿ:ó±¦ÖÃÚµ=¬gyßÙ)çÇöäúìÈ6}B<×çÆ³öÄ([q°·¦÷ð`ÜÌ~.þ=kfë_8­í½‡ß‹x/fu¿Ø·ïøzåÔáÃÇuE€ @€ @€ @€ @`© Hp\ªwÖº @€ pˆ@$$5žà8VÏnÒÆ!Ó™Þ× ‚™UÕ­Óë|f­r©oK¹§m'¥äSÚL±2Nܼ{ŠMº>v_;›×®â‘šÓ“0s)wF²`Û5—\uܶ‹I+WžprZñ”x—Ÿ«ò¸\ªÇ¥\ÏçãbZ'>ذ:äuk?ßÛtï[Œ¸ø÷¬îqÌoOöÞÃüãœÙYMp1ÖS}gM1““‰Ý¡iRÝDè ¯%±gu°ÎEbï=ì6•$Áñ0 @€ @€ @€ @€f( Áq†€š @€ @`qäÆÓоò•ô©Tqbá©MIX%å9MV;xï÷—Ûûªöb%w÷Ǻ ŽoÀ ~Îu²ÒÓßÚ?*©ä2ãû7l|b¤-þ\$0nˆá"¹¶aÐÎNÓÅ¿gÍŽËÜ÷jï=Ü<~Ÿ?¼Ä @€ @€ @€ @€3à8=m  @€ °Ö¬Ys\$>´aª÷Þpà ûbæµ:òO@Ì©ž—$ÍžÞæÓ/s)'v0Wc]íï˜íln“b#yñ®¦ÓTSIŸîíX38øðՊߌö?“s^”ï³Tö¬éÞÃ…ÖÎÞ»Ðîˆù @€ @€ @€ @€XZ‹òº-­[`5 @€˜]ú„Žo¡Ä] ÿÓÓÁçåd­j,÷¥¾ÆÙímŒ0ç%•9½/¥NËsÕ~™qÊã·ÛGL\{Á…›.ê©Ê[â´Æ“&Žèbi)ã%§¿‰dÌwʼnv÷EÜåÝê} íYÝ"™ï~ì½ó}ŒO€ @€ @€ @€ @` 4ü“º%¼rK#@€ @€À1"°ïßh>Õ0ç«W¯^ÐÿœHÁ¼­é–å’Ò3õc}ãÍãætÏlŒ­Ï™ ÄiŠ'̬‡©µÎ©jL>ŒSožZ¯) \¸éW«\¶ÍZrc$4F2ã§#úMu)ÜQï=uôÊÍÏݾù­U©ßÍ©¬g©ìYSYóB޵÷.ä»cn @€ @€ @€ @€¿À‚þÇ«‹Ÿ×  @€ 0ÿ{öìذéÛ9§ÛÍæøãÕªïj¢R»ñ¦\—Ë­‘¼ÕЬ4'6ô0êj<”Î8+%Kpœî,·‰'ª1á°«S¨êxÏÚÿ÷¦â8Õ)%8®Úô†˜ãË›ßÎV mßN¹|6l> ŸH¥þè]·WŸ¸þúÍwwÖÃÌ¢–Ìž53†…ÓÚÞ»pî…™ @€ @€ @€ @€X‚—àMµ$ @€)S‰ÄÅÜ6Á±ZÙ» ãtÆÛšòë4?'8枪9±2 ŽG>˜ ຤2§ ŽÝ>Á±ÃÆÁ`ŒäÆ©"‘q_Îå3ÑòSq:ã§êºþôþT}æÝ#›ÿ}j½•eS‹oŽ^ {Vó*G„½wqÜ'³$@€ @€ @€ @€ °X$8.Ö;gÞ @€˜‚@œw[œÆvF»&Ëêö íÚÎE]$/ÞÖþÜ»8/§‡ÎÅ\Ž#€NiJ¾Œ²oÙÎõÈs|‚c*§4´Ø³¼£_08üèxæÿl*Š‘Ôø±œê«#±óÝ£õ¾O¦‘‘ñ©´Ÿ(¶äê䦳U'j×®l)ìYíÖ·˜ê콋én™+ @€ @€ @€ @€Å' ÁqñÝ33&@€ @€À”r$65ª{ËwGÌMqóW_nmJ +%?i>æÉb߯í‡Îå‹íÔΓÀés:nÉOnzTî¨ÆnêdNËzÒeñNœÜIl$Øþ¿±zìeWlûtGñS *‘´™Û?ÿ9ç¦üäÃF\{ÖaKZÄöÞE|óL @€ @€ @€ @€À‚˜Ò?.[ð«1A @€˜X —Æ„©*•ñÂ(Sç¾Ú4“kX½zõœÿÇ|"·ë›æ–J–àØˆ4÷‘H÷˜µk/zØ\Œ¼fÍšã"ö‰ cÝûþ‘‘;bRëôÆ\Òº¦¸V}©Ókvn¿üé³’Üý‡aœJÙþ'GNíï –ÀžÕ^dñÔÚ{Ͻ2S @€ @€ @€ @€‹Q`ÎÿÑçbD2g @€,z’>YHµ]GÎß×¶¾‹•†·Å‰oOÍ¥|©¤ô¥’Ó—R)_K_ê©ó—víÚü­#‡»õ–¯}êÔÓθ;Ò©Ž?²îëœWœtê­S?õ@Ù,ùNBenL-µgùVL»ûe+úžß?í:lç¹q’aû¿‹)ést§7¾(Þ…ž¦ØRÊ›Fwlþ¦¸™Ô—œOn~cÌ4Ç[>•Ïس¦²Ü…kï]ÈwÇÜ @€ @€ @€ @€,~©ý×óÿz­€ @€ǤÀx>Ô´ð’æ.Á1Òþsd;YOω„¯—T)ÿ~•«}}ÕG«ãò7†6ÝœKÞÚ³gÏXœ‚øá¦uôö•§6Åt³þ¡Ô“£¿åM}îK÷~¦)FýüÔUn%8ÎÁ§j<é3¯êd"qzãO7Æ•tóÞ;¿ñŠÆ¸ÄÉ©g5uQJžÒßA-…=«Éd±ÔÛ{Ë2O @€ @€ @€ @€‹S`Jÿ¸lq.Ѭ  @€ @à«_¼ñãqBâÞ¶¥<áü¡¡ÓÛÆt¡²¿øÔœÓYí»*ŸK##ãGÆÔ)ýÝ‘eG^ÇAqY6›×UÕ×þdÌ«Ýñì~q÷ÎÍŸjÓªëë;ùŒ¦˜õyìýÅÍ èQyùOFóÆL#Ù²o*Ã,™=k*‹^ȱöÞ…|wÌ 
@€ @€ @€ @€À¢ศoŸÉ @€ @ s’ëÆäÀH‚ÜØyÓ‹¬ªÒ˜DYç4á\Ç÷Ýùá8‰ò¾v#·5ûr~q»˜nÕyö¹/ˆSùÙØ_þº1FÀ| TiYÏðlNà¸*µÞ­öSšOolÍq¼·t”à¸ï®ò/³¹¦}WÕó;cY‡q„-…=ëÅ<øå¨“i¬Šo¥á9,xî.ì½sgm$ @€ @€ @€ @€Çš@ûXw¬iX/ @€–°ÀØx¹¼Ä§íszRÿàðµ™Aeÿ𩹤çµí¢”½ûîÿ»‰bvïÞýí81ñ]ÕVV¥—ô÷o|Äae]¾X½zuoîɯn춤;\×'`žòY³fÍq³1‰uëÖXrzYcßelWcLäTŸÒIܲe½³ú÷> ;Jg‰¡9M9Áq)ìYGÞ§RòØ‘eG\¯8âzA\Ú{Äm0  @€ @€ @€ @€KR`Vÿ¡Û’³( @€,RÝ#[?SoL´«ªüæU«VõÍÆ2ó²üë)çö‰N‘ÀxÝuÛîœlü±ññ×OVw°<§|j^VýÙÁëÙøyÊ#Î|Uô»ª©ï:•w^ý–»›âÔϯ@œÄyöŠþÚÙ˜Eþ~<“i×wdß2ºcÛ‡ÚŬ«JþæÁïí~V}å‡ÚÕϰ.c]vyÎéÄÎúixï'èd©ìY‡.-ž³ý‡^õ=çžsÛï‘G5š›{ïÜ8… @€ @€ @€ @€À±& ÁñX»ãÖK€ @€À1-PêúrúÞ3ûäW6ÆM1`ýú‹Ï‰d¨ÆSìöï/Ú®ë«G¶}:¢|o»˜V]ŒuÁÀÐÆMqÓ©ïßpé÷ç*5&ÃÝbæ›§3†6ó Óÿ¸`ðâ§tsäûODýo}ÖõÿŠ˜º1®”ËÍÄ•T=µ“¸éÄômúåœóÓ;m'·.ï4öи¥°g¶žTöz=Ñ÷Çîë=y¢òù.³÷Î÷0> @€ @€ @€ @€¥) ÁqiÞW«"@€ @€À„»F¶¾/¥rã„•‡æôëýƒ—>ïТ™|_=8xBêëÝ}4œ Yþq÷Î-ÿØ4V$=5žâx R½¥›ëhõ¹nðÒ'ä\½;¾6¬¥ÿb×öÍ_h}óYø‘°×[U½Û×­zT7fÛ¿aããDÔ‘è7î›ü‰°Ÿ¼õë_}Ãä‡×ÜwWý•(iN†Ì©6NcwêÙ‘°ø;‡ÏªýUœPyRûˆ‰k—ÊžuÈên?äû„_û–÷v5ÉvÂA¦Yhï&œf @€ @€ @€ @€L* ÁqR @€Xše<½®ie‘Žu\UUW÷o>¿)¶©~õêÕËO­Vìˆd¿hŠMuùƘh%=ERØšbc+«žžÝ‘h6ØÛI}ët¿ÞžêosÊiŠù}vÿ=·þRSœú…%¹ˆï[±ìo.i¼Çíf~þà%OŠDؤœNkuõøXyÉž={Æâ¨¾öÚ+nK¥|ø‚I¾Äóÿƒgó¤?˜¤z:Åy`hø5ñNýUk˜R98¥øC‚—žuÈrn=äû„_KUýb웽VÎs¡½wžo€á  @€ @€ @€ @€,A ŽKð¦Z @€Ú ŒŽlþË8ÅñííbÔå¼,ŽûËHhúµs—5ÆO°þÂÏ?å´3oL9¯™ úˆ¢ò®;¶¼çˆÂI/ï­÷^§ÂÝ2iÀƒ}U®®ŒdÍׯ[·nzIVƒƒ=ë7 ÿ\OOïßDrã#ìzâo¥¤}%Õ]sÍ5÷L¡tA äü¸´¬çƒ ]òÌéÌ3Þ™‹{zz"6=²©}$¾åê[>Òwd}Óî#Ë&¾®~i`p¸âºÎKûû‡OÚôžxþ+ZMãï“ÊC:íðÈ¥²gXUN_?|uG_Åsó¼SO;ãÏfãôÍ£G›z‰½wêfZ @€ @€ @€ @€ 0¹À‚ü¯ÁO>]5 @€ Ð ûî¾íçûŽ?å?G"Í÷´ë/N³ký¹ñ>¡Z±é{.ÜøŠåÞ÷¤‘‘ñvmZuç Þ[–ýq$6ÄŸ’Ê·öŽïý…ÆÀCÞ32rsÿàÆ‹sOõ¾(nJ¸ªªœ9¯|èECí®[ÓÈõ×o¹ûî&þ‰T+ŸÙ“ÒÄI|O™8èèÒRêÿ±kÇÖO]£d±D"ß™=©÷ý6½3í{Íè趯5Í}ýúMç–Þò‡ÑöùM±­úxîoº³ÞûkÄ36žÞµ¬'½.Ê—YwÔu•vÄ:Þ>¶wßoîÞ}åUߦ N?}bœ¾úâ8±qS¼Ë§¶ m[•KzDŠ÷©“ýc¢Ž–žÕZWì ÿœsì(Ÿü¢³÷äµg>îÉÿ7BoŽ_ã9—“"yúÑq?ÆG·_¾®±‹Y °÷άn  @€ @€ @€ @€£ÑoÙ @€Û­“×­ÞÐ×—['Ç5&HE¢ãã#Yq÷@ZñÍ<4¼».å}©Î_OõÍißø·óÊžÇWu>7➉€ç¦’$~®ìH¹”ñ\ê_72òŽâ Ú5²å¯ã´¼×ݪÜ!5¸ïŠä wœxrys´û«H7º¶Nõ¿•ñtS}oýõž•ËRòø£ª’ ‘ψu\ëxøÄ½M\§J^»kÇ–7N\«t± Dbß K_Ϧ 
Ãÿ/N[ÜY‰ŸOåß÷§}·.¯–?2Õå1ñÜ_ÎÕEñ¬<%ž±Î–øçþ¥ï¹£³‡G]3²ù+‘¬ûrª~õ𚣯îOTþ¯}+–mŒ“Lßê´{_Úû¥V¢Ú‘ÑçwñÉ=Ë{OïéÍ«b%ÿ%Öÿ£GÆy.ñØç¯EìéGÖ=pY}kÓŠG_“ÒW(›Â—¥²g…ég;^vì=?ô`|<]QÜc­Óo¸á†ýÖÍí7{ïÜz @€ @€ @€ @€ÀRด﮵ @€ @ Àî›?508' æ+"¬1ɱÕU¤×<,~¼(’ÿ^”Ⲫõ+ï?ìÐ3# §“O+1*R£þKœvxU'ñÅŒ^¹ùFòÙi‘ÄõÒ‰ê',ËyEL±?VÔßÓ:ü±µ„ãFÆÅÁùüy°ªágdyýÕ¾oïߨ¦zÁ ”Û⦟<Ù´âÙj= OOoÅ´õ¾ƒù»÷?þ­òÎ?åî:×â¹wçmŽŽ¼uüÞß=¥geëdÅG];AI<÷1÷WÅóþª1ÿõ†÷ƺ¿'IÞïö##uîŒÈ ;a‚–“EÛo¦º~aªªÇD_o™40*zr$>O3Á±ÕïRسF·oýD$Wóþ½´µ¬)â9ì}ÌÙßsN$8~fÊ»ØÀÞÛEL] @€ @€ @€ @€8†ýç§Ç0ƒ¥ @€ @àØÙ¼«/Ï8¤4‘äó²];6_>áëÑí›_™’/£ÍÆgØ×ôšÇ¸aøêÑ+/þµ×^Ér>‹J äWdzsý\Ì9ž“›J©Ÿ¾kûæ%7¶æºgdä®TÆ×F’ð·§5÷HxŒdÞ'EÂÜO¶~N9¹±”¿½o<=etdëµ)²iUNÏlŠiª_{Vä¶–¶‰ M­úžÔó¤Nâf9ÆÞ;ËÀº'@€ @€ @€ @€ p,Hp<î²5 @€ @ À®‘ͯŸ ^ÿÒ&¬»U%ÝQ—ú¢Û/ÿÓnuIŽ—Õuý‚Hº³[}vÒO+a­®Ë3ã4³ßø8ÄÑg± üÿöî7´®³Žø9'Ù Ói]gíœâ Ù+eÖa'ƒþQ‚U‰KK7±}¡ ÁW¾do}å‹‚%iÖÝxinÚY2C¥™+CÙ«9‘¦ÅÖ¹n¥q&M“œÇßÅf¥M—ô&9÷ú9Prî¹Ï}Îó|žsŸ!ßþ"qvå|9ýD¬ÞïVtì){u>»ú…¨â÷J«îÓì+OiWô7Ûª>ßG?ñÈg?)§wü¦>øfûw.¯Æ \ìHwplöÞî{Öôå‹ÏijvGÕó"U!àøîbÛ{{æ½G€ @€ @€ @€ @€Àío'ä} @€ü«ý­¼šm‰Jp¿ˆJˆs+9å¸ÇKóùÌ#áCµVß§Qz1‚WFXóh«û¾Yau"ͤGõC¿¿Ùû®µÀËõúôùr*ª!¦gã_˃ªÑáñ©É¹ÇÖjçZ­rä׃'¢ëÖõ™V÷}c1c‹ØÒøIV¯ÿ¯bêøø¡ÿDhï¥Û/|•"?ýoîûâÂkË=oç=klllf¾œÝ–o,wþ)Ë+plÎÁ޻ܕô9 @€ @€ @€ @€= @€ ð®@£1øÖÈðÀ÷çÊò³Q•°Ñr–”ÎF…Å”S[Gkµ3-ïÿZ‡áÁ¿|"BXENídËï“ÒÕèw¸œ/¿•ËzCÿjù=t¸&Íc¬éÓe^îˆðÙß[2ˆ¨Ú˜•å×Fj_{nŪ‹6«NOÎmŠ"¢Cñ|Æð[|¤4ÞüNÅŸ—ýñvý\*§êë»z~f¸UÛ<ÏúvïþÖCGŽ~ýVm–z½]÷¬cÇjç7oÞ¼ýS}fOT¿Üóî*—ïëwu±.ŸŒöÍÿ¸lUž­¥¬‰½w)ZÚ @€ @€ @€ @€ ëæ @€ @€‹ ôí>ðð]ÝÙW£Õö{=˜¥|Cü\¯››ëGÊ.G¹¦,98'Ë|æäÑZíÜõk{ÖÿÔ¾MY^|%OÙö;~<åÙ†<¥û#Öu}di2æ7ó˜ˆëYJsyyúhmè×Û8kþ'÷ÿ (Šgo™²ï6†ÞªM„}ÜÙ·ã™ø|Ñ>í6¾ç™‰@cÊó×ã½Õ¦ÓÙlylddè­ú[­ë}}}÷=ë/RöXVäXN›âÙ_ÿž±Ç`"¸;ßç7ã—DoÆ÷öLÌãDTr¿\­á¶ô>í¸gíܳç£=YÏÖ¼È6¦<},Kņ@ùPž—¢Òc3lz6•ÅÄL6uv¬^o†¬ãR{öÞöX'£$@€ @€ @€ @€ °Žk¡îž @€è ¢¿ÿýÙÝùºùéÙË““ÞZÍ -$ÌûûÜWs÷]¹’]<~üð¥ö­«5hEÀñÆ)lÛ¶­ûÃ7>Ð=[Ü5??õöèèh{Û'lÖ >æ÷ܳ®Hw÷L•Ó[¯7Ÿù¶ ËݸKxÝ){Ö¦\é¦öÞJ/Á @€ @€ @€ @€XÇÕqv @€ @` V"à¸ÓpK @€ @€ @€ @€ БEGÎʤ @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ 
@€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦€€cg®«Y @€ @€ @€ @€ @€ @€ @€J 8Vzy Ž @€ @€ @€ @€ @€ @€t¦@wgNˬ @€ @€!§§”YÜ¢|gñ÷½K€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€d•Ö0IDAT @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @`-þ iX'ÂréLIEND®B`‚rocksdb-6.11.4/docs/static/images/compaction/part-range-old.png000066400000000000000000005032531370372246700244170ustar00rootroot00000000000000‰PNG  IHDR8ýOš» sRGB®Îé pHYsgŸÒR@IDATxìÝ |\W}/ð{ïHŽåì !!H!@³SÀ 8ÁŽ%1¶Û2ÊÖBËkytÒGúJËR –`ÙNl–0„Íe   „­°%,ÙǶfîû;¶23w$d[þÎçãjæœÿ=çÜï½sͧÊÏ'I¼ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ 
@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€‡©@z˜ž·Ó&@€ @€ @€è[2øâèÑ­ËÓäÚk‡ÞÚªFfŠ@ß’¥K’Ò ZO-­U6­]½µU>pNTÎq @€ @€ @€éèšþ)ÍH€ @€ @€f’@úÜ4Mú[QükƒÛ¢_À±’>fŒ@ž¤çdiúŠV'”åÙÕÑ¿µU>pNTÎq @€ @€ @€éȦJ3 @€ @€ @€ @€ @€ @€ @€‡»€€ãá~8 @€ @€ @€ @€ @€ @€ pº)  @€ @€ @€ @€ @€ @€ @€Àá. àx¸ßΟ @€ @€ @€ @€ @€ @€Ç€nJ @€ @€ @€ @€ @€ @€ p¸ 8îw€ó'@€ @€ @€ @€ @€ @€ @€@@Àñ ›’ @€ @€ @€ @€ @€ @€îއûàü  @€ @€ @€ @€ @€ @€ @€Àè:sš’ @€ @€ @€*×jßÎKéßµ:½¼–~µU¿> @€ @€ @€ÇÃã:;K @€ @€ @€Ó"°©rÙ7b¢ú/ @€ @€ @€´ÈZöê$@€ @€ @€ @€ @€ @€ @€L€€ã ’ @€ @€ @€ @€ @€ @€h- àØÚG/ @€ @€ @€ @€ @€ @€ 0ŽS€jH @€ @€ @€ @€ @€ @€ @ µ€€ck½ @€ @€ @€ @€ @€ @€ @€À8Nª!  
@€ @€ @€ @€ @€ @€ @€ÖŽ­}ô @€ @€ @€ÃE a'z¨ÿä`ºÓZ¦ê6=Ôï—}]fÒ¹ì{^Óõ~¦Üïîƒéºc½yfÊ=~èÉ[1 @€ @€ ÐP «a«F @€ @€ @`Æ Ì›7oöq÷=cašæOˆtÃý“4¹š'÷Ï“ôÔ4M²<ɯöë’<þ¤é5I­öñ_äw~òªJeûÁˆ1¯\>긬穥$=3Éó3ò4=#Ió3b­ñ99%ÎåÎh»1Mòó$ùMüüZµ–})Ïv\uÅÚµ?=˜ÎiÑ¢EÇe³yE¬û)±îsb½gÆõ©_‹«cõ?ßMGG?±aÃe?˜¢u§ û—ÏíîJ.ȓ윘û¾áur’§÷M’ü¤¸GFcÞ_Žs}¼¿þ®{¤vU²3ÿèððê_NÑš:6lÿ%Ê»KψóyLÜë÷‰Ÿ'ƽrBœÇ‰1É q¿÷Ðöû‹,Íã¸Âk—!Ù˜$£o^·æ+ûŽ3‘÷òê:þäÓ¦Iú¼p¼ ~Ïçƒñgï’eK³´´ºÕÚÂâU-þîluhþEåKU*u­Žï× ömL“;F«ÕeWTÖ|k߿ɼ?ØŸé û!úõEç¸k4¿xdê¯Õu²¿o`ŇÓ<H³1ãž¿å†ë¯yÂt?÷›­G; @€ @€˜ivpœiWÔù @€ @€ @€ÀŒH“Ú±IšÕüÄòú.t÷z]Ô¿ü ¥®t(hçÜ«³Í†R͉ÒE¥4]ÔŸôüy²xùŸnX¿êãmÞ±²ùó—3瘮¥±ëÞD¨çÑ»žÄ?ã&çÆÿœqì[ú– ~°¶ã–¿štÐ1ÍbWÀ¤éuŠ]ÕŽÝdáÂS»ç¤õQœO['ùÉz˜¶«¿o`ðÓÕ¤öú+Ö®þâ¾c¶û¾wÉò¾FýÝdîú\q|}ásc¬¹ñöõø½ttûŽ¿Yû‹v×Òɺ¾¾¥HºK¯•ýA¬¬8d4ŽÉï €¦‹#4µø„SÎxcßÀò·\óƒï mÛ¶m×8†wéürù¤ž¬çocþßÛdV[wJëYºcœ§&IöÔ®®üoú— nLFÓ¿Þ°aå·[Ö~ožUNÓRÓïB}¤í÷}hgôÃýYXftõñ渷Ã+kÇllM|Oë¿~nþܸ_ß·xÅ‹‡×¯üì¾uä_î×/]A½öˆðÞ#³RÁÑúî°ûö4zŸßž×’ 7 ÏôS#ž7±érúÀF*{ÚººvßßÓp¼¨¼ôaqÉ.ŒuíY½æÉ¿ 7Þ›E  @€ @€è”À„~ñÕ©ÉC€ @€ @€t^ ¾‹SìÚø†7~>Â,7Þ{eéyI–}, ­mîÝ?5-æ»dÎ1¥ŸGhä=¹+ÜØ¡©"Ï0'Œ^‘Í>öÛ½K/êа…ÃÔ›]=³"(:±ó‰@áùY-}dáDc .Xöä¸~_ÈÒlCgïÝÕ˜/ŠÖâþ{ã¼rù¨1ÓOÙÇ‹N‹ ê¿$Ý]?Œóú£XGGÃc1˜¥Iöogœ}Þz˃OÛß©ÏýKV¼|N6çqN/ðͬN»gœ7n¥´/éN¾áÔÕ‹ÎÜÓ7~Τgaý^¨·âн ®MG~Ç÷ñÙ‘4ÝÚ?°â½õgÒL¸æâ"\øð®îäSñejø 쿦ü–¼V}Îðú¡Ïìß>±O‡Ø3=6 Í?Xx¦i²¤þÝ-¬ëPAWZª†[¾ªiueË @€ @€ @€À¤:ò˯I­ÀÁ @€ @€ @€@Ç”—?ðÄSNÿ|š¥Ù¥© ¤éüç}¦¯oÙý:¶ðí'-YñOY!¬) ËEÐçÔHçmŠÜûb)Sýû“4›kãú<¼Ái·Õ”çÉŽ·W×µU|wQ=tØ•”¾–OÏqã­Öœ¸ÿ^BÖóŸ}}—œ1ÞãÇ[ß¿xù¥üˆ«ÃóSl»¶¸7ÏÈJé§ûʃÏÛ7ÉÏiÿiw¨7ÝçÏIŽÛìðz8ui–ÌúrìföˆfE‡RûLzÖ¿»õ{¡þÝêô5ˆïM<þ’—õÛõõzP¸ÓãÏôñvïþ—¦õpã}ŠÎ5žÛ7U«Éï ¯_ó…¢ÚvúÅgz¾«º2W«ó ËûžxßӞݪ¦ƒ}Yž%K[ËýQì–ü¹V5ú @€ @€ @€É Lõ/è'·:G @€ @€ @€m ôöžØ¥Ÿˆ]ŸÐöA,ŒLÌ9ɬҧ/(—O™à-ëí]~ßN9ý“½ù“–…îŒóziˆ’r¹Ôá¡÷!Ê?Šáü½ x‰¤‘|ä²Û9tîܹÝ}K–¯¬‡Û©ïTMXž›vw©oɲÇtj̱ãô ,_ž§éSü;W‹Ï³#»¾wÉŠßoQÓ~WÜ{q^6Ý÷~}õ`QWÖµuaÿòǶ¿àƒ¯r= ³úîŠÓñÝgʃºòYŸ~^yðþß=8WÔßñCK¥®O׿7E+Ì“ü·££ùù›*CÿYT[Ô(?Ó‡‡/»&¼>]tŽI–.+¬é@Á¢¥Ïˆõ< åPi²²e¿N @€ @€ @`ÒŽ“&4 @€ @€8ðç•˳²Y鯕=xºV˜‡Î.õ¬éô|}K–>.="Ý¡ƒ§wzìvÆ‹`ÙÒþRÏeQ§ØÙWÿ%gÇ oì¨y­ÚVàâYåò±gœ}ÞÇÒ4œìœ:>MNI“ì?ê»,Nèøõ¬xu’§+㞟šJ[Ì=¶+Ö·MòŽ… NÛ7ÞÏ}¥ž¿ŒÁÆ{\ÇêÓä„î®ì“½K–ŸÛ±1§q 
™ò,¬ŸGÿ’kƒîeÓÆ—¦gÍÊ’O§Y×”îÎ;mç3…õ.<'ïîŽpcrrÑ4nüUµZ}ÆÈ†¡¯ÕõÏ„gz-©^Ztžyž^4¯\žÒ›ëkˆí[þݸ{·É£CEëÕO€ @€ @€LNà€ÿÂwrËw4 @€ @€ PxH6ûýÇ{jµøö¿¡Œ_äIúë$ÉkqÜé;#Þ?`¼a±!žAçm\7ôá6æ.,Y40pfš”>ÙÂ# ‹÷)ˆÉ5IžŒÄy}?Î躵ÔsKÜcñÜmüŠ{uÎñÙœ¾è²p᳟½üȸŽýW°·uk}×ɽŸ¼!@€ @€ @€¦D@ÀqJX J€ @€ @€¦Q O_ÿ‘þiÍfܽQšlJkµ5·å;>óñJå†Fµóç/=föÑÙ ²4}E¢Ò¨¦Q[„<Þ–”Ë[’J¥Ú¨¹qݪï6{\Í1ÞóÂuÒ;Æõ\³uëÖÑ¢ÉÏÉzÞ!‘¶ÃŽÚ4:Z{óȆU_-{îܹݧuÞ³JIú¿#5¯¨>îÏ®¤”_~á…—<ú#¹ìÆ¢úVýõOR*ýS«š=}»ïý$_9Þÿ7¼~Í£=òFí½úûWœ—wå/ûe0ÎñØvŽŠÚç/ì|dìÔöÍvêÇÔ¤¥R¾2B?…¿¿‹°æH’Wß4¼nÍWÆŒ±ïÇ/ìû¡¯ï’3’îðê â>œµo_£÷qï,ˆPê“:Jm4GÇÛfȳ°wÉŠÄó¢ýpcž_‘çÕ(ºÇ,X0§ûÈãÄ=ö71þC›ùÇ}|b³¾z{šÇÝq˜¾bÞ奬n,Ü­µ8­UÏÙpù;Á5SžéWU*ÛûbwÒ¸‹^ÒÊ%ú—Eÿ”:!‰eëL!þ!ˆ¶Z£> @€ @€ @ 3…¿ íÌ4F!@€ @€ @€¦J BÍÃI¾±Z«¾îŠÊšïÍ¿eËšØQ)ygüyWßÀòIž½/Æ>¢è¸¯Û›õÌߘ${ÄEÇ4êâÒ¶C=yþÅØ}ï•ÃëV­ÑXÚ6®[ý_Ñ>¯¯<øü$KßÞÊm÷ñ±Óby>ïŸÔh¼q·¥iìžÖì•ß;O¾7OómµZíêjRº¹;ÉÏŠ`Ý9Y–=&võ[¡´£êGWG“•ÍFÙÓÞW^Qßañ÷|nù3O¾•'£/*Ëí7ĶmÛvÅŸ-Ѹ%ÂXƒY’üßXëIûù¡©3Ž8ºëߣ9B%SêyuÜ'gŽç?Ž} Ÿ?ž{dß17lXùíøüª Êå·Ì.Íù·˜óÂ}û›½ï*%Ͼo6ëoÖÞ[^þŒۜ׬¿Þ^lFàæE±cꥭêõݽ ÙK”ß<+‹{¨`j)ù»k^£ñƶVßé…ÏÂg•ËǦiþ÷q/Ç9ý,nˆ?Šû¡¾ÛcákóæÍwDѺ o|h6û5±»íëâóìÂì¨ï0œä]nLPHÏŸZºóü‘ÊÚŸÖ¶Q0óžé£ñ ëjpÌóóûú–Ýoxxõ/Û I¾;¼Þü¸<¿íöó Í ô @€ @€ @€ˆß7{ @€ @€ @€3M B/wDê†×õµnsþùðÚU¬¦Õó#@ó«1} ?F碆m6ööž˜%ííÊ1¯Õ¿½þÚ§O4¸6\úPœ×³b/¿›‹–©'ö-ŒàÙÔ¼â&µü­Õ;oyÐðú¡¿ŠÝ"‡ë×ëÕ¡Ÿo¬¬úëW½ÃÚ•/¿¹¶ý±]åkbÝ#E»^40pZš%íàò|ø·µ;ž<žpãX‰ëV廪¿çR¸Óeƒz{/ÿñc´ûyáÂS³<©‡²Z¾b-ßßµ}çïNôÙwðV*× ¯]Y§¾oßö¦ïÓÝǦÝÍ:Ò,}Q³¾½íyþçáÝÞµÝ{Ðþo6W†®­î¸¹·n´Ͻ?ÅõzzìXW&½÷¡MË¡ö,<:ëù?á~ßbÀüÛwV·?®Ýpã¾ã}»RÙ¹aݪ7תùùásÓ¾}Þ7ˆ`ðéY~Äg"h~zãŠ{Zó$ùA<Ÿ¶imgÂ3ñ™ç|)œ®¾G­Á»4-¥ÝÙÅ z&Ýô¼òàýãg8¿Õ@±¾Ê•W®º½U> @€ @€ @ 3Žq4  @€ @€8˜ÖÎ^7ôo“YÔkW1©Uû"ÿëWrê°â-Çš “ÎÊß^´`ýÐXÊ»7¬ܺuëh“¡ÚjŽPÐ÷kI¾¬­sK“×´5èø‹jqÎ+6¬úóM›6µ }²R¹yãÚ•ÿ7«…AÒR~Ä;cÜŠ–WõÿmX7ôü­•ÊmEµEýõ¶F·ïŠ h^ßõ°å+ËÒ·ÌŸ¿ô˜–EM:»{º_¼g'Ë&%õ›dg­6ºdddí/šÖL ãŽ›G__„Â1ë;šF(ðAã™bÁ‚s⸖;[ÆõÚ<¼~ÕÛÆ3n³Úúý–ŽŽ>/B¾74«ÙÓžwuµ í©;HRÏÂÞ%ËÑ6Àù×wÞ¾k^=|;÷•¡«FG#ä˜ä¿™Ì83ýØzÀ0v=Ý÷3‹Î5þNùn²³úôáá5?+ªm·¦>Óã¾û`¡Aš,+¬™@Awi÷¸-ÿ[‰¼–¯œÀÐ!@€ @€ @€& Ðòÿi?ñB€ @€ @€`Z^}ñV}¹Ë^¿æ 1Îê±Òä”…ýËçÖ5(è-þnšfƒ ºökŠpÙ'"´ùÊh, \îw`“rüpßФ{osLöÜ»·¡CojµÚË"°xY‡†Û=L}·½á,,3‚%Ÿ¾áúkêAªŽXÖç¹üúÚŽzX*¹¾õüé1sŽ)½¸uMãÞ<É ž|ë¦Êeßh<ÂÄ[·lYsKZ«¾ªò,×ýÒ5ç¸z 
rvë±ó‘Öýãëݰá²ÔòÚÿ):*ÂËÏ,ª9XûµgaX¿3‚®]-=#”Þ 7o¾¼#¡ÄØöëÕ¤Vÿ^Å&±^cê;ý•òYõ8¶ïÞŸóoæ;óyõÀ÷½û&Ö2“ŸéÕ;v®Š@zµµLúè‹ÊKÖºfü½Yž¶üßñ÷ØÿÄ.ÊŸÿÈŽ @€ @€ @€&" à85Ç @€ @€ @€ƒV _¹qÝê5\ÞèöÞÎN‡¥®ì™7ÍŠwhŠÐÚ­»ªù„Bq­ÖÃ7Ź}¿UMŽ"w”†[q¯¾<Ùºqýª÷ß«}’ ±ÛÞŸÄ-ÿÁ_ì¸uôù“ݳÑR7n\õ«¾¬Qß¾myš¼*)—Kû¶½_P<=®ÄcZÖåùm;ný§–5“èܰ~õ†¢ûe÷ð¥ô”ñL“åÉéEõ±ÝçgŠjÆÛŸŽÖ†‹¾Ûyš?#ÆMÇ;ö¯?´ž…}K–>15uë–¯¸¯ìd€®>Ù]»õæÿÐrâð³¯oÙýf•ÒÏÄuyp§¿­¶#9¿þ l£¶í’™üL¯ï²}¼£TÊ:º‹cß’e‰'ZëÐd-ŠuÅò¼ @€ @€ @€éhù îéX€9 @€ @€ @€Î ŒV«ÿعÑî©BˆwW›%µÓŠjô×à ´ï×éª×n® ]»_cg>DÖnw¡åh±ÛÓïµ,gg­V}Ë8),N¹|BäÐ^PT˜×jó‘\vcQÝDû#¸)b!Ÿju|\ô3¥sú[Ռ훕ïL÷Ò¦òÜêkŠ{±e v÷ºkɉc×ßêslWpluüDû†‡×ü,Nè+c¯‡c—ÏŸÅÿýl\ËÍ^xÉqcköχܳ0Í CÔqQ6wz××=×ñŽ[ý†Ès}{ÏçÃýçåò)iwö™ø¾Ÿ]l‘¹zçÍÏÚ¸qè·ÅµíWÌôgún‰Zri±HzIÔÄ¥èÌ+vn½{c<øvåÅÿ» 3«1  @€ @€ Ppt @€ @€ @€™"»^QYó­):/›'é}ŠjÆö/ì_>7R §Žmßÿs~ão¯¿¦ÄþGµû)ßU]½;ÏÕâ€ØÆéwŸT.÷´(i»+æú¯•ÕŸhû€6 ,õ¼,v8œÓª<ÎãêùhUÓ‰¾¼–¼³hœ,Ë_YT³o¬ýyû~nô~4©®kÔÞɶ#þ¨h¼<ÍŽ,ªÙ¯?Ívì÷¹Ñ‡»Î}=þÜ; ^wÅÚÕ_Ú÷˜)yŸçÿS4n–çã»Wjµâ1ÓüõõpNÑÜãí߸nÕÚ1¾jú¡wl\7ôáøüÝ©ü¾w}©?Ôž… ËËΊ/ðÃZëîpreèó­j&Ûw]íŽJ|¯:ԛ캦óøùåòIÙé§Ó4=·hÞxþ|úÖkóGFFn-ªHÿL¦×M¾]©ìŒg÷eE>i–/+ªi§ÿø“On|×Nj];uÿ[ªõ¼z  @€ @€ @€Àá+Ðñ_‚¾”Μ @€ @€`4ÿÏ©ZA„k~^8všŽ/ÔÆÎL ÇMª—×L²¢VûÇ<Ëþ;ÂŒß˳ÚÕÙÎä{ÃÃkêç§ÞÙW-ͯêìˆIòìg/?2‚IŠ[¾FótSË‚ÎuÖb)ŸŠáš‡RÒ´t\iÖ¢¦^WüŠ`f,þ'QXÿs¯cêÑ,Nñ+M ÈqòÝãYE­´ë²äˆ–‡ÔW'œ|úê¹sç.ß¶mÛ®–Ň{ç!ö,ìβÂç`šçïŸêËzU¥²½`ÅPÌ󪩞ë`¿·wðÄ,KëÏ–óŠÖ1|ì—Õí}W]YÙ^T;‘þÃâ™~7ÌèhriwwR´£ïóçÏŸÿÊúN£ñÜsLìÞ8¸ç}ãŸùí7TïŒà¼ @€ @€ 0vpœNms @€ @€ @€©ˆÝcçµïMÕðY2zKÑØ±#ß좚}û œÇGîÛv¯÷±+ßðº5_¹W{‡†+«?RßÅnãúUÿ²iíšOE¸ñg1EÇÃõe§»:p<òØä €+ú‡-·m® ]Ûaº¦ÃÅîf÷ !Ž-.åÙSƶMôó•W®º}¢ÇŽç¸<­š"Ü9®ßÁ]±vm„icÊ‚W\ã%gœ}Þ§•_Pzøv‚ÏÂxÐîd;Z«~l:.j­ZžŽy¶9žS.ŸÍJ?©ûG­-Âä›·ßü«Eõ@hQíDû§gúȆ¡¯Ç?.ð_­­Òã{Ž:é‚Ö5­{-Zt\|׊ÂÄÚZ©ÜÖz$½ @€ @€ @€@§ÆõËÕNOn< @€ @€ @ 3ñí-Fš²Ý몵¤0|ó—Æs6YÒýÌâúüëÅ5‡NEc~2<¼ú—^qZJ ƒ‚ ùj§çm5^œkó¯»Õ¤//ÍjcÞq}b¼Z-O.mcÜú®§¿[*¥_î[2ø‘E‹—ÕƒYíw¸ÔjÏÂg•ËÇF ¶àû›ß~EeÍÕÓq «;nþZ<+‚ñðy]xá%Ç™ÍùD|¹UtÖÜþÐ5?üïþÉî$X4ÏáöLï@áó/ÍZì\ý¥ÙÇ.Ž]Ž[n•›×’•m ¥„ @€ @€è°€€c‡A G€ @€ @€„@üGû?œÊyKÕΓZú€¢5çiþ¢šCª?šaìžù»EyšNé=2vþëjwþ¨((•'é“⸃þ÷UsçÎíî-/?¿`ùÛ³4}÷ØsíÄç°xWŒÓvH9vs¼ ”¥«ŽÊz~Õ·dÅçû—,ÿ«EåscŒÈ ¾¯CíYxtvÄ™Iš¶ 
ÄFÚ°ônûÞ˜ÌÕ¹5Ö3e»OfmSql=`:ûè®+ã¾yLñøù-·Ý¿`Û¶m»Šk'Wq¸=ÓwÞ±kMˆµtgä…õ]'*±ÝÁVÇîþÖmmU£ @€ @€˜ƒþÆSsÚF%@€ @€ @€3M ¿y*ϨV»ý¶¢ñ#@7¾`UšÜ·pÌ|w°§¨ìéÝ÷®ŸŠÅæm„s²ZuZŽWU*Û#(Õr·Ê¸cŽ^¸ðⓦÂdcf ËËÎê_¼¬¿ñà?ÔÃgžýð[²Rö©Èb¾:nòMb즇nZ·òGIžonZЬ#Âqáø”$ÍÞT*%_íüUÿÀàe}Ë_°páÀ©Í›¹í‡Ö³0ËÓSŠ®E¯¶Õt²?M¦&ˆÝÉ5vd¬<=ö˜RÏ•‘ ~l{ã¥Ç}\úúöj'Wu¸=Ó7o¾ü7EÏ¿xΑÎ:º<ÙþþK´û9ÙâàE÷aµ{i ] @€ @€ @`Zº¦u6“ @€ @€ @€S";Ý2%ß=èŽGÖæÌîô éÉÅ#æß/®9t*ò¤vã¬6ré …ãfÙúV¼¯°®“y~b„[ŽXê骯}J‚Ÿ­&^°àâû”fw?(Ö÷àRZ{P|‡A§ó"Üun´õì9¶õê÷Tuæç®í;_ÑÝ3ë±1ÿý':bÜ ÷‰c/ŽŸwÏ9"é_²â[‘ع2Íó]oÿì·+•ûP8îP{Ö’ô”‘6MZ…;}]òZòã´pQžõŒ—&¯‹ïÉãš9Mþì¢þ坨°êËã:n|Ňå3=¶(½4n»¾VTñï(,‹þ÷·ªiÔ——º–·z–×wÎFGW6:V @€ @€ 0õŽSol @€ @€ 0åižLiÀq*N Â…;8ŽÖ¦6¸9çÕjÌ<ÉnhÕ?‘¾E‹ǵGJoð˜ÈÜ…Ç„ëÇ×ÒüøÂq&X0¯\>êø¤çQIšœ“fùYiž•¤ùYyžžK;úža³(ÙóºçÝž–éú92²ö ûtw矋°å‘™7MgôˆMþéCò9·>tÉà'ò4ß¼½zçG¶T*¿îÈÑ ‡Ú³0K“ûòÕ¦vWÊ{Ï_ŸïÀ}©i‰gÀøÂõeÄŽ©¥®äÒyóæ=fëÖ­wNÅÊ×gú×_û±O>㺸õZíjúÔ¾¾KξìšqÙ§ÉòVõtýü† —ýO«} @€ @€ @€ÀÔ 8N­‘  @€ @€ @€À´ Äm‡\À1Oò“#TÐÒ¨ktfÓ)ØÁ±Ú}ôñ¥–Šwg©št,à8wîÜî3ôðùI–/ŠðÒã#tnœýÝáϸ×vßnq×µ¾í(ØÈ†¡¯÷•W\̵±Ð½;IvbQqÞêLûB oNÖSÝ·Æ÷°’ïL>´qãÐo;1ÇãP{Æ®u…;8¦Ù4?Ó4Ž^ÍbÁs?ùô7FÿÿnV3™öÃõ™ÑÑÞ%ƒ«"nþšf~aŸæ]Ý—Dÿß5«ÛÞ·xéSâ°mß÷s-Ï?¸ïgï  @€ @€ @€éhã_óÞ™ @€ @€¿@ž¤Õñu H wp¼öÚorÁÍ–ªÕÎïàXJ²ZÎy°w–ÒI¯¿wÉòsû– þËgŸ÷Ë´”\a–ß ßyqê‡äï†++GªIúˆ<Ï?3e—/v¢‹Àç3Ãê}Ù¬äýƒëú–,}â”Í7MjÏÂÈÚ¶Ú­î.µjmZ‡zÖù¦éÖèè4ñ`ù_Sõ}9œŸéµZõƒE*ÂßK‹jöëO»÷û<æCž'wTI2¦ÙG @€ @€ @`É_êN£© @€ @€ @€)˜?þ±›ÜqCß¹mÛ¶]5‡TwžTwvzÁ¥4ïØˆ^[;ã嵉ïà8¿\>)‚ï‰ ¿A½WÄÏÛ™óP¨Ù´nå†× !Ç?ˆÀÙo¦tÍi:+¡‹“¤ôž%ËW^P.‡î¦tA‡ÏàqÏž\t¶µ¬4­ÏÁ,On/ZÓLîïÛU…ç·; Üué¼yófÖŽ³àp~¦_QYóðÿÏÖdéy ûݺæ®ÞúÿÖˆwñlkõÊ7ŒŒŒÜÚªB @€ @€ 0µŽSëkt @€ @€ @ @í¨£ŽlмS$»öoð©‘@(ݨýiËbÿÀ ¼-^qñœRÏ#ØøòøÓ5!Ú?$Ï«ºùt=lXË“í8ùÊ9þÛ ×]{Z­–¿ ­àÕ$¦ Çxeƒ=¥žïõ¬xÉ$†rh›yš†KyuZ¿ãÕ$;ªÍåÏIJw¯zJ|×?Ztr±ûæCO8ùŒ7Õ·ÿp}¦ïqÊkù¥{Þ7ûYêJ–5ëÛ·}ö1÷YXô)ÔÒêÊ}ñž @€ @€˜~Çé77# @€ @€8ìvüú×·"¤iOìŽ5µÁµÂEüÞ8´wžªÕƽ«eßâ‘¥ùšØuð˜)¹BhLòä[rzw-ÏÝ\Û~b„žžYfyíÆ)™³Å [·n½sãú¡•±†'ç;w™äùÿŽÔWb}SÞíú¯ýKV¼¼Å²tuB Ïo(¦––¦æ>o2q|·¦u¾&˘þæZþÖ kWþqLœïª%/oWá³5ž¿¯¾h`Ù“;¹ØÃñ™¾¯_¾óÖµñùÎ}ÛÆ¾$ö@R.—ƶýœ%éàØ¶ý>çùµ›Ö®ùô~m> @€ @€ @€Ó.à? ˜vr @€ @€ @€Øí[²âÖr´Ü™ìÈ#O­÷O{ ìPºByZ»!MZç<"÷ƒ¤š¿ö`<¯¬Vý¯ñ¬+v|GÔ¿*Âã9¬iíîSš7Fûnä¿‘䵯ÜvSö+¯º½éA°cxø²kbú¨ÿ¹ \>刴ç‚,M. 
Žgu<ð™&ïî¼9‚•—ÀSžéS³dz‡ñ]8üŽyí Ö¯zãž›mseèÚÞÅËÿ"v4}÷ž¶&?³®<ûÀ“ÊåG_U©loR3®æÃí™>gÓ¦M7õ nŒçÙÅcûö|Ž{ôÔÞäˆó£è{ÚÆþœ_ê@IDAT.Ÿ÷=7j›¾ò4ŠÎ) Š7V @€ @€ 0F@Àq ˆ @€ @€ @€Àô¤IÁÅ´eÀ1›Ó%àXp9vÕÒgµÎ7Ö㳇+C‘9´_½K–—ã "Ü8þWw¤iþ8ò›áñÍZ­ö­]IöW†~>¾ÑòY㫟ºêV*×Åè¨ÿ©ïvzüI§?.-ÕƒŽÉ3Ó_ÑšZ¯øÐê­%Ék6®[õÇ®zãúUï‰ þÅÄÊØ¾ý>§éCNÉzÞmº_û?NÏôfDµjíÒ¬Tjp¬—fé²øÑ4à8;×.mùßCŒVó•ÍÖ  @€ @€˜>–ÿýé[†™ @€ @€ @€ÃM ¶Lº1B4§·:ïYµÖÈVÇ.}Û“;o˜•Ìiyº&=µ€«ïœÙ²ð î|^yðþ±S῎g‰jüZšÔ®È“üÃõÿ•T*Õñߨ6O³ãÆð×Ý×öªXsýÏß.X°`N×ìcŸ–f¥<æÏŠ Ïï4:Ÿ¢¶z@¨”u½&ê~¿¨VÿÒä·…Gåéƒ k:X÷÷iî *vl­ïÜ÷G× ½§É"óttç‹ó®Y߈ãMjv7giú'}‹—¯_ó…Vuíô.ÏôV+«?áÒŸ†{‹{1ë?þK¶lÙ²£ÑX€\Ú¨}O[\ý/ŒTVÿpÏg?  @€ @€ @€'¸©ÍL€ @€ @€Îi’ÆŽ­_µ®ü­+ô~²R¹%ÉóÖÁ½4-wÊ)8”µb—ÊwÅž]Ç·uyò¹Ñêè#‡×­œ»aݪ7¯[ýµN„wÏç'­!Büwp›7o¾#BB ƒ?^7ô¨|çèi±ã+"ÑUßñ,6­kÿ!£¥}}Kéû§ý³ÞʬVû^ÑŒ±óè„©Eã¶èt‹¾Ó»œ¾-¾Í»ÏsÆ˯Žôß¶qÒY’–>ð¤r¹§Ú–%‡Ë3½%Bý•ÖV¶ª‰çÒÑs޾Ï3Õ,\8pj<ïרüƒ÷¼÷Ž @€ @€8ü—«òäÍM€ @€ @€@4ÿeÑìY2íÁž¢%Œýyž¦×-,­v=´¨æ`í¯ïÞa¤…í¬/¯%¯ß°nåÓ®¨¬ùV;õã­‰`naÀ1v;è~7<¼ægáòÞáµ+Ÿ½ëŽv¬½!ÖyS›çßw—þ°ÍZeã¸-¿óKwï$Øü¨<9oîܹÝÍ :×s^¹<+‚açunăw¤òøä9ñ÷ùÍêë»Ön¬¬þT³~í @€ @€ @€Àô ü¦{údF @€ @€ @àðضmÛ®U}µÕÙÆNŠiW>ki«šNôåÝ­w{Ú=G-ùB'暊1nùíO¿‘äùmEcwÍ™õ’¢šNõg³Ž‰pOº,Âcþk’eë.•®>áäÓï¨ï†Ù?0ø¥¾%ƒkïÚm«ù¬ÝÝÇŸÞ¼wŸžtô“û|š’·§¦³ŸÏ.<4ÝE5{úûÿ2†›Ãâ‡sŽ9éöØ5ô[i)­D íqû_.èï_qÞžú©ø¹k4ùlá¸i:«¯oÙÉ…u Æ-P»óÖMqÏÜÑêÀÈ>ô¢þåOhU3Ù¾ÞòàïÆývÎdÇ™©Ç_U©lëô’<^Eç%¯ì[¼¢0 Ýlœ™üLovÎÚ·V*·v¥Qßž¶¸gëǽ¯,MîýÐèMž¯ŠfAíF6Ú @€ @€ @€Àp<@ð¦%@€ @€ @€’$ß½sY‘Dº¼¨b²ýY–†(ki»¬œ¯Øq4V¶w«f«ŒÔ«vïˆØ¬ Cíóç/=&Íï^a”Ø\+‰ \ú„xwÖ•W®º½Õ´Õ®¼­€ãŽÛò¶§#}YvA›ãÌj³.‰óæÆîdÏ ‹ÇÏRÃãºò Û;Ô8²aè[ñmlyê¡®_–vÜÔ¡) ³À¦M›ê®—ïÓÔðm©+û“†jL³tJÇïÐ2è0Ãë‡>°+ZÄîÇ\š (ÀÝlœ™üLovÎÍÚ«iíÒf}õöøûäì½!ðr¹È–;Þæi²²Õxú @€ @€ @€épœ~s3 @€ @€ @€w ŒVó•õàTK4yX}g±–5“èìí<1BfÏm9DžoßqKõ ÝÁ±¾ö¼–¼¿å9ìîLïš}ìË‹ë&WÑslé"vr|Ñ(y­VêJ“Ú EãÔûgÍêšÒß{-83îÔÁvÖ‰›¶ŽqóomcÌ) 8Î/—ï×ëÈVëˆÑOë;صªÑ7ê{ŠŽŽ`Ý@_yYËðVÑÍúû/ëñû›õk¿Gàæêö×Ä÷ö÷´4~W-u|ú÷{‹[gê3½øÌ÷¯¸bíêÏÇÅ­ì]µEõ£z“ž'Åî·ñƒTkÕ­û|lü6MŸxA¹|JãÎÉ·ö$=…ât MžºéÃëV-ÂWÿÙ´àîŽ4ËþõYåò±Euãé€ëIIZzïxŽ9œk?Y©ÜœÖªئÁ.X6¯ÍÚýÊfê3}¿“lïC<~ò¶,MÓÝÇ,M.jUáÇÖã´:X @€ @€ 0eŽSFk` @€ @€ @ ØÅïŸ ëÒäg<øá¯-¬gAÿ%gGh­0¨²kW~(„jqúo/&H,e¥Í+—*®wEzô 
é¿´ÚAëžÓ¯ùÙ=Ÿ¿«¥ùu{öoÍ“ìqû·tîSïÀŠÿ»±=­ÝcGÐÙíÖ^QYóßl+rÈfg=Œ1§äw{y–<½h½qþõ0²×T TÓ7Ÿ¦÷?&ëÙØ©ïï…^r|O6g$v=©pn{6¬_½)É“ÊÞ†&oâ{“fIéßï~7©jÚ<#ŸéM϶EÇhºk(ºë _±»îܾ¾¥ÈÓdaÂzcìÄ|sõŽÂP}Óãu @€ @€ @€S&0%¿²Õ˜ @€ @€˜q+«?ɃožXš¼®·¼ì¹…umìuwÕÃ;Cæ_Ù°ê«m{@Ë~YÛ~i„å w<ŒÌ͹'f=«çÍ›×v¯ëXþI’®h£¶V­þUuÉÎÛj×F]Ó`ËÞ1Ò¤w*vùŒ{î÷"°ø¦½ó´ñ&¶;¦²=%yRKß³çC³ŸqÍžÓ7°|\ëh6Ö¾í½åÁ'E õeû¶5z_Í“‘FíÚ:'0\Y9ßßE#ƽðŒ²9ŸX´hÑqEµ­ú.¼øäÙGwm÷[Õék,°kûÎWFÈñ†Æ½÷´¦Iò £OÈÞzOKûïfâ3½ý³¿§òеkÏÕOÝÓ²ÿ»øN¤Iwéµñãœý{öù”&›ê»oîÓâ- @€ @€ p8$Â2 @€ @€ @€Àá,W“¿):ÿˆ/‘eÙ½K/*ª-ê¯û"à·>Âx.ªMjù k’‚«*•íy-ÿÛ¶–“¦xòéŸZ°àâû´U_PÔ·xÅëbƒÁW”íîÎó|Mì\ø­vj?ò‘ËnŒ·¾TT÷ÇcÎ<ûa 5;í|}V*}¬~ï5©iÜœ&G7îhÜA©÷GÏ{ïiM“ì/z—,ï»§erïê;žeY:\|~ùW7­[yåäfst;£wìü£|ßRT×ì‰ÙÇ|u¢¡ïþÅËú»ætGp;}dÑ\ú ŒŒ\~}-ÏÿWãÞý[ã™÷Š¾ÅƒÏØ¿µøÓL|¦ŸuãŠZ-¹´qÏ]­n|E«þø»|eË~ @€ @€ @€Àp<`ô&&@€ @€ @€ö W†>¡žìùÜôgšÎŠÝ°>Á³¿<¯\žÕ´®EGÿâåœpÊßNÒt~‹²»»òË7¬_õÑ⺃§bãúUïÌódS[+JÓ'wÏéþR}—¶êÕw‘ë[²â­i–´¬ŒµÝšì}}ƒ¡š6ÕÒvwÌ^ÝWìm:P›½½ƒ'ö ¬øhìløÆ8d¿OËmsªÝe›7_þ›@­nç˜9®Š Ô.\8®娱{—,û´»ks’&§Œíû9ÈoÛæóÔŒŒ¬ýE|Gþ¢Ñ#Ðõààné_2¸á¢eOcZÞ«ìîê_<ø¬¨ÿx’•>÷÷Ú™GMsë‡VF»0ü×*^É¿ïÞ9¸ùp {fâ3½á‰4Þô«k6Æwã¦eÍïÿ<ÿyü]»E{ @€ @€ @€£@×Á¸(k"@€ @€ @€?·ßøÊî#OxrÚêì#%RÿýÆ›’õ¬xèâå¶!¿ó£I¥RmuL½ï¢ÓºòYÿÁƾ˜£ð•'ùo·W·ÿqaáAXPÛqóï—Ž8æwâ\X´¼ð|pZ*]Ù¡§ÑÑüu#VÅ®nůg•ËÇ“öüIÿêɵè«å/¾ìšâî©­&—Ï*íÞåsö=­MÞeÉú\~`tûŽÿS‹5©jØ»#ž;Ú½8‚H+â9±aQižÜ7)—KíÜ—{†Û^Ûþ—s²9…åI{ÚýŒµÍ‰ëú7]sŽ{eïÀŠ¿¿®zÇ¿ÔwykTÛ¨mQù’Ge¥®¿NòdQÌUüUÈ“ïD¹½Àl£ µ[`xÝÐ{#Äýœ¸< Û:8ži]I©¯ÉŠëâ¹õḨ?ªåÉ/ãêþ6îç“Ò4¿_’§g'i¾ >Oø¾Þ³–<˜™×^jºó¥¥dÖ‡í‘{½‰çñ YÏÛ¢«õNƒ ŽiÏô§XØ´uëÖ;ûV¬Â—)¨%ɪhŠ^ @€ @€ @€ÀÁ( àx0^k"@€ @€ @€‡¡ÀæÍ›ïXØ?¸¤»;ýrœ~a-‚uçDÐk¤/éùM:08RËóO$µôgÕ¤v]²£zk:§tNVKÏ‹º‡EÐç¼ø<1~Îi‹6Ï«i^{ñ–Jå×mÕdE›6mºiaÿòÅÝ]ɨ­.Ãé9aÿœV]ÁªçÕ|kµTûEš—®OwÖnMgÕΨå]geY~V„ãÎŽ0Ï‚Û7žSkôö»wëÏQI²¹2tmßÀòŠÝ w¶‹ó¨ÿþë%Ý=³–÷.|GDZFv$ÛüÑJ庱³^xá%Ç—fwVêJçF(ìEq>O[3ösì´á®ôgQ{ÚØ¾½ŸÓ´´ é¹ÿæ$¹vo[Á›ú½Ö¿xÙË“´»™¿âkþ‡S³9Á¶ÏÅ¢~» þO-I\ÝõãíÙ®[»j³PêJNºÓc½§çIöðx×nñ¦ð•ç·åIõQ'ÐVˆÕÑ‚|ûÍ¿^Üsì}7ï½^í »qÆ}ñâzi¶ßõ»?ïרpÄHnŒ1ŠvA- ”7|†6nZ»ö'½‹—ÿe–¥ïhã_¶h`é†Mk×|ªÚ½%3홾÷ÄÆù¦ZÍ/-•Òq³Ñ]+Ç9•r @€ @€ @`§ÛT @€ @€ @€@k‘ Cßì+^’–ÒË¢²0äX-Â8÷‰/ÌÒô…I)‚=õÿ3'þÔ_Ù]?vÿßâlÏî²z€-"l/Ú¸~õ!½k]}'ÆÌ-ɓҚ·µì ð<#~üa\ƒ?Œ]áî FQ‡ 
Ù=!©6-wƒÞý"85rãõ×¾vß¶ñ¼¿¡zç[N(Í©ï¬xj[Ç¥iO¬úÏãþ¼'N¿É`ìr˜þ8Öq}œãÉ‘Ù;=ŸGµ5ÖÝEqìo’ZíI–= Æz_«cKijÇp¬µaýê ýƒ+cì­ÆÞ¯/Bmñ¹|×%IëW,Iº»“ž¤;¹ëÃ=Õã¼l»ªIÚ¿iÝš¯Ü3‚wÓ%°eË– ,XÔ=çø-Ú}ÚôÌ›_~Ãu×¾ðÄSÎh½#hžî˜žõ:³l\¿êݱsìÅñ¬­âZ¦YÞõo .|äÈÈÈ­M tÌ´gzƒS,lÚTúÏt'þ¢zXañÞ‚üÿ³w?@–UõÀÏ}Ý3ã@Äq!k‚À”$ ã(&Y2S“áÏ8Hdc*¤*µ•¤ÜMÖh¢‰UÉšÚT*YÍ`z˜žvºp%ŽÖš‰qaCù“0ÿä0tÏLwß³ç¡â0¼~ïu÷{}ï»÷ó¬®~}ï¹çœßç÷úùÊö;÷+;vÜp÷ó?zB€ @€ @€”Nàð?ë—ns6D€ @€ @€ÔO`|ld"Ÿï|.LV@ù)üvÕÄö‘2üG Ì¥æìÛÒí÷öYM ŒþÍøìÔÚÝ»wÏ,t»ÇÆö‡8ûî4×¼BAϯ—ÍPLʽã¹pÌ|Ã1~éÐl8c|lëÿJ¦w>?ïOÒôÎãTÛÃ?üà•©Æmmõùd3â;›‡ËwŽn¾µÏK™¾@ó®¶3SO]Þ ¿ØfXON¥×ܵ;¶l|æ™g:Þ1 QÀñÅêy6® 1zñ©I!È“‡W®úãíî§*½§wWñ‹GåY¸æÅGç>’çñÚ¹Ï:C€ @€ @€”A@À± ]° @€ @€ @àc#{ffó³SX侜èç1|'ù%;F7ÿÏ~.³Ôsn½ãÐlü©`ºc©×nåb?:>ºùÊ06Ö18ÕiÍZ²צqÓÆöð|ÊÇ„ßϧÞù™±‘o5çÝÿdãŸÓ·¼ýqAÇft<ŸÜ˜ä¶´Ÿ¿OgcÜŸ^+Wîܾù†>­`Úy4ïò7¾mäéõð+!½GÍãÒ.‡Ægcžÿfúý¥tAþŠW¼¢óß³ àØBwÇŽÍ_Kw†ýƒ§Zúåu¿¸é]­Nt:V¥÷ôNµ¶:pvrkú}è6, z¦ÐÀx«#@€ @€ @€^(ÐùT/ï' @€ @€ @€À’Ü4¶õ¾üP8+…­þra†í-­ñåÙìà£[*„hó¦'ŸHwrÌ?–B£û„4ï‹â?Å<üôøö‘ß÷¥m.ر}äóéŸoO=ÛÛfXON¥;_þ9kbtó‡hÞzë–gSØìËíIwŠü± /ÞøÖvcæ<— ã£#—§dåûÓ:OÌ9®Ç'’é¶é©C¯Kw0½ºÇS›nq1½>5•O¾>è¶/nª\î ySº+é©ãÛ·|"M/÷toÒÙã†0¢õ³3ÇÖ4aï}w}<ý÷Õ¿ÌqúùÃéý!‹ð×kÖ¬yéóçñ¤Jïéó(û¹¡Ÿ{8„lW7×¥^ܸsçΧºk  @€ @€ Pœ€€cqöV&@€ @€ @€:LLŒ<žî,ö™<c ãLt>ÿÓ1>˜çù¯§;æ½}ç¶m{ç?Áà\qóÍ7OŽoÛò¡É|ê5yŒ’v~ /»á±tkÃßzüáÏlÞ‰³k4çzzæô”ÉjÞÉë¹`VO׉ñÖl<{|ÛæóÆG¯ûj«¹S×´:~ø±¡8ô[‡ÿ<Ïç±4<4yèu©ÂkS™ÝÞ±lžË$Å¿žî´ùÎô»vÉM7mÛ7ï \°$Í`׎m#¦§ãéÍ;£6û6ï…Ÿ»Cg¼>äùLw†¼ð汑ŸcÅŠ™•‡ÿ<Çó©9Ž×þðí·ß>=;¯Lé-¢ý# ÙIËŽZÕ —.èQ¥÷ôyä±ãûosÎÔ„ÍóžÛ @€ @€ @€À’ tü8—|G$@€ @€ @€!îæøthíÚ —ÂÐû³Î YvÂúþ1ƒî YüÄ|ó†Ý»w÷-8Öõ†–pà®±±ÇÒr¿qÁúM²¬‘ýr#‹?ŸîÝöÉshÛH™Éø¹”&ùëþõk76C>‹˜««Kwíºîé4ð½kÖoüØp£ñké5ñ¾TÃuuq«A)ìš’’·Îfù57Žný»VC?öd>9vìÐÊ?Kw;úðã‡?ϲ°fݺKOÙ±ãú{?>Ÿç7ß|Ã·Óø+.ºè½Œ+²Ë1l Y8c>s´›Ã¤€Õùl~ã“}óKuû=he2(ÇnÚ1òOi¯Í¯ß½hÃ{ßÐÙ…é5ñšô{üªôý‡ŸûÂñ1 O¥ß‹oÅíK)ÖK=¿õáüÀ®=ccsGÇÎÇ,>:(VEìóÆ[¾²îâ÷þiÿÜyýì?½gÃåŸÞ9ºùÖÎc[¨Ê{zëêZMÿ=ó™“N9íÛé=ì¸Ö#ž»%é¾ùÔ‚]çš×q @€ @€ @ ÷éoZ @€ @€ @€ÁX³nÓ›– ‡_H;G õœböŠôýØôsãÕÄðux8…|ö¤Ûòìànܶí›/SóÎ?ÿÒ—¯8jøÜqü¹dôÆ,†ãSpïøäù²#iÒݦpèiÌ?Ç,þC–‡ȧÃW&&¶zZ³fÍK+W“€g‡F–‚°ñô´Çc n¦ æLªë±ôG²ÇR­{Sìóé¡·~/D{d¹¥ü9…:|x¨qfêÅ©Ž7§¯7¦:›¯ý—¼hÃ1N¥×þ#YIµ>œ\îÈC¼qbtë/ë@íž 
Lf»ÚA¤ß¡¿¹ªÝ犨Â{z'Áu6Ý›Þ÷~|®qéýñÒ]i?8×yÇ  @€ @€ @€ò8–§vB€ @€ @€,^ ‘îtw\Xž­ššþÎÓO?ô¸;Ó-õÌ3Ï\vÒIo8nf(3<ž~hèàSíîþ¶ð•úwe3ø˜uÔªF\¾r2Ÿúö-ccO¦ÕR¶±zf¿N<ñ´cf†ã1YˆCÓûã£ß»ÓeõŠUQ_.¼xãÛ†ÃЗÚMóð¡ñí›?ÖnŒså¨Â{zS¶y7ç,ÞÓVyúÐvì¸áî¶cœ$@€ @€ @€J! àXŠ6Ø @€ @€ @€ŠX÷‹×…ÆÐ§Ûí"†üÊñm[þ¦ÝçôS`í†MžeٯεFº{ãß§»7ž=×yÇ  @€ @€ @€r 4ʵ»!@€ @€ @€ @€"bzm§u³<{ Óç ôKà´õë—g!»¸Ýü1æW·;ï @€ @€ P.ármÇn @€ @€ @€ P/tGº«Cî ³áž™<ÜóXcêÞ=ccSK®Ðˆ?ÒFÚ=öÇ©;ÚwŽ@?^Ÿ­ø…ôýws­‘îÞ8yà™|t®óŽ @€ @€ @€åp,_Oìˆ @€ @€ @€Z d¥Xáª0²ôõª¸2®Ýpù¿¥c÷ÄïÉC¶sçèæ[ûIrÁúM'¤;ã]ÐnÛ{ËØØíÆ8G ¯Yãýæß¾k×uOwã4 @€ @€ P"Ç5ÃV @€ @€ @€¨Ÿ@â“éΉ«¾_y–éù«›_)txn#†ÿž÷5à¸< ¿šÖX–¾Ú=nowÒ9ýX»ö²MóŸ×nÙ™ü“íÎ;G€ @€ @€”O Q¾-Ù @€ @€ @€j$eîŠß¼fÍÅ?Ò/‘³×¯_áæO©ËÛ:qž@¿ⲡ«B–¥{œ¶~Äï¼qÇ–¯´>ë( @€ @€ PVDzvƾ @€ @€ @€¨‰@¼¿]¡Í;:½dÙùíÆ,æÜ«†VþfºS䱿˜ÎÅíÆ8M /kÖ\òÊô½ªÝä)àøçíÎ;G€ @€ @€”S@À±œ}±+ @€ @€ @€j"çá³JmdÙk†¼:›ïùµ6} Ç~¿ãu1~vbbäñŽã Ðcµk7¾jÙQËn˲ðÒ¹¦Ž!>þp<°u®óŽ @€ @€ @€åp,ooìŒ @€ @€ @€ˆSÍ€cÞ¶Ô,;qxåò«W¯~IÛqó8yц—¥›CþE§KÒñffóð‡Æ9O ×k×^zR¶¬ñ¥²ÓÚÍcöÉ=ccSíÆ8G€ @€ @€”S`¨œÛ²+ @€ @€ @€ÔCྻîš<õ´3Þ²pR»ŠÓì~tåÑ/;ëu§žq×ÝwÝùP»±íη~ýñozÓ™ÿ5 ÿžŽÿfœ…ø{Û·\ßnNçÌGàÂõ—úšÓ^š¯ýV×wÞy+Þôæ·® ÃCÛÓkôß·óƒcñÙéÉé‹ï¹ç_ZÎõƒqž @€ @€ @€eÈʸ){"@€ @€ @€ @€@Ön¸ì¬†þ.…¹ºún ás!ß>ò…nÖ­»ô”8<üi‰ËÓ5]Ý 2†¸g|vêmall¶ÛuŒ#ÐI`íśƳ]bx"„xwz=#‹áÑÐÇÇ~8ݱñ­é7aU§yšçcÈ?>¾mËow3Ö @€ @€ @ |]ýq¬|Û¶# @€ @€ @€TK`í†M•‡WΧªã=!Ëî 1>ÐüŠYx0äáј5^Ùù«Ó¹W§? 
§ïáÄ;=ÍÝèzþ÷φ쌣›ïïú t!pцMÿ£‘e¿ÞÅжCR0òÞ‡f'Oß366Õv “ @€ @€ @€@i†K»3#@€ @€ @€ @€@â¡ð_Âòð®t纓»-;"_›Æ¾6S~1ݯyáÐ÷¯î>Ëøý+žÿãlò_Ù9ºU¸ñyOz%^™ö`®ô¹B¸±’¦ @€ @€ @€ ,â/ZîÚÒ @€ @€ @€¨˜ÀÄÄÈã‡òxNº+ÝÝE––Öß—çñ܉ѭ×¹kWX æ,¾ºüOÇ·_÷åÅÏc @€ @€ @ HÇ"õ­M€ @€ @€ @€Ã>36ò­©ÙÉŸ‰1ÜqØá%{c¼%­ÆÄØ–/.Ù¢ª@Ìâ¢îà˜~?þ~ßìß©œ‚  @€ @€ @€@+ØT% @€ @€ @€ 0¸»ÆÆûF>yvÌó†÷/I%1ÎÆg|t伿úK²¦Ej+03•/<àã®éÉ'ÎÝ366U[@… @€ @€ @€ dªE) @€ @€ @€¨”Àë7°¼ÿ0„Ɔ, +z]\ a_ˆak ùµ£[¾ÞëùÍG`lí†Ë§æÿšŽ[÷Þûµ_ºýöÛ§ç˜×a @€ @€ @`À¬a¶K€ @€ @€ @€@ýÎ?ÿÒ—/?z(…—§PØY‹ˆ1L¦ë'òFvŽnþ|zžžzXZµ_~Oú?,œÒÕª1ì1ÿãÛ·ìJãS.׃ @€ @€¨Š€€cU:© @€ @€ @€j!ðîw_rÜÐQËßÒˆáÌЈoIôý±ùZBö²â1Y– § ØÓ1dûÒ÷}éü¾³ô=î‹yxpæàSŸ¿é¦›ž©–"K+ðž÷¼gU¶â¥ïldŸM¡ÛŸ Y8!½VOϤï{Ók÷_SòöþÙ™¸õ¦[þoi ±1 @€ @€ @€ @€ @€ @€ø®Àië×/gA` Ö¯È}Û4 @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€(§ÀÛ.ýË÷­~ß5'—swvE€ Ð/F¿&6/ @€ @€ @€ @€ @€èF †xÒô¡C»…»Ñ2† PÇêôR% @€ @€ @€ @€ @€\(ä8¸Í³s @€ÀÂææ* @€ @€ @€ @€ @€èµ€c¯EÍG€(µ€€c©Ûcs @€ @€ @€ @€ @€¨™€cÍ®\ @ ÎŽuî¾Ú  @€ @€ @€ @€ @€ PF!Ç2vÅž @€=pì9©  @€ @€ @€ @€ @€ @`ÑBŽ‹&4 @ ìŽeïý @€ @€ @€ @€ @€ @ ®BŽuí¼º  @€š8Ö¤ÑÊ$@€ @€ @€ @€ @€ @€À@ 9dÛlš Ѐ€c7JÆ @€ @€ @€ @€ @€ @€@qBŽÅÙ[™ ÐGÇ>âšš @€ @€ @€ @€ @€z$ äØ#HÓ @€Ê# àXž^Ø  @€ @€ @€ @€ @€´rl§ã @`à®e6L€ @€ @€ @€ @€ @€ 9Ö¸ùJ'@€ª& àXµŽª‡ @€ @€ @€ @€ @€Ur¬z‡ÕG€ÔD@À±&V& @€ @€ @€ @€ @€*% äX©v*†¨§€€c=û®j @€ @€ @€ @€ @€ ¾€ãà÷P @€@­kÝ~Å @€ @€ @€ @€ @€ @`À„¼¶O€ÔY@À±ÎÝW; @€ @€ @€ @€ @€ª äX….ª¨¡€€c ›®d @€ @€ @€ @€ @€TN@ȱr-U P}Çê÷X… @€ @€ @€ @€ @€¨‡€c=ú¬J @ 2Ž•i¥B @€ @€ @€ @€ @€ @ 9z @€F@Àq`Ze£ @€ @€ @€ @€ @€ Е€cWL @€Šp,ºÖ'@€ @€ @€ @€ @€ @€Þ 9öÞÔŒ @€ 8öÔt @€ @€ @€ @€ @€ P!Ç’4Â6 @€­[»8J€ @€ @€ @€ @€ @€Ur¬BÕ@€TT «h]Ê"@€•8ç’O~¡’…)Š @€ @€ @€ @€j-C894¿ú÷Ȳ–-_¾z÷µWìíß"f&@€˜€€ã|´Œ%@€ üô%ŸLÿŽ @€ @€ @€ @€ ° !DZ¹ˆ Ð/F¿&6/ @€ @€ @€ @€ @€(•@Œ'M:´{õû®9¹Tû² @€@MkÚxe @€ @€ @€ @€ @€ @ –B޵l»¢  @€r 8–³/vE€ @€ @€ @€ @€ @€ýrì—¬y  @€ópœ—Á @€ @€ @€ @€ @€ P !ÇJ´Q @€À` 8vÿìž @€ @€ @€ @€ @€* ä¸P9× @€z" àØF“ @€ @€ @€ @€ @€ @€À@ 9dÛlš¨†€€c5ú¨  @€ @€ @€ @€ @€X¨€ãBå\G€X”€€ã¢ø\L€ @€ @€ @€ @€ @€•r¬DA€ –€€ã`õËn  @€ @€ @€ @€ @€ @ _BŽý’5/ @ ¥€€cK  @€ @€ @€ @€ @€ @ –B޵l»¢  
@€b‹q·* @€ @€ @€ @€ @€”U@ȱ¬±/ @ bŽk¨r @€ @€ @€ @€ @€ @ BŽ=@4 @ ½€€c{g  @€ @€ @€ @€ @€ @ ®BŽuí¼º  @€%p\"hË @€ @€ @€ @€ @€ @€À 9`Ól™ÇAé”} @€ @€ @€ @€ @€ @€@1BŽÅ¸[•¨¼€€cå[¬@ @€ @€ @€ @€ @€X´€ã¢ M@€8R@ÀñH? @€ @€ @€ @€ @€ @€VBŽ­T#@€,X@ÀqÁt.$@€ @€ @€ @€ @€ @€Ú 9Ö®å &@€ú' àØ?[3 @€ @€ @€ @€ @€ @€@„«ØU5 @€8€nI @€ @€ @€ @€ @€p!Ço í @€ep,Cì @€ @€ @€ @€ @€O@ÈqðzfÇ @€@©KÕ›!@€ @€ @€ @€ @€ @€r¨vÙ, P.ÇrõÃn @€ @€ @€ @€ @€ @`Є­cöK€”D@À±$°  @€ @€ @€ @€ @€`à l °äŽKNnA @€ @€ @€ @€ @€¨ ÀÞaèŠ Ö¥$ @€@ßûFkb @€ @€ @€ @€ @€¨‰ÀÞÙð꿽þÊjR¯2  @€=pì £I @€ @€ @€ @€ @€ @ ¦Â5m¼²  @€Å 8.ÞÐ  @€ @€ @€ @€ @€ POáÆzö]Õ @€@{i @€ @€ @€ @€ @€¨•€pc­Ú­X @ ŽýP5' @€ @€ @€ @€ @€TY@¸±ÊÝU °dŽKFm! @€ @€ @€ @€ @€¨€€pcš¨ @ Žåèƒ] @€ @€ @€ @€ @€ @€@ù„Ëß#;$@€H@Àq€še« @€ @€ @€ @€ @€ P˜€pcaô&@€ª* àXÕΪ‹ @€ @€ @€ @€ @€z% ÜØ+Ió @€p< ÃS @€ @€ @€ @€ @€ p„€pã ~$@€ôJ@À±W’æ!@€ @€ @€ @€ @€ @€ª 7V­£ê!@€J% àXªvØ  @€ @€ @€ @€ @€”D@¸±$°  @ ºŽÕí­Ê @€ @€ @€ @€ @€ @`a ss @`^Žóâ2˜ @€„î@IDAT @€ @€ @€ @€*. ÜXñ+(€€cyza' @€ @€ @€ @€ @€ P¬€pc±þV'@€j& àX³†+— @€ @€ @€ @€ @€Z 7¶dq Ð?ÇþÙš™ @€ @€ @€ @€ @€C@¸q0úd— @€@Å+ÖPå @€ @€ @€ @€ @€ @€À¼„çÅe0 @ w޽³4 @€ @€ @€ @€ @€ –€pã`õËn  @€Š 8V¬¡Ê!@€ @€ @€ @€ @€ @€®„»b2ˆ Ð?ÇþÙš™ @€ @€ @€ @€ @€Ê) ÜXξØ P3Çš5\¹ @€ @€ @€ @€ @€¨¹€pcÍ_Ê'@€Ê# àXž^Ø  @€ @€ @€ @€ @€ôW@¸±¿¾f'@€ÌK`x^£ &@€ Ȳ싅nÀâ @€ @€ @€ @€ @ 1„“CŒ'õaêçn<\Ãs @€@ ²ìÁ @€ @€ @€ @€ @€ @ Æç\ú©Ç?ÒGáÆ>âšš °PÆB/t @€ @€ @€ @€ @€áÆh’- @€õp¬gßUM€ @€ @€ @€ @€ @€:7Ö¡Ëj$@€V@Àq`[gã @€ @€ @€ @€ @€ ÐF@¸± ŽS @€28–¡ ö@€ @€ @€ @€ @€ @€½n쥦¹ @€}pì¬i  @€ @€ @€ @€ @€ @ áÆBØ-J€˜¿€€ãüÍ\A€ @€ @€ @€ @€ @€ån,g_ìŠ ÐR@À±%‹ƒ @€ @€ @€ @€ @€ 0`ÂÖ0Û%@€8z  @€ @€ @€ @€ @€ @€À  7zퟨ¥€€c-Û®h @€ @€ @€ @€ @€TF@¸±2­T P7Ǻu\½ @€ @€ @€ @€ @€¨Ž€pcuz© @ †Ž5lº’  @€ @€ @€ @€ @€ PáÆ 4Q  @€@½ëÝÕ @€ @€ @€ @€ @€ @`„±köL€8B@Àñ? @€ @€ @€ @€ @€ @€@©„KÝ›#@€t/ àØ½•‘ @€ @€ @€ @€ @€ P¬€pc±þV'@€ôT@À±§œ&#@€ @€ @€ @€ @€ @€> 7ö Ö´ @€¢‹’·. 
@€ @€ @€ @€ @€t+ ÜØ­”q @€p fÙ* @€ @€ @€ @€ @€j( ÜXæ+™¨‡Àp=ÊT% @€ @€ @€ @€ @€ œ@–=°" ­þÛë¯|`àönà @€ÜÁ±#‘ @€ @€ @€ @€ @€ °äß 7¾]¸qÉå-H€X2Ç%£¶ @€ @€ @€ @€ @€t%ÂË–/wçÆ®° "@€ ®€€ãàöÎÎ  @€ @€ @€ @€ @€ P=ï…w_{ÅÞê§" @€Ã×ðœ @€ @€ @€ @€ @€Šn,ÎÞÊ @€ @·$ @€ @€ @€ @€ @€! Üxˆ  @€Õ®~‰*$@€ @€ @€ @€ @€ @€2 Ä,ì]¶lùêÝ×^±·Ìû´7 @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ P¬5*‘ @€ @€ @€ @€ @€”BàœK?õáãGJ±™En"˲üŸë?ð{‹œÆå @€@5®]é @€ @€ @€ @€ @€ @€ @€@AŽÁ[– @€ @€ @€ @€ @€ @€ÔY@À±ÎÝW; @€ @€ @€ @€ @€ @€(H@À± xË @€ @€ @€ @€ @€ @€ @€: 8Ö¹ûj'@€ @€ @€ @€ @€ @€ @€ 8oY @€ @€ @€ @€ @€ @€ PgÇ:w_í @€ @€ @€ @€ @€ @€ @  Ç‚à-K€ @€ @€ @€ @€ @€ @€ê, àXçî« @€ @€ @€ @€ @€ @€$ àX¼e  @€ @€ @€ @€ @€ @€ @€@ëÜ}µ @€ @€ @€ @€ @€ @€ @€‚ ‚·, @€ @€ @€ @€ @€ @€¨³€€c»¯v @€ @€ @€ @€ @€ @€ P€€cAð–%@€ @€ @€ @€ @€ @€ @€up¬s÷ÕN€ @€ @€ @€ @€ @€ @€ p,Þ² @€ @€ @€ @€ @€ @€ @ ÎŽuî¾Ú  @€ @€ @€ @€ @€ @€ @€@AŽÁ[– @€ @€ @€ @€ @€ @€ÔY@À±ÎÝW; @€ @€ @€ @€ @€ @€(H@À± xË @€ @€ @€ @€ @€ @€ @€: 8Ö¹ûj'@€ @€ @€ @€ @€ @€ @€ 8oY @€ @€ @€ @€ @€ @€ PgÇ:w_í @€ @€ @€ @€ @€ @€ @  Ç‚à-K€ @€ @€ @€ @€ @€ @€ê, àXçî« @€ @€ @€ @€ @€ @€$ àX¼e  @€ @€ @€ @€ @€ @€ @€@ëÜ}µ @€ @€ @€ @€ @€ @€ @€‚ ‚·, @€ @€ @€ @€ @€ @€¨³€€c»¯v @€ @€ @€ @€ @€ @€ P€€cAð–%@€ @€ @€ @€ @€ @€ @€up¬s÷ÕN€ @€ @€ @€ @€ @€ @€ p,Þ² @€ @€ @€ @€ @€ @€ @ ÎŽuî¾Ú  @€ @€ @€ @€ @€ @€ @€@AŽÁ[– @€ @€ @€ @€ @€ @€ÔY@À±ÎÝW; @€ @€ @€ @€ @€ @€(H@À± xË @€ @€ @€ @€ @€ @€ @€: 8Ö¹ûj'@€ @€ @€ @€ @€ @€ @€ 8oY 
@€ @€ @€ @€ @€ @€ Pgá:¯v @ „Ïí¹'r @€ @€ @€ @€ @€j üìٯͪU‘j @€ª(àŽU쪚 @€ @€ @€ @€ @€ @€ @€@ÉKÞ Û#@€ @€ @€ @€ @€ @€ @€Up¬bWÕD€ @€ @€ @€ @€ @€ @€J. àXòÙ @€ @€ @€ @€ @€ @€¨¢€€c»ª& @€ @€ @€ @€ @€ @€ PrÇ’7Èö @€ @€ @€ @€ @€ @€ @€@«ØU5 @€ @€ @€ @€ @€ @€ @€’ 8–¼A¶G€ @€ @€ @€ @€ @€ @€ª( àXÅ®ª‰ @€ @€ @€ @€ @€ @€”\@À±ä ²= @€ @€ @€ @€ @€ @€ PEÇ*vUM @€ @€ @€ @€ @€ @€ @ äŽ%oí @€ @€ @€ @€ @€ @€ @€* 8V±«j"@€ @€ @€ @€ @€ @€ @€%p,yƒl @€ @€ @€ @€ @€ @€TQ@À±Š]U @€ @€ @€ @€ @€ @€(¹€€cÉd{ @€ @€ @€ @€ @€ @€ @ ŠŽU쪚 @€ @€ @€ @€ @€ @€ @€@ÉKÞ Û#@€ @€ @€ @€ @€ @€ @€Up¬bWÕD€ @€ @€ @€ @€ @€ @€J.0\òýÙ @€ PÙ˜‡;Ýö¼+|}úÁðÐìãa2 1ýǃ @€@Y²…£²áG†Ž ¯_vb8{Å©á-ËO C™W´,=² @€ @€ @€G@Àqpze§ @€ @ ’1Æp˯†­ûo äOV²FE @€TG ùp<„{g¾õÜ×ÍS{Â+/èÜðó/ùÉeYuŠU  @€ @€ @€ú, àØg`Ó @€ @€s úÔÖðÓ÷Ï=È @€%hþCŸxúÓá¶©ÿ>´jc8¦qtÉwl{ @€ @€ @€(‡@£Û°  @€ @ nßœy4\õÄŸ 7Ö­ñê%@€TX ùw4?ã6?ëz @€ @€ @€ @€@gÇÎFF @€ @€=hÞ¹ñ·Ÿº:<<ûDg6 @€bšŸq›Ÿu›Ÿy= @€ @€ @€ @ ½€€c{g  @€ @€ ÄÃGŸÚ*ÜØcWÓ @€”G rl~æm~öõ @€ @€ @€ @€¹ç¶q† @€èƒÀ-¾þqúþ>ÌlJ @€åh~æm~öõ @€ @€ @€ @€¹ç¶q† @€è±ÀlÌÃÖý·õxVÓ @€(§@ó³oó3° @€ @€ @€´plíâ( @€ Ð;ÝÉŸìÃ̦$@€ P>ægßæg` @€ @€ @€h- àØÚÅQ @€ @ {ÞÕ‡YMI€ @ ¼>—·7vF€ @€ @€ @€@ñŽÅ÷À @€ @€@m¾>ý`mjU( @€¦ÀÝ>{! 
@€ @€ @€ @`NÇ9iœ @€ @€z-ðÐì㽞Ò| @€J-°ÏgàR÷Çæ @€ @€ @€(V@À±X« @€ @€Z LƃµªW± @€|ö @€ @€ @€ @€ÀÜŽsÛ8C€ @€ôX †ØãMG€ @ Ü>—»?vG€ @€ @€ @€@±ŽÅú[ @€ @€ @€ @€ @€ @€ÔR@À±–mW4 @€ @€ @€ @€ @€ @€(V@À±X« @€ @€ @€ @€ @€ @€ @€Z 8Ö²íŠ&@€ @€ @€ @€ @€ @€ @€Å 8ëou @€ @€ @€ @€ @€ @€ PKÇZ¶]Ñ @€ @€ @€þ?{w÷#WyßüwöÅkl ­±dÅáÅnU!nPÁ%‘zQqÔV®¨¸ ‰(ùª@)T¹ˆÔ¤4HHV[¡J\T¹ •y‘ˆl@.Z»˜º@!¦u¼Šü¶Þ]O}¬\ØÉÌÚûÌž9Ïœçs¬•¼gæyù}~ :s¼ß @€ @€ @€hW@À±]« @€ @€ @€ @€ @€ @€ @€"‹l»¢  @€ @€ @€ @€ @€ @€ @€@»Žíú[ @€ @€ @€ @€ @€ @€) àXdÛM€ @€ @€ @€ @€ @€ @€Úpl×ßê @€ @€ @€ @€ @€ @€ @ HÇ"Û®h @€ @€ @€ @€ @€ @€ Ю€€c»þV'@€ @€ @€ @€ @€ @€ @€E 8ÙvE @€ @€ @€ @€ @€ @€ @€vÛõ·: @€ @€ @€ @€ @€ @€(R@À±È¶+š @€ @€ @€ @€ @€ @€´+ àØ®¿Õ  @€ @€ @€ @€ @€ @€ @€@‘ŽE¶]Ñ @€ @€ @€ @€ @€ @€ @ ]Çvý­N€ @€ @€ @€ @€ @€ @€Šp,²íŠ&@€ @€ @€ @€ @€ @€ @€í 8¶ëou @€ @€ @€ @€ @€ @€ P¤€€c‘mW4 @€ @€ @€ @€ @€ @€hW@À±]« @€ @€ @€ @€ @€ @€ @€"‹l»¢  @€ @€ @€ @€ @€ @€ @€@»Žíú[ @€ @€ @€ @€ @€ @€) àXdÛM€ @€ @€ @€ @€ @€ @€Úpl×ßê @€ @€ @€ @€ @€ @€ @ HÇ"Û®h @€ @€ @€ @€ @€ @€ Ю€€c»þV'@€ @€ @€ @€ @€ @€ @€E 8ÙvE @€ @€ @€ @€ @€ @€ @€vÛõ·: @€ @€ @€ @€ @€ @€(R@À±È¶+š @€ @€ @€ @€ @€ @€´+ àØ®¿Õ  @€ @€ @€ @€ @€ @€ @€@‘ŽE¶]Ñ @€ @€ @€ @€ @€ @€ @ ]Çvý­N€ @€ @€ @€ @€ @€ @€Šp,²íŠ&@€ @€ @€ @€ @€ @€ @€í 8¶ëou @€ @€ @€ @€ @€ @€ P¤€€c‘mW4 @€ @€ @€ @€ @€ @€hW@À±]« @€ @€ @€ @€ @€ @€ @€"‹l»¢  @€ @€ @€ @€ @€ @€ @€@»Žíú[ @€ @€ @€ @€ @€ @€) àXdÛM€ @€ @€ @€ @€ @€ @€Úpl×ßê @€ @€ @€ @€ @€ @€ @ HÇ"Û®h @€ @€ @€ @€ @€ @€ Ю€€c»þV'@€ @€ @€ @€ @€ @€ @€E 8ÙvE @€ @€ @€ @€ @€ @€ @€vÛõ·: @€ @€ @€ @€ @€ @€(R@À±È¶+š @€ @€ @€ @€ @€ @€´+ àØ®¿Õ  @€ @€ @€ @€ @€ @€ @€@‘ŽE¶]Ñ @€ @ *ªv¶* @€–\·oY @€ @€ @€ÆB@Àq,Úd“ @€ @ 몙n¢  @€W(àø ¡< @€ @€ @€"‹l»¢  @€ @€@;×Nnjga« @€hIà:×À-É[– @€ @€ @€qp‡.Ù# @€èˆÀöé­©D @€®Là6×ÀWåY @€ @€ @€) àXdÛM€ @€ÚØ5³£…­J€ @ %×À-Á[– @€ @€ @€±p‹6Ù$ @€è†ÀknŽ-»QŒ* @€\F ¾ö­¯ @€ @€ @€ Ð_@À±¿‹³ @€ @€@“ÕDìÙp3›’ @€@~õµo} ì @€ @€ @€ @€þþEµ¿‹³ @€ @€@C¬Ý·OokhvÓ @€ÈC ¾æ­¯} @€ @€ @€ 0X@Àq°G @€ @€ªªŠ'f÷Ä&¯i`vS @€h_ ¾Ö­¯yëk_ @€ @€ @€ plã @€ @ !«'ÖÇwfrlÈ×´ @€í ÔáÆúZ·¾æu @€ @€ @€ @€ÀòŽËûx” @€hHà†©ÍñÜ5ÇíÓÛZÁ´ 
@€F+P_ÛÖ׸õµ®ƒ @€ @€ @€./0uù§x @€ @ úSm¾»ñÑxùÌÛ±÷ľ8zîx3 ™• @€@ƒ[&6Æž ÷ÇkwFUU ®dj @€ @€ @€tK@À±[ýT  @€;úÀ¿vÕñÕµ_ŠwÎ~ûçÆ¡…â³¥cqª7½ó @€r¨¢ŠuÕL\;¹)¶Oo]3;âŽ57Çd5‘Ëíƒ @€ @€ @€c# à86­²Q @€ ÐmúÂwÎÜzá«Û•ªŽ @€ @€ @€ @€¨¼•¬Ÿ @€ @€ @€ @€ @€ @€ @`äŽ#'·  @€ @€ @€ @€ @€ @€ àèg€ @€ @€ @€ @€ @€ @€¹€€ãÈÉ-H€ @€ @€ @€ @€ @€ @€8ú @€ @€ @€ @€ @€ @€ @€F. à8rr  @€ @€ @€ @€ @€ @€ @€Ž~ @€ @€ @€ @€ @€ @€ @€‘ 8ŽœÜ‚ @€ @€ @€ @€ @€ @€ @€€€£Ÿ @€ @€ @€ @€ @€ @€ @`äŽ#'·  @€ @€ @€ @€ @€ @€ àèg€ @€ @€ @€ @€ @€ @€¹€€ãÈÉ-H€ @€ @€ @€ @€ @€ @€8ú @€ @€ @€ @€ @€ @€ @€F.05ò-H€ @€ @€@g{ q`þP¼yþëý…Oâè¹¹8Ý›/¢vE @€ä#pU5[&fã–éëã®™íq÷ÌŽXSù§ò|:d' @€ @€ @€@ÉþÕ¦äî« @€ @€@‹½¥xéÔñâÉ×b®w²LI€ @àÊê7Ù8²tôÂ×Îü$f«õñÐúûâÁu÷ÄT5yåy& @€ @€ @€Àª 8®:©  @€ @€”+ðùÒñxrî…8¼øi¹*'@€ÈZ ~Žœøaì;ón<=ûplžÜ˜õ~mŽ @€ @€ Ðe‰.§6 @€ @€F'ðÓ…ã±cÏ 7ŽŽÜJ @€CÔoÊQ_¿Ö×± @€ @€ @€vÛq·* @€ @€N ÔŸÜøíãÏGýi8 @€ã"P_¿Ö×±õõ¬ƒ @€ @€ @`ôŽ£7·" @€ @€N ,ö–âɹ„;ÕUÅ @€Ê¨CŽõõl}]ë @€ @€ @€­€€ãh½­F€ @€ @ s/z#/~Ú¹ºD€ PŽ@}=[_×: @€ @€ @€F+ à8Zo« @€ @€è”ÀÙÞB¼xòµNÕ¤ @€2êëÚ³½Å2‹W5 @€ @€ @ %Ç–à-K€ @Óµ@IDAT€ @  ûçÅ\ïdJQ @€@áõuíùƒ…+(Ÿ @€ @€ 0ZÇÑz[ @€ @€@§Þ:pt @€èŠÀ›®o»ÒJu @€ @€ @€c" à8&²M @€ @€9 ¼¿ðIŽÛ²' @€I®o“Ø "@€ @€ @€$ 8&ÓH€ @€ @€ÀÑss @€tFÀõmgZ© @€ @€ @`LǤQ¶I€ @€ @ GÓ½ù·eO @€’\ß&±D€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 
@€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€ @€ @€ @€ @€HpL•3Ž @€ @€ @€ @€ @€ @€HpL¦3 @€ @€«ª @€:#àú¶3­T @€ @€ 0&ŽcÒ(Û$@€ @€ £À–‰Ù·eO @€’\ß&±D€ @€ @€HpL¦3 @€ @€[¦¯‡@€ @ 3®o;ÓJ… @€ @€ @€c" à8&²M @€ @€9 Ü9³=ÇmÙ @€$»\ß&¹D€ @€ @€H˜Jh @€ @€vÿðÙj}ÌõN @àW7üûcë|‘ǯ>úíŸÅÇ¿ó³Nxèí¥mÔÛK=ºôÞv©›—Ö¢·—zôûî•ø¿xåüŸq;î½óÆØ}÷Mã¶í¾û}õÀ‡ñú[Gú>VâI½ín×õVoÇAÀÿ“/í’ÿn/õèÒwzÛ¥n^Z‹Þ^êÑ¥ïôöÊ»ùå?ù~ïÊŸ×3üOߨòÚ‘Ý @€4%à›’5/ @€ @€ÖTÓñÐúû ¨T‰ @€ @€ @€ @€¬¶€€ãj‹š @€ @€@a®»'¶M]WXÕÊ%@€ @€ @€ @€ @`XÇa'@€ @€ P¸ÀT5OÏ>³ÕúÂ%”O€ @€ @€ @€ @€ÀJW¢å¹ @€ @€ôØ<¹1þvã#BŽ}uœ$@€ @€ @€ @€ @ Ÿ€€c?ç @€ @€X±À­Ó7Äs›¾Û¦®[ñX @€ @€ @€ @€(O@À±¼ž«˜ @€ @€@cõ'9~ïšÇãÑ èÓS61 @€ @€ @€ @€nLu£ U @€ @€ ‹ÀT5__oüѺߋó‡âÍó_ï/|GÏÍÅéÞ|.Û´ @€ @€ @€ @€Zpl¹–'@€ @€ ÐU5ÕtüþÚß½ðÕÕÕU¦À«>\¶ð#ssñß1·ìsJzðÏÖ%voy´%¿ú_Æëq¤µ¬Fz»ŠyΡ·yöe5v¥·«¡h @€ @€ @€Õp\MMs @€ @€ @€@ç^ëHçkT  @€ @€ @€ @`£XÄ @€ @€ @€ @€ @€ @€ @€‹/Öðw @€ @€ @€ @€ @€ @€ @`$Ž#a¶ @€ @€ @€ @€ @€ @€ p±€€ãÅþN€ @€ @€ @€ @€ @€ @€ŒD`j$«X„ @€ @€ @€ @€ @€Š˜žŠ3 q x @€ý  @€ @€ @€ @€ @€ @€ÀH6_Ýûù‹Ï}s÷H³ @€@öÙïÐ  @€ @€ @€ @€ @€ @€ @€Î T«HA @€ÀŠþmÿû½ ðd @€ P¸ÀSϾ2´ÀSßúƒ¡ç0 @€ @€XNà+¿µo¹‡Û{¬OU×~óoÚÛ@û+ßó§ÿð×½^ï©öw2üªªzêü‹¢û9¼¢ @€@Ù>Á±ìþ«ž @€ @€ @€ @€ @€ @€´" àØ »E  @€ @€ @€ @€ @€ @€ @€@ÙŽe÷_õ @€ @€ @€ @€ @€ @€ @ ©VVµ( @€ @€ @€Â–zçâ³Äþùƒqhá£øléXœêÍGïü @€ @€@¾UT±®š‰ë&7ÅmÓ[c×ÌŽ¸cÍÍ1Yùì™|»fg @€¹ 8æÚû"@€ @€ @€è¤@¯×‹—ϼ{Oì‹£çŽw²FE @€ @€º,P¿AÕÉÞ™ø`ñ.|ýëéý±ebcìÙp<°vgTUÕåòÕF€ @`UW•Ód @€ @€ @€ üòÜÉxfno¼·px𓽠uk @€ @€ @€'ðñâçñØ/þ^¸±¸Î+˜ @€(I ~c«úP}/ÈA€ @€Àå/oä @€ @€ @€†¨?¹ñ¯æžŸ/ýb¨y &@€ @€È_ ¾Tß ªï 9 @€X^@Àqy @€ @€ @€J ×ëÅ3s{…‡R4˜ @€ 0^uȱ¾'Tßr @€ 0X@Àq°G @€ @€ @€ -ðò™·ã½…ÃCÏc @€ @€ñ¨ï Õ÷† @€ plã @€ @€ @€C ,õÎÅÞû†šÃ` @€ @€ñ¨ï Õ÷ˆ @€ôpìïâ, @€ @€ @€¡Þ9ûA=w|èyL@€ @€ 0žõ½¡ú‘ƒ @€þŽý]œ%@€ @€ @€ 0´ÀþùƒCÏa @€ @€ñph¼ûg÷ @€Í 86ëkv @€ @€ @€‚-|TpõJ'@€ 
@€¨þÓ="? @€ 8¤ñ @€ @€ @€á>[:6ÜF @€ @€Œ½À§î}@€ М€€cs¶f&@€ @€ @€(\àTo¾på @€ @€¸Gäg€ @€À`ÇÁ6!@€ @€ @€ 0”@/zC7˜ @€ @`üÜ#ÿª€ @ 9ÇælÍL€ @€ @€ @€ @€ @€ @€ pã4 @€ @€ @€ @€ @€ @€ М€€cs¶f&@€ @€ @€ @€ @€ @€ @€8€qš @€ @€ @€ @€ @€ @€hN@À±9[3 @€ @€ @€ @€ @€ @€ @€À8M€ @€ @€ @€ @€ @€ @€4' àØœ­™  @€ @€ @€ @€ @€ @€ @€Ž`œ&@€ @€ @€ @€ @€ @€ @€š˜jnj3 @€ @€ @€î Ü{çÝ+JE @€ @€ @€ @€[@·$ @€ @€Œ¯Àî»oßÍÛ9 @€ @€ @€ @ #‰Œöb+ @€ @€ @€ @€ @€ @€ @€@!Ž…4Z™ @€ @€ @€ @€ @€ @€ @ 'Çœºa/ @€ @€ @€ @€ @€ @€ @ ÇB­L @€ @€ @€ @€ @€ @€ “€€cNݰ @€ @€ @€ @€ @€ @€ Pˆ€€c!V& @€ @€ @€ @€ @€ @€ÈI@À1§nØ  @€ @€ @€ @€ @€ @€(D@À±F+“ @€ @€ @€ @€ @€ @€ä$ à˜S7ì… @€ @€ @€ @€ @€ @€" àXH£•I€ @€ @€ @€ @€ @€ @€rpÌ©öB€ @€ @€ @€ @€ @€ @€ p,¤ÑÊ$@€ @€ @€ @€ @€ @€ @€9 8æÔ {!@€ @€ @€ @€ @€ @€ @€…8Òhe @€ @€ @€ @€ @€ @€ @€œsꆽ @€ @€ @€ @€ @€ @€ @€B i´2  @€ @€ @€ @€ @€ @€ @€@NŽ9uÃ^ @€ @€ @€ @€ @€ @€ @€@!Ž…4Z™ @€ @€ @€ @€ @€ @€ @ 'Çœºa/ @€ @€ @€ @€ @€ @€ @ ÇB­L @€ @€ @€ @€ @€ @€ “ÀTN›± @€ @€ @€ @€ @€_¿üÞò{ÿßãñç_þãï߷ܳ~üÏߨ½Üã#@€莀€cwz© @€ @€ @€ @€ @€´*ðÞáêrëßUÜx¹'yœ @  ‰2ÊT% @€ @€ @€ @€ @€ @€ “€€cNݰ @€ @€ @€ @€ @€ @€ Pˆ€€c!V& @€ @€ @€ @€ @€ @€ÈI@À1§nØ  @€ @€ @€ @€ @€ @€(D@À±F+“ @€ @€ @€ @€ @€ @€ä$ à˜S7ì… @€ @€ @€ @€ @€ @€" àXH£•I€ @€ @€ @€ @€ @€ @€rpÌ©öB€ @€ @€ @€ @€ @€ @€ p,¤ÑÊ$@€ @€ @€ @€ @€ @€ @€9 8æÔ {!@€ @€ @€ @€ @€ @€ @€…8Òhe @€ @€ @€ @€ @€ @€ @€œsꆽ @€ @€ @€ @€ @€ @€ @€B i´2  @€ @€ @€ @€ @€ @€ @€@NŽ9uÃ^ @€ @€ @€ @€ @€ @€ @€@!Ž…4Z™ @€ @€ @€ @€ @€ @€ @ 'Çœºa/ @€ @€ @€ @€ @€ @€ @ ÇB­L @€ @€ @€ @€ @€ @€ “€€cNݰ @€ @€ @€N TQuªÅ @€ @€¬\À=¢•›A€ PŽ€€c9½V) @€ @€ @€ÀˆÖU3#^Ñr @€ @€¹ ¸G”[Gì‡ @ 'Çœºa/ @€ @€ @€¸vrS§êQ  @€ @€ÀÊ®shåhF @€# àXL«J€ @€ @€ 0jíÓ[G½¤õ @€ @€2¸Í=¢Ì:b; @€9 8æÔ {!@€ @€ @€è”À®™ªG1 @€ @€+phåfF @€”# àXN¯UJ€ @€ @€ 0b;ÖÜ[&6ŽxUË @€ @€ä"Pߪï9 @€è/ àØßÅY @€ @€ @€C LV±gÃýCÏc @€ @€ñ¨ï Õ÷ˆ @€ôpµÜßÅY @€ @€ @€«"ðÀÚqûô¶U™Ë$ @€ @€ã#Pߪï 9 @€, à8ØÆ# @€ @€ @€†¨ª*ž˜Ý_˜¼fè¹L@€ @€ 0õ½ úžP}oÈA€ @€À`ÇÁ6!@€ @€ @€ °*WO¬ïÌ>"ä¸*š&!@€ @€ä-P‡ë{Aõ=! 
@€Ë L-ÿ°G  @€ @€ @€‹^=ðáÅß&ý}÷Ý7%3ˆñ¸ajszçÿ8 @€ @€ò¨¢ŠuÕL\;¹)¶Oo]3;âŽ57Çd5‘ï¦íŒ @€@¦Ž™6ƶ @€ @€ @€º-PÿÂÛΙ[/|u»RÕ @€ @€ @€ @ ¿€· éïâ, @€ @€ @€ @€ @€ @€ Р€€cƒ¸¦&@€ @€ @€ @€ @€ @€ @€ú 8öwq– @€ @€ @€ @€ @€ @€hP@À±A\S @€ @€ @€ @€ @€ @€ @€ýû»8K€ @€ @€ @€ @€ @€ @€4( àØ ®©  @€ @€ @€ @€ @€ @€ @€þŽý]œ%@€ @€ @€ @€ @€ @€ @€pl×Ô @€ @€ @€ @€ @€ @€ @€@Çþ.Î @€ @€ @€ @€ @€ @€ @€ 86ˆkj @€ @€ @€ @€ @€ @€ @ ¿€€cg  @€ @€ @€ @€ @€ @€ @€Ä55 @€ @€ @€ @€ @€ @€ Ð_@À±¿‹³ @€ @€ @€ @€ @€ @€ @€@ƒŽ âšš @€ @€ @€ @€ @€ @€è/ àØßÅY @€ @€ @€ @€ @€ @€ @ AÇqMM€ @€ @€ @€ @€ @€ @€ôpìïâ, @€ @€ @€ @€ @€ @€ РÀTƒs›š @€ @€H8Û[ˆó‡âÍó_ï/|GÏÍÅéÞ|ÂL† @€ @€@—®ªfbËÄlÜ2}}Ü5³=îžÙk*¿Öåž« @€ Ð5w³ºÖQõ @€ @€ @€ÀØ ,ö–â¥SoÄ‹'_‹¹Þɱ­ÃÆ  @€ @`4õ›áY:záëGg~³Õúxhý}ñàº{bªšÍ&¬B€ @€ @`Ç!ð %@€ @€ @€«%ðùÒñxrî…8¼øéjMi @€(L ~³œœøaì;ón<=ûplžÜX˜€r  @€ @€ÆM`bÜ6l¿ @€ @€ @ k?]ø8;ö¬pc׫ @€´$P¿yNý:³~½é @€ @€ ³€€cÎݱ7 @€ @€è¼@ýÉß>þ|ÔŸ²á @€ @€«%P¿Î¬_oÖ¯; @€ @€rp̵3öE€ @€ @€Xì-Å“s/7v¾Ó $@€ @€@;uȱ~ÝY¿þt @€ @€ÈQ@À1Ç®Ø @€ @€!ðÒ©7âðâ§EÔªH @€hG ~ÝY¿þt @€ @€ÈQ@À1Ç®Ø @€ @€t^àlo!^<ùZçëT  @€´/P¿þ<Û[l#v@€ @€ @à× Ä· @€ @€ @`ûçÅ\ïä(–² @€.P¿þ<0°på @€ @€ä( à˜cWì‰ @àÿÙ»ø¸Êrñãï{f&mRè*´,m‘ЏTdµ,,¥i2aštI‚€»àõÜPTÀ+nWÔëzÁ$]’Ž™¤)P,‹uÄâÂE@A l-K ¥4M“9ïÿ9¡¥YfÎ{f2“Ì$¿óùÔÌœ÷9ïò=g&9Ï< € € €£^à’àȆ € € €Àp ÜËß¡ÃEÍ8 € € € €à˜¡ € € € € €äJàïÝOçª+úA@@@À*Àß¡V"@@@@F@€Ç@gH@@@@@¶º/ƒ€ € € €À° ðwè°Q3 € € € €@$8f€E( € € € € €¹è4]¹êŠ~@@@° ðw¨•ˆ@@@@ ÁqÐ@@@@@@@@@@@@@±.@‚ãX¿X? 
€ € € € € € € € € € € € €# @‚ã 3$ € € € € € € € € € € € € €c]€DZ~°~@@@@@@@@@@@@@F@€Ç@gH@@@@@@@@@@@@@ƺ@x¬°~@@@@@@@@@@@ܼí(ãÛÑ ÛÕϼ¤Ÿð ¢@@`Ìà8fN5 E@@@@@@@@@@ò+ð½OXú7êgú|ÉE3 € €ÀpÆÈ:Y& € € € € € € € € € € € € € Žt2˜  € € € € € € € € € € € € €cE€DZr¦Y' € € € € € € € € € € € € €$@‚c ¦‚ € € € € € € € € € € € € €ÀX Áq¬œiÖ‰ € € € € € € € € € € € € €@ „ h.L@@@@@@@@@@@Æ@´ºî2YîÛý–l´ÚÜÖÜø5¿Ú@Ñ"­^vŠR¡‹ýÖãj7ÞÞ¼b£_ m‹ ŽÅr¦˜' € € € € € € € € € € €£N@¿_kUå·,­Ô&i'Áщ65Féc­?æ· Ç8KûF¿Ú(§X&Ê<@@@@@@@@@@@@@@`ôà8zÎ%+A@@@@@@@@@@@@@ hHp,šSÅD@@@@@@@@@@@@@=$8ŽžsÉJ@@@@@@@@@@@@@(‹æT1Q@@@@@@@@@@@@@F Ž£ç\²@@@@@@@@@@@@@ŠF€Ç¢9UL@@@@@@@@@@@@@Ñ#@‚ãè9—¬@@@@@@@@@@@@@¢ÍL™( € € € € € € € € € € € € 0ŠŒë>hBú«~K4®þ£_;m“ ŽÅt¶˜+ € € € € € € € € € € € €ŒZöøª?Ëâ¼lŒ gL¬’E>»ˆ.@IDAT"€ € € € € € € € € € € € € €@A àXP§ƒÉ € € € € € € € € € € € € € 06Hpç™U"€ € € € € € € € € € € € € PP$8Ôé`2 € € € € € € € € € € € € €Œ ÇÆyf• € € € € € € € € € € € € €” Žu:˜  € € € € € € € € € € € € €cC€DZqžY% € € € € € € € € € € €äO@ç¯ë鹨óM é|Ò\òu1ûõÒ×e4­¥ïº†ëñh¹Þ¹†ëŠ‘qÂÃ8C!€ € € € € € € € € € € €E)0oÞ¼ñ“ž]®µ9U2xSZ¦:Ì(}¨ÖÊ1Êl•ý[”‘Z?©\÷ÏšÝwÜwâ‚çÅbLvJßRúeÌl£õl¥Íl™«X)s\#=2îóríl•Ç[_¿FÜ»Õsk"±â¹<Í)gÝVU-=ÒDBgÉzÞ!×ú›äç4¹V¦Ê:¦É Såz?@®¡N£ÕËÚhïzÙ./Ë>Y³¾§;™üUG|Å#9›Ð;Z¸pÉ›JJÃïrµsª£Ì)2×ÃŒÑSäœMñÖbŒÚ%C¼$çëEyü’RúEy}?%ëúå®íæ×64½6Ä)õá£è½Ð‰V/{—1λGŸ"çú-r-L“÷õɯ_Óê9QÏÈuýŒ\ëÏÈkù¾N·3¾>¡¨O`N^Þ·Ù@@`, Ü~÷ßå³6 € € €¸ú†»‚†¦»úŠ³Õ¿lýÏ´í4 € € € Û§_Ÿné@ TàÜ7ßY˜33êj}ÈG¿T˜“žY¹ôG_4Æ\=<£åwI\ºú·«><¤ó­®ÿ¹$LUYfº©µ¹á–˜´Í‹ë—„´ùrº¹‘ò†DKãwÓ´;ÑÅu—Ú×ç©'¦‰K¹{o¢ÔטÖvÓ¹ZÅãɔøSñÞ‰èIÂÎ’Þ$ž,Æ–kø!IHùQ²kGC{{ûËYt1èÊŵW8Z_>¨aïW™«ÚZššû¶Ÿw^í„ SœÏ8ÚÈqÖs#§Aµ)ÕóµDËÊûúö“ÍcIò O™>³\+}¡8^ Ó3íG%‡Tß/És7ËÃYßC™ö‘ø b±ãtérÍÏ×yòsæPǑי$›_¹Æíؾõé–7zIŸÃ¶-X°tÊø"52:YÏiC¸[^;÷(åÞÑÝ£Û;Zÿ:„¾RÕ—kÇ|3eãÞ®Ò_ikihô‹ØÆ{áë"å±åÇ…Cº^®íZyý>ÐÉï¹¼N{äõ~‡œÿUÛ¶<—ëx÷Àx¯ÿˆãÜ(3—˾_OÕ‹ZçÏHUP¶¾* € € € € €xß»ŽI € € € € € €@1 x•ù¦<ës’Äôù½É,9Z†ž£}›$D­ïîÜsÙp%±I2ßRIü‘¬ç€€‰€×+‰B’À©?挟TQY]÷±¶–ƵB —°. 
I¢¨>)›n$¡ðlÇÕq96£ÇE5ËÏçë’¼tF6ãZŽñ0/•¬%R5ô›ÛLçõãñ–crÒ¼¨¦ffÈ”|Z¾*×~ÆÉ€AÆ–÷/_2ª"ªB’SW»ºëóíÍÍO9¶bFÓ{¡w-HõÎo¼þž•}9ùÇm6VÕÔÿh×+=W®_¿rGnz{½à8öÎ9+F@@@@‚ÀY§ÉkcC@@@@@• cµo. éÕ’èujÞ¨õüpiÉ/£Ñåó‰ÏåkœÞä¤é³¿.I8ÿ* ^yݤûC%׫]ª9þ(ÑÒø1ÌÍ〺lb¨Y’ ³JnôæeŒêÚóZ²%“9JÒá—%1òª¼[zI£RÍqª)½(]:?‘Xõd&óÌ4¶jqíÆ8ñ\&~ƒxÎÖ!uW4VW›ˆ7þ<èqâ´$~KÎÕ¿ˆÍEˆ—œºÌQ%ÿ²(¶ìܵñ•ä¢Ó‘ìc4½z¯]¹®ÊÇÛàÞ$×”N Ÿ'‰ÂóÖ67?5’ç­XÇΪœf±.–y#€ € € € € € € € € € € €©*+ë¦E}{^“÷,I1Ǫ’Ð]Äb3RÍe¨û*+kž:cÖ’t5\ ^½S–u}XËU,êÒ/I”ŸÀùéÚƒì—D§Ž[nYµ=Hìܹs#ÑêÚíHrã0nby‚ŽDî‰V/G¾†ÖÔ֭׎DrcŸ5WŽZSY]ÿ>û²(מ\ƒ«†ûÚ÷&, ›‡ðÆòªÚwf¿€‘?r½:R]ñÃñÚ•÷”#Ã¦ä® cu‡ü,¾PÁ±Ï9“_r—ÉÓ·÷Ù5ø¡1O&Ö4]?¸= `¨X\¿ÄÑæL¿¸=®ºîæxã3~1ÅÚV^¾dz¨4ün­œ3ä—×,YÇT¥ÍT£ôʨçå{@ž–’ÇÏÈ‡í§«žÑÝîÆ¶¶&Ùφ € € € € € € € € € €ù˜‹•8Žn“ ¥£ò9Nß¾åžâãLJJWʾsúîêãhõ²Säžä„$[>Ô¾²9Þ«dW*´*U#ÇË-Ò¹Ûªª–#½}m¨=7Ù¤sc±IC¥žåÙAâs£Õ mœ_I•ÅêÖ5M·æ²ÿÊšúOI)Ëoî­@—Ë®3î«wF}§¼¼æÍÏfÜAŸ¢¡ÒÏÊkË»öFfÓjj$ìÜQY]{z[KÓC#3‰ìG-ï…Þ:ŽwÊVˆD,{ ÔúèÇÜåªðå9æÃIpìw è÷Ë/Òª~»>Ñ겋Ç.t;xíòQ×Û©•|ÐÞûØ{.õuM‰î©ª®»ÍU¦q÷Ž;Ö¯_ßå5±!€ € € € € € € € € € €@nŽsÆÿDnç}O€^]cÌßäàg¥ÐÉ rׯ+ÇÍRFϖLJË=Ãåix‰s•Õu¶µ4Þ`lkHEMÍZ…~)÷&O°÷ 0Ê<)©ˆ²®¿ËжHVâ¹§y²6æD¥¤°Ë‰2W¯*ÜédÓ‹¥Õo-ß 4Æ„Ãÿ-Æ¥AãSÅÉÚ¶n{áé_¤jë»ÏKšè”Ý.&§ôÝo{,ý?«•¹ÍµI¹êùdÈÝ*}„”:ÄQî¡r½œ,Œå’¿2ÙÖWo»ÖHŸk+cuïm‹7ÞèKP4¶|Ìé[’k‰Üß,×Èsrß+I‘“ù<&÷Èï0ÊÝéh§ÇÕf¢¬i’£Ìt£Õ\9ê¹^fï?ÚþH¦r`¸¬ä;™uBZE¬~®Ìï bl3f³¬åW²®‡Œvuö®É;X’ÔfÉÅ~„\û³ÅéHéržìŽxmÖM«I’˜ê%â–[c ,`”¼êãBek…öý™ðÊkö)y—ãÌcr]<ã]ò?C;ê0y¿\ÇÓÒµyûµ‘«ƒ­Ÿ€õEÛ/š' €Vh¬î"íèÿ‘ÀƒäƒXŽ6=ÅÑê“SÒ UÕ.[ÛÚtoŽ:¦@@@@@@@@@@Æ´€¤š¤OnT¦-é&?¿6¾òo6¤õëWî˜äßw£5µõÊ8?”¾ÇÙŽ“äµ*ÒùmJ½‘Lh;&U»$ . 
œÔcÌï¥úÞ剖÷§ê+Õ¾¶–‘ýó¼û¥•£¿åçÖ{¼TZ”Džåñé©úËxŸÖR=-Ýf¶KåÉm6¹®ûpR…^‰(s´ÜÏ}¬ã8ïª~Ë%)íïèdjH×˾ýÑX½Wañ“ûžûþ4ê£z.µ$ËõëbÓ¦MÝòo½ì\/ÉXuR)ð2׃ú x"IS³Çþ_ÙДÑÓcC¥Ÿ’ëäëAÆ<.u /ÊäéÛgkkÃòüŠ b±ëƇÊ~*c.èÛžîq8¤.’¶Œ+cµgIêØœtýzû½„MÉV¾T*¦Þä—ª-‘Xõ¤ìÿðÂXݵ%Ž\CASCê«r̼Týâ>¿×´$…Å{á¹±Ø$­Íɵ`%–5=-Ä'äzðª=Z·uëÖí’ In;ÞÿRÝöóò|¼õ@¬òȆ X,$eÔoÔ!·}¸Ìv<ùãå¨PXÿ¶ªºösÒïáÙBr € € € € € € € € € €>’ô²KR¡>˜hnŒInЕI47ý,©“gKÍóÚR>•TœE)¬›æ¨`Uù$ÍkÅK[7¿/ÛĵD¼ñç²®s¥–ß+¶éIÂÔiÑÅu’x–ŸMÎQrÍ×’»w™XÓø9©™ðÎ×ÍñÆgÚâM¿j[Óô“Öæ†¾âv.å*ÿCæÝa«¸¨¦f¦vT°8c/¹»ÎÈ$¹q D[KC£éN¾UÖòÐÀ¶Ï%ɱ²rqíÇîú¼¼¼æPÇ(/)Ëw“¹ü½»sÏ™Ù^#};¿5ß’hnð’SØwÚǺ7Á1msº)Rtiº¶7öóiñvnß8¨ÿƒuñÆÍÉ®W*=£þ-ƒŸÉùz_UÕR{2éàC fO±½è”~Iܶšw';O šÜØ·¿ãñ=­-M׺Is¶ø¼Ü·ÇÙ “G!€ý¼áQ§¬E?Я!OzË‹k皪꺛çÍ›G%Þ<Ó% € € € € € € € € € €À˜ØìqÏN´4þt( k›Wü^¹É¨$B[?’ã%€ÙKŽ¥éH—˜o)Ò"Sù^kscÝÆ{Òth·$ýÝUfy µiõ:Í<È•5×·®iüt{{»o’Ññø+mÍ ß„Uk"iÈŒ»Aúj›ŽœÕ·¶4^´1ßi‹µµ'+žëéì–DPãU=ôÝG_7þ²‰¾Ai#¥‘ËöU²Lâ•9Üãº=ÕÍϦɢa×+=WÊ ÁÚ§WÑT’Ìdˆ… –Éq¾•-å|­K¬iº>“~ÓÅz×›îé¹P’|·¥‹Ù·ß„Ãgï{\„?‹ê½°²ºö$y lþ´çµîy^òíPÎI[¼ñîžIrTæÅ¡ôñTÿâ@†,à%Î>æ¤6ùv‘ª!w–IZÏŸ:}Öw29„X@@@@@@@@@@ðpMò²µ­M÷úGkM¬Yù;‰\aÖjFyUí\k\Š€ÊXÝ™Z;u)šúí’ä²Û%iórÙiM¸ìw`š'’äx³$~1Mó»e°÷KâÑ oìÈÑ×u?" ‹«rÔ]o7^µ=¹/¼ÜÖ§$4ݵmë“^"UN,½ñ::Vou»¼d)µÕ|=±lbè2ÿ˜Ô­F9ÖOIüZ{|ÕŸS÷ýÞõëWîÐnòŠ =ÇÉèz —Mö"Çû÷m:üÛ3kmm]õ׸_²%ÉËçØb µ½ØÞ Åú†Þ‚R~ ’”*)¼ Ö­[“¤D©û§¤r½×•‰eËV€ ŽÙÊq °W`ÊôY×I–ÿ‚ @äËJÔoäƒßÿÊÛ/ÉëOÊãëåÛKZäÃèÃô#_ž¡?­®ûX&Ç‹ € € € € € € € € € €éLC[ËŠ•éZ³ÙßÓ¹çÓA*†ÂÎ[²é_;j¹í8¹_ùÕî¤É*)ίoI0¼FÖöw¿¹çYòŽ´5iЯAmFml[Óô“Aû‡¸Cªíý«tá›k#÷|?ÛõjÏEC­‚™jªmmMÏKàGRµõÝg´ºBÅb¡¾ûlÆêfÉ™x‡oœ1;»^ëù¶oÌ[׬hµ]/½Ý‡ôŒL†qŒše‹—rŸ¿´ÅdÚ®{Ü„íµm´‘Êœ’ \t[q½F«—&o5žµï&çãr¯bªoP†¯Wë5_Ïð0Âûø¾éö‰ã! 
€@ ªÅµ 哯¿§h´K’Ÿ7®ú¬ÙÓ3»µ¥á½‰–†ËäýÕòÁúy|¥|JM¢¹áãöœ)rš¤ƒîA¤ÞñŠšeEû­©—Ä^@@@@@@@@@@†_ '™üf®Gíèh~Vú´Bq”;3‹±½äÁ…¶ãäžç+×Å7Ûâ²h—\;Óh;Î1ú_l1™´»nòºLâƒÄž‹M•<´‹m±Æu¯¾å–UÛmqÙ¶K`»Ô¹Óïx9é³+tY•_ÌÀ¶Ç^™R®¥ó¹6oNr-ú&ÄöÎÛUÓÎß﹔γ&8úŸm["±òiYÐ}÷’%àiùß_˹\·`ÁÒÉc ýyѽjÇšD-'e]®«¾î;»^}á‹RÐõÁ}Ïù™™@8³p¢@ö ”—/™®ÝàýE°o_ºŸò-·˜.sI[[ãóéböíß[†þw‹ªj¿/ßÂÒ,½±¯-ÕOù–°£B«æÅbGmŒÇw¦Ša € € € € € € € € € €X¤*àÚøÊ,QÙ6ÿN<Áï`£ô›üÚSµ•WÕΕ›™MÕ¶ŸÙþÒÖÍ7ížÛG¦;¹BEôWz+5¦éZî§>óôX¬ôîx¼3MHàÝ’4ö—¶øŠÛ0pB¨ô#bYæ.ëx¸Íì¾Ñ/&mRXçR¾EpÇ\.c­ :žÌýBÛï=*Ù´¿lã$ñ1[µ6£ õ¯.k¼QǬqÈõèU¼_Èëׯ窱®¿ÞѪ!_cŒæ~IpÍg—µ!€@^Â¥‘/É÷GL± b”{M¢¹é*[ÜÀöµ­M÷VTT¼Ý7q¥|Ø¿``{ßç’dyðT]æ•C¿¦ï~#€ € € € € € € € € € €@0טŸ‹Ì2£§ÔKr‚£6*çÉÞ¼åüTö›Š'R½ñ*O¦hÊé®D¼áæhMÝsrŸø!é:‡Ó¥HÎA‹ä$ÜΡÐL¥BG…Tø(¹.ŽRÚ)ë>JÆ8J½kmËŠ{Ò—³ýÆüÓVcÈ1Æ»V‚o®ûOòO›t´¹jÞ¼yqy-ôïØÙÖÒÔ,QÞ¿Q³Û{aylùÑò>Ñïô&'Çë3Ô¶-î®ø!NÙw¤ÈUÑUìêÚ‡z< ŽCäx“‹bËN´ÿ!Ź)Ñ’yrã>Ôööö—åCTÕ´é³î”qgìÛŸê§üüw)]ýý|—O56û@@@@@@@@@@(zmþ¯5HrÍ3Ö¾µÎ,©K:”¸rk¿*y“=fˆ®ûMã8ÿ'÷O?b÷agz$‘Xé­Y–žÛÍÕæîÜö¨ÔyçÕN„¿·É=Ù¾[Ñí¾¹kte*wJwËÓv©uhr¨äTi÷âì›$fÊäŸ@ïß c<Ù/Eó¼iýOÛ²øˆ-¦o»êþ§£ÆõÝ5è±:aêôY+æÎ[»iÓ¦îAìØ/Pdï…DZ¾jc~²ùyäU¨­ª©o”Þ¯ÈÏ£·WGï¹ee Gþ/ùk ä;„1¿òÑ?ì Ñû¶”… —,Š”Eî‘UÞ·c¤Þ´šTr@ø?¥ñ3©Ø‹ € € € € € € € € € €)¤Ú Tb{$e[v:ªg‡Rþ)R‘o|&CUÔÔ!)Ž'ûcÔ–DËÊû|crИˆ¯¸Eºñþå}ÓݹOpœ0I*÷jûŸ ¥6­‹7nÎû÷`”¹SXÓ'8J\È8ï–ƒ’³™ã† M¯es\¦Ç”*žþÔ’Üé_ŽqÀ k››Ÿ©ª©“טž8 ©ßS9ÇÕ³™sØÌ#çü[{¼1o Íý-¶'Eø^(YÔ‹,¹ÉªÇMÞ6§ÂMº 'äà˜!vF/ø û&•UUK”/¹Ð¶8×UWæê›Ö­[ý¢|ÆbÛ˜òöŠòò%Ómq´#€ € € €Œ¼@©öÿÑ‘Ÿ!3@@@Mü:šÎ&kA@ȇ€$ÈÜ/ýæ­z]ÒU’|eÝü ° 8ÜQ‘sìJñÔü)Å΢Ý%UŸH$V<—ëèö}7©NùG߀7ÊZŒgŸwާ5ôîL¨$@'½¤?×5*P¥R¹çþÌPHß­®»¥bqÝòóc±©æ3fBŠí½ðÜXl’$7Z^¿æµµñ•ÇILv½r¿¼W#[&$8f¢E, n8üAùæÿ£îl‹7þ6—`‰–÷+cÖúõ)³* /¹Ä/†6@@@@˜îL.Œ‰0 @@@1!Àß¡câ4³H@@!È}¸ápë¡¡dî•«· l´ù³-¦¨Úu~’ ¥zæ™6£u^¯‘ãoqw?fK”2JŸ.Ç|nÐܹs#•±Ú³«jj¿åhý½kÍÅs±ø®ô8IYR.9ºé§ôùhuýo«ªk?W«Ÿ+}øç äb²ÜG±½èŒ;Bií›+Ù†^¢wàkc(§§££ãU™OÞªen…|¬M×Bž9sCF@Àû`%d>`Z¾þáj[L6íF¹_Ö*´ÈïX혘´Õ/†6@@@@‘86r¸z"¹uä' @@@Æ„€÷w( € €ø ˜WüZ‡Úæº¯í ©I¾ÝH]f‰UZìÛ¡4J±QUÁQªïåå?°­ÞaÃwÜä°&8ÞwFkê½j•‡¦;ÏrÅX^¾ä ŽŽÕyqI7®e¿S[~dD«·Jîåir]Ÿ.óôÇ{ÇÙœ-}§mnoix¬ªºn$—ùÞo?¨IŽ“9I@çÝ¡º&ZS÷¢<¿Ý(³¡gW÷†ŽŽæg3ªw×{¡cô ÛE%†ó”iå%bëã‡sÌb‹Çb?ƒÌ†U`ÖÑsΗ+Óý•2¿ÍuõÆ}ãyUåCê-2‡ûö þ©ß^Q]”÷mp{@@@@ 
Eà]ãNPvëG)”¥3@@@8UþeC@@ôRýmGúÖ¡·tuMpËzÓ»†Þ×þ´ï}ͯǙ¿ï/þGR0f{V!ùwzªµ_ǹQîåþ¡5.—ÆL“„=ßC¥aoîÞà¸pá’7…ÆGŽ”ùÒî‘ò:N’ºæHr× ²¯tߤýg¿/*7?»;÷|,RZòNÿ°l{”káMrìù¹$R6NUU×? 7hcn{ØtþúÁx|O¶}ÃqÅö^è*=ÃZÂT+/QxØ6ãªÇµuRÃ6¢ˆÇ¢8ML E@>\o›‹”'¿Å3”v©Ù,¥°}åc¡6É_Ê8‹ € € € _ÓåÆÒÉz‚zÙ¼–ßè@@@`Ì xž6îÄ1ï € €~rp^ýÆÎ¶Mîm¶Vpìq󛸙íܳ=Î(g[¶Ç¦;®¢¢b’´HGÒS†3Y¯w¾–äF/ÆÕfJolþg^,vÀUú6©w¬vÌÑÚ8G+mŽ6F-S;pÿNŸ"zîôÆ4¼j‹åUu #ó¹«~ Cy Õ[dEo‘›ôÿí8SöêñÕu·mÖu&wß²>a(]â±Åö^èhuˆÕÑÍoUÊÁã{ãÜë`ð|  Ž…Ž y†Þ«MÑÙ \€ó”Ûtž­»îs‡-f(íNOr£*ñû–O×$8™c@@@@a(ѵxÂ<õãyý¾¬aX C € € €º€÷÷g‰ö¿× Ð×Àü@@È·€Ü_t ŽF™éReΗ&Ü3ºu*8&#N ù*vc(©r–à8wîÜÈì#Oš¯S! Œï’$­dõ{“?åZë½Üäªó¿ìF¬£µñOÑXýRIÈl–‰–ær2¯'uê¨D˜ҤTwÜ(¯Ã¸Ù£~ÞÖÖøR.Ç©¾Ší½ÐUÊZÁQ;Ãü>¨µ$8²e"Àÿk•‰ÖØŒÕåUµs#au|ÓÁ±òËè`)<]-ßô`’Ò³=Âò¼T‹Û*·Jºã¥Ý»Õsk"±bXK¸ŽÍÓóƪ9OoPäïA4ºt¶ÖúXÿÌöŽÖ¦ûýc†ÖšH¬|:Z]÷˜Ìå¨ô=éwVÔÔÑÞÜüDúZ@@@@FZ ²ìLuçî?©Çzžé©0> € € €À(8*|¨òþþdC@†Kàßóé…íêâw×üpž_Ôïš?r–_;mäC@î‡Oæ£ßüöéÝ×ï¿mÞü`Ñ%nú®(™û Ž!åLõ³ÐCzÈ󯬮=A’ö>!9#Õ’»8­Ø«Ï%â ÕõoqŒù‰ÜwŸŸß)Z{y±çˆÛ9ºÄÜPUS×nLòÛ‰–•÷ú%ã7¿b{/”ëu†ßzzÛ’î°&JÒë+r]X§EÀ~÷[ðh¯À¼yóÂS¦Ï,—Ó…’­~¼¤¦{Mû_Zò¨÷IïËÍ{Cž);fîÝ%OKMĘªšúûåEy³1¦¥­¥é!¯\lÑÅõŸ—„Êz¿¾ä õ®DKÇýbrÝVY]·H¾–à¾ýj}kksÃ'}c6úy ¸Œ¢ 3‘ÈÙû_©§.¥¶ï’ù€¼o¿”|åUiƽWbžÈûL@@@@ k°ü7¯/O®Wéõ²y-ë~8@@@T“õ„Þ¿;½¿?Ù@@áøóc¶»-Õr/òÃ5ÆA`´ ÌŸ?œT“›lYßîM›6u[bŠªÙ¨äž\O8¤T@´¾wåzØœõgÜì+8ÎÅ*uJ¿$“ù $Žª£ö–†Çd]gKq¡Ëäô~U2`Þ”3ôi]"»+ŠE«k›v»»¯¼5ß20Œç¹óÚ›óä׳넆õ}Ð1êµ"~Kñ£Ì[ÛÞ2±y럎‹L@²î£S§ÏzÐÑN«ürú€üж¾ÐS-QŽõ~»Ï•7Š/J_ÿ­®ÿiyyÍ¡©b3Ýç³A/öý§T}EE…íÃZ¦CûÆË‹I~éùÏKæî%¿ y+†ó4äE`rQŸl›–ÄÜi‹ÉE»QîF[?Rmõ[ í € € € €#/pphŠºfÊ%Ê»é” @@@\ xgzozw²!€ € €Àèp8Àþ—¤ZÑè[yîW$Jæ¾×aìÑq¼äºŒ·ŠÅõKÊB¥JúÇGå_~“¥´¡кK.ɺFù¼Êx!–-?ݶeóL×5Ëî¶„©ÙË¥ÑÚ©+ •>"EÃ>4¤Î88€ÑÊš¼2Éa}'•s@ Éô† ŽoPŒí‹j–ŸQU]÷»½‰ÇæXÑtÇKÃ¥ãþ]\÷åy±Ø^¨íñÆ?(£þæ7Goœ3nâE~1¹l«¬¬=X>ù½ß¯OùEøüæG¼Õ/ÆÖVLçɶ–âl7'Ùæ-z°Åä¦]ÿÅÞ~§=†@@@@B8.2S}Úê¨pN¾'®–Ä@@@FPÀûûÒû;Óû{“ @@F§@× /ì°®LëÒyóæå7qÍ:‰ÂüƒW –>3tÝŒ«ZFׯÑf¥T®œèÓsöM’Ð(yHBã÷¤PTÅ+nç´Dsã9^²¡cÜíÙwœÝ‘7nÜݶ¦±Aæp†ÙÓ}„2æ?%±õ>™_ž’€{]TU]ÿÑìfÌQŒÙf‹uu(?×yšåµ5¬ã¥™FQíæUQ®üLÖK:”J‹Wå»ü©üÒ/“ ‡WM5¥E£Kç'«žÌvE®V7Ivî×}×j¹´ÿÔ7&GºDWKš½ïëIªé­JyïbÊ×/ˆÅfŒÓ¥8Z-Žssžð©Õ÷¢5u¯Hbåª!s¨¿À6ÿf¥5¼ ‡òZ ÁÑvR´û&d ˆåé(˜;wndöÑ'þTóê†si2Þ 
*¹'Z½|A¢eÅýٌܵ§I—F¾ê›ThÔ{ÕÔÌ\ÛÜüT6cdrŒ|(´ö$Ý›2és_l1Ÿ§}k ?ÏŦJ"ð!¾k1fÏÍñÆç|crÔ¸~ýÊòm¯È‡¨Iéºô’ŠËË—LïèX½5] û@@@@ KÀ»ù46á}jQÙêÔäßß»ŸV[Ý—U§é*¬É2@@@(ÕãÔtg²:6r¸:uÜ ê´q'ªÿïèñ93@@È€á‘ÄEí›àè”…Ip´w»z{‰~£’j„ãñÆ6KWß\Y]“IJrcæ›$2vimþ&GþU<þêºîÝÊù›ÜCÿLf½™’Ìâó}k<¾Ez¿ÑûçU;rЬSä?ÙJ¢£:Guºùê\%·N5,Š-ûóÚøJÏŽ-ÇFiIPöߌÞ„Co<Ûœüg<öZIp{ç¼wÅçÆb“&†J’°uöˆh5CçWU‹k«[×4ݚ鼄-Éb÷Ž+Ow¬$?jG•,•ö¯¥‹ÉÅ~ù‚|(|§¥¯MòËèKÌ æb?OƒTÄ;Æ»ã±|)‡·ºÍòÏÆezã½Åo< !í$8ú!ц € € € @‰Ž¨÷?¹÷_N)!€ € € € € €€€TÜ.I4³ü¦Râú'@ú;VÚ:Õîm%ªÌw¹’Lz¨—çUÎô ,àÆ cu‡I¥Âe2EIj¼_+w­Qææ„Ûõ'39>U¬ÑΔBLþÚ{nï–9{ÿ¾²pá²ðøIïÕNH͹’¢òÖTë±íó {…œðHÜl±´g! ÕKÖ£Œ>Ò“ù¾gæ°»1Ñ Žcâ4÷_äœX¬d¢Sv»¼`NéßâÿL>ü<+¿”o“_P›$…ëùdÈÝ*}„”:ÄQî¡RIîd)ÜZ.Uã&û÷´·Uë¤Ïµ•±º÷¶Å½_m2e¬´ Ž^g’1¿\~ä5ÁQ½ÜV™Y¾™ ãê£å”ÜwßÇÒÿI•ÕËßÚÖ²â/}÷çð± Ñ›D™¶KñëÚev¯N¦a´œ§4Ë+ºÝŽÖÓm“–à'l1¹l—EOÙú“$ßÃl1´#€ € € € € € € € € € €@ hóœ$¬ùNÜQ½‰=7ûÑh¤ØWêP? /íOøÅj›W½Qî-/·\.½Ó7®º*±¦áš|­Er@¦Úú–< ÿŒ5[yhß›œøéúåå5‡FJ#—É4?%9%AŠ„EL$ôq9ö3y˜Ú˜îr§Ù}ÏS*/a9é6£æxùN›6mêN’«ý^±3%ãy­åjÌÑÐ Ž£á,f°†h¬Þ«°øÉ@‡õ€Q=—&ZVÞ(^‚¼»ü[/×WV××Éo”oÈ‹ò ¿ãå—Óìq†ÿWb¢~qÛ¼±f=g…¬çÿ lëÿÜY&Ïó’àX«}¯|s€oIoÉô^û‹x|[ÿ9ù?MçÉ¥ÅÓ*%µ§Ëµê;aãê;9n”9=e“”ïÎñ°t‡ € € € € € € € € € €… `Ô½r‹óß©h}²o{£Õu+åþúS´1¥—Jw+cþ™ìQ‡\ýx[[ãK9.Ç]™{%Y´Ò¯Sí„Εv¯¢_ÑmR½ñY_ú •{WdŒù^bMcÞ’½a$mŠÿù2S-WOoÍÏÊô¾.m0‘Èo$¯e¦mº’7×C{æ^¾Ž¼÷<,Gžöh­K;bÎ ’‡ô×´19j8Ê-£Bº$GÝ™n .£yÌÈÀBÕÔÌÔŽº)ÐÐÆ$^rw‘IrãÀ~ÛZMwò­ò î¡mŸK’VeåâZ/=ÃÍX×#ùRé4/׺Òu¶ »I×:Ǿ}ŒÎóÔw…ÅùX®Q{Gm^ÎÕ×ì´'U%Ip´!ÑŽ € € € € € € € € € €@ $]u·mÚF _‚£¤¥!YiÇHvÚy’ÌõaGéÿr´³&qîsÆé£5õ[T,fM²³­)íâô[¿²¶ól1…Ú.Õ?`›Q[:w¼ðïÖ¸!È=îGغ’šyɱ›i{"±êIÝÓ}ŽTìÛàØ#Ä’•€þ½í°PDÍ·Åä¢]Æ)Ú÷‰\¬?Û>ŠâŸíâ8®¿@ÈŒ»A¾!H)ß·¶4^´1·&Oõað³DbÅs=ÝgIŽýƒƒ[ûïq}ÝüùË&ößëÿ¬­¥éÿ¤ô°…I­“J‹óü{ʼõôX¬TŽºÈ÷HcžiS]·ûÆ hçiÀ‹ò©WÁÑ:q­;­19 pg·­;ù&mH´#€ € € € € € € € € € P„OýóÁ?I…Dÿ{˜9Î+“ïåUVÖM“{—ðÇ<¬âñ¤ÌÈ´&»]k‚£äc¼¥¼¼æÐášaEEÅ䪚º¦hMíuU5õªZ\{~ylùqóæÍŸÉÊËË”¤Ó7ÛŽ‘܌֯_ße‹J{o†Öïµõ‘mÇùó竬®=)«»(ZSw•WUTþÝbo(í­­«þ!93×Úúʦ³%†<.T6íÚý­í0)zu±-&í’L|I.úk}ðÂ#g¼ªjé1òa¡Ü¶\IâºkÛÖ'½JŠÆ´½£cõV·Ëœ-nõ?FO,›ºÌ?fp«Vöª”ÚÑË9´=‡„JÅSû&dºJ5eòl4Ÿ§¡iÀÑF•Ùfa”ëÿÇ­ƒ Û]£¬ ŽòM"$8fèJ8 € € € € € € € € € €Å 
°iÓ¦n¹§ý~s•JŠ:lJ–ùÅä¢ÍDô9Ö~\õ;kÌìxé©?K²¨µHT¸¬äCÃ5E§dâ'åü.×ÊùŒŒù#å8·EB¡‡§NŸµË«†)É÷Hò^óyçÕNð›S$2e–_ûmºçŽ7çéÁ¡z¼ÐRÖMI¶Œ‚$2~¶ªºnX¤õ9ޱ&QºZY«¬ iC8xoÅïm]H’Ô½mClŸ?ÙDí(Ip¼I•¤H(I”Ó§Ê££7lhzmpÔþ=ɰ ”àØµÓ<ºÿ¨<=rœ ö\0λg~®T¨¼P,Ž’Ÿ¡”Ç…Í”ûs´³£µñÉÜñ=F¶çB]/çhHºé#ÐÞÞî¹z¹E¾[(ìx¹UyÛ¤8[^ûÏÛÄ  cß„·˜SÈÀù±ØTùÅu±­+ãºWßrËªí¶¸lÛ[׬h—ºwú/Id³+tY•_ÌÀ¶×߈LÛÀýýŸë‰S¦ÏÌÙ/¤ÊÊÚƒåÛÎï?Æ€gÆü¾­¥ñïö¦}:ÚÏSÚ…IƒÑÆžà8Ì Ž’¸ø›+ŠäT0M@@@@@@@@@@Ø+Г4 ^â”/ˆV'z•Å|c†ÐXYY7M’ÌÞïÛ…1];’[ÁÑ›»qÕO|×ÐÛ¨§„ÇOú¨=nh¥“BŸ<)¶^$ÄšÔ¥•+9%ö­¤$œ×£Ššš#äJ­³ÏD"´ œà(ÿÆ}æ,Ÿ$ÕXóc±7ɤ}+iJRêSwÇ㩎g_.’ÿcëE2ƒk¢±å lqÙ´W-^^ågËæXޱTôhtL•~D^$e~«‘7ô‡ÛÌîýbrÑ&¿ðo°õ#ßÜp¹-fP»«¬s—lü僎Ër‡S¢—Ha¿ÃÅô&¿ömcâ< \t1=7ö ŽF oG×±Wp”¯£ Á±˜®3æŠ € € € € € € € € € €@ñHøzÛ!Ž£ÿgîܹy¹·X—èÏKå<ÿ„4­V¯_¿r‡mž#Ùžˆ7&$Wô1Û¤JÛÅ–h‹Ë¶ý¼ój½D¹OÙŽïMlíq[lqŽÑ/Úb¼v'bÞ$.ËI)iÜ–ƒo¹žút’t“û£±×C¢eÅýF™?ØV®çGçÆb“lq™´K‚ëAJ‡~É1ÄöÈkvuÿ¡x6R’å]i[²ö âñ¤-n¨í‰xÃÍò†ñœ_?òÍ §Ï‹Åð‹ØÖº¦ñ.©¨¸yàþ¾Ï¥:ä|ï›!úîËú±Öµ~ÇÊ· ìêÜ‘\ã3°m,œ§k.¦ç:PÇáMp4:iýö¹®òòGH1;æŠ € € € € € € € € € €Àh|€ÿ¶®O«·Ì>ê¤+­qTU-=F’Ö>n;¬»ÛCò+ëø–m-^¥¾úy¦yö~{#ôSõ÷%ÿA*Ú6}s"±òi[”«Í[Œ×n”sJ¸lb*kêÿŸ™zoÐc%¯d|ÐØµñ•ÿ'y*6g¼Sú3é3/yTÆQï³ÍWÖï%#³åS ©¯µv¯õaÒ¶\½~,X:¥Ô)몣YÇ& ­@^^˜iG£aؼÌ}I¶{›mà£Ûm19jw%áêNß¾´M•œê3¸Ñ•u6 ÞÝoODEL¬ßž,žìý¦…¹þ‡šÖL¾]b '¶n•êŒþß("sOº**æp:étY»3þ•F­Ç€ € € € € € € € € € €-Ð_q»¤§=h¤VŸ¯Œ-¿5.`@o‚P$ìU´e1ìhmúcÀnG4ì9·ó&I–³V<”dµ¦9¥+æÍ›8/Ȫjj¿) ”õbݤÛó¹qjÏN×+&å%oúoZUæ£Ê§\sÿ" ‹×øÞ¿UªNì¿Ç÷™Q®þßi”sv~´¦6£yØúôÚ+cu§KBêGl±I£:l1´M@вuÈë·ÍÖ‹\ gMuÊn¯¨¨˜l‹õk//_2}üá’ä}š_mvíFE1a’:U^xaË"6­‹7z¿°†e“7 ÿG™EÈ8ïÎt2:Ùó³ÞË>jí,÷iÔ èè›u¶7h,§L\ )V>Pí¶Í'NÚ^k¶.2j7!×>ž6=RPPê@IDATuJ0 € € € € € € € € € €€Iª«m“–$œqŽã¬­¬®[d‹µµ{‰}’à·FÒÆÞn‹U®ù¢5¦@îŽÇ;k¾h:Z/š6}Ö . 
PmÑÞctqýç¥Àà§ì‘’ÎjÌJ©\ø@Ø[nYµ]¸Ç+×Ç;Ž8æÄ¯Ùâ2h×Ñšº«œPè6ïÚËà8¹¬Ô™Äwwîù‰Ä[ïù×ÊùLeum4“¾ýb£Ñe‡;ŽNØ×gþØÞÒ°Á¯/Úr#гkÏ'ä²ÃÖ›œ³Óœqÿ˜mÒwÕâåUᲈ$në“mcÑn ÁÑnTÔ:¤­‰‚ò‹mX¿ A*-Ï>ï'¦µuÕ?%ëýW÷÷nÎX«}sÿ}=ÓÊXy<±¦qc&½Ž¥ó”‰K!ÅmvÚæ“Ô:§ß¾aÏIo­Ú@@@@@@@@@@@ ¸ñÆŸKRÏÖUh]¢•ú¹$ž}vN,VbOPµ¸ö‚©3f?(%ñæ§h°Ë¬n]Ótë€ý´mMÓ ’÷Ðh’ZŸ)‹ÜãU) Ÿ"È«"­®ÿšvT ÄJ™Û«ª»çª]¥Ýåê ÕOEcu•i; ØPYY7-ZS«äx|YÉ"wÉL 8Toغu«_”ܘAŽ‘$Ǧèâº/–——g”D9°ïÊêåoÕ‘ð:IÆœ1°màsI@¾nà>žçG ££ùYy|&HïRPî(IÀ]_U]׺¨fù{äßkU»ÃU‹ëΕø_('ôs¹¾21v{õ/{D°€T;SÞ,}7£õ£¾9nÜâî~ì§T†•|ç4›QútiòÞìeûô!Á7ÉAóúìê÷Ð3¢õ2ÙyM¿†€O*j–¿O&=Ó/ÜhÕ(íR9ø6ÖÎSp™BŠÔÖG'©†5ÁQ²mKmBò‹¹ËC; € € € € € € € € € €¿Àž×¶_™0õ ¹çýx¿ÕÈmõ^.ɵÇ9¥õÇ/®ý÷V³ûV'ýŽñÚÕÔÌ ›’ÿ–ÄÆhÚd€>e^êLv~²Ï®¢yèv½òи‰o•µ¾Ù6iñßÑÚ (”RçÆb“&êÒ•ã?%yÁú\sI"±êIÛ¼ú¶÷$Õê’ºZöÙïywÔI¸¼±§³ëK^²Xß~l¥:â RÑî2É©—kdš->]»äX¬b±Për_nçgËœ²Ebyо}©~ÊÜÊä¼^.›|yeMýmIîú¾W¹3Ulª}±¥osBá/HÖH…Œe)õ7I@–0›j@öe,hiü$qŸ/§§<ÐÁòžV¡hUuýyߺYNêc®QÏÉÙ}I®çƒ´6‡(£QÚ,”çY_×ûæ"¸2Ê9ÚwÜhþI‚ãh>»²6I¶{‡íÝÒq“Úàè½ñK&þs2½CÓñË/ŒËË—Ôѱzkº˜Tû{vmûy¤lê÷¼ãSµ÷îsTÖ ŽŽqêü~ýHÆ¿qõžŸ¥;MÃX;Oi |·WÁÑÿÕ¤UÈšp˜ËEÊunÿp©5 ޹D§/@@@@@@@@@@ T`ݺu»Ê«êª#}¯LÑz¯±$Ö+‰^QUú¢®©ëp¹]¹úé¤r·¨®ä«º,t¬ãê9w¢ÜJ=GN“Ÿe–oLR÷²õñø â ,¨½½ýåòªÚÅ‘°úªt)Nç‹ýù’Xõ¤$VÝl’fc2ä>«Mh«Þ㾪KÜÙ® í8æhIŽ;FîO_(÷„OÎdér޾ÕÖ[­3“£”ZoÜ­©ý¶T/´V¶“ux¹FŠ”–ÔVV×}GÊVut©ÎÇoÇ· uÁ‚¥SBãÃ3Ca=Wî¶¿TÖóî1Ÿ{y²ö§%6}ñ)­C Uéaë”Ú<ðøtϽk­jñò*’j¦öMÎÑ4™ó×uÊþMÛ~#“z\²pþé*ýx¨§ûñN§ûÕ°;þðPXÍ’¸Y2ßYF9'Éã׫uÊëfÌN£’KœtÏ6Œ¦ó•—N:xÝç+ÈàRS®‹Ë¼P§ßù•'½ÏûíLÙ£$H¶I¶*¨Ö„ò”â$8Žâ“+K“÷O=ÕºDǹQhËe€1Ó¼Ùùm¡Ò°7÷Œ½dò­-²ôÞ7”Tý˨ÇWÄêç¶Ç6¥jO·ïôX¬T¦\•®}ïþíÍÍOXb6¹ó4 (ž©àèÉÊgyû¹\«7žT,õÝ$±ŸG_!@@@@@@@@@@=­Æê–ê^%«²&9z+—¼ƒ7ÉK­/Q!Iìñþ§Lþy[ß–-÷S¿~€drõnêÒ¶5+ŠºjW‰Qæª ­”›þƒ%v ‚xΖ—sðq© çíPjœ)²½†ò?-å 76Iœêؾuó•oìÈðÁ¶äî릆ʼʊi‹UõëRëR™õ§e Ÿ.•åWU×I•Cý¸Ìc«¬qºœéY’r@¿c,O䨕ë^¬çpéË7%¤%¡6ƒGoèÖ5+Z«jê¤ïzËTö7KR›<‰½~J¤ì‘׉¨RQ¯?éºÿq€GÝI¥«Ú[VÞ – ¬_¿¾káÂ…‘²)ë%i÷½9î>Mwfõ¶-›/™6c¶EPC!«€}Õ lãy‘ TTTx%Šœc=EÞˆ§ë¿×3ú}…]m¦ø¤i4®º1MÓ»G-ãIÀ3œñ‹ä—ÜDßpmnòmOÑ8VÏS ŠßåUpôßäCk ?ü{ Þ*¿d­ãIuÐÝÁ{$@@@@@@@@@@Š] ols“æìÞd²XŒ$¿}¼mM£$™ÿ& s’¤™|”ß{v$W#5ÿ7‘ìŒnܸ±'ÛylŒÇw*“\(}½šU’ð(‰™'Ê}ìgy?3Nn4æ×{’êm‰øŠ[Äô/¶9H½sl1©Ú_Ú²ù2Ycsª¶áÚçåø&]UßÞÒ°a¸ÆdœÁ^µžÎ—/”÷Â_ nÍí¹æ~ÖÚܸüÕW_µVgÔŠBVõ$¿ 
<„çÅ"Œ˜U‚`¡¬/”TYÍ_Ê-ß-_ùðˆß:$™³FÅb{¿RÂ/r›|ȪÝÿ,Õ#³£ûµ—[Sµøí«çÉϤÛä©5ÁQ>@Ys¹6IæµgÔK¹“¾@@@@@@@@@@@ ð¼ûê{’îéroý£Ã6[£^q»¤µ¥áÃ6æ0 ”hYqÿž¤y—$0Ý? Ãõ¢·¦k¾’hi¸LÅãÖÄ©~§xâ­E•¦îÍùÚåºF}9ávž}s¼ñoÛä‡ë? É*ÁÑKM¸»–‹]“ÿyj5f§\+—µ¯iX§è6ŽŽŽWÍgÉõð%ïQ0Ô¼f\÷ßå5z‰à|ðÁö\=­ºv>fÂìhc†bô-4¤œ©E½ªÎzþ®Vþ•¥„p…Sø—]yù)Ÿ¬Îóó”_@k¼ìn¿˜Tmcù<¥ò(Ô}޲Wp”ÿ¥Ã:DZŽg”Þ:¬sb0@@@@@@@@@@(ŽøŠGÝ=ê4¹×ýÇ’Ü“uå¿ ‹‘1~—Ô]okkiÑÊyAæšMŒ—˜×½k›Trt¯‘ûÆ­Ås²cð1æ¯Rç݉5_Ü–ýžÖ5wH…Ï÷É9{"û^‚)…†n“Kï´¶–†/öMÐܰ¡é5I6û_/R)ò¨E5ËÏð‹IÛ&É ‰–Æzɬ¼TÆÙ–6.Ç bÚÜݹç8©`zcŽ»¦»¡ ¹~Ôéî:^)³fh]í?Z*CvHUÒkš¾){år—Ú¤É7…÷G¤~dŒ&Áqm@¯Êô8uhhš:>2K>îDõŽ’cTHóGC‚å`@@@@@@@@ ÚÚ_’i¸<¶ü[áóU¹·¤2§Ë0f³kÌ7ÛÌîï÷M`ËéÒÙÞ‚HWÍÅnï”~ÚÑúc2µñ9ŸžQ/Hѧë·oÙüß^5œ÷/z>çÏ_öÖ²I¡ïKRÞ2É—Èmò‰1ŒJ~1Ѳòžtó—ò7ÉMïI×îí™ÐÈl¯Yã%.\¸¤#RVòuÉ?[nË ñ›‹_›$?$·&\’QéGÛÈ Üo‘T—WÕ]©‹äV»‹äš8!£Yy:•êJ¨’Øø‹ÇŽ×#…¬Jîø¼sàŽ±þÜ7ak¬ãûúå—̹ý 3Ì"Žc}E§›Q"±â¹hMým²þéb¤­ráÂ…eÁª.:µéúéÝoÌ#k[VüÞ7&MãX>OiH rwÒèçB¶”qf çäe:Öñ´qIpΓÂX% (¨_ì¾O­Øy§Úên/¨¹1@ò)à%ñ¿fv«ô<Óûo]çÝjº3E-?àuþøSä{`lló9;úF@@@@@@@ ©æøˆŒV/;MÒÆ.•;HæË$‡e;¹Oó/J›onÛúÔê|%áe;·|·>AÆø· cuߊ8úCŽ6çË-;ïô+Ð`N’'jnW®úé“ÿ|pí¦M›º3¤õëWîj%ùõš°ã|B®‰‹e dÝ©$»JâׯxTüŠíî®ø´Péw¥ÄÕ„tcÊ­NåUUKim]õt1¶ýëÖ­~Qb>PYY{¥§—9FÕIbÛÛlÇÙÚå>­'%ax­›t×ná©_µ×ͧÛ;Zÿ*óóþ}¡²ºöGéErM)¯ãCäçŒÞŸJd´zY^Ï¥Ÿ•Ê­OË9ß°Åݽþîxßï”Æì*Óöœºa¯ï·¥õÛ’Ôy´T4+çâm²Ž·Ë¿“dÞµ?¸"§1ríoÕÚl•µJ@s¿«ÌÚ¶–I=ÒÐ÷Jß+¥ï_N7Þ s¶µ4=${½9Ù9@^þ›QÏùŒ½VGñ97ÚݦUÈw…òËä*ùÿÙ»8»ËúNüÏsÎL’ äbP¤ÕÖ¨…jݨTE ‰C"æbÕµ¶Z[w»µÛ¿Ú‹Õ]Ýí_­[·ÛŠ„p ÇLBFñkÔÆk«¢€ÈU—@H2ç÷üŸN&™ó;3sæþ>û:Î9¿çûÜÞ¿™9³»ùð¤w4-§ÆJQÑ/û¤]›Ÿº~‘ñOt ù(åÜÖ4àXt¤9=è9…]ßšÁ š·L÷ûÔ\gâ´ÞrË¿ÿðÄ“Oïkv$uþãrLŽùç÷„&ß™{ñb½~ËÄQ´c#ð³¾{Ÿ?ðñpWýþ±™Ð, @` 4Âÿo¹ÿ#á}ó^Žï8j­ÜR  @€ÀÄøâ ?ñ‚^rÆI#à @€ @€ @€F"Ðï³ÿÙoœJ>éî)aFœWß¹çÁ‡ºó>'ÓõÓäåµ×^¾=7}rßóɪ… vžxâiO髆9õðÐÕ]4;ýíÉŽãðbÓ¦MC¶¶ì{>¹‚|<,Ξ=¯’ft=ZìüÅgjµÆ^ó?gŸ¼Æüq^}ã¹®ÿ.÷ë„NŸÓבæÄª{v¤{ö…“û—yM`P"¦§ìŸ?HiŠŽX€L¥·{Џ}Fó|cã#eVom͆©´ï'örc­¶ûÔe«.Ë'$þÑ×òõåù¯£šü—*9 ÙAúH1~öšÚ%ÿò°Óý> m:5ŽøÎÇÆ±Ö§ :}ÊÿÕ’žžj¨ÕêƒÖ´©áôžžùçwA³ìmn°·÷²ÛÛ4¥aL ÆÉ“âVY$Œ£@ã?Ðø¼üèáè$Çq¼¦&@€É-ð¥¯ß2â 8Ž˜Ð @€ @€ @€£#Pìû÷õãršàèliüFmü;ôül„™&m i_ð±~œò}÷ë¾¼ÑÆÓƒÀ:ŠpÔ€3p£RLÚßn¦=WJC¡í™Æ(ã!°3ã˜f5íj{fÑy\ž¯éŽùDÉï·k>㘠ù¸ûðžÖ:¹q2Ü,k$@€qh„Ÿ›ÏO @€ @€ @€ @€ 
@`ä)TO-%ñÖ²šéÖ.à8…ïøçjµ‡ò¿Vm~’\ŒÕy 7U6¬[ûüïu¿Ùt1¼f°öX©¬¬mïõîßõð=W7­)itŸJ€&PsJ±<0ØQ=eL–\-Ÿ'§ÿ}LÖbDà3}#|{ÏÍd5–A€&¾@ãs³ñùéA€ @€ @€ @€ @€&³@÷²Uï^¾êÏ»{V½zñÒUÏ>³§§k\öSIÏ/›wGÚÙ<çT6ÀlŸÂ'÷MÁ»5ô-åU# @€ @€ @€ @€ @€ÀØ Ä%9c1/TCèÌÏcRWê^¶úö|í‡)¤!nܸî’ëFsMçö¬zj ñÜfsä\Ê-Ÿ©ÕîoV3Ûü Æ)×Ó×ʶ+Õ³Êj&sûŽbçåùÀ !ÄØxtV/¸Ç#Ž:îœnœ3ðzÿ÷õzÈáÉv<ܧv(Žö1¥'8æï¦E£½ŽÇÇ/Ÿ§^Ä›µ˜…Àø |s÷ÂÝÅöñ_ˆ @€I&Ðøül|Žz @€ @€ @€ @€ @€É*GÛï“ïÍ Åp|>äêeùõïçÝÿí½Íˆáò%ól+iŸ–ÍŽSü¶§¿\¶ÅœF~yYÍdno$›óIŒW7ÛCŠaÉí±rÑ×ú_Há{k—´å‹ûÔvâ¾ÞP’X?‹¡rK(Š[÷¤tËC÷Þ~ëÖ­[5õââJµzá`cç_‹Î9ç¢ù×^{ùÞÄöÙg¿vNˆñUƒÕ?~=}¢y{ë­îSëVãZY«ÕÃòÕëóÞÜd³æ-8îŒÜ¾µIÍˆšŽˆ³^˜¿?›ÿþN{?ôŠM¤3I$pÓžÛ&Ñj-•L,Ÿ£ë~X  @€ @€ @€ @€ U ÝB\8X¯Æ‰ŽÕYçäöÿ;XÍH®Síú“â%cì)v§«Jj¦esó€Ì´$™Z›~辟}ûˆ£OØ‘ÃP‡6ÛYÇìoÊíÙ¬¦]m•sþ(ÿÒX‘p²RÙ{þêáGŸº—¯¾' {KJá–ÛÓ®»îÒGÚ1ï†ÚÚ뺗¯º=ÏyÜ ãuÎ<¬znn»´ÑÞ5·Ò¿4;…¯ñKem£¶÷©Šc3F*ÒU±›Cµ¨¼2¯fëh­¨ˆñì’ØrþKŸ­ùK`" ÜY¿o".Ëš @€À¤ð9:)n“E @€ @€ @€ @€ @€À E>•ãIƒŒÖèV‰ñ‹¿fÓ¦MWÜ=È0úܽlÕïå¼Ò_—vNéS6¬ñßUš‘9H—&‘@>±//÷«eKÎQ÷-^¼ø°²º‘¶7NFŒ•Ž>i輎£s2ë7ó«“ÛnÜ7SаæÀYû_‰Kž|—bÏ“¯öbï/•Kï9XÓp®¹OÃQŸ>½i翤š~˜¥ﬞž¹£±Â%KV‘q¿¡éØ)íÞóHßÕMk4˜b¦]SlG¶C€ÆNÀçèØY›‰ @€ @€ @€ @€Ú/ðXÚù©6»:û¶n|g«c¿‚ƒ¼É9¤ë×½÷ M.í(E$5%G¬~0??Ú|7ñj¥úÉ|‚á ¶Öj;š×¹5vxÌóǧ”÷Œ×ôö^v{yÝÐ+'/æTïoÚ³–ä_0Mƒ¿9~¹vÛ¶m{cø îÓðíÆ´ç†Ú¥_î^¶úg9{üàÇù³çýmnÓà5Ck™yhçGòœ‡5ë•?Œ˜×÷Åf5Ú @€ @€ @€ @€ @€ @€SI wÝe7äðá?ç=½±•}僭~'Tâïä>?̇Qý(¤tkã™b¸-áž+GWBq|n;>׿¯á„œ‹zN~Ý4w´ßÜ)í(B\jµú~×½ÙO uÐýºy3Ùî,v^œ¿¿([wNŸvD¥kí¢E‹f•Õ¥}éò•9èW·Ð§¨}ÿO uÃ*Ù^ñ~ô¿¦Oäu¾w(óĘ®¾öÚË·¥ÏpkݧáÊ}¿¾Ûÿªcö¼e{ÃÏ%ÓçšÅjøj ''ò?žßoOJ·ü"Xûh^Ÿî÷i4mÛ9öյ˾¿téê—¦Îô…|ÚéQíû c¥t[âK7®¿â惶»H€ @€ @€ @€ @€ @€ p€ÀæÍWü"_l„zxÚé==3n¬ÕvÐÑ…Qp‚㨰NžAǬîyôþ|’cñ79E¼clVž¾›Î/ÏpccŸ»¾çêF²º•=§?ÑJÝhÕLçû4Z¦£1îúõ—ÜX»Ÿ—Ó»Fcü'ÆÌñàõ;Ÿ—“þÂO øJ€ @€ @€ @€ @€ @€hƒ€pc‡0„€ã°¦jéæÍ›í½òÒwåÀÔIEJÌû|lTöšÂ½ùÌÕÿvß]·-ÜP[sý¨Ì1„A·lÙ²+Ñ./í’ÒW7¬[óÃÒºQ.˜®÷i”YÛ>üÕW^ù³Þ+×t§zýܽÇ·s†ÆÏP*^“1~õ–ZíÞvm, @€ @€ @€ @€ @€ @€c-Ð1Öšoâ ì Lý×s{V}°³ßT‰éù„Ãç…«#XuÎL¦Ï†"üÓ­?¹ñêmÛ¶íÁX£Ðµ¸8„ê[› œBÈ5ç1=ïÓÄñou%½µµ×žÙÓó…c+]oJ!öÄÎÌ}‡*OáÛùé#Ûï¹íò­[·ŽN¹Õ©#@€ @€ @€ @€ @€ @€ Ð&Ç~SÑú½¶/¯©­ùyÞü_4žçœsÑü™³;^–#Ž/ÏA¿_)B<2Ä0w PJaWˆéþ\ó½Ó×c¾^ì _Û°áÒ{ÖN”÷½ëÖ~3¯%N”õ e“ñ>­¿rÍ›òÏiñ¸¾VÛ™7ú¡ÆóU== fÆYÄÏË¡á“rxxA=ΑÁ)CÞ‘¿-¿BúÊž¾ð/›Ö¯ùÖÀ:ï 
 @€ @€ @€ @€ @€ @€LvÇÉ~Gyý×^{ùö<Å'÷=ŸœmáÂ…'žxÚSúªaNG=G§×ý¶[ @€ @€ @€ @€ 0‘'Òݰ @`XÇTV? @€Žõ9êÛ€ @€ @€ @€ @€'Çq‚7- Ð>Ó:Ohß`F"@€ÓLà™>G§Ù·] @€ @€ @€ @€ 0q'ν° @`˜gÎ|Ö0{êF€øõ=@€ @€ @€ @€ @€Œ—€€ãxÉ›—h›ÀsgœŽ®ÌoÛx"@€ÓE ñùÙøõ @€ @€ @€ @€ @€Œ‡€€ãx¨›“h«@5VŠC_ÖÖ1 F€¦ƒ@ãó³ñ9êA€ @€ @€ @€ @€ÿŠq<ÔÍI€´]ೞ~½óém×€ @`ª 4>7ŸŸ @€ @€ @€ @€ @€ñ诉ÍK€h§@Œ1¼kÞŠð–û?îªßßΡE€¦œÀ‚êá{?7ŸŸ @€ÀÐþÓ ž6ôNz @€ @€ @€ @€ àx‰  @€Àd˜S9$¼oÞëß?ðq!ÇÉz­›F] nl|^6>7= @€á ¼äŒ“†×Q/ @€ @€ @€ @€ý*û½ó† 0ÉŽï8*|ôð? ¿ÞùôI¾Ë'@€íh|>6>'Ÿ— @€ @€ @€ @€ @€ñp‚ãxßó @€mhœHõùo ŸyìaíŽÏ‡»‹ímŸÃ€ @`2 ]™Vú²ðŠYÏ1ÆÉ´tk%@€ @€ @€ @€ @€¦°€€ã¾¹¶F€¦³@#¼ñÊ®„ß™õ¼ðÍÝ? ×ïú~¸iÏmáÎú}áÑ´+¤ü< @€ÀTˆ!†Ùqf8¦zD8­ó„pæÌg…çÎ8%Tce*nמ @€ @€ @€ @€ @€I, à8‰ož¥ @€å0Çóg>cï³¼Z @€ @€ @€ @€ @€ 0VŽo+ió @€ @€ @€ @€ @€ @€ @€O 8>Iá @€ @€ @€ @€ @€ @€ 0VŽc%m @€ @€ @€ @€ @€ @€ @àIÇ')¼ @€ @€ @€ @€ @€ @€ °W`*fn¦âžöívÑ5&¦@ÇÄ\–U @€ @€ @€ @€ @€ @€¥K/:)uV_R|nŒá)ùë!¦ÃC Gä1RÚ™bx ¦¸=…°=ä×ùÚ=1ÆöÔë_ÚT[û£³ºZé^öÚ¤TyA¥ŸŸRøµ¼î#b óßSx0þ󼯟ç½þ<„ôÅÎÚ–ZíÞÍ:öãâ¥+vv„W¥P95ïñ¨ÒÑù>•÷td ±//)ß«tw~}wÞë]!ׇÝéS½½kïûåš‘Á“q @€ @€ @€ @€ @€ @`J¼ª§gÁÌØõªf\B\”qÇï=æï‰³þžxóÄûÆ®cìÊo»rí1O^ÎäÇïvV«¡{ùê»s¨îKE*6m¿ûöu[·nm„êÆå±¸gÅ3:ªqu^ôÊâq/³±…}Ëùå×¹ùÊÜüöYy_ù_×UéúPÞËçB(.¿ÿ®ŸÕò>Û×ëÉ/ñ;+•kž¼0àEnï]wÉ \nëÛE‹uÌ?úøÅyçæ½*/ÿèÆOlmï«Ç÷Ô¸VÍMÇçkýîså ©3¥¥ËW3…tMJi݆u—ÞÔÖEŒ! 8™L @€ @€ @€ @€ @€ @€É ÐÝýÚãBgõ9èöŸsØof;×üxÀ.^X‰Õ _pâ_w/_ù¾[ôý5Û¶mÛÓÎyšµxñòc;ºf¾7ïmU®«4«¬-ŸHÙȽ2weÞÇ»º/\ýÆÞ«.ù—þõ9È9#‡ Oîmÿ×é¾ýß·÷Ý’e+»s°ñ䵞:’‘sÿFraka~ù®îe«/îÛ¹ëÝ›6]yÇHÆÕ—á ë×ð§Ó“ @€ @€ @€ @€ @€ @€Àè œ¿|ùñÝËV}4tvü8ÙÞÚîpãÀÕçÔÜI1TþéÄSNÿÑ’žUgl÷K—­þýnüQÞÛëòømÉå}œbÚšO9ü‡³Ï~íœÑX÷PÆ<ùŠßZºlÕW*±²>ßÇ…2o%Û½¡aØ}ᪿ^ÔÓsèAj\"@`”ÚòËk”×hx @€ @€ @€ @€ @€ @€- ,½p嫪iær îF;Ø8pAùdÀ+Õø…îžU¯ØÖÎ÷@^ˆáçýÍn縱²[ã”Ã7wÍíøV#(Úîñ[¯±ÇŽPýJ^ÐoµÚg8u ÃX‰ï:¼Òõõîî‹NÎú 0|ÇáÛéI€ @€ @€ @€ @€ @€ 0º—¯\™b¼z4‚CØæ¬|žâUK–­þÝ!ôiµ´Ò8]±ÈkµÃpë§Rv¤_8·gÕS‡;Æpú-\¸°³{ÙÊKÆbý×^Âõþ@IDAT—s§ÅÎκ—­xnÿë^ 0ºŽ£ëkt @€ @€ @€ @€ @€ @€1X²|õÛCŠ—ä ZÇL×tм†Æ1ˆZ¼xù±M ‡ÐxzOÏŒ¥ËV_™»¼yÝFVãÉ3*á ±ÒqÌÈj­÷Y==sO<åôOÇXYÕZ6WŰ †Ê—§€¶ydà 0ˆÀ¸ÿÂd]. 
@€ @€ @€ @€ @€ @€hI »gÅ99PøÁЈ¶øH!Ý™‘_ )}?…psLᡊ•Xé+bšBen%¤£S óÏ!žØâÐ{ËòRë˜=ãCùMÏPú RŸQ}un{å í½œRøY)÷K7ç=þ¼’÷X„¸ VÂSC ÏÎk<7„xÈA;ﻘ£š§f‡Ës]³²·5œs*³?›gyþPËûº#†ôé¼×m¡÷Ô«ÅÝyŒj(ªÇTBql^ö³ó½\œ÷:¯¥qc<4yõ’žU/ÞP[s}K} 0lÇaÓéH€ @€ @€ @€ @€ @€ 0Þ`\¨Vÿ®•u¤üÈa¿KC*þ±÷ªË¾šûä÷­=–.]}zêH¿—ƒŽ«rhnn+½rí«/]õìMë×|·•úÁj–,[ýºÚk=ܘÒÕ)ÕÿWÙÏ;ï¼Ù‡Ì?/‡ÿ2ÿÌÁæÏû8b°¶Æõ˜†,d S+]È£´nÌwrc__ñÞMë/ý·A†|òòÂ… ;?ùô³ª!þi¾w‹žläEuv„jºâœs.úk¯½|û e. ÐÇ6 ‚ @€ @€ @€ @€ @€ @`|N­v½=‡óN)=¥Ÿ†P¼ºwÝÚo–Ö¤`ýúKnÌ—ßöªžž÷ͪÎþ§<ç9);àRG5¼:_vÀñ¬žž¹1¦ÿÙÊ ŠùTÊÛsðï­Ö­iœöXúؼyó£¹h]‰nxfeÖ ±òÎü~ViÇ6t÷¬nœ°øG- ›Â÷Rè{CïºË¾ÑR}.Ú¶mÛžüÜ’_nÉaÑU•þßt<²YÿƉ3ëøç\ÓݬN#È? @€ @€ @€ @€ @€ @€&ŸÀâÅË­¤Ðå5}䃸gçî 7ÜØðOÕjwõ^yɹùÚÇú_ôuÜp´¹¬á°J×_å°ÝQeuù0Ê«ï|~«áÆþãÝX«í^¿îÒ÷õôÒ| Ûh¿>ùòãc%\ÜÒ<)õÞW<ú[C 7wúKÖ¤=õçä¶ |ŸÝ—,¹på[^÷žö 8¶ÏÒH @€ @€ @€ @€ @€ @€c(ÐÙÕùÆã¡M§LiwQô-Û´éÊ;šÖ ±ñÑûÞ‘B(3ÆxÚÒ¥4Äá÷–/Y¶òWóI‘-ìÒ·v?²gQ#|9œyžè³¡¶æú¾¾r éO\í¯Õ4óÃù4ÅÃËæÉÁË\¿nÍ«·Öj;ÊjËÚ{{×ÞÙ·sÏK¡Ð²ÚJ%¾ïì³_;§¬N;Ãè^7½ @€ @€ @€ @€ @€ @€Àþoýûýß|wïöðº.ÿØ¢×û¿ÿÊ•oδ&BåülúÈÁ¸÷o¬]þí¦EÃhܲ岇–^¸âm¡RýdY÷T©œ–k~RV7°=Ÿ øálžÿIáþ´§8góæ+ÚJÜ´~Í·Î_¾âüŽPýr^Ϩ®–ƒŸ§äpãâûø>.¿pÿÝ·5‚ž9SڞǦMWܽdÉÊ—Æ™ñ»ù{èèÁGsfÏ©¾1·pð- W ù/¸áŽª @€Jv§=á†]7…¯åç÷Üî.;Ó®’^š  @€Œ½@WœŽ®Ì §v~sæiáŒ™Ï 3JþÿÇ~•f$@€ @€ @€CàÛ7—ÅŒÂÓråicµVA€Àd8¯gÕ 1†ç6ÝGJ;v=Ò÷wMkFиþªµë»—­úa!žÚt˜j\дý ÝË^{F÷%iÚïRŠé'îwq„o®¾ríW—^¸ê…J|LJjÚ=utüqþäh¢lœ’¹ëá¾Woݺµ¯é`ÃhܰáÒ{rHõÍ9¤º¡Y÷ÃÛBOχB­VoV§¡ 8ÝL @€F Зêaãÿ®zdkx =2‚‘t%@€ 06ÿÇ-õ»÷>¯{l[˜ ²(,™ý¢Ð«c³³ @€ @€ @€ p€ÀŒJùÉ!Æ_{íåÛèÜÆ 9 ÷Ã<\ó€cŽò”±R~²a ›{×­¹|Èc·Ðáчïý‹Ùs<7„xz åC.yEOÏáyìוuLEñ—£ysHuãÒe«?Ÿø/l-ù$Í/ˆ³—n áªÁj\'@`xMÎÃR/ @€\àžúöðÖû?þqǵÂ'r• @`4þC¿iÛ6þÆõ @€ @€ @€ @`|òÉ~9|×üÑêëšWŒ¼µáæ²QR¬RV3°=¦Êù¯ |ŸŠô×Úõ~Ë–-»ŠGmüCª]oÎ'pÎn¶Þ|°!=öñf5íhKEøpÙ8•Júòí ]@Àqèfz @€ 0 ÿØó³ð–û>nî»c½u!@€ 0ñÛ6þÆmü­ëA€ @€ @€ @€ÀØ ô;Ï©‡]¿R}gåÙ/¥ðÒ'CHßÊχB w]}åÚF}e)ý¤lŽJJMƒ|û/îYqr>QðY¯÷¿7üW[ó¯ý¯µûõ]Å£µìú@»ÇmŒ—O¾\R6n>½ñB­V/«i{oí’kò÷ÎÍÆ‰)œ¹¨§çÐf5Úº@ÇлèA€ @€¡ 4Nµyçö;µqhlª  @€&@ã4ÇÆßº=âmá¨êüI°bK$@€ @€ @€ 0…rðmc·ä5žŸÏÏý/ùÊÆ©‰ù€ÅQ~ÄXp,bèÊ*:+•Åeõ1¥ÿ[V3ÒöëkµK—¯^“ÇyÛHÇêß¿qorpò×ó ŽM})æ[<&|‹ö~­t¶«óª3~3·ð½6h ” 8Á±”H @€#èKõðî.n ¢¾ @€Z rlüÍÛøÛ׃ @€ @€ 
@€‰#pÝu—>2«I±¾³lžžR†'ŸÎx~Ù˜}EýÓe5íh/êEo;Æé?Æ!sÃoÆËnÛ¶¹¶æ¶þýFóu>Á±4¸XM•ŽæŒM`: é—ãt²g @€F&°áÑ 7÷Ý1²Aô&@€ 0Áó6þöõ @€ @€ @€ @` ¤êŒv]m¡foÉY==ss ²$H—¹ºvÙZs$uõ]~3åÇHÆØ7VcÉþBÈSþÛÀ~£ù>ï°…ùÊ×=šk46©( à8ïª= @€˜ »ÓžpÕ#['Èj,ƒ @€Àè 4þöÝúFw£ @€ @€ @€ 0!.\ع¤gåK—._ùÁJŒßÎEV™ù´cÓ@dN~+ÏY´sÞÁÆÚ´iÓÃy=ÿ1Xûp®Ç^TÖ/Åøã²šv¶ßUíyU‰á˜Ò‘Šô`iM[ óµÇá‚ .˜›—ÖB\2ÎoÏŒC€( 76F*bš?„• P" àX¤™ @€á Ü]<0üÎz @€˜„þž„7Í’  @€ @€ @€¦´ÀÂ… ;O<éWÏ•tAή½ ‡ôNËÞ®Ëñ¹½ º|žà˜'ég/B(=Á1VâCƒ0 -1¶-PYïS¥¨§õj•” 8– i'@€ @€ @€ @€ @€ @€˜K–­ìÉ ËáÆ¡?rqWŒéû¹çwC ß-Šâ{{Båû×ÔÖü|h£¥C«o^BÌÀæÛ8l>Óã­ùÊÖÔÊ8š=EÜ>£y¾1äû1«·¶fC«cª#@`ò 8NÞ{gå @€ @€ @€ @€ @€ @€i+pnϪ§Vbø?CÈ¡ÆoÆP\Bº¦·ØõP«Õ‡Òÿ`µ)Væ·+ü·wüî;Ø<û]Kñ¤ýÞò›¼¿ãÛ5ÅÎðØý3Âì¦ÃÅŽ]´hQÇÖ­[ûšj$@`Ò 8Nú[h @€ @€ @€ @€ @€ @€é'OüHq~K;OáË}Eß[®®]ö½–ê‡R”Òá!68Æ+­Y)Šÿ•æåùäÉç´:^›ê~£Mã„ÏÕj-]¶ªžÍ?Ç1·Í[°à¸<ç-íš×8LLæ¿í&æš­Š @€ @€ @€ @€ @€ @` 4NoŒ),n… á]ë×]òâQ 7æÄ/[G>9²å ÏŽôØ )?šŽ™Âé .ìlZÓ¦ÆÓ{zf„<_›†k “RŒw—ëÏ,«ÑN€Àähù—ãäߪ @€ @€ @€ @€ @€ @€L|zã뛞¸o“9'ø÷½W]ò7£¹çÖ+=E2Ÿ¸ØüˆÇ~ üL­v~ûƒ~—|㌧>íôÓlhÿ•§]§gëí9}­l¼X©žUV£É/ à8ùï¡ @€ @€ @€ @€ @€ @€¦•@>½ñwK7œÂ];º÷OJëFXCzZÙùÈÂ!fxâWËÆ¬v†³ËjÚÑžçyy;Æé?F ñËýßìuN„¶}ÞƒÍãã+Ð1¾Ó› @€ @€ @€ @€ @€ @€@ë‹/>,Ÿ(ø+e=R Þ²eË®²º‘´ŸÙÓÓ•×òâ²1†r‚ãÞ±bñ¯!TÞÐlÜâërûû›Õ´£-‡Iói™íé—cÔ÷_®t–d>cøµÅ‹—»iÓ•wü²çè½ºà‚ æUgÍùH ég1Tn Eqëž”nyèÞÛoݺuëc£7³‘ LoÇé}ÿíž @€ @€ @€ @€ @€ 0©:;çŸÐÒ‚cßçZªAѱqÖKr÷YeCä°egYMÿöⱇ7VfÎýhŒavÿëý_çÌá3Ï_ºò7¯^é×ú_oçë%=«^c<µc6Æz辟}ûˆ£Oؑá‡6»cöŒ7åö¿lVÓ®¶ÊŒ9B\‘ƒ£Y©ì½i‡}Bê^¾úž|Rç-ù>Þ²c{zÃu×]úH»æ5é.Pužî<öO€ @€ @€ @€ @€ @€ 0‘ê©¥€ã®éÇ£¾îJåU-Î1£Åº½e7n| ¿¸¢¬Oµ£òÇe5#i•8*ãçûòº¾Z¶¶5|ÛÞ;Ë GØ~öÙ¯+!|ä€gΙ†£søñ7ó«“…4r…ÀHG¢§/ @€ @€ @€ @€ @€ @€À˜ ÄPÞÊ„3ftŒjnæ‚åËŸ–Oô[ÕÊZò¡€C 8>>fý—“wË»{VœSV7œö¥®XšÇ_:œ¾­ôIEø¿åuq~Ǭ¹¿_^7²Š®¹Õ·æãü²QRQ”†NËÆÐN€Àþ£ú‹zÿ©¼#@€ @€ @€ @€ @€ @€ 02JŠ¿he„Jgz^+uì©TÒŒKrð°ÖúÇ!{×­ýf éëeãÇJåÿœÕÓ3·¬n(íg÷ôbõ†Òg¨µ½µ5½)¥›ËúåS$ß}~ÏkŸUV7Üö—¿|å!¹ïÛËú給ÐW¬+«ÓN€ÀÐ:†V®š @€ @€Loãÿýä¦_üéOš¶7_rÆI¥5  @€ @€ @€8¸@Ó]­œø•Båùy„Ï|”‘]]²|õ‰!¼¸ÕQb ³Z­Ý¯®ßªáêý® |ãSçTº6,êéY¼µVÛ1°y¨ïÏ9ç¢ù3+›ò©“Gµïë‹\ÿÁüühó~ñj¥úɼ¿´c抇óüñ)®äm¼¦·÷²ÛÒà#pž® @€ @€ 0ýN¸±yÀñKá–RÇR" @€ @€ @€ؽ£¸mÖaÕF8®yÎ1†% .|ÿ¶mÛö 
:Ø0–ô¬øXü›lù‘B˜Órq¿ÂÞÚ%›º—¯ÚC\Òïò/cŒ/9¼2û³\pÁÙ7n|à€‚/,^üš£;gw\—ËŸÝb—•ÝYì¼ø˜j×_åý5 æývD¥kí¢E‹–oݺõ±MÚ¯óÒå+ÿ6‡W÷»4ØË¢^ôý?ƒ5ºN€Àðšÿ"þ¸z @€ @€ @€ @€ @€ @€h»Àµ×^¾=¤tCÙÀ1†ç>í”g½¿¬ní1‡ ßU©V?Çž9„~9CR}¿â¾Gw¿5„ôP¿K}™×tFeæœËÌW´ äâÒ W,í˜Ýùoy±cnl,çúZmg*Ò{J–öxsŒçqô Ÿ?ï¼×4 C¶4V.ê¾põ;sFöí­Ô§”.»ºvÙ÷Z©UC€ÀЇ楚 @€ @€ @€ @€ @€ @`œŠ6µ¶„ÊÛ»{V5=ý°•q–,YuD÷òÕŸÊ' þu®F'ÍmežƒÕlÚtå)…ÿ~°¶×òI‡OÏÌ-K—­Zþò¿Û›®5ŸˆØ±ôÂUgåúÏ„Jõ“yÇ s´ßo¸êÒçýmliž«svç S4[ª?HQ>år^÷²Õï•ÐR°2¯íá°§ï]Ê%Ú Ðц1 A€ @€ @€ @€ @€ @€ @`ÌúêáŠÕð—yÂY¥“VÂU9Ðöñ¾»þª,­ïW°dÙÊÓò‰†o̧#®Ž!ѯiH/c G…žžj¨ÕêC긯¸wÝšȧG¾"·Ô?ÆîŽPí^ºlõ])¤kòÚo.R¸3Ÿ$y_ÞÏ‘1¦cBЧ„˜ÎË¯'Ö’bŽŽàQìzðw«3ç<'Äø+eÃ4Bœ±Z½®;‡2ûúÒ;7­¿4Ÿ:Yþ8«§gîœØõǹÿÛ³CëÓ"½¾·÷ò[ËgPA€Àp‡£¦ @€ @€ @€ @€ @€ @€À¸ l®­¹­{ùÊ¿‹¡Rz²a´5ò3oêìš±rɲU EØ´+ìüé§jµ»nàœs.š_Õq|µ#.Ì¡À7ä`ã Ö |Ÿò#‡oϵÇl{ò}ŒÕóB×S7‡pۓ׆ö"í|ðÞ »æµ9¯«õÓ cXC‘olLUÉùÈoö¾ßïâ/›û½ÊÉ yŒ²S0‡Ü|bš7>°xéÊ ;;ÂWrÈqÆ×›}Í÷õñ9øyk^ß5©ž¶Ö«Å1U‹‡ãŒâÄ"uœ\©¤“C §ä Ÿ—ïѼfcl+Rúà†ÚšO¼î=íplŸ¥‘ @€ @€ @€ @€ @€ @€ÆHàþúcï;¼:»q²â±-McW%„? Õðg]avXºlÕÎzûiðÝrG‡NÈáºC[k_Qîû‹P¯ •Êqy¬5ë[ñŒÜ>Ü€cزeË®óÎ;ï‚ÎÙó·äpß‹›ÍÕ¾¶tÅýwÝöú#œ˜­šµ²q!„™ùN‡J¨>â,ÏqæúýùÞnÚ~÷mïØÿªw´[ ñÓêA€ @€ @€ @€ @€ @€ @`R l­Õv„T?/ŸŸøð°ž9 ÷¬|IãëÃ)ýËîzøõÞÚÚkC¨§l ùÅ—•Õ”µoÞ¼ùѾœ›Ãw_*«i{výÄú+׬xøá‡KOgŒ!8àØXïú«ÖnÌ–¿Ä¼c¤ëIÿ¼÷î­ïìÞºukßHÆÑ—rÇr# @€ @€ @€ @€ @€ @€P wÝÚoÆ”ºóÒöŒáòŠ"…¿î-v¾ôšÚšŸ7æÝ±½ò½ü¥h¾†4â€ccüM›6=Ü{åš—¤”ÞRx°ùœÃiM¤¢ø“Þu—¼>÷.Ž:ê¨òüQ m 86VÛ¸§»ëé9døÍá¬~$}²iJEzOÞûC­VìÉ\ú ð¸@ù/R @€ @€ @€ @€ @€ @€&¨Àú«Ö|®¨§ÿ”q·ŒöóÉ‚ŸN©ïŒ ë.ù‹þ¸ë®»ô‘6üJ³ùóI‘O?ùŠßjV3„¶Ô»nÍÿÙY<úÌÒUCè×´4Ÿ ¹)ŸJù¬Þ«.ýÛ\˜·›ÏS¬?¥£i§FaŠm 86æjG÷ɱø›<øŽ²ùÛÓž¾›ŠðÂÞ«Ö¼»=ã…VJÁ´2ˆ @€ @€ @€ @€ Ð_à¦ÿVøÅ½w÷¿tÀëC›~ãygp}*_xô‘á®;oßûÜñðCa×c;ÃcùÙ·gwèê:$rèaûžsö~}êq'†®Ù‡Le’i¿7?+#ûøò?nýé䬳—„ÇwÀu @€©+°¡¶æú³Ï~ísfÏ­~4_›Ã„±­»MéºêÑ»î²7ßxq>‰ì·ko\¯¦êË_–4«JÛ§jµ»rý²ÅKW½·£^bxuÞúiC£ ÌIÆMù$Ì59Øø™}gÎìë aÆÀËßïxa¤ï7oÞühã]g÷ô|xV¥ëÏ*1þA~?k¤ãÐ?…{‹>°ý®Ûþ¿­[·öÐî£* à8ª¼'@€ @€ @€ @€ @€Àô¸íÖŸ„ŸþøM7äQÇL‹€ãc; 7Ýøíðƒ¿|àþAM|`ûm“}ÂÓžN=íÙái¿rJ¨vøg Mò ~V†ï¹ëŽpãw·t€zŸ—~P  @€Àز岇òW.îYñ7•Ê[sÂñu!ÆC‡½í”nËÁ¿ë걸øêuk¿Z6ÎöâÑÚÕ®„ý¯”ä?ñ/]zÑ)ë×_~ॡl‚&í›Ö¯ùnnn<ß½dÙÊÓ*!žŸÃŽ'å3É_ìý‘)†²ËÏSˆwä`ãíùÄÆëî*Ûr}­6h@±rHÊÇ’GL÷”T 
»yK­voîü_ÏíYõÁÎJ|S%¦Wäý¾þÕ­á‡?ø÷P¯/l•ÿÁm>îÇ{Ÿ3fÎ Ï8í× Î\:g”žž2ز\'0%Š¢ÿºõ€Ã…¦ÄÞl‚¹À¦ÚÚÿÈ£üáâÅ‹ÿ¼Ò5ïE•Î •xFé91…#ãòßÝ}9xoþÝ›·äðÜçúŠâº}ã´¼ ­µÚŽ\<ü@eË35/ܰîÒ›rEãÙ–G%uš}š?R¸³yÁÈ[¯©­ùyå/ÏsιhþÌÙ/ËÇ—ç{ö«ù¾™ƒ¥GæuÎ8Sþ¿Ví 1ÝŸk¾—búz,Â׋=ák6\:j¡ÌkðžÁ·ÑB€ @€ @€ @€ @€†%pón _þâ§CãôÆv=vïz,|ïÛßÈaÇ…—½ò‚pô‚§¶khã˜t߸áKáž»ï˜të¶` @`l6mÚôpžq˾瓓çàãaqöìy•4£ëÑbç/>S«5ŽSÏ99ƒ 1=¥r°†þ×Rõ€cÿ鮽öòÆ=ûä¾ç“M .ì<ñÄÓžÒW s:êá¡;«»hv:哽 @`ÜÇÞÄ @€ @€ @€ @€ 0Õ§Ê}éó׆ÿøþwGmk=ø@ØxÕ%áyg¼8<÷ù/̇ϔ¥2jK10qøÙ­? ßúÆWÇen“ @€SC`_ð±~ôhA £G…²„c¥Ó€ã`ËÞ¶mÛžül¬eB¬g°uºN€À/Ê~½ü²Ò+ @€ @€ @€ @€ @`Pz½>»e膟˜<¥¾qý—§®^¡JÓEàþûîÝûs6]ökŸ @€‰ BõÔ²uÄ"ÞZV£p‚ãÁT\#@€ÿ?{÷$çyß þב3‘‚È$ÀL‘IQ’,J4m™–¶ì]Û{gímÕ…ºÛZßÙë;o•«öŸµ}»wëÕY2eJ"E*1‹I0!ˆ@d‚Hƒf03}ïÛ¸ 9ÓÝ3Ó3èðyT]Óó>Ïû„Ï3ÝhBøÎC€ @€ @€ @€ Ð 4døÄO~ûö¼Õ‹»úßtÿÞ]ñÒóOÆmÿTÿ;Ó2hi9?}ô;ÑÞv¾Ìgjz @€ÒÜ÷ÀW¿™x+:cGGWì8Vwnçê‡>WšÞ{ÑK]vU$ÉWZ²çÖæ«WG€ž{’q @€ @€ @€ @€)ðêKÏõ:Ü8yêŒ3v\ 1*† g[ÎÄ™Ó§âø»Gãä‰ãEޱyãš;nB,Y¾²è{4$Pi­g[â§|'÷:©´¹›/ @ ï™/&±Â1Qј<¦d‡fï{àkï$×vd#»£+2>ú½o>Ù÷þ ßyïý_–‰Ì½ùZ&Ìïyâá‡ßË×FzpìIÆu @€ @€ @€ @€ P„ÀžÝ;bÃÚWŠh1dè°XvÍõqõ‚¥1bä¨ï9tplysmìÚ±%ÒÓ! 
•ôÇ1cÇÇô™Wjªž@Å ´œ9ÿè¡8uòDÅÍÝ„  @€ýÈD6ù”s±LR’ç3ÒG:¼³.7$Ï4àØ”‰ÿ.£1yä+kòUª#@€@>º|•ê @€ @€ @€ @€ @€žÒSåž}òñž\R3söUñÀƒ×®º%o¸1½eJrºã÷|!¾pÿWcäÈÑ—ôÒýÓlrdÊÓ¿x4.´·wßÀU*žfúèÃßn¬Ðý3m @ Ÿ™LS³×|þó¿3µŸ£ôxûM÷ß?4êâ{lð›Š$uùL¡6ê  Г€€cO2® @€ @€ @€ @€ @€o¼òB´·/Ð*r¡ÆÏ|á:lxÁ¶—6˜4yZ|ù÷þyÌœ=÷ÒËÝ>?®56®­Û: T¢À®[ã‡ßýF¤'8* @€ÚÈîÊ·îôDÇú!ŸÍצ?uSê‡þOÉI‘ã ôq¡«=ûýmT @ GÇiT @€ @€ @€ @€ @€žÞ;~,¶n^ßsƒßÔÌ_´,®¿ùŽ‚ízjÐÜ<$>ùÙ/Ǥ)Ózjòþõ k^‰¶óçÞÿÞ•(ÐÙÙ/=ÿd<õ³9•´7М  @€’ tuÅÏ uV—ÉüÛÏþw'j×Ûúûøê'áÆWð¾lög<ò­ãÛi@€{€q™ @€ @€ @€ @€äxõ¥ç"›Íæk’ %~ìŸÉÛ¦˜Ê†††øÔç~;F“·y{{[¬[³:o›r¯Ç|:ê @€ @€ @€ @€ @€@7§Oˆ½oïì¦æƒ—n¼åΨ¯¯ÿàÅ>~7tè°¸û3÷¼{S k=ÛR°]¹6x{×öxè“;½ïÐ}å:Mó}{vÅÃÿô÷q4 ¹* @€?øác‘— Yd2qã¸I3ÿ­û¿v]¡¶ùê?}ÿýWÜ÷;¿ÿ—™¨û‡¤]ÁÌQ&²ùèÃßz-_Ÿê PH ¡Põ @€ @€ @€ @€ @€À¶lZ÷Á Ý|7mÆì˜2mF75}¿tÅÄ)1{ÎÕ±g÷Ž;éèèˆm[6ĵ«né±M¹W¤'cîÚ¹5÷7ab,Y¾2æÍ_å>uóëƒÀ¹s­ñÆ+/Äækúp·[ @€Õ-Žÿ%²õ/''*fò­4©¾+ùÝ*oÜ÷;_{*º²ÿþGßÿÖsùÚ_Z÷¥/}e^¶¡áLúøZr}Häé×wf#»úG]çÿ¯Kûñœ}p싚{ @€ @€ @€ @€ @ f:;;cû–׿ò†lÓ—×ÝpkÞ€cÚg¬ä€ã¥.ï½{4^xægñÊ‹ÏÆ‚EËbñ²•1zÌØK›x^¡i÷ÍäÄÑu¯¿íímº Ó&@€ ¬À¾÷íWî{à«ÿ5åŸ3R’M¼;ê2w'÷ìˆLfgd³{ÓG6û¢+Žf3u“ê¢kFR7#i›|™™åÉó‚'6¾?~6Ûҙ߇î|ÿš'裀€cáÜF€ @€ @€ @€ @€µ)ðÎÞÝq®õlÞÅOž:½ä§7^0=Åqæì«bßž·.^úÈ×ãÇŽÄ©“'ª*ØÞv>6®{-÷˜9{nîTdzæ&ÿ.»ˆãe>"äÂåHOè|kûæxõåç¢åÌé^Meò”é1z츢BƽêXc @€@ dÛ㦸+ùè;»Øi&Ÿ“¯NÚ^|`Nò‹™_ÊXñîⳌïxÿk6ÛÙ]òè÷Úõþ5O Ð~¼#õcT· @€ @€ @€ @€ @€ Ø¿owÁ™Ïš=¯`›þ4¸êêEoßýÖÖ‚m*µÁ¾=»âg?þ^|ç›ÿ) <¾mIøQ© ƒïì}÷ÿ‹gžøq¯Ãó-Ï}éÁ:txe,Ö,  @€%xä‘ooïÊÞšØV¢.ûÔM2þÁ®®ì|ï¡o÷©7 @ ÇnP\"@€ @€ @€ @€ @€= ìONp,T¦Íœ]¨I¿ê§NŸUðþÝ;/ë¿}.8¿R48}êD¼üÂÓñÿã…g~Çß=ZŠnõ1@g[ÎÄc?|(Ž=Ô«ÒS:oºí®øøÝ÷F}ýûÇõª  @€•.𓇿uà\gëǒð×^޵$§p?‘Œ¿â‘‡ÿñùË1¾1 ¨^†ê]š• @€ @€ @€ @€ @€Ò œ9}*N|/o§MÍC⊉Sò¶éo刑£bÔè1qúÔÉ»JCd§OŸŒQ£ÆôئZ*::.Ä–Mër)ÓfÆ’å+ãʹó£®Î9å´ÇÉ?ŠïõtÆŒw$ÁÆÉS¦÷ú^7 @€ªMàç?|lñý÷ß4?3ä_'¿âÏ"“1àkÌf;³™øßô½oýûd¬Þ ð €Jp¬ô4 @€ @€ @€ @€4û÷kÚŒÙÉ¿3Îl×ßS§ÏNŽëóvsèÀ¾š8^Š®9} 12-½6-¹&†~iÏ+@ §®¸î¦¸îú[£¾Á?w­€-3E @`6?üpû房¾÷þ¯~»©®ë¯"êHþó£¹ÔÃ'IƃIœñ¡ldÿá‘ïþãÖR÷¯?\ð_|%|%@€ @€ @€ @€ @€Ž¿{´@‹ˆéIÀq0ÊÔé³bÛæüÇcGÅü…Ëc:%ãêùKâì™Ó±ë­mÑÕÙÙç~϶œ‰×W?k^ýU̽zQîTÇI“§õ¹?7žÀ„‰“㎻î WL¼AD€¨0Ÿ<ü­É”¿öÙÏ~å_7 ¯OBŽu_K‚Ž7ögÉÛ­ÉýtE|ëÑï}óéäyòT!@€ÀÀ 8¬¯Þ  @€ @€ @€ @€ @ ŠÞ;^8à8nüÄAYñ„ 
…Ç9š+­ŒKÖuç§~+n>ך87Ä–7×ęӧú¼Œ®®®Ø¹mSîqÅÄ)¹ ãUIàÑ©€}&°‡ +oüX,X´<Ò @€Â?ýé?HZýçôñ¹Ïýî„úaM×Öe㺨Ë^›œ+?79…qLDftrã¨ä¤ù$G”=ÌÁäëÁ¤þ`6›I¾ff»b_GÛɧ{ì±3…GÕ‚¥p,¥ž @€ @€ @€ @€ @ ÊN·à GŽ]°M)ŒYxœ÷Þ=RŠ¡.KC‡‹kVÞ+®»1öïÝ›7®½oïì×\Ž=Ï=õx¬þÕÓ±`ÉŠX¼ôº9ª°c¿usAƦ¦¸æº›cé5×GcccÁö @€t/ðøãßIÿƒåÉß<>Òhñý÷7m~øáöT¸@€Ë( àxñ M€ @€ @€ @€ @€•#pþü¹hmmÉ;áôÔ¹áÃGæmSªÊ¦ææhjjŽöö¶»ìèèˆÖ³-‘žŒW©%9e&fξ*÷HOrܺi]lݼ>εžíó’Ò½\ÿÆêذ敘uå¼Ü©ŽÓg^ÙçþÜØ7††ÆX¸äš¸öú[" ´* @€+ Ü8°¾z'@ oŽ}ss @€E Í4ǹlÏÿgj]hB€ @€ @€ @ lN|¯à\ÒSÓ@Þ`•#GÅ{ÇåîÌ™Sp¼tqéi‹×ß|G\wÃmñö®íÉ©ŽkâÐ}—6éÕól6{vïÈ=ÆŒŸ :Î_¸,Ò•Hn—,_ ¯ˆæ!Cn = @€ @€@Ù 8–ý™  @€Ê˜T7&öt©Ü˜9 @€ @€ @€KÒ“ •Q£ÇjRÒú4PY0àxêdLš<­¤ã^îÎêëë㪫åéú·¼¹6vl}3ïi–…æ|òÄñxñ—OÄ«/?iÈ1 ॡG¥tipô“Ÿ¹/fÏéi§  @€pô3@€ @€À€ \Ý8]ÀqÀtuL€ @€ @€ 0ØçZÏrd8Ì’ž„W¨œm9S¨IE×EÜzÇ=qÃ-Û7çNu<~¬ï¿ˆõB{{lÚðFî1}敹 ã¬+ç êÉœ½!y&ßÜ<$æÌ[˜§…* @€¨5ÇZÛqë%@€ 0ˆ×7/Œ'ϯÄ E€ @€ @€ @`àŠ 8Ž5ÈÇ…Žmm祌znllŠEK®É=Ž> :îÚ±%:;;û<Ëwö½é# ®.Zv],\¼<† ÖçþÜH€ @€|P@Àñƒ¾#@€ @ „7%Ç1™áq2[ø7Ù–pX] @€ @€ @€Ö"NpLCvƒY› W+ÇK!&MžéãæÝÛ7oˆÍo®Ó§N\Ú¤WÏÏœ9¯¾ôl¼ñÊóqÕüŹS¯˜8¥W}hL€ZX17›w©ÇNÄžÇ3{ò6RI€ P3Ž5³ÕJ€ @`ðš2ñÛÃïˆÿ·å§ƒ?¸  @€ @€ @€”X ˜÷ŸåÕׯíü¹KTNwC† å×ÝË®½!wã–$è¸g÷ŽÈfó‡ozZazäö-sIS¦%AÇU1çªQ__ßÓ-® @ æþöO ,9ÿ™ò'Q •j @€(ü75a™ @€ ŒÀ‡ÝÏœ_»:Ìz%@€ @€ @€ 0H ŽÔÐPøDÅ‚ô¢A}ÊZ<ÁñÄ™L&fÌš“{´œ9[7­K룵µåÃM‹þþÈ¡‘>^6<-¹&-½6†Yôý @€ @€u @€ 0 ™úøwc¾c2Ãr} @€ @€ @€pôô¾B¥˜Àa¡>zS_Ì Ž½é²êÛŽ9*VÝt{<ø‡_O~澘6cv¿Öœžì¹æµã¡oüMüòéŸö«/7 @€ @€Zp‚c­í¸õ @€¸ ëÇÆÿ9öâßžøFœÌž½ 30$ @€ @€ @€þ tv 6q¢bÿgòßz(f¼®®®ÿvƒgï ÔÕÕÅœy s“'ŽÇækcû– ÑÞÞö~›Þ<Éf³ñξݽ¹E[ @€ PóNp¬ù @€G`~ãŒø»ñÿ*æ6LœB€ @€ @€ @ ÄÅœàØÐÐXâQówWÌ ŽŽù ÓÚ1cÇÇ-·ß_ýÿCÜq×½qŤ)…oÒ‚ @€è·€ûM¨ @€ŠHOrüÛq_GZ_ŒïŸý¥Ó‹…ÓŽ @€ @€ @ ,Š 8Ö—á Ž]eáW “Hª /Ï=Ž9˜;Õñ­í›£˜Ó;+a}æH€ @€ÊM@À±ÜvÄ| @€T¹@C¦>î~{|aØÍñJÛÖx5yì¸ðNé:ç²mU¾zË#@€¨D¡™æ˜T7&®nœ74/ŒgãÝJ\†9 @€ @€%èê,l¨Ü–WW__peÙ®®‚m4ø¨ÀÄIScâÝSãæÝ¯½üË$ì¸æ£\!@€ @€ú%0¸“Ò¯©º™ @€jhÊ4ÆÇ†,Ë=ªi]ÖB€ PýÏ&G… @€¨Mú"‹]ƒ&Ìve nF¦®®` >*îå¾=oŦ kâ}»?ÚÀ @€ @ ßŽý&Ô @€ @€ @€ @€Ô‚@cScÁevvvlSÊÅŒW_Wø”ÇRΩÒû:×z6¶nZ[6­–3§+}9æO€ @€ÊZ@À±¬·Çä @€ @€IÙ@IDAT @€ @€ @ \› N¥£cpŽ Ω¾^À± RÒàÐÁý±yãšØ½sk öIœÅÌO @€ PŽÕ¸«ÖD€ @€ @€ @€ @€%(&àXÌ‰Š¥œXgÊúÿT°'ó ÚcǶM¹`ã{ïí©YÑׇYt[  @€ @€ká§€ @€ @€ @€ 
@€!PLÀqÐOpìì,8óbNp|gßÛÑÞÞV°¯R4˜>ãÊhjn.EW}îãÄ{ïæBÛ·nŒ íí}îçâ3f͉%ËWÅÌÙs/^ò• @€(B@À±$M @€ @€ @€ @€ @€@cSSA„²<Á±¾ð?|é…§âÄñc×WŠ¿ý{ÿ"Æ5O,EW½ê£««+ÞÞµ=l<øÎÞ^ÝÛ]ã4ð:Ѳ$ظ2ÆŒß]× @€ @€…ÿÖ¢@ª  @€ @€ @€ @€ @€@-446\fgGGÁ6¥lPL rÈ¡¥²âú:Ûr&¶nZ[’GëÙ–~Ïô˜q¹Pcnljº¼'Qö{1: @€ @€—Y@Àñ2o€á  @€ @€ @€ @€ @ 2Òû •ŽA83ÞÐaà M»*ëìß“;­1=µ1›Íö{3gÏM‚«bƬ9‘ÉdúÝŸ @€ @€G? @€ @€ @€ @€ @ bŽçZÏÑSéš´¶>‘°–Žímm±}ëÆ\°ñä‰ãý†nljŠ‹–çNlLOnT @€ @€Ò 8–ÖSo @€ @€ @€ @€ P¥Æ(¸²–3§ ¶)eƒ–3§ v7¬Np<~ìH.Ô¸cÛ¦èè¸PФPƒ1cÇçBó.‹4ä¨ @€ @€# à80®z%@€ @€ @€ @€ @€*7þŠ‚+:SDà°`'½hÐrºp rè°ÂÁÌ^ Y6M;;;c÷[Ûbó†7âð¡wJ2¯YW^•WÅŒYsJÒŸN @€ @€üŽù}Ô @€ @€ @€ @€ @€œÀè1ã"“ÉD6›íQ¤Op9jtó­ÄŠ3I¨s˦µ±uÓú8®µßKhjjŽ‹—Çâe+cô˜±ýîO @€ @€@ñŽÅ[iI€ @€ @€ @€ @€5,P__ŸàÆÅÉÇ{T8Ûr&ººº¢®®®Ç6¥ªHO0lmmÉÛ]Þ1rTÞ6•P™†JßÙ÷vlJNkÜ·ç­¼!Ób×3v܄䴯•qõÂ¥ÑØØTìmÚ @€ @€%p,!¦® @€ @€ @€ @€ @ ºÆ¿"oÀ1 ⵞm”PáÙ–Ó±Ó_1eÑ’kâ\ëÙbšö»Í¡ÃŠî£íü¹Ø¶eclÞ¸&NŸ:Qô}ùκr^,]±*¦Ï¼2_3u @€ @€À 8²! @€ @€ @€ @€ @ :Š ž:ùÞ Óq •4YLIåTŽ9˜ 5¾µ}Ktvvô{jéI– –¬ˆ%ËVƨÑcúÝŸ @€ @€Ò8–ÆQ/ @€ @€ @€ @€ Pc‹ :°/¦Í˜=àßÙWpŒbæ[°“An°îõ—ãÕ—Ÿ+ɨi uIÞ¼zÁÒhll,IŸ:!@€ @€J' àX:K= @€ @€ @€ @€ @€@• ŒKs…ÊÁwöjR’úbÆ™2uFIÆÌNÚÚÏ÷k¸L&³®œé©”ƒ4í×dÝL€ @€j\@À±Æ,ŸÔª@g¶+Ö¶ïŒÕm[bë…}q¨óx´fÛ"›üO!@€ 0™ÈİLsL­ gÆMÍ‹âÚ¦yQŸ©ŒáA€ @€ @€@Æ$Ç!C‡Åùs­=öpäðèè舆†û'zÚÛã葃=Î!­2dhL˜89o›jªln ¯ˆ%ËWÆÈQ£«iiÖB€ @€ªV`àþö¤jÉ,Œ¨dl6Oœ=jy&Žt¨ä¥˜; @€@… ¤¿\ãlö|ìì8{<~nuLªޏ3î²*Òß0® @€ @€ P~uuu1窱å͵=N®³³3Òã´é³zlÓߊÇöGúÿç+éé…µðwãÆ_K’Ó¯^°$ •6æ#QG€ @€”™€€c™mˆé @€'pºëlüåɇbý…]7ˆž  @€ôC ý%ÿáôâ™sëâÏÆ<£ê†÷£7· @€ @€ 0Psç-ÌpLÇÝ¿g×€÷%ý*ÓgÎ)Ô¤bëÓàæì9WÇÒ$Ø8uƒ¤ dâ @€ @ B+d£L“èŸÀþŽ£ñoN~#w¾×¿ŽÜM€ @`Ò_Êñß¿÷7ñWcþ f4L„ A€ @€ @€@o¦L›C‡ s­g{¼-=áñšU7GsóÛôµâü¹Öغy}ÞÛÓ“&gϽ:o›J¬l24.^‹—]#G®Ä%˜3 @€ p‰€€ã%ž @€Õ)žÜ(ÜX{kU @€jH9Gú9öïÆ}ÝIŽÕ¼ÑÖV‘·_?»"çmÒ @€ @€@éÒðàœ«Äækzì´½½-6­=®»á¶ÛôµbúW£ãÂ…¼·_9w~ :,o›Jª?ab,Y¾*æ-X þéc%í¹ @€ @€|þ+?ŸŽ: @ â²ÙlüåɇœÜXñ;i @€ÚHCŽéçÙ¿ûG‘ÉdjÁª ”¡ÀÇoœS†³2% @€ @`°æÎ[˜7à˜Îgú×bé5×GSSsɦ×vþ\œ|£`‹–^[°M¹7Hÿ^4 j.]±*ÒS3 @€ @ ú«oO­ˆ¸Dà‰ó¯Çú ».¹â) @€ÊH?ϦŸk?5ôúÊš¸Ù @€ @€¨r4p7|ÄÈ8Ûr¦Ç•¶·Õ/<·ßõÙÛô¶âÅ_>.´ç½mô˜q1uú¬¼mʹrèaqÍÊ›cñ²ëbÄÈQå&NžW]½¨`;  @€ @€å ÐP“0 @€À@¬nëßÿ94sÒ' @€¾ ¤ŸoW5Ïïëíî#@€ @€ 
P–­­-±aí«e3·%ËWF}}}¯æsë÷ÄÃßþ/‘ÍfóÞ÷ôωwŽëoº#êêŠ?› ³³3^}éÙØ¸îµ¼ý_¬¼é¶Â§J^lëkåTÃk¥r´Í” @€S@Àq0µE€ ªÀÖ ûu<ƒ @€Hm>ß$¯¾  @€ @€.“ÀÙ–3±úWO_¦Ñ?:ì¢%×ô:à8nü±ôšëccAÍõo¬ŽÃ÷Ç·Þ“§Lÿè>tåÐ}±úÅgãèáªéþÛeÉ<¦LÑ}¥«-P ¯•ŠÞ“'@€ @€p0Z @€—[àPçñË=ã @€(™ÀAŸoKf©# @€ @€¥XyÃm±kÇ–HCh…ÊáƒïÄ£ßÿfŒŸ01,^ãÆOŒ‘£ÆÄ°áãõlKœ9}*Ž¿{4¶n^'Ž+ÔÝûõ'O‹nùÄûß{B€ @€¨ÇJØ%s$@€ú$КmëÓ}n"@€ PŽ>ߖ㮘 @€ @€_ 455ǽ_üJ<öÇâ\ëÙ¢XÒãKÏ?UTÛB†wú‹½>}²P¿ê  @€ @€-P7ÐèŸ p¹²‘½\C— @€@É|¾-9©  @€ @€”T`ì¸ ñ¹û~/† VÒ~ u6løˆøü—LN]¨©z @€ @€@Ù 8–Ý–˜ @€ @€ @€ @€T¢À¸ñW䆃r1rT|áË_4\© @€ @€*Q@À±wÍœ  @€ @€ @€ @€ @ ,Òã—÷ãʹót~W^µ ¾ô;£ÇŒÐqtN€ @€H†ì\ß @€ @€ @€ @€ @ ÖÒ“ï¹÷˱÷íñâ/Ÿˆ3§O•Œ =òÖÛ?WÍ_\²>uD€ @€¸\Ž—KÞ¸ @€ @€ @€ @€ PÕ³®œÓfÌŽ-o®‹Ý;·ÆáCïôy½ã'LŠ¥+V&ÁÆ%ÑÐàŸþõÒ @€ @€@Y ø[޲Ú“!@€ @€ @€ @€ @€@uÜóÙ/UÇBú¹Š††ÆXvÍõ¹GëÙ–x{×öÜÉŽ§OŒôûöö¶nG>|dLš:=&O™S§ÍŒ 'wÛÎÅÊðZéýÞxë'"}( @€ PùŽ•¿‡V@€ @€ @€ @€ @€ 0løˆX¼ìºÜãât;::¢µµ%Ú΋Ʀ¦h24š›‡D]]ÝÅ&¾ @€ @€ªV@À±j·Ö @€ @€ @€ @€ @ ÜbÔ¨1éC!@€ @€Ô˜€_ñTcn¹ @€ @€ @€ @€ @€ @€ @ ËaÌ @€ @€ @€ @€ @€ @€Ô˜€€cm¸å @€ @€ @€ @€ @€ @€ @€rp,‡]0 @€ @€ @€ @€ @€ @€ PcŽ5¶á–K€ @€ @€ @€ @€ @€ @€ÊA ¡&a @€ @€ @€ @€ @€ Pùú·ù×pìDü³[~ç?ß‘¯ÕKßý“ç«WG€ P=ŽÕ³—VB€ @€ @€ @€ @€ @€Ë*°~W¦Ðø³#³ 5RO€ Puµ±L«$@€ @€ @€ @€ @€ @€ @€ÊI@À±œvÃ\ @€ @€ @€ @€ @€ @€ @€@8ÖÈF[& @€ @€ @€ @€ @€ @€('ÇrÚ s!@€ @€ @€ @€ @€ @€ @€5" àX#m™ @€ @€ @€ @€ @€ @€ @ œËi7Ì… @€ @€ @€ @€ @€ @€Ôˆ€€cl´e @€ @€ @€ @€ @€ @€ @€rp,§Ý0 @€ @€ @€ @€ @€ @€ P#Ž5²Ñ–I€ @€ @€ @€ @€ @€ @€ÊI@À±œvÃ\ @€ @€ @€ @€ @€ @€ @€@8ÖÈF[& @€ @€ @€ @€ @€ @€('ÇrÚ s!@€ @€ @€ @€ @€ @€ @€5" àX#m™ @€ @€ @€ @€ @€ @€ @ œËi7Ì… @€ @€ @€ @€ @€ @€Ôˆ€€cl´e @€ @€ @€ @€ @€ @€ @€rp,§Ý0 @€ @€ @€ @€ @€ @€ P#Ž5²Ñ–I€ @€ @€ @€ @€ @€ @€ÊI@À±œvÃ\ @€ @€ @€ @€ @€ @€ @€@8ÖÈF[& @€ @€ @€ @€ @€ @€('ÇrÚ s!@€ @€ @€ @€ @€ @€ @€5" àX#m™ @€ @€ @€ @€ @€ @€ @ œËi7Ì… @€ @€ @€ @€ @€ @€Ôˆ€€cl´e @€ @€ @€ @€ @€ @€ @€rp,§Ý0 @€ @€ @€ @€ @€ @€ P#Ž5²Ñ–I€ @€ @€ @€ @€ @€ @€ÊI@À±œvÃ\ @€ @€ @€ @€ @€ @€ @€@8ÖÈF[& @€ @€ @€ @€ @€ @€('†ršŒ¹ @€ @€ @€rxî•ÝýžâÇoœÓï>t@€ @€ @€ @€¨tÇJßAó'@€ @€ @€Axþµ=ýOÀ±ß„: @€ @€ @€ @€ª@ ® Ö`  @€ @€ @€ @€ @€ @€ @€@… 8V؆™. 
@€ @€ @€ @€ @€ @€¨ÇjØEk @€ @€ @€ @€ @€ @€ @€& àXafº @€ @€ @€ @€ @€ @€ @ «a­ @€ @€ @€ @€ @€ @€T˜€€c…m˜é @€ @€ @€ @€ @€ @€ @€jp¬†]´ @ [Ldº½î" @€Jðù¶wÍœ  @€ @€ @€ @€È' à˜OG PÑÃ2Í=“'@€ p©€Ï·—jxN€ @€ @€ @€ @€@58VÃ.Z ЭÀ”úñÝ^w‘ @€@% Lõù¶·Íœ  @€ @€ @€ @€È# à˜G PÙ gVöÌž @€À% |¾½DÃS @€ @€ @€ @€ªA@À±vÑ @€nnj^Ôíu  @€T¢€Ï·•¸kæL€ @€ @€ @€ @€@>Ç|:ê @€Š¸¶i^Lª[Ñk0y @€©@ú¹6ý|« @€ @€ @€ @€ @ š«i7­…ø€@}¦.qç®ù† @€@% ¤ŸkÓÏ·  @€ @€ @€ @€ªIÀ¿Šª¦Ý´ @à#÷ Y+ç~äº  @€*E ý<›~®U @€ @€ @€ @€ PmŽÕ¶£ÖC€|@ “ÉÄŸy0&×ûÀuß @€¨ôslúy6ý\« @€ @€ @€ @€ @ Ú«mG­‡øˆÀ¨ºáñWcþ@Èñ#2. @€”³@nL?ǦŸg @€ @€ @€ @€T£€€c5îª5 @€˜Ñ01þnÜ×cEãÜÔ¹@€ @ ÜÒÏ­éç×ôs¬B€ @€ @€ @€ @€jh¨Ö…Y @àÃéÉ7=öâ‰ó¯ÇC-ÏÄ‘®nâ{ @€—U`RÝØxpÄqÏU‘Éd.ë\ N€ @€ @€ @€ @€phaý @€e%þ#ñO ½>î²2Ö¶ïŒÕm[bë…}q¨óx´fÛ"›üO!@€ 0™ÈİLsL© gÆMÍ‹âÚ¦yQŸ©ŒáA€ @€ @€ @€ @€Ë. àxÙ·À @€Ë!þ£ñUÍósË1¾1  @€ @€ @€ @€ @€ @€@­ øuðµþ`ý @€ @€ @€ @€ @€ @€ @à28Áñ2 ’ @€ @€ @€ @€ @ ÷GŒí[7æ½qμ…1mú¬¼mT @€ @€@y8–Ç>˜ @€ @€ @€ @€ @€@“'ß‹Í×äm5fìxǼB*  @€ P>uå33!@€ @€ @€ @€ @€¨6Ó§NÄÓ¿x´Ú–e= @€ P'8–Q @€ @€ @€ @€ @€èèèˆõo¼ë’G]}ý+}G€ @€G? 
@€ @€ @€ @€ @€%Øûö[ñÒóOÄéS'sý 8–”Wg @€¨ǪÙJ !@€ @€ @€ @€ @€\^3§O%ÁÆ'cÏî—w"F'@€ @€Šp¬ˆm2I @€ @€ @€ @€ @€@ù tvvƆ5¯ÄÚ×_ŒŽŽŽò¨™ @€ @€@Y 8–Õv˜  @€ @€ @€ @€ @ ò~þØ÷ã}»+oâfL€ @€—U î²Žnp @€ @€ @€ @€ @€Šh;®â×` @€ 0øŽƒonD @€ @€ @€ @€ @€ @€ Pó 5/€ @€ @€ @€ @€ @ "ÆŸ׬¼9ï\¯˜4%o½J @€(ÇòÙ 3!@€ @€ @€ @€ @€È#0áŠÉ‘> @€¨ºêX†U @€ @€ @€ @€ @€ @€ @€•$ àXI»e® @€ @€ @€ @€ @€ @€ @ J«d#-ƒ @€ @€ @€ @€ @€ @€T’€€c%í–¹ @€ @€ @€ @€ @€ @€ @€*h¨’uX @€ @€ @€ @€ @€\fs³ygpìDì9p<³'o#• @€5# àX3[m¡ @€ @ ÷Ù®XÛ¾3V·m‰­öÅ¡ÎãÑšm‹lò?…þ d"Ã2Í1µ~|,hœ75/Šk›æE}¦®»› @€ @€ @€ÀeøÛ?-0x6þ!3åOþ¢@+Õ @€5" àX#m™ @€ @ 7Ùl6ž8ÿz<ÔòLé:Ñ›[µ%@ H4(|6{>vvÈ=?·:&ÕGÜ÷ Y™L¦Èž4#@€ @€HôïK«áïjªeÅìõåZëå·“¾´©¶õ\j`m—jx>Õü³Ö[­­·;× @€è½€€cïÍÜA€ @€ªZàt×ÙøË“Åú »ªzG Ò@ñ8ýƒxæÜºø³1ƨºáå8Ms"@€ @€TÀ…öö8tp_œ9}ê×3§¢å7Ï[[[¢¾¾!š‡ ‰ææ!1dè°¸bâä˜8yZLž2=FŒU6¿^Çþ8øÎžxïø»qþ\kœ?Ÿ<΋öö¶hhhŒ¡Ã†ÅСÓ¯ÃcXò˜[wt\ˆÉ~mÞ¸&–]{C¬ºñö’| YžKNñüpIO0üåS?ÉE?\Wì÷io®=÷HOƼáÖOô)4Zìx=µKOì\¿fuîõ\(X×S=]OO°Ü•„?ÓGȽvÕ-qõÂ¥ÉɃ{re¹½g¥'¤æ{ýöäyñz¡{ÓSr •ö$\W¨ŸÔm J5¿÷¦¯ëîÊ‘ä¤ÆgŸx,Nìû/H½xêcN]÷ÆËqóÇîJNžÛÝp® @€ PƒŽ5¸é–L€ @€>,þÖÕ¿<ù“? ã{—Y 9¦¯Í¿ûG‘üæÓË<à @€ @€*_`ç¶Mñü³?ëóiùÒðF^{{×ö¸í㟊+çÎÏ×¼Ïué †¯¿ò|¤»®®®>÷“ïÆôD¿·ßÚ– l®¼ñ¶1bT¾æ^—žöB²oéi…¥*»vl‰3§NƧ¿ð@ ͨjK?Ï>ùxì}{gI†N÷oÝë/DZ#‡ãîOÿV4''!fIOmLOß;Ûr¦dæÀ·“ÐÒõ7Ý+’*£¤ÁÆ4$µuóú‚'–b>gNŸŒçŸùi¬yíWq×§¿“§L/E·û¨†÷¬‚‹¬µøÞ›þ³öµ“Ÿûû|:pwÛ{â½wã§~7pLƒæÃG”îTÛîÆs @€ò¨+ÿ)š! 
@€ 0ÐOœ=Ö_Ø5ÐÃèŸ>¤¯Íô5ª @€ @€ô] i¼ôüSñÌ?pã¥3k=ÛOüäñÂ3?+i $ãä‰÷âÑï3Ö¿±zÀÂ×’þb¼mIxìá‡þK|gïÅ˃þõÝ£‡ãßùû’†/.â葃9Ï3§O]¼ô¯é©eÿÓß—,ÜxiçïìÛ>ü­HÛƒUÒûáC% 7^œ{zzâ+/>“;Í4ýÙȲ÷í·â»ßúO¹@q©Om,4ï4Xùxb˜†:²TË{Ö@ fßµøÞ{: €§Þ¤'×ÔkzÿÞ]¿~O:[ºÀõ`þ\‹ @€Ò 8–ÎRO @€ @ ":³]ñPË39w“&P+ék4}­* @€ @€½8×z6ÿÑ·ãÍõ¯õþæ~ܱeÓºx6 T–ê”Åí[7Æ’°Ý±£‡ú1«ÞßÚÖv>~òÈ?åÂŽ½¿»w÷h<úƒoÅ©“'ú×Qž»O|/°éLN༴¤'þ8 ¦¶*é)féÉ”ƒQÒ@ÞKÏ?9`A¥‹kØ¸îµøåS?)ÙÏýÅ~/~ݱõÍøÅãßôÄÔËUÒ“üžúÙ’×Ć™Bµ¼g Îeè´ß{ÏŸkMÞ÷¿i| Kúüø¿é/P @€¨]ÇÚÝ{+'@€ @€9µí;ãH×ÀýãÌô_ }¦¯U… @€ @ wGLNÿû¯‘†Õ.GÙ¹}s<ó‹Gû*Û±mS<÷äãI¨ëÂåXF.¬ö˧«õL¿×RìÒ×/K‚l~ÍgNŸŒM×¼?µôä²ôÎR…Sß︛'éÏȶ-”»8Üáƒïä¶¿/æë°a#¢©©¹˜¦i“žüéKBܰöÕxöÉÇígð# ûÐ…40z¶¥´'ÏUË{Ö‡¨*öÛZ|ïM¼¿HÞÿÒ÷ÁÁ*'O§“?+ @€jW ¡v—nå @€ @€@*°ºm *@ }­®jž_35E @€ @€@yœNBkýð¡^‡GŒ³ç\£ÇŒ‹aÃGDôjON1|/9mïdòHOÝëÍ©VéÉy“§Îˆ¥+Võ æ}o''â=Þë{GŽ3g_#GÉ­cèÐaÑÞÞ–œ’u&9•ðLØ¿'Þ=v¸WýnXûJÔÕ×Å 7¼W÷õ¶q°y" È9sªà­ 1rôØ<ãÄñcq" Ët%}ô¦¬}íÅX°xyd2™øùcß‹óçÏå½½®®.ÆŸ˜Ø!‰mz¢`”LCAmîýpÇk^ýUÌ_¸,7ö‡ëúûýéS'âçɉ‡©i¾rŤ)±`Ñò˜1kN 1*êëësÍ/´·Ç©¤ÝÉÏñÎ$h[Ìž¤7îÙ½#çxï¿R’uí}{g°}:ß>R—¾v'NžcÇOÈý|4&ÍÆÆ¦H÷.}-´·µE¤MOEM_Ó½=­óÂ…öÜ©˜Ÿüì—>2v_.TÎ{V&gØÝ‹ §þùJ]ò,‡R‹ï½©ûóI˜ýðÁýEmÁØñWÄðäÏÉôÏ—ä…ž{ µœ>--§{D>øÎÞØ»{gÌš3¯¨±5"@€ @ º«k?­† @€ôZ`ë…Ëó›Ë{=Q7¨q¯Õÿ°| @€ @ ×¿zöç½ 7.\¼"/¿.&\1¹Û±. ]¼{ìH.ØTìɯ¼ølLŸyeŒ7¡Û¾{º˜{s’`Z¼ìº\XnÂÄî×qéXiï­›cý«s¯Këzz¾îõ—còäéBIƒ”mI¨´§R_ßó-‹%ËW&!Ã+>Ò, Y:ù^ìß»;^}éÙ‚á¾´ƒt¼u¯¿ïÿuˆõ#þæÂ¬+¯JÆ^Óg\MÍ=á0›ÍæÆ}sýk¹¯=õséõ3I ( ^9·ô¿à, ²æ+éÏäwß“&Oë¶YcSSòš˜”{¬ºéöرíÍägÿ©\è·Û.¹˜Ž½mó†X¸dÅ%W{ÿ4 g¾üÂSEßxõ‚¥±pé51yÊô^…+ßK±[Þ\;¶¾Yôëa÷[Ûâxò~0>1êo©”÷¬)ÓfÄ}ýëv¹?üÎ7rÑn+“‹éÏÓþËÿ¹§ê²¹^«ï½Û6¯/x*iú>5oþâ˜:cv 2´Û=KÃÃÛ·nˆMÖäÞ‹»mÔÍÅÕ/>3fÏí1@ÛÍ-. 
@€ P%ŽU²‘–A€ @€ú*p¨óx_ouƒ(àµ:ˆØ†"@€ @€*^`GrÒ\n+¦Lš2-n½ãž¸bâ”bšçÚ¤¯/|ù÷#=1 ^m9“÷ÞÎÎŽxÄ}ü³¼í.­Lûü٣ߋô”¸bÊ´$l’®£7!ÊQ£Çĵ«n‰4Ü™KÒ`W1å™'~_þÊƨäÔÄ(ùÂ×^k,KNÃLOM쩤AÏÔ!}L639 òÉ銅Oƒ\¿æ•žºŒ40zëí÷$§qNï±MZ‘ž93 褷wmg~ñhîtǼ7%•›6¼1 Ç|ã.»ö†¸á¦;¢>9³˜’®-=i2 ë>÷ä㑞pW¨¼úòs1wÞÂnà…î½X¿qÝ«IHêÄÅo{üšžZšž¦Ø›×ò¥¥aÙô5”¾&~™œb·oÏ[—V÷ø|WrìoÀ±Þ³zª°ŠZ~ïÍ÷gYlLCÎÝ…Ê?¼Åiø{éŠë“úª$è¸1^H~á@1§êžLNßÝ¿g×€è?ùù¸ëÓ_,êä¬öö¶$4øÃèì踴›}žžÚ˜†×®O6ùžDLüòïþa's~¸}wß_uõ¢ø­û¿V0Üøá{Ó@Ðç“ lzj]¡rèàþ¢Nš,ÔO±õ7쮸ù¶»Š7^Úïðá#ãÓŸ ÒÓ, •ôõøÆk¿*Ô¬Çú4pµæµ{¬¿X1z̸Üõ5Üx±Ÿôë°á#â3_x -½öÒË=>ßûSªå=«?åt¯÷ÞîFC€¾ýÎÏÄ=÷~¹¨pã¥w§¡èɉ·Ÿ¿ïÁ¢ß·ßNN³U @€¨=ÇÚÛs+&@€ @€ÈFößû†òðZ-Ï}1+ @€ @ ü^~áéHC…Ê’å+s¡¾ô´¿þ”1cÇÇ÷|¡¨.òxi§Oˆm[6\z©Çç·Ü~w¬¼ñc=Ö÷¦" òÝsïý‘† •ãÇŽ=ÇB}ªo24àœsÕ‚BM»­Oï¿î†[»­+tqArºeüLC>})'M•×ßVðÖôt³w.Ø® n¸å±ìšúÕU}}}8ýr̘5§`?›Ö¿éÉl})[7¯Ž òÞš¾†ïþÌcøˆ‘yÛõ¶òÆÄ) ;*éÚÒ×l_K5¼gõuíåvŸ÷ÞîHú:ÿÜ—Œ…K®ù`E/¿KO¾M’Å”½oïŒlÖÿwYŒ•6 @€ªI ;WMÖB€ @€ @€ @€ @€@E :°?vl{³à¦Ï¼2n½ãžHO—*E™5g^¬*"d¸ï®8ñÞ»‡|ã•_ðX•œf¸tÅõûëMƒôT¾4,VLI›]]]Å4íW›Û>þ©^ŸžøágϹ:FŽóáËy¿?aRîç$o£"*—¬XUÔØé)Ÿ]æ/Z׬¼©$äá§4Ü[èDÍôgä¥çŸêÓ˜{v>ÍmÅÊ›ûuBgOkjnŽ[oÿdOÕ¸^Ìëú7üæ›jyÏênm•xÍ{ïwíö»îI“§}ðb¿›2uF¤§&*çZÏÆ±£‡ 5SO€ @€@• 8þÿìÝ $Õ™ èç‘GÝ÷]PTqŸâ§T¨Aj ( ’ èu÷hgzºwÖl{mg·w{¦g{mÚzmÇz{m{fm¥æè¨šC B q¸‹£(Ž¢Šº¨;³*#Ö=¥€Ì¬ÌpȈÈϱ4÷÷ü½÷/ž§QV½œM¨p @€ @€ @€ @€tªÀº É==½áÓýn݉Î<çÂ0köÜÔv׿^9a+I”Z÷òs©í$;ž'v5âH’OŒw.L;víÜ^[÷bZµq•'É5ÉÎ’ã=’dÖʼn†Õ]zyÍ;7í'I’és^ÞY™‚mƒJÞ½Ã'é´3Ï~£WӦϳçÌKmi÷î©uT @€ @ _û®|9‰† @€ @€ @€ @€xóưwÏîŠ#ì49Òi댧ð¸Ò7¾ûV8xðÀ¨Ý$ OÖ¿:jÙЛ'œ|Z¦$‘¡ÏTû¹§·7|üÜO¦>¶uËæ°ñ ©õj©$Í-^rx-ŽúÌ´i3B’ˆšå8å´êv{Lk3Ùq3íèo`‚ãü…‹ÃQÇdOðLëÈò“O=kä­C®ß|==yèCo¾‘¾ê±»çÐ>GûÓÓÓ;fY¹à@õ»–Ÿ­tîêê ÇžpJ¥*ã.Kú8æøSÂsÏ>1f[ƒ»ä½ùz8öø“Ǭ3´ ™ƒ™3gþ„pèwá@¼kbÖ9‡¶[íç™3Ówp,‹Õ6òðΪ:è}À»wøÄ,\œ¾ãìð'²_%ïà´£–„á´6• @€ ÐÚ[{~ŒŽ @€ @€ @€ @€;w„mñŽ)S§…F&m”ûNv²K~j9²$Ù4:Qm专þÒ7m|' „$É­^G²‹b’Zïc0¹fSåVñ=I’õ ±O1vû(]4Ž’åñN˜“&MG ÙMv½¬”à˜´òæë¯dNpLë5KÒhZYÊ»2ìúš$oVsäåUMÌ­\×»÷£ÙÜq¸ïÞr½½éÉÞËÕ  @€èúþ3S‚&L @€ @€ @€ @€ZGà ëS3ÁâÔ:Y!Ùnc†‘|W)îyóʼn‹•ÿý$eó{ïVj¦ê²dÌFìÎ7yò”Ա̞;?µN-š•7rl‹6òVC®ç/LßYô­xÇv;ŠÅJI©¿‰¦ÚÇ<¼³Úmǯwïp™ñïÊF¼{˽ôôN*ó\,V—0sV]Û+7–%q§Q»vgØ °<Îzž,ZRÏæÆlkÚ´aò”©c–'}}ûÃþÊßóŠ 4©0Ù‘4ÙÕïÑŸ<~ú£Ô½×<¼³êŽ2A 
z÷‡Ÿ9{îðu¾êÍàX*ëÜ«æ @€huÊÿ´V«Þø @€ @€ @€ @€èx}ûö¦ÌϤ—ÚH+d‰aN¼«a×$É-X´4¼·±rãÞ=»ëª3uê´º¶WMcYpªio¢ë63)6Ù)õí •wiÜ»wO˜ÕÛ;Ñ,öŸì¾¸óƒíaëû›Â¦÷Þ ›6¾Þß¼1$IŽ:²¬÷Vg5ʦÙíf™‹Nz÷Nš”¾Ãâx樧…Öþxâð, @€õàX_O­ @€ @€ @€ @€ Ðd$a*í˜Õà]©ÒúO+ß¿_Z•0oÞÂÔ:¨0wÞüÔf³Œ?µ‘!¦Ld‚cƒ|†„Ùð3gͽMŒgF†7÷îÝfÍžÓðØGv$²%‰Œ;?Ø1xÞ±}kؾuKؾmKœÌxpdõ†^çáÕP &6žåÝÕIïÞž ;,Žgz¢çqÏ @€ S Ž9Xa @€ @€ @€ @€è}[}W¾ýv¡œ4yÊ„Lé¤I“SûíÛ¿?µN5zº{ª©^׺]]]umo"kv¢ho†ÝÙ²¬×ZÍô÷‡-ñNŒìØÿlÄ ìØvÆŸ诵ٺ?—Å ÕßYuG™ ½{‡ÃûÞ ÷pE€ @€@s$86ÇY/ @€ @€ @€ @€4H [²Poƒz¯O³}vplæN|C£êÍà¸ÿÞ¡Œûs!GI†ãÆG½½éÉ©ãhþG³$GeY¯‡4<ÆðÖ›¯…7^{9l~ïÝÁÝǨÚR·³dIm© Út0Þ½Ã'Î÷n¸‡+ @€š# Á±9Îz!@€ @€ @€ @€ @ AY’…zz'5¨÷ú4»?K‚ãÅ%±²Þ;8vuû«mõøf5;Y)Ë:Û›aÇÕ´Ø·oÛž{öÉðê+/„, jií5»<ï¬f›5ª?ïÞá²QT~à @€š àOš€¬  @€ @€ÈÀ§ÏY‘Ÿ`DB€ @€ <úûû*FÒï˜ü´òÑÒI6+ô÷ו· ɦ.žYv߬KG¿m¤§'}§Ôýûjßí3I |âç? />÷t(•JõzÓÚÊË;«i` îÈ»·ÁÀš'@€ @€@ ŽT!@€ @€ @€eÏœwTù£3 @€´€Àƒ2Œ"ÊPgb«”ŠÅÔD…‰‰£˜alv\L¾ ©ÐÝä0¦Æ™% r´FÖ½ü|øÉC÷†z'ÓŽÖWEaÉaG„c?9âäèýðžÑªÕt//﬚‚oÁ‡¼{[pR ‰ @€ŽàØqS.` @€ @€ @€ @€ùèͰ»`’t•$é …– |Òä)©cë﫼Sej5VÈÒowOO­{¬‘ÔwgÍ´±¦í¦šòðCÖó/]>ù™Kã¤Æ…©ýW[aÿþôÝûJ¥RUÍæáUUÀ-\Ù»·…'ÇÐ @€èñÿóDC%P @€ @€ @€ @€ZQ KÂÔ–-›ZqèŽiúŒ™~ëÃÖ÷7…b±8VqÃîoÙü^jÛ3gÍN­£BóöìÞöíÛÛ”Ž iI¸]]]ƒ;®¦ (IÌ\ÿú+iÕËÏ>ÿÓáŠÕ×7$¹1é Ë®”Õ&8æá•irÚ ’woL’! 
@€ {;8æ~ŠH€ @€ @€ @€ @ ß ^}å…ŠAn˰ aŪ(ü¯÷ßÞßôn˜1sv˜ÿ̈“ÿ’gÌœ5x=yÊÔCZ›7Qèîî 8¤¬|c` N ‹wÈ›·`QùVÃÏIBåÖ É¡vpløTÔÜA’{øGÖü|Ö·m{?¤%úÍž3?Ss/?ÿlj[IC§œöñpÖ9fj³ÖJ}}é;8ƃ­ªù<¼³ª ¸…+{÷¶ðä @€# Á±c¦Z  @€ @€ @€ @€ò)°hÉa©mmb‚ã¦wß»v}>ر}ÔqM™:-\÷õ?…BáÃòäsÇ;o­ÿðÞh6oÚØÔÇm[7‡Ñ†2ìÞœ¹Ù׆=ä¢)[š”à˜e§Ï#>.SÌ/½ðlj½dáE©õÆ[a×ΩM”Bu Žyxg¥¢´IïÞ6™(Ã$@€ @ ×ý Y®Ã @€ @€ @€ @€y˜¿`qèêªüo½ïؾ5ìÞµ³áû÷íLn¬ÔÑì9ó†%7–ë.^rxùã˜ç×ÖUÞ©rÌk,xõåçSŸLâ™:mzj=&Fàí ¯7¥ã×Ö½˜ÚÏ‘GŸZ§¿¿/ìÚùAj½~vèꮼîSI©ì¨ºñ )µBµ8†¼¼³RaÚ¤‚wo›L”a @€ [ ޹Z @€ @€ @€ @€è ®®®°`Ñ’Ô`×½ô\jñVx;eƤýÅKGOd\¼tYj÷oox#ìü }G¹Ô†2THvn|ùÅ_§Ö\zøòÔ:*Lœ@òitrïîÝ;CÒO¥cÆÌÙ™vÍ:Ö×Y©»º”½óÖ›™v0-Ów9: ¼¼³†ÆÔΟ½{ÛyöŒ @€<HpÌÃ,Š @€ @€ @€ @€@‡ ,ÉøÊKéÉzãeÌ’D9ÖX-9lÔGŽéÅçŸy«!×o¾±.ìÛ»'µíÖ­H­£ÂÄ ¼’!Qu<#\÷bzòð‘ǤïÞ˜Œa÷®ôÝ“z³fÏIN =6¬5SûÅ8¸Úc¬÷ÀÐvZý5t¬Éç(ŠFÞv]*•†]·Ê…wo«Ì„q @€ Щ;uæÅM€ @€ @€ @€ @ GÇŸtjj4Û·m ßy+µ^­öïÛÞzóµŠwuu‡ÅKFß©±·wR8æø“+>Ÿ¾ðë§3%¦6T¡B±X ¿|â‘ 5~S”ŒùˆG§ÖSab^|þ™0pð`CÑßßžûÕ“©myt¶Çýû÷§¶•Tht²ÜÎ;BÖÄÐd½T{äá52æB¡ò_ImÔwpä8ª½öî­VL} @€ÔW òÿMÖ·/­ @€ @€ @€ @€ @ !³çÌË”h÷ÓÝjØm-Ë Ÿzü‘–è”$0öNš4fs§yÞ˜e傾ýû¼·|ÙóÓO>¶l~/µí$I«§§7µž +°+NÖ{òñŸ6d¿xäGaÏî]Ûž2uZœØ{xÅ:åÂ)S¦”?V<¿¿ycÅòñ&É“?úÁÝáÀþLÍÔòNÉË;k(PZ‚câZ‹ÕÐ>õÙ»·Q²Ú%@€ @€@º€Çt#5 @€ @€ @€ @€h~Nê(·m}?<óÔc©õª­°cû¶ð|†]ìN>õÌŠMÏ›¿0~ÄQë$…ë_%óîr©¨°åýMá©_dK†;ùÔ³F<í²Užyò±°åýô¤ÕjÆŸìˆúü¯žJ}äô8q7Š¢ÔzI…)S§gª·ySãŸýå/ÂÆw³ïö:0PÛî˜yxg ¬B¼CmÚ‘$h·âáÝÛŠ³bL @€tŠ€ÇN™iq @€ @€ @€ @€ȹÀ²åG…9sç§FùÔã? 
Ö¿–Z/k…ýýáûîHݽqÁÂ%aᢥ©Ížv湩u’ ?yè޺Ƒ´¹cûÖpßÝ·¥Æ’Ô=ñ”3B² £=’Ýó¸÷ÎÔݳF³}Û–ðÃ{oO­ž$Ž}ìŒôäãrC3fÌ,¬x~ãÕ—²à[o¾ìáŠ},ìïïy+Óu^ÞYå`'Uض\gë–Må-wöîm¹)1  @€:D@‚c‡L´0  @€ @€ @€ @€t‚ÀÇÏûTj˜Åpÿ=ß o¼örjÝ´ Œ“¼î[ã]ÓŽ,cKÚH’ž–¾<­¹ôÄñÚºSëf©ìîw×Ú›2%À%‰|êsYšU§…>ر-|÷Ÿn»wíר’Pï¾ý–°oïžÔv>uÑB¡ý¯*Nš<%,ZrXj»É÷õ<”Z/k…$4Ù¹ôûw};$ïˆjŽ$ɹÖ#Ë{¡ÕßY娓¹K;~õô㙨ÓÚiD¹wo#TµI€ @€tìjÞ– @€ @€ @€ @€˜P£=1Òi©c(‹á‡ß¿=üòñGjÞîÍ7^ ·ÝüŸÃ[o¦ïyÌq'…åG“:®r…‹/½"L™:­|9æ9‰ã8ÁòÑŸ<jÝE.iã×Ï<1˜ø¶ßÞ1û*ººÂÅŸ_zzzÊ·œÛH`çÛãDÖÃÛÞ¨iÔë^znð»’%¹ñäSÏ ‹§'+ŽÈŠ£ŽykÔë$YîõW_µ¬š›É÷þÞïÞžøùOªyìúý}µíà˜4—wVK–wV²Cæ¼·æ÷nÒO#ïÞFêj› @€£ t~Û] @€ @€ @€ @€´§À…+/ ›6¾vlßZ1€dǶÇ{8¼üâ¯ÂùŸ¼(±â˜L;Í%»ß=òãfÞ2ÙÕìŸþŠcY8uÚô$ÚÜsÇ?Œ,õ:IôzõåÂ9ŸX9˜0ÕÓÓ;j½¡7“ÄÆwÞZ~þ³‡ÂÖ-é;P–Ÿ=ÿÂφù •/ÛP ùïÎ ÇŸxj8ûüO‡é3f¦F‘ìÚøóŸ=6¬OOèM›:uz8ç‚•©íŽVá˜ãNOÆÉ†vRL|O8ùôðñs?¦MŸ1ZscÞÛ¾mKxñ¹§ãwÀ¯Cßþ}cÖK+Ø·oÏ஄ÕìT9´Í<¼³’xfÍž;4¬1?¿ü³áÍ7օÖ­üžD…(ˆ“D÷ìÙ¢¨>Ù—Ç|¶ÑÞ½Ö> @€àx¨‰; @€ @€ @€ @€´±@²³àçâï¸í[™¤>ر-ÜÏÚ09ND\ï·ìˆ#ãD©™aê´iñ.…½!)O’»’d¨ä¼ù½wÂÁƒ3 EQV^ü»™v6Ù`’üsöyŸÊ¼«ÜÞ½»ÃÃ|/üô¡ûãdÍ£6gÌœÇ1}°ÿd—¹½qўݻßxý•eÇÆ¡ãJ’@?vú9CoùÜÆIroò³dé²pt¼Ë蜹ó“'Mš’wÇß•­[6ÇɳÏW•›|ï?ùÙKCÒN-Gò½=õŒsÃÓO>šúx’¨œ$)¾')~쌳C²ûãÌ™³¿÷#N’“äÎ÷7¿^zþ™ð^œåH'“u3Ö‘Œ!)OÆ]Ë‘—wÖœ¹ó2‡Ÿ¼{^{å…Cê'ß$±µ+Þ)v¢ïÞ‰’×/ @€* Á±Sg^Ü @€ @€ @€ @€r,0/Það¢xÄï¿+S’cB±?N~JvK~êu¬¼ø‹áÈ£¯¹¹3Ϲ0ìÍžÿÕS™Û88¸»ä¯½œù™,—-?*\tÉeYªªÓB½q’aßþŠ#Úøî[!ù©ÇÑÝý›ãåG;®æÎ<ûƒÉ—{÷ìÎÔNò½æÉÇ’ºººÃÌY³“{“dÍ]»>ÈÔV¹R’ôü™ßùÒ`¢çOº¯|{Ôó¦8ñ¹ÖǤÁ<¼³æ/X<˜(ž¼Kk=’dÑ$©|î¼µ6Q—ç¼{ë¨ @€dà˜‰I% @€ @€ @€ @€ÚMà¨cNS¯Zî¿û;ƒÉ‹Íÿ'?si8þ¤SÇÕíàNxq;³çÌ þä$ÿ4ûHÆpöyŸgœ}AH>;ÚKàÜ V&¼¾½á†|êÔéáó—9,X¸dÜ}õôö†Ï_öåp÷?Ý诺½$á1Ùu5ù©åHvµ¼èóW„éñn®›2ìôøÎ[ëÃ1ñ.˜ã9Úý•¼NúØ™á—O<2†Á9›èGïÞqM¡‡ „?úÛÊïo¿÷‰kÿne¥ZÜúÏT*WF€  Žù™K‘ @€ @€ @€ @€ 0B`ñ’Ãêk~/|ÿ®[Ãζ(mÌeoï¤ð©Ï~>süÉuëàc§ŸfÍž¸ïŽp ¿úd¯Z’$¬]'y-=|y­Mxn‚’ /ýÒêp_œè›$á5êHÒ¾pùµaúŒ™uë"I”¼ä‹W‡{¿{k(‹uk7­¡³âSÏ:÷“¡P( V;aÚ#±m}HÛý•Ø%»ÇÖšXš@çÙÔ‰ª²‚wo•`ªø­À3¯¥þƒ+BV#@€ üæÿ¾Y @€ @€ @€ @€ @ §Ibà•q’㉧œÑð“䤫¿öûuMn,OË+ŽW]ûÏÊ£Ž+ßjèù°e+Âê8É enJãÝÝ=ƒ»!&»ë5â8bÅ1áŠÕ7Ô5¹±<ÎÃ82\võuaÆŒYå[ ;/[~Tü®øgáìó?ýarcÒYOOoXïèXéØùÁŽðÞ»oWª’¹¬ßY]ÝÝá¢K/S¦NËïÈŠÛ·¾?òÖ„^{÷N(¿Î  @€è;8vÀ$ ‘ @€ @€ @€ @€@§ Lž25|ú¢/„ÓÎ<7üâ‘ 
î0VO“d׺¤í“Oýø°Ä¨zö‘´5{μÁÝø6m|;üâчûo¿Y×.’ëŽ<úø$Â%‰Q”ºS]û×Xã’$ÇdgÑ£;)<üÀ÷®;ÆÝY²k㹟ølX~ä1ãn«RIâðê¯ýAøéÃ÷‡u/=W©jMeIåÙç}*,Šûë8á¤ÓâÆ·Æ*¼ÿÌS…K—®®X'ka;¿³æ/X®þÊ×Ã÷ÞÞ‹ßUÕ­´ƒcyìÞ½e g @€Ô_@‚cýMµH€ @€ @€ @€ @€@‹ $I*—|ñê$¾øü³á­õ¯…={vÕ<ÚyóƉç îØ˜$6ëH±.»jMx{Ãá•—~Þ~ó°wïîš»OvŒ;)Þáò¸“N SâdPG~‹W¿¼æÂó¿z*¼ü¯BµÉdIÒëáGN8ù´pÔ1'4- ¶wÒ¤pÑ%—‡³Î¹0<÷ì“ᥞ ¨y¢’¤äÁ8âÄÅÅKÇNl,wpô±'†Ÿ=üƒpðàØ}®ý•°cû¶8ynù±qŸÛõ5múŒð¥øõúºú—Ÿ o½ùz(•J™ìÙ½+ìÛ·7ìF½½“”©ÓB²›ÜÒeËÃa‡¯I’T«[â8Þz3Žã­7ÃÞ8i³ÇФ¢žÞÞ0cƬ0cæ¬xì¿9/Z|XXrØ­†qT!$)þôG÷W|båÅ_LF«Òæ÷Þ /¿ø«ðþ¦ƒßÿ$Qvèw&I2K`“ïÉÂ%‡…G¦M›1VsM»ßßß7¸£â{ß ›ß{'$ßÿ¾ýû†=L2þ)S¦…ÉS§~÷“Ý—-?jp7Ô¦ ¶Îµã;kßÞ=áÝw6Äï¦Ý!ùœ¼Ÿ’9œ6múà{tð}¿“’wj²{e+&7Ž5Þ½cÉ¸ß ùà„ ãÂ3þ¿¢þÈ·¿1þF&$úl^øÕÿü¿Æ¿oÿ<[íÖ®¿³ÿügÿø/þ]kÒè @€V°ƒc+ÏŽ± @€ @€ @€ @€ Ðpy …ä猳/ø°¯$Á+IrìëÛ’ÄÆ$ᦙ;4~8*>ÌcH~Îøøð8’¤¯ýû÷ÇI^S¤ÉSªhQÕNX¸xiH~ÊG±XLB+¿/É÷¿“Í’q±â˜ÁŸòØ“s’4×ß×7¸Ób²n'MšÜ’ã:æj?·ã;+IOvÁÌãáÝ›ÇY @€ÍàØLm} @€ @€ @€ @€ Ѓ»¾Å 9IRN;II’Wòã E Iäm¥I³Œyh$ñ1ùé´#/לּ̛wo^fR @€4C ÐŒNôA€ @€ @€ @€ @€ @€ @€* Áq¨†Ï @€ @€ @€ @€ @€ @€ @€@S$86…Y' @€ @€ @€ @€ @€ @€ @€ÀP ŽC5|&@€ @€ @€ @€ @€ @€ @€š" Á±)Ì:!@€ @€ @€ @€ @€ @€ @€† Hpªá3 @€è@(Dµ ´Ÿ€µÚ~sfÄ @€ @€ @€ @€TàXÙG) @€ȽÀÔhRîc <X«y˜E1 @€ @€ @€ @€ 0T@‚ãP Ÿ  @€ @€@ ,éš×Q ™@û ,µVÛoÒŒ˜ @€ @€ @€ @€Š+ò($@€ @€ù8±çˆü)B98ÁZÍÁ,  @€ @€ @€ @€¡‡jøL€ @€:PàüI'u`ÔB&Ð~ÖjûÍ™ @€ @€ @€ @€ PY@‚ce¥ @€ @ ÷göæä>Nhgd&kÕA€ @€ @€ @€ @€< HpÌÓlŠ… @€Ô КéÕð¤Gh–@²F“µê @€ @€ @€ @€ @€@žü­¨<ͦX @€ @€@—L>;œÞstO{ŒF $k3Y£ @€ @€ @€ @€äM@‚cÞfT< @€ @ (ŠÂŸÍ^wÍ­ái Ð(dM&k3Y£ @€ @€ @€ @€äM@‚cÞfT< @€ @ F™…iá/gÿsIŽ5úyŒ@½’äÆdM&kÓA€ @€ @€ @€ @€< HpÌ㬊‰ @€Ô(°¬{aø¿çþëpzÏÑ5¶à1ê!¬Ád-&kÒA€ @€ @€ @€ @€¼ tç50q @€ @€µ $»ÅýÕœ? ?ØÿD¸e÷ƒaSq{m yŠªæ„5Ó/ —L>;DQTõó @€ @€ @ ³zz'…3fU º§··b¹B @€ @€ÀD HpœH}} @€ @€H«.rNøÜä‡_ö¯ õ½^<°!lØö–úB)þÏA€Àø¢…©Ñ¤°¤k^8±çˆpþ¤“™½Ç†®¨0¾†=M€ @€ @€@ÇwÂ)!ùq @€ @€ÚU@‚c»Îœq @€ @€&$‰VgO:~ð§ Ýé‚ @€ @€ @€ @€è ÿ|M¶P  @€ @€ @€ @€ @€ @€ @€@«Hpl•™0 @€ @€ @€ @€ @€ @€ ÐAÝ«P  @€ @€ @€À¸~ôó×ÇÝÆgÎ;jÜmh€ @€ @€ @€ ÐîÛ}Ÿ @€ @€š*ðãÇ×»? Žã&Ô @€ @€ @€ Bb @€ @€ @€ @€ @€ @€ ÐfÛl — @€ @€ @€ @€ @€ @€äA@‚cfQ  @€ @€ @€ @€ @€ @€ @ Í$8¶Ù„. 
@€ @€ @€ @€ @€ @€ȃ€Ç<Ì¢ @€ @€ @€ @€ @€ @€ @€@› Hpl³ 3\ @€ @€ @€ @€ @€ @€  Žy˜E1 @€ @€ @€ @€ @€ @€ @€6àØff¸ @€ @€ @€ @€ @€ @€ @ ó0‹b @€ @€ @€ @€ @€ @€ @€m& Á±Í&Ìp  @€ @€ @€ @€ @€ @€ @€@$8æaÅ@€ @€ @€ @€ @€ @€ @€ÚL@‚c›M˜á @€ @€ @€ @€ @€ @€ @€<tç!1 @€µ |îüã¢ÚŸö$ @€è @ áW®¾þêBžˆB´¼áé€0P*†[v?H‚ HÖ^²³Öj%u4F šµZ5[–p&Ð\j׫µÚÜùÑZ¬ÕZå]èŠþ:„èãÕ‡\úæËÅ}ÿÍ Ñ”¢êŸö @€ @€@óÞ:¥r‚ãgýaó¥G @€ @€ @€ @€@ HplÃIk¡!'éX¶Æj¡ c(æi ˜zܾì²k—º WÝV©4PŒ¢ÿþÎ[oú?“gOøòõU7á @€ @€ @€ @€ @€ @€í, Á±g¯9c.»êº³zºÃJ¡p\¼ÃÜÂ(”…R´0Îm\P ÑÁx›£¨´)þ¼)Nw|/DÅÇBéÞ;î¸ecs†¨—XÀVí³ê @€ @€ @€ @€ @€ @€È‹€ǼÌdãX¹re÷œEË.‹BôÅE_ˆ·ÿ[”4Ÿlø›#þ4xס+¾·,¾±ì··âËÂ×K=¥ÒU×ÞðËR(}/NæºíÎÛn~ñ·ûtå—oøŸã„Ê*5'[>tÇm7þ‹Juê]¶êšë/3Ýþºb»Qtïí·Þø'ëd,lõyÊF§U;P Åÿ¸oç–ÿpß}÷õuZðâ%@€ @€ @€ @€ @€ @€ 0T@‚ãP ŸÃªk®»2N[üߣ(:n<ñóI¾ãYq[gÅÿìÊknøÖÁ}}ÿËÝwßúîxÚMž-–J?ì*DQ±RXvÅWüwÝu׎ŠõêX'7þ~œzL¥&ã±?T©ž+팓Zÿ§{¶{çwnþÃÚµùˆK @€ @€ @€ @€ @€ @€ê# Á±>ŽmÙÊYgÕså5×ݘ$63€8ðĨ§ççW^³æÌZûØÛs©T:XñùRøÔå×^»¬b:ÆÉ›×§5up ˜ž”9J#ížÒÖSß]û¿N©sHq»ÏÓ!åøF¼[㛡TúÓý».»ã¶ÿÍ=÷|{KŽÃ @€ @€ @€ @€ @€ @€q t» ´ÀÉ«W÷Î,L} áìj_ áÝ(”îw§{*Ãæ®â¦¸®PìZRÅ¥qÂá©!.‹w3œ©Ý(š·ùÝU«¯ÿÔkoz,Ó3C*ÅãøfÜ×eCnò1*…5ñÍÿxHAoD¥hM{Å£X¬~÷ƼÌSE˜‹{vw§_}G©ï®°ví@B @€ @€ @€ @€ @€ @€¦Hpl skur\aÊ_ʼn™“ãD»,þowß~ó“i‘œuÖY=ËŽ9ùâ®ýiœô·2­~¼Ébwè*}ûw÷«g|ÿûÿ¸=­þÐò ¯>o¼ åæ(D ‡Þú9nÿ”U׬9íÎÛnyvèý:~Ž»L¢³Éدooiÿ·Ç¬0FA^æiŒðrsû®»îÚs{n @€ @€ @€ @€ @€ @€& Hplt«tsåê’ÿ$ÓxJá×¥pðëwÜöOdªWzê©§Ä?÷Åï[uÍ ×Bøë8ÑqA¥çãÅå“ftÿq++ÕY–ôuÄ1'ßÇóß,~]øZ|ÝÇU«¯ûTˆ¢#†÷7ü*ŠJßýÁڵۆ߭|•§yª©R @€ @€ @€ @€ @€ @€:U Î?stŠÀå×^»,*„oeŠ·TºckqïÕ$7Žl÷ÎÛn¼©t`à´R©ôâȲ‘×q’ãªU_¾î_¼Ÿ~]J'þ’5n§!ßõBWt}Ú‹ÅÔ1m#Ÿó44BŸ  @€ @€ @€ @€ @€ @€ Р¤/°­)ÐUšô7ñnŠsÓFW*…ÿrûm7]ýðÚµ»ÓꦕßqÇ-î;ð™JϧÕ-¢¿üüç¿63­ÞÐò;o»ù¹x¼•w˜Œ¢ÃâW}®ŸÏ_½zJÜÎÕÛ*•Þ¹3ô=P±ÎˆÂ<ÎÓˆ] @€ @€ @€ @€ @€ @€ @ 1»Úqm=«®úê±Q.KY)”Ú¶éÍd'ÅRZݬåwßýíMžÒgã7U~&š9uf×ïW®shiÒw¥Œ ÑšCŸß%]SbϨbBf1„›ÃÚµY{Êóøv|½轑ŸãäΫV®\9yäýñ]G×exþ[ê|X%Ïóôa> @€ @€ @€ @€ @€ @€ @ ¨˜ðF(—¬^=7Þið÷Ò¢)‹þýïÿãö´zµ–ßþ[îŠ÷…|°ÒóQˆ–_M½ªR‘ewÝu׎xÃÉ;GÞ~Íœ³hÙ—†ß«ýjÕªë†Ré’Š-”JÞyÛM¯T¬3¤0ïó4$T  @€ @€ @€ @€ @€ @€ Á±¾Óº¦|#ÞÁpj¥XãÝ_º³´ÿ›•êÔ£¬T “ÖN¡Pú×iu)/†Ô±GQ´æçj¼Qè¾·×]éñØ´ªÝ;bž*)#@€ @€ @€ @€ @€ @€ @ 
£ìàØÓ…°*-Ìx÷Æÿ'¬];Vo¼åw¬½ñ{¥PÚX©¨Î_¹zõôJuF–Ýþ›ŠwTÜ0òþÐëxwÈϯZuý¼¡÷jþE×Uz¶T {÷íøN¥:#Ë:ažFÆìš @€ @€ @€ @€ @€ @€Îà˜ó¹ÿß¹nZœlwzZ˜KÑ]iuêT^Œù¬ØVuÍîê=·bC ‹qœ7z{ØžÐSZ=ìN —¯þÚIñcgU~´tû}÷ýÃÎÊu>*í yú(hŸ @€ @€ @€ @€ @€ @€èh Ž9Ÿþi³Â¹Qu§„ùÔ=koª¸ûaÊóUÇ;8VNpŒ[ë*>QU£qåhààß—â£ÒsQTXS©ÅÛ8V>ÒÇ=òùÛoÿÇׯºæ†Ç÷WŽ,ûèºtÁ—V_wä=ko~ã£{U}ŠB)Np¬4þRé;¾sÓÃÕ´ÚIóT‹º @€ @€ @€ @€ @€í%ðG[y¼ïo¿÷‰kÿne¥ZÜúÏT*WF€  Žù™ËQ#‰JáŠÉxñS¥(zuÔ‡tó½âþ×–¦ÄÝŽæX Ñùq÷É£Åj†WþVüÐʱžIú쉢¯Ååÿa¬:•î_qíšOǹË*Õ)Eᦸ¼âN’#Ÿï´y¿k @€ @€ @€ @€ @€|<óZ¥Dc\ÿýæùˆV @€ãHÈ9ˆ“íÎL ¯Phj‚ãck×î Q´±Ò¸âÔÇ—]ö••êŒVvpï¶ŠwˆÜ5ZÙ‡÷ !Ip¬é(” ×Wz0Þ ³T ý_©Îhe6O£¸G€ @€ @€ @€ @€ @€ @€@g ØÁ1ßóç FsSC,¾yåµ7ü]j½zV(•æÅIŽ[ìšÒŒ}SÅJ# ï¹çž½W^sým!D¿?¢èÃ˸×®X}ÃYw­½ñ©oføpþêÕSâ!_•Rõá»n½u}J‘Å7O#\ @€ @€ @€ @€ @€ @€ Ðys<çW\qŬ8¼ »tFs*§6)%¹1鱕æÔÒs¼…â7£®0f‚cÒf¡Öħª&_'Nά8¦¨ô­Šå£vêZ]8¨Ï/_ŸH~Ç ûéõ~? ]cØØôž8rŸ @€ @€ @€ @€ @€ @€6 (plcTŸ9Óß-,<âxqè£hî¸ç?ÿ…CûÌðÍ]×oû×8Ÿz„(\<è~Ôë]>èÞÓ×ãðÈþo>xãÐ>#nŠÓ ·  @€ @€ @€ @€ @€ @€h¥€ÇV†õ[‡Šã(úú·> x-ÎÏ€[­¸œ<¥røãðª .;ñðÃ&O¶\Eñ†Ã¯/ù…÷ßvÛmû—\ËÿAœò›A€ @€ @€ @€ @€ @€ @€ÀŒ (pœñŽÞ~üñQ}¢ÞÜ£úÌòýÇúûv$OqX„¥¯sç~ÆãŸ÷³CˆÖ~ýÐÏ‹‹axñ䡇¾§¡ü•OÂI%Åzß6°Srcþè•¿˜4›†õ)ë^oåš·'%…—EiYaúêõФyö 'Å.ºòÁ(Ä{â8ìyloüs·ß~ÝãO÷™ð»¶Ý¾á¢+îOÖ|ဩV¬:fîœäÞuéý£ŽímHšÕéû¯ý§âmîå¾,N¹É @€ @€ @€ @€ @€ @€ @`ÆF<ÚoÆOgû!y"âÁ„ác£(’R÷wÞyÇŒê7éýôɈQ/$ŽG¾¢ô Iñã%ïN-«¸ñ™•ú¡¶¹ê¡W¢ ¾õ)Ž6~ëýroâøovíºîÁån¹&NEÔŒ!@€ @€ @€ @€ @€ @€ @`–8ÎrôÆÜ{Ü1ºk´v~õ±oÝo²G;÷Ö¤€qí¨Yâ~ÿý£úä½0îo:&ŽÎ:÷Üs^¿qcò´Ëè5ÃúöC>×°Áî‰Ó—  @€ @€ @€ @€ @€ @€h¥€ÇV†ué¡v.lÝÇñç—^=òSÔ‹Þñú—¾ìÈ;å\9óÌËŸ•Ìtõ¨Ù’½Æá`ÿúQýòÞ¿iaÛ}!4.y|äÑ+V¯=kmoõë’÷«õ‹Cüà—ï»÷oÝ/z]œŠÊG€ @€ @€ @€ @€ @€ @€À, (pœÅ¨åßsòÀÁðîÑâgÍõæ>ôO0Ý;gè˜gG…è9£ÇE·ìܹýþÑýò÷ùäÅ^¸ ½×›9)¿ÜvÏ=÷Ö§à=q*g @€ @€ @€ @€ @€ @€³' ÀqöbVhÇ_íïÛ’5ÄiÑ`tnòÔÆãÆ™;ëÓãwïZØú¡ìsõm|m²Ï?ȳNÅ7Þz뎽yÆí+NEåŒ#@€ @€ @€ @€ @€ @€ @`V8ÎJ¤JÜgú$Æ7¼ñ² ã0·=)DvG È“«éÿœÉi Ô/'_åjýñ±"L O®:Æÿçših Ô+'gý¾Ö«8T O®:Îûf ¬»ôšÍ¡ÞQå.ã¸=®-9˦u_³©J¯ä=›>ºãM›«\ÃÜ @€Àô8NÏ~ª+ÿõ·Ý°áÂË~$)r¼9ùcç/˜Öf’7ÿËý}o »w/Ö¹‡;·ß¿á¢+oOÎþÚ±Öãv…ýwŒÕ·ÄN]S‰”¦"@ £GG«Âãñ“=½c˜¾@šƒã¼äê8Jú¨N`Ü\Ív g3 -úòä«\­?>Vì–Àw|æÔ^æÃ?|ñ ïg7~ìU§¹ših Ô/àwµ~s+("'WÓùý¶Q6†@9yòU®–cnEòäj6¿œÍ$´êÈ“³rµþøX‘@&'W³1Úæ 
ܵýªw&EŽ¡ê"ÇæK4d‡½ð»Ý®¸±!Ѱ  P‰€ÇJXgcÒ×oûä9¯xåÊ^tSò$ÇWÔ¹ëäoêHÿr·ßßùÁ­•þí&ÃÎÔï‡-s½ñ ã(Þj-ÂÌöÞõ8eZ8qîøpßÁŠ 5†^äà8/¹:Ž’>ª7W³ÈÙLBK ~<ù*Wë»%pÒ½ƒ S‰ {F‚¤Žru$“*ð»Z­‰ ”*'WÓ…ý¶–Êo2¹òä«\ÍE«3Ròäj¶°œÍ$´êÈ“³rµþøX‘@&'W³1ÚÙPäØ8%Åi,²Û @€*èU4¯igDà–…­xâ‘äIŽýßqüX=ÛŽÿ-î‡uÓ,nLϹÿ›Þ˜Y>2Ιã8ºvœ~Uõérœª25/Ýx銓ºqP§$ÐPï3åjCh[7W39›Ih Ô/'_åjýñ±""rµˆš1Êð»ZŽ£YT-'WÓ½øm­:"æ'0X O¾ÊÕÁŽî¨Z O®f{‘³™„–@ýyrV®Ö+Èòäj6F;;OÖ%v³³ã–íTqcËê8 @`°€ÇÁ6¹sóÍ7?±ó×ýÎý}§ôãøÝÉÁŸ¬äðqx¨¯=üµ/Ÿ¶kaëÝ•¬‘cÒÛn»mòÉ#‡ÄñÇv]¿õ³#ûUÜ¡«qª˜Õô´\àÕ«^Öò:f Œ›ƒãököiíŽÀì äÍÁ¼ýgWÆÎ 4O OþåéÛ¼“ÚîÈÕîÄÚI›''ÿòômÞIíˆÀl äÍ¿¼ýg[Çî 4K OþåéÛ¬SÚ Ù(’EÆÌ¾”h†@žüËÓ·§³ íí‰å “(r$SñuÅ›ž4K@c³â1ÕÝܶ°ðPRÈ÷«ûãSûqú·ÄOžê¸8ᦒšÉøÃñb¼qÏ}Ÿùö]xßïÞ½ûà„s–8¼¿eÔdq#ûŒš£ÌûÝŒS™‚æ"@ K¯Xù’pBom—Žì¬#æ^šƒã¼äê8Jú¨F O®f;³™„–@½yóU®Ö«(* W‹ÊG`2¿«“ùM .¼¹šîËok]ѱ¥yóU®.õó‰@]ys5Û—œÍ$´êÈ›³rµÞøX@&7W³qÚÙPäXsÌ7Ö n9 0}hú[°ƒ& œ}ö%kW=ÿšh.œ™ú}o‡ç†=7DáØÃ÷ÇaˆâG’>ŸŽ£øQ?|¢ ||×®ë<¼¯Ïå ˆS¹žf#Ð5;îþlòñí}ýí¾O„w=ú¡öÐÉ4TàW×üTxíQ¯{wrul* ”*7W³Åål&¡%PŸ@‘|•«õÅÇJÝXwýk'>ô¦·ýøÓsÈÕ‰)M@ ·€ßÕÜd˜Š@‘\M7ê·u*á²hÇŠä«\íø—Æñ§"P$W³ÊÙLBK >"9+Wë‹•dEr5{ÆÉwÖµÔ’uN¿zò?¢~×û¯š|’%»šüúK¯Ùúá“Ïd†ŠÒ¸A€Ú,0ßæÃ9Ûä·Þºco2KZ²¤2ä´ÓN[ñ¢½ô9çšùÅðèWçöãî……}“¯h†"âTDͺ"pÖê wîû—ð©ŸïÊ‘“ÀÔ^¾âÅ!ͽf' Lš«Ù-)À:.|áÀWÃãñ“-‰¢c˜žÀ ½µáªcÎ o>æ¼°º·²”ÈÕRMB`‰@¹š- g3 -rªÊW¹ZN|ÌB ¨¢À1[®fÂZåø]-ÇÑ,ª¨*WÓ}ûm­:zæïš@Uù*W»öMrÞªªÊÕlßr6“Ð(G ªœ•«åÄÇ,2ªr5›¿ÊVc•º!(rœÐWqã„€† @€öLþ×c´ÇÂI @€@'î¸û³q¾÷Ã'Ÿú\¸{ÿ¿‡ÿ8ðåðÕŇÃñþä/Ôê$G¿Î\@ Q8:ZNœ;>¼tÅIáÕ«^^±ò%a.ê˜m¼!ru<'½*0\ÍÖ—³™„–ÀxÓÊW¹:^|ô" ,—«·¾÷ìvávÓÛ~|èX¹:”ÇMG,—«UÿwÖtrõˆP¸@`¨À´r5Ý”|7 !0­|•«G„ÂC¦•«Ù¦äl&¡%0žÀ´rV®Ž½dÓÊÕlý²Û3N¾³ì)Çšïô«'ÿ#êw½ÿªÉ'k·“wZwé5›C?¼cò™:4ƒâÆÛQ  @€Àh™ù¿ÑGу("ÐÕÇ"VÆ @€ @€T`ÓŸüýÄ£ '^À @€ @€ ÐyŽõ}9æ°VܘKW Ð êµÒ ?§$@€ @€ @€ @€ @€ @€ ܵýªw†¤p¯Ãã]qãxNz @€Ž (pìXÀ— @€ @€ @€ @€ @€ÊH‹£(Ú\î¬-šMqc‹‚é( @ \Žåzš @€ @€ @€ @€ @€:(ðÑoÚ¤Èq™À+n\Å% @ Pà˜Ih  @€ @€ @€ @€ @€ @€ÀŠÃSÜxˆ @€Àá ñ™ @€ @€ @€ @€ @€Päø œâÆ‚ß Ã @€@·8v+ÞNK€ @€ @€ @€ @€ @€ t¾ÈQqcÅß0Ó @€ö(plO,„ @€ @€ @€ @€ @€"ÐÙ"GÅ ùÚ˜ ޳'»$@€ @€ @€ @€ @€ @€è\‘£âÆû†Ú.˜¾€ÇéÇÀ @€ @€ @€ @€ @€ @ ¥)rTÜØÒo°c @€j8Vëkv @€ @€ @€ @€ @€è¸@Û‹£(Ú|×ö«ÞÙñ0;> P@@c4C @€ @€ @€ @€ @€ @€@¶9¦ÅéÙòXèK€Èæ³7Z @ ›?ñêyr§&@€ 
@€ÅÖ]|M\läÿ:ãä;ÿÿƒw @€ @€ @€@gÒBÀÓ/yoˆã¸O;TÜØ™¯®ƒ @€Ê<Á±2Z @€ @€ @€ @€ @€ @€¥my’£âÆ¥qõ‰(& À±˜›Q @€ @€ @€ @€ @€ @ À¬9*n,vƒ @€e8.ƒâ @€ @€ @€ @€ @€¨R`V‹7Vù­07螀ÇîÅ܉  @€ @€ @€ @€ @€ @ ³V䨸±_[ @€-PàØ²€: @€ @€ @€ @€ @€ÌŽÀ¬9*nœï” @€YPà8KѲW @€ @€ @€ @€ @€h@Ó‹7¶î+ç@ @ 1  !@€ @€ @€ @€ @€ @€® 4µÈQqcW¿‘ÎM€êPàX³U @€ @€ @€ @€ @€ @€ÀP¦9*n.7  @€8–€h  @€ @€ @€ @€ @€ P†@SŠ7–Ms @€£8ŽrŸ @€ @€ @€ @€ @€Ô(0í"GÅ5ÛR @ ã ;þp| @€ @€ @€ @€ @€hžÀ´Š76ï»`G @ Í Û]g#@€ @€ @€ @€ @€ @€™¨»ÈQqãÌ~Ulœ̬€Ç™  @€ @€ @€ @€ @€ @€@Ûê*rTÜØöo’ó @€f (plf\ìŠ @€ @€ @€ @€ @€<-Pu‘£âF_4 @`ZóÓZغ @€ @€ @€ @€ @€ Ð.—¿8z ‡ö†=<íÚÉÍeÒ"ÇÓ/yoˆãøËv(xQqcA8à @€R¢Rf1  @€ @€èˆÀº‹¯þ'3JvHÿ ÈŸ¾µäI§4Ý[ÿ4„O}Þÿ5‘ñ‹m&ѾVlÛÓìDb›I´¯ÛöÅ4;‘ØfíkŶ}1ÍN$¶™DûZ±m_L³‰m&ѾVlÛÓìDb›I´¯™ØÆaStâ›7·/õ()rÜTV‘£âÆúâf% @`yÞò—]%@€ @€ @€ @€ @€ @€š&>É1-Lœt_Š'4ž(C@cŠæ @€ @€ @€ @€ @€ @€5 LZ䨸±¦@Y†) Àq$‘ @€ @€ @€ @€ @€ @ YE‹76+ŽvC€º. À±ëßç'@€ @€ @€ @€ @€ @€™È[䨸q&ÃlÓ @ Õ [^‡#@€ @€ @€ @€ @€ @€6 Œ[䨸±Íßg#@€³+ Àqvcgç @€ @€ @€ @€ @€ @ âÆÃE|&@€š$0ß¤ÍØ  @€ @€ °TࡽaOˆÃµK¯Îæ§ä,?ìü;gs÷åïZlË7mÊŒbÛ”H”¿±-ß´)3ŠmS"Qþ>Ķ|Ó¦Ì(¶M‰DùûÛòM›2£Ø6%åïClË7mÊŒbÛ”H”¿±-ß´)3ÎNlãÝM1kã>Ò"ÇÓ/yoˆãxSz>ÅmŒ²3 @€v (plW<† @€ @€Z&ðÀÃÑžèÄ«6·áXë.ºf}ˆ8f±ÛL¢}­Ø¶/¦Ù‰Ä6“h_+¶í‹iv"±Í$Ú׊mûbšHl3‰öµbÛ¾˜f'ÛL¢}­Ø¶/¦Ù‰Ä6“h_ۦض/:õž(+rLV’÷›ê]Ýj @€| óyéM€ @€ @€ @€ @€ @€-96zƒ6G€xF G‚ @€ @€ @€ @€ @€ @€ P·€ǺŭG€ @€ @€ @€ @€ @€ @€޾ @€ @€ @€ @€ @€ @€ @€@í k'·  @€ @€ @€ @€ @€ @€  ÀÑw€ @€ @€ @€ @€ @€ @€¨]@cíä$@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ }m´*ØIDAT@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ 
@€ @€ @€ @€ @€ @€ @€ @€À8ÿ ¿EtÃéIEND®B`‚rocksdb-6.11.4/docs/static/images/data-block-hash-index/000077500000000000000000000000001370372246700227605ustar00rootroot00000000000000rocksdb-6.11.4/docs/static/images/data-block-hash-index/block-format-binary-seek.png000066400000000000000000002064341370372246700302660ustar00rootroot00000000000000‰PNG  IHDRÒ&>ÁçsRGB®Îé pHYsgŸÒR@IDATxìÝ `eýÿñç™Ý¤÷!Pî³Dñ*¨Eh›zl~*m“ (‚ *"JUNAPQT’–‚€ØM[Mz@Q¬¢hQPDK)G/Ú$»óü?³Él6iîn’=ÞéÜÏ<Ïkfg’ý>Ï3Æ0 € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € u›õI@@@@@@nŽ:ꨒÝö?∈µG[ã½³¡¾ö¬n6Í©ÅùšïœB$3ä‘@4òJV@@@@@@ò\`÷ýŽ8.‰Ü÷M£¼¤çk¾óür!û ›€7lGæÀ € € € € € € €ä ô<)d @@@@@@@`ø¤Ÿ=GF@@@@@@ÈAé9xRÈ € € € € € € €Àð H>{ŽŒ € € € € € € ƒÒsð¤%@@@@@@@á >|ö@@@@@@@ ¤çàI!K € € € € € € €Ã'@ }øì92 € € € € € € €@ Ds0Od @@@@@(X“f}bÒHÉÁÎF¶Æëœ÷ŒoÌÓÍæ§oØ‘‚Ÿ0óŒGy¥Ç˜ˆÝKiOpIó˜õì?ãuÏ(]·#iûvú;š¿BÜóB<«”)¤çÃY" € € € € € ÷3æT¿ß³ö kÍqAalªDžÑ¼ ºŽš‰¦"V½NñîÅ[-?ýÕò[_MmÒ‡ÊçÌŸf¬w¾R=^éµÅ”v¤uçŠXÍf¥ûÃW̆KïÇ·ö!É› vúÖÍLEeõgì©í«ÝÕKê~Ú>Ÿ›SÍ÷Žš—Ï©¹NÖ‡R*¾ûgc}ÝÇ"T>§ú»Æ³ öµÎÿUC|áç’û otížogŒü"€ € € € € €@¾ ØòXõíÏþ6 ¢w[kR@ü†1ÑÒÿ–ÏšûÿºÝ®mŴؼIå•Õ Ö‹¬²Ö~¤=ˆÞiOkÆ*bÿ…IvâßËæTOí´¶ÛÙÁN¿ÛwZ£ƽTuŽ~Œïò%ˆÞß|gËÜYó»ÐK½|¬lö܃:±ö:;õÔ¹Œ?+LÇ8{_¯;±"@ ½@N$Å@@@@@@ÜP‹Þ¯+Èn ìœ{Z?7ß¿X­Ä¿®é;ÔçúŸ3of” )±õå‡3æ·›œ>«j¿ù½5¶<½Ò¹Mƹß9ã¾£t/Ñò;•î¿Óë™b=³º/AÕÁN?#O=Nª¢€œì¥í¹¯«uõ×Úçssj ùΦù‹O=¸B×ÀK¡ŽµÑ¹át_Ç㢑ÿÓõ52Ø^×ÔË/<ýׯ¾îËvä»]»çû$ÿ € € € € € ³³ªŽUËà  œoÎm´n4ñx2\Ž­˜?î-##§yÎ}R­Çÿµzõê¦p]çñÔ©S£Ñˆ·JËn[ç+íë[^¸¤‹ýlyeÕçŒó®T‹õA`ÔFJ~¬ýZ»ýæ;ý.Ùå"åûBå÷òp¥sþ¥ñ…ßçsu<|gÛ|ݺu-{Ly×ÍjMþ唓µgjüUýø}vóìÜp[u«Kf8ÏB Ez¡Ÿaʇ € € € € €Ã'5'«5ºb™Š^:óýÆúÚº ¢ëïkX´iÅ’Ú5ÄëŽÜèÞ¨ –u7Œ4å3zÉúÛSëÕ =黩Jû 
]уM\ã’…×[çÒ­âµìøòXUæ|*©ðŸÁN?×ïšp› …}~Ñ–ïÁ0O67/”›:96ÿ­ªÄðžôvÉ–ºô4ô"8É@@@@@†GÀ9{Lxdϸ†pº·ñÚx|swÛ”—lÏØázEIÏ_Y_÷»p¾»qC}Ý2…T“±þ#ÓéÉÁN?} &*bÕg+ˆûÝô&ê¿1^—n™ž^žcÍ÷`™¯þù-OªǯÒLÖ›Ÿžîe"j#A öÔ\7+–.Î|E@¸Š1+@ ½`O-C@@@@@áPsr˜uKžj™ÎxþuÑ^îï'í÷Â龌}—x<ÜNZ†ïÜìôÃcw7.›S}†*ܤŸ¶îðÝ—Ô^ÝÝö¹²|Gò=ØæO¹7~®ëð•ÀJª#JwZonjY‚6Þ;ØNø×7¯oYÚÛ>¬G Ð¤Ú¥< € € € € € ;ÎÞfÆYóéò9ÕŸ ç2öFŽú œ‘Ô¾ÎlÝòjKfWí½&™ð[ž7RPuô 3ÏèÐ:y°ÓÝÕXïDÿ˜gÍ"­k_ùîÂñºô;Ò»Ú'–íh¾Ûü‘x¼Ù8{Khe­›Nw;¶æÌöuî¶µkokŸg âˆG1)% € € € € € 0ôþÖ-·EFþŒ‚ßoMµ²¶æúŠXÍGŸ¼æ¿Ï<Ô°nݺ–~åÊzhßÞ=;v—’Ù•Õj¤îôc­úãÖØk×´þ·&XçyšÖØÚ·´ïoLòõg7dΛÁN¿ÃÁÚgÊgÏŸ­`ï­jŸª$à|w~c}ÝwÚ·ÈÍ©¬ä{Ì“ISõÌ…)Ekœ«yçªxíߺR6í´ñºTf¶ö  kÄ%èÖ½+(–¼ô‚?Å@@@@@†K`åÊ;^¯ˆU•©ì?*˜¾s*Öc#‘ú½&ùÆž¹BAð%/>ó·»úTWkâ}Ò½±+8¯Ü·óAOè­ÏÖqkÏèê]ÿ+‚QzœÊ„ºìvk×&ÚfS£ÁN?óXáôŒØü 5²ÿ©æSq+½÷ý<ÑûÕe}˜ÖP޳•ï¡0_µ¼öq½Ã}­®‹©QÄ¥Z¥Ÿ×•Wt쨘.™ÑÁ:]#¬Š/þ{WÛ± B k÷B?Ô@@@@@†U !¾ð‰­Mþ;ÔÊú6&õÊé¶ÁÚ‰Ö³Ÿð"^Þ“|©,VsÉ3Oß5\ÝÕXï5ŸÔÕò.K¿¿=ܰӓ;3"b#õš/i]æno\’ûAtUŒÈZ¾‡ÎÜ¥[–+P~Ú¡±Xiú³æàN›æÔl6ò=”æ«ã7ÿS]éß×fñ\I:p®ŠÔòýSëœùûª¥7ÿ)5Í?©ô"=ñ@@@@@rK`åòºG1 Ò¹²æ=é鶉„µ¿ —-·ÇLš<=œÏÆx°Óï)+WÞñz"iN6Æ=l§æÑ“"Q·¦S«éž’–u;šï¡6w¾¹) åÙ¹á´^žVïé–ëázÆ›ôb;ã”@@@@@rV ©9¹&9kÑt‡XΪxíCÆ™†Ûè}êßÖûÖKÃùvú½åoõ²ºç“-椰›{Ó÷ó̈5'Ūvêmßá\¿#ùjóW톸z>x£ÕË\›܉'ž>FÖ³ƒej±¾Íߺå¶áôäØä‚@‡›o.dˆ< € € € € € €Å*PZêí‘.»3ÿÒ´ŸžoPca÷Ùp™‚Ÿ`&œÎga<Øé÷šÅÖ–ù~™2òfjckÞ>Âx«‚`o¯;ã;ï!5OuGol:Pn]d^éø³5c>µL_´²FJ@NHωÓ@&@@@@@@ › ,ð\óÃ]/¿¦öW]{ÓIÙL»?i•Ïž{È´i§ïë>Öy ·UÀüþp:sܯ»W-¶.SïߨˆUŸÎ÷e\;cßî¶ìô»;næòñE÷«iôÿ)œH-·æ˜F.ËfëûÌãekz ùjsÏwí]·[Si=“¾~|ßÔe˃tÈgh>gž¼#€ € € € € €@q |ö³ß±Û~Þî%vÄIçïîYo½W|µÌÞÝ·‡³f`w—ÊnZÖ±æÊ+oÜéË_þôз²”\V26zBEeÕ÷ÝÖ7¿ÛØøÓõÝœ1¯¼²ê<µ>?\ï×Nw;ÓüyëFÌZ[cGj|cyeÍ þ–-5=´&öÊb5§¨•å… Ò-ß'wN7œìôÃãô4n¬¯]1£²ê“ã- ¶S…'» ·>bÌÇ5Û¹¥~OI éºæ{(Í–Ö=\«ù“®›cÚ®¡÷Hª¸ðïõuk‡Œƒ!£º3 €Ý ”Íž{±Ñ÷ê«£µU¤¡¾.]+­û½ { &¹q~ËfÎ?”DŽÖGGë·»çã ¿‘9# € € € €;* Öä#GŽÛý­ÎD‚w„¿ÍóÌ!ÎÙCô]Ð>Цöõ]ÙŠ š×yQAã“Æ?ó+çòÅÍ[öZOO1Ö+Ïã‚ý”!uUîîwÆ>jÿ¨¦Ÿ0ž·³Vd­w†6™¦ï|s½²„ó]+æTÏrž¹=H7p®ÙYû¥ùý<¨7¬6Î*}{*ª iºëx·uó¤ûf°ÒŸ1kþñ‘häÞ Ë:7Û—Ô ³ßÕ¸bNÕ—åtE¸N­ñ¨ÜŸç‡j<ù,ó®ŒTIa~XI!\¯ÏÌEKj¯ç#PÌ´H/æ³OÙ胀ç•,Õ/V‡§6uæ}Ø¥à7Á$7N±Fþ ?‚FoìÑ?É\‘ @@@@þ œÝu£&™±G«1ÏûñÞ£o{Þ®`Þ~ üz™­ÕYCðkÑú—5ñ¢È ’û/:ß¾LûšŽXï¥×ôâ³E_ºé¦O¶{ ×°O˘=]‰Y¯\§é*Cð}Ö 
š?Á¨/íôÐZ¸ô¬Å+í†/¤t3¡†OËÊcó?¢ w*‰½R›Y[ªôߥà}ðÓºgÛ(ŦåÜ&ìÓ] y3Øé‡YémÜP¿ðÊòÊê=tŽSï†×µqvùœêõõu_ëmßá\?|¥yËÍ?ó&޼^—Gøê»Í-N3Ž@. HÏ¥³A^@@@@@(pu±þW=6œ[Ï{¿Zi¥xoi{±. ‚åÎ=®e:ëþ¥Àù£¾Zp'|ûÌ‚ ?ùª–kQîwý|ñ3Êå ú–«ZÀyš>¾Ç\;ówy\¬ñŠ·ËXÙ_ôûòòòƒÍ¨Ý>/”ÏHo·ŒÕÛO:÷˜ø¾ÿj“»å¾†E›¶ß ã’ÁN¿ãѺŸS«õÏ)˜¾›ÊWl%ϯj~½–¯û½†Í@ò=TækÖüdKEeu£4OKI©GCC]PI…$ÖAÊ[Œ©§Î8¶$z’ª'«8ûë'xÊîªe5FÑ×õÐxM…{ÙoìÖ4ßß¿õY-c@ /**ªwóKÍÛúšYß%·FŒÛÐìî^v[ðÀÛ¡÷Äè)ëN‘n‘Þ¯=¶¯y)ÔírÕäñ؄‘n‘¡{S³ÿø= ‹þÎÚ¸l:X› ¦¹­¨¬ù·2žz¥€^'P¼ß}° Bºä›@Þ¶H¯ˆU««s‘.P EåÈ|æ¶M[³«¦vÕIÑ;T¼¶v’2Â(ö'=4îx³¥ùö_-¿5¨¹Æ€@î Œ4ñúÑmµ|4Œ0%F¿@l±Î<¨Ù?&îVý‚ôÔJþ)HÒäø#¼¨ýuX¸##gkúGá›k×®-¸^ Âò1F` yHW+ô uå ¢G:VïJyÊ:»ÖxþU“íeßz ë«§®Qœ™¢šmSÂ.€ÓûZ3VÝÈ\ìFŒº[Ë~›^Î* Àú±ªˆò@yeÕ—,¼¾@‹I±@@@@@`®¼þÆÉž+] .ÄOSè\m¯äsè;úÛÛÌϾò•šàU” dU qéŸ+Áà‡¡Ó¦6Þ9«×-´î”0ÉÅýØM( | ¤{åsj®ÕC÷ü̳¢‡ñ½)ä»~Òܶry]o5‚ly¬z½wäSž3ŸTZRi9÷üªe —™.Ó„•Õ·¨¶di0¯wƒÜž ïQ>ÎmjItûKÁˆ¨ã›’ý"žÛO5<÷Óµ^© #†eÒCQŸ{ﺲʚ­+–ÔÒíwâp_OúÉ5;RtÝ_TW¡ØrñyYìç„ò#€ € €C) è{ëÅo_Õ1çéûȽç¼Yßß×*H÷/_PóÄPæ…c!€@ߢcFÍO7@uâ‹ÿÞ·=Ù âÈ‹@zyeÍ ªóéÌÓ✿¸9ÙtñÝËn{1syÓ®1^÷´ÖéØŠù—í\ê¥úpçiþNý讨W9sš~ñKõ€à¬ÿÇœ°°þ«k~qós½äå_éõ±Ø×Êü‰õ<÷5U 88\n»~ú¬ªÕªHòŸpc(.Õ|eýÍ(®RSÚAÈÅçå ”D@@@2®¸âû;{£F\¢eêMÖŽÐXÝ·»›~Ó×/¹ð¾wÌÄb\ˆÅ‚¸Ç¹a–œ±ß §#€@»@ÎÒõNçu ¢ûªÉv~c|á€?Ô÷5, Þ½òí£Ž:ê{ãö9|L;S´ L:5Úá5Ökj_›GSñxr…1·Ÿ8óô{FDG>¨f{¹W7ï##QûEM~&JCV@rL `ž—9æJv@@@\¸êÚg"ÞOõ=ãÞÊ«âçæNßù—^üù³Ïõ¼“?Š]`†³ž= pÐg÷…§Ìw» åG +œ¤—Ïž{ˆÞ{~“"~éAAôO4.©ûizÁL¬[·®Å¬[÷Æ$Á®,PZºOPƒ²}ð]~ÒÛJ°fùOþW›3.òÛÖîÝS+ÞÓ^@¦@è¿@¡=/û/À € € €@Ñ Ø«®«ý’JýMÑ£êîõ÷É„ûÌW¾PópÑIP`òP`ZlÞ¤ˆñ¾›Îºõ¿óÈ’xszž H xé©\œð¢×(ˆ>6Ìš‚è7d+ˆ¦Éîì„Òt½8¯éA9WÄݯ.ÝJ—Ù™CS- Ó ˜@@ …ø¼ìŸ[#€ € €Å#°àÚïrÕõu+­µWê'¢fèWmÛðüñы砤ù#P>§úsÓcsÏÌqY쌽¢&ºL±·]ƒåú ¿”Ø´í¦Ìm˜Fvœm‘^6sþz—…YUýO¹ †óÅ0ž6í´ñ%£Gíß°´®«š|ÞŒÙó÷÷¬90áì+æÍmO®^}ûÆÞ\#ÍîŽËŒ-Ý»ÄDöò¿9¹¹é‘¾ä±»ô‚å½äߨ»þ’=ö?|Ï‹îŸ4æ5oÛË666¾ÙSšÝ­ó›ÜH3²}­zGØÖ>—ÇSÎ>¨‡äQA Ô2}Ę·L9ʵÿªûÑ;MÙËXwëy»ç¿nœ}éoÓóÄ·Ö[p}ì¶ß;÷Œ»‡³f'ãû/$·5=½£×åPÙ æq†ò³Õ×r¼75®yì.›J7¯¿?ßÚ×ýºÝ`ܳš—bÙo(î½\Ûƒñ,µÓN=sr´ÄÛÛ÷£#}/ñ\tÛúgúó,š‹çOÜÏ7foç™—Ýæç]½zuÖ*s[1ÜN%þ^¾óöÐó xÖS[^}êÙµk×&ríìór ìƒ € €°@ª+wÏ»SÈî¥èÛzç»3.ºð¬Õ\dІ@Þ ­ÎõY½6jK"å±ê§ xLñ€}vÑ÷Aã‚‚©K÷„Þ¥\É÷ãy{šÉøäl Ý–xw,¿ýæ`Ù:Ϙ©SçŽ?©äªpyÒ$ï]_ÔÎ÷u<#6¿"b" 
·ßøJËEk×.î6([VYsµu¦B·±·êæv¿ö;6Ü÷„™gì<:2â+Æsg©ÛœÔûÝK‚•ãF›ŠXõßT{èGëíÆ[;#Í0O™ã3>þ;zÔÖxstS~ŸÖ¥z=ðô=½§<ê†ýå[A[÷݆xÝ=™ûö4ÝSþƒýÊæTOU‹ñ3•ölE‡S 2àFîæ**kž2¾¹·ÉúÝ_øZOÇ ÖÆ£¼’÷yÖžÔq[;£,V³ÇeçÜ›[n^¹òŽ×;.͵9ÿ‰¶Ó’ʘØ]† ‡^ÅœªÙÎØ™º.¦éMLçÁ¶vŒ1ÙLØ2¹²æý¾üŧÿúÓÔ«²± P7f—)s«j¿ˆg¯4Ö}ÔèH¦ÎçJ] é<Ý ­¸äçTAæU™Tкó†½-½>«÷¬ `>9V½QAÔË‚#÷ŠÉfü–GŒùFúˆ½MX/èBéø¶Í|ßùïïm—|[?”÷ƒî®íl?KOšõ‰=J#£®Ö¥xšÎG× kKu¥~ÜEJ*ËçT×¹‚DyeM¥ž÷×h›}»º¤õ¡ÐïköP¥y…™XáÏž{úŠ¥‹ÿÝÛù*ÛeJ•µÞg”îá­ië“ÔÕŽŠ¬kñþ*AMÔ•|Lû«6½’øvWÿŠåyÙË@@@b¸êÚ›¦é¯Æ[Töˆ¾¹fÛÆ.^°`Á€z5+F?ÊŒÀpèËÅÇ=ëVè»oÅNÔ=spæŸjŒþéñ›“¹˜iØ^ ë/|·ßnH—”úîx07ë\Ý@Z_ i¦ù`m_ü7êÛoÑ»¬1®ŽßüJ÷[´¯ÉVšÓ¦M¡Öðõ:a7éøÛÑ,l?jû”âegŽ0Þ£êwJûÒ¾O)HúÙRQ ³sËñ®ÓÐC]‰ÛŸ+˜~JW[(J±·‚‡tDïj‡.–ù¥›{í^¿‹Ý†x‘;6}@çÞ¸{Ùm/¦ç³;á•WV_k½È*…m뜴*È(†ì¶t^Ìëh#‘zõpƒfÓ÷‚®¶ífYpì况{c:ˆÞ͆Û-^Ö›«Æ l·n̘3ï}Š…é—ö º~™™Ó]ÅŽ8ÔïšíÏVOR¥…Ó#ï1}6?ÖÏÏÞþºP‚À_¿‡Áºgmz剫U÷éÁ0Cºö¾2cfuk%Žpa7ãòXÍ\­ ž}©AŸï­¬¿ùá|Œ‡ó~æËÖs/L0¨SùW}®Oײ^ïUÚ.ªWZü`FeÕü   wÝ«ôìü™Öí¦ÛãØšc¬ýcЛBOÛ=µŒÛõÀ‡u¼éw‡Ã{Úv»uÖŒÕçñ²q»D¯ÞnÏó²«Ò³ @@(.«¾}ÓûLÄÖë» wõ—.¨ù"Aôâº(m~ ¬\¶è×ñºòW·%wN8s¤ºpŸå'ÝŒ–ææⵇ7DÏÏK®‡\@ß…æàày'dæÊùö×™óÅ6];c/ÏØåú"|TkÙÝd¹^.ñ“ö¿‘HrŠ3‘£ÙJ3芶ÄN\©c¶€RñQs«õܯ“-fÝÊèÆGOrã'”ÿpç"SܼH¦‘©|ª•i´¤äÇšþp_òl£²GÕzï <|:½ssÖÝn|÷p2yÆxÉñшwˆº?NÛÕh»t€C»o+vWç÷ÍêA²!â›Ej;^Á° Ê£ÞãmÞCÅz@Ó›Ãù®Æ¿ŒÇ7uµwËœ³«l2ñŸ„ïoÐ=b?gíÁÖ7kÝ4í£;6 æ=+x¿ôôØþs£&ú]OêÑÄêsµÊiв¼Ûë·õ}EîZ]u©BÉà ³íå¯ ¬„9¸×0Þ25²õÜ ÓÔýä0=Ž~©³–ºg¤Î›1õÆù¿Õ³ù9?ª7¤ûöm.¢ç³3ŸÓém{†ëäì5峿ýÍØè"íD˜¦nÑýávmû°5îY“4»éÞø6=·tl¯4¦ûäN‘Ñc.Õ~ŸKïÛiBŸ“J݃;TäÐgüeë[Ýwü»ZœyÂxþó6}‹~På%Û/T~Òâ”ÆgfĪo]¯[—™|1»¦ÞEÜúì°`NP˹3VÔ×ÝÌ÷6hû£µMð„¦þäÿÜñ…A»ó\7·•ſߢ.ñ¦#íÓ¶Á”èØ½Î×ôU™;¬\^÷¨æ«Âem-Î?qfãÒÅÿJÏçÙDÙ칩cº[{Eõ^ßÔ’¸z0ŠQ¦wE+“D÷ºLñ¯ëx~æ1ßÔüÃÁχc±ïvêtQóÔ ó=K-6¿Ü¸¤îŠpYOcuc|™ŽÝ!ˆ® åÖ¦ MŸ^³æ'[À¿ª´þ¦×ÖÅø:æ™É{Âå;2V°ì]ºÖõ0©ÏfD7.9seý¢ °žSÃ`}¶º*dEeÕ™ZÞ9ˆ~Ù¶åã?]ßiŸ'5oÛ² €ÿ˜zE†=¸Óv½Îö=+x@EeõeÊÈ7‚ÌèÜ«®¼ÏîÜ•wfFxÿ¶6Ü9X¦`¬K&]ÕªÖÏEæfy;=œ÷ƒL´l=÷Â4uŸ¯óÔØh|ÿ‹:ÇA×üší0üSsKUYⶨ‰ÔëzH¶õYÛIö¿hwýŸžI$ü3W-[øÛ¶ùpô„&îÓO]YeõgT©î{šn­fͧ§Ï¬¹qÕòÚàw€íÝt±öì`….«”ÇoéuË5Ûá¬ù › UzµNí«ìÆÄô£Ü©oç‚ùà™›.[¡?/ƒ¢3 € € €Å.påõ7N6λ[˾Eßg-]÷Ç5Ÿ*vÊ P|­_ÆæV¹ÕKï´nÔ*ôÁ •_8_lc}kýN}‘=3UnÑêkÏêDO“N}yßx¶Ò Þ?­@‡Vá‰ÍoÞ9ˆžÎ`ÛDãÒÚGZœ?G¿€¥ÏkÐJ\«û|=û:ßœ«.HÞÛM=}ØñE÷«õÜÒ ‚ 
k[ƒvšqVï_μhSÆ\>MÚà½Ä ¢ß¥ó•ji¨ *Û|—œ;ïæ®ˆU¬×1¤k§*€óŠ¢8'7Æk/ZçNGµèßЯ‹iûOë§½uøFù¬¹ÿ¯ÃÆ]ÌÇÖ¹ýbz•3›óÏhXRwfAôôfá„‚XÿQWëçmÛ°m¯lØTÌ®~‡‰DîQž&ÇÜ­ñOmÌÁ zh0Ÿ­3«sλ1<¦Æº.Ü×UÑâä.‚蛥&ýõµ·7¸ o<;¯ìi~¨îYÿ÷Ä•º†Ó44½*x§tWy›1{þ‡Uy,è<5èZ¿±‹`j¸:ïÆÃy?ÈÄÊÖs/3ÍÔ´3+ w¨‚è?Ö¼Óõ W®üShRAíp ]© ºóÝm-›Þ<¢·ó¾bIÝtÍׇûk\âEÝ)ó&››Ýouÿ}IôJ}¶ŽQ}©6èñü±Ñ½1?µ_˜šµïV%£}ÃÙ.Ç…ó¼ì²x,D@@ŠMàòËkwóLÉý麇þÚýÕ O¿yZ<O›åE@ ÏË¡¢Rk¨·(è §î|ÿNãX_³n+÷/7®ÿw{Ðz0²”¦*#ºöÝzz2áNï.Èß9»wÅ=¦ˆCÐÊ-5^\&‡ó=ŽUm‹éõµÁû³» \d¦±"^·Dˆ„Ëè=(œîn¬c”f®s¦¹9s>—§ƒwç–ÍœDYeMºFÿ{ê=åm]º ìuß7'®Œ/j„2Xg½«ÂQ¾ÏPp¼_­»µýȹ2¿àž‰~-=ßõ„böêÌc«¬ŸmŒ/üI×›w¿´/A÷î÷n]£žU—Ì¿T~v –¤‚¾¾JC|á]½í;lë‡è³)±×f܇Úó/Qe‡*wA¾.ÁoN½÷}²{VP©ÉOøó”µ–TöÔ£H4j2+¤O:w¤çy? ‹ kþém›¾ÎÀx8ïø2®·¬=KUYâ/ªÌU¶zYÝóÖÍLðÞ)UTù}æjUòú¼*ïôùÙ™t‰ ·ƒô ruÛ+Ã= ‹þûZ“pCýÂxz‡>L¬Ç7«ÛúÇñ»êíˆ0I]gkÈPàØ6½|µÛYú¥}Ï =]oe:g‡]g‡égŽgÄæ—k›w¤—9÷7µÂ¼5=?„'Çæ¿UM~¥ÈmëýÌ™­¾ïW¬\ºè—C˜jÐ?[­= ´¿ëYÁÈç7®O^?ÌögŸ¡¾g­X¾è!uñ~…®ÜKƒ|êZ(/¯¬©l\R»$Ì÷¸]¢_Õ5› P*ˆ®Û¨©ÎF%Ž0ý¾íÛgÌš|ß¶mßÊ‹8? ·/Ù~j8ïÛçFT/Ù|–ÚT®ºWy;.ÌŸõý§Â龌ƒûŸ®£õºÏ¦zÑ“­ÇJamïÀêKÒ¶Ñ+ZþäEÒugmÐSЯ:l”1“»ÏËŒL2‰ € € Ð'+¯¯«Ö†'닊W·µ4W,Xð™Í}Ú‘@(@œk‘^bL‡@ºZ¤} ]]Aÿ 1žÝÀ玦1žœé¡E­Þ~žëãÄÖD"x'kû`Û‚ÜíK²<å:“‘ƒ{:@çv‰„7 IO‡Ð:ÅÅ÷RPîm]ý(\wp‡ zÛT{ôQ—LLS„/fûZêPˆˆŸ1ï'MË…óýš Þ®–ó* Dm¤²»D<ë¿ä§½88vZ,§÷Ý‘ õ¬p`©õîÕùÙ-HGÁ»7}ç—çA}ŠÝÏ–=7ó@êEà«k×.Þ–¹l0¦‡ãžõ“½\t>Ô^÷½ ·ˆ`>èÞ^¯´øBzµ7©âGdÒAü¯D¢‘{ûûcl¤÷ 2Ãx?è qGŸ{]¥ÙïeήÃ>;ªÃ|fÔ£J: k{ï]¥In·I“Ÿx:s¡ßHÏ\Ôa:WŸ—2É  € € €@¯W]õã}UyûÛ© ΂/~¦ÛFH½&Æ €€@ÎÒmR½3g jE¬ØzÎlP3è˳*°£iÆbA3µ†yRCÊûÒêíWËo}U'ûõ05Ù Z¼ Ú séàCpˆçÆôx0g:¾#ݼ–#-Ò{Ìu—+SA÷hɱê•Ï©þÆ´i§ïrÃXxÔQG•èÓ;5LBÁû•ݵ·ém¼¢¾îµ°1½ó>”žÎ˜˜:uj4óØŠ^¯[Q¿°ÛÖ“»fuRÝè Š †¶¶¢OѓɲáÈKV ÖKbýølYõ ñ‘ŒäžlŒ×Þš1?8“ÃtÏZ·n]K"éÏÓu T®ðÆŒþµ^ÔÞ¤qêù¦õϾ¶-ÙT6Îóa8ï]Òíès¯ËDû¿Ðw^‡ç¾œž§ý2žeδ÷>ÒÏTzÚ<õ|NµàoÝÊ3n÷ž¶×}º`ž—=–“• € € €@¡ ”FnÔߪãõ]ÅÒ‹.8ëÎB/.åC@ 7œëÚ½9âÖ—fæÚs;gÎáô#wǾ–årïPšåÉ1ï2Q3!#Owª¦¾òŠÝ´i“Ý6y²Ýwóf»uën¶iһ˶m¶©i¢mž°Í¶4³ã[šlËØf›h£Fl.¡Àc*)uí¾%#ͬOªUð–ˆ^µÝ¡Ã¥¸zõêœh‘®€Eƒ‚Ôÿì¶Ö¾E®»+h·»¶Û]-N÷Ѷ­•f¬}«¸¿;ê4u±ýÑÆe‹ÿÜm:ý\±×~ï<ºCkxßtÙ{?“õ}|Dûìì§ŠG¿7u<¾53ñ;M9&óØ*w6Žyˆ^§Ëcgì+ë Eqà­¼:]ÏÉ+—õÜýu¯ çÁ}ýlUÌ®>\1äTKý”‘Þ1­ñ ÷0œ÷¬ÕKþU•W®ÔõùÕ Ìú\Î+ÕDõ9|o0 êyá¬TFjÝ;7ÿÎûA7";ôÜë&Í~/ö¼¤î ýzmw Ý[6ë:Úny6¤*Bd|6õ|î퀹ù¼Ì& i!€ 
€ €.pÅu7}PýÍÐ÷oýmož]àÅ¥x €ôI çé-‰ÄúÒ’ö/™­³ÅHïÓ‰Ò"‘C2§àá•fâ•f׉fü®mkÆM4%©vrQǵ#ƒýüoFŽV3ÌÑÆtøÚ]Ë[Ÿ™î°O{Zص9ö<¥2áÿLï¿£¯y™6«zïˆgçª ày:W“ƒýRãhÉ}esjæ­¨¯½½¯iõ´ïÙ)­Ñú¶­¬ûWOÛ÷yïÓ z?l¯_棻¸ AËȧ;ì5fΫuzÇ×d®„鲨{Y;"¢ïŸJÞ™Íɤ›±jÙÍ¿„Ãåm’êúð׈± Ia†ùžõ”ÝpÙ7ñTÅ>U‘ uŸž.·o­\Z{wz~ˆ'ô „/&Mreë%"=Þ‡õ~ÐßÂéöÓ¦MaGïqxÄÚ£Œç©'ÜŠ–ïTÀÉ.A­>Óäèó²ÏùgC@@@±Þ•)ç_sñÅç¾  €µ+αA݉¾^QY´PLÅ[œ5ûåX‹>;Îx“úþízß¹t‡¼qϹóÒ¡~r£5zÏîríêeuÏkÅeú¹¼bNÕE ˜\Ѷa‰õÌ'ÍúĽw/»­½ûô.Sé}¡õœªQ´_вe%Hªwh?Öžjª¥÷$å¦C ]ÇêtMQ€V Ží™Ò_irJJIAtg’ÓV-[ôûÔ<ÿ¤¬çëi¥«›°!©ð0Ü÷¬GâñæýcÕóœ *B?:•Á2ýè·°Ö±Ö[œc5NÕ–ém5m›ÿÔå‡k¡óGèMÆ­GÏxWìpe' Çu õ ¯¬¨¬Ö»lí¥Az:‹ãGDG}W“•;š~ç`åÖDË“;šf°¿µþ¿3ƒ¯:#;uN·ó±Ö>Õy›A›·nO]Ï£ÓŸë^× ž´ãåqÂÏ“õ[†$ž ÷¬•ñºu±šu:}Ç„§PϹ?ürIa>Û:Ÿë¡¼„¾ŒÛŽ­˜?nç‘Þ糟W]–±ík2¦œ{^êêÁð„îiOzÖ\¥µ%[t?YxÏËîËÊ@@@ ôÅ7ÚŠõÍk¿ð½–Œ@\ ¤+[î× Ÿ¥éšñFúŽÕx•~rC ™™ ç'Nm\º8;Ýxg&<ÜÓÎ*žÎDÞ¶HO— m¢aIÝ‚òXõLUdxG°H•æLÅÆ®Ç7wÞ¶?óêN}[†—zñ/UëcójÒèjÛ¤3#íçA¯7›:og}פîßÓ‹#.{HU yÈY¹îY×´Ðî£^Ž×œ«zÿÝñ…¯ I&òä :OН¶Ÿ'ñT©cH†a¿g•ǪN×ç#Do+u¬lvõôKë îù6œ÷ƒ!¹¢òè ÓgÏw4âýB÷¨=3ïѺ÷ÿGóFãûk|/ùçUñÅ/eK?.Óö} ¤æøÀ£ @IDATó2Óƒi@@@ P®¼¾öCú›ñú;ñÅm_¨-ÔrR.@"ÐÖÜv »Þ>jÅ|ofꊻLÏœgz˜œéð.sçEæ Òá3º¼5¦i2,É*Èõ»ðÀ ¨ÛÑþØCÃùŽÕñ@‡w'yž{ë@ÓÊÜOïw+Õ¤û6ù¿Ìõ©i¯ã5©îë;ì³ÝöY^иdáµÆù×¥“µæí#Œ·êÄO“^Æ„ÃÚ+œó–a¾g}¤b¾˜^ÐóCëàÚ_a={SÐëJ¸ªPÆÃz?(Ä,”cƬùÇG¼ÈoRAô¶ôôÅȽªAubc¼nÿñÚÏ®Xº°±s½ÿ‡.Üçeÿ-Ø@@È/ÏØóZsìn\°`AÁ4&ʯ³@n@rU 'évÛ–µú’·½¡µgN=uîÄ\E,º|Y×!©Š…H÷\FkY[P¿D&ëðþîˆõßÑëØY¯Ãu¡ÖÆY ¤+Ô!ä›-/wΫïw¼&;ïÓyûÁ˜oˆ/¼PíûoO§­ÖÇ#&Œ\vh,ÖþâôÊâœð¬ß!®nÉa¾géÕªþ[ReuæI—<^móS•sd°×(;¡àÞ=6œ÷ƒ!¹¦òà êc§HÔ»MרèÖkÏ%ïJôâu÷dµü¼Ìª‰!€ € €9&ðÍoýxŠ*\—é;­&Û¶çXöÈ €Ã.“ôÆÆŸ®W«ôú´Ž5cÇ•–|2=?LzxNz 5‡Ÿ°Í<¦º†-Ì@ºÉhagÕux º”#ŠcÍ›æ0“hn^×q·ì´HW¥štërgÜ?V¯¾}cÇ㣮ßÿÒa™ËÒ±;$ÚëŒ{áÉ¿ÎS€tM¸¥X'N¶~¢yîBhIš‡C›ÔØÚ!i‘>œ÷¬²9Õó¬±ÓÃr+’yÞÊú›ÿ ëä²p™ÖÏ+Ÿ3Z8_ãἂ_6Ê <çth‰nÌ%õ éK‘Â}^fã\ € € «%%Þ9ê¬ÒÓ÷w\|ñ¹z›ÌÕ<“/@†R gƒ;Ic¯Ê„°Î]4}VÕ~™Ë{zëØf?ójQ¸{æ|±N¯\^÷¨j)þ7]~ç>¼c;=_(Î/Ø麖ßÙá4ùÞCæ0³úç·<©Ýž wÕ1N*//om .ìç¸lö܃:TÔðm—­(Õü ýÂÿB{òö½e±3öjŸš©uëÖµl2oÌVð?ØW´Rï¤ÿþÐä ·²z里3Ò¹¢{ÇpݳNžµéÖæª²jeýÂ;ƒò¿øôƒWËâïi ©-¤.Þ‡ó~6-ö k?¨u³Mææp>ëã~^fÝŠ@@@8묗èõŸ²“´>ß]åÈy! 
€¹%³ôUñÚ¿)0¶:ÍeíÄHÄ»ÍÄb[Ò¦7ÈþÄÚx|³¾{~-²3û¦§‹}·w…ªµ8iœ™ð•p¾pÆí­¶õKeKá”Km=wtX}Κ6½úïÇÂùû.}]¤ZBŽÜUÝ|°‘’o©Uw4‚Mü"=½ÝDû±ƒ®Œ­+½|»M†`Apßh1Éérýwx8}Fή¨¬úf8_Äc_={ü2,pïo'œÎêxîY#J£uÆš ©r9³Ù¸æ³Ã2•.|ãWk>UaK×láuñ>¬÷ƒPºhÇžq=>XûpCCÝv¯ÅÈžNá>/³gDJ € € €@n L>$r²±våê_9ÿ“æVîÈ  €¹!³ô€Çÿ\uñžîÆY†ãÊÍ„e0]™x¼ýTÙSÙj_Ч)ÔöîÓ†y´Q2é_´pkϲ=Æìù“Ûç `ʺôûœuã  D©"”ÇjæêÓõty¬ydíÚµ‰ôüL4[ÿ;Ú½½'k¿xÒ¬Oì1$ÕÅÔªýÔp_]n÷7ÆoþM8ßyì'Üw:\“Öœ1#V}Tçí†b~uüæW|?y²òóRûñ¼KÊ+«ÏmŸ/Î)¿Å_ ûɰôêqàÂfž±s8?X㡾g•UÖÔè™ubXÍ/iŒßúl8ŒWÄ>à|óÝpY¡uñ>œ÷ƒÐ´XÇ'žxú(õ“®x¨^}·2X>/‹õú¡Ü € € P,¤¾s·K‰)' €ýÈé@zÐ]³õS-öÒåR ƪr;aù{c1}I¼cCE¬ú#åsª‚Ý äµÒ­Ù£,V]ÙíÆVL:5Z1§æ&ëÙuZ•÷³+—/ü‡‚>aA0áyÞMÕÅ»3­÷줰¬ù<˺¦oÌ,ƒu~Öºû½+¾(hÙ¾4L_×ȘÒè¨ÅôºÞ¾Óbó&E¢Þ÷2÷°Æ}#s¾ótÛ5¹*\®{…UéGAZᲡ¯\ºè)½¢bZfe uåýŠÊêT—YC™—\:VpžÔ*=}Íéz?¦¤ôºà~ÙŸ|NÍ=\÷’¾î3”÷¬à5$z¹Ø·Ã¼©BÅ šßÎgŽ›6nûªæŸI/+ .Þ‡ó~ö,Ò‰5k~²EŸt¯japÄÔ©sGö•£¬²ú3zAŸ·×½­àž—}µb;@@@ ,¸~¢þn,×w~Ò$îÈÇ2g@†B §é@Cý¸qþu™ ΕO²ÿ® öœÌå}ž>{þ»Õ2ôgúbyñìÌžöSl]æzý‚qi_‚ÅÁ1ÆM:ð>㙚Ìý iZ=‚V>¯‡eRÜò„qfâƒå³æ½+\ÖÛ8x‡°Îŵ¹Øš]ííÓûv©˜UuloåÉÕõ3fV¦÷tß®ÀòŸÔËBºŠ~Y~dã+Oþ(›ùnI¸ t]¬Ó”Ýì)f¦zæ”pYOãsªß5Ñ¿j¿#ÂíЩSÅšŒnãÃ5ÇÍ--稵óé¥Ö¾»ÄDRÙ?”^ÖË„ïo*ÀuÔQ}Òv—dðŠ åg¦~šƒm‚à¾ÞT|sÙìêéÝíS ˛۾¦sº¥½¬ö Ý/ï9qæé»¶/ëz*¸—Í©¹D×Hðú~£!ºgÙHÄ.ÔÉ×V‚ýAšî½s©‚€§þdýd¸\Ϙ‚êâ}8ï¡i±Žu¯ùSFÙKÆí½2c¾ËÉ Ò“žÉµž±ßîWéôºŒôt…ô¼ì¢x,B@@ N`ĸ1³ôMÕìÞK.8û…‚+ B@ Kýj˜¥cö;Ð>¯/v_U‹§Ë2¾ØâYW€ìdîp¾k Z€v“¸­˜]}¸ï¹ã=ã}LÄcÂí¬³G†Ó]7ý/qÓø]K‚wøî¬WÿmzøÃesªç¯¨¯[, ‡ À3n—ÉÑ ¨ÏT>cZ®w”º¤º.þ·¾Ž>$Ü®PÆA7Ååsªç©uéò𼨜™HäþŠ9U L³]ÔÕ;Y?‹MeÆ©5©÷)}Ñÿú¦>‰x“KwçoXÈÔùôÕ¥» Ëf"Þ·NŠU•ß_øZç -½=gÞßX_tm>ȃ=®Cq->Cy= çý ¯á²‘yg–ècrz˜”®«Ï陽©icÓÕAŽpy8N=Ǭù®¶;V•¬ôh7›Ã !š~§¶ îç~¸}æ8wŸ—™¹d@@@ ÐweÁ´þø‹‡Ë#€ €ÀöyH²Ý¸¤î ÈžT0}‘Qé–Qzè­/}VØí; Ò=«Çÿ úâ÷em·I»í¢î wU î-ÛI±±í´>ìþ2ßÐÅJÜÊgÏ¿PM ëÃõ:æÊïÕ5ü&EYÿeùŸ‚oû(õà8a+DeÅlö}óžç¦*@RpôÀCã_̘SõqÙ.–IP‹QEµ¥ú¹ÂŒ4—ÉèaýBö\p>tžö—ÓAòKwµ­}RƒÎÑÑš¸­u.7þ]¹¼îQå©rÓÚó5ïáìCe±šŸ{ÆLeÜE­ðTónú`ðfµæ»ñ‘x<Õúx°J!¿³u<ýt=´šf\ëZ1—ÚI1’µ--‰ywý¼ã;›»N±ÿKƒÀ >¯ÿ§ëâV<Õ^y¯é›ÇO*¹¡"Vó] Ïêºx]¹›¤ ST®÷ëHjaÜž[ùþÆsË̵kïØ.ðÞ]®VÄëê” ’‹ZÓKmé)h{¡Z1_8n’I¨Î?u¼êœí©¿Ñ!Ó-¡ÓZ^4è`‡éÁÑ—Ô.©ˆUíªJ6­Ý{&»B|>ذ´îáT‹ìŸvÃeånüþ ŒÏMÝÚ½#Æ®·ëAë+býI×HP#:¢Kxo] 
ÿOçl§p[ÍÿKÓj]¿žcƒyÏJõ¬áÙoeäñß›Ö'¾Î÷4ÞÒÒ|Á˜èˆ“Ó×bkï‡v÷lê)­\[7œ÷ƒ\³Êü4Ö×®Ðóêûž5ç„ÇU%ž¯Ž˜8â,UÆZ­ß]wžÝ¢Ð¸î¿æ}z¦éYÖ68wŽžÝúÊ~½mÉÝ7ïÑ>«Z|ó³ ‚D¸i0ÎÕçef™F@@hX°`A©þÞûp0ç5›U¸ € €@÷­­E»_ŸSk ûYsrëÆw?R%Ñ9sú"x_ZÞ«0Ü©š>]?ÓôÅðQ™Á—Ì}L\æüÄa½*—.Zª€Î A ­Ìý•ö8ëÿ)ð1CÇxG0®×–¿w¾yÏŠ¥uÿËÈÊú…wúÎ}De2,ÛXqTûNýbV.ŸËé}§ƒèémÙê|û¿ô|.Møæ›a·Ü©l)З J¤¢öÒT¹RAt­U‚ýÍ„Ãs)ûó¢Køßwgª•ô‡‚ÖÛ×gs>h'üÄñú,´¿§78€5cÛ>3g+¨s±Ú8ÖÈñCZ£ zë|¾ƒÏÜSæW®¼#ýú€p}oã†%u·%É“TÞ§:o«ûBTÇ{‡–T™ù€ò’¢‡Ûꃾ1` da¬ž5¾¯ÖÉW¤“²v¢Ú§ß•‹¯5Hçq0'âñdc|áüÀ$8ß™‡Ò}u—¶kä,«*§“4Nуû°î­×«gƒ 7‘UZ¤{–õ¼È"ÝïÆ„eQNÏêÜûA¸®óøWËo}5éüóÂåºN ª‹÷ἄ¦Å8Nn~þB•û×™e×5º[ª‹ç]¡éïê>|®ž_­Atç6¥žõ oÔø¶ B`¸oê>íÙkKŒÛ/\Öa\`ÏËec@@( ö ¾Ó÷Øîá/}©¦CEé*&EA@ +yHJt ÜP_w¶5þÛ‚`Š‚œõKBÛ+ s©Þ»~‚‰³—.Z5ö:¨Eü¹¾ïŸ¨€Z½w?(÷‚ÊmŒ×¾¿qií#ÝoYXkVÖ×ýîI÷ÆÛ;_Á£ÿô¥t¥¼.Ú’hÚgE}íe}Ùg¨·IµöUY™»fò¥²üÙKª'ÿá|}.Þ΃2ó ÎÉ-ºÞ?›lñ×5Œ^Ipkkv?£«–Þü§ÿ>õà¡ÊPÐ"ò™Þލk¢Iy¯7~ËáÁgnGZ÷¯\¶è×O™ ºO¤®É§{;v°^fO(€ÞkÛ’{¯XR[Û—}ú³ZCE­?ƒ–ò­ƒºD"k¦Çæî.*²± Lü„¤ìïÒ HöT~›“<^-m/èk€º»ô²}ÏÒëG>«à÷Ãã©< ;¿þ#\×Ý8ðëF²2\¯ gÐÅû´p>ßÇÃy?Èw»æõêÕM KjOPï8ŸÐý5èá¡ë!øì9·¼¹%ñ޶g„ ^—£ÏÜ)ZþjæNÍ·>s>œÎÓçe˜}Æ € € €@Ѩ·±éAaõ}]ú;ˆ¢)<E@ Ÿjø—ÿCТS]¯Ÿ`}·jÓí¦–U»ª`cU²×H|M´Xg›ÝŸ»zgwN<ñô1%ãJÞ«~¢Sš‡«Kê=uLÕܳOø-®!èÞ´szêr~®ºH}›M&hJ¸Öüâæç:oShóe3çáEìIêú~‹Ýõžø‘zoõ|›zúÓ²xª1²ù!£V©ùPöiÓ¦ˆŒÙëãz÷aÎxoS™öÓyYA†gtÝýKÓËÔâø‰|(ËpæQãÃ==Y~´]AëÝ—õùxÑóÝŸ6Ú kÖÆãéÙÌkùìšC­u'ûÖîï·»ÎÙ8Ý‚×<©ñ¾>ëâµ阚egž¾kiÉÈSô™:LAéàÞ:Îøöߘ§M2Ù¸bù¢àü ÊPh÷¬AAÊr¢Ãy?ÈrQò"¹à÷—ãK> ž0Õï,o×k-‚û *úª`زdEüÖ.íÁóϳçLõÆr°~¿ÿÚ6ÿë÷5, ^ŸÓåÀó²K"€ € €9#pÕuµ©×±ƒõ¥ìqŸ_}_ÎdŒŒ € ƒŠe1 € € € € € €@! \y]ݪ(ýoõ>öÚ_î_³k³97rE.@È}鹎È! 
€ € € € €ˆX{l°£sæ¾%ÀN €©ô"=ñ@@@@@ Â÷£;ËûÑ‹àtSD@ì HÏž%)!€ € € € € 3 ,ðô~ô÷jJl¥EzΜ2‚ Òóá,‘G@@@@@ú)0rÜî‡ëýèÔ¯ûÓ ¾ôÙÿösw6G@ ¨¤õé§ð € € € € €…*à¬×ú~tÃûÑ õS.@Á >x¶¤Œ € € € € €À° x¼}Øì90 €@þ HÏÿsH @@@@@ØNÀ“j‘žô[x?úv:,@@ gÛójÖ"€ € € € € €@¾ \~yínÑÑö%Ó7^t~õDå_“  € ÐWZ¤÷UŠí@@@@@ÈÈhÿÝ­YujL=OÎÙDÈé¹s.È  € € € € €Ùp‘£R ùö/ÙIT@(.éÅu¾)- € € € € €@Q¸T‹tgô¢8ß@ ÛÒ³-Jz € € € € € ¿@k×îÖ®þ¬@òO€@zþ3rŒ € € € € €@· ®¾aOkíÚà/_Pýd·²@º Þ- +@@@@@È?‘%£ZßîLÐÝå_ È1 €Ã/@ }øÏ9@@@@@@ ‹~[·î†÷£gQ•¤@ŠK€@zqoJ‹ € € € € PàÖØT Ý÷S-Ò ¼´@Á >8®¤Š € € € € €À°8kR]»'|G‹ôa9@ l!‚2 € € € € € €€1W_]»·)µÏç^ûÒ5;c‚ € EúÀÜØ @@@@@œp%­­ÑÕ*}]ÎeŽ !€ GÒóèd‘U@@@@@zpï Öë=éö¼k@@ 'é=é°@@@@@<P=HwÎ'žGç¬"€äžôÜ;'ä@@@@@ ض@ºñ¤L½@H X@@@@@@ ÿ\óÃ]GEK^vÆl¼èüê‰*‘&@@` ´Hˆû € € € € € c#"%­ïGwî¯ÊAô;?d@ ¿¤ç×ù"· € € € € €t)`k}?º5A @v@€@úà±+ € € € € €¹#Ðú~tß8Þž;'…œ €ä©ô<=qd@@@@@L°Eºç ôL¦@€ô ±  € € € € €¹$°`Áõµç¶®»ÿžåRÞÈ  €ù(@ =ÏyF@@@@@ C dÜè#ƒYgÌCñx<™±ŠI@€ô ±  € € € € €¹$àYïÝA~¬±tëžK'†¼ €ä­ô¼=ud@@@@@VkÌÑm`‚ €;.@ }Ç I@@@@@á°môG }xÏGG(h”ƒb € € € € € P”—_^»›ºtßWïG߸më %…F@ Ë´HÏ2(É!€ €ÀÿgïNà$)ëÃqWuÏìÁ± r xD.¯ñ6/p]9vˆ'Â1ƒ‰õ—¬Ñ$`¢Fñ » (¢áЖE¼0Æ[Ä‹¨HD‚\ ì5Ó]ÿoõl÷TÏNÏÎÑs?ýa¦ß®zÏç­îYú[o @€ @`2Ê Óþ˺gٷ׬YS̶µE€f«€@úlYã"@€ @€ @€¹"àþèse¦“&M@ }Ò¨5D€ @€ @€Æ'pîû.8#jHÕR ¤g‰û£rñ’ŒY@ }Ìt  @€ @€ @€ÉH³ôÈsß{Á¹M­¦Ù“óו¤òͦí^ @€cH3‚ @€ @€ @`’²ä›i©ôæsßÁëó–ßõž™&龑¼ýç¼öדÜÍ @€Y+Ð1kGf` @€ @€ @`– T{{¿]*Ï‹«¸§ÿöÏï½àö¤”,Ø9Äâjô4V­?=KÓgnßüëó׬Y³m–1˜pÁ÷Q™ð5@€ @€ @€Œ]àÜ÷­ýIš¦GeY²-M²o%iú'I5y[5é½.MÊ/NJé‹“,Yœ$Ù3ßzΫn{KJ @€¹+`EúÜ{#'@€ @€ @€™)¯>?*MóÕèDG–fo)¥ÿTKgÉŽJVyáÛßôjAôăŒAÀ=ÒÇ€¦ @€ @€˜24+^ƽÖX¡+Ðw>²leÑ¿X陽€@úèÍ” @€ @€ @€S&PíÛ5ÞèL5{Ç[ß´úã× @€À˜ÒÇĦ @€ @€˜;~¹ý»q1÷Þ]Z¯fëÞò¦Õÿ¸Ëv @€Q ¤šL @€ @€ 0uçŸÿ†íqSôï5õ Ë®ÛúÀ¯_Û´Í  @`Ìéc¦S @€ @€L™@ã>éY–Ü´uóŽ®5kÖôMYo4L€f™€@ú,›PÃ!@€ @€ @€9!P ¤gIöËJZ9iÍš×=8'Fm @`’Ò' Z3 @€ @€ @ m½YH¿/ɲßþW¯¾£mõªˆ¨ ¤; @€ @€ @€À غõ×?©$ÉÉo=çU7ϰ®ë. 
@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€˜dt’ÛÓ @€ @€ @`\˺V½*K“§%Yöóžîõÿ0®Ê&@€Ct ±Í& @€ @ ¾èm#¦ªÚ*àØl+§Ê @`rÎO“t^–%mW³ÇŸúŠç—æ=£oËí›â±½]õª‡f¦€@úÌœ7½&@€ @€™%à‹Þ™5_s©·m?6çž±ÎL'ÌÌyÓk-°lÙªƒ²ŽôišÌïÜëУ½?žè6ÕO€Ó[@ }zÏÞ @€ @€]|Ñ» ‰ €HF£%ïÄ tu•ON¿¦”$O©fé÷®é^û¾‰mPí­²yÉÓò z¾?K’cž¹lÅÞ_Ùpá­òÛN€³_ þ>{ @€ @€3I`¨/zgRÿõ•ÌDü²ßKO_yÊ’%KjÁÖvŒaé¶móKIú¡$IÏŒ/ëÕŽ:Õ16úú¾÷[¿·¿túYAô±9*E€Ù$ >›fÓX @€ @`Nø¢wNL³A @€À4ȯ3¿cÁ/Óréê¸ì÷W¦Q×t¥M7|êâû²mw>*Ë*ÒÓ½ö…mªV5 0ƒÒgðäé: @€ÌM_ôÎÍy7j˜:Wƒ™:ûÉl¹§§gKO÷…ÿmÆÕÝ= @`® ¸Gú\?ŒŸ @€f¤@þEot<ÿ¢×ƒ˜`üj0‹:;îMÒt¿¸ »Ë~O°·ê  @€Àt°"}:Ì‚> @€ @€ @€À´p5˜i;5:F€&LÀŠô £U1 @€ @€ÌWƒ™3¹ä”W^îè|fZJŸ‘dÙaIšü8©fß¿'Ý|é׺»·7Š¥ËW½(‹ri’Ýßsùº.¯} 0ûÒgÿ! 
@€LC“Nzé¾¥=ö|QÜó‰Iší_ò~?˪ßzðîêM7Üpñ¶Ýuy$_ôžÜµjyš¥O/ƒ³áŠuï­×yb×YPÎ:^œ¦éïÇ @DnÎÛÿYrß'nîîÞQÏ×êù¸ãŽëØsÿ#ž^.%Ï˲äq‘ï ¸Ôí}I5¹-+%·ün[åÒ¯l¸ðVåóíCõíé]] ÷O¯Žzžý9(úþó¤Tý¯ÍwU.[¸_ùñ¥lÏ´’f®Zÿ•áê.î;ì}@éóm}½•;6}ê£ÿ[Ü?Âtzòé+ONÓä™avT˜-оý¢š$·ÅôßyðÞ[¯¿á†úFR×ñ/:û‘ó;˧§¥$ú”eî‰yÿaVMnºæÊõãuT;òG»ë®åÚq“”çy*•ôÿ®½jý/†Ë?xß²®U¯y}Zl¿mÃåëÖäûOî:óÐRÒyj®qæßSݘV“om(ÝsÒÝ]Éóìî1Úci¸÷×q]]{íQYtL¹”Çô1iš˜UÓ$¥ì;I¥ï¦ž+/þñîú3Ôþg.[±÷âŽôèrGrlRÍëN®CiÌÿ÷þï5“| ÷˜où¡˜l#@€À¸âïÔÃËiÇú4IOlT¼ãñ‚¤”&d‹ÿúÄÓÏ~åµW^ôÆþA‰È~q”Ø'þ&]1h—— 0Òçà¤2 @€L@„Þkÿ#Þßç¾-‚ióâ'µ_·+'‹,÷.=cõÿë¹|í{bGÄl‡~Œä‹Þ¸ŸÛ¥I)™AÚæµœÐµòaóÒÒÑÚé;›¬7|¾ÿ°ø‚ùѧ¯8ëÚ+/üvþzðã¸SÎÚgѼŽ7ÆöÕ;ƒÀÑçB®h0ù°¥5K—¯|GÏë×ö6%÷méi«"Àš^å>÷?ju—Vì½òøøR|¯$-½.µFÞ§÷\µîëM¶x±÷å®4-},ßÝ1¯ôÒxU =ïWLˇb ÇÖ›¨u+~õw3Mö>ðˆ;—uñ± ÝëÞ\Ï3Äsiiת¿bï¬Mt!Cô¯+ÚH–u­þj¥/[µñêu?*ìn•lw}­Ú©m_ÚµòÑÏ‹ãE)˲ï—{zÞ°†Þùo1ÇqÞcz¼å‡ØLØ6Q'YÌ„±ë#™ °lùªÓ⤬ÿˆ?%ûG{ãoÕ-ñ×åŽ8áïñúˆøCÓŸÃ-—;®?þÔW9šÏñ™0~}$@€‰HŸWµ @€ @€]" ÷˜˜}-¾È­­ŽÕÌ•,M~áò=âËÞÃvèŒ/Ϡ볪[¶œ¹qã'~·KEcØõ=7‚‹ºvcÕyú“ˆzß_0?:‚|qÙÓ´}xbG©ôÙ%]gµ©û¢»‹Í,;}ÕÆjóÏE¹Û³äþHßcÈƒŠ‡¦YrTÔ¹8BÅʯµ'w­¾÷šîµW7ò·H,=íìc"xù™èÛ¢¼oñEøÃdKôëñѯý¢_Ÿ¯öUï*•ʯ˫ˆKµ®ˆ§Ò#÷Yy™øBý·}þúª<=ÒG~iØè×µ‘ß¼L}¿¿~•E—Ò#cÜGEŸ÷ÍÇûžÒªÞeËV”,Hó€ðókyc¬~?ê98ÉÒ£ÃíÀøyF©#½)VÔ=ùÚî‹0Yõµj§¾ý¤å+_c¼(^Gœ9ûî–¾Ïÿ|Ï'ï­ïõsš<>YpÐ÷bn¿üÑõÞÏûǶ<àq,$é%Ë–¯þÓÍ÷ô¾a¸Uäyþúc$ÇR=oýùy§ž¹ßžó>.¯µ;²$»3~åóý@¼o÷É‘ù¤‡ÃÒùóW„XÝsźO×ëê9¯wÎùyPçôF½Y²=Žë˜ÛôÎ8fßùÞ‹jbÜiúî¾¾Eª®vCã=¦Ç[~¨±Í„mu’ÅLûtêãPWŸ¨ –¥—â~j|@ïÕmñ ý?I¥÷¢k®¼ø§ƒû_{_–;ó÷ú³âoÉ¡ñ·áŽxï}¿7«|êºî 28ÿà×#¹Ìà2ùë,[qÈüÎ8!+®tQJÒGÅg̯ã3à¶J5ùlÒ{ço‡*cÛ(Jé©‘û•i–=¥Ù9ÉÖ-—ôô|òžz ùÕ[Ìïøh¼~Nþ÷e~çüŒtœèA€†HÞÇ^ @€ ÐFôOóa<º#†þÁdÛÝß©¯>àõ嬼¤”¦±r5‚ÑizryÏ=>yŸ]+1Ž_Q×’øÒþÌÚ=_Üÿeè/)è#Èþ˜Øwq¢é>YùÝÑÜ«‹Mî¨ômžWî¬Ñ?W©TÎûÍÏ¿÷¥o¼±·ž¯võý;ÞÁ‰·Ç¶R5>«8?{Cw÷ƒõKK¯N²êgí«½|bW×¼H|)~›oˆ Í$÷¿é†ËLŽ=öØÎC;úïcl›¦Éüެsmd}Füìr™÷v×—÷i¸ÇÒÓWÄÕ J—`9:Óö${þç¯þظ‚>ùæm†ÅeÕx7=v®ÿ؉§®þzGG¶!¾èÀkú§'u­X¶±û M™Gû"V;Ç—Æß¨T+/Ýxå…?\¼§{ÝmqÙö-êìŒ/˜k+Ê#8‘¼&~_0_÷©‹ž_Ö:©V?ÔsÅ…×®#½3úw‘ï"8;Æðˆ=³½Ÿ»Z÷¿”œyD ñ¢žË×­To5îí~UaÛ‘~Jþ…y¶àÀXå›\RØ·K2ËæŸÑü8o»ë\ñu~ìÅ… >Û:"ê}ceË–|¦ WHˆ¹¾3NxõP+º?sÕÇïˆöþ#®€ðµ8aâꘃüj ËOZ~ö36^qÑW‹ýÛ%=ºc©VÞcz¼åk¼ž˜“,fÀÀ§}ãJñwfcüí8qg_ó«Z|?Þs}q¥¸HíĬRœÎu^ÛïȪ•¯§IgäOŽÌÿ"ÄI<¿‰€ø-q5ùõüùß¡(ÿ¾¸Ãïzº×^<^ƒÚ 
R9zM\%¿•D|lÔÕøüûI´›_ÅåˆèÉ!ñY’ÿzZÿn¿Ç*súùî¾õ…C}~×ëÌ?Ç㸹8ÀßÇÁÂ8Ùî‘=Ýûe}¿g 0”@ýøPûl#@€ @€Ú(_ôn*ˆ^lâÚ«×Þ’Uúò@víQJÊç%±ºµþz,ÏùÌ |ÖPAôz}7|êâû"}qíõÎ/˜ëûêÏè^Ö*ˆ^Ï“?W²¾÷×_Ç ûÃë類kÁ‹,ûöíÿ{Ó«‡Ú_Ü+¹/‹Èù¶¸_÷ÙÅ}C¥#fð]ùÆ¢ÿôöŸ}ge‹ zS¹¯l¸°vl7¶sÎÇ{L·|q\3!=ÄIÏÎ?Ÿ÷=ÎÅgãâx=->l¶Ö£i~’…ï]cµéu|¶ï÷Ã{cÒW}Öæ»zmè^÷Çñ~Úwýt¿¬š½+þîõŸ–_Ý¢Ôñõ(sdäÿF–õ'¯²áòuÏÎó÷d÷=,ީÕNˆ‰âÿxüñ¯Øs¼]=ô°c®Ž•òo‹zJÑ“_V²êÉ÷n«ìÇÊ"P}Xûˆø›¶(«&oˆ~å'yŒG K~µ»ÏïZõÕì§õf²¬óÈzÚ3h%àt­dl'@€ @€S$ÐsÕÅqoæ¬voñøòÿqË’ÅÏWWFøs–UA¢ñ|Áü»ä¡[ú[ŠUwÃ?²¤ò׃.w=düñqïÛOä;#Fr\~Iú!3ÆÆ|s-péj’å—JÕ£ZIooèÈžÞH&1?yS[óKÖ';vô¾ywEóû¹Fäçy¾,I—üí®o—ú7œtúŠçÇJΫâl…yáüÍ¢¿`ç‰-JLÌæW¬ûr 6åµGÐìiùœî®¥‘Ky= ç—ÞXŸŸ(÷¦ØÔ¸ÃpílÎ6Ÿ.¿Éó¤¥Ò9ƒóÎ+-#ºï¿ßÆß«—E üä W­ÿJýÖyçó`jœ¨ówñ¡‘ŸOÉþñ³8>Kþ!?¡¬§û¢/Åæ÷zww%‚êk"ø~Yž?J2Ñüç÷§Çö;®(rF4|Réì3[ú¶³±{ýÆÁ'ËäÓz®X{~eËCO[KJ^ ýu½Lü]Ù«žöL€Z ¤·’± @€L­À‡ëÍgYú¸zzbŸÛóó׺»·F”bsÞ×`°›>qg`c7ÙúwW³þ xmÕg’žÕªP9éèß+Ø{ïÛ±3@Ò*÷®Ûk+OãRüùž4+ýÃI§¯>a×\Ão ƒ|U{Dl²å—Æ>wco¾4{dcËÎD»ë\þúä嫎+—ÊúWw'ߨšÜüTÑ}Ë*ç×Ó±ªtwÁ¦QKáù¤¼îÊ}4Ç`üŠI½pg¿óô®®…õ>ÖžÓ¬vI÷¨øWµìM;G÷¢s>Þcz¼åG7ò©Í=Q'YLí¨fQëYÒÓÓ½¾vRUËQeÕÆgG\JýÂXþ÷íXÎÒBþR6æ•Êù-Ò$ë¿*K¬4϶ÞyÚç¯þؽ-û;J¥¶·ß¾ö Ä ½í«MM 0Ü#}.̲1 @€ @€ÀŒض£òãóûÿ·=îázÔd ÿ‚9ߣ~äƒ}敞÷÷Îûùð4KâÒÖIsp±E­± ð—-v ¹yc÷ºcµßw¢Ÿ¤Î^™Þ?µKòÖ ì lž‘¿Ž@ä'¯¿þ’‡êûFõœUÿ"V†_Aû½ÊåäºXÿù¨ï#;îß¾i$uF0ú µö²´#úü‘¬ÆUƒã¿RÌhT<Î_G+q?÷¨¿?Xœ¥ù¥ö›í®¯©òx±´kųâ̇k"¹0‚Ë_ë{pë ?·©»vBÄ༓ö:-ý¸ÞV =¶žêy´ÇRxÖVý‡üÿ UßpÛb®~´siŸê¢üD—› ùkssYÏSØ5ºdÛç|œÇt2Þò£þ”厹{RþY8–“,–._ua¼òKz×N²ÈO,š²Ìᆷ÷õÞ² sç]QJÙüÝQlíë»eÏzþ,s ýaóÓ¿ÊWµ÷·—­Émv×7û  @€©HŸ:{- @€ @€–×ú¢_-=cÕ¶¤-Ø]±e%¸ã¤SWþ~©œ¾$‚§ÄŠëÇÇó@ ~ 5!=ˆ ×ÚhâÃÑä£ã2äÏ{¿®ØÐþéâSâõâÚ¶Jߺâ¾Ñ¤7\±¾{Yת}#èýÞŒíí=/Ú}Þ‚Å ¶ÆöÕ$ýÄ5É}’¸4ðàz;å¬}¢L~BADÌãÞ¼IüäDµÿúŸÒ;_çyóµGö“z*nw}źóttíb]䵑Ú3¢ÿv{õ„¯lºôÁù&ûuOrÿ/—e‹wDçÅ< HMßžwê™ûÕç§ZÍÁú‘Ö‘õõý$íè¬e/§¥ü„‰Z =¯7Ž“‡ç;⬈ƭj“™¼@IDATGùk"æ|<ÇtÞýñ–%Á”eŸÀ“,¦lLs­á믾äîeg¬ÎWwÆ}Èw{UÖ|ÕøÒ®ÕÛã³p~¼‡k·ä“Yš>5/Ÿ£wÇç×ú1Õ¡ 0mÒ§ÍTè @€hˆxqrüD =Yдg _,9mÕ#:;Ò÷E`yvÆ}Ó|Õò7#Ðys5MnN²ÊKIé3üÜ{"ºZy`Ë'Ò½þk»ö,¥¥ÑFS =:õʼÝü^í~óãèĆîuœpÚË{:Ë ßVJ²—Ęòl¬¶O—GdfùÒdñ³å«^Íën(6³°\~Lãu–lˆû´ÿw)KªÕ|zÜÜ7ÿKÓ«±:½šÆõê³R©ZŠíõýi5ùV£|$Ú]_±î<VNç峺ߢùY~ÌMy 
=?I!ëZõÛèWœÞ/~Úò˜Wê|T½¢¸’Bíþõ×#yÎÊÕÛs«Ú#Mª'‹õƺÞz=ùóDÍùXéz߯[¾^Ït}ž¨“,¦ëxgq¿â¯¤/>¯ûÏxÁ@ã=ÞçÇ'tþ÷wl,®tQûpˆ+] q’ÕØ*UŠ˜*ô©’×. @€Fฮ®½"PÜ ‹õ0Y'mW¾ ½\N¯Îc襸6 {ðî¾OßpÃÅÛŠYÖµºérëÅ}ãMoÚtéæeËW_k WÄÏ©ùÊÝú}¼Oî:óп o#ú·v¼måå?sÕÇ×{ì±çò˜cNˆšó“N`I>G¿±ñÏ-]¾âÄž+.ÌmjŽ[þ/Y¸×ÎWÕ[¯é^ÿ/;_Œé©ÝõíÒ‰,ùvœñ÷I¹|yøÕ‘v\—ì?î+.œÒ`úÒ¥K÷ˆ¾ö¿ÒdÔ+ÇwçÎ ½¥Í?ëLö©½ŠSk•¯ÕöR¥ó÷’ߪÅù«l­Tnë,÷/~cäðVåG²}"ç|,Çt±Ïã-_¬kº¥‹'C´ó$‹é6Nýi¿À’%/[k'éŒ÷Šíï  @€±ôÿË~,%•!@€ @€&L`dŸ#•§Ù”Ò#ˆÜYê(uGðøàž÷ÅÚéÕ=—¯;éšîu— ¢7ú=‰¾¬ï‚¼úd/XÔÙñ’zSi:ÿ呎ÅãÉÖwô]ZßÞŽço¼±·çе×ô\±î¬Í½½Œ[³¿;Ö’çw;/Ç Î/;¡kåÃêíôô|òžèCmErdøƒúö±>·»¾¡úÑså…WÆõˆÿ"ß *ÙoAùÓK–,Ùí½…‡ª«]Û²ŽŽŒ€tm}g¼Ø®z?×Ý}ŒõWµúJÃß{}¨6³ršß½ö(%½?¨§ó:âøMíu–ŽëRô“1ç£9¦ëc,>·|±®é’ÎO²¨÷eÌ'Y쬠x’E½NϳX`'Ï”²ÿ›Å#54 0gÒçÌT( @€Ì$X ùäz+•©¤òè£pfð0ËÞÓsÅú1ß{¼>®ñ<_{åE߈€å÷ó:²4={ ®¬~Y÷+ê«Ôöµ/•×½áòõÿ/ÿËZ­iºÏ¼¬ôŒ¦òËÜÇ#‚ý”tu•›öåE»ë¢qÙîÇeßµs×s:÷:ô“méûmdSÖ™[Ï—Æ-êé¶<ïôŒ9ŒûÃÇÉ£y¤é1yö¸þóæžîý²X4âþ?ªíK“Ç?½«+n0ŽÇ$Ìy½w#:¦ë™‡xoù!ªœ’Mu’Å” F£“*ÐÙ›Þ^o0N Š«£x @€3]`tÿ“0ÓG«ÿ @€ @`ä«¿ãÚoÍ»ÁâßV·n¿qª»–’gÖû÷ô¾¶žžÊ縸|íÒí±\ùÉùeçOîZù”Z?>ïS¥R”@ßC[>š¯Ð¯9N~¨½Î²oïÜ~à²dñ«jéñüjw}-ú+îÿ.©&ý—ÅOÓS—eûÔVÿ·È>a›;RVê$ÙC[*}m}dÕ¤V_~)æ¥]« 'c ?¤ã_tö#ã²Íµüq ~mpAíþöq\.Ú/ÛçMƒ÷êõ$Íy±OÃÓÅŒ-Òã-ߢÚÉÝüèWŪÖÃòJâËøwä÷W…í(œî¼OuÔU)EX}˜G~"@ìÿ ìaÚÈwmî­|<¿„{ž.w$gÇåÕk«Ñ#ñ“k¯Zÿ_ùöÉxD05¦)_ŸÜQl/ÛöÐ{bÅòïvn{×I'½tßâþѦÛ]ßpíoHï{mœÄñéZž¸ýÉg¬>o¸ü±o¯\Aî#óºãäøüÕ»·íìH³÷ÆüÜS«?Ißuüñ¯Øs$õÏŸ×ññÆ\X;¢’¼yp™­½ÛóW–xË §½üàÁyFúz2ç¼Ø§VÇt1Ïpéñ–®îÉØ7Q'YLFßµ1ÕéOòdIö„©¼šÇT+hŸÌaÿÇw¶ Ò8 @€ @€À´HÓóU¶Ãõå¤3V®ˆ¨ì¿ÖòdÙMq©íÿ.ÿdí‹Ë§÷¯®Ž#8ظ'ùàö?õrØ1_ˆ@ã^ù¾6ÖžçkÇëÚ¥ÛÓ¬»ÖN’¾"žkýŠÁëÛPiÙ«^¾ó¤€–Õ•÷Zpv~ôZ†ÞJÓêäÚ=®«ÕwÔö¥é~¥=÷üÜɧŸU ·ª0¿'ùI§­xÎPûÛ]ßPm4¶uwWîIîIÌßçÛâ ¤7ŸÜµêoûÇ“Áû`i×ê³"ûþ¼™Hýè×·}·–O³ƒË~¦{ýo«Iõ-µíirðüÅ ®[rÚªG ÎW ˺V¾7‚ûù±‡xöþ W®«Ý^ ž'ÎþiVý»Ú¶xÌ+/üÂɧ®È/ßòq\W×^ËN[Ù¸êC=c›ç|¼ÇôxËׇ5íŸ'ê$‹i?p¿@üÝÎ+‰«£tRº¨ÿä®ñת @`ІýŸ÷)ê“f  @€ @€À¬ˆÜ’½8âGËÎ8üÝÕ¬÷s×tì×ù@—.]ºGuþAÇ–J±ª:é¿ßw¬”ÝÄ×Äîˆ Oý£/ëûbgR¾7‚ÆûÅjù×.;cå½ÛîÛqîõ×_òP­wqð““}–•Òì|=0V3ßùˆh“óä7ÞØ;!£èË.H:Ò3kmõ7л£oûGÇÛÖÒÓV=%B!—ÄIÿxÈáG¿7©¤ŸÿY龟ÞÜݽ#¯;?a`Aç¼×Gˆùmùë﮹úÂäéâ#î%ÿ‘¸løË£O$ŽIKßY¶|廲jú_ü¶ï»7Üpñ¶Z]é¼#«¥ÒsÒ4{]äËWôï_¬§žnw}õz‡zþZw÷ÖXE¿¬´Ç_Žþ?±”¦ï‰=îÝxùú ‡Ê?Òm»{”“ô¬8nV䡨üŠ•Jöš‰:~b,Å 
ìJÿ4úõ¬Îròݘ¯·dÕ¾ÿºæÊ‹o1eKºÎ> \-?9N—ø‘ïiýãÌnÙvÿöw¶ó†dó‡—f‹_nO‰z—t”¿qòòUÿïë/<”ÞÓ ÝÝ.]ú’ýÓÎ=›t$/Ì’Ò벎ä—Qß“×Ù®9ï1=ÞòƒÇ5_ç'Yıþ–rRZ‡aý$‹—lºjݯ†êwþwèaO:7ò{’ÅPem›]qe’÷Ä­YVÄßÊyñY¶&NŽºtÓ¦MÛg×(†Ìô¹3×FJ€ @€Ó@ kGD0îâR:?YÖµúÁ¬Ý+[+×W5ç}Ì’oT«•—n¼òŸMƒ.׺°©û¢»OêZùÊR–ôÄ"ÂYzG¬à=gYתŸÄxvDðà¨Ø¸oÑ“T²ã’rv|lglÛÿÐÃþßCsÌ¿ô\±öüvgÃU뿎ÿ¬'äu‡ç†ë¯¾ä®q·Ó‘<7¯#Fú¨øýüBõ‡g‹+‡u­º-NvØ3NrxxmoÞf–ý"ÙöПEr¨“ªÜÝ÷ÜEtžeÞt{%ié¼üâø{ÐÙ~[£‘½ó¶bSGîȤö‡é²ìˆ™½{óÝ?}Öt ¢×û»±{ýÆj¥ú¼ˆV×V^Gß÷ˆ¾Á§F 0¢©¯’<¹çªu_ß~ÿŽ÷ÆXoè/›>2-eûÕëiûsVýx½Îj%Y[Oç¹çòuçÆX—EŸkÔóT Ç ÕxN# º-î¥ü¯;’ì˜VA’¼l¾ê|C÷Ú7&Y5Vg? —ˆÏFˆ%µÈ;ƒèùëˆÈÇj÷ìÒ¾JßÓZÑkÙÚ]_­Òa~å«p+½É ÑíßÖŽÕRé?cuõqÃÙͮ얨kà’èƒÞaôPV­ž÷¿ÉýOÛxõº‰ ¢×ú™{ÆIçì<¶ÿ§>?y »Dß9š˜µŸfÕì”86VlÚtéæÝ ²6ï=W¬ûË|Þã=ó?‘¿~¢E©D¯Gœ¯öU"ˆÞèãx¡ñÓã-¿;¯i¸¿v’E¼Çß_;&ú¯Æ±®Tî¼%N„ø]Ü~àiÇ]¥rº1N®©]© æò¢Þ¶>¹Õs>sS»ìþp-ï¼RÄ^‘ÿ“‘ÿ¥Cå>ü.>Çö‰ù¾"þÞ´ ´¾`ÙŠCÌ/_G˳šêɲ{ãónÏø›µ ßÇN_\•æýQçßÔòeÉj'Í4ò‚˜*]þá7UÑ. @€ @`´'t­|ؼ¤t{+æGôüŸ¯¹|mížå£­Gþ‰mlâ{¤£˜¨“,F׋¹›{´Ÿ!SHÏgé¸ãŽëØ{ÿÃßç^¼º|4uqi’ï%}½«“Þ{oN>¼ÿV郔¼$@€S+ >µþZ'@€ @€Æ!÷¢ý‹¸çøùy;²Êã®ë¾0îÙî1F›ŽcÐ'ŒA =ñÔÕGvtfO‰['~Ý‘U*ßè¹ê¢ïŒ¡.E @€Iè˜Ä¶4E€ @€h«@\ý5ù*¸äò×ÑÛJ«2h@víÕko‰ªò @` ”fP_u• @€ ÐXº|ÅñqÉÜ'æ²jú¡Æ  @€ 0N+ÒÇ ¨8 @€ 0%¥¤Tú»¼å,Ë~ñà=·^6%½Ð( @€ÌJôY9­E€ @€fÀ –­8ä³.¼½8¢eg¬z{’¤Ï¬mK“÷Ýpà }ÅýÒ @`<ÏxÉG¾· yX^ǃ›³gý`ãŸÿn<õ)K€Ì<ô™7gzL€ @€æ”ÀÂù¥k–u­ž_M’/ÄÍÐïLÓ,èé kYöåî¾õßçˆÁ 0ႨN<ýÈ²ÇÆíCÈ;Ú±G¯ïѧÿŒ5õÐ{¸‰Ã £€ŒN1 @€ @`â–œòÊÓ4=:o©”$OHÒ @€f¼€@úŒŸB @€ @€³W`Ó¦K7ÇèòK·»|û ŸæžË×=k†A÷  @€˜CqU4 @€ @€ @€ @ . 
^—ðL€ @€ @€ @€B@ Ýa@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ^À$@€ @€ @€ @€éŽ @€ @€ @€ PH/`H @€ @€ @€ @€tÇ @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ^À$@€ @€ @€ @€éŽ @€ @€ @€ PH/`H @€ @€ @€ @€tÇ @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ÐQHK @€ @€ @€9%ðôøè$ËŽ(:K“ùéÎ Êó–=ýÏþ}sc5¹ëkÝþ¥Æk  @`V ¤ÏÊi5( @€ @€‰@–$‹K¥Òå­ò¦i².I w-%¯‹¼é­Àl'@€³D ~RÝ,Ža @€ @€ @`äC­FNJéº4IåµdY²*ɪV#œtFæ|úK>|CÌù³w×ùºÿêîûvqë¦7lß]^û'GÀ{xrœµB€¹(`Eú\œuc&@€ @€ @ &`5²!ˆ&ÖÄÊó/îN#V¦ý³ úî”&w¿÷ðäzksI p=š¹4lc%@€ @€ @€@’|ý?_{C–d#ºLw¾ù®ûv¬ç6ûFr˜ÿé9ï#™»zÏÍa]Â3ŒD@ }$Jò @€ @€ 0kòÕÈ#œÕÈ#Qš¹yvw˜ÿé;·»›»zÏÍa]Â3ŒD@ }$Jò @€ @€ 0kF²¢ÕJÖY;ý w˜ÿÓ´L 7wõ›Ãº„g©€@úH¥ä#@€ @€ @`Ö ìnE«•¬³vê›Öê80ÿMLÓòE«¹«wÖÖ%< @€ÀHÒG*% @€ @€³V`¸­V²ÎÚiße`Cæ¦i¹a¨¹«wÔÖ%< @€ÀhÒG£%/ @€ @€³V ÕŠV+Ygí”9°ÁÇù’iZnÖÊêŽ!7ÚI ãŒ,Ûµßì?/+Œ³Uóö‹ãI³ DÓöBûÅöjý/ô§Z(ßt,ÆXìˈÛ®|‹qûÙÔ—ÂXòö“‚s>™µmµíé,”êÛ‹FýÙ û å›…Bž‚ñ.å ãŒl…6ÊGª±½Ö§B›Iáø/Ž%+lZ›Ê7õ³Å8‹m.ßÒ¹0Îèb£Íb_úMö%…c)aùVuGƒm- cì÷h?-8ÇEu5õ±VÁ@;AÛÈ×tÌ·Çå‹ÇS4Ó(ßÜ—¡·÷7_h¿P¾É¹°½éÉ+(MÇB¡ŸM},Œ¥V¼o´ÇB­ÿóüÓ4ßV{êmu,äùŠãly,ÆXìc^>-Íþ®MŸ…Åc!oýlêcqŒƒÊ797ÏÙ¾ñ²#ï¯3K Ò—­ºã—[?~þùoØ>³z®· @€ÀPþQ>”Šm @€$±ºg]D8èr Ä:ršBè#ÕÐJËdS"M"$_|^– åg«)¥…õ;Ÿ 1ñ¦=M%JM¯Š-FT~ XS?†ù /²÷§ »šÊ7Õ[È”—*¼,d‹í¯ Yš 䯚wæ5Öé qÖ·ÛË·5§PWÓöFá¡Ú(ÔÜäÀöV}qûUzÒŸl5Îæ6*Hí¬ªàÜTy!c“Ea{ž¿i_±‚B¾¦<…í»”/ì+$›Ð›¶Û«U6ôÞbLnp‘âñЪŸC×Ú_SKç…†íKsgvéjÿ[T;[ÕÝ\b˜W…Á4åjý"?†|4Yrì²½P¾l=ç…>æÕ6•)¶3(_cW«íƒ*kêg¡‘Brg•[š«nµ½Ñ“Z¢Õœ5l ªæÂñªUùÖÛW1Pù@*¯x _+‹<Çpûê5´êËpå Í烬WåyW¬¾)NXh¤Ã¼‘nÞ§eqbEãui¤›¶Çéõ"q‚M#o‹ó(¯#ÙH7o/”)ô±V¾ØÏB;Qk£®8w£‘Ž&é¼|«}Å~6õ¥ÐÆ.íë.Œ³©|aŒµòMãès±ŸqNT£ÏEãZÿ ãl=gå‹s™—7`£îü¬–Ú¶Ú¯§VõÖŠÊ7ÍyaœMÛùµtÞY>>žTîH×|p²!Ú»{ R @€ÀLHŸ©3§ß @€&I Voˆ/B·›‹ïø_^6}áØôEä@žøª³‘?¯§é‹È®Å/¥ã+ÑF™â—¢µ~4}ù[ø"µ°½Øfñ‹ç¼|t¿Qw±Íæ/ 
í7}Y[[‘8P¾©Í¾´c­ÿ§èJ£®¤7Ò‘¥‘.×ú_(Ùù"ÀÜH·ÇX+_OS?‹Û‹õÚËËûôB›ã/~y^ìˈÛ/´¹Kùãln³Ð—ÂXjý/8— ûŠNMÇBdÊËÕiVpnÑϦò…æû#[#_œHÒH7o/ô±¿V¾Ðf\¢Q>†ÕH7õ±Ð¯¼|ð5ò…fÿöc(üµö õÅ)ûâêtVÌS°¨•/Œ'N²i”‰+W4ÒÅ6«Qq^®þˆf¯‹ÎqåŠÂö<¥Aå«åâ¾2ÕR¹Q¾Z¨kpùlåK…òÕò@½ùšöò• íWŠÛ•/ö­Rè9V7ª–¶ÛË÷W+ùÊ…|•Bù¦í•ºòòåBŠ})ê­ë­Týª•ïèh¼®êî(lïë(ÓÑ9ÎË÷õu6Êwôõ5Ò•B¾ŽBž¾Îÿ>—†Îµ<Ì,sß¿î®8 怙Õk½%@€†HNÇ> @€’lû–Uû¶7XUãX @€ @€˜3× œ3C6P @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€ @€ @€ @€@kôÖ6ö @€ @€ @€ @€ÀHŸƒ“nÈ @€ @€ @€ ÐZ@ ½µ= @€ @€ @€ 0Òçà¤2 @€ @€ @€´Homc @€ @€ @€ÌAô98é†L€ @€ @€ @€­Ò[ÛØC€ @€ @€ @€sP@ }Nº! @€( ¬Y³¦ãŸß{ÁéÅmÃ¥Ïû×µ/Xó¾u.} @€ @€f²€@úLž=}'@€ Ф÷%Iúç¾íuç·ö­ª\³æƒ‹Î}ßÚuI9]½æœU¿m•Ïv @€ @€3]@ }¦Ï þ @€hƒ@Ú[}š¤OÍæ¥7Ÿ÷¾µ+Wyî¿^pÂÂE{ü0MÓ}}½ÿ0x¿× @€ @€f“€@úlšMc!@€ 0F·¾õÕ÷W«ÙÒ$Y”¤éº|uzš% òêJ þ{Z.]—¤É#ãåoÿ›×þpŒÍ(F€ @€ @`F¤ÏˆiÒI @€/°ýÿ-Z¹/o)V§Ÿ¿ö®¥ÓtyþÌjô~¿  @€ @€f·€@úìž_£#@€ 0b5kþê¾|Uú0¬FÇ. 
@€ @€Ù# >{æÒH @€Œ[ ¿WzTR[•>¨2«ÑxI€ @€ 0{ÒgïÜ @€Q Ôï•>DA«Ñ‡@±‰ @€ @`v ¤ÏÎy5* @€cbUºÕècÖT @€ @`& ¤ÏÄYÓg @€(0Īt«Ñ'Ð[Õ @€ @€ÓO@ }ú͉ @€˜rªt«Ñ§|6t€ @€ @`²Ò'[\{ @€f€@aUºÕè3`¾t‘ @€ @ ½i{«Sö dñhoj#@€©À¶mÛ“‡¶lMö{Ø>#-" @ ­i<ÚU¡ï™Ú%©ž¡ÎÿK“­[·%ñª—&{ì±p¨,¶Ív~.å$>›fÉaÓR Ýï×i9HšPŽ ­]å @€ÌX æ'ù @€ @€¹&ж3:çœñNŽ@~6Þ=¿{(9ýõ—NNƒZ!°¾ciòG;8yÇû¯O¾|ãÏw“Ûîv |à¢ä?ß÷âX¹#9ñU·³ju˜#oÿdÝ»OKî¼÷Á䌿üÄ„´¡R-°bù±É+O96¹â3?HοäkÝœú§±À1O84yÿÛNJnýå½ÉÊ·]9{ªksEÀ19ò™f5r+9§—@÷^š¸ß^I;W’ÕW}>ï•ë’¾JuÊ\ÛÊ·_™Üú‹{§¬ÿòæ%ÉSþð‘É»þý‹Éç¾úÓ)ëÇë^ö´äŒ%˜\²á¦dÝåßš²~œøìÇ&oYýìä¿ãûž·Ç÷>£}<ÿJÉüÎ4ùìw+ÉŽ¾Ñ–Èÿ„#J>ü÷/J~}çýÉKßtÙÀŽIN-\Ø™\·öìZ«Þ3ýøõ÷L;?—òšóϦ©þ<è¡ßfÀK—þQòê?{j[ÿ1{tŒd4V¤FKÞ)¨T]á}Êð5<¤@5î:ฒfÂ6V Ÿì'ŒYÅmp̶SUS&P¿ÉNþì³wʦaZ4œÿÛ§þp,Ô%`!E€ @€ @€ @€t @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ô†$ @€ @€ @€Hw  @€ @€ @€ @€‚€@zC’ @€ @€ @€¤; @€ @€ @€ @€@A@ ½€!I€ @€ @€ @€Ò @€ @€ @€ @  ^À$@€ @€ @€ @€éŽ @€ @€ @€ PH/`H @€ @€ @€ @€tÇ @€ @€ @€(¤0$  @€ @€ @€ @€€@ºc€ @€ @€ @€Ò ’ @€ @€ @€ @@ Ý1@€ @€ @€ @€ é I @€ @€ @€ î @€ @€ @€ @€ŽBZ’ @€ @€¥ÀoîË’Îr–T«£,(; 0mÒ§íÔè @€ @€ÌïÝ&‚>æI  @€Àh\Ú}4Zò @€ @€ @€ @€À¬HŸõSl€ @€ @€ @€ 0ôÑhÉK€ @€ @€ @€³^@ }ÖO± @€ @€ @€ @€ÀhÒG£%/ @€ @€ @€ÌzôY?ÅH€ @€ @€ @€£H–¼ @€ @€ @€ 0ëÒgý  @€ @€ @€ŒF@ }4Zò @€ @€ @€ @€À¬HŸõSl€ @€ @€ @€ 0ôÑhÉK€ @€ @€ @€³^@ }ÖO± @€ @€ @€ @€ÀhÒG£%/ @€ @€ @€ÌzôY?ÅH€ @€ @€ @€£H–¼ @€ @€ @€ 0ëÒgý  @€ @€þÿöî¾âlüøÌJò‘Ã7„rh^$Üôý7-gHìBˆx m$6zr†p½æåBJ¡\vB¥¼ÄᨠÐ7=x)m–B)åniZŽ@.âKÚù?³²YZ9r¼²%姉vggçø®$"=3; € €ôG€@z´È‹ € € € € € € PòÒKþÓA@@@@@@@þHïy@@@@@@@J^€@zÉ_b:ˆ € € € € € € ÐéýÑ"/ € € € € € € €@É H/ùKL@@@@@@@ú#@ ½?ZäE@@@@@@(yé%‰é  € € € € € € €@¤÷G‹¼ € € € € € € €%/@ ½ä/1D@@@@@@èôþh‘@@@@@@@ äÂ%ßC:Xôe‘úw/ú~ÐÒ¨Qîud¿1Û+S]*š^l]]éµ5Ò|&ÍUÛ²:zû* œÿmÙ/„"ïý®;òz`_Ïü{¬È/æ›?f—­½†WFx- Ð’Óƒà5™»#V¹[‘³°ìïAùz|iÜn*îÝ·údߨo´Úq»‘ùêæ&ËU•øžýé=¶Um]›ÌŸ¯ ;ïøî´ËŽÕCúwßÖëâÖ£† i;’ÿ¯(i;ÊË6¾yÏ$^ýÉ÷L>Þ Cýy>Q&C)°ûÎ[ eõÔ]Bº„úBWJPÀÈ£»E—@@@@rÐòÈ![NYø)'&2!€À&‚ü\²UñÙ´ p#0 ß¯h §©·v/Ò G³@@@@@@@ò#@ =?®”Š € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € 
€ € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € €Ó £5@IDAT € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € ©À5×ßõÅÙ³oÛjHAåø8>i$!€ € € € € €‘Àœ9û^3÷ίQ“i*žÀÇzÍkº¢¬eö¼Æ«njÚ E@JCh € € € € € €@ÿ®šsÛ§#‘ÈðÙ­mí?¿ÚÐÐÐÙ¿ÈÀÐ ÌžwÛžŽ‰ü^)1Êü¸Ýuç6\0ãáo-Ø’¤oÉWŸ¾#€ € € € €Àf \{í¶Ó‘Êýg]X¿b³ à$È«À5óîë˜ÐÚYçÕ½ž×І°ð«®¿cï²pør¥Õ©Ê¨ÚºÚÆ5ÌüîÊ!lU#0 Ùóî:ÊQÎcRHH^ÓŸ(enk‹Çnl¸ðì÷T0'#°™Ò7ŽÓ@@@@@ !0{îíÄMü£Ë.øÎ;¥nbè¡ŠÊ %pu´épÇ_|ñŒ5¥Þgú‡@1 D£ÑиÃYfŒú·Ìp½º”êvæ®Ve—K€ç[rmBlì2q÷«_0ã7Åx­h3©³oj:×Ñê¦dšÌNß ïá;Ú»:nh¸èÛò~æÀà HóœW ®¡4@@@(Ù76ŽÓŽþÖªÂÞjX¹êžNW]{ù…õo[÷|è^\e®™un½ÊŠD@>›NqBú§)Í-Š€úœ9Ÿ2eê™ 6]>WËRÚŸØ4æù¶µÿî² ¾óŽßqÒ(ën¼óK:äü"ýN ©}“ctjev¹×ò~H•a;éA(RFÁxôaú"Y¦êìîzwÛLÃÌsë¯,˜†Ò@@@@ „®»©é‡2{òû©]”Y1Ù¿§Pê}Ðy­mͺƒÎ]Ú/¶@ 8¢ÑhèÀÃŽ}\n}¤O‹ " ~ͼ;w ™ÐLiߌÄ=|ZšHŠ»Ê=nÖ¹g>™5¶Üå¼üòËzÿý÷×+Wî¤Gþ—^µjkÞ#æT¯®×_«G|2Lo¨\¯·6£œöö º¼¼\w”—é²öv‰ºNg$¢#aÝéÔ‘ˆ»q­ÂaÇ‘s¼<²oŸu\Þaå8²othšÖúÔM_ÐÄÀ=£»®uî9on:?9Ø´ôM‘£²нƷÌ<·îÙ’Az<@@@@ò- ?ê‡+ªwþ¹Ü-p|z]^@]ëE2Ln‘>ô?tç@—•Ìz7?ä’ ÎúKzØGâ¸öÚ[·q*+äVÑj÷,­’€ºÏV³4/‘,Ÿ£ÿg´ºG»Z‚Œ®óHäQ²¾©ÝO¤m$©k“½ƒF6lг{WÒí 2´ÀQŽ=æíÊ5{Äæ•t{<‘Ï;Ë;?™×ž¢¤|©Þ‘ÏI¯,/¯ÍjÏ— iò\)FÒdOâ©Òù߃ÝõJ2’õØT©Ï&K2 YÚå=ÛT›¯§¯Þ¹’Á–k{à•m·l½©yåx¢ 6Õ¶©;¯œëÕâ•/=•³l†ž¼¶\ÏUR¼ÆË1›×ª¤ìÛ2¼~z…{gKF[—mwZ^¯Lk")+ù,ɉ~zé‰síA¯\9èµ*‘YÒymª”oÿðHë“רMŸ¦” ð&'µ@à ·o_ _$ÿK:[þ6̧=¯¶­Ù #…¿·ÖçI € € € €y°j§²Ò®mº«_ÞÝCPÏ%€ÞÝn£âfÊÌ êòëi P\×\×Cýt–ß““´€úus›fHû¦M´'Ù.žÈÿŸÊè ç—è¼ %HÄè%üo”k‡ب~â?y¶#l^;šÁûKþ–)éÞÙɼöX"…±ñ\9nËLTe‡,xgyçÛ! 
¶& þ§¤{C:íñš%y¤.¯9×KJ9´ÉÆÿ,Ág1N¼Ôò1e£6æiŸÒ´HåÊür;)¼ûˆlÈç±Û½Û“WB›ÝÁQ™0nç‹Û#6¯¤§bíyö|{¬»&y’ò¥¯^;¯]&\wW(õÛ­ìÛ$[«¸îº®L‰7Þ®7¹ZÊ”|‰Ó$£H$'ú%§WÒl/\[Žô5y®+™íq%w·ÞÙw$£²åJŠä¶ûFöÙöÒB!oÛ 9!¯Ov;îȾœè†ì9¶ëÚË~HöãòJÉyºKÒ¼ý¸ …Ã^º-3‰›X,b±˜‰ËvX¶e¢¹Û‰™XWÌDÊÊL—<Ëõp˺ºLgY¹)ë”çŠ £Ö®s+*†™öö³¡²Í kaÖß`âRqõÚµFnÓn¶Ùæ##·m7ü¤+·p7rçëfÿ”ôã»ßýQù§ö&ÿŽPöÕQ‰Ê¬]5wUì£]?s溾òr þHïy‡\ÀÐ+œÈ…2BïùŸ˜ß ôdíÿÉO¼ø¼úŸ%xF@@Ø"|ë3ÒíZ’é"vmÉô4»o×›LO·ëO¦§Ùõ(ÓÓì¾üø™‘¾¦ª*#Í®céw¾]Û2=Ý®u™žf×¾LO³ûÃÚ*3Ò+*Ê3Òìš™~çÛu4ÓÓíºšéi‰u6ÓS•ê,‹dæíìÈH³ësfž­”]³3=ݮᙞf×ôLO³ûvÏôô°OÞˆ¬šžÏ;_ÖMO·ë…¦§ÙõCÓÓì¾·¦hÚX8óüp–óíZ¤i§Ër¢™iv­Òô|vß[¿4í@Ü'¯üÏ÷Ö4í]€]µwн±¯]ûÔ÷q²®ð=’’(¿åÄ$@ó“®®øÕù ¨Û™Ÿ<›+?ÈO©šM@`Sq /ß4ó¼ú‹6•qsÛ€úØÃþº¤/“€úgú,ǨuªË2sfý+}æã E.pݼ¦ùò‹i}tcµ ™˜×¾vù3qJÚllÿ¸Ýì9|ô#€ž¨^F ˤ[“m±7+In§>kú–ºo·»×”é•lÇØõJèÞ‘léŽO^ù‡OF>[„Œ˜ËH· Ô¤×eᥧÙ}ß¼>ç˸;ßóíj8éåúõÕÏÄžgÇ%¦Ÿo+¦§eë¿ÿù™mõk“­Ã·.Ÿ6É%Éh“=ß®dŸSrzFš½›Ljžä¶\–Œt¿¾ÊÉŒ|¶ ¿¼þ&þõÛ›Ú$Û’|ö5ñé“Íïßþ̺ì¸Õdù©Ï~.þef:yõû¸æÚ'{¾Ÿ•ŸiüýÛŸi’h¿O¿|^SöBÛüŸ×½ÿQz>?›'×kíç”õü_Sö|¿ríð]{,õ!”3ÒìqIÍL÷³Êr¾Ÿ«üè–Q¦´(#ÍÖïß~Ÿó}L²µß¯.¿×”W¿4À>§>äýi~¯i{Ž__í§JjyÞ¶O=^zŽuù×ckʬË/¯4(³M¶ý>mõ˧uÀï3Ùfñ{]ù¾¦|Þg^~.~&>í´çû¾W}òú½Oíù~ïk¿¾úõÓ«ßÇß/¯foó§?r½~öB§Ÿëíû¼×¤¦Œ¼~ï3{¾Ÿ‹¯‰©=?÷ÿÿd¾Ï¼úý®µO_ý^Óö|Û‚ÄsÊß9¾¦ggžïW—ßu²çûçÍôÏúùãóÿÿ×_f™^ý>}õ»Ö~×Ùžo?¼ç”¿üúšì§hm+æ‘”ìl"€lòÿ;"`^>Vvíöʪ¿%ÿGºTþ3&Nù÷ß+òÏ—rÉK(ù¾´«|Î’Kk%ïò¹têøêÛãVÑò9åTT>Y>£l@}ÿ>ÚøZÛšu²¬é¹«ûÈÃ!ŠV`öMwåhçv¿Èûr­Ü¥þ‡ªSÖB¿xÆ¿<¤!„@ÆAJ)À1AjR € € Pòò»ZæC~ÏH—§3ÒüòÙÒd`‡OÞÌó¥ÈŒ|ö|à’‘.Ef¤É0 Œ4¯~Ÿó%)#¯ nÉHóê÷+7Gï|Ÿ¶JMuÉàœŒ´Dû3Ûê—Wºä{¾Ÿ‹__¥I¾çç|­ýœ¤¾u Ð߯¯2Þ(Kû3û%)yýúé]?W¿¼Ù^ÿtÎ¨ËŽŽ³e÷~d¶³ûø§¤¾êÞyý÷¤ÐA Xõ' .mZeÚ»œ5ëì·ý[M*“@CÃGTT•?+ÿo߯¯vÖçQmгç6M‘ÿµ^.Ÿ¡ŸóÍgÌcmkÿ9Q>Ó\ßã$"P¤×ÞpÇaN(¼BÞ§e½º wcõ~¤Ú»æÎšuÎÇ½Ž±ƒ@¤ç•"ƒ°_lÊGŽþ¦ücÁ®³W.¥Ë×¶¿H¾“y³ÿ‘ùEÐÿ‡€Ì|¶lœñ¥Ñï‡ ¿/·Þù~?0ø|¹µS¯’}éõì“׿ýþçË÷àŒrýÚï÷ƒmGÎ?døü¸àïÛþ̶úÕcÏ÷ó—K’Ñ'éffšW€¬É“öðý!Ã÷ÇûCZf¹RUf™v퟇_Þ¼øû´É6Ç¿ý™þv]$Ÿæ kf^ÿ23M¼ú}Î÷»ÖÙ_™åú™ê~øû·?³Ÿ‰öûÔ/ 8¥[É 
Li6O®×Ú®•^¦Ý·ëK¥§û•ég’8?³_~çûÕcÏ÷k—]ë*£MYüý\ìúVéçË32Òlÿ¼>&>ezçû¸ÊE2êòë§=ßÏÅ®áe¥>ìš^©ûÉíÜß?™}²ekF¹²tWFš]?,Ygê³],ußnûµ_nÍ™‘ÏæÍÕß®Ufó§?üÛŸY—4Ó÷|oMµ´Bí:hiI¾¯S›Ç®–ž7±vZïÔÄÚi½Óìžk×OK{صÔÒ’|ë±yìºl™y3Ï÷«Çžç×.WÖiË¥L›Ç¯­ÞšpiÄ}úd³Ø5áÒ²ú–i×–KÏç/Øéévý¹ô4¿~Ú<²]F^»^]úùv»ô4»ï›WÖºKÏë×O›Ç×ϧMvͼô2í~<ÏH·ëê¥çµë不Ùý°OÞX,³L»>Ÿßùv;ôt»†_zš]Ó/=ÍîÛuþÒÓc²æ_zZ$V–‘fóØõÓóÚõÓÓìú6íWÔ‡wÝ5£+ý8û €Å#pÁ 7 ß.¼Õo¥ÅŸí³ÕÆ|(ƒ+nø0öñcÆg²-Éßä®ô—É¿ôû˜¡nþضÆ=¢¡aƆä¹<#€@Q èëæ5.‘ß'gk½üCtPôd«ß']Ϲ±ñDR—ËçÔ}ŽÏ™ynÝÅ>é$!P” ×ÿxÇÊHùsÒøÑÉÈøÁõò+õ­íJßÐp^ÝGÉtžÈ·ô| S~`v˜;ö›òioiÓw@ݘ\ÝuЬsÏy3°P € € € €@¿æÌkZ,'D³ž$t¨yã*wÍ­ƒ@OoOnusÿÌsëOM?—}(ënºK~[v®òkqÐ3šyÝM_“ÙE—Ë@€q©qO¹ø¼3ÿ'5mŠQàÌ3ïŒì¹oè¼ü’×~£>‘) ·µ¹îõ Ìø°ûD›‹[€@zq_¿-²õÉ€ºüƒAÖ²R{gG0/~[}øP~ ËÞ6Ž € € € €¥- Ÿ‹e2Älß^H=½m› ¨KÐÿ‚YçÕÏM?}(|kç6ïhÕ*ŸKNjk‹!€žÚ^»}íÜ»&É4®€úAv_&Ÿmˆw™#.½èÌ?Ú}«€¬‹~‹¬‹þ¹j›ôá¶¶xìú† Ï~¿XûC»‹_€@zñ_Ã-¶6 >îÐc¾¡½[oùÔ厰‹/>¯þ?·X$:Ž € € € Àu7Þu¬rô£é+ùaüC ZÍm_ÛykC÷×AÓrª²€z\¹±cgžÖS9D&(«®¿cï²HøwÒ˜QÉc=Ùöä³|ÖNÏÚ+ä³öP‰¦¿ÓæÆdÖnR‡çb¸nnãi²鲦âí]s.úö¿‹­´·ô¤—Þ5Ýâzdê~ô©²¬½åû>鲎åųÎ?sNz:û € € € €Á ÌžwÛžZEþ 3%·J–.«UD¿±ÐèÉö&Ÿýê¶/¦½ëÀY³Î~;™g(\††¨¨*VkµŸme)ÐÓµgßtç1Žr®0Zuµ¯ùçÑòÙKÏÃ>…,píwì'3ÑO;懗ž;ã_…ÜVÚ¶e Hß²®wI÷6[@]f¥»Ê5Ç_|Á™—4C@@@†Xà‚n¾mhÔ32Ùás¶)6è¬]5·m]Ç-…<}Sl™uóǶ5î 36lê\Ž#€À èëæ5.‘=“K1€ž.;gîG*'´ÕÌsë—¤cB°ÿŸeH!_¡-·mÒ·Ük_²=ï¾åû)²†úåÉê²FÌÇ]]îA—_4ã’í8C@@@†X@ÖE@~9¹Tè霽êêw¬:5=û P8sæ6^jý}™luêøêÛn¼ðÂO §u´@ Ð¤ú¢}›-`êc?úërK›Ë¥OË?–þüa|õaüci³I9@@@È*pݼ¦‹´13%ˆ>÷£®n¹~æÌuY3ùd@] ||ñyõ?+òîÐ|JRàê›î80¤B_–úü&\’—˜N!€y žwb*jùbã”W>¥; þâÌsëNê6Q? 
€ € € PJ³ol§´9êãØÇ·•rÝçšÙßWeì(@ÞŸxQh PLÒ‹éjÑÖ Ø€zEÕ§¾îºñ —\0ã‘ÆÉ € € € €©¬R5ØF@@@@@@@@@@@@@@@@@àÖî%zaé € € € €(U­;Óhu¨2æíÖæùÿ …"ÀkÓçJD£¡Iºú,G©ƒ]£ÿ´´¹ñ&Ÿ\$!P0ÇGÏØÑ¸¡:Ý®¿?õð¢UÓ0‚„)…B@@@@@¸E+]fŒº'¨æsâ·¶/wÊmX¹\A•K9[œ@à¯Íl‚Åòš­io/ו£nµý`úÍÙú³¥¥çcÐE>ÊŠë2ý7n\dç1\j”þžÌVÝJ…”Š„ÊUíÉuÿP®¹½eÉüÙCaA ¼ôàM)@@ fä¢D@@¨­­ÛÁ„õ;Z«òȈŸ“æX`M¤9ôà5Û‹£Xwò1è"e…ï öcüøñá‘Ûïý´tô Ì[>ë]Œ£¾8Ô‰ùA]<@@|;ÓÀQZfèÓ䥻~ ¨@@þ ˜2u¨ ¢Û3Rc¨6²ÿ¥pƒ'Àkv𬩩ôFn;æL  ”è©ù•kÔdë:Ø(sª1æãªe¥¯@Ør˜‘¾å\kzŠ € € € €X‹ý²*^¥´ÞF…þüé–ëX$§#W^³yå¥ð-K@V Ñ ¶Ë4qå›/õÜsÏuuü^žïïÞæ JD€é%r!é € € € €ä_`Å# W›ö÷v5&þ­ÍÇå¿Fj@``¼fæÇÙ$&L®ÛYk½Ý—Yéw¤Ñ“YxF`Fz‰]Pºƒ € € € €ùhmmÝ 5ü&¿µP:Á ðš Î’’¶\G¹{+òbqór)JLœ2õpG‡¾íõ­ý“ï·¶Þÿa)ö“>!«ô\¥È‡ € PÔ|,êËGã@@@Rí•l€2ï%·KéY«Ð§eÖý©¶O‘Ð¥Ô7ú‚Àæpk÷ÍQã@@¢H~´_;#¡HÑu€#€ € € €À håŽHVQNgr»¤ž]ÞÓŸPyGÏ6l¡ÌHßB/<ÝF@¶8¾ nq—œ#€ €Ù&N\+³¦uqÓµÉôÔçIѺ)ÚèC$Ï¿[–4ÍM;>zÆçB&üuàùY£T…´áe[ÿ›jõO_nnÞd`füøñááÛîuXÈQG£>#åî ´^­\õ–qÔß>nß÷tË‚uÉúüžýÚvX4Z¹­®®—r•öì m[9î¯Ö¾ r›Ð¾aÇ ×qmZšÿ´_™~iãÇŸQ1r;ç@{,Öÿ×òGîyÃ/ß&Òô¤“¦OÒZ!fûˆY•´íW©·”1ϯ_õú+V¬ˆm¢ ïð1_›ºKy$t’v”´I–Ä庿d\õÂÒç/“})6÷GÐåõU³÷ºQNµÍë<úÐüwúÊŸ~l(_³ém9ºvÚèòˆ'¯Û±ŽÒ»eþ)×õ­¸«~®ºÞû(=.û›s-jjNÙVUTÚ÷r]µrÙƒ Þì«®qãÆEFïù…Clž¸ ­´¹ñ}åçXI $îë^’]ëî”qË•|HÚ‡YÓI ½›…§-W€@ú–{íé9 € °e ðep˺Þô@ð°AèÛîu©£Õ%,.“?òðþ’˜^HUmêª9¹þòÖÅ×ˉíù?$¨»PÎ%9–øçPJÂ÷É_e¤½Çæ96:}ë2íÜ%çÔ]e²úcìñ=Mõ»4íŒG\ð»Ÿþ£ªÊÂ?ôúî °´9%—Thw·®pj¦L¿¬uÉü¦”£½6ÓÛV3¹îP9{‰œ¿³m¸÷ðÊv¦ÜVí«•!•o+ù5YòÖúPÓo{˜egäv¡¨ÖÎ"{8\æØ[÷+nÛ%—åVéè¸d^³ä¯D3µ¹ý^ïÕF÷ZÔÒÜtQ2ϳS­›%§]é]è” Ò¾¨Ô¡j£õÿ™ºe7½’r8ÛfÐåe«ÇK¯‰Nÿ–´s¡ì8ƘC]ŸÙç >‡â5›ÞŒÚÉÓPa½@^kû¤“×—÷ÚÕZ††„wx6õXÛ›}-ÖUÄÚ«tè^©c÷PȬ<*ÝïÉææ5ÙêÜi±3Åñ*{ Vã”kËV;%Ìhì²nÕKs0“­›>)¶òÜ#Y¾¼÷<.:­,¹Ÿ|þ‡Z÷V_ƒ¢òeD¹¶ŽÑ{{oHéP¨:¼Ûñ'Ög TÚ`ºÞ_ñÈÂÕÉ>û=oÎà–Ôrú;à+—Ap©å³@®Òs•" € €@¿‚ši0ÐY7ý28Ðúû Ç  € € È»=ª¶Ûë þx3¤e6sÜhõŠÇÉLç=»+ŒH°õ: º~Éݰá´eË~úq ‘ò¾*ÁÃERöÎR¯Ì:ׯJ â= î&3Ö÷´Á]iÃþaÇùù„èÔ}–7ßýAj½µ'Õ}^f›?)çmדn” ø½,}x_žwÖFí#eVKHråèÆIÑúUK›îÉŸe£fòÔ±Ü|\ÚVeÛf´~IL6H»ö•vm#ízʹï;NèÛ¶íèiò”S ]rŸaÏ‘€ÓG±õÿ|Ènçú˜pÂéc¤]Jþ­ì92ªá÷ò×_¥0i’Þ[ú½´y+Û_9vp¶rkkëvPú'rü(/OOÝ¥œ”шÛöòçp'¬_@ÛA6/üó`•—­ždúÄ)Ó¿.}¼[öéû7Ä:zªõþUÉãùzèk6µ]ÞLî=h×åÅ^?]™‰þªøÛ×ï^r…GÛ +Û2¨#·Ç@¯íŠææõ“¦L¯“j.FWªj;€f†_í“N:coí˜Kå5-—A=ÑÚÜ4ß/ß`¤5 
¨¿m4¥þò"¼UÞ—£6ž+¯NGk]”¬ Vsr]kGWGÝßk¯kNšhý¢ú#ù¼‘@µ<œîÓ¤ÈĦ£ä³ûÝÚ)cn1ïßÚÚÚº¡;G¿ž6w@Š}/HSn”Ê>/dØMâá8ΓQt94ÆTíý²R¯'ó¥>çËp åNœ<í+¡cïœòyûÿ£d›#:ü²òYo„Šœ)y“ùÒž7{pKj9rí{ FÛÔ€/9×~¾ð@ pé“R  € €@P3 :ëf _Z?¯@@BÐÿÏÆzäÑ,1ô©öžOd$x½]È„&8ZK0G‚ÑZO öˆäý²wÆþ’²&HÀð4 ­—€ï÷%@oj€^‚4{ȱ…ˆ’öéQaºZªëÌëŒÇÖ–…"É ú“ñx|οßþÓ/Ÿ{î¹®dÓ¼Û¨o¾DÝìS޼~$3nƒ…É<>Ïe*ºGXªäX³êPßmmi|¯;ŸS;¥î„µÆ~mgúI;'}9XúÿYSSóƒ¤O™^RMô´]eã+vGBm÷._¾¼_·G"÷J»¶³åzM\Ú¼à¯à9µÑéÇíÌPÆýùÆä[ûG£6ÎõKùói›*ý»Ö©5ç¯X¼Ñ$q»îþKú6K¸åa±Á¡ÃåOÆmÞƒ.϶©¯GÍIÓäîν’ƿСÌQO=¼h³n}ÞW=éÇ‚xͦ–¹óžc–>L´i„þ»«ÜsVw˜_¥.A`gÓŽtGM•k0Kòî”z¾ßvP×bé’ùOÉ](î’:ì{®þøÉÓï“Ûæÿ*½Ní„ï×±,Å`dÙ„N¹+ÄÐ<†b@Б'ž¶ÍðpÙr]¦xséº|¦½'ÙÁ-ë¤Mãäºî-¯¹|º¦<\þ’,%Pߺ¤ég})Ùr‡EÊï”÷ùI=åÕ!Ÿ‡2E¿'ïý1²mÉÝCô§äÏÕ±XÕ}•™íؤ&u­ìŒô-¯‘Þ|qmdÑ81ϽƬíÝÃÄÞ@·ø•iÓrð•í\Ò¨ô r> € €@@3 ‚˜u3/ƒAÔßà € €ÀÐ õ¾qÝ­ηò^îà‹ä¶¿ ‡M‹M$ðªÿßÄè´ÚeÍ ZzeîïŽÌv–[?wã§ú­Á,3[ß’œ_«ŠDÞ–@•Qþ-©â,ù#q¤Äã±G¾mgzÊBη¶.YðD2=õ¹û¶¶WH¾OISmài¸ùUÉ“½ýŽ:QòVHPìnYë}Zjy²íÊÚÈm°ñ` zU™Ší%ð¥ì-±³>Œ)?M¢ù’]֓޹Yo3ïWÀ„ ߨ’œ·µôyÖÒ%At{šÛÒ<ÿ1y¶|{¨êï'®¥]ÛL[º¤éîôŒv0‚ü¹LfðW;Z}GZ|ˆV§Èíý§ç º¼ôòS÷íkO&†Þ/ia Þ>ß°áèǺCBj=¾Û¼f“åŠåɲíÑå%ý¸Ì¨ÿ† ȘQß=àã¹]ùOBÇor°@×â£öø…[—‡&Èkn×pX5N˜0áó©?jOž~º|Ø÷’Üu\_´tñ¢¿'û7øÏƒ> È‘@ícò>꾓‡ú¿XLM}ôᦿ¥ö]n‹_-3ú¯’Ï“ïŠãvò™ò°Ì ><ë2Ñhh¸*³`¼à­ä—»ƒ¸g®|óOϦ’ãvvóxñŸ)¯Ÿ²åËïó শ%}{ Rän7Æ";Ý] m%ŸÓeàÀ¹¶Ðt´£ÜwÓëkmYØëŽ"rÉèwPƒ[|êËiÀ—Ïy$!ˆô@)@@À 9Ó ˆY7ù2Dý¼*@@ G@f#/÷ ¢§¶ðчÿV3ùŒoépäw6ÝQ¡9*]¦š›ã©ùú³-õ>µîƒ×ëk}_»Ö¬¬Ñ½PÊ•À¯ª”ÙÜ»´6÷ÖI »6—zã&6/¬#Sm^™a?¦¯sl]‚%Xùæ v6nŸ™Éý@•©ž'µ‘²^·-¿Ï@ºLM=Í(²ß.{xþK}žv04¼r¬$9^²«ÿ”v8§];3RnYwæf¿ zjAº}ý•ªbøÙÒ¿,Õmƒ†½éA——ZwúvÍ”i´rš%=b¯ÏÚ®ØÑ+–ý´ÏõˆÓËÈ~P¯Y»Ä–Ì(ž'APûBø—ioòS›¸-·ã¬“; ï³ùA_ ;3¾6ZW'í”A*zŸðˆ/—x¯ššS¶•ÛÏÏõº`Ì/–67ÝÙgãò}pÉ]ÎI ¢ß¼öƒ×.ðû,ë^[þ{2 åå{Ÿb+MXýX8’?ww¨ÑÕßëDÝÈgIû{ge¹Ë…+ƒ~!eü"±Fyÿ€ƒÒ=¨b¥Ô¼R*½í½žeGníþFkó‚·6Õ¢|Y®7xá¹çìíøß—;‘¬µ¯wy´õgÍû ·xµ'ÿê߀¯äY<#˜@â#GA € €[ª€7Ó ûv}‰™c—5Ï_–z»>kcg´.i¼%¾á“ýû²²³näG·VãÆmYÜx´ÌÞy2mdº²³nävqWH¾{¼²ì¬åͺñvm~»6ÛÒ¾&kF&G®{_íÂô?©·½ ¢þ¾úÇ1@@Âh}h¡Ü®Øxk‹K0ø3µªú«j©QïúžÒË4Æí™áiLdïôã¹î¬>y}c^g¯Ûþ[FÅ/Hÿw¶_Nûoe LÿÔ“@ëx{Kz¿|6m┩‡‹×W{«ô~=ܸ¶A«Ä#lKnöë¹\/± *{Ngg×E›:·µõþåïØ|FéÏf亼Œ  Ošv”Dè’àe™8ÿÎ ¢Ë@‹,Ùó“Ðkvër-3wõèD#MC–@iÿû‡kÑÒÜôsù^Ùd#Aà‹Žžñ9o»r¸Ñõ6rìÙ—`»¼<†ð!¯‰œ™xÌÞÙÂ{t 
%÷s}>ºvÚhY:á/¿Ü¡¥¹ñÜM}–-mn”Ûø»v­m‹[3eº½»F¯Ç±“¿)·î×WÙD¹%ük+ß|~z.¯ôßzê³c¤„zH9*uY ŸSOÊ—a¾ÊMÈ­ôíCkÔ’ÛÃn‘÷Mÿ.Éš¶ôîKY+êðõƦ|e-„ @€@úð8@@ !°q¦ìÛ™m2ÓÀçv}©^‰™©)™ÛvÖM¶[W¦æ¶³n’ûÙgÝôÿË`°õ'[È3 € €@Üžl£1ú3Éíü>ë&Ë—ÀÁˆävŸŸinn“HŸ7ˆTYÉuÕ³ó¿­Íwÿ2ÛÁôt×$‚â0‘¢õéÇ“û!N“õ¤»Vw>LÏõÙÞ@¾WØÙ‘dqþ{âIõÇæzn2ŸØYíRŒyÅ’M¦oâ9‘O›]Òó]^zùvÒ”ºñ!'Ô’©gÛÔšcìÝ üòFÚ&^³Ý·ç—àï-jÍü Úœ¯k[×v¾¼bþ!íŒÈk¸I±ÇÈ+л³‚ÜÓ}–]†!¨>仜 U–;?7º7E܈MnƒÖšµsäšÿÛöQ;Îyé}-s*ÏÛX®š™Ë@žô26µ_R¤‘ù2ÌW¹ž«cÊí³|vvzû¹ü•‡Á-©Õæ:à+õ¶J€[»%I9 € €À,·™9šÚY7Û©Qݹ³Ìºñ¾ Êûóe0Èús,‹l € €C/ÐÞÿkEyâ§S£Õ>ƒÑ"™™Ù%Á¥~?ì ÖQeÎe}oÛÎåNL2ÛSUæRqÍßsɗ̳¬¹é9¹ÕóÒN R›Ó%ýJùÓë¶Í‡E£¶n».¶ºÝÿÄ÷Ú™¼ý÷;23ü‰Ù…Ôc2þ))ïŽÎ5Ës)S‚Ñûy•–6ß!S_µ¬U/qy¹¢RðÆm»/µÈzîR~â®YFg¯ƒ./¤&:íK2…w©¤WJò™Øú¶ãž\Þì ˆHÏ[(û›|Í™Ùィõ_²y=¾ØŸ7öî5Sêȵ¿DNßÃ~&Ø>=k#·t—’yWn•ïÝý£çXv@Š °o‘¢dé o@ʱ+i^@Ñý."_†ù*7ÑÁÄ$ù@ÌyFº´g¬÷ZéÿÀ¥=å“7càRt¿|¥Ë. >`B @@Ÿ¡± v¦Ak€3 r•µ_ÊåDZµòÅ­Jþd™uÓÿ/ƒÁÖŸkiäC@j'~v÷»²n»,QÖOu{ÒëŸxâôÏ:!}ŠûN¢ûʳa$·’)>KÀ¤Qª¸]ªÜMf}~Õ.Á”ZÁ¶ºúÙ¯öÒâ1ïVÙ©ÇsÝnY2¿YÖ­ÞJav}êRß‘Rï‘Õm’¾ÌUú§KÕê¿íøÎ%çØòUÅ»ÅüÞvÃÆîäšzÏ·»÷mޞƙW{6e#èòR˶ÛÒ´/ÈŠËÊÖpDÿ¨Ã=öéå÷­KÏWLû&|£Júµ«×?£z–-hò}-–=Øøxí”úÊQÓäQ)¯? ›iÒnyé×c ‚ä½â-q ソô·ç20å•îsœQn•½«Ç )exV4™'åÐÀ6 m@J¾ óU®§oT¹ý0”ë“óŒtiO —R_ýð•z.Û!@ =EÊ@@¶t<Í4ÈÆºY³n6ãË` õg+Œt@@B°A3;ƒQé2«±@&×}*Ö7I°uŠ·ºã¾ÚÎZþD=^vµzY™øKŽr—èìÈ|4;¾nÃOõÈÊ%p2ÜÑŽ 0ö ¤K£N·õ àŸ¼ÛK ²nõ]²žrk$Ty‰£Ì)Ò§m¼à¦ÒS¥¦Ô¨ê—Ì”ºï.]Ò´"µšÊPhž}£Zdöß8F¹®‡.‹ÑÛ¿djº+³Ó]-÷«7Žã:’ž<®]õûžóe#èòR˶Ûb‘ z™½ rU·©*7ö5WÔt5¬bŒí›÷p¼Û¥'÷ôœïkaçjóW¹AA²®ê’`zñ=2 èÈO³ï5o0Šëš¿ö·÷&{U‡åe-vlÕ ¤Ûrå£kG›.3ž`á•W`Ròi˜kc ½‡¶·v—Ox•ÛŒô|nI6‹g†J€@úPÉS/ € P"ùšiÎ3àY7ýü2xýé² € P°ã£ÑFØÁk ¨ ¡¡ößá~BÚ²“¡ËLôGåÖÚ‹ÖûÙŠ ÛSÛX­ïu»õÔcݶ·¿–»x3vu¢ ¢$×ñž=mg Àmëö5´.{þãýä_òôÝqãÆ7z±ÇJÉvÁIå±×è³RÖ²>¾uÉkã=Âþ¡*“ËÌ»¯/mžCòØæ<]^FŒúƒ €ø/ -¿}Â:ü˜ ÿtË‚¢ ¦GºôJJôTî¤/¯‹`ù¾ÇE§}Z‚èWÚÖÊ {Óÿjù*i_ËÇÓƒA-e³•9ïn¶µ²lÄûýmµ ¹+{†"h•ø,•BRË•Ñ,ý.·¯vÚ€”Ô¾æË0Èr7Ú&îæ'žÓ²o Æà–ìµsü HÏ¿15 € €¥-§™I´àfÝôïË`ðõ'Kä@@ Ð†©Q{õ´Q›!¤K9â„f; Q‚ç1eܳ[—ÌßìÛ¦÷ôm37b&vWX…§I »¢*>EйÝ¥uù7åI&«¶õ]±ûlZPçž{®KþØ5Ä—JðþUe¡ó%Ìy©:CZ9>æñæùÙúZ[ïÿP¼/^ÛK÷smCÐåùµ§õÁÊ-ë¿#ˆ·Irì6¡ŸMÇòåË;üòzZKKÓ{ÒŸÕÒŸQÒŸ}‚jo^¯E4Šhg¡´UÖ©W¯ÉŸ«eÌÊ=òg‚¬»=Uî|pwPýŒr2 ¨ËYûfDòš)ï³=ûÛ^'Ù]uGŸäf=K%´ÅãoEBr? 
yÈ{wã] ú[_þ’/Ã|•ÛCÚ}7?ùüÌé³'߃[zÚÅC$øÄ¢Ê©@@ ø¼™ÝÝr¦-Òꉄôïd3*_²åÎoæQ¹óâ×׽ߵCËâÆ£[š°tqcãÒæÏÈ×ð¾gÝØ/ƒö‘ã—A›5Ðúm<@@ŠB@fù”lh<>ôôÑ»0N‚yva;Möú¡ ¢Û&<úàÝÏÊdݽæh=Õ>'ætû,3—$g©wôɖݲxþåÊèï{K°¶Ì8‡÷ªÄÞæ^ìÿ‚’i¯c›³ty>m[Ùß.7ž¿ªûÐW"#v¾?¶ûÔ58IÚ  Ê÷¸ýíGž®E­©:_^/‡Zy}Ÿ)óEòbn±ûŽÖów\°{ÅñÈ€ '››×»^OóéþöØ„tâóJNtTן“çÛ÷®ØþÛÛ7ºßå&ËÉöl¤H»¿c§ HIüí¤<¥çË0_ånd0e‰mÓŒt;¸EÞ'ÞÝ‚¸´±l!PÒ ã:Ð @@ hìLù¢ºÚv È™é³ndÅÂúÖÅM—67=~ëÊÜðú÷e0øúsk%¹@@†VÀþ;P–оضB>¹mÏ m‹äßÙŽ:"ÙYÓûÑäöP>ËÍå½[·Ëw€ƒìÔIÑéKr_Û¦xÜ”Ùò±O6ÜãÍ畦 ~°»rñþà=ˬôZU}¦·=¿‚./K[Z—4]¡\åÙÊ”Ýkͨ»²d-üdc¼u±åu±ÃD]å ²¤Ñy¸¢S÷3ÚùoÛ>yßÏ— ú »íªŽsd`ÈZy}UkåÝâÝ&ÅcÀ‚º,È€•/H‡ûKÒz¬E²v­Í‹þž &cä_ñŽiµïaÑheê± ¶ j@Jž åõ˜¸SJÀ×&ᯌÉiFºwNž·ñz  *п¿ÖÆù € €%*üLƒàgÝôïË`ðõ—襧[ € €@‰ ì4æ€3%г§í–‰/³k‚ySÖŽ;VïãaÈáÏÀî£{hmWü'%k³Û¡°šªµ“”óê£Íÿ•MŒ‡ôå2IÀN«¥ÖgÚ?¹^‚xw§]5qâ©[¥ïïvÐåõU‹^}¶sæåqÔ´I'×Ïé+¡“kr½D¥½Y­!¥äNõÌ üZÈ d]ú…rׇr™=ÿž»aÃ…Iӥ͋þ)¯£™v?y‹÷ä±B~b@q•7ˆHú½kM´nj®ý=ækSw‘ÛÉyùåýùLúyr+¹ßÛ4yãVmcFŸ~<ˆýB’/Ã|•kíeíúÄMùêÊùZäapKÎu“< ôù®<×Mñ € €¥"‡™AϺéï—Á ë/•KM?@@¢ÐzûñãÇw¯Úëß‹‰'O—u¿ÕÞQù7®Ìl¼Ó?çভ³«¥Z™ezJ¶Ú9ñ[ÛÞsì/$B5Âæ‘ÙÚÞs¶üI÷nÝ®M³WÒß’g¯]$›?r»ÏujO®ûf÷ €¬Å…FTL•èfbÐ@W¼WÀλݰë^æ¬õ6ÎðáON:錽³&l wâäi_ñËty~uô¤57Ç?TkN‘ë÷›&?â_4)Z×ÜíÉWà­ÍMoÉ«ðÖD3õ.á;/«©9eÛlÍ>>zÆŽªrÇû²O¦}-jtõLyßÔÝÎï-[öÓä /I–»SÞL¿¶;År‹÷ uj3W|hû-w¸ê˜c¾5ÜnoêQ^¾FN°ëÌÇT\]”ž¿­«£g‹|žÍA€@ú S € Pêy™iü¬›þ} ¾þRÐ?@@  ìlÒ‘ÛíõJíÉÓOO]븦¦fØÄ)uÿQsrÝ‚ræKÀ¨B‚Gk]eÎ’I\xè1û_ ä­²-‘ÙògKzµdFí¤hý‰‘rp?Hf3àµZnw¾©`´—osÿŠï¶ãÒ¦í$ig|wuÆ:îÙÜâ’çÕL®;Xzz¯ x]®Ë÷jNªßÿh´{Ý^¥ì€k “óo²çH±ôá=ë0'Ë‘µäïc^€]Ú7Öq"Ï×N™~‘”èøñgTØ|^Y“§1iJýeè};r¼ÁÉ2RŸƒ./µìôígš›ÛÜ ŸÔJû½[8K÷z;Ð#=_¡ïÇ7´]-}ðÖÄ–×É‘ªbÄsvÄÄ“¦íiÛn_ŸÇŸ4í@yýžRá—å:KŸ‚º¼ÿœÌžþ/[§´siëâÆÅ>õË´z·^à ®Öºlèn·?ˆ‚ožÿ‘«\o6¾ô{§òêŠÇ&L®û”—d¯emtú\ù¬µkä3¯åÁ¦Óó?õð¢UÚ¸Wxé2è§,Tù‹I'N³·Ïúލ•÷iÖ ~ `@J¾ óU®ÇhÔ{öYÞ¯ŸµO^Ú&þ zpË&ªã0ƒ*@ }P¹© @(M|Ì4|ÖM?¿ ^i^zz… €•€ö’9¥ ]þnm´~]M´þoºb‡µ!GÿJèS½Îõ¬°´yþï ¥sË›ïþ ®Ìéè“¿}8—IPëýÚhÝóÒßÖèQÈÌ·ähDÅÍøä,`‰€l»ó˜Þ¨™Rÿ]ï´€ÿjyhþÓr{÷¿$‹•Ƶ<ñð½ï'÷7û9¬¾jϵ·”–ër³Ì9iŒªÞ ·—~Mú»²<\.YÇÎ6w„äÕþÉʶߠwݱ¯JoöììL}Ç™£Ãú™‘ÛEÖ‰ßÚŠHÅ{*ìüÆqÔUòúØQ*ÝæøÉÓw³õû<‚.ϧŠIvft,®Ž“ëù›2ú®š“¦çhÞXÊÐnÙ>´w¸ãäµéÍ®·×T®ì½¡Pè ñÿpô˜Ö†C¡ßËëw…Ð ‘IDAT®¤WI4û†[<àkaïPÒ‘…rÍËä5²¾£3~N¶ºk^ðª\‡«ìqyM?iJî·:ÏVææ¤ö€ 
e‹çß-ýþ•×o­¾ ©?Êûpz÷ݼ «¬/¿Ý¤“ꎗkù+¥óý2k_Óqe¶>¶¨µ·Ë{ÒûŒ•>}F‡CÏŠéÙAM6hnϳw/°Ás;hf¤õwv~œ­¼lé…0 %_†ù*WÞÉÿ÷‘G½î„a¯IÍ”éçw»§›5¸%½\öjéC}¨@( g>릟_¯¿D®3Ý@@ŠRÀ¨5fmLn¼>HPU8{KÐ qkpcÖI,öêµ¼ö¥e.x³Ðú¹¬yþ27î)?oæµ´}˜´ý‰ê"Ѭ­$PùK ºÔúPÓo;ÖtΕ¾®HôAMÞúcÜŸ$Ëvãª1¹=çÖÅM×I_k¥Œ'{Ê‘ëä „™±ò,“ˆM»¬|c§2c½Ù={o¬X±°½¥¹ñʸÇÉ9¯xuÉ"~aùkã­‹½µ¼Í}±xìPYãýÞ¥lÜ º¼%ûo-¨éÝx—:VÚý‘÷Zuœÿ‘€ãxÿÜ…™úó–+×}ðÚWŒëΑ@ùêžVÊ {»/ƒ0þ¤â]‡›ö÷zŽobc ×bävc.‘÷ÎX[¼.yâgw{²U»òÍ®—ëàͰÊ[¼{ïƒÁdÖ¾;VÞkó¼÷޽fZ79¡ÈßdPËÇ2鈿ï„ô2¹–‡z–ÊÜݵ®í 'ž¸÷“l–Jf‹Ë —/×üÈ–+ïÇrÇÑ vPS•µFÊ^­+G|`¹ØA3ö3NóÙ\o/ŸZo HÉ¡¼lóqmbë6Ü.ïÇÄò2ðH¼üCþüJP¼-×ä=í8?.+ }9Õ¸{{Àƒ[|Ê$ !Ï € €#ptí´Ñå¡äKð—z•(·¡”Û¿ïù‘DÖJ³·y“‚£›e†ˆ÷ãV¯“”š>ÑQºÕþPfÉä<™  ;¥¼}ì—iùÎýoYwíD™žqŒÖΕ‰"Ì?Œ«oh]ÒxK²È ¾Q9ìm{Ž—fÌ»òü–|AÜUŠßE¶e©{FËâù÷$Ï ²þd™<#€ € ®À¤“ëÏZoVÿdEsóz[sMô´]*§µ»»2zùóžkÔßÜ ï®X¾|yÇà¶n3j“Û¸×Ä«rÃf?ÇÕ»G¯4]îo–=<ÿ¥ôÒŽ?iê!Ž}Ú1ê~·XNÏ¿9ûrëõäßù'Ë?Öß•5åw“2üf†oNÑÞ9ÞºÙñÐ>d#ÿnß]+Wn[¯_íRñ?Ú™úý-Ø~/Е_”€ÝX£´ùú–Òñ7͆ö¿¦¯KÙA——KEžGbýÞáˆ9X‚³ûÈ7½™xüÙÖ‡î~~ ý*åk!LùlÒm2â`±Ñæ ò^å|v@67¯}ÿ+W¬XóÉÑ“ä•)³ñ%˜}Oë’¦3zdÙ˜8yÚWäN·Ê€”}¥~ùjÝû!ß×_“Ï ¥¬Ÿõ>Ò÷^Í”iò]>$¿¨ÏHΌɟvÐŒ¤ß/Ÿs7ËJ/-×~L<±n_'¬~#MßZÚÚ!¿%·tIÓŠôòúÚ·KMÈçÝÍ6œ¿gâî|}ÑûX¾ ƒ.·&:õËJ…ÉU–;Hô~Øßeä÷˜,]ܘuà”½¦r÷Êëõ3~¯Áë”ëÝ‹ÇoyôÁ»Ÿí]Cb/×ëêw.i)ñadá”… € °å ØÛãÜvÌÕòei†ÊG¥ ;Ó ÖU¯ºV½¬+wLŒPÏH·çv!¼Y¾d}.£,oÖù¦¥aG¦—W—/•zÇ'ò™+[75¤ž³9_ƒ¬?µ-l#€ € PìÇF§o]¦œ•l)—èùl ¬\Rì}¢ý¢@! 
:*­®T#¾¨LèY\!,ƒ‘žw7lxas¢¤ZÛ[º7#>¯Ð2ûÜÞEC¾›××w¹¯¬xdáÆ;¤žT¤Ûù2 ²\[V…ª>ÎÃetÃŽ<W~ÏyµsMÇò>ï6rMJypKJ7Ù,qé%~é € 0„ÁÍ4pÖÍf} °þ!¼T € €@ µÑéß‘5‘½»@ušøgëHZ…!€ €C&@ }Èè©@@@@(^Y3÷%¹#Ôþ2kô·²®ùaÅÛZŽ €d d¬7‘™…@@@@@6 $Ö5ÖûÛãê[7a @( pitƒ^ € € € € € ’€£ç [—1æõ¾þÀ ÕK5 € €À  0#}Ш©@@@@(.£k§NoqíÉu—j¥ðÒµºiÅŠ±ô<ì#€ €Å.ÀŒôb¿‚´@@@@È“@e¹³´6Z_î*õ eÔ{Z  ëã¼êŒùõº^¿-OUS, € 0¤Ò‡”ŸÊ@@@@@˜pÂéc”ÖØÖÉ­M÷SÚnyÙ·c*v2³Ñ-@(EP)vŠ>!€ € € € € L`ï=>w"¡w¥”  ÛIY¥Í«23}AÇšŽ¯?ÖºèãÕÀÙ € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € PœÿèÊ`0ª{IEND®B`‚rocksdb-6.11.4/docs/static/images/data-block-hash-index/block-format-hash-index.png000066400000000000000000000750701370372246700301050ustar00rootroot00000000000000‰PNG  IHDRȤ*ŒWsRGB®Îé@IDATxìÝ`Uþð7³5=ª ¤‰]PQl€ =ŽŽ4%JS)"Š'‹ ¤E¥„¢ „¢§EÅû£ˆrر`!"½¦gûüoÊf²Ù$›d’Íwîdf^Ÿ™ÝÌ›¶Œa‚ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€€Nà_ã^ÓgHÒg’ÄD]p…,Ú$ÉØôŒ'î<ò‡Ê¨/P'ªBµ+ذ¡)¿7ºØëžœ0²_°y€ @€14Åä—"I’¿¦, ‚ ù‡a½$IÌ럴ÈÀ\¥ª,Ð6ä”´ÿ÷ÊÉÆæsGg3æ-U}¡Lê6p‹’úÊöÿqû È¼ô¹ö¼ïCYnIeÝ3úå'Lç>}å¥âÐç.ÝZRú‹_Ô~©oKen+}½X† @»Ž[4¦7 ÈüizõÅŒ1­vL̪a·%¼zëà§.ø§Áz…H|¨j(EÑEmC^DþvŒXµü ÏL¡å‡¾h£5®t#q}æ-‡² šÅ !w­{+õ¶5±ØbDƒ!§ØI'R„þI³ùY¬Ã'3'Ó¬j é$E÷þI^ún)±÷•¹­Jl @€ T;ßv[ô•C·Ç›x:=cúœÍ‡Î?1~äðj'Uc\ô6äÊvÌ™Þ{Z®}kÊ„FáÍ¢X8<Þ&aÝOAð:c:ï£A2«b_ Ý–RD#s]î«‹ˆ ëà§V|W÷…”è*{ˆ¿ÓÚ ƒ @…BzY+ÝdÈN{{ݺË>eL¼î·µîHÜw»W?ä8w„n¿eì÷S†•3§=úÔ¬ÕŸðuLUQ ›mÞ°Žlkciö–³Ñß|¸âǹƒ´E¶|癣üYcA`¸…¾*nÂR´é“ )µÞؼ;æÁÞ·d•"[¥%Õv°.m£o¸rúüò¾1¢{ æ6h»l¥5ï¢W´÷£åDKîø ¹ÛZScþ¸è B @€@5¨²Adô, ¿ü¦]Žä·TÏa’”|wÿ$—…Vörí YÍ;’¥NWÚÄŸý¦ÛÛË2YMlŸú<§66áÅðÁÓœ.¶wFìßô5cìÃWiÊØ’4‰©z TÕÁ±^µE܉³w‚›Âø˜TÁhù‹¹Üñu#Yåߢ­@€ÂH Bȯ7. 
Øzm;o`û×ÑàÊBãef¤«nþæá?5Äžq¨Ï¦ÕË{ö¥7ùz÷,^Â/_šŒÒñ6‰u>ûì³?éË5ï“Vçùh’èv^˜‡ÅX#¿MlýŽ:-ÏÀi«oqþýÕ“KúDªÛëåœ!çþKÊ:s£IdÙÖ(Ëþí#wô(áÙhÞÎ?¨ÎŒ“}øÈžÚžÕ vì{Cïl¼­}χsµºôóñ+NÔ=ó¿”±{F;#¥ˆŒø¾nmëžY/LÿR=‰ O./´½Ý>ã𞉢äŽáuÔŽ5œò襩BËG <ó«Ï8~ź§¿^3Üè²×çñ1–¯b÷þá罯듽\ä6¤vغ~Ó•ÞÖ KËOXÚí–Ÿ“1ž7¶9“·¹²o4kX{Í /ؾö*öãÉï&Z²NÇ´nl:9räÈ3úòƒ]öß_iG‡LLþ—;#í.£%á ×qºÅ[=sÏȧ‹.Ÿž«úTÊ]®³iÝäýÜ$žjߢùæ~üë–’ÚQÒþÁÛ—÷ü²Ë•fý{âÏ…l¨î Ï/»²nT¶ãéI“ä«ÃŸÛxUÞ‘†µˆ?õÙ‹sßð=ƒ<ü¹÷®ÊIûtÜâÞ‡&×í½%ë‘ä=‰g~zû©Ú‘‘ûÎgeÝÒ¨¶ñ½×’çn,ªÍúmFvÚÿÍßÿë†6ov1âì§tG÷¡-g#:µMu¶,fß.Xv|ÁÕ ÖJû¹õß¾E}iéò2þìóöê×{Mù¡‘óûÿŒrÓg] Ï`ÃëšWçÏݤù?Bñg¿]?9óZ›È":ƽ{Ù‚…)Z¼¾+T¶xä™7ç:pŸ‹Òóï“Ñ}üî>7lô3Ç|ii[Î\ÇâwnC'ªûø„ޝ­øng¦ã Ùm÷˜®k~ä\ïÞOVÉ»|}À @¨B!½‚ÛuÜ‚1ÖÓ?.22ÓÞIÝÐ,`?H–^Ó’ì|·ùc”ÐsYn·ä‰æ=ÏfÌÁš6éÔë¯#{·Ìë Û}W+·òÒÁ·Aã˜=ÎÄ‚i”5Qp¤¿ûÂêBK*L7u8öpDy´d㥋—½nÓÖõó>“WötùÒ¯]ù)ZÔ÷Œ˜¿`Å›ù!ŒÝ“4ÓfL?4]–¿ì¢>½i ƒcí*;ë¹UŠt¿5昉n•ÌO—¿4¢CT³û'¼’–BKt€ÜcØ“[Dû… ¬®«@´n%¨mHé»÷%™h>è&Gë“V˃¬)¹ÄeôÖlÿ¾Q;Ë´Ýä¦QÞî´ÍMEn?/[ÐÛ\»éà” Åµážñ)ç¾[ÉËLˆÈM]¹jí ¹ü"þ)Ê"e¬NBã)gèÍÝ>H74÷tù÷¬ŸøßÇö~{çþ÷¾”GKÛ4!{Ê‚”Ô9Ú:Ÿ½Ð~ÔkM’|%±NtÖ²+Ö?¬/çî‡fεdšÄoðØüâVþY¹³ÿè üÝfT﫺zµðh³´+[¬Éìg¢“þîúuµ ôh¿¼ç'>5:3:éë´gÍܶfõ†žâä0*‹¿¤‹ïo£;šuŸ°$­È´~eùÜæoß⿺¿r¢©ò½EƒWƒ)Íåq%úUO«ÙlÊà×b_Ú4k›Ñ™À">+oø¸ŸÞ(î‘c(\i뜷ìõUüejì.ú^‹*òsÁXb|Όז­³ñ´˜ @€Jà¯*uºå+©THãÂËœ|9ÿÍæ ŽëÄšçµi{ë ÌZk[B„'U>ø¦Ò=t‹¶68®m\Ö½ÃÍš\yÛÕÌ¿—å•,ñ½§ ±ûÿ¯W4eðxmŠ0¥¶jÞªK\ý6#Ðäƒ>ÀêßÊJâ¦rl7>°æÛ\sË«Ú^}Ãe,%ÖdPJ‘íå­ë9áÍ~Úà˜0J×£üý•±³êà8.ڼ쪶ín°$$NÑò~}È»ƒß%¡­óùÝÞèçò ޽¬a½Ø±×\yóÕQõšŽ•ÔeéÓû–K³ôr=ÍïíÂóžÍŽIúoÊPßËÔø epÌX‡æRíD’|‹.¥Ïß!•šµðl§Ð‰ŽM––بA¾I¦¨}<•WŠŠ|ÌPyÀ¦äb¬ó°§·hƒã&õ,cŸ}`FÔUwŒmì2˜Ó´4|ÎûcøVªå²|nyÝùÛ·øï£üttJH»X}Úžò÷–9v—Òh6{íÓ™ÊàØÃê%DO¹Š¯ùâ#ØèQƒé™~u¢+ÇNßà˜ÒÇÇÍàû{l£Ë}ß5'3Í“ÖÏÚŠçˆo°Øb‘¿ûøz¤Õ²Í•Êÿ³X£·ÅÄVŒ-¯  @Gï•ÑAëî¹éêñ?—£®mâíõÜÜEt›§ü‹Q¾+Kw {ú]¥Á^6¢CŒ|5uKÑŠëÙÇöq{—úl샠ˆ‚WÎxÂH«a›­Oì 6½gçßv(I«º ™ð½Ù•sÍ‘ôˆé­ý*¿ÉÓwÜ,Ř×&ɰµ"œ©«V­¤»Œ¼‘p›î¦¼Êûó1aƒ´5i¿*î<¸½7okœ5‡®Ž­ë¹Ž-çÅñéÊóÄ7Û–ElÙ¸I ¡;?ÿôÛJßèŠÒ-èʺnÙ~•ǧQ¶×^›”)0›6qØ¿[µ‘Gt~pÆí6ç›[šnúââÝ<\v÷]!=æø0é´Pš¹ÇËâøq?U^´&gðʧ}ÿÁvê;ùjõeõ\c…žo¼µ\_Wy¶[~^ƺ\}õøg“Ò•-[>ùüóW8Ù KÀù 
T8öž¬¿úÆ×Ë;YMî}lÑ7é~òê›nɇ7™÷¼t˜®+Ò3ÚÓ~˜&×CWvÍk’ävXMŽ}ÞZ}½®Ý¼_‹ïüøÑ´/ù·«´ûÇû³îûä®Áï‹¢²^Ýé:JåɬŸNú9‚V¬¦Ì}OÍÚPèê¶½ùë^vUc/¿ßH¾¶ëÉŸYé*ñÑtÇJ+_ñî¾Bªkú0IÞGº´õÜ0Þ¶ð›… ò¢ø¾Òìnê#ÿ¼Ô‰Î¡«Ûë \Ýæ‰Š›ÌÖ\WqñZ\Y?·Z~ý¼¨ï#}š&õØØ… ß\¬ »­Û°iëÌy§ò°z±žy¯¿¾BÁ2Ëý %QâèÝ ÎcÒ:4²<úÜÜdúôM|_›Þáà‘ßápõ¦˜9ï¦ü››ÏQîèp±õôˆŠ/ @€J-P!WùKºµ¤ßÔ #Ïý!6ã¬ÎmºÁ/9¿õõ9Ý3ZÄ-k¥ZQö³òA÷ Í]] ÝjL ß¡gcíõ®ËóœÍ¶&¸Úª”—þNÁ1§—Y}øVòuÚ‘÷Œÿ^˜¥&gï<½NYÎf«V­*|K.åýÈ—Wd~%×/,ò-®&£÷¸V–oNyôÏ,óƒùHµoƒnÊi­ Ž}©Ù½…¬ÜØ&óxH–ÝÙ”Ïyžˆìcòö­oá78æIئì¬Ö'9 ÿ¸<щô»³ždºzKsù¿C{Öföty\ÓÈÓ?yájý€ `éåÙnú¼íšØ{ù Ž•úÈòe¿çÓ•¯|…þî©ït¶ûŸ:8ÎõÝš°±¥t± o­¼^78–KøpB³´<õJé7§Ì-´b»¾;#Y¹­ÚAùV_hÿ7ÔnûŠ–^›—eÿàyÿkK¾I¹Ã!šMÿÈp~=B¾}ßÃ6Ø6ܤ•̼YBî¿Á±œÍx݃£ø‚Ë#&Êô%ãd_¦_ŒJo[ñ®Í ×?Ü/gäI]µ°`ç‹vEîþçà±?øÿ׫ÿ¨ ú+åeýÜú·£¨ï£‚éláÂå…? ׎{ZIçb~ƒc9ØøiOð¯¯ÿ®Ú‘º¸™ßàX)†ÏR½kåï/=Æà›èD–¶¬/G À @ xßUðYJNét[šÎ›·ªÕÛ‚öSlo´wþjZÝÙƒ´ßY}(ïsMâ]3–½¾V/ˆ2ÊžD»¥ÿl†ë:­)ïŒ< l[_AƒcíÎy-Zž ’£Ð~R–ýC.¬¥àð6¹£_ÞʳR»ŠÞ®‰«—vkµœ.ˆÞêAùÄȦò‰ õ¥{ò÷IÞ‘näEz%1à³ôR­6òÝEH+®9n'Ñír^ãÿiÆ;íNåÎÞ®2~n Ö]ô÷QÁt"]Ü¥ß=ó›ô·`Š÷K^ìêNzÔ`ëÖã‘ûùæÜ—¤C @€@ ÆâËÍŸþlïîßõ™•ÁcQfÏ®uÓcºÒAÇêñA` ý¥‘üHç‰]Ýxc­&ï¾üÐKt•ÅM·–åÛTÓ$(:(úò¡oz÷<;n/VìéÊŒ±R"Ïq]c7Ý2[ôd½î±ålçcÓµƒþísûo¥ƒ¥ñƒË–’YóMRŸ§ÕIˆœ³dAÜ2A°ùúï<¸Y¾›—¾rÏ~ªg‘IA¾:ç=üy"«ÉE?Çx°Ud!AE¤3º\Ûlr~þÎó›/?üëŽ-ê×G¼[žŸ:”^DUôïY—g»7`LìÌéC‹xT“ø¼ —½eãË•=ù–´/™Ô}©K[a‡ï… T–ýC+vûÜA[ï:e›E}¹V´9w×ô¹kKy²A+-¸¹©U¿/ر/(q¡1£\€÷§Uwð~¿PŠšÐ‹ôZýcÌ"þ–æülôbƒ¬ûè‰Çä°r|nóËäKELW1k£æ}Þêô÷o/—œ¹èN¹’ÈŠ©¥B€ -P!d~`i5GìÒ^`%Š,3>6z{ïMR{ s!õ­Ò›‹“|¥Êb0ÈWf‹+Á£>Ãi¶šu÷!—C‰;esñËË^ÉÄoy¤q'¿0Ä'/k•+/ñýÌÁÚüÒý”“ïöò©‹šõ›ðj¿œc?o0PœËãMfØ0wÚÈ[§­„Pþvbõgœ¶—°/ù·¦,û‡V=2p˜/7ˆ•æ‰ñÊsò;ö»¿¦“òÙ×ê®{ím™_K>›~aº×žÕƒ‡ÕŠ–mÞ°¶âêáç6¿•³Ô~†+R½BüØ®Æ/Ó›¬µÁ1oA gÔ+§e¨€ Ô Š;H ±ßÛP–6ü…ÁE^AñüwÂ^5ƒ®öSMú¦˜ L}©>”1þ»Åüçjxè­-£Òb…„6òÝ“™‘“~\;º–®Ÿß—|"1BÍ{g‹õ͵úùËÛéêšv u^Ž(2?˜Øü ò@² }~ÐÛù©‹^úàßí~Una7°g>ðZ)}W-‹ˆ/2¸¤+Ó¯egƒè)örhy¶åU]D6xSþo½ÙèÊ‹“>Yõâ —9nÚEWí—>7üªÉ*s%®É«¼:¾/5à×®ë›U–ýƒçï)£¯'TËü­Ûy{gÑ;lÈM_O¿Ü¹Ûèz÷u{ Þæ ƒL«V,{¸¸“*¡hG¨?·¡hS0eÄÙ3Ô+ä.v×èÕ…n‡÷:³ë]Ž|b¯È“UEçC  @€€&PmÈt5WÒ~â„ß&:lØÈuüwzµŽØè¹½{øo²ªÏ¸±NÀŸ±ùû‚5é·Ío¸ÙwÚn1î/¿TLr ü4Mþ[±EöÌfÇùUól­´:ùœçÕ®Ò™ Ùi#ž}“?£Ëîžú^çCF~öùÚÅÕè·š#yšÄX³ä~u1áò)|9ÃÕcðÐ[%i§ïöwê§0pÚº[z&_ 
Tó¸Ñ–ñeþ“V#FŽXÊou–ãèŸñ+²êÞ;hÜŸ¾B´ˆ ç¯ÈÙøÁÉ¢ý÷Ææ31Ã'/éyO¿Q’öäí­£ú[dy¶¹xU—lgd§AC†}&xÍw÷w4uUç>ƒ‡üP\¶¿5÷¶<õêíû?›~ÔÿPqùB÷ÉSg*£‘õ^['35ٖ諃úòÏqóÇDžûU¾ßÎʰð“6‘ÙÇå7;ujéºUnšØšÉ÷3ºÕº=_õ³cñ¼LQp¤÷ýÌ1[ÏKr¥ç†GŽìtFîð½œ.ÔõêË+ëçV_ÆÅX6Ôk¡Ôkbc’†ÛômèÍ÷ôCÓyØ©L6X§ž³°7Ÿî{ÅF¿»­ÿŽÔ¥Ç" @€@e?,N{f4`d1nºj¢{%m‘)?\8vñÝCŸên¡ß ¾gH¿Ï;°ß±»ÜÌľëŸÔIëÌeõ¼cLZôG ‚ì.ã5SÖþ/“¿QZ4Y¶;œ·8lWÞZMW¼Þ}!¡ ¿BHÍMƒ1%·1½¡ú(¿Uô½Ç~¿¯ÿƒé&KônG¼yìíçRÛò ½Œ¹ÿÚ±<ÇcHœ³ùûó÷õ•n0Y~tºœÏ_ VÚIÛ†.O$ÿd墽_!šs£×”ñ¶7¿Ñ¢‹ÚŽåÙnê\²æN½§ýdïK.N?~Ë0¼EµaÇúÅ wÓï:óÑõ«;Gén€ÚWN)ò™rÞ§@WsyxQuð8mÒì}gpx6wÚÖ[Ù-Ÿó})uϱÃ|?¤ß;â¢ý·¸‘c©öA‹Óóú'æ}åo­žübên^=Ÿ>˜tù]†MKÈ;=P½ÕÚDnEV­õCÉ]øß@b½›>g§`)6¾W¿¥Hkô6¯ dy%wŒ(³DËJˆ‹ùo÷ö wð—ö.µPˆ:,^t@?·¼À@} TQIéJŠTæûƒ… ]éí÷ü1‘£é¦éô]ó˜Ù¿='/s túW_»+úš±£zbQÊësä(ýG§éÖìwéPÿ¡ã¶å¹¼W~·&)ñɲÇR¦Å¾ŒX€ @(V Àñ{±)ƒˆ4Å5ÿ‘'‹6÷‘Ü—ÄÔ¢Áiåvaù8½Ø[?Z=³§±Þåü OžìNg'·3G¾5ZÜé=nŠi¼ðù¥öÜ‚ú¼_­h‰ž‹\- -(w¡jÐ|ÛèÈcѽ—ÕöLi<˜ÿ¾+ÏËÔ¼uèùÎ-ßô¿-Û¨ýл%_zïr::iuÅFxRß^—Úœ—¥Ÿ>¦+ú¾¹è÷^%úÝW%—5ˆ˜ÑdPŠz…I ݱ!¥–'¢NªVŽÝW‹%6jÕ_ù”ìªå×¶¡¶î?§qGz=jÇ’ÚG¥¤¼)ß~®¥Ñ¶#ß|°ª…óyy¶›¿‹Ãß%Á2E«¯È6PÃ?ò=ÿm`ÏnÉ8¤o_ e¡þ•;y¸ÿíæEÖ¡+D´Äïå«-êGÈehQ[_ì¹[¸²ßÕÊþ®¼ÙÜE?ãÃã¢ËŒ±çñe«Q:ÃçúÉß¡¨ý£óÄU}ùà˜Jgk§¯íª/ƒ/ïxó…!Ê™ ›8~ |•YKS+Êø›¶ÌçBDÂ'|nVß"Ï—õSVóçøVœ¾mþÁ³íÊ +öÊí "˥ϊ=/g Óîç¹Ù9I;¹a<“ôM_^Ë’âåemë öÿ"ÒËÁeùÜòŒÚö¥¡2_-òûHKh×—ST*ìãu /sš"öñ8þ]c§Á1?Q‘ G-39ùKiúû\ŽïnƒZw/n¥íSyäÌ<®DžÆj5þÍç˜ @€‚PŽaƒK[åRÍ\ûc-Oæ_‘n{¶éºæ‘çz÷î­Ü² ¥wÑÏEÑí×MS^[¼jî‚•ß×9Ÿñ{T¬ÕàšðHßúáÈî ÚI·±dS}·Ýc &/¿Å1eÝOq§Oÿ§¤¯{*˜ÛL•¾ý&ßÝ,Ö•;xðàb¯´éëigÈ9²_¡–¯y¡4ÛÍ¿©ü6ïÇwÆòð`\üóW¥õ¥[Gž9º»ßÈ=;GŸmÙòžB'gŠjoi÷¢Ê©ˆð{ǽ2Æpú—E¼ì„Øèy—4n¾Þ-F;ø›ÞÏ9ÜqgϜ꜓~bºvuzt‡¬fÝ'¬O«ˆ¶he–ös«å»˜s~‹´ñè¶ZnûYÓàõOè÷þ9¨Ã¾dþßyç­oeÊɰÄǘ³¦>žx4˜ïš‹ÙGÔ @€ªš@µ —3€œ=eAJj+Ÿ¥)i!¢ø[äM&fÌÃFÝ׸çèäB/šÒrw¡gÙ#h¥e½ì±ó¦–xׇ–s@€ T”@Ho±®¨F¢\@ zx¾[,¿´ÍdÈI+npÌ{ã5EÉ·[Læ³Õ£wh% @€@¸ `€î[ýƒ@% ¦(y°ëòXéÅiÚûÜ µ ÷s_^Åyà#®Ê-ðœv¡Ä€ @€@% `€\IШ5A ûöaÛ•~ZXïþ#]“'?ÙóÀò;Ãøsò/­8Q·×°)뤟WÊ/ô«³¬åÈMUöyùš°ÍÐG@€ |šó òÀ±‡£è§SšÄçÌX¸l-ŸK€@(zÒoƒ ¶^R™ŒSRRRð>€’ @€ Pi5f€œDo„ÍûxF¿››³OGO°¥Uš0*‚@ °ÑÛÞ{tÎwƉ>.WÞ•Ìë•kÜblP·ö¢Wç›ÿ#6åWÈj º @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ 
@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @¨xæ÷ž”d uMUn¨ÛYË«c£Ñf@€ @UZ@’ÄÌc¿n8Ÿîž.I,t㮊*·JcV^ãB·¡*¯Í¨ € @€@U¼r •CØØŠ*7„M¬¾Ea€\}·Z@€ @!À9„˜( € @€ª¯€±ú6-‡ @€ PŒ€$ Ó–lºÄdÏ1µ¬W7cР{3A|Ï3=ÛûÂ’m Ýö³¦&qQ9#Gö;SLÉŒ•6}±…UBduko%ªBˆ0@€ @ÕYྠËúy}½Á¿Qfç®Ô·Vݦï=nþéô¯‹ôaŒyÙím#n˜h[øMÁpÆ‚JOƒóîý“¼&æb›7¼i*Pºœ¾Ôœ}<‰¯n\+Vè=;K‹+q^†rƒio×~£$+5qóà3Ôž-…ÚÓuÜ‚1ÖÓ?.Š6gïZ÷Vj¿Û\àëj´±ÐT@€ @ d.¶íµÁq´Õ¼->¡ñ£9jÏ™ãôtÒ—ÐmØ´uÚà8Â,ìŠOh8EŒéŒ^<ýé~Ç×VŒ¯[žôú¼|¹ûCÏÏÕÇ#:š•jpì_˜n½¨rƒíŸÇµ÷ùÑãÑë[”hpÌWº´²Œó†án±Ê.A€ @ & H‡?™Èûm{¯çæ.ߪZÌ@²4Ü2£é–JH›÷¯ȯ÷¹)®õ°IóÿÐÒv2ù3“3£ÓsŸž^Aa=yxiÓ«eùfwÓàØ”ydxìŽèÆwN>æ‹,ÇBQå–¦½–ÖC`?/ýñ¯sžÙÔ”9úæôHÉm$î|Œ‚rÙˆg×þ¤ ·e\A·-Šþ@€ @ † Hœ Öâ¾ §XÐRp<=ɦ ‚™ôëÆ<þ†æY]tƒc9Ëöîsîã ¹.v¥@ÿ”6½’ÏËøíÕ“fÚ,òàØ¢ÁqÉå–¦½<Ûþ'‡ÜàH¶~ÞÐVZŸùÜùõìgù¼Y‚a Ÿ‡ó„+Èá¼uÑ7@€ Ô@C­–ËØéí:ýùèÑMyðžÎ‹Ú÷ì™[€‚^ZeîŸÏÃÎfÔ½nÌ´Evƒ'ת¤q3ç÷oD§ƒ(fÈa¥M¯DÿšX¿aO¬³§’¯TºÃ¢+Ç%”[†öz.ŸÂÎý:ûý}îyÔpùª9½ŒL0ôO¢ç¥½ì•néK^Mñu,,0@ËÍŠNA€ @ æ |¼pìâ»?ždqå\sì\îìçÖl™}_ÿÓ›]b}89y‘zƒ5µ7>çœÍÎýàvˆñx§ÛÝT(mz-³ç¥Óà˜±Ñršu½>$·Uóç…K(·Ôí¸gÂ"iMÒì »µ‡$1#]ùvw{æÃŽ&j»Õ”½/ÐË»ÔN†Í ä°Ù”è @€   |´ö•kGÍû¤Õ™}ÌóÚ³zx%1þÏcÎ ý|zãÚÔkÙ6fVÒæ±!wti|6öº\·#Ýh´n­ >¿4v¿ã½ ›X©Óë Q—S0¾D‹ƒD•+(`¹¥íµ`kO!÷®Áï‹¢ Ó&ü'c©]i-äP7&Šã´3 åjlϬ4©âÍDó @€ ”Q€nî3qQ_×±}òÏ>¦·Gw<å¯{èg˜ŒÌC?ôÂį–[:•QÚôÊÏ<å±öW¼zïÏïýh  Ä:ç-{}Õäbë*.Ò÷3O%”[Úöªuò{Eì_÷µÉ™6ë¹õmŸ–d7°<2zK$#©¸¦…C^Ò[}€ @€Šéäqsb›ðgkY‹8S½9KrÊ?çd`ãôLљ՘Ҧ÷hdÏ>{ïO7Ml̓Nfš'=ô _t™J(·ŒíÝa»ãþ².—'6qò3cN*ƒzϼš08æ›ä2ïÈ@€ TE{§®êl›öä-þm³Ž?Èþ½Q‹Ï ÍîîËçGÒ£¦OÿÈp¾ì›è lßñ3‡ŸÙ<%F +mz-=Ï+îžtùŽæÝºð°¿ÎYf?=ad?-¾¬ó’Ê-k{=ñÍgð6I’›^bæeÓ¬m¬nùp‹uuÛbh/ @€ÂFÀ&vì[¯¥G”®¡!ÿnâmHƒ”†t/o>÷2¶xÏúÑÏ—¦Ëwö}!ZÜyXltì2IŒÌÊÌ<3Iü¼]¸óЧ¶FÚÏöàå‹‚7=2*~cžÓÝÆãÌíÄÃÚÑo)OŸ»Fû-etzß­Ð.º=ùM]¥®0Öm|Êpó©ïVòåN-M·N~qñn¾ôTÊrƒn¯®7K1 k“2yÉöNjj3]tX/â%]a½yÑ9@€ T›xSßm © ×K‚ÔŽns¾–®Sž™ø#ÍÓ%Ët±õKºjyÒ+yO¸\®ß¾<]ÚÄ\Ún¨ûÈÞ-f`™Ù9IŒÑÕTˆÕäÙ÷ʀțõ· ²zfÏûG¿ü„çÜÁÙüE^ÙÙ™”^™,&¶ïòËê|§­óyiÓó+°úéã߼'if¢1ýÐô]ìŸ?üÔÜ>®ììX}𢖩7l9c«”øâË•0«Ð’9Jß^ƾè-du86-ÂãLlßÔ4ꢆáÊI”0ìº@€ \dÛmÆŽõt• s3ÿ]ÝSL¾$ï·’(|››gÿî‡÷&¤WT+_; YØçŸËƒÏfu3rzúÿ²_Åol>“sîWú-äl6~DÄA¸£Øw•6½_uüf±gÿ$Oie›7,÷].T^ A·—ÜzÒ˹Vðêw ŇEti¶EXt€ @€*V c¯—c¼‘±Ó 
ÆÓ­ÒèJñÚ<·sÓ÷›=S±5W¿Ò—n=yg›†žA4ÝóÛ·†ž=Ûç‘´\IºŽ[0ÆzúÇE Y©+W­ùÏR•«qœä Fñ€ @¨)Wôµ™ãŒõÇS§Ð³Äy\Âó{ßy$˜±_M!ªúýô=ãÌØˆYÍ>­ê7:t-Ä3È¡³DI€ @¨±7ö]ÐÌh0m¤ñ)‡Ý±wÓ#ûk,F5ëø[jûÛâÒÎŽžðTZ·áϬ5QûE!3ýþ ÒªYWÊÝ\\A.7! € @€@Íè0 å~zñÕ¿øåúѯÕlê×û®ýFIôàuiD]=^’V °¬ðß}Æ@€ @ L,$â|æõöþrØwËT2]Tk:ö$—=Q 3Fƒ÷ä=7ÆÞ4dâ‚?/j£.R市|‘àQ- @€ª»À?ú/î. âJº¥úNÜR]Ý·&ÚÏpû @€ Pj›ú.ng…·½ÌÓkïÆ1?”ºd€@ 7¯c‚ @€ P›h0˽LzüëÇí-Mβ¤í’Û¨ïà„—yJ%€r©¸€ @¸y`ƒGŒâi{]ï÷kßMH¤ÛøeÃ{õ*I+÷˜#*p¸"¯ EYÚŠ0håÞYµ‚0‡ @€Â_ ]Ϲu“¦ ®Æ{keñ;í•\‘ôcAãÊ(•-rA ¡ÜcKQ-’B€ @ÕQÀ5IØÆ˜–]þ(KûwJ’ñÍ¿ÅÐ¥å€c‘¥[GnÝú ®Nò(™§9pà€%` B ÆräEV@€ @  ´k—d¢_î’<·•¶ÛCS~otawÊûÉý“®áy7¯eìÉ #û¿œ¼b#_ï~@²°gÆœ|Ítù’t¯~£Ø½WZ®~øÙ…?ñx>͘‘Ú®{¿ÿûšÒ°÷i}ÄÈ–­\±æa9ÿ@ ÏÚ„ \@€ „™€å²ëzQ—~ûjÃ8ßÕc³õ¨+P7%Á”«?³sîQÁ•sMBlô¼øzÍÇz(ò×cÂik’|µØ=ã‰M’3¾^B­)<ÞMñûOœë–_†}·ÿÿ¾Ž7vEDÇ/“cú¹lkÒS†6ÊOƒ%”OWËç‡Ü€ @¨9¢äØ2}‡—ïiºdðCS~ׇñåœóû;{uµš7ï²væSÿ'L Þ,­éµ6)óÃb=J–æöä5‰6ŸßõzÊsÔl‹uÙåÅ›[šnúââÝòŠ$½·’ë¿i]¾ íŸë(­® —V é!@€ Pn°èRÜ^&Ýù}÷síι9Ù}²³ÒÔÿç¡«Åútëf=õ óg_HÙÚhäž•7é_»Å&ÙÎZžœüdOIÚàBž‹=ùÂâ/|e ‚ÛI+9vgS_ PN;^9KDv@€ @ ìÌL¸—^¬µýàöGúÎîÕ¬û„õiú0¾ÜuÜ¢1ÖÓ_/ÒÂÇÓ3ÆiÏOy›žAî¡… ÚÍ£,¼Ú½nÌ‘_œßÒ»ÿ:6pÈC»Ö­i|— ØøÝÖ4É×öx߸Úm0§ñL•® ‡Jå@€ @ Œ$ Ó`þn,¿)ðÏ<ù%b¿=;î7Á~®Çõ-kߺyÃrqËÆå‚:ò•“né-d}°!¥VÇÁËb- ‰SrœB§ úå€9úu fôX…ö©P(¢ @€ „±@âí6+X'ºù£2u“~ÒÉäq&Ö‹N_f{ñåÝt«µÄ(Œ_6[s ¼äëI(oJ™6'+áŠ)¯7¸Ñw™…L(,€[¬ › € @Ð 4lX÷vÒîûá½ éZ°Ûža2k+æ®Ä ‚W¢Ÿm:›Ý/%9yæQë=®oú'5P¾ÿþa”_²Õ¥ß(©u½è±×_ßeÓî#¹MOìÿh¶ÙdÜ hA Â ì·V † @€ª­€è5Ü-Iì}²š78Ç×ÛÖ2ôáÚ²ÝÀ÷SP% |­ %=µ‹úLÉåQÝŸÈÿ‘×µºyŠ*Ÿ’Rzµ?T’\–º®DñF¨í-T¾œZîƒÜ^¹?/;¿L}Ä·‹ëQÇ)ßÐ÷î£{Öý²¸t%ÆÑ¾¼uÛ‰ˆ6m~ð´lyü²¯°ÜÓ²¥¼l£Û®n;a½„}ËzöìYàw”K, ÿ/É" TŽÀËɯko±”çtðÍ%ùL^§ƒUÂÕx~¤Èƒè°&ùXQ9êTây8¦ <ž¯ÐPÓË‘üÓ—^9ÞÔêSë.¦|9Õ,ׯ¬ä×§ÖM39žW©vž¶`ù¼_rûԾɭ¤ ¦Eù¨\‰§òy ü¸›§—£9†Z>ýûÆCähŠ“cÕòµ¾ñc~9ž·ƒÇÉm¢JL1r<^ÎKÿòv(é)¥¾>mY-_ë›Ï›Ú¡m3¹<¥@¹¹<ʧŒ?ÔòyK¨LªN韺ÌC”ôJ[x§äòø/ƒ—H_åSþºR>ï—ËhTNi”UJ©æáëtöBÎÏÈë|A_>/[N¯Ô'мD¥ ¥<¥nž'?¿R¤¼®–ÏËPÒó–ò"Dy]®WIÿ“ã)@äÔôZùZû(ŽG{Õò¨|Z•hU)*âËt‚ןNJHtR£`ùj~JF“¾?T…yÕx.ÇóóByûè„Kòé@[M¯ë¥¡3 ¾úD*ÀkPóS¸W4PùJ<“(NT×y_ö J}|™þ3P8¯Ÿžc” §å÷ˆ´N…{ Zy”Þc 4ʺ‡âø²Ç£¬ky ”F.OŽóH£QY§tFZv»=òºÑä¡e“dt»•xZ7ÒºÛ¤¬›ÜfÉEËn—ºn¦uZ6¹Ìrz—Ù-™].Éi¶Èëf'-[­’Åá”ׇdµFJv»C^ÏÈ“"ó¢¥ì¨\y=:'Vʈ͑â23åu:Y(ÑÉB‰N 
ÊëtQ¢“ŠT”×éD#ŸË˼|âß±ONxÈw¬Øqàé‹ÔG|ëJ*ü[‚ݦWw“sêTº3Áÿ ÖÕ¹ÿh; àä@*ƒª€þà­Ú4 … PE6mÚTEZ‚fTeè„è›èßWå­„¶…JÏ ‡Jå@€ @ èÙ“[è¢ÝaØ5t …0@.D‚@€ @@ GA:z½ž/´uÌ!Î ‡óÖEß @€ P^Aj'\_•·ä‡@uÀ3ÈÕa+¡€ @¸ÿ’H¯o³ï]ÿø©‹P=ª„@¥ à r¥“£B@€ TÉ{=ýbÀwÕ¤µh&Ê-€r¹ Q @€ÂS€~ú­ý`Û·áÙ;ô …0@.l‚@€ @€ ŒÈ dì 5FÏ ×˜MŽB€ @ t‚Äèk7n±.RWc\A®ÆM‡ @€@E Ü0`Ñ¥T¶ç‹õãWT(UM䪶EÐ@€ T£WhG¿Œ«ÇU`[  •'€[¬+Ï5A€ @ Úˆ¢p=ýÄž?®6[ …® ‡Be@€ @ Ìèê1½ KÀ9̶+ºS¼ÈÅû € @5S@Úy\N kæÖ¯±½Æ¹Ænzt€ @Úõ}µ‰ 0ïÞÿʃ@UÀ¹ªo!´€ @•% ×vì¿äYI7zDá«ëú¾V·Ã€E7WVõ¨[䋽P? P*ÛË .IJZjÒgâë<\†e@(¿@Ç .i×.©Àw._çáå/%TE_5Ù šw4H’-Ò`Þët:WŶ¢M¨ +BeB'çÎm~¹øñ¬yK®á•ð9_g^q•¢d@5SÀîÉɱ¶¼îãû+ß¹|Î×sìøÎ ×=Âëõî•û&FÆ„+è9ä^ßþg‰pí/ú ýE°Ti›mBºäeŸ ¢A~qŸóu^¥ŽÆA¨†ßnšš!1ö™Qdòw.ŸóõÞÃwn5ÜœA5yϦ±%&£íì’sé†ð9æIDAT˜»Ï—FÿTF$‚@˜`€&Ý€@Mpde¿"IL¾bÌç|½&õ}… P™OF²ÀXŽ\'}çæÚíøÎ­Ì pêâ·Y{™wÔžõãþ{ªG•¸¨ _T~T”E@¾Š,1ùȯàêqY‘€@pòUdIRÅÒ+¸zœ[uNåtIãþ—:fUuîÚ² Ð Áয~ü[Úðtp‰‘ª€ÀeMj³ÓçsXV¶£@xU_iÓ¼.K;vÙîªÞÔ*ß>nù×ñ ,ÏËPm¬ºµ#Øï¿|Ëâë·b¢HIÕ ©a½æõJìÔÙìÔëÐt5.ÆÊjÇE°ÃG/„¦Àr–RÕÚSÎî\ôìM.‰gwþã²mÞÚò½eaç3òÊÔæ¨kT?Žý‘v¶Lùy¦òþ­S+’Y-FvôdfÙÚ@J×¶nÈ~ø­èGKOýõ=«ßô:¹|§ËÍÞùøöÏ.W0‹YùÎåÞÙ9Žjïxú\6ù¯JÜoÊ]µsu¸Dú"õ‘Ù÷ª½eк‹)ôQ%ÿ"ÿþ×ã³­Õ¶îÄFñ,ƒÇg]ÊÝIÕ¥#-š&°¿Of0ýQÄT>~’äè)X–O±`î†uãXT»›ØïÑ@ÇS³öѺµ¢˜ËíeGN¤DÁZ‰|`Ó¸aûíð™ÓVF‚ªÖžÊèsEÖq < fjX‡eç9Ù‰³YÁ$/”&.ÖÊš_Z‹ýtàd¡¸`Êû76žÚÁþüû|°UH' »²U}¶÷§¿ „û¯èºvlAïlb¾ãî}‚>JÕÝ16Úêßmy=Ïá•Îð'°Ë8ÅG ,Ï)‘WÙ àç!¢¬»]ö6Ô‰£üYóx·aàcËÙ‘Óž"+àõÓ®BŸ—"“.X )oB¬ÀÊëÅK*8åÙ?AT¶–, ká-ÀÿÖž¥‹•×\~I¡ý(Pσ çä:Ùê÷¾TÂJ¸¶MCöÉ—Ù×?-!eÕŠ¾ë—±-ÿ÷+;TÆ?¾U«7·5wÜØœmÝù;ø×¹‹Û0ªÝJGytwÚÍ5ï{©&÷½¼»ðm74c&“¡Êü=«jí)¯ïÅÎߘ®ê3ñ»ºøIÿí»þ&y¡4W´¬ÏZ\Z»\ûQyÿÆöéz»´a|™Û`0ˆløý×›¿×Æbã¹w¸8ÚÈà¥AåŠË8º¥üýn5²Ӽ췿‹ªTÖ¼È:\.²µ;Ë~ø±Þf¶ñsË(æMq}¼å ‘™ Ûù£Gײà#ÌŒëi.·c ùwù‹Kv2zËv h„AÀ'póuMY+:1ìô9Ø‘€ @Õ]€ždaGÏ–}ðE7)°s™R™ËˆT®>—§ üÊñ‰ó;_ÌÍÅ•ŸI¯ÃäW²‹KSÜvŽ¢‹ó¡p,ªŽ?œ§ò˾Š*áá%pY“Æ vÂKº‚•B:@€ @k Ãzó¢s€ @€ ¬ÈÁJ! 
@€ „µÈa½yÑ9@€ @Vä`¥€ @€ÂZä°Þ¼è @€ +€r°RH@€ @a-€rXo^t€ @€‚À9X)¤ƒ @€ °À9¬7/:@€ @Á `€¬ÒA€ @€@X `€Ö›ƒ @eøí¨§l‘«ÚÔitMµi+ ÊÀ¹²¤Q @¨F—ªQkÑÔ²Ôm|mY²!ÂZä°Þ¼è @€ +€r°RH@€ @a-€rXo^t€ @€‚À9X)¤ƒ @€ °À9¬7/:@€ @Á `€¬ÒA€ @€@X `€Ö›ƒ @€ `0@V é @€ @ ¬0@ëÍ‹ÎA€ @€@° +…t€ @€ Ö ‡õæEç @€ @ Xc° -f#ëÔ>1ØäH§¨Á®jUŸEX‚æÖå¾x‹QfÖîŠF¬qý؋׈0©9*ÒÌ®o{ »¤nL˜ôèâwãÒ†qÌéòÖÈ讀Ü÷òîym/«ÇêÔŠ¬2ûMUkOy}/vþúu¢ƒjBl´…µnV‡åä:ƒJïŸèÒ†ñ,†Ê(ÏqQyÿÆ^Ö$¡\û²(ŠLX¹úÀ½ÃÁ±ºŸùïX‡B+@_ÁMMÁ¥D*@€ pqšJªùïéRãq%%C| 8ø×9Ö2±N¡ý&+×+MXæ(³Âè{Mì¿yØ÷zËTFÛ&"ëz½½òž«Lùy¦Y#,löÛv>«lEÜsƒ™{o»LDY{a¨…•×±]Kj„ßäñz¥íŸýÎ0@ñƒÁj!Ft±ÏhÙÕ­Ú %¦€êuI3P@€  XLŒ ë\öCå&õa6°«›•í‰Æø(]’ ”« |€ú¯[ŒÌQÆ1vã:"£q‹‹*›ƒÑÀX(mZ>Ò¹÷ö6¢BÇNe C @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @¨1¶9)õ¦¼ürLevØfKŽçõVf¨+t|Ûñmº –t»m§qàÜÌ:C±V•ð®J[£bÚRŸs!”MµÍ½6󸌶'FŸe¹( € š-`³ÙDkl£U‚Ä~;ç>ÿÚì'ŸÌª(~Pm‰‰~\Y³©F §z¤Šª åV¨€0+yù›’—vde¿b³MHEm}m¿˜ãâ‡K‚pÇñ? ß¾ ¥#åÖÄ2†¾”™ ™¼±kžˆ?Êþã3JÍ*_VÈ?ç! ó±E& Lh4uÂC#ª<#@€@µà½q1_1IJ˜”lÏ´¿f³=šªŽøªö8•yž™wS(ËU;QNð6Ûk±Ö؈½”£¡$±WÊ3Pn;l5ÄÖˆNœœQÖ6êªAˆ£ÁO¦Ëé¼ñßOŽù½¬e"_ÕxþåÅ­MfóW´¯ÄJ’”QÚòsNFY ±H"›LƒÐþag^é¶7&F~UuzY}["_‘oö K®£Ïôr:ñðRiO<à3\}·¨Z^ÞϹ¾e «;áDAÓ¾l\nÏ ÿžòÈ}áX† @¡˜9Ùý¢ ¾CåjÇ1é4èIfNÏ«¥(ûT«í”<^ï}OOJÚêv£¼‹'ðÒ¼e½ ¢øµ@Þg‚(¶‹µÆEŽ˜Dƒ߳ƴ¯ }cBÄš‹×›ð«yèüÜFf}GÎõè¢[žWK¹ž—W>s¦¸Þâ3\œNÍ‹+Ëç<’ö‡%P\¡0¾Zc£&ÐwËcü «š@þCòýÿv¼ÅWøÊ;~¼¡¼|É%'äù¹sµ}q çååŒØXy—%ϳ£2}i¢s"åå܈ly™ዳZ-ò²Ýž+Ï-eÝa¡–:™ívyÙi6És³Óá‹sš”0“Ó(‡¹LNyn2)ë¼:s,‡Õ0“KgTãÔ0·Ñ%§5¸ òœçw•ôF5έ‹3•tn·[Égå¹G]çù %Á£Æ<¾²EQ ó(a-¿šVÎ/*q5LÔå÷ªùE­l5­Á«”ËóªCÍÃã<^¥l-M~yùmô…©iµuž_ðr_¼¢WžkåˆjZžFKOM+VÎïUÂ$µM‚W)‡‚uFj˜š–¤|q^I­WMïUóS_jšR¯ ÉsªÊGÉåeAPÊ‘è!$¹]jZ¾LU(aj>IM«o¿WTËÖÒêÚ_¨T ÏË'*R^¦lJjû%]©:9N«—šæË/©í¤ìJ~5NKËëÈϯԫÏï«Wu¤ìJ9úþûLÔz%ÉH#¹l-=¥¡: ¼69\òè2ÿ6jÆJ~¥M^¯’ž—Âà ¦Qêõ•¨l¿6Jª£R‡šß—&ߟ‚KµÿôÇ\]Wç¼-Ô¹µ^­ú²5S-Žž«TÊáùÕ>ùÒP%3<9/ÑÀ¤Oi?+°Ðwÿÿiú\ð¿Büïý ”¹Dë­ó¹.ÿÝ“ÓÑç˜ÿeöPfûÑœD𹜮pz%¿.—GOR3Q-W $…zé–o]¹¼| —Ë•¢$JãiŽ;<"¥§£ÕMŸ"I§ºÔü<ŸAm/W)Gi'•ÂÛ!÷¯`¸`0z$7õÛ@á/xóçT—²NGFtÕʧ?ž&‹Çë ôfB´îözÜ42;‘ËÛô¤~/±jökLrÏ}cbÜyjg ŸáX)N ˆÏyQYä°¨H}x ;!ÿÀÒ'>þêD_ò2ÿôñ úCï‹ó…QG&%šV Sâèoo4<ŽWÄgô¥¥Ä©åК¼.'Qã¨Z5¿’VÎç 
+X/ÿ®àñ|ò•­õIm£çkSÑmôÕKG-JyùeÓ’¦¶ß¯?<=)mÑêÕÙðX¥Lÿüºþ«é5wÍ\Χæ÷w§,JrÔ²TGÞ Ì'Ú‰ä8úRËO£¦×Â4ž¿P½ZT+9F_‘J™úþ«í¤Ú Ô[ Í]ë«ÚF^v¡ö«íö™Ë‰üë-¦ÿj «m¤\rµ¶Ê}SÓÓžÚ~¥l-­¾>w­¦•é3Ǫ̈m[úüä‡iõ*s-µÿÔx_x<Ÿ|uðžÉëºþûÛ¨í§™œVŸ^–áù½ùqtbHN§ÕKQʺnÉSÒj޼l:@RÒóC š|uÐ_瓤ÖG]TÒ¨mäqù&j;Ô4^]éÜœO«K+ç§bJœ¯ÝÚº×W?%QÒPr5-_ÖúO'Æä8­ý¢š–§ñÒžÃ碚Æ+äu9N ÓÒK~iå4jz_~C~~_˜ZŽAMKW}uÐ Ny™N¾És:"Í£ÆÉu”0_yžü: jœGM«­ó|zHÏýë0èò{´ü~ÌKiF_ýZ~£æv+iŒ&eÎÓ»Ý&9½Ñí–ç]œQs›”8“Û,§q©ër~—gVãÔugr©af%Ùå’ó;ÍyÎÓ˜j˜Õ*‡YN_œÃáPÂâ¢î¥¯Œ·xz>Ñ.B·WK¯Ú3s’Kó"&\}Rüj¿e¹²ôàüŒÚL0N¦o“Gé¸6JÞ×hG£?ãí’´Kr ôi§oJƒÃ@ßV4•¹(ÐU¶ LžÓ0Ù ù0–æ<½Èç46¥tô—ŒÒñô”Ÿ§çåxiÝ@á4ä¤o]¹%üwP)—§“(•BCOj•˜‘—Ï륶ò11ÿ†(ÄH£t‘Çt-„¾†ù(˜òÓ¥ú⑯ ð/ µ~JAí‘óS:*‹ÿµUËóo.I…ÒÉý¡¯î ÷‹—/¯óvS8,—;È퉧¼ôÿü‰Ò§‘ÝíoLŽø+?TYÂgØ_¤f¯—åsH¬À(>lÖ¬¥q‚E¤Wüåñjœäñxþùôä‡ùm+˜ @€@… ™—‘{#Þb*Ý‹S»mD\äWt—AºY®·XIÎih„§éØw]Hþ>#ýðm›lW8/NÏ«ÖççÞHWÔ?£ Ý:"¤‡¯æ:<™KÖ<Ñ€n³~Âg8x«pJÊÏ9w)ÓYU^©m}”nÅà?ñt6/#‹þ„æ7æ´:0‡ @\`Vòë+éàù˜CæÛ&>Tè™ÄP)ù²Eür¨L/R9!ÿ}TÞór/¥×¿ÃåÑ«vyCþ9/×Yã“Gí1ãéƒ6öÌcÃl6›üÒ-s@€ PÛœ”zÌ`rWäÀØ¿}ü ›EYͶ'FŸöÃzÕ÷™»³¢.ÞŒLNoîÎ3V?{®êkTÍvÀrÉeÞ¤×pìÌÈH{3ÔWäñ®šÛ=”­ªèÏy¹Û:åå—cäF–»$@€ ª-@·n‹ŸKR­ÊhåfIŠáõUF]%Õñ$Y¨-‘%¥+)~àÜÌ:·ÛvKJw±ã_Þ|&({mŸÅk×VØþl;ü½Fn–bFOÓyÒ´¹·|³ui¹·›ùX‡ @Š>  ¦b»QÖƒöb -m$½ÒyñÚÏ+l`RTsú-’?ôÊ×òÛÕ‹JŠð¤Ä‡^É“þhÜŸnþOÉÏ3Óï¹ÿë¡)s7¦LhTYm«õðƒö/Ö>¹aþ°A··,;yuò‡›ß/L^É<Þ^óúí>[ÊKïW0þP5ìio¯[Ó\+CNXÁÿ`€\ÁÀ(ÕN€Î6JýF•®Ù”Gè7*~ÿ‰SÝ(cà/ñÒ•ˆÔ€*MÀã5Ä?6mõ-‡ýoèuÇ ¢”Þª‘õÑ—ç.ت5âÞµô¦âÍIçÇu04ë>aIš¶Þï¦6­7§‰7;Ní—,&ƒ7­§Ø[´ÌtºøÿÛ;¸(«ôŸ™a†îFÞÒ]ÉÖ4]³íï?d·‹ ²ÛE/))å-3Э5sé‚Paâ/y#„®\Rÿºÿ,u×ÌÖ¿’•Š!( È0rf˜™ÿó¼ðÀ\¶TjçóaÞ÷=ï9ÏyÎwÞ9<Ϲ½3ÿ6ÏxùÇ¿RëJF¡^:[ÚÖMë'’Á' Äɲ¦Þoã·¥³†ÂhNwwßÑ_•*ŸèëVrlåÊ M§u“aºôù݃{T&Ä%l°ëÉù8D¾†s÷*Í5’ÃÆ†ö?ŸÙ±qoز¶ŒU–Då¸,‹þ@a¼Ì×.T¿©=BfÄ53 ù¾Z1Úù¾\ߎ²¬NŸÔ)ûè3K—ÔUœŸË‹j]Õêܪ’ÃÍF­ZÓïi–ÎÙý»ÞÕËbœ–?vö35åùQ‹ÐŒùÎEŽk+T{Ý’d,U £póòÉ©4ƒÏÛ¢ßüèÇ’½u9o°U¶Ù˜ËrÜݵûêTÂPkÁŸ0î¿P^Ó¯g]Õɵ„œVvz½}´iF«ð¬%9_üó·öÍÇ6f×Ëñ ç¹V¨…Å(†.M¹THë‹Õô<ÕqÎÊzÈ"—ÏVÕ7 %Z™j<Éi½H§‹ð®%YËdMùÈ6ÿbª1ž9“ãV…9Çk£©Êósˆ '¼æ1JI ºõðI,.OÿXd\òâÆSw½y[ˆÊCüXuÚüjz9…~’3¶^ì6“¬ !Þѫө?¿/º˜Î ºkÝrªjMWjÌá¡ «Ãmys´Ws’ï¥Ý£}¢ üC×ÒoP­óý»Þp1º´¬.~ü„ð ©iR§Ë š¶,¡6o¯ôÛ÷Ô¹§ÕXl½Ì¦êÀ=ÇOð³Ó|ý8µ-Ó’8Ö¾»¿u´³ùçÖLÔ.HòjÍÖ¡µær©-°z[þW.¯â»Oè¾Uxyx¦T™Äí5¦ªÀm{+ 
·'Îødtò)NÇuk+mYÞÞýr'•¡²Š:¸ÿDtŸA'¶‡Èù5æˆRÐRˆ.^Ý/_©xºXo^2irÄ][·¼ÂåXk+î©4» ýJrŽ-ÂUë“Sk4Kmß¿^òõ"r@ “˜œ¸ÿ¶ò#éÊ:õ [è5£>¹u&A¼žKˆ6;©gô%ê-…z¯™ÔCÙ{(÷¬ÚúrܪS^ùdHù©½+mÔȳ<¥Âª6À#$6.Én¤q<€Ü(V›»Ï™¼ýûÕ*eM­£6°:ðûsƬ™Q—®JÙËz) 5nŽ3käëœ š~]kSy›É!ݶ·Üî°,{ræE-a£Ð]ç•Vi¬ 2ÔÔ…?6Rˆô~޲ß=xä [Á:’uÅd ¼TQñ»š*1þ\¥%Š 5²CÍyY±.œÛÕ!=µÍ{4Éù«X%9[ž:·4ÒËÓDîÑÓùvg«=Æê82îm £ õ:©öYT^E¦šòðu{K lœÓ­E² £Ý±¾|Ρ½,¥ÄT· r ØT«Tf‹Å¯†X9¯‡mS÷âÒC¦ÚÀ#ùb-Oƒ#jG®®î¥Ú;7Y©2 Lo—s,éDwÔN}.H·Iº&“Œæ.TϤëåÕ>¢¬»OmÚÛOyOV(’û,9Íu›gÒöþø§yR/zr øvTЈÒÿG~¥Ó%1Ä}ís’TéÃU]]°ræMw’œ+I£ÉÊ“âÝ— û ÑÛîÐ4æà%S¢‚?oÇ8>wUW¬îvYh”WwZ$˲‚«m½\RŒñ*åeýš94&®PH#¥sh´¼†:dyÜà¶ÙÕÅKŸ³<²§äèн˜‰ël_\2t æÓ(ovØÛ±3ÅÝ£m¶Ô¸ñù½=³ýz”$þ¶«_‹zËò;ËÑ×Ö²qÃzûh蘨¸X}Á’ÉËJ? åz7QW»}v.¯w÷w½èõÕÒ¨-]ÆM^M} ÁKc&…,Iؚ͛ݩ.¦‘éýÇÛtU(b¥ç„G»¯‰ëž•ñ¾]®4Õ›:‰È96»Rl ¿eTLÜÆf6Õžwfl¢Ž—-c¢,ýi_™•)ïŲ,ú“Ëkýܶü9ñèmu‡’O¾w¨‚Û5É)ooÚÝëÍNž©-=œœ™¾I%ËTdÔ—§9¸8ÊS[›“ºe‹Ì*&hbÌä×?#Â*ï" TTé?IOê¢ß=>›l¬ûõ Žmýõ*å€tcf,Ÿ¯?´å¤Â\;ÔFSb¨ò!çXrdM³Ô3ÌÆ]mÙ¹xžãN=š´ÝƒÞP£`ãî®FÐü´ rŽ¥iCf‹Òï mbS]©*¹\>’ÕËÇ?Íe#Ž >Ö3ÇjSúɫ޿ó­gý: ¨ ðOÀ"‚ïé5ðôµý>Ú–ô1!Å‹­ÓB½n‰-sgkxÔ*SAü„^éÛ’ÿðIêŠa¦nwÌ"×M¤¿9™rB<»l=¸ÇYi›ßŽÈN_ÝŬ½9ÇlÑù±Qè,;Ð_=Šeí c3‰¦`ªú=L”Z¼þÒäûÓZJÿï-!*…ã(|ÿ‘œbÉÙzÀ_?*usRÄ[Þ žž¢š1¢§á¶Y[y^2VY¯Íë_‘ôÒxï«0j$c•Ó3æíçAîÇîñ˜¾mÍ>Ø¡ºgÖ@¾·ùð¥)|”CKFû"‡‘Y9]kǶXrÞbçRÓq@wÕÔÓÖôË"NÙôWçÓ©£ì6u§™Ošû’zótÐ KÃws^÷Ï£sGñéw8ÊjëÜU_ð\b“=9Áf­öO3æEÎlÒŸ²òë#(n´Ê”>ð'z.$Ç„Ò[ÉS’aùþÊ©G(Þwåñ)ßóù7v·^9õÛ&²Lc…ä…ɲÜ2Är–½fÎç4½µÞ9æëw„¸Äž²Ü)¡È¯r|üÔ“ùÈŽ½tì)¤W>çþŒ¸‡ë;=éÑËÞÒHø±ðÉúÑd)A'ÿ˜xwñ+Ž£´;RÆUš‚ £Îþ›q¬»[L~7{\IqpŽ¥$;7¿1ŽÇVs‹L+8Ârt¥ÄðãCÙ9æøXzffÌx¹Élƒ…ì÷HÄì|vŽÿû*Î1çm’c鼯Œåh2•g¥g}ë²#Ëé?ŽvÊÜýî©ÔM'v¾õ¤Çu$-§§¡úƒÃ§\¿W¾¥é3âÓ·É3RŸÅ*>ލ”Ú*¾¦gïº:Ç\fóp,€À¯ž÷”»”ŠW*jõ™qZmmY®¤ts¬|[ÆÝ΄ˆì,2¼ø¿~?ßÊl¬ðß;«Ó–_éߣL«³¤e¦÷R³ÁÇFšrä_û±ü]§*¸×@n8µêJATt¬}++”ÁÏÜÉŠÅì)žÔš‚Ñe·‡þÅî°(†Ï•œ ù•0;׿ã¼~ÐæÕ{gK2»èªÓbhæã½ÿy-ô3n_¿>[»NŽçö[G–þÝÄ,9®¥ã7%žvg‹ mkhƒží1Vٸך«†zh®ì{*úÍSìôðßs/ Í·{y…vÐhwÈÙxÚKÖÉtòÖVå¼ùΚM9ÉÕxÙG4Û«{Î ·"“Ï€¥•&@~G¬1Ü˦¼ÞügÑ_Ûž¡QU~Ÿñ4afG_Û}ÄÙJº(,vžžTc}a£5ûï´öW¾/‹kÿÛ1›ÐÕ|¯£Ç6eU‰‘®ês Å„rgÙM£Ä÷§Óq®-|Ô6&Ó¤üƬäd+´£äˆeOõ”ìùº³MÆÞMس¾VZú`mQuÏüb_¾ñÄñZ³äôÕQ‡”o5éGªiæ‚"4ËÞ–È÷œ_åܯ!Ç{Ì ËÝ/¶0r윞¯›|_tm5•Sy•-=ëªÿšžY/CZÒ¡´õùªÈ©m 
EQW%=#s2Ë.óo-”Fµù¨)Í¥g¤©v’^íàP_Öµùtxj¯M  Ð9 ¸'õ '+ë¯ðß"M›bMߟ*.íjl®Ø¸s¶ä$ãÎxÁqMH‹½{ÿ`#“–&elndðÉó~…c^íßIc:œ€Àõ#ÐÔ@ãrw,ºëÛ±a|&­¡ã“C…“Ѭª­iæ°DeÛÜôÙK+Jæ rÂDéÑeýi˜å¯›ÚK)9¶¦ÈØ—JeËçÉ´ûáøÅÜŠ&ÌMÛ¸²¹¨]Ï÷8;šÚÙRƒ*:4lzô““U9C{DÏnèp2V«‚ŒUÊÀKëYÈÆ}¥É[r9Øx´Xîò5%£Žl´?»ñkÇ{í=o‹%ë¤ ac‡ºDomEhGtß•ò—ØÑa3ær‡;(¯¾•NBþ‘l;SqÀèGãò¢oŸº>âà Zƒ¬•7º"‡i; –Å|fé¸Xk õÇÛSEð²í‰‹nÐ(kýÔìºfS³é°{¨R–èGíXn®µ°Û´ò¶™³$ç?s¹¦ÕÙrºN}l˜m×’Žò+Ð4ZMˈJ[$,Êú‘RzÝý¤ÛdíXÎùJEOÇ뎜«Z)Oï!ê$ϾA`GÒò¼¥¢Nï¬ ÏàéãÃïXiµ¹*ht¦1Ms‡ºñÞ9ƒƒ|c¸£T¸álRoeeAß Ÿ6ëfÄ1´×¸»õ&™dÍC8m~c<{$ÎB›?#«yJÄ€€@ç!ÐÜmî¸ncbw(Þu˜szh59ƒúßµtÞ‰õ®fýPgiZQ?bã¯Xò²ØõÜ.×géÞrueQ”‡¦z¯›uN+]“³µ‡œ­ðyIã çO¬­6Öï>T5mBbÊúÔ˜ö«Á«½%£~@wÏ©ê®Cþe³V’Q«±—7ê7— W¦4/ý§íÍ¥5‘Ž®nÒî?Mo:\Éé:ª»Ü9à êg9ÿ¹-Äí¨Ñ¯_¯Ê¥ Ÿè+ ôv0;ltÅñ ÷óŠM‘ñí4ýý*qäl±7­Ù žª‘s]¿#?ÿµæ.ÒRçAg-,VÏÄß(¾qŽorM‘Ù’Œûå8ÞpŒêf_«*Çÿ’ŽÊê’ûÚznö|gMuÚÔ¬^5%áäOÈñ Ë.\ˆuK“4ädâÞAwÝý÷ïí9z®.+aad‹ë퉯rÂm€ÙâÊß-Í`nú~lϯvImS…µZêìëHÚ«'Eó»Å‹ŸiýiMÀu¼÷s´ý×Q] ð³àÞæ3`ZÝ`ÜUõEK<\•~O s­ÚÖV5 ìTßqsuaÓX! Ÿu¦Š6¿±ÐÚæn¾n zûÅi4 ›€9§Ç5€t#×Ôô¬ï,l6«²CêÕ}÷>9ǵbÄ.^ïmIy9öÙ¯­7 z»#BvF*.VÓþ?^VÿuæÂX£Ñ·¹ÎnKFÚ[s3>¥å3½BSn2ÓZÉbƒ+9[õëCec5žôyƒŒÖÆ¿š°qQ¼±xê7ñ±sš¤ žðR“ÎU6ÚkHÇ£ç”l´´¥ÛO¹¿ã[êœßF¯yrŽkîÐî¼¢N ³{aK>Çw$-?V›K³ž¾úú¹ŠÈÈI­>#v=nð äü x¸QøŸTCïa«íÀO1îîy/DKëdzû˜—òF0ëW'-_7þKÔµ2-éF1A¹ ÿ©š7ƒÞûæ` £õ[ÿm*䌲e«VÕt ¿"Ëq©)ÜpÞnÇÇzë¨É¼ÛvaÞN2^«ÅÔÅ›š8²²lé(9[±v£zõE¹Qל-6©is*ÉoÝXe㞇‹Ù¸?›úbGù± N¶cÜÏa´;Êké<çYÝyÖ)¯Ô%ù"M7o’¦üÔTùº½ºófeî†sÑôcÞ;£†:~(õHæ©ì²¬¶Ž¶v‡æ<Žñº¡‚.‘ƒ¼~Èñ ´y!(wM2VñSw<ß5ŽGñ&) µ»æ—·Û6œ°˜G¯Wò.3Ò¼v#OQ¶™øN-„«Å;&uԅ㯖Ç9~Û}â}vðÎûD¿°®ò Y¯nIFƒã/fëÙUô ¨¢2ߨYÉegVÐûÑTñHšV>=é˜4aÕÙÞݷǹD^Ó¼½‹¢ Š^gU¬ï¾û„~„£¾õü½Cê“ÓfÌ›¿è•w‡<:-6A‘—E¿E‹Øø¸û4»ÎV³·ýœN”þãFñ´ãÐÔrÃÌÙÑ3ÿòʶ!ã&ÏÏv)9ò.ÿŽW¬Ú’Àéw&„e©ƒ©¼FþXøÔ3/¾øÚƒ‘óÞBƒ G¬Êã4uÆ éÖBÓÒ‹ ÚLO«s³WÏëÅiZõý|N7xÿîÐ*6h¢ÇOˆ<ûΈ‰4ãdlÃÎØƒ{ÙÂä‘쎤µZjié…—˜1cÖü¨…þ<1ê z¾•\?.ïR¥6Šë#Õo¹T¿Ð°ñÒ3"©èÄÏIíëvéø|_·BQ€À' è6Œz†¥ÞÃ&=Ã÷fÕ÷ óš"ÚŠ²ÝÆ7Òg+ÄÍŽ5SVK½æýîÒ:9>„Öâ±Õæ¢j}ŠœœG¸–ê; =üž #qá„€é/¦<Ä›ÇPç^¯êDyÓÙ8•uq¾n1ž×“Qh¶xùEN{6áÕÄOoG¯wQ“SÆ£EKæ?Ìù®&K–ÉÇÏ–‡*Ï»ìëk]àxÏùüá—2 +2O6'aaìša3–Í÷¨ü‘œ-‹älµ×XÕø?*÷¼±N9s&<>mqÂÚ`g{âäÛ¸\Y÷ŽírYoçë«Æón¶½î¥•á*ñtjWÜ9ó§Ì$#>¾Ý“c·+¸Ð(¯J¥¤YMƒÚW|(Çl¢©Ç–ÚD‹ÕÓç ­;.9jÌêC×J_mŠÅÚÝçÍN.ñ 
ÎYÉÉ,.6FÜe<|åœ1^­¼¢_7·ŸËáWEñÑÛµªÑ™ ë*ZÑÎÿ¯ã9­œul·,âÝe®V[§ÖªT²^nôÞd–Ýý&÷ris„oÅךÝür÷\È5¦+ÍF?ÿ>¤gÕFuâ‚XôäÀ{ž§ÄDîðøp÷ù=r\g³ÅÅï»üü=—оOW𠃯—4ÊËŸrB¶ZzxŸíÄÎÖí½X( qþ 6ˆë\\.:ß’F¡©<£Y5ôÈw¹‡ EÇÓUô{ø½¿Û¨WßÚÌSýí¡½iucæ%s;UTfŠ/Îû’fšì¿K–!×ï”T¿SRýnõõ¶·gÎüì \ç“{®³(@àF ã€Þ¡gu¥²Ý5¶}ýŒˆ>]Rv_MYä0÷¥©y'êÃf”óΞ7{©ü6$åËc_$ c¹dÔ ëc÷J†lv¤ù={Ú{ø÷æž}¬ë'ºÎý—Ëñµ¹J…Ie.ª½¹KÙ¹CYÜðÔ¿ëoÛMŽÿn”  ðŸMà~r®¡¢J¹¦´£æ>#m¦% í1î¨ñ¶Ž‰z-ÖEŸ¿D–ãß­vVâÊ-«B¦-MP i´¤>tóÒ$–Z}qEˆ D€"@ˆ@9'¥ÒoÇ&ƒV«/ç-©æW¯â ­“3+Fƒ­Z) ¡8HÅv¡OÅV¶Ñ) D€"@ˆ /”T|µ,Ó_½ØÍø|p+ˆKH…Ý/½Ø ͧu®Î2Ø<¿>wžO”ðùTKµ"@ˆ D€"@ˆ°?$’í¯OÈ""@ˆ D€"@ˆxNH$?'ðT- D€"@ˆ D€ØÉö×'d D€"@ˆ D€<'$’Ÿxª–"@ˆ D€"@ì‰dû번"@ˆ D€"@žÚê9§j‰ D€"@ˆÀs%Pà F·õâLH¿}6ý•œÃœfmÁ»UD ÁX©D 1§.ž[ºièâP¸ÁpßÚ Å}”¥5ü¾í\ÈSµ î[*K£‡ßC/Ã…"·\C·n¯BCG1îÑü¶„ÆÀ“<åV¼É¯Ï©ÅD€"@ˆ D4ôªÉ‘СøµÉ ºu‚ƒêXQÊ€§ÎY]Óé3% ¯}{µ@l õSîØi9Lpyû xÿesœ ²E$Ë¡KhÄT¡.þD‘œW|稦B\Ðpë ÑÍÔH"@ˆ D€"‹€Fo‰Pé²Ïk´m !9r*lœô¬»eIN'Ïš@Ò58—ÝG ÛÖÏco‡—,qº¸hØ”d¹,ô$Km¾­ÖC‘ŽçBKzqn’H~qú’ZBˆ D€"@žŽÀ«-à‡‘!{¸i*lýj¬‹Ë§X…jTÆÿŠ|îQT©Øs8ÖR¦¬V=蓃»'¼û²£åþåÃW-瀽ÉõSe1¸XÅæ9Íîô<·l0Õ#·5¹Ý¦+vÛ62Œ"@ˆ D€[hѨh{¾|Ã2¤ v· Våö ר+[CÓª2KÉ)w¯BзGáº"»}Ò¦´ó•^iÿüýß´¤>¹?|ôŠt"\Ú¶ ¾È5dØ’Nò¸~àü;¨˜üÅnЭ“ì 5 ¹vyûUh`QvIƸV® ¿xZÔu¶z顇”¸»°n^$ì)ÐÓìs–¼o¹ŠRb`ôøÃp³¦ x¿OºÂÐvµ oƒ.ŽýüLÏÓŽòQá<Éþ3~hÓ÷ã/¶[?γ 8j4B…ë˜P< D€"@ˆ@ ¸5„ÍËÞKv&·ÃÒhK„é¤n[Ø!‡@f7\k½ ß®ëËy7/Þxb±*9ˆÀ«Ù«ÐÒRDMèÐÌ Ä2‰õs=çœZK2:)€@,üqMe¹×° ?ÿÀz¨uúµ+pœ`áw= e̲ŠÀµFü®=Ô°””÷Ä¥ö‘ƒd®R°vXç?|úG0®ƒ•@fʼn¡­_oØ6Ò'oáå ¦Ô–Ñh°ÿ¶¶½¸ém-· t‰·ÏmÖdeøï<™4¦ 4ÅÖm£Q˜õðîJ­FÓ¶¸¶–ezžCîcYÖù<Ëžºå®Ûxåêæ“ñÿWß,mË–0·çiÕMˆ D€’¨T£&Ôέ˓þ$ïJÖ“‡¿®æJTI±°us\NÒšc*Ãð‘õ îØEø›Ÿç*ö‚^o›Ü‹5:¾bö‚bò¤›°!·‡Ú\  &°åHŒå¦=ú~œ‚Í9ÔúÜ‘ÛP£m hi[ vü!ûn@ ŸÛ¥*´©Ì_ä=ò=ÊîXÏSÎÃó?jÌ?8Z¸|, ¶‹ÅõµM᥶-ap!õ˜“ÙÝwˆ—ŠaÇ®íë°R"JÁkUh—©?wÄô‘¶¦/ª<[î„’' ×€@d̰%}‘iP°öð Ð37Ö÷«RØ®GEæyúF– ì&ð}ŸŸA½û ™<̧ºÇ²¹sáw@ÉnòKn÷q.¹â“vòûóÑ¡ßñ#8›ÿ¾tz÷ÎÒ ¦4œ!¨ÿÿçÁîÛD"@ˆ D 'G2µw,Ó‘õ U-þhlš²61uJ Ëßå†i»¾VÀMÃí¤š¶5ùÞèPà¯kàÛºº¥ŠëG£i«! 
ÛOž‹†ëÃ_6­v†öä°ã^ƒì¡ÖºxØw, âQ0iáu¨_×RÏGÞx9tkçYø-Ü•S€R¨œ=ZÞvr¥ôëZÏ2”;åÌ1øtõMLqÒÝ?†€Æ¬ghÝÆ 6…æÜ^,W1vwYª"€I7oBÛžyAúƒÅIÏn?pij)Ü|R™½Fy"Ùþ(˜û>Ä©R÷Œ¾×Ó×`¸‘C/_½1ÿ´Å‹UâK ñê]^ÕÒ5Æ +sö]gìk“µøþσƒDô·@ì­Ödµ4êµ>ÌúÄTÍDßLÄSˈŠiëÏy:ªoJgŒì/L–ØÎCyµÛα’yD€"@씀 þ>ƒâ¶¹—ɾÊ/ü¯Ã ÍóJÝ [W9BÀºO! wKÌ®û®ÂçèEd[Uz¹4ƒÇв!Ÿ;ö†—/Ñ”»™Ïïú¼” š1ÙP·ycîím1'åB4D±«Ì,ˆV;ÂûÀk=ZB ƒ%‰å„íý´ÁA&µáÚì]8¾µƒåš?ÑYüÊ|ŒýKY$›¬7,ÎýÂÌÂÊÖô…•E÷샰{ëÖº¿ßÅ·&ìxpéï5oüóë4múCî÷èÍý†aô\¿~ë§ð”áäÒÞÒ¨È;BA =R¶ÃQ ýþ`&KDš˜Ù¾Úäeß)i|˜`¿5é‡)÷þÚ[§²|2ÏŽ\{½’Qçº#3¬!^šÖ>°N`§çåÕn;ÅIf"@ˆ°cZ8¶z'LG/äŒ%á[U“K v—0üWÜþ‰¹“q:lAÃu¸…”XŒyÄè¡d­Ä-ˆþHxËTޏ*Œ÷¦e¨µêÖUØc=†×Ž©Ø£i;Ân¨fM9®¬î0¤.o¥¢"˜'ëoÀÏ_¶°z©kp=Q« ÿ¢Â”¬ÈŸÖ®¨ 5LÚÜ*›>ûœõ¿%èA§á3B¾ŠøÐË–;åá¤LDryhx¹¶Q 0ëµÇÀF˜=¥¼·”cðÿ§ûŒãÿÝ:ö²D¯ñILw ˜:nXÔ¼åë7â½K5nؼT,¾[âBlÌØ¹é£…Ý[¬/äü;¡4yˆ÷±9ãÀúÈ*ÄwA«sõTà2‘å(”W»Ëb2•"@ˆ€]H¿ö'™1³Wœƒ÷¾å»Æ•¬w?Á Í©°|ø6ØÁÄ®ÂFû£RKL„+7LžLüš´ë÷XèæW H iKv4…ó‘7øS:–„@ôe¸Ñšç¬ÿ®hS}:Ô³äÄKQ0fÎE6/†…ÁŸXÍU¶¡róK–ÔÍ9T¶0NÍqÈ7{^ˈù}ì*ßÛž¾{ÓF.‰òyôÏîiî ÅßÓÒÚÔpïù~É¢…Õ19<<½yˆX«ªÊ¸:9œr®é{áÒ_ë ËÊ}FÅ•ðoÆ2·eùdRɵúÞn?~ýõ×ÿðÑó)Îú6ä¶¼ÿ¼¯&\ÊÃ`ÿmHcOÇtõô‰mò~6* ¤—€ý­Ýí£À£Í“¶Ñ ‚ ưMPrÌݵµï ý+&¬-  7H]‚ƒwÖàW²×©’$Êñ£b¬Ú*¼÷åúVY 7úhY_ã ‰Xw¿K ¯o>õ¥eª Ÿ>¿£XæÂª™$ëïÜ9GzkŒ çn×#{Gs]¯Üúûõ玤âÐkJ/y½Î½$_³7:÷óÒwìüÑÆ”{~R‘$®’“àxÈçµ`óœG.ùÅÇ%íŽS¾}††°~}2kE³o&E7ï¥Ì÷=-{îS.œ:Muf»Dl¼_ÛË}ówß)OpÏA1ì.©=¶¶—ÙLJaÊÝ͟܉š ÄÏ8Ú™æî,=üÙKø Ñüož‰ D€<5[ç`í…†0®™³©¨ªaIßë0l÷m8—ÔÚr®bgúUkx¸"^ØðiÕ·aØÐHnØÜõPܲE²iË"³U8göè­¦ð4ÒààÙdhnžóÍ—ôïÉ+–áŠîÙ{‹%bp9øìd%­<À|VG Ž ¯}6¡À5|4¬¹Exó·;þ/Î=®Ï]z4osº©açc/˜2¬…e¥ô¿7o…1ål«/»ÉýÖ§yª"§žº±À‡‡ÏcâÔ8§uØŽG¡Ξ¾ ,Ã\{¡ ÔoÈxrÏ¢ÿ¸,™é?E<‚ÏGùO^¼m!_;Ƨ¸ûù~è—ðבÙ!6 üûø}´ä—í?¹çÙÉl:»wùà16Äöó]U^î=xrêý¨õ=YÆ„ôT.L«kÀš_¶oÍ[Š”žOÙ{7b—‡¯0e[—=Ù•¿auì1vñès›VZE矹”>qÀGmýiK;vOÒÞ—Î\dçŸ|òaÀöß»Œ˜·ðVê퉷pX<ŠYYn1Ëò•$˜¹°s¯*î^¸lð'›V±rŠÓ÷]—Äû,ñ ¸ÃׯÕK}‰Œå¯Ùüxœ,.Â>4üwÔüIçüdß31eþó‘»Ã? 
Y÷c`îû¹¯uª'6ëB¥ulj íí$À¹Óä¾î0`ìí(ôœó[®Ä¥ï¸lIöøJÆ,¼P²þy™¹¿N|÷ßÓ<¼É=›ZÐ@>—Ó¾?ÍïÆë'ç†n/ŒC·7åyyCÐkJ£/žž{-5ÓׯÿÙt·coàsï“ Ü[ŠnÄøú}w·¼é>lÀØs¶Ú]R{lm/³“}Æu?ŽKŒ>˜c±´ûY™þ¾3R6ü²ä‹Úÿ¿4ƺMtNˆ D€”œÀŽ@Ÿ­=,·ÁûÁïÈ6X¹ë*´ù2Wp¥ZáÛ…sTrfçI‹Pˆ…86À²ú1ùåçÌæÈEÅ%`=çÛ”WDd>v*†4ä|4àÚ°¬ßÚ"W2x©&JÁ|]EYp?]MM^â—Pü.lž+»ù2W³þí¿u 7<·}Ôrˆ«'7`Y9Ȭi¥¾+T$>aÇ’„¤“«ÆPT°¼r©àX]ïš½_iÜ®‰Pî±ÍTžFn¿Ë ;Ó5Š‹]ŒAukxú½ñv›†5ø%ò¿Yl3/‡v´éc[#z3…]ŠOj~/¿Ú¾…AâÈ¥7å®_Œ\¤h²./¿sn¨*ÞH×Ú‚*¹§D1¬®šõ›¿c´Ô嘷.Èý BÌÃÊu”½R¿á;,ŸHîfæ_]æ*E¯pY"Òżñj½ÞîÓ¤ÎÇXŽt¢íç£Oâr£gW_§G'vž˜îð[ð`a =ƒ3k8 @fW-ë;•–@æJ¯/Pg:{±ó˜$µÉòï<ŸCÈÝý´l¸;KÛß…1u_}§…ÔÙ3ÈÔ6#xõ¤$v×¾EµnØK0 ¾pbvW«¬˜Ü²eûÚÎ5šú™lØ•r‡ö gt("@ˆ°@~w¶n¼œå ObaþÿîZE8àOâöNGÁoÙ9H´ž«ÊRé2à̾ýðE.A´iÏ5«ùi83kU2–€@Ü ø+!»ÿt 7a‹•à½~àWØx!)gÁIð÷5~Á44mícºÏ÷%Ä›=#.?É.Ÿ%Œ¿vîðq–´Y0süØ{‰/×T$û™‚6Îþò°ÕK“ì{ö~fÒ–¥l¥Þ tÁ½rÅn¼KQ·z íñé¼*¹«–5ì½U|fÎÇ+´iT½×§ÖCEt|HšpnƒV/÷a_ˆ™—‰ ‰_€+g¹o‚{­ëùb¶óC9 õõÌÇãg€×¼Å½g/Ú°å5‡fq(° JlŠz4Æåð>ó‰ŠäëZcS]?™µš÷<¶ª/ygêìUÇ­ê<Þo½ñsuDÀC«8î´u¨ÑIº%`&»x©²vòÊà 9UeJØ®ó°™kdé÷î$˜häÚ¶^Ÿ_; üâoGmFÓeG´±/`ÉÕ¿O¼ÄiÈ$©O›·ãWWšARµÙH½7QoY<ƒÅë{ëç^Æ… ‘'œÞàS9uaЊ5œGš·sÇ&v&0jüGÇ´¬áðÙ7‹–Xó?ƒÏÎî.¸}—¦Šº ¾x(¼¿QÜëÇã3ÈæÖD^zp±ßДÚÕ½>}÷Ý·øúúæyÎ~ þŠ•¹°[ÿF haû¦µ½ðºÈ d¥Xf`ý¿Q°³Èl9äúÞáãé¿ðøN+5÷õ’¬20“¦|ûmcóœòÙmU^žÓÜöX'(¬½¿¾Ûdw„ªç(à†/cÙcð™?à¾% U€ý1cÂÇ}~,&%k+èœ"@ˆ@Å"w Ú(¸ÍvG@›ÝyïÇýuúüuš½ê n:P©U–ïö6ÝZÖÄ™°æ€sf÷Dót|:Lœ®î‹¾é`݂ݰ®²´ôFe‘šQ·ò|Uår~1jMÞ2cáÓQë F]OðÆ/ËI Ép=É¢Œs¥OƒsvÂ¬ë ¬K¥ÖA¦¿P`ú\ÙíðÒòÌ–¦mZ½Â÷?Îó>ʺŽÕx‘_åá_aâ¶öÿ¶m¶NÎG|?òGô„mÈ/ó"{;ïÏ1·•eš’¯@Æ *gNž½h›µhâê¿þÉøkñ5­^èÃE”ÂâÔÅ„®|K· ô«UUC§Î^{<· »†Bb7ôKrÝPüï+î —IÒÿ^™kx9KzèåÜÓ9×™C=d~É€ñ¿)—¼Ý‡?‹p‘þ©ãF¹,þO¦ îŠW=ìPîx{ç–\•Â¥ÐýUù{Á`XDrIú^`T+˜9=Nµ( Dn[U;2¿{¸Ð–qìÊ1ððÂJ X[BøòQûÍØy3óF$· ”Á(v½÷hÇ­-aÀs³zUz)gÏÏÙgl!/?®Ksp/¸>,ë\çi‡û[—ßf‹ÑÍ14€™ð¦·ª÷¸¯×Z dSRœK= ;¾Øv[×Xœó‚ÛË>Š-&»¼ÑÐ$³Ë>á+Hë0üÛ G|á’¦ÒÔʾCgD€"@ˆ@ÙÐÁ…èøªÃäo{A+W9xXmÆûïñKåÒ«X@#ËGtR2Dår(Çð¸[ò‘O!O°®#OQW>E>·¨üõæs3'ÿŠÑ+-@¯´tïÒ«^ÿæN‚BG+=‹Ñií»yHÆ|¥O‹)Jå™ÜÉr_rG˜¯…ŠZ¦SMYE§.—›W½8W.dÁÜïúqÞr«‚Š:Õ$·4%ÑÃèAÏHºÂ;¤¹èûK¯:f+ «ÂÐCjÜÚ[tïÈÞhÜžI'¸›ozk{ r-ªe•ë©N –¡Â¸ 
›Ðy™ÿbl…öýSXpG:¤‡ÅKCB¨SÜbvÍîÏD°`˜r."un‚6+ÅŸ•Áó¹ÿøÀ`ÌÏÛ¶Ô¶*×hu^ä©G¥ŒZ÷ó#"ŠLoKç[gkšÒeÂÌE[öÙ’ÓËnËÌ“¬°öºÜ¼íÅgp¶Õ|¿~¼'ÍŽ•\u;ŽübEçD€"@ˆÀs' ƒº5ÝÀƒ ×ãƒ:¾ç÷[æãèHì”@™ˆd¡ 5eÏŽnEµ¹ëØå£¥/®Ì/EÌåÑßW=¾?½Òo£iúx΢Ɵ6Kkpž²NDÇî¢Y!…µiê=qìÄéÌ3]n‚áÎáŽÌX™DËVP¶]¤X ;Wi]š†n™Î–øÊLüòNE?°hÀ¾.ƒ'‡9˜çAW’f+†˜ÊSOQ’Ä œh“IôyÚYœ¾/ªëû#‚þhððüîµFMf[\ô‹»ez!aÊöóõJö2fûÏV]N¸ò[¤çͲ‘¸@ÚQ~4ÛK4¥tSH¹¹ñÅÍWXzMü±®ìÃ.“J½ìÂêµå^aíÕÜ õåImˆŠ¾Pð-£^Pàh[ì 4D€"@ˆ@iPáVP¡ªúktZHø÷.l\}Ú¼5TiÕAå²#Àÿ,Õ¤bá][ ,ÈÃÚg|H\x+Ã$bô¸-—K@^ïÒã„ r—þuól1º§œ¼ T)ÜÒL•¾ç¡¿îôì?pðß;·lj–;½^pš5 z¸ÓFtÈšž 7Ÿƒ^ʼn¡HdöŽãk çy§ ëÖ\‘U%™ÛY¶À‹ÂEƬ¦þ[%*¥Sí£‹¥XVîyªÅí{›ÌÁ.¦7ùðéåRѱªž/­¼z?v̼ ¯$ÇõʾL0»uÅyßRœ÷®‘´µÒ^œ25zm©‹=¡HbyŠc˳H[X{­í®RÙs²Æb²ï±ÏŒÞ¬›n¹M'D€"@ˆÀs$KçüKŸ£T5xe"’s ŸâØý›3¯.­á²£Tì¿íÛôµÞç@Ú{..+ë@ÓBNè…ÆÏúùû7ޝàbT*­CÓ¡ܺáÇ-Ìãg÷Á`HyV&.ÚrÔ½¦0»ÃNõ‡­Tt–ìlK%iÔ×3ù˜ä,…ÿrå°Åã”ë™ø+Ý€«Kf˜ó©ì°/¼¤}Ïç/èØñÃÑL+#àoçÞó.Þo^œ pá/àÂ_å-n|ÄÊ‘8o>@Œ£ö,ýзՊ)n¶§O±9©Ð¼ýÓ|6‹®Ìv{Š.Ë”‚ÿ<°äÖ¯µ<+ùåߘ_$Å"@ˆ¨Àä2 ôl÷2¤e¨+0…òÓôúµ*CwGná òcuéXÊžU©4¿1Ã¥S~qK)‘\\#¬Ó«¯ý´™å"S…mÞ´¹×¶ŸÖg߯¹ÉüHÙ‘9Ï”8Gc˜°kÖ‡s‹p8wªÆ`ž«›3­=^I½ÚF@Ò•*­¼)îMì`óœ`l·ÖLŒ·÷ úÓ{ ¶­X[X¢¾¾ÃxTs6=ÖM7¤Üž­;ÞPn¯áÒdÕY9þ/“ ×¢å›V­0•þ´}ŸŸ-q++Å‘Ï]Ù½ÏßÓÖì0jIÜüEK,Iù…¿,O{‚‹_q¸? 2àª}Ú Ìÿ Mæ”ßM½ƒ\“{@½°vû_q2Ðê+ùغÇs~eW{ +Çúž´žo¨éó€{Sçó`]"@ˆ¨ *)¤0ôƒ7+hëËo³Û½Uì%sÊoc­,7øq±V‘Ïé4÷wéçdFvµüûƒ*κü¤Çà'µÿ[ê[e[Z6gá_½yÅ´#™>™;èûj±x­ï *¿ÊyÚî§*&ž[ß/ÇGÖérŸwd[Cq‘jX³vݤ=!Ó”¦÷0øã¡¹Óv]˜|®TÜÍÌzøYMký¬‡}?Mßã‹§|‡»¨ž˜h¡Ã¨My†4éU kOŽ{ø’fî’¹>9âr]tWinªP]¿X›ó6{SŒyæ93[®xNÝSl‰´:œ^4Äê’; ŸðòMÓ¢ýB¸+{ßæÜéò¿.Üî’Ø“=ycÃ'Ô1Û-‚Áߨ7Å"@ˆ  ùq†¥ x"`?ò8ÏɼâhÎgb¢A$ÿ—Utã¡ÃÊG¡“-ž2N`á°YÓr ^ÝÓ²êmg¿€ä)ã{±•-Fâ°k§óKÖZ®Ëˉ@ SWªÂÌML— 6t zs-í·>ͳǀ±·òphùèÕ&! 
Œp~¸X9£¹u³'£Híýñä­_êÅÇ÷ÁaÖ œCË®ÛÖ×¾ƒ'÷ Gôö„†,î‰Ê±'»ÎQ‹/(p{dãpê}ûî+öí;£`«lŠ«@÷;¦þØñƒ]°äÁ%L^áv—È«J =E»5f»Ù³8pðÐ}FãËãÏìöŸ±µ¯ÿÇw -‡n"@ˆ¨€ðï$df© #“þ—Z­Ôm…í/N„ØÉçÔ"¾JÞ®cWâjÕVJD©¸ÎŽÚE•Ù‡CËp8´uz6/UÌ­TmÊ­9„i bg&£mÎò²à‡Õ{ßW:{ù<–¿'‘ˆþÃ-®`ÌÇý·±C“¡_¯àv?ê<ð‹ Ì“Y«rúäåùì%lž›‹_¸µºccÑCQäàT=ûÖ>ª¥¨v·ñ«cøzŸ¦®N~£’åV JɤDz´ú&«¸ülì‡Ã‹ÕV<Ø^Ç2©Ó1µVÓÄhÎë"ËÀ¡ì[{áÞK‚.~¦”ØjÖ¹WcîôñŒ­ò¬‡þjd±Ib%úø&ZŽ|ß[" <ÑBÏ·}LT^Ϥ¸}ïé» ­#>/ óó‚‹Å$•.©Ué=¦þ¢ÍÆÎÌ{mçB!E*s=‘•êÏ{@y;^ªlœ¼2x]ó^;ž¶O¡JìɧgG‘Hc0è]qtˆkö‡)/¯ŽØŸ 39>ÓYZCcÐk}^®’>fÁŠm«XYE=/, =pÏcQhÀcÓ[dM£5à~Ó9ž &ÔÓqѺmÍøtìØù£À£2ÍËgÃA"ý[cлõzS:=öõzËso‹Ý%µÇÖöæg·íÖå°`¹¯Ø½ÖÀàdS;è' D€" ÓéSÒÀž†±R¯LÀÉQøý²²²—*-8õ‹u‡íëáîľ[g¥~ŽM,¶«0[%.up&&¸ÄQ…¥Ë}O!]âãØJÕ’:ïvâß$dªÔ=MYþ-kÔÕïýŽ)­¶žú·iÔHù}©«÷,>½V«oš-µÐ©q%‹@fùòÊ¿²£´€Õ~ÓêTKbe šì+nœ)K_`@O—N$a÷U¹=M]‘;‚Ýôrm|‘*º­I kÁ§F?“‹’ûÉ7Kºk”"®’oˆ»ÞlÊ8Pá ^ KD†˜×ëUÎwœðc?“+Q [fnéÌ×Å#7~÷‘–»p€ ãü9o3/÷‘ïûÜñì•jŠ“Üa[«Æ/5Ù»s£ ?ÌÒ·ïYž_ñyÑTªÎyÞµzðÑp™ÝÑ27(ÇæÐÖu59·õ‘Á(tU™råJÆÐ!™Õ¿Iy=°¬(søuÓÜ^UßÜP u<ÆÇéQ\âKZ‹@®æ*™…/˜87u‘9¡[—U LÃè²ð™f™Ý’ÉÄÜÈ vÎ?/2±ñ».(ìÇEê$¯ökÁß×2Ë d¼æ-ìíÞòîU~‹túiQ;q•WÆðyñåIÓll€j•&[?÷¶Ø]R{lm/³5·ÝÚÜv»Êgy6à[GG"@ˆ D€"P|v¡Ô 2ûûõç<?¹æèí"Ê6¬¿E8°}tÝ7p7´{"/„XJâ+Ž=î¦Sé%õ«ž 8°Ü{•ØpÜà­ÿ¸<|xÙ%7‡‚¸ññGÓ‰Õ»ªò×3FöKÀ·49ÄÏÞŽÅíûQ[îº9ߊ¨  &uÝcóë{6¼Y†ÏG¢d`Ϫñõëw7M½ÆÆ³aáð'øúú¦Ù‚ óÅ~q}œzUÁž7o—Œ¡¹žÇüÊ™´½$㉃«“4mê>±Á{%ï´áû ç=ò|Fð™Y¿aWå¡CûáçCã%еM¬Íñ÷8³¸ÚÎÚÌü˜ñém²û)íáë*ê8wËE7=rg銲»¨²è> D€™y’ËWï’'Ù~<Év-’Ë×cMÖ"@ˆ D€û!@"Ù~úÂKH$ÛH.ÕáÖ¶t>¥!D€"@ˆ D€"`¯H$ÛkÏ]D€"@ˆ D€"ðÌ H~æÈ©B"@ˆ D€"`Rཇ!–­.š ûÃ/âž&/NP'߃ g/Bl¿|j~mÓÀÕã‡aäyHÎL„¨ÈƒpèämÜÆ>Còµc°ÿl§‡«¿„ Jw%l}òu8~¢n¥@ò­ÓøL†«ÉöJ£ìúˆDrÙ±¥’‰ D€"@ˆ€ÐÃÁs`ó¡3øèlüj#„ž½ ©/ºsx¬Úð½—YH¨áŸ= ™!"ô7ؽû"d’ãyÝÒ?ø¦,ƒS1 sò'X¼û7¸—Zªæ¨FÃîðß âòCHºvBÃ#àìýÂø•jõvS‰d»é 2„"@ˆ D€<+jHºàå;šyªà¼ì†nsýmXºö4xu Þ~…/N©Áw\P˜’Ú×OI¼Ë$b`r™)hàвï`ÿ¿•`ÐÔqÐÜÃj(@Z4ÍÚ^jÓ?ïÎQ{ÿ¡Ç.Ãc•)?€Ô~ë=2°=xaÖ˜ðõ©ªºy¡ Õåݺ4tâW˜#í“\aºšJˆ D€"P‘µO²^‰ u‰Aá u&Î;9€ƒƒÔ±Ç`ܼ0*™4©[ b/ÇÀc3¼AʹðŠ1{OwsïRXp辥˽êû€úß‚ ¾€—Q ³¶;(˜,ÖCf¦ zýB‘û$£;?S­ÆnR ´Õà9J\´õŸmßÁª©œÐŸÕ»ŽÅòØßWÁ7»c@ÞÌ– o‡—} Ûo˜n{ÕoÎêp Ò¨j"ô0gê¤øL ä¢C.&?¼¥Ø29àán?û$“'¹Lº™ %D€"@ˆ öM€‰-Þcj‰Ì^=œÛgÈî­ýa¶ÿë`òM¦¡Çr쾡†I(¨=vžÎÚûé ~í}0NŸ»æAdÂ}Øv f}ðªY ³¶‹P 
[ybY”=³ï1²æÓ×Þk p" â„G(’=9›Ñkük wÖ¹c#€Ä3fì Êisê­ƒ¸ä7€¸8\¬¬8‹°\3V`Ç4Ê´gH$—)^*œ"@ˆ D€<{ó篭yæü%¨S»V±+÷é<†¾–>-Í™y0Ñm)vd>ÅìM¢ì=ßðÚ˜2‹U…÷‡v…Èyþ:¤¢Hvæ–Ó£¨ÚëÐÉ5 "S.àÊ×}¡‡ôÎã56HÖZ±ë̺ðÙÐ~&«mȦ‘rpÇd²‡Ú—S ¥j6‰äRÅI…"@ˆ D€çK`Þât =~Ü2BÝ:ÅÊ"ðªû*Èp½ëð «àzL<ħd ãì–Ù{ºlK½ëš|«|ŒÈÓjãÅF:A›® rûuøãØUÉMàÆ‰“\svyÜØ™¢*4nægÿAû¯Ã½‡‰e™—Ì%¥VH$[Á S"@ˆ D€"Pž ÌY¸º¥a ÷^UÀÛ»z±›¹ æ„ÆXòÉ]= Q£vöO¸f%¬ì=¥àÄëVà…"9þÔ)ˆ÷«'$b«¡]soSëÕ÷ h⠸Ƴ9€WýzиJDžˆácéh&@"™"@ˆ D€"ð˜ÒS$ì@n4Cûöé⛚žƒ±­K€CfܪïHð§àZ^\ˆq‰9á÷qeeaïé²›üà[¡;{޲>9î°Ûj-貓•ï3ECèØÈ6_¾ ûþ·.³ÖÔo ¯qndÜêëô!“@vm“'öznüÜæ{póÄ òM T­§}’K'Fˆ D€"@ž=¹A!ÃDBá“@†µg£} æ·v*Ž9™IÇ¥w†foe dýƒÓ°2 )ɸp—½§ã,5ý¸v®Z¶NƒˆMæ…ÉÐ;^Þç#g7So¶ÿ^ªá̉Ëܾ×íÞmbY™úI‚imr÷Fͬ²¢6m7 ä'Opá. <ò$ó$èHˆ D€"@Ê!¹‹×N ‹˜éFƒaÁÔ‰SØù¶mÛÙ¡xA᯻¢ç1%V}½:µo’Ä«~*ÆRÎÕ;þcïé<,ö‚ê:,ž¶ÚunNáq¶ T‡!¾¯f§yÎ ›Cs8 g¸¶ø@ëÆØ‘æàݧ©9 Ol„™­ u- üsô¨™&R݆8|‘à™ípç³VÈ#y’+d·S£‰ D€"@Ê;¥R)œ»8d9È8¼Ú€ÿFó¹ämS@¯/>‚&L_©îCdx'åõ[ÁÐ[£ß¼©¸y”}§ËMû×nÖ¼ Ž2 d×FðÙ·ã¸=’KÎÉsV…öMsнZ¿ >Vû7)v…Ϻ7àŒŽ¿ð'ìeÙ:õí¼Ùªå©pã¾ÅÝn{¦6 žimT D€"@ˆ OM²Tæ\c³@ èþc5no<`JàˆÿY¬ÓéSÒŠ9'9»„Ôä,wMÂý”&Á™}7ûÌÞÓq–ªÓàQnc%R€›e>nvìáÌÉQ:ƒ²²4egrHNÃ^e{.»9Y†c—]…¶•ŒÏ1x¸;¡Y"»Ð§va„mè( D€"@ˆ Jå÷Î2gù/(,Ú£ù‰Œ¾Ó'ÍMæiErîòèºl <‘\¶M(qéö&’iNr‰»’2"@ˆ D€gK@¹`e5¹Dz÷An†9^¯‡®3&\|¶VPmDàÅ&@"ùÅî_j D€"@ˆÀ B`îâuõ„`<„¹6 äë:ƒºó—“ÆÞ}AšGÍ vC€î²›® Cˆ D€"@ˆ@þ¾[¼º9NÛü·xbù”Ê`hýe äüiQ,x:$’ŸŽå&D€"@ˆ eJ`îâ5ÅÑ\LÈÓp QŸÒ^øib™VJ… L€†[WàΧ¦"@ˆ D€Ø7y‹× ŒqˆµÄÆMª'qŸ,R*u¶XÍCb‹A¡°¦PHÄ"E WL‰ÆžW{ ³ì…>ÙAˆ D€"@ 0oÉÚ1(–£@°lê„_´€h#h´:ÀáÙܧh{"€û]nv Mï@ìÉô§¶…éc©ä©Ë)­H$—I*‡"@ˆ D€”Á¼ u P7bqFƒ&L›0|Iq‹fÚX­Ñ–xŸäâÖG韎ó ³}’UjíÓTs3/r%Gûy™C"¹>Dd2 D€"@ˆÀ‹I@©TŠåÎ5׃¡>Ö¢fúxZàˆm/fk©UDÀ> H¶Ï~!«ˆ D€"@*À… å"×Ý(»âüãtÞGYÁ0Ps‰Às'@"ù¹w@ˆ D€"PÑ (­ñ …ûqúñ[8‰ø!NMí>5pÄÙŠÎ…ÚOžÉσ:ÕIˆ D€"@ÌæÎ ö…q^fœG|Û‚.Ó‡ß$@D€<´OòóáNµ"@ˆ D€˜½pmƒøO³@>¯Ï2¶ÂEºH Ó³Až#ÉÏ>UMˆ D€"Pq ÌYÒN$‚c(½p›¦ÃµIífÌ‘ð숤À{C¬kLކýáAýì*/G5iàêñð?ò<$g&BTäA8tò60lÏ:$_;ûϲGDW?hl4A1—/Â…kq…Ú­O¾‡ÂBÔ­H¾uŸ‰Ãp5ùy´ÔÆf•Q2Ée–Š%D€"@ˆ ˜òH `C¬]P ïT¥Æu[0eJZAéK?^Ï͇Î@â£{°ñ«zö6¤V<=dZ5ü³'BC#!!ó1D„þ»w_„ r–fýƒ?aÊò08“1'‚Å»ƒËq©¶U‘yVÿ«Öž.ÔnõÃhØþD\~I׎@hxœ½Ÿi[/P*š“üu&5…"@ˆ DÀþ Ì Z; W°^ Á°fê„€Qhõ3Þ$V I·¼|B3Ï Ø€ôÔ!v_+þàî=v¨Z·Sj6€6ø ¯M‚ù™¶Ñ>*«€ïì &‹õ™©…‡^—rpr”ƒÎ 
‡¬¬ãBïo¦ZݤÇ™ø’ÃíøgÛw°êD*'âgõ®c±*ö÷UðÍî7ó‡eƒaîÄup‡¿‹ýÚ°Š®™_‚Ôî<¦õn€ÍÃruR|&ðÛ¬Ã6›ß%ð9Ë䈟 ðpw‘Hdú”<ÉeÒÍT( D€"@ˆ˜;w•›ÐA²=È­Áh|lÐë{NŸ42ÊØ0±Å{CMY¥‡sûLÙ½µ?Ìöó0¤¡Çr쾡†I(¨=vžÎ .ýo·I Wm_Oî5™ÚK»s§¡XL9 GbÚƒû[ÚêdÈŒ²•·–E=«€K¡+8¡Î*D!kî¼×Þk p" â„G(’=9{Ò ê×î¬sÇFx4³y£ž0wT[®ïý½f¬ýîÚ—Þ› °\sóDXÁsj)3ó¹ÉÏ?UNˆ D€"ð¢˜?mM£"P 7ÂYÇÿ‚ºLŸ2òн·×§óúZø´4 dæÁD·¥Ø‘©LnŒ6×ûN— 7o&rvöêkÈìÊ© ÚŽ'È¡¡‹ml+WþU{:¹†AdÊ\ùº/ôð‘‚þÁy¼Fƒe- ^gw[u5Ì$Ys<›v‡ëÿ Ûo$•{iиQÅZ_·’HÎ Å"@ˆ D€§ €ù°-žà%œƒ|Yº._N÷EÚœ5páBÇÊb7ÿðƒGá?o¿as>SBxÕ}dpÂ7¬‚ë1ñŸ’-Œ³ ³÷t™Á™]¼=söz³ ôãâfK[ùÄözt‚6]@äöëðDZ«(’›À'9cvyÜ¬ÍÆyåÕÙ»K‚wýê7îãŠÖ–È B(*ü#@ˆ D€"@J“Àœ…«[â8Õ0ÜæÉgW¥¦÷R*Ç3¿^™†o¬©+ ‡c%£pb§Ë•ë·¡^=pwË!“Š´!&rÌ ±¤“»z@£F íìŸpMe‰»N§WC2Gç÷æ]ÄÙÒ[Û`É`§'^o´/Éñ§NA¼_u8y„yÑ¡]so;µØ¾Í"‘lßýCÖ"@ˆ D€”#s‚BzŠ‚8ÄZŽäPUj܇¸p—•´,ýÆÌY¼öÜ×õ3,ù}nk)<ÁºOuïÒî­â dÜW™r«¾#Áÿ:€kyq!Æ%æ„ßÇ••Y„§¹AͪÒà.´õ2?Ñ-?¹æýt^ïû>hlj«©ývýSÑ:6r€Í—ï¾ÿíníêúmáµÜïGR¹m¾œ-ÑCüÝ$Ó•ÎYáOH$WøG€"@ˆ D€”¹A!ÄBa–%2aíÙ¨C£víÚUˆ³äµ*•krg¡Ÿ8¤ûUSIF5ŠãŸõ:ã“.±ºu¾µg&iL¸34{+[ 뜆 (YHIÆ…»ì=2®ÉöxJPÃî]'àMóBULÜ»À­píŠ+\çZTD[-Ë›q‰íô‡ÞlÿÉGáÌ N"C»w›ä³2õuØyÆv2­‚zíWÌÃÆ¥{@}ošÌw.‰dž‰ D€"@ˆ@ Ì[(áB–…꼩FL+aQ…f›;7ØdâáBŒÁ„®l¿¬u­ñ•Á¸\ø©iµªBK)ä¦Â^wEokJ*¬úz)tjß$‰W!üTŒ%ÓÕ;þcïé< ùïÃî Ûàñå0øâËëÐé-¸yì4Üa~}\Ъ]£†HЖ¶zXÚnÏ'Š†Í¡9 HæŒôÖ±q|°zUs1t5̼Ù;>‚Hs¿zuîÍH#ó´*ìªÞtBˆ D€"@JJ‡R »ôð½P üŪW±3uâˆy%-¯ |ó‚BZuêÖ{!ˆ„k„A;L'Ã=—Ï‚¦©Òâ>ùrÚÄÿ Covøúë™Ê,•Etv\Ñg¨ß¤*ü{á"$¤§Áí7áF\ Èë·‚A_‚k—þ…4 ´k×Ûuºà ÷‚wÞ¨ q±-)IpûÖ}HÁ!ÅîØ–ñ€—ÄÖ¶bYEƒ{êR °³pë–U \t—áÄ­4ðjÝþÛ]å|0âJÀë *f™rò˜ /5—Ø ÉvÓd D€"Zso=@IDAT@ˆ€½˜»xMg@ø3.èS —Á:¨Oé·hÒ$¶T±Ú†‚ØűåÂ`¼ˆâø{UjÜfô,—ýªMŲ˜ŠG€DrÅësj1 D€"@ˆ@1Ì[¼n®(½E­Åí&Õ“¸O)•¸™mÁ´ võ÷q.ÜKYð&— wúAo4®R­_8}ÒÈ(ÛJ¢TD€< Ü»«gQÕAˆ D€"@Êù‹×Eoï÷h·òÒ©ãGLÀs›öTš½d—È(ø…ñ8ÈUXÛ1ã#Ѹ1K«Zªœ2î>‹+«`À¿ôzCYOå–2!.gÎ,c ·ì*esžyq"‘„ ‚ò$ÛA' D€"@ˆ vG@0ñÚù('¡eL·L˜6aÄ[¬œü:ޤþ\0ű„åA¯ñeÜÂéûû·³~\¾ü3µ-åUdí„QÇåN¯Üw_1³§WVY ûÜõ–ÖuIú©¤uwú(ðhYôiIí¡|D "˜;7ØGà >ˆ¿«àˆ×Ûø&² ±¾iÍ€†]©Fo 1¾¥ùžõ0À÷SGèq¶.çYëtzãã”4ný¬€?e=NŽrÐáœä¬¬Š· ~îÀÃÝ D"‘]èS\O€ D€XèŒB¥ÿ@¿ Öq–sôâvë?Â8~ìàÑ–¸žTÆ|j­¢)~³éwq§gleù¥ÊâÉßOÒ ú,ït•¦V «/0[ë™ïžÓË«l;w[Yd;FqgäuøÎïGŽÇöîïϦTYþÀuWo~nóÌ 
ûT^`…v~£¸ýô4Í)«>}›(/¨f/\ÛòŸf|^Ÿele-çÌùÞsÞ’u“å.5ï¢@þ™´Ä¡ØÉøûn™V¯ª;eˆ^ö&+B¿Q‰@Y áÖeE–Ê%D ü0ê\p—‚À A¯-¥%m5Ýu\ÈiÂiÿÆ5ô~s–¬µxŽËbc¥@ÀŠÐ=`îõ[o¯Ü4æ×'®¼žNŸ­ÿH!ÈHùeûVw6 ¯Ó‡£Oç? `Û*ô¬À/àô›ÞªÞ‚^›3óË_~âlë§ÒhÏÓôi—ñûW~tpÌÖŸ¶µ+ [¨ "PÌYÒßT†¢@vÁ·|‡k÷Y0cJkûì…ÁE"Ég8Ãe0^:°8Ls·tZ­J5¬W*?-ç¿ÛX‹("›€MÞ‹Ü™èš"ð"`K‘7Aê¡WðËS¾¿W×àã}ûÎ( (—S`,Í7¸/aÖéZ‡P o𨤠™³d½E [§)켈º Ì2M™%’Ælý+åZA‰ On~PÝI÷Ž’Bÿ±À¨‘U> Òè¹½@;ùj‹T’2sÑæ}ågñJ+fŒ_ø £ƒµ7º°¼¹ïYóϯ?LŒÃó0Î]NÞk6SbÇ>*©}–rñ³¯gÂ’4¿“‚úÔ¨Uy¤ktmóËÃ8ÿzœ=£ô‚¾ø>ãCú÷ö 00ûLÏÄã¾àñ5øûÖG›û3u5’ìáÙ•®ÀžöÿÇ Á Xy}Æ­rÙ†nù‘=£Ú}|Ôº:'‘À¼ µ£Pã‹Gn‚¼Ü Rvp®1NæRãÆïÁQCmKŠÁhX¡ÕC½©ã‡wÇ!Ø/+=ü<6:‰îÁƯ6BèÙÛZ4Ñ£›`Õ†pø †öD@hh$$d>†ˆÐß`÷î‹a‡­ð'LY§b æäO°x÷op9.µÔ,U?Œ†Ýá¿AÄå‡t턆GÀÙûŒQÅ $’+^ŸS‹‰°‰ç4Ì›½¥ÖJíÑ‘E±mFÓÊΕ‚\«Ôþ[\‰ì0î P°ÌºY“IŒ×*•Ý&³û:Œ‹ŽOêÊÎE>í*“Wr 1 Ä)I鲀߂›D AIz\@íÊ0™óÖòÙr»+5ÝŽÀc2aÑò†!Lë6dHŒ:×j®Ž³+{OÁ—¡±QÙtÇ¿ï¥fZÓc‡eÆùøÃ2eagîª×òqÖGsíC±©Ò‰EŒuŠ¥éÿÄõ=ŽxX»²zr­ÁÉÖéó;—H/Þx¨^©Oºº Š«Ë,‘Üm³uVØsÙé³ðTŽU6›¥¿q—v8ËÛ¤rçm,ùÚ#cÓ.ýtÑY*8æZ¹ædÖGwÂJã ÓÐÉì: ?KLŠ]à,—lsvõšÅžÓ8g{¹rXsS.[ì`Ãæ q§wh¡j•*c®5°, œˆŽï”»öbõ)˜ߓ„H‘Ëa©âØõxIÃnøÂÂpaƒ#¶¿zº~ä›®µ-œ%+‰xq Ì_²ö+P° Ű½Ãx‰P&‹ ‹p(uMœo|Åh4|ñH—\sÚ„€q_MqçÅ£¡†¤[^¾¡™§ ÎcûꞢ¯¥¹[$–T⢤ܸ¨ö2`„ ”‹¡¶ Oeü£fAñª#>hº8\S®jWèû¦G©*r¬‚i1ˆÅàXµ&W®‰Q©UQn ²Çþ/7ðÈP"@^LìûÞàì:høäE¹¥²Î tÊJO2»ÅtnuêtÚ2wÚa~(.„7÷ÞqSXoÇèpq­JÒÇÇÖÿ°ÐLk•ùh9´ª/ygêìUǹ£qLôôýv]Яwö\U½Í\Ú]¶rY°%‹åÄQl|ļ¶êˆ€ÓBAÎ ÞâÎÄt×QΓ$8k¹¯ƒ{­K9¡ÚuIü.iÔ×wn¬çùù=ßá€+XoGOã‡7qóU§|­À7ê³nû}×|Öû`ºöÒŠ‹LTYÚi¾¹ü“]ýÇÌDϪ¶7@%©þØí EKG¸Ë‚wðmæ‹ÊÿˆsÀe’¬¿wüôÓ|ù]æ_‡”fbø–m9š°¬f¹me׺UÞ6ý‹ê¤<´X½ë´·kƬ![•,EãFŠøˆ°‘^«cX\Ñ•«Àr—så ãÜnèñþíJ¾˜·‚òç°_"ý6HD™1?oÛRÛ’Çhœµ×:›·ÜSÒ>­?li»÷üÇÜiæúxÕ’àµö=ƒ—×`%oÝÒY²÷4;qXé@`}\M–ÐO"ðâèׯŸèÍ–]ØoSBmÔã/˜«øb G– 4æ¦Ã!迟:þÓý/^ëó¶H‚«oćn(÷FÀ^õí^ÿYÑœs$ÕƒZ­GÝ$Å•‡sÜÈuaßéôz èt"pp1 h¤Ð¸E€ 8³HQ š5q€?/j­îÛÓ©¹k> oÅâÀø„Ø}­ lèTl#õjä/¤>8Ôlmð%Áq|¼4FÁœÿ¦|úù˜ãy‘Jm#D€ØL@ ~b0 ]ÓÒ’?±Z¨9ßì[çMûuë<€#(€N¬«z;jÃ+&eJÎnºÆ­í”À)½æ-ìz@ x‰« …)ß­=1u¶9J Ðiú€Œ«V£ï³çú¼ûm %O¢:-ÑF ðâòOœ@f%Rï dÞØí7šÖý|Æ2™AŸ)˸·Ó=ïÝ|¢u ŸùîÙ.~[aÿßê Œê….ZÄ/ À£’&$·(z¾ú˜•7ÑåÉÆ¥å ÛVÖþ!ô‘“ü ¡ÐÒñ%Á[õ[¼c\kŸP•ŒÞM= mé\û¿ã—ÆäÊÊ]ê ZŸÊú…¼@f‘F÷†ÇÁ4šéÌbˆ9#¼>tz" ㊇ªmâ!zü÷uñÚ¦(80»{À$+w¹¥\¨/PCàÖÞ†{¿îe/z­6^È¡ëÒ5ÙÝy4oýŒ¾rKÀÑ 
1ý}Šå=bÏ[î;|=?ñKøúNúëÿ…A1˜ò%Б”o¸}“Lî\ãgTÃݱ%ü0uÐ? ©ø‰øI£Ó/þjò§è[­8AËýF—‚„W®ÙÔÔ˜Ó°}çA8s/{8¯ÜÕzø÷‡Î²½—öž.3ö<ü´áp&Áôç˽~3¨£¾ŸÝÉúÛ°tíiðê<¼ý _œRƒï¸8ÑȃÎ$Þe1÷™Y(Å9Õ‡–}ûÿ­ƒ¦ŽƒæÙ¢Ò¢!hÖ6xôR˜þy|ù¡‡›'#`WØQ¸“bnöi_ÿ°O«BLøzˆTU‡ Ý<áPÐjÈòî]J Àí\±M"‘\ld”€=›Q*zûvd{ûøF³y°8Ÿ”‰Æáœã˜o'ïÆ9É(MÁ"xðÒñÃMt[Gß»rïñ^_¥þ ?¶usMôè)Íb™+)‡ÔábYæ¢ðÀ>eYÊŽGÙʉype7öN®c-nÙbNDŸ=m:c? ¨6PÕE–õóg=I¹Âyk[}{¶1SÀÓÚè¿\¿>;; )ïqD;ö®`ò:çLð‰¯'77¹=οnà, :ñoÒul›Uƒ9Ž ß>fCÔÅ;(&q}œü5:)­ËL­ÕùŠçí½ÖQÅ9ÏÁ“eÔ¨¬y–`ÝG–È¢Or”k¬Ñí¸÷kѹÌ)Ä2üv£…zCwe‹í\¹ŸªOs•Å_†RÄùŽ]<&ýá••«#"öîÿ ¼æ-î={ÑšBTãóÓ‘¼æ…´…»°-ÕÍí¢'ù:¾ \£JÓ†(•cÒ_„v¯ èõ•:ãÀb…ƒ^[ðþaqàæ¡¨cAà¢0Sq2hR·Ä^ŽÇ)1°;xÈ•sácöž’/Â7ó¶¾éÅ!ÔаŠ®Ý¸`ºæa‰êÀôJpP0Y\ –.x zm‡AQ¯Ú×»IýÔÚ îw@[ÿ9‡ßn܇}Þ…æ½ëX,=û;\S©AŽÃ¨16&r ,áî˽}À[×XŸA港 O—/`i)>õ'*¡2aÑ+b ‘\{ÚLˆ@©¸úõØ«r}–Ïõ=ß™ùÝüL¨²Å¤ø öú ˜xt›+TŸ‰˜32#)fAß—o`\m>Mî#/ÀMñ8æÉ<7Àä†)‹þN×ê›u}lmª½³U*¥…¬[¿ýÓÜåïÛi.¹Çä Å–€™+f)߬tgÏ¡ 5¥þ°]r§o½™÷«—ÿaî›V×Ýwô’ÜÛï¼öÇ©]pÈøÂ®Ú*Õ{UÏÄ$ q/åKGzÛ:¼¹²U¹…Z@–¨Œî Ÿ\Îv¥P‡µ}:Õ úkð­C¾ùa\ò†§ìÓ¼šbBWL`[r›µýÍ„kÇwýsO³ç½×ì0jS\Ay(ž”wè9Jjt “ð³ø®¹=ìcyRo0Ì›>1€½(²þ˜š“Tœ[¼ÇÔ$YÛõpnŸI »·ö‡Ùþ¯³±DÒÐc¹vßPÃ$üæ¡°ótðǦŸ8A,¯ß¾ý¼='“/eðoÜðr®Yø#»í"ÈVžX>½q¼»‚óÌ ³æÎ{í½¶øV< â„G(’=9{Ó ê×î¬sÇFØ}aµY ·:¾Y•»w5|),¿á;OC·¯Úß|nÇ$8ÛËòGÎïbeY•MˆxÑàâX½Æ§J¥”åìùÇ™@F!‚ÓÛðO—,Ó4&ÊÜæ)(–wÏX˜V¹Ñdüræj+ &¬Ø\9 y~_³aÊõÜS– |CÜÙ ÔÿýpÀc¾\ÜóÐÃtÇchï'*@›T¸€WdtÜi9®DÝä%é`>¿õÑí>û^¾éÞ{ ^ì=êbzܶ~Ö;8„QÇÞ<×{g—eŒól»äbb]‡mç‡ì¤a vÀ³·€â Ð^Ù™ÓË_¸}’zÕ²oãCvg¬ãËÈ}|Ú>eåáô²üû G¬WúŸÙÃãYäö®ç¶…®‰@y% T.q·$d­¾) ö[²…ú¶SÆo…™ U©Ð¹°þõé<†~Ø>ïoÈz=d¦©Aì˜Ó¯hßé2á÷Ú×>lȬÍnºÀ f9ÛQ‹òpOTíuèľY¨.àÊצ-¡ôÎC$R-k­|¤yÿ_“½~O\ìË$YÛ^îÐ|ßkv¯g—‹•1ŸG Oòó Nu"`ßP|b §„ ,®mÄùÉ•ú/Y27VÖ]{Æ/ – ·ß®‹[ãa'ÛR©a•JcÞx£Ó®ã÷2kÅG\ •ˆÿfÙm _T‹í%„³>l°ýLî<:çþ‚äîßüÙD|iÃÅþý/ìܲ­™[›™_âb^¾[/ ð¿¿]—A{ÏäÖ>’º}Y·f— 7½£Žï )æ4ï?£›¼öµ+WJx8ä·c+N=pk­??O¾ë¶ÛþÒtб ßÍž`Ï<~Ńœë¡Àe˜€7˜6-ù6»ŽÉ$áo¦p8~ÓÅ3! 
2Xo³dfu7™F ÙÀ[©4Úôv2"ßõÚwÐþ´tJÏpÌç-z›——Ë>M¿«£} Ý,b m #oA¢.5ge¡«UùПîïÖœ6®ØO?nJ¡á-éÀ–ŸÔEt½WÍ.yá’#€)´qXÑjCCêñLêÜù ±Er…`åJ™¨Ê$}ð6_åBÉXã<c—[Wº½×=vdOþjÛ¾#XÜ ÿ׊?mÖÜóûñ‹a¾Û§=úŽÚtôtæü£ë>›/˜¤ÜŒžlÿñ.BªÜv\¹‰¼^µiã¾K;ðfQqAäºaí?ÛÿÖcìG͇¿Þ(Ö?ž÷Šdêµð¯°ËßýûؾÃç6î;¼P-jÔÉEúºIm}ü¹Ï©Ž#a×•Æ !™ö¹èØfu5C]mßwÿ÷ž;µ¿Ï}ð£¾ù¹ú .ÀåÉç¢'ÛWGÝ䓳\züý’CŒ õ£hݹùÂ|æÕ>>à­øË™Çb¿únã1Q.( NüÅÌ3±bµŽ‚íÊ.ˆ÷º5 :Öµ)¢”ÞHy}MÿmãéD¨ëâ{CÖ…½X1J\¿hÖ>öþºÏN‹c¦V?t{À=ýÿ9ë7q~½öm|oò =^™•uét´«Ô¨?+ê¡B,Ê3¦ø¬í ©‹œvÎÎ}®V ÿá\›>üçßí¡ßG«Í4®KC9ÔZEÁ/UŸ€4ufR,ç40º»þ6ãïïoøƒÙ× 8^mÉ<þ<¯ËñL­ê*OÒ6. )ÎÐ\q¿OPµhÑœ²vnÅׂ½½œÃRÝaÃ~Áb¶LuÉÈFý·ö ‘œ¾};¥?×€~úNü³@Û4R1\ÎÊÿÊàÀ¯¥(ú…«Ô¢ü&`L€ ”Hɼ¾X“î¹ËÖ¬YwõqûÚ Ý›5SMÁ]“nl@;éñÇ/Y(–X±ã¢X¿Xþîµc·×3÷Ÿ:wÙÒRŠ–øÖZ„BÛRÒ5=–š[<»¶«î×¶”8?µóË?ôÉ9Ñ{õGïèàI.–™ÛÙúÿŸÏß©õꯊù×ù›é Å÷âÎwno˜yvðÈ‘iùoÜÀA"úÒXíKƒÞ^˜syªíò⨌¹,9'´Œ%­²7}øþñ%QäÚs°|Ü1×úŠ&¯×>Q_>3"Cñ™A¦ì2n.;JS|Þ´u?Üìß¹mV¶ëó(ª/¸'ÔŒ¾”ü4¡Œvpq&à)&Sb€1Ч/¾ÈŽÄóæª ‚Xe¹ìi²¬y ×4ÈÕ¼sÛúÁ+W®å·Y­6å|FÙáJ¿þí% ‹'ÚÒ¾ç êý@1CÝÒ0u æ¯vn¢'#²¼¼\6}0q:ý@ÑoO¤6Á~_>·œ¥ÎHVÕË‹²7ûûùÕn£œGÈtÅ×sd£šhžì¶ép+íݲ—rZ=÷µUÏqö¡/éõY?O‹^4gð=ÚNÑÒ)ó7p|ã ÷P$:þ]¢ÚþXfLãú”=É>bL€ ”ÄÓãDEįK ‹ MARäý²4$²w¿ïÔáwWÄÝú|ìü2=þî^Š3o{{‘/]¢G/¿"S·á ûéOýÒ»u#éWÈ¢#è?b‹dqÙ„ðbìÔbqîŽmD±¾æ\žú7¼?³ãõÜ'²(WKñþõÚ笯ܟ‰ÒìÀçM≮"´½´{„í¼1ª@àíi "t:í@ü݉ÂÊÆµT›åˆYª¥GލUÛl;¹ƒÞ…@[Æüùòörð¢†ÖÆfÒ²O¶ÒÚ;ÂŒ/üJ+!«ß¦¡ÖîƒHþ~†@[LJZæ‡VûÖu¬—±÷sú6­u wdý:³ëÚ*–ǪwÕõ@öƱ`‘ì£Â61&ÀŠX¿dܨ.½‡>óáÿrSß;²ó›Sg]ÿÚCÅêz*þÇæ÷^ÞbØ‘Y/<ïèÖÇZc9 ‘Á¦ðöô°ÙC¬§~™RërÒ¿f~øEá÷ø˜ 0&Pɤ¸YI`EñxÀôÚvxšåG%ædÿTØcôo8[–¤áˆÔëÝ í˜gRÉÆVùæ|Ãéî Â<äLZðÏÙÔ¹SKÒM¡µÛÓò»–r3Oîóör!Ôé¥Þ´áM,µëÿhøÛû¨óí:Úü<¬ù=©^¾m¨ A$«Ý §w` ][È=Ô¿ízZ°=“VÌ4ѯÚSýËûé‡]Ží»ß›ŸéÜuKMÞ»¦nÔdÜw&À˜@• °Y‰óŒÁkv¾´1ãƒÁ…ÇÊfþ¥_W'oÅüTäϺK»ÿ5sYÜuìšG­§÷Î RÞzç–­v.͘p7¦MóKH45!9E&y#òãXB "ø]èà;ÆÄD!SuÔ*´¦54\ŽyÉѰڂ÷ŸÅ¹ÜÀõ“_‘Z }e>A×®S²O³öÈxÝ^õFž?x„2!©¼»ì¾›Æz’"DòŠSûi£*P燛«t®œ­¬^®Â/õ¨S—ªý¡¢pg˜¼«C­^zƒ†twô=uËV§@ Î/£~­ j× 5xï1ß5˜?w 0&Pf7:W4øŸÕ)þ—Ï6Žx¥»º@Fq#|° xHŸ!W„ì/ÇçL€ 0wˆKXr+&8 D†Äh|Quä%VèOä$Å’·pܸ‚¿MêÜäŸÏ  ;A_„ ù‰ñ1Ñ?¸Û¦ªZ_ùæ$ô6óB†XÄzʾú‚7Šy{9±ŒavV]¶XI$ír­\¬?½±9Ée0ß’M²0ï"Ú78(?$» 5¸½(~‡½jN2‹d·1WȘ`L€ 0&PV“ã?¬•5#pßøQ¿£BønEò­Ä_~Ú°ªxò-Óôùõ}tú¯P´Ê¥c)ßnFGí)k»Õ¹üŠäêÌÆûVi"Ù ;ïm"™ç${ᇄMbL€ 0&À˜@M 0jÆ 
¿9ð$âŠÁ—äHÑgˆb3IÊÇ6+Å_MôªÞf¢õ(Þy¿Õné2qô°?k3î#`O€ErÅ3æ˜`L€ 0&À ˆ›µ ‰dW³T„8TßRè/ˆãd»Ù²`üøa…×6/t'Ѥ„Em™¹®æ:ÈÛÍv{Ó¨aŽìCEJò `L |X$—ßŘ`L€ 0&PFSg.zdí¸‹ŸB¢-5,ÖWûŸbSæüò¿ ©.^}\Ââ.’$\ ÷}uÖ–ÑkæèÑ—‹—ãs&À˜À`‘|#ôø^&À˜`L€ 0R ˜L‹} þÒóð‹êβ+«ìd;ø×R+p¾95aÉ X²|)æ ëÄë¿æ‹Ç_i2‰uÁycL€ ¸•‹d·âäʘ`L€ 0&ÀI3çÝ¢ÑèJ$ ÂkÙºv»’¬Xræ?¢Äìú%Ñ›:+i8¼Çs %,ÿ4klLt,ÊÁ™Ì`LÀýð÷†7&À˜`L€ 0&àSf-é +ÊxŸAêJ­ðüî ;%š³Ž¯0•Íû+MKHžm<u)v…bÇÅ ˜åK«-v<‘¨þ½äV'²,{…>eOruúTq_˜`L€ 0&à#|êSÀ³ñø†{'D­ÈR…Xi¥!Õcý\V³ ¦µ>aï`1¨¾ÐÇyŠ"õƒ@þ°¬õÔäòB#ŸÏÈ"ÖÊUãSÀK@ù{Í@±Höš¡`C˜`Õ‹@ûÞ‹TÆÖ刴ä 0jI`Rüü›µ’!Bv0:xªEôIüò/±å(ó&Lˆ:UžŽ‹¥¡|4A«P]7ïK’ž;jÀÆòÔÅ÷0&ÀÊJ€ErY‰qy&À˜`L€ ÔpÓâ“ïS$é58Œ{…R ×ñ/˜3œx(ÅöaRÒÀ¼ò"2Í\b”å/án‹:O+vê>vTÔÎòÖÇ÷1&ÀÊJ€ErY‰qy&À˜`L€ Ô@Ç'†ûôÂÂMHš%µr„ˆ(ÃÒ§6«-~üèÿ»Q,qq Ã%Y^ùÌÍ®}X!©ë¸QÞh½|?`L ,X$—…—eL€ 0&À˜@ #0)aaC¢B:¡ð݇‡×.)ÿ±“2wÂÈéî@2yFrKICë C!E¸ößË®í{¸&Àj.É5wì¹çL€ 0&À˜¸*i KD¸óÌ/~N’%õ;#Äën,Á”hÉ:ñ>k‰Ä\nÙ¦$$u”‰VC *Šòíù¼óONŸ0&Ë-•s%L€ 02`‘\F`\œ 0&À˜`Õ•„¯ÞP«á3ðèŠuˆ[‹,ÕÊVˆãO°SÂØØè­îî{\|Ò3²$}€°mòÇæÌã}§»Q€»Û^® 0êO€Erõcî!`L€ 0&ÀJ%0yÖâP-É™z„q]µ°¢œ…h}Ç*Ù'Æ >^jå|sj|ò`d°ž,cä¹XGùuu{9«ãÛÊL ƒ~üüjÜ£…eî£/·YéÑî-ÉPæz¼ïË…£ôÇá iÞ‚Âü¹å*ÞÊ\JÙ¼™åSû7SÊ–”åߌ¹¯‰3»Ý[p!um½A=Z‡PÊ÷_“9òajU_gSÚÞƒtI[n‹¨Gta?}³íùGÜK‘t€¶¦fQÓv)2¸²Þ@WÜx+‹d7Â䪘`L€ 0&P•ÄÍLn ¯ñø‹{C¨êÄ Npï‚`]p<Íüß¹sGX*ª?S–üŽê·õ+ÿõvEµÅõ–DÀF?.œBËö†Ð¶·Òד–ÒÖzí©m×–T§è¡#ßþ—|—I¿I}ZTÖú»úíÿÖÑFs5½ûIZ·úJ7æÐ}É% A¯ÙNn¥1s×PèÃAÔ"o%¬ÚKû·†Hv¤ (cuŽâÙi´háûtÞØžfÎ|’´§÷Ѫµ[)ÔÚ˜êh¿£ÕkÏRÇ[îH®,†åê…Ûob‘ìv¤\!`L€ 0&À¼—@tôb]“Hé)ˆâXÌnë´Ô†ýÿYí¶Ä ±¿«Hë{õê¥iÝ®ëBä(xª­ÐåÑcb¢Þ­È6¹î’XèÜ!¢Ð'úP«:—I @Ͼݫ…@½Õêjá5“ô•ªv T?~ø]ðìjü¨±‘ ’uä.,—O£O (ê™–dþd%Q½nÔåÚ4Z Dçvjüê’¨P«%¿za88[É EãžßÜ5fžï [À˜`L€ 0&pU“''×ÓøÐ«IÃá1®/ ")×9IQ–æ)¹s&Æýëª7»é „R W@ ?¡’mW”çÆÇF¯qSõ\M è ŽÒW@Ûj· Ü»jæZºoޓżž6²XlÐLzÒ”êaöÎr:­#xÜbÉ­„>èéŽ{ZB$§ùÖ§V- ´uO¹— /a4u¸v‚’?ÙAm!ÈãÔ:Z•zõ‰(«—×9VŒ)ÄpáÍ֜ߌDhó; ˜w~»Æ¥RcºÍeL€ 0&À˜@Í 0yzR+VÏqˆSçäEeêÂÓté½Y11BUø· X2ê?‡çø~$;o·Ù?zж o˜¸*<5˜^O:—P *ðzf¦í ¯§ŸfæßïN=z?K]Zx/=UΖu˜¾øèsZ»ëD¾}Ûv£~Ïu¢ÐB“ªO§n¦¥®£­§Åj7º‡úE=]dŽ­Ûú`;L³“wPh—AÔèÌ×ôúv =1üòÍ·ð¬ÁmÔiIÈe±é)—6Ì™D_þU‹úŽNmB =ÉÈÚGño-§37ßOã_늇Ùôóç+iÙ†½êCÂ1wúˆ£*õ5mí;o@1¯CâQN£G¨k™Ex¡ «è!þNñƘ`LÀýÚ÷^'ÑÖåƒøß÷ãå™@©à±Õú6|Â8Û9 
ÛÞ¼V‘”Äq#£¿.µ7¿9mZr˜¢S×@n÷õ_”§t3&ê77ÃÕ#`µÚ”óYd·«Ž‹½Kd³d“ÁÀ¾ð(Z²³"l ƒAC–c›høT§ƒßB-›Ö¢c{Óè¼³†¾¦8zbÌSåÈr˜âcüµØ(¢Em: ûÔ§=Aiö¤tâóÙ4}C€mN–®>@N"¡^ÝÞp4ø Yl£ìlùú:ŸK©¶–þâïçC˜ò@99WY]͆:- “/’«åâ˜Ð–ž~[>‰lÉTÅù[ÿh’ßȱïпW¥‘O«Þ4gÀÝ´ËYN<è¯4J7;‹×·7{¯ õZõøL è‹})ôÌÁYØý;Lý ÚþˆVÐxÅwö$»Œ¹F&À˜`L€ x„À”)‰u$ƒñY–á9ÆäElk¾ÏíR«!Õ£†ýYÙ†A ßF:i=¾ß ‘¾×JÖ®ÇTL¶ìÊî[UoOˆ-——Ó!ìDlôË\»CošÜûngfæ,x,§Óª:y‚:Ä×Cåüé÷OW9r½{èŸoô¢0¡â²öSܸ%t$ãú.­3EäN=?ê5êŽdÛ)Z9%ž6ž:AË¿ÚGo=áæ>ø;²h\\È«›oÏ  ÞÝWà¢YçàÝùðƒD[ÖPú¦Ÿè Drµ‰,ÚöušzÔåÑd;¹IÒÕÔsäÔ¥©ÑΠ/çÄÓjŒ)ž–86 êuš­Anî³ïß±Höþ1b ™`L€ 0&P*¸ø…wˤB2õ…÷XuüAŠ¹Ç ÍíïšL¡j*›2cQ;|Ë^;j×¹Ùœyéq“idFå[Â-–…@x—~ÔÿÎËÞÎ)…nK­Ÿøh¹Ô‘gÊeÓÁƒgÕîôìÿ„C ‹3ÿæÔ¯ÿ#´ù”EH»Æ]^pdQFSžîß6N]Gé¿î§Lˆdw÷A4SÙ›¦þÝÔ9h mÌØEÛÓzRp=Dñ¯8‡%Æ{¨=Î/ïuÄ›û´zÚ)…•AÔã¥І7WRŽúW£²-÷ÞöX${ïØ°eL€ 0&À˜¸*Rmhð8±„“ô(al—!Õv%qì¨èõW½¹Þ˜Ÿô˜F’>ƺË>°kµ9óøó°ÙÜY på# ¡Ð¦·“‘~¥µï. ýi锞Q Œ êôT¹lº¬šÓ€Õ)ÊÚº+õrxйoÔÔáWuÙ­©NqrÄ æ_»»®V*{ïO÷wkNWì§7¥@$·¤[~Rˆèz/ã(%Å1ó¸Ù­u‹ЀZàÊÏ% qÑ’5êŒErnî,`L€ 0&PÕ L™2ï&ÙGß_RRfшЋðÔ¾GRÞœ±#‡ötãâ“DÈwìÐ( %ïܶaðÊ•+Å2S¼UiДÕiù–ú…P‹Í)kçVJ-ô˜Ã#ålº Æ"`Þ®>QîîC>´J>ý[{ …HNß¾ÒŸk@?}'¼íÔ±M£"–è]IÚ\W±TUpë„÷.,’]$xϘ`L€ 0/&0yFrKVBŠü22Û!…8N‘ZxÖžñŸ™£G_öó§ÎZòL›&l}SÆÆDMð»Ø†ë%pŠ68rûžƒ¨÷M¹¼Ô--0¦¬=ÌÊ₇Êi‚)¬ÑžSYt‰±"]haQúOÒÔ÷ÿ »{F!…ºc;yFdç.X"Évá˜#›³%¬nQOì|#èÑZ¶÷OúâÓ/i¯°¡Ùƒt§p#c«w+üçÎ{#ìú~tmÙ'h§xèPè’ë­š¼g‘\“GŸûΘ`L€ x5^½zis¤zÁ|cñ?œ³Êz»ÇÇF}…”œº¸’{†PjÙèßp6,ql‡UÃÆÆF-¬d3¸¹%}ŽŽ«uP«¶Ùvr½ ,¶Œ ˜âî©rk<²Ðª•[¨õàÉÇNÑ·kv©®ƒnÆUªJ$J]ó¥´@‘ªN΢uÿu&%ƒg<ÀÝ}ÈOƒ¦bªä µîtDòôóGç;>Ô2?3µïM7©öälÿœ~ìI„‰PuíZ½Æ‘µœÃ­‹Œ‹ä"8ø„ 0&À˜`ž' ®)¬×ö—âøa”0\bÊ2E‘f‹‰rM¹ô¼±°Yo h¸ 󣟅ˆä>ãb£?ñ ãØˆ²ð §»~›ž‘I þ9›:wjIº³)´v{Z~=)GNÝç©r!Ôæ™§iÕ®åt~ïz}â~êÜ6nÚAGD(8UulêKçœ"™Ìû)aÜtêØ¥9ÜŽpq5T»õ{âv"ßl7÷â܃›oDjCɪ áÔᎂ8jCX'Òv+-ØžI˦N¤ÝÚ“þ¯XÛ©Ž9qW‘‘“‹œñ `L€ 0&À˜€ÇLMHj1-aÉ|ɨ;&Ér<¼Æ·@pî'»2ò|î¹°±#£†‹àe91ù+!aëE,ÆÜ•²Ç>BnhØ—ýEj)ô•ùm\»NÈ>ÍÚSÿçÛ“.Ÿ?xOlޤ^[µ P:K?lp ä 4âíáêÉPÉnî«ðßPõ¨Suå7 íð…;Ãä]U¶zé5êÛ!\=ݳe«*·mïkW!Þ«Ã`L€ 0÷hß{‘ºuù þ·Æýx¹ÆjD@ S ý;)šXK¸‹³kЛÊ7ˆ­N3@ćzEHuqì¦éóëûèôù–ZÁÞt¬ÔmÂè¨=ÅËñ¹gX­6å|FÙíåûød"C–È%ÖSðuˆÎ’zâ©r–lØx{­Ö—‚ý¯b£ó—³°„•e‚]+C퉻ûP´öë;ó÷ó!«ÝF99HJV›Í’M™ÙÙ˜“íKu®Â£›/±jEj!Ïöº³¶ŒžÞ²üTUàÇ62&àýX${ÿ±…L€ 0&À˜@%`2%øø¾„0ê× 
oU»¡ÐeÌýPÊSfõGUêÚÔ„%/¤,Eˆ¸±áËr2¿2Ód²V¥>°­L€ 0k`‘|-Bü>`L€ 0&ÀÊH`J|RsY’‡@P¾Š[kATb±rD‘¤EæÌ¬$“i¤ºM«õhñiñÉ#ÐÙ˜?iÓ”€„b£`Pù2By´'Ü8`L t,’KçÃï2&À˜`Làz HE~”d„TKRÜe, ¹I²)sr.ÿ¿ªR]¬ÃÒ´„äièÓh\W0w:ËPÍ*V†O½”€N«Á bü,ÃK‡§ˆY²,‘@tºbk7)U=ODvkoÚX${Óh°-L€ 0&À˜@•#`2ͯeô×÷…~B²¹ÚEÉ×x¹Íš7kÂèÁ¿W¹N9 †¨×ú„½ƒ¾õ…>ÎS©ò‡Uµ?5Ín!º|}Œ5­ÛU¶¿LZBl`L€ 0&P)L¦Å¾>roÌ/ ¡ØÂѨb`\©(Ö„q±ƒ­C<ÐH\|Ò3²$}€)Öô÷csæñ¾xëS¸I&À˜€Ç °Höø°L€ 0&À˜€' Lš9ï­l„yÆ!ƒÕL\D'XdËÉY0~üˆ3ž´¯¢ÛžŸŒprš‡¾ËXy®9ëøëÈöŠn—ëgL€ x+ÉÞ:2l`L€ 0&P¡¦$$?€8jd©–žBCѼ¨ÛII<ò‡òqRÒÀ¼ 5À *Ÿ6+ù_Ç&a BÉß=É Ìb*@ýøù/Ô¸G' ËÜG_n³Ò£Ý[’¡ÒÚ¯¸†,ŽÒ‡3(¤y óW½‹5–K)›7Ó¡œ`jßáfJÙ²“²ü›Ñ#÷5qü1(VÚ[N/¤n¢­—"¨GëJùþk2G>L­êëÝfžíÂ~úfÛò¸—"émMÍ¢¦í:RdpI ÝÖ¬×UÄ"Ù놄 bL€ 0&À*Š<¤FƒƒçeY qØR´ƒU+V>²’mÖĘA?WTÛÞTo¯^½4­Ûu]›¢ð`À ïyôؘèw½ÉF¶¥¢ ØèÇ…ShÙÞÒöVúzÒRÚZ¯=µíÚ’êT=täÛÿÒ‚ï2©ãà7©O ÿ`Zè·ÿ[GÍ!Ôôî'iÝêo(ݘC÷A$«‹ž—p‡§/ÙNn¥1s×PèÃAÔ"o%¬ÚKû·†Hq›i–ÓûhÕÚ­jmLu´ßÑêµg©ã-÷@$—ÄÐmÍz]E,’½nHØ &À˜`LÀÝ&ÅÏ¿Y#ëbù¦A„7©õ+tÒ®(Év3ͯIœÅƒc@ÃXçù dê΃çÆÇF¯q7s®ÏÛ XèÜ!¢Ð'úP«:—Iƒ¡h¨¶!¬9Ý›ñm~ó®âàjÄ9‹ä1ÌÜI&À˜`5‡ÀðቆÐ&ÆgeEF–jº[ô\’|ûU>²Yí³Æø¿šC£ §Ó¦%‡):¬LÔ‹=ÿEyÔuü˜A”ࣚF :‹HO:x Õ-¨À“š™¶ƒV|¼ž~>Z° ˜OP8õèý,uiQà½ôT9[Öaúâ£Ïií.äØsnÛv£~Ïu¢Ð«LªNûþCZôõaÊ1צÁ»ÑòäÚe5:ó5½¾ÝBO „|]•yãÞêñF–„\›³E6Ì™D_þU‹úŽNmB =ÉÈÚGño-§37ßOã_ëª>üHßý--]¶ŽŽ˜÷“±=Ñ·7õ¸«žz!mí;Ao@1¯CâQN£G¨k9D¸³ö*»ÃßIÞ˜`L€ ¸Ÿ@ûÞ‹°’ÑÖå"º•7&Pñ&%,l¨%M4’4Ƈ®ŽÚ¢¢œÆq‰M²Ï›0r`zÅ[á-@ ßF:i=²Xߌ9È{­dí:1fðqï´–­r«Õ¦œÏÈ"»]ýs|Eµ6K6Y`ì o¢%;aÇ24d9¶‰†OuFàC¨eÓZtlowÖÐ×G@ŒyªYS|ì"‚[E´¨MGaŸð†SPGš=©ø|6Mßp‚:7Q¯_Jûþ=š‚9¼bëØ?–ú´®§öÙà+d±²³mäë[Ô«ª®Ä?²Úm”“s•ì´X0L¾H®–‹c"lþmù$Z°%Süoý£I¾ÅǾ_@ÿ^•F>­zÓœwSÚÆ%4eõ~õ}ŸzáÔÈpžRAZö|†=Ô(P¯UÏñ™°‚ÏUž9ä·ãŽ$P¤ÚþˆVÐxÅwö$»cT¹&À˜`LÀc¦ÌX|¯¬‘GÀ€gñE+?¤ZRhþ±#9Î;Bõ—yÌ@7&êÇ 7¢ 40%>é1$}Œxs°YmÎ<þ<ع-«@ØDOïÒúßy™ÂÛ9²õäŒj@IDATð`Âm©õj´à™“gÊeÓÁƒgU,=û?áÈâÌ¿9õëÿm>åCÒîÇçÑ꣎ò‘éºW‰™®Õêªì‹¦þÝÔ9h mÌØEÛÓzRp=ÙNþŠstÉxµÇyö¡#”.zÔš:A »æ$×¹ëAz8èZ›q–þ8’EwTC>åXÉ塯÷0&À˜`!`š>¿¾Qk@Hµ2‰¸êA u|V„T#„x‡ K\|Ò+²,'኉˒wnÛ0xåÊ•¶‚|ÄJ" ¡Ð¦·“‘~¥µï. 
ýi锞Q Œ îðT¹lº¬šÓ€Õ)Úº+õrxй?ïÈâ40Pˆüê¸ùÓýÝšÓÆûéÇM)É-éÀ–ŸÔŽFt½—‚qt0õ€£ã[iÔ°­%C`e˜Ï…Qä£à&À˜`LÀ[ LJXÔ)…F@?‘³FUÇ»ì$Í·dŸ½£EGy òTqÕNÊäq1Q‹–à3&pui`îjZ~Ÿ jÑ¢9eíÜJ©…âÔ¸ž…Žœ²Ðª©ËéÎy/Sh~ϪÏAèßÚS(Drúöí”þ\úé;á= Žm©´šçܲm Ò»¦=‹\`~:œçQ˜_¾ú)_OX$—ßŘ`L€ T0èèź&‘Ò3HÄõ:$ñ½Žæ$†?Sl”8ntÔ÷lB•« d£ÃÙXöj88AÓ°q±Q «\GØ`8Eœ¹}ÏAÔû&„\^ê–˜FSÖžÀS*qÁCå4Á†DÌ{NeÑ$ÛŠtM …Eé?}HSßÿƒîî…5Î[ûþ¯Q¿ÖJ6~¦½4gù>šÚûvç»ÕhçA¶0в½ÒŸ~‰žbkö Ý)ÜÈØD6&‚pöiÑ‹†½tã¢ó5ûØ>ÚwNK·Ô®®žö"ݽ®É×…‰ 1&À˜`•EÀ4ca]Y¥H4‰¸N¬é‹öÿ“§äÎ;ô¯Ê²¥*µ#æi.³gƒnHî3.6ú“ªÔ¶µ|0öZ½ƒ&²$G`ðH|"%E‰øÏ?¡§þÑ¥l•fŸ#GÚójÕ¶@ ÛNî w!Å–q‰»„^ùý<ë:¿e)}ÙÖD=šºR–‰U‡MC­;Ý‘üý¼ÅÑù޵ÌÏLP/kï œ½ëiáHêÒÔ±¶²íìú÷Ô•jæòî#MôdµãR¾±e‘\>n|`L€ 0&àfq3“[Ki¼Ÿ/ šZï˜n¬üFŠ4ÿ””õßY11ê /nn¶ZTg2%|>ƒ8êq|.ä'ÆÇFÿP-:ÇÈ'· XÑjšc™œH!†ñF~W"ñ;Ó c¯~¯Ç\}Gy,©“q1“ÌX2H¯+:w7¿Â’|Ãéî xe32iÁ?gSçN-Iw6…ÖnOË/rä4Ñ}ž*BmžyšVíZNç÷®¡×'î§Îméà¦޵‘¨ª#„Þ9§HÎsÆdû6íJÑmwPÒöLZ=k%Ý5çe szÈó;VÅ|#Ú ‹!D²ÚpêpÒµ…üú¶]¯öÕ¬·ig«{èV¿‹´yË~ÇÒYõp®n\/ûžErÙ™ñL€ 0&À˜€›˜Á?ì)I¦øjÔoµŸ“Ýš86vÐ7njªÚV#’™ùèô_ažv+ät$"î6atôžjÛájÞ±^½ziþÖþá[$›6B‘åHÙ)†Y©&«sö?_ ã‚Y„Öÿ©HJ *áGIµ‘=e耾ÍÍÃÑUÖI.¥/=þú‹tlöû´'ãmtz}šµ§ç[­X±•ÎŽH`¿µéóý´}í›ù£oRlgÇ|ÝüUþ uêÒ€~ÆúС¢ð"4Ôæ¥7Ȳœ’×î¥#»ðPÁÙ߯­ºQ¿¾ò—«òÜÐç£&7ÔÄU0&À˜(D }ïEH8L´uù þ·¦>t˜2%±Öh€/õ"¤º¡¸Š/úX»WzW1çÍ7npš£$¿–F .aÉ­ø[§aäýŠÅÚ•Ù•FÌ{ÞÞC-ŸfÈ=ŽÐh^a‡gcÙ¿%NÅïÈ%ˆàİ1Œ1OÅ3¥sÖÉTÚæÞ.OBª!è:ŠF ìØ}©-q\Ì@g°d…4_-+63¹3Âo?…Àª˜ëÎÚ2zÎ=Z8ÔxóQ3fø+Íd!Ë /¯:_€"ñ™÷&©áÑN¬+JÂmƒ–‚pè’•T²I)甌K "7É `‘\2&À˜`î#0eʼ›dã«–!‚B¸YÔ q|ra©"å%Ž9ä°ûZ«95MMX‚ÄfÊR-bÔ—åde¦Éd­9<×SÓ´¹ tZ}„†äHuްDðôB“˜Üšï F¤„sÃB'ñ’ªz…íŽéõ\MÑÁ åâÁÅ!Œ„"-+rªÍfKɽlùÃd‘麇÷L€ T ,’«Æ8±•L€ 0&À¼š€©nÝ®Ëc0rÂG;9Ež!ebWÇŽŠ^‡k,æÊ?ŠÒ´YKâpûÁÉŠcÆÅDÍ*u|§X[¯ÑFÈ B¢‘Eë pæÈ7ÆgXvÊÃâ‚¢ˆ9ß©ø §@#‹´œšgµ¦ìÙñÍá•+WŠW¼1&P °H®ƒÈ]`L€ 0&à)bÝV2è^Çm8Æ-ª eÙÉþžÕjO|óAïÂ;ÓnjɪÚ=ôëEB¨:š Uˆ®î&–ÿ‘lôô˜QQ½Ã:ï±Â”°¤¶Q±Gœ/†a8nŠk%:…ÀS,9¶ß%†qž*çI)‡Ù&% ÌóžÞ•ß„|+fK.Drùëà;+€^§E"s…°tWå5êE-ùøH#Ë^¡OKü£áE¬Ø&À˜¨bðï» O„¿+l¶¸Vøœ«&x4e½ƒîø3¸³è¾Í`xˆ6ûœ±#®Å%þ:î¦á5Í\b”e±ÄS[P>|àÝÇŽŠÚé¦ê«\5"¤ÿoíº"Úñ+GÊ ©áÑèÈmø †3B¢¸pß„æ8Œ÷S¤ŽùÂŽÄYJŽ”2aBԩ«ã1úN—³-å^'¹:2ñæ>UÚ:É^AüÞú½gë"E¼›Ä˜`U@ao2{‘«à6ë¼J:¹?Æp|i"ÞøŠä[Ëòróæ¼9fHjáò||ãââ†Kíz|ql.D´NW„X¼ñš½¿õó¦×DØ$;æ «™£#ñCˆ´Ô 
Ÿ¿’¿E#ÄÙ£÷KvGâ,ˆèTˆé”ãG,ûçÎañþ^WŒ…ðH*ç3²X$W ^·×ZÓErHmÒh4^¡OÙ“ìö7WȘ`øRŸïMÇL¤j˜2sq¤¬‘‡a‰›~ðÄù9¾¹(‡5jå)ÿ;v –sâÍÝDvpICë Cñûó«Ùjénzc(–ª>›ˆJ0ê5’tš¬ŸyÂÎ,Òâߢ§˜©†*¨½v~øðpæ(ÎSÉ)†eÅžš‡ZcWËñ `LÀ ¼B©»¡\`L€ xáM&ñ\d/˜k›#M™ÔMBH5Â|»¢¸SžÐ76Åž˜›ub ŽýÚÕp‰ò˜’ÔIÐVC(b®ì·æÌœ§ªòB&ÓüZº@msÙ&GH2²GC ãG$΂&cIŒ „³I‘H¿vˆa±W[ª%KN5™f—t_+™{’Kæâ­WÙ“Ìždoýl²]L€ 0&à&ìAvÈJªÆdJ 0_†8K8ݪ6«ÐeV`³ÛæŒ5h_%™Rc›‰‹OzF–¤ " È›3÷ʼnÜ*Dš6-¹¡]ìÑÀ°]õ ‹cy0øÏÃX ‚X™kÎ:þº·yìG&$ø„Øü›!q[„ ¯0„©‰³Ž/ö~%˜‚ùÀ²žÃlR8zÉÄÜy•=Éî¤Yñu±'™=Éÿ)ã˜`L€ 0«âw–I~ !ÕG!õ¡9Â\¿‡8N´džXímíj©×ãâ“Moÿ—Ú»2qllÔdOöË4}~}ƒ¤ÇšÂðC KB #<Ÿ•pÒ:>+â#ƒpiÇ&öȾ×T…¤‘E÷¥æåQŠ-;ý–œœxǘ@•!À‰»ªÌP±¡L€ 0&ÀnŒ€˜jн$KòÔ$æ… q“Ïñ‡6›uö„уW¯ñK¥Kµn×u!ÄfNX¡5£ÇÄF½[C¸êõµêß*É22HË‘Š˜/¬z†%„JS@ 8"XÝ”dL€ 0&à¦Î\ô ÉÚð? û4Âm ö?¼$N±­LJ˜çvW7›LÓæ60葘ç¡HĤ®)ŒDZÔ}Í×ÂŽœY®Yà ¯’]y_d‘¶Ú”TÛåã"DÚ^Ýq˜`Õ‹äê0ŠÜ&À˜¨v  Œÿ/`å&!Žï„Ç8Á¸+”iθQÑ;ª]§½ C‚»Î¯Þ­Z­6’ ‰a˜&İ¿j"âÜó1.ˆqÁË))jˆ´¬È"“twx‡ŸU“j)4lllÔBõ^~aL€ 0¯'À"Ù뇈 dL€ 0šDà †·x „ØMjß: a¼ØbË]dzcèɚģ¢újš±°®QdFâ,’åH<|ÀrJjépW4d‡."†¡†Ï"Ó4’e!s4æ Ûí”b³ÙS÷ìøæ°kÍiˆl½1 á2!!š-É}ÆÅFRQýàz™`LÀýX$»Ÿ)×Ș`L Ì¦ÌZÒAV”Ë}Ký÷ÞÈvRææfžøâ‹×´-#ÕèèźðæÔ·©Ë)IøÎ,Òb™¬BÜõÂ; ¯°Â^a±¶0‰¥”RÄÞ,É)¦˜¨ó…î¸âÐdJ 0ø|†ñë„:.bìžýÃùð"–¬Sô×éËäW;ŒBƒõù–•õzþ|PÁr)=íaÄèæðzT ¿¬×+ØÌ*^=‹ä*>€l>`L€ T]Ç'Âû>i6½ø›˜kŒã<µv›5qüèAÛªnï*Ïò)SæÝ$ôxÀ€yÂ!Ä0D*–U¢&®ùÖ8& ‹5¤/àaÑN1l·§hlrêáCöƒå™ãmš>¿¾Nÿ|Ï­ÐvºÍFÝ&ŒŽÞ“ß.0/%pdC2%|—I>úќ޷ç[YÖëù7òAÅÈ>Hsf.¥óÇ#¦¿Mwø:›+ëõе²Ê×Î"¹Ê!w€ 0&Àª5”Ö8H’¥A°Ý•íø4Â}“Ìyæ…¦1ÃOTµ>U´½½zõÒü­]ׯŠbW½Â²B‘Š,£!ŠU†EC£!ŽáF¾h…ã(¢ó…¥ÉfK±Y4©&Dr—Íq Kn… ëQ_´¹_±X»N78Í]õs=L " h}D¢üLªïg,ÒLY¯¹™O*Ž€Æ(þÑ€H¾‰Š¹²^¯8 «EÍâ‘5oL€ 0&À˜@%˜Ÿ|æ¹ ¯qOH:Ú¤¢ü%7÷Dšyùܹ#,•`†W7e®%½&BõKj²,„H«bøVèÞ‚XнP( §©b®°º¶0Ä0ôqêñ#–ýÍ4nfrkY¦µˆ¨ ›·›íö¦QÏ6™€§X‘Jý|Fa™1O™Àí–€¿ŸYí6ÊÉ©y³kăÍÚþ¤Ñh¼BŸyQ†1ä¢L€ 0&À˜ÀuP9ù7|_^ä×6âˆ)+¾¬Tl¶Ä±£n¾ŽjªU0‘ †zÈ Cx´ž`)R]RIˆa‰ê‹ÎŠ/Lù›ãNaå(³š8KˆaJ±)ÖÔ‰1ƒç—­Äƒi3“;+úÖÖ ®;kËèÉkWâpSL€ 0 "À"¹‚ÀrµL€ 0&P³ ˆ9ª!Õ4"°žJÙ‘í’’,çI ÆŒp¬º2™æ×Òj›k ¢í“•F˜´"5ÇÞÛ‰ÒØTŒálìöãRªÈ-A Û¬”’—­ì7™f«…½àejÂ’áz)ÆV£—åde¦ÉdõÓØ&À˜¸A,’o ßΘ`L 0i KÚBè ‡ìƒ÷Ø èn„ÿ&Z²N|/ª¹pùjp,aÙª†v½‰õ#íNÏ0T/¼ÄR˜Ú?U;İ8w:‰OÀ£o0gA Û%{ŠÍž—:qÔ°£(âÕ±¡˜ƒ<}ˆ=Qì4slì€7¼ÝfÁ7&À˜¸>,’¯—bL€ 0&pUb©¡ÆÍåžð€è»âP”µA,ŠùƉãc¢¸êÍUä‘ 
>!6ÿfº„æ «™¤á!–"Ð]?YôÿËbqáüMÁké€Ã§VŰU²¥dæ]L>fŒ˜G\Õ6iZBò4ôs4 GV0Š;`VUëÛ˘`¥`‘\:~— 0&À˜ÀU ˜f,¬ë£Õ„`Œ9´¡jAE9Q¸„ò”ùcÇ^Ñ*µ©aâ’>’œbXY¤%ÌV”F’„Ææô;ú¦(§Eæh±¤¤cŠ0WØfOµe§çÜî(Tµ_Ñ­O@Ø;è~_èc,Ó%õ3àêÝ+¶ž 0&ÀJ"À"¹$*| 0&À˜@)DFc8L‡ËacÉ #„ñox™{Jºôþ¬Ø˜œRn÷ø[|z}­ú·js…%)B‘±¦0Ä0ŽÅ:ÃbXhbõH‚ìÇòFx‚÷1_ØžïqJNÖ¥“id†Ç;UŒš1ÃÏG´ º!:à’d£§Çް±›äª™€[ˆxþµ|ð§É-Õq%L@§ÕÙuÚš'ÑÄ¿3E6V0ëkUïüðZÅø}&À˜`5›€ð$nTÔÎÙñE&À˜¨X$W‹aäN0&À˜@EpФhÔ?B©¡³ ¦ÿØ-ÖyãÆ N«¨¶¯§^!Þõþ š l¤,˘#Œµ…ÌÆ9Dní«ÔaƒÀ?‚2¯0İd³¥Øs-©ãÇ8s•{jÜ常…á’A»ãÞž¸ÃxxÐ!Ökî0`L †¨y¾ü6ÀÜ]&À˜(ÉÓ“Zi´ð“Ô‹êrEJû²Ïµd*ÿ­ìåˆââËz]„‰³ ܰ”’8¦¦°QW¤—"þÞ¿ˆ]*„ÕùÂ"‹´l)¹—N„¸ÎU ñK‰&ÏHn)ih8c®¹²ËlÍý»é¡'K,Ì™`L Z`OrµNî `L€ Ü^½ziþv_ç'%Y u‰j¯ÅúÆsÆŒþúFê¿Ö½®²!°n¸b×EÊN1,’fIÃð ×-é~aÞ;Š2) I©ñ)dC˜´Öž:aäÀô’îák¥˜’Ôó­WC ‚ï·æÌœ§L¦™¥ßÅï2&À˜@u!À"¹ºŒ$÷ƒ 0&ÀÊMÀ”°¤¶A±GI²4sމŠT/¬DïæåÙç½ùÆÀC宼„ߘ6Í?@-jâ,´&æ #TZi¯°¡„[ð]Æ D°‘–R bØ1|V“u`VŒw' +±?^tÑdZìëŠ ˆ‹OzF–¥Ä8à3ð±9óx_öº{Ñ`±)L€ 0J À"¹ sL€ 0&àyÂK‹‘h+›š°øNI‘ÅÚÆ}àõq¾!ªÌ5_Ì}Ïdz)¿pÙ¤©S߬è1WXÁ\a‘E:_ Sƒ«T‡ˆnå8q lJ•x„%d‘ΕRÆŒ‰:Ž{ ‘ys7© ÉÿÃC‡¤È÷ƒû<<°±ò\sÖñ׋fÜÝ6×ǘ`ÞG€ç${ߘ°EL€ 0&àfSf-é€pägQík"¤úî{}\#k„8~?bƒ6UÖB%޽A=W/_ûEx!u¾RsY¤„FC ;³H7‡WÚW­ÞùHçŽ %‡ ˆSð“Š{Rl’-å‚53uæèÑðóVY¦Î\ô ø·UH³S¹Ë_ÙiüØØ¨¸Ê²ÛaL€ 0ï"À"ٻƃ­aL€ 07x{Frc™è3ˆR¿iñKN*2ECª†«Í(”…Ú¥VR~À?ˆµ!×_­yÓ´¹ z#ÖÆü`‚&„GcI%ìoÆ=ªúuæËROÕ T$%Ežaˆa»XcØ’—b±œ:ÊÊ«‘®Üë’¬ ZÄx©Y±ÛcÇÆF'T®Ü`L€ xõßpo2ˆmaL€ 0&à.&Sb€1ÀwÄëíÅê<@veÞù¼sïëk‡Dú¼‰kŽÎ~¾~#]3­V«fŽÎÃbY%‰ü‹Õ¡žÂ,²D„`Fâ,%U²c9%ˆa%×–:vì@‘]š7/%€pø@I/§ µñìÿ;k½ð{ô½tÐØ,&À˜@%`Or%@æ&˜`L ò ˆ°jc ÏÇx\X c}`zjlÌ€5“fÎklYƒ÷ïÖÁ£üXÃÆ>Ù¸pýv·É¦¡w7æå_óÄÁpŒÉ÷“ß<8&:<À™=È@Cç{ö÷¤]$ƤÆdƒgÇdØã:Úô»öñÜï‰x¨–ˆ1âá1¹cr;ÆäõA§½ŽE²'þVr›LÀ]8ÜÚ]$¹&À˜¨’¬R-Ê Hõ§Jv€fL€ 0&ÀÜJ@MNâÖ¹2&PQ°)5`¾ G@Tc®— 0&À˜`L€ Ôp^)’ËV_t`WEMyê7)ŠvÎÏŠšõFíz.Yù!áÓý£o´žštÿ+)J›sÌöisÌæ¨9È¥CW|v_×?Î(Í+šK/E©3`öQhužÓ_Ѭ¹~&À˜`L€ 0&PÙ®•m@IíÕÆÅsý»JB%•/ëµòÔÿÇg4ò·Íæå@²¶W¼¼’KŠ_ãóÒ ä®3ïð5^ÚýJ´1,ðÖ ¡(¥,‹nVp]óýþŠ^uÿ™Oéɱ\*{´Ý—kdL€ 0&À˜`%à•"ÙA¤¢×”­þÆOÑ’Z7“ˆšõèˆÕÄÆEÖ¡ßãž;ûd{_éxücÒ,_z…Hh4Éû×µD¸ø ³³”³Ùg ²ÕÄqå>370æ]¢!¿N¤[.¦”©FQ^Ü'îç 0&À˜`.^,’UU!ô³¢ø"¶‹’\¹áº6EQü±/±/âÞø¹òNõÊ5ëwÝ7U’.Ìz‘g…Ûvˆù²®òÅ÷…ß—4t±øûâü¶–tËu_»ZÝ®>ˆ½“£êu] ¬vð-Ò7ÑŸÒ¸_·a®‚Ž›ÑÎUÆÊ)ˆÃ]¥¯º7èU$ ûÄÏU ^ç_à³ãìg™ë 
¯v¯´tñ²Ù¯$3®6V%•åkL ¦0X³iÐn5Ëøïþ5Éøýºˆr¢¼¸OÜ/êá 0&À˜`‚@‰ÂÒ[Ð,KUZ÷£,šc¾…¹¨o.;¶ØeÛÌ[~u‘² ×ófÎ1gbo[ømƳ®÷‡C°öMT.ˆ{ÿ‰Ÿ¨gë!åN×ûb_Zý…ˉã—Ð^ÔœK6W¸ã<Ýö%æÀ …b¾ìÛ+2f¹B©æÊºÞvø›Íwéô~ùÏÇ!ÄŠÚzúJQê|v¾rdÀìÔ"^ÓpmÄ¢?®=gÂ3úSe´hÓÅaèü“GÐUð9úð—mæû¾ ~‚ã°û ûŸKç~¥ô{i©òá¾æô „—Æý9µ-a»£~‡Þ˜Ë=`6äo óˆE™Aß*ÏŠùÆKÑŽc¬DÙ µÿ‚M¿¹t«\ЛïXä(ã–üÐUwá½Xf÷pzÃç¾£|!x‹Ÿ‹Îì*Ü®èsqžN/£Þô 9á®úÄçç%p^ÏŽóó•7aÙ¹üÏŸ«œºÇý¢lá>F­Q† }•©°ÃõÙýyÙÙŸ¸nJýáœ/Ú,ú9¸ò3[¤]>aL€8ö%Ý’u@%a°™iàî·¨ùùÒÿ<Š÷E9Q^lâ~QoL€ 0&À˜¼X$iæ¡Öñ3naÎvÙ˜‘~î¦èlgˆêÑ$ó1 b³~}c|½[C…@Ú¹G󑢜P½Æ§—ЃýRÐ-Œoˆ÷ÅjŠ?í>ÐMtÚ±•^¿«TÑ}ñí`úìsjѾ;mÿGÅUíÙ™j~WŸq©·`h…ÝA‘y¥q—¦•Âöå?ÿ»¨ÃµÙ¦yDf‚þŠÅ$ûBˆ[÷˜?ÒÈ9··4>çƒñ$ FÿóÎ ±]ç'j¯×`el>Fãn_ôAÑg¹ê.¼7èéèÅËšéÒ…ûëÃfŸls­»ÍÝw®p9¢ºšR>û“ <ðЃsí ãò0Áu:g‰Æ}WÌC~aeÛ¶ô}Nô±7ÆD9hžoÐe§5FŸl°[|vÿóí±gë줿i0Öƒ³?¤ ½ˆ§Òµ?³E{ÁgL€ }sË3ô¿úä£ÐÛsiÀo“èö³;ò¯>×£ö¼M¢œk÷‹zxcL€ 0&À˜€ PæÒÊÄÖý~ãO·‘6«m*ÊÐþs(oùOæ8ÿ¸Þ½ÆÎ3Í^в ÞÈÌ“Áuñ~šHâè÷Û¦7Ÿn?Ãióç>WZýù…®qP¬Ž¤Â6öù’ûàþ‡ïµuîÓNþÚYÕ‚žðæå^VCm_üŒb ɯIµ%Éç‚(óüee­÷êpª#IÒæ>ï(kvÔÏß•­¬ž™dþÜ ;œöj§‹²—Ód­½DWF Qz¿¸.¶ŒŽôÍ ƈu¤ýŽ+´ôyxYÓ/\zç×È ;š6wPD+ç¹Iìm6B}D±}jEÜæ¸ÆÒ¡D£¶)Wå¾"XJ{vvmÙ{)·~!î‡õõ›c¾ë®[ópw± áÐò󻾯3»5rµÿñ  Ê<+¼ÆS>J›úÎóÒ(ÜÕXxz“_ú¶‡äûÈñbµäŸŠ1¯tvù¤~7»’wÍxÞmãÑÆÓ¿K9ÔæáȦ?ç.vPøi‘cLàMïf¼§K¤äºg©§áö/e‹›ÅEyþúÂ7Óþ`«¼¼ôPи¤ÕÏ1Ͻé\ÒÛ}Ã\Œ?~žæ}‡.}'Jêø€¢Ô˜“qzÞ@Ÿ»E}¢¶Ä(¢gæ+×üÌŠ²¼1&P@@‘dZ9œ¬È\ÐáÄ:õ =úÿ>•þÛbýV§]~á;Ïl£—öÎ$­RðÀsKƒn´ªù OñxcL€ 0&À˜€ PXx‘KôTkiK¾Q’dÍÁÉÅŒì[ĵ™í¤¯…@þ¢ä=„ÐNü‰î-ì¦á¹/ßöà"x^!6KxPzý¢ko¥×‘÷'ö5¦íîÓ®–K «UÊ…¼É¶ÓÔGx_?I l*–˜š±My´Ù¯ÔV<™á˜³úAzÊBµh¼çFêyC‚[¸l{`85o’TT ‹÷¿¤l—@þáÛI ¯%S–V£ÉpÝ/öó†œiYø\+ÉõƒŽ,w äü·¯Å]Ï©%¯Q¸¢lö7=¿Žž•!(‡õˆøO~%΃ç3(L$äš3ðLJ ¿·b; ÑGOg¾ª^‡gW|POZÚ‹â¥nµÎþZ¸ÀûOK3„¦ýuŸ¾H…Ë?ÎM§!~G7Èj‘f’d)([‹†-¦_kešë  äõ¡€«OŠ­–ÂØÅ˜Š#¼Üfˆxqc5;¶ZS‘oå×þÌ´ÎGL€ "€ VE ¦ÂÏ¿(„ðË{§Óݧ6©×Ä^œÈ¢¼¸r>6>`L€ 0&À@ ñè-\TÓ„ˆÈŸ›WH\&«·ë]ZõÁóc.‹ +þð!ÔòÔÜ £?ï2þó.¢˜äK›âø=ϬӅPzý®:Kß_»Žˆ0±¯¾¹„ý:„<”²".ÙJõƒ‚ó–a³u2Ô€Þ€ãk$éÕ—)ʤ3Øáõ ‡è¾ü?óÆïæ8æÞ‰÷4Fýî‚2âúý%¦v­ -"8Å=×âþþÃôiÿ=D³>« × ÍPЬ@¿›ð¶À5é „(xÑtŸ"¢]¼g±‰5–D¼ºå\®¾÷»â-GÏÅ#–ëßš„ÒG×*mA¸¿ÓýŠpxÍqƒ£OD'2 ½O®3÷vÕ¡“µþ¾_¹ÎKÚ_û3[Ò]| 0ÿk6€òd==zôõ’F±Ó‹û¨Ù…=toú×x*\ð§äëFÏЗM_rÝÊ{&À˜`L€ äðb‘œocþð´¹¶­ (¥V^Fx§ûƒè 
3D˜"Br]ïO—¤,‹ÌÌ_Fƒ217wØ‚]p­±«Lñ}áú‹¿w½çÅë8sÁzWi÷ mØM‡“L}[¸Bsó‹¿óºóžT-BxÅÙÙ̆ /­$Ý/úWꆰíðœ÷Ìë)Ëã^öˆY=1_Vs8sr±‹<Œ(ö^‘Ókq‡GÆz ááûŽæMߤ(Kÿ;ÇôL§:Ãâ‹Ôâ8ɵN/:6¬;\TDýh?eêŠÌmvÜUöWLJ\¾_}s=¬p•H?k»Ç \ç%íý0?úbf£Ç’0×8ºS˜þîè“•þ󚿱¤%ÕãºVžÏ¬ë^Þ3&à „¯½î–¶B½ „q»ôE𬠞Ö7ΆUä=>aL€ 0&À˜@açkÕ¡Ñh@¢¤°›þHzs–…@Æ\P5$7Ðp9ßý(:ô„á\„Ü^ɧlö Jí¤m;v®Hö¤&sµù¾ùvH´áعF(ãOv•)¼þ]Zm  ˜·l ¸Q9lAøj 6„“ Ûu½£ÙBÝÅqÜ˾}„@ÇÚª«Õ¯ðÜŠ÷®¹]'wÍS4œ¨ A Ÿ&J§öMkÿVRÝ+‚è´Á ˜{\T´#<^wÜü¯ZHVÒ}¥]s-å*#2kàä¶FÛþŸ½3‹ªÚø¹³ÀÃâ‚{ŠešYZY)`ieù,-­‡€¶Ø¢±¨½Ò´÷ªS¯^›-¢,nY½ J_𶙿š‘[–[І¢¢² 0ÀÌ=ÿßïwÆf@–æw>¸÷œ{–ßùž;wæwÏïüŽœfÇ€ÂÏúå† Æätñe± ΀¿ÿ(»WŠòÏ{;e®ù¥•j}*À>é>9Šë²ËKëŒmzÏZ BçDÀC  ¼æòGìJé¤ ÛEC‰D€"@ˆ@-ÏT’AÄ™¿‚¢>“r`¶4Ö$?^¤QÑIß®ŠÄ¾ÅÀ¬ò|˜5…ÙÌP¬ õ‡™d_ÿãx­µ„=eöT]Xž2 œã¬àݽqcèò {•¶i •.Óh”wl‰4-)¯¯£’‡ë^Ç ‚uÇB™ÏÍ\ó›º>*^GG\Ó‹ (ûbÜ:þìÆßXešƒûõÎZP>«jü`ÍpLí]àŽí¡¯rðôçzçâÑn€5ÓX{|ìt×Ù/þ·zñqX7ý lÓõl™„™ÿD©Å”Þv¦×^}¸ÿô®#½“?fØß™à9;`«a‹Rq¬xìA[±Œä2º>{YÑÆl9Àú1ð.ŽÙ ]ÿ gaæ1ù vLžºâ“«kç§!ã1a|%x©þßVõ–?`Í7$UUƒGì“ç»OîÊÏ6@‹ ·û"G?Ë›eа…ßðGÖBŸ_„º1ÝîY”ƒ D€"@ˆðfn¬$5x6‰C–d8– ­úÁÖA¶éÉÙë Û@q•/œ€5¼8›™ë}ýeÅ ž ލ«´áúëò™Ï*ýXµÙ£±Õ¢6Éñmκx¬VŒ× ÁéÖåTë+G`Ž`?\KÌBžÀéš^50Cü3¬FyÏÖH~>>’B/€’P°ùZç_ƒ§é P,sú®€~*x5»Ì<3zñdò'· +k€ÃŸ'ªß½~ GÎ%÷‚­ªëÎ^þdWí6Pö9 Âi;+‹ò6̽Nñ®éÂÀQ—‘Í™¨µÚn k¨>{Hx²úSpÞ4ýß N¬1ìõ‡í `Ö¼‹­Iy·yvý*,14]ÇÈÀ û[ [Xu*_›6óêŽr¦µþÂIÜ+¥åš[?€ñ(Ö¥m§ðu†9|/#ä1É®“R¨Kë§øM®§ö^¢ŸÆ ×UÁzùw>=w_>|2U˜Œ}:W,ĦC»a›0ÜLP‹’Ž2 b½ê\ }.8ø× ¬Èù=[×:"à˜À]Ç2Øø£ÙÍ€éx D€"@¸X³r”ÓÓÁÌølŸÛv,‚YVÉ1nÓ#{!F%2tŸ {˜µlË. 
\àRJÙrèO5ôǾöjÛ `p¶S¾ªÖœÚö2Æåzå¶‘ ¦C®LÐbÖúÁ wÌÆwweXRÜÀëê¶C™ jõTYNû9§bاð’d¬snˆã:LÙ •h®o}ßX·uª@.¬ËÐX^rŸ ¬åþ´®{+Ìvw†UuK¦×XÆ]îYkY[ò|ï1#ÿq·‰ýv¼i·csÈæ†ïN÷…¥ UÍQ]“눨dý{ ìÃï]ûø7¹!'gÝ«f?î…1Éó¼1¹çÏ-Ž»°›"lmþs÷;ØÍšä¸+| ‚]ÕKÉV|_o%‚Íy&ŒÉ&“}m8&j0×z?Η%&·íç$ü*“Þ0&ß5ØÀÛUo…›fŒW³Í¿™ØÞcm÷9QèI0& m<&ÃaL®†1ù 9Ƥ)ƒQ[ÆÆÄG­ðìߨ—0T”´h3ê¹ P€ë)¿²‚ŒªUjê]o«ÎZËåHWò\T\ÅXƒN¼lëm¬²g¯Í†¸ß&İ7òØþ·¸²0ëoޱ’ûu›ùCƒZÆÝõ¶sbÌ–‘Üg¨ëi°.9¯íÑYŸFØy±á¬Œm'D ŽÀ„ÃËØÈü5–ì¡üéÀ§Ù®®·²C†°)¼ÇÐã5ô€­«zĦà.çYL FbGø$Ƨ; z'$ê5 DÀK ¸±¹µ—Žˆwû9X[‹kª;¦°_ÑaרûlõàîèD€x Xçð÷ƒ©õd£ b š+)ÈØ T”1Žér@…ËãG9‰Ž^D€ƒmEÓÞƒz)ê* DÀû’ì}cÞ"=އÙãs°Æ×T£çñçéÞ·E¢J‰ V˜Ž9°EžúÆ’ŠŠðŠkžcûBÃ-ix‚qL·V”±–Çz(x‘™g¬̶½ õ–"Ðî Ô½Ro÷]¥¶$TA(‚µÀj¸¡GÀžÇ`6ìfî-Ùgª›¶'pÇñ/ذÓ,‚T+|ØòkŸg‡:Úw‡ð{ç›ØÒÁ/²Ç÷½Æ|ÀÜ–?Þ-~‹²ÔC'íŸnׇf’ÛÿXS‰ %@3É%Fù¸ ÖïÞÊ2)ÈÑ"@š™À–^÷°ãWJµV)5lñ—*ÈrÓ¨@c>ÌËc=¼‹€¼¢@æÖÞ5ðÔ["@ˆ€ HIve!D€÷$P¥ògiCtìpȵ,uÈËìhÈ5. Šù0?–ÃòXï"@æÖÞ5ÞÔ["@ˆ@c¹uchQ^"@ˆp;µ–¥\ÿj£å:|U“Ê5º!*à–ÈÜÚ-‡…„"D€¸šIv‹a !ˆ D€Ö$ ›[ÓšäÖ¤Nm"@<ƒ)Éž1N$% D€"ÐŒä™dZ“ÜŒP©*"@ˆ@;!@Jr;Hê D€"à:YI¦- \gF9‰ ÞB€”doiê' D€"`!Pç¸ËhI£"@ˆ H@ D€¶$p¡ÌÄÕJ0x4µ¡ð$ çÆ%åm(4í®•JÆ*«ÚVŽØ©ª†Ñ˜Ôމ Ƥ‚ÆDº)ƒÜãsÒc²ï·ý,gûvÍ «YDøðFè¤Ï èׯ6Ö±ÝaLÔðìR»ËçÄMÆD¡PÐoìFª¨pôvŸ± Iˆ€×ˆŒIù¼³»Óhâsvd%äz-ê8 ­FàÍw—>Îagìƒçž~âñVk˜j÷"bSçrc岜¬g.´ûÎR‰@;%@æÖít`©[DÀ“ï~›‰ÂÛ0;$mV“– ?2ÒãR{zR?HV"@<‡€È™^’–s­çHM’zÎ ¥ßWŒéèw¶G  I.&@û$_Ì„Rˆhe¹¹KjX.['7«/ÿ¥ &šBgH;“ú™À…3†ŠÓÏç®ÑUÈùèHˆh*A!‚’¬Äug¤$7"•³K ;#>ihÔ‚Õ×ÞÓ%xß׬Èn&J$DÀ­ ’ìÖÃCÂï$°ïëüQñÜ{^c|EP+ï÷Ñtíi‘1i_pïÈNKÎCG"@ˆ@cˆ\Ѓ; °ÒšhfG÷ø_~ÕýÆ&UY?«½4sߨ:"àHIö‚A¦.O'óÅÌЇÿÈýàÜ´X`ŠñõF°¯*8™›øç9+VËyèHˆhˆ€ z˜Hf\ ™ä†8ѵ¦@ ©ˆ~©#»„¨Waºp0½›V•"D -ÐZ‰¶ Nm"pI²3¿Û–?+ÉÍ2”1Î~Åí¿)vÑ ˆèÔùãÝ€q D€{Œ¦iM²À2·¶ˆÒ.™@vf|*ç|Á°è.7_reT ­J€f’[75Fˆ@óЉÙl©\oÕùÊqä1‡9לò/Ä"@ˆ@»$°IgÄ~ådÄ͇™æË«Žüvãð¼ÏWð'œ °ß¥oä¤äÛ†ŽŽí.ø=²B«¨Êÿet­@IDAT>þQg¯s‚ X¶0©ŠKœTfØú¯2Kõ˲ËËî»ó”{pk—ÂÏÆ±G~´(¢•Ó⦂Y·IÿÕ†R4ñæQ·[Ì™móòuÜ÷äÄ»é¿o0çÏ6ñ‡ïÒY7¼sâÝçùã÷$¦N}VªóN-ò”Éeìt6§“Š™˜6 x‰¾\5”êzß‘^]Ù³“h S Êb£Iè#µ/ S~¶ ÍÓ—}QŽ,øÄÛAY‹B» e™6aêɘIŸI¼¯5ð×G…YË+×…ý’XÖ¶]þøã u\²køäÛ$ Ìÿ׫kzê'ÜfâsŸ¿±¸–»ÿÖ£ÕK Êñ¢oHJr”èRóØ‘• Ž$éúbóÔHµ"ÐRêýh©F¨^"@ˆ€§À=.Qf0‘›š]p¦œJ3ÏL¡|]såõ¿àµˆ‰©]†G'ÄsO¿ÀZ[ɋ֕B”³¾œ€çê‚ÙÁT¬íÞ%Z«íðcj¦ÿÖpÁZ9l¨_A(f .)(èÀÊxöè 
­°YÛ½{´Ö/0]_­¸UVR­óÂÔ©¿xŒ!”Um€z‰¶{¯è…*O_Â^âSo{Vn³ƒZ³W®&¹¦èä[Ú`ÿ—ý¡Né•Ç÷ý*ç±=jM0æðàêÛ_1âºsFGXç ý}õÿi„Šb¿ëîfà¾!(ßñÄçÆÕ~K«öh»t~T£ X«UalÊÈd¹l Êr¶bEPeQ¬¶CðÜ …"™Òý%çÁã¡ûï9*õ«»2L€Åߟ– žû+Y«0å!—j•Ï}¹z:»m’¹.©õaúƒ;w†¨ømpç—™JqÂ|þ»B€×Î$‹*rÞå /Êsé²ÓÏŒç&¶¬½¾l½tBTp–7Ýî!IAˆpCµ³ÌGÖϪ:ÂØðac“‚PÊ*UµÆWá³>"6u+8{ð¦˜äËT¦ªòœ¬g.¸a/)à·o»H:hÎ5˜<^«'ŒY¡UUï¾Øt]mÞ••¯§.2n_uŒ=8ê Æ6Íi°›‹fV¸òÛ;@9Äý•`><f®Í/&¬ò}ôñ9!×^¥,¼ùõ¾ÚK+ÿŒþûVTò_Ç’„»Y•I4£ŒìóM7 Â‰ùŽ=<õ+É{ ”[¨ºö¥Gmuxwü|Ü©ü¿¯Ûr´ø¿ÞWºÊy‡ c¦«º+¢KºÜZ¨d«¥ä±Qzçæ t™‡jëøð¥_ÅiTdŸÄ4È‚GíõjÈ—…ùÞfàçö¯Wã`Ý3v‘ó?¢£Öt7”†i{²{…”Ö@’ ÚKÖÔ,>Û(ÕWþ3ÍÏW&â9üIA«0ä _l–ÇBW›LWæm T Í$»Â‹ò4Xm˜_åsùµW±\öTxÑs®¡*ˆ¸D4“|‰©8 ÞG`ÇúY¥Øëܬ§N€rÆlÆ&ÖGPùçÒ<ãC£ôö”Ùó,*]U^©‡?Þø°—¤MÜqþ6ë\~ÿŒÏ;Ðq‰±Üø¸uz£ÎcîZÅ“þŠeì)Ș^Vr:A«©\+¼¹NV1™]ñ`R4”bìç;ú`¼J4³¾DV1MÜm«µÇmL»8€1$(ç5=ÍÑ‹~a|ÍPi½õy¯ E圥nþrÑ…–5Ø7Ž¿±BVyJJ®ûgÿ‚¢L…3åµAY´~é‚n¬HË—à3Ö¿«—¡tœYAþn ^´p•|îŒùìé#ð/Tå{‚…ÞV06Î4¸^œ".€×0ÒšdQA{%» 2^2Ü5sÎ JÕ¿á»âß—\U@ˆ@‹ %¹E°R¥D€xÜf û»#+qkÕ¡_;q£ø%Æ}ê;|ûßp÷fÆø°Ø÷»âÑ“ƒY™61¿Ï¢Êý)Å@É\YNpñxÙ “N*:_–¨7°qú ¹gËó„±ã×*Ö_tm\÷j)ÍWm>bÄ$Xç;Òõ†?¬ãŽÎÑŒú†¤×”´§ÕÚ©˜Ït,÷­ªb(ÝÆp›‚|æÓ£¥-³¾]}A¿+÷ ¢²8–)õ{i„]6Å,Q½±zˆÖ§r³bVñ‚üÒ¢¸R«?xh§þHÞü ¨©Ì|•9–Â8IþØ6IÔº4:s•€lnÍM¤$»ÊŒò5nªŒ…9äã¸l§yj¤ZˆhN¤$7'Mª‹¯'€k™s²Á*›±ìÌ„ÜXf2²ß1®dš¹‘1iLJŦH3‘÷¾ J¥{TT Q”ýÕK’H¯,§eðÕ±ÃÚ„Ž>þ0SÚ´¯•€åËSW«ÐtS…B§?iúÊÑ6LÌÀ/·i£¢ÁN¢uR눳spRVê’®?gLFGa]Àë7 SÍÀbÖõüñúŠ0ý‰ý߇øñtí½×þï;áÎ}¦iFÛY3x]‹ë˜«ýn­[k ÛPIœ9 yò; Ö)ÿù¯þ¶ƒ±q²M½0…N¡)ä™d…  s릤2M&Ër*ag…¥Ù«âÏ6¹*Hˆ@‹hÚ¯™‡*&D€´/¸>yÇq‡±WÙéq³Åšš1Üh*Ã8÷þÌívX¶˜ŠŠÛÛ¶ Bp—ÍÊÚ\O{Qàk¦û¼fÌY#š5:êµz` mPÉ©—Tj¾·^zc"`æ,¼õÞ/]a»)IáUú€B^?~Aßéõ>³ùf1yÆyOL“6º VÖŸÁ­_¼Q1qÈØ1Àôà(LÃÀÏÖüMÙ¶øÙ~·”–þýáñ·¤q ¨©ê¢ضyíÇaV>ãË{ó5AkõêL>{ôÌ7pÈÔ‰ó'#“ì—£Ôæ ›ŽI³ð°™)ÉÍ”êh4øX“Ï D€¸R’Ýi4H"@Ú=œ/fø9kæ1ìhöéÓ÷ˆœÍ5>ÒL`„òö1©«áÆ[}v°ïÇê ÐStÏäQwþÄuÓúó¤¤P>3~ôIزI¿ì·r6¾GE!¬=–ÕÉw-æ)¯wàó^¼ö¯ cjB°ScãðИp|æs£¥m“æM½–§|Ú¡zJô|Iá5UƒfZ?”Þ¡›c„Ùjý²;Êù܇näËÿX ù;;ž¬õ«J¾.ª_ÂõXÙ± `_çCgà?Íû«¶¸6@¬·¾Y®UPûŸ“ÎýÛiÏhز©7¼,ЋŠ00ÛvÁ9¦4ÔÂÀÌÏǪ|÷è(¶pÝÝýÙÝBU1Îd—ûNç1w~Æ_}º'×éúFÝ÷Ÿ9z´Ü>/¼–‘”d0£'%ùÒPRé&¨2–Ìá‚â¦ö²SB1P1"àv\øw;™I "@ˆ@û ^³·ƒ+h¹3°FmSùaL' 
¾+À/°cà>X3™&yÏC¼ÔÔI^šåü-q쳸¨têõE³Ù®ã;.5„û÷½#ý?Ëxòôä(ÆÊK¦³o7NÇ´n°”êöŽý…Ç¿“fS1ÍZá´+0ã«T”czo?Mq1l›¤:pj/;ð‘”UÛ™% /üx#ÖyG<~Uõ£^§w~šÏžÙÉ&Ko´ÁŠ—…ÒI…å2r¤Þægk½]×K†H€0¯ü;÷Idçò’ÙÕÊWê_1+Ó}ÒÞ[y*jb"+*‹}•ù¬^fÚÎ=õçN'³)° Tö“ÖòׯcuJùå_¬¹.¼W³]UAÁV ÂÊÉÀ¹Lâ¼s,æ–^ø0+ËòºòxBãÀK*½ÞS€»¶zë×W å&M'›õZ¿Llz T’– €¯°)"@ˆ€¸)vÑ WËɈÿ×1+™"‹,-'3îãÖ—¯[çËL)™²Ê$Ü=«Ê¶MئIÁÖ.½­€ ãu¶×·´7®À Î_Hù öù°Ç*Ë]ÉßXy“_–]ÿ¤ÄAbÃ^…Üy?œµcᬼ Æáî‹ÆÁYyºî˜Àï-™#0ÅÛð¹šÿÜì'žuœ“®–%ò¨ÈxÉöÌDÉñc˶Fµ"àŒ)ÉÎÑu"@ˆ€{ÂcÝ®àŠ¾Û2–¡ÇlXÂûÌD¾·-#a‹{ˆHRÏ"ðú»KâÀiW*ìK6ï™iñž%=I۞ܕE«0¿ðS©˜¨¾Òþ N}NÁŸR½¸pß× Mvf%×OG"Ð ˆ\ÔÃz0‹HInìa}Â-‡]žÖãæèE]~Μq‘ë‰K<šy·öèá#በf»²ff§Ç¿“½2þ LEá+&ˆÂŒGƤ¾“2f*Ô§@ˆ8ìRš½[úo‹’<ïO~íaÎC/•OÔñÄû§ êÖ÷T©²ËåÇUðž3Óï–ãÍuœ¼”ÿôG!·Þrü¢ªu°­Ü‚_¸´%ÚEÛqBUMå)µBµ)bÒ¢[Ûq7©kDÀí ’ìöCD"@O`GVÜï°VùÙíñ›°4çâVð}‹æòë.Ç8ìÍùêðIÉãñœðV Ѭ$CÿÍJ2çBáÃÞ…‹÷|©Lü*™c~XÇúéÂWº E¿Y/«Yïµ›Mnˆñ«ØÓû¶¶p~ø’_X4ÔŽ»]Û½úéâjn&(„îl”Ž,>Ým€H¯!@>¯jê( ÞL ;3ñ;è?þÕálMôÌ,s>°‡²G×®³M ¾fGzÂ^9‰@{'`z•B]gn ÞðÔkyâðnCö&¹yçü‘Oêúç¡Äw¦ yÉ¢ÂËÉ ôbÒôª^ý;Ÿ¬_Ÿ±~´™bJ¥PÞPU}'²e†O4ý»ò\CùÚ㵉¸E{f{ìõ‰x R’=e¤HN"@ˆ@3ÈNKêð±¨(±»Ðµ“Š ï@ìΈûõá>ªÑÕåúÿå®™ãu?P%&ôÏ+ˆL%)jÖŽ»RÒ`{3yš·à‚5ËÒåx¾Ôý˜P ûmC±‹˜jû‚fWóL%cV›†×Ëø çþ= ¥‡ 4yë4Së\R^éØ$ß­0C¦#&WÚñe*VR^âÈi”ÔW”{(Ô<ìnEÖ˜~ùúp3{0«F8Pg=mü A(Þñ2çÆŒäU„iü°›»"@ZƒÀµ÷¤t ’áW«(ÓoŽN¢Ä^U‡w‡ŽeZCjƒ´ÿü')Téç´§s°”dÖû@/Šè~hÁì¨:”ã£.gÏï«fâ Ã[f¹ léS!AÁ²8Å‹Ïᣫv¾¯o—¬‡|¡JPô$%{<(™! 
ÙI_Ñb[Ϥd~,¨æPز °ü6› i•J’â¢C>K˜ú%Ve‘ÅœéS?WÆáy÷ôÇìkÁ`|1`½CWEÇ߲ϣ`,o¿’=s†ÝåSlˆÅ´ŸÒ\ñ ^ÝóÄËójÇÓ1ļ_É#û]Hœ9®§ù…¢9¹Õþ‡Ç¤^7Ã,nªLÈÉÂ×.ˆh-õŸå­Õ*µCˆ nK½aggÄOF…T‹¢ ŠÙ¾ýnÀ™f6,:eø°¨˜P¢@<›@¡Æ¨Ç€o-óšd8W*Y18ô²˜ûú±½{Ž’Åú·zõÔ¼¬ Ѥ3¦a3R~ÿUîýC›øøIA6°+úiºi¤ÏŠ|]:Â,tÈC9*È—A=!½5s±ži þº€×µÃØCŒõf מLÀ8Öé_cÒ?ð?·ü<“K¥‚iÖúh6ÿ²?&N¡Üz>d¥¢²øêÁšh?I–ÐNÍX[|‘ah.»AY« ûj4{ü¡>¦ì^RçpLÃ6ƒ‚ mh;i–ˆ MqÁùNÓ+*Îõ”drÒ/k¹ås_v¢¤\y+ÓèPw…A;$náïçå<æcÝä²+ãpb‰!änÐç®0&¨$w¯2“óSþXŸÆ2÷ a…O±>¶´kTkòB+YU5ôûðIõKæ6ÍÿQIÇ›"XËÐô¹MBNFü.x?J r›à§F‰ D€"àœ€ì;Z ›^=³¿æxq¹î˜"ö”… ÍbOù€¯yâýB¾´¥:÷“óÚq66ÉJNé:˜?åP^ëüqµí¼–‘7ß:]>¿EòÄ-Éj™ÅÆk¨(¢|_Z{™†6…´å?æOÂ<®ô óYdü‡}ff®§’o<ðç˜nn»ŽŸÓq€2s`6_æ-Õ &Õ(AQeÆ£a,1nëYû±üFLÿ³0ßâq;ú¿|±;x'¥ÓDD§Î¿)vÑ ©OôV!@3É­‚™!D€x>ÙÔ:'cƘݸºÔÄ?Â^)|gê}¨¹_x»výîSOv9³qÀ|óÃ…pœ6‚"ÿ(ø/lgÃ$;÷ÚJ8Œe·céCÉ,]®ûƒ,ב¥kŠÍ°žÛç¼azÏNE8îø¥ÍBÞ&^³¬S3Õ7C£Þn3A¨a"àeHIö²§î"@š‹Àþ¬DÉT5;#aIvÆéÐJCM.Ö-¨Ød…Ê¿pxLjƇE¥]ÉxN¸¸1¥ûØè#ZL®me¬ª$%VN¿0„ý!Ÿûø2X§¯g~~OÉiö޲²ö ˜.ãÖFÁ<»`¯>Se¸[ˆÆlÞ ¨Ì„±|ç “k¡ê.YƦÛª2¡Vß—f¿ZÖõÕ€™²w©_ræzÇzH¥+¨¨Â¦SÒ{ÿÌŸNÓƒÙøOj¶À‹†Ó­g¬cªžr.µ^Í`®´ë8´¸û(»_|èôyAºÞÆÿ²ÓãÌ.8Ý7·Ð`YÐÆ"QóD Ý¸hmJ»ï1u"@Z€€NܽšI?ÌáÝ?E%¿R¥—c¥’§Á¾Ìªíê•{y‘xº3+ñt AUFSg=8àb¼†;T’m+ÅýƒåP]ÅÔ>0C Áþìz}Æ‹8‹Ù«ÓÑ%º‡=‰qëðÁ?jc`º­Z`HÆØ¹Òž °m „Žœd[W!7$‹&€b¥ê±jBÎbËÁ¥~É™Í?J]7B±¬:;…ÐÖ‡Ý>"ä–X˜ýÆaEós'ÍJ—+þÎÞÑ.о´é ¼=£T+„« ])Û*yB÷ópåÈQÂÄÔ}Ù«â϶J›Ôðb4“ìŃO]'D€´œeÞöÕ<éÇ=˜fßʪL7£‚<ˆ Rú¨”DƦ~mƒ2­Å5w-%ÕKœg’UJµËJ²u!ì<X¯fæ½fŽ «u\bßåŸï=_ëtëó˜ì¾ =Qk‚ô xÏH mpßry%¹ŽŒv¶µbÿ±‘ÖM«Ô' /iÁq•œ×ÁÑ%…Òº¬+ý²Î/ŸÛÊ &èhï>°wpŽœ§QGxÁà ŽÎzuúcÉä…­’™4¤áÝ`ßr§Sè_ BY98ðúfúÀóuäàà‡Õ~KgÎÊ2ÁFd5Ì—íÂZº9ªŸx;R’½ý þ"@Z@ö—3Ž3ø‘·?KW½-ýt'nŸÅfýE¡KÏîÝÎEƤ¼q܈@kà‚ ™[ Ìd±ÿmŒ™*Wzð}üt×ÙÏÿ·lñqÎ;<ëý?2³^5ÜåA¦»*ð2=¨t \ß Î²æÁVMÓ’òа¯¨$ja;¤±Ã‚î„™í2Ÿ)šU5½ÃR×çOµÇBP²’½Gµ¯@=ÏCÙpÎë…+ °öøÈòâ«%Y^bù1šÏ¢tœ½ºÌ‹§5lá7ü‘µ ÿ‹à!Û^>{iÎúe¯ ʾëHïäÏ@îà\k&´&è8{;öÆ ­öÊ8Mƒ½¬Ñô» ¨Ï$d‘ŠÎÏL8ë¾]é´ÄÛ[g©?D€"àþ~Κy Í QRXϼ›Lñ0_'­)ŒèÚí]pöMxTr?÷ï IèjÍ­a¶¶Ik’­ûxx§î&Ï`.’Õ‚PU›¯áiZ‡5˜/è ½[¡Ú»ÈaÊ€màŸ“j,—a«²«àÊ6zm2Vâj¿d>·Õñº$–à È^ËÖÂ媗ÇA¤:ƒ¥1v‚ùûw>é ‹[$çd$f‹\x©4»…@$h‡šïÍ];„C]"D€6'À­M ¹X1 ~ƒi.I¡‰ŒM[ ³RåU¦’äf=çÀ,¶Íû@¸1¸$ïÖ &\²’ìÆÝ$ÑH,äýï,e¾þba•» ãn¿N{ï2yÝ)9'3îcw’‡d!í)ÉímD©?D€vL 'ëÜDu•¥‹¼æLPOð5iC 
­f™¿zï¶ô3Ï3¦kp6ÏRx5¸_¤5É``LJ²Þ çV³¥kË ÒòMþžÉ£®\ã)ТFP*æ ¥³åÝ"@ˆpH`[ÆÌSp1Å’AuLPŒï7¶£ÚW«ãÁª®«`‰â—9qË-yè„X8x·M™ ®ï“lUœN=œ@æ4a$®Å†7jš[…+Ýg_d¸æd%α€`|¾]òšzš¤,DÀk’ì5CM%D€´Ù™‰;¡—ø'𔽠›1rsô¢þàÞvlóɶ•‰Í9è¿·à Q/00¶ÉÜÚ[ï\‹ }—öu÷4Û2þåi2“¼DÀ’ì £D2"@ˆ@“dgÄ}ñ•‹çò‚]w áNˆn J)(…»ŒÜôÑÏ™3a ÞG€‹¬\@7¦dní}ƒßNz,y¹Vú,,©Ywdý,—œ”µ“®S7ˆ@‹ %¹ÅÐRÅD€"àNögé`ßU¶H–ÉdS©”wª¸2 Ò^ƒ½™'‚cÜ^e¥ì“}_'ÉùèØ¾ À^ÇÒšdð¸Lk’Û÷P·ÛÞ¯† àah—`õ#G›Ðn;J#­H€”äV„MM"@ˆ€ûøyÕŒó Í ²D&nÚ¯fŠ{´°ácɱ)Â,cUõ‘Ý«ss—H[PÉyéØ~ÀV:z%®I®õ˜Þ~zF=ñ&Ùéñ/Â~ò#‡ÝŸÔkÇ—³ò½©ïÔW"ÐHIn ªT' D€xZ“ë'dÁ?ÉÊÙ>W\WÃrÙêá±)p#ÿsGVâV9=Ÿ€‚›`&YÅNŽ»<4½º|{FüOƒ¢tj¯¦@'ÍDWáP D€"@l s/XÓ<.geÂÿð’À*¥R¹ ö(ýièx?ìɬ|ä½oâ5 žC¼Z×®I¦}’=gÔHRG²3Îü[à,wXTòGy(ç`"@ˆ D étÒ çȘn_ƒ6}%(Òý†ŽŸßÙ'À¯NFávP q37% {sa?¿“œó‚çž™ÖÃMÅ$±ˆ D  ÐLr+¦¦ˆ D =@%X'nˈ:}5öPÔø€³derdL×µ¸QŸ›£öÅs nF ’›g’ÉÜÚ͆ĹÃcS'EƤ¾r)uPY"àÍHIöæÑ§¾"@ˆ@³¨ÝfŠíÊšU˜w}¹Á0à>ÊŽ*¥:'"&í ŒGD']A¦ÙHÂÂ… I Ù|ˇè¥ü§ÌŸäjKóþä׿<ÔÕüŽòEAO¼ &Ìa¥…vO`{ú™Ï¹ ŒD‡ƒí¾³ÔA"ÐHIn¨T% D€$°{õÓÅxÌɈßfØÝ¸‰/ĸȔCX@pAxtÚÇ­[Âp]3žSh]:ÎÈ8¯„V•O¿û®_K·Î«YïÒâŠ>.µÃ¹P¸Æ°wáâ=ß»”¿L~•̇1©{¤$7À©ý\Ò‰ð¢näöô„ÚOŸ¨'D õÐr뱦–ˆ DÀË ädÅŸDÛ3¿š.7žÝ/çÜ_b’´=úÏζ—‡$ D€"@€@vVânTÆ™’ꡜ™^¯Òw„—Û:EDL·Âˆ˜Ô%¨Q:»³Ò5úçP`¥™dýåw_† rP€rsßÁšhŸMzI¹úVpv%ý^Š­U}ÕyxݤÑì)8ßiúrÙÌØ†,0”£‚|YOÍË!½5sÓ°i þº`OÇðQAf¬€Í¸§¿y¾FR ì ÈÐMóŽ|M>6Ô—ŒB^9Ô»~Gé÷rß«AAf¬ùùu.ë £gÈ]£«@¹gŸÚ3$&)‰@Û /Ö¶’€"@ˆ@ƒެŸUu„±oäL[x-cŠ[0&Ħ­á"KËÉŒ[`vF³Ì2+WŽœq½NŸU" Âüï<ás̶âÒá•`Ž<Î! 
Wù,0$Û̯Œ™áßÿ,Oļ®b³}Î 0sì'Íìl–ö£ÐcPO((ã…X°+œ ñʆJřⴙ—C~hÃÅp÷Í-÷ß(l•²s¾äѬ&}»!â+AN[`øÊW}>oQB¯Ëåz£þäKŠ.är¡/Æ™¬?[Ðáì[Ÿþß´õóàå›=E1ê£}¿]'wÊ–·‰U÷Œ ú^®­ÌΈOr'ÙH"àŽHIvÇQ!™ˆhà9–¯_ïÃÆŽ­†¯.ÿ@má¨"à˜À¶Œ™§àj&æÈÉJ<2›š~stÚOïg Ê/™3kN¯«À<¹¤”{qáìï—ñe`ºŒíúø²<ž*öýýÃÎ}[ [ðO]iàŸƒ×ä)Öoj¯“炽úL”Ìn!© 0Ÿ.1‰Bˆò§ÍÜf™éÅò®i~£žO™µ&O.k–SfÑø"Ånp¥/Xð“‰l¾Ôi8!:ìÿìVF‰Cãœô¸¹¤ {Ì‘ mH Mg’÷=0~w˜±Êò¥­„b=ç!ÖBi;TÎ>Üòv…ÑØ›ÕOVºæâ¥)=íØåËÆcg‘ðÍÞè3介ÔN{ú²¢óoñ¤[º³¶U½™ÁØG;˜½+ž0foÈ‘ò=¿®¹ÆÓºÛö­¯¹õy ³i©{¥Þøº5à:áÊnÞÙYeÜûã>¸Òà3Àöya¯«Ù|ÖÏ,Û6œÉ`›¿¹ãû9knnWŸNܶ’­“Å*ÅOƒE‚™®€´=àåöy0-îl¨©|+÷˧i})@SZ“¬Oi—‚7i0GNÕdCó6°¬í» _Ázß^U1µX`/*~®°*™¯õ‘›Xp¯NG—èô¤u:žðs æñ‡µÌ%Ãi Â/Ô~w7øü±­Ë6Ž ·ª%9¥Inøc_Nsž†û‚õMùšÅË‹XuŸ}±Ë/ê—Ü.=‡¼4{Ÿ‹|[NfB–çHM’Ö%Ц3Éׄ†þGÛ!x®ô×94ñ(È !ïÑvî(¥½Ìºv´¬Áb n帢uA¹KkÇãfMâQ·üt±<—ôýzqu®¦€Rwè¾;9õöžÖEÞüÈ2m€r ›¹åœuºtf­A»%²°ÀKß÷±)í_$PÛ$ØËæb》¹§mt¯´ fû­Âš½@¼r£8AxᵓÂò/S¬ÍíjBj{yf9¸Ÿüœ7iû³õ9ñàÞÌØB•Xý_&ˆ…¾¾šŒM}~4Ï z×ãÞà—”dð .mÅàY¬»JøåÃD¡¯ €”T¨3BX/ÖAYt¸žSbßåŸï=ó­Ò#ÆK(ÉCú~ßÿ)MGƺ³¸…G,fÐöò76­"„Ç €W3ó^³.‹Š±w¥/ñœwð/ÞCúU&ª‡iî›?ë±Îïè\f„Gó3®îYÓöî'‡ŸówG}¢ôÆÈÍzêîËœÿ#–4qÓjX‘™ ð“~0GĦ½ö0\²û=Ô¸Ö<#· 0ï“\ôÄàÄäÓÇVÂvG[@IŒËàóq–6Ø·¼çªjðvÊâsV|¶¶cZTÈûO]Êú"G?{ÚåAö*¨Ó *][0mç\ó~䓦%åY“(©d¡©°%”q¼f°Iì2+ípƒK¦ó¡ÿJÊôAšµÇOwýüˇ~<²øÃ–NðemÃ…¾/dGÑëöÌqSRÃ…ô`Òý8þr¶4źŸtîžvf$þµ-=~Pvæ¬?ÝSB’Š´=·úÜ3ñîcƒƒË@AÞò¶-éšÍLÝóŒ¾è8‘À 2í˜.…„Ï,_>å?ž žû+Ù|1m@Íá³v̓þ‚µ¼!;—äkWÿ`Ù3¯ã¾§–Þs H4†Yêf/ §Ã¸$‡ªì„µe¤|ÍÆ¯K‚…U›ûÖKS—þ.dü„&â¬!¹þzu ÈñN¾v@äMÅwîÄ/C­¢2OXµÅRÖqfêÃÏú®í;¦@¾ëMòUO–KýpíMù÷ï 1_²Û÷†ä¨-f9˜åz;?ð›ëîøñ[ë8» Wôͽ想ݗ?~2ˆ›ä&™VUµGøâ§ëÌe“€í÷[Û±ýyÂØ¢›‚Êýô2Ög~uoiZ:ÑBÌt_MÙ®u_ VfùÚîb´öÃÊ_Öœòwµ}¬°*.qRuÁaÉѹ¥¸üáþùZºÌ2¿›¯½~ä€ü]ÙåNiµ¦w„O7Ì1—¹øÿ¯Ð—ëC}Ÿ>UæsWPeq,æÐ¯Ù£Êîh,݆C6oÖÉ­9“]·dž¹ÔÞóNî¬#ƶ£el/þ¬Y·i;¾x­!™ÿx}EX¯íéÇ´wTJæø˜ßüy\üMC½´1jiœœÉåh,„nÊÃú0`½¡o~–¾ÁxùU¥³kbžut¯a9Ûç…móX‡_ày6TUs¨*5ÊÆá5-šwþã ò²þjjφÚÄ2vŸI&x®mxò"àóyòþ{މ•aÚÕ-Ï5¬ƒ£g :y„#9¾ŸðÙU÷9Ç6w¼NÁý DÆ$GÃê¿›Jjß±~V)˜g϶v{Fü&÷—¾i¾þîÒÙ°^i~¹öñOóË S¬ßŽ_ÝOH|fœÆ²ñ#ÿå‹Õç Ó­[º{D x›ÚŠiOƒò|a‰!ß2m iþš¢=Iq=®ÃëÑi|÷°.‡?µÝ'ùÊž'^ž{!cë€ùGô:´÷P6ï“|.ßz¯cÌk»÷2¦=Ê»¦Üp+žK¶tbâ¹벎ú²JØÁÌÌŸ„õíÛÁ/ËßËy`ç†Òëû]€ýœ{ZxHuÓ?$>)e8øûkGVB®Gv€„&-H 1/'[P çUû«|OèõÆÙŒåÏo 3)zè ãØ÷ý ¥%eò÷ǧ‚|õ¶ráÃo ~AßéË+Á” Þ)×n 
Ñm˜!zæËøš¡þÂøÜ ˜‚:C$íÀ6ËU ¹|˜äßÃBW³Ê“·aYë`*/–~hk^¬òÕl…wAìlÕÖƒ”ß¿³—r3Sû(ª¨œ$õýÕÛ_^øñ$Öã\ëÖðÜ,Wíz)Ñöêå›Ü Ô*Z•bSùüÅD„lò_òµ“u¶!î ëÏeóù f­¿ÀÊ™Ò¼ÄüãÃwhMþO«ò¥ñК£çÎNfB&°^[ÔˆöOLK˜ ò `*Vuëþ$+«¹Y¯/š­ÿÖ€ëÁÔ0–ðzeV3=(ȽÀ쟅„.):á5½žÍsò÷dŽ¶Ý TköêÏ–¯båLÛ!dîÙ’²­o¤cÙY³¶àÍ¥öÇ’±CÖl°-×d¯“ªáqÁ|Nî˜aì:aL¹Ä=Xó2S•ë‹Î¾¥ÿöšÖ½0©kò¢3g2ç®sÖ¸=Þ:)¢ °ôc«ôкiØ&;„Ÿs&½Ø*;öý}!x]vº >WNå²;&…ìwkgý¾{7Â>CFÎB,÷­BY–¿Óñ½&}®¥Ò®ÿ ÆgVµñVP\¬íÐqneyõ8}uå­š‰·€)ä–X“Ó6¡Ï–g’Ÿ*†|g,.T_^ŠÏ ¸üzÆ”€NRaÚîÊhY¯/±½gMám ÉÑüŸ³úQÌýlËHÄ—‰–Š`kPJ#~.7…G¥ödJ!VàÕ«ÚÓ,”€æÖðt ([VþÁ3ӄÜû‚y”²;¸u†/ûzß=$< VñàÆ®›àzÕ2«a|Ï_Àï\ë:À{~Ú)\'\g ?€YZÌoUE½Sëü×Ö½ìõ²°•`nû#ç“iÂÈ`…5~`X÷ú¬“¾Öyq†º!9ëKD1O PÍk~õUùü:,*íÖYq¿{‚Ì$#ðJ8ŧÞò¬½Î€ÙZþÀ¨Ý¨°É×= •×9[^(]oLûrßáþ‘ÛÁcåë©aR]SFÍǸK1£M@Kóx×÷¾îLv{c‰U×cã¢ì6"±†Çî‹îóç°§Ü#)rX7ÎüJ}LjÛÆ›"³ôÙœx‹åž?3òØFÙ}#ñ3)ÝOȶfÂi¦ß¹…=™¥çūᖵôÎÆËöž¶Û¶!1‰¹í3ëô£S§šŸ1sG݈éÎÚ´<“æÞ-å—ë’MúÍ2ܼ¸ð‡ôœ‚±M¸k¼œÏöh¹Çmž5Îähø~ª}F4ñ^µ•‘âîM`ð]oÀúåäÈØ4I'›28"6åÁac“‚Ü[ò†¥{ý½%SÞ|oãÝ¥Ÿ6œ“®öE bbjü·¯^QoˆÀ¥°(œ—^UËÖP%š‚Y_b=;¢î¦MÒd¸ðÇöÒ¢¢2Ïq#Ÿ=}þ…Âl¬—ííŠte%§´šÊµÂ›ëöYç¿âÁ¤hé%ïÏwôéõÒ´Ã’w?*¦bžÒ¿¿¦õᛚ µìŒéML+;¶á>x¹ÌØKŽ4J®q&§)56`7@{ó¿Ùf¹û&b¤Ö£t£ä°TâüäNÉC¸”¯ž’켤ð#ûê`n«0ä oÒaaáÇ?à˜ƒ â¯>ß“ï~e˜åu<\w¥}Kßï8Ö)¿Æç è¸ÄXn|\N“Æ­ŽuùêÎD¸7µ~é‚nd",_q&»ã±”kåT¾¯]’½®\Ã\¾WªË §àl3Ëëu…ô9‚WϾYª½œ×[{^×bÝ™Ë2wìù¶^ô “^‚¤ìíЬ´èÐ L0Ø£ú`ÁÌ^ÕIý<ž»"—£±ÀòõÌÐJOe°eœW½ò®F4Õ»¬³^þá‡oÃìcgé^tÖ¦ôLò©Ü,¼µîëz„»gUÕÅaÅäßÇïêe(§íÉîR¾[SwÍÁ™Í³Æ™ ßOæ6\w"Q²gØûݳ尖9q[zÜ$”ÎEWLT…¨¥¥@Ã'¥FŸ”2šEEáR^ ÒL2H ³ÂÒO œ%—H {UüY£¾âTä½o^bUTœ´+ÒïDé‘I¬÷ýH×þ¸ù„ù·£Ö ™TW*bUÅÊ} @³^r½wzÔ*.Î;®{5[%}ÕÕð Ê‹£ÿ¾¶siÅS ¸½|z˜[Y¿š[ÊT%âõàÐb£ê׿¿;]ëS½•»¿^uU.°æzl›žY©kNe­—AÒQñئšC:B« ÍÆG®°Ž`*zX‹/¾Ü|¥lx†ë1‹?ºÿsý„1ãä&{ÓšûõÆï+f r-æc‰&Þ–-1­aŽõK[Å4B=e¯´¾ìVò4xÚp¥”×<¶³®öèpk(ËëÒ쟹ʻÇЙéìÛ9Éì ½÷]: Þè0ºí¹ï®äÁ¿UÏà){_Ó KÁ‡³°%—å²3v$µ|NäkÍ1^r]uÇ‹ßC¢Íºì<Ç¥6ƒÖ–™uU[éÕC´¨L§lq® ãýnó¬qI«öìº:îöÊRšç@@ ýðg~°pV#(…·Âùí7æ°¬7"'%ßV­ägw¦ÏØïν„¥C`n ß.œÕûáÎ2“lD ¹øh|¹ŠZx[NÖÌzEÍÕÕC<@cõ ·ê_+iôÊr5®] yò{p·ý}­²;>5ðËí\ (¿^öïÒoáDñ8íôéhQq˜¿i{7¶Q,œð Ó>‘6½ :™ê©¸ëi¤\õ”\;r4*É2EÖx9lÇz&·ÁŒ¸xdÊ”ù]õ…aÚáê¾ðrÁÒÄàDí2±:LÛ/ð6Õ6¸ÆÑLV Wš0(+ëaá7ø².ÓÑÇÿ3(ÆZ§Ùž[s´½ÖPÜdoH>ëkÖ}”fc*À1ÔÖ'­óHçÿ»(å¢Wy‡$ .úý¾»ØÀ}Uÿ ¨üó.­–½ƒ•©;ôž«/:ü–öçwÏàpá>Û˜~©ra 
…毆ꗯùI'æG¯Km–›F@iݶ\‡íQ« X«7°q¸¼Â%ÿ f…Æò¢À%9lµ‰»:î6Å(Ú~H÷SNVüÏÐ¥È~cÍ;,˜”ŠP.,AÏä¬LX•2²ÚP±?wÍœsîÔu“(è•ð5aÙÊ„#Yˆ@ €ÏíÉȘ´¿ JÕhÆtðBKWo~ 7OÕ·$pñ4‡[Šé\¨C¦ñEö'#î_è¬É)—Þg6_Ãü­óæ=1=NÚ(&XY‚é}ƾ¾Åæ¨ú³ÇWh} 8c M â90¹ÖŸ+JÁ™š±ú ˜·9äÂzê…Úmeê¥9‰4]ir ®öµ~oš#]¶üà®ËìÚÙX× òlm÷šháŸßäYJIlð2® (_"¼³j+*Èè¼IR/LÕòÄ¢œÝaû¯'Qxo|:ê59³tî ’S/©Ô|o½ô戸*»“±lÙrqÔM³s:¿é|yd“L¯#³_p·—õå¦éЛü@Õ{(S›g݆/C/öZ­ )Θ~©raƒ«ãå°lÆ}Hoá:)sð^vvû¡£ÀJ¿Ø†Ç¬$2¾¼7Ÿ?êL>{4*ծ䰪ÌáýÔ˜qÇ5ÔVuÒi;$pd½yIÀöôø•Û2â./åü©›JáV_mÀŸ7Å.„ñá1‹† ŠÒá Ì6 ¥yŸdø\6æl›ÊLæ$ŸÓ_¶eÄÃ÷0)ÈÍÉ•êò\íFIfw UÅ~!éúrßé<æÎÏø«O÷ä:]ÿ¨û~â3GÛ1ç¡ôÝ#*¿Ëî(çsº‘/ÿ_`é”èùÐc6l#o„m’M™»)ÿ-×ܹÏu/ã¹D±xÃm¹äúñ¨P(KŒç¯ñ¤†òÙOà¯O³¾îð¼ r˜ØÂoÙIwl”ÖÇM›T´lj¹yÌÜ’9ÜFÓï~„§|ÚO»ªCì\ÀíeLGvn‘.©/»Àg>6Zú‹‹Äss5F¸`¬ÐLâ¯Ç…áv9G'Œ1IJz¾"˸Ôþø…°öX/@&ßµ˜§¼ÞÏ{ñÚ¿&Œ©‘¼)ÿ“ÚoÎðÒÄ™ì.eew‰‹ƒþ^;üáp¢Qÿ•_)ŸùÀhž”Êãâ&ULUä HýäFÈÜmàmš ƒ©u­vœa>%(‹¥ô^ªäÊ/Y.¹¢Ú#Ž%¸0^–¼.žàøêÏù&óÙ±#¤½¹§M›ÚîuPY,¼óÃV|±æì9 Ϥº±˜d ¨‡Oy¬N Ô7a`æçã U¾{ô´ú‹¶@IDATG[¸®¾¹º¼vÎ\Ã¥ûÉÅqGcú wŠ|ÝXX|NÁ[ìÏJ”ÜyädÄÿ;»àL§ £pû®`Ê9ÁªnçE%KÊidTÊ€¶`b«%ùFk’Û‚?µénž¸¨SDLÚ·7G/²6ÖtáH "ÐÊÜJIÑÉ–ƒ€?8í_ÂI\œºe¬oÆÊÉåÁKô•B¬~çþ|ÜÊG[®€‘-ZK;u##¿ª,ä¦)½ôL ë1ÏìÔ•\*À6AÚ`ÅËò~Çr%ªÎa/KkY“6m’ÓBß|þWü–U…°ÿÈixtM®:9¬ËÚž_sí˜I®ÑoÈ=«?òÇv¼ÄìP ;×䨫äŽ&4h;½£¯Vު߹3__p<3Db™ÙܘGúa~Ö¸BÿíGôg/¬ÀtCP·êÚåÑ’`¶ã'Åa–-øØîyfZâô÷úùæ¿‚ó™,ûÅÍ€ˆ›¤þn?zL[ôô…­ L>›õŠLpø¤rµýþŸe<)ݰ%˜þÛôvìíÛAiïî"<¾­ e6çåœòQî‹—Îdw4–¶õ¹.»Ü²ãq1çh¸Â¬» µÒç^(û^¿aíY}ÁÑL?¥ùGm]+ug¶ãëªÌ~ÏM=ŽÚ°ö"¯« æ‘;õ~^r¢öÆ–uŒ®ÈeËκN»ç‚eµ³ñ2—·eg¯kÍÃ1¦?r~‹þ«ÌRÉòDcZ«Zµ¥£œËY›øLª‹bóX€ ó~“ë¨ýœIÑË¿XsÝyx§ßUu?uy¬Ï.–Ù™Íù93«.CÅ×?PðR›tFYiÎΈ*+ûhŽî‘–Âp•ðmDlšd݃ŠsxÔ»–ÏKKÒRT3II&së–¤Lu»;ŸWÍ8?'ST å{C‡N·^‰å|D€¸BLù|Íb¾n]“g*°,_þf çºf{‘Ðraÿ¡•4ºÎ¼æË&Öy+‡ÄyÖnË#o?S¯ÎåËͼZÀŒL3ÍãY×_[¤™:i¼nßÒ÷5ºz&õÖ}iÖs'²7f,›"»«\õ¹nìu.G%ÍéM‘¹ákëÅϧtO6M.‡m8/‡åì\À¾c²4Æ’¬í3t±ÍºzÌõÚiòÒ’\ÃÕû©ÁqÇv–Ïm’)ÿ¥uJ{ ˆè¤+PÖ¡÷¿×æÒˆ˜Ô¿öž”-õÃ}æÌ$_ÜêÍ÷–6ðR¥ @ˆ D€"@ˆ D  R•|6?,:e¸¤4Ç¦ÎÆøuÞ“VÌàysP’¨(GyØöUÍÑwªƒX:^ç/§fÝ•ÜÍ:Ή€7h¶YRo‚F}%D€"@Zž@nî’šì¬ÄÝØÒŽÌ„íÜX&rÓVŒkü|ï¼Ç#cR¤Ý$À4Ûì</6-HKo Š K‡¦ñ£Rí„@î]¬d<£V*~”=Õ·“®Q7ˆ€ËHIve$D€"@Ú’@NÖ3¶gÌØ2lOOøH¬©c‚ó~:Pš·‹MŒqðÒÛ¨ß8°L@òQàçÒQ ^N`[Fb&«6í UvðrÔ}/%Ш//eDÝ&D€"@Ü@Î3ÈJs¶øã¿˜hú§Òh^·Û-LFW‹}¿«+¢ 
‚y(“R$%Ù`”§ÝÈVþ”¯V˜ÂFéd«í¾ÏÔA" °ïHF¾JG"@ˆ D€x¬,Ó6–µQÕX\ý„"Äç±Ú'Ó`k›)6ž™µ?K;0Ôœq=lÅDí•\Ÿ ż–|¦ÄèQCztïºžÒ ÎÛ¤#Çv^{3x_ÇIIö¾1§"@ˆh÷v¬ŸU L—;ÊM¦8A©¯ÕwÄY±êȘԭ ¯ÉÎHxó¶‚ÝÉ”&%Í$ËÐèèõ¶g&~ /˜Ý»t™ÇØ·^„x R’½f¨©£D€"@¼—@NVâèý{2Ø0ðÿnÁxÄý‹ú\гÁel"IJ² ‰ŽDdgÄ}N ˆ€·€w¦ˆ D€"àÍtŠ^ë¹-Ð_ι»*§&_©TLóë¶eÄýâÍd¨ïD DNJ¾)Ïž<}æ~2»¦{Âã.oeê# D€"Шõcû1Z=c»ÁôúO‘óј“:12:íÿ†ÝŸÔ ㈀·ض2ÖûóÝ=ºuyÊÛúNýõNdníãN½&D€"@ê0{·Vˆ\»?+ôdöŽ|Y¬®Ù©ðQE(}}pOæðš=ý…òO÷~÷¬´u”œ—ŽD ½Ø–‘ð¯öÚ7ê°%@J²-Š"@ˆ ^G€ fïÖÐqɶ5€_Îʇø³ršÉÄ÷9ö?:AZVDLÊtQäÛW~ û3‹r>:öFàæèEýUL1®êÈî…¹¹KjÚ[ÿ¨?D@&@æÖ2 :"@ˆ ^K@òn ½‡^NwíÈJÈgFåd&d!00Ë>§P(ŸÓù&Œ‡G§>,cœh/~ΜqÖ&_ã{åõ ÚKŸ¨DÀR’íQ¡4"@ˆ DÀ«€“.4±f°-”S%Ù n“Jóèíçvâ5Ø9D”+†ŽŸßãÑ)FÆ,ì爀§ÈN?ý«6½õ®Ÿ§÷…ä'Ž’ìˆ ¥"@ˆ ^C@䢤$ หé6›ZãÞËàûšÜ5ú R]‚âvÆÔ[ñ‹Ž`ž}OØ(î×Lx (Ë/U>—ÃòÒ%&_£aGú?ÎDĦeƒ¬Z¡¼$)õ›ä{dý¬*w–Ÿd#D€¸;šIv÷"ùˆ D€'PR]lÞ'YhsëÆt0ûËÇQAÆ20Ó)rã´2ƒBÄx×uADLêj<:tºˆ@kŸ”2!<&erk´Em–&@JrK¦ú‰ D€·'à_Y)™[ÃL²¿Û [_@¾=cÆŽ½ß=+É_l{ ¢°DÊÒg`爘´Ó¸fã¤4×G±æ%P]yö; /GD§Ý×¼5SmD õ ’ÜúÌ©E"@ˆ DÀÍèt:‘3^!‚bÎÛo¸™x.‹³?+Q¿meÜ:,ûåÓFSM8ÙŒk®¼n8(Ì»ÁsöƒgLG¿Í è3È]£«€m¡®6) Û¡:ZÒÙ L©Š¶#@ǶcO-"@ˆ nD@àL2¹îPâλšÌÏY3e¯Œ—Š"ýûÕÓ3“ÉnTpA\EñvD]oåØU!„K@·A¼ePA@A rˆðâðXø¿‹‚‚¨«ˆ' $䘻ßçééžLfB’I&!ǯ?ê®zê©§¾ÝÓ]OUuµúÎo˜TW5;×Ü¥“íÉX'5É2HRCŽ»¥ç¼çy•b§Syùëw‡Ôå‚@(2Nf» Y“½ÿHL}úË´ï…’² P]ÀI®.gv€€€œW’¤ÐH²$Lrõ_¼+ ~Zouž÷u].»g¾ÑbL4šÜ­(îà-‰ó§Ká´¹²§üoýÓÙºB(‰@ú§VÛÿžßÑ`–ly»ÕÂÇ%É# ª#8ÉÕñ¬À&*'@ßHΕÔ7) 5öäŠ@ûêý‘™”ž®ƒ¾½TV I"–fz—y~ªâ‘öï:q8É5ÿ¢   a  (Z¸Ë è?F’+Èó»õŸ$ƒt5ÉsÚ$ŒO 9ò Š³Þ”´ànárì^?â :Vt9„µ‘€Õ£xR7ŒòÖëšufÔÆs\ûêd¨}UB@@@@ t’A[¸Ëƒ‘äÐ镜ãë5#Ú¹vhçîÿ¾Ì’á–i›}s₾||sù÷ÞÜ=µ5ïc«}v®±çD–£…pfãSbµïôÖÊÁI®•§••€‡§[ÓFßKÆHr¨ðÊ*¿~½›Ew­ñ 9Í×Ìq¤©Y JkZì³ôœ{ßÜsî×uŸ¯¦áO­ ðÛÇ;", ÛwèùƵ¢B¨D­&'¹VŸ^T@@@ Ì$%Ï+‹éÖefVAArœì¬b×Úá©;×kîÌ;õKB~Ø"ǹ®Ëô†|ìuž­h·2Œš»Ñ§ÈzïßôýMÍiQs«ËëÜlêÂYFA@@J% .ÜERI.U¥ üo“5Ÿ•“Ã<üD¶³a¶Ë|– ’qV‡žMŽòþu]¬Q7v£ïc«Yv­õƒKxî”Mòå5ËrX[×ÐŽØ@@@@&ÏXÔU66EùÈm³÷s˜M’uÜS S=\Ó}N#^Œ¾¹›e‰üUHÊÿv®öàÿžÛÀäÎs|±ñ)Õ¡®Ö šL#É5ùìÁv °Î\Rê¬%û iµªL’#-§,©w…•CAØh+f‹=ŒÉ¢÷™›ºÒ¬Üh–›‰èøc·ôœ?‰Ù™¦´ÑÆeÕtëÐ}^Û=çÆ´ëjz‚`-.ˆ @@@ê0ëØA§EY/ É·`gŸvžYV‡±Tûªï~wè6rgÚ°;ÞÐ-)ê"`ÉØ¡CRÓÌ[’æõátyæ[õ!ðÅúáûÜŠç ÙlR}¬‚% PHÓ­ Y`@@@ Ž?ujl}sýä(7Q(ÊŒ§ÆWGqÔøjßÐ=µ©lP.ø2mä·zÎ{–Þj~Ô#Üv­±³åíVKú§V[¯$* 
Pià$WZ(¨I^µ8Ù ¤…d³Ûé¶ýíùq#ÿ¨IöÃÖs¸¥{êÕ.ƒ¬|•6ô{š–½’Vg»XØ]ýv¾§žcn+çΔÊ"ÀFYž`s;^Ô§ÓWVYÐ ¡ÀtëPhA@@@ Öøvç–¥T¹iªõ»pk×iÞ¹~Äv¹VÙîƒá~U‘%ß’4½›vÕ=¯Eó1¶ª#ðõúÇ%Ås Òh~‡JÅà]Õ¡GI¥éb¤‡zÙJŠdóK@¢­4 ²r ”øXKØ¡ô#""Â,.hÖ8( µ“À™œÑyÈŠŠ»`ã®õcOwHšÿ"¤Ïv®šR;k\ kÕ½»,Ö¯÷eð5ªáé©‹&C©ôû‹¡Ö ¡d¬F`Å”GÄÄÔÿŠƒž®QL>\ø¨HJY+rrí5Êîêhì¦ùýDß§Ö‰¬¼®óóìÛÅw¿ü%>Þ¾?\*kŒžº\÷Šž¤‡ï¹B\Ø,A¼¾â‹Šª KþêfOX*u•Ì|ú2•¾ïÐI±æÃ½â뎔I>Pè¶Z‰»;´Ï¿¾50©ÌÇ}Æ&÷¸AØ]bÕ†ïÊ\¦¿`“†1"õ…®â‘ÑoûG‡´Ï¼kGú.ór½âNô˜,‰».òÓì·•okÙÄ Îä*";¯|~_l¤$ÅKâàñòÛð÷ â÷cáp•¯$úFµÇÏ”¯FYˆ¶- ¢L§¬)ÖHæ˜ÃVÝ<²ë»Ãðª‹bÁQ1ÕnG\þ·&A×Q1âjTHNò¹” @@@@jmZö÷¶eɯÍÝä,w•†=h_þêßý^>'÷ò‹ âžkeQ¦ ˆËÿÏ)N—ó ÒÜ ³Qì*Ÿ—M“5^éQ¡:0Çâ6Mp}zƇØ@ $÷vl#n¸²EI"Aix'9 "@@@@@@ê*8ÉuõÌ£Þ      Aà$!A€€€€€€@]%'¹®žyÔ@@@@@@ ˆœä $ˆ¨«à$×Õ3zƒ€€€€€€“„      u•œäºzæQo p’ƒ @@@@@@ ®€“\WÏ<ê       DNrD€€€€€€ÔUp’ëê™G½A@@@@@‚ÀIB‚ºJNr]=ó¨7€€€€€€@8ÉAH      PW ÀI®«gõ"'9 "@@@@@@ê*8ÉuõÌ£Þ      Aà$!A€€€€€€@]%'¹®žyÔ@@@@@@ ˆœä $ˆ¨«à$×Õ3zƒ€€€€€€“„      u•œäºzæQo p’ƒ @@@@@@ ®0†Rñ¿]Ô@,ôP(Y «hÑ4^¼ôø]ÂîpÕ(&1Qf1ç¹.ÂíñÔ(»«£±±1âõg; ·,Ãu~š6Œ×\~xäÞváRYcôÔåºWô$%ÄE ³IW^Ú¤¢ªÂ’¿ºÙ–JG%-šÄ—©ô‹/HctùŽ2É ÅDEˆ8º¯W¤]TÑglÃzÑÂãQÄí7¶ 4¯LÇ&£,$DU¨Ì»6p,0U;|ýz¥ÚÙƒª¾×åÐÆ†Cr’#-&ѶU£êUëdMËæõjµ…¦¶¾¸Aáö*D 5u4a /øX‹hÖ(6¼Jkˆ¶º\÷pœ¢zä,W§­ºÙSØT†-FѤaŝж‹ÂñŒmT?ºBˆ*Z‡hj€Vt«¨ áàX‰"â£cË~L}‚úRÊ­#šò²ŽŠØ Q%bé2w»Ën·¿d„I3y åµ!Ê"D88úÛ¤ï¨rï§öÑ‚@‰ŽžÈ)1=0‘¯[l     ~\nr¶À/"Ä]î;p’sÊÿ˳iàËLNj¾½<¹½ybÈIåü4á \õ%©^®ÝY®ìªƒCNzE9F˜ A>‹‡¦QHÜ € Ê@€dšÙ‹ ¦ ¬                                  å&`µÎJ°¾6¿q¹ ãy%ÀçŽÏaeq»u›1izNÃÊÒ½ ç‡Vø:?ÜQ*€€€@ˆØáÉ÷äL{ꩳ!f Yœ«ˆØ˜'$ƒhõô˜ÁýIA9×¹hd/iʬÅË8d?›ûºÕ:&+ê»[2ÇÇ·ìOË+ßñ×ïGûüF› ¬A‹j®Ž~“s(&Oܪ'UF-ÔN’h‹Ùúä°ŒÊÐ矀ڑ™gs„ë÷Í5 »“l¹¤¾p;¸Ïÿ @@@ 6°Z­K\ó’"~ÍtžSβÏ9–ÄÄî˜-§à&«õñÐ>°Y› ×‚ºX­sâ,q‘»©*ÍE¼^g¹¿õEŽk2ˆ:Ož¢^ƒ]Q®kLô±Z€é¼VaàÌüÙôÝc }uë•7S¢þ ‡1¾ß2:ºÂ³ºë{gXØœdvŽ#$e¬$¤æO4 º“„}   57|#ãc¿ŠÒ@Ê,[ŽmN8œX_ƒšœcúöj<9@9N‡ãÆž¾¯æQ‚Å^ž:¯­ÉlþоqŠ¢d‡ê,÷}íxt„7T1ˆqÔÖmJׇ>>|ÛÒ±Q_–…ãÐ ¨#ó ­>£ñ»kèw½˜:&—·óÁÿ·L– £+ôÓQ#s„³3ŒTØI~õÕyõ¤óXÒô8MD:b˱SëˆÜIFƒ€€T{“§/¸\6¿¤¶G,5¨ÏPcf–b÷Ìyúé!Ù¡ïß fçX˯¸=ž=›’¼1T}¯¾&ÏXÔU6> ÕöoYœåÞÖÌ8K|ÔI(ÔÖ•|ï““Ýoé˜ÈUÕ·¶5ϲ~3ó››%ñ-qnL>EGRæ)ùî©o>{²,µ ü-££«,Ôj—LE;Ãüi”ÛIÖ.ı’AMJÔ^9§Ë}à ã‡ð/û    
n¯Î\ôoƒdx—ôêm™,r\f ‡{vYåW_ßÒaú”4\nû ¯PÄ»óögž–îouÿY-e¡|J'E®r¾ø_Mû÷еã¦<º ½¡BÇs¨Æ+ÞäèúôHR<åuÓûÌÞP• –÷æ×âU}ôfµ0hzµxIqS¬‡¦ûéeý¯ê5¸©L…d\:ö·ä=dÙ (’¢ÆSYZ~Î'kö±^¯Õ^AZ|õ+/ÉF·â¢zËOõ—<…!•å=æüO¡dŠp{ì$o¦þ(‰Ž]·‹ÂhÙØ‰¸¼C?jõwMÌòÈì9BqM_:6þ4Ù´:Çš:º‚HÕˆòt†GF°—Vl_ˆ–¸è1tíŽìqýîË­^qÅ>ýÕLÝ¿à‚cj˜™Yß—Ö Áiu?;.N ãs¢Õ07:Ç'“¥îçGæªaTA¤/Íb‰P÷m¶|5ŒˆðÛ#¨JÛÌ6›ºï0›ÔÐì°ûÒ&oœÉaTãœ&‡šLÞcVAÓ¬Ô8£grú¥µ4-Îetª²²SVCÎï2zåZšË/Íhôʹ\.o>Ù †ní˜ó˲WFvki²Û§Û`ÐâÜÞ8Yϯɪùé¾È¡[‹3øå÷hù ºnMVöxõr¾ 2´<ªNW·.S¨¯ÐF_œ&«s~‰oÙ´y të¤M×cÐd½i^[ÔÛ»Ÿ,§ÑcEͧh6I|»gº(9äÍ é¦$oÔ£ãM!9º«2š¼GËO">Ê®îÓÍÛ+ë—¦ëäçë¡§ŒJš¬§å×u*š,§éöÓÃÊ«[—õ³?ÈF*„óòF¦¨û”Í[®f¿âg£^†^.™æËOEo>=¿–¦Ër…ù5Y¿ü¾r5Ž”¤é+´±‰V®¢©×žîÔI¬ÉSF*SæÒÔxÅûPäý@u}ªœf¿‡›'´éÜ‹ÊxËðé)N·V'ÝEãèÕ©å÷ÉøÕM³ŸŸó,ËRCM–÷©àµM+W·ƒÓôòôs¢§Q#Ä«‡óëçHÓII¾4jü¨ûô×ç+¿ÐFÝ&b®ÊPn¿üZÝ‚lô“Ñäu¦:c¶Ÿ-šNMV׌ÓÕM³Ÿ@‘å´B›Šæ'A_~½¾Ô¶Sãƒ/-¨þ:w½Lµ­\Í&ý|pUMÓ©±Ñl,¾ ¯¬^¦WuÑ:éõÑ™³ ßÔP㨟k5Éwn½z|uõ³?»Îœóëׯ\­>Â/ ÷BÙàüTC/#—À½ÄkS/WãXœ:sN+¼¶µrµs[„q@Ñ,<ÿúùcH’§šËŠõ힢H¤ö—§ñüØÌìúB2Ž£'ïãô[‰f:ì!Óo$ɦ(Û‡$Ë ¹’²]6(‚Bƒ74HÔˆ#×UjHO?Y2²+K!Ë8$ÿ”äH!ɱ<ågyÖã¡c™âÉí¤_¤ªÇ+ÇÏL–×ä’#-ä~’êÃÈú9]•™ïc¤G&5È&y²UöP9Ô2’)™Hêå“Ù£æ'9Òe ²5}öhñŠ$§Ö‡ô2µ^¬_=¦ôÑs%@¯·^dO奅ɧ»Û—Ž‹ü£0VttùÓÀ~‰B|6¹KTL‰¥\ˆjã—Ätý{7º™¨ûôSCú1øÒ|qÜÜ¢ï6Ò»PF“×ãt–ãVô€ôæ×ôБ_~­\M–§¾4RíÍP.e/”ÑuëuÒdÕr5;)P勳ÑWÕÊkk¡nÚÓâ¼iTD‘c–×ãè6¡¦qË‚ã½[Qy]VgÎ2>Æšú1§‘JU—δÐ~o<ËP©Þòtnšœäã®ÛÍÒœE—e-MÓ½r^y_¹š=t#õÕÑw.Š«¿V'’.Rn‘2tîz]5¹ü ûϵ*¤ÕßǽMp~ÍŸ,—¡1ÖϵΓtû¸k6ê×åðÕ_/ƒÌ÷Æù±Õã‚ê¯Õ•Í×uúë)­0N·ÑêñÞü^[|çD«§éåósuîE®1_ý5ÝüHÖ6ª’ºÏ6Ò3’î ?0Ýôx¤Žnm£^ob@w-¿$¹¨{ƒÒT¼÷:1ðãÕïXgîÓòi²:sNÓëÉå‘-ÆF]§^¦7¿–O«?ï«§óæ+ƒk¦ûÕ?°\Ÿ^Yy• ç÷¦QçªS/—’¼Ç~×HaœWVçȺ©sÆ+Ïc´ùÊ >æMÑÊ£*ze49­‰f‡&ãñ³‘ZDj>½,]ç§N1ošÏný˜šlÚF"^R æÑdy_¯?uŽ©iºýM–eÔf!…MÆcàöŸw£N1¯NM^á&$mº,ïëòzœ‡šÁÏ›/NÓCmb5¦çúd¨“SÝ×Ë¢¡›Â42Žõx¸‰ê¯Ï]X†¬¥¹5Yý˜åÝ´² ‡eÈ~ùÝz~7‘¬Ñ¨†¼¯ç7jq.—WÆhò†,ãr™Ty£Ë¥†n¿4£–æ2yÓL.³*ãÔŽÕüN-ͬ¥iÇœfrjqf¯ŒÙéTó;ÌjÈ2f‡g±¨qv‡/Ín·{ã⣤[Æ[,Ï]"4ÕZ™mËÉ›UžNe*$Çãvßôì¸!¿zKÀßšL`òô…—dy75|ËõNò€Ég"å§é™8ŒQ‘tÁeÐUyíʱQGk2—êfûc³ úR[v¥Ï.E9ENóL[v~êjkƒbÐ üíjy1’ìƒX·vÊÓV¡œdV0eÊÂx)Â@+>J¼êc‚¦Tq»Ýу„ßóÀ   •Fàåi Ú˜Œò×Ô‘¦.°E0³iÅâ™åqŽ hp/Èο1 ƒ–ƒãª#À úDÆG}E)M©K¬B«[÷™•×,B’ž%gy0uÊ|—uè¶õÖ+UW›Ú[Òc3óo¤‘õÏÈ9¡Þ1å8Í1›nwç,XõdSšr]úðÛUï 
èè*[m’¨hg˜?‹d=3;ËÂ,¦á’'èFQÆsκ=®<;nèϺ B'«55Ʊ›FóZPÛcŽbwÌ|æ™ágÂYëò5¸ øNr¸ÙV±¾°†í0#ÿBzÓíyš@c[66jtשÖ§u>|CŽ ½m&¦ºsN,Ynme+OE}¿]ïJõèè*Ę'œa\ýr;É:;ïrÛ–ÇiþþéTAöYêq χÚõ2‚€€€˜2kÉ›4ÊtÔ®H3­c»O8Iqƒ[D[ÌÖ'‡e„S/tU ëkó‹<›£²Ú¦ge]â*0d¯|6.³jjTûJ¹Ôˆ þÖ|9½‰´-;;}y¸Fæ}Î2:ºjßE\£°w†UØIÖmT½÷ØÈQôÞße¶œ£Z­Vu!=!€€€T„€êðÈ&WU8DZyAÊN izNÃc¹ÿËúÔz­=þ-\]S7œŒßµa½æQ¢cUCã·ßŽÞ»wØg¸0²ÚHrà%6b×ô›¢dÉ–ôØo¯ï2$?P¦¦WvgXX¸ŒŸ:5V54,Ú @@@@ª/z×Ù°CQêU……%–Ë«Š²J+ã#E‰ [¢J“« é÷Í:Ö²kÁÊÚý.-­>w&8ÔµÇþÅK“%=;üõ>ðÒÎ+Ù~ýÿÃIÝù§c?˜@ØdÓžzê,¦$ƒF Ô(´”ó¼Õ;ªä_£¸ÀXZI€G~æ­^{^Î.noçmÎ%â›M «ÜiKœ'~_1û›JŸúßóŒÒr­ß~ðÔÑÖ¡žªQ»”»^|Ïýd¨ùÎ)Oí„õ³m¶éïìN™Z”`´Ä;¹:ñ†¨Òi3˜²M²1½2ª’~Ø÷­[Eß6I7_Þê’›¶ºáï-ºú%c·Æbâ PÇ Ü5|ö¸O2˜F«€Òw é+Ø@@ ØÑkäïÑîã-©Š%¿†Fß{dðSSz\çœÕcØ,|úG»&xtë‹ÕÏJûÍÞ–¢ökÑ%áfINÛ“mûò&RÁÖ’ o*}Q8;BD¥‡Wk°6s„Pµ(cƒÒµ€ìY»m[ D.}¹JL÷ÿ\h€XH‡jã@–Ê´òtHŠ!^ôÛ0&&·oi_“4fLzx•k»wÌÒõOn±æ­·§Öœ˜°$לªÃRs0È&z詟>?—âA@ ö(ãÈÏ]cßzÈ‘s&e÷Oǻ՞ÊW¼&åݪ –<¿U’8mÅœRKkˈú ,‹Âå ëEÈúN Í–#jGEª®$)|žšÄ¹·T…Ý—ˆÊsȪ¢¬Ê,#É•IºA ¨-=‡u蔡ª !ˆügŸÍrÚ—Û¯jÕìÿBÌ ñ•ŲnŒþ¤Ð,ýpqgi^éR!Hãåy½ „ µCtËþ«:<4èú9GS ’ÈŠ±˜>~ëyy€Ôæ {‰5¤™ÿzì¹TOîÉd–ã¼ ë'/™?ÿµÀ|ÝŸ{»£ýàç«„ÛÙ’ÓL&Ã÷··mÜwÔ‹/ÿ(«Ók¿2îMáÎnþΪ%·ûw„Ы$ÆO˜âÈ9&~?ÿDïAãÛ¹=JlëÆ™ë_™²L½õµ~x}ö¾×ëåEš¥íwˆîUÜl™Òd“Èö܃_LB}>±P1XÎ*ž³±«—¦ÕíêñôŠ» }¹^R\ ôÅ/Ñ4!bâ¢Eó¬zÝî8aa}Ço—]ÞaJ¯Û^:ÂΪI¶¥¿óöªKtºle†p’+“.tƒ@ %àLù–m¯-=‡!Vâ uˆÀ¦.¯{ÛûoÕ¡JWRUÃÎ’œ6¥ÇàJ²¶tµ²lÎÞ®(Þ\(¶Ê6[{ÎqASiÆK=-ãôÜOý®\yæ¿b®'Ï¦Ž²ÉgÖm·Ävéu½ô¹.3d³2Üù›-•}Ù dÅ׋\7µA+*ûV~>tú²Øá+ý öÙÞäBÙPõü zí.Œ’Îù @ßUÊÂzy¿Ý4gh›«¹,ý¸ûÖw¬Z">Ðmº¸¹qâ ÝMV–ñßž9©\šñ®Xgp¸.¢@˜"c÷:òlÂdŽ.2rÏï>ç}-Ö<6r€\âÂæ1't—¬¬«?Õ->Ã~ÿë,]}Î-þ5ìMñNƒ¨ì}¯ôLð±ò/»:íÿrôdÛm‰Úœgwt<[àLêö\~’r`”å\ŽrZU:!19‡§§[Èá5E6ø¿¬œ“)™®i=z'õ^·zzNXïýƒ^žn?°-…÷c#£×¸•æNG~§­?þº—¢‚_!'ø^zÍ€‡·o¸Äs·++ í‹Çô‰Ì9¦ê³;=ííÎ3êµé‰wÿ?Nçò²þ€Ò=".&vQžC\VàÈëôÖ¶Ü#kgŒhÛ3%Õ÷úDYd3lÛ¡·sró¨C€/uð}í(÷l;øy²D³ëÅ5žqúlöcdzœúöëuݪ•owa›<öì›rQí¿Rd·ˆ°$l¶Ûœ9­*78ÉUIe@u$@ïª<4dâWö_£ùîa2íÍ;ñu‘i2Ûè&tCö”ÖsØÿ¥®<³Û\…nôœ× y²®nÓÅ:i¶¯aÀñØ@@à|p{ä„ÑÏ­ìxèà—« W‚DÊ¥Í-OþÆ&ݦWÓ Æ’O¼Ynuÿ˜éúq›.k»!Ýp‹ýÄϪÓb’=é‰â:ê#1¥ÝCY¿®kÐÍ×¶ZöSÆH)çˆÚ°½á’w•aè~qÔ‰ïçÎ]Zt4FºM|bK»¦¹Ó'M_ê³S··Ù›óÑèmgÚ0æ©+[&Œ´Nšê»÷–6ªÃºî§rŒ/§¼#ÙΪ U#Õo@§¦» 
›tN§L”0ºÅ:õú†Ê’óòÖoÆŽKÏ|›¶ÎàrÓæ²)a¯Ë‘#ŒêëB^þ[’íœfž8jË5-ò_¶j£jœçÁ‘S‡›ÏL~áNóÝ×|ã$Ç•¶åÛZ´_9Û–! ›ˆŠKØœ›cëü×q%eæ{žûÐ…¯ñ¢[™+lìèˆèhËv—,rì9¢ó?·í8v¦ U³z‘é‰k•éîßl)‘&‘n޳lÉ>#zœÎ´%ÏJ•²¥s^Þ–mòꉱÈßÛ…I¸m¢ýÄE§ŽÐûÆ&z&ûœi¯´÷/•u[¾ÊìÊ7±Í+fÛhÁ±B›ÿ8êš@6ç±Ízþþï)Ož\m›ÆmÌ)dõynsäEër½È/ØmK6¾ÆMfÏýyÔ6áéeû¯›2ðÒ.rŒø3ï7¥óä´tÒÝRuˆ—ˆ-NÒÕ»K|Ê+º¢j6ˆQ-[ºÄ7*ú@ò$«1+}B¿—3Þ!³U'/Ð|ËÚ‘{¹3ã†KŒw¿0e¾> eÜýýÆo"T'ŽëÛeÂôU›øÝ~Ó®i„º ëýµ­I’U=—<ÜdÁ¤&×­÷©V§}SûdgÅvjã¾uܤe¾ßµ.¸õaËéEô•$&»/¡÷õç.zÛÊié¿^ž6Jû7nÏqÚ¿gì¹Ôµ;ußÛ»³÷ѡꘗUv˺ÅÒ=#S‡[2¾Nݶܷ®´Î[žy׋ɱûæÕ+Wê¬ÆÝßgÜgÙäÓõk`ô—ý R^Öio« *Ò¢€ÔO´œß¨¨²­nÌJ©2œ(jj ÜŸ˜ìqe`2²è&”Pàpt |ÇèåžÃOÚ3Oã©1ÑÔ³IK@dåHIôJükÌ=‡’ÛÑ’÷¹çðlNfJ®Úƒ($¾±žþñýì GšÍÛÍ–ØÍÅðíüϪæá|Ø@@à|ð(Ñ ‡ìØA.‡0˜"÷ºÝ¢å/‡m‡'÷±êvI9Qþ÷GýøÝÝ¿îs‘ƒC÷8Êû½ÓmhùÖ¶3ì´9oi÷P–Ñu½¹ëÛCì óý’[ȧ²³¯)È;Óãp†;•‘,«o÷ÍYœÄ÷Ö+›Y¸1[t£û{69ôì ÇFF­á{¯“F’öüvÐçLk£:[%ÅžP/®Á E2gi£:>™®4 f|.ÙÆr¤YÞnެ·ÆEõ[¼-ãÈ·ËF5*Z¨vÄ£[”ÇY“tu ÏËÔð jÜêõÕóëÇ¥±dù†½ödÖî•û$§½=­/É‹Q%ƒ¬:m·Óç´•fûÇ­…£ÀaïôíA±U9 Ø×7ÏÏo.gü–jðdÅ—ÕAæ|¼]ÕÖ2`Éõ¤9¥.ËF[L6ŠûýXþpNËO™Ñ ö5‹)~ö`é¶T’1>jiÅik¶ýÙCC–¸KˆÃ"uDd«Y}¥!Ë—êÝ2ÚÒ¸Y½¿§sº¾E˜òÓIOÜCÍW/*]í¡r…ˆ¿ž:r‰.òâbþqª7D%ÙÌòó•æÆÃ¶i²át•iYB6-y"VŠ$»üõq'@9ôõâ²63ƒÉ=¥ql¿:NåÔS ¥·K›ÎZ,ß:Þ$EQV×ëN#ë±ä ·jzbÆßI¾K½Õm¿Ï Ç_ò¿ž?ZôœÕ&›Ó³m‘jíåQähj5Œ9»ÈÏAVÅ>^1µ+±î=ê˜Ãî=s'sø~¯œ‹t™­4‹`ذç‹tHí:r{«ûz<Èí-çp9¯¶©¿¿À÷õÝ{R©X=çjGã+è~hi3û©NKi÷Ð@ÝÚ˜ne]ѽt6½³(·º—î•&1å™~ÿô—ug|7KЊÅþS"õôû6oÆým²n]½bv/¾÷^›¶HÐëzµáɘ–Ü¿ÔQ¾·¯X2eœzo7Ço϶™} ~Ûº1;x€éá›bÛ¦½µà¶wVLë%ß4¢-—³âëSý9Ô·âF·^ð¡ÕåJ KcÉN¬1sÿ4ƒdÏÚ0ÉbÙHÏ£ë–J÷-*â´q¥ÚN oó³[ðû“½'&má<ÑŸ¦üÈ#ÓïôJ»‚˺E˜¦?~¿´Ü'OŽ“BžrÌÛFI:;»B/ÿiÓ „º ”>rF3ñBÔgçÁ×ù0P’Nútj;sìéHzÎêñgïò~Bˆ«^—f3ëŒZ'^ãpÁ¨Oé}PÉ÷ÞíÔÃz§‘´A¨Þ´M‹:@ÍD(mˆ¸‰ë;xöC§ãÕñc⹞ÞQe.§ºo[ n6Ù< mh†‡§Hœ~{ðxÞï~¥xUó…tý¹è÷¦{Y7›äœt©ÛFß¹ÕÓïìÛa&çûËÝ7<]Ìr <:}Ç*/7]ê"òåå ÞàS'ŠPd½ùò„¿ãÍq’+ïrGmÈ<ÍßkîF£Ûš3ö¦Z§ÚU¬¯²¶@^•Uô‚T3Ü»åÌkoÉÛ<óËýÍs™ã2ü?^2y\à‚J\‹ýe¼ûÁ·”/ºIg×­XF ï´!–ûà‰–G¸ÙPü#Å« A@ * ˜ä³éÉ)VßhÖFºw‰vC®bÆm=Þ·$[R:ev¼¬ÛS¾†­tíèõ,¯Sµì÷P!êEæ¯7i^‘i“ÿyµÛùžùÍöźì,FÒìž6Å=®¸ð‡±ÿ¢ÎL‰ÓhDÊÓM³³,£:üœ°Ðs"Æ|vû£)3÷óÈÿ|lûƒ^ׯh‰!Žnͬ•Æ2úËIªÓ6»kæ%þ Nñ¨œþ*«í›‡Eu$´ž˜ëˆéÄ ö(bÚ®¹;±8Ç¡XƒCŒLùFé8d±òÙ × ”A³mN+â ñ8¦iÛCæäœy•Þá œ=ÀÅ·ßâŸMDsˆf”]CÓìâüGŽC)³¢²ê4m·‹F¸KÞ¸Ó[6œÈºõÁ‹;qгIøFi¹ª 
QE]Û¡Ó%ÎÒÐåªshÈ?q‡þ›9—[VåAé'’ȧL×ãµzFr‹ë‡ÒÅDr2ÿï—ï·î9ìÚ8ý¹Å¾ì>Ç;­NwϤ˫è+±_}Òž³e{òÕ{Y(²ç(Næ×í^|qÈ%ÉT—´à'Bu± v€T*½w³Q”ºô`‰e=`ÝrýñUÉy6zw9&Âðùts¶›¢i™ÿV|Ï!KÜ“4âP½ëç¦wÆ7ˆߢÍÝj/²9`mØê@  ¥²ÝC½µµïôÆÀº›:Nxžãÿ$b(‡¦Ü£É1æüíRg„S´¦rn¥éÚÑÍÛ%ºi‰|›»ó–ÝG÷%ê=%Ê2ª£?'Z7‰ð÷v·ÐèÏUEFî»íÊÙzqþaEF·üõ»¯Ž~•>©¼¶—æìkS"{|ªt‰rÚZ¶jž;‘ßÙ}á!éµq´ª5{’N¿Å¯§éÕ úK½–<)™Õw-bÖûÔs^†bÂ.Â׿ÝYO}µª4ån9¡ÏߤF]&}3„Bÿÿ¾¼´šµBú1/BVÜh¹ž^ÝBÝiÔíêòܦŽGv§‘Î5Å„òL~7ýç¦7_Ûx`Ôüþ‘”vA‚‰¦k anvó üZEÿþ}Vò±ÿ¦(ÛŠ l^ßdó±­kç5`OzûyÇ—ú_é/_–}s³©¼1dð£êì _ZS@9ð#§H»ò LJ"Ëwb º‘yë!ì»ÐW–¶C3TŠÔ/0ý|W;ƒÎ” u™ÀG?‰nTõ­sPèPú>‡®Ÿ×m¤vØÒÞMãu›¦N)¼{Ô¢×ʼnoúË·ÿÏqow±ÞÖ²E‚sâ¼EË­^™Ùâ~±p•0E©8]ˆª$pó‚‚fÞÑIS…Š­È=T/øãÒÉ»‡eýyÚôìðçÞÛuäÀÇâîK#F¾­ œ#\3k4Š®F+]§oùíñ7-š¤Œ¿«ÏjÈ–mTÇ,NŠ)/ý™Pf8F·Tò;°Úè—ºn@@DYl¿“>ƒ™sXmÔ;Ý1-ÇŒL>kîš"Ïŵ!J§Åeœĩ{?§åî¢(Q¦Ùü%%£6ÑqIý̘*1ï>±bÐ>ñ¦AKײUi _Jïï­—öJÚ±iB4ó}¢©+Í çsÁ-¼™//ˆ=õ6NXudáľ-†xc½Ù¢¡Jud4i¡ØÍù~¯~RV¯ø˜õ½°â··hu”^þyªëþÛ»Mû 3¾Y³«>ùéðºlLa ˉô¦>Äã,2ònhÓõVq`ãŽn«Ïä ™2"¶ñ5;~ùíûÉâÄ·iI71gÞêéoÌâã鉛î¡ßø™‚Ȥ‡“ÜÜæâKgØ#êŸ<þûÔGz©—c+—-ÛÄC»ê»Ñ4Ã${ƒWur·?šönš?¦E—a³ŽÃÎ’…/ùãé½6Ñ Š¬ã9"¥Gïw]ÞæšA¿e{.9“˜œÆç¨]s%QÑEÖã¶Ó “81l؈'Ýõ¯ÝuòÝ“Þ[µèIJTË;•kI¦úÝÓŠêwÚn¤ú¥Oí–Ø£%éµ3€ŸÏà*Þ Gi›Œâ@ÂA`óÐÈ¿¸û@†1õ$Mß+¢óÌþ¾czÿŒ{ÓL²=]w9ÍXp¢&£­8X|Ï¡!ï¸Ú0xøšS‹5yÑ…¦n³N£\ú(¶ž!€T.à&Qüö'?ç2§ßµªÜe—ùZz ž oíÇ«p“ƒ¼ƒ×^\ä¸ú´Ð'SÅÊ·Zu›ß[:c‹l…ÔNô #IDATB‹S±¯B V©£V%êð(?'xìÕO×ójòþµR½üy?£[:¥ÆWOÖF¿Èi+Ü:lô:músYmçÌ¢s§ÄÐgŽyÑÉZ©ø÷Œ˜TžÖ^¨½ä=E]p+XÆ?>²½ø„%Æ,¹òÇ¥ô^òs4²Üh¶-!þx0b9’Ò:‘šsh¥M'™ Åwœ§qLÌŸ:ø{OÅlçŠ÷õ·…ãuÙÀø·îëy$ôã )cç~¶ôWåú1ôI¨Æ³m9þOÏ£ÏCÍl<"5óÐú^ò ÿUz ¤)æƒg¯¾¯Ð—êMß¾¸éáüŽóÚzRz^‚eÍñ¬æI[~ͺ^·¡:†îˆH‡×.ƒÈÈÌöý;÷ºrަðBsú^-uYä\éw& WÿdR—Ïå6÷ßÊ¥#9©¿üøÙ^aËêkQ6oH[ÍŸIò­Ü¼…F‡Ýæ¸íN·±åÏn=uô—4þzH‹qêh//†Êv˜-ù*S^÷E¡ÅçXÁâm'pZÐFÎ4 G'ÓÔÑh*Ïæ”ÛûóÞ¯sŽþH²[ÜØ&êÖɳVyí ¬²‘ŒIå»ÌÑLǴ㾤Y„ŽNz¹¬C¯ß~µ~ûÕú]Ø ~¼.ÈO¯êÐwó¬ê‚Q€Ày&ÀËéYž(Ž~‘öØêF9£F=9ÀwÙo¦ÿ0Wrd«ï¢¨ò»pÔÓhtǵ8hèôÖï²èËï?›!rS¨ILxò1 —n:WÏáƒ/wúDdþ2íOüøÜsSºµ7¬—¹*y#wþ~R¦†˜ïÿ€8ÏTP<€@$À£&<‚ؽ÷À=—¶¼|d†;Îräà×[Mn‡h—?C_mVÅÑ_Æ{è¹té:9üïkI>¸M]éâ_£Ò_Fß¿÷™ wv;xtë€A£f\Ð⊵¿8yG~æádõòÞsË6ªcnó‚½·ƒW¤M¦´Èú­w=qâ_ß&&§¬Ñ¯mÏ”•ûuÛCÝÒóèöŸ+þ?3]oâiÇsÌ)I}ÝpIëëS~;‘yGÁêä"N3ç/Õö±+x“1“ÕÖÝóö[Btž{m$}:둵±ürv+ÝŽ’BcœØ"çn 
”15ïŠLñ0Ç/§éÇÖ*3Äq‘²K›n|QSËŒ¿œ"ÖÙ$yæ{Ž ä0Úé]ä½{lü[uåè¿]äßëöæ›ø˜?#Ca|Džê qoy´JH…ñj¢ö'ÐÆÀc]Ößf5ŽÚ õÅrvžø5‡>×´ëÛ×EŸvʧã&õ£Õó®%ô_bÙFÝßöî¶måxv¤Û\ÔZ½Vµ^ÇÄ =ÛúF¤×<*úô-’ÞÝòËéˆáüÕeã™Ô‘A­äùˆ>¯vìç}æN—ËŽ6mÚØ¥5E­Ü²ìÅ"#éœúþ¤‡>§@Z¸é¯(£'Gصm®¯^Y4/ý8=´TùmT–aóæc– ÏA’혫·“Ø)f=ýÜWîâ¸MEùÕ õ„‰å\T*oöܸfŸuëxV½Ë)»©‹:Bš³l{#cDŽkX¯³|ß8Öê§—×ØçîÜùbá×QP ¿@Sªâ¸D UaÊ8¿þ5*µ¿çÄž7u+ ’'ËM eJJNÂÆu«Õ{D÷eJ£¼O’3 {Õì´èÈõ]?üÃF#-³!m©Üu³bqÓ{Ë…2ü9(ï‡ä» š8¿û©—Ñ8Î<#Ó+r3’[ÒÇíçh·×Ó‚€@Uøgï'öÄÐ*Îe^BŸÇ{Ýoõž#Ñ}nôöw[y”WPn@Sõc=?Ë ’{B“+Ë=´ãF%º8]ºN=¼«ß3›¢l7ô•‹Œ^éézøï{.u}µp7ß^3HެŽš\ä›D ñûŽÛ&kßÖå.l ŒO¿D¹â¸‡Ÿ^}—ãà§[ý¼zíâ^T]ÅqèL«oKÛF‘Dظî-ÿ¬j1åeÉ™ï''ÅðâÈ_eaS•ÑZüj»†¼ú"[×—¦¯RJN+Éö¤O/¿?Š:ŠóÜ7òáüYš·s^Uâh½^x!;ÍhP¯- ““Äõâ#zO÷~ê§aŒ¿¡i×ä Ë$“§ÇqüùÞÈq3îóÚ®:xç²G¯cõ>4Rè\y  ÕMóâÞÍ¥6”8½Ì+óy¡ åãUÿ qÏá¼Õ›©ãY)Ò(â‡æÒ ¿Æ8ð‘O~å%9ÿ[þª° UB@¿_q¸I½~Ë+¾†»ð²ÜCK+ó^úLQÞI{J“ÓÓõ{ï¦Mßð`c±[¡Ì_Q%ÕÛkÑûx± «(R·;pQ£âНn¶g#â@@@@@@B p­¤Ëßò¥O¾t !DA@@@@@@ v¸á$jÓ\4‡G‘»öèGÞáå®´P Â×ãZ@@@@ÂD }Ý$útL^û®‚ÖÜ¢­]s)Ñ‘›05  ~xAGl    ÕÀõ]‡ÿ(dÒ×[ ´½(rīӸ¥ª†æÂ$(™Àÿ¦ •L½²}ãIEND®B`‚rocksdb-6.11.4/docs/static/images/data-block-hash-index/perf-cache-miss.png000066400000000000000000001267741370372246700264550ustar00rootroot00000000000000‰PNG  IHDR`ø,8Ž£sRGB®Îé@IDATxìÝ \UUÞøÿµÏ9À9"75mÔLkÌæ×…jœyº84O5ýÓà­ÀsÔÌ.*2æÞJS4Ë+Úe¥ÂòŠS*ðÔórþÓóôLv³Ë4dŽfÞòˆÜÎmÿ×:p耠ˆà9Àg¿^°÷^{­µ×z’¾¬µ×‚ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@h±À¨ÉÓS'=j;Pº®ä—éBù:ÓõæÚ]ªI‹îó‘Þ%ÞjÕM}pò¥Þß[¾áÏ@‹Úå­¬…{Ù­Ñ¢2=NöwÚ„k½N" €À9†sRH@hsòÒãJ+]V]çýwøA›Íe³Å;.”¯Í@7h®Ý¥6¹%÷y0kÎL£¼ñœß˜s.õþÞò ZÒ.Uׄ%oRÁÒ£c‡~æ­ûBû±É©Œ’el6›Û*÷Ç'ä×ûYÔ4½:8¬ðXYð}¯¹P}\G‚¿ªòS€´Pà¾ä]zsŠî\9äÜу±T¸/\Üå¹[}¿pææ4&òœzæÖfõ¥û³Ÿžë¦ÚßL»KîêÅÞGŽÉHe~°¡â€»£øbïÿ`òâGBŽ~¾îɘÊw$n?à-ÎÏÀE¶+UŽ ~õhâ¶²=ÿ[óK_…ˆÞœºh²óèWéšüÑ‹ ‹È7÷>#Wþy YÞt`±¼%÷v¹]T9U‡ïv©?%=3×TU{eDXšÝ·âóË`Òxü«t!œ";¥<4#síÄÜìlC¥,ãr‡FeÄõñß:køWNyòåQûRo{@¦kÚ†+ €@ ÅÓɳÙûáU&·+Rþ¯­ysüìÌ/¼79~R~¨ýø ßꦂ°‘ã“ò¯p¿êÎgîûð]5–a48K&±ÜxgâúÃÞrO¤½u]Ùgy¹š³:Z¥ ¢äƾ¦‡g/ݯÎU=½ÅI¡GýìÍâ£E넨•!ÝÞ·TŸúUvö6£oÐ1rÜã5ÛOÅÈtƒjƒ*ïÿÍ(fNM™¼ÿø‘tõ‹¬%“2þÒÿÔ®×>­±"ÂâÊY“•k«ß· 
ßÇ›ßuà=9¤¦†æÌ}Ñ›Zc}…ûHxæÛîö¦>4}I|ðá϶̊qõ½%qëá‘ã'曫®/+4ï·ŽSrc¯ŠisV¾õzßoÍÝÿäÚ³Gu)åÞÄU‡7öÑûººÏF\¨ì=k¾é­\ûFT§iƒó+<ùåtCã¬ôaâ஼-ÿ´«¾®®MwW›Ph²»bäôD“üQñ €@`MÀŒ´½€&öýn]ˆ   Áá…{E̶=eŸë¶nZlv±º¿î´_årep&Ôt<Ý鬼ªÌiŠ.“Á—Å °‹ 9"!ú¿\XvH=›ã äèE™ÍV¤ „[Ì9v,ª*ÏX÷tæÕÕív„Ÿ¶bÄÑ"ÏÿôËFèAaÿÕå¿z噇î‘SÉvÊâb¨\T"$ÍlpœàKµLßËà«KK ~·©ä„Ͷ?XÆ“Ý"ÂÓÎÚM½K+O«gòÆÈ~úŒJ^à>ª)µ›^rd´1¿ÛZâMS{]~fÕng½ Çé°÷–}Ð…1HåÑ‚‚Žˆ*u¤‚I­Ä-ôùizŸ†?5¹j¾Çå–]aÜ6á¸Ñ`¨wOuÕívDf6ÉŸ¥Qß¼R[¨¹Ó^¢öåÝ¥ŠÜt¥q{mQÏ®¸ÿ]Ÿô:¸KØÝz¨oºÙw‹8^óÙšQ}…Ø|À÷Ç €õšûoqýRœ!€´Š@ïWÚÆM[£Ö¿ñÚÝÕQW/P•>÷n¹Õ[yÍó?Þ3&ÕlWGé ÞØ”= gÓ*M–=jâšüŸß«=WåHE¿ý";;Ǹ6km]ÖZ›½ßÃÔµ 9nP{½6 ˆ0Wäx¦9nÕ‚³pŽºö¿ßÙW¨½gË^¯¹»?Q›0»«.Á`ĚϮ–=U± }Mæô7²VÛ¤WPý૦«ç½Ï9žØM‘Õm ?ú >oeþq♞?KRIÏ ùáÆìM›ÌYQó<ÕùÊuéêÔåè¦Á`øîÜ/ãRŸ{\Ì¡ÓQÙCåØK?ä[.\†é¾çÞcãù×’ñfc Àø1@ü&à+2rgxoï´-NSóûŽžÑëMk¸¯ùŸq§Xœ¾Ù¬©ë®+¾Ð0ß²¹©Éñ}eÆ}Öe¬êá(é®ò|{ªúµ¯©Ç%2²¶TçjÛv¢Â`,©tZ¢½+Úøç|5ªóijo¾çÉ0ßœbé%l¿ï–£ÊzqèüÙS¡ï})D>h×ÈÔ¹ ßÇC"Gƒ•Ñ“kOcA\sؼSR*DÏFƒœÆêÈŽÕŠ·lʉڰaÃ- ¿6mÊY;’ÚXÙæ¤5|Í^z$¬±rzT¿ÏK' @à\ï¿÷ç^!@àr¨Ñ 9é«<^.{ÐÌ­®œ¡vñßr SŸ™l°ÙÒwû&ÊcŸ¹uòÌsÛzIÆ^7Lr%¾Õ¯ÆÞ/öæÿ·ež­7su * NT~ùO[n0P«N]û+GÑ{ïw²r™mÞËfNµ`ÙêÍ©tã¼÷©Í¯ùã/šj…ó³gÿÖ¤ÖǨ·9U–%Kº¾£i©ýÙyG´ÎV„«Âº-ôš˜cbÏÆºsïÿ3á•`\XÀ¿/.Ü*r €GÀyºû±0«”ÏÚõ/¥ûê¹ Ãñoe(gqƒ ÊÎÎ ‹N¸¹9u¾•8;G5hÏ1ËôÇþ¸à1UæA!O7§l åiŽÁúÔ‰»räÊ~=®ýåoÔˆà÷ÅÚü÷Ó¾®EýÐ4·CN­t£Õ³x ë°»µþ¾incP;‰-8þlÍ?û>x0oß¾}ï6ü:xðpžØú]T ªz¿Ûwªrïì+©·(IåÇënWé¡Aú µ÷n®âƒÍúùòægtfs~Itf úŽ\^“X:cLœ÷žÃ²žš©†Z®ê¦Õ¬.ç½p‘{Ç©/CU‘zV%=˜’úšZWR|°³ª‘#Cå2(³›b*ŽÉ ®J<0kë—Í*@™.h ÞÙ¥§š„|^îåEÓvž® P÷—Uö¼´nx~­þTËÊjžÛ r”š`h¯òÃW«šºÏ©Š3õFšÊ§Ò·'þìÀ¨¤¤ð”””І_II£Âµøuõ¥šºªì9ýgÍš2T×uö»ï=¤:±¿Ø8_”êÇÒ³úp½:˜}OØŽš”šï®Ú±Õ.ÂåðMç@à\f œkB  Ðæ&ƒ©TÝdÏa×–‰ãGµE©*þn‚J{6A¬Yô²:jÙÔýÆrUò«ã¡é §O;ùÍǽ®ƒxênN¦ký{QôÎû*ï•aÎL9ý°^@Ñœ:üçBCS–³Éå÷§LŸfìò“/ïÙ²Nµùßz‰ý-m»;¬çvQ\-þÓ)DÍ*–ª.wdïMâä¾hÛ&ÇéÉ“&§œgK© ÊêßI»~èÛâø×âõOBÿþè„ÄÜkÃOýyÎŠÜ ¾Øxø!eõkª—š>¤ëwÎ 2));m]ÅÄ ÖV9ªz¿þ˜É¦ Éò”ÿv×~&_‡ðÙ둸ù@…\Z>Ô.bÆŽ‰;=uJßçœ>ñ´Z²?,¸ªP‹ÝæY¥Ó{'CÉ¡õ¬àu“¶‰ÞTö € €5¦B Ð +‡Ô 4#{½,NÏ*„U""êڥŇ狪Sòº.~{CÐ/´!y þ‡Úó˜–OTÿÜm4ÖNg«YÇ\-¦aKN¯ËãëÃǶ¨‡‡Efž)+™`6hžà¬¦1õë©IbÛü±i{ÇóBàI·š69Tã-p‘ûîÏ~Úb·oU¿ím`›‘ë\5vÕÉÒªQº_¨_†?í)’ÉwrýxutþûÔËÛ{p(þnþsïV©U,ëF1ó^^´usU•=V~ÕŽÆÅ™²2O@~öH¥ZgÃóódêò¹°—ÅxG±ò^_ûëøGOÈ×#?yl™2 
7; Ödm–ù†Ï}䈙ɦ^U`W¯*ð4Ôç*‡ € €–€œò%§Ãémò1U¯ï²fõ\–‰³Zu«u˜OÐ׬’™é|òš¡Õìe]ÊmÜèᎢ5÷^µùüòˆ€¼÷9Á±·Ý]SxÜ’í7Ye¿'}$ h `~ù>À h àWùŒ–Þøè—Þ,UïÅN!šòü ^ÛC̼ôø¿†óÈkîV³—uU…]™iwwé_÷¼—O÷›{¯Ú|~I’÷>'èö¶»±kª{΢wŸW/6˜˜ŸãÓ]@š k†d@ ³ è?|í™u¨žEë¬-í·öðÊé*‚yæÿ=ûxKëhWå䈙©êtì•aöLm ¨nWm§± € €‚À_<ÓÛfJd ô6 € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € p‘º®kÍ)ÒÜ| ëÚ±kGXÃ4Î@@@¯@³oæv¹—A×èÄäåîÒc)Þö÷‰Ò¬Xê=÷îÇ&§>â8Z´Î‹nv¬ÉÚ2\Ó„;aê3“ Ç¿MWy£,®œÕY¹6o9µ6>)?´êtlöh­››]ì{c@@@@ ::Ãð1üK_Fƒ³$"¢[šKvøp±>føöýÁ©‹&;=Á—."ÃÂ3U¾3U†Ø±c†²èQ*øŠ0kæˆ+ÓŠ+Ö•ÓŠ÷–hîêûTðumûL‚/¯ {@@h(С°¸Ü²+º¸íý†Š’›¶F­Y“>=wa¶YWûNº—ÕaÈQ2ãñ¯äè–Sd§”‡fd®˜›m¨”\îÐÈ~… „ÐEFVöÈukVMWé%ÕŽ«=å÷ê!Æ}ï¿k4”—,zùÍåuur€ € €4èИ8u$Xõ÷úbN]¿jÕ•–9B˜EYîø+Tú=k¾ém’û¾ÕiÚàü O^MÓý† SÇ%ÅÇî—¡˜:ô|sŒ%*XSÛˆ…ýÝ(ƒ³ ‰¢Ÿ'o € € €@;«í´Ý­‡Öë·ÛÕù·§*=éQûòîRç7]iô¤«cµ÷¿ëµïbÚ/„Id-xø®)¹G¯èêvEZ‚Ì'âg¥µ8+£oëSž  É+SyÙ@@@¦:vÖ½·]u|ïÉeïdü¡¿\Ýа(ãÏ}‚ïÞ¢Ò»—CíŽÊj?°—~Hí½[¸ÐƒÔ±E«ø¡Jîw9Þ?¹-ù¸&ì"¤Ûý_îÊëf®*˜±"?weFFŸ·s_òŒ¨y˳G@@|ÔÌ»»m;1r|RY.±¾ð‡ýë k.ôÌK”½®FO€å0{j÷öÒ#ay¬é'ômÙ;Œ —®º¡«(Óf-üzä˜GöZd öËRŽ[ÍúîÂB±[æ}tìˆ=Y¶ßÒ ªé-*H!@@h±À Aƒ¼ £·¸Ž¦ vì0ÙëmëÒ‡^}çÍÝ{öLêÙ³{ÒðÁƒ9Ãze*­vÌX»äÙŠpolæñ ½&æ˜çÀ“YsÏ›üEòìg¿ž4g®E.î‘Ýýæû>·™ä£aSbnèëì1`f¥Ó]”×§® € € €@­@‡ó~Ê«fOùB«/ñ¢ü>zÜý&¹ˆFô¤­GD¢\ß°ßí;Åñ¯Ä;ûJÔYªüòl•¯»]½Y94H?Q›$Fe|Ð_+|i~OËÙœ³s¾:z\^˜¡òÀ‰s‹]úkºmY…pÔY󖽨}[FÞÛò#€ € €À¥ tø°†DñÓ—Ä«¥é{Z9òËž)~Ûî¾÷:Ø_lœ¯ër`¬v :ôázu8ûž°ž$¹\½£ð%¹ G•xáµüѵÙ~ܕȕ:<[ÃÉŒ?fá@@:¯@Ç“‹nÄÙl®ža–L“%lω’òý𞵚ü “]¿˜UûÁÖ**ÆM( µ‹˜±câNOÒ÷ù#§O3ÙpüÛt•?ÊâÊY•kó-;l|Rþæôͱz­››]ì{c@@@@ tø°¡20rËàËdp–DEu_ Á*ì"Æ–v¬Ü÷GàÁ©‹&;eð¥ ]D†…gºäÅ3U†Ø±c†²èQ*øŠ0kæˆ+ÓŠ+Ö•ÓŠ÷–hîêûB«NÇ^ÛÃ>“àË«Â@@ tì0]7˜m¶X£¡ªdã¦mQµOaµê"t]Ôè–<ÐŒ6›ÝrŠì”ªPmðæ ™6i„Íæ¶¸C#¯.\ð Ì2²rFÊüNY>¥¤Úqµ§¾½zˆqží]£¡¼dÑËyËsŽ € €x:ú˜î–=u¹õHo‡Õ^7K|ÏïYóMo‰ö¨NÓç×LKÔ4ÝØoÈ0•¯¤øØý²uèùæô”wªs1bác7ÊàlC¢èçIà € € Є@ÇÀdå²ôÌÂ"Æ~hÿçQ þ°¶‹ÛÙÓâÈñŒ~I˜¨}yw)Ÿ›®4n÷u*î×'꼋9h¿&‘µàỦä½¢«,o 2ŸˆŸ•>Ô⬌¾­Oy‚6$¯Ì·,Ç € € €@CŽ€ÉÞnÏzÑVi²ì±»ƒû/Þ´é´¡ì‡ Såž³¶×-¢átTöP0{é‡|Â…¤Î-ZÅUr¿³ÈñþÉmÉÇ5a!ÝîÿÒppW^7sUÁŒù¹+32ú¼ûÒ¾å9F@@|:ö3`²§îÒÃÎdŒ­y^«¶ç•Nsôê¹ß7yÑú¾fßyl/=f‘{M?¡oËÞa\¸tÕ 
]E±˜6ká×#Ç<²×"±_Þ˜rÜjÖwŠÝ2ï£cGìÉÚ°ý–Uµè´¨¨HoQA !€ p‘Ûß~¿Ñü.j”…DÚ…@ÒÚmÚÎ&ÿ}Ø8Ê?÷mÅ»4¨Y¯¯jÉ-;ö˜\„£4Ýv&HNüÕµA¿ÉÉÉÑ‚®½ã72¤ïïs¼[–;Þ3be¬] òlEx°/bè51ÇêÎ5Í=ovòɳŸýbxÒœ¹·½Bt÷›wìûÜf’†M‰¹¡¯³Ç€™•NKtQF\Ÿºr € € €µzlÄšÏú©ˆêʰ³™“å{F»Ö/zrçÐé˺þdËšÏü»¼œ«÷»}§8þ•xg_‰š–˜*¿<[åÇënWoV Òëþ|0*ãƒþZáKó{ZÎæŒ˜óÅÐÑãò •îLœ{XìÒ×XÓmË*„Ã3u±¶šïÚ2ònq£(ˆ Ð!–®zµÑYü.ê7ê$÷%ïjô¿ëÖê~Sÿ>œzæV¿Ü·µúÕÖõtì0¡žÜ"È`¬·@†w˜«Úáô<ûµíî{©Ÿ’ýÅÆùriúºáÆ C®Wågß¶CíÕrõŽÂ—ö Yï ¯åö¤ù~+‘+ux¶†“}3qŒ € €U C€¹oºý”(|I|_jI™8Þ6¨oŸë^üöø‰8ûáO&¨|N\÷ s_”ƒµŠŠq Cå šÇŽ‰;=uJßçœ>ñ´IN] ®*Ôb·«ü#ûÃ&¼=2@® è‰ìa½Þ´—ÚS–Ïš6ô“·’¬*ß-7u9¥öl € € €€¯@‡Ë¢•u7È-ßÛu¦JýǾ¢wíe§' Î’1ƒ»Ò†dՌ彾ö×®à.….·)òøÉcËTðnv¬}}Û¯X\Æ÷}̕ǭ¢ÊÜ‘¸ù€JSÛö”¥s²þOË•ÅÖA=ÝI¾õÖäâ; € €  ^nÕÁ·WRüFv1J— rȽúrkrA›t\¾3,Wˆ»}ò¹d>}íºš|['õ=’Ûý7=ãã­{Ìse Vý§ë×U™5sŠ € €x:|æýœUÐ%Õ×y·&óÉÀ*^ˆúÁ—OMµå|R8D@@ê tè)ˆõ»Ê € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`èæ« € € à_0ÿúsw@@èD`|غ®k$_0iÇ®aÌD@@è´¦ŽÜó¸Ü²+´mO24ì§Ëíê?5F ¸#qsݵ±É©8Ž­³ÙlÂjµŠp³»`MÖ–áš&Ü SŸ™l8þmºª'ÊâÊY•kó­sØø¤üÍé›cõ[7-6»Ø÷Ç € € €€èð#`áÌŠ”}­ýÒ"ÝnwM¦Ÿ.uÕX=8uÑd§ ¾4¡‹È°ðL—,p¦Ê;v̰S¶=J_f­ÀqeZq¥ÑºrúCñ PmÍ]}_hÕéØk{Øg|Õ˜ð@@ÎèÐ#`[ãÃNÈ.Ÿ3p„Õª[d õÀ}Uû<$rÊ¡Ñf“£[N‘Rª Þ\!t}Ò›Ímq‡F^]¸ á;™?#+g¤ sÊò)%ÕŽ«=e÷ê!Æy¶w†ò’E/ç-?—˜@@@ F Ã€5ü G-úÓ`‹Lìf./ÐçW¨ë÷¬ù¦·ŠDûFT§yÓ„¦éÆ~C†©ë%ÅÇîB‰Õ|sŒ%*XSÛˆ…ýÝ(ƒ³ ‰¢Ÿ'o € € €@.«ú:«²xâ—æI^“¨}yw©ã›®4n÷¦©}qÿ»>Qû.æ ýB˜DÖ‚‡ïš’{ôŠ®nW¤%È|"~VúP‹³2ú¶>å Ú¼2•— @@hJ COAlØé¸Œ¢>¦ÂÔþÁ†Š·$î8ì½îtTö–'{釼ij.ô µ·h?TÉýÎ"Çû¢(YÎi´‹n÷iøê¯»»™« f¬ÈÏ]™‘ÑçúîûñOªi­²é­R• € pío¿ßh~5ÊB"íB im«ýoi£ýmò߇£ÍßZ‰MÞ·µn ë4hÐ91µVõj¬úÓ²ܽ c47H´—ñ,Ò¡é'ômÙÙÆŸEÿâæ_Dÿôæìì?~ýÁVˆýòÆ„”8ùLÙîÂÂC¶}püѱ#>kP § € € €€G ÓŒ€ Û¥‡…¦Ûb„¨§nßåûùkƒ<[®Âê¶Ðkbމ=kÎ5Í=Oˆ/ÔÉ{]©¡n{ÿ„èî7¯ß÷¹-D>6%ææ¾«þQa«<¹YQF\ŸA‰[ëFØê*¼Èƒ¶Œ¼/²)dGèàKW½Úè¬ ~uð¾uïÔ3·6úsÚZÝèþì§m6êÑZm¼˜zîKÞÕ¦^MýûÐÖŸSS÷½æí4#`†3ç+èzêI Áõ~·ïTiïì+©÷n¯Ê×Ý®ÒCƒôºñÛQô-Þ?¿§ålΈٙ_8J 6T¸3qîá­ Ï­Qù+„Ã3uQ³!€ € €^Î1¦ë† ›-E-b8ç…™s_ôv¿f¿íî{úè5±¿Ø8_׏Լç¯A‡>\¯r̾'lÇS+å\®Þa³íUâ…×òG¿˜US¾î{‰\©Ã³5œÌX—ƒ@@ Ý 
´õÈÊΕC:ÔHT»ÿÀé@« tаáxv’ŠŒ® ³gª÷x#8X«¨+Â,ÆŽ‰;=uÊ´q£Ç›åj‡aÁU…Þ—+|ì›ÔÅ'cBxƒ4cX¯7íî®ý—Ïš6ÔúV’|—˜·ÜÔåÔ9÷ @@:½@§ÀÜ'÷ÍÃWbå°.³›úÄó^_ûkWp—B—Ûyüä±e&|…›k_ßökU&.ãû>æÊãÖQå îHÜ|@¥©m{ÊÒ9ù^°OË•ÅÖA=ÝIÚ,–¤¯áá; С–®|u\‡êA¸ìb bþ¦7(ÙœœóøÊ/ç q·.§+Ê\ê˥ɴµëjÊlÔ÷Hn÷ßôŒ´îy0Ï•ZõŸ„ˆª-§«25%øŽ Б–.}åg²?j¦Ã©_ô@àò tŠìbHeå–ùÕWýMVñBÔ¾|rÔ–óIá@ £L[¹Ò"4MþN„v”>Ñ@ÿtŠ)ˆþ¡å® €EàJ=ì%Ù—;Jè €€ÿÀügÏ@ÚÀâ¯X…A{¼4•&"€´°vð!ÑD@ÿ<»lÍ@ƒA[럻sW@Ž(@Ö?Uú„ pÉS¦¼d2mš»äʨ@ZáàG@Fúè²R_·6r‰$@Z,pØ_tÝtæo³TùÆÜ»÷uU%%ß™ Šª8ðê÷’‚ €\„Àâ´Ì‡dð5ù"Š@f D–šöÆuû‹þ7Å^^Ÿi³EþØòwë­V«1÷ô¾¢ë ¿·ÝšÛ{ðÄŠº‹ € ÐJÏ.e€¦‰×Z©:ªA@ ž€ÿž“/<~xÊô£¬V½è£ÿ*²—•LnW]ðe0JÔ—ok«®èýGK×¥¤½W>~lÂgïdÌîï{c@.E 5558È$¶Èw;F\J=”E@ )¿€ {r~ŠãäáešlY°AèÑ#|ù¿]?hGܤi?È_|Nß뺮eý­¨ë?vnþáàþÉ®Ê3Ö*§!z}áýº.Œò/•ç¾8Ù·Ž@h†€9¢ÏRMh¿hFV² € Ð"¿`æˆ+?¬þ6sêÃÑÓ¯2«¬®õ‰Éu‡ÞéòXåÙUûe›¶hÍàS{ÿš&ÏÕ56@.Iàù´Ìa2øúÃ%UBa@. à·,wÑä²m;32³ÏÛD9úå&©×bžü«æNúHܽþóç" €X²dm?ù;fÝ3’@Kð[v¡vÊøK}×kŸÚ|å˜8>®`MÖÖarÊ!£^ä: €@³äs_&9>GhZ·f  €— à]º„*Z^ôÁÉ3SŸ™j=w™_ùÌ—£0s¿A.Êl00™L{„ÁXr¦Ê›ñ¹Ã2ØQ#Z¸È iÎssþÜÜçÎ);糕yG'>¹ÜUz2EÍ]T[¸ÙY°vÝÖzï+ßAæï ³ÙlÂjµÊ<î‚5Y[†«·„©ÏL6ÿ6]•U#t«³rmêØ» Ÿ”¿9}s¬^`ë¦Åf³d¾†= à'%i™wÊàk‘ŸnÏm@hTÀoSÿñvîÔm…ÿ:4ntÂþE³¦ =òÑÚ.¶Ð'QŽLRÓÞ¸nÜ„¤µV›Íý¯¯öíV rødiôp˜Ìë–Á—Éà.‰ŠˆH3w)ÕKÒ~øj•ë×7 ¾|3ݳæ›Þ ¢oDuš68¿ÂsM>fœ•>LÜ•WR|ì~!—Ë—›ç›Ó`,QÁšÚF,|ìïFœmHý6ª×{²!€øU`ñÊW§É ók#¸9 €Mø-ó¶gIâÈòx¢úÚ±ëŸaÿ³s{tyñÑ›WOávv ú!8Èub`Ÿ¾ÿóİ[¾Ó>à º–½è­á<{9e1Äf‹ 6¸üSÖ½nçæè §Ó|mÓ·ãSxKFíË»Kßt¥q»7Mí‹ûßõI¯ƒ»DsÐþÒ³&‘µàỦä.-:±-9Òd>?+}¨ñà®èÛú”'hCò/î6ßqŒ Ð*‹Wdþ éKkÞVÒ*UR  €­*à÷Ì·7Ç\¯‚˜]µ_¾—<ÇRÎI:·òÅbr³»-ý¤/8ãͼoŸãF?ðƦTšÓQÙ#XîöÒyó¨}¸Ð=å-ZÅUò|g‘ã}Q”,Çéì"¤Ûý_¾úëîn檂+òsWfdô¹¾»Ãþ@ü“'|ëà@àò,Y²6B3h[dðåù·ûòÜ•» € pq€]\Ó/œ{ÄÎÏ~¢:¨¦-^÷“¨‰÷Œøí;_wß[¸~¿pwéÿŸKânúÝì­_xk2{j÷öÒ#ay¬é'ômÙ;Œ —®º¡«(Óf-üzä˜GöZd öËRŽ[ÍúîÂB±[æ}tìˆ=Y¶ßÒ ª]ôRø-º…@ ðá'ÿGŽê=9ÚêwÃö·ßoôÆmu¿FoFb»HZÛ¶onòçnã¨65jò¾mz×¶«œÏ©å¶ƒ ò.žÞòJš(yÁ›(×.’·ßwËQõÐV¿ˆÊç­X“;D¾£kIâo8nx誟uýJíµ )ž­Wau[è51ÇêNäRùóf'‘<ûÙ/†'Í™kqÛû'Dw¿yǾÏm&ùhØ”˜ú:{ ˜Yé´DeÄõ©+Ç €@› üëÀ‘N|µ9$7@hs=&~*ìjép©6NîR½šFËUžéˆîš55„ÞïöâøWâ}%j•ĺ|•¯»=L&„éuæ•ñA­ð¥ù=-gsFÌÎùbèèqya†Êw&Î=,vék¬é¶eÂÑ*Ó_Ú2òöZ°GÚ»Àâ´Œ[ 
Ór¾BH{ïKsÚßV¿–®zµÑYmu¿æô•<+p_ò®F^Z«ÅMýÜzæV¿Ü·µúu¹ëásºÜâÍ»_‡“/àÔÕŠê0}×0KÕl_®KS‘fÓ÷j¿íî{©ÿš÷çËwƒÕ 7úp½º>ûž°j¯–«w¾´_ˆ*ñÂkù£=i¾ßJDm@Ûp2£o&Ž@ZK`æÒ¥aCzî«S_­åF= €þ¸0õ²eùâL·—$±à»¨Ð÷^ø:_>ª¼@œY³L¼7Ãö¦ŸÜ<^ý|-=äÌÌ©S“•”'èU¥r™û*ñäÒmùžâƒµŠŠq Cí"f옸ÓS§ô}þÈéO›Ü®È°àªB-v[±Ê7ò±?lRsŸŒ  ßæù Œ1¬×›öR{ÊòYÓ†~òV’Uå»å¦.ÿ!ÕQ6@ÀÏ݃»­•Mèçfp{@š-P#`#ÇMø«ÍfuÉQ(O»ârË®(Ù4ûôáÃǶ¨/[ZY¹^`‹jvïdÆ·V>õºuÕ!§¾?~<]·—ǘMî=Ù ûÖ¦Ê{}í¯]Á] ]nSäñ“Ç–©à+Üì,Xûú¶_«ûÅe|ßÇ\yÜ: ª|Á‰›¨4µmOY:Ç!ß öÉÁcy¢²Ø:¨§;I’¥VsdChC%i™OÈ‘/ϾÚð6T €@« Θù2Ùl1ÝÌÕrtÉ3V•?õÝ®²»f“¾§ÚZªÛ+b~ÿæY5-pèÅ(d¯^–*GÖd&×Û­FØÖmhPƒœ®˜+ÄÝjN^Q_.™O_»®&ßÖI}ävÿMÏøøGëžó\¨UÿIˆ¨Úrº*SS‚ï €m%°dåÚ›4¡5çmÕêE@ E€É'¬T®Žr¾©öÃvéa¡é¶h5Up݆mže݇Y­z°]¿Q]¿Ø­60r^¨œ Îdžº)uùe`/Dýà«î¢ü¬Ï´IŸd@ZY`úòå¡B3¨ç¾Ô›BØ@hWž©~ÒbÏâ_ý ÂØ«‡Ö>æy!É rJŸ·}“e÷˜= €@çènŒL—£_?뜽§× €í] p09‚T%,µb¡mž­Ê⬔£_N1ç…™ Yˆ™eZˆÉt°½£Ó~@@蜀Iÿüõ¯ r! õQ Î’)1–¾òy0Ï´Awöœ5D6d€iŽºÎ† Ð9N¹J’t¡Ý9{O¯@Ú»@ =¦¤ræÈ…0¼¨7ydp–þÜÂo¿}gñÀ¨W{±!€tR3f”?·<#Þd4}(oðX'ý9 Û €@{¨0…X»š`§zØôéÓâgÌHþŸ–nW«²!€tr93¿”KÚ>ÙÉè> €@;¨¬-ÞÖ?šŒ Ð §¦=þªü³œ8Á† €@ûœL½Ì^vÎ{À¥z˜VûlXí{ÀÚ0-Eh3SöÓeå{ÛìTŒ €@+ NÖÈ{ÀºzVBTïÛ|Kö¯Ý].;¶…ïke7ªCe³f•¹ÝŽù+„çƒàó   €¤Œ÷€]øó" €@§R?uëzrƒdN@HÀ ÀxX@þ€Ð(@ =<•Ñ›ÿBû„©©“ƒN=§ ƒgšcM~-ò¦ŸTNœ½âÍ\ßòc“Sq-Zg³Ù„ÕjáfwÁš¬-Ãåè›;aê3“ Ç¿MWù£,®œÕY¹6ß²ÃÆ'åoNß«غi±Ùž×8FY)“þ¼8í•çä¿íÏN«h  €@ghÑ_ÿ.΋óSþ|Å5ž Ç{ÏåÎxïqsö'÷Ïp¹E¤Ûíê/ó«¥î#]nWdÃÕ=œºh²S_j¦cdXx¦Z*ëL•!vì˜a§Ôrù*øŠ0kæˆ+ÓŠ+Ö•ÓªkÇCsWßZu:öÚö™_Ž pOþ÷¿Èi…ÞLš‡ ÐnLÏÎØÖÿà‡o¿¬Ûl±•>èáfCÁÄßxtã&ŸÄ Œ¥ÂíÙÙ[j$Ë›}³÷@íå”C£Í&=™/¥*T¼¹B¦Ma³¹-îÐÈ« $|'³Œ¬œ‘²ç«5¥¤Úqµ§Š½zˆqží]£¡¼dÑËyjäŽ @ À¶nÝêºapŒÕdÞ#4­G€qÚ°ß@IDAT7—æ!€t €KÕuÓ¤é ãã¬Vý»ÂÜýzÕÙXål4¸K~Ö'"!;{BÐÚu›†Þÿò‰‹÷wË߯?_ Ëß³æ›Þ*íQ¦ ÎWS …, û ¦KŠÝ_ûúÏ;dœžUê’±ð±¿ep¶!Qôó$ð @ ]¤ÎšrDþv+{ÑÏü¶‹ÒH@€ð{6-#¿müùE6›£ôð×[¼CrÝ‚3m1×ôݸiKÔ¼kr5íßk"ž‹d¬™j,VΚ24uÑ¢Áë2÷oXEÔ¾¼»TÚMW·û^+î×'꼋9h¿&‘µàỦä½¢«œÂ(ÿjz"~VúP‹³2ú¶>å Ú<ßðÅãhO%?þ_²™ËÚASi" €@ðÆ;~éŽz¶êØ&Û!ïÍÍ&}Ï-×öúý“ó_ü›±Òë=üåÍt‘{Ï•,³ûàÉ¿’«"ÞéûsO@Î%à×Ìñý7]| ËQ¦Oö—<7zÌè~£G7öAETèÙÿþcæ[Í^ñçSÝyõî-=¯¸j`éß—E|öá__6T•ÆfVÉQ-KùlfŸcuh/=f‘{M?¡oËÞa\¸tÕ ]E±˜6ká×#Ç<²×"±_Þ˜rÜjÖwŠÝ2ï£cGìÉÚ°ýUþR·¢¢"¦Å\*"å@fTTV‰¿ìúT8žlÑQÿmÞþöûJ]îþ&­mÛ¿c6ÙŸ£ík%6yßÖºÁe®‡Ïé2ƒ·ðv|N-„“Å 
T/NhyMç–ôνz™Sª®èÚg­š˜ï%åvµú`³°ÚeíÔvE-?tøè1Å]Ü!‘ú®aajê QÔÌÄ<[®Âê¶Ðkbމ=kÎ5Í=Oˆ/ÔÉ{]©¡n{ÿ„èî7¯ß÷¹-D¸Ä”˜›û®úG…­òäþeEq}%n=\W €-ÐÅb·Ý|øûÇÿèvÒ8@ö/à×lkâ Ã;ví ¯>r¤áàS“²C®ùGùÆMM^oι$½Z޾nÓûݾSÿJ¼³¯D½Û+Õ{¡òãu·‡É“Ð ½îÏq£2>课4¿§ålΈÙ9_ =./ÌPyàÎŇÅ.}5ݶ¬B8ö‡MjŽâ“1!¼Aš1¬×›öR{ÊòYÓ†~òV’Uå»å¦.§Ôž @ } dfNt<»ü•„ £þ©\”#¢}µžÖ"€´¿/Cß–Ho'ù—îRXåtGŸ*.žo¯<#$] ú‰iüó«ßJõ½wÞëkí’y]nSäñ“Ç–©à+Üì,Xûú¶_«|qß÷1W·ˆ*_pGíê‰*}{ÊÒ9ù^°OË•ÅÖA=ÝIÚ¬º7•‡ @ ý<3ã‰ýº®?Ö~ZLK@Ú“@‡“/Svæq·ú@ä/SO_å_4_âJ¾x9Wæ•ùTPª¾\2¯¾v*-ÄÖI}ävÿMÏøøGëžó\¨UÿIˆ¨Úrº*ãIç €@»x*e›KV½’.§"&µÛNÐp@€èؘy“—Ou(ó©i‰5S}¯ÉÀJ.¿X?øò¹^[Î'…C@ö,pø_•)}t¹SN9¿µ=÷ƒ¶#€–@@NALM˼nÚôYñS§Nž¬ï¢Èä“¶wïÛžãÀ"¤5 €Qàå—Ÿ¬v8 ræ:ÓÊ;âLŸ@? V¶WiµêEý¥èØáƒ[Ž/Nß•wèje3,eù¸yó6TýiѨÁ~²â¶ €Là™™“öºÝz³ß=ÙÉxè. €@ *±ð±¿×,àîQÝÒTºv ±«}~âŒõpÕûÿªJVçl €\§¦?‘#_QòÊå¸÷@èø€ÉÅ/‚œ•ÑÁ†ê99¹Úê5éÓë­–!»¨4(©Öïèø =D$Ä™©r*üÔ&Ú‚ Ð>iÏê{T/÷PÊ€L³Ùꩺ ÆRáv·Ê{Y¥¥_¤…Ü#¡þ\®¼1HÞè§Br®˜ç¦šœó¯‹Ryü­|­H.²ñ±C¯þÿæ¦$}_¯Qœ €txUÉÉ•K—¾’ ‚ÄîßY:ˆ Ц€iª§K‚änµ ŠÜºÕ*z—Ã# Ÿ³Ì³E ƒá€ç¼ß/Î诅ÇȵGË€êzU…ÜׯéÇÓî2 ë./^#3ݯ2É€M,YùÊ?å’ ›œnû†¹Óÿ]ýœ!€tTY³žøzéÊWY–¾£~Àô ¸L3Q\vùBã2»9fÒø¸ü´´·®3J„mBoKÍx§ÿ¤Ž=¦Î{…io^¬Í’´5ÑKW½š«…˜öišáYoðu±õ¨üª¬ªÃdù—ªSÕÝ’z(ƒ Ðþf%?þFûk5-F$À À¤J·á×)œÒ*SìGm-RƒQ¬Î+*\¿?ÈíŠÂ.–flŸ­ò4gK]žÑsñÊW^× ¦Ïdþ8<µZkëŠSu«{¨{5§MäA@@Î+ÐjIk®‹;aIÊ7Z"<+þX§.º…3³³o ’3ë­ÍñcžúG‹Ó^I°˜‚Š š6®þ•Ö?S÷P÷R÷lýÚ©@@:Š@ =æ1Í¢©^ªÕ7lêåË*QŽ6yèHÏTgçß&LXtíõ†—d¡IçÏÙêW# móÒ•¯üzß?ÝOffN¬yv­ÕoC… € € Ð^j¬!¢ ¼¼ÁWÃk§¦¦w½æzcAsƒ/µ¤°.ôtÝ »·¸*«zT– Q_êX¥y®yò4sùaø©6¨¶4ÖFÒ@@@ ó ÜØì‚Ï£Îü†UW4üX.WèOÂÊ>ž¿tC~Ãk*à1G„ì”CfÿÖðšï¹ ¸ÎÊ1µL·ËùÚÓÓ'ýÃ÷ZƒãSò\}í‘_¹êÚó+ÖüƒÑô˜\q‚&´&,9Mò~ÕÙ¦ûRS“äýØ@@@!*‹ŸõÒÐï6-Î;ßSZ|F]öLMôæSÓÍáÆ7/|¹t]ü±Jh S“?í-{1ûÚ€-%uå«Ï™…˜'­ßËòjqÆs6ÕsxÈ›²m±LG<‡‡@@:¥@à`òy/Ýfó_Fƒ³¤W^sÌæˆï«]Õ–?£¸¡Gù×99?¦¨£šg¾„ç]]õ¯ÔœÉ©†ßèºsÔS)‰Ÿ6výbÓj¸?,NËxCÓL›å4IÏê ëQ#až¶ ‘Øðç € € Ðù'«µ7*J6nÚÕܳòàùÜÐõ‚ª3vk[LTœføsKxpŽ|î,¶Ñ6˶É6¾÷TÊ[½N" € €t@Z„Ã3­°‹Iÿ¼¹úêÝ[råÁ5Må—#_+ÏÙÁ—÷žªnuu/oZýj#ï k¨Â9 € €O p0MsW›# Êì¡1«çÆßל"ÄhZ&óÉ47²yF¾OMMmÖ{é¡ÙIêUgòžMЬmk—IF@@Î à·)ˆC?Ò»\‘9qU°Á Þû%Lqöhiåêøý}Æw“&<šé0—Õ ¢w×3Î[±1wIÚšhùìU£/YVÏ|ÕL;lûàKµWm*“Ó­æðà{&L½¬Y¶yÕì”IjUE6@@è„~ ÀŒ»ÿÙ³²¬lB¥z°Ïñé²Ê Bø^­¹XZêY1W3˜æød÷=”«:Gµå´Cß›ù«{Ê…9FiZÐn™~ÎꈵmŽ÷-Ã1 € 
€t¿`·LºþPé>ÃBƒÌçFYMú›ÄÀ¨o¾9£¿åzHŽ4“S-5ßZ«žSy3Ô½—¬|õ²iSfWm^¼8£ÿSO%hxs@@èø~ ÀR5M=›uÎ •gdô÷‹Ž÷œZá˯Ò{‹SŽq‰é‡—¬Ìœ+ƒ¯ú³efõ’eõž/ßrþ8®b¡Yè5|Y³§Í!Æ1²M‹üÑ.î‰ € €þ8'ˆñgs†Mš¹âóÂÂý«³>šÙ°Éôÿ*üêÐδ‘ò[Úè†×kε̖¾d¹ñúZ–ZÓ-³ñÒMµ½ñܤ"€ € €@ÇœL¾ˆ9¨ôû!ªÄ³éçŒb¹?6H±쵤ˑ¤ëûÜ.çk¥û#­©¶¨¶/JK¿Êmâž € € à_À À¤ƒjÌÍ=ÝIòù)wC–m)÷}S®ÍÝ£^Sçòùª/žž>é]óGšj‹jSc÷ÒBîi,4@@èØ€yVÔØ{Z“«6²íÕCÌ2ÙÒÈU™¤ïl<ýâSeàtîêͨfÇ®aõ³5Þ&·p®Ÿ3@@è €©1KñJ§%úñqñÍÏXÔGBùeJM{뺸ԱÇÔºîfK˜«‰æ›H¯KŽŸµr¨ÕjÕ­Ö‘²ZqN569õ‘QòºÍfs«|Ç'äË|£„©ÏL®)kÕ'?Ÿ]WiíÁ°ñIù›Ó7ŸÑ lQ>×m“œ†è™Né“C@@è€Ilcís^åvcL¶\pCB.ùå(úhk‘ÉíŠTëZºF•4ö¹¸\Ú?K¯K“œ8¸;¯æÜ³øc½ìÁ©‹&;­Óä="ÃÂ3U”w¦Ê;v̰S¶=ÊpüÛô³V`ޏ2­¸Òh]9ý¡º÷y=4wõ}¡U§c¯íaŸ©Åf{ïÙd›tíZoö € € Ðy*SÏyõ9£§Ü¥°áGÐU?ÙIýÂݺÞðš:×Õ‡K÷¦xìÉõç¼Ù{QN94ÿ*]§ÈN)ÍÈ\;17;Û ^Pær‡Fö+\ ‚¿Œ¬ì‘ëÖ¬š®ÒKªW{ŠË©‘Æ}ï¿k4”—,zùÍåÞ*Õ¾É6i"Â7Ç € € Ð9üö°¦x_Ž¿í„¼v·º®¦ ª½œ²çY”ã•uBÈ—‡;yP»ýd™ÊÛØ—QÔÇT˜j½*•öC×_|h?üáß|÷¬ù¦·‚èQ¦ ίyÿ˜¦éÆYéÃÄÁ]y%ÅÇî—¡˜*âùæ4å(œz™#>öw£ Î6$Š~7y’꾩6Y,}ëÎëtÑàY±º+ € € €@¨°†Î*ðò_ ¯]̹}׳_ªØiiFîÌn½KòQûòîRi7]iÜî{­¸ÿ]Ÿ¨ó.æ ýB˜DÖ‚‡ïš’{ôŠ®r:¤%È|"~VúP‹³2ú¶>å Ú¼&@ß:9F@@Î+p#`¿_ôò`ç‘¢X·³â'Cp½ ÆîvË‘#—]>-Üð# î¡F•N5LHIÁwEÞüçxµ¼ýð©ös0§£²‡ªp`/ýoùp¡©s‹VñC•Üï,r¼/Š’åœ]„t»ÿKÃWÝÝÍ\U0cE~îÊŒŒ>×ww؈Rày¶Ú6yOÜk¢^¿–®zUÿñbû;ñÀ¯Z¥ÑÛß~¿UêñW%8ÔÈ〃ïƒü<´þÏÃmÑ׉~}zù2·ÊqSÿ]ÖßQIkë~¶J¿VÒd6Žj˜µUÏ›¼o«ÞåòUÆçtù¬/åN|N-×4hP½õ"Z^Ó¹%*6ùéÔS_ýÏü›©ž¶ª¿9Õ® àsb(*…ô‘9ë`r £ÁfË3*JžZ¹ãuU“Û\3ÅP=ÔÕ`SËÜûnöÒ#a™ é'ômÙ;Œ —®º¡«(Óf-üzä˜GöZd öËRŽ[ÍúîÂB±[æ}tìˆ=Y¶ß¢ê1„˜™(ëz£ ‰¨2l € € Ðq'“ aÙlžà+Â"r‚»ýlû‰Ã_oÔ³K’¸â–¿üúïïÉ©!A¦äaÿÖð#1õëeÚç¾é&Í™§:Ø+487#ã>*lËßýwù<—&Ö>7ççïtÝ÷«Ø§Š5+Í‹³áõFÖB¯‰9&öl¬©RN‡œ'Äê佃®ÔP·½Bt÷›×ïûÜ"§7N‰¹¹ïªTØ*Oî_V”×gPâÖÃB34ºÜ¼ì꾚J;Æ÷Öú A{ Ä¡æç|ÿeãçáÇŸ‡SÏÜzÎþ|­šuÜ-I„ýmµèa/:'{÷g?½¤¿Ö6õopk}†ç4¸‰„û’w]ºSu«ä¦úÓ*ŸO î{ž"}‰Ï) ?žºÆñ9ÕQÔA@=¦ÓÓR™³&+ÇöÒŠy¹j™‹+Ì?|Ÿ:wÊGš¶¡Z£´øx££JòÒíò«Þf/=8U%)3M(,ü¯Cÿ%¿‚*‹­r £øëWÿÚýjÎ_=Ïxéýnß©ò½³¯Ä¦öÞ­òãuž:Cƒôºù£2>èZ¼~OËÙœ³3¿p”l¨–‹rÔLsÁ]Ÿ …¡v3vLÜé©ÿ{÷U™ÿü93Ã0 ï¥)Öõ+µZìJÔ–õûývñ– :ã]EVMðBe:š¥¨håAJ1SPÌP`k›¿Ýýo»inT–h®dâ/Ü„ærÎÿyÎpÆanÊefø_pÎyÎsžóœ÷ÃÁùržóœ¹ýÞ>íòkìåÏ¥±‹ÊË3ã•],ú›é;PÚW®é½ÏTiJ\·dÁˆcŸÄÓ»k„<4Øï*« «[wžÌBÝ_œÓ°@€ x¿€û`ô½[¦iñ&#‰’ž¡â{ܳ˜\9½6!·¤Œ ÚqnÊÚú~°ßÑà†=óÕ`’É3hBbƒD§™Õ*àÁúÊÛû™çmßòlÌÔ—¿ 
¦ÚȲ+×2˜@•¥ }[îÈŒ ‰Nû…¾Kl±v`HÍŠ'âòJ¤b÷'&¿þ{ý¤ÇÎ^Ìcia½øx.b[õšO²ºÜ4ѺŸXšß ˆ\²àåÛê·ÓA<4¶†ƒ/aü<´ýÏC{?qpCD»ýNùZªã–!@ÀÜ'cxñÿ3N¾yê÷VW±Õ¼M«ÖEO›©0VE±u¥¥09-oQòÆ(:Š ÷&Kk8 ±ú ¼¥OxùZÃôkÞ]º™®mΦL4Ì¡/€¦ë–ɾ¬4ȶdÚríÝï|N÷zÅÄL·?&nÄÕ}LHHý~Û‡Ö¡íbÙ`7O‚ÓëšoÎ@€ @À;:=›–sª§Ï_R–D†òN çØ(ƒºÛn`ïÍÜ2bî?„@þ£9‰=ÕµÇY׿ի­;‰/·‚;,P²OáèÓWt°BòŠ=±• ´LÖ’}5œh`CHÃàË!Gý~b ««‹Ãfq‘i¼…7}䜎u@€ @ k4`:㔯æ.+«®¬L¬,¿ÚØñ7=F®—]«XV\jÚS3­ç«¯Æ•ЀgŸ«ü4@ûÃê”´‡]mëˆ4vlVWÇbu^ºð?»Ú†4@€ @Àû:5‹Î©îéO•²Ú’øä}ùrÓ»R†þOŒdÛßüËÕ%lNï%½Åæ.&9Ç)vëõ©7Ýr‘·M“Ø1Ù±i¡rW7QgWÙ‘@€ x™@§`–rÄ?6P1³9ׂ5sÿÈÞ v©šËò&%Î.âáCWûÑ;M÷ª•Ùz½¾ÃºX²c±c²c»ª«+«³«mHƒ @€º†@§`\UÙ¯ó€Ù¥p ìÁ,¹Œ«”òÖY-‹é2ÃÅÄqQªÀ¾™„±c°cÑ¡ó£\Ô„%UÔ×µ‘ÍH† @€º‚@§`òA/}Î?-®{»9ìñéß `ïà²òÖ )¯~Q\Ï ³¥uç9½5QØ7·=»#²²Ù1رœ/­³:²ºJë˜C€ @]S S0ã°{ËÙ‹¸®ý£ÞJÐMm¬ ¦4æÂµgØöÝ86Œ¼}z5qæ"éöçñN˜òëö˜ƒ•© T~ÝÄ/ú°š.Öѹ^X‡ @€ºœ@‡=#åJ6?œ«;wéb%}Ùò÷„̉¢7öëÝ}A¿¾ƒþ.÷ëQséêÕ;Μ==Óª‹ew¿èý/²b^Þ{+75,íô ~ÞÝ÷É念¾Øp‹m=—Åq>GÖløàŒ„¬lê=a®öwNcïù‡š·vèrÀ ¶ ?ÿ›ÖÍy¬C€ @]S S0F¾¾l9fvboyåùD+¯þùBeæÏŽºh ÌW…qƒHóÆŒŒYfÚp¬*È÷ }õñcÎÛë×éèˆd¾Š3Ölü ƒ·Z¶¾¶pöäu™üöúôÿÉ3ØK–]½çËq'zgïŸÆªº±ñfÇt,C€ @]W Ó0FŸ“ž²ðÕœcÉ—þ’¹ÄP]>ƒðÖ`©I|d¤¤OÀuÉïÜ‘Áqz6¢ËI¯¿Nƒ°áª@ß}Ý c;ÖN r¹"a͆÷鋟…ƒ4ùV+w‚3וšLWªY>¥²‡Fðñí+— ÷ÑÕÇéžÃé´Ál+¥©‰ÝùbÁ«SSù° € @èZn€1òÕ1\¦³…õ_,©Á´öÝ«.WXÀ»%êžûdïÑ粜CÚÙPÙ‚*“P¨ˆZÝOÚ\?o:ØrÊ,>óźâÎ×M2H€ @€@—p›¬­Z‚uG¤eÅ­Nyÿ ™ŒcƒsØï¦µÕ1)§‚vˆ7ÑA2 @€ @:uÄöôgÁbkìeÍmylv v,_m©Š² @€ à}^w̱‰êß½5uMJúFN¦x]„±´Ûa›´,ž–µOà-o½š8»Èñ¸X† @€ àJÀ«0é„“lRÌêÕi¡ÄWN_˜ÌM Á\£Õ ¼Nнv uÖI¯Æ•´ºì@€ tY ÀᯠŽûM££!6Õ’¯Ú¦U4ϪU)©wùp¾Ïñ„§ÁX¸{èà†AD ± ŽTÓåJ §iÐU,#²£f¡î/KãiêØ@€ @ 1÷ À£Óòñáʰá‰;N:WxúaASžª«š41¥‚n qÞÞÚõú@êCºûÂ@€ @ ÝÚäy¨6ª%çCoAm=j.>˜2ù^Ç2'ë·Dh𥢉J÷³ã6,C€ @ð÷ À8Ž·húd08[6F Âbf'®7qˆ¥ËeÆŠ­ËBc˘ @€ xš€û`T³Ì!V0Ä­GUÅZ­VWžOdëwYSvîÊ ámªcë˜ @€ xš€[` ïãÍkôÖþÃFÞ€´’1CeCRÒsÞHà @€ Ïè´A8FüzfÌÚ¨”ɪ²ñħö—KWì‰fr¨ôŽÿš={öBß}g@ÕWËÖï̱oÆ @€ è´L~äD/Cuu¬Á%”cL¦"W®”­•²UVV±E`ˆÃ|uÊûz™Œ[îä¶‹­«Ì^|<ÇÞ±’l97qøÉ:¿ntÞÖÔzÏ^½â³âï ÜúáŽgv}”ùP]¯°x–ÿÄÕº'ìûÑàO^v<• ÉJ¬ñOËØ2+'+KÆN³òþÁý WŒ#40KÛ–5&3}ãB–^Qg îJð•Ÿ>ô¹\VS±jÓ¾uö2±@€ @ÀIÀ0ŽÕíÔ5.֩޶Uè¨\nh:qÇ»ïnæ"Öڟ˪¸O{ÀyçÒOÞɆëT—Â…çÛž=£ÏŠÉûGˆï#«(¿ø" ÅØnâ7‹L^Á‚56^9ãŸrœ}Gú‹ ø@€ @ ÷ 
À8ޝ£•4XÔC_žó·ü´U}é`2ú¥Ð§|ro´~ÒE9Ýî«àÏ6r.Í&/>,h‚¿ù=Ëø«îò/¥BNç=Å–÷‘ï—ÒØ¼<ô©clî§ò9Cˆ‚l[1ù©¹9zðÖ`µêrÌ’Ôj‹aè#}kÆqyö íƒ € @€€³@§‚è\¶.gÏyÝZ\c’GfÑ7² uöl¶Š dËÌÞQÚ“›_ Ý ÇÒÁ7”4ç/©:âKçjEeÑdý§‡¥-fC¶}Poáœ”ÆæDðas5W{ÉHç‹Í‡Hq}ŠÌD|»½ø½ìøßŽtS ­ÏÏÙ–Ö÷¾îfÓocæ]fû`‚ @€ à,à>wÀhÍØs^=Æ,êÅ)ý +@ÀÈŠïÈElkõ&_•Š~±ë »Ë4tÃ’˜ÎÇpîâhª<¯ay8á²›•%¿è°!ÆþjHVÖÇ>…?~¹—b>8.1šŽŒx¤°ðÜG¹_–MŸ4úçr±@€ @€ ¸Õ0V¡M1°;HϰeÖ‘Íé»»ÄA9ÞÏdk­œè³\Y„؃­ÉtÄB34ãÈY! 1Oüä¶‘æÉõÚ@v#Ì>ùßy‘í´­Ó:,#ä;¶òÅY«ÞŸ7…ŽÚ}ÈŽÓßê|é£as#‡ôÛøC­ÎpåÌÚâ´è¾aq{où}eRŠ‹‹i¹%óOþLŠºåš-9D›å¹ÿÞËéù-o³Q  Äoiߎ þîÞ9¾…5¼µl·…ÅíÿôËœ·[®ËB›HDû4ãF›ÐNnÔMTíÔN3›ÂÂÂÄñ)šÉvK›Ýê˜ã|zê”ïO?ýäSðuнÙqÛí,ïX5çàu…šG/Nb¹BÿDzµÏNWÜèóH× _g>ÎÒý}ûÿÖãÓ¾ õ/?³¼—úzö褌ïÌ•¥S”2CÉ“qKK÷Ž{+å¯%f±ë"[Æ@€ @@p»;`Óõ©׋úh½ßT?e‘,B_Š\¾íþ1§· ?(mlb®§x&'ÝŸ”,Þ¹bYu!ü®¸¡Ž»å>óü¹ñG·’3åòåô®Ø vWŒm÷9÷Õ6OzNsàÕ t‚f¾«ì­ùÞÝÆ¶:LÒEçÎŒyZ±ØÚÈ{uÊûz™Œóˆ»JônÝŠÑ#^Ô·‚Y!´‰Àð„íê]ÐÚƒ6ö»ûêwÊq[Zÿ丬_cçÓÒr[›íÓZ±ÎÉvê÷ÖíÔZ±ŽÉïVØØ¥à Åýœvè<±—"ëtß™i€$wõž0çülý›ôýÔEg¿ÕjcH· à/×Ôe·½º©ê ìå„sµµSb ýM$rÒÄèkóçö{ûüµË¯)èh‡¥±‹Ê_®ëŸE¾`_D¾Ü¯N@CI^鯭1Ç3q±¼?îþYPŸ³‘k•Õ‰†ê 1øêDRR3óìÏ…±]ó¶oyÖJÿ°òŠà²+ײà+Pe)ز=÷Y¶=:í—¾*C™v`HÍŠ'âv—°46íOL~ÝLëvììŸÈUÏÛ¸$¡/ônyñÈ Ö O8ÌzJ·ÛtpCžÅj7] @èN ÀöÇ=\ràðáÀ«çÏ«Zx¾ÑÑ=ËWmÎkan÷϶1A f®^ÿþ_h·¼-´K¢Æýkí5H5Ï ³^]83Û jƒ*@€ @ E€±ŽŠˆ¨¦3öÕ¥'H¼¹6ý¨B±‡ãÈÃ]£™“ò/³Å2îųO5“›!@€ àVTКƒ¥gjŸ ÙìFÕr¯ªPf„à˽šµ @h™@§ßkY5»N®M›æÕѳ_’ñŽã¶Ò¯ ®söŸ© •ôkÆ«‰±ûÏ…-€ @poÜsÓöa†ÙJèÛU…#nZÅ«3`¾:Œ‚ @h'`íÛž±hæcei @Þi‹ò<± vîÌ€YxbýQg@€ 8 t©.ˆ´ íÑÇ5;LtKó9B²å‡hFEŒjÓEôz½‰½à플¿Ê9.“p\7çãzåº \³ ´×c½gÈK¯l(œ @€@k¼þXRÁÏ!¦ÍÌ×jµ‚N§ãµÚñ‚>A7ÕÒ¤ýÔñö|ZaÖ´qùtÄ=ÑhÜü7æ°2Ø×œé1YÎûœŸ¿;uw•P  qÞÖë,LüÃ48ü{[”çÎe°sdçŠàË[ uƒ @¸¯ÀŠw%]ã×£”2®„È䄾h«ø‚™±tìpG°—毚c¹PœÉÑ!ƒ5Vº±Ê(‹š4qäU]"+û)5HŨ‚ú¤”äÚ ÇÆHû]ºy¸¿ñZÔ==L‹¹¨¬r)½­çII³Î«JŸx~--»Ù;ym}ü(O`çÆÎ‘k‡€ @€@‡ x}DUPPÊÊçB’Ŭ¾Ìd?;q½OåùÄülZOWµi×D¹N—Jˆ…d%ý¹ðݵDf¦wÌÔ¼ð€Âã~¦ñNÚ¶ì1ô]–ÑZmbEy€¸ï)ÁW¾L÷¹\VS±jSÞ:1­¿Ñ.‰Zü’5ë3¾àdÜÚ%±G;®ãŠ„+/LNZûYÇG‚ @€@Ç xý°ìôô…RðÅh/=§Ofs+/³ïþ\úÉ;Y$Ú/¨.… ϯeÛi`#ÈûGŒd‹å_¤{°Eñ›E¼“Æâ BF¯œñO9 Î>Š#ýÅ„úƃÙ8”Þ+ì C¶ÛaØ9°sAðÕnÄ(€ @ÀM¼>svîõUúoXZ/`ŸTÈé¼§XÚà>òýl.Må¡OcË~*Ÿ3„(ȶ“Ÿš›s¡go Vû¨.Ç,I¡¶†>Ò·f‘צƒoHuhj®_2÷ü×ÿßçèsj«èsS|SyÝq«3«;;v.îXGÔ € @m)Ðå0ké±= ð?É3$H‹Ù vãÔ[8'¥±y 
|Ø\ÍÕ^2ÒùÁbó¡+¹ e1ßn/~/;{8¯›ÊX°h}~Ά´´¾Ÿæ¼×“åïÈiïÞ½Ö¤„—ßàë‹tÈöKyìÛ9«+«3«;;‡Û) ûB€ @ÀS¼þ0dž=}n–Úp…h”5…ÃóN:ncË*§Såyš¦qÂe!7ë€|eòÆH9Y°dåc&N=¥¦Ø£ŽK,Óª„#……„½1yú¤ÑEÛ>Úo{¶Ì©¼Ö®ÓÞy-ŸŒu&òõ7ÅäòU:ÖˆO=»“_?Ö[å«<8:ªØkŠªA uñ[ÄGM[·S+r7ú;açøV”Òú¬·õE¹Åh'×ͰÿÓC.7ttû£}\6ƒÛ%¢Ü®I\Víä’¥E‰aaa\‹2ÞB¦.slÜü•shð¥%Ä@¶lÏ{ÖÑJniž\¯ T:¦ûßyѾÎqü²¤„ï’ÞünTüëKÕ¼)tÜÐîCœþV§ †Í| Ÿ¥ÇÀÅ‹zhqZt_û~¸@ò䣒û èÀ£¶îP¬n¬Ž¬®˜ @€ Ðպݘ…ëbd¥ÇêG9´úÓ‘ ÜYú?~”'Ÿ®ÐѽôC`ø:óq ]ñ÷ìÖŸöe(WøÞò^êëÙ£“²¿1aJžFf(y2ni)9,¤kSukk‰Yìº(•s«óÛ‰¼×¬OääŠlzì;oõøm¼ßyÁjÑŽŠzÁã icçEÃ7øÝÒÖ§ÖØï„«o<Ü)Çmëóë¨òÐN®¥“7~àò稱Ÿ;×¥Ü~*Úçö ;¢´SG(ßþ1ÐN·oØ%xý°úŽ.¹øÜ—‰d­Ü«²rè ™ûÌóçØÿ:gÊåËé öÛ>ç¾ÚÁ²%=§9 f§ÃÕ› ß;Cˆ‘¼³5‚˜æø­‚ŽÔ!NÎ3uÌrÒÂÙ…«u(NÿOsÄ&ŽBëÀêÂêÔD.l‚ @€€× x÷00 :Ýç¶VT’«gfN™b»ò¼%hÇ,ß§¹ˆmÕ$œ«­[èo"‘“&F_›?·ßÛç¯]~MAG;Ô(…\T®ørå13^ÙÅ:ÎÍ‹ô(ÝE“kzï3Uš×-Y0âØ'ñ´‹#}¹Ø`¿«¶cvîwýÂYWh ~»:%c1Çq«èW‡¶7åÐB¿–¾šë­/ŽîÜÆÑ!@€^ êl›Ýá»@ dz'ìªp¶£*$‹S<6iØå³£ê€ã@€ @ÀÝ:ôŽH‡Ÿ<}™2}Ù—½K¡óñ³ÙRÒDóæò ½cÂRöe¥wŒ„-™¶ {g÷;ŸÓý…^11Óíσ‰[quR¿ŸÀö±íá^ß“cÿ®ßðÁÃjް3_0ÝŽ5Ì3 Ü4}âÌkíx  @€ ðî욃Pì¦ÙÍ/5¦U ! ƒ/‡òë÷sHq¿E}ÂË, µzà dœLcÓ6,䯙 f^à–¼šðòÆiX‚ @€$¯ï‚((æ7X€Ä[…§h·Ä37Ros‰–ÅÊDðu›ŽØ€ @À«€yuó6~r¯.Œ="˜ø‡é¨´—æíM¬ V+óöJÂÞ€ @ðntAôîömòì’’fUÒ ¿_³ñýxú \ í’èÛä7mêèo‰I 3é;Ö0A€ @Í àXsB]`{Ò‚™©tXþ'詞jÅéžbû°}[±²B€ @ K ëÒÍãä_MŒû×UÓÕ_Óãdž¼‘¡Á’Íò²}$c€ @hR]›äéZ×.YRMÏX·&%ã¯tTÇw Ç©‚·?Ÿiÿ~ƒt¬@€ @-À°1u­L,Àÿ}™òÒ™³e–†àKÁ€ @­@Öz³.±GR¬ï®X*†ñ‚ð!ûbË,­Kœý6¦g,4*ÔEVÞäŒð\úÉ;Ùݱ~Au)\x~­¸ãyÿˆ‘l¹¢üâ‹4c‹â7‹L^Á‚56^9ãŸrœ}Gú‹ ø@€ @ ¯ÀÞOòù¸¥ìü;ÙÓyO±íƒûÈ÷³¹4•‡>uŒ-û©|΢ ÛVL~jnÎ…ž¼5Xí£º³$u„ÚbúHßšq\D^µ´æ€ @€\ t™A8\¼”f1z(éÊ ÞÂ9)͉àÃæj®ö’‘Λ‘âÂñíöâ÷²ã;ÒMe,X´>?gCZZßûº›M¿™w™íÓSqq±Ðå  @ câ·´Ùåï²ÂþNØ9Þeþ¶Jlô¸mu€.íä|ÿ§‡\nèèöGû¸l·KD;¹]“¸¬ÚÉ%K‹ÃÂÂniTô–ÞØM¡–ìëuyTNgdªö)üñ˽,{ôÁq‰ÑZ­p¤°ðÜG¹_–MŸ4ú§b° @€ @@À0Ê ¯ïœx½6ݳOþwG^$E;mëÇ/#ä;¶òÅY«ÞŸ7…ŽÚ}ÈŽÓßê|é£as#‡ôÛøC­ÎpåÌÚâ´è¾aq{ÅnöÂna¡=#ï[¨vé‚Wßx¸]ïÂvó_íö×¥Îh®á ‡ÛÕ«±ß íÝN·3ŒÛâ˜h'׊É?pùóÛÑíöqÝ>î–Švr·q]´“k—ÎNÅ0ÚBÿDz†øìtEƒw{¾Î|œ¥ûûö~EãÓ¾ õ/?³¼—úzö褌ïÌ•¥S”2CÉ“qKK÷Ž{+å¯%f±ë"[Æ@€ @@肘8¡tþâ<÷™çϱ?û)—/úˆWýäsî«l1é9Í1‰Wo.|ï !FòÎÖü õÙnÌ*èHâäÜ™ñF,A€ @]WÀë» FëS#~þj‰ê|EuÕP ³bc·ÍÆ;·ÏP踈mÕ$œ«­[èo"‘“&F_›?·ßÛç¯]~MAG;Ô(…\Tn9û3ã•]¬â¼Hß{c3äšÞûL•¦ÄuKŒ8öI¼–¥=4Øï*›c‚ @€ 
à(àõwÀLåW‡¦¨êêªX¹xærRU]k2š£®Ÿ¯¶ßªÊÛ¾åY«Ò¯ÐÊ+‚Ë®\\Ë‚¯@•¥`ËöÜgÙnÑi¿ôUÊ´CjV<·»„¥±ibòëfú^°cg/æC¹6¬/u¶´¬ª“-žIDATÍø@€ @À.àõwÀ¼»l3=[öuÓ”íD_¼œCÈ3‚ ° ”}Y9š¶%Ó–gïì~çsº¿Ð+&fºýy0qË ®îcBBê÷Ø>¶=ð€ @€@C¯Àžnók4€âi.öÕp¢U ! ƒ/‡õû9¤`m'ÐÞ£Üaö±íj’ @€œ€9‹`€ ÐÁõ›˜»¢Î€@; kG\ @ð% 3#=¥®¨' OðúA8<¹qPw@€ @À»€yW{âl @€ 7@æÆƒªA€ @Þ%€Ì»Úg@€ ¸±07nT € @ð.`ÞÕž8@€ @À€¹qã j€ @€€w ó®öÄÙ@€ @n,€ÌUƒ @€¼K˜wµ'΀ @pc`nÜ8¨ @€ à]À¼«=q6€ @€€ sãÆAÕ @€ ï@æ]퉳 @€ÜX˜7ª@€ x—0ïjOœ  @€ àÆÀܸqP5@€ @À»€yW{âl @€ 7@æÆƒªA€ @Þ%€Ì»Úg@€ ¸±07nT € @ð.`Ní)ç”ÔªÕ‡hZµ2C€ @]F@Ñeδ™” Ÿj¾Pœ©ÓéˆV«%*¾ }ÛžQGxi×qóߘ#+û)•­‡¨­Ù›·åè¤ml>rZ|þîÔÝQB®•Uî¸ Ë€ @€pŒþ ¼4Õ ¾8"`M`†•¦UeQ“&޼*ýˆè „|©¸UPŸ”rƒ\»aáØiûØ¥›‡û¯EÝÓôÁ—¤‚9 @€ à(€Œv9”—§wµ,$+±Æ?-cˬœ¬,™*Yyÿàâ´è¾ ¬î‹õ/ ¥mË“™¾q!Û^QgÀ¶‘S‚¯üô¡Ï岚ŠU›ö­Óð € @€€“@—ÀžK?y'ë‡Ù/¨.… ϯ}8N÷É–÷œ0bss]% ¶Ø½1Û7‹L^Á‚66^9ãŸrœ}Gú‹ ø@€ @À…@—ÀBNç=Å\÷‘ïwô)}ê[7ñ‚?›ûö ÿ! ²mÅä§ææ\èÀ[ƒÕ>ªË1KRG¨-†¡ô­ÇEäU³¼˜ @€ ¸¸­ÿ\èii£æ¯œãWöcê¼HÃÀ'âö—Hõö¯PuáÚ3÷ô¨Y¼jSÞ:"²1:U%e &2ì†}uüoGº«Œ©™¹#6¤¥õ½¯»ÙôÛ˜y—íÙnc¡¸¸X¸Ý±+ @€ p aaaí'uù;`R{ܬl)¦Êó ‡“ç8>7+K~ÿÐaC† ýÕ¬¬} ür/G±G—­Õ G Ï}”ûeÙôI£¿‘ÊÅ€ @€€$Ð凡—[ z½6P)¡°¹ÿÝ‘IÑNÇ$Bh¶ŒïXâg­zÞ:nh÷!;N«ó¥†ÍÒoãµ:Õ3kÙàaq{KкµöŒ¼[Wä† @€ÚB Ëßú?~A~vº¢Á;½ _g>ÎÒý}„›ºŽOû2Ô¿üÌò^êëÙ£“2¾3W–NQÊ %OÆ--Ý;î­t¶_-1û°9&@€ @’@—ÀrŸyþ{ÐêL¹|¹ {_OŸs_í`HIÏiHXâœ[o.|ï !FòÎÖü ¶±• :R‡89wj´¥â; @€ Ðuº|F¹ÚZ¥¦™41úÚü¹ EO˜X®¢£j”ÆBç—*™ñÊ.ÖWq^¤ï@޽¹™NrMï}&> tÝ’#´ŸÄÓwŠòÐ`?ûKœÙ:&@€ @ö;>]š‚ÞÕŠ™úòrSm¤ä¨²¤oÛ;R ²XztÚ/}…‹Ï ©Yñöæ<½”—½ˆù÷úI}hÐÆÒÂzññúw÷l¶oÇ @€ 4èPóôKA¿\¦4='g[φ{ÝX«ßßõ¾7²a € @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€@[ ŒŸ³P?{º6«­ÊC9m/ +¨™0iâ7;VM oûÒQb[ tæµ$×VçÑÕÊi¯ë mÒ¶?Iy}µí™xoiíu-y¯XçœY{\KÞüûNÑ9Í„£z»@Måå)o 2‘ã»ç»fçàò¢?~«Q\/ÚöQþCM•³dÃùÙ#y„IVV®Œ_h*WÝVûËAj‹uèO—j~C ŽÞ¶ƒ ÈbfÄí”*µó" ŸˆÛ_â\æô„7¦Ö\:³QÆ[ƒÙ6•‚/Ú6?èI.<£Ö9/Ömq-MJÐO5_(ÎÔétD«Õ’@_¾mϨ¶¸–»J»¶åõ53å“{«Žíÿ_oµµI ¹¿¯zܲõætÏö:Ï6½¾è,&ÄÍ[g­¼’(ýå"Pe)Ø’¹wD{Õ¿+”Û–×’Ý‹¶Õ؉Sÿ­äM¡ÞQ7íõ Ÿl·oÃÂ- ´Õµ”TðsÈ/ûVíà×£l¿ïÆ“°;dÓô²¼ªd·¤Œ М€L^IÚ îÑ ‚"zÚìüj|ÙþZ oúÈ4?ƒ/–MÜCú°éýºàVÒÍÌNû¶£cZÆôU[ÃÇëtV|±2UôŸó4báºÃ…Ÿ2Yðå«ð-bÛÙP]ÊÅç¼Xwèàké¥ù«æXhðÅþn¬ ̰ҪTeQ“&޼êP+,6#ÐV×WDrýèÞb|uÓ¨2Tšn„ÈÉ¥¦=;ôÑÍT››h£ë‹f¤NÇó4øRÈøŠ  
¹Ò¯°ÊÈEÑ?Dâ³VsíÐÄö¶º–ñRüëËYðÅÒ fKOÇmX¾E6º–Šw%]cÁ—RÆ•™¼‚Ž_23–Ž~‹5sËÝðKÁ-›Å{*u»1?ÿÆR…±2ªO&ÅÔ–Ñ3æíh&DkA)]+‹JÆÝ^Dï|Õ?xD!3Wh‚ú¤4¦rÏ ÿ¯Où⬬lÙö¶?¤ŽÏ dî ñ'ÅiÑ}Ûé6¹–è_…åeÇS ±¬Äÿ´Œ-³r²²dZ+ïŒvjýOãí^_Ý?Kü£ÆÓ÷˜^HÍÈœ•™‘:«êžß¼ÀjròjÝ­¯öp%p»×׈„ÕSýiÁ=ÔÆì»ö„lNO_¸óíÏdgçp·[¶«úvÅ´Û½–$³‘‡où™åÒ:æm+p»?ï*úÇ‹•cB{}¸+k`ö®!æ ;ÅÏÿøÙ´¾mkÚ¹¥¡ bçúwêÑ'Î]¼Èrå—µ¶[DÖ]ZX——êoòr8KÝPVI¹ŒT<ØO19)ù£|©ÒMíoË#'‹ç'Î9Sv>•ý Ée–Š9ꟌÛQ*•ÁæS®Œ1”þ¸G œºiøŒÔŒ=³Ø¶;îy0k¼_ÍûÏÇm,ýݤéÃøëA,ÝÕVÜWQ¨×ÞdM¹0ì+SéW{\åó¤´8úœVùžå,F±ü”¤ðƒ™ê(.b[5¡–'Ç'.7W–Í'õ]úü”Ö¶çWXyY£Ÿÿnäöœ¥®ôsç9­Ò5={Ñz«|YI &˜v3b]Zêöˆ]%èjÚå…_jU¶J˜måIpÑYKžP  aÕkr{ý9ò ¾üÄÛÈÊ+¯ÞTX}ŽÃÞ-pìÜ¥‹Lbðe!wöêÏË”%תe±ñÓFŠÞû‰ãO²à‹ÙÜ«éð›ßӿӓ䴜ŗyÁÏ^ O]8%ø–Ƃ/?¥¼ug©5‘ÈWw^ÿr÷Ûy+šË/,W+íî’b¡·êkMòÈYS÷â)7³¿ÄRC÷aÁ—Z©.diç+Iↅcc¤í4ÐS”§èjXð¤–g…ô]ÁÚrïÑëÅÕ9Ñb× |±ü}®žWÚ÷kÁ‚œ_¬Íž™ûs ²»e–&¯»–BNç=Å÷‘ïwÄ.}ê[7ñ‚GÊŽçDš¹>Üåú¢ßøˆÕ»¨T¹ç­…Sbôôcªãûް+Æõpø˜Òàì¼fÅ#®/Ú À—þL)³–œ8|B³D¯XºtéðÌ4}¨×4DS'â!×;…‘«>°† ©]ñd܈ҮtÂ#®%?g½¾Jÿ Kî¥ö¹Øì±I]égÏc©Í+NYª–éè] ¤bŸOýÝ’Y3æÏ™CEÃ#Nè¿J?,ùuý1éVòØ%ïe+Ï~™÷ÑQóôƒË‘&÷w¨ð]!е›³õ,iô¤éߨ-dè7éã²»„~°—Étºµ´6­Ç~i°ŒÍ,ß5£•õ›WHwrŠt¹8vIêåÙÃÁCî°Lcu5ßäñØè”Åo±Hõþ^|ü²w³Å»³Ó> ½šÕƒ!ô›²,õ‰#¯î·¦„­Óiáh­V &™x—ª¹ým»°ï&’6Á·[pÔ¶òñi_†r…ï)º`J¦rØÖ‘óÞXÀ~Q ík—´~Ÿ˜öÛ5û÷íùvõuñt“ž~µz±pMŒª´ˆ(eu%Ü R×êÜa/»–,fCAê-œsä $‚ãº7,7w}¸Ëõ•·™ÓON9f>ºµø{úÜ)ÝKÿ\FÈâH2P¼î ÑØ9xÈõµî[¢þŠžƒ‰W‡®H]Q%ÎéÓ´‡Ç„Q%î:0PJ󯹧\KÔ^/ÿxa"o¥æ­x;•ŽòJ꓇\K®>÷YK‰½™þs°ª¢-'ÈH3ûK§þ@/K|pTV9[ß=ûñŸÙ³vrñaZ[YÅù lÉJú×iéõ¯G„•Ÿ ei¥•Ü6oíÄúàû—‰¿TÿgT·G[»¿»äoɵàQ×R=¬ó*¦Êów1o³z4s}¸Óõe>{˜þ.h8½÷wë~úaÅ«ÿ÷”ë+ðómÛþ/ã+îï4.>~|à€ÈÉÙïRïúÇ5у¶ž—­yȵ4’ö¸QSú±ø ?ëÐ?6³– ƒ=ÜÞsÐМžr-9î=}nk3²¦pxbîI nqÙï L]L@QH&'þÒ7ÿ9ôîT*íçÒ`b#æµd‡X¬'þ’“YÍ îJù?3ËgåUÁ/:ò­Ã>â⯺«­Ÿc^:šÑ2öƒÜÛ_™“FFvü#ÿ|‘œ³å­×}¨ àôÓQ¯Š†ã~î¾l ¢ñZÎ¥Ñ*ÕOºøEJb°”ËWZ óæö—²›éV&•ó}é50]“vç¾ álƒ„–¬Ð.>:]ûÁx„ÞUÓÄd_nÉn%ׂ']KòúÏó×kt%õ¿;ò")ÚéŽMpËu’~®+À]®¯—毜CÊ~LU+LE[w _A–“ï&Ï8Jh7ªYSGþ•¼g;OO÷”ëëÙ°ûÏüß×ÉÀ ûËÖïÉY¶^ü›Y5íî6LIï¸üë‚õiÚßyz{4VO¸–” ¿sª+§×²sðïþØ•´´gBÏWj4ÅE$—ª}#èš?:{ÁÏÒ¡;WOM÷”kÉñsß8ú»OVö#„È@¶lÏ{6ãCOÕw]o`®]ºtj4HA–›Êº¦E‡“°üß³’Ü_M»œµ%Œ™\óa?€wRÖ¦ï_(Þµ±kbÐÆÕÿuª¹cš*ÏÎgÇùjEìùÂ?Ųü¶þRrò·ãÿ>òåå%4i K÷¤‰u3jjú÷Êÿ 
¤Ïô¡ƒfÌš8|EØSÓkÆLœzZÍ[BÙ~ÍíßTÙŽÛlåtT¼@òë-l@<6±6¢½U9“·|¢wæÆètföé~@HÝŠEë?ÉiùΞ—ÓÓ®%¡ÿãIÙqòÙé ÕÖK↯3g·Àü} –¥s‘æÍ]îr}˜rÆ—þ këŽ}á·®£g§ðб»±<’Þ£—¡í\Ò¹u•¹Û\_<:k“Ô#@/ùËÕw‰Ýùÿ9LÚÓ³æžp-I?Ñ_]¸ò “ÝQø•8gˬîW ríŸ ¿Òþ¾{t/BözÍï8v~-ÜæZªÿÜC_[#+=F?‡²y­þÞø;Ϋ»/´ô¯«åó¹ë^±{Þé+²µì?oéüg/\ÃÖÍW¿÷giô2Æ¿”¨?Iÿw·T”Ÿ •ò5·¿”¯¹¹uØãeÍô ôåeæé—Àæl•¥Ý<Ýü·¶µ³î‹ŒŒȾþ+ò¿ú™Õ!ÙìÐ3Ü=ìeí3Ü\Žû§ð õ/ì]fÉ‹Æþ^ªm}AaúªéálÝdZÈF:bá}3ª)íwh{Á1ÛÞÜþ,OK&«ÿ¬96í­ £+Ší#¶Q};5(âb÷;Y¯âGÊÄ»› 6Ò)ÆLœt•uoØÃ²xÍæOô ¶{àJsׂ§]K¹Ï<Ž]“gÊåË7øœûjkž¤ç4<°™\V¹¹ëÃ]®/[w`ú+ñ'WSÿ oäw¥ËÓö¨D¹¾èïEö+{L8<òFwÝï3Sx°JA—{ïä ×Òþ¸ûKÓÏ /ÖN`Ÿ~5ôwCØ'Šjkö‹‘ ˆÞ{Å[[Éc®%Ú1K7—‹Ï}Ñq VîUÙGäõ²ÆÁ0/kЖœNVWNº(bbLš}ísz¿{©²b~eéÖ•ícŸîŠ]ÿŽ—ù§®\¸àÊÉ*óóÖ³_Šw–XùÍíO³°ÏpÍNùô¦‘Óâ ü×¢&Nˆ.ï×»ï‚*«Ü¯òZy¬N5”ÀEëS#~þj‰ê|EuÕP ³bc·ÍÆ;·ÏPèØCèk¢žfÝ t1¤ïdù³á²vÖëo}í©9ñ}tÞlR¨?ÇF?‹þüu‹¦bW’¶$¨Šý§~”E8j Íš¦Ëî–ño.ïÆÍßG‹›ÜŸîîjâœÿ*£yé½xa—NûåYyÞb§d¨ûüß媪çMÕ±¯„ãý²'LŸ™ àªkþøÊ}ì?´µÿ¸3kÚôé¥Ýý ÿZ¿){;ШWÞŠSÕ—¥Ê7jò”)Q¶OŽrÒÓ¿ö¯ë7ïÖ³|ž45w-xÚµDè5Y+¾€D²ß óçö{ûüµË¯)h»i”ÆB.*·ÁuæImå\Wϸ¾ø°UÇ5?°î†ºeÑÆ7úN». ®=wá§-´MH7•©€þ~kÝ]hg7^÷¤ëKqÇiä·™ºTߪÅóçÇŸ«¨'+#éëæÉ¼äÜ|7f¾íªyƵÄþ¯J¼é¢h­–ôÖÔž· dZÜmS¸ms-Ñ^2‚N÷¹ RIf¬ž™9eŠ©/[çyKÐŽY¾O{ËàCΟµÜö‡k[ý;¶>bR¨ŠØpâW˯.g°ôP,fÿ™ïÑ\æî›ÆnÎÿXzq•QΆªg5^„ØÔþ7j*~.°c¼\.Þycÿ!IS^fêNÓ=ƒÕãç —2Ëé°õ<ý°1 Gàb–ÇT~uˆÑhŠª®®Še]Xªª«cMFsÔõóÕÎcˆ9Ø7™ÕÊc“Çþuxo\X©Ï=O¼À¯UbM†*­RÆ—Ìxîîdvbw/Ûú˜•Û_e¢Îž=‘§QElþšb›…æög™Øäï£pìr!ˆ/³m¿³_ÜáÃx:ÀÇÕjSliéÙ=,øbuyô± tÄÎh¸®eíb­OX­Éi4´UU5QRQòúö—ÉdÕt»ÙdŠ4‰_†Hƒ¡ö×R>O›7u-xâµ”·}˳VúÊvM–]¹¸–ýnTY ¶lÏ}ÖÓÚ¦©ú6w}¸Çõå!Û»:^ÓþþUŸèïÈ‹¥Å{X›ˆïKÌÌÑÔ9zÃ6O¹¾>Ùðêv.䮬~)+KL5‘*_”µ²_£ÿOyCû°sðŒk‰þ_å<Ñÿ»X’W½^ÃùÖ=åZ²ýEIÿ’o Ÿ-lŸL‘ ?´©Ï}§ŠE¸¿{îŠ~ݸqâTe¶Í±+’Ófq´½¦öwÎßÔºíX´¦›š²¡ÛšlCVXSûßt°&¤cѹǶMœÞmm’l+ÄÖÿA ¹ý+×UºíX·w-Iõ¡s¯o림$WÎRZSûKyZ2—ŽEç.ÍëÓèïë–ÃSóH6ÕßfÓù×k»úvê’ÿ—5uîtþ¯jì¸Ó›k[vþµÔ$8 @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€¼Y@îÔ©O}½ù;·×Óòû&¯ZÞØö¶L_|ø„æuý’A ²¶,eAðf…7ŸÎ €ÜO`ËÑó~µGôêFˆùa²òððYµm]ËQó–/ô¿rjí¿ÒÆ|8nwI[—ïXÞæ‚C!†â¯zv󓙦—_àÍ«“¶ÿaÍ–Áʲ¢_§¤ôÚÉqz‹”ÞnsxžÔéÎ)ˆ…° ˆãˆà|¬­‡kLßê®"FÒó®ÀÊßýnB…«|ùGúÝdeûWVžÿú×Q&ŽãœÃûóÿ; ôìžWfŒÈ&$_ç|,¬C€ÀÍÜÍIH ´½À´œ =«,:éÃ[ƒK—ËÌëFÝqï1›.;¦ßÎò¨ù+çø•ý˜º8Rh·lÄQÁO‘¢«Q9U´·¦.ãŒOf±ä¨ 
Ë5ô|sé…ØUû:emóÕ ×Å”ÛsOÓâU›ö­s<Àt}jDuñáC®þòÚ'H–²!m×")‘°zjÀ…o3÷gËr™¥bN„úÁ'ãv”JÛÆhµ‚Š|YY{}èþ 4)æ€ pC]nX` €ÚI :훾ÆÜ„2)ø T+³Õšà v{ÆÊû/:pæd;ºýе_VÒ;$h…Šž;Ø¥êºXé 2eÀa¶|wˆÿY)­=ç² ß'³òßüOßóÒœ$½Á|YI· MJ½Ëª"–ïb%Ÿx.=úNi™Õìg[¶’@&C­¤w¸X[)‚7Ï §ˆ½{§Ô/…yÿ±ÏÚöÁw@hJÀÕšÊm€ V  “Ïн”²º’í;?¹‡Þ)áë ™5y¾~N@uQD« mÁ~¤ÌÜ‚lö,ìÙ1:ÝÔmÏžAZ™JÇzÜ $+ÑàÏ…§ÛºQ Bܚ䤲³ÿ(æÌËLÁ²ÅðELjÕ·ׇ•JïÈ©St¡„Ô.*¯\:И”C÷úݼœ­V¬Ùœ£—¶Ñù:=ÝïÇÔ™{‚»[Méââ½êâ—¾›±Y\9%L»LgT9ù¿¼1É=ÉÒ¹A#3ÈÑ͉Gáß «í~—O¬ ¾Að``Üx¨: O¿êãpîø>ZUl_ðÉÁ—XýïêÙ|ñCþâÂæÒŽ9ëëª+b¥>ò¾ ¡èq!¿ ŽJ³ô'nBܼuÖÊ+‰¶|¹«‡rñÚM;t»Ë+úˆvú}눡\Ëæ§´~°=çyç®r“ç.^dºòËZ ª´Z- Pq¯uÿ=7h“ýy.±²7¾Õi4Ž ÊŸ“7qŸDÈwR¶± ×Ä(K¿Ù“1FÝK“y™­«.m‘É?Ky,ì7:ý6鬩>ø’Éd%,c…·ëâ5ÇFNœt¯¾|d>4G~¹b^»6aÄTÇ|ÇÎ^Ì“‚/–^k’GN8ê”cžÑ“¦c¦Á ^Tþ,²ºn¢tËÎi@a‹ïw¨_¶Er2Iÿû‹?$…¸ÈB,fSV·Ÿ®Ä@Æb¶ôàyl±X†J_„çƒÏ]3ÚïÞj}¬e'†³:<Õßÿ}{]è:ßúgîޙ΋ϥٷµ`A%ãj¤lº‚Š©ß᯺[íé„&…šveT¢ôè;¤ü˜C€€kÜsí‚T@h#¡®ònVԲϚ+2 ¨Ç»½„ƒñúw‹yO ¾¶no*BG4 e#ŽÑïˆPFò°´ =º±;cïÑ|‡WÍüsŸ@ËOÎÇx6ÌçéYú‡'¬Ù?˜/Úó­‰÷ ¥•‚Ý‹¦iŠ¢=C 1Ñ®„ÝiWÂŒZvwm ðüxUðæ¤±Q„ìËw.“‚L¿ãiRüÙ!ö [Ò®Ÿ¯Íš¦+xmlÿÉ¢ÖܸSç´ãw—²;}ì‹°çâ´k&[~ë·ï¾MSo¹>´ YõÅѬ¬{º¯°9›¢ÓOÞaûžvKŒÈ«¶¥2_¿*Bc1:Œb$o¾¹þ/Îw'—ù¦Îœ>=Â"È5Æ]qQ¬¬nªëš˜üËRYl.¯_ˆÜÇ1Ë€ p³î€Ýl‚@hC®ºl,+®Aw¶FÊß½ù½=øbyX·7U·Çì–³Gæ°õ!w§IÝçÑ|9~ðÌdý^[àV¿ÃÓtôA|±Õ]I£¿».S–°pá›ôñýXšõìWbÝîéa^Ì‚/öÌ ®”ƒ^ŸÝºPk¾ås5íÕO>f©ßXEïš±@L?_+ÖÏÕ>ŽiæÃÅz ©[ÁEe‰AÛíÔçF?Hǣؖմ§=•Þ»T|üÐéÓ§?¿ñUúùá c~eÏc_“ëƒÖh¼NQÛtÍ¨Žº3­§´.ÎÕb7ÇiX —À\² €ÚJ€óù3+˱;[SeÏLxmêÄIS¿¡Ïb ìËßxÍþáŸí'7×ü›¿4Hõ9›75=ßWöcƒí2y¥ãº¹²t [?}Å-;}Œgs½³ÅÒy+ã(–à4½ŸøÒɽÙÙ\÷ž&bÅe$õãUã›|òX:\¼š7…b ooþD/{»õa圯ôÑHåIsƒEAïòÕO´ËàCŸv÷Óaõpx®KÊÚ/Èœ’••%£_>OŒYÔËTÀ&ä^-“ò°¹5 ÷—ŽëX† ÆoÂ@€Àí êE¤ú9[!Œ£¥‰Ýï+uÔ„)g®_8ʶû*äE2å†:ëD VÛ«ñôÚfºÄÉY@FæRʬ%þA½>äyK€Tš‰ç5ÿÑ_ÆFivúŸ¥³ÒLÜhZX}{©ö7t¥Ý<Ñî’>Ët{؆á\˜ã‰mQŸnAf{Wý³ï½]ÈžÎR‘â´è¾aq{Å÷wmLšûT±“¦X 74iùòÿ®’Å——É)á¾ñt$DŽŽBï8à†¼âü‹»a€š@Ö6A€Àí ÈBúKÊŽÓQòü#…]ˆÔÝαdÖõ/fïõŠÜØÐÏcÑL¿¥/rx)°ÔÕîK ÐÍö;–×Òe«ÿ„Ϫš4YŸ#v tÜw›ãJ –­½ï[Ah]•ƒW8ï6b匲‘=z¨¯gȮr—òÜN}¤ç°º³T ÃûÙüŸ—YÜ#“þ“¥›x¿Ð铯~#Õû±u6éA!ãMlk7—‘cj° 9zÖÜT[.q H6`¤Ã$‡û]uHÀ" ¸pù‡,ù@¸eñi_†r…ï¹Q€@è»°*øú!ÒÕŠª¢m; Âct:3»“£”‘™2à{ÇÁG Ù(ˆ´ï›BÊÇÊS+Õ…µ&C$ûmX_Ó¸„õûrFÍ_9ǯìÇTû>,#ØïjzÇËžNƒŠ1ô¹/i8À@µ:ÛLƒŽë†*-«Ç+áÆ°Çm/ ¨ÿ6jþ*Zþq1‘ËdV:”üítDŬ}r6¢ 
s=FÐ.ŠõÝ)}è0ûônž¸_ß ë»k7ïÖ³o¥>ìØ,¸Sž>ô9{ß×»ÛöënÔ‡qôeײ²âúà‰>G'#VBcK‡z/£¾÷3_:½DÏÏ·Ñó#ä‰þ¦‘ó’ëG‡¤íMÛMÁF’ÌÚ'£çí˜9ÖË€ àò/{` ´¥Àî¸'JîŠÏ $J¿B[¹4:ƒ/+¹#ÄgÅÖ|„v—³ø>ðßÃØ§wOBYðÅFïãÅè(ŠôŸ8Ñ|þcVö²ÈälÝP|©­Ù ÖíûØ–Éöß›}["ád q?bµuÓ£#ö[™¥’êUEGü3Ô_ì¥Í& ýwý® fÞy=MÔ#…ÕÕ1øbûd­¼SÅ‚/Û ëÁ)|ϲt­»™çCy±¯Cí¯Åü·X¶¯itÜßY}Ê D|é´X^ý·=ôe×A‘“rõþVú.2)øò¡ÁîýýFJÁÛE¡éfï) 4T>Ñáaöà‹æN>p?»u¨Qš |1=L€ @p#Ö•~)ëÞǪúé©S¾,OsÕfyNúÔ·¹|-ÙNËâZz\Çò¤:°ýÓowùVê3rÂÄr6Šcµó0ñN•au®ÿºí:˜>7‹3céØVuÿtªV!@€ x–À:¼= †Í¹¾CjNƒÎ±ôxZm4çXŸFL€ М~Y6'„í€ È_·p¯‰Öõ\¥*‘D·}w«¹Ó»hc´’fº'ĺâF·ËæöÂv@€ @^"|ø„&3muhGœÎ{ôf))oßÛÁ^GœŽ@€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ @€ ´³Àÿ±~Æ•]dìIEND®B`‚rocksdb-6.11.4/docs/static/images/data-block-hash-index/perf-throughput.png000066400000000000000000001045421370372246700266370ustar00rootroot00000000000000‰PNG  IHDR`ø,8Ž£sRGB®Îé@IDATxìÝ |ÕÝðñ3»›rSî ¢i«ˆ¶ô©­i´Jû>­á¢¾A²ˆˆ—px!((AQ.¨Ú ÊMH^Ö¾ô"¦íó ^b¥”‚!— Éf³»óž³É†MØ„$$ÙIò?›9sæÌ™s¾þ93g„`A@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@.KÀº§(zì¸?Û¼xÜÐKUô‡¯¾êr©2iÿ˜)³“'=’ÑÚ}nÖyè]G'$è‹gÜ7¥¥Ú§ëºI~,ÞúšÕ.ïÁÍ\Ëók~•ùñ²¿3G¥ùÝO& €@-S­-6@ÚL ìÛ/#ÝNׯ¿+þeC'½öK£ßX°À¾`Úˆ'*×™ö/>3¾¸Ü• ë¢U5ç<÷­Ÿ÷”Y^Œy¿ Él©krŸÕê²ZGWzûÛœv©¶$.Ý2XKŒþYcÛ6nVòÃcä1V«Õ ×'<°ÛÛOš¦W‡ï;m NÔ¿ü¡ ±°”CN+Pó×´N+@Ç@Ë6+[oL{WÆÔAWTªãÜ—:8$ìœ*lÒÎ_ªh{Ú_ðì­r»òùOk¹yúh2 w£¿<’¦žGŽÉHea°©ì˜÷NaSO~߬%w9õù†é±å×þlò®cÞã]ž„ç;¥ªÓMlW²=;øÈä¶œwãª~ñ«ñÒË}3Oqž:˜ª ]D…G¦ØJK즸qŽ("+Ú[C—üö ‘³íói÷â­íÞ|Ö € €]lB Ц!—¬ÞZi” u‡E安ï7hòŽ“êðsFŒ·n_œr,“›`õ™’H0¾ @ œ( ûÍøÄ'†8l‰ª)Ñ¡"sõúL«·YñÛm=Ì;ó~µkÄôeÛw«m‹ÜŽtÝ/þúÍÙ9{Iœ*Û7R¤¤¬Íœí=NþcY{hjÒÂÊâüÂíŠRù]ƒ]û^Û¸ýnP8«êy<ÿÆ«¯ñé‰Ó¯»öí;뫼¼•7õ©˜0oåÛ½uZó¡û^9Z7ß»?0k³xjFÒ”£ùy©ê—™Ùä,šzÓÏ'oöªM§¼}ƒí³¬íš³bˆÚ6›DÑMý-Í]öÆnµ=iÍ_d¿þ©¥Ú'2Ô•¹výö{UFuÉóT•Âuì¯rHÝ~–9ÿeo®÷N˜º»‡;/"}ÓÎ;¼¹÷Ï^::øäg󾀼úß"™{'LÜ’ÿ¥çZ.ßr4aìƒE7õ*›©®Ã½&ísäǼ±åí+äµkòÐ_ŸëoÊÓõüº»'¯:yϸG†us—FzÛQßú®µ‡û*×þ‘)ÚÐÝežròvCóœÔâxvÖ¶CÕ×ÕÕùîŠñ‰û,W¬¼=Ñ¢¾¿<ù|A¸H€ì"2@¶(¶ëqÂ^ º…„ì)¶Ûã ËEÂÔ ÃÃS7ìîiIA^°þÈ/³_®v ¹­ò³sÿý¡ú!º¯ÜQ›W,’Vξÿ¬U·€ývús³+ OÉÛáı°Èè×ÏØJ-sˆØ‰ßó'!Þ½£ªM<~Û õ|’'H‘¨ÍjÍUv¡!™,ìå% 
9ÇYúëÚ=[‹ÎX­GƒeϯˆŒH)uXú—ŸSÏ•=(÷¹3ôçñ¡Ð‹òƪ#qÏŽ"Ÿl¡;˯ªp;k=ÎJG÷`9°© s*«å {ÕQf“Väz‘ì‰'ðÑŽ«\n³ ¢Uwj`ÞÝl2Õ:§ªÉí®ŒÊk¹B‹sxÊKcš‹>’u»*?¸·y—Z{—·Òëx¶p¸õ0ožZkQý·‰|[ìgkÇôâÍc¾ûH#€\ »`A €S¬¼·[Ï>£7œ_é]âXíçì!q2Ðípˆ5c»\·¾pŒ¡ÒäUŽÏ-`ýÇ/HýÙGOï˜0yé±êŽÍ%'QSlíŽêbz¬ÏUý£ùÞñ‰ÿ+Dj5·˜É@&ØjM¢\ KÚu¸ö±Ýº*Z_´|uf²jŨq|êCdp'£5W/NþɲyÉŸxªûç¼’|üYo¨¼qÔ·ŸPçµÑe‹^\ý¦§9ÓßCšö¦Oð¥j¢ÁóTñùê9¼V Tõ —O?É·Ó?ñžÏåDä™úlÌw7 šüçš‘¼†ŽêÚÍé’#{&“雋«5_œÙ¨ge¹ å픽ô¾DÝ0úæ©´¹Ñ¡]Ý#ÙF:—XçºÞô (p}÷óOõ½ãŒ§iµ û#Ó2»•ŸM(Ýß]>]S•ï§Ý7ötNŠ{«PízsÒmßÜ¿ïª&g¨^Ò‡jeéBK;×ÕýÑ;=í"J|˜ýn‘p‡D©Q¢Ñk« ªäg“³Žyë2ð×Iâà[½žS9SæÍŽ_öÎMê—ŵÑîEÞ2ÆX;ŲÔ2«j«ÇÀß 9…oÛ–ÏO>°|ž®­\³¹_´8ôáWEW–Ë_Tülר[^³Oˆ£…a çN{äü’é·¼"#Þ ßã«Ò—>§\U :$ÔâÊñ|×ÕpŽ÷—r™èé7ÈñwtFœ¦¾¢ýíSy[·Ö·§qùuŸGsç…û{†L¾ús‘°q•R èÄÞŸõ˜€®#€¨ý£X ?,ÊÏŠn]ƒ|ŽÆ^ç°º½˜&GÓN&;ý×”$uëšgñ7G¸É,jÍ®øæ¼û?¾ßú–ø¶XK’ͶþÓÓÝdâ…ßv{ùÅê`§ª6C|­i2UO`áÛªf<;Å$'’øÈ7S¦=cT2Ø NNûEeî_?üælùrë‚,jÊÐU†Õ9¤ÁóT—Õ{{_º/kSÍpX:wîoB‚‚TlYk©¬´‡.]Úí=MKnð{©ÖAÕÞ­Ò²5V³„]{Zäl©Ùö&j{sY#€ÔÄm`èÔugAÔœe}Ègߺ#/æßÏ=ú?]ää½åä §‹ÈÈÈ0—›‚]²Nyëž#¼—< «æÏf‘£qfÓù"-.Ã3ÚvÉã R@=eÊÿZÎâçñC ’ý òÀ;ÍÛœ<1;3#ÃÔýúÿø•ºÍïÛBmá‡)Ýà[¦ÑiMsWZBsÊæ!j„±îq·6À7Ïmò<ßå›×œôgkõ?)Ÿã;räÈûu?ÇŸÌ;¾©wt¬¡óéWß¶WíïH‘Õ·\ùÇnSÛaAz­ÑYWáñZ¶¾ÇF¸ pÑ/ˆ »H!€´…À¡³æä?Ø«ähFˆí»D5‘Ã-“Þ<~9篺‡Í)VÉ™¿ó¨Máè&Q5â¥ê=ëÃÏ©2ûå?êÕ-h7öÒÕíˆíj©,ø"L5øÆžö©÷%%–·:‹ ¨é„¼ePד-òI;ýÕÅ3÷ž«ÎŽÚÊ{Ö”iVÂó«µÖDUoÝ ròª`HŽNvùîàªúª/(+©5êT_9•¿kòŽ™:5")))¬îgêÔ1šz¶ð¢ÅÿSeÿgMæ€9s¦ —Ï™vÞq÷ Õ‰£…æ…òûSz– û7«ÄÜ»Âߩʩúꪞ·¤«pyÞoç»4 €Àî¸`A ˆ€ËÝ%Êj]9sZŸ§N<8îõƒ¹g¨=³‰Ï]tû›úWp¨Œë&N°îŽê9(ýßVkVcèÿyò-'GÊiлz¦g×ÅÜÉ»3Ÿ^žfŸ4èÊ›<·VÌK}nö̳‡K*ïvÿ‡ n«–áI/·žÊÝ0mÒ„s×>_œÌÙæy~ì§½ÄQo™¦®Ýá=w‰Âò!â]«¼íóˆ¡;ªïVqöÈëÖÊsS&MI9»ÀšäïUÈÚ÷‡ÿAä)6~ö?$NÞ~}DÁŸæ­Ø~É÷jŒ‰±5ÔÖøäÔ˜nß쟒Wd+âÝÄÄÄÄ4{¥½ïÆG-V-f½çø¯³ßý4\^óÏÖ&\+ä¤,erjù09!˸ãÏ͘ÖÿżsgžQSö‡Û÷iq;kˆšŠN< þppä§Ää†ZÃ>@ s 4öwqçV¢÷ €@{WÆÔŒ4P¬Þ]WEG.úwqéŒÓgó—«Ê‘!Î=/¯ß]ë¶/upX¥ÖHFm½îC>×-xý§ß%?|¨Äîˆ+9~(.ܢ町»Dšåû¾du5#4uêQ§ò,î^?˜)N}±!Ôr>G(üLNá-Ù¼õ•ÏzYnÕOrÕôÃm6WßÒW5—ûŽÑág¬³’'è2Èúòäémª•áQé%¶¢DÏmŸÖ'·;W[u¶Øž$Šz† ¿×SLõ¾\øB¯!Ë•Ëvšº9ÿ!³«—Úí¨Ûo)kGaÁÍvù} ?r¶Bµ˜E‰Íæ FKóÊÕ §' ²tý\8l±ÞQ¬¬iwŽ~ø±¿ GYlþÙÓžïÏùý¹výÎé›<U}‘#f«56Øä8ÖÄ?øTB@@6·}YÔ­_-}JU§ª»©õÞ?mþ“ 
rÚúÍÉñ1M=Öhå«l/ÜFçÛ¾æúøÖQ“–ÖñÒlüØ‘~GÑ{®êr-þ½PÓÎòÜÆÞvûÛ§ªŠ_ºk°ú^Y2køÃ TÍ.@)îÈ#€\, žQ’Ï0ÇÅ{›Ÿ£êTu7©ùp÷Ù#ËÕ-eãîø{“Ž5`á*Û £~¾Ml–o¾iimïîpwPó¼—ÏþÆž«º\‹/ø4¥Þ¤Ö)0d°n±Ÿ‹ëîHo[U;…!D@@`÷Ý]Q@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ðu]kÑ © @@:¼@§ "—n\˜óîçá–Òœõoì¾ÅïÕ•ÕØÉÓ_rŸMòâD„8÷¤mØ1\•7+ùáÊS¹.ìsïY»~ÛHMn¿õ‘‰ € €T ˜:ƒD²®[â'LÚm“Á—ÅÓas½ÝaµºÝ2ø²˜ÜEÑ‘‘)æà®ûJìZœ® Ó}3Oqz‚/]D…G¤»d-%vSܸGÔ[!;@@@jªx¤ƒs˜úìüp{q\ïÈð”ãŶ¤Ðzú;|Ö’‡ÃN}.º‡Ú3_]¿Óê[,3C×âó­©B8EF’=Lúf™ÐõI£dÀê‹Ê]ßoÐä'}! € €ø tа>×ß”ñXlïþ«Ö¦Ï¶[Bs\nW¤/‚7­}wp•Q7øRûïZ{¸¯ŠVûGV¤hCw—yŽÑ4Ý|uÌ•ÞvÈ1Ò“Ç@@@ N1¶.iÌáuÕõFœºnêbµF›\Çe ß°÷Í!ANgÈõý,_O˜œ|,úHÖíªŠÁ½Í»|- ÜþI¯ãÙÂáÖÃ|óI#€ € €u:EV·Ó~·¿A*ßá°(uQ‰·Ì‘#BŒ;òXQeùKÁ2s`/ý„wŸZGÝsœoi@@@ÀŸXµÊ¨½ŸõQf9ùÆ }¢'Þ5ê7ïíùgð•_íÛ|T¸»¸V/íûÜRGÑQœ^ß3euŠ6z377Wota "€ € €@« 4È;ùy‹Õ_ïy-v†vRÑ®a·œR³^Yþò‚k·ÇÄŒ´-ü›c•7ÞÿÕGEé÷Ôº´,B „Õ,a×Åž®Ù  € €4 À˜ç{¡†Nkãå*Ù›m½Ês;bDhð¡B™zïH‘š±fùÇn —aAúï1—»nHûrÛÄñ € € pùtLuÕY䌆2K=¦gP1UÕòņ•èú… ÐŽšÊw‚Õ EØ¿YíŸ{Wø;jÍ‚ € €Ô'PHÔW #äÇ'§Ætûfÿœ  ¼"[I¢Y¸DDxTº½ÒÞw㣫³Þ¦úyŸ|X—SŸoPïúºªgß©'ŠÎ? ;ÎÇ a™™;µã?sØbÍ&gÑ•Wô1ïÜ™g,nWTx°}_ú¦wt+ú€ € €­'Ð)FÀ…7ÛíŽ8›'øR˜fQb³%:ì•q¥y¶šy5Þ^ùôF-úªEBXÄ·ùù©*ø ±¸s2žëï)“µ1íNWp×}.·%*ÿìéå*øŠqîIÛ¸óÎÖ»DÔŒ € €E SŒ€5õb麮\ÌòãÖ4Í]÷x¹_®êã’ûÕ‰, € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € € €\B@»Ä~v#€ €@'X¶ê5Ý_wçÌ|Œ3øƒ!h¢€©‰å)Ž € € ÐL°fÂq € € ÐT°¦ŠQ@@h¦X3á8 @@hªXSÅ( € €4S€¬™p† € €4U€¬©b”G@@š)@ÖL8C@@š*@ÖT1Ê#€ € €Í k&‡!€ € €M kªå@@@f €5ŽÃ@@@¦ €5UŒò € € €@3Àš Ça € € €@SÀš*Fy@@@ ™`Í„ã0@@@ ©`M£< € € ÐL°fÂq € € ÐT°¦ŠU—×u]k框 € €TÀÒ™ú¸tËàœw?·”æ¬c÷- õ}ôœ•ÃÍÇ?ÊÂ.22vš4Mèªü¸YÉWžÊÝ`µZEBB‚ˆqïY»~ÛH¹ßÝP}ìC@@:ÅX²®[â'LÚm“ÁWUÄinøÊËòÂ|©bž#<£]÷ÍX<Å)ƒ/‹E…G¤»äÞ»)n܃# ®½ € € €€";0õÙù{q\ïÈðG#®ú¨G§o¾(D“·šó¦ áIçÃÖ¤§MÜž‘a*—õ¹ÜaQ¹kâû5¢jŠ € € €@'èXŸëoÊx,¶wÿUkÓgÛ-¡9.·+²¾k¿&·_hùÙ„«"])Áý~ô€·Ü]k÷Ucaý#+R´¡»Ë<ù𦛝Ž¡ÒÛ9Fzòø‚ € €Ô#Ð)°uIcß=yÕIep©;²ŸÿBŽi‰ek¶?uÆ­wõºEɺ]¥÷6ïòæ©uá€Û?Qk‡[Sk@@@ >N5 G}Þüûç¤>žusç5©ÆÈŽšÌYYÞ=XØK?á-¯ÖBòÝn‰tnn®g–¨‹:@hŠÀ®?|è·8¿›ü²‰\`РA->óù¥„:8©O÷tÝd:že6•=½òíjÛ\u«¡¨šQå…¨/>‹£8/Üg“$ € € P¯#`Õ4÷M·@aô Þ¾fͦ~jèk÷Gÿók!4‘ö¼w7»»ªh¬´,B „Õ,a×Åž9[j¶["Ñ‘vK´‹:@:¾À²U¯ù½ 
ƒßMÿÚÓCh°jgGññ]d:ÏfIÌÛ÷ÇD•]uo¡Y|pðß™Ýò2“xïH‘UîJVûÕRþñ†ÛÔXX~Æ“Á@@@ Nz ¢zƒWí妄‰× :tPllìµêóŸ±ÿÙ¿24:SMÈqÇ×ýäŽø9ÿ¡þ$x´Ð¼P×å°Xõtbÿf•œ{Wø;Þ<Ö € € €€?š@ÂßÎŽ’ŸœÓí›ýs‚‚BòŠl%‰fTE„G¥Û+í}7>j±j1ëmþú:|Ö’‡»údCFÆ“œ”C1>ñƒ0‡-Ölr]yEÿóÎyÆâvE…Û÷¥oÚy‡¿:ÈC@ = Ôw ✙uŠ3´§kE[@ } tŠ0GaÁÍv»#Îæ ¾Ô…2‹›-Ña¯Œ+ͳÕW£æJš\.ï,ˆž_:YÓîtwÝçr[¢òÏž^®‚¯ˆçž´;ï¬9ˆ € € Pͪ¦¡l]Θ(÷«K“/cn¨,û@hOŒ€µ§«E[@ = 0 G3®š ºÜò0õaA@@-Ð)nAl´@@@V kE\ªF@@|À|5H#€ € €­(À3`­ˆKÕ <{k«NZråóŸ2±ÌÅìä € €D€0ƒ\š € €_€¬ã_czˆ € € 3È…  € € ÐñÀ:þ5¦‡ € € `0ƒ\š € €_€¬ã_czˆ € € 3È…  € € ÐñÀ:þ5¦‡ € € `0ƒ\š € €_€¬ã_czˆ € € 3È…  € € ÐñÀ:þ5¦‡ € € `0ƒ\š € €_€¬ã_czˆ € € 3È…  € € ÐñÀ:þ5¦‡ € € `0ƒ\š € €_€¬ã_czˆ € € 3È…  € € Ðñ,¿‹ôПÀ°YÙº¿ü–ÊÛ»2Fk©º¨@@Ž"ÀXG¹’ô@@ /@føKD@@@ £€u”+I?@@@Àð`†¿D4@@:ŠXG¹’ô@@ /@VÏ%Òu½ÁYü.µ¿žjÉF@@N,Щ¦¡O\ºepaλŸ‡[JsÖ¿±û–º×}îžo¢¿}kñf·½4ÎjµŠ„„1bPӄ䕽eÇÍJ~¸òTý ""Ľgíúm#5M¸½eX#€ € €þ:ÅX²®[â'LÚm“ÁWUÄiög!r·Î=§‚¯`“vL˜ÌEBh"÷”¾!}þýÃÔ÷ÍX<Å)ƒ/Mè"*<"Ý%óJ즸qŽ(ð[!™ €N`iʺ‡:\§è €@› tŠìÀÔgç[ìÅq½#ÃS ІDF¦×ß”ñXlïþ«Ö¦Ï¶[Bs\nW¤?ï̵kg½äŒwßww%/Si—Ûy×ÚÃ}ÕèYÿÈŠmèî2OMÓÍWÇŒPém‡#=y|AèÉÉÉ– ‹)CÞÞ!;H§@ÚD S<¶.iÌáuÕœM‰8{î_ûKuXÏpý­Ò#Y·«ôàÞæ]jí] ÜþI¯ãÙÂáÖüy¬@:ž@hdÿŲWÿÑñzF@ÚR S`Íuüd›:ö 4§¿z¼|X°L쥟ð­/BèA¾Û-‘ÎÍÍÕ[¢ž†ê˜šV3Ð×P±fï«·[Æ4»ÎÆXïys0e@zΉ¿ýÏ?köväŸ5»þðaM?}¹Ï¾ý$ø 4¨Á™Ñ}Ë66Ý”¡ÆÖÙ!ÊzdZF¨ìIxðù}Ã’vöv*Ä›¨^;Šó¸¥Ž › €@Gp8*ÅÇŸåv¤.Ñ@ 0æÿÏM1å™ D¹HÛ˜ugú&!Ì¢*V--‹Pa5KØu±§EΖší–H´F¤]·]Ãfe·ê([}}(xöÖ€œ·nÿÙF+°lÕk;eÙQ¾åëûç[¦½¦eýþœîÈ}n¯×Šv#€@û`¬Îu=û¥Ñ2øªžíÐ&ßïåùE¤_}Û^Uô½#EVßCÊ?Þp›Ú Ò[÷ž>ß“’Fh%+Ó§ÈÕ ¾ÚäÄœ@ Ã tÒL½ÁëâeôüÕÃÌžç¾"ã¹!5³Ê¢;ï¸û„ŠÄŽš꺜«z :±³Jν+üok@ö/ð⊵?4 mEûï =@0’@§¸1>95¦Û7ûç…äÙJ†8D71111Í^iï»ñQ‹U‹YoSïùÒ­Ö÷«.N°xtÉãÆwxÞíåv;#7Û×ÿbdpø¾0‡ˆ÷`ü¹Óú¿˜wîÌ3·+*<ؾO‹ÛYh¤ K[@š/0mÚ+]L&ó›BÓÔãÀ, €´˜@§sÜl·;âl¶’D³‡Î,Jl¶D‡½2®4ÏV3¯†Û³O÷ÜsX^^šàp8bÕÇét)ÍûcHÖÆ´;]Á]÷¹Ü–¨ü³§—«à+"Ĺ'mãÎ;[ìŠP €@Àú^²B¾pypÀB@:œ@§{çå«å•SŸ‹–ÌÌê,ùRå·Ä…[ ë¬*§‰íBÜ¡ëº \ÕÇ%Aëiê–f@ ½ ,]•v&LO´×öÓn@c tЬ¥/ ºÔ`YÕ€YKWN} €H^žÚ[Î{ËŸÕv81 Ðñ:Å-ˆÿ2ÒC@ÐB‚‚7ËY–z´@]T €€_CŽ€ýE×-%û[hEÞá¾}o°}c4(ºlàÀßVøí™ €\¦À’•ëfkBûÕeVÃá €4(`˜,9eÓ Gsÿ;ÉqÞ6:ÝjºÐêê‰ eFBB‚èb1çôíÑíwOXoÝÞwèIJ åH!€ Ð<%+Öýؤ‰šw4G!€ ÐxÀÞ‚('³xhÚì'Ç$$è¹þ˜ë°% 9³ ·ù&“©H}¼Ûj]át 
9zªxCRÊ_ÏO÷Àgï­™;Àw?i@š"0û¥—Â4“Èó05å8Ê"€ Ð€Ž€˜¾0©òìÉåê­ÆÁ&q¬{÷ˆ—~úýAïÄOšùœèÂéÛ!9ó ¶þo¹Ýþµw×ïŽâ*/I°;MC6ï;vT¾Ù¬iLŠáëE@ qÝ-Ñ¿—¿‡nh\iJ!€ py ÀB"{UñuúŒ‡†Ìþ~Ì[MW&ϪIzjºw™Ve²«?Ö™‹×-øêƒ¹­ö± € Ð$¥+ÓGËàëá&Da@.C  ØöÅSöʶï]“.ïühƲjþ¤ò°;6ojÆÁ‚ Щ¯øý5šfJëÔt@ Í€Õ×[õ¢ãêwmyŠLÞóMtØ_ç™™ê¥1ç÷hCÓ™|£><ò@.)o2uÙ* ÖTwݧÈÌ&gÑ´ØÐþòy0ÏK™Ýó’ÔY̵–˺w_ÕÍ‚ Ðy^LI—/ZÖ^î<=¦§ €Fè$rºy‹ük¤'¸ª’Û™òÝ^Þí-êNýêewê Ï}ýõ{Küm…75 € $''›5-Sh"¬¡rìC@ -€éºö€ÕZùàØø¢ïõ¹râ‰?|G8½ÁÀªúÝ` –i 4Î Ð~B#ú½(§œÿQûi1-EèȽ±Rʺܖ¨Ü“ÅÛ¬ þat¼õƒµ‹Ÿ*ß晌£#ÃÓ7@ÖX²2í×2øšÕúgâ  €4N p˜¦é;32LWÝxÇO´êg¾Êzì¿ýH¾ˆÙ=5q|Úî5‹û5®”B@ ¶À‹/¾ÒÃ$Ì›d.Ô«Mà €@w ¢ê´ – q@¦îxEN;ÿ¯´E# N}fq»¢ÎÙ‰û&&$Œýºw{ê±­ý~Ì[­85 €@;0…tÝ C¯Þí¨É4@ n¬îôZÅÚÉÛwlÝ}󨧝èÝg‘ç­Ìò]™'Ï–/_”úYÉø± GÏ™6\×ÿØÀ±NÛÙD0–À²”uÓå º÷«U´@ù^c#"<wsáÆÕ+“ßÌÌÔÅŽ¾Ö!'FÂá?›eµ¦WªçÅ^]/&ß ™-oQÏ‹å¦=?òÌÉ/·Yºê#ÂÒ&@¶è¥u[%gÜøaÛŸ™3"€ Ð8Ã`SS6ÝzꟷÈňÿûKìè%gÔób²{ÛÕg»çæÄÆu–R €WàÅi£4ašØq{HÏ@:‚€a°Gf'vœg•âš+ÅÉ+6m•83íÜ?&z¡×ì<#ž™vÿS/¾úÖKÞ<Ö €,^¹¦ŸY˜^G@£ â0h•ŸÌÝæ°Ÿ³Ûq¹'ÛôPÛiOðd23™ÌE óèÙàåúk´Ñai €@Û$''›,še‹|µÉmsF΂ €@ó€л†TZ‘¡Á™Ñѽ9}úó“«]#6oÝzíV9=}ydßµë…÷í >EH"€tbˆ~s5¡ÝÙ‰ è: €@;x6ê£ÿî)˜W„”îY»~“uõêß%ïHÊsÉ<³©¬hÖ²í»½ž»Ö¬˜«òíng˜75 €@çxñ¥´ŸÊÞ/ê¼ô@ ½ < WT*´Ÿ^cYVƒ7T++·„昅ÛsÛaM¾.õræ¶h´®ë*.¬w¹Ôþzd €@‹<µlY¸ÉlÎÐ4Í0Ï3·HǨ@ C æ—ÖO{YŽúJ›}7ê¤Ýu¶»™¸tËàœw?·”æ¬c·œYñâeܬä‡+Oån°Z­B>‡&"BÜrdnÛHMžÓ^jÿÅ5’ƒ ÐW]±Zþl¾®5ê¦N@ZKÀ0ØóÙÁ_Œ;VŽx¹£ÜjŒËYåÝÄØ± …*ò ¦"׃ã侦/ɺn9øÈä¶œwãª:ì?¼»oÆâ)ÎSS5¡‹¨ðÈô[Ib‰Ý7îÁBdE_jÓ[Æ €Í¨þÃX³ç@@ Ðwó5Ø­JQ¤ ¸Ü2ðr»¸Ý"JÈ´÷ ªmUéÙçªÊw‰&=v`ê³ó-öâ¸Þ‘á)õ¾µYÞrhÎ?˜*#?‘‘t>lMzÚÄí¦rOÛ¢rÖ½Õ¿¡ý¹kâûyÛÌ@ õÎ9Î=!„~¤õÏÄ@h9€€íš|ë±w²³# òòBº5²_ññ= ¯Îjdi!ú\SƘ®ç×Ý=yÕÉ{Æ=2¬›»4²îÁw­=ÜWaô¬Hцî.óì×4Ý<'u„8žõæÙ5´Û!ÇHyÌêºõ² Ð:Ëç̱-[ùšUúßx¬uŒ©@ å€©.Œ‰±É•ú´Ê².iÌáuÕ5×7ä}$ëvUdpoó.ßF¸ý“^dzE‰½ü‡ íw¸õ&Êùžƒ4 €@óæÌzlÿ’”ôù2[Ú¼8 @¶¨/iÛVàlÎÊòîª{é'|›!ôê[$õ® í÷=†4 €@Û TØò^’îþ¥íÎÈ™@h¾@ÀGÀ¬{ôh÷Vë9‹IN²áóìWC]º"äüžÔ YÃ*ÓÜ}!utç…‡úä]j¿OÑf'sssÕlû­ºLM;Óªõ×Û‡-csÞV=+•#€@ ÊíâÏ~"*+nJ«Ÿ¿ÞŸ¯-tæ]øÐoM­}^¿'%°À Aƒ|5Usšð°ÊowU½jlð¥:YêÐojNg:Æ\ýv±Ò²ˆ`ßra×Åž®Ú®²¯¿ïQ¤@ÚR 4¤‹¸uðÀ¶<%çB@ Y«ÛêÞ‘Á)â~úÂà{&Ë}¦¯¿þú¢9ã¿÷½ïUnÚÚ²Á¨~õm{EþAñÞ‘"«<{k@Îëí7kèØrRŽT¡‰)¹—õý|m©>/[õšßŸÓ­}Þ–j?õ 
€Fø؎ɃNv¹zÈ·É,ß&ÄébGÒ’­ž›5ù‘åÿ|wiøÀ+ê~äÃÖÍy˜Ïµpù¤«’;ï¸û„ús´Ð¼P×å¯ïê%èÄþÍ*yÿ÷onhÿÜ»ÂßñÃ@ 0§EÉlù3ü_9;gE@àÒ5Æ¥‹¶~‰Y)oÞpúó÷×鎲XïÙ‚MâØM×v|öó¯þISoHnÆŸœÓí›ýs‚‚BòŠäË•ÍÂ%"£Òí•ö¾µXµ˜õžGŒOü Ìa‹5›œEW^Ñÿżsgž±¸]QáÁö}é›vÞq©ýÍhZÀií°½+cü~oµöØ•Ïê÷¼ƒæÄ ÐæKW¦ Ö4ÓGBh]ÚüämpÂ93kÕŸsõ€µöyÛ€ŽS €†ø˜¯ÂJ9]|Ʀ×ï””Úýª§T´åp‹Ÿ9û¾Õšàž6iüŠ3ÙËÔMZ…7ÛíŽ8›'øR‡šE‰Í–è°WÆ•æÙjæÕÈÚ˜v§+¸ë>—Û•öôr|E„8÷¤mÜy§:êRûU@À Ì5ñŸB×f¶œ@ÿ­úW4ÿ§lZîŒä1G¿|Áå3*Ö;¼<}Uú®‰M«©ñ¥u]W©ú¸ä펺]jãϸ’Œ€Ξ3#€@Û,]ùÚyçÄ=ms¶¶;KkD1Öv×’3!€@ç0Ô˜¿Kðò¤¿÷¸.Õwß™óÎѾÛ-VϘÉÓ_ð¥Îu©ý-ÝêChº€Û^6AÞ¸^=“mÓç@ZCÀ°ØÜ5;X'<¾;Áju—Ÿüb›·ó!¦=³F^{ƒw›5 €þžyfúáÖ’û.º“Á_yò@h CMCŸ¬ë–ÓO>_ÁÉ/·}³o{MÿÍ&wÑ }¢'>ûÒokÚ/ijv‘@@ ^9³ÙÊu+ä­ OÖ[ˆ €´¡€!°™kv÷ûnÖÚ\«5NõÝÛ¨+ƒÓÿóÖþÏ ŸüÂI•¿`ÅZµbA@ ÑG¹ç]÷Ó]šÐ~Üèƒ(ˆ €@+ xcVªþÒÕZ÷èѧ·ZOxK†Xôœ[®ïõÄô…/ÿMM;_ëá/o!Ö €4R =}bå‹)éV³¦}"ßòÖÈÃ(† €@«<«üöpWßFèrüë“£E/Œ}pìÕcÇúë³YD‡•þ×ïÓßnµYý•<@ö+ðLRâá%)éO˜47±·ßËHË@!`¸I8*œ®!‡#Vw»¸ý~*w´ê,ˆâÊÒ @ZO'%n”¯¹ð€q­½l € Ð6¾ƒOmsÆ:gÙ1yÐÉw²³#*òòj^ˆ\§ÈE›1×ýëü–­{.Ê'@†ì%¥C#Â*oE¼¦¡rìC@ µ€©ŽŒ‰±É•ú° € ÐjÉÉ3‹^\õÚX³È“È  €m+ÐlîžÏ£ôí¶®£«g9lr×u]›÷Âü¿0ÿ…M>–@ ^‚gomÕ÷&]ùü§Z½'g­,ðÌÌÇþ¶tåkÏɉž5÷Tn·Þìc›{NŽCè Àþõ‡í3¾)ü÷Âñc86°ÏéÄßô§¾C'–5D+ïß7-Z¹ù{Gs÷'9¬ÖÄË÷k꺓[ wCDZ@¯ÀÇÿø/üøçÿëWò/1Þ¼¦¬ŸNz<¹)å)‹ €€W  XßÁwo,Øwd¡p›<~6+)å¯"!aŒ ÞòQ—.¡ÿÖÌæ²ÊJWÒóe·VTTüÌjµð6^­#C™_¾"¤@.%°cÇ×?_ÿF'¾jt½Ó ›•-çh½eïÊ5¨Ì‚~ž™=é_KS^›©ib­ŸÝd!€ Ð,ÆÆ=ͪ¼±íš|ë1y«¡yâÇ"¤$Åz^¥"##K݇X÷‡º¦iú¦­­™r €4_`nÒciËV½öŸ²†Qͯ…#@¸ `˜gÀd`åNª•G÷Zôƒ~W< -•WçÓª£XH!€ P%P®‹Gå Oâ €@KbÌ·#›Wÿ.Ùw›4 €Hžõع%/­{P˜õ?É? 
æ—4áÜ €Í0\ö茙Sº w=2‰ÞáŇŸ^¼~o…Ø… €@‹ <ýäã]ºrÝRYá3-V)!€tJc`ºn:oµ¦–]âRäç—¨uŸ»ÄQìF@ ùö’“ C"ûß-ùü´ùµp$ €@g0V&Ÿù Ÿõä„(á’ÿÕ^ J?sØ UîôØ«®Í̬½¿%·Ô»ÇärÉçÍ[®%ÛF] €HNNv.YµÚªéÁŸ¦œ@ #+“¢ëV¾´±Ø÷MyêT—Âo¾(ÿ·²ÌêzÊ5;{ì¤i+\Åg“ÔûÇÔ{Ǻ‹}¯M ÿ64½Ö Ü¸YÉWžÊÝà-âÞ³vý¶‘rªâ†îlv»8@ÀOÏœòï¥)릣5´@ = ´«‡‰ß^½<Ù.•œu=ÙÒØÃ'LÝí–Á—Åä,ŠŽ¾r‘0+sˆXkÊéó¾çºoÆâ)N|iBQáéj¤®ÄnŠ÷àˆßr¤@:¦ÀܤǷtÌžÑ+@¶hWØ„í¶]<*-<Ð$Ÿ= ±Ÿ‹3›ìE[¶îˆ^½ú÷É™[7][î9W¡ë¢ÊIÞšhÎ?˜*„Sd$[“ž6q{F†I•s¹Ã¢r×Ä÷k‹‹Æ9@@@ } îć¦<‘ìoÄr»ûûÎÄ5ó†Ùä.nan]…t.·å[¯n2 ·³&﮵‡û*°þ‘)ÚÐÝU·%ÊgÅÌsRGˆãÙYÛ9FÊÝ-~k¤o›H#€ € €@û0V&G¢VëÂÂ=âõ¦Ÿ¯£ÁBMÛ)ƒ(×#32Ey~Âø±÷MJ˜ð£²ö.5Ù¾‹êZ™é}¶+úHÖíªâÁ½Í»|OP8àöOzÏ·æ›O@@ð0V&gAì1gÞˆ.®ŠPßFV¥]âšîݾ|bÎó_È€è’3^||Ã9»Ö¿l5î‘Ê» ‡,Ùºõœºç0ÔRžóòú]Vï‘ÎÊòîÁrc`/ý„7O­#„ä»M@@ð'`¬L¶ðÕe/ìö6TNó®â l™å´ðN•?m®úÚòË#ÙzxÉšq×øÖ\î ²zþCæ,Þ\ë¥Ï!¾…dÚQœî'b¬SŠM@@èì† ÀÔyxÎâáå'm–Ó¼×<¥¦…ï¢íI4ĪŬ·µè…“^±ÕZ¢F·~q}ЯTÀõÐüW†UùÇû©|ß¶}BÏðÑΘ«çâ(-‹PEk–°ëbO‹œ-5Û—›ÈÍÍmñ¾ºmššv¦ ênø@IDATnV‹n×Û‡-cZô·þ¦Dm¯Þö§Á'¾Ì~ß"³‰ëÄ™u‡Êo™E½]Lˆ “¹ÖÈšw˜«¢ÒÙ]íßyÇÝ'Æx]-4/”SÓ/ò>‹tbÿfµî]áï<½R¥.oiH»n‹†ÍÊnÕQ¶úúPðì­9oÝþ·—m®S{¹R´Ž#°lÕk~N×÷s½ãôœž €m#`¬¬êy/|~ßïÓ³&þ>½Öm}ä ¹zŒ¶Z++œúÕ-Éã|[Ø÷Šø¶84iâë þýnxùëü3ñŽ“Ÿ$ªóÌ‹¿òù/ËÄP­¬l|â¾0ù‚æqÆŸ›1­ÿ‹yçÎ<£‚Âð`û>-ngÃ8¶d£© @@Ú€ÑnAôÜcdžQ¯‹4åD.OfË6;+F³u?È-ßûU"osüבÜ÷¶s‰f“³èÁ¡Ýù>s–µ1íNWp×}.·%*ÿìéå*øŠqîIÛ¸ó΋ÚK € € à#`¬09 }å#S2Ï•‹„WæŒ>}ÙöšU›ïKü?iÁ¶ïÄ ½Ü‹|úÐ"ÉuI÷–EWϼ¨"<·œyѽekêå;öËÛ}ʹd9=mCrl"€ € €uŒ€ÉÆ…†_•-Ê þqÜœ5ñ‘ñ™zph^°ÉTzî¼ã—ºí»XÕþü¢È_'Nšt“'JrW„§=Ùmº6ðÕŠ:}kÖ¦ ºäêÓàÒØr VÂN@@èTÆ Àä$ºÕšê뱤ܑ Ê]ür—Ì/®ÉߟU”"7Ô  € € `Xc`rô©Ïüù¿ kWeåùÐÿ˜µêk‘Ô„ƒ(Š € €@ÀX˜Xµx±ç][M±X¶¬)¥)‹ € €F e§ L8+ € € Ð. 
7¦Ôæîù<ºäÏoŒ4WÚ{ÔU¬t¹Âú„Û>^¸ìZ3$Ö-Ç6 € € `4Ã`£ç¼2ü›­K²‚*.ô¼&Ì;WGCEÙ‡ € €FÀX˜®krDOð¥^‚Ü«{¯y!!‘ßV¸*B/ˆ™ÅÝÏ™™y!‡ € € ÐŒ€U‹™MeE[¶¾Ýi# € € ÐX£MÂá¹­°«Eÿ¼±  € € Ð^Œ€É÷€U„Dî±9ÂbWÏ=¬½ ÒN@@@ 1½qø½kô‰«‚M&›j¬Å$JO—ߤÒ1¿?5ñ‘ôJS°­v”h}»•ì_°bËvUŽ@@h/ ÀÌêYn³%–ûhû¤ÏÙÊ…ðÝ[µ³¸Ø3 "˜I@@0¾@@°[&}ÿDñ±~#‚B.Ž²êµ³ˆÑgŽ3 b½@ì@@@ƒ 4KÖ4§t©õBå•k¶÷vÕ'e7Ç--¬Ïlõž=ÑÛ·ê1zôô3õ•!@@0š@@°‹0tÝôßV뉄Cì^³©NÁÑ>wÜÒåÈ/âž® ÆîŸøÁ‡[·ÆªcâEÆÔð-f½ç²‹ê#@@0@íù-ß0ÝåiC°ÈØ÷Çæ~´zëççžœô¿W¨ìøí¶!›'øªjªE<¼æ SÖWað@@ .`´¬†+È$އtÛ£2N%é{¬Ñåo¾Mmÿfý™™™šÝšãp‡ ÐuaØ~¨ö² € € €JÀp‹jPÏP{ææ­™×nÚ°n¸~ãý?Q ]úgûÈ®®ŠP!œbÜÂWyâ– £åäõâðÚø>žm¾ € € €0\¦¬"Ã*>õš½9ïþÏÔm‰f•aîâ-QW›zW‹C­Ë„9H­Y@@@# 2ûêl·å[–>1ø_é]˜8-U_?ë7àïŽâcw¨¯ÿ^;æ…êÎyû µ¾å*S±Z³ € € €F0Ö,ˆš¦»¦<³ÈRxtá»9Ÿ‹kÍ=’«œÊ•÷z–WöiG›ðØžóÅ'ℜ1QÜóV‘‘‘i € €   7öö꓃z^7U¶¬Èbé’Ó-Ä´'$üŠtïåêb±ä¨ôyûy| qmtå"Mž[Õ6  € € `Tc€U+m~ù…Õ2©>5‹®ë“Õ†¦iî§²…íý¿C® wæ'&%®)D@@0°€!0^*ðòæ/ù¾zñr¶w›5 € € Ð €=:cæ”n¢&Öòch½Ã‹?½xý^?;ÉB@@ +`¬L×Mç­ÖÔ²Kpåç—¨Ú%б@@0”€±0y›aø¬''D —ü¯öRPêø™ÃV¨r§Ç^umffíýl!€ € €F0V&µÖ­|ic=hï›òÔ©.…ß.|ÿPþoe™Z“tÔs Ù € € €€a 7 }C2o¯^žl—Žœu=ÙP9ö!€ € €FhWØ„í¶]<Š MÒÑvÌrj|žCk;n΄ € €@»0Ü-ˆMy"Ùß,ˆåv÷ ö‰ *â1›ÜÅ­"/ª±“§¿ä*>›ä¬"Bœ{Ò6ìî{¾q³’®<•»ÁjµŠ„„âÞ³vý¶‘ò…ÐÆˆ }K@@ #`¬L΂è°Z6Èã¯Ï0ý|ý jÖÎV«[EP“»("<úõ’òÊŸ”Ømqº.LÞà꾋§8OLÕ„.¢Â#Ó l%‰%vSܸG‘ݬs € €t c`rÄsæè⪽Xß%®éÞíË'æ<ÿ… †ô‹÷_^ÎðYK;õ¹èjÏ|uýN«om53.Ê2³œ&_§ÈH²‡iCß,º>i” ÜBÝaQ¹kâû š¼ã¤ï±¤@@@¯€±0ÙªW—½°ÛÛ¸?|õU—rãPñ!sÜãÊ5MÓ§ÍõîmÙµöÝÁUB8DÝàË÷,w­=ÜWõ¬Hцî®z]™l“yNêq<;kÛ!ÇH¹›Ù}ÑH#€@« <{k‹ÿ1Ê·±W>ÿ©÷nlßlÒ € p† ÀT_IN)ÍÍþð jº–!2ÄÄ Èg­~p¯¦%;kv´DBÞúØÅj 6¹ŽÊ>¾aï›C‚œÎëûY¾ž09ù˜÷ÑG²nWéÁ½Í»¼yj]8àöOzÏ·æ›O@@ð0\vÿüôaå¹yßìÛÊê´zÖÊjýg¥|&Ëì}&ËO±¦g}-‚ÔAwè€E©‹J¼9"Äø±#mÚúε*ÏYYÞ=X®öÒOx˨u„Ð=Çûæ]N:77·Uÿª­Ú65íÌå4ñ’ÇÖÛ‡-c.yìå¨÷¼—Siå:¿ §níë”:±‡ÿÖðÿ“r/K`×>ô{|Gûùê·“d"€u Ôâwƒnz÷‘¿¼¯úÝ+Üž’tgXFF†Y}nŒ}¬…É\$D°˜7õ¾ Ccuš³9jïg}T$*gW,úA¿È¦NqMìC×:džÃÝuÀ»KãûÖ⻡Êç…×Éb@@¸HÀX#`òV@‹œÚ=ÔRšó»ôÝ—¾M6x¢·Ñ'ÅWzïÑ ¬ö“Å®ñ23Ù»ãr×»†Ýrjô>!®,yÁŠmÛ¬X«ª´Xü|ð­>=åú…Üþ§YTÅ«¥ej ¬f ».ö´ÈÙR³}¹‰Öˆ´ë¶iجìVe«¯­ýÌJ}ç­Ûÿö²ÍujWŠëÔ>®­lœÀ²U¯ùýýÐÑ~¾6NƒR €@Ë mÌóCÿº+Lé~»:P«(7-ýº­ï ‡:ñÉbm¼ïyÍ¡WynGt —'[¿ú¶½*ñÞ‘¢Z³$–¼á6•¤·î}}ê$, € € 
ÐnŒ€yî±øÁGUÑ¢S¬LÛ=µ¥åß^ùôF-úªEBXÄ·ùù©ºã|lˆÅ“ñ\ÿZsndmL»ÓÜuŸËm‰Ê?{z¹ ¾"Bœ{Ò6î¼³¥ÛD} € € бŒ5&m3â´ÂÄzXijâ6Ý^çËjÊ\ótï r ú–}XõI2V/OÖu]aBÍ‚ï–/~vox÷2-oWÜ.IJœ ^ÕÇ¥^¶¡N96@@@:† Àd`£ÚäÐ6¬®ÚZèȸGs«íµëÕ×Ö[T0%k¿d€WÝO›Z¯5ÔŒ € €IÀX˜®k£­ÖJ‹Œä$&9Ò¥{¯Ž„N_@@@ s í0ÏÌ‚Á&Ç1|uÎKB¯@@@ £ +“·VÊ÷|9Ü]$Ïx`JGE§_ € € Ð9Œu ¢ºaÝÿKØòsóM©ÆI ï¾Ëívv»pyL¢o·’ý Vl‘sa° € € €íGÀX˜œY0ÄjMôòÙÚ{aÁï¶w]\\¢’`^Ö € € Ð.Œ€É[{ÌŸÿ«Èé,b`ô™ã™™ b' € € `8c`’çÕÅ‹÷N‰!€ € €- `¬I8Z CT € €UÀp#` jîžÏ£KþüÆHs¥½G]¸J—+¬O¸íã…ËÞØ]wÛ € € €€‘ €žóÊðo¶.Éj­¸Ð3 ‡ça •c € € `$c`º®éV«'ø2›œE½º÷šùm…«"ôšYÜØýü—LÂqA„ € €´c`ÕffSYÑ–­ïD·BZ‰ € €4NÀh“pxn+ìjÑ?o\ó)… € €´c€É÷€UL˜´Çfq«ç6eñv¦¤o?ßK´@À@Á³·ê~²[,ëÊç?å™èÓ¤"@ õ€ ? wí‘>qU°ÉdS]µ˜Dé©âò›TúÃ#æ÷§&>’^i ¶Õ¦3‰¾ÝJö/X±e»*Ç‚ € €´€`æõ,·ÙË}´‚}Òçlå‰Bøî­ÚY\왑ÌÇŠ$ € €_  Ø-“¾¢øX¿á]ºÚÝnw#oѰˆÑgŽ3 ¢ñ¿¹h! € €Ôh–¬iÎQk>ýgè¾åGv?ÿÔs¯f½T»yl!€ € €G öãUèWèZ©NëÀ¹9% € € ЖÀ¼ 1iç½iÖ € € €@Gè-ˆ¾ §JL¿É;p`ã'§OwñÍ÷—¾gп˴Ó+üí#@@0ª€a°sö¸¤””F‚½i²IÐh£¢Ò.@ã ›•ÝÈÉžš×ö½+cxWóè8 èT† À”ºÙd*ºÔoG9[bTt— ÷:ÕU¢³ € € Ð! € ìî³ ¾Å,ˆâÛŠN € € €€?ÃLÂážÉýµ‘<@@@ C&3LC:Äe¥ € € `D€Ç=AWÝPæºèò©h € € ÐR,#N+”Ñv´T¨@@0¨@ÀGÀ êÒ¨féºÎ”Ã’¢ € €(€€ñ2Œž³r¸ùøGYBØEFÆN“¦É{$}–q³’®<•»ÁjµŠ„„âÞ³vý¶‘²œÛ§I@@@ –#`µ8䆮[„'øR;<ñi­Q®ûf,žâ”Á—ŠÉ¢Â#Ò]²T‰Ý7îÁê@@@ >°:2£¾Ù\'¯fSÞrhÎ?˜*„Sd$[“ž6q{F†©\p¹Ã¢r×Ä÷«)K@@¨#@æ¿&·_hùÙ„«"])Áý~ô€Ï.Oò®µ‡ûª1±þ‘)ÚÐÝežLMÓÍWÇŒPém‡#=y|A@@ü€ù 8²ŸÿBŽe‰ek¶?uÆ­wõÙåIFɺ]%÷6ïòÝW8àöOԶí‡ùæ“F@@|Àª5:¼«Ûusç5™FËqQæ¬,ﮊ쥟ðEŒzï6i@@@ÀŸ³ *]7™¬Ö,³©¬èé•ïlTYnspÕ-†r¯Úö]B|7dÚQœZ'ïr6sss/:çåÔçïØ©igüe·X^½}Ø2¦ÅÎᯢzÏë¯p;Èã:µƒ‹$›Èuâ:)zþ´³Ÿ{»þð¡ß Zoÿü–&èƒ ª5!_KôŠ0©xßÔy T$Ú+,xûš5›úm’Ÿ.EÇ~-ß-Ò^˜÷ã÷,‰VØfQÅUZ¬¶½KØu±§½iÖ € € €@}Œ€IGññ]ä:ÏfIÌÛ÷ÇD…UuO¡Y|pðßýãËÂc2ëZýêÛöŠüƒâ½#EV¹,?ž¥üã ·…ËTXÞ"ÃJ­iW7µf5lVv«Ž²Õׇ‚go Èyk:ÞÎ\§öqÁ¸N\'%ÐQ~î-[õšßŸÓõõ¯¥¯~kÿÿ´weL‹ÿ5»¥ ¨:¶#`òúÞ”0ñº¡C‡нV}þ3öÿ·w7ðQT÷ÂÇϾdóFÞ©¤QKS?jéµ±jE°Ê}î½½An€,  5¨ðˆTQ¢Å@ JC ^ ¢$Å@ˆõ^µOÛÈ­½JQTJ"ˆ4¼$!„ÍîÎsÎl&lÖÝdIBÌn~óù$3sæœ33ß³ÙÌÏ™ÙÐT¤È1ìš+oøEæ°ëÕË xØGÔ¥ƒU–…š&»Çš¦ˆ#ï¯W‹óoÛf¤1G@@|è“"ϦßZ%gê§y1û™·"êgf?úÔßäC9<Ÿ¦™êê&g•Å:ÄÐI3NÍz`ÀÓGOÄ*Þgk(3¥·¨£¹2@@@)@X€—Ùå2ž‚ØÜÓ¥²–¬[s›ËSær[+O|µD_ñQÎÒ5ëŠo PÉ € € €€.@X€¶ç¬’›VɈ-&ùÅË›„¦É''Êtõã2É´5-r±‚ €!$p±ïQîóë[| 
B4*t²X;AeÐå–EÕ € € ”Cƒb" € € Ðq°ŽR € € ”XPLdB@@:.@ÖqCj@@@‚ ЉL € € €@ÇÀ:nH  € € €@P`A1‘ @@è¸XÇ ©@@J€/bЉL € €†ÀðÙ;4cùbÌßY>Ät1ê¥Nºƒ=`Ý¡8@@èô€õˆfæ$@@Ð8ùØ?]Ôž·>¿þž·Ð™tû3 ¬Û7ˆ € €á"@.-Éy € € €@· ëöMÄ"€ € €@¸pX¸´$ç € €@7à^½Ö°Ö}ØŠ € €tšX§QR € € кXë>lE@@:M€{À:’Š@@è>Ãg︨ߛöÎò!|oZ;š›°v Q@@hX{Ô(ƒ € €´C€¬hA@@Ú#@Ö5Ê € € €í kE@@@ö€µG2 € € €@;ÀÚF@@@ =`íQ£  € € аv Q@@hX{Ô(ƒ € €´C€Ìš¦i&?ÉßH 6ß7 ’€ € €=RÀÚ#ÏÚÏIÏ/ý"éË-‹Ö»ΤÛív‘™9^¤ö7OÍY^¸Î7û¤Ù9S•xòeŠø(wéêµï4™„Û7/ë € € €€!@X“Dù†ù§Tðe3› ³¥Z“(?¦ä/3ÜÀRó»f-šî”Á—Ih"1.>ß%ÓN7˜Ó'MyÒ;Ë € € €€¯X“HTB²'G§ô{yCáE^MjL¸l™Úô×/Ï5£É¡‰–Ê=¹B8Eᜳ±yùk²7šëe—;6±s´¶ù¹%ëÖÜæ²Å”¹ÜÖÄÊ_-QÁW|”³tͺâÛ Tæ € € €€?zÀ”Šü2eùe_ÍC }¡ŠŠ¼RdÞMB Ó4M¯êÇe’ik ¼ò°ˆ € €ø óƒL’ ºT§™§ã,˜äA@@/ÀÄÿ@@ºJ€¬«¤Ù € €ôx°ÿ@@ºJ€¬«¤Ù € €ôx°ÿ@@ºJ€§ v•4ûA@ 4íÝP8LŽU°Pm9Ž@‹ ððìû†^„j©@ I€!ˆ¼@@@. ë"hvƒ € €€ñ@@@ºH€¬‹ Ù  € € @Æk@@è"°.‚f7 € € €¯@@@ ‹ÀºšÝ € € €`¼@@@. ë"hvƒ € €€ñ@@@ºH€¬‹ Ù  € € @Æk@@è"°.‚f7 € € €¯@@@ ‹ÀºšÝ € € €`¼@@@. ë"hvƒ € €€ñ@@@ºH€¬‹ Ù  € € @Æk@@è"°vBkšfjgQŠ!€ € €@ »À†Ÿ4;gÊøÌLÍn·»3å<{ê¸íš&p¼@G²#€ € Ð. ÕïšµhºóXyIh"1.>ß%Ëžn0§Oš8òäTCV@@è¡`Á6¼rh©Ü“+„SÎ9›—¿&{Sa¡¹^–w¹cËó2’ƒ­Š| € € Ð3À‚l÷ÛWï»Ì*óH8·Ì”¶½N/f2i–CFªåÿpÜ©§ñ @@ @Æ79é@É-*íºK-[½·U¥Ü²K­;ÜZ¬w:Ë € € €€¯Oòó °~ç¬'§ÇTîÍ9´þŠ›§m=dd•÷aJtÙ’ƒWõ=;oÑÊ’¥FzGæåååZGÊS@@:.ššÚéñ=`Ø.Q>ù5Gã|’XE@@ü ¨Ûš˜‚°4=iþL]¼Í;{ì•C¿»_õNêðòň´;|PT€ € € ,HBmàM館o¨¶{©ÿ[ÁMj=6B;îÎ2 € € à+@æ+`½xØGÔY«, å/78òþzUdþíqÛ%@@К <Ú99ëϱŽÚ¡³³ºOïO=uü«Û•gk(˹xXÛ5@@z²Ø…´¾ü2æ±S~ñ'‹£n¨Q,>ÊYºzíæ‘&“àÉ… s@@ð+@æ—¥õDMÓÔÐMõã2É/cn=7[@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@N?}nÎý÷dvzÅTØ©ßf;išfêÔ“ ãÊh§Ðl\{iuÒ„I?Z¿hRZgž;©)ÄÅj§Î=JjSßæ{!-¼ÀÅh§p}ß³ÏJNÚ8[s|²ÉíJÑ41Ñdî¶K´ž#ëÙW¯«ÚýÆÇqÖ3»×¾²ýG­åûðò–Ã”Ñ ‹ÍrÿZkù{ò¶®n§ù¥_$}¹eÑzwÙt»Ý.23Ç‹Ôþæ©9Ë ×õävhëÜ»ºÔñLš3¥ñXy§2E|”»tõÚwvÆßs[ç.Ûë¾Ü›ít þìë³?“ç´³#çu߲׿z×Öÿ6»S'cÈYýtV;©ªÇ.X5¼v÷[«¶êQ­ÿýXÄû‡kÆøÙ5I*ÐIï…ͻ嚡™¢S:©zÂ5X§¾ò¨Ìèè§å;g<¶ÀÚP“~iBÜ2‡Qi+óQ÷Î\ßFˆÖJéž»©«Ú)*!aÙ“£Sú½¼¡ðŠ¢ ¯&5&\¶L©ÿõ Çs=W?ø3ï’v’CC-•{r…pŠÂ9gcóò×do*,4×ËÃt¹cËó2’ƒ?br*(³élG$ú¼ýT¦*ëUŽÎÍ/È.ÈÏÍ>}ÕÏþY¥í;yîf5gê¸@GÛIÈ¿×wßV>.ÕïÅ‚õ#Öª¿Ÿ¢"ÓÓ«Js:~„Ô`tô½Ð¨‡kCââÌ;ÚN=áš!ˆçµ×­køÀ¼‡œ'¾\âé"rµ΢†»Ô~T²Éä<7X„Å,ª¯`½{þâW¶'ÕZyO‹˜7kÎôƒ•GsÕ 
ÌbvVO}íO§­¯0êPóÉsŸ[_±w£8õŽsççæoÌVÛú_umáø˜³/Þ1mEÅ¿Oºgx/÷™•îoÊÈ+O¶–åd^žàZöu¯ÞwT¼¿Ñ_¾PKkÍ9ÔÚ©hõê¹E«Ï·À×·ç,Pœ5Çå6l×ó¹»÷R¸´Óí«÷]¦þ^$œ[fJÛ^§«›LšåáÜ‘âðŽ’ÿpÜ)ÓVuïÖþè¦Éû´ª6>øG“³A¯‹±‰²ÿ¼/:Ý4dm­º ¾{Æœ…5•³„Û•¨j±¹Êþsݦ;ä……S­·Z^eÓ¡êÈq“³~9ØQ{2K­_– –-[]4W-SÖN-¦vå/ÞŽº¡*M½_Þu}âwÍyqŸÑ‡vºAô6ò[¢zËd!zʧ§­:w“vÊX¼íZõ·su¿úýǾvÜh«ž6…÷BÕ&ázÍìë-Ú)œ¯Œvê)ïáÆùöøù¨ûzÎ¥_.—(‡©á,ç6Ê{¶ÌꢣVwQÁW|tTQTt|‘Ë-wv–h¥ö$…×jùf]“øR_1úp3Û!—Ûš¸²¬öˆÜGó°À1,xÈ¡_NqY¿Kg¸Í¶C§jÍY3¦ŽÔ½çŒß§‚/Ue[/RÇŽ_*?£‹ó6Í;îÖbš#„ZuÑvònŽ~ï¯V÷ƈ~qÚïôP[§vJ:Pr‹ò¿îRËVïv¨J¹e—Zw¸µXïô^Þ¯EVm˜vJ_16K™ÅSVçCõê× Õyý|æ“s«Ž-´ WuRBÒ2§6[ç° ÍžòïÐÏ»ò†ÍYYF_Ѷè2•v´FÌY>wÌXc»|ϵV-³ŸUÁWB´¥(!)ù õ~¹yç™òÚM—h½¢òî®°m|jîä±9ò²¨=[>P=-OŒëëõ‘FsáµÐ†swi'DZOoUðƒ.¹üýœ¼‚”  _”óðmÿ ‘áÕ Ï&TÞ Õ„ã5Cà–i¹%”ÚÉûÈÃåšÁûœèóÖ÷eùÏ,êqûùö#R±%¢é“Üì{gMŸ.O]†G&m࢜?š³Ëè>óð E¶Ã²³ñ±_û Õò^~—'iO,YU”£’FMºç£h§üÑêñßâµCò¢Ãl¶Û—Q/c«ñ°ŒU*ß©Wº ԬƧÌ^Uú]ópîÛá‰?ìŽùÎYŽÐÀ°|ÏU±Kï¥ü—A–ü¥¾Ce=ÌÚÉÙXß×&í}G;âÝñB‹ð^‡åQËæ=¥> ººŸ{ÆãÏé½z÷ç½™’r²°¯:¿“Ͻùƒ_mž:íÙCj]NsGefjÂaÖ{©Ú*ï)¢~;DÞ„ÈÞ‰ék«Æç½—b*{áàîcŽÅrÃ&µuäÌÇTÿ„';ÆÍn‹žöóg·nIؽñãgþtnFÉ*SÎÝËÞImÜùRù§ò¾/Q±Y~d&ļ¡â ½§NUÆS[ÎÝ¥,âŒÞ %{NÊàø­æ±?~X-Cæ‚×:ô –æ »ëB½þÇüÜ »k†`_!ÔN¾×€aqÍàÓNmu.ødg5”F½ó×þê¢ãФÆ'¼_Ü/=¿j•\ן¸dAÎN¹¨-Ï{9¹ oEJBcuuΟÉû ‚)ïñqŠÅ¹¯=áY–}S— ú±¬æ£V4P]tX̦êß.^~mNNÎÅ‹¥EGDý]¦ ¨ ðÎpYrrh”Å\Wý«å¯¯SùÜ[]Sþ}b0Î!ÕNM bÌFÝó@a´\‰³->§X³ Í)\Û)ʧ95Gã|’BõìÉáê^·Ç~³±¹iõ´;tÿ‚Wô åü4S ¾Öì<“———²"¯(Ŭ?<&J¾;jfÑFyèš~Ήé…UjýµûoúÂ!çý&uOsõÑ jÉ%.Ê÷ÁœœG‡¤VíOQi5¦ÉjÞxx‡<Ö–Ó qmÕG-´L¿µ6œ»G;Y&»jŽém¥z1GM½bΜe±¦¤Ëõÿoì­“÷†…÷*ï…; 6 Çk†`_]¡ÒN¾×€árÍàÛNê:˜©‡O|2[DÀÃÇÍzlºìÊ•㵘ÔÓ¼‚)ïUHÅzzdv5¶è•Š1*ŸË•ø—Ý|ìUF_ü^Ÿè€Çç÷®>®^À߉µmÊ“£ÚÉöþ÷ÿ¨'ì­yêÑ¿[ÚëÀ­é¿Ò/~¼Ëu÷å`œC©¼½ÇÍzrº¹r¯|°@½X³®ä¶ü—½·†Ör¸µ“¥i°ï™ºxÕÖ<Å^9ô+±ûÕæõpXPOwlmz@~R\‘3é«?-›£ßÿ¥òz%k«¼QwCÃ6z>­8%{„OÅôy¤U;|—ü{•{s£­ŽÝ/­¿.í ±P|r÷½;…³~pö”‘¢dX‹Ba¶bø:­îÑN®Ã–Øä?ŠªÏæ½TÔôõ+9êsîœ0qVŒ;"QËj„F óõôPy/,ØõÑ´p¼föõ*íä} N× ¾íDæ+Òƒ×36Õ^b.Î’OAsˆŒ´ëRGÏ^øyöâmW×Êá0ÉÒ(NE¨Þå õË–¬Þ:WÿDÙ¬éA›I… fŽšÃ³ÔEÑÑZkÖѲÿÒor÷Œ•²ˆ?ïùüƒ÷öV’›¯¦®PÊjídØŽ»t¬¹b—|}©§ì¹b^Wc{¸ÍC­´7½#*÷ˆ7TÛe[äíQÿ·‚›TXl„vÜH õ¹ñàŸ@çñù“÷þo¼|øÆ¥ò¡Ù‡?‘zË=gGOœr ÚíLQeÚ*¨^ßtO=šü{ˆ?^£8©&õ>(G„›Ü#&$Œ”Ÿc½´~KšÉ´E>ü#GmúÑh}8¤e¨¼°—Ù<t© á6µåÜ]ÚéÎY?™Þ4zc 
>̾©!‚úGnæç|ºË{ሉ“N©÷²žvÍà§Iü&u—v2®Ãýš!ˆ~_†á™qù÷õáyN˜—¨ÜÆYÞ?÷Á±j½ñä§±*íš~ 3îš“³Oþ8««§ùÚ*oäkkîºá¦JõéKyCºÊ«.4Ôœš«U•öÍ雟…^›™}eZZZêСC¯P?ÿ:ô_4F'©=î¹ò†_d»þ›õtÿ”¶œC­”¸úŽ‹~ß—¼ÿðÉÍQÍOÙëþÍðÃ­Š‡ÝqDý]¬²,ô~ˆ8òþz…0ÿö¸m1BlƒÛý¥ú¾ÀÅùãЧÉ/ _½èž4µîù Ç)VÈ'þ`ȽµòMIŽ;ô< Qmo«¼ÊÌ䊈•îMbåæjùtEý=Plz/l®(ß?óóéÿ¾¼_³÷-çîÒN¶¤ú‡”k?hxÚPÍÈû(9V‚¦Kþ ßÞ/u¾¡ò^®× Æk®­y¨´“:p¼fðmzÀ|EÂx½0ÝT%t±[=cÒÄŒS¿œþç¿®©žUSqL ³ù]DŸkõ¡{*csŸœûà‰}§ïp~OïYR,m•—Y‚úÀo»¼¿bäÔ¥± §Ò'NȨðäO»,15§ª²ìöôÁ²SFNî^_¼ÿpDDÔÑêÚÓƒ¢—ÈÎÊZÓÐØpÙº{­vuú³é·ªá…-†Ž˜ýÌ[õÇ3³}êo¡úÉp[ΡÖNò¦“f·7Ýa÷>s_ÁäÉŽdÙvÂív&¬Ï޼5(ví$ÿ.ë&g•Åʧª÷‡Y xúè©ãXåEdœ­¡Ì”^ÜâoMµ_¨N‘?™y¿(Ë9¢ž.8#ëž;Î8#âª7ÌÏ<”pz™<§jŒt´ в§Ú·'öKÍÿÜn/9ÿÏÒ$"RÞjù.&ßO<ãîza†¶ÁžùÞaKÉ/³&çGÅ_ú‡ã§OßᨭÎú¿iîÔE{âþ®†ÚÏhxlvòÔ3æÄº#Ç>[#ÛDôŽr”†û…}h´SCê³'ýÏû›âD}tæô{& -öÒ«Ê/Q¯ƒë“-ãä§‚a=…Ê{aQÑê‹ïcápÍì‹+TÚI=¨-¯|ÛÉ÷ÿïvÖÃL`ëú—®wX£v«GŸ¬:¹P]\}·¯užúG¾ylÜqSÿÔ©j(ÅÞŠ¯6ºäã“Õ£êñE•­•?O¥uoÆÜKÓƒ1š³”äŽ0ÅõÉWÇñű¯ ªäcëÝòBã»}ãç©LŽª“?lhp¤×ÖžÎò C±ˆÓµµYކÆô3Gk}ŸÐ\¯Ùå2î7 éO†[sÅvÒ_2>W/Šúú3™‡c¨úq:݃[kÏæ†í¦ áÖN%ëÖÜæ’dW—•'¾Z¢Þ⣜¥kÖßÖM› ]‡µyZjEÄU7ÿ³z=žª­Ïrԟδ™Ý‡î½ýJõ„Bqåã/Ýè’_qºAK?|ø%qVm·Zoú^.­­òª5ÅFX½‡mjúˆy6é¿ÕQ¯´‰©nù€“µŽ¬ŠŠÃUð¥Žå'7ÆÛ¾öy»;®·|¶ŠÏäûäWåU›èß™X°}„WUa¹Ø–s÷h§¤c²‹K‹½¼Ÿúº‚ªzWfõ‰ ù=›š¸6Ù6§[†eyT¨¼z²¾.× ¾çh=TÚ)\¯µ é=H@Ýw%Ψësîj›÷0$ŸÍú“ÀZ+µuϾä¦oHÚé*Ý/!ÜÚÉ89é2‚y¥Ès”ïuþß ‡Öêi­|kå|·û’s¿æMû øží[_¸­·ælصvέ•o­œï6c_rN;ùâÈuÃÇÏ&=ÉÓn5h»| zý¥{öåÿoÛ_þž”F;õ¤Öæ\@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@z¸Àï÷ïÔ4ÍÜã4Í”³hQÚÇ¥ó“ºâÜ~vùuyóSºb_ìk¸œç ÐýÖì<S÷Á¶~½…h¼æsUZZv]§µ ¼ÖÙí …æÚjY÷E Dr4Íš°zÝwÝ)ÛOR/=~kú„ªæó‘Ç1ó¡ùÿ1(ñìç,øíÎæô‹¸pçÿ}jZyåžÜ7Ž×Í»Yå»+”®X½¾¿Vw"¶oŒéìÝí'LƒfžóÍ·}çΘ$$¸TzMÍ?,?þqºÃd29}óíÛýÁLJe¢¶sD¬)m{ç·¥ïYGÂ@ÀçÀ) €ts©›Ž]R»í¡}nW¢÷¡ZÌÕKïìÿýþcW÷Nïвìc·»¬õ»×¾²õGª«•™³r¦‹ÊòÜ–Y\"o‚µwbzaÕ¨¼S¢Ë–¢^mí’ÿ·£33µ(á……›-&“p7› ¼2ïöª¨¯ÉlNkZ°˜Õw]Ÿxã]s^ܧ’FÌ~fJ¯cøË7}Hôµ?¶¾ÂØ6fîÒ±¶Š]õuÌ{rå–¥F:s@À=oxF` ¶ €\Œ¼’ŠgWÁW|´­(:.1_“ûr¹#ÚvP¿ð¿»¾hUŽ_ô»4#øŠŠ(íÛ·ß<³5j·ñIyý%jÇ}¾wVc´UÈô‹?^öî÷£änzG+õ¾FìÔb2ìv—|ÅØ,eýúöÕÜÖÄm»¾úoãÍ®ÆϲKÄÇÅåGGÇ©u—Ûš¸²¬áˆ¶_Dy·dÏݦÎqÿ‰Æ%Fs@Ö‚غ[@:(ÐP¶øH/Y‡Í|îкW_¿Ê+8Ⱦ[ö"õªÝ=¤ƒ»ð[Üåv%äÐ;9‚Τb…N™ê¾þèg±²¦KãêóWäe7Uºt~^AÊáµÇÄœb±ylœêÕë’ž/µ­üõ9j~ãw-‹½»åÌ+¦ìñü³wÈÞ¹A²wîÙóÃ$…ȾoÁ 
Ãm•ïf¨²ÞÓ5ýÎÍXð|¾gã~mê˜Çí 6`þ¡dôw…(ö̓Lçê'L>ã)åe$ÿhÚææÞ1ïºXF8/@vÞ‚%@NP=E¦=[d­šX÷àë×x_úžÖ?Ÿ£.ðõ‹üy;´¸¯×Oî\mu–µDZµÝ¿—ô³Äô<ï Ad=[p]í',îÆUQ”ÌwßOûÿûO§­h,樚ù V ÿüÀûoÛív‘™9V\?0räC‹_Ù®ÊÓƒyI>ö—ß09«4‹Ù]}íÛÝó}òùÕÜd¶Us«Ù]«æÆôì´©‡ž5VdÐ7Z…üNTCéê‚âB®8ùó³V£ß\¥ç³“ûtÂ˯¾~¥´ÑÄöªÊtöøXå:A­N¶=o¤gÉ{Ââ7?tGTDT½'Í*%?#šï©ŒÌ½ïž{†85K\Æiéê‚¡wÔ™Ò¸±Û{çk§"Ô¶sNm w:Ë €þŒÿQþ·’Š €@Lµ•cTñÃÙÔ÷Úªßä4_*öÕ»Ô;ûH9LQL²Fa_j-?'{Çú ¯_¡–Õä |ª×¾Rü##¸qß4m²ÚVQÓ¨ÏU¯‘  „|jà¯_(Z¡¶©{Å6-šþާ‹'B®‹€ÿ'‹7¼šäŽë“¯ÊÉ0K쭨ߘ™9Z“dñ¤þñìÖëTð¥öýÛµ%£õœ8žÑ»Ñßތ޿aÉæ½Æö£k›Wq¸¢äÀo{~Êß~{×ÿkl??·ˆ3õõ™ gš{¶N5D§Û4õ’óyäÃ9n¸©RÉVsÕ IDATl4½7²Œ ð €ÿX¾‘“@.PÀ›ô–*â=œ­µ*î›ýÈ”‰“¦|”)§®~bN5_ü«ræOþÚGÕ‹–÷|µVÚf3k-zƒ"DŒ¤‚Ñk¤ Úí™ò§LMÞ+æÖ÷«W®öÔú´1ÿ·Ù7?YejĢĤææûÐü––Ažs÷ÆÕ¶›ºGÊQÿ~­Î8žz§Kuç'ã^³’î[ŒÔ+üõ3©©©·^sMê "á²e*]Þ‰¦¾3­Å¤z Íò'âæÑõs˜m‡TÎÙÅ'+½3nÿ±¨÷ýygb@ Y@`B@à¢hÑ}w‹Ú¯ÅájmœÜÁªÖvrç„ÉÏ;˜¢òDZ-»Í¶¿×ŸsÝ,Ü=M¥êéQÛ‚¼ŸŽh”é·ÌåÓÙeµÂè=3òù›Ï”=u2=;#gÍ+Öò?½ërÛUÏ™ ßô‡jø–5mÞRõ¸x›ùÌ¡™‹··x ˆ‘·½Ç-ÝŒ:ÔÜ”xÙqâÀà½_;‘«úðÈ•žãÝ¡¶ßº©ö‹ÅYsÔ²ï”éþ¼éÉ‘*@<.ök?/Ÿ„h’O¡×ϯé{ÆFþèá[˜u@€`iØ€ ÐQsÒÀEåQ눪•Ú“Lò Š}ëTÃþÆn>Ó×Zœ•"„CÎékJË×rñsù¥À -¾ØsÛR½S´èéñ­3˜u÷u7ÔÊ^O¿8#rWé÷’S.PžÍräýIj¤ÿ)#¯<ÙZ–#ùTÈi¦¾¼á|¾ŽqÖùÚ›qݨù›?ñÎqò¨z°¡ß©ÁíôŒö4¶~O¸üõ ½’þ[£(s@ó AÒòž‘Ÿ9 €@`zÀÛ°@ ¢†Î¼B”½ ƒ#›XYöÕ‘ÌÌñÂl¶V”;3QU¬îÝ[ïÿÑ‘±e*€ˆN™H_ö¾YÜ‘‰ÚŽ‘qÞÁݶW× ’ßGÖ¨†=î:|®DÞã&ý͇\nwJãwUiéîlÜ,MC1÷Ÿ°-™8aÂ#2_¢ú5ã‚áæBÝ·Ö€¹?Ù6EÕqY¢Y¿ßO-3!€ü1]à2lA@ hצÝ|èò…ñÂ#C,5É›‡ô§ºDÿ¤ˆ'^ZÿÆõ*€ˆ¼æßnPWõ·HQOÞSÚp7õÞ4ȧÓ–•‹–jIÉO¨u‡[KQߦ¾¸²¿I ‰¼À2P÷Ïïåý^£åðBሪ푦Aþmä1«›¸?)$Ø”ùî|å×STOšú"ë (FV@ G tüÓ¯ÍÇÉ#€ Ð}œý8UÝU¶ð…‘3»â¨¬•{åƒ9\ò‹¬KZ<,¥+öÍ>@@@o]`E^AÊÑ91]q Ëó^N~·ô™¤®Øû@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@0øÿI Îy §IEND®B`‚rocksdb-6.11.4/docs/static/images/delrange/000077500000000000000000000000001370372246700205125ustar00rootroot00000000000000rocksdb-6.11.4/docs/static/images/delrange/delrange_collapsed.png000066400000000000000000000711211370372246700250310ustar00rootroot00000000000000‰PNG  IHDRªñUB[ž IDATxìÝ„^ùÝ7ðÏsgícªaS¡ô!¤R¡Ñ¡”Dn»6„%‘[«±¡lteå©Ô²Ñj%º»+ÑJ([Y»veKV× K™ê;î†.ËØa˜J=Œ†ÑÇçš½¶“ÉÌœ™ëÇù5¯Ã¸f®sï×÷LNÞó=?þ׿þõ¯EK—………8þ|äëÙ³gcbb¢¥=Õ­& 
¼ùæ›155ÕÙ?s?µ¨›€}´n#¢=«ºûèèèh\¸p!òÕB NyœÏýÔ>Z§QÑ–•uÜGÿceÛöýââbܺu+&''cvv¶mÝÓŸ–ä? ¹æ«…@ì£umZ)ÐÝGó˜ŸÇ~ º äÿCóXo­ÛÈhOW Žûh«ƒjÞ+ @€4G@PmÎXi) @€¶…€ º-†Y'  @€ ÐAµ9c¥¥ @€Ø‚ê¶f$@€ @€@sÕæŒ•– @€ @`[<µ-zYq'—––b~~>ò5ŸŸ522RI‹ò–ýùLÙ;vÄ®]»:¯U4$Ûm©K;r<¶û3÷ê¶æ~éw%:¿'uø­â߉Õu®ÜG«ü]Ùû¨}tõ~Ú=¾ÙG£óž:ýß§êãÊê}¥ªŸë°fßëÐŽ•ÿžû­jܸ^3ªû dmþC}ôèј˜˜ˆ;wî ¤Ì^ ɺ³ Ù–lSUËùóçkÕŽlÏv_ê¶ú]YÞ#ëò;[‡ß•ûh•¿³+ÛáßóèÓêp\©Ã>šmèßì£ÑùFþïSõqÅ>ú¸@Ý~WüÎ>>>uùÉŒj #‘±™››‹|n•"Ϻ³ ¹d›ªZò¯hujG¶g»/uÜGý®,ÏVÕáw¥¿+÷Ñ*gW¶Ã>j]ý»Ñ=¾ÙG—ÿŸQ·ÿûTù;»z_©êç:ì£Ù÷:´cå¿ç~g«Ú#7®×ŒêÆ>Ö @€ @€@É‚jÉàª#@€ @€Õ}¬%@€ @€’\£Z2ø¶¬nq1bffù+"ÎÎÎÆ±ˆY\ŒÑ[·òö®Ë_ûö-¿nK$&@€ @ + ¨v%¼V oÖôàAÄíÛF§§óÊùNÝšòçÓ§—ÊGöŒG<qäHÄÄDDEñé6Ï+ @€Õ´*¨æÝÜòqÝ»ºå¼ºßOMMU#¼âYQÙ€*ÛÑ­;Mnݺ5´g‡ŽEÄ¿ý-ž¾r%bnnsî9ëša6¿®]‹8v,9òÅ›+aËŸêÞM5_'''·¼ý 6¨C;êò»ÒÝGÓvå÷ƒ²Þl9ݺ‡ý»RÔžº´£nûh•¿³uû]±.ÿu÷Ѫ=ºí°þû9™9BÝËŠþÍÆú•u¯ü~umTf·nûè²RÝ~WüÎþû÷´ß}4ŸI{èСÈ×~—ÿõ¯ýë_ýR‡ísËð•ÏAªòÓu°¨ª ;"âpDü:"öGDþÜÏ’÷RDü1"öSm  @€ºÀèèh\¸p!Ž9»wïÖÕœ;~üx#‘âû’Ýn/-ųóóñÛ¥¥Ø9À¾çlêõˆ¸8:ð—™6MQ @€|)³±Ý Ã7nıcyWšÞ—VúÛeè¦øîÏ^‡,×£^¿£/ÆH~?À%geOFijÄÒo±s1x€ U @€m,g·žîÞf­ ª9£ÚïTól·G_†Ôxó͈G†Òç «cÿýßcc¯¿.¬EY¡ @€zÈ 6ÈÅsT©¹˺w/âüùˆ‡C¾Š´ˆÿøÇˆÏÚnÇaÓg @€uxbFuii).]ºŸþyçBØõ’qžƒ|ñâŘÉçcn°äöy*îzål°©UuÈ;õþêW›¿³o¿ýÉ€zéRÄáÃ{÷ö[ší  @€¨©ÀAõÁƒqåʕرcG¼úê«ë6;/”½~ýzto/½ÞóÜÊYo;ï7@àî݈ü*sÉÇÝäãk._ŽØÑï}…Ël¸º @€جÀcAõáÇñÚk¯ÅÜÜ\á5žÿüç?#g_‹ž•“3©îÀ»ÙáhÐçr6õêÕˆ|-{¹y3âÅͪ–í®> @€% <•3£ÓÓÓ™Ñ÷Þ{/nß¾½©ªïß¿óóó166W¯^- ¶›*Ô‡š#§|ÿå/Õ´7gUs?uúo5þj%@€ 0d§ÎŸ?ß9…7¯9ÝÊòü£3£jÆt+j-úlžò»°PY‡–>ø 楧Ÿ®¬ *Ž@^v°k×®ÎåéA© @€uxj||<Ž9òU;óšÓ©©©¯~^ï›;wîtVåön”´žRKßÏ?jܺUiçþߟÿ§¾ÿý¸ï:ÕJÇa•çY|ðAçla”¯L @€ú }úôFíÝp]7$oø!+ ¨½@ΦæÇ»‹kV»^  @€ÍÈû ]¼x±çÆæÍz{ ªF»Á0Oñ½víZçTß< 8ß¿{÷nçÙ©V³‘ˆ½whÍíúIÝ=+ØZ äÝÄ=úÕ·\³Z«áÑ @€@¡@¿g»f6ì)¨æTl¦Ü\2¨æi¾Ý%ÃêsÏ=×¹kgþg3gFÞzë­xã7ºYó5gdoܸ±æºÍ¼™³¸o¾ùæf>ê3ÔX`õŒj6Õ5«50M#@€¬È{ e¾ëuÉm{ ª»wïŽcÇŽmXïþýû;3©y÷ΜYÍTœ!v½e3e®·m÷}Aµ+á• @€ÕdÐ,Ê‹E-û¢ôº>¯+ûÁ~ÐÙü°ºo_ÄÅ‹îôÛŽ}G/ @€¬#ðDPíΨ®óùÇÞÎ0ûÍo~³óuôèÑÇÖùa›ìÜqáBÄÿüOÄ'Ÿ ¯ÓYÏk¯EdXµ @€ Ðjžn¦ÔjÛº@žü‡?D<óÌpn®4>ñ»ßE>¼õ¶Ù‚ @€Æ ª²š68g:ß{/âäÉÁ†Õ,÷ƈ<ŸŠ; ×”[³ @€´Y@Pmóè–Ý·<=÷õ×#._ŽØ»·¿`™³´?ýérHuºoÙ#©> @€• }zŸÜÚGÆÇÇ#ÃêÄÄDìÛ·¯ó*¸nͰŠO ªU¨«“ 
@€ÇrFuvvö±÷ñC·ÌÉÉÉÎÌjÖ“'OƱcÇÌ´xHe¸FuH°Š%@€ @ ^†ïÞ½/¾øb'¨NOO׫Zó•€ ú…o @€ØX?üðÃÎ̪°ZÏTë9.ZE€ @€À2¤æiÀ333C®Iñ[pêVÅ|ž @€ÒòšÒ³gÏöTßÔÔT|öÙgñ׿þ5æææÖ,#ÃêÅ‹ãÚµk®Y]S¨š7ÕjÜÕJ€ @€À&vïÞݹžt}â#y䥥¥xðàA\ºt)ò†JyÚïê%ß?~üx<÷Üs«Wù¹"§þV¯Z @€†/°cÇŽØ»wogÆôòå˱sçÎ'*ÍðzõêÕ5CìöF)‚j)Ì*!@€ @ J|vê©S§â‡?üášÍ¸wï^|ñÅk®ófù‚jùæj$@€ @ œ]ÍGÓŒ=Qûüü|Ü¿ÿ‰÷½Q€ Z»Z  @€¨@`Ïž=qàÀ'jÎkY=zôÄûÞ¨F@P­Æ]­ @€T ³ª_ÿú×+¨Y•[T·¢å³ @€4^àСCïCÛ; ¨¶}„õ @€ÇûÙõTë7&ZD€ @€À¦¦¦ž(=ï <::úÄûÞ¨F@P­Æ]­ @€T 077_5ïl©‡€ ZqÐ  @€†$°¸¸~øaœúhÍSƒ[ÑÑwBPmðài: @€½ äõ«Ÿ|òIü×ýWç4à‡öV­†"àÕ¡°*” @€Aä”Ξ=ÛWQ9ƒšÏPÍ×ÕwþÍ€šÏVýÇ?þ¯¿þº+õ%=¸ÕÁY*‰ @€ ìÞ½;Ž;ÖW©¹}Ô|DÍï~÷»¸}ûöc§ûæìêõë×ãë_ÿz\ºtÉ –úÒÌÆNýŒ£R @€¨±ÀÈÈH<÷ÜsñÞ{ïužšÏP]¹dXýãÿØ ±+ß÷}5‚j5îj%@€ @ |~ê… bÿþýOÔž§çÌj†VKµ‚jµþj'@€ @ d±±±øå/9˺zÉëXgggW¿íç’Õ’ÁUG€ @€@õßûÞ÷b||ü‰†dHýôÓOŸxßå ªåz« @€ŒŽŽÆ¾}ûžhIžö›w¶T+ ¨Vë¯v @€*8tèК5ß¹sgÍ÷½Yž€ Zžµš @€¨‘ÀZרfóV?kµFMÞ6MT·ÍPë( @€+Ö»iÒÄÄÄÊù¾AµtU @€ P½@Þáw­e÷îÝk½í½Õ±UE€ @€@=æææâÁƒO4&OÎ-YªT«õW; @€ܼysÍ úo|#öìÙSA‹T¹R@P]©á{ @€Z/0==W®\‰|Íêå™gž‰]»v­~ÛÏ% ª%ƒ«Ž @€ê2¤žã³²%‚êJ ß @€ P‰ÀîÝ»;wÝVåy7ßo}ë[ñío;ÆÆÆÜ4iXÐ*WP¤b @€è] ƒ¤GÃôî×¶-ÝL©m#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´Mà©¶uHhÀÂBŒLMÅ‹‹‹1¾N³g;Ÿ‰ƒ#FG×ù”·  @€Ú$ ¨¶i4õ…@––"<ˆxë­ˆ»wctf&^]\\¿å 'OFŒG'¬ž:±gOÄŽëoc  @€@£ÕFŸÆh˜ÀÜ\Ä¥Kï¿‘ßovÉ ;=½ü•Ûž8qî\ÄØØfKð9 @€ ¸FµAƒ¥©+³¨Ÿ|qôhÄ•+[ ©«;7ËȲ²Ì,ÛB€ Ð*AµUé3j(AòúõÁË•Á7ËVk8ðšD€è]À©¿½ÛÙ’"nHýÙÏ">,úôÖ×çìj–K^ÇêºÕ­Ú‚ PC3ª5M"Ð{÷"ΟNHí"eÎ:².  
@€@+ÕV £N¨¡@Îvþô§ý]ºÙn•Y×fÛäs @€= ª=ÓÙuò”ß¼»o™³œYWÖézÕu‡Å  @€@SÕ¦Œ”vh’@>'5#Sfh̺²Î¬ÛB€ ÐhAµÑçñj*ðÖ[åœò»ºûy pÖm!@€h´€ ÚèáÓx5XXˆ¸{·º†eÝÙ  @€@c<ž¦±C§áj*0=13S]ãffbáîÝXœ˜¨® jn½ÀèèhŒŒŒ´¾Ÿ:H€ªT«’W/¶ dP]\¬®w‹‹qíôé¸&DT7Û æ«W¯Æ‘#G¶AOu‘T# ¨Vã®V혚ª¼oã 1[y+4 Í‹Uþ1¦Í°úF€¾pª] @€j%`FµVá1 Ð…œµŸý÷¼½kV›0jÚH€M0£Ú¤ÑÒV¨…Àùóçcbb⫯;wîÔ¢]A€Ú"`Fµ-#© Pš@Ψ®\\³ºRÃ÷ @ 3ªý* @€(`Fu€˜Š"@ "òù¥““•RÌŒŽÆn§©t ÚTùÒÒRÌÏÏG¾Z @€rÕrœÕB`ûìÛ‘!±ªÇwŒŒÄ‹W¯ÆÉ ̘››‹£GF¾Z @€rÕrœÕB`ûdP˜ž®¦Ïãã1zð`Äèh5õ«µ•;vìhe¿tŠÔUÀ5ªuí"ÐT ˆ«Z„ÔªäÕK€˜€ :0J ð•À©Scc_ýXÚ7YgÖm!@€h´€ ÚèáÓx5س'âĉˆ2O—̺²Î¬ÛB€ ÐhAµÑçñj*¡ñܹˆýûËk`Ö•u–ŽËëš @€ÛJ@PÝVí³JÈÓpÿûrN.³® UE€Ø®‚êvyý&P†@Îr^¸±sçðj˲³Ž2go‡×% @€D„ j7 @`xyîÉ“¯¿>œ™ÕœIͲ³§üo•L€(Y@P-\u¶@7¬~ðAÄw¿;˜@™efYY¦ºív)&@€Ú/ ¨¶Œõ@õ+ƒå™3ýÍ®æ,j–1Èà[½ @€¬xjÅ÷¾%@€Àp2d^º´ü¬Ó·ÞЏ{7bf&bqqãzGF"ÆÇ#\Þ6AãTßͬ%@€4X@Pmðài:F dÀÜ»7â7""¦§—¿¦¦ÖîÎÄDľ}Ë_££kÆ» @€­T[5œ:C a¯Õ™ÉÉåƒ÷®]{öD<ÿüòW†V ¨“@÷øöÖ[wï.ÿ6ÿ »ÖòöÛËÇ· ©ùGÙý(âàA]ËÊ{¨©€ ZÓé©Yssï¿qéRD~¿™%üùÙüúøãˆk×"^|1â‡?ŒØ¹s3%ø ž@7 æñéæÍ­ß>ÿ<"¿>ø`9¨ž>-°o¤”L€ ¸Fu œ–ñO>‰8z4âܹÍÄW77˹?âå—#^ziù¯Õ«?ãg P–@—®_øÏÿŒøýï{?¾åÌë‡F;¶|Œ{ø°¬¨‡zT{„«Íf݃x†Ô «ùs¿K–ñî»Ç®Ì~Ûd{Ø^&¯\‰øÙÏz¨«Å2°æ©Ãþ»ZÆÏ¨€ Z»!ÙBƒº!uñ•Õç*Nœˆ¸wo廾'@€ÃÈ@ùóŸ/Ÿ%4èÙÏ•ŒÍZ @ –‚j-‡e“Ê&eHôA|eõŸ}ñ“Ÿ8 x¥‰ï  @`x$ßygù+¿Ö’Œýõ¯‡{ VÛ•K€m ¨6uó¯À¯¼RÎÖÁ¼©{‰v @ yùGØœM]ƒìQÞ€0Ãê0ñ Û«,l#'îú»´´—.]ŠÏ?ÿ<.\¸£££r<|ø0>úè£øÓŸþ‹_T&&&âðáñgÏžØáÙeúõ´2¨yÝNȲ–<˜ç-þúÓ²jTl7üÄï~WÎaÓ6§y|;u*bïÞí¦­¿¨µÀAõÁƒqåÊ•NÀ|õÕW7lüôôt¼òÊ+qûöíÈ€Û]&''cll¬tOž<)¬vaõšÏIÍ[ô—¹äøæ£òY«ž³Z¦¼º °}òùÞùŒÔ2—|<[>Ö-q##eÖ¬.Ø@à±Ssvôµ×^‹¹M<ƒ3?›!5gSwíÚ¿ùÍoâÆñÆoÄøøx§ŒóçÏÇ=7âÙ€¿‡UÝÀ¸‰1ê¡ô7©" oÜ"k  @ - o¼QÎ)¿«Í2 OM­~×Ï P¡ÀS ‘3£³³³ñÞ{ïufG7Óžwß}·óÙ;wÆÕ«WãÈ‘#_Íœ*þÜ0>‘!9gró)ã9 ae @`û äå,UÝ…7O9¾u+âàÁíë¯ç¨™ÀS9ëyýúõ¯®/ÝLûòZÔ›7ovN÷}æ™g ©¹ý¾}ûâÌ™3ñòË/Ç_þò—N`Í÷,ÈYÍ/¾@A=ñàA,Ü¿‹[<ý7ÿ b!@€ë dP-ãJë4`éÞ½˜ÿì³Xzúéu>ámý ä}[ò,D÷oéÏÑÖÛGà©»æ/\ØüeÌÓˆsÆVPí ÷ùšñ­ð@óóqþèѸµÅ›duo´ÕgïmN€mèÎhVØ·ÿ÷ç?Ç©ï?îoñøVa“UÝ0¼Ë|йKÚ®¹*xêìÙ³‘_Ý%OÓ- ª÷ïßùùùÎ×  yJp7¨Þ¹s'~üãw«ðÚÀìl?[÷¿íÒRŒÌÍEÅ­è¿J @€úäY7Uöû¥ÂÎ¥¥øšã[}ö‰–¶dåÍG[ÚEÝ"00'îú»™’=zÔ9í7¯;]ïñ5ù~ÎÖælêffÓò3ýœÚ϶›és->“q®Á)´ñf-@4‚ @€º d6Ë3u{]2KöT7<·Ú¨œu=}úôV7ûêóÃhÓW…×å›üŹ.ÚA€h’@Φ®|²†kV›4zÚºU¼ÒÅ‹·ºÙWŸÏ›õöT3TzÉ 
ÙOêt{”7\‰‰‰ÇN9ß½{÷p+T: @ B¼lîèÑ£_ÝÛÅ5«†ª‡.ÐïÙ®™ { ªÃèY—|k¯K^Wûæ›-?!ut4b|<¢iô^}½]ÓcÇŽ ºXå @€j)°zF5éšÕZ•F @ ï”ù®×%·í)¨:t(Þ~ûí^ë]s»A—ÖÕ|vi†ÕŠ—ÞÏ6¯¸áª'@€ @`è4û”ú^Z™7Q²T$Pû©Šº®Zh©@ þ»žøÝÒýK·h¤@O3ªO?ýtçüúúè£NPÍ™³ï|ç;w³EÕgHíãÂä¾%ò?Bjߌ @€Uûö-ß0pÕÛ¥ü˜Çµ<¾Y @ 6=Õ¼F5Ÿyš¯wïÞwÞyç±»–å,ë•+W:ï9rdÍSƒk#д†ìØñ£UÇÆ"nš˜ö @€@òôߟü$"se/yÚq•.»¿ê#@€@z ªÙ¯ƒÆ~ô£xôèQüüç?—_~9&'';ˆ9~üxçúÔ<%øÌ™3_=/ªÍhbPó«Ì%ÿãpî\Äž=eÖª.ØNÏ?_þq&gSOŸ®æÀÛilõ•[xâÕœ%ÍÓuó!Äy}ézK~î·¿ýmguΨþþ÷¿ï|å¹Ýw¿ûÝøÃþãùÜOË`ò úË_FÜ»177ز×+-Oõ>q¢š¿t¯×&ï @€@»òÌü£è‹/–w­j>Ó»ì?þ¶kÔô†Cx"¨æ3Ró.e6wíÚµa¥;wîŒË—/lj'"O÷Íí2äæÍ“8Ð »`eï/\ˆøÙÏ">콜Íl™ÿqøýï#òÕB€†)ÁñÏŽxë­áßá>¯‹}õU³©ÃOe @ G'‚jwFu³ååçó4àü:{öìf7ó¹~r¶;ï¾{ÿ~Ä•+Ã;˜ïܹˆ¿¼yV¿Í¶=ØP ÏÊ3¶ò1lׯïø–!5Ëwæ×†Ãa%ªx"¨VÕõö aõ¿XÞðüÌj†Ô×__ÄœÞCËmB€Öèòëݚ¯¨¥@Ï7Sªeo¶c£ò`~éÒr 䩹yð~ï=!u;îSúL€:tÃê™3ùý –ü£ë3Ï,‡_!u¢Ê @€ÀÐÕ¡Ñ–Xp÷4à>ˆxî¹þ®µÉ°ûÓŸFܸ±|07“Zâ@ªŠèþ16ÿpš³ŸcRžâ{ùòòa…Ôǘý@€: 8õ·Ž£ÒK›òàýÝïFLNFܽñÎ;SS³³Å×÷äõ@PŸ}vùN‹ùš~þ3ÐKûmC€ÖÈãQ†Ôï}/âÝw#nÞŒ˜žŽXXXëÓ¿—Ç· ¨yWß¼“°ëQ÷ñj, ¨ÖxpzjZ”sV5ê³³1}íZL¿öZŒDÄhDt4yˆ_ç®^Ñï?"ïò, öÄn#²@ήæ?§NEÌÌÄìõë1õæ›JóؖǸGñ?_¾îž˜ˆ‰¼;~ÎžŽæZ 4I@PmÒhm¥­8¿ù͘ٿ?N~¹]7¬æ»ó®Í=³[Ÿ%@€êò²ûöÅÔÄDÿ2¨f ÍcÜRDÌùzl÷î˜ð|ÔêÆIÍèS@Pí°I›/FÄl“¬­ @`›8 x¥øÔIÀÍ”ê4ÚB€ @€!¨Ú  @€ @ V‚j­†Cc @€ @@Pµ @€ @€@­ÕZ ‡Æ @€ @€€ j @€ @€Z ªµ!@€ @€AÕ>@€ @€µTk5C€ @€‚ª}€ @€j% ¨Öj84† @€Uû @€ÔJ@P­Õph  @€ªö @€¨•€ Z«áÐ @€Tí @€ P+AµVá1 @€ ¨Ú @€ @ V‚j­†Cc @€ @@Pµ @€ @€@­ÕZ ‡Æ @€ @€€ j @€ @€Z ªµ!@€ @€AÕ>@€ @€µTk5C€ @€‚ª}€ @€j% ¨Öj84† @€Uû @€ÔJ@P­Õph  @€ªö @€¨•€ Z«áÐ @€Tí @€ P+AµVá1 @€ ¨Ú @€ @ V‚j­†Cc @€ @@Pµ @€ @€@­ÕZ ‡Æ @€ @€€ j @€ @€Z ªµ!@€ @€AÕ>@€ @€µTk5C€ @€‚ª}€ @€j% ¨Öj84† @€Uû @€ÔJ@P­Õph  @€ªö @€¨•€ Z«áÐ @€Tí @€ P+AµVá1 @€ ¨Ú @€ @ V‚j­†Cc @€ @@Pµ @€ @€@­ÕZ ‡Æ @€ @€ÀS @€¬ÀHD쎈ñˆ]UôLD<;V½ïGþ- ¨þÛÂw @€zX\Œ˜šŠ¸u+ýå/1õeHÍкrYˆˆG ±ëÿþ߈矘˜ˆØ½;b‡èºÒÉ÷Û[@PÝÞã¯÷ @€ý d@½{7âêÕå×ÅÅèΨ®Utg†5·™œŒ¸ys9¤f`=w.bll­M¼G`Û ¸FuÛ ¹ @€ L`n.âå—#Ž‹øðÈ  [Y––">ÿ<âÊ•ˆÿüψwßÈ÷,¶¹€ ºÍwÝ'@€èQ`z:âèш·ÞÚz@]]e†Óû÷#^ziyfõáÃÕŸð3m% ¨n«áÖY @€䩾ÇG|òÉ`g@3 æìêÏ~!¬d¨ÒLר6sÜ´š @ *œIÍ!Íäý{‡°äìêõëË7Wº|9bdõ혆P§" ÔLÀŒjÍDs @€j,³œ¯¼‘au˜K†ÕwÞYþrÍê0¥•]SAµ¦£Y @€5ÈÀøë_Gܾ]NÃòÆL¿üeăåÔ§5Tk4šB€ Pc Œï¿?ØkR‹º›w¾v­Ü:‹Úd=ÕUA€ 
ÐpœMÍÀ˜Á±ì%ŸµjVµluõU, ¨V<ª'@€h€ÀìlÄ­[Õ44Ãq>ÇB` ªÛh°u• @ GO?ȰZÕ’7oÊkV-¶‰€ÇÓl“ÖM @€>>þ¸ÒëD§¦ââñã1ãQ5} b}7={ölLLLÔ·´LP­]• @€ ÈëS?û¬Ò,.ÆÌ­[1Yi+T>,cÇŽ «èÆ–ëÔ߯† @€”"0?qÿ~)UmTÉÈF+­#Ð2Aµeª; @€ÈÕþsÀ…n½¸C[ßÄ+àÔ߯† @€l'3ªíí7ß|3&'ÿ}b·kV#ÕöîïzF€ Ð" ï9Ü"Åzvejj걆¹f5©¿í~ @€ °J`ÇŽˆÿý¿W½YþG™òëW#2Õ2µÕE€ Ð<]»"öîm^»µ˜@ƒœúÛàÁÓt @€rFõ[ß*¡¢õ«X‰ñC‡â˜ç¨®Ô5‹‹‹qçÎÈWËú‚êú6Ö @€X8r$âÚµˆŠÂÅÈÄD\¸q#BPmü9;;‘¯–õœú»¾5 @€–ö틯N#ƒ²Z¿šKTK'W! @€@ãFG#¬¦Ùcc‡WS·Z T$ ¨V¯Z @€† ¼øb5³ªÏ?±gOð4—@‚j~¶&@€Ø.yêï™3ys¥²–*ê,«oê!°€ ºŽU @€øá#Nœxì­¡ý°sgÄ/~QÍ,îÐ:¥`›T7çäS @€"ºá1o®4Ì%gm3¤–ЇÙeèA@PíÍ& @€ÛX Oǽ~=bXa5ïî{êTÄ /”{šñ6R]¯Ÿ€ Z¿1Ñ" @€º dHͰúÌ3ƒ “9c{ùòòW~o!°MÕm:ðºM€ Ч@†Õ÷Þ[¾ÁR>B¦Ÿ%Oõýîw—ËËÙTÏLíGÓ¶-T[0ˆº@€ P‘@Îz^ºñßÿñÓŸFl5°f ÍÀ›³¨|0øÚŠXTK _§ú-Àö @€¶µ@ΆîÝ»6óY«÷îEüéOýkÄ£G ‹‹Ë§ïÚµü::qð`Ä‘#ËA5¶ ð•€ ú…o @€ô!Ð ¬Zó16óóKK33Ëaõé§#¾ýíˆ|Í™Tá´l›¶]@Pmûë @€@ùZ»§ïÞ]~ýj$Ðpר6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€yx1N IDAT @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. ¨6|5Ÿ @€mTÛ6¢úC€ @€† ª @Í'@€ @€@ÛÕ¶¨þ @€ @ á‚jÃPó  @€ Ð6Aµm#ª? @€h¸€ ÚðÔ| @€´M@PmÛˆê @€. 
¨6|5Ÿ @€mTÛ6¢úC€ @€† <ÕkûãâÅ‹133³a£££qáÂ…ÈW  @€(è9¨.,,Äõë×cvvvÃ:vïÞ¯¾úꆟ±’ @€tzªÿüç?cii)FFFâСC×n¡+_s&5?c!@€ @€›è9¨Þ¿?æççcll,®^½9sj!@€ @€ý ô|3¥üãU3¦ýí  @€ @`¥@ÏAõÎ;rÆÇÇÝ(i¥¨ï  @€ @ /žOýÍ»þæ’ן敦§§;¯ù^βîÛ·O€íkhlL€ @€í)ÐSPÍ`Ú},Íßÿþ÷ÎÍ”òç•á5gZÏ;'Nœˆ;vêæ¶Yn¯K?ÛöZ§í @€ @àqÌfEO‡y|‹ÇʉϞ‚êÊPyëÖ­ÎÌéÄÄDç5555Õ™a}饗"ï|òäɰš§Ÿ>}úñná§nHÞÂ&>J€ @€8þ|\¼x±çRóf½=Õ £Ý`˜§ø^»v­sªožœïß½{·óìÔ<8yàÀØ»wï† ÍíúIÝn% @€”"ÐïÙ®™ { ª9›)7— ªyšowɰúÜsÏÅ®]»âèÑ£177o½õV¼ñÆݬùš3²7nÜXsÝfÞÌYÜ7ß|s3õ @€’ÀÙ³g#ó]¯KnÛSPÍg¦;vlÃz÷ïßß™Iýàƒ:§g*λ޲™2×Û¶û¾ Ú•ðJ€ @€j2håÅ¢–õüxš¢‚óJ?øÁ:Ë-õ;ý[TŸõ @€ СÕäÉS„- @€ @`+=Õ<ÅöøñãñÚk¯ÅÒÒÒºõå|sÉkX…Öu™¬ @€ @€=ÕÜ~rr2®\¹ccc;úæ¬j†Ò•Kþœ¥É›Ÿ;uêÔÊÕ¾'@€ @€ë ôt×ß={öĹsç:_ï¿ÿ~箾y—ß¼»S>&&gR3¤~ík_‹ .D~ÞB€ @€6#ÐSPÍ;ú¾ð ò/_¾Ü ¥÷ïß·ß~»ó^®ÏpúÊ+¯Ä‰'"¶ @€ @€ÍôT³à;wÆ™3gâùçŸO?ý4>þøã˜|j>–æÀÓ~7ÓŸ!@€ @€]žƒj3¥ßüæ7;_Gí–é• @€ô,ÐÓÍ”z®Í† @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« 
@€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊTËõV @€ª@V @€ @€@¹‚j¹Þj#@€ @€AµÈj @€(W@P-×[m @€ P ¨YM€ @€å ªåz« @€ Õ «  @€ @ \Aµ\oµ @€ @€@€ Zd5 @€”+ ¨–ë­6 @€(T €¬&@€ @€rÕr½ÕF€ @€‚jÕ @€ P®€ Z®·Ú @€ @ @@P-²š @€ÊxªÜêÔF€$°¸#ssqdi)F"bbUÓg#b*"v/,D,.FŒä§, @`Ç•u`ž|[P}ÒÄ; °Ýææ"nÞì|>xWçç×9›kΟ˜œŒ8r$âðáˆ={"vìXw+ @`› 8®lyÀÕ-“Ù€Z+ððaÄ»ïF\»ñàAÄÒÒæºš3ªwï.Eœ;qâDD~o!@€í+à¸ÒóØ»Fµg: @€@«ff"^z)âå—#îßß|H]5Ï zôhÄ'Ÿô^ÎêrýL€Íp\ék¼Õ¾ølL€ÈYÓ ”Ç/Ϧnvu£ŽwḚ̈zýº°º‘•uh›@÷à¸Ò×È:õ·/> @€@ãîÝ[>M÷³Ïß•œ]ýÙÏ–Ë=yÒu«ƒV"ê'à¸211£:F… @€@#ò´¬Ÿü$b!µ ’×'eX½}»ûŽWh«€ãÊÀFVP¥‚ @ Q ýëˆééá7;ëz啈üŒ…Ú)à¸2ÐqíûÔ߇ÆG}úÓŸb1Ÿ!—Ï™›˜ˆÃ‡Çž={b‡ÛótÀF€È»û¾ÿþ€ ÛD1ˆ¯\‰¸|Ù)À›àò4NÀqe CÖWPžžŽW^y%nß¾K+n>199cccqáÂ…8yò¤°:Ð!Sô-׎æ#hV»ú.s3ä³Y_|1bïÞÍ|Úg @ )Ž+©žOýÍ™Ô ©9›ºk×®øÍo~7n܈7Þx#ÆÇÇcnn.Ο?÷òbb ¨“@Æ|NjÙKUÿ‘)»Ÿê#@€Àvp\øˆ÷<£úî»ïvfRwîÜW¯^#GŽ|5szðàÁ8~üxÌÌÌį~õ«ÈÖ‘‘‘7^ @`Ëy™Jþ‡¢ìÙÔnC?ú(b~>bl¬ûŽWh²€ãÊPF¯§ š×¢Þ¼y³sºï3Ï<óXHÍVîÛ·/Μ9/¿ürüå/éÖ|ÏB€*øâ‹jfS»ÿâ‹XøóŸcqb¢ûŽ×zØÊ&‚@ Ž+³„ZöО‚ê_|¾yòĆý2îg5*±I•èM-¦Õ“c* ã©äÜ(ùµMD@D A LL$þ`ÔœÌÜ0¿¨[ââHJxP¯†C0VÖ“h|³{8Ïø’e9ªÅ8ž¥æ5½®»wï.õ´ÌñÕ)sqý tcpV¯ý;FHº”ˆ€ˆ@ñ¦¦ŒuVÛÛ‹?§ŠG²úË¡C‡Ê¾“õ–å¨âTÆp4+ñºã–§Q®·víZ»lP®çÉ×+žëmßPߥÓéŒXížkôCD@šˆ@¥Ñ®ø†e9ªÕ`\È¡*tω‰ sìØ1{ß,‰£TÏ>û¬¸u”ˆ€4ôÄ„Iz¾Ý)cŒæ¬W™¦§§Ÿ«û]ït5XHÀ‡zebfÆ#:5ÁÐßp§# ‡øwå&Î-ËQíéé1q;7´| ”û,†óþùgCo/N«’ˆ€ˆ€D Ú,¿êŒºbéÛpR5>µtnùÎÀ@R#u>BÚ'"P->Ô+÷ææÌ9fþM8­‰¯ˆ_W‰£Êc”å¨"€o Ÿ~ú©!Úu5ÓeŒãÊ7û“ òE*qœx§@Iå#2 ‹/r •¾H•¼ŽG’rø¦£ðLòõå]ñEßt´âw…ÉnÐlïè0¬/î‹nø"‡ÓQÕ+Ì¥"Û'\';U½2OŽ+—£aÈeüöAŽð»R1Õ+-`¤ƒæœp7s`1?‚2ÒW_}¤R© µµ5˜œœŒ¼Âï¿ÿtvvƘ NGSíÓÓÓA{{»•a||¼Ú·Ëy}_ä€ùdJ* 
x%ò$™$åðMGÑÓ$ßY_Þ_äðMG+~W¨·Z[ƒÀ˜d>©TŒUTìø¢¾ÈátÔ—ú­b­@;|)Ï}‘Ãé¨ê•y¥rïJ’:Š$>ÈÖÑŠy¨^© ÔÊ}ê?‹qf³Y±b…]#•žK–Ÿ‰J³³³v­ÕT*eÖ¯_uˆ¶‰€ˆ€ˆ@í ttSAofÅ3ÉOãv*¾¿. " "/Õ+ñòüûje9ªK–,1«V­²—\'õòåËÖQ¥+}åÊ•U^’ 0|eÓ¦’O‹íœTÍHN]HD@' z¥*YP–£ÊÖ<åûúõëfttÔ<{ö,Öñ·ôIDAT# ½¬'Nœ°Û6mÚ”ØØÐŒ@ú!" " a6ÓÖÞR›ß3ƒƒÆ¤Rµ¹Ÿî"" "PªWbç\–£ŠÝÝÝfppÐÈÖÑXßÙ:­W²uŇÿ+vT}xßeÀ!cR©¤/!bÒÉ—ŠÂ9’Îî/]˜ ¾¼+¾È±N2ÿUMGq"q IÕpVéIÅIÝ¿?6'Q}Ñ _ä˜ÏÀäÿúP¯Tí])¯/rHGfœ:ŠD>ÈQ5­Óze¡¦øñ_Ù“)ù!¾¤ 8£‚ñªq®¯JØ‘#Æ>ÌX™ …Ôé" " uC@õJ,Y%G5Œºˆˆ€ˆ@]À¨èí5f|ܘááÊ–®a š¾>c.\ˆ5Ü·®ùJxh6ªW*Îq9ª#ÔD@D@†3Õ?nÌ•+óëÒ¥ÅÍÒ‹AòïÏÏ$|îœ1|ººb ÷mÆzh&ªWÊÎmQ-NhH8+VÌ;¬ûösçŽ1/sïž133ÆLMÍ?6ÆÓуºq#ƒ®<*ç´!•B%" P½R<9ªeaÓI" "  OÂq¦|º»çwnnÞYå?ç¤6<= ˆ€ˆ@,T¯”„QŽjI¸t°ˆ€ˆ@S ÷TKZ5µ èáE@D VªWrâlhG5•J™6ZÂÿ^~#'í °ŽÓçó­$>Žú˜+’)LÀé(u>u¿’øF€¥P¨ë¥£¾åŒäq|ÔÑA8íûÙ³gæádžo*12@I|#033cæææ¬~ÊYõ-w$¤£Òß 8ÅI]¼x±œUß3¬ 壞GO¥£M˜ùuòÈ>êhC;ªu¢SD@D@D@D@D@D D@ËÓ„`览€ˆ€ˆ€ˆ€ˆ€ˆ@òä¨&Ÿ’@D@D@D@D@D@D D@Žj†~Š€ˆ€ˆ€ˆ€ˆ€ˆ€$O@Žjòy D@D@D@D@D@D@B䨆`览€ˆ€ˆ€ˆ€ˆ€ˆ@òä¨&Ÿ’@D@D@D@D@D@D D@Žj†~Š€ˆ€ˆ€ˆ€ˆ€ˆ€$O@Žjòy D@D@D@D@D@D@B䨆`览€ˆ€ˆ€ˆ€ˆ€ˆ@òä¨&Ÿ’@D@D@D@D@D@D D@Žj†~Š€ˆ€ˆ€ˆ€ˆ€ˆ€$Oà_É‹¿Ïž=37oÞ4.\0÷îݳ7hoo7›6m2k×®5‹-Šÿ¦º¢I`jjÊ:tÈÌÍÍå=]M§ÓyÑNˆ“Àõë×Í©S§¬Þ¡ù’ÊÙ|t´¯ZŠÕÑsçÎ>…el!]/t íonQe!D°;ûûûͺuëL*•Ê ›à›o¾1ölÔ7šõë×›¶¶¶œçi‡”B [Ï8×éZoo¯iiiyîr^Ø«Aƒ¥Gét:hii Œ1 >‹- vîÜpŒ’$E`|||^fë©û`` )uß&$ðøñã ¯¯Ïê&:š/©œÍGGûªE ¥ütei¾ïBº^­gÑuƒ@¾²½ÃÅ&²;Ÿ>}ŒŽŽÏéj*• ººº‚ÉÉÉÆ¥§HŒ@>=CGѵÞÞÞH]óÁ^m¨UZµ>ùäsâÄ ó /˜¡¡!Û‹JÏÕØØ˜¹zõª9sæŒY¾|¹Ù³gOޮ皴Ab"àzRimÍ×’Ÿo_L¢è2"` PvŽŽŽz« %•³…i5”ª£Ož<±bPŽRÖæJùöå:GÛEèä¬ÍÉÿôJmݺÕöRÍGTÑ}ؤ¤Ã‡/°;¿ûî;³wï^óðáCÓÕÕeÞxã {î7ÌçŸnnß¾mÞyçk¿FõvÙ‹ê €ïƒnÍÎÎšŽŽ³k×.[&b‹^ºtÉ\¾|Ù~fff¬Î†{ñ½°Wsñ«pã~ø!hkk³­{öì h}u‰Ö¬mÛ¶ÙV+ŽáX%H‚ÀÐÐÕCõ˜&A_÷t(i­§ÅtxxxAJ¾^&•³Ž ¾«M \½ÿ~°lÙ2[ÎæÓåj˯ë76WÒ+…}™ÝkšÏî GÐsz÷îÝ ,zÀ>ú裀(@z»Nž<™Ù§"P °žÑsŸÝC_H×|°Wj2%Z „ü®^½Úñœ£$µ$@ˆÏ?þho©ÐÞZ’×½² `¡nJ"P 4¤0þsé#‹/¶Ç¹É’øçûï¿7„Z²°ßìDYL#"éÎ;‡CIJ%. 
©ï£ºæô—ô’䋽Ú0cTÃ@™)-j†5¶¹V+—yáXì¨ Ô6ˆ“†þ_ýeõ}üí·ßÌää¤qã©(,ørâ”I×jN4”„KSÅÿn¦ô(**g£¨h[µ”££ÈByJYûâ‹/Úò§ŒÄü®1;ÊN¨Ö³èºG=b zš«ÎF]ýŽ¢rN.(K–,‰„Ãñ\ו»Ë–-‹¬§FÅtöìÙçÆøð ’¡y ¨œmÞ¼¯§'wQ”§¿þú«RÁÃØ¿uë–da&Væ 8r䈜ÕzÊÜ:‘{ãß¾}¶¾ïëëËD°°OŽgddƒ‹‰“úî»ïZÛ“Ó;vdžØ{µaƨºPÞ aý 8ŠVýíÛ·ÛqÕ_~ù¥7W®\±Ó×3¡j®‚óð1$R“P9Û¤_GM# ë=¥,=~ü¸!Ì’2–²–rvÛ¶mö‰h d%ˆ“Æ? !ýýý¶.':êСC™JÊç=u-(…å$"è(å Q~´Ëwºëøb¯6Lª«oð™a½Lô 2–:}ÚlÙ²%̺oýÀø§Þ~ï½÷2¡”L”„Ži(Oýço#<½ùDš°¦ï¹sçlo?¶'N*'áqû¾Ø« ã¨RùP)Ñ’¥$¾`¼@¾D!ñꫯÚB]¦gUŽj>bÚWK*gkI[÷*‡@öXÀ¨kÐ@¸uëV»È=½|ܸ¬¨ãµM pQP8ª„õ2I%Œ…Ξ´[•²ôîÝ»….«ý"ÙÙYsàÀóÅ_X_É…ú¢§èkØI妾ث úËì~ÙcË]]HjH€)4HnRÞ^·œTÎæD£uF€.œS„݌Àuö×ôP>ÎR2„RÒBd!æÃÃÃÏ9©ˆŒ­JT•’ÔŠ )4Î’ŽÃʘiæIah&åúOµ°W¦GÈQ$W¥Îð§‚rS0G§m"*0Æ (‰€oTÎú–#’§\/¿ü²œ…ráé¼ z§˜YúÏ?ÿ4½½½¶ÇjÕªU gº kÜÿ™ ‡f]eŸzüÃdô»4ÄÖ‹³ÊLÒÌÂúéQ:WÊu9¶öjÃô¨ºP Àݸq#r`Z¾~ùå›®5µÔLÑñ"P.*¤Ý»wÛ–Wƒr¥ðD …B/r]CÛE TÎVƒª®'£éÝzýõ×órƒá`¼)‰@©p¥ÄIÅ3ŒI-¦wª§§ÇÞŽñ‚,_•ètaÜk¸×*ê8m\Ð&òÂIÅïaŽ”¡¡¡‚NªOöjÃ8ª 6l°ÓÏóòg'¶±c7nܘ½[ÿ‹@U ¼ôÒK6ÄŒìGµPÔ Ãë®i†(BÚ–•³I‘×}‹%@9KCà™3gÌùóç#O£ÑúêÕ«¶A›%Âò­½ymcl¨/Î*Cu˜Œ&<9b!@ôº2F†i&õÊN88$œ_ •D Tè'!éôžâ°kSúd¯6Lè/™Ç õS§NZJßÿ};³Ÿ+8ˆÉfÔÍŒ«J"PKè(=þRL N+¬ ¿ÀxʵîZ-åÔ½D •³ùèh_Òèe¶JŒ3Æc­^½Ú„C1qFGGí¸B^vîÜ©°Ê¤3­ï]¼xÑJÎP›7o| "¤\£Î-eéÈȈÕSf§vN¶:Š€}088XT/mAt@Ó@‡è%rdzzÚÎô›ú‰žze¯ –FGGƒ––– •J½½½ÁÙ³gí‡ßlcÇ(‰@=zdõÒ,Z´(èîîÒétpôèÑ ¿¿?hkk Ø×ÙÙLNN&!¢îÙ¤¦§§ƒööv«ãããy)¨œÍ‹G;«D X½uëV¦,¥L¥l¥Œ¥¬¥Ì¥ìÅرcG@™¬$¥ë"uv1Ÿìrõ§Ÿ~²u=çvttXå˜ááጻsçÎàñãÇ¥Š§ãEÀ(J7þr¼K¾Ø«Æ Ô(ßOŸ> >þøãL%åàóM…Å>ŽQ¤P9 ­­­Ï P}}}rR“Êœ&¾ïýû÷ƒ®®.ë¬~ýõ×yI¨œÍ‹G;«D XE?/]º¬Y³Æ:¥a;ÀÙ8­rR«”QMpÙ°.ÒÀWÌ'ª\¥Ašr—†“°žb à¤JG›@™ªøˆ»ví*J7þr|8ù`¯þòu×ã>Â&Jåµk×ì#0p🨵‚êñ%s} lˆ± pGGùŸp B\ØE}?¡¤¯7”›Œ—⻘YÑUÎÖ[׿¼¥ê(!o”±|˜d‰p6Ùõ¯> € € € € €%@`» E@@@@ °Íg@@@@@  lÔᢱ € € € € €¶ù € € € € €”í‚:\4@@@@@À l×××›þQ@ é|‡šnÇ–¥#À÷¤tŽ5=mºß“¦Û±% € €”Ž@Y==•ÎѦ§ € Púß³²²²‚h+D@@ZB Ü¿Óoo¶ƒ¹í7á9é´oWaý{w¶š}uöö{ÛÒÙ„u(9.ª¬W÷Clçî[·aGÉõŸ#Ž@ÏîìÐNíÒY•u@@@’ˆ l_wó“¶·fIƒÐyš"0æ¨Ãíöoi6ï´¯|TÁ6½ÀÙ§ißøòÉöÊŠ öíÛþZôý¥ƒ4E຋O´óÎÙ”MÙhQ+n¯iÑý³s@À/pÏumý/yŽE*Àä‘Ez`é € € € € P¬¶‹õÈÒ/@@@@@ Hlé¥[ € € € € €@± Ø.Ö#K¿@@@@@" °]¤–n!€ € € € €Å*@`»X,ýB@@@@ŠT€Àv‘Xº… € € € €«íb=²ô @@@@(RÛEz`é € € € € P¬¶‹õÈÒ/@@@@@ Hlé¥[ € € € € €@± ”kÇè € € 
€¥,pý´ Þ'ñx¶Ú:³w×ÛšMmÁò¶úƒƒ¥ÌEß@ L€Àv0š‹ € € €@:‡v(KºZE¹Y÷NeÖ½Sk;aXk{mÍA»svmÒmx@| °/G‚v € € € %Í×ÛÆêcj?ô2ëÝ5ü5 •]sVÁí%^ €ä«@VÛ¼õëÝ£c£û\³¯Î¶nßc;vרËË>°ÿ÷ÈË®#ß7xá¡«âšxûÿ°GžZ·¼¥üøë“ì¤1ýbšñþ¦vÁõŒYÆ‹ü8ïŒvÝÅŸÊXc áX‡k^X²ÖnüéÓMrà3ß$¶‚ÜhÔ°ž6õÔá6¨oëÚ¥½u¬nÓ}þ÷ÕÖÙÒUíO¾n›¶îŠyŸ € €@¡ (¨6{ðá­ìŠ3ÚXçꆷ‚ÛZFZ’B;´(=¬¶›Êض²Ü ˆ÷¶ŽvÔ Ãì´ñí–_?g¯­ÚØÔ*Ù@ˆ@®ìû×~Æû‰, yâ.Îì{¨}î”a¶xùûvÛo_$ÀbÅ"@(l°ïyj¿ÝpnE¤#“Ž-·Õ’$Â@¼H<‹D4W…Ÿ~sŠid@æ|fü@»÷æi)ƒÚÁ}被î^ùÅ÷>o ŒS@@bPp[©J\éÙ%šžÄ-ã@|È«Ûa8 (Ütå)¤¿Ãa €@ÚWi|\Ê‘´76³î]ÚÛm3>ÇïQcÐX@ F@©J4‘dª2nHk;~hkSð»Su™iJW_³é =ñbmÛ ”»÷ÝãO.¯ôRŸ¼¿µÞ¾ÿû}ÞâÏÓÚŽÚÚzwm©Sõ-zë€ýyaÛ4á£&Ê<ûÄrУUZýðï;X©R±œ{R¹×F—¢¥¶ÎìãÝõ¶ü½ƒö‡ùûƒ›ð@ |?AÙÝ{ª¼·÷Å3¶ÓNxÐÈmåÎÇ<ÔU;é»» ë#Ð,>sÍâcã"Pþt¦ƒE¹´ÿ¶ð{òÙU‘4#ºKèÔx)HtqÕ_ô{ôŸ—~Ê~vÿ?ü‹yŽ €¼€&’LU~tIeÒ ±ãÝ;µ¶cµ¶ûÿºß^~ë@h•.X¬ ¸ÒWNi㤃+«¾3+·#û¶²[NœEÁö ?]nÕmS÷ÁíÃwËôxý´ Þ'þæv­¯öœÚ©µØÊy¾.aÿüõñ@ {ñgëìí+iÍš˜K‚·Î3M",cŽ:<¸ˆ× €¤%0rh¸õtÁU“áj¢bÿäš×A¿Gÿög†þ<¶\],@@ ] Su´u,Qíѽ}w½­\Ð^[ÓðO# ]QXæT¥M¹Ù7§WD‚ÚªSõ©ÞÝ5Ñú4 ûòImB«S`ÜÔÖÈj×.õC¯]Ñs×^–›Î j»öhõÏÕ¥À¼ö©}S@ZN õ/MŽÛ¦€Âî0MØå/ýïìÉs@ÒèXÝ6nÝ%o|·Ì¿@Áî¿<·ÊÎ9}„±7ò[wùƒá1+ð@(0tÝhgo,ú®ÛÒØ}gãA›ýR)/w°häô•“ЪïóãË“¦q#·µÏÞ8—âã»_ª´Þ]‚Ç~ïÓñ)@¾8±M¤í DÿÏÌÚ˜4( >+x®})ྷ¶>a=  «¨=ÿc¿ý}i¬ƒR”\qF¯.õO#Í“$úð@ ³yØV÷6nÙØÞç.&é¿ WLgG ênº]Ü•»klëG{lé¿Fá¹åé<ê–ôÿ¸ðx¯>Q}º}ý½d Ä«üñÖ bö©ea) ^x誸ÝÞþÀ?âÒ¬$«/Q?Õ¦7ÞÞl÷ÌŒ}·Ã,,¸mÆ™6vDü¨úÍí±ÿøÞŸ€tKÿ1ÃzÚá‡u4wË¿Fìoݾ'i_þÏyãìÒ/דïþüoö÷…ïÄ-÷/kë;ë¶Ù%7>ê_ç!Jç IôüÅÿù ;žú ¬Ûð±Ý÷ø’È÷Å¿}ØódŸñ—^_Ÿi Â>GêKªÏ¼Ò*)å’ÎQíôýÝ´uwB'™Š[æ_ €ÚçN ŽúßSðHÿ4 \¥?ýu…wë¹°ç‰W^}ƒÚÚ]ß™j©r‡‡ÕÛœeÿõ§Ú)ãú‡öSÈëŸÞ¿å×óSx›Óÿ¶²olPû3ãÚõ—Øsu*ÀíïËâåïÛ?}Ú½í=*uÀSŽŽs8ëÓÃSö{p¿Ø»TáóKÞ‹©ŸP ëÒicB§òëŸ>#Í[ž2(­º®:ÿø¸c«¹Ï…RAÜõû…kd×nÊg^A7M<¨>„×79½ñö‡öÝ;þsAH£ƒu&x7Ëè#{…U³lÜÈø‹NËW³N)¾Ð9dÇ®½qO¿….^†]¤ô¯|žß£dß ÷1ø0o„y°=y­ÏéWžzN×¹YŸ?ýÓw0ÙEËÆì3ë&úmtmV ]¿ÿW}ç owZÒIƒãvíÿ}Oç|W @ÈcQZÙ=×ÅßÅæš¬³ºéLÔè¶ {ÜõIti:y»_yû@Â\Õ $kI—¥{'ŸcGQ»QßÚk0èíZ¢>)W·Š«Ë½§GøÝˆu¥@If 6)-‰»pÒˆÖI×÷ï‡ç €™È»À¶žÁÀº<뙕 {~÷ζ£–ð}ÿú£U#};´¯H`K·N^Õº¹(é¶IÈßtåDÛºý“´GÈ6µý ¶oÓW]%§ÀcØ­ú ú„mÖõÅ»€„[O¹`@=,híÖ×£>_ú ø‹Úª@9¥i ¥ûýÓqßµ§6¡wºŸ òôß·ß—0¯iÍoôVMùÌës§öêóœN‘ç/¾÷ù¸À¡.ÀϲÐÈSw÷HXýûÄ_Ì™ýlâsjX…¾L£šeå/: x~ñÌcìռߙdŽþm=O÷<­íÓý=J÷{¡þè;¦ M)åÞç.èV—ÖÑgtÚ5¿ 
{;§ËtüÒi³¾WñݹcUZç,YjÔx1L\ÓÂÎ@ R@Ú;gŦðÈDG4Ê:UYûa|Jÿ6 4w·†QÙÙÊiíY¾~K4··¿þçÛvFÛþmýëð@ ûiüÌd¿Úƒ3§ž0ÀuÜ£FE' 6hÔUºA5½Éþ`Õ¾©Së*8šíÒ˜6)À¡*Á`p&Û¨`FÖ‹,×6²olQßu\ü#·•Ú"ØV HûH”ŽD#ºƒåõM ëÉ××Áté´3,@¢íó¹T_øì‘¡mt×EºEŸqýËeiÊg^#`uwBcÛª@Fx»Ôê§.ÀÈ/xqfê©Ãž#•†$¸o¥ÃHôÉ¥g.÷õüâwž{d­Ñ»úçRM­Z³Åþðäë¡çµ;¿GúÌ5æ{¡¶/~$jopy:Áaÿ6Z_AwM´Ù’¥1í¦RJÕnÍ l§Râ}@BÐÈç5m+Ú˜õéV¥¬ÑÇ—|¶ÝúxmÚÝQù˜­lxŸÖ1ÛôìM óF_øG€‡U¡À·m­ØQ,‘튂Åßfõëš³ÒŸ×s¯ýë»e<"€ä^ zvÏò¾õÇdSlº%ßÌô7S0¥Ýåt ±ûcT)§ Ñ¥Ô nW‡êLô‡¯ì·ýöE/Сõ”ÔVƒ#W_6e2sî2/0årІµ9l”f¦Ú£ý†@MÔÖ¾¯þÒø¸&(ô·ߎGȹøó£ã‚3ê£ÜÝE=†ÂL–Ž$lD·,)ÍÐ÷îo ߉­ð K”èâƒ.Ä„}tŒç<÷f¤ÞDßçæ÷ y MýÌ+­C0­ï‰RìøÏ'_üÜÑqç¥' W¯Ý9ï¸'KG–†äŸK×»MKæQÁ×ãî“0ŒƒÐ±ê8¨­wqSáôùSPÿð—×½ó­úvý¥ŸŠ;'«NÍ‘/E#Õo½ÿ‘6ßtå)I·~Ó]u'FX:%ÍõÝ»û(_úM;@HW@Aí;gÇ®/ŸÔÆNÖ˜Þ§UÊɵ/‰?udkoÅt÷ÍõÞÙPo£4Ó5±cØä‘j¯+k6žÝ{zTª’°t%þuxŽ ÑÙò£=1­ÐÉF+ „¹‘¦þ€µF9*¨¨þ¢†‚dþ¢‘aE“S)Àîþ¸UPõk7?éÐÃÖÏÖ2M'7ÚRíQ»ì– Mðý¦¾Öùº=¬þ?Îy=Ò¶`ý GÖ)€rùŒÇ#Amm£¾i"ǰÛéu1Á_p å™ + ˜Œ¥8r5̦¹Ët¬4²ØŸÒEÄ¿<·*´ê.c'íÓg*lä·¾ÏJuà¯×}ŸpÌUiêg^Û…}ï~ø¥¸ó‰¾Çú~‹rû‹îT}¯ôù+a¸’¥v «£X–}íæ¿xêÆôG¶ pÿîÿ›îÝ5’hÛlü) –“Ý}ßü¿ú¾éNS›[T>þ‹ˆ:'CµŸÊtî/nnƒÒØ^¿j£¿Í:Þ‰ŠþÿÂßGý?ƒæß+š(”‚ €@± Üûô~[¹>è=ýØro2ÄDý½éü /Wµ?¯u¢usµüó÷›òƒ«¨]ß¾°Â®ŸVáºV{pqe$¯ÑÝO¼Øüÿ_ò÷mýÖ¨Ÿ9Ï@²/×mÐU¾R‡ÂÊÈ¡=âkÄhXQ8l”bp4cØÈGýÁì”ûëW0AÁ·\þûÏ„îJ#žÃЂə.ÿóIqjíC£äýÈà~•‹9X”'Û],¾÷ÀŸ_ .²àˆk¢ ܃,´NX’eon nÎë&h”dXÑ÷#¬Œ9*v2Ã3?ž‚ä–_?¶¹÷™ùÓKCßËÆÂ¦~æ5Š6xHùDç}¿ƒIVàtÅÝ©à^»Ç°Ï·¾Áýë|å‚nÛRyÔ¹FJÔ :§2£~“þxë¡¿IÙø=Jt¡õ›ÿ;™®k»ÝϽ±Ï½ŸÎ£~ï\À<¸~Øù2,ðÜ.¯ñÐKq»Ññ ÆëØ+¨,O>~!.¸¯@(6ßþ5Ö5ë³O ¿±[£»ôˆ†”—ûÉEuvãoöÙ·×Dþ½¶&·^µ×­]iI4ú\“eª½î=µz¶Î¶íŠOEâ?¦j¿¿?©ž'›hÒ_/Ï@2/þ‹•ùý4¹FâÔH¹`Îf»ƒ£oµ“dÕUk6{9TýéÖ¥Úÿ24`ûò²bÖ ¾xãíÍ¡£ê‚ë5÷õÖ$#T?Ú±·¹Õ§µ}¢ †F& bºŠƒÖZþWÖº·ã\ùþWO‹Y®c®cï‚ázT &8ÚW,‚Ÿ…`P\ë6ôb/']xwV»¨ Q²@©ÞWƒ Ô·Kp‘7²6Y½ ‡¥ˆ«¨™ šó™?¢W總¿õî–¸eþ[·ï‰;ŸŒÙ;Æ8,_tØç;xáNûyéõÒKCâ÷Õs5u¹bú8ÓÅÌà$Áõý¯õyÐ…v%[¿Gaß ]˜pç?·ÿ£Fã+WxSËû›v&ÜtÏÞý ßkÉ7¼Nt®Øµ{_Üÿ+è;V’¹†­Ï2@ŠE@Þw6ô‚ÁêÓˆ~Ñൿþå þÓšø×ÍÕsåÃ>vPCšÚ—®9`Ç hí¥qmåÔVúÔNÔÖr—~DùÇ) €…!³À¶»í7‹n£ïß»‹<戸<ž Šiòµ7VoŽüAŸèÖàÆæñö4ü£"ýí|÷ýü/㞇ÌãVÊÀ‚[v%¬%,œpå,¼¡ÉÊ4:<ÑHTíÒoíšpÝÅŸ2ýkLѱ÷ïG£Õƒí`ú…°4$9›(Ò˜ö”úº‰‚DÎ%,PëÞs©I´|ËG»ÝÛ ÂV WÎðé|æ{t½p¦&hÔocÏSÁ§îTæ/×E}Îý£mƒßí?ì.‡ ÓDu 
bº;`˜ÖèúaºyÂ.šú;<öÙø=ÒþÂÒ|¬ý`»¿)qÏ›{NÓïY¢’«ßºDûO´\çÆ”T†©‹u@(Ù/ÕÙð> &jijòhG"» Õç§_Él:¦:ž4¢udDö[ôÚlw:u+ÿøð> köèœÙÉ/ÓÙ?ë €4M üRlÓêjÖV Æh„­FÁ), 4ht]6Ë~‡†Vï…®ÀBO@yh[¢(ȼÝ\CüéH¦O×´°üÜq+± 'aÄí9º ¡9l©Ï¼‚²ï¬ß×t:M:)¯»•Çæ™èŽÍ_ð¹+kWÿ`–—ï¼i!ŽyK¢zåéþÌ1Éûßܶ²= €@fr6b»1ÍÕmâ»ïò¸ LXþëÆÔ˺ÙÐ-ú ¦¥JI’V(÷«FÂú‹? #Wý2<Ï”@K~æÃîTð§#9Æ——Ûõ7UZ%·^©?jÔ³ù¬;Q®:ÿø¸ß£®]Ú—:ýG@"XþÞA;µSC@·w×2/H¬€·+ÊQíFmŸ6ºµ½üÖ÷–÷¨ ò¥§µ‰¤ôˆy3K/^X~À&ÝÚk—Ò7œÛ0êÜíN“J~¼»!`¯çÕ­ v0%‰^kMåæV™ö©6V]U7jÝÕ«GjoWYfš¼’‚ Ð2yØEXú‚°TA¶æä~kmüHÕ¼½?¸ÏR|­‘Œº;LÖ‘ lßþÀ?bÒŠ4ÕR¹²ƒmqÁì° ô”œ‘«MÕÎüvqßÛ:ÆTܳ[ø¤±1+åðE¦>ó©Ò2¥Û¥°ã.‰Ò6)]†¿(×y0ï¼ÿýb~®4SÊS®”.ú\)(}ÿãKÒ:÷ÈY0‚£óÃî26ç÷(X¯@@l (@{ÒQÑÔg_n·>^Ùí¢7Ú©£ßš”ñgWUÚ;‚Æ¥­`x®‹Ò?|¨Ö~pqe$%‰¿ ʳírgkyï®­½>Þÿ×ýqyM¢ùÍ馹¶;ó¸r/h¾~K½Õþ+v­üÛÊëíêT°œÀ¶_œç €@nšvŸNnÛº·DAèDy²C+ ,t#ô‹½ÜßÁeþ×Ãt÷¿,úçÊMýµ›ÿb·ýöES°Ì_ìùñ×'ùEžÓ…è Œ2Qtì‚i\:’° ôž_ò^&vKK;Ò¡ºmÊÚ;TW¦\'+dò3–O¼©mÔ Á¢t$Ê,ËW\T2¯ïúÎT»ô Çz¿ð×yêSÇÆÞá‘ #xn ®›ß#ícÓÖø<ó©.øèBlK—DÉÚ•ÉïE²ýð €$Ð$’® ì.PW“0º¢ÑÛ£´òþ¹ ¶½O.ŠýÑ­Ÿ­Ç+§´‰µ·ï®7Mléÿ§I%ýiV´¾ô³mâÒ–(Hþ?3kcÖU5ŠÛõSÏ]P[ýñ×›­þQ/ €@bØ_ªÄëåü°€•?0ª@¦ÿµkàÔS‡»§MzT+X£þuŽT:m™ÿÇ÷þìvÖˆçÅËß÷SxÏÇŽèmšŒ-X‚Caé‚Û¥ûú¥××Ç­ªcçFn»7Kyäª3È·Ç%o|×$Ý¡‘ìB•RD¤3j6®âF.hÎg~Õš-q{ ~ãVhÄÝ©,JGö½šýìÊàª%ó:ì¼>bða¡ç©0”1G·Ø_g¶~ÞÛ?Qäá‡uLÚî°ùâŸå‰.wìP•pÏ™ºÈ™p¼ €@‰ (}ˆ+ë·FƒÒnYðQ“H*8íʸ!±¹¦oy¸Ö \+Xì_Ï”¿óÀ>/}‡{Ï¿W§µ¾ŠÖ[·%ÚFÿ:îùÞÚèûÁ"לUa=®¢:˜¾svì¿ïÿ~Ÿ}ë·ûì'Öšk‚Û“Ž¿]õkÝ?>×ÄwítmQ{ÌVÚðU7@ åâÏä-×–Èžÿë?N X£zÝqPì¨ÎSÆõ·YÃzFò£F*5³Çïü7«­­óâïoÚi«ÖlŽ»ýÕlÒIƒý›ÙQƒ3ÑtKz°¨­¥ôù®ÝûbRx(ú_~}IÌñÒHéï_ûo"6¿—‚|²ôžTª° ·Í8Óúöêd»vרÆ-»LÁÏ翳]Jò¹S†Åä îOë‡Mºç¯‡ç¹Ðw+,ñw¯ùŒM»æwq Ò…“/žyLÜòl,hÎgþ™®‰Ka¡ï‡>Û_»ùɸæê»ð…ÏiÚ§Rý¼½î#{yÙû¡ç3m¬¢ÒêøÓŽ(Ø ø+8_Ê“à†×u~ñ½ÏG.ÔÅŒ-Ðh]¬ –u>ŽY”ߣ?<ùzèç'ìüªÆhŽƒ°s^LC[ð…Þ3ýæû×á9 €4M@èÆåÔ¾ú®š¤›üya]ÒÜÓÚ8U7Ü›~@øÞ§÷›þ…•½¢éOîyj\îlÿ6ê›R§ŒÝÆÿ¾ÿùß—0ý£ €ä·@^ØVAi,‚eG¨ÉÒüå½äé=W°â§ßœâýïÞT½¿ýñ¹¦  Bëí㺋?—6ãž™/Ç¥×P= º©m­|bÔ IDATn$²ê¼ûg'l«Ûw)<‹ú,cù‹Ïa£ì•"Ào«Qºz=vÄáÞ1SÐNù³u¼n›ñ9•qÏÓ ZÏœ»,n;´¼@Øè}guAJ_Wô\Étrî»m2ý˜îg^ç7ÞŽO¢Ï¶ÎInDºÎ+ J^0åh/(­ó”>óúnè|æÎ;aýXºjcØâ˜eÿ\7CÌ Eþ"Ñy]Ÿ¡{ožf ¬ÏWº˜©sü÷¿zZÌÅ2GußãKÜSï1¿Gº+&ìó£ó«>?®Íú|èœÌÓÀ¿ðhw»V»ƒçz]äIô›ï¶ã@H$à&³Ôûþ‰.­ß¡]¢wXŽ Pˆ9±­ Í ]Õd#† 
Ž˜VÐhñò¼¨¿b·õ~ª?òõ‡·òDû‹K¯œˆPujYp¹ÛR}®€õÉcûǯ8o\Ü(Ñ?ýu…¬ Z¥c«"÷<òrpÓ˜× Zÿ«±£Âý+”âÈÕæ|ï25¹§ÿ$z®ïâÈ¡=âF+ø¨¯þåKiÌg^O§uñ]°QîçTå/Ï­Jx—‚¶ »S!Xç¬gJ7 ‰,t^ÿãœ×C?CÝ®Àªþ)ˆNyú…Õq£è³ñ{¤¶$ûü¨½é¶9~er·ÞÝ÷› úÓ9×g²Ô… €Å- Ô J+¢rÓù–l„úå“ÚDÒ–hý§_ñå\i¨‚ÿ"€˜@^ØNd§´&+ +º?lD[غþe ’Þõû…¡#¥×hLY×߆bzþ‡'—ÆuG£Ný#mµ‚RŽ<6/>¥KÜÆ! àK•NAï‡ wÕ…M¶çÞã±e|¼õ¾Bï˜HÔ²–üî¥û™WÀó–_ÏoT¿\Õ?®S•dw*袠ÚPê¥9ç¿.¦þ÷/žñ/Š<ÏÆï‘Ž‚òé–°‘Òén›ÉõÂ&NVK~—“µ‹÷@@ ¿V®‹æ W®í»®nkßýR¥)÷¶û§€÷Ï®ª´†Eó…+Gv:#¼ó»÷´@ ïÛ Ê¸É ®«¾ó„½°dm¢·ã–«Î¯ÿÏœ¤ARÕ™êmǤ KK·Ó"_ Ñôr å êîÿÓ+iú¨ÖÈát|ÚW²´ a“íÛÇë–Ð… Ó Îµôw¯1ŸùÆôKú:·hT°ÎCé”déuÂ&UM§Îb\Gçïþüo¡çªTýÕyHç­°Üèþm³ñ{¤ |:çLýfé÷2Š.T黜ìB£k§.¤ûYwÛðˆ €H@E*HíŠFo÷îZf£´ŠüSÀÛ¥,Ñï×Ö´[o\îqW? €ù%{o|´M-Mð¸iënûÇ+kãÒ$j¢FY+_íÔS‡ÛQƒº[×ÎícnýWе±uêmåY=íÄA^nn7!›Ú¨‰Ã”cU£é´ż4!ÁÛâe¦üµÁŽ Ô<ùì*ûâ™GÛ1ÃzZ×.ícRPÈxËG»MN¦ÐvÇ@ùtÃr¶ªNF®:¥ü}TXÿ”súø£ûÄLÎê¾ÇùòÝSjœt?ó®_º‹A“èuëR“ªAÀ­íñ&ŒÔgXÁt‹ê¾úKãcêsÛjBJTÀÿïEEEyœŽ‡›Äó¯/¾ôBh´ö†gÙø=rç̯]r¢ éß-Ò^]ш}]` ¦ë ¶+ׯeýÆêÍvÅôqÞï²¢åàw9×mc € P< R>¼•M:¶Üzv)³NÕe‘ô$êåîšzû¤ÆlͦƒöÄ‹uI'˜,z‚”†@Y}}}½ëêg¿üÛ[>Û°[‡Çx¶5±a°œtáÝÁE¼Î€&R{ôŽ ãö¤¾ ’ÇU’`Á˜£·Û¿u¦­Yÿ‘]üÍG¬Åb²' I6ƒjjo>„=û´#í_>Ùæ/Zcß¾í¯ÙÃ(ñšÃòêç2_~‰ó7»û×]|¢wÆHÓÿž•••5»>*@r%pÅí5¹ÚûAR Üs]۔밾@ÞØniÒ}’7n×îÛ¸e—7zòÝ÷?jÔh=õA£ƒ)-# Qàa…‘«a*,+>µÕ¯——}P Ý£ € € € €@œí8óD  ì{¨4¦Ÿ·ÆYŸž0·ªR•‹ã”Ü h´vØñxgݶF¥vÈ}ËÙ#M¸þÒø;F”¢Bé+( € € € €@1 äý䑹FW.Õ°2bða^Î_N]ùÌøv÷ζ£æE—®ÚyΓì h¤ª+:·Íø\L®n÷ÞóKÞsOyD  üŸy=¿mÆ™ÞE¸`§–¯þ0¸ˆ× € € € €@Ñ0b;p(MÂÖ¶²ÜÎ9}„÷/°IÜKM:–­\Îq;+ñ7]yJÌä‚a:Œ\ “aY! 
|ñsGÛ]ß™š²éš`“‚ € € €«#¶CŽì]¿_hº¿©åÖû^hê¦l—û ðeA•*óYà…%kí5îÉçCDÛ@@@@ ™ŒØÔ¨m•«¿4>tB¶M¼E¬ ¶Û>Ñz,Ïœ€Ì{[ÇÐ uqâ/Ï­²GžZú> (D·íNÚìÅË?°útÒux@@@(tÛ Ž ‚Óú÷ÎgʯݣkµuíÜÞ”’Ä_Þß´Ã\}yÙ¤»ðÃäèùª5[¼T$«ÛFö¨c²iënS*F­FXxR$¯¿¹ÉNÛ?æ¢ÛæöØ–vÛß^|› 9Erœé € € €$ˆÒ&_·$ß%7s~vå2'Ÿy~#Z—YwÑ-³µR[1 œtáÝÅÔú‚ € € €@¨9¶CYXˆ € € € € ¯¶óõÈÐ.@@@@@PÛ¡,,D@@@@ÈWÛùzdh € € € € €@¨@Ìä‘S& µÚýBWd!$8âðNÞ›ÚWÚYŸžxEÞA „ŽÚÃë}Ïnøž”ð瀮'èß»Kòx@ $~ry¥u®.³Ú:³Ûž¨µÕ,‰~ÓÉÄ×O«°á}Æ&>¹¨Îþ¼°.ñÊyðÎçÇ—Û™Ç5„œV®?h·>^›­¢  Pl1íÿ¼ôSÅÖ?úƒ@Nºuioß¼bBN÷ÉÎ(4¡ý»Ù7¯èVhͦ½ € €@ÎÔV©(7ëÛ­ÌV³]³£<8´CÃgBÍëÓ5ÿo¾÷·Ñßö<å¥Y P 1ííÍF@@(pŸ]UiÕm£Á»¦tgwM½ýçÝûš²)Û QÿˆåDoß]oú·èÍö÷¥Ü=ŸÈ‰å €@"Û‰dXŽ € € 3æµÕÐLÔ‘³ÀŽüÁÙ×Ö´;g“N"ÝÃæ±œhÌ׿=ZÙIG•Û³jmÛ®úD«³@ @`;ÂK@@@Ü ¬Ùt0a`º]ÛhÐZ£²?© oŸÞ£dNÀœíÙ¥y£é3ת«ióÇõ¶ñ£ØÏf‡vfÝ;•E>ó½»–Ù5S+ìû¿çŽƒÂ;´ZJ€ÀvKɳ_@@@ˆÀ-' |ÍY6j@C`õ õŒލñ¤ÔN4Úý¦ó+¼Ûꇂ۟9¦5iI á ÒFÈ ˜ÀöÌ´œG…Fœ@Y™Y«V­¬¾ÞìàA¾GwipNÊÊʬU«2«¯¯·ƒcG¬ä¤ìÐwDß  €”†€.èøóË@`»4Ž<½DLĶ·mßå2Q1u PJmÊ­s§j;pà€é{DAxªªJ;¤ºÊöÕÖÙŽ{âW` X‡ê*kWU‰ €”Àú-õ6¼OÃ…íC;p»„=]Ef Ķ›Y›#€ € € ƒoeg_n *—±+ÊíœÇ+Ö´?/¬s‹Cry¥7¹ßû[ë#¹•*Bý)m„JmÙû[Ú¢7Ä¥øâÄ66äðV‘uݾ=g£' TÎ=©<&/óöÝõöæûíÞ§÷‡¶ß¿Pí>nhë¸í7m¯·Ù/ÕÙêî<7¤µ]9¹Sï¹ ï¹®mÌò®:pßnn‚D·¡Ú¬}>¿ü€½üÖ·8îQýýÚÙVQnæßOÐTþ:žs^®KZŸúL¨å ÷6\.ì?Ìoüññ×ßØçµ©_c«ôú§Ï^§êh~zU¢>*Ÿý‚å"Ƕ¶cµ¶Þ82(­À³ŽU°8÷Þ][ÛèÉ?ÁmõZŸ“oN¯ˆ´Sò—ÞL|¡#¬–!€¶ù € € €@0ˆ«@¡R=hTlE³Ë"Á4ù\KL›r‹ ¹‘½ HöîÚ*$=uTkûd_½}êÈÖ‘}h´÷¶õ¦uWQPðÂO—§Øž4&¤Ôhçu›æ*ñ6V°Xý¸áÞ}qÇñòI±Áx×­Ø·{ƒ…ÚsüÐÖ^{žx±Î $ë}ÿ>䨉;ý%ˆ +µ®¿Í2¹ü* ˜êx%›8TëzHYdô¶^»>„ù¿½á`BWµuŒ\ñ6üm“ǙǕۆmõ ëru4÷Qfú ¹¢; šZÔ¾ô³m"ŸGÕã¬ô\ŽòWÿôÏY$qü.¥ûìëó'ÛDd†õG‰´Š‚Ú¹ÐVË@ 4l—æq§× € € €@Ñ (¸æ‚Çêœ?……¿³þ§‚k ²% ®úpÁ‘¼Á`¢¢*aÁ:šwÕŽDAE×^·ï°¾ûV߈~Ñ éŸÛ—.E#¡/=­MdÔú¶]õ‘¥FŠ+ªòIE–»¶ƒÁÊÇÿ¿?mã˜êx…µÛ_·ý­@ôCÏÆ¦Q`øÛVD‚åSÆ…_0P?] 
Wu‡yª.ÿ¨üt/>øÛÚ˜çÚŸF·+Ȭ¢ &©Òã$«_íuu)}ÏSûãÒhŸþ‘Ò2Y¼:>-‰RÉø¿Kϼ?º[u]òÙ6¡#º“µÓüþ'ɶå=@À/ý…ó/å9 € € €&pÜÐ蟸Ê×›(÷´‚É lº¢žŸÉŠ ÀÊýÎÆØQ¶nÝ`i½V~cWúŸÊýç|rQ]h_ÔÈuE)!‚ÅŽV óïK£}vë)·ö·~»/e€Ý­ŸèQ~þ@h¢ ¶¶×E»ýÇ-QýjÿªA­@ü¢7£¦.¬GùÉ]Y¹>ü³¡ºÔ6ÐUd÷ùñÑíÜö}ÔÈw]$ðÿSpW)i\{ÔVZœ¦]pÇZŸ?Ý…àò¦ûëTõžë£ÞóÛ¸uOí·>³a`T—Ú¬Ïgpô¾«'ø(÷9Iô= nÃk@ ‘@ü¯^¢5YŽ € € €@ž (éì…âüMWÐÛÜ›0"š¢Â¿ž{þÊÛ‰';|mMlÀ8YZ…µFÑ®½na Àƒéþõ–úö­Ñ݉ôzO#l³U4Q§+ Ò†ÑÝûz|È7á 4š=YyäùºÈ¨òàzÁc4P¿]0UÛj²ÌdeùÚh |`Ïæ‡M4ê}Ô€V1ÿÔ7ºZmñ_ðHÖ¶Dïù/j¬\w0¡•¶¿ÛGù¹‘úZÿÑ’{éó¼ˆÖNßå BP;Lˆe ÐXسWc·f}@@@ò@À€T`UÁ»TEë¹âÏsì–ù×~ vú—ëùÒwbßKäÛüqìºÁº‚¯“íWëƒÞštÑ_4ÒÙ¥ }Ý{Í}TîrWÒ Òj4±ÿÂBªI+“™j¿þº‚ÇøF²Ë#l$³k»ýæÙ¼àß§&ÕîT~ÿ6î¹ÚèRÖhY:£§•~ÄØýû;8z‘A®©¼\=ÉÔv©`j'“â=hŒ@ô’jc¶b]@@@òHÀ€Ôdé”]ŸD×òž.MïY:AôôjjÚZ >ºÑßýÓøµhÐòé%uvÁ)m¼Šü¼áÜ otð¢7¤UÝ˜Öø«þÀp²:”·»ºmÃUÑÀx²m½ç¯+¸Îð>Ñ@­&U:ŒdEIf²(-NØÄŠ &kÒÎá}Foë3¨ÉURòýíóîÓÝ6¬>Ä×gÿwI®Í-íÚštTô(MMcú×Üý³=¯íâ=¶ô @@@ ‰@K¤“4­QoùƒºÁ±R‚TW•ÙéÇF'T ý;óør/?u0•G£vÞŒ•ïnÍ h7v÷º0j@n÷™¨ îêŸÜ h+°­‰&ÀLTOS—û/ˆ$ª#ß‘ýu ýrû8é¨òŒ^Tqõòˆ¥'@*’Ò;æô@@@ÀÌ>Ù—ÞÈîBÇRº’ï<°Ï4rXÁLWäUzˆŸ\^™ÕüÛnÁÇ}ûƒKòïu&»©z¥à¶òb»¢ %ý£¦ÝòL?*àœ‹¢ýh”¶+ê_ªQón]@dŒØN¦Ã{ € € €E+ЧkqŒõRªWbµÜ¥ÃÐD›Ç i™ PiD®ŸVaßúí>WMNs¼ vDyÕsÝÏ`Â^+/¶›XQï+½Hª 8ÃêiÌ2ú˜DÛeê©/J ãú¨ÇÏÓ:ë}LÔ/–#€@qǯxq z € € ÐDÿHäts$û×óoßÄ&´Øf.¿¶Îä”Á­àî3¯EsqwïT3`c;ã÷ëÞ)½PC:ùƶ#lýõ[££¡•c;KsrN¯Û…¯¾¥3Ahp{k£õeÒKVü¶NûT›œŒLÏÇãM›@ 3?¥?>k޽´è»áú«íÐ.½Vnûh»ÍzržmܼÅf|ãšÌ´<¤–¾üŠýæ·Œ¼3q‰váô³í¡™OØü/F–ù’ ì„qÇF^óòG Ñùâw×ÙÃͲ~}{{ßëL·8Ñyâæÿ½ÓÖ®]ÙݯïüIä9O(TeËWÙâW^³5ï­·?ÜéF¿~}½ïؤÓN‰ü†GÞä  €y. 
€Ù€ T6ÒÛ7b{í‡Ñ`^:Ûæj† q4ܯF_ûKcFù*·öè­ÌÜõO è¯3çïŽNÙï°ÔþÊ+íÈ/^¸éì?Ù:+Ö´3kXC}Õ(äD#Û“Õ“Í÷4zÙ_vú&6õ/{®‰ k}y¬ÇnmÁÉ!ƒÛMÝŸ.Jø×ý݃v°†÷å¥ ¸ÿý`]y}ëãµöƒ‹+#ùÄ¿9½Ân¸7·w 4¦½¬‹ù-ÞeÔ4û ÀÐSóæÛñÇóñÝ¿ù½½úúr;õ”Ó¬‰Õ@ TÂÎ vß~×=¶woÏE©RýhÐï è»ô³;~m?ÿÕolá¢%1AmU¯‹8ºüý›oµ¹óžÍÀ©@Ü ,X Œ*`zù¤6Iw®÷5QŸŠ‚‚-5bÒFšÙIGµN:’Z)E\ykãƒóÉò,ûGîº}$z|ëƒè¨hML©Àu²¢ ]Q»38uuúU÷öÝQ›/NLþÙðo›«çÇ z)ÐÜØÜïûF¥7´UÒ‘Ð ìè -_=vê¯öíÖñÑcæ¡cêx»ítAÁŸo[s¥Á¡ €MˆžÉš²µo¦ÔãG°iS§DÞÑÆúCyòé§2J:¢ÂHt¾˜ùØl«©Ùg—þÛtØ¿oئ,C jßv×½¶ò­·cÖôFi÷ëkmÛVF–ëûö§Ys nGDx‚ P ^®Ù ÐiÄi¢¦‚ÚnDªúöÊÛÑ x¾õUÁ÷K?Û&´/7_É“­v¿ðFìl€êçÏ®ª´à¨n×Gön_úNÔOë¬ý0úZëÓW¸zô¨ þ`¨ÚìTPUíÖ‚®Ìy9¶Ýny&ÿ±"zŒ•ß9Õä…j{ªu2Õ>'] på Ñ ¼[–êñÑ¢†º°£‘Ðaù±u õž-¯cöÄ‹ÑmÝ~ümÞ§UèçOõËèÊÉmìx_`ÞÕ‘èQwh"SWT¢Ï¨[‡G@ L ùe·°-,[¿þ}YcÓ¦†[œ;w €@P ÑùbÿÎ#µƒb¼F }Ý áO;¢t]Á”#º¸4wÞ3Þ…$Õ¬àöÁ¹ ”>3k"€´°À¯çì÷‚v.µÆ©£Z{©66m¯·ÚýfmÌút+‹õÔ\Ãï}z ·<|÷n”±úãú²nsCÐs`¯Ø~¬\0n"¾ªŠ†uÎ<®Ü&ÝÚËo¼ë_).zv)‹ jkû`z •Ùlÿ3Û˜žÎQÏÝ„”êÁCÏÖyAx­¯ xž=¾Ü6~ÔÐfå4WúWŸ¶QžïÆŽN×J¾TyÅìÛ*@Vpû®«ÛšF:;Õ —NÕe‘6*ÀÝÜö©Î° ¹'*úÌ(å‡êÛ¶³ÁÿÐCÊb.(èN³àq×öjÃO.¯Œ¤© ~þÂŽe´5©Ÿ©þ]Rù ž~l¹)eL6Gî§nk €@¡ D/ 6³å.ð¤QÛW^sƒ)ßµŠòj«(÷µ–g³(o¶òßêu•^=º{ʳ­å•¦2x`ï‘ÿ €@~ $:_¸`œÎ!Êy’è<¡y\^íÃk8§dcÿÔ‰@6ÜÝn_˜:ÙËUïæÂpË'Ÿþi»âÒ/¹—Þ㼿ÍyÍ @òY@ºÿ™;A|ª@¦ýAD½åáÚ¼í’Ò„øû£¾¨úçóÊ],+×ðÒ¬h¹Öר`·½¤v¢íµÝ oDG:«Ž0G·_€ïÿkìÈmíÇíSûwAmUŸ\T—Ó0:Ö à»¢¶øMÔNµ×߯æµµ/¿³Ð£öí?Žj›&õljѨy·e«¢~hd¼Û§”¼.šèX%ëŸÿ³§úüŸ?ÿ±T]/½ýœ4ì=õõ™õ·õŠ3ò/ELê^°´¤@ÆFl·d'‚û^»®aôxŸ>½cÞÚ´éCëÜé˜üß1+ðJZ@©ÂΚhOÅ],+i$:_óü#ÒîñÇ1°•‘#†yiÅ^}m¹·ÊÊ7W'Z•å €äLÀŸëÙÿ<¬ n+8¨4Ê[¬ ¢?xè`ÝUo³_ªK9:T;ó|[·%yz­£@¢?GXû4) [×õ‚멵Yï»þ(­Êˆ#¯Z_ï©/J?’hÂH-W»5Q ‘²pEûÐö‹Þ……¥ú§ö9<>Püñîzo”¼Ò_„víS»µ™º‘ëî½°G­«¢Çd/*˜ªtÊ­T~m/Ùh2ÑyKâû×°—Ôÿ]¿õ PN¶¦Ú*h—e²ÑʲrmUÝ‰ŠŽ—Ú}ö‰å¡Ç\“|*ºÖKUüŸ½à±tŸ²Öhø`ñ÷?ÑqÖrו¶FÇYßµLŒ¶…× P¼EØÖ–Êé­QÛ.m‚UüîïÖê ÏÿBñMz†MHtžPP{柞ôòO?ç¬&×φ´”€î¤ÚþñNo÷Ê£=õÌÓS6eøÐA¦À¶.wìÔÉôýŽîNY + € A¥ ilºu|ÓmÚ ÷¦?zöê»jÒªÖ~“­üŸwÇï7@dX –& ˜†m\¦ÀeXð2¸žÿuSÛëêP›Ó5Õ6ßÿ}¼™«+ø¨ºÃF¸×kÎ릘%Û_cÚ«€qc¿/ÉöÝ”c™nÿÓù>$kï!€@i äU`[¤Å¯¼æ¥/ѨIM^åŠþ¸îѽ» 6ØŽ{LèØšÀòµ×—Û§OŒ™ÀrÙò•¶iófûò%0¥å"ðÎË–Ûª·Öx¹\7•FD#®G3"ô<è<Ííã0 IDAT¡sRUU[oâJ{N“ÇBxwíúHs‡úûYá_OtqØ¥õ ¾Çk@@@òA ¬¾¾>r?Õæ­;Ì÷2gíS@{öS‹ B%Û¹‚ÜÿvÁ¹¦Û¥)äƒ@E›rëÜ©Úêêضí»ò¡I%Ùݱ¡Ñ©©ÊèQ#L£¯ 
T§’ÊìûUU•vHu•ÕìÛo;vîÉlåÔ–Pà¿þû#“F*·v²4$ +ᜠt¨®²vU•Þÿ••EoÏYØ ÐD+nOo´r«g3@ Q÷\×¶Që³2¦@‹ØVPûžû3:[”n¢G=ßññǑۨõZ·Tk›ë®¾"’n¤0ùi5dJÀ¼suº4 z­E®(Å‚&¼ýïÿú†[Ä#E+à&_U;wîé§&”\µjµ½»n}ä7X¿½Ã† °SNÏ…ŸˆO@@@òQ ÅÛ©íRŽ(÷çÄ ãcÒˆ84¥˜5û)[ùÖÛÞ"m³àù…¶”°À}<‘*¥ùܧƤq9´Ý9DÁ>ð¾púÙ%,G׋]@¿þr¸cM”÷ÇGc.»utHÿæ/XhÓ¦N&‰ƒá@@@ ïZµd‹ôǵe¢ ¶Ú¨‰ ÿóÚ+cFr¯y/š7´%ûÁ¾@ å¸[¸hI¤qª‘Ø àù‹ÒŽè2|È Èâ.ZâMŠYÀŠL`ËÖ­1=RªžŸÿê7‘ ¶.(ë;£zîŠ.ëÂFuS@@@ÈG±½gï'ÞÓ{÷ÖXí¾šÐ‘ÚA´~}{G‚áþÛ«ƒëñJC@wn¸¢ÀÜU_þ’{ú8õ¬3låOïôÞSðnÑâ¥ä•ba1 <>k®×-}W&Ÿ~jÜgÿñYs¼ÑÚîNª?Íšk½{õdN‹bü0Ð'@@@ ÀZ4°­•ÁQ•îIó@ Ç«ÞZÙãð¡ƒSæÖÝJU¢Ò¥ã!19‡#ñ"PÀZ¹ço¸þêÐïÊ´©Slð€Þ¨nG ”aLÖì4xD@@È l§‹ðÏ—_± 7Ù{kß÷&¹Jw;ÖCâÐd²®ô;¢{šô‘I#“òðf‘ |áóSBƒÚ®Û bOœp¢)m‰ŠR†)å. Q@@@ȼ l+€½æÝuöá¦Í¶·¶Ö6mú02±d¾€ÑÈ3ü¥sçŽþ—‹ u…vi/fÖú{ßûÞ÷œÄžO¢кeÙ|<¬{W+//7Mø¦€ +¼ÃôoÏžhàCx_tÁ96ñäñö×gDÖ?¼W<q Ò¹ó‰[¦GO®¹ê‚Ú~”6lØ`›|ú§›T/5_€ÛÍ7¤@@ 9«ßÛÊÝ(Íd[r(íö—]!ÛYa¥Ò" °]d”îdE€ÀvVX©@HK@)J¾úƒÙ¶aË.ûÖ¿O´ cÉ+!ÐB-:yd õ™Ý"€ € €4B`Áâw½IÖôHAŠU`מZk_]éM;ãÖy¦9( ¿¶ó÷ØÐ2@@@ /f>µÜ6nÙe«×nË‹öÐ@ =»u°;¿}–]{ÑølTO a†Ù‰2\)Õ!€ € €‡€Fi/]±Áð9ÊÈâè½@’LŸ|´>²—wÞK²o!€@ 0b»…»G@@òY`Ó–]^óÎ;c„U·«Ìç¦Ò6@ cƒèsÎSþík~8Û4¹$òC€É#óã8Њ`òÈ?€4?'L™fvRàLYàæ#PÄJCÒ¡}EL§ˆ»K×@8åÛž9w™·|úä‘víE'Æ­ÃÈ­#¶sëÍÞ@@@‚PFkÜa£Á A²/›6Æ«Qî™s_Ï`íT…M ÇvSÔØ@@@@ ¤.?w¬M×Ï~óè›rÊÐ’ê;E ±G…6!€ € €-( \²“.¿‰-x Ø5ä§€roßòõI1w±(]“þQ@ ·¶sëÍÞ@@@¼xxÎ2Û³·Ö^]±1ïÛJ@–¸ãí¼ëâb`Kö_r¶KîÓa@@@ ±€Fú'HK¼&ï €H CûJâŽÚe7>jºë…‚Ù °}cö€ € €Œ€2­}ò˜~6úÈ^ÓnŠ´”ÀŒ¯L´Ÿû,ÓD»í«‚Ü-Õö‹@) ”Õ×××»oÞºÃ|/Ýb@ …@E›rëÜ©ÚêêضíäÕJÁÅÛ%*PUUi‡TWY;ý¶cçžU Û$èP]eíª*½ÿ+++K¾2ï"€YÐÈmi( €é pîLߊ5h®#¶›+Èö € € €@ Ô.ƒJ—@ ëÁsçM?}Úæ<÷fÖ÷Ë(EÛ¥xÔé3 € € € €@V^]±Áž_²Ön¾{¾]óÃÙäÞΪ6•—¢@L*’;?!I)~ ès³ÊË[[uû¶vàÀAÛµ{o³ë£ŠQ ¢²µk[aµûØ'ŸÔcéͨªª°ÊŠ6¤"i¶$ €@StrÄ¡víE'6es¶AÐhm¶U4wÁ-_Ÿ²‹@ )1í¦TÀ6 € €™М'äØÎ¬)µ!€@r‹ßµ·Î³öUöØÏ/´êvL~–\Œw@ô4)ï,´ó&°ÁGtMCÖD¤åþw÷ï¯ó¿ä9¤) àƒFm+¡ $) /ЪU+kݺ•úxwd9O@ *PUUi‡TWYíþ:Û±sOô ž!€@D Cu•µ«b”d„' [vÙ‡[v™&<;ÊÈœì“ €¥. 
%w<¸ÐtA‘P¥þi ÿM`HPSåØ@@(´¹ýB›qÕDFkÁñ¤  P.åÓ̹ËlÒå÷™&𤠀@ãl7΋µ@@@¢àvø¢<¬t òT`Ê)Cí¾[αÁ}õZ8¸_Ãcž6—f!—1©Hò²…4 @@@@"ÐD’÷ýø\[ýÞVî˜)²cKwr#@`;7Îì@@È+{]l{öÖÚygŒôòkçUãh  PB pû‹;?“{Û¯ÂsâlÇ›°@@(jÝŸì3åuU`ûä1ýlõѦs PHšÐן_}cƒÍø÷‰ |Rh+Ù Çv6u©@@ÈC‡çDƒÚäÖÎÃD“@ d4¡ïß9Ë»à¸zÝ6›që¼’µ ã¤`Äv*!ÞG@@ŠLà…Åk½MŸ<²ÈzFw@ÂÐíGn¿ÐîxðE;fxÏÂï=@ Keõõõõ®îÍ[w˜ï¥[Ì#¤¨hSn;U[]ÝÛ¶}WеyÒ¨ªª´Cª«¬fß~Û±sOi"ÐkRt¨®²vU•Þÿ•••¥X›·@æ ,Xü®MÛ¿y•°5 €@N^]±Á¸Ó&§äì,HE’LJ¦!€ € €Ù ¨-YêE²#  ’_ýál»ìÆGM¹¸)”ºíRÿÐ@@@@¼èЮ2’{û¼ë²9Ͻ™÷m¦dS€Àv6u©@@Èݾ~ó¯æ3Ê/OŽÍ@+ $ʽ=eÂPoSM4IA ”˜<²”>}G@@’¸÷ñ%¶tÅ;¬kµ]~îØ’é7EŠM`ÆW&Úä Cȵ]l–þ4Z€Û&c@@@ °”—UAíöUvþ”‘…ÕxZ‹ 'œ@Rçùk~8Ûv²/n] P¬¶‹õÈÒ/@@@à_sŸ{Ë{6}òH«nW‰  €@‘ üè—ó½ ˜ç|•ÜÛEvhéN²úúúz÷þæ­;Ì÷Ò-nÔãã³æØK‹^±®¿ÚíÒÙÛvÛGÛmÖ“ólãæ-6ã×ÄÔwå57D^ù’ ì„qÇF^7öÉ?_~Å~óÛ?F6›8áD»púÙöÐÌ'lþ‚#Ë›»ŸHE1æâ»ó{Û´éC»ðü/äª_ì P ì\¡`÷íwÝc;v´ã›qá«9h2Ðwiæc³íÕ×–‡Ö[S³ÏV¾õ¶÷oî¼glÚÔɦ Ä@â˜rJÃdcÅÓ#z‚ àt7Î-_Ÿd3ç¾n»öÔÔv0<µ@ÆÛ¦QÑ£G°iS§DÐæÎ{Öù…©“›5;R!O@ (+ˆSÀíº«§ÛÀþ}‹²ït l (¨}Û]÷Ú‡nŽÙUçN‡XÇN¼eþÑÛúÎébõÞš}6ùôOÇlà @@È_铎kÜ«+60Ñdœ ŠA cíõëß÷<:v<$ÆeÓ¦†?¢;w €€_ ѹbÿÎ!µýZ‰w;ššZ c“Gºà“Fm+o¶ò]«(¯¶Šr_ûói{ 3üýá­Ü·îÖé^=º{{Pžm-W>]•ÁûgxÏT‡ÍHt®pÁ8?”ë:›%ѹBs¸¼Ú‡Öp^Éf;¨L ,[¾*&ýˆî¬úÏk¯Œ jkŸ}Å¥_òòm»6<ó\tŽ ·ŒG@ÂxxÎ2/×êp./Œ#F+@ìôêÖÁöì­µ¯þp¶Ýûèâìì„ZhŒ¶[ í w¹v]Ãèñ>}zǬ£<ߺíÚMjó&/@3Sʆ°s…‚ƒ*î‚XŠÀâW^‹4UDN?ç¬Èë°'#G ³ŽyK)J”nŒ‚ PX¡7sî2¯ÑÓ',¬ÆÓZ@Œ ôìÖÁîûñ¹víEã3V'!/KE’/RNPý®QÛ.uUüîïÖj&°Ì—#E;È?Dç µgþéIokª `þõŠ•º€»BÇNëâîÈ#‡yóf8»-[·F~SÝ2@ò[@ùT5:ïä1ý¸í<¿­Cr" ÜÛ'íoÚWädì\äe`[iL^]ºÜV¾¹:’çS·ÿ8¢M8y|Â?®5¢ìµ×—Û§OŒ™ÀrÙò•¶iófûò%'4Ÿ*ö@ èܱpábïû¾ýã^+tî>tM:í””½Dç x­ªjk—þÛô”u´P×Ù-I”ŠKw"çÁH´‘FmS@ [`Ê)C­º}¹µ û0Òz@ £½í/º»çGwÏ·ë.oƒèê‹ç„@Y}}}½kéæ­;Ì÷Ò-Îê£?ï¶Ï+W®¶…‹–$ÝçøãÆØeŸŸtÞD —mÊ­s§j««;`Û¶ïÊå®Ù׿҇Üý›ß{wk$Q †iS'Û‹‹^‰¬§`ŸògSr#PUUi‡TWY;ý¶cçžÜ씽4I@‰47†+\vÙìP]eíª*½ÿ+++ËþÙ € €@É Üñà‹‘´U—Mc—Ÿ;¶d-èxa äÕˆí¿<õŒ¹‰âÄ©‘–%¹ãã;Ôr¾köí³¿ââÂT§Õ 1Ý©qû]÷Dîîp»Éb5Jµ¦fŸ÷O©Šঠ€@r]dö&]ökð@@â¸ö¢½ŽhN†û_âÝå£;~(Š@^¶]P[©óÏ™“rD£ÇüïW_[nÏš“r¤PÐi'dNàáÇfŵuGÇÔ3OI2Á‹öø¬¹‘wæöNMŸ€.½úúòHdžó}мÁ@¼Ðè»c†÷´ cûçeûh €@~ (¸=y›»à-#¨_dž֤h•z•Ü®¡? 
•ÀMüèö~¸cíkW_îâvËæ/Xhšì‚¥)0wÞ³‘´"P~}¥):´KçM&{Å¥_b´vŒ /ˆ»X4žÛÃ¥XŠäŸÀ‚Åïz·”ÿè—óó¯q´@ o”_ÛÞvTþmý£ ÏyØVŠ€‹ÿí¼„^ V}îŒS#ï+½À¢ÅK#¯y‚¥%°jU4]BçN‡$½ƒC“áM>=zþ(-)z‹@z¿¼ç˜‹EºB–) €…!0ó©†;n¦OY ¦• €y)°û“}Þ¤’ç]÷Íœûz^¶‘F! ¼ l>zDÜHËàaÒØÊ½íŠ?°å–ñˆ¥!°ò­·#=þ¸ÔÁ·É§šQÛ1ž +  ¶Ò|¹¢ßZ&jv<"€ù/°ú½­¶tÅ/?êùSlçÿ£… €@þ T·«´^];x ¼ãÁ…vÙš‚ÝòM ¯ÛÇNËgÀ}"ëmÚ¼9òœ' P:Ê»ï/½zöð¿Lø¼ßèù#áJ¼@‰ „µ•þ‹‚ P8ºü¾[α¯^4Þ  € Ð_™h3®šhí«*¬G·ü¶4“m³&W“GvëÚ5­ŽV¶þÚöw¦µ +!€@q lß¾#¦Cé¦K8¬GwóôŽ©„”˜€æ©¸í®{ÍMÞ¬îk¤¶‚ÚÁ\õ%FCw@‚Pp[ÿ( €dB@“IN×/UQYÈ«ÛÁ #õx@ÿ¾‰Þb9”ˆÀöcÛ%Òmº‰@Æ–-_EP;cšT„ € €@q è. ÿ@JIrÓOŸ¶9ϽYœ¦W%W#¶ JŽÆ"€ P ó¼hÏškš„Ù•Ñ£FØ¿_q±{É# €@¸À‚FÕQ@ȶÀ‚—×ÚóKþ-]¹Ñ”²„‚@K äU`[·D§sëóšw×µ”ûE<èÜ©cž´„f PXsç=kš57¦Ñ'œhN?;f/@ò_@£ænàEÛ³·Ö›4rô‘½ò¿Ñ´@  t!U¿?šTr΂7­º}…]{щÝ'_¸yØ^ýλi¶÷ùF˜)(JO sçØÀ¶&“L'϶ÿüQzjô¸Ô‚Aí¶m+mÚÔɦÀ6@ ðž³Ì jŸ<¦ŸÔ.¼ãG‹@B˜>ùhïw瑹ËíËçŒ)ÔnÐî"È«Û6nJ‹tÕ[«#ëõêA`;‚ÁJH ÄN÷ü±æ½õ%¤DWˆ „µ¯¸ôKµ£DgŒ‘·4wîì @ s7ÿj¾Mž0ÄFÙ+s•RSÉ Ø.ÙCOÇ3)@`;“šÔU¬¶‹õÈÒ¯L ØÎ¤&u!€ €‚À«+6è.„•‡mÌ«É#óЇ&!€ € € € €@æ<÷¦}õ‡³í²õÒ”daTYĶ‹øàÒ5@@(lÝŸì³™s_/ìNÐz@T·¯ð&E^½n›wÝC¦ÑÛÒ °®ë!€ € €9xxÎ2»ãÁ…vÓOŸÎñžÙ €Ù˜0¶¿=öó mÊ„¡^€[n é ”§»"ë!€ € €äN a´ö2o‡Ó'ÌÝŽÙ €9ЄÈ3¾2ÑΛ<ÂÑ5‡{fW….ÀˆíB?‚´@@ŠR@£µ÷ì­µ“ÇôcR­¢<Ât @À/ j+×Í¿šï_…çÄ0b;†ƒ € € €@~\~îX/°=yÂüh­@@ ‡÷>ºÄû|îåwí[ÿ>Ñ”¶„‚€_€Û~ ž#€ € €y$píE'r[vš‚ ;;¿s–sd//¸ý£_Î7¥è¢ à`Ķ_ƒç € € € € ÐâJMrç·Ï2¥$QQ.n ~Flû5xŽ € €´°€û¾…›Áî@È é“6ýó—‹ßõ¿äy‰ Ø.ÑO·@@@ ÿô‡ú.´I—ß—£E €äÀÆ-»L©I.»ñQ[ýÞÖ!€ Ð\¥%ÑaJi ĶK“€^#€ €ù% ¶óë˜Ð@@üPÎíÛ\hߺj"ïü?\ÍnaL`{ÿþºfWH”¢€‚µ­@D]ÝR$ Ï¤hÕª•µnÝʬ·øž¤c…’ÐwDßÛ%yøé4 € €@3nþÕ|›³àM¯–k/oÜÕLÐ<ß<&°½yëR‘äù£yù)PѦÜ:wªö‚ÚÛ¶ïÊÏFÒ*ZX ªªÒ©®²š}ûmÇÎ=-Üv@~ t¨®²vU•¶óóðÐ*²& ÜÚÊÊ­ÔY#¦b@ð·þí³lô‘½J¨÷¥ÕÕòÒê.½E@@òG`÷'ûìG¿œo{öÖÚ#·_Hp; -A(P_™h“' ±WVl ¨] Ç0ÝfØNWŠõ@@@ <yL?‚Ú¶¥:@ÒÐ(íàHmåß®nWÉïm},ZQ_è  € € P0·ì²™s—yí>ydÁ´›†"€ PhºCêæ_ηó®{Èæ<׃»Ðú@{ãlÇ›°@@Ⱥ€rj_~îSP;8ª,ë;g €”˜Àà~]½ß|÷|»æ‡³K¬÷ÅÙ]R‘çq¥W € € PÓ']­¤‰ €¶€R(÷ö1Ã{šÛCŽ8´°;Dë=Û|@@@@@ è¦œ2Ô&ŒëWôý,•’ФTŽ4ýD@@¼ÐäUʯMA@Ü hô¶þ¹¢üÛJM²`ñ»n"@`»@ÍD@@â¸ýÁ…ÞäU¯®ØP¢ €°€&“\ºbƒÍ¸užÝñà‹Ü“Òk:íÒ;æô@@ZH@£ÁôÇsûª Üüž-tØ- €Íwñÿ³wàqUgâÿ_u-Y’«ä"÷‚nÛ€mØ ¡Æ$$KÊ?„ä¿ÙͲ›ÝÈn: Y 1¡cºm¸QlÀ W¹aÙÆ’,I¶euÏïyïè^Ý;M£~gæ{žGÌ{Ï=÷œÏ 
Ö;gÞsÏ­3çKVl'¸mɸƒÛî¿Gô@@bD`ÉÊÆHΟàøtŒ a € •Üž<¶¿<¿b‡|ýÆ©Q9†xì4íx¼ëŒ@@ºD`Áe£ŒëÞ¼`B—\Ÿ‹"€ €@p‘ƒ{Ë}ßšë8¨ù·u] =FqŸm÷Ýz„ € €1*°`ÎhÑ  €¸_`Ñâõ²|ÝÑoZÝsë,÷w8ÎzHŽí8»á @@@@šè×;褹·ïøÑRcövógQ£³lw–4×A@@¸0¿Æ—ƒgÐ €D±À7M“'~y£äõÉ4F‘Ù=5ŠG{]o÷T$Ë^Y.|¸Y~øÏß‘^=s ±“§Êä•×Þ”âÒãrß¿ÜÝaŠ>Ú,ÿíY«ý¹³gÉ- ¯“§—¼$kÖ½oíÿú×¾$3.šb=gÜ!ê½âÀÁÃòÜ ¯ÈüÆïtGõ6Ô{Å¿yH [—}ô¡_YÛl +þÿåu+w–q €€ž[¾]žX¶Iî¹u¦èâT@ˆͯýüƒ·³µ3º¥EOÇã §í:c[ƒB+ß\#OŸbµÕð‘ÇŸ’-Ûvȼ9䢉ƒ×CD ÕÁÞ+4ØýàŸþ"UUÕr1HµÚ–' ¿g/¾¼<\Ž!€´R@gkë×—µ°ðT+9 @˜³¶Í®ÜûÛ7dÉŠmæS»@ ÝÛ:£RgEO¾p¼Üð…ÖPV¼¹Ú˜é8ÿÊyÌ’¶TØ@PïK^xUª«käö¯.”áCóýOã9´ƒÀâ¿?/eå§Û¡%š@ðÐÙÚ•UµrÙÔ!2ylÿÃräScYY=Ã)))5žçäd9öó° „z¯(j|!¨m×böÐb»÷îo¿i @À!0el#/çÂùûy‚ €Ñ+ Vÿñ߯1þ¿ïðI¹ÿ‘5Ñ;˜(îy»¶Íà“ÎÚþÆÝ?ÍÕ©EójkÑÜ׺¿#‹æÍÖœ š[[KÿܾƣæÙÖýC†øf{Ž>´#»AÛ Ð PïÇŽù>Ó÷ÍuÝ‘%Ô{…® `æî×Ï÷¾Ò‘ý m:K`ûŽY³n}g]Žë €q) øj^NfkÇåígÐ €1,`þ?~ÁìÑò½[gÆðHÝ;´v l»iˆ…‡}³Ç èèVIÉ1ÉÉîáÈÿí¨Àˆ{Í5ì½B€ZÌÌâ €¨Ð×ú’_3Rýè`øÐ&êo)@@@.¸ï[sÖÑX·ñ`ô$þ.sm]À²°ð°1kÛL] ¼ÿ~Ñ£Æï×_Û”ÿ;þn7#Fpú^¡¹†5§·ý½BƒÚLOO“…7^® Ž!5š¿ÞüFÄÌéSå‚Ñ#¢¦ïtˆͽIA@øÐ%ïûÝ›r÷/^]DšÒqÉ×tëZÖä öIIi©µ•“†æ’1cFÊôi“BθÖ,?Þ¶C®¾r®cËí;ví}ýk_bËÖÝÎB *4Ò–­;¤ðÐaëýC¿¥1dp¾\yÅ\cñI­£©‘´hz"M3b–Pï7,Oº±€e¯ž9fuˆZM¶åãFÿu¦ö>¥¼ñÖÚ¨GÜ& ³´ôZýj²Î⢠€ €@üäöÉ4»uW‘ÜøÝ§å¿p)É:èö'x½^¯Ùvé‰ ±=5wwÊ£›^|y¹Œ uQ rßð…ùVíPõØ@g ¤¦$KNv†Ô×7Èɲ3yi®%":Óú‘ÇŸ2¾­D?ôꟗ2°î\޵]ÀãI“©®©“ŠÓ•moZ% ?ø§¿ßLÐÿ§ÞuûWdÂø1¢ßxÒ€·Y̼òæs;G 3Ã#Ýš= ĈÀâ¿?oåÕž9}jDÿ¿‘¡3 @ Óö:Ñi×âB € úÍ®'þû&¹çÖ™òãoÏÎA¸¨×]Ø.)-µ(† η¶›Û6xU¥¨¤© k' óûtŒqÔÈáŽç¡ž\0zD¨CìG &–½²ÜúH?ع㶛crœ èJ³çjä*wüh©è6@'°pþD™=m¨£Š.@Mi™@—.iOCRxè°<ð›‡"ê}Ey¹UÏ\ËÚÁÄ…À'‹@ê·9")ýs›òôGRŸ:D³Àö²òÍ5Æ4¯¶=W4‹¾#€nxnùvã+Ź}2Y0Òm7‡þ € [vÉý¯‘'úl’û¾=Wtf7¥y. lÛ»§An{ Û~Œm@ ½Ò=éíÕí àjM×ó÷g—Z}œå<‰ô ë$6@š(>~ÆXJ+.œ?¡ÙúT@@ü2º§JŒ4Ùwø¤Üqï FªÕM /àšÀvønr@Z" ézì¿øÊ ÑŸHË7îþ¡UuîìYagµ*²Ä©Àœ‹†Ê™Ê™<¶œ 0l@h‹€ÎÐ~þÁ[dÑ“ï˜ë·À(Í ¸&°}ýæËü+/o¾ÇÔ@Ú P]U݆³9@p äõÉ”û¾ÅâONž!€ €@kî¹u–|ñê ¢ÿ¾ 4/Ð¥‹Gj¾O³”°¤IÁ#D àŸ+û€_ÎíPM°àl(ö#€ € € €@W øµ5ÿ¶.P­éÏ(N.±=4ìÞ»ßèQÁÞ}Ξ…yöûEJIi©degK^ß>rÇm7‡©Í!ˆEAƒ:†µw߈òïÞã{ÏqœÌbP`ÆESD"-O/yIÖ¬{ߪþèC¿²¶Ù@Ð?.ýÿð ¬Å@@ mKVl7roñ{O“{Û²Kgl3ÒêŽæÝðÑfëy¨ •©Áp­_XxXªkjBUe?İ€.‚7dH¾5Â-ÛvZÛ¡6¶ï(cÇJCf? € ±Àý¬1fOí;t"âs¨ˆ €´Tà—?¸JÌmœ¶èÉõ¢3¸)>. 
lOŸ6IìéHž~îEÑÀS¨ròT™üõïK‡¯¼‚|vž G³¦7ÍFÕº–½²<äèõýcÉ‹¯…<Î@@ RuÊÖ]ERĬíHɨ‡ €mÐõ<îûæ\¹ã†©,VmsìÒÀv¯ž9rÃæ[Ý©®®‘¿üõ)YñæjkŸ¹¡³¹ÿð§Ç³-gNŸQê³ @ ¶æÎžå˜µ½òÍ5òÄâçDƒØöìýÃ~œm@@ %KVî0ª/œ?A2º5­Ô’6¨‹ €´D`ÁœÑrçMÓ§è7ÇΞ‹ßl]šc[ªªkäÅWV7FƒÛº­?fšŠòr#õˆýÎõë×—ÜÚv¶ˆSo~ý+޽֏IôGß#<žtñÿÐý¤#‰Ó ÃF@ ôÇþ½3åXŸL¹yÁ„vh‘&@@– è¿Iîþ٫Ɖ߻m–hà;ÞJ—¶|þ•—‹'=M–½²B4°mM-¬èLmŒ &Ã>âO@¿ùñýïÜ)<þ”‘wßð^kÚ£ùWΓ²ò Û& € Ðb¡­_Ö?&™­Ýb>N@@v˜2¶¿¼³©Pxdì?|Rî¹uV;¶îþ¦\ØV&¹=aüòáÆ­RP°OJJK³´uöv^ß>2û²™¤qÿëŠ"Щܾï_î6Òù¿äd÷1£FZïO/y©SûÆÅ@@ 6jÇæ}eT €D‹€þ[D–\²b›è¢’Ý=©ÑÒõvëg‚×ëõš­•ž¨ÛSs7 ÐŒ@jJ²ädgH}}ƒœ,;ÓLmw¥ÀÃY,[>öåÅÔÌ4 Né'Mzdx¤º¦N*NWvÎE¹ Q&™á‘nž4ãßc QÖ{º‹@l 0C;¶ï/£C@ š4×v^ŸÌ¸û6Y—.Í/úŽÑ)PVqÚêøüÖ6 € €@8ç–o—/~ïiÙ²«(\5Ž!€ €.0rpoGP[?‡·¸&I§ßq.ˆQ/ðÀo²Æ0oÎ,™qÑëy°“§Êy¸s²³‚Uc € àÐ?—¬Ø.•UµŽý4ßµc¡c € € € €@G äõÍ4šbÙ&ywc¡üñ'×H´~€O`»#_)´&0aüÑ  € Ð[vÉwñªÜ÷͹«y)ÛÉ6@@ ºtaÉù³GÉ}¿{SrûdFmP[Õ lG×kÞ"€ € €@'<¶l“qÍCIA@bI@ók?ÿà-íÿÎ!Çv,½*  € €´Y`ÝÆƒ²uW‘t÷¤ÊÍ &´¹=@@Ü(×Ç—–ÄìÛÝ¿xU–¬Øf>uý#m×ß":ˆ € €]!°pþ„¨þznW˜qM@ˆNMæì/zr½Üñ£¥Q1›;ÁëõzMîÒb{jîæšHMI–œì ©¯o“e|]µ.Ç©€Ç“&=2™ÆH—¯Û#k?:(ýä9¸w—ÞØ®««ïòÑ¢Q@gjëlT]|U¤ €@ €ÎÔÖΟ÷JC¿'BìA@Œßý]¡ €@ç ?#KVl7.ºpþ„ν8WC@\. 
¹·Gé%>¹^Ž?#f »«»íl—UT’Ф«ï×JÔ”dÉÉΆ†órªülTŽN#ÐÑOšôÈðHm]½Tœ®ìèËÑ>Q)™á‘n«‹Ê›G§£Z@ÓôÈH“)cûËä±ý£z,t@è¡ýп_#ûpäßîˆkEÚf‚W§˜6–Ò¶M h€ØÖÙÚ'Ëøúj è¨Gf`»º¦ŽÀvÝw†Ú23°­ÿ<ÓoQ@ stæ¶[f uîȹ € Ð:Ç–n4Ò¹uÅ%ŽÛ­ë>g!ËSû† IDAT€ € €Ñ/@P;úï!#@@ÎÐo½i:7]§dËÎ"¹ïÛs;5÷6I;ï^s%@@@@@ &2º¥ù’Ìï%ûŸ”û~÷f§Ž‹ÛÊÍÅ@@@À-wÿâU5¸—|ýÆ©®Ééú €D" ¹·Ÿøï›DS’è“Y˜±Ý™Ú\ @@\!°nãAÙº«H^_³Çý¡ € €@4 ÜyÓ4™=m¨cúï­Ž,¶;R—¶@@@À•KVî0úµpþfk»òÑ)@ˆf jkj’;~´TtîŽ(¶;B•6@@@ÀµúÇÕ±ãgD‹¼yÁ×ö“Ž!€ €Ñ* ÿÎÒͽýÅï=-1{›Àv´¾:è7 € €´J@ÿÈzþÁ[ä¾oÎe¶v«9 @/ ¹·õß[ f–îžTÉì–þ„VMðz½^ó¼Òb{jîæšHMI–œì ©¯o“eóõŠfºÀa\/àñ¤I T×ÔIÅéJ×÷—"ЙéæI3þ=–Ð]àš € € €@» è·åtbA{fl··(í!€ € € € €þAmMKÒ¹· lóC@@¸xléFyà×tØFqÈ @@6 üñÉõVîíåk÷´º5Û­¦ãD@@ˆ³çjdÉŠí²|Ý)9Nê¸h¹oô@bOà‰_ÞhäÞÖ‘=¸ø}Ù²«¨UƒLnÕYœ„ € €D‘ÀsË·KeU­\6uˆLÛ?ŠzNW@@ØÈè–&÷}k®Lº Ïø&]kÿmF`;¶^Œ@@‚¼»±ÐØ»pþ„ GÙ… € Ð٠挸¤æßž=mhÀþ`;¼^¯×>½_±>FÆý¿•ÜCî¡Ûbý5Êÿ+Üþ Œ¬æëô¾oÍ•~½3ä‰e›ŒTs#ò{ íÈ ©… € €Q$pçMÓdä^2elÿ½>söœ444„<­RSS¥›'Mêêê¥ò\U´#l¿³zdíÓg*åüùóaëFãÁô´4IOO5ÆÖÐ{ã“„IIN2nM]}ƒN¿ÆÛ¶ÏII‰’˜˜³÷PMÉ÷°¼âLX‹h=Ø­›GRS’¥ªªFjjk£u!û””$™݌ר¾—ÆbÉèÞÍxêûh,þ¿"!1A’“’Œo0Õë{i }Ÿ1Û:<ý÷Ý싆Ȋu{eÁœÑ¶cðž3$@@‘ÙÓ††uضk¿”Wœ ['”+ãÇ “§*dó¶ØÌ/~Õ¼‹?æ7}\ çªj¢ñ6…íóÈaƒdÔðARSS'§ÏÆÞ‡¤èÛ;Ë0(+?“ie23<ÆLUÕµr¶²:ìýŽÆƒI‰‰Ò»W#XøÞ‡Û£qÍöyò„QÒ?··|r¨H 7[?Ú*dev—Kg\(µµu«÷ðâ©ã¤wÏ,ãw°º&ö>œHMM–œ¬ ÑÀý©òØû÷ŒþNõÊÉ´>D3ÇFî-#oím<%Ƕ©Â# € € € € €@TØŽŠÛD'@@@@@LÛ¦ € € € € €Q!@`;*nD@@@@0l›<"€ € € € €D…í¨¸Mt@@@@@À °mJðˆ € € € € ¶£â6ÑI@@@@@S€À¶)Á# € € € € €@TØŽŠÛD'@@@@@LÛ¦ € € € € €Q!@`;*nD@@@@0l›<"€ € € € €D…í¨¸Mt@@@@@ÀH67Úóñé%/Éšuï[M~ýk_’M±žÛøÆÝ?tì~ô¡_9žû?ÙðÑfyüoÏZ»çΞ%·,¼ÎzÎÄŽï)±s/  € € € ÐØnŽÑ € € €€{<å'%·àcÉ.:$ÝO”HR]­ÑÙÚîRÝ£§”÷,gÌsïâ¬gþ“Ú›Lí< ÇŠ%åýµ’´w—$?& åÖ¼}úÉù~¹Ò0zœÔ]}­µ?7þûõL)©H2†–›Õ ?úÜ™XfÌiêsHƉǸöÎýœ›æØÇâ]€Àv¼¿? 
€ € ÐBA›ß“Á×YÁlûé©•gEz–ž‡öÉ®«n’ªì^ö*l#С©Ï](²¢¾ÜÖÀ·þ3Ijº÷¨ËÚK ¡´$ ©óƒ†„\²nÆeõ*Êö±ŽÐ5HAÒÑÊ´«¤"‰Õ;˸@@@ KΜ=+”£G‹¤²òœ”WT8úѧOÉÎÎ’áC‡H~þ@Ç1·>ÉÛ¹1`¶¶öõÈ”K‚v9VòiRÑGWô–òœ­K·Æš•vN2Sjdt¯bÉëÝÁÐo®–‚‚}²{ï~k|C†ä˘QÃdÎ¥3¥WÏk¿›7¼}s¥aü…’Ðøû–XZ"ØŽå²¶ M6J•S•‰r¶:Áj¯îç%¿wƒÌŸP%}{œúáïÛ@–ò²rÇ{©¾+ƒåK¿~}¢zœC?X³)H*j=²ûD9^•)5ݬû¤ï¡yÝ+dZîAcß«&Yǧõ;(cz[uÙ@ œíp:C@@Z ðÞûÈÞ}û¥®®.äYÇýÙ·o¿Œ9B.ŸsiȺn9Ur4 +:;V‹cÞ?:RNVe¢hôçÓ³92"û˜Ìè h=7ï4Þ+ƒõÙ|ݺu»Œ{\2ëâ`Õ\¿O?4ô_÷t^~À:®Hn,*ûËûJýùÀשõz&G.îÿI³Ù…@d¶#s¢ € € VàÍ·VKaá!GîÝ»I·nÝ­}Œ± nkq{pÛSqÊÞmcû|cíA›ß“Þ…{­@Œæ×®ì+åýËÁóÎsûŽâÊlY{dt@0&#¥Z’ÏKÃùDÇìíýåý¤º>UææïvûÐýûß“cÇJ­}:K[KIÉ1©®öDõñÅWVû£!¸m &‚¤Ošf¨›Õ½YÙæ¦k J’åñuÝ¥¶Þ7CÛìhnVƒ±©³·õ˜þ¼±#]R“½f•¨y<|øSy{õÚ€u–¶–ºÚZÇìí»vKå¹sråîÿÆ~4ÉÖÚw©›JFO°ÞO£è‰µ Nå9zœœØ ÝS|ï-æìmý&Œ¾ß¦'…þ0ØÑOð °íÂS@@@ ¥úuy{P[0³fLúù­o—-[·YA nO›:I23‚Ïni_:¢~b]m@³UY=eÂkOIÏCÎaR]­”éQ|XzÚ'»®ºI4ïv´”І9‚Ú½‡ïú­˜-%CŒo½èŒî³AfuGÉpéf °xdß. € €Ñ/°×–ŸXgi_ñ™9AƒÚ:ÒINK/™áôáÃGÏÝö$µª)øbö­÷]Amó˜ù˜q¢D&¾ö”èÌÄh(:ËОK{p2è6GP[Ç1,ë¸\>x·¤'7Í2ÜWÞ/†èèãÕWΕ;n»ÙÔÖ :;û®Û¿"ééiVýWW¾emGûFú#©©FÝÕ×ìsÓŽ7yéGf ¯•»æTäÒž?±Z¾8½**gk¯ßð‘õ¡ŸÚO¿hª¬öÿàOójnÁU2dHÓ‡:s[ÞÑPô›.þ)HM›-åšÆ ãÖGû{¡µçÛæjë9Y©UÆ·\4•¶Øn‹ç"€ € €"’’š*:K;%%ÅÈ÷ê„ñG9b¸cWyùiÇs·=±Ï*4û¦3³#)ée2ö¥‘Tíò:ŸžiZ(Q7»làÞ}ÒÀÌ'¬ãúÕzMc-ESÜð…!»«³³gLŸjלÛæã¶*DÉFúƒHâ‘€ÞÖO›) £ÇìwÓŽƒ'š¾tŸ‘î•/]|.d÷.R+3‡Gö;²‘N> Aiû7_t¦¶~®húý0Ñ,» ö˜›®}Ôúo\çèŸæÕµ¯£¢ËŸè·YÌ4#ÚÕÙ¥¢ï•¡Š®O ïµZ+Ðô®ØÚ8@@ˆshËíÚž·K2/žgÌ4Ôˆù›Þ©m/ú|è†U®Î¹­Aiûlí¼îö!ÝÙ³ÄøÊ½f<¶ÙÛA+»lç¬éSšíÑUWÌ‘5ëÞ·ê}¼}‡ êËÅm팢 j'íø8 Çš[»öº›ö»mÇá“M‹ðÐômPý¼~j•¬?êH]ª®ö:âüàdÄðau«ÿþÆb¼Z¹¨¨(¢sº²Ò˜U¯ˆýƒAMAR0ï ]Ù¥v»öáŠÞV[ú–i¹­ç¡6ò3OÉöš¦'BÕc?ÁlSa € € ÐÎ:ñÄÉRzü¤/ޝË7Gp¶w®l¹á«ÚñãD.þû"Ñ™Úö’]ä\XÓ~Ì ÛÕG7d:ûï8ØøDg"~ulSà7X·î›;{V³]ëÕ3Gúõëk-2yìXÓ õfOvQÍ©þׇ%qÙ¼iéRû¥ÛÅÛϹЋºotemASZݑ׸XdsýÌÍ:/ö€xsõ»ò¸ý›+: [ÓDRúöée¶++ωæénî[3‘´Ûu4‰®?`/š‚$šÖ!°÷ÝûL]Óë4;™Øö=,ÛO ôoŠçD$@`;"&*!€ € €‘ h»¸¤DÊÊ+¤¼¼Bêjk¥¼¢ùÙ¿‘µÞ5µtF¡}†¡Ù‹¢ñM©*Ì}úX:b¼äozǾ+ ˜ã8è‚'Ç«2½Èë^îxKOr²{D<'ݪ[Váî”9VGmÔö<øKI8$—oZºÔÜþ-Ñ4$n/§*™d猩‰¨Ë½2¢'°­Ai³ÔÖÖÉ‹/¿n> û¨ï±öRRrL2G¸o1ÞXNAbúÛÓ¤%5ÿ­󼌔jÇ7fÌý<"Ðœíæ„8Ž € €D  ³uá3{ŽØP§i.ÈÿèÕNgí¯Êê^D¯]ü©¼½zmÈ`µ.,™š’,ÙÙÙÒ·ooÑÅ#ý¿¿¶ýÂ.m¡|À`—öŒnÅ‹@¸ 
öùAC¤ú›ßw}ú‘x¹Wñ2Nÿ$ú-˜éO=ñðG­y]ôG˱1“¤à3×F|.ˆUG`;-5E¼^o¬Ž•q!ÐaÉɾE<D( (`þž$&&ò{Èà ¤¤øšÅmG –6|ð‘#¨­ìQ#‡Iï^½#ÎëfÊÞ¹Agl»¹Ïô-´@UUuèƒ~G ê·ÇýOõ5íHÍ7¿ïþAøõ°Gz|Íh2d°Äó¢¼~·?æŸ6œçßÀ1“;h€ŽÀvVV!í gš Hdgu“Ñ2LZ'š’$©üž´³@\)°s×nGíqc/Kf]ìʾ¶¶S•9½ƒžš}ô›­¹dƒ•ú4çÁêtÕ¾©Î`oqe¶ÄjžícÇJ#f¶ÁóúF¶˜_ÄwPÅP9µë\/µ×©ƒ®Ú±Íöìî lëb’‘äÙ®­ïØ~µgëú­³”—ÅnŽ{sŒ±øhÏ•]Óù¤¿³uM¹ücÑ…1uœ€ñ®Á,펦e@@ ¥ú D@y¹sA½‰Ç5ÛyM]Måİ12lý[]Î9r h`»÷'uuÊã#š· 8±“vôí~ZäDÓÅŽžÉ‰(°ýêI¢³ 5Gl~æ)¹°otä£Þ¾£@&ŒÓ4à [{<7·oZîÚ•þàAŠŒæ ¶ kûÅÍM Wø¾5Üœ~Iydõšk§3Ž÷éÛ[Ž—Òwu!Þ~ýšÿ0åÛdçÎ#Õ“ÇgϾD23Ü·xdgvõ52Rk¬E Ëk"›<ûqi~Ww›ëG±€Øæ§(¾ƒt@@\%I@¥`ï>Wõ¹¹ÎTe÷’ÓyùâŸ#vÀ¶¤dÌ…¢Çí¥wá^ûSc»¢¿»ƒ:;;=¹Nªë}³ ‹+³Æà¿£¢Ö#¶àM]÷ ÿ*®}þîú› lðÑfGÿ§O›äxî¶')+_–¤t«nÞÕQ;SÛ>˜ü^ rø¤/P½ãhŠ”žN”¾=œ3¹íõuV÷ÉÊèIñ0xP¾lݺÝÂþŸDØÞ·ï€‘ J›ìÞ½AmK°ó7fœ’’Æ÷N}/-8™'czù>¬Õ›¢Êȳ ÕûãW é{ñkÀÈ@@@vØ·ÿ€±8d¨·~¼] …:ìÚý%£'¶uñ³‰¯=%‡'Ï’âqÓDS“ Úòn@=Ô‰¡£];6³cCzœ‚SyÆS Xo(.3ú0<~T<ÌÚ—œØ #{–XÏݾ±åã²âÍÕ2ÿÊ˃vuÃG›eͺ÷­c“//½zæXÏݸ‘òÖò ÝJYµRô'’RóÕ’ú9Ÿ¤j§×™:¸VŸôÍÚ>[ Ïoì&ß™w6h?4è½nOZÐcnÝ©³³u} PkÑ4O¨žtá„]~ó­ÕRYyÎ:®© ÜZÖ~ç?#êZÞÎÖ"‘ööÎýœñ>kßç¶m bë{¨™ZdÇÉ’—Y.Y©UA»º±d¨œ¬bv}PvF$@`;"&*!€ € €Á ``̣ᄋÁØ9b¸¹ËxÔô#Ûwì´¾jï8O4pói¡ôÙ¿ÓÑÛôŠ2#3jÍëŽýö'§p}@Fû;-÷ |z&Ç Êì/ï'Õõ©29·PìÍ¿½µ4ßQæ¨c¿[·_|e…TUWÉ _Xàèâ²W–ËÊ7×XûÒÓÓdá×XÏݸ‘úâ³’PÛy™5ÉÞcɲó¨ï[ûJ’åwodÊ‚ «dLnS2m©­Aíhš­m¾¦f͘.ËW¾i-ÆûáG›ŒÀµ¦x²FÓ”lÜ´Ùñ~š•6n^ƒÇŽÓ³X6j\Dgmÿ£p¼Œïõ©cæ¶~Ûe[é 9t:øú ÛÃÈZà7IaaSj©GúUØõÃÀÇÿö¬UgÈ|¹ï_î¶žÛxzÉKŽ¿þµ/ÉŒ‹¦«Ê¾¶CÀ°@@ˆD ?  
èŸgXêêêdõšwD2ݺù///·5Ú¦`²s²­™Ûz<Ê®«n’ u5ÒóÐþˆ»[•#û/½:âú]]ñòÁ»eõ¡ ¬àö§gsäÓý9¢‹¢imͧmÎF4ûšÛ½B.˜~Å<î¶Ç~ý|¹²5¶°õGƒ0ZJJŽIuuÕe jßuûW\?[;ù£¦ÙåVçcpãÚÉU¢y³Í µ¦&ùßUÒ«ûyIIöÊÙšDÑÙÜfIMöJm}Óss¿[uÖög.Ÿ#o¯^k½gêÌmýÑ÷Í”ÔT©«­u,Ø«cIII‘9³/uë°âª_:k[ß#Ío¿hp[Ý[çK÷ß{‹=…“}Áɸ‚b°í"=É–Úe¸4‚ € €´¿€.V¦_¡·ýz¼~¥^4Ø­Eƒ/úUù…_¼^²³{XÕu¡´h)Û?ÿ96&²\˺Xä_½' ·›Çª3³5¸ÝËãLñ  Æøµf”Ƀ³ØÝ<>í›Ç“.ßÿÎb¸uŸÎLÔ{P[kP»¹E&»z¼ ÇŠƒ.ÙÕýêˆëkNío^~VÆ ð½§˜×Ð@wIE’#¨=2·^Æ hšÉmÖuû£~X¨Ám dÛ‹¾Oêû©ÿû¥¾÷Þtãµåã¶·ÇvÇ è·_t润h2Kýù$ã=ÔÔÎJ;g¼ßšuxD ¥ÌØn©õ@@@?ýŠüõ×~N4öÑ£EFàÅÌûªÁìììl0 W.3Úú:½}¡4­Û\nn¿KvéÓ‚Ï\+%c&Iÿ‰§ü¤dœhÊ-­3´Ïôé/Eã/’òƒ»´Ÿ­½¸·çÝf,|öéÙžr¶6ÍÐÖ`LfJŒîU,ºèd4Í—ýóÿø#Ïö–m;­¯Üë í¡ùƒd̘‘!óo»m¼I;ÜÖ¥í·ïšS)¦Ê‡Ÿ¤JqyS@;#Ý+yÙ 2¾hê’¿½çûÖH‡v¨×à¶þèLí£E%R^Vîh›ß,k IDATz:$?ìšÐ5šŒP@ƒÛ2ËdÏÉ<9S—æXhW?8ìã9c¤а9ª!T Áëõzƒa' € € ƒú'PBB‚¼÷á6)¯pÎÊ…á”+ãÇ “âc'eó¶=±0¤€1\5ïbINJ’Õïn’sUMi3*F鎑ÃɨფªªFNŸ ¾èZ”Íè¶þþõíí›[z¢Bb1,‘™á‘nž4©në@+6.ž:Nz÷Ì’ŠÓ示¦¶-¸û”ÔÔdÉÉÊúú9YvÆÝmeïzådJrrRȳIE’† € € € € ÐQÕGÓš£›‚@¤¤"‰TŠz € € € € T àdžì89PÒ’êÄ“\Ñú%•ÙV[éÉÎÜñÖ6!ÀŒí0ìF@@@@ȪëSŒ|Ú%•YÆ:áÎü¤¢|z6ǪÒ;=öÒƒYƒc£Clw+"€ € € € ?šF$9±ÁðÖãù²±d¨õܾ¡û?,fíÒó&çZÏÙ@ R‘D¢D@@@@+pAÏbÙ~b Q§þ|’œÊ3~²ÒÎYçUÔt³¶uCƒÚÓó>‘¬ÔØ[,×1Pž´»ív'¥A@@@@âO྇Aï>•'Ø6‹0ÛÜŸ‘R-û‘aYÇÍ]<"±íˆ©¨ˆ € € € €á4¸=$û¸ì;•+Ç«2¥².M4÷¶Y4˜‘Z#}d@غÑz01!ÁèzþÀ\©««Öa„ìwÏìƱää$éÞ-=d½X8ÐÍ“ ÃCJr’±/%%9&ïaBãï >ÆêûŒ¾‡jéÝ+K’’bo^hzZª1>}Ÿ‰Õ{èI÷½¿¤¥¥Ää=4_—‰‰ 1ù>£/P[¸’àõz½á*p @@ˆ%ýÈ ÊÄÒ¸  € OÌØŽ§»ÍX@@@À`~/@HôƒPþŸ‰uè8P˜±Ýqæ´Œ € € € € б—$¨h@@@@@À=¶Ýs/è  € € € € €@¶#@¢  € € € € €€{l»ç^Ð@@@@@lG€D@@@@@÷ØvϽ ' € € € € €ØŽ‰* € € € € €î °íž{AO@@@@@" °U@@@@@Ü#@`Û=÷‚ž € € € € €D @`;$ª € € € € €¸G€À¶{î=A@@@@ˆ@€ÀvHTA@@@@pm÷Ü z‚ € € € € í¨‚ € € € € àÛî¹ô@@@@@ Û Q@@@@@À=¶Ýs/è  € € € € €@¶#@¢  € € € € €€{l»ç^Ð@@@@@lG€D@@@@@÷ØvϽ ' € € € € €ØŽ‰* € € € € €î °íž{AO@@@@@" °U@@@@@Ü#@`Û=÷‚ž € € € € €D @`;$ª € € € € €¸G€À¶{î=A@@@@ˆ@€ÀvHTA@@@@pm÷Ü z‚ € € € € í¨‚ € € € € àÛî¹ô@@@@@ Û Q@@@@@À=¶Ýs/è  € € € € €@¶#@¢  € € € € €€{’ÝÓz‚ € €€;–¾´BNž*—^=³å¦ë滣Sô6 ðºn# §·Yà‘ÇŸ1Ú=r¨Ì½lF›Û£âY€÷ôx¾ûŒÝ °mJðˆ € €@£À†¶Ê¾ýe䈡ml/~f™-:&žôtùþwîˆ;ãåo¬–½û 
åÓ¢ùÍý÷ÆÝøÝ4àö|]»i\ô%zôýÀ,±Ø.*YºÁ+¥å"£ú‹Ü6'ÁfÌ=>¿Þ+…¥"ÇΈü櫱;Îh¸q¼§GÃ]¢-@`»£…i@@ ®^xi¥1~ ’ÇcYµnƒñ!A<Ž1#€@||\(²~¯/ÈÛ7ÛÓƒþà€ÈÑ“´cú&38¢H€ÛQt³è* € € € € €€m^ € € € € €Q%@`;ªnE@@@@ Ç6¯@@ˆ[?üé caˉãGËm_¾!n8  ¿~Ùk,l9ªŸÈ7>K.ðX¸§ŒpÌØ§Ã1@@ˆiõm1·¬ªª‰éq28@ võ-nY]£eŒ @`›× € €Ä­@uUuÜŽ#€±&P[Ç,íX»§Œp¶Ãép @@@@@ÀuäØvÝ-¡C € €ñ*°qóv9[Yi ?£{w™6eBHŠåo¬–½û üÐf%Í=}ê$3j˜¹Ëz,Øû‰”+5ž3RúöéekncÍ;¬*s/›am·eCûäh‰=Z,={æHÏžY2vôÈc.=~Rvì3.™Û¯oÐ1†êÝÕ>vÝo–Seåb§¹_›³ömÝ{ʾ…ròT¹qj·ô4™8á™}Éôˆ¬µ•o­•ýû å\µ/-ж1`@žL4>¤‹½Ÿ]±]û÷¿JÃ'ûäü©2ëòÉÆKÂÀA’zÃBk_s狎Jýª8ÚJô¤KÒ¸‰’<ï³’Ø@sM8ŽímxO¼Ÿ‘óå§Œc‰Ù=%¡g/Iš4E’ÆO4öÕ.["‰ýr%ù’ÙŽóõ‰Óó½5Õ’þ¯?¶Ž×¿·NêßGŽ5öi?‡”Ï]Ûl?ëÞxÝ8'!#3è5­‹4nè8¶oõ]gÀ «ßözÚŸ†MÉùâ£âùåï¬Cþ÷&±gŽ$_òˆ‘ ؎̉Z € €t¨ÀÒ—Vȓϼh\£ÿ\ùé}ß z= /}i¹œløp‹èO°.°ýÈãÏȪuëm™m|¼£@ži…|ñºùrÓuóÍÝzïµ^0mCï¹ö}îì™ß·€‹´ó ¦VÿߟEÊšÚæ%ê ›uÏ=-‰ãƉç'÷›‡‚>Ö,ú­Ô¿÷Žxk#Uµ4žÔ°{§Ô½ú’¤\s¤~õö çÛwj ¸né³!Û3ê.}Fûå‰×ÓM¼…$íÛ÷Ø›°¶k?fl' n<ê˜kžÖ8Ǫ¤}lìgý[oJÚ=ÿ6`]óð"«Í`Át{»º­Amóœ”yW lû÷IÔµo¯ ¸7 …" ›7JíŠ×ÅóÃ[Aøš¿üY´ïþ!êû÷±3Ÿ¿¸Ñ+gÎ%HjŠW.Y€Qû÷òF¯ì:â«ïõM=ÖÀç{^9PªÇÛÓ`øæB‘•[¼rç ñ¦–šßZ¼ÎWg@¯Èú®Am󜙣¼21?°ö«.Zá•ME‚¥â8P*òöN¯|fœÈms·co3’íGÿá•÷ö¿îê]>ûÛfÚ8Ó4¾á}›Ÿ½/O¾ã{dvóÊüÉ òïÏxåH™³z¿w±ŸÕ´*°­ÁèÇV›¯§“¾´½ìðÊ­—…L/^ë•·w:ûc^]ïÅú½":†iCXàÒtáÖ Øn­ç!€ € ÐNÁ‚ÚÁfTÿáOOÈêuë­«j<`žxÒÓ™ÛØÖ¢uN*s5°jO׬[q€tí»X×ûìå—ZÛ­ÙЀ²jµ¤{ÒeЀÓ~ýö5¯üÓûO°YÖš~Äœ®cÒ/ìm›cÒ™îôÖ¢õÕß^4/·ù!Ц® Ö†Ö×q«M°¾ØÛcš °Ý¼5@@hwHƒÚza{@4\TëjÀVgfkÑ ´ÅÍ¢ÁW3µ†|uÖr¸òÕïZ‡uĶûìñPmÝûƒo[ÁkMÃa_àQϹø¢)Æ©æ¬óPíè~56óVë¬jÿbÿè‘ÁSŽøŸ£ÏÍ 4È®ý W4¸­FhÙ·¿é^ès{j‘`ñpívÕ1ïÙ3Ö¥úô±¶[ºaä€Ö“rršÍÁ­YM)¢¥á /í‡ýz$7g~k@=’\Üöó›ÛÖkkÞ? 
m?O%mZìUg7wE õA‚Ù»Îìn.ÿ¹Ö7R”è¬xÛ¡f{]ñhŸ…«³|à |›ßñ~_Ð4&‘¤äÐàä߯°ü wÍÎ<öî^ßÕ48ü7Î ¶÷Eƒ»:‹XË‘ðoýöÓÂnG’ Fûe¯õÃûÚøø¾Kh Xgy‡+öÙ÷º£Ñ™×f™Þ짯3ÿºË›“Îä6‹.6j/ÕõMÏ‚}PÒt”-h/Ûí%I; € €D( ÁÚHRPhs|55ÔÙÁ‘@'M¼ÀêIɱÆïy7î™qÑ$cK¾ëÞûЪl#Ôlç`uÛsß‚«æYͽ»Þ9ûuÎ¥[Çš›u¾­q¶¶ ýSŸh#» öYmEº¡iAÌYìö~†;Òßý0Ï V×þD°ãnÜç=~¼Uݪ]¶ÄÊ÷¬)0")IãƒÆA«¬gÕ„¦3iLFR‡ûòŒGR·+ëXùÁ‹§27×{žîæêvÂq ÐšÒæfùê Z³\5¹) iî‹ôQgÿš¥¹à«Y¯£5…9ËøÒQ‘]mxc0ضÆmŸAlílÁFÃŽmVjMnVu šmUÕ„ùØ[ÕX'œ¤)VZT‚|¨Ð¢óÛ±²¹h h5Àªll tj 2ÖfÐØÖ?mn–q(ŸÎÚoïß§~³œõ¾hnp-šfÄ>ëÚÞ?ÇìûÆYÞöã­ÝÞ×øÙo{¼F&Ûrzknv‚Û­½+œ‡@ä,¹5@@h“@KƒÚz±“§Ê­knݶ[öì;h=÷ß8UVnÔ¯<>`e."©©>4˜l¸}¶³¦Óèì¢)S´EE%—Ö™ë`6g 0kþj³Øgy›ûZûhÞýpà‘ÇŸ ÛÌÑ£År®ºFN:´žæÌÖ ½ŽEÇùŸüAtv·~ÐæA;dgòWŠ.ä¨Ñš‡Iýš·%颒<ã’ˆËfJ ×,úm+4í:_|TÎWU‹·"¸áù£M+†[¨²©E¶bAÀ¾ˆ¤=Àk›¦¼0g&Od?Òü¶ž[Ræ•âÆ·ßÃ~ÁØæ[èøç|×Ðüß‹|kІ¼hi¹ˆ.–y:üÿBžß4p¬©HJÏ4ÍÐ6Û5‘ÔüÔom žÄ7ÓÛw®}–·ÙFkÍ×H]­È¢¡?$ÑöÍ×ÁñÓÁ¯¦9³u¸¦¿Ñqþö5¯‘ÆFÓçè7 ( Ðþ¶Ûß”@@ÐàfKfj› ì;ÐôÝêÕëÖ›»#~Ìí×7 ®."i¶µòkÛöÙÎsg·}ÑÈ€´q‡¦YúÒr#%ÈÛk×9½¦ÑJÇÁ÷­í‚ÛµhP]Ó’´¤˜ _ÚÏÑ…'ùÛ‡t3Ú¦¦ÑŸ‡{ÚÈ•~éÌi®X4ÒÞgÍãœàé&µK}ý†Ý;Ej?f,F˜4þ ‹šm˜ù°50^·êMswd 4š•Ïï)07%1°µÍFl h:å«Dͬ³|ýê3¡øµÏ&£çk@US]˜9¹E°ÁÎíª}$Ö¢Áàõ¹¶#í‹/•‹{ƧiEþ±Ã—ZeÓ‘ÛšÖ9¶†dξ×û>1¿}únO+£èÒ¦%¬ë†ÛÈ ’¥Hðüù2¯ñºÔ{³ëˆæðyv½W4ÏûŒÑ,Δc´T€ÀvKŨ € €@+4pÙ–,Otsí êê>sFô¶»š°Ïv¾úŠ †€3ÚG¯žÙ.¶¦ÑÀ²ÎtöŸu®3ãÍ|Ö:»»#Š©[šâDǬè”kÞÙ «Ö¼o¥OÑ׊~ø ?z¯þék7·k€>X?Z²OSˆ$Mš"µËž“ó;w³·õü†ÍŸÚW—IÚ—nM]²ääHÊä‹Bv ¡g¯`»Ù‡æ,_úk½ Aî8êCñ_4ÒŸêÑxå½}¾±yLYØKD—ƒz'ˆæ~64ë¸åQƒÔ挑ö);H@6Òs[[O-†™ù®cн˜5…‡=uŒ}ö½Þ÷Ž(:£<¿…o/¡¨Ô35hþ^ïÃí¯¹7ŠñóòF¯Ü6;Á1ÆŽm"¶ãá.3F@@.èÙ³§èbƒO>óbcÚ‰å§÷}OÂå×öïô7¿þeÿ]­~®‹HêdÐú IDATìc k€Ø47g;kP¸%ýkug‚œh¦ürÈØ¥éEÌÓþ³Îÿ±ú]£N¨E#CµÙ’ýz?Ûó~hêýÑÅB5¯º¦‚1óyë}Ò4%ÿzÏ]®š½4~¢xÆO4ØtAHÍm¹ËÊŒ4%ºÀd¨<Ú Y=%íž´„ºXöY¾;›2ÒÇ5ï¶µè ÙPÅœY«Ç5@¬¹»5ÍIÿç9û3"…j²Ó÷÷ðˆÜ3ßÙßNïD4S§„ªªéEÌ™çšÎÃ>+{Í.M’`Ü£`‹F†j³%û5¨ÝžŽšzdþdß· ü¿  ³í5MÉ?ÍcövKîu&Àâ‘ÁT؇ € €@h.è[¿|½Ñ²/§òƒÍ^e`ÿ\«N{..¨}1‘4¨z!ûlgs‘F«¸a楖¾C»¡³ÎÍþùÏ:×E%µL?¶Ý{¬ ~j1û×ÞÐôÞhŠ’ÿùù¬1ê îg_x­½/×ní¥Þ°PË×Ì©­ù¶µhÀúЉ¡¡V 5'páÉŽ04gX·WÛƒmbMÝáæ²fgSÿÆø> Ú]M3bšÍY÷º §ÍmÏ»´ÛÎ íI˜M3Ðn¦‡©Þ.‡.Óôº<ë|{l—öix °Ow›±"€ €¸F@ Œ$¸­ÁY³žÎð^ü̲vƒ¶m¦ÖXÿáVÇlgÍaÝEgA¯Zç àG’#ÛL7¢éTt6³™VEsˆwDÑ÷f0]vtû êŽ0hk›FÎíÆ`tý{ëDÚRÌ(uvxíSmKS~®×ã[µÏ[x ¢ 
üùÇ:¼OÑ~s–¯ÎêÕÙ½f¾í¹‘¬«:G %Õw¥[TÖü5œh «­é-Ì ðæB÷ÎüÕÙÚ›úF ým.G¶™nDgÝëlfsöýĦ¬CÁ8Z½ïÚiMAæìðÍøoucœˆ.@`»Óɹ  € €>;o[h–uƱ¦ÑV®¿æ*+ö /­ YÏ~®ˆ#I•ñ™93Ó4h¾üUƶΒnÉlgûuÃm›éNBÕÑã>üW+•Èm·Üªªµß>ëüÏYlì×Üá€nI)=~"âê7]·ÀªûëE‰(¸½æ Ö9æ†ÞŸ`ûÍãæãª5ï››2vLÓlgkg'oÔþ½ùàqÍ_þlõÊžÛÜ™zÍ æ¦Ô,ú]DÁíº7^·Î±o¤Üô%IHóMÏÔ ysýÓYÝuËž·7ÑiÛIf©[úlÈëj«}¿Ôù¥q yB°ÏòÝt@Dómk™;¶)`ŒGgtk9~:|0sÅ–¦Àl°v"Ý×/³©æÒ ¡ƒÖôþõË^Y½+|ÿµµÏŽojóÿVy#Jk¡ãi¯Ò\€^?öVS*‘ëmAäP}°§yòŸ“Þ+ ä·¤””…6¶·£³À§ ñÕÕYÛÿó¢7¢‚9j?Ø~ûõtû½‚¦¾™Ìø×éÌçúíO~þ;ãÛcÁþÝÐ’ãÍý¾3ÇŵâC 9>†É(@@pŸ€.øÓû¾'ÿùÀƒ¢å'ŸyÑè¤PVƒÌ_¼n¾u\ëiÊ MQ1÷²ÖÀôÊ›·³–5P~ãuW7 ž}Éty¾1/µ™×Úœ!n5ÜN?úÉËÜÙ3eúÔIŽ~i¿5Êëo¬±‚ÚšfÄ>¶P]PM;¢ù§Íþ›¹ÃCcî·‰_]¹Z¦M™(zOÌ¢ÌïÚ³OnûrSVißtf¸뢎?ÿŸ?û¦N/Ó¦L0O7>Xмè>ÚjôOÓØ¸i«è¸Vsÿëk€áéç^²f¡_>{¦£Ö…:qÃ/}FêßY#É—Í•¤IS$iüD«FÐxé³R·êMcŸ¦ÞÐ…%ý‹îkØñ±4lÞ(:ÓZƒ¸)›>’¤©‰‘^¤ñÍÁݰu³ÔmüPt–sBF¦ã¸VÓ¼Þ©_¿Kj^dœU»ôiعMR>£®¶U¿êRÿÞ;Æ5ýûÔÏSæ_cñM£äyŸµ >®WêÞ_'RV&êwþXqgt-ª¯¡³|76µufðe¶tÁ7~€ïé­ÁÌ;¯Hp¤ºÐ ìSë¼¢³¡SSšZ8~Zƒ’- ²êÙ°Õ>jY¿WÏ÷Šæ7ÓkhPTó„o,ôå£ÖÔ%fÞwVà5¨_Pì[SÇñç7D¶ôÊä¡ ŽE3µí­½²íˆ/¯³¦éµ¨fàUBïù¯¥^™6DdÆè¦qhmµÓ”0ïí+?öÌQ^™AŽlõд#:³ÝL£÷*’b‰¿òîn‘KÆ8gíë ðÝG¼Æ"¡öö¾2;A>=éóVó{ŸñÊgƉLê—y¾Þ£š: Þ?ý@B×ÚM9bÏÝ®÷aÉ{^kºÔûç´üµdï{{lë´úÿ2-‡?-–‡ÿ3«Yýÿ³ýøÉSåò›ûïø¸U‘ :H€ÀvÁÒ, € €‘˜ÁíïþëO?,C·Í`·y\«úóû‡y™£EÇB3èõ'M¸ÀÒê¾HÒ˜ç¶ôQÏÈÕU=h@žTž«2‚úö¶4 ïL¶÷ßÖ´#Ø6‹}·¹/Ø£Ž]ƒÅšRD?X¸ëî{ ¹Ö=r´Ø¸ê¬/šJFs£›¹½—¿±Zô'\)<ü©#°½m‡o1Pó^>òø3–Ë©S§¬@½¶©)cnYø…pÍwʱ†M×Ñ`«E4Ì7d¸$Tsauuêm_Ù/ÏOî—ªŸýØnk% ôÁÞ_ßòœó?ѼÚ~%åªÏ‰÷øq_ŸD¤a÷NãG~}¿Ñ7oÅ)#P¬§i¿’¦L³®ë×T‡>ÕRæ]é§}ÌA®ªþ¤‰“¬€}*ìj°u—[›+þÁÌ_¾(¢ÁdMRWkÉŒ}ôþík¾™Ç+¶&ȶ#^ùÍW[Ô€­w}Am_p{}ã–M}õµ©Ïñù ²8‚,=ÿqC‚ü|™/¸­íhûÚ®¹¥©¿5øwžü™žWïãGgU÷éá´3Ϻ|l`0Ù<ìQÓŽµ-æhŸÅ¬¾¹OƒÈj§Fh€ú‹}Ar=®3ó5øŸÙMäæ zÞ¿]ïû€CÏÓz+¶ŠñãWÕñ¡†ªÍ&´ÞÞÆÿåjú”¥"Ë>ôåzW—ÓUæš¾û ¯5} vuÑpÍ ¶öEÿ_h/» ö9ŽÛÿ_«õš;no‹m:B€T$¡J› € €´@@¬ÿzÏ]Vº ^KQ¡Áíÿùù¬œÛ¡.a§5ÕI$eæôÉV53gõÿkïÞ~åªÊ€¯£•žŒµM ¤€½­Mi AÛ&Z/ P!M° iŒFƒ¨Èé‹/úf|ðÐL4¦‰šx¿&-‚Tkƒõ‚1)•§¢F¡MÆ|{\3ûÌíÌÌ9==³÷o'Íì³÷Úk¯ï·&§9߬ùvûÀ"íÄŠäXY·øC:þ@.ÿ%PŽ<òɾ‰ä|]¿×œôs‘§ŒJ$‹ËãŠ1Å¿òúýîÇ"¹ýЃ‡æ\߯mŒéC÷÷”Gùܧ>Z¬ôÎ5»ãÚì’WŸÇ±Bçcý^‹•ߦ¢>w¿ñ°ÀûH³?ÚÓWŒ-V?Ç øÙ#ŸO+vvÈ9 »‹v8 #¹Ë§tß(Æ&‹m4¼Ê7·Þ7BÉ‹œÌÜzm§,D$5c¥p¼F¢6Ñ_>ÔZµ+xóm&Ùß1Sô™Ë t÷+•?¼'¥‡ï¯ÿHnÇuqý°-’©wÜ<~Y~}†[ù~‘ 
Îv¹ý¦+›é{SÏ é|~Ðk¹ìHŒ¹œ<tM>Éâò¸bLñ/Æ7l‹÷CÌuøäÚåƒÚG\áÝ=®OßÕšßòõÙ%¯>/¿¯âž—z‹oågmÄXºØßœ*ŸïþFWœ/ÿÛ,¥ÀL³Ùþ›o)Gã^ @€–ÀgýB‘ÜŒ„gù+·Ë`hí!ä²#gÎvVW­Y½*­¿îš9«‚Û Ù‰ÚÞy%x$ÎÇI é¶8}oÛº¹Ýç q—ÛÌ×g÷ù(ÙñÈ‘/‡û%»Û÷û9V­ýê×§ÒùóR£1[4é.-Òïº|,ÆeGâkÚy»vÝÚ´qÛ۱çãý^ãþ±¢»ûúîò$ý®õØb¿¯‹2!8š/œiafõšžò$í“óìä²#Í—:K5£>÷koÜÒ.Õ1OíÓ=}5iÅî··û‰zݹtI$‘cÅ÷RoQ¶%J£´ãícŒ'Çñš çý`©Ç?Éýî¹·µV6>¬ùØ¡ƒ“tqQ®É¥:þþJ«ûU¯Oé=Ûç–‘ˆ3åv‘¤žt‹R?>ÙLù~³+zËyä{m¸jnY‘ùË}Gû«W¥´yÝܲóõ3è|< ³\¢#b‰Ò/v~õ¥ð+·Ô× ãC¬ mÒD|” ‰²,ÿ~5¥•ÿpgw‰–A÷ã¹ìÈ…ÿvZݰv¦(12JB:®•ñÝóÐ]ž¤Óûø{‹õ;=—‹ôû†Rœ?ú¦FcåDçÇÌFPŠdt+-  @€,X½ɡŨ~òø±¢›qW;rïòjêh¿˜ãÎ÷ÿþÏ»)j†O²ÅªµrýëqûˆòÀBï?îx£}”Õ(××^hŸ‹ÙßböµÐ¸]_Ô¿ïA§‹ãÓÇЦäd¬¼Ý~Ýü‰êQÛÍv$FË«’ûµŸô^“^×o ýŽu;b‰Zß‹¹ýàD§Žy|À0É5Ä竳>¬ßK}ý°±-ö¹ø¹_B;ß'Îû j¾ó¹¯.†€R$CUŸ @€¦D V çr ï~çt~…øØÓ' íøŠtüm#@€é8u¶5ö(w2ÊêèéÔÈ X¨€ÄöB]O€ @`Š~qì™öè']íÜîàìÄs=ìîÚ —`8nI€ øæ±ÖC:£‹-W/ #— P ‰íZL³  @€ô DÝÌi_íüÓ'ŽåföFéL‹ÀSÏ·F`\ì'Ób`œŒ. ±=º•– @€*%ƒÊ«w¿í–©‹íçO/òßùÖS7~&@€ŽÀ÷N¤tö\«¦ö¶k:Çí @`€Äö Ç  @€TD VfwoQ[;ÊxÄvÃõÒ»Þ±«»É²ú¹;†øùk}«ãlc6íÿû–Õx † üõosÏÅÏGŸ‰‡F¦tÙëšéÀ®É9·W? PuUP| @€ê.ðØ7¾Nžz.mß¶55³éüù í$‘þÈý÷.k¢HÀõëGÓÍ7½%­~Óªb¬O=ýl:÷Ò?Šýûî÷ÐÈe=ƒG€Ž@ÔÑþ®_›Ò­ã§^H韯´’ÙûnIÙá²G€À‰í!8N @€ÔSàƒØ—þõòËéŠË/¯@$µ# ü³'ŽõÄIá-7nì9¾œüæÔé¢dÊñ_žèÖm{v§;÷ÞÖsÜ^ª½¯{#td¹ <ôà¡bˆk¯ºr¹Õø.¢ÀéSzõ?3é¹3½7Ù±^mí^•þGüNïïâh½$¶ë5ߢ%@€AàÖ7ÐjzšÜ¹÷ötò·¿K¿þ/E‚8VioÞ´>Ý}×{Ó4ÄšëÿñOn¯ÒŽò)·ïÙ%©=ÆÛpæzŒpÔtæŠ7¤™õ›Š>bß¶4˽äÑÒ(¸Ë­bev39×Y¥½nM3íÜ”<0rŒ·‡ßéc`iZY™f³Ù*bTÙF€ @€ @€UððÈ*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%‰í*ͦX @€ @€ P‰íL²  @€ @€ P%ÿ‰ þ…ÃÒé§IEND®B`‚rocksdb-6.11.4/docs/static/images/delrange/delrange_sst_blocks.png000066400000000000000000000617741370372246700252460ustar00rootroot00000000000000‰PNG  IHDRàŸ…y` IDATxìÝ xTå½èÿ·`#lÃ$(¤@‚& X¹hLH+Ô-9ÖPA,JÙ"ÝÄ»-âcqW9úWþ±îZÿ^Ž»r¤pNA‹R*¨;ôQC ¡AA+ 
H@#jâQÂíÿüÞdMæšL&sYkÍw=OfÖå½|Þ5ÃÌoÞKÓ§OŸ6@°­@M]£lzs¼_û™¼_ó™mËÙÝ‚ådgHNö)3D¦üp¤¤õéÕÝ$¹@@[ô g‹v  €„X¾¶Z–¯Ýò˜›wæçdÉ’Û¯Áû»¹šÔ @@ EÀ¥HCSM@ç ¬Ùôž<±ò-ç € €"ภ@pˆÀW_ý¸½³Ë‹0ÖKd˜T¸›ç € €8N€!¨Žk2 Œ j7L/žŒô”©v¿ô¾ró¬BY4obÊÔ™Š"€ €¸[€pîn_j‡ à¢1CL@Ê-=Ýjê剕o™–ÉÏÉ’[çûµÒøÑÙ~Ïy‚ € €€ÓÀ9½)? €@ʸ10ÕïÌ4qc½R榤¢ € € 05"&NB@@@@ :pѹq € € € € €‹ˆ‰“@@@@ˆN€\tn\… € € € €@Dà"bâ$@@@@¢ W!€ € € € ‘¸ˆ˜8 @@@@èÀEçÆU € € € €D$@."&NB@@@@ :pѹq € € € € ôŽè,NB@°…@ó—ÿ%ÛªwÊSÿ{¥)Ï•ÿí‡ò½Q#äâñß“ôôïtXF½¶¦v¿de á9C;<—ƒ € €± \ì,I @ˆ«€Ðþù¿/òß4³Wþó yä‰g¼Á·/šäoÛß=7pÓàÛƒÿþ¿¤¡ápà!ž#€ €ÄQ€pqÄ%i@@ –ÚóM·¹s®•+.Ÿh‚nh;Üü¥7›¥ËW›œž3ýª+¼ûy€ € <pɳ'g@@ KÖ°ÓË&NðöxË8ëDÿ±!€ €ØW€œ}Û†’!€ €€Ã¬9×þáúÉÈüs»UkHé%é0àö‹²ŸÉgŸ7ÈÐïîV~\Œ € ;怋%)!€ €~_ý™s­úí÷üöGóDÓÒ­³Å´7œû:[!š2p  € €@tà¢sã*@@@@"`jDLœ„ €nÐC÷×}ì­Zöàsd@Æ?tØ{,šk¼$àµ(CZŸ4ùnö9QçXÏÜœ!20Ëuz\ˆ €¤²¸Tn}êŽ €@Š hpé…u›ä•ÿ|#¤À-ÿ}Ž\1y¢ß±®\³»fŸ<ÿâF³©&²æO/›¾ ÎüÉeöO¯öÝòq$iÝýo¿N·šÚýfØ«î‹&÷Iýg²~cyH-ó¯üÇç  Y v"€ €).@.Åoª €@ª =úÌ¿åNSí¹s®•Q#ó¤ß™gJcÓa9ôÙ¢+ý¦u¾5Ë&škt®¶LÏo KƒW¾Û°!Ù¾OÃ>>v¬ÅÌûÖQZºÈC,6 öýÛÝ¿6Iiy/¼`¤©ƒÚhPN‰Õ”Ûqc‡½cQÒ@@Ü$@ÎM­I]@@ SÚ}uæœÀ^nÚ[lÌ÷ΗK‹ äë£þ¸®^£‹ è?íM¦½ì"ííªðZ&ý‹´B¥oíÓ~Vðíß¼ÓoÕVµÉ;7Gþïê?™ú¼ÿA·Çu=@@ °CxŽ € €€ öí?`juÁ¨!k§«‡ÎuÍ5¾‰ÈîûÔ–_Û\aÊ¥CW5x¸©Ë•ÿm’Ù]ýN÷Wu LŸç € €€›À¹¹u© € d`öÕúYбp;¢¹&\Zvܯ 7èðRݬ¹äB•S‡Õênî¼P×°@@@„w €¤”À¹Ã‡™ú>øïÿË,’ ó»u¶Es¦Ù• _geˆçñÏ>o0Éëœx¾›ætè«ï?ßã•ê·ß3ó i¯¸ÀR­T¢¹ÆºÖ 5ø*øè„²SF@@ÀÎÌgçÖ¡l € w]ÝS{·ÍþéÕ¦WœføÔÿ^)_44…Í;škÂ&fƒ}û¦™Rì©ýÈ¥¡ € €€ûÀ¹¯M© €D) =ÜÝz“¹úÓCŸG”J4×D”pO64Ûä¦+¡ê l € €± [ORC@—X½ÂºRh®éJúñ:7ã¬ÿM·×+¶Æ+ÒE@RV€\Ê6=G@ÀÝÚ“ëÙ?¾dV:õNº»fŸè¿P›ž÷ÈϘCçœå=%škôbk±‚ÃÍ_zÓ²ëƒ+&O4EÓ)^Û\!G~²¨jáëò$v"€ €ø °ƒO@@À-5µûÍ¢ ZŸá9CÍoúØZlA(¼x¬ È8ËTYƒd:÷›n: U{…Y[4×èµVÏJ7?/WË'Ÿ2IO¿ê +‹¤ÿ˜å‘ðNù·»m´Ìº…å£e¶V‹Õóô|6@@ÈÀEæÄY € à0¬¬ÞëêžÖöý ™‡:ßÙß¶¿kíöþ}àžÛeÌ÷Î÷>×Ñ\£×iÏ7¨å›¨5äÓw_²ëbËžúµé§>Vàз\”³‹¾ûyŒ € ^ ÇéÓ§O‡?Ì@H†À‚7È;»êMÖOÞ=UÆn$?e‰užZ/­ŸnZ/­_¼¶Oê?“–ã-¦\`:ÄòpóIcÓas(Ó3@tA…޶h®Ñôt8ìƒõrìX‹è¨ÚyP–¯Ýî-}á˜!ÞÇ<@@@À­=NŸ>Íd3nm]ê… `[#G[äúE”¦æ£¶-c¬ –Ö§—èê¯Ö6x`yö‘ŸŠîgC@@ÀÍô€ssëR7@[ èLíö— —~)4Ó7ø¦õ^rûü‚ooVï—ûžÜ,ú— @@7 €sSkR@[ ho·uå»dÕúwLùZZNˆç¬3äªËGÉêGfÊ”Žô DÙ²1*”Þ´¾ZïÀ¡µúüÒ‚韞frS¯iÿºR6¾±Û<ÿô‹¯RªÇ`ŒÈI@@À AµA#P@w hÀmë΢+ΛQ 
8zvÙxq®ê®Êƹ6uõÍ& §sá­Ùôž¬Ú°Cn˜:Nf–^(:Ÿœn¸Óãl € €ØU€œ][†r!€8JàýšÏäýÚÏM`H‡ZêPÊq£›çŽªˆƒ [^Y+›ÞÜcrãGgËÓÏm“´>=ebÁð ÞuªEE@p¡86*UBˆ¿€ÕËmÂØaÒ/=M<¸Ar²3äæYEæyüK@­z÷ ”N)ºÀƒ¶‰n‹Ë.3ÏuÁ zʪñ@@ à¡L €®Ðp4¸³øÑ×̼me³ ÍsWTÐe•Ð iÍþ¹ ÿœÖ¶ºwy®+¯jêbú7p.:—1P@@€³A#P@{ h)Ý4è¶déëR³¿QnS,:ܑ͙”KëÓËüÓ6ՠꊇ®5ÏuчAûKIqž3+G©@@l+@ζMCÁ@’!` SÔ`̪õ;åæY…2½dt2ŠBž ÐE51AVÇï—o”üœ,ó<ÁE!;@@— €sYƒR@® h(Ãí]õr×£¯™`›ݬý]K³Ý" 8í§«°jVW²½náj³Šío5Eô¸5¼Õ-u¦ € €ñ ?[RF°±@M]£,~ôU3äðÉ»§š€›Wƒql„Ѐœ.¶¡Á¹'VUJKËIYñÐ  «¨Þo†'3§\8=ö#€ €©+@.uÛžš#€)%` )ÔÊúÿ˜cz0i¯& ¦°!Ð] è¾´y—ôKïk†-kÊ—6 …c†È”Žìnò\ € àppo@Š ^à‰••RQý‘™d_{¶éª—Φ—[x2ŽÄH@çܺó ô;³¯L*È•uå»äùWÞ“«'–™¥ŠOKëͽ#o’A@ì.@Îî-Dù@"РǦŠ=²hÞDÑa€pÓ¿ƒö8 ND ^:lUWÔÕûqã»å‘å2oÆÅrôñfH«öÒ¼ ÿ³"k¼Ê@º € €É —wrEˆ€ØþR¹W®»ò{&pQ^Y+ýÓûšy¸ÒúôŠA$@bªv”UvÈuW^hzÌi€îPÃWR:i$äÄ4¹ € €qè×ÔI@ èüm›ÞÜ#£óÎ6AŠ–ã'dܨÁÞEIq^ s#)'P4v¨YaÕÊQ{ÉøôK3TU/YúºYèA{w2o¡¥Ä_@@À9ô€sN[QR@ åt‘í¤½Ùt"{íñ¶«ös)4‚ DÊÝ ©]a}-¨o–aÙ&à¼àÁ ¢éßþjŠf­‹@èë„à\jß'Ô@ì+@ξmCÉ@”Ð9°tâzíÙ¦½Ùtά—þò\zq®éñ–r T4(—™‘no‹}Mþ^û™*+Ön—«'2çê„ôú ‚|Yrû~'9zL^yóSoàú—o”üœ,¹yV¡ßy*¿|x“úýì#?5 ?èPp}Í” oÈ@@ +ຢŹ € hÝ4À¦Cß}ñ•,¹ýGL‘'!\[nésۤߙi¢=S5€þü+ïɸó›ÅP’[:rG@œ)@ΙíF©@Û hÐMnË×V˪õ;ä¡Ûd†¸Ù® º$ =äÊ+÷Š'ã;f®ÆòÊZyþ•¿Ëä çÉÌÒ M9OFz—Òäd@@ ÕÀ¥Z‹S_@ Æߨ-O¬¬”¦5óKiïßÉácœÉ!€€ tÎ8Æ:~t¶h@î¾'7›n˜6^ta Úé16@@VpÜ  €]Ð/ÞK–¾næ†Z\v™ù¢­ ЦKŒœŒ€ë4è¦ïoVï7CV'^œkzȽ³«Þ,1¹8à¼ëZ !€ €@¤à"•â<@ E´GÛ#Ë+Lo—'ïž*:ÔT÷åçd¦¨ÕF®h®b{b†¥¯ÙôžìøàS™;ãbÞGºɹ € àhpŽn> Ä^@‡•=ýÜ6ÓceéýÓMàíÍêdÂØa¬Z{nRD åtQ‡ºúÃ’Ÿ“efYðà³PË}¿˜lzÖêqÆÎ*É)wkPa@\-@ÎÕÍKå@È´GJջ͠i}zɺò]fþ¦œìŒÈà,@ Ú«¶ÿ™}MÐM‡¸—¿U+Úãö‚üsÌû‘m½ht6A¹ns) €$W€\rýÉHŠ€ÎÑ´í݃2{ê8ÓÓDpƒö— c‡ŠàØ@»èûSÅöý¢sNjÏ8]ðaXöY2³t 9»4å@@NÀuJÄ  €€ótHWÕÎrAÞ9¦gÛªõïHZŸÞRR|‹'8¿y©)% sʽóA½Ì›Q`ê=í_Wš¡¬ÚcN7}¿cŽÊ”º%¨, €Ž çˆf¢ €@×t5­;š/¥Ö®Ÿ~)WOeöu-5ÎFì+ ïw:„UßëtËÙ‹þ(_}Ý"¯,ûï–Ö|sö­%C@· €s{ S?HªM]‹Æ•òÊZÙR]'¥“F˜USŠ"€>ºjó#¿o]ÅyÉíW˜@Ýó¯ü]Fç ”’â<Ÿ3yˆ €ÄW€\|}Iˆ›€öø8ôÅWfH©ßtåRíá6½dtÜò$a@ÀÉ[³é]3×å ÓÆ‹¾w.ÿÓv™<á<™Yz¡“«FÙ@@ÀæàlÞ@ðx¿æ33ÌJƒoe÷®3=8nSì{ @º  sÊé6~t¶l|c·è*¬œ»yV¡èðÖºúfs¬ Ir* € $@.ˆ„ €€}¬ÉÄu^£+ç¯0Á7k¢qû”’’ €îÐ÷[ý‘#';à çvýN3”_rú#ˆ¾/ëP]‘• @ˆT€\¤Rœ‡$@@¿øé–Ö§—̽k­9zLž¸{*_ô`O €@g:dµêݽsÈ­+ß%»ö~.¥GÐK®3<Ž#€ âàRü ú 
`'VV𹉖Þ?ÝôtÓ¹Šú¥§Ù§€”@ÀO@‡§êV]5?'SY^aæ•[4o¢é%§Çµ§œþ¨Â† €©-@.µÛŸÚ#€@ÖlzOVmØ!7ÿ´P¦üp¤™gˆ/jIl²Fb  CTõ½\@ÑÅqV­G–Üþ#™Tkæ˜Ócù¹YüÀk’@@ÀIàœÔZ”-`­T:± GæÍ(0s õíÓK<鎮…GˆL@ƒq:„U{ÈésÚcÎsÖwdfér‘r €Ž çØ¦£à `wz´bív`Ó•Jõù‘¯™¡Jv/;åCˆ¿€þ0ó~ígÞœ®n­=çî_Pbþê*¬üHÿv @!@.Êä)! s¶­Z¿Cš¾<*‹Ë.ýâ´uçA™0v(_ Râ ’ €@÷tõU È•癄¦ýëJówýÌ1Ë+k傼sX˜§{Ì\ €@RÀ%…L@À ºb©Y¯ös¹oÁdÑܺò÷¥hì03·êH@’+`õ‚Óÿsî{r³4~yTt±}®CZ5 W4vhr Iî € Щ¸N‰8hÐáB:Ïͳ ͪv:Áv^ŽÇÛ[¡ýL!€ ? Ì­Û¼Kš¾üÆÌ)§ÓèœrEc†È ÓÆÇ/cRF@¨ÀEÅÆE *ú…Fƒn rÍýr3(«ŸL/¹€ ³Så& ž €€4 §ÿgé0V]YûÍêý²äé×Íc‡T{ië±üœLÔ†""€ à>pîkSj„ÝÐ/([wüœ,³BöpÓa>×]ù=æÜé†+—"€$G@ƒrºâªþ˜¤?"éÿoKn¿Bjê¥fƒYH³!€ €@|ÀÅ×—Ô@Àú%D7í°|mµÔÖ5Éì©cY­ÔmG@¢xgW½lªØ#£Ï;[¦—Œ]àaWí2ñâ?:;ºD¹ @ € KÃp«€örkl>jzl|c·<»a‡Ìq1󸹵Á© €@§:<µ¢z¿\w¶ùJ{€k¯¹ÙÓÆšÿõøàý;M‡@@ ´¸Ð.ìE— h/7íᦿøßõèkæ×~]H @B èÿ™:<Õ“‘nV\Õ Üo5ŬºªÇt£·\h;ö"€ (@.P„ç à ë—z”úÚ…«eÂØafÎWTŽJ € d²AWŸ÷“‹M@nÕúwÌœ©3KǰHQ’Û†ì@ì)@<‰ác IDATΞíB©@ ³ýQ¾:Ú"/<~½¤õéeV~ë—žÖ¹@:°æ³-Z²ôuii9)º«ö cC@ ÕÀ¥ú@ýpÀâG_“7«?’ç¿ÞÌOc 7uAÕ¨ €ŽÐÕWõßE£³M¸+çÿéfš¬xèZóüýšÏÌÿÙçÙ¼@ pQ q $W@‡¹¼´ùY\v™™{Fn:GövcC@{ X?µ?)¿|x£ Эÿ9¦°:¤uü¨l攳gÓQ*@€‹"I €@|Þ¬ÞoV*-8Â,ž ¿š÷;³¯ ºÅ7gRG@x èêäk6½+µuMf¾V}~×£¯ÊÄ‹sefé…ñΞô@Hˆ¸„0“ tE@WVÛT±Gò†ešÞpÓãºÒ½Üº"ɹ €8O@{ÈmÝyPt!¥é%£E{Î-x`ƒLº$×ô~×Ï_}}Ì au^í(1 €@ª €KÕ–§ÞØH@?`¯yåï’Ö§§Ì›Q`>hkЭhìP>\Û¨(  €ÉÐÏ _~#ù9™¢?Ôé"ò3ä·¿šb†²ÖÔ5HNösÔé|%pãWר“ € ­€ätÕUwqÙef‡ªwJј¡üH-*×!€6 g“† ÄC`ã»eSÅ™÷“‹ÍDÀoVï7èôW6@@{ h¸MoîÏYé2½d´èg¹ç_yO®»òB™TkïÂS:@?p~¼Ñ []ñе& ·®|—™Oî‚üsÌñˆåD@ pA$ì@ ñºX„±CÍý%2?7ÓL¦«ì²!€ € 'VVJM]ƒüöWSÌçÔG–WȰÁ~Ã\“Q.òDœ(@Ή­F™/ CIûŸÙ׬LºàÁ fN·û~1™…ß²T@p¯ÀÆ7vËO¿”›gšÏ¯sïzA´wÜâ²ËL¥uA3'{ ¨ µ¸¨é¸Èt9ø#_3«S-YúºYHáþ_L6ËÇGž g"€ € `SNXÖ…À4ø6íç0ŸoµÇœ~þµ†·”³O›QHž¸äÙ“³Ë¬_uîŒ'V¾%‹o¾Ì;¯B\ÞøT@HQÏXW\ÕÀœþð¬Û“wO5Ï7½¹[ŠÆ 5½æR”‡j#€@ €KáÆ§êñÐùÜt~ ý%pѼ‰b}‰On¤Š € €€ý´·\EõGÒrü”Âúήzyþ•¿ËåÅçš©í_JˆtO€\÷ü¸©«o6¿îé‚ úëžö|ûêëcÌçÆ½ € €@í!w ¾YŽ?i~¸^³é=Yµa‡Ì¼ò{rôñæG쯎¶˜UXÃ$Án@ÀQàÕ\Ö`Ó!¥x[zÿtpÓù-ŠÆµCñ( € €ŽРœnƒö]ðáé?nóä´Çœäòs2ù¡Û‘­K¡@€÷è²O?·Mô?ý¥ÿsºY‚]?L;ÔÌoÑÉåF@@n è4/¯¼¹G®œ4ÂÛc®éËodrñy&(×Í么»¸¸“4À¶íÝ¥lV¡ù…M»Äë¯mãGg;±:”@@W èh”Í•µ2¹8Ï S½ïÉÍÒøåQ¹uÎ÷ÍçvæavUsS\!@ÎÍH%º+ ½Û*¶×Éä çšU™4à¦+•Nºd¸ùÛÝô¹@@â' £VtZ˜üÜ,óù]r啵òì#?5ºªÍþ òω_!Hè@€\8r¯€þb¦ÿ 
çdg˜¹ÛÖ•ïýO»¤ø<†•º·Ù© € €@ èç{Ýt±´G–WÈû5ŸË} &›ï:ÅÌ ¬~2å‡#Íñb¡ª $ÞIÊ—lH¨€.œ°uçéŸÞ×ÜjêäÀ§ÍÞù"¦—ŒNhyÈ @@â+ 7k[4o¢õÐüÕà[íFï¾_>¼ÑL=xž÷ €Ý \7¹Ü¾Ú½ñËoÌ$­:iëK›wIé¤RRœgßBS2@@H¨€5|UGÉèóúãý•óW˜©i–Þ?Ý”E§¬a>è„6 ™!à:p®kÒÔ­N´ZSרÖíQî{²ÜÜn˜6>uQ¨9 € €D% ß-t!¶O¿øJ–,}]Ž|Ý"+šatëÊß—ñ£²M.ªÄ¹RN€\Ê5¹»*¬½Üt"U ¾]¿hLº$W—]æ®JR@@°€äžåïÒrü„èUýN¢Ï Ç 1sÊÙ¦ l%@ÎVÍAa:Ð@[¿3ûš‰R¯[¸Zú¥÷•'ï™ÊJ¥Áq@@â" ßQÞþ Þ¤­ÓÝèê«+Ön—‰ÃåæY…¦³€ôd¤Ç%Eg€sF;¥t)u†~éi²øÑ×äﵟɓwO5+¥4 •G@@Û è|rºådgˆÎG­Óã芫V9ýŽ£#yô{¤†¸ÔhgGÖrùÚjYµ~‡üöWSÌ„§úË¿9²))4 € €´ è‚«6ìð.§=æÔ7Ëäâ<:p— àbp.n\§UMÿãYúÜ6óѼf²ÓÌŒt3ÜÔiu¡¼ € € €@$:‡\Õ»傼sÌ‚rO?·MÞ¯ýLnþi!‹nllê„ € € è÷"ý®4,;Ã|Wzdy…”¿µW~û«RÓci}zñ½)"MNBÀ>àìÓ®/‰Îƒðì†ÒÒrRî[0ÙôpÓÿ<&ŒJ/7×·>D@@hZŽŸý§sÆ-Yúºè0Öû~1ÙäÖ•ï2É–|?9墿: Ð;yEŠ è«Ö¿#µuM²äö+D‡“Ž>ïlÓ­ZI´ç›þcC@@/ =ÞôŸn‹Ë.ó;ñÈÑcæ;פ‚\‘¶Åë´÷Ü ÓÆó“â É \rý]—ûÆ7vËŽ?5«û躂5—ë*K…@@@› hï8Cnfé´»üŸ—I~n–jnëÊwÉò?m—›ZÈÇÝ0åR@@@œ.°ñݲ©b‰h¹§ŸÛ&i}zJ餑&†àôúQþ® €ë¢™örÓnºzJÙ½ëÌ" ÚË @@@% ÃW߯ýL¦OmzÌé ¬º ãÿ(»Ì<×yãé)JÎ=ûÀEØ–xÓ€[Nv†Ï,꠽嚚¿‘sŠM/º˜dâÂDl€Ó®9ß`JXðàéŸÞWÍ»”Ȫ o@ª„ € € €ÎÐ9åt¡‡‹Fg›8Îu W˱ã'eéýÓM@Nc<ÚSNçœKåÍ8Ã--­·7àväëY|óe,›Êw&uG@@@G èôapÓ9ã~ùðF û?Kf˜@œ.úŸ“%ãGg§ÔÈÆ¤à4Bª±|mµ¬ÙôwYtã¥RRœçÈ‹B#€ € € €t,Ðrü¤Yuµ¶®I–Ü~…ès Ð5XæÍ(èøb‡MxîÍêýrß“årôqWçxë—žæpFŠ € € € Ð À½³«^´Çœ.¶©#$¯]¸Ú¬pß‚É&@§¸r²3º’¬-Ï{N!uB¾œì&º©7ݺÙò~ P € € € €@Ò4n¤A·üœL˜[üè«’Ö§·¬xh† ÐmÝyPòs³7mYÌp­ü—¾.ê›åùǯ_¸¤µ#€ € € €8Z@sKŸÛ&ƒö—›gšÞs›·î•Â1CeRA®­ë“œöpÓžn+ºÖL §ÃL'ŒšR“éÙº•) € € € à2íV^¹×ä4·®|—h@nò„ódzÉhÓcNW`µÃUNN¨¨®3CJu!…òÊZ³‚…ÆäÚ¡Q( € € € €@×tN¹÷k>3Â.È?Çä´Ó˜ö–»aÚx3´Uƒvz,Ñ[D8 °m©®“ÙÓÆ™1¶QÔÀ[ÑØ¡‰./ù!€ € € € ±€Ý´'œÆ·^Úü·‡\Õ΃fž¹’âóÌñˆŒâÄ8]}bÓ›{dtÞÙf íÆ7v›•'´;Ÿ]ºîEQW.A@@@@Àh@nÇŸš€ÜøÑÙ¢#>k뚤lVaÌW^58ÄN£~ýÎL“’â<Ô K'ˆy†´1 € € € €v¨©k”šý ¢Á8ù¹àÁ f¹û”˜¡z\÷÷KOërÑ{üàŸž>m®ê!"­ºœ$^`Ëê²ÄgÚIŽ—^¿´“38lG;ÞKvtŠE™xÄB1ñiØñ5rÓãß&‚"ðÌÂ3’o&ÜO¾Îyœè{…ûÄ9÷%íž@<_[¿ylY÷ ÇÕI¸ã¶ùIÉ—Lí# A7]ó ­O/Y²ôuÑQ¢ºi~Nf— ÙÛ{6Á7/@@@@ð ´-.»Lô_4[Ïh.â@@@@ˆL€\dNœ… € € € €@Tà¢bã"@@@@" ™g!€ € € € •¸¨Ø¸@@@@ÈÀEæÄY € €¶è)·,R´§­×fØë9€ ª6 À…j‚½rçõ{%ÖLY*^%m IDAT1m€ ›V"?¯\#¿« u.ûèH€{©#Ž! 
Âk$µî‚SòÔª­Ã.==eˆœò Î¥–µEº'pZ^\Ù"Ùf¸ÉÖ^¶ cï)W#€@§¿ylY§çp‚ýl€k«y®\ž-ÐyâÈÄbü®Î·\è9¬÷ï-÷£ÛËäž«Èì‡ÊdvÛÓ-O-•;ÿj‹,=ëlþ:K€{ÉYíEi/Àk$ñæIÏÑÓC¾+Ò€ÑGž)í%‡¶“{öôj›MDÂö ó/ýŽMßÊS{ü÷yçîj›s®èÇgÈü<Ÿs:š‹Î;ïW'çwPîM½eŠ7¿ž2å†3dJ[ríåµzöli;1Òrèémeñ–8ÈÎ{„]è¤|Òò¿ÇµyÛ‘¶m÷˜ïRŸ‚ð0…NK}“ȸ3æ´÷ó1ﳕ§}wµÏuæý2ä5&…Ðyx|Ÿô¾÷&ÀW dÈsƉ§ö/ò›?ï“ó¯š/S½ÿ_Š4U­•ßW]õ‘%rGinû±¦²âÌÛnå×~•H[ÞÖ®¬âkenQ†ˆI¯Z¬"ÞòuX.Ÿóyˆ€l0\$,Mò»?2'ËÐ~Á®-«C/à`†¬vuÞ¸X§×^RÙF€{É6MAAl*ÀkĦ ûbeöh®é´|˜z^yÆ,Jx@dH±Îza‡q¥gÈ3?÷Ñ¢—Y­Ò/ø¦Éë\tszË€¬L>¡Êæ|syåH>â§]*G`ðMsñô–²âçlj‰èRÛZÅŠÃ=f%Í_' ôl|;-õ¾½ßô=aaëðÔÀÚ *ì+„{oðô–üX!"¡¯Ñàtè<óÔçQÝ÷¡bŸ{<É·ùß´rž¢rc±Ïwï¶k Î/øfN'so›!B¡E4°v‡ûÕò.—;~V Ymû*7ËVíkã'WûæY ?Ðà`Óy)\P00mž#`CGô€3n–2\†e{$_öJ¦6ɹDV?t‘ ›V ?z®u^¹W]*¯ŠÕ»í°<{Wø¡¬‘¤ç“=&À½ä´£¼‰à5’hñÄç§½J{™|Õž ~:È£A´½Á2{KYak€-¨'†|ÊK“[F„è —×ˬÆÚÞȩ̈·˜§·\=âD{ï9o>e°zyhPë¤ÜÐ{$t¹OÈ‹b]×ÅUN»TŽrM‘šúç¡_vËßÂËQ'°ï%óKØk'ðx˜çU/+Uµy—Ú¶=£Ð÷Xûq¥¢€¾¿´ý(Q{²uÕÞ¿÷==fõ@+ì-E•!æŒó´. ã{¾—Ü[ØS\3¤¸wëJ×~½m­£þïGšoë{x×Þ[ªÃS· x2L`¬fÓ2Y·»µrVO4OÑÅr~¥ÏÊ©V LD|Ï—P½Û,§Ì¹Z{µ­Â:@&þLƒvlÛÛÖÛî°T¼¼CFh°Î›÷™øc Þ5ËÖ—ý{ÅYYð§8'Jô¯¯É¥Þ¡£>'Ô핊ú‹dvv†œ««F:g\¬Óó)m.ë¶uz6ç£x) ë{:Öé¥@t«ŠžÞrïÂÿå7¥A¬Ö|¿œµî2¢õKaPðMï9.÷gö0_Çå÷ÙsÊ¿È~_Û…¹¦hBodY_LOË‹OÈEÚ3Σ½Ê†o… ú—¢KÏ¢/G{6W¶È=íOydèÛ6ôkÃ&Õ¢ èBÀxÏq¹iÏñàR5ž”·ux¼'Üœq3& ò„ì(Ô@Ÿÿ5ß5?˜œ’}Ú9%/m;%ã {úÍIý}\ö¸I Y¶®|A*|zo6T¾-5E—K¾dÈÀL‘ÛŽeÈ5Á:jëŒDcµüþ±½m5›ó‹Û‚g+}yæŸ`[¦ö´kîÚX-/UåÊÜ¢\™zÕ¹ÒÐx‘éY×TµÙ¯Œþ¹ð g„ø4û–ÒZ¤Áw_wÇ:½î”…k+ë¶uz‰Õ 7‚b}OÇ:½à³'œ€oÏŠ sBöâè!—äiï·SòöžÀÀWk ï9%‡´‡Fv¾ÆÊå?wWPCå²ÜAWF¸£«å8-/V”)¥½Z癋²÷V„…sÙi‰juµm}¸czù¤ËCg tòz·z¯E\ɦSò7Ÿ`HëuÍ3IÊݸï#Ižsœ+д_>ºßKc“H~À˜Ñ¬LíÉ&ÒØfn¸ …2Ф‘!æÌ— AÇÛvx<’%û¼s¾™¡¨ù3dBÞå2—¡§áÔØï@çà† a \ßÔ>üÔj+øX§«r‘Nl¸—bëIjîà5â¾6 Õû,µl<%‡tž¢x¤í„4µ·‹ˆYÈÂÌm·0ÔN¨eDð¾ã¶áífnÊ´ ‡5Ô4|bÝ>òI“ö6î)S&ô”_¶z÷”«Ít§üç¤ëvn$ÚV0­Y}WGˆ Êa©¨Ú/ÚzhªÙë ÎÅ%;E A Àyäç× 7$êÛ£í?šqQ[Pîm™»èoí9ï\o]SŒuz]˳#À½”grq®¯ç¶ JžÙ:D56% ‚›t»šJËárÖ6“ÿ#Á_Ì»Z ΃@Û6% I§ œ–W¶H¶.L8—¥ˆ´ù ˆ·æ¥ì~ý½CS5¨Aß­ö„¼Ô³‰ûÞ—ˆÇÑdH¦®štou”Vð0׎Î9W¦û¬²ê)š,÷ø“íøzŽ"`OpK•Ùª´ù³Jdv¶é#ù¿Ïé²(ºyä\³ï°<û¤oð­íp—ÿÄ:½.€  À½”d²p´¯G7_œ ß:üI{Z\4"ôªžÖqÒt*ha‡p…³®i_ 
¢=ŸìÌpW%bwËqJžzü˜l4«¹õ”K’Z—Dx9)î¶­“êJYã/Ð:ßšæ3®ÈwEgkÈgàül±,‘µÄ)9d}EjK^çê¼ÉÛ#NwrßÇR>5Ó:,_´Ýg™žàÕQC›X×´íBŸ´÷ü«tþ¹Ö…VT5‹H†L(>7èk’}õú(ðßsu”žïu<¶·÷’½Û‡Ò%_€×HòÛÀþ%¨ª9i 9¨°¯ÓUPoÓM}öµ=´®É/ï9~®L¿-`ÿÈ™Ú6ïÛ–Ý"f>8 üå]âúàü؃€l4u¸Ü³º,ìJaÖ?/wú­xÚ$¿{ñ#™}Ëp6í:Ù2-2æ}fë¿k¶<µTîüktéE–+g%V€{)±Þäæ<^#Îk3›”xÏqY–ßKæç‰hî™ÂàrÚv<Ä'ë¼0 +“òÉGƒpÏ”Z×·ÿÝÑþ0¢GÖ[ûÂ,¾ï;±­û)yjÓIy¦4ÌJ×+CsßÇ–?SÛ].òç›™Ôîðûÿy¿ÔÔæJ¾Ï|·¯i=±Æ{¾5ô´Y¶¾\Ý6ï[û|pù¥%rþîÀÕT½óÛ Ø Üa©3½ÒB[iàíÒë—ÊõÞ¡§>çýõ5¹ô®·å€Ï.]¾øÙ»–ÊÜõísÅù®yn¬ÙoÊí)š!7ûᲊ¯•¹fµÓfÙZ¹Ï[¿h®ÑÅ: öYiæ—Ηé#½Yµ=8W¦ßjày‰{î !¨çɯW—È¥ò‘áÉœ–x^§‰7GŽV0-TÚΙ;+TéÙ—\ÿ¼L>´§~E ¸æš†ÊdCfk°¯5›fil×jžš4G–È¥¹~¥ÐUZí67\’p.~OÈì‡Ê$ç©¥rç_ýð. p/uŒÓSN€×HÊ5yB+ÜS¦Üp†doúVžÚ“ÐŒÉÌñÜ;Žo”¯@¹fN_™âqó¼NÝq›gÈ„9ó%ÓêMäŽJQ‹˜ –Š?,“Špi6VËï«w4ôþ(®1A¶Ð©µîÝ].¿Ñqm‘ž×Y:QOr.x8én/“{ D.½æÉÿ+Ã>£l×¼Œ{)*wI€×H—¸8¹‹ÁÃI­^0ãŠzË= ûì"h Î½ã¶Æ®zùÛ€¡åV/ªà¶v[ÝÝ[Ÿà¶ã=Þ‰­Ü+éü«Z{åHÖn†}:±U)³³l·ë–Ë5Ì ç:Ë’ÒÚL€{Éf Bql'ÀkÄvMâªU½Ü";´Fžò]WÕŒÊÄ[€{'Þ¤@÷xvßÐ)|øç¿´Îsîɇ,"i6Ê€@ÔIî¢Ü9bwç»BÏs´¥£¡¬?¸B¶Ü2Ü/é럗ëŸ3kêúí÷{â½.¸G‰ßyòLi/¿ m;&÷TžöÛôÄ{]psÙ‘8îÄYÛ&§.¼îÛNó—¶¡žVE|_ËiF0ߜիËJ-ìûˆ÷½£íLŸ^üÓh¦9¥í´à÷±€2†=OD¬#ÚêáŸHزjšÖµmé›?xøžô˜×i‰#wdz$3ª‚‡žWÎ;1~¨4CÌÑÕTµV~_y8ÔÙíû¼×÷àk?‰G8CÀf=à<òó™ÜõÕ¯tš?k¦l1+¤£_zK™l¹ý¼ :Ô50ø¦' ›vüúA§·ïЕQMÐN‹HÐj¬í¹ó(b©81Ex¤hÃ'¨Ú=äš)½MîжÃÑÂaHqšg\éò̃?¶è—ÐÀà›^=¨°¯Ü2"8ïýBj‚v:GS‚Vcõf΃ðÜ;ámÜy$š×½‘ðô–êrM¨÷’¼´ï!­W÷”²…Á‹GèûHÐûN`ðMðô–²â¾éôq÷꺬„(ƒÉGW,‘zÌé-C÷Gôœ×iDL¶?i€Lüñ8Ñ—PSÕvù0Âòf_+w˜Rƒ/0«P^<ŽM‡ºOߺzæô‘Áéx÷èÿfb}L¿<â2z¯ç6Hr¸árÏê2¹'¥Ã^kçJÎ%rÿ´foPï5«§ZA‰üú{ÛuøÁfž¹àW[{Ñfá}®Á·‡4@ÈJ­^Û<à^²MSP› ð±iø¤Xú¥·—̨Mpo€|Ÿfö–²ÂÖ[POë o^šÜ2ÂgQ‡}ÚV[ œè¼µW‰oò~½½A¯ó;‹' àÞI³]3‰æußV—AyÚëÕç5ì}]÷’)…z’Oo8ï{Ho¹&3ÄêÌí±ë“–^î½Æ÷}§‡\S¯ˆh«¬­\­sàu²C7ê-¦¬"¾ï¯šÿ½…=ePao)ªôùAÁ›…)§U> žì¼·°ð:mk^‡ÿÉ•©·Í—©µè°×ZÀ¹’Y We˜½A½×¬žjy—Ëô‘ûd5!þÈ™jVF\‘²µ]`ÞçÞÕ5¯óžÁ'üS² ª®×Z¨¢åoë1bèè__“¹ë[»´^ZÔÞ îGE­ÃN·<µF~Wç›ê^¹óú0«¯|ó…rÌcî%Ç4M’¯‘$Á§H¶áz­…ªþ­CVƒ‚ozòžãrÿ¶Sæ²qùí]Šò[‡îØø…ú”<õ¸O Î7Cï—ô€/Û¾çð8éÜ;Io‚„ š×}{Á4¨äóÚoîˆÚsB6šfzJv¨ñvæ|Ÿ´4“0ï;íù·?ú¸²%‚ VûùÝ«·¾wù¿¿}\y¢uÎMéáW¿¢ ÚóMÏ÷±0Å8-/nް%³/’é¹!ÊaœomýotÒw”¯G5—ã Ø»ÂgΡ¼4y ¸³ùØzÈ%yX;%oï =oÛÇ{NÉ!íááé)Cä”|¬_4ÍгSRï÷¾2¥Vpžäw·\uMrÀ/’cpͲϯ÷›ßIþOê?’-õºk¸üó,¿ÿÙüÏ㙽¸—ìÕ”Æ~¼Fì×&n*Qã 
¹gÓIS#*Ý §Loö=Ö—ËÓ‘àšNÊó{]/¹*Ä\Iíió(©Ü;Iå·UæA¯{”nÏq¹©í½Mtµ…gtc.µ0õ±c½‹Êë4Pęϫå÷›ö›²{òÏëþJ¨Mâ ™m¸/ü„÷jÚ/5æÿê\ùAqëTSáOæÎ°_Ný>>Ü4Ë íW£„ »Ê_†œ›iš‡å÷‹ÊeK$‹4Dš$ç%F€{)1Îäâ\^#Îm;'”¼ñtkÐÌÓC¾ÛÝò†]qÏÈUÇÙœ–—V¶˜¡Z.ÒÐqB·÷N¼…‘~Ø×}’‹¯A¸Ç¿•›o}?ÑEîzAƒu‰i½[‡¬¶–WËðo剀ނ!Ên¯Óp2ÎÚßÐ,&ÞåÉè~.쪪20ÔðïRͲå‘í´Z4C:\¤!äõìDÀ¾ö À `æu“úò¯C»&Ùgz¦ ‰Å¡{¦YsÄi¿s}›¡§m×ätéçø½rç]o›Àॷ̔ŸG¼ë°Œ·÷R¼…Ißé¼FœÞ‚ö.fÖ•÷šNË'–´mx˜ô”‹F„žÈš3Ištø©níׄœÓ)l~§ä©U­s+M“k"þB6AÄC€{'ª6K³ý5ùë>±U°Þw‚‡´ZåÐù&µÎ/çé)—Dô~’¨z·çÓµ÷H«nüåu’NÉÊhÝÔ,=,_˜H]†Œºgš5Gœ45µ¥uXÛ®ÉÌêŠÅ>Y·r‡ æ—^+#zmu%}ÎE 9Iž.D¥uÞ®[ZI8P­s¶u¼½Zõ‘ÜS0\†M»NV‹ÿB ù³fÊ ³BêayvíÞ¶„š¤¼ú°Ìž6@L ícß…ÚVA}*ÌB u“{×7iÎ~è ÙwýkòjÇÅãh2¸—’©OÞNà5â„Vrnu¯ÒÖEÂym¯^UÍI™Ÿ×K´gÚâ?gœµÂŸYñpkëb €û[í)™RØSL ­Ñwõ¶UP7ùOTîÍÍLÖÞÓ¬8å†>Rï;Y»÷$$M€{'iô‰Î¸ë¯û8•P{°-ì)Wù¼ŒècÞ#üç¦Ô÷–ÞRï{žw>JßáðmÁ/Oë /VúÏm™¨z[ùèb ·Hàûa'ï“Qó:íLÈÇu¸ÒÖEšjöv€ù°f¿LÍË5=Ónÿ…²Š¯•¹f…ÔfÙZiu£9,Ô4Ë„¢ 1´ß…ÚVAÝf!†Æjy©*פ9aN‰|ñX¹|è Õ„”òŽÛמOH¶dÒM$à†Ë=«ËäžP•¨[î}΄ËCmß÷××ä¢2¹§@LnË´öCÖ£ëËýV;­y®\ž-¸NfgÙ•ÉlëĶ¿:Ô4ÜVóÜy [ó.÷m»RQ @@@@\"@8—4$Õ@@@@°§8{¶ ¥B@@@p‰8—4$Õ@@@@°§8{¶ ¥B@@@p‰8—4$Õ@@@@°§8{¶ ¥B@@@p‰8—4$Õ@@@@°§8{¶ ¥B@@@p‰8—4$Õ@@@@°§8{¶ ¥B@@@p‰8—4$Õ@øÿÛ±c„aþ]cc!u@Ê7 @€h pÍ_¬"@€ @€8"GDPµúè IEND®B`‚rocksdb-6.11.4/docs/static/images/delrange/delrange_uncollapsed.png000066400000000000000000000614161370372246700254020ustar00rootroot00000000000000‰PNG  IHDRªñUB[ž IDATxìݨg~7ð'¹6»¶oÞµ.7ä²ÂÉ…€  $DŠ%!² …ØÁ- ¾DJ³(›nh$²AiÀ]ƒi‚R©Å©!°pC A‰ ܲ¥i%²²& Ûus÷¾ü&=æxîœ3Ç9çÌÌ=÷3 çžyfžç9Ÿ™óç{ž93wÌÍÍÍ¥1®\¹’vïÞâvçÎijjjL©‡µöíÛ—¦§§³ý3öS¦ ØG›¶Eô§S µ._¾<íÙ³'Å­‰@“â}>öSûh“¶Š¾´ 4q½³½ƒãö÷õë×ÓÉ“'Ó±cÇÒÅ‹ÇíáyÚ´-£?-&î£cT[ðn  @€ @`᪠g[é) @€…€ º(6³I€ @€…# ¨.œm¥§ @€X‚ê¢ØÌ$ @€Ž€ ºp¶•ž @€ @`Q,Y²æ9;;›._¾œâ6®ŸµtéÒZz§ìkÊNLL¤+Vd·ut$ú}iJ?b{,ökî5mýÒs%eÏ“&Zçs¶½^ÏSöžÖ„÷•&ì£Ñ‡Öû›}4eŸ3šôÙ§î÷ûè­M{®xÎÞº}šrψj["¾±¹téRŠ éÖy!òh;úSô©®)¾EkR?¢?‹}jâ>ê¹òÕhUž+Mx~´ï£u>gÛûaµv>7Zïoöѯ>g4í³OÏÙÎ}¥®ûMØGã±7¡í¯çž³u푽Û5¢ÚÛG) @€T, ¨V ®9 @€è- ¨ööQJ€ @€ ªƒkŽ @€z ª½}” @€ @€@ÅcuÖß8›[\. 
uV·8ƒWëïéééŠi¿n®)ýh„ÉÉ“'k»vhë,¦MéGôçØ±c_o°ŠÿjyÔÙ¦í£± ZûkÅ›#k®ÕvÝûhSúÑ´}Ôsåëç‡}ô«WˆÖ>Z·G«öѯ¯“Ù”×ó¦ôÃ>zës¶)Ï•¦ô£õ¾ÿ•Rµÿ·ÚtkÒnذ!Åí ÓssssƒVÒ„õc‹ð×AªóÓM°Ð @€T-°|ùò´gÏž´qãÆ´råÊš› #b[¶lÉ0h)~ Y+ @€ @`‘ÄhlkÀðèÑ£ióæÍ=ò±:ô·%ÑJñ­ûn  @€ @`tqtëöíÛ‡ÖÀXÕQt¨yhÂ*"@€ @€À˜ Dæä¬¿ÃÔT @€ ,0oDuvv6½òÊ+éã?Î~Û-Ç1È{÷îM333=;ëÇ¡¸Ýêé¹²B @€Xtó‚ê… ÒþýûÓÄÄDzñÅ»‚Äe>œâl»½¦8·W=½ÖUF€ @€‹Oà– zõêÕôòË/§K—.þÆóóÏ?O1úZt­œIuÞÅ·cyÄ @€(+°$FFÏ;—Œ¾ýöÛéôéÓ}Õuþüùtùòå499™8PlûªÔB @€ °è–ìÞ½;;„7~sz;ÓµkײU#¦·£fY @€(X²jÕª´qãÆ›ËÅoN§§§oÞïöÇ{ï½—ÅúN”ÔMÉ| @€¸]%;wîLñ¯5;v¬¯ Úߟ¶ŽÛ˜"¸®Y³F€m¡º½)¿ƒ>uêTz÷ÝwSkºYèÿ#'aÛ´iSzä‘G²»!@€X\·œL©ß‡´uYšO?ý4mذ!»ß ^c¤õùçŸO[·níëƒf¬Û ºýö£}¹AÖm¯Çߣˆ“oýà?ÈÎ*›ôøÉO~’âwó=öX¯Å” @€4L ²YÑÕazu9>KÕöPyòäÉlätjj*»NÅ¡Ãq‚¦çž{.ÅÙ·mÛVVãPâíÛ·÷êoϲVHî¹ÂZâä[ÿøÿ˜ý¶¹ÖŽh|AÄè» º 6•N @€nˆó íÝ»÷–y·s'NÖ[*¨FmÃ8Ä÷àÁƒÙ¡¾1’óÏœ9“];5ÂjtríÚµéþûïïÙ·XoÔݳr…ˆQÔøâÂD _ÖëL¿Ë[Ž @ ~AvÏ€¥‚j ÅFÊ)‚jæÛš"¬>ñÄiÅŠÙoÌ⚬‡J¯¾újk‘ÜÛ‘=zôhnY?3cwß¾}ý,j @€‘@œ)ò]Ù)Ö-TãD'›7oîÙîƒ>˜¤ž8q"; 8Rq„ØnS?uv[·5_PmI,¬ÛoÝygúæw,¬NëíЮýîwé·ssC¯W… @€Õ DÐ,Ê‹E=*T‹*ò‰‰‰´nݺA5N¼ÿFM:þìî»Ó#ßøFçl÷™À¯]K?whø"Ûê. 
@ _`dA5šs}Õ|tsoˆÕoOLÜ:Ó½E'`T}Ñmr˜ ÐUàή%= âÛ-[¶¤—_~¹ç\ãL¾1ÅoX…Ö Š @€ @à¦@© k;v,»æ… nVÖþGœDéÃ?ÌfÅ —zý>µ}= @€ @€Àâ(T}ôÑ499™"ŒÆ¨jܶOq?.K!6–ûÞ÷¾×^ìo @€ ÐU ÔoTW¯^žþùìß;3Õ7ÎògwŠËÄÄHj„Ô»ï¾;íÙ³'Åò& @€ ÐÀ¼ ‡èÆÙyc$4ÎÜ›7Åüï~÷»YÑk¯½–…Òóçϧ7ß|3›åNwíÚ•¶nÝÚµž¼ºÍ#@€ @€Å-0/¨nذ!°¹bÅŠ®:Ë–-K;vìHO>ùd:{ölzÿý÷ÓÅ‹³—¥Y»vmv»V € @€äÌ ª­ÕœeçÍŠ0{ï½÷fÿ6mÚ4¯Ü  @€ p»¥N¦t»Xž @€ô+ ¨ö+e9 @€¨D@P­„Y# @€ Я€ Ú¯”å @€ @ Aµf @€ @€@¿‚j¿R–#@€ @€JÕJ˜5B€ @€ý ªýJYŽ @€*T+aÖ @€ô+ ¨ö+e9 @€¨D@P­„Y# @€ Я€ Ú¯”å @€ @ Aµf @€ @€@¿‚j¿R–#@€ @€JÕJ˜5B€ @€ý ªýJYŽ @€*T+aÖ @€ô+ ¨ö+e9 @€¨D@P­„Y# @€ Я€ Ú¯”å @€ @ Aµf @€ @€@¿‚j¿R–#@€ @€JÕJ˜5B€ @€ý ªýJYŽ @€*T+aÖ @€ô+ ¨ö+e9 @€¨D@P­„Y# @€ Я€ Ú¯”å @€ @ Aµf @€ @€@¿‚j¿R–#@€ @€J–TÒŠF @€ 0d+W®¤ëׯçÖ:11‘V¬X‘âÖ´ðÕ…·Íô˜ @€À¢ˆ€º}ûö4==kq×]w¥C‡¥uëÖå–›ÙlAµÙÛGï @€È˜™™IgΜI1ªÚm:qâ„ Ú §áóýFµáH÷ @€˜/pøðáž!5Ö8}útºtéÒü•Íi¼€ ÚøM¤ƒ @€´ Ä(jŒ¦M.\Hï¿ÿ~ÑbÊ( ¨6p£è @€Ý>øàƒ‡þM³³³)ÿ[ÓÂTÖöÒ[ @€‹Z Bç‘#Gºží·'N¶tñâÅÎÙî7\@PmøÒ= @€¾ˆÐ™w¦ßåË—§U«V}½àÿüÕmùy šÑ(AµQ›Cg @€è%pòäÉÜÒ5kÖ¤gžyfÞªÿG² fª b3é$ @€qíÔãÇçþætãÆ)þÅÈjç'TŠ+™Ž€ ºp¶•ž @€XÔq¥sçÎÍ3˜œœL>úhvèoŒ¬vNq‰š¸Ti᪠g[é) @€E-ÐíÚ©ëÖ­K«W¯NK—.MO>ùdš˜˜˜ç‡ Lj¬iaª c;é% @€E-ÐíÚ©J7mÚt3œÆá¿+W®œg#±ý\ÒfÞŠfÔ" ¨Ö®Q @€nG ÛµS#”NMMݬªó~« [Ðm•»m–€ Ú¬í¡7 @€tôºvjçjçk{Uÿm×höß‚j³·Þ @€XôÝ®…Úí7©1Âêðß…½Ûª {ûé= @€±˜žžÎ½vêªU«RÞY~{þ'd25_@Pmþ6ÒC @€‹V ÎÔ{äÈ‘Ük§®_¿>÷º©qøïÓO?¸îÌ™3)~¯jj¶€ Úìí£w @€µ@œ©7N¤Ô9-_¾È®Õ×\55GÀ¡¿ÍÙzB€ @`Ñ Äecò®:*˜Á-;J;ª>©7%AÕ^@€ @€@#.]º”NŸ>]y_ªÇ•?ÀØ  º7š. 
@€G÷ß?]¸p¡ò‡ÖëpãÊ;£ÁLÀoTí @€Ô.Ð뺦qYš 6 |£/¾ø"E¾zõê-·uø¯ß©ÞÂRëAµV~ @€ 1’!2oŠËËüô§?ͽjÞòÝæEŽë°¾óÎ;ó‰ÃŸ}öÙÃð¼ŠÍ(%àÐßRlV"@€ @`˜ñÛÔøjÞ´~ýúCjÔ;11‘]Š&n;§QœQ¸³ ÷ûTû·²$ @€#ˆ³üƈfÞ‡ýnܸ1¯¨Ô¼©©©´råÊyëöêü…͹€ :rb  @€ ÐK ×hfËø7¬)Bj·úâ25ñ{USý‚jýÛ@ @€,j^—‡‰ÑÔUÖ‡ýÆïTóꜙ™Iq`Sý‚jýÛ@ @€,Z^×N]¾|yŠß§{zøá‡Sœ ©srøo§H}÷ÕúìµL€ @`Ñ ôºvj·@9(Z¯Üë¤Nƒ¶kýþÕþ­,I€ @€Àz];µ×!ºÃèB·CŠ{]&gíª£?Aµ?'K @€ 0d^¡°×I†Ñ5kÖäþÛ+<£]uô' ¨öçd) @€†,Ðë0ÛñÌ»ŒÌ°º‡ÿnÛ¶-·º8 ñÅ‹sË̬F@P­ÆY+ @€´ ô:qQœ‘÷É'ŸLqøï(§8QSÖÎ)Bj„US}‚j}öZ&@€ °hz] &ÎȇæŽzêÖŽÃG-_\¿ Zld  @€†,AõÊ•+¹µvéÌ]x€™1r‡çMÿÍS©nÞ’êšÒ @€¾ˆßŸnÞ¼yG„ÇgŸ}vÞüQÍxâ‰'ÒÙ³gSŠÜ>ÅaǤï½÷ÞöÙþ®H@P­Z3 @€|-055•â_ÝSþ{øðẻ¡ý‡þv€¸K€ @€õ ªõúk @€:Õw  @€ @ ^Aµ^­ @€ @€@‡€ Úâ. @€Ô+ ¨Öë¯u @€èT;@Ü%@€ @€zÕzýµN€ @€‚jˆ» @€ P¯€ Z¯¿Ö  @€ @ C@Píq— @€êTëõ×: @€tª î @€ @€@½‚j½þZ'@€ @€AµÄ] @€¨W@P­×_ë @€ Ð! ¨v€¸K€ @€õ ªõúk @€:Õw  @€ @ ^Aµ^­ @€ @€@‡€ Úâ. @€Ô+ ¨Öë¯u @€èT;@Ü%@€ @€zÕzýµN€ @€‚jˆ» @€ P¯€ Z¯¿Ö  @€ @ C@Píq— @€êTëõ×: @€tª î @€ @€@½‚j½þZ'@€ @€AµÄ] @€¨W@P­×_ë @€ Ð! ¨v€¸K€ @€õ ªõúk @€:Õw  @€ @ ^Aµ^­ @€ @€@‡€ Úâ. @€Ô+ ¨Öë¯u @€èT;@Ü%@€ @€zÕzýµN€ @€K:î»K€@…37n¤¿ûïÿN¿›«°Õf6õÑó:6==¶lÙ2o¾š °nݺôÔSO¥eË–5¡;ú@€ÆJ@P«ÍéÁ,$ÿüòËôW×®¥«¦|‹/¦cÇŽåšK fãǧØGøÃÖÜÍ @€ñpèïømShÌ|ù¥º@¶•nȘMï¾ûnºråJ^±y @€À‚êxV%@€Å-!õúõë‹Á£'@€#TG€ªJ @€(/à7ªåí¬I`$¿÷ßKßžIÝ*%@ ¼Àn¤Ù_Ζ¯Àš @€@ß‚jßT$PÀÿzê¥oþñ7«iL+ô-ðÙ®ÏÕ¾µ,H€pèï`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! 
ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`‚ê`~Ö&@€ @€s„ó; IDAT! ªCU @€ & ¨ægm @€²€ :dPÕ @€ @€À`K:WŸM¯¼òJúøãÓž={ÒòåË;¹åþÕ«WÓ©S§Ò»ï¾›®_¿ž•MMM¥G}4­^½:MLLܲ¼; @€ @ —À¼ záÂ…´ÿþ,`¾øâ‹½ÖMçÎK»víJ§OŸNp[Ó±cÇÒäädt·mÛ&¬¶`Ü @€ @€@¡À-‡þÆèèË/¿œ.]ºT¸b,!5FSW¬X‘~øÃ¦£G¦W_}5­Zµ*«c÷îÝéÃ?,¬Ë @€ @ %°äÊ•+ÙÈèÅ‹ÓÛo¿Ž¶ {ݾõÖ[ٲ˖-KH7n¼9rº~ýú´eË–433“^zé¥#¬K—.íU2 @€ Ü£ž2ãÝm?„·›QüõøñãÙ²=öØ-!5ÖY³fMÚ±cG\?øàƒ,°v«Ë| @€ Ð.°$Ó Úšbduzzºu7÷ö“O>Iñ[Ö˜üñ›#©í G€C‚ã0âø-k„W @€(X²sçÎÿZS¦[TÏŸ?Ÿ._¾œ¸[C‚[Aõ½÷ÞKÏ<óL« ·5Ä(xlÛ“'OÖÐúWMFâPóÎé­_ÿ:ýìÎÝY6Î÷Ùv²öÇùë·~®ÿì«3h·Ï÷÷â¸ã›w¤?Øüé÷þÏï¥äDê‹wGðÈsâœý×~ñ‹_ä–›I IñS¸çž{.=øàƒ¹=M꫾¨[`ÞYûéÐ_|‘öO¶n—¯‰ù1Z£©­ËÖôª»[ˆéµN{Y^j/_ì>|8}ÿûßïk[TmõÑ)þ™¾¸ñÑÿLÚnœ½‘þðoþ0-¹¯ÔËv{Uþ&06ñÙá…^H‡êë§KcóÀ=-ðóŸÿ<8q"Ýÿý úqè<^‘ÍâHݲSdÉRŸxú ž·Û©uݾ}ûí®vsùQôéfå üØQ<ØÈºÀiuŸ@e_^ü2ýæ“þ÷Îÿ]Y›"Ðtx‹ë¸÷s~¦?ý[<ÿþïÿž}¹WÊ0W8ÒÞ½{K?¼8Yo© ¡rØSÍAR÷°û3Nõ…­çqÚ¢Ëb˜ýå×׫^¬7ÆAÀgÞqØŠC/A³Gä—RAµW§Ê–MMMe×a-»~ü®vß¾}eW· @€ A Îù®ìë– ª6lHo¾ùfÙvs×[¹reÚ¼ysnY¿3Õ~¥¾ZîŽ;¾™î¼ó[··’¥ ¨@`6ÍÎ~–R2‚Z¶&ÆPà[w~+}óŽoŽá#óšÀlšMŸÍ~–âÖD`1 DÐ4Û• ªq%Ó¸ë®ÿ›¾õ­ÿ·ðˆG@`Ì~÷»_¥O?ý‹·&n_àÏîþ³ôÈ7¹ý­A`È¿úݯÒ_|ú)nMÜž@© z×]we§Ônýö1FC;§8.yff&›WÞ¹¼ûÕ ÄˆêÄÄ·«oX‹ Üq‡ëÐ"Y€@Qý¶÷·.:fW-0áõ¼jrí‰ÀeGœN;®‘a4.?“7ÅuÍâZ«iݺuy‹˜G€ @€æ ” ª÷ÜsOv¡â¨íèÑ£¹—=9uêTTc4õ˜×° @€ @ O TPߨÆ5OãöÌ™3éÈ‘#·\Ã,FY÷ïߟÍÛ¸qcrèo½y @€ 'Pê7ªQÑúõëÓÓO?]°ø…^ÈŽC|ãºPÌ~ŸºfÍš´cÇŽìðß¼ÆÍ#@€ @€ó‚jŒ’ÆèäädÏ€ËýèG?Êê‹Õ×_=û3âw©=ôPzã7ÒªU«:ÛtŸ @€t˜Tã©ÓÓÓYØŒ&õš–-[–^{íµ´uëÖlD5Ö‹#«k×®ÍÂn¯õ• @€ @€NyAµ5¢Ú¹`·û±|ÿvîÜÙm1ó  @€ @€@_¥N¦ÔWÍ"@€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ 
AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ª£³U3 @€”TK Y… @€F' ¨ŽÎVÍ @€ PB@P-f @€€ ::[5 @€ @€@ AµšU @€ @`t‚êèlÕL€ @€%ÕhV!@€ @€Ñ ,]Õj&@€ PŸÀ~ùŸiæË™Ú:píw×Òoç~;¯ý‹/¦cÇŽÍ›oÆâX³fMZµjÕâFh{ô‚j†?  @€ÆC`æÆLú«k•â¶iÓôôtŠ&íR8Ö¯_ß>{ÑþíÐßE»é=p @€Àø üÝÿ]#CêøŠ{dƒ ÌÌ̤W_}5]¿~}ЪÆb}Au,6£A€ Ð.wÈm{¹¿ 4QàܹséÊ•+MìZå}T+'×  @€ôT{é(#@€ @€ÊœL©rr  @€Ô!ðïÜ•žzêêhZ›æ ¼õÖ§>úbÞ|3¾Tí  @€‹BàÛß¾3ýñsQœ.^¼Ø³•+W¦_|±ç2  @€ @€@K tPýüóÏÓììlZºtiÚ°aCvÛª´ý6FRc @€èG tP=þ|º|ùršœœLH1rj"@€ @€ƒ ”>™ÒµkײU#¦ƒnë @€ @€@»@é úÞ{ïeõ¬ZµÊ‰’ÚEýM€ @€ ”>ô7ÎúSüþ4N¬tîܹì6æÅ(ëš5kØ6•  @€ °8JÕ¦­ËÒ|úé§ÙÉ”â~{x‘ÖçŸ>mݺ5MLLêÆºQoÙiu˶i= @€¸U ²YÑÕan]ãÖ{1ðY*¨¶‡Ê“'Of#§SSSÙmtjzz:a}î¹çRœxÛ¶m…a5%Þ¾}û­=¼{­|«X” @€†,°{÷î´wïÞÒµÆÉzKÕ£­`‡øúhZ½zuš˜˜´ ë @€ @€À"(¨ž;w.íÚµ+>}:ÍÎÎÞd;vìXšœœL{öìIÛ¶mVoÊøƒ @€ŠJÕI£©JwìØ‘î»ï¾tñâÅtðàÁ433“vïÞ¾ó龜z¨¨Ê  @€ @€@&P:¨¾õÖ[ÙHê²eËÒÒÆoŽœ®_¿>mÙ²% «/½ôRŠÖ¥K—"'@€ @€…¥N¦¿E=~üxv¸ïc=vKH׬Y“°ÆïS?øàƒ,°öÄ @€ @ ¥T*¨~òÉ'éÂ… àã?~s$µ]4ìŠ+Ò•+WRü–ÕD€ @€ú(uèïùóçÓåË—Óòå˳ÑÓ¼†âગ.]Jï½÷^zæ™gò3¯F7>JŸ}¶«Æhš<¹¹ß¦ßýîÚ¼¢ÝHŸíúlÞ|3ªÓÂxë×o¥Ÿ]ÿÙÂè¬^ŽLà£Í«û£n¤]^G繘Q@ì¦î¥‚ê_|‘ö¿;°š7ÅüU«Ve£©­ËÖä-ךËÄèkÙiu˶¹Ð×›ýeŠ&†Àì/gSü3 Ð[ J^Hé½–ÒÅ ðË_Φøg"@`´‘Íâ$»e§È’¥‚j?Áóv;£®Û·o¿ÝÕn.?Š>ݬÜ @€ З@\ýeïÞ½}-›·Pœ¬·TPP9ì)‚æ ©{Øý§úâ‰ØØa<==öíÛ7NÏc!@€@mñúê¬öµñgGuy«Ï_Ë _`\ÞW=Ú5rK© :üM’ÒÔÔT:zôhéªÛX±¸$Ži¾€/曘C€²ñF<ÈÑ@eÛµÞ|ïoóMÌ!@`á ,ä÷•ö×á;wfù®ìˆlX*¨nذ!½ùæ›eÛÍ]oåÊ•ióæÍ¹eýÌŒõÿíßþ-;qS„Vµ@¼)ûbtÔÊê'@€ÀâXèï+q”QdÅÈu6™JÕ&æ?ýéOSÝjŽ!ã8L9n£<ÂlSSú;~„øÖT×vŒ>D_šÒØ/}" ²_µ<êìGÓöÑð¬ó9Û”çJSúÑ´}Ôs%e¯¡Mx=oÚ>ê}%eŸy|öùú]¹µz_ùʤ ¯çÑ“&ô£ý³÷•ὯġË{öìézÂݯŸ}ü5Wbú‡ø‡¹‰‰‰¹åË—Ï={6·†_ýêWskÖ¬™K)Ííܹ3w™QÏü¯ÿú¯¹•+Wf}8zô訛ëZSú±=Â$úT×´yóæFõ#úSçÔò¨³MÛGc?­ó9Û”çJSúÑ´}Ôse.{~4áõ¼iûhSÞßì£sÙçŒ&|kí£ÞW¾ú¤Ó„×óèIúÑþÙÇs¶9ï+íŸÉïì#ËÎ[äþûïÏ®‘#—çΛW3®^½š]kubb"­[·.w3  @€ @€@§@© zÏ=÷¤|0«+N€CçÓ©S§² Cé<ð@g±û @€ @ W TP߀ÄYãöÌ™3éÈ‘#ivöë‹'Ç(ëþýû³y7n¬í·¡¹ØL @€h´@©“)Å#Z¿~}zúé§Ó¡C‡Ò /¼‡øÆÖ<˜fffÒš5kÒŽ;Rþk"@€ @€ý”ª1šú£ý(k#FT_ýõì_̈`úÐC¥7Þx#­ZµªŸ~X† @€d¥ƒj¬½lÙ²ôÚk¯¥­[·f#ªqªéøMjŒ¬®]»6MNNb&@€ 
@€·%0PP–bd5Ž;wî¼­ÆËÂq=¡ܼžk];®K'¿Šm}ªkŠý$.Ü”~Ä—+‹}jÚ>Û£ÎkÛ6å¹Ò”~4áùѾÖùœmï‡}ô«çiÞWš°FZïoöÑ”}ÎhÒgŸºßW죷 4í¹â9{ëöiʽƒjSH“û,N*U÷OˆuOu~¸kìMéG{ŸêúÛ>z«|Sž+MéÇ­:õÜkÊ>Ú”~4eßhJ?êÙ+ç·Ú„÷•¦ì£Mé‡}ôÖý´ ûhô¨ ýhÊ>Ú”~ܺ§4ã^©³þ6£ëzA€ @€ã( ¨ŽãVõ˜ @€ °€Õ¼ñt @€ã( ¨ŽãVõ˜ @€ °€Õ¼ñt @€ã( ¨ŽãVõ˜ @€ °€Æúò4irr2Ûµ ´öÑxÏ÷~¦ ÄçÐx¯·6mËèOK ‰ûèssss­ŽÛíììlº|ùrŠÛxVÇm Çã¹råJº~ýz¶ «ã±MÇíQØGÇm‹Žßãií£RW¬X!¬Žß&^ð(Þçc?µ.øM9¶ ‰ûèXձݓ<0 @€Œ±€ß¨ŽñÆõÐ @€ °Õ…¸Õô™ @€c, ¨ŽñÆõÐ @€ °Õ…¸Õô™ @€c, ¨ŽñÆõÐ @€ °Õ…¸Õô™ @€c, ¨ŽñÆõÐ @€ °Õ…¸Õô™ @€c, ¨ŽñÆõÐ @€ °Õ…¸Õô™ @€c, ¨ŽñÆõÐ @€ °–,ÄNõyvv6ýüç?O'NœH/^Ì_¹reÚ¸qcšššJK—.-ªB9‘ ÌÌ̤½{÷¦ëׯ÷l#öÕ;wö\F!a œ9s&ûì³¹;wÎ-[¶l.¥tË¿¥K—ÎýéŸþé\,c"P—ÀÑ£GoÙ/;÷ÓÖýÍ›7×ÕEí.Bßüæ7sO<ñD¶oÆ>Úkò:ÛKGÙ¨ng×ÏÖki¯Û¢}}TE½ã!Ðëµ0ö»ø,ŸIó>w~ùå—sGŽ™[µjÕ¼}ubbbš;{öìx@yµ ôÚÏb}í±ÇËÝךðyu¬FTã[­Ÿüä'iÿþýé®»îJÏ<óL6Š#Wo¿ýv:}út:tèPZ½zuÚ±cGÏo¸æ}­`! ´FRãÛÖ^ßä÷*RWTC ˆ×Î#Gޤ­*š¼Î )…Àíî£_|ñEÖx×ÚnS¯²në˜O bŸüÁ~}æŒû1*õ'ò'Ù(UÍGõÅÑ}ñ™4¦W^yå–Ï~øazþùçÓåË—ÓC=”ž{î¹lÝ÷ß?½õÖ[é_ÿõ_Ó®]»²Ï¯y£]Y¥þ#P Ù'ö­«W¯¦U«V¥gŸ}6{MŒÏ¢ï¾ûn:uêTöïÊ•+Ù>Û>ŠßˆÏ«µEü4üÑGÍMNNfßìØ±c.¾}mMñmÖSO=•}kËIJ&u<óÌ3Ù~hÄ´}m¶âõ1¾­oLÿüÏÿü–£Pz2ym ºµ@Ù}ô“O>™»ï¾û²×Ù^ûò¨û¯þñh½ƨT|¾ì5íõ¹³ýè€9ýÅ/~q+FÀþöoÿv.ŽŒÑ®ÿøÇ7ËüAàvÚ÷³¹ï¡/Úךðyu¬N¦ß\ºt)û¦àûßÿþ-¿Eo£â›©ø¦ –‰eMªˆo`¯]»–5ë›üªõµ×.ðÞ{ï¥ 6¤-[¶¤×_=û¶µ½¼Ûß^g»É˜?l²ûèo~ó›ÿâ÷WË—/v·ÔG ˆÑø<ûXŒŒvŽzÆý˜åŸ;ã·|ðA6ŸWÿèþè¦jüžõé§ŸNëׯÏFm?^xN‹›+ûƒ@›À'Ÿ|’b?)FR׬YÓVš²ý/öµ8ò$>Ÿ¶ïkMù¼:6A5@ãp‰˜xàÜC}âßµk×fËIJ±Ž‰@•qˆÏùóç³&Ú[¥¼¶:âÃS|Ú¼ysö/N6Wt¢9¯³ŠîR Ì>ý‰×Øx­õãP7QÄ)1ŇÿnûY ެX±"[®u²¤¸sîܹ‡ZFYöÛ9Åkq|‰Ó… RÛh-Œ÷û¼)öµÖþ_ Ä~SS>¯ŽÍoTÛAãLiygX‹y­o­Z¯ýXì¼ ha ÄýÏ?ÿ<Û?cüøãÓÙ³gSë÷TñbÿŠÃ0û¤®Å)_”´Y¿©Šû­3¥ç©xÍS1oTeöÑèK¼žÆkí7¾ñìõ6BA|‹)Î_Ñú2;ïs¨‹zÇO ö£ø _ì§ÝÞ³c?l½¿·EÕ ¹1€rÏ=÷äâÄòQoëu÷¾ûîË]ÎLÝâ‹'Ÿ|2ûÒ®}ÿë\>Ž@‰)¾ÜkíËMù¼:6Aµн6F¼ ìÛ·/{óŠuLªh}[u÷Ýwg'KˆC~"´öÅx‘xøá‡³Ë×t¢Qe?µE OÀëlžŠyMh\E(“ˆÄIiZ£Nã3B|xûË¿üËy‡k6í±èOsâÒFE—‹Ã.c44>üÇQ+­©u’šö`Ð*kÝÆ—ÖQŸZa·Uæ–@?_öå­‡¥·ŽQרçbjÊçÕ± ª1JÕú°Ÿ·!Ì#Ðø°oPñ/ΘâÍ+>PÅá=ñ/®§oL‡ž÷{‚&<}X¼^gï¶_H¼uT@¼žþÇüGv”J|‹ûÿò/ÿ’ÉgbóüÍßü°º6îék|ÿ/¾øbö~ÿÄOÜ<‚%Êϲ!Ǽ›RwïÞ}öŒ#L¿÷½ïÝ|ÄMù¼:6¿QmÊ{SØ(ÐúßêoÛ¶-û]õßÿýß§£G¦þçÎN_'`ˆCÕZop |º´H¼Î.Ò ¿€v| Ø=×Ò×^{-Åa–ñ¯µñ:ûÔSOe(¾ ŒË€˜ S 
>üÇ!›6mÊÞËã訽{÷Þ<¤²u(ï0ÛTÛˆ×ɉ}4^ã(¿={öd—ïlÕӔϫc3¢Ú‚uK ÉqXo|`ŠÔø-uûYãÛ¬¸¾oLq¦À¸¦eÂÖíðM~œúF€ºâpÌ8Ãeü>ë‘G¹yΊø‚ðþûïO?þñ³0×|ã7ÒÖ­[oîVWŸµ»ðâüo¿ôÒKÿ¿½sgi&ˆÂð~X)‚•`m+е"D­,I!D”Tþ+«t‚‚Pl,ÄB°²²µÐ&`« X óñ LXâf/Áì-ïÀ³3Ù™}÷xæÜ·JI¡$hL©<že¸¼ùDšðNßËËKëíGöDIÅyâÏÛÏ‹¼ZE•͇M K–šÈ+ä „5˜Äêêªe"Ð2žU)ªaˆ©/MÄgÓD[sõ‚@g.`Ð50nllØ—Üã5àpyYAãuND!ࢠPT ë¥HÆr¡;‹v"«ÂK___£.«~!ðg|~~zûûûÞùù¹Õ•\¨/t ½ú•T&Í‹¼ZšÐ_ªûu‚ügOW)"@@˜ÍIqzM%º" >Ûu <\(§„ »ŠÀ»-7à¡"|œWÉJ‰„È(BÌëõú/%•%#«U¥&ÒBC Æ9BÒQXÉ™¦N ©MzÕŸÒWKãQd„(š‹«"'ø³A¹ÌAãtNd…ùjB oˆÏæí‰h=½"0::*e¡Wðô»6x§¨,ýõõåU*뱚™™‰üüé Ö¸ïí ûª®Ò'¿ý q„õ¢¬RIšú'¼?=ˆæ’\—±iÈ«¥ñ¨ºP €{xx¬ŒåëííÍ>gMMúP4^ôŠÒî¼Ô­ù -D…^t»†Î ~ >ÛTuÍ¿Dc4Þ­íííÐT W áMM$E€PJ”T³³3œÔ8Þ©ÅÅE;ù‚¼¾&¨át!ïÕïµ §sB Ð…¼PRÑ{¨‘R­V#•Ô<É«¥QTa KKK–AP~žþÎÆ9ú»¼¼ÜÙ­ïB ¯ŒŒŒØ3؆݀‚&ô¿wM‚Ò¹¬ŸÍ yÍø,†À““ïêê*ðg­ïîî¬A›W„…½{=ð:)<φú¢¬’ªC1qÄ(€ðº".¢š±IDAT’#ˆaš¢^ ¥‚†ò‹‘PM$Eú$$ï) k\™2OòjiByx$­yXJle?Ç8ˆÉæjˆÇ¦âªšH4ŠÇAŠ’àXa]øÂS·÷®¥¹NÍ%Ÿ CG}Y#€w”j•gäcÍÍÍyþPL€f³ió 1¼Ôj5…UfýÐ 8?ttsscWNªÎããcä]!åŒ"(·ðÒÃÃCK§T§vJ²4Šœ€|°¹¹ËK¹ 8 !¼£DŽ´Z-[é7 è:Í•¼jJ֚ͦ3CCC¦R©˜ÓÓS{ð7çècŒšÈK—žç™ááa³°°`öööL£Ñ0kkkfbbÂÐ7==mžŸŸ³X¢æPZ­–™œœ´ôwqqŠ‚øl(<êìqiôéé©ÍKá©ðVx,¼ž ïEØÚÚ2ðd5!?-²gÇ9:ùêËË‹ÝëùíÔÔ”¥QÆÔëõ¶[«ÕÌ÷÷wÒåi¼°¬¯¯Ç¢MG¿Œw-/òªçT–ÏŸŸs||ÜÞ¤ø|²aÑÇ5!lNÕjÕŒÿb P+++RR³z8<ïûû»™µÊêõõu(ⳡð¨³OÄ¥QèóööÖÌÏÏ[¥Ô/8Y¥UJjŸÔ\ÖO‹øâA|ƒ4|ÉŸN‘PRE£@L}¼ÅX´éè—ñþ–yõ s±° òQ £¼¿¿··@â:á?Aï *â=jÍÅF€°!rHp‡FùN¸¡?.ì¢Øw¨Õ ø&ùR|Æ©Š.>[´'\üõ&¥QBÞà±Y"œM²@ñé wà§Å¸ëéÆWII£~ iAP’,Q‹Bˆ|·Á#¡SËZ^ý’5Ã?0IEND®B`‚rocksdb-6.11.4/docs/static/images/delrange/delrange_write_path.png000066400000000000000000003260511370372246700252360ustar00rootroot00000000000000‰PNG  IHDRíã°C! 
IDATxìÝ ]Õ}'ê%©õ=ŒÂ °A¯8Æ3Âæa3ÄOÏ;® <.ÈøŠb|íOÆ¢(›S—1ŒqàÚ\'£‰ 3™]‡) ^’€ ê€x 5B/Ô·þ§½Oï½ûœîÓ­VŸÓÝߪRz?×ãÛ]‘K?ÖZc:;;;“B€ @€ @€ @€@ÓÆ6­e  @€ @€ @€ PÚùE @€ @€ @€ Ðd¡]“?€æ  @€ @€ @€íü @€ @€ @€h²€Ð®É@ó @€ @€ @€„v~ @€ @€ @€4Y@h×ä y @€ @€ @€B;¿ @€ @€ @€š, ´kòÐ< @€ @€ @€¡ß @€ @€ @€MÚ5ùhž @€ @€ @€€ÐÎï @€ @€ @€& íšü4O€ @€ @€ @@hçw€ @€ @€ @€@“„vMþš'@€ @€ @€ ´ó;@€ @€ @€ @ ÉB»&Í @€ @€ @€Úù @€ @€ @€ Ðd¡]“?€æ  @€ @€ @€íü @€ @€ @€h²€Ð®É@ó @€ @€ @€„v~ @€ @€ @€4Y@h×ä y @€ @€ @€B;¿ @€ @€ @€š, ´kòÐ< @€ @€ @€¡ß @€ @€ @€MÚ5ùhž @€ @€ @€€ÐÎï @€ @€ @€& íšü4O€ @€ @€ @@hçw€ @€ @€ @€@“„vMþš'@€ @€ @€ ´ó;@€ @€ @€ @ ÉB»&Í @€ @€ @€Úù @€ @€ @€ Ðd¡]“?€æ  @€ @€ @€íü @€ @€ @€h²€Ð®É@ó @€ @€ @€„v~ @€ @€ @€4Y@h×ä y @€ @€ @€B;¿ @€ @€ @€š, ´kòÐ< @€ @€ @€¡ß @€ @€ @€MÚ5ùhž @€ @€ @€€ÐÎï @€ @€ @€& íšü4O€ @€ @€ @@hçw€ @€ @€ @€@“„vMþš'@€ @€ @€ ´ó;@€ @€ @€ @ ÉB»&Í @€ @€ @€Úù @€ @€ @€ Ðd¡]“?€æ  @€ @€ @€íü @€ @€ @€h²@[“Û×<Œ"µkצÇ{¬0âsÎ9'Í›7¯pm0N6lØ|ðÁBU§žzjZ°`Aáš @€ @€´‚À˜ÎÎÎÎVèˆ> @€ÀȈmþüù5yÍ5פåË—×¼7‹?üp:óÌ3k¾ºyóæ4{öìš÷\$@€ @€ @€@³,Ù,yí @`” <ûì³uG|ã7¦õ«Ô ì¢þí @€ @€´š€Ð®Õ¾ˆþ @`„ lÚ´©×‘}ï{ßëõ~£7W®\Ù裞#@€ @€ @€@ËíZæSèF—Àܹs ŽÙv…k9¹ãŽ; ¯-Z´¨pî„ @€ @€­( ´ků¢O'žxbºüòË #]µjUἿ'íííéÞ{ï­¾{årÈ!Õs @€ @€hU¡]«~ý"@€À(X¼xqa”åYr…› œ|ÿûß/<õ¥/}©pî„ @€ @€­* ´kÕ/£_åÐ.fÉÅl¹”XZóºë®«¾ËoΛ7¯zî€ @€ @€­, ´k寣o±„e¾”gËåïõv\^Zó?þÇÿØÛãî @€ @€ @ ¥ÚZª7:C€£N –°¼ñÆ«ãŽÙrË–-«ž7z°|ùò£^xaá|_O6lØž}öÙ´iÓ¦jU'œpBš?~š={võZ#+W®¬Ö³dÉ’ïG[¿üå/ÓÖ­[+ÕE; .LS¦Léµúò{§žzj¥}½×k¥¿¾3 7oÞœ{ì±Âãm#ê»çž{*uÍ™3'åg]ƬÉ5kÖ¤§žzªr?ÆÆgT۽뮻 6ù{Õ‡z9È·=}úô¿+ƒaÔK“n @€ @€èS@h×'‘ @` Ä–±”寫Í<üðÃ…¦z£ÎA„U«W¯®ÞÙ{ƒÂD@ôÍo~³Ð¿jC¿>¸è¢‹*Ks.X° |«æù\P¸~ÅWTÎcÜðP·­n¸!]uÕU=ÆÖ×{+V¬HY…†8‰€1Ѽo­×úkÝ•W^Y­ª³³³rc9óÌ3«×³ƒE‹¥G}4Å·¾ä’K²Ë•Ÿ&ö'8ýßùÂx" lôÛvB€ @€ @`,9ˆ˜ª"@€ ”—²Œ ¬?å{ßû^áñ‹/¾¸p>“‡N?ýôJ@”kÕ{ñÅL¸[n¹¥Öí>¯Å̲/~ñ‹•°ª·¶bâyç—âù(ñóÚk¯íó½Çâ¹þ”˜〱¯À.êÍ bYÿúÓ^<a­À.îe}ÈBÞ|ÝÙŒ½üµzÇ1®¬®ì™˜-© @€ @€h¶€Ð®Ù_@û PYž0Ïpûí·§W)å—׌Y{û:k*»rÊáN_ý‰p¬¿Á],A\Œ¹‘}úÆ7¾Q Æâ½üØ{{?žk´o1þÃ;¬ßãöcù`±·>åïÅ »ò Äüýüq9äýîw¿›¿ÝëñC=T¸?˜³2 ;!@€ @€ ÐOËcöÌã 0ø±”åå—_^®"\YºtiŸ­ZµªðL9Ð)Ülà$BÀßú­ßªùd»Ð½˜AcÌÂë«Üzë­…GÎ?ÿü¹ @€ 
@€ÍÓ™m"Ó¬h—F…@ÌòŠÐ(+±Z~YÃò^f1c.‚˜¾J,ᘯ¶mÛV¥–,YRYº1«§¯ýËÊýŒ÷î¼óκbô1¦|ÊcËÚÎ~Ž3&;,üŒPìÇ?þqýÙbiËÞfÔõç½ûî»/-^¼¸Ðnþ¤Öøcš½p\küHF¸Z«_ÙË1#2œÊ妛n*_êó<ê©5K1ØE%åà1f$öUÊKcFЧ @€ @€h¡]«| ý @€TQî¾ûî^U¾÷½ïîïëR‡å¥6£?,¹RyYÎr@Tèhé$‚±æÊÁTþ±rýq/fÀõ÷½çŸ>_má¸<þ¸ùï|§ðL½“pZ±bEávÌêëO‰vxGåÐ.BÞ˜­Ù[)/ùùϾ·ÇÝ#@€ @€ 0¤B»!åÖô&PQbIȘýU«Äõü’‘1Ó­<[¯Ö{½]+ÏB+÷§·w;î¸Âí'žx¢pÞÛI#ÁX¹þ¨ïÏÿüÏ{«¶r¯Ö{õ^Z¹reáV„–½‰…‡SJ±i¾DÖèŒÃh«Ö »|}åã˜e˜/?ýéOó§…ãèG~Vfü¾4*rB€ @€ @`? ´í§zUK€ú-!JìÏ–WböWyVUT\žvõÕW÷»½ò åYh>ø`Š?”_|±ðX¹®ÂÍÒI#ÁX­@«‘÷Ê{¸ÅÒõJy‰ËsÏ=·Þ£5¯Çš†EX—•ÞöµËž‰ŸÓ§OÏŸ6tü¥/}©ÜFˆ3k•òÌÇÁø}©ÕŽk @€ @€* ´¨œ÷ @`¿|ùË_.„vÂÔ íʳ¬Ê³¼Ò¹r UÞßn u6ûÞö¢+÷-–ƽZAaùòù‰'žXížyæ™ÕS®·Öy„¼å0–Ȭ5ã²¼4æ`ü¾Ôê“k @€ @€*`yÌÊyö‹À…^X¨7‚¤ 6®­]»¶ Åžpý § •9T£>ºPßÖ­[ çƒ}Rž1w×]wõh"~òdÌæôûÒƒÉ @€ @€& íšü4O€EXò1B¸|ùË¿üËüiºûî» çK—.-œ·ÂÉŒ3Z¡CÞ‡ƒ:hHÛ,Ϙ»ýöÛ{ìƒXþý‰Ùœ  @€ @€V°»PÝ7Þ˜~øáµ¾NbIÉþ¾ÓWCuÿóŸÿ|¡©H¿ño®Õ;‰@5¼òe(²‹/¾8ßtºõÖ[{,Yö /8!@€ @€ Ðd¡]“?€æ  @ ¾Àµ×^[ÿfJ•=îz} Ÿ7#(,/yæ™g¦[n¹¥ÏgÔÅL½˜­ï Ç2oÞ¼ã î‹_übu¹Éò¸:::*>å@5öÊÐ.–áŒ6³²zõêôío;;­ü<ÿüó çN @€ @€´’@[+uF_ @€@^ ¼WYþ^yÏ»ü½}9þ³?û³KqæË•W^™âOzÊMŸ>½z;–a,?_½9 ®»îºã‰=ââOŒ??qݺu•뵆ù£ý¨Öåýz-f^Fÿ³}ÎJzgœqFvê' @€ @€–ÚµÜ'Ñ!Ȳ½ÊÊË.ÆýòžwÙ;ûú3f›Ýwß}©àêk…Œ®¬ /Æø#xH ·¡\3ßÇZû f÷Ëûõe×ý$@€ @€ Ð*B»VùúA€._R²?C% Ë3׆bÖT´»mÛ¶´bÅŠí×ê‰wÞygåf…Vµú5Ðk1†Í›7§k®¹¦Ï*âû„Sx5{ ÊZû .Z´(Å J… @€ @€@+ ŒéìììlåêŒ ŽŽŽô“Ÿü$mݺµ2 %K–4<[®½½=ÝsÏ=•÷æÌ™Ó¯™\åwcŸ¼Xv³¿eÆ •ë©§žª¾AäqÇ×ï¥ W®\™6mÚT©çÔSOmøý¡~¯:ДÒÚµkÓ+¯¼RíwÜ‹oqì±Ç(Ë—p¼ð ô]ò}ÌŽO?ýô´zõêì´¦.]º´zî€ @€ @€@+ íZñ«è 0 ;ì°Â»1p Am¡' @€ @€ö³€å1÷3°ê  @€¡Èfdf-^~ùå» ÃO @€ @€–0Ó®¥?Î @€ý3fLáñŸÿüçMßg¯Ð!' 
@€ @€ê˜iWÆe @`x Äžù2wî\]Ä1 @€ @€@K íZúóè ШÀòåË ^}õÕ…s' @€ @€ZYÀò˜­üuôhH`íÚµiáÂ……g7oÞœfÏž]¸æ„ @€ @€@« ˜iת_F¿ @€†î¾ûî³—_~¹À® â„ @€ @ ÕÚZ½ƒúG€èKàâ‹/NGuTå±éÓ§§ /¼°¯WÜ'@€ @€ ÐR–Çl©Ï¡3 @€ @€ @€£QÀò˜£ñ«3 @€ @€ @€@K íZêsè  @€ @€ @€ÀhÚÆ¯nÌ @€ @€ @€-% ´k©Ï¡3 @€ @€ @€£Q@h7¿º1 @€ @€ @€´”€Ð®¥>‡Î @€ @€ @€ŒF¡ÝhüêÆL€ @€ @€ ÐRB»–ú:C€ @€ @€ 0„v£ñ«3 @€ @€ @€@K íZêsè  @€ @€ @€ÀhÚÆ¯nÌ @€ @€ @€-% ´k©Ï¡3 @€ @€ @€£Q@h7¿º1 @€ @€ @€´”€Ð®¥>‡Î @€ @€ @€ŒF¡ÝhüêÆL€ @€ @€ ÐRB»–ú:C€ @€ @€ 0„v£ñ«3 @€ @€ @€@K íZêsè  @€ @€ @€ÀhÚÆ¯nÌ @€ @€ @€-% ´k©Ï¡3 @€ @€ @€£Q@h7¿º1 @€ @€ @€´”€Ð®¥>‡Î @€ @€ @€ŒF¡ÝhüêÆL€ @€ @€ ÐRB»–ú:C€ @€ @€ 0„v£ñ«3 @€ @€ @€@K íZêsè  @€ @€ @€ÀhÚÆ¯nÌ @€ @€ @€-% ´k©Ï¡3 @€ @€ @€£Q@h7¿º1 @€ @€ @€´”€Ð®¥>‡Î @€ @€ @€ŒF¡ÝhüêÆL€ @€ @€ ÐRB»–ú:C€ @€ @€ 0ÚFã ™£I`ÝÓ›Óºg6§M¯½ž^yííÊП^¿u4+Q!püüé•q>ëiάƒÓ1Gœ>µð¨Q1vƒ$@€ @€Œ1#a Æ@€ÝOm|5ýå߬K«×lJð½4ýðMé€i不¿WyhÖov?ìˆ!ðÚË3*ãxwëä´ýÉiûëL/?7%¶àðôÙq¢oD|eƒ @€ @€F²€Ðn$]c#@`Ô ´oÙ–n¼ã¡´éWÒ‡N|&}hÞkiüÄÝ£ÎÁ€ èxÏØôÊÆYéÕ Ç¤I‡§k/;'Í=r& @€ @€ÚµàGÑ% DàÖ¿X~öÿ=•æ-Z—>4÷ÕTáF°@ÌÄ{úáé7Ž>*-ûÒ¹iÂøq#x´†F€ @€~B»á÷Íô˜]»ßO_¿å§©cÒãé蓟NãÚöî;!@€@^à¥'Loo<9}û+¥8 Ë1 @€ @€@„vMÄ×4öU –ÃüÚw~’>ø›¿LšÿOûZ÷ %o¼2==³jQúÓ?ºÈr™£ä›& @€ ÐúB»ÖÿFzH€š1Ãé_ñ IDATîKÿ×Ó1güMúÀ¬wj>ã"ê ì蘿÷3é»ÿáâ4û©õs @€ @`ˆÆQ;š!@€AøÚwîK‡}üïvƒìª:£E`Ò”]é7Î]]™­ÿ€B€ @€ Ð\¡]sýµN€ Üú«Óî©ëÒaǼ2 ÷½D€ˆYºþÇ  @€ @€4W@h×\­ @ ß_z#­Zû¿ÓÑ'?Ýïw½@€²@„ÿ¯¿÷é‘5/–o9'@€ @€B¡ÝbkŠƒ!ðÿøÑtÔÂ'Ò¸¶½ƒQ: æ~òWéÖ?J‚ @€ @ ‰mMl»GÓwÝuWºçž{ª×¯»îº´`Á‚êy£?üpºù曫_uÕUéŒ3Ψž7zÐÑÑ‘¾ò•¯¤7ß|³òÊ’%KÒÒ¥K}½_ϵ··§›nº)=ÿüóiÆŒéÿøÓ¼yóúU‡‡ ùëžÞœ6¿ýr:mî«#°FH€À Ä2™úrºçÁÿ–œóCÖ®† @€ @€ºZ*´‹ÀîÞ{ï­ö.«Ûn»­zÞèÁ™gžYxtË–-éÑGûÿ_¯Zµ*Ý~ûí…ºöWhwýõ×Úzä‘GÒ†  m;!@€À_<ø÷éC¿ñ  ºÀáÇmL+ÿׯ„vƒ.«B @€ @€@c-µ¹öÚkSñ'–UÙ±4æ¬9Ûíe7²?³ÑhªÀ?²%ýÝ›šÚ @€ @€F«@K…v±‡Ûܹs ߢ^Wx(wûÙÕ*åYsµžÉ_‹™yåÙnÙ/_goÇgŸ}váö 7ÜP8/Ÿ¬]»6Ýxã•åDcIÑü^€åg 02~þøútð‡_ƒ1 ZR`摯¤G×Yž»%?ŽN @€ @€#^ ¥B»Ð¾à‚ èõB¸ÂC¹“zKj–gÍå^©yXž™wùå—×|n°.^qÅióæÍiÍš5iýúõiÙ²eƒUµz![ÞêH“¦ì!£1 ZQ`ò;Ò[ßmÅ®é @€ @`Ä ´\hwþùçÐë…p…‡~}ÒÛRš1k®·ûåúÊ3óöç,»¬íÙ³g§ ¤˜q¨ @ ,ÿÿ ® @` ÄðöÖû«zõ @€ @€ô"Ðr¡ÝÂ… {t7–‚l¤”—Ò\´hQáµòýÂÍÒI,=™/åå+ó÷ @`(âÒÍ´ im½N/½þÆîÑ `ä @€ @€& ´\h7eÊ”TÛ{챆ˆÊKi^{íµ…÷Ê÷ 
7s'åy±Ï^Ì‚S ÐLø‡ôøu… @€ @€Fž@[+éâ‹/N«W¯®ví¡‡J±ç[_%¿”fìAwÖYg^‰û·Ýv[áZ­“òŒ¼ò>{åwb&à 7ÜP¹|ôÑG§åË—W‰{<ð@e¯º¸3 ï÷~¯GØÞÞž®¿þúôæ›oV޽ꪫRyIÎ[n¹%…E”-[¶T~fÿçÞ{ïMK–,ÉN«?g̘ÑИã…èCÔÿÄO¤çŸ¾ZGôùÄO¬xF¨ª @€ @€ @€ ®@K†vçœsNa”HõUʳã"ðÊfíåÀÑb߸ÞJyF^yŸ½ò»10ßÇ,´ûÖ·¾•®»îºÂãñ\\ëìì,\ß¼ysʇŽq³Ú]yå•…wÊ'ù>äïõTfa¹ý¬Ž|½+V¬h(@ÍÞõ“ @€ @€ @ o–[3º)‡zYðwî¹ç½ûî» çå“òŒ½‹.º¨2c¯ü\o籌å7ÞØÛ#ºWÞëo@•ä^ŠÀ®¯¥?sWcæÝW¾ò•ê¹hL`×»ÓkOOIíÿ0%Åñp,;·M/¬êúÇ  @€ @€ƒ#Ð’ËcÆÐN?ýôBðuÿý÷§eË–Õu~v\ìg—•òR˜±TfÌ«·7[yÆ^­}Ⲻëý,/cyçw¦ãŽ;.mÛ¶-ýô§?-ìW¯ŽZ×üã§XF3Ê3Ï<“.¹ä’êcè}ç;ß©žgS§NÍ ?cIÌZ]Ø-^¼8~øá•ç£Ï˜ÞqÇiãÆÕ:Â;ž‹? ZYàÉ¿œ–ÞxnLÝ.Î>©3}`ÎÞ4ãè]©mâ®ºÏ Æ76ŽO~6®RÕGvIsNÙ9Õªƒ @€ @€ в¡ÝÉ'Ÿ\àí-l+ÏŽ+ïAT>Ô[³fMýâ²ÆÊ3ö²e6³ûýùAZ„ù€°Ü·þÔ7{öìj•C9¤Ï½úòïÝtÓMùÓ} £l†bþfôùª«®JßøÆ7 Aj̆Úå¥ Њ½vÑßöucRûºÒ&§¿?.M™õÞ~Æž]õÃÃÁntÇ;“Ò{oKãÚ:Ӵ÷võê#@€ @€ @`Zv]«§ÊËAFØV«”gÇ•ƒ¶rPÖÛR›ùp/ÚªbÕêCùZ­À®üL³Îc–]yùÎ{kË—/O±\hVbæ]90ÍîùI€VøÄîLÙŸ“ÿ`Wúèö¤˜i—•µÿ}BÚŸû´z®4ï³ïWþÄñþ,[žŸ~õ£¶´îÎñû³u @€ @€ 0H-ÚÅøÎ;ï¼Â0ciÉZ¥<;®>•C¼Xê±V)På}õj½SïZ,e™ŸaWï¹f\衇 ÍÆòöõ²Ë.+¼ûàƒÎ @ •&MÛ‘²?Ìx/tä»iÞ¹ï¤ã¿_íö «ö_È5áÀiöÇ:*âX!@€ @€ @€@&Ðҡݹ瞛õ³òó¾ûî+œg'ùÙqùýì²ûâÍ;7;­ìͳÍÊ¥@žz)+V¬¨»Œå@êìwÊ!çÙgŸÝpÙ^wÙ ëÖ­Ëý$@€À°˜u|GšyL׌»XNsζ¶H:N€ @€ @€À~hÙ=íbÔ ,( >–cŒ°-¿¯[yv\y)̬‚ .¸ °$äã?Þc?¶ò ´³Î:+{}Dý\¿~}asqï£_WYž3ÿ\þþ'þ0¥¦j·×–>rÖ„4ç”mù×Ó;¯Ps»¿û¯ ÏÅ^~Ù{¸Xx¡Ÿ'˜–Ú×uš']²;M;¼g[Ö½úÔ„:–]ã‹}ÿòádÇk“Sì;%öœý±žõæ»ºå™ iÃϺ‚ØOþQg¡®üsŽ  @€ @€´‚@ýul…Þ¥”Ê3ç}ôÑBÏÊK=–÷³Ë>ù䓳ÃÊÏ|Ö®][¸ñÅÎ @€ÀÈØýn÷¬¹Z#éÙÿ1©2qÚÞô‘³öV—ÖŒ÷žY9.ýã#SkUѯk»¶-´7û¤ÎB;núe±±ãûûÕ‰<cÏ»Ø+pÚáÛ{Ô–îï?3ǰŒqF‰ñÅ3ñlV¦Ìz¯j…qÙ½Z?³g¢Î|øWëY× @€ @€4[ ågÚ•÷[‹ÙrË—/¯ºå÷ZûÙeÆ’š‹-JùYf±´fò=öØcÙ£•ŸçœsNáÜIm… Ö¾á*†™ÀÛ›ºÿ;–ɽߣ÷ Å~wQ"ŒÊϦ›sJJ±äsÿsBå™—16Mš6%ÍþXíw=*¯qaÝã+W+AÖoî¬.g™ŸMÁÖÌc&§f¼Wy6B­O­ë8½lFà§¿¶µF ƒ)»{”²Q¾µÌ2öüÈY»«ýÏžùà‰]3Ã;fãÍ9egv+Í:¾{Ó°¨ ÆÃq/+‡Ûó{f÷ü$@€ @€ Ð*-ÚEØ6wîÜûÙe% ÛÝÏ.{ï¼óÎ+„v±´fÚ•÷³Ë®gïŽäŸkÖ¬ððÊû¸"/ @ ‰ðd³ÃbVVyÿ³7ž;°ØÅ²ŒùÀ.ëv¼3ï¼Îô·ÿ¥kϺ˜å5sîÄ}šáU+øŠê£_è^f³ãŸÇõºÿ^Ö¿ýýóµ§§4Øå-øäž]ô3Æ8ï³S*K[Fðxè ÝŽÓ>´'¥ÔµäåÏKÓ¯=²¸—•©ÜúI€ @€ @ eZ>´ ¹Ë.»,]wÝuUÄ,lkt?»ìÅÓN;-;¬ü¼çž{ÒÒ¥K+Çù}Ýz›±W¨`˜ž”gvØa)ÂQ…£Q ¿\ŒÿC'÷ xÚÿ¡;:ä¸úû¨ÅŒÇ-_Y"3êz÷õ¶‡võÂÁ¨·+„êú+|gGïËzÅ7À.–R+hÌ÷!³Œp´Þ,¹x~ú1;®«Î¼c„£³OšP YcVßœOL¨ÎBÌÚ‰½³±ôfÛÄúß,{ÇO @€ 
@€Íè^ ¬Ù=é¥ýrØ–íc—ýÌ^íkv\y¼,¨+ïg·xñâ¬Êùsþüù…q­_¿¾pî„#U –ŒÌþlx`Zú_6=ýêGÝÿýJNÙR“™A,{™-‹KUöMù`÷RŒ¯ü¯ÙIÕߟ®¯>d}ŠŸz6ØÅþt™åÔC÷öÚ½ v;îx«Læ—»ÜöÏ]ˈæ+Ûþz÷7=äØ˜™§ @€ @€h}îÕjá¾–÷M‹}ìn»í¶Ôè~vù¡]tÑE) ëâzv<ð@þ‘tòÉ'Î[ñ¤¼åÇÔW#”ÌÛÝ|óÍ©höU‡ûŽÙoå¾Ç¬¯˜aWìâ¹=;º£fô4Åóù°é½RØTn·Î#të-\œyLí%,£ïo¿4¶:£-ÍZˆæÇ¸÷ýnËm¯ŽM{~95»îñž]ÝïÅCù™†[ž—:²øj~iÌØçO!@€ @€ 0†Eh7eÊ”TÛV®\Yðm4tZ²dI!´‹Àîþûï¯ÖKG‡¥"ä\ÚÛÛêûYgUx5¿o}ë[iÙ²e…ëõN:::Ò~ðƒÊí+®¸¢Þc® @ å>ú…⬫qmiÒÞïu ËÝïuÏ–ÛsRW1vÍ‚›\¹ûäÍ;·Ç#-u!»zafttÊ¡cëî™7iZ÷lÀ¨ãc'õØ0?Ø÷Þê^f´kÁb—6Ü6¡»¸Æ9kb¥ßQÏQ‹º÷¼Ë/KŒ* @€ @€†‹À°í³¶]pÁãSN9¥p^ï¤ü\v«W¯®>~ÞyçU‡ÛÁõ×_Ÿ¾óï¤,ЋpmÕªU©¼ÜgÜ¿æškÒ7ÞXbì¸fÍštÕUWÕu—ÍJÌï/(´«: @`tä»CÚË™Çæ!m¼ÁÆb&]sõʃÎüs±„çI—ìNëîìJ3ŸûŸÒ‰Ÿß‘¤îqÌÌ›ÒÇ™ÙË]3ë²³®ŸýšP9ÙÖÞ–f³³rœ_³k_¼â{Î @€ @€´ªÀ° íÊa[´¯ýì²çËÏå»xæÜs[|JD6”zÌ>Œ%/ãOÌJŒ’-™ÙÙÙó¿þõ¯B»ìùì˜qxÈ!‡T[Ë®W/8 @€À(ˆÙxYÙýnß3Ãbß¶¬L?¢ûÝìZ«ýŒ%A˜1ð^M;|{š÷Ù)iÃÏÆUö«ûÇG¦¦j[Í '3ߺþ§G,5º/!jײ—]¡],‡9ó˜®&³¥1#0­µÜiÍŽ¹H€ @€ @ êÿ§õ-й|"l›;wnþRõøòË/¯7r³Ìê• Ô»Õr×/»ì²š}Š€­¯-fÛ­_¿¾æûq1Â̬žzuE°§ @`¤ L˜Ú½Ý»¯7ÚuÿÕ:qJë‡vƒñýf¬#ž€Q^þÅØôÆsÖ¬vì¸níov;Õ|¸‹Ç-îZþ2–ÈÜþæä”_sÖñÝß­ªzå÷êç};þy\Úº©{ÓÁiª¿¬g½ú\'@€ @€ ÐLaÚþù5­Î>ûìš×ë]<ùä“kÞZºtiÍëý½8}úôþ¾Òãù3[«lùòåiÅŠ5g!ÆÌÄûGÝù ³gÏN÷ÜsOe?»¾Àꢭ˜¡ï”—Í×ë˜#I ›Ñcj¢{ùËò;^›œ²0*fž5syÆü,¿]ïŽ+wuÐÏ#[ðû»ªõ¾°j|Ê/šÝÈÏ€kÿ‡)Ùåý ßl†ßkOMÙÒ˜q-¶¨r/ @€ @€bîéCÜð@š‹u13,þdeþüù)–zìO‰ jÛ¶m…å!;ì°×R®¸âŠtê©§V^:uê€Â¬X–3°èW”þ,ÓíÇŸöööªMÇíÅŸ;:: 6Ó@,½C€V˜qô®4ó˜I•=Û"”Û³cj:ôÄ=ÕP.©·_ŸžYÙŽñÉî«cš4=–‡ìêÏ«O¶¥COœœÆ¶u¦=;Ƥ¶Iû'ÔŠ}æŽ[<®âðÆscÒ+k'¤ª{^8Ì:¾#½öô´Še샷g×Ô4ó˜nËx&fîØ:.ŬÅðþô×¶Ö%œyôû©}][¥¾ì¡¸¦ @€ @€nÃ*´ ÜÖ®å?N}ý ÆòïÖ:ŒºöuæZ«ÚÔòrÃI mâ®4ï¼1iÃý+áP,“ùò/b™ÌÚKeÆŒ³fÏôšvøö4ó˜®p¬»¿]êýž4iÚþùʽûúÔÊÞvÑîO©uùÖæ·³j¡Ü «ê[æß«u<}Îî”RñÎt]«õ´k @€ @€ZW`X-ÙºŒzF€­.-£8Ð~ÆòÇ~nGÊ/•Y®kÞgßOÿâß¿—bÆY­2®­³z9œ]Ì_Ëg÷ó?³½áò×ÊÇŽÕ÷Ä©1 o`%߯üq¾¶ÃÄÌÄ®±ÆìÃòžzyËì¹üûq×ÃúXœ©W~.ÕœÕ=ž#NÛ›âšB€ @€ @`¸ ŒéìììþÄáÖ{ý%@€À(XôooMÿæ?ül¸µ‡ºãI…6{f]¡3¥“X¾sïûc*W[±Ÿe˶‰ý ÞÞzéÀô«uͶ;é’Ý)f*øÿógÓê~yàx“ @€ @`@Åõ¤T…— @€ÀèhÅð«ÞWˆ™m­\öÕr˳Ý{ pðžVª¾ @€ @€¨+`y̺4n @€­.°ýÍÉ©}]×,ÂXžÔÒ˜­þÅô @€ @ ž€Ð®žŒë @€@Ë l}¹{–ÝÔÃÞoùþê  @€ @€zB»z2® @€--°gç„´ág]¡ÝÌc:Ó”YïµtuŽ @€ @€@oB»ÞtÜ#@€ZV`ûëÝ[óÎ:~oËöSÇ @€ @€4"Ðý¯]<í @€@‹L;|{úÄNªôfÒ´-Ò+Ý @€ @€ 
00¡ÝÀܼE€´€€°®>‚. @€ @€ Š€å1…Q% @€ @€ @€. ´¸7  @€ @€ @€ Š€ÐnPUB€ @€ @€ @`àB»Ûy“ @€ @€ @€À í…Q% @€ @€ @€. ´¸7  @€ @€ @€ Š€ÐnPUB€./ù¯Ó_ýÕÓé7¶#!@€ @€ @€ ´5ü¤  @ O[oý­>Ÿñ @€ @€( ˜iWqN€Axóííéæ;ÿ6ÅO… @€ @€ô% ´ëKÈ} @`çî÷ÓïÿUúüÕ?Þ ÀÏ+ @€ @€m–Çm_Üx R]¿ï~ò7O¥ ?sBZú¹ßL3>pÀöAcš)°ýÍÉéçºþçÆÌcö¤f¼7 î V=jÜK @€ @€†@@h7Èš @€€ðÎïÀhعmlzaU×Äþ)‡ŽM̘Ä`Õ3°Ö½E€ @€ @`ÿ íö¿± PÞU)õà™š^þE÷ŠÏ9košsʶµñÚÓSÒ3+ÇUß=â´½éßX]ÕJ @€ @€ @ îáìãA·  @`ð²ðΞwƒcšì¢Æ˜ÙÕñÚä~W¾ãI…À.*(×ÝïJ[ì…ã[/˜ÞyÅ2­-öit‡ @€ @`” íFù/€á7Ýô·iÆ7Zbè»Áý ³Oê¬V¸åÙþO&ë…lïµîzªŽƒ-ÏŽO¿úQ[Zwçø2"à @€ @€ 02„v#ã;ýøêW?™æÍ›Ù7öÿ£Â»Á1ž|Pgš÷Ù÷+•Å ¹]ïNl¸â=;'¤ ?ëZsÖñ{~σ @€ @€ þOCŒVÕA`? ,ú··îçTO`ÿdáÝOþæ©tágNHK?÷›iÆ,cØí©‡Eh×¾½ýÒø4ëø ½¾íŸ»gž}àÈÝÕ:zÙC @€ @€ØG…v6lH>ø`ŸMOŸ>=wÜqiþüùiÊ”)}>ïè¨Þ±iL`ʬ÷ÒÌcƧ7ž“^{zlšu|cï½òxWÐwÄi{Ó„#è›ÔØ‹¿~*fõ½ûzñ¯ÕÞóëºêW{ÌE™4mGõ¡íoNN;·uM†×Ö™8xOj›¸«z?;ˆwß{««ß½=Ïgíìén¦z-«¯mâÞšíÄýߎ·Ç¥÷÷ŒÉO§îMÌx¯zÞŸƒ²×¾ÔU«ÝrýñL#ߣV]® @€ @€ â¿.6Øb„pý-—_~yúwÿîߥ ô÷UÏ @`Ô Dx·îéÍécÇ͵øì½ŸÞx®­Üu¼69E×[‰g"ä‹I!E6 IDATrȱ{z{´Ç½‡^}jBzaU­§ÛÒGΚ=aWÍðî­—¬ì/•~âS;®3½²vBŠ¥=‹e|úèƧƒŽ|·r9–òlbb6‹Ïeu¼óÊ5÷°û»ÿZ\>ô£_Ø“:²F·<;.µ¯ëë²zãg¤G}jwŸÆùw6ýrj¾§tÄiméðµ­òï÷v¼/ߣ·zÝ#@€ @€ °¿úÚÅ,»”Ûo¿=ÅŸï¾óï˜y7Dï 0ªæypú½ß^>µð¨Q5îÁìô9±¼e×_q[žmKSfõ^ë[/v=;ó˜Î~…Om¸b5ð‹Yzm¿ž ÷Þ[c*AW„y[_ž˜æ—jwYÏvm›^þÛ® 1®E]ùðîW?jK ~ršpàÞm–Ÿ;ù&fÀß™5Ó¯Ÿ¯==%=³²k&_Ö§l|[_Sw„o<7!ü©Ðf­†v¼5&=ùø´ª×쓺ú•‚1Žw_›˜Žý\gݵêÍ® æ÷Èêô“ @€ @€ÀP ô;´Û¶mÛ>õ-‚»õë×§ûï¿_p·O’#ãåööötÏ=÷TË©^xá…ƒò{±ú‡_@F1lÚ·lKÿúê¨ÿº±^Š¥$8mb%ôŠ èðëfôd³äb†^JØEØ÷‘³v÷­>xb× ·¶b6ÞœSêﯷîή=õæ}öýtÈq»*¡Õ‡?•R>8Û¼n|Úµ-UB¯Þžû§ÇǧyçvÏ.Œ™†ŸþZ×y~–Û§¿¶µ×᾿+fÒu¦ÃO~?MýàîB6ç”bß^}²-E{+~Ö~䬽iöoî¬ÖwÔ¢‰Õ†a³çœRœñ×[½Ù½ÁüY~ @€ @€*~‡vµ:¶bÅŠZ—ÓC=”î½÷Þ÷V¯^n¾ùæ´lÙ²÷\]×_}ef6ê5kÖXB5ÃðsÔ ë÷“Ç2—/ÿbB¥Ò·_Ÿf_;0‹{Yéš¡—õþóç¬Î;â“{zvñö´Ã·§yŸ’"¬Š`ðÐꇇñüq‹ßO³Žï(4çoošV™µ—ÍHk乺öæ+Tׯ“9.UõòËù¾E8ÚWhï×ê{ôóð)½ü‹®iŠa5û7'Ôm·Ü8ßߣV;® @€ @€Ø_ƒÚ]qÅ5û×;::Ò~ðƒtå•Wž¹îºëÒUW]5(³ª ;Vo¾ùæ°ê¯Îغý¡š*Ë\Æ~k1s뵧ǦYÇ×n'îE‰Ù_1C¯ÑÒþ]³Æb‰Ççê•éGÄì½®gß}½­n3çÊ]VçÌ£ßOíëºþÊnô¹]ïŽMÌjØÏF<¦º7µÿz|;Þ™”&MÛQ·±ÞúÁݼώ¯œQÁö×ÛҴÛ÷=ê  @€ @€ûI ë_*÷SåQí”)SR„wwÞygVV­ZÕãš -Ö}ë«ç¦ïÿ§ß¶oÝ~ú賎ß[©9‚»Ž×&÷håW¨Î–;è¨==î×»KjFQ"´ê­L8°{ÉÍØÓ­^™tPý}çÆvOL>·û½ýþW|e(½õ§<Ö¾ž=ðnË[ïÿþøå¾;'@€ @€ 
°¿e¦]#\ºtiúæ7¿™6nÜX}üÉ'ŸL‹/®ž; @€Àh0³nè¾òŽÜ]å¶åÙ¶4eV±í~²ë¯ÁØ·-ö}k´ì}¿;|ÛöêØ´ç—SzuÏ®î÷z¡ÅŠ™t{vŒIù@°ã̵ͮ¾†3ajwh·³£q«Ñú=úòtŸ @€ @`x Yh,—]vYŠe1³û—)-º¡ÿÒ±äâ§MH±ßZü9|A÷>o@e{ÄÍþX÷l¸FzùÞ[]Ë]Ƴ]u40µM¨?›®‘v›ñÌžÒ›ÏOHϬìs3úÑ[›£é{ôæà @€ @€ÀðÒÐnúô郢µvíÚôÌ3Ϥ­[·Vë;õÔSÓüùóe¼rý'œpBZ¸paºW®\™6mÚTéÃ¥—^Úã~Ö¹l_¿ì|É’%iöìÙÙi¯?£/=öXå™9sæ hfb{{{Z¿~}zê©§ªmE]Ç{lš7o^õZb\QoÖ¿ìýø‡vXÍ1–-b¶e¾Ü}÷Ý=ê‹ûçœsÎ>õ5߆cC- ¬jñb{±\vQÞ~i|šuüÎÊñ[/tÿ8}NÌÈX‰½ð¦ô±DfVóÔ¼¬Ž¡üÁæsÿsBu)ÐØ¿/–Í/s3í^X5x³íöu|#ù{ì«÷  @€ @€Z[ û_,‡ Ÿ/¾øb¡•Â-öÜ|óÍ…™zµÞ½æškÒW¿úÕšQ­çó×n¹å–tå•Wæ/Žo¸á†tÕUWU¹Ó.¸à‚êý$c ÐZå?øAzcŸ¿FJ´yï½÷V¬Ñ íᇮ˜å߯Vô냹sç¦o¼±_a`„€7ÝtSå½r}ùóE‹¥Ûo¿½ÐßØÇ°7ãèK­²bÅŠB=µžq@« ëZã‹L;|{J©ë?yíé±iÖñ)Åì± ?ëš96ï³ï§¶‰»úÕÙÉÅ̼®¿B˜±7tä»ýz¸<œìNºdwê²,÷þÀ”Òà‡vý™•8Z¾GYÞ9 @€ @€ÀÈüeëÅç¾ûî+Ü=ñÄ çõN"|š:ujŸ]¼¡OÌòŠw-ž~ú齆IQW,íùñ@`wHY~~o?Vü|û¥ÞÿgÇ–g»÷Íëš=Wn­öùHùµGç* @€ @€ÀhØïËcFÐsýõ×W–IÌ£Ær‡S¦LÉ_êq³ºòKPfÄ’Ž—]vYÊöÈ‹e7c_9Œå7cfZoíÄ’›µ»‹.º(}öÙ•&£þò̲˜u׊å+_ùJÍñ\~ùå餓Nªvù¡‡ê1 /–ÑŒ%Bë-Ýùýï¿ú~vK`Fè™}‹˜qxÿý÷×ìC¼Am|û¬|÷»ß-|·XÞô¨£zÎLŠ} T`Ò´iöIRûº1Õýí¢®CŽíÿ,»¬³Žß›Þx®+djÿ‡)iöÇúž­œ½ÛÌŸ§tp»Þ—Ú&ÖîÍøÉ{«7v¿Û5ã®zá×Ûßœ\ @Ë÷jwí-85}øS=g¿óÊ•ïïE˜߬?e¸~þŒÑ³ @€ @€#[`PB»zÊ%›5kÖô†â¹fêCùz¾öµ¯åOS„u?úÑÒ‚ ×ãdùòå•%1˳òb?¹zmE X+|«µo\Ô×]w¥K.¹¤GÛ­r!f–gØå÷áË÷3LbV^„|ùwb¿¹K/½´fÐyÇwä«H?ÿùÏÓgœQ¸'Ë–-«Ôû×{ì±…û æ¿G„‡ù°õâ‹/®ù} •8!@€À" k_Wükoêû1M¬Ôæ¬ã;ÒkOO«,!ûãíÙ55Íýµ­¥Z†þtÒôãºÂÆWŸlK‡ž89mëL{vŒIm“ºÃ²IÓ»ÍgVŽK“¦PX"ó­—L¿úQѳ¯ÑÄìÄîv¿7-vÒîmìÝ3&½ûz[¡®¸×ß2\¿GÇéy @€ @€‘+пm«ãAO£%B¤uú*@Å̯¬D`÷÷ÿ÷5äì™"pË/á3¹ò!Qölü¬5s¬V`—½³téÒtÊ)§¤˜µVkv^ö\³~†m¾Üyç)ú\¯D€vÛm·Un烻Û/^Üãµ|¸3ìjvÙKQw­:²û~ @`¨ººî¿öæ}öýÔ6q×>ucÞy;Ó†û'V‚»å^XËMÖ_rrŸ¤—coº™Çt… ½ü‹îþ~ô {Ò¤i] …ÍG¿0¾¦­»3öœÞ£±ß]„z”Ì+f<¶¯ën7ÿnÔ7eVÿ—,:²úc/¾áò=òcwL€ @€ 0ºzß\fm"芙wvÑìÝwß]hý¿ý·ÿÖk`—=äŒþõØåûþÇüÇùÓûöU"´Œ™z †‹@„PÔeå tïs—]ËÿŒ%£d{×åïeDZ—Ú±ŸÛ‘"hª÷\\ûŸøÃÙkÕŸãÚº—ªÌWøõAþ^þx ÏE¸•/_ÇÄ©ÝKbÆõƒŽ|7tÉîšcûÈY{Ó'ÿhGÊï8v\÷x²zóý ¯?ÿNÅ#»Ÿý §h+fÌÕ*ùzòÇùg÷õ{äërL€ @€ @`¨º§ìç–c?µZËZÖk6ö¨Ë—Þfu埋ãüÞmqþì³Ïöã"pÊÏ‹çÝ7-f‘Å2Ž­4ÛîñÇ/0|ùË_.œ÷vR*y䑚Ç>ù`ðw÷wÓŸÿùŸ7¦Ö¬ÐE ’@£ËNƾs³?ÖX£óÎ}'Í;·ïg# œu|üIiÇ;“ /´MÜÛël¾˜õöéâJÐ…÷³“Á~.­yçîLG-š˜ö¾ßµ_]½=ä¢í?_[y\ŸþZýýçjõ=‚¹GOH{vvý·CöEŸz+µê©õü¾|Zõ¹F€ @€ 
@`¨%´‹jùû”åøKh¾øâ‹•½çòÏÖ;Îj±4f­}óê½íç˦M›ò§•ãX3_bfÚìÙ³ó—†ÕqyŒO<ñDŠ}Ròöù÷¯ºêªÂwoüñ<Ýxã–ÂÌC9&@`Ô Ô ¾Z¤¯ ,ßïÁ[„kmó- þñ`÷yð{¨F @€ @€ÝƒÚ•÷Œ‹óX®1~æÃ»wŽ:꨺{ÌeÝŠýìò%B¤þì›—·Þñ+¯¼R¸uÈ!‡·ÛI9¨ ëÁ.1Ûñšk®©„tYÝñm.¸à‚ÊiÜûÒ—¾ÔcVcö¬Ÿ @€ @€ @€µöÛžv1kíž{îé±÷[„oè5»”g¦-\¸°Ù]í/_¾<•gVf pþüù•eFËÁköŒŸ @€ @€ @€=ö[h—5õãÿ8;¬þüþ÷¿_=ŠƒéÓ§÷ÙÌ[o½Õç3舔›7o®Ìº«e³+#½öÚkkÝv @€ @€ @ $0(Ëc–ê,œÆŒ»˜™•_Þòºë®K±?Ú”)S ÏÖ;‰=í®¾úêz·{½ÝÒ¥K{}&n>ÿüó}>3œˆ¥*c)Ò”sÎ9§Ï×â»Æ¬»¯ýëiÕªUéŽ;î(,…dKtÆs û.0mÚ¸´£cBš4e×¾W¦ @€ @€h)ýÚÅh—,YRíâZ=‹/®‰qØa‡®zè¡}îƒWx¡“SO=µðÔ–-[ çÃíäè£.tùüóÏO±Ýþ.¼ÆwŒ?±$æW¾ò•´zõêj³Ü]|ñÅiÁ‚Õk˜ÀAÓ&¦Û' íÆç-ˆÿ0 þ… @€ @`èöûò˜1¤˜•uùå—F×Ûì«x>_ò!Pþú¾O:µðz´ÑÑÑQ¸6X'åå9zè¡ÁªºZOyVÝSO=U½7TÌÝÿý=ö1|à†ª Ú!0¢úÀ¤ÊL»=Hƒ#@ ©ñÄ   @€ @€ ½À„v1¬ò•’mذ¡îˆË!ßÊ•+ë>;óæÍëñZÌþk¤´··§»ï¾»‘G+ÏwÜq…gcÏ·FJø4úlyIËï~÷»41èÏÄÌ»?ù“?)Ô»fÍšÂyþ¤ûìê­­[·VöS+Ï€{óÍ7«Ï”¢Ý˜³Ò²ïG8ËsfaÓºuëÒí·ßž=2 Ÿ¨•ǾpáÂÊþo_|q¥Î}mçÒK/M±,f~<Ñïø³hÑ¢tê©§¦ü̶zímÛ¶­î£îìäâ…Ø«¯ÖržñÝë•N8¡Ç­˜™xà 7¤Ø ðÅ_Lî®X±"]qÅ=žuÀh˜rÀ„ô‘#§¥×^ž‘fQÿÿ·&c%@`ðÞÝ:9½¿kRš{äÌÁ«TM @€ @€ ih7{öìJØ–v–/_^3´‹Ä>xO<ñD%´)(êÈ×S¾Ÿ—÷LË®g?ÿú¯ÿºæì¹ÞBºÀbF^Ì k´D VíâÝ ËAcVgÛ‰ ³Þxzk'k/û¡e#¥7£ìýßâ»×+\Ö*ý±­õ¾kFªÀ¿úMýÄF¡ÝHýÀÆE ‰ÿôì‡Ò9§5¾4w»ªi @€ @€ÀˆÒå1Cð²Ë.+@F˜Tk9Æì¡õb–Õ@JÌ¢ûÒ—¾Ô뫱Üã}÷Ý×ë3ù›1»ìþûï¯ÌË_ïë8µõë×÷õXõþ@Û‰ñD;ø ¤Ô Ùb–^Ë5×\“–-[ÖëkáÒÿ^+s“À(¸ð3ǧ÷^=*½ýÚ´Q0ZC$@`¨vtLHíÏÌK—^pòP5© @€ @€’@¿C»Xºp_J­¥0c¿´ÞJ,‹¸yóæ!P#%¯‚" Œ«¯}Š +–ܬW"Œ:o»í¶AÓ@Jô%ÆÑ[;¶ýüç?¯ÛN#þÑΣ>Zéo#á]<a]ô­^ȶ`Á‚†¿AŒoÍš5)×FJø÷ÜsÎ9Tå£BàË¿szÚø·c5H†Fà¹5'¤}ÎÂËð* @€ @€ÍÓÙÙÙÙߦW®\™6mÚTy-f`E ÓŸaZ>¨‹=Ïz[B±\÷ÚµkÓ+¯¼RíCÜŸ3gN:öØc éÊõåÏÛÛÛÓã?^­;öU;å”SzÔ{Ë-·–»ìïžk¶ÓÑÑ‘~ðƒTºc¬zæû_ë8ÚŠ@î±Ç+ÜŽo7þü…µ¾AìOõõç[:”RŠzóýÈïW¹NçF¢À|ãîtèÇ´LæHü¸ÆD`ˆb/»5÷~&ýðÛ—¦ ãÇ qëš#@€ @€ÈÚe/æŸûÚf;c'@`ß:¶ïJ_üúÿ“>ö¹ÓÓßÛ÷ Õ@€À¨ؽs|züÞ³ÓŸýÑ¿IGöQi`Ð @€ @€Vè÷ò˜­Òqý @€Àhˆ%ìþó¿¿(=qÿ§Sü£»B€<ñÀ'Ó——|F`7<ï @€ @€Y@h7È ª#@€ÀP Ĭ˜«þÍ¿LwÏ™)–·S Ш@„ýkV~*ýö¢³Ó'Ýèkž#@€ @€Ømû±nU @€À~øäÇ>œ>èwÒ7n˜ŽYôHšyøÖýÜ¢ê îò?ùà§ÒŸ#°îSÿ  @€ @€% ´QŸÓ`sœ™nþã‹ÓWoœ˜^9èÅ4ïÔ_¥ISvF c&@ ˜]÷üÚcÒ/ÌMråâÿ¿C!@€ @€h¡]ë| =DŽí»Rìù¥-3>p@ºãOÿtσÿ;ýàG‡¦ðBšs‹éÀéïã$@ ŽÀŽŽ éŸÖÏIÿø÷óÓçÎ81}áÒþެcå2 @€ @ ™B»AÒŸ>}ú Õ¤š}ø»'6¥¯ÿ—‡Ò7þèìô‰ßœ³¯ÕyŸÀ°XrÎo¤ß:ãøôƒû~™~úϤqv¤YǼœþЖJ€'ÄVŸSg H 
–¿Œ?o¿6-½ºñ˜ÔñöÄô™O“nøöiº‰z‰ @€ 04c:;;;‡¦©‘ÕJ{{{ºçž{*ƒŠÀî /LS¦LYƒ†£‰Àîÿ¼é´k÷ûiÂøqé?}õ\ÁÝ0üŽº ÷áÃÒ¿üäütÂÜC[²Ÿ:E€ @€ PÚ=œ c7ßÞž>õ+]6 Á]&á' @€ @€ ÐÊc[¹súF ?±§×5¿zᕘq3ïbžB€ @€ @€hU¡]«~ýÀ¿úôü´ìËgÞÜ8œ @€ @€ @€-( ´kÁ¢Kû& ¸Û7?o @€ @€ @€C/ ´zs-€àn5A€ @€ @€ š€ÐnÐ(UÔj‚»Vû"úC€ @€ @€ÔÚÕ“q}DôÜm|é1Fƒ @€ @€ @€†¿€ÐnøC#èC ^p÷Gò?’à®<·  @€ @€ @€!Ú ³Fš-ÁÝU—œVèFÇöIpW qB€ @€ @€4I@h×$xͽÀïœ÷é÷{A¡aÁ]à  @€ @€ Ð$¡]“à5Ûßÿí…‚»æÐk• @€ @€èE@h× Ž[#S@p72¿«Q @€ @€ @€á, ´Î_Oß, ¸0  @€ @€ @€ý ´Û¨ª‚»áñô’ @€ @€Œ¡ÝhøÊÆXW@pW—Æ  @€ @€ @`„vCˆ­©ÖܵæwÑ+ @€ @€ 0šÆtvvvަ÷±.ú··†°ú‡_.œ;¸Àÿ«5é¿ÿÕÚBS˜˜þËõŸKsœY¸î„ @€ @€ 0˜fÚ ¦¦º†µ€wÃúóé< @€ @€ÖB»aýùt~°wƒ-ª> @€ @€ @ ¡]#JžU‚»Qõ¹ – @€ @€´„€Ð®%> êS[ IDATƒN´š€à®Õ¾ˆþ @€ @€ @€‘- ´Ùß×èöA@p·x^%@€ @€ @€ú% ´ë—‡G›€àn´}qã%@€ @€ @€ÍÚ5Ç]«ÃH ^p·ì¦Rû–mÃh$ºJ€ @€ @€´ª€Ð®U¿Œ~µ”@wKŸTèSvWýéÿÜTœ @€ @€þöîÎβ¾ý“d2“{HÁ’ „‚baSŒ E¬ˆ¢ÇJw±­Xi½Z‹ÛRkµvËñÓzi7íÑO+í¶+öˆ—rQ©V.X® …HBÂ%÷ÉeÎ翆wÖeÖZ³ÖÌšuý¾ŸÏìYïû>ïsù>Ëš_žç%@€ŒG@h75Ïô¤Àï½ùŒôê—W4vÁ]‡ @€ @€ @`œB»qÂy¬7®º|à®7§Þ¨  @€ @€ @€À¤ í&•WåÝ( ¸ëÆY5& @€ @€ ÐZ¡]kýµÞ¡‚»8Ý&@€ @€ @€m* ´kӉѭöܵÿé! @€ @€è¡]§Ì”~¶¥€à®-§E§ @€ @€ @€@Ç í:nÊt¸Ýwí6#úC€ @€ @€:O@h×ys¦Çm( ¸kÃIÑ% @€ @€ ÐAB»š,]moÁ]{ÏÞ @€ @€ @€vÚµóìè[Ç î:nÊt˜ @€ @€´…€Ð®-¦A'ºI@p×M³i, @€ @€ @ 9B»æ8k¥Çw=6á†K€ @€ @€&( ´›  Ç TÜU’q @€ @€(Ú•Š8'Ð@Á]1UE€ @€ @€ºX@h×Å“khí! ¸kyÐ  @€ @€ ÐÎB»vž}ëî^qæÊ¢ñlÚ²=]õ©ï¦»‹®;!@€ @€ @€zO@h×{snÄ-¸ê÷Ö¤—ž²¬¨õõžJïþè7wE*N @€ @€ @€@ï ízoθEýÓ§¥?»â|Á]‹ü5K€ @€ @€ÚY@h×γ£o]' ¸ëº)5  @€ @€ С]CUB vÁ]íVJ @€ @€ @€^ÚõÊLg[ îÚj:t† @€ @€´\@h×ò)Ð^ÜõêÌ7 @€ @€- ´mâ ¦ îšF­! 
@€ @€ ÐÖB»¶žëÁ]/̲1 @€ @€ @€êB»ê>îhŠ€à®)Ì!@€ @€ @€m+ ´k۩ѱ^ÜõÚŒ/ @€ @€È íò>h¹€à®åS  @€ @€ @ %B»–°k”@eÁ]ew @€ @€ @€@· íºuf«£w=}:O€ @€ @€êÚÕMæÍÜ5ÇY+ @€ @€ @ „ví0 ú@ ‚€à®ŒË @€ @€ @ Ë„v]6¡†Ó}Ypwêê¥Eƒ[¿á©ôî~3 î;PtÝ  @€ @€ ÐyB»Î›3=îA,¸;vÅ¡E£àî?õÁ]‘Š @€ @€ ÐyB»Î›3=îQ9³úÓg>|a* î~r÷c‚»ýN6 @€ @€t€Ð®{æÒHz@@pדlˆ @€ @€ ГB»žœvƒîdÁ]'Ïž¾ @€ @€ @€òB»ò.®hkÁ][OÎ @€ @€ @€º„vu“y€@{îÚcô‚ @€ @€4B@h×Euh‘€à®Eðš%@€ @€ @€ Ú5Tuš- ¸k¶¸ö @€ @€ @€@ã„v7U#¦ îšN®A @€ @€ ÐP¡]C9UF u‚»ÖÙk™ @€ @€LT@h7QAÏh#Á]M†® @€ @€ @€:„vu`)J w0KúH€ @€ @€Š„vÅÎt…€à®+¦Ñ  @€ @€ @ ‡„v=4Ù†Ú[‚»Þšo£%@€ @€ @€ÎÚuöüé=ª‚»ªöiÛ3»X»ª @€ @€- ´k´¨úh‰@/w±æïþñ—ÓÁ¥ßK«Î¼'Më;Øs Ð+NÚŽ<ã{é÷ÿôËiý†­Ñi½$@€ @€ô €Ð®'Ý t«@/w±ÂîƒñÏiåšMG÷_Ý:•ÆE€@ƒñl:ýõßKúÌõ¹÷`6¸zÕ @€ @€4@@h×DU Ð>ÝÜ}ð/¾‘–¾ø'éÅϵºž Ð3æ ¦_:ÿ¶\ðÿÀA€ @€ Ð^B»öš½!@ ÝÜ]û·¥}sצ¥+o€’*èEü#øà @€ @€h/¡]{͇Þ Ð n îâ=T7ßõŸé˜—Ü× !Õ Ыü?µûçéw>Ú«ÆM€ @€hK¡][N‹N Ðn îþ×W˜Ž:ýî4­ï`#hÔA€@ {æÏÒµ_ýa+> @€ @ ½„ví5zC€@ƒº!¸[{ßé‰g6¦#}²Á:ª#@ Wb›ÌY‡oL×ï?{•À¸  @€ @€m' ´k»)Ñ!-ÁÝ[ßpZQµƒû¤?üÔwÒOî~¬èz;žüã÷þ#ùK÷·c×ô‰8â„õé†ïÿ¬ƒG ë @€ @€îÚu×| Þú†Ó;2¸‹pñŽ»ŸHG»¹ÂÈ\&@€ÀøñlzêéíiÓ–íã«ÀS @€ @€ Ú5”Se´³@'w±5æâe»¼Ë®¿XúF ƒ^pô–ŽXqÜÁĺN€ @€¨Y@hW3•‚tƒ@§w·Üñ`:ô…t½1 І‹V<ž~¸v]öL— @€ @€½' ´ë½97b=/P)¸»ú37¥õ¶¶•Ï–§w¤sÛªO:C€@÷Ìœµ'm}vg÷ ÈH @€ @€, ´ëàÉÓuÆ/P.¸Û±koz÷G¿ÙVÁ]üezü¥ºƒ“!ÿ(à™g÷NFÕê$@€ @€¨S@hW'˜ât@'wñ—éVÚuÏwÎH´›Àìù»ÓS[÷µ[·ô‡ @€ ГB»žœvƒ&@ h÷à.þ2=þRÝA€ @€ @€Ý- ´ëîù5:jh÷ஆ!(B€ @€ @€. 
´ëð Ô}# ¸kŒ£Z @€ @€ @`|B»ñ¹yŠ.Üuá¤RÓvm›™»}nî'>; @€ @€¨M@hW›“Rôˆ€à®G&Ú0'M`ïö©é‘›‡ⳃ @€ @€Úúj+¦zG ‚»8¾ðµ»F½c×Þôî~3}æÃ¦cW,¹Þîîù§yiëCS*vsÑÊ¡´xõÁ4ïÈýiƼ=˹Q›À¦ŸÎIë¾=­¶ÂeJ}îÁ´ìŒíeî¸D€ @€ @€@· ø'ðÝ>ÃÆG€À¸ºeÅ]µÀ.`âþý7LK?ù«´õ¡Ùã²òP^`ÿ`å€4_ʧÉØóÜŒôô†Ùé¹ÇgMvSê'@€ @€ Ð0+íF©"ºM ›VÜÅܼô{G¦èàþ))¶.ÜòÀ´´iípÐtÏ?õ¥“/™¬Ø9R·ú?q0Í9¼ü­?ûJþúÉ—ì/[ñÌÊ^w±>-LÏmÑO½ìƒõ=«4 @€ @€V äÿ±U=Ð.ÚX ›‚»Òí/g-LiÁŠ”Y6'·Ú.¦áñ;¦å®µñ”´u×úgïMñSþ˜?rY0:Bá @€ @€Ï ØÓWcDp÷ºW¬.*•½ãný†­E×;ñdñê)ÞmGl—[ : @€ @€ @ ¹VÚ5×[kt¨À•oý•4¸ï@úÖ÷AÜ}æÃ¦cW,¹Þ‰æ/Êvµô}pç@ÚóÌ´t`þýms¦Y wùx®ú‹úv>•ÿãhö¡û«¬V+n¢´/Óú†Rÿ܃ŅRJ…í•Þ,m?î×Ó‡Òú&ë|ÿÞþ´ë©¾"÷é3¦ó¤¾ÁªÍ–sßµmfn‹Ôx0Üfº¿l=ñìî§§åê¯V®jRJ¥Îµ~g¢Þ›g¦}»óÿΨҳÙ8÷ïÉ÷&»–]é8XvœÙýF;—Ž»ÞïVéóÑÏzëÈÆæ7 @€ @€@{ äÿ–´½û©wh¹ÀU—¯Éõ¡[ƒ»±€ŸÞ0»èx¥å­œžŽ:g_š³¸|xÏgïu{é;RŠðdÓÝ#ïË××—Ž>·?-;c{þRɧVÊ?[RðùÓS/𿱫èf„!OÞÛ_¦ý(6܇xG]åí.‹ª›´“ZÆzÂkûS¬˜,w”ºO6”¿«?müQ>~nz:ù’é#ï4¬Ünq¹rm–^{ìö¹e—ŸÕ—Ž8­²qô=¶l ¥Ç’S§§£ÎÎoGúÜã³ÒÚ릗K?ù«¢kñ>Á+F‡œ•Ç›¼ç‰|¿£ÅNù~æu|"@€ @€˜¨€Ðn¢‚ž'@ §º5¸{vc>‰°¡ôØ|_þ½wqoùYSßó»hƳª ÿô§—¼=¹ênpûÔôÐwûG˜%§¥ÁíÃÛsFýÜÒÜŠÁÝßœ1òlôeÞÒƒipGôaêÈõÂ1L>¼ýgv-‘u7Œ”-Ïî§§¤Mk§äúðìÆ´ê‚ԲழŸ…ö™{\»ÿ†iiçSsÓ Ï©tF¹pßøã¾¢q†wªžöÖ™©öÁQ>¥å^òö™cÎóž§§¤{î˜7Ò^ÌsáGÔ¹só@:þ¡Q«ß"„ËBÞØ¾5VƒÎZx0íÚ6577QÇaÇ÷ÌMéç¨ñÿ™ ç‰|¿KûÓ®ßÏy#@€ @€¨Q@hW#”bȺ-¸‹@.[Étô¹å·<0˜rï½;â%ÒÜì+ X–‘Ra¨÷ä=}é…çdZåg+¢¢½%§ì©¯pµTw‹Vކ û{Ú[‹Vö-yQJ?ÿÁÜ\AÏñî©»°'Y`eŽ>wߨðé' ¯Ú —X·ìŒ½…7ísÖÏhð„×(ZMî±õcE6cÞœ´äEåWÜE™ûªWH‡0˜³‰¹*œ¿'ÖN P«•û¯;¦§Uç—_U™­ûö𶚥ó|ÔÙ#«ýÂ8VM.;£xõ[„‹qDзêüç²*SlDcUx±MfvÄ Ï—}p¸?…+û^öÁg³"O–sé¸kù~G'³þ´û÷³"¨ @€ @€ã(Ýk\•xˆ½&ÁÝ«_v\ѰãwW}ê;iӖ꫊jâI<…? 
¬ûμÜ*­èF¬æ©´%e<€-X±³l[3f«¨ WdU^„PÑ^áûØb ËØ¾0;â0/P IDATvüb8ôÉÎãw¬*‹#ú[n+ÎØn1Žƒž}lôv‰[š=R.?sÿ¨À.ž~D`G„‡±ò©ÙGa?£/å¶¿Œwõ­º (FH6V_Ã=‚½B÷Âù‹laWK¹±Ú ³ró[Žfó”Çö”ÙßÓèC‡?<Ù½ìw|kybV¾ÒïÉtÏ÷»°?íüý¬äé: @€ @€Àø„vã·ó$=.P.¸‹Àî=úÍšƒ»õ¶¦ O<ÓÉx·WáO¬ºÊ¶)Œ`¥ÚÖŠð†<å:<÷ðüª§]ª•B¨x&VòeÇÞùm;ãZaHtÈŠ|{Yùø]øºØJ±ôØôÓáÐ/BÆÒ÷Ü–¿<í|ªù Ó³~FŸ"4­tÄxcþ²£Z_«¹/:&_G­åwŽöÍú¿«ÕýŽûÙ±«‚ñÁü×!+ÚÐßÍv®öýŽeýi÷ïgC'Ae @€ @€9êÛ‰ª L$¸‹ÀîÝýfºéÇUmc²oFàSnW½íÎXPüÞ¸jÏW+[-ý?.LK›;jÄY`[jÞôã‡Ó[ßpú¨2¾ðÒwä·QÜ¿gJzôÓsÖðv“sê î"ìˆ: C›jaQ£ÆaNJÃ[U>³ajZ°btÍ…«üf-̇BQ²0ôÛþäÔ´ÿöÑs3ºÆ”ö67´ Ûì(Cv½ðwaÀ¸»Éca?êýÜ_ðNºÒU•ñþÄ­ ÿgJ¬ ]~ÖÜtØñûËn‰Zo»Yùvsî”ïgæç7 @€ @€@c„võT=*POpWØWl?+–2©zÃ×óMÌKé¤7îνÓ.¶ÈŒànÖ¡3«"ñαm÷¼SnR;[¥òXýwç-:fVÑ—ÑÇM?Í/›½8¿-ªÜýtþyÃ[ƒæÃ±*M¦¾þÚWV«§Ö{…ahá ¸Jϯޚ™»ãZu~¥’s=ÞYwê¥³ÒÆ÷åÂå˜ï?êO‹VNO‹W¬+d®4êvsî”ïg%O×  @€ @€&& ´›˜Ÿ§  0"PKpWØeÇ™ÍXm—µ—ý^~æ`Ú´vxåÚk§§UçïÎnýŽÕk}·dkÉxßVl/Y¸ýa¬´{äæü‡E4ðdÞ‘ûSJÃá[¬ÀZrê¼4óù­9 Ûp¯(¨,éCl;9§à=|%·‹N ßCVt£ O²m%Û°kuw)Þ9xÒSzzÃì´åi¹w0Æö¦[š–6ß7/­º`oÑ; ën`L¶s·~?'@îQ @€ @€@× íº~Š f T îÞ}éYéãÿëÖ[b–ÍÚ"³´Ýµ–œÚŸ Cb…ÖÒS˯¶+ ìN½t_Ñê¶|³SJ“Úe+é"8Œ>—®˜‹0%¶VŒ•Z¥Ç̱ònø¾Øv²\™ÒgZq>­/¿²oßαWƻвcþòü³ÙµNø]m5cÌSl…ßÏ—cÎ#¼{ô¶´êüÑÿ{ªu¼íæÜ)ßÏZ}•#@€ @€¨O@hWŸ—ÒS RpwÕ§¾SñÙfm‘Y®KOÝ—6­íÏÝzúѾ4gqq©XeI±z-V?µêصmfn[Ìhÿè5{ÒªóÓŽÍ3GÞ­70÷`šµ°üjÁxfê´| µkÛÔ´¨U£ÝÂw½í|ª–Ð.–ÌÉqŒfZ~»p;ÈÂU›•:6gñîçWƒÎ l:{`Ü«íÚ͹S¾Ÿ•æÇu @€ @€‰ äÿ–obõxš "¸{õËŽ+¸2öÇØ"³G!ÙV±½d¼®ð( V¦Ï®ÜWøÔä|Þ»=ÿÇÖð{ÜRî=|Ã+±vV ì¢Gý³÷urz9ñZ ·õŒw¹•ÎIi ¶fÇœ¿Ç/»ÞŠßÏlÈÏW¹öcËËì^e–Uÿ}Øñù1;î @€ @€ŒW úߦ·VÏ @€@ŠàîœÓ_X³Dl‘Ùªcñêƒ#Mo{¸8´›>3¯ÒV±nÓOóÌHe þPØ—Ÿÿ`nzîñY)V–þTk¶p¬›~:§ZÑ–Þ‹UÙ±éîüö—Ùµìw¬4ÌÞå[†V[i˜=Ó¬ß8Æ<•;b6M)ú] ÅvŸÕ¶Âïaáê´¬ÂÕ†ƒ;«/Û͹S¾Ÿ™µß @€ @€ÈÿÓüÆÕ©&H)­ß°5ýÇ}OÔlÑÊ-2ç¹?¥4nl¾ojZ¼:ßíóóáÑý7LK3æÏ*Ú"óé ³ÓϾҜ?NbUàò³úr[dF ?åtô¹ÓaÇï+ ƒ¢ìâÕ;Òæûæå¶ü\÷íiiÿàÜ´håþ¢°+£=ÏNK±‚-±—}ðÙòÍLâÕ…Ç ¦E+‡·&Í­€Ü37~R¾Ÿl=³azŠ9ÉŽågfÛâw¬àŒ9Ú·{^ŠmXã»tpÿ”´ó©¾¢ïLÜ+<²û'¼¶?²b_Ñö—ñ}ËÆa_¬N+=fÌ yØåÉ{úÒá'ÍLSû†Òþ=SRߌ‰ݜ;åûYjîœ @€ @`âÍù[Ö‰÷S è(ìÞýÑo¦»F Õ[d¾õ §W+2)÷b•Ó’Sûs+Ÿâýu±z+²8bÊ“/™>²¬½nzJiþ¨~ÄŠ¥,Lu³–¼h_Úø£Ê+ϲ¦"èzäætÚ[§ŒŒ%»·ê‚½iݹàn¸\¬.,^a˜•mÕïp_uÁ”‘~‡”•ûyÚ[G”­ê{Önæ+ê²÷&f÷²ßñ½É¾kÙµì÷ð÷)·Ù¥‘ßuvùÿ}Å{­fónÞ|Éþ4cÞH5¹ïw»9gnñ¿Åvý~æ}"@€ @€h”€Ð®Q’ê!@€Àóã 
ìâñØ"s2B»X‘4Öñ‚“ö§Mk#Ki×SÓÒœÅù'âq§^:+müq_.èÊßI¹m‡Ÿ8˜†ß-6¼º©Ü–…Óúò}(ü\XWö9™,JX]öÐw‡ÃµågL/4üŸ‹VîO³î×Uϸ÷ @€ @€íZ€®I Э{·OMÜ<¼ÎáSÓ¬…ãi£ê_ëž"@€ @€ Ð|¡]ó͵H€øùæ¦?Êïð|ô¹Ó²3¶«ÖÍ÷ÍI÷ß0mäÙågL/_*Võí|ªøÑهÊUÅ;æâ˜1oÏH¡]Ûf¦½Û‡ÿÍ´¾¡4ëÐý©o`pä~ö!žÝýôp¿«•‹òY;ûóÍŒ\Ëêë8X¶¸ãÛóÌ´t`ÿ”¬x˜{0ÍZ¸{伞¥^©«\»¥õG™Zæ£\]® @€ @€h…@ñß6¶¢Ú$@€Xò¢iëC}¹ànÇæ™)‚¼jG”‰/ŽÃŽß_­è¨{=yozäær‹ÕûÒÑçö§ÃO,Þ=½avîýrQéKß‘ÒÔiCéñ»úSlíY|LO'_2=-X±3w9¶òÜt÷@™6‹Ëeu<÷ø¬²ï°ûÉ_ozò%ûÓ‚Åá`ôqËÓÒ¦µù°.«7~G@zÔ9ûÆ4.|æ±Ûç–é{JËÏêKGœVÞªðùjŸ'2Õêu @€ @€@³„vÍ×4\`þ²ØÞrø´-ô¥9‹«7ñô£Ãe­ª+|Š€hÝ#_¬Òë{~Þî§§ä‚®óžÝ8V]ÊwYÏ·OM<4Ƶ¨«0¼ûÙWúÒio™úgÕfi¹—¼}fÑ ¸©Ó‡²fêú½ù¾9éþ†Wòe}ÊÆ÷ìÆ)¹qGعõ¡þô’·§¢6Ë5´çé)éž;æx-9u¸_Y ãØ¹y áPÅåêÍ®5r>²:ý&@€ @€ Ð*¡]«äµK€ ˆ­$—Ÿ5 ½":â´ŠY=Ù*¹X¡WÏ‘vö}î¾Q¡Õ N^áÁV¬Æ[vFl»YþX{Ýð;õV½ê@:ì„Á\hõÂsR* ΞX;= nO¹Ð«Z¹ÿºczZu~~ua¬4|Ù‡Ï W¹½ìƒÏ–ïÌóW ÆJº¡tÄK¤¹/ØW¤-;£¸oOÞÓ—¢¿ÕŽuß>÷`ZrÊÞ‘úŽ:{`d…aXÅ*Âeg¯ø«Vov¯‘ó‘Õé7 @€ @€V ”îÇÕª~h—LH p›Ëg6 bå*,¼7¼B¯\©Ñ×¶>4{dÅØò3÷ ìâ‰yGìJ®ÅÁ`„ÕŽ^{ -yÑŽ‘0+Ê.^½#®H‹P«–rcµU­Ù½¿pOn[ÎrïÔ+ì[áj¿ìùr¿£ïËÎØ^4Æx‡`l‹™a[€ÖsLÆ|ÔÓ¾² @€ @€- ´k´¨ú @ %±º,V‰Å±ù¾Ê¼e÷bõW¹`ªRç7ýtxÕXjÎU:æ/ϯÞÛùTåíîEVîXtL¾ŽZË î¬<ærm”»c™Ì=üàÈ£{ž{~oБ+Ūõ=‚»,àŒ§vU±*®uø¬ÑóQ® × @€ @€4S`âÃ×ÌÞj‹TX¼z8PŠÕi;6ÏUò¹Çg¬–[pÔþQ÷+]ˆUlQg…¡U¹òý³ó[¼Ó­Ò1cAå÷ÎM-X(Xk¹}»›óGzµþ”Žu¬²³+Ÿ­½ÿ“1¥}wN€ @€ @ Ù•—4»'Ú#@€8dž”ÒðЏ-ô¥9‹‹+üÅ=ÃìÅŠ¼X™Wëqð@>|ÛþäÔ´ÿö¹5=º0ÿ\M´Y¡XI·Ï”Tîx²öpm¬áôÏ͇v{wÔnÕ«ó1–§û @€ @€- ´ëìùÓ{(ˆ-—ŸÕŸâ}kñsÄi)®ÅÔ¦µÃÁÐ’åWÃ<^ñãƒÀ(0\GmS_åÕtkñx·Ü¶‡ûÓý7äÇÜâ.j¾—æcÔà'éÂüÍ´ô/œ¤ÚUK€ @€ P‹€Ð®%e @ câ}pØÅñ̆éiñêáÐîéGòäÍ_+òÆwÄ»ðæ¼×­Z-s_0þvªÕ;Y÷"Ø|è»ý#[Æûûb;ÐÂm.c¥Ý#77nµÝDÇÒÍó1Q›zžßô‘ÚÕ¦, @€ @`òƒ9 •«’4[`Þ»RJósÍn¾ojZ¼:¥X=¶îÛÃ+ÇV½ê@ꬫ[3Äʼá?2g-<˜¬ØY×óR¸0°;õÒ}iز´÷³SJíêY•Ø+óQ*ïœ @€ @ »ÿ·nÝíet Ð'¼vxûË­MI;6ÏLÏ>6}¤×ó—×·5f<8uZ~›Ë]ÛºóÎXe^q„_ùÀn„±! ·¹,\Í7Vå½0c4úþª[þ¯FW©> @€ @€:ºóoëDèôâŸúÔ;}úO€† Ì;rÿH}[èK›~:¼Ê.¶{œµp÷ȽZ?Ä{ñ­îÚikÈZûŸ•‹‡•ŽÂmúì|HYZþ`;~>³¡úfly ÿÞ¼áÕs¥­•?ï–ù(?º]=mI‹Ö, @€ @€@&PýoÓ²R~ @€˜1oOŠ€.Žx¿]¶‚ì°ãë_e— {ñêƒÙÇ´é§sF>·û‡9ùnpg>$+í÷ô™ùñíÛ9¼â®´Ì®m3GÐÒ{åÎÃþç?˜[îVzîñYiÓÚávb®bÎê9:u>êc3ËîØ±£™Íi‹ @€ @ Œ€Ð® J§]zÍkVuZ—õ—“.P. 
›û‚:–‰•ôpñê#«íâýxÝ>7EˆUxÄJ¶ØŽ3î}ÿÃïÕ+¼ßŠÏ3æçø'ïéËõ9¶ÂŒ~Æïì˜1?hÞô\¨–Ý‹ßOo˜îø›þ‘´ð^¥Ï±:1‚»uß™—k/|wäêZ{]~ËÒ¥§Ö?/:•¬\'@€ @€ Ї óV­ZÔùƒ04X`8 Ëÿ1·êURßÀà„ZYuÁÞ´îÆ\pÛd>rsl7YyËÉ 5Ö ‡ãÝt‹VÎËõ9´?Ê÷÷äKö§ó† ›“/™ž~ö•a³áPmtðﻋP¯–#óŠu›ÖæÛ-|6꛳¸þ-K£Ž¬þXIÙ)óQ8övú¼h‘ÿ–h§ùÐ @€ @ 7ò›Ù›ã7jèR¡V½ª?Ū¸8Ï]¹!Ç.eï®+W&Þ¥vü…CiÛÃýió}ùm7 ËÆó±ucá{õ²ûÓúò[U~Îîg¿ ï~Îîg¿ ï~Îîg¿#Üzô¶‘í(³ësó«ðâÚ‚;Ó©—ÎJÜ7jEÝÑçL‡Ÿ8˜ˆ--‡M§NË'«³°áuÒ÷¦Í÷Íô…Óò3÷§Ë…õ~.,;Ñù(¬«×?÷÷—U{ÝÅø  @€ @€Í˜2444úoÜšÙmÕ%pöo\[Tþ¶/_^tî„îˆÿÍÿú|»»ÕE£)Ü^2†Õ7pp«ù&“'¶¦ÝÒ˜ï+[#Æ[cîß;¼+w„}¸5ú(ìs'ÌG£Ç?Þúþ÷Ÿ¿*ùï‰ñêyŽ @€ Ð8+íg©&è1óötÔˆë Ê=¶XùØ70¹\îóäöVí @€ @€Š†ÿÉ{ñ5g @€ @€ @€4Q@h×DlM @€ @€ @€(' ´+§â @€ @€ @€& 횈­)Ô+0oÞ´´gG½)O€ @€ @€& ´ë° Ó]zK`Á¼´g׌Þ´Ñ Ð4øGñ @€ @€­Úµ~ô€2ÃJ»Š:n 0QøGñ @€ @€­Úµ~ô€[07íÞé/Ô+¹A€À„b¥]üã @€ @€@ë„v­Ÿ= @€@E—¬~az扥ï»A€‰lûÅ¢têq+&R…g  @€ @€$ ´k¤j 0ç¼ä…iã‡NFÕê$@€@ÚüÐòtÎéÇ @€ @€h¡]L‚. @ ’ÀœYýéèóÒæ +qãØùìÌt`pF:vÅ¢q=ï! @€ @€Æ íë©64\àÕÿíä´iÝQ ¯W…ô¶À=pdzåYÇõ6‚Ñ @€ @€ÚH@h×F“¡+('ðºW¬N»Ÿ<*=³y^¹Û® @ n=;úÓ¦ûW¥ßºè%u?ë @€ @€ÉÚMŽ«Z  ÐPËßô+iýOnh*#@ wºóÄô¼òô[ð: @€ @€ÚC@h×ó ¨*pÎéG¥CGx·]U%7 ¨E Þe·õÑ#Óůü¥ZŠ+C€ @€ Ð$¡]“ 5C€‰ |êý§õ?8;Å_¸; 0}{§§Î{ IDAT»o|Yú¿ßqêŸ>m=ÅÏܹsS„vL{÷îÍýŽUxñ#Ì늩®{}}}iÚ´i¹ïGö9Â^ @€ @€F í©©.è fâÌtÅt @€ @€ŽðÒ•Ž˜¦‰uòòËÿebxš @€ @€ @`R„v“ÊÛ•ÿê¯ÝÑ  @€ @€ @€²B»²,. @€ @€ @€hž€wÚ5Ϻ¦–6mÙ^S¹¬PµòÓ§¥…‡ÌJkÖ•÷› @€ @€ @  ¦ µa¿z¶K¿ó‡_Kë7<Õñ¿é‚“Ó{.=³!u©„ @€ @€ @`òl9y¶j&@€ @€ @€ P“€Ð®&¦æú7œÖÆú§OK¿yá) ©K% @€ @€ @€“+ ´›\ߺk?çô£Ò±+­û¹Ò^÷Šsï³+½îœ @€ @€ @ ý„ví7'i¢«í¬²kÃIÕ% @€ @€ @€@¡]· @€ @€ @€4C@h× å:ۘ虶Ƭ\q @€ @€ @€@‹„v-ž€JÍw‹L[cVu @€ @€ оB»ö=#@€ @€ @€è¡]›Nôx·È´5f›N¨n @€ @€ @€ªíªà´úV½[dÚ³Õ3¦} @€ @€ @€Àø„vãskÊSõ®¶³Ê®)Ó¢ @€ @€ @€@Äv 'U! @€ @€ @€ú„võy5½t­[dÚ³éS£A @€ @€ @€@Äv £œœŠjÝ"ÓÖ˜“ã¯V @€ @€ @€@3„vÍPÖ @€ @€ @€*B»*8írk¬-2mÙ.3¥ @€ @€ @€ñ íÆçÖÔ§ÆÚ"ÓÖ˜M @€ @€ @€. 
´k8©  @€ @€ @€Ô' ´«Ï«e¥+m‘ikÌ–M‰†  @€ @€ @€ Ú5Œrr+ª´E¦­1'×]í @€ @€ @€f횡ܠ6JWÛYe× XÕ @€ @€ @€Z, ´kñhž @€ @€ @€€Ð®ƒ¾¥[dÚ³ƒ&OW  @€ @€ @€U„vUpÚñV¶E¦­1Ûqvô‰ @€ @€ 0>¡ÝøÜûÙϦw¾óå6mÚ”Þô¦7¥Ûn»­¨ÌÇ>ö±tÕUW]ëõ“uëÖ¥3cÑf¿ûOP•ÿö±È[øD€ @€ @€Ô&ÐôwÚÕÖ­|©Ÿn¼ñÆtì±Çæ/¦”>ô¡;IiÕªUé´ÓNkz`Çž @€ @€ @`bmÚÅðæÌ™“þæoþfÔHce™ƒ @€ @€ @€@§ tDhÈkÖ¬e[A: @€ @€ @€tº@_' àì³Ïõn»ZûïÆ»é¦›ÒÝwß~øá‘ÇN?ýôtÒI'¥sÏ=7·¢oäÆÞÿþ÷Ô[uƶ”qìØ±#ýó?ÿsºõÖ[Ó¶mÛÒÂ… Ók_ûÚÜOµ*£ü½÷Þ›ëca¹óÎ;/-[¶lÌþÝu×])ÞóG´ù| ·]fa]å>Go¾ùætÏ=÷¤;ï¼s¤HÔqê©§¦O<±l`ýýô§?=R¾ôÃÅ_\z)wþ¹Ï}®¦í;+õë˜cŽI§œrJ —zÞÛ÷¥/})]ýõ¹>Dß~ó7³¨7ÜpCúá82§1þŠkig²-Š:ê„ @€ @€èJŽ í;ì°º'!ºøÃéóŸÿ|Ùg¿þõ¯\ÿìg?›ÞùÎwŽœWûpÍ5׌܎`'B»Ø®ó5¯yMZ¿~ýȽøm_wÝu£‚¢¸ÏÛßþöQÏdöï–[n) EÙÿ÷O…e#p‹wÜU;"@{×»ÞU­ÈȽ'žx¢($‹¬°½‘‚Ϩt/Îja[„uÖòÎÂ+¯¼2]}õÕ5…­Øö) í"¬»è¢‹J»Ÿ;Ͼ3c}/&Ë¢l§\$@€ @€ @€ºR c¶Ç ýÂÐ¥–Ùˆ@féÒ¥»Ò:"ÀŠUXÕ{D`wÜqÇU ßž}öÙQUFÿ^þò—W|¦ôX‰×¨#V ÖØE›Ú±Š°ÑG¾øÅ/®)°‹¶#8òãy·aÌñÛÞö¶Š]áØÂ)¼*“aQ©-×  @€ @€ @€îè˜Ð.¶,="$«tT[AU陸ÁàûÞ÷¾jEFÝ‹@®Z_F=RŠñTZáU®|#¯Å »Â•‚ã©;¶Ï1wîܲz–}(¥\à«ë Z·lÙ’.¸à‚šÃÜh;¼â;Uîh´E¹6\#@€ @€ @€º[ c¶ÇÌÞ×–MG¼ßnΜ9ÙiÑïØ³\ vÙe—åÞ-wÄGäÊoß¾=ýèG?J÷wW´Ú-¶E¬å=tY£¥[9F;±ýbT÷ߺöÚksï†ËÊÇïø‡(<Í}Ž1žuÖY¹çâBÖ¿o¼qÜïòÕHJé/ÿò/‹.{ì±é½ï}oúå_þå‘ë?þxîoßøÆ7Šl²ïyÏ{Òù矟¦Òàªðýx#…Rª¸eg˜•1Ç—_~y:á„FnÅ6 awÛm·\‹íH?ò‘¤O~ò“#ת}(|6+[mþÚ¯ýÚÈœýÉŸüɨqG™ø^”¶(­ß9 @€ @€ ÐýS††††š9ÌØ~²p›Ë±Þ}‹­ KW†E˜T.@)W>Ÿâ*½ã-ViEèSØFYÕ¶]œ2eJY¶jý*| ôùÒwÆ–ÏDFX)¨,}?]%×lE[VÿXãŒrñL%»¬žÒñÔóµ*·*²Úûû¢ÍxÜ¥—^š5Ÿû]Ͱô{—=߯~õ«eß³Wî{ad¼¿°Ú1‹jõºG€ @€ @€t¯@[oyë­·æÞ1W¦ÅTDÐT)°‹p«´|¬T«:E«´^ÿú×Ìt¬ÞªÚ,øPk`WðHîcGK–,)½\t÷+vEÇ8‰Õ{…Çoÿöož–ý\Í®ìu^Œl…GckÖ¬)¼4ês¬dŒ`²ð¸é¦› OÇüíÆw£’ýÕW_=ªŽXéç @€ @€ @€4Z å¡]lÕ« JbµÒË_þò¢UyÙàÿå_þ%û8êwipsÝu×Õv•XßûÞ÷FÕ_éBlmY)H¬ôLv=¶k¬7 Ìž­÷wé;åbkÐzÞWo{c•qG@š±MæX+Ù²²ñ)<®¿þúÂÓªŸ#  ¶Z÷JÅx¡ƒ @€ @€ Ðh–¿Ó.›ÂÐf¬Æj¶j+¿bu^áqÞyçžVýœ½ë.+´víÚì㘿¯ºêª1ˈp*¶ì̎׼æ5)ÂÈjcËÊNäwiýaÿ–·¼%}ñ‹_¬`M¤ÍjÏÞ~ûíE·ë >KWÈn»ZTi™“Ò÷–)’»tÊ)§ݪô®¾¢BN @€ @€ @€u ´<´«µ¿±2êŸøÄ˜¡Öƒ>XTå;ßù΢ój'[¶l)º½mÛ¶¢óFž|à( í"<;î¸ãr+»~÷wwÌqN¤/±ú°ð}pvÅ ¼Øn2V¯•†aik¬g7nÜXT$V¿Åê¿v9N8á„véŠ~ @€ @€ @€],ÐÖ¡]¼»î¢‹.Jo~ó›kÞ21¶š,<êY}UøÜdŽo±j0ÆWxÄûøâ'ÆþGôGéu¯{]ÃWÀÅûàî¾ûîQïþ{×»Þ•â'ÒØ*´žUo…c¨çséʵÒù«§.e  @€ @€ 
@€*ÐòÐ.Þ¡\é«Îª½o¬´|'žG(¡Õ%—\2j‹ÐXy—­†‹p¿õ[¿ÕPXÑ[?fmúEÐ?F€ØŒð®°}Ÿ  @€ @€ @€½&0µÕ>ꨣr«èN;í´¢ßíØ~úé“Îã^·n]nÕ]¬p+wÄê· .¸ íØ±£Üíq_‹wÛ·oO±]ft¥G‡±ðýïé­¶ø`Ýu´Ã±.´Ë.»¬¨;^iWTùó'Þx㣶̼ãŽ;ʟеåË—=Ï=÷;!@€ @€ @€ô‚@×…v±ÅdáñéOºð´ã>ÿþïÿ~QŸ~øá¢óÉ:‰Ðð½ï}oQõ=öXÑyáIéûäîºë®ÂÛ?ŸwÞyE÷bE_£ßÝWÔ@NÆkÑ„®i‚ @€ @€hS® íÎ=÷Ü"êØzñãÿxѵj'}îsŸËýT+7‘{·Þzkzÿûߟ{ÝXõ<þøãEEŽ9昢ózO²±ÕŒ=úè£EÕ/[¶¬è¼ðä°Ã+ÿùϧZW÷¶ÕŒÏõX4£?Ú @€ @€ @€ÚO ¯ýº4ñ]}õÕ¹P¬°¦‚â'ŽXÙU¸*»^X¾YŸ#˱׿þõ#ÍVêÓë^÷º‘2ý…“Ç{l:餓rÕmÙ²%ÝvÛm£ªŽ¾E@Véˆm. û¼~ýú¡d6¦ìÞwÞ™N;í´¢jbKÓ(—•‰›ñ|Xö/î–+ª¨MN&bÑ&CÐ  @€ @€ @ É]ÚÅj»|0wÜqe9Ë…R¥K·l,½?çc…QßøÆ7RŒ­ÑGdñSíøÄ'>Qívn%c„€¥ÇXcÊÊñ‹_L\pAÙÀ°–þE=•æ;k£Y¿cUçD,šÕOí @€ @€ @€í#ÐuÛcf´±*ì‰'žYé•]¯åw¬úŠí'ëˆP'VÕsD`+Ò&z¼ç=ï©«Šèg ÕVÙE…ñnºèãx#o¼ñÆô±}¬î*¢·Ürˤšuw¦ãiÓ3 @€ @€ @€@g 4=´;æ˜cŠÄæÏŸ_tÞÈ“’®¿þú[2–¾ç®´ê>ûÙÏæªxf¬ªôùzΣ_ñN¶¹.»ì²ŠF}Ú¾}{Ý]%×5kÖäê»îºërÛ„Vj/ ç ä™@E‰vS§N5O>ùd2 r'œpBìüK/½”.ŠhwÈ!‡ÄòÐN…9)Ƶ@€ @ œÀOhOÊÕ«W[Îü±»Ý¨ýGy¤úhZÏ7w¿•ëw.M‚„ÄdâYª²t„7…ÒÌ$œãªU«¬è—*ïà5-0›­©Žé<å (!2_–-g‰¨ŠË=cÆŒ|U|!@€J”€Þ——3…€r¿8àN³… @Mž@—.]Œ<ìR|÷îÝfíÚµæÒK/5øÃÌ Aƒš<@h ŠíAâœRç$âéÜgœa®½öڌĻ‹/¾Ø 0 ÆWÞg¾Ýu×]þalôèѱ}ç7¿ùM¨`'±Q"–+kÙ²evݹ0qO!.ÿõ¯ù٦܊cãÇ7gŸ}¶½Ç…¿lß¾}\Z«Î7ÝÓ­[7s 'ø§ÍK/½êŦ5õÎ=÷ܸ´þÁücÿÐî« š®ìdyûÌ“q¾üòËC9‹Þ”vV†Âh*4h&ëå¹üØB(Ù³—™ßüf¶ùþ÷‡˜+®øf1ª@™€ÊžÀƒ>ìôָĺ^½z•}»h @ÈvíÚÅ„»mÛ¶™;v˜«¯¾ÚÜÿýFÑ 0@ÈŽ@Ŭi皯°‹A¡Ê]Ó6ñÎÝÈ!‡¸]»ÍäÛàZsÊ@b]*/@Ý#Ë÷Ô}O?ýtÒ5îÂÊq•~øá‡Ä4y¥é?Yßtî¯ý«7n\Z‘SëÒýèG?ŠÉ$À%µÆœBVúöÊ+¯˜ &ø§ì~XÞÉÒº›ÃÚã7šË.»,¡ºGm•È UšÎ[ЕÇ€@± \vÙ,óÆ_šªªææ7.,vu(€@ÙЋr×]w­·»¡C‡2ÑTv½H…!@(ýû÷[;E¯’)BØÿøG‹«C(({³¦ë …Éœ7ožÄÜ9+O*yÞM:ÕHà)”I8òM‚ݻロR“'™Ò(­oW^y¥i_B_˜÷[P°Sf:'O³džl~zùïÿ»*NÀ‹»`ŒùøããNÉs.L°S¢°¼Ý„J\&ÞA³„Êk®¹&T°Ómj«Þ’žo~h$ÿ<û€JÀž=ûl•êë¶¥V?ê@ ” hí•[n¹%VÅÃ; Á.Fƒ@€ ž€^z:òÈ#c"ÝâÅ‹Íí·ßnöíã7jzz¤€ H âD;5щ]~(Å`ÓxwÑEYo«àõ\ËcLåù&»0ÁÌO£}¥QZßäy§<£š8HÌÌ—e:H!)}“xšÊ”·<÷œ…… uׂœu_˜PéÒû[¹ïû&M € @ r (âÂ/ùËØoýÝ©uì0@€ Ì´hÑÂ}ôÑF[Ù /¼`zè!óÕW_e–©!@ÀT¤h§~uÞb s˜J¼SXÄc=Öè-|YP¨’˜Å‹ÍÕGiƒÞvï¼óŽ»œr«² ±>›ÂŽúUT ®§çç‘é~ÉOúÓÈYÈuß·×^{Í?d€ @ ÂÜpà fÍš5¶U:t°ëØUXi @(6mÚØÐ˜®@9!èå{…ÏÄ @ :†×¢§/»”N¼;ÿüóíâò -\#NÇ 8kÖ¬HÞo™BPØßÎ>ûlÿ0Ò¾Öé»í¶Ûbi¿üòËØ~ªnݺ¥ºœÑ5…• q©L>ú裸¼jkkmx˸“ÆØÐ¤¾×¡ÂP¦ò”˜ê{×ÅK?ÿ “÷ßßlݺÕOy?8F"ßHB@€ ’'pß}÷™·ß~ÛÖ³ººÚ®c§ÐN 
@€@öºvíjnü‹/¾0{÷îµkÛõë×Ï 80ûL¹€@#Pñ¢ëO‰w‡ô¹ûî»ÍôéÓÝ%»•0tçwÚõÏâ.äà@kìù6nÜ8ÿ0Òþ€âÒ½÷Þ{qÇù:P˜H}䑘 “«¼o#Å'ÌóP‚]p­9¥OfAF_äLvç!@€šM›6™'žxÂ6Ú­ÁRUUÕ´ ÐZ@€ '‡z¨}éÆ fË–-öï®ÿøÿ0Šl€A€@zMF´óQ(\äw¿û]3dÈÿ´¹öÚkÍe—]–o»¸‚²8Ðx¾i²!Ÿæ3ßË-åI4•·œïÉ6fÌ»nÝäÉ“MÇŽm1à|O?ýtÜ%y¼åÃ^xá…¸lÏ8ã sÿý÷ÇkÌÁôéÓc·ßxã–ƒXÌŸ?ßHÌt¦ÿÔ¿ñodô"èUWª^“®l!@€ GàŽ;îˆÖ¿ÞðŽÑ`hJüñ¸æ^ýõqÇ@È%½$µjÕ*³{÷n#o»÷ßߌ9Ò´k׎¿År š¼ Š"P¢Ö`“vî¹çfÔ9z³ÃÓts*1,JS^iZ/ 6,.‰ÊŒz¯nœ9sf‚[¡Öv“à˜+S›};ùä“c‡jOcÛ i©™Š£± ±@€ P1,X`.\hÛ£I¢Rô²Óo“O>ùÄÖ±ººÚ®1Íš/‚7n46lˆy)Çnݺ…æÒXÀ ÜðŸc.W}ß‚/غkÙª¬/¾øÂ .Ÿ:Øg§;f[º4·råÊœWPÿ><çù’aöäm7pàÀØßaÏ<óŒ1bÞvÙ#åN@  ¨ÑnÆŒ6Äâ=÷ÜcxàHBšúö7¿ùMB§Ž‚¡4kkk#•ÚQ!'gÍš•ö‡¢„®ÓN;-®žZ«­P ËVîßþö·ñ3,]]]]Üé?ÿùÏ9õâ“€ê « ×yÓM7™k®¹&®ÜTÙ À©òä @€@q ¼óÎ;± ôë×/¶_J;¯¼òŠyï½÷bUšÉ˜&`Œù¯ÿú¯¸Ó×^{­íëtyHÕ¸éÝ»·mOÐ+0.S @€ÊŠ€[ËNCjJÑvíÚUŠÕ*»:é¥Ã›o¾Ù¾ ™®ò›6m²é”^‰ =Tß¡+Vصô²Ï=þN½äÌÜz}É®s¾ò _¯ü—G åu+Û»w¯}±æÀ†¾*¾£–€@á Täë šô‘ו¼Ô\(†­[·ÚeîG»[iÏ?ÿ|ÿTÂþ˜1câ<ÊæRÞ]N(Ò›±¢£9(P)o=7e®z(ÏñãÇ[¯cÇŽ¶LýÐÔvzû$hÿòÎGëÈM UdzÏ>Û^JÅ1xoðXíò=ÕF kéLý3mÚ4sÁ¤l¿8+,¦ÏN}¢Ú ð§n<¨L×gÁòùÃ!H„c@€ PžÖ¬Yc4a,«©©aý”òìÆHµÖ輻z(RÚ`"…& û-äÒÉ{oÑ¢E±P|GužNÄ­^¤táðcZ%K÷úÕW_ÍIØB‰ƒ¾Wr!¢ª"о}û•D1™èÒ¥‹ i«{>øàóÍo~ÓþŸ*¯eË–™dEZ@O "D»dëÐI¸ zÓ%ëÑgŸ}6í.­Á˜TFPŒ +Cë%èG^˜çœÄ»0!1˜Âbæ{6 ‚7ÞxcB;£Ö1Xçàñ”)SŒÞŠ›8qbðRÊc'„?åf⬾l gå+; € @ ü øg—ª—]ùS.~ $ª%ìFezöìi+¹gÏ£e2 ˦ßrÁõÐS‰|Å'Rz5¸ï¾ûâ*uýõ×ÇsPyôÌ•G«L[ ·} Ù­ýéhy䑟ݽl‹GàÛßþ¶QËT¦5 ýgµ iœê­iÇz¥©ïšBaªvïÞm¶ü§?ý©­ÌŽ;L§NŠW1J† P‚*B´“W›¼Û~üãÇyXEá-ï+¦1]z•#á,ªÌO^yú±wê©§f\OyØå[°sõÕpâüQê®·bò½ï}/’§·â®»îº`ËS/Ì›Ñeâ8kÝ@’Æ]O·•hÙØéÊà: @€@a¼þú뱂ô–7V™Þÿý„†i9‚#F„®£å<çäýãD…„ ¼šdÄ ÌüÛ¿ý[ܼÂG}ÔèßÚo½õV¬q$üøÂNì";%M@s.Qæ]ü¾íÞ½{Jè’n0•³ôwتU«ìšv .4C‡5z™F¡2µÆ@ *B´SS$¨i2­¿vë­·¦k$Öi ³d[ɈòVX ÇK¯|ü¼Üu Jï¾û®ùë_ÿšÆÑ¥ñ·Ä´†]”?füû»?cÆ »Æ\*/Bµõ†n0&LHX›/,;ýp iÉ\hP¿Þ/½ô’= /¿ür£…‹“™8ëz&ãAuK~3Yyœ‡ @(=úûS“B2½Å­5í°Ê$ðñÇÇ5La·RyÂUUUÙP}Ç·/VöéÓ'î~ ÆzR½ùæ›FBž¾Ù˜Bàú"»òBPφ$÷@ 8œh§Ò"S¢Ýþýû^¤iÕªUq*E©€J@Åýj•§~ Ë«mîܹqØJS¢ZcÜåå‰vÙe—Yo/÷ãP¢Óرcíwq†¨lyÍé#¡QáY\>J®¼´FBp]¼¬N©m?ü°Ñºs²©S§&¤‰zÂÕqþüùF!(\ž‡z¨ýì ‰~¹º.Ñ,h*}ÁN¢ß¬Y³’ö…ïYø·¿ýÍüä'?‰eéç;²ãƃÂpˆs>ÆCH±œ‚ @(o¿ý¶RUôâV¹ܺ…®…G}´ÛM»MõÒeÚ›I$% ¹…§õ× SÈÃl¿sz¦û&Q0.Ó¿Î> PZ´ž©^ 
úꫯŒDü³Ï>ÛVPÞvˆv¥ÕWÔ(.ŠíNýq(Ñ+áËå‘j«üåa¦OcLâ–>ÍÇÕAõ:÷ÜsÝaN¶Q8F)÷±Ç‹«O*Á..¡1¶M÷ÜsOœè'aVåF1z!_ã!JH@€ PX~ÈDBc–=¥AÍwø¢&ê³íôûß•(10ê|=”fÍšÙÈ6l0ëׯ7[¶l±Çò˜• ‡A€@ŠíèàÒ#ôŽËälýÞ˜ûK5‚ @È7%K–Ø"4IÔ¦M›|Wù+ÕÆ&Ìü°rݺu3]»vÍÉ$¸¢\óOG‘F2ù}.?½µßXûðÃcŒää[ðØ]Ç¸ÃØVLV®\iûöí·ü~ëhŸmÛ¶Ùë4a"³úméÒ¥6Muuµ>¢†T—°üX%ÓìûU¬3auUV~}ò~çwÂN‡®C¨që‹ð™ŒŸ½êœJ0rkê»Æ8˜—<½¢öûî¹q Æ« 1ô£Ø„B‰pRãÈM„»¼{÷î´"d™“$AÞòŠÕØH6n’º`Á‚¸K m›K òSÞéÆx²òýqü¾Eý¾ó?Á4aÇ©ÂÓç’A0ïb«]Î#3ø WEe’€$Kö OVÿB0 >{U+}‡¢>’Õ¿çµœŽžU²åË—[Ñnß¾}fïÞ½¦e˖ŨeB(9ˆv%×%M§BZSð¿ÿû¿Óþ8WxN­açÛgœá²@€ .d"‚1šô›7o^œÇK0cLçÎÍw¿ûÝ”ÂFØ}𸔍¢hÙZ&“ËÁ2To­+…Ñ Ï‚÷%;ÖDlØÚÚ.½<~|¯wþÈ# -Wëm»ôJó£ýÈÞ"á*ŒÙ¤I“Œ–ðMk~» ­AŸJpÖ½éX(?­±>bĈŒ'áõݶѯŸúçÔSO —~º|îkI,H@ Žïtå¿üò˱$jSc¾ß±ŒŒ1Z'OÏ ÿ»å_×~²1LçŽýþÈöûîòŠòtiöaß§`º|0–QÌc7ÿyãžj÷C=”Pµ~ýú™iÓ¦%œ÷Oä›™þOÕ¸ôǽ_¾Û×wKŸTÏ —¶T¶­[·ŽUE‚©ž{2½,hCà ÐÄ Ú5ñPÈæ_xá…æˆyÛm·}$À~øá tëGéG}d>ûì³Ø=nçÚk¯u»l!@€ @@Ž&;eþQB ?¡‰¿W_}5nÂüI7‚¨|ZÐøšjì‘G õJ—»øI¨H啤M¡ÿ“ vù,7jÞ©&´“å!¡T±†jjj’%-›ó )šNKÖ˜T"M²{Ü÷îW¿úUZa(Ó>ÒxóCâ&«C>Î˳Q/øÏeÕ?:3YÙ~hT¥QxÔÆX¶ÏF­Í·k×®˜§lÔ:dó}—e¶c/J½ Í J •F¢X6l Á,Óïµc&aÞy»s¥ºõÿ&ó_Pßýû÷…4Ç 4uˆvM}°ý£G6O?ý´9í´Ó².õá‡6çž{nÖ÷s# @€@Ó#pÈ!‡˜”]à (þ$¹ Poš,ïÔ©“=µeË»ÞÚ—_~é’Ķ3gÎL&Le-˜ÿš5kB½Y&Nœh×ôÒýòÒjL{Û¶mkà ú!Д¯„$}$ê7.²x§üä5æÖÆÓvÊÇ™ê«õ֧̂OŸ´ípuòïUýÔê ‰+òMÇC×Ó¥ñËð÷SÝ'ÕdÚ®žÊ+¬_%’º¼å•äÆ˜ÒûFá ìgÏž±<®»s®wu«û’Ý<äp衇š£>Ú¥¨0úÎ(œ[ð>­‡&Êú÷+“0†:ÿÔSO™þð‡I›”¬‚ß=WG—‘D§ ë¼õ8,ÿœ¼íüçÑâÅ‹ÍàÁƒÓf¯¶úß=+}?ý¼ƒ™¤º¦´ò°óëâîWÞ΂ß{W]äA9fÌ—,å6Ûïû?þñ„|5~ {n†ÕO7ùß­dß§B2HhH#Nc‹d IDAT¤ëÛ(YCbº~—¨-Xž•aåä›™¼zƒÏµGßk…Ô˜2ý?!qOõôí¾ûî3¿üå/Ó þþ=ÅÚ—0'NëØù¦µíí|"ìCM•¢]Síù"µ{Ê”)v‘ß¿üå/æÁ }¬ÚGa' .¸à‚Èk6óà€ @ iÐÛΪªªÜn“ÙÊK#(hâoêÔ©Iÿ¦Ö=3f̈›”(‘lr]Œƒeh²8l2û¤“N2¯½öZBú°´Ùv’òr¢W0ÕSŸLÄ»áÇÇeã ìrQwMŸyæ™qbâ‰'žXÔ0asçÎk·Rõ«Ö]zå•Wî‘w•¿v\P´Ë¿„BóxB"®?&T}‚Ï“èæ›D^1 £©õæ´Þ¤ÏGcMßÇ^½zùÙÄöÃúHc[cÇ7ÕQ¢—òöǯŸ¦PûA¯: ˆ&Leâ×iÁ‚þa\Ä]ˆx ®AñR/Œ;6¡/]?ÿüóq÷ˆç1Ç“>]¢~ß—/_÷V¾ Ø¿ÿ¸"\ÿ>ñÄV@v»uë–Ö]Ó¶˜ üz”ÂþE]÷=SØVÿïWÇB0 ®_—êÿl}×5N‚ä|“ÿ›\»óµÕs3Ìó÷«¯¾b]»|A'_@ ¬às\VÝU•Õk®¹Æþø—+¼~ Üu×] ×uM(}²,•A…V@€ ä’@ð ô\æ]y)›ošüK:Ko+Òú¦0a&¯ß$Lh9Ì4A§IF?o =a“£a÷G=§2$®$3 wwÜq‡yñÅck&K›ïóšÀ?çœsBE‹ 
”ﺸü%ðDbuê©Iî³Î:ËeQ‘[µÏì\#ÃúI“æ¾É‹G^sa‚K§ïMpÜj}»0Ów&ØGa‚»Wåª|ßûÊ]+äV¬Ä·(B¢/ëù®üü¢ìŸâ®1Ö—ÊOç%¬ë®‹™X&ß÷õë×Çe­¾KÖnõ¯„ß|fþy·_,®üRÙ^z饡óLac!ßÌ$ ú߇(ÿgkL¨ ¾½õÖ[þaÉïǺD; €ŒA´c•€&6Ske?:PWÔî¡p@€ PZ´hZF‚áäÔ‰ò° ›ˆ v°Ò(­o@•gÐV­ZwJá»ÒY0MPøKw”ëW~þóŸ[¯ºdéx§‹eì¢ôI!ë§p{¾iâ8™ë§S˜Cy¬Hü¨4“`%Œ£kwpÒ\^¦Q,( …9—G˜Xôï|Ç]NºU?[\ ¶1È*Xy½Àë[ðùá_‹²|6JH Ö)Y>F} ÖÍ¿¶ŸÉ÷=ø=Ô ©LÂÚâ,,Ô±»VL®¥°ÕóÊ÷NU§B0 ŠôŠ£üÿ 6ø‚²þ¿ÎÇÿ«©øäòšBfb€ €hÇ€ @€*œ@S킾šÌÍäe8¥õ=â4<#µjÕ*]’‚]×$¶¼îÒ‰w s÷ÿ÷9÷øK×P…º‹2!›.Ÿ\_÷==”w1ÈÕAã&“qæî+å­¾;QDK×Mîû^¾šLϤŸƒâŒò ÚºuëâNILŠZF&âc\!9:z‹‰•Bü%3­/æÛÈ‘#ýÃŒ÷ƒÏ1½(Õ‚Oªzólì÷=jÿË ;.ƒ°ºëœÂ¡fò¬*3­Qç[ð»â_ îûë0ê¢]Ç€Ê@Ózå´üú‡C€ @ȈÀÖ­[ãÒ}ôÑqÇQ´þ”ïé³mÛ¶„Û´n’oŸ}öYÒnJ§°~ÁIøöíÛûYä|߉wãÆ³¡ÇüuÃ\aªjjjÖs×ó±Íå$|.넺víšËìË.¯¶mÛfT纺º¸ô»ví²ËAÄLqä¯ü4†}S=ß´|9™?|c²çFлI‚fE¦í>ÇÖ®]köìÙ“i66½/Î¦Ë Óï»%_@—we*ÁU"ï]|é¯_±øu(ö~uuuFU(3<©ÿ2ñ¾¨¬oF-râ¹@ 4 Ú•F?P @€ @9!œÔïÓ§OÆùvêÔ)îž5kÖÄë èy"‘¯G¡áæ$Ø=ÿüóq^Hš˜lì$|B¥’œP9ò˜:æ˜cl=Þ{ク”ª»ÄL¼â2¨âXMÊÄ¥B4ªÁ±^|ñ¥Q™}sPØËTÊE“ÇQG'Úé»'Î`;æÎWLcCc*³ ¸á¿˜WX‘ºwïWƒÇ܆Ÿ û>J°{æ™gâÒ륋dV. ’Õ¿çóÍ,ø¶žÃa/˜£í” @Å!€hWîY—ºbÅŠØ]ýG®cß–,YºÞ„Ÿ&Êþ Aƒ~@û¡(†šðGu”|I@€ ”>:ÄURCA“¦Ð|þÄ÷SO=e^{í5£Ic'üIð ŠdÊë¸ãŽ f™÷c Z+H^+š÷MÞ?Ó¦MóO±’#VƒâyÉU8P!ÕWaG}Ï0…ô×–“Èï?W”E*O³@e¨¶ê¥¿¯ï¿ÿ~ËMs5ÎSLbR˜(‰úÝôø)V)QÆb9ŽY­%ç‹vóçÏíÒ7}W›šé傇z(®Ùbæs‹»øõ^¤óÈ K˹Ê$õÿïÊl=­‚ Pí ÜzËTÞpú8¯¹bˆr¹l¶ÚáÆ·ß~;!k yz W[-«7ÃôÁ @€ Ò$°~ýú¸Š%›ûû?îÆ vçœsNÑÅ1Eñ×ÖRUj.Y[CšÂ)¤$ uØ;ì°”i’]Ôš‘Q¼èÂÖ½K–g©œ†o”¥ïžkïo¼WU?êOÜ…FHàrÁ™f•m¿F-'ølŠrŸÚsâ‰'FIKSÊ b•,±|3Ó‹ðÙz¢K°ó½VK Õ ˆí"‚Ê&™N–8'¯5 sÚ÷Ãd’çЮզªY3{K¿U¦s놮ëÙ¶¥éÙ.±;W·0J—‰Õï;`nL {£<¬=x~á†]FieK¶ì6Ûë÷§,Fm—Ðëo'à¹mÊŒ¸@€ dL@ële*DmÙ²%®ŽÍ?©uxü‰åÉ“'›·Þz+éo7©µåJÁ µž^)´5Û:¨3;Ù–U ÷ÃÊŠ]®Ç»„@?$b¡£ò䢟äA8jÔ¨¸°¹ ¡+ÁIâïM¦öæê»ZSSWýR]ÇrÞ¼yq ôlMµÆ™^„7b”59Ë…A\Gù ßÌÚ·o×¶mÛæü¹W€ PòÕž’¯réVP"„)}$T9ï³(5Ù½iWÕÌ ª©6NˆÔ©Úž‹r.ÒT5?Ĩa–켟vá†Ý¦~ÿ~+ðÕïÛot¼}ï>³dó?Yl_bf0 §„§úˆ6@€ T>…ÕóMžzο¶¿xñâo¹0o+rý裎e¥w.Ì]ìdw4‘-o£L'ß—/_žP«°v*QЃJ¿ mZjÀ·¨uPF5µß|_{íµÈ¡Öô«ÆY²~§¥y&“1Ùµk׸&øuŒ»8PQÓnÍê08~T¶¸'å³Êü뛂ãàý÷ß·¿—£äùá‡FIV4'Á1¡—‚ëa0 gõ †´”gp¦/4ä¬2I2Ò˜õíðÃj|ÇXìbÄr`±)KVfAÏS=·µö*@M“@C¼Å¦Ùö¬[­?ºgΜi.ºè"söÙg›»îº+A°s^kç íl®?¾·yèÔÃÌóg 68ñPsÉÈnfRÿöVܪÁ.L…딘wæó«q½Ìý“˜Ùç ‰ã ç©ÞDûÛßš©S§šÛo¿Ýz0¦+‹ë€ @hÊÂD“gžyÆ 
[é¸hÂøñÇK¦õ{Ò™&¿‹¦O¿zè!óì³ÏF®ƒêê‡öTû4išÌ‚Tò@+´½Í$š'øƒuR˜½`ÓøÇò˜ôM"[¡GÂéwÜaûÀ¿ßßïÞ½»hŬ¸)N1héê%QöOúS𶼿/òšÊ仡´j[²{‚‚úHýœÎ^|ñEóÔSO¥KVÐëA/º`ýÄ2¬ï³­¤^&PèLìôüÈÄ4”nìe’_0m°ß}æ`ÚlŽËA6íÊç=…`èôÜÖXËÄ$ô¥û?!“üH @Å#€§]ìîR`/¿ürèЃjZ™±½Úšñ}Û[oº ²n²I%äùžyòÈ{»v§y{õvóöê±õòœP*±´_¿~æÌ3Ï4§œrJNÿ€o²@Ã!@€*Ž@p $…ÛÒo™“O>9©§†&üžqãÆ…òñ=4ù}Ë-·M²û¡Ä䡿 ^š€Ï—žD,}ÔöTë`I`úç?ÿ·f”8zôèÐv†OåôzÑï–\zUùeÅ]“«ßFÁ2U‰E™z™…y?9!Eá ƒ&áƒ>ˆ­·¥qÕ´¬DP`NÆÏMšûù«^òÀ öꤼƒ¢lÔz56ݰaÃâ¼ÅÔ;vìHùÝS™j»ÒºÐˆ§Ÿ~z¨—c Ý#Q7¬’÷ƶ3÷kIŒTý”JH϶üãŽ;.ÆXy¨|­á9a„„ï‘_†Ä=_œ'`¾Bþdzʔ-Ó_[TÕ¾ ©çlð9à×ßß/u~]Ke?ßÌÂÕÄ®êïÂ&~ôÑG ‚d¤¾N¤ _y.ú&L×^]SxÎ`ÿû÷÷Å×µ÷ÝwM†ûù)¢MX_'ÍuÏ.AÓù°ï]0]¦Çz6Ê3ØÞ©¯ôQ™}ûö{ñ Ù˜ zÄeZTé%Ôø/P¨®©ÄM——Æ—ú_bh²ç¶Ò–צRÙæ›™X õúûæÆžýþ‹ɾ߻víòog€Ê”¢]šŽÓZu ©·Þœµ«jf&õ“°Ô:%OÛñ}Û}ê÷0//¯33—l6 7ì¶¥éç›o¾ÙŠ©×_}ÜŸ'NŒ¼W²<5‘Æ*j;5QÒI'%ËޞϠ`¦ A‘22e¦Y\ÔZßamõ¢`¶êÇÍ›7‡ÞL«c‰Jº'Ø6] ;çç!±*™im® ð 1"(.'»?LˆVÚTý¬¾•@,7Y¹:¯ñ¤ªÁï™Úë‹E©Ê ®£è§Õw=( êz:q'Ѫ¯ýo}ë[q•+Gçóaú.k™“°Ð©©ÆR°.¾qðZcÕ¿çw^‚xž._×ÿï©<´ÊAº¶úz!˜ÉSvíÚµ¡ÏÙ¨ÔÔÔ åA€@°¦]¨zkê׿þµ…œ`'±núèîfƃÌc{ Ø%a—ÓZûOx:©¿]OBž³… Úõƒ @€@9hݺu£ª½mÛ¶„û%ªI¸ó×pJH”ä„<ìN<ñÄ$Wžž;wîÁƒ,ö4¹e=®TYKX”‡B6&áãœsΉóRL–ϤI“’]*Øyyqi2>ªEíÇ`~ê{ <™˜DÍTâ§êžiž~ùš4×xŽj®oý‚Qïml:ÕUã*•ˆ™ª ± —çß#ÖJÕ4n¢|§£æ—«tÉÚ™ì|.ÊuÏF‰ºÙ˜^6HåÉ–Mžþ=šj¬Ð,-…FMf¥Î Y½‹y¾ÌôÕs;ÓÿõÙ>s²){ @ ð´ aë;_š2¨“¹dd7#á+.­ø»ïô1 Öí47¿¹Æ¬Ù±×zBJdýÝï~gÆŽ[Ü R: @€2$ ï¦TÞRé²Kæ•£IÆ‹/¾Ø®; –§D­aeBúÅ_LðÐýþšv® EÉPØ®uëÖ%xÉÓJ^T:²5y( 2$r;5±©5ìÂÂ0&«ƒ›°}ì±ÇÚàîQhº|›¼pBrÆŒIë¡ö©/TçlM“ÇGq„‚c~žŽÔnßÓÓ¿îï+O‰hÉ ‰(©ê¬kòîÔ˜ ó8TYÊC¢Jp½–&F»víš‘€%­'曼 4¡™ÎÂîM¶ŽYº¼’]W´µµ±íL–¿ò®­­õW·nÝ,¿dBƒÚ¼jÕ*›]0”i²2¢œÖCy÷îÝ;AtU:7¹ß§OŸŒúZõã©6«¿£¼Á¶H˜×?ƒªw”ñãòRÔÎË4Y*ëƒ>°·¥c¯´ZßÑ}5qžMû\ý­òV?(l¦Ë_×ÃÑÏ_<ý¼“IqsO6cÁ/3¸/ÌõG&yûÜÕ/“}—‚eïU¸ÜlúL\êêêbßSWŽÚ¡qµ>º¯1ßwyû!c%Fõp¯»î:×”´Û\2H[X„~ß*y¶ßGÌg:¾ÒU³Ì4ž46ÝwKuÒ³Lã2“gfº¶òú;ï¼c´.ŸžSüãcE«˜Ï‹á`hÂí¯àŠo®?’QáåÙ…•6ß¾^îÎ<óL3}úôÒ®0µƒ #ˆv9I6€@Å(GÑ®œ:aΜ9qÞRZ)™gSX»{Xžœƒ Pî|ðÁ8/»_þò— †½¿ÜùQÿò €hWýD-!â Öc€½ÂtH°“9¤Á.À§TµÖ`MuC´×gŸ}¶T«I½ @€ Pôæ¿o™x¡è¾ÆÞï—Í> J! 
‹™É³UsY¹¿RÒ@€@¹@´ ôàK/½;sT—Æ-ˈ¼è\Ý´kyˆ-G.öK—.Í{™@€ 4ÐÚI –ΔF/Ù¹ð|.}¹†ørõg @ ´v¨{±;€ @€@n h2ØÞäÝqÇw­¿¤õñzöìWàš5kìz[A/%šzæÖÔÔ˜N:Å®iGkU®[·Îh©— ?>xŠc@€Ê€¢] “ôÓÙ£o2ߨɴ«Â!Ñ1)ÕíÌ%[ÌŽ½ûcÕC´‹¡`€ @9'0dÈÐ<%Ê… s¡‰±"ߘ1c’]æ< &E`ôèÑq¢k¼ÿ’„;—j;qâDÓ«W¯TI¸@€@‰@´ tLÛ¶mcg¶ìÙg.i…ùà ýîbTJoç¹¥ÛÌío¯-½ŠQ#@€ ’  h~D’¨T™WB/É]z饿±Ç õðˆÒº»TÓ*Å]\Ê7ú}ŒÂa>øá#áΙúE"+@€ †@UU•õèWG}}½Ù¸q£]»nÏžø¿Ë»uëfÚ·oo4±ŒA€@jÞŽ?þxûÙ¾}»©««3«V­J¸©OŸ>öÙŠP—€†€ ²%€h—¢ënøNó¿ïo0o¯ÞaS-X·Ó\4k™õ¸›rD'3¾/o/¥À—óKOåU÷ħ›­ê КƒÓía¬Ûhç °… @€@ HÀÓJ¬£T`ðT4 rúðl­èn¦q€ D»ŠÄN­š›ßOìkæ¬Ünî]°Þ¬ØVoIÄÓ§gÛ–V¸;e`G¼ïñå䌼êÄÎÊ:óòòº„òòRHÆ5;öÚ»´}bÑfûqÞØÞmYû.ÓTIÄÕ £µ ⨄» MêßÞLÞÕôëP¼Ä1 @€ @€ @(;ˆvt™¼ºô‘ ¤Ú:ó¼ªæ‡˜¡]Z›‘=Ú˜¡]«­ž<Á°pZ‡NŸ…v… uÁÔb(þ M*‘ƒ @€ @€ @•B%)‹žÛ«Á›Nk¬)tãËËê¬Øä²’g˜Ä'}œIpjðªÍÈ­Í NÕFk±55“ × Òí6 7ìŽcÆBÜäå8i@{3²{›°$œƒ @€ @€ @eOÑ®](AiÊ Nö³½~CHÇÕÛÍ‚µ»b!4]öNà“Èg>l8ë<ò$Þ ª©6[7·áåEVΞdãê÷ï·ê÷í·âÜö½û¬Xçx¤ÚJœsaFÕ´J•”k€ @€ @€ @ " Úå¨%¼i5}déB>î6 Öî4K¶ì6ö|sy:gÅ<ÿ¢1VÀ“0(“xÕ®ª¹Ý×:nødº^ˆuÝ|¯AµËµeŶ=fÓ®}¶.am´Rü#qRˆC»¶¶mÄ›.,.A€ @€ @€ P±íòÔµÓÆöÒ§­1ûØR´î[»MÞhö’­ß¦tÍ]÷E³(UÎVÌË´œ(uqiœ!ký9"l!@€ @€ @€@D»Ž'Zi6ß$ÌIÀ“·½~ŸÝ¦ôüûÃöu¿>…´dá>³ YwÊ‚ @€ @€ @Å$€hWLú_—­ð–ú$ é{¿ù¡)×ì¨7k¶Ç sÙ„¨ C`Ãq¶lÁ©ëVëÚ:–T!-uNfÃu~Æ3–€@€ @jÅV IDAT€ @€ Èí"£*^B_Ìó÷‹W#J† @€ @€ @È%f¹ÌŒ¼ @€ @€ @€ Ì ÚeÎŒ; @€ @€ @€ Sˆv9ÅIf€ @€ @€ @Èœ¢]æÌ¸€ @€ @€ @9%€h—Sœd@€ @€ @€ Ì ÚeÎŒ; @€ @€ @€ Sˆv9ÅIf€ @€ @€ @Èœ¢]æÌ¸€ @€ @€ @9%€h—Sœd@€ @€ @€ Ì ÚeÎŒ; @€ @€ @€ Sˆv9ÅIf€ @€ @€ @Èœ@‹Ìoá@€ @€@aìÚµËÔ××Û´ÕqÐöîÝkvìØ<ö¸mÛ¶¦eË– éZ·nmªªªìymuŒA *­[·Æ’j\~õÕW±c·³sçÎØ¸vçÒm›5kf:t蚬cÇŽ±ó×-Z0ÝÂNJþsuÿþý¦®®.4ý–-[Bϧ:Y]]mô šž»mÚ´±§5®Û·oLÂ1 @ É௸&Ûõ4€ @€@ñ¸‰âÝ»w›={ö˜íÛ·ÇÄ Mkò8ßæ‹+QÊ’"AD&1O“ÎNàÓ¤³&Ÿ±Ê%àd7>ˆ‘JèÈ5Í›7g”¥ŸºÉ‰Ôøü±œQ†$.+zÆI4–x¬g®Äb™ž¹zöæÛ2}ƪ>îYêDj'ðùc9ßõ&@€@1 Ú“>eC€ @¨pš´õEŽB ¹F«ÉïT“ÐÎcÄôÚµk‡§^®;"ùIØÀ¡1ë<8 %pä£YwjØØ•T«V­â=<õòÑùËSãU/=hœ:N÷ÌÍ_©ùËYuw&RKÔ“è,¯S'èùž¦î^¶€ r%€hW®=G½!@€ ”7y¬IWM"û“¯™Vuh×jSõµçZ¿U¦s놟¯=Û¶4=Û%ÿ);²{CȵdåÕï;`nL ±éÒ¯ØVo6íÚg×ì¨7k¶7„6Ü´û+£k©L‚d˜0âijdóžXV Ö5xÆN„ì¸ÿW‚‚žÆ¨„<7nõ¼Å @åH ù/rlMÕùªÌ5Ÿ¯ßfk|‘}ÌÏ& +£ÚSU@€ @ÆNoذ!ãÉc'T81N¢F»–Ͷù¶ªæ‡˜TÂ^ªk®nB$ˆ¸Ég'îYÁowâúeNÌó…'äuêÔÉÔÔÔ°¦“ƒ›Ç­<%5ѯDe‰QmPM+;F%Æi i¬ºq5lÓY¡ºmâÚ‹.¿(cÖM 
$¿NÜK&’81oãÆ®»6™„‰x³ak•ų“ò’ÔxÝ´i“¯òbvŒvimœç^vˆ"¸EÉ?]š\<Ëð'Qº~ŸÆìn³}o¼Èç×CßçàwZãU÷œÕsƒ ”:D»"ôÐüåÌmÏ+ùÉ÷–!ÚÅh°@€ ”*‰OšÄ_¿~½õÌ‘’Ê$ÊIìÇÈ­­è¡ãr7y™¤J¬0²wŸd^²y·õ$qž%®í¾·|ùr»ž„nݺ™.]º°>žÕÈ­Ä' ËN\N—úUc´g»– [yv¦ÌÒåW*×õ]Ô'lÜ:ïS+FoÞc¬Ýê¥ç„<}ÿeí4f{õêe½›J¥­å^‰ûk×®µÏX1OeŽ5^‡vmmûW"ÝÐ.­­°œê¾r¸æ„¿°1ë<Ÿå ­g¬¼§%êÍy#ÖÖÖÚKðzôèaŸ± cŒA€J‘¢])ö u‚ @€ PBäé¡Id vÉB±9nPMµõBª”‰ãlºÁ “þd³Fä5â„ÿüsû‘"ï;…ÑÄRÐËú®»uÚüÔzfh¼ŽíÝÖn+ÁãÓo_>ö%fê3eP'›½Df=[í3¶v§Ñ3×™žú,]ºÔtïÞÝè9‹àìè°… b@´+&}ʆ @€ Pb$vH¬SÈÆ`øK……”'Î);††Ù+±¦”|u$|ž2°ƒý¨²N ÑV¡ eCä²bÅ Ó¯_?+†”|à \A‰ÊË–-³ðÁ¢åªñ*á Ñ#H'ócç‘8mx+€h¬¾¼¬Îøkã91D¢Ýá‡NèÌÌò¨ýâ‹/ì÷;xYÏØIý;Øg­„;,{â'žÖƒÔëy÷ÜÒ­öYë<åÙ¸råJ£šò=ôÐC­·sö¥r' @ qíÇ»!@€ T y{,Z´(a"YÞšD–ðÁ$rþºÛM.Ë;Dbˆ&—å%"ÓIJÄ;y’ 6̆#Ì_MÊ'gM¶K`ö=ë$.Oê'1´#uyìJ‰ÎòhÒGÈËËëÌÌ϶Ø}«µÙÞ}÷]+48u¿þ/^¼Ø(ä°o—§ÑÉ>cmØGÿ"û9# Ñyúèîö#¡YÏX[=sõ ÑóD/­ <Ø®{—³‚É€ D» `‘€ @€@¥Ðd¥ÓíaY¸Ñ<±h³ïôlY¸p¡›õœÅ @…&€hWhâ”@€ @ Ä(ô¢¼•œiM _ëeäý—€Äß}§Y¸a·¹yîj³b[½õ‘·Ž¬GÅ­`Jפú|`´†3 FÓ†wE¬s@Џu£,ÜdüpƒB$6øá‡føðáMR¸“`'!Èy„J`þÕ7{ÚµêŠØUmŒ]UÂé™GÖ˜›ß\ón–جþ’×@($D»BÒ¦,@€ @%F@k+ù‚„­W…• ©÷O`î]°Þz„¨vîÚµk×äÖ S»`'ñC¢¦ø`¥E@Bª¼ß¾^k×ÓÚƒ®Ž=öØÒªhžk£Ð¶¾`'&z)Ba\±Ò! 
gÉï'ö5Ï-Ýfng›×®]kŸ±Zëƒ@c ÔÕ՛ŋ746î‡ʈ@ûö­ÌàÁ™ÿ®B´+£N䪫·î4/}ºÊ¬Ø´ÝÌ[¾>v{ç6ÕfT¿.fÜa=Ìèþ]cç3ÝÙ¾g¯yùÓZóQí¦¸üÓåsx·æÖŒK—Œë€ @€@h4gWŒía×§rÇlK‹€Âfj=&måÅ$Ó„MI‘Ç’„f™&Ùÿtr?·Z¥ù¼uÿpB?séóË­—¨ÄÖÚÚZÓ”D¥K—Æ<ìòöúã€Js´6Ôê”LçÖÍÍÕ¯¬´'6ºk×®¦ª ÏóRî·R¯[}ý>sæ™77î,õªR?@ Ç.¹d´Ñ'C´Ë„V™¤¿|ƒ¹qÖ»æÉ÷–%¯ñë —ŽèÞÑÜvæ83eDÿäiC®Ü=ûc3ýѯ3 ¹žî¢]:B\‡ @€@þ ÔÕÕyÈäý¡5ì°Ò' Pno¯Þa½—$‚ȃ©©¬¶nݺXIdVøP¬´ È£LBÕE³æ(ä½Ô”D»M›ö…Ä׫´;‹ÚYc{µ5g©±^Í ‘©zõ¢ï*}x|øáZ3|x~BN×Õí1ú`€@Ó#0þêŒh—1²Ò¾!S1í³u[Íiú§9cÔóÐM»V©ðÈ»nò³Ìœ%kJµƒ @€ÒpKJع5?Ó+¡}ÚWYÑNU’÷YS™PÞ²eK¬FvoÛg§´ ªieZ5?ÄìÙw Ú´´kœ›ÚIPß·oŸÍ¬mËCXs17X ’ËÐnÕÆ,j( Ñ® È‹^È]w½cþû¿ÿÍôîÝ>çuéÒ¥¹çžSÍ›o6xpæ¼2„JŽÀ½÷ÎϺNü*Ë]éÝxÓ¬÷̵O½“UÅä•7¹n–ù×USRÞùão"Ø¥$ÄE@€ ”' ;¿*ÏŠ7ÑZoÚu°¿9ä&CáÀ±¶®ØVÏZv1¥½S¿ï€©ß°ïJ»¶¹«¼´œÕÕÜwçØ–.õÞÿ‰Mé[º=’ßš)låüùµæùç?7Ó¦ÌKaòâË—'_^*L¦€@£ Ú5 _eÜ<{qm¨`7~POsö7·ë×¹–Îýb­]ë.>SÞsòÔûÙ„a.iÜVa7xýÓ¸s ¯ù_'Ë¿nO½yãóµ u‘'ßµ“›n_Ú›/® @€ ¼hÕªU,o…[\³c/ácDJwgÉæ=æ“»cl*¡1Õà-Z˜½{÷Ú¶?øáóû‰}cØ)]ê+§·6kÖ¬t+šãšùmݺgŸyyyѺvXiØ^¿ß<½ø W¯ÿeiלÚeK`öì†ð½¯¾º@ éÀÓ®Búü’‡ÿ•Ð’W~qª™08qãÑý»ZaNBßÄÿy6î>­Swþ7‡†É|lÞÁêuÓ…ÇiþpÖ7ÒªÌË&FS¡D;•A€ @¥AÀŸPþjÿsù‹_š?œx(Â]itOh-$Ø]þÒ £þrÖ”¼@ªªªÌ®]»lÓ%4ß¿ÜhR¹©› ª±*& •‰•M»¿2W¿²ÒÈËη¦$2ûí־„ŠÉ½ ÖÛýàuŽ‹K@/œ÷ì±õB‹[J/Ó•§™ b@´+&ý•-Ñη+O:&²G›<ߔ޷ًVû‡±}…Ït¦°›½:¦^ô;èå÷ùz~ø;~l!@€ Pjú´oc/AHÞ òº[°ng©U³IÖGÉSý‰þ†6mŠP:U·0Uͤ»…v[>wÍ_g$aÅ% aJÞu?4võëдÇì šƒí—/>¼ áFGq·ýýÚ*+¨ºgH·6M×#´¸½QøÒ]hLW²Bdb€ŠIÑ®˜ôsTvp¹‹Ç•QÎßÞ/.}0¿¸‹_¬ÝÖŽ$ìš;·z+?ð ¶€ @(uÓGw7§ 쫦; wúh&¬ðœ™˜ÈOìW…Èœ6¼‹‘8¤ñúÜ[cKNp– "oü¡íìØvÞy‰¹q& ‰Éo×î4o¯–X·=tm6yíjÌêYÅÐøûÝwú…uÕ˜u!Då™(1^ŸvUÍì÷}l¯†1«c,{zBœ_^¾-ÎkÙå(±îœaÍ”AbžîÛÊ& éZ+ï;D;Gƒ- Phˆv…&^¢åi:ͺ°jžÿÍÁfú£¯Ç]:íOÿ´ÇN,\_·;4Ÿ#ºw4Á5îâ2†@()ò4ÐGòHp^!ª¤/àI4‘ 2´kkëù4´Kk;á\R)¡Êhb~áÆ]vÂ~á†]Ö‹QÞJa&þSŽèdù†]çÜA5ä!#¡èåÛŒ¼º|Ô xºCBÒÈ­Í šj£ñ*ÏE,9 ÷˜–lÞmÇ«ó Þ!áCbÝ™GÖ<ƒt5î~?±¯}1â‰O7[ÁÙ= |Ow*­ž³vÌv­F MÄwFßwSûŒ]»+éZ—â:ePM\h踌8¨xÁИ®Áóç×yáuéÒÆb @ `í †º´ вF<òn<ý tj™ ›™¬•ÏN?%Ù%ÎC€ @%LÀyÞÉ»FÂÝËËêìĽ«²&™­§Øêî”PÖd¨›`–0"q¯©™Ø8N‚‡„_H ãá‡Çû&ŒPêsb&o}\¨Q[Ÿ»öŸ[ªÐwÛb™ÙñÚ©ÚŠÏòllª!`ÅFÜôY°v§¿NHŠÁòv$ÎiÌ* ©D%,sò¼»bl+:k¬*Ô¨¶>w=;ôq¦q>¨SµÙC/LèYÛªÉ ¥VœÛ¸Ë,Ù´ÇŽW§2±R˜g[ò³c¶v‡ÝËq¢ˆÖÂs¦ûÛµlnE‘ªæÍ¬w^U³feï™'›7¬çç†]V ’wR“H¤u×4nÅË }÷‡³rôÇ«úÌ7õ›ï=ªkºßy:ë… 
=_í3·m˲§ÔVûĆÝf{ý>;v5†ƒ\|Fn_ÿ¿4¼|ÒÎŒíݦɊ›ŽÛƒÞxã˃!{„È Â)@  í ‚9…´¯ŽŸ.ÄeXM¯ìÂÒèÜï)véÂã4çŽd®{z^h8L…ÛüéwŽ2§`ÂÖÌ‹eÄ @€òLàÀF,w4 êO.;ï…"³âÇÆÀ/Q¬VÄZçŸ=¸/!@‚HÃDsƒ`½IÅ2'VVZÈÏbñ,v¹AQDõ±!ù6î²!O_¿Í\;ùØP/8…мò‰¹q÷¨ÚwýøøP!î„#ûĵJùësƨqçƒé†õ®1cúw Í3îF @€ &OÀyçå„/¦(?&sA•<‚œ­óa"I0}²ã ˜¢t…\_,Y½8_Y\˜Mתƌ٠°§5Iýïƒ+ƒ-J@ÔИª·¼ñä•÷ýï)µfP@ ‚ Ú•HçNýÏG®‰„±ŸM—þÞŸ|ÛLüŸgãÎ=ùÞ2£Ì×ܹ¸ÄƘñƒz&äëÒôêØÆ<}éÉæ´?ýÓ²Û`^Ác—X!<Ý:xî[@€ @ù"Àäq¾È’o>Å”|”AžÈ%\¾d‘Ëz‘ÒˆÓå#¯dÓÕ™Ž[@ í A9¤ …¢\´&;³tžzò˜;÷¸#ìGE˳­n÷ÞX-†ôìhÒyÕŽsÉÃÿŠ;•.Ì¥ÄC™Âxþí­ÏÌOþ÷•ØýŸ¯ßÛg€ @€ @€@¾ dÓÕG!2åqWUÕÜb @ oíò†6uÆÍF÷ïš:QŽ®îѱQ9­ÞºÓ|¶î Àxƨ­K'ÑíÖ×ínT}¸€ @€ @€@TÙ„Æty7„È\fN:épwŠ- ¼`M»¼¡­œŒk·ìŒkL¦¢›<í|ëÖ¾Ú?d€ @€ @€@Þ¼ùæ—Ö[.ÛfÏ^–í­Ü@ #ˆvájš‰ÛW·Œk¸Öâ»è¡×lØÍ¸ …å¼êsã¼ì”dÚ7‡Rr@€ @€ @ ?+ºé~yÜa€òM€ð˜ù&\ù+¼æÝ;Æ…È|àõO>²ñƒzß{NžxöÂLi§Œèv‰s€ @€ @€rJÀ…·lL¦.Bd6†"÷BQ ÚE¡DsïO¾m&þϳ¡$’ ta‰8ïßÂNs€ @€ @€@Î äÊKNù Úå¼{È ‹y'I’ôt²±è{(ãí™ @9%€h—Sœd@€ @A—?þ¦yàõOƒ§#‡‰ ìz_ý·¸<ðDŠÃÁA–¯Ýj&þϳYÞmÌ£„zq*t ï‰4ýÑ×MÝ-;A 1$ØùžÄÙäörı¿›a>[·5–Ý]?>Þ„¥‹%`€ FhÖèÈ€ @€ ‚@c»dÙÖnÙ™pÉ­ÿ•pÈ€ÀÛËÖe:zR·Ö—‡Â¶bh,Ù‹V76‹Ðû}ÁN Vƒ @ ¿íòË—Ü!@€ @(#[wÕ—Qm©*ŒÙ´s7 @¨„Ç¬Ž¤€ @€Ê…À•'cti¹º£ûwMHÛ»S›„s £‰A ׎èÞÑü× GGÎ6Ù8Ôš`AÒ³cðÇh4…h=áÈ>‘óÑšva¦±ï{Ûe’gX~œƒ @ =D»ôŒH@€ ”1}°DÅârö˜A&LˆK¬aò3½:¶1‹~{–yá“•6QغwÉﮬ+ŒñüöçÑ}jr²Ž×”ýÍ+¿8Õ|\»ÙVXBIS]Ï®XÏžüŽ”ÄÜ‹ÕN‰k¹X{îÕ+¿oCbÊûT¢³ÆpSµbõeSåM»!4eˆvM¹÷i; @€ 2&0¸GG£r!0apo£r  —#Î=îˆr¨*u„ T Ö´«˜®¤!€ @€ @€ @åJÑ®\{ŽzC€ @€ @€ T ÂcVLWÒ@€ @M‹ÀüåÌÜ/ÖÚFb½¥í{öšEk¶ÆÊTÁ[W™£zÖ4z¾¦ÕsM·µ3ß_n¾Ü¼Ý(Ä:Œ‹×n5Ÿ®Ù+Së]c†ôèdúƒ@*«·î43ÞûÂ&ѳîô‘ò¾£žëŸ¬Ùl´Žž3}W†ôì˜÷²]yl!@Å$€h ß®]»Ø™5;öšžm[ÆŽÙ)mÛë÷Ç*XUUÛg€ @¨L7Îz×<ùÞ²Xãæ]35/♄ûæ|bn{þƒXYa;wýøxsþ73±‡s–Àiúg‰÷\wœ«ƒ¿½õ™ùí³ïšÏÖmMšå£˜k'›—ïLÒB¹PV$ØMôõXõŒûÙ„a±ã\íè…ˆ;_þÈ\ûÔ;)³¼ò¤cÌ'ƒàœ’!@ Ü 3ЃC‡™óeÃÛo±ì”,M»¿2o¯Þaë×¹sg£@€ @ ±îžý±rýãi;•£ÉíÉwÎ2ù0ƒ€ÆÞ·oi~ò¿¯¤ìT7 Þcnša4Æ1‹Àìŵ¦ýeI+Ø©~zq¢÷Õ3ºƒ T*D»@ÏŽ9Ò8/­™K¶Ä„ @2Kˆ@ý¾ææ7×me“&M*¡ÚQ@€ @ \ \õ¹q^&QÚ1gÉ+òÉsƒ@! 
H°“À¬1˜‰IlF¸Ë„isE@áb'þϳg§{Fƒ T"ÂczUZÓ¦M3÷Þ{¯~ýÚ*3mxWsÎP<·¨JâPv¿³Ú,X·ÓÖÇõ_ITŽJ@€ @eK@áÃÂaŽÔÓœýÃcízoÅFóÀëŸÆŽÝÎyyÅÌø“Ü![ä•€DâSïz.´ …Ð¥½½¦uÂf}ôe‚°'á®kì…V“M’€Dæ`¸X8¢{G3í[ƒíz¡:^¶±Î<½`y‚稼Dëî¼€pÄMrôÐh@•MÑ.¤Ï<óL³bÅ óÜsÏYáîÞëÍ‚µ;Íc{°Æ]¯b’'¤úÆ­e§õÿûß]ÂbÕr!@Q\uÕUæóÏ?·IO8áó³Ÿý,Ême“fþüùæÆo´õÕ‹5W_}µZµ9²'ZÇÖUæÜãŽ(x+ÿúæâQãáÿobh]®™<ʆp½ð¡WãÄ;­‰Ð\ð®ËK/}º*r¾Ãzט ƒ{GNŸ«„¿zò­¸¬$Ö=zÑ ¡ÏÌ[0렀 zåiÜçc½¸Šq@(0D»à ù«_ýÊ®‹öÈ#ØZ/íì§—š3‡Ô˜s†u6«A‚® §æ¬ÜnüpƒY²yO¬±h³ÑgRÿöæ”ÍØ^mSäÀ¥\X³c¯™ùÙóÜÒmF!1};å”SÌôéÓñ°ó¡°@9%pÓM7™yóæe•çÔ©ÿ?{÷Guæýÿà‹,Û’eY¾ÊW|·¹ÄÆŽc‹jYsY*!)ì†%oŠð²„Ê ›¤’lò’ÀR@™ä Ø@ €M±dq±œ 6y1 ëà`|llËòM²,Ù–eŒßúqFÝ==£žÑ\zf¾§j˜îž¾œþôñ îgžs.7W]uUVÛ² €€1-‡; ΠÁÞrïÎôΦœV†•7h§à¤2D(•%ÐÞYØñ W½ÓäþÁ¥ŸL°s+× h¾}ñ¾6« ­bd º:ñ^úAƒ·6F 1¤º Ê4Òƒ`Wô€XÁ¨EÝhz3HÈ\Š*W>ëÕVGo/¹8ë—¶ìòíæŠ3¦úæÓÍÌ[ïûøõû ÚùD˜É‡€Æ¨ó–Lºçœ?©Á˜—{¶~§ùA»¦JPÀ@~øáHµ¿õÖ[Í-·ÜbÆi}VB × .LÚå]wÝ•´ŒÙ ´‹`§î2ÒXw+V¬°¯ææf»¥²¾4¶š^5UýlæÝ¢q5fQãºÐŒ`\E]^ª+Ò5;ÚCuZÿì³Ï¶×‚`]Py@|¨‹G  €@nn]zº™ÒPi§Ó®¥éÀaßá”é÷À‹où–¥›ÑøgÞ¢ý«ãž^“R›V×}_¿àÔHÕV©…ÎúywïA_Ý~ûö£W”²u»oµà¾|2S2úñÀ³ÇGª¯Æ´+tñþ0Bÿ¾2ùŽ Ž×·½µûÇõ…>އ@_”rÿý÷güQ ± ×³Ï>k–-[Ö×j°=}x÷Ýwû¼và hç÷H;WSSc¾ô¥/Ù×ÚµkÍÊ•+Íš5kLWW—ÝÎf†}œ§Ê›;²ÚL¯¯6óÆ 6Ó‡WÛÀ^ÚƒTЇ Ðu¿:íûú=þ›c/ÅØ±cÍùçŸoÿg¤i  €¥" îL( €~+NíXo¯¼¿ÛWYeÍy3ç|2S§Ž¯õ¸YÁöß®".'éPÀNãlƱh<;oÉtÌHï¶L#Pª Ø]tÑEö¹r¶çpÉ%—˜Õ«W›%K–d» ¶«`o|ãÆÛÒ#~  ]–×DÝfê¥/YîôR Ïð´[eá­ÙÑa_æîMVe¦×2“† ²¼ª~ýl`/Ëj”Äfì²êâ²åȇ½èÜIMŸ>Ýv}©1ë4MA@ Êø¾÷Þ{#W¥±±1òº¬ˆ € € P¾év3f̰½½uÖY¦¶¶»G‚öövó‡?üÁ<òÈ#fóæÍ>˜·Þz‹ O„™(ê’UÙšÞ’É—]v™ùÕ¯~•Øü¦›nJL3‘‚v}tTö‚Jz©lذÁïÖ¯_o¶lÙ’ÏF,½Œi7.§ÏÔµ¦2ñÆÖ 0c‡V÷F îo³õ4÷â²ä˜ëè:n»¶Tвû\£Õ~Ò¤Ifîܹ6P§€(™ ÑÜX @ °£FbðÂ’s4@ ¤ =¾YIcQù¢ z É¢Ÿ0(yuIKA ”¾ûÝï†fØ=öØc&UàDÙt·ß~»Q°åæ›oݾ” ¨ki <óÌ3fÓ¦MFeýH™ñs= ÚåØTA'½\ùàƒlðN<ôôòfã¹õÔµ¦ |íqK’ß]`Ï}2oLϸꆳªÿIî£Ð÷¹ ƒ}ë4:fôJWl–Ü‘ãveɹ \DZã6c.ݶé>S—Êžs^zר@@@ N™Œ ¬·ÆŠ*ôøfÁ:0_yË¿øé¬Oúš3gf½-"@&cF÷¯€ÝUŸš\Ì<±ËpRe7nÜhfÎìýûwÁ‚楗^2?þ¸¹úê«c{žT¬ü¢´×òWÈß´ËŸ­Ý³²ÇôÒxl®(ç‚yÍÍÍF¯°¬<·¾{Oö>^à²ÛÜçQÞ=Ó|øá83hÐÍ€»¢lÒ§utîʘsA¹yóæY²èúÄÊÆ € € '‰õ5¾=/œ<*¶cCù*ÊLÅ (è¡qÁ\Qà­fÐ@7Ë;±hÞótUl̰Á|ÇÆê Q™| üà?HÚ}Ô€wCeä]pÁ‰.4½Ÿ1¥/@Ю×Ðò4&N°¨[Mõm¬ ž{---v½k¾/åĉjsìØÉvÇOìsÐN]ƒº±æ”%§Àœ[¦L:½( € € €@) Œ>ÔWÝW·íõÍ3ƒ@ÜN_ï Úmln3 
&Œ[5©V`\?h·fK32T„€²ì¼ã€é¤—/_)Ã. ˆ. ÃTX†@y´‹ÙuT&šJX@ÏUUÝkª›MWRu¹©Ï]PÓÍ«¯vo5bÄ(3cF÷±\ Íí/ø®ÀœÖQ!Ôa@Âüð‡?4¯~ü?òiÓ¦™»îº+ÒwíÚe¾õ­o%~tÇwD“ïÅ_4Üü…^ðK¿êœ8q¢íIÀýà[!âŒúÁÿíoëÛ¿ö}á…f}óñЬ†¤;~õÚV³«í° >hN¹>@ À—Ï?Ù¨ºòÔ«[Ú9 Þc)ð•OÏ6¿üN¢n+^ßf–}brbž ÊQà©§žJ:­k®¹&iY¾¸{¯­[·šwß}7qÝ[~âŸ0‹-ÊøìßøFb_ÁûL݇ê>R÷”.)eá…欳Î2£/U ÛN÷ˆ‹/Ž|«}+Hê2u\ è-ÎÃ{¯¥~Þ}x§µ¿wÞyÇl߾ݵžö}ê©§šO~ò“}NÏúW­ZeÞ|óÍÄóí_F§œrJJÛx qß½w¯ÿi &_~ùåÞÓ±Óê)JZîµÕ:·Ýv[ÆmÇ=kxíµ×íCÊÖÉûÜCuúþ÷¿ïsÖç‹/W×; %Ç Úå´»SV› îéxÞéBŸc € €@átä-QƒvMMMæá‡önjÿXõ-Ìèè믿ÞlÞ¼9ðI÷¬÷¢«W¯Nycº±1F0ßpà I¿4Õúnß_ùÊW̽÷Þ›øáPª}±ȇÀ­KO7wÿæ/‰]ßðäó‹¿;/r—ƒG™ÿ\¿Õ\:oJämc .˜=Þ·…Úî_Ÿ6É,™Ùè[žnfݶ}¦ýhWFÛ¤ÛŸ!N@:oÐî’ŸüÚ4ýøªŒ~¡@ßì±Ã744ŸÅJàî»ïöÕçÖ[o-ȽΊ+ŒŽ•êÞÎ[©Ë.»ÌƒoÞσÓÞsRÐHcî)¨tÿý÷Ûý×w÷zJVÑ=ªw\´(Û©~¿øÅ/"¹½òÊ+‰{K×í¢Ü‹Î˜1Ã<ùä“‘‚„ò½ä’K‚§š4ïÎ]èz|ç;߉tnGòùîw¿k¼æî3½»ý«î>ø`Ò=ú7Þè]=iÚmü ,hçµÕúóçÏ÷]Ëà>ܼÎáÑG5éêâ­‡œn¹å_ðÍí+ø|îñµ¯}Ínç‚yÁg"ÚÞ+¬=÷_èù~…> ÇC@ˆ¯€n:Î;ï¼H7u: eâeR´ÿÆÆÆÄÈ©¶ÕÕ]t‘½éKµË@| \¹pºo×ÊbºèþçéŠ2òxñ-S{ÓÏÍÕÿ¶ÚîÒ­ÏgäB@Y —ÍŸâÛÕyÿúœm‹ §+/nj2—ÿŸß˜…?|Æh …8vr@¹ñ¶ÇqéŠÚ³Ö™ù­7 ôýø×¯§[ψ€²°‚å3ŸùLpQNç ¹îºël@)JÀNWC™Nêå%›¢ó<ãŒ3BvÞý­Y³ÆÌš5Ë8½ëÞ/øCUï6šVýúrõ^T^rÐú½•(»à>xËä<œkª€wÿª»îç•§â®qº€]°¾:_=;ˆr‚Ûj^?FÖöa;ïúÁöèý¬XÓdÚKžã"€ €1P7ÙÜtD= u £?ˆ£­«_FÍ*Œº_ÖCzP™ÁîÛ4î’3F×™sfŒ3ó'5$v³u»yvý6߸bú°íHWb&ȧÀ—}Ê×E¦Žuã“/Û—z '2uƒ«UxáIë'>d< Ô hûûó켇R NEmÖ›AªïÒçßÜn‚ãßµîônÎ4±P·‰Á¢nóYÊäÞË[ÊvÙÜ#vÞ{Q[]>†9Ü|óÍæ¥—^rÕKzW}•ݦ÷sÎ9Çv©ñßuëŠîuµ`Ió½u“©a%‚EǺöÚkíõÑg©|µÜ•_þò—F]Hª¼ýöÛæê«¯vô44E°¸gÁå™ÎþóŸÝDÝ_*ãÔYÉIcõ¹ ±w#eê|jj¢ým´Ö9~õ«_MÛuÜÕ3Šb‚vž@JJ@7aƒ4‡Dª›ÃÖòà¶ú£>ìfR7zúÕ¥~1—íòÁ_,ê׋úã8ì0:ìf%fÔÊW@™ Ïßt‘¹ùßÿè{)ê+S$¬ ¸¨Û³™ hœ°ÕÿtqÖÝ\Þ}ÅâLÉúôIி]l¦4ÔÚŒÐLw¤ ó?œÝxÈt[ÖG Ðmmm¾Cjl¶|Ý£ƒéÆìÒ½îµxà¤L9ý€S÷b™c‚½¥è8 ЃgÞ}g²Ý /¼`®ºê*ïæ‘¦SYè~ôšk®±ÝVzžšV4©²í~ÿûßÛã†Ý7» ¹{]e¾³æV­Z•2H¤ãƒš2 ôÉWËd PUÕ/UG•òüÜ9dû®û{¯§ö“jüzçtÓM7…ž‡ÆÃËæ¹Àc=–ÔVäv=ôCb‚vÙ^m¶C@Š(üÃ9]U4rª?ðÓmWìÏt3“êzW·Þ>wëyßµ_ýÊ/Õ¶úY7±^cÝeóǹ÷¸L#pâÄ Bp Añ,Ràî¡/Ÿc¾vî\óÓßoè5x§‡È—Ì›l4&žºØ,d©”kYˆóôv#YÈk˜‹c-™ÙhÚïÿ;óè7™û^x3©ËÖà1\¾|þÉæÒySŒÚ{¡J!®c¡Î%Ýq už#†T§«F¬?»aÉ)¶ Þó»¿„fÝ+¯®‹ Ö«PE×±PײPçÄq + ,ªÜsÏ=I‡R/)½e(¹û-o—º'ÓXd3gÎLÚgªÏ>ûlÊ GؽžÛO¦Ûi¼²LƒvaAAw|½ËHVÁ¦ªGšT÷ô©îk½ûuÓrT ÔT ËÂtëÿô§?u“ö]÷ÒÁ`¨wÕÿ¡‡2õõõ6«/xÞu 
5-soѽ¾ê˜®¸óhiiñ=PÛtí4ÝöÞÏ‚?ö~¦ë±|ùr_°Úû»n¡§É´+´8ÇC@J@@¿†Ëô-ÊiE¹aTWqùc9Ê9±ô.ðêí—›WÞßmWT—‘¹ hÝqщqzÛבּuv %7ΜQÞ•J IDAT‹ê«àí2³¹Í¼ÝÜê¯î”Æz3kÌp3®nHÔ]²^ŒÀÒx[Ù´•ÞNK]Xnoí°«->yLÚÕ3ißÞ)ø¦@ˆ^›v·™¦¶Cæ­¦ÖÄ* JÎ[Ÿ³‡‰3Q4ï_²01Φ®¯Úp.Š÷{2Ê~3ißÞúé»SYwz­Û¶Ïìê¿·Ÿ\¿´Ëµ(ûC@<ž¹xOêFÆÛ•†b~î¹ç"ߤõvvºù‰òÇïìÙ³}» þ¡îû( ½r¨óžp&ûU Cc(õ¥h™³/ÇbÛâ 䢭¤ª}&ÙA¹hk rè¥ ²²­ïär € € Pl)Sü]Öz3‡rY·àøh™d—¹zÇAËgpÌ3Ê{”[¢ì'ê:Á ¯ööö´›®X±Âv¡™«k Nes-ÓV¸¾û£Ì™3Ç7e¦®Îß-r¥Œwß/ ë € €T†€ºOQßï Ћ2﮾új;0·ú—W_ï@@@ÔÁÀƒÖTFVK0£-Žu,DôcÝ(E÷ÄM?|ÍUÀ.Êq+eSNÉ}×Ï¥`GЮ®uD@ ( þÛÕý‡²îÔUDXÑ`ÍꪂÀ]˜Ë@@@n° #u¡ÇÒ[FYë\Ì:}ùË_6qÉF,¦C¾Ž}ðàÁ|í:Öû¥{ÌX_*‡ €O@YwzéW Ï<óŒQ Î[ôKBî^zé%ïb¦@@@>Ð"ƒEÝ)ê^+Ÿ%ØÅb”c555ùVKõ#NßJe8 Ä» Õ)ë×Ór%¨î,/^œ$£±êÔ{M¦%›k™é1ò½~6á`—¯\pA¾«‹ý“i‹Ë@%@@Ì ™á6nÜ8sà 7ý‘}ë­·ú*ªÀnV( € € €á äxËÃ?œó.2ƒÝpj؃L‹‚JÞ2bÄïlEL‡u]ªñ߃宻îò-Ò8x z>ôÐCöþYÁÚà+,ëÒ·“grq-Ãö[ÈeÁ¶óÖ[oe|ø×^{Í·MÐÅ÷aÍ´+£‹É© € €@ù ǘ۸qcÁOVƒëÆ$xÃI¦]Á/D@@˪ûÖ·¾•Ó3„”€JwPõ°â-K–,ñÎVÄt°ëÒླྀCŽa÷Ë_þÒ诹(aײ?ÜÍÅ9(ÛÐ[î»ï>ïl¯Ó:_·½eÑ¢EÞÙ²&hW¶—–C@r8õÔS}§óë_ÿÚ76£?ro¾ùæ°ú´ìk_ûšoûwß}×7Ï  € € €@€‚vÁàÙöZòÀ˜Ç¼çÆØ¬.ßcÌÏþó࢔óëÖ­Kêî±\º#|ê©§"0ƒ½Ë\rÉ%)ͼD ØEí"2¬KÕG}Ô{¸”Ózðo|#åùΚ5Ë·mXŸ¾²œ¹ð }[nÞ¼Ù¼øâ‹¾eéf‚ç«?aéöQªŸ´+Õ+G½@@ ¢¦M›æ;ß;î¸#åáZqÓ¦MæŒ3Î0Á_ÿùvâ™ÑÏéþ°÷¬jvîÜé5Áºù>d@@@ÀÜ}÷ÝI )µ(°ö™Ï|ÆŽ7ÌŠÓ>‚'Ý7F êþqáÂ…¾jh<»((ßF1Ñ}qcc£‘_º¢k¡à’·\yå•ÞÙ”Ó½í[÷Üçw^Êíƒ{¸ÑóºNéŠ>׸ójk/¼ðBèªêA'X2ÍÈ n6¯[0P­óïí´/µY¯·|ýë_÷Ζõ4A»²¾¼œ €”‹À_ÿõ_'Êç?ÿù¤›ý±­ ýz.x³‘´Ïõ/¯?ìu#£à]ª?¤õÇs𗆟øÄ'<{b@@@ ( l;‚EÁ ât¯•ª D|.¿ürXK÷ÃÌ[n¹%¸{{ÿ–êš:ž» n¨€_¹&Ã,\vZ0P¤ SXÖ›\4†·hßa;Ý[_wÝuì´ß`7Z¦ûü°v⽎®}´µµy«—vZ]µzÛž¦uœ¾–°@µÎAmÎ{sÐç×\s[­ìß”ýr‚ € €@h,Ý0xqúc\7Z®î3÷îÝ9³.‰þ°v\{o*Su›q饗¦ÛŸ!€ € €c~ñ‹_ØL(Xq(šwË 5j”ý(Ý=Þˆ#Üæ‰weÆýà?0Á€›»Çs÷ŽÚ ݾµTÁªÄÁJt"h‘ÎáÉ'ŸLy–ÊÀs×Ì­ä½?ײT÷ÐnýtïòW¶]p\7Ðr÷ê©ê_WW—r÷ÚÖ[7C/·O÷Ù‰'Rî#Ê T«=ÕËÛÖß|óMßóïþW¯^mÂ2½ë”Ó4A»rºšœ  €”µÀƒ>úë<ò¼Á4pç¶ 8Üòàûm·Ý\dço¿ývÓÚÚšø!¦w¥t÷Žn=u±©}”S ~õf±|ùò´Ke~i¬¼àõêm¿Áã¦s¾÷Þ{îéƒÇÐ6½Ý«§ðÚk¯ ݾ·}¦«kªÏÒµ÷°ó îGÏô#æJ*tYIW›sE@’Ъúƒ5JÑ/äššš"Ô¬®VtóIQ]ôË9  € € €@4î^zé%£ P6EYpíííiïõîºë®¬öÿØcm[neþüùöþ8Ø¥eªó”à 7Üêc»\×QÙiQ÷©{t]7õ’µ¸ ¯Ë€‹²ê£@_ºñuÿ0ʾ³YÇCpŒ¾Þö¥ç¯¾újE>s h×[ëàs@¨h‰'úÎ?¬ ß yžÑ×ú<Õ¼úc^Á4 JöGú´iÓBk¨uÕ×¾¶Mµom¨?œus©›Lvéºç­ @@@2PPH÷U õ”Ñ=šÖÓúÊ‚S 
¤·¢ý뇜 ò¥û¦‚wÛ¦{÷>£Èô^;l¿™Z¸vÔ—c{T§¾ìKÛ{M2qõÖ#Wí@õY¸ðAK½`A£ùÙÏ.¶ÓQÿØvQ¥X@*V “`S&Hú¥_X6\&ûˆòÇh¶ÇÉv;oý£ÔÏ»¾›ÎűݾxG@@JA ß÷AÙÞŸ…Ùe[×BoVw-˶©ö§åú!noÆQÖIw }–ëg™Zôv޽Õ?ªU”ý¸u²5ÉÅõpuÈÕ;ÝcæJ’ý € € € € € € ¥A»,áØ @@@@@@\ ´Ë•$ûA@@@@@@ K‚vY± € € PNì*§Ó)ûsiéü°ìÏ1Ý 6:fºŽŸH· ŸÅL ÒÛìÆìŠPt-Gާû˜Ï@È›A»¼Ñ²c@@@ ÞC‡MTpmÓ¡Ä4ñX³£=QÉAƒ%¦Ë}Â×fwÑfKåzëGÔ^u® °—iýî#šK¥Ác*õ;¶„.UEÊV€ ]Ù^ZN @@H/ ˆ ‚¬ÙÑaVl9~>…À#oì7öuÚºèúURdôèщkpÏÚÝFw”x tt}d¾÷rS¢’ ‰éJ˜5j”=MeÞùÊ®J8å’?ǵ»™§7¶ÚóèׯŸ©¯¯/ùsâ¨tºººJ'àüKH ûç>%TaªŠ € €d"pâÄ £%\`„ fãÆöCAT–M¾2K‹.ðàú½æ‰ -‰zL:µ¢Ú÷!C̈#LKK‹Qäæßm7?>o‚™4¬*aÂD|tn[½ÃliíîRAf^+é;yüøñf÷îÝæ£>2«¶µ›ªþ»Ì-ŸkªúŸŸ EMú‹‚Ì®û݉'ÚlÉJj³ &(an¸Á,^¼ØžAmm­™9sf Ÿ U¯4‚v•vÅ9_@@@À# ˆJnß¾Ý.UànÍösË¢1fìО5™,¦€‚÷ü©9‘a§ºLŸ>ÝÔÔÔ³ZE9ö´iÓÌÑ£GÍ¡C‡l¦ÝuÏo5מ6Ò\1«ž@HQ®HøA•¹« ³2íT”:kÖ,£Ì¥J*´ç­G(p·ò½ƒöß±¾cçRI±>W˜Õ^u}\Q–dcc£›åJL`Á‚%Vcª‹@·A;Z € € PáʶӃômÛ¶Y u vå³ï™ÏMfƒ!ïŠ×@Ôý£$+CÇ]+eعn÷ÜòJy×asçÎ5o¿ý¶éèè°12zúVó¥SFØLQ²˜Š×”©¤ëáÆ°SMlæÌ™SQ]¹z¯ÀðáÃmànóæÍæÃ?´6ÊUÐîÚÓÞy± <­ ²ºÂ|bÃþDvª0fÌû=[àêp8@ A; € € €€Í&6l˜Ù²e‹9räˆQÆ^çO®5Ÿ›ZgŠT¤[ù^›QÕ[”Y§ ;A*¹(pwÚi§™¦¦&›%ª &eÊ,_·Ç<ñV‹ 8/›1œlÑ5Ùë»bÅæIã *SIÙ¼•–a¤WànÞ¼yö;öÀîñC×ï9lnþÝa3wdµY6½Þ~×pÊåg^ã‚®ØÒjáºÂÔ‘”©ïX]/  €Å hW uމ € €ÄP@¡ÓO?ÝŽ¿´cÇ›¢j*€¤×ˆêæì 5æü)µd†äáúéþª­íF™J ‚x‹$+ð¡ìJ€B 6KtÿþýöÙiÜ?½¦×²gµ[2F{Ür1¥ %µÕ5;ºÛlpŸúÀ”)SŒÆ±£t èß±25&ã|ø„Höí²]ભž=¡Ö~×ÀËmËQög÷ÿÏú2Au•ÇŽk4¡~@A@ Xü_¨Xò@@ˆ¡€\Ž7Îv½¸gÏÓÜÜlÇSU Ñ8UzÕTõ³™w‹ÆÕ˜ECl@/†§ë*ÉsmÓa³vW‡Í¨scy+­qÀt=¬«ôL%¯‹wZF3gδcÜíÚµË(x§Ì;¨ì;½& «ên³CÉõf0-Oe*P§@SXQ†’ ÚQÂ4–¨^{÷îµß±êæUE_îGšWvó¢Û«Ú/%3yª½®m:dßÕÝp°(@§®†ÕfT¥ € Pl‚vž@@ˆ¡€d*‹I/AöíÛg³C\U`ò>\V“ºx›^_mßç6 6d‰8­î‡ñö±Ž-­ÊªéLêFЭ­à\}}½9r¤}°ï–óž^@]êÖnòäÉ6¢ ³ëêU[*ËF/_¥¢öªvª6«Œ<½(= p(H§×ú݇گ·Áž5«NA(˜D¥DP°H/íô‰ÖÖÖD†³ö`Nw‘«J¨½Î©Ww›Uö3¥G@mUítKËÇï­G{> L)¨¬ïXù󃈳 €EàÿîEåçà € € €@üÔý ^~ø¡}¨¬®Ý4&“ËfÒè¿^ ä¹â!zȬ,ö*¡‹B†”E×ðèL>œKØ»‚¤uuuÖY;"‡)E[¦lpVÐNAgµÙC‡üãvwIèÏ›7zH"øUÁµåE@@@ _ýòµcö‹ €ùX°`\þÂ@ bªªú{7räûª˜çD@@@ˆ™v1¸T@l®¿~9óÌ ¦«ëx¶»`;(?üa»yôÑ×óRSêÔ-¦{iž‚ € € €… hWxsŽˆ €@NN;mLN÷ÇÎ@ ~MMí9¯”‚tK—N³ÁºrÔ8qÂèEA@@@ Îíâ|u¨ € €9P îÜs'›%KN6µµU9Ü3»B@@@¾ ´ë« Û#€ € c³Îšh.¼p*º_#ª† € € €€ÚÑ@@(3 ÍÒ¥ ÔM1 CÊìì8@@@ÊS€ ]y^WÎ @@ ÂÔUØçt@@@ÊN€ ]Ù]RN@@ 
RfÎl0_<Ó|ö³ÓȨ«”‹Îy"€ € €”­A»²½´œ € €@9 ¸@º¾ll¬-ÇSäœ@@@ˆ¥@SS»Y¶ìÉHu[·®É,\ø`Êukk«ÌêÕ×ú>'hçã`@@ø h\º[n9ÓŽQG .~ׇ!€ € €T†€îÉO;mŒyãÝ}>á%KNNÚA»$ € € /³Îšhô¢ € € € €@qÎ=wrN‚v^85éDú%-a € € € € € €$ ,]:-iY¦ Ô5æÂ…I›¸¬À€ IDAT´K"a € € € € € €Éê"sÊ”áÉd°D]cVUõOÚ‚ ] @@@@@@èk¶]Xט:A»po–"€ € € € € € $З ]ª®1u‚vIÔ,@@@@@@@ \@ÝcfÛEfª®1u$‚váÞ,E@@@@@@ T Ûl»T]cê íB©Yˆ € € € € € €@¸@6A»t]cê(í­YŠ € € € € € €@¨€ºÇll¬ ý,ÕÂ3Ïœhªªú§ú˜ ]J>@@@@@@@ …@¦ÙvK–LI±§îÅdÚ¥åáC@@@@@@’Î=wròÂK”aGÐ.‹@@@@@@ÈVà´ÓÆDî"S»t]cªdÚe{%Ø@@@@@@ ¢¢v‘Ù[– ÚUtSâä@@@@@@²ˆÒEf”®1u|‚vÙ^¶C@@@@@¨h(]dFéSˆí*º)qò € € € € € €}8ó̉i7Ò5¦v@Ð.-#"€ € € € € € Z`éÒ©)?ŒÚ5¦v0 å^ø@@('N½( € € €äC`Á‚FÓÐ0Äìß8i÷g5Ñ(p¥iE‰u@@@@@@H!ª ÌsÏœb‹äÅí’MX‚ € € € € € €@d°.2»»Æ<9ò>ÚE¦bE@@@@@@’\™ÞO.l4µµUÞEi§ Ú¥åáC@@@@@@zÐøuÞrá…S½³½N´ë•ˆ@@@@@@H/à ÒeÚ5¦öLÐ.½/Ÿ"€ € € € € € Ы€·;Lït¯~¼A»¨R¬‡ € € € € € €@ ov7ë.ÅêI‹ Ú%‘°@@@@@@̬óï2ÙÀLVf]@@@ )päÈsìØ1{H½wvv&þÃ?4‡NZÞÛ‚!C†˜’o‹«««ÍÀíæz }¯ºåýúõ3555ÁU˜G@ ¤Ô-æ’%SLmmUÆç‘|w’ñ.Ø@@@ÌŽ=jôRN:”nè¡ñG}”Ù³X;ӇР„( ¢â‚yƒ 2z0M $‹ Pb›¸vÙÞÞnkîÚÚjª@G®O±­­-£]*àìÐ.H­‰Ú2’Œ(Kne‹õ½ê~ìÐÕÕ•øÑƒûþãI©]*§2lØ0ÛVÕvÝwnëL@ (Ëîý¯ÏGš'h‰‰•@@@2pÁ 9ÜcoVG¦û+öúzî5©êâ"Ê|r‚#©´â·\Á àLvÙpÞ€rüjœ¾F:—EÖv] ÚeD)ûIíUÁhJi¨½ê徟Ü÷niÔ>¹–ÞxX›uh„Öw-?šHvd  €@ñ²É²S­ ÚÿÚQ@@@ ,Ô ¸*H§@‡ dsrsGV›ª³-& «2#wß¾Ž:ÐŒ­I}+;oô´‡ë:~ÂlØßÝ\ØŠì2-Gº»3l>Ôeš;>´«u;n¶´ ÛÄ·Ì=pfD)¢—.ëE—›>¶¢Ì¸à†Ú«^j¿ *gSjªú™éÃ{ºœ7¦§Ú¶Üÿ¤ÐÝŽ¨`Ô¾Ó•æCÇŒ^©ÊúÝ=íyþ#Fm\ŶåÎîö›j[è ~îÎ ˆ¸¶«e”â ¸ ²¾gÔ^ÂZQkh¿W«»¿KG îo& ëÔÚ¶\Ÿ:h«v®uÒ• û:MWŠli}¿ªmªttù¿W×ïé½›cˆž»¾SÈS{Õ»²ô( €”¢@ê;R<êŒ € € P0=8nmmµ:M+¥L¯djö7ÝÁŒ~fÞ˜Á6@§ù|—ªþ'™t½tŸ¹º¹ÀŸ}ß×™xðœ.°'½\qã8éÁr]]}Ðì>ã=? 
P8pÀ¾ðÐ+jQ»PÛ™;r° X¨ G ¸Eݺõl zh÷‹aëEi³.ð§ ´¦?8xÔ§Söd¥À³7ø¬ì;D\›%/ìjäv™Tºj·™•]ÛÔÆ­²aæ¢Ürq¹ø.w?û®[|Ö.\`Ú[WÌÛ¿b± 8»6ëºÝL¬À!_ýês!KY„å(pñÅ3ÌßüͬØA»Ø]*„ € €ÄS@ò¤kii±šOWôàV‰èÐÃcofGºíâþ™7ð·h\÷wÞ:wt}d¶è´YzzЬi=xö8•)¢×Ž;lWšÃ‡7õõõöÅÃe¯VöÓz¯öª ‡7h¶G”kl”%gËUýÌ܆Á6X¶~)-KþF‡×ÚïÔNm›m=š”ÝçÆAsAe6)à7µÎ¶[‚AÌçå©×—掰AfµUðôîŠ2rõÒ8b“'O¦ëLãy—Ú¬²½E:}Ç*°¬wÍS²èñ¬±;Q7°«¶µ›UÛM«èÿsMMMF£ 6ëG äQ@(–™vÅ’ç¸ € € 72í²£U¶Ç{ï½—ô Y—Ín³=²Û3[e*  ÈŠÍlvˆwÛšš3uêT£ñÄ(Æ>lß¹s§ñfÖ)ësÙôá¶Í¨+L+ÑØb+¶°m6˜Í¤ ÈøñãÉ5ƨëÖwß}ר›FoQ ôŠY#Ìù“k Ôyaò8­¬Ñ§7¶Ø žw,Q7o¿ý¶™3gNEî°Û´iS"#Tæ[±]`ñRqhcìwÆõóF™+f×›;ÿØœÈnV7Ñú®UÖ@B ô+äÁ8 € € €@¼”ã ØéáåO–N&`¯ËdH}è¢)æŠYõ‰š)p§Ì»J+:o°SPSíUíVÙ^”ø(ª6«Œ&=è \ŧ¦ù­‰2·¼ç­.0qñÉìòËžñÞHýñylöËÔÝ·oŸí‚7ã± €}à/Ú>à±) € € PêÞ€2?ô O=H¾qÁhÛe©«áûï¿ï&+â]Á:šUôýÞ¿šhšqò%x’“†U™{/˜”Üéú)ƒ©’Šºuc.*`÷íO7`ŽqP·ÐÊnv;e¢+ðJA@ Pí %Íq@@@˜ ´··'2–ԥݲéÃcVCª&píi ‰@•‚ a«•å²]»v%ÎKAfÆ[LpÄvB T¹²wï^7Yï.HY_=ÀfqUÄI—øI.74‘Õ¬€kKKK‰ŸÕG(%‚v¥tµ¨+ € € CïƒÈÑCæpÏì*ߣ‡ô\/—U)åÀ‰SÕƒuJi(ãnPÿ“le5¾]¥Õ]–ݰª~‰ì­J9ÿR>Ï9 ƒÕwÙ½‰L € G‚vyÄe× € € €@œúõë¹%lî û¯8_«`Ýv¶÷\¯“Nê†×)Çyï¹ni=ZާX–çÔÒù¡éúèDYž[º“ò~Ç8z<ݪ|3-z2˜½ß;1«&ÕA(CexNœ € € à8q¢òûRÌTUU%>Y»ëi>tŒî"ñذ¯Ólníy \]]m*¥÷ïß?1¾Ô#oì3?>oB|/5K<ò—ýÆ} +U)í5`Œi;zܬÚÖn4®%Þ]™g7õdõ4¨"Ûl¼¯µCÊW çg•å{Žœ € € €@ˆ€7 äÃN˜›·Ý|p°+dMÅE@»Û^Ü—ê¼öt ª@ó¯ì2]Ç ÊüBdpÀ×ï5+¶ô@x­Ô¢öºfGG¥ž~Iœ·²Bo~áãÍŒ0€œ‡’¸xT(‚ver!9 @@@ ¯Ê´»îù­æ‰ -}ÝÛçA@×E“•RéÅuºò½ƒ¶Í*˜I‰—€‚·­Þ‘ô}RÉ] *ÀüÏÿw§ 6óï8^íUµÑ®ûÿ·ºÞßµ¡F €@% ´«¤«Í¹"€ € €)Æ×vg0顲2c¼[¿çpеY\Hû ùù­öº¸¬²©Ã² ±;ÖðꦪwèNÙ¡ÿø›mæžµ»E”â ¨>òÆ~óåçÞ·AW›IÃ*»ÍN¯ï9›åãÍ@tN¼^@ß! 
¦*Èì¾CF !»®ðW‚#"€H€ í@@@ÀüÏOŽ1W̪OH(Ó@Ýeê¥q˜(…PŽì+¥ÉÞÌ/Ía.ŸÙs­ [³xmPÿ“ÌCM1Þ@ˆ W>ûž ÞÑÍkᯓÌð×5Ðxƒ.“lDõ;öàˆêÊíSWãì µæ›‹Ç%‚Í )Ð,/µ]çUø+W¹GTW¥ß{¹ÉP½Ý–.7Ô|ÿœñ• Ù#€U€Ÿ•Ÿƒ#€ € €ñÐï$sã‚ÑfQãPûà݉”m§×òuÌù“kÍç¦Öù%ñ¨}ùÔBYu«¶´R—UçÎnÞè!æúy£ÌÜ‘Õ6 ç–Wêû¤aUæ'K'ÛîŸÞØbƒ2SD/9©½ž=¡Æ(pDɽ€M«>8hV¾×f‚]”*rÙôáæÚÓFššª~扷èv÷sS‡Ùv©à¦ ©[bï–¯Ûc¿cÜS›¥äG@Á囨ïX—UçŽ4vè@û«ÿ×éºP@(†µCc"€ € €1P†^Ê®S¶ŒËXÒÃͧ7¶Ú—lÚõ‡òp9×QºµM‡ìCü°Å >}inÖ!Ö ]{ZƒÍU{]ù~["cIA$½‘¡ †(ØGÉ^@ßk››µ»:l› —uMôP°Nß¿€Úß¿œ3Þ¶MµYýûW‘£²kõR³û»¸Æþ»×<%{ýÅý Âý Å»7µÓ+f×Û ³ëv×û9Ó €R€ ]!µ9 € € P"zè®—‚+¶´ú2¿XrÙLzÀ© °¹#ÛÀÈ܆Áös‰œfÁ«©€‡w”ŽØ,Æ`ÐC•reÓë­kÁ+ZbTPC™¢ÊDTÀYmVÆ®t{wÚ,R= Ÿ7f°™^_mÔ^У¤P{í~uÚöôÐÖr]6c¸Q6™©=Ý'jw?>o‚ÍèRæ—‚u.óËf0nk·mYëk]}ÏÚ6;²š`¨C y×÷é†ýGì¿ÿ-­fýî# ×àê Œ*Wÿ¯£ € ‚vq¹Ô@@ˆ¡€Ï9ÎÜxÆ›U£nðÔ]¦+z@j3Å>ÎÑr=¼×vî³#•˜½ Ð)ØáÊ;¿à»ºÅsad×uzŸW;SÐH/e‰ª½*ˆçÍ`ÔôÊ÷ÔõÝÁÄm{®¶®`Þ ŠíÖ Ó´÷ßzË3¡àœÚìùSjmPÉó“ô}©`³^ê2SÝãêÝÌwAg·K}7L^mæÑ&ô];¨"¥ÁÚ¬ËwVÁwY¹nseOA@ níâvE¨ € € C=$vÁž”½ îñÔM^0¥ ˆ^ –¸¢‡ûênlÍ3vh•1¸¿}ð\ÊÁ)»ëÕrä¸Ù°ïˆé8¦÷ž,/gö®‡Æ®«Q½Wbp3Ì%ËÔÞ\0Ä Ô ©ëŽÐ{ Q©+z¸_3°¿ ŠTõïg³óªúõ³ónR|—…Ú¨þ wt·Ytîßl”óQHeµWQr'д¯±;L]{ ¢”‰§Ï½AU}w¸Lgý`B߯ö;wèÀ’ÎÌs9}¿Ê¡ùP—iîøÐl9Йè 7ÝÐÿ_ìw츳¨qHE7Óùð €@üÚÅïšP#@@@ Ö À¹î3UQ=ðW @+üØÄ—%¢uܲÁ½=᧦@€"Ýš»6›ÄP÷p…*Þ‡á:'—õòÁÁ£68×õÑG‘s®Îö|>ÎŽQ°CçL7‚N'¿ïòÖëŠYõö@º¦zè¿¥å¨íJOí7XܲD[x£g w-µDû­©êo?TVi¨¶3yþ ë0 踺k~ýîîÌØLs®º útÛ) ±»‹F÷ïùP®;Ð4ÔÄ^ÓÝ]=ªËGµ_û}ê©‚¾£\[U¦^°èZªMÚàÞÈÁ‰íwoÿ“ì¼²÷Ô¶ QÔF ^ºLo7@R hWJW‹º"€ € €1°Š¡m7y®zʈÐCY÷€Ùf¢u~è>Nz×CèžÒ“¡×³,y*WA‘Þê–|äÞ—¸ Ž®ËE÷îV¨5ìƒýÀXv x¸6« Bº€ËtR}] $JÝm€o`w€/Êú©ÖÉ䘩ö\®ö©Ëj¯vÌ¿p‚uaÞ/ ïýhÁûÃíÔ^õÝ©L¸yÿ–=súžÓK%,Û´gÍž)—½×³$»)oP9»=„o%Ém€Žñ)ÑXŠ PríJî’Qa@@@ þ 襌hÖ2oðÎ_¤å]Çý]Q¦Ë€²;Ïò?Þ̽t]ÈÙÇÕÜNgÉ\R›©Øì¦ÑáÕV`D]¥ªØö{¤»»?Ör©ó¬ófî¹ñ"uLì(RײƒÿMÀa½Yy®2®›M7¯q ]qiÍ{¿‹Ýç¹xfîéÿ/RÅä4íý.ÎÅqÙ €¥$À]F)]-êŠ € €”±€Úê•‹â ¦h<Î…*û ¸ ´–‡I‚ë§šS´^!ÇKU/–——€ëfÓU_Úl0°§1I½ÿÜ1xG@ÌÚeæÅÚ € € €% ÀÃã¸HT1! 
¦$>`˜ äòG1=Eª… €@QºsЋrhŠ € € € € € € hG;@@@@@@@ ÈíŠ|8< € € € € € €íh € € € € € €Y€ ]‘/‡G@@@@@@€ m@@@@@@" ´+òàð € € € € € €´£ € € € € € € Pd‚vE¾@@@@@@‚v´@@@@@@Š,@ЮÈ€Ã#€ € € € € € @ÐŽ6€ € € € € € €@‘ùø@@@¼ t=fþsýVÓv¤Ëëòù'›quCò~\€@¶ë¶í3¯¼¿Ûn>±¾Æ,ûÄälwÅvD`ÅëÛÌöÖ{¬Å'1 &,Èq9 €å$@Юœ®&ç‚ € € *°±¹Í\ýo«Ÿ½ðÎNóÌÿXš˜g¸ üàù?›_½¶5Q­Wo¿œ HBƒ‰8 \ò“_ûªuâg×ûæ™A@ÞÚõnÄ € € €}P–ۣܔõnXrJè¶»Ú›g^{ß~FVG( ³xqS“y«©5«­ÓµEoöÙžYñ²Q Çÿ{s"“8Å*)§j‹Þ e²=Sòñ €9 h—SNv† € € ¸ùßÿh~ùàâÈóaAìo{Ü·2‘|Ìd)°iw›9ï_ŸËrkc.›?%4‹S]z3‘n|òeÓ~ÿß™šA³>" ì¼™ÄÙ¨„ý8âŒyÆlÞÓ–ØÝò/~DzmÆ IDATÚ„­—X @è³@¿>ï € € € F /»T»m:p8é#7þWÒ,@ µ[÷d°vôUÝX_Þ-Ôm+¾ ¼¸qW_wº½7`§Ô­0@ò+@Ð.¿¾ì@@@JH íHW Õ–ª"`LËáN@@ Lè³L.$§ € €”ŠÀ­KO7Sj#WwÁä‘Ië6’´LÝhRȵÀŒÑuæëœy·©Ú¡Æ –Ycë‚‹˜G Ïê¢õ‚Ùã#ïGcÚ…µ}o¶]&û ÛË@@ w‚v½± € € €@®\8Ý„â29ĸº!fã÷¾`~ûö»Yظw™ìuH%pêøúœŒãµì“ÍêºØ¼ÕÔj¥@ ãÙ¥Rgy_\ËÅØs¿¿õol—˜Ê>UÐYm˜‚ €ù h—__öŽ € € '™cêŒ^JE`ÉÌF£RÐ#®úÔŒR¨*uD@ lÓ®l.%'‚ € € € € € PªíJõÊQo@@@@@@² {̲¹”œ € € PYë¶í3¯¼¿Ûžt!Æ[ê8zÌllnKS®\e挭ïó}•uå*÷lW¼¾Íloí°…‡qÓî6óNóÄ1uàSëͬ1ú>¤ N`WÛaóÌkïÛUô]wé¼)y‡Qßëo7·£çŠþ­Ì[—÷c»ãñŽ €@1ÚSŸc#€ € €d-ðƒçÿl~õÚÖÄö¯Þ~y^‚g |ükÞ6wÿæ/‰c…M,ÿâ§Í5gÎäÁrˬÀ%?ùµOâÄÏ®÷ÍçjæñÿÞl¾÷ÜŸÍæ=m)wyÙü)掋ÎÈË¿™”僒PÀîÆ'_NÔYßq7,9%1Ÿ« ý âþUoš;þóOiwyëÒÓÍ-u:ç´J|ˆ PêíJý R@@@¼ <ðâ[¾‡Ö餇ÛOýé]óð—Ï53ÇÔ¥[•ÏÈ‹€Ì_ùÅïÍš-ͽî_o½òˆéµ¬€€1æÅMMæ¼}.’…~8¡×êºØ,™ÙiVB@ ÔÓ®Ô®õE@@(ˆÀ7þã•È;W!Kf}ûß2G(R@;µ½(;o½lVpš‚@¡Ô]lÔ€·nÚFÝhR@(G2íÊñªrN € € €}P÷‚aÝaž=}¬¹ò“Óû~íƒýæá—ßIÌ»‰/ÿ|µyæ,u³¼#W‰/^¾2ôêRpJC­ýLã„=ÿæö¤Àžw…c/´‚,¬H™ƒÝÅ bÆè:síY3íx¡šßº¿Ý<»~[RW¯ øŒi¿ÿï莸"['”·A»ò¾¾œ € € ;§^Ýb^yw¤z]8gBÁ»šTäê[í«ŸÆÿzà‹g‡Ž¥ôЗϱ™JÞ±ŸÔí 2ALéÛ3¥'ðæÎÖÈ™huƒ«ÌUŸšQð“|ô›’‚ýýy¡u¹ý¢ù&¬MI ¹à—./|á‘÷{Jc}Qºšüæ¯þÛWGëž¼î‚ÐïÌ»þvqh7šj÷ùcÏW1f@(°A»ƒs8@@@ ÒÂ2ØR™|åÓ³‚b…,zì-Qê Ç Øxƒ} N´óJ–æôæ=mu“ºhÊ肚döŒ¥¼ñ{_H[¹øüM™3þå™D°Oæ]m‡CÓ¥yå*·Ön¼Â¨'~v}ÔUs²ž~Р:º¢€ÝŸÿùò´YsÃNíZ]Àºrß o´s¼#€”cÚ•Í¥äD@@@òh9ÜYð“Òƒ`o¹÷ gzgSN3¬2 N¦Ü)”œ@{gaÇ3\õN“Ïè—~2mÀέ\3h ùöÅg¸YûžI†–oCfÈ@@?hð–¯þLÚ€[WÁfýˆÂÔ•5JA@ œÚ•ÓÕä\@@@21¤º g¤L#=vEˆ܈ZÔ¦·h”ʨ­ŽÞ^r!óÒ–]¾Ý\qÆTß|º™9cë}¿¾c¿ožò! 
1ê¼EYtQËüI ¾Ußi>à›g@R {ÌR¿‚Ô@@(1[—žn¦4ÔFªµÆ´+di:à²)Óïߊ\æ-Ú߸º!ÞEL—˜€ºîûú§FªµºHU6P!Ë»{ú÷Û·w½¢”­ûÛ}«÷åû™’Ð.˜=>R}5¦]¡‹÷‡ú÷•Éwl0t{kG¡«Ïñ@È«A»¼ò²s@@@ À• §Çv¬·WÞßí«n¦cCù6f¦,N_ëq³¼cƒ <8¾]Y\N"#ì4Îf‹Æ³ó–LÇŒônË4 €å(@÷˜åxU9'@@@@@@’ Ó®¤.•E@@LNœ8aô¢$ à’l’ë%…ߌ6žë«V¹û+Ô’•òÝS)çYÌ1ê’¶…kYeŽ ‚v´@@@Rd26Tp+ªÐã›ëÀ|å ,ÿâ§³>ékΜ™õ¶lˆ@6™ŒÜ¿vW}jFp1ó €”´A»’¾|T@@@ —ëk|»[8yTldžòU”™ŠPÐCス¢À[Í n–wb%Ð8|ˆ¯>c† æ;Ö'  €@¥ 0¦]¥·Î@@@ !0~øÐÄ´&^ݶ×7Ï q8u|½¯J›{x¾˜A ãêüA»5[šcP+ª€ €@|ÚÅçZP@@@" ,˜<ÒWƒ_½¶Õìj;ì[Æ q¸|þɾê<õêß<3ÄMà+Ÿží«ÒŠ×·ùæ™A@ ’ÚUòÕçÜ@@@’n]zºoÙ O®1Gù–¥›Ñºÿ÷挶I·?>C À³Çû>¾û71/njò-ëmfݶ}oÓÛ>ùTË>1Ù÷Ñ%?ùuÆ?ŽP oÓn²J}Ì €”…A»²¸Œœ € € €@®®\8Ý·+eÛ]tÿóFtEy¼ø–©½éçæê[mþsýÖt«ó9Pwƒ—ÍŸâÛ×yÿúœm‹½›Ü»üÿüÆ,üá3FÛP(„Àù³“ÓxÛ㦷Œ;µg­3ó[ÿnèûñ¯_OÚ @(u¥~Ô@@@\ ¨‹LußöðËï$v«q—ؘ1ºÎœ3cœ™?©!ñÙÖýíæÙõÛÌæ=þ¬¶#]‰u˜@ Ÿw^ö)£à²·ÜøäËF/ôNeêW%>~áIë'>d< Ô hûûó켇R NEmÖ›AªïÒçßÜn‚ãßµîônÎ4 €e!@Ю,.#' € € K{¿p¦Ù¸û@ÒCbælpîåÞ6±¾¦÷•XÌSgžýÇÏÚì£àîÌ ô‚ëh^i …¸êS3Ìë;öuç,QÛì´Q›2 €@É Ð=fÉ_BN@@@ ×Êyþ¦‹lÆ]6ûV¦HXpÙì‹mˆ" qÂVÿÓÅQV ]çî+‡.g!ù¸ëo›å_ütV»WùΞ“Õ¶l„ €@œÈ´‹óÕ¡n € € €EPàî¡/Ÿc¾vî\óÓßoðu—V)=D¾dÞd£1ñÔÅ&¥4¼ÝH–Ú,™ÙhÚïÿ;óè7™û^x3©ËÖàù(¸|ùü“Í¥ó¦µwJi ŒR]š7ÆÜ°äÛïùÝ_B³î‚'¦®‹ Ö‹‚ €@9 ´+Ç«Ê9!€ € €1xõöËÍ+ïï¶5R—‘¹ hÝqщqzÛבּuv %7ΜQ‹ê«àí2³¹Í¼ÝÜjÜ~´Sëͬ1Ã͸º!QwÉz1PKãm¹kœI[éí´Ô…åöÖ»Úâ“Ǥ]=“öíÝ‘‚o „èµiw›ij;dÞjjM¬¢ äœ±õ9ûw˜Ø1Eøþ% ãlêúª ç¢xÛ~”ýfÒ¾½õÓw§²îôZ·mŸÙyàPâ߉ÖÓ÷ûì±Ãº¥ € PîíÊý s~ € € €@‘ôÊU Î{*™ìW ¡Ô—¢}dr̾‹m‹'‹¶’ªö™d墭)È¡—2ð(å+  —‚´¹.™î7“öª®¹h÷©öÍr@JA€1íJá*QG@@@@@@² hWÖ——“C@@@@@(‚v¥p•¨# € € € € € €@Y ´+ëËËÉ!€ € € € € €”‚A»R¸JÔ@@@@@@ ¬Ú•õååä@@@@@@JA€ ])\%êˆ € € € € € PÖíÊúòrr € € € € € €¥ @Ю®uD@@@@@(k‚ve}y99@@@@@@R hW W‰:"€ € € € € €”µA»²¾¼œ € € € € € €@)´+…«D@@@@@@ÊZ`@YŸ'‡ € €/pâÄ £r —ë•-Ïó¢½–çu-ç³¢Íæöêò7Yn=Ù”Ÿ™våwM9#@@@@@@ hWbŒê"€ € € € € €”ŸA»ò»¦œ € € € € € €@‰ ´+± Fu@@@@@@ÊO€ ]ù]SÎ@@@@@@ ÄÚ•Ø£º € € €@®N:é¤Ä®¶´ML3æCÇâ_É<ÔеYG×Gy8»Ì—@%¶Y×^eº¥µ3_´ì7•Ø^óÀÈ.@² h—› € € €å P]]8õ»'¦™ˆ¿Àší‰J4(1]îƒNœâú=´ÙFÌ'ô£©¤ö:pà@Ó¿{u6ìë4]ÇOÄüJQ='°f{‡›4•Ôf'Í €@ÑÚž#€ € €Å2dˆÑCe•5;:ÌŠ-Š[!ŽIàÁõ{ËŒÔ5¬ªªŠ´]9¬TWW—8{ÖîN‚ ™ˆ€2"¿÷rS¢^ÇOLW„k³-š;_ÙU §\òç¸v×!óôÆV{ýúõ3Æ +ùsâ@JG€ ]é\+jŠ € 
€ä\`òäɉ}*¢€%žÊÒY¾nybCK¢‚S¦LILW„ž» ‚ ÿøë wñ½òì27¿ðÑ»Š‚ÌcÆŒ‰o…óP³‰'~TVmk7·­ÞA×®ypÎÕ.W¾wÐüóÿÝ™ØÝøñã?nI,d@< ´Ë#.»F@@â. Hccc¢š ýão¶%2¹0QTeÖ麸ìUæä“O¶A¢V¬÷ž·w7ÿn» 63Æ].FšC*s÷ºç·&¾K”:}úô4[”çGÊfÖy»À²¸¾üÜû6»¹<ϸ4ÏJß%ÊU6¤ëÆ´¡¡¡â‚Ì¥yõ¨5 P^Êët8@@@L´Óå;vØM5ö’¶nê0síi#ÍØ¡Ý]hfº_Öﻀ2”ycŸÍÐq{ÓµR†¤(WbÑa³fÍ2›6m2‡² 6+CæK§Œ0˦7UýOªDšXœ³²ÉÔf]v*¥ñ3gΜYQ]¹z/†2D¸{÷ÝwÍñãÇDÊæš7zˆ¹öôûî]Ÿé èZ¬ØÜfžØ°?¬ÓÑGe¿g WŽ„ €@·A;Z € € €€;v¬©­­5ï¿ÿ¾éìì´" ‚èµhÜP³lÆpsö„¤  ,>Vli5  zËСCm†‚ •\¸›3gŽinn6MMMæ£>²uª€Ñù“†Ùç´’æCÇÌŠÍì÷…‚ Þ¢ï÷ÃïòJ›VàîÔSOµß±´§¯®]oþÝa3iX•¹bv½ùÜÉuœ Ô0”ñ¸ò½6ß"theF*›—qì t!8  €@’A»$ € € €•) €ÐܹsÍÞ½{m D!*z¸©×ˆê6pwþ”Z2CòÐDd¼jÛAûÙuÏæ£É&L¨Øì:ç|W@hĈ6K´¥¥{¬?u“©®õš^?È|njm·ð‚z}›—óªØoK .kÏ z¨Íj;J·€þ+ãðÀ¶ÍºH(+QcŠ*è¬Gœ=¡Ö¾“1šÛ–#g—õ£ˆ`pYÌú>ј‹úQ@b ´+–<ÇE@@b( —zh9räH¼Û³géêê²5µÝˆ} ©©êg3ð«1‹‡Ø€^ O'ÖU’çÚ¦Ãfí® “Muºê S׆’, ±Ò¦Nj¸ïÞ½Û´¶¶ÚÌ;­©±ÑKÙLÊ]Ô8Ô¾'ï‰%½ ÈSÁå5;ÚCuÚ^ATu-¨Ì]J¸€ÆÕkÿþýFß±®›W—e« ’Š·½ªýR2§ýÑIS÷O”, ¤º±ë4MA@ ØíŠ}8> € € Ce(ë@/u妇ËÞ`ˆÍ²ÙÖžèZLYLsGV›éõÕö}nÃ`ºyó\Wym9ÐiÖï>b¶´vÚ`RØdm¢àœè+pJmÄ^&•Ñ¥ní&Mšd”u·oß¾D0D›*ËF¯§7¶Ú=©½ªªÍ*#O/J€¬¤S{U7­öñùÕ³¦±cÕ)X7úÿµw?¿UTÀOS(Pà«l¢tÃBH@£ÑÄ ²Á…šwºÀ ‰QWîýÜ­æÙ©1æILLˆ 1@¿-m¡¥´ðä3õ\¦·÷¶…þ¸·w^'™Ü™¹÷ÎÌyÍáZç=çÌÁƒ•}n]Ùc­óÅ=s´Ûû÷KEàt}ñ¹q£D´×£ûcZl³ÑûYy$Ãæ6»øzïÑ›us*Çoì¾}ûÜQgc‘Z+à¿î­õ·w @€´½@G1ŽŽÁ]xÓÓÓµÞLQ bÊ=Db]Bâ"sô‰`¯ CÆ…ã"¤û7ðˆ‹Çͺ|ò#$Ý»woÖE`§W]–yü×°Œž^1E/ÑèÉÁóÝ»w—l¬¢êžxüàÎZø<2Ø“Ž ïH–tr‰Ÿ9м1}Õ€.[D/Ðh³:3«<ÙkX>|¸˜¢­Fà<55µ$À‹ß”rˆ{ŠÐ.ß,qü?ƒ©¿»»X~²£Ø>ߊöí6þ _Œ`yqZ­ñß±éâ7V¯ºÕ´¼O€­ÚµJÞ~  @€ °Í"HÊ=CâÐã¢òíÛ·‹ÞL1¼Ûƒ–Ô(_HýŸß'—¬/.6G¨7ÔßS\dŽg7E`²J ¹½Žr0wãÎ\º1=_ô¤‹uk),EСG\Hz¬Eíñ?CgÆsÕ¢D¦Üfgff–…xñ™\cª/ŒD rü?;‹¤±mx» Y˜Cõ"ì˜YH¿ÞšIÓ÷ãu¶¾ªM—#X*·Y¡GSªu½‘o’ˆDÐ\n³å^xñ~Wÿû×t1¥ÿ{´Ûš#pŽàùÙ=‹7L õn«:þûm4z'OÏ-Á\˜Õtå¹èMWn³+Ú» @ =„víq @€¶@\-?·*†x‹ð..4ç‹Í*•{õ4 Gòç‹@¯¯§X,B¾ÁÅÿ}]¼ÝüÙNk þVÚo5bÇÿYìóqñ8."?iÉwíÚU\DŽDÙZ™bǘr‰@$Újn·Ñ†•n5k;:ÇÐ…¹D¸—Ëb8ݸ·ÞZ‚¿Õ‚Š5r‰¡,spœ{#å÷ç5¬ÓÐÐPño<‚š•­÷˜â¹–Q"´‹À9z:ç¶»°°°ì ¢ ãw6~o£ä¶œ?×N¯Ñ>£ä›òïn„ÊÂäv:SŽ…6K@h·Y²¶K€ @€MâBl¾8ÛôCÿ>Ó).:GÉ_ýçsï’úõ«-7ë‘Ãø¾ Å«)VçýÀbZ­Ý–Cçè¡—ÛoYªÙúògêçsxQ¿¾>€kÖ®ë¿g¹ór[nZÙòïçÇ‹{õÎ=Pëׯ¶\ËŸ-ÿ®6k×åÏ›'@€UÚUél«+ @€ ¬6DXI¶U•õÒÛVp°•ÈAIT¼<_9ÞyÈÍ|°zìå÷¼ @€›/ÐøIÈ›¿_{ @€ @€À¦ Õ¶= 
@€öˆÀX!@€åB»å&Ö @€ °ÍÊ¡] ë¥ @€´^ ßLµÒ°­­?JG@€Ö ³uööL€ @€ÀÄð˜ùÑìÎ. @€š¶¼ ŒÕøW@O;M @€Ž©Õiaa¡6o† @ 5å¿É<³¶5çÀ^ h¡]ûŸ#GH€ @€Àc <ûì³µoÌÎÎÖæÍ @€´F ü7Ù¡C‡ZsöJ€6Úµù rx @€<™@îfffžl¾E€ °aå¿ÉFGG7l»6D€NÚuÒÙT @€j9´‹g§”‡cª}À  @€À– ÌÍÍÕöUßÓ®§§§öžTY@hW峯î @€:Xàù矯Õnjjª6o† @`ënß¾]ì´¯¯/Õ‡v]]][@öH€6ÚµáIqH @€¬_ॗ^ªm$_$ª­0C€ °eÑË.yìØ±Á]¹èiWÖ0O€@•„vU>ûêN€ @ ƒŽ=šžzê©¢†“““\SU#@€´·@ùo±'N,;ØÞÞÞeë¬ @€@„vU<ëêL€ @ "¹·]<×îÎ;©µj @€gÖQW TIDATÚK`bb¢v@ÑÓ®¾Ô÷¼«ß2ª" ´«Ê™VO @€xõÕWkµ.ßá][i† @`Sñ\áñññâ€8FGG—\WWWÚ-!±@€@Åô´«xP} @€.=íò™·nÝÒÛ®ÓO¸ú @€m#pýúõÚИzÙÅßhžg×6§Ë ÐB»68  @€ÍIï¾ûn±ƒxžÊ_ýµy;³e @€B †&¦¢ §³gÏ.“ÑËn‰T\@hWñ ú @€ª ðæ›o¦ï¢LLL¤¸ˆ¤ @€lž@ô²Ëå7ÞH;wîÌ‹µ×;vÔæÍ @€€gÚi @€T@ †^zûí·k5-_Dª­4C€ °!qƒTÜ(%zÙ:ujÙvûúúROOϲõV @ ÊzÚUùì«; @€ œ9sfIo»©©© Õ^U  @€['PŽh*& ´«Ø W] @€Uxíµ×Ò‘#G ‚¶éÚµkUæPw @€À† ÌÍÍ¥«W¯Ö¶yöìÙÚ|yfhh(uuu•W™'@€ä™v @€¸xñbŠ^wQnÞ¼™nݺU1Õ%@€l¼ÀÂÂBº|ùrŠ×(¯¿þz:yòä²õööêe·LÅ , èi§% @€ P)èi÷É'ŸÔêÃdÞ¹s§¶l† @àñ®\¹’fgg‹/Ž¥óçÏ7ÜÈîÝ»õ²k(c%ô´Ó @€¨ @ “ùÎ;ï5çÚýþûï)†sR @€_ †Ÿ˜˜(¾8<<œÞÿýÔ××·lC±nÇŽËÖ[A€‹zÚi  @€TRà½÷ÞKÞE¹ÿ~úã?Rx  @€ÀÚ"¬»qãFñ…å.\¸8Ðpñ,;…š íšÛx‡ @€ˆçÛ>|¸¨e ‘=îòsX:¼êªG€X·@vþùgm;çÎK14f£²sçN½ìÁXG€’€Ð®„a– @€j ÄÝÞŸ}öYÚµkWQñÉÉÉôÛo¿*³ZÍ@m  @€'ˆÞuqÃS©àÔ©S)¦F¥¿¿?íÙ³§Ñ[Ö @€@I ëáÇKËf  @€ P9_ý5}üñÇi||¼¨{OOOzî¹ç’!œ*×T˜XE Bº«W¯ÖþnŠGXwþüù†ßŒ¿«öïߟº»õid%JB»†Y @€ª+pýúõôÑGÕ†xŠ KÏ<óLq‘©º*jN€x$ŸÊçCbž>}:/.yíêêJ###)zÚ) °º€Ðnu#Ÿ @€ @ "³³³éÓO?M?üðC­ÆLO?ý´»Ãk"f @€* DPϯ›››+ªϨûàƒÒ±cÇšr §ÁÁÁ¦ï{ƒ– í–zX"@€ @ âqù矞.]ºT“ˆ»Ã:”öíÛW[g† PøÛ(F$øçŸjÕ=pà@úðÃÓèèhm]ýL 3¾{÷îúÕ–  @`¡Ý 8Þ"@€ @ šqù7ß|“¾øâ‹ªrÙµkWÞyÖ]ñJ€tªÀÂÂBºyóf1ÅsìrK/^LÑÓ®Y‰¿™öìÙÓìmë  @ ‰€Ð® ŒÕ @€T[`~~>ýüóÏ髯¾J¿üòËŒ½{÷áÝŽ;–¬·@€èÖ•o^ŠîìÙ³MŸ_õŽgØEïºíx|¡Ýã›ù @€ˆ»Ê'&&ŠðîË/¿LW®\YRó.3.LÅóZzzz–¼g °fffÒøøx1åçÖÅñ÷õõAÝ™3gVì]×ÝÝ] %Ê+ ðdB»'só- @€*$099™îܹ“¾ûî»ôõ×_A^}õc¨èS\ÜR @€í.0==]„tñ·N9¨ËÇýÊ+¯¤·Þz+Å3ìV*½½½iddÄML+!ykÚ­ÉG @€ ¡]\Њa¢.]º”¾ýöÛt÷î݆0ƒƒƒExÃgÆÝ憈jÈd% °…ñ7̽{÷Š‘â5FˆçÖ5*GMçÎK£££Þ^²n`` u zÚ) °>¡Ýúü|› @€ ÄÅ®©©©â‚WT;žu÷ÓO?¥ü±aï»zšó £Y¯b™ØLö²Y8WÞou/¿ür:vìØª=ëâ{Ò ¹9©ŒhžëÚ­Ð×  @€¨ž@Üá]„x¹Äóî¾ÿþû"È»víZ^í• Ж1œwt'NœH/¾øâŠÏ«+W «««êb$½ëÊ2æ  °~¡Ýú m @€Š Äð˜Þ=xð`‰@„vÿýwº|ùrºuëV1CP ó–0Y 
@€¶H`xx8:t(9r$íß¿¿˜{ìçðFP½ë„u[tâì†Ê í*wÊU˜ @€xøðañl˜xæ]}x·‘û±- @€@«â9½ÖEï<…6O@h·y¶¶L€ @€@ÅbØÌ˜âÙ1¼Š|Õ%@€&000bŠgòêY×a'Wuh[¡]ÛžF€ @€ÀvˆçÝEx!Þüüüv®Šc'@€* Ϫ‹uýýýÅ« ®'] h;¡]ÛD€ @€@§ Dh·°°æææŠ×è…˼N;ÓêC€¶‡@r1ÔeLåùîh€Ð®uööL€ @€ @€ @ èæ@€ @€ @€ @€@k„v­õ·w @€ @€ @€Ih§ @€ @€ @€h±Àÿ”ߪîºìß…IEND®B`‚rocksdb-6.11.4/docs/static/images/pcache-blockindex.jpg000066400000000000000000001540341370372246700230050ustar00rootroot00000000000000ÿØÿàJFIFHHÿá˜ExifMM*V^(‡ifHH  ) ÿí8Photoshop 3.08BIM8BIM%ÔŒÙ²é€ ˜ìøB~ÿÀ)ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÛCÿÛCÿÝFÿÚ ?þþ( € ( € ( € ( € ( € ( €>døÏû`| øâˆüñ#Vø„¾#ÿ„÷âeõ‡€~|yøÅðûOÔgÒo|câÝGàÿÃ_é~ ðõµý­Ô2ê~+¼Ñí•-n®7›ky¦@AðŸÇƒÞ8Ó~$ëžø…áÝO@øA©iú_Ä›§±ð×…æÕ>ü?øícwsâ=F;MóE›áÅ?øàø‡LÔ/´tŸF“êq_Xj¶–?áŸÛ³ö[ñ>›âa~#j~Òü#ðçPøÁ¨_üVøgñc൶¯ð›I–Ê Sâw¥ø½à_/į‡úuÆ­¡[ÞøÇáÙñO‡ínÜËà‹?|ã8|/ãqàoÛxkÇRè‰à[Qð‡‰ô«OI©è­¥¨ioûNü »øcâOž=¶»øSá_]x3Rñí¶‰â{ Þk¶~)²ðMÇü#:„:+ÇãM<]‡WÅ^þÞð´º´•¤:ËˤjËd{âÿí ð¯àdÞ°ñö©ây¼Aã‡Ö¿áðWÃφŸ~2üDñ%¯†¡°ŸÄÚ¾ðßàïƒüwãÛßøa5m!\øúÖ/j5øGÁÞ;øWcâßw ­þø+Vñ§…¾!øQÒô_xßDÕ’_èZe핦­¨AbàMûþÌvðüCÔ®5Ï‹PxSá>·ñOÃßþ%Kû/þÔQüðÎ¥ðGÅÞ"ðŦ»øÒßá5Æ™àøKÅ7ˆõÛÝhvSxX¸:‹Ùé÷7}àïŠ^ø®øÿÃ~ ñ¿ˆµO…Úþ—áoÿgZê2iš‰u è¾4³ÐºöqèZ–®žñ'‡µ½GOÒ5-BëG³×4ƒ«Åc&£i ýP@P@P@P@P@P@P@P@ÿÐþþ( € ( € ( € ( € ( € ( €?%¿mïØ£â÷ÇÿŽs|Eðo†|%ãok_³³ðZðî«ûnþÕ±¾£­ªxÓTñ ÞêßðÍ_ þÙ¾ðÿ†|[ñÿFøâßüGð¼·ŸðNÿÙ§ö5ñç‰t¿Ù«KýœµO†×|qðOÄ~8ð÷ƒm>7èž¹ð·ˆ4 êmì×VÔüvý‘¿k?Úòç ø»ìóðwÅ<w{ðª/‡><ø‡ñ[Ãÿþ4§Å/ôCâø‡áWÂGÁ_ãÔ~éú&»à-þ!kž"³ñ¥ÅÜúí”ÞÓÆ Oíû'~п¶–‘áMö„²øOðëÀzwů‚#ñÃO„Ÿ¾&êš½÷‚ü¿ÿájþ:i? 
þ øöK4ðφ4oiº?…´ý#ÃÚ'‰u)übšŸŒäÓtP ¿~ÊŸ´5×ìãÙ"?ü.ñgŠüyáþÏþ/×µßxSOñoÁï†^4ðO‰>Añ–]á¾³ÿgÄ?ÂÞox·Qð?†|m ø‚ÿB³ø‰ac¢7‹o|àðƒWðí•yñ‡À_´å·ÂÏÙ¦‰ø{ñGà^»ð…ÿi/‰ºŸƒuo‡¾9×þøóAøƒ üb›öNÒu¯xÇFñg€5MÄŸeø;«hž&ðÞ©£x…>!XêÞ¶ðÕè‘|ý’ÿjÏÙcÆ3ø·ðÎÇöø»ã¿þ×›ãG„¼iñ'âÂ_ü?ø…ª~ÔŸµwíK¥Íð¯Äú'Á¯Šú·‹|c­þØ^;ðN±§x‡ÂÞ ÖµK?xoÇp5·ˆ¼Uâ­@ø[yðçöðŽ>Úi~'Ѿi¬µÝjÏÀ~7ð¶ñ Ç_ m´O‡’Ãs®hZ‰¼ ãPjÔ?bïÚTÓ¬~Ì> Û| ðßímûC~ÖZGĨþ!øÒoŠšåׯO|{øÝáÿ†Ú‡ÂöøC…´|7ñgã]¶™«øÖß⯈Pø{álü%ˆüM#xlè¯ØÃö&ðì»à„skZ—‹ø‡SøÇác&½áßëšZ5ç†ïÀ;Ÿ…±ïíyðmÿh oxsàõÞµñ7ãí¯ãüAñŸí×ûbxËá¶¢~Ô?´—ÅÏ‹ÞÔ3'Å^1Ѿ/Úi¾Ð|1¨x‡Eø©ø¡ü9ãêy¼Eà­K@Ó¼E¤éÚÿ†¯u¿€}Å@P@P@P@P@P@P@P@P@ÿÑþþ( € ( € ( € ( € ( € ( žk­oYð×ì¡ûNøÃš¾§ x‡@ýž~4ëz»¢_ÝiZ΋¬é_ ¼K}¥êúF§c-½ö›©é·¶ð^X_ÙÏÕÔ1\[ËÑ£¬UmS¨Ó³P›O³Qv}/¼kuêŽ7þcá·?ñWþÓ?øš¶'Çþ/Æ=™¯À£ÆHÞ¹¥K׌"û­Bïúî{V¡ÿ>×ß ÿ†XømÿCí3ÿ‰£ûbvëÿ5Û¿N‡Ôu!w‡qÑæuýÁÂÿò˜ý÷ëm4æ>­Cþ}¯¾BÿÃ,|6ÿ¡¿ö™÷z?¶!#ÿ3¶çvþáÅ|@×üŒj_þ¼aŸþãÓúìÈžŠzA[·½ÿÉGþ[]s9e†„óâÿÚgÿKöÄÿçê§éƒùs»xñN|×üŒ'ÿ‚0ßü¥þuÈö4¿‘äßü°“þ[á§ý ß´Ïþ&í‹ÿÏÜÿ/Ê·ç'õùù¯e†ÿåW¥÷I^Áìi"ÿÉ¿ù`¿ðÊß ÿèmý¦ñ4l^ß÷]ýÿöº®ñâ<éÛý¾zÿÓª—°OúÒú!{ _ÉøÉ~Séø÷ø˜Ã*ü3ÿ¡»ö™ôÿ“Òý±yÿÌî@Çá>÷jÞøCáßìÕûC|@ðwŽÿiÅÞøñgÆ>Õ¿á±ÿkCû+Ä~ðˆ5­Qû©ñÂûL¾û§eksöMBÆòÆçÊòo-.-Ýám¢î¿¯ÇÏä¯ÒûDþ¿­¿/¸ý ª € ( € ( € ( € ( € ( € ( € ÿÒþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVu¿…Wþ½ÏÿIc[¯T{päwü?Ž8ížGs‚¿–âÖšiçÑ}öÛ×}“h÷Çzý=3øtçºç=>n¨»5Û×kú]~{-U›ÏåëïÛ¿ô^˜7ëÏüÿà«ÚQ=¼÷þ¯å÷ö{Äþ¾ÿç¹ü±]0}?¯]þZ.½mîäN?ŸùëþJéƒÒÝ¿¯˜ úžý¿úØÍtÁÝ|ÿ¯ŸW¾û½¢yÛúü;¿Ò÷°¿§·R1ø`œsퟗ8n˜»¥Ûn—ÿ/O¼_ÕöOç{­’·½ë^*?Ç'¯·ãëß““è½Qzö¾ší¯ÞŸßn÷°žÛvv[÷ÓªÖëåd“ÖSŸóþ•tÅ릿־Wß{[ºµ¥ˆÿ^žçüç8ã¿$†$à éƒÖß×çù/[ÛÝðõíþg® ÷à6Úéƒéú¿Ëo×ïýHQÿë#Ÿ¯®@ã¹ÿ6 ×L–þ¿àt}o{¾ÑÏåÛOG§ÍëÑZÜ©+óŸóþ•tÁÝ[~_¯^¨6ëçwÖÛoï5§K­lµWŒƒ§>üã=}zgVÎwdâºc¯^¿wß¾o/R§üi½v ¿çŽÝºLd$YW¦/g·§Üûÿ]­aOñœgÜŸÀšé‹Õkkõí½?¾Ýu° þÏùþUÕ¯õýuùwW´²’³é½í÷¾ºëå§MÒ&^ž¾¼~cúää“Òº ìÿà]üµOî¿{hÉíþ—ݲ¾·>iý´ÿäÎk?û6Žÿú«|Uþ{ÿEéƒþµü¶ù‹úþ¿¥úËéŠÐ€ ( € ( € ( € ( € ( € ( € (ÿÓþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVu¿…Wþ½ÏÿIc[¯T{jôúžO8çGló‚ùfOëúÿ?‘ï¿êß×õÔùéÇ©8ã©é×ÔÉ®˜½—õkõvßîèùÿ\ÿãÝ>˜íòôǦÏòÿ-W—šKì§³ßåÿZéƒÙú?é=uù?šŠ–/wåç•ü¾_’”«ÓüçŸ_½Ó¶F1ëÑz¡¿nºÛîëø?›·¼‡ÿŸóÛóþ¦º`õkçý_'vâ 
ÿ×{ú÷ŽäWL–þ¿7ù/+ê/¿§MWäߦ¶ëdÈæšhe¸¸–8-à‰æžy#‚¢BòË,’2¢GòHì««9;AÛÓt´Öþ·æÛÛ[ôµÚ뵓ëu¶ÝV»^:ó|ÑoûoþÅ·PÅ=·í{û/Oñ$ÐOÇï…E42®øå†XüXé,NŒ¯‰”t`ÊJ‘·ÐŽÿ@ÕôÝ{—ôµ›OO/%§)ÌêS»µH=™~:½|·]o¼­¯í©û7 ûYþÌÍÜcãÇÂÃúÂWóg°ÎG'âꎣXlGþ©ø®]?ð/5Ñ<ž?|ObÕ¾'|5Ð| Å-wâô_†3éº.µÄm[ÅžÓ| 6âI,!ðæ«‹ï/áðóéºüº®—‹¨G¨MQõ ±–á®íÒ]! ºœŠœÝKµìù[Ÿ2ÝrÙ´ãfÞš$ö+¦ö]Ï!ÿ†ÑýŽÿèì?f‚sÿE×áo?P|R9Çr9Ûé]ÐÃbS×_×ÙTÿäc«ÛüµæÍµüÊËOŠÏäúZÞ®©›öÙýŒíc\þ׳´m5½ºÉ?Ç¿…P¡¸»¸ŠÒÒÒx¬)šêæhmí¢4ÓË1#¼ˆµ¼0øùñ[×ÙÏòþ´Õì+®ûÛU/¾Û_ÓÞ|ÎÚ7yY_ÛGö:óv?³FsÿEÛáiÆ1Î?á*ëQžÉ5Ñ ÿçÍ]oÿ.åÒß.½Ýº-[Œ¶­ºïº~VMvK~¶×dG?í³û[ ’çö¹ý˜­ãi­íÄ“ü{øS 5ÅåÄV–“Å{L×7SAoo;æžX¢…]ÝQºaFµ­ìjà¹yÿuÿÀ]õDÝwZí¯õrqûi~Ç_ôv?³Gqÿ%ßáw¯^|SÜŸSŽÝÊôFK/ÝÎþp–¿}·/]]w_y«¢þÖÿ²—‰5'þý§?g½Äö§a¢èZ‹ñ£áÆ«¬ëZΫuŽ™¤é:e‰§½Ôu=Föâ ; H'º¼ºž+{x¤–DJÝFVÖ]îŸÏ]RõÙoek+;YßËËÊÖ¿U«òVÞ?D'ùü?üŽŽ:û t×Oæ×þ ·oºÇÊŸ¶OŽ’„~O´—ö«ý„çûàÿÇ¿í¿û<Üg@‹$gêFÒ+*Ù[„ä_Å¡ ŸÛ¯Ùÿà]®íЙPjÚkÍ×_}-ûkÓå¹õ_ü,ïpïâñÛ8öê:ÚÿdËùžO¦÷íkÛ¯ŸGö鮟ͯünßu•lŸ$ÿü žyùj¿ØN|nþ=ÿmïÙæàœpI,‘ŸrF¬ªånO•„6þzð¿òn×ÓK]“* [My¢ºë勵mz|·>ªÿ…€îýü^;c§>ÃGB_ì™"ÓÉôÞý­{uóèã^ÂÝ5Óùµÿ‚íÛî±àŸ´wŒWSðO¬DÅŒÿ´ßìd6ï%kïÒt'„ägÜçiZàÌòçC‰ªãnH+»wšVÝ[~·ÛÕÕ¢ã쮭ѻꗕֽ>Wµ¾—¯ÿKúùõnûÚ÷”ß×§uÎppFON¾Àu®˜½S׿¯ËÓÒÝ;I ùñéÏÞéÇNž¤q]0zÛׯåýk¿@üÿŸËü;×LëËòüô·]<îDÖ‰ü¿¯ë欔”ÿWó>øú~UÓ¥¿¯ëåé{3/ëúÛòû‰ë¦OGëç×þË`B‚X…U’Ä’OÉ$¨ÀëÅtÅè¾_ÕúšµÒÿ-•Úÿ?ÇÏFÜ~ZýžTüF×<}ûK^ƒ-¯ÄÉí<-ð“Ì[?~»Ôað¾­hå⇈ïüKñ8^CäÉ}ámÀ¶7ñ<Þ·zU_³Tðû{?~­•ï^væM]?ÝÇ–•¹´œjI%Ì‘)]·}ôKUhß}/kÙ½µVWZŸˆ±oíŸû0h¿³ÿÁ Äß´Àoêº'Âï‡Úmî®ü`ð“}es§xKG²žÎîÓQñ3Û\ÛÏÅ5¼Ñ¤ÌŒžB]¾-Ãg“Í3—S'Í©ÓþÙÌåB¤òìtaVƒÆÕTªRœ©(Nœàã8N”\d¥M8>sðú43 c³ àqª?ÚÙS›ÃbgMâj{9ÂNq”m(Ê7MY¯u©KôwŸ·ìc•æþן³ 8Çúß¿ cƸ©èHûÇÜ–$/ê]t}¦ YûØjÑÕt³Ž»ú7¿5’>¯Ol=x´úÒŸ_X/]ŽgÆ%Ò4/ø$wì½ã íNÎßÃú7àšž%½Öšt:u¶‹§üYý™µ;­ZK¤2Dº}¾Ÿ—Ò݆0-œMpdòÕ~£†=Ÿúá—*õ(Чý­%R¶"­<=*Qç©Ï:µkJ4¨Â+YÎrŒ"•ßÂÙú œ¨&ÔW5/zMF+Xêå'¥Ý¶’ÝžGÿ Íû9qÿ³áÞ{ÅQcÇÐy„wîqÐû·ôï.IÿCþÿć$¿þ¦ÛñûïcÛöTvúÎ×ë8þ]ýv×—ÎÿµíŸð[øká«-;ã€ï.!ý¢?d-ZHmüGg4‰aáÿÚË஽ª]²,„ˆ4ý3M»Ô.¤?,VÖòÊø6ÞLl2‡F™æAQýs/MSϲzGëøoi7c”)Âõ*MÅF•8ʤåFr†uiÒQMb0¯÷´tXœ;Ʀ¾Í^‰ÝËD’mµÙôOü77ìçÇü^χ™íÿ=OÞ?ƒÁí–ìöy6ÿÛÜ;éþ°ä—ÿÔûyí÷Ü¿gEÌN×ëXo_ùüß~·ô±ó·íAûgüÖþøjËNøÃà;ˈhÙ V’ÙÍ"Xxö²ø+¯j—l‹!" 
?LÓnõ ©Ëµ¼²¾M·“O)ö0äÎòë™rjž{“Ôj?_Ã)ÍÆÉÉBœ/R¤ÜTiSŒªM¨Fs†uaEE[…w«GlN¿ãSW´j½“nNéE&ÛåNGÑ?ðÜß³Ÿñ{>g·üTö\œ7Nyàñ÷›³Ùdÿô=áïüH2Kÿêu¼÷ûïîè©ÐÛë8?_­a¶ßþ7ß³ó²9~Õ>$ë|á‰Þñ'ˆuŸÚƒöE.®[ßêk§~Ô5kÆ‚Öi$[K «ë†ÚDVÐK4…Rjðx¢žY‡2tsl—UR§ÉG œåXšõ·¥gCŠ©ZväÔ#'§'h©8óâãIaê8×ÃMû­Fž"„æýø­# ’“jîöèžé\þ‚ùÍ~%gnÿ×u¹ùZìùæ­ý|¬ÝÒ·^[õ¾ˆù»öÒ?ñ‡µŸý›?Ç×áoŠ½Ïøcœ‘]Tž©[ªïßÕ¯ÁyÜ_×Ýò[ýï²lúfºÄP@P@P@P@P@P@P@ÿÖþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿƒ[þ½TÿÒÖëÕÑþxýyõíß§‹ü¥uÓÓó¾ºßå®ÍßÝúúï×ñ»Õµ÷I>š¤ÉW§?‰ëõÎ@Î9êNIÏ®˜½[v¾¶ûš?=.HÿëéÛ þ~¹Æ8ã!zbÖšiÛM¾ö¶Û^Û\×üþÿŸ·®˜½WKùí½;}ÞkqKoÇúÛóÛEk‹þÏù?Ñz õòþ¿_»ºÚXŽŸ‡?áîb Ï·VéƒéýzÿÃ~;Ä>hý¥µMG\Ñü-ðÃ7·6>)øÿªÞxJûP°‘á¿ðÏÂ2Ö=C㌠ž2’ØÜÚøJeð‡µHdÓüwãŸL2¥«ÑÁÙ9V’N8uΓÚU$íF³RNwœ“k÷t浺&]»þ]Ëþú/KÓ4íLÓ´m"ÊÛLÒt‹ M/KÓ¬¢H,¬4ý>Þ;[++H#8m­ma†"@(Ñ€ã'+¶Ûm¶ÛݶîÛó{½e{êÇÿÛª_žšü´]OȟدTðÌß³ˆVá~ü%^ã%<áøØ}甩 FÆ )\T±IæXû=aŒ_ø&¤›£^VÙµvÈ_öÜ]í‹Ä«zV¨º=¯ßkk{rŸ£þÕ?ÕÜp;t<6ô=‡ýÌŸ™BÊq6åÕ÷Õ¯%o’ôo¥ôgÒ`ªíw{yÿö²óÒÚß®Ç-ûóûþÉÙÿ£høŸ§ü*ß ýA<žHàó‘^v%ÿ¶b¼ñ5¿ôìïm—}ùmÝm/²‡Ãð¯ÈúcëÇ©ëú{gÔäÙ8!œ^¿¯ôÿá·|Ü©F…ü:äc‡lŽ2FN~‡Óëøo×ï3Ûåg?;Ùëm•ûoÇûûž™Ïà:éƒÒß×ü×püôÓmVß~½¹yU¯rQÈüû~¸ÉÎëÈ;k¦OëoÊÿ^¤5ýwó×_ž‹î¼—ü8g“õÀúäœäã5Ñ·¿ŸüKu•ûˆpÿ? 
íÇnù uîª/gÿúëýikµÓ×oÆßuï÷ï®¶3žëþß^úmk¿³ógí¡ÿ&qûYÿÙ³üwÿÕ[âŸLÏžüŠê¦ýèùµýKO+²?®ž=¾óéºíP@P@P@P@P@P@P@ÿ×þþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÐ?ê¿_¯LcÔ}ïå7·õ×mU»½ÇÐþZߦ¿/´ï{Ú÷”ˆz×üyúppzc&º ÷_?ÓôZëçm9“þµ¿õýyõíß§ó?0àóžÞ¤ WT×§õýoçÚ(_óþ9úþ>¾¥zbôÞïúÓ]ôzïÛÊ ¿çüõþ\wÎk¢/oëüºyyés hÿ¯øÖ÷Ü•?Ïáßõîxì'wT]íùÛôvzö¿¥„|µð/þ.‡>#~Ñ·_¾Ñµ™§øOð\·Í ¼ ¬ÝG®ø²Ï _øZbÕõ¨¯#V±à_ ü3½WuZ½JR…,:ø¢½µmåìâ­£¿²§hî¹g*©ÞâZÝýÞ_‡W¯Úýª϶@÷ë×ðçÖ¦ÎÝÿ¯Ÿõ£»q?àúÙ¿/M4»ò³?™ßØ»öÏý—ôOÙÿஃâoÚKà/‡õ]ágí6óNÖþ/ü=Òo¬îl<#£ÚÜÙÞZÞøŠ+ˆ.í®#’mçDž#hšY |®]…Ï'šg×'Í¡MgŒ¨T–[Œ:´eŒªéÔ§7IB¤'SœdÔ ïYŸ‡Ñ¡–;0¾£õü\©Íák¨Îœ±9'rµ(É>hJ-Þ6zE#ôo·ìc•ç~ן³ `7ã÷¨ÎÏ;üT;wìNæÈ¯Óòº8è(ûL.|xjñõÑŧ¯M›ß›Cêpt±QåæÃ׎ÉÞ”úúþ·ÓkYŸEþÊ~<ð?ÿ`ïÙ[Ç^5ñŸ…ú?ÃÈì?á´c¾¿ðÖ³I9Ïü—o…½½AñO|ôç%sÒ·Ž£ú¶#çB¢ü94ûÝ·W²4{¯½]:ö4ô_ÚßöSñ&±¤øwÿ´×ì÷¯øƒ_Ôìt] BÑ~4|6Õ5kXÕn¢°Òô'L±ñ%Åê7×YØXÙA5ÕÝ̱Ao ²¼qÖëˆå*£®Ü©M$–í·$·ræÑj·±WÒK]¬ú÷Ñ«öµŸ¼ú\úÏùÀÆsœdût5qv×妯eÛÎöÉúùétÚvKû©ý¤íîȾ˜ÿ?ŽG§e铜 éƒÖÖþ¾÷ø%¦®öB{V~—·ž÷wówþsƒ×9Î:çn$gƒÔ×L:ÿ_ðÿ¾mÆÿ<ÿSÈëƒÓ`óŒ7L–þ¾ðÞ—³ÕÓ¢òùùõõóü-–ßð÷ûõ·ô~Z3æ¯Û@ÿÆþÖcþ­Ÿã¿þªß}~¼cÔ“€+®›¼¡þ%ÖýW¯õÛc/»ú·¦þ{öM^?N×x‚€ ( € ( € ( € ( € ( € ( € (ÿÐþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÎ?Î;vê0?LÉô_äØËgÑþ_…þï4•½ß¡ýz¾½nõºÿÉ“éf““×ƒéÆ ëúwÇnzòqŠéƒ³õý~ÿ]ºn„ÿào¿žºôòôWH›×ßÓðLç×8ú¦Ûúþ¾ïÒH=Ã?QÓž¼uëÐcæé‡õþ{ôÛE×[èÀ_óùzûöïý¦OO/é;ÿÃîe5g~úï{Û×nÖÙôØù×ö‘ñ·ÿ¶‡ðŸÁzÆ—ñã¾¹ÿ ßAÕlþðíÅ…Ö§ñ/â »óöiüà -rÿBº™~É/î<£Îë&³n¯é`Ôy¥Z¢R§‡^ÒIý¹]*Tí­ý¥Geï5i'¢‘›}:½êþKÓç±ïðî‰áè>ðÖo£øwÃ6—áíI³B–šf‹¢ØÁ¦éz}²Å`³±¶‚Úfb#‰A$‚j¹ÝFç&å)IÊMÚíÉÞMô»m¿Ç¨Í¿ÓùgõÏoçÇZè‹×µô×m~ôþûw½…ýyÛðóüR½ì~>~Åz¢ŸÙ›öq¸_ W·&?ø~&À ‚ŽxÁõ…,Ry–>ÏEÆG£Öš‘vw]SéäÚiŸ:ßí¸»tÅâU›íZ¢Ò×Mh÷ùÚö?H<©‘årqòúö8Ç9‚:®2kô,§ðnºëÖÉ]o¿^ííÉ{ËépUv¾¶·[~JV¿§mþÏŶW_bÿ‚kþÆW€àÛÚÿÁ/§ÝÏ_ÇÙ²qŸOÇÛ“W“CÚñ5{óãñIkñ:½6kúgØEsB+ù”ßo_Ëï>Ùÿ„ý¹ÍÈ÷Ëž=¹çÔ‘ì «öOìžœ_û¿«½­¾ïþݵËú·“Núh¾ýº|¾VJ^ûJxÅõ?ø*Äͼ\~Ó_±’• I;kÿ®H{lÆ:t9^åçyr£”ãêò5ˇ“øv»QßNŽÿ »ÑßI*‹šÛ5ò»ëî«oøìõgÜëÿëý?3ضíÙ¯Ê#êüßõ{ýúï­™?.Úz=>o^ŠÖåI_˜xàÿ<~_ʺ"ö{z}Ï¿õÚÖÝŸ]t¿­ýçÝÚéì¬ÕÉÉ8çc8ì2ÙÉÚÕÕªé~½¿;ýöêïdGõýmù}Âÿõúví€G¿ uÀܵÓgëý]»«Ú@ìû}ÏÃúWL¶þ¿¥úìõqõý#æÛGØëö²ÿ³høïúü-ñP÷íþGZë¢ýè¯ï/Í|ýoës뢿Ÿ}­Û§¬¾Ÿ¯DA@P@P@P@P@P@P@ÿÑþþ( € ( € ( € ( € ( € ( 
™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÍý??ǃìsÀî7%Aéµ»Àßóõ¹ô?z½­oÓ_?ï;Ý;^ò¿ùýúý}9^˜½þ¿®šÚûìÓþoþçµ—E~½9%?úÃõ?0ô9Áä «Õki~–_v›­»ÿä·N2;ü¹'·$ôëêä×L^©ÿ_ç×¥¿ HÏÿ®ﮟLö€ùza½¿_Ãþß…¥Úÿ§ãÿÆñùoá ÿ…©ñgâ'Ç©ÿá½õoŸ |ÐÉáß ëŠ~,øÖÏ$ÿ ׼? Å<~mµÿ†~øW[°Á¬È%õ*?cJ–Þü”qü¥8~â›Ñ?ÝÑ—6Ÿ ëN >VÌwmöÑ|·ÿ/—™õgçýsëßùc¿"¦K_›ü—•õµÓ§£õóëÿå°_×§ü1ø3û x/gv¾`/eðá¹’"ðfŒ1¹¾PA;—®ØT¢·ÂåY¬kæùå.kʆyšRÝ7îãë­”“ÒÉkÌã»ä‹Š?£‹Œó Îw§™c¡ºû8ª©õºkËU»å‹Š—êƒõ<ù$7]½8÷q•àpz‚È;YÊ17å×ùÕ]öÑ[WüÖ\ß_ª×/M—ŸO]_þö¾Dñ^«m¢Á(¿eföâ;;M3ÂÿðLëë‹©˜$VÑ[|eý™e’i°F«¼“€Ïõü!Bx¾9ÊpôàêTÄgN”)Å^SœêUŠ„t½äÝ´Zöè~“†„ªTÃÓŠnS¤•ÛmÆÊË5þLÇÿ†Šð?äxÐ}¿Óàã“ØIôêÞü`šþÀÿS3úâ¿ðT¿ù_õó>“û3ký^§þ åúõò|ÜÄ~ñC|'Ðtßé…î£ûPþÇ©oemyÍ9‡ö­ø5rʈ‰Ù/)ù[Xñ›ä¸ë…ó, g˜ºù}z4¨á"çRtåÇš½&ß"µÜ’ZîÒÒç&7^Ž­IRœcFäàÕ“©­l–ïÏ›o3÷¿ãè;c'œsÇR:/\×ò¬6þº}ûß«ôJìùÿëw§’jëKy'¼»Ÿó×ùþuѧ§õ®·ü¼›×”ù½mª¿á³>gÌí£w” ägžüõÎ^¹à‘ü$d–ê‹ÒýWç½ö]ö×Ô‡¿¯áÒÛ»ÚÛé験Ÿ^±ìänÁÓ›ýêþ·ÕyßnÖÑpÿ?ÔöêO¿°šé‹Ûüºú7ú»yÙóOí¥ÿ&uûYÙ´|wÿÕ[âªë¤ß´‡œ£§ÏçnîϦïNl¤¬ÿ ÿ¢ß[¯{Î×Lú~½B€ ( € ( € ( € ( € ( € ( € (ÿÒþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÍ×ülŒcŽ?¨$J×ò<¶ïý|¡Ûüõ×­ÞÍä×éf“=ÏÓŽ väúsŒ×LÿÕ¿áô饺\?=[»}Ö»óZ+Z7%S‘åïÇÓóÁôÈn¨=:iÒßç½ý;ïrGúþ¼gëÛŸn¹$ð1ótÅèžïúßmáîïp<öŒñ‡ˆ4?Xx/ÀwÍañCã&¿kð·áåô̸ÐurÎöóľ<? | ¦x£Ç¬²+Ãuuáë-%ˆ›TÒÁ©íjFôpñuªÅí(Å¥ ÷¤¡MnÒ—7KÆfô²ÞNËË»ù-§¬ø+ÁúÃßø_À¾±]7Ã^Ð4Ÿ hV*ÆCm¥h¶0éö1<¤ožao®$&[™šI¥&IšåRUjJ¤ÝåRNRokͶôZnôKE§CÚjtÿ?ýqŸ¯'¯·ƒ×úßú½ûw[HþúßäþX®˜>Ÿ×¯ü7ã¼Oëúþ¿3ùèýÿd/Øß^ýþkÏì£û4kZ®«ðcán£«jz§À¯…šŽ£ªjW^ÐæÔ/µ ë¿ Ésy}uy$ÓÞ]]M$ÓÜÉ$³I3³5mG5ļÇ–*»Œqظ¨ûj–JŠ‘q·=—+N6ÛK^_gòWŒ¬±¸µíªr¬^"?ÅšVUf¬—•­¥ûu´A|)ûþÂóy^wìaû&ÍœeýœþÈHÆæûÞfÎ=P¶8;F }îWŒ©>^iÊ[_™¹mê聯ü9lÙô8:ó’JS“³Öòm¾ýW]]ü–š8ú·ì©à?xãöý–< ãOxSÅþ¾ý˜ÿg›{ßø£Ãº?ˆ<+wo¤|9ðUþ•ׇµ[+Íâ2ÿO°½Ó¢–ÒD³»²³¹¶Mm §…‰œ£ŽÅN”d±X‹J.ÒW©4ì×-®›ë³é{Kì"½È]}ˆþ_NÚéÒòëÇì]û`ÿÆ&þÍÔÿ‰øZ÷UÇqÉä V«‰ÒØŠëþâÎÚúÍ'ùzèÂÑì¾ãSDý’?e? 
ë:Oˆ¼;û1þÏzˆ4NÇZеÝà·Ã}/YÑ5*êí3VÒu=?ÃP^iÚ¦›{ 7–…œð]ÙÝA ͼÑȈk¡b+ÊéÖªÓM4êNI§£VÕ=;ü쮂˲û¡úù=Ç g×ù8"®.ÏËúó]=W[{¤½:Yé§ß×Múi²åI^ãÇÓßþ¬ÿŸN•Ó¯ü>Ÿ¦ï·Ü-ºùÝõ¶ÛûÍiÒë[-Uã"ô=¸9=~¾œ©ëœq]0}>~¿åbZÛîû´¾ºëÿnÚÖåÕr»¯ëÀíÛ@ rI•z`ô·õý?¿Kõ$Qëøÿ\q×ã“’sÎÜ×L6ïýZëÂ!óWí£ÿ&uûYÙ´|wÿÕ[â¯óÿê®ÊOߦÿ¿ý*߇ü¤Muÿ-~v¾Þ.§ÓõëP@P@P@P@P@P@PÿÓþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÍøtÏaŸNzCü#òù/UÙèìþ^w·ü5·>‹æÕíµß­¿©;öošNO_§×·_Ç¿‰«¦-ÝZö}6ûÅ÷¿+ÛF¿íí¬µ·›½’‹×¯áÓüîœm'æþ˜7}ôíþJëmúúj'÷_ÒÞNûîIþ}³ž¸ã©é×Ô’k¦ëåéo/-|µðàÿÂÚøããÏŒ3fü.ßÀ¯„¤üöךզ§k7Ç_Zd4ryþ.Ñô…ÖSàOc'Ã?›y$°ñ+ïõ§ûŒ5*òò·.&½–ª.-a©½t~ÎR¬÷æö°ºN›!k''²÷cÿ·?½[åæ}OYÅ躿¿óÓªü‘VoúòÓ~Ým󲇷ëßðê=;vÉÎ>n˜½·Ëú¿OóVºD’×T^ß-yoø6ž¾¾zÚ ~þÂ~&K¿Ù¿àE¶ñ¾ÏáÃX‡ÌrV?èñ2ÌGÈ#ÁÚîTe\¡b_ŒË3X×ͳº\úÑÎóX+½í˜b#d›{rô“¶ª\žìOÀ¨â¹ó ÊZÃ1ÇG­Ý±UcdìÖœ¾}Uãk§¾Õ?Õ|ÙÆÑœ©àŸ”vþ»•vVë_¬e8›¨ûÖµÿ«·e·ÃóÜúü fù_õ¯Ý~¢²ºNÖ4b£ŸØçöM÷ý™þÿÝ-ð¯nG~@ΚX—þÙ‹]^&»ßÞ˦¿~—ììÙú:_»¦ÿ¹ø.ºvÛ[;ê´GÓ£üÿú»û~¨ƒºþ¾oç~­ùZíwÿ_ÓðõŽàchÏ8ºc²þ¾OÞ¿ŸÙÑéýÐ?^ý:ýAõÝïƒ÷qü]Q{?ž«ú·ãmÒvH—Ó_'¿­Õ¶µ»Ùõ¸õî?Ã#ëÔ}8Æ1Œô®˜»5úÛõ²ZõOîПM/mUÚýùó{ÎÚ]2EëþOéÆŸLfºbõùÿZwímWMÅþ_ðÚ¦ö·T“Þ[!ÿÓ®@ý~ðìaŒœm^˜=¯øËk½­(ðý zãåÎIàgéŽKtAÿ]¿{ßMÊìš¿mù3ŸÚËþÍ£ã¿þªß}ŸÓ5ÙAûÐòœ5»éó_~¢—Âý§ëÙ0 ( € ( € ( € ( € ( € ( € ( ÿÔþþ( € ( € ( € ( € ( €<ºïã‚tïŠöÿµKË/Æ:…ô¿è†úâÒ5ëMRÿĶLÒµ™Ãëևº•äúMÜVsÜXmºÒŽ¢–Ú ÓþŽ— gøj§aèÔáó*ùf1Ñ”¥‰ÁV¡C_Ûâh8.\U˜P¥ONUa ÉÓÄ*xw_åkq–E†âÚ\НS ârœ6oUáa3 œFg‡ú¶§%,}•bkTÂU…•0íV¼LibÖÔkçª ù›öÔÿ“7ý­?ìÙ¾<ê¬ñUe_ø¿ëÕOý!n½QìÝÀsŽ1Ž>_ažW©-’«ü…§Ÿõ¯ß»î}×üïkùõkåÍÍÓ–ÜÂûþ9Àú‘Ç\®Kg'i5Ó¢òóíÿ€µø~¢ÿÕݵ奵óZ+Z7>>ý«üáˆ~&ý‘üãÏ xoÆÞÕÿi=xê¾ñv‡¦x“úŸöì—ûRjvQÑu›KÝ2óì:•–£iö›i>Ï}gkw IàÓôÿ cN§5R©—b›„Ò’¼ÃµtÕ¯¶®2¶ü»^nä°Sq“Œ¹ág©u»ø¯{=¶}µååž-ýe †Ù‹öz„ŒàÅð_á¼}ˆmðÈNH#$0ÎIÆ]e9~[>W<¿$×ÚÂa§umU;ÿÀ²ê~U™b1æäÅâbÕþÕckíkT·ôÚÑ#ç¿ üøAðÓöªýµ¯‡ ~x V½øÉãý&óTðW|1ámFëLŸöQý¤¯fÓn/4=.Êâ{ /,,.ÞÎIžÝ®,­'’6–Ú†J¯,§Sš>Ò8ÎÎ×”tR}›Œð63[‰eJ¾3Z—ö~&J\Ej”Ô”è(¾IÍÅ´›åvm'Òçíáõÿ=ý°}zb¿àú^»ôÛE×[Úçì³]¯M¾z¾>Ó”x}?Ÿ±å½0z[·áÁßÖëmnŒÉ½ñÛëø~^}±]0w_?ëçÕï¾ïh‡óãû!~ɲˆ?g€:ƽû+~ÍÚæ¯«|øY©jú®±ð7Ꭵ©jšï‚4;›ýCP¾¼ð¼·W—×·RËqwuu4Ó\ÜK$ÒÉ3±w拏bÞa‡Öñ<±Æâ`£íërÅCQ(¨óÙ$—*[YY)Y“(x»àÿíIðŠ?ÙoÄþø®~Ñÿð•k_¼㯄W?ü%â]7Â~4×õ_ˆV2ør_ßxCUÖ´{^ׯ>Ñt«ýµM^Õ“Iñ 
Z0¥iÿ·'ìsªü1ñÆ};ö›ø%wð·Âž#Ó|â_ÅñÃgGÐüa­¥„š„u77ââÓÅ>#‡VÒn|7áÙm×Yñ®­¥]h¶7ÖÚ•”·¼·ìù}ãiÚ7ü ­|"ñ·ìÓñÏöŸ´>Ÿãï Ið—CðOÀ?ˆ_¾xÆWÄ?jþÍ·6º—Åf—TÔ'Ô­­ü97„uÝ/YŠßPGŽÜ»Ó¿lÙcZø™oð[Døýð§UøÁ{`/tÿ†v¾4Ñá2¾•üÄ8ôx4G»[ÃâCà)¡ñ¼¾1Ûx:TñMÆ•„~ÜÀüÿ‚™xŸâW€dÿŒ¿ÿe|øûhA𒃟4ÿ‰þø££hÞ!øõáûsàï‡~,xKO øÃÁøþëQÓ¼!¤jÚ^âíËǾ… kúž¯m¨¨€¿à§^2ñ%‡Â/ˆ¾)ý“õÏ |øÙûKIû.ø/ân•ñ›À¾*ñT~6½øÏâ€þÖµ„Ϧx^_ ê<ðÔ²ëSh¯ˆu_ xV{ê:\º&¬]Y~·P@ùKâßÛSãv—ã_hzM¿Â{}+Ã?/ÔµÓü'âýkÃvsß_[|QÒ-®o.­ô¸în^ÛM²„M+¬Vꊡ¿§roøC’ä˜üV#‰^'2Ér|Ëõ|Ï,£B5ó,· « 4ªpþ&¤)B¥yBšz“åŠr¨äÛ—ò6wã§`sî ˰t8J8L¯ˆ3ܧ ±Y6qˆÄ¼>U›ã2ê3¯Z—á)T­Rž5*JžŒ9äÔi¨¤`Ãp|}ôø5ÿ†óÆÿ§ü^/ð÷èKz_ñ¸'þñWþ2ž»Í5þWì´g™ÿ÷ÄùõÁø`Ï?ú/øn¾Ÿ¿ðÞxßôÿ‹Ä3ø~Æ(ÿˆ%Á?óÿŠûÈã)ÿègËþ µäÄ}ñþ}p_þ3Ïþ‹Ãþƒãï§Á¯ü7ž7ý?âñ‡¿BXÿˆ%Á?óÿŠ¿ðñ”õÛþi¯ò¿e£ø¾ ÿÏ® ÿÃyÿÑxÃp|}ôø5ÿ†óÆÿ§ü^!ŸÃðÎ1GüA. ÿŸüWÛþGOÿC>_ðU¯#þ#ïˆ?óë‚ÿðÁžô^ðÜ}> á¼ñ¿éÿ‹ü=úÇüA. ÿŸüUÿ‡Œ§®ßóM•û-Ä}ñþ}p_þ3Ïþ‹Ãþƒãï§Á¯ü7ž7ý?âñ þ†qŠ?â pOüÿâ¾ßò8Êúòÿ‚­yñ|AÿŸ\ÿ† óÿ¢óÂ~ |KñOÅ\xËÇw]Wþï øwO_èúφ#Ó“ÂÚ߉õý;V·ŸPño‰µõ¸µK-­ýöžlZÊÒKxżÍ÷|;ÃW åRÉò˜ãka*ãq¸ìBͱL{®ó &[ 8P˰4e„ #:5hÏÚûZªwƒQ—ç|OÅyÇçK=Ï'—ÒÆÓË²ì» ý‡†Æåk²Ìve˜añp©ˆÍs$ßÃÄ( uðçˆdÚÚ|D°´ˆÈçdkµ§Œ¬mãium*H5[xä×4h–Ô´Ýù÷Ä¿ å:¼ARL‚¤ÓÆa=J¹j“åŒe&Üêå•j5 .&^ý ¸á1NStkâ¿¥|&ñf+ã¹ÇìyûWñѹ8㯨?/éÞÊÜKw§ü'âWoù{†í·]}O75WÁËüQÓïÙ|ÚÝ´õvû]'Œ­¿ÖñŒgüAÅŒdÚ¿.Xõ¯rzŸ½þ{÷õïë£rÑŸ•æ´ô—ùk×oyô¾ÏE}¯sã]jí)û±ãþ/ÇŽÇ8Ïüš?í>A?xŽÄ’x88&¼g1 þ¦¹^—ºþ%M:-®–íêî¶9ø¼Nßý@bÓÒÚûJ~ûÚëí~¡ÿŸþ¿çõõîJÿÁ꟣Û–ú§¶Ÿ6¬~Ó-¿¯ë§[ý͸¯ùÿ?çÆº¡¿ù»[ô·áicý[~^¶&S‘Ÿñçüþ+¦V¾×õòwn!øEû x™/?fï6ûÁ’ËáÃXNÜ/è° 0  {\Œ‘ÿ-wy+òyfh«æÙÝ>{ʆušÓé´qõ鮪í8Ù´÷ø¬åËÀèâÕLÃ2ýèf8è»_eŠ«f£m-oÍÅé/Óßê„y_6ÀÎ{¯Ýëß#Œ … ƒ_«e8›òÝÚÝvÿä›¶ö÷wÒß~¿U鯯é§_¿Ku¹òˆµ´Ïø%_쥩Ï4VÐØøoþ ›w,óH±ECñ›öc‘¤’FuDD%ÉU 2O×Òð•)bxë+¡Êr­œÎšŠ‹”¥)Ôª”TnÛnêÊÝU“¿)ú]¹{¤Û”¨$¢®þ(馮û[[7Öî%ïø]ÿ¡ŸBÿÁÅŽ~„}¤‚:c¯L‚\ÿ«8ßúÅÿá5^¿÷ýkê:Òªß~GÓ[§Ëdîûyi«<ßâgÅ _‡áf“k¯é7—Ÿ´ÿìx‘ÛÛê–“O)ö¯ø3;Š9dw!b.ÅTá⃷åxã"Å`øS;ÄÔÂb)–9N¥ ‚æ­J”ÜWrµÛÝ­îsbð•)áç7NqQänR„’_¼‡Wk^éz÷µÚO~¼çÿÖ®Gàò¸çwòľ¿×u¾Û?Ìòþ™ëŒûzÿŽ£¸WL¶·}¦õôKÎö3ôºÛ¿Ï³~šÒvÒé’óÆ?Cœ~ž+¢×˯Îýݺ%v—¯GÚÍín¼©ï$ì¹^9¸Ï§ëœût×8ÅtÁééýk­öôÓfõå™t}þkå¢ïæï¾×ú{ã<ç9ÇËœô?îãæ-Ó§§Ï]õ¿¯šüdù§öÑÿ“:ý¬¿ìÚ>;}ä–ø«¯_cÛŽ„Šì£ñÓÿô¥ëëù[D'³ôgÔ5ïP@P@ûHøóÅ þkþ3ðkéiâ-7]ø{ecý·i-ö—$> 
øá/jP^[Áqm9ŽçKÕ¯mÄÐN“ÚÉ*]C¾HUìü?Èòþ$âÌ¿&Í>±õ^:W„©XˆË‘æXÜ=JSœ*Cšž#Fnƒ…HÅÓ—,dä|‰¼Cšp¯æyîKõ_í,7‡éÐXÚS­…œ1üG”åØšu©Ó:œµp˜ºôÔéÎ3¥)F¬/(Eþ |jðׯI¨éˆÚOˆôv†ÏÅÞ»ž9u/êR£4:,cPѵ“èz䶺ª8)mmaipveÁÙ—Õ1–Ä`ñ urÌΔpÙ†.ÍÆî^ÇE¸Ã„œL=FµÒ­W~ã¬«Ž²·Á_ ˜a(æùEiÆX¬·8¶“k—Ûàë¨Ê¦F–*’vä­KB‡²WÉl|qñëöˆø™áŸŒ¿fßÙëῃ>#üjñï€|sñsX¾øŸãÝkáÏÆŸ < ­xO“ø‡ÄZdž¼ ñĺÿ‰6é:&¹ðwÂ.øß¡x#Rø3à/|QÒ|Oá;ÿø+Ç><ðç‹2xŸMñŸƒ¾*èß­¼ðÄ6^ø£¨øãG‹~xGá§Åäøs©ë°ñÕ¯ÃOx—Tðž«­ZéZæe{º:û à·íð§ö€´ñDÿ µ_5ÿ‚uk-Æ>ñÿÃ_‰Ÿþ!xVûTÒ­uݼCðÓãƒüñGÓõýò Súơá˜4Ø™n4KýB;k–€óöoÿ‚¨|Dø·ì¡âŸü,ýš´o~ÖZ캋¤ü)ý¬5‰þnð޼yˆ¾$| ÖþxNOèö~šÃâ¹¥üBwðj–œ¶:Þ›öÙì€>ÉÓࣟ±Þ©á¯x¶Ší¶áý7ÀúÝœzÇÁÏŽ»ñBøâh¼ð×Ä|)®|7Ó¼SûCx{â'‹g³ðßuï:7ÄmÅúÖ£¤éþ¾Ônum6;  §ÿ‚‰~ÉøWÃ>*‡Æþ;Õ¥ñf¿ãÿ iŸü;û>þÑ>'øãg­ü(m5~'Úx£öyðçÂWã¿‚cøyý·áçñ­ïŒþè‹Å>›Xº´‡Å¾}P:ëþ _ûÛ¾®->9Ùx†ßBðo€ü}ªj^ ð?Äïh¶Þø¯¤øKXøEvº÷‚ü®è×7ÿ£ñÏ…ôŸƒÞµ¾›Ä?üY©Ià‡º_‰þÛ?×Jø1à|+Ó>(xªÚ÷Yºñ‡„-5‘tÈ5+}OÔ¼G{5ÛÛéÚE– .±¯j—vzV•¥\êW––³€DŸðPOÙXø?Sñ„¾4ñÍœºGÄ=á5çÃ}Oàí¥~Ð ñ#Äž—ÆÞðm‡ìÏ©|-µý¡õmo]ð=­ÿŽt{}+á…òê~Òu¿ÙI7†´=_U²çï¿à¥ÿ±…†á=]¾)øŠÿJñ^‡‰eÔô/‚_¼IaðûAŸâ7ˆ¾Wã­ß‡þjP~Îv–|ã_j¯ññþ6â¿xëEÕ–Î÷Á^+‹Hæ|ÿ7ýžüQ¡üCÖ¼K¥|cð<Þ ý¤üuû2øwÂ÷³çí¯|@ø©ã?Á­jsËð»á¾ðz_ˆ?—þÏ øƒÅ#ÒüáMàG½Ôügs¤ÙF.Ö¼)ûx~Ê~;ñoÂÏø'┞/ñÆ Gã_xoÃ>ø—®ê·ž_øŸÁº¯ˆxºÖ×Âþ=ÿ„o]Õ´}?Púî€?ÿÖþþ(ãÛ÷öm×k¯ÙOâìûáé¼+÷޼EðzîìxÖóV°ðÝׇüñ³á×Ä/i×÷:“­êi.©áo k:n›t‰>«ucous§ÙËq¨Ú€|ñ·þ %`|WñLþÉ–_ þ |;ø¹ðÁº/ü!«ßxÎ÷CñwÇÏÙÿö’ø_ñÛà׉ü%›«XÜü5×´'â§Ã_‹š–›®éÞ o x«K´¶ðÇ‹ÑUôñWìQûcøïö˜Ó?oM_Ãß³‡þ7øÇ¯<-û3iÿ>%kÿ¼_à߃_ÿmO…:§ˆ“àôñüuÕ>&i¾½ø§ã»ŸøXZWï øƒPÑn¼Ò迱wí‘/íCῌM§üøC£kßto_n>þп|uà?Ouðb?ø·Ã‘~Êßþ j_4ߊ­¯ðõŸíCðÏÇ¿ü[âOiZoмEàKMfçWð û;~ÄŸ¶¸ýœÿ`Ù öŒÓÿeß|ý‰á”õ¯ø³à¿ÆOŠ¿xøMÿ“øßà›¿†V6¿³—üëáÄŸþÕ÷ßo¿à¡_ þ'üDÖ?kÝOÀ—Ÿ´Þµñ«Ä´ðjþÈ ¢:—Äß…š¥Çìûã?Å´‡ŠüÞ ÖõÔÕ4?i8ô@PóQñcźV•ñã=…Þ©ekqÆŸŒàžòÚ PMñ+ÅÆZ9fWQ$R$ˆJ èÊãåujÿAøSZ· ð•JtjTŒ¸S†­(S”¢Ür<^©Y´Ó‹WÒI­9}ïó+Œ3 6ŒøÖlMS‡q{p©Vœ&¹ø“3”oIJÎŒ•ùSŒ“Wº8OøO´úi¿_í;>{gýxëôü«ÞþÎÄÿÐ5oü#翵ð_ô†×þ¢)__>eµßWÛ¨è<ÿÄ÷L9ÿ¨•Ÿ¨=î?½ÁUý›‰Óýš¿þ Ÿùk¥úï½´æo‚ÒøÌ2ÿ¸ô|ÿ¾ïÑî·êÐÂ} ÿÐsMúÿiÙóÛ?ëÇ_§åOû;ÿ@Õ¿ðDƒû_ÿA˜mê"•õóæ[]õ}ºñîƒÏüOtßú‰YúƒÞãðéÛÜ_Ù¸?Ù«ÿà™ÿ–º_®ûÛNafø-/ŒÃ/ûGÏûîýë~­ü'Úý4߯öŸ=³þ¼uú~Tÿ³±?ô [ÿH?µð_ô†×þ¢)__>eµßWÛ¨è<ÿÄ÷L9ÿ¨•Ÿ¨=î?½ÁUý›‰Óýš¿þ Ÿùk¥úï½´æo‚ÒøÌ2ÿ¸ô|ÿ¾ïÑî·êÑjׯuñ‘lµ 
{Æ„!˜ZÝÃpby‚&E)(%ò¤— ¿d›s±ÅLðUiÙÕ£:jN\®tù9¹yn•÷iI^ËNh·k£ZY îJzu\y\Õ*°©(sߕˑÞ7åŸ+{ò´¹¬Ñö7ì§û;kß5½;Ç:ëj:Ÿ k6÷Ðj¶òÍa«øÓ_ѯxl|-}Gu§i:]õºÿkx®ÂH.ÅÔ ¥x~å/ÒûRÑ¿#ñ;Ä à±PÆña„´*F|.S‚ÅSp•\ÂŒ”©WÅb(Íý[.­ÓörXœl'FŽ#ö <4ÇqÆ; Ÿæ—ð®WŒ§^–&”çCf*Êp¡–b)¸UÃá0µéÅâóZ.5}´/© Dkâ°Ÿ¸UürtŸ3~ÚŸòfÿµ§ý›7ÇýUž*¬«ÿ·ýz©ÿ¤1­×ª=›¯on9ÇÇ(ôÁNGü{³éýkm=vôKN_¤—øŽÿ-oÒÍsørqœgœqŒ€=Û–ÎNÓ]Pv~¿×ùšÚSÿ«»¶ok¿G¥­ÅŸþן~üVñì…àŸ‰ÞðwÄê?´Î»q¨xOÇžÑ<_á›ÙôïÙö©½°žóCñ†¡¥\Ía{7–rOjò[\Ãð´r¢²þ¡áO+â‰FiJ2Ëq\ÑjêV«†i=Öén¤“ÖÍÅrùYÅÖ n-§ÏšÒO{µ­ïg²vzÙ£Æü[û þÅ0ùžOì}û.CÔ+öøM8 cTäŒrÎIè7`å8Lù\ðxI'¯½‡¡;é®ôïøÞÖN÷?*Ìñ¸srb±1Õü5ªFÍúM%ò¿VŸü!û7~ÏŸ ÿk?ØçÄÿ ¾üøuâ/ø]ôá¯øᇂ|!­ ÏÙ'ö˜k»ªxBÓïEÃCžÛí<˜Œ±¶ÐWÀñ¯¡áÎ"¦„¡WûS+ŒªÑÃѧQÆUfÜáÉŸ'(¹Y¸¦Ò²rŽÅc+q,©×Åb«SþÏÄÉS«^µH))ÐI¨Îr‹i7gË–»Gíç¯ùún{ñŒWñ„^‹¿—õ¿ç¿Sö¡Ïùÿ'ú/LZÓúüü¶ù4Ó×úüÿ®Û¡ãßóéØp:{œç‚MuEëÚúk¶¿z}»ÞÂ?ŸOÙ öHýƒˆœ­Í9»w”¯ø·ç»ò\·¼}3öWð'¼qûþË>ñ§ƒ<)âÿßþ̳̾ ñG‡4xVîßIøsà«ý. ¯ê¶wz=Ì:eõ…îŸ¶Ž–wv6—VË Öвü¶&¤ã˜c' ÊX¼O,£)FJõgvšiê›Ogm5»GÙÁ'NKXG}ž‹¯ªZmÑsZñë×ö/ýŽðsû'~Í'ßþGÂÓŸüµ¾‡©Ï¶+HbñMkˆ¯ëíjòK˽ï»Ú3(ÅtJý-ßú}­Ù^òÖÑ?dÙKÃzΓâ/þ̳Þâ S°Öô=wDø/ðãKÖt]cJºŠ÷LÕ´NËÃ6÷ºv§§^à å…ýœðÝYÝÁÅ´ÑM=n±5åzÎ-r´êÎIÅ­S\ÖiÝèÖ½•Ù6]—Ü} õüO'¯áÛ¯S’w8­"ûzkÿéýöë¨ÅN¹ý;qÆHÉÁïÀmµÓô²õK_»Íkù=.CÓ_üÿ>‹E}>N._óüñúðrÙê+¦.Ívþ¿¯¹½¿àié{|Þ½­Ê­{’/§¯çþúöïÇÊÝ0zÿÃéúnû}ĵ§ã}¯÷ûßu“{lÜŸþIÀ8Ï8ãÀ÷a–ÎNÖ®˜uþ¿«~¾DŸ4þÚ?òg_µý›GÇ~œÿÍ-ñHÇ?0:àµÙAûô×÷ãÿ¥/–ºùõê'³ôgÔ5ô@›¿·ÇŒ|SáàÅ–âÿx^ÏTÑþ*]j1xGÆ%ð‰Ôn´»ß…Ðé²jøkUÒ¦½1jºšÚÇs$±BonYÙ«ú Àì£*ÌðüYS1Êr¬Î¦¿Ãó<³™*ÄC?uÕã°øˆÑö®…RTÔe?e&Ôl3}!3¼ã*ÆðU³9βªX¼7UÄÇ'Íó,¥âjajðÄpÒÄK.Åa¥YPŽ+©F¬¥{zŽ)97/ƒ?áføÿþŠ·ÆnÿóZ>)ŸÄãÆ–XrqÎ ¯ÜÿÕŽÓþ1~ÿÄ_ ÿçoOÎéìÏç¯õ·Š?è¯ãOüL¸ŸOüËzõ{l¬ð³¼ÿE[ã7þŠýGü&ôïÝ@ÅêÇ ÿÑ-¾…²~Ÿô-ùè´{Ý0ÿ[8£þŠî4ùq—ü×ü¾OO¸?áføÿþŠ·ÆnÿóZ>)ŸÄãÆ–XrqÎ £ýXá½?ãá_üEòþvôüîžÌ?ÖÞ(ÿ¢¿?ñ2â}?ó-ëÕí²°ÂÎñÿýoŒßøz>*wõð˜Ó¿u«7ÿD· ú>Èúзç¢ÑïtÃýlâú+¸ÓåÆ\Oó_ò6ù=>àÿ…›ãÿú*ß»ÿÍhø¦~YaÉÇ8&õc†ôÿŒ_…ñÈ?ùÛÓóº{0ÿ[x£þŠþ4ÿÄˉôÿÌ·¯W¶ÊÁÿ ;ÇÿôU¾3áèø©ßÔÂ`?NýÔ Qþ¬pßýÜ+èø[ WéÿBßž‹G½Óõ³Š?è®ãO—q?ÍÈÛäôûŒýSƾ&×m?³µÿüMñ˜nôûé4Ÿ|Rø‡®é:N¡kªé²Þi:¯‰ï4ëÁg©ÙZ_[Çwo4kuo ž[4jÕÑ„É2|hâpàqQ…jpÅ`x&Âb©ÃF¦º¥ˆÃ`iV¤êP«V”:‘“„çìäŽ\ff41â'Ìp’«‡«S˜q>ŽÁÔ©„ÄRÅáå[ ŠÌêáë*Xš«B5a8ª”¡.Tâœnxkâ?‰| â}3Æþ ÔF•â} 41É.ùtícL–Hå»ðãxŽ¡¡ß˜×Í‹zÜØÝ,:ž›=ýœÔfù[Äm|Ÿ8 
ñCç\¶Ž' ˆŠå¥ŽÀÕ’—±ÅQR~õ:°”ðõáR…IÄ×'âLÛ‡3l6{‘b>«™áS…çÍ<6; ))VËs*Kë*í'(§˜zªœ,¨âiS™ûkð#ã6‹ñ×áõŸô}>÷H™/ï4-G½Wi^#Ò–íK+=GÊŠ×ZÓãkˆžÏU±Ì3G'‘u†©m¨i–ÆgÂx¾ ÏkdغôqKÙRÅàñT\WÖp8Žw‡­V‡<ªa+µ *¸jÖœ%x:¸yЯWûÀ¸ÓǼ;C=Áaëàæ«ÖÀãðuã'õLË ¡õªq<¥ÃÅÔ‹£‹¡xTŒ¹*Ɔ*ž# CÇ>=~Íß¼_ñá¯í#û?|Wð·Âo¿¼ 㯄º øðÏTø¹ð·âGÂë>ñMÿ†|YáâGÂ_éÞ ðß‹¼¡ëÞñŸ‡¼we&„×¾(Ó5ÅWˆd²´ùCìÏ•~,ÿÁ8¾6ü`ð÷‚´¯~×Vž;¸žËö‚OúÄß…>ñoÁïkŸµß j:O‰>üÓ¿i øÀ§àg†ü/uðïà¿…~#êŸ< øgÄšÖ­¯èÞ#ñž£âOø¤ ý—?`__²ÅžƒàïþÖ>_†:®­ðÆt{ÙÞKOøÃÆ¿¾ üø®éþñÞ³ñ£ÄÚ/Ãÿ†_.¿´Ò—Àš÷Ã]:òçÂ×g_µñŒ³Y]êº4 !ºúSö0ýŽ#ý•GÅgSÔþjÞ1ø©á¬\|ø9ª|𕮃à]+Q²ðöý™âߊŸ|{­jÚ> ñF¯u}âŠ:¦›gý²šw‡t /î5šü7ÿ†ø9ᯀ?²·Â 7Qðf‘ãÙçR‚/|qð§ÁýÂ>øÍà½ká¿ÄO…<ªj:/‰bñƒmñ¿µys¨x¯ÆvV:Æ—£j·Ú7ˆgÓí*øuÿjŸáÎáMWEøÉð“Iø«ðb?„ ðâ/†?f+í)m.>ø¿Kñ$÷ßtkãçŠuŸŠmñ MÐô¯øÇFøeãÙó±L·¾(ðÞ¢kãÓøXëcöHý§§ñŸÃ_ZGí_ðþOÚoÁ¾øíðç\ñGŒf«íà†¥ð¿ãŸŠ¾x¾OøOàÿ‡¾<ø'ƾ‡:ÏÁÏÜxG]Õþ9xÓTÕ¢oÛxÉõÓâ=ëÁÀAà/ø$îð»à¯Žþx+ã~¥ˆµþÇŸþ|DñÃÍ7X»ð·Å/Øâ?†ZׄõÿøvÇÄú&›ãÿxÛâÉ|GâÏè÷?¡±ðÿŠõŸ øOVÐ/m´¯Y}mû0~Οþxçöø§ñ‡ã„¾,ø÷öñÏ|eª'€þ_| ðo‚­üð·Âß tÏøEÖ>(|Z×µ;Yôÿ [ëz¦¹â»›ùu[ûñÁ§}‡OÓÀ>:ðGüŸöÑ?eo ~Æ~-ý®>ëþh~_†úç„¿c¯xCâæã_‚ß<1ñcோ|Câ{öÐøƒàOØøsâ'‚¼/©x×Âñü,Ñ-¼w¡Zê^´Ô< u5Ô¬€:ýCþ õñ£Zø›íG¬~Ò_›ö¿Ò>*øÇžñv›û;kö_³î›á|øÉð"ÓᦥðFëö„Ô¼y«A«xcãÿÅOj3ãîâ‡àˆúgÄ?‰âøðCáÿ†¿h¿|9ð,WçÆþð=ÚøË¿~&Úø á'‚¬¬~0/ŠZãÅÐ{ü0Åßxæ_ŠŸh¯‡º7Ä þÒÿh/†2ø£ö{ñ‰¼)¦X~ÐÞ —Ã?>üFÑ,?h? j^7Óõ\Øx›áçŠ|%ⅺפѴí#]ƒâŸ6¸ºÐ¡~Ê?°|_³7ÄkïŠ7ŸOÄ?øŸá׎4/ÜIàk Ç­üIøµûRüký­>+xïDŠë‰á kÿþ7kšw‡>(Õ¤ðLJômßx–õ%¼”ô€?ÿ×þµißþÑ:ßí«û/~Ì¿¾8Eð/ÃfÏÚ÷ãoŒõø~x/âN©¬k¿>#þÇ> ðnŒ–þ2ŠH4Ýìþ=x¾ëV:l–—ד[騗p,%¨ç- þ ;ã†øãÿ´{ê>)ñgì¿ñöÉðÅ)¿gÿ…ÝéßôÙËöbømûSø{ÅðŸˆ|q©x‡ÀÞ2Õ~|T𽄾 Ð5ïiZÅ+?èÏ®h^6o`Ÿãïø+÷‚4‡?¾?kÓh¿þÝ~ÕV¾2üHñ> üføkâ_„º‡ì™ûDühµ¾øsñ;ào¾%xgUÖÓÆŸü ^è:mÔ>:İÿÂ-§hµ¯Š|;¯~›ÚþÑÞÒÿf]gö©ø«á|ð†þx¯ã/‰ôoIáçñg…~x_GÕüSµðììþñ£ªxöÆÑ¼8Ÿ¾ è¶Ö^$]Qø¡á_·~>ý–¤¾ÕõÝÇ[Õ|á¿øšÿÃöÞ.¸kÞËþ [ñGÃ_´ßÇ_…Ÿ?fŸ‹¾ÕôŸþžøû:Gsð[WñÿÄ_ß´‹ÿn›¯jÞø‘¢üLºøsqðú?†Ÿ³Î‹®ëz¿‰¼_¡Gàµøcñ9!ÓoïÅ„:ð„ŸðSÿˆúÆOŒŸ|û1|qÔ¾&jÿ¶?Œ~üø+¡Û|¶ñ/‚¼;ð»ö1ý‹~7øòoˆ~6›âuŸÃ™|9/ˆ~7|Dø‡áïŸ\ÿmø ô=L°›Å÷ÞðV¤¯ügöE»ƒÅºÅ¦âÝoÃ>3|Iøi¨x7Æ¿üsâ_‹/ÀÛ+_Å¶ß <-ñ‹Pø‹ðº÷ıԼoà»~øy¢Ýx'HÕõŸêÞ ¼°—H ãWÿ‚ºü:оøâ>¥ð_·‰_®~ x&ñ¾8þË-ð[Äz¤ $ø©mâ}+öŸOðkZD3øCÃú|¾1ƒÆú¿ÄûkŸÛø=m¡}~€?N¾øò/Š? 
¼ñ" xŸÁ±xïÂZ‹Â^5´Ól¼]á¥×´ËmKûÄ–Ú.«®è«¬ihûûèšî·¢Í‘¬jšt¶÷×åP@P@| ñËöC¿øýûKxÆþ*¿OøC ü5ð·¬ØÙ^¼^ ñ~·¥ø¯Ç:¼žƒìÿ>“¡MvÁõ\Ï£ƒÆâÜéâ*B_VÁ¥9Ï…û¯JÒ´Í LÓô]ÂÏJÒ4«;}?LÓ4ûx­,l,m"X-m--¡T†ÞÞÞH¢Š4TDPª¿ÄâqÌEl^.µ\N'VuñŠõ%VµjÕdçR­Z“¼§9ɹJRwmÝßC÷\. ÃaðX,=& Fž †ÃÓ…*z`¡JPQ…:táB)F1I-‹õ‰Ð|ÍûjÉ›þÖŸölß?õVxª²¯ü ßõê§þÆ·^¨ö_óƒŒûxg¡È cž@ÂÿAéé×Ïúíoî}3Ûv¼×ü§ÆyÎrA ×=Èþ2KuG§ô¿>›]ÛkÚ×RÏõ¾—¶–··ø¯eÍó_Ça»âìh?êä|WÇsÿyûWñѹ8ãÛþÓü+’\NßýKq;?ú}…ꯧéÚÖ<ÜÕ_/ñÆËïÙ|í»iêíöºO[çÎÀn÷èyï‘• qÆæ8Æ[û'©ð]ïÿýëÞ÷ÑuKÞÓ—ò¼Öž’ÿ-tÿ·í{_gçn§ÆºÌ;?iOØá½~;øìÆpdÚ|‚x8ìrOËœßy¼å ñ ö¶k•é¿ü¼©¥ü½5wÚÌçàxrñ;õ‹éoùyCÏüú]¯µú‰þ}½ÏnIé×Ô’kø²ëåéo/-ÚÅÿ>ãùæ¾k¦OëñþµùMkëúùúÚèzÿWÿ[>ØçëÈ®˜==¯Ÿ_ø?-ˆ? ?a_G{û6ü€8ßgð‹á¬܇ oà½ÿž|nLg'uÝØ—æ²ÌÒ5ó\êŸ5Ý ï5§«‹þ:µ=~ºßn’nZ‚ÑÅ{LÃ2õ†eŽŽëìâjô{Y]YìÚ’´¿Nü©‘äàôÛθ23øŒ¯Á¯ÕòœMù.ö³¿¢ÕhÛví§—-®}n¯ÃwÚëúßå²^e¯Ø°çö8ý“¯ìÏð#ó? |)õg®w"¸ñý»ÿaX‡¯ý}ŸÍwùj~“Oøpßà†ÊöÓäߦ¶ëdÙôÚž~½ýÿ1’zN?2ƒéßúò½ýtìîÂKðéåøÚÖ¾ÊëNm,H:ÿ‘úœÿ.=릧ëúkúz;6f?õ$çñÇ¡õÈã·UÇ!º`ô·õë߯wåm€=úÀûà}@ÆÑžq†é†Þ]/Û³Öÿ––i»û²ÿ­ÿý’×_[ÅïOò1Éé×?íc¶KtÅèŸÏ®ûõ¿à¬¼ì‰ü½^Ï¢µÖ–òOymbA×ÿÕýx®¨½Süûþ ?5múõWÿ‡êÖªNúhÝå'ôã8Ï9ÎH!zç 9ÂFIn˜½~—“ü-¯mÑÍ?¶ü™×ícÿfÑñß·ýRßõëÏCüðÿD>¿¨Ã%~ÖôDõSïÿ ßÂ,ý?䡜}@Fhÿˆ«á·ý”?ðËÄ»á›þ?âx¯ÿD^+ÿ¼#ÿÑõ°Ã%þÖáIêßOøNþqÿ™}†G¦hÿˆ«á·ý”?ðËĽwÿ™7ü?Ì?âx¯ÿD^+ÿ¼#ÿѯêðÉ_µ‡ý=TûÿÂwð‹?Où(g_К?â*ømÿEeü2ñ.ßøfÿ‡øƒÞ+ÿÑŠÿÃïÿôCýlðɵ‡_øRz·Óþ¿„\æDÇ_a‘éš?â*ømÿEeü2ñ/]ÿæMÿóøƒÞ+ÿÑŠÿÃïÿôCëú‡ü2WíaÿDOU>ÿðü"ÏÓþJÇ×ôføŠ¾ÑYCÿ ¼K·þ¿áÃþ ÷ŠÿôEâ¿ðûÂ?ýÿ[‡>~Ð_ |/¨xÛÇ õMºUÆ‘o©êïâφښX¶¹­iúžïe¢øßQÔçõMRÊ>ÉcpÑÇ+Lè!Ý=L›Žø'ˆsFOÄT1¹–*©áð¿Ù™æÕú®¾2ºU±yf>¬×µ­7TÜ¥xùÿ‡¾ pÆUˆÎóîÅ`2¬%L,V-æÜ7‰T^;†ËðîTpYÖ'8Ë‹£ :T*8©sÉ(FRqû1~ξ%ý¢õ¦Ôožû@øM£^˜ ÿ‚k7ëà€Þ¶ºñ>“§|bý±¿e‡~)×ôKWÐ4«‹Zño‰ç±—â6›àÿ kšg‡<_àÍ'Zñ$¨°þÈ^:øÜ?lÏÛ«áïí ñƒá§Ä _Àšì{¤xlü2Ó<[ðïÀV—~0ð¿ÆOOmeð—Æÿþ/]|?ñƱnl?¶ôÛøŠ_èúO†|XòYE©[èz@ßíGûN~ÐøÏñá/À½[ö~ð"|ý‘fý®|mâOÚÃ~4ñFñMºñgÄ ØxÂOበSáþáåøeªêþ,jÍñßÁøÛáöφú²ê3»€~AüPñ×í‰ûGüWñ_Ä]öƒ±øUðÛâïÅø%Ãý;á-ç„?hÝ7Ä~ø]ûGÙøâÞ©à#¬|:ý³~èºV«>©ª¶‰ñÄ~ðG†ümñ;H†ø?Âþ ¦-ÿટ´ï€¾üTñ/Æ/ü)½ñÕï 
âoÀO‡>øyã8t-wFñ?íð¯à-¿Äüxð÷Æ¿Œ??j_‚ÿ…ïð¿Æ+ñ—…üQû=xò×J¿ÓΫð¯Âö¾0¾ÚðwíSûuüQý¨ÿdï‡^+Öüð“Nðßíq㿇ôû‚Zÿ….þ7x_Ø»Å_´û_ᥗíwñRø3¯èÛë~›£xÃÄ¿à¼ÕføWñÊÁn.xûá—ìß㿉¿,¼ã?ˆ¶ƒÂš_ÃÝ7Æ"ðõ‡„õýÅž;ðß‚­,-®uÏü4Ð(E¥|DñßÃot'ý¶ÿà¢~Ò?g_‡.øaðâ?ÚãÇ‚?hÿ/†¿fïëÚ‚þ| ñ'ÂøÀÞ1ø3uûahÚ–ƒñ[Ķ´üXÖôï^,Ð>ÜÚÉàû üEwÿ„±@>ý…~3ø“â‡Çï‹Æ­ x‡Ã_¿dØoö¯¼ðE¯WâGÃÿ…ž?ø»¢|[Ñ|sá;Xjº‡…ÿ¶õx¼á«Û9ü£Ã¾0ðÖ‰aãj × ª´PÿÐþÌÿhÙþ‡Æ„?|1ûEü}ýœ¾(|ø}ñƒán‰¯ü´ýžµh|Cà_þ!ø9âŸhž(ÑÿhO€?4woí¯~ºÑõOYxsXÓÖ-Vݯ®`ÔY óË/ø&ÏÁ /@𦗣xëãÖ‡â_OûDëÚŸÅmâ|ÚgÆüGý§ü.<-ñ/â犾#Zéëöÿ­`ŠÊ÷Àú‡ƒçðž…à†Ò4- @ðý·‚ô ¶„?à–ô¯ˆCâ×ÄOˆ_þ6|I»ø½ðó㉼EãßþÏžÓ¼q¬|-øKñÛàçƒt_xáÀ¯†~Ötè<=ûD|BÖu­wþ˜> x‹ÄiáqªøÖo xSCð ¬/ìð¡¿e-ö/¾ñŸÅ_àFâ¿ jv…¬øƒÃZ–«ÿ ¿Á?4Ÿ‰þýœ/u›Ÿ<ú—À«+CÓ> É£j sã‹ï‚ÖçÂ7þ?ºÔ¥›Ä.Îüdÿ‚i~̼O/Š—CÖ~ßÞü$ñ7ÂjÛàðŸÃ«iú—Ä/†¼ ãMZ;/ ^\·Äo¿þxÇ|QeueÿÖ¯©xšÛY°ñ&®\ihÁÏÿ½ðޱâßüWñ§íKûVxëãï‹îg-S@øï®êtÏü)ñì¹ñÚo†šßà ø?ö}ð¿Â=0j?íñOÂ~=ðç‰>x«Á¾5ðî½y§á´ÕõoêúàuðGþ Ýðëà¿Å©þ:\|aøõñk⦥ñ_â§Æox§âž·ðÖå¼AãO‹ÿ¾üñ:ÝéžøYà=+LðþàÏÙãÀ÷ð÷‡lôm?úæ»kh­á“áÿxx›ðoü3áÿÃí;UðÏ‚ÿh_Ú“Â>´ð¿Ž|-ð‹Á ñ¯Ã¿Yü¶ñ¾¥o©%÷ÃßøáO†¾$øºïÁAÒþXüzñ¿Æ_xcÃ÷ZÐïtÉÚÞ€1´_ø&†¼9Äc@ý¨ÿiâwÅŸZxïâÄ'Iý—ôË?‰ gðþ?†ÐøOÇŸ,fËÙÅúö Pj-ªøƒà¦¡ãñâ{{mb–ÐYDö§ìåðÀß²ïÀ³ÿÃWÖ%ð?ÂÏ [øgÃóë÷}Ƴuo÷wš‡ö>™¡èv³Þ_]Ý]føwBÐ|7¤¤É¥øsBÑ4;; .ÔÚè € ( € ( € ù›öÔÿ“7ý­?ìÙ¾<ê¬ñUe_ø¿ëÕOý!n½Q샑Œ~˜ìõ瓞2pÚþ8ƒéßúþ·ßɸý8ï_çŒã<óŒgV뜜è‡]~]½;ÜÍèöìí~Û[OÊÏKY^çÆ?µï߇ÿ¼Gû!x'âw|ñÁÚí1¯\_øOÇžÑ<_á›Û;öGýªol'¼ÐüAc¨iWSX^A 圓Ú4–×0E<.’¢²~©á?+⇥(¼³Íª’UpÏT·jIK[;._'8ºÁMŵ%85%u'Ý­SNÍõ’–ÊÖLñ¯þŸ±Lg“ûþËñÇ•û?ü'ˆã‘¯„@Ï‚¤nêr3_Ø™N>^|Iÿ6„醴æ‚ß½î—kŸ”æxŒ\9¹1X˜½~Õbÿ G_EédýïŸü!û7~ÏŸ ÿkOØçÄß ¾üøuâ/ø]ôá¯øᇂz 2Pœ¤›Šº‹´tmé{Kö÷×ééŸÃ§8í×9è1óÁÙ§ßÏü®¿>š«6~Ö/ùü½}ûwþ‹Ó¯¯üëõ[J'·¦¿Õÿ.½¯ü?ÏåÛŸËÓÓúõÿ†üwŽGóõû!~Èÿ±þ¿û;üÕõßÙ[öm×5}WàÇÂÝKVÕ5¿ 5-KTÔ¯¼¡O}¨ê×¾–êòúöîI®n–k‰ä’yI¥oj–m‹þÐÆÁbñ.Æâ¡ûz¼±ŒqŠŠ5”b•’ÑYYE«3òw®±˜”«ÕåŽ+•Y¤”kM(¥ª²²QQíemd~xSö ý†¦ùß±Ÿì¡.GYg_„Çü Â.à\öÜO _y•ãjO—ž¬ç×Þ”öKy;ÛwªK{»rŸC„ÄNvçœÛ]æÛü_{êßÝö|ðèþ½ÿ‚^þÇþÖt=Tð¥îÿÈÐõ_ êze]|cý™tëÝ PÑ® —M¼Ñ¯4Ùe°»Ón-¤³¸²y-e„Àïù¹4=¿R§e/i™bàÓ´¯Íõ…f“ßÊë]>Â×£mãOo7»é¢ôvÕ&ãîðÍŸ±¯ýìÏõÿ…ð¿<ûÏnüœžë_´ÿbBßÁ§åû¸u^©n¼ý6#Ø=ÿËÓù¿«_VyÆŸ‚³?ƒ´/‡~#ø}û>ü ðG‹ôÿÚgö=OŠ<ðŸÀ^ñ—öïÚÇà¶ý­hº 
ž¥d/´»«Í>ïì÷Qý¢ÆîæÒ]ö×3E^Vy•G”f5•(.L4ÞŠk¦Žï_y»©-õz4'EÇÞ}ü×÷›ü<®þ×ê?åÏSÏÏ8úòHbN1_Aëoë§šíÑy»Ù/áëÛü8Ï\ïÀmµÓÓüÿá¿_¼õý!úù=zõúõ çvN ×L–þ¿àt}o{¾ÑÏåÛOG§ÍëÑZÜ©+óWLŸÖß•ÿ½Cn¾w}m¶þóZtºÖËUxÈ:~|ž½së©É;¹Æ+¦:õ¾¶ÿ÷[[¿VCþ¼í¥þm=-Cæ¯ÛGþLëö±ÿ³iøïӜŭñPì1úÉ$«ÛAþö“Û÷óûI>ÿ×m‰{?F}C_H`P@P@yŸÆ…~ø×ð÷Yøkâ»R×ÃÚýï†îu7Ñ®RÏQ– ø§EñJYÃvñLm’þ}+©áUºŠÒâw³š µ†â/†8ÂyÞ?Ëa‡©ŽÁRÇÓìU7V„gŽËñY{«*jPö’£ T«S„›§*°‚«Ó焾gŒ8W.ãnÆðÖmSK.Ì+eµ1RÁÔD¡—f˜,Ò4aVP©ì£^¦ 4*Ô„UXÑ©QÑ*Ê#ÙxsÚ„4'ÃÒlt/èV0iºF‘¦À–ÖVVé²( …8™Ý‹I,ŒòÌï+»·—ŽÇc3'Ášõä¾ðöw%÷‡'ñˆ®´ÛÈ.-oSûcXÓ庛E×üG¦k ñgösýŸ>=Íᛎ_>üe¸ð]ÍÍçƒçø©ð×Á¿eðµÕëÙI{?‡¤ñ^«>%äšn%ÛiæßíRiö2O½ìíÚ š÷áGÂÝKU½×5/†¾Ô5½KÄ>ñv£¬^ø;÷Z­ÿŠþ$Qø Ä÷º„út—w^!ðDpC„5©æ}KÃI K£]Y¬jÍü3û"þÊ~ Ÿâ-σÿf€>Ÿâþ™«hŸåðÿÁÿ‡ÚC|JÑ5û›»íwDñߨ<=ñ^­ßêþ±¥kŸmÓõ;ýGP¾½·žêúîi@-h_²ì»á ø7Á¾ý›¾h>øuñ ÇâçÃÿ hÿ~éÚþ+é‘Iñ?Â:E¯‡â°ðçÄ[e–+?èðYøšÚ9$Hu5WpÀ>ýœþ xoà”?³”_ <¬|6¥ _|4ñ…ü?¬ø#XÒµ½JóYÖ´ýWÂZað´úf§«j·Óhpè¶ú«\}—LÒ¬tø-làâ§ýˆ?c ¯[|1½ý‘ÿfkÿ‡‘kPÛø Qøð¿Qð|Qø—Äþ ñ·‰|9{á{ ÿÂCã?†ßü_®–´-¬x£À> ñ¢nuo hwvØgö(>²ø[ÿ û.¯Ã-;Å·^>ÓþÇðáL^±ñÍö•o¡_xÆÓÂqøQtoßèV–Úþ¿ ‚ê·Ú,i7wSiê¶ÔØü6ø  ü7øŸñ‹âµž¹«êšßÅè>h¤Oi éžðW‚þhÚÆ•à¯øSMÐô:E±±›ÄÞ&Ô®¯õ{OQº›VŽÎ9­´Ý3O´ˆÝh ÿÑþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕƧÛß·åüC®¦%ˆ/ñ¬^Ï]m³ï÷7÷}ÇÓÿLþ8!zç<ŽŠFIn˜=Vúôõ^¶èºµ×¥ãßü·¿ùnÔ“¾»5|v]ß¿cEÿ«‘ñ_±ãö<ý«Î3“ÀÀAà Œ…ýG™rñCíý›‰ëÞ¶öíëoK^j¹°ró”-×]vÕ_{nõÝõŸIã+lùØ ~ëÓž™›ÄäŒßع=O‚ï{'¾½o׺Ù>Šò?+ÍiüZùíÛþÞ¶×Ù®ÿâøÛY‡Ëý¥?c†Æü/ÓîŸÙöž ô8ާåàØÜÞÒæðןLÛ*v½õö•þW}5wzYœü ^'ö/Å­­ÿ/([¯o^—kí~¢žœz“Ž:ž}@šþ*‹Òûÿ[_®›ï­×KGö°üÿ®ñîŸLöùzböÑ?ëü¼¼ÒW÷G°µÓ¶¾{_ð߮דºFwåç•ü¾_’—á‡ì/âT»ý›~Ûì~ü2ƒ¹1Çà½ó†ã*6ž¿½%…xYngÙ®sKŸZÖgOW«ŒqÕ•Ú´vz]hßY¶ù¥ŠçÌ3óYÓÌq±¶»,MUÚ:'¦Ü¯eg~oÓêê¾oîrvòUÁžGŸ”[õL§~]{jíêõ÷š¶—Ùö²»Ö`jü:ïdïýz|‘ò¹|ºwüÃöNÔ]Ö5³ð÷ü2åØ*¨‹ã?ìÆå™Ê ĨÀõíð7_2Ú)9:¹ÕX$µošuRV¿nžïeÕŸ¥Ñ‹’£®äè­¯Ö=¬ß•®Ów×hôßð¶,qÿ!;N}nâõïûÌœ`v=ñ@¯ëì ¿ôRßõîwü¯ÿ’ýÖæ—¯õIÿ#õ´­Ó­¿­µÖG˜|Uø‰i­Y|0Ó"¿·•îÿiïØíV4¹‰Ý¶~Ö?æ8PÎIYc€ÜO+ò¼o“ÔÂðžy^TgOvÜ$’½ZqÕí­íóétçωÃÊ'.V’p»iÿ chÏ8ÃtÅêŸàÿ§mu½úi{ õü8=9 óÎG©þî;ºa¿õùhîú[Uç{ôï÷»õÙ­4µú-¹¯ö^:~žãùéè8®˜=moëï‚Zjïd/›ÖÚ«þ7óæ|ÎÚ7yH½?Lãß¶}89±šé‡õýuümón2öüK'wµ¿»~·Ñ5þÚ?ògµý›GÇ~ÜÿÉ.ñW^¾ßÝÆqŠíßý|‡þ•×þï³!ìýõ }9€P@P@P@P@P@P@P@ÿÒþþ( € ( € ( € ( € ( € ( 
™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÂ:ôëÿêöóÆOläs·øÊEå÷þoäôím§$õíêG8Èéü=?çqÎn˜="û~Ÿ/ÑüÅ-¶¾·û¿/×e¹ñí{ðçáÿÅ~È^ øà_üFð~£ûLk×þñç†tOøföãNý‘ÿj›Û ¯t?XêZU̶7°Ayg$Ö%­Ì1O ¤ª¬Ÿ«xKË.*jKšÊñjI«©/m…jé«=RjêÉÙ¥¥8ºÁMŵ%85%{½wZ¦›M§ñ'Ñ+.oñoì)ûAæy?±÷ì¹R<¯Ùÿá +ÛYaèËÕë«M^Úét£ùNgˆÅÛ“‰ŽïÝ­V-}ÓZú/K£çÿ~Íß³ç¯ÚÓö8ñ7Ã/?þxˆ|gø‡¦ÿoøᇂ|#­ý‚óöIý¦ZæÈjžÑ,/¾ÇpÐÃç[ ¾LfXܪ²üïø< ñpø% ²Ír¸º´pôiTqu&ÜéÓŒœ[Šr‹²º‹³²qŽÅã+q4©×ÅbkSY~&J\EZOž‚Œ”')&⮢íz^Òý½õýxÏ×ëí÷²Iã‚kø¢uýyuü’óþ÷íb÷ÿ>Ãùúçã€BôÁééÒßÒwÿ‡Üü?¯áþO>Ø®˜»¯×¿®®Ïþîæ2V-¿®:m²Þ_Ïçì…û#þÇúÿìïðW×e_Ù·[Õõ_ƒ u-[UÕþü1Ôµ-OS¼ð6…q¨j×¾’êòúöîYnnî®æ–k‰ä{‰ÝÞFú Y¶/ëøØ,^%Â8ÜT#oW–*yÅEGšÉ+Y.T¬´I4ÏÉ^6ºÆb¢«Öå†*¼UY«(ÕœRQ÷••¬”m¢ÒÚ¸þ€øOö ý†fò¼ïØÏöP› dËû;| ~ù‡Ïáüç°bÙ¾÷+ÆÔŸ/=YÏgïÎRºÿ·›¾º´ÚQþf}9[žsvky··«ï}[ôµýßFý—>øÇÿ°oì·àx#Â5ð¥û1~Ï6º‡‚|Yá­Äž¾µÒ~ø+PÒíï<5¬Y]è·Pi·ú}…öŸÖn–w–6—6ËÖÐ:|v"µZ9¦.µµ(ÕŽ7ãR”ç “«U>YÁÆI´ä›V²½î¯ìéüèù"ûke×um¶ÛkØÜÿ†ýˆ¿èÎ?enŸôo_ñÿ¨‡ÝþJöí~V}.=ý?Ï×ÛßžÕ”Ÿ×ÝÙwë{ߺŽc¿ÈsÇn2 äó€{ü¹"º`î¼¶ù_~ý@?Ó×ôçŸ^IÜIÆ+ª?ðôî»ßÏ] —~¿Ö5>©%ÌØõïÇòãÛŽ?1ž3Îk¢/åÓM_Ë}§m‰Û[ö׿kßÞéæžËkÅàúñ‘ŒõÇøõþDôÕªíýv¿çm›½…m<ÿ;i¥ï»òŽÖ²æGÍ¿¶ü™×ícÿfÓñß§8ÿ‹[â¡Øcô’H%W·ÿ}Kþ¾CÿJ_«ÿ‚¶–ogèϨkêL€ ( € ( € ( € ( € ( € ( € (ÿÓþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕÁú0_ê;ò+øÂV¿¯ë×õ>œ”sƒý3Îyà…êOcxR2KtAôþ¿­:~¼‡ýXù¯ã²îøû/ý\‹Nr?cÏÚ»Ž3ÉÇm ÈÈ_Õ|'•¸¡»Ì»×½\-÷ûº÷ÕûÇ‘›+àæ»ÊÞºôº¾ºY·®ï¬úO[ÞñŒù÷}H±üLX+û'©ð_ç½ÚÞÿ“Ñ>‰ó]s~WšÓÒZþýµ¶¾‰®¯ü_kPù´ŸìpØãþ¿Ž”t)ý’?iâ?½Ž1ƒÎÞÿÞ¯ Ç ßÃLBÿ©¶Uu{ÿËڊ׺î×­ÝÕŽn‡/¿ûâú[þ^P·~ž~·?Q}ý}¸ëœààòzuõg5üKªwŸõòòÚÒý°_ÌN~÷N:tõ#ŠêƒÖÞ½/ë]ú¿çüþ_áÔ×L­ZK®žw"kDþ_×õóVJ_†_°¿‰óölø žËàÿÃvŒðfŒ02オ€¬AÎ*ƼŒ·3|Ó9‚•Ý ç2¤þXüB]Všhífî—;NGàT±JxüÆ<ÚÓÌ1±{élUU¦‹Eoð·t“i³ôãÁú¡ý×ÏÇ õ=3áï댲ŠýS)ÄÝC^Ï]=Uý÷ë³¶œËSë05~½ìµÛòôZmmú~Ň?±Ïì˜}fsëð·Â¿‡sÇN=ÈÆ¿öìc·üÅâôõOÓÉouk¤~‘áÃüÙ]­?ÇÏFÜ~˜ÿ>Ý=2yN¼ô¥·Ë^[þO_'ç­‘ðÝo«Þëm´Ó]¯yœ88ãñÇ_×È99;±ŠéŽÿçþzÛ»iôë¡2Õ7®¶Ztü¯èïg¢µÙ/ùÿ=¿?êk¦§ëÊ÷õÓ³»2þ{~]Àã¡FyÀÓÓõý?á¯òn@uížý¿P}wc¾:®Þé†ß×O¿{õ~‰]‰þáú[þ¨áÇ_cíœd^˜Ú3ÈÓ »ÿ[=où]mkXM/m®×Ÿfý7¼ú]1õÓ¢>¾·×ôÓðë{|®œ¶µµV{É;%›mù3¯ÚÇÛöhøïØgþIoо¾ÄchÇ# a{°ÿÆ£ÿ_iÿékå÷|ºËgèöôþ¿Sê*ú³œ( € ( € ( € ( € ( € ( € ( €?ÿÔþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÿ[þ½TÿÒÖëÕ¾¼ç·'ðÇãÓ§©ê+ø¶.ÖÕÛM{ú÷>œ‘zzqÉëõôàgÔòÙÇÓgëøþZö×ËK°>5ý®ü 
£üGñì…á=wPñž—¦ß~Óôó]øâ7Ä…^&ìdoÚ¦î³ñ¯Âÿø?Æt/,*—–Ú~½m£he±¿ŽêÎy­ŸõoÔeÅS„Óq–Yн®ž•°­ZQ´¢Ó¶©­Ó’—œ¹G9E¤Ôàïký÷]ºrµÑ4ÓròþÈ aó6ø×ö¬8ÿžŸ·Gí³>F#þÐrç#É$`ªí¯ìœ§…Ÿ-Õt´~î3ݴሾçµì›÷¿*ÌñX˜óÙÒ¾¿ =5²÷©Km¬Ó],’r—Ïžøá‡_µ‡ìw¯h~!øÑª^7ƈV&ßÇß´oí ñ[Dònd¿Ú]ÞQáÏŠüaáå¿C%¾¦ºRê6Ñ™ÒÚ깸Gùß0Xl?†¸ª”¾±Ï,Ó*ƒö¸Ün" :Ò–”±Š´£/uZq:MÅIFSçŽÅâ+ñ3§UÐåX L“† BM©ÑÞ¥ 4ê5fï';7yF,ý¶^œþ'¯×98ç©9'µ-ïÇ{;íkàïà _QþÂý­¿k Ù5ö£àêôÙé>ø×¥éZm»O+ù~™eicgËKK[[x¢‰=œ3ÁṞþË.Ê)¹cñns§”eIºõ9çRpÂsÔ©7wRuªNW”äÛ¼¿½㱜¸\\ÄóJ8,”¥ígÍ*’ö<Óœ­yÊrr“ÖM·y~…xOö-ø(‹v½ûO¸3íÍûn@;‡öƒŒÀ#ŒÄm¯Ð²¹a¥Ë|. j®ã„ÃÂݾ vóZ{·Vrn<¿Iƒ•9Zôh-UÚ¡Etþí?[Y;%¥Ýù¹½['þ “û!XéºçŠü?£àßø'ƒµ WÂþ0ñO…Ü$šGo#)ÃÒÆq\pµ©Â­*¹–2§(©A§*í'rÉ&“[rÛK]3íµö1åºn4Ò³Úü½tüþãÚ?áCü$ÿ¡³ö”àÆgþØ?\œüu'ÛœgÒ¿`ÿTr¿úáýÀŽ÷õ_¦šÈÏÙÔþi_N·ï×yôõµÏ/ø±à ü8Ò<â¯øßö‚³ñ ?´/ì­¢#k_µ_í7ã-ïFñí3ð“Áþ(Ò5_ øÏâæ½áoMÖ¼/®ëZ=õ޳£ßÛKk}.b^/7:Ἷ ”æˆ`pôçG Vq”iEJ.ÖN.îÍ}–ìÓé¢J‘j\Òø¢Ûïm}æõ~Z®ûŸ¥c×óÿ<çð?Ÿ5ù"}´ÿƒ÷§÷ÛÖÌoOëþß5÷þƒ°üÁêy#Qšêƒ××§Ÿ_š×µºö/×S×ôöÏ©É;²pCtAÙù^Ÿð7wåHü:äc‡lŽ2FN~‡Õýkÿ m_ôÌÞ›ô³ùùìú-®º-cqþþßç¦søκ`ô·ôý;[ñ½ÃóÓMµ[}úôVååV½Ï›lÿù3ŸÚÇþͧã¿oú¥¾*ëŽ2}ðG^A+]¸Wûê?õö´þúë¶½­~ºÜ‰­£×¿ž¿=ºYZòú޾¸æ ( € ( € ( € ( € ( € ( € ( ÿÕþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þ‘!­×ª=xõ‡¨ü²?L`ÈWñ\žŸÉ·åÛÓSéɯýoÄwçÓ‘Ôu5Ó•ý4ÿ;ôÿ‡[ >møì»¾'~ƃÓö‘ñYè21û~Õç>™d`®x=õo en*oþ¥˜­Ÿý>Âþ{j¥mìyY¾_ã‡þÝé·fý-vÎÆVßëx<?äs€¤òGËä…À"¿³2zŸÝ¿­W—ݶš”f´þ?½¿ËNo×ÎîÊ1ø×Z‡Ëý¤ÿc–Æøñã¥Èëû$~ÓØ c€pF0O¢ø9O›Ã‡8Ë^`b?è¯þ —ÿ+=OªOf¥}¶ü…¼¶ûïîùoÅŸˆ:~³§ü1ÓbÔ-f’ëöýŽ‘"Žê'g1þÖ?å;U]²@Œÿ `¿Å_)Ç=|7 çµåF¤cO&å(4•êRZ·»v߯[˜â0Ò…JÍYÁݦþÚÛÝŠ}ïÝÝv—ìóSØsÏr}±“_Êw_×ÍüïÕ¿+]£Íš³þ½WÛK%ç{"Aþz~8ÎqëœvïÅuEè´Ûú¶ï§¦÷]Q¿RNz\Ž;u\r¦/góÕVümºNɧü}úwû÷FyÀ Ó­í÷ÛõvZõOç³!è÷ÞÝôü¯é¯½-•î9{õëÿ×' ÆsГíÐ×L^¿×åÝì­®¾büŸ¯ž—M§d¿ºŸÚNÞïÍ¿¶ü™ßícÿfÑñßÿUoŠ¿úÝ1Ó=°ÝØWûú?õöŸþ–¾–×{ZS/…ú?Mº^Þk^g~ÍÞ_RWØ¡@P@P@P@P@P@P@ÿÖþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õÑúóõôíÆFzœüÛ_Å0{¯ëþþcêž×Ëúï¹'ùõü;~'×WD=~[Ãß^Û[¢rG|LøKàŸ‹¶>±ñœ&ðˆø—þÿ ê^ñÿþx‹Cñü#¾"ðŒš†âŸ†¾&ð—‰-Ídž¼[â=îÐjËeyc«\Çwm01ìöò¬Û0Éñ[ËqRÂâ])Rua s—³œ£)FÕ£R:ÊnIGnU¥ù³«J¡ÉR*Q½ìî•ÖÛ]ÿZ[sÉ®?c߃×YûN·ûEϑɗöÇý¯¤ê:üräqÏà€BýuøÚ’NŸã!oä†?5j^×ÓÊû#ÍžS–Ôþ& ?)Bíîžúë¯Hö¶¨ùûþÃá—ìõûN|/ø¥usñWÄ>ñF 
¾ð•çÄÚãçÄ­#àßÇ=Sþ#ðN‡ªZèß>%x“@?Åÿø«_ðÝjºuáйÇн0Þß×_ë¥þV’_ðÏÔtç¯zôùºaýžý6ÑuÖú0?žØ÷öCøªþÏu‰¯¾jº€ÐÿkÚÃÃv&ûRðV‹yzlôŸ|kÓ42Õ®'”çiv6vq²ÛZÛZÛ¢B¾þà©æ8ïg—e”±Ø·9C)Ë#9Mâj¹Î¥HáêTœ›•J’|ó›r©4Û”æ¡ n/— ‚OëxŽiGƒæoÛTr”åìT¥97yJr”¤Ûmɹ8þ„øOö-ø(‹v½ûNÁî¿n_ÛnÆpsûBF|½»…cß¡erÃË–ø\"½¾ &Ÿ4—éug+Øú<©K—÷4ÚÆ…÷rÓû´vµ•þ×iðà÷ÃÏðOoÙƒáGĽëÄžÕÿfÏÙ–mCJ>!ñ>5Ï„<ð÷Å^¼_è:Ηâ‹{ý/Ä>ÑõX¯íõ¨¯f¹²Su<é-ÂOò/0ÆeyålÇ.ÅWÁcp™Ž&®†¨éb0õcZªU(Ô…¥N¤.ù'YGIEÅÙŸuJN1§(JP”T%BN2‹I4ã(´ÓNÖqzn¯s7þ—ûÑ8ñ¸ÿ»†ý¤qŸü;‡êkëWŠ>#¿ù®¸©z癇_ûgø¯Ê=?]ÆÿÐn3¿ûÞ"ÿú{¾·¶º«&}þ ÉûøgÄ~ñ^•ðÛÄÿÛ~ñG†¼gáéu_¿|Aecâ_kš‰|7©Ë¢ëßu-R}']ÒôýN mSN½³{›(Œö² 1´c<@ã|Ó _/̸·ˆ1øT=ž# ŒÍq¸œ5xs)rU¡V­JsJQR\Ñ÷gÉrµž+R<•1Xªv¼'‰­:r³V¼eU§ªM'~–O–çÜJyþ}óN½2Nìœb¾f_ëüüº/=mîóJÖþ¿á÷ov¶²þôƒ¯óÿ'Œãÿ­Òºa×üßå·ë÷™ÿ$ýqõ9$1c]0z[úÿÑõ½îûDÃ×·øqž¸$߀Ûk¦.ëúþ´ûÞýI}ö×Óï%òèíx¹Oÿ_ùýxú¶wdMuGÕù¿ê÷ûõß[2~]´ôz|Þ½­Ê’¿1ógí¡ÇìwûXÿÙ´üvÿÕ[â¡ü«» ÿÚ(=¿}OÏí¥çývµ‰—Ã/Gå=}çߪ{+ZçÔÕöG(P@P@P@P@P@P@P@ÿ×þþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õÏ_þ°>øP1´gœa¿‰ ö~_üý/¾÷Ôú‡ß¾ýø.ÎéÞ×$ºÿ/©Ç×ë×Ó•éƒÕi¿õÞß‹]z^(r?Çéÿ}G<Œ…éƒÝ_¦ßð×nÀ;üúóœç/\ôá#$·LëÏúü¼þV÷²jÏ­µòÑ®é¾Ûòë¼®—½ÈxÿÀ¾ø›à¿xŶ’^x{ÅZMÖ“©GÏi{ΠÛêmô`Ϧë:UÚ[êz6«jRïJÕm,õ+)æÖNì=iÐ:Ôß,éÉJ7WNÛ¦´RŒ•ã8½%Óºm ««;ÙÙ$µûµóþóO{^òó/€~:ñ±¦ø‡á§Ä‹¨î~/|¾³ðÏ/V³Æz5å¼—ø­§Ù¦"‹Mø‰¡[Ûè-CYh¾5Ó¼gá;y&ÿ„nY[ÐÄÓ‚tëÐMañ ΜowNJʶRvnTdùS’¼éJGohL[z=ÖÕ´—Uk%~—M6­hý:z~ÌüÞǧNyjÌ^Ú|¼¼µê´Öþˆá×$öäž}@šé‹Õ?Éoúõòû줇_°Ï‰óölø ˜<Ë/ƒß ­™C1*ø+D…20Äd¡ÚÑÔF˜fæËs5_4ÍáÍïPÎ3:Mk¢†6¼#{í~WkÛû±å\òüž/Ÿ˜Ç™^žcޝ·.&¬bìï}´¼b÷iKâ?M<©àÃÏLuÁÎÑŒÿär~cŸºIûú–S‰¿-¥k|¼·æjÚkø·~Y}fµìï¾úþz+tïÖïT;ö-9ýŽdÓÓ?³?À¨ÏÂß zæ=ñÚ¾{ï˜côÿ˜ÜSïÿ/§úyGm6géÿ‡OüÿÒQôÅD×Ïúþµù^Åÿ_×§ü0¿þ¯Ç׸üxäWL^Ú]·}=;­ì/¿§M—àߦ¶o[!GP}þ¼ý>\ž{ŽHÆ~n˜½¾ZòßðºßÉù«Ù õôitÑùëmºÇÊëRoÓùæ?˜ú×T^¿çß×[k­Ó·©õýmùzØw¿süþ‡®sÓ®yƦ[_š½úhíÓvÿžß‡¨P1´gœaº`ú[óÿ7óÑi«½íý_æ/øúéÉiç=:ŽŠFrÝ0þ¾_~÷êý»3þ·zy&®´·’{˱óoí¡ÿ&wûXÙ´üvÿÕ]⟯óüë»ÿCʵ/ý-oÿ ·}I—Ã-õO®Ÿ¥Ö¶×›Wmn_SWÚœ¡@P@P@P@P@P@P@ÿÐþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õÏÃ×·øqž¸$߀ÛkøŽ.ëþ ço¿ËV™õRÛæ»ë¦úê¾ût÷¹n9O_Ï×ê=xÏ«g9“]Q{?ëð¿õÑlA*õ#ý^˜ž2{wŠè‹µž­}×·}úùy«Ýÿ_§P3ŒóÛ>­×985Õgëý}ý¾î¤K{úy?×WÓÓ—KÜ:ÿ€í‘ŒqÇõd’ ZéƒÖÝÿ¯‘žºõ»Ù¯üšý,ÒgÌßôM[Áú—‡¿hÿiך—ˆþØÞiß|;¥À×ÿ>jÅ}ã 
ÖÊæj~'ðTÖñ|Bøy·w:®•¬øBÀÛÅã½Ná½| ãQOVIB¼“£)ZÔq)rÒ›ojuoìkl”\jKø0R™ii[XémnÒòÑߪíËeó#èÍ [Ò|K¢é>!Ð5M_B×´ËgEÕ´é’âÇTÒu[XotíFÊæ2Ñ\Z^ÚO Í´èÅd…ÒEbóG, å ®YÓ“„ Õœefš}nši­Ö¡§Gußú·õµ÷5ý^3õíÏ·\’xùº"ôOwýo¶¿ð÷w¸ÏWì{û!üÕ?g¯€ÚÄ·ßlïu¯ƒ 5]Gû ö¸ý¬|5bך—ƒ4KËß²i>ø×¦i:e¡ºšS¦XÙØY¡[k[kX"ŠúL4°PÌ1ÞË.Êi¹c±ns§”åIi|QM;¥«¶÷ë¢Õ}×|ߣÿSõü?™éëÉÜ}ñØ;¯ëúóë{ùZ;ü¾Zÿäµí¥ÒÞò‘zž1ƒÀÿ9ã¢õGT··Êÿ×kß¿[G9oøé³¿]ßõ¦¶¼¤:}Ï|qÇO@¥>ÚÁûÓûíëfH¿‡¯nŸ—Žpp;Û]1z­7íÓþ ×õÒà/ÿ_'¯øä úà“¿'WL]Ÿ—õéÿw{$CÓ¥žš}ýtߦ›.T•î|ÙûgÿÉþÖ?öm_¸ãøµ¾)ãþ¿ô^ü#ÿi¡ÿ_©wÓ÷‘ùnüþ_j%ðË_²ý^ž~÷EÝt[^?S×Ûœ¡@P@P@P@P@P@P@ÿÑþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õÏóÛðõê6ŒóŒ7ñ õþoðÕy+j}_OU¶¯^»oÚÍÊÒvëÍ'/_¯|cÓœds÷°zÍÓ¥¿ÏÏå÷/[™ç¦Fï¡éíÇQ÷—¦ïZ÷ׯõ¦½ïp%ÿLþ8!zç<ŽŠFIn˜ì¶ý?>›kkÛki)–Û>ÛÛ~ÿwU%ÝáÓ=†}9ê=ðŒsÈ^¨½WžŽÏ¾¿õµ­r>m^Û]úÛú“¿fù¤£¯O_ñÏn§ëŽÝY«¦Un¿/A}ïÊöѯû{k-mæïd£ò§Ã ü ø¥ðï÷¼xuÿþÏÓ¶ÛF&m[âgÁ„õp/†¯/Çl]ÞÕuÿéVñi dgö*?¬ÑX¤Û­K’Ž-2øhbwÞi{*Ï_ÞF3“sÄ(™ìíÑëÖ>vÝyiÑŸYÿŸlç®8êzuõdšÆëåéo/- ?a¯%ßì×ð5r^ÏàÿÃR>loà 2?¡.qòò‘¨Ävgùžo&Ý ã2¤ôòïZ6ø´Õ;j®µQJÒ?§ŠçÇæ*ÿc§&"ªï­¿Ãö’wN?¦~Õ1äÙÀ^ý@õÀç‚pÜãyù[õ<§~Mv·•×]}åë·ý½v¥õ˜Û6÷ß_ÏM>ç»Üø÷U»ûüçöK¼Îß³èðL©óž›>4þÌÜvÀþï©Î0»ð”=·ˆ9u;]ÔÏ10·ø§ˆ^}E¯D¶?H¤¯*׿±Võqôüþëÿ ¿Ú¯×œòw9ämÇC»×õßö?÷»ÙÙ_þß•ö¡õoîëÛÍyz>ÿv‡“ü`ñ—öž•ðÖÇíþÑûNþÇK·ûÛ?k?‚Òdüü`§B¾û«åxß-ö'žÖä·&O¯YÂ6òµÝ÷_u£•z<´å+m(k¿Û†º[º¶ž¶MŸ­¿çÔô8Éä}:óœWòtNÿוïë§gvrÿÃuNÏÍz>šíxëÌõî:zsÏõ¯\.zàó]0}?_Ó_ÓÑÙ³9®¿§ô×§½gtÞÈ”~½¿ý]ÿÎ;WL–þ½{õîü­±¿—៧p8ÁÈÀÏ8ºc¶ÚZ=oùif­{~½úuúƒë»ßîãøºbö>ÿžÚúÛt’%ô×Éïëum­nö}n|ÙûgÉþÖ#þ­§ã·þªïuǘ㑌g½ 'ûÎãÒÿÒãúÿÀèD¾yÅ÷¶Úö~Zó{Ï¥Ó>¨¯¹9€ ( € ( € ( € ( € ( € ( € (ÿÒþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õÏÃòñÛŒ‚y<àÿ.H¯áè;YþOîÝ«ôÝúÜúÏê÷z߫٭–——oz׊ÿÉëúz úœçq'k¦.Ï×§½¯ø;]½ÿ¯øô·Ý ñÏlgÓõçÔtïíè½P},ÿóOóôÐDƒ‘øž¿_~8îs»qèUza×ú·ùß¾»9ºÿ€çc|¾Ã<¯R[%W¦/O?ë_¿wÜËkþwµüúµòææéËna}ÿà}Hã®×%³“´šé‹ÑyyöÿÀZü?QÀêîÚòÒÚù­­žSñ£á™ø§à‹OÔLJøÁ`ûDÞ ø‡á¹ïà ‚u`.]+ÄšJË~"𖩯xnîO°ê÷hÞŽ¿°ª¦ãÏNiÓ¯M»*”gnx>œÜ¶”$Óå©ÔKš1å™+­õѧn«[»¶ÖêöÑëÙ9Mð[âhø­àkmvûK>ñv‘¨j>øàù%ø3â†å^*ðëÌUîÆ+½º‡uo)"ñ…µ- Ä–qýƒWµwé­IP«Ê¥ÏNIT£STªQž°ž—³iÚqHÊIÆBNëÏf»5ºÿ/-z£ñö=ýþê³ÏÀ]bkï6wºÏÁ¿†Z¦¢4/Úãö±ðå‹^j> Ðîï ž•áïº^•¦Ú5Ôò˜tý2ÆÎÂÍ6ÛÚÛ[Avõôxg‚§˜c½–_”ÁË‹s<§,ŒêIâj9J¤Öž¥INò©RssœÛ”ææÛ?R£ n/— 
ƒ_íxži,ÊOÛÔmÔ—²RœÜ½éNrr”œ›r“|ß¡^ý‹~J#¯~ÓËž3íÏûnAÎ1Ç•ûA ÝŽ18@2kô,®Xyrß „ß^\6Úß(Zý—6ŽÛj}&Ò¯J‚×¥ 1Õ†–¿wMµ±ÖþÏŸ¼ñþ ÷û/ü+ø£^x‡Á§ìßû2Ü^é©âOhº”—^ðÃßønú/øYÒ¼Sk¨é^"Ð4}ZB×ZŠö[«k›‰’[„—áåÆe™î'—âk`ñ¸LËW ‰ÃÍÒ­‡©íê¥RŒág ¤ß,âã(=couŸoIµr‹qj0qqmJ-$ÓM;¦šÑ§óÑ1áÜ_²?S࿈ÿø’´Ñ?ú¸?¨Ïá–úxx—â¯þºq+×®oŒü½¬¿­¯ªOÖq_ô‰ÿŠߥCKDÿ‚{~Ê×ü5âkøÎm[Â>(ðÏŒôÖ~;þÐ$Ó¬üKàíwOñ?†u9ô/üQÕ4-M´IÓuK{]WM½²k›( ÖÒ¢˜Úq|yÆyž¶1âœ÷‚ÅCÙbp˜¬Ë^…jw¿-JU%*rI¥(Ýi(ÆJÒIÆ'^¼ÓŒëâ'¼'^´ ìÓWR›‹³I«¯=l}©ÿêöãé×·ãÍ|ì^½¯¦»k÷§÷Û½ìeÿn¿ç½þWKv9x?_ÌvÇzò@=øÉZéƒ×úþ¯kö·ÊÒ™lÿ5³ó¿žš]öW²‘/ùÿ>µÓ­¿®žk·Eæïdd?üž9{r3ןÇ+œWL:ÿ›ü¶ý~ð¯âzþžÙÎIÝ“‚µÓ¥¿¯ø[Þï´SÛÓ_ëúØù¯öÏÿ“<ý¬8ÿ›jøïǧüZïúdqïƒÐô8¯Cþñ‡ÿ¯ôôä}Íï¥ìg%îËü;÷ÓMìõù®›ÆçÕ5÷g P@P@P@P@P@P@P@ÿÓþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õÏ×òü¿ˆ®1žq…þƒÒß׿ÿMö³GÖ~õ￞½ìÝ›iÜ?^sõ÷ úîÇ|u\LZit_•¾ïë¾äImåÓ{^úoåÑkÕÝXxïù_Çï~˜Ç<ƒ…êƒÙúwûº·Û}ûØ’EöùíŸqßÓŽA9x×L«Mÿ®öüZëÒñÿOaúõ·?(Ǩ^˜=mý~iyìöÓs9oÚöÛ¦¿+÷·½fÞ×NGôã=yÎsŒ/\ð3Æ~R2ź`÷_?Óôóó¶œËï×¥í£Vµï-­½½S²Qwçýsëßùc¿"º ô^_ÕÅúöýû6¾Ó½Óµï/•>$ÿÅŒø§añâÐù>!Iáÿü·^-tKõš-áÆy8WB»»‡áÿÄ+ó÷¼ªøgÄÌ:?Ã2µëáŸÖ¨<3Öµ.z¸^òV毆¾·sŠö´•¬ê©Â7•tfý×ÍÑÛ›o”»i¢z^Ö½ùOÏ?ØoÄ)wû5|‰_ç²ø;ð¾ÑùRq‚tAÆÒäm.,1Ò*+—.Ì•|Ï6‚•Ý ã1¥-ŸÃŒ­mšµ­gÕv„yQø-?0×îól-uöqmoN·JK}/côÇÁúŸú‘ŸîƒéëÆŽHÝÎݸŒWê9N&üšë§–ý­k¶Öÿ×7Õ`k_—]÷×¾½´è­g{½Êÿ±qÏìuû'_Ù£àGëð»ÂßOåÇ|f¾O0ð§˜=?ßqoÿ+Ôëo>œºzXýŸðáþé'Óªr?ý}ù?Ïßú.Pvvïý|ÿ­ÛŽÿ?çüñø×TOÇúo§’ò¾¡ý[þ^·üû~G©Éü9íŠéƒºùÿ_Ö¿+Ø_ðWÉõÓm´Ó]6Ô_ó×Î> u뎜óü]1z->_¦ï¥».«{ ¿Ÿ–Ö¿Íú;ÙýÑ”röú~?ÏÓ³ôé×ÓK_ËÕ^É5fן¯õý\xç·ùǾqÇ|tð7tÅê´ûíùëm{?Kˆ_~¼çÿÖ®Gàò¸çwT7þ­÷i+mç{ó_íŸÿ&yûX{~Í_½3ÿ$»ÅXõïÆß”wÁ߃í8}?åõ/Ÿï#æÿ$­«½Œ¥ðËü/¿Ï³×k>kIôºgÕ5÷Ç P@P@P@P@P@P@P@ÿÔþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨õ±ÓùàvúŒõäŒQšþƒÖßÖ¿Òÿ'¼~³õÙÝëçÑ®š^]½ë^+õãÔõý=³êrNìœÝ0}>ïø>¾½:h&®´òü/ÿÚ+YÆÊèr÷:û.?|ÀWT×üú·o-~ë’×þ=õÚÿºý°ÿkÕÆ~9¯n3“ž‡;Fï^Žq§r¥yÇ‚_–[O¿}.Ôz#”å°øpt#émé·ßå¡ô_‚¼áχ~ðŸÃÿi¿Øþð7†tø[Iû]þ¡ý—áÏ éVš.‰§}¿T¹½Ô¯~ŦYZÚý¯Q½¼¿¹ò¼ë»©îI 
Uj“­Q¹T«9T©+E9Nrr”¬’Š»m«$µÑ-ÒãËî¥d’JÛ[¦÷üüµ±Õ¡ãßóéØp:{œç‚Mo¯ké®Úýéýöï{ùÿ?çùWL¿Óþ¿Oü–@¿çÛ?†sÛùñÖº`õþ¿ÏË¢óÖÞêóü-­¿ßñJí´/¯òî1íÐñžzñŸ—;k¦»tþ´vžwoÏ£ùô½–žv\Ör§Ž{wëÓ·\÷Î}ˆ^˜=-ýz~½o+G9-oøÃëåÒÖóNRÿŸç~˜ê1†é‹ºü?/ëñêHï¯ây=Ýzœ“¸±Åu'ÛÓú½ïë=@ù«öÏÿ“<ý¬?ìÚ¾;vÿª]âŸN8Î2p{ðÚïÂ7õœ?jW]½õùkú™Ô^ì½mí÷ô]ïꯪ«ôŒ( € ( € ( € ( € ( € ( € ( €?ÿÕþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏV8÷zÿõæ¯þÆ·^¨Ãø‹ûGü'øGñ þ%x’ËÁ3xÇáÅO‹v¾2ñN§ øsáö‹áŸ„¾:ø ðç_¶ñŠuÝb 'YÕüQûFü<²ð½©¶š×T?ÛÉ}g}—gªÿa2ü^+W†§*ÊŽ' „•P©S:¸ºìD:PŒœá yv"U]ïBÊQs”~¦SŒZMòÞ.W Q”S»¾íÍ.W{=.îù» ücøEã_‡×_¼ñOáÏ‹>ØÛx†úûâ_†ümáoÀV~šúÛÅ—wž0Óu+¯ZÛøfãLÔíüE,ÚŠG¡Ï§_Ũ½«ÙÜ"hð˜º8•…­…ÄRÄÉÓŠÃÔ¡VÛª¢é%FPU¨¥NÑ—:”ZºhjQk™I8$îÓ¼R»¿¼´qÕò¯ï[cðŸíwû(ø÷Å:Oü ûN~Ïž3ñ¦½©éZ„|)ñ›áLj|O¬jš.‘.¿«iº^ƒ¤xŠûT¿Ô4½µÛëKKI.-tH%Õf‰l"’á;啿t)ε|»FŒ#N­\"8Frä„§RtÔTeSÜ‹—*s|±»m)Á»)ŶôJIô½–­»-};üD2~Ù?²>ŸÇòþÕ³Œ^µñ¾¸ñ¤Ÿþ§… ñlZ4ž#—ÂòøüNºJx|=ºñÑìjgEVÕE¯Ø¸®¨eY§´T?³±þÙÃÚª?TÄ{GMÏÙûNOdåȧîs¥ÉÎù]¤Ò¿>|Ô|qñK¶×~ø:Ãâ—¯hÝ.ew{.h«¦ºZûZ÷÷¯»¾ñ­ZÖ•ß$eÎ:ÞIYòûÎ*îËmu²²Ù>û.h¾'~Ûÿ²Ïˆ|\.øáË¿kšè³ÃÚÚï 7wqÚé×½Xl·]ÓäÃVŒ+S¯RŒçJ¬a_Øaêâ\(Ë’J¥IÓ£5N0¿<´÷låsŠ¿¼šM&®¯%¾É7­ÎÛ[ý ¼-qðsþ‡Á"ÿö¢ðl¿m›OoÙ÷ÆuåÖ,4{ýCMñ§£ø“Ç_þü>¿ƒÃZ†“©Xkv±x×ûZ BÆëM¶Ó®uk‹TÒ–j»Ãb$°u¯õªuàâä¢áBµcÍ'ìœlÓm&œ›’åæ¾¿ºãÓÍ´Ÿß÷èxwÃ?ø(_À?xÃ/ñUÌ¿üOãÿ‡¶ÿ>üø½ñà¿ÇO‰¿ 5/?Ä=Ç~ øyðóã/ÄK‹'Äz—ˆbÒtí^ûGñ2ßøGÅ#SÐtë /íóú52ÌM*“§ b#J«¡[B–+êÔkF§²•*•jФ”£'iEJ N¥&ÚŽ.iÙíuuËÍk^ú6ì×GfµÓS²ðGíÏû4üKÑ>kß þ$økâD ñ†¥àøÓÀ+ø‘¤é_-4ßÿÂwƒàÝrÞöÒi.ïáÕ`ÐÚ\Û­Õݾ¯‹¤êƵ)Òta:—« Ua °¤åKž Ú')ÆÛ&×-¹D¤¬ïèÓ¶—ÖÞž_ƒR÷†Ÿ~üiÒõ=oàïÅ_‡ômTmYÕ¾øßÃ>9Ótimm¯Ž“©ßxcSÕmlu!cyg|,n¤ŽäÙ]Ú݈šÞæg*U¨8ªÔjÑ“\Ê5iÊ›”nÕÒ›WM]+i¦Öˆš{4ý,ÿ/_êÌô¯óþO§ãZEíò×–ÿ…ÖþOÍ^È?¯¿¯àÿ@ÿëþ'¨ã× ;‘]Qß§}Ï[z§Ó­»ï­ºmù7é­ºÙ\zž~½ýÿ1’zN?7LNÿוïë§gvL—áÓËñµ­}•ÖœÚXuÿ#õ9þ\uç5ÓÓõý5ý=›3ú’søãÐúäqÛªãÝ0z[úõï×»ò¶À|ÕûgÿÉž~Ööm_» ÿÉ.ñN=qÿÇÊ3“€+ÐÁÿ¼aÿëý_âGG«Ûåu÷Sá—£ïÛ¯å×V»Þ?UWègP@P@P@P@P@P@P@ÿÖþþ( € ( € ( € ( € ( € ( ™¿mOù3ÚÓþÍ›ãÇþªÏVUÓt+$›n•D’Wm¸4’KVÛÙ/ÔkuêÎÏÚ‹þëö–ø‹ðÇÇw_·GìÓák_‡žñw.|??Ä…Þ'ƒÄPx¯ö•ý‹?hY¯&»“ân”VÑþÈ-à¹4é4ýE/Sâ)מæð‘ÐüMü‰”ḇ-Ãbh.ÎjøYâÍQ<=ñáV—­Yh ¿g ïÙÙü7¤kq|Io±ÝêV:•}{ì,žšS¥G£êˆÏ¨¿fýa¡ìxw6œ¨PÄÒ‡´Áãg"~Ð_5€!øw㋈ÿ³ßƒ-~üeøGãmOÇžø“¬Ûx—ã,OðGß|I­ëºçÂß_jÞ"±øE¨kÿ 
<5aã«­RY5ê_Ú¸¬fX¼—3¡‚£d+Ò–0¨ëáq4£Ft á„oëu(S„#ˆš„&ñ%AF(†éÆ2Q©MÉò´Ôᤢ^)¶í{¨§ÍÌœ¾½øáïø'Ä_†²oÁ kö˜ý’u²×޼âˆþüEñÿÁŸ‰z'ÄÀÿ>%| Ñ<;â{/x½4§¿^<Óüo?‰5-/_–]kÃJ[L7ššêÚ_&CF¶aŠŽYšÓÅcéT¦ëPÂc0ò£*¸š™ÎŸ%'$š¥*\‘”ŒÝ¦Ôy n‹P=>X4ì䤤”\u¼®ž·ºæ»VI|Rø¿ãì“ûø³Â_¾üÿ‚‰þÇÿ³÷ŸŒÿðÓ6wÿ üá/ øoÂ:Oíð¿àë˜|9ið{ö›ø/ Þ“à¶µ®\xkÅxƒá7Ž®þ"I/Œ¾ê:ƒtKRöp¸œæÂÖÅd9®3†ú‹UêÒÄNue‚¯‹¬œÞ'/Ä5í>³©ÓpÄRTu]F¬¡ÜiÙ¨Ö§ù´M}¥¥¥ånÏGÖ;3Ò¯¾þÍÚ÷ÅmOÇš×ü«öK°ð¦§ñWÁßoþxKÄZ>Ÿá­sľý°gßÚãV›Uðω¿jŸ|5ðþ©«êßuÜx«áoï‡$ñT¾5ƒÇŸï¾+xŸÂ¤ø…ÂY”(F”r ÓP©‡UjP«)œð¬½(Ê}*ÓŠŽ&3TëV­*~Ë4*û‡¹{ûXZ÷²i+©Æw·;W÷ZºJü×|ÖGÝ? ¾,~ÇŸ< ñcÁ+ûjþÍZªüOøŸñÛâ@ÔâÇÂí=t&øÛãOøÆ]XÞ Løz_Kjú—Úì¹,2^µŽš×-o\kZ­ ŸÙ9„=• 5}Wù¾¯Jù›ö .~[òÚ\»sKxÚ4Ÿï!«o⎗ÿ/—ë/ÍÏ~Íÿ5Í'á'…m?àªß²õ¿„~øö`ðu¶{â 8µû~Ï~ƒáî±x4? ~×>øquŽté5sJ¼ñ×Ãÿˆ6𹪋=;ǯ…tû/EîÒ«ƒ­'ãùëUÆTröSÓëUH®ià*UýÛ匕:´©UŒo*Jræ1’’ö°|ª)j¾Ê³¿¿mV»6Ÿ{Fðöcñ—-~1ÁL?dß?Ãß…ßþiü%ºøsð}o~|'øgûAü<¶[ø¯?hˆ†ˆ:³ü~Ôu›Oi³Yø[ÂIáäÓ<7ðþÔøƒWÔkE<Á:¾Ã&Ì`ªÖ¯ˆ—·§Z½«V«…ªìþ©K÷kêÊ.-9ÏJU"D®U½H½ÒËDšîõ×åÑ+¶}9ûKû$þÊ0xÊ÷Ä?·ÇìßñGÅ~.ðÏÂßI¯¿Ç­bE·ðw /Aá;Ÿ¿j¯Ú#Zµ¸ûoüO-‡…üEá_XG{–‰àÍ=à¼Ô5(ÅÒÌq|Š9^6”!:Õ9~¨×¿]Áͯa‚ÃFJÔâ¯8Ê¥îÜåÌ8¸Æþü[vW¿E¶ò—~–ý#÷Gü6ì{ÿG_û5û®ß ÿ/ùÏùëŒf°Ž_˜[\7熮¿÷Ï绽¢ÜàµæßøÿÊû ÿ ŸûÑ×þÍ_ø}~}?ègÁ<çü8 Ñ:Êøb[»V¿þ›û·ïÕ¨¯iOùá~üÑZúÞëd­®ŽÚ¤ÜWþ?ö<ÿ£°ýš½Ïü/_…ÇÛ§ü%þ¾Iì½1Àãº`ñøM[çölúu²óûKÚS·Çšs/]ÓVoÏk(ër_ømØïþŽÃöiÿÃíð»ÿšœ?b1ótÇ¿û¦)ܽ_þF×Zõ·v‘—<ž?|à~_pïømØïþŽÇöh÷?ð½¾óSÛñçæ$ŠÞ8Lb׊ºä¯}·ó³³æ9ãüñûâ|ýûZþÖß²§‰?eOÚkþý¦g½ÄÿìùñŸEÐô-ã?Ã}WXÖuSálj,tÍ'IÓ4ÿÜ^j:ž£{q …œ]Þ]Kñ<’¤mß„Ãbcˆ å‡¯ªÔ›n[$ªE¶ýØÅ$µm½:ÛíLå.hü/f»y]:Ÿ¥µ÷ÇP@P@P@P@P@P@P@ÿ×þþ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( ÿÙrocksdb-6.11.4/docs/static/images/pcache-fileindex.jpg000066400000000000000000001532121370372246700226270ustar00rootroot00000000000000ÿØÿàJFIFHHÿá˜ExifMM*V^(‡ifHH  Ô ÿí8Photoshop 3.08BIM8BIM%ÔŒÙ²é€ ˜ìøB~ÿÀÔÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÛCÿÛCÿÝ;ÿÚ ?þþ( € ( € ( 
€ ( € ( €=þÓŸdÿj7šŸÂÏŒ¿|(¶r|Iø• jzÄvÞ"ºÔ4ß[jº™²›W‚×A»î>*~Ôsü>ø¿§|ðìûñÇã¿.þŠÚ„ ¯~húg‡ü#'‰î=TçÛÏÛ¯áõŸ‰oÐü2øËqðsHøÎ?g­köž·Ò<~i?Sâ?o¼+u¿¡øÍ6›¢üd¸_„¾ ñþŸð~÷ážãˆ5;]cÆvzVƒ¯êÚXà¯Û¯á÷|Cà‡á—Æ]á7Æßü>ø5ûGø‡Hð ?>*x²Þ=v]ÏÃÉ¥|HÖ>+èG×Ãòü:ñ—Ä?…ž ð?ä·Ñ¡ð§‰5Y|màñXáoø)—ìûãŸü>ø•à½â—Š<ã~Ë~+Öµ­#Þ–Çá^£ûbøŸá‡>|;ø—?‹àÓlþ(êÖß<3ãø#Â7¾0Ôüà[Y.Ðïk¾øí⟅PM¡ü@øñð—Mñ~ãï‰^×<3eã}XÔtÄÒîEÒ€z‹ÿn¿|Ò´ {öšøOñ‹ö[Ð|L>$ã럛à߈´{[ῃ¬pý«¿g~Õÿãø1ã ´AáK¯‹ß³wÄ¿éž$ð޼=â½ àíð£ãÞ«à så펛¤üG±øg?€µ ïÚöÏJµñºÅΉât÷Ðu –>6ÿÁ7´Ýé_³ç‰>~Í–Ò¿g¨üàà=–­ðÇÃzÿÀ?Û{á·í°šÇ‚<ñá7zWŽšø“û(øÄ¿|ðá~ŸáOþðÇÙ§ã6…¡x#ÀúU§†l‡ìûûT|.ý¨5 éþÑnü5§iÒ|DÕ¾ß蚎½ ¼š^§âËßÝé¾#¼¶¸ÒµP‡õÏø$ž•{ìó6ñÒÿÂú¯Â|]ðgÄ—³øwos¡|s_|ý¦¾ü2ñ·¡¿Œá›ÂÞ(ø+íIñSѵk cVhúƒо7þÎ_|y'ÅŸ…>$ý¤|o«üX»ø—'ì½ðãÁ:…µ§Äïèº'…®Hñ§>>ëŸ|að³á_Œ>ø'Ãþ ø?ð‡âåçÄ„>4ø¬ë:ÏÄï|uðå͵µÇ‡~;|#±Ò4Í"Çá.,0˧ê÷WwZ½ë½í¼0A|ñ<øz¦ ÁDZq¯&åZT½Ÿ±t––§W››Úö¹zó{½40겓sqåk¥ïò·n½K?Øÿ¶Gý/Ù›ÿ?â§ÿFô8ïž+æWˆõ]¿á*ž¿õ/ÂøuòÓ_;Qüýø¿Øÿ¶Gý/Ù›ÿCâŸ8ÿ»Òõ㞟j¼CªÚ_Ùtõÿ¨©ò…ýwØO’¿µoË—ëúÜ?±ÿlú._³7þ"Å?þ/ëùV±ãúþepÿ¹?ý×VüRîGÕ#üïÿÿ؟¶9ÿfoüDÿŠýƒÿAü k;¨ÿæYÿsrwÿÊçø^'Õ#üïÿÿîý‡ûdÑsý™¿ñ¾)ÿôjÿ_Ê´Ô—üË¡éõ©òëïª.“øÿoþ_¤—ûöÇÿ¢çû3cÛöNø§ü¿áµü:÷ãîÖ«Œê?ù—Ã}þ³/ËØÉþ>—û3õOïëþþÙ¯Ó×áì/Û#þ‹ŸìÍÿˆñSðÿ›ÔïÛ†ëÉ­WÍ¥þÁ{b$¿<;ßNŸ}Ãê¿ôóOðýÿòóúÖÛþÀý²:ÿÂõý™¿ñ~)þ\þÚ_Ï¥j¸®NßìQÿÁïÿ•/žšu½ˆú½¾ßáÿþp£Ãÿ¶Gý_Ù›?öi¿ÿ ÿÆi¿—L“œÖ«‰¤Ý¾§ü/þCüÿÄößüÿ„öÆÿ¢ëû3ÿâ'|RŽ?á´Oãœc¯rkXñ›ÿuŠÿ¸Òÿå¯ß¨:Þüÿ„öÇÿ¢éû3ÿâ'|SïÓþoDc=ºã¹æ´Ž})Ì4ðkÿå]uþo>Äûïä«çöÖÝmü–ã¿áý±¿èºþÌÿø‰ßÿú4«XçmÿÌ2¿ý}ÿîkú¶û‡²þ÷þJôûŸ^ÿ»´œ<5ûcŸù®¿³?Lãþ7â—à?äôO^¹íÜŠÕfíÿË„Ÿok·þR–ûÿŸÙNºüíþSóï÷ ÿ×íÏü_oÙŸú´ïŠ_ËþDôï’1ïZ,Í¿ùr¾U?ûO=Ÿ/o1{??Ãÿºü#?¶?ýoÙŸÿ;âŸÿF·§åZ¬svýÚׯ;ÿ€õôûƒÙùþýÐ_øFloú.ß³?þ"wÅ?þŒúÖ8»ÛÜû¥þq;¿¸—[ÏMºÿäß…þ ^<-ûcÿ%ßögÿÄLø§ßþïL}z uÁãvªµÝ¹øòû…ý_צì_øEloú.ÿ³?þ"oÅ?þ?ê=ñZ)ÝÚßü~mù1_Öÿ—Þiüñ¯Ä¯êü'ñOUð6¿â„_ì>Ùø‡áÿƒ5ÿèÚÖªüø3ñ^ÞæçÃ>#øƒñ6úÓS´½ø›¨hóMЦµ¼µÓ,®c²²šIѬ¡h € ( € ( € ( € ( € ÿÒþþ( € ( € ( € ( € ( €>QÒ?äò>8ÿÙ³þÉçòø§ûh{\vã½~Oâ_ñrú÷ÿÒ°§£Ú§¬öãèOð€x÷ÇcŽƒ©éÚ¿5‹ºêìúýÿð7Ö×Òöx¿§Nœÿ‘Û<}n˜½üzþkUë«ïp×ùl÷ôû…ÿ<ó“þ{WT^ÏÓéÚÿÕ•ÙƒVvóõþ¿®ÄËÈïÛôôãÿÖs¸ºbìןõçßüí¸‡tþ}?ýyü¿Ÿ×ük~=‰ÿ׎œ:cŽ1Œ`öÉÅtÁéÞßÖ¿+-{>–ßçüõÿ>k¦/Eý~½>ZôjÄM]uÿ€µ×U¿}vùNÿçàzóíŸÑºbö{]™Ÿõ¾–诧]ݼՉÏùþ]¿@[¦/U§É[O.‹Ë-5õý~}£ó?Àù*_¶ŸýœÇ…õŽ?dÊܦ( € ( 
€ ( € ( € ( € ÿÓþþ( € ( € ( € ( € ( €>PÒN?lŸŽ>ŸðÌß²†zÑSý´=Hëßóã¯äÞ&;Vɼéã—þM…ó_‚}ô±èàv©ëÊGпçëØØõéÇ|aBþiëóÛçåå»rïú~ÀíÓŒžçl×L?­zëÓÓ²ûÀ_åþÏÿªº`î·Ûõüÿú^ÆSV~]?àkýt]G¡çÔûc·±#××ð8ºcø_ðüÝõõ½È%þ_çüÿú«¦-éòùöí«ôû®=ÿóÁü°+ª×Éÿ_/;ßïIÅ_×GåÕ‹ÿëéùgñì23]0ßúþºöéÒáùm½½m¯KyuÑØzž£¿^¹Î:Ó‰ü³•éƒéóÿ€ºyÿï’Òÿ-­®í¾ÿ-=-ïJ=;qøã¶;{uô8šéƒÓúþºyùôQÌwëÓ¿aþÎnONI9º`î¿­»o}4Ýõ½ÝÀ>žÞŸ†N;ö8úãwËÓ¢}­ø}öºòûÉ—èÿàöÙtë¾¼©\õó×ÛtïžG¿Zé‹ÙôòÓõVí¾žv'óôw_=íÖ–¯k|#×9öþ½¿\cò=kª-ßo–ßÖ¿ä÷×·kïÚû·µíe½ï­“”ž¿®08±‘€3ƒžzu郳üw_«_ŸÉÙ2ÿëvú@G@s†Ç·º õëýímÓ¥Àpÿ=¿ÇòÏù®¨?êÿu¿^Ú.¨OT×õúÿ]·½Ï^Þ¸úàúq’W¢OëÏÉvîþF/üúüßW¾«³óo˜—õ?—NÃñõ'3ÈÛÓ§Õ/üþO¯TÃð_çÕïÓ²×{?µóGÀù*_¶ŸýœÇ…õŽ?dÊéŽÈGÓÀ( € ( € ( € ( € ( €?ÿÔþþ( € ( € ( € ( € ( €>NÒ¿äò>8rGüc?ì¡ÈçþjŸí¡ôþ–+òO¿“koÝã¶×íaz]~zo­GðÕõëý~O¡ÇôÏê3ŽqÇ#žzdó~g¯äÿ¯Íywéiw‹øc¦?®8ôã·Ðdꃳõþ—ÓÕìþrôçŸb}¹Á®ˆ7µ¯óÛÓñ¾šéµˆ­½¿__ø/ï½G^½¸ÿçùfº`÷_wõóýL‰¿LóÓüóõþ†º`ôôó×ôÓn¿5öAÃÿÕ__ÃúñÒº¢ô_‡õ~ô»èÒBðïM»k}ÿ«]1’ÉÉ4Ò$0Â,²Êê‘Åk½ä‘›‘¢ÎîBª‚ÌTk¦.ü¯Ím×þ]ÿ¾íôÕ?òWÕ†ðjþ,øçûQ|Sñ·Š~#|jÑ~x¿àçÀï|ð_€þ8ühøM¡hÿu?ˆ´—„4oÜhÿ Bë™ãe—ýB…eÊx¥ˆs£B«H¬#µêÓ¨Ò¤êÊ—,d•ùŸ½Ìœ~;‰3,^ ƒŽ»¥ÔëJQŠƒrå>Gi'üÍèÕï~Y5së |ðeï—ö¿~Ó39ÿŒÊý¯~îþ8/çœÆðW ìe¸¿mËí)áç{ï†Ã-ûrÓ¼ßK_MQDŽͱ•-Ï^R¿”?ZÚùþ^ïyà¿Øü4ý¨þhñgÅëíÆ³çí¬ø‡CñÿÇŸ´{½gÁ¿ÿek jöÚgÅoøÊËJÔô›/x¦Òí"ßN»’×\½·¹’x=ìÂu‚¥:tiS›¯ÊTéÆkÙÔv|©=ìí¶´¬}>´êósÉÊÉ=zKÓÑl|§û?üð7ÆŸ|SøñCâoíq¨x¾ïö»ý»|5#øoöðý¶¾xwOðçÃßÛcö€øyà]ÃþøsûAxOÀþÑ<1ào øsÃzV•áßivPXiväÀÓ4³Ë÷9NE…Äå˜D°”'*¸jr”Ý(^R²M·Õèî÷}ugj§)Ý©JÍÉo.’jÖº];Û®§ºÃ!|ÿ¡ûöÓü?िðQàù:CÓê¸ÏÌMz_êæÌ%koÝÅ~mÖÚèôi‡°—yk¯Äÿù/ÍÆý´>{ý™?gO…þ:øqâMkÆ??m]cW±ý¡?kZ^¿üoþ dÐø[á×íYñŸá÷t±ŸûNZÛ²h^ 𿇴Dº’/ïÓO[íNæ÷R¸º½ž)äiG›ê´tHßÙ¥­:ŽZÚ(ï­ú¥§2)4õ–í|Ré&¶÷>ï_µô/ü2Àn¿ð°?mN9ãþ KÿèxþNtÏážsÒ¶þÀ ¿æ•µ³öqVü:_O=ÓR°—w®»ÿöÿ½¼¿=þÌŸ³§Âÿ|8ñ&µãŠŸ¶¶±«XþПµ·ƒ­/þ 7ÿ²0ø[á×íWñŸá÷t±ŸûNÚÛ²h^ 𿇴HKûäÓÖ÷S¹½Ô®nïgšy%E¿aOã©ò-áUôvå½÷ïk§) 5{½ÚÝô“]ã·{zÛâ—ПðÈ¿è ~Úž¸ðRoø(—ôòtåßv0ºaÓ_òæ»×’ßûwDüõÑÝY•õwÝýÿý»ïÿŸ>ß|$Ñ|1ð7þ Aâ |Tý«†©ð2ëâÿ õ-göÚý±¼Uuá94_Ø»à·Å=c—Å?5£«Ákãÿk¾ KO VÕåÔe°’'Ò’>CêØÇAB1J4Ý’_j7¾Í»§¿2òZØç©Irëú;ëk¶Û×ÓºGípè:ãù~¾Äþ ­ÏÓîþ¼¯ß«z™þ;ÿ_®Ÿ{½£óOÀù*_¶ŸýœÇ…õŽ?dÊê¿®¢>˜ª € ( € ( € ( € ( € ÿÕþþ( € ( € ( € ( € ( 
€>NÒÿäò>8ÿÙ³~Êú´ÿm§óü«ò?ÿ‹“×¼wþ•…=,Õ{Þ6ÿÉ¿­øŸB¯L~'úëž}ñ»ó(=¾½wùë¿W®Íõ;‡O Î:ì®1žÈ5Ó·ë¯ü¼ÝúÞþð/áþýXþ]«¢.öžÿ-µv²ÓÒÂ{_ŽÛ^ÿ•íaSùgüþŸuAÙù?—ÏüïÂñÂß×õý2e< ÿ?–ãï]0ßúþºöéÒà8{ÿúÿ—óãÚº`úOÓ_½ÛË[û£>bý¥//ž] »;¡sû]ü䟳7íV}°ß¿c3:Û9Ûº¾Úµ^| /}1¿uÞ®ík£r½›—ÞdóçU<£M[þ¿¦|û2ü@ÓôO |^Ó&¿µ†[_Û_þ &).£×Íý¾ÿiY×r—V]Ë(o™@ ŽÌ Hð^O_Âù5xÑ©8Ô»J0m>ZÕc£Q•ìãm÷]-hýv:”!;;9TZ'Ò¬×ò¾‹½ºÙéÍôWü-M/þ‚¶]¿åö^¿ë898Œuç?/Óÿ`b?è¯þ —ÿ+:~©=ùeßgó_‹çÓ}.|Ïû&üMÓ-~øª6Ô¬Ô¿í1ûiÜo"RVïöÆøït7 :Ε*À䆸ð9"tg%B«¶30†“Öž?M¯êœZ}š¶–±•,,ÜogoiUhžêµEü¯¦šÛ¿/CéøZš_ýl»Ëì>½Öprp9ëÎ~^Ïì GýÕÿÁrÿåf¿Tžü²ï³ù¯ƒEóé¾—>gý“~&é–ß |UjVj_ö˜ý´®7q)+wûc|wºFÇ›g Èelò+ÇÈñ£6¨UvÆfø$õ§ÄÁ¯áôq·–Ý ©afã{;{J«D÷Uª/å}4ÖÝùzLÂÔÒÿè+eÛþ_aõëþ³ƒ“ÈÇ^sòö`b?è¯þ —ÿ+5ú¤÷å—}ŸÍ|/ŸMô¹çZ^¯»û"ÿÁZõ8&Žxç¹øÆ«$n$FòàŸŸ¡ 2–2y8<•øß᥄âô' FGË$Ó\ÔiËTí½ï²Ó¾Ž^:Ï8´Ó´7ìávWÓºW½½ë>_ØÄ<{Žzõûö÷ÇÔÿÎÅ«§ùiùÚÞw~Z_¯Î×}wÖË¢×ÎÇÍÿä©~ÚösÿÖ8ý“+¶ ùþb>˜ª € ( € ( € ( € ( € ÿÖþþ( € ( € ( € ( € ( €>NÒÿäò>8Ù³~Êú´ÿmóÿê¯È|Oþ>MÛÙcºõçÂÿ[o¦šž–á«ë>ý4ÓÏ£¶÷±ôuýséïïÇÓ°õ ùŒ;[çÛþÚï²óqï×ü>·þž›ž:{ãŽ8ãßák¦ëþ¿×åÝí/ÿ¯89÷8çœtÏnk¦OGÞú~›üÕ½Óúþ·ü¾ñÏùÿ?ʺbô_Õ¿ªõ×Îæ2Ý÷½þÿÃÏçfÝ­õß_LjïϾtÅìý?­mo¿îV$“§9Æ9ÏÓóÿ>kªÍyþŸá÷¥vË?ü,¿üBý¤îÿ}§xæxüðp¿1Ûüð&£ˆlÁ@ø«ã+xö+è¶[ÁS|8†ð4š$=*¿º<=®áûÊÝýµE¬__ÝC–éSÚ[I2·}û-—«QÖúút[Ká?ÚàwË¿·GН|pÿb¼ðÿì±û:C¤ËàŒß~ì‹Pøµû[5ßÛá_Žü«!{XjÉ5š¬±ÚIW Þfeõ_øM•|_‰©ØÿeW€Àã*ÑN%F¦+ZtTß/‹4ö_Ùô}:tßÖagNïjUwåK¿eæ–ˆúü·’Óå„#‚ùò¥~–nÿöîÇ™| ý€eO‰~ø•ãßxž'ñOíOûjêºÝî™ñ£ã—†l®o¥ý°þ:,×0è>ø‘£h±^XôÝ2Ò)g2ÜH†iewôð¼oÆV —åœQŸeø = †šã0Øj1©N5f©P¥Zé©ÔJ’åQæœç'ï99{tñxºpTéâñTéÅË–ñ¡ÞnO–šŠ¼›nËW®¦gìÙÿëý’¼SðïÄZž¿àoê7ößjß[ÜKûA~Ñ1´z/ƒÿj?Œ^ðÝ€X>*ÅG¥xwDÒ´¸]—Ï–+5–æY®YßЗˆ|wEGŒx' R´sÌÅ')Ó„æÿÞlܧ'&ùu“mîØþ¿KLv2דíXŽû¯½v¢í{Y·y{N‹ÿ¬ý†|?g5†ð£ÆuÆ«®ë“Aí ûJl“Vñ>¹¨ø—_¾`ÿ\ùÚž»«j:•Ç!~Ñu/–«ŧKøÞ’å§ÅÜINu*¸ÓÎ1ôâêV©*Õª¸Ã¯R­YέY4¥R¬çR¤¥9JD¬v:+–8Ü\Sr~î*²4åÏ/ùyniM¹Éý§&þ&l¯üö.=~xØÿÝÃ~Ò?üöÇó÷ãÕ8å½xljÿðù™óSÿ»NÀó L~7Oú‹ÄZÏmí­Û[µ{vQËÑ¿à•?°Ç‡­&°Ñ~xÃO³¸Õuívh ý¡¿i@’j¾&×5/k÷ì$ø¼Íçjzî­¨êW;~Ñu/–‹U](ñÏSNø³ˆéÅÎ¥WyÎ>œ}¥j“­Z£Œ1I9Ö­RujÏâ©Vs©6å)9f±Øè®X㱑W”¬±UÒæœ¹¤ôšW”›”ŸY6ÝäÛ5Çüö.ãþ-·ºù¸_ÚDôëÿ5q¾‡žpO |½Pã¾6{ñá÷2ùÿÌK·N÷émQ_Ú‡ýã{½â<ÿéç›ñ‹ö}øIû9~Âß¶o„þxfïÉ~|~ñ¦» ÿмeã+íOÄ×?.¼3.©s¬øçÄ$Ö™ÿ°|-áý2+Q¨%”úd" hݧy|¼NcÍ1\̱¸¬Ã7N51XÜE\V&q¦” 
§Z´¥Rq„ai>XÅEZ1Hä­R¥fêU©R¬Ý¯:µ%9Ë•Z)Îm»$’Wz%emÐ8?\ž‡éÛ¯±5œ^‹Ëôí¶ßŸ}Ž÷òûüþÿW{Gæß€ÿòT¿m?û9 ûÍœ~ɹÇçù×u?‚?×Q]ÿ/÷}ÇÓ`P@P@P@P@P@ÿ×þþ( € ( € ( € ( € ( €>MÒÿäò>8Ù³~Ê'ÿ2—í¡ôýOå_ø£ü|—þ½c¿ô¼/cÒÀmW½ãoü›ç~Öÿ3è1Ô~Ÿçùõ«ó½WùÿWºÑzïòÿ‡óJÞ¿~®Rñ×éÓŒØ ð9ç'Ž™øwmdú—ÅÏÛJ¥ZÎçCðBÞhþÔC"ÛxÿÅ „°k´ éàÔT¥^I:xuÏgª•M¨ÓóNvs‹Wt¡QßOy=tïùu}:m®öØúDÑ´¯hÚO‡´+ m+DÐtË GÒì£YéÚ^—kŽakÂÅmgg6ðF(Ñ)óJOžMÊR|ͽ\›wnî×më·^nš[eÒ˲·éú”5´ÒÿoÚÈÛF¡û,~Ί8ÈÊ|]ý¬Ç<®ß¼q’ÅŽ‰Ü¡_—âlÂ8\nAEÊßXyªZ=ãý—kýö³µÞœ³vQü¿1>Ç2ɠݽ®»jªPßÉó=¯v¬¢ÙôƒõCû¢XòŒž[=²¹9\‚IÈQ‚Ë­õY>+á×{ÁûZéçóÚâÁUÙsiº×ÓnïMô¶þRÙ´»µßÀî~ïìÏûU·nñKö3ÇM½qê ~jûMÓ?äò>8Ù³þÊú´ÿmžý±ÆA9#¯Ç¼Rþ>Mÿ^±ßú^ô° _Xë¿ón»yën‰ÞÇÐ?þ¾ŸÐçò9ôçù|^žkçýoçòØô]ÿ=të}vó]l®‰× ü?Nn:ãß¹%ºbôZ½–½}m~þ~wÒ䇸ÐñïŽÜtîzv®˜½V¶_å¯ãmÿÌýÿõzvíúÝ0zÿ_צžZÜ™­?¯ëú¿Az~¿—ùÿ8®˜?+þ—íÕyþ-ZÒÄùƒà§ü]/ˆÿ¿hkŸßhO%ïÁ¯‚Û¾h—áïƒu¹Ç>0²Î6ÿÂÌø—ax#¹Œ=¶³à¯‡ÿu‹ILwŒkÔ¨ý•*xm¥eZ¶ºûI¯rþ½RiÚÊÕ*ÕZÚÑKVßM—êþoÓç¹õ?ÿ¯üõLf¦OO=M6[ü×Ùon¿-ÿ¯¿É=ÆïÚ‹àwËŸ·GН|pÿb¼ðÿì³û9ŤËàoŒ¿~ì†ÿâ×ínןlO…^=ð\z´Œö°yjË{5¢‰c´–Þ9ç–?ê­e’¯‚Ëñ5#[ìªc2ü6­(ò`9Õ˜ªjRR´UEJpSi9&£|û­åÎ¥ 5Y*XŽYWÃaëÎý;û9V§9Sºk™BÜÖ‹w²;ÿ þÆdòƒë¿´ºÿ«Ï“ûnþÚãò~?Ä‹ƒŒpkêrª¸v¡þÉ€KOƒƒ‚ô÷(S·––VºrûþØ_nü¨üX½›^ý˜ÿj(5ø‘ñÿãÇÆ›xÒÃâ§ìq-¹Ò->2|Hñí§‡¦v»—í“è0iW:Šy ¨=Äv¶iÐf¾Éeô=*pY‚—³§]û*¶¿*Ž×jÞïš¶‘úü»’Òå„c1Šß´Ròµï×áØòoÙKáwükàŠ#ñGо:G¬ÝþÙ¿ðPËIâð×íGûKøB·µÑ¿oÚODÒ­ôÏ xâç†ü)¢Ám¤éÖ6æ #D³Iä‰ïnR[Û›››·Êr,/-Àbg„¥Rup´[œ©&ÛPPÕµ}¢•ü­ïoEBR»NVr–ÜÖÝ«_›§¢³ïk@蟳'Àß YͧxWý ´{_Äüöšoí‘û_ÙÛÍ­x¯]Ô¼Qâ]VH¡øè‘µþ»âMgU×5[£ûÛÝSR¼¼¸y'žFoIðÞ »¼=’»¦¯hè–½RIkd¬´W‰ì¥Þ_s·ÝÎ×_ømÏLý//®þ Kö¹â_ ã_íIáM/TñŠMÓ?äò>8Ù³~ÊýÅ?ÛCÔŽ=§UüwÅ?÷Œ—þ½c¿ô¼.ßðÞzXô°UïxÛÿ&ÚÝ{múKèÓ¦?ÏáÛ¿,Wå°ëþ§ÏM»wrô>í<þoKµ¦Öº½úÙÈ‘_Qמ¸ãp1ë“Æ;q]0w^}zÿZ.ý:Y’IÿÖü{ÃØñÓØµÓuùüþÿÍßwfÚˆçëNýO8®¨½¿áÿàíêÚêîØžÏúÿ;zÛMõ>yý£üQ¯YxGGømàmF}/âGÆý~/†^Õ,ˆûw…¬µ ÝKÇŸ¡aü>ðâ/isN†ÎçÅVþЧu—[¶I},c)ºµ#ÍJ„=¬ÓÚm4©S¿ý=¨ádß#›JÑ|¸KK¯’òêíé¯_¿i{o„¼1¡x'Âþðg…ôè4 øOBÒ|7áý*ØbßNÑt;4Ý2ÆIo.ÖÎÚT»o`¡œ–bj½¤§9NMÉÎNrow&îÛÛ®÷_$Òå‹üÿ‘[ÃëúëÛ§Kù%ñï[M/ö÷ñU«°Q}û+þΫÎ9Ûñwö²P9Úsóä¹ðÄÌ7/Íñ=a1™ íõ‰fË[j×öJ^më²»íZñü¿1Ç2Éàݽ¦媩‡ó×âÙ9mðËìý#àýOýYÉ>½zäÎ ý—|cý—àß‹zžSìÿ¶·üWå —öýý¥¥ã 
ñç@èAÈÛýÁ™oÖ8_%­Ë~|õÒ¥HvKN_ò»÷ªÃQç£[­MW[TŸ®¶Véç¥Ùô‡ü,ÿËÙëô=zýÿS€8Æw`çåúìúv¾çénÚvÓ;K£êÿÝÓ_ë¾½5éölÙëß°¥×Ûg³w»wÚ~=~×Óîõ2~טœà¶GVcÆ Í6qD=—gõJÚÛµ’ôòzþ7V<µ*&¶œ×Ë™ßDúyZ÷½´´¾Å^Ÿç§oOÈò=ú×­µïºü›]ûü“#寣º»ï¯]bÒ³wzYÄ‘OoÇêýxäôÇ¡®˜>šýßÓü4·K’õíë›Ý½¯n›éÍkŽõýz Ûãçvõ9z`ô·UýZýÚ|ÓûhÿÉœþÖ?öm»Õ-ñPôÐç l]T_½)GóûžÝúëk ìýõÓóû¦k¢/E³·ÝýtÖÛuÜ™­?Kú—ß~Y|ÛðþJ‡í¥ÿg1áoýcŸÙ6½J?ÂÏÿJ~Ÿ—Üd}5Z€P@P@P@P@P@ÿÒþþ( € ( € ( € ( € ( €>MÓ?äò>8Ù³~ÊßóTÿm‡œßBzc5øßŠºWÉ?ëÖ?ÿKÂë×oO¼ô°Uõ޶¿}ÖšyôÓ} ó‡§|qÛØç¯Ë`õ^×Gúµë©è}úéu®®Ú^úíçÞÛ(ȽGøãüOÐk¦ÿ_+ÿÀ_xŸõþ}/}ü–šîIþà~;qŒ Æ;Ý0}?¥é××NÛ_Ó§NÈíž>ƒ·DÛ«þ¿¥ó»Ú!òÏ¿øºÿþ!|qŸ÷þðgö×ÀƒdðOi¡ëVïñ«ÆöyܧþˆšmåGd—JøKi©Y¿Ùµù|ßZoØáéaþÜùqÕõ\Éý^›Ñ?r”½¦ëÞ®ãdãs k&ú]¥Ù÷k¾¾}:\ú©?_óÿÖöÎppJÄ^‹·OêëUê®û«s"_óÀÿ9?çµtÅìý<¿;~p_µÀ†ßÿn¯j9“â,W¾ý–?g+}&OüløÓð•ÿ‹µÃÝ›Ø>üAðT¼¬öЈ®uhïg´Mñ[Ëo Ò$±Ž†K-–#€ÄÕ…|{¥SÁâªÑR§€çT§ˆ£Vtã.UΩN*O•Ëe#àxÁQx¼µÔ¡‡­%KÉ*Ô(Vœ/:Wön¬‚jÜÜ[&ïdåè>ý‹þÊ! ®þÓC„ÿSûp~ÚðãŒy´ {p8^:.âo«Ê§†j옷ÁƒÂSô·%ÛÖñµ¯g¢8°S¤ùs‡[i>6íðÓVßNº½w¥|<øðûà÷í…ð‚ïÁ:Å›Ù|Aû1~ÔPê ñ'öøùñ®Ú8ì>*þÇ["Ïã7įÚxzVk©¾Ùq A¥Üê ä&¥-Ôv–qÛýn©,ºƒ§J;âcÍìà£wìªïÊ£¶½=md¯ËTm.XF>êmÆ*+}¬’^­í£Ò>}ðöý™~"xkâGüUáOâ?~Ôß¶¶«¬Ühÿ~<øWN¸¿ŸöÄøéçÜ[øŸ4OiÆvA,ÑéºU”3\4·2#Ï4òËÓ†ã>,Ë(að9wgX‡ &2ÅP¡INŒ*OÙѧ5)Tœ§.T¹§)IÝɹ{4ñšqP§ˆÄB ÊÐ…zм›vŠšJí·¢Ý·­ÙWöqýÿfO|=ñ­â_|NÕµO?µG…­®®?iÚQd‡@ð?í=ñÁ>ÓCñr%h4_ xFÑ­™“Í{{Þyf˜Ë+ú58÷Ô¢—ñ½*ÿ‘¾;YN…)Iÿ¬¤ßMn’I'*X¼gýâ·ó[£ôðýøQð£À_| ¤|5øg¢Ïáÿhw~ Ô4í2ë[ñ‰.ÒûÅ^$Õü_â+ë­sÅZ¦»âNëVñ&½«j·7ž«y1¸¼‘Al‘DŸ9‰ÆbqØŠ¸Ìez¸¬^"n­|Ez’©ZµIoR­IóJs“W”äå);¶äõ9æÜ›”¥)JNNR“nR”µnR{¶Ûwm½ívÓ£/§ONŸÃÁçñõ9#j ÖŽ×ÚÉKo_ºÄýúë¶ÊúôvVNÒIJE8?çŒwèz}9ö®ˆ½Sü{yõü½n®/ëº~MÞ×ÝÞýn¬þ?N‡ôéèrÇ ƒ]0z÷þ·þ¾d4~Ú?òg_µý›GÇcŒz|-ñXÎ1ôÀã¦3Ô·eŽ+ûÑüÿ­—Ü'³ôgÓ_çü}~½½kh>Ÿwãù.ž¬ºþ¿àÿZ«ZçÍ¿ä¨~Ú_ösÿÖ9ý“kÖ¡ü(ÛßúS0×_Ç©ôÕl@P@P@P@P@ÿÓþþ( € ( € ( € ( € ( 
€>LÓ?äò>8Ù³~ÊdsÿOöÐõÆG¯=ýÁ_ƼUþ>Iÿ^±ßú^ô°UïxÛñí×·ŸU´¾ºþ_€þôÈ=±Ž27~W“ÿƒ®ÿ›ó³mGÑû­ý7öŸ{Zê÷òc‡¯çý¯Q]0{tü+7òzõ²d¿êÝû·­ïÿKX˜}=ô·±Î?,½1{>ý¿KÚ÷Z+®½.#Á¿hŸxƒÂþ¶ð×.–×â—ÅzÇáoÃ)ü±9ÒüEâ;kéõO½©dûEÃoéÞ&ø©@Ì¢æË“X¡3Þ@éà¡Õs¨›¡Fµeµã¹iÞͧZ¤¡E?²ês;òµ)“v²Ý»/+õù+³Óüà¯ü7ðW…<á[F²ð߃|?¥xoE·’C<ãOÒ,ⲂK»–U’îúá!÷²ƒ=íä³ÝNòM4VêέYÔ›¼êJS“évïeªI]»i¢VJ7±2IE%²Ñtõü»ë~¶:ÑÔ‡ÿ\qøñïšÖ§ôý5ûݼµ¿»™?òÿ?çÿÕ]0w^}¯øÒ!ù)ñç[M3ööñ]œ’KÿÙ[övÈ'ž/‹µ¢ÇÆ -™H@7¸€ƒ>bxA˜G ‹Èh¹Yb%›Y_Fã©C¦®ò²Z·Ñ;¹Çó;Ä{Ë'ƒvU0øÏ›J zü[{Ï´^®?Hø?S?º9ôÏÕxôÎ$ç=B~Vúì£ðëø_ËùþÿÅèÑí²¿žë{kmßÍt½ìh.¾Ñû]ü䟳GíWê?áh~Æ^¸8éÅ}~>¯>_I6ݱTµòö5÷û¯ÓÝÖËD}æO>uW«ä[ôwüô²ùw;_Ù7þIwŠxÿ›—ý´õ±~;vü¾¹÷¯:«ýìëÆÿQ©y«o½Ÿm.{ o›üÇ~ÉŸòK|Ußþ2cöÒïØ~Øß?‡·'§$œƒ]_ï"·ýÎÿQ©zmošé+\ß7ùŸLðé|gŽü`ãò òÜ6þ¿àoóÛu{þ¿^Úuü¯k 8ç¯N˜ävôè9V~wºùêµzÆËF¥%uE輿¥åýk¸iÿ~šõé{hõ½ÕíïKþx dØÏaœ‚xe³¹zbÖ–ü?Ë[iÒþZ܇çý|úúŸ4þÚ9ÿ†9ý¬x?òm»wÿ…[â ; v9çìð+®—ÇOüQüô%ìýõÓóû¦ùíø׃ëÓ8ï[Åë~Ÿ/Õ¯ë],œYówÀ_ù*¶—ýœÇ…¿õŽdÚö0ÿÂý½ÿ¥?ë¯ä£„·gÓU°‚€ ( € ( € ( € ( € (ÿÔþþ( € ( € ( € ( €14?xsÅêñ‰â}#[Ö<5ªÏ¡ê¶¼:gˆü=}.™¯è„¶ΖZÞ‡©[ϧëUËG¦ßC-¥ì\DÑ×^3ŽËåBü/|Ѽiû6üñÿ‰~"þÓ‹ÃÕõù]ZäÿdÀw÷p88oþ <=oÑ+ÝÙ8:”ß-èá¿íÜ6;ÿ†—Ÿ[=mgowѼð/áÿÁÿÛáß‚uŠ÷³kÿ³'íE ¿þ?|xøÓovc‰-Žiñ“âW­ø¬gqÛŽ˜'Œ·eŽ}øzü]~V_'mˆ{?Oë·ç÷LËüô§±ç± /ñm¢ÿ†ýz?M¯fšóÀ_ù*¶—ýœÇ…¿õŽdÚöp®ôcêÿ7ý~[Ïâ/Èújº ( € ( €14ïøsXÕ*…|VY_…¥ŽÃQÌ0ôªJ® ¶#ˆÃã(SÄœêákÒÄSŒ¨Õ„å·\‡hP@P@PÿÕþ«ÿm â‡í•û ~Í_õBÙ§â·ÿjÿkž_jþð¿Æ¯Úár|?>xæ]"ÿIoè?ð®ücû@üS´øo©]Þèþ0Ö>ÙkŽ‚U(ÀuÏxsöV“Zýš`ŸŠ^/¼×¼UûYë^ ¸øUâëÿ´U¿Â;ÝölÑ~,üBøGû'éž!ñ_Â蘷ñ¯SâßÇeøað›Wñ÷Žô- 8ñ7ÆkÍ#Tø“§¯‡õOJð|Mãà Ÿ¾;þÕ |kã߈ƒÆß ü*Õ<)ã»o ßk†í> Ûø'Áà·ôP3ã_éÞðw‹|q¬Ey>‘àß ëÞ+Õ`Óã†kù´ïiWZ½ôV0Ü\ZÛËy%µœ©m÷Vм̋-Ä(ZDô2œ¶¾sšå™FT¡ŠÍs [†yJ!_‰§†£*Ò…:³(Ô«RP¥RJ ¸Ó›J2ó3¬× äÙ¶yŒ…j˜L›,Çæ¸¨aã â'†Ë°µq•áB*R§:Ò¥FJ”gVœ%6”ªB-Ê?ÿÃ|9ÿ¢]ñ›ÿ~ÿó×ÿ=ú×ëŸñx—þ‡Ü+ÿƒóÿþ‡¿_¾çâñ1<-ÿD×ÿá? ôVðßßè—|fÿÀ_…üõùü>´Ä â_úp¯þÏÿú×ÿ%ïäñ1<-ÿDÏÿá? ÿôWý|ÐÃ|9ÿ¢]ñ›ÿ~ÿó×ÿ=úÑÿ'‰è}¿ø??ÿè{õûîñ1<-ÿD×ÿá? ôVðßßè—|fÿÀ_…üõùü>´Ä â_úp¯þÏÿú×ÿ%ïäñ1<-ÿDÏÿá? ÿôWý|ÐÃ|9ÿ¢]ñ›ÿ~ÿó×ÿ=úÑÿ'‰è}¿ø??ÿè{õûîñ1<-ÿD×ÿá? ôVðßßè—|fÿÀ_…üõùü>´Ä â_úp¯þÏÿú×ÿ%ïäñ1<-ÿDÏÿá? ÿôWý|ÐÃ|9ÿ¢]ñ›ÿ~ÿó×ÿ=úÑÿ'‰è}¿ø??ÿè{õûîñ1<-ÿD×ÿá? 
ôV|YàŽ!øqñÅÿ<-¦êiþ3øãßø›À­Îž$ð×ˆÚ F'*ÄâhKÛP U-l]B¼\*þÃÞ æ\1ÅyßåXzÿPÏx‡=̳>ÅÏ fvaæì#¨ðøœV a0˜˜¼6&j´œœ°8š³ÃÉN‡ìoÃïˆøŸáM/Æ~ Ô—RÑuHÛk0ÞX^BÞ]Ù>&Óµm:à5µý…ÀYmçB>ddwþEÏ2<ˇsŸýO·ùýøö®8½º—ü7¥üî˜äµý|ü÷óÛðµ‰#î;óþxíÛŠé‹z|¾}»jý>ë’~N|wÖÓMý½|Yfï·íß²ÇìêÀ|Øcoñgö·ô=A”;c%Bá|\÷.+"¢Ý¾±<Ù¥g½8e­ªrZZO·-¥#ó;Ä{Ë'…íí0øÇ×^IÐìÓÒý9Ÿ^UfÏ£ü©àDKÂqœd{©àç©ã±gÍ}~OŠønûiå÷÷«æØàÁVÙ_³µ×e¶–}÷êž·÷u"ºµ×ÀÞ~çìÏûUgÜŠ?±ž;Ž£ŒdmWkë3¼ùu%{µ‰¥çÜâ=,ík-tè®”~÷&Ÿ7´êù#¥úß½´ÛDï¶êçŲnjÛKðOÅ‹&ÑoûjÿÁE@Ô¬ý¿¿iysŒ†ÿ–‡‚ö9ÎêþŠà¼±b8[$¬ãw<uµþÎ?Êÿ—¿ß±õ4(óÒNÛʦÖéRkÍî­øi¹ô§ü,'ÿž§¯÷×óûç€už95ôÿØËùþÿÚ¿øÞÔÛêßÝzz—ŸÏ¯.ç©~Â×F÷ö}{¼†7??kùò1ÖOÚóã›{õÏ'È vÿ3ñd=g×ü»ÇT—÷T|´/—CÊš´ê.Óšû¤Õ¾_ðÇØ¯â:œ`Óœœƒ^4º}<´ÿ;.êþZØÎ_¦š~èÿ—Mû_OOóè:ŸSßÂôÁëë÷}ÿ=õí¥îKîþýým¯M´}t¿/½"úsêy»s^yéø郷~û¯Õ¯Ïä옞¿ðÏM{û׿K+?+´|×ûiÉœþÖ]äÙþ;öÿª[â°?…}2lcŒÝt?‰ þ8ÿéKü÷¶–ës7³ô>˜÷ÿëp;žÉÎqSü;Áýÿ×ü7ô†|ßðþJ‡í¥ÿg1áoýcŸÙ6½¬'ð#ë/ý(Æù~GÓUÒIÌø×Åzw<âßk^O¤x7Ã:÷ŠõX4øášþm;ÃÚUÖ¯}Œ7¶òÞImg*[G=Õ´/3"Ëq ‘= §-¯œæ¹fQ…•(b³\Öá§^R…WÇbiá¨Ê´¡N¬ãJ5*ÅÔ”)T’‚n4æÒŒ¼Ìë5Ãd96mžc!Z¦&Ë1ù®*xÂx‰á²ì-\exP…J”©Î´©Q’¥Õ§ M¥*‹rÆÿðßßè—|fÿÀ_…üõÿÏ~µúçü@ž%ÿ¡÷ ÿàüÿÿ¡ï×ï¹øŸüLO Ñ5ÆøOÂßý‡ü7÷ßú%ß¿ðá_ÿ=~­ñx—þ‡Ü+ÿƒóÿþ‡uÿÉ{ùüLO Ñ3ÆøOÂÿýÿ_4ðßßè—|fÿÀ_…üõÿÏ~´Ä â_úp¯þÏÿúý~û‡üLO Ñ5ÆøOÂßý‡ü7÷ßú%ß¿ðá_ÿ=~­ñx—þ‡Ü+ÿƒóÿþ‡uÿÉ{ùüLO Ñ3ÆøOÂÿýÿ_4ðßßè—|fÿÀ_…üõÿÏ~´Ä â_úp¯þÏÿúý~û‡üLO Ñ5ÆøOÂßý‡ü7÷ßú%ß¿ðá_ÿ=~­ñx—þ‡Ü+ÿƒóÿþ‡uÿÉ{ùüLO Ñ3ÆøOÂÿýÿ_4ðßßè—|fÿÀ_…üõÿÏ~´Ä â_úp¯þÏÿúý~û‡üLO Ñ5ÆøOÂßýŸê?u˜¾;xëã§Ãí?VðÍÖ¹®hoo xªM&3â ižðG‡µ ÅøwYñ„v—z®¨Üh×Ð_]jÇÙõX` 5îû0•8)à¼þ®,þ¿–¬Dÿ³ñøœã4ÆÐÅà*cp¸*³œ0øº4ñTeF\=¥ Jð¥^—áØÏ±ÔüDÏ8û†èbòÿ¯âòõ»6–Ú™n"ɲüN3§—c3 §SÄTÁW†"­|_g‰„-:øjÿ­_ >,øSã/ƒí|[áiäAæoD¼1®±ár¯4MbÞ7uŠê1%‚xËZj63ZêZ|×7vó¿ò¯pÆiÂY­l«4¦¹’ö¸\U>g…Çá%)*XÌ-F—5*–jQiT£V3¡Z0­Npö?ñ~Qƹ5ç(¨ù\ n ³ŠÆe˜èF.¾N-¨Õ§Ì¥ ÅÊŽ&„ébpó °œý:¾tú“ã¯þÜßþ|ZñgÁ ö€ñÇÄ?i^Ö|g§|ý—ÿh_ŒÚO…ì|m©^xDë~'ø_ðßÅ~Óîu‹]#R¸‚ÎçTK¨á³–IàW,ëþøùðãÆ¾=—᎓sâkAá-cÇrøgÅ^ñ§ƒu$ðŽ‹ñÄ g×L>*дƒö ÿxkRþÁ—§ˆ¼<úg‹´?í ë:N¯vêšÎ³¤xsGÕ|Câ SOÐô M¾Öu½kV¼·Ó´­#HÒíe¾ÔµMOP»’K?O²‚k»ÛË™b·¶¶†Iæ‘#FzøÛáçü+öfø•¯x{AÒuo‰þÿ„ïÃÞ"ñgÂ{âgÀ_ß |!ñ£Ã¾ðõÇ‹uíCàÿ‹~"øÃ>øƒ=¯„¬ï¼ag¢èZ…Ljuߨ_øëÃÚN¯à»;}>…Ò¾4x_ø)¢~Ð>“ľ(øoâo‡:ÅoOáoøÃÄž+×¼âOÙø£C¹Ñ¾èš%ïŽu=_Qѯí'·ðÝ—‡çñ M/Ø¿³â¼ êÔÆxßâ‚þØhšŸõûOXx“Æ~ 
øy¢\ÝÇu"j>4ø…âM; Ð!\:ÝëÞ$Õ´í*ÖIU-cžéêx W™;:ÿÖþÌ?lÿ~Åšü-à/ÛoIøuâÿüOñ,–øñá]ïÆM;Å~+ð®—uâƒ6•à›øÎiµ ivWšì´zRO£Gm%ý½å´‘yŠáñEÿ§ñ‡ÁøûáìÝ7Àëz'‰¾|8ñì×a¡xÃ^$ñÇÅH>hÚæð÷Æ tÛkÞ-ø¡ãuðSjh¶Z–µ?Šnç¼’çFÕ¯o%únïö&ýuü ÿì™û5_Cðï^ñŠ|çÀ߆WQø'Äž/ññеï Ç?†%OjÞ#ñi_k—úOÙnuOG·y$Úµ½Êq¿~/þÿí~%Ÿ‰³|$ðÄ_¾ | ø·ñÊ_‡Ñj—~ø³ñ÷Æ—ø3ñm'Dð¾¥}¨üNø‰ã»Y´¯ x—G´Ô|aw®"gŽGÍý“µ?ø'ç|YñcÇß±ÿ†ÿg;âmýõ«üsÔ~ü6ð×ËÒ^x¢êóÄ}ÇÅ$økáOj0jšÞwã >;j–úΧi-íü•€}Ã@x·í$qû;||>Ÿ¾)Ÿü±µÚú¾׎x1wâ¾ÿÕÆÓóûŠñ'þM×ÙÅ_ú¢Ç€_ðvó›ƒÓqç§®»/øé5§Î_güïúÜ»ËïÙíüÊý4]uÑû¡ÿ ý5s×qëô ß¿l0 jW]—®ÿ>š¿é;X)ÿ4–½Ý¾ëÛ·}öÖÑ?á íç7¦ãÏNxëÜç#Ó€ /«®ËþúMió—Ù>·.òûö{2¿M]t~èÂAÿM\õÀÜzýwïÛŒZŸÕ×eë¿Ï¦¯úNÖŠÍ%¯woºöíß}µ´OøH;yÍÁé¸óÓž:÷9ÈôàKêë²ÿ‡þ“Z|åöO­Ë¼¾ýžß̯ÓE×]ºðÓW=p7¿@ÝûöÀã–§õuÙzïóé«þ“µâŸóIkÝÛî½»wßmmþÞspzn<ô玽Îr=8Òúºì¿áÿ¤ÖŸ9}“ërï/¿g·ó+ôÑu×Gî‡ü$gÍnþ">¤á¿>Ø %©ý]>‰·eß_ÃWéò{Å5¯3^­ÚÝt½¿=öÖÑýý€< ñ+ûK[ø¤oîü=ð¯ÄzsYÛhWQõx(­<_gk.ѦØhðE6Ÿi¯D«sâxåò¾‘¦é÷3ÿ;øåžpõJx>… xî#Ëë*•qÔä—ö6jR©•Ô©ž"¶"s…j˜Y.L£Íîâ+Ö„?¦¾<=ÄðÄc¸¢Xйw æXeK/«oõ‡MƳz4æÒÂáðtá,=,t´Í#._{ †ÃÕŸêM7ŸÕa@&iŸòyøfoÙCøtÿmŸþ±Æzu¯Å¼Yþ>Gÿ^±ÿú^õ2åxÖõ‡å=ôkï^zY@úý1âÆ3èÇ=@ù¿'ƒÒý~þŸð|þ[ýþí¯ví¥öo¯^êÏH¨üº^ŸáÓó`®˜½?­~÷×G»¶úÙ õÿ.ýîúï}ÖŠò²?#ÿg/‚/?eÙžþÏöÜý¦´KïÙëà½í¦ƒ¦ø?ö1›IÑ-.¾ønh42mcöGÕuÙtý2ÒÎ]g[Õõim¢‰µRúùæ»—û‹!ÂÓ 3U«Ó½ZCØ´¯¾_ÞÒ›il¯+Ýjä—)ù¶mˆœ'U{*SQœãy*—jíjáVšÛW¶÷_Ëo|øÅšöÛý¦¦êÍðì`9à0‡öG„… q»áCWê¹N\åÉlË£÷VZìÞšsåòj×½´ôÖççY¦=E4ð)ÿ‰ã——ØÆÅvVIõ×O{îïØr7‹ö*ýâ’yn¤‹ö[ýŸ’K©ÖžâDøMá${‰–Ö kešb ޶öÖðofX`Š0¨¿çÎuÿ#Ìå[þf¹]—ÖëZÚß×~—kíBá¿Ý°ÿõâ—þ›¯ç÷ŸRÿŸóþ•qAôû»ú~»u{Ù—5³ûÿ¯—è*àŸ§¿åù€k¦OO=M6ëó_g3ñ“ö®ø1uñKöíñ.£eñ›â¯Â‹~˳´+Ë_„71jãPø»ûYÉçj£â‰R´k5KdÒåÓ-Ù'˜ÞAtâÝâÏ€Êqk*­˜eô±•°õó‡©S˜Ñtc8e®qQÁc°ÔêFr§m ÙÅr¸ÅÊ5=ã<>®3,ž' ó§Gìå*¸šnšu(¹$¨b(ÆJN1¿´S³Š·-ß7¢xOöañãy[mßÚ¢Ðy~ýŒNàs7ìƒ9'iêÙlt >jû<ª†XÔyrú0ÿ#ÿ·âj»·Ý·ÑèÛ8ðTðžêXhGÍUÅ7¯ø«¿-m×Dõæô߇ üGðÛöÂøEq®|~øÁñ¢=_öcý¨bµ´ø¡¥|Ó­ü>Ö_?c†’ãFƒŸþ]Í.¢'Hî×]¼×mÒ;Kc§Ác+^=Ç»R¡K- èÓäæÅÓæ÷ç-¨Öµ¹æì·ûNû;+Äû,¦áÏÉKÅ^Ò”•»7'$¿½§Ÿ¾ Á=üñHø§ã§øùûIx>_~Õß¶Ö·sá¿øám¯†tË»ïÛã¬×‰¤[kß*ÔþÊ–Z-”0Y-µ±Y¸Ÿó¬Ë3Åç9Ž34ÇÎLn:¼±™Ò¥K ÖŸÇ8P£R¤›ûáGhF1²8g9TœêMÞs”§&£');¶££^ÑI-’Z#Øÿ1ÛÐŒqÇ8à™9è 5Œ^‹ó×§oõÝû^Pöôü»^«aïøõã=8ÀÆzr®˜½Ÿm{ztÛäþvd~·Zlü›½¯»ë¾‰6H:¯ùôþ•tÃìõÛm>Z=×çÓ 
¿Í¶í¦º«§ß}®´Z4šÿm/ù3ŸÚËù¶ŽçÓ§ÂßüØÁÇ`·Þ8ËvP½¥Öó‡—ÚZ÷þµêCëóÒÿ®Ÿ~Ÿ#éŸOÃôúÝ1ÐüÇ ƒ[AÙ¯óùyß~Ÿ‚móÀOù*¶—ýœÇ…õŽdßËéÛ¥{¸OàGÖ_úS1ŸÄþ_‘ôÕt’x·í$qû;||>Ÿ¾)Ÿü±µÚú¾׎x1wâ¾ÿÕÆÓóûŠñ'þM×ÙÅ_ú¢Ç€_ðvó›ƒÓqç§®»/øé5§Î_güïúÜ»ËïÙíüÊý4]uÑû¡ÿ ý5s×qëô ß¿l0 jW]—®ÿ>š¿é;X)ÿ4–½Ý¾ëÛ·}öÖÑ?á íç7¦ãÏNxëÜç#Ó€ /«®ËþúMió—Ù>·.òûö{2¿M]t~èÂAÿM\õÀÜzýwïÛŒZŸÕ×eë¿Ï¦¯úNÖŠÍ%¯woºöíß}µ´OøH;yÍÁé¸óÓž:÷9ÈôàKêë²ÿ‡þ“Z|åöO­Ë¼¾ýžß̯ÓE×]ºðÓW=p7¿@ÝûöÀã–§õuÙzïóé«þ“µâŸóIkÝÛî½»wßmmþÞspzn<ô玽Îr=8Òúºì¿áÿ¤ÖŸ9}“ërï/¿g·ó+ôÑu×Gî‡ü$ôÕÏ\ ǯÐ7~ý°8À%©ý]v^»üújÿ¤í`x§üÒZ÷vû¯nÝ÷Û[GîŸØ7Á_õ¿ÝüSѯîü9ðÎÖÒ÷Ãþ šhÌ–¿oíÖh­4½>ÒLDðxKP’K©üV£Î´½ûO†´Ö/<@¶‡øÝpþ'£Ã˜šñüC*´±˜>IrÔÈ(O’UkÖ«yO2¢£eÍ(Ô¥ìñÕ¹],?ïßG쇉qyý~*Áâ+e¼/Nl¿êGš—â)ª£†ÃÒ—ºéå‰J¬óUïR­írÜ4ªªÙÕÿ_kù\þÇ?14ØÓÅ'ý¼ÿjOþ0ñÆßø#ÄÚì­oðÊãá—ÇmÀžñõïÃm Çiã;_x'Á^'·ŸWƒGÔu-Odñ®om©XjWvÚY¾±{â 5ý¿m-cáïÄ-+Cð…þ‘}«xoÅš¶™iâß„ú®£ñÁ—ðV¿‰_´çоZZøƒÆÂWÓ|Vý—µÄ¶ƒDñýöð×Ä)âqð¿âf¿á½;RñdZ@Õÿa‰:ïü»öæý”µ;|OÚ—Nøéោ¾ñ¶Ÿð[FÒ¾hŸ¾xká¼+gð¿ög¼×>|2ð>¯ãÛ/øßWøaàOø¶Æñˆ|Qy©èÞ!ñÖ¯á/ €t?|?ûTþÙ?ðÎ~ñ?ì‡âïÙ²døÙñ;ÆÞ2ø‹ðÄžÖ´ÿ‚|þÚò|røÝâ oøãV¿â¿ÛÛXÐk«6ý›<;á_øG㟃¾;é³/†uÏø_XñíoñEøewã¿‚ÿcøAãßx7ÀßõÿƒöZï|Y¨éð&ãÐ ¿ÿÁ?¼=ã_„z3xkþ  |7—À`ø³á_ˆ¼aû0xŸÄõOƒŸî5Æà•ø·ªþÏš–¿ið»SÖ´»¯ŽŸüwà¿?´“â=sÁ~8µ‡@ðÿ†mõðèþ€?ÿ×þ­?oƒ?~/~Ó?ðNFø_ã/‹ÿ àð‡ÅoÚUñwÆŸ„> ð‹ïþišìÇñGÓ_]—â·Ã‹? 
´=7ÅúÅÕ¯„mîüUáIî÷TŠÇ@»¶×&²™>cý´¾þÒÒøëÃ~³ÕÿhoÚ#Âú?Ãÿ؆÷Sñ®£àëYí&j>(ðÏìýûyÏñãmï€ü7ð÷¾&ñ7Ä;¿…¾¶—Äþ4Ñ| £ø‡[Ðô¦·¸»Ó“ív«8ÚŸõ _öˆÿ‚ƒÅûRx'ágÅÿ‡¿¾~Ç~-ø©xÓã_Á³÷‰>0üAø‰ñ“á¿Ä}MÑ>üdð¿¾.Éáï‚zÃïñ7Š| ø~}cãEöŸàÛ­~Koͦ€~¢Ð@¾$ðöâïkÞñ ˜Ôt躧‡µÍ=¦¹¶Ú>µc>›©Ù›‹9­®àVW3Àfµ¸‚æ þdE*£¯^ŠË1Ø<ËUÐÆåø¼>;]BV´+áê¨U…JStêÓŒù*Bp•­8J-Äâ̲üo—cò¬ÆÖ2üÏŠËñØw:”•|6…L6&‹©FtëSö´*Îô§ ææ§8Í)-Ã~ÊÿôNµü:¿ù¾¯Ñ¿â3ø‘ÿCúøð×ÿ9Ë?âxUÿDÖ+ÿ~.ÿçèÃ~ÊÿôNµü:¿ù¾£þ#?‰ô?¡ÿˆÿ ó˜?âxUÿDÖ+ÿ~.ÿçèÃ~ÊÿôNµü:¿ù¾£þ#?‰ô?¡ÿˆÿ ó˜?âxUÿDÖ+ÿ~.ÿçèÃ~ÊÿôNµü:¿ù¾£þ#?‰ô?¡ÿˆÿ ó˜?âxUÿDÖ+ÿ~.ÿçèÃ~ÊÿôNµü:¿ù¾£þ#?‰ô?¡ÿˆÿ ó˜?âxUÿDÖ+ÿ~.ÿçèÃ~ÊÿôNµü:¿ù¾£þ#?‰ô?¡ÿˆÿ ó˜?âxUÿDÖ+ÿ~.ÿçèÃ~ÊÿôNµü:¿ù¾£þ#?‰ô?¡ÿˆÿ ó˜?âxUÿDÖ+ÿ~.ÿçéñÀߨ~÷Æ?¾#ê^=ðÿˆ<1ð;Á?~!i>ð·ˆ·¡Õþ!éZG5¨<=ŸÄ.uËŸ‡Qi1Ø´ZíÅÅÁñµ‘·M2úëD–}JëõÎ/ñ†g dX\›ƒÌ8»6áÌ—šfX'„©†É1¬« Ía+ÕÁN®"¯ì½µµ½½½¼6––°ÅmkkmAomo,P[ÛìPà h±Åj©*¢(Pÿ*Ô©:³Z³J•'*•*T“œêNmÊsœäÜ¥9I¹JRmɶÛmÜþ̧Ntèѧ T©B4éR§Â:pŠŒ)Ó„RŒ!¥Æ)F1I$’Hš ° “4Ïù<Žÿͳ~Ê)þÚŽ2=yïî þ+â×ûÆEÿ^³—¿ƒüü×Üz¹nÕ½aÿ·@~~¸÷íü+ÔŒäôÇ`~oÉ`ú~>Kü´ÚûüãÞô~]=:÷Ù;Y(ߥìÃÿ¬xÀÎ;c7ý— ö9ý”øf~œçáw…[ô9ä`–ŠþæÈ*~ã ­—°¥®ûB=4¾ž{zû¿›fð÷ê¿ïI®ºßÖý;nõµŸ6?Œ­¿Ö€¹áÇ¿ºñÇñ…Þø9¯ÖòjŸºþO£øu¾öî’»»qüÏ6¦ýû.ÿƒëø­ö»³Õ™ûɘþÈßöl?þb¯ c·Ðú\ Wùß¿ø\Î?ìk˜ë×ýò®Ý.úzŸÑ˜_÷l?ýx£ÿ¦â}›ú}ǹŸni¯—3ü­Ýz;~ÉŸòK|UÜÿÃKþÚ]±ÿ7ñߎOn˜Ï½ë¦³ýì7·°ÂùÌ5-ÿS%úËó>š¯ýnÝýr}ðª_ëüÕ·Þ϶—¿¯â:œ`Óœœƒ]}?¯Ónºü„öçoÆÎÏå÷X_þ±í×·ðޏÈ'ÿÝ]P{¯òÿ‡ü:tº#¯ßßç¥úvOïå÷¤ç·øþYãß5Ñun«òôÿƒç¥Ð[;§~úõz5tÞªÖq>ký´äÎ?k/û6Žäÿá­ñ^3Àë€s“Ó¶k·ÿyK¿´‡þ”­ø/ëB%×¶¾~o«ïßî>™õýz Ûãçvõ9u‹½ŸËM¿Ïäíóºb>oø ÿ%CöÒÿ³˜ð·þ±Ïì™ôþ_LW¿ƒÿw‡¬¿ô¯—å÷Ïâ/Èúnº‰1|IáíÅÞ×¼)â1¨è'ÑuOkš{Msl/´}jÆ}7S³7s[]À.¬®g€ÍkqÌAüÈ&ŠUG^¼;–c°y–«¡Ëñx|vº„*:8¬%hWÃÕP« ”¦éÕ§òT„á+Zp”[‰Å™eø<ß.ÇåY¬eùž —ã°îu)*øZÿ† ý•ÿèjÿøtþ/ó}_£Ägñ#þ‡ô?ñá¯þs–Äð«þ‰¬Wþ$ü]ÿÏÐÿ† ý•ÿèjÿøtþ/ó}GüF?èCÿþÿç0Äð«þ‰¬Wþ$ü]ÿÏÐÿ† ý•ÿèjÿøtþ/ó}GüF?èCÿþÿç0Äð«þ‰¬Wþ$ü]ÿÏÐÿ† ý•ÿèjÿøtþ/ó}GüF?èCÿþÿç0Äð«þ‰¬Wþ$ü]ÿÏÐÿ† ý•ÿèjÿøtþ/ó}GüF?èCÿþÿç0Äð«þ‰¬Wþ$ü]ÿÏÐÿ† ý•ÿèjÿøtþ/ó}GüF?èCÿþÿç0Äð«þ‰¬Wþ$ü]ÿÏÐÿ† ý•ÿèjÿøtþ/ó}GüF?èCÿþÿç0Äð«þ‰¬Wþ$ü]ÿÏÓâ;OØnoþÔ_¼/¦è>)ð/ìéàíSÂ7QÔo(º½Ôu[;z}e™áïi–Z6‡¢ØÛéºN•§@–¶:}…¤K µ­­¼aR(¢ªîrÄ–þZÅâñXüV#¯W‹ÅÖ©_ˆ¯7RµjÕdçR¥IÊîR”›m¾ý4Gö.ƒË0x\¿/ÃQÁàpT)á°˜L=8Ò¡‡¡F ©R§£B)$’ówlÕ®s¨( € 
†{{{•D¹‚„Žh.'‰%T¸µ™.-§E‘X,ÖóÅðJ0ñMK+¢šš€ (ÿÐþþ( € ( € ( € ( € ( €>LÓäò>7ÿÙ³~Ê_ù*¶‡øã‚:öšüOÅÇjùýzÌ>^þ_éKÓcÕËv­ëý¸÷þ?¯¦=ÀÚN: c¶2@;¿%ƒ×{_çòßõ=B_u×mßgª½ÖÛÛ¢Ô_ÃÓ¡ý‚:z±Âà×L7òõþ¿.ËMIù÷é~Ú´Þ©ïÝh¯+#ò?ösø!ñ‚÷öQý™ïí?mÏÚkD±¿ýž¾ ÞÙè:g„?c´Öçᯇ&·Ò4ɵÙT×eÓôÈ$[K)uoXÕ嵆ÔuKûæžò_ ÐÃ8ׯNô©YStZ4"Ú^Ҕݗ¯&ûÉì~k›b% •¥Jj3œ}õS_{Fù*ÒIõ}uºKhâø»à—ÆÄ¡¿m¿Újn2|ßþÆ¿¤?²ƒ·^‰þ=5ªõÕ÷¹»þ¿Ëg¿§Ü/ùàœŸóÚºbö~ŸÖ¶·ß÷+5go?_ëúì~2þÕ?nþ(þÝ~%Ô¬¾3|TøQqáÿÙ_öwcøskð~î ]oþ-þÖnfÕSâÂ_‰nd´6j–‹¥É¥Ã‰ç7Q]Ê-ž€ÊqK,­˜eôq•èb3‡©W¢èÂ¥<µÕІ‡§QM ºÑ©Êâ¹%µ?Ïxσ«Ë'‰Â¼éÑÄ{)ή&Í9ÒæIP¯F2æj?ÄS³Š·-åÍèÞý˜|xâ0¿¶ïíS;qåøsö1;H^Fé¿d9ÎpFI%È䃊û\ª†Yhòåô£þF=ôÚóÅTwõ»}R³G žòÛ N=­Sïÿb’Û]ú>oKøwð›Ä l/„WïÇïŒ?Wý˜ÿj­lþ(i_të5ŸÅ?Øáä¸Ñ[àçÁ?„×R˨‹…ŽíuÛ­vÙÒØéÑYH×qíg”°ô²Ê5l]5+N¤¯j­ñÎ_‡ÞìϳÊaNÓÙÅCÝ2NRM_¼å?Á+®Ûž}û+xwÇÞ/ðÄÍn/Úã Ûþ ¥ÚxKÁþý™çðî‹cáïÛ»öŽðõ„tÞ8ýük⩼û=* »¹uj²½ýÅÓC,†ÞÊßë2¾ÀfnVIT¯ƒÃJMU©ÜiB)¤´ŠékmcÑýï½Ê칤¯dïïIoÊÓû¯n‰ÇÛ|ð_ð“w¢x_öÄý¥ôý2÷Å7ñÌ ¡~È×­'ˆ~#xÓ_øƒâëá-ïì«u:.§âßëzŒv‰(´±Žíltë{K k{h=)pf_&›£UµA?mVÊ4ãEhÖÑŠÖúÛ[»‡-^ú¶ßÃÝÝéÊÿ=<ö=“öUñOŠü]ð…¯üiâ½OÇöñkö‹ð/ü%šÎŸá3XÖt_†´GÅ?‡^ºÕ¬¼áÿ xa5$ð¿…´[{ùôoi··PKxlá–yK~y›a©à³JÓŽ?lÿölß²ãÿOöÏã¨ëÓ¯ç_‰x»ü|‹þ½fú^ õrÝ«zÃÿn>‚üýqïÛ°ëŒäôïŒá#‹ùÛMzéÕÁ—tõG¥-¿?N¿×ݰ“‚9¶8Æ:pGNxj鋵žŸ;Ûõo×®ú^Æ‹·g¿“Ö÷{[®ºkãoÙzßwìsû)‘ßöhøéÔ|.ð«½T}ÐþîΩþÏ…ÖËØR×Ë’7Z¥¶Ýíw¦Š?šæð÷ê»}©?÷Ý·{yn®´f?Œ­¿Ö€3÷ñúðqŽ£8-Ðoqž•úÞMSà×]5íµž«]¶¿Ku÷3Í©¿~Þ}6·}_šÝnß+±é¿±'ü™ì‹ÇüÛÀ#íÿ$§Â€Øì{㹿ΜïþG¹Ï_øUÌW]mŒ­®íuëò¾®_ј_÷l?ýx£ÿ¦â}=ý? àvéÆOsǶkŽÖ½uééÙ}æâÿ/óþýUÓu¾ß¯çø÷Òö2š³òéÿ_ë¢ê~PüpÖSNý¼¼_g$>ßû,~Î{Alnh¾-þÖz¨rÄã ¹&HÆV_77ÇG _%£)%íêf¼©½Ü#”í×DïªÑ+óBüÒüÃŽ±Ç1Éàݽ¥ [WêãR†Úi£½ûkxÛÞúGÁú§ú£»…î§©û¼ dƒÆ_˜+²ýžO‰øuíoŸßåæ¹¾×Ÿ‚«²¿þíöõKó½Í4ºûGísð7œíýšjÃÔŸ½ñGö2¿û zã‘Ñ~›6ªêe”o}1tÛùÑÄjõò}ì»Ù8ýîM>oioäOï}ÚYÙ^]›>.ý•|eý•à_‹±ä~Ú¿ðQaŒùkûþÒòޤ|žØç–â¿¢8/-úÇ äum~| ? 
J>_Ê—ç}YôÔhsÓRÑ')õíRkkv]ý/wéøX_íþœñž=zä_QýýßÌ×êßÕõô½­uénºß–>û ]}·à·óõñ÷öÀŸëæ~ןßߨ<ùÁÚWù‹a츛:¥ÿ>ñ²»Z1_¼öùGÍšåœãÚr_t™öëÓ¿aþÎnONI9¼H;¯ënÛßO[ë{ê™!ôöôü2qß±Ç×¾^˜½—õýÁ&_£ÿƒÛeÓ®úò¤=qÿ×Ï_lqÓ¾yýk¦6Ó·—åk«_môó±?Ÿ£ºùëÿn´µ{[á>ký´äοk/OøfŽÿŸü*ߟÐ×eûÚ]œáù«_'¹2Ùíµ¯-z½“µ–ý/kËé¿_רÈÀÁÏ=ºí¯u¾ë§«_ŸÉÙ2O›þÉPý´ìæ<-ÿ¬sû&ý?—å_Gÿv‡¬ÿô¶c?‰ü¿#éºë$( € ( € ( € ( € ( €?ÿÒþþ( € ( € ( € ( € ( €>JÓ¿äò>7ÿÙ³~Êú´ÿlÿóúö¯Ä|]í,ÇÿKÁy¯^»i»G«–í[Öûqôè8ÇCô¸ûÇýr+ò>ŸÖ¾_'»û®zo];ÿ^_ŸÜ/ÿXuçŽN䎹®˜=-Ûúüîfúúë¥í~êéY½o«Z.–?#ÿg?‚/eÙŸP³ý¶ÿi­Æûözø/yg i¾ýŒeÒtKk†¾šßHÓ&ÖdmW^—OÓ!u´²›YÖµZKXb}GTÔ/^k¶þîÈp°ž ÕzôÛ¥KJn‹Qæ„]—´¥7e²¼›¾ïK™æØ‰Â¥EìiMFSWŸµÖÒÒü•iYîïgÞß 1|]ðKã~nïÛoöš›¹ó|ûŽøÆ"ý‘â àB¯SŒœ×ê™FZåËl˧À²ç«ÛâÀ;÷WÕvw¼¿:Í1és_‚Ÿøž:ý·Ž6š}¬¢½UÙ÷‡ì9Åû~ÇñI<·RÅû-þÏÉ%Ôë Ïpñü'ðš=ÄÉmµ²Í1W[{khw³,0ÅXëüïÏþÝÎz/ílÇålegçÙ-üîíïCaݰÿõâ—þ›¯ç÷ŸR~éëŽ=8íô¸àìý¯ëoÒ[‡ùÈÓž}‰öçº ßkþž¼ôí{hDÒ¶öý}OÁ=\?jƒw¿ÿn¿ê6>*ü'¸ð÷ì·û:ı|8¶øCs¯ö¤ŸhÕSâ‡Âo‰çYP–Ù¥ÄâssԢݢÓʱ1Ë+ãð±uðø¬ÁaªTÄæ]Êž\çh`ñ¸Zu’‹½hÍÆI{9FòSü猨a*ã2ÉâpЯ:t±ÊS©‰‡³¼ésZ4+ÑŒ®Ôu¨§Êâ¹my‹á?Ù‡Çä…ý·¿j˜S<;û¾Òo?öC›<›qÉêùÏÍöÙU ²ÐQÀR­¶#ÿô¼TÝõêõëÊ“G ž òÛ N;ZÕ1-ÿäøMt¿Êòô¿‡ üGð×öÂøC>¹ñûãÆ„Õÿf/Ú‚+[?ŠWÀ6ßÃÍgñSö9y.4Vø9ðOá5ÔÒê"á#»]vë]·Dµ¶þΊÊCv÷ÆJ…,¯èSPæÆSæ´êKjínyÊÖ¾–üuGÙå0§gqs$å%kíyJ’º}4g‰ü ý…4/è?ücí ûHx/þÚ»öÞÖåðσµ¯„PøkJ»Ô?lŸŽ÷WÑé1x‹à׈õ¨­îo{ÆKínýÖâæaÉHSÜËüQâü‡‚ÊrÜ^–…ÃF„*ä¹N*¢JP¯>lF'Z¼Û«Rr\ó—*|±´#GߥŠÄR‚„*F1S©dèaçniÊþüèÊMݽÛvÑiaßb)>!x\×¼CûX~ÕañŸöð=¹³Ö~[Åý‡ðËöˆø£ðÛÑþÊZé<7á=%/¦Ý¶æùg¹Ž8‘ÖõêøÃÇPœb±Ùm¥J„ßü dMóU£J¬õyvÜóvWÑY]¥Ö;gûèhä¿Ýp%ÒO÷Zuÿ4~’|ø5 þÏÿ ´o…Þ×¼Wâ7HÖ|qâñîô‹ÿjúÇÄø—â/ˆïukDðæŽÏ/ˆ¼WªXì4K`²ÖþS¼/3üa™b³|Ùã\'‹ÆÕ•|D©R§B›©4”¥4cN•$ìŸ%:qŠzEF:R”§)NNò”œ¤ÒŒSrwvQJ+[é—d´G°úu펖y8ã¿\ã šÆÚwþ½týog«Œ‡ÿ¬{{Œã íÓ©Q]P{ÿŸõä¿Âz§ÿ þKïütBŽÞøôíǦxàvä“Àæº`ôëåòíøuÕÞÖ³#¿–ï×_5®ºÚVI-Ò>lý´¿äοkû6Žÿ‡üZßzçõž+¶ƒýí/úù -ýåééÒÖÝîL¾®é®÷ÓkÞ×Ýß]ïkÙGéÁÏ?Lvéל`qèy#Íi{?øªÛÕm}S\ÉïýZï»õzŸ7üÿ’¡ûiÙÌx[ÛþlëöMíÆ?Ï^µôØ÷j~³ÿÒ™„þ'òü¦ë¬ € ( € ( € ( € ( € ÿÓþþ( € ( € ( € ( € ( €>JÓ¿äò>7ÿÙ³~Ê?ú´ÿlÿ§óü«ðÿ4¯׬ÇÿKÁz~qêå»Võ‡þÜ{ú“Ó˜ãúvàäúcå-ù^©íצŸŽ·ÛFûëcÓþx dØã§BsÕÕ¯ù=÷óû·_uŒå¿å£ßþ«[¯mT¾6ý—­÷~Ç?²™ÿf^GÂï 
°þé9ëÕGÝáþðáúŸìø]mûŠZùrFë^Ûo'k»ì~i›Ãߪíö¤þwßvÝí庺јþ3¶Ç›Ç÷€íí´ã×'ì]Ç&¿\ɪ|:ëùi£Õk§Kô·_wó<ÚŸÇóùzêûÛu½ù]KýˆÿäÌd`;~Ì_ÿõUxS¶=sótç¿ÎLñÿÂæu¯üͳ+÷×Yþ­u¿]íèÌ/û¶þ¼QÿÓq>ŸþŸAœuØ\c<kŽoø/ïêô{]ßÎþöâþçÿÕåÚº"ïgnÞ¿-µÓ·¥®„öþ¿ëòGåÆýetïÛÇÆŽÛ~Ûû+þ·©˜þ-þÖk»ƒü>nrT(ÏÍ*²^,Ó°Õ²jM¥íêf­&Úøc”+éµ¹·kKë8_Þü·Ž«ûÇ(‡Yáñoî©A_f—Ž­ýèÜúGÁú§ú¼¿ ?0õà}ÜrÎ ¤oµÊ1?·ü<—§^ž‰ý¯;Wdý~ûZút¶ßúNòÓ[¯´þ×?y'ìÑûVqœÿÍPýŒFp:´s¸’Ë ¯¤ÎjûL²ÚºÅÓùÞŽ#]ü»tëwïrióûOð/Ï·—Me¶¯S¬ý“¿ä—x§þÎ[öÐ?ù¸Ÿ±ž?ǯ$f¼Cýä{ý_ž‡õ×ñ´}ÈìôûSK£·3Û^Ÿ/$ì²oü’ïÿÙËþÚYïÇü6/Çp)˜ÏNN:7EwûÈíü %½‡£ÓnýuºbÂú¾i-­ö›ßTü¶IéÖçÓ‹ÐqŠqz'óþ´·áòèCVmvþ¼ÿ?¼xÿëžsëÛèsÎÞ¨5u®žZž—Ý_O;Qþ1ïŒñߌ~A¾^˜;?ëÓõìûi{€¿ŽztÇNÜpN:6OOÍz`õ¶¿é§çòv3kË_ÆýuÕk{ÆÝ{|'Ͷ‡ü™ÏícÿfÑñßÿUoоžÝ¿,Wnþöšÿ§ÿÒ¾ý|×N—µNöWO_–½z]­]/Ëï}6§?נȈϧãSX;«u_×õ¯Ý ¥Óþ糿tì÷´~×Î??ä¨~Ú_ösÿÖ9ý“}‡òüëé²ÿ÷Z~³ÿÒåýÜóøŸËò>›®ÒB€ ( € ( € ( € ( € (ÿÔþþ( € ( € ( € ( € ( €>IÓÏüfGÆÿÙ›öQÿÕ§ûgÿú¿ppJþãñòúõ˜ÿéx#ÕËv­ëý¸÷ñ×õÈ1ß¾N1Ž>¹¯Ç¢î»ÿ_×~ë{GÓ$öúõ}:uòF3šê‹Ùÿ^ºß§–žm"eýuþ¶Ý§o'e/ÈÿÙÏà‡Æ ßÙGögÔ,ÿm¿ÚkD±¾ýž¾ ÞYèo„?ctÚãᯇ&·Ò4ɵŸÙU×¥ÓôÈ]m,¦Öu­cV’ÖŸQÕ5 ×ší¿¼ò,'‡Ã5^½6éRÒ›¢Ôy¡eí)MÙl¯&ï»ÒÇæ9¶"p©Q{SQ”Õçíu´´¿%ZV{»Ù÷·ÂŒO|øÃ—wí·ûMMÇ>o„?cO8Çîdx˜Å±éóýW(Ë›å¶eƒV~âË^¯o$ú=Uíö]ÑùÖiIJø ÿÄñ÷ÚÛÇK$íß~o»¿a¸ä‹ö,ý"’yn¤ö\ýŸÒK©ÖžáÓá?„Ñî&[h-­„³æH¶ÖÖЇ$E Q…ΜóþGÙÚKOí|˶Ÿí•ºõ½¼öVݸÿCá¿Ý°ý½…/ý77ùýÇÔß玞øãŽ8÷Á8ZâƒVóüàþÝí…ÿõç>çóŽƒ9íÍtÁéèûßOÃÓš·º_Öÿ—Þ~4þÔ߯~(~Ý~%Ô¬>3üVøO?‡¿e¿ÙÖὯ¨usñoö´Oª¯Å„ßβû&Û/ì¹4¸öÏ9¸†îQÁ½lUІY_—ÒÆWÃâqë RxœÆ‹¢¥O.æJ(i4ë?ŠŸ±ËÉ>Œÿ~ |&ºš]D\,wƒ^ºÖíÒ;[_ìØld7rÝzÜCOK+ÃJ….G,m>kTœïûŒE¾7/¾êûh®ãöyL)ÇŸ’ 7‚æ´¥$îúóNNëñ¿£—û)ø_ÇÞ1øñ/]‹ö¤øáàKy?lOø(™gá?øoög¸ðî‹cáßÛ¿öŽðíŒ:|Þ9ýž¼gâ™üû=* »¹uj’½ýÅÑ…à´6övÿS•ðž[˜eÙ~2¶¤êWÁá¥)*Õcv¨BAJËH¤ìãw­´g ÝKÉGDå-”[øŸ]^û]}ö=³ÁŸ³×ˆ|¤^h^ý²i»M:ÿÅ~;ñ­ÌSh²㿈>$xãÄ_|Wt²Ü~ÊÒJ‘^ø«Å:Íí½¢"ÊÚâ;+EŽÚc_N\–Ió:[Q„?WHS‚„tç飿õ|×n*õz>­í®ík}ïúéd^ý•üQâ¯|#’óÆ~,Ô¼q®èß¿h¿ÂY¬éþÒõkEøgûD|Røsák­ZÏÁ¾ð·…ÓRÂþÑ­¯¦Ñü;¤ÚÞ]A-á²I§“w羞3Æ`éG’ ‘„bå)I~î ÝÊíݾ¾ŠËBÕÜSwo[»+oä’¾÷Óï>ŒáÓ××ðÿ=ë’/Eø?Oéz»Ûf¿ý`ã8½ ˜ÎÑ]Q{?Gý/ ùŒãØãÓH°ãöÈøÝÿfÏû(÷ÿª§ûh}?x}WðÏÞrúó˜ëÿoàôòûµÛCÕËv­ëý¸÷þ˜ÇoóÓòôþa¿ƒéýY~ 
ݵ~÷“qôÉçîÁÇlqÓA9Žrº¡·õ÷õûïòZ‰ìÿÎߟËî±ñ·ì¿cŸÙLŽ?ã>Ëáw…Xvÿ¼pT”`ämþöáêŸìøU}èQ_ù"º¶·ÓMï»»û_˜fð÷ê»m9?;ß}þ}{4Œo[­?Ĺ÷Ûè¹9<íî͆Û_®dÕ5ƒ½ÞŽß—åÝmÒ÷?3Í©ü嵞—Õë­·[ß•ØôŸØ“þLÏöFÿ³bøü5^ã¿áý1šÿ7³ßùg_ö6Ìõ2·ç·ÌþŽÃºá¿ëÅ_áÅú+mú7ïQñ×éÓŒØ ð9ç'Ž™i¯ØçÛ¥µ[¿V|oû(øÎ=+À´ö“i·ýµà¢ÃˆÇ›ÿý¦%Æ£~^¸Æs_Ñ–¼G dU­~|-}£åÛþµ¤£ENŸ5ž²©²óò_Þ·Oåüî}3ÿ /ùíßûç×þºr;:ÿ{_QýŽÿ—ðÿƒÿ ýíQ¯ÕüŸ^óæû´WékÏì7t/~OxE×ÇïÛ|Œœù¿µçÇ7søÿ%?öÑÿ³˜ð·þ±×ì›ì?—ç_W—ºRõŸþ—#–>›®ÒB€ ( € ( € ( € ( € (ÿÖþþ( € ( € ( € ( € ( €>H°ÿ“ÈøÜ3ŒþÌß²·üÕ?Û?§×¶?"M~ã¶' ôç1ÿÓ˜?ëiy+ØõrÝ«zÃò‘ï秇#¶Àcéœüz/]:ÿ^Iüߥ›G¦H{ã¦9þ]OqÆW¿AšéƒÖÖו×ëåÕñï…ÿdïx/Á¾øáŸÚëö•Òü#à h^ð¶’4?Ù:ðé^ðΕk¢hºx¿Ô¿e»½V÷ìZe­°»Ôo.ï®L^}ÝÔ× ò·êØO¸¯ BŠË ©ÅB?ì•%¢VWæÄ½lž½vìåãWɰUÛuaRW¾Ó’»oÉ-nÛòég¤|“ã‡ìÇñ‡Cð.£âß~Ô_´‹u Ýéúö±áð×ì¡6©âïé×+'‹ô =¯ìÅeöoË ý¶ëÂà\Ù_ø†ÎÇG¾·ŽÓTžöÓéòßøÞcMâ2º1•ãŸÙðå§Q«S©QJ¬ïKžÊ¥š”ay)6”O"¿dUÓu0õ§Ý}b²mh®¬ã«[k¥ìí{쀚7¼7ð7à߆þë·>(øoáÏ…ü=à^Ü[^_ë¾м)¤é^ÕïîlìtËIïõ ÒÎæöK}7N…®¤—˰²P-¢ü£Z¾#Œ¯‰Š§‰­ŠÄUÄB)Æ1­V¬êUŒSrj*si.iY$¯ÐúxF4éÆ¿$aEï¤cÊú»/5ÖÊèöè?ÓƒÛŽ¸÷Á#nIe¢Õìµëëk÷óó¾—€=øíÇNç§jé‹ÙôÿƒëmÖ÷·ž·ñ«ö¥ø7}ñ?öêñ>£añŸâ¯Âyü?û-~Î,->]C«›ÿ‹Ÿµœ¢}M~(ü&ø˜ë=‘´ dt§ÒÓd÷"â§0¼]sÀåx˜eÕñù},]z¬zÃÔ©ˆÌ(º*tò×5àñ¸jsnP‹æ«²-©´¥>oθֆ®7,ž' ó§Bº¥)ÔÄÃÙóU¤åhЯF/™Æ:ÍI®_vÉÍKÒ<)û1xñÄ{mïÚ¦ wáÏØ¿åçÒ_Ù`@êT¶3‚ÙËû¬®†Zùyp¡­ýÚøîû{تm¥Úîã{*x7Ël-8öåž'ðæÄ>žvÛ}Hø}ð›Ä l/„Së¿þ0ühM_ödý¨¢¶³ø¡¥| Ó­ü<Ö?c†’ãFoƒ¿¾]K.¤.;Á¯]kp$v–ßÙÐØÈn¥ºõ8’––U‡ö4ù±°æ\õ%µ öÒr•·ï÷Ù³ìò¨S‡?³Š‹åW³“¾½y§&ŸõÙËÃþþÃ>6Ð~(xÂÓö—ý¤¼ ‰¿jïÛw[ x:OÙýü3¤]j?¶WÇ{«øôƒãÙûÅþ$÷7Ò\^²j~#Ô¤Ž{©R £´X-à÷2ßø« À`r|¾YbÁà°xXPUòê5ëZ­x‰¹Õœ¹¥ûÊÒkMŠÉ._z–&­*|qåR©kÓ§'­I¶ýè¶ÝÛÝí¦©$Oð/ö+Õþ#ø+[ñˆ?lÚ¥/ôÿŒ´O-×N‹ö_¶€èŸ h‰¿ |4òG7ìËpÏ'‡<¥>©:ʱÝjmwu½¬2¥¬½_xÒœ¢”ò†*.òœ5ùªÐ¥V[5¢”š[[™É¤çk]ï(|R_Â¥d”—À­¦ŸkNÜ×?Fþ|Ñ>ü2Ѿè"ñg‹¬4kÇ"›ÄÞ8ŸA¹ñVµ­|Bñ牾#x’ÿU ø{žW›Ä>+Õ>Ë•áÝ.ÒÚÅmm– ÂóKùÖg™âsŒÇšã5‰ÆÖuëû~Ê›¨ÔTœ)ÞJš•¯ËÅ7h¥‘Í&äå'ni¹9Z)+Éë·*ѽši¾‡²úv?çëþ}:Öz­7¶žzüýn®`8vöï×ïü‡OÅ@ùº`õÚÿ~Ÿ¯á~šjþcÓžA‡8Èç=&º`ìýàëýz´íxúÿ-žþŸqóOí£ÿ&wûXÿÙ´|vëÛþ-wй}ã·9Uù»°¿Æ¥ÿ_iÿék§ÝÓîÐÍìõ¶ëOÖ÷WÝõßHÝû¿MþŸç§óéÖ´†ÝM;oýmmP›oÑꮼõ{6´NÖ´¾tøÿ%?öÑÿ³˜ð·lÍ~ɽ»~¿SŒ×Öå¿ît½jt·ü¼—NŸ«µÎjŸùyô]u¿õ¶ÇÓuÜ@P@P@P@P@P@ÿ×þþ( € ( € ( € ( € ( €>H°ÿ“ÈøÝÿfÍû(ÿêÒý³ûw¼Ž3œç+øOŒŸïýyÌô¼ëe¿ 
oXíÞ½û|Ñï‹þG¯Ê:à^ügøä^‹ü¿áöòù%k‘"Ÿ×¯áøŒŒžGa×W¦/gùéþz®ÿ5kØ ??\{öì:ã9=;ã8^˜»4ÿÿ­='’ëøßô³z.‰«ÞúØ=\`p;c#g<ôëÕ¯ÓËçÙïøyéb?;>ûß{ëwÕZÊÿqò—‚ü(ŒŸ îÙ¾|fÔ¼AãOƒSVÇÂßooi¯ØçÛ_Ïï¹Õ~ÉÜ|.ñOýœ·íŸÿ­‰ñÛ·åõü+æ±÷Ðßý×ÿ¨xt¿Ïoºç¹Ÿ¬¿ô¦²oü’ßÙËþÚGëÿ‹ñß±äý9âºk¿ÞÅ_þa°?ö:vÕžñQÙ÷æ’íö¤ü×¥Þúkv}/ÿÖ=>¸Ï×>‡°ÈÍw^}¯øÒ/òÛ{zÛÒÝ×]‰Tñê}½nÝ3Ž~¹9ºbî¿O%Ӧ׷ãwtÌäµüý_ß¾¯OM-aãÓéÿêÇ=zgð8šé‹Ñ?Ÿõ¥¿—BG~½;öìàvàäôä“kª6Ó·–ŸçeÝ_m5æŸÛGþLëö±ôÿ†iøìóx«ùG\g?úk» 툣ÿ_iú|k~Ûï¯m/r%¥üÓüµëÓå½ìùO¦W§ùéÛÓCÈ÷ÎkH=mfúî¿&Öº÷ù;&/–¾Žêï¾½u‹JÍÝégç_€_òSÿmû9 ë~ɵõÙgû/ZŸúrG5Oü¿%ëùýçÓ•Þ@P@P@P@P@P@ÿÐþþ( € ( € ( € ( € ( €>H°ÿ“ÈøÜ?êÙ¿eýZ¶Néë_„xÉþóÃûÿ1ÿÒð[ê—çåØõòφ¿­>—þø{ÙÚÚYÙKÞÇ_óǸã$réÓ WãP}?ÏþeþI]¹z$ƒñŽ¿NßÈ×Lžú¿â¾@Iø{ûS÷ON1Û ÝQn×ëò}:ÿÁ¾÷ÓaKgý_Õµ°¿ýnŸ¨wŽ€˜ñÎÞ˜½Ÿâ×ã­úk׿ìgóêõµûZéôo[ݵ¢ÖË›Ìþ/|4²ø³àMSÂ3j7­çiú÷ƒü[§Åš·‚|sáËØµxÇIYv«ÞhÝ¥ÛÙÈ~Ë«X­Þ‹¨¬úf£{¾†»¡ZR‹N5)·îÕ¥5ËR”š{N-Ç™sr»Ij”‰’ºk_&µ|Ú5ªz»¯?Mmïß/~%x>I|I§[øâ7ƒµ‹ÏüV𥴲K†þ hQZ¾© „“~þçúõæ›âïj“Vð_ˆ¼?©È±Iy$QuÖ¢¨U´‘UhTòò”ïÈåÑN-JD¯ËV2ŽÊÑ”î¼Ó³^›Otû5}ÑìßážáÓ·Ààœc± àúKÓ¯®¶ø³ûXü8øµãoÛ¿_ÔþüPðÃÏì?Ùcöy‹R·ñ¯Â_|M:¬—ßÿk¶’Ê]ã?Â옭’Öh§ŽhµYnüõhg±HdYô¯“噊ʱXÚ™Œ+a1Y”h,3 †¦ãVžVæ«B¾_‹”äýšP•*´œbæ­.fãù¯`°˜œ~SWñJthbU?«Ö¥J6JN~ÑTÃVro•(òN .tÔ”“¤øOáWíŽD^Oíû4¡ÇwìñFlãŽvþÛÐ/å„Ï'¾ÿ*˲ËG•ãל±Xiþ+Io¥’ô³LàÁap~êÖoÞU迺ØXô[j–ÏtPøqáŽ>ý°¾Éñwâ‡Âˆv—Ÿ³'íC…Â>0øGs¥OÅ?ØàßË«Üø£ö€øÕ¹ÔrZ¥¤v~–Éถyõº‰-=N$ÃÑÃåXwJUeϧi(;ráñr›¾»»í×W/µÊ)BŸ´äsÖ*üí7¿”cÕ=Öº|6÷¹_ÙkQý¤,|ð§†âý¯oHÐ|Oû=ü@ñ¦¿emá¯Û—öˆðxŸJý§ü§ê—òiRj Ö¾Ò#µŠê;#Ã[5íÇ¿—ðN3À`3 â1pž'„¨ácÈœhR…£ÍJR×—«}¬®™ê{J‰É%)J×ÿþ¶ûîzÇÃφ¿´ïÃMPðî‡ûB~Ï7vZ޾(|@š]WöXø5Òjÿþ%ø·âŸˆ-¢{OÛÊ!¦Ùkþ2Ôì´hžº·Ò-ìa½ºÔ/#¸¾ºô§À8iÍIâqwP§NÉQ³Ti”Á~g§+7Õ¤¢ù§QihîÞßÍ+µ¿¿ùvýœ|}âï‰? 
ÿá!ñäÞºñ^™ñ'ã—ÃýRÿÁú«á¿jcá/ÆÿˆŸ ,5;ÃÚ׉üg©iYÒüe©Ýé÷^(ÖþÏurße‰WóüÓ»2Å` ç8á§)NÎzÓŒ¤ß*µ“VQì›ÒòÖ ¸¦ôoGen­o}Óÿ€¶G»©÷ôÇ|ãõö뎤g¹àú}ßוûõoPšüÝü¿M}/²vmƒƒýqžŸŸ?ç“]0×ëù/—¡˜ïÌ~<¦FN8¹Î0 5Ó¢üõéÛðíw~×|ÓûhÿÉ~Ö?ömÓþ-wйtè¹Ê›» ÿ}A®•iy}µnÿ—ßbeú?Ÿè÷}}.ÙôÂútôéü<žS’1Ö´O—õºÛ×îØŸøw¿]vÙ_^ŽÊÉÚI)|íð þJí£ÿg1áoýc¯Ù6¾Ç,ÿr¥ëSÿNHæ©ñ¿—ä»^þ½wÓcéÊï ( € ( € ( € ( € ( €?ÿÑþþ( € ( € ( € ( € ( €>H°ÿ“ÈøÝÿfÏû(ÿêÓý³ûwô<Ž9ÆWðo´Äpÿýy̼¾Þ Óó¿k6_+Ú·ý¹ÿ·uèïkiºKKžõôöôü2qß±Ç×¾_ÆbìÓûÿ­}wé×CÒz?éžÏMþòAŒqϸéøþ™ã¦yÍuC×úÓð^½$‰N:qÀõ÷ã¦FI郌×Lëú·é¯®þ®#×Nÿ×—ç÷ÿ<2lqÓ¡9ÈjéƒÓÓúºß{wù+?Ç^Ž÷¿Ïwµ´¿mb(í×òüð¯¦AíŒq‘»ª-5óÿƒßÏùŸª»A÷[úoí>öµÕïäÏ”þ/†ø)ñLý¤tå1x/P³Ò¼ ûFYÄ1> †îuðoÅù"\wð«VÔ®l¼Yw…-ð¿Ä:ö©¨Kr¾Ðm"ö0ë4ž ÿ9UÁ½ß´·ï0÷ºmb!髯ßB ]VŸ6rÑótÚV¿ÊOtÝûtÞÖ÷~²FªÊC+ÊÃXòG† 7 sœmŒ^Ú;þ?­î´·ž¶ØgåoÆ]l?nïÙ–íß²ßìà8¢ø³û[í+ò‘Í·—;°ÎЦ/°òÊh¿ù_3ijµ…<ªÞ]mi5{Ù]è~cÇuÕ,Ç(¦ßñ0ø­:{µ)tÑ6ïewò“iKéê™ò²ÝBžHàÿ¸ôÇ*£÷9>*ü¾óéµ·ß¾öïëxÙ^¯Â¯®Ÿkççeþ],ùn½×Ú?k|ä/ìÓûVÇø£û vààp}âgu{œAWÚe8}vÆÓ} NÚí¢¶ûi}ã÷™4ù½§ø"ü÷ÛnÚoÓ­ýßÿdß /Àl|òžGí«ÿyãÌÿ‚€þÓ2ûqû̌ק _ÐÜ—{~Èjòssåôï½®»¾Þ]úØúJT\àšï*ší¯´—ŸK_oG­§á?óóÏýò^éüç5õÙôïñþ¿/¾æŸWwÛóÙüÞ¾W^wÒG[û]}·à-ÕÞâßiøÿû`Ï“ÎwþןÛ'ŒsŸqÎ3ÎWùkŒ¡ì¸§:§krc\l¼¡OOëשÉk9+m9ÇÏâw¶½>OÉ´}t?Ï®:z`àžœö|(½Vö¿Ïõíò%­6ÖÖÛ«ß®­½¬·ÓKûÒÿŸò+¦_ëüÕ·Þ϶—2úôïØ³Ûƒ“Ó’NA®ˆ>Ÿ×é·]~NÀ|ÓûhÉ~Ö>ŸðÍ?¿õWx¯ã¿ãòÝò÷á_ï©/ú{O·ó¯ŸáӥХ³ô—õý$}.8ç¯N˜ävôè9G±ÿ“ÈøÝÿfÍû(÷ÿª§ûgóï®~ šüÆ÷Žÿ¯9—þ—‚þ¶û_+Ú·¬:_¤´òOÒÞ#ÞÿýcÛß}ã·]£æüb;.½ÿ¯?ø=ON_ðþ¾o]t¿Îúl=zw¯N1Û‡R1ƒ])íùþ«ý»æÕã$‹ßóéÓOn1îrª/]:ÿ^Iüߥ›@?ÿ¬:óǧNÀòG\×L¶éý~~›é¦¦rVÿ;]+ôkݾ«}^Ú½Œÿ¦1Ÿ›I€ÏN>n˜>ŸéøwéÒÄüþínÝ´½Òw׺ò[¯ì,uk Ý/T³¶Ô4ÝJÒãOÔtûØ"»³¾±¼…íï,îí¦V†âÖæ $‚â PÇ,O$n…ªœšiŸÊ-I4ìÓ_i5fši[[®–¸>¿>û¦ö{ßu¢¼´>kø ¨_ü7×õ¿ÙÄ÷—7RøJ_ü×5å¹¼ñÀÙ/#Ótû ‹É÷Iâ_„:•ůÃÿ¼’K{u¡IðûÅZœïã „‹ÖÄZ¼#‚KÚÉâ§Š·4š]!^7« Y){Zi(ÓŠ3Z7ÓgÝzõièü¬úŸ~Ö>-x×öðñ©ðÇâ€> öWýžáÔ­¼mð—ÄŸF©-ïůÚÅ­æ²—@øÑð”é1Ú%´Ñϱê¯vgFI¬Ò#¸Î®O•æ1ʱXéæJ¶™B‚Áâðøjrj9cŸ¶l2sŸîãÉ*u)YsóBJI›ñÆ ‰Çå5q/§F†%Sú½zt¢ÔêRçUL5w&ùcÉÉ85ï§)•áO…_¶1hÿÙ¢3„aæþÇße< d…ý· SÛžWøE~•eùbPåxõ¶²Åa¦ô[é¥ÞÖÿ õG ƒ÷y~³}5•zR¿Éa£}—ÚÓek·/LøwáŽ>ý°~Éñwâˆv·Ÿ³'í>š  þø¿á%Ε<ÿc“&¯uâÚãTzäWQÉk¤6–~–ÁàžK‹An£Ž×Òâl=>S†tYsã©§í%kañZ4õwëÛ­Ï´Ê)BŸ´äsÖ*üòR{ùBë{óÛ¶´¾Xø3û|sñv›ñgÅ^ ýªô?øoÄŸµ·íůé¾½ø 
uâ«û[öÌøóyk'ˆ"øÍá¸õ0oî.¦ŠeÐôò‘J°âyåû܇ÆÌÓ…òl¯ ÃðÇ cèåø,4!‹ÇÇ7xºÊ½5‰n·Õ³L=ÂUÝ8rQ‚Tá;Êò—ÔPÇJ…ÒX|-ET|õ!]͹T”õåÄÂ-®f—*ж­I¶{ü0í7ÿG«áŸüFkßÓþ2ûv­{QúDçOõ7ƒ×ýÃÏ_þö—çù7¿´ê[ý×¢ëOÿÍšwߢש÷/ìÇð[RýŸ~ h? µŸ¯ÄMcNñ'ÄßjÞ1OÿÂ'±ªüLø£ã/‰ú‰‡Ã¿Ûž&:e½…ߌfÒ­ã}sPy¡²ŽêIQæh`üo=Îjqs˜ç5°Ø|%LÇ,Lð¸_kõz2šW§EV©Rª¥Z ¥j³KIU“÷6rsœæã¹ÊRjœ\cg´y§'dޜӓ·Wö½÷ÿ­ÿëôéŸn8^·eå·Ëú]µÛ¼§üßü3ß}îí§÷¬ÉG vãüõÈý>™ÅuEìý?®¿—Þd÷cÇùçõžþã¾ ®˜=´ù/øu鿭К¿mù3¯ÚÇþÍ£ã±ßñk|SÈ}ã·]£æî¿öŠ_÷´ûéïÇÍ?×ÍY‰ìýô¿§^qÃLñÀÇË’IàWH;?_ø:ÿ^­;^0úùoó×Ó]u³²IhÒRùßàü”ïÛGþÎcÂßúÇ_²m}¦UþãGÖ¯þ™ÍSã/ÉzÿÁòØúr½€ ( € ( € ( € ( € (ÿÓþþ( € ( € ( € ( € ( €>H°ÿ“ÈøßÿfÍû(ÿêÒý´;wô<ô=ñ_‚xÑþóÃßõç3ÿÒð:~+§–—=|¯jÞ°ÿÛºôwµ´z®›KÞ‡øtǾ3Ç~0qùù‡oëú^šß­VJÿ/éÛW·eé¯-äàpxçéŽGlAÈèrxô9ÍtÃn¿Öëõ[~–dƒ®;qŒc¯aßäp 5ÕuømåúÁ]xý=ˆäݺ œ€qÎCWL^ϯšüÖ»ÁVºDÉu¶½ÿ®½¼ú«ÚKùúã—ð¯pOLvþ.˜½~çë{­?=‘u¶½þÿµ-“µ–÷ëouÃÿÕÐgØ1ƒÇ'lšêƒ³õý|÷íùÝZÒ_ŸÏ~÷Õ¶ú[ðØðÏŽÿ|AâÍEñÃŵ‹âÿÂUükðÎ[¹Åž³}¤¶^!øy®^m& |JðôמÕ¥I‘{s£x²$Õ<1¥ºzX*ѧ9S««×³­k¶•ï ±Zûôgj‘øyìéÝFsdIu[­WùoÕi»¶ú´ÎÝwâLJ~#þÖz—<7%ÔZW‹¿d¿ÙÆálµE–¯£j¶ÿk½3[ðÞ¹bå¤Ó¼EárßQðÿˆ4¹ÜiºÖa$nð;¢Æ×x¹n£\ÓÄfmrËIòÃ)p”^ŠQœf¥M§ïÂI¤õ?1ã¬B†a”FÿÄÃâš_á«EZ×Wwvµú|2>Êð~§Ÿ,ç¨ Î}pÇcñá˜(¾ß(Å|:ùõëÿo=¼íç-,y¸*¯Mu_Þ駪I~;{¶´mÉuöŸÚßà_;¶~Í?µf9ƒñCö2‡/û#9 cæogˆêûL£厧ég‡Åv¾š4µô¾§ÞdÓæöŸà‹¿ÏM,º]nïkéc¨ý“?ä—x¨v´¯íÿ­‡ñÛüôíÛŠù\Sýí>¿ì¸ëþŇôü¾ãÜ[|ßæ}3üÿŸòþ•z­íëùýÚÞö^—‹óõÿÓüó]0zõû¿à«o½ŸÊâû­²ým÷Vò{çØ”Œç‘žœœc ÓÒÿð};?êÏx¯Åè¶·žú§å²ON·?>"~Ùžð·Âø+ïÃGö£ð§‡¿hok?´ô¿~ßücд¯Œ^Ó4/سáçŽ|//ÃOÜø‚߯VZWˆ?á!ñV‹'‡tÕ‚×R]_V´e™/.ߣ†”§–Tötf¨{iû&éIËRö“QämÇ–2ærº²i_ÞÂnÜú¤Ó•µÙ(ßEÙjô]÷<ïÇ?ÿg¯ˆº%ŽüU㿃l­?aŒ~-µ¿ý³~8~Ö>Õ>ß~Ô–¿þ:üI×~&|~³ð׎~øgÃ:‰<'¦ü\ð]¥÷ÂÆð?ˆ!ñÆ·¬Y[xoÄ2A×J¬ŒcVWÅÓû5,;ST9©ABãQɦé·jœñ´mÍ)Õy->Õúë«Qk~‹¦êö—غ×Å9>2üÿ‚Àøßãø›ðU|9㯠üñ>‡âˆ|Yðö};Cý†~Ï㻆úµ…åöƒy Ú|GÕ|Ke®Þxry4±ãëé—RK× µ#J˜Ê[ÁÔŒ—,®ñ3åçO^nE­Ÿ/+²\®Eî§Û£½ÖÚÛÑèü÷ØýxýÐtãð8 žœääÊ6Ó·–ŸçeÝ_m5¿Nß…ú>ÚoØùßàü”ßÛCþÎcÂÿúÇ_²oÓù}q_k”ÿ¸Òõ©·ý|—õ×ô%Oü¿%ëùú\úv½"€ ( € ( € ( € ( € (ÿÔþþ( € ( € ( € ( € ( 
€>G±ÿ“ÈøÝÿfÍû(þð´ÿlìÿU~ãWûÏÿלËÿKÁióØõò½«zÃ¥úKO$ý-èÒ=ïÿÖ8Î=ûtƒÓÚ+ñX½z?Ãå×Ñ÷òµW¿ãm.öµýäÞïñI·h¨íøqÇn1Ó'ÐýÞI褕êƒ××oêëׯãbïú¿Ÿ]ý~òL÷éß§#üON§±®˜>šúÿ_«û„H9è1øzwà`'޵Ó¥»1=Ÿõù§ù}Âÿúý1âÆ3èÇ=@ùº`ôóþ¿®¿-ŒûýÛ^íÛKìß^½Õž‘Qùt÷ü?Ÿ›¸ÅuEéýk÷¾º=Ý·ÖÈO®½ú~w{=ï¼tÖZÛ¾qéøöÇ•tÅ«®›~¿—÷¿&'úúëë­ßü:µì~h|Qÿ‚|ø“\ý£ükûC|øÕῆ2üEðσ4¿xÅß5‰ºx“Â:¯Ž/çñW†ît_Œ dðùñBøÑ¥ñ.ŽmumC^²»ñ'Ÿ¡¯êâ}1˜l&g,ªxÏ®*¹L±ŸW–BŒjÓŬ/îëªØ,S—²xd©¸J…GN|ЄT>W?áL&‰Áb«â±8yà£Z0T=—-EYÒrçö“ÓÙéÊã~is_N^ÿKý›ÿjm( ‡öŒø&ͤy¿²—Ä>vñóÿlÉ9éÏÝÛ—èðxì.•C ‹Ÿ/YchôÙ¾L½uòÓ¥·1£ÁøZ6¶3+4i~6ZÿVåÕž•ðçàoŽâ÷‡þ*|Rø³ðóƣ þ"|:Ð4|ñ/ÃRÄü'ñ.©¬jú§ˆ~9üXÿ`ÿ…O§XØØYéºQÿ‰µíÍÍôÞLW¡ŽÎcŽÂà °Ò¤£Z”åˆöÍòB¬9,°ôR¿´æ½ßÎï“ÚÁå°ÀsrTE$“çKK;­’óõòÕF§…¾þÒ_"ñ.‰àŽí|%¬|Kø¹ñI²ñ‡ì×ãßø‹Jÿ…µñOÆ?/ôGÄ/íQàÍ3X:>§ã+Í2ÏP¶ð¶‰ö›;Y&²YÌ®Øýc W’U0õÜãJ…'ÉŠ§ËØQ…%„œ£Ì ›\òÕ»IY#³UÕnúw×Ïúí±ÕÿÂ-ûbÑvýšñþ(ÿôgÖЩƒ²ÿgÅyµÒÿæ%·ª×ºø{ºÿÀ[ü¥뵟2ÿÂ-ûbÑvýšñþ(žñ4;ð;{“ü=1žGì1?øUKÿ™å÷“ik¬uê ÿù5µû¿+ü"ß¶'ýoÙ§¶?ãþ)~Ÿñ™äñÓÃ]žë÷m§Öiïÿ„©~?~¡iw_°þw÷ök¿ÍÞÌpð·í‰ž>;~Í?ø‰ßŽ?ømŽ0:œúàéŒð·V£ˆÿˆ;|¾ªŸÝéu©-JÚÊ?ø ®ïß™­îínïFJ<-ûbçþK·ìÓø~Éß³ÿ­¡úqž¹ÅtBxoùõ]Üznÿùl¶ÿƒ©Ÿ½Ý}Ç ñ/àŸíSñOá—þkßg;Ãü ⿇ºÍÎ…û(üG´Õl4?ø~ûÚÆ-ç퉨ØZê¶Œ¯§Iu¦ßÙÃs-=¥Íº=¼½tjЄã5J·4%®jði¸»ê¾¯{]+ëv´ÒÈÞ—ZùÃ~qö—æ;zŽ8ã'Ó'=&œ|¾]¿º»ÚÖ`öôü»^«cç€?òS¿mû9 ÿë~É¿Oþ·¿Zû|£ý—øªéÉz]÷8ê|oåù/_ø>[Nצ@P@P@P@P@P@ÿÕþþ( € ( € ( € ( € ( €?;~4übðçìùñ—ö¶øÙâí3Ä߆¾~Ç_³?Œ5­–úU߉uK#âgíŸ4¶¶¹¬xF›Sº;a´]S\Òl<×Su¨ZÂeü3ÅÌLÃ4áLÓ§S³ 0g%J2•L R›„*O•u儤úE¶‘êå²P†"oUGek½%¢¾—ºÓÏKÆ÷=wÀ¿#×ůÄ߇.ý›õóg­ë:…~6xÓö~ø¯Ã¶Ón|_âï[| øÕñjÍü5áíêZ¥î˜ú4š¶™%åºZj—Oøµ\"O ˆ£˜Óæ§ ÕÁQÌ=:µ\•*3x¼^Ò·$Ý8ÂæQ’‹nåõ”¯ñ'‹I8ݤ½æ”fî£Õ&¼“µÌ}?öºý›®ÃQkþxNïÇž3×üðÚÃÄŸ>Ãyñ;Uðÿˆí¼*dø•ãmY|Jšž¯¨i–:m¬Ëâ8.uÍKÖ´-#\¿‹J®Ïì¼Å{G.ª¡FlK§…Årá£R›«jîTbéòÆ2no÷o’S…IB<ÆnpÑóEs6—½y§gkJÎí¥¦×µ–Òæ| ûzþÆßþÛ_³ÄK?‡)Ƈþ ñwÅveð'Âïˆ^;ð/…¾+x…‰¼Eá=4ÏËâkcQþÚÖü+®[hŸÙ©}ý¨l¥†ØµÜWðzu2¼}_ýšµZxk{\E Uªaáû¸UmÖäå,*AÏ›—–÷qI¦Jœt÷—3ZEü[Úûݶ֖[öû^©ñ+ãïÀ¯ƒ~Óþ0üiøMð¢ÿÅ2øZÃâOÄ_xóÄ­my¦é÷+ ZøŸXÒçÖ>Í}¬iW?Ùésä]jšu¼Û$¼¶W0¸\V&2x|6#©ÿÐ¥Vª‚’”—;„eËt¥k½T_½¤œIJ1ÝÅv¼’¾«ev´º_>¶r9tý¯¿dæðŽãñûO~ÏgÀ¾"ñ5ç‚ü?ã1ñ£áÁð¦»âý65›Pð¦âøI²5?XA,wš-äú•µ´‘ÜM‰ÑÛ¶8 ´tžíaRt¾¯YÔ)]*’"’ƒé&¬öæz“Ï _š6nÉó+_·fÊ>(ý´c¿ë÷‡¼iûWþÍ~×ü-6¥oâmÄßþhZ¿‡®4]Itmf 
wMÔüMiy¤K£jÅ4Ý^;ømßK¿e´¾ÓF饀ÇÕŒ*SÀã'Š.†´Ô”—4\$©¥.x«ÆßuWZƒœóŠõ”QÖê?´§ìé£x³ÄþÖ>>|Ò¼sàŸ j>8ñ—ƒ5/Š>°ñW„ü£èv¾&Õü]â_Ýk°êÚ…ôŸ ÞÙx‡S×µ[[]/OÐïm5{»¸tû¨.%Úž(B¤p؉R«5N#F£„êJN„eÊã)¹'X¾g$Ò»‹DÉÃ_z<É_ut·¿å¿äÍüvø!ãû-+RðÆO…^5Óµß^ü<Ðïü%ñ Â^#²Ö<¦øjûÆšô«W¼ƒQñ}‡ƒ´ÍGÅ—ž³yµ›o é÷ÚôÖI¥Z\]E¿Õñ¤ÕJ©¸ÁU’*‘q¤æ©Æ£MFÐu$ ¦×/3QM6”²º{5½·ëÛ×Gúžo­~×_ ô üZñUæŸãÓ~ þÐ~ýš¼X–úV–÷w?>#ë¿<= ^è‘>»W~†û㇃ßRÔ.ç±Ô-­­uÙ-ô›¹,ì¢Ô;ia*¹R‚•4ëáªbâÜ¥eN”kÊIûšNØyÙ%$ýÔäîåÉ+ï£KæíÝ®ë¯ÞtþÖÿ²­º|Cy¿i€1'Â+û-+â£?Ƈª¿ µMKÄ7Ó´ÏçÄ8ðýÿ‹-.ü+cg®}†æïÄÖ—^‚'Õí¦´NˆaqMSÿfÄ^µÝ/ÜÔýêQænç¿hZm®ouó6“\ÉÊ?Í4z¥o×ëDrþý·ÿeŸüUÿ…7£|møfÞ5Õ4Ïë^¶—‾Ïñ‹Iø‹áÍCÅ^Ô~ ˆøãáïˆ~ üxðŠ<-ÿ ×â ÕõŸŠ¼aâ h¾<‡[ÓŸÀ~%³ð?O†|Wwàm3M—òº\IìñoÉ™bï†ö|¹¶mý¡Rèb)ã05éUúŽ’\]ugC’^Þœ«Ñö”£^s‡¢èû¼·„]ïûº|—¼\%ïÎíÆÿkM%É&’:öñw¾(üEñÿÃÿ^ Ó´ZÌðüTðÇŒ>iþ;eð5§íñG㿇|?ðÆ}Oâ%Ÿ‡<â3ñ‡Æ~ñˆ¼Mà߈Z/ˆR×Á>#¶ðn…{àØ¬µM{F¾B¾´§‚‚xj”±Î‡ïÞ ‚©S£‡•Z°oJ­:tëÐ;Öƒ«R5œ¢½‹NRS^ô¯%ÉuÍÍ(êÔ~)+´î¬ìš´ü{â—ü÷âÇÄ ľÒj­ÃþÕüCñû]Ñô©>üI€é ñŸö–ñ/í5§C® ûQ|>°ø7‡üCây|+¬Áãý7\ðf¹¢hú>§áøį¯jº÷£„â<5R«,¶u*Bž—Ö°þÿÔòúyt¹l¿* p¦ªÁМ+Bs”jÖ­O’ÎT¤î•K&åö^œÓrÖÒ÷·¶©­4I¶Î‚ëþ ‘ã}þýÅ´7ƒî<ðïã¿Æ¯? 
4ÿ üºðÇŒlµÏŽ_µ$_´‡ˆ4ø®O:Ö—ã]H‚ã[ðeƒhžð,Ú®¥w x÷ÄI©j^M3UÖCF󥪫WÁàðx‰UÆ{JR†-ú„*P¤°‘•ɨU|ÕkrÅT¡r|Ñíy«)JJѳ÷§ÎÓ|É5ò]ÝÚ\ÞÇû$~ÀP~Ëïüm7Å=_â%†á|<øe§ëWßïµo x'âÄ]âµi¬¿Ä?Ÿ¼u/µx[ÂvOð›áÁ]#R›I¼ÕµM íõ +ÇçO0£K <§RzòŒpª5kQ¡*qö<5n[T«/öœF2K™F3\®SJ—#zßI(üMêÓ³¼Ú»ÛÝKkÛTŽKÀ?ðMkoøróB?aÕ./uÏØW“Tÿ…p–3˜¿cOÛ/Æ¿µÕ½ƒ ñ½ÔŽž:—Æw·Ý*ø2H'ñ|px•¯ŸÃÐuÏ=uª)ýYÅ(f‘åö÷WÌòÊYkÂð½’ªýÛÕº¥xYÊSìÿ½Öòô„ùÕÕö•ûÊÚ>®']ñoþ ÕைZ<5áÿˆ/Ñ´¯~ÉßµìÃ#üK×¾$þÐÚîœik¯„×Þ1_ü_ø£®x¦ãIðÂü)²UøvšÅ–¬M©=Õ¾§¡Ompú†˜Lê­)ÒœèÒ”©fî!Cõ%ˆQ§ìðÔ)Óæ©íßïT9£ËgÏÍxši¥Ì¯A]¹ë>_´å®±ÛEä®™æ´üêO¿~(üWÒþ9êÔ~*kŸ¡Ö|=öŒZo‡£øñ‡àìwðSÅþ½Oƒ?´oÀýKÅz¢^þÇÞñ]Ç‹ïõŸ‡š†ã]k¾*øYâ;Bñe¯vÿ‚RëÞ>øgâÿ—¿´ƒgð‹Y“ã÷‹|= ü¹ºø•¥üTý ÿeO‰ß²ïˆµøíþ.æx£áÖ‡cñƒÆÞ,Ò| iàÏ xškx¼!à[Ÿ‰‹á ^zs¨Ó« RÂÉ×_V„¿Ú-EÑÂãhã!ÒöP¬Þœ%UÔœu*ªJs\²é]5Íey5îëyEÅëÌ“Z·k_¥Ý¹£ü#û4ê°þÞ~?øÝÿÍÿƒþ økáÇ‚‡ƒ<=4¾ øëöŠÕ|,ß¼UñÃÂú>¯jZ÷‡¯|û>xwÀÿ³üRø³Hð½Æ¥a/ˆ­´=mÂËÄZü¼d^]F‡:yÕŸ´—ï9éác?k <å55S:¸ŸÝÊi>G))>Hfãi¾ËÑÝ÷õJÑéø^\Gÿà–ß¼}¬|fñæµ ü)¼øÉñGö®øEûMxã^©ð+ÂÇÄ߇v |8ø©â_Œþýª~]øËůñÉu[_ˆß³·ÅÏŒZ#_Éá½3ö.𦥦Iâkpê?ð˜øÃs[iÞ¶Òt›_MæªR…9aê(CØò¸V§IÞLUE¥<*‚»ÅI>XFÜ©ë+ÊY8´÷MëÒú;w”ŸÙê奶ØÇ׿àzsiz~“àßÚÄúU¦Ÿá/|,kð|a†-Ká7?g_Øç๣xžO‚ß´gÀ«ÿø¥Çìsá½ÿ‹/µ‡:Ô>1×¼=â„úìú^âK.ˆfOyQ‹nQ«§³ºš¯‰¬¥VUþÓ(®UιŒÒ“‰.>véòi.Ž÷Óºü//ÚMH³ðþ‹¤hptýLÓô‹s<—7ÓLµŠÊØMs>ù®&ÖiÉ,›ärK3Wÿù¨ÿ?…?ìL—þ„ù_þa?ù@{Z¿óö§þ/þMÿ]Ãeþ×ÿôuŸ´Ÿþ?‰ÿüÔSþÄÉÿèS–áÿ•‹ÚÔÿŸ“ÿÀåþ}õÝ~²?á³?lú:ÏÚOÿ§Äÿþj?Ïá[Ã%ÉìŸöN[×þ`0¿Ÿ³¸{ZŸóòø¿Ïü½Ãf~Øôu¿´ŸþO‰ý=?äi×éUý”-²¬·ÿpËÿqKòûïîžÒ§üüŸþ/ø?ŸÞ/ü6oíƒÿG]ûJáôøŸÿÍWo§åü5ý‘•г/_÷%†ÿåQþ»o#ÚTþyÿàrølßÛþŽ»ö”ÿÃéñ?ÿš±þ{ ÙYZÿ™nÿðëò§ý|Ø{JŸÏ?üAÿ ›û`ÿÑ×~ÒŸø}>'ÿóV?—åZC+Ë.ÿá;·ýPïÿ^ÅÏ?ç—þ!᳿l!ÿ7_ûJÿáôø ?—Š›üúäí×û3-ÿ¡~ÿ (ò°ç—óËïÃgþØ_ôuÿ´¯þ_Š?üÔçùÓþÍËÿèÿ„´?ùXsËùå÷È?á³ÿl/ú:ÿÚWÿ¯Åþj*¿³ð þ`°‹þå¨þ”ÿ¯¼ÎU&ž“žÿÍ%Ñ›þ˜ÃgþØôuÿ´¯þ_Š?üÔSŽuþÇ„ÝÌ=þVG´ŸóÏÿ—õýy ÿ ¡ûaÿÑØ~Òßø}¾(ÿóQþ èúŽmƒÂÿá=ÿ¸ÿ¯˜¹çüòÿÀ¥ý^aÿ ¡ûbÑØ~ÒØÿ²íñGÿšéùSú–¦ ¿î^ùGòû¾ÑÏ/ç—ß!éûh~ØŸôv´·Qÿ5Ûâ÷[þ¦ƒþ}:Sú¦þ°ÿø"Ÿÿ ÿ?¼9åüòûäQŸðC/xÇâì·ñoÆ>ñg‰|qâÝ_ö˜ñ7ö·Š7ñf¯ûLþØ¿ÚÞ'ñ¯x—Ä:Ÿöíiñ¯L±:޵¬è7š•çØtË+=>Óí7R}žÆÒÖÖ°[Ç4qÆq›a¸¯8¡†Í3!WÉF†7F•;à°²’…8TP\Ó”¤ùb¯)I½nåîa)ÓxznTá&Ô®ÜSoÞ{Þóûiÿ†4ý?èÔÿfßÇàoÃËþEƒüþ™¯•Yþ{ÿC¬Ûÿ8·¶¿óö?w[ëkZ]Ê—üú§ÿ€Gÿ•€ýdŸðÊŸ³hÏýPφü%ò? 
{õ¶YîyÿCŒ×ÿ8¿þ^Ê—üú§ÿ€Gÿ•’Ã~Çÿôj³o'ø±Ÿ SŒdx_ɰrTàm­#žçoþg£õÌ1?ù|¶¾ëšú/wpöT¿çÕ?ü?ü¬?á?cþŸðʳoþ߆êOüR Œqßž§"¯ûs:ÿ¡¾iÿ‡ _ÿ/—ÞL©RÝR§è£òK;¶ºhûÝòŸðÆŸ²ý—ìÙ×§ü(Ï…ýsŒgþ~?#Ï#8 ZÇ;Îm®o™ÿá~.û××ßüõW”{*vþ6þU}¯ü¯¿}´|¿Oøc?ØÿþOömÿÃðÀôêOüR¹ãŽ=NI!n9Öqùæø_Šƒ©¯õÝÙÓ¿ðéù%ëÙjšÕßkÛkµf/ü1§ìÿF¥û6uéÿ 3á\‘ŒÿÂ1ý=3€µ²Î³ÿµs%ÿsد_ùùËðNKÙS·ðá·ò®×¿Â×^ûoËw(¨ýŒÿcþ?ãÿfÏü1Ÿ òG¿üRÃü}qµ¤s¬çþ†Ù—•ñدþZ·Wèþ;hÎÿ‡OÑB?rº³¾ªú¤×UÊÅÿ†3ýÿèÔ¿fÏ¡øðï¦áþ‡³Zlçô5Ì¿ð»ÿËì©ÿϸvøSé{ü6ÞÊÉé³ÝÈzþƱñãþKölÿÃð¿ó?ñKdcŽüçŒókã7{æ™§×qNÝwöÝW[Ùì­g̽?ä‡þÿðÆ_±÷ýì×ô?þ޽¿äV=0~½Fq¶­fùµÿäi˜ë¦¸ÌKÿÜÍßÒÞ»‡³§üÿÀ"ðÆ_±÷ýìÖ}ÿáEü.ÿåªÁú}O*Ú¬ß5ÿ¡žaÿ…¸—øûW~ý=ÙÓþHàøc/ØûþGökú‹ø^9ôÿ‘XôŸQœm­#›fŸô3Çüñ˜—Õ/ùùÖúmku»Aìéÿ$?ð‡ü1—ì}ÿF£û5Ÿû¡ ùÿËP÷ôüq÷kOí\Óþ†Xÿü+ÄòÀötÿ’øEÿ†2ýèÔf¿ü1 ÿ/ùOèONßìsLÏþ†8çÿsxÖwòÕî›Öær§åŠÿ·cuòÙ¾º5ÛK'#þËö=ÿ£QýšÿðÅü/çÿ-Oä?,Õ¬×3ÿ¡–?]/õ¼FŸùS^¿ÕˆäòÇÿõù~’¯ìeûdÆ(~ÍCð3áû•OLö'ØùvY¦dÿæaŽùc+÷ÿí½µémC’?ɺ$£ö1ý?ã?f¯ü1_ ¹Çô+ñ>þ•qÌó/úcmç‹ÄòÅn¿.ú äòGðƱçý‡ìÕÏýP¯…ß_úÏóüOñký¥˜ÿÐ~7ÿ ±ü°9#ü‘û¢ðƱçý‡ìÕÿ†+áwÿ2Ç×Ô~9­#™fô÷5]üþ=-§[>—i‡$’?tEÿ†0ýèÔ?f¯ü1? ¿§†?Oç’+E˜ãïþýŒ÷3ˆÿå„û:ÉîÅùµµ–—×ä­e ÿ†0ý?èÔ?f¯_ù!_ ‰ã·üŠÞœõÇ×­c˜cšÿ}Å¿?¬Ö×ÿ'_—f÷´OgOù!ÿ€Å_óÒö[]>é‡ü1‡ìyÿF¡û4ÿá‰ø_ÆîXÉǦ§ÉÝqÇã¯þùŠ~¸šÝl·ç}û;y\=5ö"¿íØë×µ–žk¶–RøcØó§ü2‡ìÕÏýP¯…Ä€?îUÈÀç;¹ü­V?Ó‹ÿŠËoû‰ÿ ?ù÷ ¯îÅ_óëemzݧaëû~ÇŸôiÿ³O°ÿ…ð»™ð¹>¼|ßt‘œatŽ?ÿæ3ü¾±UyoÎúytº»v”JœÙŠóå‹}ö\«ÒÖÒI;[šOÿ†/ýŽÿèÓÿfŸü1? ù—ÿæO¯cè3ÿƒëòÂ9#ü‘û¢ö/ýŽò?ãÿfŸÇàOÂïþe³ùŽÙÅkv1ÿÌ^'ÿÕüù¤úuZ½SkHœ‘þHýÑÿ ]ûôÿ†Oýš?ðÄü-–Kî»iÞý¶º\®._Ø¿ö;éÿ û4Ÿ¯ÀŸ…ÝÈgþlûô8ÇÅ\qùýUúÕŸV—>Ïñ|«–=–ÛÙv潬¯þZ4‡Ã~Ç]?á“¿fŸü1? 
ON¹ÿŠUHÇÆz‘’VµúÅùýWÿHŽUÙ}ÂÿÃ~Ç_ôiß³G^Ÿð¢>ú‘ŒÿÂ/ŸÐóÓ8 ZF½gÿ/jzsËõ’{]´Û½®»…£Ù}ÂÃ~Ç]?á“ÿfŸ©øð··¯üR Œze}NpR¯ÚÕÿŸµ?ð)òÀ´{/¸_øbïØëþ7öhõçàGÂÑøȬ{dþ£8ÛZÆ­K~^ü¾ï‰½<ï她KG²û…±wìuÿFû4øQ yÿËS#œóеR§üüŸþ/ÃU¶ýomz ´{/¸_øbïØëþ7öhÿÃð»ÿ™JÙT–êrÿÀ¥ßÕíëæ¬)E[ü—màoò•—)ÿ ]ûÿѦþÍøb~ÿó)ý?*¸ÔŸüüŸŸ½/ÎêÎ×}oë|µí¿çdÓ^»ì´c×ö-ýŽèÓfƒÿt#án9÷>–ÓÓ#Òµç—óKïiývï·W·ãË{ÿ†,ýŽèÓfü1 9ÇýÊ£¿ÓñäV‘œŸÚ—þýµm¦Ú=ô³Aýmµþ}¿Å•Ãþ³ö9ÿ£Lý™ÿ ?/ù½½>™ÇÍ¢nû¿½þ–×]ƒúÓ·}–ÿt®/ü1gìsÿF›û3ÿáˆøYÿ̱õõiÓ=ÿàÝy¥}ô½…ý_Óüœ–oø-÷Æ¿ ÿjï‡ÞøYðóÀß ñ˜çvÿŸØeªÿ¨ /[·Òëeº{¤{ø?÷j^’ÿÒÙôhü?Ÿøóœ÷öçò+M{^—Þt‹þ{~}‡¿ôv¶ŽßwϳZßeÚ:ôÝD?ÏøŸ©“ŽW¦6šÒ?ןÊûïkZÏvî¹AÏ?Ìò rx+ÉÈ<l¿¯ë[z_MµÛþ ¿GOÎÉIÏ^Ýzð2Ns€xÅiå÷téëmYo¯s?ëm{m¾›¦Þí^÷Çóúõ<ã‚räz Ú­Ã^ÞZuô¾»-SºÑì݃üõã~‡'9Æp8Ím•ïóþ¾zêû»5oÿg[íåkn®µm7kŽü÷úõÇxìxÆÕÇO½ÿK»KÍ ô{/êû^éÛgÕµ%ÿ?çñ­—õçý[0ý?»®ÖÓo+i»»µØå?\~zõãë‘Û8Åiëú~–Ó«v¤¿ëËËåÿIýyÆ}yõõç?7ëZž{ò9ëƒÏ#Ó#+hÿZßÕ>Ï^í¾·µÀ_ÿ_±üûó×±9«_×õ«Õ;i륓ý~¼çž½‡¿8ëÒ¶Zÿ[y=¶ÓÜÿ?ç¯δ‹õÿ†óº²µ®’zk¥‰’þ¾ZtzïmV¶ZÝùÿëõü§jÔÇðòìçüôïïùV±~·õÿ‡»}ö²¶–°)Ïùç“ÏpxàaZ-ÿ¯øŸÝ`þÏã[-¿¯¿þöÚî×ÿ?çŸÇúv«‹é­¿ÒÞ½·Õ¿çÿ­ÛôüÆ0»_ÖÿŸÞ/ÃååóÛ¾Ý^Œ?—çø‘Øÿžõ¬}=uëÑ;¿;ßvµ–·ôûž·]Ë`ëüùçëéõãø­¿¯ëÏïé?êÝ<¾áÏùçñþ«H¿_ó×wïhîüÛºßq_ÖŸ/שüƒÿÁäñþÿÙ³ø;ÿV—Æ_óÿê­úþ¿¯ÌÿÑþÌ¿eÿù'^1çþngöÐïÿW…ñמ™ã<ç·;¿”øÿ^1Îÿëö¶¿ì8UmwÖÞ›­ÏþíKÒ_ú[>Œ#ùsŸþ¿ëq‘_&tŽÿ?_ò~Ÿ© ¤=>ÿ•íß®ÛY'Ð/§rxŽr8ÎON½nÜŒçåÓúþ¿àßõˆsž3ñ‡~ø?Åž=ñ~¥ý‘á?øk]ñ‡Š5o²_jÙ~ðΗw¬ëz‰±Ó-ou;ϰé–W7?dÓì¼³¥¤÷.?fLMZ8z1ç­^­:4 ­j•eSŠr—*æ”’M¸Å_WeÌ&ÔS“Ù&ߢ×ÏòûÏÿ†©øiÿB—í1Ï?òf¶åŸøQyãðÁäv õqàn+MÂ=]/ÿ1?¾Ë{üõZ^+âãxœ?J‹æ¥ëµ­{黺ѥn`ÿ†©øiÿB—í1ÛþlÃöÃçßþHX<{c ää’­§úÅ?ô(«ÿƒ°ŸüÖZÃÿÏÈõjÑ’ùuZùík]&˜ÃTü4ÿ¡Kö—õÿ“0ý°ýzgþW8äû‡ŒV‘àž(_ó(«ÿƒðž_õ-ßä·½¢}goâ/º^·ÚË]7wZ4¾!áª~ù”¿iÇö0ý°ùü¯Ç¹õÈ«\Å þeð£úÖ—ÜZ¡Ö¤}—ÝÕk~­ím˜¿ðÕ_ ñÿ"—í/ëÏìaûaÿó‹õü»t­q7ý jÿàü#¿žµÓ×ϯ}Åõœ=¿ˆ—þ÷ü:]«hüŸó“ðÏâׂ¾-ÙxŽ÷Á“x™O„|J|!â}3ÆøðÓÄzˆáðï‹“OÔ|+ñ'Ã>ñ5¿ŸáŸøo[´»m+ì7¶µ¬¶—3æEO'—crªë ˜P–³§Êœ&ýœœâ¥û©N:ºrZ¹?wUgî颿ƒMm¢j϶º½÷¿ÝdIñ3âß‚~ØørûÆ“x”Ÿø˜x?Ã_ƒ¼ãÿ‰~#×Íᯠx“[¼»]!¬,¬t›©®n¡ÄAµËòìniYáð"´iÊ«§”ãû¸Ê1”¹ªÎvsвw×f•âNq¦¯7ʯkêõùzy~ KÍÿá«>gþE?Ú`öÿ“/ý°Á?ù‚A~½x#îGƒø‘̲¢òöøWÿ»W[ÿ۪˛/¬ÐÿŸ‹îÃV|1ÿ¡Oö—úÃþØðÅø ðF1òßú£Äô,¨¿î6ÿîÂüþýCë4?çâû¤ðÕŸ ³øDÿiüBÿÛ'ÛþH@ çßéŒÖ«„øŠÚåµ:ÿËü3ô×Ú¾Ÿ[µÍ#ë4?çâû¤ðÕŸ 
èSý¥ÿØ¿öÄü¿ä„·N=»ŒôZ\)Ä)¯øM©ÿƒðÛ=ÿåâþ»ì'ˆ¡gûÅ÷KüŸå÷ÿ†«øeÿB§í/ôÿ†/ý°¿/ù!YÿÐ¥h¸Wˆ-ÿ"êŸø7 ÿË×åëc7^—ó¯ü™%ÿ’?ÏÉ]${g‚üaáψ^ðŸ¼!¨ÿkøKÇмaámXÚ_Xjxwĺ]®µ¢j`Õ-¬õ;/¶é·¶×?dÔ,í/­¼ß&îÚ ÒH“Æ”'F­J5#ÉR”åNqÓÝœ%É(·4í+««­.¤îͺMlõGxÛö‡ømðûÆxZâ^±âÛ_ h~0¿Ò~ü øáñoû/Þ&Õ?óéìkÑ ç¿ô/Ÿþ Ã~Ä_ðÝ·}lO¶¥üëÿ&ÿåg?mûjü»ñV³à¸4/ÚFOxÃþñF«§¯ìkû^4öº/‹õéZ ä°/ÁºŠëïø’&–àôë„¶šY »H4\5ž-~¡-]¿‹†ºóÓ»üíÖÖmKù×þMÿÊÃÂ_¶¯ÁoøSÃ>4ðÞƒûHj>ñ‡ôoh:…¿ìkû^\A}¢øƒN¶Õt»Øn->\ZÏ ÍÜÅ-µÄÖò$ŠðÊñ²»éþ®gkG–íRƒ_'íÕþï¸=µ/ç];ÿò?×G+Z=ü5wÃÿ2¯í-ÿˆ_ûa’1ïÿ '°úÜõZgKþ`gÿƒ(òç¶¿’R^Ú—ó¯Å'ûw¿•ûòí!?k/„?nÑ,oì>8x{þx[Áún§ãÙcö¢ðW‡ˆümâ=/ÂÓµx»àîá½ûkÄÚÞ‘¢Ú]ëZµ…Ûïíb–æ/9K:¹&kBJÕp“…:P”ç7V‹Q„Uäì¦Û²¾‰]쯠*´’—d–º»è¶²ÿÀ’WÝÚÑúW¯~½sÉãðÎrAqƒ^t]ÿáïÿ·“ÝlÍ6ùtZ'ø¾¾I§Ý3æ§ý¬þ›ínÆÂÃ㇈áñ7Š|©ê~ý–?j/øsþ?xTðŠ´í?Å^ø=­xk[þÄñ6‰«è×—z.¯¨Xý¿NºŠ©|²[×¥“fu©Ó«K )Ó©8KÚSJQ’ºÄMhözôi[”ÍÕ¦´r³M§}Ó_)E[egmzÙF2ÚÃá~üRß´¿þ!‡í‡Ïþ`óß­t,“5¶¸9ÿàt_þä3u)ßI/¹¯Í¿ëºår_øk…ßô*þÒßø…ÿ¶'ÿ8ój¸ä¹ªÿ˜IÿàtWßûÅ}þ[«ê…í!üËúþ¿«¡Ãö±ø]ßÂß´·§?±í‰ÓØÿ‰ãÏ¿BµþÆÌÿè_ø2ÿ,iæ_õý_`ÿ†±ø[ÿB¯í-øþÅÿ¶.9íŸøQ$úœc¨È'&®9>e× /üGïþ/ÎÖ–º§¨{H2ü¯ë]ÑÓxö‰øiñÆ0øDO‰šG‹®¼3®øÂÇJø‰ð3ãÂOíOxgUð΋â KGÔ>*ü:ðf›¬RñŸ…­µ =.òîúí»¤¶XÝs¯Åab§ˆ¢éÂRäMÊ÷¬Ý’ŒÛÙ=Õ½CSŒ“»ÜôÏxÇß<âßøÃQþÈðü5®øÃÅ:¯Ù/õ¥øwÃ:eÖµ­êaÒíoµ;ï±i–WW_dÓ¬o/®L&++Yî!x¦¥QÆ1W”ÚŒR¶®O•-yzõvZïm"Þ×}5û¿¯êç‡ÃXü-ÿ¡_ö—ÿðÅÿ¶)õø±*xö<ç<à†ôVW_ó /^hiéj©þ:õ¹—<;­öµ¾]SMë{;[¢h_øk…Ãþe_Ú[×þLÃöÄõgþO¾y Ï àÖ¿Ù¸ïú—ßOÿ“_–¾Bç‡ó~]¹måk½4vµÇÿÃY|-ÿ¡[ö–çŸù2ÿÛÿœOù÷ûµq˱«þ\KÏÞ‡ÿ$ö]ºoªJG<;¯KY~:;µ†VÚöQaÿ eð°̯ûKâþØŸüâ½ÿ/_½WýŸ‹ÿŸÿÀâñþeý+ÞÎ=vµõVNßeÿðÖŸ 8Ï…ÿiaŸú²ÿÛñïÿ$c^zðr+XàqóæKÖpztÖËU§®ú6ÔSœuÕ|–ߊOÕ'ÛEcÓ~|^ð?Æ ßø&ŸøD&x\ñü#ž"ñtšvŸá_†žñg‰§û7†|%âMnîít¥±³°Òn¦»¹…¶+ëJœê¾X''kî¶_8ù+Öîíµ´µ×õýlyŸü5§ÂÌÿȯûJŸcûþØsÓþHFxçù®ÜÝ+ ˆ_òéýñòÓâ~¾Vó´—<{þ×õèðÖŸ ÿèXý¥¿ñ ÿlA‘ÿ† 㟈Á$VŸV¯ÿ>ßßçÃú¿¯ùŽÿ†´øWÿB¿í-ôÿ†.ý±3ôÿ’?—çW–ôå÷­~jk¾Í>éêñïø_× ÃZ|+ÿ¡cö•ÿÄ.ý±?ùÅåùUû¿Èÿò_þXCk¥¾í?6üãå¢÷…µ§Â¼ÿȯûJõèbÿÛú| Ïä¿Ï5¢¥S¬Zþ»êþþ¯K- ùÿÃ[Ño÷ôij{Ç‚|cáψž ð—ü¨ÿlxGÇ>Ð|cám_ì—ÖÚ¾ñ6•k­hzØ5K[-NËíºeõ­ÏÙ5;;ûo7Ê»µ·¸I CTÚÛ[>ý´êºí¾Ýƒúô¿Ï·_zÿ+?ùç×®:žŸìMlµþ¼öÞ[}ݯ´OëNÛ_e¿Þú¤/ùúÿ“ôýHkm¿à’¿o¬/ëÓú^¿‚qþAÿà¿¿òxß 
ìÙüÿ«Kã/×ùý3[_ÖÿŸÞ×õ·åëcÿÒþÌ¿eïù'^1ÿ³™ý³ýÿæðþ:v$ר9gæ¯å/?ä±ÎÿëöÿP°¾·ô·Þ{ø?÷j^’ÿÒÙôX<ý~Ÿ‰Æz“Ïñp0o“[-þ—ËþS¤ùíùöÿÐ}ÚÒ_êÿçòÛç`uíùv÷ä`Ôàñ‘ÅjÍŸ¶ÇìwûXOÙ§ã¹õ<|/ñO=—¾AýÞ£Šörÿ Y>ÿò4Ëý?Þè«ô{/MÖ¶¹oáUÿ¯sÿÒYíð”Ç×Íþ—LgŽ€œç¾úÜù°ÿ„¦?ùíú7aïœ2{cñ€øJcëæÿ?˦3€G@Nsß ÂSüöý°÷ÎH=±ø@ü%1õóŸåÓÀ# '9ï†ðïÙæì_|Fý²îTîûKxdgÔ§ìû(§sž0sŸËŒWá¾#Èþ—ý‹pÿ?ßb¿áµ=lðøåùG­›ó³²vÑè?ö‡¼??c;–%B~Òþ%ëÿ±ÿí\™=¹-éÏL®í<;ÿ‘íMïýˆÞÿóû é¿§Ýf,oðWøãù3Û¿á)æûÿù#ƒÈ sÎ1ó~ÜyAÿ L|~ûÓ׌ýGœžx?ì‚ ð”ÇÀó}ÿ‹ü‘Áä9çù€øJcã÷Þž¼gê8äóÁÿdXÿ„¦>›ïü_äŽ Ï8ÇÌä_±qÏìuû'õýš>Ïáo…Oó¯çìÏþFyOöìgËý¢¦þ]¶_3Ö‡ÁðÇòÿ†ôù˜V²é¿¶/ÆÕgÚfýš?eB¯—ñKöËýwq‚lu¯Ò¸þE˜¿û–¿÷‡õÛ]Û«íÎSæß ø™íñÎO7;ÿfßÙM3ëåüPý²ØöäæN?!»9Pö4ñ4qþȲœf\ÿfß‘ãæ8ÛðÇÂêGnê}O>Á¨é/øJcÿžÙ'ëÏ<ö䓞œñ˜¨Ï´ïˆóá热„›ŒŸ´Çìb |Üíý°>6?‡pœž{csyÙ¿üŠóû¯ÿ¦Ùt¿‰ñGóùþ^·¹öWëú瞤gƒøúsÎkò8?ë§ù|ôéæåéÿÃ[kz=nºíßE{þÌ^ K?‡ž2¶2`ÃûL~ÙÀŒ“×öÀøæý†zžäWë¹Wü‹0ö CÿM®×üüõ¹æTþ$ÿÄÿ?ëN›t>‡ÿ„¦?ùíõ?7ÐóÓž{wÀÜkÐ ?á)þzôë×ñè3ŽßÄ^äPÿ LóÛê~o¡ç§<öï¸Ðÿ Ló×§^¿Aœvþ :÷"€<6}Yu/Ûà˜WÞaý™ÿj²}·üSýŒFrqÔ©ÆÜ}N3_?Ä_î´ì!麆Ô>'þÕ_ðçOûiœ~ǵ™ÏOÙŸãÁõà|-ñNOnO~¼pÆ+æ°oý§ ½ÝzWÖÿòò7êµÖ÷÷´VV³GDþ…þG¥ÿÂS_7ùþ]1œ:sžøoÑNÿ„¦?ùíú7aïœ2{cñ€øJcëæÿ?˦3€G@Nsß ÂSüöý°÷ÎH=±ø@ü%1õóŸåÓÀ# '9ï†òÙ®ì_xÿö˸ ¸?í5ᱞ@ù?cïÙAOSܯ>Àão!¼ Ïýæ?õê?úTý??¸Ö|ÿ¯ëüÐ~Ò·‚ÇÇÿ±Ë1PŸ´×‰Oû±ÿí^Ÿîž[°äc%@ù«-þ;×þ]˾¾ôwèß›×wk„öùÿ_×üÖ?á)æûÿù#ƒÈ sÎ1ó{¦Aÿ L|~ûÓ׌ýGœžx?ì‚ ð”ÇÀó}ÿ‹ü‘Áä9çù€øJcã÷Þž¼gê8äóÁÿdXÿ„¦>›ïü_äŽ Ï8ÇÌÁþÄíŸØÛöI=sû2ü>¹ÏÂß õ^r}3Èàcæá—Å.žóÓ¶£þ´Ó¦Ý_–»÷[ŸNÿœ}1œ÷ÏÓ5¤]×üÇæúù[¸Iú}Ïn–õ°~¿—>ýqïý;U­×õþ_ŸÜ‡ß§–×òýzŸÈGü÷þOá¯ý›?ƒ¿õi|eÿ?þªÙmýkçÿ {mwkˆÿÓþÌ¿eÿù'^1çþngöÐïÿW…ñמ™ã<ç·;¿”|Aÿ’Ã;ÿ¯ØýAžþýÚ—¤¿ô·¯Ïôò>Šô÷>¼g=3ž{žGPH=E|”} ~¯Éhì’Ž×:GÿŸóô?OæE¾×òï÷ÿ_p øg=¯ëÓÛ>ÙÅl¶[ô߯žïóõµ€ù§öÔl~Æßµ¡Îý™~< œ…ž+äñ‘ļœ­{üŽòûà?õ.ž½Þši{Ýrç[øUëÜÿô–sÿð’÷ó=p{œzô½ùç’@_ë“æÃþ_úiíËc޹ç#ßsŽNq@ü$½üÏ\ç½o~yäÿ„—þš{rØã®yÈ÷Ç㓜Pÿ /3׹ǯAÛßžy$§û$Ý}³Å¶Æs¿öšÐÇ^»?dÙY ¿×m.§ñ'þ9~~Z}dzÿÂKÿM?ã§ã#ÐüÜúƒ•ôøIé§LwÈö#¯æ;õ$•?á%ÿ¦ŸŽqÓŒq‘Àè~n}AÊ€ð’ÿÓN˜ï9ìG_ÌwêI*Çø#SþÑý³>®íÞWìÇûP÷é»â§ìt;dpןRÍóüEþëGþÂþ›¨mCâáýWõÿ{oí°qûþ×Óöcøöyé‘ð³Å}û÷<Ž£#5|Æýëÿ_èÿéØ/ø;n´¶§Dþ…þGÿ /3׹ǯAÛßžy$ýáøIé§·-Ž:çœ|qÎ99Åð’÷ó=p{œzô½ùç’@Pþ_úiíËc޹ç#ßsŽNq@ü$½üÏ\ç½o~yäïìwuöÏþØÎÿÚwD뿲G쨃'>ÄÌnÀ àfïÿ¯QÿÒ¦k ¾aûb]ý‹Ä±ýÆâ6~ÓºÐÎAûß²?íTžž¬/'žYþñ/úõ/ý*6íoÇÕY 
žß2—ü$½¼ÏÃ>÷9á,^ñÂKÿM=þ÷sÛ¯=NyBÿÿ /o3ðϰÀíÎG8_Ë Pÿ /ý4÷ûÝÏl¼õ9äu ü@ü$½¼ÏÃ>÷9á,@—ûœþÆ_²1õý˜¾œçþ©W…:1ÆzcñÀVà—Å/ñ?Ï®ÚßËîõkm÷¾«ü^}õùç×®:žŸ—¡8ªõë§š¿Îöµ×ÚAýiÛkì·ûû¥pÿ?\úcñ!µëÒÿæ½oòN?ÈGü÷þOá¯ý›?ƒ¿õi|eúÿ?¦khíóþ­{þ{ßq_Öß—­ÿÔþÌ¿eÿù'^1ÿ³™ý³ÿõ°þ:zàÏcžF1–¯åä±ÎÿëöÿPp§Ðà×û-'å.Ÿß–ÝmköZz)}ý,wüÏ_½Çü¸¯‹òÛú×ñµ­®tå¸ð~¿äúç½?.‚µÏùüs϶‹ÿ€»Àµ¿>¨™msØÏö·>Ÿ³dzÛþ‰gŠùå°?BxÆ0CzùüŽòûåÿú™Gúëù8ç[øUëÜÿô–|ïýº¼üÇëÇ?—~À¨é_×gÍŸ"þÞ¾#ñÆŸû~Ó$øoñ;Åß |aà¯ÿüu£ø»Á1xmµôºðÃïë¶úl^$Ðõø´¨u‹(b¸ÕôX4ïé‰þ•áÍsFÔÒØ%øýûEø¯ÃÞ1ø3ð·àŸˆ~ÜüAñ߯«o‡¾>¶ñ}­ÿ‹î|á;¯Ù÷ãïÆËV_økÆ~ Ôâ×|C'Á¸4ß ¦µ¬Øi—Ö—ºÇ+ÝÛ€q¿±wíUâïÚG¸×íQðËá‡hû¿|U™~x‹Oðv±ñwEøcðç “ã¼|s©k~»ñŸuûAxãö…ð®ª|$ðgé|/ã(<wáÛ_[_ü1¿ñ ¿‰õ ??m߉üs7Áß|»ø-ào‡Ÿ²î½«|Dñ—¯øÚËOÕ?hoÚGã'À/xËW×¼/ñ+Â^°øqð#Oøe7|e­ןÙ!Ðõoø/Mk¿xXì¯ÙûâÆ¥ñá–›â[Æÿ~#\K®xÃI‹Ç¿ t_xÀ+²ð÷‹5­ËWðþ—â]oÅ7pÅ%¾š–÷³éÞ-ñw‡oµ(//¼9â=SE¹²™=£ûtuÜ~œL {ƒÔöÅ}-ûœþÆ_²1Ï_Ù‹à!ÿÌUáOëþüûšÈÏ1ÿ°üg[ÿÌE^½®Ç¯‚ü1ÿÒWüæo‹÷âËöÍø– #Ìý˜¿f~>^vüUý¯ÁëžÍŸð8úOȳÿaÒëÿPô:tü/n¶¹ÅŠøãþÔöêânÞ½yŸOÀã¯Üœ§â/ÂÚ/¾ÿ‚ž|j¼›öˆÔ¼Aðâ_ÙÇÅpüXƒÆ>0x´ÿxÿÁ¿µŽ´¿ i7ð‡‰ítOZÂÏêZŸÃÿ‡-£@ºŽ#ÆšWÄmvãYÕüa¥ø›^á?fïÛ£ãæû3þÎsx“âì»ðgÂÇÃ?:WüUñá÷Ä}gJž/Ù#Ǻ/Á_ ü4¾¹Ó~3xn|QøÃ¥}·ÆðÅám>m[MÓ<âKo |6ñíÆ£t|ûÝiâ&žÒÚi‡—4¶ðK,@J$’5i#êk‰P.-­çÆ<Øb´jæÿuaqgð Äïý§¿c±ƒþÏíað]¹ä¿þ­ ¯›ÿȯ0ÿ°Jÿúm—Oøÿ?Ÿå÷Øý‹ÿ>¿Ž;þ·­~@zŸŸm¿nºýû^Çã¯Â­X[Ù|N€±Ê~Óÿ¶&zqŸÚÃãKuëÈ<ŸN½«ö §þE˜û¡çÿ.ã¯ÏsË©üIÿŽ]-×±½ñ â©àßxÃÅš…5¯k~ðÖµ­hþðó[¦¹âÝSN°žêÃÚT—.¶ðÞëQEaóf(5ÒDB•è~]|7ý²üwðëögý´üqã¯ê:ßdžº¿íã?‡øË¦ÏàËOxƒáçÀ âåǼà÷Õ-<@>øWRKô]L¿›]Ðü-½Ö³®Þk3Þë—@Éð÷ö¦øÅuûE_|ø“ñ3örÓõŸ üA²ð%ßÂí+áÿÄ=#âÄ Rø¦|^?¼s7Å_®‡à]+\Öo<³ëÞÕ¼/ª\|<ñl·´k6~ðÀè—öêœ|Ç¿÷xÇÓ'×Ó®3ŠÁ}@^þÙÿ €$˜¿fÚ`óŒ|ßÿdëíÁ<õ |¿?Ä_î´ì!麆Ô>'þÕ_ðÇÔ¶áÇìaû]œôý˜>>Ÿ~>ø¯'$êö^‰ß ®´}gÂw:‡‹4¿ ߯ۼ_à;_¶Òîü/¿>³­i>×<×Iý¯|c↱ž‡ði¼ |FøÃâxKâ†~&ë~.øÉ¯|3ð–£û1|qøÙ¦ê—²Ïãxûź׊¿áLÛAàÿ|AÔô‹ÿi¥ß‰5»xuî$²îÿbïÚ«Åß´q®x³Çß/ðœÚ@Ñ/·CðßÅöºÎ¯M↲Ô4xbûŸûuyù׎.ý€?QÒ€=oö¸ûUÏíq09ûPixádÿÙe}qÎÞ}³¸ÃxŸûÌëÔô©ÿ[}ưø~§õ×ïöé¸×?²<Ä‘·ö ÔÇ¿Íû'þÔëë·¸è9tn2Ï÷‡ÿ^¥ÿ¥@'·Ìò/íÑ×qúqý01îSÛï™’?µíñÁŸµ¶‹£Éâ›ÍÁð‡ì½â‡^ Ò¾1ê¿ |[ñwÅÿ>?øÿÁß´/øÏÚç‡þ]üð7ÃÏÙw^Õ¾"x‹K×üme§êŸ´7í#ñ“à‹¼e«ëÞø•á/Ø|8ø§ü2›Ç¾2ŽV‚kÏìèz·Šü¦µß‹¼,öWìýñcRøðËMñF­ã‡¿®%×´ýˆbïÙç¯ì¿ðç×? 
|'Ïðã×?Ë9® ;Îzý©iÿow»Zytk²r×n›u~]žÛŸRñéïþ'¸é‚¼ãs ëú·}6í¨??M»}Ý:[³Ð_×òçß®=ÿ§jÝ^~=ÃðûôòÚþ_¯Sùÿ‚þÿÉãü5ÿ³gðwþ­/Œ¿Öµ†ß?ëúÿ1ÿÕþÌeÿù'~1ÿ³˜ý´;ÿÕá|tçÔ`dàpÞ܆þPñþKóþ¿áºÔïþ—cèpjøZ^’õ~ûûõí¶ßkš?Eõü{g޽3Ôõ'§PHÇ5ñë~·ü­ø|î¾N;Чüzç#§‡ý'ûD_µ„¹ÎïÚ~ÇñÛû*þËËøôíè߆xŒÿã §ÿbÜ2ôýö)ÿOåÐõ°?Áÿ¸’Ó~‘é¶½ú5®âþßSýš/Ù>lãoí?}ßûß²¯íD=ýzsÇLÓðçþGõì[‰ÿÓØ_øÕƒüþ8þR×k~*Û4þ#çoí¯ö¸9ïøúóÓÐ~GæýÈòEÏ?êsô÷=Çû\p@4Ó¬ä[!œç¨9'9ÁåçܸWX V ªp`Èc ‚ð€Û_ípsßñõç§ üÌ÷—ìEÏì_û!ûþÌ?öJ|(Ï¥>ænù–cÿaøÏýH«ý~G¯hÃü1ÿÒSý{úÛCãÚ*÷쟶†qæ~̳w~»~+~×>ݳëíÆ~oÒx þE˜Ïû–Ÿ÷/Cóý,WÇðþ¯úÿ†9_í¯öú__ø·R@üÎï¹9OðûNüa Ÿÿgb8-³âíM÷¸ÉÀ`O ÀÇñ²Î¬?ᘿgb—ð'àþÁÚWá÷‡°@$ᔎF#Äï_Û<›ÛüýN?.8çÂxçSûEÇÂHwgíCûñŸúº¯ƒ‡¦[ºÿ†2MyÙ¿üŠóû¯ÿ¦Ùt¿‰ñEýÌýÕÿ>§·ON}=ò1_/ëþg~ŸäzŸ×k·×ï·k?[Ëð¯Àú§‘qñjØÙûPþØdñ»ö«øÈÞÇ¿÷ê þ¿”ȯ/ÿ°Júm]OâOŠ[úùi÷Ïö×û~ý±ÿâS^‰g§ÏÏ×ùÛý¯\œP¶¶òFà»w7r¹‘œÜàž™ g§ÏÏ×ùÛý¯\œPaû6Þý³öÓðÝŸ²ïí#žøÏÅÙ_îç¯8ÁÎM|÷»a(ØJÿÓU?ONúÙ#jÿêºVùŸiþܱoí|ê×¾?žOòJ|Yß¿¯=ÆGñWÌa?Þ°¿öGÿNG×óûΉ|2ÿ üÏírNþžýqø¨ý¾r7~pmY\}®¤ý×”რ‚H8#w r"€u®IßÓß®?¡÷ÎFà]W;H˜±ÇcŒ³tÉ “Œà`œ@:×$ïéï׊Ðûç#pÔðOiþÑiûXKœîý¨ìùõ+û+~ËŠsí‘ïÈÝÈ_ŸÍ/õ¥§ü¹þ•=õõßÕsj£µ?‡þÞ’ÛMöüåNò_ø(Lÿf´ý“¦Éj+Þsëû*~Ô‹ÏÊO|`ã®pI"žYþñ/úõ/ý*žß?ëúÿ3åßí¯ö¸9ïøúóÓÐ~Gæ÷̃û]IRH% `O%X®Ü®K`Jœn%ISŒÀuœ‚ d0 ‚sœõ$ç8 ü¼û‚7*ë@ ÁU@ 9 tð0@þûký®{þ>¼ôô‘ù€?D?aãŸØ¯ö@>¿²ïÀïÿ$ŸÂ_ãþs^swœµûR¿ž¯ÊëÉ{«]6h{—žšöÕzýÚÇêu9íÿëë×$úõrA8¦½/åÿ oë¾ÁýiÛkì·ûú4®;üþÿ¯ôÇã†Ú/Oø?†ïn»t{¶ƒïÿ‚ÿàzßåxÿ!ð_ßù°}¾þ8ÎzñÛn?·”s¹¾¼vú7R3ߟEçpý¼¿Þ>°}¾þ8ÎzñÛn?·”s¹¾¼vú7R3ߟEçpÙŸðNKµhßµDãßµ ¸ÿ¾e¿Ù…¼zc×è>o¼Gvâ }á7 òýö+î¾»ò­7ÖÇ­‚þý¿/ÉuôèìšNÛ5#þ 5r-4oÙZbpö¡¸Ÿ{ö[ý§‡|g$œóÈãŒÕøsÿ#ú¿ö-Äéì/õ×óq1¿Á_ãKt~ž]:ÝØøÏûyGñ7éõþðã®:úAgýÈòCûy¼OêÝóÈG\}ê?·”~Ÿ_ï:㯠äpûy¼OêÝóÈG\}ê?·”~Ÿ_ï:㯠äpÔߨxçö,ýO¯ì»ðþ ü%ýkùï2ð§˜ï®?×¶"§¥ßߢÓff?Æß}~ëújõÝHø#ö´Ô—í§ãIoì½û:ús·âÇíiïמÍŒýÞH¯Ò¸ þE˜Ïû—þ£Ðþ¿áŽ _Çžîݵg”ÿo¯÷›éõí÷úär0}ó_trž k‹ÿ )ñe÷»àwìô¿ƒÇß´éþñäoÀ8ßµ/Áöþþz¯#ó9Á¯78ÿ‘Vaÿ`uÿôÛ.Ÿñ!þ8þ~òõ±ýÿ/Ïñ#±ÿ=ëñôïÖýuZþvÓÉvÚË›ÔùkO¹ëu×ü¶?ž-W[mwã4$œÇûQþ×€àâý©~0ß8ù³ÐsÉ$|µûQÿ"¼¿þÁ(é´yu?‰?ñËó~Ÿ×}Ηû}¼ß\ô퟾ëѽûy}Oá·¿Ñ '9#*Àöúÿy¾¹éÛ?|þ×£ÛËê ½þŒxý9ÉVõßÙPß¶·„$ù²çíyǾ,þÉ#?xÿw®Aéë_;ÄŸî”?ì%t¿üº«æ­ëù]¸íCâáš?@¿n#Ø«öÀ>Ÿ²çí}O 
ü[“Û$÷öÎÆæpzâ°¿öGÏþ^Ç×óûìtOá—ø_ä~Po(çs}xíôn¤g¿>‹ÎïÒþÞ_ïNØ>ßg=xíŽ7ÛÊ9Üß^;}©ïÏ¢ó¸þÞ_ïNØ>ßg=xíŽ7ÛÊ9Üß^;}©ïÏ¢ó¸î?ø&µÈºÐ?j™º‡ý©!÷Ïì·û/©þ#ýÐ7|¾˜¯Í_ûTëÌ?ô¹ù¯+èôícz_ ÿïeùt×{^ú ÿ)¹¾ý•§$ájYÇýõû-þÔžBõ$qÔ0äUåï¯î¥¯zÔÛ¶»~Ÿðöý%ðßöòâoÓëýáÇ\uô‚Ïï™öòÿxþŸÕ»ç:ޏûÔo(þ&ý>¿ÞuÇ_AÈ,àöòÿxþŸÕ»ç:ޏûÔo(þ&ý>¿ÞuÇ_AÈ,à­_°ÑÏìQûŸ_Ùkö}?ŸÂo W˜ß¿;·ñËÿJß²·~ö¶ÌÕצÛ_[ÛÎÎïv}P‡ž¹ÏQþzžçƒÇ~¡oúþ·üþð~~›vûºt·g¡ üÿ¯¾xø®1ZEÿWóõKîWÕovîý<­«ò»ü/sùÿ‚þÿÉã|5ÿ³gðwþ­/Œ¿^þÿtÃo˜¿¯ëoËî?ÿ×þÌeÿù'~1ÿ³˜ý´;ÿÕá|tçÔ`dàpÞ܆þMñ þK<ó¯ïðºwÿ`ÂïÓ{nôé{ŸE‚ÿu¥ÿotßß–šèÕô¿ÙùŸEuü}OϯSÔžFF9¯‘þ»~ÛïûΉtßòü6¾«g¢²vñýsÓ<ô>™ëŒ‚H?êÚô¾«}_m,¯{’?·âzúúu=>¾ã­æ|màß üEðo‹~xËM]gÂ;ðοàïhíu{bº·†¼O¥]hšî˜×ºmÍž£f·ÚeõÕ©ºÓï,ïmľu­ÔªH½xzÕ0õib(ÉÓ­B­:Ô¦–°©JJtä”®›Œ’’º¶–³KÞM&šj馚îž}ÇÊCþ çû+gÃFêæiî}¿ä±ŒßG¡ ñŠúuÆÜTÞ¹Ö+ÿ)vÞÊ•ÞÞ^m˜}Wÿ>£ÿ“ü;ËöVÿ¡gâþ$Ïí;ÛžGü.,äqýÜõëòVßë¯Ðç÷Qÿåbx\?JKåÖÿ×mÅÿ‡y~Êßô,|QÿÄ™ý§}qù,ÔAÇ-qã>(ó8Å?•oü§¯ÉömFïš>­BßÂþwÚûuûÕ•¯fù„ÿ‡y~ÊÇþeŸŠ?í3ûN‘Çsÿˆc‡on§+V¸Ë‰ÿèqŠ~IQ_ûÿ]¯pú½ùõ•îû%yuòRÕoaáÞ_²·ý ?~ŸðÓ?´ï®1ÿ%?‘ä÷­cƯCþ}Fÿ>×½¶ù_mÜu”½÷à×À¯†´-{ß 4=ODÓ'×]“@ð·‡ô¸-¤¶VÖºeºÛÛDí3Kåãs,ng^8Œ~"xšñ§*¬Ô9•8ÊrŒmÅ;9ËV¤õµÒ·.„i«F*)»¨«î×Kó'{oµ×[©DøÉð+á‡ÇÝ AðçÅ= SÖôÏ øž/x|èþ1ñ¯µM'İè> ðÂêVš÷€¼Gáv6oø§Ä:Tö¿Úmcsk©Î³ÛJË ¦¸ Ç–ÕxŒ"xjÒ¦é:”ù\½œå8ÞpšI¸E¿u=-¥Úá Å)Æê÷³»W¶ý^ý“JÎGƒø'ìªzøcâþ$Ïí=ý\½ºöýoj<[Äo|ßÿ”´ÿÊK¢zþV÷óxz7þ~Kÿ·_Ö×Ñÿ‡wþÊŸô+üQϧü4Ïí§•­ñ ÿ™®#Ó÷_}ý”ºZÝüþÉõz?óî?wÿtøwì©ÿB¿Åó×ö™ý§‡áÿ%ôzç¨í¶×q ×ü*b?ò—éBÿp}^üûÝÿۯ뽮/ü;ÃöTãþ)Šø“´ï8ÿºÈ;ÿ‘÷kEÅû_ò3Äå?Òšù]_ÓPú½ù÷»ÿºZx'Á¾øsàÏü=ðf˜º/ƒü áÁ¾Ñ–êöùtŸ øcJµÑt-1ou;›ÍFðXiv6¶¢êþòîöqšêæyÙå"UgV¥JÕ$çV­IT©'§4êJó“¶žô¤Ý’ÑíËrÚI%²JË­’]•’øïªv&½ñ„~Ò<Údž¾+|\øz×>Ðu_kz6™}eðóÇžÓµÓuOøŽêÖêúÎâú#«\Ä.¹Žõ0y¾e—Ó•,.¦œçí%(k7Ç™¹FR¿,b­{+u»åÂP„Ÿ½'¶¿§»//ø?gäÏŠÿ²Ÿì£à[ÍÁøuñKâÆOÛ\OàχVµ7í9b>Áo*Û^øËÆÚÐø¹yƒ>èSËÖ|Qyiq-ÅÃE øcLñŠo´Ý÷Ö¡ŸgµTªO3¯N„¿W–›Mô…8òÅΤºCþÞœ¡çöT¿’7íúí¢ù?$´‰¹ð³þ cû8xNÊû[ñâüJñÏÄï%™ñ‹-¾?þÒö]:ãS»Ñ|!á*Óã—w‚|%.¹¬Gá»=_QÖõÖmORÔµ­sRÕ5;Û‡*q6q)Zž:µ:kH¯rS·óN\¯šn×v´Ñ„V ¨Ó_b/åo¹Z_’󾜾Mÿçø=û;\}·ÃžøÃñ[ö|·†ÚÜøAý iñ7àž“eo´ ð÷NÒ>+Ù¯Äï‡ZU¤(¿ð„ËŸ´ h¤ÿ„[PñÜ-¦øCMê‡æµâ¢ó ”k-#/ݪ5ujÓn ÙOM*/ÝIüJ¥P^Æšû ®½×¦º¯$ïþ+û¿Dø7ö"ý‰¾!økJñí¼yâ k3ZÑÿj?ÚjîÎéÞ ã.Ÿ·ÁwgsÖz…ÊC{§ßA=ô÷vó@¹¼û<„Ü*c«ÆQÑ©F Ýoü5ºÕ=šÕ_í5F‹û 
ÿ&ÿä×å÷¾û~Ë:»á¿Yø7ÇW:§„¼OáŸè+®||ý¡ø§¥hzcjþ Õõ-VæÓIÓl¬’êîf‚Ýʯ¯K9Í(Â*Xº‘§N1§%XÆ+–){—Ñ.÷{ÞîæN•=Ü5z¿7»½›Kä’3ÿáÝ¿²—ý ßêæiâF?î°ŽsÐôã^ˆç™«_ïµò›ÿÛcø®Û; ö4¿‘~+ó“ë§t÷I4ÅðNßÙK§ü"¿û`ÃLþÓÀváq’zžÎH&ª9Þjÿæ2«Ûù_Gý[{µ$èÒþT­]¯Ý¯¾Úy´‰ü¯öPÿ¡[â‡þ$Ïí=“û¬\{ãéÚµYÎgÿA•ò_þAÿ]¶3táü«úû¿®û‡ü;¯öPó*üPöÿŒšý§¿ùñz÷z¸ç›ßWÿ$ÿ%ÿÏg¤OgåG¤ü$ý¾|ñœŸ¾x_ÄÖ>0“Âúǃ³â_Šÿ~"=¿†uýWú޳¦XÙ|Dñ׊´í8j:Ÿ„|9wwuagoy7öM¬&è@%‰â¾;‰‚§ˆ¯:°Œ”Ò’Ž’IÆêÉ=›ë¯›ÔjŽ©$öþ¿Ü|oàß |Gðg‹þøÏL]kÁþ<ðÆ¿àÏèÏw}dº¿†¼Q¥]hzî˜×ÚeÕ–§h·ú]õÕ©ºÓîíomüÃ5ռ蒬Sœ£(Î.Ò„£(¾Î.ñzól×U«þo³M_NCäøw_ì¡ÿB¯ÄÿüI¯Úxþ?òXò²3í÷[ÑY®a÷ªŸ;|ßÀ¶Zﯞ„{8*ÿëý”?èTøŸ×§ü4Ïí=Œç±ÿ…ÅŸÇ‘Æ@"µYž?þ‚jä¿wËoÅnL¡öW]t}/³ôï­ÞÖBúÿeú~'÷ÿ›™ý§ãÿ%„tOQ’ µG2ÇuÄTÿÉÉÚÚö¾÷n) ’?ʾëvÓy]6÷[lÛºqpÿ‚u~Êü'ñ?¹ÿ“™ý§ºç¦áqöç±éÆ@ÛZÿhãè"tC’=—ÜŸKß§—MŸF˜ïøwWìŸÿB¯ÄÿüIŸÚÿŸiÃÿåüûý¼ôÑ®Ýw¾–Hÿ*×Êßsæw¿’ÓºV‘ôÁ€ÿ ¿gí ]ð×­RÐô¿x¢okÿÛ>0ñ·Žu=[ijè>ð»êwz÷|Eâ}zF_øWÃú\ƒR[+k].mk´ÌùÔ«R´”ªÉÎVå¼»ke¦öm¾ý†’KOëK¶•’w·u¥“J×$øÏðáoí è>ø­¡jZî•áÃã?Æ>5ðF§¥x–ßAñ…ÓSµ×¼â/ kѱðÿŠüA¥ÍjuO±]Zê“‹›YdXž-iU'ÍNN-®[§wmü®Ú®É÷ÞÄÉ&õ^{iúßï·k¤Ÿÿáݲwý ÿñ&iï\c?ð¹~¾¾Ü©cq;:²wòŠíþ~Mn¯fˆäoÄ?áݲwý ŸøÏ'ö™ý§ºqœñ—ŒqÆW·9 Z,V#þ~Ëÿ%ÿ%ùiæ>Xöþ½?«|…ÿ‡t~Éßô)üN?_ÚgöžàôÆár{ÇÔgQÅWëVOæ—ùÿé>—Ø\‘íøÿ__˜ú?dïúþ'÷ÿ›˜ý§yÿæ± `ö?“Z}b·üü—ßÿÚ/ÏîHöü«ÿHwü;£öNÿ¡Oâwþ$Ïí=ÿÏü}€­cˆªþÛü? 
Å··n›» qŸõ·Ý÷þ{\x#Ážøqà¿|<ð^˜º'ƒüá}ÁžÑ’êöùtŸ x_J´Ðô-1ou;›ÍFñl4»u¨^]ÞÎ"ónîgÞWWm¾íêÿ7¥šï§ã±›ÓôVÿ‡½ö¾½´V”z¥ëøÿ‘ýzvã5ªwþ¯×nú[µ»^È_?»µ½ÿF–¤¾Ÿä=Ücôkôí·äöèš¿’‘ýz_çÛ¯½•Ïä#þ ûÿ'ð×þÍŸÁßú´¾2ýŸÓ5×OoŸéóüþñ_Öß—­ÿÐþÌ¿eÿù'^1ÿ³™ý³ÿõ°þ:zàÏcžF1–¯äÏÿä³Ï?ëöÏþ`0¾Ÿ×}¢ÁºÒô—¯Å.ºÝy5­šÖÈú'üÿ‰ãyÜyã€F¯‘ŽßÖž_uºyëÌt½½<»ßN¶]¶×}Ôƒ?\uý}2äõ#ƒÏ5qvkOøo¿§k;ü—,SþÉîyã#¶F>m¿¯êà;üöüûè>íkýM½—[+éÙåôÈõÇ×ñHÉàåAéŠÑoý~kýk°ÿëžýazž9錊Ùl¿M¿{^ïîÏ^ëôäç#žœf®?Ó×®›-}µž÷ºåÉ«7åózéÑtèûÚüÛÄü?sÔòÁäc'}l/’û´½Þ©jžš5£Vó×·_`I9Î:G8´ƒôïú~‹vÛ¿K{ÇõðëÛÕ[u}[i½ÅÏN¿×ë¦:äc­¾[vý:í{§Ùõlwùÿ?n¾?éo¾«î³Óûºím6ò¶›»»]Š:¯ùïýGáW¯ãÿþ¥æ„þîë·ë÷ö%ý}·uúöÉ9'Œ sÉ­¿¯ëoËîž{ò9ëƒÏ#Ó#+H¿/[mêí·›òò ÿëàö?Ÿ~zö g5 ëõç<õì=øéÇ^•´Ï«ùÝtwóoÖ×ÿõÿžzç<ŒçÔ`nµýÁëk>Ÿä'ýuû¼ïýh|ùñ?âþµiâ%øCðsKÓ¼_ñŸP°·Ô/WRk†ðWÂ_´‘[xçâ•匑ÜCk?“p|)à‹ ˜ü ÑþZj÷²jšŒ¾ øÂæßSø‹ñ7Äknþ)ñ¶¯oÅln>̉g¢øsFŠYl¼'àÝ+Oø[Lf´Ó,ÄóßÞÞÕJò¬â¬©Ò‚å¥J òSÕíwyJVN¥IsN{¹$£‰[]]ú¾¶û¶^_qìÎ}úýî?,œŽ5 IþÏãZÅß¾Ýü÷õo¯•»óWŒ¾ 뺉u_Š¿³î©¥ø+â¯p/üiàÝd\§ÂŒ’¢$lþ4Ótø®.|1ãi Š;kŠþ³Û¤v°ø¯IñÞ‡§Ùø~.ÚuâãX„çMi «{Zàoâ‚wn”¥Ý· >h«u_5ÑÿÁóÿ#¯øYñ¯Bø‘uªø[PÒµO|Tðµ¼xÓáW‹ ´>'СžF·ƒZÓ'´–m/Æ> Ô®ÓCñÏ…îµ/ê%ZÊ{?]´Ôô[ SJIª”¦ß%Xk =Úw»„×Ú§'Í×4\d ßÉözüü—ž½º§gþCÛ׿·OOvˆ¾šÛðý-ëÑÛ}P|µõ·ãÕuë§mƒõïëõôë×ŽÝÆ0»_ÖÿŸÞ‡M4ôï¶«_K;ÞGëú瞤gƒøúsÎkX½µü{÷Ù^úlÛºóbÿ†¶ÖôzÝuÛ¾Šöÿ×ëõ玽ñž;ŒVƒþ·§}º^ÚôwLŽçùþ<ãߌ`v5ºw[ßþGÓO+i蜲jÏkÀêºëç}}Z‹¿Ïùÿ?Ê®/úÛõ^WÑé}¬!ÃüúuêyÀã¾AéÓšÛñóîÿ_òNxëÜ€ÜtÆ iøï¯Þúkªï¢Óg ÷ëßõçŽëØç§LšÓúïý]€?¯ù'Ž™ç?{ŽàŠÚ/OéÿWwÙ[ÒÈõý˜¹ü¿ßêäõ Fy«Mü¿­z[ÖþZ\ÎÞZíøigï6¬¶jÏTìÒå?\ÿ.ÿ™ûß{Ž|¿6Ëúëþ~}~ûèû~ïeðü:Ù¸ê¹dçßߌõëþ+H?ëçÚöùèö²z‡ÊÏþ–ø¬¶jÛßUhŸçüÿŸåZ‹úZjº[¾ŠÖzjÓi^Ä€ñùÿ>§•ƒéÏ#¦mbþðú·¬R³è“{oe̞ݿàie¤¥ª]ZI¦´×•Ï^1דÀÉ9ÎàhüO>¿^½@àôÆOO@+tî»ÿ]z_ÒÖüdøÿ{·_§'9Æp8Í\_õý5å}¬þ¿^üõ뎽Éä`c…Ù^~?ø`?Ïùç?ŸçZEëÖï{»ýÛkÕï¦Û8€VŸ×õ·çéc9/ëm–ËÝnÖóKGÞà?Ïÿ_üÿ*Ú/D½|ïÿõë·“q‡æü¶¶ß/–Ëtݵ&#×ê9ê=ñÁçpéšÖÿ×ù«zþW¼OÃôÓUmü®üûŸÈOü÷þOá¯ý›?ƒ¿õi|eú÷÷üë®—Ãó×ü mµÝ®/ëúÛòûÿÑþÌeÿù'^1ÿ³˜ý´?õ°¾:äúŒÝ¾1†þKñþK,óþ¿a¿õ } ýÖ—ý¼ý}ùhú=zEuü}Ož™êz“õ\Žõòoúþºiµôivqê{WÚûh“[iö^·²Aøþ¹àgž™¦ìó‚mþ_wõ©›þ¿¯êã‡óç¯|ð3ž{Ÿ®HÝŒ.Ééæ¾O§M–ëkÛæÔAÿçëþOÓõ!´‹ûï¿éçŸËpqøö>½qÆzuÎ01œV©õù€ÿÇñêxõî1õ\ç8É+ZÃúÒßv¶ïÞÛ]YsÿSúçgæ#¹éÁÉ€ùt_×õ·ÉÞûitÈ’Koê×}tÒÉo¶ŽöAøŸ®sÓ®xcŽã®H'!¶Nÿ×Ýßðví{÷y+[~׿Nþ[Z×i§?ÏõÏCÏnO|A< 
¸»~]~ô–½<¿ I[Oëµï×Ézooˆ?îsž¯ŒÀzä‚I°úôò^½“ºwòík¥f;üàúþ¿ÏÜg¤>kõÛóºík-õr]?àù^û?-šÓF¾Ð¿çüÿ‘üÃj·þ¾î»ÿÃÛpÒíþwï¦ÖZ4å*þ½É-Ûž¹ç$ŒV±þ··ž¯þ·ór‘Ç€Hþciúuîx­"ÿá÷·§ËüŸPðüøçÓ¿N½qÜgŠØüç={vû¸íÜV~oþÎêÚzÞÍt°2øïâ—‹§ŒçשêON£#Ö‘o·üöÝëÖís=" øþ¹éžzLõÆA ¡Ógµ¬žÞ©¦íÑZÚ½P_~™äúŸ»Ôdg¥mëòÕ¾Ÿ7³bùþW½´]•¹¾­qàñÿ×ÉíôþYéœj×Ýùü­f¬®ÿ;ì.»mÿÝÿŠÖµì”…­Ö¿×ü?çé{‹úü/tšWÓM×»dÒÜzžÙÇãØ~£^ 5Åúöù/ÊÚ¿=mnV¤?—––í¶®÷ùíkÚÎ.ïø÷õÎ1×ê~¹#v­¯ëÏï#úþ¶ü¾àüO¹Îx¼1ÿë’ $V±wþ¿>ºiÙ=×U _óÛ×§ÞíÏ÷½Aãmh·_ðK?ë®ÀŸ~}@ã'Ž1ž™÷ùŽUv‹ºýoëæ—MÝÔAÃüÿžž=21WþÒýv饬À+eý_Õ‰—ßùÿÁ¾‹ðëÍ®/§ÝëÓóoÊ×\×j9|þk]>å¾Ýú4µ$SŸ\ž¼ýyõúlž+Pþ­m¾÷Õ‹Ï£?¿ø/ïüž7Ã_û6êÒøËõþL×]-có}û-¯wøïßA_Öß—­ÿÒþÌeÿù'^1ÿ³™ý³ÿõ°þ:zã{ò1š¿’üCÿ’Ï<ÿ¯Øoý@ŸE‚ÿu¥§IzüRëÕy4ïf´²Rú+ñÿ=ÎF:÷ëÇŒ_äôóÓî×ÎÛk¥Û:ÿÏeø+;ìö½®ú;Ü3߯מ8¯cž2kRkO[lúÚëÍìöµ¢£¯ùây?{ŽùqZÁéþ]7ó~úëw"Gþ¿—>ýqïý;V‹ëú~šú=€_ËéþqÔ÷Ï>Ø­–Ý~Õüõ»üB@súõúõÆBŽNFîGN•¤^¿5ýu{/OGfþ?áëŸNNr9㠌֢—Ïüÿசïß@ÿ?¯SʨÁu*δÝJ’rœž¯Nš$’IE$’ŒRQŠ\©$’)$•’·õó×çës¯ý.}úãßúv§ý_ô¿]¯¦¶ÑêL×_Ó_¾ß;k×U ¿çüþ5§õýmùýÆ_×õ·å§•ÅG×ü÷þ£ð­¢î¿?;­ý¶óÑ(ü?ÇßÛŸL÷8ÍZßúü|»é·m@wùÿ<þ?Óµn¶þ¿á¾í;ŸóøÕöíô½¿¯5g§]mÈ×ü?¡x¯DÕ|5â}Kñ‡uË3Yе» ]SHÕ´ë¸Ì7V:Ž{ö—v·1ŽX'‰¢‘xe=kxÊP’”[Œ¢îšvi®©­S]×è/ÕÛåú÷¶ºvØùsû+âOìÏûß Á⌿³õ¿3x7ξñ/Æ?ƒ¶ ÷¤ðUÕÔ—·Å¯‡ú|Cwü!š…Å×Ŀ١‡Â:—­WLðn›Ü¥O½îJ5ÿŸHÒ¬ÿ½nUJ¤ß”ðg|'ñÃZWŒ|â /ÅÖà7fµ£ÝGwgr‹#Á+XúþzúôNïÍ뻵Àùÿž'©çŽ„`ŠÑ_×ù{çüç“ØÏcÆF1Íl×õÿ ¿m€?Ïøž1×Çž8`Џ¿ëúkM¯£Ó],Ÿ®:þ¾™rzƒ‘ÁçšÛñóîKþºtî“o½šiõµãÊ¿_óž=OÞûÜpå;´‹ûÞý}z­u¿ÚÑYZÍú>߆»Ù|?¶n:®W)þ¿Ïœà…àžÙ$~U¢Óúÿ†üý,+?ø[âv²Ù«o}U¢ïóþÏò­£·üüþnû/ºÁ÷ùw^]ô_ ÓY&ÒæHQþzzýxõÎsôÉÛkëüÕ½+Þ'ü7ÝÛI7tµRÑY«]¾Y?Ï^1דÀÉ9Îà²þºÿŸŸ_¾Ä?Ÿåþ^]>ë‡âyõúõê¦2zzZAÿ_>×·ÏGµ“ÔAþ?Þí×éÉÎqœFN3Zÿ_ÖÿŸÞÿŸ×¯\uèFO#Ö/Îÿ­ß]lšom_›·¼ ?úüõõ÷'=2;dã5hÿ?ç¯ùôé[§u½ÿàô}4ò¶ž‰É=¿¯òwôüíiçüÿŸåWýMy_G§k>¿ååçw¶š÷OM‡¯òü»sÉ㣦ÃV«oëôÓîá饴۫ò×}vÜþC?à¿¿òxß ìÙüÿ«Kã/׿¿ç]t~þ'ù/ë¯ãh¯ëúÛòûÿÓþÌeÿù'~1ÿ³™ý´=øÿ†ÂøëøŒ{†èq›ù+Ä;ÿ®yïom†ÿÔ 'ß÷içt} ýÖ—ý¼×Ÿ¾ìŸG¯Î;õ>Šü?¨ëÓÔúýW+Ôׯ­ÿÛzÿJþgOo¿³Ñ_É+m¦Éë{EãÛ®sǯL‚8þ.ûH!¶_Ö–ý_á§Mmy)/+|­þwÕ½ž–êšúûñœôÏ_SÈ<ŒŒrH¿ÃçÓ}úZÝ.´mÚ<°8ÿúÁÏõô짦z‘Zÿ_ÖߟÜÿž}zûôúñÛ5´vÿ?ÏúÙ[Ì =ñúœþ£ìG^ç*×ÿNþ]zÿKpßñþ¸ÆrqëÐàä© 
b¶[/ø?®¿Ö¢{?ëó¿ü-Ãñ?\ç§\ðÇÇ\NCiÓñ¶ºö××£ÚÏKræ÷é»ÒÖ¿¢w½Ýú+rÚöh;þ=ýsŒuúŸ®HÝ€+Q[O—é{íéòÑòï#ñ>ç9àzðÇü®H$‘[Eþ>¿¯•öVÒÚr¡õéä½{'tïåÚ×JÌQþsë“ÇSÓŸ_PF6­¯óþºÿZ;^â¶ŸÓé{ìü¶kMÑÈwùúÿ“ôýHm–ß×üÏÒá÷y$¿$îö½ìš[®QW¨þ¿^}yïÐ`€>m íùuë÷®«kt]['úïÛÉ~¶Ùì˜éeޤšyaåši]#Ž8£RÒI$µcH”3;³*ª‚Ç¡ªü{ >?¸×‘à}WVð—ì× ÒZø‡â^ws¤ø£ã¯”í׆þê–ÒA}áÿ…®VKm{ⵓêøÂ?7Nøg=®”íãwõ!àRuTg‹zÂŒ­(a¶´ë­c*úÞ»MûÕù¦½™Ëo‡«ë/%åçîßdþÑõ_‡|; øKBÒ×IÑ4MÎßNÒô­2Æ¥……¤PÛÚÛ[D‹PÅ"(ã';¹e9T›œå)ÎRr”¤ù¥&÷m»]¿7¯–¥¤’²Ù?†~¾¿™éÿê'ªw×þèŸ}×}ì€?þ¾:öõôÛý*ãýywÙ«h¯çki¼f_ÒÓ_[ß­µ¶—¾·Bÿž}zûôúñÛ8­¿¯¸Ëúþ¿áþàÿ?çüæH?ŸOOÇ¢ôZè›L_×õý~Dª^l’úù œVŸ×õ·õß`þÏùÌ6ÑÛþ þ[½WÊûîÚÿ<úõ÷éõã¶qZG}?«éúöèµÚüÿŸò?mWõýi×Ëîõý_¨½Ï®?Aíq¸dVÖ‹É~wõ×ÞnÑ>Vò½öÖÝ–ÛßËݵϚüeðW]мKªüUýŸµ]3Á_µyÅÿ<¬ ‘ð§ã‘Æ‘“ãM2ÂÞk øÕàŠ;kŠþµÛ¤v°x«JñÖ…aiáø»aZ.*–!9ÓJКþ-ÿ#msAnéIò½y]9>bZ³º²o{í/[=7K½ìß2v]ð·ãV…ñ"ëUð¶£¥jžø«á{x'ñ§Â¯}š/èpÏ!·ƒ[Òî-&ŸLñ‚u+”tÑÞ–iþ:ù®Ÿ‚²OïæVoÊé÷ºå”`Çü}Îqõ÷­“º¿õúþ~j÷2jÎß×éùy;X_óþÈþa´‹õ×Míé¯K$ô꺫%$8sÿ×ç¼ç]vÛþ»ÿ­kÙ)ÿ?ç¯óúf´‹¾þ_Ó×»¿®«™éóùõÚ÷¶‰é§Åµ“µ®çüÿŸçZåòÛîÝ;¿)ZÉ]¤¥éïüúc$çÔóß‘»VÑw_×á«}:ùÙ²þ¿]—}?…üO¿9é×$F8ãåõÁ%…Zû¿?•¬Õ•ßç}„/øþ¹é×ê{àò ÀZÙ;«ÿ_¯çæ¯pñ?\ö¼dc®>^:òH­"íúöý-ew×½ôqÏëÀëÛ“ü^¡¸µþ¿­ÿ?¼V±×¿ß~ûõÓNÉî¶j@V‹øÒÏú뱓õ²^_{Ñ%w¢Ñí£JìP}ñüÿ `Œg>ýzV±Û¿ŸéßO;w²½‰þ­Ûmu}Wø»yŸÈoüóþOá¯ý›?ƒ¿õiüe÷=>¿LWeÿ‰þH_×õ·åëcÿÔþÌeÿù'^1ÿ³™ý³ÿõ°þ:zã{ò1š¿’‹þëKN’õø¥×ªòiÞÍid¥ôWùçõ9ëÜóòç`Šøã¯ú²ü¼º^Úï{ó{õïúóÇuìsÓ¦Mmé~Nýt[Ù-÷÷yS]6{yí£¿¼Þ-®«K.Súÿ’xéžs÷¸à­"ìúúwûú­ÖúéÖñÏúþ¶ü¾áê¯óç8!x'¶I•l¶þ¿]~ðþÏãž{þ¤^žúýmmo¾èSõý:ñóu}x=0"´_ÕÿÈþ_Ó~œœäsÀ ŒÖÑ×õ{Ù[Wäô½€?Ï?Ìò rx+ÉÈ<\wÚÿ×ãøþN9½öò²ý7m[º³³ÑY!Ï^1דÀÉ9Îà²Ø_ÖÚöZo¦é·«j÷¸~?Ÿ×¿8㨠g‘è7iOÃõó×v’ÛK®S^ß%×Ó®ÉÝ;õÙÝŸ÷»uúrsœg“ŒÖ¢ÛÿÙÖûyz«­[MÚãÿÏÿ_¯§?Óµk§õoÕ¯›×}u—ª_•ý/tí³ëÌ¥—­ëz7†´}SÄ>"Õtí AÑ,.µ]cYÕï-ôÝ+JÓ,a{›ÝCQ¿»’+[;;X#’k‹™åH¡‰äuUÊíÊsŒaJrj1ŒS”¥'¢J*í·²K­¶²bz'å½Õ¾wÓE¦ë­Ý®|§™âÚÊHïüMc«øKö`Ym#Áš•½ÞâÏÚ4q$:×,.ÛQðÇÁ«’m+ÀWq[xƒâ=³Ewãˆ4ÏNþñ¨¥q©×š¬m*xGÖ4š¼g‰]j«Æ‹ÒŸ5D«C=eþÝeëýß/v÷Õ}“ì{x--à´´‚[[hc‚ÚÖÞ4†ÞÞÞ$Xá‚c 
PÄŠ©qªÇj#@\qwnî÷wmêßvîúõß{¶­ïY7ëýFG=‡¿¶F3Òµþ¿­ÿ?K€úÿøúóœŸAšÖ/½îûù|“¿{ß×@ùÿ_|ð=ð?\b´[ÿ_Ö¿/U¸?ë§ùþ_}Åÿ?çñ­–߯?øm>ãoýiåÓ¥·Wôóÿ×ëøÿNÕqÖé~*ÞNúyêOõýoùýãÐöý:_ÏŽƒ¯8­¿¯ëÏï_óÿ×ëøÿNÕ¤_Ÿã~½®µ¾Ÿ4Òjî@ŸóøÖ€çüõíÏøt­¢ïÖýv³üí§’ê¶Ó˜þ»_×q^çúúuëÇAÜt]#ý~¿-¯¦Ý¬ÅøtVÓÓ¾ÛvèÓºr?ϯãŽÇÿ­ë[-¿¯øo»NÁùöÛñÖë¯ßµìyWÅ/ƒ¾ø¯k¥I«>© x·Â÷êøá;ÅÒëz_ÃïÚI4» ukë}Àßt[GÓ>üE¾¹•`ÓtOZÏqr>üLÔ¤1xsVÔ.<+âËçŒxÄ·š•Ô¾Ѻ(ÔNt.ÒW'ïT¦ºÊ/þ^ÁouïE||©sÊn×Ä­²M;/×Éê­¾‹cê•9çüòsÐc銘»­¿®ÿ7}—ÜL•žÿ§Ëä­»¿­ÇŸóÏãý;U¯ë¯oKzßî»$Qþ¯ôÎOJÙÁÞû¿ø~ýUÝ®½óþsÉìç±ã#æ´‡oëîÛ禶órüóúœŒuîyùs‚0Ejžý{þ¼ñÀ{ôé“ZÅüÿà½o²M>‰7oO|zÿ_ðÿ—­Ãñëü»ž=O_½Çü¤U¢?G¦–ïeÕh¶Û]Ó½Åü‘ïßî¨äõ `ñÎíÓºïýuéK[ñ’ùk÷tÒßz-š³Õ=t‹Çáø“ß=ÿáÿ ÿ ñKT¶jÛÞÏázýzõÉüÿ3×»ÀÆ i÷½õÿ†×[õÑi³‰-oþVùuv·v·WßÜwãùýzžqÁ9 ò=íßý|¿?Kç¯ëô99Î3€ÉÆkh½?àßú»¾ÊÚt° þyúõ뎼‚2yÀ[[ÿ_æ­ëù^ñü€ùûç'ŒðçïVËþ÷ÝÿÃ÷ê®íp?Ï¿¿ë×ü:V‘†¿ðuv^ªÏk'iŸóþ•ký[þy[þvÿ‡óífõOF¬_ÿá÷zÙ={ɽ?íèü?Gg§Wåçg{n!¿ð_³ŸÛá¯ý›?ƒ¿õi|eçóÏù»h|üOòBð>í<¿/¸ÿÕþÌeÿù'~1ÿ³™ý´?/øl/޽û9é‚88Å#øˆíƹïý~Âéÿr[^öZzù«Xú<û­/I|ýùoÑ«éÕÅëÖÑú+ðþ£¯OSëõ\¯S_Žžßg¢¿’VÛM“ÖöŠÇñÎxõé@íü]öã ¤_ç麶××[k¯{.aü­®ÖµôÚÚ¦¯ÖúyÜ^¿lñצzž¤ôê æ´3jßðwÛúë§[hó×õÎ@Ï^0>™ëŒ‚híÿÖ}ÊËm,!ÿçŸ^¾ý>¼vÍh¿Ï¥ö^«ÒÝVíXG8úzwþ}±úá¶_×õ¯ç÷'õ9ëÜž™ýz•$ V‘ò×Ïoצ‹®–Ñ]€¿ãœôëžú޼‚I ¡2þ—–©ëmö}®Øwü{úçëõ?\‘»VÑz[úéò{þWÕ™ÛO—é{íéòÑòï#ñ>üç§\ã—×–qßü¿.¿×mXþKºó¿dîû®ÖºVaþ?®O{r{àò áke·õý}öýd­§õÚ÷Ùù-Ö›¯´8vçüH¼ñ'×Ò´‡ËÊÿšõW[tIÚÈ>ï$—äÓ¾×½“KuÊ|ûYøFoˆº—ì»ðÙ¼[âi7ý¤<{Pð凂u{Û‹þÏŸ´ÅÖm#â7„<{à½FÚÛÆžð¶²!Öü+ª%½æ“gcöMJÎÊú×ìx; …Æfuéc!)Òú…y{•'JjN¶íJn3‹pœ ÜeÂRN鸜˜Ê’¥EN;óÅYêž½â×Eµ­mSµåèð þ$ù½Úhvÿ‘_ö:ý ý“Aôþýò ~†¸_…—üÀÔkþÃ1_üжù.–V¹æýz·uÿ€Ä?áAüGÿ£Ñý¦¿ð—ý޾£þm4ƒŽ½FÞ¿6§þ¬pºÛRÿö‹ÿæ—«V¿ëª‘õêÝ×þÿ…ñ#ŸøÍÚkßþ)Øè~§öNϧ;G^‡ ­«\1ÿ@U?ð³ÿË¿®¼»Èúõnëÿˆƒø‘ÿG£ûMc¯üŠÿ±×~GüÚiz½~lBá¾[`§û Äÿòçÿ÷ÖÖ‘õêÝ×þÂ<=à¿:§í9ñ{áÏí›ûD xàGìçñE’/þÈ «¿ˆ~)ü@ý©¼3âxï¯ì¬ðϧG¦üð‹iVÑÙÛËks.±,·IyY_ú¹Ãj1—Ôçg)/÷¬MôPóöÏ/;Zò^­¿6›|1éÿ{Àø ñ uý´?i““Æ|/ûô(vã›ã®(ÿWøqÌïÿaXþ^½_ëvÉxÚ¯·þKþQþ´Ñ$ä¿ð¡>$ÑçþÓ> Â/ûsÿšž g8Ï"Ÿö/ùƒ©ÿ…x‹ü­Yúm÷j\©ßðˆ€¿üÞ‡í3ÏýJß±ßûµ.qÇLõà)ÿap÷ýNÿöˆÿåßðßÞÞ+ë•|¿ò_ëúòÿ âWý‡í3ê?â–ýŽùÿÍMúãqÀ`3T²>_ó ?ü)Ä_Õ5_åÓ”Ÿ×*wü"x?íàï_ 
þx{ÄÞý³h‰5-SãÇì³ðâåuOþÈvÃÃßÿiÏ„_ü\ñEì¯në¨Åá?ë2è÷&FŠÏVKÉ­o!‚K)n9&A'oªKá›ÿyÄý˜¹ÏÔö]÷ÓT[ªûuþ^šùïý[sÝÿáCüJÿ£Ñý¦q×þEoØë‘ÿˆœLž3Ó·+Gö>B¿æøSˆzö»®®¾_uо·WúQþ¾zþn+ÿ #âOý‡í3ëÿ"·ìvzñÔ~ɧ'Ó× S‚iÿdd:ÿ²ÏËý£øþÿúûÃëu<¾åòêüû[ðøPÿ¿èôiŸ_ù¿c®Gþ"h=3Ó==ˆ§ý•‘Ð4öÿ ŒGN—ö’ï¿Oï_Ý>·WúQ߯U÷ÙßÊì?áD|Iÿ£Ðý¦}äVýŽÏ^:Ù4äúzô pM5•äkl<ü¿_ñ½E}|ý,[©ý(ÛËvíøé(È×ÿfø¯EÕ|7âÚãö…ñ‡µË3ZÑ5¿þÆ:¦“«i·q˜®lu>÷öIšÖîÒâdšÞx¥ŠDá”à†¸åù4Z”põ#%ªkY8ÉuMU¿ÎÚyßÝOSm-ÚÑ·£]¾Oå´§ýŽ#Õl~éþÕüM¯øÉüñ;ö…øU¤øÅ/¥Iâ-C¿ ?hoŠ_ ¼ºÌú&•¢iw:…ƒü'¢i÷VšM‚]IfnLidjùœÆéãñ£Zw„£ÞÜô©Íë¦îrv{t¾Çm7ÍJ{µ¯ãåkè­d´vvÐú“üÿŸò?˜n1€ê=ýsßõü¿•´[×üþýÞÍ€ÿÇ×¾xúðAž˜n‡µý>«ÍZÏkþ·Øðþ£¯OSëõ\¯S[­¯øÏÒ÷ü{uÎxõé@çÅßi5Á÷ôß·åm}zY«HùÙã®1ž¾§‘Ôdc&µþ¿­ÿ?¼‡eýz½ÚY-övw²ÇõÏLóÐ@úg®2mbïýzüï·“Õ®¢ùuÚÖÞÚ}¤Óu­všqx?Ï×ôøÇ§Ó V‹øÒÏ{Áµƒçóëµïmíñ|6NÛ‹[EÝ~·¿õóK¦îê'ËåµöÛ£»ÿ´Wµ¤9ÏläôÏëѱŽ:š¸ïýwKþÞ–ÔVÓåú]½•ïùY4‡þ'ëœôëžã¸ë’ ÈmˆñýrxëÛ“ßO ZEüþïø«û›W»@‰ú“ØzðǧË×$‘Z¿ÏßëÓ¯nOPNÖÑwþ½tÕ·¥žÿ&ÒGçïëøvÇCǧ\Õ¯»úék5½üíg}€ZÙmý›Û×ÒÄËn¿×_Ó~ºÞè+Hü×ø~–×ú\²Ëú¶éy룺õüœÏø/×üž/Ã_û6êÒøËþýußCàâ”}?¼_×õ·åëcÿÖþÌ?eÿù'~1ôÿ†™ý³ýÿæð¾:}1õÏ·¯ä_¿ä´Ï?ëöÿP0§Ñà¿ÝizKÉürµŸUåg³ÚÉKè¿ëþIÏ{Ž˜Á¯õþ_%eÓÏ£—Wé­—žÊÞ]/m{ÝÈ3߯מ8¯cž2j㿯õóô³ôz ååo–šëue³ß]’óþ'ŒuçqçŽ"·_×ôÉ–ÿ­k_ü´ÝõÒ÷bç>¸ëÛ׸ÈäõÎ@ ŒäÖ~Û~·oeu²ÝuH‘ãðþãÎsßÛœV‹M{^—ÞÿžßŸaïýÝ­£·ÝóìÖ·Ùv޽7QÀžüެ '*Á­#úýÿŽýšµž—w\ ¿çŸæyP9<ää¶_×õ­½/¦Ú‰íÿߣ¿§çd¤¿ç¯n½x'9À<â´ƒþ´Ó§“è»ïªZ³?ëm{m¾›¦Þí^÷Çóúõ<ã‚räz Ú­Ã^ÞZuô¾»-SºÑì݃üõã~‡'9Æp8ÍkºÝ믖ž¿ÞõQ6ÿöu¾ÞV¶êëVÓv¸àϯ\q׎nj`-ig鯛ü$öò·®œ«ô{/êû^éÛgÕµ/šþ:?—ñGö0OÚWÅ==ÿcßÚ¿®=Ocù×ÖpµoaŒÅÔnܸ +í¾/¾w¿—àÔ¼üÍÛ üªCumUý4K]wºm;XúT^0Ý?Þþ[Lú‚;E}§ö¤œù÷7Þß?/žÚ¿Í»s™_¿à¨ ¿høÃzV¦xàϵïþÕž7»ñíµöðöÚëÄpxSÀ6öšfq­|Eñî¿iámW]Ö´­3MдÏé¿b»Öu\éÞ Ô=ó©AÂ3›ç¯8à F{òóNíµ'8¨ÉÝÍ»¨Æ R*WNÝ[Ñk®ÎûÙZúnºµ­£ïzííû0_ÁãG›ÇÞ Ð5‡^ñ‹üiáü&øÉà_h:G„ßÁ#[‚ãá÷Œ¼ øÞãÄ0Çñ;áÝ„,4ÏëzÄŸ‡zž…£j:w޼)sªâëâ&ŠJ¤£JiNsö–j¤*J¿ºªœù”béTRiÂJ)¹i÷oÝi­ÞÖn÷Z§wîûŸ] Áýïý >ÃïžÀ½9>nOíHÿ8¹ý;îÞÝw[?$»§cãÝ]Óô_Û_öžÖµK¸ì´Í'ö=ýõJömþM†Ÿñköý»¼¹—nçÛÛÇ$²`Hv£arn—˜ß A©6ç‰ÄAyµ &‹ÏßE9>X¾¼Ò_r†—üv—mlù¸_‡_¶ÄmgÁúÇ‹_´_„Ÿ²Œþêÿ´?Œ1üfOxÇÀž ¶Ð,ümáÙ>=|)›áÏ„£ð.¥âÏÞ» …þ=øêú‰­ßº¿ØæžÃR»Òxªj¤¨SÄ:˜˜ÕT]/dãÏ›‘ª59çÏË-© W¼¯grÖÜ×wµ¶Më{5ÍÖý¶Ñ«%ýWþ 
=û(è–sK«x³âm–­i/ŒâÔü/ì×ûLŸŠ:|>ðσ|kâëÏü&áüLðæ¥ø7ÇþñdZ¦»á-?OÕ¼;­A¬h·Zœ“[Š®"[{7Éiýcì¥í%RQ«í½œ›9ÂÑ”š”\]”zû//zë^—º½ÚíÒÎëC'ÿðROÙã]Ôþ+ØH>*B¿ ~-Gð§JmàgÇ?j?dÿ…;à7~3øsáï|5×uÿø#Mðß"}GÄÚªhVšrøsÄÓjiáÿˆ¿¯üLåRºTß5?ÞRö¯š½*kÛNŠIN¤# ¹Á¥ J2mN<ªTæ ®ôÕj¿šÖÖJÏUn¯W¦Úr´Tñßü÷öMð†‰ã}KGñ‹~ ÝxCþ$Õ¬‡~|eñ„üc®èGí?|%ñCHø}¨ü2×|i­ü x>&ižÒüU}ªIà³uâ6²:v›¨Mná‹á}*óBÓfžËE×üK«_ë^+ñ•á/ xwÃþð¶®x£Å>(ñ_Šõ½Ã>ð¿†ôm_Ä#ñ¯¦hÚ.›y©^ÛÛË…,ÂU¦©Ó—4š“³j)Fsœ¥)5Æ‹”¥&£Ź4•Ĥݒõßtµ~Vùè¶µí–üÿ øã‹÷¿ -îµí-®|-ð{Qðkëžø¡ü@ñ_Œþ'üIý¦þëþŸà¶¹à}7âg…n>Íû3kþ$ñ¿ˆÍ^ÿV–Úl“×—€oø)ì~÷ZUžãßxŽ]sQ𾋤?„?gÚCÆ6—þ!ñÇ‚Wâ7ƒ|-o¨x_áF­`|W⟗Öô/ Étšî£76qX6£m-šiÏŠ³mB6S“篇ƒQ„ýœåiUO’÷e?…nì¬ÃÞíëïom5Úß%ówgÖ ~)x3â÷€|#ñ;áî²ú÷‚|u¡Xx—ÃZ¬ºf³¡ÜÝéZœ+=¹½ÐüCe¥øƒDÔbƒQÑ5í/MÖ´›è®4í[O²Ô-n-“’¦`éT:’åœ$ã%tìÖŽÎ-ŮҋqkTÚw›NÏtûïgóZ¿_+'Ë0ý’›wÂÏ·]ß´¯ížßŸí‰ñ×=Nyî~n:‘››?iˆçÑ©ÑÂÊþO E¾«µ£ÒûYžæßIßìôë½»ß{«¸ëmî}=úþ\ûõÇ¿ôíYþ>}Êü<»ùÿ?çùV‘]áµë×M¶qÿ¨?ÔòHàÀ÷Ç5¢Óúÿ†üý,ýÉ9ã¯rqÓ5´vÿƒŸÍße÷X=ú÷ýyã€:ö9éÓ&­oýš·¯å{ÄëþIã¦yÏÞã€x"¶_×_óóë÷Ø—Ñþ_‡¯–»Û{‹ŸËð=þ OPrgšÒúùö½¾z=¬ž¤üµù®š3z-š³Õ;;r¨<ý}úúž½s×®œ|Úÿ_ÖÿŸÞ~ÿ5åòWåÙÝ¦ÒæHùÿ=ϧJÖ>·ÿ‡Õ½lšù»uv÷Ãoé|RÕ-š¶÷³^è?Ïÿ^­ úZjº[¾ŠÖzjÓi^Ä™ÿ?~T§<Ž˜ÃnÖ÷ÿƒÑôÓÊÚz')~–ÿ׺ùß^Îè_ó׌uäð2Ns€xÅ\_õ·ê¼¯£ÒûXAý}~½~ðzc'‘€¶ü|û€¿ç®xëƒÛ“œã<9ÆêÒoËú¶»wÑi³ˆ ?ýyë×ÿB0: Ðÿ?çüÿ*Ú.ëþão7ù[¸_Öÿ—Þkëþç÷µoølöÝëø´ïmÏä3þ õÿ'‹ðÛþÍ£Áßú´¾2ÿŸÿUzÿ‰õ¿õøÛk½Äÿà}Úy~_qÿ×þËÿeÿù'ž1íÿ3ûgôêGü6ÇOñöýHoä_¿ä´Ï?ëöÿP0§Ñà¿ÝizKçïË~_N®/^¶ÑŸ×ëëõÉëžüŒ‚9Ýñ±Ÿá­»Ý«wçdޝø}tz&×’}4z'ª•£Ê~>¼ç»$÷‹ˆ9};“Àï%koÚüÉßËkZí4âë“Ç^Üžø<‚xZÒþ¯åýtµ“êÜ•´þ»^û?%ºÓuö…çÉ9àcqòõÎ È­Vû_Ëþ:ê—’_¥îûí¢WIÅŸ1~Ð/åüHýŒÛ¦?ioþ¿±ÿí]ïÓžyüSÖußÏá߉öòx.}ÄMà_ˆšõÇÄŸ†Ú­¦‘¤k>ñcjñÇyáüKðß}5Å8ˆÕ•[Ñru¨â!~{Q«F1‡5íy¢ªRŠ¥V-Ê3‡+÷gN”é_·Õ½7RZ=]5V¾ÚéoDãÁéÿðMø?Å_³Îàhüø'ðÇá2ø÷âGÂ|WªÆžÔ_øá‹;­Å·-ÕõíWÅtñN¬ß¶­‡Ž„9ªU§M9ÍÔ¬êâqkûUF®"8ûË—6ªCÙÂ#öúJ÷w\«~»»·'t®µþmµÕµµþ#§= a¼íHÿ:û̹×õé·Âõó²ôÕ$x`[^þÛŸ´•¥ä]Z^~É?±­­Õ­Ìi5½Í´ßÿoX¦·š)7G4SD푺²I22áˆnéæie¸I©¤Þ;0I©% ±ùmÏßîµÍ×³Ž¿n}?»O]¾VOÏ]ævðO‡Oðꃞ)øÇñ÷Ç|5ðçÄŸ >|=ñ/‰<m§|ð¾·£XøsE¸ðˆ|1ðïÃÞ6ñ/‰þxnÆ|>ñGÅüGÖtM-ç“P¸Öµ›ÍCWºÑñ<ý§¶…<-:ò« ÕêETo8ÊR’©Ö8B¬›•HP§J2–‹–**+Ûy$ÛM´·k½Ûßv£dïò:þÃ^Ó¼oâ?Š>+ø¥ñ‹âoÅøgâg„šÄè¾øaà¯é1øGÃÿ 
4!á´i6ÃRÕüS«ø–j:ÂËi3âIJœ)BzT¡*S…8:­FT§V¥ùªÕ©7í%Z\üÓ–‘„cÈ£ïÛKh–šYék½ùžíöô½“<‡Å¿ðJÙãÆ6é¯â¯_ý‡Ä¾ñG‡×Åýœ¾%éþ¿Ñþ|7ýœ5ÄÑü?ñgà'<3p¾8ðOÂ?‡:׈®uý[Ô´x^×Uð÷ƒtC\ðþ«´8²½=•a(O–¦&”¦¥‰«Š4¨biM{:•ªÆ*2ŠtçËSÚÉFpXòÞéÛ™^òr»´–Íè—Gfå¹êRÿÁ>~¿„bð%µÿtï [ü@Ö¾ Úhún¯áËKk+­sö+Ö¿a |9g³Â…­¼1cðg\ºÕtËHŠêvþ<‚ÓTmb_DþŸõš¿?´nŒ§ìãMÉ©;¨ãã˜s¿}'7^<­µoexò©?h ¿]/d¶}'Ï}û鼴骑¥ûHøu¼ û4ü ðKx“_ñü"_´oüÏËâ'‡¢ñ¼š/í¯û7XŪkqøOBð·†WT»ŽÝ%½]Ãz–'f6še¬!cWƒÍc[^^ä9°y­NHJN1¶[Œ©hóʤùW+·4¤ìµ“Þ$*'6íkÆ£ò^ä´µ¥}6×ÖúŸ@üoøM üuø7€uísľò¼QðÿÇ^ñoƒn4h&x ÅZñâ_ ÜÝè>1𞋨ɥø“Ã~ ðæ³i΋âVѵ ëù(gW©í"éÏÜ©NP¨åÉRjr¥Rå”f¹¡9$á8N-§ ÆJ2Œª©}Ûw¾–û/n·ó²²>coø'ï»ÿ‰:Ç|HøÏãރ᭷‡~:kúçâO‡­>|Køßñ SÒ¼7¨éŸtí'ÃÞø¡¥þÐ<øQñ+Á6‹càCáM·…4_ øÂ^"ðÖŸâ¥ëÿYª*R¡a¡‡—µæ¡SÙIÕ£‡¤¥$ësJte†…j5$å5YÎU'8K~ÛKh“¾œ®Ú¤µ÷ÝíÊœtº“»nî' à¯ø&GÃO‡wßð‘ø+ãwÆÿ øþ×₾%èÞ>ðö“û8èzŽ—âx ã_Âùµ»Ÿé³õÂßx«Ç>ý >#x{âOÄ?ü?ñWÄoÆÞºÔüVn¼ á94½*qTê.Z”°²¦éÔ¤éÊx™' •0õTTå‰uau0ô¥J9Ó§}(/i7&ëßtš¶Ï™îÓµù®¬Öɸ¯Y>_Gø?ûüø+¢ø'Cð׋~*ëøâWÃOŠzeߊ5ÿ j:Ž¡âO…Ÿbø E¬ÜØx;J[½;Pð„cV×’¬ïï|PZþÏQÓ´ÆþÆ\ëñ5Zò©)¬<]ZUi54£Ø¬ÉÆõ]¤§îÆîICG6¤'ZknòæwÞÿv—ÙÙó}Aðwᯇ¾ü3ð—¯ _k:‡‡¼§Í§iwž ¹±»Ö. žþïQw¿¸Ólt›d^L€Ûé¶È±*)ˆ¸.Üuó§^¬ëNPR›M¨iÚÚ'&ÖÝdõï{ USm¾ºéùlÿ[í¥®b~Èm»á'ˆ›ûß´í˜}sŸÛ ã§pAnyàu2ì#?iK =ùðXu×›CµžßÓû_M„wÂÐ{{‹ªó}U“·[­©YrýF@çõÎ~¼{îŸ^¸­ãªÿƒ¿oÊýìîi-ü }ÛþvíuaÏõÿ>Ÿ…iý~ë®ý´Ý=ZáÏëDZíŽN1ÏCŒ Õ_×õ`üú޼÷ÉëžüŒ©;´‹¾þ_Ó×»¿®«™é?Ç9ã×8ÈÀ=;÷ÛŒ6€/_ǶxëÓ=ORzuŒs[EÝ_ðý;+½¯´Sþ¶ýn—õ¶áøþ¹éžzLõÆA iÿ]¼Õš{_®ÝîGˮֶöÓí&›û­k´Ó‰éî}xÎzg<÷<Ž zŠÙ;«ÿ_¯çæ¯pùþW½´]•¹¾­rLÿŸóþEiÿþ éeøz8Èùz-“ôèîíüÖÚö´‚µOëµÛ³J÷õZY4·$=Ï瞯q¨ë‘’­c¯=oÞÚï§mÝlÔ“ô^]7ì¯k>ë›¶Öbçùþ¹èyíÉïƒÈ'Z-ÿáÿK?ë®Ä‡â}ÎsÀõàø\I"¶ŽÝüÿ­n¼Òï¥ìÿžÞ½>÷n½êjãøßõïžÎÖÓwÊçþ#§§ôÏ|üÇ*»/ëúþ¬¿Ïùëüÿ:Ò?ϯõwªéuox´3–»í×òwùövNçòÿúÿ“ÅøkÿfÑàïýZ_+ÐÃkMÿ‰þQÛvþo{ï¡úþ´ü¯ÞÎçÿÐþËÿfù'~1ÿ³™ý³ÿõ°¾:}?ϯJþEñþKL÷þ¿aõ} ýÖ¤¼ŸÇ-ŸUÖÖ{=¬”¾‹þ?§©ÎAç¹ÁùsŒ`×ÅÇÓ¿eëÕyüޝÓ_¿em•º^Ú÷¿0¾ù?§¯'F{ò:dÖÁåo/=´×[«tiÞÏk$çüOëÎãÏ0Ekýzþô¶½÷éÙw½»íöv»Ý;Ü3õÇ_×Ó OPr8<óZ-ÿ¥þ—ßs7§õÿ ùzØp<ýÎzõ'“÷¸àm–Ëú·—Ý·ßÔÿžßŸaïýÝ­ õþ¿¯•µòi£¯oË·¿#ž§ŒŽ+PŸë×ëר^§ ƒœê k§_Óµú¾t饬ÿž¼×éÉÎG<8Íh¿¯óéªõûˆkÓM¬µÖþ¿'§½ºz0ü?sÔòÁäc'}l¶]¯Ÿç÷“òù/»KÝê–©é£Z5`ÿ=xÇ^O$ç8€@þÏ?4€ž<ý’ßP:ïÃ~ÓÞ%ñ¹µÿ† —öÅÏ }†àO)ºŸûF6Š/²+ÏèÔË8ºyV 
gýnŽa™×®¥âEAañl¦žÓ©þ®)Ê««†ÅªÑ”#APp”ÜêFò|ÏÙÆ7Âó)Í¿ÞÍ.Y*j?òéݧ7¤tµ®ÛQ÷?ø[?ÿèÌþ=zÿÈçû%ôë×þƒõõäÕÁý‰ÆÿôÁÿ°þ'ÿèlŸì\Ó¾ÿõö§ÿ(Ÿ­ì„ÿ…³ñã§ü1—Ǭg§ü&²_8ÿ»¡ tõéëÒ©d¿Ä÷Óuÿ$ÖUߺꢞO™®¸NßŨýð>z/-ZbÿÂØøñÿFeñëÿ ?Ù,{õÿ†¡ëïØò3µ_êÿßýïÿðáÄëñÿVŸå÷ û#3þl'þ «oOཿ¦ì'ü-øc/XÈÿ™ÏöKƒ'þ2‡ón;sMpïÿÐ_ÿáÉ—þû ~>z\?²3/æÂvþ%O¿ø==:#Ä?h]Sö—øŸà-Ã~ýŒ¾6ÿhi¿¿fˆwnñïì—köÂ?ÚWá7Åì”~ÓS¼ÿ„gÁzÇöu¾Åš‡Ù­ ° üôôrÌ“Œðøšµ18¾ öRËózö8î#œþ±ŠÊ±¸l2ŸQ²úÝjXš“œ({Z”á9Æ•C*ÌS¼žûµ»R¦ó„£r©4Þ­Úö»H÷/øZßèÌ~={ÿÅiû%sÿ›?Ço_Ó Ãþ­qæŸí\ëý£Äßý þ²ßG¹Ù9Ÿ|#ÿ¸•?ùOÍh­æðµ¾=výŒ¾={ÿÅiû%qÿ›?þ¾¡©pǽ±\ ÿ‡.'ÿèaþ}ƒû'3ï…ÿÁµß÷ ßÖúöhÿ…­ñçþŒÇã׿üVŸ²W¯_ù9þ9úûz)þ«ñ÷ýp7þ¸›ÿ¡…øµê·ì¬Ïþ¡?ðeOþU¯}¥öOøZß»~Æ?½OüVŸ²WzÆOÿœ’j¿Õn?é‰àgÿu.%ÿèqÿ]X¿²s5»Âïÿ?j/“~Éýýv²½Ãþ·ÇŸú3^§þ+OÙ+ׯüœÿ㯷µ?õSÄÿ1\ çÿ ‹›âZéyk/»úé¥Úk«åø^šÚ/ÿ?çüÿ*´@£üðõãŽsœý2vïu½ÿ¯»{íkhº^@ïëþIÏ“Ôá¸éŒ¸¿ëoÕy_G¥ö°¾OéëÉÇžÇ<Ž™5·ãçÜüÿ‰ãyÜyã€F­"þ÷¾¿ðÚë~º-6qsŸ\uíëÜdrzç FrkEýZ~q-{mÓ£³mYh­g­ÒºQ?¿ç›³Î!µ&Z?ÒÖÿ5÷>Eô÷>¼g=3ž{žGPH=Ek_Âß«òZ;$£µÉøÿõýr>¿L~a´[í.ÿõ÷¿†sØúþ½=³íœVËe¿Múùîÿ?[X 3žøürp;ž„‘ߜ瓕­ ìÿ®¿ŸÝùÞ ½ÿëŒg'½J’+Uýuÿ‡&[OmKo³ÕJÞéøŸ®sÓ®xcŽã®H'!µçåoÖ×Þö¿k«{Ñ÷y+[~׿Nþ[Z×i§?ÏõÏCÏnO|A< Ñ_×§®Ÿ'm?®×¿_%é½¾ üO¹Îx¼1ÿë’ $VËúþµén½íqõéä½{'tïåÚ×JÌwùç×'Ž­ÓNã8Âéªû·ï÷ê´Úßœ…Óþ•ï³òÙ­4kí þ~¿äý?RPþ’Wü/{ßkí¢Z&˜ô=¿uôëÏ_^1‘»?./é/;Þëg–÷düÏ¿LèTž™Ù#§õýoýomÀ?}Î;žœŸNàž+xíýÃýúßäû~¹äÀv'ýŸéWoøkÛk½×N×½¬ÖÀçŸ^¸êz~]Á8­¿¯¸üúøëÛ×Ón?V~¯ò_Žšyk²Õ{ÙÍ/%×úÞ÷ù[®ŽáþyõëïÓëÇlâ´þ¿­¿®ûÿ_×ü?ܨççÿ¯[Go?[ú_þÏVØ‘úú÷üøÇ ™À ¤]žŸÕûïÿÛpþÏùÌ6«oóþ¿áºõÿ<úõ÷éõã¶qZEÿ[y.«½ößU{´þ?Ôý:zséÔ_ð|¯ý>ºkëyϯêp{`ôÏÍÎí¢ÿ«~­ßïZ½SÒÂÙvûºkè¶èü½Û\?Ï©ÀíùsÆxü–×ôú¯5k=¯úßaÿ]“oô½»këy/_Çž‡¿¾rpO¡éÁaÛ'óþ¿ËÍ^ì[îšík-¿™+>–LU<ý}ÿ™ì@ïžG¸ÃiývÓ{ÝZÊýôô°¤¿-ü’Öé¿K?'r_óϯ_~Ÿ^;g¯õýoùýæ_×õ·å÷ùÿ?§§êCkë×ç}¼ž­uÿ×êzãß=yþ.GI­ÿðÿ¥žöÿƒk~>¼ç¿^½@àôÆOO@+X~]ºù÷Õo{m²m ùmÑ~›½¯tüöm¡Gåøçßó'<Œãs÷«E¿ôÿ+kÛ]ûX[û:ßo/Uu«i»\wùÿëõü§jÙmý~;YÝÿÃî/—Ëo»tí{>¼ÊJ¾¿§Ìõ8ééƒZEÿ›òïòz_¶ýŸõ¥¿Ëòóv¹/§õ÷=zàóÐŒã#×úþ·_Þ!ý|ÇóïÏ^Ä æµƒóý6íÝí}ô³òˆ¯õöþÙÏJÑ;_/ëk÷[€úÿøúóœŸAšÙ~NÞ¶ëÓú﫟õ÷ÏßõÆ*âìÿ¯ø'[ Ýþ¿Ö¿5m¶¸¿çüþ5·õßúþ»¿Ÿåþ^]>ë‡ùÿëõü§jÒ§ë§Ý§{]u·Å«’%Sõÿ?ˆäœœàñÀÆ 
iý[~pÿ?çŸÇúv­¢ïÖýv³üí§’ê¶²æüÿŸÆ­_×g×KÚû+€¿çÿ¯ÔãŽüvã9­×õý->á~¯Óþ½µëä¢~½ý~¾zñÛ¸ÆH=—Þ¿ËU®Ý‹G¼Cð謭éßm»wNéÈÿ>¿Ž;þ·­hŸm¿nºýû^ÂÿúùçëéŸ^3ÇqŒVÑzÁ¿Ïç®Ë§K þM=;í¶¶ßg{‡ùõÎOSýÓÿÏN½jÖÿð/ú«zþWn'Ëåµ­Ñ2Öö·G¤l£)Gáø“ß=ÿë‹þÏ?ôíZC·õ÷móÓ[y¹!Ãë××·¯=yîq÷zcµþ¿­ÿ?¼÷ÏùÏ'°žÇŒŒcšÖ>¿ž¾½»ózîípÇü÷9ë߯0EZϹõüsÏr{ŒƒÏ5ºw[ßþGÓO+i蜓Ûúÿ'OÎÖ’ÿ_óž=O'ïqÊE\_õ·ê¼¯£ÒûXÓú¶½—ÃðëfÖ©Ÿ¾2ÿ‚©7…o?lË›o ~Ìš¦Ÿûè?µ¿¯|7›öÊþÏýª|kcû5ü0×¾ K¨é_³Ê~ÏÚ¾‘ øËQÒì4¸õÛω—1hÔn|Zmu™´è|5ªúQÂÝQ÷ªþùÒJ~Ç÷KÚIGZžÑ]¤ïe ^š|Dß}›ë®‹µ›µ–Î-oñ4¨¼cûiÚxKöbý·ÿi&ø{%Ý¿ìcíE%ß„¤ñ\v?ð°?áš|ªø¶à§ˆNu…#ñzéfÉe“HñÐßjuÖF˜Rn­ |×u½ž¶¿/´•¶æ3W]uKF¬Ç}í·}Ûté§Åk§r÷ÀŸÚÎïâßÃ?‹Ÿ‚~Íÿmþi—š¶‹ûþÓÏû^ê ºÓ¼?©ë׺üø3…ã=R;K;_ øyF³'ˆ&¿ %Δ!nô.IB?¼3µëSöI]¥Ž¥Ò½ÛéÙ‚wè»hï·E£ném+-﫹óÃ_ø)γ㆞7øÁª|$øM­ü7ø\>øâ¿‰>~Ôš_Ç)¾|:ñÕÖ½kñ*÷âoƒ¯>ü2ø‹àŸ‰ÿ³Å¦•e㟋 5ÏÛY·Ão[ðg޼Oã? ê¿Û¥ÑI¨óMJ\ÖS¦áÌÖ±q|Ò‹FÚŒ“~õ“ÓR/ý/˾ÝõëÔoÄø* ç†4oø·Ã? þiß¾/j/þ |_ý¢ÿiû;|ø•àO…7Ÿ´ jÖ_åø=ñÚ¿h]Æzþ¹û8x#ÄéÚWÄÏ„> Ÿâ‚øëF:´>´¨ÑÕ®i7NhÂòŒ¥{û¼Ñv¦¢•I&œdÔu·0µí÷»~¿ÒÕòÝ)z\?¶'í#'ÆK‡W_³Â=#ÂòüOÚVç]ÔÿjÍã}/á—ö´ZMΨx+Ãÿ³'ˆ<?Äû©.¼=¡ü^Öü +Ã/öoÅBÐÁ}.ŠåæS•ùœl šrõs‹³ÿ kkiÌmýi¿çßï>[Ò?à²ÖoðÁŸ´V¥à¯ÙÆž ñ¯ð-¼_áoÙÇöÚã¿Äÿƒž ø­a¨ø§Æ,ø³àçýž~[øj÷á_€ôx‚÷Âðkz…÷Šu jþÓ.ì'…5ÙP´¹/Q5ÍñÒ匜^–÷¥{»+ék­ìع´éç­íÿ}4ü-/Ї_µö“ñ/öÇøïû'è^ ¼_>ü<ñN¯ñQõûy4ÍÆ^4Χ¬xIðâi¢o'ÂÖ| «^x¬kw6š†­âmOÃé–·¾Ô.nB3º÷›²ì—[ù»én—Òã¾­_—Ÿ¾þïÙçüõÿ>*£óÖ·Õ$×eÌõëfÆ!Ÿð_¯ù<_†ßömÿÕ¥ñ—üÿú«ÑÃ[Ù»»{ïí[¤¯øcoý+y|—õ±ÿÓþËÿfù'~1ÿ³™ý³ÿÿ …ñÓúŸoÔ†þ@ñþK|÷þ¿a:Ûl¿ ýyn} ýÖ‹ò—]ýùo½õÓº}¯sèóϯ\u=?!ØšøÕ·_žÿ;lu}ýÕì­»K²jÝÕ”µNÈ“¯Ìç¯P@>áºcæÒ?åþZ®º÷Õ^ýCåo$­&µM_ÎñÑ«¦‡õzzŸ_ªåzšÕi¯oëÏòûÅÛïìôWòJÛi²zÞÑAøöëœñëÓ Î?‹¾Òm£·]û[ü–Žþ›kdÁ¯+Yßk_m-­ÕßFí¶ÒRŠõü{g޽3Ôõ'§PHÇ5kñÿ%ýwºnÝ㘪Ç®r:sÇn?»×=r+uóùÿOðvõ°ÿ<úõ÷éõã¶jâüõûô³é»Ùl㣷D—¯\w÷ãõÏltïÒµþ¿­¿?¸ÿSÓß=23î~ï3Ò¶‹þ¾Kïý¯¸ øþ9ÏN¹à¨ëÈ$Ö·þµùk}z~[™½ùi$µNïæ¬¯dîÿþ¹Æ:ýO×$nÀ²Ù_×êM´ù~—¾ÞŸ-.ò?ïÎzuÉŽ8ù}pIaZEú~oåÿ ­¬ì’Éw^wìÓ¿uÚ×JÌ?ÇõÉã¯nO|A<-j+iýv½ö~Ku¦ëíÿ?‡ùú~¤6Ñþ’¾Ÿ×}E²r>ï$—äÓ¾×½“KuÊ·õÿëÞÄþ5k/ëç÷o·qtÿƒ÷½­Ûª¶Í?ˆ”ãúä{œ‘„óœ1[-¿àßÓ¾êßž—°…þ~‡ ü9=ýA<Ò/o_ò¿ßuµ­mw¼Îz>ƒžÓ˜m@?Ï×?SÓòî ⵟçém»¶ô}SêØùúãðõ?ìãñÅZþŸo?êÿ“ˆÿ¥ßËú·æ¤¿çŸ^¾ý>¼vÎ+tî¯ý~6üµò0jϯõ¾ÚuÚîÝm{Èÿ?çüæãý;méªéçªï 
¿¯ëúü‡/ù×=:Ÿ_N1NMlßçüþ?L~$6åßü­eÓg}.Ó@çŸ^¾ý>¼vÎ+Uÿïôÿƒèöÿ?çó­£·?ë[¯4»é{õëý|¿5%ëõÿÇ$ç·ãór㾟Õôý{tZÇí-=ùkm¬­Ýä¶NGùõ8ºzséÕ_Ö|¾àþ»]¾¿}»YúÞG_Çžýýó“Éô8ÛÁ`Hm"ý?.Ëõ{õ×ÞnÑ_Õ®ž«^ÖZvk{ie!zþ=OSÇëÀç9Á`Ö¡·Ë¢Ñ?øòº}Ó$S‘ÏøþY$ñƒÔ} ¶‹¿õÿ¿½jönÆrVûÿ®‰}ÏEºWþÏùÌ5¯éõ^jÖ{_þÄŠ=?Ï=½O®1ÛŒVÉÝ_úý?5{€ïÇñÎxúðF=0Ý0kH»_ÕšKî×§,€ü?¨ëÓÔúýW+ÔÖ¿×õ¿ç÷€gßñÎzwé@ç¶îø †Ö.ýüõ¿¦»ßËgºÙ€¿×ߎN1œçÔôë’8­ÿðÿ¥Ÿõ×c=ºê¿Ê÷µ’ì´ “JÈùKÄ?±÷ÃO| ýªgËíwÇQx3ö½ÿáej–šž€¾(ÐÇíá½CÂþ6>½›ÃiZgöVŸ¨Ï7…¿·ôoýŽñb“WÜ*öÒõÆ´”éTJ-Ñö|»´ý›N<Ú§«ZÙÆÿÝlMtîÞ›om>ÒjöéîÛy'xù?‹àžú'‹ü5ûO|;¸ý¦¿iM'áíikñþÛâ‡Â"Ù«þ[9¿hÏë~ñ¥÷„¼GªþÍú¯Å]&ïF]hø£ÂiñY°²ñ&™§®µg¯øaµ/ jCÓ¥/eIΗ³å›ö—|“‹2Uy^ÖvŠî­+¸«t»³O·òÞêñëä奓GÑ^ø¯Ùø+â'€þ$þп~8ét+ï Üj^9ÿ…7àOxSIÕtOGÕ×Áš÷ìåð{à^¡¤êöú˜5»¹u-oG¿°°½ðÏÃ\?h›Œ£Nqw÷yÚm=/í%=­µ—f˜íó¿ËîèîúÙÚÝ­(ù§cÃþ$ñï¼ñ§ãÆÿxóà¸ýŸŸÆŸmþèúî‰ð¿íº®¥6›h~|#øk¦kºÝ¾¥sâiÞ(º‚îÑ‹‘m¯Zk;{g$’§EKÚrÇžÎz+ûÕ$ÒòVÛí4¹U­×ðék¾÷¿®ÖO–ï–ö¹û!ÀÞøAá/‡?>9ü ‹áÂ(þ[ÞxNo„þ(³ñ×èôŸ i mñÁ>|CøoªxŠÎ? ØÞiž*Ò< k:|÷úæš³?…µÝcÃWº*¶rr„gÍ.}yÓR»wNŒºìÛOÕso—§ü×áébßÃÿØ¿áà oiþºñ–£ü=ý“tØËº<úÖŸgcðÃbFºšâóFŸU»ñµ¾mlڽơ.›,HZMI›Ì­YN÷µÜý£ÒÞó¾Ú½7èõÙ´¬+mä­ý}ݾãÎ<ûh:>ü,ñ·íûB|løgðFãáÔ¿¼ ñGNý™¡Ñ¬,þxcQðf‡¤ê÷? 
f÷ˆô«ÿ êsiZý¶¿¬ß˨‘Íö·ou4úº·m¨BWæqço][\Ò•­¾‹ïû%¯éÛK/•ºú¿+‡ìÁû |#ý“fÑn¾ø‡â_ˆ/´†÷ß çÔ¾!ø“Jñ6­â ?Qø¡âÏŠ÷~!ñ.¯‡´­SXñ\šÿ‹ïô¸õn×O¶ðÆ¢höšT#KK‰t•G=ÒZßM-§-’ò·–®ê÷÷D­m^Šßןßê÷>Ðÿ?çúút¢?ÓÿƒºÒûZþvHgòÿúÿ“ÅøkÿfÑàïýZ_+ÒÃ?qè¾'ºO¢þúý}t2žÿ#ÿÔâ?mOù<ÚÓþÎgãÇþ­?WÌc?Þkô7[/D|Í\à ( šßäÿ Ü€ ( € )­þOò­À( € ( € Ýmó˜0 ( ƒ¿É~A@‚€ Ò?ý¼¿8€V€P@'øÿì´%P[­¾oó¦@P@¬6ùÿÙrocksdb-6.11.4/docs/static/images/pcache-readiopath.jpg000066400000000000000000000377751370372246700230170ustar00rootroot00000000000000ÿØÿàJFIFHHÿá˜ExifMM*V^(‡ifHH  ½ ÿí8Photoshop 3.08BIM8BIM%ÔŒÙ²é€ ˜ìøB~ÿÀ½ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÛCÿÛCÿÝÿÚ ?þݾ1þÔ~ø£ÁñõÏÄ[ÏüEðÿŽü[á |2øñÇã–¿áo†z€´ø’ûIø#ðëâ ÷‡ô/ êŸ<§^êÞ"‡J°’÷Å:eµ¤÷4É«xÇ^ø›à¿ |Dðˆtßø#Ç~Ñü]áiùún¿á½~ÂÛSÒ5{ G{[Û»kˆ÷Æ’ ”$ÑÅ(tP|KñÁþÖþøsÄzÔZnµñSÆW¿übÖ××/âißüqñN÷EŽ[;[‹}>X<ð߯¾"7:´Ö6/ˆöqÝ>§{¦Ù]€IãßxKá‡ükñ#ÇzÌZ‚¾øGÄ^<ñž·-½íêèžðž‘}¯xƒY–ÇL¶½ÔîâÓ´:öí­ôû»ÛCim=Ã$.òüŸðP/ÙfÛÂÞ$ñ^§âü.QðŠ¿g¿Ú+Âoãø×ñEøKð¢ûŸ¼Kð£IøÅã­'ÇŸø¾)oŒ¿ðšü9ø›ðïÆ_ ßÁžÒþ%x‚ÛÆ_ <àß üNÐï`ø{®h^5°°¼ð‚^ë¾×t=kAƒS°ÖtÛ‹° ?jƒŸµgÃ~Ôümcâ}@ÑüYáo‰ÿ~2| ñT¾×®oìtŸé>øÛà‡š÷ˆ<)w¦_éãĺŸ¨èqj6ïaq} áHèèP²†}Û° Ûym«Õ¶‚ c8ïÔä"øàù¼wðº-j'ñå„,|ywáõµ¾óa𞥭j^²ÕÚûì¿Ùl³ë:>£cöD½mB6¶2Ëj–òE4 P@P@P@PÿÐþÅ?hOÙOÚ ö˜øñ[Zñ‡|)à…_³ïíWðÇXÿ…Wñ¯ãÁ‰—^0øåãÿÙÄ~»ÓµÏ„ZÏ….õO iš/ÀŸÿÂGc­ø¦+{}~÷À÷–^×$ŽãRðØÁÿ‚h||ðçü(ûAû8ë¾(ø¤þÃz}·í!k­xƒOø½ðIý”5ŸMñ#Àÿ|3kðZÛH¼ð¿í£øgYO$>1øI¤ÿ?íxÃ?´¿_Ac£|j½øK/ÂÏ Ýj‡öioŒÚëøÇKõÚCö ý­¿j]Añ/t€~ñÇ¿~Ͼ øqðÿÀ´§ÇK 'Åú/ƒm¯Ùö§øÉ­x§öŒð×ìýðûâWÂÿj~ý”ðÝÇÇ?´¥§„uÏþÍþø-·ñ+ö˜øÏà]ãwÄYµ­ÃÖÚ¥ÂëžÖäøsà»]À¾ ›Uð÷‡´]&ÈÇ><Á->'xƒP/ð‹â~£¬x[SðÀÝ3Æ^øïñ—ů¼O­üø‰®ø†óÁzWˆ¿hï…_µ^á¿…¾,Ò¼Y'‰µ_ê~ ñ„o~ |1øo,ÿ íæžÿÅú0‘êßðJÏÚ˜jÿ%Ð|CðZÞçáÿn<7ñ"hüIøe¤xCö”ñïÅω>ð^«gûE¨ iñÜžø[gðƒÄ²ßÃÝC@Ó`ðW‹>éþ¶±²@°¿`¯ØƒÇÿ³?Å-CÆþ'ø]û2ü.ÓÏì½ðwà>«?À?x·_×>.üAømâOêÞ$ø×ñ áÃK{MwÆÖúýœ×_jŸ)Ñé~ÓøJþÇ?ý ´ˆø¥ÿG¥ûLá+ûÿô&Ðÿ #â—ý—í1ÿ„¯ìsÿЛ@ü(Š_ôz_´Çþ¿±ÏÿBmð¢>)Ñé~ÓøJþÇ?ý 
´ˆø¥ÿG¥ûLá+ûÿô&Ðÿ #â—ý—í1ÿ„¯ìsÿЛ@ü(Š_ôz_´Çþ¿±ÏÿBmð¢>)Ñé~ÓøJþÇ?ý ´ˆø¥ÿG¥ûLá+ûÿô&Ðÿ #â—ý—í1ÿ„¯ìsÿЛ@ü(Š_ôz_´Çþ¿±ÏÿBmtß³Œ|Gñöjýž~ xÇQþØñwŽ~ü%ñŠuo²XiÿÚž#ñ7€t k\Ô~Á¥ÚØé–_mÔïnn~ɧYYØ[y¾M¥­½ºG€{•ÿÔþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@ó?í§ÿ&qûYÿÙ³üwÿÕ[âªúb€ ( € ( € ( ™ÿbÏù3Ù3þÍŸàGþªß ÐÓPÿÕþþ( € ùönñg…|ð‹]¾ño‰¼?ák-Kö±ý¯¼=§^xYÓ´K[ýĶÇÇ 'AÐì®5;«XnµoU»µÓ46’÷RÔ.­ì¬àšæhãp®h €>Wý°5}+[ý‹¿k;íSÓõ{/øfÿÚ Óíšeíµý¯Ú¬~x¾ÎúÛí²ÍÚ,îàšÖêþe½Ì2Á2¬±º(ÕP–üVøåðOàF“¥ëÿ>0ü-ø5¡kš™Ñ4]kâ·Ä |<Òu}dYÝj'HÒõ/jú=þ¦4û;»ãai4·BÎÒêäÅäA+ ïƒþ%|9ø… µÇ€|à¯[ÞxcÞ7³ŸÁþ*мM ß‚ücq®ÚxCÅÖÒè·×±Ïá]x[ÄöÞ×âgÒµËk°é—wRhú‚Û€vné<’:ÇjÏ$ŽÁæwfªª‚ÌÌpÉÀ›¡ëš/‰´]#ÄžÕô¿xwÄ^Ÿ®hþ‡¨ZjÚ.·¢êÖ‘_éz¾‘ªXKqc©iz•ĺ~¡e<ö—–“Åsm,ÊŽÀkà?Úà'Å?ø·À? ~7ü!øã¯¼‘øïÁ~ø•àÏø·ÁRC¨Üi'‹|9áýgPÖ<8ñjÖ·:d‹¬YY˜õ{‹' s Ç@½@:&¿¡x–ÀêžÖ´Ÿi‚ÿWÒŽ£¢j6z­€Ôü?«^èöšo,f¸·ú&»¦jZ.¯gæ}£MÕ´ûÝ:ò(o-n ˆZ€ (æسþLãöLÿ³gøÿª·Â´ôÅÿÖþþ( € þtücÿCý•¿nïøoöŸñÿ?hŸ|Zøeûl~Ñž8³üiñ¬žÔcøwûn|QºƒH±ø®êºžƒðñõ;ÂöÖͬü%Oê0ë—Wž4º:§Š®¯u°Ù·ý•>I÷¼QûH÷?lŸÚú?ýñÍ?§Ðp(%ý“þ)ÈñOí-ÿý³ÿl7“|w#ôÿ€¯4i?e¯†i¾'ý£?¿ûaþÖò~~gÆöÏãøƒ÷”ñKáÿü?àÇü·àgííñ›Áß´íEñÆþ þÖ~,Õ|1âo‹>"Ñþ Â|kàˆ÷¢ Ÿ„š5øÑ|Y©i6ºëÁ½ãëÿ^¶§egâKXôíjÞÞæèæ€ üÝý¿~|tñŒ?düѾ:_Ëð—â÷Ä?ø×VýœõÙJÓ⟇|?â€|a{¤Y~Ø·Öÿõk ßø“HÒõ‹v²Õ5ëk Éo´ËhZÙï-À?;¾þÈ_¶WÃ{ŸjúçÂßáÅå¿Âë‹^ øQñïá·Ã?ÚOâŸ4Ž¿ðW‰¶ž$ñç ü)¤|[±ñí)û'üeý¢´ï…ÿ¾ü)ñ?ˆu?ˆ¾ø;ªøƒÁz-×ÂÍPß¾ üý¶ôÿø)þ=h_´¯5{k…‡áç‹|9ûZhúÃ…ôæñ?ü&?¿iOAñ2[_Ÿ Ñ®íô¯xÙþü]´ø™ªÞøn]⯃n4xxп`Ù×ö¿øaû*xÃöý¢;Ñõ´ñŒ¼:³èßtÍ[Á8OxÄ-®ø{À@¥àÚjãöjø[ðãÀ¿°ÅïÂω?¿gý+à²xÃJø—ð“DñF›kyâ/¾ñ÷‡ÿd¿ü4øåáoÃà¯xÃ>+ø‹w'‹ü[û4ø—Åpøá߃£Õ¾|@ñÞ'ø^æ þ ~ß6¶Z&“ñ“Á?µ×‰ô]RøÉeð–×À¿µþ“ð»WðˆõïŽ¯ãŸøÛãN½qûW|[×üyðÞËá~³áÏ øÃ0ñWí_uà{o|AðŽ¡ðûÆW‰<2þ'‹Ä±ßíÑð÷ÁÚƒ~ÏÚ¿Åk¾<Õ¿l|ZÐn~=ëºÿ‡¯-¼Yÿ3øñwá†ðÏÀ¿ðÑ_ ü7ðãÆ>7ýuÚ[Kðö¯ð“Æßïa¿ñéßþ*ü=ø‡¨h0Ò?T?b_üjð7ìÿ¡è?µê¾8_üDÔ,àñ•̺‡Šôêž9ׯüá½sUºøÏûDêœÚG‡§²ŠÂ}_ã‡ÄÝj×G}3JÖ)Ñé~ÓøJþÇ?ý ´ˆø¥ÿG¥ûLá+ûÿô&Ðÿ #â—ý—í1ÿ„¯ìsÿЛ@ü(Š_ôz_´Çþ¿±ÏÿBmð¢>)Ñé~ÓøJþÇ?ý ´ˆø¥ÿG¥ûLá+ûÿô&Ðÿ #â—ý—í1ÿ„¯ìsÿЛ@ü(Š_ôz_´Çþ¿±ÏÿBmð¢>)Ñé~ÓøJþÇ?ý ´ˆø¥ÿG¥ûLá+ûÿô&Яü0ø£|'økðóág‡nu;ßü4ð7„¾èWšÔÖ·:ÍÞàÝOðî™s«\XÙi¶3êsÙi°K5žak-ÓJööV°²@€ÍÿÒþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@P@P@P@P@ÿÓþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@P@P@P@P@ÿÔþþ( € 
ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@ó?í§ÿ&qûYÿÙ³üwÿÕ[âª?á‹?cŸú4ÏÙŸÿ ?ÂÏþeèÿ†,ýŽèÓ?fü0ÿ ?ù— þ³ö9ÿ£Lý™ÿðÃü,ÿæ^€øbÏØçþ3ögÿÃð³ÿ™z?á‹?cŸú4ÏÙŸÿ ?ÂÏþeèÿ†,ýŽèÓ?fü0ÿ ?ù— þ³ö9ÿ£Lý™ÿðÃü,ÿæ^€øbÏØçþ3ögÿÃð³ÿ™z?á‹?cŸú4ÏÙŸÿ ?ÂÏþeèÿ†,ýŽèÓ?fü0ÿ ?ù— þ³ö9ÿ£Lý™ÿðÃü,ÿæ^€øbÏØçþ3ögÿÃð³ÿ™z?bÏù3Ù3þÍŸàGþªß ÐÓPÿÕþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@ó?í§ÿ&qûYÿÙ³üwÿÕ[âªúb€ ( € ( € ( ™ÿbÏù3Ù3þÍŸàGþªß ÐÓPÿÖþþ( € ø³à'ņ¿ >ß\üIø…àŸ‡Öþ'ý®ÿk¿xjø«Cð´^!ñ_ˆÿm¯Ú?‡ü3¡¾·}bº®¿­ê·Özn•¤Xïïïn­í­ ’i‘í:( ƒþ?|yø#ñÏö.ý³o¾ ü^øiñ^×Ã?hŸøŽ_‡~7ðߌ‡õíá׌4íOG×@Ô¯ßIÔ¬¯mæ·žËP[{…eÈŒ£+0ÞðüÓá§‹>0~Ê'á׃<¥øÿXñí7ûý³Â¾"ð>¥ñ'Á÷ÞÒ?oÙ³]ñíßÄ/éRAuâ†:´Ïkßí%º²Óbø}¦x’ïYÔ4ý"ÚúöÏ‹ÿ n?؇AÒ¼û3]éøWâÖ·ñóâÖ­cû7ü ño‡~üø¦xösð?ÀÿþøK¤~ÏŸ·~£á¿„Þ/ß¾&xÒ²üÑ ¶¹§x[Å`>-|Nÿ‚—ø/áe·ÄɼyñæçÆúׯÛrÙþ ü9ý›ôkûm?Â_ >;xÏÂ?³…üâöAý |Dö~8øq¦iþ"ðÕ×Å]Ãøïay ê6¿´oÁÈ-Æ¡ñCþgíÅá?ˆ¿ ô½Jøéáø£ö“øég®|4ðìå¤hfãÃ>3ÿ‚–þÐZø¬ø°þË_>êÉÿ ÿ{àj·>+ø§û2-ç†î¥øÿkñöÔüe©ivgðkÇŸðR‡Öÿ²—½_Žš§†tŸüÓµÝoâ÷¿Xj^1øsûF|Fð¿íàïŠÓh°¯‹ìì¼)à…ºO€¦øyã+™¨h>$ºø¤|Eøéd—ð‹u>/ðPMáwÀ½CâÅoÚêèxšßö#øçñJÛÁŸ³¿Ã¨ø²Ãÿ±¹¦x[Ä´ßìÝâ/þ ´ø—wñ/_ºÓ4‡¾'Ò|{ðý~|Eû:Øß]kz]Þ½âm-´íF×HE³†K€šüû@ÁK<8ö~ÐV¾ ðWÀ5/ |9ñOÃ?øëâ.«à­ö ¶ñ§‚õ»ÙuOØ×ÚG‰?jŸÚF=3Ã~*ÐuÏÚÍ5M_Å+â_‡—³&›sqiâ} ¨øqñÿöêÕ¥×ü?ñ[Æ?¶ƒþhÿ>*Úèüû"jþ.xšÒ?€_²§ˆ¾ xVç@ñìð¦-Sá—Š>(x›ö»‹Ä;¶ý˜~Üé^#økðóáÝ÷4í#Äz7ˆ&ð¯ì7ûøcƺ.»á¿xsöPý´ø{Å:v¡£ø›Añ6ð‡Áú~½¢ø‹IÕ¡¶Õt½wKÕmîìu};S·ƒP²Ô ¸¶½†+˜¤Eú¾€>gý‹?äÎ?dÏû6ú«|+@LP@ÿ×þþ( € þzußø"ì9ûsx«Mý¨>+éß4/þýµ?hÿÃâ¿|WñEž›®†ÿ¶‡Å¥xoRðgˆfñ7ƒt}ûKðõ¶™/‚t? 
ko%Åî´š°Öï/oî?_á?gc÷üy/ýwñçÄ[Œý|ïIŸÄÐÿ uû6Ÿ¿ðÆÎ_úïâÜgëçxóøçõÅðÆÿ³!ûÿ<7/ýwŸY¸Ï×ÎÔäÏâhò˜Á?àŸŸ°_À¿Ûsã§À¿…¤ÿ|Wð+ö­ñßÅ?ø³WñG‰t¿øO>øþmgFðå’ɦøWAÑá²Öo´[/ìÿEªÿcÊÖ·ú­ü³]\\~ùP@ùßâߎ¿´÷‹¾ þÓãàf¡û7øSáÿìã øÆš7Ç}ÇTø±¯Ü|øSûFøÎæÓâLJ¾!xoÿ³÷„ô¯‡Ÿ<'¡é>,ñÃŒþoˆ´ÿkz¾‡§xrÏOþÔÀ¾ÿ‚¥ü Óþ#üFøQ'~(ÞxçÁ¥Ï‡ü;áýQøâ câ‰#ý ¼û1ØxwÚ.‹ñ¶ûYð«®üXø¡à;-h-?àÍ”þ×ÛÆw—Ö>Ñ_ø£ãçÄÿ‚ºÆ¿ñÆù<5âo‡¦h,ðÍÏÁI¾'kº¿†ç×u»;]LÅk£Äê?³/üSöyý¬~'ø£áW›ëëÍkAÐøGƺÊO¦Y$Z*Ïãÿ øŠ{Ï Aká]FKÏ ZÛi1y›ÿÁ7¿dIoïu | ã¹f¸‡Å°é?ÇïÚéÞ ÿ„Óâï‚þ>j÷ 4±ñLi¿ µ /Œß<ñ#º¯Ã»O j~ñO‡t»ÿ]è_d…ÐtïØ·önÓtøu<©ßØøãÂvž ñ¥Þ»ñâwˆ¼Aâ½ Óâ/Œ>,¡ñеßj>&Õu«ˆ^=ño‰5]ê²øŸT›V6ޝs¤Øé¶@ÂÏÙ¯áÁ}[ñýÄšMÞ´š½¼zn©ñ#â_‹<)ám;_ñž)Ö´_‡>ñ‹õïü.Ð5?HºæƒðçBðÆ‘1³Ò¬ÍŸöv‡¢ÚX{½P@3þÅŸòg²gý›?ÀýU¾ ¦( ÿÑþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@ó?í§ÿ&qûYÿÙ³üwÿÕ[âªúb€ ( € ( € ( ™ÿbÏù3Ù3þÍŸàGþªß ÐÓPÿÒþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@™­hº7‰tm[þ"ÒtÍÃúþ™¢ëºµakªèÚÖªÚËc©é:¶™}ö:–™©YO=ý…äZÞZÍ-½ÄRC#£|õÿ YûÿѦ~Ìÿøaþó/@ü1gìsÿF™û3ÿá‡øYÿ̽ðÅŸ±ÏýgìÏÿ†ágÿ2ôÃ~Ç?ôiŸ³?þ…ŸüËÐÿ YûÿѦ~Ìÿøaþó/@ü1gìsÿF™û3ÿá‡øYÿ̽ðÅŸ±ÏýgìÏÿ†ágÿ2ôÃ~Ç?ôiŸ³?þ…ŸüËÐÿ YûÿѦ~Ìÿøaþó/@ü1gìsÿF™û3ÿá‡øYÿ̽ðÅŸ±ÏýgìÏÿ†ágÿ2ôÃ~Ç?ôiŸ³?þ…ŸüËÐк.‹£xkFÒ|;áÝ'LÐøwàßü@ñŽ£ýáxg^ñŠuo²_êÙ~ðΕu­kšØ4»[íN÷ìZe•ÍÏÙ4ë+ËûŸ+É´µ¸¸xâ` ÿ†³ø[ÿB¯í1ÿˆ[ûcó„ þÏáoý ¿´Çþ!oíÿ΀øk?…¿ô*þÓø…¿¶7ÿ8J?á¬þÿЫûLâþØßüá(ÿ†³ø[ÿB¯í1ÿˆ[ûcó„ þÏáoý ¿´Çþ!oíÿ΀øk?…¿ô*þÓø…¿¶7ÿ8J?á¬þÿЫûLâþØßüá(ÿ†³ø[ÿB¯í1ÿˆ[ûcó„ þÏáoý ¿´Çþ!oíÿ΀øk?…¿ô*þÓø…¿¶7ÿ8J?á¬þÿЫûLâþØßüá(ܼã|Dðo„¾ x;QþØðŽ|3 øÇÂÚ·Ù/ôÿíOx›JµÖ´=Gì¥­Ž§eöÝ2öÚçìš•ý·›äÝÚÛÜ$‘(M@ÿÖþþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@ó?í§ÿ&qûYÿÙ³üwÿÕ[âªúb€ ( € ( € ( ™ÿbÏù3Ù3þÍŸàGþªß ÐÓPÿ×þþ( € ù›öKÿ’Yâ¯û9ŸÛSÿ[#ãÍ}3@ó?í§ÿ&qûYÿÙ³üwÿÕ[âªúb€ ( € ( € ( ™ÿbÏù3Ù3þÍŸàGþªß ÐÓPÿÐþþ( € þyþ&Ágÿgoø'w޼+û(ü\øMûOë¾+~Øô].×DøãÍ+ÁVÚÆÏÛ âµáoi^;ñ>‘£èß,n¼ãýÄú]—Â/øOîuy¥ÿ„h5—ˆ„Ö6à±ö©øb:ø_ö?OØãö¼oýàpþ_Fjï…㯅¿iOÃö2ý°Oê>°ÿÇœ(íeð´Ì«ûL~±oí‹ÿÎÿ˜ú ücðüãöwÿ‚Š| ý½~ øàÿí-àˆ?~ ~ÖÕ¯|[ðwÅšÂùWÁ~ø—`o.¾*ø{M¾ð׃nu;_Í<7Ä‘à›ÿí{«_ Ùiø†{{+€è΀>mý²¾2ø‹ösýj¿ÚÂ~«ø·àOìÛñËã/…ô¯Úê7ÞÔüEðÃᇊ=x¤7Á 'áN—ð›öiø›û üIñ—~,Ø~Ñ^'Õ<?ÁÏÛ#âìsÇ x[á¯ìç㉫ñšïâðûZÓ4øCL^7ðlúF¤ô_‚¿oïüLñ‹á¯†ÿ?h?j0E¡ŸŒ ¡h ŒŸ³­î³ñ³â‡ìõu£üU°¿ø§a«kz·…¾*|ø»£ø¾ßàŽñ~ Hø«øª{§ðÞ©áMKÄ@0|'ÿ‚Äü'ñWì•ð¿öŽ×üâojÚüð'ÅOü:Ô>øGá„hü_Ž?¼ 
'¾9|jð‡àáÄÃ÷þ#ø™â} Ã×¾/ñ¿€~xcÅ^3ñ¾¡¬iºP¶«ûoþÐajŸàÕïìÙû2ücø!à›…z¿ÃÏéß|Yðóâ_ìÉû"ü|ñO‹-~/X|fÕü kãm¿i}^ ø /‚·ºo‰Oƒ´¿ Éã]6ïÄ2x£Mæ?f?ø(?Ç?‹>x#ÆxBüYø»ûV|0þηý”i_‚^ à6³ñzÜx_öŸø§ãïü ý¥|1ð5¯Œ> üfº‹ã†‡ãï xµ¯ þØþñ…ßìãð«âÙø»âí_á—¿g/ ð爾"k^Òí4Ïiéñ#BÔô`>ÿÁQ´_|*ðO~ þÏ?|1xß ¾übøéq¡7ÂsÂ_³ÿÃÿŠ~?ñ·ÃÿøÏÆËñŒë> е»¯‡>5ñ,ºwÃ=#âŒ<3àÿÝk3ðχ'Ô4+\+_ÿ‚ÊþÌ^‡C¼ñ‡ƒþ*x+Nñ·‰üC§\xÛVýžü©_|:ðŒ[áþµñ/@ð‰>>i¾:ñ¶†Þ2Ó¼]£h¾ð…¼UñoÄñø^Ö|5ðßVÐ5oêž3ým €>gý‹?äÎ?dÏû6ú«|+@LP@ÿÑþþ( € Àñ…<-âû[;xk@ñE–«é> ÓìüE£éÚÕ­†½ jú¶…­ÙÛêV·QZêú&«kk©i:” [[ÞYÏ Ì1H€ôP@'þÖþÐ<3ûþÖšg†ô=ÃÚk~Î_´%ûiú™e¤Ø›íGᯋ¯5 Ãia½¹»¾»–k«Ë/ι¹–Iæy%vvú€8_Š? <ñ£áŸÄOƒ¿ô_øI>üXð/‹¾xÿÿÚZ¶ý¿àŸè‡…üU¢ÿkèúV»¥jèZ­ýö–‹©éºµŸö6þÎò(n"òߊŸ²ÀŸŒÞ,>ñß…u©<{•àýMñÇ…~!üIøyâíßÀ:¿Œµ¯ ]xcÄ?ü]áSÃÆ“sñǶ-®è7:~³©øsÆž(ð†±}¨xO]Ôô[ ;Oý„eí#RðV«¤x]Òn¼ eá{ HôÏŠßìl<_‚¾"øƒâÿ…n>0i–þðg‡úçÃO޾kVÞø!à¿ü7ømku¯xâN¬êú¦ðÿÆß‚õ/k7×þ'ñg†Æ‰¥x»X×­¼)áDÐÀ2ü-ÿåý<«Új~øoâ}+L²Õþø‚?‡‰ñ»ãÕÏÁYµÿ€ß¾ü)ø;®ßü¼øqðSTÖ~xà·Â7Ãú¦¥àËè5/xcÅ3\Mâ½6n€:?‡ÿ°¯ìÕðÇÇøƒáO |AmkÁþ2øñÁZ?Šþ?þпþxÇŸŒG޼aà_„¿~)øŸág‚¼I­[üDñî¯áoéWÚ>ãOèšÚ^‘¯j–W@ú÷ìeû?x“âUïÅ-WÞ.moVñÿ‚þ+ëþ±øµñkJøMâŠ__Ãxâ?Š~ é~7³øGâ?xbïÁ> Ô´ýXðeåôºÏ„<%®ßÉy­xWÃ×úX!ü×öN€x~+ø®YhÚ:h‘üløè<9â/ øgâ»ñGÂ> øá¶ø’ÚÄχÞñÇŠ… €?ÿÔþþ( € ( € ( € ( € ( € ( € ÿÕþþ( € ( € ( € ( € ( € ( € ÿÖþþ( € ( € ( € ( € ( € ( € ÿ×þþ(Äþ'þÒß³—Á-kDð߯ÿþx‹ÄÖÍ{á½âÅ_øZñš]­ƒ]èš_Šõí&ûV¶[ç[&žÂ â[¶[bþs*P´C47Åqo,sÁ/†ïÀ÷0_I‹-¥ðߊ"ŸA}B)<;ks­#6n”“øeñÿàGÆ»¯Xüø×ð“âÝïƒn ´ñ}ŸÃ/‰ñí×…n†ÚÛÄ–þÖuit;‹™´ûø ‡S[i&’Êî8ÕšÚp€þý¥?gOüIÕþ xOãïÁO|_ðýÖ±e¯|*ðïÅOëtKÏÈÑx‚ÓWð6›¯]xŸMºÐ¥G‹X‚óK‚m2Dd½Ž (Õu}Bðü s¯kZN‰o(¼1Üjú•ž›ƒNÓ/µ½@¤·³A KÔµ{Â¥¾Ë¦i××óìµ´¸•-iú…†¯acªéWÖzž—©ÙÛjn¥§ÜÃ{a¨X^—6wÖ7–Ï%½Ýݼ±Ïmso$Ï ‰,NÈÊÌr€<§\øïð?Ãñ/ÅüeøUáï†~ ñ ß„|añ\ø‡á#À¾ñ^Ÿâ˜| áŸø¶ÿX·Ð4/YxÚâßÁ×z6§¨Ûj6Þ*žMlš¼‰dÀüñ£àïÄ+Ã:ïÿ‹ <{¢xÖãW´ðn±à¿ø_Å:W‹nü? ·õ·†u U¾´×®4Kx&ŸWƒJšîM6¥–õaHÝ”Ò蜺…„¶št÷Öpê„WsXXKs w·°Øyn–ÒÕÜOsŸÚ­¾×$(éoöˆ<âžroó_üwøðÛEñO‰>"üeøSàøÄšgƒ|k¯ø×â„|-¢ø?ÆÖ‡¡xŸGð§ŠuMwX±±ðÿ‰5o x£Ã^!Ó4=Z{MRÿCñ…«ZÚËa«i÷€vþñ_…¼uáÍÆ> ñ&ãø—N¶Ö<;⟠ë:wˆ|9¯é7±‰lõM\Òn.ôÍWN»ˆ‰m¯lng¶ž2)YH4å:ý¨ÿf_…þðOŒ>&~Ñ_¾øKâV›ý³ðçÅ>:ø¹àøwÇúGØ4íSûWÁZ߈ÕtÝ{@×tëcD×4kë]SGÖtNÚ+Ý7UÒµ;g²Ô4íBÎxnìo­'šÚîÚX§‚Y"tvá¼1ñ£àï¼@|'àÏ‹? 
<]â‘iâ[óᯠxïÂÚÿˆƒ!øÂO¬Eâ¯ø;Ã2øw—ž<ñ~ ñ6‰£>…àm:i`Ô x‡U¶ðŸŠ|+ñsÁ?ÔÛD´Ó/íõ+‹½ÇÃ0ÞÃuq§ÉâQrp€àøÿEÐ.b¼ðo‡üwàÍ7Ãü'§ü,øMàÿ‹QØøÁÚN‰ûèþ¸ýœoð¦|Mýˆ¿hÿˆÿ°§üÿöwдè| ñ3áìãƒ_5ËŸè‹kð“Ç+ÿ‚k|Zýœôëë½OÃÚÝÍæ»g¥|\ñn‡¢Ýê ç×–]{HºM>ÛûfÜêo†~ø¥ñö–øñJûö^ñì¡àÿÙûàÅo…úͯ‰üIðOT›Å“|G¿ø?ÿß¿‡v_~ xòÞçáWÏÃkï]k-·ð+ÛjVž³ð·….N¡ãðÀ„økö#øéeá?‹Ÿuï|BÔ¼uðïö—ý¿iÙcöuÓuÏ‚^ð…÷þ,xƒöŽÓ>ø’÷âÞàüAÐÏ‹¼'ñnM^ }SÆö¶^ÕñM÷ƒ<4|Sð‚€~˜þÇÿ³ïÆ¿€V>7ð<ú·Ä¶ð]×ìÃû0[ø-¾$|pÕ¾)I¢~ÑzW‡~-xwã…·‡5/ën¼ £[A§ü¸m/Fð¼ÿ ¡½{CÁ^Õ˜øžÂPký›<#ûSøjÿÅ’~Ðþ6,±»²ÒÓÂÑŸŠþ" ئ½mNCoàoØkö@ŸLBöjeÔ5oÇuåùvöÃ%Πð¦‘áˆþ;ÿ‚k|8ðßÃÏÞübø¹ð³öÉø ã?ˆþ ð^¯àO k^#ñ7ìßÿBøwñƒö†—ÃÖßàù¼Oâ \[꺸›|cýž¿nx›öøÍð;ÀÿÿfýKã®—ãûmáæ‡ñgáG‡~!éž0ÐeCᯂ>0øâ÷Áõï†)ñÅÿî4=#ÃÿÙ>1ñÔZ†~|>ñе"ò{íà_fø)fñoã=Á?‰SÀövþ=·øAâ Oâÿ‹|A§ëŸ ïÿf{Ÿ øgáÕÇ‹~(~Ù¾&m;âìgþÝÓ~"럲2x«Iñe¥—Н?hFð4×þ»ú'áÇìåñ ?i/Ù§ãü$ý¬ü3àoø“öÑ`Ð~2þÙ7_¼WðÿAø‹ð×à¤zˆ|sa?í ã[=wÀú¿¼ñÎÓÁCÇ_´'ˆ4OWðç‰e¶ðæyiá…`÷ß¾=|8ýµ>*þÕÉðK\øÇàûO|Añ7…¾øOÅ áñ¥þ…ñöÿ‚||Òh_XxûǾð´Zÿïþο¾ _øÄþ(ð‡eðOÄ¿ëþ±Öô»Ý6ËÅ`³àƒ_´oه㿅ôïßü*ñ‡íEûB|Bñ‘ðÿIñ'…¦Õ¿fŸ†?¼u£è¾9ñ"ëžÖïüßô Ýxëö‚Ötßkž#Ð[ãOŠu/hš÷ЬØx›Tçl¯‚ŸâøËû"ø»ösð·í ¥øáÁÏÚWá~µwû$]þÅzo<kã©ÿfáðûÃöúí±{Ãi¼uaðÏ_Y%ðÞ—}¯è׺…Ÿa¥_]G~ó_Àÿ‚·—Ÿ† >Ið»â–—ᦽÿ‚¤~"ð7Ç_é>øEð³á7Å>/Ú Áž&–?Œš_Œ—∾èv·bøI ø‹ÀÿuköÞñ±oi”áþ ÿ‚xþ×ÿüsªü µøÅáïß~Ïß°çÃßi—_´ÇÄ¿ˆšÇ‰¬¼ñËöÁñgÇx&ÆãöÇøGƒ¨xwNñ¯À¯YÞè?¾i§ÃšÇÄo |8ñ„š¾(xgÄ€¥cû-ÁIaøCñ/YÕ>&üsÕ¾&Ç{ðHð†o~2øŠåø1ÄÙóÄžÑíþ,j?l/üpñÂX¢ñ³cÖ©áË®xBóÅQé5ižÐüYâ G¼Õu-<Ý>èÞ5?´'í9ûVüFøgªþÍüoð‡à€âð§Ä¿|1jº‡ÀÛÿŽþ'ñ—ƯËðÏÇ|áý6ãß<+àmîïÆsxô/†w7,‡OÐì¼idÕÿÁ;ìu};ö ýŽ,õ‘åMìÓð`é¶XÍa¡øf_èSx?Ã:àžRÚ‰ü-á94_x«ÄÚh–¾'ñ&—ªxŽËÂÞ³Õ-ü1¤ÿÑþþ( € ( € ( € ( € ( € ( € ÿÙrocksdb-6.11.4/docs/static/images/pcache-tieredstorage.jpg000066400000000000000000002306001370372246700235160ustar00rootroot00000000000000ÿØÿàJFIFHHÿá˜ExifMM*V^(‡ifHH  ´ Éÿí8Photoshop 3.08BIM8BIM%ÔŒÙ²é€ ˜ìøB~ÿÀÉ´ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÛCÿÛCÿÝ7ÿÚ ?þþ( € ( 
€ ( € (óÎ÷þ ½ÿïÓ¼Qqà«ßÚ£áõ¿‹mMןáǵñWöºGc¨.“wqöðëNövú›¥„·¨­h·O&mî€>Ûð/ÄüLÐî|Kàiž*ÐlüSã¯ÝjšLÆ{H_ j€…òà›>'ÿÁA¿bσdø=ñKöŽømை6z†´mwHÖ5K„Ó¼¬xÊ+kèÿ|S”þøs«xžÖöÆ÷@ÓÓõÝCÂÚÍÿ†¯Œ·~ñV”–Òê~ñf‰w¦»á/X[ÞY]^xsĺf•­[ÙßX^Éb¶·Ö³JíTÆ]|CðEÄ- á=߉´«‰'ðg‹>!è ’|kz·‚< ­ø/Ã~/ñ5¶ÜI¥x^ø‰à+RŸxònüM¥F¼òT³ €?ÿÐþþ( € ( € ( € (òÿRÿ”Ñx/þÑñ;ÿZ·á~!xÛö¡ý£µ=Ãþø±ñûCøyðoÿÁQ‡‚þ"üdÿ‚„ülý€l|OñsáŸüöŸø_áß鿾|øÇ«üDÖþü ð·Ã{…³çˆ5-ÂZŸ…µ-^i| ñ3Mð”õMGÇ_ðPx7ö™ø™Çï‹$øõû5ÿÁ$¿aߎ> ømð®ëǾø}ãÿÚ'⦛ûeYø¯ã.³ðŸ\øiá¯jšÿü"ÞÓ¼SÁoáN“¤ëþ7‹MÓÃð;TñÁßøIbø¥à]Åö¥ðÃZe—ÆO |>øK§@u»°Ñ?ø&¯ð;Vÿ‚‚ÿÁIìçûDø³öšøI§üÿ‚véÞøƒâoŒÚÿíg§Ïˆn«kÂ>øõâwÆ"ø§á­Xº¾º›YÕ¼wã{¯ø«Tñ?€ÿ·mbðŒ~Ð@>Hý©þ'üøCªÿÁI´¯~Ö_þk¾2ñÿÄ/|rÿ‚bÁC>ø Æþý¬üY©|0ð—… Ö¿g_øãÀßõ~ÔðÇ„4o^x^ûãV—gão´i¶ tÝzÏUðLŸÅoë? ¾=Á@>%øCÇ?¾|xø£û7Á-|âï†rücñ¿Œþ!|ý—>!|g‹Â¶÷Å?†5ý[XÒ5»ÏÙGᦧ¯ÙéÞ6ðÿ€¯,¾ kßˡţOâ»Ý_è¾"ügñ%Æ·ñGáìûoüdø“û*ë_?à’> Òÿi-/ãö¹ûKÝ|,øãñÿöà·øcñçá¿ÂïÚǺ×ÄKŸˆVšÇÀÛŸø¯Æ¿ ¼EâøKÀZ¿ˆ,tË­3IÑüu}á8€8¯‰_ÿjŸ†¿µÅ߃úGí#¢x3â¯Â/ÚCàOÃ_Ù#Â??à ÿµ¾#|ÕtσW:}ì_±W…ÿf‹zí©áïŒÚÖ½ñOø“ñ¯ÄÞ4ñ'‰ü¨Iã.ÓÅ¿t¯ƒ«s¥€ZÔP@P@P@P@P@Pæü¯þM¿àÇý¤Oþ uÿ¯ ý›¨äø)·ÇßÚƒá‡ÅÏÚ{Køãiöþý‰ÿcŸ^éñø£^д‡Þñ×íãã/‡¿´oÆk­ÁŸîü®øcöy‡Äš–«ñ3Døoãox#Bð¼^/Ó¼+â«¿ ØøwPø»À?h߉^&øað¯À¶e´_³×ÄÏÛçöyøe¥xÇöZý¸¾:~ÛÚž‰§x‹öGýµ<_ûB|6Ó?lÏŸ>Üø»Jñ=ƒ>x‹Gð¶›â?ŠºÇÀϽç‰mµŸj“x'IÐÀ4.þ4~Õº/í…⟇iû@ø/Á_¾~Ü þüøUñGöôý¦n~&øËöM°ñgÿèQ]Á?´/Ù—âƒþiŸ uo he‡Å_|DÕÿàžZÇÄÿÚ÷âLjÿà šçü×ὯíGû&k_üMâ á•æ…ñ+â,~¿ý”ïµ;¯ |ðÃ9bð¦™ðÏ⧇¼à«ÿˆº]߆%Ôükãäø‡Æ úóûaxóÀß³ÿüWö/ý¤?i=gJðoìÇ¢þϵGÂâï&‹LøUðKö˜øãÙïXð®·ñ Åš›Áᯇ“|HøMàωþðŽüG{¦iðÝ®³à‘©ÃsãÛ{KД¿ißÛ/ögñƒh_гÛÞø'Ài¿ÙàW¿nÏ„´/Ùïà÷í âË}#Z¼×4ï~×úO‡|UuðëáÃ? 
h^ø?ãŸÚ3á}¶¿â†Öü[cð“áî»kâÍäÒ>0øûZÜøšm;áíûgkÿÿc ~Ýß´_‚¼{ñ«Á¿¶§ÅýR=²çÀ_‹?²w­göûñÅÏÃÏMð_âoŠüuñ³Ä¾4ëÞ&ðMßÅë¿xSÂ^ñŽ«á-VÃI×>¶ð?Âß ~Ó?µÏì3áÍ;ö™ý¯%øÉá_ xÅþ3ÔüQ¥x«Ä#Å  jÿµ—íEâ~Îv_i=á×€—ö мEð·âïÅ¿Ûßãì%¬üVý¢4ŒŸþüGñM¯ˆ¾ þÎâýª~3x+Àþøª·ÀÅý‹ª\xÞ÷ÄiðÓâ×5!àpëömÔþ&k_³¿ÀmcãM擨übÕ~ |1Ô¾+j:®øoCÔ~#ßx+D¹ñ½þáÏhñ7‡´ËßË©\Ùh>!ðχõ½ÚX´ÝWFÓo­®,âÿÑþþ( € ( € ( € ( pß"…äZcÇ›|ÄGØë"oPÛ$\íuÈ;]rv°Áàó@ ÿÒþš>.ÁU4üNñß´ßøBl-|â}WÂÊu»=[XÔo§Ñ.žÂëP–K?øv 8¯.àžK+5µºd³û<²ÝÉ,²Eê™G‡K0Ë0Xê•qr–3O•eNœcUsB+žiIÆ-sJé9](Å%)|eÆrÂc±XJtðÑXjÓ¢ÝUR¤å*o–R|¸Š*7’v,¬¬Üµn>uÿ}¶ÿ ‡Ã<عâêÄÏ·ëžÕéÄ/¥üÙ‡§´¡ùýOñ·Ë¡Ãþ½Õþ\þ ­ÿͯoöÒ÷ø{í·ý>öÿ™sÄ}3ÿ òãê ñ é6aÿƒ(ó#þ»^áþ½Õþ\ÿ¸U¿ù·ËôÒÜÁÿ}¶ÿ ‡Ã<عâêÄÏ·ëžÔÄ/¥üÙ‡§´¡ùýOñ·Ë ¯u—ÿ‚«ókÛç}´½ÃþûmÿA†}¿æ\ñ_LÿÂÄü¸úƒGüBú_͘àÊüÈÿ®×¸¯u—¿îoþmòý4·0Ãßm¿è!ðÏö.x‡ú±3íúçµñ é6aéí(~SümòèëÝ_åÁàªßüÚöùßm/pÿ‡¾ÛÐCáŸoù— Ñÿ¾—ófø2‡ÿ2?ëµîëÝ_åÁ/û…[ÿ›|¿M-Ìð÷Ûoú|3Çý‹ž!Çþ¬Lû~¹íGüBú_͘z{JŸÔÿ|ºú÷Wùp_ø*·ÿ6½¾wÛKÜ?áï¶ßôøgÛþeÏuôÏü,O˨4Ä/¥üÙ‡þ ¡ÿÌúí{‡ú÷WùpKþáVÿæß/ÓKsü=öÛþ‚ ñÿbçˆqÿ«>ß®{Qÿ¾—ófžÒ‡çõ?Æß.þ½Õþ\þ ­ÿͯoöÒ÷ø{í·ý>öÿ™sÄ}3ÿ òãê ñ é6aÿƒ(ó#þ»^áþ½Õþ\ÿ¸U¿ù·ËôÒÜÁÿ}¶ÿ ‡Ã<عâêÄÏ·ëžÔÄ/¥üÙ‡§´¡ùýOñ·Ë ¯u—ÿ‚«ókÛç}´½ÃþûmÿA†}¿æ\ñ_LÿÂÄü¸úƒGüBú_͘àÊüÈÿ®×¸¯u—¿îoþmòý4·0Ãßm¿è!ðÏö.x‡ú±3íúçµñ é6aéí(~SümòèëÝ_åÁàªßüÚöùßm/pÿ‡¾ÛÐCáŸoù— Ñÿ¾—ófø2‡ÿ2?ëµîëÝ_åÁ/û…[ÿ›|¿M-Ìð÷Ûoú|3Çý‹ž!Çþ¬Lû~¹íGüBú_͘z{JŸÔÿ|ºú÷Wùp_ø*·ÿ6½¾wÛKÜ?áï¶ßôøgÛþeÏuôÏü,O˨4Ä/¥üÙ‡þ ¡ÿÌúí{‡ú÷WùpKþáVÿæß/ÓKsü=öÛþ‚ ñÿbçˆqÿ«>ß®{Qÿ¾—ófžÒ‡çõ?Æß.þ½Õþ\þ ­ÿͯoöÒ÷ø{í·ý>öÿ™sÄ}3ÿ òãê ñ é6aÿƒ(ó#þ»^áþ½Õþ\ÿ¸U¿ù·ËôÒÜÁÿ}¶ÿ ‡Ã<عâêÄÏ·ëžÔÄ/¥üÙ‡§´¡ùýOñ·Ë ¯u—ÿ‚«ókÛç}´½ÃþûmÿA†}¿æ\ñ_LÿÂÄü¸úƒGüBú_͘àÊüÈÿ®×¸¯u—¿îoþmòý4·0Ãßm¿è!ðÏö.x‡ú±3íúçµñ é6aéí(~SümòèëÝ_åÁàªßüÚöùßm/sï¿Ø»ö¼±ý«tM :/Û¼ ©èP\^ø|]ç]ÙxŽÓQšÄ=•íö©=õ´ú>£ê5¨f·k9ãòå·O‡âÞÿVë`ã•e e*²Q¬£í!:2‚Ÿ½Â3„•H8û‘i©§{FRú¾Ï¶ébe(SŒðÕ)ź\ÊXÉÇÝ”ê8É8I5í$šåj×hûj¾Dú  € ( € ã¾!øÏOøsà?xûU¦Ó¼ásÄ×–ñÈ"’êN¸ÔÒ)Y$Xåº0 x‘•d•K)PEta0òÅâð¸HÉFX¬E ËÎ#Ìû,¼•ýyx_MY9ævÖJt ›ïÉõYòÝëËÏ+mÌþ#ó—ÇuÚ†+¤\*ͯ.eŒ‚•»òÆûòîð÷Ûnú‡Ã?ü'¿ìõ£þ!}/æÌ?ðeþdÿ/ÒKý{«ü¸/ü[îÿ}{þÞÐ?áï¶ßôøgÿ„çˆ1ŽÇþJ'áõïÆ(ÿˆ_Kù³üCÿ˜ß鵺Ü?׺¿Ë‚ÿÁU¿ù»úþöñ?áï¶Ýõ†øNxƒÿž&8}ÙëGüBú_͘àÊüÉþ_¤õî¯òà¿ð]o»ýõïø{@ÿ‡¾ÛÐCáŸþž Æ;ù(Ÿ‡×¿£þ!}/æÌ?ðeþc¦Öëpÿ^êÿ. 
ÿVÿæïëûÛÄÿ‡¾ÛwÔ>ÿá9âþx˜àõÿg­ñ é6aÿƒ(ó'ù~’?׺¿Ë‚ÿÁu¾ï÷׿áýíþûmÿA†øNxƒìä¢~^übø…ô¿›0ÿÁ”?ùþ›[­Ãý{«ü¸/ü[ÿ›¿¯ïoþûmßPøgÿ„çˆ?ùâc€G×ýž´Ä/¥üÙ‡þ ¡ÿÌŸåúHÿ^êÿ. ÿÖû¿ß^ÿ‡÷´ø{í·ý>ÿá9â c±ÿ’‰ø}{ñŠ?âÒþlÃÿPÿæ7úmn·õî¯òà¿ðUoþnþ¿½¼Oø{í·}CáŸþž ÿ牎_özÑÿ¾—ófø2‡ÿ2—é#ý{«ü¸/ü[îÿ}{þÞÐ?áï¶ßôøgÿ„çˆ1ŽÇþJ'áõïÆ(ÿˆ_Kù³üCÿ˜ß鵺Ü?׺¿Ë‚ÿÁU¿ù»úþöñ?áï¶Ýõ†øNxƒÿž&8}ÙëGüBú_͘àÊüÉþ_¤õî¯òà¿ð]o»ýõïø{@ÿ‡¾ÛÐCáŸþž Æ;ù(Ÿ‡×¿£þ!}/æÌ?ðeþc¦Öëpÿ^êÿ. ÿVÿæïëûÛÄÿ‡¾ÛwÔ>ÿá9âþx˜àõÿg­ñ é6aÿƒ(ó'ù~’?׺¿Ë‚ÿÁu¾ï÷׿áýíþûmÿA†øNxƒìä¢~^übø…ô¿›0ÿÁ”?ùþ›[­Ãý{«ü¸/ü[ÿ›¿¯ïoþûmßPøgÿ„çˆ?ùâc€G×ýž´Ä/¥üÙ‡þ ¡ÿÌŸåúHÿ^êÿ. ÿÖû¿ß^ÿ‡÷´ø{í·ý>ÿá9â c±ÿ’‰ø}{ñŠ?âÒþlÃÿPÿæ7úmn·õî¯òà¿ðUoþnþ¿½¼Oø{í·}CáŸþž ÿ牎_özÑÿ¾—ófø2‡ÿ2—é#ý{«ü¸/ü[îÿ}{þÞÐ?áï¶ßôøgÿ„çˆ1ŽÇþJ'áõïÆ(ÿˆ_Kù³üCÿ˜ß鵺Ü?׺¿Ë‚ÿÁU¿ù»úþöñ?áï¶Ýõ†øNxƒÿž&8}ÙëGüBú_͘àÊüÉþ_¤õî¯òà¿ð]o»ýõïø{@ÿ‡¾ÛÐCáŸþž Æ;ù(Ÿ‡×¿£þ!}/æÌ?ðeþc¦Öëpÿ^êÿ. ÿVÿæïëûÛÅüúßòøgì‡<@?_øX}¸çŽˆÿˆ_Kù³üCÿ™?Ëô‘þ½Õþ\þ ­÷¾½ÿïh~¹þÏ¿­~:üðÅ{;Xl¢ñ~—qröÖ³µÍ¢]éÚþ‹~ÖSÈ«,–3_i·3X´Ê³}’HDÃÍ·òìï,y>kËe7SêµTTåY8Μ*Ú7iMB¤Tìí̬¬}öWŽY– QPúÄœSæJQœ©Ï•èÜy Üo¯.÷ÜÿÓôÚÇH´ÏÇ‰Š±/ñSƇÿ+wKǃ‚sŽN1_×Üð¶Aç•á:Ó¥´º¯.ýïæî%•³üá%ÿ3 Eõþý´óù»nâ´gÏŸØkýÓÔúÃŒtžGLŒÍô¼¿áÿÛG¯–Û_G/ ™¾úþŸÖ¿Ÿ•í#û ºÝý;ôì;wç'æÆ(äÒÛï+íåÑõ÷Ÿ>þnï_–š'¶—ù¸­âa¯÷OSè1Ðry0:31Èûþý´zùmµôr\Í÷×ðôøvµüü¯iØKýÖïéß§aÛ¿89?61G#þ–Ûÿy_o.¯¼ù÷ówzü´Ñ=´¿ÍÅoû ºzŸCøqŽƒ“ÈéÑ™ŽGßðÿí£×Ëm¯£’æo¾¿‡§Ãµ¯çå{HþÂ_î·Ný;ÝùÁÉù±Š9ô¶ßûÊûyt}}çÏ¿›»×妉í¥þn+xŸØkýÓÔúÃŒtžGLŒÌr>ÿ‡ÿm¾[m}—3}õü=>­?+ÚGöÿu»úwéØvïÎOÍŒQÈÿ¥¶ÿÞWÛË£ëï>}üÝÞ¿-4Om/óq[ÄþÃ_îž§Ðþc äò:`tfc‘÷ü?ûhõòÛkèä¹›ï¯áéðíkùù^Ò?°—û­ßÓ¿N÷~pr~lbŽGý-·þò¾Þ]_yóïæîõùi¢{i›ŠÞ'öÿtõ>‡ðã'‘Ó£3¿áÿÛG¯–Û_G%Ìß}O‡k_ÏÊö‘ý„¿Ýnþúv»óƒ“ócr?ém¿÷•öòèúûÏŸ7w¯ËMÛKüÜVñ?°×û§©ô?‡è9<Ž˜˜ä}ÿþÚ=|¶Úú9.fûëøz|;Zþ~W´ì%þëwôïÓ°íßœŸ›£‘ÿKmÿ¼¯·—G×Þ|ûù»½~ZhžÚ_æâ·‰ý†¿Ý=O¡ü8ÇAÉätÀèÌÇ#ïøöÑëå¶×ÑÉs7ß_ÃÓáÚ×óò½¤a/÷[¿§~‡nüàäüØÅú[oýå}¼º>¾óçßÍÝëòÓDöÒÿ7¼Oì5þéê}áÆ:O#¦Ff9Ãÿ¶_-¶¾ŽK™¾úþŸÖ¿Ÿ•í#û ºÝý;ôì;wç'æÆ(äÒÛï+íåÑõ÷Ÿ>þnï_–š'¶—ù¸­âa¯÷OSè1Ðry0:31Èûþý´zùmµôr\Í÷×ðôøvµüü¯iØKýÖïéß§aÛ¿89?61G#þ–Ûÿy_o.¯¼ù÷ówzü´Ñ=´¿ÍÅoû ºzŸCøqŽƒ“ÈéÑ™ŽGßðÿí£×Ëm¯£’æo¾¿‡§Ãµ¯çå{Kú5ÿ‚Ù‹/þÒèõ¿…ƒÿ^><°ì=ùÏ5ø/ŒŠØ¬‡þÁñý?éæïü4³ëy~½á“¾7ôÿ éðWÛm—–¾Gïý~,~ 
P@P@:þ×C?³DZëð¯ÆCóÑ®…z™ü޲ûeÿú—DàÍäW™Ø3ÿQêz~qü*ÿa/÷[ôôÆIÆ¿yãkû?‘ÿJÝzjû_i_N÷?˜yöì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹‡öÿu¿OLdœ`{ñמ1¶ŽGý+ué«í}¥};Ü9öì´ZùuÓð¶¯í;.Sû º}¸üãÿÀòK¿áÿÛ¿–žN×AÌüôùô¶¿ƒøzkk¹ŸÚOü:/'öýŸ"ç÷~ñ s×åñ׊‡8Ç?‡å_ɼ~­Æâ툣ÿ¨˜_Ïï?¢8?þI¬«þ¼ÔÿÔŠß¡ÿÔú?ö˜ÒüÿÚãlÛs¿â‡±Ç\x‚ø{÷è2;â¿°ø1Æ)دÿòšÿ?êÇó_ÉAœØÃÿ§õÿ x‡ö7\§Onƒ=xàû€Tw–¯¥<0þÆÿcüþלmÈÉSôþ¿¯øØÝr=º õãƒîQÜZ€ìoö?Ïà=yÆÑ‘ÜŒ•?Oëúÿ€ÐÞý•¼M㯠ø'ÄVþ4øcá×ø“âmcÁþðÿ‹|My£kÞ*ñ‡w¢XÝéÚlcE¹Ò£’Kßh¶¶ï¨j¶I5ü[¥E.Ëó˜þ&Áåø¬f¦1¯ý†£‹ÇWÃaéU¡†Ã×iÂ¥Fñ¬í =iJ4¨ÎIAïxžÖ#Äãha«Cƒ¥õÊõ0øZuëNJõ©J”eþêTãyV¥¹Î ¹k%«­üø…¡¿‚­[Ã×Z¶­ãÍ\×t]Ãöך޹·‡ÏQšâÚébó±ÜG‚ÁWÄÐú¾?õSžcW‡Uéeð©k¬7RœêMQµyQÂÃ^e“§Î íÂ䘜]}¶õ©N:xªÎ•L\¡?g/b”%ÇÚ§J5+ÔÃÒHÊœfåòù]ßÃXéWzõß„¼Cƒc©M£ÝëÒhÚ‡ö¾­o;[M§K¬ _ìÑ{Ê´-mö7zìé^¼1¸:•cB¬<«Îš­ µ?o*RŠ’¨¨ó{G¥ÍËk;ÝkÍçK ‰…9V–²£ºR¬éÏÙ*‘|®¯/'2––æ½ôò:Ï|ø‹âKù´èü3©éCÁ>)ñþžž Ó5M$k¾ðŽq®êRx|K`í¬\ÜYÛ4ztV¡»º–EÂy¡ë“œåØjq©,M*±xÜ._7B­*¾Ã‹­Õ{M*QŒåzœö”b¤ìíc¦†Y¯7B¥9}R¾6 ´*Sö´0ôYº7§ûÆãC•ZRi^7÷¹h¾øÚ}_Rðü ñLÚöo%Þ¯¢EáíZM_Jµ…#’kKLK3yco M’MsQ$rÆÍ"£©®§ŽÁF1Öõ-RYmôí;Eðí‹]JÓ˽ԟjÕo›Oðþn°½×õ*Åž»yàœf6Ž4%UNRÄâðø:騺•+âj(EEJP\´áí+ÖnK’…µ“$ë …©Š•UMÆ*†¶&¬æÚ„)Pƒ“m¥'Í9rR¦­ïV©NÊMÇÙ> þÈÞ2ø’O|3ñf¯ð²+kŸˆ¾ðw‰îµxJÆëXÒtÔu/RÑ´w¹²‡Y×4›¹´§Ô>Ä×°Kt°ÛmãTò }JÙ¥ÜÁäð©S9ÎJŸ-7?á8Ân£©N­;EsÓ‹’µìxçŽ~k_|_â?øŠ+a¬xcU»Òo^Êoµé÷-m&"Ô4ËÀ‘-î—¨Àb¿Óo;Ë ‹{¨ð“ öp8ʆÃ9:8šQ«xòTŠ’Ö!váVœ¯ nðœekf/ 
W‰­…®’«B¤©Ï•óBN/IÂ[JœÕ§NKIBQ’Ü屿Øÿ?€õçFGr2W«ôþ¿¯ø8cuÊtöè3׎¸Gpyj?±¿Øÿ?€õçFGr2Tý?¯ëþö7\§Onƒ=xàû€Tw– ßïø"5¯Ù4Ú91ÚŸÂÆÇýºøüð==:c¯øŒÿïyýƒc¿ôîý{Ã÷|ßþ¿a?ôŠýàiçs÷j¿?R ( € ( žkQŸÙ“ã¸õø_âñùéêdò:Éÿìi—ÿê]#ÏÍ¿äU™ÿØ¿ÿ¨Õâ·ûýŽzôÏøŒ¨îÎkûSúþ¾óù|?±½SôüÇCÐt?6{K?±¿Øç¯LÿˆÁúŽà æëúûÀ?±½SôüÇCÐt?6{K?±¿Øç¯LÿˆÁúŽà æëúûÀúWÆ_²'Œügã Å¿ üI¬|=ÒtM{Ç^ðljîîüUá}ÄšŸcªß階‰¦-Åœw¾(ÐmïdÒ®u,Î¥ “¤pù“¯Ìห©„„pÙ†ž>­z,N'N8lMl<1*R…ZUë8Éà ^PöЦ§ìÚƒrg»ŠÈ1xXbdë`ëO Uq4hÖœ«Q§Zt¡ ’…JTÔ¢§^’—³”Ü9×5×½4ñ—À¯ø#Ä<9¨xzûS†>#Õ|/ã s@°Ôµ_ iz¦“y-Ϭ¥”vð[I<,m¥½û#Ë "RÅ+ÔÁæø n/ÄÓÄS¦³<=,N…z”©bjÓ«R6 æå)%/ySæIÝkkGƒ—bðµ±”gFsúz”15iBu(S©Nn½U¦ÖŽ\·VÑÜÁŸ¼wãoÀz6—áíBΉ.Ѽáok–Ÿákí{\ÔáÒ­mƼld´tŠê\ÝSu<1E;£pÏ›`p41õª×§7–á+cqXz5)TÅS¡B›«9{u$ÜW¹Î¢›i]Ý —âñUp”©ÑœV7O ‡­R… Ö«?gí\\t“÷¹yšI»;5>ïÁÍ“Ú ô›åMJ{«}*qgrmµy,ïO¹TÍÍAb¾­\Ú—)p ‹6Q{!^E'°nœc*±ç58*‘ö±NôïNJk›–ñÕi©Í*U#ËxNÓrTåË.ZŽ2p—³m.{Ir»7g£³G´x»ö\ñgƒ´]Þø£áÆ«âO‡ñØËñáÞƒâØõx ;ëûM%¿·tÃe™y&™¬_ÙiôÖ|AuáýBá`Öb°Û;Åáàø—¯…¥O§Gê,¿[ ìðXçNªö3ö®¬Z4çW,MÄS‹t]KÆ'«‰ÈñXZ5êN¶UpŠ„¥_Ÿ…Sš¦ý¤\#NnIFhЫVTg+TQÕž[wð£Çz~«¥èwÞñe–·®D³èº=߆õ‹}SX†@Æ9´­>k%»Ô"“cl’Ò‚°N·Ö†?R•Zôñ¸IРÜkV†&Œ©Q’µÕZŠnÚººœ“òÖÇ,&.)Ò–µ’t©JHÔªžÎœy¦ŸGïÿ¤ôðþÏßî<)©ø² j³Ã£x®çÁúƇ—ªËâ+R³Ñ¿{u¨è‰§µÅŽ—ga´]_]4^Dì±Éî7,³¬¶8ªXIb¨ÆU°‘ÆQ¬êÑXj´§_êð:î§-J“©ðB*\É^/x +ÇKX*¾†'ÅfU)N…(J¦"œe^r‚—Õp“Æbd¹iÔ¼(QŒMŸ5jJ7朡fÙ{Ư¯|3Ѽ=ªø+ƶ5Ëïø'ÅžñÔ|+y«iše§ˆ-õ Û«ýxtkmöµ&§¦[-¦—rš„~u«jaÄx‡Ì«Ö§‹ÁË)¡ F7 Š ¡‰…*°«:§T©J·Ö”èªUgÍV.›JjÒ¹ä˜Å[Fœðøˆæ5eC ^…W*©NTãYNRŒ'OØûXN«©¨Ó—´\Ðw4´ïÙâ–«®|jðí” ú§Àê^*ñtI«]_AÓ”ÜCÁò%›z+ý-WZÒ°-›PÒ¥†âÊ‘TT〉• “9WT³ìE<. 
û%zuêZ>Ϝ߰tê?cWãöuS‹i&ʆA˜T«šQŒi:™E×įiñÒ‡½Ï‡vµU:kÚSøyé´Ö®Á¢þÈß5χ¾ ø‘l|7o£üB> —Ú¤å~(Ë0ù†3-ŸÖ|À¼eHÓ‹¡B9…jhÊSu"íMâhOãéR¨ª;Æ3q)d8úØ<6:>ÅRÆ}oêЕGíª¼:µ*(Ñ«ÍP« )Í{J‘äI·çOìoTý?1ÐôÍžÀƾˆñCûýŽzôÏøŒ¨îÎhþ¿¯¼ûÕ?OÌt=Cóg°± ûýŽzôÏøŒ¨îÎhþ¿¯¼ûÕ?OÌt=Cóg°± ëóþ Å•û| ûš/‰—ŽœxóÅCüþ•ü‘âœcžØM/Ç ‡~×mè®ÿ’k)ÿ¯5<¿æ&·õç¿SÿÕûkãî’·¾0LSvÿ‰¾9$óÿC.¤1Áþ\‘žkû#‚â¿Õ>ÿ±^ ÷Úšô·oóØþiâgÿr—ýâmæý§^ÚÝjÖý,œ¼“û çŸÓ¯Ôw=?Eî+éùW§ã·õúôG„åÛ£üú_ñO­¬ùw‘ý†¼~ïÔwéø6Iç9õä‚å_{O¾«î·ü»šú/’óìûjšÕ«ß¥“‘ý†¿óÏé×ê;žŸ‡¢÷r¯OÇoëõè„åÛ£üú_ñO­¬ùw‘ý†¼~ïÔwéø6Iç9õä‚å_{O¾«î·ü»šú/’óìûjšÕ«ß¥“—Úÿ ¿h_|>øð»Â×¾#ø½á]gá§Ž¼_ã ûOü(ðŽ4OØøšç—ºHñŠüa¦]xnæÖ/ ÞZ]χõŠêé*¬Íh°×çYï cñù¦qˆ¥ƒË1T3,¿„¥<^?2ÂÕÂO O ÔTpx E,Eå^Jxšöi]s·³Ê³Ü—Qž+‡«Æbq!‡Â`kÓÄB´èJuq8ºS¥eFi¸Q©u;¥îÚpÞüøq­x~_Ç¢|IøW¡jŸµO®x#M°×µ¯µÏí âÿúN…¡Áÿ ?‡nüKàÉô/é~×ç:LJoƳ¥¥È²¾°±Ù~ãÃ9­ ðÆNX,έË Œ• ]JÔ)cxk“Ö¯VtðxŇÅG‡­^³¡‰§ìêÊ›© NR‚yæ_V”°±Xœ :˜*øeWuªa¹³ÜVeN8φuhJ…jtªsUÃÔs‚’Œ£Ï{Å´Ÿƒ|fÐÝKwñãÀ¾ñ>¹â/ÉàA¢Cª|TmWá·€þ[ê5·Ö¡¶ðߊ®dð×ÍUŽ"QÌ0ò•:5*òâ!R´¨ÂáÍ3¬&a–ÑÁÆZUptr¿a8B 8Š”rúq[ Z¿³±éTN…Hâ(Â…8ÔV©âÓRSÓ ÄXHB¯Ö)ÔT2n|LhÓ­VU²ÚUéÎ/…÷_µƒ£VU§(J ›%Èéxσ|I¢ëºí(5c§ønóã7€|U.…5ìÚ~“ii­X|GðÆðçYé–Í­iÞ Ôt6ÊÚH¯µ™ôM$½··¯¢Çà+a¨ðŸ,jbVQ™`a‰PJ³”*e˜Ì¥byy§Qªñt±''7 Q«V¤š„äxøLe*õ8†îe˜àqR å(RŠœ1Ø|ÅÐM(Ó‹«K :Tâ¹cR£§J<¼ÑRö~ÔÞø‡ã?Q|BмR~ |V¼ðýæ¥øGáÇ€ô?‰Úkh=ð—‰<¿jÚ&±¡ÞøŠ Ÿ é^+Ó~Ç®x›Ä6ð‘_øvôÂ'³MNÃç°œË09#RƒÎ²Èb£Yâqyl ž#.ÇP‹¡J­p„%x`Û~Ò¾½ð}¯„_ÂÞ*µñˆþø·ÀŸ5¡Žtïø¾_Eð—àδ÷wZÅ™‹Fð§Vy<]«ê%o#ÖüE®ÜÃo{Bâ^‡Â8Úy‡ÖjÁà3|3, •~|6 X÷›çÒ…)óÖ­‹ä†œ#£F”¨Êñ†_ëxO`©ÕŽ+—bðØê¯Ùò×ÅDzì¶nR«Jtès:ÓšRu*Ô”c%ïOÁh ÝÅÿ¼Oªh7vú¶§ÁáŸØkVn&²×¡ð„´ÿÂAep‡V:ìž}bÎìûeµìW!Îd_©á|-\.G‚…zs£V£ÅâåFiª”>½ÄãcBq èGRƒ¿,¡dß-Ï>ÄÓÄf¸©ÑœjS§õ|2«îÕú®ŽU#'¼j:.¬'ö“»åºrñ¿ì5ã÷~£¿OÁ²O9ϯ ¾ÿ*ûÚ}õ_u¿àõØòy¯¢ù/>϶©­Z½úY9Økÿ<þ~£¹éøz/qG*ôüvþ¿^ˆN]º?Ï¥ÿúÚÏ—yØkÇîýG~ŸƒdžsŸ^@8!NU÷´ûê¾ëÁë°ù¯¢ù/>϶©­Z½úY9Økÿ<þ~£¹éøz/qG*ôüvþ¿^ˆN]º?Ï¥ÿúÚÏ—y~ãÿÁí—í ]»®¾6?í—Ä1ïØsý9üýãR¶3!óÃcßþUÃ|¿­z°x`ï‡ÍÿëöÿH¯¿ô¿'/Û*üDýL( € ( €>|ý¬fŽc×á—‹ç¥\ õr/ùäßö5Ëÿõ.‰çæßò*Ìÿì_ŒÿÔj§òý†¼þìûuëë÷°:óÓ¦rýµÊ´òÿ;ïê»}Çòê—[úùiøëÚÿµºa¯?»çñÿu cÑ«úóóþ¶]…ͪíþ]|ïÕiku^ða¯?»>Ýzúýì¼ô霃Å«O/ó¾þ«·Ü5.·õòÓñ×µþ+ku þÃ^wÏãþ8ê=@Ç£9Wõççýl» ›UÛüºùߪÒÖê½àþÃ^v}ºõõûØyéÓ9Š9Vž_ç}ýWo¸j]oëå§ã¯küVÖêGÞÿiχ~4°ø› 
¤ÿµÑôqá_‰–/5[ËDø›yŠÒ ߨEª®ƒâëuÑük¥á½#L³Òîí¦C"ÍÍhàéàeº£¯áü=\tªb>³–ÔÊ#F5~£MàÚ¯J”«áÝLF_*8е*INê%VâL¾®&x´ñ°TqyÍha# .†:ŒêJŸÖçõ¤éI*Š•uNŽ25(Â8òÛš<Æ‹ûCøLñ¯Ãˆ^üvIü5¨þÏgÅ 4›m†Z^ð%<7 ådž¾Óâ&(»×ÿáþØð®ŽÚoƒ[FÖµÍ_ûc\ÔÁy®û+pÆe<&mT2™{zµLíb]Ö¶G ©,G²¯Rlt«Ñ£EB8Ö–}‚†#/ź¹ŠTªä~ÛMQ† •<©ÑUeNø–ë¹ûR•9ÑÂ*5*Tr«8¾iùÅOŠV_¼Sà_ˆÚN‰{á¿x_KÑôÝ[ÿaXþéóx:ûþ(믇öK«ÝK¦hú–‘¥ÇŠ<1ö; {&±¨Z]ê]wµ÷rL—àóL¯V5°¸šµªPÅ)ÍãêÇKý©c*ºPZ´ksGˆæ©RT*u!IP‚Ÿ—™æÔq˜œ>„E táZ‡$V›ÂÔýݰUe(BpQuhrƪêNŸ¶”£ÑkÞ0ý›5_xƒâ²x+âÞ«ãOxÒÓÆwž¹ŠÏÂúwÂÛëÿZx³Ç—Þø£xª[ïê·ð¦«áïixoÃö–ë'YÖa¹Õ4Ë'? —q-<)Sʨaòì|*ÅÂ5±s(ÓÀUÁàa‰Áb0P£…§í%J¾1PÅâ%]ÒötÝ:Sš:ëãò9âñ‡.aZ¶7KðòtèÃå‹§‰ÅNŽ&Ž*Uq3åUhá½® ©)¹ÍNp‹—µkÿ´çÂMOG°“ã w'Æý:ëÆø ZxŸÃš/Æ{?4zΊúïÅÏxƒ_×4›ß¾âˆµ?ÙÞkúе›«=bÊhÆ ÂyÕ:®¼éåÍAä5–XºÏˆžOS1S¡SÙdØZXz5#Žl?±ÁUXjØj+ÙU\Õ#êVâ ®¥5J51©Ëû^—Ö’«J”0N5iº™ž"¥j–Ò­íq0u©×«ûÚnП ~Ò/†o< šv³ñçZÓ<ñÏá_ÄýSWñ-ΚÞ(ñoƒ>ø[GðÕÞ‘}–¹o š…èÓ¯¢ðç„5+ÝBÏOðÝņªø¯Q¹Šâö_B· âqTñÒ«‡ÊhTÄðæq—P£‡…UC ŽÇãqªR‹žÁ*”Ý|L!Jµjêua…¢š¦rRâ,= àÕ:ÙXPβìmZµ§MÖ¯„Âaháê&•e&á5J„§:t©rÁâgg!šßíàýOámǃôÝÄš&ºßõ/…Ñøf/ ÚKá+Í*ÿÆ÷ž.ÄÓx’O>Ÿ¦7•u¡u¡Û|;“X‹Æv±ÞÙø¶+W Ãð¾cO:§‹­RZ 4£šËñ5•xÊžžáÖêKŸXºQ¯<ÍCê²t烔’‰5sìò¹P§ ”ê}F¦QT){¥‹•uYÕx·É¤•GJ8)Oë N8¤†ü*ø­á‡?|¬8óüCðúûö‘ð´žµ´Ñ5Fy¾<ü'Òtøµ¼7®Ý[Ùë³ñƒ¨iþ6ib¿¶°Ò´­*+Ë+¹5k :î³¼˜æ¹–0’£¥Âتx‡õˆS…<—8­W0Âýcœéb%F½*¸T¥NsœêÎ3‚£:°œ³5ÃàðîKÚ`êgô%E{ÎSÌòÚTðuýov¥RŒáˆ¼g(S‹Sö°„¤ÑiÍÛÂÞ½ño‚ßÅÿ<áOо ð¬ÞÓ¼ð‚}'âŒþ²û~£mðïÄ^»Ðu]+—´ûíKÂú&Ÿ¯jZŽ­à©&×%:q¹ÓrÄp~/똼6õ|£1ÅäØÌUIW¯‰Ìc,²8ʳ§N¦a‡Ì!RL]<²q†*½ZTá BŽÓP«¥$Ã}[[GÛf,6g†¡J Xùa`ªJ:ØIBpÃÏKF')нgÉÏ ;ö­ð¾šx›Nøy©ø{â<¾ øMàíwDÐ-..þêP|ø‹¤jú-°ÖüOâÝoÅ׺Wˆ>èšo€µÏ퓨_IªZ<.òÙ^›¨1©ÁxúŸì51¯—ÒÇç8¬-z³åÇSy¶WR2n|ÐÍêÏJT½œgü8΄õ‡á Ö.žTñu0™n½qO R9v> .zøšØ— Ùl#†ªª:“nrq”¥‹Ãß´'Ã}OÅIÒüGáü!ñìß<aâó¤Z¬|ðšÓþð„òE¬ÜÉyñÇž'ѧ»Ôt+®­®µ¿]4:“Fóµ¿&#†3ZXLu\K§ŠÇçYl0ÉáUy[2Ì3ÏoV¼S£OÂÔ¦©Õ©(Îp±‹£Ì¢ç½÷/ž' ¥ &YŽ©‹ÃF»¤¿ØpyR¥ -ƬLV+NRœ!BU1 ûF®Ï?ì5ç÷|þ?ãŽ£Ô z1¯Õ¹Wõççýl»ͪíþ]|ïÕiku^ða¯?»>Ýzúýì¼ô霃Å«O/ó¾þ«·Ü5.·õòÓñ×µþ+ku þÃ^wÏãþ8ê=@Ç£9Wõççýl» ›UÛüºùߪÒÖê½àþÃ^v}ºõõûØyéÓ9Š9Vž_ç}ýWo¸j]oëå§ã¯küVÖêAý†¼þïŸÇüqÔzF4r¯ëÏÏúÙv6«·ùuó¿U¥­Õ{Çõ[ÿ÷Éý> 
D8éÞ*_ËÇÞ+¯ä/¿ä²Ïì&—þ¢aÿ«õß©ýÁ¿òLå?õæ¯þ¤ÖõüþóÿÖýøÉ¦ùÿ>+K·;þ%xë¶G)ÕG>ü`u=ªÿfpL/Â\<ÿêU…ÿÓâêíwm;Jþïó/ÉEœÿØÃÿ¥ÿ_Ó<ãû¾Þ;qéßóëúu5õ›þ_«þúémzöV<?±ýÛ}ñž¸ÀéÆð·Óúòø»Û[iÙÜ?¯ëúüÅþÇï·ŽÜzwüúþM.Mÿ /Õÿ}t¶½{+ŸØþ‰íŽ>øÀÏ\`tãø[‚éýy|]í­´ìî×õý~bÿc÷ÛÇn=;þ}N¦—&ÿ†—êÿ¾º[^½•€OìDöÇŸ|`g®0:qü-Átþ¼¾.öÖÚvwëúþ¿1±ûíã·ÿ>¿§SK“ÃKõß]-¯^ÊÀ'ö?¢{cƒÏ¾03×8Àþàº^_{km;;‡õý_˜¿ØýöñÛNÿŸ_Ó©¥É¿á¥ú¿ï®–ׯe`ûÑ=±Áçß댜` p]?¯/‹½µ¶Ãúþ¿¯Ì_ì~ûxíǧϯéÔÒäßðÒý_÷×Kkײ° ýèžØàóïŒ õÆN0?…¸.Ÿ×—ÅÞÚÛNÎáý_׿5ôDpÆ®¡•€t Fêñ¸Ë"«ÆÖ—ž:iÞÿ•Öïûݵïü»6¶ôù?ëú°ïìDöÇŸ|`g®0:qü-Átþ¼¾.öÖÚvwõý_˜¿ØýöñÛNÿŸ_Ó©¥É¿á¥ú¿ï®–ׯe`t`A0AJNz†ìsÈ#‚?ºÝ5òþ´øµ¾šÛNÎáý]tUEUDUEQU@UUÀP   €"—³^¬´ôø£²·O”~Ó»×Ï?[ÿ]z ýèžØàóïŒ õÆN0?…¸.Ÿ×—ÅÞÚÛNÎâþ¿¯ëóû¾Þ;qéßóëúu4¹7ü4¿WýõÒÚõì¬cú'¶8<ûã=qÓŒán §õåñw¶¶Ó³¸_×õù‹ýßo¸ôïùõý:š\›þ_«þúémzöVöKþ )köUøü¸Û¹¾68=¾#ØsÛ¦:b¿Ÿý«fÏã×ᯊ‡ç¦O^®Eÿ#¼›þƹþ¥Ñ<üÛþEYŸý‹ñ¿úTþW±ÿÙ÷éÇáíž¼zã9ý¿Éÿüß¼ü´ë½ÕÚ?•ÃûW>½‰èHÉÈôúûš|‹Ï¯O»íüíò¿Pþ¿¯ëòìö}úqø{g¯¸ÎE.Oøæýçå§]î®Ðö>:®}{Б“‘éõö'4ùŸ^ŸwÛùÛå~¡ý_×äØÿìûôãðöÏ^=qœŠ\Ÿð?ÍûÏËN»Ý] ì|u\úö'¡#'#ÓëìNiò/>½>ï·ó·ÊýCúþ¿¯È?±ÿÙ÷éÇáíž¼zã9¹?à›÷Ÿ–wº»@Øøê¹õìOBFNG§×ØœÓä^}z}ßoço•ú‡õý_cÿ³ïÓÃÛ=xõÆr)rÀÿ7ï?-:ïuv€?±ñÕsëØž„ŒœO¯±9§È¼úôû¾ßÎß+õëúþ¿ þÇÿgß§‡¶zñëŒäRäÿþoÞ~ZuÞêícãªç×±= 9Ÿ_bsO‘yõé÷}¿¾Wê×õý~AýþÏ¿N?lõã×È¥Éÿüß¼ü´ë½ÕÚ§DMâC™²Ú„rŒèä…fDb?ˆ ' ƒG³_ŸNžO›ÑÛå¯Ä;»[¥ïóþ˜ïìö}úqø{g¯¸ÎEŸð?ÍûÏËN»Ý]¡ö>:®}{Б“‘éõö'4ùŸ^ŸwÛùÛå~¡ý_×ä5´D|‰\+ z êrŒ»ƒ`«rƒÊ°8Ú½šõí~ýÄõZióæW|®ï»×žã¿±ñÕsëØž„ŒœO¯±9§È¼úôû¾ßÎß+õõý_cÿ³ïÓÃÛ=xõÆr)rÀÿ7ï?-:ïuv€?±ñÕsëØž„ŒœO¯±9§È¼úôû¾ßÎß+õëúþ¿ þÇÿgß§‡¶zñëŒäRäÿþoÞ~ZuÞêícãªç×±= 9Ÿ_bsO‘yõé÷}¿¾Wê×õý~GôÇûÇå~É£þå·‹×oˆ,ü}âœgŸ.ت_ú‰‡õüþóúGƒ?ä˜ÊëÅOýI¬ÿ×ýø©ð‡Ç£âÄi?á ñÔ7¾7ñ>©kya¥^_Ú\ÚkÅÞ«lñÍgê’Cà·¹†að]Ã:Jvþ¥àî9áhí$Ò´´ÖÇÿ Ç_ô"ø³·üËšÇLçñèzp;tÈÎ }7üD ÿ¢ƒ÷b?ùRþ¾èøŸêÿПÿPû¾7é·N·bÿ£ñ×ý¾-ÿÂwXÿäNǾ[®piÄ@à¯ú(p_ø#_ü¥÷ÿí»‡úŸÅ?ô'ÄÿàT4ÿÉÖÚÛ×[Úâ£ñ×ý¾,íÿ2æ±Ó9ÇüzœÝ23ƒOþ"ÑA‚û±ü©_tOõ?ŠèO‰ÿÀ¨}ßôÛ§[±áQøëþ„_ÿá;¬ò'cß-×84¿â pWý8/ü¯þRûÿöÝÃýOâŸúâð*äëmmë­íq?áQøëþ„_vÿ™sXéœãþ=Nn™Á§ÿ‚¿è Á}ØþT¿¯º'úŸÅ?ô'ÄÿàT>ïúmÓ­Ø¿ð¨üuÿB/‹ðÖ?ù±ï–ëœ_ñ8+þŠþˆ×ÿ)}ÿûnáþ§ñOý ñ?ø ?òu¶¶õÖö¸Ÿð¨üuÿB/‹;̹¬tÎqÿ‡§·LŒàÓÿˆÁ_ôP`¾ìGÿ*_×ÝýOâŸúâð*wÆý6éÖì_øT~:ÿ¡Å¿øNëü‰Ø÷ËuÎ /øˆÿE ÿÄkÿ”¾ÿý·pÿSø§þ„øŸü 
†Ÿù:Û[zë{\OøT~:ÿ¡Å¿æ\Ö:g8ÿCÓÛ¦FpiÿÄ@à¯ú(0_v#ÿ•/ëî‰þ§ñOý ñ?ø»ã~›tëv/ü*?Ћâßü'uþDì{åºç—üD ÿ¢‡ÿ€â5ÿÊ_þÛ¸©üSÿB|OþCOüm­½u½®'ü*?ЋâÎßó.k3œÇ¡éÀíÓ#84ÿâ pWý/»ÿÊ—õ÷DÿSø§þ„øŸü ‡Ýñ¿Mºu»þŽ¿èEñoþºÇÿ"v=òÝsƒKþ"ÑC‚ÿÀqÿå/¿ÿmÜ?Ôþ)ÿ¡>'ÿ¡§þN¶ÖÞºÞ×þŽ¿èEñgoù—5Ž™Î?ãÐôàv鑜ñ8+þŠ ݈ÿåKúû¢©üSÿB|OþCîøß¦Ý:Ý‹ÿ Ç_ô"ø·ÿ Ýcÿ‘;ùn¹Á¥ÿ‚¿è¡Áà8ò—ßÿ¶îêÿПÿPÓÿ'[ko]ok‰ÿ Ç_ô"ø³·üËšÇLçñèzp;tÈÎ ?øˆÿE îÄò¥ý}Ñ?Ôþ)ÿ¡>'ÿ¡÷|oÓnnÅÿ…Gã¯ú|[ÿ„î±ÿÈ|·\àÒÿˆÁ_ôPà¿ðF¿ùKïÿÛwõ?ŠèO‰ÿÀ¨iÿ“­µ·®·µÄÿ…Gã¯ú|YÛþeÍc¦søô=8ºdgŸüD ÿ¢ƒ÷b?ùRþ¾èŸêÿПÿPû¾7é·N·bÿ£ñ×ý¾-ÿÂwXÿäNǾ[®piÄ@à¯ú(p_ø#_ü¥÷ÿí»‡úŸÅ?ô'ÄÿàT4ÿÉÖÚÛ×[Úâ£ñ×ý¾,íÿ2æ±Ó9ÇüzœÝ23ƒOþ"ÑA‚û±ü©_tOõ?ŠèO‰ÿÀ¨}ßôÛ§[±áQøëþ„_ÿá;¬ò'cß-×84¿â pWý8/ü¯þRûÿöÝÃýOâŸúâð*äëmmë­íq?áQøëþ„_vÿ™sXéœãþ=Nn™Á§ÿ‚¿è Á}ØþT¿¯º'úŸÅ?ô'ÄÿàT>ïúmÓ­Ùú­ÿÕðŠ|!eñ‹Vñƒ¨hVZí×,4¥Ôâû-Íäš>.¸Ôf[IÚa·ûzÎÞå"yeYÊ!‰RY ñcˆr|û”ÿcã#† ‰UêÓHÓR¯V“„"êBRQ¦Ü­t®¶½Õ<>ɳ,§ ˜¼Ë ,,ñ5è:Tç(J|´¡QJMA´“”ýÛêüíy~¡WäÇèA@P@âÿ´^‹}âO¼?¦F%Ô5¿ëzU”lv£\ßZµ´^côH•ä ,‡ aˆšîÊñð™–]Š«e†Ça1yUåìèâ)ÔŸ*êùb캽L}âp8Ü=;{JøLE]ÙsÕ£8Fï¢æ’»è9Ëð“Ç¥WÌð‹¡¾6Ð5)LN~ò-áš 7V†Ybr7G+!¿®×ˆ<Òë ]'gé«ëgû­ú5ÓÏâ?ñJºþÇÄ;6®¥FÎÚ]~ñé×_Âí ÿ Ç_ô"ø·œñÿæ±ßþÝ©¾M?øˆÿE ÿÄò®¾öºå?Ôþ)ÿ¡>'Óš‚ÿÛÞÛ|“w»R_øT~;ÿ¡Å¿_øGuƒøÿÇ ïócõÈÆÚ?â pWý/º¿ÿ*ùŸÄêÿПÿPôÿŸŸ?Ãa?áQøëþ„_óž?áÖ;ÿÛ¯õ ÷É£þ"ÑA‚ÿÀqü«¯§}®¹Oõ?ŠèO‰ôæ ¿ö÷¶ß$ÝîÔ—þŽÿèEño×þÝ`þ?ñè;üØÇ=r1¶øˆÿE î¯ÿʾ_çñúŸÅ?ô'ÄÿàT=?ççÏðØOøT~:ÿ¡żçøG5ŽÿöëýH=òhÿˆÁ_ôP`¿ðGÿ*ëéßk®SýOâŸúâ}9¨/ý½í·É7{µ%ÿ…Gã¿ú|[õÿ„wX?üzÿ61Ï\Œm£þ"ÑA‚û«ÿò¯—ùüAþ§ñOý ñ?øOùùóü6þŽ¿èEño9ãþÍc¿ýºÿR|š?â pWý/üÿʺúwÚë”ÿSø§þ„øŸNj ÿo{mòMÞíIáQøïþ„_ýáÖãÿƒ¿ÍŒs×#hÿˆÁ_ôP`¾êÿü«åþ©üSÿB|OþCÓþ~|ÿ „ÿ…Gã¯ú|[Îxÿ„sXïÿn¿Ôƒß&øˆÿE ÿÄò®¾öºå?Ôþ)ÿ¡>'Óš‚ÿÛÞÛ|“w»R_øT~;ÿ¡Å¿_øGuƒøÿÇ ïócõÈÆÚ?â pWý/º¿ÿ*ùŸÄêÿПÿPôÿŸŸ?Ãa?áQøëþ„_óž?áÖ;ÿÛ¯õ ÷É£þ"ÑA‚ÿÀqü«¯§}®¹Oõ?ŠèO‰ôæ ¿ö÷¶ß$ÝîÔ—þŽÿèEño×þÝ`þ?ñè;üØÇ=r1¶øˆÿE î¯ÿʾ_çñúŸÅ?ô'ÄÿàT=?ççÏðØOøT~:ÿ¡żçøG5ŽÿöëýH=òhÿˆÁ_ôP`¿ðGÿ*ëéßk®SýOâŸúâ}9¨/ý½í·É7{µ%ÿ…Gã¿ú|[õÿ„wX?üzÿ61Ï\Œm£þ"ÑA‚û«ÿò¯—ùüAþ§ñOý ñ?øOùùóü6þŽ¿èEño9ãþÍc¿ýºÿR|š?â pWý/üÿʺúwÚë”ÿSø§þ„øŸNj ÿo{mòMÞíIáQøïþ„_ýáÖãÿƒ¿ÍŒs×#hÿˆÁ_ôP`¾êÿü«åþ©üSÿB|OþCÓþ~|ÿ „ÿ…Gã¯ú|[Îxÿ„sXïÿn¿Ôƒß&øˆÿE ÿÄò®¾öºå?Ôþ)ÿ¡>'Óš‚ÿÛÞÛ|“w»R_øT~;ÿ¡Å¿_øGuƒøÿÇ ïócõÈÆÚ?â pWý/º¿ÿ*ùŸÄêÿПÿPôÿŸŸ?Ãa?áQøëþ„_óž?áÖ;ÿÛ¯õ 
÷É£þ"ÑA‚ÿÀqü«¯§}®¹Oõ?ŠèO‰ôæ ¿ö÷¶ß$ÝîÔ—þŽÿèEño×þÝ`þ?ñè;üØÇ=r1¶øˆÿE î¯ÿʾ_çñúŸÅ?ô'ÄÿàT=?ççÏðØÂ/ž|ZKpðæ¯’Iéͨ'Óš‚ÿÛÞÛ|“w»Rþ‚ÿdŸøƒÀ?³·Ã/ ø§N}#]³ÓukûÝ.Y!–âÁuÿë^ ³µºkw’¼†ÇT¶KÈRI½Ðšìc,ßËœe˜á3n'Î3 GW ‰Å)P¨ã(sÂiRçQ•¤”¥M¸ó+¸´Ý®~óÃ8,F]‘e¸<\1(5V J\’J•9[âÚSIÙÙ;¥sÿÐý”ý¤?พ<ø[ñÏâ‡Ã_þΟµmÀ^0Ö|%³âoW—YÖ.t+§°¾Ô."Ómm­­Ræö ÚÚÜä[)¥¤fUþàO±\mÃ8%©ÆXL’žc1ÿѳüúÿjxŸ×Û¿~àרÄ«Ôÿ£“†ÿÄ*·ÿEçËÄÌb艧ÿ‰¿ùÉæº/ÇÝ?â Œôlÿ?ðiâoË©äöéžÃ­ñ*õ?èäá¿ñ ­ÿÑxÿâeñ_ôDÓíÿ% ½?èKßþ¨Ä@ÿèÙþ ýµ1ÿѳüÿÁ§‰¿.§“Û¦{´Ä«Ôÿ£“†ÿÄ*·ÿEãÿ‰—ÅÑO·ü”2ôÿ¡/ø~¡ÿ|cÿ£gø/õþÔñ>?¯·~ýÁ£þ%^§ýœ7þ!U¿ú/üLÆ'þˆšø‘Kÿœžk¢ü}Óþ"øÇÿFÏðcÿž&üºžOn™ì:Ñÿ¯SþŽNÿªßýþ&_ÿDM>ßòPËÓþ„½ÿáú‡üDñþŸà¿×ûSÄøþ¾Ýû÷ø•zŸôrpßø…Vÿè¼_ñ3Ÿú"iÿâE/þry®‹ñ÷Oøˆãý?Áüx›òêy=ºg°ëGüJ½Oú98oüB«ô^?ø™|Wý4ûÉC/Oú÷ÿ‡êñÆ?ú6‚ÿ_íOãúûwïÜ?âUêÑÉÃâ[ÿ¢ñÄÌb艧ÿ‰¿ùÉæº/ÇÝ?â Œôlÿ?ðiâoË©äöéžÃ­ñ*õ?èäá¿ñ ­ÿÑxÿâeñ_ôDÓíÿ% ½?èKßþ¨Ä@ÿèÙþ ýµ1ÿѳüÿÁ§‰¿.§“Û¦{´Ä«Ôÿ£“†ÿÄ*·ÿEãÿ‰—ÅÑO·ü”2ôÿ¡/ø~¡ÿ|cÿ£gø/õþÔñ>?¯·~ýÁ£þ%^§ýœ7þ!U¿ú/üLÆ'þˆšø‘Kÿœžk¢ü}Óþ"øÇÿFÏðcÿž&üºžOn™ì:Ñÿ¯SþŽNÿªßýþ&_ÿDM>ßòPËÓþ„½ÿáú‡üDñþŸà¿×ûSÄøþ¾Ýû÷ø•zŸôrpßø…Vÿè¼_ñ3Ÿú"iÿâE/þry®‹ñ÷Oøˆãý?Áüx›òêy=ºg°ëGüJ½Oú98oüB«ô^?ø™|Wý4ûÉC/Oú÷ÿ‡êñÆ?ú6‚ÿ_íOãúûwïÜ?âUêÑÉÃâ[ÿ¢ñÄÌb艧ÿ‰¿ùÉæº/ÇÝ?â Œôlÿ?ðiâoË©äöéžÃ­ñ*õ?èäá¿ñ ­ÿÑxÿâeñ_ôDÓíÿ% ½?èKßþ¨Ä@ÿèÙþ ýµ1ÿѳüÿÁ§‰¿.§“Û¦{´Ä«Ôÿ£“†ÿÄ*·ÿEãÿ‰—ÅÑO·ü”2ôÿ¡/ø~¡ÿ|cÿ£gø/õþÔñ>?¯·~ýÁ£þ%^§ýœ7þ!U¿ú/üLÆ'þˆšø‘Kÿœžk¢ü}ߨÿø&'íõ¬þÝø§wâo†úÿ|1ñ‡m%ƒÃ…Íi>,±Õ'ÓgŽ;èÒî×Qµ¹ðþ«ò3˰½Œ°²»L‰øoˆÜˆðë>¥’VÍðùÜ19} Ë Ž¡«—7FµlFT«`êb±Ê•Juðµ¬áŒ¯”¥Nº“8þÕá·Għ˜¼²YU\7êu°ï±”äÝ(Õ„é×öyI8ÊÒŒ¨Ç–JË™ZGê|ú(P@P@y¯Æ_ˆv¿ >üFøŸ{¥É­ÚxÁž!ñeÆÉo&§‰¦Ü_½ŠO,SG ºù^cÅ"¨bJ61M&ÚQµÛ²»²»Ñ]¤ÚWݤíÙìc‰¯6‰’rŽZòŒ~)F”%Q¥}.Ôl¯Ôþ^åÿƒ€~/4’4³Á¸a.Æ(æÖ¼Iq$q“ò¬“ǺJê0­"A³d¬K•ýYG賉•RÄx‹‚£^Tá*Ôip…|E*U\S©Nž"\Q…•xBMÆ¥†Ãº‘\î+¸Gùj§Ò^²©5K‚ã:JrTåS?tç()Z8äõc 5g(ª’Qz)Ëã#ÿˆ€>1ÿѳüÿÁ§‰¿úݹëÎxÆkOø•zŸôrpßø…Vÿè¼Ïþ&cÿDM?üH¥ÿÎO?ÖÚòÄÿˆ€>1ÿѳüÿÁ§‰ñõà纞xçš?âUêÑÉÃâ[ÿ¢ñÿÄËâ¿è‰§ÿ‰ ¿ùÊû®ß¨Ä@ÿèÙþ àÓÄßýnÜõç1ÿѳüÿÁ§‰ñõà纞xçš?âUêÑÉÃâ[ÿ¢ñÿÄËâ¿è‰§ÿ‰ ¿ùÊû®ß¨Ä@ÿèÙþ àÓÄßýnÜõç1ÿѳüÿÁ§‰ñõà纞xçš?âUêÑÉÃâ[ÿ¢ñÿÄËâ¿è‰§ÿ‰ ¿ùÊû®ß¨Ä@ÿèÙþ àÓÄßýnÜõç!xvMBÿÃëyý¡—ªéº®£ 
jöö·¦(êÇûSI»“OšXbìžÜÏͽkù‡:Ê«äyÆi’âjѯˆÊsf[^¾MP­Wˆ©†©VЍ£QS©*NpŒ×2‹³»W—ô§ç4ø‡"ʳºTg‡§™à¨âãB¤£)Òö±»§)GÝ“‹ºæVºÖÊö?ÿÑÂý¯l<ïÚ£ö‰“{ã?ÄnžÞ*Ô‡±ëéžyç¢ÿrøA™ª>pí.ofªÎVÿ™Þdöåè›¶ýôÚ_Á^(áùü@âiwÇÃÿQ0þRÒú~hüéý˜:ã>øüzÆ<{á¿Jþ×Z®eñ~~ïêöé¹ð?Uëºþ¯öwµÞë®ö¸f{?Û¯#¿¡nyçíuüË¿Åóþ^×ÿð‡Õ|ÿ¯?uõ²{k²w°f¸Ï¾? qÏøcû]j¹•ÿÅùû¿«Û¦áõ^»¯êÿg{]îºïk‡ög±ãñ=ºò;úçžphþ×_Ì»ü_?åíø}WÏúó÷_['¶»'{ö`ëŒûãñêðì;†?µÖ«™_ü_Ÿ»ú½ºnUëºþ¯öwµÞë®ö¸f{?Û¯#¿¡nyçíuüË¿Åóþ^×ÿð‡Õ|ÿ¯?uõ²{k²w°f¸Ï¾? qÏøcû]j¹•ÿÅùû¿«Û¦áõ^»¯êÿg{]îºïk‡ög±ãñ=ºò;úçžphþ×_Ì»ü_?åíø}WÏúó÷_['¶»'{ö`ëŒûãñêðì;†?µÖ«™_ü_Ÿ»ú½ºnUëºþ¯öwµÞë®ö¸f{?Û¯#¿¡nyçíuüË¿Åóþ^×ÿð‡Õ|ÿ¯?uõ²{k²w°f¸Ï¾? qÏøcû]j¹•ÿÅùû¿«Û¦áõ^»¯êÿg{]îºïk‡ög±ãñ=ºò;úçžphþ×_Ì»ü_?åíø}WÏúó÷_['¶»'{ö`ëŒûãñêðì;†?µÖ«™_ü_Ÿ»ú½ºnUëºþ¯öwµÞë®ö¸f{?Û¯#¿¡nyçíuüË¿Åóþ^×ÿð‡Õ|ÿ¯?uõ²{k²w°f¸Ï¾? qÏøcû]j¹•ÿÅùû¿«Û¦áõ^»¯êÿg{]îºïk‡ög±ãñ=ºò;úçžphþ×_Ì»ü_?åíø}WÏúó÷_['¶»'{ö`ëŒûãñêðì;†?µÖ«™_ü_Ÿ»ú½ºnUëºþ¯öwµÞë®ö¸f{?Û¯#¿¡nyçíuüË¿Åóþ^×ÿð‡Õ|ÿ¯?uõ²{k²w°f¸Ï¾? qÏøcû]j¹•ÿÅùû¿«Û¦áõ^»¯êÿg{]îºïk‡ög±ãñ=ºò;úçžphþ×_Ì»ü_?åíø}WÏúó÷_['¶»'{ö`ëŒûãñêðì;†?µÖ«™_ü_Ÿ»ú½ºnUëºþ¯öwµÞë®ö¹ý2ÿÁ½}ŸCý«έðhûÿÇŸÄÑÏnÝ‹sžy5ü‡ãþ)b¸Ã+ïËÃxHïùšfï²ïùm´¬~TýžAŸ.ùÅ'ÿ–T¿ú/íèî¿ ? 
‚€ ( € (æ/ÛPgöGý¤G¯Áˆÿ-Ëú¨»J/´“û™Ãšk–f+¾ÿ¨õ=??¸ÿ?¿ìÎøÇòüþ¸ÎTzä_éoöºiàW·þJ¼ßOÃÞÿ6–­ÿ®ºòï²ëòÑI³eý1Æ8úärqÁþ#†?¶ûKÿÿíWõëxŸUý5þ¢´ß¿“V¸ŸÙñåùýqœ¨õÈ"ítþÒÿÀ¯oü•y¾Ÿ‡¼,/[ÿ]uåße×墒ÿfËúcŒqõÈäãƒüG l'ö—þÿÚ¯ëÖñ>«úkýEi¿&­q?³;ãËóúã9QëEÚéý¥ÿ^ßù*ó}?xX^·þºë˾˯ËE%þÌ—ôÇãë9ÉÇøŽþØOí/ü ÿµ_×­â}Wô×úŠÓ~þMZâfwÆ?—çõÆr£× Š?µÓûKÿ½¿òUæú~ð°½oýu×—}—_–ŠKý˜;/éŽ1Ç× s“Žñ1ý°ŸÚ_øÿj¿¯[Äú¯é¯õ¦ýüšµÄþÌïŒ/ÏëŒåG®Ak§ö—þ{ä«Íôü=áazßúë¯.û.¿-—û0v_Óc®@ç'â8cûa?´¿ð/þÕ^·‰õ_Ó_ê+Mûù5k‰ý™ßþ_Ÿ×Ê\‚(þ×Oí/ü öÿÉW›éø{ÂÂõ¿õ×^]ö]~Z)/ö`쿦8Ç\ÎN8?ÄpÇöÂià_ýªþ½o꿦¿ÔV›÷òj×û3¾1ü¿?®3•¹Qý®ŸÚ_øíÿ’¯7Óð÷……ë뮼»ìºü´R_ìÁÙLqŽ>¹œœpˆáí„þÒÿÀ¿ûUýzÞ'ÕM¨­7ïäÕ®'ög|cù~\g*=r£û]?´¿ð+Ûÿ%^o§áï Öÿ×]ywÙuùh¤¿Ùƒ²þ˜ã}r98àÿÃÛ ý¥ÿö«úõ¼OªþšÿQZoßÉ«\OìÎøÇòüþ¸ÎTzäGöºiàW·þJ¼ßOÃÞ­ÿ®ºòï²ëòÑI³eý1Æ8úärqÁþ#†?¶ûKÿÿíWõëxŸUý5þ¢´ß¿“V¸ŸÙñåùýqœ¨õÈ"ítþÒÿÀ¯oü•y¾Ÿ‡¼,/[ÿ]uåße×墒ÿfËúcŒqõÈäãƒüG l'ö—þÿÚ¯ëÖñ>«úkýEi¿&­q?³;ãËóúã9QëEÚéý¥ÿ^ßù*ó}?xX^·þºë˾˯ËE%þÌ—ôÇãë9ÉÇøŽþØOí/ü ÿµ_×­â}Wô×úŠÓ~þMZç÷+ÿ¦O/þ ûû9'÷t/¯åñ/Æ¿OåùWùñÆÕ=¯qeOçâLò]÷̱>KòûÏïïcËÀ¼-Ù>ä¯×úí±ÿÒÔý«l¿´ßí&Þ_ã'ÄsÓ©ÿ„³U¾ÞϨÛýIá¾gìx/&¥ÍnG™®íÂÆ=í§}?[\þ&ñÍÆüE;o_úAv—ç®úÛ–>ý›þÏÓ9ÿ=Çn¸é“_sý¯ýÿëî×îíu£±ñ?U}¿?ðzôë}.Ãû0wúþ¹ÇéÜõãjþØÛÞµþmz¯òõèU{Ûþ·Ãý.ÿfÿ³ôÎÏqÛ®:dÓþ×þÿõ÷k¿÷vºÑØ>ªû~>àõéÖú]‡ö`þïõýsÓ¹ëÆÕý±·½küÚõ_åëÐ>ª÷·ü7o‡ú]þþÍÿgéœÿžã·\tɧý¯ýÿëî×îíu£°}Uöü|ÿÁëÓ­ô»ìÁýßëúç§s׫ûcoz×ùµê¿Ë× }Uïoønßô»ü!ý›þÏÓ9ÿ=Çn¸é“Oû_ûÿ×Ý®ÿÝÚëG`ú«íøùÿƒ×§[év\°ðíö­e¥iZ}楪jwvÚ~¦éöÓ^ßê÷“%µ•¥²É=ÕÝÝÄ‘AmmRM<Ò¬Q#»"ÔO;8:“«p„e9ÎrQŒ#yJm餛odµz&U<J³…:TçR­IF:tâçR¤æÔaNŒ%)Nrj1ŒW4›I)_–6ôÿëÚ¾³‡t­ WÔüA=ÌÖPhZ~›{{¬Íy˜g´‹L¶†KÙ.aKæÀ–í,~T…Ñv8¥<ö•:nµLE8RŠRug8Fš‹ÚNoݳrVz^É«§béåØŠÕ–• µq“‚¡NœçZSMÞ*’¦æä¬ÛŠM«6ífgÝhwW7W–“ÙÞZO-­Ý¥Ô2Asmuo#E=½Ä„’á•9a‘ã::†\%G:SŒe‘qœy”¢Ô“NÍI5tÔ–©ÅÙ§utÌ僜%(΄âÜe .YFQ|²Œ¢áxÉ=i5Õ;8Åm4­BêÖÂÂÎâöúöâ;+;H%¹»»»¹•b·µµ·…^Yî'–DŠ"G’YY³¢YÌaJucÁ9JriF1ZÊM»$’Õ·Ëeè´“Žu'p„§9Ê0„#yJs”¹c¥NîNNÑQM¶ôµí"óAº°»º°¾³¸±¾²¸šÎòÊî mîí.í¤hn-®mæËÄ#E4ƲE"¼r(u‘ΔãB¤e Çš3‹RN.Î2M]5%ªqvkUt)àçNR„á(N 
Âpšåœ%Ë(J.Œ“¼\ZMZÍ;8Æ¿öoû?Lçü÷ºã¦MWö¿÷ÿ¯»]ÿ»µÖŽÂú«íøùÿƒ×§[évÙƒû¿×õÎ?Nç¯WöÆÞõ¯ókÕ—¯@ú«ÞßðݾéwøCû7ýŸ¦sþ{ŽÝqÓ&Ÿö¿÷ÿ¯»]ÿ»µÖŽÁõWÛñóÿ¯N·Òì?³÷¯ëœ~Ï^6¯í½ë_æ×ªÿ/^õW½¿á»|?Òïð‡öoû?Lçü÷ºã¦M?íïÿ_v»ÿwk­ƒê¯·ãçþ^o¥Øfîÿ_×8ý;ž¼m_Û{ֿͯUþ^½ê¯{Ãvø¥ßáìßö~™Ïùî;uÇLšÚÿßþ¾íwþî×Z;Õ_oÇÏü½:ßK°þÌÝþ¿®qúw=xÚ¿¶6÷­›^«ü½zÕ^öÿ†íðÿK¿ÂÙ¿ìý3ŸóÜv뎙4ÿµÿ¿ý}ÚïýÝ®´vª¾ßŸø=zu¾—gô…ÿƒìú_íL c:‡Á¦Çý»|Oüqéù×󟋘¯­q. w¿.E„þd3Gòz÷ûéÏ©û<“;]óJOÿ-)¯/ËóJ?Ñ ~\~æP@P@2þÚ?²Wí:çà߆=sáëï§óüªdí>Ñoî_?Ëï8³=rÜÁwÁbÿôÅOOÏî?ƒoìÁýßÓÿ®}½?¥oÿkïï¥é­ôùÿÁÙ[VŸ¿Uk§ãÿÚyÛÏeË´—û4uÛß8éÇ^ÿýnÞ¹eý±ýÿ—õë½õ¶–ª¾ßÿi®ºýŸÁ±?³÷Oþ¹öôþ”ÿµ÷÷ÒôÖú|ÿàì­«ªµÓñÿí<íç²åÚKýš:íïœtã¯þ·o\²þØþÿËúõÞúÛKÕ_oÇÿ´×]~ÏàØŸÙƒû¿§ÿ\ûzJÚûûézk}>ðvVÕ‡ÕZéøÿöžvóÙrí-Áš÷еkMïâ=vüÍöAÓou}ZûìöòÞ\ M;O†âòäÁio=ÌþLc·‚Yœ‰Þ²«žÒ¡NUkW§F”mÏR¬áNœo%Í9òÆ7””SnÍÙ+6¹µ¡—b15cG B®"¼ù¹(Ч:ÕgÊœåÉN)ÎVŒ\ß,tŒe'¤\Oü0ñÏ&³·ñ¿‚|YàÛB)g°ƒÅ^Ö<=5ô0:Ç4¶‘êöVOu2:$²BŒ‘»…r (¨¡Ä8lW3Ãc0õÔRxzÔ«(¶®”7.VÒvN×ÖÛ7qY>;ãn „upŽ&…\;šŽ’pUiEÉ+¤ìšW³¶œÜËh³¤1\½´«o<’Ç í¬3I§Ž)YDr<+<eWfŒK`¾bnÛûe]¯h¹•›ŠµÒwµÕÝ“ÖÎÊü®×³9žI)8¾Vä£'³q·2‹äIµÌ›³Ó™7k£©ð§Â¿øðßxÆ4:P¶mPxOÃ:×ˆŽš/~Ð,ÍÿöEÿÙÙµ¹û1¸ ͽÀ‹w”þV8Ž#Âáy~µŒÃa¹Ü½›¯^•/iÊ—7'´k›—š7¶ÜËkÞ=X\›ö‹Åã=—/µú®®#Ùóór{OeJ\œü³Pæ²—,”vj9wÞñ› ýÎ¥áÝkO·Òµh´-R{í.þÒ7[ž »¨4kù®-ãŽÏUšÚÆöâ->ᢼ’ ;©RH't¸çÔfࡈ¥78:°Œ*BNtÓŠu"“|ÔÓœSœ\£v•îÑ”òÜM5RU0õ©ÆUB¬§NpTëÉNQ£7*K–«Œ'%NN3’„ä£hÊF'ö`þïéÿ×>ÞŸÒµþ×ßßKÓ[éóÿƒ²¶¬Çê­tüûO;yì¹v•ù¼1©Ûéö:½Æ—“©Ü_[iºœÖ—iúΘ-[R¶±½’1mw>ž/ìMôPJòZ Ë3:Çö˜‹ÇöÜ¥V¤e:jQs‚ù£vâ§Ë.VÒRä—-ìË–¬i´©T*’© u\Z§RT¹=¬iÍÓQœ©ûH9¨»Çž-Ú雸mã/êO£xÁþ'ñž±´·²i^Ð5ojIg Ešíìt‹[Û¥¶‰¤eœÂ"¤Pì7 \ñC‡ÂÇÚb±t0Ôù”UJõ©Ñƒ“WQS¨ÒævvW»ÖÉêͰ™F7QÐÀàñ8ÊÊ2›¥„¡WQB?Ý:4§.UÍg+%­´¹…{ ]é×—V•Íý•ÄÖ·¶7°Mkyiuo#Ç=µÕµÂ¤Ð\A*´SC*$±È…Õƒkê3Œg ‘œd”£(µ(É=SM]4ÓM5)&–›ÜÂx*”ç(T„¡Rœ¥ Âk–pœ]¥EÁIIJé§ÊÓOoyȱðõî©{g¦i–7zŽ¥¨][Øéú}…´×w××·s%½¥¬ ,÷WWWE ½¼<ÓM"Ç3º£ΣÎs«p‚s”å$¡Æ7”¥)iÅ&Ûv[ê­p† ¥I•:r©R¤Ô!NšsIÍÚ0„c)JMòÆ1MɾXÙèE6-´ó[ÜÛËÄÉ öó£Å<2ÆÌ’C,rñË©IÕYv²†…œ¦”£QI5tâÓM=SMhÓOF›M--{ÉK (ÉÆQq”[Œ“ÒQ’vi§ 
¦žëÝÖý›"þÌÝý?úçÛÓúUkïï¥é­ôùÿÁÙ[V/ªµÓñÿí<íç²åÚKýš:íïœtã¯þ·o\²þØþÿËúõÞúÛKÕ_oÇÿ´×]~ÏàØŸÙƒû¿§ÿ\ûzJÚûûézk}>ðvVÕ‡ÕZéøÿöžvóÙrí%þÍv÷Î:q׿ÿ[·®Ylåýzï}m¥ƒê¯·ãÿÚk®¿gðlOìÁýßÓÿ®}½?¥?í}ýô½5¾Ÿ?ø;+jÃê­tüûO;yì¹v’ÿfŽ»{ç8ëßÿ­Û×,¿¶?¿òþ½w¾¶ÒÁõWÛñÿí5×_³ø6l?ðK4òÿ`ŸÙé?¹£øÍ/‰ž5ÿçÓ¥ñ-OkÄ|CS~|÷8–žyŽ'Óúï¹ýÁÀåàÎl¯¿ýtô[ÿÓôÚnÃÎý£~=IÇÍñ‹âIèOüθÇlóŒ{ôÏ"¿aàüÕPáÜ ')/gS0ZZßò2ÆKgÚíõïåä>Ã9q†}?æÆ'ÛþaéuP};¾ûjxwöf;~ÛœgæÆ\sõû«ôßÛqþy_}×^Þ_Ò>AáÉ~6ÒúiÈþZý÷û/¶?ñÓŸ¡£ñ韘Qý·畽c÷ö½õ¾¯KùDú£ßúÛ¿#ÙIh'öf;~ÛœgæÆ\sõûªmÇùå}÷]{yHì—ãm/¦œå¯ßq²ûcÿ9ú1ê?™ù…Ûqþy[Ö?kß[êô¿”Oª=ÿ­»ò=—ô–‚fc·èݹÆ~l`uÇ?_º§öÜžWßu×·—ôáÉ~6ÒúiÈþZý÷û/¶?ñÓŸ¡£ñ韘Qý·畽c÷ö½õ¾¯KùDú£ßúÛ¿#ÙIh'öf;~ÛœgæÆ\sõûªmÇùå}÷]{yHì—ãm/¦œå¯ßsÚ¿fÙ´¯~Ñ?U7a#šqHÐÄàÜêa²úUñ•q0¥„­ŒÂåZxìCÅTýÜ•lUòWQýbr猤êûÜ'à Ûx ãÀ­cZñ/ƒ¤Ó.Ç¿ó5?íµüò»ó]{mò²éÒÁõG·õÖÚr%«ûžªú þÌì_öO~¹éÜãž7/í¸ÿ<¼õ],µüµß¯@ú¤¯è÷¾ÝþÂ~ºëÖ×ìÂ}3ô?Ÿcßù‚ŸöÚþy]ù®½¶ùYté`ú£Ûúëm9ÕýÏU}QõìC¨è^ý¥¼­ø“Yм?¤Åáß‹Vj~%ñfŸà=/5¿ƒž?Ñ4{+¯ê×v‡ŸVÕõ &ËS¸½·ò/¯mŒ/öƒ?Ÿæ•b(Óu%)UÁÊч¶v¥ÃT“öIIÔ匜lî–½ªà˜SÃq.µz”hÒ Ö¥|L0´T«eê0Œ±}œhûJ•!MTs§4õn(ú#BоÚ|DmGö’O†6Ÿ‡ƒ–ß]¶ÓjÍö“ñì:Ññ^‘ýyðîûž'ñ‰4}BmB[x¼i¤[Aöøkmâí^}º¶ŽõåO8ÆSÂÛ-«‰úë¯x9eÿQ¢àèÍN5á:TéÔŒcyQ›´–#Ù$œÑôô0MLÉË>Ž[ý“õ+U….!¡âÕo­Ñö2ÁË ‰¯^Œå&¡‰§qàV&nÕ)¯Å+%ñÿÁü.°ñ칤jÞ ý ÿikí}ü/ã_‡Z‰¦iš¯…<â^øKR¸×WRÔ|âûx·Cе›XolõKAðO„õ{è|M•eqт͖0Äã&ó:‘¯–åtãícZ¬ç*u1êªÑPpUiʬ'(Ë•ÅT¯Vœ}œ¦Î Û õì—–Ñ©ÃÔjàóþ$gC€ÃÒ¥ áðxœ3ÃMÕö³ÂâaFµ u`§•há0Õ¤± ”%“û&Ýi|ý¤<).£àßøI,×üGá¶¾Mû[K:Žes3í34j^-õfR«ŽÊë)VP£K0Œç #Æò:ë Ê%Nisû9Yµ…ÚÖf|#N<£ˆ°ÒžÛb+äs£G›Ðɽ¤hK4ö²†"µ\2—³ö±æ„&þ4ÚÛ›é¯øÇàŒüGã+|@ømâ[|[ø!â t _ÇVº¯„ ñ×…ÿd?ŠþÑt [Æv2éw~4øaá?ŒxÁ^1ø¨H°Õ´û¦¿ñ6¸÷Z•Þ«qäáñØÌ5*ÔeG¡í!O–££ˆÎðuªT…&§5êá}µJxtŸ³£J F0‡ÓãKÅbáŠÅà±ñ¾M‰tjâãS V„³JhÕÅGÙÏ—áó/ªa±9ƒj5¡.lEYJ¤ç/ÿ„á@„zgìÀjUøEk=dždño€Gìû‹GÅÉ⺹Žþ_Â?‡Á¡auqᨼLÞE’ïP·±6‰í"ôu¿³úÎgýõÇj¼µ¾¿ì~¦š‡ÁõÏ`±œÏšÞÕû±u ž3˲¥Qaòíÿì˜Iá^#ýб/6œ\ÔÞ'û!ã–X åJ5þ­yJQ¥ båAª^~ÏúçÁ{Ÿ6³ðZÛâÍŠjkß…VZg‰WRøáË»Ø~ Üêkm7ŒuK jÂëÆº'‡¼Wað ]ñ’¿…¯|A§Kmw÷Í¢j^4ó\u<|kóã­¦ÉÊ)ckF—×TÉ% •©Ë \•\ÅÏ »ÖÃäu²z˜?i”Ç2¡‰âz™e8W¾S‡•Oì—YÇëujrÄQ¡^ElW6ÛEÙNNH|íðšü[û›q¾#7Åb)æ1©[1•Zôój*‡#X'„ ñÁÅFqäç”hß*üʪäSQàÁàòÌ\Š¥ä4ð”+pÅiâ犣,Ú9ŒqØIæÒŸ²¯*±¥N§Ö”ž)G-Ž 4ªa¥íœ'VLJfý›¼©èž)ñ†›ðgHñôþ ý¨´mBÒïü5¦øwZ»Ò‡Ã 
Wথã»ø¯LðíÃjkÿâð׉|u®éÚŽ5½N±Öµ=eìïcžgšæXzsÃÑÄc'C“*”å9T•Jq’ÅG3¥NU"¯O §N„e:P”ý’¥ÍC øúÔqXš9],Z­Äô©R§,5*ªP–YS*©‹†'K'(âqê|ejtqu©ÓyÕäœOŒ¿i #ÀqüVÕ£øya iºTz?…—X³ð¦³¥ëþ‹Æ#Ãzgü%Ï ßèú¶œÚ鼞ãNðþ¹¯x{HÔ¤¾Òô fóF³±u÷òÌîºÂAb«U©QTª£*±p«ì]Yû%54¦ä©Ú*SŒ*TŒc:‘UÏŒâL­XåôèS£ìpŽ­<5Zu°ñÅ<4#ØÊ‹­C’U¹¤á‡¯_J¤§N…IÑ„á?Ù€ëþÉïÓ·=;œ`ÁÆîÿí¸ÿ<¼õ],µüµß¯CÂú¤¯è÷¾ÝþÂ~ºëÖ×ìÂ}3ô?Ÿcßù‚ŸöÚþy]ù®½¶ùYté`ú£Ûúëm9ÕýÏU}Pfv¯û'¿NÜôîq€O—öÜž^z®–ZþZï× }RWô{ßnÿa?]uëk‡öa>™úÏ‚1ïüÁ Oûm<®ü×^Û|¬ºt°}Qíýu¶œ‰jþ窾¨?³;×ý“ß§nzw8À'ƒËûn?Ï/=WK--wëÐ>©+ú=ï·°Ÿ®ºõµÏì³þ ‚ž_ì)ð ?»¦øØ~_¼kþ~ ™TöÙ¦kWþ~f¹œþü~!ô·å÷Ø|NÈ#ü¹uÿ¥z~VíecÿÔ÷ŸÚ&ÈKû@|r|gwƉG·oë_¯‡Ž¹ÉÛíeŸ°ÀB—2÷1åmè;õê·]},-ñ¥~)Î¥mñ~Ÿòê’îï·D¾w¯åù¯ÕÿÀó»rëüðëUñö¼tKWƒHñˆ5-SU–k]#FÐ<)¡ê$×õVê[©mìtÝ#K»¸Åo,²È‘Û[Aqsq .žqʾ;í¢}^­¯[­<¾ÏN,«®¨Qå‹öu«Nu9£N=)×­R¤—´’Œ)Ó”½ØÊR~äbäÒ=?âìÛ©ü8µ0ë_>_ø·ìÔá¾…â-_Rñü±øÓOѵt_øG ´–é´vÃSžõRÍ¥1yò‰¥gWv÷’×Wk;]twÕùkåo{ÒÆðÍ|;ÕÆ`'_“ Q`©U¯<[X¨R©I(:ƒ“§Z’SvŽ×zKÍÇÂOˆ§_ xØøž+©¿‡…5ßíäÓFÓý ú?öyÔÇÒͲÛòö jÿ¶-ÿ/Zuÿ.ö_¡ç,§ë­[Û(iïû.U5ï8´öºÔ³¬|ñÖƒà/ üKÕ¼7¨Yx/Ś7¢k@Ë úÇ…ÓL“V¶™ ‡´ÁÕ#ŽÆKˆÒ=F[-b34š>¤–«ûe]®}­}ìþä½~ë$›.¦M‹£ƒ¡©BqÂâkW¡J£M'W©º‘’zÇø©AËI¸UQrt¦¡êן²o-´5Kü5Õ5é¾Y|h?¬âM3ÃR>­¨éö½ÕôV¶·omovbdiþÛÖו¹­Ìþ§k«kgmùWKZ÷=)p¦28xÖ|J’Ë¡š}Ræ±k: ê{9Ó9J©8B´æ£r©Y£Ã¦øsâŇH7ñ"ÁâK«ýÍ êB{Mµ·{«ëÍ'}š¦©emh¯qsqd.-á¶W’gXƒ2ßöÇ÷ÓéßïÑo¿gÑ$xÿÙ¸¥ìŸÕ± V‹©AºUµ„#Ï)ÒvýäcÍ)AÉEjÝ“r–_ƒ>,°¶’ëQð½¢iW‡@½ÕµO êºviª¼éiú…ìšRÁ äRº–³ô´<¥»±Å/íx¤ýøµdôÿ€ºÙXÕå˜ä”êañtપ.­Z5ãN[K’R’j3»WùžÉ=MOà׌¬¥Ö-`ð–µ®i'Š.<.·¢øw[½ðÕÖ½kªeA§ØÞɤۡ¹½¾¦›aqom©Jg·ˆØÅq(…í…ey$­³énýßm4°ç”ca:ܸzõcG,+ÄR¥]Ñ•uSÙ(B£‚¼ªNÊ’U’‹‚“±¡âÏ€¿<âßøZð6¶#´žÏQÓt}~Ã[±š[xíôGC³ºÕ~Õntq~’ïT³¥Õ¸ëkI~7Z4üšÛ¥ìo>Ƕ¾ªèfå…¯‹U°5'RË ¥xÊ5!B¼*ÁÚÔêQ§*œÉÒçLñÃð÷ÅÂÇFÕO…û.£{ÿ'Œ~Å£Ø[jºµÙðƵö]+L½„ÝYêZŒÿaòll.íÕ®-o.LvóÛ†–9^<²ŸÛß_çm:[¯ßÑ-bWöN1Ƥþ§Šä¥Ô©/«Öå§Nkš3œ­hBQÕIµµÔ¢ÿüW‹‰d𷈣ðäÐË<> }SMk{{Ûm:iãÕZÑlu»; $IÚ8ïnmí›÷Ó¢1ý±ÓÚ'·[ÛîK{kø$Oöv%RUþ­]PiµYѨ©8Æqƒj£\–Sœ`Ú“÷¤£tåitúgÁ_êøgÑn´;ÿøïâ*èþ#²Ô´]S_ð¶Ÿya©}á«kÍ=#Õ?³tûËÝ2O‡ô}föÞy䳬žsk{×»¶šÛÍö»Òë®Ê:£¢žI‹¨±£:3ÃàåŽöUáVJØhNš©: pµNHMÖ—½¨Ó«%)8¨OÍ¿³Ç§¨Î08?îz€?„ díÚöûiþ?’[Û_ÂÛGaü¾û¯ÎNßËq?³×Óß}}1ôã§Ps…?¶?¾¿á½ß>‰jWòü×êÿàyݹ/öxôõÆýÏPðŒ€À§ö½¾Úä–ö×ð¶ÑËïºüäíøü·?}?à‡Ð!ý¦” 
fOƒMù¯ÅO@?éÑ~s1Äýk1œï~\7_õÿü»ö륮~éá5?g–f«¾>“ÿËxù¿/Ë[7/ÞÚå?X ( € ( šl¡ŸÙCö‰¿üv?ò{\ø·l.%öÃÖu9˜ÿ÷oý‚bôÌÏâÏû=zã¦yÆ¡8ãÐw#®Ôû_í~œëú×{[m?[ê}_Éþ?—5ÿòo? þÎäu?ÏÓ+ÔqÔœõÈ9l}?ëÒÛ+-ú[@ú¿—õÿ7þ_ÞÔ?³×®:gœgúŽ=r:íCû_§:þµÞÖÛOÖú‡ÕüŸãùs_ÿ&óò=³á7ìëãŒFáÛÏi0]øªÃÁ¶W^'Ôn´ËkýrûÃÞ%ñ\ÐZ=¶™¨4°é…5ýf䨎Án4˜ÝÆ«i ³,ç–Þó“³zYìþKÉm¦ºXõ²ÎÅæ¶ú»£N2ÄSÂÆUç:qiЯˆj.4ê¶©ÑÃÎu^Ðæ¦åR<ÙzßÁ-VßV°Ñ|â/ |fÔo,o5 ø;.¿ãIt«k)"IT·²º¶V3+ÆñÁMKáž§7ˆÍ—ŒÙuk_ Þi—Z>—­C}j²^ik›5¥ëYÜýšäÜÀðÑåK«—ÚI]-tjMiÚë}7+Ãø¼-*u¡[Ž£SðŠxµ+8âÜ9•Óœ)VU'NÔd”î¥Ïn>x®âëT±ºð/ˆ¦½ðëHºÕ¥Ç†5)n´7š¬«ª[É`Òé¤iš]þ£"Ý$XX^^[ÚÎéÚéý¨¿ÅwßMÖŸæÓgž²ÜdeV+ ŠŒéiZ*e:v„êZ¬tpµ8N¥¥kBž‘ŒQðgÅÒßYëÞ¼ðŒ¶^Ô¼U ·Šô=cF“SЬ„RJºT3i×+vv‹y]"Óe‘ šú"¢“βæO[i¯ßÑ--§Üïx겜bsZUpÎyã-ˆ§^›©NšOš Âí´í´ ºÍ^Æ¿†g‰Þ.×,¼¤ü4ñ꺇†üAã-?NÔü?y¥G}áÿ h·šî§«ÙÿhÚÛÅs´³h,¥‡ö†§uc¥Ù4×ú­¼£ÎÞKt—^½4·[y%¾šáò,ÓˆxLR¯S[RZNt0ô§Z¤ãÏg(òÖ²|õ% p÷çOŽÿ…iâSaa¨ŸkÇLÖ5DÐ4ËãáíCì:¦³í z&Ÿuöm}ªApiÖï-Üs«"²‡U?µÖÜêé~ oºÖ^ºnŽOìü_%:ŸWÄòTªéRŸ²«ËR´Ÿ,©Â[J£k•Â7•ÓM;4výž~&øëT¸Ðt¿‡ºýæ¥kàï|@Zމyi%Ç…ü5¥]jú†©§Çj†ùe·´{}2;1$º¶¥-¶™§¬÷×[8ó…ÖIê—}ßݳ×f’¾»8\2ÄU•:LB«%|g,©Ô§)a¨S•J• ¥Êç͵MG™Ô›:wœ¢;á÷À;ŸˆÚOŠ|Is­øÀþðD¾Òuÿ|B¿ºÒ´; OųjpxoBû3E×u9ouS£jÏ•¦5œ:}ÍÆ£{aF–eœ(Ù_ënTž‹W»Kîz­lôf¹vI‰ÇÓÄÖŽ#ƒÃágF5ñ8º•iÓlCš£NÔ£Z¬§SÙTÃä‚„IÇG+Þ1ý™¾#xIÖÔ­4Í^|Ÿô}"î[8íu w<«'–MÛ°9*¯û]-9ãþ]w×½¶û™œòÜTaNµL6"4ë>ZU'JªYmjr••G¥­ÿ §øYãÛ]ZË@ºð7Œ-µÝNy­´íãÃ:Ô:¾¡so7W6ö:l¶+yw=½­ÅµÌñA ÉÃ4›T,lÓÄí¾½»ì¶Vµ£~fSʱ‘© 2Áâ£V¤œaJT+*³’Œe(Â›Šœœc(ÊI'Ë&Ö¥[áoˆ4oiþ7½ŠÓ.üYâßYŸ´Ç¬øwÄþ²Òu+3ÄZmÕœ2iÒ_Xêñ\iLU»û¯òntËÛx—öÆ­sv×£ôƒMi¦ª÷*®S^–ž2qJœñ5𒇾ªÑÄP)ÊjrPqsU*m6¥ÉV.Ò¥8œ7öpç#©þ~™^£Ž¤ç®Aȧý±ýôÿ¯Kl¬´WémO«ù_øåýíCû=zã¦yÆ¡8ãÐw#®Ô?µús¯ë]ím´ýo¨}_Éþ?—5ÿòo? þÎäu?ÏÓ+ÔqÔœõÈ9l}?ëÒÛ+-ú[@ú¿—õÿ7þ_ÞÔþ½¿à™ë³ö øŸÝ³ñÐü¾'øÛéü¿*ùSÚÏR÷çÆãåuçÄÔü¹xc$l%ÿ¥w¿ç÷ŸÿÕú›ãÍ—ã§Æ·ÆK|]ø˜òö×§ldyõè¿3ý§ì'^—3\˜¬Zßþ¢ªËÍ«ÞÚ|í«?œ¸²‡?æÒ¶ø¯-½œ}öJúmmîyGözútã§Ns»Ð~¼¿Äÿ¶-öž¾/[é~·ëµãó¿Vþë_-ôòùôïñ$Ðgîÿœ÷ù{uÏ<žôlyùëóó·m? 
X}ZöÓÊÚmÕ÷Ù.‹F¶½qø ¢i:÷Žt]kP°Ñ“Ç¿ >$xKÖµ[»};LÓõýWA—PЭï¯îŒvÖVúþ¥¤ÛxQ®.$Khß^O´Ë ©šh´¥›FRquy¡;94—:º®ôMÚ×ÑkvÕ®{$)Ó­¥QÆ“Ååxü%:•iÂ5ªP禥6íRTý3Ñ{[ɨ]ÇØ5ïÚœø—ã'‚üUª\øî?‚þ…òÿÂ)¤é:'‡¼eyà/xÃ7ú†“¯xUÓüC»þ-/RÖ4µµñ¾“}{¤ÏoZ†vñ¶žÞqMÔ^ô½Ÿ»}¯ð®o´ŸÅ}9£~®:žÍ|óë®s–%e¸yeòötýLe…ÁÑ¥9­ªš¯ N<¸¨IÂQJ¥&×'¢§í]ð¦|/¦Yê¿­¬ô_j>×/fø9¥^húï—ñNãâv…<ºøËŽ ¶Œêw–vz®ñ[Dñ‡¯höº¥Íæ±e¯ÜØéºlaô÷çðµðÂÍÞ÷åö—ÚÿnúuM¨÷låñT!˜¤¡ƒ©…ªÞ „©Tk0xêRt´ýºKžJ2§˜Ò­N¤G:Ьãø¿ñ{Á¿|†4H¾'è×:GÆï‹ÿ4-#Å·rx§N½ðwÅx¼%¢ëþ0¾ñ=æ±/ü5á+“uo}¥ëÚð”êZ™ñ;^­úßç<âŸ,¦š©=á£ä­$ÓMYE¦ùÕ6Åá±øHУõ¸:9¾g‹§J´Õu<6?êÎ2«ˆ©ˆ•yW¥*ææ¥WÚ{yNX…+ª³üGý¥êŒ¹"ùoÍozÍk½÷{t޼Úcsªõ(aðØD¨ÒŽO‚ËqxL1}–qþ·N”±Ž…G½›ÄB2§î¸EsDú#Pý·||T¾1Ðô¯ˆ:uÎ¥âOøÖóAµðŽ™h¿µíCá/޾hw…µËïk?ÛðéZ¿Œí@Ô´}7áþŸƒôI.|9ªj0év:VßÛ4nå«w^êå÷%my¥zÝ!d­i7îûu8‹ õ˜âiG&¯Ä7¿¹ø£xïEOxEÄ—×’ø#âÃßê1ø¾ãí:ŠôûXtÙ´ØYÅ/g¬¥Ïm´ß›Fïµïþ+]ÇÎ¥›aÞWO‰–2®"4}’,aih}vïOJxŒ=Üæð¸¼#÷ÏÚC¨º]?j¯‡Þ.ðÇÄí=OÅÔÔüW«|Z»ð6Œt;_ ¿†bø‘ñGSø£Ý\xûÁŸ4¦—B°“Rþ(ðŒ|#ñÅpÎúˆ¬ìçÓgÐéçygiJ÷Ÿ*÷Sø½×Ì¥uhïK¢NÌßœà±8l|Æ{JÕsIa©òB‹¤±™Lm+âpøÚR•¹Æ¥|.#Œ„ªûÔëB.1¡Ó¯í“ðÐx×Ǿ+‚Šºd~$ø×Ç} QXjž!:“i†Úo‡…¼?´Í7N²ðÍàhü'â­@ø«DžÃV×d½ð,Ü6ŸtÖq‡¼½éYË™iòÛáþ%•šÑûßáþn•Ÿ`>·ŽÄ/­CÛfÐÍiMáéN­ý’_UifŒcBiª5çõ”ãV«– <ÞÎ^=¬~Òš~¯£ëº7“ãëK-OölðïÁ}/D·K8<=¢x÷Cø™á¿ˆ âk;5•¶°Ð¬`Ñut]~ÎÔkVú•ͬQi¶òÍqk›Î)´ýékMZÍiS™_WÊíË~—{YiÍåÕÍéÕ£ZŠúÄiË ÃåÔé{Š«§Yx¢ËJuÕ¦mªKÍé{]%/g'ýÕ%u·Äõ^ºÛ[k(áˆÆa*ñ%LÙK°õ±1 P”pø¼<ªÒŸ/±œ+ÕMáªÉJ©OÛF<³Tc6£é·_´¯ÃyoíêûâWˆµ ߇<ãŽV? ¼ð÷â‰[â.›eiàáà? øÖ+?GðêêÆæy|I­øÁ5ý@ø†ÿìÌ‘iV©u_ÛSQ眓Œï>XE¦×¹hóÉhÕ›sMõ¶œ¾Lß)Â.¦*­Ià3L&+2X<Wë¦°Üøz8§N·Õ¥ 7R®%VŸ¶—¼”`åoLý«<¦ëxµàø»©Þk7ÿ³·ü$.´}xá¶Ÿð]ð^µ}¨|5ÔÅrGâ KWƒÁ³hžÓßBðrišG‹uûM[Qž_ô›†³Š Òæ›ætïÊ”RkžÏ™¹7g¼c£×™kÖ #]ýr~Ö¦EÍ„”),>9Ul-Z’ÂÉb[«)G *tc:XÒ§^´I§yÏðÏöªø \\k_õZ¯ˆo~!|NñN¬Í¤ÜøîËUо#ør×Ã:ké¶·ÿ¼9០jÞµŽóûr|#âýoN^¦x‡L´¶ˆYÎ(ý©I{ÒÛ•û¿f×’Qµ¶³¿÷or°Î2•\eLtªÔÇfj¦–"3¥‹¡=)B5qô)P©N<ÞÕÇ ^u¡ì骴à—'£Ïñ_ÁÞøyâ?ˆ×^"›\»ñÃ/Ù›Ãz/Á© ·ñ'¼5ƒµMWVÒ¡âãâ?R­?´éF›ªê'xÒj ¥6Û¼TÒŠŸ´NÜÎI»sK›ž=O†£­‹•_j«àx~Œ2ïo„–›.Äåõ'N)â*b"¥O YÕUðxxЕJ‹¯íyÎCPý¤<â«Ïë:v§¯¾› øCöŽñvÞ:ðý‡…µ;¿ü~ðR|(ÒÃsš_ÛÓ™éçÓúî´¶—Ô>¯äúin×õ[ú^ú8ìÙãž=yÇëÐcŸ˜ãÓ<¶ŸöÇ÷ŸßóùöÕú_Pú·•Ö»Ûͽ5^O¥Ò>ÑøñsÂ_ ¼á-wS’åõ_…_õÿ\èZIÒ¦ñ7ˆ4_‰? 
$ðUŽ£¡iµö™o¬Åá ÂÖ§[U¹ §ZxªÚwŠd¹p»ÓÍ©ò)JK5xó8ÊÍ'ºRŽ»ÚëÖ?]‘âè`0˜J³mO›×ÄΔy=­Jx̲XZs§N¤¡Šj¦œU{茆ÿ´$6sxÇPøË®øÛT×õm3¶Ö<ákI´ˆm<5â ­~ëHñ/„<5ã/…ún­-íÍÄW:­¯Ïâ=;D¼·¸{_I5•Λ4óŠo›ÚNKnV”ZÑ»¦›ŽújÞ™ Ö2–*®eS*Ó¥†§BtaO–yV:¸zxœ *¼ÍÆTçWÛ*OÞö3mJ¯ãÚ·ágŠ´ß‹ÖÖ§ã ÑxçYøÏ«ø+ÃòøVËÂ2è7õ¹¼KfíãŸüHÓ.­´Y¹øËÁ¾.ðÿÄ3Äí¦—Ò¤Ñ"×ßC¹g-RÓ›w—*´zü:©]%·Ã%+]8Ýòz8¼ç/­K3Œ^6_Y«›UÃÓöTèÊœ³ ²¯Oý£¥5J5ñzô1´ñžë¤¦áCÊgøëàÙ?iÿ†_´#EñSÐü)ªü&×üU¡jz>œ·ð×mü eâoèv:»¤ú†µaª\iv÷ÚNŸ¯éêþ%æ<þü›Ž¿Ëuudì¹S³ÖÜÉvjÍÊðyíhâéËýR ËK ƒÁὕ\^®bU%< :µ©ÆzJ¥XTœy©¬EóŒÿj;Sð–½áêìûTø·ˆì´m+ÁSëžøGmñ:]kLÕ­¥ñ/Œo,žïVñ߇íü/¦ëzŒ£¼Ñ¼3³Z¾¶³³²ÐÚ¥œSû2“Öž¶JëßçÓšM;òÙ^Z+Ý6¢tb³ªoZ†x˜¹ÔÉáíaXwW€Yƒ«Æ5ñ>ö*ŒhÂsÅGÙÒ\òJ0¥‡Æµ_Ã=v]NîÆ×â¶½¬k~øÉ£Þk·ž¶ð]°Ô>*Â1.–$ðKüEñƒl5«KÝR½ñ¿Ž<¤øSN×ëGAà[ŸìÈjYÅ m)½'Ú.ö\š)Mnåu½”ñyάªÎ+R¥\.mMÔtáB*Xï«J›xeŽÄaã[žIWÅaéaý³tÓÂ5J.ZZÇíqð×Qñ§†¼Vn~4êR‰¾7øŠ Cá]F;OŒŸ oü Äl¾(ßxÄž7ѵ«ø¼Qâ?øZËáU—ˆ“HÓcþÉK‹};û,yÅhûÓµåÌì´¼m9Úv–·¼4Ú/^]*ç˜ã0ØŽlt¯,ÙâjªTèµË<*ŸÕ)æÂVÄÆ¬•j¸Š_PU” ¥MrÅÂö¾ø¤Xø ÓHñßÚ¼7ið#þ ðÔ ÓäÓ®4O‚~,🈮|S¥øƒQø‡we>»â˜|&ukÏÂ~Ô¬üSâmvßSñµæšuuñgR¼Û^ÍIZ+E/}§ÌîÚ[5ÖüÍ+Jiç˜*tð6…~j1ÉhÖ¢°ô¹yrÌN´ëB¬±ÓN¥_«º‘„pÔkVªåŠpsuøÿ þÔ^‚ßN³ñußÅÙe’÷ö´¶×Âí¯b™g£ÎRj.Ò|©¹9Ç¢”’\ªVw•ßd™‹Î(að5p˜j¸úÕþ£‹¡OZ0£]˘eø‰Rp§‹Ä{:£ƒ­gó”ëWªÜ)Bnrõß~Õ? 
»ðÖ¤ÇñûFüÕìÛâ œVZ_†n>èÚŸƒµ¯íOо(ø—âņ[{_ÞxƒÁ¾!µð·†|/áæÐt½6OiVoq¢éßÜR”•åM{î)+é%)óËK¾nou$¬ÔS¼},.q–§BúË—Ö²júÏ*)`éTÂÏÚckcq¼yUÃÕXzh{*pTiÁÊP]gãÃ߃¾ðÃ}KÅ.øŠSÀ—H?³×®:äôõÿ€ŽN2}‡ ç4¿¶-§3ÓϧõÝim/¨}_ÉôÒݯê·ô½ôqØþ±¿à›i³ö+ø(ŸÝ‡Ç«ù|Qñ·ùíô+×Ëj{\*=l\ÿð,]fGðªåáÜ¢=°t×Üå±ÿÖûSãE€—ã?Æ91ß>&žƒ¯ü':÷©ÏN˜Ï¯Î3«æÙüÜJÝ«þúnß ïúë{…q&Ÿ=Ì¥ßüþÌmÓË¿—ø|Ûû3Û§ªÀ÷çÿAãº×›ý¹ýî¿Ìÿù÷ïkÙ§{žÕ|¿?þC§]UõÚìOìÁýÓùÏ^}û úžÛOíÝõí´ž¾_m5ÛÏX‡Õ{/×Ñü+þÝo¾’’)I#WCÁWJy ò2 ä÷¨þÝ·Úê¶“ÿ$÷¿]/g{ÜV{­7ÚÿüŠôzë®×hwö`þéüç¯>‡ýÐ}Om§öîúöÚO_/‡¶šíç¬Eõ^Ëõô ÿƒ·[ų=ºz¨~ô1‘»­ÛŸÞëüÏÿ‘¿~ö½šw¸}WËóÿä:uÕ_]®ÄþÌÝ?<õçÐÿº©í´þÝß^ÛIëåðöÓ]¼õˆ}W²ý}¿àíÖñ_ìÏnžªߟýŒdnëGöç÷ºÿ3ÿäoß½¯fîUòüÿùuW×k±?³÷Oä=yô?îƒê{m?·w×¶Òzù|=´×o=bUì¿_Gð¯ø;u¼Wû3Û§ªÀ÷çÿAãºÑý¹ýî¿Ìÿù÷ïkÙ§{‡Õ|¿?þC§]UõÚìOìÁýÓùÏ^}û úžÛOíÝõí´ž¾_m5ÛÏX‡Õ{/×Ñü+þÝoþÌöéê p=ùÿÐxÆFî´n{¯ó?þFýûÚöiÞáõ_/Ïÿé×U}v»û0tþ@óןCþè>§¶Óûw}{m'¯—ÃÛMvóÖ!õ^Ëõô ÿƒ·[ų=ºz¨~ô1‘»­ÛŸÞëüÏÿ‘¿~ö½šw¸}WËóÿä:uÕ_]®ÄþÌÝ?<õçÐÿº©í´þÝß^ÛIëåðöÓ]¼õˆ}W²ý}¿àíÖñ_ìÏnžªߟýŒdnëGöç÷ºÿ3ÿäoß½¯fîUòüÿùuW×k±?³÷Oä=yô?îƒê{m?·w×¶Òzù|=´×o=bUì¿_Gð¯ø;u¼`M Ú9Zt¶…&bwÊ!E‘³‚ÄÈwÍŸR ÝiÿníïvûOm_ò÷õ¶Í=ÇõvÕ®ìºkmôÓ•-:÷þíÙ)ÒQŠ3F£bÑ–UbŽU”²“ʱGeÊóµ˜AÛKûw}{m'¯—ÃÛMvóÖ"ú·m4ówê¾Ê¾ªþ;ÅÿÙžÝ=T¿?úÈÝÖíÏïuþgÿÈß¿{^Í;Ü>«åùÿò:꯮×bfîŸÈzóèÝÔöÚnï¯m¤õòø{i®ÞzÄ>«Ù~¾á_ðvëx¯ög·OUïÏþƒÆ27u£ûsûÝ™ÿò7ïÞ׳N÷ªù~ü‡Nº«ëµÙûaÿo·û=Çí ¸Æaø6Ç×ýgÅŽ½¿,þ /è< Œúä3)Þü’ÂC¯lCê—ømRà ~ÏVÑâi>ÿòëÒ?–¾V÷¿okïO¿ ( € ( ›lAŸÙWö„¿üp?= óéüÿ*âÌݲÜÁöÁbßÝB£9±Ÿî˜¯û¯ÿ¦¦$ŸÙƒÐúçñÔc©?ˆ tZü;ûweü™¥ÿ¤é¦Ÿ;»YÄþwú®ïôÛ¿Ùïù=®ùSû3ýž¾Àý›°ÁçœdgSûwûßù3ÞÿáùÛÎüÚ‡Õ|¿?þC¿­öêÜWû0z\à:Œu'ñÔ‹GöîÊÿù3KÿIÓM>wv³ˆ}Wwúmßì÷üž×|¬:JWhÔº† Åz‡Æð­ó*†¾m œíOíßïäÏ{ÿ‡ço;ójUòÒëM~_akò{µ¥ØÿìÁè}s€xê1ÔŸÄP:-Û»+ÿäÍ/ý'M4ùÝÚÎ!õ]ßé·³ßò{]ò§ögû=}€?û7aƒÏ8ÈÎ0§öï÷¿òg½ÿÃó·ùµªù~ü‡[íÕ¸¯ö`ô>¹Àwv³ˆ}Wwúmßì÷üž×|©ý™þÏ_`þÍØ`óÎ23Œ)ý»ýïü™ïðüíç~mCê¾_Ÿÿ!ßÖûun+ý˜=®pF:“øêE£ûweü™¥ÿ¤é¦Ÿ;»YÄ>«»ý6ïö{þOk¾TþÌÿg¯°ÿfì0yçÆþÝþ÷þL÷¿ø~vó¿6¡õ_/Ïÿïë}º·þ̇×8Ž£Iü@õ¢Ñý»²¿þLÒÿÒtÓOݬâUÝþ›wû=ÿ'µß*f³×Øÿ³v<óŒŒã nÿ{ÿ&{ßü?;yß›Pú¯—çÿÈwõ¾Ý[ŠÿfCëœÇQޤþ zÑhþÝÙ_ÿ&ié:i§ÎîÖqªîÿM»ýžÿ“Úï•?³?ÙëìÿÙ» yÆFq…?·½ÿ“=ïþ¼ïͨ}WËóÿä;úßn­ÅJI•Ð:²ÊÈ®¬­ü$Û”Œ‚pyàŒ*‹=Û^¿ÌÕ¼þ4¿ßwk8‡Õ­ªÒÞºä½ü4µß,qh°[©H Ž'%b#ÜÄ “´a›h I$£ž 
¨óÛý­ÿ¼÷¿øëÌ7†mÝÝú·÷kÿ›VÖäƒJ@Ìâ0Âîp‹¹‚ç`-Ì'nà@É*BÑý»²¿þLÒÿÒtÓOݬâ/«}Úé­—³n‹î¶—|«ý™þÏ_`þÍØ`óÎ23Œ)ý»ýïü™ïðüíç~mCê¾_Ÿÿ!ßÖûun+ý˜=®pF:“øêE£ûweü™¥ÿ¤é¦Ÿ;»YÄ>«»ý6ïö{þOk¾TþÌÿg¯°ÿfì0yçÆþÝþ÷þL÷¿ø~vó¿6¡õ_/Ïÿïë}º·êWþ О_ìkðq?º¾?—Å/}?—å_°ðÍ_m‘à*ÿÏÈ֗߉­éù}ÇîÜ9\,l4WþM/ë§¢Øÿ×ý!øáàŸhŸþ/éÚׇµ6ô|NñÖ ‘^i—°-ÆŸ­ø—QÖô›ë9d€E{e{¦j6—0ÝÚ¼ðeÞâh'Eþcâü&uO‰se•æ5iTÆT«J¥ &"µ)Ó«ïÂQ:sƒºwk™5­ìÝÊ3Ì·<×R8ZóJ®q”)Tœe•šq‹[tߺVLòÿøGµúÞuÿŸYþ£þXŽÃÕFxä·Íû ûþ„ù¿þc?ùGõòG•ý—Œÿ þ_ Â=¨ÿÐ>÷ÿgü2D]³×æÁç±½†}ÿB|ßÿ øÏÉÑôê¯åtÙXÅÿ0˜Ÿü[ÿ‘}üúö÷OøGµúÞuÿŸYþ£þXŽÃÕFxä³ö÷ý óü Æòëäƒû/ÿ@x¯ü[ÿ}ü¿@ÿ„{Qÿ }ïþÏødˆ»g¯ÍƒÏb){ ûþ„ù¿þñŸ“£éÕ_Êè?²±‹þa1?ø"·ÿ"ûùõíîŸðj?ô¼ëÿ>³ýGü±‡ªŒð1Égì3ïúæÿøAŒÿå×Éö^3þ€ñ_ø"·ÿ ûù~ÿö£ÿ@ûßüŸðÉvÏ_›žÄRö÷ý óü7ã?'GÓª¿•ÐecüÂbðEoþE÷óëÛÝ?áÔèy×þ}gúùb;Uàc’ÏØgßô'ÍÿðƒÿÊ?¯’ì¼gýâ¿ðEoþA÷òýþíGþ÷¿ø ?á’"íž¿6=ˆ¥ì3ïúæÿøoÆ~N§U+ þÊÆ/ù„ÄÿàŠßü‹ïç×·ºÂ=¨ÿÐ>ó¯üúÏõòÄvª3ÀÇ%Ÿ°Ï¿èO›ÿá3ÿ”_$ÙxÏúÅàŠßüƒïåúü#ÚýïðÃ$EÛ=~l{KØgßô'ÍÿðߌüNªþWAý•Œ_ó ‰ÿÁ¿ùßϯotÿ„{Qÿ }ç_ùõŸê?åˆì=TgŽK?aŸП7ÿ gÿ(þ¾H?²ñŸôŠÿÁ¿ùßËôøGµúÞÿà,ÿ†H‹¶züØ<ö"—°Ï¿èO›ÿá¿ù:>Uü®ƒû+¿æÿ‚+ò/¿Ÿ^Þéÿö£ÿ@ûοóë?ÔËØz¨Ï–~Ã>ÿ¡>oÿ„ÏþQý|eã?èÿ‚+ò¿—èðj?ô½ÿÀYÿ ‘lõù°yìE/aŸП7ÿÃ~3òt}:«ù]öV1Ì&'ÿVÿä_>½½ÓþíGþ÷çÖ¨ÿ–#°õQž9,ý†}ÿB|ßÿ1Ÿü£úù þËÆÐ+ÿVÿä/Ð?áÔè{ÿ€³þ".Ùëó`óØŠ^Ã>ÿ¡>oÿ†ügäèúuWòºì¬bÿ˜LOþ­ÿȾþ}{{§ü#Úýï:ÿϬÿQÿ,Gaê£< rYû ûþ„ù¿þc?ùGõòAý—Œÿ þ_ Â=¨ÿÐ>÷ÿgü2D]³×æÁç±½†}ÿB|ßÿ øÏÉÑôê¯åtÙXÅÿ0˜Ÿü[ÿ‘}üúö÷OøGµúÞuÿŸYþ£þXŽÃÕFxä³ö÷ý óü Æòëäƒû/ÿ@x¯ü[ÿ}ü¿@ÿ„{Qÿ }ïþÏødˆ»g¯ÍƒÏb){ ûþ„ù¿þñŸ“£éÕ_Êè?²±‹þa1?ø"·ÿ"ûùõíîŸðj?ô¼ëÿ>³ýGü±‡ªŒð1Égì3ïúæÿøAŒÿå×Éö^3þ€ñ_ø"·ÿ ûù~‡ìßüOÂþ Òm¾=ëÚŽ‡«éÚ&±?Ã+HÕ5 6îÊËV¾Ð£ñýæ±™qsI¨®›ˆ´oµÜZmâžð[~Ñ Äq~Ùáf0£€Í*ãð˜œ'¶ÅPTV*•J3š§N§;Œ*ÆåNq\Ö³é³>û„p•ð¸\_¶¥R—´¯R2ƒ’Œ,ÚRÖ×¶½zn~ÆWê‡×P@P„~Ô>Ö|Wû9ümð߇´û[\Öþø¿MÒ´Ë8Ì·W××Z5Ôvö¶ñ/Í$ÓHBF‹–f IÅqfTêV˱ô©Eέ\*8-çRt*F[k)4·ëÐÇO^W”èÕŒWyJI|Û?”ÄÐ5'Us¥j³(&­6úÎæ f;‹[›hî-§Fùe‚h£š'VI:_ɯŸEÊ/'Íî°¶®š£f»5ÌŸ£¹øÓʱ‰´ðx›Ý­(UkO>[|Ó³èØÿøGµÚ}çN?ÑgïX‡q“’xõÝ•ŸaŸП7ÿÃ~3òö óû´ö^3o©â¿ðEoþE~vÁÿö¢æyÿ€³ŽßõÇŽNz™êçì3ïúæÿøoÆòóóïû/ÿ@x­éÅoþGÏËå°Â=¨öÓï:qþ‹?|zÄ;Œœ“Ç®ìªö÷ý 
óü7ã?/`¿?»@þËÆmõÿ¡>oÿ†ügåìç÷hÙx;§ŠÿÁ¿ùùýÛü#Ú‰ÿ˜}çþÎ;×99è:g¨;Ÿ°Ï¿èO›ÿá¿ÿÊ?ÏϼOì¼gýâµÿ§¿ù?/–Áÿö£ÛO¼éÇú,ýñëî2rO»²«Øgßô'Íÿðߌü½‚üþíû/·Ôñ_ø"·ÿ"¿?»`ÿ„{Q?ó¼ÿÀYÇoúãÇ'=Lõsö÷ý óü7ã?ùGùù÷‰ý—Œÿ ÿ¡>oÿ†ügÿ(ÿ?>ñ?²ñŸôŠ×þœVÿä|ü¾[ü#Úm>ó§è³÷ǬC¸ÉÉóÿg¿ëœô3ÔÏØgßô'ÍÿðߌÿåççÞ'ö^3þ€ñZÿÓŠßüŸ—Ë`ÿ„{Qí§Þtãý~øõˆw9']ÙUì3ïúæÿøoÆ~^Á~vý—ŒÛêx¯ü[ÿ‘_ŸÝ°Â=¨Ÿù‡Þà,ã·ýq㓞ƒ¦zƒ¹û ûþ„ù¿þñŸü£üüûÄþËÆÐ+_úq[ÿ‘óòùlðj=´ûΜ¢Ïß±ã'$ñë»*½†}ÿB|ßÿ øÏËØ/ÏîÐ?²ñ›}Oÿ‚+ò+óû¶øGµÿ0ûÏüœvÿ®~_-ƒþíG¶ŸyÓôYûãÖ!Üdäž=weW°Ï¿èO›ÿá¿ù{ùýÚö^3o©â¿ðEoþE~vÁÿö¢æyÿ€³ŽßõÇŽNz™êçì3ïúæÿøoÆòóóïû/ÿ@x­éÅoþGÏËå°Â=¨öÓï:qþ‹?|zÄ;Œœ“Ç®ìªö÷ý óü7ã?/`¿?»@þËÆmõ ÿÃÇ«ÿóëþOZ?â"aèÿálù—ü½_Ù_Û´ÿçÃ_÷?ýµmøùY8ŸðòŸú¦> ÿÃÉ«óž?çÓ¥ñ0¿ôÿð¶?üËþƒqÛ´ÿçÃÿÁ«ÿ‘ü~vwJ!ÿ‚”ÿÕ1ðgþ=_ÿœï_òzÑÿ ÿ@ÿ cÿÌ¿åêþÊþݧÿ>ÿ¸©ÿí«oÇÊÉÄÿ‡”ÿÕ1ðgþM[ÿœñÿ>(ÿˆ‰…ÿ ÿ…±ÿæ_óü‹þݧÿ>þ _üãó³ºQü§þ©ƒ?ðñêÿüçzÿ“Öøˆ˜_úø[þeÿ/WöWöí?ùð×ýÅOÿm[~>VN'ü<§þ©ƒ?ðòjßüçùôéGüDL/ý?ü-ÿ2ÿŸàÜ_öí?ùðÿðjÿäÒˆà¥?õL|ÿ‡Wÿç;×üž´ÄDÂÿÐÿÂØÿó/ùz¿²¿·iÿφ¿î*ûjÛñò²q?áå?õL|ÿ‡“Vÿç<ϧJ?â"aèÿálù—üÿâÿ·iÿχÿƒWÿ#øüìî”Cÿ)ÿªcàÏü£^­r£<=H­9MT^ü\¡(ÍFO–I§´ãµš”½6ØNQ‹ƒ§$¥ù­}bÔ’ïgÓM´û_QW¼v…P@PñÆÚ?Ãox·Çþ [§Ñ< jž#ÕÆ5šñì´«InçKhãGÒ"±«ÈˆXÌ%s­V(Õ¯Rêu*Í¥v¡Nr²êùbì¿Ì™ÉBœ¯hFRvÞÑWvó²?8þ \³"K¿ [¤¨²$Åû¸oáWPÂ+ج~jv1ÝFÉÒÓR¿¶YCo.#Ù3~{/0—|¸8ôrÅÂ2k¥â°óI÷Js·–‡†óÚWÒ‹k£uvóVùém­¥Óÿ)ÿªcàÏü<š·ÿ9ßoïz_ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ 
_Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÄÿ‡”ÿÕ1ðgþM[ÿœï·á×½ñ0¿ôÿð¶?üËú}âþݧÿ>þ _Ÿ+^[yé{ü<§·ü+áãÕ¿¯Ááøsùføˆ˜_úø[þeÿ?D?íÚóáÿàÕÿÈëßKvÖéÅáå=ÿáXø7p¿õRǯ@~('Žì£¹#Óÿˆ‰…ë€kþç"ßÝõUú|í`þݧÿ>þ _ü·Óo?_þ'i>xSâV‡iu§ØxšÊæVÓ¯d‚k7PÓuÍYÓe¸µy-nÿ³õ:þÍ/-œÁy uØæU¯ºËñÔ³,AIRÄS犗ÅwEÛFã8Ê7WN×M§sØ¡Z8Š4ëBê5#tžëVš~4ÿÑô?ÚsÄþ'±ý£¾=ZØëW¶¶°übø ‚'Qj|a«± 662ìÍÉä·9à/òob+ÊsØÆ¤”Vc^É[MVšëýz™fÕ$³,bMÙW™áßð˜øËþ†GÓýbzäPöä¯N¹àWÌýkÿ?eÿ’ÿ_×›<ÿi?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?æaÿ —Œ‡üÌ:ˆî?xž¿õ˯ýó×°£ëXùû/ü”=¤ÿ™‡ü&>2ÿ¡‡QôÿXž¹”=€9+Ó®x}kÿ?eÿ’ÿ_×›i?ægô=ÿ!Õ5]VÇö ›U½žúhî¾ Å“°fHü¯ŠM°aWåÜÌÜâíÒ¿wð‚¤ê`s§RNMbð‰^Ú/cW·wùzŸgÂÒr£‹»o÷´ÿô‰ÐE~Â}PP@P@|Ïûg3/ì›ûF2~øõ”Ž¡‡‡¯ˆ#ÜkÏÍÿäS™ëoøOÆëÛý𮦝÷lGýx­ÿ¦ä?ð™xËþ†G¿b{ùçÓ8ïǯã­b?çì¿òSòŸi?æbÿÂeã/úu#ßýj{sþ­ºãŒmõà >µˆÿŸ²ÿÉCÚOù˜Ÿð™xËþ†G¿b{ùçÓ8ïÇ£ëXùû/ü”=¤ÿ™‹ÿ —Œ¿èaÔõ©íÏú¶ëŽ1·×€(úÖ#þ~Ëÿ%i?æbÂeã/úuüy‰ìçŸLã¿t­b?çì¿òPö“þf/ü&^2ÿ¡‡R=ÿÖ§·?êÛ®8Æß^£ëXùû/ü”=¤ÿ™‰ÿ —Œ¿èaÔ{ñæ'±ÿž}3ŽüqÐ>µˆÿŸ²ÿÉCÚOù˜¿ð™xËþ†H÷ÿZžÜÿ«n¸ã}x­b?çì¿òPö“þf'ü&^2ÿ¡‡QïǘžÇþyôÎ;ñÇ@húÖ#þ~Ëÿ%i?æbÿÂeã/úu#ßýj{sþ­ºãŒmõà >µˆÿŸ²ÿÉCÚOù˜Ÿð™xËþ†G¿b{ùçÓ8ïÇ£ëXùû/ü”=¤ÿ™‹ÿ —Œ¿èaÔõ©íÏú¶ëŽ1·×€(úÖ#þ~Ëÿ%i?æbÂeã/úuüy‰ìçŸLã¿t­b?çì¿òPö“þf/ü&^2ÿ¡‡R=ÿÖ§·?êÛ®8Æß^£ëXùû/ü”=¤ÿ™‰ÿ —Œ¿èaÔ{ñæ'±ÿž}3ŽüqÐ>µˆÿŸ²ÿÉCÚOù˜¿ð™xËþ†H÷ÿZžÜÿ«n¸ã}x­b?çì¿òPö“þf'ü&^2ÿ¡‡QïǘžÇþyôÎ;ñÇ@húÖ#þ~Ëÿ%i?æbÿÂeã/úu#ßýj{sþ­ºãŒmõà >µˆÿŸ²ÿÉCÚOù˜Ÿð™xËþ†G¿b{ùçÓ8ïÇ£ëXùû/ü”=¤ÿ™‹ÿ 
—Œ¿èaÔõ©íÏú¶ëŽ1·×€(úÖ#þ~Ëÿ%i?æbÂeã/úuüy‰ìçŸLã¿t­b?çì¿òPö“þf/ü&^2ÿ¡‡R=ÿÖ§·?êÛ®8Æß^£ëXùû/ü”=¤ÿ™ŸÙwüââïöøsw+Ïs>›ãy'šN^Y[âwK»€KN~ÕýYÀÒrá<’Rw“Âɶú¿oTý+&må˜6ÝÛ¦ßþO3ÿÒô/Úr×íñéñÃ|bø’IÇ ulò084¿c׎xüOp:ޏù€ì^Üý=ñÏ'Ô­žF€ì}ðpzñÏ€éîQ×0}‹ÛŸ§¾9äúÕ³ÈÀàÐý¾^9ãð=Àê:ãæO±{sô÷Ç<ŸP:¶y_±÷ÁÁëÇ<~§¸G\|À ö/n~žøç“êVÏ#ƒ@ ö>ø8=xçÀt÷¨ë˜>ÅíÏÓßò}@êÙä`ph~Ç߯ñøžàuqó'ؽ¹ú{ãžO¨[<Œ /Øûààõãž?ÓÜ£®>`û·?O|sÉõ«g‘Á û|¼sÇà:{ÔuÇÌŸböçéïŽy> ulò084¿c׎xüOp:ޏù€ì^Üý=ñÏ'Ô­žF€?¡ÿø!^Uí@9ù®þ žGý1ø¢=§qǽ~÷àïûŽwÿaxOý3Tû^þ3þ¾ÒÿÒ$A5û)õa@P@ó?íÏì›ûF_ƒ¾=ÿÔz÷éüÿ*ó³ùæŸö.Æÿê5S Wû¶#þ¼VÿÓr?†óeÔc×1òsƒÐŽ˜¿Œ?¯ëúü™ù0/np:cÓƒŒgŸ—‘’h6]F=qÇó(÷8=é¸þ¿¯ëò`ËÛ§\˜ôàãçåÀädƒš —Q\qüÇÊ=ÎB:`n?¯ëúü˜²öéצ=88Æyùp9 æ€eÔc×1òsƒÐŽ˜ëúþ¿&l½ºuÀéN1ž~\FH9 ÙuõÇÌ|£Üàô#¦ãúþ¿¯É€/np:cÓƒŒgŸ—‘’h6]F=qÇó(÷8=é¸þ¿¯ëò`ËÛ§\˜ôàãçåÀädƒš —Q\qüÇÊ=ÎB:`n?¯ëúü˜²öéצ=88Æyùp9 æ€eÔc×1òsƒÐŽ˜ëúþ¿&l½ºuÀéN1ž~\FH9 ÙuõÇÌ|£Üàô#¦ãúþ¿¯É€/np:cÓƒŒgŸ—‘’h6]F=qÇó(÷8=é¸þ¿¯ëò`ËÛ§\˜ôàãçåÀädƒš —Q\qüÇÊ=ÎB:`n?¯ëúü˜²öéצ=88Æyùp9 æ€?³ø&ìý…>'÷tßNŸ¼kþ•Yð'ü’9ý‚KÿOÕ?MÉ¿äWƒÿ¯_û|ÿÓö¿Ú:ÌIûB|s“hù¾0|JçÇŒu€ ?—nzäWòÉ®,Ïÿìc[ÿmõ½ý4éÕKòÜÞVÌñªúýb_‹Z-íÕjïÕr¥îøÇØG(Ç`zñßÓÛ ÉÎ~_—çå}m§•µ~ŽÝœ>û÷Õ%çm¼Þ—ùÙ9jÃ샴Ã:é×=9ès‘G;ü]¯ÚþšýÚtµœd9Zêúý÷×e¦UžºéÊ•ä}„qòŒqƯý=º œçå9ßù_[iåm_£·Gg ¾ýõIyÛo7¥þvNZ°ûàíðÇN„úuÏNzäQÎÿkö¿¦¿v-gVº¾¿}õÙi§Ug®ºr¥ya|£qëÇOnƒ'9ùNwþWÖÚy[WèíÑÙÈ/¿}R^vÛÍé“–¬>À8;@ü1Ó¡>sÓž‡9s¿ÅÚý¯é¯Ý§KYÆC•®¯¯ß}vZiÕY뮜©^GØG(Ç`zñßÓÛ ÉÎ~Sÿ•õ¶žVÕú;tvr ïßT—¶óz_çdå«=ÇöcÓ!›ö“ýž£¸·Šh%øáðž9áš$’)¢oè ñÍ’HäF*èÊQÑŠ8`دk†Ú—d1kš2β´ÔµN2ÇкjÖi«§u·mc.¼ __ÁǾ/ ¾©§^škoUgòåI¹}/ì×­x'Sý¤þ |vøãˆtO ø_[ñ§Ãý7_‡Ä>ÑüM¬Éñ»á·…kªÇk·ºLz'§–öÛMt¸kIâ{k«;–´»ƒè¿°+`ëñ;<Ê1°Ãá0õñ˜׆#GSûc/ÂûµbêSöÆån/–K–p“S;¾¥:SÇÖÅákF(N­5:0©/­P¦’i'(òÕmò½¥hÚÜÅÚCà§Â/ ø_âv¡ðóÂ:φ.~|nøsà+kÍSÅW>#›Ä:ÄφÞ:ñäãP†k[K[)¼1ªxJ='EšÆ¥¼Ñ®Të/©F÷ÒÇe9V ˜ÔËèV¡<»:À`/W:îµÇ.ÆcÚ©ÍÁ<=L7²¢éÂ-Ñ—ï]Z‘sšÌ0Øj4ëÊ„'CBŠæ¨æçE µšwVNŸ$-gÈ×5Úç>Wºø/ñÇÃÒø²ïÁšÍ¿†íü% øî}bKtQø?Äþ'›Á¾ñ IædiÚ·Š-§Ñ-%ÚYïbxÝ@ ÕòÓË3(P–*XJÑÃÇ CëJ>äp˜œD°x|FÿlL]?ùù­«gœðø…MÕt¦ ©F·5´TêMÒ„÷wR¨œW÷´Rvj_QÜ| ø›ñGöMýŸ/¾ü1ñgŽ…ãßÚx–÷ÂÔ5¦Óbº? 
¤Ò†±q¦ÚÎÖé*ÚjrX‹§Ud‚óÉádÛôÓÊs<Ë…² ¹n[‹Ç:XìÿëÂáªWtã)å®’­*PvMF¯'=¬”ùWÄzÂâ+å¸)aèU­Ë[ê:Tå>[Ëe'»h¥no;$“dŸ¾ |$Ñ<ñ^ xGVеï…ZWìÓ®ÁâKŸÞkÅ𸼧ë>*Óo´k˜RÏL´Óµkû{k6kÄX/b¾žòÞêÎ=?Lã(Êè`ó_ªP«G”ÓáÚÞÞX‰ÕX…œ`aW Ò”T ¡Vq•O–Q´”Ü£>X¼V 8Ÿe F¦8sºŽ~ÓëTç(¤¤Ó…µ^òr•҇Ÿ` ~éПN¹éÏCœŠø^wø»_µý5û´ék8ËÇrµÕõûï®ËM:«=uÓ•+ÈûãåãŒ^;ú{t9ÏÊs¿ò¾¶ÓÊÚ¿GnŽÎA}ûê’ó¶ÞoKü융aöÁÚᎠôëžœô9È£þ.×íM~í:ZÎ2­u}~ûë²ÓNªÏ]tåJò>Â8ùF8ã׎þžÝNsòœïü¯­´ò¶¯ÑÛ£³_~ú¤¼í·›Òÿ;'-X}€pvøc§B}:ç§=r(狵û_Ó_»N–³Œ‡+]__¾úì´Óª³×]9R¼°Ž>QŽ8Àõã¿§·A“œü§;ÿ+ëm<­«ôvèìäß¾©/;mæô¿ÎÉËV` ~éПN¹éÏCœŠ9ßâí~×ô×îÓ¥¬ã!Ê×W×ï¾»-4ê¬õ×NT¯#ì#”cŽ0=xïéíÐdç?)ÎÿÊúÛO+jýº;9÷ïªKÎÛy½/ó²rÕŸÐüÚ&ßöœ\c3|8Æ?ƒâ˜ÏãOο~ðißžØfޞƯ§å÷Xûnþ3þ¾Ó[ßìKüÞÿ¡ûé_³ŸXP@P@4þÙ_òj´Oý‘ÿÿé‚ö¼ìãþE9§ý‹±¿úTÃþë‰éûŠÚöýܽ??¸þ(~À9ùG·sÏsÎ:ôöSŠþ+ç×ËÉÛ]=5³ió~GÍóµî½:¶•­¾Ý씬aÜ®·¹Ï éíÆÏð·ìú붯¿VÇ}µõ{ig¯[^×üí°}€sòn:çž:çœuéì§s¿ëåäí®žšÙ´ù—7Î׺ôêÚV¶ûw²R°}„p¸Þç>ƒ§·s?ÂÞw³ë®Úv¾ý[ö×Õí¥ž½m{_ó¶ÁöÏÊ=¸ëžxëžq×§²œQÎÿ¯—“¶ºzkfÓæ\ß;^ëÓ«iZÛíÞÉJÁöýÀzà`{{œúžÜ`QÌÿ yÞÏ®»iÚûõlwÛ_W¶–zõµíÎÛØ?(öã®yã®yÇ^žÊqG;þ¾^NÚéé­›O™s|í{¯N­¥ko·{%+ØG÷ëíîsè:{qG3ü-ç{>ºí§kïÕ±ßm}^ÚYëÖ×µÿ;l}Gûi0ÜþÕß-ç´†îþ iQ=¬ðG<3¬‰p<·†E‘% Xa[œaHÀ_¦àÛT⌒ŠœeŽ‚”d“R÷e£‹M=t·^©¿‹ÐÊm,Çs/l®¬žË®–kÓM~Õ®{/´Âï |T]+á/‹ü5ñïV·ðŸ ôÿü+ý£~6è_t;}gú~%ë.ŸáD†}"×ÂþºÕõ}fëD0¿ÙßÃZ=……õΟ-»µ¿ö-¬[„¦ï9Â*üEa²Š*µ XÌʵ*xkJ”0ØyT«VT­Ë«Ñ§ ¸¸¾_cÆöN\8ºjxü]<,Tá ¸‰F4ì£tܧ'·³„S³ŽŽ²•¬t¿³/€5Ï þ׳v‡ã/M¦\ßüOøE¬/V¶‰¾× ø—Uе}"íà69lõ]úÚîßzö×XsŠÛ‡0µð¼WÃØ|e R•LÇ*«ìëCYÑÄT¥VŒìîœ*Rœ&¯ñFW²nH¼)Ó̰4êÁň›Œ–ð¨ã(JÞò´ãf´ëªzòõúìÛ¬|6ðçÇ/ˆ~øæØx[BÑõï‡Ú'‰×Ä?4ßßß|VðW„õuk峆êÿM±Ó<[÷éÒEqžÍbº³k˜¦^úY l;ÇçÙ>:”p”iWÀÓÄÇ‚§ˆœó<&ªæäS8SÅ&Õ7¹ ù¢ä¥£‚• xÊøÌ-hªPŒèÆ¢ÔoJœµå¼¢£RíFÏXÙ­ý¡~øáÇÆˆ ðª^¯†ôme‡¥:Þ_ZiÚ•…Ž­masx±ÃöÙl#¾?k0B÷KÚ4gd_ˆ0trÌç0Áa¹þ¯F²ö*¤¹êFJq«Jv\Κ¨¡Ìã.^f“rg:Œ0øºôi·É û·wqŒ£Ì“–šÆé^Úµwkž5öÏÊ=¸ëžxëžq×§²œWÎÿ¯—“¶ºzkfÓæãæùÚ÷^[JÖßnöJV°î×ÛÜçÐtöãŽgø[Îö}uÛN×ß«c¾Úú½´³×­¯kþvØ>À9ùG·sÏsÎ:ôöSŠ9ßõòòv×OMlÚ|Ë›çkÝzum+[}»Ù)X>Â?¸\ osŸAÓÛŒ 9Ÿáo;Ùõ×m;_~­ŽûkêöÒÏ^¶½¯ùÛ`ûçåÜuÏý=}FãœWË]_éÒýúÞ>o3þ¾wÓçßï±{uÏb9ÎqÓŸ¡ÛÛƒü%×õýk¢ýBý~Iü­åºòvï¡é¾ xÃ┚Œ^¶²šk ÿèpE}wö#«ø—Ç~(Ó¼+áO éR´R[É­j×——:„Q^Mgi¢k×÷Ñ%‰Gõ2ܧ›:«IÓžŠSŸ'µÄcñTð˜L59|>Ú¬êJiM Õ%8ò%.œ>¶+RI¸Ê”uvæ©^¢¥NœzsÍÍ¿y¤¡ ;®SÑ/?eŸ-ÔpxÆŸ 
üo‡âPÔoü%â««Ø4MWáGƒµ?ø·@Öm5LÖ-u%Ðt‹ÇÒncÓfе‹•X,µ‡U–X½*¼/„ã 8¼»œs'9á±$¨ÔÊðuqøªiÕ£Fº©õz3tgOZJÔëJÒ”z%—VR\•põ«ÞTêIò<5U« FQŒÔ½œ[‹Pp“VŒ•Û‹ÿ¼ñÙôkÏøD|Mö?Ã×_Ø:¯Ùµæ‰ ’ qkåj†8ѤY<åX¶ù¼?©c9hO꘮LMþ¯?«Õä¯dÛö2µªÙ+¿fåe«VG²­ËìªZ«~Î^Îv©½ù=Ô¤íü­Ù-v¹Ÿÿž»öuºþÄÕͬš\ºâ\ÿfÞùhÐ_:mYfòo¦C¨«iò_‡û"^/ÙZA81Ö~¿/?±¬àé:êJœÜ]?fë)rÙÒU¦ê+ÅOÝrRv'’§Ä¡;ró)rËàO“šûrßÝrÙKK¦¹OlðWìËâ_økÁ^#¶ñŸÃ/¿Äk>ð‡¼Yâ‹^ñgˆ´+ÎïNÓôy´ˆ¤{Ïh¶¶òj:Í”2M¨D¯4`’¾î†±xü.O—QþÑÄÖÂ`hbq5)WÄâ(J”'NŸî%F-ʽÅÕ­ÜÕì®ãÛGZ½*U#W¬T:0©QÂu*A¥(¯qÁ6êÅ.i-Õú#Èõ/†ž0µ¶¸:„|E‹k7~–ì鉰—ÄO-•Öƒ£ Ö}U.#šÜÙ[ܽÉuÂ!"¼‰`±´½ùa11Œqí=…GOë0“‹¡¨ºsª¥½œ_5Õ¬ïïr:U îéT\³öJ\’kÚGÝpM.W+éËy;­/¯4püñ¤Iws€¼lÑh¶kwòÉ¡øšx4­.d¼†ßW½iax¬´Ù’ ÔŠö६¿g¸V00‹G‚Ìš¨þ£‹å…%V´–¢P£7'µ$©G–œ9rÎ^ë䕚´Šöx–¥ûš­F<Òj”´ƒ¿½+AZ.ÏÞnÞëµýãB_ƒ¾(¶¹’=@ÔüJ4¿ è~%Ônü5¥xƒU´Ñ4_èö>%€êò¶ŸY ¸H®înlEͭȱ¾»³T¹žªeøëÎèÕÅ(á°¸ÚÒÃP­V4©b0ñ¯NUZ¥O–œùfßî¹£' “‚S›• éµJ¥¡N¬8NJ15$äì¹m{ÎÜ·^ëi)šÏÀ‰^»ðl7¾ñ$—Þ=ððñ·„ìì,u=NëVÑîDöò\Ãeb—yCmç]Ø©©ÙéÚòØÙC¥Kw¯¨\Ú*ZÛ¦Ÿ5ÄpݽÃD-žx£ŸkJŠÜ°Ãã])Î\D¨ÒçJ‹ 9Â’¦Ò©)TömC‘¸©ÝÇ•´¤ÕÑš…wãN£„oy*m¨¨é+¾]-uÍv¹]—[Ë£ñoÁO|>½ð‰ü-¤^x³CÓ|Káh'YwßéÞ'UžÔÆ .äi‘.¬&oµXM*Á‡‹½­*Ñýͬ߾•”©éRiTWhºÔ1]8Õ§(ºð§RškâŒ×»¶ŠVi5~e¢—)ìÚŸìƒñ×_´ð†‡­|?ñ·OÄ /áoˆ¼áý¿Ä~ ñή÷‰m¤x®ÎúÃKŠH%Ó5Hµh÷Z׆4ÆÓnþ߬۬j_Ø­Â9¥:ôð´ª`q˜Écée•°˜\K–#­Î¡K ”©G‘:uULM ÕÂÓöSö•cdåÕ<¯Æœ%F­g^8iR¥Rõ(Õ•ÒHË‘YrËš¤'Rœy]æ–²ðë6Ó£×î/<#â8lü+¨É¤xŽý´MKû;DÔ£‘c6z­ð·û-Ã;G²+©¢y©³p• x3Àci¬D§ƒÄ¨aj:8šžÂ¯³¡Q;rU©ÉÉNWµ£'ù“JÍ8ðÊ•xªÒ¨•9rÔ—$¹a-n¦ìÔ[¾Í§kZúÙü+ø‡¨Mim§ø Æ—×½ÝÝ…½Ÿ…µË™¯­¬c±žöâÒ(ld’ê 8u=6[©a j6LÈ—ví-C.Ìj8FžRU#)ÓŒ0•æêF œ £æ¡´¥'¤ªSm¥81¬>%µËB³rMÆÔ¦ÛŠQ»I)^ÊQnËNh½n¹hiÿüa«ZÞßé~ñ&¥c¦]› FòÃCÕ/-,/¶>Ç{qmi,6·[Ÿìó¼smV%pÜéàñu£9ÒÂbjÓ¥?gV¥:g s³|•%µÙ7Èä¥mvMŠ4ëJ2”iT”c+JQ„šŒµº“I¥+=ž©wÚ]„>ø×ÆzÇ…4k"çLoÜ^iþ Ôõû=KKÑJþÓ»7Á²xÇýOǨ=BñÛ“_Ð> ÿ¸g¾¹„ÿÓOëSîxKZÎÞÖ—á /-mkþ»Ÿ»õûAõÁ@P@ó_í‘Ïì¥ûC_„>:ÿÓ ïÓùþUçgò)Í?ì]ÿÔj§>/ý×ÿ`õ¿ôÜãCì]±ý;g郎»›ú7ñ=×õýzúýÇä¶Ýwóßúéߣr>Ã׎¿˜`ñëÇø…È4]_×é¯Kê>m¼¿KÛ¦‹[}¯Ð>ÅÛÓ¶~˜8빿£_×õëë÷ öÛ®þ{ÿ];ôn^íá/ÙÞ)ðå§Œ'×<á/ Íá½oÆz–½âíWP°´ð÷ƒt鞃ÄÚ¼:~‘ª^¶â_Üø[ñišž¥}ªèzñ›Oµ±±[É~ƒøÜnž3Úá0ØYáñ¹WÅT«QÂPÅRÀ¬EEJZ’§_UápêŒ+UZ5Û¥ PçŸ} zÔãWš•*NªçUÉ(R§URU$£.IVŸ²‡'4œ£7Ê£”øÏÁºV­­Ü]xg^Òtßx3–š‡„õ¸üCk⟈ÕücáMKòØÁåßéZ–‹¢Ü±iM¾¥ky,}Þ› ข 
±¹7Jµj’ÃÖ¥K‚ÁÆxZÿXyæZØÌ-L;„cÏN¥ß’¤g(Ò8Ïš1Џ*ô!9·NpZT¯J~ÑTuéέ)A¥¬e=Ò’•“‚’gŸj|a¤\ÝÙj¾ñ.—{dÚz^Új«gudÚ»§‹YÂ5iAá«)ΜŸ,jB•96”f¹£&ÒM9{ÜJ•xÉ/eUI¥$½œîã{)%˪»I;$î·zô¿~/½Ô¶F4+áéâ+bçŠr£O G OVµ£:Š4£îœç&Ô`¤Úå)a«V¯*6P5Ru]KÂ4áEJSœí%®‘S“è›|§[¬þÌÿ,l¼M­è#@øƒá/ x'Gø‰}ã/jßÛþ›ÂzçˆÃ—öÒ=µŽ§ÜÂÝE©èÚ–™a­évZn§ªßØA¥YKzÝXŽ̨ÇZ”hã°¸LƦ/WÛPxJøªÂ¤y©Ó«Fª—´£V•:ÔéÓ©Z¥(R„¦i<& ¬¡ÉZ*0¯*´dçNsöIëË%%&ù¡(FqŒe7Xó[qðûƃGk¯ ø–Õ|Gm%ç‡ÚãAÕaå”Q ¦ºÑ·Û Ôí ¤žÄÏD|ÆeBZ¼™`±‘ö<ØLTV".x~jcõˆ%w:7ïb£i9AIY§}NgJ²ä½*‹sS¼$”Ò»¼.—4RëÕµÒÄ÷_ øÇðþ=\ .ÿ®ŸŽºi­ôÒëíÒ_¼Wá á§´/ ¦¸Ó~$üø³›¤x˜ø?ÄÞ)Ð|qão xAð–¼’ÃsmâÛ4ø•¦ø‹B|}m—«k+Ûh÷Eõù; G,ÌiâdâèæÜ?›J}9ª|Zý§<ã?‡Þ&ðÃÏøÓÃp\ÝèÞðE߈,t[$Ñþé¶Þ×µêͧjú„Ðx£TøµàËMij.´Él¼G¬D÷ëäAg.YçåX¬¯—å¸lE§C.ÀΪ ¹2G‹«B§²«?ßTÍ0Yq‚•õS©£ N31ÃTÃU¡‡…X;Â…>Elö5% rÎMNXŠ^Ñ¥Í~IIYDóí3ö‹ñ/ƒ¾|1ðÃtÓt?xÅ_Æ£«mÔ¬éJ¿²y})¬\çQãù%>~Èf”\Ê~Ò4°ôªAB2Œ½–"gSÚÊ«nSqu9~¯{Vß¶Qr#ðwí]á»Mkâ>­ñWøË¯Øx³âoˆ>!iZ ¦š—„úv¯e©iÖzF“ãǾ ñoÂÏÚi÷é†{]SÆ^ ‡GÌ'Â7Ma$z¹€âüÄfµs*ÙZXœß£J*>Ñáê­(–2–a‚ÆeØ…NjÒ\n4Ý–\Š5J¥.|L±ÄITÅN´"¹yœ%EF5cZ•\=D¥Ëxε_wÙµϧáOÚËáÖ—¨A.¹Å‹]ÏAøg/‡ô XµÆµªü"øI¤|3ñº‹<3ñÀÞ3øs¯ê7–WÍáÚxXÒ$ÐoÒç^ðI¾lÛLeTf½»Ç„0Ü;R¥J“•J™^OK.ĺª9–—â£5‡ÆR¯ˆJ”¯[êEBNŽg†Œ—;®£xãBò– 9*½ ´fß2§V5%î¿ÞPºŒ möœø{k¢éZ&±Ä›7›àæ‰ðŸ[Ô¥ðÂøµ|1uá/ˆ/ã]"ûAK‰~ Ô|Y¢x»M–+Çý£àû½WðþpÑxŠÆ!o8n*Ë ª¼le,‹•Tœ°ô±K S˜ýj¤¿µp514±Tœc^ž Ó©B”¢±1NšyŽÂ“¬¥õ8a›öqŸ#§_Ú.[b¨:‘©)§ì­(E¥UiZ‡í£j>'ð†·-¯Ä˜ôÆ_·&©â/Xü\³ñF¯âŸ‹w,Ó†Ÿá°¾%mE–ÏNU½·Kë£_W‹²¸VÁb=®c™TÃçyVa‡ž; §‹ËpXLC©ŠÂC3§‹¯‹Ì^"¨?­,=F~Î3”ãVY¦2£>|F"Tñ˜jôÝjt\=sæ«J8ˆÔZîp|Ÿ½öp´S´[ä9ÛOÚ;áö›k¢Ëk¨|iÔ.üá¯Ðü%wáý*ÃÁß->/ÜøÆêßÄߦ>8ԛÚ––¾3M?ÄúKiž4“TÓ<áˆt½^ÆI<Í?ŽK–S¥JK™Íà°ùöYPÃC ™ÿkTÆÊ–#ÿ´*:5!Ti×¥ õZ\ ÕYÄåq„åƒKb±TêÒ¡Gƒ¡RZî†*xêò­VtèNŽ ”0™|í[ê×­)ÅKšžc‡«S¯*_¾ÀÕ¨¥B£‚Š”ýGZ§<œa(QŠ£‡~÷³|×\ßžzÔÍ­k¶°Öélú¶§©5´cÀ×÷RÝxÇ@‘y»†¾ïË15½¾"½d”UjÕj¨«$•IÊ|¶ì¯¶ßyó“—<ç;[žr•»s6íò¹™ö>øÇðþ=\ Æïúéøë¦šßM.¾ÔؽŽzsßÛÐ žŒç8‡'ý[ñÚýz[^—÷'þ½”ÿ´ Æ2¿Ïëñ[¯ôÇÓ Æ0¿Ðž ;åÙçý†áôÅO_Îýï¡÷<#ü gý}¥ÿ¤Kúÿ†?skö³ë€ ( € (æÏÛþMSö†ÿ²EãŸý0ÞWœȧ4ÿ±v7ÿQª˜b¿Ýq?õâ·þ›‘ü~}‹Ð}ëéÓ¿l€ƒ¶¿ˆ¹Ÿoëñ²^VÒëÌüx>Åè3ÿ×ïžx>ÀcžFåwý~}5óûïv€>Åè>ƒõôéß¶@AÛO™öþ¿%åm.¼ÀûcIø·àÏè_|E|}à]nÇà¶£ðŠü-m7ľ ðGˆ¼9ñCľ*¹‹Æ õ-oÃö>6øwãï 
x¿Kºk}WP·³þß›P»ÓÅõÿ‡ob‹ôL&s‚Ãaò\KÇbðU?Õú™O¶ËêQ­ˆÁâ°¹¾'UbrúÕð´±8húÄÞð‰íÄ>>øsâ»ýcEðŸƒüwâI|-ð«âÿüJ¶ð®‡©Ïáï|EÔõïAão xkKZ@¾Ñþ×®ÛjÉ$ýO‹òzX˜T£…®”s<³Zµ<> ' ©Í}r­IÓÅc%kÚ¨MRÌðÔ}•9{lE:teyÔŒ#9VX˜b°éÃÛTŒiÓ© K÷Ò“J­%Ì£/øõñ’Çã+xû'L×4¨ô= ïRñ¯öìVQKâ/Œþ.»MGâ—4ï±]Þ,š/ˆ.l´+m2k³¡-¶Ž'»µ…å¯Ïq6}C8ywÕ©T¦©áêbqÞ×’õ³Œ|ãS2ÄEÆs^Τ©ÑŒ£ ®0÷Õõc†/Ø{8Ê*0•JÜÖ÷ñUŸ5zŠÍé'¨Ý)Z>÷C¹ø…ûYx§Äÿ¦ñ'†tý2ÏáŒÚ×€5MKÁ— þxKÅÞ9Ó< |5~¾ø…ã_é—ž&ñ{¯xyn£´Ö5ýVÙ¡M>iàck ¼¦gƵq¤êaéSþËž#-©Z„ðYvŒ£xZ²¡ŒÄá¨Kˆ§*¸whWÅ×…¹ƒåPN#6”ñ.T㫹áÜéºT)δ){98V«N›©8¹ÃE*²[^:Z>â/ÛÃ,Ú™ŽJ´¨ÝLiN²£Fœ«Ö§ŠŽ!:”iÕPŒeN>ÂRSN_y©Ý¨õÖµ÷4ojÞ7ÓOÆ¿ËâïŒiñŸQÒüS¤è›>\ÛxWâiàÿ†î|Y{ˆcš÷ǶÖw:Ç“á ð—‚ü? èovë—Z㲆>xèTÌñ?[ÎÖréâ)a­•¨`³,X‰Q¯N¦'6¤©RåxxÖ¡K“© µ§ŠªÜ`cG0ì#…YW•iá±t§Ì½²ö•œÜjFSÄÇ–6pçŒhFNJRu%vãô7‰~;ø/áÑ×¾']kÚŸï¾ |wøAñfÇá•¿Ž<â½;ÂúO‡ô¿ˆcøu†|W­^iú_…[]Ðl<=¨ø¯Fø}%õ—…táØ.ü9¬ßÛý+?˲ùb3:˜—ŒXÞ És:y|q¸ L0Ô¨áñþ×êQÂãkT§O ëQ˜ºu^xPü4~«V¤{êã(Pu1£ªªã°xˆÑUhTTãuùýЧV£Q¦ç U… ^4à©GÙÎqðmwã‚õÿ|BV±¨ßZéÿ&øIátñ7‡lüâk>-xsÇÚ¾Ÿá©¼kñ_“¾м#®ê÷z­÷ŠõKK_^Z¤6ºJk–÷_?‹Ïp8ŒfèÖs…ŽyU×£G [ˆÌ³œ67–ž8ì«Ãa(aq5gUâëFš‘|”}¼¸*ã(N†#’m¨`Þ q…9ÎxŒ]:ºSö؉{:P§RN^Ú\³q²:æø[ì^ƒ?ý~ùçƒì9ä`nüÚïúüúkç÷Þí}‹Ð}ëéÓ¿l€ƒ¶Ÿ3íý~6KÊÚ]y€}‹Ðgÿ¯ß<ð}€Ç<Œ Êïúüúkç÷Þíý[Á7×gì]ðY»Gþe}?—å_Øÿ$~Cÿ`_ûš¯õùªdšåX/úõùNKËúï¹ÿÖûsã5¯™ñ—ãã¯Å¿‰¤~9׺ñè1‚†¿Ž8áÿÆ]Ÿ¯ú™Wý?®·ß£qü§8_ð©Žÿ°‰þ'›ý‡Û¡ú{õÁôÇ$žpvæ¾Vÿ×ã¿§ùw<ßëúÿ‡ûƒì^Þãׯ9ëôÁo¦x4_oøm¼¯§M=îý­§#”-³Dûã,(ÛYw! 
áŠ3¦íÛ¶»)Àb´ïn¶OO&¾/»DýW_×ü?Ýaßbö÷½yÏ_¦ }3Á¥}¿á¶ò¾4÷»ôlì>ÝÓß®¦9$óƒ·4_úüwôÿ.áý_ðÿp}‹ÛÜzõç=~˜-ôÏ‹íÿ ·•ô駽ߣ`aöè~žýp}1É'œ¹¢ÿ×ã¿§ùwëúÿ‡ûƒì^Þãׯ9ëôÁo¦x4_oøm¼¯§M=îýû·Cô÷ëƒéŽI<àíÍþ¿ý?˸_×ü?Übö÷½yÏ_¦ }3Á¢ûÃmå}:iïwèØØ}º§¿\LrIçnh¿õøïéþ]Ãúþ¿áþàû·¸õëÎzý0[éž Ûþo+éÓO{¿FÀ>ÃíÐý=úàúc’O8;sEÿ¯ÇOòî×õÿ÷ؽ½Ç¯^s×é‚ßLðh¾ßðÛy_Nš{Ýú6ön‡éï×Ó’yÁÛš/ý~;ú—pþ¿¯ø¸>Åíî=zóž¿LúgƒEöÿ†ÛÊútÓÞïѰ }"ÚIVw¶çŒ®É^42.ÒYvÈP·Êwm%²¥›7¥6•¹šNú&ìﯦ©k÷t ¾î݉[OG]¯uÊ8VPFäpêH Œ«(+“ÃFV©RÚÏ]Wm–½n»[ÞÑëÕ€ï°ût?O~¸>˜ä“ÎÜÑëñßÓü»‡õýÃýÁö/oqëלõú`·Ó</·ü6ÞWÓ¦ž÷~€}‡Û¡ú{õÁôÇ$žpvæ‹ÿ_ŽþŸåÜ?¯ëþî?jÿàŽpù7?´zúÁðlÿäO‹çéøpNkúÁW|»<ÿ°Ü/–¾Â¥ôÖÝ:úÜûžþ3þ¾ÒÿÒ%þgíÍ~Ø}xP@P@|ÝûaÿÉ«þÐöIÃíõõ;`zãÎpÙ]~OoÓNÏÓw{X?¯ëoë¾áö/nyú{tv뜄ŒÅ÷ù~;VûÃúþ¿¯ÉØ}¾¾£§lCœcÎ"‹¯ÉíúiÙúnïkõýmýwÜ>ÅíÏ?OoîŽÝsƒÐ‘‘¸¾ÿ/Çoêßx_×õù û·×ÔtíèsŒc9ÃdQuù=¿M;?MÝí`þ¿­¿®û‡Ø½¹çéíýÑÛ®pz27ßåøíý[ïëúþ¿$aöúúް=qŒg8lŠ.¿'·é§g黽¬×õ·õßpû·<ý=¿º;uÎBFFâûü¿¿«}áý_×äƒì>ß_QÓ¶¡Î1Œç ‘E×äöý4ìý7wµƒúþ¶þ»îbö矧·÷Gn¹ÁèHÈÜ_—ã·õo¼?¯ëúü‘^-Úg†Ú]þûÅFí–ÜA*‘¸<ýæ-ƒŸš¥6Ö²“µßᦶ_/;4;¾­¿WýmýnLtô.®cS"UreWÚYTà¢îÁÃm]ÄáMO6é=4ë£}?=4ë¥ìÅý_×è;ì>ß_QÓ¶¡Î1Œç ‘E×äöý4ìý7wµƒúþ¶þ»îbö矧·÷Gn¹ÁèHÈÜ_—ã·õo¼?¯ëúü}‡Ûëê:vÀô9Æ1œá²(ºüžß¦Ÿ¦îö°_Öß×}Ïê3þ Ô»?cƒ«ýÑñ~_¼mõÿ>+û €äŽÈ?ì ÿsUýÔòOù`¿ëÓÿÒäÿ×ýø¿bñ|døÉm:47vÿ¾#››idöæëƽõ¨š&]ñì.ínáޣ͵¹·™s¨ÕüqÇ>/Ïý¤%,¤ãÍ^Qp’¿*qšûWwKÉs~Uœ§ÓtÕëÉ«éu+4×tô·sÏþÅííÏAÎÞÃ'¯Îr¡¾E»ÝïëÛï¶­íë{&ây€l‡§¿ãÀô8?‡ €p¢÷»óëÓ·òïªwµ’ù}‹ÛÛžþƒ½†O^3œåC Þï^ß}µoo[Ù7d==ÿ¡Áü9€½ßŸ^¿—}S½¬—Èì^ÞÜ÷ôíì2zñœç*n÷{úöûí«{zÞɸ€!éïøð=áÈ (½îüúôíü»êíd¾@bööç¿ çoa“׌ç9PÃw»ß×·ßm[ÛÖöMÄÙOÇèpAàEïwç×§oåßTïk%òû··=ý;{ ž¼g9ʆ½Þþ½¾ûjÞÞ·²n Èz{þ<Cƒør /{¿>½;.ú§{Y/ؽ½¹ïè9ÛØdõã9ÎT0Ýî÷õí÷ÛVöõ½“q6CÓßñàzÃ@8Q{ÝùõéÛùwÕ;ÚÉ|€>ÅííÏAÎÞÃ'¯Îr¡†ïw¿¯o¾Ú··­ì›ˆ²žÿÐàþ‚À‹ÞïϯNß˾©ÞÖKäö/on{úvö=xÎs• 7{½ý{}öÕ½½odÜ@ ô÷üx‡ðä^÷~}zvþ]õNö²_ ±{{sßÐs·°ÉëÆsœ¨a»ÝïëÛï¶­íë{&âl‡§¿ãÀô8?‡ €p¢÷»óëÓ·òïªwµ’ù}‹ÛÛžþƒ½†O^3œåC Þï^ß}µoo[Ù7d==ÿ¡Áü9€½ßŸ^¿—}S½¬—Èì^ÞÜ÷ôíì2zñœç*n÷{úöûí«{zÞɸû#ÿ†€­Ïí2)høCdÓ(&!{n¿n®- €l0Úê}̰gÍŠÞöÒWUŽâßÑ> Âk-Îê8IB¦; É&šŒ¹hÔæå•’—-ÒvÛKÚéGî¸I?«ãO•Ö§gѵ{w¶—×îÔý¤¯Û® ( € ( ÿk{K›ÿÙ‡ã啼×WW? 
|köñ¼ÓÍ#è—aR(3»žÊ “Ú¼üÚ2žU™Æ ÊrËñ±„b®å'†¨¢’Öí¶’VÕ÷0Å&ðØ„“mЬ’Z¶Ý9+%ÕŸÊl0CsWÒG<ƲÃ4.²E4NªñɈ^7V Œ§Xc*kømÞ´“Œ££MY§³M>[8»ÆÖÛµ¯/Ƕӯb_±{gõëÆsïÛ=OÊMí÷=|íóêûwÚ퀟bô—·>‡ŒþYÀÈ8f­ÿ¯Í-»þí¼Oëúþ¿1~ÅíŸ×¯Ï¿l õ?(¯o¹ëçoŸWÛ¾×lû ü½¹ôÅè?/n}ü³pÍ;[þ_šZ?wü7ÛxŸ×õý~bý‹Û?¯^3Ÿ~Øê~P)^ßs×Îß>¯·}®Ø ö/Aù{sèxÏåœ ƒ†iÚßð:üÒÑû¿á¾ÛÄþ¿¯ëóì^ÙýzñœûöÀÏSòJöûž¾vùõ}»ívÀO±zËÛŸCÆ,àd3NÖÿ׿–Ýÿ öÞ'õý_˜¿böÏë׌çß¶zŸ” W·Üõó·Ï«íßk¶}‹Ð~^Üú3ùg ášv·ü¿4´~ïøo¶ñ?¯ëúüÅû¶^¼g>ý°3Ôü R½¾ç¯¾}_nû]°ì^ƒòöçÐñŸË8 Óµ¿àuù¥£÷Ã}·‰ý_׿/ؽ³úõã9÷íž§å•í÷=|íóêûwÚ퀟bô—·>‡ŒþYÀÈ8f­ÿ¯Í-»þí¼Oëúþ¿1~ÅíŸ×¯Ï¿l õ?(¯o¹ëçoŸWÛ¾×lû ü½¹ôÈ!8ÊXÜe.jµd´i4ši­5M=nÕ2Tã•`“M?e{=õœšûÓ¹ÿÐýöý¸äãümÿ^úþ™­+âóŸùUÿ /ý7ÈÅÿ^‘ÿÒQòMyg0P@P@P@P@P@û·ÿýÿ“~‡þÆýwÿItŠû,—ýÉ×ÉþQ=|'ðW¯è·kÖ:B€ ( € (ž£ÿ ûïúó¹ÿÑ2PòÏ®ÈkWÿ°¦¡ÿ¥r×ç3øåþ)~g€÷~¬Ë©P@P@P@P@P@Z±ÿË_úø‡ÿCZþ >ÿȇàŸû|7ÿ¦k*ý‡ð(ÿתúB=è|ÿ #ÿÙrocksdb-6.11.4/docs/static/images/pcache-writeiopath.jpg000066400000000000000000000541301370372246700232160ustar00rootroot00000000000000ÿØÿàJFIFHHÿá˜ExifMM*V^(‡ifHH  Ò Áÿí8Photoshop 3.08BIM8BIM%ÔŒÙ²é€ ˜ìøB~ÿÀÁÒÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÛCÿÛCÿÝ;ÿÚ ?þÔ>1þÔƒáWÅ? üð÷ÀŸŸ|s⯇>0ø¬Ö? ¥ø'ekáïx#Ä~𶳩kWŸþ4ü"K‹©5h°éúW†WÄz¥Ôfê_²D!Q(·Ã_Úÿöeø±‹|nølþ)øÛðÃÁÿþü6Ö|Aøq⯋ZàÑt?ZêÚ'ƒçñ–eµÕ´x[PÓ5MVÿHÐf°ñ?‡uõ{]7[Ó¯.<ëÄðPŸ xBøø3Å?³·í üq›Å? <7¤þÏ—ÖÿOÄ?Z|d¹ñÍ<[áÿ[||¸ø ©øZïQømãm7Yž? ¬xfÿEŽ×Äz&™&³á᪀z•ûj|:»øãŸ:çƒ>&x7GøWñ*ëá_Å_øƒNðm猾ø—Hñæ‡à_]ëw>ñÇŠ>jþðrëÖþ.ñ/ˆ<ãÏéöž²Õä´–ûÄ:]ׇ™?ðPÿÙÚ-sâNƒyyâm.…ß´6›û9ëú†µa¢hšî»7†µ¿xâ&­jÞ"´ÓæøEðöËÀßìü}ã-Bm.mRøñrÚ=ú? 
ÚÜj þß?²ŠüIøqð¾Œÿ5 oãŽ&øuð·PÑ>$ü1ñ‘ãŸXxoÇž!×4Í h7Ôõ«sáû¯‡šÇƒµIµM‰ü{ øGKmKWÕ€=÷á—Æo„´½O[ø9ñWá¿ÅDÕ[CÖu†ž8ðÏŽ´Í'Z[[köÒ5;ï êz¥­†¦,olï¾Ãu,W&ÊòÖìDmî`‘À=&€ ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ( € ÿÐþÉ>;~Ÿ¿iŸŽ>ø­ñãÁþ,ø[¿~%|¶øgñáö“â«%¹øâÿ‡þ$»ñž™¯j—r¾…¨ØiÞ¹Ð~Ž·×k¯{»§?ì·à"hÿðJŸ¿Ã»o‚Þ:ý§ÏŒþø‡Pø/㟋ZÔ_ ot?Žþ4ø¹ðwöZøoû1/‹<1ñro‹zæŸà¯øÂËá?„ð²Gk}ã}OM8ÕÛZøŸâßøÌ\igÄ©¥é <ê?ðI¸õÍ3Áã^ý oµoZ~Ê> |Pñ ß í!‡âŸíñkEøÙ¦ÿÃUͤEã`<1y¡^þÖ_¶¥?à MGT‡\"´“Çšjø.µðRø“ÿå´ñ׌tïi-¼áø.| ¥Þø[Mø{)1xÿ²·í û)xÞÕ´ßhcÂÚö«á_·~!ð‰×LÕtÿjÞÓín¼'âû;×û ¤þÅ_±•ïìŸoã+ßüK—â—‹<]ៅž“Ä/Æ™ßÁßì¼Woá+Ÿÿh?Ú'[µ¸ûoŽZê:έÛYÊš}ÕçÄÿË©Ç%Íž’Ñz%P@P@P@P@P@P@w¦^jOñgÆÚ|¾9Ò5 ×áßÂûˆ°Ûië¯xKR¿ñ/Åø5_êW‘Æ5Yôˆvºv h–×’¾ŸkyðÃÄé‘Çsy«4 ‰@P@P@P@P@P@wð†óRÔ~|/Ô5Ÿéõ‹ÿ‡~ ¼Õ~%x~ÛO³Ð~!êW^Ó'¾ñΉg¤Ç•k¤x¶êIuý6ÛLŠ=> ;øb²-–5PÿÔþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@yÞ§g©?ÅŸêxHÔ4{_‡,ï¾%Ms§®½á-JÿÄ¿'Ò¼ ¦ÙÉ ÕgÒ>!ÚéÚο­ÜÙÄú}­çÃÅ©ÉÍæ’²€z%P@|ÏñkÆÿáqü+øKð—Ä ü!ÿ Ã?_uÿ|Eøm⯉ŸòLüSð#ÃZV£é^ø¹ðû?ûCþÞ¥{¨jº–¯ÿ ‹kk<닆?áý±¿è»þÌÿø‰¿ÿú4¨ÿ„WöÆÿ¢ïû3ÿâ&üSÿèÒ þ_Ûþ‹¿ìÏÿˆ›ñOÿ£J€øEloú.ÿ³?þ"oÅ?þ*?áý±¿è»þÌÿø‰¿ÿú4¨ÿ„WöÆÿ¢ïû3ÿâ&üSÿèÒ þ_Ûþ‹¿ìÏÿˆ›ñOÿ£J€øEloú.ÿ³?þ"oÅ?þ*?áý±¿è»þÌÿø‰¿ÿú4¨ÿ„WöÆÿ¢ïû3ÿâ&üSÿèÒ þ_Ûþ‹¿ìÏÿˆ›ñOÿ£J€9›¿~Ò¿þ&|Ðþ |AøãŸü[ø™â‡Zµƒ¾øûáψô¿ìÿ¿þ+Xkwˆu¿ÚCâv˜Ûu?†6Z]ÞŸsái~ÓaªÝI õÄ»}}@P@yÞ™g©'ÅŸjøHÓô{¯‡ ìì~%Cs§¶½âÝJÃÄ¿çÕ| ©YÇ!Õ`Ò>Zê:6¿¢\ÞDš}ÕçÄÿE¦I%Íž¬±z%P@ ø[ã'í+ñkŸþü ºðŽñ3â÷í&ûÆ?´·¼3â=SþÅ_|)¿Ö5蟲§ŒôÍûgSð]î©i§Ûx§[û5…å¬s_=À™¦ÿ„«öÆÿ¢û3ÿâY|Sÿè- þ¯ÛþˆGìÏÿ‰eñOÿ ¶€øJ¿loú!³?þ%—Å?þ‚Ú?á*ý±¿è„~Ìÿø–_ÿú hÿ„«öÆÿ¢û3ÿâY|Sÿè- þ¯ÛþˆGìÏÿ‰eñOÿ ¶€øJ¿loú!³?þ%—Å?þ‚Ú?á*ý±¿è„~Ìÿø–_ÿú hÿ„«öÆÿ¢û3ÿâY|Sÿè- þ¯ÛþˆGìÏÿ‰eñOÿ ¶€øJ¿loú!³?þ%—Å?þ‚Úé¾ üOñ—ÄI¾,h<áŸx»á'ÄË_‡Zµƒ¼{ªüFðæ©ý¡ð«áÅk cNñ·ð÷Ꭶ»´Ï‰ÖZ]ÞŸsáh¾Í¥]I õå¼ðº€{•P@wð†ÏRÓ¾|/ÓõŸi 5‹‡~ ³Õ~ø~çO¼Ð~jV¾Ó ¾ð6‰y¤É.•u¤xJê9t 6çL–M>{;e²‘íš6`ÿÕþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@y&³ÿïü/†ÿiÿ„»þÏøTŸÿ±>Å·þOøGá1øÿ Gü$ŸÇÿ wö—ü"ð…mù±¿á?ÝÎÊõº( € ùŸÅ_òxÿ?ìÙÿk?ýZ_±e}1@P@P@ó?Çù*_±gýœÇŠ¿õŽ?k:úb€ ( € òMþßø^ÿ>Íÿ wü%Ÿð©> mý·oü ŸðŽÿÂcñûþøFÿþïí/øKÿá5Ýòÿc·ôëtP@|Ïû&É-ñWýœÇí§ÿ­ñâ€>˜ € ( € ( € ùŸàGü•/ÛOþÎc¿úDze}1@P’|ÿ„wþGÁOøD?á.ÿ„Oþ'ÃøEÿá?Ûÿ ßü#¿ð‡hߨŸðšìù?á.þÍû7ü$›~_ퟶmâ€?ÿÖþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@ñw€è^Ö¿g?‰>2Öm4o„Ÿ~ 
ü(Ó.uoXþÕ^ ±Õµ=ZËÁPkóYøWDµŠêþ[k{%†wå´O~ÙÚ¿íñ7à£|_ý˜`µøðwàgÅ(|@¿²ÏÅg¸Ôî>0x×öˆðÎ.žl¥ŽÖ ?Ö—°^­Üï~þ"¸‚KkUÓ£–ìøçâÛ;àÿ‚´O[|_ý˜uÙ5Œ_³¿ÂÖ²ŸöYø­`[üqý >|»ÖVtý²®šI¼;kño[Ù•5.->K›8îžîÜ©Ô4_Û*?‰^ÒÇíOû-ÙGyàoˆzƒøÿe‹ÿ>›¯ü0·ÆÐÍû_Ë®½Ÿ×V—E¾“NÖl4˜¦ø‡§®µ¦jדhz(-ð3ŶwÆk~,¹ø¿û0èRi¿h…«eì³ñZý'·øû@|Mø)i¬´ïûeZ´sxŠ×áü> ¸²2iפº|w7‘Ú¥ÝÀ­ø£öÎÒ?h†_âÿìÃ=¯Äƒ¿þ)Mâý–~+%Æ™qðÆ¿³¿„í´h´ñûe4wPë±üq»½žõ®à{ðí¼Û].£$¶€ßu¿ÛàÿÁÏ‹?¿áoþÌþ"ÿ…]ðÏÇŸáÿ†]ø§¤ÿnÂámWÄ¿ØÿÚ¿ðØºŸögöŸögØ¿´?³u±yÿiû ß•öwû’€ ùŸÅ_òxÿ?ìÙÿk?ýZ_±e}1@P@P@ó?Çù*_±gýœÇŠ¿õŽ?k:úb€ ùSÅ~+øùâ_ž3øYð³Æ_<áÿ| ø?ñòóâÂ|OÖu­gâ>;xvâÚÚãß¾ØéfeðO–eÓµ‹«Ë­bõ佂 €<Ã㟊?lïƒþ Ñ&ü´ÖZwý²­Z9¼Ekðþ\Y™4ëR]>;›ÈíRîàŸð߈mWö©øÍðv_Úöj}ÀŸ³ïìÍñ.ÂÌþÊŸ Ôþ,|Fý­<-«\­¬_µ½¾µ ÝÚüÑbyõêº]ȲŽ=#DðýÝ®¹â`ƒãŸŠ?lïƒþ Ñgøÿ%KöÓÿ³˜ð¯þ±Çì™@LP@ço5-Gá7ÂýCYñΑñ?X¿øwà«ÍWâW‡í´û=â¥uá­2{ïè–zLqiVºG‹n¤—_Óm´È£Ó೿†+(ÒÙcUÿ×þþ( € øKö4—ãsþÃß²™³°øWˆGÁ¯‘ڥίâÛ­¾…Þò.î%‹D²¾Æ’.Ã.2hp¶àšœÀÀTÞ§Å–Ô¼r4ûŸ‡pèï¤[/ÃV½±ñ-Ö¥½ýŸÛ&ñÊA¨ZZÝéUóMµ¶%•ãiþZÍt·%@ +_‹?Ú^“QÖþÿcÛé+ñ*ÎËÂÞ%–¯¯¶žëi?µ)ü`m|;¤Cª˜§¹µ×´¯^Üéèö‘ÞZ\È·‘~9øßà‡ü?Sÿ‚ˆjþ:øaû\~Ìþýˆeðw"»ð‡~jž5´Ôu»OíèµûO |8·ñ”6Óµˆ‘´‰u½}þ7ø7L׿•g°±·ŽÕ´´ý8ý“?ä–ø«þÎcöÓÿÖÇøñ@ÿƒ?äü?iû4ØŸÿW'íÿ@í±ÿ$oÁŸöw°þ·‡ìÝ@é©ÙêOñgÁ:„^Ò5 ×áßÅ ;ï‰S\éë¯xKR¿ñ/ ô¯i¶rH5Yôˆvºv³¯ëw6q>ŸkyðÃÃñjrGsy¤¬ ûÿÉñŸýÇíÿÿ­áûHÐã?ù?Ù»þÍöØÿÕÉûP·íÀ—²~ÅßµÒi×¶·­û2|x×¶rßÚÅ'ü*ßá糂ûMšáÈ1Ç}lǨ”c ¶þý¬ú¿_³Âÿ¿û0|Iåû]Çþzc`¿nï„ÿðT~Êß|%û)~ÑŸ|3ñûV‡zþ…ðgÆ¿ u-"îËâOƒõnüøãÄ´7Å=LÓíü/g­É­X^ü<ñJx—Fþ‹O[j»p ÙÁ?¶×€~+~Î:íëñ¯áÇoŽþÍ¿µ“]øÃàçÃCáæ†–ð³?b•[-L]ê+iâMM&Ï­i~ð%«¡Øt&söŠý_ €?,?à«?õ‡žý•¥Oé<3â¿Úó@ðŸuþÕbïë—öý¢|Aƒã_Ú/áU­÷| ¡Oâ¯xgS¶MÓï¢×üG¥h:ä m¨ÉqÁžÿ‚—ø£ösý—¼w«x«â?ÂÏ'‡¾ ÿÁMþ'|ñŸ>-x¿ãw‚>&|Kýž>#øSøðwà·í â¿xOÆßµG€ ð¯Ä÷ð\Þ%¿±³øµñUðn§›.„t èÜ™ñ7Ç¿Û?áÇ/‰¯þ-Ú|\ð¿ßþ }ñÃà冉ñ{Ú*ÿdÿÙÛÀz†½©û[iÿ æð7Â_|dÕ¼³ø¤ø]'ãÿ‹üYñ MÑtÏ^|Mð·Âm{ötÐ4>Ú|"ý©~7|/ð=íæ·ñ·Åžñ>´ßþ6ÛYjŸ å˜h:N¡â»ß xç¿|gÓ-~7þÔQø7öœñíßí™áø(oÁχ_ÿg+/ÚCÇ^&·Ö>ëZgì´Þ/ðµßì“yã_À6? îüâ‹!ñwÄËo…úeÿ´¸¼Añ߯º%熆¥næ-ÿJøïñ†Û^´ð·Æ¯ÙÓáÖ£|Bý‚þ$Øøö?Y«hÿþ.~Úº_Á߉~ ñÞ•cû]ø›YÒ¡Ó<-«ü9“ÇÄí#à'Ä= Ã]ñ_…ücðgáö«ã? 
êÞ û“ö#ÿ‚ƒø»ö—ý§¾#üÖõ¯„šß„âøwão‰^ÿ„/KÒü=ã- ËÁ?ÏÃmCÃÞ,Óbý >,ø¢þò×G×|q®Ÿü3ýžƒñ»Dñм[ið£Ætˆ<oá/Ùjpø¦ÇÄ>>éÿü?6§}¤Ã'%ð| s©Ë¦ké«Y¿‡Ô°ÓŸ²Ï‡¾4øSãGü?ûD|HðŸÅߌÚgì£û'A㿈¾ðü2ðljõCñ{öÜ’½3ÁOâ/d4÷³²ºxµU·Ô¯-®5[m3D‚ö=ÀÐ?mù#~ ÿ³¸ý€?õ¼?fêúâ€>Gý‰ÿäøÏþÎãöÿÿÖðý¤h3ÀñÊ?à¡_µ§O†8ö4ý„#UWŒÜ^KÆïø(ÓM§Ê€ù˘’Ásnî¢7}ZäDYÒ` ¶Çü‘¿ÙÜ~ÀúÞ³u}q@žŸ³¿‡ÿi{ÏøîãÀÿ¾xwÃRþÓß¶ƒi:OŠÿgxË\²€~Ø?ÕãÔ|C¤~Óþ°Õ&k4©=¯†4tH^+snïÜÎñÏí ðoþ ³¬~Ü ¼Yû6~Ôß²¯†?f/áañcOñÏÁïZx'Äž0Çž4¹Ô¬´…Mñâ_Ä9¼W…®t)n|]¥üWøo£ê6¯£èª Ú>¥qpú]û­ò|×SSšÒãR_Ú7öË]BâÂÚk;ï—öÁøè.æ²´¸º¾¸µ´–q#ÛÛO}y4²E%ÕëLàQÐñÂ÷~8øãŸiþ$×<âÿx›Âö^.ðÆ¡w¤ø“·~ Ño´›ox{T°žÖûM×49®ÓSÒu +›{»;û[{›iáš$‘@?›ÿ‡¿µíYñvo„^'ÔËý…?nÏ?µÆïˆ>ñ瀼á¯h>ñþ¹u¤iwmüuðsÄžø­màMá§Ä‹/ þÒŸºÐ¼¨Áe©&¤ç^#ÿ‚ˆü\øSñ+ž}Ká=—5ÏÚSâçüEcâ95¿üZñ†›ûXZü!±‡ÁŽ¿i‡ž"þÍ÷²§‘ðCÁÿ´6­á¯j^µO>ð'öjk@AÿÁ-j/‹<þøÕo'‡oÛø¯UÐ5ø£Åú^¹à=;ÀñéðœøûÁ¯|DÕ­_âG†<- €~³P@3üÿ’¥ûiÿÙÌxWÿXãöL ¦( € ó¿„6z–ð›á~Ÿ¬øHøa¬X|;ðUž«ð×Ã÷:}æƒðóRµðÖ™÷´KÍ&It«­#ÂWQË i·:d²ióÙØC-”lѳÿÐþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@ò…¾ þÒ¿áñ6‡ðÿã—ÀÛ_ë>/|EÒl|cû4øûÄÞ#Òÿán|Uñ—ÅkýQñ‰ûUø3LÖ?±µ?^évš…·…´O´ØYÚÉ5Š\€9­+à7ía¤übñ߯xhÙâMwÇß >ü0Ôt©d߉_Ù6zOÂ?|hñ^‹¨Yý²ÖóûGS¼øÛ¯Ûjž}̶ÆÛIÑþɼ¢õ¿k‹¾Ò¼)­~Ðÿ³Æ™g¤üKø/ñ>/öMø”.dÕ¾ |bð'Æ}O—í¶]Ä_ÙÚ¶»à;JÖ6F·'I½¾SÚÞ{˜€3õŸ þÔŸð½þý§ã¿Á_øK?áR|kþÄûì›ñGþOøGá1øÿ Gü$™ý´·ÿÂ]ý¥ÿ‡ü![~_ìoøO÷ó²€4>üý¬>x[Uð¦‹ûCþÏž­ñ/ãGÄù®uOÙ7âQ¹VøÛñ‹Çõý>/²~Ùvñgi:ïµ+Gß\&ÊÄ^Ïux..e5_€ßµ†­ñ‹ÀŸæý¡ÿgˆõßü4ø±ðÃNÒ¢ý“~%dÞi?˜ € k"±BʬcmèXQŠ2ByV(î™;—£À UTUDPª *ª€UF¨ 01@ @ X£C”æC•ES™ŸÌ”ð:Ë Hz»Í¹†TX¢BY#Eb\–TU$ÈU¤$€ 22«9ÏÌUKd… Ð@3üwÿ’¥ûÙÌx«ÿXãö³ ¦( ™üað—ãü.?|ZøKñSោ?á/øgðÛáÖ¿áÿˆ¿¼Uñ3þIŸŠ~.x—JÖ4}WÃ?þÿgÿhÂÛÔ¬µ >÷MÕÿäcsm}qnÀiñcà7íañwÂÚW…5¯ÚöxÓ,ôŸ‰þ'Ãs¥þÉ¿…Ìš·Á/Œ^øÏ iòý¯ö˸‹û;V×|§iZÆÈÖäé7·ÂÊ{[Ãos¥ÿÂ+ûcÑwý™ÿñ~)ÿôiPš|'ø ûX|"ð¶«áMö‡ýž5;=[â_Ɖó\ꟲoÄ£s­ñ·ãŽþ3ëú|_dý²íâþÎÒußj:V¾6¹M•ˆ½žêð\\ÊÂøkáWí5gûW|iñÅŸÇï‚­ã¯~Ï_³…/öMø”.dÕ¾ |bð'Æ}O—í¶]Ä_ÙÚ¶»à;JÖ6F·'I½¾SÚÞ{˜€=/þ_Ûþ‹¿ìÏÿˆ›ñOÿ£J€;Ÿ uŸ„ÿG„üEâ}3Æ> ½ñÏÅ¿ˆïˆt_ ]x7F»Ö~-üYñ·Å}NÛIðÍ÷Šgøÿ%KöÓÿ³˜ð¯þ±Çì™@LP@äŸáÿ…ðSþøK¿áÿ…IðãþøOöÿÂwÿïü!Ú7ö'ü&»>OøK¿³~Íÿ &ß—ûgí›x ÿÑþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@yÞ§y©'ÅŸéñxçHÓô{¯‡//¾Ím§¶½âÝJÃÄ¿ Ò¼s¦ÞIÕ`Ò>Zê:έÛYÊš}ÕçÄÿË©Ç%Íž’Ñz%P@|Ïâ¯ù<ölÿµŸþ­/ز€>˜ € ( € ( € 
ùŸã¿ü•/سþÎcÅ_úǵ}1@P@yÞ™y©?ÅŸiòøçHÔ4{_‡ ï,~Ãm§®½á-JÿÄ¿àÕ|s©^GÕgÒ>!ÚéÚ6¢[^Jú}­çÃK¦GÍæ¬Ò€z%P@3þÉŸòK|Uÿg1ûiÿëcüx ¦( € ( € ( €>gøÿ%KöÓÿ³˜ð¯þ±Çì™@LP@ço5-Gá7ÂýCYñΑñ?X¿øwà«ÍWâW‡í´û=â¥uá­2{ïè–zLqiVºG‹n¤—_Óm´È£Ó೿†+(ÒÙcUÿÒþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@yÞ§g©?ÅŸêxHÔ4{_‡,ï¾%Ms§®½á-JÿÄ¿'Ò¼ ¦ÙÉ ÕgÒ>!ÚéÚο­ÜÙÄú}­çÃÅ©ÉÍæ’²€z%P@|ÏñkÁÿÿáqü+øµð—ß ü_ÿ‡Ã?_µÿüEø“⯆òS&|×> |>øàoü$ø™âˆºµ÷ƒ¾;xûâ7ˆõOí¿þØhúw‡µ¿Ù¿áŽ˜»µ?‰ÖZ¥Þ¡s☾Ía¥]G åÄð¢€}}@P@yÞ™g©'ÅŸjøHÓô{¯‡ ìì~%Cs§¶½âÝJÃÄ¿çÕ| ©YÇ!Õ`Ò>Zê:6¿¢\ÞDš}ÕçÄÿE¦I%Íž¬±z%P@ ü*ñOÆ„'Œ<û(|rñü_/ÚOÆ:g‰üâ¿ÙsþÍsß?hoŠü)¨éÑø»öð—‰­þÑáŸi-wi­xoH¿²¿ûU¤¶£É8¦Â÷ø¥ÿF[ûLáUûÿôYPÿ ßâ—ýoí1ÿ…WìsÿÑe@ü/Š_ôe¿´Çþ_±ÏÿE•ð½þ)Ñ–þÓøU~Ç?ýTÂ÷ø¥ÿF[ûLáUûÿôYPÿ ßâ—ýoí1ÿ…WìsÿÑe@ü/Š_ôe¿´Çþ_±ÏÿE•ð½þ)Ñ–þÓøU~Ç?ýTÂ÷ø¥ÿF[ûLáUûÿôYPÿ ßâ—ýoí1ÿ…WìsÿÑe@ü/Š_ôe¿´Çþ_±ÏÿE•³®™ãí¿Ú7ÆÞ6øsâo…ßð´~9i~1ð¿†ñ"ÚZ/‰&¿û·wv¶¿kŽ:úb€ ( ;øCg©iß ¾éúÏ´†ŇÿYê¿ |?s§Þh?5+_ i_xD¼Òd—JºÒ<%uº›s¦K&Ÿ=„2ÙHöÍ0ÿÓþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@y&³ÿïü/†ÿiÿ„»þÏøTŸÿ±>Å·þOøGá1øÿ Gü$ŸÇÿ wö—ü"ð…mù±¿á?ÝÎÊõº( € ( € ( € ( € ( € ( €<“Fÿ„wþ¿Ä³Â]ÿ gü*O‚ŸÛmÛÿ'ü#¿ð˜ü~ÿ„_þ¿ãÿ„»ûKþÿøMw|¿Øßð€mç}zÝP@P@P@P@P@y'ÀøGáD|ÿ„CþïøDÿáR|8ÿ„_þý¿ðÿÂ;ÿvý‰ÿ ®Ï“þïìß³ÂI·åþÙûfÞ(ÿÔþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@yÞ§y©'ÅŸéñxçHÓô{¯‡//¾Ím§¶½âÝJÃÄ¿ Ò¼s¦ÞIÕ`Ò>Zê:έÛYÊš}ÕçÄÿË©Ç%Íž’Ñz%P@P@P@P@P@P@w¦^jOñgÆÚ|¾9Ò5 ×áßÂûˆ°Ûië¯xKR¿ñ/Åø5_êW‘Æ5Yôˆvºv h–×’¾ŸkyðÃÄé‘Çsy«4 ‰@P@P@P@P@P@wð†óRÔ~|/Ô5Ÿéõ‹ÿ‡~ ¼Õ~%x~ÛO³Ð~!êW^Ó'¾ñΉg¤Ç•k¤x¶êIuý6ÛLŠ=> ;øb²-–5PÿÕþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@yÞ§g©?ÅŸêxHÔ4{_‡,ï¾%Ms§®½á-JÿÄ¿'Ò¼ ¦ÙÉ ÕgÒ>!ÚéÚο­ÜÙÄú}­çÃÅ©ÉÍæ’²€rZí;û5ø£âmÿÁ_ þп¼Eñ“J¾ÖôÍSá.…ñgÀZ¿ÄÝ7RðÒÌÞ#Ó¯ü§ø‚çÅVwÞ[k–Öí.4¨®4¥‚c}'*é~+ñ§ƒ¼¥Ã­øçÅžðfq«hú ¾¯â½wKðî—>»â-JÛFðþ‹ þ¯wgk.­®ê÷–šV§$Íy©êWVÖ6PÏu7ðÅÝÞ«akâ?kÚ_‰t›í V¾ÐuË+}_Fº½Óæ»ÑµÍ3QÑu[x®m?V°¾Ó®ÒËYá@ ¿ˆß>|ð¥çŽþ.|Dð/ÂÏé×6z‡Œ¾#x·@ðG…,nõ[Ètí.ÖóÄ>&Ô4½"ÚçRÔ.-ìl šñ%¼¼žku’yQ—¾1|"øŸg ê? 
~*|8ø‡§ø§GÕüCá‹ïxãÃ>-³ñáýGMÑõísA¹Ð5MB cGÑ5}gHÒµ}OO{‹=7QÕtÛÙ ¹¾µŠpG € ( € ( € ( € ( € ó½2ÏRO‹>6Ô%ð6‘§è÷_þÙØüJ†çOm{ꕇ‰~/ϪøR³ŽCªÁ¤|<µÔtmD¹¼‰4û«Ï‰þ ‹L’K›=Ybµ­üQøgá¥×ßÄ<  '…5O h~(}oÅÚ”¾Öü}¦i^Ñõö¾Ômƪx×TÖ´m7ÂZ~ m®üG«é–z<7·:…¤R€[¹øƒà++o^Þxß–õ[} Ç·w>%Ñ ¶ðN·w¢øÄ–š?‹§–õ"ðÞ«sáïø[^·Óõ—²»›Eñ/‡õH¢{cOžà£±Ô,58^çM¾³Ô-â¼Ô4ù'±¹†îïô‹û­+U±ymÞHÒóLÕ,¯4ÝBÙ˜Meiugr‘\Á,jä¿hïÙëà~¥áÝãWǃ?5¥ÔžÒ¾(üPðG€5/%•Í•ëøvÇÅšæ“u­¥Þ¥§ZÝ6›ÊÁseÅ%º·Wô½ľñj©áíDןAÔ£ÑõÄѵ[Q´m^]'K×¢ÒµU²¸¸:v¥&‡®hºÔv7b—Òu/QXž¡i< tP@P@P@P@yßÂ=KNøMð¿OÖ| ¤|0Ö,>ø*ÏUøkáû>óAøy©ZøkL‚ûÀÚ%æ“$ºUÖ‘á+¨åÐ4Û2Y4ùìì!–ÊG¶hÙ€?ÿÖþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@y~­§é’|hð«-—ŽdÖlþü^Óì5>ÚFøkk¦j~+ø!q«Yx¶ð!ŠêZN‹?ÃËg‘$ºÐ4߉òÄ’-œÍás~ʶ½çÄk­Ið¯í¥Øh_·WíñóÀ×Þ1ñŸìWaû%ø_ÂßµÏÚ-<+ñGÂ×ÿ uÙn#ñ@øWâÔz‡„4ý³ÃZÄvêãÄ~Ó´MJ›Eô§ýk/Ú_Æß5/ŽŸ ÿhï‡øqwû¶¥¤ßþÕV^×cñOÃχµæ‹ûD|F±½ýÿh›ëÁ©ëþ;ø=as®C­Â[âûX4½FÞ  ÒuTÒ€<ƒàWìƒÿøGgû"xK×>(x_Á¿ ¿gߨ÷óZYüPÖ|cáÿßx7ÀV±þÓß üo}©þÛz…õíOÄ*>,Ð<+¬Ù—öˆÒ<=áÝ{áöð«Ç¿t‡ºÏÃгüCû*~КŸì+û#ü$¶ñwÅïÚÿ?b߉¿~!øçâNñ_ÇÞñ‡ƒÖÿIøA©üðŒždø/Vžñ‡®~O&‡.¯ñƒûĺÃëš žøÓàïüo^øÑñãV³ðïíEáχþ3Ð?im øAÿh}X®/ÇíeðÄÿ³æ¹ðößÄß¶ÅŽ™§Øj_³wƒþ,é’hÿþþÆ>#Ð4ß_|1Õþ"Âiª¯ÆŸ Aá_¿ðT»ß| ŸYÔ¾&x;Â>>´Ó–ßâ?ˆ¼Yqá9tÚÛâ÷‰|{â/ŒÖº×ü*ím;â'ìë©|!Ótß xÒ×þ {á½EÖ¼ ¦øƒÂÞ+´¹ñ@?¡j( € ( € ( € ( € òý'OÓ#øÑãýV+/ǬÞ|/øC§ßê:…´‹ðÖëLÓÿ‚LKðSá\økXø€ŸtÿÜþÖþøOakñZðXø¹¥xƒN³ñF§«Xü/µ½½·ñ=Sà'üÊ?‡þ7°Ñ´ÚÎ{í'ãÄ_üðüß´Dº}­÷…-ø~ÖÇ㇃¾9êº>­â/ÚS㟅üwã[¯ëÿ³ÇÄ=#Äú†¾€}Ïû)ü*ý»ôÚóâ/¾=ø—Ňáµî«ûGÌÐzÿ[øYâo ø«âÞ™¬~Îz?‚ôÝCö®ñýž‹«ü9øW¯‡¯¯tÙ#öo›í:oŠ­¼W­|LÔµ«/ø„õº€ ( € ( € ( € òÿ‚:~™¤üøC¥h¶^9Ó4m3á€4ý'NøŸm%ŸÄ­?L³ð¦“oaeñÎTŽ[_ÚZÇ-¶–8äƒ_MB'EeePÿ×þþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@P@P@P@P@P@P@P@P@P@P@P@P@ÿÐþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@P@P@P@P@P@P@P@P@P@P@P@P@ÿÑþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@P@P@P@P@P@P@P@P@P@P@P@P@ÿÒþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@P@P@P@P@P@P@P@P@P@P@P@P@ÿÓþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@P@Ú߈ü=ᨬ. 
ü Ö<^—RxOJø£ñCÁÔ¼P–W6Vw¯áÛkšMÖ¶–wz–ktÚlW+Íý”–êÝ\Õ®µ}*ÇJ¹×ou=>ÏC³ÓæÕîõ›«Ûk}*×J·¶kÉõ;FYVÎ > D{©¯e™m£¶Vå+:€C¦kúµq­ÚhÚÖ“«]økVþÀñ®™¨ÙßÜhïöf™­ÿbëpÚÍ4šV­ý­hÚ¿öuòÛÞfjÚeÿ“öKëI¥Ö q|aá'ñtÞOøuüwmáË_\ø)u½5¼[oá+ÝNïE²ñLþ'X‡Ã·šÎŸ¤ÚërY®›q©XÞXÃr÷V³Ä€5àÿÚóöNø†Þ-O~Ô³¿Ž_À:¡â¿/ƒþ5|5ñ3x+ÂÚL¾N«â_®‹âkãáÍL˜ˆµ cXûg)òînbs¶€>ˆ  “¯èC]O kI&“I—_çQ³ìš7‘iÓkI¤yßom&-Bâ u·6iy4VÍ0šDF󟈟þü ×<%á‹?>ü/ñ'®ÓOð/‡¾"|Hðw‚uÏ_É}e¥Çeá-'ĺ֗â;·Ôõ-;NKm ÉšúþÊÐ!¸º‚7ôm_мCìúµ¤ëiº¶« j3hú•ž§†»¡Þ˦ëz-ì–SN–º¶¨Á=†«¦Îc¼Óïa–ÖîgÑ@5¨Èþ#þÐþêþÐ>.|løGð³^ñÄïkà½â?ÄøWñ}ÌwVvOoá}7ÄÚΙ{â Ò÷P°³xt˜/$[«Û;vQ-Ì (®P@x½¯í#û;ß|S»øeñïà½ßÆÍ>vµ¿ø=kñKÀ×ì®SK‹[{k¿‡Ñk¯âÛiÓF¸ƒWhfÒEÒçŠý”ZIÔíÎë^/ðŸ†Ä§Ä^(ðî‚!:(˜ëZÞ›¥ˆ‰5¨|5áá/Û®`òνâ+‹}E ´êšÔðéV>}ü©tTá¾ý§¿f¯øÏÅ¿|'ûC| ñGÄ/¿‰bñ×<;ñgÀZ׌ü'ƒ/§Ó5¸ŠÓÁ¾7ñÇÅ/øOÂ-ºžÆçT‚ÛÃ>%×õÝ?F×®&Ó,îõ¡Òïn¤’ÂÖâí­à–Dô¯ ø¯Âþ:ðæ‹ãø“@ñ„|K§[kñO…u;Ä>×ô›ØÄÖz¦‹®i77šf«§]ÄÂ[kë ©í§ƒÅ+© @žƒñ§àçŠ|LÞ ðÇÅŸ†ž#ñŠ]ø¾Áü' øïÂÚ¿‰–ûáö§e¢øöÉ´-?U¸ÕVïÁΣ§é>/·6¾w†µ;û+ e,n® …À=.€2uýBð®…­x£ÅÖ“á¿ xsJÔuïø‹_Ôlô} AÐô‹Iµ [YÖµmFkm?KÒ´Ë {‹ÝGQ¾¸‚ÎÊÒ ®nfŠÔ/\ñׂ<3uscâOxWÃ×¶^×4xoPñŸÁß‹¿ >,x?I¾¹Ó5_ü4ñ÷…|wá½7R²µ‚úóO¿×<-«jšeõ­•ÕµåÍ¥ÅÔwZÜAq,i Ñ»€v>ñO†%~ÖrøßÁ_|¢x»Jÿ‚ºüoøUuðâ=σ¯oµÛì» x‹Æºöã³…¢Ö9ñÆãÏÒjÞ'ð ý¦~ücµÖ,¼C¢xÛÄðøÒûNñÕ÷‚ï¼-weâ«phøsðö³Õ?aOÛ³ágÄH|}¬ø×âÿïŒ>øðëâg‰÷ŠtÛþÏVÞƒF.ñŸíƒûdÜZé&ø‡-ô¶ƒÅ´eý†—¨Ï¬jÑÙxWú•´@Š¿gÚ"‚µ¯ì©iû<ë~*ñ?íQã–ñ_„?j«Oü‡Á?'ñGƒþh^ñïÄXµ¯ˆzwÅÛ?ˆŸ²%÷„lbøm¦øá׎m5«†¿ .¼-âM>{ïEà@«hÙƒâ߯¯ÚÓÀÞ-ð×Ä_ü"øUì›ñ·á:×¾¼ÚõÀÖ×¾ ý®Ÿö‡OYø÷Ëøf¢·ÿgÚ¶;Ù·ý¢i°HšE ?‡³½—ü ö¥ÓõÍBÆm[]ý˜ÿcÍcÁ‘@ƒLqð÷Cñÿíu¥ßh¯aqsy?ˆu¿ xûTñ½âØKa§C üKøsáKi·úø“ÆÀ‰~ ýˆÿnmGö|ð—Ã|.ý¡/|Qðûþ ÛûLþÍQhŸ¼mûÅð·Ã>)ø™ðãÁº6…á_ÙžÿösñRüFÔo¼IãøoL¸ñ7í;¨_iz_Ã.îI~Õ×>Ѿ?|bøµâÿŠ?k7Âþ¸ðÄŸ5=kà¿Â ï†:‡íoð™5Ÿ…÷zdÖÞ-øƒw­xÅþ#·øW¤i?tMj>1ñü €ìáÏÛŸà<—ºíkmñ‡â=ÿŒ¬>|4×ì"Ôì¼Y¥ÍñŠÿZñ…¯Å?Žþÿ„›öÊøÿs¤|*…/4ÝOÄvþ ðŸìý¦ÅáS¢_h³O„—Ã:wß´…þ<ü.ø§û\xóÁ±ïíÁ¦þÓ ¾ø/ÃÚŸÄOƒ>°Ò´éÞ+𯎾 |I¸ø½âÏ\ÚxîÇÅž øŸàãámÆ:gŠ|Sã?ø[Äðx"â]]ñÌ øCÿ5ðwÃ?‡ß| sñnçÄž¿]cBý§|qñ/Á|5“ÃöÿðHào‚´ï|>Õ¾#_|X¸’Ïöõÿ„wǾ#øk¨ü'»ð݈a?$¸»»ƒí”ß…¿ à¥_ óÅWÞý¢þ!xsZðoÆoén>.xWMñ_„¾ ø»á§Ã„ð‡Å]WXø½ÿý¦QðmŸ‹¼)­i^YütÔ¼CàÿjšÏ‰üðGѼW®jz€¦~ο¿à¢º'íYðÿÅ?ü_ñO‡:;øro]M⻟xXøzŸ²Ž“á;¯‡ºÕŒŸ¶­ 
]xïNý¡ÖêÞ&Ó?cµñF«¬i·!ö…Ô|%­ÞøOTÐñ‡ÀÏÚÅÚçí-ðZÙÇÅvzGÅø(7ÁŸÚKÀŸ´®¯ã??ð­<à_‡Çö_ñ©ãm'JÓþ+j¿-¾$é2|ñ†‰àýþŸo}â{½uoéÔ5=vÔã{¯‡_ðQ†~ðåÏÇÏþÑþ%—Å_¿fo|GðߟŒ:·ÃÍgãg‹/¾+ê·_5‚~6ºýº>$ÂáOøU5[i4}Jýˆü5©øÄÐ×ÀÕ<7>íï?a¯Ú›Æº§5ø#öˆ˜|Ið·Ã¯ ø:çÄÿµxñ‹>|0ø}ÿ—ãƒàψš¥×í¬A¬|Tðïìó©išç…þ ø`üIÖ4› êúÇÅ ï˧ãÕ¯Ù‚ÃâWÂOø×àG<;ñ‹]ð­Ï޾6xûáÄÏüB‹ºvðªÃ]øi…üâˆ>/øƒâߊ²øƒ]Õ¼yâÛÿ‡úW‹cÔ¤µð‚þÔÞ)ð ŸÁ;OÙËÆŸ|Kð×öüýº?jO þѺßþ.ƒ'þ&~Óµ¯Å¯‡pü ÿ„'â÷Š~+Yø¿ã‚~1xGÀzå—üðÊÃÃ~ñ_[ÄÚÄŽ‹cáï€qÞ#ý—à¢iàû/|}Ö5ûߌž±ñŸ,¾>x»W»o‚zìmðÿÃvéàMÁ_¶×ì•¡èßcý©âñ¿ˆ¼m¨Ú|nð—|Q:ë²x×Àš~“áËð ][öYýµ<}à‰? ~6ÙþÒß~"xÃàÌ ð÷Æ/ ~ÖVß þÃáËÏØ7Cø{âøïàß…>/­üIñía¼A¬ëV nö?xkÅ–,|/á¤ø¡€hü2ý˜?ho…¼/ûAÜ~Ï?(¸þÀÒ¼`õÇÿ†´‡>þÖºŸ†¼¨üø‡ûTþÐÚ÷Œþü8‡_ðMþ·û7xw➃ð“àçˆ> k¾×õ¯‡×^.Òµ/x÷ö½ñ¯‡¼âXê>1ñn½á½/\ñ'ˆ/öüöÍøñJÉÿaxÿf |k²ð¿ìáãÏYÞÅû3ê²¥ŸÄü=“öpø‰ð¯Â°xzÇöÉÔ!øMªèñ꿇ô FÚ[M_ĺ\Ï{¦ÁÛÉ{È þ ÿÁ@þ|;“á¶‘ð»ãCØü@³ø$|;©i?¾ h—Ÿ fðïücöšøÏñ·Æ¿£Ðþ/xÃúÆÏ_³7ņZ‡Æ ¿f Åÿ1ø{Æ·Þøá}R-KöžøñQñ†öéøÍñ3⇃ü%§êµÏÃx´êÿµ¿k–š¾Ÿñ[á=Ä’kÞ*Òôˆ¾ñÏŠ¼gy(£Ëû-ÁIo>xãV“âoÇ7ñ¶ƒð§öxÓ>xvãã7ˆ´{kÛ9?mÏÚwÅ¿´gƒu xöÇ{wâÌ?±v»ðoá—†üqãßÚŠ]Zk¶ðèÒ¿ik‰^Ô>/xPϾ.~Ê¿·ïÿg‹? ¼}áŸÚsãm×ÄÙ—âÿ€¾øcÁ´~‹ðŽ?…?|[ñã{ÜZ~Ó>*ý³|`Ÿ¼'â†>&ø/¡x>|Wý§‡|/àïøKÄ:Ž“«ø¡õ-\îÛ×ö_øÍñÃãƒ|gð÷ÁV2ð¯‚þ éí®è§ˆ¼7£ißu/~ÛŸ±‡í¨| »‡YÔ!u_Œ¿ >|Cðt–±d>Ew}o¦øûSƒ@Õ.l¯À=Ëà‡¾"ø—ö¡øÑûNøÇàÿ‰gOø‡àoÂ/„qø[â&¿ð¶ÿÇ>>×~ø·â¿Œ5/ø®„ž?ø•àÝ+þÒ|{iáOÞßxÊjo}ã—Ô´múo…/¼B½ÿúûT¿²¯‚5ºµ»Ñ5ÿ|hñOžÖ|5ñWÆßˆ~"øoÚžKoMÿN§¡¿ü%š}¶™¦xž'‹ZÓôË[[؃}£@P@PÿÕþþ( € ù›ö+ÿ“7ý’ÿìÙ¾ÿê¬ð­}3@P@P@P@P@P@P@P@P@P@P@P@P@P@ÿÖþþ( €>}ý«üñWâGìÑñÛÀÿü­|-øÑâO…Þ1°øQñÃóÙÚê¾ø‡ýu?ƒïḿ³Ô-!µèlmoå–Îrš}ÅÑD¡@><ÿ‚1|*ý¥¾ÿÁ5ÿeÏþ×^.ñ¿Š> rocksdb-6.11.4/docs/static/images/promo-flash.svg000066400000000000000000000032371370372246700217060ustar00rootroot00000000000000 ]> rocksdb-6.11.4/docs/static/images/promo-operations.svg000066400000000000000000000035121370372246700227700ustar00rootroot00000000000000 rocksdb-6.11.4/docs/static/images/promo-performance.svg000066400000000000000000000400231370372246700231040ustar00rootroot00000000000000 netalloy chequered flag 
rocksdb-6.11.4/docs/static/images/rate-limiter/000077500000000000000000000000001370372246700213275ustar00rootroot00000000000000rocksdb-6.11.4/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png000066400000000000000000005307601370372246700274120ustar00rootroot00000000000000‰PNG  IHDR€àº³K³sRGB®Îé˜iTXtXML:com.adobe.xmp gqs5yd0iWyBQp3MiumaJ FBMD01000ac00300000b0d0000eb1b0000131e0000f7200000e7450000e7670000a56b0000656e00008d720000dc9e0000 1 üó¿Ý@IDATxìÝw€-Eµ5ðGQ‘Œäœ%ƒ( A²y@QAQѧŸ¢¨¤’‘¨(EyŠ*Ar ‚䜃(”ïwïÒò¼™9ó.÷ža»ÿè©®Úµkת>{Õ®®î™ì¥—^ú: B ( Bà•E`òW¶¹j­( B (Æ!P\÷A!P…@!0 èÕd!P…@!P\÷@!P…@!0 èÕd!P…@!P\÷@!P…@!0 èÕd!P…@!P\÷@!P…@!0 èÕd!P…@!P\÷@!P…@!0 èÕd!P…@!P\÷@!P…@!0 èÕd!P…@!P\÷@!P…@!0 èÕd!P…@!P\÷À$!ðôÓO«ÿñ‡ÄßÇÉÿÛßþöâ‹/ºüóŸÿì<È?ŸŽØ_þòb9hDþ_RÃð7FÆ`lèúc=–ËǼÀ /¼¢?ýéOI<ûì³Ë9ùÏ?ÿ|Îxæ™gzˆCÌhM´v `}î¹ç’xê©§’ Ü hòÑóè£F&}yòÉ'sÙÿ¬ɤÁùþb9ðl ±Ê¥ŠC”N£ýë_ûèíVWª‹mÎà‚NQ³6wE“飶]¶ÎÂJº]ÒÖêj=·\»ñXÕ9@M[ŸD´õ÷>buù #0Yûi½Â Wsc ?o®áµ¯}­N¹£¸ž×¼æ5é ß¡HæTSM%Ê)§°ãÉ'ŸœüSLáÒ¹)°Ê°dêßúú׿^§šµüî™i¦™˜dÞ èo|£ôO<1ÝtÓé—šÕ'›l2½ƒ1]V75G™a†Z§xXú‡êBÚjƒÂѳǡigvJ°ÍA9LrN/ :óÔšfši(|Ýë^§¨Ù?`BǧžzjEÕe‰nU¢<ç¦*í¶Ë¡H¾7¼á M³‘e¡nB€Í`I‘{@G ŒéNìtKÏ¢)ì–Ð.Éþ¿Ž6:*¶tZqEÌsNº[ò÷Aä«h¨¨x¨ãú¹TΈ‡â>øb—œ‚K®“OÑy~–¿p–ßß¿4tâÙ]Æ•ðn”´Ò‘“àaã…ãìt“mr¾.[¸ØW¯u?9Bg”L^ï@¡ÿ™t9à¨Eóé¾Cuò-p™t„æÆ(ìkà(Ô\ À£éZXDQÎ>gi†Å6ÕIbß@ÑÍ´¥b 7^Í?/û׊6¸‹Œ"fXù^åôáËÖ"ƒNîj}O‘s#i8ðÆxpѦ›úžÎ±MÍ-¡ˆ6=j‰§;2 áèqÍjÑ?ˆrƒŒû€öTæ+ƒ@ð+ƒó˜m…ãˆcÒCÜe˜‰ëIÄíòΜQsbAI<“õjž=t5 ü0f2,ýÒ#¾2–p»\0¶à ÐpÖ)ÎQ— ™é§Ÿ^@lMÒ¥L=å£aE'’¦P-È Œhn§­I9Ï8ãŒÆˆ6$‘!{࢖Óoc!ã12Œ »”Ö# Ý$¬û‘é>!þP)%ýÅZNlsÉ€i§V¢“ŠšXÏiˆVïô:gs¦i±s£ãnTvºá œKùȸ2ô4wø€6Cƒ~©î¾ò«É¬Ž|žz4L¢‡¤sŸ¨¨º[k@å2»{7ùÊe(~ep³­p¬ñføŽ .ƒ? 
Wé?Á£ Éû￟s±Ë¡p…"ÈA䇥H¿øAÎQëºn@“2-92ØÁêIghèo®ûÎÌ3Ï µfuVzZÇ×ÓG/ Íoöª³ÔF«˜§ §Ñ9æ˜#f åu'ãÅž˜¤¢îÄã;ë`È顇¸+ å hjV×sW¨¥ibQ5 ¼¢˜šÅi†Å”ïa&înKï4êÐ5X¹- 4ÄÈhÑŸ™6‘‰ ,'(ˆ‘É¥=/ïf*ó#"ì®»îºóÎ;]6ê•&=¤îF2‘ÃTwT¦þMt÷þ’•óJ"PüJ¢=Ûâ„ k®¹&Gƒ`\rµóÏ?ÿ¿øÅsÏ=—3ŠÃJÀ7ˆÃåÝh˜sÎ9aÄïp@_úÒ—š×9ÀÅ™š(l½õÖ›nºé½÷Þ˶° û¥·Ùf—|bèVæ7¿ùÍyç÷£ý¨R‘X\vÙeßýîw#9™|=¢ÕY—øæ€ù‡?üáþç²rè²WGb¦ÿú¯ÿÒ‘Ë/¿üÐC ÕÝ}÷ÝK,±Ä®»îªEÆóì,áÖ•6bã`Œê /¼ð9çœ#=Ûl³a Cîfªî|ÿûßWJŠÂî¡ánòÁ“ä%—\òío›U<Ô­Ö$æà0R·Ýv›‘rOÒ¦Q‡Ñ1ÜÐ0ÁbÍ¢_|ñ¯~õ«_|±Œ Ÿ¬Ù¤¢º`Œ=¿ùÍoŽ?þøÁmÓ = Ck,ùÆ7¾¡iEꦈ Äæž{n÷ pX¢“98¸[ÝÆ½›|å¿BÂ: IA€ƒ˜gžyÞþö· èán¿ýöÕV[ímo{—!‡?"ó6Á‰Äë‘äb¾þõ¯ÿŸU^y½K£úЇÖ]wݤy^ ‹ºÎ'žx"ãyÏ&¹Â +ø1 vCQ©²ôÒKïµ×^ÒI@©á°ÖZk­½öÚ4UzuFóihƒ 6xË[ÞòðÃG3#wß}÷¤cLZo6°*¥¿ÿýï1®ºÛm·0=Õ»”xVúÁ~P§²D¯ÊàòM ž~ï{ß»ÜrËå^jùC”`dVM`}8Ko¸á†&š(VZô‰¤Qï ,ðæ7¿9¶ª©ƒ©EX¾.¬¾úêí–ÙÿHiWªÖ›Þô¦Å[ÌïÂ%m8M»”™».—Ñ6ø¯¬Û¸§n‡Š€_¡‰ÎXm†?5÷7õ61wæ&Ä7¼Ò×¾ö5ÖÕW_Í¡˜¿gEŽÏí†ï#¨¢!“}nEÅnÂØ¯wñÈ¢·æõ0.“ØÏì·¾õ­Ò¿ûÝï)ʹꪫ¶ÜrKŽ;k¶ª‹›¯»îº5ÖXƒ…Œðn:Îç¦ûä($¦™8ØŒ;i~ä‘G„æ¼¼KÍQ(êÕ¨3XžÖ“#Ó9ã/ÉÍ7ßü˜cŽÊtA×’îÖ5œACtÇJ@±–“F³ ÉlFçD«8‰ ÃaE˜°Ù(·µz€0[ÇÝÆîö\p•UVÙm·ÝþøÇ?^ýõ”‘Ätž‰ï¬ËYWÈ-ÑÍÂô,ÁY-ØšÚº¯T¡ÍÀIPå,3wK4í™_™DÿƒÀ€ãÞ_²r^IŠ€_I´Ç`[Yhåªø)ÝãÄI¼OøØ’2.áP,ÃzÊÈ¡8ï¸ãŽä sdŽýöÛÏ’5wcAï¿ÿû¿ÃJ[{ñ>^x¡LÞçSŸúyë¥ï|ç;?ýéO[ÿŒ*.Iõùæ›ðücîlçw^f™ex¥ÙgŸý ƒ"æ`ž’ÉùØÇ>&ˆ‘ƈÇÔc¥dÑEÝb‹-bFø€%2¹H •HT¿ÆçŠ@$Ü™Çoêz)¿ù)̇Nnºé&´Ê*šï»ï¾“N:I”É92@»þ¿ÿ÷ÿÔâ›L¬£n¼ñÆ.➣Ž:Фº|">N1mýò—¿¼ãŽ;>÷¹Ï‘ÔebšŽ@üµÐ9q—”cSµ¸¿å—_>”À€|ä#.Óœý>—^zé­·ÞÊì|±aï½÷VôŽw¼Ãq[΃áï~÷»ŠŽ<òH†éTkZ¦¶<5ç‚ ‡æ2_á:ɰ„#V%âÐI¯·Þz&(JUw€E§L¤õ«_I¥–LæQ~Þyç)¢ß0ÁʬL³™¬-+2£¢2ªèéJ+­$Á<?6 b ²¸j†AÒsè8zbs…$¢P˜Ö~cg7ýÄ—ÆNÚBÒ¬+ª°¯¹Ži;ÍýèG?Š…H×óÔSO=ÕMûáØóû ŠAÒYZE·¢¾èˆË Oƒ'»æ”n]b:…}…¼~ýë_ã?ÔI}”.äÒë²yR?ùä“ÙóƒüÔè0Fl‘EQ‘¶±Ä æRgýR$Ôuüìg?SÅ£}|¬Ý³Ï>Ûc›‚"ýJ•nãî—bèi`Œ³ß®5Q ø`̨s7‰«£˜hüh¹$ã§n.ï̳8;B$Èûz 燽ÓN;™¡ó&ánHëq.|z,ðÝ8îଳÎJf"'{y„ª%Ì÷ 5”†V·Új«HâN~ì±Ç2O×fS“R*Ýù÷¸9<$Œ ½ˆ9óeB¶  ¨’O­µA.U:–ëf\3KxsÑXò­H‹)‰q—‚c íâ±»41Ô"áˆânÔ¥'¬P’Т¹ ›…wãä^zI´'djŒI/`"¾þ-·Ü¢iKŒW+˜m Âè©.3,›çôQÈ~ù|qdœ™ž% ßóž÷ˆY#&ŠUËÇ¥îb—_ùÊWRËÙœcÉ%—Œ°žÆ[L;òÀÅ»zl3T•6Ö Ü†2ƒeôñ±*çŒ_Š·£u—HÑ )i—è\Cácˆé{Š “é ÙR”´N` ÑŽŒ©¤nÌl+",¡ÊæTûÊ7V‰¶M˜Rª¨ÿ¸›îø‰ s#ƒ­Ý*Ï¥Û ŒIçFJºÎ½B – 
Ý–uL<($Ód!¯¨Ô}ɯaJ,ËãN4ÉÍá?ª³þ,ⱌ‰b/»ì2uù&Ρ¢0…)¼Ú,uâ?ÎWŸ‚V9 °V&Ñ¡µ8 ›<ˆR—\6×ÉÇ©Îawj2ÐV¢FÜ1o¢­Ä|¢®$ãÕ¥±!ôlãŒÕE½VS$þÐk¾ñT‘ñ<5\àjUgc®¸â îU>¶Æ£m Ð1gG'¾Ç1ñÅÄ´EÐL €€(ÑqEZtÉ™1¨îᨭ;kWø+Ê›R"Úµ„ÀZ©…‡˜¡.õ=Od¹ø¸`6@UD«TZw$tìÌÐÇ Ü°NèÀf›m¦ÈN]{ ²Ÿºjo’$at°{tÇ(àZÑ¡| ­|=ae(3—’Ik)tˆç ½Léõç&Ì~7+1º€øÝ!Þpà ]uÕUc  I¡†2¥Ëc{=u?¸Qš† J'aiÚÙɡÖJÇÍQ²´NRuCC³ˆÖ’ÏÊ+¯lË·E~ë=4˜pP#­ó³ð]ïzŠj!ËãN“;3­àD4 ê¶×MƒÚ0¹KaaEùyB¬Û¸Ÿyæ™æ n'öküĒ€)‚ióôQܯº›Í Æ쨬I@ xÀ«ªã¿¶è—ïàü’㪄ÂßúÖ·8,k€ÎÖ?y^‘+²A̸'Ožüª¹l\¥:÷MF‚âVdâ üÇAXžU_ÃÇ… `ïÁÿB¿RžBjä³øEäqpV‘0å<¾VäÇ›à!þ4ž1JtAagV¡]‹[äFµb]—å«þº$¦b4¨ËE< ·°0ŠKÞ˜¨Bõßþö·øÀ%z F9_Ì~8¤ïΜo:ÅñÑCy"±ˆ4QQJIœ&#¥åë²ùŠÇ*ò¡"014¾Äúøµ£Á¡"“ÀôêåkH:‹Ìú•¨×ª#µ:ÅH224Âkª®9<ýôÓ5­XÖALÔ"oŠׯSZ#y´¤H/ г´Æ@ˆé)qÈÑ)šдǨH‘¤ÒM¿*Ž4ЇT' ™”0o£6 ˜.=MoZcòaôøzªé< µÆ)¯ÁÌ&0Ô}–è²åá²ÇáQÕ=nÐ…=÷Ü“’ƒ>øŸøÕÝ?êfºæÒ³Á„Ìý áÀyîŸPî®H-%ó™Ï|á _°GCvEQkS·è‘‹Èù;1ŠxŽÙ|®Hˆ¯äy¢ˆq7P(¯ Rhº i›¨Q&ªÀFñ¡ ¦R%ÁT±”DZڢ9ŽÒÓnÓÎÓP®. z*­Vf4èv÷¨XZC\¿gÞÕÁsšò$íª%“—„¡0×¥Gï!-îUëçÑOˆˆélU èßÍ 1‘Q+‡L{Ð4ä,ùU¾Z@ótÙV¬•oŠcÚ¡›A;ï0«CIšO°ì¨ÂSRû‡]ê»Ý¾3S VDCšV‹þÐ%jöwÓ¯¢RݶtMÇð˜ÃmfA^¤îÆ3²»€E kŸ”ûĽ§ˆÁ¦/ú¨i6Ó ^‡ãݽؼrÜfª\ÛÐ|ŽÃfw)­ÙÁèþ§JDË}´!έmjÍÿŒ¦ †.ÙiQÄóìÍYÝ{ðÕéNÈ;ø÷9k%?"ùnN†¹O&‘ù‡{Ø@³Ö\V¢Û¸û¥Øºhã:X˜êö¶^å%†¹íÈý9฻…ÜHn'7•¡a¹{ Õ¼?STç^!0îÁ@…À$"`‚wÇÔÜS~ùQ.ÍM¤H,’Ì8E‰~dòÎqÇÙÂã’— û†Éä„M…×–ï’) O!¾lÈÉÑ|zÄâIãìoÛÄ¢â»iÐAçô‘3•f³ø ÊÓ zÂOÂë”2¦á@-M¾±…œ†@3^£Q•êêÆGËov¦ g9̓ÃØ¬(Ð50M˜R ª©˜~5#%¨jf Ú% ]¦˜å1 ÚZë­]òŽhbT9ÒÕ“tè @¦¡ÝM¿q߸E…Ö—ääL9µ©Þ™/͘Ø#mê“ÒÜÒIП“¢_ßYÛzDsòƒ¿ŠmVÁþα&¦nC˜ÚVW­faˆXrÔ¢'µ¤„›Z·Km)’˜ã³_ê6îËx‚t'h4ådÚýmuî EÀ=ñÕ«$?ÑxI(4߯á÷ïàš™x8—¸ì8,2¥ùñ«H˜@óŒV¡…V¢ÒÀxx¤šËN¡.ý.O$ͧH0Cfs™Š9“IN„eÆ€”ªƒ]2²Õ%¬Ëâ#ÙäÛe#†–ˆ_V+21ɹ9nÍ5…Ú’/'X©Ò дüN;›Œ|’ÍÎ4Ô2ÓP.Sï¬áÚ†y40µÙù>—Éd³Š­ÑÆÁãÆõÅ]6óšþTlg&©Þi˜*M¡žvÓz†p—›P•v[¦!zPë¬Ë @FF@s¦"ÉÎáhò„[­H+JN»[2‹=Šhn ´^·„Ò(w‡tBÔYEÚ‘ÒLS\ÒàþÉeà¢*”lŠÙz3:ámE­#´E€†ÖÿþlWb(žD«ú?ð£õcö3vŽì„&>¢3‡XœZ~ùª7Ÿ±x“¨ânâ¾O<ñDkÎygŸ‘MX„ãhÈK-™±¤³ÅÎt%«ú˜ÚyGFO&q^!Q–ÇIõéŸo±T$aÈO‘tÚ"¯Vü &šýiX§»'ÐiazD’ÎÀ˜ÉI-šUqÄN‰Ti­$'¥47;‰µ®µq1Ûh6ÐJ.µ%Ý¢dƸLCéWÒF'VÅT™ªhðlU"ìÜê²– $Ó\7ýª4ÍÒ?:6-SB¯Ù¢ “³|V9+J)±†¡tS’.äR»ºô:ñTšîGŒ©ºƒÓ|ÊÓ´n:0q«%ÑÒ"ªœ[ÇÛ`¥¨ b.÷¦„ÎÖ;™iB×:ïÏ&\‰ž 0îããž-ÔQL,~ŸyúÛp(”:ó 
ȲsïâÒ¯Ú)þ×C29>µº\ž‡^nKˆÑÌï˱+G‘Li-橘K‡ºœE{Næ2D§Zšð¨Œk#ãa3&PKØÃïxnªQÂòù)’ ƒÚž –Éf;¨%#?Š%Q+_[vë°‡|öÔ¤2:B ‡=îÙœ¦L¸¥kÒt*ÊÖ°Vÿ† h9½¦ßeg©véÔÓ˜!Á09ѯ¿‚Há]Š™%JsT1ž1:•ÕÔO€žÖ–*w‘ËzĶÎüWɸ—…¤•¦lh¼Î*¸é þw/ÓMµÜ Òî%GÄÚx©h*iSqgsª°ÌÌ$à©òSµ ¤nòôkl¼†´Åþ‚´#vwÝì¬ÞÌ#Ïç>÷g¨Ä¤#P<龪5XòÂ%í,âœù¿í¸­ŵIûÙs fèÙ ¶/,n4æÅDÂg©Å½¢IüïÙZ”ß„†ø)®°¹xj;²º¬’óÄ“…<& ¬éf XËN­PN³Lþ‘“´ÑˆÉlöD¿3ê ÃàβZ ­®Ž83@£$æ»h(¦E™ËY~z'Ýȉ LÅO)JÓ†zIëVÒtëTSQÚ¢‡  é£Lu;ËÒº·¢UE¬4dÄéiµºìR (rH@IëP—úg-¦QÊåKkZ‚ZV±³›~2Ž ƒQ]¢yÈ@U£‹=ÆÚeç(k‹òȤ¹hSÅ% Š(Q% +¥Ç¡Ô9#Õ,Tê6SbΔ« ŒZF¦†ƒ€Ì „û¤s"H³R}Ñt*FÒ™µÎ0tν'Ѧb”k.0FŒ—iEëI0 žFGiz-_¦4a}g@ÿû“Â:zˆ@pÁ|õªâ¤8?×ø ¿Þ¸ƒü°ý’ HÇ%)åüìãœå¨ÎÁÅ×Äõ¤¦ñªDŠLÒ\I§›ë ó%_q»|Ms:)Jà’R0Æ(¢°£iÖºt¼a§C°|Õ¡yÒt?^;µ˜'SNBÉÈ·^³$­]9Ԧݖ)AIë‹4™éNZ—6­ ­kòùbu•:kÚ% ™]i½E¹õWåŒ1vFYŨ¥Ó=Ð9»jô隆‚d3LrÖ…¨m}P¿ºé/›MwTgaËlª(i&¹g/‡Òè‘PʰL&½õé…*¯¨Õµv§I()ƒ5‘Õ`ð²ªUQä€^À§}âT­×lpÐ “~gòÄ22aEÛiˆ’ô…žTwG)M+ ^—÷aúe¶ºé‹sÿû³Í™šL%&"àIA¯ê…@!P‰À¸9r…@!P…@!ð #Pü ^Í…@!Pã(®û ( B Š€‡ôj²( B (®{ ( B Š€‡ôj²( B (®{ ( B F{ ÞÏkì^ùdß¼MœàS7P!P…@!0IŒ_ˆA§/|á sÍ5×µ×^Ë&iß;Ŧ±ïòË/õ^uÕUˆ09ùX«ô)§œ‚P>úèo¼q·ÝvÃâwÜqG"Ú}÷Ý@Çü=÷Ü£ˆB‰Tßc=|Sþ¨£ŽòϼòßÝS‹þcŽ9zè¡߯ýë4øÇj ¸S·Î…@!P…À¤ ðÏG¡“¢¢·uC–‚ׇ~˜æùæ›O@ìI­ôà3ëiGZF–ÆÁÎþaÎ;Þñ‰îÊ+¯¼ÕV[¹´\¼Øb‹íºë®Y‹¦v…VØvÛm=?V*öÝ|óÍ%¨oºé¦[n¹e.çœsÎÏ~ö³Úõ u9«®º*…þ€´ƒž$ê\…@!PL#n ·m½õÖxÔÿ—FÆwÝujŒz¾{Ë-·X öÒ²£*kÈ‚Ýûï¿¡&N˪µì²Ë^pÁ–¦EÌ7Ýt>Æ£ž{´ŒqÏ>ûlŠï¾ûnEùÈGÈkTd¼è¢‹žwÞy.A韓k‹¨]Ô»öÚk_zé¥þ¹›"4œGÔ$ë( B (&GÀù§fÈÕÚ/²Ô«Ï}îsΨôöÛo·»Jæì³Ï¾ÐB }þóŸOþ%—\"TþùÑpþ}úºë®+öÅ”W_}5™µÖZ+ÿÝ]Ü,†FØŠ®¼òJlºÜrËaßüÛT˜ Zž=èõØ8ÿŸ‹Ú%–XÂê´Ù€GÂÿR›ò: B ( ‰@`ݵbÛ¦vÞygQ,"ü⿈S…¿ˆù7¿ùØ÷?ørýêW¿ê­gºH¹Šƒ­E‹b Qñ.ZUEÚÿQ¿ï¾û\"éÁ`Ôbq—ÙV-Òµ †{ì±\Ð/M„h©‹'°6 P×$`"€®*…@!P…@'ÃLÀ˜ÏÑiÕf—{ï½÷þð‡{ï½÷g?ûÙûÞ÷>+ƈ‰^|ñÅsÏ=·ÄG?úÑ=÷ÜÓ.*+É8R`Š_UÄ‘M'VV×ÊÄġϼ§äãÒ©9gum~C£pÚ¨ŠdV›Í b$áÁÿ%u–ÇéÏv-µò¼9ÕÇêtfBz—óXífú•;ͱvûíþÒ»6èÁdÉÑ^d¸uÁï:Ibl}ÝçÆÚ¸ç&çÆÛ=ÜÃqÒ ×‘Ž…íbîôÄó¡C‡@Öóà¯|å+×]wÞ’*ÑzõÈ-£Pìm·Ý†#%Ôµ¶ló”߆31®t(Ó¥&0·ŠövÙ\­Š¥f9ö] …14“ð+VL?ñĪk‹ Õ"æ,3KÓÒýf{Víq²;q3mø»süú×í9†@7Í9t$sšÑÞ£AìwŸÄ7y3̓’A$ÇvQèüFzè•F nBçO8‡ïŠOºIuŸÃ¡žÜJ—\_ç˜tx£a8 ˜î`GLÁsÏhsÉÇaGý÷3G—ãù÷ïºg+ò[ÞòSN}¶Â,öE¥¶.o´ÑFHw½õÖËoÀ Á6^áE+Ü<õÔSÅÊR¸éÝUVY‰*šgžyn¾ùæ7ÞX-?b 
,°€4æöÉŽßýîwX–ïÞzë­¢êq+.¥åãæßþö·l°­[y0l'v6iðÈh)Ò/ʹi ”í™ uS‚H7õ:Ý1 ƒ,Œê.[ÿÐ_=5 4¬.ÝfFy¬Žo·ÁÒñÄåpLn’\öX%$t«£^7Öú®ã\ÍX÷ºÏsç£!ólé84^.7¹D·ŸÆËËç;†åpãæHëÒ¼6¿†8}ß*â8餓tæÛßþvd6ÜpC1.uyàú1wÜqÒê~ÿûß÷I o"¡Æ#]0¸†ROÝB¹݇cuݵOß;/3Ä@ðôCSäG!sßEgõQš6îno½Öå±=îuŸç5ÖVIMµ]òuîv\zx÷'놭sÜwz¥Ãnñ ¿œv9ŽN§œRÿeòò%»Ì!uý<ü£|2ðâiÓ †ö ‘sš.åãf’Ä\2€°³RÄiWkºýÌÂñôËd ¨K@œSYØÙÁ˜:ÆÎzêÍx±ÞõéŽÎæV”À7Í:}ºßy™ávö‹ƒIgÑKÇ¥´™ec{èë>Ï=ŒAø|þ­¹ñFO“~“3OzF¦†L“ó£}5pÒÈ…²ª( IDõòäq擨ªõmå꯸r B ( B ;EÀݱ©’B ( B`È(2hKq!P…@!Ð"àîØTI!P…@!0d´¥¸( B èŽ@pwlª¤( B 2Š€‡ ÚR\…@!PtG ¸;6UR…@!P EÀCm). B (º#PÜ›*) B († "à!ƒ¶…@!PÝ(îŽM•…@!PC†@ðA[Š B ( îwǦJ B ( !C xÈ -Å…@!P…@wŠ€»cS%…@!P…À!Pâ:2â"`<çÀ|‚T]š¡L,+?+ \¦Ãè¿:“Ç ˆ9ÌJSþõ¯•˜zê©§Ÿ~z\ˆ¹ÈÒ†VÍo|ã)Ñ¢ Ø™ úÓŸ´%‘¸%»Ä¦Æ#­×¹( B ˜8FëÊõâEtèŒÿ0âOYy^guÎ>ûléë®»N$½æškR¥E9Ë.»ì5×\Cæ–[nÁÊj¡íX‚¶)$së­·Rû¶·½-[º´¸ÜrË]|ñÅji‚pöN“ð°XM§v­TÓCÆCå%+³( B`Øà¨9mSHXe’Ç”žWòäxèÙ*©7nB4=±yØ8œ—^9§3ykÈç5î¹çñ®Pé*EÀΞԶn/±ÄwÝu—M[Š0â2Ë,ŠuVdÙäEÖt A-Ò¿à‚ ¢^¯,_+Za…òH˜ZEÖ¢)´]K«VÚRуç›nº‰U­‰fFŸR'cä´èl&‘h¾X]…@!PŒ¦›n:\cës¸ '’C<òkIb–^™=lœ„zõiåH'¿õ­oÙu '|ùË_^zé¥ gß“à2a¨ N]Ó–,ü’ÁÇò²(PB‘³*-A›¦!대Ñm vq6mäi# ß’Ö|s¥Dá)êsÎØÐÏ’uY…@!08qì m³„‰fy5K°¸c¶ÙfKjè•ÍÃIÀÈÏœ:ùéMoz“Øw÷Ýww¶ú¼óÎÛqdz"ßú^ } 1±#àÂ|QåÒiù.‰VmÑà²f4P–Ö¦VBù©ØY*Gé„àn…v_KX¾p¶BžÌ:…@!PŒ4ÐG¨¤yl«Í3O!Å]VFãü刀ÃÍ=éÂ0pŸ>„Yç™gp Æµ×^[|Ê)§%|†D1khSª.ßǰ$B¨æ2øR‹,²H^ù…]èY] ¥` §’ª&¢•O~®¹æ ¾ÉIÚ¥?ŒSÔªÛÿ0ZÉd¤cÅWtÎ'¬û WN!P…À°# pòäQ¤'!^Z|ñÅùmŒ.@ 6ä¢$LÞé•ÁÃIÀ}ú {ˆÍ#nùkZÈÅy¸0_øöÚk•âNáé7Þèí^l33¤,8ƒL©"iI«PoÉtéV {ÕØµ'» Pφq?~…›þ$o°/1tN>|œ†Æëë{R]–'Ùj¾þúëÃý}åêº( B`d €z…À 鈗 ŽŒøp "œÃ2˜I!éºô‘BÀú©“‚WëìVžÔ"<Û˜QšíQu±ÅË.eüçðµìϲF?ûì³_tÑEÈ4&)^xáÛßþv`ÙHÍK/½41.LQxvWÙW¥â%—\¢–'µŠìsp»æŸ~E$CÛ »üòË—_~y*%‰ž»Ý6dÌ ’Ö¥ žéB¬[»•_…@!0qä°€º y11?8”Œ>p“ öZÌ6qmý¯ZšQÇÊ+¯ì¥^͇ï`°õûßÿ> qX¯>Äa©AHM§q çƒ>ØóæúLjº ʘB (FYE@Ca̰}ˆcÎìºë®¾¹z½˜öÍ4D•´Jl²Ùf›vÚiM }÷Ý×›»j­¶ÚjçœsŽTfì©Ô*òPÙ§(Md’oÛ›"Ëýмô«_ýª)”°;ëØ[mµªNæ‹q;%û§)”™1Ü÷¨œB ( ‘ÀðˆûoH^Cò6&T £±„üI`Vk†MðбlVzUqéᮎIJV°I*Í.Ú¬T»”™"g¹S%áð¤ÝZ¿еzŒ;³"!GëÛÚ8 ãD=ȰÖŸ%&¤Ê úª°( B`È–£8óž7?â8=l½E×¼3~UÖ”p ‡tHΓWk÷.gK{¯×SäÔ’ï°ˆ/ßAµ)JPK2q3EVü‰aîl_é?`å%§ÿ¹¸?&•S…À¨CÅà£FI½µ¤lÂêì•TŸ…›2ñ%öu¶K 
SŠe%Ä©Š.zN"/ÿ„}Qi"fÔ«Ô®.½È%8þ¨:Ñm\Ï›iƾ(Ysd4Šmì«­hEtÖQ…@!PL#1b"¹t&4™Ub«Ê"˵ùˆJfË4j”‰D;QP•æó–òµ pûˆ©¦ÇúÈ'PV7¤ž¥iôœ%î–èl«3]p'•. B`”"0¤ðH$`!¬¡òãŠPÃyÜ¢½<¬Å—.í'æûÌ\¬™Xãª.œ'N Så 3Á±Š¢ÞpmcSì«bÖ´ $_[9òê‘´†´ÒÄHö?Š€ûcR9…@!PŒ:^]ÜèPD‹çläB®áZŒ›‘¨hÝbV\›f9.Åv9–þRN’žÖ\ª”¿÷F´”  Q84Üò;•·tpƒ¢…@!PŒ^^]û“xôÑG•þä'?A™ûí·ßC=´ë®»*ÂÁ©Eó4ÓLsÊ)§\qŇv˜"24cß³Î:Ëå·¿ýí;î¸cë­·–¾çž{Ô;R}3y¥,tîìã Uª¨( B`¤! $cRœyÏmÇÃrŒ'ß°ŠÍ*ñÅ_Ì$á©óþç.¹ä’±ðÄOœgžyD®. ZÆ—â×vÚ)b¨wuÖùà?è[ |·ß~û^c5¾ð…/`nzÞö¶·ÑŸ"9ïxÇ;¶Øb‹\.°À[n¹eãÎ 6Ø`›m¶¹ë®»RÊÔ$º‹€»!Sù…@!PŒ"†”€‡s Kõ? ÌK,!ßÚ¯G¹ë¯¿þõ×_÷ÝwcÜ“N:ÉJ²˜Õd„@ØqZ޾ñÆѧUæšk®5×\óÊ+¯tIÉÔSO½é¦›’ê©§œ°i™<òÈå—_þ™Ï|F&¢¥_rÉ%¨=îø~âŸ`LZY|ñÅ/½ôÒyç—°ÀZ´-QG!P…@!0Ñ #¼N£]Z(vÈôÐ7 ÐäÑG-Æ|žò>öØcØqõÕW—^n¹åöß chô¼Ê*«¼þõ¯w){`ŒGï¿ÿþûî»Ï_™˜Û\fÑE•øãÿ(_¦H×m{Z¬ºufÏ›³ßŠò2 h”B¬ŒÅg™e9u…@!P“‚À°pŒÎBDÒØ7;ª°©QæUW]uõÕWï²Ë.‚N9ÒâQ ÑV’wØa‡ã?ÞÚ²üpy\—ÂYa±ØW>–cK—†(’cÉÚÆ+­Œgü)¬'chÔ¢[ìNRûÒ£:즶Åy'ǹŽB ( B`¢Nqb¾f=γ¼Œ±¯UåÍ6ÛÌ(ÏbtbA‹ÆsÏ=7±}ìcn¸¡=ÌS´ª â¤Ç“c¥Ø—°Z¦}Ѷ.k+kמ"akßûbVhÁ«. – /S‚¶v™ð+Ëi6W¢( B ˜†“€±Dxvã<96*Û÷„,<òH¥BR<ÚKTŠ¡=ÙE·ÂÙ¯³Í6¢E·äÑ*%³Î:+1䊉‘´@Ö%yg;³Ð­œpj"oÕ²ò.רŠÅÖŠœg˜a ™ÎÝ„¢é§ŸžZ݉IÝä+¿( B`Ø@mw×m”÷Úá¶YuN¸±žü¿È¯''P‰þè$.lòz…,EŸµn¾ùæ›l²É…^˜RÑg>}CØxÎg$ê˜o¾ùˆsÎ93Î8cJm„öÚ.ìæŸ~y¹H¾æ@iï•õgí"lµ®»î:g¥ŠÄÜyâ»Ì2˰ó +•~øá‡5$°f$…ƒ º†¤óºÚfyª: B (F\=úÀ¢2>üöÛoG1\=ï_°R¨J¦¸‘X¯ì6Ǝ溢tIB¯ð–mVï~÷»W]uÕwÞ) ÂøŒ¼ÇÀBR›¤`ä-aѪ-ÓsÌ1Ç +¬pÞyçEÉ<`¥zíµ×¥µko(y¯ µkÑžþùÖ®I"`µp¼å; C1ÛŠ)ž5ÑÞôB3Ãr Ó.šÔ=it‹ -;£:&GQ;Ã+‹kó_ÏÐ Ôðâ¾ûî«âG§³;úÎ;,bû憜=öØC-¶ Õ*xàöWÛÕ%öýêW¿ªH+ÌPt 'àûo|ãb_S£BF¾#3£¤»Í$ò³.¤Ý$+¿( B`Ø, öš!Þ/$Óêi¸‰MbbØ>Ävì´Õ]vÙe¨N‡ïçY¬¹‰V€\ŠP½G$µŒü³Ÿý¬³ºOQ.¶Øbêú–wvS$–ðA+ïѼÒJ+]pÁá~gé÷¾÷=á¬æ¼pì{“„MœA|ê©§Z‹FÃ>ëqî¹çÊ4$žHxUÉyÀ# Î,×XP²2 B (F'vÀPžx²-Ìù‡8òÅ^ÙvǾÓN;­tL¢9Dΰ7¾ñª`߬h"ì›Kë( B (&®Œ~°Úþûï/Ø¥Q}ó›ßÜzë­ï½÷Þí¶Ûî˜cŽ‘ƒ•‰áHÑäÄ5ß•†8ïñÇÇp‘þ‡zË&·,¡Å‡~8T³Ç%ÚFŸ3Í4²¤\õ4A¹F* ­ÆàXBÇЪk—f ¥úž–Îz5“¤ÁŠR½Î…@!P…ÀÄ!Ðu :ê\pAAð|óÍwÊ)§l¹å–W\qÅ2Ë,sÛm·-µÔRhA"i’ø «½\ P#6E =SZ$mè°Q]$- ûâÎ馛GŠ_…¼ÓO?½|$-ðŵ m£S Mƒ0rµÒÚbëEšh½ÈÂ5 uûèlb&©wê2I¢³ƒÊWf!P…ÀD€çÉãÌ{n^×Èõ‰'žÐØwÜ1÷ÜsãHQé /¼âŠ+²FLŒ«ÐöÂ./_®qêö©B­ÌýöÛO[HkÉ%—<ì°ÃÄ¦È „]®ºêªŠÞþö·ßÿý¸ûÊò 
Ê™'H]~ù寿þz™,Äžàþò—¿´x®Öꫯ~Ùe—aw:E½ì?ùä“YdE+¬°Âù矯–î8côÌû°ÇY‘ej«å*xà.)yì±ÇöÞ{oiäã=ôPkÚ‚û[o½õ€ß~ûí"cµ~ðƒ¸4¸ùæ›wÞygO” ßñÈ#$1ÈY+Jõιupù** B ˆýXgÞsóÆñÄ€‡…YMž{î¹Y ^tÑEï¼óN’¨ 3Ùe±×¥ Ðq:¿Ü£“€¥)yôÑGQ×o~ó—áÈM7ÝÔ.0ÆP>ûì³èC’U¯±ÆÿøÇÓèb‹-¶ùæ›K‹k¶á†š.¸TqÞyçýÀ>"äúŸã—ÄÔ¢0ÊUTòÁ~0“ ½–&–Cl­­pó¿òû[<:UV…À(A`H xܺ®áIä N·Y…^sÍ5ñîÅ_|ÓM7eS±ó¯~õ«Í6ÛŒù¼ƒ”'ÁÿR0¡uÌ‚¶å_¤­ñÚVM­ ˜[[O¶ î¬×ŸpäöÛo/509°2|öÙg“ðÁ™‡ éÑ2f¹å–»è¢‹ ‚õnÇwT$ˆ—cû /Ô;ï)©õþ÷¿ŸrܯT-–Ü}÷Ýè9®"p¶Új+‹Õ¬uifàÜíЄ"6´bÝ„+¿( B`$ D ܸD¬J‚óçGC=´v{ÌI»V1™KŒˆØðŸ|M¢´UVYEÍ:ë¬VqåxþŠÃÇYÍ»@=1‹Ø"H«ÊgžyæJ+­„‰Ù&Úˆ{)ˆŒ"àW‹ÆV‰]2á¿e—]Ö¼Ú¨ÅrïÑ©”Ùè%ÛÉŒ}uÍž2\ž(ßr·9V¤eÚ€F›ŠÐqµ(a›(¹[gSOö°“ÂÙf›­›på…@!P ;(&ñ•Œ1 qîR©3ÏïŒDÂ&Ò“~LÂÀ1á QÚ'>ñ ,¾øâ‡~¸KS€˜%Be ”ª"»Hc»I7%˜uÛcWó€ë®»Îrè–€§¿ α01Žd€0+äêlZ`NÀBòÌ[KË·†Œe% ¨GT UÉ8d:Ë!¬ˆ6gòè—)m=,$?à¡"ãÍ’HêKöˆeü”¯ÌB ( áEÅ$ÒdòÛ‚7þqûxPiH'¾)ôÊÚÉÃ%áë®ôŠüD‡öïºë®7ÜpƒEZoû|æ3Ÿ± “Ì“Z©¥ÊüóÏß+ƒèîºë.h=þô§?-ÓÎ)œj+ƒ…æšv`_¶¥éLU[ðÁŽ©4b¸9™^a«” Eºˆ–rBêiÈ%òFÒ.£jÀ3ê¥Dkµž ®ÌB ( aG@°‹qfÈ…“G½L¸ëaåM+ê•ÁãÂmÄJÇIZbŠ%Yi/þuÔQ8ø§?ý)V>ñÄZh!Ó‰_üâh:÷0üe FD]÷ÜsÏ;ÞñŸÜÂúYsg!æÃ£™§à6ù.q˜Äår²2e`Ð¹æš ŽYFP—@æ/V¤íç’#8Öœ.€^ìK‰9«å’¥i%³Ms(™!ûdz£“¤ÍŽn­ (Y™…@!P#>?Q/.à·}ú‚ßæÌC –~qÁ¬õX³W6#`mˆ,-“Jh)¡!ƒÂÿèduÖÙe—]lPB<^ݹä’K¼›dq±IøÛCƒ´Žò×]w]Oš=¦ ŒÌ¹ÄÁ¨ÑAÌó]6ËKÃLe?’ÆÁ¦*vl …±`œ)I7  [œª"–Æô€6¯¡ÇäC[ð=›øhEŽ"bÛÉce—ƒpªŠ´iÑ:y;°œ{5Z¥§( B çXaö¾L|5šãð¥<¹PJH;4=C=±ar±:I(‰¥ÌõÖ[0%i]ÂÓbkŠ6ÞxcŸòÈ´E-]ó$Ûó`½ððYg…Ôc§þŠìÐjé¯Î&_‡ayæ­È:´Õìl–GxÀ³Zò)qfÉ€2•Y…@!0°ÞÉÂ8óž›úïÆ€pŒÅd„x¬ÍZ†ÅU±ï•W^iÁVÔh)Ü{·ï{ßû,ób_ìˆlTDo~h% ¹:–¨Z]ëφáLC²ì.FŸM34g¹Õ"O[h[Â¥´„£ÉH'¿éa<%.Ó) 2΂Ýç’yºÖ”C;~Pb¨´HXÂåàòUZ…@!0àÃyò8óž›7™Ke±DØ"üÑh5—i›<’îÌ™D›ð\ŸGª”åž¡"KM¶2>#æÈÆ+èX¦G¹q¤KµÐg¸Öd¯[ÊWQO³¥­uS[ÝȲ3_ÚJ…ÉÁ >³ÎºƒT©¢B ( ‘†ÀÐ0zèì°e^/ö„?BvózLÌv­l,"ߘ¼ \8ÏÑ©gBÒÌFÔUzÐ$jTjŸ3žë|‰¶±5ÖÔDÿ°;Ë÷òIhV… ´Š$B¥0 uVÔ;¼®³uYèPOg&IöwbRéB (F>OgÞsãÿ¹-DŠø5á`øÃ¢´Ø‡y›o(e‡R”ŒÃšYž}¹–iQuÊä³þ½Õœ(¶ À”,aUlèOÆy¢ž0W_dḎVBi[¦–ÙÚe›¶èïì—RmuÊw–v¦`´©ÒGO§d¥ B (F,CJÀSÚeQU„–Zìh¿’œ,ÞÚ$ì¤-ÚÚØ^j ÃÙ/>œ„ÉðœD‹S)±ÉÛÎd¬Y6µY÷vÉ*ˆ¨2î¤C¡30nØò° ;Ѧ#i±å jig:£kjˆžNs)‚€Ì¬i7ó*Q…@!P¼,Æm8êŒÏ°ŽuWû~…¼ÈX‘·`}ýÊç®0bX!á°Ûn»--¡g”ÙI¢/Ëj±êÚb ªkö v¤(.'ƒü"ÐbP ©Ð-11hãNá²YE3#$ª‰èoùi4—´¥],.Ñç/Bäæ5=ý4P˜*Ms±Ê) B É ž<μçvŽã ¤K{Ç< §* 
E¹ô}(m7#DÀùøfBÆ-ÂíÓh @E%!xÅ»-Ó–½Êréö‘$ÐrkÌ`sò5!ÑÂhŠš*E94žp4wrvšÈJÀ¿jü·x`\*·( Q…Àðä¢CDˆ}}Ô,a4Œ¨Pâ'ì‹”ÊABH×G²$"##ù²Pmì«VBÏ–Àµa_9‰¹Ã¾M`Àre|Ø7¨·±¯œ5Urr h8åIª‡»8X¦]@Ò×’WXa…¯ýë‹,²ˆKÿVÈYôÜãÏcRZG!P…@!0¦˜<û˜„¹X[‘Ö_ÿЬ•VZÉÞ+œÅØÝwßýø€)ï±ÇçœsŽ ™¼ïG’Ïóà1 Tu®( B è%ÿÞ-޵߭æÿŒÏäEþ±ì­ß¯|å+þðj«­æ_ä¿ åí#O‹ÅÍõNNŸ1É¢‚9¹K6sõ¨ËB ( ‘@¢Ó8óž[;n4Æõ$Ø_´q[‡¨ü@ËËþ?®‡ðÙÏ~ [p¶?+«Ó±†|íNê30EÀ}©ËB (F#CJÀÿþ~¤ç¾Y–@ö:Û–uÉ%—øw÷W]uÕ®»îº÷Þ{çU%±¯ui¼›p™|í 7VÙ\…@!0ŒL–ײžÌìÕa± 6fÙêìÿ‰€ý$‰È‹ðZïìv¾í3Œ9MWŒÀ¦+ƒR&…@!ðrÒx܃^ñnlÅ&Új¿Ê—#!D–%»Ä¾h8ï ¥n B ( B`˜’¨edŒP³™Yøër¦™fjû°Â»$vŠ1Ié,Y{¬n6HOxÃ%Y…@!P¼š˜LëY/¼w$Ýþ‹Ÿ0׺´LG#WÔë™1ºuÈ—®õçïžZ‚–Ê, B`t!0´KЉzE±!Z§´Ó*O…µEi‘Á¸xWÚ†,ù¡ù¾2º0-k B ( áE`ò„°yÄ›5g›°ZlVÖŸ¯°•Œ*muZZÝÖ¨‡·3Õz!P…@!0Z÷_oÙÆíÏ£"ÝSN9e¡…òýg/'ðÝyç󱻦ÕMLGã¶ +êSŸ²&íÛõ ŽÑ Võß§2 B -XæÉãÌ{nó`l7–éúÀóÌ3/a!`‰Ì“O>)ÓÙ?.î30EÀ}©ËB (F#CJÀ]— !eŸóŠ+®xÜqÇùÀ6Hûö¤ýVØ÷àƒö)ʬ?c_†³•z4‚[6…@!PÂÀ¸qt;¼‰ôÖ·¾uÛm·½ûî»ý{b"àÓO?ý[ßú–ui‘±Ãvh†…ÅÝ”T~!P…@!PôG ë´MX‰qwÛm·=öØÃÇ)Ñ­ýVXù£ý¨ 8ï?ðÀsÌ1G½¯òœZ‚~•ßÕýB  étW†gÀœýßœzê©÷¢áµÖZk–YfA0b_ùijÏfÕ*tçÝV܉F¥ B ¥ 9£X_uÎÿ„‘0×öf‰Ç[^Î[¿>Ö‘ÕfÔû«_ýj½õÖË÷³ÄÄ-\¥÷Üì"àžCZ B xåRž<ÿyP¯²¤Œe±/f•ƒ•1®WÞγ^$-qã7n²É&èYö•Èbõ+MµX…@!PŒR¦´ªÓp>ùdDÛ¾â 1ž¶øìµ`Ä\áï€_Ï€„¥2 B ] é3àÉ}ôQ«ÐùÎ3\„¿ÈãÓŸþ´—}]z  ‚}ˆ#¤‚t¥ï¼óÎý÷ß+“AÕÂåÑ…éP[[<Ô—þB (^†–€1®ºanÛÌ,’m¶Zyå•%Èym–ã¦Ïy0,Jv™–^,FKEÀ£e¤ÊÎB (A`È XÛØ,,+Þž.Ûî*KÍ3Í4û¥ó9ì"êËT‹dí‚î3~EÀ}©ËB (F#CJÀÿü4Bž–š5#qmîúσž p3ö£y*’îù'8zÆI"1·3d²Óä ¥& Ët!™6fK08EíÉ×5u“v¶ã¬¥ihi‰¦_‹}LiÛhF³%Î*•. 
B (^.ÿ$`›™Õ ÷ œðÊñ08ËËQ¡7œDrª©¦"œLTÝHëå6ß_½QΆK/½”Uˆ3†É¼ù曽šlÓµI€c©¥–ò©j60 8üðÃ-˜›F,¿üò·ÜrKLb³}8Ó•˜yæ™ \{íµbz­õѾîýèGþÕµK/½ôÙgŸMUÕ±ûÏþsª½ÿýï?ãŒ3fŸ}vtng¸9JûÛÊ) B x5 ðOFc!ĆÌðÞB9È ]á`XA~ˆÛ MŒp²…‰“zCóçŸþºë®‹2-n ѪF½.…ž¯¿þz(.ô/".¸à-¢ÌÃ;Ì?-ÞxãÉl¶Ùfˆóᇶ£ãÑÏþóÛl³ct.»ì²÷ß¿Zúè=ùWGuÔ9çœó±}ìï|§ZˆY﮸âŠM7ÝÔüãü#%ïz×»hFÆÈd`Ò;[ B (^µL†íXç2rDDkƒ´­Îxà;쀄01ŒH@€ˆóÐáPPÑi§†ü0å5×\ƒk^ÂÜ“N:i—]v¹á†ØÐ2Íñãìܾï¾û>ò‘x“ê?øÁí·ß¾ÒJ+½ç=ïùÎw¾cŒ û–µ¸9»»}c„˜ÿï¤l°K¯]¹\pÁU<á„L2\n¸á†>K²ûî»Ï;ï¼.Íà#ÑíP‹åÁG"Jº W~!P…ÀÈDÇñäqæ½·=àç(* ;¢%ðnÈI þZóøÉb¬À—ŒQà˜Z“~ƾZ±hü¥/}IëÑïŒó6Úh£÷½ï}ÒÚuˆË¥Ù™p\E91{ï½÷^tÑE]>òÈ#sÍ5—µåèâ×¾ö5Ó—¸YCçž{®´| tÐAóÌ3¸Ù¿xRtÑEÉO+Âk I:èLb³êJÍœ¤ƒÈWQ!P…ÀD÷±*μçæ[‚Ϊ/²‘N¬–'.Ñ6B!Jo½õÖ4ü.»ì2{²Xf ZœëFˆ0œˆ;d&, ÓE¬']÷xYÚË2²Vÿ9Ï7ß|Ö¢í ¶ñØsYÔë¿$ùO…óÏ?¿÷|4¯¢XÃ…{bvŒÊq0\2/3 áH90ºí¶Û¨ƒr¼Jä?$¦Ô+CÞ¡¢©:øÐCɧ”*[Ö ÂVëºë®sVªÈ§7ußå2Ë,CÀkN8Ø¥´ey él¦ƒ † I3OÓh›å<ªê( ÿÏÞyÀYRûÿÜÛ}s“Ó²‰]$JÎK$’sÎ,°¤%ïJÎ,KN"APA$*"AŠÏ§¾’Ù ë8žH˜àÀ&8ðoÈh |A€c=1—4(Ÿœ1iÆbŠ<7Aê«é… òìiƒ½™ð7æ€xR¬‘%…é÷Ë—€=ôP㢚ÈLŽ•ùŠþXfMóçÏêÌc<“áß!F¯w9ùØcÑÿ»îº‹cpñúë¯gr÷Î;ï§MïhÂn€‚\%9ñ)ã Ë9ƙ٤Ÿ%Î8|áÕ…î{É%—p‰VÌ™+€¯½öZtßóÎ;»B“š3þÆ ‹þFΉK˜àÀ&8ðΠ=xœ Þà‚yr|*<žíŸ<ªÇAh†ÑáŽ?þøŽŽ&âüóÏÿå/iBã8@šðf¢ËêàýODq_´XÂWüŒ3ÂÐPYG0cF~öÙgWo‹P”3fÌ€`âlÆÒ¼Dqî¹çÖ¢ô…àYÔo¶‚búàƒ¢Î¢ôïºë®Ä›ä< 1{Xüøãc‹Fí&^‡¹\J˜'à*öŸ›Lƒ3£¤ñµÑØÃ?7çÄÉ Lp`‚ø·âŠ Œà°¼B˜ÂáoÊ_!Á´0›G" Ó.ÈzØa‡€ÅWpØHø?€Ê8`ÃÌ(exWåú§þ›4m¼è¬LâB B$ê©é °×Óâxf“~6çŒÉ‰õ˜"”w!›"XÌ)dÎðÓTëõq{2ÙàW©‡ø€#ÇÐYshˆ3n¯ø¢®¢›÷:M?¯/Ê9q~‚˜àÀþo9€ò‰ÐhLŒ3‰á$ÁÆe> YǵÓžf æLôJ]4ÏžãsÎ9ç7¿ù Q)˜ñÅ~ðÁqÄ|Ï,™6³aF 椯€©¨co¢5¢&†­Î“€ñvA\ò~™·föXþâ 'Ç šàJGÀKz¼”Ù–‰ßæIÎÀ+ZaØO¾hÀAÍÐKÇÇ7G*ã•OLp`‚˜àÀ¿Ým7Žüd†O€ïVGÀž~ YL]¨ P×2A€ðwÞÁ»ø¥—^ÂY‰«`0L˜sQ‹ÁØ }ôŸ'ŬvQIiv0è0rÉì6'áj÷¸VÊ ÁTaM •œ\ Q…Ç™h@S8K~°âÍÈ?žÍìû8XšÙÈ0>ä¡u*¡r¾€Mä†rê±´È|³éEUi‚˜àÀþÝ8`Âà‚x§Í„ ÄTSÇÅ>?ÇâñÌÿ»`ZÅî ¢Œf©ë/Há'bõ…^ Ž#ƒV «ÂTbÚ~ÿwDüÿW nÀó†M0gõû[VY«b¶Â¦)]5”E~Fb¹™l¸/l»@Y²éUöèdÆIl08Ž#†(ñš¡W3@ñƒä·ªl)gwØ‹¥¼ÇáÑèL2ùÄUÑõÐk+ðÜYãyæA¨\ÀIŠË£©Õ÷r\ž°Cx §^̦-Ér(Òš¯íoU÷Ï_3EÆ?¼7ßüñfMâÇŽÒÅìÎx~rФë“´gb0}\}7-™©ÏB“>êKf~ÈD_-º}h¤S̸+Õ™§ž¿è¡ë°A[œøv0°êHFd!VE“¥ã{ó`U­òßlcÞ£èˆÏIôW3Ûx‘±ßÒÆ\aËì¡¡çŠ`‘4žù3? 
þz¿L [ÊMY #ûÑzdæÉŠÎKi©€ø§é¬µRêï¥1ÂŒlY•Æ"ÐŒÎðß¿P‚DH2h­¨Öh“*T•S·4 @†P#:£QN*²j(ÑØKŦO"ר¨‹kä£QrãfÃ~6)"åbF0x(^ñ·Ú¬vGF\­ð¸Rmh5zè±–+5'ìÁ7 ¨ašM°7«Á„nC¶lb{K&ä6ϘMË )bT@‡øoPÌ cT“§Äö8²·ò£¯¸ ‰ìlô J>E_ÁUF+c­S±ÜW’ÙwsÏOóÉ7Ts¡Û$ÀÜÓ2P!Ø‹5]N?™¼—W9E2÷Æáº—`Ó ]4er—ž’bçw&äèË\/y8 Ђ»xs s{5Èä¡àÚÿ ²ÿÕàaãuZµ!¦ s6=Ók\ú—¾`¦`úÇ÷ŸåÙêR`¬S†86Ç…?Çd‹ÙSrt|LИl!ùÍ"—` ’Ÿ øÃ6籂à‘Íâvrq ´JÅ•Šª|G¾•ìbÉ ·\«û<Æ÷ÊÈ`1¾7¨2ήº#c? ‚ªi׸;±jè0v]þqQÈ5l̘e3Fò93 Ó;sô ͼ×rIг+ÎD.ÊeV áÍzÆŸEiDj¹Ž*:¶ŽcËÆxO€fõnJÓ(•UÚÇd¦’®òue³×óÕ\2oÓµR­Á´$ÒMîÍ›÷‹¾H2ŠX±3¯Ž¾(ÎkXºËÊÎ0‚GÍzIôl®˜¨”P'Î>NåUGÄ6ÏQ¥èp¶QÆ ÎP·]×,`öhQU}ÆMæ¸ó˜ŽÄ>$Ìí©¤cíTd4€/‚S„ˆhözóǘ(dÓ ù(ŒÝiÕì bú»PSE³‹Æ9îÿÙS³¾ü’‡zÐáa°y·¡íS^SÜàD˜Cá×_%Šs•›Ñ‚¹§2Jrl< ´"m°”4Wÿª®5ô„•9nB)-`Œ=¬5½±±3£›y8wÑa|¯È‰<¾Zw íÿÙÿj˜ºÂªLJ-¤ R› zCT™ÒŠl_s2ßú|ÿañ9Å è2¾Ë D£ÒÓê ’}¼ò®&ùÐöê"Ù*«øcb0˜a4Ää¬J46ñÜe¢7ïQ(-e5¸ŒSV‡æ·˜¾ÊB œ_ È I÷)äñÓ"N¼Æ&ÓœFV‘PKC,ŽÌòã{Z”±mÍ™MnbÔ½‘ÐÛôInW਽®©ú‹–ŽüM÷]Š8DÀº“ª !jNiŠ©P Ìã-Í Ÿ&c€Ä Cõáÿ§wÄ”æäÄì ¹†ç*W†ÖÎy¹ EºüàÒ§ªÌª6(jÛ'Œ5‡´š]¼®ê¥œÕ¡›5¹I»ëlMEàmÕgÒcî9Ew„cdr€ì^reeâÏ,šªœ2˜ Õp ¦žŠ:õ¸ î{þf‹X:ŠaMEtY&pN’çWÒ*-“Ã1s=ª²tÙ¬T¼»ÈiAÀ#ƒ.-ùïÄ6ëî©Ĺ!âó¥²%¯Çásk¬?“ÙµF ÿ;‡r©w~¹\-ጠ¸Ý\FÏù‰Ù\0ŒAVYγ¸Ç¤lxy„ tSn°C(ã Éè¤Á(e0FiÈ´T0´ð‹žrʸQÒOZ¥<÷×xàÍ )Cb³!)f4%- Ò³ 6±"WÈÅ´±¡þšÍÓO¤Î½“YÚ‡»V†qe‘‹ñ7~¼ø­$Ý%3i¬9\#]Qf8IÐnÍNðœq€Žk¢/Ç@¬‰²æ%3ù‰lþœØOpàïs€Õ˜Z¥Þ¨dòŒµ´še¨mQ(gÑ׳%PEŸb,ÔPÑêéBŠa;}á@æ4 o×(esi#¹‰ÐoúÆl‹%ü:VàÝH‘T6aøÙV lI•3%Ì•ˆt½X£æF¶œ1à™å”"=Óy¬žêé—n{æõ›ï~~n¶yè… ýÅe÷?tC0â¨+dk¥jÉr*ÄâšHÅk ØT-@•«ä!Ê ±ÊÀ„uÖb4Å&¦Š ‹V‘û²\»Z*1]emp/bÑ…TÙ͆V¥/Ò/•(bÏ5_²ËUçjµ²°Kn½0 ùL5yÕê^—c*Æ˪ᢠѧR>—’<ÌÇca7€P%škðõCEǯ˜JêÑS©´‰¸1K™ú“ÊeÈ6pC–ÛeWoce-6€UgºÀ]S™¬18Š©°³[,ù»óùT¥–±¨|¶2•TetFP¬w&!à@IDATà}®•^.h… Œ¶—”=«ôŒ² «ãw»±±Ì˜ñ)•T'owã±[ÜäWMÁ|ÇY[Ý7gË›Úá‚@ÙÁ G5,â·6úb½PSÝëÉl±É)žMP–6jt¨ÄK•Ñxeö:ã3g‘wAQ¤"Ï3Ýjàõh\U¹BΨ€7+˃7¦WÔ>ßSM•?¹š‘i Ù™=ZCˆ sÍçò¯\-²ŠÃëAk“7Êa±•ÊeüpÜo±ÎŠÈšE"ª5<üÈñ%“¹$¹`â(s+«?·²RIâ~‹2T‡"ܪ˜~ã¿Þg+Ùd6…ôlÜáà R“‰RN²·k F­.'æD$³Åç "e*"qëgÅc`uKÌÞén‹»X)«EP•°X§SC¢KH÷k IW)Ö‹d£HÐNÄ2ˆsj.jvÙÙWã¹Ñr`®ØÜxð( J VSŽ sK5`5lTÂBC“PÙr]¦“л!,W̤‹_wØì´«¤—#üÊÌDÚu‹(#&„jz½X5±¿æpHg-{À¨¨]6>L4Ó4JŸ¯Rˆõ;Ýep¿_X˸ˆ¡G^95Œ¢ÍXk,= »Å½çìûÌI›?åõ;Ûž»ûìCŽ;üD$#Kÿl•r#è |g»ƒÐ’ínÑöLJ¦Xe 
XËEU+*—C%buªà*ÔPK÷»õD>«c41äÑM{'LÁM¸ÈܸŸÚ’£P¦{]Þ\¦î!溸C ,ƒŒ;_QQwدy²…¤ßædª´$ɪ?Ÿ1b±Q-×­hÔöFd•/¦°Ö5ð%Í`¦ŽÞé‚zÉšKTºôu>Y2@ˆ<–Ÿ<ôìµâäÏsÃø‰7›àPsO¿pßož8ïÐ[JyÇÅ[ßÈþó÷ßó4ųo-:I‹Ž¨•`6Ödºæ ¨<ƒôDsÁ2%ÁÞf÷É\GUf¢·Üø0Îx\øh 滬.¼¯‰ÖÁàŠ¡OÀ:E½£¦â£ðÈoWQYÄ FôÕ„Õ"£"k=ÏÀ„g¨>42„‚ÇÃë «¬¦Ù˜ù»¼ÿ–Ç’¿8(“\<cê<Xãq%W ¯ƒÙñ²“÷«†áÚRJƒæö|¢h×hNMòÀà g X̧üno±J6ñt+·]i™X–‘Jµ Æ]«³vm¸Àc¹f§/¯b¬Ùý þÿœ›î¬7´ÞÞþ®övÜû°1…\îL¶èö:ó…¬ÍcCŽˆéÕ‚.…­«­ûüÇh7çDÀQ¦KÌ5îàñ†ŽkXv ÄüååO%Ó _ xï%™#ÔO÷›(ŒÖ9 °ÞÂçöcAo˜ÜƒC8OˆIIYó©L àcŠÐf·ƒ®}½ƒ¥ñf-A—·ŠÒƒ´l8«µî½`p"3 D2âÛ ÷ê^§Õ²X(Ù<Ž\¶Âg¨;‚!š@Õ«Ô*a† AÅt8ÖŽ0KWóD=‚vV5ûHð¦¦üíVà)Q kþdbE0Ô¢jÁT,ï F4«†îND2Ÿ”@‡NXMLaTà’X-6™š­óz˜EÿÝ…X¢ªÕ}D©ÐU²÷»ü£#ñ–p'*šq‹W‘gªkdqQ9]ŒDyñ0TWç{Ùµw}×Ó%YÒ¹¬ÝåzT´êMOîÍÙ³÷ùþ¯ÜX-×tú”&ÔÞEŠÁG™Ÿj—î41¸,á°S­J…еêq¸Üvn7«\ …µ">¤ÖñH¦Oi‚?™b–DW¨;Õ›°ždªªý·;î‰ßß_RÖfkÔnTT®X÷{­»n{¸Ûáëüóf-‘I{͘çï¶%-Ëþ»GkåF‹¯­^/Êæ b—–Íè³îÐm. †€ÕžËæ©–¶uë}5+£/?Q6ÔÌ©“DCCÄ6Ôƒo\Ëí€wôôÅôâ¢wŽÆE.žN…{‰¾l¨‰jeŽYs¨Ûž¸F8–Q§mó½Ûß:¾Ø©œpT,+;H6P>ÔŽQ¿P¬§cÔì YEêVÉ"\ËEËmb¾DEÍT´(3ÙváxÖYM(=h‹[…ŒZÆe Œ.WшŒÖnÝ ö–‡Uw[k_¬ ãge(ŸLÛ BUU‡ïrú#¿¸ ËÕ)&RæÉ;›:W,_±V×$@·4ª|³fPtðeŽ,äŠ_<!·»žÂA­ìz=•Ï:Ýî‘øH[¸ÅnqT3e§³]åªnÜyäj…Ö`3CŽ“ÃCÁµü¹BÚã Yù&ƒBF—t{i®šRºËB”·eñ¾îæJ¬ÑÉ”,kt&ˆ_“8€Þ’Îf5‹½« )¢œü„Œ}6'V¸°+béW[ÊLÚD´¹„CâÈœ:Á§cÊ›ÑÂ?·"4ÐñóS”Ÿe1I~~bàï²yØ—ªU¾WÆÝÂÕB#—)i kgs[ذá9[# ’/£rk:·ZôÎÖN„‹¬ ŠKÔ'â Ö⬾ÑôšD~GöjÌÿ!klh&ZY|?™SÔ®¨¥êðÚ¼¢’!ÍqõÁyÇî²TY^B’ªÍ‘P®”®5J™ÑQ”-R8à اâÃèÇ.Mæ~¾`5 z«ŽîóÙаŽÎy9ÕH*L–ºÅiqµGÚôºøœ2 ãé;êpÂ'PÁOãÖ`jniê…èàÖl,}ÆIs˜}ÒaÏëtw_~Ìïyû1»žO)¢gh”UÞ\µé±ˆÈkf:ÓbO•c¹£n0 ©ÕtFøÆ÷Ï ˜¤Èw¶›3ï´wÝx~9æ9l½/8èÞÿ9—‰D³¼À8]sXâ ”-®fTdæDšzTÅÞÑÒSI…{uè†ÒĉÓmµõj+| 6TFTº¿A~Y0lts½Îoœ½Ùš‚½èßòpºåñÔ é„°·ßÄP¾T¨Ú¹E°J÷[-ØŠ’Ó¦î}ä’Do±³¤3÷¿|÷Í‹®½ã™‡T¸–‹§U¸yáïöRj§-ªËͲFÄ„PŒÉtxž@©¢z£I;Ó.ÄkÖº'èqÉÕŒÊ-ͩѪÛÚ¬2vkIÍ=`Am¥*//YJ^^¨&­GnÙˆ:f›ï¸É‡Ï¾=¹¬vÈ–g¹í±oy‚ÀaÑáñ‡ï¼óÜ'<ï–W°•únyäâo«{¯;ó(ÚK[$Ã2Èf˜”¬¥RµUoú‰³/äQ¼øÈ›iNœyúðÂe!ë’®eTz‰ÑРš³Ó|y1¨‡éB‰‘µ=­f 
O­…“X²jÁ„‡n3ý!$:†!¤ø6Ê0ßCH×}afI&ÇúÛ«|“DJªVå´Ûú"mÁ€Í—®Ö½:ΰF¬&„v°…úQšÓ¢“na-÷‹_–=ä³ùeMK6õß±–žf”WÌCh$”/Y±²¯½+îØªKO"OP­KaÉu…]V4HM¹³€äEÌ@¢V(W^Bcˆ;MIí¹ÍÑ?{ûÁBV5G{**ƒ’élÄ×t×#·ˆLÌ©Ó6½íêÙ÷›ç=¹øzŠÔÁŸ:ã«‚•I×lõB¸Ö=gÝ[mç†,K¾ÿð½Çì{ÒÚöÍÊýh¥ùÔõ¯ÛnÆ^SZ¿!"»ÃXë‚„u©§Ÿ¿Ct(M5ãÑGw´hh?Ü)H¢w4ªqÙh{ê:â#‰jFLÌŸYf¾}æ’± ƒáE{¿¶YÏ.Wlütºjß`Êö‡O]Ð4Ëwó ó0™Û°¹0.! 0õ¤³…\§«UèaGs%åζËS xP]*¡T°\Xw¡¸Ù]O§Ä#V³‚[¬ìíîl§NksTj€ö¨Ÿal}ÒwÉo¬ÿEÖV41}Îf7ßñÚY{ÔãQNü"øÄhTÐή©w*"a ßòêiMªª±ÒŒ²9ˆw1^Ó™áÙ K•]¶Å)¾ý´ÜÍœ:yçgïþÅ B9|cO6‹ÙÿÈ_-¼cv—g’ô ÖMRv8Fs¾µÎS0~*dË´O=ã/7µDçÿ×Â%Ø5¢N:ôÂ{¿æ‚Ž6&UGQo[-ÁŒJ¾ÉÕy޻Ê‹¼"6êòœQ^5'Fl¸Áýå%Z&OÚèÇÆ`‘Cа1§øÝ»ž¿L˜`U‡ì}þ~”µ+w&…WœüäÒÄ»ßÿÃ|—?’ª¥B¶&áìšœ&4à5ùî­´$ìv›7dq”Ôº]ë˪ ~¢mðVUqïf²¬n ÷K'qÑð–.]Ja ™u_T .¹e|-I Ö㯸âªR¾ÁÉ/JËGjØqÐÅZKéªÊ÷ªÓޏP„82×§ßñ¬½·:9•Èá˜åâÜi˜Ëôwt‚óOºA:‹&¶Ï`ÕÖÒA—ËŠ‡É^„n½¨¶ˆ*$l)©S·¸Æž5>ƒ¬„˜WsûÅ¡--¥¼LYÔöÓ÷›»ç5|ç¶bÜqê–×ÌÙùš=w8²Ý-:&íº:hÇsŽÙíÊÖzÕIGœ…,vé®Ê( ÈXj;w¿«ÚììÃf]r×üWUÊyÎVw_ö·õÕMþÉô7—އ[®R úC"ÍÁ?` ®¢Îèå»þt—u¾#Ò¼_±ïqûlz„£ä¬ŒXòVÛZS"ë,Zxឹ϶dóÙeG6dÉÌ\/¸ß}Xñ‘<'îxœt™ÛeQ{mw¬ ®[·ºüÀGÅ0[P{ãLßYu̶Wž¼Í <9çî}Ï[ßzÖžw„«_xäý‡m°àÀÍÏ?v›+ä^@ äÖTwýw³Ë>Œ…=Óœ›d—7¶Yo§eo÷¾Ù•ÒÙÀiÚª©·~§éovnè*y.Ýå™ó7{쀩W“§UŸ)À žq{0ÌâPÎ%’ýs¸$Hq¶åê” Î¢†[öx¯ð±{ZuëÊ'Í7lý󫷺ӹ*ïŠò ä±Oªõmë Dm~ÿ ›¿yʤ§»—oÓ²x£‹×ytÞ·mîÌ›ÖÓùùLddk·õÐÖ³®>ô®»|åŒ)N)nå_9åœu®þî&×^¸ÞMr÷—N†×mýøiS柹ÅÚÝ=Ç~ãºi­3çl~Œ`-ŽÙ¯°ôcuÞ 倡Œ:þ,¾Iý÷åû?X-ê{ÏÚÿ™??PžÔÝ.Œˆ³â(æÀé ,éP]áõ„N¾XP‰:4/Ú|¢·qÿg ”Oßùák~p¤ ®CÍÙçûxò÷”FSMÎîB¿cjp†»P·–\•+Œ]'-\ÿ¤–mOÜcѱ³/ûþkße$´ß†{tµOšbÙ'äoèby¥Ç(¬;ÌÜíÜÍ~‚MfÁa?ŽW{òEfÚïýé­Öˆvø­DÚ6nzamTݰÃÓ–ø:jZ±Z¤­×O[j¡µ›7¿hŸ‡F²¶†æÝÚ:å¦-^ˆé‡º=go|]Ä9­³mãÞ«”/V—-xv»‹¶½çÚílÕZGÇ»üS“5ObÉÒþ×Í<‘ ¶|úÖ»yŸÅÙ¼vÅo.ºbö¶šÿÊ-^ŠØ&_¶ùq&œÙôÍ9Ï}ûø ïùþŸÏ¿x¯Ewüê¸1oë×tã;ؽkùúQùÐÛ‹NÛq^5ý€c-§juÚ¬Ë>îõuOþdE¯ð–HY°éÍk¹gMŽ„–¬x?ìB¯îy¿ÜCMÙ ð~çï’S«ê7?w÷â†mþ+¦{´)<'?Ù¦\S¯ØåG—n{{ÝR³ÔÜÌ~<ï,ºGï}õrzqØAgøk­þbg¾ß:o›g§´Ï:u‹»êÂQœýÐ+ Û"bë>|¿9¼zG-¯NÛå¡€®åúRÍŽæÁ%£k·}c ÷‰Y“½MOò1†5½ÿ^ôWÌVVt”ÅZ£h¬ÛgÁ‚“€ ådÝîcðfXX Q®i6Í%ÞÖj¹¨»œÅbìIÓñ4àµFªÈ‚Qãº*O„Âa|hqîÕ¬¶|Ž/EºXÅÉš'óvVUQ]ÖÔtQϯ†J©2þÆL›àÒ‚Lã˜Ðv»¥\ÄQ¢ê°N׉¹‚7 ³w¢˜âr±¾±Â^¦hä»ÑE;ßgÁCKÛ`¢¥5D)ÖVÔ¢6\öz¦÷9¹rÑcaÌ+Õ‡ÜV[¦‚‹ÅTù° ’Ú7«ªK¾z)uÂngÝ÷œÈÑ 
š/p‚Xï€êÃBTU¯%×Èx,8F²HG+ôŽºšÚT±¢¼¬X*×Ü|>¦šÇat,«ƒ7=þÇo~O"*•­•¤J`ü³cëÂûC+sŒðaá€"YuêîWßùúErž18Å škà¥Ì%ѨÜêˆ/ÿÁë ׈%À¨¾ §ÕU›¿zñk;Ü9ûG§þüNx±7f™ÿÊ.jÚ˜&cyX_UÇo¹è{/Ÿyæî,zé¡«2—ê´ý®ºýñ‹ÅÚìVGo?çÁî2ÀH^Ъ:w‡‡ý®æÏ}Kž «ºôè».}üÉìQ'ísÜ=ÏÜ/j4“êèmOzðÕ{ŽÛöÌû_]4¦ÛÑ—‚šwà¢à'3™?ãÇ[^·ÏËç¿´“ä/©«÷ùÅE/~ëìM^ø‡#¯Øý¹ù?ÚK攺xçÇ£Ñp¤¹7>ÒØ*akQ#•e¼)€'w b¸‰E5w³»Ë1ïÔîuµ„u83ÜjO$˜R.¶„Zø ak±Ö3:ã™É|ÿq>îG²ê÷Æo¿…þìg¶§û wÿm*—Õlö¯l£®úöÙbêš_î) 9ÕiÛÞqû›s8ç7Þa‚ÿô_lpÛ·þ ͦxüÜ7kϸJ(ûF?èru1+¹âŒÆ˜Œ b.s|È<Ã^"ÎçJ9œã8À_ï’×wçù¼gû÷¨ÖùÕA:AÁˆa­•ª™¦ö芾eÞ§X)ÌealX]±ÙOæ¿q|¸tëçÚB]éDÑç gâAKÐ"ñ1 ß âÉB,Ñ5¯QùJ¾%ÜrÂ׿ß¶ûÿà‚îÔ%ÐTA?¤"–¡Š·ù5\Jþc)o¨”«--CC1]9C¾hof°Û½V.÷8ôlʤÇ0‰¨F¿êëˆvÜÖµµµyû,T[^Š[w{“³¹¬ßçOgÒMѦßH^ÏõÀ‰Ÿ,I-iw´Sï è„l2³Þ[Ü›éíôuâ ™UÙ-”¬äþp&›Ä"•ÊÇ.ymû«·Å®ÛY‹'¼ ,5çHzÄ£ù 5‹yš\‹GÞŸê‹"øú³©µÚÖJÄàFK0Ð7´2ªºZƒ%?ȇ²Ý-zjžšüñåŸÈÅÓÅM!™ÿDMŸSt€?‡)ÿÔ)@Ö’òâ²>´(ß®u”sà†LéesoØVÆxBü8¯<º`^YsðtZ€‰H9źL6kçYïQ.—v®µ*Þ2zµXÒ8›Ôi@3f+™‰dþŸLœDGb‡×ptصþ‘‘ö&0«&²\pDhKåºÃajŽFxw*•†V8d¶LÂ$¾ÀÒ$Yu "é@¿xÓRÉœEÓ~w>_sÁo˜B `fí*®½õZA—¯—Û+5YMƒžDkÔbUA>ƒšEElKæS¥ˆ¯EÄÓ¼½êƒ~âõ›=ªwÙHç´&äl)]pø\bõèæ>ôw´K*RÅ´K·»5giP2ì0m«W?xSäók…¼+àfena¤á’EŽêì½.]øä¥{v#`®¹ÞoÆW]âiì´Á(tø·.sW\žº+—J†Û}+‹KÞÜOó}> kó©£v:ñ¡§î=w—Ëo|jÁœoßuÇ‹§2\ æœ:qûÛTo´­6É_áÛ}½a¯6’ŒÕKmíÅ ½Ó_`ØóЃ×i§î²Èiõé5GƒéX¥i.¯ãã%ï?øßÀ “wºòîW.‘¾ «³Ž½üæ§·øýþÜõõÔ‹Þ\¶nÑXýR»æµ}ŽÛtþýo_!Õ®öžuä3xXÀ–¢}úÕ™;_¿èçÍÙîú;~v×+øéŸßyoí©Óc±D{fr¦šY«}­åýËY‹ÙÕÝ=0<Â&Ÿµ9ôÑÄ(ÑwÚ»Ú–-[ÖÒÞÚ××wÓsGܾÇ[§½¼ù-¿³2Ûk UoøÙ^"£‘0t ¹¬šÈ=®j[@ëYöÑ »á ¸ü…R±£­}hdÐa·K¸2±ë¥r±­µ-—Í.Î~²aûKû—µ†Zú˧wMgES¬˜\gÊ7þ°øSZ§ U%œ¿}AVñ®H/YgÒôR-ŸLÅ'§·d±5€Z”E»Œ¤œÀ@¹VùCı1ÙHŸA„óÃÿ.o‹_øËnÛç­ÓŸÝü–=3’îléüîÏöàŽžf¸ŠG¬Tûr£0ï™®Øû™ù¯î}×a¯/ï]>míi‹/îtò¥‘\¢ÙÕQ/[º·TªGMÙ~br¨Çø6Ày[-UIOjî&ÐD¥9Ð2êïŽö°>þFßnS­¾¼Õå;½Öh[–ZºnÇzÉ>Æ}_"úƬ»özk 9Ò™’Œe]/Q4áBàs8ïdÍu£R­â ‘)ggNžÑ×ßïr:ãÉ„f±Z5-'’ɾrïÖkoóÞ‡ïÎìÞàýï·ÚÛ¾DŸÇ"Ÿäá0VÈ×ò!wˆ[#€ªK”bnA•HMºm0=°û!ÿÑWO¾çÜwxæÜ¿ÑýsÞ{+öûÍÃ[¸<Þ%+—°>¢Ù×´<³r“uf}òÞÏ{i·Gúð°{×þî^oü¾dºóÉ›-[2„…š`§¿4KµO 6•¯9Mð×Ì௶zAOæÔ¼•FV\[$L,ßFáHB xT¦Xv9íœÐË2Εõ¤D’iÔƒ~—åê`˜,J‘ÖÑÑQÖ±0x/ÊFô•K$<“ig‡œ bN ]1«7kÅRµäu°¾°lE]FqàÄ©§¡âñX8Š­ôõ/ïhoGJd1iYV –Zg®Rt²º£æ‚^Ö3 ÑbVZ¡XÄì“Îä½>ÎKJ§ .Usº½ ß’±@ø¾jéec̰ÝÖ½þÛÇ Ä"­m2¨`$Ð`‘~Ùã³7ò⵺÷î=öÔcŸðÄ-È©ŠÙ²ÇiÏ 
WqÕÏ÷¿`öOx*†’÷¿}úÅ{<|Õ‹Gî·Î¼Ÿ~xÑ[\ÿðïÎ;{×[<ºvÅc§‰Ý“VßÝõg¹|uêÔu†cýùÚëWûG>jë eG;0¹c)䊩dÆëòEÃ-ã8»é{_•d›Ê´®™¬|Îu™~ôÑj½ÚÙÙùÑâЗe–N‰N.×+‰DÌåqjvÛyÏï°`ûgyºººÚrmyÛ£"X|mþDu¨êÎ\÷Â~Œ‡ŽÛvñ‹îã&™ÞÃJ±RÝpøÛ˜8óùõ¯Þþ·=´…êQ ¶{…¡d‹¿tϤÒŸÿa`¦ë…|~ ?ÐåïĺÂ಴?à {ܾޑ~V Eš£ñdÌvŸùÌÆoÿÌU¿Þû¼m.Vó·þþäÛ¿ù’}I~ÉϸŹœݓ¥G©rªÙ×ËÄÚ£í‡ßÝuûñHöO혶¤oqÀɺ$G*xh1à‘tËFÄ—X)>¥eruH†ªÆãE ”†hÀ`°µª;ˆÊ«°Å#‰b|rÏä½+ ¤R(]þû_·ÃËv›;húh`iê@z0¤ÂtÄ0O€-~׊ôòÉá)Àá'ñÅ!dðJ…ÈŒžH7£v¤Ø]ŽB–å¾Å&o´ŠCû—I§¾8ë–}þƒÒmÑÎå£}­Î¿7¼r´ß#väþT6^$U°Wóå,+ëœ6{¡\ªTK)•‰¨7à/àñ_+³$X,0?lN1„1<Œ9!UM…a¤—\.d”žýÓ>aùP|hÞ¯6½tö«¸`ùì®t™xßuËÝÒùhÅ'^Ýí xséœÍI\k/`Öò¸¼žT&å V¦¯íoM§‰gó¸#eÑ= Õz%ªÚ %cÛ–Tþô¡þë¾uƒšþe¸ó¿Ê;Àÿ+¶ýBŒYNÝ]¨JÜ5@„Ôg[¾lpRO+1Ëê +Saq%ëÔ.t^˜R†î‹,àn2• ùV 5\N¦¿Ø$I6µóæ;üò·¯Ê1E^.b4ø Z(–²Î€'“Žûü¬Üg¹"èÉúIkr$lŠHNÌØÅ´Õm« ÷°CÖüØ*Ù²Íã!ØBU>ƒÊÈÔÄXä&¤Qµl×^\oÊ¥ºÝlW%–Ûºš½Ñ>¿úýSR3‹h™3B£­©omyÎÏž¿IÇ ÏIjDk– ê¨#Îxè‰[e.q‡˜e›–rµVuiîØP²=8¬ª·;±kéŽÝ3ÚæÞ±3Xûî|Ä“¿ý`'lDãêŒ=î¾õ•“Ÿ}Õ÷~w±€%çaÆXÚ³c§E7¾p¦hiMêø/úÞ WKŸè+´±¥ÕåÇÜ»àÇ'¹ùü‡Ÿ¹bnyíÌs6¹í¦?ž~Îì[nzk.õœ°ÿ÷=;ÿÚcž¼à{û^·ë ç?´Ûü½_¹â…ÅÕ–q€Gó­Û,éîâ€+ÒhõðýìNýV¼Ž„jÛGýO°ùÖ7v½}Îó¿ùõïÜöÀ´Î™¬I]Ù7xÛ[G\¸ù³Ó§O]޻Ī8Ko¼É+V.³zZ?\ò~wST«—ø(lتçmù´ û:“q‚ÚãokÍ–FžjÅš,Sa}³l*{éËÛ\´ã+osÀèDcnF^à)“É-S½3µ™>¯(5Ès «!c’Ab‰&í,º%ž‹»m.4³µ'OçŽ/É.ÝdÒFýCƒéB:¬Bþž¾ôhk{×»ýïöêê7w¼òÀçR…•„a ûC+âA{'Q¨6žºÝŠ¥ÃµZ»lžÒ5åÝ•ï69šÖލ¦ý¥þV[3šS¦”a¨ØÝÙýAïÌA3¬!ψåuâŤ×áÉç’XY^{¡˜±k¢šEü÷}<ÙúÍZ½ ZÁÝ=™JT+%5£~aE÷{ü}¹¾G ú8æP ÕuŸ×—Ȳ”Y¿àç›_³Çoex¬TKs fÅ— -keúZ†!ÞDî1Vè:Jp¥^)¨Ã…ŽŽöå}Ë/ùùÎ ú5£¨•ù¾éáuйÒh)994}01<)29ÃBÍÛ@ Ÿà‚…ˆcÎb¹`Óìv§­^•9%² ø¦$þ‰KË˦{¦Õ*Ì+9'—t0ïýe’5”M&Ó¡@4›)•j•°«‰¨X§'Tƒ]È !abGœÈ—½ÞXf”·vpŒNœÎd8O ™2QÛÄMFÔ¾€F,ð+Ö‹ñvlg Ftî/QÛ"¾gF³£Íþæx:ðrù¢o«%ŸËèvë`y€%ê ,ú²ý>åõz|È·´Ê‡¬a·×“L§ìvw¡œšÒͦÒÉ2±?ù¾kÐCò=¡îÑDÆ¡œŸ¨Å‘v=Ýñþ‚gOdÁ×¾V–»2‘¾B€–k¨‹DÒñ†×VrÜÚæ*טôËZ¬¬-ÏXUÒ¢’èŽò"€Á,nL—XcγmoØe=ê‹h5]´Xãì~9 3TTŽBXÔ£QµÛ¬Ý®hŠŸ˜µ>'Ñgb5Ÿ»M±’9ÔŠ;û1‹,­ç{ÑXN¾²Žx8ÌéåYãVÄ_jx¨!X+KT;–qdÒÙ¶æӗĬmõXêzЂ{ÃZa­ æle´ ¦ø7l|l¼Í¸Pçô€ìU3›ÖGñ–„! 
´8ŧqD•—ÙèÑŽ(ÙX†­ÙñêÕÜTÛ Rá~ëŸ~ÐŒy= X¾Òû‡Ô¼=={«;ÚÕ PsÁ!w6ãüó¶¼ÿÌ­l®O¾~—×ü…®fÝ õ¬ö^ç40øÜo'§-'ë1˜*ö^^€™Ÿpfúj*ùaNóM¹#õ軾Øêèºc×—Ô¨û»[=zñž?ÐCWïóÂ’·âg¬ûã)¾M®Ûé÷­–µï=`É©3¬{]~¹î¯4¯™pEòõÒ{+–tD¦žû‹b±@s(؆Eýfù.íÕå\Ë[‰äûªÓ³nóŸ]¾ÉŸ|´¼ÍÑUHWCµæõZ6øóèåÙ ýè×s£j²»Ô¡¥C–„÷ÌG÷°óu£”®Wœö²»”jTŠÖ ê˜ÿ“CüÕ®sïß°‹©h«êñU6YŒ ÁY½˜õY*C™Ú½Ž­|ÓÜ*·2õ§I~_HÕ¶pÄÞÌì`§s’*ÙxØô’£Ó×ã³…*¼bIŸ­â\7°A¼/í×CœoqtäsØcô¾þ§r3›xÕ–oÖûõh¥Ó f—Ô›T·»nÑÚ†—ÅrL@X´Ö`KÔÛMDõ&´§r±2’u9Üí¶ŽFMcôÖéaµ/¡K:|=íN™J hÄ” ·û»L†`B)9Z=ŸÂ8£»ª¾L6Û˜\©oÜúMt\¦iSƒ¾žZ­pÀON¢Ê÷ÕúàþDÿ9Ïl‚NÑ"hÆË†û{ Ã-‘.¢?=ÍÕ"ëlÜ}à—À <ànJŒä[´˜¯óÕ™áu1>// L‰Î°Z0áèC1lŸŸpt÷{žJÕÒ o¦”%\—nµ9í®„J‚»;MÛy4cT˜SñŸ_ÓçŸNŒPU!_dp fu¸6Ì ä/r3L5âD·‡Ï:1ÐeI|®²,Ý[–©0"’2Of«U,Ø×F«Iž„T:%cE  C«t%]@‹¶ÛáÀ›G²#¼Ë½é^‡æHæ’(¶ùB‡†zvÝ9-0ͧùéÚ:í³ìg T¶Ú#z”héHtk·ÍÇc¶|dE©LàpK‹§Í…»•ÕáVžd÷VkkSÂMÄ¢R¬t…’+ l!â,–UbqQ –Ôþ³•!1£6,σ(„ÓÏ9âQ¨¢C@Úû]|ÂV~à­¹•éf~÷¤-.;y‡óûÑ °c°nÅ›cª}]AĤò54{^Ù Ú@D£ë=~úÓÛ{ù†Æâ€–·M‹®}Ãs‡-¼oNq‰‹W.¿Äþèë×]ÿÌqí®îòPµÐ_ê°÷LmxΦ÷_vø§øgÑ_-åÔ¨Þ)õª«özr³žíþy;~vO˜uãíç¼rÑ7ñšÏÞêvw>0wýÛ´Œ3jk›÷øwæÜ·ËfS·¹ìÉÃC®vmjâƒÒÆmÛLwoø§>œì_¯­†›vìHýbûºm›$†óÕ4“Z›»ë²×öH%³¨æ3Ö^wá#{ö,Ü㕞Àô%\á­…—¯ìØZ| ÓX¶¢3ªZX[Ug.W. ×–æ;özSÏîÙ¤8Âú"—­ä/'¬wìü›€-b)ê~‰ÓkŸ^»Ë6%Ë-Üþw­žIžòÞHbpáö¯g3i§CLXü~Ïh¥Нs0»biæC‹µÜn ®L3 ÊU „/ö© ´\fI[©Z‰gXØœ›Ü>կåDØ)T*#™x)SM–2éZƧ…¦5­Ýíì)&Ë]ÞIö’§³Îˆ®¬·ä3ugÅg¯{âÕÔïä€ÇËÒèÙà«“hÙÊ•Åç*“ã[Ðfäê¹d:1ZŠˆÙåôçrùv­£9ÐæÕü]ÚZ+†úíUoÿà°¨¹VݧûÐzÙ8à''†uÂë0 õDï=äCæ_°Ü”¥)Á)ßo°rp¥Ky ¹nNÍ™Éå†sÓ»¦28D‘MoØ |YàÓåjã…„d´ªëûãÁ%¶¿;BüŠC]¡µl¬•«Û‹…ÚTçôL2¿"¾œ’!†:¼™²ýe ؃Éx5®ÍÙÆ $êŠrf¸8<”žš‰‘éõ_lXKÇ[}­Yþïýnt~šÊeså1c×Q1ŒÃ¼™’Æþ‰©HÈs)o­Ž¾›s£º‘p‡¼Mª%äÀÉÜ•*e‰‚¿Šßäq z˜7$„‹²ŠŒkó½Xd$”,$ÁfîHW —¡Pˆ±w„©nZÃêÏ'˜áƒ%¸åý±ÿ¿«jÀ¤|Mì–RŠ»%wÊ×Ýõ£«EKþúÔg\pç]×Z›äç¾›ŸðøxáÆcv˜÷À¯dÙèœ=¯p¦àˆÇà@?S#µZÕç÷ÜðócOÞþ¦»sι{ÝF Õ»~}±´kUGl=oJhÆ¥·wêÁ7ßùúY|û×>yÄÉߺåî׿ž´óµ÷¼t<ÏuÊî×Üõü…´xâ® ïýíÙÒ¶RírÊc/Ý%õ4ÔQ»Í}èÉ[Ä\ |êê¸Ý/ŽÇÒOýî¶C·¹°³ÕTh¨W[CQ­T¸âG‡KײêúSÀä<ÿÇÇ,ØïÞËŸ9qΖ·û¼ak_¨\/¢VøÃWX[2ô¾·Ù~íósNÝãÊ;»äœCo»é©Ó/Ûñ)kÚÝæ42÷¡o®®:ºÖ´©‰b"Wέl r¶:ÿéÝpÂ;G<°ÞÅ;=uÕ›û\½Çë½°Ý‚ƒŸ¸ü‡ûÏßý‰+^Úÿú^f¬Mñr£È4[çäöt>3Ø7x÷ïO>÷›? 
iáˆ?šn E}ÑÑ¾Ä´Ž©‹W.cœ>­iú»#ï3ßçA³´X ¹ª¥boów¤qŒéáÏ«}ê“É-k1‰D¦!1‹kyÛÙOê¶LÃ[É&4Ôj®~ÒßjJ'²Ñ@S1WçÓ4Aw$•ç $Œ“D£´—kE¦FÊÃAÝ?ZmÂ- 7q·£aתù\C_®\r*?ߖÛO$ Åžj$ÃïÊÜr<rMÞàƒ%Ï|âÍ)QŸËƒæÄAY*c¬PáP¸žÒ±¦"ìV¨eQÕä²ðåG<?í¥uoÝåm"ûÌÌŒUòÐÐP±œG ¢d&T6 B¨ jÅLÿ”Z-óaîÏí–6— žØ3y,ÞÑ‘BÓŽ®a³ä; ÕX=ÖééÄ-*SÌqš½ßéãÌnF-¸*7Œ °õÊ1þm¾HŽËÄG…Åk{×þ û^DT@¾½SšíywôI®I}>ˆÔènîîfXª:š;V ¯ØÐÀ––5«f"{ËÅ52Å;¥7;âWA‡fÃÈÛhÔ’5B7ûèv›ž¨¤ÜÊée9µª'ò©–`_#nÈgœÆ’a§åXî³0Ùb¾[‹ÀÝm¾6Ü}Á00œFƒ 8 V"Ž(£Ö˜ÙåÕâë–é„e Mwç«yŸÃW-W‡CL͆y ,Q°ââg>£iF"•°i¶þBT‹24FÍ?ö1 Êlz£®Å‹©€5H€ñÞö^n“˜ jPa9c§;Z)• Qå@°:âõv‹ZU¾/Â\}¦–º#ùbeÌ‘Ó÷Vw”›;@Åà®A9.£™ÑöH;žäô=YNÂ%‡Â²æ´9!ßesê8ÎÙlË’ËüÊÇ3æÕ=Ppt$–Ri/ÓN_¡˜Ç*°³«ƒ/µØëÃ¥€jêW±jÃ޵Mg†¼aõÙï•ÿìè.Ÿÿænb¡øÒC”Œ«åš0A¯ÆŒûC>CcãYÇ-¹ÂJÃà‰Újló¿’Ú£¢jU±L (/ÓD5DMñ«ÒRÝŠ¹ŸŒ¬êÖ¦ûÓÍx’M¼pû{=ÉÈ êδ]¸h÷ëÌæÚôhij¤4õ²m_l­­sɦϲÓ\ɵ¨ÿ²ŸcßÞØ(¿Ø½ðØ×]ñŽkg¿Q\ê¹dû—º´ æ¬û¨7ÓƒvxÆF÷Ò–;Ù%†YåÌ$È8b“4.ô ‰žF Xh ‘hØuñ®jý¡)¶õÉÿÃ箹þ‰s\ù®éÑMTÜ}ɳ+C¼zjÁì[pü`8µbüÖ26 +ë8RkG-ÝÖék9Ö±†K\S+ëûÚ¯úæã-±µ$çbÿ‚o<íŽ7û²ÍµAâ>u1;ì±;#ZÓàâxn QqL Îr”ýöšó¶ý~/ÚùeÄôÍÛ¿ULªE;¿[Ò¯ýö¯ÜÕð]{ü)›¯¸ê|!lÍ9£ÖöúÝ2ììrL½qÛÿèvNõÕ£õš¨xeÆUõ¯Œ»Upfhý•#Cͪ#do".d:™mr·¢® ¦ƒ*„øàû2"ÚÕ¦[¥‡r½ƒ}ÍÞ&¾1ÙÑ]ÈfÎ~lÓ¶pǼÇwmö¶‘ytéPØÙœJä}öP)Sc©±ÏèË€îȪÅà_¯iØc åJ—§ç¨î@¦È°7š.æXTEd"$š_ç{%KN0"jð:G:—ïvMÍåftnüö’OÂN"KÖ˜¶ãLVGŠÕYšÃë,M ”LtX«k<•ÍÕóEüÖU~²} ͺ<»Òm÷Þ±ËDðeVSÑc<ËÊ•}¬íjjnYÖXáñ{;œmˆNÍ;Õ2 '£|®:UÍÒ.¾w4œO oÅ 1/Ÿ&Ã¥–*&Û[[™…Éçò,ƒƒZÂz£ •‹KôE±MXì}#} &rß!QÏÊ£NTâËòËù†c“ŠôeW®íšî³ÐÀ&G¦~2º8ªÚ…îµ~»¿w¸×Cüe»‡~r,dÉÀF¡;¦ËiÂvÙ»P¿º\Hi>¿“®ÆÑ”}Ê… 5_¹`Kj PåòÌI«Xr¤ZÇ`ƒjš{AÙøf{—ÝáTŽfGt ÓפE":3®EÃco“«9QJ Gc&5•‰vã2K4Nôup«R"Ê[¡ÓÞÙâmAqÌä3xMºŒKúGúÁfLèAÞ˜/‰¾ÔïÔ½N‡;—/VG˜¥‰¶9$Ãi3Á/÷ùü=\LéNÝJà—M³2ÛÍ—þd1O»fÉñ]ÍR.l´êÍÉ|’IwзÉ×Ä „^0ƒÀOwTGàŽPFi¾lËÍŠΓIâ\?Z25e=š/_-­é%¾u“­ ’rE>}U祯iìr­·´2b ã¬7Å?•§.UI•¥D#ŽÍ  $[þˆN׿5éÞÉ×ô']\~·E³ &:;;–ö.omnV|4“ñ®|Õ‹½|r—Î:±­xô˜«,8Çy2à#­]ñÚ–—úä¥ï;wÇûnyí„‹¿õ£«?äŠCž$P€EâKb©sL)ä´£Îwuœ…,´X5[®Týì½”ÜÔ½?®ÑH£izÙÙÙ¾öºÑKè½ÓKÓ’Óq¡˜^Z yôZz1°Á`ªwïzëìô¦Ñ¨Œ43ÿÏ•ÖÆàÁïÀù'ç=FººººººßþùfΪŒL&‚Ÿd$Ê_ôôqWôÐÕ NûÃÔ]X½úrù’ÊH˜òxw¿¼.l÷« åwoîz˾oEümÃÅT77)®e€»P±­"æ(ÚY¨Ë€;FR¿:ÒèÖa•ªØ sœX`2È!]Œî*BàƒÒ-<{ ùñ.¾¤”4òÇÍ?4Œ dÛA@-Û`Œ (B­8 L·Äûü»UŽKÑ# Å#í°bnÚ0ì0Ç¡ÿ!pÀâ‰rö¢ð@Læ€èßLç1 
T†­)¼˜í¬”}¼YTbbr‘’Víäš¼}òбœr$=ìØbÞzlß,m>1,"Dë1¶TFP;2á@h4{Œ¼xr’ÚŒ<¯¤v¼!âèc*zÆ®´J}m‹ç@óê±-ž ܨù|¤HæéÅÆ„6ˆnh ø±³ ÏLq<8ž w'=fÊ‹Vk74—´œÅ½‡- ÍïË:Ž[•GsôÍ ˜t,†Êj“~TyØÐd1»º«=¤k7¶lÃë%ÔÒ,ÿC7ÿ¡EÍr†7)¿é½Ho8õUe˜ñë%ù!FÛ¡®!9ŸlªÙÎk^Ö‘N¸Vݽ梓ƒƒçϼü¬ðÿàŸúíaFf¾%Ÿ=s»[ïæ’¹G}\ÌnûøÀówyøÎO=sÛçš}“ ºA¡å.h A›Mã<^œGÀÙ€ A ‰ºÍ‰.ºDMJzÌšòÌ}ËÏ%P+¡zÀã2§lT×'Ð`À]’‡$‹Éô ËzgÓ6ÁÊÞ `$gPè« Í¢ûa¢[—Žm‰îX;¿qœü­2`¼p‚µ“„ ðnš úŽfd¬¸<÷°eMjÃbm7î±gÜx–Ü×°Ñ0<Q„M!]d–Dw-+Â0Fÿ ß•ü ÒècòZqé>"Zƒ‚¼5ÒZ³64gL’waÙØFrvÃì`µø'kå~n¼ÁõE;Hb1ÛøU+ñ\XH;I¬>iüÝþ¸Þ'õ£ò±»uóö›lHK¬¥ˆdIª+ö°ItÕ^W9›Î范­†• îN¬X‰¦ÿç]~Vl½žŸ÷þ×ÕŽ¬ãÐUŠ#áìc®V®ó’íþs´\½Ë‚É¡­›l]rÊæ”ü›ù·)ÄåLRܯíTiîüãÙ²pùS×2lnPïŸâ4¼zÉÖæî)é Ò¶ÔÛ¨UK°ÃP×Ä¢£Ž„ŸÆêÄgŽ: 8Q>¢<²Tóþ’”GÞ7–èv ÄJ>}°Œd§NqHgÐNX\ ò!«Í¥Ó.ÝV¥'x· èÍn5ÜdkÍFš™âßVP‚‚´ »äu?t¼| «¿ Í*Õ¸|þøÍÚ;JÕÑTfäÑ݆%Nï±ãÕrhx "çª7½{|H°5yë²S¼`á$|ð‹‡ÄfE½Ñ(SŠÇ¸Ifybõæ±) %žTâ<¦ã¡Hךò lHÇÏmx˜³_žÌQÕ+ßÙµ]@ƒ¯7+LoHà`ShZ˜À¹/mÉR΂^ÜeÂÎjÍH+YH¨Äw†PJø©BöÅ´o’o’’¢*@@W -’Ùœå1)X1O‰c> JÑÙrÍ®Ôì8BÂÒ¶ 2+H&Ž j‚e:Ö\7K“\aNAådE¨7´mÖ>d)Ð ThФà¯4lÐOp„°‘H˜:x&ÈÖXë¶*âS­•o(WWCÙ¸ÂkŒ­Î†‚•¡ˆÔþWsR›ÁØpXÓI?èèâcrØ¢ñ\X:s’5»Ì¼Öºé@zuÓÕlŒõ€pB%€² 踺…l7®êÜXùÿ¸óµÛmzëïÞÇ«ÀUÄ™ÉÔšŒýÅAóM}m Úb¶€ •M× •“g7ß,a›Ì!ÆåG¬›öÝÙ·´qÝ@}74€üX«õ0þ±þˆÆ˜œâÿØá_+`Ö¿éMqkë¾Övãhk›u -:Ã+ÀlÕ\E—`-†V+žNgÒ†ÂdðŸ½üŸüS¿¿uø¾Gôî¼¶9e3î Õ‰ìöš.ûCzY-:ÁTR°ÃÖÁ4²j ™Tô B6ÑhJ!i”qGRR*äÁg½40Y˜„h9è-üh§%8Ñp6ÄPüÅUðts¾œOhïÂÀe—0);Ü…jÑ Râ/ô8ØBÌÅŠ6$“'&;!*hèë`¼F8$T™’a¡Qô ,:€o„g‡)´%*Ê£$f\Z¢T ¦+8ל°àxY‘¡Ý2TLਢ–ôzàgÚ—Q­î))©T 0vft¦=ОHÆ;]íe¾-6™RY£ÂÒ£!OÃs$W*@œ‹“£ãW¥>Û²câðHo±.Ƽ-€ô‚=(VЛ³žFÛ.hÔêÑ8ïqöÀ!s¹¶|×È®#™8òÀ‘Äl?Zÿ•D‰'‡ÅÀœ+Ñ'¤7,‰3%¤I<ç& æ)²T8h¿‰î)QÍ-t×è7RÖ’¬Rc]„c4lL 1¯ÿžÍX»ðjÆî ÑɆyÉ]ÉL5&L™«%UÐ<sŠ7·äebºÿƲ©d\,÷] Ú‰6|}kª ‰0ŠçŃ ç1 ™c‰Ôƒé4 jÒ:@Ò'¼G7´ÿ_îcµÛ¯Ãnj&¾ê"r7²F)÷Õ)r#•üÁ‹ÉýàÒ¤‡¿Y¿)šoZïbl ÛǦ羾ÿͪpvëÃ×Ë~ç¿o©ä;Ëš,Ñ÷œý×S0•üëÁï9bY@¾§À7N‘Ücã=†—K†Í†-vГXñŒdxc9è˜kކЩªÑpB ðU=׉%3y·Ç³^ï½uÅÑ$._MV?ñög•€ÿÿÄo >#¬n´:\£Cs¶˜ÙZßÑSg(šÎÙ]µr…êhÙ¶8G~ÎÑH³´fçBÃò0(%DSjÒo÷³N¶\)Ã#.^ηØÞà°Hœ«šQàuÉ4;›)×àtœâuH^ú*½A¤c¹dW{gÿp¿ŸL 0¨&Œu+ø6ÀišR‰–•¡<¦dÏÁg׉{Ø^„oªÝÌ/´œèQAð1É$ ™àØJjBTÁæã:áD‚ Éf'ŸQ×ÐTª­5R.U±²6°}ÏŽù‘,¹eJAt ‚ •ïŠu”ÕòPa¨'2m6—‡^eÃEÊÂô«Bþ¡/‰U;#ŒäÍmÔ‘‰sáO 
YÑãëÉ—²LD~{ìarq³$’´Š|ÿè“l¿A’ÇU*ÑÆCýhòÝ40A6)O(yJvTa'úبAƒ7É™>Lùõ“ó¬Å©(¤Ÿ7,&‰ýî •´ ./&É!z7Óp`^K.´VÜ2¡5ëñ HóäÜ&[RÏ×ÒH#±–‡'·l²-© *5Áé@ô±¡ ¢&¶DÛ0<³T^¢dH¥@‡ãI^P§¨V%ÄíãÊáR)fiµ¦m~¸3~‘ûØ%Ø?þPð²&æúWZ&|‹´ØkiQ_ kE²*QE/ÔU 4¥ôQà×èP]ÓfÕ\ႤÁ#×ÚúÄó@/ŠEº*ˆÆÐ”"jëm¶´ƒB’ò\Òë^•2 ™ÛE·íïí¥Ñ^»Ãçõ—R³ëŽ0çäòj¡¿Ðï¦Ýu½–Íe:¬Çß´óà†/‚|\ÀÍÅÖtŠ„z ãü.„¨_4Y¯í÷Ðk½óÙ d ÿ‡/ÿ'ÿÄ/ˆH#²ÙÕ¾Æ5G"èä6’mJnô 2+Äj0Òõœ±næ»ûÞ5õ¹²jl5q§/Ö®htöz‘£4„x¼NåÇ&¯‡±;òµÄz@)|Yþ²Ûß)+R8fáFUjÀË Qª@­ë³¯r:Ü.Ú—­ yl¨ë`Õaüy¿ƒ0 |®5„®‚‚ö30eg¯ÀH—R } *^—GG*:â–BȺδ5g e€©¤Îáäƒ&\ è+ÜŽ¢&¡v8ŠnÒ‰” ªAïmQ_±ÀD[§Tp'ˆá©ÁdkIÎhFƒì™´}K“b‘¹–]눹;kÑfÐi†D¤A¸„Þ¢¤öióZlðÖa…$j½3óu€ 3„µàø†±awïuW~L´Üp”¼þ m{Á8õÕS ÇÇTô¦¼‹I”8ÛTʪ(¸9 mbÝÓlyj믵k·Iy‹¨o| 9³%x5éǪ”a<»Óû&icAëJ<²u„ü5ßvлÖÖ1æt†2h!ÙšåÿµåÖû€0‡÷5VÒ*ÿý[0"ÛóCv6¨Ä­6[[Òð±QgÖ¶al&‘¸†³üX•u†‰£÷ƒ`‡|P›Ü-ÄwbÎH¦¦jg{£æ$ -ƒ Úij¥ÐJµÉê+»iÏ(Ï…Öß³ôtâ¯äg^~Vô¦}ñ3?ÇÿŽê1”tsÊJ‹®Ó³çí£`r¶q³O¥¯^p¤ƒs!W¨_\}’Ð\Fü‰qï+*Š”Ì%ÜnONÌ6ñhƒ·hÙ,£eO~ª=%§^'܃«,¦çð…øfPÍ|YÊ#GåŽzcPáVµ æí‚ü-—½º $ æh–»ÖMœ ˜ÆØ)ÌøøQpU3fPNEÙôA˜ñïeÒ§ªE¥ˆ++ò)V]ˆTåY…°i6™ê¿k±.1·Dd[¿ëîèv³§ÆºÎ¼7öÍcßu‡o?nv§Õ©?hkÝå›[|>¸»õY;¶¸ë&ö-û›µ±ñß_þ_Ïn¬áçØù據ü=ÿŸÚðµ‘a= U†ÁÆ¿Ø!‘r ½ÐÇX£ÎÚâËŽGÐm–GðF#-º`Ó!ùÿÔ®ÿß/úw”€‘`ó2œhŸîÙ6pÁ7°çá8º ñõ@7Åd CëˆU¾X,( Ŭ¿¸ ÒdA«¯Q’Äñ8€h~)"&Pë6FyX46Ž­ÕëB„Æ£ÜÙ‚†YÇ¿¹¦Î<ê· –¼yÂV¿qHÍ•a>âÚî{ŒÝ•ÔGº"¾ÑL¿ßË€¿ÓhŸ34ÚÐB0k‚ðRâh“ª;|Éã©¶õVM.´QbJ©ÝH+ž ˆA·á‰û¼¬¬õF—Ð4XY-wÐΩ¹ = õuœÊûýQð‰ÍmLIe´Fo£:©³]éÕ6¸iü`.×܉ÔÖñmØ-Tð7‚Ózx¨˜‘Û¨Óät­ìUÆ7o»&9ävGôš¡W+·³RŠw7ÅJ…¬ª×ѶTI.× 'g;aò¥œº=T¹‰Jlõ{ÅAÓWVÆé‡dŒÈ‚,ü™);²¦ \ˇ@?€‹<'2Ž鮀(W½¼Ïïv¯I¾U¸k$»ªU•+@_b‘ yÞÝBK®’,Sý-íYDlB”U;!B±TŽÐ )#M9\@ÔJq°&ùH3ì@–‚Aq°™n xB¥R„ Îe0?¸÷€Ì×CFÍ[¤@®R¦]‚vYÔS,[ÕKIª:ÔémI‰£ÕqFÕÑ=Z®U¼œ—å?<€¼Ë.›-à÷ŽÛ-P–U‘û•òH ?QÈeÝ! 
_ÊÚ½ì¢ õ •5(X¬ájå.UÃÓHj!t)ÀúWÞ„âÉ$~Ú.šXÔ†ì¤Y˜çi,—é=Â|™ŠÇœþ~u•—kvWAEê˔߇=ÎrA{O©¦³N¹t¤ŒôGT¢ÍÉUã^t>|Å °)ˆ#bÂPgló.ñò^¤ô;ý3B®_°M@ã´@ ¤z,ÆØf$¸…Åacoö· 34Ó’4¶¨¿#0RŽòÍ#ò(ÀL‚¾p©\ô¹lƤú¶Ó°¦ 8‚Óf E° °Fù„’®<@ªÊ9)¿É'‘D˜ˆ0:§.Q¶çËk#ÐÆF&7a6œ€3`ê¼V­yi87Xèá¡2ÈÀ\Œ¢ Ó!êZjEÐéw«<>R•]¬GÐÃaº-QÏ£yªÈ³äd†ÅɈe#\K?NË©°Ï•û&¤PfGD5Z>X§Ý-BEE¹¼ŽPºš RH¿€EPâÔÐÜÏ&Ro %"%¸  ?¿ü¿NÄø@BoPJWTÐ]ì` â‡S ‚Ö%AÉ`2Ùe¤áÒ¤¾$8æ@È ŠÅPYPnP_”õÅå@õ F=XPØL|K!r÷F%é4#¸ïèè(šñÔåüÔýoþ7Útõ߯Nr×/þuAÌc¢Tõʤ¦nY)^gäš«jÎ{Û`Ä~é+[ .G$¸û¹Ãi½pÇ›»T•Q½šB"1×JTIr€ççá#Èò/­ ļ)*Ÿ¬äˆÔlR‡xHôy§3v·LM±ýÙUb£ »ª-MÑÞU Jéêö¤?ºëÓvÝ]Ÿï–п´S²‹Ç4¯ögÖ·w´hTiP*J¹ÓÙ>˜ìíljì„_»p‡b©ìv„Ó)$ôû#½©õš.…šþ¦vt2¦UfsO#ÍÛòân§ ’ k*²v™qÊí^¿Bz¥ùZ¢»£-¥¥y/ £¢Vã]ÀidäÔ¬…Û«ŠôYú£ÍÃS’ÙÌ8ß”$b ⃇ù'È{ÔJ) x¶°õŒ{¦qU§Ã@d*f¢˜…Hmé|noFKCþB-ðy` gçlËÅ¥=Mã*uå³Ò§U¹¶Ð΀ÉàX?cçcMmi BR:æ2 a-#]¥Mlß‚ç‚ëÄxÀÑÞiQb\ãˆC2ILÉJ·æ}À2 _$4á>„Þ› ._ªšv—¼19Úì¼ùõ}"WÔ î¡,R"˜8¨ˆh듪ì.×`÷ß„­µ8DœÝä€ð"ùzíw±¼\'>ùrÐ…:×µˆ=2¢Ž$÷- –Ü1á®­™-â7ZØÐØ5Z® Sïî&F dÝPÊî@¾LÍ5ôì¦ ¹-”Ï6XÝxZ@æ>ÌãhR\!`fáyeU%ÏË@Rg|v?ð§l4^Ì"HÉíq¶ðîõ•UÀŒaY˜®aÁĉ©øþ¤¯«f£J@bÉ3OS©""aû„®N¸ãÙhhÉ2%(O‰Ê‹:À“]ÉÊ(+Ø‹JN” SÚ{’ZcÛÅrR‰ë MBp8R$­kÖ401¯;„6­6–CwÑÚMP#€.k¥ˆ»ÉÎØK â×I&O\”Ãt…i;ƒ· Øø”’hknKidQôðn@5‘$H" ÓGÖ[0É!O"44š à ƒø0Bµû<˜ÎªƒÐ*xST:ý‘\%­éúJ¥Úa B»”ËfnywêÒì'P—DB‚\@ËYÆE² ›-®Œ€V Õ×õ´’ 6g¾ŠœÕ¨c?$Ä€;-Õõt¶„I. ¡H+B}ΰÀ²F©%Ø™º+[ÐÛ1ÅA'^„8«›šsìà¯W²Î¤†¼\k Ý¨ÚB«Z6ŸÐ08MÕÏ{m3˜Wòud!TD*_e`Å ¿“R­”¡Rã#ãܼàôÁF+Ã(æ÷yŠT¦Ák)ª?ìtÃAd¾ àÜœ=a ãþ!Ÿwµ¼²-Ôš*åC„‡ðr(IŠÏ…ôNŽu—k2˜ ?3\ ˜bña0Ú¸žæ³÷úãûœ}áYs1(7)ÈHý]6ýÆÿ]Á"Á`c”Ò"¨hh!Æv@ŒAJ±Z’‰\‚q†ø‹2¹\PàÆY´®µ'd_‹ƒÈHÆ=žGµØGÍ8‹}‹Hã’¦&$!d·h! 
t)ë/vþu‘Ñ£sÁ>}éU†.7Û©JuµLŠ6Ms°¼lQžØ1aÎÓ{ûBÕKÿ44,K6Ø¢àPRò šÛx ¶CøP\Bu@úø÷óÛ³ê’Sžn?%Ð⇬U;æÑ.¨UW†yo;0KJe»àlêô´ý÷‘«íÊvÁÍF²§ %d ܾ¿ZäLÓA¸$ìã ÕOy'G72ÒhZÂÜZãmn°2ľFæ ykLÇ07 Áî“Ëz`cÈCy‘JÂL))ˆ†êïºÛ°0b­Ò/ "ÉRÈuFzªz%//»óÝ)µZªŠ,WP4ð­uFü˜”ÞfÏÓÌÒPC®*‹Ú„èæH>²ràó=º÷•s9*•£FYq®Dt–Ê©¬ê*{8Ö+°_ ØÆ‡W¥—98;¸¥ž¦žþÊ`Dˆ±”g…Ø NHh¾XZª$ÄÞÍ<ÀÛ^“\:¥iœØÈÁ¿«XÉ6ìÈ\­\ðöNB 0ªeñþÊöRµpyK|[ eUrE‹;6¢Œ\LjI"¨Å㣓º¦ôIý¹2àš6(1‰Â"BëÁæÃ¤WrS%`{ I*Æ»#mH44.°Mž¢3 ø_¢ÁõŽúb[ß$$ÏçâÌ% ‚%›‡‚‹âcÍþ ]ÿtô%Bç(¹„leL.‚ |6Ê­ª!¦ä|‹îy¶iM*nwøGÔRVÖÜt,õ |ŒnÆ@ÂL†¾µCkðº Aç0uaµÂÐ!OË:2œ q¡XSàqÕ:Ùz;Wm¸:ñmݹÿ„‹dY—‡` Ø\šDî»—ŒÌk\hUcË7r™L*?zê+ã AimjÍ6(ñ#iÞ¡XA]S¦†cHõE±Ñïs2°ž¥JÄ®¡Ü$T{'&DK6ɤ{W1 pšœRx•JzÔ("0 ÎÛ‡ŒÞayɽK/xaå½xxfN",ÈúòoG€A>Aù ¿¢g-ZˆH¨ØBçl ¯Àˆ·H©E€-b RŠ¿jA>‘»ÄU%“I‹f¡G"(F8 Ùw£k¥…Æå ¬8…”´„`ì`ÍÆD[Á8EŽ~ÛbR„T(íúg.-Q¥Yo ½ÔÔ{þ;“5`—)_+»*þê.‰có¶S§t®´e ɵ'%¥›zP×ò“ÛÛtN4zËŒ´n„ÎÑjj Þ{ÕSû¨ÎÌïžïHÕy:wù‡÷Jƒs>=ˆmöäà*u ç‚cF„\”b:åX›°¶ôYÙ(`bŸóÒ%DÁ4èT£0”>ë¥](F@N´8ÚŒÚ ¸ÿœûê}^î µ©•œ Øoè¯éâh¢8œ¯¥o:æ%Šg¯çð‘tÁAZ3øÆUY]RüÐÁØÂ~Ä)åÁ\£ 7‹I”‹1ÝîiFq»7 úÌÁk/Œ™ h$4U©ÓŠ4wvF£] wõæ£ä«é»ÏþÈéµß|ÄÚKÍ=ê­‘l¿Ti ¢Ü¡·[!-GÔt"ã8*(SÕ='í¬4¤å•eAÇÆWû²«Vd— YÇsÛo¬ZÏÉUZDضÓIg‹IåFö_USÜ.’ÛT ¹dTDYqÒ¾H¸=ÌV3á !/ïR†Ö.sÂÖÞÍÑQ¬gÒF¼L•óFÅÂ-p59¥JCÝ ¥XFë V.·Óì ˆ€(“a‡¥Ù^‡¼á ÍÅŒq(QËõ"HESsøËÔœ°¶¤¦gí?YЯžþLss›A*f!Gê\m¤ÜW©çòT*â #¹räÁ N–¨Uö:@= ÆbÀ®Ü ™­].€ ØQ@(³çæ€sÖ€%B’`¢Ý-á–¨3çŸÃ³2³ÜÎ"ÅïЭÏãitá»`J“S^[C 8_´”’â@†»C6ëöuCÄÿ ÿÓ¨EéX04\-Õ‹iuЃû@ ¥ ¹&´ÈÔF” 奴ƒ±÷¥ûz<Š•²vŒwNqÛCc)ex:ØÞ.÷:xxñÔ†Ó룡àyoleG¦eØ>Ϭ}ßJæ2‘ ž÷ýÐár늪"¡A¥bB ©>Ÿ§,çg/žŽüK#ñÔö.á+·ÛËz¹JÛ c€o!ª 0/È`!X‹9¼ ½¢Sr&›©ŠÜÀ«1tXðzžµ¹> «V*ÐÄ@ ùp€éÒkL©®)q$;¢Ð·Ã¡‘&88.o‡Dwx¢rK8€´ì\Û?”Ð 1® É;<¶P8Ü’¯—¡ÚÑj*DZN«ËÐÉ+pݬ©&” ¼IМ  ¥’­!R¥žÎ‰p£HªeÕVS>ª ¬U¢<Ü;Z¦²ÞÎiÉr{JEçüþÀå´Âøqmu¿ ëR[¬éœ= Žó¯I¯Eª«ÖžX[(’”Ö¨_Ô¿²°¼‰?P]+U2;Î]½X’Zì@„®bx{'xi7@h"^$ÏTlöªÃVƒé ‚ø­‹§Ó51ýB¯QÍn2«ÛüŸtìßÑlõßððp{{;™ ",¨2ˆ(ö!›Zä´ß ¨_!òBPÆqË0 Zk‰¶VU ß ™XPÄÕ²þb”¢³eîEIëÖ%ï‹2¸)®ýF‹}c'NÕ<”Ýgã±$Bœ½î `º‚ã@I漕ÁÔ*³ço_ ûÔÉÛÍôµðt±îÍ:¼ts•¶µwëÀ ÔØ@NDl@‡IÝæ´óÞr•7lŸµ.Š8ca#V-S.0‘4Ë»Óñ’¯Þ‚'â[mýÒÒðÖô]Oazân§µ§w ùüpTªÓì`± Q'ØêÎåÕ¶®àCñ&%ûmÓ‚‚šœ\ òaO€ì5F—4¥]AO,•ȇ½Me©Äûí•z*%öÝúù)P ݶó—ˆÉìiž¸,ù…ŸòµÅb#‰·×•<í(Ôs“ZÇ+ªxÎS{Ü{òÛe/|ƪ´ìö1Í…²¶ulûÕ 
¤‚—vm\WkßÀj@H"×<µ†©¶"¥äx¦ ŒkÕóÉÎð¸Ô8è¬8= ›(VµÊä0ÖjÑFƒOró1Z"‘È%OO½h¿º8ÔZEw2œ¡4xK;¼™l.ćá@Œ@êTšÀO4d­Á6–èŸuñí¬\áܶœ3®‰ B‰Œ‘ ëK$‹{Ôݶ¬\’ ’¾È[Ó¯3˜V À͇ZÏíâUU&Nk4&j0sU Ò­F&¾æ¾OɼiÇ¿lÛ³/µm€vÖ §> ¯Î /Ÿ¯twu ¬ Gl² ¸b†D…ÑͰ)Â|IÒ e«°ç±‚Ç•/§}~OJͦÕüd÷NTYW+¹RÜòõæMB¬9^Ú»¤ÛÝÊÖ$n¨°äìC˜VC­±6^Af;‚¾àæ4 ðí"žîXqWâL|SÑ+0ëjôư7*Px°çAît)´TèßI É'iSu *q•n÷w9Ünø\øö~!7ð’›j£«>xª›>´¬i+œVªÎÀY>4¡µs0¾™2Ë”498ɨ©År¨,%øa¸ÂrC‚Õ¿¬eyD0ö¶rÛhe´ËÓƒ|ÌîÐ4:Þ;±,VâP‡ºÀb[¦.ß%ºëPj&êþòû!Š©#_."œ7`Vò¸}ÈÀÈ1.Ì¢XVë`)·Ë‹tH#_(»‡e–w$òë]¬4ëcÐ+çîþXµU‡·«(&œ gU¨ 5äÁ 9b'®xàDáùXl#ѶB¥’—$X4á a·qY“kÛŒß퓾õª½ ý3x†’ToŒ÷¢Âúáõ#=¥c›\.<—ˆ½aꎦpk1-–•(Û~Ö‹ÿòEú‹ÍC›æ®˜¿ÕÌ»n£¢[µªÃ‘³›ÓÁ;X§¦B8·C6g*â-H^2üÍÿX,hb ¾LéH[܃B¥X›X³•:¤ÌZ0Ô‘Íëþ@\ûnßP¢/Êû×ËËœ>ýæw¦ß¾ß[`f¿}øuÓçÍzbÚÌ=æÍ}uÚ}g|8:28.4Nu:ˆI±NØ4¯Ó§ë¬œlå[$TRÕ-š·Y\ÙÁ·É2Àˆ|’"möL# Y"BGÕº Œ!øˆ^nÔšƒíyM,syÑ_'ôü*’ñåç7#2½G‹î˜Ýö“mþí$`P;KT]³f >l<(Ž Õ>!ìßsÏ=»ï¾;Žï¶Ûnñxê5P_‡Èûàƒî°ÃR·Ûn»åË—ã -®… ùµ×^Ûe—]pÕž{îùá‡B…àõ}òÉ''NœˆSÛo¿ýÂ… q.H’ËÍŸ?¿«« šð<ð­·Þ2¿U"ˆc±¤jkÿ[ž¸ªÂÂ¥ˆ%‘èÛXju壛>;æª7§ÚÚµk^=ã‘UpÃJ]¦ÎÞåîÇ?˜{ï¢+ïþ`¦=&\öÞ¯g½v`!S¿èÞC¼Ñ¦«ìÝÛ.yçà/òËîXtà /ì!zΛ¿ï#‹çÜú×3.{á°„´œvÉ7Í;vEߊ?|ü-oìÞ¼výÓû>±ô›ÿ|>ÜwPO,~èæÏ¼ôõãs©Ë^úe@˜ÂÉã®ûï#ƒJ¥8åTuμ͓J!阽à€$•ïÓ5Ê5οUE2$ðü…¬ËQ»äõV$Þ¿òmª¶‚Ã]ëϯF²Õ[?&Ô÷úãÿæpÕš`ä|‡ ?ÍíJ¬w:1vÃ×Ôì?ÏŸóÂ7ùB:¿üïGÌ}âXr5½sߨ³‡r>dBýbàK$Ž€¶S#(] ðòn¸´¨UŸC¹ìÍ_®K­uQ\÷ønžaÜŽt•*ë´ ŒFâM§sHœí±ž(ö½M®ë_<檦]»ð°²*Î~çÀ‹ïÝr¹ r `2ìæÚ32\«4ЛN—Gð)ˆ/-R\æú…Gžÿêž©b¦?·R4PFd<ôYN+Èù`ÔwñëûÀ»üÜœy»Cú„´À‡“µ9÷VG–8•h‰5_ûÒ.'oF"µeç.«梷ާ.©}á_¾ìÝ"“™3ïïŽÄÜ™Ì œ]à°MùL·j“ ’!ƒ´?€[5²Á€¾¦ÊI|Ÿù̶F£~ïâéJ£œãkJ+ûò60•síŽÅÇÚhã”['c4ôŒÓm¾N¨  Â)gʈëç„Æˆ>©øÙ¸€lú$ÃlG¨+†9Xøá ¶Ð(àò †qn@êG&% ˆ 4¥Ž;j65AÅ‘) E5süz€„úàôççx¤&§©1ÂŒºn:FÁ…›8xÓpŽ´5>ˆ$9¼C¸öíiñüÐ`iT­y%Íûl )î¹@ÜîYzÎ-ïœ|ó+' “o\8ݨjJUöònL íÎö´˜†Õóº×÷–\\M8aÿµ©uœƒ.ÂNRp9Ý évûùóvr:™r*àöé†&I•@ù}~ÃÃ5“µ’U…ûÂHy0Ü­Z«®;÷¥‰>¿@4è µ]dLj»yDL$¨¼A5ÀˆœÁ›‚UžwPA‰Jy˜•+©¥’\ÄÔå÷ AXJ6NQï÷-ñSð¹#½ qC±DIaW´hˆôPãõÛ>> ˜¬OT×;š Õ‘-­«sò„–­þñ /—¤÷@§Q©ò\‹h/¿þâÚyy‡Û^¬f$£X³ 3ÆÝí%Žù„[ôé4« ìÎZÄáƒí¼]‚¢oñªŒ  †J ¢µ’¡.+÷}™Xê 
uoe\W¤ÁBÇGÍxs?©‘-DN£kö{oB×Ö·œðÉh¼à°yÊšòy|€¦}3žß}ÎSyÉ”ªZ§”åÒ%óvkóF?K~ãCe}I%”•H¬¥6à…gÛ¦iŒ¤­ÌHÙjúèôªx/™JOpëK'>ÿéâ¯Lè?zù·#Àã@ê¾øâ‹éÓ§ƒš‚Bú´ìÁ=ôЬY³9ä(ЧM›ö‹_üÔ塯¾ë®».»ì²£>å=öX[xN!?%„¡‡~øøã?ãŒ3nè°ÃÃ)s°3˜AÎO9å”Ûn»mݺu'tê\¿~=´Üx£ ÙGqÄ9çœóÉ'Ÿì´ÓN¨M²ˆ}&“±8ƒo}ñ<¾=µä„|ƒ—Fxi[q‰W‹RKò‹O=áò3ž“ϳŽzÚ«›³ã{ˆwƒß–æk;~‡'/Þo±³ÝrÉ»ËGÖœyÈãe¿ˆ‰›o#y§T–_Nň+½€ztù 0‚—m ¹‰GKõÉÀsT—i-ŒQÈ+FJñ•áixªe³q˜Â9Cïq¶àf0㇠NŠ4¯í_ruÇ’ƒJQäòô#³öH£Ð펣R)‡Ê{š!vRkJ ªö¤<|~©{=¥•} vð÷ONð¸ÙåÙåb= ƒ ØÙX38ñtVÌ–}Ò¸mnÞÿÝr%ÑÒᘱû P¿Ã©¸põ5SçÓ¬èÂtO‰)Ý[9X_ œ¾ÍÖ5[&I­ö@7[-ù¼n“›aÏÿ`ê›} ‹®bIÈ–øBÅ)i¬†€"ˆ‰ˆW\Àô?[öqÏø.Ü´– š®ç´‡ÇÙ[ZÍ>zñŠh¤sd°âbZÛ¢“N¾<°P”kŒÇp}Â|JDgxfÚê^:2Ué®#^¿dþ^^gØåޏ>]ar‘òÅÊe<Ž ÞïÛûI“;šºY[#®UÉw‘üñO®‘‘àÓ.,Z÷ž‹vh 9èöE<±ªbô§‘8Ù èÍ{½ !¬=ÔE#B£n@lW0¢aů”Kk¯6DH²°©§³)wØ7%:ùÚ©ï6m$50kï¯ÙãY¤V‘­Î(•”²Ëh¸Œ:KtpÔ‘\d•§®û·Øj;pW¾¹GkKÓÙOP¥¼[Ç{„ %¶CÛ–yjuŠú¼L­ªÑëìˆ7B%Õa¤ynëîí)|³CÊû·,ûå—V¨÷‡Ç³³Þ™–SןôtG\[uÁ[[¶AéR/®Í/ñs6ÚUέȩƒ³ßÛýºO­³ZÝmœ;ç«^žZcá%FŸÿÒæèDÉÙja‚]«£È|²Ï„΂ñYCHhÒ3ƒ¬Ñ,¼=¨°ÖðýiêûE‰•‡Æ:Ë6m§íwžØÕZÔFúKKúÒE¼ FàES¯w·Ø¯Úé9Vö"wÌ’u”ˆðƒnQ®ïк*W¾ó˜å7ºd4SòqÍñ!Žg‘KtPìíÑìe™­ºÒU*jåª,ùßjDÝËæèªìsÕ¼=Ö8ç½m+žÏf½¿™Ñ߈tg÷øòo÷j}ôÑm·Ý6‹šB굃,‹Dt3gΉÅG.¼ð½öÚ %!‚Àßê¾ûî›:uêW\Ù§>øà‹/¾¸­­ ¤úÎ;ï<á„Î<óLÔ|饗‚¬žvÚi؇æùÞ{ïmÆ‘žžÀUsæÌ±¤Ûßÿþ÷àP äã›o¾þüç?CžÆë†zó{^:à!0šUô"‚8àÝ`s…”÷•.×à nºÿ­kôzýºG¦·z®ymwÆOœ"z ëŸZ~‚ŠRæÒWöô·Õïÿàä^ÿè‡âMMÛÿרDh# lª{48çU­\àNÂy%¥?†\„ÞT` ¬Q3޹è÷[ý_ü™ݰ¦¿oÎ èj¥”xøÄORÞPrQ¸¶êIEÊ Çéï§=1㌿¥ìðWD޶md‡ÑT6Ÿ/;YáºÝÿZ‘³×ìuïäöf(p\3÷~%èëjTýš‘@#Õjq²¯û“ÄGpç¾æ£j†túû¹8.à û¹æo ¸8?Ç9÷Ìtèp/ýÅ«b¾Üjò@½ë£ãùu—¿³[4âÿ¸ÿ=Ú×  –AÀâÅ"¤yµ¨¯ 7±E9=s¯GÝM‘Ë÷yþÖÏ~-±É)ÉEe äé6 "²+u¦Ô°eo|åÔ =ã¡Þ¸ü€GYžyàïõèM§¾ý0¶ÎxdëImSÖ$×z8¯Ç…ÄìP¦÷)N3Š~»ã–Cu{™ÉîIÅBiηûÉfàùT@¯êµÂ%/î(Jà Jæ<ç=g~’/ÀIÕñÈÑÅF¾¢Š˜=y7òûx?[¨$ûFW¡g<Œâ›Î}bŸËVR£_uf¤±þ¢ç§ÝóÁÌûßÙä‘/oÂ-Ò©„£æB¢*fMç#LšfÈ2n@GI7Ä&hp2—쌵çʹë÷µ­£ýìþÁù˜`“g0¹þúŽ=ó¡½à–|ÇÇû}î¾õM®(ðAÔšÑ)t’K/=ãé}ç¼wÒ™ÏPÖrS®ôÃmš&¨ q#ÔOE`Ó$ÑÕ Á`nt§×¹*š¸¾¼& wg×ô4ç·ù#ð§2êHïè °VE­>÷½~óØæñ ¬N¦¥75úðé}  ‰mðN–jT®N¢wˆÖaÍèò’‘-V³ ·£^U(‰uÑ,jó‡8»®ÙR5>-;‡Ä%L8ïjÕú‹Ÿ«®<¾âÇWßB'@q*ýÉUè­æVÿ‡Î‹ñÁ?üçlÈÕb“ËSÒ2r­ÀAäèq—wwðˇ–Ýxô«7œò\«ÊºrÛ_@Õm¾JÚ”Y&eÃVNxh¯²-[ª%Dç|Èè$è:Å+U»ÝÅ_óÞ7}´¯Á-¹ÖtÞõÁiw}x1•PôŽ÷®šýò);âÝšn 
º[hƒG\ÑY¬cqÅÃvØÐÕ‰vÇ=ÿå]Ï}}OžsçFåÔ¥ž³—#æ<ùo;æ¥aiÚ‚ZSøDñ¤kðÑä²£ÚHÂL•òux0 ˆ¼ï~¸ÚhÄ5ç?zù·#À¯¼òʹçž;cÆŒC=0Ä_è¡4^»v-fÕßþö·mAtA ¡~ýõ×A/‰ÄêÕ«AAñ&0CA± úýÞ{ïá ÉwÏ:ë,œBm8 ö¢E‹,?/\*‹[€Àã,®zûí·¡ÔBZ@Ø’õ«_YÄâ2äc(«-Ø‚ïyåU©êâ} èD1@à(BÀˆZÊШ§çÝcÇïu.c/>îqdÞ¾åðÕÇvÑa‡ùÏÏ.Äðêì òÀ>’©Þ>_ ‘Þ˜ê?ß¼­TÖõ‡gîİ#³êЏvçgBF[#©Ç8×ù{]ýû}æ8X×b8;ŽÁ®®Ê¿Ùaöýÿ¼²9ÜÜhˆšº> ”XiZtÛél=«•^•~ÇŸœxûÓ'=³x&ÔÚ JÌôGƒmè3àzø!ËÛ9;×8ûÅc.žüì…Ç#&ÁN³ªÙ¥ ¨Q>ú%–1Ê•l;ΕҜø1TÁwô""„ëev¸ÂÅE.‰õÒEû!.tÎ’ÝÖ‹(ŽÒÜ3̸Ñkó¸$˜ÅV¥i•jä´<nèîÄù‹&ãñ¯~òÄÛæî´»šƒ±¹Ø_ðµ¬^ùÒAÚù‡ÃçRùVº+,‚ Gu £V©ŒQ+]úÚot™nïš/Æáh'#@”n|öœ;z™cÊ×N9)ƒmñ$œ³bÍ­I1…dFCóq®„4|î«;]öÆž"Íl%¼nwÝ¥ÈÊ]‡¬yàȨ©3õ!90šuõƒ'!Ò‘XBö°ŒƒçpÙ~ñÛÛ–€'FUUðòj]ñxxʼÃÝ‘’Ò‚Ûíõúú}@6±\:­CÕÒvW}æþÿéóÁ^½ÇÓ@L ‡µ¸•ˆ~ÃcÒ•[æ æŒLa5ë_浤ChªÐìA9Œ0دí£C|Dìfk ·Ï…×´NܦzxŒ]̼:#‡bî¬ /‚Þ:/±Z¶:|ÃqÏ•ªÂ5‡.¹í¸/EWg¡hoñuËðÐ.zB¬È´ÓPá E­qzUšSoüà_é¼­4& ùðæio#;ÇÃãC¹õàç¥<}û¡Ÿ\ÿæ D2¯"™eo üiÿ•†£ÅÕ7+§Ë3j$ %êù¦p°Áé+ K\Á†Î–ÎeŠ;‚ÞåÿûÈuÙ¤Ôdo»i¿Å‡ïŽC>Eº½aS+²Ñ}v¶NèI9”7¤J¡)DJÐkö?ûø÷Yž»dÚsHûÊïï?<Ü´;™Q-ÅÙà‘ÆH¼;è]O­GBj§Ÿ)™¢–¾bÁ^Uº0œZ3a<´à6ª!o SÌ% tÝnmTYæeŸŒjk gM(0‘ÄœwŸùúáwöû?.>oÂ8÷¿àM9yöŽç/†À5ðŒºþ»×vªÕoPCüX4ÚT®‘¯Ó+~ì¬ãnÉ6ÀTÁΪ1éú0[z}üùÏíŒ^„‚ÚeÐê°; \ÊÙ~Í/ŸSlNÍnsGÜÿõø¢7Cmô 3î\ªæfö,FKZùä±UÇ»À­#ó MA½E¦)hÚ dkq몀Çt¹ø¹žu!¼\ƒÊ¶…\U)É!ÉQõ,Ó·üú½O >(P*ø 0D"c»ôè0†.®8ôžðÄòÜçŽàZ Ô ƒ8À:yë'ªx â¦vHôÊ?Ù‚yu4`3?v¬ª­LþÖÌ2ô“ÝÒüœb”@IDATÂÚ~‚ª`Ü}çw †ƒ•õÀVÑ ²ð¥š4i‚‚,2Žƒ¾öõõÁZ }2ôЏ½E2·Ùf›þþ~ÐQ8j¸"vò.΂rƒÜ‚$ÓÔÔzë­·ÆèÇYø|Ak:¡‘ÆÁñãÇ£6\h@cpŠTÄŒo]nG2™‹,ÅxêE G¹1eÁ[Þ·DÀ¨SO-¾› è·½²îË^úÖä¿ÞsÇk¯ßODØ0ü«WdR}„îBæÚOŽŸ5ïHÍ3:ç¥#nøð¸Ž­}kÒ_Îþüšxß¹‡\œÜÚOIÝùÑÑÙ”2ûïæåë^ýå5Ï\¯ÙýáH÷ãÏ¿çÀx}%ÑMIÔýÿõÎ%{<œNÉeî±/ÞqøÛÿA¡:b>¸+b”ƒþ°ßLÊ@Ú(aç@V‰;~ÝhwÐ`È”Kw}næ¯ÀG „|æ®O]{äÓš¤^óX»ÿÓÓSÉüm‹ëŠû4¹Äç„vÓåݦ´Å¢3výËõ= Åãu^°û#P»ŽswIrÙ¨@;߯|ž¥zÿb¦o ¥âU2ô™SϘ{ЉL¾…ŸpÍÔ·'LïpÓH¾gòKžÛK¬ˆ—¿¹'°/òRÞÏù‘é>á= ¶Áä\(ænxkZALÝøÎ/¯{óè¹ ½ðÙŠbåêEÓ×gúf½{X¨ {6Ô³_Ü`µóŠòQ×íÿØ û>|Ç©ÿp„l𳍰Ehðꃩn89=:7kw9€D]O!­f-èÏ–XΨYqôªEÓ(§vÁ‹{»£Ž;>;m$ßwå¼#»·j¹òñ£ø¨'¥$Käø®\â;ð΢‘µ;í%è‚,Ôr=eÒÄ^±·¬J³Ž|v$3ˆñϸ·ÏwÙãûF››‹efˆK^=’äá(•©$uÝ]±×“ŒÁ‰ P >èö+Cm¾ö¡òH—¯k°4 øz³+í.½#Δ‡ ¶8ó gFÅÞT-‹`ÙÁÚ‚Ä|6’ò2è +#@Z…S@ÒëZé¬W¶Tt©W[öøÜ×¹/í‰Ic΂®}v·Zøõý£2¥ÑÁDßõ§ÿE¬Jp§ŽùZņ˜GÂAï²Ü§Ý--g/Üþ¿žÚ* 
‡‚|}bQ‚kñ•$KòÌç¦Ê†xÞ[Ûæ©:§f%©­µ;¥#QY·Í´×]ê¬yÇU»  ¦  ›SeT¹dÌ:ù¡»Þ§Èõ¶ö£f¿gêGåj•s >_8Ÿm´ãÆ…'ÝüÖ)7¼øëûÞ¿ô¶Ï€ƒÅœWŽœóÆ ·¾ûkx ™J&“¸á°×šÃ-`½¢~âl5ýñ&'r¥Výs?ê†÷þ’ÌÇ]¸N¤]0ÉEÙn8ð-—+i´í‘%—c[×R±JìA Ìf`©»÷¤ÎÜñQ—°‘Úpv½Ïé˜uø'ÁõËÞÜ©Ý)ç33^œm¿XÊû®^ÓAÌC­1õ¬œaÜÌY{Ü)´ú‡¥‘[Þ?F©•JJîÆ…ç\ñÀ™0÷Üôâåw¿5ƒçlùlzÆ/ðŒÆáOºåù¿:áô3ྟh˜gÉW–³‘ed„ÑÇ¢Ê8‹û`æÇDÄ¢&?É¡Êü÷Z`µ…$Š6LZ-ãS@nñ*e?xiÁ*lQbÐH@²ZWCJF= œ(®„Qû8 ƒ±åYD¨ â2Ê —q¹U9 ãþb‹ò(òƒØ€ÊQ³e'¶š÷-\-Ñ’ ó™@sØvj”:bïÓ_XòX==|Yw½4 ߘԼ–·—úõÞÿ%¸bµ¤£;Ôc—é+w›íŒ0³_™ÝBÿâ˜-ΞصYaiîê#ž¬±Po_}‚*í5ß­‡}n«1›Ÿ-ÏØê>ãßùÉy0L^pâmJÂߨ–ͶO¦¦Ø¤òÌ-_æÜAŒV¿¯§ àJ ‰˜¼g½¼Ù§¶.?2û½iž´|Ÿ:|¶¡úòd~ûò·ìµ¨T£*í5\~û„laˆr°äY(jÖëg]½ï#ˆÕr»¨-š»2%`—½|Àu{¿ ŠpÕ©·ï½<ȇï?,ëàZmz¢¢§|}­¼~3YpoÙŸŸwÂÓî9à#|gÑöô†›#xÊV*ϮƬÃy;”Ò—}ýï^õÉoOÜådÞh¿ìÔǽúv\#0k×÷¶lùÅ­»¿[•@°KSåÕ«?ž9ó³;¿¼˜ÈöxˆxéÖù§>òÛOáïzæÝ»^<íÉ®X×Ü©o ]ï}§kJYËS¥f{³ßѪ(êi/ízÏ‹+†öµôæãN­ÉÏÅxÜüø|¾Hë Ä|´6m98¼*GòúT"Fƒ-`kÒÙ\·{@Ÿ¥øU/îûñ¯C}zû¢3À©ÜwÆ‚¥+—wuw;úY PÌDRíØŒ»öŸçã¥Õ"EmÕuú²µ€µºÿ¥¿€½¸ç˲²<ç½}/ÙëÐh¶†b¼°ÛM»¿Ùé¸eÿÅQo{RLc(<8Ü"=ÛÅîÃÄuǽ}Ãáoºx¯ï¦S^B,Gpʤ­®ÙõÙžÐÄ?쿨»¹§79ÔíÀTÞ/¯¢aàËS³æÿúÁcßø¯§ž2õêG¾¼#óÑ}¿ì¡'¯Î­¯Öl·~´û»Ïo©{TÝ|˜†ù½rhy ßñIßÇ·ù"°¥ðî-‡åG“¥€¨…U™ÅO.=“ uÑV{Øà%¯L5¶¸ûˆ{3É-b{Ž$†îaOJmüÖ Nh«§¿]Î+^WýÎß½WW|œÝ[Îe@…Î{l»ëyé§¾ÍÑæøÃ5Á žY2‘Ÿ )±¡¢IAÞ›,Œ"xì‚—v‡¯Ÿf—¯]p¨ÅÕ‡<ÖœŽýîåqOžèÁ|"5$øø!zmR`‚(Iü±,FØð}Þ,DZmÑáò°†fêhïF/Aá„Hè~áuÃüóðÞÏßö¿žV­ª¹,Ã2‰Â7Y\“û𢃖Íu‰õsM—ìôüNãö[»fíu‡,œÒ½ÍìÃ^²9jàŒ½ÝõDjGm;áe½ì¹lyé1ÝòÑi 5Ä}6‚Ïö›‘ÒU7˜uàödê¯]xã¯6¥)ОUzi3w+£×5M•ŠÛFHÒ¬}_ƒû„cÕ7ä))g»bê3ÕJÆc3R—›÷Õ¬¿#Ù!L@Å?'dô„ˆÚÈjSlÜýÎÉè¥{<|û§RÂtH j&Å7ƒgùiˆpø¸0çÃUp+Þ:;;aÜ¥˜2e ÎB6ƒLJ Çš»š¶ÄŸªªŸ¦:T„§‘Ãc”änÉ | ½°ì‚ú‚ bõE ©ÊbU,9*Dô&^6®EßAŸlGm(ŒÚPØ¢è8jÆ)D…tAh-n%qܺv@¼A¤±ƒ–`û­ è¼HÂ&‰Þ’P\¬51ºI8 Ió/¾4/U‚î ôF¤›÷ Æ=D´›ß~û‚cZ ‚ʤ?½8õÜrļKŸ®ó´î;lß+ÃzKXii §Ò¾¨øùD_çí‹¿å—/ÿ …rš8…—³|3AëÕRm¢°U\ƒ’ÊHWò´Í6yÊv+Ö¬¸ü¸7<œkJ3v0ÆÑÁÇ?ûä?°îšý ¸ÇQ vwòðÿ绚·HUVÍÚï©h‡gxpýÍ/ž{ç¯æ§F+º!=4«ß:ýY6${œkö}Ž¡:/T4 ¼Ž¶9–¼³ù„Í'TB%±TÎ×vOºg¿yð4ŽhïððÜ“‡3Õìegßsó¼sØ& ~©ørh™þ?öÞ:ºª«oÛÝ=žà)Ú–âî­ EŠSÜ%!B xq m‹S´Hð¢¥Їx²ÝÝϳȽ=ßç{Ï÷ôŒñ¾cÜUÆîÞ+KæškÎùüôùEÈ·m)ixêÃRQ;!2ûÃÛ‰ÒZ3ökÖõyÄ ±r¯·ÍéøÈN3ïz6•êRèý)dmÃëLš ¯çe,¸X7Žº O€ÝêaYÆT9[º’©ôF¼¿UM×nêý 
ŽE;fõ™›f´,­4ÀuHà€ˆ›g¢'¨Rc.ϲ6§ñò×õù½Êd˜×®*E•ês#“Õ€¤Æ#®;|V‹·íë+V‡=Ѝä ó÷ÞݼÔj·&H”Av¬¨ü±ô¨v_áqZ¥uÔ ¿¶6ÙóÖÈw)êðÚÂá¿väM/ž Òù]¾ù­ÊPÔxg.Jä§aLÊ8rTDz‡­J”Êàí£ï.9Ñq§;’Se.ów÷ĸšûÉÙׯ‹ê¤d¼.{)$2¡7È%|£ÙðݦTö‰ãí’±§ä^Àã@KëÊÇtÏ¢Ùy\—¦6ùÏ©tüQýШf½`¡Cø&» ÐOËž&+“ Öªû»Çb¸.ê¾ë¦É¥Ç ‚Ö3…Xü&Œzy5Âñ.䃖Úç÷ß®pªRo°Õ:ir‘¾\Ä}ñœ3oéY:å|jæðß3Oõ£„­Þw9ôÈ´C­—t¼!`ð áîÏUß*ÔÊ ½Y –¼(~U+©3Dg~ÛDÕø¥ù…”´2•É®G*šR#±ù-TcèÞÐAÄtÒ<ް-¯‹SÕ÷¦È’PìÎçñÛ‹eWKT3䥦s))¯áÍ@$¼Š¦1²ÝË;ü¥ñW|ñ¨ÂmU©%  1¼“ÛÏZ%§|úöÍÛÝwZ®èôlΕ¶›û^trõ¼8$»Í™2Ge£¤o‹Ê¢„=ôîuIÞå>'0^U>B>±‚'(·–Ȥ‚·úgùã*uÉÊÙ6Š•jùŠ‘¹ƒ· ¡)Ã( 5©õN°lNhñCjBZE±=U“Q¨/Kçת)¬¢ kÐT:ƒ¬ß@(ºìJÕÃïÌ=ñ)z`Q÷ƒ*uÒüÚSîÒ Yðñu9X¶Á†‰êe‚G… 3…ÞZ^@<òF\qn8Bc îhV¿ãJ±¸°ômŠ$Íçô—–éÓÒÖ}I×™;æ¼?êvù¢`3&ø/iR§>ä Ç”b-×­P …¦mvÛ“5™52hšå]OBæð"Çr®ë$uQ ,ó—6û¼³…Íq—M%W­«û_„¬ÃD޶8ÜuzbF’ÓãN4F¦rÖ°_L6+GÈæqP‰… öÍ=Nê ú:õëãûU´È;Ñ”búl2«áà‚E¾bXC`d­FeÜèï?ý#7ÅE°ný;n€UlXt ¤±Ü@ž«ÁÈ I?¯ÕÝ„=Õ–j¢ ( ýXmFÀ¹8£gÁ"/ìr èVkÃèk >>qî‹+à¯Õ´݄ۡ1h5[þÅVaµ‹ù &“ÅãÊԊԔĆ5“Ûy=!*l ¢d=0íúLD>tyO%ˆ€Ø0C½IšÞm뢱GfuúÕãN¼{R—-Q«|Y«{“>þ¡^ãçäîß?…#÷?Öèåµë£þïüv¿úíê:¢¦†Lœaº‚«ˆ#™Ô„ãhÁ•FˆYå«IÎo†®QF¤¦¼‡]#Ú·Á]M0aø¥üÄ%ÝÏ_ž”—U0t}S/4üËò;—ºDߘÝØ–]ýâíkC8ÈÞ1ñºÛø,¦’%0øKÜLÜ‚ó/ «ðÞËzÜßLÞ‰gÙzQ‘¾Ð~yóó7+^_v·1Wk/µ”¢Ü0l!›®¶Æ2Ó±*zfÛé=DË¥Ö2È¿1"ç©ÝÆ¥GÉ„žó¦¶ßŒ4R›Ý¸´²M(Z×á®]M Ìéºý›ÎKf Y ­eÖ‰öá@|þá.R‘’ICí –V‘¤Ó&àu1XÌ2P›°&A…ÕDИ¥'2P£ñJ·1ȯØðÇ€ k¥#èȽþѲËMYpøÈÏ‹d*Côq(qÆ]©©¹½ÎÌë°7H Ïíµ=Ɖ oµà»kó¡5î¹’ ØÛrn&åË&ÉÉ£W5™}á³õǼ2.ð 4!û´hÜjy×ç…óºæ°9·ÉÛ‡>tØt¿riÇGjžB%2h̪J8¼ƒ:Q‚ º5ê!¡–0B>â0Û*Óå‰l.}ÔþÈŠYt§³#€,ÙJHp©`6Mø4wÕŒÃ΀ƒ­Œ2d!Ý,3LþŠ—Ö?½Ä%‰ÀhÚ‘þXÀ¶vHï‚Ô ³8Èœ“qY\8ÃkfÔQ µ‡kå¥Á·eËÕ[n¬ßKrBÊ·ŸÞˆ1‚ë\œòc?˜IqÓé¨0º¬+}êÔ¬ýõº¤äÄTè¦v#ï^o¸EŠKßÁÀá1V\›0á<QJfÿÚdÓïOaÐÛ×\šæ0Û7,Àû]}qrÀY¾µ`²„IÛzyœ”ë%5^°¡}ýÔ†sO~œ"¬¡P(±"!”I-V!Ìî¶"ü}â8qÙ‘6Oå«qÜBOY(§Ëj*Åȇ)ÍL.3D% (Š_—‘¢ÐSä|žÞHëæY6ÝïúÝÃ^+/v/5ÜȾ6Po4ÔJj ˆ“šÓãÚ—ÅVŸIbÓ ¼5…Þ:¼ó5SÌÚu‘PÃxüîÊâßxJ‘/àBÊ*%ÿ@3†Ìõ“ÖŸÖ„>tÃéIÈ)Âþ1mî>9¯hyã‚%7:ÙÇ”Y n¡hÙ® 0«§t<´!JÌ Š6ô½,‘)ˆ-VGžúÌÄ‘ÄHvGÊØ:ôE|p9r:CïÊy€jŽ”µï]”²RErûÝ s·hs8¬ »î®™ÜxyÇK ¿­|¥âkò?{Æ—ð3;]òUR1üþös?:¸øæ€=}ÿøîΈÙÝwPÊ= P²Iz„Ê’’µ_bVêc*ž’xLlÄzYåÉܧ?È‚/ hòWZ£•nN¡ŽÁ†äÔaâ"^œäõ;á6Ó2T ©ýA²ý—Uè̶J¹ð­ùeªX§1ýpÈq‚BÉúuî;ºuþä^ßk+Òy<þª/n …‹Å\¤/Ùp÷ó…Ÿü¢([éš 
a•½Ch¥§RW¾3—ׯY§¤¨tùÃPQ>%Ñndúg{uôfV§¿nZm»ÙÀ¡ ŸéŸ Qr!¬4Áe銸µ>#– Ö¹‰¼gd­Ëe²ì~ÇòÄxˆíñGÓës+µaº³Ê_¢Ñ*Ç&ƒdí_ç78Žp y.’J{9òn÷~U莹Þ'hÃŒPöÐ6«}IûcXîá¤Õ[Í*f’)h‰š µtµ&]¨=·ÝI _†]^\Äã3ç¶Ù ‰™ ÕÂÝí²=Ë“•¡cÈøò¥kÃãÑs;~§IÖ¬üá³5îpx±o ¢ÆŒ42¿õqŽ7”žVóõÙ¸N›øqö¬&¬ ×ìF9ð¼–µ»œ¢J›´·äÈ…}#êgÍå Õ«'u˜v0˜¡¤"Ô :lMDªÐ.;=q~›=2™ªÔT¸óϰ“Ïÿìèý¿Ê$Ì„‘ŽÃ¤ÉR°zkHj:œ¬DnM¿¸È_RGWÿžþÝÖ #¶+˜ßqјÃ+Îö¤2ßÂŽÆIµË+*ÞØJ?Lo°¤Ï/°Bçu71yqàöìv»¸2ÎÔN›Ôɺ?_<;üçÊiõö¨•* »V% x#?¿ÂY\_Þæ“ÅðõãÕ⛤V-aà OÕŒ­Vö¼ –%Üû¦î¦ß»Âª«6±,>0³/»ë3 UVñˆ ×Àȧ#°ÈS}@¥¯0çÛJ‡ÍIgÑt‰Úœç++KS’&4ÚØ¼Qó© 6ÉyL¥ZsÅÐS*pW9ì˜Å<š×Éêú]öÙoùQðÄ"v«/ÝC×Ûʉª‰¡´rMgÁg €bˆ'F™“« áT¨ÄÉgÒErþ®1£!Wþ—ðærnŠÅ­ÿ®ÃŸ©ÉÉ5äµ,o•¥ ~׺5j;^Oñuƒå:]% âÝñHZj͵ÏÓ‰¨¬Â¬%Û,66QOQ# É£µT¢ …øžÈkˆY!'R?#E—ñÖû.hXkÅ,=hÎîqP$’B \1jOˆÔðÄi ÷’Ð…/ôo4i,Α@îåÏ×:¿üìpjý‰‘×ÏJf _¶þ—%hÊôå”16 Œ¢0ÉHLµ–0Qv§?,ࢆ•y—TÈ>Ý Ê íýl®°9W8¡_aÁ¯¶Åâ;@+?(ƒ[Dªc†þ‰ÛR£æßk âQ!}ä€vø‰ö;5úé§ŸÐGØ €9F«V­ÐAV0uöìY‰SÐ_=êÒ¥ ôf€1<Ê8 }‡?aÁ:þ|µ911™Äˆ£Æ¥pA òµk׉„Ÿ¸WFFƉ'ª»Œˆ©FŸjèE{þU¯ Çâ@b"eZùñÚ™ùÓϽÝV,>pyÅŽs9@_$v”U˜x\…é-Q¸l·Eí°›E}Lûñ·©ß(á. ü"³0ÚŠÝ—a“¬'5ß/s&åmÕÖl—ºh׳Á«ouC5\AX0P°Âc{Ã`ƒðÁTý/`”’¿ô§Îø+¡Â1«]öÆ¡×* Ýsú¹SPº|ø.´sz§Ýlu,ïnOZ’Q­jÀd«ôk0î7˜ß4C1}(j G\^¯5Àè›Ùú¢œ•´½Ûc‹Ã…™l†Ð¼ìqëËÌîI­V€¬Š)¦ßÐDñ0A³”gËG‡dNl ™§Kœ(é æçÜéö´üοޣ/ ° Š`”7¢ã„™=óÝd÷6Çðô³?Úº²ÿžݶ©µ¢—¶‡,.±F–ˆ~êÕÖÇ›¬£ý©EÐOTáº[~þ&E=‰ œ:nŸ@(ÊH¨9ó“㺄Ä-Â$ŒªS ½ÂSÞ8£áÄ×=êý´èIXPA5ÝÎ"Kn~ö¤tߌ‘›7ŒrÅ'$ùŠK mÞ T&TsYÅöÆíuRk(bL.‘ ƒa¯Ãmæ ùpÀérq« EÉêZïÊôƨ7¦i¥©ÇöžO¶ô~á¯R§Ä›&Gt©1qÄhH&¡Êíp "ŒÍø[ÀB¶R-C-H›Ó1ö`£DQ"Æ9,çI‚”}ÑÊvw>¬ÑØh3àö:Úïv¥§§¸v*.v]$TYÊ‘e6ØŸ“‘PœQ—1·ßQ:+â¥bP@bE+„ûA‡K‚LbjF…¡ ÑX;ïOÝp{‚Íc¶ñ…`c,m7³iWFÜžöéàGä‹EcÚ¬Ðdö”¾ù~Ft›3‘ÓòƒNp¿7É"àŠS ržHQQaJÌSKªˆäÝ,N¨â(Œf÷cgð/C†ˆV;w1ÅDj Y›ÈN)Öë;Ôé¼´ÓwÈ´ìÛÓ®,E„%%˜cð÷‰r L\oŠÿZv»¯XΟ¥ÛœsÝ+ÌknEó¦Sá½>|yå„NùDì]t¹Ïægß¼Ö?uF Nw­W«æ¸=Ý!¸¥b%›#l¤ú¯,¿(¾ÿê¶V˜MPÊJT2õÜNw2',++¦ ]ˆ¿Hšú@V%V0ð×¢DCnLG)cR’}¨_æo]ci…kž zí½ìb¼)Y´»Qr;ó_ìZœ.K¯Oràµ(ˆ «D\"ç§NkzZ¯µôÓËSâšu´]È"éÖ}y™åa&È$K[À¶/å5Hʈ»Û_5ãj“Å·ÛËÕ ™’¶èh—©§?»µE(d‰G,7O–Ñܬ¦šæÄÇàŽ–Tb+AݼY]’ºÈø†Zœ¸ÈVâäݶ¤ÿ¾I6-h}‚¬]ô.š 
n¬”$Î:ÒxÎ¥&¡Åa8áx­°·(Lª¢N—„p–]ÿL›J›¡õÒ{mA£hª2ÕOª–0ìYÌ®0'ä!ùAö±Ãq£ÕµÃÓ.}°üú'Q¾]¡$ÓN¶æˆhZ[FX’Ù{ï‚ÎG5‚F>3oZçuã›î¨—Ð`rÛM3[m^ØjËðôÑ2F"Ѥ³Õen?ÛFÃB¡Ÿ”ƒ`¦–­r£á˜I´`¤r,].EÙî”ZЬýóÖÞÉš¶¯C‚"-ÿ§>7ë¾ø»±X8jÔ­]ZõµóÆ,”ƒË9ØW$–.»ÓϪLL”Oûõ.)!t&8ífÿ8çFw>GìóGD4É×möì¾ó-4‡ËOg ¶Þ[´þÚW1FËâÊ_¿Úx=›RÈî[[ñ°b •UÒ„r}9z Nƒ4 ž«­«4D  ÚqdëΉøb@¨hðÀ½Es|ÁO£WžžXQYT'9#óF[ÐÒ¯…@Q~l×ôós?É›ÓrK¢¨æœæQ©b97»"à}ÞáÖ×ÖÝh2›ço#‹Œ£Fªáð9K¯u),)„KuVÇk<)W¢¢Íé–ûy˹ÓGdŸö¾úÞé±â¬¸:Ò`~Ž‚ ¡¦ÐòvâÅæ¡€?÷æ—Ã~Pf,AHðÜóè¬8‹Ë\p¶7†G楹wg]íJxqøR$é‚dx(“stvs…ïÔ¤7& D}rÐདྷ ;Ž ›N­t¬k/Ž´Ù,ù—¾^r~Þ"Âv|õÄè6Zâ¥\n÷ÚEL›ÉF¬þêß¾€X9åX{Ì—‰û¬¹5kóÝq0ͦDA úaú•åD9hó ²ðàà¼3ª­AE–²5èZ(þŠOŒ£Íœs»ÕÒëŸÂ|3¾SÞ¢>ÛPû# Qßè ©? O?Ñzパ£ÊÇî›sO<]³ùìô˜7 kÉ«·¯8rÙÀ®_ é°ò›^?úýÜém.8œ!.)ëØF©Œ^¬ÌÖ%G$¥‰" ;ôø¦œýHvÄ™BšPFä¶2TFL§>zýxéásN~±äl?©\Ø@j‘€·®×•@ÐSOS®ûUÏûý¾Ì¶Çð×ô” Œ%©ÄqÔ¸$|NT “oÛ¯ÿîá$—ß Œ4Ú À?P¡‰¸"Š%©4W¡£è4(Üas†ƒ ɉɸ „í}ò¡pMé: V–¬á7 i Þ;?Vö߬ÕžÑå4ÈÜgôÞ5©kÞ·—b>nøu„› Ǽ¦àÛ5׿A”ç¤>ŒÇ„¼HÂâ~PJ5ßõ¿• J]ù² ^r<$Šø„)êÆNÝPäÛ:ñ÷²¸|1!K dB>b¾ªªJFþØPÈf‰Åœ•άëyÑ`* DÜ[&žËéôò»kÕLûó CF1uá#ãIŠ$йãî4q:*¬d] Ç U¾ °…ʈ¨šÁ"ËN|½õÖÔ Oèw 24‹ËŒ<oõ€ùÝŽM¹Ø•ÅtoîuˆEÔ•§"Oôs9ÝÖ›,E˜§kû_‚u…0Œ”¥sé ÷fÍ»Úeþå^óÎôŽGÙN—Iÿãe¦çËúÌp!²/N"K è^}w°Ýî xBûÿ\–szÔÊ£ƒ£~¶Jš¶ñÔLóælé¢$­;;%÷øä&Im£ 4î©Ãö07,œÖnëðÖßê’ÓhT ´lÃÔÀrQíëÄ\ÆO¬xc|Nˆ/ÕŽH@uµöÜøß” + °€ Ã2ÔÍjØ|òäIà+˜&A» |âH-zä‡~À)°ã#xúôé;vrC¢Áêô"üªeË–‹/!%:݇À®ž’’ìÄŸ 7c5ð#ñÈ]XX¦$wêÔ +þŠ{U·ç¿}È{Ãè¦Óåq0ÿ¼·®Ä|däˆiûl¤Û¿M×Õ¨ƒdƵY7;PPL…k¡,0±WˆIFq ®‡³œdu£+s/tžóåŸqžkÍÍ6”0Ž‹ÐÉWóêH›kˆ¼üMQ2+Ù·Uqw‚Š?áJ£%½æŠ+zëRuO OZºéô†"Ô^o­ ‰{ ¯+bë"梂Ɣ>Ä Ù6†}¡;¬§y-^ jûð„ÔG0CPb1½að‚pÃCD¨È&aIÞ…ßiXªwáBe¢ÌRiK•¦ù ´c˜$å¥Ñ7;ŸŽù²ÞªŽò®a}¶o,ñ)Dë%CX­°¨½‹.t§ L–|òÆd´·×%9Lfg5é!Žß´ªTb·_Ïà9\¨¯-ƒ^õÌ—Î-MªyÁéµ°qgµÚó×’ª¼xQGÕ‚ãÁ"KôÌ=÷lÚØ>k4£N­N«öõÙá«nžßè´˜Ô÷(sr/˜Ðtãö«Ó¨6@9@Ÿ‡ÉÔ¦'6 €M5ïó»~Ÿ]" ÓeÓ|;nDŽ£Ô•Á©‹ î ™-'(K޳eahüЄ6“/ATCæqüÈ;òW€‚:ê…ÔϨ\~å+óz­\õË‚™M÷ÖÒ6(/®Ô±ÒÙ- 1 ¼qg¼Ê¨ˆ6ÐçX¸ÕuáÞs¹#ZÝkÚ6·û–ÕÇ&On´a˳éx__v[Ò dB„a̺۔B>2½õo:…Ük+á1+yà3ò7sƒõ_õ¶d£'Àâ‹¢<Îak9MÀE ¸Ô:Þ´Ÿ»`,Mî¼qË•i¸ÎÌö{yþA QÈ+Š'þáu;¢Qg ˜¤L–TA /Ñá1«Ìú»ZÈ¿²ž‘9C òv\×óéÌk¨‘)'ƒšMý˜ÖVN} ›G/ä=õ°Ü`Î:ÑçÇO¯Á&‘7*E]ŒÈ“Œ ߘœÌ0;ƒ.‰øÀPXB8ÄÀ—ˆ.:ßê2ÒyöPØîI!)˜>iÈîK%uÍÈB¿ã%”å^EYƒ|dF—Ý H¸~ 
«!×Os†žd$o˜¾§¾XÝñ·¹¿¶ÚÚÕÀž¼KçëX>0›ª´y¢…-±^7ëòp€Ó´딕­µ¤nî±óþXò¼€jzÓxa¥Bfö˜Ý!Orº¢¾Û\ˆDR…/ïì º¿,ò¼Ê)í!|˜¿ãÛç츕‰†Íév(ÿÈЬö·A.”2mVG”:³3Ê,ÂR‰áuÌ®zð…ñY\´@©%GkÚ8ÌfæHa¢°|á €XB¦žÆö…@"à 8â%™7&/è|råÕþÓûn'Lφs³³;÷6úyлŒTmqXÁÅÈ&›ýë -m_±‰ „q1b ï£0âcW?^ðu¿œ}ç2ûvYƒ:.ÌÓªb!C¢ŸUð!µ¬ÑÈÒf;•RÐz¿¢q¸¼ö%öšjŒÇІ¤*( ϺÖ:Fΰ‹þ’W‰L4VD'^Ž+,iî¼ëÔK¡¦jÒ» ÿ“;¡t°áË?yÝ÷×ú·Ó€Ñ*žPöóòò éBîLV+= ÎBñ*ƒ¯Ì‘B ouï€ß 8Šï0,«ýµ¸z ª3€Ë1ô`ÐwT£/Ì °<Y_¼@^:.θâ;ÎÂu ôìÙ34æÈ‘#8 -©¾Ñÿ}ßw)fƒ"ÐÃzá":Ÿ¬YŸKé©'EHº³õŸãŠÛõÚ·tÖ–Ï:|m©"<§`f`Ä©5tA÷ QŸí9wñÃÆxÕžAcvx›(„†CA?˜ÿªäÙ„íMå"™'ìTh•r¢„Øs›¯c I¶®–@$„¤²óòÈC·§oß= ï+.·Ù|:uª@"ŽD½Ãšl[úå‘am3³vO[~mK uö«p µ ÄYmm¶Êœ  ûG}û̵´ PÈEæ_q¦ô(ø„µD¥¸î"å•eÚDõ3Ç. –ñ9‹N·Ùyw ˆÍ+ÒÂDz)STFy³Ù…Ñ¢8ÅAU ˜Õ'\›µH†¨+I»¯¿Mç#|]ay8 ‡F@yMôˆÇeq®ºÞ'äw BÀá®)è±ávî‚»PwÁäf Yv¾õ¬C½kê2–\è`qèWÞì\QQA)ÙÐñ‚¤†¦Éª•½1ù¤i”Õ:[xª=.Ê&¥gPj‹Ì3gÚ€ÙcÚ,Rªy Ú^ZÞÿ¦Œ+ȾÜÓbòAÁ";Ïgy¾&ïÆ7JéJÁúřږ°mÞA š{¾»6Eí‰ØKüÅ&âRÊRUŒÍç‚ÕT±a`øÄ° Èøº‰‡[hŪhÄ£ ìÈ—­ qSc•¾²Ü# ¨4ŒÙ{&^:™VKA#|’¨2l! ªÔ(„ç-!q°¡Y-7Mnš¿°û%F¢× à‡gáÒ@SÌš¨Iž{| ‡ÉaÑDë/ŽÙt!kçÍ(âár¸V\ï[UV²jðþyí7HXÒy}j¹ff»]ëöŽbGB̰WΊš*mèçõ·¾é‡F¬­ÒŒKMyr^‰áí¾gó}çˆ&/˜pÜé2¯øìFÀoÎêsõë–ëFõyìþ&[Ü6ëf 'ñ—û«–Þàb›+Ý…‹Zïe ¹)¼t„¹2¢,µ6é›íõ9âHî©f`ª@QEÂózføâAWB-a±í±›aS$ÈÊIIã‚à q½T#je > É0ïçBˆÐ#â5—Ç®¹9Ôä4 4iÃÄvf/Ljíüú®@ÈYÛõI)µÄ ëô>¼’.â^|eTÞíÅŠÁR¹ùö„¿Î¢yˆœ?±‡ƒr›I^Ÿû"n”Íö9ƒÆÃ.Ì阋ñÏgéôU®ú©®ÈÿbÉOZ¥ ,Ù¬E@_9ª³DÓ?üiëøW•ïô6›Rð¥5l°|ú êT¼´G Óíùªû jQýßþŠ&¬ÊèX8ba‘‡Mà\„ :Ò HŒ=@åêÃ`"¨iÀ'0² ñY ™8X‹Ÿ¸,@j4ŽÇeaj®~wË58;±a°÷Å€ÇwY½BKpú¿zˆG¨å“bhÀƒ¨Ó§¢ó½ñ{Î]ÿþ§CkŽÍÉ<+.½tÛäS¿îS¦PóßïC¨Tªbˆ‰ŠÃ‡.í?ô`õSËÝ””o<ÏšÊ5B›oY»Ã9÷Æ_Q‹2f`jC9ô!º…VCƒ7j‚»“µúöÌÌ3‹b „D&ŸûÝ¡TŒ ú5¬=6_TºÚ“O4,1Wx#æƒM\zxÈÁ[9¤uÍg&á¤<‚<)»õ¯*Ñw½Ž+8v—õÏYGZλÙl‘ÏCOÜtWyìMbŠ¢”¼E”µìøCnÏ‚…O|Ý|íÌ{µ jF•Ò|¥ JŒÀä‚NnºU¦2²öðœfE]úXȸáÎçÖXÅ„“éNIéô MƒÞG³Ž¶Ñð"É-tSÛG‹^ÏܶW©~Æ$¤ŠÙ•¼Õ·!5#ÏKáú{ý! 
«µºÌ?M¨³{xúDz‹9ƒO|]kÓ¡Èn»Õpî_ƈfÌê»ù¯Òâ1·ÃºöHþÆ‚5»¯ðúJeü¼†ÊµíÿHT7erêáRv7ù¢É6£e[h±`eŸëQI<³óÏ&¯=«ï±K¡Ÿe+„º´úOìÆ€­DCc:,¦dfƆŽçr>ú™íHÍi}3hådRàtÙ¦_ûØ-Rˆ™öXQ²V4þp×=e™¸Ñ×¾§ðO§"VH“…Sºl8Yˇ]³›Ùk:?†ˆÆ'á3lbvpí©[®ÍɽÜUÀ‚wÞÅN\XbAîÂÐ Ñi b‹£Ò°Ê™ŸUT¢à#™3oßGÊp³VšÆ­) ˆ÷}[t– CUÒhUмֲ®¿ãÈ™‡{TºªªP5N@Jœ¥^OB#·Ü_¿ÿÏ-+”ÈØ‹~i'U²/tbK¹{¯¬ƒ9ç™ï1Ĉ¨‚czðeço³w¾š±âÉ(Ô/2ùý ÂVœSbÓ¯œ|-À,†ŸÅ£xláÄ[¦ÝýîQÍ—}±ç×ãsïõùÃùhÉ_=&þÞ’&1D 5*›ý-ÝáÔŸ×\º´í/s>þ-h”¬ès,¯ÿ|>²ÒËJðÑ5åó•oËJ©šB(æˆz4êâ¡sØ|¡›îɶ}|÷™M·ÌüR ×@YJb±­÷–\«¸ÀH¥1ø·ÙÀíyŸîÖEê¿ú«‚²‘@\¦©¾s cHG·ð¤Q%óÖܲ`fèÂ߇’¶¿êµý줬!ÿI]Žn1^³è¾šuÔ•–·+:\@Xê øEÅ _Š‚¥¨|iC€CøÒVÊ'z›ëÑ»ݤ‰®)çxÓ^O»ÒÔ¢ fï„OœJãûA®¤ÌûÒ@ÖwßúûÂEÝÚ|úcÅ«ýÂî÷/¾ú©%Ê×J+eþ5ûa3JÅÌç]ïøÿ€)c %Çñ*OÞߌ·üêÁ#j‡ˆþIåÆ7|hV÷|ç(ýD%*Ô/œÙðÇÈšJ”&¢H³›ãHQêÀU—ÿx(uî{9“RLd\ÛE‚˜To*ñ†Ÿ9b9ïÓ’çVwÕ૬P†‚Ô«xæþaùMt^œùaežh@È•U àHMf¤¹ŸRé5?y±¦×Ï1¸ ´Þ™ø±xÇ£1 F~°žRÿ÷;tµ (…! mß«aòoó/=_ ÿõ\Ø#±¿Œñåï+ü׃«¿ãH9.73öü×Þ0b¿eŒ¬ÿ‰ÁÿëEþÞÚ‰ªg¨ê]ÊEGš‹ÄQ·›ÁæP•¯ f)¶L[ö™‚EmÒ7 ó·æ à3F3äŒû"LT ˆ[\îÚ‚ÚÑWdñ€­jz¢è•ÊŒ…£/*+Š-ˆ¡úmázš 7Dhs†Ë¦FrmbJøYÙDA*–xà4Œ¥çPJ3:&Ž´ËóÐç`ë’tt¡³Âö§)ôâ§?·aB~õÉ¢tEi\WòX¯`(4|¡ße›œºD'd./Hc€f$F¥[Ô“Ó”`vÓ\qÍâ2ËÀù•«Uê"û›ˆ8àáÚW]†•kI‹£ázŠ 50ùQÙ700¸qkÀ—œ¡C{á³RY´>ÊÎþåãü ?ÿi~¡HQÕ~©P)56§Y U{üÜ×}²(Ý žG–‚!´ÉozÛS¾0ÁP.HS~t«„IW!˜ÁƒƒcM"t:üñP-IU+ÒÞ½zÃ熗]ë½´óž¥WG/ëxaÉ/=²FïÎ>=†²7À2†O,šÈ8³’EwŠELò²|Db3ýbW©¥˜Qã½—×ä}v ´Ð¼˜ˆC G|"¥Ð`+[qwP^ŸÓó‘!«ºý‰ ¯)¤jį]ò»Ý˜SÐnqÇ\1? 
Âç( 9ÉLV@&ÂãK¢bÐC9•lNÜݤhÝÁÕÐ|²åÄ«ÉT{Ð0 ö0™Ðj -€ûÔ`†å¶2“ˆæs˜xS*ï¨ùBŽ˜ïö1À‹©‘ !"ÆÃ~f\ÅŽ|€(<­€!ŒpY m6ZEèmjB†€&öÙ¬‹~l·rȾW¾Îj·C­j4yó'¸×ºÏ®Û]ŵE Vò;[‰“«‰1>Æ;1›–ÌJÄø×ŒÕù°¦>þx󉩔|ÐIf ܾþÈ„¥-vÅùIfÚ«­xÆd2“±ŸüÈÄD©´¼ß¨ÜÜáÍÇ$:Rê8[¸<ü8IdÓŇùWºMì´†ágÐÛ@hãi îP@nø²w ná&“:mŽÑ!Y9?®ê“¬Õ>6ÜM'—»b‘T”H¯²XrÎõÍýò÷…?5û¶ã’°íÆëÉû[l̸ú’„àDd@Ç ¢ôµÏ·˜BJ¥yIA/ tñ˜ßøã95|YÙýNdmtQß»ìØš#VKÒïõ¹¤N‰‡ï~ú"’Ài܈ÓÐcvÂ稂°"bó…è^RÃí zÏÞ:5¼Î‚wW¢ñ_·ß€ê{ƒ…Ù=Ò2c‘8ÔõVC‹ú-¾XÓxÓ—×§þÒ~ëg¿›íp½×L©ó¦ì\¤â „v›3™zã~´âqc[MÞõrË´«6ž˜‡åÛÀ.ß„Í^q Eîk¦cÖpê[Îv*=XëhñÈêk#–öÞ°ôìôÜî÷îýhFßÛL>ê³Uä_7µë–M—'ÏpbõÞxä©=ó7ÌA°gìÉ™½&ò@…7]ÖìUYE²æÐÜ¡hÈ=‚N¸–eA‘K1XäjjÒ)6lzøIÅ£™R 甚ޓÜé¶ì¾•9ÿóMyg¦Îè´rý ¶õ¿ë÷Ûc‚àìKý©·!ßÔ;ÑZÑåõ¥†„¦Ün6Ì゚AX—Ó#L)Cì÷Fu¢Ú(Š5ñbÊ:ýëïŒú¼íÄŸŸlüˆ"z/nTÐtT_üßÜð¼èRlÕôÏÞ 2ö¿ãFÅû¼ß€¾cü„ú‹Ï¿»_€”P‚œ@\h·Õ`\í¦Å©€^À*ŽÁðGVÛœ¡ÔVÇšc'¾à¯P”ñ‰ {þîe[TC2öWoø+îõÿþúïÿöGÙ<&—Ã÷B³Ñ8TR剑ˆFpªš*ÍK¾êØÿðÅ“@; a*í†/ åV¥–Z°h‰P= ò;|I1XêóFùÉ GÔ¼rÿ¤Ÿ>~ä :gŸ¼}ê™ ‡û`Õ˜Ýû?Aœs°Í¼Ž;X ÁKÃ=³&¢c A—"¡®0Ä›sîӬ~?_Ð)oåÕù8kÑÏ_Q–"™Ú턱¬òçûßV‹ÃXèÁ¹¹âæ ´*÷óÍ ürã'¿ª¹µÅlå×ë¹ç[S®œ|Û{W%Ê?ôÕìî»ÖÜ»µË-§Ùä&>eÙÕ³NµÛÓý/0Û5KnRé,3ê+)5Qœe§:Îóök#'µÙ»õñ¨¹mÊàL²p‘#ñQj?'*²°‚yC–“g‹–cº~¯*}m©øÌ"Nm nkõT Âñàò«Sч«ûìÈ<~nóÛ".mÉ©¾”6]_KÉ3ûìXwr$¥÷ɲ÷ù4í{0£øm`r“ ßkJ©å’hˆØÌD…°sï\0òˆ$Rõ7›;ìq¶3”®p9 êÖó35üÅx½î·9¸È®ž7Œ6V2¯±ÍXUOüá†æX$rxðï.gXÅ’¢`îfïî}ŸÇ”ãÇXŒÅ»/v}ñ©v ûȽ8ZT2¸ñ‚£¯VRIÏ€#e·„C”JÁ<€Îб&ÛΞØsõ²ýuÙuìÍX´s}·ÃE•®˜‘­ÉFsãÃÔ¥†'9׿\7äÔÌ}ò>êˆÚŒ².j±wÅùQ &蟼º×¤N GؼlÄ1·ùó_ ë:m´o{]jÑ @IDAT˜ÐˆæSK‹TU¸½RRîB½¾Ì # ·-þðŸ¡ÜÿçHjeQë&Õ™„Œù-¦æ·ß®•ÔâY鉩³;/\s 7¥Y=[ÞÅ1 Ì¢ËÈÛ»I19Ðö$|,ŽÀ4óüu}ï5Q65•TIéôDIí{»¬üêŒÇã’ß'àu`~ÐyX&\ë#še6«ÛsVë3¯[¥ú ÷Ç. 
;_Ì=ß}iï£Kl”uæÑURÙ½¿™F™|ñàºWªì†ºµ?°¹\Ï*ï •²rç[…XacØ„q‰L©Y3l§0UÙµù¸svÆÁ"‚ù9Vð= ÛMrzÜ·¾òª‰œhÌJO×—Kå/½8¹Lg ”¦ÐVì\¸·°¦èeP—½¯j.èfq±/‡™ÒQ½VD™^9Wºáǹ¸à†ŽGe%ùw&ewü‘«â¹H±Þç¯÷aǧOî)C*TªPHE/OiŒ(aGÒ?кý‹»L¦´˜Œ»ïe¢»Å!ˆe\£èÁ=²»è4,}%Yä³ZftÕ2HªXÈl}L®Kâ³5™ç»áíLéòÃæS#7ö¸6}w‡_¼r…³:Zûd(Þ]¹—ŠBåªãÔŒPSN': ¢ÓööoÀÕ:+ ý È„ ˜ô"š(vâ' ¨Y Æøò÷€ºŒïÕP ­6)Wc*YlÕG¤«ïò÷@5¾ã׬¾,€7ZÃÚŒƒ«oWý'€:vþ-"ü}÷ê/lÜì‘—ÏF5™pÔ†1ÁD‘3Q9‚L> sCÊL£4QI`'Q Y€ž9©=P¿ ¤ºLaxw¢ýû¸Yp$Û®eaY5‘ÜE.©,³S rrЬ1#³Ë‰UWÆ£ ›^0€Déøi' >€súUÅéå_ìGË,íĆ‹·=Y¾tô‡:¢(èz´Ì©‹* ÞÞƒÁݼ^cja;”3 ŘրÛ`¨HêšÎ·iê Wn‹h<ù—Æü¢bæ›Ù]•uóóMÝo¢øãëün×Äq‰’¨MåV!­½ð£Ÿd:™3ìÂDÊß9mór­”ü!Žøíþˆ3šÁûÐYв¢VQˆÉ—I·\[N=?TwÊ5‰þ õ~ålº9þfvZ”¡ärzϦ>ý³ÁäÌkƒhïô2Ë«I_´º«´Ò-ÉŸù\ä+ ;±ã 4)ÞªHN«– C›5µÕ{l$¡N–š+0…ë|ž`ý÷Wþö5P}dü§›ò/NßegœÈCì¨×ghÖ°eUq…3bOª«<&-=9£ Ä ƒåõœNG,‰NÖÃçcçô=]|&NgnØ3dÇ­ ó§—ßWdïô[Þ*íHê–V¾Ó¤§äŒÚizÃKLRQ"0JøÞº#ó&ÎÐHÖKÈÃACýÝx¯•ª±š¯£¼åNÃÖFxÅr8ÔÛ­çÏÈÎû%ë»_æS˜' ;›Fßè_¯AàÌh¹O¤Ìǰó´ôú”þ¯6cšÐàH–qëzm¶âܱ•BµØl)^=v «nxÆ÷ÓYÂðÔV¿Ð’é4%ê 'N™#‰D\Nùš´ :¾nÅÀ,º* du;ñy•U'#eæÇ`Ë”ë¦j=§ãÞü£òzžº·Ë̾?¬»:rçÄkßd7Y6óèš3ƒ}ydÃí!”¿@Eå‰Më±i#Œ 0ÏàÖpׯÉ“QiãÐî "¬VRVõö{052Éúž—ËÅ)‰ò2ÓÛYƒÖñ“æ©”µó{žsØhðšÃ?·}àQaL´öóŠ$i‚ÃîßrûËìyç³~K­<¸”¸£}L6 ½ê‰™¼‘ðê{³± Œn‘½çUÖ¼G³/uÆŸ&µÛ¹åÞ8ªçmdÊðõ,9=ëÄ0j݈¼î‡Åqù¾!סž®sH©L‰˜ ftáà¼7•…I©í+Êß5Õ‰»¡]{Õ4…ÓcϼÐcvëk®·‰Í£­Oç%XðXOì‚€¸fzí íÞ~iLmPAxɾKÛ©’z P@úÞþí|À°!£Cu€½juV.—W«¹Åj4­]üD\@À èÅŸÞë±TÉ#8zq tbÀ*.¼šâÊøÄaÕשþRýò´ø‚à,œ‚KáxÜúï¸Hõ]ªíÛhØ¿B_êjH§ñy#QÀ?õ 5¾CšËúT¶d0Q÷ÍNé»f#Fqi…Ü… Žf·<Ó¹Fß©-›Ó#wÂG+Ç´È„Ä@ðlDØœÝ „ÆÜÀ2AU`[YÜÙõ‚3ÈÏémzËë%¨ù„í`méXÓ|—qlÚ·ÃþbØœ­†ÓÒ+2Ï æ¨¥‹ŽÈ¼0Òn&êj ‘KÎÞpqñÖ«ÓbLË7¦Üºz9UúáðÓ—tZdxbXØjï´û%´OÖ÷üÍöÊŽUJ0Î"Ç“Ì ƒËÂ/!„†”žõggH@Óñ]×ë_Ènrhù `5»­éªçÄ…\$UV»¤¤œ+PZŒ~q"LHÞXi(j¨‚¢­^á4•4DN¡ÏÍ™ÖfÏè/çCµÈ~ÞèŽÿÄüûË¥WçÜl±àD›€ú¿èùô õÇV\æÝHåG\=d©j¢Pn½3ýç?VO΀…4*"¼ >vÛÝ•KœEñúÒYb…´Üþjû«)ÙW‡lûsʦ#¹Õ+;Þ…"FóöîK÷Û¾{0Nœáùzoÿeϧ:ÝÔ-MQ ×ß$²Ïµ1Y…NOú l)樑«&bKÕ ŽÑ’rS9 "¬AF¥×ñaF+_,2÷PC½½dýo#¶ß/Mb°T05À¨Ä“HQìU(‰XxdóÅÜíw—ç\]ì,N­­üþFæÆÝ9„z~@‘göb .{sß­EÎÊá~‚Ê (‹ÄË$º©éŸ?’'jJmeƈEûAb…×:ȺYŸ® ¡þ„ÁKI{Pe„›à¢d,†eÁ™A1…?"×Ü’°ðFsA‚5¨¬œs©µ]\YÅ(q‡m,‰?ÿѺ¥—²0ÑâA‘½œÖ0©Å†çc dûdIâ±ìN¹ìî” rÉл+|>›I]O 
¾m¿l|¿Ìvý¦"3=Ž­:¿~æõ1ÁÞoîGÓµð¨+½˜^ê2Œ;Ñièõ–qŽwÒ¹–e˼[ÿOjÀÛ7fjÁU¿ç«Q’'®—iŒB&‘4Nʹ2c‰eXäÀÓmB0·@)o¹ ø[Å©í¿7_x¼Q¡þUν1„‘3 gѸ>/d è÷LÀ‰t1E$…ú‡±éջ߈Ö_Oi=ŸîÐ2ìr‹Þ£·Ù³¯]ñdüSÇkê& ÉØÖT,Å7£Ÿ|ºÞÁ/‹ËJ#wÓkŠãInº$<±ÿb 8 " îûs˜ÃTHÙk&ÉÞ=HÕÉün‹Ù]Òë2_¢˜ÒrgˆIÛ0çº<]9­Ïöq;P¬h‡·Fî jLÖ¢^8âõ<°N诞Z1¼„QN×ß+F1/J @ø‡Ò‰OL«·¡ó?¿ý2ÿt7M†oíõ™L#·`±ÝþrÎ¥^i:aÈgŽúV}iòé«~3îKÕ‘'鋾 a…ó î+! ¯ØAÕÕ䯾?›2_ ˆ‹‰t#âä•¡ÐÌ$¥‰¢ó“‰-¬·¸PñŒ,ì¹kΧۜf¿Zž\òÎĈ Â\ÚƒÂ;+}Fç–åþ4ÿØÝï7ÿ:L•šx¶¶›ñœ&õ2$n/y†›je‰¸Ñá{sö<šñ—çÎðÓM¹êÐì»-Œóöûcð€ÞJ ¨GÆ+¶@¨«Á€ú¾ýOõñßäA€|€[ðˆX¸h[µÎŠÏj= Š/ˆÂ—j¤Ä)ÈjäF”önqbõuðàâÐMÿ«ÒŒýÕ·¨¾,~V+Ð8¸úø‚ãq"6|Çéø‚ÃþõƒpÊaã)âðé!Þ”ÍÄÄ¡C‘ä“ö†é¸i¼"` -šxÑað~O}©ëªw*þ,Ûô×ê²A’=ìäPꟋÐ^"£Œl3Š ÇçO‘|èeD‰ŠÊb/ê«ùÅo_o»?}s»ß&ŸjEØs{þHc¿ûõs"ýƒ4ïÌ|¾2½Y2ò¨t´ÚT—ïÝ쎫Â,ïÆ9Í!/´Ò8½ÑŠ=S©»GHfÇí9ÇGmù¬²ªÄ£ØÁªB ÅÕgÔTÝß(©€0,¶Ðbà^æqÍV$j’[ÃKˆDb¾¨ß‹¤aÂ;sq-FÝ’ ÂŽrCÙ–ßPÓ;DÆ5]´óÁŠ˜uçÈæY0îl]BlÄuºÓø©o\•o¦VU¼@)9ònV4»ä¸þ±L”‡ï-8 Í^—¡žº³Óª¶o]'Řp¾õ¶Î˜ El¹T‰úc\ÞãYT7z¨5ÚïdȬ`È¡ÑH)Éÿ0üEdl÷i»NnĬN¢¥Q²›é@G+D%D©àåù _ÊŪ¾´Í>VT¼m´§ÜhKM©¿ìHÚ”/$ÊUs.ÖYÞæ·ˆ 8õŽ€›Ï“ºü.Ô´QrÏŠž êÕ#±YT¸)zÛTißw;—ª@¹¿‡üzíg[ÌÄ¡#MFNÃ!<5•”Bµœ§¢Vœ÷ æ%Q}rîΨ¢0X¦ ãÓ‡[®P–êŸ#âwþé^„ß -Ôdã±Ó¨ :)TÈlU@½bÈ@Eð!¯bÜ—$ƒ&BÚѽ8^a2¤ëYx!–)²í¿¨ÿªP†ŽÊ ŽÖÑ|ðöÙ}Ê>$:N2Š ¢y›O.ÁƒÌl³7ÐÌqk΃™ÒûÉøÞ¾»¾]jç#؇Ä}¨¼K)ves\áÀ3ûÿBch(/'YŸìEm«íOƒÅpZÏu‘W&¨YC× a+*oèþøáÚpª¡"ò»þ‘v¢näó6·+-AoâH!Š6ß+?D!Š“$×N¤ÆÏEjaü‘¬S_¬ì!&ÅŒäÛÉk·š…·ÌãIpðùçØAn÷Ýçœå>èÊ`HžñÍÁ¢ò·i) —„c˜n€?ñÁÌJŽ\܆ƒç÷øÑ·fù’ês7è¤q‡«ðÔùWNm½qåõi™ §énÌj÷ ûJ»ÝNl~8ná€c™{Ûï»eÇo“Ñ hLeÅs}e)z›úD¼÷¥Õ| ¯Æ{õ& áäzÉI”«˜EÖ›<¡íüí¿åñ’¼ÔP‘‘ÞŠ nÞ‘Ü|tbðV¡*Èüksq©í~ÿl šG"••oÙ²X|ÈàaRzJ¨”'ŒèXbÙü?[èo¨f@bÆß!|÷Ê«¨qË ›.Í„F=,òÆdÌݧrÑþÜScqýOy}‘¬«C¶|qgÆåËúÀØàÃÁ†×µ3Bª …¹C¶ dÂR{[ÄàJÙË{m‰™’×¶{4«°9öóëM³»-#\ÅÔî{õì6§Ã!¾X©˜ÞyE€_—Óv\Þ Ð`Íûßþí‚°þÃû“^‚0‡ãQ¤4"';°x† ï‡;µÄ3ɤ¶{QTÜk·¶· Š#½,òzÕ³~ÄÖ>ü^ÅI’¨K.TˆÊ‚¿GT>Ï¿« sjƒ“›®SÜOSïÞtuÌ”¶óØ®zòp† À †¼Jï­ýùªç£G÷^¸ça.µ6á¦2¶÷Fg!#—Dó!Ö,âòë‘úB¥rñiœx€®‘$E]Äçq92&—»ðtÇì/®fí„É6»ÓÏkÎ|>¥íæ´š¢Ù?MÍ:dY Ü`ôXŽ,ŸØfõ¶Cs×ö¿È0séa®˜Èââ 8³Øñ(?%D‚Jé6”l—zí1³Ž,³W‰4ê¨0hð=¯™Zóù=óÇšž2ŠÇåAÁ>©TþÎe¶°ªHj•Ÿk݉LJ¬þq2tðÐC§7ôiLŽeetóÅ5äí Ïä"’È$)‹ùQ$*ÍÔJ=uŒôé7àLÁ €Í„Þëüox©Ñz\â=Áèj‰ËãA±÷ÓýÓ¯v£Ã?o¶85‘A÷Ñb¶DN¨ŽJòq‰Þ¬R3KMOSkÈ Ñu{42É¢Ã=— 
Øí·çGävºjñ…d …HÂ|YôÛŽSÖ´93ûbŸï»â>¥@Ï•ÃÄä°AŸV™b´9go¹êëSóN}†æÍêrdíõ!£:äï}<‡Zàð|`Uâ#ÄÅõÔêÖrJºTG,amLå±庴’@ÉÎ'Ôzç÷GçβÜáJ, MµM•&®Î,GFv—X"Ÿp²ÿœn;òo§:ABz4áþŽYŸe#Ë)nKÙ?ÇCâ,—'Œ¬L¡Pëó‡P…"qËä´U? r3±}žX(^ufÒÐÆKÓSÒ—ïù:gúv³Í@³ð±»xÌH §G9aoØIg2D<Õ;ë[NýÄc©áÈÙkß4† ó_žÜyî–£«3GìÈAc€.>2jÔªßÏC8ÄÈ:»5EeàâÁ?¶í?çСüÑ6¦ êòi±A,Ãõ¾h Fzþ½FZÝc”@'ã¬ÝygêZ Žu÷¾\Ö§À®+´•ÙÁ`EÄœ#CÈŒÒyÑ`hÝñ”t"s;Ÿ‰óàΗðL2’R Ü’.[sf,…³Fæ¯=8g|ÃÍ;nNYÖú åY|¿5Õ*´SHT9U¼oÛÿÍ(uPÒªõ´ü>xáÿÿö£èH†Ö‹aŠA‹AÂCJR¦õ÷A7$Ì2¯>6jɰ£N;tº+â­Q³VNÿcå¦2¢^­ôºÂ@(`¥Hêüi}ºêÁäùC·ìB¤" ÷`-Ã& ¯õ`ÕØü`ÕÈ–®+¹üÓ[‹/·_Ñ¥Æ„Q°(üBa2là´CÇ6RÓÆNæõ9k³è7ÞžEÉ¿h¦†ºþö¾2:Ê« ÷Œ»e,®X°bŠ{Ñâ-îîNp4Á!¸»»kBqhqHBd’Œ»ÏÜ焻¿õ}?ºVßf¥Ãdæ}l?{?;{-"r8¢kq–Å$ýìŒJ–-š¼õ͇$A¿‰Ëñ§£?â~/¬›Ú7äzÅ`†©gÛBͯj}ŸËä9ó#Ø*oÐ'æJ¾`ÑÅ"JÅaæy?+ÕZI§Š$jè¹&ÓZoßñ$I"àôšçÑ UL„q*Í+ÃS¹<.Z¬˜yº,g2Hö™½÷xÆšµ™PhP[&v=X‚XߢþÏÃ6¨]G̶øŠý<û¬ŽË—íNƒ;å¯Iݾ0ÉÞ8e~·{Ž¯Ìšå~üðù ‡ÁAó¬Ã}3†E’\jîü m5{G3ÌbnýÃ"ŽÐVZª` Í¥¥‰ŠÔÒ/ùYvšÖôÒœ‹í±\>Náã¶³/·Ü3äóû/ÌFWÓV´>Éa2²Z\”s£˜^žÃ‰³nK¼ÉÛ%r­ìIÉýrŠÊ[†ÿ"ÎùwCrS²¤Ã%+yvû½ÇB¨1$äõ…¹è@®ÞÙHÅ™‹¬ì¸wá™±iÍïITZdÍ _ÊÊX¿PŒ=s1µ27¿šJ·•Of5\«‘©>äi¬º=’RD*º ÅQ‡>sÇN°"µ{}87A¥VŠž¹‡Z!º0²þy˜¿éÎoƒÜLÕ$ÚÇ;+n™ÒqíÁ7 ¿ëƒù{Gagµ?e(²¢¥®>`ײµ¬—ÏbùìÎyÄôMÍhʾŽso¥nÕ…!ÐUbE¨TWKÅ7¦Ã ö )èßN¢yÉnƒéÀólœ8<¶ Šg×Ó‰Ô×ד¿ÝÑë> ñðìm½0…¬Vwlù¶íOçaw¨ŽôÁü5᪉+¦}Ë~—n þt€9´ Ó]ÿ<7À ê\G~‡óD~­ÚûØ‹ÃGÿ˜LÉÀJVöN?Л’}!P—÷óýŒ§G ®½n÷ãÙã®ßð;ÍæÃô{´˜tâÖÚ „Òy)`> ‚É^'é×hM<;NÉ­{㑹ˆ1 O8¼ýioú19™ÖcÓêí醃>Ãêq¼Ê2Ç[ëf°‰Åá–ÃÕÃáÈå•Ô°“y-Ά½î9÷FP·ÌÀ¨>ºÖô-(bײƒÓÓ:\Xv¼Ãü.Ïn¯McË@øæŽÀI´ Ikt²XOv NP•xº9A–€…<ÅÏ®7UI!ÑV„”ðÚ-‰DÖ½Ù“wvÑORº Þ W«4VƒK”¬Œýi¥D¦Ø:ò±ÙiªeË=̵î|:‚Rc YþËýU×~XkýÞ·ƒñFÖ #Þ„÷®ŽCw5,q†ÑæJY²{ !§ÌƒÓ»×›Z3¾îÏô|€¼Å‘+…‡Búõ¾ ƒÿ½þ+@ 3è`ü 3pƒ‘åpÜNÔøÒ®y.Î[øâ3=}jç¨ËU¸Ñ¡{*Ï?Ô#©bùô}í #³[L¼Õ¶ÀZ¨ehq ¼§@•±…Þ”AjÕ­A©Iö|Xˆ\žp¬*0ÌÏ¿Ùqá ôñ§á•§1\$ÞV›Ê ð*Æaö–©ŠÛMo–I§ £»ìnád&•¡qdÅãÎÛïÿ2ÔÌ*^=òÄ»ìØÚÀ&Ë ƒžÖfÑäŸvfuý"µ§²^±××¾/òIC^ÔöôšÞ÷äB¹ÁoŒç'q˜<ÔÙ˜}f oB­~Ùì7)$òÂü"vHJì¼õmïò‰lòO»;;¿é%9/¦‚æ„Ù ­‹/w#|+A>æ Áí'ŽÏ´%»`v6)ØA™(%G\©õ.´%ë]]½êaúÝöË>þ&ͯCrÉ’†O+£ÉÕ[Xúzr Íòû…Èð™BcµŸ¢šØtïлTk‚®-2Åk’˜&Ï# û¤/ò…z>A’ `ˆu¶b¤,ºå6?×QªÆ‚9>‰?ãN¥o–äVˆ¨¦3y•,9CV‹ð”è#ÁeÙf^¢q°¹}·/94œJ¨ ÞiÉö›s·±ûĶ>]û:¶{ôð [ŽoÙañÖ‹ó 
õ²“>º=Wr+dóæh#bެô›+)ç«x,òÍ¡ãB4²Øž“p3?ßÇ(žs¹.Þ[Öeó¬cc²º¯wé½R¶6à…ƒ€8à˜ßBR?‘sr‹¾Õ78FôN÷…(ì¶ÚÍ µÄí³ºF@"d(YA‹Ø")ÀçqÔý€?€âÍ7ǤuKùC¨¹Rˆ’Â^!ÒJ¬_XÈî—³õü£–ÑÝ´’! 'qѵØÄç;"Û¤ (,`þåÍgýÀü¾îÏ‘zC¡¨…ÑyQ'ÖV`«jîÂÄi8ùÄ“†la§\£)58ç^ïšÖ`ßò‡0©Éõ­yÔg~ïCéû@nŽè|Ð`.-ŸWòΤÖÇ \2K¤R ö¿c4ªü¼…2ékÈ”ZVïš>"í#ºŒÝr†&ìôËl¯Ù,CÕB8"ß(LŽ­L%!—ßãWˆ©`Ñ} …\3ì?—¢½«,SÊʳlL‰Í[)ºÜ»¢%`—8²bGQÚËÖT#%”Q;´®iYmÈÍ×» 9f ÝSò^_;¶ª#Ï%gDJø¬¥·åRy€¡™yµû¨žë²ÏOÜ>àÁ×¼¯ËþìCf÷ a; ØP3ε]Þ÷JÚ¡¶[{ü1òÄO³[ï(¸!}`ÁþTÛÁj噄xäT‘‡ÉøŸölx8hXÃͱÚÚµÂÀ2›uÙ]}ÐbW‡ÄŠ¡!ÚiôI¢ù9–—NIGäV Ä;®îr;Ãü(†ÉGЙrQÊ:U?}ûHìõÈ“¶^*;_‡–­Ä /H¨º›ÀÛzÑq“_†òÁïÓª¯¯TR=¦£)yŽ€#ÄtGÇ©+—è¨ ‹ÕT¨ùÌ@.‚—ïqy££e~¯}·Ÿî8§Ý;ø1ïQ¥ò¬&ZÉ–º’Ü –Þ6–ÙowD}³³‡ð# ¾6 ÃÆå‡¹áÔÄq“aõÓw\šO¡)Ü–Ÿð`+ Ãk€”´¯ß-–]Že’Ä‹+x¬ ‡ÝÏKí¦èؘ¥o³ŒÒ¡u—G3«»¬aOÈÆPÛ¾•¼WÉ+Ö|öº„xIÑ×§ÒŸJD°’^+S˜À9ȼ”6¢ã2ž+‡|ȶÎY ¬í %8F¯˜¤,§ôÎáQÏVFÿÃ_ÿÓô¿ ø¿½seV›Å ý@Ô@ÌÐè`ÛÛ`,V)•˜~ÐÈ› ³‰¶:<2ª~&Ï)ñR42Q…þK1êDc‘QDø{±D+öʯíˆã„¿50£¹¬€±úÚ€2€O0¬å¢`Nt·J@çT N§ ßôyññq€QøX­®ú^ÿ•Ç ¢O ôãì3}¦´Øšõdä„_ö®¿>¾8†Kz@ h\ZjûHŸú›¡1bÐ|û¾»Ã©Nô²£ YRíŠ<“qï9ùAp(œ«}óñÑ«ºÜexx‘ª¨BAë7«t_Þð¾B%¹¿ÚÂþî븤û‹¹kRÏÌBï™Õí(`ùÌxTm9>ŽÛçcŠ9hç+õXÝ…lGryõûœR¥‚ÉŒ6Ùœ;/˜4tÝÚ‹ç ÎZz` µåÃdPç,o@NH­¾ áqEnjØËiRŸ—¾T‘€Ë4ûŠ¥L¹=D¢Åɇ3H,^©Õ[¸ônï Ãî™?JeB ¶Ð •ö[Ú8ºÅâ-çç-ì¼áÑþ«Z]~ôçîS¦œ¼•5¢óªmû¦SG&”“Ìúyë²ã#‡ÕÎÜñz*]Ržuܺí-§éÜ`ôÙ?¶P3ßCvu¾áø*)öb´‰˜®©k°>ï\|… ¬ªW«ZÂ=á`8$½IæüËSÇý´nãßaO˜%Bµ–Ÿ JËEf4ÎbX˜-l“r|ob¢Ù¹¦¿K=&?“8׸Øä¹oU)Á0FeŽÞ•’„¸V/¸ïæÈÛ-–Wb×Yr¤Õô6‡ü~³ˆ²™=åÙõÃldÅ ïï‹g6Z½¢x}!›¯Þ{|ÚÈY[ïLÁ¡Agu\»ìÂ$X YÝq.h …Þg<®ô‚ÏTçðYÙäjnηĄ¸gÛΪtٓߨ M,nOž¤Ü§¶†<>¶O¤âåæ|•0E¶œ”º$n¹–]Íï²²øù<‘DgD¬ Ö{#½f#+î ¬Æ óêA§£´ Í·öûóoã³Hµ&¨C?‡FËó:QÅ”˜'¹§™I¦g`U;U-¨Ê( ©)åŸ}¹§ˆ‰2XùXà µ5Õ>³²–ÿ‘E=6ꤦïºES&öZ¾îJ5_Z ÙveuŽÑ_\Ž3ýE–ŽÜ5çĺïX( úaá@>N6Ñì–¢®—D:€. 
6:¬cÙÈèV+·\Ÿ×Ë;œ¶ê¦°À™_Q^YrÆ$Tð–D«Ë¯:ß®§€ ûáâŽ?:ÌmñZÀÉ%!¿Ãa¶¸¥ÈHa&^o²¦ÅCØý£ÎÕ2pÕ®We ÐûàÍù­Ýà£7w÷h1ЧKŠæ¦jb@€SÞ)útþÍa8Ê!ôŒ¥:8$!¢À7ÂFòJýŒì+³'7ݹæòÐ…..<ÜÁ›êäÕÉw0YÃdt·[î̤SI Z€áµKßqr>ž¸ %;EôCÀ)r½êº^WÜ"·é³yJå2ÄžÝ*U¹é©]+) :}3ìUÍÿ úª€±Òÿ^ÿÕ`Ð0MY.awд“þÇAþ´Jò{6§4‚s‚´  ¢Æµ¢aºwÝ!Š¡º¸ÁeÃ{ÝêÇP¹kÅ«=Ï|>7˜ÚvûÅÇeáį$½ÃAgA}``'{@ÇÉûN¬•º°É- F¶S.aü­\)¦Î×oENbòU/ôÏ+ÆÖ~2~JÓC ¦iãÐ3jiÄÜF”*á঻_ÏFì‰ CœÆÃ%5FµX)—+.~š›ê"Ë=ó`#Ñ;ÑâÌÆ6&H*xí)Grî¡®4‡ÖƒáXeÕ[ÃË?À3 Ìñ-wULªœŸÿyÕ¨wÀ¾hÜË/á×½Ûœ>ü0Šz“«.÷[Ñx;“é·ù½È”ùÃdX•mh÷=å\-(’ñõwo•A5 ° :]† 5xVîk= ÍaÔ¥dÏÅ)øëœþ;Ù_xï}ïhˆmœœÒwåx (q£2–;áLí¥ín|4ü)äÆFÈ„€/ŽŽÌ½ñkî·CÏW—ZúëðËò[çÞº¥Ã­™w[,jú§\ÊË-ú‚§ÌmvÐîðÑiâ¡ù$(¶í&"ᄤV „-ÎÃγþÆ¡—ÉgÈìs÷dnVÈI¶NEmˆôýqt­ø–…_ŒÎÔ¯øÓøj™‘ʸ\£=1¥ÒÔŸe^ê#W$CÂNùeNÖ©¥…y¥b¦ LqoÑ?Oï¹ùâý]TÝj;ìPöê­\dø²d½›;|a‡Z#©°™n*ÝD¡ÖÂÆ3–”b\“•§©"Ü\ý2ן=]ô ÏúÞ7&œjµ¦ç“ÑÛknê¬~V§'¹¥9›uNëP²òÚ Í×îC²+vD@ÂR³Á}Yõ¬e˜Éþõ¼ë1Ù*U‚ÅX:¯Æ £1SÄj8LÎJøb^4ý0íÓoèÒÉɧâ×èÇʼnð&p_ñw'#(‚ÖЭt¹½5âë}s|Àr—i_Ž•ÃB:©–ÐfÜêŠ$‘ôk½~Ë™ÁXs,8Õ4~²­Ó€Ø5¶Ÿ3òT§mÌ6‡TÅŒp¼ihAÃ#Q‰ñ¹yEZe ×/\Üúw•€Ÿ˜¬²½Û¡^OxaM®ÃV¨3W$ñ_¿˜„$iæÎaTC‰š]~ëùþ˜Ñ¤·í{5ƒ®R‡~»Ð9†]Š%‘Y”%a-ÄRº)• ©AИYL#ð"Ôéqñ-&‹üñð¨·,º0uÛµÌþ¿¤ï¿9ŸÆÈX0ACldÒ—éÜ@3PŒý„a M‹U€ šqâ8Lq1¸ W߇{õöÓ‡ónt¡’‰- o¼U…ü:–6¾ÂÀz§5R‘Ínv{±8ìÊIÕ¿åÚƒþmÞ†‰5ZZqZ‹#‰U*üVuÊÑYÔÂc¡ˆâ`Ÿ8½—.¯ŽÌé~&ózŸ)mןçz öÎSÄ`²˜VŸ[-‹AÑ’ !¬C.±•`x q¡èÍ©ø€0°ps}.W)Ek©Ç3Á&$WÃ|±Mb¢á›Ÿ%p/m}ËFœ+ŽwǺÍïu<ýpOı{ÖLä½èìímb|Ííü燠AMÿ^ÿÕ`@Æ€L¾Û­eÿ‡Ø€,  Ý˜,žT”5D@)oA&#Z È”¬®y¯ªð‡"ÿ_ËŸ$ y©­”0öÙLaœªÎ¬»‰p}ðPùIQ©^»{zÆW -Qæ·ÛïúÄWE¬ ¸%Öb³¡VjêÀ=?,èüyò¾+ºn[}fDÿZ[>÷/ÏSòBâ§ï^׎-Šû7KjLüèê%Ü¢So´yHßNý¹nmèKl,;&/°éó£”1[LÂ'î` !ö׊ QäšÔ3~a@böçMصøÈ‘­2¹>Qµ U}Ÿ]¬ÄÐG㇠÷óÕÔi ö¬þsäéü~Gü^£K˽g¯#i¼…¬l»—Ç”º,– búϽ\j½ÊU®üêí‡ ÝÅ>µ›Íâ/î°;¿øCT4}xZ󫜰bF³Ý‘ìÈÉu6CdÅÆ$þñòþ‰¿Ós}Ò¦?E·º_t¾‚(Z#sØ|îp„›¦ÿ@‚çÜj®šzÚŒ«c‘“ŒÐº6Í ãß|±§\¼´²Ñö4«Åe³Û±¦çc¶Œ$ÊÄ”„éµÖ*ù•cD1ÝjŽ“ ¥Q"Õš[Sý¼Åª•X2¤þ ‡Ø"e©O‚¾QІÈk @ô0™Ò0þDƒ DäN.xª"mãÕ¹,ºP*©©eGj„$÷K¾˜)Ëlv›#H†ã~lžVéRµ¨šŸuïb¢c¶Ò­ÎÙz‚vl/B#KTŒ\ïpiP,üqDí o ³Õ!n@Ö¿XÏlbã‰kÛ•ìp)ÓiЏ4NˆÔ°3øÌ¤¨Ê3Zðóx³»Ý(-5Ì蛿òf7Œÿ›ýöfè VP‚dHÜ4{ Œ˜·ÚøáK‰F½s¼$m/ÁÊ*I £+dD Ã&µH0¯Ñ(4ò‘†~n"ì5¬âÉ ©)*­ß÷z$²o22²ñx†$5$P:‹¸MK±Çj5Ÿ{p(É×kej¿I9L…ŒæpeÁTJ“ƒL‡ö â½ÿx›ÄV„z‹-Ï¥±%.ƒZ¢“|Ôå²½¬ŠNÓò6]^5“©t³IÙ|„ 
Dl9ß\>ºòó‚¼~jEQíwÎû~kAæèmv[E*™79¶v©Íä±ÙÔ"ÕòÓnvy¸PM†‹øJ+vpiÓ•sÎÎ0Ì™y\»Mÿ ;òý}¼¼\€PÆ!¢/qCáÃR{5z +HÚÕ_.VðþÇ:©­–ôÝWò>ÌÖ8Ô àlÊEÏEÖ=fÅã(€,@l Z —Ó­’Gã†tåQ¹€/Âò ’Âb".róéè ^Ïë¼kñ‰!!·=:гhïŒVïÕ{ïN£2 ß ^ÉS]6cDllØÁ* ”6dTzQô¹BÄO>½=ú0 7§q)Äœý$‰UŽÚ˜l 9&ÿEX]ä†DnÁ¡~ ‘*¸v0…ô ®Üãc•ù('ƒ}ì§Ó—"_…µ÷HÀ“Ò›°IXÍäÆvŽXÌ8¥¡Çä.#sFÇ5y¯-UDmíÀåpü$EjVïW;$dbyQû€S• 'ÄŠ a$·Âþäð¬0l´²åýÇþúWÿ×·Ìôÿ] ú²Ÿíz]º~X¤2Z²ètÁÃælݺ4ÌÎà¶k+øÏŽ/uä<™ÛïšvºeZûãËo§\ç#ëzܘxºÕw³:½å *1ýXlq<¿ëŽôíýÁ9ë[<´Š&œî±~ÐÉÛÀ’õ²­Ozô:@5AªtÅþVÌôÎçö¼š»¢qöÌ“£¶ývpÄÁ¾tTÎBØrÉÁ‹ûi°ÈGÔ?6ëêx|~é/«éÊ.|ìcR.u’%MOFå® uÎ1z“­¿OÅ'çT="H§jO¤mFtZ ® qiQ?†j¶ó èüÂó¢nxóåÑÓžq¨÷Æ_ÎͺÝlIû׫ÎÕÇHúÔÝÊõkÆ×“‚Ï/ìV2ïB$Ü|^§]‹Ïü<½åï+Ï ^Ôuï‚K—´»8u{›ymK!,*jS|8+*.ýAU­Àð ¹Ã6›‰¯”™îŒ«ãè. Q©J[¾gÅÙnTxES‰ó×—ÇîÏɨccy¤@ÌœrµÕêη'Ÿª7«íïL†o6 ?ѸΠ¯3|êÏ£»¬Yº‡–&/8<š.‘(¤I».þJ_CrñÑ»ZJ×'Èâ¡!:©q„¡ÒU>Ñ(ØñÈÅAõ‹–'ñ{uÆDÒ4åÙ¬}ÈäEGŽª~·¿!/è»Urp¤§ïîšY½ÎM9ýKÖ`ÏwqcCE.aKTrœ±øø.‡èÀý~Kšž›zþ—eí/ÌÚÛ‘h{bÌÈB O—‘-ç²0¶Ù?î·í^—ÕUP¼òQ¿Y¯eiµ`ØÃEÇLm½B.!bMˆê3a á` @6c™nCwP=eôÝCr“¬›ó¦UÚ?ù|JfLD`ŠxžÅgМŠ,©÷ŒçÒ”U3ä|‹”&é·Äóäð“ãÿÖ»†ÖÉXp®éäj—fßhFR›ý_õã iãÕ£ÏHä¶ñ;C~[xvÃ# ‰ÕW¼àÖx * ù”h|€I1H±þ#C#îÑ‹éÎê‰&6ÊW (1[™@Á²íGä#ÉãWÝÜç6#ÒÙLN‹/^”òq fã8qCn´ÁÓ·697òdƒ­ß™ü2G˜‚ËÆðóg¶ÞÆÖð<јV‡ÄÖâIQØD¬+1)-ø°áé´©²àœ™½!¹Le"–h]UB\)‹Ò1 ùñÜrÍ®s~’' .?ÚI·¯€,v{îšñ8»œÙ{#ˆ6èCŠH™”‹Áùý.1¨^¡&82š'áX£Èÿ¦õ÷œ‘-'2Üár©µ¦µ>Àa3µ J'NnaÖ;Ý ò ·‡Ïúu ‰¡$§³?£·øˆÁUYŠEZvkÐ⬖×tÆ‚(Y`òDHã3ú\.Ôà‡ ±ðE>‹vëmÈÍøBC,¯ë¶&FøóÜÚ§øU¯<÷¿É ³Ð ˜‡ãj6¼¢âàòÒQµHŠ¥ÝJy‚9 v¿©êûÍç™ÇÈ…äÀ‰Mˆ´cÍi³ptˆl½”Þ¯ñ²£/A¯/iY0÷qƒ ÍOå­ºÑ 祿D©.’±ÿÆA*à÷c}`™ÀÐAÈQïøõ¯þ7h!ßѧ¿ ß!A|Ä6»f¢‘4rh)2%èL]\’ZÊ/ 8uŽGÍà‹ŠEv2¾ ½Š\;à_ qÍgx“E$uE³| SM¥!„ ‡L¼Káߨ¨ò[ÈãÓl®™Ï'9ab©Ék°£Õ£¬—ü–ìO,¢)‘¯)ovW¢Î‚5\µE_xÄæâÌÿ²üKR•\­ð÷ˆêeFô•Yâ»á¡~b@M/¼^< ºÜG„až[-‡Eùàήw­Ú’ª€W˜_÷˜…iPƪm_#âð%óÚ\°²òCz0¿Mû ¦.¶ÌÚä9ùá÷QöRg_P/Õ¬ïòÌîPf÷~gq~X×¶=«yjs\"K°˜ˆSH5²ðmʽàC Áò8aZÄéÓëaI%ŠmMŸª\•ÖÖøšHŠ]ùÞ" ߊ”|.qH8å#ùsAÉ„F›‚ îõ§+…›4ÿqInLmð/TÐ¥Ø3¿äˆ´…žoÊè`zÛíÀ XÖú"Ïõ ¶t8^l(‰“4óØEG’>W Õ©DÃò?°VÈQýj ·*"ËzŸŸµ³S¹×ö6úÊókâø)#c6*#d†Âo*q4Ë¡:8|Î7;$“T{cÿV ímP ­Tª3½U Éù? 
c÷Íç¢+‹§üö5V™Þ9‡ÇQÎo~[®LXRûLNà­½‚óØ­¥öVÝã>ÍV°¥UôUÖW›Êl¶¡þK»MÞIXK Ù~l”˜GŸ®^Fåó•H¿ÕV—Ó"ò¦58“¨­¼¸Õ.³hmcÆZCéŠôØkR¯HXêZÜF뫾¬E~[£*aʽŸ%O6üÙ½Œ½a¶Ýê6€!^ý©ÿ¸ÙI¢*‚P„×Ç´‹d™òìÚˆ”BÇkŸÄG¸ ŸÃ¥G‰/š˜èËĦ(¤á¨j²0MHs#L>à«\ÞðúßW±u~#Ã:Ž>|o ÕA éQ§·ZW1–•"D¦ýp!VX+ÑÕ›ä ©Ì ‡ˆLy×FòÞûõ¸yàò8Hiž’LMÚB«ÞU?>z7mDýEß×aržÙa•0¢ðÓ¾7ú5 ¸ âû˜<ßê+8N ëšýæ&¬xÔãœÝf]Æé‰4]î`!ѹ›áÍ ¦!¶\ÎU‘¼E8”©`¨iº¯ØDâħY{{äˆjë·\¹ºÝ{  ûù¸I-çŽo–iC“3®šöNîü@ÁI,‚-uîÖuu™¦¾ÍK6½0]¼T¸ Ãì䌄õA½œ%Ôä•MP­s2r„Buš°±E yTÕÃå¹uØÅ^™PòÕ¥_Uÿk¼½Ufµ¥›)ˆÕh±ÕRµ;=Üi57 á£ë¥lCtv«×~F¾&d½íŒ G%ky6&ú {ä²…¯”„k$C{fÎ>6B¥"]kŽ=}o“Mn#åVoÿ{ÝJNW¬ L.‰¢U/–Çad CÑ?u«9÷ÔÛ%DRE‘¹¨ôKÚOuÐVõ7®œ?}_»G›ó¡äU=e3X6«:>B³ŠµÞÁ8õ‰Š2F¬ý¾Ÿý|7‘ø Ϋƒ8à󥀤Î!ÂïDº¶šRÔ?ùúWÿ‡»‡4ÂïßüÞ¯ p•PÀ¥¥¥jx88ÒåJ‚& ÛI2Ô‡À¹…^@C7|-=ÒùLäÄÖ˜í|…|MÇ.™…‚(±ÁÁ.ôè–s”ùÎb¡–7­ÿÑëC·ýãÙ—a÷î4Nä+§ˆM~ÿþ¯zu¥nyÈËùìÍG²)_\ûL6’5xdþ!ÚÑâòØÅ5Kį¼ÄÉ#e¢ ßšk²—ÔiZä~ø:…Tu;Ë“àQÀ®Ç©!C~„¶7" Ü Ã[-—ñƒ^(uÈ2!JjÁ0Õ&#ûüìôæ÷Yí.öFª£†^iAgê ˇž~0ò\_éHyFFÄ’™ØyŸ”9«Å9ud´Þü™Á e"ƒY—~¾eFÏ[£ö¤fö4ÛÝ@…Šùa>‡é°øV·yü²ÒÉgWâ¡]“§UŠN’„#—õûCÄQ.n|’Ïånìü@"9,`p~¤PUì*B—𹩠òx3ö[ÙìÊû‚7‰ñq£®ô¤ÂÐÙpb„ä÷‡ÔŸ›Òo²6'6䕨UY!É;CN%YT‘îyƃáËZœu±ÏÖŸ­f“×LÝ—Ñèw”T¾ ;5¹Åf—ÆjuY¤L)³”O -@IDAT˜‡þBSšo„zû.ïy]Ž0ù”)ŒŸ¶Þ_HšºÄK¶ö¹÷׫'È v„e‰Ò˜<ÛG¸ÂR^„ÎaË"G\n»¶ÁS‘#B*_¶óg|1«Ùùùû:mófæÅjó›\Nÿ½ËÜ¡¦‡PWÀAv=‰ÀÉ‚·£dQL+zö”FF`ÞMýesæ£126ÇKP‚¶9#ã¬åµÂD'èE„Älÿ¼úÁÀñ5Ïnxлº´åa!KŒ³ )/–°‚¨3»ó´’8€±³äþ¿MOBʺ¿TØñ‚/ 8„|Ä´BNÄÔÝWuxb'vTQ/Ø;¢3ýõñÈsõfvܵâÒÿ«z¡Ab ¢ŠŒEø$Ħ®$áA,ã(‡œ8;d “¤ÏOÄQ(#–PyM^;ñûaz˜j'³ëoJª`)(d K{e¿µÿˆU¸œù[¯qÏ—ÕO£ëŸÑÊ!ÂrÔUb{x¸ýÑ›iX®m—ànKZ‰’)6ßê‚×̈–É=ë~¹'aˆœvÛ¦±™Ÿÿv²]?:áÑ]®ódvƒ)‘óž­û¡AEjèžœ=¤þj¹”ͱŠnμëÝL›MlÚvñ[ 8¢èŽ%fT¢³+e³ O¹ø*'ÇÊÌè±{啹xôÖ_OÌl:µÃÅÌWeM™$Ý}.?GÈpÚÑåIÊõà›>2±ßìÌ‹…ÒXtÞ0¤´ª‘æ.X:ˆŒP¡)'ëZ›emÅÇgjý,¿Ùkq1 ›ï–ÕO#bÚszZŒ£YXëŽÅžp©Ãœ†û€¬¾èxǽ žrܬއ›,‘‡þåv#Q"l¬Ó«°A&ô ”âë”{Ð3Rò}©7,»…%K‚Ķ-D¢‚öÅ𪗯5'r™PŽƒ±Ù]ÇOd²ñI#Y^ù™‡/ú[Ÿ­`8ž™m/óÅL–(<îPÛå½O;‚Þå§&Å$¨Y¿ÙÌQE°¸ à_U ” ÑV៭©Ìý÷úV—и €®Äo `Bk4hYPõ­Ñl ´;½ånÐ:ñrªðš/¨ÎÂÍá²ÄîŠ.=»M•~ «ß+rhbÔK®öÏor¦VRKkN(Ç_Œ®%Q‘ÒËoÝ*XG)¾Ì—Ùro#dòÈœÖ Ù¨""UVó+±†ú»L²ñêÔá×S7Æ8Ä%‹=dî›–÷Úšv~äà¦#v?ÚFã‡ðq¿»nLÂG«=JØø:²²©_»ëÎú"H†¶Ú¸óa^Ìi²–rO‡@8h>;‚[Ž~†IÒ/O[Sþuž>Ÿà4âULôEß^îê«ÃGÕÙ¥ŒÒ†%¡Oùïå É¢>ÇCÄâ »V=µ 
ÛÑE·ƒœZÛþžP"ÆÏì{-`Îó˜üBw@(â2YŽ\˧M ŸŸ™N ‚8@ê%ÖI;Þ+ã§kŒÂ}ÌXi‡»ØãwXùA 0r" ]EDm÷éã5XØçRå¤ñÅ…ù4]þrAàÖC# ¬k–Õ½6ëh›ÅTIe“õ‹: ‚Ž0‚ÚÃClÅ&p‹Ä̤•øà¨>·]¡PLÝ>ȽTž–©ÕÑu²¶|ÑH2úœµ”¹pÔ‰è«ñc´BÝ­ë²ÏsæÔ"*°ØäiÁ#M‚lÞÙîKšßûd1D©\™[XŒ #*bDƒÝ…>¹>& c6ôzcÈû–*®¼í×·èÓ¾¡É3­@Ž;8KvA :é,àvG¡Ó{óŒã´ÉJÞ.9ÞõM›Pæ¾1$@¾„Çv1ƒ“7·Òù¾!™ƒO Ö@Ί7ƒðu®œµø×—¬°^®ˆ“yãì¦p± œ±È¡ð™®&Ïoy1ýtCe€0#ˆ1ÔµùóN|_@¨¯ŽfŸáàRÅsCZ—¼€*\ ’#§Õ»âGLi´=ëñp,TÚˆE>=WHb„FÍÔ~;Bo—zS޳ܗÐççàÝFö^|òÂ<СÅm•í´jÃÞé¼xä4B—¸ùŸ&î¡ÐZ+‡qd‰E–G‚#¥Òâæ…‹Ë”EmÖ/8<¡œ¸:M±Ž¥´²õa‘Wá2Û¤jØmåðÍ9Ͼ&Kcãco?>‘2«*Ú±û¦ânÓ[^ mÙwGÃtè^µž{üѪY ÈØ&;v½¦Í  Ìæ7¿’~y Üt1[Xúö2bUúäyMŸÅKk pþâF‹¯úu ‡%” ]ä÷Ü{#“»zS ~¤>«ˆÍ`I`A£”Jêwz9,($ Ÿ76O BÃäÀÝÀ C/–Y` 9#Ä@º™õ´iŸ: ½[´üÇi·êÒÕž´‡Š>"Xvä„S™€à‹¥¤È0{$&Q¥fiÌ-ŒêƒóCª0 nõê¥DCplÒJz‰)•ÐC_1á–W¥o¿óþ÷ßh ¾wÓurR®@“¸JÈ·—Fúè`q†K*á`é"7î͈cƒHt%ôˆz{¶]„Ðý“X8^¤ŒÚ[8ýå‹8\Q‚I!àql ƒŒ©`Ê¥pÂ\[ „ÀGRÙóþ©¿þUÀÿùΡŽó{†:^|÷€‹‹‹£ ùøO5¶þFY)òÇÄ/Æá°i—™ØP'‘)œï¶²ÑÓ/)H”ª¢8§øËœ¾ç=v73Ìí²#ò`G½ÔÙý¤êðn;n}[ò]6kÔâò‹9bÁÌ®UFÇhL_*÷ùxQ‰Á>¯ÃÃ<Î3†ÊÍs®5¹\t\€éD], rAr ß`’ê —¹°Ý`Ê7DÁˆ¢œ Q®£ä>¸Õ¢ˆÒʨ®[{¿¿çàŸ±ê¾IC 4/+Q:SBzYZ“³R•ákÞóDö/‘Q󛞱ºŒQ1±òؼ¼¼¬þjV©Ñߊ¾,ØÔnä5„EëÕ/Í®G “4îòv“mnÿl”kK«2i9ÇÃõYíÎU÷û,kóû¬½§z°¦ç…€Ã)gI–ýx’AÔ)ªòƒ×f38ßçx¥ vtd¹¼O%|ŽÝ{BA“¹ØCŒñB¦lQÃZžÆf³ÆFƬê}¤€õÙ¦1ì¾¹–ÐÂnv‘y¿Íò[ï“#H‘EÎ=Ã9ùBO¬Lz³3rßpˆJ&Žðt@ø0d§YÁDÇϲ§^?EŠ-GhsVe’tR·5N‹»’¶ºÂ+F(6ôø˜öwæüSßf!ÄL.d†™ˆ”|V˜!eµÑ+~îõK;=Zõ¤ÕÂ_néì…qåc>|zÎ"l³Áìô{*SuG$?’xL!¿ÎÉ?­úA=¶_z±½Dæ‹Øuy1–tѸlg@`1;ËÉâ×¶»Îáñ—ô88÷n_L ,î5¶2…ú ªÈd =,ÄO@lw±#v\õÍòJ*+IJ㊅¾—¡öÿÖ +:1zê„©s—ÏPD‰h¬ÑM+èj«›S dÃ%ã«ï“xÕÙ>„Y®w¾ÉWÀŽ`.¿Ô ä4ûç竵œØòòºéãÅ#ËW/ B6„æ¾›2¥,RH³„èÎ;¸á”Ýg²Ø¥ÀG%a+9~}†( ¡ÝíÀU¶kñŠ í'OJn‘¨Î”2!©¥DéPSêBÅKÚA”Y9;RÉ^Ú4˨‹^ß×T*~‰8,לD¼ÊŒ[m—·?Ž˜±a ¸#«Ñ»Ï±äÎԣݞÿ¶»}J˜¬ºÞaJß!aŠÈÉ秨ŠŽ1àbaT`H»i!ƒiŽÕH’‡ßÈä³ï½üãý·½À?wv mXJ¦Ô¿Vœ&[Éæ¾7`¦þiG*p0Üq•TtAÔ„£Žä³ßr*â©"lRÐåqH9Hܦ儈lì'À§C÷CQ©ä—.Sxè>ÄTMn¾;)U`V3.a¨pÈÇ‚÷”ÈÑëKȆ=ú±KHŸcˆSªŽ­d›:7]ŸT.F÷¡ÒJe0Y±JQ¤J "$2ôÏc4,­Ü¥^Uc#©(ôÜy·Ž:.Ò¿Çè}w6ZrH¼À¸â…½ð‘MÒ‚L—ÇÉŒ×ÔÓú ‰+ð M;î®sÈêeñU$ˆ¬Rbû› ˜°¦‘i•Ï }ëN>xwÞafƒGàT@ýKrÔ„Z¦œ'GŽœ-A 2ŽœQSÍ%軎ïÿ³¯ð¾èÅ„FÐÁ¸(H#*Šžæú‹Aö€&ä¨#Ul›P_hpYa©ŽxjDGWôÚ©b‹Mèòl ¢è¢7e@(ÜLõÓìúGÀ3}¯«u~@$ñ<|br†”¦2²Š@~¢ìh®UȶI(§aTøA 
dë¹#;À«SËoXób<ÍC.$Ëßö,Y¯:­~ûÍ3¯ÑETÊÒ±c‚õ….NȘëËûAQßlé Ÿñ#ä$Çx×ùús‡¨¡±|ME¤Ëç_ßõ¡Ã‡DçJ~·OÁ“åçÌý³WŸºÓ=\KµÞ€ž,Þ»,«Ý…AÙIÝÎ9×[°¼ýÛùMv¤ß6ÿI—MÏtžq—ZÎm~{Ƀ>«Z¾@vîè‡õ¦6Ûyj0áuë×h.%É;ËË“Ï7bk&üœµþä”w¯üspZ§]ËïÁ”Q¯Kã0zødíÙ 3ë|L‰¨XTª§Ã€bu‹þïµX Èܲ«Ïã[ãõÎŽßü.oïcÚ =òÆ_­»º÷‰5O&Mèž¾éê|:(&ÓbòæÅia³[sÒµÖcëíÛtz•¡pš$ô¹A2¨ÁÊ=çf„E’/æ—€•†Œ6‘uúÒ'ZÉÆÏc¦Ö8M”¾¿ n-ý³a&OpøhÁ”AÝ·ï9’I½m\Po°ÒX¤àžç¬K:ÞSˆ£™nEõøª¹ß^{™¶,°éúLºÑ:²qØ_ÑMZëG¯}v‹=A¶.(µ­¾¶ŒÚ@h²Ѐ&¦à´EK¥8=D~@¤¾¶&Î%$Ÿà–…(¼$6 †²g*YÄÊé–êrV_Þ‘•D6!tÅýÚ(Oµi6C3¨#âÌì­ŒZØôÌœ»]²y4êD}Âoƒ›oiýÒà%Ù-ýLvíiuOÚïkjc£äåó‹‚š=  ‘s×} +PíÈ$æÒoí›v½òâ4¸¬W˱Gžmâ*0yRlÏÝÿd5]77ßzó–Ó£¹ØdŽ 2gLߤ>Œ8…#6¶#ò¨9ÕsTóyÙoÓ‘ã x!ì¹¾VÈÙ+,ˆ5¼I‹¬³½ ù­fïjS‹?FzI ÝSPàçtXž½>I7”GÎ]Ï¢‹™OæÈXuj6Æ“ö+°Za ð–šË§ž.§ü®BUP5~† ŸèÑ«(4tLu¡(Û§#ÜpwJØ÷R–ÝòI©Ç½¤ãžÏ®W!¾gÏþ-L-8É{TòºPÓç»Ê ‚(‚¸Kъͤ€ËⳄpŸ)‘l=»œjVôÓ¸=O7ÒAB1ã×Ç׎iú›¡H'®'O˜×l¥K ›YÈ,néþèó»{ub*ãó;Nè_k;¥7‡,¹yÙÖ1(Œd‹Ñƒ‰d ÑœÀ"õ¬ìH£Œ8ÿ±¿@ÿ^ÿá ü¿ÖLÈÀ‚Œ@4 äÅ"UîD b -¥Í^™WŽ%òü]üŒ¨Ûù\AÚ–:@ uŸ#HœÞ™ IX-©ÒÜÎÛŒnW9mí¹Í. ™üßOíݹŠÃ&ÑHËE1ï‡04–IÍh‘S8ùd %k!ËÂp”ÃSÙ mZUåO+™Üc]ùTµõ•kI›ó(Z˜ßlo¹ªu'´^–0‹íÖãOÊÒ˜…dJ“óY—:Ñ[9ÉÒ&Oy2FX,G=&ŸDGG,’î5rˆ>Eë0·þ•è`Åò’ !×îôÄs ã”+2úoÃoüÂçËhý¢|ÔK›\vP>/]Óá2ǯ6›mµ’«Ž¯}<$³)c%é~Þrv]q‘EÈæ´Ü¾ôÕpX»XÌ ؼþô²w’³y^á 8QJ5UÒnrôÏÃ$7Œ ÕŒÞÙû.ÏĦPÖ†Åf#}: 9to}GE<Î⥳õ%×=»[6G ›Ôr‡¤‚ Èö)Òí}¸Õ¡÷q|®™Œ0lÛU¾~ä»EVèá–'#å™1ÊD'(ÕL)ÊþzÿèÈ—5ÔVÆDð8DÊ×ùšóC`v˜&ô:a…Ëá4ŸxŠÁRµ(Ñeó¡'êÄŸ—¬;?ÓLJKœe”púkCIˆ -ªÈæý'iŒÁ‹ƒ­Ì¥WfPÚâ’Y-Ž^VŸ(ëg6Ù3¥åV¾@‘›4¢Þ™&æxóKK¨½jÅ!e±P¹…£%¬µÈí± °)ÿäë_üîòS¾wÖétx!•JÑ„xüøñº†8A2Ú‘I}|v0>N§7þîwÚí¬f;eL„lA›“QRÔ;Ú"IÔ_ŠàóügâJrg)Coç‹YѦÄlÁ¥ØA©³ –`´¬j%Fƒi­wXÅ?ä¥*IÅYÍ,ü«ÐŠœ‘L«(‚Ãœ×æ\²¶¢¾Ä7¶öù_l$i6fì¿nvšÙogúÅ¡TjðÛwi1?¦²’rW2ÕÓ Ú“…u/UW—WøB"¶»0P,–ú—?|ïݹµ+/?9EËëÌ cÉñ¶ÙƒßæjI´œHW§uµ‚‹ŒÞqMv”ã7ÖòË¡]ì+݇ê 5l\ƒ9×é4)XÜx±P÷õEr”DÇ4¤£ßŽ(`×ë%28fÊ5'G&MlµÈëð+}uø2í¼~‚¨Öxgì‘=éÞû%1 ¾Ü¤XÕî:Î{P8mê!×¾ðAï…ÎuN½Œ?fƒ±%1ÒÁ;5Ë3û‹ Zaü7«]ÅK.ö”J‰,Y™«ÿˆŽD¹a_j§ËÉé§Wu8UðÖÊ‡Ü §Îh`2äA¦ÆíD#5îØcÝ ³û¢¶8b>œ‡Ä-/Yý¬ý¬ˆ3!>)ǽ*¾Æ]ê¡®mL™[½M }>)5†¹áÝO@";¸ÈÕæz-ÌM—²pì”ú«²M‡h[Ýs߼æ÷><óRo*»!Ú ãÌdh£ùÊ "1CTò.'Ì zL¥q1òYço°7¯\­6õn!Èk4’Õ’ÕšlLë“ü$o֩ѽڬ<ò¶L´a—?Ò<¦´ÆgÄÁ¨¥šA›vé’uæý*zxY0ó¼‰ö¯M{º2®c½!ÄGAF'ÉuY¶çܬbwH"‚ á¿ýâ¤!èR2¯éÆÑ'›fôܹêÒPÌB/z)I俱¾ 
âþHr?ð|/ò ðÇ×Û1çx½±í·lº=za«uï_æRý𦇜ùs •¼øg <¾ßùÑÉ…¹A&_f„uæÏ»®¤ÛÄ"Ckgï|½˜~ 6Ž€;ˆ!ß+æAݲdˆÏ2ñ¸¡­×Ë9šÌk½póùý»æ; C1•ÏmÉçý©m(_|n®÷½oûè;9/^e^O×åÕ²=©&€Aø²& “¬o—[걯Þ1ÊuFí=”;>•£ñXQI7·„.ÚÀú ö>_Dõ¦ ´¡•€/êÕpÔ‘«eÍ—öVó…L“Pî9¿Ù1=N`m¬õϦõï8oõ¶´ò &†ËÏG„€4†¶Åm¹ßø4¤ïE­, Πö£öœÉ¦Ä€-®„4'b4¼£ÿÄg°exÁ  ‚F¿r‚µï-殽1jãðÇã6Õ[=fÝ´}©­A&¶—]_Öà’C\ù<ì8 €MpC¾BvÝVÃT,¢AéÝNL<ùÓøAk\Y ÝL ÒJúv]&5Åð?)ÔbíôŸ÷ä9Þ{ÅÆêš¶‡)/nýo«î–\q%jO^<$„Eb†O¥©Nd>íõ¬Ò°>iBBD±#Í6‘1¿Û|p ÑpL<:tº³!bcÓ‡GÕŽ›eÑ_?¾ÿ+NScÛ~”BdJû½t<$¨£ [›ã·†8(ƹԂùG_ÿ*àÿpû t¡qÑxag$B£* ¯7oÞ,†Ú´ê‘-¹,¡ÅéüìüR[’’c5ãJ?cFË»F—sÑÃîËÛ‘0%¥¡ü¹¿wÑyßÊ }ñבÕnŠ™³Ó¬æË×µÞõÕõWrJòžkã@Ù³{_ÌØÞÊ`cüM%_=öx“Ñm·­»>4½¥•Áá¶.¾üËøfk6>ž –^Õ¤ ìðîð’ãQlŸâ3ûµ[zàÌjó²¹ÅùETô€“Èè±,oóLÖæëßGòâJ½ùa…b÷×¥§z§Ézh1nâ/‹XÎ$q˜¹vÐ߯rŸ7SÕsœNbÏîUªŠŽžÚñ(Êf]ëŸ^ã1š™–‡£áH.ÊËuÃþk&_k¹¢Ñ5WÄgpQžH;ŸC.8B¹Ú_H&u[¶ãŬ´ºÖÝYN[Õü¾Ùm\¼¿!‘7ü>¶·9Ï×?˜4­ÆuŒlúéÖ Û™u¥ñâ_.8€Ï!zή Q¿²‰0‰•a˜Ú ™Sì`úÁ2ƺ B"d¹ù6º½rE¢wßžLíŸíâx=†]'²M÷ºGƒöÚ3ãT´Ö{ÄB6ÐöÝÄ#ŽNj|@(9\Ä0-ìvL¬ŒX6üêÇÏ_ç<§÷!›Ý’Ök¯0(fÃSpô+¶z$¶½·³0‹]W–C,fÜ5µEuèD"ü!Žåp&p¡Á«X8·ýV>Ça4YW<]hÊŸ?hSú±±t.²³R†VžŸ 6±å«¾(9Z5éHÈ©íwæàæú ‹Lè´Øò“õƒ.'?{ÀÕ$G’•¨KFåZBV˜Œmº:^Îq‰ß”À`xÓ‡œ²gPÃ:êzÒx2Î…%5OŽÅSî!ïÙuñÖCóöŸ™Û½íP)ƹ/|W¤´•YL¤qVt9d¹©|DõT€îÈ»^nÁk§Ã-‚b„mWD±>°!¿4AUkl­U›vÈ kÏØûŒf¶±lûÆN“zœ_z¢Ó¢îû\ë1¦ÃúÝ,-ûX']›‚¤ú ìéÝš<}g«*„¯_-*‘6¦ËÄN¤*A³ˆHúµ£Üò°ïÒŸ¯Î¹ê€Ÿp‡ßÄ«ñÎüT¯ÅçÍ’$¥'´ôº¼’´wËIZim—>>UÖdu»ƒ~/[Í‹_×ú¡<=·Î^K$ñf5ÍTDE¼~«“jåKF¶¾uÿªÃ7wP• +SUÏÄxSwA&T]³þÁ|rf×+tÁ´þD&/Ë5ÙŒZ1­ èTiØù[;šÄ¶¢ûNÈìfÙŽŸVŸÕñ¸Âè_„èxYž~ãæ85I ,Ì!–Ÿ†£]ÖôÏ€$´°Ç}'û}úÀYó/ÓhÈ- ZÜXf¾8ˆŒ•Bu-3DÜ9‰æL¥ÌêÌ•É1Îo¼7DÓÃ,( LŠ"1LBNž›…'άx ]+W]D÷s±‘u?æ±ü®qó{O¨0V££ÂØàøÚcEPŰ +´-¿‘]\NPÐ,wÌËo‘“(ØÄo|LC§è’–Ì?5XÔüú‚ó#ñþè6ë'õÜ Àâ#cTó5ˆ$1¤æ­ç³H‹#¢­&`aßIƒÞåŸyý«€ÿó}ƒÆýþedX|!Fä,¢”DôS+)‰`(õ¡‡IÃΰI }.©L Qû9ü®rÂ…9fù¥ø+ò¦¹gÀ$›ÛžŸˆRøC wx®râ|£±A&Þ+®RhÔžá G&ˆ«‹‚ ˜ æêNGÄ0¿Ón@ÃðÅÈ8Cìõ{„A×ÉæXiµ·0%ìD¦Öé“ͨ³9‚ÉB'ÁXO„@ês·éÆnûÜnŠ™½¡÷~®žFØÀaѺk ¦ÿ´×ïrÔªmÐCl;ã”Ñ:›^§w²"Tnv«Ó2žHÅdÙÜO<&çP +(^Ýè%Ÿ$rƒRn˜c²å‰#´šl.ˆ½zêÊÙÁ-ÖüöÞÂ;Ê«k>ãî)Z¤¸K°âV(îî’ Á‚w×bÅÝZ Ôh ¥$ÏdÜ}æwôYß_ð|k½]«w³¦C2sßçì³ÏÞûl¹¶Ô·ªóÉXle«S¾O&N6 é~b“¯>_xòež±t.Éû³Ï°†ssg}²J\£;îåK™AaÕ¬Ñ+ ß¹ë©Â6“m&,ì»>às3í]A» u”„m ŽHéö8€­³àF㥃 Ö]é;cÄ ‹ÓúU×­‡Ìjy†Ÿä_Ð7ß 
Hç[cAÕ©Íwò8Ñ[¿…{ŽmzN¨f^ëC‚°ÉÄÎó…¾L?› ¯^„ë±¹äJeÈãÛ{'óoý:·ÿj‘]ª"2…xfÚ*SØ`‹²]¿pÄì×tÆÕ§;qG%ç‰'"ˆÌ´®Ö§Â×sîðFX‡óìÝÅ|m8Èü«,&v-e}6ð­:^bóEö`pÅÝ^l½zÃÐoD¥j]$ZR)apD6$d%©_»~$ ýˆÐa üNÅ®øÍ%ׂµ–·¼Â’WÞìz޵ 6¶Û,-35\šÂð°6 ¾È³i²ÚÜXu¬³ÂEYÎG:ëÚB7Oé´sïƒs‡¯Ù{$}É—‡×ŸÇ²Ä ¥Ϻ¼ÍI™\#`~Æ0ë—´¿Q!ù`!Ußþ˜ƒGƒM+üS‡e u¢YíŽhÙ5BŽˆ’£YØî8OÊégdRÏuû-Uú)†¡Hc¥¢_ް$={€ÜúÑð~E¸š [m“ù"~`„£â¸Ìb¦´ðWâ%›¾F ì|;áD+T? ‰=(G¡5LOLv¶1„ºd!9ù"œfVóY¹×–Ó£˜‡L­ý›ÉÏ /_¦0éÃf«U[ |Áõ/Ó{l\ûp½uˆ,ëwV/ÛO­XŒ÷Ÿ8Ò”*ßn³NɲUØTÒZëû?E»a4ƒEŽ WzZìF8í _ý´)¼†Y· €À»ÆçÔŒe®rm<ê¨(³éà5žÍWªá!ýd|ïE,bQ…C ³zïôúÄeÓŠ“Y.¹Ù‰æ¯ß#Ðjÿ*+š?(Ý 7x(jl‹-Üx°™UŒc·Ëäʼ\¦ïÈù-$jŸ UÂA““à ×Ôã¡'®åQk@Bl¢J["Šˆ˜°£Ä\ÛNW?°3 ‡|/Ò®0<¨U 1(3P¨MŠ4K”×µV–!9ÜÌ)È6•À1èrW‘ñõw¦ÔlE¹KLŽ<ÈÝì‹~÷áW™tmÄ_I”^„mÌòwg/î¢ëîA;¼}7R£VŽ2ž,m±PÚ žÀ™‚è{ý«€ÿÇKgr‡Tl`»m#«—˼yÿ‹LǤ¶€.4þ¿L¿Q3víð»ª×lô’GŽ*¯‚êZ¸÷øä̸îúÝ(‡V|9¿Ê_âcó¼8Ž q1©Á®ÃP›}>„aÂ!ÆþŸöboïHûÞnr¤ß¾qLé¢3óÀëË[^ …™3o´ßÔK¿å»i”ã!˜ ž"dN³ šúKNw^Óöâü‡s:Ýc#UÄn ØDâ˜÷/.€q3 ‚,ØxwÌÚn·Yl…ËâŠDü ± ÊT0çö Üßc²™xÍ•þ+›æ2Ì(Ë’°5>?‹Á‘ûÝ&„ª@Uâ)TñdqÒÚÅÖÛN'ñ´BæÄyy_þgá¾FË:í_û|Òš^ß™œ¦¢:ÓN_Ù[‡E‚)×$QxËަÛOB¦ >²ÿN.&‚}v6T2wPÞÖÛK0Ú]nXܶŒ'_¬ùvC©G¿õUÚäf‡·±Þ^¡¶óZ6ý‚ŸŒHÌ:ýó*úu1™×3“nddèü2cf‹‹º_Gc†ºŸÕþTúûÜîGÝ WìɼL¨r¬9>œ~®ÎÁÇ­_yu “—G¶"=ª’Žmfû¬ü[YTBU?+ð'iÕŽ.RŸîOYÚâ꺧Sw޹=cK+_æ\@ï “ÓöB¬ç_X“kó·Y »o\qhàÆngݾ5í¾X Íø¢aí–_?ç…ù5U5JLºxå¬oÒ胄½Oò›¾½sŽzl¤YRÛ¥ óSbS“kü^ñÓÄn³˜Jó±s'ñˆ#w ñmýð§>cØnЯˆT¬ìù‚Åu‰XªÕmv³‚AVP±¶õÙZš¤ŒF'ÅB-H'³Äg5»_[ÑXoÑI :ªvyyE¸kÇמs¡5áãà4“Ÿ±)b¡e)¨z_Þó ÒÕ™!aÞ·=±v[ÆÜ¦§ø«Tð‘*ƒhMIþxý„Ýu¶:Q žF'©,+¿ôfñt•çB¢]µxªt#$$ç û·©D«\:ì¢Õã–¹+"Ã7¯¶n=·ÿ˜µÝtH 9l±jñ}üñf,õfãÕGfÏX¸}Ç–±C¿r3"oÍÔ¤‘Åoÿгþ`%ò¨”õ×z¦÷º¸àô— WXe_Üs¼ÏÍfsšz\ҵß2¬’9m¶Š,›"TD"깃ï›ìÐÐoíUй½¢ëÉ%Ú!Hº(íü‰Û9xbÏæCn½;͆’û½'r–u8¹êÒ¼Ö×Þé½·ëOB.O¤T—Ú æuØÁNòÿXüòáësDàò¸½~ ÜÝÝHõ›äŽ{´üPG<Â\æ5:³åÕðE}.m|0€HhúÕß23íð‘{ãèd!^\ddËMÞfÒ}f“Óó(à yæòbµüàá=TåCS2ÉÔ.“ö¼Ú]i2Hø"€Ò‡BO"³G,±VHkð?³†84 ×¾ó[àûãï3íÉ\6ì¼û­å×OÍ ÌÎ*Ÿ;œÞûĚˣèã,dñMþânõû”Ní´ÓŽiAÛµW; •ø\–Ól‹¤ød±ïGÂïà°†Î>ÙEå3ˆ8¸J耙¤kÇ´{o¯‹¥Ê :R†# &0$Õ¿ ¸ZÈüûò7زÞÃ.Ƚ<†‡2øñ6p—™L›¿öà³e”™„dÛ·é<¼Á>q“íÏ7•A7‰~»Jž³“‰ÍWEX¡ƒ?Ÿ_˜¶eV˽5¤Íȇè;+üø®!éüÓu“·¢—öÝñ|6†FÈ¢“Õv"‹ˆuÉÌÒ§JQŒ¦Ä–þ°àUªXž)nKÛb9·þ¾>ܺ^§mûðûv›<,‘¤ßnB"M¨F)ü‘¸â¨ü²Ž7, 
ó<^/‡Ã÷yÜ|Üÿ9Y†€-ÔCMÖ®›0j—Ï„9L—/#Ò”ZLRÂr…mb„¬×megöøÑý(ìà1õhÞ–°ªË·Ÿ¥´ŸÙêt¢¦Æ¦G=h@ K%ò©‡mGÖÓ#” —”9 (q˜`@. u«šÂnòþT|Qkl¨Y¿.Ó'Ûw{œH-kákì :‘ªp*ÐqÁ5CÄÔ¨Ç*°T%@4p‚+æ0EAn…Õnóú*R¬Ëú+ËðÅ€¿úúHI%¯„‹ž¬d¸ ~$€€†?¶z ­˂x@Ý,z¬+ »˜;7õ€†ŸÈ FiHr~ϬîÀ¡ùo>üA-é"!³4â§DÀx0É»»ˆ}ØÜG]Vw¿·êj×­öŸþaµ­ÝíxžîIÉÝÑÂ4ÊfI˜Ëò Ft©°5à&ÖÕOµy[ö4’§Ê·>ÛAØÀ¥¬ž>î¯!K®·[Ù÷Üês£Ww¾d7›å¹Z¢(Õ;´²ög…*Z8õ\ë­íŽå<úzÛ {9ÍoóÃèŒÌØ]ðP¦D×GK‘#’FìÎ_ˆ •ÝP¯Z+`:@×Pˆ“»ø:&U£11²}Ë„üF_z«³œ•$¢=:äPÖvt¼RK×~倽«¯N”:86Ò°"EýùØÖéâ±ÞlÐFâÄïuÉü¦cêNVè´¶€ÿð½ —ÏŸ¢ä‚-ÕfáéGyt„Äd÷*µ|;R»wѬ`ª¼±§\dr§µûž,Ãrj?åĽ ‚þ·œRGÄ÷’6¥Ön¸èåR¬×Àk<ž²êNç‘‚ðûsÏÏâASÛmUhj|x[¢”I$‚PeÙ§4pìѦɽwæ;ƒºÐ[UQ_—…|Ì™{ª¢Øz·>ýâÇGêhuë¨6ØM“Ú/aØ’Ä -qúò‡Ý1C;º=å3Uù?5’Ô)*apùýo¥ÿsÂ!(`q´š‹E%TKÿ×óËÕéBâqƒŽ B>i•—#äŠÑ»M—_lÃãÅr1ö‚·"BD#Jû{k'hk;бD¨D×OW0"¢ýcþÙöú¿×ÿ’èÏ£¹B€G¢§€“p° šñê V$Ý3( ¢Ç Öàæ¥¼è¢¨ë^™Hë÷—M¢ƒ!6H0‡¡«9ØUe)O®SÃýÎdDJË,F†çÌãéÔ¼e‘}h=ø®“8«6px*Œ•³Fþh“VyšÞa ªälìNP* QØàñ;ÐI‡ÖÄè”·qcåÅ^ý䯪 ?š|6N¼H#M0”+[^­ß8Îh¯±8‘ÔöE)®2Ûž®7$ºX·Ë-cj–ö8ÅGç Ú²S.vÙ7ô£ÍæMÑ%Œ¥Žˆ¾~rLaÉ_nsP§ˆYzµ1¦ó’Um¯ åòÏ‚ÂYƒ§bÑ€£,§PÿVÀÊXÑó:çUȦ×o)ʤZÓFf?±6o”R)¤¤ÓöqÄ2ê.Ã?¹$J– ù¸´çºuÏ–f÷¸"ŒÚD(f U§^D¡Ê½Õ1£÷‰ZÉQËÖ¥KQ"»ožÅgz7Ÿyý?ù&–þ›Ûú7Ÿ~ùõ.,(N~µ¹}2£*°¾gÂçÁ$È?ò8CiRØpÌxâeIå¬Jcy(Èø¾ì›Õã~^œ¶Ív …ÛõˆSê$aÎé ©œš’«"»Û‰2—C•°zÞ=Y"gý;7¡`gÈ®Œ‹YÔçàÆ'0<ä½ääÞSL1ÙÕñ7¦ÌCu…-ÊX•‡[Q%0<ÌA|Ú"†‹ùõâAH»Sª9bäÛíðíI®`JJ“ß?¾\7èa/é@÷ûV¿›-B;Hq$Àü*W°äå®’ÌÓ퀖;ðØûÊW„?–ŠT.ú]“¾]Æ?¼'%H´Q)–Ÿì3¯áYâkÕj‹Ó¸êö¸EwÊeÑh%‹ªmßøÑý°ÌË.ŽÂ¦÷ÚùÍ«|Jm?ɲÿÈ7k ÌèÒØ¨âYÝâñÑ×ûðבÍç`ðT`NØ=´FàïeR))¤ 3‰ŽÇŸ«Í_œ4 Xc8¬ÚuÔ¬4“(v3J| z"@b¢‹€,!fSÀS‚€é¡ïæü}Ûs¿ž¥c=ßÍÐæøÁ×£× ¸µôÊ`ú]Xc\RTñŠr)†„ÑÆQÕbfVä_ËÀWæwÜ?ëz»üÎ?Õ“4¨¬ÔóÒU7ºç~} åXHÙF(‰”§Rð`¡ƒ(e,¤á:<7À@(¾&'Ùsyÿ‹å¨8gki*½‘xœXOI«‚q €Á×YflÛ/3«¬åð!àvbgA"„èƒ …¢jO<‰Ùpcþ|hÐùí6l>·˜æ%4R ŠM 'QÚÂÁVŠ€à!ËäÉ,H£ÄÇD/i¶C«~n‹¡óGOñ•{¥Îäqin†°ÿ;b[òv^œìµÙÔQÕþv ø'ŽÜœ‡oØ`_Dc ŒX)PÞ€RíK^F1Ó Ÿ3!›çžŸl¶®)îþ )EȦØWŠõâKmW¯†æ¯6ð}<nxøÉ~*PJÉÜëN|\J%„ʈ5v³Íêx~]›Þï%¿4Ž®õ±âÓœ»ƒ÷ö÷ý\ñgþ›/|uê8°#GwÈ«½·r¨Ü95OlC;)Wà4—Ý§Ý 7È»ð|áÞ÷¦Ÿj´`ØépY|ÝØ.•&}NÛ_xÜšë{šÂÖDQt‰þ¹*4ëT»©=Ò÷üJÓjòz^¢âÆ¬Äæ/Ëzøenûëóî6IT¾ñö*ã`à³Éô¦‡·§ý¾¡çà[0r/ Þq,9raÝunŽß]Éå1'˲ñ‘ÄÊK›Ÿ0koèr“#Lh™Á‹•T–]|z cÓjÆÑïwRù‚„‰ÙqÖ\\°¨[.SwÁðCR~òôNÙ+Jž¯¬ûþSɲæV_æô}€ÛA›K› 
XÔûôÆë# f41²ä›çÛ°²|9íNŽ1è"Ž-daÐæÊócø½ EœØÌ˃7Ž»JxžJsÑÖW“!³ú€T#qÜôSí¶M.6•zbŸœÍÂR—޹”óñxÑN:ÚOxXAÈâk¤”…$Õhl²åþ0|raóc>¼Ò¤vJâ¤}} 3ftÝ7ùûÉ+ú¬š{iØ—›Šß“·†ï‰p†Mw«‰0)䘧F¢ ¯ÚÌ>øj;> ‚+ÆŠŒn?þø¥C *ŒE>kÀ¶¤ïí}§&±¹–ð3Êx6"ò6XÜþ9ߥs>):%Å÷®>“á1£{ñzJ­ìÌ·Û)0+Îô2X5 ÈЫ%„-˰úÞ÷DÔX<Õ˜§~Þ ²°•üù}×3½|A0FÊ®·fø¦PˆoµÒ"f bIL²ô; übdÙ,Î`H¬Ñ¤¬évÅÃ1¥9ˆ‹SÿÜwx­T[m‚ÃÀ^jRQþ‰X9ÈÃä9,ô4ÄôUz]‚.ñ0!ßÌ#šÍû&P²`Öø Ì;ôc@Ãa!×e°kµ PµÐkSuò”‰Ôe¶˜þÙA¹@QÓ[ŸoLZÑî²›ÉY³èLú¾áÜt'­º,òù^Þ*øPíq¡ü)rÃÖ ¦ Na?¿œÏV:‚rÔÚ±#Ž¢Â¢ž=gÔˆÚu8 ·¸å1kÈ}¹rV™³$,°Ø2HŒ€— •²Y€ŠV£3”ˆK"Õm«µ,Æ?°Ñ$¹ 6è¨t±ã».³b¥:ÿàJÔó„ ª4ÿÉ×?{ôÿ)ï†oj<†5.TŠŒë=cîÐͺ^ª'j¶sÔµPYxríÌåý«ñ!h”aß²*%lƒH4H4(iˆŒ‹+N",ɾ#ygî­Ù}brДEbønÝâÛiF£7*V»ëöÚ‹¿£V<ÍBÚ—jHLÁDb5õ© ³‘·–Wr7]oÓ3v=Y±ÿ‡åÙ§îÚªkï¶Ó4¹wæÁ²Ñpkåt-dÚµY½ïƒÚJ¨\âþHýÕ1ܸy­6¢ZtpóIðBÞvhÑØ£T…¸ÈijÃ&ÉÒŽçBv ë¤4§‡ ´pc»‘ÅF-3¨hÔ~>¤¤{!«¢ÜLæLȦÇ/9’÷øý{<Œ¼ùó'N rìÆò í7eÞÈâó™\L½£?Ì;|nÁ±«©)€“¢‹¬š}9(t!B©áÈ^3OžÖæÐÄÎëÑá‰NÁ… ·ÁËIþp€ X„'ûñÄ¢í.s€áAYÀØV»YJÎêW'uXºiÊ!Q·*dP)5Жéç¾Än[se”­ÒqæèÚɽWP¶1“‘õ'Rª‚æ¢x–;LhŠ! ½»ƒ!·Ý¨Kš=åÁˆÅá­„K¡%<êX|œ†¾†Â`T¼GÕdÀòZü óø.;f§Ú5æ{‡¹ŠÃ „|»Ñ%!ÚìKýuâ„ô¼áà„wúßq« ƒVaqÑŸ8aAžD„ˆtì8db¢8,B ãŠïf-zöUÑ´äNGد«ŸÏ¼u}gáOhZEϸ(Ù÷ÝTÜJ¥eÂÙ£7Xý ¾ }Á“œ°•žO§ÓK?u y\üeÿ¡Ç™Ç¾Ï**­ËM•@( ‡oìZÒlÛòãã²ûÿ>wOýŒÎœ›!Us¶çÎh‘·óÙB­B¨ §·|“ jº{Ò³ÿS¡Ñ$‹Õõk2¯N­”égØoñÚ$¼Ê“°gmë›>³:Mb1Íã/±uç8Ìe\3¹ûŠ}ÙSÛnßà*$`Š©nc OSSÊú°ý…$X).Õ_Î1óE"ÉÂfgCª¸@p\þÂíÝ/3‰6ÔWÊߘi4n«)÷þе¬zÐy÷×ÅúŠŒ®O‘§åqGrz*g…P‘,ö^Ðú•J¿˜b,£FÇ ×±Y¬{‡Ø\¦ŠÜÖÇ}>ŽLÜÐñZ•ÝQ¹ åP›ºÆ§æˆä‚(–®.³qIQ鱡Ojú>CrKIš)þWúÐ݆"sªª)Û.n“ØìÏ¢w"ªxÏÚ;¸Âè{ÃæÖ¨Ýb~—!1:àÿõPŒ$ÂQ_c„ßÃ>ð¼‡?—ÀðöqiuKÐ-I¥£k„ÂÊo{ßWAò!²-j½Þ[ÜÄt¯9Èrñ“µ©U’ˆB"—è-¿ï~3yl—LÐÖ/%€Øó€ÛU¥`@IDAT††½Õ€Û‰¼¥¾Ä‚?,™Ão[¼e~–#¹^âÔÖëÄA‰ˆ-áùëv1hñíîÿ¦‹ª©ÏdûRXO¯ÿt íæÄi0"¿1lÈi3¾ÙfÏ/q&pY²Éí7s”.¯ZðZþüÎ9!·ÀXáoÖä‹Þ¼`Døõj!« f÷јúþ+‡çwظ¡ß£˜`â·åZ]0*Æ’š±1àCÏ[À•‹0q«Õ­åÏmsÊTbl”ÔnA½ó©5/j}eã_Bú‹u¡ù®êc‡¡ŒGœûB¹|Ïù¨>9Øýr :ú³é+®ŽÄ³F#â/@j†‚]‹¤±”à0`­¡õ0ö&ÑÞ}ÿ8‹vZ Q†\ôH˜óòØOké6q±±ž€ˆúêŠÄ{ä(ãÀ>ÆÈÙß¶SýWA­çÒàëÝ/Öà‹c[l:òÓ4jã)ü~ù¢ÎG¶½[mµÐø @'Üh|‡$Ÿû1DêqZojwÁæ¨ÈSÔž*HV9Œ«wsP-”«IÄ/H8A!—6Jö²Xâ©­ó%Õ_ÄôúbCºpXµpØv½¯ÐÇ,9wÿl´Žëö’í¿ÍBŽr>VõÚ°íÅbðÞú¦×¥LÁʶ7¹*Vú}‘)±F,åœê„utO!5Áÿ5Ú'ʃ1[@Ë”ñŠ•Ãs« ~bǜɥf»uy×ó!a|Kî—k›ß÷ZÍ"§<½ã¡5?Œg«UŒ0ÀóPS†pî 
¼I®DTåò(EÀ^!ݾH­€ï#–öÕ 'o|$òUpQNËðÿÁ׿ ø¼x!°ø ÍK™<ÔÈØRIùªÑ¶&ÒÞ,3#«Ýy¾J²¢õUO5ëË];¾™¾²ÛñúÀWB ;š(½P¦ârå|Îá$fö*c0ä9ïC*„#¾XiýM/$8…–Ú’¡Œ¸òãû,T+E#ú»ýê«-Øóºåc>‹¶Ì„qêA ùžD•ET0E!§ÃeZBj-ëÿæfø‹²çéË}rE›1Íò¹šÊv’”4솩âˆ+á…y ßÍUW•¸”Œ(%‰ ú,é×§ç·ýF%Ö ¸áô;] 2–w8Áô3·õèq³\¥;Äãó5,¦ÓãògÜh½®ý hYŠbyHxf»Ü&[)ÞaHt$€(ÍÉw}ñ1ݾ ‚Èù>¾œÉ)¯,SiUÖªÜÛC ûF$vR¹æ#©IŸMß×!«õ9ô%Ee6܈Jq¬­ qý½W}dû‰UÑî®ßkãñó ':»óÊ0*¦¥$£…3çšx͈GyOÿ[êÛâl´¢æ”3ŸoîYÊdK…l…ßN'¹lQîÕXÖ¬àí»C¯3 ð³¾Ù7ñ$ÅõÞûƒž6¬Dè•ck¶MÃÓ3[ŸRmi‘^_®Âîç›ôN¦@îóݼ9b8£@Ž{‘+”k²õÄ4âóÓ\ˆˆCÊY]6¢±’ ¨ XɆ®× Þøþã]8”›8{[s|qï_RëœS@4zÙÉáÓk@OÜpQ‡üOf®îp.Ài8U >‚Àú™ï4[¢Iòwá;9ýî³Ä‘X]Ìó=èÉÃO¦u\¿¹H>²µÇÁYçšç÷zùyB“7Åo”D¦#’Û¨$L‹Áµ½ù ]¬Žagíì}ƒŽµ$æÐ­áøú¸–'¿¬F»ýCdó™ zlõ‘$¤Ôʸëpyï  ÒžûóèhED̉Y¼»ãš•QBÁ†‹‰t:ϱ¯ µà®p“š©©ëŽÀÔ2º? [ÑV[üñƒ%®Fý™®8ˆY(“Oë{øØÙï³þJG©0‰[a+õR¦w»ØÌ.±úþD›$")£Q‹M™Íé+Y¦ÍÉrÿªZÖ>ÄSIg´ÝŒd1ŽÊ¿þÖ<Ê<èˆb·"*äŠØe‘×ÍhTÖHjðǦ-³z LN¨›0Ýf­ZÔùd$Ö”÷l¶×æb¨¸Î°›Ï”¹}a Ç#ã±áA"@~EÔIu¥ÊX“Ci""ÒVAO*’ÖÎêr/芨Ê×Îã §·Ú"’q³_5–3{nÌ<¶(bä.¸NÏY_žºôj.eè«*ÂñÙmdráý.&Ö™ÖC¬<‹-ràÇ…”€’ÝåTÞ·“Áf?”òI%§RÌ×’rfX?ÝKõÎirsõ“´U].¡®aóãeôë2RCS›Æ³¢¨K «†Œ\<žŒFd«…1®¾–cbÖ¨Gãætÿ-÷~?ðdN§?—]oµ{Ø›i,é»Ãf#EV&›tê”~ýæ‘ë€$Š ÐÝåö@Ü„œ44÷¸-ò¨: ±àÏ´4¦ˆò#9R MTÿ“/Zúü“Çÿnì6‚&`3"ðGyɨZS[s;†JÁ´>¶\ú³µBW›ÃZ ¶•ÇD¿-›ð¢ûgØ«ÐÄ|Ìòr³B.÷û,©ÖmâqQgˆ³”N¿iÁ­ä½ƒ~rúœ wXÝã[/Ï•ûx8n=ªÙZ%KarÅHãü–€Çá[÷xè®?H¼±HÊø3ôŸèZÒBÏ/6¿¾NrÍ?_•¥ˆÛrœ î0ðùó¾Oþrï¾×S 6ÆÞ^t£Çª6×E\9‡-¶:mZ)Úĺ`ð‰D„Þ°K\•ßÄ „;¹Ô9ÎBJ™ÛMì µô£¾P#V³l—Ã%*]nÄ’Øaq:TTDk %åU5ë6ùíOáÔb 8r±$et¹°ÍVö^És4ŒXcU‚d“ÍÉã BüŽ á€-¢‹=~½XiЮ—]N‘ß§àó| 1ï?vo@®Kýd(Žh,!ŧ¼ë³INŽ4à5°Õ‚‹·7}Ýuÿ±Û“¨èg’>]']{¸ŸžEÜdA‡ÃaC¤NLjIaQme¢Él2$!@4bÉx\…Ùïô%>3*Ô2>Þ"©ŠáRØl|IBM Á 1õaŽÓÇp…™¤ç¢LVhbs˜lÓäàX7gÐf·¥ë,aD*b¨l‹HÊFŒwã“éÔ¬Aõ<#8¦nÖ™GÙÐý{޼|óz`û 2wÍ$[²Ï\ÊŽ~ ˜2a°~©žÈRj¼-*ºX0˜ê|,QAæ }´í~GzæƒU µ «Ë‰~‚Û7ŸÛ{¿]oNö öp«Ú‚?ü?²êqOÜÙà‡z@“ˆUð3^¡"ð*'eÞߤB–0X×XBfŸzöâÜmx‹±gžÁîÓe̵G©8ƒ2³“É7«ƒŸ‰BQæð‹¤êwŸžŠår¡(õS…/(s4þC·GÀ6qúÃþ@D&fyÝA‘¸2©`¸›Süî&³£Î>aÉåvTE“Qýgœx¹Óæ1ðå̃ ‰ fhõ¡wûI×Woص&ªÑ7¥Ý÷YÂì0_,‘Y=ö ˆ0Gí¨Ò—ˤ|!GfÕGbcb*«~…ÿLaÄÉùS¤e•Ù쇞n ·Å3>\¯6\j>3m- 
ù²{¬Xñm6æ»¤ËÆõ­º5ëì\¤YP‚S»j÷ˆ?Äà¡v»štлΠZ@¢K@hA´*¡ÿÇnÆðQ¶ÿŸ¯¿;î„BÀèÃ<ÿÇví¿×ÿ’Ú5âôžÁBrâ»=8íi#!>ºWzô°[Þ÷Aö‰ÖˆÞκ=\¾ºßM»Ã¸þÉ,ºÉFb“µ¯/;Ñ›š·.’Ýã¢Ù[+TW¸ qtÜÑó‚ àË€7LÛaR/eÆ ïëǦyfÙÎ^›§?ª­ÉêpMƓۉSÂÂVXXú>¤ôïýžâ$lúùƒ£Ž\®)þp¼Ïèd¯OåT¨Ý¾¾ûmtÜ ¸‰Íi­WëÏÒ¿b„ºéWšœSþ¾ü]MUŠÃáôú½LÚlÝÃ!,´[£é¸R &¹¡ïñ1Û:@&nVRPVµãæç³¾zOE\˜¬¾²’+»Õ3»Í-p¢æöQß¿ãXM¼Ä'2Ø‹5ª(—SŸÑ:?%¾F•Ñ,“Š]^§×[,âk^;ìD­>kÐhÂÉXº±q¶ör`«ƒ>ŽÓçÁ‡wºú쥡,£z3W‘(­nÑÕ^X¯ý#Ø_k¹­ûÝ)÷;ìéõ {#²¿MRÕœtmÈÎ)7glMÃ-o¶Gͦĥ–”Zù8ÒìrCĈð„‚ç:îó[¾´ùL.WK%Έ е{žÎÁÄñèq½2q‡í‡çü^=àDΓ)«Zκ?Ž Ï¿¼’{m>³ ÷‘â²¢#ß.„ ¥ ‰Ó#N®¼øh•Ýë83&Eù'DŽÞÝO?ãBÁÌÉS¿|µºïgjnb\|î‹­_Ÿ{iêÜ>ë@8‚, 8ÐâIô¡B³#rìÈ&Îgn2·éÅ­âqÓÓÖ®L;%ëVwz*åk÷õÑd¶/IË„ùT»à nìâ.6<ê;§Í¾m7'Ó©U%“‹éIÌ,­v§JË%×ñ°R»èXWºÊ„Ll–{à—åYÏ®ú†ægÍlz(ÿåø%ínË‹­ˆÅ„™Þ°ßõóÇTI}ZÑ dh 9óZæ»®ÿ€ò#ŸÑauzFaÔð³¼ë©]&çÏz0smgêìLá0ËVq«ƒÊ~Bk–œd\ϱoÁ3š¼ÒEÅϹÖãYÑöHþ˱cÛ¿øÓh|kÎàùÆJK´¢•äf¸Rå-b©+OßÈ¥JEL*+á%˯NÊësÑWî@õØŠÛ£ˆ`U!6Ò»ÏÄ»€¶~»‡«Ä‘.7¼Q:­ìÀ2Ghû,À&¡­¤VLcé'$56Ð|XG„lÑŒÍϽÞ=§Íw›~˜œÞýÐλ[ð˹w~AE¿N¸' æåQ|è{0°ˆ$O9úàý´AKL†kHë´q%¥ÆMC~,s¼ kÊ÷Ƣˤ—7Äæ³"†ö‚w·úºþh?h;²çüS6/üzw¤ŠÅƒª—Étݧ’"4VÆÈ«L沪ÒzõRçíM£»†z¤ã›_M£Fƒ,LÛ»Ëa'_˜AW<æ#3{ßA¶`æÈb®fN‹uj%/í°š‡ŽçH ‚?ý_m ÑîmhrÄÁiµÛ Õ'óxR¶×‹6ÉpéSq‡WD,H Âþó¯ðÿx äŒBˆÂ`ƒ-¸[í©( ø<þ¢÷Ÿ^Œ5(c½Ø`"T*§µÜ¥òØÍ<,²aøÍÅïRë6()©ðx}éýO»Ýå2‘êÕmc(ª wTRRYI±.Jãp;µ>•Ú°ÖŸ%ïæv8(R «V v: lÚ‚M2ÿZÇÍC s/´=;Ô\nûÄ7\O=`\T)˜5¢¨ü @CNi¬>â,y_ ÑùR#)eܦ¦9CïfÜï–×õ§¢Ò¢Z1µ ÊßgwÓ#³[ËUWš*$§f"‚#Á+äSË«Áì0Ä^®Ü\‰Á˜rOEþ€ç6·=ÄŒXíáÕ}¢5OGÅ%nvÃæ ºC\>—=ù…D0·Ç&£þcÝ”:^ +ûò4 "’n´~$1MLßcêœqâæwx¼ùA§Ÿ‰ÙÊxmÔ¸³-w¾;mo×MîK$ÜÉ»ÖõÿeÊÙf;'ü~ëÍå ÌKï\oÆØî#Ê+ôÍ›vgX9»äqÖ°뻼ûóËC~ Õ^rb®2Îlz ÿáD›ÝspÔë*âÙØóIT\ÔO…¿È8²Ï_‚›.í·Ê_ˆ, û?÷Ç©«ZŸ//}©% A£ÔS‘q¿Ã‰ï^WýºgÄ«@ˆ#Ö˜œ! 
‰²y®‰Ðÿ7´W–cÑ=xj Ãk£"ž ÅZ /Y¡jGᕱôVƒøƒTEª‘ä¬ É®çË0¼·¦F·õ;ý“ަ.ðaÝõ‘“zU—Øâ["“ë¨"&3 B’Èú£¿L{å#«znÍ:?ª:`Ifsx+®v§¤®vœ¦Ô¨‡gù+Äô7<£¡)Ê‘ˆsÅý*è!’?üêŽýV¶»³úZ÷•i?Œ­CÞ¹Ì"óœËଜp4JUÝL“K¦wZÏó#Žp¡¢B¿bR‰(‘õkɳ3÷àŒ(°Öýú³CÇ~O)"ј ~Øð4°è³hb0-<²)ÖBA8"ÑÉ_6ÿm Lï“7ƒÊz˜&Ø;Õ¾PGA51àËX*A¹Óo!ÖÖtúø@4V5X*˜ q´ÎÍc ¤œ|¤ÉCú0 ‰»0ŠÈ†EߘßcßR°¿GFpõ"b)Y$Y‡ÏÈÑáµûÒäU½/gí¯VIñIÐóêó¼‘rÑ™p1jt˜Ä-Ìi¯ n¼ÜDçh#™^&jšüõñÕž7£èIú{ ;/æ‰iOopú=® ÙUìO¤e{Ì$ïOÏï¶’cöö&Ú†Ò˜&óþgóêÑççmH•–ö;°îøÄ€š¢òMì·òÀw«õ¢búˆrò—a£@É-ræ?£(ªKÚ_d†ÔÛžN\ÙíкË+w|}ßîg´ÿ51¦Ïä6g™²bkäã™—»°îÈ]‡~õD|¶˜…6… 3²ß„ÀX‚A(­kÏÏb&ÕÓ¡ÿû_ÿ*àÿõâù8 /ºÛÓ ¶°A¸ôhW0ÐÚ‘Â Úñ«¬ì›/œÞ1³€2F¶êÛq [Ü´zªÖ>E[ï´möìünêÒ´ókn ÿbåö:‡ŠÞô}×§½üãÓ[_TXYÀ Ô1ꢲ¯×«Ð*V£1@–÷9]Q\ž—œÕç“Í̉Žâ‘œ7v‘„ð#o{Q1ás5fà- Šjö0¼Œ+óùÞÏ«¬ŒÙ=Å'äñW´»¥Ç &ØípkùZFe¨¬ôу/SÃÕý! 1Åâ\>«Éi–‹å.oYƒ®pøœ¼aw; óÈgži…=¹²ã·r•Âê²UšMã¿,ý!Y‘òå ž;d_q¾ÓÔ¶{÷¼Z€ýµ¢Ë §Ñ·{Ø@Í•9n»3§ÿZ–„_XñšŒÚô¥$ŠŸœÛ¥D-¬éõ› «JW´¾$ÎC‹¾ó…L ÞäOÕ ”lx³«¢>`„™Äï¡þ‚#7ÎÓÍ[@?¶ñÚ×éÝoÛÿ²è”IsS7n-Y4µs&ÓÇNÖ¥šÊ ±Ê”-C¿„Ù6·_Â×òUÂg…/’“¢¬¾*m¬ì·÷?UšOèš;lƒßJBV¦‚‡ô׈×' ÂýAþª*®«lhwÙýA/d-¹ùynÚÏ|ñƒXòP-ï9è¡0™×7×obiXÉ©5›Lm~2š‘á„eJ¾j±7¢»€˜Œk›søû úEHm,b˜|*-‚¸ç ć|_áýÑç eÙSTòA­”È81¾9ŽðÂåÎ9lÞ¼A¥T¾»ßCd²Cù)9´c&ÒÄ>êö¡Ð¾íZgÊn2~äÂC—«±,l„ èµjE© ¨,br•øÜüÉlGÀ:±ãN/Ç71-¥52_òi׺ì”im—ë’$…ã±;›ê¤6ÀÁÎU¬XØèŠTœÈ ê„*ÎÀF[4ÎÚ>lÞ€m\¯"·ÓS¶[¾qЯc•X*XÒæŠR+]Ñí”ËSµùÅ\¬šÁn\ÜípTBòÚqOL[jƒz?¿ùµ^ «Üôiýµi4g¾VèE' …%³Ûç¤_Ÿ±ªß©¬oGRB%áHJ·vÞñ/ÖNP5‰÷#{æ™Ij„­ñ²B_w¿.ûX´ÍÇ` éX”ÅeÎÿúc,„«‚«¤ú˜È ŽìÛ—Et†f’´ÝÄÎø^¡*:ÚÉÿƒÇçLºqϽEXÙ1-3ý<ÆT5ŸTXLÑ:ÚÍ7ä¥N& ØC\Íqª™öè(a­1u7Ôù,!Z¬Áº|U/71*Ia;Šl›ûÜD„KÛžGXJ­IƉÑTaÀêÓ3+Ô’1ývÇ­ïgÜ[:dw?²â›Aó[ÜW5(õ–'³“€ÊU7¥I•ù=_¨0˜ªAßýtnvÿÃN7”\q»ëâž§@U 5Qq E‚WaY(…yŽ‚+r0È¢PʵçÛLJpY?ðRÈÅžw®5†´¼í³ÜïÛ.ïþ¤Vj…gZPÓÒ@ÐBêÐÃ5ÔƒßøÉâ§æ^kµ²õõÕw{gµ}¸¡ó †Öã[|%_ŠšËµi7”2m¡­J©‰u„e“š•&ø7]DÏ(²åòrÜdöç›FMÍ bƒC*ùà ÚŒÖâÄTÝÜÞ[½L« 8¦ÐÖ°ø1¬u˜(ÔPX¥“.:Ôš°ZoœysÑ‘4*š¹dL£Õ°±2.Åa.wì-µ,VÄ—}*s3š›œÓún¸¡Ýã0pþ(‰”Þl)'^cuÞ/nKò‘X ц÷ °bA¨0Ø‘™ßÔ%ܺXÄYã2w`ä`Z°*b¢]ŽÄÄG/8=•ŽA1TB#,žQ†\B—iwëâ¥OѪ%{¶<žƒäÞ­ý_3pzwü®RŠ,6=Ê¢B~—¥cñ£–¶º½îq·G³ñr_ê¶aEÝ÷M9Ö‡ H)Œ&«hÑ”YTuY9R˜l•ü„j::ú Äñ&ßmun~î/¹%-ëÌxTðœÈ™ÜXãj Š¦T¸¡–G Z'}–LMV*ÎùjÙ^Èbí®*ª2/Âržº~Þ¯'\‘xù°S¹§©>[Ðûܦ;c) 
­d^“CJ©¸QLãÊpñäÎ6¶ý“­àŇë(a²{íjtn×U[ó³èÈdÃ…• ÛÂWl^ëÑ׋éÉ;…:ŸO¾XŽvιGf~Î"C¢”1_͹=~Ürg!UØDåf¢VJ#Jðƒ[ÝüÀED Â]U^F)@œ_Y¦r>#1ÀäÏj–ãá8ð˺d^´~nA½&Ü *Ro¦÷1ó%amSE‡¯Sr…iõ“)åëæ´ßÍq€Ãe)‹¬iv")Xwígw•Ul¸öžv/$.ÅšÆ7¹åŠG=ÉИÊ›I-q­Î-{Ô5Ô‹—4K6ñb&5=_§N c>û~¿¬•žò£óíÑÿ ˜Òv¥!6)Û&ŒYVˆ¸"#:ìú5:7‡Q·ô·êÅbã âÁÔÃ×ý³UØ?{ôTnþ_»ØdTׯà­×kEÓP—%íO”EÊÏBÔ&…f*B×njÉ}øP0«ç&^¤ø¿µ)$¬ÇîƒÔЗ—u?k·UÄEÇÉ%ªèÄÚém~KÔ¿é_×SÕ2ÀîCòí„;ÅÅÅèÈä2{V÷ú™t}™æ·<ŽÆº\.wý¨;EEE+{9|ŽCv«ìéÇß\9±þ1‘LRj(«ÃMöùÍñlµÙc‚?9ç‹gæˆ'Z,Éëvßç%ÛFÿäruªh³Ó¤“êNÌ/*3–ª¹j»Óî$Î(^‡Éu{¼ uJ ¥‚ }—XŠ6·U«T2ErrбÂ$à 6 ~ˆJ´LÆùcý䇒Ò$6S¥”f´¾,SLJDDÞŽñoL•fP.á2–µ¹#’åv¸\3:™–…@ÎøË»_LNª_Q\VK—T©/WÈùEÅ…«Î¶…ZÍéû zÒ=àÊ¥Ú¥ÏK䜠בóõYN˜¿¬÷á ÎÒ—É/ôÊrZ>PsåTLø¸P›¦\µM‡áˆ8vX¼ªÇ5"e2Åâ6ωˆù±¬dVŸ×Ò¸ÀšóÓèfǹªúp3±Û¶Wç/wýÛv!§ÛM&6âåæ:Úº öÌeòú‚Z‰Z"—¾ûëuT’¬²üªG!nŒÕ¼Ë„ݯÆ2™ì¦D~ìä+š5_ÞÝõËø1M²¾[ý·vüªl\dÙÔ•Æ › Ý´î[jÄÕ™Ùí 8^ˆÎ˜cZoV«5ˆUÇ׬›1éœÍdDZ-—ÇW@ÂUyʨ¨hS•ƒ/TiŒ?*þl¬ýÜoÕ»\ÎUƒž» r,äŸbT°ñí$cû E€Üü³.)ª–½ÜÃðÒ»·†ô;ÿ³ÐjÔÓr1™ºæàóô‡a® u_³»nqÇ„!)Ÿ)]Ñå,JÊë©*ŒUè²i'–ÝÆÓG@ßãbOÄusA7‹Å•J¹Ëo[Üê̆Ã×~õ{ÀëŸÖì^jÔÌ1·Jõ¿+4ldùÁ©”5èÀª+%1|l“áCWœù>6‡ÅhQÈEæ0®¸ASúoß{sv¯Îi µä rŒæBWĉ²a¦ôéÞì~7+8²çb?œx‚¶0ýb!ØÿšÏèxàô£¥4*{vO„ îÐû½ópUÊñ¿ýýg"¢eЮð;ªPdrëûžÎ‚Ú[Úú€‘˜}È¡|""n'rþQ]FaIÊ_Û¨M`"‹Çn ÚüJN’.¦Vaá»%óä1þeW—ã†s;Ò%,n4V+ G[Ë=+#½ÕS_„›(Š_Ñùœ2Þ5oÿ8§„ÖDÓmжGêâÈ]Ÿ^¹3†}Úð™iù¦ÒÂÔ¸äL cúÈÒÏoEÇFÏ»ÑgÕu°Q<¿A}¢ ×ä$I¡>m4¬:yñþ„óñ⮎=\NÏ&ãÛoÚ}&¤ÍèÔM둱o!{‡ýò¢%1f‡f@ùÔ8¹ño jÕjðWÕ«Œ^û/þóÒÇ<ªãÁBòàñmº%Gf–•”ì=ÓÏéwywÿßU‚„2[iFê¯;ìR19tOa)l ПE†‚è&gs8Ø@9‹<ž0íûKÔra`(`ÑÁGÿ*àêíôïËÿG!9qeORBÂáûÙ‹$$$­1/„¯ -PñY]PÃMRú‡ ®Ÿ…ÄÝÅÝ®i¢8&›>§·¿Ò¨V«’’;¿›‹ý°´ÇÑ)ßÖ]Ùú¡ßÊÖwS.䯲ÑÿË¿óéDˆÕ]n mJ£Ò°#¼’·å:MŒÛçæóyKNw_;ìî²KÝ0®µ¯­«J úsX±ʚ>›¿ž¶†»Êè#&o0‚²Ÿb—¹^íæŸJ9¬rƒÓ\?¹iy™A(š %Dìt:‘w+FT†ÅŠ0#QÜ(Ÿå†~\•£œË惤DÇhb+˪Ô:µÓêôDBBž4àñÔzÚ‰fyãž®}ßév8Ú°&‚à Kôg<™š°ƒ¬Òá‹‘<‚xR£jÛ­åÈ +¨ø˜?³¨° Äëdïz:0³×S¾ˆ?ë€bã×Å /ÕÛЯtçÌt ‹üžðš¶÷$üs¯´šõűu·†lþúÒüÝftÛ‘WÏ`ué«ü:‘õ•¬HDÅ—WøªbÉy ÿµ$I«RjçëÕù°PÈ{Cr©°P_Q7¥ÑÇÊB¾tã½AÔH‚,†í Å݉‚ˆˆ¢­õ ’êÛ,–ää8K™Ýçsù­(î÷â¿t"I¥§¬–6>`¬?õŠ$7íéÛ¿yC';½û3¬ êäÛâTuÝ•aA¤¦ˆ+ä3ÍVOñº~NÛ†ÕŒ«Ï@É-[3ïoeàªòî´ 
šÚ4gÏÓŒe]ä?Ÿ€?Më¿ñèw´Á0þôUÇì“ÏWü—mdåØÍ«ÏNœÙrÓô‹Wö¾±âlûÕ=ön}>e{«ÑRíGo¹Ãjö3ÜB¾°^Ró¾ñk£’4a©ëà•90zŽœ>Kçéánõ½Û_•s»Vÿk;ŸŒ†¾Qò“@„y¶ÇÊR³ÚÝ©ßömáûyͯFŵ\~²5„þ‚nû²ï ÛùåÃgkRsAç=ÒhƒQ4kÌËGŸØ+öÆ‘@À×ñ8"£©`E‡cɉIËú\åsP-ÇTðeÖ*cæýžKº—¬¿5)X.å$dG€š¬C<Úì(é”YD¥ èT€s°ƒp¸(­#¶*ÄÉ7iÚ2^ˑU€ Aú¦ŸäI2Õ=Uå¡ùsp*CRo\Y‘h逻%NG|,EóˆFbµ9‚<¡` bCµSßr€$FÇR3ÕD1ê ñ£V÷ÕS¥ jB<öó Sk®g7ç_Ÿ!„ÍE¬¶Ÿâdµ(ÿHqüC]Á€¶¨{ªÞgµ|3c›Úéë„øFA–LoÓ39ƒ·²ÔsâxL-B¿~_ømñ_[ ]Ýçñšþ÷,>S„ÏÐÉ[,ê{^&“èË*}6n§Ï:”Uð´bFÀrú7Ã^Òj[”ä3‰„e3-iñ”/‘mè|ljrVÐa²eMÝgwWÜ·üúúè9¿ßþ͇&Ip"ÿí< A‰)d ¬¢Nª®&LéÖþ>ýT˵ó9‘µ×†Ïnµ{û“i³žÜþxuM³É€î .½Ü„¤åHÄFIÛôý·w•”=eñDÜÓ÷3Ч}Óm—_xòðÜÒDÓ–_Óê¥ «]<ß®;³Y\.0»XÈ óñ¤r¡©Ø¥ŠBU%ejd_!øÁ!„Û£€îŽö…™ü{ýO)ñ¡ŽÕ@®-‘)cË«t³!ØŠŒÃ Éî¶Ží“l˜ñCÑÛJ¿žáÕGn©œç3³U¼x®_.çDÍÞúùÆÃðáíwŨbr:\“ò wX[Ó¸\&K¼>í¡ßÜ4ðé†.Cv’÷tø’³]Â6FM]¼ÊØÊ 3² õ:ÑøPüw>äÆ fx g%k߃±YgÛrdÄI­—Ø:©¨Ü^r ÿ* î&h?|,°ú­bXlj’ äápˆÏVõ4Ê#”{axX'΂Œ)4ÉæJüîÀÒü®`Œ:nÉ‘®<úh&Z5àLÉ ñ¶üÁevóSXS øÚH€Å ñÄ?$Rðd›AÇÔxŠk1ë7жYùmC1"öˆ V늇_šôЉ“7¢€VÎñqV÷º¹lä^sz«YøØ‚›AO_VG×PÂå^n®4¢•×ã‘Ê•e®JSâ úc¤1.}a»´Ã^ÓS N –za˼ôváºRjУU2ŽØâù¤¢‡kDΗCÜ }ÐÚ¾§ÖÙ7õ<{Ë;ìøâç6ß TCSE§"†ìÿQ÷Ж”UÚpå\ur¼ùvî&4Q²ˆ"3:Ž£ŽâŒú™pHJÎ(¢"Š(FÄŠŠD%tÓtî¾ùÞ“Såü=Õ×ÁYã̬õƒÿZœuÖ¹çžSU§ê­ªwïýìg?;™ÜUâ+È>‚ûzÆ­ï~Ãu#ã£ï;þªÄ<ÀÆçˆkî9/9.`’<±ÔA+!©‰f& ;c0H¥è›xÛݸøÎ‡ÎÝò̳?Ût1k–9sòÌ¿Ù׃²˜ˆéÝ{3²rÕI?—ñ’cðÉSî¼åÞ¤ñ”dí†?šݳwwU)}ñ‘·vf;°OŸ8á.*P1’©»®=ùÛ°vétäêM˜œó½úÊ[?qóß,d‹N`àú@n¥¶d_ÿÔ?_ûØûfgö|ö·o•8ê3ª LhêW÷ƒ -_uÌŠ—ý»ÕÓ7üÝ—þå૱©|<þ7û_ùÞc/KÌ›‹@8‘+ÌŠÜﻣôúåx増¼ûÓs{’Ñ€ÝuSY€38Ä÷^†A.¦øï}ë›ß¼÷;·Ü‡¡[sýÏÞt÷co[Ø»n9²ÏßÂÍ‹²¢þP ÃÖ6ˆ‹ßòÝ›¿ynr*Ëĵ÷Ýê#§\ß_r?y×±vw‚Z¹õ;¾úä…·Ýõñ$ ´ˆw|á9ÇúâoÝܯûáGNúú™Ç|îÂ3¿€ÓùÌÜüR²˜L|õçß¹ä®óo{胡ïÞúäY”HiÊ´(GìõgúŽu…ÂOÛ†‚í SNÅkoüÅßóÎ ÷½£½KÿÒƒïIMôæ§?ûëùˆùÒOÎK‚CŠøÜCgŸûý7Ú–žQÒŸ{üœ¸[¸àë'ÑÒu÷ž:$®èïfn¼çÊÄú6“cdþÚw|ß½#¦åè~ì’÷žpnõ/=ù‘¯}°¦[n½‡‚-ôzk{@ˆÛ`})¸}HBÂ+u‰ÿsâ—X[Åà“4Æ$XjŸÃ1‘TïŸ_ÿg¿õãè(úù·þ+#7úΖŸïºô[O\¸0³ô•§?sÓ]çÞqÏEt$&#D‰ÅÕXTç¾ÿ"œ¸ä„â‰7.Aî«òBxŸSüªÎÞÿ¿oìµd€·oßp•$Iôß]~=ì°Ã0bHRâõî»ïÎår Ãüñ<ò>±P˜ ‡Õ÷o¿ýöõë×c•O<ñ©§žÂ‡ / ƒ77ÝtÓºuëðÕ‘GùôÓOw:p’ÖúÆ7¾±víZØ›×½îu>ø >ŽŠµ¦ñÿôàá§9ÄðG_CIåú‡·Þsú לòúwâ*ψ Sì/5G3 ëÙÐÎ|‰óÁ— ÑÈ\<`?ÿÖ>ÿú?\{ćSca?NñYLÊB|=ÅãâfT…žéÄ~ÉtIú±¿ÿæßîJ“‡C0h!P RÅn;Ÿ9ù!Š—úô ¿!:Z‰YsÕqÏúÍÏСŒÆ 
zÑn¹©*Ù~ÜS´a² s“"4c`R1Ù²Ûi-‹A.’EŒF5KdaD–!Zˆ(ø–fÖWN}\$H Kˆ”#™ípB(Fè^BªÁ äÞ¶;Àá0µ³¤dé¾ð–£<1vÐZ=™åR$µa6K™1Ë@iÅøçN~±¿à“jŽÙJ.¡´ ©L#èñ<Ê%slÞï‘´­ÒÔåKPJ®èÉ´IßôÆq–ˆŽÈ´¥ø:Ë`U28¤ÔÍÈD÷NÔsûâͧng#í‹§üÒA÷ÍeÀ;•¬~ sŠñCÏøEqPD€•6Ç/?ùöhŽ»â?c[²âfÌEÿ–¿}NGŠžX†QB¯ß8ˆÉh~°€IPÅœ-Š$%„¹;NüSû}½B©Úë³ Ei¯9¨‚cD¡á•ˆáÀâÅ M™Ú ÇÿÒ D·:,cÏV®~ý“\oƒd þôQúÍ´ì qVêóozä“§ÞœÌJP›#>4Ø X4°F!qæÁHðIdiñ¢¿™Õ©Þ@†_&3g*:Ø?,€Énbˆ„µší+|o816h~Ø}0Ÿ±?ÍÓÍ´äõ³)FÒ¢ìyÇ~ë¼×}KÓ!Û›Ÿ_—ËP‡‚>¾0@k¼lXÖèü§Ž¾›‘Äõ°¸8Èø ¿¹â“¯eW.›%‰M›„‰ö®­åÒ+Î=ü_¯;ùÇÕJé’#¾é»Ð(}‚°;£¹”¯y2›ö”Áʬ»ÚšðüÜ!O0ÛÒ«ÝÃF{.8ìsgm8ÿš“n9ÿÀ¯(sùãؽ…ÏøìÇ>ôéµ·SýŠÁ–þÉ#©CÜ›%cÜÄ}âÌÃîM qþî –ïë¸û¾xøØ|ôÈ+.yÛ]gñ­”³òÜÃï=ÿ˜ŸœwÜþlGzÍ©?Š—žxÉ޽,l1ëù7³y$5ÙŒÚ}l7&®=ü‘;ް㬕ß- Fn~à²Ïþâ_¬zX¥V~õg¤QíÚ—Ÿ~[`ú2]øÔ‘÷}êðo_pâ]{7@6HÈÉqØÇõêè8¶÷Ò'Ì•ڽ߆Á´nÏ]ù›ýHa,1жУþ—ÿîQ«¹'´_ÆÉ2-¬~ñ1I•öÕ§Þkc¸\uúôÇ=KÅ[Oß“¡S7¿›×˼³O:çibXÎG- @<î/…¢q1ÜvÿµßüÉuß}àŠ¼$Ñ®ÓYœÓ#üªkùÎc…ýä‰ß]Åo¸ì ß®ã¹öàû®>äû#Í5ûyG||Õ`ù%ðø"áv×ájûä¯Ïl·­»ÿÞ-¿ø²nŨ)úÔc'»ôŽä†G=;\<¡ó¶+‹Ô üLÞØ74‰k‰£iïÜïü Ïø"®m[ö €‚˜¾þ§™øÿËwÉÄâË&ã•õ`&ðs>ÞüCŠgù«e‹óÊbÿ›7¯%¼°°À²ìîÝ»[Pûuœv»ýì³Ï"ùÙÏ~öŸøÄõ×_¿yóæ3Ï<ó¤“NÚ¹sçò˜Þÿý_|ñ¹çž‹a›>úèùùy:ºuã7^uÕUW_}õÞ½{ßþö·qÄÊåAÿáøÁ~ðãÿ8~â=ïyÏ©§žúÒK/ñ<µð»ÿÓˆK‰÷‰çO½â’ï¯TÓ_ûÖ»~öÀ¿|è~\[ƒ^tÅCoþÌOO#u±@WWh«P°¡¶”/5‚zI*þÓM£$î??þüK8y¹¡²½þàÊÇO4ÌëOø-Ø›l 28ìOÈs Mrhû2¨ž­\€jÓAŠ4:Pv4°Z%pNˆð…Î —?ü·­.Dô´ CD´—â²*£îªïúч÷|æ¨ûéˆeBá5~…Ž 6ÄÂÀ8.’yoß3C|â[P’‡µ‰ñ$æZßK€Ëpàz½®cïèH]¦Ï»ï]?ûäsÝAC¢©«O~4xÇ37_Ïa_$òþªkoøØ¡Õc¯ùöéE¢D÷YÅWϾïõm½å“Ë=)Ÿ)ì0¶É2¨œ~9S½âèûbq`Ï”³Y&P¶,-ÝðèõYÔ ‚ƒs!"§È†1f\0 è¤m•À$c…Ó§²^šõò¼Sáía<k”·GD»Â»EÑICó‘÷$ÞÀp¿ð ßB,¢XåCFŽ»áÆw«v"¡ŒyjTÝpÑQ×]qÂ-­V†¬ˆVæú“6eÓ#Wó°ª 1Ñ€a¸K¿9i=Ií…¦ïñL‘ÔWó0ñ}î¤-~£ “pëï/;ÿkïßuueÓK»éÁ÷ãI m C™áÝí]y q ŽlpÎÅßy“Ä”?óÈ{ }› »Ï#ÌÚ¿zdk÷õ¿ûýí§?ýüž—51G ŒèŠ"}ñï¼ä‡c ¼ëžü§Rºú©û‹\B_´¯{üƒ[—öB\lTYáš>Gpç?ø>Í©TÈ ÞN—œQ±–ÿüÞöÀW\ö ñ€éöRΫœ=˜sSæ<5NmÌ«Êô~0–g¬x/s¢r?—ÌìÝøµ ÿU¾[<÷ ñ9ÛæX#©ù¹ðˆÜ~Îo`C«êØWº¦³×¬l.þâOß‘q+l'3Ì­¹êøçÈ^“¿±Ä~ã§Ÿ^›9 ¿€’&fOo%7œÑÊ×ó`:.º{£´YÚ –äA-bíÈ®h‹X—ßhÏÙ×ÞyÖ°Z•ƒ4ÝÍÞðó3‹ñ†`Iºñôͨ³‡«wígO”W˜–AFœÜ8ô$µ}ñã\ü†§Ò|úÖ·>±wj÷í÷PY)ÏO-¬]»¢2 :Åxfu¢‹9ºì¨{¶ü³Ÿ~n~SV=>CŒÀä÷õ–g¹y±°ºº[þØ!·}òð¯ßôþŸûsaž¨ÈVæk'þ(í ÁÂ%fÏ!n>ó>{ÖˆÞÑëyÉ~ñøžìÑK‹Û7ë,kÓûõ;Ö¹BìgÉš 
e{C?ü?›.^ûÝ/ÿ›”P=ûu¿ºüè{óÙádþhm¼Öè`ÌoöºÄ )ambqaô[v’§@d´—8ó >ú†k1grÝtò!X‡ÞPg{S±ðÂ(ôøóœô¿{‡Ðö¯½°%$Ýð 3z DðŸ/ˆ7°8ÿ»_ûóÚ¯%%¬ï~÷»çw^³Ù„Ñ…-D°‹±€IÆ!…­Å‘Áš~èCªV«X¦ñëá‡þï|KbùãŽ;î¨£Žºì²ËEÙ°a¾Åbp|`Y_ÿú×ÃB_z饚¦‹Å3Î8ãÎ;ï\*¬211q×]w-‡à8Upˆþ<ŠÿáÝÀ›‹bº<2æÌûÄvää¾}÷Ž3ßþö‹õöÒJf]eïÚ¬·Úe3h¼“”@+qù\tÞÃ6b´‰ÐÃV*èó­Üÿw»ÿX ÆE¢âÖ"±³Bd`Ä3JÖ2l—p3lÞ¼çûª¤,€eð#ÁMˆžF$:V'!>m¦Hv…ˆ`b\’iÑäÀó»!£;¸‚EŠi•Ó£Ì.Ùu©˜j:ƒS†¶~àEÙL@¯  ÌCØNÄ3Øð2ÊIÔD/Eø£Š¬ \õL3°2Dèô¾‚ùd°c¨ NVCAŒšz V¨¨VåÇ@”СœJ¹>nÇ@9t5§)v0€¬'#I+q°PÈ¡)mûŠ©l½_SéH5ØT¢ŒCSÇ©!ìî Ü¥‚Ü„ã ÕM4æÃWØÜù@`ÑäÐ[…le²Õ^´¼y˜?xÅäÞ=ÛêDmcñ£K؉ò]µ£Ùj­³ Ú‡NÈZ@쎭²Ï÷†ëõœ¤±<¿»;#érnx¦=3Z-/µö¢:6ïZ`´E„" eÈi‘”ܵô(…'€ƒò!ù( c…Aë$ œ98f°Îª‡Ø  ,‚G Ö“B¹$®2lŽ•$13è» + ×Õ€ Õ[(¨Ýv“!ÅØƒ 'm;è‹/¦øaHv`µ‰ùñU°Æ¨8,+Ò  :ÉË*Úì¡`µ˜‰Ôš­úà ­TäB½×„¬&-JÐ!8Æ0l(¾©‰Ó­)®z±ñ"\ŸŒ¼xZ‘Š!•bÄL½ÝÕ4Ùózt`i<‹tAšÊû8'$GÉ<#3Ûë;V–+µÚ<:êÄa”Qó‘ËÍëýQr%Pø%™äs› }3 .„Ü'pf†éýݘôI“íTÙß²û`ŠY]V$VQ®¦:p…5ûþB@ް¿ŸÙk¶À«[voúÞ®\ôæ{Ô9mX©ë00:BÙÌ€v/yì®:ìI3nŒŽ)/Î<÷ç.¾émOY]2ö  oNlÚŽ=ÐÌ Aˆ5^ ¤Au§9µ6w"¦¦çwˆTQΦmàN“˜! ¿“âøˆ§Í¥”–“ãÜ´¾y8£gmQÊ +h24í¾K˜i^냌G ÒiSSsÛçâqñð%÷QˆS†®ãA^^1k¶&Ôu‹ú<Ó²B«ÓL©iCǂΠN%•Û­›(cë÷LIûz#݉Üêv{ŠD=i:òc”UXcÏ{Sk‡W×:»¹H,pE€Æ2n¢}93"Ö™ÁT![tzc™F×Z#¯3L³žm®îv¬R¶8ßÙ“¥rQ$*œÜñf€Ï‰BÖ YãeHdŽåV~ðk_{Ñã—ßùz\ÂWs×5Oýó§Þ}¯Ú-¤XîlÔd‰³Îøòm|ü¢ ;ÒŒvÑs•½éËwþìãwœ÷óã´sÞx3EóVß)LÚ=‹ÙB)·eê_~朄æâÇQ‹¨ï{u˜µ–£2Ä»°ËŇÀ;!2ˆ7¯]ÄCCC¯Î¯âÆ~­HQ"œEŒ‹¸h0F Fã8|à•+Wþüç?ӛބÞ|à 7Üzë­{öìÙµk×þûïÿÜsÏmܸ[ÀŠøêºë®ëv»µZ ´á‡z¸ôòh~ñ‹_DL  ó†ø…^ÀZø f ö5×\³ûâ'ð»` ÿ—çÀ#:06‚\ÐŽbòxãßøíùûÌV­|ùŒ_wwp~4„&Ö«˜<ºÎÍ¢u 0¹Ï|Æàx,à0 :<ƒ†7iß妅b8P¯º˜‰3r.:¡ßŠLô@7 #”ˆ&ü:^÷ɶ%ÿÑ ÅbHd½…=ƒæ*Iv(ʹز1%åÜ€ÖC—æVιûÐO}øüGÞøõ&‘£S@IDAT·n›ÔrT™exÝíyGÆ11À‰íýÖO¥”A„Î{- ƒd½¦²hô1ñ Øà䕌pÁ%V8q°*vOuL®’ãX„ë¢H¬0åÏ­b×Ñ4o:€SIŽcmÏeY¬˜ BjG‰ßbB‹@4N~Έ~ŒÆû 0l3ìw+†f@PNð;½ùO=vÒ=ÿü\c‘#´Ñªùf³Gèä6îiïŽàd$†m°àäð4× {Å•q 8F¤ÆEŽ‚ôu·!á«ÓwNû•c‘í( w+SÄÒ r-ÉÆŽèFÔ†vì.žûT!˜0DÉÁGéÅÎãõ•±‚Ð²î²Æœ‹`^N ííOxOÀH´5èmq<9pê¡Ôm\0ÌhvUè²-Ód ¹Í?Vª½6•R€zó­Y^/ÿÅŸ9íîKyÏÅGÞ=Q\Ûl6DFe܇>Ñhur´,ÓBËlÁQSÝA¥¸„tý¤<ZÞb\ 5«6z55Ýéxœ8ºd{评êˆ.¥ÒV¿®22&!ç ?HbáÆ0†Õ?ï×ûþä¡6 ÞÊ£­4ˆoKŒ(*Ú¶’F# 5H|H¤Ð‰ÍReìK›^W>rGm+Úp¦9òì‡ßpű÷)è ͯ´!!N˜9BT¿ïÔ8â 豩`wGŠÊä+j=Øm6;«£•¢^b³JÑÀž ã¾å4K¨AšLɘ²;ómsÚ'– <ÊÕh·ÊÔH͈ц/O 
W¥Â­§?$IæUÇÜ´¶¼ÑokB\•ý ©^Ëu}rΙõìAY•ÒdèXÀ¬ë†5UÈMs›/‚:¦RèqÛ«‡]”Cù Ú#äð„Z.Ù!¦Ôïw’þí„\VKQ(·[-øB·žòDN,ëí€4¥¹V³nÔ[æ‚ …NÐêµ&Ž©¡ã‘IU\•ƒØê2öœŽ_w×·qt<’¯øqtɯšX˹îÎúlÎ¥N¶ënÝ£BR¬À™k”Ȭ–ÒÝ…=%O«RUTÝ&Øò]Ò/ˆhɼ‘ÂLä*ìx«PqÞn ͆»"½ÿ°<9ÝÞƒ¬-ZP¸ýIèÃÙ,£!WwÏ,†B´õ€õ8t 1ŽùEm´˜«Â!s§îלÐWÄœÄe‡3[è*,¬Làš‚Sà·‚‡„Õ„šÙko2F@Xãê ÓÃ È ‰)ª˜)üý§‹Ç> pÈH± i™a^bÐj‘—²RÆw½4¡6h¾H5]4ôHiT6æ2‚¸³»µœ$|­ )hn40 {É 78ŒK¥J!SD"õ–3~¿nø Yîz2;íuÐBrÂ4´p*Ûû„?€T )õEÓw£“W¾ÙõmL†"‘8–"'ÞÀÀÃËå•l±“řጰc~ÚL‰^© ®]°G„CV©¯Óˆ* =»CwæìÃW©ú\¥Ò²?¦idbmbžÝ¿ðºauukÁºþ ¿+Šc”-…=šÄŠ6zùwþá·®½çÛ1‹¼2Ýýi2úßýAX…©ð'0´;a}1í/ouÙ¦à=¬/LÆÿî§þ¼6¦µ×Ƙ3’¸…Báë_ÿ:"Ú÷¾÷½ˆq‘Á…áÄÐ`¼ðšÍfq00Ò«V­‚•…Ý^O`Y1”_¼.³¨ðŒ7–©×ë°¸Xà•Õ1àñ1Àl-½ï3p«à¿W²ñ9vdHd• fW1I;&Æô”ƒOIfT¨¨Di²zÉwN£Ô8¤ —è9Df‰¡ZŽ÷½"úIH £—‘s¼À±#Ù¸.ŽŒ}æ¸"OãŠàÒ‹ÜG(¦„¿ÿ‡úÓíhçI"ÐÜÇ.Kö,‰«"B† ÌŽæ2™Öñ¹(…ñòÚygpnZUS¨xu¨»Þµ—¦ÙNy¼i¶$I hþ j_j¾ô—GºüIÍ®­Ë¬m…M7ð;õ; l0Ba8¢}yò6‰_XM ÆÀN w,Ž÷j$`*œ‚Ai¢òÛO”ƒT†Ërßó{höã*«JŽÈ©Š¶o”`º’ÑKbëD©.2»Ir÷‡¾ä¸E)˜j?i‹‘˜4¸ pf£Wµo #k°#Ž[@°kTrd^7dnHd²!‰¸¸=þÆ,EmmmA^,/Öñ&N+RAN œžB¤€h$Ä t†@ïEÝIؾٳ«©ñ-ñ û@£çJMÅ'ö>…Ò/‹è ’ÆIÁ¸€I’йˆ|4L'ÕÃAu ñ:8¾Äƒ•P¤þtI ­äòѬ٠œÑvÐ\•t“eeØpîóvŒfR¥4ŸOê˜4ÔqFä!‘àzQ{ÿêh7j‚SH÷9½6|–ˆäN-JsHy´§3)¥éOeÓ9@‹£ì(f¥½únhªjÆGév6!Z§÷PLJE+ó4›RÙrÃj 1¹¨ïɾJš áp(ÌŽÌzØnYF?‚ZŠÛ<¦•mÞjͧWd%ÅÜ7¼ÿäêEk—•ŒRÿÑ©§"KòRL“Gñ¶oQÝhÒY~4fj52b‘ö¹§¶‚Û¬)I{~¤S\ˆ“ì€ÜFÌõ‰9ß5S”Ð5fÖ3Ãa×ÎЙ&ÑíYzY©¹=4Û˜Œ]ýÙ4]p GøC©jßD ™1{.²ÑkÄ5Û À3^)ŒNwf-Ç[;`ýðḕjFLTë~ô/±¶´·§9µšÍ¸.Úžmt=#pÑ<{æv¡?ëƒèÑk²hNˆ5«ÏnH¯Ž‚›·¥…†Rœ$”©,»÷¥½”eS»š;v5¶Dš3zܪ»óq4섞Áа¯X~xx›‚‘Fp¼¸¸¸¼$âcÄÓXà˜cŽYÞÔr6þ¿8šŠô^ ¹Çï!­… õ µÇ&Ö6¸M\þo¾ä”¶ w±LÝ›—ϘêÄ4®'Ü{h•žH»r ÿ&È X”3¥Èѽ³mA)«È•&‡}¤ûœØ©¹5€L†6ø´$Ï6øßmv ­6$‘23bj@ Œ8ÁkÑí+²uR%R°O`à5#Ú­Ð{JJ -š‹¸ø$NÑíþZuõ_,®A<8-t—rB^Ѥ Ulõ›ð ryD°|Éó•7‰õER–€$<׃d:M¢=x0$’Ù°Ä^Dº„uøÈYN1ÂMz.á>ç Jc±´ zÓÆÎrqP!ð5ÄýpèÿdÉ€Ïúå#Ì𤢄?1ÀÀpäTÌ€ÀEÆx"™M…qKÀI ±3Tà3¥Â„àÐHJ™öíÐM¥5”CÂâÈ  ‚‰ª2„àdɘÛcî`ÐŽb/Q·ºµÔ':–3X]ž( E•ÊÔúhtPŠÀžBx*mbîÐÉ•‰CÁ:1ônÑÒ1,I%È2  &ôø 9T*-Ãæ° 0„ Þðïý}X ì.RJÞNóÊ\„&´Lx*tËÀ°¨²<"s£=‡švÑ€§ÕuˆÃ31D;ÝÝëR 8¯ g³ÇEÆê´|ø€H÷¡‰ŸÐ„¹z*­1SÍíØA õ^k”ß°¢th£C“ñP–ßϱ€ÁäY.SK 'ÖüéÝ­—@ì " Æ•"ú£D¤K‘¿Uf–h¡AI6ÇpZ6àõ!Ñ„ÓÌŒ¨ÕƒóÏõj¾–»êzÁ3{Ÿ‚±µ#êÒ º‹n³Yëù‚_RGF”ñv¯EHº(cü¸;²Qéèìñ)ĠƀB/,¬Ý8ò£ _ U½GÔ 
¥BE"#…Tš’mת³ÄôNkk›®÷ ]“SN%%èÅìL}ÇŸ©¤E9†TÚGB•˜Š9ƒpB—G¿z(tÚñžyòIy¾˜4Ї,R­$ýqyUÝ[ØÒy~É›³ G¤Re¦—êÛàH¿”)CnË/tv€/QÃj;QEÔ5©4À…ÀJKÄRŸÝéH;·ÇpnÇŸ.ñ <€,• ü°À‘’d¶À§}S¤éAoGSi18„5ÍitŒ - ÚÈ%‘I¦Ãdi)Þ8ÈHÄ„"”Ç kŽNŸÊçFñ@³ÝqV[ß½2“2û{åШð4˜ŽÞœªí„{z#œÝw« 'sã0?™"rlDìžÂ¥á*9±MgÎw©ŒÂ»ð’-Ã\ä)Ýf*\¼Z+);ÓÏ hp­J‘¥Šö¨-•’WÏ¢ÐUpERË(å/¿k÷ K›%Z‹c\’åÙ7¼åDAÝ!ÊÕt"3û*=Y-o)©’`Yf° °ÛÆÇÇñÉò·‘až—­ò«ò˯ §cddVf§ 'À(b¤–––p5xàøã¸<@À±<|ØN8ïËã SŠˆV[ƒ)…ån4ÀPLÖZ^¸ È\ËËÃã`IXñeЫã”`y¸Šxý/±c¥Ó*ðDtì¡yZKU- VæÂnM"–:ìÝ8ÅÌ0\9Â.Ië:;¢-؉䙄k‰EeŠe9¢ w’žª-D1›ËX62JèCÀ32pyœœc%nza/:…%Ñö7á^ýÙAÛ—MdbL ‰BúƘ†w9k.ù"òI²}ê¶Q!†’í3 *e l©ãu0°<É7QÄù߉G³•g÷>ÉI‘ Æ åcǰCû´÷’1uaWzÚ21R¸C±ÈŸžûðv|ˆ¯–ͰÔrû]ÓÀlžæ /!—(LL8`vÁìÄ”±Â,;<™[ߊºÀ°9 (J÷Âæbg‰I!ëBF%*qØ T¦0½DŸG_¿WIA` â¾ ôšÛnÖ‘þ‹é•ë[Ž•a‡Ïߦo÷#¯¤å³I§H:ŒÄ`èIË“`³í̤¢Ôu§5únˆHþúɲÐlÕõA­Uƒ#7K&‘7 ódž8õ¬È<3ý Šñ8RF³Û˜C‰½m"ÿ : R¤Ìú.ºÆ**½:u•N—Ù²–9ÀYàÀ{ÈŽÛZYßUß- ¨ù£¸[­£NISRMÒBFÚmîµ|c`wÓƺװöú¡Ñ÷,EI#0¢IŸ`BЈL3n#ƒoÄZAÅ šwQØŽ âP¹¦™ŽÙ“3’O:Z^† ±d@ñ+ÐP–ˆ·×^°BöXvEù$ð¢X6-íÛ&|Íoå3ÙAP Ŷ Rú ­2\L:h¨’Ê·\ßõc·×m_0À¸8ªG-h‘Cì³ç-04ü}«He@`„»C’hšÏÛDô.sÃN°Ôq¬Aßœë-€Ïßµ»ÍjªXRT©,ÉSAßYÊ@óÔ bT÷tda |q‚]ùÒâðôdBíºƒ$³@¡5½.*qsŠÄX Œr(¦kÝèÀö#‹Œh{­þ Ž‹}•²f¡^ié1×nv¬%Øo ,?H_¡…”³ia®µ%úƒR[àý4ˆ&£ù1Ûrßsè…ÿpÆGq³úN&Wã0 LÌð€Zat‘å\žÿªMOO#2†­ÁïàCc,öjüf²e¤ëÕÚÚ_q;°…ðM0:)¼Ç/{…W°¥ÆÆÆ0"ˆÁU^æª!¿;99 [SŠ!ê”SNY¶¸H¡c- ¥ @ãØcü»¼Ö“O> 65І‰‰ Œ;€n¡ñ-L>È\8%ËÈ,:¢j|þ_>H8j6XÎkÞ²îRâzEqví§×n&ÞxÇÚP/_I¬¡)tÛ h%ÄhiÚ7kõØú—¦¶À¶g~ÉY*Ñe–/6ã¹ EnÜSµ|·Ý‘ mR ºÓ‘Kåø¢c¢$Ð.»hÊ-Ü`¬ 4œÞºûÿqÏ‹ ¹;¥æÊr‰Ôšn uÊÑrƒZDêѨ G¥މ‚Öð 9åjqiq:vùȲP8Ë#À’H7I?Nô¾å¡¨•¦Sy?¼c5!ÓØ\»V¯&SÔÑÏ(¹eö£A¥8Ü󇦳v„¾?lXê2ÚßÒÄËÝ­#Ü„íEHPB)/«– Ò÷%†¨ Ã݉ºen¸Ñoˆa i[TÔ;³>¥MÜÿY> `O°k¹"&· ŒX·»vÈõx4nªeÝé{ª–—í=©˜5¹\a¶5/“ó-F†2KÞbžÎÏ66ehÑ }£ëÊàû† ˆÌ"ôª©ku3DžG„²‡_ìq”¤¡sS`b|T¦Øèb¢’£5ðv¥7ÖSbJ·u a2)ê°D¬s{X2ÃpBÇêTRÏÎ%HYÈéŽî‘S ˜µœFž+žƒù×J^.{˜x"0¶"™ =¿#q´îõFâ‚@Iï;@vŽ`@B!“T Ë~¦o8&¤42¼<ÓÞ…´t߃Ë* Ms sKâ8ÀI‚Ø ƒÅeczYÔlÛî4ájT† À³T»O“Œ(f;v2þˆ¢rJèíÈPpÁ¥°¤0LL ÿ¡é² +Áú]¿ëÐÄG íÆr@ ìñŸf¬\Òûu0&Ñv%è€Ë‰/× –ò¨aK|¬†ƒ‚Ô¢';D] æœØEqàÐ1]€«‘цóÈž‚H\· šÇÒ üNØ2B+M§I£àµçº´w`c%æ²+)-ËטÑÙ`ÏZa¼i5q®a·mÏö,€C”a‚‚ ÆÓÃ9&eÓŽ×LÀ2°b#™ƒQ-Fºâ”aöx‰Ô­A–®z¡)’Y=‚Î8xxÍžÝN’)$è 
e”e­E4m¢ž§òaÈнUÂæÑ#›°ÕnŸ‰qCi¸¢àyhèkC3ϸÁ£åA8,F¢ë­‹¸ÛIÖâ)°Ô-ˆ"£aPDÀhZ™[i‘à"E|D‡¡úàÍe¥ÐuÑU:ø{qŸ6€yŠ>Ëí6º‰b§²‡·éÈD\òüvw®(W¦ÍéƒÄýAMub]¤´0Ž¡|©|[¯ÃýMÑ©Ž««´j†S#‚²äL¦&qKB°%ÀYhh¶Sß4ÎÚ^?-IºÕÅÊvŒ·ˆ˜˜åê†7 ˜1xZr ùF?MôFmЇc%Òy”5êŠ'ÞNÞÈi7¡ûð_NÂÿo.g0±.LÆ4±pýñáòë²éAˆòÿö¹Ök&~øá‡azüq8 Ëaë¦M›ãÂúbhÀ¥zôÑG—‘D±¨Ù=è ƒà"r…á„e…1ÆŠSSS¿úÕ¯`§± Œ(¸ÓßûÞ÷–† 6Ì9l3j`¹QRŒ¯p&ðþùçŸÅzÙäcÅeoè/GŸDÈD¸þºo%½p2²˜_Û<¦×Ä ¢…‰E9/l*T«P¡{ùå—ßvÛm¨2B´ î8Ê„`)9c1Ô![ Û ÄóÎùçŸïÑ-J~ï¸ãŽxV¾ ¦B, ‘,ÔoûÛ_ûÚ×°àe„Ú€©/ºè"¬ Dúæ›oÆZ(†fðmo<þ»AÃFAVýÜM_ÙöøÒyÃ‡Ïøˆ©3›´ZæÎ]ƒÞé¶7tUY¬ë»3i®6è§3Ù Œ¦õéØ0ÊÐ<ÃÑ€†Ë¦ãb¦ÎqEUfí5J­@3c~Íðº\ 02Csma-Ä©ü™‰ÜÈ{ךÊêz­Ž z(;2©­š­/—GtD aý »Ž’ HM,Htýãõú¥Ñ’L 8LVmO?27TÜ4¿ùÏóËÚ†I#zDw¸0b›@´b2xaHMUIJ{ÉšŠÐ¿…Ü ëˆ·€éŽñ5†ˆy~AΧe-°£YsªÈdQƉÐ!'jƒÜv§ Ç8ç΀"ÄT6•î[ML‘6"¬·# '`VHŽ´#›Ãăž/(ùžƒ‡%èí8ng€BärËB=(Z¢+Y±„Z¨…¸Å1Ð[Ð<ÕâÓļn·‡¤*ÒQsö<ÀɪT¶,»I@)ºÚ0ê)ÔG#.Gâ”pd@¢(óŒ,_DÅlR¹XÜÔÚTTН±5=daŒ$óÓ²+²ëLÛl­•ù«Ç‘Œ–RAºÎ 9ÌÍPïÚí—›ufªZÅrQ±3Ô3õ=îT:Î"Û‡@–Ô£¥±¹Á¬BÊÛ퀈9RÀú”B¼Y.TtËìê]è$åÙœY°»,Gu¶>'šÏ9Ædü¬¦¡#ÈpÅ(«Ú „CeÎú¨à¤‰ ¡µ©p´ZD,Á \×Ћµé·Ë¥¡…ޢ릊}äoõO•-–']Ë?ͳý àA¦˜¶·d‡$üÐ\U¯ës=¿±:?é6\ª¸ˆWPŠAàŠ1F~3näÀßJ0EmØs¢~d”¥2ÜŒ~Ø¢¢`rh©›Ž]¡l\Á¥†ÔTwg%Ñ«{ƒ¶F à^aè(¤ƒcÝ uìŒÊ°®7CwÉŸMó …8 Â­,oZx>Ãj>Ðñ„/`Š <¦É˜¶åynQ+2Ã$PuL«$æn?t|UP|ˆ»=–:}ûåÔH×Å­Ú#••+¨V´èFqϵëqg­vÀ¢SaÑ©§ÓyjœP–C'Kô¿”x.ÚÞ 6Ù肦 YW HÓNøh!‰” 2R ²ˆ‚r$à›´¹ÐÉ@éÃ]ßöûbTÐñÇ@zÅCІàFr“¾c˨ ¦ƒd A¤'ùHmXxrbä%mà¡Â*Ú-‘£pu””tS_@£¯ZN»¡R(—Â]a˱ÛQ IȰxhšˆí’Ô ìá²…÷Ç¢a’n¡ú8Ôw{/ç©î.¬.O.µ턺ŸQŽŒ •£º>3–^Ëòé NN§Rµæ:§‡èžJHFhÒ§^yeé{÷Ü{ÚŽ€7«w\^IHRÕ,Ë•W^ øöâUÿ¡×ŒÆ‘#rEt * =öØc°ˆÐ¨ZÆðSXP(l ˆèÇ?þ1æe$áàƒFD‹¯PF ôøûßÿþru/À–N8Q5ø\Ÿÿüça­¡=‰H?„HzÍš5СDÅð²]‡b%XÈÀ# Âivýߢð€Iò7ܯ0Ú;Ï<åô¾åÑž¹î®¿ÿÙž¼ê7ï;ì-«ùØÚm Ù@]ò›Sž¿Vsµy;0?ûäß<ùoÓçÿú˜ÇïÙh«étR¢–iHe…t‘)k\jFŸCŠªŽ$ r8²´è/ø–»àÏ®*LÔÚsCéÊ–æf øgÛà™˜6¢ŽËöäˆ r+É„ká®À1ÊljÚZd}2ZoÆì˸24eΔÙ*æ$7 €x 󎇤’ÓpÀ™}U†qÐò›)̈p‰¼`¼XÊWpoë¡Hjßd˜»}?òËÙ éS5g)ËæzNÉ®kìO.…395…6õQô¹Kk©È‰XYœÐõ~!ŸÝÞ} o–Òý@ô‰ê•UUÁ­îøv×ë”rEЯK#AŒB(šÁ­ì¸ö>–0»dÚn,ndæÀ£AÚÚ@R4G£æ ZÍŠ”Új톭yX¬*O2Иf÷˯‡”B=Z”Iqh¨Üô™¹ž­‡z™iúMxl1A=ÕýþÀèWråz „tQ!…b‘êìFœSD²F³C ï>oýqLõ"wÉ^ÈŠiÄ»°¾"ÍÃ)¡ØTîvR²Ü×ÍŒ˜]_Ù¿„aYÀª8­5k‹¥‡gÍ™ã&Gµç ýxƒ€@¸ ø#ÉzƒìÀœ­ZÝè-õ‰.®E/¶‘ão{ Nb¡óÀ  €3XŽ/ˆÿq= 
b|•°ëTŸ#ã=dzæû‹€Uò…"è«`*%6’\ìÏeÕBí B±0ȥ󈇀ÏCý-¸ ò"Èð(ßÂUNÓ¬6Æ3cÛ›;KR¥”)íjîÊ+¹À‹ó¥ÒNcW)Sn °·4KËŽí2÷ tùšÛ¤„¼#Ðu/êL–GF{FߣƉ‘0=¯t0ž#JÐé‚5>›ÐñGŠ CÇn!ÆûbZs‚Á¢;SÔÒ"ËŠ2ã{ ýyà { ‰!Ž”¤A­Âœ9gZ¥$7¶ ×DîQ]1“÷Bd8™MŒ^C ¤<…f8‘—À–€J¾Tì›z*—ÙnlW™j熊#àó׿W•V¿QJkÝE ·…R:ê@”›^=-§Xh©ð<2Êm«ú¾¦ª¦7‹œc9W‰àI¸û!”’^29K2 †ºëtz ‚9Èm(äxr¥CÚHÓm#×ά=gƦF)p’†§È„ÜM$VsdÉ Á| ; Ñ>Í´Ðú:U4 4f>p«ùŠa÷Q¦7R²M)BDÛ­í ýqœMgvèÛ*JqÞ™•I>+§Ïxc&ŽÅ‘´wÜÀÔ$ùÀɶֶŽç'æK1Å ï"`$nÀ¢¤ ™©.8JŸõ¦AÞØkíÆ(qP•EžµB+[*˜„Þ ›3½wÝu÷{Ï:ý´Pé¯üø«à„dôWÞÿWgóË àåWLxࢇE„•E±/~Ç{Œ¯8ã_į˟€IôcV| óò*pß‘ÆvfGî_!ÆEÜŒW,³L¹‚EÐýÊaàçs¿òï~³˜TŽLLžø÷Ǿës÷~]r‰¸Ä?wÛ79+ÉBzÄ5ox¤ß oüý›À‹¾ì áÚqÜ¥¿8ñâãîÿìCo»ú´‘áѤŒÝq4>ššÕ5I#€ËÉ z°)òRé 4…ÜÐw¶³˜íH¥êèîY÷{óé¿ÏH%ÃrsÀ~M ÓÀ!± Ÿ0Å€7!Ëé¾1‚ >ò£E5kèMLO ½© ± h6t–Q(R#šZ[Z·µþrU- ïCÅè-¸«±¸(¸HíN©ýÙŸœõˆ¯œÿòÎÍ;¿øÄßÞüÖÍõz7gJ̶62[š”2Á#B‰‘›ÁPi²œ†HKpKààdÕ"jGœ¤ćVm?ì@¥Ïµu˜|?ÊbZÆÛéwÂs²ÿÐ!yÚ$x¤“C+ÐÐ ä›qyø¤‘Ü´6*lº«´õèñWÖJ¶Ð PWâ’Â>Tîö c²°õHt â„gWê°ØÉ„’M³­°µ¦²ªÑªwý±†÷ƒóy5gQHkj}0©Ä¡¡‚Óõ)„MT=µôÎÚ®œ˜gY’aIÝ€”B7S8H$)‘ÝØ9»S„œEØZ_Y¿¸4/H¨¹¸ðfÛs2ØR‘ÀSˆË°^¢ “uKMòÃ\Mívö¬å×ÖÜF…I¾áµ g #‡xÂÝ1ë·ü:RæétªÑ««”::2òÇ™ç‡Õa„ï†o€$¯e4{Þ̧ò€Ð ×P©t©PÚTß´V^ÒŸië¼ÀÃ×2Qn<á"ó'#­)Èõn+-f»6xúÓLÜ5Zš¤ÀîÏ®Îoô\–‘QNöÊÓËcIIà gÚm†Êp"×3{Ðßð€ïÇɉ†hvÇè µ±Ìå‡;V@‡4K…r¿…È3\‘Z;Ò›ÇÃCªsÃÂNÍ5¬ÆDvU£Ó̉Ş=PÀ¤Šãá¡‘m [Šbv—½c]nÕ\{,0„Ô†܆5‰Áh~h¶µH@®˜Ú;»[‘µÍææ*oùo¶±´®¼ßTm¶,V4Ëaèë+«‘ž„« ^ÐÔVÐËQœ&d¬e).×^izÙ°M?ö)AD9ïÌÆ@êÁž£ÄoдƒN†H¡ÅHöR© lðÊêÊ‹;a WWïhìAk«‹Aƒ´N+hf˜ÜÈÈÐsSÏ ÇžáÑ«2-v{pÇÁÇY3b{¨÷™jìX]\™’§¬æ±¦ALmݸyv³JÉØ” øÐk„î ÿ©cB˜…Î …¤ƒ™ÀWV)³\·ß[æPî?ÛœI”W¸ Œau\{-¿Ž#pL …âts:OŒæÓ¹¹ÞúúèòxÉ8ê–Õ\QYÙ›*°F²c3Y ̘FªDºKôÐÆ ͹`± T¶Û'Õ5>fËÎ)4r°C¿Šz0£Qb+»¢ßrðÆ-/¿Z³ÞÑD%°}¤PîÕszhÕÕgëÊjúúŸž™Ô3â¶ÀÔöW~àî…ùOFäÕúÍW?¦~µöì?mH#>Y~ň,láþÇÜ´ozJǵ†W3,°l}—9VøÆ#ˆÛ @IcÙúâCX_ÌÚX_Á–è/…-¦N6JÿÑúâ_|õ?x-;î³ÿrÚ-h ??ÝüÈÉ_úÈé7=ùNÔÝÐ2õΣ®~ÿIgÝ~Á÷ûQ›äÛÞñ»ëÞû³O?üU]ú›AkeŠ EÕRzàМó«Ã»LsÁžZ°öN*…T‚>`<šTï£ _M;,©·ìM ‰¾:èÄ –лã#¿‹E§ãÔpÙl57y„‰j‚ˆòšD `SJ•»Dc—¹½.-"!GJJ%GИ™Þî´¤j©Ô1/èP‡”'5¬TwÖ·]þر;ô­oœûø!;›Ë#ÓÛΖýz05<ñ¡w\GŒ½i7Þ;ë´¯.…[|©ÎWÜ.1ã2¼ï¾rù-™êÓ܉ñ• êøBô¡{ŽZÔ—.ôm5§é QÀGé\F†8‘ÿ})hFP*©gûOA­XÐ]–ÙÛgÄFæøèð O³2)+üœ±[LQŸzò˜%o›<fÒôŽÁ3BÞ5ÅÅ695彨T£>;{Þ“¯c+V¶ÈÌè/Í›ºqÃf½°Y-æ 8(|ð/V 
g‡ªÐšžòw”Çr+ƇÞóÕ5>ýÆÍ‹Ïa6é8½³>6rq0·£¾¹¼2Eå\OížýÓ#²CÂ5ϼ¹'4.~üd7mô…vnT!vû’¹§µÍ¥ue„•vZߥ]ƒiZB§Í_óÔiB9¾ò™Sã }”cÅå\±ÔhÈÌÐú¬»[Ú>Óæ §%‚mDø*‘#l‹T=Ò¥ÐíøMV <ÖEׂ¹°£þÇ¡jzÞÚ5gîºì©jÞìy¿82VPïÓ $ïÒ'O0is[}瓇í6g; AÍãÔ=4²á£Õû­<ÿÑczv÷Ìû÷‡`¾íB%Ö8™f©†ÑȤ´ŽÕêÚ©Xo-±Þw?è"%ìÑjÀÖ# ÙâR~ÎPÇë…\dÐN †lF@ gó^ ¨W”†R£0—üú\ºæ™Ó_ª=íªýXrt®‹úò.ÑAB}‘˜¡EâÏ}ì¡ãàô¡à ¨6ß‹$Ÿ]x ´2K­*¬k9½‹~w:t>úë7¢¡5œ6‰“÷¶¦•BìŠ÷ýðHôÎøägµú¸9}éý?:Œ&ÙéÆ\ZÎÖìV—Z[S‚ñžÖw6¼Ù]ö˾¬ËEæc¿8 ZÊw¢Z¦(^üË£#Á%ñ¬ßIu I¥~ísÇ…©ö¥?;Üànyáô9g S2üü€™½üé#n~òTe$ 2ÝŒ%Êvä?~yÕêêJ$3›½ÆZ^ÀS<:‡uÙoÞˆúìMS/L¦Æ?üÃS/î]Å!eÑ™‘5œ|ô iç&U®œóËSoÙ|n¬ugÌ-¨ð>añ Ä"´ñþ0û°š†¦k§æmŸ2_0„¹w³Xµ÷˜ÏHy/æ»ÓÎæ>9ßÅm®EÂ\4ê ±flÚñ_?MN£%‡w˜{ÍS§Û‚}ÑïOx¹¹¥< îdåã¼BîÕ·úŠaq­íÖ =zKï½o²Í+yvOçehí;¤‘+‚<XQ½Ôº2f•,[Ör¦Ð6Ŷ¯ô  xÙŽË4ç—'Wewø¿¿aÓ©mýѧqÚ9¿:’ ZÌ,Qî|ò©#ü_‰úÔômOÿóõwœ¹O׎°Ý„™üš~¼f ð²)].ˆ†A˜Œq_f]-£Á°¦Ë†ŸÃƒTà kŠ Úe« <>¼ÂîÂâÂèã«eÓ¾¼0b_ÄÄËv‹-oöâW`­_1ùyîo¿ÿâ[8•p9µØØk|õ‡ç?¹ý_ø~ýWû×ß^ù­ßÞö±O½›Lé_øã)çýð˜K<~(ñýwl¼š+Fï8òR7ÓÛÚypX~£tÝCoÑV2³ÞÎsÁ™}r‰)£²—X ÷Z©.93; ;—üò-×=ÿ¾©`{GXøð½ÇœûÛ7Ô¡×–vÆ+Õ€m;läZ¨Æãò¬Ç#¶V¯¿â÷DZ{ÆÞÔ v EoÁÜ+hìÿyðÍý?®V'ì¸9»Aœ¤ñà@drbf…˜Ô2ä­¥pç-/¿o[ç9~ÔûÁ–ÏÝùô%ð œV¦n{æ#Ÿ{êÝÚ:ëì{Ž¢†[}~——›Gi‘RgwÆÍÖ¤¡âÓÓ[æÜŽ!ED‰XBßFˆ5hô…¿<Þ’é0+þ_öÎÎŽÚhàz½ß{×ÏçŽM1%„B¾Ð!! 
½Ú„^L7 „:¡Òè=˜¶÷îëíõ^öûÏêüâÛ`0_ÞþÞíiwµZÍh¤ÑŒF3¯/~ÿ¼7v³6üCzP»p ×¹íÍzóK,´ÛËòooÒÑÙ–™Û8,ðËYÿòœòè–§½²]óÈàœžÙÅ“olû¸ûùóŸÛcä¼3ã¯}iëÛôÀ¨Ô¢Â;Åæ6<ÐvاŸõÆnîO¯˜º¯¥>· 0Ó9Ô˜ýĨÊ䪢—îÿ„ f>hûw¯«íúiG~ÒùÞâÌ,5L¹ïÁÑ¨Š±­/NØã_lxé1Ú,Õé®â¢\°/êA·aÄ: Zœ›E5®ïøIÿ>êãŽwnœú›˜¯;ìî¸ô£§µ¿sþ”ÿ[ü䜗ö®ÞÌ9?1uinØkSs™µgÏÏ|:â{Ç>:*¢ÚjGû>‰¾ã •\M¹”«ûô7vü¤íÝ3þ½M¹ 2×pæiŒÚ\^ÇÜôŒºáÁ¸­+ï‹”jç<´·wDñüö[”œV³‰Ý;‚}Iªv85Ëná3_ص¥8vZ[ŠÕÙÛÞ½|Ê.¹š ›-ã¾ðœÚÍ«,ƒ2/*.ìÍկ풰jš›Ç¾¸Í’x—Åá9þ©kgÌÕgeU:˜Â Äq“GÕoì°´¨ÆÂÙol·$?¿ÛÞÖek™ÚûÞyomÓïî‰w-íY€ábwq5´7':­s'¾¿ß¬Ä'íÖŶÁÆÌÜGƒ7©9}Ê"ž%Wò"4HX±BÀ5#²´“˜8Î@É£§wCóo_ÝæãîbÉÞµ†úÝŒÞ%Ì:“¨Ñë«[3áê†Ó³Ò!Ì ÔÉîvä±Up6ÕÆ¬¥Îlæœ×wJ‡ú¿{tÍFž^gÛâÜìM¶quÌ „ú^KkŸj[› uµe€Ã ÞØë½ö¿øŠºøÍŸ¨*ê›%ÂÙàöK_ÛÏ3*2'ù†ìºªW òSZÕY·±OæÍ5D3œÛµÔ_]×™ £¥šÒû±¯!Ô‘'Œü%{¾ŒøÁM£—’ K£”Ê×ã[`ÁìØTû°âÄw›²à•ß=}€l÷©™á™ÿ¥«ÂS¼ô{¾yFuO¬jÁïþò£tÝ¢kgö|zù;??í_{ ÚRñÏÝ'|´ïF?¬þ$þj¼jѸ—¿_ªËžþü÷–Ϻ>9bpÜFƒ‘ŇlÑ0/9ó“ž@”e~…Ø<žûí ¡ä¸ç¾ßçj«ÞÐ=þo;,ÎM¿åÓcþ¥ùÚ.^¬Þܶ´8Ã6“°®bcjz÷û¢’±–Æ¡<{Ê?iù÷„)»ÆýsÛ¬ï\3uŸ‹þ²ã…S¶ñn2¿Íñüá l¼=Îá‰î²X13qªÙº³/|óçÔ!æ[Ø^œ^5*):EŒÒœª-Ú꿯çÇzÀÑú"w"•j)S,˜¥F¾æZðÕiÎzç.lX¯œÃ>ufÖƒaÏœµ‹$MB?¥d2#à"k ›ûŽ…—–z¹IÉ”³¼‘ú pâáwüfkœ*k“AGÃø¹`¿»0D`,{À¤#w¼à¼snéÍ.<ý ›ÎùõŸÇï÷À9ÿw/îÚR¥¾¶:·{ú³¯¼ñ¥S™q™¤fDÞU£Ô'áw¢¥=®…¹†¾h ­ÇÑrâÞ“ú|mŽa…IÏ~Óû'\õò‘ÒK}êöéçwÚÒ±ÏûéÎÁÅëâ¹Ñé)ß’”g©Q'¾`o©÷“ž¡aÁÃoߌqgQòƒÁ[8U}÷OíPª‚/Ýïo Á`[jaÂÙ—±F< *뤽}—öTŸ½å“Ž·ÕPFÔôwÂSûœ‹¯|ù(¢Â2£[uÝ—ã}“áiz×kj¤š—™bÞý»÷aT=ëÍ={ƒŸ,¶MYÿÀ¿I¶ÐÜyñ›»Sˆwã4Œ§×7_¢üÖ,þ,úFóx¸Só²ï†ý .{uŸ.ïÜxmËáO!-î -ZdžÜ}ÉÔƒ¦'ß‚…/´NK°ŠOÓoYFÇøz¢®õîÙ—¨!êÞ—3ÝŒŒ…AÝKŒi“?:sVú]†­ÉoýŽÚºF§¨m§u¡g°1'5íò·M†z£þÎKŸ?Ì24ØÈÞçjK®Ñ¥w 9¯ÿðä Ïþ?Ÿõ|:ñ£}çGg]õþÑ×¼?nqfö¤7~;é•ñŒS»Þft¨%N•Þ¦‹5Úp™À…ým“Þ=‚ÒÈüÉ·±x_Ÿÿ\`Sû­3Îb7ã5ïœ|ľgø7¶Ö ñ¿ò鳿?õy{Mvê’7†mTµ·üö™=Ïe®FtTwo{t ‹³òxÖí-,õÔqoZ E»m‹gõ½§6eÍtÕŒAsßÔö×#Ô¿N-Í΀•&™¡Á Ü£"U‹®x÷ÿz= @Z‹åÓtmG,¸8´©ûðž9°~Ô~lþ˜yö¡ºàðÇÃÕ-³rž÷‹'JC¢áäÕæžÝ˜¯6TÏÎyÔ2$f’š{=Wß•«ïˆøÆCKf&¦,Ì}¨v„†_¿þƒŸF A‹cT2YÓÒá˜qï¼£€Kð@=.ÉEè,˰·¼}Þž=Íqcp|A~šg8ºœ·¹“ªo»à¥½n˜ù«Eù© ³ÓjFÙ“¡ö>Ï¢–Ò¬°gi»uî©{=:xÓ`ÌßræÏþñ-:íW7W +vZgMxg§Ù‰w&tȼ̇´B¬º†zÄ!Ý£ó'ìÿ‡¹©ó ½þ §ïýX—s-8?ûɵï{Ûì3O||ÏNÛϨÒß>79}øöuQOÛØ½nr`²•=ïÛŽ?ôô†-¹Ú àˆ: ˆv >ÛŒéÐ䬨[ιJm J=ž‘Icdxò»/€h«U¢¡b¾ü“=Y¦6w}šœ’©ïêöÏ¿ìã=hš‰) “&àõ6ÛÜ\CÏ]ó_Xøˆ;ÞJ 
mƒ7&׺3ÕÑKÞþY·cAŸgAª¶íÊ~CSÒ)®~u¼ð³†ÉÜ þ½äºÜê¥9Oz7È[†Å¨m§c&´AÓ¤k[ÛìŸå;y%×Ô1­ïµ[g“jh§¡g&Þ"§stˆ¡B ‰ŸõèÞô¾¦m%úÛÕïžµRòüôG×}tÜÌÄûW¼sH»m®ocvYÙZ HÌß  +q_‚úO~îxk³D¥FëÛSîž{!äú~Û3|®X×%nƒr IÝÿÉDÈ•Áð†÷NM;®úç9dàHijƒ†×Dñ¿ó¤Hç@IDAT¶¾°ŸÊ±61ÐnƵÅÏB'\8òyãcØiþ¦Œ¹Å˜c­ÆÑŒ7ÆbC.g'5]e,4Œ%ÆI?¸ØXj°…ĈFÂLôF§1v«IÆ{†1Ï8±þZ£Å0F—atFDòø"ë˜l•˜g¿Çrÿ33ÏgÆÙÝcL•¯ŸºÑ˜nŒóÜe¼m>ýÔ/ò­¹æ™ËÏŒ+Æ1³Í6_i5 >Ç×{Œ_ls‚fò‚Õ µ-H¢fMZK§þäDIó”ò#QþõøB0€Ë¬|?°À Ô¼²|fòðV‡ùë<Èw5à -Æi],y¸¤òPxÒ|Ôiœvðù`»¿L ׈ո½üH/1h‹c6¼D°±Ðøí¶WK£´IÉRȤ¥ÌÏf˜…O7Æ×?B«OyW·5)—¯K¦úG KcpEe(“ÒHðŠP£ˆ3%ð{ϸj›GŒ·Œs7½ùü-n8m£ËÆ™›^qLÓ™¶ºñ¤A—üvðõoòàYÍ÷´Ú»Æ¥¡oç5>`Ð@Ô¼Å8bÃSä+e¼™ÕpÊ/£Ü‚+*cbõ´Ý.bàØ9SU"3—&žØnœä7ÁY±ædÐEñ"8usŒ‹÷¼Ñ˜o9E¥nãàQ¿îÿ¨&?oñéwó‚Khuµ5xºÔ8žæWÐ*g>MÉ|¢Û8°ù¡Czù-Ý•ÎÙþ÷ò¡VãÄïÿVd 3ß"i^d ‚PñÏ;ÀØg·õ©RUrÎ6NÝýjIó¢n,ÞÕ˜4Iâ„M' =,4NÞô2ÉÏ¢Ò?J¦éit³3öŸ¹Ôd¦óOË7¹¤J@juÐG ¨ß⬿›1_áuršÙäÌ+úE΀@UیӶ¼ºµÕi\»ÃxÉ’)P·8o‘à&wúŒãwšHxjÖ’šy‰zߘb\Tû*{ú¦ zy Œ¼B­:3:W0©¡£]ë»Ü2\qŒQ2?YlÔŒ’äŠ&åßøTÆ7º¾‰/­7FXëÍD'¢Ž=ìì^zäèíÇÆ?Š§×ªš%„ä½ðùK®øÅÝ=u³È“~rÚÏÞ„žìäý/ºãá+~{Ф›_>ŸÉì¡{ýô”û1tT&Wôømùˆ"Ä3G23Ó<ñÿ.ºëõ+dæˆ<ïRñ˜òÙ,¬ÒXTY 7Žæbbx*ÚøÔO·?ó©w®·÷Y¶|ˆ Š[¨¡±ñ³yÔ qÝöòER&®ª7ÛmëÍöèZ+ƃö’Ï’ ×Ö×øª|sæÏ4¬‘;¾jW,Ôyoyd"•—y7õA@A]Ä\Ø%1d[–¡Rá$[kœwßÒŽš!æLU\;aÇŠJ‹bu³4%–aV4¶yÓ%š“y (ΫØ#¾³€B ÈÔ¦µÅòg¶3çØM)ï-\´xÄÈáäN¦’>¯—Ž©$Ëù¾¶t~÷Ðõ”!~@‘NøºþI‰ËŽ>·N•ëD©Ž;ìœ{š,™5˜™í,žá +{¸×o“0;|GtÞÓoÞ#B?à“ŸÂ9ë³i3Á¦ëSUH!Tƒ©=‡>—+C¢|tª#w>ãÁ·~/zKÄ…ö[þ@­êˆÃNyèõÛQX;êZ"mxÙ‡u‡ª¦ÍÿÐÌÝ?eR¶C¹r¨¿òA­ô;N¬[ñço"ÙT&µw† ·Æª‡T“Ãåð % Ä^7Ö£‹øD±ä’ù]wÙ}Ê{oJ›àB"÷TQ!³ei5q¯mEù8ö@dÁåÂhpÔ‰•`Œ¸;b; 6ËýMÉ’1ABG޹áÇ¿tU=…ù1Y’úì¯o>#øä€^ *Ù­|hw fÊä],rqìñ/øÝùWM¼ž@²Rÿ„ ¬Sýt—_?õæÃdþÙþ>ùÊßdLÂPæò"ipŽqJq¥8[Ôˆa,ÞÁ={n½ËK/¿%ƒŠ®^£aˆ…°W€ÝòR>­F.EuÈ®§üåÕÛIÿl·ž|ín©']@4TAP{ù-BÒæëÜÄpËÅnoÊÃý*~H ž’GE:"×LºCÕG$"i>òñ†'iv@šÉl)¹ÏNꢲ9Å&‚íÖÙn¶á+â;MD™ïæÊɼûßÅá:›ƒpIô•è(ùñЧ˜¶p`Â8FxEk5Î3L̃gá{ôDé¼"óLÀ$À!i¶²Ðƒn^Ô÷ÿû|ÃSÛºZ3¶x¸Ð5|«AïÎ~ÝÛlûÛ7AÏY”_´µ¡:´5o–2X¨³ø­#å4Ä_,5m*g¢‘­løä°à8Ø žcŸaÙßP‚JT¡$Öþ'¾ ®þ¿\f9ZÊhPc.ØîæsîF ¼Òûà-CŸšØ|×ñî³ÏØä¢“7?ãðOªÓøùF§3y»ù…2d2Ǭéž9Ó*y¶Î1Ñù’I(s̸qèöGÉÓ¢‘ŒËÿHºHö4S\ÚHu%±…Éu¶wéÉc–yúþ]%fŽ?Ýè(©ñ;Ü.9ÒsIɛɶÏ7²L%Mic³ëdv¿Ô8jäY2fªû™qꎈ\¥g¸Tƒù{¿Ã$LdŠ¿%#G>nrY0ˆ@ 53+©¥ã’‘) ÙˆiD³|˜l 
q?ž7¢æ§¶œ+¯šE“d&œ‘Ìfþe¥‰ýüfW;Y#ÅÜ÷ô»ÒF ˆ&q£¸ˆª§îsš´…–ç>yå¤ÑWˆŠÊÑ2%€éÊS+ —ý5$Å':òF±ˆ F´$Íi>.é¶ÌÇâ½Ò>'[óF‡®*¡³NwÈ”õJ¥Mú”V0Û¢¨³ñTÊ‹‹ŒB¾|2ÉÜl H¨ßJ$RÝ|´½k¡ù"ïfP$7ø2¿Š|“í‚B¤ž`<Ãï§‘Ž ö ‘.†!ƒªîÅ]" éR)É,aøÃ£E‹¿:à˜pK‚w;[º$-! þquBée»¶Io4ÍoO=¿D DJWÜÇS²ëŽ ì³Ê²‹Þˆ‚¶Œqè^ÇH£ÄŒŸïùò§ÛXi2 “¢ÍƒWLÌ ÁÑ%zWù$é FŠ0TIúž´¸àM^å¯dDûp`§SI%T—ÉB’R[z¸¢Ë3gÄC^ç"Q’.%4.€sM¿ÎãE>$û‹MÚ$Pó-âgæ‹„{à_’WÀ4_Â>ÚÇ^!®ó…D®a$ X€Îà¼7któË]üx1› RDNÌÆò€SìɱÏY2ÓSÐC?Ô&¼Æ†±PF•ƒ‡-¤ žùNñúÖÅ'R9¤oš0^Jã%”Ç<¢Â ÂÙÌnÖ›ÚÊ—dÅŒbTøÆoTÖSñµÏ×ÿwKDjaƦlW>>þ–Ãþ‰S4cÿ|SÚ ºñÊ.F°Î„1¬j†ryË?&Š|#3tv›bØšÁ`'ÆxËÂ!*Þõëkje œWO½v‘Ú¥‚ÃïÈ×mcßÂâöÃá²±èò°ÿ2[Û„C]ˆÒâ¬þðçË‘GmUO~x?sÕ˜½GÄ)»lsq4°É/âl¢6ñÈ¢¾PÝ ¢+#óÙ ºÿk%–.³½ÁøoìeFªWR)öW{˜Vwu·547¾Ë…µ´“"ýØj³Z¼U*™Èxø›•€CfX@ê)Ûݨ-qó>O™àš×Š <‘´9¿gÇ;"‚d´1î?(@ÈR”~W”ÂŽÙºoÅãA6o-°iȉŀŮézÙ›ºpɡӄ(â2•Îᎉ;-®¹ y—éÝÃöiÙþÄ´[¢=Ú=öQ[5 6ÌB‰udgq ›[|ɵ¬–ü‰ÿÑÖ@±+Þ[kªŠ€oG¦2½ŠI-–2˜™¹ÆPî÷÷])1*ñSPÈ;«6T ¸ '¡fwF<8q…LLSbˆÈªž$ÊægðÂc—%žŒT±,L}tmD‚‘Oƒ4VìM<ƒ^~`Ò°,ÅDŠ=Ü~ϑ䡱âhRI岫ñcÇê¦J§ ~­L»€ÿ9ô·¨I©Hþ|‘¸„Yì •*$Ù¦KÄðªFÓÔÔ7vÅ{j5éB6äw ߦQð‰¸ÉNÛ ‹ lu³mÙg„c‘P¨1Kf¼³yÉ’·ó¨vX=¯D¨‹,åàu‹úó_ï“~dUwüé^Ì€ÙOV;¸^ï²?áfË.#²¡/–ð|lÉr„3b³<ïó]§_u‡sÁ€" öq©Ê/`ˆ±±ãÍž©Û²¾ÙODº±‹¶¼ ëL؃'?\úäÃ~¾2;âÆtjnöeqI„ÛŸa¦ÿ7«ò +¤ÛH¼'XH•R¸DÃÙ²­}Ì8*s‘$ 2ét³‰3¹‡’…æ6ŸS¬$øx0³¬Ç'(·r¬M @+¹4Z4ï1{O4Z=ã÷ì­¯®»ä—Ϥط×X'Üצ~ºã™ì~Û|íZfôþzß“¬vbFxôþª¸À™º){__ø&5¬ÞZŸP"åÂÊð ÊQ9ãŒW ­mFË–HÅ‚^¼$’¯‹õyÜA+Öáhx¤Sä»[õƒª[’óѳ‰Ú ¾ù°×UЦ"!oshÄ ”?x9”ñK¨Ø’ÈQP­I[„×T@¸/ê¯l¡aÐàÞήچºd®Ëkó³!2‘Ì8Ä–\ˆ>Ÿ[|ÒQäè'3êB·¦öfÿ-HåtR:¯ù¨?-=Žhßú0ë<ܰåJ^¼126Gr¡j‚×J¦›*Üi/£><®p˜ˆå¸Ä£—+'õ,þç‹‚)éä2¨¹<„ªÈ0Ø.\4ÔˆQ¼>Ÿ=Áù®²ŒPä ùQž4í‹Sþ¦¼cÁF¸/\ݦJgøE²•k¨AcÀ' Ù,1(‘¤év|´/9ÍCF©ÿ:ðúÓì¢f8Ìwº‰xš(wt¢Xd{ÝKï½(-Œ.€‹È¸·6Ãu#jŤr5Âù˜vÙºS}!N4qUjZë“¿ÿvK¼j]CÎeœ“`ß9Øà&>®a|á3¹lÆSȱK›Œ"¨*à ¼`qBq¦rRPh²y­‚6Ã2;MlDãùêa¤¾™L¶Êú}ñ8>Q2Qrp$–y²Fýó½'/ª: ”ôõÅjArÞMËšsµbÇ¢>ÄŸx&åô€7Ø3å¢Ìülb8ÐôP–ãáL+#- î›H]ÎìÉ6U=R›€›\5BP»2…²ª 7D¡¢Ǫ̃€Oæ\±ý˜V™*'õ!$W&[Ȳu¸*hcz6¼!™(¤ã ¯³âr ¿b“Ê×(’« ûýw˜õO’Œ+2Ä1ß³ùxÒá÷8èBÄëÆß›ô<: ¤b‚b–dCU¸#µÅÅûÔ‡×(²n0²»5U„rÁ-–á²»ÉL¼¼áU›^ø Ås¸ôyCf½ðÀ2g‰ T-ƒÓ9"·J}å0gò%Žb‘Í&\€ÉW•-à`Þߦ—¢7—£<Ãà¹Tu\–ÉØR(J~—=ämêéëµ:*ÄŸ(¹"‘xuÈM±ã„mÎP-e%ªgf¹fÑÀªŸê[ºæË¯—ÿ+kÀk¹Ù’ù´Ïæ Ùš#ímðË«{¥¥uñ-oƒÈò³ŸžûäÛ×HG‡ 
ÓwRêÒŸ?}é+‡ g…­*ÎûªqÀ[Æp“3KûBžhKæˆ_ù÷çžöéOÃ'rù"]5IˆÛpç2i'‘ Ä:;Håtâˆe‰sUU¹H4 ÔËše–N±[´Áxb‘tg­'”M—™AðXID©x,d¿µ™“"MNQ’ÁA~ûïÐÞ_ÓP]B(`_.Ÿ´…éfLè Vs~‘Mä!4Zû%ƒeòy¥cI£¦²gö"à—u'Þâ!ÿ¤h9p(7ÿs›gÈÉ‚?ÀsÈšg„K‡8å+å­§KÇEC<ÂÊ—eHSž6EUÈbK&œ¸}²±–†Ñ°¤3i7±ƒ,ý_Ç+$º/Ä9{Ñää0H† /}‰£%ã»ÉÚ¥!Íš3ô'Å£ÀQ¿ü#â/Kߥ¼ Áª ¢™¯ NÑKâk…;¼N—#¹É4ŽV‡‚"6#Íbˆ…û){³g¾L5d­Ì*îïAjÃOÆg¨MÖà¡9£Ð¿Äj‚Ép&÷x]$ Ùµìý‘ˆMŠÄ)vÁ0Î`šN[Åû5\¿O´%·„ÞøV"[òù¬yž0{²‰ŸI&g¸kÆmK¸'ÜÐPG®Df&hd!“À^ò¤Ps” "ÎRÍbN$©”ÇdȆÖ~$ *‚Dª«dè—Tª'-^´"´[T4’†üTª³'\_‡ô §%›¹²* þ’é Ǩ9ˆð» CËsY |˜ðâdLÐÏ1-KMœå¸ãÑL à-àÚo0nG2Öç bj`nâašD냯pHh•†àãYsÝWeZ ž§³/«°¥b&i£± MzaF,jrê/›ØPPv2—ö;ý™t:H€oêÅ\GŽD;òzúÂÑ`µÌu˜…æˆ ¶ÍÚp¢¦¢ºeÙÄnK%q«'pÒî¸ ´{ÿr§ ‘T{ÙQÈ¥Nˆ„òÐZñ K®PÀá™Î¦ZÅlW˜–OZ]–l*Ï~9ÒéLÁÍ`e03*1ÔÐÚ|‘vO&•§Œ<Ì.¤ÀÉ bh fÂÖ¡MSÓ`VxYÕ¾©ÿßè°&’oªêÿƒåÚ‰\®ÇPíB5nõÖŒçƒ b©±zrú54fÐã<¤˜Q᥽ðTØ[**]ŠÐ‚âk¨Ht&÷» ö|^¶ «`Û–ÆÏ¿9KÏ¡œ&œÆFÊ‘uàb?¶8æD+DÈ¢“H¯Ê-ÙÚ[ZÑUVœD: ë¡uad=6SÒ›¡`[ÐÃÄŸX„!~‹ëT®]‚ÌÆR4T}‰‚¨›è^!xŒÞvUå áyNz?ÃZPbÝ)‚#ü/ >b×S,ÐI± ’Ø,Hê„4~.ø$š?¼}ÝMó Dy\=0óC.\’ŸdÃó!v(ÿù7.9Ûp0ÅÐYH€d2ñMdwÔ…ÈO0›lhŒ&ö ·ã¡‹$’/†ÄÞ•ð»D¬¶I`Õ€ßá³£Bç˨`¼â ÿµ˜ (ÂØíÈ&¯ôQ1* N ÿ—ðT6‹#J , U~DAŒøVÜþåãyÂ㤌A†ËâA—Š_Lù ì¸júÏÞߢø ’AÎn‡Âb)Bªªƒ°ÄhÞÏçâ7Q„*Pk‰Œ…âÌËîóóf!ÕT%žF²àÙ´°1GmØ9ñœbô¹¦„mÒ ™Ž. «´*¬”ñÏ®Š&þ36ãö‘OJÚl#ÎÄý`¢–MGˆº¬pü»$¢qÞi)66ñ>ÝÝÖð8b½ »´%—GÅ»qŠÒXäSzvUE'Á–˜ßá¸\hR‡§’ix!ªJ"¡“ÈÇÃ]yB.f™6fqÿëj`Ðéí…á5ÕU³dÍžä=S‘BYEi'HdÁ€¹A2\ÚÑ¥èÄÇt¢`Í@™´&Ü # {6–e )|ÞDOÒnGµäHvG|~¼]˜l¼›?êNÓQ½e?Ò²PÒÏ}ÍÉ$˜%ùDÓD‡•(]I!lLF>›Âx†fÐaÒMH¥$dÖ¿ž@W9Ö&à§NÓ6S˜]Õ æ\iúàãèS®‡Éå»Â2¯sª Îu‘PXת<œzwÌñœó}¸[¤™ƒ%£M¦·'é.\ö%ÓçežÎöâÇöç¤ï´6 ±67†R”ÍêI혠 l£ÌJ Uˆ–  DH,Éë95~üå¬ÝBàøÃe4L$ "ªñ}¾h®m/^Ô"½ÐÊ¢'½š#‹Î.!#¸·ŠèìŒlŒýT€|§ò£ÏÓæóæT]΢º4‡ðòMÐÙDF.”Ì’ô™‘•¥Jo•ºÚÍÒÛ¢ºzzÃ=Ì#J,èÊ’‘Aü#&ØúCdä SvdÊ- ¹Ãí Bs8,ÑÇó8¹µƒz\í“ÝÆ@Ç(IÆ´ "}ÁÈ(æƒM™Ö¡ÒðšŽ+îÿç§ÅòY]_>žö ÇFøÁS5* šÒ–ý(¦üãUHç0ä&*°xºb’öyq iÆQ"¦|~¼'h1#ÇHM4ë2Qª'Á8eÖ‘÷Ëõ‘´‰çÿºCFP[$;€º°%Šxµ"ê/#¼¹êûŠgŠž ®¶ÙÞN`ÉM³uW<‹š„wóHŠP s? 
„vèn—øX¹l6ihjÚ`(ˆSnBlXFŽ5J°ÇpmÖ[wݨ¸q T¥âihQÐnÎ\‘ `Ìw®.Ÿ[ôáI6YU_èíâ~¨6˜'Œ’\¦D­eN*¸vsp'ŸE±3±Wž sQj\Ò×°æbX1Q#›ÇÎì@‹åŸ°ñ&}Õ!D9³'˜ÔÕ’Š‘Uÿ`‰¦ê‡Oóë?LŽ&Ò?Ý Ò›hè«ôüï âá^Ë‹qYKæ}G¹¦-E©œ/B¨ÂÁʃ¹N¢ùj¨c°<¨ T0PÁ@kˆ ^C„}av·ÅãËóåNÚíÔ;_¸[<¡Ãb êÜ_?’À ºZ"8dï¦cUVa1QÚ€`5øT²ýu1ºè„¥'XçW«q;œÿè?'Á¤7°Œ O-{OY6F­­Õ˪pÉÏ_Xhtå\Å[;¯Á»Û¸mï±… Voñ†‡NUchbò 6ëÎY4Ïv›?¶¬ g|J¤ç©M¶%•AÀ…‹iŽÃº2¯bwÓ“÷øL~å Ÿ––Xg¨Š˜¾Äâ [µ’¡‚ *Xë¨0ൌÒ|»xd ø<Vµfˆ¹# LóË®¥½Ub‰ø+–ÌrÎy”O-ª+³x )öTðt‰ dª³áˆ„XÁÕó¢ØEÛܸí¨}úÚÓÇþßyuuþ… ?zâ•§ÅoªMu†gÜõÑ‘ìt:qŸÇ^7úìƒgÝòúq` ÃED.ÁÒEÕ,ËÀrÀ}…©bš˜+¦}`¦'3+Ï(µ¨û3ñî$vƆµŠ-—¦Ur{VyêÅj×ÀÅ‘%ixº:úšš†š—}ºÐʹ‚ *¨``M1PaÀkб/È/Ûɧ)Go£½è-,­ÅpI0|Ѝ )ÿMoŽƒ)âEh?ÕZèØ&QMÀÒ åö¿M†3ÞþúEã6¿c¸oã@qTsÈ9a“;šj«·³ÛÉWüL"’îé8óýU¯ºâØ«ÄkœÔ®îš3IÑ’ ÚÐ${‡¶IÈ„€úÙ~÷äË7˜ûMfLfX²¬c ÍÆŒ‚W|w¤ÝuÔXåßûÔDÑ<p\àb¿‹¼¸ž”}@v´ ýµQHÛùh>3¨¶©ƒÓY…»eáô•£‚ *¨``Í1P>×g«£W]zÑ0Äý¶;v«º½ÏÝýÁv߇N·Ë×9º¶¦þÜ='w'»ªªn}«ÂN8ó”&ã>às cîUã»ÔiÖךôåÄKÄèªí[zg½©+Ï{ÜÒUc”<Ëh¼ãµ·–dc:êbŽ‚úÝ^coxõÖb¨ ™¸=5S{Ä•xÉð~$^ìþç ¯æf¤\)ƒ –DŠ!ðn‰X÷ÅÍdJEsQ3N%\â1Q\b¸ÖX>nÍæx©a.[‰Ù›’•d7;p+G T0PÁÀWÂ@…%´­æ¥FuíÓ'_8ÿŸÓî=yó{ïxýØpבÕc$®ÜUÇõ§÷9—ìD ¹H£(·áÁ,;U _Mêú®ƒ©ßõÚ ¬sCÏ"W+œÔ6š¥Æž?8pÙï´:‹ 7qâLÎj…W­¸ÓòÁ»ëáÊÄ£ÃO\ZÔäžH¦£ÖíÆW…–RV+¾ãðbåß¹°r²Âmñx÷ ìN§Ó’rå¨` ‚ *Xø+ÇZÅ@µ*À«œøµÅZ`«Õ›ÊfßóŽ¿wAËl¸æ/w>vûëÎ@/]2j‹= ×SÙÖ¤ŠçDr-¨=³¬Öô¢öÓ}øô7 k–Åà:ü Æßõãö|thpÈØÝŸú厷œóËǰœò]Á®âÑ0|÷˜}Æ]tü-'ímoKÇïöþÓ¶Ý}Â…g÷t‹ïXÓϱ¶D¿¸|ª9—.Zñ:„”Œ‹¾¼Óšq8K¾dO‡*½/r±šÎ‰©˜Åê&:,ò4sˆë¦Åa·±Ü à¾$pH½VZ)¬‚ *˜¨xÂZËíZTøP÷ oÈ,6ÎÜéödGéž÷ÇÂÛþZOÑyË[§ý³«KÖ@<^ÈÄJo¨¸é¬HKÇè@ÝÄ'¯WÎVå:ê7O¿ñÔY{ÿ9d $Có;f¨ "ö\K8ž¼p,îYÛßš {F~oÃŽ¶Öb4ì©Í.¶L4º&/Þð·EíìPÇîxÞ½/^­Fâ½á@]=¾©¤wƒòXDÞ\.î47 —¼íKœ²·¿à®g¯çÕˆé¸ÿ%üƒ%Ÿ,„}Pß‚¾Îöž¦AuÉdÚbñ¸pøˆ·Öe"¯A4m½Ð~Ds-c¶R\ T0ð­c â ë[Gù×ø dÁ¿p—¨‘oþã)ú8Êg–f7ôŒ9ïîýq”a,­½þï'œùó§ïúç¡gï÷âµ×#F—˜nÆ ^L¢‡ÖF·|Ý?Gü½c÷ÅUªyâÓ¿ÅuJýéàWENuªzoM:êýÝÍ;Ë»EuÎÞ7=0ý>y„éò7‚(–T˜pVkÂJ _K³jká¾Ù\6—ËüAO“ìtáùØÅv#U[,l¸'kÒ!ÕÛÞå¯÷Ø%n»ìÝÝá&‚*âšØía“p.' 
X+œ H€ìëóÁº²³-ô× ¢Ê« T0ð?ÊðÚnæö ­ÊÈxpµœò/qŸõƒ‡F5oXØp÷.Ÿ6 2¨k«É;¿[ciž°ÕÛÍ NÝòmÿ<çŠ#ÈÉN×Ò–î¶Í¶½ðÀÇ]ц|Ô³™mÛ‡ Ÿ´Õ´¤ÑQ§ˆ0`?sÓÉö\Ð^Û4R–áô 8¨²È–$nÛ ?fÆîM©d¢•Eed[_LæLp””³—‹`¹7û¬D,€Ñâ?Ž|å *·Q%ü[ïïMäëùÒÙ¢ÅáŠGˆWj¨®fÝ—Ø ‡ÿêô§Ÿ¿‘ï³ô‹ò™Ï ˜©¢D'XŒl±ª T0PÁ@«Ã@…¯;_å™Ï ôY~ýã“G·o62´y¬§Ø:¿g¸Lè„ÅíH—U>G¡èÈÕ}ÝÝÝ·=sùí¹º+Ÿþl²¨£7û}³Ó²j†Ííû´µ%PrÊ6£Ó_ýñå;=VÞÕ–(d“‘Ö™WÙ×Ö» owoœ1|ªÏ–ªiö G¸­U=ðá3ˆªÄDAùL±VücúÄ6Ê(•òÙ,,8‰†jê§Ýƹg]2íƒ~_õðÎÍÛir©*«êÂ÷½ñûL¼ÝSUO!?1Ã$lƒD¶Çþ9Ãé‚ Ô ÷eÙÖ›ÏSvfüUPWy§‚ *ø_Â@…¯åÖîÍ•‚ƒ¬ÖÞ‡?¼ƒÍ¾c7ºüÖi#P^³Ý+ÃBþêΡW9ãÌ76·Ûm·÷uçÜüö9zÓQÊa‡QŸ„iG1mdsQ†òµÄ—…›ÌXÜéµ:Q 粡j¾Ýª¥¥eÈ! Q8 !û"—ƒ×2r+ÅU0PÁ@¼–ÓYg…“bLœÃÈÁq¸ïÌCïKu© B#ævÏ»òWqw½ãðí®4Ú©†«ãvžPpw4»7µf‡O 1ïM÷Õê™)~"Qn'áŠí)£C.Y¯ueˆ˜žUxÁ$¤¨0hG•K‚zlDëħUð8~¶)'̘ Fù¢ÍA0&Œ¬ŠH¾€¶q&=gé"‰lÈáQÙLÒåõD;'”<2mNòƒëFT2²½Ž´ýâCþØÝÑÞû“»îº«ªªŠEel¯à¾ð`mŠe–R9U0PÁ@ ¬¼r¼”ïbÕı̢e®“ð§ÓŒRÀ¼3Ê0”Ú(¿/]ëdoNIÅ £NßþÖ©°½³Ž¼ûº§w;ù˜‹ÿüîålÏ% Ð=K&挘³XuÜnºç¡Ÿ 7EIì +›ã';Ÿùìcw{È•÷>q¡,ʦb*XõØ¿ŸPÝpT¼Sªê趃KNÕ†ÊÛïzçWÜA•áÁ;gs¤Ø¥›-¼GÂ3Ó£ª ;E3Èä‹Y‡Ýٓ̆¼T8‘T¡€²ŒRªÇê¬ÁO‡kƒ¬@K°D»Êº¬ ñ™U‡TíA:·«¿Ì;«Qv—:p×sþðï3DJ–Gr8˜M÷ŸÍ•S T0PÁÀê0°Œ³¬.OåÙJ0 ~¦Ì#Bàžäpáò"îµøã=9›²;XâÜüìYÅ„÷…«uÌïæÎв7±ˆu_‘)=åTÛ¼Naf(x‰\ª¼ß–v²:{ïŸ.”û4‚9'” Ù×ãÍï¼]Ö°Áؽo•yAN%:ˆȼÀ…¬ëe'rá¹.•R±î,›ˆ-Êí´¸ÒÙLC¨¦³#…ŽO…»2ᶘÕ^§Ða'Ôƒ‹u&¬ÒIªiu+‡lGªJM°„ö«îyÊÎFb.mýHШ¨œ+¨` ‚ ¾<D+ÇWÁN'Xé4Š%× E‰ ®mÀó^X` cV÷E?¹'h4KÕíøŒÓçì!ë¦5"Jú1ZFå»D¶úÀwwóâjñ1¼ÚP¾DD Äš˜8†Ú,ËúkIÝñäM¢…îU·üæùóžûÅqÛ=xÏGcÅ+VB(yx¥M^ìU…Á€`v ìY]ø|öX\*1´‘¨L*Ú«ê‰õ[tS±±‡]~ë# .r T¦'ÞHEmVì«¢¼Õâ¡àú `ÆÄé\±ÓY+ À•£‚ *¨``M1PqÄñ[• Ú|M8¥xGæîh¨®ŽHƒŽª6©ÝyÁû—:Îbp» vZ¼ -¯òCF5w%ZævδºŠzw¦oj®ïì‹Y3þ\‡»Æ^[ß`É9û:Š —Ëçyõ¿˜ô‹WZ—ÌÖç žôÕ<ï„ñWþñ¹ Oßë÷¹ÏŠÛŒØºµuIÑæ­´U_"–HÍñ5å.{éèŸl{سóž8a·±ö¥ n}ÖÔa¨‡îuÚÓ/ß$2+õt¥ØÓ[ÈØí,s‰ Ž…ÊY«ŽÜòº¦ªQÑÞÙÕÃÝ×îÎûïÔÀìÇoVLYàù•£‚ *˜¨8âøŽ¶k"‘ð{}VT*—*9=Ö†A¡Ã·ë*ÆÀ¹qý˜BÁíËÖfÚJ!UçqùóãDúѽ£¹žX¸Û_5:2'RçbÍ9ÆŒÚrÉü…Þ˜Íjq³`kóÛÓ=²í'·T5ZG92Y›³diPÁl#rm÷ÒÌ¡»ìóÙ[ãò²g¹bßÏ’YûÐ÷â ‘wüC5C5Æ{†[zêe³‹ÓñÑc³ŒÎØáÌE;¡IÃ^{”³Ndeb´ß:¦ÎkkY8Sv÷-MBëggº'éa8«Òm²<,/² ‹zêE`¯ T0PÁ@_ôW@Zÿ+âmÑô¼XÈ–Äî_ñÜŸ_¸®¶ýˆƒ?›:k«†Ý’‘|BÅÇ86íËFmÊÝ †Yz-¥´µÚÑ4Ø5tVûl·s¨Ó°Í™?g¨mP:‘È([Ð6„<*Ǫ­ªwŠFú©L ‹¹Üè%  
º?zgš#Ÿ>|Ú連ºÖY“Ž´T{›Ði;"n´Üΰï{Ã~¼xI†ü˜LpºSr؈¤5ˆé…dW·¯¦ù´~›Xà¨7¶Þ vÇŽ–¢ßUV£°íZ[¢3kñàµ\y°8½H Ž‘†ÑaåÙ°ˆ+F_€*oV0PÁÀÿ6**è/hÿU© “ɤÏ€u´w¼~_À“ˆæÆ_xµfhýàç§?̨S’ ÝMŽ‘…l±ÊÊ ìÑMÈâmÁç4ºsíq•iß°`©.åS}jñPGS&_HªŒ×Hb®*£§¯Ëf. [ÝŽX¾§àîªnòÄýU*š±´F7hlHfzç÷DGT”ˆa³õZ3¥ªî’·§#6§Ê6Øý^“Úâ©úqc6Ù:s,jY´ÑVÍ­áéüã&[= Ï1Úλ~³ó¯œ=ƒš,ÛÇÛ ´½‘œ¶Ï lßþâüg²µÞW?x¢·G;ò„è¬î=¶ÜoáGÉ<‡7¶ÏI|ü×÷ïi‹w7)«rT0PÁ@ߨ ºÂ€¿€hVÅ€y-«ÐÕªl2o·9lheE‚.ðÜü+wt¸-IÜ>ÄTZÛ€ªÉ*[TõT±¦ªj~¬m¨uD¤äŒªöï7ú¬mªOÕÁq‹{ÌèÅ@êì×v½÷ð‹[Û««›"ÙÎKßÜý‚½î¸êÕ“ÏÙóiW_qpu¨;²ø¢ó zºÓƒ\žb)2îµ­/Øõž«>-ÉT2•f̦üN`„•Ëf½x|$¢A²PÀ•63. hŽY« ]Ù¶b ¶´Çœ½ª%jëŠYÃÅVo•µ+¿Ôeµ=9B×÷ä{›Œ^ëÂöâÜ’=Ù[hµú²ik\0âh›Ñ÷áÒÜܸ«N™uE8·vέn¨íŒE{rQ8`±º¯TÛ•uZêS°Æ„­§LU¥å¦õæ(«OØv×Kz;“M6çò…L±d(«-àUD€ X٭鎻çGÜ-¥ê¸la¶̮¼U8çPýs‚úþnÉò0 ÛN˜ÂdF6?¡ ˆ‚ p¢8cS$À±uä ¸N*•‚T /ÏØ¾Nû]ø MLC8 Ý ƒïBž¡:tvvêpלÅàcÙdýúÜ:/¶BçßNTTÐÿÁ3 •®¥êq—Á…ËÏ« ³™ Á`Øpa‘ÅKUÆëPѾtÐáÁxjHíð–…‹áÃçïú×IÏL¶‹÷~èò玸öàÏ~hñõh¨3¶~à÷S~Ã[§þð®Û¦Ÿ(¢*ªsê¼ï¿zÊÑ¢.F å>]ž+ÈI5i¿—ÎÿËžÂa§¼Òaf3…ï£v»çOÿ>NDXQûá­ªˆ,²†êü"›B-ûL¸ Å|ÄC䢼[e\ªÛ‘íx±Y9]²Ùù¼Ûáä= ü:ïH.R>TÓøÉrapMuóŽtlhÔÛÛÛÐÐgÒüجë<fm-Zx¡-“ÀçuÕ i%×ëíêêÒØ ³ ì0”@G¯‡¼ëêê€R_5z“ ëVüFUÐü_]…ÞŘ²xñâššæ€}}}ÁššÅKZGlæ-tbY>4Ž8’ÉlÀç…;.šßvî¹ç¶ÏÍùôü9ïî÷ƒŸ¹b›c®b¾Ûã±0`«†¶/-ÔÖK–ù--ßÛhÏ¥ úêjª ¶O:^¶úr~_m)íòäk<ì²v/êUדeÕ¶O±ûö”Ž-ÅkÙšRwcU Ô–ÿ¤z¤ó¦§/@„=y—“Ž!=áO}íôöW6Þ¬6×Óþàc÷åGÎrØêT±Nq ¶fUÑž³» ± GIÅ`¿*•µç}ÊŠ®þ-Fʈœ1‹ÇÇ&g"cá]ÈÅ]N¿*úEëìPÉ|¦èìá£8EúA!¯±G±‹Td~ažÊ‘ƒ°¬¿ú–+7±ntMð'ˆaõ/®¿O týuZ6"ú™êÌ£Bçº]¿Q,³øÊèQå³FÈ7Þ¸É&›(›s¿ƒýÇ?Ÿ-F<Ës¸¿J§“œz î–ŒhOÜÈA€øñ˹Ó8m‹;E†Ñn”ZŒØÒŒ‘6Œ°a|f³Œ£v¸ÃH©N3s‚7æ—Œp,šÎá–šrŠ—Odã‘\i‚"ñ’þ<Ÿ œ¬™ÍH‚É/š/$RÙ¾d±%otô¥—$r‘’ 5øÈä©„d 1ƹ½½í}ùrà%Òip$‡&ÁËcÀ|ø?q¢¡iî2¨ p£2¤DÓiÖ Ê7X¢Bçåe’MšgùÎZLTÖ€ûg¯ÕÕ¸wRœéTŒ,W]uÕu×]Ƕ¢ ~ÿûßß¿ý>ùä3Ÿ×§mOôj4šJJ裪j1¯²)#Ü“LTgo¤7Cul¡ÞŸõ–ȯõjAôSDX÷ CŒžøTµê,.FÅB*åŽËAî#< vö©%Ñ9j¨*Å'Xr5 {®¯VB>´U-I/ ¿Rº^ù‡X£‰6e/ŠÞ8¨>7I×Í'¥lx±MÕä•§P´ 7õذY¯ÓáqØQ ìQÅp>N÷)G!o)±„\°«X<‘NæU®¤ò†Ýâó8«íVo¦­ ̽E€]ð»,u°ÐÅ M£MMMˆA+wä˜Àh‚ ‡Ã¤u Š 婢¢‰ihš[Ldƒ †Uå_ßï#éj4->x°èà üµ¾ÃµªúWè|U˜Y»÷+ ø?øÄ¨D+éW<òÈV[muÒI'577_|ñErÈm·Ý6wÞ\‡]ºLš}”×ç#Ü}6•Ç ÚåµT×ùìU×b‰´”‹Á!›¬¼T툪Do¡àÏŠ F[HãñQ81ê]_ÎÞÈònÆÀ¹#Šjô–Ã1ƒ²6){•5«RÕÉò«²fA‰ÐÀrÎ…U[êTÞI©­ž:¾‘”bjÛlA {#Qe-ºëòÊk(WUPV’ã‘h>)æ̪ ¨Ï© 
šb¯×òzB9ºËʺ/ºj_ ÞãiTVnAì³°ÕÎ+wQ¹üÎâöõª,~µ0Îúà=zÕ\[¦ttt°$<€õϬæ@oo/g)H…;èB¸üŸ:hbšæjÝôÁÀžˆ ó@ÉÄ Ö Ô4½žzÈv¯Ðù·Ó¬•5à~<3½e@á‚Á”aeÔ¨Q}ôÑ÷¾÷=‚æ†#ñÛoùÃý÷ßÿÙŒi.§«Ï´Á1ó#„ùבb‰8+ûr<*ím¬¯+eÙŸ {+,§p<•Ê*‚"ÀÜ`“®‚Jcv†êÁ–‰€YbŠ×å´ '”rß…Ùs¾½Øâ·yÊZH¤kýõ0M#í°!€â]º:—WÑœJ{•ËÉêl›SùŽ3Á±^f—2Sžjbþº¬"!ç³i‡·¤,TˆÐ„~äVsÍ·3º}O4Uyë’ª”Ì$¹Fœ!FŒÅä Ú°ógâ%å­s˜¹ñÈPF@r0www×××£À^ToÐ2ßh'M'¦𢜠…ئWÕ¼«'‹aˆtÓk™I3ãÕ¼µž>Òª/´‘Hü°^æßœ×SX¾Lµ+t^ÆÒ7º<`-eÊèû’ ¸/#)¸Ö†¼…€Ë%=­.1|è‚ùs;Û»†^SSgr©È¯Ù|Êj÷ˆ1áí-WÂ^_ª­ÊB:tá$ {ÙÅ“·V“Ên›¬* ^´ÄVÙäÝte‡6wæÛf~æA£¶‘ý¦µ‡¨,D“á°O®ïÏKu†êÛËÞ¶¹`ùòÔ¬)5q žË!Î3ø²4?©¢eÿÜfA쩪‘XH2=Xþ`nâ’y„yÀÝ—ù\ž1—Á3`)d‚Ì}zyáî‹B‘HÎËPò¿òŸ§¹][€k]ô@‚éྠz¦Á릨]¡órËêÞ /Ðö†Ü×3ïr†¯“¨¨ û±G¿B‡FãzÉ’%œ±‚FÖ w´†lðæþܧC2Oä>‘ˆ´P-2nN|³Ê Íá°[e#ÏÀ<Àƒ ‚‡ÏŠ“†Ô?ìG¯ëM“³W ÍM£ƒ2¨ÜÐÙ5…k=whzÆÕ¢g=~X¡óå±ËòôKÛ-Ÿá+§+p?êJ0ªb.‹eŸÖ%===m´7YÑ Tm(¹¤˜ï—w ”íões‚«É4œíC`´”‘-å-Y«Ca»$‹†Z¥kòXLínÿ¯Ü€ëúEf… »ŒÂL>FŒÁR:XÒ Éu]»µÿ}h½+]‘ƒÒ!›Ù³g£&¨ð® ƒLAhhš›F/+?ÀƒF˪ÞZïëÎÞÚÚª7=Óý!x¨í^¡sM«Ð9ªPf]ð=ÑDâÒóεBÌÜFæï ˜ ºƒ²¯6µ€Ý²ë6CÛb‹-h²þ¾GK ûêÎbÍÞ%.ŽIDAT9þ|öç—:ŠÂhKõUƒù¨ÍnÉÑNˇäl(‹ü³Z ÔV´Öý5X?ÿ1±<€40Cþ„h¨gÐë'L_PkM':d3dÈ&È$¾àµõ$²Ö0¥ƒ0 cõa  …ºÑ¡mfÛ´$ÃCA…ÎQ¯Æ1¦•ZùÌ€OB«*¼«ÂIy±|££4¨Þå×}—Ñý Ôv¯Ð¹¦|ð µ44iìL¹_ö¶ªÞñåïWp?®À2(†¡2ÇÁÿÂîÕW_}ÇwÀn™Ú?óÌ3;ï¼3:7Æ—²’ í4O™±Do¤Þ­sÔaô4hÈ XOÊê²ø«=V±™Ì°>&¯is%FX"þšÒñú|hîË(Ìô¥Ì}—§ÖgàVRwfÁÐ XŸËJH=@¯ä…z«ÜÄ4:dÏd”Dy*6 ÖS Íwµá·V¿H`+t®›U71äÍ%8¡õðõ2ÄZi÷el`­¶>‚†™´Šé„Nxøá‡ïºë®Y³fMžWwÂ=P¡ç%K—.åR›&}.ï@¸*R›áÀƒµýd£ûç@ïKà›X77Mðª&†/]Æz–i}ú2ëSZ*[¬g|‰êVè\# ÚÖs šžgi’É÷—@á—ÊRÙÜ&½³SÏë!>´Í>ú(kºhÕ†¯¹æšÝvÛ¬Z ¦Hè˜4C0ù¹ÃäÈ8-x³â`AflQ‰$cñ†Ñ¯j6™°9ïÑBðúÏ”™@¦ÌH]‹úß­½Q·5Ü—æ† sɰ&†ïV]¿áÚè†.·»&ƒoø›ë¬xÝÄtv ÆB“z”M/×Y¾ÉW輌]-ÓÙõ¤„„ÕʾN¢Â€¿öVö®éÛBÜ]¡rF» µ¨X"î#®½¬k‘Ø|ñ;É€™Fp”)L“úUøÍÊ §`0!z,ƒ”^G_>§ž?r¦L²ñˆò)–2a]:çws,+s”r—CÒÀ•ÖŸ©q€b¤Öò±Pg¤Ë }ÿ»v4j¨›‰á(˜Xh¸xÄ•)¡Ìz5`‰œº¡—‡K¿Å2PH¹ÝÉÉÍå‘¶ü[ßBzMë¿|•4¢¨<@Àù’ô¿|9ßZÒ¥žT˜fe­ Ó4ôÊ )ø œ¥Àç *Ê‹GdÓÔZ rÒå;ä$MfΫÇV?|¾¹+ øó8ùzwÊFÍx¤Jg³y"zœgÉ”€¿û ¸Ljš^Ë£-#ŽNku«f´å›ezæEî³^Îú(ñòy–OCÊp8:?j|²}=¤¯ý·õö3].¨©çòŸY¡þäÑýŽ¥EFd@ÓÑ]—H”-ç—/pݦÑý0$éf¥† šŒ6â \züâ2Mù0Åô%x(B À®/)¤ÜÊ”ÆMý¨œ¡üÖ·“ žkT ÖU…hIèá{…ª®ý¯ðî·s‰É$Ê*« 
~9€_¦À6šdQ“hy\:žÒÄ$Ѐ®Ñ8>W:èžõùV®0àÏãäëÝaü)¨<›4<2ÛÒ–ÏeCgl¢û$yf~ȲrÉòëUâ«¿]îå" '¬ÏC³ô°E6†T•žB–…fò—'•R&h¨œr¸§8VE‘åï®ÛÐ)Óz©V/S@#½úúó"9y·Ì†uOftãfKëºÕ|èxJåuý—ÏIóqÉ^¾Yn\}‡w°<œ•³1}!§÷Áy–/¤œíÛO|ùúÓî@A êi”@CCÃ_þ¿}0õ™öAÌ´PДx)¯ H<Çòd\‚@ˆža3¦‘‡Æ]~½Ò"÷éËåBÖhXi™R .wU+÷×0`x¬ù#ÚHŽu‡ï>ÖðBÔj+K-܇àwôž2ž–Ù £*ã£*yx—GœéðœéºÛS w4C¢@H™rH~Ö“}´«ª?˜atÓ ‘Ð4&A@}©Ù3ÙʨÓ÷×ù™úSmªAC”¡ u¨0÷¹¹<%Eœ¢eËwÜÑ@A ÄJ!Õ(KëªÝ¿ZýÁ†nÊ2=—!ý’ô¯óûgª×ÕÕ5hÐ Útù¦ðxЬ[ z#Áº…z*˜T-/ ðˆ©‰Æ$ cæ¼FãØªÆò°BÓWð ùÚ— ¼Eˆ¤L.Ë e±i‡WýK¿ËTÐæèL&Ëwk'X¹BITPO!9/ßo5šÈL1dó”ÌÍ•f¦«“Mg&£3¯¬Š.uùëöLT /èXv6É%P¬ªþ<|€ZëÀ‡§œ?Éu éò_][͆—Dšæ&ÃçëϰÈŸ¿Ï+”Æ0Ç™ åÁŽü”Ï”e…ò×ÕåÖŸ†Óî„JêÖDAÂB“4°“àéêé]¼¦[ΰ´PœYÛr/óƒ‡å§JŒ<4“Æ< „°Ëœ¸¬•,ÛÚÚ0q‡Ý®”æ—Çé/3Žç•Ž+Åe…'_ëN!—·#ÌYP´XmVK¶P€Ük‚AÊýaã»È€¡KÆ‘•ޤã¯f-š¦WÈV&zèœú)ë…µf`Ül-ô™4½‚bÁOYâæwä`•ˆ¡ª\êIï¥Â+­?7Ë™™[èaš3" &f$˜nëî ê8ʬ¨\þ:OÐ"ÔSCGe¸ZJ Íðaà hJÎ@ªuweVªWÐzfð¦)›d&­‹/3Ø}s8ù õ§2µšjƒp¢³­ŠþËèýæ@[MÉÚÚ.K=iTÐ,p®i;®x(S$Í¡G-ˆ–ீ(Mœ`CãBe0ärƱU+X”?]aÀeT¬¥„Q¤í­¨œíTÙr^K£–þî3`°À8êÑÖ¨IyU’nÌ‹ííílQ‡^y‹üÐ=E-?â“Kεî|èó=a-µÄ×*Fó´vÈ=@Tö÷´Âܶ\îó=@cä…à Ÿ§@ž‚–åÇ‚ò¬ÃK Ϊ+ Ùg¹áhDžêËÏ·©~EN=ZqSÓ É•²®r]È:<aý©°s†k$èKîðî—§ÿ2~¾e`iÝÑheŒž–õ:åÊ <Ð Ë« 2Fcm766ênKšÎHGÑÊ{1ÓôüXíçi,Q8çÏc_8”QÝŸà…ʱ1p×·Ë-@î®»îúÂ+/# ³‹e2œI‹\ GÑ(ñËËï»w@XºR0Ôk¯½V‚"+…?Îû.\rBÜe—]^yå•2È@<ð{%éä»ï¾ûsÏ=Ç#(žÑ–ÄwÞ¹å–[jü¼üòËú-F„òë߆ÁÀ~ÔQGÑ!é„T ÿG«©ÿÿøÇ 6ØÐöÝwßçŸñQc’n®6ß|sí½÷ÞÏ>û,cÁwÌrM¨0u;è ƒ¨'.eð?Cý•È@…ï¹çbqòhÿý÷饗 ÝjŒb8Œ:t(cÖücš»Üš4úí·ß>bÄÞÚo¿ýžzê)ý-2ðzù»ë$±¦õ´^xAW9|ðÁÑ£G¯)ý¯`—ÿ(TÍ%ôI£¨xЖ3-¥Ág=mÚ4TÍ,-iÀ5âìaà 7#GŽ„ÈufÎdûóŸÿüƱUå’—OHTޝ€åò0JZ—pÓM7!ý0-Z´è÷¿ÿ=:wî\­óáæË¨Ù$<ƒÑöɘûꫯò:<pî¾ûnÒÔóW¿úß…³Ò¦¤‰Â©h(áÈ#ä°ÃœöÙgðÀ+@‡>%í®_Üi§t!\Ž3æ¿øF@Ç[ø’ƒ¸¯…0ŠÒˆåÎ:9 8àjE²§Jˆò?ùÉOt}˜} ”Ï%˜üñB#Bä¤u¶•Ò?ù9xDsëœ AµN€å£¬ørfú_A ‹’@4YB±o¼ñ¸qãà¾ä¡YWÕŽµ¾àšäУó?$­Â@}“ÉJxæš AÎpÝÜܬižüå!„¬0Ž­é8ÀGWzTðJÑòeoê‘Eçþàƒ ì¿þõ¯\ê>ï¼ó rÒe&ýeË]wùt‡äû$4eÄaxâΕW^ €$ÐÄ’@2.Ãuúé§£«!?òªÈ×^{­ Á„ ˜x2 3}gñ£Û‹:SI `<¢ꪫ®Úl³Íà : ¨?¯ÈÆ%çË/¿œ‰\‰–Go¿ý675ÒÐ ÐÛ¹Dø!ú-JàÎĉåu6.9@£fÀú¬o~Ëg ´Þxã €åÓËׄ™%нøâ‹ºJÃW\as©×„æ¦à;ðÃ…·´úD¿ˆ8ˆ´Æ6¨Ö÷×í™*éƒnSÆÙÃ?Xˆ.JÃM:•êéNAÃÁ·¸„“š^[Ñ•_)ýëq©½Âº…‘¯ë.Œ’ƒ…!ôX徬++¢wÞy§\ÏÕ´ãz­dÒd ‹¥;3¡š¢M5…3 Ô-^îûôzò0í ØE¯@½+Œck:”»B¢ßì‚UŽ5Âx$?ú¥²M 
Ã(wp­ïs†ñ@÷43˜ËõâÐàu¦¯2ˆ@²ÚŠšƒsÅ[o½Å°ïA Ç%£ÕÖ[oýÿíÝùË•Åûð?£•6ÛwCKË¢=[1•Zh+LH­,Ú‹E¦DY$m”R´ïaپџñ}Ù›Ïtx|ŽöÈÇçÜçû\óÃÍœÙîk›yÏ53÷% âÖ!¥p˜TǸ8×ÐÄŸ¹ëÞŽtM>¼=D꟡VçĵåâvØaÉ’%&R€Æb%§L™âI>›¦ÆX¶¦GJ¦ ²HÃÓæ1iX{ÔŠ£V\ëÔjýFÜÁ{+R°›égÿØ4¸³ µÅ´¢Ù¼”ÀÑI‰ôrÎ9çÄìe±=Dš-Ñ¥KɬºŸ‡Eú&÷W‡B°/,Ì,ÍqM 851?ü²LOò¡/Úg¥dBÅré>ê86ÖqÀ+F À£Še󉔔á2ˆEµ4M…zlt© ]:ó,cúæ[ìF CŒQÒdÚÎ4!ƲßiòËÔFá`†AŠ«ÂØ7¼JQW•<åjê¨BD]“O&¡ ›Öâl ’Ž 1°q;*л8–=Ó{ñ¨c ÚÑ1ÌËU"+ïR×p@ ¤$KyY„#"Ä¢ç'Åá÷Æo´¨ÎåžU;»bhë 8¬±í,]J´Ð~Y…2~JÄ)ôUXE²bHÄEDvF 'S·ò‹¯œ}µ´Îë={¶yÁ‚‹-ò“ÊH/ÁuŒ;"X +Õ•ÁQ²6¶ÿh\#Ê6 §úùÅ•±g 3t’ÐLx¤J?%2Z‘?ª52r y|Åzm…˜ãŽ"¤P«>H/4h£-ófùé§Ÿ®[·Ž˜œ©«{F_Jª‚ñÞqL]¹²ˆËs³ã€F À£Šeó‰°Á;øïÍ·ôÊ.iÚ¨*ckFÚ|s)a¸ÑK™^tTtÁggì“Í›7Ï–k3KOWÔÅãLàW?WK rHRÅÐî |t¤èå”hãà /4 ã¨p‚ó²…G%‰XÑX3Ž{?鱌!¼ó~$ÊŠ=¬ pYšf2(ôFÄÇ9¼[o÷Gœ^mUÖš[(B#F1qBÀw*Ê%=ìxJl2Á¬’¬%ñx<Œ´ÆÆH&ññBÄäȺµœupfÛÙïŋˢ&ÏÖÁi‰®õ²¢5jígÿJ²6kð†kÖ5@~Ñ $¬m˜‡qš IÑ[å"GXÃu?=‹ÐO_Ùa¥Ø§qJa¢ñþÍ2±ìiˆXp¼ß⇓.¤!h!³«QDZ±ŽÞ2jX•šáJ¤$=н"›Ž5"45‹³l}ÕbÈñŰ ² '=;–Xº±jßGR²)(„µkÍJŠ™#{Z„ °Ñ‡|ôsˆKïU¦SòA¡!©t¤¿áÑpl)2¨)ÁÉõ$A$JaGƒ/Oíd¤V ¿äé`$ [ÛôÓF)é%½=8@g¥têÔ©!ƒÀcýúõÜ ,˜F@YìÙ“BÓ¬ŸXhûÌ@;̃Xä’-o–›¯„MŤ* ÄXÌãùàƒ.\8sæL”øÖHÛ¹·,…îØö™^ÐŒôK÷Ü„ýÓ~ÄBz¬‚ºUŒÙ „eêˆÖl!cÒ¤I&@˜ÊÔ9½˜@¢8Ä÷ÓãÉúÆ»ÅöÙ!­á²ÌE"è ›çÎëD´§Ÿx§tµ0«"iøIh½ãØXÇ~J/î'™Í¤S'Ýô”–eGµ±cÃõ jÖ7ÓV—²V¶‚œÃÞ•cœ¾ôu(ߘeÐÁr>›³¨{E¾I¥ÁÈØj“8£Œ ˜ @Æ2Ý<ƒTgú’‰àpaœu| @âñÐÅLâùçŸÇ²MM\SeötuEµ ^ü$-Ø%) tUŒÓc¤3›Ñ¦WhÇn9”4ùDùºú`­À¦uÈC•ˆ é‘—€5“ DÒ :}Oe{LIóÓPί’…}òÑ ÐŒM¿À¸ùM/†Åÿ€F^ÁÅuÈð}6m{³d=š^fÍšE×LÝénì°^ú•+àE× V‡á‘m-ÄúÙ?U'4¼ç´„ˆePüÒ]HE2Ø'¾“uxNU: %¦~z"9vØ-E3TŠÀ£ŸÔn̘g¥|0ôu~VŠŽ)°yé:x¿ql¬ã€–G À£Šeó‰1;ì8£¬Õ9]e@ßÔl²©`¯^gÞ|s)ÁFÑiôRNÏŒ3 ¯¥9ã”q6¹†N'Prè¯qm ‰kVnxò;W‰+ïÏñǯ)C¶ýÔʧ™ cÓ¬Q6躆N'VÌ¥@,å ö U85ø:ÙA>Ê;mg=ַѱªwD–Se,Ó“ÍÌL_¼B-‡Ë.zu®µÞŸã7“0I¢h¯¦8O'f}lcÿL.Ö¬YƒHY†$Ûä>±SƤÄîèJ'7߉,B0®9Î&ž±^ÜGÕÒ¥Š‚‚~Ž@†~êC2Öëè>°dáèaˆ1g²ÏB§Yš–«ÕŸqƾÍ*2^÷³ÿ À^µjP~zM°mü™õFº‹ƶi)fÚõÃ×Úµk•$éýô8,r@'^˜vŒfq*h\¢Ù,„­Šf«>Ÿ-z‘Œ”ôDª×ÙC¿ql¬ã€–Gd]a $@©jQs{Šäcv×qèoóçÏ×¥-âI­‹ Eˆ¯ÃFÁ­ÐÐNÅøa…=ùžÁu>icÜïLÙMÛAíÔ´ŠöPsGÇPÈÇ2»Ù7ÎG®ˆÇÚêÕ«q=*ý+V¬åH­e÷Qð¥ø*š¢¹¿Âhî#c·¯;³ì)«kaéÒ¥4î;Q„8©Oœ=,[¶ÌäÉ®03À#¸P,Dnî+À¾5[åeTX#(?Ÿxâ >¥Zøé§ŸdYñÌi¬”ÿgÌØ{}tDY¦âf̼×k@J?qmRâr7(å^\$"Ë#ðL~öïN%N’ºpW³ªàÚôEd !ÊjQ.„ 
%IÌÝX”…ì䎪Çá’Cc§zŸVz¢ùG–ßuU TÓ¦MÓ»Ðå#µt[ÌzÇ:ŽõÒòÆÏ {–¶@ ÔÕ]ó“•Ã$£§Ç¬*ߊõ–Ü‚s•PëÜoÀ’ fÛO„ài ¦z,ÇH 7ã—AÍ¢ ‚]!Äy’ŸsŸù¦tÆÉG·Ô?]™§6­_·Å/tí"¨Æ¯®.ˆ,_¾Ü&â² ¾…”®¬jîNÎkïµ|¨…%É¢Ó|0J ™«ñ›ÃÝôéÓùÍ ë‚ˆFÀš3gŽÅ˜‡tœç`Cfh0a²¢ƒk`IFç Ð²Ü eš#…Aþ)…fô¯\¹2{1›° ûYS±ˆ¢bzAZÈ´ ÁTØ\Š× kÁJL§<]…àXi?=—Â]DÍ\³À-ˆ¥YÎq|ߘ7öSK¯w ÖŒc£ŽýÔ]ÜO2›O7Ä^3«ṏ“j¾ãNz&Ú…7ßb7J€ÉFˆk0b»Id»m jƒ)SV^±°Üàª5Ò¼Ör÷åóA¦Æ"½ÊõsTúW"(¹D”W<ªW=YRL´ƒ[â] ÍP£SäŠ^FƒV¡eE&a6å“P #]êZæ•Bž©ÁÆ`¤mø.h§©8\„ªL‘M‰â›¶ÿfêÍTi¯~½%½Œ0Â(:Ï&Ð٫Ǥt_1­<±{³‹ß†¯d±d£SÌ>VAB°e¦%örú/DZQÇ~¯C2ãÙ’@ ìÒ¦íZjÖýÙ¢3ó©l6¤]j3³Þ’w ¨㳕 zío1Í^^P„Álãa?>ñƉ,Y&’Da¥.öm?Í‹|ô@[›Ø!¦É´ŒxÌŽJ¿F¶‘VE0ð1 tìœyb7õZꔜÖ6,Ó‰z²kúS™‘K´Nk ÃE“L©(ÀßÍS•H#O…Uô$"j/JËãÿÄÛ¦eôP}_q‰Ìï²ÂW¸WøÊ œøe8R`D§À£êä£@ëû½bf{ߨtm‰YöLXKO§,qwT=‹°Àƌө'6#„tU7Fé˜3”ÅüÄ{îºéwLãX×ÈÆã@Þ>âY‰È6ÃÀˆ®…»MÓOØ÷4ìêüÆ ™µ¤[ŽEks°ó ['4cØx"žã(isʰÓdÕË—òBF=嵯 %’–ÛÇ3ÒXðÒÖ—Q…Á™4®Sƒáè=ckC¯P»iûW†p4¢³ô g<9Í»2:mlÆrÛÀ•i‚)ZOÅz$œa‘Êi/lOè—²ÄL[7Ç2Ãð̈óöSdLãÃÞxˆ¸¢`£Rƒ†Q¬ÙúÊ:jó!"¶Î¾3–EØ7‚«‚4¨bXäÓ„vpaÖ“7A¿Y·$ ½Z\#Û?pOh6…TËE<Àð46…#ÖÛ¤ÑxÔT[µS^á4q¿ CRâ2ªÞԚ­À&HÚzY´ƒ‘^uHÙ€“ÿ½†0óéF(!&­¤ ƒãˆ1ŒjÿxŒ%4©Âþ#“­Ç×&Zñ ´Q«Wãhà ®[OUC$ªl6‰ì(‹2íkö,]"e)O—‘­Ii ƱÖGZãýôRÜO2•^( ”J%­(Z‚ފ­¦K%’@I $ÐOÀý$Sé%’@I $PØŠ(ފ­¦K%’@I $ÐOÀý$Sé%’@I $PØŠ(ފ­¦K%’@I $ÐOÀý$Sé%’@I $PØŠ(ފ­¦K%’@I $ÐOÀý$Sé%ÁHÀÇûyq®‘w'€§«7*øÒ_ÄÕž‰¸2¥ýw•O²Ôuç€xkVn²<5˜t æz $æy¶òi§ÕM®Ÿî1’Þ"j…Ú¤»Õ¡UïÍJ"ÊP2?ÝÑ‘fSW;¡AnÚoœ¶^Z–;ñ“[Ï’@g%PqtV5EØD”@ Äe=ÀÆ¥<à$·ó@ ØãBGOXå¶¼\TŒð3Âr‰[·r×|´ÖîbQ.’¥ÍŠ«Ø{URÆ{ÅA¸;5¡¬+Ÿ{i:zE*‚F…‘ê%%%ºQHE/Õ¸F¼HÄÔUa­€2íÚ)÷FáÑ‹$*“7j'ÒiÁìD¢Û“¢eä#ÿ&âÂÈpÑÊW¤$ÐA ”ÜA¥IW` öÁ¨çÏ¥‰¹NÒ€[éÌ ¹ÑOX?@+Þ$à̽’PM³ ˜oHÖÚô„å4X+úò‰5"K°‰»LQ1à'®Yxœê äòEÏ€«[K!º¸©.$j[E<úgF}5‚lI¤Y^á§Æ1¥ iħGvÒ±–¿ë! 
¼4ì{£ÿòò”ÂD*”º)ànꥨš ˜5k–ûýX]@×Ú‹Ë–-ó\¾|9`ƒ ä¡àjI?¹ž ê Z ®Ô…XÁxX•tœ?þþó¹@ÑÎ.åa³ÄÜ ."H¼Ú{ULÊß9ÿ¬u»3<ø‹ÔEFàV³ t›m¶´¨‚úèI–feIİ' Í‚dÿ!ïßàIw¹QY1³„¼Bú‰'ž¸nÝ:‰þMAìk<$Õ³$ÐM Ôt7õRTMP €C°aÒ8yòä»ï¾{ÇwôÆŒ@…áV$¯q„ÔøBïŸ ¦€ÖD‚mlpõÜsÏÝ{ï½/¿ürþl#ÿ"ϼ7Îeêfø× ÆùK>¹ÞÕÒ9¬ 1@èuÚÁ”à+â!®tdH—"®|Þ"®ÀÎ\*›˜y¬^½:›è]²°ÏMð |öÙgçwÞ{ï½·çž{fýúhÞ¼ypn×]wBœÔ¬CÍ;î¸ãÐCUl¯½özì±Ç 1XxMÄZþøã_xá…Ù³gK„ß^ôù矟}öÙJjP˯¼ò ¨ó^…o¾ùæ=öØCk0ï¾ûîSÙ^yä‘í¶ÛN­}öÙgéÒ¥€\.§YÈ->ø`6t1DÏ?ÿü3ÏW­ZåÕ¶¥eùyÛm·Ù±Õ~>ýôÓ;í´“õóŸþIª¿õÖ[Ò­rÛ3Ö²¸€TY Ù$îŸ7ÜpRe]qÅ|ë÷ßÿ»ï¾Ò²á-¼Û[n¹ÞÿþûïW^yeðï’5ìa9'8P*˜¥?úÞyço€ç½°_™üÑ»`¼fÓ>b¤ÿöÛoÒyÆâ‚DÇ©xÉ˺ôÒKų¨~ÄGwÜqÿã?dñò•ω-p~ 'ðw9Ío¾ù&z¼HÅ*³tÌæÂB)–‚áë”)S@¦\[¤r-ÏBM«ÊDYÎCe5ø˜cŽRš…èÌ‹8jw—]v ªiÓf°*—]v™£Ok×®…÷jIçŒ*3mÚ´üä[[jæ¹~ûí·Ö™Ñ|ÔQGia5¤¾ôÒKHÕ`¶¥%BYo|íµ×RÀj¶–Í!<Ñ eøýöÛoû¹ÿþû+#ÉÑGmSüä“OÞyçM#n¼ñF³Äx\…1‚¯þå—_@¸mOéJ•@pgUS„MP dyó $’¹¶ü9[°€Ž‚ÐâkW‹ÃA5…L]p,ýú믠Èê®Ä¶ÃªekÚjñ’³f5¥.„ãhòJU‡ÜjÁT[Âï¾û.¸õ.›ÇJæ‹#[­&ÐWƒYñï¼óŽêNi)fB’ Ó·FàÖb2€DžÜ}÷ÝW$”›.@Üx«˜Â ª”œ4i’gpÀ^˰ÙÖ/ü6™@˜Åpåy9J $Æï÷.qŒüV’@G%°á´B…’@I #€àžÁ!#=A]ȃ[A ã‹óV\ K„|A d—TÅ/¾ø‚g ,µ íÖ¬tÍ*`×–÷©V¼mé•6ùÐNQ)‰}ôQ[¿{ï½·\_ýu^µÍ] ×!ÌÓR3çž:e®tdÄ=ýòË/¹³èÇ Ï^®’`ÞlÀŒŽJá4K r#@ 8—¥ š` >-)ƒÐY8`°Ø ‡€ lËæ.˜?Š%·JüñP™˯õ4:D YA/ rÐW-My@‡¾`OuqOPzØa‡9ù¥eh}úé§«›Ï /htÌ µ¶lgΜ©A»³|P¯¥áàpÀ`øÃ?4“ˆwëƒ]ŒØŠÎŒ!h­¼b‰saÛæ4;e³CÌúõë¥xWÊh èÚç¸çÔ•·ËUÌ}ˆG›g…’@—%PÜeímQ©r¤+ˆë<—õp‰N½Ñ¶7‰¥z8Mn=K”@]EÙééQ7Ñ$„@ŽE`-WæÁ9ˆbyœ€¢Ä#¹Ù!V}%Ò@¤Ô1žqÊó›SLÅDToKÜÓ§Ow±Æ]wÝåEÚ‰žŠiV\ËÒµƒ*!?¥km‚¸ §VRñ²ÓNØñŸ¸3 z6ò³1è-ŸóŠÌ?²dfyó¹#¬µ¯üi§æ›(NvĘóêz–:(à*¥H* Œ·xÃÜß³Î:ËiÈÚPm¼éØÒ÷ÁZ—y™@€am ý[ÊwÕn Ôðp믨/ üO$à´”õdlj-ðj/ËåøþO‡FPëŽ-×Y£™ _<Ÿ>ÃÛë%-“@yÀ[&·ªUø%Ž£µe+ÃNJ;ÆÜÖ–‡ˆI'°|9mõ>ǯwlmˆè/R'  €' Ò‹å’À(p&Ù™jÙdÆ T é\ù¶©< “•Tè’j ºKÚ(ZJƒ“ôu‰£s[9ŸÅ-c~³M_Dz ¯š–£SßI´17TJã(ò€ÇQØõª’@W%0â¼tÜß!r‚Û©«œ&f“ ÷uUÞEWI`ƒÊ.;( ”6ÜムÂ!&_îú:H˜ ‹hr^Ì©+>¹´ ú‹î&2åOdíï%$0âcb7rø…²;³àœ…ã ûÚXÈ5–§½œ ø?HÙ£ “IEND®B`‚rocksdb-6.11.4/docs/static/images/rate-limiter/write-KBps-cdf.png000066400000000000000000002350671370372246700245730ustar00rootroot00000000000000‰PNG  IHDR€àº³K³sRGB®Îé˜iTXtXML:com.adobe.xmp q3IHnoqvWT6aQf74WjNx 
FBMD01000abb030000ae0a0000d5130000a014000044150000f429000074390000343d0000803e00004c3f000026550000 1 -eh°@IDATxìÝ€G‘7ðÙ•%[NØ`cr8>r>&G&cŒl0ƒ}`GŒÁ`2†#ùî8Ž#ƒ‰ÈÎGYÒJZmÞ·ï½ï×S»ãçÕJÞ5’v%Më©·§§ººº¦§þ]Ý=3Mõz=+C©R¥J ”(5°s5мs«+k+5Pj Ô@©R¥’J.ûA©R¥J ”˜ ”<J/«,5Pj Ô@©R%—} Ô@©R¥J ̃Jž¥—U–(5Pj Ô@©€Ë>Pj Ô@©R¥æA%σÒË*K ”(5Pj Ô@ Àe(5Pj Ô@©Ró €çAée•¥J ”(5Pj à²”(5Pj Ô@©yÐ@ Àó ô²ÊR¥J ”(5PpÙJ ”(5Pj ÔÀ(±RvÑ¢E¸ú[ÀDÉì¬ñ‹có™«W¯ÆÿáøÇ>ö1Ì ÂÉ~ä#IN.²Cº}èCúÜç>÷Ûßþv »<òHùx †h« øké@xbÄê¾SÛ©‰Ãûó~÷»ß=þñç±Ñ ý¿þë¿^~ùå0&hNÍf«V­‚Ü«pËî~÷»Ï‰ÉN Öp¾ã!‡BN@¹Œâ{ßûÞÍ äøò¿þõ¯ÃÅŸÿüç(©NA üË_þÑs‚% tÔð×½îupú½ï}ï¾ðÅ£-FêQRïÍ7ß,ç{ßûž+xÊ)§œþù˜?­Y³úþùÏ&äæµ‹3ä$¼Šd"ŽñÁŒò”™¥v„JÞZ-yîJæ“@€Á" è"¾óïÌd ÿó?ÿs® ƒŠx& -àÄ\™ìzh ¨T¥>µGø€<ž`ú±:Îqüîw¿ ­ËÂãP{ÓÔÏzÖ³ '5Zn´öB5еNŒ´«"f°)Ï­5 p:õ­o}ë¸ãŽ3÷ð”§‚ ð’Ñ\|ñÅÔr×»ÞU¾*B¤¿û»¿»ð Õú$¶Q*]αÇërPZ&GYEêU V4X z©W,ŸÌ!C—ØyÐËPj`OÖû[4?Òpîq{\<ŒäoüÙÏ~˜…[0€#ËK6ËúØàðÖ·¾ÕΠŸþô§ÐúG?úÑ>ûìfœbÜO;í4 ¾#PüÚ×¾&­øK_úRè «ÀÒ«^õ*.¦ÉR§Z`Wïøƒ Þ¾ð…’Á˜³|; !«´ÀXœuÖY€Ø8|Ñ‹^ôä'?9â”ióO~ò“ˆ¨O>ùdX…ŒÃ ’U ¨¿EP8Øåó´(ÔBæ‡=ìarœzó›ß ³%ô sÌ1AlbiVí¤‚s*úÕ¯~Elg5áì³ÏV£YâûÜç>r¢v£^¬Š%˜´Ii‡øH¨N •ñŠL`,H¦Ê#ô|tÂÈ·dŸO¿`?ýéO—~ž@uqJí†,‘ž;åúNË,K ìP ¤ñ`J ìÉ(†  ’‡'9O§žzjhIŽMîC ý¦7½ Z3ëfJzç;ß)à1\Œ­[ïÿû9sæ8eÛp°ŠJa6?• <`3¿óÏxFÀ•õK”61x ‡Ö;̱rêÌ3Ï &¤²çè°ÃcˆÁ¡ÉÛ¨Nl;ÕcûX µòÅ)Õ©·`˃'Éù»A êÌ “J«ïr—»¨h7# üÔ§>5(CT²¡”cL  !³MȬ8Vôc†Ÿ«-¥‚ƒ-ÊÈìárH'bºA1-=úÑ.rlªŠ4e@e®9w9 F‹¸p¡½¢ Èo<,Ó¥v´Ê)h÷xJ L×ÀsžóÀ 0ÊŸúÔ§ìý9÷Üs-.¢üQi(TÌÇÂN³š–9yÃÏ~ö³Yùx˜˜êŒ¶œÔË.» ›wʾô¥/ñÒÐÃ!9ð7e1‘~¸•jwê'?ù äæDBM $ Êÿío뽜˜_UÜ’-Q¾"29ˆ( œ2—‹Rš'M0û‡uSÕ=æ1‰-fÈbñ[)g5fÃc>:œ¶Ûþg`Æ­¼úê«! 
™•üÃ?üzþ.âðPcì"SóÑÓ$-1gêµO :ÂE¸t•9úJ)®Rù*›Q RèAlH!SÀ* –`âÈÄYÂ)Ìá½9p.»Uj# â9Œi|E€=†aaÇüuZ‘(ãR;G%ï=—µìb€=Z@#@hÑ‘á¶X3.¸à Â‘uf˜>úèxB>hÒ¬ª–(e›óßÿýßCB€…X)œ€°áDš7ìÁTÀZ`&SYpBB 5 Q#Ô-‚’a(_¦*P‚[íÂPuj)^¬ÌHK„ ª0°0ýŽò;ßùÎo~ócÚ“|àµN“ùý`^ÓðGofâ •ÖVc(`D¢i±F딵“YA7›Q*Ë)D¥š •1Q)ùf4\ŽVápµàf.]©BÕTAWÚBçj·ƒ]&“&Á¼!1ndäD“˸ÔÀÎÔ@ À;SÛe]»Œl æ3ÅŽe ÁR{ȰêòãiWû­”²ÏËl6¶vx!Ó€gã˜f’qM–ûðÃçìÚmÂFù°&“#sªP36jÔŠ‚š`ÿZhÊ:ûßÿýßü~M}Û¨E’`B`m ¼´™Y)µh ‰*_Ì6ÊQ„zñW‹&8%P&>üà¹Å^ßA?r¸ÂÄÏÚèJÑŒ&Õ)ùQ¤hB™(5°Ó4PðNSuYÑ®¤À³ò`σ:Œ»'|ø^ãøó ,ˆ‚4ž1x°­×û"l²8ʦ[Ô„4¶L£gñã!%ó·üEÞ›¹P>®gÐÛ lV$ Ї§vØédZôEÌ‘UÚí3û>±ùUÏ)™¯æû:eªÖ°+»ø@/ó® –Ø¦ÓÓk^ó¸ ð”Qˆ OÂXH£†¶6»¾mlþÆ7¾a©™̽sJK5S¦‚2"<Ô!p¥"b¨N‚x`Øô¯SÜhEL$üò—¿„ˆd°¾n5µ{œWÃÃWÖd{°-{j‹ã®ùÿøÇ•åÚÚG¥®b¢‚a`Npnãwl®¦Iùq²!ØAælŠ  T‰2.5°“4à+C©=Y°$šŠ$Ä\·KÍdGž£¯|å+AÆ‹5-ÿž÷¼§Xž“Ü2æ›ó÷îw¿Ûž[g=4 TµâQ%ЈÆYÏð€g=d—¨†å60óMe‚Paá>9ŒàA&À K=ôPΜS¿ÿýïÁ<$‰%º·w ‘£± ¶[klä(…™ "G+H À†`—ï,xº&áÁ{ÄÙ à#ÑxQl‚ƒÁ1’ˆ}æUÈ£Q˜D€¦äyå+_É/'>h¨ÔƒÑEsäDEe\j`Þ5Pî>˜ºwË¿{¤Àèb¸µ^:–c ’3¾ÎÙH³ûÌ&ü*h$ec÷vø[NA”(% ;'`Ïá N©Îb(õ‘|DàÙ,+bØiJV©øÙ*’ˆ ç¨qKÁä jt ÁrüÕbù r!02~$ä–à;zH"¼ÆH¨Î!µÄ$yøå…ÆŠâ…ê¡‘ `o)=ÎjH8Ç’‡lò¥ã¬¦)EHzpV>…VåSµÌ8Œ&ÈŒ "šâPAœ;40@q`/ÆÜu‘(è'¹”J ̇Jž­—u.0 @)¦™}રÎД½kÏw§øR(VTðkÙzéh™R2¥Ùz!ÐU"ªP#°1µËû4«"¸kÉ“ë†bƒ VEš‚zLX¾à !8n)0 '§‚€Ó°­ @Ž]Lœ2D‘“÷\dâ#Ÿ$ÅttÎ;¹ÅAƒ¾Hƒ2bDÎ.¬U ¶ ‰²Ä¦“@}ÎÂÅ¢jÜhILÚ tÊzs<\T° ÎQ 1icp@jĵM`’9tå`ˆ•Ф£ 9èÅ£:4ÑÞF†…À‘‚O§±EÓ(ËÃR;Må.è¦ê²¢…¨ÖÒ„dæoÁR f‚+V*°þÐ’'¬`Jv_)€cö89”þŠx8„¾|S^c”ÅaÐHÄ„m¾*•Iè+Q,KI[ÄÒXiK4!ÜJ­ dQ$GµÈ!¿RdV<$—ÌQ£S‚L‡2 ÓÝQ],W;kÁ5 ç8$@ ¯Sè¡;–Í$0=ú¢1¦!I€®SK󡙇üxb…'VN‰ÑGíbií¥| tˆs xAP&J Ì»JxÞ/A)ÀA/‚ÅÎá‚2 , 1"SC‰ƒñâP½°+`«¼ECÄ*઱ ˆE¤€dUXÈ9…¿" JLCîhZ´ó Hu…Àbô!g2‘Qšàlà+á>£`\2gÑ(’OcNf§Ð»¬óÅ…!ƒU—˜_ ”<¿ú/k/5Pj Ô@©=TåðzáËf—(5Pj ÔÀüj àùÕY{©R¥J ì¡(x½ðe³K ”(5Pj`~5Pðü꿬½Ô@©R¥öP ”¼‡^ø²Ù¥J ”(50¿(x~õ_Ö^j Ô@©R{¨JÞC/|ÙìR¥J ”˜_ ìbc(Ò­y_(Ò÷à¿ÐøNŸùUnYûÂ×@=‹oL‰N~ !oÀ-¯7ô*¨,Þ+ÙœÕó"A‡¢‘¨±Ù·¼D¹1·Lï, 4^ÈÙÔ¹µë¸µ²3¼³kk¤y~zOèTزlä4æ7¦§ÊmëïéGs^ E9}Z"n†¦PEcŒø–blKŠòÜ,50Ýî̲Ø|‘ÁZ¯¿‰Ú½ÑÆûn@²Ø7MeÆ;z$â=´ó%dYïn¬º5¦æÕ²¦Ü´‡Å £flZ¼ëb—hÚ´Ëq›‡.èœ~·Ép+~Ó*’9Ev°’y`~…D!BÔ?Y³¡i2·ü³}4°ËxÀ[¾@.Þ¨±±ø$öNZÜøŽÙí£§’ˬ0A¡€Zò˜™I¡Úœ0ØÛ¥óœiÎSc±¼@í| LÌñ*ÄÕ}Ü:í¢ßV 
Ç¢ïL‘‘®ÈI§&BU87»xŽâd³’Ž:œj\ù÷¶5°Ëpñ[sÎÞò/zÕ>pûÕ¯~õÌ3Ïô®ZóÏ\d§Š·ÑÞ¶JŠR·ØÀÛÖEnàØçªÐW”¬cžÓ„Qa?'§¶[r¢T/p ÄÕe\M}rP¦Èm¦g ëNJÀEŒà–>3¾§g“ž“~R°à‰`•§Ú³çTOI<£vé¡/W8^7/aªùë_ÿúóŸÿü<Ðkâ}AuÖ3¶¹Ì,5РFswKvó$€Þ’©Ê­Ìc-/œ¢°×õ„ÇÓÂÌü§•‡;H³òðëž#ÂŒÍÁ;Mè6ùÙ¦¼Æ€´è9!BäL~½2Ïš£8-™Uº%ªjÄ$˜VeSÊ‚Ðu¶£v™1€ÞXý-¾sâS£fžïv·»½óïôјw½ë]ðØwÐ|°¬X'ÞŽš*Yí–ãÒд‰t²0M¼Þ žÚ½•L‘NýMNL³ÉjqkVî …ÍS욨Ì2†0³¤ÌûÄ\§¸[]äÄS 7Õ†°‘}µÏQmøé øÌ2njD{u…H!U~H¡èÙ%çúØnÑ.ã\L>ówM;ßtÓMË–-ƒ¾¾KêCr¾O.6íËkÛMI%£ÝWaYní¥¦¼©qÿ--oÊeK8½ÉB5×S\ËÝt6ˆ¡o¹…E™Úù,™u ,ê'ž]ÜW~öíÏICÝN"ÿµäi= c)//gÏ>XÎ>N[°¶Bç1,ÃŽÐÀ6Õ¿#*¼½< Ü®f˜ù¸vZ™‹†Á×¢¯C;°øÄ«W¯.¾‘~{k+Ëí™kšë­0e=zTx 9ÆÂY¦³ž¨|Vòz‹¢SÅŠŒ2±³5àZÎJtyx~³ŒN¦’³Ž-nôˆs¬“Lçóœ<#’sQÚŸ)6“«jLM £×§ ŸäÒLç"QI» Üb,¶A´N˺@·˜aŽ„ƒ[ŸìŽGV­ZemxÛ삆߼mâ]è¬ýh¤Õ4ß—°(¾ ûDµ¡ 8Ú~û˜(x O&lPʨ×7mܘ՛ƆF³zkúMÔ³ÍYF»³¬ßÀlx]EÜTÍ<4ÙRon©·6Õ›Ó¯–~6Jß>‘4§hÚíã° •ŠŽªÓÆ]ù7^Êi wÌøΪò7ä—vSe$h6WÇšF'²JÖ48šb÷ÓàXŠÇj)Žœ[ǵzÕC<ÕJ­V”ŽðMi0Ö’  ¤}š³ÑÑ¡¾öñþöÚP{6Ø^ÛÔ4:Þ–UZjzÖä°M/d½*©5å¿l¢6¼i³ÎXn:ºy0«T#=C¬ø\~š˜eŒ x8Ë<Lê×\lˆ“n‡@_qà ZkÓ±[%õɸmÝŽ¦µq<¶K*ÎJ0Ö 3½@bÍ µ°§µjxx8ÞÔÑ××·råÊ¥K—â0f=4'ÆkÅŠZ·‹¶böb»Äa²]P‰Û{Wäk´ 4Y›[B[k²™•J¶¨·g`pÄRn5Y¤¦ÕÙ!£ÉFÕ–u-[3rãŠÅËnÚ|£½Wµd²j Tàwî×ìKÄsF–Z§¯*Y4sÎ\v©E§-ºñö‹–­FTÍ5—YÈîû/Z¹aÓúŽK7nÒ!öZ²rmÿz0ZOk ©‹L‹Ç[Z&ª.}ÖÒÜb–Žoà¬ÕS_Z¼hñæÍ&í:;;¬/íÈŒ õ¯4˜¨¹ÀüV ׈s=i•#$Ä´³³¹µÕz\–÷ ÓCcÙéçf:îÍ3•ò#uÄEB—7²Ì{µ†ëÛÑ›[ vúª¦™ƒœ‰û.Ÿ×ÓÓÃI³ÐibUcÜ’Ú+l¯†íò AC/:¥Nª‰1ËŒ:‚кºáîk®¹Æ²ñŒ”»\fñðÕ† –/_N~æ Ý«»opçk Ëí‚Ú|w;ÊÂ!ßù 5sÆDÖ²*[ZmèYÜÙÙ YÖ“œ…ç>óèÖÞܸöÚ¿û»¿û—oýs2N½Á.,šô\wÑäÅÜù…A/$ÜýE-:mÑ·Oc·Þý'FG[ÛÛ«•  gBóC›—,ZÜ·qÊeË]à }}ËW¬pa¥gŒ³J-ñéîNÒÖëãCCí½ið¬æìhmªLÔÙ&f§cÑ¢ñ¦ªêpó&}ïæÍë—.^9šFo­ÃïÌQ/ ÷qÖÿ¶óÞ~÷»ß)Ëjéqƒç$·Š°6“&Þgwä“7JµXG×2«Úmµìïx/lðKJÉÅìÿ9bd'¹ïtQ¡ð|n%ÊnqÀñõˆ å‡ç©íز]€ã¾µ‹ÁÒ'¸Â:º„0£šäë=`Û}N§á1›”ÞŽƒšëÝÑ™Z­sÄÍÍJp‡ì®œÌY>uí˜$WŸ†]Ó¸ ³×6™ˆv2Õ逯ü™Þ––6v9kmkg}Æ6g©’džý¸Wì}‡UïûܹYWò‰j#YsgÎ$0˜ÁòË)çªØuÔ¦ÖÕ̹¤Õ>G¤w³XÜ’:mLc¸šÛ±½Í±inK­té›[Ú&½™‰¡Vs*‹¯Ëª-Ë–oÈ‹4­XqS6šïfŸÙžtµ5µ¶ŒŽö/éZÒ7°aÅ’ùüñx*ÒÒÚ?Ò?n¶¹¥ÞÔ£tTGjÕZOs~Vɪ׭¿ñÝï{o×¢Þ«¯¿¶©£Í#Å:ŸU qüˆ°asÿ½ïqÏáñ±—½ôåK²eí áôøÖ<ò-ºí[UõÙ¤ˆkÁ­Ÿ¦šóVø÷²™t;D—®›2±¢î;s3q»‰çÚÏ·-ÕÂ9kkˆ!OÜ€îÊ02ÛEÈ™{Õva½s˜ÄHÐ4òæÍ›Ã oÛ{N‰*u fN #.Þ^ Ý9­Þ²–èbrVL5jWo×–-œh¯6F"š)=×ö&sø›&#ÍÞ@áæá¡îŽE 
”MV³X+te/yìëxïûعé¡Î‘,[”5f…}Ñ4ð\å™Ö Äïv´+‰º+úÑFÝ•°ºn4{¶—þ\Ž-ã‹ú)¿::fµ¿­£³uq•„¿‚˜l¢3kÏÆ+Y¥+ë²ÆÛ¶õgÉÖd7ïÝ´wÖÕ±&›xËG.0†`‹Bþk¯½ö÷¸ÇÓ.:ý°·m™Œ/uÀ¬»iÆNŒWì#=ôŽ÷´}ì+èÞWûùÄ„˜NLì:¼®ïú²Uä½–åtd[¶)rætÙùÜJùEÿÇ‘0óîì䀒aI3ñ)hc˜šèásíçÁdе׏P_ÕêíØØ]€]BCzÐAI84 ÖÅékÆ«Î5Џ[¨2F4JÍH¿«djˆ1©û\CbyÛ˜TØÕÛµ ýkš áü'SЮ¬{CÎ6ŠlyÊ$ÚTfZ—¼)Îytw/J.€[„ ׳ñþìèǽ®Þ\ÿòO?ÊFŽ­Ï:öÊjÙèøDgokî&$@O›lÅJæ*O5ÃÄp‚S óE<%êîó7z©‹¸~ýz«BnÛ¢o—FnjJWØ¥˜³Ö¦–¾á +º—ƒ4xƒ¦Ò:Ñžµß4rÃûßÿ~—€‰`L?^yå•[[¨êXjˆV¿òê«îxàA•êĉ¯yíé¸e£`ò~û4ýò&—°6œXÓX–-†`•lܺîâ¬gÍàõwèÞ×2…þÅ`ùIø¥òù¯«%u§¥+öÏyjD»liòë`[N5s£çbå6 ´FZ w@ÊÏç´ã¯KæŽãç°Ÿº«ÃÝx šaÑ xwžk• soÆRÛëmÇs6sºº;¸X:ºþúë÷ßÿØ~ÃŒµ‡*ã&×{hS7ÚZÏÈaaf²|z7KÍ¡ŠmO,̆ÌR*N35Öå ÿ‰!(æ¥gÉYÚ?3ekbò/ÏhÞ¼¡Q÷ÒÔ‘Ø![^ú²¡ÁìøãO¨_7þ¥Ÿ~(m„¶ÒÇlç&ÓÓIŒxþó˜ –bSæ,™ÓÙ‡˜}HЏšº(¢±³g²kQU35ÖՌμ]ZñëürQè´xcÖ÷öwœ³bé²ßýñ`nÑÒ%mí]W­¿Ï}îóš^Ó™™4NaýÐú•=+#½e<švgãäöŽšimµ ŽonnkímêÞ\loimÏ:íÑkÎZó.a2%¡¤ÒbçVÞ÷"–YqÐ[ æÚb_0«eY©©Åh/Gè-㼎¢øm'R‡ºòdd¤äTˆœ8jšÚßÊ„2/nÀ¸û¦hw·¿¸×^{­[·NÃÜ›¬%’íØÈ]€C±B°*ç6o`ª4œ¡Jšåj·£fw>+öÚÍ ‹Hh]´q狱3kt{0Ù0<×Úgàô©Á|K E‚õìðGõõ¯]šÜbÆÙä³ý­ÙèD½µËJ™G”8:éÍHÖsKšlaËÖg/·!j½ÑÀmPî§¢£Šõ^ŽèÆÛ«]w?ÿ¸Y©Åç[.¿üòKϾ´ÀZ“ÏÊZ@ãðÈpwW²³v8[ÊëhßÊÆ×)@ ÔèÙ*ec`Øc ƒ*­K~íT¯¤!rŒ?¶{ik¨5ùÉý²O;_`J›¢·2×0|N!çŸà\hˆùÖBÀp¤ã™öã§ ØýbF]»kàmâËì•°›ð쌒«ª¤SpUΉII¼{h šmj¶®[ïH_6bÓw~Aneh|¨§}y¿Å“’“lÏ^òÈã–õö¼ûç.¬†“9Ÿbe%ã'CSò@9pÀÅ)Ë [Érq&ÿ›¢-sl?H&LØz¨fÙ¢Åé–™¨šš[ºlÙxµÒÖÚ686b6“Ï\ºŽoËæìµxÒ mQÚ´¦üx½:Vƒd£õ±Ö¦Ž‰ä¶x€º5ký]Q÷­F32TÍÅf^qæ+ܰÝ[B§XIbX‚5i až±õƒ¯œ8ï¼óLÊ1Eäf‡yÀžˆ‰"µ4Bˆ™ðsÎ9碋. @u6¬mŒ„œõìb k!oœZ»v-WI¦³êúèG?zê©§Æ) ù¦1q38fÓQý{oAã@™VÉìT¥(…Ǩˆy¯‚U¹ùæ›/¾øbMS$XMk…)w§b¦½F„ÕEÃÏBc:a¶1;2n? 
ß±ÖÕ•†££udmcü [Ì-¤Î=Õq#AµúùxfÄb#uhÄÓ[fa®ø”˜}˜ÐR‰Ù—š%e À%ϲ«ìždÕlCsÖÓTíHß­ €Ó;<×›œ×‰áæV™#Ù õ†Þ–‰}æ]Ù=˜r±EЗÌñ õ©ðæ§?ý©FÌh;Ä‚@…䔲,øËø@d~ðƒÑü÷ÿ7Ts6 PN`rÔj à+’ ¢^òàF$ÅVÓÈWP¾€@ú„N¸îºë¾ýío+XT”³I Çü“ÿ|à½ï}ï÷½ï}!mc[§ _pšÀé`¥j­#°„· Þáw µ&MÃò(qêæä‡†ÕhdÀL÷ÓèXWçÌ3 “‡ˆÂ³‰±tïùí0y/'Mf^ ‘¼ù&Ø?A!Ë—ÍùaEmÔv!;UÓöù;ÇÛqûTZr)5°P4Vo÷­¤JèËn§9dÛakÕÊÄ0ÏýÛþ¡o½ã–û|¡ˆŸra™$Ã"EBƒ4d2ä'& \¬ˆŽæGâÖñ¢ŽÞÛ=šVÆ(f¢Vílï­ŽÈÞŽ.žqeØÓ5õ¶îÖÚÄ8·½%½({¬¹ÞÕ½˜³Vmé\Sïomêj_¼ú©o|ùŠ}ö]»¡ï|òÐ\`õ“nkñxViÏÚnØp㾦'¶òBŒ¡êÈZ–ä¾þu+–¬jΖK ŒeŒ~K oø¯ÚÇ[59øÇ°Ê…‹lOÉÞ{ï-'@…ãèÐJÁŒ8†À²ÀKN-b¾l¤²Cp Éœ* 7>áõ¹ðSI\ sRa•agÐOqÜÄŠ;D q¹ûÿû¿ÿH/?œ×Æ’‡s|ç;ßù·¿ý­Ç5Æ8«X΄ÐZ­Ò}÷Ý—¢TÆ¡³A©®âJúª«hµ(2c¨T6u´uôöèFYcããí­ížÉš¨™Yš!xäkN¡; • ÅÒ=›zGš­I,,Ȱǽ¥½¥£µÃÈpIçÂÃ;ÊÝÓ‚[.šw]q¸§é¡l/ Ôê7Öê£õqïP} {õFÚ;=mLßúúw9µ¾¦^_·@uVKÞ♓Ç8Z*Ûæo´N+é—4çÌøŽNx¥D½’ÿF¼À¢^¿¹o½óhj㕺»j¬Òg~®§?¬¿¹^¿±^ÿÁÚ+žö–W?÷Ü7>ç¼7ÞõÅO>ìôãþX¯ÿW½ÿ÷õúOê›’²ÓÛNfŠ1Ä}¬2:8”˜K‹‹æÍ˜ðFÈ¡Q4•ÍCAùÆ7¾ñž÷¼'‹ì cŽ9ÆLÀºN9å”âZ¾ð…/<üðÃA—wª „pb§Ÿ~ºÕb`ãì%—\R”’ð6ñ 7ÜPÄÈjÀ³º Ö…^ȳ¤@g_õªW=þñ—Ôâ+2|ä#¨~õ«_@gÏ=÷ܘÓöÁ7³Ç2bœzò“Ÿ|ðÁ+rÄGð¶Ÿð„'ÈßÒŽ©T>ÈøÃ®.µˆÿüç?Äj (¥ Ìa¼‰ú ¿øÅ/~âŸhÌáy ‡ÿøÿhx¡ì]ïz×÷¾÷½r´K,~Ü"½•å`µÚ/öY74¤_¤ôŒ¿4sù‘%çã.þý»RšÊqGÖê6ÿµV'ÆœCÀm̹ä, ¤‡vö´PôÔ€÷´K?S{¯w£ÖÆA _ÉoÑjØúuà ®†ê‡rÒñxOýš…ÀŒz²ëEˆãiq~v’’™* WëÏÛœ•tf$µxÒ:n±õñ°ýÞõ™Ã ÙØ·KEÆêUüê54W ÷ÝPù^}üÇõúWêkxþ O¸äÍ{ë±G\òÖǼ镗×G/¯0ç(o¬'³=02<‰ç¦ýL÷ÕǪ‘¿éú5õáñº o£Õ™ãõ¡×ÅðblíÆ”™xîžrÈ!‡\uÕU¤ýÖ·¾Å <í´Ó¤a ÚÂm=ꨣò‡äJª?å)O¹ï}ï+ W@ò“žô¤»Üå.—]v™` ‡Ž;î¸~S ÂP”ÏVj‰o¼Q]Xšÿ-†mÓæo?õ©Oq|ÿçþ%ñ¤±=öØc¡ï{ÞóE ¥SŸýìg1Wõk^óš{Ýë^ÿò/ÿqsê±}¬¡€ƒ4”  5Œ®Üèw¼ã˜<íiOƒô$”P ßÿþ÷¹ÅW\q>ñÄã”*Ž>úhMàþÊ1‚áà~âŸ`6AµRûØÇäG«eR`œ9æíØ4Àƒ7zÈÓ6”¥„R3ýÜŠsú%¶£ãõ±èºG­>–ÿÆUáF®Ô+z´N ¡ògs¹;€žKž÷Œ2*5°³40mÎ+?´›y8ë]Ü• zC#þžO?zSµsyZN] !ŒªxҺ泺SóÑòb5®µhSS?§ü‚FlROì'te^–¡‚—J¥ù>Ó§Ë—™@Ü4¾ÙœòÚ‰ÍÞè˜õwöiKW,ÿýðÚÎöŽ45]o:åU'/ͺgíi/ùÐò¬w||ÈÃ:{uõfc&ü{[m#J,g­Íõ6oÃ4AÚÞݽdïUiY`ßYiκ—-I3‘^„×mË-Œÿôçÿ{Ñ.ÙgŸ}àÄýïÿ<àùË_Ôœ<Ú»–@¿“‘•r¸³fb¥ÕËãüÞ÷¾çsãzÔ£äE€ú…/|ÁS°› ên”Î$‰bRW¥rÄN4zÊ™ÜVücÙÕáÞð†¼àøÍí/~ñ hG6 Ç!¶¾ 9Í¿ùÍo¡7˜€ÊüàŸþô§;ü§ú§Ÿýìgä7u‚³s¤Õ:ƒ N­&!·1lÛ~ô£‰'¨%&™ñTÖÌv€·w Ÿþù<à#<Øoaû‚ .xÙË^¦ˆ mzS]0™9^cG^ÖÔÝ•ž&ð!sÕMíéÒ¶èa:Ö­ãýóÖ9[ÒÜ*gËîƒg ùÖÄŠÚ[Óë`‡jY •YòäP(x]ŒR”ùЀ'yóO'¤w?»Ú¡ÒÞ+† 
­í…Ï8vÍ_­#ewlÉ6e‹fÞ<2’§:ÙŸ¿©Œô·a´“žT¥´RÂ/µÚé"ivst$ãMµvZͽn¤¯¥Ë m§¯ M´÷z¤õÔ^0<6Êp׺²×tÒƒ²¸¥×HU+½é­Ù¾$4jý­%ëñ™!ÞÒ¢Þe^þ°©2¸¬céèÄ(¶E‰›7.[¼¬µ«£R³­šˆþg#ã]íiëïÌ¡¥mht¨»³»)kÛ4°i颥?üý/×mø·û78 K¾ùÍoòá" õ΀`"Ÿÿ8ÁU@/Њ·üë¿þ+èzØÃ†2×Ü/‡LšŽâ°CôðÕ4u,+Ãà·Cøöb1€)q0àòqXp¢f}í¥Rê;ßùŽ‚Ï{Þóh•l$<ì°Ã>ýéOssþóŸ«÷ÙÏ~¶Rª€©t Õ2!b¬%CÐãhñ )V4SÀœ$F$…X>IÁY±³Áð¿þë¿$ñˆGh‚6’í1yÌ—¾ô¥?ýéOæÀp€wNìHç9j=ÊWo\½|8–b9[ðÖÆcÓÙÇq»‡‘ÀV¿Õe¢·;¼©93¬òQ•€§éª<,50¯HcrÿlÉ |ÍÕmÙøÚjûâÎ|ç——¤ûÙoÅÂBßÛÖÓ“ã½ðq‹"°aeJ¤ý©y¢áOVmÏ6V*Ý-]væ\7xó>ñ±Ëo¼¶ÞÙ606²lÕÊõúåOžÿÉÑlÄ[Y¼%ãÏ÷Ôª¶E·›©^¶by½‰ÒÒC^–‰÷î]¶fíšå«V.ï]:Zíjëô–ÏaZ¼dñ²Á1¯í«Œ·t´WÇ*½=[£ß°yãŠÅË»:{†Ç‡»Ú»—,ZŠòSŸùôy§Ÿq§;Ýé¥/}©ÇuÁæzúv|Ì£;½•á®6Æ­Ú5‘}ïØì¡¡ñ‘¶öÂA¦íõá‰&éIél¨'ëyøi/ºÇ¡wý¿ßÿq¯ÅËZÆ«Ë;{.:ëܽš—2|ÜÖ…Þ((\jí>¥ãÍ-š‘–™DÉ?¶Óeîq"ÛÙêô½ÙäuRo½=áƒê¶Œ;;’“qKG½¥#§™™~e÷">ÕøDeY{·RÃCC—ýðg½é4K§æZÁ›ßüæ7@b)TP„SA®fhbV)0#†dPÖT0p’‘ó€‡'š<‰HÃ*àÈ9Ç<àWÏð ¤G€ ½°Bæ¬ÌÆXØ|äG?ú§@8(%’O$!À“ä±Y %ÌÖ ¬,îj8ò(K`Âc+8DƒЋÆì´L.5§ ¦JíÛâgË€³…P „æ‡B¦Hþš½ó²“Žo+™÷Û"ºå|]gÏ_ÉYd¥JÞ/ç 2Sãs÷;ÿ»P¢€Ê•(å˜ ¤·ºQGêYGÍ‹¥ ‘^âì1þžŒÃЖrPÚC›nßÞÌc8Ðm¡è›ôLwW‹Ç+›ÒZfk{[½Ýì·iÚ8ÜßÛ½dÓøÀ¢öE£ùïÁž–E‹7eã-í]ॿº&Ã]OžžzÖé€üW¿ýÍ!w9”i>äÐCŸó¼çî۾ת̒8C6¼2ë^»æÚCVß&ò÷]çøJŒ¤ÁÉ «—TúB0-ä9‰Cþñ‹9ÄS<ƒsÄí¾˜à³Ì#cYo›œžÎ®‰áQï&T`Œ§«ÌµÆµ¶A…Ï r´Ñ”²=SðÆ!¿ó?ÿó?¡¯'sì.ÖY_ùÊW:%ó+_ù 2sÅ`(rã@fI€%AZŒ¿â8¦\OH fßýîw=’ éCá*‚OŒä?ô¡Ì–_ÿú׃äÕ«W[þçþg4-0˜ î“uø‚X‰½×îÞaõÔEe^Ûè;²¹ç;IÑ–)#BJ¤/;Æ"ó-ù2r¤Îç¦qfçäÓ£4ÒhjmnI/OöF‹¦æ}ö^}À¾û[Å´Ø c>ô¡0xólñýÈG>b 1Œ´šÃg®•sÑ€‡§kµÈYˆ}÷»ßmÚ¡¥YÄgu–ª!:k¨†'M†GÈUКR¶}ïAz-Äžt‚ʪ((àð"†@ν ])OLÙlezÙ ´ ϶}Ù ¥"Ï8uÔQ¯{ÝëîxÇ;beG÷ôYÏz>°ÄÊðÄî‚Li0¬ì¯ýkÍ7.G€²(UgŒâù"{­5V+´ÑY Ïâ¿ÿû¿·?üŒ3ΠÁ5ôåR;Å 7¶ôÕ–¨Tþ´ÐýF+]ßYÄu7Ú\‚5Œ[BÒåTHéâ¡qirD›"X eö´P WMÂh{q¸§é¡loÒ€g(ÒcGÇêÞWðØKz ¦:Pïó@ë+ï{Ñs=£îqœ±úúu›ú<]ºÀBz׃ÌãcéÙT!a¯ßì9¡ë£\æÞTûmßõ/<å„g]|ÚcÎxõ!/ÊsÞ}Ú³/xÓÇnº²ž°º²>~y}øêúè_ë#€½¾>zC},=öìÙ£~hO ¥YÍ›‡}N/ ))ƒÆ ‹g=¦ôã!%?ÏNÆÉ8ßøŒÓäC!A÷7Ç#Cé>¿ªYÆ$ä|øƒ‚j,-H+ÄZÄ…pò¹ªÏþómëõ”;À/´ºiº¾~÷»ßØÚ ~ b›§¸¡ÚÇ£VÂ;N¡¤RñËÁs¼7‚™6@à,G<`O%ɤ±ªPQΫ_[gϵ#$æñÓÃüŠC5ÆO¬º£=e8š~sô0÷¢·]"M8¸{TÐAcýÃ1†®q¸G)¡l줒ç+èƒÉ)LAtX¨¬V²~/§l>áð÷¾÷3'Ø99áæ^ê“éµÎÉ• F<õ–æJm¢ÞâŸlCe ³m‘„IfCþøÞÎ+^ý²ýöÝ, lêÏ9,ëM¯CÆjcö'ûi´[ 
ÊdO—/JË·žÁZRM»Š¸/cýý|å¶Åi1õ–ÀÏ·£1§óMmáÿˆãÇÖD’°;sSP|Æ061æmGqªHÛçÕZIŽiì-r`W7>á§³X¾7^Eq&jÆj(\äD²Š@)k¢Î:äðÑ’¤…=áøâ,qaRŠ*0>@Q&26]í F]‘° Fqs¼Üeµ(‚X‘’?Z™âÂ)/r"¡8ù5Y9„IøB<*2 P]¨Nš´§Éá©OSÊB±Qj†Ø&öÙx¾S4iC•Î4Û¸µýH ¥ø¸Snîd·´Ÿ#z\$8©ÃDÝJVÀD\»­Üþìr úöë®,¹iÌ4L~ÕM»6ûzßÍ7¦Gi†²Ö½:| oxý~Ýû/¬VW&²æv/Þ3¡ w«­Íž±ÈFÌû¾ö-¯íjï˜Û0<ð‘7Ÿ;²¶¥éiÞ4'ɿ᙭¬×}ñ°õ†›n>ÔÕì·¶e±gMF#ã# ±·qvxÊ6ŒÝèXZY¶í(@7ì^¾×šfÒŒt„|eW]ìÍF™8_PMQÿ­¡ïD}¢¥)}ˈÀ^jknëlëloK{£´T“9¬±ø u —*}áJL¥,! DätæIà_;‡á )Ë4CÜ=ŠàìP]€Y äk꼈@IDATÐÈÁS"æ¨+¨“Ä ¢ó8Ë4¯+&3 @ •`ÓÂä'Œ ÏäO㤼ÅsìŽVgeÉŒUº|mm2¡o 4ù­Àþ@Ö˜ÁЇ14Q©³ª“-ºæšk:è @ômÓ b;›¸9ÊIŸÌß=™zã–!½ÀÜD´;£±nI7o9%Ï›êËŠ’&§I‘ê­ (®Ëö߯ îm…›š–n×OnŸæOAÙŸ®øó‡Üµ½©»?yÛEgoÜ<41vÉÙïgØu±[Ýc»CV²UÜѱhÕ*EÁ¶vy*dß}öµ?§É›uó.Œlì‡=^ŸßæÒzµt¨Çã¿ÍMý)¿}Êל¡ …Äãšü0ÑO ò¤£Z÷lúðù—‚ÛvÓÓ™ióæñ «|]ÑSB¶VU{Ûz@Uß@ßÒžEÍí°Ê ÷›m—mMŽÉȤ<›['ø‘cõJ> ™ÌT­v6w†oi [æ6¥Ã F^bÄE~$n=ò¹mÍF5[Ð¥§°ìaoâç§à›D“$Úœ¯ËòÛ?Š¢6` ´È„‹Pq‹2cŠ䀙ðP1A®³µÁ ½f›­ÊD¯`8Ê JEᡆQQ1x2%â#5¡©É,…"Ê:K* Ñ ø e7Àl™¯hd*‹C$”mäµG]`Þ!ÍàFæ@åð§‰ç,nj‘ !£jEä¡ Øn§1Î\BzlnN!ïÑ•R¹übqã@-ú]p\h™Kôí ×.Eo›vpѽ´@'v¥õ¡¸ÞE›ô Bÿˆî%¿H÷CAܘˆ®/'4[6Ò”é=Ež‹mõŽ{Õ[+]žW­dc•¬µ;=È0Ç;xcîz*dV7g $ ÃÑb’;-ÕÊ‹l:_žæ‘«Y«=È9ÔÄk4|lȇó<5QÍ|u7§½ym窽~Õéƒ e;²æ“O;¡20üé‹/µ¨ z û±˜¢TH¼Ì“»U*ÜÎ 'Z€PÀ¤Sá²,i@cà®SÓ Ó8ø ÁsšÙÙRk^ ew’|ÌqƒâaJcv7Ά—ï VŒ?‘Æ -4A&ú¢iì¡‚Á<ÒÎ2˜(ÃmeÐ0¢R4üò1T6˜‡Ý 戭=Çì·RÅ)ú!^(fÓŒ&„ê ,°Ì¨+rvrЛß*S5GÖÔQúÛ˜SÜÛLkòŽ๋³MYÿö“º‹Q§IN³ã>×8nÝÎ%—¯"½ úJ8Ô'ô`ý@_q#Ê¿]˜’Þ¨<׳;ßõЄl¹ 6{mšÍZ/V¹Wi Œ‘€~RœDq~çIò}ü tdÛvš–Ÿ§Ðwd,}Ÿ­Ík¥ÚÚ7öoò•5×÷Ýœ5U;÷µX¾yïÙË_÷ÒcßøÊáÁ¡_|)z{¯Â$-¸Û{Öz›+a &s‘y–Ñ;ŸŒ%ý*m­ãÍMÎö†Yñ¡ûñʸïÞ/^²”­Ý41Ò½zﶦ–ìÚúPwÖ~ôI/9x¿;|üü¿ÿÝïIHŸ{óî¸I¢M…<»k⦛n9Àx!Í ‚‚±Ò)a³|ö%KK¼ ´ˆm†rJY Ï †.`ÆF¤­éíÚk¯u n)"Áþ°Eʲ`E©ÀN– nL>¶’ùR)±¥eªš#¡x°gŠdÂfRAÐÀrkÒ¬b(¨ˆÚ”†Q©³$·o˨B¾QJ¨x“ô*Âf¥´6’™*¬s;ÍWDÕ1’Ž ÔE‡ôÆ `êü­þêÃ;ôãcUF-“uëôÓú}äL˼•¤ós°àØþ@¯Ò ô?½S·Ðt—\¾ªÒ“dŠû]Po(æRæG©e­»°ø¼éJÞ{ÕÕÓÖÑ™™×Û4°1ÝÏÞñߓܜZ†!ÿb8Ž 1÷«:Žû©d-&¹+fÿ¼ 2íqFÜÕÑÙÖbk»Î7÷–J[×_ÆÖõeÍ×g•“?xá›Ïzý²îÞ³^ºmcK}n¨:Ü–Üçô>Åý„ˆÜ­>'5Þ± Jð€@È&¹CÀÀ! 
Û˜å:Â$vÝu×yÜ–I —L€1>Á2a²—=y’{QΰM-X)þ°ŒÑ@`'¶Ð‹Yãeª@¢ÄŸÉ’ÆAÀ-Ù0!¶]c@I*”rN:é$.Ïh£˜ÁT¯„Zà·„×M{¹n÷»ßýɰ™L(› ;É@¼h ß@¤mtÊë5³Xi‘ªiê“ ÆIà‰ÈßùqÁªN—¿Ñï‹Ã™˜»AÙÁÍpí]u•åE'ÖEâG7rÊ%[Ë)}™4Fµúm;XÌ’ýî£oC´xŸÒœ>”öâG¿©fKN%}IÛPfvÉ1/k,n³ÜNXÆeÛˆdüRU90æšr¹2UßÊmʺÚ¹¶U+,ƘyîËÚ²îÏ9eͦ¾åû­¾ðÔ³—¤ [Õ}i}h|piû¢õë×ì·rõ²Õûmò¢ü Ѽ†ô-G?┣®®OxC¤—Õ*¢Fƒã£Ùî›ÿâ5¦ºwçà.6µk65>ßk¶Ù …X:î¸ã˜íGsôÑGû¬¯ô£ýè-Ðbˆ:ýôÓ´¼uùâ‹/æb¢ £4£ú|˜Mõ ’ôyç‡2 rŽ}æ! zå²7J>êQØœ„—¿üå‘OBU3e1€0iü¾÷½::˵P5á•…y/}éKÅ!Úbþ<°=áƒ!ù¡/zùv¯ipùÞ^y{܃xÞaù¢½$±¾»´7NÿÃ?üÖ–Ç?þñÿó?ÿ*uÊ+¬½“‡Ã&ô£µZ,3$” ä0ÎÒ[†èw;'¾¥ö¢Ï ·ƒ´xî!`¢PïÜl«D<5·-Š|N;uÝâãÿøÕW_}æ™gJK„®z\xƒJýÆ)Õ›Öu júñ¬7£Ô'¶!v¸%oCK{Ê©€=ç:ZOx–Þ%ÛÏN×}ЛjI‡Î_}ó5 ƒõ©dç cñÓ5S ?Xùå6ˆ²‘7Öëצ×2W¯Iof®ÿ_½öøsNzøéÇ<úí'<áÜ×Ýÿ„#¯®×m4º±28<>V÷úÈQ¿Ä`¼Võ.èd‹s³ñÆ›Snü†åûíÖAóßò–·XÈô2g ý÷ÿw˜ÁzH³]t»¨˜ÁXØSŸ—÷m¢Â¶¾ìe/óñàï}ï{J]xá…\Ì“O>ù6ÕvØa‡A}VqA]a£@&0sÈm P¼ôÒKáå~ð”§žzjX*Ç}à@éÅÔàÓ—‚ °™ßÿþ÷ûÛßzó3&¾Ñ¤¢?þññØn¼D~8ÊÞ\­ Ï3øˆ//9‡˜üêW¿rê²Ë.£%f3FDR„x7^ìLu¦< z‡X©(¼mŽ&»Äd©”™òA!Ð…†Ó©”ØpßáÏþs6 ^BYn([ÿ”§<Å.*.¯o)"ðœ13%°ñŒ¡é@eå aT‡žðUEª3 ­ÕdŠhšoš#4Π™G<âQ³l‰I{H¬FßjŒö*ëÓÈÊš±×Š¥Å>þyÔå,kYìbÃmZmkªúNFS[ÖZoíÐ%óNšÆ#‘þÛ㨱àãPz|b¼Ã·±Ó;j²áÑôù®Ž®ÎΜЛ&eþ²n(ÀæëTË@UÀº¸KcÅ%Ø‚ÓÜ2"k‘iÑKî [$4;v Ð…®FMî%7^LÅèúЏ-T˜Ã)ŠOKZ.òég‡Åöiˆ<ÝZ%ŠÚ-ímëmíéì¶%Ê«3[dÖe'sáÃù ôX®Guà¥/ª¤w$Ǿ©¹(¢§k±r•‰Q{­=£î Ë÷Þ‡ßѼ|¯#þñÕË÷^568ÜÙÔrîio[ÞÔã=‘µ‰ÑÎÖÎ|¿uó^KW{¸¤ÚÞº¹ViOßá½U`J’5aɦá®L¨ˆó¤œ Š3‘·;ÅÆÓ¾¯Çg=øàƒ}jЦó¥áái¦û=,& áØ n [!faÙ·9 d^™^] Ðï~÷»K0ÛPsŒsxAÀ¢..,ÁØq§x“8;Ķ.³3òÁ0xºò£.Ø&}|Ðìq dõá^L~b«HŽDð'j$F4lÎÄçŸ¾Ï ³–¼Ý&1öa3#6ËmÞñ«_ýêÛßþv³÷”ùÌg>Sð1IN lK{ù—ÙûÄ8܆‘üeÖÿÅo|á{—}ö.^ä±:Oða®Îî.w„wÞüí1„‚OJçòô@?£Ci«ù}ï}Ÿ'<ñðƒÛîthN¼e¤i2ãÒФtŒf$ð‰qŒa‡«†’Zv[Ölm÷T#…èmTi"]èvºiŒm½=ú(5‹¢”Od1àÅjZ ;¬(QB££cˆÆÍãSÕú’dkÛòpÕ@›Ç€’‹0ÚÚæó>ù·ã—f×_}Ãéï=;½¡jÔ{©Z»Ûz ¯A»G‰æúÜÀXu¤«¥³­µþmªŽ´µt=ÿÕ/_ºzÕHSíâ3/YW]»²Å6Öözm¸· ÎWÛ«|n“žãÞ.ÙÙæ½“üã¦æ–4ºÌ- Ð%ñ-è¹ùÙq7·»_üõ¯ÝŠ/Ð=âˆ#¢uSÝÝa1Ø·3†:&ØX4–9Y(æÕn)–ÁÖNßýe‚!¥i6dVÄ [¼¥ÞXžPw ¾Š•µ@}(´Œûe2Va⯸â Us$!\$'©Œ†,à–ŸûÜç04Œ ¶„SÖqa2ðü‡<Ä|!ñ°âà²~ò A¨i ú‰×+çcû˜i”ŒyLr†}#§¸|ãßPé_þòª£F`ÌùQ;Ùx5,'á™GêÒ(ò8F [3¶hÚ³®§=ýY/xúKÚoýWCLgÿö\’Ÿ 1ÄÉFº²øÉ–ã½èjI3G3¿Ú,ÆF4L‡4I.h&&rh^RX ˜ªíoýÛ(ùßÊk»”×) 
>ni±Ì¸Æ:=À<³EC6ÆÙï|ç;ÿñÿ¡K™5ÒŸ¶Ñ!ôu½úêC¸¹s(]A «"ý):¥|ã&W¢¦LìžÈïDZá÷'àK~b-Û{žKÌ7ׯFÓ"Y5¯'l|ÍYëbiKWkÖ4˜atÜ9§=îøçõ´Ïɧ½ù’·¼³+½WË^xÖ¦õË›¹Û\í–¡á~suM‹ºÚ=þÛ–&˜“ù¨óM&š¨é®§—gykGz8ÉÏéi¿-ÏT<~n¤øÍZð]Œ0vT‘Û} c€SXRJØ™%3ngóÏð ±°(J€a“ÏŒÀO~ò(…Ò)>1;c\žåŒa—€U<ï{ßû2 ¸€©´¬ƒÚ‰G¤°i*e¦Ø1e?Îæu¤ÉùÑ~”ÂÓ¼´QìTŠT8˜=æ‡ ¢ÆË¿,!› dÁ–GunäWµ„*HE†˜S [gË7  Á Çüà²q²mg“¯i¶¤I„%0E{î¹à&±Ÿ¨!ðÏß;[.Þß[e†ûzÇ6í—Õ¥ï5o—ßAY³_°:0kö‹ôݲžƒ²Ö}³ÚꬲwV‰H·ÊÌAóéŸb© ûD¨”žµÔ|\b9†\Í33šcî‚`]D\QýX/Ô'ⶉ{ ´ ý®ý¿øE›,¬[È´‰ß¾Au)£¹èI3ª(ëчY_‰¾Ž§²ú“±¤‚1ÆqjF&eæî£äÛf½Ý¾ÔÜê«Ap²b1bMj`s%}×zGeòeÂ>{?׆oÌÆ±ìϪ/yã1ýã#ÿüþÏ]pò¹Ë²n“’cÕK(÷_¼‚³]ã—u/[bÆ{hlt݆>{\ä€ÞÖ±*hœ¦Âs'q•8èK¸ÀÝ|Âán»Åå0]ì¶… J±’f¡®ÛlÜë^÷úö·¿ cÜàž2`ÒÈYî#2;ŠcÍÏ\«ÍJžÀÁ–h—²ýÃüðYT4-¡.3¯ K`æñ Þ§|;¨ù²*U\u¬ ³&°‘JíL ÛŹôd”êÌáÉú zŠ3ü´§=ÍŒ±)_àŠ&— Zhn –1„š²–vzk&Á0!3ÿ1´¶ÕÙD“e2†LœÅæ_ÿú׿ÞInÜ`fžpàš|xØ„üèL3ð¤¥Õpº¶a'MìöT'z½H5«ß©{Åæ |NdhÐávÿ¹W‹_;W«ËêÍ+êm+j­K&²eõ–å¾»²•àY‚¤. è¸@·´ª„r×4.\àÑVØÌ=[ŸXPÁt‡FØø@*ýÌídžXéP¯Ò5 ™õ˜HÛb SFZ§/¦%Ü ‘£CÓµïШ'nâ,¶2Õ5­xy¸»iÀöÈ´ûy´21¬gLð®­¿áÿ½/= \¬{ø·¾¦¶yczî'mŠžkøE½ú£ú¦'òßz‚­ÎvX_1°N÷µ½C cý}TQ‰=Ì 7U$ÔêÕ±ôðqúÅ–æ"–ƒhÚo²Ð-´3î3Í©vÏÈ(<2ˆ6`Kò‡?üaõ\4Ó <ÞñŽw;Ÿ¯ºê*@b,þÃþ0”rÊ)§pz˜]{‘<¼™ m*³ìÅò!L,¾z»Ì6‚?öØcAWìFVœTrª< å" 9,¾§žxÛ„¹÷½ïmG´ù,¾`9*n_·Mȱw: š"`Ë<Æ¡]T 1­ûB.‡QÁ 8L°;‹m4ó’K.1§ i´m¿Š²*¢1î ¸%â0•l¬1DÐÅacÌŠjBä0¹BãÙíœn¼#ªù£qûŒUj£S·ÒVª,0…ÀH nÄÆOb0Q o…Íœ³“¯¹ ‚¾nÄÃg!•‹gVYqXt/i½MGùÈG>"F8ì°Ã^üâ+^(+8lG' q=RgŠþT”Ò±¢?ýfKeÎn£ôn€‡7ÂÅqöðÿê¯<äo¸¨x†XÕãH•uRcÕÛñÒeõúýÎ{ÕƒÏ<îWõëê•õz.­)«.k^ÔuŒŽq…«‹a €#D-S/ʘÊ-ÿΛ˜ Tp‚ vfÆÀ5ÂmczÞPV¼ëh`ÁMAëñvÀÛ/a¿MƸÒÔœx =I2) Ýb{bÄ ›ãA¦]ç”’ΧÆ+}}ÇF"NŒÕ}ý·{Iwrs§î Ïÿ‚Èä³ ù‹:R"‡ÏXÊþ›ÖÖ««Vîws6è%KVì³ïª½ß}Ò9«;–oì»™ä×ÕÒ†O{k›˜lʉé-T2v½²*±é†1aX˜ 93†Â7AKž‡ ¶+W¹Õç¢æ«QöOrêǺ¾`éÐ& $Çûkœ2íV1#-Fà&±ý*ÈÜ<·9=_ ,ë]Phoë\¹2½|Ê«NsÃÙàØ¦„– "åÂao£ÊAVŽUâ˜:–žÂPÞ²×U.Ùk/(=Phjéìϲ“Ï8ióšµÍÙˆ}¦«–,³skÓæ+/³C·¥µ-½Ö2<à€ùÜý-Ñw!t ¦ƒ§kˆÄfOöÇ®OæeFñp‘_¢o¡Š21K ,86ù Yã=Ðסe]‰½ÍÐWÚ-á~pWH£1_[Ô /$‡‹ô™.ŸìmÊzZ»”XÞcÿsfò¹V­7ã?ÓG>f¶ñ{òåÙémÜ…ÁæÕªÍžD`‘ÂÚlMœ€ç€·¦Ÿ2k˜šhÛÚùž;¡¯Þï0º„¾0تLÈJ¦”pƒ¨í ¹šò·¼—ŸŠ×Žª=ü)ê¿Æ¡FÇ5“ý'?é±îàHHGÒäÊMæ+¯¼zïΔü*6½Žœ¼ê‘Wšb‘5TTS€',Ïf…¯RÚ«Tð_Y)—Ó é£À²T<ŒêƒF@#0¥ÐFXSúñëÎ-!ÕĹ(2l%])ì˜ó64"àÊ?ÜSÆË¬¶E¼ÅE^ 
,ƒéÞ²b‰€ýÐ}#¹Nôák‹ÒQ;%ühÅ,–Êq<'±Áá ¾*‚̬ƒF@# ¨! %à:2*e4ȶ T.2„(²)Ò0!ð½b¹”/ç«~5p½n×D5*F¡rÍÙ—[P¶)â©4>ŸñÆ€7·aŠÑÉ€F âk4S\®)'tûúp¤!* •¸:ÐØŠ§"Ų¬×p숰9GœÅÄ EtG){ÞÒ+§ÇR3E¦§Úïx‘ˆ•ÓÀdØB†*Ô1€F`  %à)ðu‡G _.gšÒùsYì¸ã,à üj9¨ßàÜ­*ƒéÞbוT«¯®Y™ˆµ°Aõç-Ž œ@§E,ZÍç…e»•jç•:h4aÐ< 0:yj `¨mæ²â¬ÃÏ+Wò~/~9pD)åWu ©Ÿqb¦ë»åŠqÚfÌ^/ Pí<Ñ\ìYß2=, ûÂvíx(z:®Ðl„€&àѧS T,Ü 8)úzó·>¼Èl¦ûfÁ¯ÊÉۅ幌h“Yk„³c9^¹‚[JŒµ]w1ýjnVª‰K¥!½{ȉei­ƒF@# ý<5€;Ëý$Rß rIR¡TŒšI¹XˆkêW£bé˜#²¶{Þ¢c‰TW5wâ%g²gûÝ—Þš ¬'ΟS:––ÙÅHUÖ#é Ðh†A@ð0Àèä©€¤Û”KÉL¦YdÙNøÒ~Š^é «Æ¼üpF@ÉœHãíÊJ%á$ÊŽq÷•wFD·#¸ÄòübTØe߉›ÈFVë Ðh6À¸#à\.GKÕú uìééQmw]éŸ-²ó¹ ëëìšN¼££ƒcÉ#Tª«ú¨t¸S/X•¢+2’v=é\ÒÆéÕPä_©–Û³û¬þu‹¢µãG{yË”«|Ì8«ŠÙ÷–‹šô*¤ ×—4SqGÀ©TÊó<Å 0ñ¦¦&¤Ë2Íb±ÈÙÉd2‚ÆFv‘³Uþ¶¶6è™ïãÐ-µU}Ôl&ÍCØ–\‰Tê­¦â)_.8Â:+äO)ùòg2ð—¾Áq8׫ºÙ“+:ÖØÂk±RJeÍ–†È¼Ü¦–k¯W›Ä\'j4 qGÀˆ¹ê]wݵûî»Ã£ŸøÄ'¾õ­oÁ».-V®z{{‰sõºë®›6mZ"‘8øàƒŸ~úiŒN¡gÅÓúk6\Võwx§~,úD¬ÝJ—­x5¤ÑEDªhɱ¬Wrø›©Æ¬º¶L&ãÂÊ÷÷Bº\Ç£¥Ü©0¼Yþ£ƒF@# qGÀ4õþûï_²dÉgœ±bÅŠý÷ßÿˆ#ŽÈf³pp__Ÿmah¸ö¡‡Zºté³Ï>ûÊ+¯|ìcûò—¿üÄOp;Bs ÓV4›@6—M·YžkÞò½EjšèçìúXd©‰ß!°W.xÂ-Yö‘ûÊ’“ã&*g·%ÝX¹mà&jßÛŒ¦è,ÀC`Ü0òë%—\²Ï>ûœrÊ)³gϾð =ôÐ… Æãñ††æwQ2§ÓitÎ7Ýt—öÝwß3f{ì±èŸ÷»ßÁÓгš-žbRw÷­ ИÊ0¼òµUÒîŠ9 ÷U¢à"Ü"ÓÂ¥¡Z '¢ÑI8ÂÆZ«_™xòþKow„Õ—ËJ)9”}ÉKI’yùÜýy½ˆô=À(!0î¾ÿû¿ÿË®æ_|±ê0D;oÞ¼_ÿú×ê÷¥R Q¢eb˜S.1% gM§JPV·è£FàMˆˆææv¹'R¨G†>1iæ$Üx«RŠnµZþ»·ö¸KÎhJ¦=‘+¥T*#M°†0§òLKÀo½¾¬˜ÒŒ;^·nfÏ;ì°’.„ÊÛûßÿþeË–ýùÏ^³f Ï 9X ¸GuÓÃ?þñI|þùç_~ùåüãíííhªaè)ýTuç7>eO`Ǥ܊é”)Še‘¯º!ñn`ß¿Iªv1_øæŸ­&œÅó/h©¨ÏV ’oÉ.áЊ:<Óoáf?Q#0wÛ"ËbÆŒ™Õ¬Y³x P)4Œ94LŒªY="%3Iüâ‹/~ö³ŸU||þùçrÈ!䇡)d*>LÝç-G@ÚÒãº*•ì+UоešI¹-RyÓ…U½¦Æ¦çþãç×]u[RÄÈWnÒI(éY꜕ÔË hÔDâM—¤S5)ŽÀ¸“€a_èVY;ól˜ô•Ë.C%³ZL›,ôÏ_øÂžyæ™^x~ðÁo¸á†ÓO?t2Ôrß(¨C3¯ŒÀ­Ê'œh°Õ%µÔXé·7*AŸN.rÒùFµ_®Šˆ²c²Ø-!ò‰rÒÎgB¡Ö7ËBî C÷õþ¿FkŸN›é¤ö FˆÑ¾l»kÇû‹Ð­ëˆ’)JDB¶\akGX“ë]ѽ™¬ p¥k5âPÚVRà…ææfxa(³¨U¯ubÜ0kŠèXkk+DˆSDä]RPJÔ¯¾ú*qdÜ_üâ?þøw޹뮻Í1ǃMÖ­·ÞÊü1XDÞ(måÊ•ÊK>,»Ë.»'`ØE:GEºCW=½±2‰žj›= µÆPmP…?CÇWn/{Bú×D:éŠrµPºå²›q{ÅŒá0@09о“ÝÀTA¢Y½zµÆð†òBWW—âèYe¨É‡[θ#`¹°2ÿ¯ÿú/§tø/ùËÎ;ïLçwl¿ýöÊÌJ1ñ~ûí×ÒÒÂ%Æ/;î¸#p`~Åâ¥p_gΜ‰ÐŒ½4«•–/_NEJÀèþV|OD;BQúÒ¤@ÀAx-ãù ±•Ap^5qÆVIÙRŸ\bÍ‘ËXK¬ìXÅî iMGã1aç 
}\7mÇ/'º©‹|xŠ2™ÌP^˜>}:d/@F09•º´.`;FÒÅì¯ô8P@Æp}†\1°R²éAO³¸»»›œŒ_~ùË_’0gΜš÷Ê7bé’_Ý+wZø°¤d¨”‘ž™Hfš<ª®7¢S& ¡l‹ôJ¾Â]a¢<ö˜ÇÅ"+4'ÀD:5k:æcÎý|RØý^÷ŒD³^¡hnyÄßRhƒ55<$AG5qˆ€Ò¼*^@ôBíL#axaíڵР—à Nai2R§0îZ]´hN6¾úÕ¯"ûç;ßa¢÷¤“NºŒò ˆ@Ò‡v>:^{í5NŸ{î9iÓÑLNmùò+v@M'1û…ÐŒ"’±õÔ‹À¦f‹ëÁ¸#`ë*&tá`Xðßþíß~øÃâ—ƒ S¶—]v󵬦ÿ<ðÀW\,R¸>Æ‹txtt g5~A®O–-3JEAÆÜN]\÷ŠÒ—& ¤Ë´oèAÔ3¿é :|öŽpqLî ËîzuñMו;{qþÜÈRav@ŠÅÝpï;Ã6оåq (<Óÿj4ã%ÚÒtGwJò­–c ŒÌ 8"¥ rýâ+„Üüö…ƒtÍüòãÏHþÁHªï’|ÃDuª€F`#4oˆ>bxÂÏûmÓDŸŸ¯Æ iùØ@§©¢¨`nòùÔ‹O›ÞØÔßÝ…òYð±*êd_ \ðœ«à)ö2éîj¶ q§‚Þ²æëÜ­DÀI']è©ÆÛÐ@ãŽÃŒò7!7BbîÇA'}êe b~pãYW kÆêÏèÁ0TÀ­‘-z–sGJVÌ®ÿÕh4CÐðP4t|ê!`ˆÆÆV³˜‹ŠrQ”;+yɼ†È±ÊWx o¾ºR©>zÕmiQÉòxi‘šgŵ†TPã ƒ£LJƃ(ÖHy0Aÿ«Ðh6   x:6(³ÃB.6­ÉÕ˜2‘¤ Ébë,"ø¤ì\×yËÕKYš„÷«„‘$;Hªü õBÖGàà©©î³F@#°yhÞ<œt®É‹@“–è÷úR"`'ßaÝ‘X)ú¾èÄÛç+TG‹ˆaÉ ›A†z‰r¬ý&/BºgÀ¨   xT`Õ…N,Ñ>mFµØŸ°^9*ü/9õÂ¥—;ñØeç/â/ÇXý…\w°Š}9† gEÀôT%O˜.ë†j4ãm„5>žƒnE½PkyqiŠ VSÒ©$t)M§üªˆáFÒ…/YkÏö ¸· ®:ô©õ¥N§¢é—#¢Ër³¢xÂUǾ-3ï¶Ó–ì÷ ‘±hjMeµI´Ùª¥8Äk°õƒ3Áü»‰«ƒ¹ô¿€F4ë×`r!õm`A¹q‡g ¹±¶HÈ%F,®Å’Ÿ/æËeëK‘¶ïøÉ"VåBeÐá§}êŸÞóö¹©– N:mf4ÃT°W©Xiéë*nG ž\ˆéÞh4c„€&à1^W;J 0ÿ†”*é˜_<Ü`¡\ ,ˈǣ±¸ãËýý—:ÿO4‹jBälqÔ§¾m—]/ùâiñÀÛɘž„¯KB²/%¦ˆz9%é Ðhꃀ&àúà¨K/(Ö [ƒ¸F˜Ç_ÄáÀ‰É]PMgsYépnû”H‰Þ˜øòÂ%åýŒ«E5cIÖü檆—>¢Ùe!ð£VÄÔ³½ãå1ëvh&C>W“¡;ºSÞh~/1Ó’?iÇìUäÜo,._x6Ëõ—2©Æd²áoֺΤøäW›‰àá«ïtD¡Aøý+W°%ƒ‰KÇWì0gçŽj¸QaЧ<΀F`«Ð¼ÕêÆŠ€kMR–ʈJW¢\vËe&€½dJn@Ù±6'æÅßuè©;í>÷ÆK/ƒ}ÛEÄ Üé3gËÔ´ñUŒÅÀf€M–€F Nht€ÔÅŒ\áK›g$UµPwp…W)[±¨ašVÔ´¥§IñúòÜùç,lÛ{d·¸ê¬ñ4T*¶_µct`º%xì‡$Šó¥YuÞóRú/f|N6HèÏËôíf¦Zföõ¯þÆ—Ë5E¥R³LD†°û«¥Šm»1KÀÕêO¤âWÊ.‹‘6Ô¤c€F`ë§0M†ZDmâ›Ë冦÷õõ±M£JQ—ØEŽÓZ¢º¤S„Û¢Wd'íêåí@6üªô²á—ƒ*Zd‹™Ý@\tþÝÿŸåO^{·‘qbÂó™ú…¨-'Érbhºì"/üì˜ÑHÜ`ã©§î©F@#0ŠŒ;†h!]ŽòÊ—Ó08…\Ù™S˲*•J±XÌçù<І†Ò±!ž ·Fç´T*)&Q‡©†@ÑëXŽÚI[öBö<×wH¤_”MËa€æDÅg?'ne¬J´Uøàƒûí·Êäüã?úÑ`Yø˜†ÂÄP,‘üàüàakßyóæ!ÀÇÿýßÿM;ãî¡n«Á—ŒÛdmx¿ŠGàÚŽBöæÛnííï»vá ×_ôU„Y~ñ„hnh륑3–Udƒd¹Ù—«p° Ĺ û2ŠQ´€F ^Œ; ˜ŽÝ{ï½ ,xì±ÇöÜsÏo~ó›|pGGG[[Úi˜XYZ‘øÚk¯‘™SDÞ•+W’òŽw¼c=ö d<@9W±Þâ."¨r*+kÈ»v¯’§‡+G§o# ?vö5$ò¨ I6d‘(T„M4¤MKx¥¢g›‘X´×7âéH½=†XŸ¯‰Ây,fïw.»1.¼t¡©s+/º+³‘‚…hø˜°VgüeÈ?Ž #T´.Rñ¢ƒF@#0é( |ü™ñÄÀ¡vPsšœ*ŽP§ô»³³2ª¾/õ*q+ËÁŠêé§ŸþÌg>søá‡ï¶ÛnçŸþG>ò‘ùóçS,ÔÝ*‚TqUÐÜpà ëׯôÑG‰wwwÐ&‰aßuëÖ7àR 
ÌäWVÖªðµk×’‚¨­.Pš¾4ê SI›x¨’£$HÆW‰H¹ÔŸëëV„i±ÁA]†@IDAT>͉ ÉV„¨ kc´Ü/½æÂi‰Æ{λ 㬌ˆKÐUQîÍ‹¦¦åëVJùW€F`Ê#À²Ø—/? ûÂ|ü¡"5lV¬XAöUÒZ-}k"㎀‘;ÿøÇ?~îsŸ£çÄÁåøÀo~óôŠð¥"È:üÜsÏÝqÇ\pK‰Èå©âÓƒ¢¦M›¦ðetƒ‘JoF@ˆÎ ÙéÓ§sJâPôk%èȶD€™Wd_Ë6oYX,,kˆæ‰iÛ²+º.€F`œ"€%/¼Ë—·N|ö‰Ã u-æ”ãœ9s8¢Ž­#/Œ;F± 5îµ×^t•Á¤ÈÃ+èp`nù& \Rúç›nºiæÌ™gŸ}öêÕ«¹ÂÌ1÷°—Vž³àZd\Ф|¬º`z„cnG8æ^N±íª#ÐõG§ŒòîÆï(öÉBôz{+YTÍü}©„‹¡”ÅfGŲ°{D)‘pÎX¼øÿïßï¹ð†™"9G¤›D2!"ÕlQJÇ®øì'4Ì›†£ 4_{Å/Jƒ —ææfˆÞ…HQ(Á›”߆äë·vÝïz®zHçéySSziænkK~I¤^ˆ“ãË/¿ŒAÖ=÷ÜCÞœö€ äMiŒzòp ¤ ûáˆpLù¤lfi›S£Îó–nªL¹‘o*¸s)OX®˜kd]ì ‡æ…Ÿ-÷µE¢é$ÙO_pž¶¶4ÿâß~€ÊÙòÝ´é”û‹-uÊ”–Tø§ônlÁFó¿o¹‘úF€F`¢# è ï¿úø¯Y³š@ó ×´··#BCj N½:;î˜ ZX F”–xîܹð% z厃ž“A±/ñÇ5òqÇ›ªõH$*%þ&1Aª @"(Ô]®ÊŒ‚š’ E!Œ†1o²(¸Í@s̘‹Î"¥/*ݽ‘ÆŒ%}6ÃÉN4ÚÜZxç{>¢ðÕK®tE…—›QU¤b²Ð(mĽ‹9Q DwÄ1ER¬é3ôþFÛì)êŠ4㔬p­â¾ù½½½hRg̘A{a”¦ÄA᣺tecõ^] ÝšB¢f™ A" ¯ZµŠw(æ*X(,0¼Âþ™ P&jj%ƒãpm\Æ5P8p+ìNQÒYX\s:Mý3 ®¾m(Ù¢bJ3+Š¤Ï µ`È‘¦ 4¥ U°‘X–}휯=hÉ NòëoÅ V“Hr;_aº–´Ÿæ¤7+‚’ˆÙX37DZ(´Iþ}é ÐLu e™  pÊ ¤sU©¦¡âõb_@wðŽ;îˆÎÒ5k,HWÿþ÷¿£…š ¿„âEâȯ˖-»âŠ+ m®BÒ\£š¬üÆ× ©º¦[fDCáʦœÒ¸Q |¸KIÆ$BÕo,D§l3°À‚D¡Ý…ea s+Ã6ŠU°ÎbenÝ-·ß–_úà¾r?A–*°®!\+ˆ `Ç…ÕÔ(Œ.¹ž×¯-[ÎQ?ÚmöuEñŒÆV5=(/¨S(‚@´SšgÔ¢ŠêŠ㎀wÚi'¦¾Ÿyæ™<—göâ‹/~ò“Ÿ„Y¡UTÓ01( ³òýý÷ÿw2rÈ!ŒJ¸D€°Q#ÃÜÃ=lH—|ä\u ûrKÍ€Ki³G òáÊ×éõEDçúÎÖæ¶|)×OšpoÙ5“yÓ\qϾ|u^åÕgï~Š·¹Q®D‚dÙ>Á GRµ\ø+åbu`gì§E:\CŒH­I¸¾ÏK—¦˜ˆ0<´Ù52†û* ­îf¹ãN@ü¿ä’KðÂqùå—Óç¥K—þä'?ùüç?*S)A )ŒS@ ¡–#§*¹*;ç¡€êøE€™–™Ím½Ùžd+õ²ÒHsd¶_Ј¼.ÊdîB±ŒFz„fÕTШÈ_;á}iÌ€P™ËõÜU}ݱ–¶œ0׈üéW·¾»û€½÷?õSG΋7µ³÷‘+üBAZÓ9/“À®'¼P¾eØ‘2°'ºWû­&vÐçœxé-‘n°xMFZ4>fÖk4ãEŠ˜êÞ¤q7Lok Š ^V_è6 cl P8+†}¹ŠÏ,Ä_>¿èŸ•\w¤tcƒ@Õ«š~Ž ¡[¦²ðË~ÄjHgÚÌØS7~«âöδqí,ÕÌùÞžd$Æö sºžm¡€6؉ý±Æ2Ù¸µÙ„§?û©Szó+ªyÏi¶´+ʱy¦ºV€F D`Ü0­‚A™âE“ õµÐ*²¯šâ­‰¶¤(s*µ*KùÖ€}áo‚~¸“î( êtŠâI‹Ïš1göÕ§]ˆ|«Hbe×ÛWv«Ža""'››„øÅ‚‘I(g8ï° l¨ÑIû–tä5—´úom™óäswÈÂ\”Ü­ƒF@# ÆÝ0Œ ³*+ež£ÄÄ\âhŒ‰p$N6Œ¢!lôÏêáfµ*‰¸¿g™Ý»4»¾géi‹pu¶N%KyxnË´¸å؆Õ×Û+W)YÂL' ÂwÕŠ%å3ɾŽA´¼/Üãˆ×_[$E¶Ú%‘‘;[ê ÐhÆq'CºLâÂÁà•¯|Q8«»P/ò®ºª¨!Ë,eŽP5‘š <6 êZë‡À§o;Õì/Í6“OÞþ­¦1û˜˜hm/ôfñÖ Áæ³Ù†¦FÞ «H”¹`ˆX¾Ö˜\ ,2Âó$»‡4ì‰X,ån4©EDQuýZ«KÒh4›À¸#`Ùš ³r¾¡È´ê,«"*¸Ê¦µ Zá°íŽ!Ï)¶8*V ÅO."|òSZU­N¼Z-'œ(YJ¹| ‹9.ó3Ew±ßH¤N»„×!¼ã¾6ÿäζsÏ=·¥¥…aÖ†÷Á‰ÆŒêc²AFЂ(×p U›í‚+ö‡ 
ËÓPGçcÂuÜTkÕÊÅÏÚåâ·%·³–ÙÖœL)^\/ºšÅœm‡•®I# Ð A`Üð¶éèD@@N ú©R†$S•¬: æäyÕ`\+e"Ìë—Kñhl€} &p½žRÁJ¤û‰ ë+‹Ž›¹ûNÛ™ÉÓN; öå¥!2Hµªì7Câ7¥‹·°ZNe½´Lúß„¹äñ¯àвØ/ f-p™¥ƒF@# #4ð“¦Z˜6$9Îc½Ð ¼KÿH†%*ù7s;Ë=Mé&ÛŽä‹Ùj±œÉ43dÒ§{1­•ÓŸÑhÅn<ú¼¸°gJO”Ò¾æªIÍ8`(ÊAmƒ–­¦u5m@`ÆÚ Ø]IûáØ€:I# ØVhÞVHOÖzÔJ½°›"]úJ*„€¥M2¿ÁМnŸw¾?×ÔØD†\©hÆ"^2Ù+ª®p›ÿ¹¶öö9 ­_;ÿvaH ‡I}d_& ”à‹€šï,ï ÿ°*õm `Ú@E1'ÍÎ…RqÝ#D›H7¹Àzì7”¢4À¨#  xÔ!žÜtb.Ê\dUé]Yýéòp`ËU?¤õuáØ¹%°#"[ùöD|¶÷í”ÃWÞpñ¬hæ‘Ko-–ú°·òE¤¿šK;)¦~•‰»Ú,rè”ÿ&°•K(–Ë“¡&Z æîjQò+‚  iÚg#Z–ÒA# ÐŒ š€Ç÷IS«Òâ*’ƒ÷س+ÔCÒN·7µÁ‹‘D,'¼þ©Ìä+Áº¯Þôõ¾Îî[®¼nŽÑ„Oôt4…«|¥Ôgå\lƵ³´´ m­pˆV³Å¨hÃ?T…yn8’ä+NÜv»(9L(q [W™nÖ³À Ó1€F`›"  x›Â=ù*c zT?)p*…ï ïJ«0¯šLîfwYLoý}ç²{ž~bùšU–ë·'î»ênff+^>j%{²½ÓZšc©JO¿›Ž«_W±/§¬Iž€¥N{¤ê[… *÷.HÏV8,Ò4K:x‘γtÐh4c„€&à1~²TË®÷ÿBÆ% ¬ɰ!W[Úò¢l_u÷Ýw¿Ö½îÞïe¢·*rÉìXµ‚Š_jmh‘T]ñ"É´4âBì ÈÔȾÄGÔBKqyûRwEøŽÈ‹ùÇ|}UÏkr§$oU3Ü‘P–®ƒF@#  4ê“©Î\ ßJÒ•‚0N/ Q¨–=VYÑžR6ËÀ¾kzÖLkš‘ór_ºàÌ&ö,êÉ?rë}°,<Ýåå¬Ë”07[Q·£äh Vͽ(‚ï𲯂5”Æ ÑÓUnjŠJæ/WD5ɤ𺵽ßýéÃÒ ¥ÉNIÑJïâLë ÐhÆMÀcƒûä©ö-•üÀ7 ¿\éëÏ6µµF(„Šà™Še˜ú͉.šÑ—-+³ÝŒËN¿h†ˆÁ®¯ýõ¯»íºÛt+••h8»rCIú- ÄB##ð,ÓD¥(,GlAH¨"õ‚-#EQŒ ÃLƢп€F`¬ÐíöÅ=vÞGªÂÉ ó˜ÞJî”7©D_Öh4£„Àx$`ô”ô*Uì‹°»råÊyóæ 6…U۳õmmmdëïïg• â/>†}IÌçóðñ(A6™ŠU¤ÊÝŠV4šì_Û•nkET]×ÓÝØÔUåz{Ø@°h9=¢ZÎY·^˽ëÖ­Ûc—]<ô°é¢7)%Òž_´Íì+çq‡ŠªœJþÛâ°‰›jI¡È–¸¡Q¹v).í§·»Än»¾í–‡Ï‘’.¯m’$¬ ¯Â®×ŠÚâvé4ÀÖ"P±dk[„?ÂXìñÇï{ß‹\»÷Þ{ÿéO‚})&×­X|ú‰“òØctÒId{×»Þuë­·’‚XÌU;@±ù—@½òXñ«½ýéæV‘-À[ÓZÖ®Z ¿ÙMÝB¬âô.9jáñ}…Ü-—Ýòí;¾µèÔ3÷™¾»!ÊžŸgÜ”4Q.~¥2° °¡•ÆµÈæ·l¸œPïöý‡LVÕMú¶Y çD./?£‚å^ORæ‡êyØBþ¡D}¢ÐhFq'CŸO<ñÄ—¿üåxàCúÐ7¾ñ}÷Ý÷•W^Ù~ûíÑ?#ø2 ã²1ÎóÏ??þük®¹†œ¿þõ¯<ð@Dá³Ï>»··—¹ášÖztp›ð¥¾Qöƒ}ebÁI§±O~jÞWåß@ºLâÊRU C K¸qdßb‡H¤Ä—½ÊÅã3Òsáðʬ®oRw¸‚åÀ’ŒuÐh4c€€L9ÕŽXå.»ìòö·¿ýÉ'Ÿ„ka܃>˜IßGyDݤÔËðôA„úÁTÆÇü/ùËÿüÏÿäÔ÷}î®’ÚU¦Š)¿v:\þ‰þÆÇ;2–EÏåX5ÔÚRÌçàa+Ópä¢S:¢^|zËõ'_ƒ‚;‘l Íðƒ²itvwLoiGÂÌöô&£1Û0C3¬AØ«ã`Úü;´ý!+³¬7”0ð¸þp'`ן?èÒ'~¶¤”±Yx¼ÄΠñÿl“5ÔCÀè¦õRà7`©4AMÀ£aZ4î$àµk×þßÿý‚/JfÄ\úŒ¼téR¦x™†,•zù/ùËo~ó›ßþö·Ð-·LŸ>ý¾ûîcJR‘ñì;ˆê”ùwDºÊn ‘b1IDS±Ôß»×D[fd…8æò¯L›ÝúØü+,Qj¢/Wʤ0Ñåî¾X2‰-©Œ|ÑÐÐ(eR?Ð,ö1¥ˆZãÝpW`rÉyØ- ª‰ƒ½€}7Ì÷Êr$ï†Ü‹÷è00ûK£“â˜^¶¶c%õŦ‹b¡Ð4‹¡*îîîn:¦Ä¹…«ÜË 
Q›rÕ3AÓõÂ;=ù,»Ò¯êZ›nh"1W“ìªz…sä5§œqó¹l"ôôU4Õd.ßàÇHÞ¤Çð§Š’¤û?óƒÔk?ª#3§o‘ïdù04˜ ž}±inH'+}¢!‡z˜8üó |ã'·HËg[ôu—ú`eJøìÄæÐ˜·Øž¡ Ñq€F`â#©)ÖP"¥â؇yOØ"€ãІë(› KÀp<ôôÓO³Ðö™gžqç#ù\õ•¯|…ùWNÁœÖ0R€)•LY—§ 3C‡Ð*qDaNi̬Y³ ÎÙ³g« uQéþûïe‚ §ïÿû÷»ß)È ÏáCæ+VÀÓ”£ï¾ûîtŠ8Ujm2q/wµ"y¸¢&hº¢Ãt2S~këtÆ;Ù ÔÜÒ)å‚Ò¡WM&®?ù¼ÇϹrž³*ÑD$Yéì’µ…ù¸è‹Š~Cº–šÎ´êÁZ1®"fÒãªê¸[Ñ´,g‹BÈÿx‡|iq´ÑßS€Ø+½r×#éÁ’ÅÛ÷ÙóÖ'—HÁ·,ªårÃÌXCÔ””Õü¤ÛiJc‚MþUé3€F` €8·zõjx—Ï>*UÅ °â¼ gÂwÓŒþëɰ¬hŸŠ L¯b“ü?ÿó?>úè²eËÞóž÷àòâ¶Ûnc^¦M´¾^ ¢d(ö…ƒé<å¿øâ‹L S>â8G#\:âˆ#8ÂÐ4ˆÊpõÚž3gµ(ÙšÁò4È2ð¡p³òïAi” ”Ã5áÒ¡E‡D\„?Ñçç-ƒµEægOúâEW.™i¸óŒköˆÌ˜k*­ë–^° …È4œrÈ 9µÂ–r,þÇåcÈdêHBí§ÀQ¤;(3Kɹ.ÙwΙLÂí^=¨µ+Ž8àÄ?þíwÒ®™÷ºQ8‡UÍ(ù{‘TK+e¤.š°¯7ìë_—fêB4‰,;sæLÈ•Šî”@Œæµµµ~}ê(þÍH_ ¥€å¨"'#bþô§?…›ñ¿ímoc”eÖ æ]wÝ1+h ¤X À,ë_ÿõ_|i G¥1F–ýö·¿ ^ä„Pö³Ÿá9kÏ=÷äT%n²IHº4ÌŠl yƒ5ígDÃØ¾‡›á~n$Zê‹õ&Û³!Æ0ÀAD9æÜ"¶H0ΞϼìüOŸpÄ´ys\¼øW ni_߲ȓ±i-ýÕ\ИÈå‚%·3àuAðm?Ý’9RVt[;*Þ…ëÔ¿ÔòoqP䮎òfÚ@kzYQ„ì˨l­\LtÒǘ%ç»/Ý#•ÏQ±jåjÏ2MÛÂ{ k‚e  7*<|bAÑrü »¯ƒF@#0倠ä=5AÉ÷Æ à˜JBH$RP¯*V®fPÑ&TG: b©Ï‘G©„îwÞù;ßùLÌ%¨ñþûïgÍî&oˉôó®»îBeéUœþùôóÏþ3B‡´‡G³Ï>Ë%|f‘Ž/âÌsÒ›Y;·ÜUþU«V©ÚƒUdÙõGý0x¨Å ÈceÅõ«÷„ë/þ—“¿d¯Áß¹ÞSÀ•3w¬XßAÎBàóë*å²á]ò P¿ÁÕ™:Öj¬s°vu.ŸÞ,Å~U~¾üy”$ÛG}¥ ·Ì—=é æ¿wñ§f¬ »×xE™…Öf½b)¨p#/MP‘·ÈTW^e‰sN«ƒF@# Ø€Ä RœJR4A|Íš5*bR‘­?J.°í1¨Éòâ‹/Æð˜œ°-SôL;¡+‡+ä­¥£ßF¨Eä?à€þð‡?Pˆªñ¬³Î¢=¡N‘¿•¿è¹sçÞrË-dS”‰;\½hži6]àXC¶–ij甫UKíꄌÐ؆_ÈY°c Øt}tþGN=ú3Ÿ ûþ5(½ %“ ²ªVyœä\ÓßCfh˜¸,€ÿ¹ ©0Œ„%«²kGY *‰<µ[Ôƒ×7û_u›<*–7båܶ¬?øÀÌO|f—“NÞç"ɾ¯c&/X·®Ÿ6ÓMÚ_r¥JWàd78/01Ê¥l@):h4SÔ¢°ÂG‚‚I—‡4L\Q9ëÈwÃz‚{·-ZôÑ~ôƒü \EàÓ )Ò¹´‰)XÚ‡“ºZ—£ªW¢g¦çTA¥t[*D¨]µgè-d†\Q טZƒ¹mƒ eDFgéS¿(¸̓Úõa¸r&L:zWB¨FãÊOZ qÔ©ÇÆÒɯ_wkVä#‚MúÌ’(Û™ÿ;Böªï–MU8UQM £hy›Re+-pXp0d^wà=Py¸JDýjq"[¬†¦½!,XRIe²êIYœpè9÷>u£\XD ïÅþ\)žjZ×SJ7ÅÐ~Š«[ã áÛ¢šè‚#üñ7ß|3Œ¨x³ae¸TÇfjQ kªhzQ;ÕJǪð P6¢NÕ2°C2*ì¦Pø[$qÔåÇ}À™¾ô¤s[Ò-ÒísCœyïl!›IdFÛ3r Í¥ aG’¨¢jx[ú×K …4ãÒG닞ÈÿÄE«+Úóý¢)iàä¹)^,䬋޽§«Ø×÷Î1ç1ŽèÌŠ3ÄÞcµ®W# ˜|(šPbgÝ{ÇglÓYÉ¢½óÎ;‰0: °/¹_xáô½°/Â8—`_ˆyÓ¥èÔ±FÀ`oGüUò>~KÅD$Q•D,~úüÓ”ˆ+ÕFõpG¿½¼B¡Ô åJ4+åU$]] eé˜MB"Ψ¦´Qé8»”ÄŒíUVœqô¥kû_½ç…sÌé’×uõ&ŬÑo¿®A# ÐÔa ˜ÙPÈ3cœL¡žåë¬ßT‹ÅÓ=÷܃Žò–K¤Ôq]T}º¥K©!àIn<3q}@^,„mH¦fgfÊ,¬éJ,!³È0 “Ön«€Â·.äW•N6Ø& 
£ˆá82¶ô泂HÌìËލºý])Œ—Y…¨\§}SwµðÀﯖ.׳Ò;'‚i£Ñb]¦F@#  †%`ì‘qYƒ‹,¨—º‚™‹…•ÿùŸÿ8šqâðôh4K—Y/P°S”m¡×¹bNðúµlKP`¯\fTÃ!û'Ç’l6Ê¡¦y–Úf£‘æH»j½Rí¬Û £yNWsHíÒëÝ<ñÅ_Óï—ýõ nµ"ÚŠýùå¸ÇŒžlŽËFùYèâ51E ö±Û¸X!‘´×^{=÷Üs¸ð.æW¤ ò²â2ƈ ’æ”ybN7¾_Ÿ̈|j<ž|©è$Ù“€‚bM‰”-,ì s †Èæså*X°ñ¨Ä]ù“‚8t õ*ßU¶O‚šå~ á– ¾p}QöÉPˆí1§AtÚbïŸ8åW–Åú¿¿wWv»ó²éÆ–rIùâõè 4@‚Ñ&„pð‡?üa6½ÿÒ—¾ÄR`D^‚° àµ×^»dÉe™Ìµr‰wŽM–£ÇìúÂ,|' ×ZH=¢ÓÍÑï ›¤•rÀ%Ë–³û¾çZ¡¬¼MÚ á2 E¼„RLß(("–;b¬¼\ÄqÿÑ Îþä•«;–=ó§¥Ú9çV§Ñ–[-GQ¥kS„@Ô§À8F`XF· _wÝu4~ñâÅ0.Šhd&ÔÎ1vќ¾\eã„í·ß~÷qJ7ͰpÅî®+ ÅªrçÒ›®kI¤’ªí"¶‘·Ru0xÕ ¿a Üòý 䑱MУ(6¯š¯Þ,“jÙù”æÆ¶†ë™—”FXÑrw®×Œ4]«)a'ìõ¯¿Þ<{š”‘uÐh4a—!©Æ£g†‰Yƒô§?ý SxäÚ) \%»hÄßÚÊÚ‰Ð_ÙÆÚº£©° I-\£ËUÓè3®Ç]rê}—ßÞÎN}ˆ¿U-‚ÊRìš/áD¥vxTÃÀÒ#Hñ—!çTɶF6»ÉšÙàa  ’ˆõ!5WÄÉŸúÚß;[æEÒuÊ"Š/J\i²`IŠÈÕrOV„›NÛ¨6_®ÐL)FuÒ›ˆ;ÌûbÿŒµ3[!AÆ4EM÷®[·nÚ´ijÇtѸÇaÿƒ)õ´Æ[g-Ç–k¸£ÈŒÒ|…¯éú‰Rö$`wr -ìh„sÃÄQ=´I"uäb_ ¿\ÎÇ¢RߥVöZàU“íÉ‹>´0Òï̯“ÂmLtuõµÎj>FWBºáŒ¥SQû¾èרvA®ÐhêÀ°ŒÄ&|Eáˆã‰'ž€_Õ\/ò.êL̯¾ÿýïã4ŠÏ¥bèz4F—Q ËŠÙP^¿$‚ã/9-:”€ÃÍùT­™T:ƒAÀÔ•ä*EÜA3‰¯Z¾lÖܹ ™)cžÝâŒ.M%Z¾þðÁ²)G¸¶ß4»!›­d’™MÝn³ŠÉ ¥épR$LÖ€F@#0ÎÖzê…€O<ñÄ«®º +–$A·Ì ã‘‹-ˆs•¾‘MÍó~NÝæ^ ‹bö´ìDìþëï.–²’oH‚Š%#޾ˆ d9ð8S=¿Àœ5g{QòD9X¿¼ÇB6®ŠOþË™±xÃ×] Ð+ÓâX¹bçÖ²‰L$¼EÝHaèz¬J{.4ÀÄ@`ØS¼ëÃ?üØc}æ3Ÿ¡7ìsP3ufKå|ö%(G•£ÇS¬•åJ9‰ù.»ß:%á¯X·Åns,ƒÆV… ÝÁÄщ1_ø«Õ¥ZP5…õúÊV*Ò<=‚®üèCYeçúïž(§†yO±|Íkí3fd‰U«;æ¶3‹JÏRr§@rp”':h4 À°Œ’™0ïû±} a—©_Å¸èŸ ˜C+å³ò…ßÊ ÑÛ)ØÈH$ÖÕÝÕÖЊ†ö‚[.lžÞ.gVÙÛÒŠHy7´pBþˆk¤8jHù"n*¦ èR±/6ÙYÏNcbA´ýÂA'­]Ù;sÚ¼§ŸYÊÒ#É­ü\1wÆ®UQ)— ÛÍl–7ªÛ%ç:tÁÆŽL¦ê Ðh& ›l±€‡ Ü`±é¯2¼Â ‹h˜YahnFVfbxÓ¥èÔ±F ì–[[ä†?+֮Ȗ K~MNÀ²êʤ¿mÛÆ¸/]NÊ«êÄ:fI#1„ݵ˜‰¥žñ[üèz¹Ø7C2—ó¾p°S‰$ ̯ð\Y,¹Ž •H÷;2y¿€F`b 0¬¬68š3gΙgžÉ$òÌ, в˜X7cu\¤">ûBG䦿´•¹vG²ólTÌsÛRÛÀµ%]@hÖA# ÐÔ E£´á°°t,¤«ö;BùÌD/ŒK¢2¹¢AÄa_ÜD«4(žn.p]¡Ú˜w%‚‘µ[lž§²„—eA »ÏÍú¶aÉ=Ž,ê) ¯ ¯ÏZc[y÷lé ÚÇpùöíû,}l¾À² iÁ×,N™©áirùQ(#"§6´ÊIi]K ÝfXk´Žh4Sa X±,ü§˜èßÿþ÷{ï½7)HÃ`Ƹ€#§ˆ¿u$`ÄS<{p¤dØ8jÇ$º…Y¡[âê¡ÑÕ Å»1#†ÖÖV5z˜Òv°ó>Ûý&c­¢•!ËÉÇŸÌ“‚àäàiØÙÿÁ;7ï_ÄQÞƒ°0xW)‚¹Óv¤Z(¥ã (šu{+ÙdÄ–|Ùç‹D¸ØŸ}ï±3ZæövçššÛnøú|Ñ"ÝnTLi1+Z%SJæ:h4ɉÀ°8x2ÃÒê‘G¹ì²Ë^~ùe&€×®]ËRà“O>ùÒK/…!i¤RÔÔCÅÓ­Ä Æ%@«Ö¥8X±/ÊpÖAq•t¸V†háÒ•hΑÆle&Ùí¯«1‹?·}®ï#¢ 9@ çS·¾³ —Ž0m,¡ªr#½,Ù-€#jˆSŸ~aÃÌN¾fÁe ø!ÇòBd«"‹éW“ü•³=²fö5zY®Ô=ô} 
þõ½g¾}¯}¿ñÃ[¤ ÙõëD+Âo™]ªn_±Ôo›Ìp˜~¡ž¯V}ú«KÑh4uB`X+hU>º_¦®®.&VÛÛÛÙiÆŒ\BäE&¢ÔÅð%zàº4 ºE¦@äo¥mf(@@%>ÔÄõ’fp•½ŠUíÄa%מZk!u2×N‡Ë?±ÓÝ Çö– û¬ëÏ~bÁµÓ„Üñ€‘K,œ…-ÕÖC¨z×m±Î7`ƒ%Þî†y£A`V ¹¨©'ç5¥¬ÀÍGʾe¤E¿ë¥t{è!56¥þÑ"²WóÂaÞ—ûl$åj¹”‹Æ˜Ý·üÀ6(5Ã03´×žØ@·^# ˜°(š€)”Xß~ «âƒÉ 7ÂSO=u衇BŠÝÝݰ/ùä“O’o!KqŠ$ª¦`ëÒ2JF¦(TÍÈ»Lú"ò*©W¹Ät¦b\5K­ê…žÉõR¯e®K«&p!†zž½½½D*ÕŠíDÍÔkØ5"ð—ZÒ«“¶Fb¦ßœb»_ÑÑáÍnɈnqöç.ëþëºw½mŸdÚ|ø{‹˜åE&v5Y"_êI¤œ\5ë.êgV*›ØMû¢Ð_I¤´eó~õtÓ5‘Vl…À Øùóçãú†nøË_þÂéßÿþ÷xàüóÏ¿ð ûBu°/—F®f‹®B«>øà~ûíÇ,ïÇ?þñýèGª|ũԨÔÎ?ÿùÏgΜ ݪ€°~Ì1ÇtttŒÆ8e‹Ú?¾2£œ@A,¤goG8'Ò—E\·àK7 ã ]4«qƒ"fÖQ˜9/f7fÐ<ýÉEÝ=Ù‡^ºãôûOxâù+äâ!î­å ¥r4•ê*d#N&ofwâl±T.³¤WÄÐYfÝZª Òh4ã a%`4Ï|²-Z„*øŠ+®X¼x1 ß}÷Ý¡·OúÓ]t§©Êüª^úg… ÓÌ ,`ˆ=÷Üó›ßüæÁ ­â™K)˜âU{B@Ã(¢W­Z…\s/”‰Ö0ÈŒH"µ¡r<%Àñª(âý“Á¦?*^ѶlžsÀ®g)ƒ‚zë~ND¢«§¯¥%‰T+DzŽ`vFF9]'}òZß yáFé"£iPÓÍU4Ï–ˆÇ¢ÌQ§ÓÑ7+OÅå_ôÏÒ¯žÞú'¤KÐhÞ ¨„¿²ã`%È©9Jî®Éu8iVn"߬Èͺþ&sÀ”3­^½úø”ŒNö¥MulÁFÍìééùÄ'>1{öìG}DÀ»kµ uwmr—6 %/Y²äµ×^ƒD!Kð,±)_ɦÔK£^x‰ì°/mPì;2k2Œ ìK6 „¼é&Joéœ*da_X™xmûÅ­ìÔXÞÎz-«±;U໑…=Žíð,ë5ì[ylôÚ[ÛãNŠi]'—=RÃüé=î¸ã´{~µH®D ÐR£v0©wÆw<Ë‹çÒhZ¾¡ì+ç…%פ^©ÔÐA# ÐŒ:ó"†É¥"!ñÁ/ðÆFÒ\fÐÁ¢b_¥Ž­Wƒ†%`ª§MG}4MŸ :F0"Müë_ÿúÎw¾“¥ÀJ†ÉÈ\¯!ÑRà^{íE.\Ë+%§2*>I¡Æ?ÿùÏìÁn‰?þñ÷ÙgŸ~ðƒW]ué@6€½:½ ø 3hÒ#ƪýT èÐ3ƒªå׫kcRN cõ&iÔ¦ÀˆžE& mëÒSü9Ç¥—æœ8æCK,7ãVÜß¾Ãu÷}Eº‡Z g,@rŠý¥t‚EÂU?Iº´CY@½œlT¬Ø×àŠbçº4V¢Ðh6’¤ü6Ú6¼‰À )S_xBXİFyaXF4¤&¦Wá$®%BøíoKwØa>èJH™ð6ÑדP##Ѫ‚ôé½ôßþö·=ö؃u7U/_¾œÁ+’±Òzâ‰'Ž<òH„u&­iÞÈVÙô r¥ýŒ0`_ D#­´ a_*%…0bK'ÆÅX&Ãäöš` oFXÉ„Ü:Wr/Êßú„ e7ˆ^©G>åÀ›Eoüþßœ-µÇMbåŠU³g†[*ºnŽáNcZ'©v•Bp¨†–°B©Wq®<ˆáVZžé ÐhFE7Hh|ÿÕÇÍš5Кgï'ôïÔ¤µº´gXF脨n¿ýv¥›U(UBoø¡¤MŠ ¡À_Ö¥AÐ9BÔF‡abàðåöÛoO ‰Ð0˜+èú§RÙ?üð›nº c±÷¼ç=ˆÂ#´©‚‰0Òƒû@ºêÔ`M ‘®ÑYEÌ#8þ/ue»îûÖ}tgmßÚH$Ã.åR9éT]BG›T${â˜÷]Ö¹¶ç»Iî¯0ƒ} ³÷h-¹ª+’ñT1š^—™'ðã ó’j7bY™ªDb®iñ·.I¢ÐŒ„€r2¡xo>‹6Yƒ£œ^Àè¥!ˆ6„88ªéΑJܼkÃ~à 95rUC~D>ûÙÏ.[¶ ½4JiEÀdÛ¼êÞ<@À²* cçÌ)çHE¸!¤æÍ›§² ­bÌÂd¤¹(þVW7:.ãÊah¬°;UHg«c†?.·AAEÝ>Oé)CD€Âu(/ÎUÅ p ñz±/½Væ$§ú¯Fƒ!¹ßüæ7HâJ‡Æø sµ.`í¸ãŽèœ!ÝY³fÁ‚t•ÅÇháé6Á”@@€þ¡ ãeFŠ40J¨9Æzc“ "EÒ\‚Ô)\Íd3Ú X5ðá’’ŒÕä…L¤ü¥8$ûícÛÓlã†#Q?G‹÷ú£ˆµÞý‡#EFTûû"sàٴ̦ˆ§Rik O4㋃6! 
$õ2Ëë³ðÀËѪÕÃjÔÅò#ä¼–¸÷˜H0ë¶j4T¼5=(/¨S(‚€\Ù)“nÖK†‰€áª¥K—¬0üÕÁ¾Y›{ÜqÇA~5Ò­E¶þì´ÓNÐç3Ï7ç´Ów¸æÚ=~` @6èmJŒÌ&.\ºå€F`(Š&Ó M¯K|X>‡¡}(µ³¢=dGhO­²¢z¾¹P RP¿EþÃmˆžGuë¡a¬½ÞûÞ÷þþ÷¿g•aJ,­Î=÷\>ñ¯xãBG}â‰'¸è¨Ù0‘ÉiZˆ…69ßPðM@Î}eÙߣ"Þ××Ûj.óF:eãC@!–"‚†|F3ñZ0;ãÍ#°Ò±Œ_ù~¯©1-·dC_,<øæ5Ë»gÌÞéë?<³Â\í 1;:[![SÑðbLQ¬u·5À– ð&ž°5•e2]Ì•ƒŽZE5¥n-å-GàNäl(–ˆ2¼bI h˜•.ž…È»í¶UpÊç½±j@ÍTqn® µÖRµÔN‡Ë?¡Ó&ü‹®:íg_±S¼…éØlÉnŠU‘B QJö=C=YrPP–LjðbQìdEõp¸Ý¦Q'í~2Úúµï,,‡K 7…½sÖÌ™5¨Õ¸G¡ÊqB㦯Ðh@@}Ð1Õa%`(b;äC”¸‰¤‹$Žý0JpæM:4‘†áEVs©^-£·ÊŠ¡^V_j…#|øŠ}‘Î•Ô…Ô :ÌR3e ›rZ£„ÚS6’¹t2Ó–hA~uK"ÝC”̓xB;É)u'ÌCW0‘*¹M¹|ok£U‘žÅì!¸ð7d——¢Nb×Ýö<÷î£Ä,¬×D_9—)ÇÏÄÇ9ÀÎË Ge²ÑœÊ”Å_w\# Ð ‡À° ãÂj¸šúþ÷¿Oªc­í¯~õ«_ÿú×ø…Æ=$Òð.»ìÂÊŸ‘ÅÍá*!c+%ËB½Ð-"8wŽ«²GM O+ù˜rT:«•y340BáSíRƒÈ¬y}5½îë-6¤â[¡9N%¤Ì+ ¯à`üäÉe? ìØ©ÆÖ\O®!‘b+…ã?rš¹¾ážß^EV—å`°o\xV1aÛÅB5“¦ïêañ8xœòÈ&Çò­lô?€F`tü¿¡täKE~pÆM¾ðñ)§œÝ^{íµØ'3«Vû0ü†»ßz•ò)‡q)B5€ˆpU¡Ê1Gä]¤sˆ™€^S#„ÁÁõ7W{ë}Ë;qÓ‰4ÇHÔDý@”|¬š#èžV[¸i–~"…Ë"!¿+4Æ™µ)äásº*(Dïyé*¹`TØÛ‰ "dô®¨´Ìjp¢"××oFåÎ!ª‡ÊZ7EÅXöY×­Ðh&o2 ™aŒçgú)*¡“)Xö%TÊ^DR.aU¯µÉ ­* š§|U/ŒÔ;t/zN• žFC8ް«ÌÃáOë <¸@IDATJg®”ûµÓáòOèôý—. 
:zþýú»3ð­!zò½8)‰rFģ˜¦‡HÄøÃ ‘±ðK×WrA){ïÏ¥ì+šüžJg"’*÷‹L")ó0@âÉûžˆÌõ» ‰x"µç2¡qÓ×h4  hbdNyË@ +«Qöbè¤â°/ÔÈGX¨¦aG5ÏWß.ÔXS`*vW¬ 4CÉܪ=µl0.ìK"‹—Ô%•YŧΑAÇ•è_{] ˹ýú»€ÝJÑ÷J©H¿( ÐZ¢È:_¯‰Få¾÷•ÞÐÞŠùáñÅwž>¿î;爙ro_h–uÂM‘fÜN›¯|ü{±Ü µy÷Ús¼¢ÿÕh4M 0,+õ/»ÿ²¶çÖ[oÝwß}±ºbžõ¹çžÃåòÂ… ùÐÃÍp¤þàn×mžÄxˆ‘ cU3o@?o™áú±QAK ·Zô²¸‹‹dÌaÔâ:³½ÄÙª8òã—ì¶zæÛöØsÑÒD«Èz%ÃÁ—>;Ð@³£¯°¤—æPáŒÙŶ¾Û¼ŸºB€F@#0Y–€•€ ×"’qÄô—ƒ}Q5ŸwÞyÇs ‚Ò ë9×1ÐL(öe:€xm <_Ê'…%S¼±fÂDæEÿ̤¯ƒO«¾¿j=qø—EÍÔ¥/ž,¥[~‘p}{Zª‚mVè ?V1~8“4=«aDj†\cn€F@# ˜PŒ4̬žRÿ"Ka‡Å\) “öߌ‰ó‰WR²ÊVûâÿî×&}•r¿v:þ[þ¦-dØ„òYiƒU¿v¾ê”'/ºc!¹¬\©ÉA—•[“M56{ë„Õ,>õѳS‰V«ùÆÃçKµs’nH´l˜d ¶3bÕ/&Ò¦ôì, µd3 )CÜQþÓA# ÐLRMÔ&õêÛËa%`ªQßqÌ¡!×ýöÛU¹XD#`!È LŸ¼&ûÖ¾qUŒËt€z4<&†MŒŸL6Ž~ET3(œålCTúD*ÝŒ‡ «A|áC'û¼÷þ‹ävSQŠt™mq+Hbíu+!œHhrõ«ZÀ ý0„‰à‹—=d%Ó¸‚B7F# ÐŒ$n2 KaTÌ7ýÙgŸ}Ç;ÞÁÂß<œ^±‰«Ä1}êììäˆ(¼ÉBtâ6C€GÀD€b_¬±8å¹0v3¢^Q}Çr7"œ›HÒ¿U¯8òŸO<|ß/åûrß}îöæk̳ÄHt¯-®È¹ *,< 07Ì;Âö lY„Gp9ñË– 怼ͺ§+Òh4“a ‰£âG}ô _øjgÜ2#]‘xöÙgßxã÷ÝwP ca– IóÅŸtÈL°¡‡àA¨F£™P–qÍù^®ÜHhB»JÅqE^–;þíºÝžOýâág~õˆÔ'³–zNŠ&1{Z|ŽÉ³¹J4×gu¹ñ²h±$óºÑ˜OzñdÀ,²áÓÀ &Ý\€F@#0nilzóÍ7c„…hÅ'^IºÿñÿÖK/½4˶2•’'ÙpÍ…'ú ¬Óq&úÕ¯~õÛsVà$;ªsßîéÉ;µ»Ze$ŒQ$?A€00ÿ`2<# €#Àd‹G”E2"?žÑBB! œ7§ÉáýoŸÙ«ÖîÎ섞™îٺݭ®[ᜯÂW§ªnÝ \ •ÑÁ’ÕáÑ®<~º „ÀâD`ÁÖ€cGÏ_üÅ_0pƒ€àG<â7Ýt±üDÉÁÊ‹ûÔÊ.¨RN…®ü8«|csþ·Ó÷–.͆l¹³Ò¾#»mÕª_¾õw¯ú¸oôv´uô–‡²•;»†—g·g·¶|ôçgfK†³– (uk¶Üî¬e> <ܱf96f›¤©:ÃÚ%t|÷e˜œ®„@B !˜!NAK/εðY@ó™ÈØ¡1Éyî¹çz“œ¬d;}LMÏ0ÿmŽ(e¥ŽöœSÛÛZ{‡ús ¸'Û´eãg›’O$ } 0Ù.h´ú×ý×f›¯ºê*+Á6ÖúðÑÕW_}É%—¼èE/’0ŸØ)ím`ŸHÚGVéñB `ÔÍSNkniÍ:Z»rÛ¹”uõtæm~å#°ª ]üSõN·„@B !˜c&$`Ë›7o¶÷ ÅÚöl5‘$Ïþó¯¿þú?ÿó??ûì³C0;oÍE'öãbšUòÕcRÚzwvtwåðÍYçò*ïâ`¯åÎqžU^)rB !HL X¯mžÙ 3cׇùË_Þpà ÞlyúÓŸSÓ&¥ÍB[Ž-?SÌ/›wvñê¨C5ª©Öe¶ÝQµ€- {³(7‘ÇÏt§äy—1e˜H$ö?&$`ÇšaþÎw¾sòÉ's³zœø6z¶ÏõÚ‹ÁûtÍ¢q¹â,çQ/p·õoíè/g]ÙÎá5SÐùnºñ™èüýÞɶ4‹ÎI΄@B !ÐøLÖÛ"àg=ëYÀ:øàƒ½ÓBÛ¡—-[ÆáøÉØ•ŸµT._Lj|…÷7 ‡†«ûãF‡•µ ”½44|gVYÊè $rò'`ùYÏéJ$ ù@`2¶ú{È!‡8‹ ¾³ëLJÆ®—Mý4€Á6jñVžySÓD µÒ::\݉5œµz·h${Ùkß9Ò>TؼÞ9BÀþÒð4¡MÁ „À¬ȧ -ô"Ô"™øÉüõ±£—¼ä%¦ _ñŠW€nùìô+Äì³HçwžZv`õôô0ˆc;t‘È,Å™‘²;LwG²¸Ÿ»øITœÉÕßßo¶œHñ‰Yб8¢ïÜÙ׃d[Ú3ç••³;6~ó²ÜE·*@K‹µ`o;J+?-:] „@B !0äì,~b_îw½ë]îûØÇ=ôЛo¾·¹Þš5kÞñŽw¬X±‚},z}Ùׄv˜ÔŽû8ì°ÃéÇŽk¼kÒÛ8À|8! 
ã)öµg›<±yÛ+Ëb¥ CƒYOOuóó`ÕÆÍ–¬èÚ²)[¾.‡g,çárþ‘ßRæÃ N|NœªMB !˜ÊlÇÈ ‡q8F8–{‘~â³-[¶¸ãB £@ÎÈï·¿ý-¶CvD³Œ‘_Å]]¨=ØW¦6`K÷s†›¥‹wc”Y“‡=sžé>`þÙ5ЗmuôSü7Ÿÿ÷÷-?(öZå» ó§˜ñ›±€ÓpŽGº „À< PãÝ¢Uù9ÞÙr/G° >޹\Véºu¹Ñ„óLD#9«¿®Ñw‚ÃQ—»¿ð…/œxâ‰Øô„Np ˆ#©cÚï)‘ùQõ±Çû¼ç=Ï®lž1b¨‹$‹ ‘®Îê¼=ª]ZfázàH¶×'ª4ç[OóMZ¹¼TN*$ ¦@ ŸËÅm8·qš¹'?#]|Œ‰MóÞrË-Ö†#€à˜ ö4Ö_‹åØÙë̪¾ð ¼õ¶·½Í¤·Wò‡8ý£ {3Þ¤e“A`Bš!'³YëØF‘:Ê3{<… ëw8 Å>õ˜×ø6R[¶s¤×׌ª‚•ùç´›[Àc#ùt% „À| PFWÁÁŸÉ“¹éÎÊDiáƒï{ßûÛ <ð@F0CÓÅ fƒì8{‘ñý{Þóžg<ã±û­o}ëãÿø³Î:+ä yäb(`žYà_ýêWªIkßVŒ Èiž|ö’,šòI{Ä:œÝïè\ð•×õ f]ŽÄB¹¥]|[¥]·ª¼hôNŠ$ †F _DŸ(Ÿ± Ñ’Óe³2ùÇDt,»"9–¥ü¹M;Ç̳ðu´8‘èïÿû7¾ñ \²‘ê”SN¹ôÒK¹±¾{pp¬òæ´ÓN{õ«_}ÜqÇÞÈ@–ºÙé‚À¶­Ùê5=ý^ã.g½Û³¾¬­ÓÍçk®q»7­×`’œ „@B`Ž(£Òø¬¯µUyÙ?¤ËKª1Ù‹Û‚@~…ÉËe›âïzÉi{:·Þ¬Mem‡öu×]ÇŸlĈÜm ûÜç>'À‡>ô!§…„/ ˜Rõ’§ÙÓéêÊÆ†²ŽU«žòàS»»{2SÑ>ä?W~ÏoqÕ8wy¥ „@B`n(Ý2+kwG^æ–±2Úc\2L‹©fË®HNä‡zÃ$­—xØõ¢y$jYFèÖ{ÄÂGžl_ ÆÂ³×mÞ&LPo1†Ø«T1ÔpgR‹æ{¡…a‡”EŒÇ^Sh"ÏöÒ@ÉT­•u]y÷O^8pt6\î[…‡‡Ú³áŠÅß>_6!Ý–ùfáJ‡E§+!Hì„Áw ^ rÁzuĦôƒ±°f8û 6ž+h¸Ž¶Ùøªªlä‡xäSö¥­O·Þz+iÐV»ñÆþð‡{iXÞñÆð/~ñ K­{7Ͼ<  Y ’Ñú‰ ‰›‘&êé§Ÿ~å•W~ùË_ŒH’‡rìU Ú\ SZPÇûKÞb2à¦ìúõë}]ýï5zóyÆð‚uOyãôÇM ë­ñšn½ÄºÇ”c˜†’>õÔSÝÍÐ"B’!¶<àî sRD]dÿ¡Þz±¯ñ¢=Ì’ “Tv¾„Ⱥ‰!Iìüúõ¯ 3ÕHš`8ØÑ˜]tÑW\qøá‡G©íyé—‡m}Í5×8KDtÊÊ7ÇÓ pY(€=i2Ÿêþæñ37ÆEOïû6Y&q ¹C£tó‰òï 9å/8í³„iê-\¯äxÊÙÅvàºS1¦7ã:HHÿüÏÿ,?ÜÌŠ´Â¤(c£ƒp‡Å‚ÖE £>ZÖVvüàcbdo[ÖSŸúTâÁ"˜=ãß Ìœ;¯ãIOzÒ»ßýn¦m˜° C#oOËÄv3XÓΠ܃¡ ö îçóÒ†<%Õ$þSß'dó6š¤T’˜ „@ã €ûÐ "+Tÿ̈0JB@¸ãðñïÔEøüí£HˆÖ£Ó«ÀòˆLìBb!iŒ‹hÉ!$;ÕWEÜ0Oë"d½õûÉO~òïÿþïåøÎw¾óâ‹/~Ýë^GóXŸ– ?c‚š 2`³¢ç}Nˆ(Xh b:†ðØ×D$b\NÙº(µ‰Œíšä«ž¹±¢¤¼ „@#"€A,bâ ðÞ÷¾"—_~¹Åæã?YÆ®+Ü rM χ!ó|“ˆ!¤aëÖ=‚ï¦ äw{ê’ø$I5Í£½LAïMö|µ¸>ú½¥žü „@ƒ"`Ça=w+¤d€…Ó±ý¸MJ󲎼w¸Aø ‹–A&¦GèÖ·qxZÐ0– <¨7ÆBÎþB{ñ~~•ì÷¾÷½>ð”— õ‚ .P„A“17N»¦¿øÅ/òdݾ³§<èÖ0¸1€€¦0Ò þtè¥$‹ÑÉž‰4“ÏÞßíMkÀÍT†IÖ„@B`N`ìá¸àŽØÅNà (/0óP ÍòeÊu‘'·ð >¾ Ò»§AHµlTxF\œ]i$‚öФÂþ.òŠErO‹ìÌûóÉ+Ä®õ)R ‡G1zÒÈš~F:˜Ü`Ø}·ˆÍúÓ ÷›¥p$GÖ;0ÔÝ^JŸ‹jÖMr'õF X;¹Æ›>19Êc×abê8פ)Çz—dƒ¥7¸µjìŽf펠˺Úó)ßUw\%S¸Á ,‰“Hì7,Š5Îý¦´f h›¯îÈþêiïð­£áܾk㇮Zͽ—ËF­´z/¸$¯„@B !P’\L*Å1ŸH®˜î¾èûïr%z]»jíúë÷&dª {C%ù% ¹A õ¹sƒkäZr²Ùölë–m±Á¹ <ºú+ܵ³Ð©>4LÉ%A ÅŽ@êp{ gÙcOzκuçzŽyÍ,ŸbÞÑ›¿cV½jÙw—_ú7!H$æDÀsñÂæ0~ø½ÎýÌ+¬#àÖêîç%]ùç(Ò•H$ 
ˆ@"à^²nËò·´}=²Ê¹ÃU‹wƬ»¯jØûëÂwJ®„@B !¨/it}ñl¼Ôþ6v|þ_¯j·Œv—{‡³Á¾ÒpG¥½”µ·ŒUZQo)³ý989UˆÆ+Â$QB !°8Hðâ,×»µêvvÚæü§ÅßJÙ©#œ•œó+àî „@B`þH=ðüc>¿9nw¨iWæx±êw%âÄÍr ó¦÷~ç·žÖz6uÙÔEøÞá-ù)Êy4këèèö¤rWÛ’ >{Ô õ¡.P¤D „@C!Ðp®UQ©ûpN™/$ûé“ÈñkWWD;44äʾ¼aÃ?…¬­­­½½gn( J˜åk–ôoÈF}©—[+vA·ììg »XÀ»Œà…’/å›H$öWŽ€ñ« ‰2d ^²dI¸S¥R µµ•ÃÏ8ïrðÄÁØ—¡Ì‘.\ð©ì8(+›‚.gÃ#ÙÀàÈðèè’Žž|ʹŠÖ8Ï8$Ì „@B`žh8Þ¾}{Ì!³eQ,7cûbYnËðž ç°ÅâÀÁBº#ìy¯á³iY]]ß­òk©’µ·µ ççA+÷¢èG³±d7|A& E‡@Ñ 7ŠfK—.­CÒ c×TsØÁîýýý,ݰ†ÑmOOb¦CpvPx£¨´°r Œìؼ5¸ZÔ9÷–*ý± +Ÿ¸CxaeL¹' ý†#`†¬¯÷¸8”ó¢[n´¶/›ØÏXŽ`?Easàà´ ‡ñ«»eÉêexçÎÜôËÚZËíù¤}N½¥ªí÷ñ韄@B !˜î=`ÈŒ`ª÷õõ!×îîn„êÂÇaãzÄDŽuß;î¸ãÀD·¸9bfAÞó_Ãç1°}K»¯ÿ–+¥–¬TÎ ]*ãó÷»DÏÙ×8l´àô2Ò.TÒ¿ „@B`®h8 ˜Â[·nýÌg>ó°‡=lùòåO~ò“¿óï XĤ‹\ƒ_ï¼óÎý×]·nùçSO=õ’K.gÛ!绦H¿}é’¬5çȨ…ÀýýùQ”÷¸Òö«{À‘~$ yB  øŸøÄK_úÒ3Ï<óšk®9ñÄŸô¤'mܸ­²ƒƒ‰íÏb_xá…gŸ}ö—¾ô¥;wžrÊ)8ø×¿þ5êõªÒ$à…a-@l×*&«Mk»‚Ú#zLhO’TsëYÏzæ3ŸyÌ1ǼéMozžðš×¼­z/þÒ¿³³ó-oyË¿øÅ“N: @o}ë[ïw¿û}ýë_ß²eË䉨ÑÑÁzFç°.˜­.ûÉÎÝ̶;Ã:Öž¹Ó•H$‹üÂxÓó#Z,ƒtþ耣Pùæ›oæ^½z5S­ðœ¥£á˜ÝyÅW<ç9Ï¡97\õ¨Gýô§?Ų‘qp¼€äý‘(#øW¿ú]¶lY`4.Îô0ÒY»vmà+Yö´Io# Cž@Öº²ŸûØÇŽ?þøóÎ;ïŒ3Îàrü âuämÔcÈ#¤ËíBÆŒcpßvÛmuD¹.E•I$ 9BÝèùq‡ûA$—Ûo¿)˜yÆ kÖ¬aìa"TGŽ€-Ð"àŒP˜íøá‡ãËX—¥¼Jl:øàƒí¨:í´Ó„ùÙÏ~f=øe/{™Ø^a$ŠÔ\ÃWH¤käÂß(_tnÉ 3Q:É?!H$&Y)¼ ÿ½D^±AØ/x„›üD AOuQ¼á1L=É K÷¸ãŽ3 `N=Çö(L;ªðñ 'œðìg?ÛöiìË8žQŒk$Â+v—…‹ÿM7ÝdøSò"À$I¥G „@B !°@¸CŸ˜¹µ¼ÀßÓàÜÌ]Ç×\Ž€:ꨘ^¶[ /RõÚk¯5 ,MXúÅÊÛ`Ä¢¯Q ŽÄÇ€;úè£ùCFÖ^««:¦"‰KÄ%»Ã;ÌÀ'H7,cs{M$y& „À¢A³°tCTË ~ÚT¤kZ”}X˜ˆ³W¿áøÞ÷¾·¥ïo|ãlPƾ뗿üåÓžö4‹ƒMÓcej{EX€ÿøÿÀ@áã V2öuMæ.žšI°§<~Š%G¦¦8âäËðI÷„@B !X”`–Z½ 2Æ Ø×Æ£0ÆÐ 0ŒÀÚð3v73ÿ¯ñùÏþ]ïz?øÁ^zé¥Ï{ÞóQL[>öØcûØÇÚö|ã7åSŸúÔç>÷¹ÓO?+Û;^„œ1.)bB !H$æ»ß2žÓl¦ž8Óó/x£¯.ºè¢sÎ9ÇIX—_~ùƒü`f®ùá .¸à¬³Î2],ÁO~ò“Ž¢<ùä“­Ýš=Žó³c@O=»2!H$ ‚ÀøGì$ï½fÊêeìZÄå0],Œ½WÅâ®iaóÏ,`0±À–Í#)ÛµlæfOBÃæœcA\¹?#‘F¿ec¥,bi‹o š’/e;³‘Ö¬¥-?éÙ¬ÆèŽÖÒõYéïÍUo>¿Ñ5Jò% FE h"ˆ©î26Ü4mm€Â²ñî³½WÌb?‹µ[ì ìkÂ9–im­ öµ$lÓÚ$ì[wS‚ „@B !˜ GÀt°ßÌž4ÌŠz*ŸàW{¦x²nùp‡}lØ|5·-ZümÑ =s§+!H$ ‹@Ã0ÆÅ¦1EÌ{žªIé`\Ö->Æ»® ÚU«VÁ×î-wQ\Ìå†E< –H$ 4"³eƒkÃðu·\Ë2&´§ñ(ŠÐÆ+—C%íGØÈ;ÞâMœH$ †E ávA£Ïâ%¢Ø]d÷âm-‡oÀ4,]¦pàÇDsÇ[¼ ú¬+ •Ʋ¶ÑR6V·Oår¾úíß±–|OÖhÙÇ~û*eçªyØ>8¾=mV9¦È „@B !04œ<:.º$ó“HÒ•H$Í@ÃYÀÍ 
ç|HÏö8›*7ßMÐ%o%¥+!H$d7b©L]¦»¹ØÄôÝÄ›;«¿Ònð©c™B& yE ð¼Â=ûÌÆ¬í–â/8–fî®{N¼ã†oi—cöù¦ „@B ¾¤)èúâ¹0©f6e { :§ÞqöOõçÂH”rM$ } ,à}ÔhG«[Ÿó=Î9ѺÂvÏ÷@ç¹ì!JÎéJ$ ÆD uÑY.SÊkHÙè8åŽïËŠa~®jÉæaÒ•H$ˆ@ê ±T&‘ Çæ4ÌãF°ß£-YiÀØåòÀ–í-YÅÏYï’®ümét% „@"¸ eŠ"…¥3Ïy”öêÇ£ÚÛóÃ7¶õ÷d]7l›bZ)XB !HÌ3‰€çðÙfWs®&r·ù»+Mß))ãá±á¬­Ò¡hÛ+Ý»ž¥ „@B ±HÜXå1SiÊÃù *ùJcNèÌ:JÙú¡‘ÎŽe3M0ÅK$ ¹E ðÜâ[÷ÔÇ×€w¥›¯W÷[µµtä‹Ãþ|•q,ɾpÑ—6­ßº+`ú7!H$ ôpc•ÇL¥©¾sTeãü}àêêðýü—[òG¥+!H$d7`¡LA¤Ø=0gߨ‘• fYõH핬»kÅà@pòLA „@B`~H<¿x×-·b ôxŠccU®EÀcY_o~$Çš5k·nÙ^· SB „@B !PW‘€GòýDùÕßß?š5Ἁü³s§oÝæ×À€ÕÎüêíŸhQ”q*Šg‹îÞž UЫïþŽ–}~¡%˺²±®±rþ…`°V f½K†–dåÚï5ððE@R(!H,‘€ÙqùÅQ.ç …'&îîÎ_­¹í¶ÛÚÛÛ9ÐpWW‡ËÓŽŽŽR©´cÇŽðÙOï»ÞPÚõï~ CR;!H42HÀAºPëéÉrB«¸–™‹Y¸Û·oGºt‡§ s³}QïÖ­[—,Yâç~uÝ͵\Þ.e-cY%}p¿ªIÙ„@B ©hDÆ©¨tÛ¶mûâã»îº+Ì\°2ƒÈa  #$ŸJ¥‚z—-[Ƴ©JaF–ÆÏ£¼;ruØd=Úu ‡·æ3÷éJ$ FD á8Vv/¼ð“O>­>êQúÅ/~±fÍš¶¶6æo‹3&² ËZ ¶â{Æg<ô¡åyÿûßÿâ‹/b ˜ÄnDÈë"ÓÝï®ä°o•€±¯á‰ ¸=YÀ»àIÿ& FC á‰^tÑE/~ñ‹_ô¢mذáñüCò«¯¾p­­­î&™…aþžrÊ)ßøÆ7>øÁ2—ßüæ7?÷¹Ï=ÿüó¸á†ŠUáFƒ»^ò8tÒß=®*ûòÁ¹6§!àÊhÖ¾LÜ„ô#!H4¥Ü3||õúÒ—¾FÓË/|á Ý?ýéOª¶Y!`,{ŸûÜçÒK/e(3Ž#Ø~ô#þ‚Q*&¥#Ênwc™™éÌŒ.~î¬AŽ Ùû\ýæ¯If‡nñ¶æoÛ]ìˆwf­å¬õìÓ>Ëoùæ¯nPE’X „@B ášÀ1ÿZ_yζ½ù7¿ù ˜žëׯǔ柯ºê*Ê£Þ˜yf óIJìc‚nññ¦M›LSÇâq}aj¬ÔÆò×|ǯ±˜xÞõ³jûöUg¡YÀ]ùZyº „@B h8¾öÚkáô€<À}õêՌݓN:éšk®ùíoËgÕªU VdìnTbãU__Þ½âŠ+¸]K—.åïéD—bWؾÂ;¶bùK0â 6Q" æ¿kª9G»ú3¿Ñx0ƒNÉ9/ø_oî*·v ÜÍÔ &pÊ8!H4<¸èÿƒü ‘ƒqX€L;>qÔE¡†#`J._¾œíú»ßzë­&™ïu¯{mÞ¼™òL^al‡¶ 6±ßßÿþ÷ßüæ7íÃVÜD!ïØÒ‰pc\ ûÉ!zgg§bðâ“G%²°þ»Xxœ€saìN+g½ýý‡· eå¶%]ïû‡gWúÓ"ðÂTÊ=!hL¬¢¶+Åø‰\ðGÓŽ&^s̺hÕpì-#ÌGÕP¹zè¡,Ñ[n¹eÅŠ€@ºžÚŸe/4˜ìÆ2eýèG?ú•¯|%[Y…™x{X‚"ï”7ÖŒÒÅÁ‚5èN®*÷2lów¹se ×ÊYwg·/¯Ï¶íÌú³¥¥|ÛZº „@B`Ъ§ Ýà¸ÙlX1¡†•+WN’Ît5ã?ìh–……MJ㣎:Šn74„+_~ùå/xÁ ó˜Ç|èC×KÀ°_'Á_wÝuÅ­Ã;ŒÛ…¶ï¼óNXC™;;[¶l™(†ðߥe>ÑÌ֭λۗ՞-=èˆÃF6e«;ó“LÒ•H$“ €8n¼ñFD û­–°2ÿ0íPƒaÅM’ÚÔ5cV,hzÙýöÛo§°iç8 4wnÆÐ@±B|ýõ×?ó™Ï<úè£í‘†QLÖãÎÀq¯(àÔ#<CKV»¦¹ƒï×®]‹ÔÃðݸq£§q×^ÓYXÏ]Ì[3­$ dCýÙè/óë–ÕÙàæô1†…-¨”{B !ÐXß<üðÃ1ˆ ¿/˜‚ö“]g•5ˆƒ˜g”©#/4sÌ1ö3_pÁô\·nû·¿ýmÇqÄ´pL@0zØÃö¸Ç=î _øÒ5{ŒwÀ¦±Š¾×b·Àì©l™¶•î‚ÅÔ?¦`=ÉTö^ŸOÏñíU™8«TÉÚÞváÙk^g«ôHßø÷*æS°”WB !h.¬oâZSÍx 0ư‰ÉW¼ÔS<ª—v GÀHôôÓOÿò—¿|Þyç±VÏ=÷Üþð‡ï}ï{>Œ;Üagq<ö±=á„>úѰ€#f§Í$àNóõ$XŠô«k~óo}»`ô,[09SÆ „@B IÐó›s%,®Åa £a¼àO1‹§L2 
ÃX¹^j5h¼üå/ßûÞ‡tq!ó÷?øš4çLgžq:Ëüów¿û]<Š,y2mÍ!ÿô§?…£ùú‰b+3¦=)èÆ²æ·åhF:0”a=Q:ã?ÎÂCÙ’u+[[Z+YåÞ÷ù“\¼¾l ¯n¥qôM’$ ú"€>‚ØÁRŽ]Dè &A=åóÏØX¯Üñ$¬zé6Q: ŽÁp^üœ(|cùçõ£ì[ 6]9£=-±ÿGLš¼e.nYz@=_Y›o%S~ „@B`Q#¸)‹—ù¹ôáʆDzµ«k]Òù±·¾ç0'cmÏn߸¾)ÕKB' ý4Ýl…SÏcùç~]ùMúkÉúzój—ÿꪕYÇà¦üieYl"ËMWB !ØÿHÜœeŽxã“„þ x,ëèlïÏÆÖvHW–´Ä¦½l¤mÂ#9›Sí$uB !X<$n¶² ¸*u8ó½£÷[ÿ<2ÖR®8ºú%±üaº „@B HÜ€…²/‘vÍBGááØ>åÌ»I¥¬²cûöòàP¾#«+K¼/(Óó„@B !°`$^0ègŸ1Ú½ÛnÉzDzÓ?ø7Ã}ËÛºòÅá>6qÚd7{˜S „@B`NH<'°Îa¢ÕÕßñÏãsÏùËHý><6ÚÝÕuñ?|.ÿíˆë¡,?+] „@B !Фº!‹e¡ò}Ï»,ß±Ì糌¡,øÚÿü†sÞ¾yìÎά4²³·eùÒ¿ê+;¶¥¯!Mez”H$DÀ ‰þŒógáê?-¥ÜâõäÛwlù̇>Þ™´´w˜þÞ÷¾÷ÍŸ?ã,RÄ„@B !˜SÒôœÂ;‰WI×®«|ã•k4ŸæçëkVŒf};¶¯ÏÇjÏzº—di|5%’L$uA p]`œÇDªÌäõ—_~V¿Nˆ€‡ZËYËÁ=d¾ÏXÊ–,Y/#EÀtO$ †B pCÇ„ÙEÀÕ«á«Tl øö-»³¶ö¬4è ¥l ·/·‹Ó•H$ ‰@"à†,–)5NÀ»øØÛÀÝ]Šs¸¯¿í;œµ··{8] „@B !И$nÌr™Pª±Ö;GÊ;¬ûvØúÜë³À•¬åCÙïøô´0š¿v”o¼Êÿ¿|«U…Ü»}{g{¶åÎÞ|}¸?[qÀòjØig‘"$ „À< x@®g>ÁàíßV3Ïþ”^þµ…V›žßø¾¿éhoé[¾®ËÒï©'½¢o`Ç®Zõ ¥•H$uA p]`œ¿DFrÊ-—‚€s§¬Û[²öÖѱóÏüÛÖrn rÔaÇ~âKHSÐóW0)§„@B !0MÒ‹¢Ól¡ƒW?BXÎM[ÜŠ€¹:+Yyóíw…ߨpVÚ˜•JÙ _iXhqSþ „@B !0Éž˜FõÉW|«Œ^íÁÊ©¸³5ëxè0”õÞÕ{W +wdïÜ”›ÊÜéJ$ †D pCËÄBæ¼ZÊÍ_oeCÃ\£Ö€+¿ºüòåYDz®•[6d¯yÆù­xZÙ.™8¡ô$!H$DÀ ÿô3Ë*ùFhK½>œú“†RÿôOÿtÈ!‡”J¥G?úÑßýîwùŒŒäë™ÃÃÃÊÀ€£ŸÆ~ó›ßìèèÍ©hhÈ뱓]”Ëåþþ~¤ïîç ³«©…¸ÝCŽFºªÛ¬”›ÁUíŠÆ´Û¶vy7x(Ú–-_±$Û™oÅKKüTrI–„@B 1 èí½‡Åt€Y8ZZZ‚ƒÈß×W·#Ž€{zzÎ?ÿü7¼á o|ã7oÞ|ê©§>þñ¿õÖ[é¿m۶ЉÝV*ßü9çœsð1ݱcGkk«GR{¶¢»CâmmmcccRCäüyFú„Ù3……õ©~~0,à|ÀQµ~óV,[¶3ë]Öšµ¶gÛ{7ÚµmcoõK ùÓt% „ÀD ¬á&*à…+VpðOŽÎκpÔpLíøÃOúÓ_ÿú×/_¾üMozÓ£õ¨³Î:‹?: FÜ´iS@ðÙÏ~öq{܆ V$³11Gøìy7–Á»††»ºòÓy2v]><•ÄÆ÷Œ¾à>-Õ™åXέwÖouE˜:K²%mCÙiOû»;7Þ`}xéÚ®|…8] „@B !0)Œ.¼À îÈ¿dcZ±×áFÛqJð²eËxÞu×]“¦4½‡ GÀ×_ýu×]÷Ú×¾–7ß|38XÀßÿþ÷M>(¦ ×®]ëéW¾ò•¾ð… åÓN;M0ã§13Õ{‚a,΄=Ízæ³téRì.º!OÄÂÁ{F_pŸñWËÙX häÌÞ²e³û°½Ïýmß¼âãÙÒÜ6ÞÑ» ùW' H$–†xÁT«ИUÅ <ƒbZ4?c¿~WÃ0»“žÆˆðÐC5!Ì$5è{Ô…¿ËÀä‰O|"SøøwX½88æ &!`Ðíܹõ2¯JD3Ò±®,)CÁðq¬Ôí:¤¦ý`6:ä8ެ’ïu. 
eCé6g2šm¸iKfä௔õtåC¶t% „Àä°wQÆua¼€MbmXDæ_pÐÓÔ“§6ŧ GÀ `ÒœÂ1)ì±Ç‚&æß9bíÖ XYº"xT<*úDú#oÓ a( )5>[·n7âê5éƒ&Jg¡üƒ€mEóÇ]ý92– –*-w6”²ƒVê¤ÍwþnË–|#] „@B !09¸†ÜÝÍŠÉwò†é…btË–-üW¯^‰Ô‘Ž€™³PÀˆÁÄ€¸é¦›ðeµ0â‰5PÐ$#ÕuòÉ'Ã%†'ˆ3¦ï©=ï:<ÁF:wÞy§¤Üüå‚ÂÜH-xzÏDЧ:ß<êü ›Âm~ÎϤ̆FœÉQ.¯sôÕp¶uýv+¿+[¾lÅ’á|«xº „@B`2b9RVÀ ,@ƒŽ8â>™}ˆžƒ•'KnÊÏŽ€W­Ze ~Íš5ˆ–xÅŠ/tîsŸûàݰzé#wæï%—\"¤ˆîØW°IÔÇîëׯÇîÈUFGu¬9î=&¢csV¸'Im¡¡ÝqÛ<ÿ'_üµñ¬w0{é“¿ÖÙÖµñºí¶»ÚÒIX UB)ß„@B yÐó[ÐdæY—Dºx/Üpà X#h…'2ž|‰sZJ7Ü‹¢†ðÞ‘÷€M;Ãå²Ë.;òÈ#¦xqºp3Öľ1ÕÛÄ€—¸Ð™|„‚ªñºî`e4ÇO>®H3æ"ê»ÞéÏòÞŸméÈÚ–õw±7´#Þì€l©#l9±»â¤¯é§:¡cxðàƒëÜQiK§aÍò=!Xü¬\¹2”IJµ¦`܃bX}õ‚£á,`óìÇsÌÅ_LCìk0rË-·<üáÇ‘^7¢?  Ã&Æ”±ôq@#V²f½JK:Õ&Ç_ÞµœOFwgåÇ>âíëŽ:Ìö«‘1)wµÖí}µ:ª’J$ ÔÉë…&{ôŒ3ÎxÅ+^aúe/{ÙG>ò‘¯~õ«W^y¥ôm?s·4çrÜûÞ÷þãÿ¨Øî¸ã&2{÷Úk¯õÓ .VƾŒæÅ\¨#-9á¶Ø÷œs«·ÊÙÇU?þá!kW¶”ýqã¶Áê°jI{wúáb® I·„@B É(ÕqGu] `ÔÆ²If ½ØÔŒ¼ÝRv,Gú&¢m˜bìZ¦§E¾^f ›‹.–‡‹GµŽbÑ×l¶åâgm˜Æudå–o^UEDµ•lG6ÖuÆéŸüïßßôéÿ[ËàX¹u4sV¶¹/[›f¡·0“d „@ƒ#4{ê.jÃYÀÁ¾ûbÙØrì{Ûm·ñ7-L‹Ùf»×baÖûrOòpÝœïK-Y)ŸdÏßAÊÿlxîÏÆn¸õƒ-ƒwnòæÑƱ~vq¾Oº+±ï|—OÊ/!HL†#`r3IcÙelb9ÞÊ=è ƒ‚YQ¬i¶rü4S;°øãln£•)êß|Á¬þ–Ê¥‘¬}ü=¤ê7 ³¡¥‡­úЧÏ=pY+ó·«ÔÁúíwXVóé—$N$û  ×CÛØÌ®·€*“WIxq(ŽÀÆÍÞâE±ì`O=Š»9jnþa‡ç¢,B'nŒ[·#»öW•‡³òèÆ¡m+VeÛû†z{·š—Þ¸mS¹»5ð¢¬I©„@B`q ÐpgŽÄ 3.&“M/C¿{žãQؾŠqpßÓj´b6Ã|Wõ\43ÌC;•ßÖ[x޵WŒG–v´®lïèÊFZºr|V£)äI$ *Õý² ‹æA`ÇXïÒåËv –‡Æ:{ÌC•–¬Ú –|¤£«”µúÊb>102T*µ”ÌH§+!H$†³€£†É®+Û®¶Œ VV´3‚G³–ÍYå½ÿÛû{ª“Ò­¥r68ZÎFúò-ZéJ$ FD YÀX*“È´´ÔnÛU©§3ÿÔQïp˲ÊõÙÖmCccwlkµîóƒf¢mA-ecUFNc¬IàL „À‚!zçƒ~f· øø E_ß]VAï?ëÌÛ‡JÛÆZú³Ô‹€Ù½cN‰)e“}—bf¤X „@B !PׯùK¤R.e#hµ¥/Ë~;0puï–e+V_ô¾w—òÏ3ä_ ƾ屬e¬” W²8®cþÄK9% „ÀHSÐSªa‚mßÙ½¬gCÕ~ÕûÎ9xÝê<‹G«E„kXÕRªd£¾ËÐ0r'A „@Bà$¾M𣳃¯zÛ«·­ZÙßÚrúËßЙí´ò;<8’Ï7Û Í¤Å×prþªtº „@B H¼B1§;¼s¸£¥’“ö—²;F—dåu~îÎ:+Yi8ʧ…»ó3ªzÇÎò`6Ø–óa6 µççVaÄ’pƒKÚzÖo]¿zÙÚþ‘þZº¶fÛ;³Î¶¬2˜ ÷e}Ë2Û™Ç89\ÝÃ\ýüQv}ÿ—^zéüãÝ~Ü’Žîûµ=ôÜ·¼8§×ÛœûÜý¼G¼ô¨C×ä9+OSУ­½CY©­M¦]ûÐ/=N$ …A á>Æ00__˜ÊǶŒ¶9÷шX–嬿5»%Ë~¿é¶“WÔ5šó±Nf|‘¥Qö'wg•í;ú{–tÄ‚¬WrsB͆F³ŽªA*ð‡Þ÷ž÷–*­[6nÚØUY²d©¬7lØ4<<ø‡<츶¥¥u` kkËŽ½ü:êOžøÄÇ?èèCÖ 0n³MwdËÚ²¶ÎìiúWK;:–/ëùðy×rX~,ôPi´uuÙ7“ú³þ–¬¥##Bº „@B`&ÌéÇïãkH;ó‰Ü2vóú-k–ß™e¿É{²¶w¼õÍ«;zîèÛ¶u 
·gåÒÑöʦþ#•Ò)w=hçö­ímm[¶m­”+Ë–/ß²y³l:Û»‡‡vlÛ¾æÀumí¯~íÓ9(k÷îÏ'†êpöÔǼ½­­õ¢mçi”Ë•––Jææ³aÅu×ýþºG>ôÄÍwníìnmÿ¿}$߽´s5©åùç†Æ†ÕQ)UÚϤѥ8 „@B G pëÁ´,à;Ƕõ”:ÛÇJv5YcÝ”e7fÙKÏyËýÛÖ¼êù/>∕¶=™ Þ˜ŸÙ’,;ð–쀲=èÕGyäæÍ›[[ÛûúvvvvûºâŠ+œÉÓ‡ž¶lÞzÈ¡oܰé~ƒÝlÜžî¸ã¶U«Vwv¶â;gæ:›RfV³ M,;Ѫºc½oI–¯e½›³®žj¼+Œì}°‘Û÷‚-ÿ–Gì…ʆ;óát% „ÀLH<Ô&‰3-ÞšíĄ˲Αí-Kþ¸aÓ+Þÿî%•eC?¿þˆŽU·Þrç¦ ›?èP»~ëæ‘R¹õ>«®½öÚ+¯üš›—/Ï|–©¥%s*U¥’ù®„¿žžÜgÇŽ¬«;óJQ%ÿØD•_wI<´Óªn~ŠUKí·Q¬ s´dÛ6fK™¼˜,ëÛœu®¬>­N’ Ž Žöu÷tŒd#½;zÚ…KWB !HÌ9%à´ kE2– ôdíÃ[·U:W ¿#×®¼ÿQÇ~ëkßþÙÇ¿º,(#ú‹íÆV^Wdí^*e=K2Ÿîm+g;û³îÎÜ*ÄhûŠ®aÇFæ;Òu@OîŽ+›Éµë§|½ÌTÉÓlÁ¸/>ùJr–mëÛÙÓÙí•ä®®.Û°úœÒ<ÅEy ù###J\eˆ¯A7»¾46ᑯH„F…²Í®Ú^姬+>ïÍ¡·oßÞc¯D“_¡‹š900@»þþþâëæM®™‰½¼˜¢OÖñúici³+µ îoÚ7IDATò7M­?Úì¾d‰íƶ5UPo°/Jž£Þj0« geÝ¡7Ñ`þf*.˜-gÏâo4ñÜÃêÔÉ>…ÿ=}Ðú´þòà®ü>&3“ÖUx´§Útݽ½ƒÃƒííÎi½[˜<΢º0Ó–-[Ô5ÁàlëÖ­‹CCŠP‡RT£ 5ƒ†‡v»iAAšb&½yoo¯¡†‹€}iqÝu\Á»îwÜqÇnê7ÝOeDfý­{4:=pbß—cÓpôA†ZT ·‘wÔr1Ï im8r÷ùÿ,̖Ѭ2”µ d•Y¥7kíÍÃùsŒUü•Ùµñ7•²R_õßӪ†ZÇ*Óú+¨_þ Ï¢¿M[×·—ÛºÚ»Z+í}ýÛvìܶÝîÅy© Ú¿¿ë®»”8#xÙ²e&Eš][*P„:”¢©Õ¾ÙUÛ«üŒ]­õã&o(»q£wúÃÅNP‚4Ù°aÞbxàÍ®˜¢ÂÊ•ù{Þ¨Œ.wýúõÍ®×BÉß4Cã˜löa— ¦Ú€wÝ ¥ë8"sB†S8òUV”:’Ï<·°ºÆiÑš 6×ÌH¶”[ ±5àø»»`#Òî÷<”d¦|7ùœÿåäŸÓ®ÿóa@6êÞY±ç:§d–q{Gg·-XK½¼h/ý5Ýts&?vìØa½CS+LŠP‡R1Ój6µR“oèÌÞÕ`crKÈEPˆ´P‚îŒ{÷8@!îܹHFÃaùÐË¥øôÌ«W¯ŽŸé>]t×Íqë•B\ãeƒ/uúë_ÿú}ï{_î¾ð…?ÿùÏv! 
ž®—V›¤XÚîUzm¯žLY5…Í{Ó‡<î–x94¶]c¦÷ú׎ÇÿFÛ-þ™Oý>N¸Uë×!˜-Õ?/) ÷÷/ï¶;ÌÄìÖrV±0Ü;<Ô7’O,ÊKׯ.4§üÄj4ð*ªGóªLŠP‡ T£ 5£o^¥&‘œ²LC ;Œž9ðqg°&ÉzNµ·çïI°o½õÖȈ;XyNóëÄUE}¯)he¤³•]XùsïbM¿i8*tŒŽ Á‚b¿üå/?ó™ÏüÈG>rõÕWßÿþ÷?ùä“/»ì2£é`ëú”ÙødrX©ÕC¦œÿ\=¥*O?7FѱÝUîLå Þ»eCïùW• Ë”ú=?àªúW%~Ü_vê›·Ž[÷µÁFËmíݘxÓ†|ýÌG`v´vîŒs¶f@ƒÆ57&‹]-±ÕÝݽf/©@‘¼NUwRš‹Ø¶8jĬÁéÆp—ªúEE/„†cZnÓ&çØ6ýeaÒBå´§ S5¼86Í-HÙ4 kŸ±ˆ¢ìµO]þÅ/~qÚi§é¡ÑbVFVºósvcq7{9jwºMµTWl`ÁÁòq/왹‡v^s ¯íë4 öQ” ¶÷×Eަ!`7FŠŸ.˜ÈøW¿úÕ‰'ž#ã2“ÏücRz"tØÊèV0—(Z¶x¢A÷ˆÏú¹:òc#3gKmÛÎñ©ó/Èzû«Ü[}¸ÊÝÞåeÇ$ðD¹Ï©àYèÎ9äøL4™SIæ!qÊRMÙ© T€´Œ®Ö<ä>wYè¸MÒháFZ;‡q5'ªŸs'Éü¤¬Ÿüä'ßrË-Ñ #ÓE lÔÏ [½C?^×ivÕ-;Þ9`$Ê‹»Ùõš¨¶ÓËQ÷¢=*MÊêj& ?]ÿ¦!`|\‚€µUn¶/míFÍø“?ùÝ–y" cRÀT¿¦w“¬Z ÷^£ì¨n¨îÞáóKÛ­5zá×'²á±!§`麻➿ ´hQ4ëOFm*Mm¿¶rÍUžQîR·¹TÙèš}Åû:·!Ô©Um—£NÍH±˜°jÜìúªŸ:™bMD{ ›]/F‹.TýÔÖŠŠêg³ë-nÏ»r¤rA:11c>ƒÝ¿gàøŒo¹œẢ"AœxÀ ŸÏ=÷\ÂàÝÇ?þñöa½ûÝïVEŠz79YHæ¨ abP£¿›¼Ë.N¯ bõ ¢ÖrŰžŽn>1çÜ* ç³Ò qÅä¼–€†ýt©7 !Ë|äYôh2S”J¿ áùÈ~Žó@½‘Åtz­²sœó$ÍY‹Öº5[Eiì |¤©S–±2+\Áĵé:e²Éè^ì…6aSŒõRš9Ë;ÊQ”ÖP”,à:VΖw¼ãs&|=VÞÔŽ  V좓zË[ÞrüñÇcù—ùêW¿úž÷¼ç>÷¹Oaî)AѶÕ<CÔ ­=óéîo/Wªç]åtÛæ5¾ŠTi+ØW˜qÎõϱ/¢ó*ð‘K½Y¬Ãð›o¾ÙøI¯?uå´Þk96‹g¨PŒ CÁЮYT˜–œªh¬yïjÜù7 Tæf/Gu’F:kor«¢4âž2Ø0Bm4ãh¸èøÔËl@•ÕF娖*>nuÕ ¼Ê9 ÅLK‹¦±€c’™nôgÂûùÅ/~ñôÓO·¾â8Ž—¾ô¥OúÓyÂk¢Yh‘7ø8tvʈCc£åRyxhPHßö¶öí;¶/]Òæn[8Ú-ÄÌ7w¨(TS9èõF#1à¸;ÐârÅDB¡–†S‹@ÅP¤Ð«Žó] NTQµE,FŽÊÜ€ÒNK$Cü (Œ¢t ‹¦=F1AÃâ½½&‹{†&Œ`ª*ª;娣UÓ4<­ª¿ÏÀE /FÙ+ÓòœcÖUØH”i®ÝƘƒ„áéÔ‘Më£ z)²[µˆÀ»é¥å„ÊAÒ1Ñ/ì9^L`»¥3㟻©@’«!›Ä÷t<â¶QAQ!ÌÌpl[d±Ï1Ù^u<„§òA¥E·+}ÂGi OÇÈ«VÙ˜Œ™V¹HVj’ŠÜ  ë7H-~ò‘ìl†í!R æÀÎí*p /jaœ®<µ„Ø…OŠì&/wB ŽZÅ !§ë @È©E™6o{£«(#D#u¦@5ð&´’.?§Ø?ˆ;ûrŸn1MžÌT+Ú²Jëg¡¾XŠ’À&:Rž¡û^G(›nÿ‰íT^2ÚïZ”;ý£<8Ѐ {¹j xÏÇsFÃŒx¹)-m ²U¥Ð-áIKf¬zª tqW9 éü,ú !íåØëd¦šá‘KRv¨YבQ$iÖº‹\fæT´ü¨Í‘ˆéXðt·r±ª¢ó   ò‹Ÿ)€šZcÈ£JÀafªI"†2µíœghdnZ–`î|ü4œâ–ov‘û´ÊE ¢ÃJjŠOÊ”â)Çè2¸åµçH«Èn¯Ž¢è£»ÉkIõE›'Ô¥¨~²‹¸Å4•ŸS”'ªôìË}¯ÂÏÀæ$Wµ4)P\[ ‹jƘ!|³´Gz©НV;ž|¨S¯þa–å>a?<ƒ²Üub†ö¨4 æâ6ÖFBë(GmD ä3ûz¨Ý’Ê´ÈE-*š‰\„‰Gû+ƒÐ\ûQkûʸ{8bsôä}òäOï‘Ü4~(ËBZåÜYÛÇEZÅür-zx¤ãP·ÜiÅL£–ŽÞD°Â'âÊš#*¥ÆYdžA`&¯4 Vl¤,4¤\D!R±5‰»2dÖ{Rgº8M,E"¢'æÂg*w½pmå%˜G©‘Ó%Œ9I¨r+ùÆÏ"‹” ­]1«ZD£Ú>·Èk*Ž¢ãˆ¤jkH]͉NJMKžBýY–;\…H¤²«ð™½£©Û#á‹P(5 
Êh»^ýCÝË}6E¶§¾EjjšZªòÑf©£á¨-³¬‡õ?ê¹¶C*é Stbû3ž€^ÔEˆD[JQB÷pLnGÐz6óñÌk+ u.EýV–ÜÚþ½VlE‘9pí£"5é^tLBz$0ÿ¨";”"ÓÚð÷@fF?ji>ª>ÊÑbXºg¹ €¬?F¶f„‘°x}…O¨<"MPÈ«·©«´ `RiÏ4ª²…Žj`1å.LŒŸÜ q z›z¹¸(Mõ\Ò#œq<Æ£§@¸V¼ÉÝ`s‘‹ÀÄ#ªî›§Êã^€?]y·B*yE{œn¹‡dã O‘àäªíùTmT|¡,¥\4•šd‹4›·=*ÀÒn·ÖThG_j†úQÐM¥¨W¹Ï¬õíYŽªhÑ)E#¥¹[x˜è‹öÊ Ó­‡Z¤O†Ýúá"_¨E®ýŽ€šëS¸áë¾[u,`ÊS!àÚu"c¨ºhBÁÔV&Y­…?-tµ¬VÈBGµŠ‚ÎQâé§{h-Y±¢W¥;‡§Brì ETߢ÷)ÒŸ™#^”—&A‰¨‹ nŸåBSá÷€kOéOŽƒVJ—îÔœ¦ô"a0±|IŇ‘Á½§lJ‡ÌÔ ê(S’?•r‰B”¾ˆ¢ì&¼Ä Às6ýNÄsÉE²‘QQCxlJYÂOWá)"Òºï‰ÕË`$¶~µM 0¹ƒ:´Ð¸jѸDô´IÛ#ÉaR«T-µõ¿Ö?ÜsQ')÷=˜¥íT]wêÇ}·µ—Iäfßÿ¨™Ä ƒ¤ À÷;. ‡…ö©?ªe²âiá@¾{§Ô‰Xyï¡‹ô¦êðò‰!ÙtOÁ‚bê¢wPctµú5åˋ®\˜‚(," 8ŽŸ|¨Ñ9|ÖÂkjFÄ`ÑzêšAG÷z/N$ð’«sCÕËÚ"س\BægOÙ" Æ(¡èb¦ˆƒÜE)—`÷*ö>=‹nÚ &™cµ›lš\Pí;E†8òUF™ñ!Ì´ÊE “90ĵò"ªî°ÈbÆJIG‰èž"µ€×{5>þW@„Ÿ™<³,wÍĘ&ÔTI‚€g9pT t ÝA{ÔŽTPGëPLJSÂ,û‡×Ã=Ë}¯õjžt¤oXºµÑcѧðÙíçžòÔ¥ÿ)·È—ƒ„yCÝß.öǧ?ýéûÞ÷¾ xžðíoÄ`Wn¾1wÏ+ìyß3ä,|þû¿ÿ»Ø¦tøá‡‡À>¹(I¯>;{äˆ#ŽÐrhñï|GY©vÿüÏÿì‘ðzÔ£.½ôÒðG6®‹.ºèÞ÷¾·sòÉ'ï{ß+¤CÔ±j¢}ìc%葤Ԓ"ÌìÔ_úÒ—´ )’Ýk¹èX=듟üd ð?ÿçÿüîw¿6eD¿à‚ ¹áe§‹ˆžö´§©ú££0~Ný^àoÈ,Ö7¾ñ HrÐ+ùÙÏ~#™Æ%kí?øøÇ?îH™é–ËÇ>ö1çÁIíÑ~ôüÇDF!É'>ñ‰#<Ò£?û³?»ä’KÐ|¡c›âÔBê¦C/ìûá8R;±C~UE°ÈS—r¿êª« ìB#ò6dŸiÝ)ÿ¾÷½Ï¡¶ ÓR(XDoêöX”¾÷>ðûßÿþ¬WÿP¯r/ ž¥ãMozÓ±Çk@¦oü÷ÿ÷Ð]Ÿúê«‹™!=§ö‡hòu©‡õ?NlüÊW¾JižÑ¦üÜ øƒü ²<|†%Cf°P Ñn¹9fÜÊu¹ëRÕ’ßüæ7RS‡ô€êPôtä×~Ð;òŒ3ÎàFÌéÐ?ûÙÏêÇõ¿7Þx£®Ä#fTÁÿý¿ÿ·GZ‹7èßüæ7sën‚b?ÿùÏ ‰ƒ¯¿þúw¾óòõ¨.ZÔ&büŸÿó¢ý×>š¨\„ùÌg>£ÈO#G’“ÖA3tñS>o{ÛÛ¸§…Ãm·Ýæô4±|<ª(ñ`šZÙ¦èŽvEÇÿú¯ÿr@«TÄ‚˜ÿßÿû2rŒC%0‰62‘<ØìûÍç1yŒ–Y»k¶„ßï˜ ÅÔøò—¿ ÝŠ»ñ ™´èàüt]‰rĽê7ß7•ƒHº3ŸJ¼C­BYìkE?þñ=Ò„Üßõ®w©g©p}ÿûßXî\»v-‡*èQÌ-ÇÓ׿þõæœeÄNË~ðþA$gŸ}¶µÃVÇ;ÁÈ ÁÇÜC‘òDåRhä ï¬â²tÑ€Ÿ8@j,KîhŠÓÅ!á{ßûÞ˜.oø‚MÑQTU‹H0×Ä[¿E OyÊSþôOÿÔO€«c±”à§™P›ÞŒ>‚Ï^Ë%Ò9óÌ3±¸ªKTEM†’§t?ì°Ãà°üä'?áÛl‡ üHaZw¹ÄàF¬`ܸëó$18è¨kºòÔ«ÜYrÌVÒ†v@(J$|¦uƒþÖ[oëío»¦Ñìí1h½ÃÄ+'æHT¡Ë/¿Ü¨Q‰ §€kîêáDåÕ5rŸJ?L#õ³°"zÔXU4è\ý¤ 4©lÿ‡<$Ò÷ÓÅ-äDòÔ±ÿ1­CÕ°×#ë|¯à~uéìhî£I´Ö_@ß]±éÑ´XžQ6*%wø,>Z‚jaµ†9k†<Ì‹·¾õ­æ‹¢Îa Øn£wà³nÝ:[*L3›XöE ]†¬'b'ÑNRºHÉz¤VQÊ£N8AƒÔј¿åê‹mªŠ0¡ø˜«!F°;Ió`¬çIùøœ'*,å“Ï#6©„]ñ±êD 
¦1ÝDl%;]Ä‚¦$ˆ`h¶–ÏtUV@ѵêµ5iGRfõ9ä¥,Ì:@òŸúÔ§äBAùF‰hŸ|„Ük¹D墻ÑEšLSô&¢¨´ê0”ô³ŠÒ¸Zah©¹›ðWÊãžÖ%‹ØPa#ÜU5¥¥jêÎ4Ú(?”§®4àˆ8û»®D­²¥‘XÔH¬ÿ1ª,õ)?¤¨9…œ¡‹õã4¾JW=æP±t~ÒN”è"©ÏÞâã‘j†=âÃ?RSQ¸Õc"zqÀ’Å%ÒÔHˆmŒH•K ‹h’08,„çIe)H >žêD¦‹ƒdÅ&tNÈHgº—idè¡:5 n±Ë/1³'M+ˆ€µ.+G›Vª|ËËä„À2e×*Ó‰ÊE:JÄ]:ª«¢ÇµºH!é‘¢tWšFq»¸)ø`òœúET%yy‘ÐÀ‚·”m+³ìk(žÂ? qZòÔ«Ü @ª˜BÅý OŽ©_Z FõóG?ú‘Aíûßÿþfoê• j 3oo¹Wñ5FSu­Sz:×õp¢r—oÔÒ)öÃ:º˜jDôÒª™ ‹Ã<mPÀÁ¦yJYÔ‚l Q±™æo^ùÊWÒw"yB˜Ù÷?:ÍSÍ4—u–²ß°>Z ¬ [¯Ssú¯èkFAÀ…ƒç<_ŠJî ÞÌ$Û—Ãî_mF­²¡ z–0R ¦§@!AN ›vʘjj³îÒÀ6Z õCÙ µAãTE$"¤¬OªµLÚøCLRõº0ñˆ ÊŽ0• ««¨¬Âº³uV‹Q¤¦EGwžÓÅAjÒŽèð¤>ÄB°i݉$|…ÄÀF¦"X´ŒGžåXÔ·h-/½žI~[»ûÛß‘ÈÔ`"æÕ…ܳ\ô€ò(T&³2‚Fw÷ˆO¡;uHC)âF:S¼È¯k“©ü$Ô^((ºug>ó™z·×¾öµ~ZÅ ätå©W¹«Ò*ªJKY8¨çê`5EM#˜XÒqùiÎÀ'ÆÿæoþÆV6u{„3XT’(;ø UΆ>KÊÑĆ‚6¶S??&êêU'*÷éöêº(ê¹§®ª¢.%¥ ›ØµæÏ,ºÙ£E©¹ì²ËÌÙ«©ºöFÙÞhÒh"yêÕÿ¨N1Pˆi?AM¤zv¬Qƒü®ƒ(zXíS™ifd.€®•ñÑß‘„„ú5Í}š‹Ö„¬òªmzmÝt ÕýÔA¨^bшØ.·Gj$º»K„§d19[aD”‚­ê®6æ <‹ˆÁ÷<ër¡%Í€ä¤ÂXt$ÕDåâ)ñ(Õ—´d •;@<å)îš.ò%dõPÑûLKYÂ()Ý.lg¨Ï=OI(ÙðQšz_Säï©G•‹’"§¸èAÑÐnzÕ" ]„ä@pççˆ^Ô|žS¿d UN,õAÊÆ >ýùÀ>Ð.tɪQê•☮JÇxBFÓ•§^宆0Ö§ÕR)DÂOW_E )ˆ}5%×µŽh€MÝ É ÂD%TKÕC5DER ¦”õØH€¹®‡“”{Ôó(¸v’B$?E\¨~±ª…ç(¥ð§òýîw?mMÝð4ê3(„÷SíEøIä©KÿjºÈ+$$†j¦ØïØŠ# Ì! A]ZÚ/ùKç!ìVÆ3è‚wKaö?mJ$†9"õC±q›<ÑPAõâ°:…x‚„¶à!SuŽ]o +oÃC/<õ©OՉ踵±¯ýë‚Qß#¯Êyëj'zö¾£ÔüDØÖNþüÏÿ\;TQTÓÙ«)ÐBÍM)=&GÈ?Q¹hH£èëÃ’CE&?O:é$q oá`P¤i¬0]$BwÓPOzÒ“¸‰WÛþùLë‚[Œc4­Ð+R£©=Sdj(;ƒÔ? 
÷O(&¹OT.:P‰H\}8å”Sž(¢‹E&®Õí\‚z »‡UÂ,œ×KfPަ2¬¤B5}™un¯{YNÓÈDT#¡qÕtå©W¹kÑ^½ÓÞ(µ}vß{®F¼1òà?ûFÏN;!½,KY¶ÊTYpÛìÖ,í*§n„½a<¯Ãª¥| ëu ꌎ÷æºNTHÔ®Ýùd¬ƒŸ¸cÞM¿ŸqÊ´/&Ž’@–¡ÅyçG)/cèø‡àÖêâ‘×àTÁ}èCf>#–Ú鑸NWnU’c£N'^ÏgFŽ‘Â,ï:¸HÁ>a•;ÜúñIÊEë"0ì˜Ý641éÄ]Ïë§Åv«ÅÛîÓÂAïcŽQƒdU‡0¸-3¸Ë:¢“Myió:ˆH‡‰l‡ðÃþðØgb;·×ÑlÚ¤š9Q¹Øˆ§Ë  .):FLœ¢¸åûº×½Î%À@±ÅV6™Û7`†[Ì@#Qäë.wz y“žµA#ÂiEYLKžz•»ñ\” C¤¨…xSwX;0‹èÔŒˆÑ ûŒl øxþíßþ­öÒ,íQ ÑÝ[wá`õPaù¹Põp’rÀ§u×E}&V{ÙË^†Sc¾Ð™€š: †gD Ò: Jôé$òÔ¥ÿ±ÕKvÝu×Cên&Ò}¿#`ƒ>j;KÏWÁèJØL|íŠöTŒëã¥)†ã5äôT'‹}™€´°o"HZŒZ¨]ésñ 5½Ý[tFê™A_ÄbH}ë[ß’ ^Æì%‡×óm” 'Þ ÖÉò¯ígC€YÞèÓôx$µÏr1&Ð÷‘Í«´ÆïbA ž6¥i:wº8gèUã´ÎPvfªéàjQÒÆlr!­ÔÀÝ·Iæ—¿üå±NɬW!¿0ØKi*2Q&)º‡¨‘—¾Æ¡¦¾MêØ£ïàC‡šÅ<=DNã“K€<-eTPšˆæEHË«Ì&s*~²3¬˜z ÀéÊS¯rg­ÖêO½jAŸµ&wëhT^xÝOÝ´~ žñŒg0§  60EjMу€ ¬ÖÙ~ë U÷Yî“SíS+ê¼{44F°W ÔOÓ6è6ŠIcQLžyç*ê­~Uu¤ö)Ï,ûëЦ£›•cѦöSÆRºï€Þ† ¢¬-×wMÕ at+a‘DçR”(ÿ¢çÞ¹(c e¹‹(L¨H-jmD/òªe£ ugÕBm0.AUŸxÑö,—P¶Ð%Ä B*}QC—x:-Škõ±š².”ˆÄ‹¡O­¤*s×âïç^Ë¥¶ ^)Ò4 ‰DâQ¤lkÄ ˆ"Ó™ÝcÌ.‹-¡]€V(R$>uyêXîd ñèÕ¬vHTȶOÇn±¨8 G ‘~ Ýí1Ø·¶3)jQQ|óY'*÷}–ÎnȬ€ÂSš»•ÿèdŠ„OÑj ·0ÉS¯þ'p…!mLŒ~¿#` Gãêõ³a¯ÚŠbb3ªHÔ0öVQí<2*JÜimÉ=|Ä*ƿʾ`…Ý!¹èµ¥_Àp?gï yA'«`¾‰Ê¥Vê„ðR‘îQ§§‹Cà­4Ü&·g cä¥CÈB0žpVF<]Q¾èBì"‘=ËÅ#£Jp‰K­ˆ¹GÙ¢&x$d14‰S¹¥S¤S«H0˜OIÍ@žº”;d¢D„u­¨SwS*â,ÊEÜ"Yn…?£Î4K{ŒºQhG—¨< R'*÷©—”ð"ˆrq/ê$wm».~â¢}ÕRÀDò„¿è.åèM·ÿ7"ºGSеªý‘€aW@`QðAäF¹E›/ê!µü¨m!pt廉«EêÔöÅQ‹ÀA3»uâ‚É+(R+âÎÞ!ñ¢C/ÒçØk¹„' ÅŠ¬C/îH„<*’òhŠ8D 2²‰Ÿ…ÓÒTÜ£ŒCë-:ˆZñŠ»e1y¹È¢`ÄÚÔ$"bñ¨H“"TÓk Š=‹¸6‰£3À!€Ž#ð‰yˆÐ.p«MgêòÔ¥ÜC\ëmµ"MÅ]0nÑî(nÍ­ó@»YÚ£R+ªpEùîÎ<ÔÉÊ}7I¦øS…Rª¢ª®Ý­¸vK1*úCI”ïDòD:³ìü‚ËÃ!åýŽ€£§ˆ†TôE‡>ÅžŸ`Ea^¸h*µÒêw<* Ï(`Ýnm­"vôñ”»¨ŽÜNÐ˸8<â(2­—Ê…Ñ åVF…{Ïr 1Š\ô³ñ(0‰èD:Ú^íÔbô;3SV–Ñýtn²= á‹BävE°(޽–K7Ò¤ ÝCý¢¯Qô¢{…ÉBÅþS¹þEjŠo·Xr,ú/§+OÈV—r'†+ÞMÈiý,Ú¤Vbp³Ÿæök,Š’gã·GÒæ¤Õš áç¹NTîµÀNÝ-5vK5íÜ£áä#Ž]cŽ¢±Pß¹DĺÔC Fjµýpä⮈œûkTEID 5ˆ#ú÷¨.!s!¹Rä_dÈO‹êUô#žFªï¨­a…»H–£H³`±Š4ëNôÑêh´“H|¢r!REä^Hèg!d¤¦…C$.¢Xˆ¤¢ýDjS¼‹R$%Já.ºïH‡ð…¨»U¼‚÷Z.¢ëJ Á"}†¸EoRä^`åiអF…Àá€OHð†»°…‰ô§+Q#A)Ô¶Gž¡Åžº¹g¹GîÚE…á§~×i ¬BªBžr1âS%Gƒ·ÇÀ'Z\ T ª¤NTîS/©)ê„FîQ…”]ìî,ª–À¡;‡`»U­¨lEàÙÔCéG:EŸý°£¶(ˆpD€ñ÷²íôKWB !H$ó†À~wǼ!›2J$ „À$$žœô(!H$s…@"à¹B6¥›H$ IH< 8éQB !H$æ DÀs…lJ7!H$“ xpÒ£„@B !H̉€ç Ù”nB !H$&A ð$à¤G 
@À›û‘k2P+s‹üŒù=-9‚ƒÛ;þqp„Óâ‘à Qœ·P„wšDáv˜Fávw<•Hd!;)G0‡`„ÃÉá Uáæ±jÅ#˜ŸÒñ4N ‰ˆ![¸Ó=!°"âØ =©ÜР%šuþèªU«ð–ý9»ÇÐ>¯þÜøŒ¿¯;cM_ùv„“¯OÆÝS,î£×»é)$õ•bþܯ[·.>½‡Aã#ƒ ¸\‚!OÙÉÝÝW‡q¹|%Åǃq¥Ÿ¬±µdãƒö¤> ¦¯ áÓ=!°_!x¿*î¤l£#Gå!¹ á7ÌÇ È ¼0…Qi-Ñ7ã9n“ Øg¤_F H´R©DÊ<]GbÇ"J-LÈúL=OÄL¶x*Ò¶¶¶úé<í5kÖ„?i%H6Æ®/éò$†ëÊÙóÄëܲó…âE¬tO쇤)èý°Ð“Ê‹@0ÆzùË_î“ï85dÅçœsâ|Ó›ÞĽññ1µç>÷¹OyÊS‚DYÌèͳØÕ}ô£EoW\q…o‰£É`_)KD.LUÌŠýÄŽ|XÞ¢Èô°Ã{ô£-)a¤†}¯¿þúµk×I0ísÌ1ÒDÆ"ÊëÄO|Å+^áSÒGÿÁ¾>xþÅ/~K_,b ‰}9sOì „tíÏ$ÞŸK?éÞp 4f‚³Þ|ó͘YÆ´í%—\‚Ì~úÓŸºãEü‡…9öØc± (”aqb8)°MßøÆ7bijÏ>ûk_ûšðž"T\Èa•Ç p¬é"È0‹Ã–:ÇÇÂðÁ¾§œrŠ—^z©è|0«úÏþìÏäBB÷ýèG?þñùÈGJ?lbŸ b^Ù’.„—#9ÅbpK-] ýDÀûmÑ'Å`VTÇÖÄUø5(]]}õÕ¯yÍkܯ¹æš ä«®ºJ€Ç=îq¡ ó×´0wP ¶‹ycäÇÇOóÌ8ORA½˜UàX Æ#ÑQ/Þ½ñƙב‹Ï©žtÒI<ò“Ÿüñ¬XII$6‹ ‰ï >ùÉOÞvÛm—]vš'ù•W^ùЇ>Tâæ™Q¯ÄMžÓÓ¯\¹RtéH³Ë É”˜/ÏÒ)Ÿ„ÀŽÄRÇwÜ 'œðÙÏ~Ÿ¡Ïo}ë[ŒÎ¿þë¿fà2‚ñ¥Ä0b{⟈ÌÎ=÷ÜSO=UÈ /¼'n;ãÿ·w7/YmQÇÿÇ!ˆM'¢B‘I è@ˆq‚DCA¾€"9Ð|8QÐp`Ð@È5µ¡:!$þ%÷sï§÷ŽzžÛYgpØÏ~Ö^{Ÿu¨ïó[{ã»w=ÚÙÙ988 Y%–‘R¿Tv[[›!ÒÈËËË\é<¿¶Ù«ªªîÝ»GÂÊ6wwwã%¾ÊKÓ² êà'(^]]mRfFk¢ŠŠ ÄÝÚÚêèè°Hyf îïïÇ]>)i6{LôB’&?6 à?öÖæ…ý#@•b(âYssóÅÅE ùòòÛjkkNNN\š4²Ä/”ÆeŽÑ£1†áñññ“'O”h?¤œœœœššzóæ ”òöíÛµµ50•ü ýjX4···_]]}úôéñãÇ~ k_ÅlJ×XõËu¯®®VVVê×óóóX›ÖÖVj˜ýéé©ß~1°‰¼´FÂF \Ø[Ÿ^ŽŽÂ+°¿½½=«ÔÃÃÃÎÎN€|úôé÷ïßÁ _¿~ýÊí==þœ&R6.Ì( ޠΆ.LÞÞÞNLLLOO¿|ùA‡††ÓÙÙY_90;Dm §†SVS£ÂÙ,\² `Õ¦¯)ø±T6?~ü`ŒÄ¾ZÞ$®!œüeÆ3[¿‹‹‹pȉòœ(lÀ…½õyáå g°´2X0üõë—ß¾}¤†•ßÜÜ@M¬Ð)Òδ©T°-[`6„xË(¼ò¤/­ „ú1›šš~vg•MñÆ)BjÛúÕ†L5_ª¥FFFÌõáÃÀ‰¯»ÂΖg‘²ÓüÛ~~ñâ…ä¶Šk–æ”®õðcÁ˜=<ïîîê$våo•VIM'±ûàÁƒ°W…g4ûÄ À¤”RæðÒÛ9ž={¦í¨¯¯õêf+‚g~˜ñ£aþüyttƒmK/ß½Ù#D6·è³“ãëëë555ÛÛÛzö÷÷{zzx&¬©j‚صPó½½½Rèsss‘»¾Nòœ(ZÀE»ãy½eª5-ÞœíàÚ¾…®®®.ÈÔ¶c¡‘–pf&Ùk9k bò±TH~$uÀ•HEPb—E>‡‡”œ18ÙÛ~–aÖµv(óƒ÷ïß{âÈsÉggg>ª·bCIsKŠé¬v7»õH;«À¢³MM@«Ãf[×~Fx>JAµQ!…yË##PÌ$€‹yßóªË7¡­œ…@b÷çÏŸöPM'LÚVH|ttd+WƒÄkìÎb$Æpgj8ž B»––©ieÒážda$=œpn8Žr¥æK*;‚FÆn®Î••|ÕOžÆAS/Üvfl¾¯bZqÙºYýg…»þƒM_‰q5\ž²ß£$º½ŠËÖ¯ô8c?)|ËgxÖÈ##PØäcø¿vr®ŒÀ¿DÀÿD¸Ew²‹¿»ÆrÅúiSnI#³Ìx.ˆrÅ]P¤GÒ mÒ– Õ@nNÀ•qtj¹‰¢<ÇtŒü³!Ú<³T5Q¬-–qçŸ"W/Æù?£ÿ~F9v£-^ƒ+?¨ä¨ÞR\ Øf ³’çŒ@#.àMÏKÎd2ÒG ÷€KrŒ@F #PÀ$€ xÓó’3Œ@F ôH—þä 2Œ@F €Hð¦ç%g2Œ@é#.ý=Èd2Œ@#.àMÏKÎd2ÒG 
\ú{+Èd2Fà/-ÙÒf˜ÕIEND®B`‚rocksdb-6.11.4/docs/static/images/rate-limiter/write-KBps-series.png000066400000000000000000011362261370372246700253270ustar00rootroot00000000000000‰PNG  IHDR€àº³K³sRGB®Îé˜iTXtXML:com.adobe.xmp IYC5weHdWwCGcbFPMxxF FBMD01000ac0030000dd100000d12900009e2e000007350000937b00000bba0000cdbd0000bfc4000021cf0000571b0100 1 ”[n@IDATxìÝ€fE•/ðÛ_ç4™0$APÖ,æsƸ†eň p ˆsVÄ„¢Ôõé"úÌ«bZ]×üÜuÍEâ ;éýêžžKïÌ0NÃôtOOͺU§N:u¿ó¯SU·nG»Ý.rÈÈÈÈÈÈؽ¨íÞêrmYYYYYYI€ós555550È<JÏUf d d d d dÎÏ@Ö@Ö@Ö@Ö@ÖÀÁSÁ]¢§0¬úhbb"øG¥ú´’g»õ†„3e›ÙÚ˜™…ƒ®VoÞ¼9"•ê¶Ë?Ä5³¯·â9³`…Ìѡըz&qÂÐ@•JˆR3yÎŒ#oW¶™Äâñ£P*ØŠT–[©½ú±lU|«ÛméC†M›6UZUdÇMØŠg¾Ýèˆ~ÚUæºö °#, µlÙ²h5×ÙÙÉböôôD ëÐßß?[`[«Õ::ÒF§««k¶Lv-=1bˆMã™ZMZæk²¬¥K—†¬0øq«”È’%K¤‹¸£”ަ¯¯O‰LvðWWT|nÌ•)¡’¹Š*«YÙàÙÀÀÀv«#غuëV¬X¡Tww·@«EȯÓEÜ*DÚp»qãFÍGLŒ•+Wn—s•¨T€–¶ã†C¨¥"Ø*‚'i£^yتg%Vš£öD 1ˆŠ,Rvü|&8G¥¡=­ÛA¿A·*¥¢ÞÞ^ׯ_O½âUG@ D©$×W£\”15Šçš5kPŠ`¥þ¨'A÷Y‚R<ìës¬«êpP½¶c¥¢+Á£…^»¢R5µÎµB_·ªà˜B_’ Óê+¯¼Rf«¨`Ûè+]çÆP]¶KŒ^: „–âáÍ„Èñm9DÊè÷Ûo¿ *ÂjÇ*º¾*rúnÐ@àÝ ä½± öE³Á{Ç0‰‡ÅteqØ>fMXŸ°û³Rã‚-씼0‘ØÎŠÉ.$׊0,#ÁXÀ0‚DbúI SeÌ„BÜÒ†¶ƒ‘À¥h&fì5ŠAW6 W\™!:œ0ø¨ðc—472\uÕU¸x]£jâ‘–x¡[…ŒÂ¯¯F¹(c„a.š' É&`e~´‘ê@R1¿M-ñTl· äâF ®*¢Ò¤ÚCþ`ëVpŽÞ¹æšk\iX¬' Î"„‰ùa·Û z<†#ú—$êÚ1pªOÂàZò¡(í¢:L\åâ#²Ý#ñúèã !=Ç0w¦ÎwÀ0gÍ‹2ϋڥáö1èì#~Ç;Þ‘M16g¬C=ôE/zÑÅ_Ìê­]»6°gVJa¼^ñŠW°ž2”¬VaÓgÅgW3|Ll À’¾à/ÐjÆ4l.'I: Ÿ÷¼çxàŒ£Vƒ’¿ímo£œN8쑸½ï}ïûÈG>OöZPJ(õÑ~ô²Ë.SãË_þò]% •pÛÿýU÷Á~Ðõ¯ýë»ßýnŠ%§­aÓkd ðë«ôÄþÊW¾râ‰'Þæ6·QVw¿þõ¯ÿùÏ®”V~j>]pÀª{õ«_z}Ì¥#{ñ‹_ì™)Õœ&$î«Hâñó´ 7¶P)ý+C7¹ûzìcKNdø“Ðõ ƒzá _øûßÿ¾Ú¾°]©<̺æÇ?þñ¹çž‹}‡ív‰ƒ€&ãIðÜë^÷"’cY—^zéêÕ« £9dxÇ;Þƒƒëcµ]zeÿó?ÿóø€V«ÅcF½>&9}ž5à×’CÖÀ\h€ `kpf˜uˆÂvGE Ö}îsŸÛÞö¶A×YÉàgó¦7½ÉH_)F?"³â0ÄBÍtÀ÷¿ÿý£)=¸ýÈG>Â2^~ùåôD¥Üýîw×X%ކÑ9äC^õªW‰°ï®‘(®øLÌôCúУ>ZÊ® š@™ø:Üóž÷|À·j!ä›ßüf«Œ;¨‡7¼á ˆ_ö²—ýùÏFùÕ¯~õw¸°ùáè¶ê,”‚ª*ž`µŠo!X•qט¯Ò·ŠÐ|Ecvl!å‡VUw—»ÜåÞ÷¾wäjûw¾ó]p³›Ý ±Û­xμUûãÿxf6a&ÁVñª ‡vØÝîv7¹ªˆ^®îñتÔV·; Ìccä#!’ï¤T[ñÏ·»GÙf(rØõð³7µh¦”qáO0‚|…‰3 ¬ïíøƒÑººcÛËl…ð aË”bFƒ³ºfËdWÑGÕ\(.HcªY ¥VCÏ,þö·¿¥ŽŽ”ÿøÿ8ùä“ÿô§?ýú׿FÏ…âxýå/yЃ$—߉¹Äðöܪ…§uÅWìÀó»aíÿxòãñ×_:.æ¢+ÅÂ~œy×Ç_Ó4ÄÐá?ø) b9æî¯ñêàïÐRý.Ä£¢ƒ§f 
\‚UYw¹ë*}f„ä4g¨Úð…<PZ‡‡øˆ“Š¯íˆ¯}íkÿøÇ?ê·3yÎŒã¯vSxêP4p&Á¶qÅSáyÅR»nE©áÑË¡a¿šm‹GÊè/¹ä’}öÙG?i=Hªxð®UNŸG \÷4Ï£¹êŧÖ%bhÂx¹²M3 AÌÙuÔQì¯æŸ~úé&âØ&®ÒÃþðPŠ˜½¼éMoÊ”|ðÁo}ë[eIdU-X†^A.KÔeŠ’=5¿wÒI'™Å½ÉMn¢Kg.êói°:ï¼ó0yÍk^#×­ùÆÏ}îsð%«Ç.ã2¸}ò“Ÿ 5ÝFs^úÒ—ò •:üðßö´§áCU‹ d:µÝ|2¼aš± >šOHqó±Â~ô#q/¼ðBf×Ô. ò“Ÿh¯‘Ý—{ç;ß«SO=õV·º•ê>ýéOK4cIlòü×ý—²Ò/ºè"édÓ:"AWÄ@.Ò]Pzä‘G>ãÏ@I>–²OúÓe‘üâ‹/f©¿÷½ï‰Kå+_ùà?öüÏÿü[3Þ%B£´QwqÄÊR~¤»Â6WmyÝë^§[ïz×»Æ$s´Z¥ûØÇ¾øÅ/þô§? 2Õ™s&ªñÄ'?ùIºÒÑѧ®F$,(ŠÀüK’2&ÜklÁü=îq–‹&$‰ùmY·¾õ­ëî'<á ÔH±gž ”ªúèî`ަê/)Æ d3L¤Ä:Èb!nftžûÜçb¢ß“(ŠsO$Š¢x®°"ƒÙਠ‚U¥“&žÛ œzÇ•rt%n–ü(HB!o|ãõ”Þ×4OÅûßÿ~”~& šŒÞoç—¿üå7¾ñ UÄàÉÏá–·¼%zóü矾ñG4\+LQ˜ôöÄZh„â'dù:·ð@ä50`P‚-ƒÂ`™e)‰ìø±Ç Mƒàq{ô5± 3¬2šðÉ ˆsª€(3-îWÁ̉°ƒâßþö·ÅSÖô²Aÿýßÿm˜I ',аJßüæ7Uô›ßüæ©O}ª,·Äûú×¿Ž‰9mdÓÉÊ‹€ Wk™¦OSF¹9Vî‡?üaq¥X1b0¬(aÀßýÝß©úW¿ú•"Æqè!Œ¯RnµÝìnb×n«ˆeTüïÿþïŸùÌg†~Ž;î¸ À„‰Ô:f×Ôh?RÜ BàüÁ[ü…/|Áú(¨SðSŸúTYÃô$6Î\:º úÿ÷Gsó›ß<ôüÙÏ~ÖmÐó®p#0µL‰ð•}‡©˜3Ó_úÒ—€ .—T4/‚&,¸á”Û˜^6Ò’è–žUWƒ[qè¢Oñ‹_sD–tÄ@WGƒ4££(Y” KAHWVoH‘Ÿ~ îV…,j4>0šÿþ÷¿¯ @(²<€*º5RÐælÍÊP‘ÄÐDȧjâ‰+bäah¸ãö[ßúD|×»Þ%N*}ý…ì”SN1˜°Ð Ë8†`„Œ&ÓR0+nÖeÅmHå6Æä'´m?¹§Î­§EÓ̑ЃÇÏ|2ª°¶-bŠÏûÝï~âz׳Î:Ë-‘}üã÷+ ˆ5ò¸Ò³MpˆQõ”Ûvƒ¦r»¡¦\Å^¥0ëšËœŒ Ûokˆ»‚¢´àƒ2ÜÇ@kÛL˜>šÊ\2O,³Â5ds•…Ùظÿ÷ÿþ z)¸1ļ(q` vN›¸€Òígœ!NT¦'€œ„"˜Ã*ñ@M–—ye¥¨¸& çNwºS,ã…¨‘Ž!7‘o„¤²â ‹Ã?‚977Üh5rǃ@ÄÈCYÕ…eRXU·¼LËÀT!®9 å´ÓNÇAP\Û¹ƒR\ð~ík_SÜèG"åßþö·wˆbàòÀ>P:†£vˆ@ˆ¨Zÿøÿˆ&‚AÉCòñªkD˜rÅÃâY4_œT¦Fq¦p aê£õ¨àì!Q ê##ŠxðèéJq~¤‚nå"‹}a¼ÆG?úÑQ™©‘óç6 %h"—–ö°‡aE<œíBàP]Ðp÷M–D0 PW©öÆ“LN;æ¡R‰°ÈJuÆOÒ½´Ëî­è\)Æy†eŠW†D¸î {NBçŠüîw¿#¿_GHn „6Œ0 SD zB~âçC¥æ¢ ’^ò’—¨+~†Xy*Ü¢T¤ú9hŽVKªþŠÛ|S ä)hÏa»^ÕÊe}Øe¦êÒû“ˆ›Î" ¤.µ#&y µˆJñ—¥:ÒÊ5€PZ¨ÂL{4“xÅC< æ¤èSã³r¸!!I4˜+®¿ô²`L׋xªð(Za¥`‹Ò“†á—¿üeú·zbøBç‘exG*¸¨"0›MTJ8¦fiØÂ&³ 4¦u!?ý¨Ú“O*‰´ŠIÔ4z£FòÄ< MÓq K‚YëQDc‰¤ˆG(ºRb GDpDrØ ȼ”¼7VÁ¶²k£Æd0y¬ƒŸ=$`¶€ ¥ÀÆlkÉ9çœóÝï~׺;Èuc˜0†€a¸Cƒl$.‘Aá.ó–LnswTǰš‡Tå14 “8cÊTÉuÌÌ“t¹&W]Aad1FÖŒ s·Lž*Њ L¡8zcF9š€X£HHÀq³Ž€{Ê*6:®‰]xx<]pï i)ù!±1/ŠkˆJ]ÓÔ¥ÑÄ3n™rvüÐjØMÅ›t%'ùéܕτ†&I«98˜§µægÓA1Làó†k!ø;OëÇ*l½¸E#Eú‚†£æäG¯C-@š‚–ŽØ€ ®³ˆa‰4B!Ò#<µEãúbá W0RÑšºÕMQ©DüéÁ­6R8<<¦âM¢xœñ «ìï}ï{õšíidƒúŠ”H*k1[ë`¡Jeá¯É®Þ1ƒvñ< Ûù NÕ˜„Úõ‚¶Y\—éî!ä”~ñÅSQ<“E€x$p 
@WC=ã¿GÏ?‰ñ³"¹ºv^¶Lyã5øÆë0sØŽX±Òx¦U7f‘qdüìýÂýò™NF–a ×Maìp‰6Y¦gY f€Är±h 4£ÉL0.rmìbøxV@íaVœ?ç Ñ1°á°FípKu„an ƒuÁ& ’½–ÎV)Ϊ2vÊJ1‰J MpKrqØ/ۘݪ…H %91TcØGöN‚G<âˆzŽ0²8+ÆöCýó?ÿ³D²)†[íªÆMY*Šæ•¢ÔE0m‘…Îñï54¤…:²è™’Ï>ûlû‡‘©Ñ B֤ͩF‹0—N·®±ûZ)0 ”, yÕàŸ>iXGèÚF,E]ˆí’3²ó(€A»YoûÛcw4E¤ÀWeC9Pœ¨ R D0T#õ?¨—4“®REG Ú£v"\M•OÒ½Cdš#NWjÔ ‘‹§Ö©H-TaDhbß‚k|`§i …ðeq  xMØMʲ,­EÆUjñ¨+EjÑ•¤"¼ë¬B¨]0tKóþÍ™gžÉV2¸öìGYîŒT;ÛÇ<‘S$ü0m †Òµ”‹cNìÙ†côÀ÷b—+€aÔ»â@PdüÁ‹²Ãˆù#’ª5Äölv߬ihOK'h²hða7‰!®\|^Ð2O«v N0®°ùR[lk]ˆmôƒ›ÉUðËÚ`˜ž Sl©%0ú”€Œ¡UA">Š“<ø°éjw«õ‚,)3†A&~u“±mè8òè&j¤Õ€=JÃÌ‹h A† QI¨kœøAª``F½) Ò R)&4ãJ]V4¤<ëYÏr˧w¥Ì Ä´¶Z$ÆpJ¹V“<ž )‘îS¾lñoÿöoRblÀ”Bl1U/›'’òØÎÍ¡4t³¹ÁDŽ=\zÍ“ly%vûcÏLª`§CÌ^@†z=TD¥(ÀI{u“æÇÀNsë âÙaG…½Š„AC<ÂÄN=r·)K~²èÇÐC<`Ñ;;-c&¼ ú²æBÌ ¶ì£+|ò2‚ˆß9g“ˆ,ž»lÑÎÏ.‚RÛ[`ŒQ|HÅjp/”b¯e1ÁÒÇ;0L¤ä²˜ˆz^ z` Tà7ßN:1ÌËyÝV\‘ T«MkÒöì€"éÄæõ>å)OaÅðÁ€+F9Àú5”èmóá²nn öÔ®ñ„žóœçTé׌ ðý¸â’¡¿Å-n¡¢F«ck0!©À›[@ÂŒBSîläÚGME …Cflu¤3¬‘KÏ¡¥¸ÚnÙq3Æ”©Æ  îå¨Ðƒ!ÀF¬ïLû‹„Ì6NF QU„[¨Š ¬¥:QÁJóª#gô ôÓÀ­Š¹õiHl¿›á”÷»ì/‹â„ÁJ0/°DºJu“I…ÌDqˆMÞzÒËÀɘO-º$Ã0¥„Çt·‡Ç&„'›†Èñ`XR5¢sí‡"€lí‚ö䢡´!Y]Æ(œu·FžgêÒ¿žX³)ÕEâžBzÎ1b4à1v¤¥ŽP6^ŠÓLò—¥ÓþdéýèGE 5Œ½”òB¹Ñ’.ÓãÁÜ”½‘-flw'†[Õi{°¢UWœ#1_w¿ÒØ'‡¬]®€IlÃØ±}QñU]L[›#•ņ2|pAY™Ké“ N¢ë_qF& `§îg?ûÙrÙ&´âªc¾e‰Ç5Å-,‚˜ "Oñ¨EÙ™”!Lu•P¥„A¯ngFd¡$VʇgA­˜Y„`A€YÑq‘à&ØÕúßÌR!XЋWYÑð¨H¢ ‰Š'n‘^1TKHÎCÝVZôÔ2¸"ÀM„J£Cc| ëg6?×h‘ Ì.Dí•Jƒ¶UŠqXT§ ô(¹ÚÂH¬4ŒÏ C]\QÆ3¹QuõÈUÏv0¯V½²…2C$µ#£L‚Eë¤WÀ,ZU]0GÕ+T ㊌Àâ®AYå†äq‹U)±J¯¤­ŠäÈî×@àݯó½¢F¦A;Ãâˆ0@~ùaÈüò‹ ˜©‹Ê"TF3è+;ÎR xVF$Ї ÷~…‘~åyk“›o Wa^Y@‰*­LóL‘*žh‚lD½U“EBNñ°’U];ˆTLªHXÞ¨+T&X“Iˆ9ʨ.ô:¬Ú"²míRªÄ¨H©hE•Áª²ÔUzÐI„$;S‡,Uô"(Uçªv£i•$¢q[µqìx„ÜÜh Ó'LÃZÙÅMˆZ@¾H¨E$樃?> Y\C3jêÇhKE£ ÑöH¡ðˆH¯zGŠV‡þÕ+ÝU¢J)$Ò+ñdQcð”X53غJ¬ô&w¦fpŽ.ˆ6VY=-s~y0ŒÚ‰XÕúH§‡PiˆZÕ.£¨bfzŽïf dÞÍ ß‹ªc&â÷¿ƒ63+aY¦WöÎm˜-¶ƒ!“^Y¢Š[e× ¹ÈDìPµ÷Ä´§6Ó¹64¡Ê‹/¾8øã)j©¸UÖ«pMЈWæ8ÌzزÊ^cX‰¬Ü*Ü*žU-U¸[ܤϔŠ#«*zˆD×0ÖU®â‚z±b‘Å#K©(èVd&D…ùƪª—o1—«En·­‰jÁ³*.¥jµR!'‚¨ÑóbÈR0j¬ ±Ñ€ÛjS˜w«¬Ð‡Ø®BÔ(RU$lOe%DïÄ3&9"Ñ:|¢”6†$Ãèt·²6ú(‚ƒÄh””P]T§êªx¤TéU“5­RòLIP† •º*eÅ(ôOæ™Ï[Õê™’(B6¶’§*K†(æë® ˆDÆÍà %²¤[׌¸2e]*U]‘Ž¿8Ûçªê¸­È2ëpQœ]&<&L|ÿr«0‚).þ(NF·ŠÏ¶B¢—Žý(X¥`NZlƒƒêä2šÈЋÃH~aðT/âd‹”¸ 
ôŒ­Hh/ŽÎ4cÑ™,U`>3[·Ôˆz×&º¦Rµô(%¢HÕ|·¡‰Š‡ò£,HS/‚™af‹@…ñÌö ¨´â/E_G Ñ/®øÛެ¬2è”3RÈ©öh{°Y©,Åg>ÑÑ0O×Dï Á°’¹Òjð1z j´4Rhøg2¡™J*E‚ÕLýT²EK‰a]¢ßEè0BH*DÎÒñ¡(òÄkZÕ³Ú“N™Q¤z˜50ú·ª7Gv³2ïf…ï-Õ1LÌLÓ¦åaE¶2:aÃRT4¡©°¶•Ö2¦Á!Ð(J1Oh*CéVíȪˆ‚RØ5dØÌŸx˜¤Š?¶a(⓰銣g÷+zœ°€"L[X4ͳ8“r«8”ÅYDY™ugÚÄ­…CE)N9›IObTU+îe4S.9éa+IðTD:eÒ¿j:Ff, e)ÂÄc.¥¶ª/Bo¡´ &@EF·8#½©<@uቡ”¼Ð†DLÈ=Ý·•¢(³‚aœU­”J=Uiqˆž­ž+2HD¬E8‡äª¨éñ¤IQ sPD:eºÆC.ËíV|É&]ÔB¥È€ƒâJ!RÜÆ¨«ª+¤ $>•lÑ„`J–E!QVDÖ̧"µè¨)EÉÁ6_çQ€çQù‹¼ê°qamà³2aV¢å .«$ÄmeÝ2ˆlJØz·lŠ[Ö­"–Èöá `„­œiþÔ¨.VF®³Î$…¥SP]aÇѸÅ- Y°%3zqb€ 4 VÌ #Å­D4a邳[òT¦ [Y3CeCUA-®‹­©¥¯ ’PÓØÍ`®^mÁMp&ƒ"zdBU…F‘DV%¶,݆„‘˾‡9–K'Ú2Sà º‚,ø+( ûï¾ûf˜'ïÄk£A ‰˜dã7…&%†F2A)VΉHœ¯äÀ°*]ÀŠ¢.2lu0Ól•žé³²²²²"¿ãï8øàƒÁáqÇ÷¥/} v‚d9¹ä’KªóÀ¡ }³ |Â]·úЇîr—»8ÙÕ×è~ó›ß(0 ›?ýéOßë^÷Bó¸Ç=C®muì™gžyÄGÀlŸ®ûÆ7¾¿à±@ó­o}ë¶·½-DЃ䛲>>•¡oœG˜¬¬¬¬¬¥4µºÂóŸÿü[ÝêVßüæ7 õ²—½ÌgË~ûÛߊ›XþéOª©n9©!2”ˆ¯vfhꛯ¯zÕ«Akç©ÊýÀ>àöÜsÏ•òš×¼CŸmRoûÛ!±¬?üáï~÷»‘9ü]–‚>AêÖÇe$ü’—¼èþîw¿“ì£l¾f d d d d Ü ¤ƒÅT0ñûÉO~Ò×NÌýÌùìoyË[BB(kòYÜ 3Ÿúš:D<ì°Ãó˜Ç™ô‡=ìa't’ÛË/¿üè£~ÎsžcnÙ­ë=ïyÏ äãrpŸþô§GE¼ÛûÝï~'Ÿ|22‹Í7¿ùÍŸúÔ§Š êúû¿ÿû'?ùÉh"%ŠD<_³²²²²n€Ü4ô‰O|"w6¦‚}#Å·ä8£Â·¿ýí;ÞñŽ"¦…¡¬9j“̾æ+ܼÛ¾ð…%Uú\ùêÕ«¿ð…/ÀKñþð‡~ô£A¯"YóÉ|hÈ­"îòø@cĦ©¿øÅ/b¢ŸØ|5…º•õˆG<âûßÿ¾Il)üï-ªË׬¬¬¬¬ À•Çi}@{ì±–xO8á ™k×®59l‰þÝéNwzå+_ Seý÷ÿ7h4qmº8(ò‡ÀÔßÿþ÷]t‘”ÛÜæ6¾« ­m¤òe:ìK^?ÿùÏí±âõŠs»åpÀüé_ÿú×?ùÉOÐyä‘*Uó?ÿùÏPYEöddlsÈÈÈÈȸÁXp¬%üÔ·½ímGuÔç>÷9므ÐÖ'la˜ï{ÁÀ¿SN9åÍo~³íZè­ãrLÁ$pu+}ëMnrq)èÀu Àøu“Ì&·qãÎä#ƒÁÀxÕªU Ò£$‰‚¶hqÄ(î*‹‹,’CÖ@Ö@Ö@Ö@ÖÀ ÖÀ‚Y5¾Â<+¸gŸ}ö3žñ @( _xá…‡z(X=þøã_ÿú׿üå/¿úê«a­Üð†E8¯X„Çoqd¡/¸…¦¼ç€ê(…'JX«8qemx!’«²®²„NK¹¾P±5é4ª÷úèGz4еê…ÅÑ®m[áˆgFVYôý»­"EwW]}4‹#½²Ñ鋾Çós^=·aÉ)$l{•¾K"]»„Ë `´‚&ug:”Б‡ Í?ÛÛlkô¸råʨˆYhSÇÿõ_ÿÅCuë‡a :œ`o ƒa+ÁrýT€($6½Œ>~9|YÓÑPYY8Íë I8Ó€ÖT³º >ûRI‚x“6jI¶½â`òœ´êâ+Ĺè¼mkßý)´&)úQ·êÍÅ Ãz6Fxž«Ñ[ŒÒv¿òç·F?èn×øYUÃü 6µ{¼µÑs‘Eü‹®´—Ÿsªû# …N7cʸA“H¯tuƒ#óÀåÀàx¦5ÀcÍUðf#46?lZصB_¨\m„† ì yfŠâ\á»Þõ®" îÇ?þq èï|gü-ã8¾ÃÆ«Ûßþöª¾Ç=îkùË_Ú‡E­l‡Õ_µ[ †µv~Y?Þo¿ý¤yIE1MHÄ×§qµè-kƶ’Yøß×Wdq¤ë>Z…F¡ÌÅѨm[áç§™ñTÝjÌó¶ô‹8ÅÀ+æ“4¿ú/Ööê_ÍŒ‡\ï{+rW™à…©±üœG¿°f,¿¾7pWصý5oSÐ~±[=Á~Ï,št'`h$ôuÀ:üðÃM;;%CÖ×¾ö5è+¶}ùË_¾Å-nY:è ÄŸÿü祃mã”_üâvW¹™ÞPúãÿÈSqëWdw´RpLþÝßýÝw¿û]ˆN­R¾÷½ï=á 
O@Æ!Vêë_ÿzŒ”U»íÓáÑúùUÓˈ· jÇJoúj‘GÙïv±Í÷Œº†… C,¾ˆÛëð$xbUBÛkcwÐ.]¬áº»|Äc°ƒ"{t––j²+«¥!®žEüÓÖ¹ù9×Ñ ÀÄ*cJô¾”˜ý¢Ÿ]æ÷Wb+c ç8»·|VéÿøÇ5ÒI!$ŸÕndêö]ïz—¬O|ââ~ ŸýìgéèŒ3Îà¿õ­o•u饗J—kÙ­wˆy´¶tq¦MG¥ïyÏ{`-sΧŸ~:2Y±dë]d¿±~ðƒ<æ7¼á 0õg?ûn<ã`+¾Ý`!«ÈÛíR.šDWµÅ¶êÐ*qñE€G·jWü2«Û½$¢£uwÕØ™A•¸(#ñëö ,ÊÖÍlT~ÎiCwó£ªßû®x%HWÁîêõVo5%%éý"[Mòð&O;í4'`p5 6a*˜ä§zH;ñÊY•~ö)Gá1˜„ÓvG;ZÒ4u¸¡öT;ÒXvV;]ë˜cŽQ)† ŸuÖY—]v?û#ùˆ–Sթݦë¿øÅö‚ñ}ŸùÌgz™8´Dl•^ß\„Üh‘VЍH¸û•¼jŒç‡!æj2µx@- èÊÝPûî¯B»43V=â“¢ùFZ»_˜y¬ÑÔPÕtíÙ6ˆg~›£ªý„£ñk5+áá—>G5Î/Ûüœ‡þ™5ë‰lZ ²ýÒa®ßU½3oì9Ž6h˜0³=©¬[Ì Ëò”Ë+®¸Â-PGÖ4/ÕÌ,=:¬CQ–™`.-ͺV(è–íP7¿«J§SÜ!OUÖP@œ 1)]1Ÿ)vÄ# ç@¦HŸ)ç¶¥öè”m[Wésnׄ6:tæc¶âE™µmoû,ʆWbC˜…êvQFòs®[!Bx½áYíÀøß€gàáÄ (¿KгhBü°ÍEÃE Oyœ(#žq¨‰²Ô³ƒÀ£#1”q;3"nŽ,Ìv«xlÔŠx qeVBï(U²mCˆDUŠ€™vu»-ñ"H‰–†–ôŽÈân¯6zªÍžRMèaôæÎ7!Úëç ∇aç™ìA”:=ñÛ÷°K•v»µbçEÍÏyèʳOu¸j‘H92;¯Ïë£\|}Âí¹éÑgá‹kÅ^h ÷ܾ˒g Ì¡6֋^ÔZÍ¢1ULu=E­³ènN6ÚÍ¢«¯gd󺡥ƒí¢Ù*ZÅ’9&³Þ 0æ×5ì9c®Û(ú·IçÂäßnw¦ÈÈÈØýèï.’ÏÜ9:2V+ºzоޢ«Vt¯[»±³«¯³Ý]L¶††dÚ¬s|Ëa>»_Ì\ãîÑ@àÝ£ç\KÖ@Ö@Ö@ÒÀÄfçôÕkÛ]´{§Æ‹ñ ­KW67EgQï*&kE«{ÃÆÑþž¥Ye‹[€wÿæÖe d ,$ ÔŠZO:ȯ>Ùîh×:µþ®Þþžúš¢3%—UnË—®j´çÆ‘-íÌÿ€óC555°»4P+z†:Ç'LCwÍ¢é KÁ×݃Å1w8®p~|GQ__´ÆŠÑ‘FO­gw‰•ë™ dž½çZ³²öB ŒNÔÛ5w©¥ÓlÛE·»4Š.­&ŠÕËIËÛ‹î¡bb¬pÆø^¨¡½«É€÷®þέÍȘG tt•{°j^Œ,ÚÞR¬ë/NN°¿Î¾Âê°36Ë‹´I:#ð99ÐÛÏñí´ÂÛ(ßg¶Ç:V ­ºvͺ¾žŽþb²oô_¾òá©Z«»¿¶k^vÙÕ6¿âÏékH€ç¤s3ωZ3Ó¬=\Si‹3Xíèô¢‘fÖ}]›ÅIǼè쯾Ӽtçk ÍŽF"Ëa^50§œ§ çµosåYY{—,óÂ[§9´ÒX®e`µââËþ˜"¦ }¹Í»HõÑfç³2/æÞÍmËÈXPð†o9«\ú¼úÂÝŽâ®÷¾cyüF)oWQK«Å{×>TOíaòôœè9OAωZ3Ó¬=\í‹G5gL–ÑE»+v`IÞtåø’CúÓ-'¸»Õî˜lîbxoñ/þœNAç†=þùÈ ÈÈØS4Ðáô«4ßœ¦›…“8ZN¢ú‹%7í÷oË)Ñ53ÔN.÷K§¼­òô¢íÚܰ¬¬§–™å.GN¶ŠÛ­šE˾hn0\¾ví&kÀ5/OMèÉÖ”4,8ù³@»TÙÞ¥êÌ̲²²v ò¬ä§ã&5 …}Ñ°Ö q­ø–€ÛÓß+«Ñ*:ùÂ9,j ä^ÔÝ›—55° 4€ýÝ¢æ²å¯56Q_}àÊf»ð †vGáÄ&«³}^P·ë…ɼëuš9f Üp ÔÇÜ(F¦Š‘¢ulCÑž,šõbd²XS—'ñúb]:,©³¡ÇfÚö( 8ݰ¦Ï½>þ[t÷]½Em¸¯;í‰î,–-LY݃½HrXÔÈSЋº{sãö< t¾ÿ]ºG¥ì6ÊÚ“SKÛØuƒàÂQšì²y'‡¬¬=T€÷ÐŽËb/V ¤]²1Oy] c¦r¢àûôßd:Ûq ÙAºNG9–5°çi ¿<'}–ßžµî LM*w£å+(¢íô^¨-³÷9ê!·=àèâòC/ûëÕ½GNv oYûÅož3}láÞ ™ÜƬùÐÀœ¾œxNº4ðœ¨uo`Úò¹º–^N¯ÙåYt™{îp(!xSqƒß{ÎwŸ×pR’—IØà½á©Èmœ? 
Ì)ç)èùëØ\sÖÀv4¦ Ëã‚Ó?1ÛsÒÁÀ×Ýýb íÛIÇwð—óO8)*‡¬=QyôžØkYæÅ«zÑÑ„­Îj€¬]µv:'É +­±zrv÷)6¬ç 7ëÎj˜,Ú¾›CÖ@ÖÀžª À{jÏe¹§L=;¡Üòœfž·üÕúKÏxªèîj=Eg¯SýË7”§r«²ö äù«½¢›s#÷ 8¨¿ÓéŒ$W¯$9Ð5ÁrÇ ¨—Ce·'7uôAgé9d d ì©ȼ§ö\–{‘j nÿè†Vowx¸³˜l–ßÑ)F68'‰g¼Â,usӺΕ‹T ¹YY{ò.è9éä¼ zNÔº70õµºÎ¢Ù“vAwyÉ_éè{ÿ§­lßtêÏ«'êµk‡/^uÓ¾ ë¯þò·ß—¿V·7<¹ó¨9ÝxNz6ðœ¨uo`:€íqæ`0ìÓ웜ƒUœúß;ý+÷öúïèµÅàò¢ð—CÖ@ÖÀœi`N8oš³~ËŒ³n€:Ç‹ÎqÓÍM_Çéª]­´Ú,Þúí*~sÉx É>¬ÁÕeÊ ¨"ÉÈXȼ0ú!K‘5°â[KßÉñ¥Øô6p­Þô²¯£°ÌG;Ç¿£8üVËZð¹Q4íÌŠw†³ê²²öL ä)è9é·<='jݘšjî,Æj €û‹¦ÝXeBcróúîƒÒ’ü,cíØÔÚáNñ¡{ƒVr³æKy z¾4ŸëÍØíàü–ç?· /$¥×Â!a`¸Ïai1xy19µ¦Y¬èiLWïvùr…YY»L În4SSS}}}n»ººÄ[­–Äz½ÞÝ==ï622244d¡uëÖ­X±B¼"k6›µZ-†0Xõô˜ÅK¡Ýn» Vh:;;£"YØÊ'‰ÜþþéO¾E]“““½½ãÞì'-ç°çkÀx  vƒÍé=)ƒ×VLÇgû}DÇkZÃ9½ôœvšµ0*¿ÀØ®BÔÚQ4˻β€£°Ué¯]4äxUÚG•îô¯âåkÓS›{ûж%t,º|¼1$vç‰LÊŽ¶“ÇšæúÓˆg¯ÛTÞ,FBÿIÕíRùAè(]»6 ÷/õJúÄÈÔ£—™÷ª°×yÀ€VC\Wèë ‰%B¾ JÅ¡ïøø¸,0¹i“¢ô…£›7oºÁ²Ê L…¾UP èUŒŽŽFE7nÁ6ÐE}Ñ Žºªâ²·˜Üꯄƺ#‹ZGBO?¶tr×–ã3oŒ>Ê/cÞ~ÞR±ÛŽôíãéP¦Çaµ¢î­.’T]0xj³¤uvԌԷ‹ŠÏ~X'cQÖ‘*goˆ´‹ÞV:ã´·hS—ˆk©·v×ÈØÄ`ÿÒщIh<0˜\ŽæT²É9ìB ,8@ò_+¿òÂýöÛÏ.N¯¼ÞÊÓ•¸~ýú%K–@Üðh¥Œ¹á®n!qà.žƒƒƒ Z¢\5Šgµ@Y‹úº…åAŒ«:ÆŠì à†måU‹ÿÉYY Q¼¢é°žÒÏ¥ ˜ÈmÒ¶$üý±ÁºtP“']úaüfáq™]09ÜuµÒ)$EGóÝL¤ˆ::‹žÉ±F_orr)í¤N¸á}"©M{À¥8ÿË–’Ž6©q‚UŸÆþ{S¸n"º0®egÐØ–»tyrr¶Ò,zm1Ý,¿kØó™xqãÛ¾åGuã9í"Ú VùÜVÍ~ûï¿¿DàêzÕUWI tÍš5"ÜYj–/_@ wÉ‚ Rš+– q¯¹æ€ñ¤D€ ªÍZc:UJÑìéël·[=ݨêãÎ4M½w¦›?À±ÕŒéWDü´n<Ÿ]Æ!æ~c¹€]×¥K—Š\{íµU5áƒÂH¹à9ÒÅEd!ÆÁ’0Åqš#=ÈÀ9WúÂQ8l›v†¬Ð7f­+ô% ‚ Û°aC  /æQc¾f ,r L£ï–V¦]K[p+Ìv\·äÏþ_»¨ü•A]ñ[¥R”Ðd¦Š·ÀEBÐ3QO?e?Êf1võÆ¢ÝcY‹fc‹Ç\‚JYÇO:5$jM•Âc#‡Y¢øž}ݘ··í¦7 I–tS­ÞNÓTé¶d0sØ…ØŽþw!÷À :¾â¯¸ýío/rÏ{ÞóK_ú’ŒŸ+W®<ÿüóo~ó›Œ{챟ûÜç'P4ù¬¢w¾ó~ðƒe=àøáÈé”ȯ5ƒ}ÁÜêV·â?ö±ýÖ·¾€»«Þ÷¾÷Ýÿþ÷RŸÿü竸‚8Ÿ{î¹·¾õ­Ý>éIOú×ý×eË–I æ7 u¹HÖÀž¦&ӽХÇÄËL/IE|‹÷Ä–ÌÞœ”~i ¤ w“†$•ï_ùרº¼û®ñô—Π㣕ëÁeér"µ(Ö¯;õÝ#i·6uµW-ë:è ƒd™|Ž‚w¹Ë]N<ñDqì ~ÎsžºÕ÷¾÷½?þxYRV¯^}Ê)§T»}ÈCò´§=-°yß}÷É^L †÷º×½¢‘²ƒ+V<éÓN1”ÄØ}ÎÊX¸ð¼7Ú ãÕVÂÇ©v}¤¾y¬µy¢=:Ùmnh´ÇƧֻú!NNmš}C€§²Ól¤êç]þ5Ç&7•è[Bo«>²nªÑö5ãí5ãíkGÚkëíñ$x€¸×¶Û×´Ûm?í¨W¶¯N·,Ѧ¨aC{¾´›cÍ$öD{<”õ»ŒúDZR¦G̳oÅž[‚ù\GS­öúV{íÔÄšöøèô˜fmûq•¸ÜÖ@IDAT·yReêi76M•0¼ç¶tG’‡¹^»vm€Q¼¹Ê’›|Ýÿý¹dâÊSV…V;b·Óy³Ÿ3‚»"hF´Ä¯P±$ùãÿx­Z^2å \µŸ'zõÕWÃHÄ”e)÷¾÷½ï…^ˆ‚¹‡?üá`O¹ÇsÌ×¾ö5<•¢S3ϱ5ZŠ 
g¥Üòhe=èAÚgŸ}¤c¶¿ûÝ杖cMïw½ë]«-]êúÁ~@û±L¥•ÀÛÄZ²¬U«V¹*¸]²œ˜5°Ð5¢³kÚ ëeßî®>Ãäô‚7}Ò^Ãf_wï¦õzO÷ ÝÃTN——i}´ŠF_OOijkÿº¦ht ,MÇpýc펉¢£«j=é‡Xî½zÜ}ŸñO÷zÎóþñÍÉ©'ÜçÔ“ñÚç>ýU>uÜ}O ›‰þú—Ë{jv¥íÓ©šë¬NTº×Í?§f·:{{†Ø¿µk×QfwÏpQH“З§õåµCRœ’›¶›w·6Ï^DÍÝ•ýÇÜ*'|áÈy}&6úØ-dd1 *…ýG{ƒv‰0ó¶¯R‚:!š#qͽhó›ß¼øÅ/uGu”Iéßþö·U\eQqÄ@ô^yå•Ts»ÛÝN"z·¼å-Í3Q¸Ýö¶·³«hùÈ#4mƒ4ÄJѬÍb°mÎúóŸÿ 2• Ù\-<ÿéO2,2›„Þ"vEPE@¯©lò“[ÆW49’5°§hÀk´ ¯:ŠÞÞN¯ t[´=¶£w통û.[ÕÙQbd½¹|é~ çµm¾ø-…éiéxÚØ †Ë¤Ž×÷tv ®Ú÷ Æšv×’ôvêäDÿ`ÀH¡ !l.Îÿþ¹I€‘â„ÇœúñÏîsÐ׎Ýå}ûÞd0VxðøäHwo÷ØÔØ`ÏRUNC°"vÞ£*ëßs/#×L ­âì³òóŠí±®Ñ5õÿéyKZûvO wO®zì­_¶êeW\þ¿ý¾Úâýö%L»àÖV_0 b“PôlÀ3“G\¡À®êñy`°WµAã°[ÀäìF>ãŒ3lª«oyË[ît§;A28Š@ã]£,|¥#Z£,ð©`p7uLe\Eè”':åR+n)U‡Ï`U ˆ•åadh0D,…jáGHÙ6+ყ‘Dt¡N•²ƒ"Û2É)Y E~v|FïÉÖŠÁN_e*6®Y±|Éêe«¯¹zÍ~fü"›Ý 0AoüÝѧ¸*ê eK—¤7_uµ¡èêpîõ?=ú”Ïüò hjÜvÉqóÖÝéhˆá¢qmÑåÄØáb´¾)' t]ÛÅøµ|·ò»å‘ý½¢Þ×3TµŽïº.$ôÝëœà¡C´Ôäævòqû D†öíüô×ÎJ:V<ý~g|îÇoMïfù£gy‹ô¤vž¡fç¡/á_¾rNš«ô7kí§WnEž‰…é«ÖÄèh_×P1Ò(úºçÁbyçA­kÇjˆ9ÞíFÑéª MÔÇ75V®ÂEýª¢ÝßFŸõŒ×ýæ·¿=ôÐC»'^øÐw¬m^Þ±lò¼ož5²i´wIÏèäè`ï2‡·½¥ X‰½€[E­¿˜4éØ×½y|b°«ÏÛG¸ÇIûwÚßXrÀþ«O¼ï»Æ»6u­lžsÁë»VnQÕ¢û—ц¯•kËns´@$æ’ñè¼ «Ñ¼¸€á]¥€ù`ÍÀ^À[Õ$èÀ¸­&œíŠzãßxÚi§ò…‚h ¨¦»¥Ì3×Ð †Æ,tçjû•yf)vrÚ€jd®1¨1®Q#¸Å“§€-ÅëF˜‹pv8਋(—‚XÙnP»Ü •® ìß.qNÌXÐðîdZï}Ï;ÞùÞ³ßÙ-zŠæ¦bŲþPk}Që-Zú’otH8ø¿°·ÔFkb*-å­ž¢ß«1ÓË“:kªÆf1ÔÕë Ê¢ÙÛ×Õß×»áÚ‘eýCÝÿçÛg惼&ÀôŸoÆ»>qJá-~Óuãûá‘É̓½Ó“Øð–sŸªŸa¯`èëЫžÁîfÑèí/?eÑ.¾ô£³ ‡ëð€ßø‘žV˜Zй,ß|ÂÅÜþJjﱩd°ó .„W}m :üðÃáq Ï®’&y~ó´2E ÉÀ…¨Ú,¡ Ô<úè£ei3,ôƒüõ¯í6À˜F>ø`”0Øõøƒ¬àöÇ?þQz§ñùÝï~§.¹ Ô‹F7½éM¡¯½mTŒRºIÄe‰+«?¬:Ëd‰[øÄë»j±‰ÖAqñë#ÎéY ]>dúq²øÕÏ»K”íä—sjæä:Š•Ý‡WÅ5åålÛ3}òÆVÅÚ}>u\ëÜxùUpôI÷?ùÄ¿ùùÇ|´65pÒÑÏ>ö¶Ç½âé¯n]ÝîèÒ?6‹úðÊÞbÀš0¤h|óVdë/V.Ù/±&v9 UŸH§à5l€.½Ý´-³\xÞòÚqJÙ‹B§…ûƈ}ìÅÄx±qss]Ò›uã•ZÑ¿d*M>[ÜsÀ`÷dÑm(´8ƒs–}5Øð tàEH‚¾²¤F~Wia>‡4_ «&uµ \rÈ!Nذ™Ó©‘úЇns›ÛP 2hê>ðà–"¾ÿýïÛÃŒÆ';¥ím¶WY– îu#Y Ú¯|å+q ¿ýío{§Nƒm îÈŽ;Üá¼^³ÄÿùŸÿy{܃w«”!÷ùÈG¢„»ßøÆ7î|ç;ë˜þ„w»Ý>0DÒ(d 12¿ùïí2̉Yó©®­¹žžâÐN‹©€uæ5‹Çý¬Cûmºjb°sø´>Ùo®_»ñCW?o–ÒÂÀÒ ‰Ónh0h›ËZj jcñɯ¼?!èæâ”G]pÆWÏJ~˜%Þ%E{¬\²ä¦C7µeºÞœìëìo¶¦|‚pj|²‡÷lõ©Ñ¸æÊ5I~È2Ut/éZÚ¹|¼9UnzM%'xZbѽÎæuvýÝcÅæ®¢³Ç÷©ºm/±ˆ^oŒ$•,+dõ<ÍEL›uZg‹åÀnƒ¤˜+·WØp@ÃtŽH.±Kk—µëî~÷»Ú˜U>çœs´ó¼óÎ wÕA˜ƒF<Äaªá=ïyÍÏù 
ŽõìµÂL´7O¦S@á½Céà G%LzÑ·éÖ‰z:¡BN™?Çšzâ¡§rÛ÷½é6ŸzÑò>gèìßì¼ÇÝìEéÅÝö憗mÉ1Ò>î§¥·E¹ ™µ8Þâõ[ô2nËÇcNÞp„5›‘vs¤=5’N‡˜šÚì ˆvû/íãïô:Í®O9c"U]j¨Þ\?Õ¾¢Þ¾fssfþÖŽŒ·Æèp¢uÞKžu fW DtM¯•ê\îxŠØÑ"#Í‹½ÎÜlh`³UwêÅåÚ´ÑõJôø„'HK¥¨m 3﨎4ÚkZíY”1ý>4 ‚ÝTú×_Ë›ÁckSm^°^Ó~Ä-ž©Ó[Žèh·7yñ»ž¶à¤.ðWjñH¨§OôPÝß씳 ©aPLgãÔè2ì*IÓüêB Ϋ:ì°Ã4ÛõÃþ0ñxÆö¤‰8òf7»™¬‡>ô¡Ž¥œ)ùé§Ÿn¿´¬»ÝínÞÙåOËåzº~æ3Ÿñb’,-:#úÞñŽw”åý`'VÎdþ£®G=êQŸýìg#+Äæ3‰·Šc(%:L$úo+š|›5°­Jôe‹Ãª•‡E”ìü¦ƒáã[š¿ÝÀéP‹ß¶Û?o¿òfg¦ÈEåyãíÍã0oýøH³µ¡}¯›?=Y~hÐpæÅ,c?mÁ•v¾FûʱëÆ"õúd¿þkÚ?âåºíF}jtr£±˜üHPbóÌEéοÞZž(Ï÷˜¥8sNΆû‹&·§ 8€G¦ FÆW´Ú›r¥¦aÎÆV ‡n5¢n^sUyfH"‘åÚyÈI¥f×'a¥¡œ=­*-yÏFkc{dSàKÚíKÛ'ÜíM)>ÚÞîö¯M¸…Ù»A?!ä,®ÔG8íåþšEà¹N#–ˆ^N# noÓ0ƒõkp=š¼rsÒ†x‰”ä–¥Ê2Óy³¥,f®|Nfºª{Õo¸xmØ\Únÿº}Ò‘g¦Þ¿²Ý¤üF{óºÑÔ‚z{“É`÷h9*QyvÕï Ôs Àó¹¬a׬øÆ¾p˜‡Æâ««7´*úØók±±Ÿ¹"“h-•Çç÷ßÿ(…,ÞVr½U)˽5ÀÞ³\¸‹‰ÄàoÞ‹êÇ^èJ˜ÉØUè´Êj½ÓŸW+£>|Ps†/Å¥EÊ´TZe%º¹ åf«¿­•#is“eàþâšuë—îÓ7Öê[bsÓ©§ž”Öe-¥Î~ uú –§ «!Ö‚Ûå›»õµSÝC=iáÙ‡yÖ5Òû©Þ\µwÓ×jGÃCNÈòRwÚcµ!mÓ¼veÚæG_n{^hwÅdùµ{[›R¿újSê¼–oƒÅØxÃib]åkYMù]EÓ;ÐV]KÝœüÔø³g)ÑÞTt”ë²>‘Ê£lÛÀ\îa.ùÍúÀ¡üSi]åƒE¤lY`¯òsd¦f(nfòüÅÃé¬v9ÙÓD–Xq*˜ÍS†MVËã„©@hY@Úû?"VàÊ‘õö‘[¨i-œ+ˆFYØ[¢Ô…³c¶¤  bŒ¬B_‰rvAF¤ÈÊ!k`k `(L˜k²­a«ÓµƒÅ"7=°s&»¼¨Óèiœsá§ tW¯Û´lõÒF±adbÓ’åE»wÿ[”"±%`r–!¾ìØ-­)¿ ØväNÑÕQlü릥«–<íÞ'lüssYÇ¡+VŸô¨S/»öÒï°ïé>cpYßú‘‰ —_räMn‘ÞM2V)VÕJÐ;QL\Qô´àö 9X¬£ðsú¾®oë¦VÓj«4È÷!cgšts$ki;·#w‹É¾žÁÔ4ÃŽÞbÓ•ÍÂY—É Ç_<3”ïÁ˜%·Šn<Ô[tÂtÙŽY­pÖD¢ì«J#›k‹ç<ôUg|ä ÈaXF=ÅŠýt} FòÔŒ2H©þ$ˆgžVÍöÿYp í€"Ô„ÙpFyœq.I¸³ÐÛüŒ ZãL`éážò¡ñq‹À•G DÁ'w6à“§+ú*è+žñD{³a¹ˆ²q‹s ¤l_95kàFj Œie[grcΪàÍ\!¾¹µpŒ—S€—°É¢ËW/¹ró•ˆ{–A_ Ww0óÒd÷Ù’šÝÆiºhçƒ6m±Þ3¢©Ò´uyéê%¶ã~ôç¤÷Œ'Š—>àœ³¿t:ëïã´K0-êÛïÈ[âø{=shã!·Xy¿ÖHóÄ;¼µØwdèf]ïúìkw^’ÝCIp^kê^_HìèNpξ֗Zë†Y]]­ÉbS› &“n½ƒ;Yì;|PzGÈp/ˆ((==¦GºÒíÌÇc§㔳’WƒRœ†‰–‰ÖTÇ@ªÜÃC+Ò›Hem©‡5 áuà}»Þœ¨uòÛk¥S>xÅßNK²w.8½ Ó„0 : WW y¨â ˆ"€¬ 9 T ŸR¢äF#F)ÏØMŽmÀ-&☇Ç,Rw"ޏJ°¢”ˆÄ²v½Ž’…Nï§ÆÝu¶ŒQKYé«òÍÒßìJîÞÖ[ź ›V,[V¤;‡î\>¼ÏT±±;Ìpó×ÝC[wv<ÝÀÔ‚Q“^ãÅ@øyÑd`kŽyjm‚N;cÒY­%Cœçv1Ññ±ï}(MA¯/NûÇÏø;/K/˘åušDš[@ìµ¢Û‘žNKŸXÖ:H¦/×oxåëßòî—ÖúµþÞÎZË'2áî±}¼aÆÄÆb°wÅîù²ÁÝëǯ8ÿ?Î-Ÿ„z‰Á>¥\~M9˜]àñZ^(¡5Ù4 ÚEWÓ'7j½×¶ûô‰$ª]žÆ;âœàIó­Ú@¢·­GŒ%šœ—"Íš—§{^볓fï£N'5.¨V²)ಂÉÊõ ÷4/yæ\q,úÊ­È9°„¸˜Àæ˜FF0óV\à* 
[:‰bÐŽCådG]•„!Æv¯jĤâ)^9ÙۥωYÓ` Kè-gsÓÑ$ÐI¶¯ú¥Öú6Š)§Tõ%ohõ­ÆõÆ«í}ݓͩfw_½hÖ‹qk²KÓÑ °VØ’ÓÝ7™ä{³Éü¤qâ=k¯¹×tÚ‡ ødã'ZEÿäŸü·½ÿÛ/÷’Ùýdï!™ëTqÜ]ŽÛ¿çõk÷ŸœØoòŠ‘?t­j|æÂÏþhÌYHÅhg1ØÁ­Ô¢RþÔtínw¿å£þã¢ÿ›Æ1šM¹­q_½èí‹¶#~ÜÝN=ÿ§'¿q—kw˜ {;Ú¥3ÏìÆ?ÕCEù|Ýä2×@ 3ÃÍþcïøìÞÑýVtßdt|¬Þ¿¾Ý=yíпòÝofㆠKW-³G{¼1ÙÛÓ[v#ÁÓÁbåãjlQ/ÒÍž>\Þbî*{¾àxÏî«-Òëž À[”‘ÿ…²Ò›dXÙfçЈ®Î%X3ÙèìòÚh»†Æìàܺx,2“žÖ[N¹« õ;š¡]p{Ò:â¢92n™Ç{¡õž´“*ï:«8d*µ#¸«]tÓ@‰JöV¥5èæHÑÙv`qÉ7È3Ÿ%x#ÁX«˜(AzsqìC^ñÙ¾9Ñp‚qYp‹Ef•Ë-cd³²K_"šÞ.w¯7œÿ£W¥)_] ƒ9§>A?‚f6ŠcŽzæ×~ö¡ÀèmÂê®·;F›éy±–<ˆ¦F‹³<Ÿaˆ3I›ìŽiuw®+Ö÷ù,a19à|ŽKûÙžrþÏÎKþ>`S´áÛÔá>–‘Ô¦ÚS½=voõuõß'išu…Îí±¬yn/s Àiˆ™CÖ@ÖÀÑ€ìÂ]³yÍbŠ›ëŒ'Hœdk£k'Yçžž´à×]ƒ}üÆ{nƒó}}TûÀÇÕz†Ç6Lô=“Su(°aØsxR?»ºÓ·„ÆÂÏJ"EÒ_é0•¯Ò@gÓÄF؅䄦N6Ü"((MΟÖÖÓbs¸à:ÜúH1•¬(ZûLŠøv|B ¼Ð‚™‹€´û¼8M^ŒOyÀ³Ž»÷ žýð÷·&›'<êu'>啽Çñ©c*á5ÊHgª_WÜæÎ·ÿ4Lm"$1kZ›¾ÕÌom0¾Ûxí„e†®Þ¾î®nð:P˜\®MðÀ£vIÅòÖýõ±ÖeE­QôúlR·-ùæǫ̃ûDôØè„Ýûýý^‹êêêž´u§5i(ÐÕ7ËáÀl¥ßóé=Ñ9d d , °{É÷-1Øú\ú…&»Ú,‡{å©5EÏR)¨µ»;zçÞ¿+}ßr¯Õº!>è@kˆÕ_æ3¼­b?†Úamѵ,}°Ñ[n¨­"³¡i)9 ö ¶}¤'½´Ó„>V8»Š>ž—gðP~¡ØÚc 8mÓÐÖÔD­k©ËÕc—C¬GºÖ˜šœèÙòÝ…ÙÊ5Wô­q+ÖEG_g{$!‹óþ탩“Å“ôÂs¾ú®bÒÞ³n§×&/ö5FûzÓ4Gÿ`ñª·¾ mB†Íp­œß5ô(ÕfóSü;k©Áäð²> Äqbó„W>º{iî/™Ø\>^¼íâÒ˼oÜÝÙwð¦Í×ö÷ŒÙ”³¬5ÅÉmõõ×–÷ë$Û°:Ó`¡àFOtõtL6';à hÖZš»³2Í,™sÖÀ^¯?Hf9BDØð…×—Žádñܧž‚¢l_Ñ7ÉIžë`šÔ·°'ŠÃ÷»sÚËäOíñbr]Úx’Ÿ{|’‚pÕ¦šåâ¬DÚªµü¸2ÐC½¨k7¸õçÍuØéJŸ/ùISñgGÏà䦺ÆÿþýÿSK;·G6öôj…õ½…Ù‹+‹}—–ZrÕ¦´šKÃõbŸW:«Xž7ýö–OYohõt¤aF‰¸Ã—s0Î-7ýÃÍ÷%M¸Î6Œóœužù‹v«¯«¯¯«{L{¼%Vö¸ë7x–Åtøðð’î®Þ%ÃËìÒî±Ñµ«vÏÛã1H>ZTŒo˜2jÔWÄétÌt;Ô@_ç55°@4ÀÜv¦©E¶q)|o¾‹ÓÓ™ð­(ÖcWL%³(eY±Ü·ùæ8<ùa/šíìkí·yóè‰~ëhqEモçž÷¡AŽ ßhm1våXz·5Õµ²cùÿúÀýÎIjõ „׎´e«}œ;ù¾¾¡0¨É›/+n®âD |«8½†ƒC‹ÞÕ 0ÖO<ö5ŸºàuìÿPÿÒñMcýK–‰;á˜×¶K‹Æë×?úާ ­œèèlž÷õôºí¯~EÇŠRÚ4ñaŽÁd¿' Q/Z“SÝ6¹uv·¦ÊíÇšœà6¹ùЛ6²Q`‚ÓY‡[ötö4F6÷všÁHŸRöý«Â&µ °žñ¨SÏýâéilÕQŒn^Û·´955Ôkº×™u@£h®-îpè]Óþs4žÚ>ó×Þ+úº}¢ªôÖg!Ì^H:»ÞÚ ”›œ5°[5`/½(šœNNš‚æ[ÄŸ—júŠÕƒ'$C¬-ÏiŽ÷¸|âïLŽÏDñì‡}ä¬O>=©Â‹(œ62§·8 ÿ°´09ÔãÕ‡ð—s³PW;Ë7aR$5 —îòöñx}Š{圜®öÕÅ›^þ¶·žÿRdÖ†ÕÖP‚xˆ”æ„ îV½ظ"•'RoÑß9ç”Y4µ$=ç+ïHªk¼Ë¿øË3¨kÃåéMáÂ_| 5eº£Þ1Ð1ºiCï¾ãc­ær\µzûMúÓL£æS󣓽Céò9±ñX ^J7; [Eƒ'›†3|S»¯ßÀ¯ì S £Æ4é´žbÉL]×Ùv{¥!èØPPð¦?¦¹ô‚ÓšÅÄH½{iGwÇ›(Å\8— À §/²$Yå<3'8CÁÐ1ŠÝ 
ÿšÅS~î>=­›|Æ}^ѵ¬¶¦qÍ_={®¸³øÌ/6lJk«©:èÛ(޽ÇñûuѸº{iÿðÉ÷8­ceýŠâòó¿þÉbŸÙu"-!£]bp¹«Ä`kŠÍV£ ·kÂ[© ¸v¿ûÙzÑMW‚{XJ‡ƒPKæ³µcE÷•Ã)â§þ¢c–ÂÌNôL­ô•5¤=1ºìÈ~_R˜˜ªõµ!ïVyip‰–µ{:7ãÑ!±‰ÿ± ý=ÇèïíO»üÍ@[ª+gÒÈma¢Ølöy°Ëæ¿–5÷ŽþòÁ[3½›züÊò=sоª(nÒßY_•”OÛÅ#ï~Ü!˨v××µO}ðkׯ;äðÿºþ’|éí†h}éDÕ±ÉÑÛµr¸~ d¾~Ý䜬ݯҰÚ^ÔÙa0Ùô×,>~á™ÉÉhϺçëÏýÉ›“é……ÉZÎqXÞ™üìŽbÕ>&N·ÔU+>û¥ô‘âŸïþÞ÷ÿôÉgJ/Æl!Øé5® ÀC(¯e“{zŠú˜£k5ÇM§)îä„­^~@²þ vÌYÛq•îÒm»8ñÞÿÜ¿î€ú/9ôy÷~ÏÄð5íÕþ?ïLS£ *Äû»½Ç³‰lKk­©«jƒ+ 54¤Yƒu‹øEW«¯{Úƒ§\¾m½ß|Ö%ŒM ̶(.EfàðlÛjü3YL ê9³ùSæŽ µŠŸôÒ‰õSº¢õ©÷ÿØxk¼½bä_}q±OozÔX+¾ø½O;öD¹ïþêÓ?Ÿöøró‰O~eÚ¯n$Ø5g‡|;Ô@àª'gf ìf †•¯åLë»b»‹°f¿<àÂâÀW'¡8-åZñÜ ÈB¬p.óÕïþÙc&‹ñŽÂJ«Õ½òMV±W[›&¥Äp9u9æ5¡îÖÄ¤ãœÆGGû‡™f_²í$L[îæ™–ºoCw‚í±´¡{|¢èwÜVW121ÕSt8È®¯½à˜·Lü¹{i{ààî›=å6¯é[Rk ¯ÿÈï.†'‹ÞÖ•W\³z¿›|øWïko´¹¸xìÞû¹o‰çd­¾¡X·lrÀp>ˆ Õ´*Ù=>Õè {ÚÆ”°=¸–¤÷Í~—x–^é)尔直 y ]BÊTÌ#É@O¹X(‰²Lß*nýžþ¦&ÆÊ—µ¤>çÈ÷.[¹¼{ =¸¹÷¸[ž²dÿ¾µ£W|îGK¼¥‡4ðZet‘^5³Á¹]ïs0n«ßûD­·Y¬muŒ÷v­"dwYGz)(§U3O=ÕaÏÚ耴„ò,—7šW¦¥ïúø—ŽOï%§>ª›«nÔdæüËÇÒ °{åò¤Õt}—šf¡ÝšˆVC«61,9õv¦=íï µû— ®_·nùÊ[Ð7ÉZšã°É%°ã%B þåÅš ë–,_f3. ccE³gxøÝ_}yò¶'ŠÓø‘ó~ð:”ëÊ €òL®>àÀ$š¦&íZâ@&÷kýºÍËW '̵k¼X¿yýŠ}WnX>]=LŽ•z,)1ÚB»å°¢LB¼ \É(«® ‹ïV)OÇëãCÝýéì$ê™whò~ô‚tþF»xÞño=÷ßÎH¸,ðºN¤€$o%È2?ÙѨ9ã¢Ë»·)¤kYLD ¹äS—Y3.eæô}%dº/3ÊOléžÉÆÔP³«C×uuO42fQ»èÒÿI5J´šå²ˆ!Ê”s8:ÚÝ6Ü鸉býÔå'=ò­ccû-Û¿cIû±wxéÍï¾ÿ—¾ï“ïY²:á­zð(åò¯»FÓDRNhíN‹½ˆÿ¡Ž²²ŒJCu4a“ÊDg2÷w¾î¯[µù:NG11^ÌùACLeªÝü²ô-ÿmOzØÉËjn¾¦cªÑ÷¨»¿ò¦K¯Ø´þëß=o¤>:T묷êå*vÇðÊáM㛽ÄZÚÚé‹|ʦ­ë¹d¨{e½³·«æʱþåëÚkú:†¦lÒ‘^0e¥…¡â’ÆUi$g1iMNŽÚž5Ð×;¾ybˆ‡ kíÀÜ”€¡Yì¿tÅÚkÆŸþ€—|êßÞ¥ì>Kö[;²|Er× †žÓf·Pqª!½Tû¿ôÏH‹„4ÐÓsɧ,`å>Óô•–´ïh9ȳÃÀúJë(Ö4þšè,Ws+Æ+1jÒ5½Èc¯ëí%½ËJøên9†dš¤ü§¬z‹OZ 1CB÷ÿûn R2šMoõ½}ÿŸ½ó€“¤ªöÿíê®êîê8yv6 >Š¢"Q‚ˆ‚•œA’ ¢€ɰä$9P‚ÁÄ{<ÃSòÆÉ3+tWý¿çÖì°øw…æ~zk+ܺáÜšó»çÜsÏ1Ò¡ë‰#­OxÙpb‰ô'6\+jK\¢7‰Ï+¢1›²AÙkx ñf[áÖgΉ)˜ýì¿Å w>s¦¼Â€Ór& ku+™©°èÍÝ×%Bë^ ‡õºÇïõ‹)~¯ðTÿVh D¼SsØÈe`Ook›?Ø¢´ÜÓD5]RßM?½DØ®R‡îpÙÜ»Oaå¯2.{Wⶉ«§fœµÌ–Øn¡&¶ŠjŠ5€Gx§\wÊZØþFOQ½†ùX±)-ûkŽjæss\†U~–VëeÔtÊ0c9ʤ˜l^;Læ5KÝ}Ïr œKªÎBº'>ƒ½[5e³¢r¦t*‚µòt JEû)–|º2Éþº¤µ»ÑŠá•'•’8jtG•Zg?:ݦ—Éà*¹éùNÌ-äYò€ÈEWüP2±‰ã‹ª\èÉSVT{TxtDg;6ZnokϰÝ7Ð Z™•LÈÍZU>‘–@îÉÄ%)jžîÎD;ÿ¾;M%Ä•3?ÞCU®e;Ylf¥Ù<î{‡ÊkЊiCj„ñ˜Mèâ[ñšÜÖh6Õ‰×0° » ™¥õÈ 
U+Õñ€Ÿí¤|ŠÐM“…i•£öé+ý1cðO¯HZ…®ÏÞ‡¿Ž÷CŸ§ú8EŒp+…8"oW†®KÈ/ĽÈWÒrî ‚_ Ï\R£€Dt> «L¨?¿ôY¸Ã¡R·ŠY0êäàø(ÇñJ‰i¤2³õÑ\.&¯BMëCA¦i‰¤Ÿk´2A6çY,;vª»¯X0T¶Þ­.{è8ÕÆº¯ß4Z˜O»@7€áx- ¤UH]uGPK~Ž«Ú|W$N´å Íìpòq®ÉÓ¨  K €;¬è.QR"r-Z‘ФÏRàÄOç2dVÁïµõÔ‰—)(i)Ö™“„ü5p¥AÁñšçÄ–4rê–G¯IÝRƒ£…iÙzmœW£6,9:v¯huîöÿéU1Ä“é ­Ãd¹0\KóEÐp¤da‰÷HÑQŸjÚèL\Nü'{¨y–’2µø(¤ÙYo1r~CÕ®Õbë§i…÷㪙ùí:~¨™WÙ±x®yŠâŸŒœyUFŠrGeÚìlÒóÙ@zJ\Rûå— ·åý&›þÆ–d|?üÿ&ãò~èöT§(°bQ ^kÀýSv¡Šc:c¢GE&Yî !Ò LZaÃÚÁ!îpüˆ©UÖîæIà†x¯¦9 N.‰å´Ù‘ëÆ1! ›xä7Àj^x3nZä‡÷frÞ(“îb{1n6q×AÆ6Ç÷2½y©ËÁÁpÙÁÃÇ=EÝi°šJᮡ»¡ðòXsZµõ¦;|u‹ƒwøêaPë˜Í/8`ã£öùò>ŠÅ_,ˆ@‡ó‰ßRD"€ÐY4ôbp4 ’`Ä‚TZ^•­?‚à“?Š"3¥ jù¨¯Ýr ÙSƒ„tTÂy'¾£¬ Œ7;zÛûÇÞdD£Úä(;y)ËE .9ýªÈò\Ê×ñ(ÁL,£XÖû®t½”!«JIiQÓ¢Žú„ûòHrbNÆ>ÞšºøŒ«(¶‰VƒV$Ð}è¥%º Õd›TÂôc‰ú˜k¦ ôýä²¶­ŒÁ’C»ÅýF\=þÌm ²3ˉKMýåð6ÙtuR¦kò§op`U@ž,mÉ£÷øÿÿÖ{œ‚SÝ›¢À;I îùwE)½M…€hŽãe³6¬ÏñÜ4ûA—w'Dã 7Vé4öÆM·á& ØWWÞtŽ0Þ­5ÊvÜ6ÜD.!˱#CõŽößôdÒ%aëšµË%'BÖIäáÖ„£Wù^}iÕbm»ÓøœhÄ0‹N"ÖeK~8QW¢¶ ’–@5Üjéì N²z‰3éJ©jâ~Ë­”+íEdy'y1ÞÙ¦†¥È¨² ¹eÐ$Ðr:†«¯ŠúÓÓ Åù n1@¼¤ÄŒ D±.£#¨,À< c¼M ä’“‰ÿäÿèJÿÏft†Õs¿yIƒ˜õI9Ì Àáz-kS]ñxƈ€AìßÐî?EÃŒûl+ôä$äÊ€ÒØh!ÓÎC¼2h&Ó ƒ-äÒ ©Læ.z|ú~fÙ>í –$œ–“Чd­Ùo¥Íôx¹œMgÑc¼F‡¨ÿïõã¿×Gxª+àÌüñ-Ú>ù”“ üx‹œoû62OÜ@²­;N&/oÃм^Î4Öroždœg$"òŽj¿Z‚láP1Tç!/æ8qÒë»8÷n—ý»¨X‰0¤ÅkODMÖ–ÝVÙÎfaÖáãQÆ’(Jl¤(IV+Ùq+JX62ñª0t™ª¤ (ª›—}ÃÒÔ$.*¬º`hµÃžîúͶ,ÖÕjñ¢þi}=™LYÈhˆð~ÍKÛn;ÒÖ¾´Ë]ßB‚áE¥Ži…X<@Q¯ÕrÙ"OÓ8©–žÐ¦ÁËó^\yú;l²s8–œ–Ÿ…Ç­ÖÞcu×®,¾ì† ’½tš¤}ªhrhÂpNQK'ãÑ'¯´sÔÁ›]}ÆUÛ‹x*ík²DRå%^…Ûj6â±4ÊA_žÊ-cG¨Ñ£:Лž£Š¤Jÿ…wÿânñö\UìzéCO^!´¢.ÉÀd'í´Ât<)Rz 7Y¬±7í„á;¾Ù`·°Ìž€üTµM¼és/n²ÿÊd°x(”©9ÍtÊ$¢¢x!Öñ‰„Í÷£‚Œ ;=æ±%ó yQÂóÖû Þoý•ïn*MQà?–(útÛ¢ãÒÍü;¾<ñh‚—.ñ=g½2ÎîcŒœ3À0§RIÍ4¨XêFhE 3åÎÝ:l«sY®.†AðÕOîÓ=»c°¼èGOÜ(^Ÿ¢þ¸ñF3_ZdçÍu¾¿Ím®ã ô§fv­'dáÇ`L[=i±šuðÑñ—ûŠ}¨‰ñ†Õ ‚8û°½¦ÅÆ ü·Xl½åh¥ažZ&*â†R¤|Ô¶u)³Q•ÀÃ’‡u3•C+q|É6ã¯=D’=Ûh ©9*Yo“¯ÐgÈ:4~» #…3œu7ÑO@"&DšWÌ٣Ãù|Ž-LaÓ«×™Ò˜Ï5!Ô —åô®¬LòÞ?i €ß?c=ÕÓ) ,3â6检5̸O¬Â6  x\N|PÁIDÇzáCÇ 2%Õ7wüÞywœÈIÀ: i‚õO6`l.»ÿ8¹©ïîõ¤ïÎ{þo/Ížõ¹Ññ¡C¾xú¼Òïg¯óó®< p­Ô\8Ð?gR¯%íæA4±ÈÂÄ3¨Ç-+²ˆ3GAšÐ-±\-¸–J œu’¢­•Ȇ[|îkwßqs¾ÝjŽ‹nÙ4ÌbQ”ÃRÿÀ&,ŽÀ˜N5¶¸,ƒ&ÔI 
tS°™4E5S»n°ß]ÉT`l¤šî@8å“nXÉ4:rƒ¾¸Æ,ãÿ…¹Ø9‰…WT™T¨St)›­—¬–K&5Ê&àŽ|¼[¨k´ŠøK"WR9•±l!…ú½¯ØU©àôSá…¹†<ÄEömº­d:îÕ~Ó^KBò4ÞsIbö%?Jg«ø™¾„Wc+°ò=•ax™–Êݘ›pÓ©ºµ÷žGt[í¥yµ¶ÎUößèÌ÷ù9[í´¹ÇÆ¦I¹ŽÓÍ8jöþññ®bÑuÐ7d1±î(ôÊ6ý,dŽã«ÑáR±«@W³ÿ‹BiÚVšàÿ¬ñ˜jÍÞ‚KqÌ¥¹ç[ä~çnûlIØÂ˜á§‘˜ë¸Är ¬‘ÈP„ƃ…Àäèšãê/?+h>£/#i¾ð}9pÆÏâ« `B³TðÌ)¯·Ôž[^xíÏŽÇxª‰¬¬i4²É3Ðq²³VJâ}~"yã—˜&ˆU” ]>wcìt‘ÖÅöøÊÙ·þèhPÏL521YÚðëŽÍHÓ¼†hqE¾ÌÄ«^-m§½Àiâ,º‘Ý{»ý;;‘…ãùî7:ØK5ð³ø£‡®õ¬ßJíD(ÆX¬PíQ ‚i[–­Ë´Ýo¶Ø—Ì®\ñ ùêЋlª¤ÐAw™|K'¨ )#÷äD능‚Ží½ÆXÿKm³xtãÏ/“½XLh='›Ë…n9ž]×ÏecÝ&¾>L[v. ÊËúmË X,´ÖåsòZ,F˸a.uÛ£gH,ÝÝb€ì[é:TbêÖ¨ä 9Tô¦%¬ëî?_-j©öøž|óÚ_ÇœÃmÄúô@[.>™®Ôêc]Åц{b>ÇR¹¨©Iqµ×f‡þহßÅíq,¯Q]0tï¯4Àï¯ñžêí–„…÷b`de¨‘›–ïÿX0y¡‰G' }âmшn`±æòM5{>Aäz'X# L|HA]T×ìºqÄ竉bXE\_y„Ån'“!ô€_õŒYð!jV¹`¢6I,©j}ÔÎdê^- ¢C+“Æé#ŲrÙ/¬_ŠuT,™PC± 6Y"¤‚S(Hu]©„ÝðØt›J¤,Pçºk®`Q•œ»mrä­¿¼D€ %ð@³Ø“(ù611£³ÅzBù öd"DÚ-¯OÎIÝ÷ۛ䑖€+aÍD+š€Ø‰ˆè )ÃÈ„B+D‘Em½}bÆUPQèÈi¿äK…c~,×Å ˜~3@E‘ñVÒk¶°~6‰!qPUp D1Šø5è¥)1 Q¨c¨¥ .NOŽŒŽÚð8m6T­24kÕ3hj™;é·ÀÊéâ½*3WeÒG/hXÉ´×ò«µáBÆfGx6-[­ª§#Ó.ƒ¦’.NU^‘"/ qÒ [oVü›AÅsµ÷Q’1žJS˜¢À4{æù?ø_ÄÉ ç˽Ù-Y½DO{â·Î¯,ôÂjNšM§¨ ¤Pbå ï [ĪË6á³àVN]vóE²¸XPµ ª—Y‰ú¤û†E®Þ¶³ÛG<è3§ºÁÍ­Ñüé_½èÏŸ°ëº·Êȵ‚Ê0ù{™Ê%4Ùí&Œ@€qOU<Ö^£®ƒ(úWGÍœ•hÏgÒv™§QÖQ+çæ¨ùtÁãÅê[[óÕU÷8þ ¦êù#7»ì€MOÿúƇÕ# J™["CÉɡҘXÉ$‹hi P¨b—lÊ·ë®±é¶ì±ó&t3ŽûìÇl~þŽŸÜ—À~x´ÅΆ*—ñY5Í~°6ÚˆW²L é¹1q–i‚¦ ÄASRºÎLG~ùâÊÊ·Ï Z&B÷èX-SbiÌ~»œD«œW #fíðÅcÇ3*(š‰dÉ—ù e U/'žøÝ:›»&˜NED“îÑN7PÕÅÕ…!ž%±üÆŸ‰òÚ»2„^ðêþчœåH«‰0ˆ—†›U*ËÆ$­b ¶¥^,- E®Ù Û­2‚2&X|°ä”“l%jµ‘Ž|¦6ZÇü€Ný3Bð˜êMΑsJÐ3V=ðêemj-Ízÿ¤) øý3ÖS=¢À²S†,†Iê¿û—\—Ä„û¬"¾HÒòÜ`å:Aô?Tš¼ÔfÔ:nªì\Òk9¦¶z±u È¤Ô _&ä¨#6»ãø+¶§.2´ÍRrª)á“„"$È»­V¹•,lE­±„&é2ûöõô†ž«&Ú÷ðxµÀ°]YãØÝØ?ö\gW윾wægI_:ëê_ã‰ãQYeà³wÇ3I;ÓL,„Ú#èWŽDŠŽÉ+ºz‚ÍaxB3R8#EÔ׋ݴfEMS¼¢ŽÜT»ß ð§œØXcte`9¸ø1Ù)ù´o^þö…¿K.¹ü“h"1²Æ‚’s$@ R1d,¢ªIA)¶4;ÂVî¼\Óî[Þ™œ,*Ú=©=7?Vu•G› ôà=f.&‘ zÀ“RÓÈ$ðÎQPZÆ ²o´Ðî¾¾ÉqaÕX÷ƒqÁߨü”1§µÌ<íâ〷ZÍ™7úêʳW:õzÓÎ%ˆK‘J'’9‚«-[ðrA'H#'P>ÛaP¬&ñk)Ï)üYâ5š L‚ÕG–JÅuD÷‰öèÔûIž HQ¶Ò¨´á`‹¢!¹¥ðD<õZfœqsõ‚.à”ÃB˜q“ù TqR”%#¥÷Nc·=Ú¸¥j[¡IVÂL§$2q£á&0ŽTê‘´t‚$KØ®[²“š"/wöÙñ´ÞÆ*v­+ß™ùÐty·M*úÝÉf±á~~®“©¹ù‘[ž8ßìº š~c.ŒÃÙÇ[næjể‰ò__|éøÍî®æÆœ¸_.{mqµYxÕ‰½rý“7²KÉ&Ä"ݬ’›­EÒÁ¶B§ hVÅêf2kÖ£A+4³É"îY´&|‹Ïܘa 묶Ê1S) 
XÝb«kó@IDATÞL(cl¤=´;†gÓ$s¯D§Ls˜Ôñv‹¥-™œÈòz«‰-—Þ5Ó'¢­è‰ÎL¥wœ|”éû~TrïxS¾3pdž„á¡NËa£Zz©º”í7ÊVýГ?}tr ¦÷ÎÔùJ©‰U>Z©‘–pYá0|! Ÿ ZõÎðÕ0ì×?î/ïT ÃÅa8/<âãç‡Ãº1­Ð÷h%}9p¡›WåÚ«¸´r$C^ ÃWt] üèiRé îc-ô+R•zÐñа†(*¤Z(ÄýfÐò[Í×ÿµtÀú·óƇB@:A #£yQáΫF­j8Òp\!·™´‚—Ã}W;(\]×K€"ê¸m¦ Çà iY)taÅýz÷á|¹ ‡[R‡ºMùŸ «cžKÍžÖ‡Æäãa¼¸Á*¤:âXTé?>³%?ú,Ýæ³ôÃqoŒ¾ó¼ôr.Ã?„ácáq]W…/†ò1<î÷³¥Áó¥`:"UÐöj¸cÇñ Mn-r9a¥ÕÂ1'ôJ^C2Ó°EaHÉh³òuRþ濫mW=_þ4(ªúU†¨Vw¤Òø0|IÊ?fJ3t6—¶ñÈ K Få„ÖÐAÃÓ—Ã#f%ßðߤSûoxŸ“;wFJB²ápßOòeQ2¨EHNsyÆ-åÙÄŸ‡ãúR0—‚™G <âçœG—ïHÍSðŠ>ƒšjÿÛ£ÀxNj-÷½îÑ+Øf“ï±J‹*…•p0äàÕJ»«{µüóo‹ÃÌ %&q¯ýynûñ]לñ9g¸}¨ñ[]ÏV‡½ù·=vÑ?_ð¿–“µÞÞŒ©…õWU‰ý‡JgKìABƬ×k¶Oè[K|*«7-[E¸q ¼$â ýEH¥ë]jÐ] &ʘA!l¡Ëu¤2~­Z¯ ØE×FÚ2mޱZAÚJÇbØ£a =éèxÉI¨®¼íB¨8\kæÓØgeƒ¬…ÑxªŽ`6U6ñ“D‡Pqëñ¤ÄýIÍ=mWl%õ¼RV~+•ʰë*ËÁ ›^–VSÊ–µZ^%»ê„¶Ü2¶ùð¶÷={7q#´Ü«Š™¬ˆçØ.{eš]wÙæë÷Þw³xÓY¤FÑNv1PÆ]ßç?NõQ>@ö§0v (,ߣvÿäqér¾˜è ‹Õ=6ÛÝȱ]h!ö,³(«éWi¸q”:ˆj¡ªúúzÅ×3ºµœ/«ðn&ˆaÜ2Ó„ôÃìšúæ>'ÿßÈ÷äg§ÝÕÜðòØ,§nÕq%‰IMä²uwÌFX§)u÷»\hz#\¯¶ËgO­ÇŸïž•»ò¾‹eà°SkksúT( 0°Š³oŠÍSêÔ‹–š{Íéh¹ÙÄlÇì8¾F1é*«pqQÄb(C6(gcGæfQ-0¹«;¬I7ë “Íç×2Ä÷ç’a–ޝ éµ¿ð´SÍž¢ÀÛ¢Û%P¢–ÕpÏAF³ì%òV¡=K}°@T‰°28³VDG<ñmÕ÷ÿ{Ùªê­>>*ÇÐÀ²˜cï‡u÷nÞä¨ý·8벟#  â:"âÜÿ¿bÿÕçã%Õ¦­šÛ» €Aš­ÀįcÛf· {$ßU$(†rv™Ðеiå… y5ÔÝeYW…ž¬ªÆÙìÚ²X! €@ÁéBº¦ä6LÃSN“,áŠö6 ô†a"Þ06=x³Ê°[V´Ê² gŸ¯ŠcsdI*w)oD;%Ô¢ ¨D«„Ì~Xò ÏVÜ…dªÔÓb"”÷Ç­ƒw<Îq›µ`¥—>}ʦ÷e^:§o &þÔo%Rñz]ÙFÒõÜbžP~&“ÈÇÖ‘ИêŸW>ü¨Co»÷:ˆYnÔâi¶U Pgc=“MÀ0DÀv)‘â{hF*]ÿØ‘î÷°íNºîg×k½·úòvGÉð~Kµ¥’„›Øk“ƒ*/¹«´¯Ô¬C·¿Äê2¼ÜèÜ{OHâÞ*øa`Š#+ ¨™ôä<ûcJ>ô§Ì}âp™ UZ{ìr$já8;‘ Z#i½(óQ“ÇÛ“§_sN±½‰vÙøÈ[î?W& cjï-ºæ®shÒv[p×c—cRÝl<—j›­¼<9k±Qù“⃮5“=L¬ØÒœ`÷™˜¦imöLµ4†ï <†Pø#3Ù¦8åJ*[`eȲL¶(ÄÐ@ñ8.}^‘“ŒúTš¢Àû—l á﹡Šþšpæ3ðD#£‰`ɰ`½Ã•¿ôå vz4?ÉDâÑÖ&TqöÁâ>…ÙK¶:a ľ­pÊå˜òO%¦ºì'1!NrX*•ªx„&ÒæOlßT ‚ÓF»–-Uêc9»ˆVØjâ2ÑGäÔí]Ý à{Š}AȾ)ûrYnŽc§²¨‹‹V¡á7àÎHŽ:~‘X‘&æ#´brª¬R©#3Õø Á }»1Þ쉭š4gœ¼ÃÏ^¶ÿ°(üÓCÏ^'ã…[~~Ó¾•»íUüxá¨/Üö—Øsöj­›îþ~ÁîEÚω’ò²X'Ôµçža  êú=¿û«Ë‡Ñ®ŽÞò©—Èv‡준ß”˜ Õ±Õ€@KoW¾±çXÌ0T¾˜ÁŸ5øªQDá3C(f´|Ü$¦Ÿ*U†ò¹,Û}­bÆy5HåÅÆ¸1 ¦R|0–ºççç,¨O›¥²jâW¤¡~pߥ"øÆÕ ›_sÚ÷f«óõš4N=3Ù*â*ãüŠ™Ü˜ÔÜk¯yþJwÔtTo⺇/öÐ4½NO7Ù_4ñ«Ñ*v1wÇýèâáÒ}>NVÕ+íQ?š£.0oà×ÉWŽ‹¥˜ç`Ô. 
Nf'´c Ådú˜¯œ6üû #\'­ºvZÿøb‡]i Þüà\©mA@,Î6H]ôí.‰ËAu&ê)L*#Ç œ¦x¼©¦¿} ì>íŽÙ]«ØVç´ò:G¯sMÉ[læÛ/yò Q#âô—¿yÍl`Âó–cÂÂToóAâr°%i[Z¢žã²[W+"%°mF)j×åØŠ6ŒZ­VÎXÝøŽR)×"J¸a-gÑ´šÛŒãçá¬ã¯9óü#¶©-[{rb)›f/™Ê +F±8:VÉf3VVUý*¬8v81"ü Ï%ói*1<<ÞÙU¤)à~ºÄbíM×RV.íbvdÅ5“—üôt¡XMíû‘ï^õÈɪ{sT£Íº›`—«{ôç*"ÔÓ‡mòs~|°ÊìDƒV5Â<›jÂR¿… ŒJ‚=0O Ž\ø™±ÔªÆ ìêí<ìÓ—ŒåJµìàO]7ç%zÚvøô7“ao±Ù•©'ÏXÿþ…ƒóûVëîYAÜb~^&‰<„í8î3™ý0,@hc<:•Í,jïDÐgïJšfj†ö1¨yåñ„³óò}âŒk– dû˜^cí̦Mˆ’cKTÙ+Z]ß„Þ>Ÿ kÍbý¯莠u˜ß!}5v‘âÒ¹Rš ‡_˜¾ÒjLŒª¨)-Ëaã-öõvZ"¼jÕî‰ÀÊ_î©çÍ”FRÍŽÏ–š½ˆ«bqÍ|-XKf`NSíòùoÝòÄ™íuc`áüîÎ52MÓ«4r…t©6š.ò±MäDÓZ‚xŽ'{þ™g#žŠá‹¿]Ç |˜àxð¦šþö)°öœ®Eó_©4G:»2ý¥rGgÑ»Pļà°È‘ ,€Ì_;,nù&ŒŠ€àTÃ)õãQH8Ò­rØ™Tö æ°xË„O-Ïä«z¦3©}:4T&‰è3<%J *¬x!STüÅ|1Òxˆ ÂežŸ †•qd ‰í¢V:«–EdAäí–k°HúiÏ8°^ðͽ¾áGW‰zX¢é(±ˆÎ+ƒbüd¤–,ª*výˆ”‡õ²ïåT ïÆÝAq"4›--‰‚öû¡áDÙ¨\‘Ø<ÀQ•µé¯~îÐ{ØÈD¥VG¡Cf?uÈÇ™VÖs[ëç¾6¿2d ZììùÓ ¾þ/GNè'h-zúî6Ú·ß§åÈquÜ6÷£Þ¬¾éóç½Ü5»ýÐíÎn-®Z£÷ýæziµH¼Ò ý±2"Ó '¦Ú:»ñ=ÂV4±+kÔ“zÂÔð±gR3¨ ¬ °[­Õ³Û­²~ù“_Ézí½öìr£õ­í.ë//.ÎJ^xû Òâög‹µÎ#6»{<õr<Ý̺╄@IÝøèå2¸euÈçï¹êá…ÂÌh„y0,ñŸ~èwê"}Ÿ1æ/"h$L£ª^ÓcT 11'aZ&ñ,M“˜_ñxÜu±õˆ<Ôà/ÔA÷U.—s¹7=Ï#>9û5ÈC J˜<¡(z9yx+Ï60c‹Jæ-εã Iê“ÉOFÞMj¤Õâ2:¡%FƒsJ&F¶¬L¥÷Žùí†Ìúùk?ôK§^ôä·»Ý6;Ae·—¿pþ88â` ]o„ûÅ—]źÌ4bw#3à>` 8"š ña˜ðA4,zå“‚E/g®Šßá ˆ¨éàO)†öS L¥¬.ᡞšæ}XÖ>™%,ë_[gcF‚-M²|)²bLBPÊR%ûYÀG`YË|0[F¤!»cƒ A} RÈŽ Ð Õ½¦š`qÜ6ƒF#Ì 0WkåT¡C¸ L–±å±’š5hËœ‡øHŽ•·’um¸<¥$·\¨MÂ\{$†ÇN\tó²Jí¨S?zÏ·óeÁò¸:l“²t€qá]šV_ûÔ~7?|¥´“oŒEÏzö„ßo"OjÛv»û·7È9H^ƒ]IÔ GÞœØtN +„ OC‰˜fA¢©üó—Ë—†åáú˜™Jµ¥Q?¸¶ÓŸGü϶³¼”É ÛÌV)ùƒß~WꪨÃ?1÷‚‡·¦yítùý¿¸CHšwÛã‚è'lðãÓnÿ¢l›6Ô··¼UÝd€8-ßîNñVBÿè3¢ªgä-š]yUåX÷Uoûj’ iå?«Ýx#Å ~­ÈI&YïJÒ¶ÜöÜÀ! 
L¥%Ï8ãŒY³f®«¬²Ê]wÝÁ$,XÀÓéÓ§s‡“îîî­¶ÚŠûúž{î¹ÿøÇÁ×O|âÏ<ó ªU؃}o½õÖŽŽîl³Í6¿ûÝï€RÎù;äéõ×_ÿ¡}ˆ·Ö]wÝ{’IíÐÐÐ-·Ü²Þzë‘sûí·§Üû¹}ÑÊñîTz/PÀWøI¨«WÿW\'¦U®` …ð×Î' —B„'¨±êåÙojJX¢ÓÊÉKµóç¾þ/~wÏÎ-s{lô½}·ýÞW6úŒ­6:jç=Î Rî©;ݹÛzç½Ýê42ú°XeÜywH7‡/ CÔÂ@k®+µÑl· T‰8˜n¾fÈÔE∧mƒ`qP9Í\O•G*B †€‘l‰`T}ñs{SšK¦L³âÖ|íÔF¥ )Õï-и«X¶ûŠ=Ê[”ë²ð\¯È$P¯þm´­¤JýŽ|ü®ª.jšØ·Córb‡¿T°YÍÕÎOã8ÚðŒÇ[ÃÄ|F.½æÇ   eºPõOÏÿAðžÞ¤p#¢1U\§îñÔÁŸ¸èë_XZ4râöײíw¶ÙtÉÉ¢G¤ž×Ó¦M#+u•†œÓÎ8{«õÜë‹çø~°ÿ/üÆngnûé¯Éß&TÙÓŠøÞ’M ¾Q "Ùwà 7œ?þu×]·ñÆ_vÙe»ì² 8ºï¾û~dCTýío»úê«Ï¥R‰ËH~½üòËO?ýô£>z»í¶»÷Þ{ÎÁAOæAÓ /¼ðÄO<å”SvÚi§ .¸à#ùÈÿüÏÿ|ô£EŒæòÈ#¼óÎ;çÌ™óè£n»í¶ .ììì¤ðû￯½ö¢Ø­·ÞúÔSOÝa‡þüç?“ ØF2ž’€ßfyÔZÆ’°·_B,²6^Iˆ`‡f‹Y³u¶JàžOvý{°ëå®óMÀ°á\ G«OÄM˜-œèÖ_Ý$Œ,¦Ýîìëî9QXj4EX4YªÌiñ6"#€UîxÄ¥?º Ø%«Ó)õ“'oÒ•Õ [ͽðç‡J{›–5‰R_З$³ÊÆ Xgàã/Ãôë®ieÈÒÿÂè‡Õµ Ïoym»¬¿_¶Û®ÞõË[¤j]B„¸¥…`É/ë‰X'Ùõ—Õ;Z¬Ï´×ŽÏë;~“Ï/üÑ).ºãÑ‹R›Q7 ñø%Õ®_<¨å¥Sno»»ÊëÝXµb½£7’QñŒá5]¡@] ãk8÷C? ÿ† 9ž°ú ásâ/eß9ß:俎:`æa'ô{dîâ3ÖzxOu)î2Îë¼U\Ð\dpä}Š…¼z˜¥ûòž,ùþ _ØRc-Üw³oˆ“ŠFØÀé=kÉ‹xÉx5Üaõ“¥(ιà º@´&'c¡ßbt ½Ãâ+fñ3#Õœ±æ9߲ϼ8ÿØöŽëúß]~»íZé̓8/ê¦Î w™ùí ß/#¡·°Ê›•±áry\Š`”èÃñ¿á«'®6žÕ朗‘'4à…ðˆ™—…Ö´ú[¸_áx)œ¾{xáý¡ ö%ï?d9§Ðš¼7ÒQÚí±²ÂçÛ…_gÆ…G`ušÅÄ#‰ÀP+W³Ù”OäÒp—å›`/üUp„³p$a‹ãÌå2Ší ëÊk.ñ}¯ey¨0Ó1áPË5í¹ùÉm‰îpQÊÜ7øV~ÕØÂÊ‹?yòÀCca¼ÕºìڹРH²eÎ2&Q¶ƒ¾Q§_{wtd¬½£-ašåÒXÚΛ–){fH0ß‚úÛà_Á’ìªB¡ÑÑr®#“`IK%‰M6¯…]ÂjÚQ#~½£Ífk× ¼x—=¼Èfü|aò4ÒhOãj OX:ÒA}”œØÜuád\Të,††e`+›ìT ÕöÛízûonÄ¢úš.F#:Til‘îÀZ€a ÓŸAbül<8J «—[¿/%Âje=‹š‹=rµcË•nûD/¢ÁWq¨‰ ÞTÕôÔÊZèuóʭˆéž|P°¶®öÞìšË¯?At³,Ù2] ?vL~5Q$ˆ¡ìotwé*#¼‚†KÒE¿>E^)«C7¼yUØh7µÒþ²Æ‘P=rº¨|ÇÔÙ{=xôÜ-Áþ:"!Kס9µ,‹¶³•«šÈ'_®=' *ò`¾ŸŠËì-4ë±D*VJ.k5žÚj‘7@ùQ)¼sÓ…„I‹T2鸘yãÆ+Éă±æ£™+˜ê‘ßß­ˆò@2f©:Ϧ«ŽÛ›ÖýZÒîÿw€Ã¥é¡/ÈÇp/‘PþóŸ£y,RxàÐ÷Úk¯]yå•Á×C=ød—œ<¹) Ë,@z||3rðK/½ÄMdVVv#K‡B¡€E;Í#.Ñcc\MíT êƒý‘ÁsÔ ŠA&æH&?S‡h~°tã§ÎWP 4ƒ›m.üÍÃůŒ^¦Ât›ãVÂ2ó¬A2+<ßñ²øå_ž)Ž‹e.®Xæ ì”è6piÕ©%ì !†·z䎠à«iy¶F©kŸÐ®®ÚoÓnýï3¥º\P«ŒX©‘«øF”+ˆMr½êg:–€#¼V {«ÕðÐÞÑáà"Ѝu…<‹™¥F §Mé”)bw\ýø©»`å, ñ ÐKôªœÕïƒ}M]Ô¾Ÿ9àò›/_e5&ߨT¬\ª‰Ÿ*ß¿ùÉ !œ¨Ñòâ™m·2nÛ[lVücåj=“ÍѦ‘Csq¯ØTýã#™ÎŠ\ðbs¥¬Z=óáØ q Ù]+.!º8¶Ôm·ÏÕpíè™vlÑ÷}É„ºëWw5­†4“†å°ÃÂJáÔÞ%¶Rº×2–K˜btƒHD;l¾__lZ0JÀ¾ö>uz0­ÙïÜ÷ø%ò 
Ößlk˜†}©àDÞÏ“պɌÁÄx+Ý—d¡Yolòô‘:¬€Ü,çUc¾JÕx÷ˆÌ3,uùS—ªì˜Ê¥ü0i¶ D™=êéÒ¯ŽþÀ– =›˜é3dÅDVh‹ñ:|¢Å‚'žýEÐhô(”Ë5–«ÃX¡^òxMK¨=þCÊR-¦mA§8åÀdzììeÛ3™ƒ¤­NäuVœÀÜ]lãð3>låpšÉP‡-¾ÿ\ÆW¦EkVðô®ðÒBd$ $p´C®íïïÇ* £ªƒ:("2É[$´Ç¬È"ŸtÒI˜JÃ,î«Q6 AáL9À$iF0&ñ"ÙȃXŒ)uÀä$ñ ÷9ò /‚Ç`0mà]î "ó.µó 7i-8Õ5u\Ñ)5¬š7Öaõ Àž¤ÞøÉ/ï•Na …î 1*‹B§‹x¿KÛð圈Ճ1’  +1À1ÚÃX¢Ø.D§C+T´‰4,jy'¤FZá(«€K 8n-oeŠDï‘,╃?ìtÒª½˜ä–5EfSH–<“ ¤Ká Ñ )N¯Òé VÒ×kÓ^£´ E&ó'톎#'ÓEpXò3+Mñ¤è³“ÙËe€j—)ƒ™1EËJãMÕÓpyÉtN•Q7çý„Ûp›©,“/BB6:ftc‚Å|½DP_©È‹jàyñÌı Š©£¸ßã‡^'w˜ÀI³ÔŒžéb:·`H‰Ö˜¤v¿Iw’#ôð2"ÖÚ9ÂM”p ¾´\Ÿã]“„ljÛÙÈ„¢çd  2s²ò"µƒ»€4`̨_|‘rx 7zÊ‘JW^ye„é¨ oÕ­Iá˜Zx…2¹5ì­^™ºÿ.RÀV=ØŒJ’cQµõV«lŒŸIDN“ôz…¾õΚF'»dmT80_t WGA3ÞBÚS©†‘¾ qX.õ<æÃ×u½dˆ—ñmˆæ¡™ÊÈàÁj$¥Zv³n8ªÕžt­V ägNHC1£¶ÌÓl·ˆB.æáÐdDЂËnÿsl¢›6muT¬ˆ.Ý“®Ê À1˱È&ìÖÒ;n&ˆÎ@lͱÅ÷•j¶IM„¨MUÔaŸún¦ÞSHõeƺúÈ÷U(þÏ=D¥(Fâ3¥Þ‰¯EðbèÕ¦<@×&ª×ûyÜ.e%‹Œ›|T(W…ælç)ñL%7Ø”„«)Ñ»•üЃ°ù‹×õ?ÓS+΃ЉÙÉxkÚ}5s.xî¨Q5D³s"ÄqD<åã-¦‰"ÜÔNLémZ‡O£FkшóHˆ™ ú?ýëw,³<•?«šÙ”›U…Ì ÂUªëáßž-ðG¬ÚõÀXÉšnž­Èôg3X>Ån‚uŒªBŽÍ¼h$ë#C™BVÕ€VzÐŒá)¥¥vÛü»¿£7œ½±»Á1³.±â+‡—Þ Øß•ñj§*ÈòŸß*¡~öãâ{DW2wh¦bu‰¤({ÍôG»ÔçôvN#Á–#£í¦,YrfŽžuR' š€o§®ÉwßM¦à¸Hg@8Úí)BoÙ'_tÑE@#:ÌÆ!ò“Wc ¯@M 4Zý³Ñ9óQÃ+tÑ€+ÔP“5]̤yD9dã5²É˜;äe¡,°‹^mµÕÈ­:c~Å9Ò0é¹çž‹–~£vFíáõ7&2ðeþñìêê¢ ´ÿÙ¦îLQàíRà_û¬üXÈŽ©¡‘Á× FÿV)âq†š;÷BøoºoÖCDOÉ®Yâß¿Ç^™<þýã7\ÃÇ¢ü‚ŸæÈÛL2dëµÊ,^øÂ´i3Á|µûÆG_ÿdzQoœÁñ‘¦ÜB® 1éæ¼ôr_W/Qx›Cê[GòÏý-Ÿ-t×ç»ãec¤”½ééó•;f#©³Ö¥¨Ì[4‡ Ö45¤è à49Ñoùâ½r QO°Ã§=rˆä†˜ÜÑkñ}3VˆŠ©ý6>”bŠí³’v|‹°ÖzÓGj¯\qýY©^yƒÆF?ùo"Q'}ׯXQ`‰èG¼kw˜•qõ‘|¨YCîoÅ$ª1Ö»+Ã^*cõåSc¥ ‘V¡¾éºfÊÆû¦)á–Ì‘åŽ=¹à³ÍC¡ ®ç[†3l+îãbºRn´·%ž¢Ä•W'LLš~ A†WGG²…ŒÌ$˜')°•9ú¤3à«®‰€X¦N3˜.®J·½Øá ¦Ìô®[ïsãƒWKÏCuû?×Õ ^vÖSÉf?&`CÄ+£šMàÜ$+Ü%´ažD‹ô¤”¢_Kdá÷Ž%$+ „µN¸= ‹²Né˜!F‹’\‚PÁ;…¾ø®0B!Ý ! "‰¥´‘Í3&T;î¸ã©§ž ”º‘Á3VÍ¸ÝøÅ/~d ÂA#–„g̘²RÔ‡?üa.ÙGD!Ì\~øá7Þ˜däã-ʶÉùÄO`í AYÐæ|ðA ­£© 6Úh£hÊHÿøÇ?¦. 
$ýò—¿¤IQ½¬F÷ßx¤; .ËÆ‘ W4oBéýåæ7–3uçýI8Í;®Jd>X‰£ìE=à²'¸ 5*×.¦ÅAIœü œL<8]tdý¹ÖÂG±Gg¢Z‰Œ0™ûµ“‰»Qó—>¾–åMΈ+GaÚô—¥K`ZéŸáÖ[ÙtÜscÓz×”H#bùSLöÉ'›?}Üb= ª^odì4לּz/TiÖ%2ĹWG¦¡:híÓ/|ð0ÁE4ÆqÕ=#7M ¦õêm¤¯ëŒÎE3Ð7ÓUN´ežÐB#=[GÍ£k9•íî‘Îpî¨Cö;µþHÖì^ÐÚcã {zZ^ýó hð>[úÓ¿ž&äBŸ"ØŒO/Y«Ö´Ò• ‘¹ÁÅù—((Ð~ æÐÎI<§b¥rqtà¶:ù´ïÈ8šÙ0Ñ«$r,ód.᪷;ä¶».I¤%@BÂN–}Pƒ´®<Úô¥`$5¤f!»èD JÒË<ÆNª½ÆÂÏÀJ #D”#NÂj¶å€W§¸Ùö ?ˆ¥ó6h_ß꬛žqô5í½²L–zUQˆ7d±Y„ó&FX/ËWGwùµT!Ía¼®)è²ÔL{9¡ÝüH“PE7øñÉ,e9¯s¼CäÚ!ð8‚¾ |£üRpÎ%'N¿ê&ßìÕäÓ 0‰†`0HÉ%G¦!س«¨ÌDÐ$sŽÂ ¬]vÙ…½@8´úÁ~€Ã œl "3gÙsÏ=?üð5×\Øm=ù䓈є̋8ñÀ\‹2[v?þøã¸¸âjßu×]ÙüéO°|衇Ài¶!ñ p 'üñˆ‡¬Ûo¿ýé§Ÿ¾âŠ+"l†ú¬:GÍ{ã ˜¢G“&N¤#Sê7fžº3E7R@ñjæ[†‰ò[’xÊN…}/[Úg«o5ÜzÌŠRw]©·jx/ºúÆË`ñW×D¹1‚V¬l±2ZɵåÊ•j¾-K4;âç-ݰ×1Ùà¨ÙQÞº€‰ÎBov0NG” 3 F à0p`DLü• ›Â¾ÓXÄ Cn”›éB"§Ñwxd4“Éa¥åÆš  Z…ªÁô¢R4÷ÆlŽUÐfÝ–x4±&®Iº¤Ù¢ø%SZêPÒ Í}Ecll³ÞÞèþD},6<:¸çÖO‡I§l8w=x!Ð^{Qrüg^÷Y¤gYYc„Õ.¤Fß ^Y„¥vZ–œ)ÆË0˜|òŸS¢€ûQÝ4Ns7Pí,?“„­Å¹NQ¶Cz×TÖ(zc‘ÅñÎB5{kñΉ8I”ÃݾxÄ ·Ÿ¯+7eVÝÎ?Åñd *L¶ÑÛâ‹VôV;»e¥¯©jâ÷ÓÈ5 ÒŠ§“ñZÝÏŒ™ÓÔl".Ü÷À•Ò¼aµß¶gÏ}àhéS´œ¡ûF—¢ófóu;ÙRÀ,„DƒÛTz% GˆYµŒ¹Åœ¨Ñ“I7(š d¢È}é¥Ë%‰KŠ~‡èƒH¡ˆjÈQÀ21lœ;æ…o}¬¸AÀ­A-lᥒ&+æ„wX_uÄI…qÞÝçþ€&3Sý’ûœD­š<ê‡oÑHýVä7BÊ›,“fX¥mÕÐ¥©¥p¯O¹ÇšßùæZ·îÛuÛ×?³ç¦s¶8ðˇù¸‰pÃêpã¯yA¿ÑÂF#ÄoEÒÊuqãÀoÇÿ:@>”Cg¸¯h2y€køx¢ºE™8êS^—Ñ¿]?þc<Ôrá ê}ô{rN#ñ¡Éånë~ß}V»ïà>~<ÆÃ­ÖÜCòÔÃV…|‘OŠRTÃÀ iˆ¦0GúNÕBiµB8 ,JAè@ÌjÍ«W溸E\Áª„FÂöèÔ¯ÅǾ3O–nòc(=)ŠR(¶U ÷ÜøPqW·ÃkS¸êÀÑíÇã…ökQ·ûì~r[<|„ó‡+Ü Žv#ü’£F=Ñ{Aø’vÐñŠôý¸MÏÁK†´„*pRÒ]Ê.‡;~þ ¯mtøÉ½·Ü~Á6½‡ìôÙÿ¶ãáã”K-tzпF8¾pH܂̞¼)ÔÄá ‹!ã(XZÑ Ž÷—tR(öΤMP^·ãÉBÑwRjp3‚€É§oóD‚LbÞ¿ÿ„>Si¤¦‡€b¯Þ}ËMZ ª"S¨¤H‘éu£Ö2CaªeàÈÍ7Õ÷N®æFËÌ4’É¿ti<¥I {¤dˆˆÓ†è ç’@Tû›[òp¤ ädö‘#qÿMóOÝœ¢À$´88yõ÷'‘ͰV‡áèX;5þ>Ó?¾Æ(—䨭¶Üá§ooöLúµ0&bF”¢¢õ±Q«¦³Yqˆ¡‰WëªÍ^¨É̼å×ï¶DmûZšÙ¹5¡£ž|¨ßjÆù´ÇM.£29¡‡XÀ‚¿3ä$Š„OŒ¨ƒ¶úá¥Ïí(ºSä-Nh9ÊÚAzÊ©”,@³˜m@QYu×):¢V­UÂL'›_T£ÅŸx­-׆ µ4›Ÿn%ˆø© wÍúŽÎOÂÕãœAm Þ–î ­¬¶Ýxß»ÿx•h•i*[* ƒ­/|êÀû¾Lä]ÚéÈ0bÒÔ„ïzzË,Ë×”‹}†PXéÎ%­˜ ‘¾G‡é6`Šõ2†$ÂÙ¤]Òæ Uvã{rŒmÖÙ}厵 »UmÅ[1ÛJþ±ï×·ß}I&GÂW"ƒnûÑ=î~ò:¡í˜ÂW’d­Q‘+lJCŒ–2Õv›|ó®ÇÏÃvª 3q±yò\eÓAü¯ŠÙ&ŸÝsöŒžµ_ZçÅ…/ö®Ý»¸>ßìRsï9õ+ŸÛùÎ_Ý*åÓF>FhO¢íX@ÓÅêÄí®úÞÓûò4Àë«6\fdÙ!ìÖýd›‰-—ï4L3£ÆÄ 
ÕËŠúZDpsE<jò2¶ÝBýC+Â’„l£ZÖÏ_Š}³Ûh'A!ÊÂMÐQ8@—ÝŒ€éÍÊøWî-ã_ï¿RÅ›¿>ÑáHRYé9 úFë¾¼­³Òmîp¸Fø]FïFh L’ŸcΡölˆÎ” îr GiQ0©£Â'Á˜@bä‰2D¦I”I#Jæ)3r¾y—ôݨ_ nTo”“B–.ö¼>õè}Nƒ B„µšÚÆ^#ÌG’à\\ûÐwu¦öÀ°HS¥ò‚SnÐâï¤îxvFüNÖ>YZ’ݾØ%S%vŒØØØh[;ëxüGÆOKòê¶„’" uä#‚az4ÁY$›ö» '{cYð›•îX’ Ø„`ÀZ‚ÔÒ,`«\[‚ô9&?æÐÉî´–žc§l6j±À=Ø6ÊžŸVM¦›8/IªL:6¿Ñ´i]|sZKKm‘J5Oè)4—M©ÜDñ(ýe1–. «­¯iú•¤ê(Î’9  eU›ÝØý¿¹L ä>Ê»˜ªxvÎ2€’¸+•Ì$t’ôh¢;T p¢Q, .¨oØÒméšþ†ã¦R¢ñ¯Öª©”ÅnÙxF–LQÎg 9¤\;ìéÎu7ë~Ö%û³=µåú»¬±Êœ‘ùÃEkæ^|§îV×þäß<}ÿÌ e˜9´ÍbEO_é×T=±µè±ìrn*ë±²2`ØTûo~ö£O\+h]Vï~ß7®ÚFÜ]Ñ5KÝùó[¥1Øjg;³nÝI¦4|2ÑÝ©³n¬±3þ¡JW®ÛdÓþ¶S&Ó/|M†l>EÏÖ#š„ï,ôÿúEEމ©T„êÒ0x¬‡iÉõÛÿ?#ChX7â"7‘ȇü=RÃË`Æ9ïF}D„;=)%ø ÈÊÈ@G¼3L–^½„l(Ò±÷ß¿æÉcÈódŠ ëg9™É9æKAY¬Å€>V’Æ\IÌFî¤YÑjÛ3š@«¤7€¸†a.˜ªpŸª˜9x ’unö)I‹™Ú³¼+ > è¨ã·»üô;ʶÉå€OdP[í½á‰×üô{ÔØÆ§@æX½‘ÃÂÙNK´ê±…ö¾¸9mþÈ¢Šñj¶/6R_xÿÓ·IïYn§>˜quðs[Å}½ùØÀZeo Õ^)›ƒåäøí]AŽ$,'aÆØc­”,lÓ6=σb_ß`×›þt#-)«+ÎLÂr¾KZ)ËU®×òÓq»>æ¶%ÒòJ4¦ôŠp)w‘ “´‰q—éZ²‘J2¼ DXÚ²'BâHŽn&—‡#ÎÏe´*üökþOà·ß“ÿ¨€[† ôeðhØ?ìÿ¨fO5æÝ§€f=k†‹²Rö‹\i^dˆè‘Ÿê­FÕç2%-ßÈ××GŠW’&¯9‡û¡„-R/IRè Ÿ;C•b"[¡‰·|˜J ŒZz0‹ÂÑÄÚ„=K)oHðR­PÜóË߸ö¶‹Ev41ýÑ’¯´†æ‚*ˆ³øÃNËеR·\ŸGâ8„["‘GvU¼-‘YkE2&ˆB#,,oƒ¦¦¶G^Ò˜%Œg‘¸*›˜)ˆ'Ç]?›ÌŠäê¨?|XÒm›Õ;gþÐKekžÝYÕ»î¾Åfó*€…SÞ 4߉s˜ªÂD+FóF5ãKÄL¢:¦‘óSbU†G3íµ¬Þû«»Vcb„K hóC.ýÉEòˆËŽ eT›zD¯ˆ%×≉È×6:ææßœ5gº9GQ\›ê€õ®»ü±=‰ÇµÕÕsŸÞGV¦i!>%Þ=âñjšOµ‘–¦©¾úÙýôôÒò! 
)æ7+‰Î¸'žÇP*'R‰,–èFÊpa QK›A+–C'Ï—êCê¤Ú|ý½~öÌ䜜OŒé ¼sÙÌ@ªh‹ï°²’°¼Ì±ŠcÄÏaæÔøN TË¿ùË›&šó–Ÿ©®üɱ`pE­½êz"¶’QOÏnyê6Ô¿ Y«ø.ÎMo_›èGÊOšM&ðøZªzãî¢V¼1ÒXŒ¾<5+ví#ç}ÿ¶îyåÛ^ûK'>¸ûyÏžpý³‡ý®ÿ~Õ‡‡¯r+5ŽíT#Pwå~—éÙá›Ef©”ˆ…ŕڙu`•ž¶ÚÙ‚Ìå€YŸUÅp«â °Òí»UFH>èT«Î\SÆǬh PN³--!;Œá8kŽMà]«¢\aSð ;tS ïRDÛþD w œ€m†š÷"߈I2"œxE–‰ÿü[TJ2Ô+ÿ·P¢EÁ"ù ¼¾ê¨@Ú÷ŒÎv%ø‚¨ä‰'øMÝ}‹o»5ö¬NÃÓþ;^ÑP Sݵ+n9—X÷$^Òï a0 ƒ¦æ%ŃpÒ Ý-þ’u2‘I_¦áÙªU›™ö^ Ѿ² .b$RU”ôl‚ìHêiïÖ‡ÇMSmû‰#î~ô|.¿¶ù33«ŠŽ4£fÎZI*†}RÛmÓ §>R)ںͤ刊^äM~º´Ú°ˆ–4ÇI#iàa»ÞŒ‘úì½ÜUõÿ·ÎloOO£QÄ(i H•&¡ƒ Bè½·¡½„–ÐB/ H‚¢_ð+Š€tHÊ>Ïö2;»ó{Ÿ;OƒâÿeTü›¯Ïe™ì3sçÞsÏÌžÏ=çž{C~¼üÆV«môÒÛ¿a À,ò*¾`-ášuêÐøhé gü˜Ó!;Ðj#(qÃvÝþ€¦íýúÂ-–––üôÇçü*Ò¸ç±åyVø3¥™09ðø¸=³—ª}v9fÎüë0†Óϵw\ˆÔ°çÙh,ÝWÊúmo:AN6\+§³,(Î[×P¯~ú‚jöå‹ñ¶ $ ñ{=Q3Ž*¹ë×öX3¹aÈ7!6vÝô`3Ó°“¡»î»%d…îl®-–”qw vø(+i$MezX³(«Ÿì{1 è4µ8»x·ÍŽsÌ2;gϽÃb¯O11Ò@IDATs„ +NÐd]–ô‡¡­âe€Wñ8JþÿE nù ÷ð1êU_[ëë¢2zTa¨mÙŸ²¼òßùïŸ[ÿì ZÜ‹bº¼xpâ/Cm°Ö7Dö¨b¶k“<b9Èe]_îÕ&sHõ©ï/¿öÄrw-ºi×mßõ2»ëÙ åRIøíWÝ}ˆ –v_ETþ?ßR×Êr—>%˜Är`£ ,/#õÝ/úX}ß !±ÉÌVP—ý $7”ä#¬©£:mU*Rn´örùEÁÝP1|Øw/ {/u|#lg¦lq»°ófIñÙÂQ´ÛfØ4%S•äWI@‰aÂ<€¦8fDTóH0vHàÍŠ| °‡L‡²±ê›c¶ƒ<>Ú*6–ˆÊuRwýº}•H£Nª*Gx_@"ÚŽ•¸T/„qÖÌßÏ~*Ï…!s„Úp["Àc5 V+bIb5c´ð­¿úZç_w xÊAçÏ}ñE0oÛÀP_(ÛÌ4êÕZ.ô4ˆP‚S²®a{[á°Î2²X´Û‘w>;S.%Õ”}¯¿úÑ£òy¹‚*¨‘ ´ŸlØ4DYÖoæ Ïù{°*ý; À«ÒÓ¥õ¿†®ýÙ]ÕÈ:`»#ÉË[¬Ø-{ïM~’œ\Púø™ys¾8†hPUGmsN(bˆ$¸ë&fÆ¥—ä÷ûYXJP+Y¶f¢PW_~õô›/—´ Q]núÙƒã‹Ãßù«)àÒP-ý½èX`<¾±bÑEÓÄbŒvb‰Ä%mùÍ®ìùSNó‡û·¬“J¹èÔK/¼ât‰¸Äâ'Û–Bd”û€I¼jBݹ€©ol ·I˜-l¦é¨8[jBl­K98m«»²ýË®ýŸC„Np‹#šÙú‰{µgbѰë5»Oj§,M€^±%¯%kn_Du „éE—Có‡Ìź’j,L 1e%Ú¿ì2ê¬4 ¡…0êñÑ#Ö_5ÕF0Q/ÙºüdG¨”‰L—øè wc¸ÙÈë¯øÂdÈÁ°­ýØÑè 1†F+Ïën_X¾³¥W÷˜,Þ'TÌQɺ ø½õ:ÉÙZm—[þ—^{XÈpáQcÕ÷†!’h]]ñ¸`ê (ëýT¥–çR^œ†,+øU¤Õõº¾Tö©½öØwÎÜûƒI–ÖM1*p2 Ú«kÉËÀ+á¨wßùƒË?™“q³W ö7Ò]ò>PX3v,1¡Ÿ¯Òe'•æý(ñ£øk¸ø±ü¼ ÿ;êî'fŠæáS‡muÊì—¯–“IUR! å(ÒîgËgûEÈ»zêÍ'’×T?Þfꜗo =]N…pûÌ-\Ð…•J„dMýþµ? 
Üué ëéçPPyq*¥ØEÕÙÕhVs¾0Sí…$ýˆK±t-‹£ºÁ¿hæušf`Ûü0=oÿö½aØ@²s.ä­ÔjÞ€cúX”es §\wðJJè¦Úõkû Mð—RÍB˜}¶gn;×®ž`g[øÊ}Ÿý°ôÞ ¿:ž”œÞ'=8ó¶Éª¥Ý°,2“IÉE)i•.õ2&(ľ^o;§œFÀ‡ µô À©ö»ÓÛN4ÛÛí¯Ÿ¶á£U#ÛSºrö”Àš*hµW&5¨§zDLÉø¼$¡â²z¼Mž?v‚¸xšá\R~'@´å““»Å­Z5bƹqéÂBw{|¯Í§¤ë=íµq‘b÷›žOš5«ïþ®sj^O26ðI¹},)eV©šO$#᨟¬½…B.JV²ÍXBV¶þú./Î{L¶·ÔN[OÊ8ézg¢õÕi?|bq~±e¦ÁTõY·–”‚ãÃr>!Âq¢eŠf†#¢L3<˜´H+Ö©¶Ê:ò4`NC­¾z· 2W1 ÔZ¥ºsÖYg]rå§ÙèÄVi<¶þú5v­Je€W¥§5Jë–koz°4èHÞªŠ"I)|wTHý—óDKúå­"! ¡®"&öUÝ/(æö U{\FL./¨VzMqüØ ¨|@MÜ\YIýá»øWŸ j‡ªˆ1ç¹kd†‘kj>‰!THó®nÊ_zÈË»YáßÚÔ”1Dj»Ü•émU_ð“×O{Òz—°nB{J-74È ¡An€®_"jbI]²Ï §Ýû=9{ëûеÜϨ";ÈãðÙ½øã¼¨Â.0X²õ‘;™ù‚<7ˆFϨP3QÖЃ 8áGu<äJT•ú¼‘b¤a™ÍŠ“íÂÑz_d¤B_h)Â^þÖl¤Uýùñ^û=1ïÁf½æ·Ä&7Á†˜òÌzáòw:äá—o§ãŠª&‚1?s2!×TwWõxÎW QY5mÇ—/{r? <$Wc¸“( íÝNN\Ñ™LƒÉš•gÍ•%çTJ²HÅRFTȦÓ¾šK´Ø~uúVsNž½³fŽ´ÉÆ)Œþî·÷ýóçÃÁr~0믭 Ÿh 2|n/¨©GÜÐ\쬬æ)„ŽÛqJv¨˜êwãçJûp²T÷ÄXXýâåãñKe™VÛ%)¥ÌËVí‹0ZF9ð_ÌÂiñ8u׳§?|¾Ê(’ÈÐBìj䨑oR¨Œ®€‡©гl½7m ‚Õ$" ‰ ÎÀ­’TåâÙ¸„ÏL@vÜ âêêÐïŸsÛüó$[.Zå¬oADeÌê8.í³áä~9CúͰ„WnªŠÖGIBk MA”ŒFÖûò½-ñ‚iF ŸUm‰Å*ªÜŒý’#€Gu5y½;:¢þ¡`‡½ùIßùQõO*e?I§›[«l@H5rØ×/6"a–½ùïú­9Fé“p"7ýzó:~“¹Óþpu¾Rˆ† ØãƒNÅ2õbY3Þ~ÉQêSíÚm‡ ØcJðÊÔ*Ž`«8ùò3-£ø'8€ÌBÜ4ÔàRG(bñÈòÛdÑ ‡ÑhD+¨z_Ÿ(>ÚÔ©…Ô ·¸è«[笀™ *+TùÌW7!ÕÈÆæZ•ŒkÙ6ýz¯CÛ'Q^^‚†Ò¼·¥ h4_Lo•à$`cÔ2"‘øe¼n°[b2ÄX ø¬P)ìW×θ m,ô‘(P‚*©Ì]0ÉËÌùLjüTçïóàÙs÷Þ²Ceˆ-µe 0“>m,VT£\#.U)Ïûyó=3EÍ]„¡ZÓ Û„k`*À¦£*ɈŠûo—Ž1í2.ô9¯ºñæNáÆ±]só¼“D—⼩¬ª*5jìbE‚M¬Q©½3ʽýhÊØüjµeÍžä—ÏÚêÁO2Ÿ,UK:»ÒXA›q3ú]XMÚþâ 1âÛ&&±Å¥¡õ›Cñ¥wÏ!œ3𞪠5æ CÌ øÁ2y¸E5¡m¼Ñ`nå1ÉàE‰ ‹¨÷ß×3ŸH2n7íz¥ ÇMB‡ÈƦ®X˜íN2ÌŠj®.íxT˜h\¨…ƒúüR¶;©Õ" *5žglÚ%6u…PE¦ÞI;LWYÝ^ZÓÑ}ì–×…ÞOî{ñÒp«®6g0 gÕ|¹–‰¯ÖP¹rÞJ$º¢NØhÖØÎq'oq÷ØŽ³rߟʿÏ{³Ï½z«¼€ dyHhãv©ðïn¥ippé ¬hšo}ñtùAUÔ{ÞyÛs{IMy3õ‘šùÁz¡ÕN¢V‘]ÃáÌvßè¤6ÏøÔ`Ûz_ýÊAß>ÙJädøÖG®b#2¯Óª^FxU‚£ôÿs@p ÷XªÃØ@Јïín<#mîCeCP#;ذê{Á¶•,Ø_õ~òy,¿Sc­†ušÆ¿^LuÚ)ýèu—LÕTñ镺 éì‘í¨ÚÞ©I‚i›Ï눫K4"nB×’æ5-W—ˆ·Nì{¢ì3™(ÐgÄ%ü½HŠo(WuàÉL7 FŽ"䢉-wXså‹G²)´´Sñ1ß»ÂW ¬›Y¯RìŸôõ½]VfÍäôÙç‹fàÄclÉ_ŒÁ  tô +¢)U|DÙš5Ïëø(álíc:Äc©‚mÍnt:,j?{´Ñ˜]m6ëŽáø1î\D T‹†G›pË‘­,o ®*äXL™‰º]cOûxÉå×Tw>{µZ œ™ºÎõÓß8Zà ÏdH©1ê¼9Se´æQ·>v³A ¿:iõ+oþý©˜†ÊEÁi6´Z9B73T“íÏ$FÄR ©ð•¾Kª¢XC^²ÇÔÛ_Ÿ^,˜éøPY¥jiøƒz0ë ¶–Tr™pÁÝÑØ÷6>X‚0óZž¤ 
ÛaØú(oo¨«ò¥.cƒÉݲP}àŒxüç·ÑÑɇk¾¾±ašNa)ë<4¡«ããòÿ^ûö”0~R~§Q«H \N̺çdÁõÓ©¡ìf%/q¼ã(ÒÏ1‡žmô™ª`¥2±ãv=#ßju·f>t IÌk¶Õ¬Dãñ°¼†äjF+Í©tF,ÇM½¸ØW_kÁÚÇsú–·vNH}Øé×ÿìâ|©?’4`ö§‹?Øó%ÙrµLuÄôçßYhªxŸ:y—[®DŒ§}÷\y •'è—É cü²š/^Ýož~ɦ¼<>V7XZ±$¨VÔn;~›kšßXgŒôµÏE½­ûŠ}sûx$n7䣿U¸Œð*üðFIÿç9pë+S¥‘‚:ñð+¯˜u‚H:¶e"´UW–AEsÒ‰ƒ$ˆ.iÃ;WªSm‚™ƒ„B@¯0_¶Üê3rQãÇÏ`°–¬,ÉÙÅnHЗª¢³]‡DtbpD!cS)¤²‘%øè«wª¥"¦§ìÙÕŸ"­j8µSƒW>öÓ0JEµåy7þìä ØH© ;,©„(<‰$ÐRÃȤÇÓo,ÆÀ}É”èß²÷°qQp…‰…Üî*%!ÕY"Ò¶M™kIøh\– …lÜ'›GeSgPaÓV•ã6:9j¤rK+ã:Çî³Îñ‰d2è˜I#ag•¿uCY¤ G"­‚ª?ªëóEɬªÅ²«UpF G‘ÌuãszŸ1¹z²µh§é'~°'ÈRêU±Tl‹¶YõZ4 hSZsç?,7–È{xÑ­/; ÛpæU¡"šp®WT³ QRËi ^²¬“ͬ¼Š$b‘P èbi…›“o¸]¢‹ˆŠ[SÅ^uݳ§c²¾`Ÿë®yî" ¼Rê§½åOS‘®H[˜xg$£áÿzÝ—!’s³nD‚w>zº¶P²ÃU¡h¢h®òâZ¥ïhG·?æKŒŒI0dÑ¢%0_²"-RK Bd¯ªÁ>á$—îxV^<±Ê†ÄX%ÎF%"Ãah´°H¿HUµô#5ûã„ceuÊVW_öÀ²·8W¡R+#ðe€å=-£X59fyL2¼Óÿºˆ¹º˜Ä4³ßã÷²©îb@]ÙA ”­P$îыŋ“U7T)þàÈÅ¿YÈÀÒ/—]Ë3áQˆ%% >XÒ&¦lnÇÇW·æúQ#ÎH…[.‹ô‡p®Èî›Þˆ3ç5‹¢ÛAGFÌìÞÏEë¡Iñ¥±\õû}^OË%q—U©V#dŽ‚¥ëåd_ †B¨›‹U-Y!0ýVìrPn¶ýá`Ý&©‚øäª…ˆyͳÓdñ2¬ÿæ9¼v•ÐÓPmyÑeR2aq[¨ÄY*ŠÕ÷¯ŠëMÍB{0ÄœÁ[*”£h”p™Hš Ií1%™„‡Ì6¥V‹¤¥dX0ˆã(Y%d¯kÉÂ"w#Ÿú8ÿ®ÌÃX&hDca}iBè)Ë<‡´†ø3xøùëé"â÷±Ñu‹õw}ùWs`ÈÞDäňœ—䀆"­“½ì“?ö¦»XýÞ×~Ÿørøžg¶žö|"Õå‡ìpVd™2Ëþžøº—ï7{¡õ¿¶¾ç‘SøÖï·/o3Þ*« kªáòAWNÈ|,ã¬ÆfSå*ÅhÌ߬û|xÁ1gªûQGlŽ«¡º<ˆh‡;Û¹ŠtT[Õ¡ì²Î¶.íXóøØ40|ìn.š¾6©LRŒºñù/{v Џ™–ì NÕ76ÙÎBJ „†E-•òb³Ö´¥”ÑÙŸƒ1ÜKMòZºÉ7y»‡ó’"v}ÊÒžÚ8øHè=MµûÆ„+Ñâ8¢ïÇ¿j~:ô¶Ç)<úòíCñzÈ1Ì~uÀÎnäZ÷Ï»P„𩉨HTZÐê?_4UIaüÐE0„¡r*à tÃ)øãr…?åNþÏãk©:4:iL[U˜³\-$C)R6Ú~ªN˜zÞ´Yçx“Ê\AvQ¶Ûmù²UËÚ¡Z2Ó“ü8ðv¯ýÉzë®uጩž1*ÛêííØ³fœˆK=‹q½ IT¡ûnÕku²7¬†ÔÙ…åôpMþuì–Õ´ DåRë f³é S§R*…£q¾µå³yvû¯ìóÜÿ< z[MMÞòŠ6{µê’`gר÷sïxcUÛ“¿é•Äö[m¨x€‰q)ð?vû¯s…ÌuPIM<Ç9íûtñÂî1=üR‡orüÌß^Å<¥);ÜrõS‡ËŠ&ìôX7&4#Ö"Ø6 °dþt޳Ϻroï7Åý{i­?M1/‚®@™Iƒ r;o.Ͷ:æ[7¶ÖèXÔƒ®¾Ì³4±ŽñÇìü9¯MSÝä*ª¨ztv7ì1 gÍTc‚•÷Ô*e –Ž{“¾ßê}øg7¤`fÖC¼ r/ˆ1½S™r©Žˆ@5¼à×êâÝ‹û“çþÀ`Éxõ´‰f÷o¼Fb­ú'¤;Œ"Õš¯‘õ¿÷èÏg‹Í¼ªŽÞü†ë_þ©<î5ä--•ðéÓ¿+žTQ²Þ—ýìDy{™·éwÕÿì½(²¡SHAìloøƒäš‘Á|ß#óg4‰¾å/…}_láç6š é‹eñhëÿµð㔃jbªÇŸWî1h…Øá^‘1ÿhÁm‰[E%ŒqÉW/ýáO}9b¼c¡S0ðs R€³À0˱8¹è+«Â2Asª¸­9ê‘yw»êÚO·¿õšÛ“ÐH\ªª 
„–׿PGœŒvÀ!ÈJÞ4ºµ†q—$…téV5,‰üP*Úé´È»rµ!tõˆäº®¤Iob¿'´n6_¨ùâ,Û\N†2(ýUV=áeL½ÿÛ>±U‚¾hº°ÂGƾޛp‘ER7ÔÑÛÜ6ëOáQû½•µ«a`µž1ìePr{“~¥h[/ÞkÌ_ZM§I˜I™9y¼…Js}Ù*™A“ :„€6Cf~ˆ-.a¡CiºKûÀ©0k£<§é3p0?íçÞ|@¦p†í·d(¨ýmåâ€×®“²I Þ  * SdXÊYÐ5K1!Â$·†<ÊÍæØ1c³N6ä„ãáø-¯^…µEÆHÔÇ\YøG;|ØÜйÖi’6±BöC_G¨ÉûŸ3cÖy ýλÞÞ¤;Æ}9ÍÐ' ‚}xF†£3 Ø6;=ä_­T)Œñt‡¾XȬÕs¡D@ž-_ՠʵ~ö¡–Õ–Í¿â¥2·¸t¿×¯·/üÇ –d8$ªnƒv,‚N쵊ìÿè†_×vØë›Ëö°z+Íð?ºéãÀOó¶/%=‡ûùÕ¢ˆçÕ™;^4ãé³…쎂T(C¹¤yTã€xÔp‹Ɖ¿w` n5š§/Ùnú‡k †ÿúÙçÊÛ”b½W=¸—Ìuêê¤]îhÞeö0Yo&ͤk¦y¾jàÍhåÀ/XD¾8EÁ>ˆUµï{ßÿòlV = Û¯úŽJ³×CçèFPie ñÿÁN‡Ø†Ú‘ˆåd‘ñÃÿI;LžõÐ 1}{ÉÚÖLØèo°kvs†Ç{¤K8L–s쀨L•|­Øcv£ÄÙª$ A†ZEåš ú{È>h—¨³vž{·¼vÛzºuîFäâ‹ûüß÷útQø÷ãCæÂf÷&‚9¨1KA7+µã÷úìfË/ªq R©ýa¿ßS/70ipë&cTÊ*“6ñ-bÅTÆDï-­ñ`ÎeCŽñÑðxH'zz}éÔR¥ÎEæ’·n‡üþÁiûÌR¾¤\ãêgŸ€‹ÁBë ðbñ(Hì¢VâCí ?IyYcž¹Uå©S˜ú´¼„È—< 8sd+(Ó¸áõ 2-õ“-O»é•K†5.0  ÙôởQÈ"v,]ê8þ»WÆy>¬¼÷ð/oˆW¼A¼:¾. }‚j¯oLŠY=ky6OåÚOúÖ5­Ø`-½þ‰2v¨„%F€PÑÐÑ$c¡IÄæÅPÇ¢;†ÓƒsB-¢2“¢·êâšU÷yzœk=B˜Ö«NÛáÊK~v‚;Ç1à¡vë ç²cÞŠªE…ZÆdq]ìÏQž€æh¯o‰ûu/ýRåUÈ…é±qTÇûŒe…ç^¹Kº¨«©›Ÿtá/Îbsï»Ý9µ~£Ç»æî½w»šסFø£·Td_‹@¥ ­©j„ÓŠÊìs•.£¼J?¾Qâÿi – µBa´-šªÃ³šèˆá€ª¼!$Џ"ÕÊñË]%%¡êW¦€¾!‘IŒ'›Ð¦’Ôã.[9‘nõO•1^%;Ø«‘reš‡ÎÞ¥ c=©¸IôŒé,§àCæcG¬QÅ;eŸO ˜†p«2yÃ"ÞÄêéžæP±n•¢Ýf¾^¿ï•é"¦ù )rÄÊ”¦$Í‹ËÎ{¨âOм«Ë¡X£\6;”Œ§Lƒ,Cr/Î4Ñq ÙrÊ—M/$FJ³ÚN^ïUî}l žh®'ŽÙûº…C Zk¾òø“ó$ÚÕ`˜^cj n9"ˆ1@rÅT›À]4Ò&ä-/‡v#—¿]W°áèZÒm ÖE\fÅþï~†OêÁÊx7tÿîfjзʫá!|•dÙ3`oON<ð˜<ÑBŒÐ%Õ²ª±ô-]äØ:çßú›³e°cu{5µó–‡>òämá.y‡*6ÕÃZ(¸8ù›7ϘœtËsá-cÊBÉ@@öIkrše/ ëm¿˜%C¶š ž+öÐæ AžÃvÅöGtÊŠ¦Zöဴƒ‚UYï‚“¸tÚ§ÒŒE`º EÂV ’-+ì¬ÚÖÒÊÒv3í'‰33†C„5XͪAŪÙ~f)õ²cží¿9é¹WfqÞR½ ;WK;xøM’8¡Lœd÷V=÷•œ†¿Hº£ ;V–nŽÍe _4°Ó·özò9²¦ÎtŒyü1"M«PoTû=cº‚Míu‘5ˆ-"t–{«÷ðôµRi@´Õ™w>u!]çK²CÙŒÑ%«ž žG`«Ä’ ¸#è…K†JEÇ8XýÞì’þÌXL£.4.ȹ¼hÝwÅ Zbƒ ëeTcHÂýeä8|Fš2™ H-e¨„D£‚8o‚p£YpŠá‚¤ä¡A¬èpËR㿞՜ÄÉP™fH<çr0¬ÁJªLò¨L…ªJn(Ê1Û‹+9öZf„ôɤ¸¯ÿðûûØ鯵};XœûƒÛ{cKí¤uýÏÎà Ðç‹—kµ@¹ÂãËn0¼ã:'¬Ñ¾®wÈšÿÒñk_БI,«ý©¥%a&Îæ~U[Z7SâúvÇÎ:fýÏ~uÛ·}þŠ·c(G~gÎÁK×ñcOaЂ‹*†®‚ŒR(f[Ùò˜N8 á͇÷ÿ„4ÕØ(~ä–—– A¼¯OfuvsY[$âñ‡Z]7;dBà«õw½éµOÞ衺¿åoTÖðnõã/Ÿ‹&µÐOwœé¤– >¼ÿW·2ât—øpÙ8z/S6±PPmU‘˜e²4÷2™³¦á:]_ÿäéÔdGšßô7Ê*`¹Ÿ¬MúòªzàUõÉÒý/á€ß@dÖ#ÆNÔ$†b$ÙZmÊ6·½Ï?¶tͧµ"`6^¿],‡#ö߿¿k§TöÄ 
2e÷÷÷Cf"ì]04õðSâNÆZbXMÚòdqú«½Ïþönc¥›Í)“n"KR:o9–áAÆÒÍm?$©l\ËM¦©”l¤‘ʾÕqµwsoJT­ú5=8a¡Å+£$Íee½SŠG}òö1½¬Ü"¬Á“F‹•J® ç°ÆÀ”¯£ˆ°8õöAž`˜WMÙùz™6q) ŽÝ÷Ô›ïÑîå^Ž ç[ ÚáÎHØî²ûé9½ûéÄ k_w×iš‘A§¤BÌF[õ`4Yî%j¤žÉñ0“`°C„ñß÷ÊUüðÚ³ž?6è1ÒáT³’c÷—7šûòݲ®_P§nüØ´¹»ÈdE©ó÷zòÚ§bjrÑáœqÓî†ÃÙr(¥_ܼêÐÃ.~îå2}‘]pbJikùÇ?¾.ËØp@øÄ«&GS;¬ßv%.«vàUûùRÿOrÀVµ|!—‰M|ò×wŠÀªâMzþu/œ-r‡_~¤ADR¿V ÍÕ´Ú±²Ýy¢ÉZ1g&#éöŽJ«^¨;WÏÜûÔ­¢pÕþÛÉÞ‘kŒQ±"®l)«ü _§œ¨Ø¹=Aǃ”j: ÊÒšA"OE}F”…Ûr\z1˪86pãüéà÷@ÑNŽ'ó Ž U˜85k¡'²žÐ¬¡¾6qS‘é„ ¬jÙ€®*Wò€7(VÎj­Zb‹T2Ô#Á¹ò¤^Ç”ì-ãxõ³A¨Ü(g¸jM"kxÕ¢ú{Kâj°94DEk( ‚ñ?¯Ýã”Û~{YiÈŠ&ÉËÎa2ã6êõFÌ`YׇƒÕ0“\@ÕÈIÙ:õ—Gü®ÿ’£.@þåYÁp)0;€~a^C(¡º ’Õ×\U…*Þ@£ê­Ôs^3EHbÕÛ;KDÃfà™ß?,(eª­·ÞkldB¤O8cßìòrÛàbû£§þw6ÍGÒ:¨uHýü÷R9›ëÅ»ÓlÎZ¤.8ðšËï9nò¦‡­±ÑrÛÞ¯Þ´Éÿ.Pï{Úê¾u+Ó–oÈjýÁd -ÔdÍÝpÄÅ òªêm6ÑuüèœOö´}å½·?:™“¿ÿÐ[Ëþ8¾§;U[ÿ¤­®'6¦Þx¾û’­žØí+||ÅÓ§^¸ÓgÞzŽbý…ýâ{Ÿzõ¬KÙúŒZ¿Ý·WÍ é/=…uúúuÑŒ¿ØºkÞéòÒ¢!³E¬ª¿3 ¯LssÄgH OØrèžSœÞX¦°vÛø S~<§ê«šŸcc¢Çìuu‹•å¥U•Ø]æ…uü¶'Î|äFaW]ùal™Ý€Ú^zp<ijæß‚ÚwÇïÿÍÙþRGJ‹ØZІÔ7œ%²;¤R!ÐW\®&r·î•Òã=rÛËM7°§²Û&GfÆ%—å=9ÿƒþX÷ M!Ø*— a#L¶tUÕI üª+ôezŠEÄXÚh6"ø…d¶%†å²šûÜý ÜRå¨Jd—: W~=‘½³ 38åŠ,†ñ e…H‰­ÏaÖŠ²û—Ü!Û%ÞÈÈÑ­­¡wxp+oN™k¾mkô•Ù øÍÒ¶¨Ñ†JàÙÆp‚êO#íÞdkHÕ;íˆÄÖñÓÅy?›†¦…ã7¿sÚœ=UpOÐëô¼ìùƒeBYU'l|Ë…/O {Œ ¹¿ü¾ /o»CtK´v.EÔó/Ý"áÏuÀ#—ܾ»o¨7=12ÖøCg<œš±ËsãÛ¾|ÀÆ“ï~c†pžÀº°ºí±«åKN±éµ7ÿúXé4«ßúÂ[~}&Oñ¨M.’És,&zI± Ћ#Ìr ²à-OÕåpYY œtÆ Uz’ÿ]Ð[“ÔÊÞV9Ÿ%à æ–Hh%/2°ÿ 2 ÀÿAc””?¼µ¶FŸb±ï„}gV?¬¥ª=«›ŸµþËv´àY3wñ¼IÑ`²ÚjÜdKÞðÊÿÚY*3$T.Ä%’´ÖQ,2â…Ø­ã#S¹Ó!áøóÖ0VšõÚ˿꧄(húåùˆ§®ž8¨Žv(¯×«yìÓ1•ö·Œ¶îa¹Yèï3ÚÉÑJÁ› ©¿J$dæ'É Sí¿Í±³çÏQU’Ž!€LÌYpP–d…²WÌVÿîmf›ZæD&°Ñy„rÌÕ}æfÂöæúÉNñ¥Â7ûà}¹cÞu‘@t&â@ò±;]˜*ŽéÆÂF$Ñþãõ'‡{‚ÅÁܯÞîk†˜¹Ôë­Hô†Bƶë ùî—‘?唣«ò ÌJ×?K‘3îÀå›[póÖ™t3¬ƒ¿r[qI®­#¹×6{†–$¿Rþî·ë›?þ_Êÿþا{q®ã´Äïó¢Îg­\ÂHÊ“L«^vt(~EY¸Çò1/”ã´?†²"mÕåYŽ.{òÁ$K¸Éì}^þžr&ðz,ÉV2_ V›­X`¬<q…tC¾f Ã~cuÕ“ JQ8ŠCî¨M¯3;Ã13Z.gOÙnf|IŶ::W´K PÄZİÖï S%µ¶œa½›Õ÷NÍ—6tÎwd&Š)¨Ž¹û «Ú˜¶î³Õ¡j%à8ÓŒõðýSJ¾ìm÷Ü”êöUb¥3XRw-ôÅÎKoà~&c¼ñ‰3„<KÂGgu†|ÃÏåJÕEþìê×>r‹sÃD:Y™ùòIhä«E¾Äo¤ïý‚P¥=ÔêKòj\Ø– žˆ¸lظ¬êeåð«úˆGéåÀŠ`‡«–/ ßÊ~³ç;õ}}y{‚ùµ¥Ö‡…,Q:DG ùÍRÕÇž‡JUE1+Qª ȇ4G‘3Io°ÉÒi±0 dúÔ½¨ˆBŠ—Ì<~Ôš•hÚ­ÚRkŒÁ-B x"¤Ÿ,Œ5$z/PP 
âLO¤†3TÙ±<[õ2*VÚ”gï­wžýìc8“œ@ûK¶{)|ô½ZKÌH˜dJúŠ/è‹Ï,»`€,×á›d#”:耣~æN™ ”Ô”Íî½ûÍsD=Bø"fHU†šáeéü@ ±ާj«¡åY¿ºT Š:g‡;¦¿x¢ts[\xA/±-$ æHqéqÿti^ñÈÕ/ñ§lß¡Hðm Ç‘*ú„{py I©‰ýÂÅhЗsæ>$ ÜB5kÛ_ÿìî2´uà¼ã%>HÜ+B⢪*Çò„2I z^¡ñÂ…ùã Ùå$| y(hz&íyz`iÏêµu*}Õ‹¶œHª…å!3‘²bÍS÷xÐŽV?¬þᑯÈKå[–µ &¥¤*°ý ¬–8cá8Z8»µd&ž¤FáÆ_êm¸euþŽ÷œýÜþ®#ÕämndLŒ<¦®¹ÿª#w¸µ8PY-¹fyÐó“ͧ™íž~'{ß —ʃ³TµºD–êùNük3˜¶Î|bçK·{éäÇ·:ÿ‡soþõéÒTZ?_ü³øeðè]Æïé•ùG³!ù¦8ƒj–¿ËøÑÆ3#,…Ûdrô\ñý'Ãmmªûôù»ž±×ƒ3Ÿ>—ŽNÿÞœ©ß½7n³ Î[Î[½{⛜e§úʵþÙ¯?ÂËÕvÒEÛì‰'Béð »Âã[žŽFÂúBØh$¬[m&ÀÈ@~ßü{E‚âDŠW~¹Ý €H‚«Ê8yd§ÈJ•V=Û$¨¯Ìô êäï“QΙöÌ1fHä‹Q·šlH ˆ‹{.W€•êç﫬UÙ᪮®æ¦Ä÷©^¶M+ éÙT{mw¨]·¿18iIþ“VÛ ª†î}₵ï·N퉬ó%û–-+;ÙD·gYý£9¯ÎrAZPÄ£¦nÑôGÎp Œ‚%&+Î"~û¤°&“ÃXÛ'µ}Ò¤3îºç¢rME2ª\ǺU® ¥ÂIOWaèñ-5yÛé3Þ˜¬ %aÿθ噋0ß±0Ëz}YòÙÉ’þ·Þ8óÕ£´hÖVG¯:uû—>5Y0ÉTíp¯^)¤‚HN Vé#gøW#çr]ÖÕ†½ú F¡ày7¨L©©C×»ì¶_"j(*—b’+ÙjT‚¶æ¢ƒ²E[çiÖÕ98§Jʰ•Ûíš#…Ý\yËŒËâã¦ßØü•ßÎVGY¼„q~Æ5y·ëgÎ.qŸé­× ëTwjq‹Eµ÷®ÇÍþÙ5ÃÜã8ß‹Fï(v¦“HI“*Ô¶Žwmg¢+édúû•Ôc|ë£Ö‡¿v§p˜j°GÃ=–åÃð£bødIßúaî—–Úy«3ŸxöB¹‹‡Ð¯=à¼ÛÒÑ¡¹—F ê°M§ÞúÂômvðƒoßÁ{˜\W‡ï}Ü-³®Q ñ©f/{0åÃíÛ³‹k™‰¾j%g†3AÃÓ–‰VÊz—$ñ Ë’VhpŸ8“ø)eä¥ ݬ ÜfP1‚ÐØqqyûþüzÌ|mŠX­+ê¬ýï¸ögg˜Ùh‡(™ú¸wàU%\ÐuõƒàÈ%çªf&'l·}`ŽËœ)¥ :pG­Ð Ç%hI@ueaFµIso¹jû¢ú‘qéÏi~U/Ÿ}WõÑŒÒÿz»¤³!ll|]ÅX*ÁÝ6_t¦¦ü+Ë»é¢Jð1ä7™t[RD6ÞLåሿT«iARc¹@YÙnþîú2”åeù§ˆ=<8ä;\h¨Ô:r¤©JÞ¥„Í ­ŠùJ,%YHeY¶©²Â%mKg˜fRý`‹½Ç†' ~<j…Ýú„X[4×ì¿sÞ ˆ¶z?ø [õrÝE3=BÏÀ/)z=¹ò@&’Æ8@°¦ƒö:qàýêšíë{<‘7ùqa µñº»ÎyâH2¡Ö6ªCý¡t{´3ñÐÓ׉®\S‡owʹǦŠòćÿUuÀ–'Üý›+…žcáImh|„R„0d2®¿*.ÍÄ‘/DR†ÜS4ZÌ÷Ê\£qˆc@lä¡÷UZ݆tûíäއœš~å*Úºâš+xÄ~ªwvòq‰>!hàØô‚I²¹Ø¡mÀ³íC†ŒmJÞ9}KUb±éWŽÑ,¾ÀA|·ÄtOõ´É€A1xä$VTõOÃi¬' ªƒ;ú¡Wn4½äýU™ ¦òBa­‘7G½©L åb<“°d”šJ³]¦$²Ñ¬Q´KƒÞD»~m ˆÔ8äëàgðˆdv’$Øb¼bbt5õ±ÿO o³Us"xÖq9BÓHZòT4[ØUæ7%6òJëôèQÏ»IgÍ”‡²ô£rw&òö'oqµFàTôí3~_ÓKÖ‹áŸ*¶L3z<2¦U´0šÑ2Êÿ|H¤_› Ë)yh¸Èâ¥å§ÄN&uVv$¦i6rjÿH4;›ý¾žq«½ÿÑ»Ç0ýíe¿[sÃ1_n¢ƒLþJµðxñÕú¢Ëè2’D@òEˆ½L¤üzÃï ¨ m‰;±5(áø–þ$R Ý´\ Ľ¥z5" ¯–n(¯)Â7j1ÀI*mOÎ~øXTÔÖ©{ž7}Î9Â3ÌxË;[¿Èõ‘R®ÔH 0Xˆ“¸‹§b–SNEb § Æ‹Kw>z…€PC»Zѽ‚ýƒ"®‘ðE:ïP¡Lª–h:f$I*v3¥CÃOWl£‰ ˆîÞp…•I¨#ÜÙzðßWˆ\‘° øãÝwßïîFª…æä£Ž{÷º;ºBΘ£']<Øèó&š÷>w]©d…ârµƒER®K¾.ç€>¿üà^Õ—ÊVY²cM¶œ(ppj‚›4¹2è„Uwÿ€Ú»«ê¾þxáþ_Üæ¶,ì¶ |ÝËÇBã™7¼òÛ'y([|{ÏžØ8_»Lê¨ï_Ö 
³Å¥¿q›p­´B(˜°@gIµ{ÖVŸÊL‘ibOc=l!žnJ@É®Jç éžpŸŽ >Fb¥e½Ñ¶N¹—‚ÑCÓªÙE3cø³®ç'Õ*ÚvÕoJE° ɸ±Ë:ˆò“ ówFÞ+Ÿé·<µe¥þLtM=.¦NtÀs^¬¯‹ù=àõÔ’_.a´`8\óÏý¹Þ‰Çbn¹æóšÑ|½mÕ½Z„×é‘ßÍ¢ÿÛŸ™Ù´²=A&e2ýæQ´M^çïÒ®²‡Ñ5à/äÑ®ÿËØÊïÕ²˜Ý˜ü€HÒBÄ2rZP‘UH”+WªkËn0Ö…IšéS‡îtAטž‹fJLÚ|ŽtîzET«†,Úa¥ _h¡'2Nÿ+¸¢e =¢.‡CÑfÝñùu"$•–èµjqF//‹³Þ¦ÀÍ n‹h%Ü̳ý™®vî mœH³†Úî+{<ÿÆÃr;%Þb]4N™ÚøÅ«•?Ü5`÷Òò#·ò¢x«ùZ(jŠõ‚AÕÔþß9ñž—¯ µáabÅeŽlM¢Wÿy˜~ùüíBPbò› á)³ÍmÛ¶ÁaL ð4Õ”I7^}óQ¢ òhHB9šZ=m`"!óòVq‘ÅU1Øòq Ï.Þ@ž Ô!{üôö§npYÑ$¤6 ·ÎúwøÚ÷Çø7ö~¥3¸º¿Q¡½XðÕ=å+~ù1¯Aïv½Þk„CÕº2ƒ ã+öÈ(N·KË uà÷'ßõ ¾Âùf¬áiQ¡– )2ôÀlÆÂ(ážX¤bî@ˆ7^qk&o“VÐGÞ« êìMwµ3ZáqSÏ“¥žÄ×à1Õ"G,σí¿Õ†QD´|Þ¯rÖ²4@Ž)#ìµTÌ@UÖz¿PQþZ3€Ã¤Ã!SÞLá;ª$ »¿Ø2ºüÅòw´õÿhüY,"1‘`?bŠ+=‡5`.€Áÿ@!5,x+ìDâ›.ÔzwÁ[b¼õÊÂ%›—í†1⡦ ¿èâ e ùt¥ÿ’ÿ7Ä-*ëÑüËk…´V¨“eO6ãŽBÚÁ•|e°MVgqÙÁ)–HŠØ®EoÈt§zûR)IÌNmôÕÖ™ M1p-_Ý‘ÁdÑŠ¡œÀ™š<¾ˆU¯ ¯°¹\+‡L“|ø¤‡ùþ|‚]¼\é"‚žÍ"â}=’è5«Rm…ñʶ›$õm6ª€¤JÍ~}¦ôëo8þ@nPϱ—Þ|Jà"á€Uóˆw$ê/•!F/@ˆ…ãsk »dy*åb8œ¹x£H2U$%ÖZåi¼V·X)®Ô+d:Ü¢ßá²æ„{î/ŽÃèF£aÆ 2ƒ`°·8wBcª–ý^ðÄšJ@ªè5 áD0ƒ!œæHnn—øC¡xbg&òX®fþ3ƒ¢Ó@IDAT‡žrzbDüEéÄ 2uòzŸ}íAý¼:bƒ{â¢CÇ⩉õfðÂ}Ê>úÝø¹/ÍÁ3OôP¹lEC¾X†Aš ýò¸í|D(à£Yóšaûs©¬|‚²ö2åïŒý8}qEªe$aPô3r ØúèèììÀ›9­ý"õïQjÊêzx*ó0˜gñ4Qÿy˜² OJƒ•h:Qîs¼ANìTj[}Å<Á‹-ÔeË“L ¥¸­ÃoÒ–…&êË«àÁ×*Hø(ÉÿM@óÓõÉTš}™d{(›Z’v!aÊ,ôïz%ÙúÙÏîÔj‡G}:ðA¢',™ADÅ–t™ˆ‡‘Y¡úï)ZÀЕˆ°åI‚ä±K,èq å(™¼ÕB?[4WÇ8 >Òº•›(uÒ" Xé©4šˆµV&-! 
ªvMÂÿÆÕͳ¦ Z°¡ˆqiX×z!2ÁýÈÉ€¿C9˜@‚£e†K“µ;Ô¤o«Z-$:­“‘•×Í9Aé„%­]* ‘>^¨ã9¦jý"ÇÙãS‹/3˜‘‘‚¾úòp{òãbŒH÷ŠÆþVëzƒ]¾8!“"ºU©árßò§µ!•Ú(õ8Ÿ¥dÑS,Ç‹ ž ˜Äðª“ìâUÃ3¨…#²fª&Tè }ºxQ¥†K YÍ vµHIˆýÀd—z!´HÓ€Óò`a©cAüDz*X$Á/UÚtâɧ…ÌkÇÃÐfäp ˆ“„&=¨¼t÷ÃÄ]6ÄHOTÁk`Ó ÿò¾±Ÿ‰+¼Oná•€ûz1|f•ügÔý…<¶Qô¿Œ­H~”î‡F"Õdê®å¥–Ðü(õ%˜WªØlÅåÁ‰NЏ¿hüHp6!<¤Ýô›¾Z£n ­~h7œ•ê`e+34 ã•£ûG‡H!ÁÊ®y-»LÌ<ª`‚…YÚ gËeD$n´ZÅ,Ú5ðhõPIëGCu§jzB^ð ïd(ª!µøÂ‚ ˜ña7ÿb@ÔÆ^ ƒl@Ùêb’a¡h…qö°l—–içðfŠŠ¼PÇp’w`¼Uv¢Îœ]È¥—伋Xö½ÿÑ[”Ѳƒ$¬sꎓl² Ha5­©¢é$ü¿Õªñ“ xHÇ‹g ¡,YÑ—±k›bs;®ü©ºÂ)æ¡®ùÛ'~L žÖÚšM,r•ÑÌ=N0î©5«>L£“ Ø©µ4i†;<ýréDrf¤¸”¸¢‚WëÕª*ØÔ²}^ŸÏ\F…Øœ`D°ÿ‡¸Ï´G5¢Y7ÑÒ(VS=Ë¡žæ¸ K^ê’UñÕ’µMÙ¡ÄÈTôi°Šõ`¬[\Äx.ÜÀÀŽK×ßùñ³¿¹Wn¶Ôvñüë7s ˜uüظ+œ`bêº&6ÕŽíÖn¬›_ê y3]‰šóñPîÓ;ÿt'‹¬¤[ƒ®ôî¡Óý¶:õ¾ù— üCŸ€~Kš¢Šz ‰óP-áV«Qõ™kÕT^L.`ž µãHÒ©Oá#6XÄm¶dو׫QÌUâLæÐóyýŠjßœ{ÿsç62²Ò-¿nŽòº3Є1Há §åñR…´¿Ðòÿ Z¬+Dh4Xí¨×ë»ùÊåˆÌ1IÄXã…ï:+Y¶½›Ý?Rø%P!…ûùîù"¦IáÉTH(áK©TŠF%™X8B†äCÕE2“/ÿ>44”J1ã%8œdäv‡P­ViÍí‡R‚Öºç©6Rßmʬ[Ç¥üç(Á-$då=nSî-£Ç/¼Ì…I €yŠ·À•cˆ§ Ú{ë#f¿x³œAÊñ#5›>A¬•›³úæCúðÞñA/’õpx<~ÚbD5L Ø`¿ðŸºt-¿ƒ¿.Ð×=«E˜‡”Ze7«˜,%“|*ÑW1D­Ð–OÄ›Ïðú·…¡%¥á½%L£UzÔàFkºó(`BoÈD«;ǽoØŒ[+²ÙÇ.R¾Òq‚ÈÆù»ç#UêG[žð {yõhÊ8°¡;¡D‡<mÄ7{šM\ˆ=F£i‘:ÍÒÍîàvÏR#Ã^)˜DAèþDË­€U­Íp¥”ÇŸÏ*—‚Á¨Œ õÔ/«®K3hO4èP§Û°úèš³§rµ ÁdÒKTL ê‡ÕVº°<ÄîGA ¶Aù0Z~ÞK&DoÌý¶ì¦ÅÒúrŽôÂMø<û-Bj(,2hb<—›£Õ&ó$”X±Á0W‘/¬ïCü³¿%š´H%¿?de*Md³oyUËaÆHOœŒáoðS¿zT½ªŽÜíöés’G¯WVj0~n”J*$Œ´¼Áy楪‘Hh%L5µû&G=òËe‡€V:wÛöˆGŸ¿Yäð„lŽòƒ¥ã‚Ú}ÓãùŸ«d8í8x‰æÌ«@l±`œõ=F:ò©T³ó ~ÜBÜ¢k p°Di¬¡FX.²›™LPòe†¹Š¶Ï-#¾Ô}ùË_~ôÑGßùÎw&NœèÂ<}]ÐúÜvþÖIÔ½DG#}q†î?PŒ\¸pá¸qã@;_A_*¸•Tè¡r>ŸO&“ #—8ɽüé¶ úÒâæª“Ê.$»øM\¥&híÂ3¸ä^¥þ„HZvóŸC…?™Ð2ä¹½Œ¿X0…¯ ¯ÌÐ Ä=?<¤g]»ƒòSä<*¡SñÉVˆ•`ýsg¢.ÒDâþ¤éKú䥢i ¡œi¶ü²þú… €Aa%˜Î´@Òdè_Ê„.‚LrÉ%Ü=À`<ÖßÝ« ¨l‰AøKý¼¸U1âáVÜû¸4ÜŒm—·Ûá뺾4NÎàÇÛ"*²´5ühü„âqÀ¶†êšÐîúÉ:Ì+kz.Á.Vk’E<WhfÛ öDô,Â-x­fŪ&ÍöFáÀg)Ôc]NŒ}XŒÕ56HD.Ç ÇbùÁD&dÉÙ±QQ"•µ*Š&HFãŸ_8×k +ÃjäçÝJ%޹¦ˆX(ÖláaÜÀtLõBµŽ„Xé¦Í¦íÑûÖ"ùz"dðò‚C¹rÀô†È‰$S&4Sý²¹C>°›– öP±ñÅeÛèÛR»lþãûfßé”X&D5aÈYâÌ@ÿJ¥ õáÅÙê…Wî©Ô*áÙe•tw µ¾Â*€r˜/U"’JWúø¨÷Má*$HÄ™ .Jw¤ößrgÓi3[k– 
õƒ¶RQƒ‹–üêýŸQ‰úÖòÊÌ«ñþµEÅEŸPSRi{Œ|gøÿÕš~–}a)|_ûÕ—R_’§ÀÐéšîT‚lòüîu¶‹»Ãëž¹ÇË¿òÊŸÞÿã«oü¬Z­Á._˜]Éú'o©ïxàƒóî㥰£ò–ñ¶£†èh«pÑïÁçÑÀfO=õÔ9çœóûßÿž—ï±ÇÛo¿ý¨;vìØ›nºé?øßs¹..B^3óœ«È‚vn ÐŽÂw€^úr,D»,é…ï@ Øéêâ§ · "5H¦¥4bY·œ® ñɽTã*5éÅUd¹Å=ÑñržÆùN¡z„Ùl–vøNeÈvÕk·Îèñ ç2žÅ0Ü - ô‡Í ÃÂÎ"—½‘V^y»ý> á|_¹¢Å1·èMLˆBW&rb}1ÉIѧݯÃo¬>ý¯?ˆýWw'z ^±îäÊ0ò Ò‡†WÁ‡«r‹èJœG@b]¶ÝqÍéö4Ëmü7<"}þ³£)èH×Ð6ZZs‹oäRщ‚Â-õÃÍ~V±˜?aÕÓ·›^&$—7‘k4põŠg UÎ+r4th$º$ž}Õ$D+V\‘°Ó¨Èæ86™Iá.«MP )sÉ„?÷„&EŒ"[·}³ÕDŸD:S.“øOoË¡6µô=Ãèë`T¯›¢ZÊ%áZ~À`ó«nó3‡aþ`Q xõ–Ç1ᦀÏ[(ç™Û„¾ÒèÛTh2Éqüi­áI†Œ¤J%"»$ÖlÜä±êî]:ÓOªÜÈE¤`Ç)ðdl x’„ƒx‚ˆÒjË®+ Ò‰.¶jµ%xè(ýåFŽÞÃŽªdU£'“-ɦƤ²EöM‘» "Ù#K@_ƒ´ˆÿW¿Ó-F#h FÍ2|ÁtwŠ>ïyþ A»{²O½ó…«¡q PF—*ôæãí ¡¡_Þ­Š…V¯Ñhûµ'<,Àù¼&|çªö ¨õæd×=Yïi¶¢–iÕU)¬¢µjýÑçnJêê'ß:oú‹?q’{xâªZ®‹3­È²»G~ò¦—œè¾Îa|š*[űW¼ðøó‹ ?W]uÕú믞ӦMÛ{ï½o¸á†ã?þüóÏßrË-9éjŸ.j~~C㬋|îEPm}9Ä‚v®z ĺè .Z´ìw5W¦ºtÊ%꥜_i²¹žÌ…µÕšÆA_ÎS[¨Ã%FäVpõxװ̺£HÌwhà „e2ü˜ËœïëëëèèÎ]`v‰=~±àqX+|AŸˆ¿ZVzÌ´w~÷~:Ò–iK°ý é1Ñ!kÑ]ßhU'Ð0‚2Çúû‹ÖµG"!£5þ鹺+E†[r%>‚â‹..·dЫHjMª0’^‘?©5ršåÄŠ…»ä^êè{W¸$Ø Õ¹a¹ J«ø#03®¾{²Xò¹ þ¡3UÄk."+BRhb…9([@å|½Ð43Ø”Ä ÛrlC¶éŠ2|© Ã9ê¯Ãç媌ˆŸ¤þB¼hÇ+KÆ>„#žá»·Üˆ{8^ïxIåm#øâ‚ÝÆÒÁ­ò×GYñ±3­HÆíZ©Īìa‹-"ˆÁåb¹núÒYÝf<ì7’5ØïûÈg^™ ¡„",f2a”U¼¿*2„(y›Ùˆ[)'CipJž~U»Ø'.ÜŒ”Ï@8#¾øu!¾&˽ ¿Ý°üäK&s¦ÆŠ»÷vÌ~ùn[Ó,álq(Á“ݨÙ93J`)ÿ±§$SXtЀTîevÚ‚‘b§L@U–'È£A¡Æc®ªâ‰Ývد^mn–?pìøqûn<Å®„£alêí¡ >_àø®ZVþ¸×óÁ‹o= ml>>`÷ã|-s/G óàM&“”pa¥ïÑ×o‡èþì'±³Ÿyw‘žyX¢Jw¸s Å Vë‰üå'K º5϶ª†–V ªÚÏ–q÷™ÿWÁó…Ó/´úðÃQ|´§Ÿ~ú7Þ}Óéô 'œ°Å[~.z1j€íûô‚ n#¼Ü˜yeoóMœÚ:—Ÿ)èËuÿ¤0Ieþt×§] †rWæ•JØÌ¤GŠ,þJŽ\œOw% Âdò'¦Pc·›…V—CËï¢U-¯C^+ƒ9œÇ5OWT¥ê@<ÂÔÐjâBņ!Õ,zÙ㩨‰mkŠQ†—Ó4rù%Í€•–cè×£pŸ©Ðÿï|ðnWWO[¤£œuŽ;ò¤Þü¸§}ÌÆ¡G|ó¦Xwo¹Õ?söu’F0Öce—H˜,ß¹W|í‚ÚiÜ”0QM•nöˆêIèô˜L("*Áæ  •Ê)&Àújëˆ3<Ë.V«‚è6AA@ŽA€…ÚžÍËp­·Ku¿ÞpE” ÛjÎ~h–¬Oª&íyþý\-k °J«¹nwá ÏŸ)-„íœ5™š§uÏϯ)6cïûÎ=êÚ;^˜®¹ËÅÒ‘GµÇWc»Ô—23ÃìÁcI{† Éß ºu “~Þ‰«wmÿí=z‚«{«f2ݵïÖÇÇÇG²Ew?x;Cez‘<žU·€Ÿ[@)Î3.÷*лæškòk¯]+ìúWÕªªõ×wÿ—¾éÍhQ|^,ÀW^yåÂ… 1wÓÚ3fL›6mÍš5¨ÝØãI“&5 }÷¼ó΃žŸ~ú ¸ ’æÍ›wíµ×:tš»téRWWW]] =öM˜8qâwß}Ù@ûÞ½{©œŸ@ß{ï½—g}ûí·C‡ÅÀŽÆ£”°ø=`ÀnüòË/?þø=zà,ñ™»xz Íÿ¥vþ_¡£©d\´ ‹Ûëa-ñÙ„ìüžUÕ?Š"A@ 
’;9è‰gÔ”•îiÌ\MäVýº$ãõòXIFLªýŸ·aøW¦ù?GÎÏ4üâIz£~Aß/kÔÈG—e>“y#&»Bá¶fH@;?áߪë­R@®ÇäªTõ›•vâçøYE†KàÆ…%–×ËfìNYâízÞU¨>Æ21`>ºf’Ãê ½¡<…ä&²á-ºAâeµådôK‰¡¾Ÿÿ¾üÃ×(ˆºŠÂ i‘t8Î!‡hizITTLÄ„ÛÊk«<àçCïV"ÉKÅeMµÑ<ÔŸ‹ÈÿhÉzI­ÁüâK‹,:«$»$gH4•ƹH]ÖÏÿòI&§Ú°ØÌ©G«DNRæx³Õ˜f{§ìÁ€–^§F\6bøÙãfô|Ê,ì{ÔC÷XzK÷;ƒÙÛ.kÀ˜°h“J¯‡µ•$« W6Šá`nD›Ôšoj×$«§fMª!Çz²2—xÉò‘Ì4Ö6Ôâ^‘oÈRΜå†.Szž>yøñÏæòî¾jÚ˜+ÆßrÅ0¡0©\6C6Îb»Æ»ËOÐ7xÑc¶ö²&É;Å˨jGm…„תœ6º´¸÷\‚*§è¤šÚÊ„ÕZ¬ÞK³`3=GºHX,جè<‚%£`2‚XÙã„_1o4iºu¬@Æšó”võ$¹…qbšXÉäo`ÃB4`¹½l a¿A~.*+ŒÞ̘²0Á4*‘)Å$¶l™¥ßØÙ?‚qíwîÜ©Ûf:ê¨ýû‘åä …+W®ä¬âœ–«ÿüëOÑoÅ ­Cæûï¿Ï½Z0µ]»v¢k§ÓÇwÜu×]ÇO!ŸÝ»wïß¿?'ð 'œ|rÎA=—]vÙ Aƒ8çh±7ß|³ö‹|ð“þ@å®b‚æ:•€è,os;_yõs¢K :uÒ+„ ."sðùŸt¿¶´N¯ä?)ÿ?ý£`†s$hHØ>… ©é†’Á íÓdzí½A¿öêù‡×© U}BSlÐmøã\l–m¾ðÉ ¿ÿæ#–Ë ó§?Vž†˜¯Ëû:š¦Ê ðµèý¨nšÆ„BʺhŠÌ4ÕŠ¡ÕQ€ëuÒ{\ÍX×€£šÌÕ¿£§PF5MKÔ2ù£§x ]Om©PcªzœÛS4+êæO”¢L2€Á"—  [7…ruÉt, )Ú#¹¦* _©FÓ·œ1QT"jˆ]±\ª>Ý„&–þ+zDÙCŸŽåê~Ö€îúDk¡|¢8Æs7\7 ¯c. ÇÑ;íýÚÒÖxîú‹o”vð‡¢)­ÏDÙQHë·¿R‚¹‘ 8‹Õ%nìy“(¾´ú9iÌ¥h2çñ\º>wÍ9wˆÒ¹/7꤉h~¹/r#<ër_çDí;,å3™SzmG*µ¦Ð^ì ¢ òD.R9´Õçn:ö!¹"ër7Ÿý°œ¶•ú!›«E…Õb Ÿaß›ñ“Ü8ßzÔMùÓæÂ¥'ÞFUÙ0ª:OgF`]ÈÅÄ>¡¿2‡²õ\Ʀý57ÅóD®*Ý &ÓÍøšå¢‘HМ #X2 Xª É\äY˜6Âu z4âÒK4á@îæ£fÞz¤¡ÙaûÀ¼õãN}ò–vã=EŒR«Ìñh.¡E¦I´ÛúÜ¡Ú\US®™žÊ5 øÖså)X ^„5¨†ºÿöf®3pŸs®ýU|Ä€akÕ5ZžÌ éÖÔÔ\z饀 ê Š)':®¼ýöÛçž{.jbË*×ÿ…C_²m¹‘5ZÚÆu€MwãÆ¶oß¾o¾ùïk @*ðÌ:4fgJ‰ô§?ý ý’(ɯX¡Ÿþy:èûï¿hA\žÂWîB—;w._1kS!Ú6Î\gÁø¬³ÎÚ²eË£>ŠbÍO«V­â:òƒ€ºÜ²æÝÒQ-Äÿò2¨îÒ×Ñõ%ç_øŸ<'e CÑÔÑ{~öêü*ôˆÄ]­´þ™Ó®Ã-Yp‘Ëä–¬š†%Ec©`“%É=L"v“©´ƒ=î%“°¶À†ÀÆm,/jk‰¦d@Éš™mÈ9Wkwºqž9I2©Ô‹¨®E(ˆ¤G­µk>™ûÓ"±²™ ö Ý‚¤(×¼žuzñ>e¯UC27¸ì‰Ýçvþ¨R“š ¿é$ )›Éˆ×ûÅ—U(2&½xá’ë–”“¾àoBÒÖ(k•1jH’È õÃŽKj<¦\¬:Åšð!d&·›=©`Òâ°¦Š]øÈ[És¬(xdÏŠãqÊöõ’½GkÈËdB9"Ÿ¯Dä„Ù’•‹Íò îÁÁSÙÓ±Ds‰±"L~›ƒÕ¹ÃfÒÒ4<½³EõæÉ #z¤Y“$®áYAì¨Ñœ3³þͳ­Zn à¡ãˆ‰JRÐøÈ:!f?±œÄƒ)!ÑÄ Š»¥¯8)+º+ŠNŽØ\®Ù á`(fó1ÿ««txý»¼¯÷{þüÊËl,I†ü(“ÖšC¼ÔÐÆAÐgÚL6g3Í&ôG‰IJB‚`IF‚ p¤ÑbÎGe=Ñý¥ÛS~£%&²…ô 208)'“lÜ{G÷Q 5ÉÖ–ãO¬î»°âÕ°ï`£Ú?ÿÃYÊmŒeMi»ÕÀñ7  B*YªšÌWtžûò÷bVº¿Ñéó»ÿ\¨Ëe»F‡í€è3«¤+¡+'KRq»ª߬T™ªö¼¦Šo”åptxö質}Æf¶HΦ“…dG E]nk<Õl¶æÂ*È»cW>S.+ÆàNSZ©c’uÙ£låãO\bneÅNܪ¾ýâ“?8œÜõü´à‹ÉfG ËÝ[KÀJa¨ÔKä/>MÆXë¯$—*¬†‡1LȆ½™/©¶áýÊ]Ì¢»j °@©Â@ ±ZÒ s:k Ée³¸$D(SÆÆÇá´rç”-`xöø5Ÿ.wòªxT½¶Ô‰Rš® û Ü14yœŸ 1‹ 
³ÆŸN…²–¢ÚDS »eŒ™VõO~1Ìn*aùÖNëŸ}ÿF™É&u÷åŸH6.¬Yšç—WK6– ð}ÊYÐP.’9aâ=žŒy—;^ Ræ9 >'BYÙà¿Î¡;顪ècàHK½ºÉ ÀyH?oùéß<·apä£"Ë3àñ /ôïß¿¤¤ð@_ ˆbzΊ‹/¾˜ÂÈÝ_éWÄš§×Fýx`mÚ´ =•õWìÆ<‹UaÝá‹Ïòòr&V¸ªªŠ»Ú¶m -FcŒÉ˜£)Í´‚[ ˜“ÆÆÆü‘iú1mÔ½©Ï8ã З[»w‰YÒÑ—Îa0ðÁ†$dðU§ÛÿîA1=hJ#3Èÿnáÿ‹BÉsZNä Bç¹òåÓÂF‰zÄ…Ç‚§‡‹RaŽL *b²¸Ë‰¯­Ù_\^BY«E¶Í6ái¢?‚ÏŸ+‘š07¢6'!¼F°ÈïCUâb*ŒYþ´ò æ^ä–ŒÌÂà5јxzzœÂ>äZZ]qñ/½´Þ‚+G§Ð°ÍmÆñYÁd4ÆCa»Ó-&2€æ†Akƒ„vØå.'R1žA¶çHÏE<¨DËjN2pÍ CŒðÉ?7[[—D¬‡m&‘&,ýY¼’ä¡)”vzÍ‘,Iþ“ùdFІ-.$ ÉVˆ5‹# øàÑlqÀ“²¦Ì–y6|‘@±xÍ#(ËîF8§ñ‚….Mo†ªš<ùy2X8ÿÐ ‘ <Ëa„™æÒþ¥{õñýKgÿe¼éU`WF)š-%9U…"NšÉDbnZÒØ?v$Mdⱌݫòù—ëQââds5¼ àVÉ9é ÍYä• á1×~šÌ ÏDý^MA—J«õȉ¸Ú£a¯´¹Q*qùøTQâgçTÏËÄ%¹.K‘±DЀ%"¸Kh(aAñ´)§ÁËælðÄ,¾%›hÌØÜáp(ßã²{5ùÃ^"‚cmA'­qÚœps¼,f[ O{¿2V[­¦2«3“ I 4¢gÕ»æ7ùlÊ>õkÂÄ}·lÄ'åï{7†µòT(-é,sym Ù&‡‘ä¶â§kB@ ¼Ùb°›CÙx"m'‚ †g#Ý„=šŠ¨ Ã5'²øW:eÊd]¤bDyÛ†÷ž<÷ÃÙ§¹6ò¸ˆß±¥²)g¹/‘$/¾dâBÉ€4ú¹ @Û`#+d„¿!k… nÿ;Ò¢þßú&wZe§£³­î:åáLy¦6{`ÓËðâ60ïâq“ƒW7w(íâµò:DB"’Ú‹ ¨-Ç•4dî ˆ þã¹ë¼Å0RY±ÒXœ¦вBùM!”³•1á<¿½L$€¦ˆ#Ï"’)cpd‚‡ÏeÁ"b,3~YóîëûI§%Õí—ÌZ÷Éqºf&0( ˆ‹üç}"ï9_qf°[…üÊ$å‰ËëÁŽÁ\ÇÔi¤ ´þJ(ÿûPAA7]+Ó¡^ \€‚ºÎù+=VÑ€ypÅ3x$S]]Zùúë¯ã3Œ˜×µkW<›Ð ™ Œ„ šÛì¿O½¢’b=>|8Öã!C†~:”Bئc?S’þBÙ… P²BƒŽšô#_A_îÕÑ”Ÿ8ç@¥ºð¡«³Ô@§SL‡sÊðžë<.¢r*Ñ%ŠýíAI @½JmÔüÿ‰Ù[ɯxEÞþ¿>˜ð{ý¯®¢2ýâp:œ”'2“Ì”ŽäÌN²¾«þF®Û²ØZdI³éœÅÅÉÖÔÏŸX½y¯äL¯Xâ^ÐJÑgHGë´ÃŠV IF£0oní±‘‚Y#…Né¥ °H”¶Ù=äq—:1…aE#acr«!L}(ŽèGHϦ’´f ç6¿5gLÖ‡í¾"Q¢à)P•V>¦WîMÉk˘%7#h” Ôì'U.:©…¬·¢XDÕ~a*p´u*Ÿ‡à‰  „»7ad>£ßL«ÁK¤üVw,”pxØ•-lsDªÓeES0Í%bÄIFDÌĨ”x¿DÒFg¡Ä±ÓkCÊf¤QäÁ‘…»Ð‚˜ŠíˆâlšÊM4\â\ÉØ1çå4á@ëeИ†‰ä"j<¥Ä°/ÝŽFûóp&ÈŒd4‰S“Ñäj bP6Liì²ùœÜLe­XºÃ*ݨÌÅ! 
&VL”~cÞ*`š×¢V2^-Ÿ¡UyÚxb™€#çËcàžè…hÂèÝ¤Ç M~¶gYrN: VWŽÜϨ¤„­–Taª_-—1ÄSÈ>ú⻾ö¸+笭¯³ùÜ—­¹9è÷»ívW¨©Þ™'q'u‡kŠ +EÓEŒó±ëƒyéÊ9‚u÷Ï­úd¡6Y©W–X)×](ªI‰¨2ƒ»ìla4ç&t¼Pœ¶Ù )â5EOºn]º6[f9Á›ñª@¢,ÿŒxpß=¯{èœ ÷¾Õä¾íÚ%T˜`K£zóóÍÒi5ï¢WÆoëI·ß}ã¬M.•éÃrÑ5÷'À3pW@ˆ‘AW.Ä .†¾øÍ>¶Î÷ùØ›W"ÇF„Ðb?¡0m¶©ß7¾­¼7ÛPaYmÉ;wêüÑÇãáDñ|„ÄT™!ëŠmÝëK$)¦&wYüø»“Älޏ…-šàìº:G‰O°ÆX£ÇáB­Åö#œÍëŒGx;m×^ÞóëÐæûö¸#^íj;»Ø}TΔqÚÚø;ŸÞº#jL1CÆŒ¹àåÇìiŒ™t1W<¹fôHÄM¢˜M>ìÚÁ¬Û+Sóõ?-’V7W)oQ,õ#$Teg,ãu¾¸k‹¼„ÚiäÊ«€SŒ Ú¬¼ôÜo~¸Æê4Óª9‚‘°×%LéW9tÄÕÑî­sl]ÕüóŸÿ 0.(l\¡ÌÎÿÿ)zúE.æÌP+y06X€mxðàÁ$â Úçé§ŸnÓ¦ÍæÍ›O;í4@¿bâ|(´@ ‹©à™^É¿ÿ‰Êha=¾ð ñ„Z½z5þÌ@)£rD}>¬«ã8Hs Ta §! ›3­à(--å:j+¸—FÍà"è®g³‚x0•Ÿ¨Ÿžå^ àn†lÁ9ÀŒ`Aô;' .È:îrã?j)CÅO<»74CŸóÊÿw_‡)k–KùXÒþ´‡Â—[þ@»#‚‡RP/ ßA~‡p{än}á)‰æLž±RX ù"I†N‰²à%ÌPP@0€?*äÍ'O2ãF³ù‰ ³5Ž9ˆ6,¥@`ùãœCƒêÉ&›ìNö*J›«BÙ‹¾¦›n¹üÖáWÝ?¼÷c®ÜQ7^´ô¦.óºwfÀê•ò!È;lö¨xÉñ W.êUIW¦Úaµu;ùFy™IÏ[+ÁŽòÇÃE*QfÌú^º0a’fò2 †ä¦$éżíSQÿ•§õcGVźãAV€”7~ š=ýL ÊUußI€':|œ'pÁ³ï럾S&–7LyIemÖx¡¦ðÙ2ttŸŽÁòØÊ€0 ´%ju¸±˜$år{}&“xÙ2¡ÅÙK± r*_ÅËáë£û=‚ùQšOñ Æ®Bþœ¶Á .j/}„¥fŸ8öbÈdkõp®¡»rU¢ã¦ (ޏjŠJ—Xr…*d2X=ñDØ”óNØÔšß¯`/û(ÈŸSN–RjZË~JqUìñ›ÅHËÛ-laÌã®4¨†h´ª¨¸T5gP”û_:PÅØ`O›¤ô®Øž5½ ²1Õ˜TAó±ê[mçŸÕ N“Y —Î˨M¯>Áír#ƒÃSP+ÊP­ÌXåÍÕç½5`ܶÓ¦xÚº{]çºp“Lòü62Û3Êo*B?£6°Sv‰Äâ“RdÞ(³K# É’µXÄDŒ·ˆ‰!ÕÑÐFƇÖ©)f#‚ îFÖÚeUßKªè°EJh¤#cÆHH΀Š<”9gv×WeY’5%b Öáøt5:"{"ÐŒGH¥p-¨É%‚f„±±ÒbTõ…´S[¤€aö¬äù™4'ߟ—˸¢Iö¦pq/[ÜéC;Ž©övM¸É©M;–½ºûÑ%;¯ç¥1ZlK?ðôúÕ2:üá(fümRE†”‹Uyar5ðŒ©¼ÃéÆ 7ñD"T>7–¡Ä¾àO­dÒѦÝEN mÊbÆÀœžVþtZàÉb¢Q7vzœïôûÎ}Æ)ÝuÆ .wëq_~†=µDQÎ%‰ª×éNDun!íû786@ ÀÛù¸ÖèHú²p "èèûo>ë—·›Á°GÇ!] 
À fÌ xCa9ô_IM0î’%KX”¥" p€ÁÜõËzÿåsîÛµkWÜ•Ÿ}öY\Ó„ÿòI/ð ”®¬ã‚¾t á¿€4?A Ï; +b…˜Vðɰ¥\á\ï_––õŸø¤N~ÅÍ›jùzì±Ç‚Ùz82åi,š´Þ-Ö× (ù·CDÑW¤Íú—÷¨øÛjÿå+w†2qý/§"ã9(òWû}ʯ0†jS,3¶à"Ë^*Ã’3ç é¬ÛnµTsòë’\SÉ¢;GíºLêa c£;M,š·¾F² yÓ ÆO8£2Í>È  î½cñ¡?5–˜ÊÓ†ÈènÓÅν¡põeógŸ³á`*Ó±èxCÖ8÷ªÇVœòèu»š£†éÝßId«Ãö†®˜¡ü†TÙžMÏ“vJÄ;{3CŒW5$‹tBc51eĵ“’s¦ïmGsóÅCðlrûæßo)‘2uµåù¡Úœ'¬°PIK{ïxw ™§ìšŸ´ |ÐBV³ Õ‘+¢ëâßk{Ô(Gû=¬’'°9k(y¤3qÀ2`òÉŽ“¨.¿xðë;W‰ëy.CÈÖvKî:U™:Qdþ,ê𧢩˕¨;/°"Û ¥½9ÔpOÏÍU_`½Z±yÒØóçÍß<>Ou9†—²y›*/'&3†õ Þ~™çô‰#›çõÔŃe¯²©¸«(¿X)`êâVlQQ¯Ë_qûQ­ÿpåœÎî¥ß}W¾pÿ[wŠ¥:¦†õš&×Q¾RÞW8Aˆ/ȯtv˜š DÐÁt­… ]”ë` Œ}L×¥Çj³Ä¢:ž°û@/ðÃÅ?üÕPÜ”ø %e”ŸÈÏLÐ-ŠÁ<ð†{-ô¥*|£pTîܹ3èËW]wó Ýll຅yëÖ­}ûöE`aY—ã•W^Aiæ('iAD€7ò ¾Íø”Ñ hæÊ¶mÛºvíJW‚Ž$ßX·nÝÌ™3y õìÚµ ogzM—f’—C`ċݻw÷éÓ28xHÌçß=è P?UQ-#ǧ>®÷–ÿÖ‹ž1ÏaÉ-‡œ‹K޼þò)‘fi‡(PG¾±,§_’B¼{~*))s9ì‘Æøø±¾þêkŸÛWšê<úÊÇCÆæ©ñ…æWUÅ +í6ür5›'b=÷jŇTãWŽx"#uÉPˆ.¨qQ,ðá(¨­rYTpBí,Pâü…öN†b²‘œÙ% *T‘ì~øE¸N¹Û ã0{Òf¼Å4ÿ­dB¶Eg ÷¨üVÃ6C¢ŽE\ä añ¸ÁfTù¦“¸$ù.m:~D†H¨xC:¨íüZ³ªY³k±`Võ>Жž©©i(èÈÞëAœ†7DK+ËПü¨mög1fÙÛ©Ü’ÈInª6N.øçR¦Àqlkåªq’(£&ž·lÍîÉÒ ’ÈPÅ û„Of‘`Äë+°¥AKõÃ;Uï2³ T…\YŸH9P®I;øIK̦Œ›ÖébP ×e¸QŽ¥T½ìÊúõ{Í’B÷21º6”wXØ®Öÿ·2ù[cKríôª¢‡#¬rjk²bÞf«!éÐlm ›”FË`Äb¤œª•çd]È€æróB#Rf'›¾ÛªÃµen‰›°`qE@ºsèäàžTYòhW²tÿŸG^òXãOOmxÄÛÁÏ&B# n½òñåïÜå¡ëP(5›Íž$Õ:É6RÔШšØ÷žG^›ÃÌ`ïZ|{ÅÚ'wPÑËÁÅÅý_–¡yX<Ý í´öæÔÆ×Ò7—Ÿts…ÕøÓw{ÛËü_E¡uõA¯Ê'—s$À,«~xw_‘HLjìUÓŸÿðö ŠàÄÀx ƨ=ïq%üzç<.3¥p“ZºñaéŠ&õh·—žýôiÊë ±X$(RQNÚ,ú6ï‰ø[i»Qy›Ê†tžd6[-ÉŽÃ/]^mø>œ«~óÓµ$nî4±CÁÑÆTÒUwôä 7$=™oãlÛ½D%ʇ (T%Ŭ„Ù™X%óŸ|á¾;.|hÙî{! 
­í¡ÄÆ¥ÖQ§dÅb§KC—Ž?q£Ç^˜iRgæõ›~æ wE¦&spÉÚ'v[¢R&?õЛ#UA@zZ†:ôóWúýGöyò§à¡ÒVížZ0˜1E<â>V,xœ1X°ÐáÍz—ùå Ðò Uµúúó0–¨QSæÆBÁÀOÉâææ |ÿËÜúv®cóöµñÇZ?Üåw­òŽŸzöóFÂj3xbÎ)×<4×ÞÿÚg›eü_ ƒuK0VØ5ݨ£/ÚVa´)í£ì¡§éìý?nÿ¯f@Hÿ¡@ÇQ†Ç/*ï?08ŠT”o½õñ?ØTQA,V…Á*"‘tôÕ~E4 þ™*À¨pÀ´º}øØlÄZà*m¬¯¿©ç ³3–¶5?·ãÉœ!dpg{Ÿ1¥Äݶ0Ý>ˆõéü€­Mä`æ«w>|•U6y&B:'VU•üseùqòh%=”⪤UAV±äÒÆRO‰˜EåÇ+AšWÏ^ûÚä¡=®Úç§§Õ-—ß¶i÷r“8Þ;e”ê(í—™‰Òü-ÛPˆÌa÷™Ý†Ê&U;²ÿcÁªðÑžãcͦdÔátVà„¾f@IDAT£‘x*ûÀûà X‹ñ÷NÞøÞ òÔçMÿÞƒò ¸ËÜ–ÞøÔN}©SgÚ / d„ŸÃóaãüô+jSâN¥·A×z9öÁ3Œ·@ªÛƒ>øñÇ“ЏX¨„8Ÿ:uê‡~¨þ×Ð@¥1ÔÐbgY÷³Ï>û±÷‚[È´°÷Ô¶m[¼±è‘)S¦Ó °7Þx=ÅD ÊÒ/lÜ„CV¯^½pC'æ ?ÛÔ€?Wuu5@‹0¡C,Ÿ„Z¡ sFo€ÿå—_&~ Â8†Š1`Ö¬YüÔ»wo¤v`„Zz@¯–ót %1Z-¿Bwµ|ýŸ>Ñž æ¹Ú‹Î +çbëåSgœB/—‰¶¦-¬¸kãb³[ƒ!qµ3.£ÝÇWhBJý‚¾ÔŠWË냠œTÅc‘å^”oí)Ô+OùÁ´ñ—??ïw7([' þɨob{QåXZo¿ïÑÅr?Dþ®õ•@ ~Ø“±ÏhèKåuT=ŽÃ¼tô„ˆèIuÍùý¶¾»AÃaTý.lKºÉWª`âEO[sŽP¶Úîî2÷±7'ÈTgâ׬¦^ò†Ç›'óªL»·Q¼ùžÅÏΡ@¥åY¹ÇY=ª&œ3}î¦òCÎ9žOç¬í“•\üÎÞº¦\ylÙÇýóó‹p·Q£/\óNÿ¹°¸"ÜØ4¿÷k{ªþà,·Î}c\´^ þ†@ÂÙ}ï/5µ7S’—í8ç¢ß)wrlïóßóÊGS¤í5õ¢Ñn_xç5óvìZ%Î ŒÊä¦f·IÙÌøšžãÝõ¹üT…5ÑaÀ…ël£Õ‰¯·¿=_&¹› 1‘¿y[çôÜÙºôø™—¿gs[ïé±1íhòâhoÑש•××AH¿þJi=ðo´@!5Á´õúÀ;˜9 Ö1Xççºâ÷o?ö³Á™ºXõÄf ìaéÃHþ þrÊ)€ßwÜA°¸ž{ö쉷3ˆ‚Uÿ,A:ú‚º²Hå¨×¨›à±~¥E À]i€Æl¨¹ÄGñ,ýv­£)· Ërp;?ÑGœP×9@ÓAƒéȃôC~~ºþúë)Cp;exÙµ°Ð!úHp½elŽTôüO×}c!6¹’y–hT½{^»}×F¦³Åb'ó½J“cј—5ÞˆDÏ&™ì¼c-œ]k¸£?MRæåSHhA ¥Ä£–é6p­¬ôª,ó€æ,áDüò擎o~±‚¾!\D áú›•‰ÄÙE•7D·Ä]¼QÒ šŒ†$‘„Yà²ê7]òæ5EC>/«»Fü´-DÇ‚vûX< Áðàxèø·h«\…¼çˆ$fŒdìMJ0ÿ³ßé"¿AŃ‘w!ô„ɵ‡Ê€R+?“TòC:=ç/NÇ-$×yÊV·§éóW?Ú Ë[ýÚÓSNyòž{-¾äѯž;­ËÚ…ŸLÛk¡tB=ŒƒÕ2̤ô'ô+ÞWx„j …|T\_øÆ“‰@“Í•—® ŠÌCp.>bßa¦T ž]"Ü'­®Šh»67‰ÈФé"%ûž’`#vyܱX$°ó/&lDû'/ŒÖØÊ‚ÕÄ©ñ8–a7)lo:úb>‚¾Ü #•ûÕ¸>ó~øþÐQíÎÔäîºj¦ÑB·å†s'yJÂëÿ´ã‹·©ÜM ·ii ãæf„c@^éêú`Y!ð•Ëšö<¼dYÏSV·*ò”A_É®nnrû}ÉHÔÊ®1 :á¤(ßð(°8  &£ö¤ðþ¢ˆ¸4ç\ÖBQ,Âõ{ÜåÞ-’ŠÍÌ3îÒV/™>$eÆÓŸOY!£në=ãÑ­·aÓP‹¶„'­³`Îeï:ry+zíiÌšéþ.Ù}F;ÝØ¦©>U¿æõמ=|ãÛOˆ¤¯“ŠÕ–{Ijk(Lõ³P‡tr÷uÓRõÎØ’r_›é7®°Ø£µÍU‹?z =GbfUkL$ •厅 îÑ–Í©Š®ÀñۦܙB¡îD“ êÚó‡HÍIsiõ“{½šs%9ó™ÿ˜&‡žŸS*«qÙŠó¼…1Sü«ïY])¢8…Z¼øµ²ã€<Ò¤<e×]<8r´òe‰ûn9yNei 4g}v‡d¬Ç;BÒ4‡²û ä&ä–l$kd«ö¬ …ëÉQ…è`åëa³,‚ìAñ¿E(™µ»÷è žè…rd²ˆÚñæS&¬ë5êžsÖ­Ù¼HÄ©z5äÜ1Oº ¤d^±ª *BZÉ Ã˜š5Jö.å ésÔÑ8®÷ÜŽMÎ ·Í=êÜçÂE{“ŽÚ5; gU—s®èˆ€õûÔ>¾ 
šu¶l[AD=Ì­TS2ë%,-¢zwêWîî`ɹ:õ5D‘@ ±™ˆïì’Ó ¹ÿ%Ç_=+qØÐÞxÆ„³—4"[?Xzs—Ï~5Cš @ÖH/¥ö2¤láJÙÁýÊÛF±:ú“È[4¤Ï|©v>ôÎMúìxÆS¼wëèî›þ®¯ˆqÔF_Õ©c¼—M:mÛI†Óíʇ:~“È5Ÿî¹ÔsÕ'ŒIg|Ò®ÎHlWŸ>cÁûƒå`n™¬Ø–vXé½î~©;"ð¾ühzCyúŒ{q¨*W»±ÝæŽÝ0TÙ/–—A£ŠÍškU¢Õ´·Ì¿ö¥ú=àöÉÝúµ®8uüY+ZåwÚýù&sàìƒÅKån3¬Í¡Lò­w–Ê+'ÆíeäñÎfƒ#»â½Giø†H3ÕÄË_œ±­Ûà^sW½3AJúÔÌmÓd¬I¬¯6»ñúJàEæÕæßðAwÊ‚3Ø ^:˜éÐqxÖÛ‚Uº²Pé†û1°Í]hÒÿl7€g-Lr®;˜ñt*D£Bêt b¡‹ #ÌHÑpÅšsä€0]ñMÅvªÕIyÖë§h „ë·Q˜:Hë%õë4™¸‘OžÂ§~]§M?ÿ­}Jp¤Á½\Qï„ùòGµ¸°Š7“î$ሇ©Ùá¢Ó¢ñ˜ÃŽÁÆ/[ÒÊq¤'”ªÔÎÓé¬9i4€À’–2Säíâ0IK­jþYÙâ­¾ëë˃ùñ¦Kf<ÿîÃòP–ÉëG¶ÅƒÍ¶RŠÍnÔЀ*$u3hÀª¤0?šj°®Âfgft“êzfß·>ÞDˆÌ(‘Hº½y¤¼µÚ LÒàC\A; G W*‡Óå°{ƒo(þ—vž¸bËSHîâ¢Ú?Wv &rX= ÂÑñ­G<Üh/"’Æ5ÄÅ›˜V9Ì”>L«açÝßÑp&éL¤© ÇÛ;ŽD¢…ÇLØ|µˆùЖReÞãä2l\˲êÎ.+¬lAn¶¶zã>‡íÀó[W,}q‰tETMí¶õ—†é:GЗžª‡¿fLÙÆP:bK»Ü„°$ÕÀón-6¶6ݶ˜÷Ž3–:òü#º,Zúö(QÌjã¶§åöŒšrÚk³Þê>þŠ•Ïìž&d3*¦|ÙZƒ»A½¸{Èq5ñÚ5 ׌C 1.(l1ee»ºÄJK¿¼¸}•>^×3yãç³Å¼ Í2G„K6DöJ1rm¦CsÞÉ­ÉxÆNd°]kBÅ ‡¼K.ÙVâ+Ší<“›‚MGûNÍÙé }±—” ÁÇ%K£Éœv~£üý¥Êç+£¿ò”L'‚µíM'Éhl û}îfö'Èã6(Ár’JÅš,ï‚æIq5ùÒ­‹ž»Iµ’Y7´ûÈ•//¦±oº]}/Ý5à‚§ÜÜO©~‚pšU·¬ˆ†”«yÄØ›·|²AäŒZ5þôUó>Óüðýê–óÇ >Ôº]OÈ4kR_¹mÒ¶«dÞ£jתֶz÷ÞÙur¾¡¢é§Ôq¥gŒ9sm¹½ËâeëGz[«›ºO|nã#Ž"²Ì¨ —Íö9ΘÒ}[‘¯bÆU¯güÍûýXàkõÀM¯™‹Œý/{4Z|Èî4¸ë<¥©ŽÁ½žVîãk¢{ü6ßÁDêÄÒSÞ]ª1©¶¤(qýÂ¥Žîx>ç"å(Ssª9l1bYs"SPäzÜddÄl ¹Ý^KÛ˜*ºÂöa§¿d¯úIÓ+¯Zluj‹ËMÝâÙÜä>Ï–xâÏQ…óþ>Ðeæì­¤þˆšvõÛ+7Ý ó!¬¦^¿nÑË£ä¡,ë0çá6dÌ4ì“’¸9²Ôè‰45Tåù=¸g'‰?gåŬ¬ûd­·2tjþŒÌ+¦+21¡J%þñ^G×~ë‡l>¯c'`,é b‹– .º@,íä"ØÆPa|Ffm=ø_C_êù% êj4Ð =<&Î9€§ƒ¢îÅEî⺥:î‚Ð\á\E½N*¡Â–J(‘´k¹~Â9¸KýÜ¢ßKyŠñP º…¿`ýú/‘ø—¿þï>×窆¾ GÀªëP—žuM¾ ß`ƒËä½óº©,Ôì³þ~ë«/‘²Ñ@@zÄhh 4ç‰g žÉ¥W\¤4g ­6+b/7{¾Ãh°HJ ~a¹Ä@ZZÜ‚´QLà\¢î‘Šq Ù/1Œô.¾¾@ l‹‹¼05P-®F÷ŸñÄö…#»ŽYüî7·³¤+©(>¯1?à³»‚¡CVË\¥¼“üÚ‘´ ª™;,Öp$EŠ«†·t‘«w–[:„{ïî´¦¬CÁŸ>k¶Tmýr™Í@%ɸ²„4HŽ(hÈ’îô$*$w1ä'Ò!­I[r ²WSHM¸ð9\|ù¶l³cPçi$$ˆ›ã.+> ¾RGëÈM楪’‡£™ÛyÁÌ+?’æÃtÐ#ªæÃ’ˆñr›”Ã`[øæ0Œr£=½hÕ±(j’Mî°R1'"†r—j^HÜX¨^x÷ ir³²:Í3I¥ãjÍŽ§nè0ò¸ÒÖu ÉmO¬Ôf’Ù!çÌôÚç?3ö7âÜEKßÂBhVó¶ \*–ZzÜúì[OIº)®Ð0JƒúSãçªd ŒÒãC?ÎX}˜32á`ƒ'ˆ*Ô´ N¹Å¨J+¤Qs¯¹çù¤ÃOl£ò›‡÷¾¿ÀYÑ”ŒŽ¿~ñWOîT9Á,³¤ÝÀ¸\›4©Ñï_¸ó>é.¾5Ìó¶­WËÔåV–0§ô ƒÜ†Ä–ËR{ˆ…Qbk‹ò¸‡I_íUõûý…¶P¦ËæAðɦÊúµª¸Fd>¦ ¹¹‘Dµ‘úSLJŽ@oºãú Ë^ 
IN¶Í…ѹ’N/’Ÿ›-Pçµ9.%`NAMèa\‚ÁzY!.ÓÄ>&ðžfi]„øm÷ÌíwÎíõr]U3þí‘dʵ²âpä€Å!ñµU Õƒº±˜ÌîtJO]óþƒ¡½k>Oý÷]½iúSÝo¾ô¡uÿq/¢XºV»æž7 ˜sÁg¦ˆmì÷Œºì±EÛïqÖÆÊ¢üJO»ÃÍI’7tÀÅÙ7(ˆ‹\–b/Qd…9-¿'ÉÌ ÜÊ’r—°R­Vnz†ôx†hB?ô÷ÁÁª¶ì^iE$ʪq'­Ç'ÞnÝÜdohŽ–[*çtÞŠœ<ûºå“_¼žl ìQ–.2—šT]æLxz #?ÎtM©ç>X*“,öñ=WX¼ Å’»+¿\͘…ø*õÄó2ÌÊØ¡„ÄtJ è3jý;‹ÊmªêkJä®ßö!þ]ÀŸ:Øè€DuíÚ4ÞÀZÔ\veÐA.WDæè±°úíÿBP›~RIËWý"$¡yõ¬_Ç4 XòÜ_b6%¡S'X¿‹2œPŒ 9á^½u )Êu*á:'ˆ@ 'à:¿RR¯ŸÛ9ôŸx…ù 0뇎ӔçÆßäA‹É¡"K«áÍßm•&¤ÕKÆ/yZœHÑ`%¤lÊ’PÏ%©‘óóò¶œøè¸|êvl^!K¾ž˜™rÒñT’Œ‰.’¹ófSJcòšñDò~êÎ7Äßð•Äþ€cˆR¬)Êhu)5ꦉ‹žCk^ú(^ÝäO(gGrìøëÚR™&»Ib]}žb•ô „À@ÁzRÍkš(ép+$ʸԟS«_qÛ™ ÞØwF—×f¼Þ}Ìžùbúí=µeT4«Š< ¢kðS ÈacLYJ¡XÔË%P›ÑØ3L¤^Z[O\{Œ{ò-+g¿9T0 ¥ÄÍ•®«UÏÞôaüP2ˆÔÇ«m.ï´ó_ÊÚ}c/Ýd8ºyOÝËÏ´™-Ã/šg26%¬5+v>eLxdIŒÄØ8#Šf üš Ó"ïßÖv]]dêÏ ÕßÖw6–J«?öø¶ô¾„râÐP~D#”-DŒj_rô}›-éóñ+ΞÚgÇâß]K™Ñ½ÂÐ'w{*Ï\:£Ç˾bßøËŸ‰eªìþÌ£;§%îsäv˜à±c˜6Èäuëb~ÚðÊã¢ÿ%ÔŒNÝ»eð½ÝÖ®ùf¼H Fõýžïølço«K*f6'lJ1ƒ§É¡¯–¾w-ÝpÏ…/’Hháî~ÒóÜN݈߰—sB‚Ã`‘(²”êÝLïN·— €Mx DQã$A'½¹kÕâf)ב!â=jøÑnv ®Ä›b§·>·a¨18ºò¼a/òFÌèö¼Ç'©Çnê1è¹×ži¶îgbCÊNôr6îÄ™)7“`’YcSCú zû¼H¢‘ Ù9Ø­.¬çޚˈØóqÃ4’6€(ÿª¼¨lrÏùM ‡¾n³ï­·7Ö5ÅÐD¯å®”**?¯óƒ$O l+U% %ÍBo" eš„WNY½iÖ-n[ýñrúÁcuÆšc›x“ÊóѬè4œ%ÄG¼=0\B|]˜”dŠ{‚¼íX®Èwü±æ‡£;1ß?kS_1ÔÃûªÔ^>ñÖT‘`bÃ.BÝÆ*©Í†À=¸ÓØU¯Í¿«ïÔåÛ”1Âó«ÏÂå;îá–,Ð3x6†z„»`–‡µÑ{äOf뤴ÚûÓ!ªŠ¡ŠÃ§XJüÒì-·Þ|ÒÂg?}Çåë–½:H–íêºƽøûG›« Äv/L4coåÃk_ŸÇ“Òd$Ç!›Ü_Í¢-,<–\ßæë{ßýšÇÀ ê¹—äïjdA[Œjè%sW¾4á¨SÛʼ¢ºŸy¥Y ²ì¿g&Xmýã FyHß_|w¦€zP)‚d¹hKUŽDç¿7ÉÄå-Q›pSâ¡uªÿ…³_øê!z¥ Üše=Ù(²iw¢>édÝ"¥†œ¼àé÷Ǥ“nw+2N4cÁŠ 6½¶xüE³"‰`ž#¯4ÕnúYÏ 7ûËJçÐ…2>üÝr™9ÐWMûHê¤c0ºüÞ †ùœ7ú*Œ[Vþ´² ¿' ŽKYCˆ“„¨ÍŸ¬“‰‘R7œ1nþw+ãUôƒ>âìûÍÍE¶|/n÷_³¾¦¾Öî`P-9õšÕL›¦pcʘ{ù2Ò‰¥ŒÉç~7CÖçÅ…Ž\Ü¢šsÞ:¿’ï†=þ½^e~ߟo-NUÞwú3nkg¹hqç]öÆÒ‡n~åÞÍ=™†Ò´1)ʆL?Í[Ô­—ÝýÔkäTʸ•«ˆÙLFÈF6ú"ÅN8ó±¸Íøìd‘k®k28òdÕ>J²3”Ù›s¥2½?ëÓNÚYb¯ÌÅ› =¸ãÌü¥ŸÃ7cã t ßÚAßðùâúº†üÖùXÈ™0j°i†W€½Lª,¡CfOE%;’UT;'™^WY½ûi¬/ÉpÚG/á·H4¨ÂD)Àžj4‹1úyRIƒÇ‘\Р ă+ÀVeòN€4\Ô1ŒlŽ@ ¸Œ@\á+Zƒ”ºù_h>UýR©ÕE:ø C .ç”ù„0ýJáû:FrByÀ•Jô…j¨å"…¡S¯sNô§ð…©„ƒô‹4Ao2ÅôæèM¦GK¹¥¥|ËÅßĉö.Š°Ï 9q™¬åbÏt;¼Ân¸®·’éƒîÓ÷Å—7áùf-÷H‡èÍÔ׃µ‹F'{¿howqès¹m˜¯õÚxWåà?þ²VI+Øè,–t†õÍ?æUäfb2¥¼¤´ÞzÉÂJ÷‰ölá=gïòØý®x¼ÐÑê¦sF=÷é"IÙ“}yv°öå«& x¸ú ;÷ñÙ¯ÞE év’¡7/Ï_´d @1åuŽÔÞßm‘-˜?·Ë+¥ÞÂû.xÉì³ 
ëþ„«Ð±§r×–­kÒx¢a`d!œœ„)ƒ"¼ä&ucŸþ[7mFõ[N¹ãYkqÌ´xÀ[?Öü!b«Ž$A9ˆ!=ç•öØ~÷3=fw}ñ®MÝf÷‹ÞµüôÉý>’å^‰®(VNR Ôô[iŽ5¤c^_¦ÓØó^·ÙƒíÚW¦ëëÍî¸bý–L'I­nì2qýûÄh-ãbUŽRû¥=:¿úÆëV¦xr÷é3İ¥J‹ m¦w~áè²8ÿýòÒÖ÷]»«!¾‡¬zº,·çÜG3ãâ×myÎ{‡®èùÇÞ8¡²ò˜á—l±šJ÷Ø™°}mÊ«ZóÎ#äóµçeÒÆT,c¡~"ꌤÔÄ ‹ÆæŒéDÖï/d%¯¦^s)7ªžgŒo—W¨Nùì­îïñÒ%oÛ,Æç·Kð»gßïýVX- %X› ä_218iŽò ãVu¡*††~íYÃÚÛŽ7„œyö6»ì®O[®è6oó[ã½Z%LÀÀ¥Š0–ÂÍY{)®ï’†ŒKÅÍ&¤!ò€²—ÅÈŽœhËW7G\¹H(…µ+sÑÇ15ïÕ)2ÊYu畼só}W½vÿsÝ•5ˆÔŸÎ؇\¿ý€ÍP£ŽiwÂç̱æ[«ÂßløúIÉÝí‹bÑu¶¶4¨‰˜À#UŸ¬+–ÀbÜõ˜Æ"Óe 6Ì­$?ÆæÃ/±Y¸ræŽ×w{`à–ék{cÁð´JGÚx—`‰Qê²tþÆ;¤+4¼„û‰ⶺ|Eµ³Önv¶Î”>Üåµ GÇúÚ=sÿc&­[Ñí°wÌéú#k<ö†‚ ³¥úàÛÏÌ_â{dÃ-ãoyA¤+hÀEøä%­#ÔM:Ä‹s9ÙR’‹ÇV®Ês—øØ0#ÂnR2n(&Åá!Í*ãG¬«U6Ff—‘¥Y{AT5އ1S:Tì·í9ðeqëŠv\=úÌ%:ýi¶($GœA¶v‹tÎDü3ÉwØl‹t›2 &ŒYù²„Á._²>ÀJX½Üãù’<è{ï'8sÎV¹m¹„4 ý/yÀÜèrÇóòÕY(( ¦FOY¼Ùý㬣z]sÛ–—ÇLÊ}hŽÑ–ÔºéÎU§ˆ_.ö.³jõMGi‚Y-"dÅÑœVÛN¹wäÎu*ÒUàDcߤ¾¢@IÍi|÷2;ë©xq-¿†cNØP"wš0eè…ðPlj=W:£ÅÅM¥'—¹çpÉ¥†CA3‚ªI+—ªÔÉÑÞ|=!|=CÐî`‰4™P¿_aI{cæ¨ÏA¸W*i-yu÷Sbºîè1cÙG³D&`ùdä3—ô|8îªßþáSéPÀ\àŒ-dìÌÃdÊ$|ȶòôÑÝ5T[Šò;§¢Á™½_Mf GôÛ¼ô«>iY}P³öIæ­æ|²~8&*f{[¦Kõ^ÖJ}ïÞ»áíûhÂ-=n^ý‡gep“G ;oÅŠõÃ>¯ ë îÙÝ~ÊÀG†íœ8誙ϼ¥ù·“åÄœø»“”'­ªØ4‰äbg:!uµÚ+CNo ÙˆS¡Œ1íw·&/uˆ]4pJ×äW3K‘@~¥3¬óÊ"MI6åÊz% %Ù«£¯ÞÇSwÌzôœmÔ îý•KOØ[¦NœÖ;ÎÕÔfâ1$éœ+µp냞֎»ýjé¸Çý¤«Îï³xO68ä¨)öÊlCt߆÷×I¸4öªËÄìD43b–¿%29ãꎞ —½3 W”îd\Û*°¬ë@ €®èŽ(šäzçwH}¥{?qÜ¢Ë<, 3†l”¬ÑRPü%Bóbüßñ›ît(hæýƒÝ‹:Û,L˜ùY’–þ—æñ k>›š³9ò’9ÃK„l†¬ %’µ@XG‚=Yµä¼?iËuç xñ“µÂÙ–žlE&'{šãršêI*+0ý/¸5.ñ“Õ6äzrä·¹¨á¡.›ãñ:ÿ;¶=Uƒù&8²K8ˤQ6uýi=^øfÇÀ®w9"ǧN·Û9âêåÊw(¬óòòÜhô¡‰L¶ñÖ‹–—m?T{à™S¿q㈳<òÖªvÞóÇxÏ e‡?ÛM}ÄÕ‹âcc'GÂÁ™×®o ÞÛ½)“4=Ô}·Ãí—΄)Û¡×G®GC˜W½Ùp(dOûìælöö4Á'ÔÖdœ-]Ó©»k€”äªD÷e*NîF5¨Ç,W´ÈØ`çöÇny§.ðiÆÆV‰ùpÉ"öZ¨WQ‹1ào„ªt‰÷˜I}ÖVUéüúµïÏaCÀ#b#E’Ь Ä"„ûŠO­~{¹h<~BÁ„©ÎÿݨÇ<¾ô“»½vû¸-WˆcSˆMèà¾Éä¶×߯=t¨¸¼\ˆ"6ÑjÁ$Moÿá¹µõÍíÛÕ5Þsݪ íaó¡µï½Ðp¸© UÁÌ<ÊŽ‡}T *Nü©ðÛ'¾ŠÞ#t¼F¹Û©º€*ñ™XmªCß•1W©H(ôxËV¼:Sº4¬&u}cù–ÑòŽ€è„½Š™0S¨ ÓxÓæíψîkT qVê¶ÎOXS_ºöÁ~o/ÝÕ"ðZ1Ó6IÅmÌ%cl‰ÛÔ¸W*GgåÓYV„—ÀaåM±ü ,&4å’R"&Ñ-á¦zæ'Ë ¶x’±Öv5f4Ñ6-Æü<ÜÞH‘¢ÆöšõuàG¯*é=yx§å¶Ö™¤-¼à½ ¶<’ç°±Ì &¶4aDÅÕ§U—#ëòÈ^¼cZ£ ÔÈq–öà‡@ϬÞ=kL§ù¥Ö£ðPKæjK|ÅÉxUÈØ0í¥žã¯ÚðÈ»ìá¡$ÕUc¬!ßQÀÄ wÝ.¶+¥Æ^þøÓïÍ’ñ¥âê–Ëî/ö—G똀ñ¾=Fô•¬mxîãÕŒ©þþRµ•sîîênB#bcš1Œ ŠŒˆ A$)ˆA ¢ 
Šäœ“€¢‚ˆuŒ3:ÎŒaF‘LÓ¹«+çs?û”ãú­õ{ßûç]‹u=¶E…ž¸óþîîO{*´V­vÝGË…²ëD@ï%Ô2fŽ·4Ceìž¶¶˜Ói`†F@, 5´ç’y;öµu2ͳ©jçÙ%Æœ%Ž7lgoh_·Ñ_ê®æa  ÙwËÞ™rï cgîœ>ü†ggÀJ¹-Ez(eA³ ØñQð'[•š1ÿ)#®aж¨¬ÝbËX´(»‚| àCxB5êºçf}údŽœT.£uåêh}S»@pÃçˤÁqõô»N8uNŸ¦bk¡Ò}rCký¾Ö}£®X‘ï¨íËý絯fE{ V€ÀdpHZÕÚ·ç «Ë«;»ŽÝüÁta´“^ «Õã—M[öé8¹„fCbRàEbwS“¯~ݮʒIƒË>£ßGµÚÁ†BfëOsɘ›_ÂWò÷`?¼·¹c¨êhÓ±1]çR0Ñ0,wŸ> úD[ë±–nmM‹âbuÚN½èli€ðimó—ùhLÄËAAýTñÔ¦½«@¯¦¿}λþ³éww½ñƒD $4=‘µå½;ÖØJ&¥IÕ$£F±BŠÞ0q«¶â’­3ß¹ý¥ö4¬-+;©¹-r»žë±£ƒª×mó3ûï\õŽà1qrŸsǬÿLÏžòŠÝ#%4T³ds«fðJ&>÷†­¬€Ÿq E+Ì9àI0ºàq´bNi‘qƒþ¢ ±Ð( ÅAVRAU”wLD¥|uæeQ±äˆÃ1^P3ßxQE|P¯¥3ÞzH˜l÷f›d6»­Øm¸Êïìv—Q+»ïòñå¶öC®ñ~‡"ºø½yå¾\ý‘ƒ•SÿpÒ¥"úpTd=a—0Uë­UGSŸ¨ì5´-Mœo'†”êÏšÅig H`‰¸ý¼}{³_Ä3ÒŠÚÍMxãŠ{.½áÀ —“‘z§‹’Ö–d›÷Ã/æÑ‘¾?]p™¹L׃g¯¸õêBùÔž«›÷àØ¡üTa]Üß3>pµn×S2ïJÍÚ1 Î},ñŸ_%0³9šŽÌš9™yû„¥Ëf©Y+£/\¹ðû‡×OvÏø%[žEïåæŒ wÖO÷sú~üéjœ«»‹ªþÀcV=W‚ àyVuוYcVŸ»¼­xÌv7Õ¥²Zr_æ?ó¾îǾHù2Ò0fŒ*$]ïr`!kñPÙ嘞š…ɪœ¬#ý¶ š.5Nt‘·Ï•ã¶QVRŸm›Ø@>‘XÆýz&÷Œå2­©i}ú -95yƒ^KPÌ0`஀{D'óî;eʔɓ'S$¬±cÇè$¢zrðovÝãx$~ozi´â†W_qQ­7©Fôžýâ–‘Âwac°ûêÿêŸ$Í× ˆÍ*æ >·ŽžQ¢ƒ\’';b®3S9èë»t>wÜe¯6´þÜ®CF1R4 ™¾YA-݈ØìOhkÜçýº|jÙ×Sxܺ¿MÇìîä‘=Y%pO„axÅÄí2¹¸OÊí ·õPõ=ÜD´;KÍ`„sLnÜÚGA!B(‚R¨jáû³¹óÎæÞ®¢¬`ˆò«Ãu÷åC]ÉvO‰ûÊWŒ®9á„L*õô‡=¥Ií¥#iµë‹™B|5ž-fjxL… ê2ò‰…‚ðÒF±±`¬1AÝ Cc‹jX¯ ÇZ¢áPGj™N¾ûÕ´E«é\QŒó²ûý°ö§‘r[„ج:˜øyÓGó^4mñÎ^“zîºòŒçú×>¹e˜Ð²¼z▵˶/“@0R‹¢T#K‡ÝÅc‡ŽÂ”QÞ)ãºùí%½4õÁwëГbÈÛ#5š°|æ ‹±©›Á è»'rí7?s«T’JÆ“:è|"¨˜²ðއ/žÌœä.Ú§^ùV[¼Ùïs$LàB°‘ž±£·ŒF¹j”l½ÔBR=d‰Š{û&LƒØâ‚ÕeA±&÷žzõ!k0E žØ)©½X$0ÞRwœòô™Õç=vÙBÏ _lÒŽ:ÂñUï¼, 7d‚#JŒwæ(ª?ˆÛ¸C¨]:—ïËìD®µ­xàíoÖZ©A÷õ¿<Ñ´.G‚€a§XÁÉãHGã›ßÞ$«+­î?÷™5Ÿ?%cþ\À”&*ÛW2^‡Õ‹@V|™ls’»ŠIžn³ )¹vC¯3ïÍçy#‰Jy2à2f‹Í˜ÏR»B¸/ÓĪàWÊI¶4WšÚ ûô°,ÆŠnQqÐé먌Mʤ9Cö|]~ÐM¯~oŽôѦæTmí¿õšaWÏûÑ0áFð­Îêî®CëÂ]7¹®åি¯æåÕ¦·—ÊäfÔ€sÇ?÷ÁÃc/Û6rk·écþ#{Áª,ÿ&SªK’ /qÖZÄPÅ~ÈÜu[¿M;60uQ»øþYÜüW9˜_7¶Á÷.Œ01Í À]«¦ùÞùz£< ÙtÊòßÔ ý²ãòE|ºðQ Ñ%K2Üu–ÿûß»víJ)!ê.ð¬wݺu$#Í›7ªônBÌù¿› Ë™ÿßm6ºÊ$ìfðý/ßÈžgïñÊf‚!Ð’º&ÌG=âÔ éGf;UÒT¼9ã&UC§ý/¼1lä%}Öç‘ó6VVÜFŸ–2Û®Ç{-ý±þ§úW½;‘9X^ŠÒ²€þW}ºJ¥ 9B‰Çüºœûoÿ&Efð@C4J±Û&²vÒI]%:(—"e¼hJk»ÉlÌñN!4$š•‹fôlj&!Ä”Eb éÜž ‰³6gTeÖäÄÞF´yoÜk§r÷„r%ØóÔ~·ç‡õ\Úp¨©ÊÛ%[¸e~Cô Ý˜å“Ùyê¼@Ôå!gT¹ 
¢þÈ€9³5òD‡²å;oý¬?ßLðÆÄI7A‚G_·~Æç}\ùôÚ}SDÄည¼­ä…S%\´&‚zö*tµLD›ÚŸÿN„X+ÑN:Ý%ãcõ9Ši—â…&“›$ŸL—8³"w wà c(|@˜DzáþÔ®›½YYª¥µ1LV(³(áR榖Dµ«Ÿ¨)7ý­[¿xå Ÿ÷›uÏû£Öu"“xݹϩ2DVs E šÁeöÄËCûv½O0)ºÎ8l ðfÀ†(•qƒÒ¯u[÷~¯~µ9>Ç(…•MµMâÁb¹²@……\mjH1¤ÜñÄ¿AwÊ)Â:Û¤ßN,4úÌÞrуۿ\μGãÓ ­…Z¹ ×3ErÀb娿Äü·ÆžYÆü±^Ø24GçAÄ좎ÄOöu9€E¿&OçëÙp3K:þ OÎØ7zs•f-nùÇs)Uo¥¨Qž&ê>¤"õ!À4a´«Ô¦=KRŤƒÄ'›^/äšØoñä-›Zš¥<Å‚S!—.f­˜L\RÂû|*+H2ÿÇ÷a.i±ØœK‘Ãô¦dO†¿~òÉ' P‚‘™úˆÓ§O3f ' õo\ ¿âãq‘t|ÏÖÿ­'v¤H¢VÒ¼êtFµ›Ðx¸/Þ0FåÀ„Zõ—ü]ŠS4ÆY錮hUãÏ-c³$}©Zàè«§ßÿ®¿•Zås¾„pÈÝ/¾´c„ÐkáèTPp¤³»Õdê!B'bÔr…£ÅМ>Va;í¬Áf˜C¶ñuX¹L¬Œ*è"à>d‹nuviž=G©[ªÚU"Zp•á(…òѾknöšËS‰ìØKÁjÍÝöåÂtKÞSn«×R€†X9Õž[üê,‡«‹è…Cî˜>ÿõ± †_„¡?âs²ÅäÜ­IkT?ðÊ ëAçSjÝGº›Šqq©O{àÄçÔ'£ŒÐ9ɳžî¶%iÓŠ¸`!pyõã¡«N7A@ã€ßÂ}ÚG…õœÙkµ ròÊí’v¹î¯siÔþÖ¯”k­&ÕïÚa+?šKè±*QĈžqSIo|;©Ò{DeMäÈIÚ2º:õr¸¬ÛÒ.{¹Õh¦R²Õ”¢yX/ìÕ«ÖïžÃM„(ú¢Êü‹Ž…EÐQ‰D¦%[ž°ã~JÝó‡'Ou]úà¹ËÎ?ù¤)=6µ¢ÜôÙªož‚4ÿß+–æ4‘I¦"~GuÉDaùÚ2Ç  Uʤ:«»5Û–6´Y¬ÄŒd¦-ÓKÆÂ‰'ËHˇ”'x°Py£š{Ç®aÛ®Ÿ|󆉇‰’$AeøêO^¶Óc1Ï9í¶;.öÊ+s…7“¶¶Â ˆÞ¶¹ 1-ºö£§ù>“HÚ„åå³Z>šJùþ'ÔÒÜ\áªN†ÙÆ¥ŽüR+L.kVͪƶڸ¿™î•m¯¦Ð/åÉQtåÛŠfª’@ÿóêèAõîÃ`Äò\,P¨±¨|Nœ«Òëu¯æü4èÏ˼!Û÷G¾ —uNóúÜ—Z¡¦€ÊâDÍÿxœìk¸µ$½Bc¢™½©ø!¦ÛXø-nT Õ‘ÅdšZTÏÛîØñ·W¤'™×ƒÙÕp(Ö¿Ûs[?›'-ÅF€|F?e,ÞtKàÔ–†þ€#+ëø=D‚ÀŒ£·ÄGᬥ d‚­ø 4…†~øá¯žxâ ¾)åñ†3KÇoœ˜/?Ž÷Ð Æt>ã!^¦ –¬›!ÔÙ¤²‰8YÕ¬B®ÿ_¤ë_]¡9£•(,±0¹€¯ßÊO ®xk±¤œrÝóc·\}÷%ã6~5MÈÜIзcòÔS×cÐ&ìNc<Ñæv¹£Ù„×iƒÿGã ·‡°ê‚Ë^%"¶Oü¾"€Þa!ÝÓìµó©$¦Jð(4Œ…åä4ª£Ð ¬m$‚º†X¤Ñã[Ù2gÝä’ü~ÿU£¦¿öìjþàÓå˜C!ÍÏ2ÐÀ}&Í®p¢VßÔÆm·ÿe¯å>(u@JáÇ´¼ZÉýMu¸Th,šÀî ›ÍdÑrÉbYý¥uAÍ 5­fԿ܃Ó¯û\¹–À­ P˜ôlµ ïâ]/‰ àšdë(lÌÕ^Èzì˜òÕ–¯àÚyÛŸº¿;é¤Bå—½:†Èˆãµ UýA´`-&²8l± o[•)™K†íhâ†l$muPk© Ü¦G/8ç½ÅªÄd½ëÐk¶6~›9+|êIÉ ÆòQÚÚhh_?{óÕÙ§Ü9C­qÜ€ ÑŸ(µx¢5å>«òÜ#{¤CY몿¨¤ * ‚Üuå#JóþsùI'ö¼ï’Y8.È0[²{:!ÔµçŸîÚñ‰nÜfU xñG­)WëØ/smPCœ³ìý§d|h$+ª zž7¨²ò¬x}þdû5c.^p¸í?Þrû HÛÔ+ÛŸà„äOIl̈é´,“º[v‚`¡Gâ þ ÓP°šÓÊ8–Þ$hT»Þ_.b„…œì©Oo"‹Œû‡¹mé1ÿ¡Ï÷þrH;ÁúÒû·ªàÅ"+ˆð&>Mß݆§@*ƒÍŠrÒ³ç£öhegûÉM‡[]ºÜÒ"ÁæÅoŽÑü̆±¡¡±CEõc—ηƈ³¦â”G¯Ÿ¼7÷çÉžM›Vã*eŸ¸ñ™œÝôÔà XY†v›SbõÄZ x#eAn/k N•“ËÔ¢¦>8oÂæ¡ëÞ]¯ìE« [1•$‰ßUmñ´ÛV(WÀ 82…´ ¯®f†.£ÖQë)¡ÆuÛhÍiÓ?è/£mVƒ¯Zºâõqã{-~öÕc¯ÔËèVháj.ŸÎ0Ô¦ å­ê¾ËW­ýzí¹çOc¹¡ •´¨`®3Ü]îÆÐ¡êé˦ÂL¤òøªÕ/-—ÕÈü2}úJÞñÕ"YB5ôÒ•óv –KLjØ¥Î}g¦#ÉYæ?‰5˜»R#!éfMlxe-7 
8}™|ÄÏ™ÜìX=Ç·ÄØØu{¦35²Ø€óÀøÏ eß©®2õý‘_ʪ;Ô”6®Z¼P,˜)^‚%Yá°©¸úu¹ò½ÙLñ³+zMéñšÓí ÅËöÿtàÙχ – ð5¬[SÑY¾`$ê ãëYs™;ŒH¶ü¤±è«H±©ñÒ†´uñ–Åï=Ìs÷X^2h¸aR2o×pI¸ºÃ‘è¡7-w]3öÜ…µIJ²5«³‚‡,†(™Vª´46d˪Áãû0—^¡z¿åøò AÎ|„×Âh±ES‰b½Ý»w¿ä’K0Yãô…I“ ÌkéòßC ïUð´ÞbÿÔÉ Åa÷–V8LK3°”Šàr8mü¦S T9IT¸ 2adCTÂä6'ˆ.‹p.ç S*e‰,Ü3WNbÏ“8bl,®â:2DÑ­]DréGÈä{·Aê2¡¬Huª|#CÎì¸ šíCp6ªA½',Ú<¦e.œ5­<ëåÞ.rú%uÓa\Ógɨr ²àÕªÖï™ÀCͨ¡‚A4ù4ÀlàÆÉ°Ëim¨š-"¤r­Cu:páó§lsjZR5¶?M9e¡C¾ÖðÿbµŒ2],ØÌæ‘€/zdÆS#òV !šƒÌ‚:e ¨ÖL-ÌR\GÈoÑ8Ó0WOÙÚuì­?É8‰8íÆc$wÀ6œ+Ca=1xÉ¿Ž| ­(Há)P-©`•ORªEoõy%Kܘ%Rºh%°Mƾðj°#¬ú-Žƒ*Ñ â¡j7·I$‹¾*)"(÷´´L¼~KŠx»íÁçmYÒ ­&§Ñl…"S,+7‡G³aÖ?ïï5P9 Ù”—õÉp»¬¬a¸ùq}˜á¯¨¿èÁÄòžÎP˜dƒoÅOð`˜1ÐèÄõõõ8ƒ9“€añýoE~C®8®‡ã÷Æ—FêÀv•HÝ6²= ÐfÎò”mÀæ .’ÓÇî&V6i ‹E]ù¤¹öyË^b‹ð³0¬Å(ÑNúÖe—ê÷‡ñ¯à–”i>Šo÷¿wË'#f;¯Å)aD¤œZ…J²³jÄ-s_Ü<,ß,®h!"NÁÇϬ©l*ŸN…¼PG¤ýb¼-áºr)õ—¯ß!F œ,I&@Ç ‰O²èwñ¢;o|äön]%Û;§ë¹dùÇ€oèJ AiY|Íš ¨MŒ­f—j¡Æ0Kž Y2Ÿ$¨Im|}¼h\}ç.\þ‘î*6¨Ù=7Jñ|IXÜ:bö94»¨ðÉL’Š=À¸:\ä r àU1¹áäágó·-–ö¾|éÂeNº¬7O¦èìë–mØ3Ÿ›‹W¸LžB>˜`;nÀ…+ˆ –ŸS7_ßÏã’xÉóm·è±n¿ójŸX¸ur9Fmùþ×Ü»òíMÎNäµè†_O>ÆdÄÆt¡I:m½{>¾å­ܧ‹§þWOõ¾°·×Pî4T˜å·œódÙ ¾‚¯yå{DSc'H³tà¾MÍåA˜’]ÕÓ°XU™îm…aÀP#aÍHà^~fS#Ï}SyJ³ÎÒI+ (ÈY¡d­Ê9 /®é?âÞõ“–÷™=dÇÈí=sMÊ–fêÇ^6gú»§ÜøÊÓ«ÉÒÁKèj‹5e$Ë@2"RŸ#À«8Êu¸ò‡oU¨uu´Ÿƒü£Šso}íhõ7Ó×=ã0¸Ñb}Ö2¯ªöºžMŠUA`¥õà}®Z¬/-¦X¾N8q|õôÄ{Ö6¥£pDw¦|x×uÌŸÚªó[v/mmTÞN*–W~ø>îÑŒÚüùJDR•KõìvëŽ=ï(›=ÛÔj °ˆ4Æ¿&J#³K¨9’j&Üœ·4çAwmj¿Á–•at¨».ztÓsâmy;zHœË‹¥ŽUe«a="ocͤÕëŸèØšº¯çƒF£ó±«_öºÿ0¢ë§Ô§œÚgÞ?~þ÷–æ©Ã²„Æ÷YJ:êàn[YÝÁN#»M×ò-/¾6£˜0LÞÙë…+¿ÅRÊi"»t”ñ¯­ml_2ÈÖR7w{èõíK%0â’°j7ké4šçÒådeƼHÕK´æÉ»2W£®_¼|Ï£¿2Rf³^=Ûûm2MÚzYdS{Ežº|+ÈôÅXÖð÷»â‰¸½Ëëêí ÄAi.>ɤŸ‡4>Zª¬Pª<þÆÛo¿g‹4¿bŽ.iü/e+ñJZ0'ð¦ÔkN(½ùýõxv´]ç :æ­l•Ní&0jzà¯ÁügdˆŒ’ö… âñ+}Ïf1ð »¿H’RŽâ0$“O·…é+êÇ 0 ˆŒn½–ü_úG9„—{p€ÃÅOy-K@Ö…ÉGt¦HýøVq>Û²¹”Çc„Ö°ž5‡—°ä´P…–œE’„ЃX¦è ‰ö Np4Þäusž¡?ࢵêçý-ÂBAT.þU¥y¤ÏhĹd!õôr޼ÚðÄ{/bðy³)«¥ÍÿøUÜ6CKäàÜÝc9ïÔ 5âìÉʪ4sƒgÊCoŽhƒèbW Ñ —L¥ I»ÍÜ–mö¸‚t7Ëû¼24­Í-åÁŽ×\tÿ;ï®uDçè<.ð<ãÇG6%üžk|µ6 Pñ(•æîà&иtŸ9y1ñf*^«×ÿ²RH¶U 9yÄ|Ê9tÒÇSÑÒøŒ•VÍÀ8„M!Dš8SA¶ýo,¨V“©†¢¦­½ßLgBmùÇiÔüòñÛ~޳2¨ÚÖ\[¡ÙoëáNeÁßFUUw]=б •ÍõŧÍ.ïΙ£³Þ{D4Eº»_y­’¶[î Ïô— k*dÆ@WñhJæ1~™FcW‡ê°h,íÔÀ›ç4í­ïbë°–O¹bSÂ}bÈË?Fþ·Åö|M\:¾…»ÊË)‰f\vXòn4 
¦6ª–PžõQ¯Ý6z÷mÓº/WM§Lè¹µ˜®|êÆ]æ„ÿ¹^ì^Ïð+†¿ü×—¥Í,Æf«¦H©#d˜;­rÞyÙ5àwV6×69Åz5¬/t¿ª¸WfMÙ'1ƒ5··h+ Nˆ·g0ÙÊŠùö²¢ôP@¥ÒpâWÞ_qûé7†Â§Û¡NÚI•B?Å÷hx¦é©ÏF(ç¿.ƒ q^ŒØÕ@“5(ÑéjÆùÌ»Øþ\8h½õÄA]Î 6Ü»ú³Wd;࣎•¶\vJZMèµóÅ]=TÍŸó‡o™%TcîYÿüº‡ï³Ö‘´ÎL$pViYëü[69“å/^ý÷†lÜ ö'Eæ`jìªC%žÝ0`Æn¢WF6¤Í>R³`ÝʳØú_qÿ²Í ½äòÂ4ÈÃÆú!»Keˆóg%”,_Üöˆ:r쇊 ÄiÃlœ°òögïÝ>þÝ;ž¼aÞÊOf"gË~—¨ï¨C‚ÿ¥ŒÃñxH ÔÜ’ ¹”D7Höý­3°a-ç —À¼R £ ×–ÌÔ¿3à߆ëx#Z¶ä$ŽC»K~!èÔå·(mTÈ‘@<šx!B¸·úE¼ ÿ4kÜí’*â§@>¶-Æm½DœÜCΕ ôÛ’'a˜“ß8UÿËfbVW£ªÃ«³}R,äûÏM7G\½ÂfwÜv΃å'y‹>╳Iy@ÍFƒ³¬ÔõôÞõùé 6+b¶œÂn‹´w£Çä x—Hö —ðò3OºPŠe;¤<5”ÿQ¶J7¡Ð†,ÉEb´ÝUëñ@Ûn*_¶m·|å²^»GZ e@+upÿ¾ÿ>SŸW®Y8sÕ=â0ƒµc¡Î¤e†¼–¦5<ØAuUnžûÁvŽ¥{¹»#µ=év¢a@khEõ»jRÑ™z‡Û"" H5«\ûV@¨(Y4%áÐ9¼ûL*3UcÎ[úüš;óáÂðˆ>U-–H¨Ü/º¬FUi ¬M‰y¢ïP}»«0Ÿœ@â÷tÛ’ŸéÎJ¹||‰á;q0‘Êû« ùÃx’t µË<ÚÚ©º"ÏSd»Òw¾Ì5Ífù»%‚gÇÖß”ÑHªI—-±üVÕE:uÿÕÊ’@,¢ƒL r9_”¢pó±¼ÅƒÄ¡­ÿ|)ß ú<}GÛÂ/éýÕ™ÛÐŽ(æFã^ø–M}þËnì!£dS`·Ýwþ¢µ[e›…¾€S\Õ«_#x⛈y)«fÝ´'•)::jûî¸|‡µ}!nOmÙ1—V’íd-%õù‚› Á¸­ÂGÙäie?è1‚ÛÅ—üYñá ³ ‹ÍÙ™¤¨Å%¨#—s˦…ùäÕó¦ X]w åÄÊSž½ã5Dú‰×®4`ÆæV+X &ŸÏ©S(éâñ{ˆ¸ÄDéC‰¢ÝÂ\‰‚.Ÿùž7ðZØ0ßɹ†Eºd²æ wÀ‹|üŽÂï-ÿm¤x3)ÿ°ÅÙ¿zw›Žjeí `>ë9óÞBef³Á!–jâ¨~›|yuDQ(ÎDEQ‹S¢]psZÝ’ÆÄ}ù™ƒEÿ…Œè×J`ª<:Y¥DkÍUnT–Be°K`“ãGOÚûUC'×E¡P0MÝ%-sò™'L_6A˜œèò´­ú•W'– fChUƒ"1{ÈŒ%\užL'ýñPÿ›Gæ"&cÚšŒD‡tŸD±µ‚ Ea‘²re¡©©®ÜaᨓҤÀiÔÕQm¨4fMµh?ÿJ=KVTÞçI÷øèSž ä …Ò馭ÇÍYQZé7Äj£7Ö…+Ýn»]ã¢Ñú’¢’J R„f§R›þ)UÅþí¤nE™;ª>-àbxŽ˜x¨ÜN›Æk‚ FÔu—÷yûóõ¹¶Œ¥‚Z"ÔÿDö¡dù»­&W žçš‘\<ÓõIÛúÝ?¾jD}KiÆ‚áÖ ïï’¬l-ކt]ÛþÅY™^ >lZ7éÚ c¦›ôŽ£ nëúÈk{ó 2~ì­zÉB:Ê 9ÕJ²œA‡ap¨™;ÇÑ>W®$wŠ š„åÌO}æŽå_¶îÞðíj«/HÍLñªEòLîºpäiÕ×þX_ék7ä¦É-™†Þ7õÝòÉj¡ÅJMè¾*诒wž8c•@Í­’jö„ÑŽ¾îåß &M¡­œfó²T°Ç˜¹0äÀ…^Mk<£î½hâº&Ë˜ë ®³ïcr#ÁV±¦&[Ä~}ÝeC, ‹pq0È1þQ¤"T1çÕá2°J ºy¸3U&¼ !‰r¶6]žp-vŸßã/àê•Ä9Í‹]( †*®;󳓚ÎË —Þ{õ+Ö`ó±cÑIb¹¾óXk”yïWgH«ZšT›£M©Òªw÷a²~Sg”õ|ñʃŽOÛŸg{yÑD[… DoìW ÁdTYËëg Í!ùØ¡ ¬j¯˜«Õ§8nî6æ «½é`"X8©[‡³gõz=„m@ïJ"]ëÕ¬é·>9öÝç²þÔ“ot£§Cûý啦É"JßÂ×_qmûsÒë2V<Ð}²ò´5d޾õ×Ͳ§gY-êTíRõ³zjÀ‚˜à«ÙÒ…í<šþåS‚@EU`-§•xi ó¹LÑj÷U;ÇV‡ºì;²×Wc:ùeàuýº›˜‘4ì4z(¨ ÿñzˆ\±‚¿–ª R|nÊ÷%~ -Ù¢K]äKØ0ï1M—0zóñÚûßÛý€¾£tÌoè›:»ÔÔÓOM›9ãIW¹|Ìg³fÊšЀ‚ œ%l4è—®À ŸEaTyoál¸ F0çèÜWüp|Ë\@T´üÉ=e_¡óA6y³Åe¤^$fdpÊœ3çÏõ¥QM¼ûikîtßÔ9›'”ôž|!å÷…à …´ Ù"ïy¨ 
¦…–‘‚ö6ÆZü4£È¥œZAåZ<‘õT¯åÏlxPn%œ7›Œ7:<Á²²vâÍÂŒ§ð/ÝšAŒ–ŬCdÒ AbÌêî LûZj“5¶ž¸vƒ1il;XkªUŽ¡PÒlD³V^¶¦áDO[̹p7Õ'ËBîp¸:Óš4kÎ^·õ©ŠžJ%Á€©ãÐ?Ï7yZ“ÉÔÒwž=©òLa6ö|:–£8ŒÛ'O&gÉ‚U³.JÁ¹¼•êBkrªxˆq,ªŽÎ3À:Eí@ª*÷Ir+º`Qó·>,ÉššzÙŽÙôwÛk»?•¾;ˆ—f„ëÕk{ÖÈ…ö†%/¯»O¸ßËñ+ðLÏF€ i #ð‹:Ó~©\î »EWqÐíÂjÀ퓜å]îíº(ìñŽí³¼)³ÏX–]òÑÌõ_/ýÇO®8ñ²Wú­î®<çªÊ ^X€.±Jܸ2jÓG³1ü¢†ŽºtÉüï&ÊŠMÒG”¥3¤Ò”¦%Î lB®l…dÖDn,B@¦y}ö Wr´V%u·t高ÉœÃÔM‰ ::Ã+¼ÎÚÒªhÏ–âÖ}rrgc,Ódr”½ñÙ|k­tꟶ6Æ^xw kæ±?®{éÓ{yŠß"LP†ˆ!íÀÄPó’á¢.q6Ó‚³ßé™ã¾²Qoò–t$¯î?ñº7 ÛC°#Û”Ùµg·l™õÈÍ3W}0QV1ªhzÝž ØÊ4ë½ÏÞñÆèE=¤×z¨¾‘€2BÉ´òDÀ5óƒÇeKú¡ï#^½zòñ‰¯“+¥ß6SØ @iˆ…±TçǯÜ>ZЍ‚³ÂbKFi¸rIµåÎ)啾)w”Ôß åq>¿7_Ö¶Î#uB …bÊß?ÿþXyO†õ›1¦ãdÈÛ¨pNܳŽeæ $û¨Éͤcn{(,Øœ6è3Á5¬SŸLgb¯4;¯ú]¾²Ì ›Ê«\Á#MGÚ\Qkçâ¼wG”¤¨âBN}ûïDßrµqïY 8©ƒ„ȃ…N‹eâ…¨ $'²€µë•µ‹î•²ªeŸ£= Úç=°{—ÿò”œiRßGþ¢jn*õ™öëmúGÓéN úÏè{Öó5ÕÕF‡6éíû“.©ä}8Õ„d#áýÈè{R®ËTUæÂebmχhÀ¿™ á²è¸¥Ný¦÷Ýw0æ .¸Žû"?K$¨ÑXR…çqø½íÿ_ùà?ò¯þMyY•ÄÓrPìf(ŠQê?‚ ¿„¿r¯Â)å$ùd·[⩈Ëé‡hÇ2 @*¡!`u³Qié¯\óß²Y"&lÒÒs -‚-ÈGön]nÌE0X ¼>·õ1¡A¦¸ËCcR’Í2¬Ø2Cè‹M ºpáso=,\7ÓPF—û¤29éIô€°a3u…œFH9·%êº ¯â hGÊÞ¬jqToaTó×½ ?Q!øÊ/®ì©wÓâ/¤uíËÚί?”yæú’öc‘Ô±…ûFåci3V…ìm½ܺm­Ók O‹?˜ÄÀ#ׯžMœ¯t–NqŸœuÛdsþDš1ó!Pºoø±¶åÀ«ß.jgeì£ ï-¨$ Cå.ƒÝ !ÄRo|J ½ÿiC«Mµú¬™öýy«òî5û´#õ)o ú$gàÑËf4„Øj´•ëçŒÆ·,)Ëwz¡Ç;`1?rÅêˆçXÌùÃŽÖ”W¾ŽzdÚiûx‚ vï„•jqt¸¡öFߟªzØãž|.w¢ïLkÖz¬iï¢L•Ö2¤´œÅ“U ‰#2L'•†ƒVŒ[ÿ¶ %,'•Â}ÉK«¬ª,•ŒmJ´…®Û»Ý²uÃv™.²SÞG%›º¾ëC®hÕ‰Ž.±cµ3îÚÔ‰'Ë“ >ŽûÎ;Æ›šÏHR¨zèúá jÖŧÐÜ@¡`j——²Ì—׊Ry´°áÍÅ&HÑD2ïvºLž|&’±Ù¥ ¤ÛáG8±›Í‚½Õ†Zm ë™· k1,Rx¦ç®6ûÞUw[šwkM}¯î™~„jJtœk°äÚrI‡“–K¢IDV¯×ñ«öê’Ÿôã°›c…H@D]ìÜmh…iM2ñì.ªUèŒËY'vuk¯mÝ™´í­ R%;Ë&lU¾ S •SEb´¤©2Qr`–všÙh³ÕægV~ÞëÂÄ®Ÿ:–'?¸uì‹çí#§zÔ]]Ÿiá±·l¬êpÚ×-h µd=¹»'¯ye¶L+;ROŒ}Œý…ÜÆÀâÃÆÙÔQ¦øhcm°¢ü¨ÙRFøENu<¯cÝáCÒZzB|¾¨3ÙÔ2¡J­Z2GVE³z¬ÇºÕߎ‘a"rÏÂ.G‹÷FÕ‹"åoÎ NÍ\Ê´’ëÛÃ\â¦%u–¡ä€¡þÖ8ñÊ•+GMb1Ò°gÐ9† 2{öl¾AK†ys2¶ëßyðoƒv\¿Ñ,‰ºÚ¦ªò‘ßcjH¯ ¦”-Íw°ø‡ÿl¬Øì˜½r)P‚ æZ^ÜaoäQ,Ð:\-„õ€:,¦_± à `û“¥˜Njt%W×"ôï&Þ#šTvÝ{‹ÍÌ@¨…*xÒîT<w6;A2bÚJ)$áÔ ¨¦ (VØÃÝù‚ËbU2a@*¦ña9'Vý³jG¥o*êd$Zk@´K_€5ªßEsV$ÍrÇhØô*"䉼”&Z€Æ+جYh!õc%*¦3~~M;ÔO§ÿœé"ˆÄè¤Û3ÿ»Õ²RTFºë©O6µhÀGªÔ…ì͹†µÂÑ|²M×/)›(› ƒnª¡å ¡" …VÑæ|ÆþV•Ý+O̪C¹¿§*âÔ)Q;´®bÆÙHÙ=O¡R2yÑâ5K$f³£ù¨ 
ÏÅçíŸKÙ°Ø÷¸ð±¼ÄM„D2°Fõð)·®úçk<…âÍF’³Ï}ØI«ÚÔݹñ«ÙPê¬h8)J´pZÄñ·ó/ÝÖO&’Lk©8„v\DT1,ð^µæ›yò+uœþ8âù_^”Ç%ÕCwnTï—‚ƒ,n!üÂC:ý¨¬ä`Áç!î²›ö9ŒlÏ>JKñ"3¬¼Ø&쨅hš«±ñÏÒx`a5*8baw}1Ue¨D¡n¿ì™Ü%|9²æÇ´×Ö=;ý’;cå>ZúõËr-T>¥:§»INw3™ î2CªÅ`X-< ÀÄLÂbs„ÒffÜJ™É£4KKiàlXÕø4nÂ4õÌ·o¼j`‡BØ7z³=¶êÖd¾%[ñý=·w^¥=fM4<úüíK]½öãëßÍ)˜ ` Âm?èû!§M«€CÍ~›ŽÈÒêsΘõ{>Ùbru4Yse–ŸÔ3×­˜ò}Î9flá\‹“œèÆÊUš¯RûÔ†÷Gó«ÍQÔÕAñOd~1‹Êú!îéÔbÁ#݆¡v{ÑÐlõJv 2Áä Èäº?÷ºvþ#5çëò#ãÑ·ëP·Á~j®BËx 6G®‹×œXòíà˜ý‡嵤ÎÛ¢P4"䔯˜!ÖßYW}Þ—L™­–h{G¥ª/À=¸j8Ø™ýÖ³ÓW¾|Úîÿþýö}ë¿}™Â*RǺ¡œŠ:ÑW—3Ø:ŒÚ?e*áµÔ’ªi?Xdîz7* ´‡ÞgcvW©0cõbx9•ç¿kö>Ðt±4R©ÿþ÷Ýwßu×]×§ON}üñÇ׬Y4È”œK†ã6þŸïòû·ÇÛRUU]ÉFSY6UÜZy5ìú©swM`c¤!C¢~‰EÈî¶Jtª…µÄƒÞÖæ†@¢>¥C:¶%§Šò+¬çW,áƒ,BºPp `æWÔ\ 4⹄Dx¥Ñ@ü$8FRé‡î¾}ÓR‘µk…›[¼âSÍY0…²å#­ápP 1!=4žÇÜáÌèlg2žu¹Ë°è K:2lOJÚ„U ý”‘ÐPÁ³Ö¹/ ‚9ã!C§þ0…˜Ügò‚õG÷yaÆŽÇ]®xks±¼Ü(Ü—òòÉd™À… Ñ?Èú ^8䇙€h¨€èÕ~Õ…+dA’êÀèqg’V%ˆ”Ž˜T¤)±ùŸs]0έnWqr¢-jõ:ï¿mvÊÒ¸…à±` XåahÂØ,è þr‰]6QF V¬«EÁ#_¨€Ã=K¤'£åí„—CûRXÂ$J]ZB{,¤tóÁªZŽ6çƒ ^¶&,áp¤9óèÕ÷­~g-#Óš˜v bÓ9WÁˆ=úˆ¯ #J§žK@“÷¾„TC¨ óþÍ×TTÐKs¯Ôx½1™mókNaºü¡@Í$ý¯ÔÕ Ú‡:Ê}¥øðd½Z[ÆÒ«˺QY\»4@è>­¢>Œ„ EZ´°-äÊI†ÌƒÝFiªÙe¯Lf;ôùó,Kø—‚«~퇯š}~²;LVKk«æp¨'’%KÕéI4ç|N óR˜jM„žl2í%ùÛ¦ú_2fno—zó‹ÅÂ>‹jÖ>µãÊuj¿›afë'/Ëøò%Ic“ÿôÂK»†ãGÖE°8VÛzB5eN(7n3Û$Z¯aXµÁ™"¢~õÖ—˜šÕ™'K"ªVܾ¹ÿ ·M½ê w¤;‰™P~=’©¤ÅªÙÉ+͆Ŝ %Õ‰„ ûÜöÀú]ÛEXË©UïÏ’=›QÏœ¿õ‰Ý·sˆ©±$nßú¸#ä&;‹ØAw'æÙ”7SɉØ~š†¹Ù7â)? 
_ Ð} ¿%ÁΩ׊Ðgþ8}akþÏ /\ÊáÇsRß¾}y:u*8Y<òÈc=ë…¢¡4ÿEù?âqøm4Ýæµó"6¬6r`;ê´H³Ž` þò‰:A'ZĨÅ ·ÕÖT@0 ![”ʦ†ë4±:Â{e_ NÑ,Ž>v-@DÄlñ ¤S¸t·òBNÃ;Ë Au¹ÊIÍLÝ:K ýsD¨ÛOz iƒ Õu‘ØîÛó«°_ú‰ªawM˜»pê­='¿öõĵàTלuL­s1b,êèÿ-¢9[}Üð–?=¶ýÛ— Å„„Zߢ̯,TN+Ä+L¤JP˜Vª<å |6^¾ÚúêSUT.žz¯n’RV‹GµX£Äî•(ÍüÓh¼ã£*N+˦ÒUø±KŸo22BslNkÖ›¯w¾x¾¥c:–¨@Ä®™Ûâ¡öå'¸rfÀÚ¾Úßɘ6u(«xjçm2l6qæ²f£b ãhQ÷½äý€å¦â9 P!Àö$Vä«$ýK’ "x÷ík’ÙÀ¸~'7`rd¢D I9›"©wpY#Uƒ®¶hí\Ä ¿ÝâŒÕÕÓ ¢Ç5!‹ Z°qXÝ­ù¦°±B4˜Qƒ9qÔÙÕ•çÝûÁ'ëäæ 0H6bDÜûá.:^÷PñK_}AËhk+'‹ÕÂÌÂÒ0ÛˆUÆp®Õiurï…Ó«Ë}榤-ãísÉtJhlýn×ç˘̥Âu| ]ǽåê‘UŽ3MµžbMcÆUÑ>ç­[ÿW=ÉÕªÆÜºöùeƒeI—@üYef©Ýk¤1]3Ä­$Îy¬Øs𪋻–'ƒ ÖXAyÉÍ‚ øˆ1Ë’ÎJÏZ¿P–+è‰Ñ~H„-Ø?gšTÀQ&¸'Êúô•ë•õr˜­/îyLä¡Ãêž[žÜðΙ˜D} Ÿø“d‘‰í²Ë›.èÐZAeo4Ò›ÇÜëÌÞ§»²ž3Ë:?ÙmÆ!ÿ±h ¾ùÍ¥Ên(ZÔ¦ÝۦÆß½ö…6Æò&’Z¾@â>ŸÈ$eö ¤#P„«‹}íR&÷’M£d~]Å–†¶ É4RSmø¢¿÷Noïq[“±Œe6€4Xm,»bŽj,V3Áš‚ƒ‰˜@ ¼UmñÉì3V<•Wä'V ³Ndwì+YÙR‘d¤pÜ"ucÉq7šg~0Uš-R`Òæ rºÍ©Þ]Ybå¾èÉ[Íðçüù:|JhÑòíU² ŽßãeÀp_°®°-Ã})ˆD2\Ë3ÐÐû÷ï/Ù®á¾t¾dˆ>~Gá÷–ÿ6.»íÃê²ðZAt̓-ˆ±ìO(5_²C¤îŠæbb…2™ú÷°qÇZ‘š ¹4æL¤¡HMXa²Dô}Æ«Ní±×r‚ì¼ÿ>WËåÓb¹ÝNh„9ÙÚæ@Ä.¸‰Í—Gªv†“aÓî^;–>±æ‚D´ù黿íMþËYí]¶}¦Ü8’*ÐVM;« €<X}&ÐNjKÐ!eðCàº~¬¹µ²£ë´x.9ì‚Íg•u}ìʹ/½6 >ÂiV—¤bØe“UÖ@"‘³5«C¹"¬êa:d$’šn‘Žì²>ævZ♈Á¦5$ZÛ#’{ ëw-‘Ÿ±²Ÿ¿lÏ¹á«Æ½rí¾“F­ý÷,FF„hRîØ:uÛíò ¼‡.r•E=yó†çÞ¸ ŽoÜ5@ŒõbJ¦‘Ó’*2È|Àz§L1ÕÑVÔžbÉ´¶ÙÀÔäÑ$/eÔ†íz1 p°#D…ê“‘mßmâ&½/¹å/³…çñhžKw æ-ï®)xM`•8X!oç;7ªä]×N{cç8OºJÚ–P¹Z½üd£^àµí”³Åމš A÷¨¥oÌÀ$Áýƒ¶j¡¼¨§­y“H"Ì»¹K—ŽÂøÙV ‘d|༨̸/°žÓ12E–*¶úŸÛ5V,5î–×§½9Hz”Vã/œ¿þÝ!¢ùÁŠòtñXvÞ’§åBØ^Nmß1û¾Ë–úóölÑ`q¸ý_8pÚýœ³æ›Géì¡Ú£r-·*¢Yf/Ì(‰Z©ëQõu•þ‰s8Ó1âP99ŸwößB‚ᾋ†­Ý3וñˆÔLÜuÖÐD±˜t¦âPË?–BÑ·_}kGëÉ-{QÓNÝ¿!ënk6ì]ûÉó,˜B4iÂàŽ©”Cµ7{Ú‰5›ñï}ÁÃŽTûb}™Ëå}膗óöˆËc¤Ñ©ù³HÏ}âžñE»ösë¿ß @=¦¶}¶EV5¸Ü~ö…¯ÆKˆP%·: ¸—Xf·¶æËdoa“ˆeÍ>’ ä…“|TEì›TóÀ#6'¶Âj7å“Q“Ý Ð\"‘&*XN…o}U`×ðàMRÃ7œþL߈°dÃè>V2v ò²1—( Œ0ÓÔ˜*ó;Ä¢‚cºP#Ò¥¢÷M:ÜØ°TÙ5ߘ«XgjÚ–ïžévº³mbôÒ'RtýRŠù5—ÞùÎ_7‚N /[㘦Ԗ·Éz0ªÉwæQ™ ö Ï:Îÿ•Ó/°®:tè°lÙ²”º‰{oñÌ™3kjjø¦”( Wþ?ó”Žóùÿ{óÙ•É\ÚN®¤¦Áe8`võÚç+d[ò…]5·ÆlX§5O;(8€g·–4[]ÑEO’šBqåÕ#§$Qª ³`¤d"r—ÒÁ÷B õ䙲d—ò,Ä3™ƒ(Oùýž‹zåÔjrG*Ç\ú¦)ájwJ‡—\4jØ;SÞyZù3Y`ÙÆ<„k*ów4Qù“”"•z¢’xQMŒêò oþøë×yÜÒO¦ H¨‡nX¾tャ®ÙþÒŽa‡¸ÜªêŽ¥½U|p‡¬5ñT¢ÂV~O¯Áþd±¶¬Âqöcç®°‡Û?zÍ gscüà럯§SnNÃTÐåÏ 
•¶¤Á¦‡úŒ½nôô3ÍÁ„åt4§ªÎ¬ú•û¢+Ðo3‰.:t>DÿòÙ—vŽO4áÍðØU!eðW€Sð«—>6PÞ..Ì€¤I¨IÕýµÀWà¦ò”໤DV¾æ–æŠ2ÑEÜe#õÍ¡p(޵›êpPP}ø¡’<hCð’1&k&KÊ(ùÏGÔWl뺯ƒ½fÜ9¯Ø’ΙWolÒb[ú¥‡KØTƒ[„†,ãJ¦ FÉfPLK©@s»+µ:­Eò¯¼”Ø¥Åf.äqþLp&…è&AÜ‚AV«Ûa¥­(j!é².ðµ´•õÀü©û[%*í‰eüA pðcjñÊâ¤(“%®š ÀËÿ¬ýÆ xìíefQ±S_fφ¯Útq!0¾0yÖ9'•ë­¹c)k l­›5[4OºéÙÊìÉÓ¯xUµ„'^¹èëÔßµpž Äéf5õÓÒŠ'î2ÓÖ¿mQ1)Ë1®Çœi¯ô“ÇÑ#îb-šÈm¦°púUuµ?ÉÊœÒa-sÛ,N¦ÉÓú i†¶¥_M{ÖªlÞ0óge(‚l±â£÷L\öÆ32_ZTZ¹3yO(¶›BHyaQôr+ÉAÎ`FD âʬñt«ƒŠc€,Öõ3ê¸\.;KߊZÑ ºÏ,1†¢'ÔÞùá×›¥åIؼcï\”«­É½Î›¬…¢éjmá‚Y5‰h,Ýœ •W8½²ãìKYÀqÕE}Í©òN¡ º¸Ïé}æ\»ÙâÇÛ“1mýë<¦õ©Kw€±~W_U£j©a¶ù<æb}ƈåZS£z¬˜µZÞíR]Xù¬¼Ö®;Ú-^é5»¦ >Û÷†*¿V[†I<®ÿ7Œ|Ûm· 6lß¾}=zô ŸAoݺuÉ’%ØŸ1;—¯PKYLÇõ@üÞøÒ ©Ø-xcÉ}Œã}5˜8ü°CräÈÄe½T Ÿø~Á“ÓMsU8£¤€¹kÚü]`, ÅO #. [#WBî A|Òÿ$õˆ[ó*·’lò‚å!gpö?oŽIRdžt©ÿ˜zúþÝSÖ^‹ðȵ³UÇk[íµÊÓª™³¸& hzçó{ÿqìÎè]Öœ{þ=rÉÜ„»çSOîHëÞÍPIM…ó¢@ß~Iß­¯†Ý{Ýe+}é{ÀNì÷Ê‘ô)oÓ†ç¶/·×Ç Éb²Âça CÞ^PbØãº~ôÒWý‡t]2ÿãÇéM¯+B@º0!Vj¡3>f²9$>§Z¾ÓhÐUcÇ@]ª1qTÞTà¶,ä5â}ÑKzYgŸET[‹â”¾o{ žC{ÂRÝNíñk&§|±ïÍ|ëÄ;'G)êP=n¸/ì †¾¼8šOÜwåÞÎÅýµÿ|ë“wK4 äH¼ååàAfòmsÀë)†€J2cHG7pYh®… ç{µØ-rTQ6Þ?:¿ßSÓ|ú)Ù?`Iíì>£‹uO¬ºòéc{/pÕ”Šò@Y¸ýÐËžO™RmΆ­ß/ˆ‘k²œÞ½çìrJç³;8ºŒ½fS‹å—´ïØšÏçeãQ«Ã{ãE·TäºZ†LhÄõëâ¦ÃK|ö7OÚ‘óàÞu±¾½§ACÑr{†Þü|y ðó¾ƒÕ«ß5Íi äê>gÅО3Ú¨ÜdŠìüfžßÿIl¸N mD*Z¬x{SóøËZ‹/|zÃ#׿¾ðó^÷ŸÿìšwÇ· ëwØN#‹LËe-~ªâò)…²±”%W9Ò¹êY­Ló¢©IKÆ?Ëö1Ko™yçOl¼IÔ/„ƒˆWTaOÓdÖf*˜ˆiH©#ð:&-ͤî{ ïê +Œs!‹%@mêõotœŠcjé»óÄà òÔ5+~ú ˆ¥Åo"~Abõùxûùn}oyØÑ‰ŒS™sö8vt1cùh™°bÙ/‚”jˆí„—Ö*í¾t;Ù\ÄŒð‘S³žœûÌ¢aò 3ÃîÁ8‚ ;®ëú}ôÑJ:{Bñ ¶§åNµhós|ï÷ýqá¶NäËÄb`ž.)_ÔÎËÚ=U—mÉ¿·gµÜ¼Qè½eËža²… jâ­ë¥1à©ÅüŠbpøªù9Qâ |6yÁ•ž'.Ú5óãë«($…Õ"ÄBäÞ7©þ=†®øh^àdݬ&YˆÈ¥Å.›æ8=þWLÌ3ðäÉ“ƒÁà¨Q£¦M›»å=–g¸2>à’ &Íi¿cq§Óÿ?5ÛDm©ÑaµRûB„£P ‹ øAìTIªQ_W‡i¤S]U!íÎÆ«¼dy X‘ËEËi4Žï /¯ ?p‰c±ÕÝÀºô-øp_=âÉH $”Û:%áêÃÆ‹í7àÅi»FÈGŽöêçègÊu-lÃL©›4ejP,™BÊŽ]¬ æM#Fë×}ÿØ7ï~ÍÜ™«‡oƒVÒ¤‚:ÛxÛØ_ºt)¡|¦ªc-i‹—’¢¶ºèö`ÑÜ&‘.M*\¢èdCD¹× $1¡Â$k–Ûv8³ˆN]Ÿ^ÿ×)ñtÒJh8Ô<«†^´ë¾÷êZ¿pϦ}…Æc¯|5GÖFH)—Þ˜×X¼ã&sÁ™O¥ ®€: -ÛªíÖhtÀ›5¨q¬°Ø G~j<Å}Q!c~æìuì¡s½×²RÈRá|›ZÿÞdYØèÞ¨þDBÔÃ]ŸÌ'”3ó綸{ÀåÓBí}‡[S>|œÞ™Ó˜däÿñá×Ì4»Ø„Í—ÎR‘°ÉÜ=£ìLa{1Õ%~êø W> ¾+™eÉ~‡›ŠOˆk¢Û³Z‡´ßF½#îA…]~¡ê%…tnäaÖË  
ä¥T(o{òâõöœç|˹¶¹gýë_¦ï×ü°óÛ)"1³<¥MÅÖu);}ò¹Ÿ;ËÚ¿x¢±<M&VìY‚´¨÷LzÔ¦Ïi-M†rŸMÌNz§ü|¼ÿ+&ÝÛ2Ý9r$QДgÀþ å5×\Cå¸/ñÏÐåRI†ß}ÀÇëüÿ_í¶(Êe è°™!(%4Ôü.ÖUKk[¤œjyX§bÉÊŠÊÛvŠÁ–ÍýàAHÃ7•vñ>@IDAT¬—ÆI0° ô,Ü—?v2VÑÒQú†W‰Ä¥×lv¢ý¢Æ8°C#Ãt=ªþ0N:½`%RÔ#xg Ì'¼öÕíË¡&F³C´ñŒ6hL®1[;ßÓÎýÐå‹ì×£÷.ž³cà Ë^ôª°)ð˜ãÏ}óÔÊsF^0Çè7Ïß>DvT¼zÍ‚Ož}ÝeŒ§“}L„X*o%8¯îíõˆ!îZ»Dò™G¨ÕŒmƒo½àÍæd¹˜VÂh– aTòˆ®é64êXa9±ºâ¤Æ#mmcù#ZÂ4yÅÑœp¬jíu¥@wÍ~r¹ÔmßÝÉsfÛ^KÐV3µÇü‚Å2ö–¥É”æ×{ôŠIfÍP¨Ou4ž.„ÎB ž¬š_ŠÆç‰w†2¤q.BoÁ² ÊûÈëfÏÞ>ò•m3…ÙÔèËÖý?ì½utTWÛ6¾ÇÝg2qZ(´¸wîPÜ]‚BH‚„à‚»»{¡¥Bi)” ÑÉÌdÜåwí™>üžo½ïß³Þ>ïZ|ë9‹N'gÎÙgï}ö¾¯ÛïU—†Ñ7‚—…ÃÇ` ¬TÀLB@‘kdù¯ÞTdV 2ø¾Øá)°³K¨6Ä@ '[M› Y&Þ5¢^Ùäø–9+ª]ɾ;:«Û­Ç{!†˜N©nte$™=Ž:sÜœ0iã†mÚyu*xà ÌÔ3åȵt6*#á^tÏãdUxM~ÌJb‘CT žxc×Ãòó”B«ê¹&\¯¼3»'ÇHºdv¼ÏSùg7Í)XdˆP½؃°£ô‹ÉóQÑ.*7Y06cÓé|1Á<š[—¨"Ȧ­k$ÐåIR‡ÝsµOV<çÜ ŠÐ[`ìH··8/UD⌸Sñ ÉD1·X.RIT2”x.Ø;¡ƒË§a_x9(¾GGm!³:.]w~™ÃîœgœµLJnv0ãöPâm–ÕéŽËêÌjq=ÒW%½Ù¡÷œwì(ŸÝ§Û{{ÐÚDƒN<|k×r²$ñÚò+q~rÿÝt{Bï~t“ZÑõèûÅ]Œhðª`©KìX?Åì¤zÛãX± £ R®™Ñpm B¿ñ^¨’w4áD¹‚® CÀ³q|¾€‹²‡´bò³B_§!¥û«Òþë´6‹‚(µC®×;"TÕ™¦Ã‹À")Ã'«)ï¸<Ãìßf” À‘¸dΠo®©Õ[¾ìÞ~€ÌI\±öÜ"úÝH"E•霣ï*‰«¼ˆ¡†çBXÅOÏ~š`TDòl„M¹}¾8/Ð3£8j3Œ1gªf\ÿí·ß6lØÐjµŒ¾³Fš“ðŸ^ÿÿ3À°Åˆ¡ŽPXìP È?5)R_*µ Y(¦Š ?ÅâÇ©Yaó¸ÚŸD=oø-Žë””y…e¹‡w³¤2;rˇ¨+ŒmØç-6à&d±¡ÕîhJ@TX ®M‡S„,Q p b¨ÄQ ¹cHèÌÞ{Ò¿ÚÌqJ#¸Ñ‹Zü#ð¶\c<÷$»ØéQ«¹›î„R뽇*oÍÎÇsg$î_~4°J uzÊu"&W$ØåÄ%‰“U-²äO霱ùn2ïvzôÚ’u}2}V ­”ðÚ˜(ŒŒˆUp ,Á[¡Òë2±ëâõ—Ó†·Ý(äJ'uZÏ0Ô^Þ±Ðe5*±T7R( ×îoBçG6Í.‚$-A >‹Ïkõ:Üó×­ºŠR6]´ Óá·¸},Wrêúa:™$©ß•E—.ê¸#óÎ8: ~’œxÊã² žƒ¹¾õQi•Ìv׊µ/ 9¿žøn CŽà”1ÞÒ×ò‡í„qÑE=oh¯` tÔØCáÕÀìì2¢æ„5MÖ°·ÉB)´ð+!CÚ-f9ø¬2^M­Äh{ïóUò­©w¿¦PŠÆAÇñ. Df¯ Ð¥ÔwA@ï6ÞTXU%­ý9ùls‹çìkm·Ksnu¥a#B(ÄÙCâ•p—Îç½.ˆ‚¢Zm:\̺ã¦)$á´Kÿdš¨S4fáE ¤’2Öïô Tàí04ˆ»¶úðVSÑžˆð%.†È¤9Ww,«yJì»m<;RÈb:&v|“ãnñ{†¢$ïîb@é\à'Â^—î½°Ì÷! 
X(–ãCõQFË ø$’¸‹µ‘¸¿CT.—ÓyÕD¾7è¿!êyœJß½nT!o¾ˆf¶ÖMªw1(ò»áíOnB_¨WJYïÃêo›Ö_Ô ¢âoÀEËê"  È …z”~z÷4¦µy”6Âo*sØ‚RovÀ±ãÞàô´Öô^¼‘Ð QiþÚŒð)óæ7XµêÉ|޳ŒJÉ.2¬í9S†Ò„Ѽʳ›ìvËmf¹qÃÕeXcZæ”—ZÕ¢8y0a½24¼6¿Ã·þ‡9´` ñ¶“õg2é6d!Àß˵¸Ž}‹y´³˜}['ž¼–vjdlåsr¿ÏÝq–Åž/5ÕvŠœ3Kâw˜Õb¢p‘rlºÐùHkÂÆ«K–õÜOüÒR=©,­ñAõG™½d`—GṦ'bt«UÈî.Ô)x<혖KYZ«ÁòÛÉ›Ç@H<4·³O÷`C¨ýç(^À0Ð< `ç¡s†Û3N"K%Ρá ¸mÒ¤ $àpC ž†Wþçøcð&C°Š„,à;CŒ¿qŽjaí¤€F|ü‡Mˆ4ŽAýö»#@IÇ÷Þ¾óðb¨‹ƒNCJ}+8!”±¡4ñ»lŽÝåEŒ¬ ^x–‡À×^¬‚5iF¹S‰羪Û5ž]Mˈå°ùcûgøí‹ÎTYY1çÁ Ђ¬¯N‹Êy­àÊ¡ƒVYÃE†{~åÀ¥2€ˆŠ.ù¯J)ãì#ëÎ.¤”ÂA’¾:ù`ļV¹«O^Üwÿêë)ý²0Ú`|I)8$T«èäG¸ cZ‘ß_LtV"•€ÑhÙÿxÚÇõIí/û¬—‘íð,i•ý§ëG”ÿÌÃ} ‰yg/j°göɾI/ÇÈ•"?'õ¬Iý²·^œ "!º… ˆºdæÀé9›7$>ë´ð³Ú”È#g«ãâmˆOeó·^œ1·ëa…_3cOû¼Þïߘ^­46™BÝAB¶\’Hßš‡tkÖûâ¥Ó”ìò£zNò¼W’BE=u+ŸÁ“Tç LÉ+düR)®†ÕêN|ò™å³¤èʳ<ô‚¢)„ÂеñÒd`[ZëCó/ ™=àrö.ôWL2qöL/*ä¨ÙŸ1KXËZÞIJÜ¢?R.-ÞxznÛp§Õ[ÙFLrÃÉ´e >øÚúr×ã4ŠÙlÒ«æ„JܪéxuÞôfçb$ñ+Ú|¿èiƒa]SÜL¥~g|ɇˆìÊF:êcævØÌ|®ØãñóE¢Kwׂ}ØÝ_÷XÊ(Vi´ùæäù]¯í¸×R·¼­'çR‘”Ïß2gGîÌv])ã¸_P¼±0è’Åz„Ÿà=gX‰P"© €à­õ]T0Êvž~âÑ*øqü6ƒN¬Vq-ÜEFµ^Éù Õâà ÃÜ»’w<©D­ˆY´¡ÅÒ~W–èLŸ‹wŠ›.9;PsÖçÑ'7ß«ŽÔNè´Í ­/sgJqð垟¶b ƒênŽ–iA‘FTU_^¸|àa )“²£)3PÔ!ä­YYþâ P¸2FZûÊhð0VþkkéYØòpö1ae2f[o'»÷Ϥ’«•Ìèr9É‹ôÐþ¨cdSzä¼g=‹k"ÎX¿R®£DSaIiŪZd$¦eU mFç1ÏhVDdœŠ˜(§E¯ñ'ÒµÐu]\Zvø›è^æ›IçÚ%wÏñ˜aa Ù2“ßšÞò„Α­¨ŒîÙòÝ«Ï%î›XôòÍë5wZNñaïõ3Í'B–jãù)”Õð’”Îgw_IW)–"X‡U‚,0ŸøA³N†áàÖ9C®MOO?{ölåÊ•¡dîÞ½;ðX[\\ <?{ö¬fÍš¸ Ñø žYRúÄçá?ÝÿÇ €.„lDŠÁ@\ÔH=„÷ÔA_ü€”|Öö³)Å,'Â\u@mð=Ø].ˆ:òúÙ dûs3Ù±"T2ôNˆ´"¤²@Ê!”¨%°;™½D÷°Lk™µët2•9LdT§UtïA@’«ß”ÓDWx"ªÆü\ ‰µ˜)ªªÿ\WÀNXÕúûššs:_{£HŠ¢]*ä–æ,x&«X=¥ßIF5§þ.Œ.Òß,J“àpêè «2ŽÅîãCEì pýðêƒDNî7Ëéõ@ˆ@Q$ÝüHÄ,§)¾ÌÜË[¿Ù:˜Õn52bB‰uÈglb5Qå×Ç+^Øn]åˆv:ã;®Ø?﫯T4¦×¶G&²4ˆ…ËqÜ|¸“”Ø1´Œ£‰©½[p¼SVï3ÀoÊâ òLÛ‹ 5H9Ìò)€ýž&‰¿0|â, Ê‚tZš…fÀMÜóã_¢êž¼­“;®WFV7¾sóüL¸ 9<:^D`öA§o»˜B¤}1>äñõ¹ l¡àE€CœBñÂæGVžÄ‚…OLʪ\#Ñ]HeeÕžyäT‰ÜdeâÓ…‹ëìNùqÌ¡AT‹¢rfÑ®§ãR›ü˜zô«¹}v­¹?–â:ä3Ô{¦Ï¨U¯Žó%ç½*•‰# eF&[:±ÑæêªÌê˜ì±ùTœzl³|~§DnûÅtõò«ÃÀB¡)W S‘òƒ"Jd*˜"¶×)‚GÃ$ŽDÙÌF1åe3¶ìίY“aUÅ•µ–Š‘Œ*å‹­LirI*Sai6FðW¬PZML®¸ÞFÌêöHŠšÑâL´RžR÷„¾îeî¢S?çQ …· –`fÊlf(HN>ÚEßJQ < SÐÆ*Å*uþ»?ãã¾Ä{Ù³g!Æ»´éEŸÏ0ór⨋éšÔ ÈZ(!uŽŒíºëI*t¿£º/g[Yµ+Ô°èÊêë'×µÿÔêÀ, {îo¥¯RHŽü1eL»äìû³¾Èñúž¬Äà”³8“šm±ùÝ™¶‘¶wÁÛ³êìBùF‰)B)Œ Zƒ 
ë«P±É²î'í|Ý^ž¾½k†%BnõÊ!’M èõ‘…ÝÖ¯¼<ƒ#7Ì96‹9sðÁå—óÞfa1—‘ʈøb¥°‚žˆãXôøU¤ˆ¢û=tK‰‹M·$8 L‰$ ¬\L~w?&ÊvOgÒÈ:;YÚðŠ€%á«h‚J•‡™Qÿhœ¶Zæ R¥ÂF¸å–¯kÍ”+efli‰ixó •Rë10p×B d³ˆ¡D'¯áxêã“>h9B“É…;Æ] 1’M¾xñ¢¤¤$œè  $†W”ÒuêÔ©R¥ œ°p1¤aHƸ7¬Äþ¤'â?ÿk°…þéaÖ8Hþ?¡oø‚ð•HJ€¡E„LV‰XŸQ®úå Ó£"jÁkÃrTeA-3ƒ'‚TG¸C_Ç$]õ¾üÝiätåò™*–62ÍXb®!xð¹´YðÚ<ÔöÌuÊc`ùœ,mÕ¸V‹¸q–Í·7ˆäØî” ¹˜ƒ+÷×ýÓô¾6|`E°`AéÙx?VwìúI5áÌ_Ì~nݾ©Í¯½9v]Û;×Z\îã0æwߪ—~xï~}ãÉQ(ªUb(CHæ%†7x<ûî—”ÄÀ+ãµøÝ<ÅDûA+rü‡ R~"RÐt€™¼dC“{)W{fµ?¼úÛѳzžZ·¯¥Y¸ƒ2>H •ˆL1kz ¸«˜J\íŽDŒ¹]vé­VQ@­i'7>¯®ˆ úKºÜŠehKƒoæô¼êŒ,ñË-ÛnÂüôn1âôí}´·vÅ­JÇäã-fà3¥Þ™T2íBmz¬HpLL½ ´XJÏáèÞhxun}µ­’V¥³Ïï²1м ëð¯ÉÃárcÅõfö8êâÙx,žÂ­Z4æCÐÅ\Ð}OÖóQn·‡gáÊC~ÚX«1²Ð.„Ä#|9r$‡åR² Ëö¥'GmzaÂÞîi}n.¾ÑŒšMë’½ñ§¿ªó\XõMïémw_~q˜ökv%éÕ¢û™/@Õæ°~äøRú"Œ$¥ÉÍôËí²'ÜH¿>’.¼ ùæÑ*¯ÛɬjGjó¿}®ÆªÞw8Ç¢f{™þü¯ðe¼¤V;3o›Ô<«zDgw9å²jÅ ¯É*ãðÒϯ¤]Ë¿h¼)$Þ€WN„ðnÇ”B ÆJà©3 _´Ê8³Þ SËâ+V£?! ÌØ5‚ T¦>sY§ÝÒhyiy9›ÃXØæ$œã²^/%Ejeå/EdÏþ%´Ï^2«ÑªÕw®ŸÕ¨mê×`úÂ?CHÆ…1Á K ADÕâËcðt¼ÓÉ·m¹3‰’—̯^ÊŒLÔfqçkq+–•3ƒ~µ6Þh2ët»ŸMš:xù¨K•-ò_Íî‘£IpÿZôãÎ3G…±t€æ‚̳Ñ]:¢ý¢}W$Ý:±ÿNª0"¾õ#á#饻ÐãvÚì…U¦–94È”~Km:žP!âh{4œ®‰“–õaŠÆPeéÙè\ñ|Taôú<‹&ïð½’(#âìF'ÃæpØ.G0ùÞÀäG3Ä –µ>™•qÖ|ŸC'´V_ï̼8Ž®QÕ3‹ªvà²í"ªø' \BM…Yú”6¬¼á"HT€1þÊÆÆÆž3ø ©9 ïtq%ŒÁ5t,¸çáŠõ)OÂúþO3ðqA‡ñ5ÄÔ†ˆA˜Pÿõ7%Rál¯Ãbª”àúɉo÷ÌPˆ#fÐ|Vñð€vÓ¢¯ Šˆo2ÚlO@ŽÔŒØœRYY´’)dc:ƒ!“Ãâ‹Üo=<ÄfBkà.ú"݆ëc«Dï)BD W!_rºQ´ëÚæ´PBÐ,ÐJìXB†Ÿ¯’Úõ\Æ÷=gõÞ·ý›¯é) ÂM| {˱Ièóº=Ÿ{çWßHƒb”x°¼pcB†rònÞ= 2 GZû;2z@æÑóIøó³ˆ¶ÉÍol2+¢S­™•*”•¸¹:÷ÖŠ£eðÄ©*††š“`€Õ§ÍLm&&)qÄîn÷m<£þ®¦Ïy~ÁÌ:ÇkÕ®øôéÇ%Óy‘ñ#ÆÛTjŒ ²EB.sêå¯ÖŒØËòcPCÐ//ÍY®/¶ÉE²ˆX¡ÎðSi]û5h]7 £¿\=©KRY‘^K*3ÍÂÅ­ºörAÁÎS˧Üê)e+¥ñVŽkÜWg£â%¶oñ©9~¸ã RV 6;§J,¸ðí~„Pcó›–ÅÈ–œO¤ÄîQ”•,MܳrÛòN†ûs.Ä0Sæ¤?˜™ÒútÖQ š›éKô’S©@.´Ót¥Hæ,c#3 ´!\žTÁ³2ÜzŸC:ÖR²ùȲ”á‡Òw ™2l×Þè[†AÓ¥g| '!¨§7¼¦“0ô4["RÉqûùÇ rÓZ_c‹ÜKïöÀb›U{?U[M>¯MnY}¿C½£éÝ8R!ó‹]IOÆbÁêBÑ\X|²åú ÀØò|ŸéåÒt«˜ÄOû`£ À5ìEZ 4űxñâ… ˜µZmjj*ÔÔø 0.ƒ¼‹ÄXÐE#)4nÄIÈÇÁŸödü§÷ÀP(šÿi°mÿƒ†íþƒq]邲”&õE,…H VàhH‰®Ù…ÐGØR±­‘WˆÅd‹Tl)E…°óCú=ŸžO +¼)í,Í(îwê]H b£8ߘû`ñ’6¹©¹ƒguÝ·îæ Âœæ)6XHU8—( )Ã;,@öè*¥MÙÎÒÄ 29wt«Ù¹O²Qü” ÏP¤dT—EŒR&«Ð_WÛ8P@çL¤82ÅZ£§kV®†¡ÎA*Oäô–y,'KÎ@Ĩ?à`…WoçPÁ„ò™˜ôî<§ ¿ŽÎl¬ª–Mk±­0â·@´çôµm§Îå¬ìvtá껺?p9½b† 
ÜoVÇz8Žw?½¯(®Â`úæœêMi–žœa„mê©Êk{>Å@œvfÖõùô'3YÖroΣI›‡\Ÿrª›Td~ŸJ%å4g}U¢pVÜrp:|YÙîüÂc=¦6Û»óÙrPÞ 'Qjë&s:ìÞùû¢‰)®V»¼Å Ó¹æÁbÝ3&Ç0=‚´VÇ2éÐæÙÅ®7©ÿèõ­XÃ1ÊøÍ!…YÕÈšËøÌ R,‰OÇ”’„XM䥿ÐÔüÍõœ–?Àâbe¯¹9œv8@²Z>Ë5Î×Y8ôÂìé› ŽñƒSºm•ª5³&ìXw9T Ë#õ•Ê)êcý˜|ú/´ZyußU7±œ3{ìr‹0R’ È™‡±ö„JžšVU¡î˜<×å´°„ryÈÚMgU@J? <qÙQŠ’^€ù‰ˆy ]FcF-e+až¯ߦ üZSD¹m‰OææXpoÒÀUìšeF©åb¡Â¥÷Tå|&ºý,è°x;]*•” áÀ‰f±ã¤^sôÙ½ƒØ=.)ÌóÁ€ÓâÉ@4>áƒ%sØŒA„µÐj¨v¡X† 4ä`üÕ4¾qq><\ÜøQó†çOxþî®CcùùèŽï8ów?äßÒ\7ÐÑ0 ‡¾Ð:þzïø§è¿Î#}>1| œÎr…@RV¢×DÄÑ_¡™†9Ä#LNX6S™ ºY×\H¦NŽæUyùí›(u´XÍ)²½fGyòήö¤4€Š§ÚÉ¢¶ÇWìè¿jèÕù·:%w9œqkð‚F»¥ÎjrägfèÄÂ|»ÑÀ,åDW_ýËh ùcY§ûKo¶˜Ó6wíãÑÔ(‹¨]ä»ÕÒbØÉ´K2¹îò-g—dµ¿»àr«ùͰU¶•·Æ¡ÿ:üdÄ—«âÔQ釣‹{oI;7Òuwÿ§cŽ×™Ûóúš‡èQE±ãÑ…W.êyŸí° 3ØDìG‡‹åt™ÌŽüaDœ?žÏýîzk$3S¿å·±xиAówÞZ…˧[ÿòÇ·­¯XÉ`XܯKÌÓ[„ַ⛾™MWMYÜðPÚ£!)nòø¬%'[£ÍŒžÇ“ÏõOyîV^ø#x¢Z-.µqß²L­=ZTÑÜÀ^—T*m}åÿÜõÂðËY°2z2¦kænäòǦ YXn«j¿¶óNœ_MYL™Øz1ŸYù]}dV»ýëv_ÙðÖ—Œ/ <‰É]ŒDt¯¼¿íúi$fiÖÈiënm¤,©Œè½xßõ4gÀÇCé¬0…úÇø7ý¤¨‡Ï0=1ǃþ.zN…{3Z — ï膟k 醽®ÂXÖ<ãb£Ñ¯iü¾ñ?ŸáÀ¼a¢>EvoqÃÐv„cp€éâ %v®¤gL4pA$TEC>ÍEÿ”ÆØí¨c ÷+˜BÑ&¸Õð{™`¨©•4—ŒoºuǃItBP¥ÃnG¢†Î‹XežE¥äh+“JoJÞð5B»Ï@‚‰_å©ïwÃý×m$&D<Ô®üà£ìogLj›Äd3bJ‡°«á*c á†ý¡œ‰h Ãþïsñ,¸qÓd³è›‹Œo²Tl©,Dñ ,N·Ðè`ˆÀ·öÊÂI-V‹Ù ¾+æòõº·*µØæ³¬¸–LQ§mÆÆ‘ÜC hب¯·oÜ8'B×KÉôk7ÜŸƒÒlDe„«'Ë!Ÿ½©2©[Â(T ”© ô¼GÄðšÞ©Å|e!é²—©Ha°P³@xSüZ+j†RL|¶Náq$FOL2®JÏÓ—òb[¼ãÞB:]`€¬Ø¸/ ðünÞ­F˜·ì¶ç£ÄµÿЙøbaYÄ/Á_d1ìmÇüF&.È»›E_$fÀK¾n²xï•4ÚŽ^Å…j® Mô| »ûÿP¬/_ô =_Òæ(_Óhz+*W¹+A¡(2‹ÍŽ€uçÍ飿JÏý!¢•é!å8Éžú¿Ã[Ü…\#X õ[×saâeeZñŽckgvÛŸsvø6óý´Š¾kßɤ6{|Ê\&ƒ[#Œö{ÊìL½'Úšy! 
n}3ºÞ_²ERŸ™¡.!È!\ Ú#ZMäkªÝµz™ˆïwð)Þe]£¥Š€¬+1Ø”&Ó¿m2§é)![ÎñKy~5ÃÏwr¹ÛÅyb,×+-™~EÐkA c‡!PÂÑíçò¹§úÑÛ[n –3b qÈÎ X¼Ñ{D½2t¼ Â›ùÓtª ca‡Í…OÑÎFR›ßæE£€"£9"ŸÁ[îg±™A¥×åÙQëðm))ÿr8nœÙ+Gâ‘³ËÆQãáIJ å"©ˆËöÝÅŽ?‡4ÞwèÖºHð/ĹÒk°mñÉö‚£åʋ͈»£ ÀÀýû‘' ‚øüw0u »SAÆwà1 ŒG>|¸~ýúññà`©ÅŸa†RßÑ!è¢!þâûß~€!®£{x"GgÂ,4Þ0BãO<ÝÀÍÃÌ¿àŒ%¬9Ç]5䆆Ÿp¾‡ù†Ìž .À`ñ‰?Ã_0'áh´ŒëÃVó¿}ÈÿޱD±SpÐŒTx}¡¸[@f¨(ˆÍØŒT£‡ÿ˜|øFý÷]'áƒòkÿ8`²CY3TT§pŒKhzv·'‚ kÈWLSLƒØ&™e©ÄzvŽÈÁÐjI)Û\AGjÎÞÂCÑ…^‡\¡Î}˜Š:´˜©-Ïû|^»˾éѧ~N]m]Ï[T+ ÀÛç-È^„%õ‡FT ‘| ¼Yÿ~.2"ˆÞʵ_¼âdÚÔÁ»×ü<Ë3¢FáPSNË÷é­ÔÒ8èXgÂ’Í#Nü5è­€œÑŽG>šÉ9.¹Å£­gæÞÖ·~aó™WGµ¹ƒóèô r’ò$J’ÊI­×U£ìZQˆÔþÓÀ% ˆ9¦BG$̘(Îgv›ƒ÷­ÑíST“Í?²<©Ý¡ C^À6Øe‡E gÉÜ%ÅÕbâg^«³ ýþ¬ÛÃ'·¿½èØàÌ.÷æœk™Öùh¿Û³:?ñZ­…½Ï®¼3np×)a‡é73U¿Õ&/"/Okú¬*·&×Áw:KT,ÉV.`F¾ß—Ò,Â9y7²ÐOäþ@Y&‹ef"çUˆ~ÑšŒ1”a€ˆµääWÜ[@R…dù·´8¶ö»[z=šu.qõл0à.?Üg{»§ðª¹YùC 2pgL~¡üƒYš¯ó?ˆçUªôå{ð<<$VñgÙenœcÕ¥eÄ=br—Uè¹Ó›÷%PДFï±`à[TnM¿¹„v€C¦vß·éÞè‘ ÖG”UOúü6Û®V£Öuy'õ “>Ï˼7’Àà^)Ùùýfx©óüdt—Ô½;S i4¼u í64›@ÅÖj‡È—UÖûô«·àÄ™¬ôöWSÎtÙîRÞ“®„Sø±¦å¡9§†Ìo¾cÕýñ9=®Í8Ýq^oûÜõ)šºˆ¨8váµ´c<’Úroêͯiû 2¥qÞæË“¨}ZÇC=.ïçvÜ_F,,1‰ªþº¼ÜÍò %rE±í½J%(õ‰™*‡,¨·8—Ú;öh3q~ÌpXs~JœÈÍYõ/%]l·¾ã^Ó¶Õ&ì$­Ëýìg-0!ˆŽÇp ¥ßM"uöO_Îg„¯šüÙÚ-æ`ó»ï2k¬¤j;®ä;Q{°Àö \ævd‡ùy—Vé4‡m÷I•¿(•Té¾Oû]¹ìéîrý,‹¡a60ÐÍË"ü˜WXnµÑJë;Dcž‘ù@D­ —ÙË{-£x‡ÿâ±ÕhMϧ²\H'‹ßKº~>ãÒ7ë‰ÄŽä0žŠÏ¨{|ü‘p‘T¨3õ»>‹:/°»SNâݹWZYm„K”ó(³¤”¡‘þ¯ÐBz•ç*¹zÂmZï÷‹Ñt"8d–æ4ÿ ¾{HM}u8+àiÔŒçâéŸòÁ†p t Ã0>lcœÈAº2dÈï¿ÿ;1N~`ŒÐ(Â…1÷·L…N§CòË—/‘ëe—àn¿   jÕª¸©8Ï/„##EW}×®]»wï^8o×­[wÓ¦Màà;†~}ÁFL:ÃAUcäÔ„7AÏñ+nY±bÅ»wï¾üòËE‹õêÕ+<عÑ2ÊNüòË/½{÷†3p†­Ý@ß0¿ò· ößÝ]©ÿØ øBÙFóR8çÚ[ô®‚Ó/H[øòÐ=ÿw†©í;îFöJkí3þP*i=1b XLäÿ$‚y²rÁ&³PvùóD ‡Ý(É¥Ø6E.ƒ‘ÏSκI§·/ëpZ#˜Ôuk…*òÂ×/ª*jZËζÌf‡Ì"ŽP+3P®•p>¢‹LÅõXr}„ŠîÑš ’بïÙ_¡?CÛO—9*\ðC¾·0’½ ;O¢È¸9|vÇS:£n~‡?¸.Ç¥TËç7ßïæ[Tcq(2Óªœ^kktT´ÏçAÉ ;˜Úg_~ùŸÐXnÛŸLY Ù}fqÚÈ‹·öKï|1åv7R¡àmbãñøÕôd_Þ²+.›ØíÀº=­’úŸ,2½šÚð7m¤zY£J¡&¹ûe—Ó”1ø´L+MkïeAóòȈ”.ÒON¯"­J)– è¶èؽÃh}Ge΃±9­3QÛ¥4ü^y¤ÜÂ4În¹?È3y™‰LZ ð@nJÅÌ ŒP´Qïyæ*ðòch6àþ ¿¾ÒföTˆÑ.‘m ƒÒù¤ÇâX?Ùïx‡”)©#Ê5‹åÿè.’»Èoæ•Ù¸¦ƒ²¨DUJV ;eq±–îlC"Ú lLAËC—Òµ0—ú€‹»oä #‡¤!6í Â/ó¦;\h688® 
,ñHÞO3æÀúŽÄlY„(ò¥á—(¶P-Ôdw8þgÂõ-×w`JyVI™;RÌóšC™¨p;Ä·œDµ—=\âTchQü¨ùÝŽ2-Á”g%RÆØŽ‡™cÐjÕúe«iT±K›…F"«å5®F5¶ù.uä‡huD¹_6§ÅÑÏësÞåÿ)ÑV]œxÌ } &Ô¨9©í–«@£å¯x¼S:·ÇÚ5§æPª‹Œi¾x÷Ý´•=Ž$éìUMn½u¿¨ÑÄè¶b^½¹r»+¬ Aæm¨|v\K·gY7þɬ-ÍHD jŒ@á¯%ŠÞÒÆÑ1&ädZ¾;ôº¡!sÞšÇîá"˜–ì†Lú¶wòÁNúZ-$ö&È_xïØá„D)*Òÿ™Ù„'ØwõýIFüˆ}BHáˆùLwvëµøÍ–µ¼©‹zõšõôòóí w¾Éj3¡^e¹Ïª«&Jzgz±¶§^g-˜7z›Ýoš8"¡bL§,‰Dä×ÛD¢Q]2Y ¦ÕU|ôâ:ú¸Où`ɰЮ԰Pïrn###ß¿Ÿ€ Ïa „,ô‚71ŸáóË< H¼…PãæÍ›à¾hlÀ_à öÝw߆ñ,Öø3,¿nÛ¶måÊ•¨^ ˜DsãÆâ€^@,êçlùòåƒ ÊÉÉ<ÿðÃõêÕ‡?áJïjժݼy@[XX¨V«ÑøÅ‹Gf „0`Ào¿ý†Ë0p ù’€±è^Ç~Âú„Þ >¿Øh!ؤѽ43ÕÿuAèÒÿÛÝ—È© ·4ƒŠb(,‡rê°ý~ðv(ÿç¢eËHíâ0Ф¿Y„â¯\øæ8™Ø´G®­ tË%\¹ñƒY+B6D®ŒVö]wjꨯ–.=Ó{BëŒí¿%ã úRw’ÔðˆMsï7šØÿ$%,â¶úyJø–@ñ‡„_SŽÏÇ÷  B¯“¸ r/T¥®÷àÝ ÔÌì#KÛ:\iÝÐ{[0­û5§½h÷Ó©ôIjv²ÔoÕTàÍ;yûÏhZÔ^ ÝÙß2Ðæ;†ÉÉõä>Ü@ò]ÕêT@›´Þ²‰H¸ ÝÛ2ôÁ^jß÷x!ݼväÀ@ Yñ„Ü! dnÛkÑ¢TRG·ÁzÉsëyÕ.K#™<<Ý`µÆ¿n, TŸÙñ¨“_„À6"ô™M¥2y6ÖZPÓÐɺp“ ¡HXÀ$r mFœä¢qðvà¶qx‹œ²>aÈ ”&ôØb²ùÚb½Ç-Œ‰ÖÖàé”ð²Òz"’x†…æíÜ`Òý?·Ó¡yHV­;àŒî"yT®€aÀ†` ªT*¬ãñãÇ#ùóĉá¢0dSœÇ÷ììlÊ€@Ü‚ó×< '5j*!¢ šÞ£“@Óýû÷Cê­Q£žßÁ% GÑ™N:%%%ázTjºwïzŽ*tuݺuè?j:a€é_ý5// ¬…dH:ß|Fã“…AäùÅÚ.í{Z ®½:…"b qÉa£ÔÜ8W±/Vµlòn}À€?…³‘ÃëêÕ«ø  ¾víÚø¾ÂñŠ+€¦þùg|œCäÅèähTÐ`ÜÅ5xŽZµjÎq1΄µñè¾õýEWßÿè/0d“Nê?¡ï?~ü—þ|} ´&è5Ê*0·£=òø1‘AG¬Ž¤»I Ûuu'*äA)hsye;‹‹T”l*Aô @Y,ÄF-µù”2vaA~Ld<õ¦A¯¤Øô’nb1r"T%v¥ïÈ8‰ ¯ ©R.@(„LÊtaEƒßÇy-ȵ‰¹‘k ¨.n*2dz¼±-c(Ž<ïȬ}'ku(g›Õ¿•…(”æòóÖ4¿n‘½±ðŒð–U¨y~)Ûñõi·Žï0š’íª)/‰¶ñËö<¦c"o6}6t4ò+Ó÷€hÀ潤ÇÖ¥ÛÇPdBg„¤Íg³ªòTD_¼ã®ëzøWýu¯Ì|îû“è9%@ ,HžÅ+Äø|BT«§4qr«MÑ’ØåínÄÊ«Mou­¿‚JYPú:²bü#‘#aÍF‹ÏD¸8l;ýÛ®iÝ.,¾Ý}b×íÛLîÕl¼ZÀOPÄÿYÊw)4*mÚ »j¤uüfUÓïø¾jyžÉ‚šÍm.sUü|^©ŒGáNš=o’»/θ›°>ƒ¦Öñ}±bèQ‰#zJõƒÂè@«ŠÊƒc¨Ïf©,ª Ý…RBÂËlA²ãéBúvœdX­¤÷2!°æŒ¼ìE¢#ÛWâeº”ËëŸ`ÔWY·‹ÊÁ˜:<(@R{ìH==žƒŠ¸o8µc+ TWÕ%ñˆ¶[ q%QÔâFÇ8<‰Ï«Ê¯·è³],µÂ,2å|7XÂŽ1LËëJXµ1ù{º‘Q'ê&õ-Î<0q`Ÿ[n £/ÑFz4\vþñRê<È&s‡-‡ÿ_ î7²bQ3·Qäsó&4Û£¨ãq ×í\¸ñÂrŠ‘!ƱØ#ò©[B€G‰AŽÊÉlË+Š&‚^-6žA9jp8Xè)Ô9 &­"šN,T¹NiŸ&½µd3­(vÕœ6à¼4¾<ÿ·wJWÝbQ*·•]tmi&ò¿bÙ¢à"¥@&Ô (Öw|ãõ»—_ïG¤ý¨Å”E ¬=‰Ê“ò.fNmºcÓÍñØ_|‹I¦[ O²*°¢8.!&"”WžÞé ϧrñ¸®:óªïFv*§BMu‹´6ç˜>eŒJ–™;vlÿìm—gÑ×tÒóùÛMVŽZ‚7étÐôq¤")d¼áxásR…ª1 ˆ¶B…c =ZIbURš]ŠD;íFJ·ÀÚˆÕbA¼ `Á4R4.àò!E ,ŠêÐ?åóö„~‬  ‚pvxÜ"Iäcà~,}ôfúè q9 Ø¡þ†/¾øâ&~ÕªUEà_ØÏ  PÄ™V­ZA‡ 
I´oß¾™™™èÒóçÏÑIdÇÄã¡ÁÆ'Ô×›7o†²àŠ?!ãâ× hÎ!ÜCÅ ØÆÅP­ãOüŠQha ƈ ã'¨ßqOÇ'`6æ?þø¨+?ôE×é*°Ë†¾ƒîS° A/þúNÿƿӿÿÅ#Ü&}•àÀ™‚-£mbíÀ3ˤ L4ØY-rOrœV–©9påè°;y4• 2h#؀ΘÔ0 s€ÃfràÏ¡ˆDdžיR]ÒõõPõ.¸£ûn׋_–ñŸÝ~p™¢äÞPYu'±Z‰&Dé –KPUÂC®=9F÷9TŽÝ»º”)<Èç#©mQ¯‹Tn“È]6h•c||®…éɹE}žq$·[?>½7=Vµ;6ÿúØUÝö™ˆE&­¼=ºµ‘­×¡óC[Í;ød5:ô=F8¼§<ßLç» VO/¹ýó::R+YØb÷¬£ƒ‰p0¥Å˜¼ˆÐ¿Ý’µÚ„¹#vX–‰=7‹aã”ü±éþ¼5îoxØ1½ã]£ñÝÚŸ¾žÞ:wÃÑ`M&w8²åVÈ{¼˜¤µ9Öòõ/ ¯»•gDãg^ÒÌÌrtoa½Í.û ¤7)0Îþ[´ª¦Uoå±E…®’h€‹ŒÎؘê6 2\2LW¤V¡÷Ùp/3šœ¹± ^Öh<³éãŽráùú³:íš×n«©¬ˆïl¸ÿCqþÄ©U±@IDATÛ~LEE#:jÄ5!æOG¸‰$“"Ð]¡@æ]ý㤔æ¹ñQu¼… g'†T[D^˜ìŽHyµä݉|(}5ð‹…ù­Ñ¥@tÜÂ-gú-êwnÅ…žsZÚza–Óœnyt$¹ÅžŒÛ£Êòá W;X¨)3tèæZ ÌF2¥CÞ¾§“0ù³›n­XµòŸèb8QŽR_ei¥O±_PN[I^ž%ßl,°{íþ:ö¯–7>ã1§ôÜ|ôá"ÂaA¤DE={)Så'kŽÑ*8| zòí/q8‘A™/iU#šŸm:$"äbšxycÍTG-‹S¡¢CCh4Gð¤ã†Ð9Y#CUTެžPZT0©Å Ï¿åõÒSçNÓ6a5o³ãù>Œ]2’ÔŽwR®õøºÅܽ§ÖP| aòÏè=^Q%ÓŪØ5Drîà/§GÔ㬿³–¢;ú€ŒÍ šGn—,ûò¤Œïæ#øëÓj­ †Ãà;7Y€¦3‰¾E)½•£Äd1Üö»³‚¶!”‹Î€ •‹`­®6tþ¿œÞ Jë©FÍå™\~À…J B±†Ó»õX©)Z&«Â÷¦µËIü6›^΋Žá×_Úôš<–[f.ŒsÕ¬,©àؽ6ëèÆ d9ëN¬8ñÍFÊŽ£K\²#74>d1s}êè‹ÍA¡„x»fJ@0y6jÔP‡OœC/ øHƒÖTÜ< CòGÇÉÿáy˜t^Ãßq¼t麴gÏž„„àëôéÓqq\‰„…xØÑIð ¸rpX‰ ™–Ý01‚{‚êõ`¡ þÛ²1 agÐ2®Ç4‹¥†À@2®‡±j\ù?éÿÖí TxUÔ+*8D *èV3Žð'¾@E bA7Ý¿vÀßPΕƒ;Y­Qô” ²' ‡Ë$h4´¨¹*ªBÀd7™=6]÷‡Ij­XIScf§UÅåÏÅ<÷î;G¸¸ˆ\bå)B¤y m9s!MÖ±ÿþf´°¬É1µ•¸rïL *oc¥Út@¸‰,™•¶nÁìÑ©û/¥RZ#ä­v8‚B{ ÔETJíƒæ3İlVÜÜ“3ßô¬®“ÚO‰ˆl¸ìØHÂnEŘ6/) gÏÙζq*yR×P^Nø¥º‚™­oY…F%$‡T ~IÞP9s\“Û ûg‚ÚT>Mk(éµÏäQˆ¸f­•B/H Ñý®Ü?:8ªÇ¬}w×QáCLÆwIÛöp.(ôp“_m—ˆ¼…AøLU™‡Î—óþ¤‰0ÏZ/mó o)ÎÔ®•€J˜š‰ Ò·gÔh,Ôé_m½™A'SHFGnÊýaê—WÞî’Ô~]æóYK{í|c|)DóÁÇ º2L‰²¸ñí—íøii÷Æ3Ú”Õ"9×_•¯ˆ˜Öü"Ì–ënŒEWg·Y>í2]¡ÃTg.gzeR9SV¯Ë“ìàÐÀ€Éøà}ޱ«”NØQoýW÷5þ¸êc[è&ÑfR†0ÜÉ ÷¾´­½>ÍXbTÆÁÇÃqƒÐ~k\#_š&âžf±áë®G@à'µ9ä7ú£TQà®âcã1ë/¥Qî Ðb}è,Iý¥H´$#Ù?NÂR\Úóà‚ýíVÖ?€ÑýÉP:¨ÆË%®­V°É¤®Çsö_[õñ’»½V¼8ïr7èÞ…Q¡±(¹€êÑM¡d¤£¬B%UõQÕs#"5•>ÈD.©º†8yÀ©Œs}¨‚WW‹Ãgö°¸b¡ÔÇtn»¸‘Δ~Z»šòFXêâ*Cªe#]P?œI ÷jç›/¢šJc`ëFîãPB·$/¥¢##æÜùšpZWªŒ"M¡Åf6Á3Í#¾¾ƒ]fC°{6v¸p^\YX`Œ˜Hm‹ G¼é·ºÓ­á'ýZfx±£G÷¢xƒž ò”ìüšŒÛY}í¨oæœhš6ôjö•1tiá'±óËè˜Dª‰0YôH€JÜ@KøT³\ÄuäÊvž‹…‘No½mãÕ™š,‹UTðèÙz‹Éâ—”ê Ϥ;u×¶9ç&Î̇ÓÝŠÕ… â-,f;jï±5i 4! 
žG«!ÅÐn²;Œ:a8 ;4c0G]±b¤=H“Íš5C hƒIaÄÈÃr@¾°4üwÍÀG£2:F³x>ñ tÅŸ­qjj*\¥–ÐN¡á>€-€¼Ä( ‘Æè'܈Ëp x˜´ÃŒ+qàœÇ'nÁÀc`0FŠ{q"2îÅÓqK˜ ùtÐ74%A€ð7¤S Ú€TÆ ó:(Ò¡œ•ŹtÉÿ Üg9hóJ§–"zض,à!-Il;QÍŠ#VŽßɘ2`›ÁhôF¿:y-oëÞLJšdJë›ϧT{Ô; j7öl7ÞîbW(lªÅ-m}Äo°kxQ""7é Lmèz‡!‘ӻД—|«ÕÄT*p"›OHC b‡(özD-¤·+ aPuÖÌ-#p#/^üÞù¥#Øö8°Ã­D bfŸ€+§uÏÞˆ ’]õºSbÍùn,%RhÇŠ¢yü}N/ºß{\‡T‰K›ÒjŸÀÎÛ0ìü÷þo¬"ó†—‹ÅD#‰¢¼ˆîX Âè-“HÕ)/‚¨8¢_>†‡80™èªšmzî9_•¿AÜÿc(S±ŸX^L›i¶êâŸQ¡Bô’.Ù– }öà1£Þ°P…ÿ‘€°+“§¦_Î?ʦÃul«ˆ@¯% ô¥9”´¹ìÎ8:/™ÔpÏÖ{4¯$ˆûÒ¡ûЫ ·×g×Ü3ûzÿ íž¿7äoü¹¬8šñC°Å" zG‘T©â˜^ÔŽBª„þãYV»†ÇõD¥v¹d³3<ñÞG0ÕÅÅ&tvÓó.ð\‹i *‹/(¦E<þó{Lˆ2Z‰ô ßRæ–ÆñvÜXiïÄŸ‹ñ\—ºèÀ™ ô|ôY™7†dw¾ŽÒt9£.é†)½–—ó‹ÍýÅÇ1uádð½QH‰©,(S> åU1ùZeÕRãkºÞ 71¾HøàiðvÞ•Mk~«ºD´:ñV¾»h\ËfL ÔÿêÌ£m0n}˽w„Þh!óš\ÜskôüV·ƒ¨-Á|çòÉY"³Î:¹w–ô3oæ®ùDíF&sì&‹Û&åSvH„Bœ~Ÿ H™0pAQ€HD‘">çëæérµ_ïÿýàϹð¤Ò{uµ(ƒ€*,†©¤DG )c¼¡ XxXBp9® ÂËz_~a³È$‚“X8˜’C%&6UÑ–+kèÒ"nT²ëÒ~*†)¼ÑÈ=žÔê¨6ÀÞÕùü“J? ‚@\QD÷…˜8œ¡XêE+\ê=ÙµçWÏ ^+`Å PQcÝ«¥eºÌ ¿—8JRFÿ¡ó—Œèö;o–ÑU!'û.l£í°H¹Á®P©n;ŠI#71Ð Z`Êæ/Ã4n¸¦ñÕWg¦·»™r³åœyŽ%£ç1¿ÄÏòõÜb”ÿ±å6U2Ñ!£IŒVî€ÓÁqÜHÄÎBa Ê¡|Ò+0?`à.ÁÀªââbXdaLÝ·oß’%K€:ð]9r$b“€L¸Ÿ{@N¸ø„B²&&žb€"|G?ñS‡–-[†NBÞÅŸè^øzà(:؆ºªfÜ‚ïNâ'|Ç'pã;7bì@ÏBÐóëׯñ< 7ü+>ñЄ„Œ7܇p—þë'ã<ž‚[Ð&΄;ö_/þ_;ƒå3í?˜z>û<›'Ô4µœL(ÌÅ>Í·€.—V» +Âóóßö°ô¥®Ðø?eK‘Â>àA’~&'À✻Џ°£ðËÒ͇çA†pŠÄ‹tõÐ"y 'ZLÕz|2²íô¼ëˆv>;ñ€¼µ•'dqŸ‘Å=Î/;Ö{I³{‘¼:¥žR³8@4³šÞ,ô¼8ölj0!Ѿšk›^cî! f1’ N:tu]02yî‘uÿH+ˆZ¶;ÈqÕ_Ýà¶eu%^++.ïû©”|àF ˜Ä"ÛO.¥g˜¤\òŽbOÌþ­Ã¸Þ©DÕ'±ë8­3’[*Ѱ*ÚÍNä–â![ƒÓ+ârg^î³î{B}Áê!3ØÝ?áÑŽ#7h9E`¦=ÂÜ€ .RÁUzÖ@æ†A9À1¼2dI¶S»:pnë•T<­ˆÝ{n> »ÁON³’v ïY½:,ßrg í6‡ ïž ïv\§P$ý<7„¾ð¤19R†Õd†F=ßø‘µ×]T ĸBìéhîT¢Aø0mBþšÞ—‚ìX®ˆ3·ÏÕ2Áó÷[w~ºàA…JgÆ)å°¡û7¹mJ©Ê$%{î`9ýpÐZÞp³Ø$ˆðT´—ÛW}~ÃɶÆð: Ãfod¾=~Ä¢D8dˆ~ xKL¦‚ØißÖýN~w…°`T¦A#)ж3ƒîàGtu¨ÚË\Ôy/ÃÄåó„·™§ëŇînÅBBçq µ2ÈÁ'éPEp„ŽÉÄXKã··*`¸â¤ qj£Ã. ‹ƒeÎ÷ê«Ä.¬aªÏå¼’>!C$ØycAo0í`*az‡9΀½C†eÅQN¼ýå²6§åâÚïo¶ü²}âU QÒž5"›)äi ‡·Ó¯ó”Óg7³D´1åÛx’óg¡©qm6BU#¾ ±À±˜É±ÁE‘Œë¹z÷ÙyDÍä Pë7@”ÌÃW6A¨|…%$ NôÂüË"kaY2„"Ú,"w%^—Û+âO,l.Ÿ<¿Çθ|k.W:äó´'¬írfì±ck£dÞcšÂûŠil¬ðàž¡9"ÆŸŽHUZ»ÓãE>W)™w»Ç¢^+“®ŒÒlÊ¡û›ûwuüþ¬I  .!ÈG M‡P Äâ o$“X\0*™NC‚Õo#0ÅÏ캑ØLw gøcP?wȶµ÷&Ò Gš¼v¹p#Ùpg2íFŠO?’‹ ›. 
ªv ÐY\w ¸ƒ¹’RÂó?=>ÚX(@¨~AíÃÁ> æÐ³~Ô‰M>¢Òÿð©X­T ‹Ï°Œð;%™]4ü„]i˜TŠƒ 8ÃE~O€Iô),Yþ] KŸxbX|lx€N¢«øçÑ & 4lý…ù:gü ä€ãtÑè‡0›°é"r ?aâp~B›•*UÂ\ ”Å(ðâŒ"Œëár…Ÿ`~Æwô¢»a´·ó±WøóŸL îB›p·Öh4¸/ïŸ/øßÿŽMº „!É1´°9L_‡Å.R![jáWäÀ Bè6½‡ÙzOˆúo{ÎfZpx¦­Hÿ¾}Ž?:%àñ¡r{Bر]ĤØú‚–)„Þ;q!BìK°÷ä×?~&Á  ~7’êb¿±X`ÝáÈä÷ …~›¯œËVޤf—¾T42”ëà†…ªg,e Fä±;S±)gôZê-·ÊÜÑuâzþnEHr†ý¥àן³çÁ¡+Šø¤ðâ…C'KÆ Gqí VåÓ#Ô‡Ý J©×±ûÉTJm' deÿ{o%ŵïîªv÷™éQ\$$Aî  \‚3ÈÀà:¸»K°Ü% Ä圓œ‡Ƨ]«»ªë}vw./ëÊïwrï{YïÖêÕ«ººjïï–úºP:7²ÇÔm—óè)—RzIÚJÊÕOp~üì$|Í{ó ²Žþt ¨ÅìÖ‡ž{/§á‰E_ôÿ}Æ-îI•UKÈ»ã‡Í—i(’ךŽipMyžžªYóáÉ2¡¬BéØq}.‡^€óáÒ¦`z4g7Ô¹IaAþˆ·‹œÞbÄqí»°Óõaש»ÎçQbŒZyZ]`¼Ê0¾òJè6Ø(J& z5EýP …ÁYùP YIaD1Z„g’£Ÿ/¤ …õNŠÂõŠ›í®a­^øü{{ÃØËÕf—a-¨>42R|ÏßðÐ;9)„CÅÑÀÛ(9pa5žp¿“ò¦*äôHb ‹)—KùH ;¬ãl‰ßbMMý¥ä~¯žSµ&¹¿,râHžÒ(QJd>'¯3H^Šgb!ÊÉAÑ52V#›2ì*ò$ö 1Õ¾(ÁíÁï%Kk ªèp1vÑO*±/QÐJáþŒDÌÔ³;ÑíYØQªJ42éø×{@­± d*ðU¡h(`Òyx3æø¦B©¢ìšTËe\™Ka0­;3Î9O²[n™x¤ç´.[V^ iÊ$jE_xâΜm—úÛ ºu½¤Ë;㎽ *º ¢úa,`ÐhdPtâß?é­)Iˆ º wÀáhYøð&äü„ò„à¿Bþÿ`¡Å€AÆð$(@ ‰¶'è^Ç O *pDJÜŒoÜŒoEL Ó/š…öð9â@ïܹ3¾¡…ÃÁ$ ”ärülÓ¦ nçrõêÕÖ­[chYYY˜><•ˆ*Æ·o߆·3&]X¸/^¼X«V­kƒ¿Zµj•`y@¤Ïž=‹¾Ð Ž»wï‚Iô‹F^ÌOâß?~ƒoÅ…Ù8áÕà›ÀÓüŸåæ?¶ð§Ÿ9cãyáJ4NWW ¯„ÆŸºrá9B©¿"MkÇ^ˆ[0( °²/Äúÿ[4` Þ§¥ñ>Ègœ¼áX‰^Œxä2%f€(ƒ—2³Ï(sÊLI¼yuhEÕk_Fß171 Y@=qTÅRÖL…ÃN¸5Ô*¥Ÿ•܆U—‘„ýrQ¦ :C÷ˆ¬^NYP·îê|Æ©¯^N'u#Œ|â¡w‰ø.%-"+¯L5Òn¤Ä•º™òú‰Ugûl|ÿѸ«Niv~Õ—](ª…5:ym”f!à”'f2#`ëÙnÄsÛ/|wè‹C F”œ¾ŠOT8®ñJSYrYVÑó’-¿,pê5:p!éº* u:îX÷åpJ†¤ëÔQx€åäÒÆ#db×ck÷wÇ¿£š,æbá=?-ä¡Í×I”C¶b)&‚Ñ}zs#År2ªGîÖCS' `ø»Á%Œë3rã×Û\>ïÄ>Ë%^ïW´–io¯üÕñ¥,)úÉ¥³tì Ì%ñ‰5AVÞUÇ_wRUê´QõOzLùbZÉÑÛË©¦‰¬=4›!§ùñõ?MÌé’g´ÅŠ=¿Õ4ô]4䎊±¢”þÖuYLƒüÃÑ´pæÌ–~E´}mÅž[ãüL@Îð¾€Ã¨®îGx*h,ÖÚòm¿ÌÁ¦›úÖ¦¼‹cÑ¥²¦Ëß}á²JZÕÓý0ÃËà^›ÌaÐí™çr°%¤¤{‡¡g®íF§žˆ[i@b„˜ XBð~ @·SI¸ͪ+©¨¿1-Á…è7¹Ë‰µD ˜“êÈÏ;Â17ëPNm¹rý¥9Xˆìv[©[¢¬)4)ýT¼]Kù f¸‡Ì ¢KåoÊXBÒÒ¨`¨"+OO¦Á¨ÜêPö¹x¤oó<…M<ðÙBJWâO^ 85߸ÿò8úÖ(ˆÝ—Š ¹°Îᘠ´ÊÝÐÞdj£ƒ¬!“SÞ8˜‘‹…>F¡£ &º˜‰NÞÔÐF'1ªþ’d­Zž¬ßw)¥ß„ôòí¦O«Õ %@ÄýÙ±C†oº°#í:¼»`@¼¼½ðã|¥ mJÌæÉ÷e8ú÷ÈHmЍBéƒJis’Uá–,=={è;cD$&WdhŸ«?zm»:¹´8ôhßݽôõŒó 1øZYiÖŠ ¿Å®}ú¼ÌhÕA—ÎÊTÔ÷™*PÓÌçD±/žzâðñfÞhEB)ÂÁœcùʈ€-¡&+oŒÓqÉæ[³ü‘¨A®0&ÓÜzV«’òxkÔ¨Ž¥¦Zl„ÊÂD#G¢JÌWŒ÷{%*M¼f$‡ñÕ@ d‰oP_ >àmH§8O´ŸVtú_ì1ñ8f‚ö 
$‹#!耺$‚|¥ƒ´ZhècA¨@í*šPñÐt‚ÿ)¢MŒ´3Aç@,Á@á ,h¿ Ô½{÷Ý»w#lQR ¦qÃÿEˆ0”ä7nÜ@0ܯ0"€4|øp¸k!b äöܹs·nÝ:|ø0ØD|p³fÍ@,¯\¹:0¤Ä„ äõ Ð`ƒ Ž;vçÎmÛ¶%h3f?Áü§ãƒ£H0M8IH“ÿéÍÿ‘ò ¤…"mì!Ø•`€Š‡ a§áM~‡ƒo…Êá*M¶e`«%ö"`“€£ø¯@õÜD‚FÐÊâ"Zªf(š´¨C‹óKì))¿ºäs—ë¬FÜ÷b¼õbñ2¤ç«£”æÌˆŸ±‰¶w_[®ÌTr×ÁKÿЈF9›wy•GýdQ‹­Ý5³õ"©Å´òÓ±”l_+‰³ÐÝn-ã5[U/«åz¿´b|÷Õ1º¢˜Kª°1å´ž{üêò{Å÷oÜÙ9­õqÔ™™×ã;OÀ•Û¯@UY2aÆÔugòB¬ÈhÁ‚_>µ÷²ëgcVü@¿j—…1:¿›hAÅ1%À•¬—l¼úјftzJ×» ‰ßEÏ‚N'"b¹Zt{'wÞRÂ*R#{Î/'ZæAñ—DÞŒ‹Ùj «à¡Bàáq¦Ø ¡3$œ‹hM”68ñge Ð5ÖHPF‚hn„ôpþ>9}­3ÚZuuÀÌîû—nœF• ÐßÂ. bÁL¼(äR)$i!ŸÔW^^I!3'iÊYçz'èÍá lŽG¼Ø"AÌm”a—_œ‰y|ËWïÌo|×±æÊË3(N$$§ãÖE§&Dpw1ˆ“Yä~üEÄ »»¨ÐÛέ§ðÿF[”‚ä!3ß›ï|Êè…ÚÕ+×xòäÇ•mš*ϱê? ðÊÆ7f˜/Ae(º9 ædx/ˆp—†i“ Ñ  RÈË”„1’Wg#" 6 ? r2h HµÉCJ¸c¬M%ac’€.EÆØ‘÷‚Êp.’u‰cÞ<™l´Ïí~áYä¾ÎÀª¡ôÏ$žX¾>Ùüñµ¡aMÇjDQ„˜ «Pr#$¼K fNŠÌÙ'•žH¦ëèéÅ”Ž2±¨Û/3ë)䈖 $zýŒÌ’/c‚6Ö<¿ù°?šœ–”{¢ÿÈÖ«·Á«Y­àz¡Œ†ƒðCPØÍcºe{òrøR³+yõ—_~â3Ȳü3oŠJ‰ú¤®¡sË’çe¿iÔ1?W®” ¹ ½¢w›±oI¬À³Rµ ŒMرr¨½áÿ,ì8²‹¸ñÄ2Ôxgdz÷+6Çž°´R’Τ/81 ×çv;½àLOjxÆ Ž;ªW&¬Š"‘íçá´Â&È‘9£<±1ê"R¤é–-z# *;Ũ{Ø^‡ÒøHª]:ºÃb}ШTUòÈ×WĬ² 4ºãçé…åÅ•SL¨â_$IfC¨L?z¿ôAÝvR¿j´ÃêºÉ+;>¼:z5¬V–ÔŠdÂ@lñ[Ñf0ëI±³ô¼pã„sß›ooºøÝ˳t *_ª _ªÔ&ð ÀfaúCâîX<ù‰Æ©XªöξÕèoBŸeëîÎ@ŲŠ !°B_,ÏA_b"\ÙÇè0‘ržâÈ÷HåY”o€3oPØþÓ‚!¯oŇB:·àÚHøú ˆÅSÀ¤Ð¤< %[UÔ:Αšê×@³éáÄ@XF«Ð Tk‚^g£Ó(’Íw¦A¦D=Ž édªˆêVEÈç—èà-¯Ó¢¶¼–˜ù€¬ÄQ:kâ’g÷žeZ^Õ©^–Fu÷ž~—l3è”ʇ㕣lD¯Ð†£œÞ`á"¢ Â{^÷ ‰Ù=1è+†ŒÝˆ¥‡Ï¢VÂWu‚N‡üÈäæA•ƒ¾°»\™R…‰É%5­ºÒ–.ºHòº˜zr ©_8dÕ¡£L&‰³1ÚU8+"›O.'ÅdNë£pι݇Ô-ŒÀ5/ÄHÕú,ñ™¨Þ •rãÒaTO®Ïj¼zô-Ã$;ˆ>œNg,‰rªŠHYt*ïü|*ć}a=Œ)%¤w“™'¯-]ÜæÞìc5ç¼yªRršÇ#qGJIʃ2ñ7m¦<ïÌúr“Ç,3Hâr0ìÈ‹·}={J×]«nÓ`Œïä°¼Œu†-RXÒ˜4,±‰ÄíýsÍYÀ}£{¯Þr…Ê=”Æ`¤ø&dä+˶}?ƒ^D½ ¨ Â(…ŒerØÖ°lÞ(‹$טa ™#ëžT†¬Fb¨d­üƒçˆBª3(SQP©Lx(X½“wãá…ˆ·¦÷)s¨°ÖéàÓªgü|ï—ª>eRzf±ËÑ{6}7“¨c"†¿àÛ Ç…“F¯Jù³âBi’Ü­s9¸òÌ‚š6\œ n/¢‚ Z¸YY2ªëÔ­çò(ú“‘2IBw`z¸%½ÿåDªû°Ý̲§l¦­^yQ‰Õ$ Êp ºúÔG”†!‹Eë9Gî.t¥PqJÊb/"BEª4]Œ°‘ªUuütO¥à y9—¿ûî¬u×–#²¤•åŒÒ[âWªRSy|ê»-&̳A-«Tðjºy@¤(àÐQïjW`*ð„X8°ò6Ø¿ñ ­Ã¾u¨ÑÜî'€ÔA¤`°’*¥e%¾¤4dnhi|qu£zÝ¿ùþ v`•ž$’&ؾ­åtÈ]tbþšŽ'&íî³~È“|ïwy_¿CûJ‹o×)±–˜d–X°ÊñNï´cŎᳺï]råƒY]<,{xìÁ|l:™H÷†„·Èá%°ÈÎ>täØÃç7¡ìeDJ9.Žh®è¤N㪵˜Çúdc•_íUÒK¹G;÷OÁ .|çÔœ³oS4%åË>×—ŽCÆ’bE\&–òO"À@ûÀxÿNŒÄE Cø¼Ð«¿ }ñÍô¯}¡ `‰\: [HŠhá'<° 
ö%ÄmøNÐ'p/®üë'è"Ñ,(+@J@•øN4Žî@W.&†€ûA*À€EÀE4vîÁ þ¶Ä@㳌GpC¢kœ µÄýh'x¾è(ñ×ú :똷ÄIâžDGÿéýÿí9:A¨áýQÑÏ£V7Ö+V"ŠÿÅïÄ©µ×ŠÏD±H1OÔ\|-¤ÿxË¿¸T|‰>ëðƒˆvFdN¦-»0ƒ˜5Ñ‘ï…ó*ý SA ý"/½>^¢/x¢‡§Ð„¾Ù‹>U.ˆNq\Ó…b>©ýõÅb˜®±Á2AÛô ¢­áóÅáµ>ËDñ ÑØ7–Š…bÇ“ÑhÔÀ•œ–3ÄgAü¬@+ØÄñY‹pó j“è`qíÜÅ_Ä-õ>_ª:¾Fzu óµò_Öúm4Ù"~/ŽOÚ†6EìÀ ‚å ˆíRÏZ0·Úþéö] ^9Ô?eVÿW³qÏ[õߥӈøY\_í¤xFœ—ò¥ø¹¸8}®`Âß6L§`cOEĘӃ™Ã¸úgÌ âÃÁL8ÅVUûÑë¸sˆ~ãS‡U ÂÛWžÒÏÔ—v‰¿Šc4+ÅÇ¢ø›(>‡T_LûŃXm|\âˆÆs²k®˜ªÉ[c»´\aŠtݪj‡¦¿ºƒ¶€ÇÍA±¢ ¤]¸Äì77‹ßŠS3–њŒa8?Š3joÆá3­Ö)̉ø\ïÇÿ*äC¥aôæ c<˜"_1Úéå…¿T[g優K?PgOMÝ4Ù¾c|êÎiu÷ ©10$v‡_tñt]ôQlº‡¢ø…8£ÎŒnhÕ1´1,'ÒÉÿ‡8"cí¬zèÀ¥@º<î £¢GpsôNl:xx'¶sXòÅêåˆÅñ–K8ÚZ‘8½îš‰É«fÛvfK÷-VÞ˜©;;$cEßš3¿–M‡L·¤è.u‹Pj ~±,Ô³ú@:êG⤖[qC ¼ ¼y16°èð¸ËC11 ذ:èí£o¿€ I]‹-GGæGTž„qy.º:ô±_3º c%ý‰Íï;ا,h}n¤}ÝDÓ†å–=›LÞkضÖnìb1jbŠ]b—Œ.´Ùø2Åðvcñ`á~WTÞ‡÷ºæZz¥Tš¹†î=,öyØÞ6‡5Ê¥‚ÅyBxͱÙ*ÄÁiyc5{7¥~–Gní”UÌ%ßͳœ¢ ?ÖÝ¢/hÐÝ31·áÁ{ç1g¶~XNnÌ#W¶Ôüí-;ÄX”¾§!w¯_lZi Ý ñýŒ†ü¢€¥¦?ñrý$Ž‘-FGT€„+ÄQÖUØØ3j/¿ÃL éX0xa½x—½Îéç1þ?óHP ;З¶Ç9t«è?A @8´ãÏê•…¬†Î@0 ¢ÌàWÖñ믿†ñÙa%ýâ‹/@A‰¡ˆFäÊ¡{܉ðAp‰Ÿÿú7Z‚¡Bv|!‡áô/Ñ8XEÈ” ”øùB–Åp?þ0ð‚†Ö£Àˆ 4ظ9Á¿à-ãAŒDãß ñ®'šÅpÐZb6Ð&Nð8:E›½~¤÷ÇÑ&zÇ 9¦ü»{þç~‚¦k]Í´A-&ø‰„òÍ&Ìè¿ná°½!Wdp£¹c;/ëÝl"˜M à ôð6Ü iø?‚ {\‘÷ŸTæÈ¤ü©v´ž°#bdô¨ôH‰„8¬NTRmÈ“2pÆÍH Þ¿ÿÜ*| p ò0{8úˆŸD\&è¡á£V(¤÷€ž)¤ñümåp¥ü/£ÒQ™¨Ì½ñÄ HR©qR½,<º”$Cþ ÊÔ1/Ó½>0dßÕÆ[¸Éô÷–Înð©ÿkÈ’ÌëÍD+Fø›k&§¤  ÁÀÑö5¤õ8µB¢Ù+ßïײÆù'"uМƒý}¿øÐµe0ÝÞüæ(d%ä£nKÝ¥Ž­Ï!qq¯›A—ûÖ:H•R3{4):eP•O^‚‹F×L*Q7‚T2V§Â^JHù¨ùæ¡+‡¨ $XæŽ&ð3gÜOèH’AÓõ´ˆÎv²i“v^âÄ{,%Ûö-ú +¯N‘9tÓOtªeª:yOÿ[8 C+(ˆ» ¿.&ùO1Ï*8¨c5ã–¢…YHo\«Í“;}â( Ìzzpã9³%Þ1H”f…/â—)Xqº|o——pe‚ Q0¢ÎŸPÂÑòµgìùn™„cW]¶þÒ‡+Ž|°¹AèúÓÙEiT¾E”'v íúÒTâ¨(Ä18TãŠÙüCˆEÁ_&½ýþ½'tÆÌDDîµ 凑¾¯/ g]-’Žmúà,8Ö6K‹˜J‹`Ô°Oã€Ôx°æÎäE—?\vwIj[rº«–Íúøê¾<.†ÂðçGµG-× ‰Z™¬5ÓåHáolB|¹µÕz©òê  CŒéÄB àKfF3rü'\õÈOCt& VÓ‡›¡šEu`760ªýQ °è—îæÝÿþÑÖÛ©´Šé>Àt¹p’nÔp@¯Ã¨‚©CN~OJoÄŸðÔˆ†n`¯…˜O ŸæéÔ­Æ8°õÌ¡mæ¨b†œ.ŸÎè°uü[‹ÐÑKU^Á[„úÍC~DmÆ(>Šƒ°C3dï­)V]]WQÌHÒcQ™¤ñnô›³'ô˜Š5 ª]à ’ÇÆ8•dÞþ險zÑ&x$é’jV’ê~™ÚüôÀ‹ú÷žŽÐñ ä)A…&B*eB™NLj«bJ>Š@DÒ¯M:WJ’Š„éØtˆtÂìQrDÔ0lÀ‚¥ æ$¬4ÝÒt¯h‰R×{ÜÇÐ2g쇆Tx ?íT"¨~&Ð8;D”?§×ø˜¨ÿ3šC ²‚‘ÄqEú““›› ,|÷î]ø7á@Ih†Q¦7AÜ?QÿœÀõ ‰¾Ð ¼ð"œøëI{ê‹ ~qž8I<‚sœ$Î_<júÇ›t:1œ½8Á 
0°|ê瘩BB¨´Ç0,ôKø3Lð‚RòGwP4ˆêÞpR±Gˆs<#̳õæÃ-0‡Ärkáà ‰¯V#Ræ[©•ÇÁÃKIB\P©ø~üæ¡úf¥Q:sI”†P¿g9*Σ$¥lt{ƒœVíeåÂÁŸg&{ýí#N}Ï\?S ¯”z¹HÀ µâåäÿž.+’Q ‚ý`÷Cá7Æ( 3Ó…¥°òuAÑ(Žv3’e?¬DYs4K“HìÇ,ªRÃSðÒ!bÅ+‘Ûœ¾ ³Îè÷…µÐ¬…Y& §Ê%´ôü…=e¯ç½ò-Ã) ¯Mjù‘Að¤ùàÙù@í«ñ5"!Ç3A©Ñ´íîaŠG06¨sÑB9©õ´²âQf ù°Ûò]7²åRµ<!tb?«~¢j@ü¤ä §’_Δ…£F6²é’m§g}øÆ‚]ßΎ˽ܺt_j)MŽÜ‚aƒ )‹³pué05H‚o ¡›ñ©pfŽÊÉ’{3{¶ï{íâ±-rXônçkÆ uY“ 5$¯›DÛÜÚ»<’’u7gƒ®¾ßeâÁÖRx@Ú*`dÛ½j°äQ¤l¥@B÷ŒA©šb¯Kk}‚Ç*O:öÝz@bõëç£â ~þp1Em¨<ˆ½†™çÀZ°|ðRRƤ‘— ^£c¢àâ 3s¹tÊ«;^‹ZPÕU6ÙjA$Åÿ^ÿGÏoѺ> ' °ªJ¾¼OæÀHñÞ õ+޾oVõd0{@”@DQb2†¼%%R¦b™¨!#¢Ñ&wû¥UÔî‡bH …@”Ê´Ó¯³J@Yéá!³:ì_r`±•"¿³V Å¢,ꪩmt)cdJŸ¼¨¨™øÆ'fá Y+´Î²KÏ÷B#e÷ÕjÖ¦C@´*^:em:2Ĥaº¼¤º¬©9ñöAiŽÂ碪K ¶IPh Ùóã&z36-G¿ ƺÕpõ©ÆÕ‡oõ<à2®Ñ:]Ød—Uy#4rmío2µY-¸v3SözÌÅÞ¤¢ƒ·7I%,e,iÉ.¢ŠªY­4Jú¶~ìØ.!‚ÖwÍÊó“´0Í¢ÇȘ(Ém¾}õ—#ÌO³èÌ!¾´âdÛfW©·=S_uZ“³ÉÆ—¾tSeæˆñÉöÁ6ñû>‰¬° ­aÕ|çŠëÃX­\«=IÎyõ¸Þ¥WÉ*-jx%K—ĩ䅅‚"¶èvq-6(HäRäÕ7<–ìMËÊxs^½#!«zùñ‹z^c‚ˆæÔÔá =U<«‹#Šð®+¹Dú ä¿ _?‹u ɰúókjÈËš ¡bÇýèO‹6fP••~â/ÐÝă Ì0¯âÙ*ò¦¿t R@D„Ä âŠ/\ å "ÔV-J(¹Þ¿¥ÄÞÊÀ{êÊGÔrEDàáGÃÕU:d|‘³rFd¥ õÆ‘òòÁÆ‚ßD`Ìãþ,àeU´Æ'¥‘xm 8Xˆ“EVI© ì0¬çòÈFD›¢ôîÞÃ1.x ?ˆKú¶û`÷]Ú¬ßs¦€SˆAòH²:ëÁÝ£‚( Z§–„¸$ê:-˜yx*uœ±ù಻MÞõÅêÍú§é«h|6&¢ÓH4ᘠ—¨OœÜLÁÃ$…áBhVÀUyœUR¥©èµÀJ@£2°Ïd!$O+¬c¶›fu=aCäGˆ¥5É鳇A9Ú7ïvåæY´*qÑFжc¤ÌW€y`-(©wüI%#ºålÛ²hL¿9[ö.¤Ë&C{m*ؤ’3´“ß[¹Ñ]fnùbiFC>ýnO÷Ö#6ïØœžI!¾væ…q‹ òñµÅ”¾–‘5Í>ó Fª\÷Ùl Pàü$eɧõõ¦uÞ&¸X½ÓV¥j¥uæ)ìŒ6M¾þÄLŠþèb’«ÉÅ»ôà€²ŒÀ½!RÅ ÕC:®;°^äRò΃Òôµ¸r©œ7OépÎ5ÿp eÓÞ:˜®JÏn}À_¥bÓÍI¾B¢3µŸŽÚÜû³Hy$á¶}9—N2¦:FÆ ›½ùÐb……GÀXÞúT&HÙógTÍÈvöY7¤ÿÔ=7ó¼^Ao­–ýæÒå{g&öƒ‹°åt¯öo²àØW ñ¬§ÜE­1ƒ$ÌyÁuÉ¡  E„PßI"ƒ‹#2¢û´í'7¬:9•B3™ÐüäºÏßÅ›Úfó¸Ö«lê•8ÊŽl·Y 7ž ƒCF6û)GK×a,lU’áo.—³z«ZjÒé8?$9…÷ðåÕ€ªgë!§íÁÄNêë²³zt?w­ÆóŒ0ºíVM [èx|äβŒW“ÜÃNo92õh$¦§þÂXúÝtE@ÑqøÉÈ÷&m;³&†7Z§P² ÞEa0"ÏMÖÏÁdòðY†?”_Oi€,~ï Áhßd¯R¯Ûl5c¨`UõG×Ñ˱a¢{¾½¼Ëçóï´ÉmrV›)n¹ÐN&æ_$í_é_#£ûqÌÆÖú¨ñ'F¥qc¿cã>é‹^İU!&©äÈI$~¶ÉÈ–Û·ý2›dR»½“;ìÛxe<Ýñêæíß™×rÿýÇOÖ|5<Ñl4æ<¡ewßÙÞïΈõfݾåFöàŽ³°'æ|:®Xë|§“›KOAYæ¿uÌj4Ìjwh29][Î?\ñÙ$0î>6ÂÃí/~üžŠ2AÒdºV¸AÁÔŒLz¡pþꫯ@•áÓ°aCÄ mÞ¼¹R¥J ÄÐÇâæÿ%½Üñ¢@ØöTñËÂÇzˆrµR0|±@ƒð„PÑ! 
GŠ êöç“=G(#+úƒ^ƒÚˆL] VID¶ðé³´Ì,dð3¨­Èt…E(Eyj´+ Ǩ1<™hTÞ¬×ðQêH%‘²â=’›³>oïx† ¢ ñB±¨.ü´{8»úQ é[+œ‡ÙTS¢†xkz·å»ÑÎ Á2fB—mZV?¥óúUW>¢xuuËÎbc -û~ã샷–›céZ” ƒÈ%%®ÒÀà÷?Ì2×xôu~uciC7VKÕ™’•ÏÓš‘ŸpŒ ÎÓø œ(TÍ%—o¦S&ƒ«Ï[ÿÕPœNkQ·  ‚z©iš.©Iæ­ˆè-@3¤¤"œdUÂé©; ,ør5ÅìÅäšÿº–";-Öt6• ^Srõ;»lnÖÓ„˜™‡ŒlŸ½íÆrôb¬!ÜñÅúèz¹fƒ*+žÀŒ‹&nËÙ;r÷íѹ2¨Óôý—VЖK) ’ªªM~!•‚5Ó¡FCNªãÅGŠàUÌ›Œi¼¡^¤©‘$Ë$¨<6uýüÁ³_Ö~=JçL¢4ÏAÈñåCŽ[¶ðI‘]Ÿd¶kŸ”ÿL¥LàeŒ )Çü“ÚÄà 𗚬êvÝ]jÜðú—uu¦fíHÅòsÉ·;ï.ÜkòñÏ÷S dT—ëÎ £è;F²{ì[q~p^ÝóË¿8yØ@LHGIög%ê,dH¦ÝaÃbøˆúÒL‡¼«L®¨<¦Ó˜Íp‘cIŠU¡elB8ÐɈ4bAãzÔ+àȳ{ˆü{ß<#™Æ†”£_¤½p#Èš¶l0›®]\NÇ2 A¶â)¦‘ÁÊâMñJh§—AÉ㥠qDÃÖÀX’w'ž QUݲ·¹œÂ)ÑýGo¹°…JºÈ%%ƒÛ :ø cÕü_ŸLï¸YËhìµ[=-þY”lÍÀ–Ã@|Ò¨‚!kNÎÝuÁ’OÞÏízxâá3{†”“wc]Y¸øí€{0auöï¼3hrãÛF;åèК2HJÕ¡¿ÓüMãßÉÙz…êxé WôÇI×ôú§e¢*ËÚ4§ÕM­´Æü¶wýä'$õÐkR6œïÝá¦qíçMè•«Qê„:êâç¶>% Jf7ÙŸaHeô…töâƒÅÉ•ïÓŸA2·ÕwƒŒZ™è «·"K^G.yÙbŒR'D#L×K•jÓù4‘hØueR2¯í‘,SV^—S*™Â`ÉØÜóÇ‚ð½K ï ¤,ë;\f¡)J#‹/ÑòˆZìÓ2ríþ¾‘o嬹6#·Ó®¹§ú`ÔS{€(œÛþ´)j•‹*™T­P‰ºÞ*tÚ’µ´SL(#CÜ…'"VÔ|‚¾þqˆxäWHÅå=/û|QH½úXÓù]æ‹)E¨”·8·ÉÌ* J|®4Mº'R˜ûyßœV{–|1dv›#Ó¶uÚ6‡b%±ªÒ¨Öø/~HAA¡Ñ†¸ á?aCEª Ø€!ݺE‹0#JÆT\y¡à…Ð 9¡Œ…µ¹±þâÞ_ÔÑ@IDATSñ'£VüõÄë—¨ÙˆP ê©"Š #‘Êüaª…¢çÈFI.4X •?臞 ’€V­ó{CZ¥†nâç®4{E[´'ÍÏ„ Q) ãŽKºB qØZô!„£Z¥¯þ¢Ñxçr⹇¤Eˆ QÓÄ1Câ|Àkô­F¹ ó7ϰSy‘$@ìòñ…é\¸É‚~'çì=²ñ†mß~D‰ èD)±qÕ@ðF+À~»P…©)òÅK®FæÍ™³GéO Òpæž«ñˆ8Z÷À|†O`~q@H¢`ÞXªTLó¤A‚t9EÇe„·ÁuŒ`>²z =Hózh`r³›•mš¾~{_ÄÅË!F¨cÎ#¤iƒF³:®ƒDUZVl6é'´_cÅ 2†Ø «Ïͤø ‘^ •ñ)'²2´Š÷|²žbUt `@f þå,]-y€êú& ÞœjP1ÌlP%ÔÕ8 %NnÇd$Üo°LűŽcðCŸ¯¡TM…€™ñ‹ë]¯©® ¥¥¬S¨b{%»Á!ƒQÿAµiv{uSÍWùŒÏ»Ïow!ïú»“:lØqyŘS7ßÈ£Q¢0pªÐaF¬Hc‹e™Í)0¦•û*2Rív%ÕÞQúìàB&½¿kÍ'²¯ɉ’}÷=/ΟÔqKV,sE‡œ&سî¸ÌªYHdm+˰+ÒåQMEÔ³¸Õù"1Ó7£Ñ]é/Ô[B-VÆÙ¢•ÄuϾ¹‘gD¦ë!"Yã.,‹5ª,éÿ¤€="†5’@êäÆgBÁüJU*'É+ÓÀž±Ñ™ÞmJØûœ Hê½ÜjbûÕ¿ªœÉ6ÛþÓ$­@$ÎûÂâ‡E.ê.k”H§¼¬ßárϽPÔ±ùÎFÌdª5.Ü“‹)‘ÑuâöËk±·÷Ø8¬Iîê½ÓÖ8÷Ѫ®Øêo¶Ôà…ý†EÁLÂÊŒ€¨ßâw1 =œŠ)ÿWp/ K…¤¬T:¨ygÈàã×÷QÀ\D´RWCº¦ ðÑ´®"šçënÎ@Ê/ìÞ—”MÑuj¨öÐÊ‹-¶dÎ'(DmUõKymî¦HÓËQ¦¦¹é9Bæ’u¼"d1šŸüöpt§Õ”2©é§k.öÐ •—~28±s¶ úfä†Fy¯WO®ä*¯ð _úñ“q]ûo<}xf÷ ¾KŠôuAà#® 4Ço¥÷).†k”ÅÆšÞ¥èf7¿¡Ô ‹ÞÛ—siðÆ›ãçu?F·4yµÚþ(TCå ³ÿÕEï–8{3­U9ˆp^ѽ¨ÅáœÛýA„¢¢ýàÔÈ“‚çA€[ ‘ÅïÞL5Õ,.*JÇ$×=ó=Q1ìª[C— 9 Îuiϳžò€9íûòÖgµó¬¦ÓŒUÖ´ÿ­¦åçÆæ´ÈªCab3Ù!‘…« 
&˜ÓªbÖˆGb×fÄÂ|8P dÞûm6óú×$QvF›=/×z¾æ.º¯iÖô¿¸ LÙ5U ¦ û(H,|—öîÝ‹uêÔI\Äm‰t*k\LÍp3ÆÍÿK}ÿmz0›À‘ñìq¢€K9ãñF% Zl^.E‚({Ar‘* :ãïù:(ÙçèCfŠm!–YM”©”“Þo½òÎA„Ñ”•–&YÁ½Kz„ñˆ ™2êŽÊ4´v¥hÄ⨠6(ü™žÉr:î\tfݳ @ƒÿÆ¢J `SxÓ\ÐAñ4yáïéŠB¤g«ÉU õ‡7Úže«=ùµ#r¥˜–j+)/° ç58.e4~ògÖô—ï¼Q£Ï„F‡œ’æê²ufSd"$Íj¡/­› ì5ñÀWk‘w 4{dïéEeŽÊI5I‘&Z!_Þýš‘ÉèVsF¥×ì×óÃV†'Å€UÈê[¹€,M©xeH6¤†-—EUJi K n]¯š£?ù~ ½?>uàÍ£„H¹àë‘ ¦ãÛ®ØðU‚özuêØ6kÔ2-‘¦ò GUÚ”fOO“ÔTº©¶8ÎÔl Q1½ÚbÊ¢ÕTuí|ý9¯\dxõŽV¿$_ÞÒõŸíûédžk[ÀôÒµSY(³²¶žŠ×Ìo} ’6-»ñš€’uªžþv9ѸI؆% Œñ›ÚûžG™Ö*lDúÍEX›>3Ä»÷þ4—ï&«:~tÏJiF¹`±‘³Vº "Š3zÔfˆŸºº,·×¶ù»Íî´oø…ÁÃ;ÏÚqk wÜ[›7~:¦²´áþ.c(°-opE¦aPîå”WÞ ¸¯ÁÖT&¨×nüÏû÷l­<¢‚ã’զɪ”ú‹ŒÊôi͎錆×2ÞýÒ~URª” ×24¡£ðQÝZj¨Ÿ?ɯ,«>åµ Fudã=}#Äjë–ßÿ”RÍj͸_Tf³dY2ªþrï[R•iÍNÅ’ñ2ù;ŽïØqbÝ‚òQóÜõæSŸ2ž Ä/å|Ì(KzÉÒ)ô 9û9Y(6ÿRç)Mo.:×c^¯:µdnÛÛžç…Kµ]N0rÆÏy²’Úƒ´[‘XÛIfv8¸ôâûÉ|m4øðËÒ•}o|Á:¤mØ a©sêé#}2ãæÿ©·jÚZÛ|TëpzRJ.Ìîô­þQ[Z{õëÇm|%¶ÓGU ZŽ:IàmÝo¡Í–1½ÇQÑ%¯“‘åt•†Ÿë¾¹ßé+«áiÍûŒ _¹¨Ð¤ÔT&gh &#çS·sñg½—µ¸¸ôóN‹Ûžž}µçêÅ“÷5 ) ð.Oh¶aÝ•¾ÙÍÏC•ŠûÕ\ÞÖ…®Áî¤õ:ÝFÕÁRXÎíV1}~£íR \3ŠÍæ4ÀÃT¨6Þè?ÿGÕ,•üÞß| 8œÀ§ëMO½eHÝ"åU­Õ‹½Å‚W™Ûâ¶×bbºi¯\J±Øõlõ ;^šßëfî­Î›ºý¸þïlŠÚEb¡¿\®—™¢']›1ìR­ÅÎϾÞ%âÒÑá#ô¨1Œ$ÉRƒb$°¦Ñ¿[%·Û¤é F%#›ÂÅ\s:_xç=¼‰…Ü)ƪaGÀ¢KU3‘ G¬¤~Éé†sˆá9)I–›#‚×jT9Š,j»z 3¨`X×ö×dyS¸Èèu–RŸyôDëy«˜jz9OI¹+Õžõ¬ø£À7¥cÞ½Ð}ý+¶-G'ë¯y0p÷J8AÉ 2 ) ô'pNXž¡Ð€ˆ =3~BÞM¢1X\ÍÆsNRÍyøó FÜ+ü.eÊBmØÖPJFãVÔ†@ŒIòT¸ ‘#<‡ >úŸ„Ô¸ÍÀ§€È `A>ãÉØ9›Î.â´œBO}¸ppÎHï^ñ)2eíã))Rñ}W”8áëýJi+…^dV)Žh‚»?ŸI%<PB‡ ‹œí!Z>E× .ZkX.#¨³‡N?¨»aïçãµ½•sþ-4žÝäÐò›Vô¸È…c™Éë¥&©6Õá.Ô˜ S2ïN':BPYtW7D†Ô±ç›e‹úädá…LJQêî#c[/Út+‡â¸ YÕö‹hX, ¯½×O½Ýlò©¯ãþɘŒ­)ˆ§,jH—ñNH œIÈ’Ì›)É “Àf¶àâ hLËUpf¯å­E‚¨#’H±¯PnT¬þlÄ´^ûVž¼òÃëÓ¶¡o,CrÛ\gàR„õŠcu!eM÷?Sf$<›K!É”ö›V;³îY ±ÕK­ÿCÑ÷b`$±ÇÂͬê™/ö¢²2$` vÇE¦6ÝŸwjКnG']{oF§O–}Ó‹*±ÍüoŒÞ{aÇ»¯M¬¬¬¯-ÉÔFì|Œq .‹A_äyZ%5íYÑ}…‰W tª[ÐÊt‚®šÏëÆÆP%¦ðžë{ÎR~ö·Ýt*°%d¸œ»nó‚áæì¸»pz»+¾÷~³±omšÐ~S¸\. 
éÒTŠPŠ:Y.BÕÍEb¢€Âü±àÒ/ßÛõæ/´Îé·cÑ…át×Iɦf_ŠHïIäFbx"mçŒÃvWÕâH’<ÉfHv‡­,ç×Fœ~òÜVÝä`Kgî{ë˜Ûxëü+£–õ¸:ãb;ºtdrƒ­«¯óúáͨSôÞo“OÖšßòVîgoe·¸²üÛöuÈ[ÿíԉݖ­=:ƒÎ[„LâÜTº[‚dæÛû—„Îos‰ Z (Q«ÀË¡ÌËëtfÌ£2êžVètÖÕWë-íú] eˆKBÞ­: „A¥ iÄar‘€¡DFDžG~r€Dm¨^ÁçäQ­Ô*EîBQãɾÖ=Žk¾;ÍVÓUæ²h’´ŒÅí)N1°OQª¬yQ4ßEŠ|Ú Ÿ¶|×tð lÚ|i캮w'ìiº±ïWž€söÎËÚ傪Z©5Ÿ€ûa½^‹X;øKB­%WÈü(j-çC Q j‹rÊ©ÞT wñí×n¸6q|ý“5Òk;ËœB$ªÓhCN§0sœ !êdm2‚Èpe,uªÙüÖ§Ív…#ô÷y·²×v¸â(UH ¥›bW>¼9 Š47ü¤dÉ&ˆ7]£Ôú¿7ä٘Ɋ¬H®.v§#ý¹ÁT„òÌŒ,(zÍj“#È%©’ G«cD` f‚b‰Ig̼ŸõT(ĩӘJžV˜T¯ÃgÓð,§OQÕVÀÉ#â"ŽZ¶W +ò#*GLåv…­ø|Üö>¿æ?v¦Òü”•(“ˆú„–*…Žr­BãâJPì,¥N«Al•^kôz‚x6ÿ„¹½qÞ¥Þr?qæ?Ýz}BŠÂNÉ|‚Òƒ9ï ô«¢öúå1  )Ll¸íWSÖžÿÆu‰,lrX’Q ÌYaò'_lª·¹YCA,ú4Fž3\‰’§7];6W(ÞyÐ\–®1˜ïo&¿ÊôÖFÖ9‚£iP!ÎzóèÚÖw”A­2¤©j¨GàèX7í5<5½][¢)Y>~màWòö›%žd¥`g|æ^U§Oix|bæiò»òQÆ–_okñÕæ½SR#Y£N¶dÕ¿?ñK7~1Þ"TÎipVx¨ÉixùÙgŽ%½ï¬éþ·ÙoÜò»„yW[[ŒiË¿ï&(ý¹‡Fœø|ÓÁ]¹äq|þ1¥¤è½¦¡š]b»_ôk%U†—”Ž¿ñr•ÌW&nëEÁED’<Ä6IÈÐvk,ÊôoÿÈù$_M‹Õ_ë …¿a*JÜüñçkÎ ò Á|©¦dÇeÑZ|A_jZÚ‚‹ý}žˆZg ‚ZºèØ ذÔ-uHšUkSÍÔ`h‹Eƒšç`êG¾ºŒyfÛ:âz©6­é66¢˜\áÁs›°I$åúJ¦— R[ÀEWQÆÝ ýÍ)3Ûôãwµ´'eä´ C1!1¢‘W :“ÛëÈÌ7ŽüœMnOg2åDqttùã‡ùûý3E‘–e®ê,wË·A!‰ ìªæeg‘PòØ3ómË»Ê0ÔÈmt4ê -îð)˜¤qo¬³i2¿u1ÃXmÛÐ‡Š²Å-o!!äÁ>¿&I‹›^VúáPN¤ÎLÌÛû5—`NEà>Éô6 (cQAݳV¶¿cŠeè ï‹z+Jù`…U3è¹Eg›ú½;¾k¥ÕÐL‡Å¿Î>ÿFÎÅ&R/)-°I-5,5«kúEÀdf2e!“@MŒŸ÷·²©!/E7ÊqD«›ª>,ÿM¹ìKMؘB'OÉ}óDY¾Ë,±ˆ~8° Ï–D¯•‡HI‘ã§0É÷sÏìå%:§?`ÐÚÜ·^'ñ‡óǪ>볬 Z.geƒ j//ê%A_#OÖ²ˆÒxÝŒViPM¼Õr°¼ÂÀxñƒÚ}AÞÁ? « »/d\œ`t 4ba©Sjˆý!ïÕ Á7á8ýŸŠ? 
|º)XF‰t\ µ?áEÉ'uÎBvZ‘åçI—Wžÿöâøv‹7œ™=¥ÏÊU(=FÈ´ÞV?²ó’m—fM貎çäýV#W£8S’5ùä!l‡oç¢M`äÉï­+/.9ðÅÒ¡Íçî¾$pdr»Õ«/MžÐ:oÝõ©[¯Î(|]§—qв¢P±ÜfÏwßßó·év¶ëÆN:àéÝnÂÉoÖQ='Vß…$§íá˜õ߬¼[ÔË‘ã7*SˆžP…ÊÀø¢2¬×œkïÏ|c÷Ò»CW·9;ùb·½¯;ÝŽZ=!‚~¿—Çyeú¨ÝÄeI|Ѓ+àuQ…¢’¨BRÄǵIç.­2â¯Ibuåa“G0„žßßýãäémv¬¸4ÎGCÛgïþtùȦ³¶ý°dDÏÕÛoN¦@FÉGÍè*ª§±UbÞU¥yúØcN ˆŠP¹¼®ÌÔJHËârCLf}Ñeuǯ'Ÿo¼¥Ï/郗¸ÛïÕi¼.¹ ßr_¡†¸ì\}yÆ„ÖkÓ¬5ò)M×§« ë 0Q¯–˜]…¢ÜÄ;}Å"ÒPªÔE|د/;øc<2¸„É}åŠ!¬KÕ+ <% ©Ä$) ßIL[–wfÒÄÆ›m²,âÒXT©A?xwl~Ú×A æ¶:§Öªð*UŒgX” ˆÊô¬A*Qºðrn‡ÓóÿÖ“2X`Â0·n²¦ûíI»Z®pmòõ¶tÄÈš®gÝ>Hè#V§5ú=^Ü¹•PTÔS’bÍ D0X˜4ZâHáüË}!}.osCâ‚'€E-Dªžð(ÕP*–!7$g佯Ð9ù@Ä"³ŠlÈ#+ˆhò|ƒ]¾þU“/M?Ûqi›s9à 굆?ƒcФ(÷—kÌjg BiPEcQ$ÌZ~µÏ”×>MÕWF™]6F+öÁGCI8È ÈNlê$0 /7V’@h‚jIÌ-<6"NHbtøÖ c”Œ¬,)Rh(¿^‰Ì–JŸZ"keÚ ãö#¥…©Q$°yIb(õMÒTcxe€óH ¼­dF•ÕÍ=†Ec j­:"Ê€(4²¨4$*ÄHŒÓh´\ :ÿbïYMN"zPîChrKêDnà æF¹&‰Jâç=°t‚î$Kj™L¢R“ªæ¨TR\!Td"Í­Û¥7ËÝ;P›X'Á¸%4ðyǃ¼+Å¢G\þ€KŽ=ŠäàAƒd"›Ò ú#EF­#T¡ÕŠŽØ}Þ^°ôÎTÊC£”õß|ü÷JÀ>A}q’z?qòâjê„¶ߠĉ!¿|%.þÿü;„L^Dßg0Cà h†©¤ ßKàP`7yIûÆØú«qESšt&yfZÑþSH‡â3Z5Lé¦a¸ºHŠ:l[sbŒFHZüÉRÎ0ÿd¿Ã—ç>–;¥Ýº -Wo•Ç=ùäéÍv¤²0©’¹ožÒ«g¿ö‰¤¬ò¼&7l\Ã$y͈ž2fe4UpêÓTõ§µØ® TÆÍ Ýij´•ÅAE ƒºÈÜGQ(Ð3¤Ob”ÚDÄL!€7¤u³³Ïc`b•…HÍZÙùï2¶ö‚–ß{¸R%E¥¹Mv€4Òð‡Ç¨2U»ÂoÒ›X 1ƒ. 
54³u³©ªL=Ô† + jd|–E…è™WEŠùªº:KZ^1*Íiz²¯•¯Cf=áe̘t ‚)f/@Ö¨¢cN%éäÆP4s¢1WL¾õVHéË>Û:ÖpyünY«‡GØ¢7/B—Ÿ×ö&RI!]ĤӯοՒzf^mhQ‘57†ËÕ*)œÔŸ9RÙÿ·h*_-9”%÷¥"껨œ‚ ´EdÓ%r‰Fª‰¸¢ª°A°PWg?éQc*xà˜|¹‰+Z1ív§R2"ë* ùäê‚Ìå ®Vмbò¤³Š“[èÖx òV@×%6Y’QH1òiQjÔœÖΦ‹¼f½#í¥É*ç6<7·õÙ…/M­s|s»”e†Í]¿×Ìkšß\ñÖÅM¿T—§¥pUÓ˜*úˆ‘q‰7(P{0‚ò…˜S¢Ë"å~Ši 3¯5`•¨45JÛÂî——·¿ÅFQ7NŽ,ýÔ$¤š bÁj4ó03Ä8AzMîE·Z™Lþ¶Ñü»½—~3”¾àHPa£%Ü)[ !KÎŽÉûiôì»}T l,È'-{©‰\‘•ÑxcY¨B0JìlHu²:Æ>§í­˜OµæÚ‡:îUïSc’â%øÉN<ûšVŽè6A!•|x¾©BªÉ¾Ü]#׎?ÙAÂ:óîôŠ ÿ\qg˜Ë“¿øf£A¶²Ý§\Г¬0Â3À®4•»òcQ?4Á#Ïg$'©§Ÿy‰gÝ6&IæTZcv¬TeIÍ•‡³Õ¥–ÇÇ™]v‹7ÅLU9ô:—iIãoŒ¡äµ¿v}r³I¢‹îv/æy¥Qè¤!—œ‹†U`XY$¾…!Õ‘dOÀÏÈcEîG:BCü¬*„LQÑéãŸO¹Õa£…Ž¢ÒiªLD†{„Q£ò—Ÿ8Ô2)â6¡lw‡py € TJ—žŸJYR0øþ‹Tþ‹áÿ[à‡IÌpÛ­¯ß§–(|5G@–€m„÷ÖÌTUeÇ“pª±Ê£G…¯Uk-åŸV¯u{]ˆA4%™a„df2›Ë°ÄW®\EÔaÍEò“ñ†4ªÑš­_OšÛùð‚3ýs;EôÂô}]fu:i1&ÃÜðj•άOz’ÿL)QÊ-XM•66âB­~˜{þ5°šo­\9ëñ£ÂÌ´z%eÅaRlNÑ#a³ËíÍ"•¢aAÃcA‰B¿mG O“‡ø Ê´)ôJ‡Û`©Dc›\§é¯ùe« 2_*«þoO~©UêÊ ŠCM+ܯÁª‚AY9|ËLæ§®'©©¶€Û'…TÆî])Ф'ÜC­U_ZQ®’ YÝÇd)ÒGÅš’TÔn+ •v•[|øü^½—*‰>_œ*ã- ßñ¥g)žTÜ(¥ŒBiw¦xøLBu«Ô}RðH®„· ÒåvÀ†Rî*ËmFd‘L ™ À•_ÉPùïº_ò Z2à“Y'{å5½1õRë#].T""p6dV@Ê;™Âæ{E©›aÃ>®bág}Vvÿ“ûs®5óÖ’4[ýÒ§>³ÜlR)BÁA”¬õïç?3d /uÚÙôžÓ…ü¨ÃŽø¨^¢õ Ž$‰IúBbPaT>q=O‘WöE"6sF˜‹è¹àÿÅÞ{FIQnmÿ•sWç8†l@1 ((*(Š zÌr ’sP ˜Ž9=ŠTPI"¢"J†É©swuWêJýßíû~{¾<<ïzžõwÖ,×r4=ÕU÷}ッëw-Ù9ì–vRH;ï¡fsÉŠqŸÉIm``°–‚°{ xÙe—9eÓ¨WÆW Âß³eÐÌj .ò®Ÿ¼î²MÓvŽõ†mw¾1f®³Gof}ÁŒ$*û%èíU>={Þû£Ößü›&™<ä.Ñ^©”¯¬tHF¤y£Tò ^]-}Ï}ÿîÏSÀHºî‹Û*óK(8À|úãUè)°•ÂÈ猴ÙÔç$÷Ì7n‡^ôK›áZù¦E>QÕ±“–»I’upn0¨äH” *+rE9M«ÃþH"“î­Ë¦Û8HSP©4’„Üvš$ ¦I äC_ŒX|ÕK‹·Ýóè5oÏ~ï–ÙcÞ Ë4V"Âîj)Uà„õæ“`gXäîÐßì”;jB5ðLq:8ßÊi³=Èy¡¦„×ä´/áh-̇ ÂÊ Ð¥‹‚pL>„­KwÈÇúVGsEè—&<"’=Êt¹É˜ ©_›Gde~Éû0ÙÈç4¬O}¿ãÍM*löܯ‡>wi›bUæFàbó|ùBVps¼;dõÕ{g}0|ÍÕMã,þl"ðpüÁÑ*¸ý|VÍP ä’©,ò»ê’yÈÑ4~O:ÓM …*HOÁLÁÀž£«ÔÌ K)•±?,i<Äöñe¨8C)[ñBÒÈ{¼dë–‹ö«ªNmŒJzý8åJÚ€$-{Ý>(±Žôªæ«§n¶öâÝpÀOIŒÐö$0‡„ª”ÑàÝQgóyCäÇÃáMRe–ó`f·>Ú U°h€îÇD”»Š->ÈfêY¼ïƒ‘_ý…¤–~uãšk7yüžŸÚ÷‹çY÷–cU\]¯±cÍî©‹†"ëÊk)Òƒ¥ Ù@ ö÷äÑÇ¿ÿøM›¦o_¹½HQ·]^؇ÿ³_ÿñ ø?ûöÿöêã✆Ñ8äÛ¡ ­ì¾Ð Í!³¯Xùâ'«¯Ÿ´þ«‹-å“Åáj;†Ùnh†ÝN´ŠégÆqKÂÃl½Ükº±PÌ]oäÜdah—êÌϹu刽 Ìyà3±{ËÏßQN{œ¬ëÑqÛÃL•Ü]0òZXôöä[›:Ô‡ýVªjq„Ñ5î‘‘;ë" ë®;¼üü]5ô)™£e·V•lJ…ñê~î³í¸×JÐÿÜz?*p‹¾/•”™ßœ«” 8%]jyè«A 
­M}k`ĉ1ʼO†º)iææþ¬»ÿŠ­·Ky.•"¤"£Ä™!µ#m[ i°|@´-°ÿÐbÄ ¬'›ƒ8oOÊçó¹ж}ž©R2$ ›d׬-#ò 2 VÎÃuMF€œë”¬Ä­Ürë¿­Xðú= ¿¸…\þ`Ô>¯ïP»Ä‘§PN£’vqz¸‘ZO jjéœõñhÕ2ÿñÉYLyð³Q¬¿ç“s€€™-æi‹­AêãÆW_¿[ë"WßaØÄË—´«€ß‡rÁ÷›Cr@ˆ È¼fD”1ýlÉ?ÔÛøêèï©|K柹äËZä$»KxzÇD€xNýjdAˆÏûvb‡qäéCcÒÈ!P¡Çs¹™Ÿô [Ÿ³ÓNeí„Eª™rOG®Åëጔ•„œlU:{+“À~¤Ur™ $é‰ \-Õ¹F&V¸þ¶÷†dÈÖöœ Ú§ì;ë][û›uöäƒ ÐJ®•)ô¾<îû*:öÜù_f{åçÇ솞ÌÚ­”íÃ}J öAœ¥\åùÌߘŠ-Òn–a2¥ 4MU¤xÚ€Ó¤R Ñ{6¬:ÒɾèÌ3_&$}á¥oÍ>÷•Iýž>Íü&Iœí3ð¡î>A¹.¹×ª4T@-S9ý蚊 ôž-;TZ6)´iÉi£%&gü{Ì”ç}6L¤½îˆl{æ°€ Í½G SK«©"¢†¨*†ð•L:àéµöºñ?"jõê‹wçÚÅGÇÿs/Ã"98›‚\Ÿ>,^Û›/¹*õA¿˜f]q9à|élÂA2M$M5(œW9i€N°ÓePÅÉÀð­ êEĈËéFo-¬æ‰òþ~ý½=…ãÙb®.6¸¤Q%L;uÅ<ôý<@E=µïI #óꔯÎÊ–»Á6ÓÓ NçZøÑðâ‹dËŒŸòŠä…žô±Y[ÏTÒ½k/ù’Ö•wn?HƒÂN®Ð¢E4˜{cPì"%1ØvA"ð£K‹%µ¾Oÿn;ו/òd]‰ÇCµ$7 z°d*.NxxÛð¦ÒÑã‚}\}&pÞÃ\Šö"޹a¯+Ý㯛³á2¿§ÁVkÔ0/[-º<0Ñ ¾J#Žã.’t«©·k`øôR _}Á~šôã–LÞTÉOÚß=4«¼¡Ð"IÒjé¿°!ãåÖ\—¹Þw½ŸŽª%Àèpy# Ç Í)Æ‹%Þ÷¾M§¶ç$M%^¹õ€a—c5ágïüÌDSªÙý⾩ù¢ÜÚ’!ºZ/"‹Fè¡‚@©“í,´úïþl(+÷|ÜØý'Ëp“ýq@-kÿ—ýUÿÉ`—Vô±.ž @5Bª:,æ rÏØ©/}òÔƒgE˜Æ ]—îQüC0“Ò d,gJÀh 3!¦!”…ŠÏïÓ­’a•Lø†Y¥ù!zm°Í`³¯k 2®Lù Ýãuu%;ïýÔ¥?LÝxöÚKw™¶9wûèîèºþéêu×ü€Zh„ãlÉÆÌŒ'SZ/+!>`è¦×ëà ΢E;kŸKšÉÄ < y~G³Œ“ ï̹m»¤9šèåzÓÝœ‹†+LtM+jÅr°–‚=ª@-òBNÉ‘b4VUÈäi“r9,ie‰‘ $kãàˆVéw v^NŒôÏšքzÍ ‰ÊÑaÔP9bDÅ9R†K5[;Û WfÍæÄO'kòq«ÀÐò`5ÔD˜'ù¼’ó}e0‚yt‹$äÐH[.¡*ç¢4 À˜0ks,Åô1>øõýŒWÓõN$Í1<´ã‚Àç*ÙС3+ØGoZiƒåØvt»ŒÁÊö ž èØá¤%È’­¬29]s{ê•’ÂË=™C‘:zÖ†»f]òÖê¯þ~ÿè¥Ïí^øÄÐ_‚×òŠWðt)Íá`rÅl @z‡~\¼1%–óp^Ñê)»}œI¨]Ùã«w÷9ë^þa ×'½äÅï­ºâ_¹”ÌònŠã޵÷† {·¯©—€ ZÌç<,§d%˜õñÖÇs=u\}^UxÆ'[Â0°9a™ÒÚ| ¹l>è öd{Ât´CÐf¯w×&@ŽìOü\ÏVÓ$/ôúi/÷Ýò-ÓVÝôΜn†·1íÂ×1%@•xÎÄ‚<ŸUº\¸ÈúÂ9#S"»IÁ¬+ŒíÍ&A2ÙÌY#ÇbŒ/Ä4Åúìí—Ì?ïMgC¡ú®ÞŒÏ]UàµÀŽã(‰!ª–!išÄ½jœ‡ÏCe\´†$YÚÒ,ØÚÓªuE1›´Uˆ¼v–§lZPkBëÔ_‚;„aaº‰ªŒ@k†V,Ÿƒd·_ŒY -[p8ƒb ]* ) îEµèá[’‡æyáÌ«Þ\óí­+Æ9ï³±O\¶_—lÖ@§&SroÌþ)ûCŒ #œÕžnñù=!d4 ù<ÿÐå¥Üq£›…߈)gô$ÀïÀ—®M7|d(Õ•ët!'*žà/ÿÑ   ùrîR’/!`Æ(4ìèÇ]¯®„¥–“d!é˜çšå_|X íß›,ª&|´ùºžèée8jƒÐˆƒS?zØM¸ rÞå3Š’ÊÚ Å²Ê#rÄhÎgªèjȘªò u/_J""F`"K“”¤I^Ê‹Z Ø%+ÿ> ,”T “cxž²ldS*$Ž‘P¥q%²!h<[v…ï ¨Ë}l¨2ˆ!‡jf¾ùÝ…µUߺiŒ4vÄ[Þ9Ë5MØðȦ +.Ü:ï«Kž¸4ƒ|'ýκ¯ïz|ôO>Ú4ÿÓà@]¼Øæ`ŽeÐ<'R j22œAe·”ää7?^Cÿäü¿¾Ü_ð½&ÿs`]ºª{\!P;Ciå€ ´à”¶á*jÐÊ/îÑ’åÛ¯1Š¢€š#©=ˆ ÌEײväl3¿=«-Ó6ù­!U‘ªL)7ÛÈ€Xµý2è¶1ev' Áäv 
–Ö”›%ÓÉnf°¢€§–„ssá§/=üKkÓ‹·´Ûœú ‡L9̯S¿=Í'Èóvœçø– ÙmN>Ý1ý›óH2µ`ÛøX)'w@{yˆwHØ ëiPH­hĘS-Û%R1‘TÆd„˜ˆãë’t›õ,Ý;Q1R ÞæöP.-(qðñÀÕH'N[Q¡€õ’ÄpILÛÀ]VGD¿$giš9oÂ(ÂÃHöÆ,܆:L 8œ´ÂWZm1ô¤w›õ-Ë~•'× ˆÍ]?qõ†©oþFYY) Á‘Ž¥SXS©ÕãUXÁ=5þCåßQÊ#·] ø¢_íBÝzu&§‰XÌ#Võè6änÖ»d—s»8÷˜ùžü±tùx'r€è=ÆQ‹N9DÅ é ‡Ñ0TÂð¼ìIx#|JÚ.û«¼ùR'írô‚Å•Ãf’žwæ3åTvíEïÔêÇG~”Æ»ÛäßULj–›!‹“· )6è2Hë¡/.îεÏÜtî †úŸ–ÒZf}Õ˜°MÙÜhW'a¼êªØ` *¾¾c™óùßL ú©îxjûš¸àŠûN¤17.èVaÅþëï¿~On7f;Ë •²»ô8AQ99å+ºVìnp û[³-C¢'°T•u"tkó*¢eÒ¹F¶^¦ ­ìÆ‚)Èlh†3€KƒÝ¬ûêvº_ïʪ>³Q˜H¸ÃÉÌ7©¬‡)T ÍÙ6 5'md %ï¤[m˾» gMxÏ‚×ÃHEÀ(‹Á@ݧYõLQMOÝy~RïÀ=&ÃÚÓw‡yò‹ö Ï#M+öŽ¥)8Ed¼ÛÚŸ®r8Š£Ø0Oiè?>©ëÉïçaꙑ‚ƒ%E´¤I Tžµ{ì¬íãþx A³ÇríQ¶¯ƒxá2”Hf¨|[°“#œ²^o•¬k-ñã¬XÎj-¼K¥È‹uGŸöñ™ÑX#EƒOTiñ(ƒˆ™ýÝû†Š—çâ½I†g<|D q/NýÎ#ž´ÖóÀÆÆ“š¼ñì9»/,–^°c0LJò?7úCÅR²:PךIíýÁEÙµ·QJùüXQë ûØ2 ÷Ñ"œ 6ÌA&°áôŒdAºî *ÃÛò MCÒrªžd–Á=`J&_Q· æwouRË‚r¢×l# çä@CU@èênêì'e?;ÄEž±bôÁB:øØ˜À0rÝ<"[ûõʽ¾¦Âfÿ_þU9×üõõ'^¢”ñrŒœ/T:rÐ É 3Ç>£M;åÕݸìì-<â~~ìÁ‚!å”ß#2,ã¦Ý½J/J#.‚‡œŸUî©õV?yýþß›„Pÿ[×·àºô‚otDñÐé8ê̾µ}Ö_ò#Øž»PI¶éý#'}RWQÔÀàÖêJ9®)Ö?/ÛGÝÊdSãQáÕq?+ŒuûÞ½ó‰oîñǼñTÒgÉùeŠIËR²§»-(x9‚ŠúƒJ9o•KAB)kÏŒ9OÀï®òDòR:ì÷kzÁ :ãÅÃ9­g !2š þñàæÃí,Ò1âš–éζT»«5P¬ô–îžržô!aÌ  z¡RÀ"&ALU!ÆFxW*™Ý¥^̤ƒ\UR–^ºrg€ð—Ëï÷p4•Õ3*¢ôh]9$ËD©¹—¾ûèß¾XsûÖ®\OŸP¿œ.Áð ZÁCt0(ú\ L9 aÿñÎPÞ\»o2즫÷þCSŠ¥f7GKFjéÕ;»Y©t nËìÍ$†xNʳ!¿|¿;2í‹‹½PXùS€cè)ƒNYráF„>qå†U¾ máW®Ý Žézº¾&ÊÝ“ŽG…ª3BÃâ9”ƒPÔùÇ»ÃBþàý/Ÿ ¹’•%±Ân‰PeD V ƒc¨[=Pß Tv_hK`ˆ /»ÜTŽÎ,fOÿâ!ü¦YéLA2…¿róû±cé_’Èšb‚œÉJ,ˆQèñÉjNqËÎ…Bt<ÝõÈ[ºD°FâwrGzîzj¤›«aà5U–BE̤V]ñîÚioÿã¶•pþoÿú«ý§‚ÐE& MHʧï8wN”ê+·—Ãç#£ŽŒ? 
:XÓêŠbQž@Ñ”ñx<` 5’Ïí‹KñЕ)‚î`«Ô€Pæ jKÉi/„Z˜'°.¦óIQ#Áhgª‹£`i"Þ|Îãö劎áAO2‘a/„§”°Ýi9ïìL¸r¡„=":Un÷wîüÝò‰OóŽjbFÕ.©³ÑÝ7#寕‡*Án$“O¦ÄiÞ¡™\ºYȹPÞ]âJF¡©.è)B@Ór§Üf [Ý Ðë I¨“š…›U:›D’ É™ù»–%\%°Ð°xÚI–Ò¿®áøï‡¼U$[obJ‰cbHWTfËñ‹¾ã?àÉeÕB.ñ÷EË9S)±Fˆ€Â2â`ùìƒ^Ú’"xŒÁ\“nIƒüöŠ+•qñ.÷FCÑΞ.g¡šA<ïä@ ž(µ?öÅ]÷Ÿõq»±œUý JÛ‡Hƒ„H&ÒR&%ܬ±a ¤”C»s½¼‡uPÝãat)kªcØà•q‰> jð­Œ|b!i÷Ó3 L§­ ÖkTT©¡ªª³Ð”*&N¦Oµ ³³Üî zpØ!EÈWH—Sà²4‡’¹EŸ_ºî‚ïžÑiyÚ{çÄxá˜÷—n™8眷W½sËú»~žòþ°å7~ Ý×…_]íùÃ7y¹jPÌ‚Š eoIç[üÜ–hŽrCI‡' óܸ7igk½µÝ¹˜)€l4H“h:¯ƒüyQSzeØ3X^*çY¯–Nõö¯9%ÑŒ@Ë#pžÉªº?xj”ƒ®¬A•wžNÖLZ¸´dßeK®øW2  7èæ[tY´e¾Ot@I)´Z8p‚ U”m€?(’uŒõR¹<²rç ðXα3B0‘Õæ˜_P• HÌüø`YV“Hª­5+û¼‚ÃâLÑÎû½þö܉"’ ² EÒf}8.T…¤í +.~}Þ'·/=û§Ñ“~ïÝë¯à?Ê^)™›ß½ðµ~KÉÐǦ"žj\̓ü­¨+$]#ÔAÈ8-x "lQVHΖ(M±ð,€æËùóVOÁ(ø ¿f)2"h/X¹ÈÖ Úî˜kñ§Wß8xÅ>'©iÍ)8^ÞO+Üda¨¼XP —Á'TvAKÚ!t”ÒRÅ£¯‹ û‰hýŸþÌ¥/ßïìê\òÅ™ïLüU.j4ç-èÅñmòqî+Ö_Ô A܆%è9Á¡3©·UdtD*I$ xBjVì4ãƒØDz™¬ ‡é^µÃ` (‹ŠMI±Õœ“å0.Ž&{€ÕC!\íÿêŒ![Æåp`(c•2—ö¸k“Rúnð1ùøXVÉü!ÏAç9ÈGL›ƒÑ '0€ê`d\urAƒ‘`x ƒÐ ¤ H˜ýðW s.øpÕw×=}Åçnº|ý¥»á8kÔ¶pN€ÈGL™r£. 'À¬@£+8r”,:›À¶Z‡ÁÕñ¯]ë =óŸþú«ýŸ¾ÂêëÃseTž-8ܾö媕¯Üýô·÷x)1T\1¡¶T§‹t¥ qÜù}s¾:§¨åàþÍd®˜­ WIÅ, x†‡Ée q—¡ö’AQ x–Odz¦  F¬ ;Жji¨ªQHM½t[NNú€”&€ÅDQècë!Eb² vuI6˜9‰X¦Ç+zðÙóDŸͧ×9œ3ýƒ3¾<}ǹ©b›ðlæê‡ÐÕb$ŸOûp÷¤¯•ÆÎ2žE_œiNŒªN%àÕÐIàAÛ­¤D€Ú¾ÁãPˆòc_ó± ŒE©š–•7ŠlꇒĊ­cÚ»[Vì¹&ïéL6Íür"-š«7þÍCÄV|zS®%ìŸ{<üï)K¿œô{fvî¬.§Xâ01X†°0Ä•ú(½A™"ñ4þÀæá²Y°å -»è»+4»f÷DÁM-Ú=¾§§sÙΫ½¼ë‘oG– ¿ûŒ7.~ìó»–Þþ ÂUœ!¬Y‚|((+q Cƒl!ƒ"Q­1Ke%§=¶g,C¹›þîHÑo¿ œŽ‹¶]ß“ÈÏýðbŒåûþràëÎÿ||c¸/ø¹g¾uXNúúÀ×…x»º¤ÅÖÒõ z‡m$Ê…Õ”FbзòÐxö)IÛGTÙ2ûÏÑ-\9HÉ‚š) eÅÅC|þ°c.×S÷ì+¡ÊЉŸCiFý9ç¾¹tÔg^>ŒCénã4!²´«¨Hs¿¯ã…Ç<-¯©›ÏñÓ>#mÃtÀ\ѪªAXAPÙr,È"¸á9ŒœÂ"Õ¬óY}H)@úÛУ.k™,b.ÞvWY¶V|ua¢Ïnú¬V†©{Ü¡à¹RŒx¨Ú³r;@ü¡g¤¹7ž+(õ\_(‚ ŠE¢†Í@£e—îºE3Š‚O˜{ÑæEc·Û6çq×ceùl ›¾yÜÃ[Ça,Œð§¸ÏÈ—UªtÊ Ûü£p¥%|PE2˜bô!Âe)ß_‘sÙR÷S·¿;çüµÐPwâ²3·÷§Ë ·L>T?iß™é~VS°õ¡΄›AÔŒX±Ækxu¤MÃÛ’SîÎ}U2h%K Aj9Û“K6–ýz‡áe輯×eŒ~>­­æèÉ_íªÚ°¯Ï§?ù·¥lª¢E×òÙæˆÃ]âÎ)û'¼;ø©¿Äþ ¢È£àÛE+iÕ<Ür's ¹ØvY1d?Âó„]Uûmì–~ ‘Õ!œÐš<”Ï µ¬¦¥LÀ†é\йmÖÊH-+|ˆ–‘x¦Ô´vû¤ùW¼¸|Ó¤…gonö)sŠœÓ˲‹´+vƒpªèîú­û×z¦‹³¹B!2„üá$‹å`†˜'Ìæ“£§‚—VÑ-‚âÄŒ•,w/þqB¥!J"·@4†ø‰~´#"EËmûª+\“¿ºèÆ\2 ÍeÉAW=‡¨a6\ÀfE)Ö„QîŶÂí)Z‰9‚®$£&¥jze^zbÓ 
˜~ÚÛjNšôÜзoÀqL J%KéSv7$%æmò! x iww©n­ÚúA=¥ÈB­E;ÑRÀ£•Z×OJ0)Ùvx¶Y¬Ê4<ŽvVÕYr®]WsUHÐ6 n GKT Éˆ€X• UèâK?¼|M·´$ }\É<ñÒ€q ᬌ²%îùîô%ã>/ÍŒOÆ=>üÓéÛ¯šuÁ†Õ{'Ü7ê­ç÷ü}ݸ©DºW¯¨½Ò6ó»¿/ñö’]·<:ºÀ1bªØ L†N@VYg©‰Õd2)½¤|@]Ö¡™_” 0Míôô­Í¹a€p6“(^0Ì$ó¢'s÷ÞNY8j‡Ç#ªš4ÿë ž÷õC/~tô§³w\µxäç}&Оi-C¡Œ¾ó¤Y¯þ°zæ˜÷Š9]–­~y”_^ uvs{3…Ä ‘`ÆŽâoç`æåUõ.ØÃXÄåûô(Y0+tɵûΨ ÚOg S'>ýÄŒêÈÔþ/÷gñ:÷;¸›F#òXBi«„˜Ô«yèŽ×Ó Å€Ú8p"“K¾´cê¤Ú!„I$2§òg@Ô£Œ.¯æS>Ú3icE C ë.Û_H+!ÊçbÉb!«”• PÝ)K•q;†åD€flKͱ\_¨³ýÇ(Ì‹DV³)“£0¦ m‡xoO‹=„ W޽cÍK ýþìü¿¼Þ_-èÿrIþ'ÿ€F`¹óËz zgËÓ¯5{É×—W5ˆ|qÎÚnN÷?ºã6ÛsÌÕ˜šòÁ@€ÝÌýv¢7XŸ0 àD4IH%¢]î8÷ªÙNÉx÷ÞÞ¯ZpÀ4§@ÜhIm¥$`›]Á>NªGK¢$Dº#Ü)V`khDƒ¨ÝÏã’4T3Ù& Œò…öÙÛNíRÞ¶ñìtI©÷–»Ç>Ϥ7µnïÙêH—yÔà‹Üñ²Bk¶Û犺+©÷ß$ˆNFŽ þ *Äî3m×9¶Ðœ5ÎÞ6&Q쬪ŠˆÖL§Ûèž#˄ޑìΙEÂEOÛ6¿4¬î6;¡î‰QU ó(ÿ§`ºÑ,Õ‘êb)Ÿ·² ðž²£ŸS§MþzÐ÷Éï×þ4 °‰J¯)Lg.#;%”ÇüaJ'5DrÚ‘[Ÿ;¹.\;yû¹¿å~zxï¹’WºÇ©hîöƒ™ÚÆ,&F½Ã’š,H²U(”â4ùªUw~X1¢È›ûžMËÇ–l¹Duz…ñGØ™EªnåMÅŽE@ÿ §qÖÇ®o0 c—hE–<0Pƒw†E†aIðk¿€ŠÄünH™TKx~ÙÖkŸx펇®\¶à†—êª=Å7ü@Ó}kP.㙾a¬ µÓ'¯ mKìÓTdª¥ûÖ8ôH©‹$<`Ø=Ôö[»Ñ“Æ3s¿îÒÍßõWÜ™é{†ö²GšÌ-3¾½höžÛ—šÖ³ÑºšF_ÿŽD7ôE<¬=UYñ“]Á `%G¥2|`jÙÐrkyã·™‚X¼çý~qågë-ß¶¥ïÃ{û¨ìˆS.ý—¶Ï¯$Éí>_Ðp"ïÝš÷ ì|!‹¬²ÉÓýçAæ`ä©ÙT x*DÉ#"QÜÙp0Â`²³j#X?ðyC[›‚ÝY† &›Í‘´™6zœg÷ɱ¼B48ƒí.¶Ê¨®)GÛæ$È´U @yY¶œx¹” ÐÁ†ŽnÊÕaGYÅçfš›ÛŸ½âx tþÂáÿº¿Ï³gÔ^m¼pÅ!qã¸gþ¹ÛÊ–[)–WŽþ‚ƒ+/üÅ}‹.û¥Zÿò˜ßK ý4~¥ò~$R/6>;òƒ°è{rÜ> gŠ èÏb€vTA<´K¿¬Ø²áÀ”/( æñTÇÌÝã†4ýó‹`ï“RèvGéIHZm7’ ï^lÜo.Ò—Ë+õÂÀçG¦ŠÁW.=,Õ°ãr„Ÿw¹-àØÂ^G‡N›6èùþîAµ|5_f ûÝ­4׈!cï@ZšÚ[!h:ê^8`öÀEDŒöUÔUàªê`¿f¥Í&‚•¬©ö?2rÓâK߬@å4$bâ@<ÌAupâïFܾç´þ§{æ~Ý´¯®Î9Ýë˜.ÉúªÝwÒåÈ#ÇHÆe HÊ…š[ðóßàôÚ]7—èüŒmcËdΗ—ÇbÌ_Î ÇÏ“Íwz86UêYpÖ¦™çnXsÉ>C2x‚sp§]ê_†yéQ‡ãŠ;/êMÔ9•êê­Æ@É¡¡q‹=áŽveœý0zí¶1ó^@„å[ïY±áÁÕkA|ÿ“w‚ÿÖ{û«þo]¦ÿþRaÖ ´Xž0¥"HòÀ„¥Àˆ·óøS[î‡ÙJ…áB"÷y@3È×v<ùȰ—üL•ÖÁE=}ŽäÅ|A””ó‰DŒŠºé¨0tÔI”RUÁ˜’Ê1€ouÕ©FÞ&µ°zq(:4Š„vŒ`*“`0ã x$“¤U0Óo¿êåkv¶åz|~w´ÊÓy<]Òý~¤Žá­¸v´ üN@O€¬ÑåLÙq¡‘eù*/±ioµ˜éJ¬ùvñò›^šÿÎ=kÏÚUÐ!óÐð.àRô¢ ÜÔ"\Œîhª^€È¯Û£JX6nà9‘÷ÀŸB-Ô/† 9Ip‹²®hŽár ª®Aa–•³ÃCJ m©&^²Õ–ž£§œy2Äè¶·ÅÔŸñ؆+W\v`Þ;Ca¿{é–÷¼6jÑØ——l¿{Õ›¸Öþ@K{ì/_¤£;IóîH¤.Dt¡¶º*“N¹yÆ6´__>÷ÂOCÅÁ¤wáá‘°êU4;2åôå뿞¿ü²Móÿ=¾¢A1ç/úµË–(ÈÞÊ,€:LÂø Ó("ù ®À: Ñ 3ʤoWYÌAÛ–Emw©5Þbˆ Þ»¿‚§–ªà—+Ty|ìžî¶\¬omSk›OˆÜ ºsr“öóø±_n¾gÜæ—>¹ 
*•ûN~ͪÜùËEb®ýGŸ4¦µ³UÀ²ˆ%ÈXG€{€¥»}yjå+"/Üðãñ¶ Š <Ž  CE§¬è9)ºÄDTØ“0ᆼ{R§ H0ÙY<è‹™ëø=}Vÿ³Ë’c ½o2Ù£}OÙÁPÂ^õí}‹.aÉg÷V.WYóÏà.Ì—C(é:T¤/äØ@«.\MR~¼;×óEÁðçB«*8ù¦€»¦¯»|í(’çŒà‚€à¡/Ïx{RÓ-Ïô[9fïÜ Ã_šðƒ‚>$”C ØA¢ÛãdûPˆÓL•Œ|ÒL‹ e@¼?¤:Ý05éãá+ÏÿÂÇœnê‚êêTÁª$‚ƒ>QÍŬlÙ‡Õa>áå F.«ô ŽÌöJnoT¡°x.Ó(˜‰l&F7`%6…f hS­„X‚CEV†|£Áe›  O”ÍK-žÓ¹PWª«*{dãp¸§óœGMð!CÀQVÎÁÓÁøÝ!p€/öþû>2ò]è-þxÜÃç}|RäÔRÅC aOkÌ–~0ðˆû«|­]mrÝ>´º‰Õ;˜=â_~ó·c^]»ûÎdžo¢1!cþ°I¬ ΀.õ¨V¢Å¬.â>ð:ð æ¨8¤A`±^ׯ飵AžpŸ°øÂÏÃ\ Zdâ°»>ùìX¯¦_úÔÎ…¨»„Ì™ø†“€ÀQ˜÷ûdÝ›> ‡Sœç:ÖütmÅâ¡#ËïüpþËר{åÖ›æóQâú…ûO “µ`”£Iªìôy]=½>Á—‘3^ÚMÐd%vEO[ùAºRªÕÄ{ºe¦šÚ]?±áõ{> ª{¸Ó]¼2)É}§šÍÃ!jÕ‘Ör^yñ“å•í?üõWü¾ÀêË«0ƒDòí•t?é¡×21Ñ ÐòJ*íƒÃç嘢Bž*?<±,)Z¼Œ' ¥c}cÙ>¡RGùê\‰Ë7JLgžk-0ÝY¾Uó·Ú‘¶cÚ÷½TK“Ñœw²)z¬46YËwâ¾Í§Æ Mpô.Su ½kÛ"|HäîÏG†#D.“îøM¦õºÒr{4ÃŒÐþúÐ’Ý­Ýû`BkMæŽÍßvæìÓ”?òÀ7_ÞÆ»N•…ÜŒ½$ÊzôçY³>¼/J°1Í´AÇr³Â ÷¤Àé$§e€ä$Jpyý²™.•3„ËÖµF`ó“EÚer1¾]nSAå+&îÝqV›ÐtÐøyæîÑVX¶é<™Í.ØsIìäÀOù]î9½7ܼô£ñÅ|ûGw·ìçqs`mÈEþ©VH¯¾öÛ5ã?ÇÛž½å#ŠF_¼ãsèÊí¦Öø>!Dð!wB/2Q/Ä›Š¿­û~4Y_¸åLOM~é×£DšrÆ+o`lšg ,˜3áC8-=<è±ztØŠa›nû…ëö,>÷õ×ÖCY±ä¢_Ïš½ùl·_X²ý¢ŠhñÖóÄÎ ¨fÀâb"’Ž ˆÛ1˜™Ûâ]ëöÝ‚T÷[öùuZ€^òà •¾42yüËM½í´}ä‹“ÿÙ4~Õwà ž½ó>¹‰Å—�ª¯ «[6½©®ÌºªýᜬÕОx¹Ã`ÚáÈy›/Çxcþ¦ë݈ïÉÏ&$Û¾7^|åè·AÒ’èÑÖ})êÖæmœ"³Ó¶ ëÄ‹¦‚á yF˜°nAïHBÑ%P¦¨&±§·ÏZüÆT­ˆ7ör÷úq©‚üÀ'×vjøê/gSÑšG÷LRÅŽJ œ¯4áw©E¦|?,Sµ·Ì ~>ç(^4ñø HÚ"åÒ~ß®¿çPÕNë4ÅK»d4[@ò´`JX\ê5¡O@ÝPÍ›i@`.¸ñpï/³Ælö…ħ'lgpºQO D„oˆè%½=Ù^è£`åjOäsŠ‚è¦îqcF:2Þí‰AäCóºË>õ¹ÜmÊ ÿI µä›5³þ—;ŠtÜö)2“@Y«„L4 ƒ 6×Ô–ÖÜ š‹¿-ØwY‘ë¾ãÓº~}úá"eŒûwMœ³çöµGnZþÓPÛl–~œºåŒE{o0túçg=ñóµ½éÖUc¿~füÞA¾Ãçd,OÑÍäiª€ƒ”×Ï Y© â†òF/<òàIg‚Æ#—½6hp´;wXEõ¡Pp°¢Û|t¥—³0ÓÙõ›ÛeRn—A€4ÊyæúÁ1ÂkÕƒî?ÿ5€tP‹_yßžp5Ïßx‰Ø æÑÂô}£fl¹ôИ¢';gÏ9ümÜæ0ãÅt¡šltûø¡Ì%á}Œ JFÊ2W¿6uíGw™ùÒ“;¯¢ gÖ{ÎÜvëv=ðéµ8/Nzçrà®Ü½`õws~p{V*<½ýÅÈ>ùÕx@[$d¼µÁ›_òÞM°Äð ºþÇ»ÒÉŽýĬíg=¶k|¾”žsës¡h4“.òœ¢gñT ˜òÉ$Wþ?{'_…ô&[Ÿ›°ŸD)¨çŽ+Í4ÆQ4@ïR(“<„t@|)í*tXPÈ•WïH;î»ùÃD¦µÒj PÓÊd•' Ä4—UZß0)åa¡ÉÅAÒŽô6§àÖD=•[“Dæ¿ÿàŒoåYHM Ÿ¸dóõÏíº)ׇ+ƒ›Üšáÿr—¹9gî:5vÆì3>˜{Úë@W~jìnâÏ=ïkÑoß9ò¡¦<ëÖ¯st.‰glÔùöV¸¨ h« …•YÂô,¿`˲k7”¡òƒ Ö‚QUDÌøx y ½jƈ°L—4©úc†RèpK=Ú”=”CŠ è•N„ž1ÛŸõਗ퀔ÄÚÖþ|M«|0m´ƒXÁK@3Ž TÜF!%›-=¼e8r~q¥ÎhÁþB·Ò¢ÓF±¬$­nØ™Í"ùb rüÉb%`Gä£Q¶æ¾oNÑQ}ò® m™ƒlÈ¢=öý¯s»0¯ 
¦!Ü@/ÑôªËŽûy®©¬-üvÂŒ#3¥ö¹;ÎÁY4xÄ6²Û%6~ Ô Ô€Ââ[_ìD:–üã«›¨?Jb>8Iß|ÑCLxÖ»h7¶üšÍ+.Þ r Ç~>óœ aŒçÜI@6§“rQ…£i‚Ðrpˆ£ÐU+æ­ëÜÜmçÁÏJ*=ûµ«ÛqǤw‡+X¼Ä&Mî$=fÒéì-õöP³Â‹ÔU5BÜäg­ýàá «ïäׯtzùæ;ZŽw?·ï®¢c¬ÝwÜ-*;FµãᎂÇكש:=ì³)ƒý(í¡¦ìD!ö„7÷,˜4â…?¿wÕ9‰²E•5pfBÜÁ™ˆã‰±?ôìŒDÅ¢‘T2¸P,%p.Ä@a$¸@§¯°$‘‰3¢Ð!0}i< ¿¾ó±{/»3CÒ)Å<¤Óõ{Ý©d/ð§fl¿æŽ‹æ¼¶kU¥å~ùgÓ_¿bõدg}qñ£¾ž½éâ'/ÞõÐÛç/½÷ß ?»ÖÓJæBæ]ûáŠç®[xûûK?˜8ëŒ÷W7ö˜W¯<„"tÔ+’â”]C¿ä[¯Mçó‹w[ÿ·¯¦l³àâÇu…ñ¸jmÒ2])d"YÝ"1°¸f­.KLÍÛqÕ¼1o®Ør+| ¯|céÆÛà³^:ú“…›¯ž5òÛÕ__8óî·×l¼¥â+5`BôîsÜ4ÿŽ·–¿ÿ÷‰gÍxÿ絕£LËÈä‘‹¨äé!¢nöûC_þÛ—é|(î8pØR¡ÊŸNjŠtG´¼“[go)Œ…uÇl£%AÄÓÈ2-4!ÉÝÏþ8oÞ¸õ+>˜§ÃEc¶`WÝÇÒ-ýˆF¨›c›žÚ8mÍi‡‡@ìÁÃx:‘T´-@½crEõð…ñ, z([C›¡u¯ùäoÏÜþÊäçïZyáfÑÓ4Î ;k‚“B)ÀØnh ézÜó›7Ø¢±o»¦ð>1ÜÒÞ ¾fI~?ä*{ÝË~ȧÀ \¬ ñ²¸'n›¨«œ(fÂö1óˆ?æw©žåÜV¹D²xüwp‚\üÞˆ[¯\xIñáÞBóÌíg<~ñ§(iMûzÂÒ›Ü|µ®Y<$»Èª•à}çfº †åU¿‚2Ö“_> üÖkV¿¹µrâ¯ì¸ ƒVùPæ‹›.…IeJ ¤·ÞŸ¼‚ÿ——û«ý_.Éÿà$¡¿Ðæ}bËBèç`@†ÇT¥qDyAù‹ð,+@Ä?â[ýéeïÿ²¬r¸‘Ç?Ÿê9 f©Üjn$£õ>þý8Ø\|"x*-C˜ƒiÖúƒ §¿¥×Ï&»;WoWL%(_÷Óý³>ùû’Ÿ'ÓµøÌKÿiÐqWdiØQ+&?\.㳯ßMû©ûǾZ¢º ¼möØ·«ë½“Æ>óиw¦^·aýGhyÅd<>cÓÅ0Ã2¿H`³"e9”êìhb±Õ_Þlt±ÕDF ÄÆ‘@¬ÝlÇð‹@h¡e‚§xÐ]¯¤™@QÐA" "’If >ætÏò óqÁç·Mym×t8®ýâïVÞš¿ñ /â]öÕ„!UCf¿yÅ¿~XðäÇ÷®ØwMÚL®Þ7çþ×n/Ú™5ßÜa’ʺŸ.§æý¯˜·í*Üg?öã5½JüÉ®íÈw¯ß?eÉ—×í¾%•νðý½–\zò“‡ýèa(ž}®Áï8ÒÝš[õÝäÉo]ÑX–kúç÷½¼c>¬­•7ÔI%mÞÄ/a¹YqÙ.¯/¶ðœ·X âp¼Gfç§&¿y^ÑJQ8lmp¨•APDÎß>¡#stÕÞ‰÷}6rÁŽk€µzçUO~8ÿ‰Ís±脵WíÊÁÎo­u)B™Ž!áÍwhs?í-G9'D#RAOc4f /m›6c仜÷âp’bUyÓ2P÷Õ /ÜÑÕ%ªÜ¨×v@!^Ÿ/æ4èañ°6zÁðUÔQrÇC}ðì¾Pèp¦b‹J[TÆ$ ð¶!­Ï®DTÁÞRjîi0wÃ…<Ï4ˆ—g7cRŽ*¥ TÝqt˜Í³TÈ=D—ñªHÝ[{¿þÍcP ¿ðñsÏîYòÈGW¤Šy Дsï{çÒ RTS𘾶wUåöFx¢síåŸCLá£mFM{Ù¨OME{ùÆ}½H;컓Ç=6ç¶W'ž²Æ0¬©·?#Y©‡¯x‹ëË-¼îõ9W¼ÚªžHÛÈåèØºK?ÎÉ]ÿ{ß$Eµ¶fzrγ9A@Ä’sQ”kÉX–¸,–l )ˆ$g1ƒ(—Í;;9tOOÏLÏüžf¿ºõÕ÷ýõ«ºV}·êvmMõöôtŸ>}ΛÎû>O y_•åŸ3tß÷o¸>¯P®ºù‰-<×›„—\$Q¨•”,"÷'Ô¥~ò@¡ŽNï°‰’Ëf؇ÛŤá9ƒ—.°Ž#îÙ=¿”YâSF(:û¦`h‘ÃD·8mé~ïÙ”n:[)ÈEHñÑ›Õêty¶¿zƒq…¥yXþWšÎŽiú\ ڑļoÛ”6@'øx„ÎB&Y&àG•B¡H¾øë>ËÏ¿-bå59Œì´ àrïZ¾bÚ™æÏd6ôÆÎóRfN¿OE2N%•"Õž¿”QñoÃÅêøº¯-9<¨øüÈ$…wï±(•€,cQ!Áù—¿³`Âçï­r!&O¸2fܱ1Ž0§Ð¥d¥dξÐ7èã,òÞ9¿dR·•SzmˆÅ%Óötr¢Í½Ò …RSp¥—Aï‘0/j\%€KW ð/³¢WËùÇ1Y ­\öÃp‘4²ï—ÅNLŽ¡XñiRúʋީvC¦¡L¸rj*Àƒ.ZÔ³YiI0¼–ÒpAV%£Vþôî‹£gœ!Ï_|Ã鉘°Â¥éK‚ˆÝÌ{Ãæ¾Ù>_¡U0Ñð¸AŸvïø‰ 
„÷o‚FEÕ¶e¸Q–h#¥¤1«Š˜§è\ï%½ï —åŸkƒÉ¶|ÄÝy§LþǺëoâÆµ`SC¦‡ U{dëÙy˜ö bÈ–Æ …nz@.*Ò} x²êB>Žà^ãY]D–~÷ÚÌÎû#ެ’,øé=áç4y»å†]·&ç½òe"ņ ˆfÏ´¬ª,ãC€Ö¢$‰²…G?‚Øš9b뎫„+sdüó石µ[Ñú¢TÆlu N*£±¤Åfãb h!õ±[Üçx¸Qy› ©¿àWÈ%Jq‚óQd©áyøU¹$:õò3ë_šûmï™m¿,êyøy`®Ô¢’Wõ¾F™Ð%ÁÚg"âf¹i}×ï"÷ÉÀÛ°´'¿ô Ø€¦Þr~R¤¡[âF° WBGÖ»DÛ»„ý…¤¼†Ÿ/?; /6Õâñµé´Ð£¨‹‚#M²žGŸ£?ÁB„Þæý:tŽ\š!¨[qGc¯Lz8Øp&Oô«(¨‘) Ó¾z¦°ï5¥NY -¿Úƒ¤¼ˆÛÍn±½ð#,ðǸZ• ß&©H–7‘ä ØJÖn|ý`ɓʴõ9ŠÛ³X!÷ëXsJ¼Á¾þàwb\+×0q7¹X¡lÔ¢+%RßìP›sž;§Nã\èç•€…”ü“Õ×ÞÅ"nñðo¢4-æ„^¼>÷‚>_ñ*Ö`OæŸì:»ÿ±5ç.é÷ɼoßû¨Ý¦Í?MœÑéüÂS/}åêŽK1ùº®†¬D¿é "6,‹ÊЇj¨Ý¡×…¡%œ–þ‹óÇlK€ ÉeR,R(}<ßú`&¿øÜ œ3¹í^)-_u~‚ðvdä£^Û6_ÿ†ä¡GͨÕh§~Õq 9í7ÉÓ"3ކ®’Œïº]IY úVˆ «:_Õˆ”EÝnòTR“©*­*]óã ’ì˜7påšó³ßë²é“kgôÛ¿úø8¡ý>²òõݳ¼¶¨ï±õ§F M…Šå¤¨98ïZx.SÇ1Bx2qðx”èË=Í1ÖÄQ£"a¯¡eæ¤T–Þ˶ä¶IʈIÒöw(1aq†hAL¢IŠ`T×°·U ä%_~aYï+J•Oˆ¶ Ô&²éìrÜBjE9QäÒ˜Ë%áóë¯O™ý,L”¹}×Ï<9”P˜S{®&ÚWhÍcaÊc^$Ȗ߆ ´ø…}išÌ5¿=ÍDh¶àé!‡(zY½¤ÔÎq¿ã'ù·‡Ì{q‹Mór$Ìn¼2[ˆZ?øŒˆH‹\9޹_²í\z½ ý7Z*K.5–‘Mªzüñ¶Âe]D¥5¼ßfNiNã²´úœ©vUz›‰[HyÕ/=Êñ½S3Á¸wyGj¥ ùkë:}/#fÔ}ËÈš¨ øÀÆáûJÂŽF¡O„2‚§‹5¤È!C¶‚ì9³Sx² Õ‘U7‡bÄ{ÅéOÛ-Œ²ßí? ø_üî0~úô?ê£ÞkíÚËû/"Á\MbA¯“J%b?~Œo^fµ@gž #±±oýr÷n9’p›«8K´±ú ·Xlá 7{ÀÇã–'ãù¿ÏÇ÷åjZ¤vXÒæ„B-I³Y&÷ÚÊZ}Õ±'®í‚¼ÆÌT¥P\‘ˆ͉×p—­6û}¢àê`J#žƒÉ#õ+Ê)ɸ~ëyWº^l¯-­¡lÌ %iš¢ /½¿¥ÿÁêà£9§ß‡˜ÃLøtðŸåþj¬õj(…ÁU1˜ãŠdC€J6MËqU»@üš@‚ŽFÙu¿£ùðêî³mõWÈÄ™,[Š×S‹eÌTSúÿ5÷zóB[: œÕs»T±Zu\bרeá 7ßäËÓÞÐIS!ŽuIEd„šÓOžyöYaN"0ð4fH2ÜèJ§ìÏw^Ÿ¥•Ù]¥ÉÆ9­gõ?ë¬.«êUš“nK°ò‚_Ú,¹k_»e²gNïzbÍÕþbÈ`ÎcƒÎ†ª&óúǽ™à'fB‰¥ÝÿDUkºÁÌ‹ƒyƒ¾Yvb0ÒŽTåœ=CÖ¼zLAVÕ/ºÆ]1yð:£Êâc#õôvW™gï¥ÿ=ÚèA‚Þ¾]ÀA^ð²a:Z_ŸE¢q¬ƒ&d²Šà“¹/xó<³ïL‡Mè^˜nmö^›}³-oÄqwÕæ³WÝË´9¡RŽë¹:Æû?þa©pÙÊ{½“º}Îsa5`ªãÊñ=V&¤†sí½¼SÐÓ!²ºû¥1‡Z¯èxvÑ…áPäË}-•ˆÓ–€/lÔèûgSÙNÞe“šÄî[%V ð9%1_ؽ¨ïQ&T*“뇚r`˜ˆ}ÑS•#&6{:Æáªï£Íšìø‚±«–œ™2â†j¯¾\˜ÂÌålÐ{„V׃–Š …Ž8ÅPÌúâêÇÞêrdï‘õB\ß’$ Ú§uÞA‰t2ÉK¯ÓdUäõܧѹ­jºÖÇ'ÄïuS!µÄ¢&•(‡u#JbòCù}/sÂj+™×y•NcŒ1ÊúŸ«¸¡ù€ÜBßÒaJïõë¯NA ¯¼ºúãÓ¡56ÿöùÆ/ß?ÕÔÒPI‹ã|ÔåCV³),vqÉhZZj!‘˜·©¥Å輬_¥RY~*6iW?’*RÃzÍžˆ'ÞX쫪Æiúx“oª¡Ѝ=û~þúÞÉ9æ½vdù¥!†¥“Ÿ´õ¿óöŸ5àñÛ‹€ù#tÝ_«7*Z’¬¯}ÆŠ˜di£­sŸyXvC¢ÇåFÄ­…„Å÷¦B@¿Ùi̾Ó;…¦`²!X¡˜Õ“»¯ÝpnÚ¨6+(¿1R7Y5q!ýä9*¢6rõ S_c©¢ÿd”•ón½+èªtجÀ0ð€,~ëèÂÏ­ëtnê­˜´oõZµ÷ÊLLlèѹ]VÇ=†² 
–V¢Q[¸¶Ïéi{ÔïÔæïûŒé±içʼnhä>EoͬXþÊ¥°/Qx)˜W3Ý5å:[lÆ™7–´9•nÍ.«r(ˆÚ.1I(‘ESÆŒŸˆð ýÕ¨‰ÊɨW]é¶›RÝ^W’ŠEµl\U¥*?yÈøÙ¦éÍ"U\Š& àù¸Ï˜¢Ÿrê•·Ú¯R›ÆÊ—42Ê}’¢5BQ¥Ò¥d€)©ŽßýäÆ;BÐÆ»NŒçEq©\BèJ’L'ѨV«>??sn§‹€K Cô˜4r…¦¤Âe´fø¢4/GÎN, lýó]0(ªÓµxÆÑǪ½ žNR:K*6±£QUZþ›’È FŸK=„N†B¥ÈòÎ_Gü¢¬ÔVwk=GfõÝSôõè¹ýÖ®89 &Îâw.<9bt§…2»Qì‘f†²#^V–ByÔÅJ"òZRZt«oÑ;_ÏÚúêÒ7Ç8qDm‰lW°ÌOU$,Áí×– ¦‡v½H1ÆX4áŒ9ÖÿØJ¢¨ç€v@àÌÍç¾rV*ž¸ûâsmÙîÇe:¹ÚŒÌ:Ü y+BÁSÔÁDX¶noÞä—n@>6GV ¾˜ˆDfï»iÐ †ŽÉÁS/M9ÜVH=•Á­\lOoRZêH ˜%à÷ÑÄCP¶ÉpñÅÙÓ[/YssÁô.ùk~*ÀˆßoÌoÉàÙœ™qžH~iÒ¨QIùý‚›Cêœ<œ³¨ÿME*ÆOuKŒñGÌ}‘f™3K<%s/ϰAâ4jBFmDŸnκQñ³F¢(ø­¿0`ÏzÉŒ~»¬¢Lšà5Æý.íÏ…Ã/‘8»½1¾ãi;—´ í ªÄº(s“5Ò .® "“Zæo¼Q0¯ãÉå'úÎ}õ÷G[ÎõäÒ‹}ÿcëÂ/žzÃÝ>Ï¥áü8YûúÅi«»æ>Vp|à´Ÿ®=ó®`>u¸1) ô'¿üɆ3ï-íylþ×?yÛår‡£°´Ê|áÏŠ»v™=ƒB4S—•àyS“®L«d+suõïj$)¤‰H2ÊFPYågCaS'Ä:, xõ M0›vOûpÈÜmçV¸2+M¢ŒQígøzîËTu=2¸÷;ßÿ ¯IøƒÀ@þ"vJÉØ~ ªç´ÉʨpÒL5’pºH±Ø«Ý%‹oô+ê÷ݬýH YÑc»Û•™©m;åO—“ÚMU+’pj, Öðz »‚7)!öÏ:'XBxäÕ#oˆ&1€T¿§d¥WkÊkÊ…5Íôóý›6|>R&‡Ë Ì× òߣñ¢«oÏì|L¡ÑÅJ[ŠÞí-e"žFYœå> á¦K„ãQ žSävSe+î † |0€$N™~èúš©= ×#L:ÌœÀUô›»÷îí¯˜CäËÆ +áïÞþÖ5àsûáïîûÿÿë+½ÁñpQwܦ¡RÇ3öVÕV4×4­xR&¬Oœ-¸2I˜ç ™9à3!ñ @tº0±1‹k”†]$–rÌ!hk|e'3w.žÝü²YaQ"Ì ë—Ø~< Á‘…ÜäÜ~yš°Ho½9Ѭ7¯Íž U½1½ ƒ¬GŸè_!ŸŠdšF•;ýðË+{_š}ªËœÞS fQŸ]œ7´a˜ß¢{~Ì §µé™Â5U$-½¡ rPÏD(±fõ¥a3»™´¿MAšH< ›€8ïj¶¾‹ºg$9»Â0Œ$5ÃE¦oµ¦g #ÓþÚÆú‹‡\{£Z¹Öù¡t—Ÿ„r,’Õ}q¿“óöô^ÜñÌøÃ/nèü£^d# 7J€ ‘Szi°ÛSåVð-eJôqŽJÊe®2›¡þš÷®O_Û^0;" TÅ|Ç9æjz0Ù§ejño\̈ĢÂß*èúŬ“Cæw;K‡Y½Í¸øÄËìøí»='~zrW›”ëE{Žî !jº´« T&£¶¬üN–13â‹Ä|ܪŽ_G¥Lˆ" £`éˆAc­©)µŒ‚„BеŸ)}"Áb— Aœ7úmÏÅØGïå?bÙõ"ˆî÷y1JÓô`wagöÞãd« Y>K0E—ÿQÐîâŠßÀë^ûs5î>±ëîÜ´Öcv4…‹/¬}xe–fÌÅ(5ÇCãÔ“ÔI!×c+¯´[ušþ§Ç¿Œeš4¬ë£ÞÄ–'ž!v#ñ"‘G«4N<ÓdaÇK l q…UÅ“ ÿ2ïÛ«^>S.o«ëì‰×âÝ}ôÒ:µÅžˆ‰ê§5vÝõ…(½Ióñ K‹—š,4 }‹™°°D"ÿdß¹/l_qჽ®¡´iò^ÄÐ[X¸eÉ´ŽKR͹[oi]¯Wñë¢dñÀ‡$®›ðÂ'Œ¶BЬxeHÉi'tþDÌ’QŠŠjÖ¼ÙWI/èzŒG%0"F 2²ûG®oÆËz³Ã<½Á:uð§R¹béc@š“*eÆT3Ò¢Fj˜×ã/€Ï¬p9 ¶2Yë ¨šT¦æZ”ÿ±üêH»$ݺŽL=>Ø™†.T\%ˆB¶Ÿ_'Ì÷8™ÙîX–%kÜçMVõº`鼎{±p®RX½ž¨ElQ Ý_)züà¢T×oÁ·½'´Ûyèöˆ²ÍïÝ,}\3ÿÁÜŸØi>W]8ýµv}«òêÊu?ôÎï}®ÆW»äfÿeí¯ù“N‘À¡Ä"—Ù’‘?øÄÿO‡/H@«âÈ3†vÚömí–ŒX»àÄ4áÑdÁ¶ÁsFìäXš°ð)úo¾ýÇþW¿@0wEHscn×F½øý³¶®¾ ™QšÁŠÂ÷£··Üz§è‡fm&ˆ3¤ ö:\%ÓÔv•\]î 
ò`[PßGx±§Ù‹­»Ÿýþ`RË+Rl»O¾?eDÑú/f­ëzkêÆVK‡\Ò«%ϼ²°Ë'Š\ݼ#Ç«Èyj*ò¨ªú²rfƒ5YÊ&Þë$Èq( ˆªYôÊ6imE< cX"gçþølá[{ÿýn«ìqWH'Uº]5*9'ñúƒå&…D©ТÜ)ºˆ¾Úç¡(F¥”sq;”“ó¡Ö ö£è‰á°ðfRXjn-1˜•6?ëWʤÑd<ãäR…?Æd벂A¿B®s"¡ W¬! Qñq SÒÍ¿Øu{·»pc›sC”på¤ÔDfžk:û•“+OôÝ4ä'Ì@!å(šÂw÷" J™uåÌÌyÝ –_Ê/ì³%Œ.¾÷Q¤Bì¾ó|Þg—–Íé¶+ÉWž›4ãåb‰È”žþƒÇej‹4%[Zî¹¾öÒ¬w:ÍýìêŠ1ƒçï<¼rdÝÏIe:Aå¤J«W ©*M¦–¥Ð$ü8ò§Ü"&>A«&±é¬a†#ÊD\*s=ÉÊmV[éÔJ”Œ?D)d†4ݓн¸„Õ(¤ŽÌÞØê7Íÿ¾©7ª,PŠ`YÉ!*%í9vÖ+ÅaºÔbQ‰ü/”…om->ªëÌUÛ@è%T¹ ͈nèAV,uÎùº\Ûü6´â‰ß,6$mZdÿrFYžxh@¥)á*ÉÏ[oäcˆ{JÌÕˆv+ë×¶°šmÑ(Øy£ÉD0‹Z öêZ¯A‘ŠxÌvQ-}OeŽÏþvRf>-wkTɆ‡o½Ûw²"‘“ôhsEõânA+¥7J@ßg¨ò—'ch­{æs#ɺi)Ýví­·ÛlÜõÓ$A— T &ù­N¥&réÀ5Ý í’;ÈEŒF8ïK7éJNˆª^*±¼a11 ”  ±Ó¾íW4ðZ©ãÁ³†Ö>ƒçX©€OY¾úú † ÑŠfkÀ"åJ•›~@·éY$¨*]øÃÛÿåT…ü¹mÍñ _¹&ÁþcRµN+%+©ZädëJ6×uå±-Æ2À³ ™TöHÀh3^íŒ)=Egƽ°¢Æ{ü×"Á'ÃjOÆuÙ³õûÑ‚zæÉ[móÚynÖ¤.Û7^ø`nŸ}+¾~Óü£ÅII|ËÙ)†¯+>©•«¹UgÆÌ¶gõéÑcºï¼6aRÂ'æ,–ç/•5Ô½SíæØ ÄB.FlUÜ@IDATÞcȰ<|TÒ²a+÷¯$Šy®C±âÌ‹ƒóûeÃRoRGÓ”"½Oy,cR2t>?ËEñHrÕWa,ê¹ÊmêçPŸÐzJ! Ðë¯m”ì\ãs¦æÂ(¤ŸÔ”¦Xm ¿ª,!™s²ãâîg^ê%t ’àKÈì–{V]Øî˜F¡ð‰j\þÇŠN;æ^›×¿8É+,É ×¹Ô?€Åç]Ù)ØOóöøoîàíåA¡«"•|ÅÆS;;”!‹[¸ïäS­—¼Z‘ú»ãöÌqΚ'µvY†(" …ùLKvåãªâƒ KÞk»ë“ßWc .nñÖµ?ü6§ü%üH‚ †jc›?¼«1D¿rÓ›czÚð|4jOLo-÷=>ýý€@W%e )A— F¶Œï›¿åxŒNÓ%ÓQ¥b¢R7tsIIyËf/ÏÚ9pý ÃŽÝÑ>H‡iµXžeÖQû@•R µ[¾h=UV5–£C1T}¯¶Ô"IÓIGìañwïoÖÑžX•á[ûsŠÜŽD#*­ Ô:á˜?æÕrƒ«¢ 9°ØìóúäuC#¼Ivg¯û|"©IK< E¡Ÿ7µa¢La×›sî´¥BÆJ…ÿI}MV½„…Z!jH[7]½äùCz3€á-°š©òlä}íÇž:·cꀲ R$SÕ'ã|ôèžL•4X”aÐ(Y„—2·×îêGåáÓ"¥µËþÊM³ÙÚŒ¼—Ž"‰è…ÔvÞ¿ŸT€B*spqÚ ±›š>èèš«¯-èøå¢‹ï~Ü÷4Cb(òsœXj^(LÏáµÒA¾¸ÏM90!ÖeÕSÏ·œßé¼É¦¶­;É|z!›{ßøèÛvè®gÞ¹ÿÇÍ9'†|:¢¤¤ª|Ùù×µÇ¹ÏØÞ?ü~<šÅ6ß(Yµà…ã.48RŒF¥#-Õ”†¶Å”w †¯×íѧjp…âϧ"”ºætÿÏÖKm@q¢*wu³ì&C7ô]ñÎgh-`Y¶g :sÍ«*6^NÌÃqǵ#ŽáoŸðóò'rµ8]” ëâ.— äxÄtÅäó³+p…÷ŸÝa9×^ïƒW0·Ç~ i£< ÒPˆ D§’Wæ`þáex¡³›_Õ‘æåîhþÍQøJXŸR‘‘ÆtlÒyü§£Vwýjæ‰áx6’(ºÒwy¯_‘ *`íâ+N~8³CáªksÐ' »_þò×ZÔGqÉæõ›<¹DÅ>VR ®¾&ÄÑ9_÷þvÒ®VyÃN¤Ô3/;Ðs`^z™zR—b£Æºtø7y¤&\ñÉ•™ƒ»Oh弽ϯVÞ,‰¢¶Ñ¶¦ï9ÂØvv½)5¶YSc>V•d7_ß$vøC³ÿÍ7ôжeШ®d‰,²aIáäˆ4êæÜS;|#NNþ¨XÛXâ¤ìûu½ y»í^}óÌçü^G…Y­%-^l„a )¯I¥È¸©Ã/†ÖK5;Ýz>ÖvI³/U¤‹""‰4ôįÇ̸hÇEBIKF Û(æÃRØ‘ÓáÞs:è|ã)m>7Æ­ Ï´´7~R{_¡óÛÞJÓ7K²`K$t|˜5n¤VUºªÚGéÒLÔ¦[Tã÷~pX‰Ÿp„¼z)pw©¦Æ†˜È€¬› üÌŒÌ7wÔƒh(ìv• ±È:_š&õ!ý M‘n 
ä‹‚ù¸*ZYßP|«ýMrŸYÜîb}CÃDÀÇ‹#pyôR[c´&½;â©@2ªÃcÊ,­‚ji«}áÄn|u4ZÞ³,UݦÆùý†!_†ÿÊ>{fŸ®­­'ÍV(È–vgÅâˆ2Ølmçû¶D³‰Í?Ûôý;FUZ8F¹¦ÏϦm(ä'q.U•ZÉToìª V[U¹r^ïu‡E$’cJÇSä·=‚¼jˆ]ÆZ¤¿( ÚŠ~ZÓ€ü xì‹LoQ˜ ¡ü™Ìn*GWŠI™üêF½N+„u„ )—œn°é z…ŒqTÝ]ÑŽm*m­äR™Ÿ ¾¢k=»ÕbÍUÆ2hðAáD¨NtvjÆ;[Cƒmÿôƒ:/Ü¡ÌHm¸¬Ó–*Gí–kx¹†¥Ìæ*<Õ£eeÏS2ubê‰ Ú|>÷XÇ~·¨lËÏöÆhÙ;M¼¨ý—°P[’…l)zzƒ’TC”³è3“˜X'\&ªr̺“ÂðSÛ÷Â&*Ã(w #ñLäÇñ$Ë`‡~]÷åØmí¯‹ü\¸²v”¡ŽGE€ˆä·?(è6œYÕ[ÿ˜‹EÏŸ2Ë `V4ÅSC5qÑfefV×äJíÄýÍ·¾õÈhŽL9ÒSçÈ3’–ó!>)FH9tùb©êl/Ñ+³°Ð0«Õe™N¶ôI;4ï™´55H2öY‰ŒR38bSˆüˆÞÂñÂÝÃ’ÒŠ?Šúøãz›ÛQal¼¶óeILÝ@ž¾ùå_ã’HR™P„¬R­|E—³<ÇÌ?=D^¢ÎëV½x²¦Æ™nÏL 7ys~³“ˆhoéwËèÏZöì%oÂݻوrGé¬&_ `ɉ¡«úÕ™¿˜qªLÍ wÊÊ¿~ŸÒµšÛÿHR–³jêaGI5êýDneIÝÕñA­ê±F`µ{È/`´ƒf£ÔUY‰+²„zJŒ"XœPER’žn¥P1ƒ H8¦víå)X5yÄ=MÈÕ‹i2æ•© ³³*ÿЦØúSQQ•Žžúá§ëξ+ôƒœ¨ÝÙË4t¼j”àXD ‰Å–‚îâãàóR‰9w­¢úž×b@xÅ‹¿×ìK5—™jÉzpß™)·'EÉ0¤¤ û‘üãC0œdIS€K8—&heYuEÈÃÇþ”—6­?9ŸÐ³wÿæûâ•é·D¬‹Ý¥±•-·ÚËæª_¨ ²ZIN•³º±6GÆKޝœ8£~› Ë ÿÆÛBÐÿâ—³ÔÞ&5ÂŽw{ÎÛ½kùŠþWæì„á2±ÿ›~–÷ , "ÓÚnZ{w"$ÔØ—¶Û”鞀W,ŽKŬEb”Tk¬Òìhó ðhœV€£Tdq%ë‡Iͤ˩k‡<íòû3úå­¾¾ #økü^îÈ•ycº¬±:”*ò´È–½t;{@ç#lªV”¦)¥™™µ…A+‘»I es ªÌAÛ­Yz¹ÔïòÔçT2‰¤4^’¦Ês:ïfdNNðh“Ilˆ‡+EZŸ‹‚lLbB¡F¿T”Te,?úúÚþ—Pžáò8MÒ”H,¨gGÈõR¹H d|3À¯/cY6ÆÇ CœþDå2 Ð#&­Nœf8Æ‘iÈôù}‰´8 .•1…E¡sÁx"hÖ™Gl¿yðÏ2¬Ô±2µ21$˜zɨ)U÷G((‘,Ê Ó4ˆq2Ãí{ 3vÙ+[5¢Ü­6*Òàÿ©$(Wö„É r¨œþÝž…"܉teËËG¤ Y¨Ö*Øég_ÝÑù2R‚b¿L§S«–ªÔqe<P>"Oºã ¤¬ÄIˆç@!Þ„‡e /0 ÑRˆ&y™·ÊœªtªE '®•`Í•žv®áÂß1©ß *1s¤ÖfTæPܽPÌ›ÝÂxëÑ…6-CÛÌw?Þ„jžäAà„*¥ÇjTHª—Z]!µVš!ýªR8©È¬²Ø`çãƒöTªÏ+QÔ]M¾Ëÿiäªþ_©ˆÕ]˧Kê—ÅÏæ¦7ðúžDQ¼¤”ɵ*3ë“IbéájuŒùt‰ReR(2 Õ4˜¥D‰´ŠX ÕN>E–eK”$©&ð&é»ä±*CLº.¦öÅd@Fg^ ‡°½*Ì’Âããÿ1xº†hô~{JuCMÒbeT%«b"TÚĈ”*åSËúB=ü£ãÜÝÇV Q4ຈËã*EÕ·Éåñó`è²Ê@Œ`¬xä¿J¤¼ÚÄ£°1üL{Ç#6ÃÐ(¢ÂlB-Òjô¶Ç~—J”, ˆ‡ï<77#ÚÔþÇó¶Xz–XïK<Œ‘j‘ŠŠ]H*ª’Õðf––9v[²¬Ãy_¼µfàoI:ªâe$†¢]¸±RôÀ™ej©/êFà£~FNEeeŠ"KE´Ø«×B~š$‘ž(Ñ© €â{¢ñöþÀK’æ`ÙæHØHò">™äg^踵﹠OjS4a#Љi,Ú'Þ?y³Ý•nHª’à‚dS‡K)ÉLŠÈjdÈ•4IÔþxD)KcâáÉg[|2ø;1vm‚ L;½¦–«2és±¡UÄÜNOý¬z„çjjëÔibÒ˜AF³‘¯ñÞÝþ븼Á+—íâj“[]0(?É>¹çâXöš‰°,­¢4´Ÿ¼ýó ùÝQQ%QO%#v­6é Š¸ø”ê‘PäSº¾þÔ(.“Ú/ožè¨EºácúdTF‰Ëy0ë•J%Äs" bUQZÖóOöüº\p ¢÷“ÄÈ`‰Ùל™ ¸`ÔF'ó¥Š6y4?weKIX}¾óì?ý Õ¼)NÂ2 Ïűxì‹Ë«Œ·×þ\3Ò @þKðÿu¹¿5ýQC*Çãq¹\>,…B‰Al‡dP}È0‡ø„ÈV©0=„ Ì$ÿÜǯŒFŒb u: ¶ƒ–@ 
¡K¡+qׇ[Vwÿþó|ìc%n]wÈõðÐíiµZ´?Acð‰ÆÔ]ªî'ÿý3&2YÐ=Ü;·ðå‹O)êúÍ,@A)ÉÔ§­»´–ˆ!µU$.ç=døà%oŽ4¡˜è$I)dG÷š³çh!£±ÞÚqy¯`›Ódä€%Ž-ø°ëgÛ¿3îµù[O/ÅÁ #§_XÿZŸ‰V­÷˜Ÿa›É=K´I¥ø‰¸rѹ!…ï}1gß…Ý¿.Õ#g‰I¯Q…85›®Ž¿ÄTýf­·pøEÈ5 NuŒJYÞô6©]v³çº!ǽNIDÑ7Í/±ªÝ}¹&1é—¦Bä'Lô»à¿Kr”§ŸÌX=ìן·’¡h²¶Óuoˆ«BS~l‹n™Õu»>ÞÀiZ*oÖÄ^âøóVPÄúl\yÀmTe Äœ!KЬΣ0ôFnit g°ÇÂâÊFA–®´>ˆ0aÀ0ê•F. FÜ5γȶ•J'Á'$á¹G: ¸-AŒ—ÊE¼+ëð˜ñ$…ªY0·”‹,úe°¶U&JÙˆGêó­VÉÝÒåû9—7|;aúsßX©T‹´7HšÂ©<~Ãi:镈ZnÜ?ªhÄw³¶uØ7ìî›ûš­uHœ”Ì<7ø“!7ÜžêÙ†mêû$Fâ4çL[ZIÑçÀÈç÷¨ðš)ecµÉþ¨4P«Œ)¿È±æû‚ù%…íŠ5±,êg«g2Wx½£Eîc5º$Ë=šq±ïªî'D2zÆÕ‘]fHeóŽLÙÐÍ©âõ’äãd´ÕÓQh}’"5*ÿ KBmIÑ-Ù;lgïÏ$C’6î×ÓØPHw‰Mì¬_}½ÇÌýuRÕȈ¾s}áªs×öÔ5æýv+>>1Fá²A#>¿B¤æuŠ%;Î}Â]ÚV=îìÜÖmÁzê%O¨â"]m"!1d{WÞh%º£Ù/2.åzöÚçÖÌèµT½À_ ºB©)VŸÏƒòpPt€E!!æVž'˜¡ùÀKFW T? r2¥w¦6WÃäÆY­N£˜t¥É²~G¨hKŸ—«'Uñ± OÍ‹b" 7”òqGsç >\øÛÐwú/ýìÂüWÛ/5ËÙ'–Íè|X£´,:ܾÚÒ玚u)aŸØ¥Iq5Íy´©ÙN1^já×].ÄÏÑôIˆÌ|aõ…nóG,[º'oÚÀ=)fbZ7ãx£¢Q·#ìCµ8®ãa.5˜¤#D Je'æä¸SÝóG‚½"K“‡]4Rå*vÖ¥>ó®)s–mZ0õfu•£­7Ý¬Î¾ÃøÀÉÊåŸïs|c¯ãlœš}±ïœ¡'^ʳV#)å÷Âþ{S¶!nÖU™S¨ŒÇ ¥‚HôNò¼«Ñcou_º÷§ù‚Mÿ;)ú LQ[Sd•~§Ež‰}güÁÒë„Õ†öÅq´¡ÎÀ-êÎÄÁÿ½¡Š¿w1âœþêj,ëuH¦1,ø ÎCŧN¢J†ë§´äð· „l@‚u«(ð–!•>®‰×"“ rB éZàè¹;Ìe ñPNá ‘ŸY‘ñå™MŸŸ/Üru¦—JäÊÑçÚ”#ŸD¬„„õTrK|1çä•eMßOçb’Ù—'%UT„òRÒ v/ù x‰erq HOì°sÊàÕcÍÅr¥D!ܺÆéµZlHðyò{5æ9•D–B]dL×é“ûm\òY·úV„ñb…=nãM­tq݈“ùÝ÷3qÞ¤q†jfôY9eè"&Ææê•gÚÕWîYr¹¿ŸøÆ^hƒRÕÍ׆j’:dŸI¸Äâ¯zË• ‡¿B£7œ Ÿ9EŸv½ Xÿ…ÇzhM’…—•:ŸD†Xì™*b† bœl~ëN˜pšÐh™T׋Ç€ò(#ò®ª2Rc±Ú…þ„„„ n IÕ›¢l`zçS€`b>!@g͹ØföåÎ:£•ã¡H¸èôì-×V"dÚ{{ø~^öÞÎÚüú¥"Õb©WØñ4´ŸX¤ÚÔ÷f-磤b©,^éx°æ‡îÑ„¯Ä;5Mx N\í%ôRâTy$¹"U™T1Sº­Äçv_ÆŠDl<œi·cmŸ÷ÓÍF>ì,R,‘k´“ÛïÊ5b‰^‘ÅÒÊëø¹<\áE0êƒ T‚–A†GS<ô=)úqDFCë’ãà ‡ž s5ò?Mˆà6cnPö”ŒYg^E42Ç‚JYòÑpÄ@Bç®ì·SP ©†g³ ÿDD¬¤ŒygGé$(÷ qI—Õ ” áœG&‰ä o’¦‘A£b˜ ÆïÀ¨XÔ;µ’¨-šgqM$gó•)y_ 5ÈÓ*J|gßÊ?;:!¡Vœ|×Eí0™Øw†è¨—ƽ÷ÂGè÷^YƒS/^0_¯´Î9ýŠÝbðÒuâçÑhЬ”'(Ž’ŠxEX¤ˆDyo„ñ®ë}Ë,3ávÑPHXQ€"[`®ªzˆoʰkþµA6Œ jÁ8ñÜдôú¶÷TÊ4)6ðÚrH4Ûuθnù”zÄž©½W–DýødJº`ÐMäŠDBe—R«|nèÌÅq™Ae‹3¹ÜÂq2ÌÎXêÍÚŠšJGeÁõnR-ÌJÎ+"î0Vò÷´•©p@YÃÔ€HX¬BÜœZÔã«%=¶ƒž@£3Žo{ ðLßM'{l¹ÚŠã|³v½®¢¬Û޾[xrHŒˆ?ͪ7ëy:Ï=;ˆ @ùCvÉÁSš ýM(šöSuy©ÌÉðÜ‘úÚ Ô1ûüNžŽ¯è{zNç¢ñ­g ÞKt²Rî¢jJI­Ö˜‚ä ™DiÓv\ zÍÑëŽlXu|9…—Ê6ÿÊ0ø>y=WLì”7oàÞ*×Ãñ}¦Nÿ°`h×qHB§AÑ(O ‘p£ä)Éûë»Ag-|ÿl5)ãå̬ 
Ï"e2e8ôoØ=¿ ú½6½¾ðäk˾ŸåíÚ€G,R˜4öÏä]ë‘ÿí`*Av^˜ºñÌ»!QMR´¨•~R“ø]ê¾ëª }}°,åÿö‹Àuf fÐÿ•­ÎéDk _ëÜP¨=ü M i ½] Ý À<„â¬S·ÐµøaŽÄñh4Z§8áõâ_h€ â·8 ÿâLèà:G¶NµãŽãv8Ž‹c[+ _Ùãñà:¸;N†ê­s¯ëÎùߟ`o?}m§U–±æèŒ¹Ý¶å6ã…ã« …tƒ hD°)©B\õЙžcƒË…A„…ÁH’—^y^°U¿£ ^‚ IäÀ™EB{T%˜iûoï=èS¿aX§iFi&WAš˜"ñÅ=ÎÉÕ@9¢O4N:±JF23^ä¬4UM$XxáÙÕ¯žÐJ,àçÓh’ F9²¹^Ú¢scp»‚Æ×!€qq¸˜XHÓÊ êÙÂv× Ä”L€Áô‹4q&Šˆæt*niêð$ü§Ê` Ó‘’ °äòHjŠå÷Ç?7iXÿ*sÉšmóZȦ¹7ÛÎíµÙ–«d2À® $Öô(ùQ9”0ÊåÅ#óÈ‹6Q¼B6{Ð…# Å m<ËäÜ ߪŸ=Íj[÷_å?Z-f¥ÂGUˆŽéšty³G§kõ,°(øÁªH€ (7€ëÛ`(ußÕmkÑõq0bаZ芚†:kƒŒæýññ `Ú³ÒGhÛ¼¶§Õâla¦fF‚71 @`y5kF¶¥ÎÙŽ£% ¢© mªŒŽ·ÜØßá .ÿyÀŠ¡£ÃâR‰ÌËH¸ºÄl4Ät-¸xöÅ—Þ±”2q{Ï/™Ù}®–ЧãÈ%Æ" â!ɘËSnÖ©MÂ͸Mjh²ú ™–‹(–þÎ2kÔ ã «QËûBÎÜ”t6è€bƒW‹¥Z-V6I™ûΔ—î¦Ë3ãT™Ý`ñÆjÅà}‹ƒÌVé¼'sjâ¸Su«€s ŠH¡¾Q°õ‹|›Þ>©ó%«M3çÅsK§ß¤1­’H+"~ÿì—?¦b s$]ÕFy5˜!‚±@$VMº ªä¢îû½I2¤8¦¢)ªË¼õËÚü‚¢îæ¶–R‰³ ý © äÏ^˜  C$Z ' ’0‘ëgðXr—ï%Y%€£¡V{ÛЇ5ž‡×û.ízʦËZxî9Ÿÿò1›Úêä_y¯²¸ýE,ñ(p­\‹#VƒÉ©"ûïÍÎïº;ZD(ÅÄÅ2õR›xxñä÷Ê'X<z‚É¥Pбl–šjjj…±P¶$Sôâõă—¶-ÑFrä²JVäac2›­A"jAZò“š_Öõ=Œ¡Ð(ŠƒZÅ$t~:´¶ë…–é¯à"MsšTz* ‡æœéZ4ìs¬žrŽxº%E¸>ž]ÊHˆ!J*ÁåŒÏîu^®P'Å÷ÌÖ4w%£W¤OíºWªæ‹½­Ã‚ŠÔ³·E«V¸BD–°— IÅ#úbUKáF‰«Ö­T«Ð—,õfíyqù+?ˆ£<ÛÅÑþ3[’á@š8EÄFU– OtÁ5§uûtí_ï¢s`Mj÷Å ÆNúî¨Î‘ƒaÿTþEY•<Š~"—§æ&ì©PZÒwï‚Ö ƒn'–Ýk‰bXGnW:lZÕú3ƒÐÛÎP|íEäõæ3÷ß0´çtÞ@Œ]Ô™žJÝY¾pð]1KÙ%ä0óÚŸõ…}õt&½,…Jhœt˜‘³K{ŸŸ¼{ªÁk…s‘w>úèÙÕ²'©bðÑR;‚ô´Ü$“˜Jhñ¿ñö. 
E mçv»- Vë-”%ôbeeeFF:çÈd2(ËºŽ¯ûûУP½ÐÐu¿Â‘:ozÚ§á'P¢ø-NÃ'N¨ /ã‡øG Ñq4qÝõ±ƒ¯p¤®%N§Óf³ACC1×Ýý|&I>„Y™2êåùj“.VO–HV²‡«R¥†7_ˆ…Æíˆýt¤ ªÐ÷§ø+þª¸5T­‚8 =>%h… dƒºÅAp“ªbÒÆ$B-,“§‚3pv‡¡ÚP½ìf5µ~&–l^¯óÄ­Ä:Vª¬Æçprn^Q›8†©Ê0«—™†‰QØÿxœ÷Ï?÷Öò7/Ì;ÚG¹c.!w„5µÏ6p¤Ç<)§LÑZD œ5É´«í 5Æõ(°[²|{[ïk:ÊPrYíi^&ÆÄ\‹ovE 7tù¾6^¶üáÓI ˆf£ñ2òv¯YD¢°„4¢ˆÖ.kÍU)¤1p|’Ê)WºCÿá‘ñ EÓnÌZÓnz÷#izµËQYøðݼޛ—}ñzkÚ •²dj†|ÂîÁBô2JÖ½ê¨ 5ÎnÆ•¾¿ê™Ó):ÛÇ”"•H*öFR¥³bù¨‚=0ÙôAMŠêKƒYâ ‚å«Aîý½õ°EmÉÒdÍûlÌ¢—.Z M¼(4UWù7}êÛšz±Ï¾ÞKbrH¢ #V¹e„0v“<™ˆ1`º¯R²ÓT|›=¹Ýâ 7.ðMÞñÁÐ_ÿtÒ¡w'·:Eì܆[ƒ…׌!!ÓÛm^sæ#áa!:Â$ïÙ=­Ä/"íQìAˆ€Ö5i0Xæìíyßá6°Š´:%ƒe€Ð麷Ýè–ßùGR¯{ÊÁ¦Îm$ÿÆ@\pzËÂ5—ç@8­øuˆ HÊõkco±hÊÝ(]Üh×N:Ò«àÕ]ù'Þ†À9rýª½SÆõ[³õÂt¨äåý¿6RiOjEç‡à©v)r_FÙ¬C=Võº0óËnkºŸËMmüCÍŸveÚ´oŸ]ÿæ•)O“VöúK™bÞȳTÐÎÔ:ªÚ6ot¸T"õSÁ¯ºÞsVÏ3E7{ o›œÌ±­èó?ì±­K+²Û­+~mÞÈ$ktëÉlƒôÃÉÞ€'5ÃHÓAµRã÷úé6‰QZ5ÇRdÑ\äËÕê~˜u¬Pò‹Ì,ô° –K7Ÿ¿ªßm(™’"% ÆÅÙ´Öd˜5M#ãÅüÜK]— =‘wªÿÄö«2ÒSž Z®woZd`(SJš/_ú‹ &ÑÚüÞ']5$Ô5ïDsôLQ‡ŸÀÕÍsœ^¢}çÒÌ:ç.¬Ê¤TîµiŠí×Ï,ÌÙT²8bÆR£/Îú¤~,P/ØÖ`âKÙ‹– {’mNû™e—»BO¯pŒ^ÓηÀÈY<ú³+l¡²«.<Ó3ö†1_H'±,ÄÄ:*÷ˆ ̼“Lï|`ÍÅ‘xõËzì.«¶T·12z† ;™mÌùÃ÷»†XTš´'ôCNZUøÖÜüpæ ]`hþâ§ùcº&—é[Ýkæ#EºÒªd‚Nk*>¦¤&ÖÑ"ÛÒšvV0äq$õÏ5—ça¬Žë<ëÃ¥9ÉPòí·¦pŒÁ¨Ì®|TÛ(·1í÷H༈Të¨Ó‰3z¬\}b6¬«½7%µ·´ž14Nk[µ{”@’j˜#Õ*Ú”fåªC2Za’dS1e¥ý–(O$9FîXóódAÓ³äýç§}|xíú®G¦ü8dÔk“6Ý*àíaÏÄÄf©íéú?à¹Áïªóý  ê¼5üû/¹åÿ9üϧBlNpšüçAìüSÝB¹â[t ö?UuÓ\çã8ÔvÞÅ'þ­»® ¿?ĆÅuê‚Ûÿý.ØÇ·PØu:J½N%ãxyyyVVÖÿ8ùŸÿ&‘âAÀÜe¦Éá-'}uqãèV{5 ³É¬®ðþAkÅMUøv/Ñn•–HžÌá­W¸«`Z jU,˜¤¼NxÕ@Ç¡bÂÑ5v ¼S°BÖ0äí›vžøá€O5RÉê£ñ«9}¿–€E@a¹Mlº+}¬·š‰^åŠ=‰›Ê¶Ì„ W¸Ëò.ÛøXbÁÙñoöÊßw·±î¤Ø%⬂qÅYK–¾~Tù绲ÿyOÈgÍ¡‚P:( ´`•WAä‘Y(P•ñ¬8Wz]>·]]Ÿ5zÝ*èHäÇbƒkW7Ã5÷ nÊÔçÖX"Íò¾é³½ÏýpÜ=õj{!—Ï/'ïvšÿéÕ¥hÞ‡ ¶]ÉÇÎܾKØú!—•®ˆG %qN€2›ÛþS2sî®ÅcöM8úæÒ——(b 0ÿfžî¶ñíߘxÜ @µ«&ó»-Ý5k~ßcI– ðƒ(H‘ªÜ_âD HÒQ#Ë.:ëÊK ^=°äöH¢òk ¨”À/—$;L†/ûÜ£W)‘ȤS"&ï ú²ì k]œ?Q­1‚ü8ÆÅä.§_®Èžo彰ב˜.´â»Ñ£¼Ÿ¥²K†Äsᤌ¶…°{èL^^‡/Tîl“ØÄ§Öøù=`µY±(aŽ1V°Øi%ZJ]V¸m³©ªœ¾F]Å"eeÕ‘8‹³Óë»”ƒ¶Õõêö)g>ØÒ /®¸×gÄkNµN>2_­ïv3C×àv¶U’6?ŒpJÆ6kÜìÏ_K6?{É+'…j¥pD)VH’z>©d¥Uj°¿ˆÚ©¨,>ñæî¡wãA \&î(`6ÊÙd0ö"‘ò\kïÆ rd:õ›U§—ðâGÛon€;4dëëßÛüòìî'W^í;¿ïI.A#©ØËx 6‹~!ŸVdÓX#h€ZY*³bp“Oµ™Ý~¿Íh›ŽìŒ+ò ¶!*ºèçÑïw\` ©¬±,Y(‡M°\ꯪšÑ&¶Y 9–ü£õŒF¶ÆÑ@ ²ü| 
ü5ÖÅ®=3vq§3²„‚áh‹Á`=\<ŠHÐνn§Ùdð¹f“1ê–ϹØc]Ï_yMxÆ¥Bœ¹V š7ê{ûVU³Ôˆ?ŽB&&X«Q‘$­×7¨ì”,…] ÔØx°Ây»QÎsgÊÜÓ "A¬ LVv8x1MV­ÿ…ùášÓËÆ7ø8Sö\ªÔdɶW™&üÝP?VUë´i[óœª(ÛQ]…7…(Y\j1e?ÍÐ9âOb"*'½MÀ锯ÊÄ´¿ajÓšªZ¥ª3vP”+Ì; (FQ­KŠ#ºœ’‡ÎŸä  £#£u °½8òvßOú”·AÂÝý°Ù&õØs«àÇ3úíY}jô¤6?nüþ¥9ý>)¼þžàÃl¾´èËù c»‰cÔ¶óÓA¡ämW¿5CxÝJòA¯‚¸GdlwUeŠÅl ½ì¡+•Ö ,)NÔbHªÔhÁ¦Sè‚Jç¼óœ‡8šÞæÛqãp8™¯á¸®«¶ž‰7Îùãr½dòÐIömÄ9Û‡}ñÁž7¦Íž»öü –‡®FlJˆþ­Ûߪ€ÿˉü[àÿ÷âu˺ýõWÊ„îôz½ˆã:ëÖ­kݺ5z¤]»v·nÝ‚R¬‹ÃåݸqcË–-¡PÛ¶mûÃ?`á¶î+Ù±cG«V­ð«.]ºüüóÏuQë:oxçÎ5ÂþsÏ=wèС6ÉA¨_¿>®Ó¯_?\ͨ[i†ö­3‚þyòß ÐXž+5Êp€ýêöFÌÞÌçõ[nô]º¿SýYŸž[üñ—›œ8BdBcµŒöOum®v‚§PJá‡|c0ˆ ü¿üÞX»X¦Sʨ£X”(bxabâ„B#qð©!nw¥I£Z~z`=sª<‘XóÛÈŠÀŸ ÷´÷²å[Ïæa¸#ICÈ´L½ÝP Ùwç)Ö½¡ö½',vD¡'Íä‘ã N±pÒŠ®Ÿ¹o(OC¹è§íkf0§I1’1¶2¡Qic¬híOïa%µ:ôBE'°†-ˆfÖ/ªýŽ0“i"S‰èdÍú7/:EOÊ¢7º8¥ÿÚqýòǽ¼Ð®2 = !ž¤W°ÄdÅï/9Þ• ¥åe³ õ…èÇ'îòªM6¨~vò ª]­2šä:¹;^ ‡H#C…JÝ¥þ°G˜¤È!Û®€ý¬SIÄ“7‚"wÐŽ[_9¤Yº¦(L5ê!¿C+{_Ph#‚5À> ±ñra_DŽ]Úpæ—u…¿t0eÆj"bŸ\­ª½gKQ*Å2ž“ÄXùâCCs2UYEÞæBºýN->7„BêL„|¾û㟦ˆë§è2Öyuçé·ÝÌPòíinbÜñ¦Õñ²ù ›Šzg5ýlïï6—ÃHO;ûŒ±^xÖÕÁþz§Ä¹Ô“èýÖú*‘†/u— ì?öõÁÓæO)žõöú»¥Íé½5¯óÚ°Â/6$ËOÙŠþ‡?ô8XÞ(å™%W»®;ûA}s³â ùUwkmòŒ ½¯Ú”™±0Rä‘ ‡ìf!•W€CáE6IŠ5Å bÊmïßa‚a[8ô© )?yË9':¸@ÁäON=Þ'd)ƒÎ%í]uQHs•™¢è1ÈÐÝ—=.ÿ£`ØI³E—×ý¸Å‚ shÝ¥ÑZ±bÉ7™Š˜U•™kl\K{¤ U4$m¬n…Z¯(\Ùÿ¢^m`BþÝoÞØðÎÉÉ/l°d]qûñ‘%+ÎÏîZR–œy¶ûüÃcÖË—…„\a=ÛC^iÔ7oØ‚£T׸>þy–}²öæXŒ¥…—{©4z•DoÒù-¶ºxÔ£6ÉÏøºÇ²ïÞ € Z![ÕõL <×û½òß<cfÿ£%÷‚×4Ù\•èAXp+±ê«6ˆ 9 µ`{é%yWºOÚÝRgZT–É;Ú%}–âY>a¸ï®œím5…«Ø„KüØyoó÷o¯9» ÓkËoïcÝ:QkH21U Ëú(*’`êŽïW,Ý5<œ¤•=Ê»ÐkÞµD•Œ¾Öë.¸ÜžŽDUfSï=(ûì½pU×Û7ºŽ»ŸR¤”Òâšàî.Å)î A’„` îNq-î)ÒR¨á=Éq÷s+ç½ÿyï÷ÊÌw¿Þ™¾3wsØÙ²öÞKžßãÇÌò[˜QDKJålÙ/Ž'D`•½”ùVËüìaûPñùûýÝÖæ/[uyÖŽ[‹øN¥,ð±ö ƒœUë(ŒäG0ý<—ž­bUÊCUÓ¹óÅ*æ¬æ?py¬©½Sg\6±Ë’œÍ)¡Næ–i8%òäë0Ę^©‹.±hI¼ãæò=ù‹+Þ:ë†730]:ÀÆ4Ifìí‡Ú›¨—… KtrˆÜ:®ÉÕ¡éÛÄQúùR&¸Œ¯H"7˜*±yj$ì$ÞOŒiV‡ý…:,j–‰VÑf™ò; ¨ÿ=íýŸ¸O…®Ô¹ùôéÓAƒ€!Ëí”Jý‘Í›7'''oÙ²eøðáË—/oÓ¦ÍÇ«W¯½ñáÇ׮];a„«W¯8p@[VV¼ŒŒŒ0ã®­[·¶nÝúàÁƒMš4ùôéStt4àÀ¼dÉ’ÜÜܦM›^¿~}È!oß¾Õh4€óK—.9rÓ¦M]»vÅs»uë†gA"‡6û_ÒöÚorq5Pk½Í)ŒFí7:½žžCR{¬ÿ7‰µ$?’8ö8²¹V™RГÒ=ÈÀLpЄ%ÇlFrc7-NO”¢âHÑÀ*QS>=û з’p”SJÞ†òºßiž3ð˜ÉakS¯í»_K¶÷zÎGu`¬rD¡Øõ3¢6?ÄÚOHupb–…È¢ã¿ï–mTWœÈ_oÕÏT$€‚jlÔ èÞa´ØTdËj·5?®Ò7êóFÁ`Ú<â½ÏkÕÙµ¹$`²;Ø 
›Å7­×)·tçKšwzÂÀ¥»N­â#W#$Gð×ÂÈ€'8ºå)J½Q(d¦%¯ F„…Ï8Ü~Êft¿Æ›×of}½“ÎѨ¿Äg.è¸ÂiåDËšªBŸ/*£ße‡Ó³²ÿ‘ ×n³Â5¥†Û&šßò…JÞ{IË|6©Ÿv£éê1Þ|ü}åÐsåz7zØ(å¢Î[øL¿L¡\Ùþd¤&þmé³-Ã*6¯Œ'Ý=â9ÇÉÝ.U¿¯ø}Ùƒþ”Ií—Se&–Å*3ƱŠO¢œ˜ÌÛ›¸fÊ1‡±” ,¿;6+ñ¶~|~ù¢=σLn.¬[¡/_ßù@+:«ÎOb¾Âï1²üÑVHE!þ ÒUäÞ™A©a€8wäÿ£ÚTÌ~Ö]ÙˆÛ‰rQ±ì¢ž…Ǻ_GãÞï¾^´êäêE}ί¾ÐÇW¯=5O`d dfÒªlÔÌñ“”ÁÙ’¡Uþ>i}ZŸ3"¨Wÿ}ñËe‰^i@Îá+rÛ]‘³b¨Ò¿pp q¡ó³€/0»L᜘{žŠH©º´ü³(J¡ÓÀg©ù1V—¯ñFlÛªAwL6Ó¶'£×´}´®Óƒæê¯è{´¢0uôÈÖÇßÊ ÔžÞt~ MB5QÂ_Zsx¤Ô4”3þZu£*µ¯‘T«ÎLiq®·ŽÁh°8ÌÕ˜ÕÙž…¸lv»J îIŸA…TjÓ›G¯i¹uú¹,Ü€ _°r²aòõÒ׺êÒ/>Û>çõ~ûË,O\NÓÇ| Ùý|vdn¯A‰5RZKˆÁrÌnŸÂ÷h¤Áú,—¡xS[«‡ä’(“Õ´²õS…Æ'‚[”NÈ.âÓZÉçÝP aŒPJËCýºÎp×^²%ñ©¥@gv‘Y÷:oi÷ û´¦Âm4zýæI¯Î–þŸŽÇéò˜\þÆ5{­J¼êq85Œ0![ʂƗLl±9L£æ1yÈåâ{ÔÏöDDÊ\H£sSNJ6aY­m/¯¸Þ]]]Vö©E80?WÞø–"‹,ëôKÚ‘/v$šù^ì)>_pû‡æ3ÚoÜôræÒ›W]™ŽY‘Òå×Ï \¬ÐÓ,`*¦jî—{Å2EW=ìf‹µ/™\ÁUÍb½NÁÑÌnvœ­¡J&•4š˜¿oë…1xèÌNy@ÊÙn"ä.Ö%ç®ôF§´ÒBU¬‘À°/ƒg”œŽ²¤É¢\eH‘ÙDtÈ^YK|gG;:Q _Þíªkð†YB úMbcÉÛ'5¸õ`@q  5iRØYÚ÷²JÛa·b•¯­@`™÷å IL)_­ç‡ùÍŒ²½·ÖSüþŸ¼ýãTÐà'N 6 YZZŠ?¡õpƒ ¾üòKœâÂ3  `>räLÂ7Æ)Ȭ¸*åþýûãö}ûö¼kÖ¬ è*‡éܹ3PBÎÆqÈÓÛ¶m⢜B;iiiÐH׫W/>>þÊ•+h¢3`í£Á©¸ÿ£n<Ô¾Ûä HëtÙYÀU,$ü}t‘‰æïºº¶ TÉàA3NÜß„Fi:È1eIÈÂÈ…²'Èåðaðó3±Ø½>âCø:ÓL˜Ð ãzù.i¢ÔÅ:dŒ/‰ßÅõ=^®ÉÅppÂå'Æ.ítjÕÍÍOF*5ãÿJü¾Ó¼×Qò_Oˆ0w,=Iî{,ûìPJîÅ0§ÐtÊôšªÅ'WÔ £0_Lv7¹ oÚ‰/ºñ\ÝçG>3Âh7­¸Þ ОÓã–ÑþZ,‹€°l·»×ÜíB…Eȸ`22~AÁEµŠ“µwò‚þ'Ö\L¯Ä[ùÈê'm´n̽yçÛ­l~$L’à´æ^o•7à™&ì€# ˜P×O0 J·>^FÛG‡àåñö1”²°s*ŸÉM;—º¢õÖצ.ovÏlŒz,ÈcÅ…=ˆUƒ¼¨TL¼UkA£Ýq‚Ö,·Ì°ù&I ZÁç7`è¡¿‡ãÞ¨¶3ý°‘ -/~¨çš1× -Ÿ7?›8¡ÿ²ÝwÓªžŽ!áÒ@fbzîùÔ…]Nå\hþ:ˆ Ï6’éMW ƒ¢œûóGµ™qèö¦”aW2vwKéð#ÊIU”W„K… ®&åµù•Å#Ó5B.å´Ž—Àó!éà¼Ó­0×´¿+æË<(YèrH„*ÜF©QêD¥È«Í¨uPìò=HÆdw˜8(´"æ,ˆ„/Œ¨OL‡SÈ.À³Î¹îj»ì^{’÷ÉÞ2ð)â°>“æ@€YÀ«ÎÞ|%wI›K¯Æo'áì8‚úŽ`bpˆ..êÙ»eB1ÓaSÉ”:½Û/ð§>ì…ImqLÆ ³–0Ä\™DÆ„ Þî”+ÂPû/hññý>$CÞ1[hê]e¿RÞ£¶zÄÑE»†­ìü€‡|ÃLïüëír»]g2âÊlÖ„ðÚZÃ[ç“@â3Z£$†Ëó)ýç1p×p“J—zxÑ |怤V?åK2lnŸYήmñU‰iÖíî¸$­Ó6• ΤåòµL¾‡eðøŒËî´ÚÐñ Û-ãA)ïœÛùBü‘–œë=ú´¶,°!ÐÒwlÞ÷y'Ò·u“ùI[¹„æ„©nc‰ý•Üzg¹˜%Vˆð½ƒ#WÚÃQc“R…ÖúñŒ §$ ´ÀY>æÏ˜=VoººL¾uüZdˆCvÏr„"ʬ3×t»¼àZ÷ô®çü OVS\ü…&N@«ˆ0» so5I{:ãü0 ë;ÿ‚²˜ ®·ƒ*kvóÝŒ€‚aN¨¦‘:,ZÛ炯+%“îKk~¨¡lôûfJ¸·Ì%2‡q(p$¢ÒW¥’ \]˜<–“?çæ4 „æ*i„šJÈá.?3ìrÝ/ê™:ŽH`­ðÅðøFÄ`Ä_©$’ïoD~ßv÷Ž?'е‰AÆ 
ŒÈÿÇè?`¿èþv0Èÿ?k»wïÐwéÒ¥Ó¦Møáåàî„x`A)=cÆ ôÐ’q»vínß¾NºøÏ?ÿœl9!ß«ÿÊ Íò¸ ¿$•ëö#ß®Ó϶#)+]T\²ë.Í1I×0¢„˜ ا®`øß€•—0¨˜ 7Àð9¹ŒbÂa{ ÓFævÞH¥^/ì¹ÒÍ6çü´jî•ï^û~Ÿÿ¤‡VT\Ì)XõlÚòkc²êb¨šSŠÿ-ÏÚñCöÜ!³)í³…|¶è‹é+"ìÏ)úÂô pAýR±ÆÃõ±j~W½žˆÀEx[%19³.5Yñ¨S¨#÷]ÖŸÓ>3^­ÈO„,¾eÕ¨Ýt]aÃ{ÚdÔ˜wFø’5÷ÛÕä=™ÜiÚ’¾™)m·1ÝìEÌí°ßag,éy`Z÷œ¶„³$N½qCó«ž®‰zìÒøU-gvyÈóIX.qN¯Ç<.jéR:¾tø~|”_Ÿýð«ÕO¾Yÿ¤£Mvwïói»~›‘u|2úAïùƒâº€LoOËÂëÁÌ Û—½(éK¤ñÂìAEVÿ7åN ;Æ–ü¬g¹¨0ýÙ¸¬'(úò ’]9ø.˜G)ÏÑÇMF6ðð²¸i·Rñ'Gs‡ ¹ÐEzþ¢…×».<„ òaR. õRæw~ø~@®;!P,15?Ò)·Â÷+#´Kd8ðh!ªÊ„d %›\yKëmêðÂ]¤ gÖ]Øù /(˜Ò{)eàB @Â5χ÷ ƒ3¨ >l^’AVÐq gÄqìrŒÖAÿ›Þ3Úßç˜$…Ö¬ó£$o„à }·àb[\ƒ¥K½ÚÑ-qù˜ Ëê"xãµ°.¡øÁÏNv©”Á­êdüH»•Ô³ð¤¥6ðjl‹0sàat—®Ø‚œ¤t Áù¿L²ùi.(ó]´‚(œE¾R;tä/Uµ€ÎffpE9ém%„Åœt²²¾ÀE–\I¿;tþ±öl5ÏÆ³•úJ?HZû¬»Sàµ2L òë³x¼JbTI46–£ÀZšñ„ú¦Q> ЫHë{ßz…21²‘çt;íg9t¶1á¢"ãNnùŠGƒ­ì÷^QñÒÛíÜÂÜâ±":Ž0‘õ«Š CS^Ô(ËåyÕ?_Là‘xÔÜ@Bp–+bsË»k[G¸Ô´‹=–¾ìä’èŒ>“€çdø {ûüÎð‹œH¿ =“¡@¼œÝ¿ä,¼78?²Õ LQ»úeÞÓ‰t=ràý°bíõ©™7úæ>é#ÀÙ¸òÖÍÏfq¥R˜`Òò‡¤]J²/?Ý,ó0õ^”e¡Rº¾K›žRêbÅ-é|x×µœíçRvý2Ê,~¶úçñ™&eý4È祅*¿}WÜï“ñjt~ùO´Ð©tŽ¿QÃ̃ƒØƒæï;æ,îwH³,¢œÄKi …;#7\ gúF ˆð¯(‡ÓÍq‰æµÞ3µËJâUùœü³OÁocõ‰(HµøIK ïe‰ÐdáùÞ¸?ºÂm¨çm—ÚX¦y=wRA_âÀ˜Ži·dê·;òº=¶¼T¢@IDATQ$Aè J œjæXÕgE}^m†¹õ O"2X Rûže&˜(îÂsæ­*ýT¹ù~ ÚD+¡PUÈxÿ¾E þàû¡?ÿýÿ¯÷ÿ¬ "éóçÏa‘ÍÈȽP€WPP¥1œa¯ÅqüFEEiµÚ¢¢"Êè‘jÕªŒCh ùõÇ%%%¸ i°[ཌX”¡OÆh'!!^ÍÐ-cÿÛo¿]·nnyùò%\h›ñrѨƒÀ¨À$ ˜½Cèõþ×_dO…w¼¹Šx^èSH@OûH§ H6–{dÓîíÙx–:Leäÿ|þö§ËÉà#–ÕG CšÏ…zœ\®‚‹¸¾*ÊÅ!å¯qž fm •C@.$ª¸(P®PŒô 4icö/Û4Æ Œê–q(?…¾&U$ÇŒøcÜ+ÀËä=4UäH+QAµÅù¸ B‚Àø=6ÄÔ:[üYý³9Â-r†\(v •áßwÜÁW»½LÓÖÛË,’×XB;¤Œj±Y%PኋKÇÊÒE›TjbD» l_hÐm;˜­@Ö].Ù~ …Éêsfåéá”â@¾Ä,H7YÛòŠÐÅœs¥ëŠÄãR¯`GÇq¢¯9\f¡ñõÑ5Jµ&» b(-æ’ÝGðFÅcð1|âd»ÕpYfQ÷*CNóß•L¥¡BŒÃç›Òç„Ûî®_3¦øU¹Ë#®Ý裹þ¨³| üs =¡vнؑð‰V°»9Þ¤®kvžY ñ5@ƒIXâÖ„s[…v_ƒp#¿ëàÀëå6‡'Ègpô޲á*‹¥bÇÍÙcÛmÜø`¾yf á7ºèpxy›a€ìDnÒLi&Œ%EL [ÊxkýsÃcª! c ü€¬ß}YPÏã»4¨³¬í6×ÓûüLCwh®1“Ô“cWt=ƒöמ6³Õ¥jÊZ• ¾Ð$Øtn‡*âÍ||?eO{PáÙÍ®²˜H:†Xh& ƒïg{H¡µz}Ƭð¥vb‘"*¼ˆÈ4ïÇÞ úÝr¹˜"åò×ë»ÞG-8E¸bòÎûúÝ a™½¸nT]é³–5Z'·Ü§áK% þÂÖ$ªèÔ~7ÅJÁÔŽ{¤1žÝoÇ®nƒÙ¾Ý7Cî=?¾è\Ïùmö×Ò´u˜, ò{®HÚ¶æò)à!2•Ôo äõ¼-â¨Q^d^ÓÃ\L| 'ýÜÌv§ÃÉrúÝþXV Üå1Xi=îÁ¯^-ÓÁ¡—XÃ%áKZ^ ò*‚†H ÞÇeµÔüª¸è3›áI»?`u÷ã<9™v¡^jç«à  ¤ÄGPÝI‡k+‰Ge=jñ? 
Ú‹‰ê†õ1ŠÏV„»œvS¡C¦©ÌmAîO–L¤ÑW–Ó‘ŽÌQnª„jIªd¾7¼ŠS7ÀB¶Ú=aáØM=wNü+'ñþÂmx,¾> ,’{i6« óA”C«¶ Ã »ÿ½Xì‹ öàð‰{®Ì\¢”„g}wÛá¶èܶ½˜‡ëQ®8åL×´Æ'¹‰'-3Ã<µ¤RIvÓû,)Ò?Â[„?C‡fÂ×Mî3ÞáAõ®S…ký0f™ïw“·|×­ ß|‘¶£íÔ¦ÇòC0EþjwùŸ¨ÛóœÚ'ꉾ6 r‰ ­Ò“®ó¹"Z߉  o”¸9láºãÑ8Æ‹ Êâs"ë4 v”+Σ.2z”" Ûã6¤ü8.®IMºc/] tedÿ™Ì]oýehDj㾩\Ùct"*U«P5IÈÑZJXJ{z~¿nGCêµ¹b„NABø›6h@!z¡±P¨*>öq0$†üvCÎC¡üNÓcéÂýgm@P /Þ °z3€˜òdF_@-1z¨À]tS¨û€Ö¸21ŽàFˆ¼Àfôi€Ñ \¨p·à8âHÈÒŒ?C} ” G;x ΆšîâšEpÔ#þâú Bæ÷£.òÀCˇ*é ” ¢-Dzi—#˜§gÖ‰‹Y”7ÇM¨î%ðÓüÀ$nôƒ.ƒ-€4ªs1ƒv ¨aªÅ¨êÍkþù€ªsÚ-O<¬wkýðì&ò´®·yŽðhäX("¢¢p,˽çæN.fÌ"@’sySµ3æ+æ=‡ÈQÕÇEÎþ´Ëç7º}‡GsœÆ9@ʃíJËmõí$ ©§©VS÷Þ‡0T©qsl¥qÇÓï)¥@ËL²%YrÝmÙw§DyöyZWq,™g'Ч˜HÊ€GÎÍÜL%`y]JSüSr†yÎ"žbª†ÂëᆬI3)”Ã¥¢….ªEq<Ÿ@o²p ÜÚ<ˆÓ&SòÉF9£ï-¼3–6\£X4aÈÞÛ»G´™~äæf¼Õ"¨Ê„K+¢åuÇRCí[LÜŸÛµÐnAÙµ¤­g†!#£š·»U…AÍÀ”RÀs’þÃFž¹v˜RR(ð-FÂü¾²ÙM‰õËi=¶yÌÌimÎ)`Ôö‹×?ê€ËV·}DH-»Å¾øaçìÄcH±~¢õº¤sø(ânñÂØ,¥± )ŽÄYcÐ?[§õÍ3¼16 oЊÙNïøÊø‘Vç°ò%*y¼¢_Ò6aŒçÈÍY!©1ïDÚ˜æ»xÜX“ùö')„N³ú´|Oø¼&ižú&ÓÃ`®|·ðc‘šŸ`/3ðâÊ·Q¤ºê?1Ï’m“¬úÂæ%±"ý~³…ݤ^A)# õJÖ“öÓ€e½vS‘¹ =0j ÇæÝèyëàé¶Ü’Ýé(»Ïíbrùz“Ã÷Æñ"û§ºþ:j5Y?>þɇÏÙ÷Æ®I: ãfÎýÑxù¬qwJ|Ÿ¶‚RÉÐÕ˜‡^Rÿ͘&Û«3ƒ\5¿ÂYLø¾9­3â•Û–=zñû{1/yqé[žLäó+„Ùº;#Wv9_\XÙí"Óìf+9~žßH¶çIG)·>Èô±éÛ¢|_q$'LÀ†­DîRxæé…'.èt|ÛËatôËëo™³…-ã”à•Ïíyõqíví¡Çó0ß<™ÝÜñl ö×6¾o±[8RþÞ§Óæ¶½¾ødû±W«YZR´dOô’ç´ö÷²ß>_Xl-_‘G§3&ßh“Ö÷]§ÐÆ…PƒÙ·ýÜ:½$«ÝE³­dÍÏ‹̹zâBõOHñ€NƒdÁÙQè Š¾PTˆ‰<\Pn/J}ÜyvÒZ?+LÁqØ\ÖGKI«¶ÉÏrÁbÁŠÃú¥)_€¿œ WågØY¿ÙóIÜèÁßǰùzÖ‚~)‡]N‚ô“=ÓF +ÂŽÄ-dvŸí%¾&5›1¬®Ÿ´7@¥ ¦çYt«>ÿèÐ’ò²#˜'‘vù£A¸ýì£puÁ C¤2©ZXÿç?@Є¿"c$$•… á‹/¾ô†ìžÀŽoïÿùC-PŒùGm@V #ИbCðû/(E€Ù ÇA &ú Â.:  #îÅç„Pýˆ?¾èµšâöñ‹Æðh  ‰³hŽËpK‰ñìà@zdž§cÐ8Á½ÿi¿ACÉf³|.·ËêæË…7™I8~¤J¢ ‡™ ‚ŽD/ Áž*ž×jqøYG>§—#äQ¢a“+À=rŒ[Ëô—¨¿Vô·Û8N“W.Ÿw0‘?˜dNëc³G-Ò”,Ü44ûvvü½Òn‰‰¬ƒ)Þ¥å¸Xû·C2½óe;÷}PªÛ#¿4`3•‰U26jÜ!ÖC2Y¬:©$/ððÁNú†.2ÿë{ –Äïaìøq¬);蕠ט8Réy…™MÏ€o@ò‡~ Su]fe‚s‡š¨gâBé—U¢é¢ ‡PÑÆn¨<y.;ʱdLvæ¡d|æÐNËŽÝK4,‡%fTKc¢£-V†˜¯fÇø½6‰DT¦/@QŠÜáºÈ8Åäo×FV‡ÿ¢±EcÚ.²9Ìšx¼x‚½×Öß±U|v!Èe|Ò[ªàF!QåÔv? 
ÒèÆ[ü<_ªŽ”¤ <ÁP =ö¢Ís<ïKõq}©’ Íð: ñ+Å” íÿœe–ª²#k¼~Rm-sÊeêŒÖù.ähàD#b,Àâ/hu0&,ÂáFA¢ØíŽWKMú)L¿|ôžÒO¯š%´(|óé`¯3ÄÃß,U£’ Ôªj_…«ÄQ(RÔ4XtšH{Úþ®º4Z¬gÿš†ÙÒéÛé7îmƈLn·'œ†kfËmî{©&þ&#ÂbòœÁkÉ 3‹$Î~̇4Ý&ÏÑÕø~–Óbó¬¿éß徘²ªÁÄ  !WÈ\ÙïªÓgý¾KÎŽû ñ™(íl31¬i—',í°‹0¤‘ÊH?½å>¯ä WÂq2Ã2G?0XJÖ¶ 2¬>».fVÿËQT ìNeYf²ÓPó¬­#~³1¸Å™Ýí,ŠßU|àÇ#:¯ŠÍ‚§€zZ‡sbr~Ag&ÏSȾÉ%|dì7«÷=_„Œ>WO2½ÅÅ: µ>ýºýñb Ù¬Ö{2O÷¥<ö™µÂtÊÜ ×üömêJ“Òr,¥ÎæVH9(`uåõP¹#¿¢}ˆÂ#úJl………[qnv†p çP6+4 LÅ)ô2ž…³x\­ZµBˆ`†Ñ ß±ƒ–‘OÄõÿÍTš‹ÂeÑ"0>¢šÓmã²HìO“‡Í Àï¦Rq8­ea`xÍDª–ÂÔäq¹ …=ªeÇüBUÆÁ‡(æ¹ÈšÙÇÒjb3Úd“š_Sô—´rÃÍ¡ËÚ\U…«½N'¬ë·‚a‘!)Ý÷­ÇišEÚ¤iW†-èwgÍÕ$úİ*‚‚Æ™&‚m‚2Ô¬/}£‹ªYU7I”¢"6¸Y^ŒC—+dØ`p:ô€ÈZKª¢•ñh-µçÍ·©šÿëõÍoÇsëúžÛE„äìõ z£ n’¬ åPNÂBÆuÉY}f”—e(˼«Émc‘¢–~‘Z\ r!\`|¼’~8´²edZ³]ႚ|G‰^ïú¢FÍrŸÁò},x¹ýçù”qñ“½6ì¿CÓ:Nj›KŸˆ^BÀƒòFa‘|tãžÛpptíÅñÊ&bn ÛÍC±‚©_m­Q/¢°@»õÁ|¼äªA;§oïMdÍðØ)­Ž„4Õ¼Í)y ’•ýÚËÙV û+E·¹"Z)þT¡æLéW› å¬oùŒ¸Ø0ÁçŠ?‘yÙ#4uµ&<ƒ¤~ïšçC([ÀD/Ý\»{<$¿EÖ¬~º½1§Ó†=¥ÃèžÎ ™“˜¶áÙ0|ÅøÉDÝU¦aì¾Q•¬ÃGZGÐ@Ø9ß\­)i³$\¯­N.á†ùœ™ŽFNÓ ‘: .ï%‰êŸûl(M€Ðþti‰Ëa§Üm=¶ÿŠ}OWfaLá3ÜÝ-.“Ïdr ±3mUŠ#$,!¢Õ3{]ÔêŠaJ“Íé8dþFr™_\Ç–|xÆäæÙK´^Þí ÕZ&fEð‘QBû…ó­NÚãŠù¿èŒîA"SÈ+…%CÅ "gPÚQ+Úæ1®“- ˳HŒ¸&L°”a‡}»R‰ÚVÚp¥zzÛµ ¹46:lIû#pnçr˜ µÒ{þ@‚5å ¸D üÜdÛŸ3rûý8û@¯Õ£/BýG?;táì©ó´ŸÍrE£Ø¨„ÔÞ;aƒ <a^}øU¬¬W'lñʼnÀÂäþ ß—Ìè»+Èô~,µ`ÐZ¿öøÆÇbjŠ >Z :û©à­*)èªÌ"t1¬À&æšëí±¿°Õ¥åÏz,lz¤ØÉ÷#͸tÖ9$¨‰{±¡ÛsÖ¼ê|›mý^a"U‹nAyÊL«âÃvÌFY×jסÇ:ýrU!:_Q«¹ÏDØá$ïÞøïj-c{$r™#‚ï·û¬,§kq»ËB>7R®²;­g©L ’BµÜ½nÄÆ âª7¡ $6×q±FÞÀ+w~2 ”)äøm~í¾ü¹TSžâ¶;šÎmlNÒ(ºmÅ'Գħñ3ÏŽœßrƒµT¯ép1ôåfQ-ùرËöÝLóXkÍ‘¡¤†E Š20ý†7cøÙ ‡©³YÂ$Õ£â4ÚOA€½ýìJw) U ˆ‚%B"O(ðé‡'7^ž}y¥&XÛtt ‹¦MÊæÇpÈ €TN½œ1®É.Så²ü¶ë…ô<õljT³‘å½!ÈñgÝŸAW½ÌôXV˜XLØ~þ&ôÅ[´vAÛ þÀ!4D;.5!ô­zï¿çt럵…4ìÀ9|0 7¤%FïÀ ò(âýp…=‚ž‚wtee%N…¤Ràèëׯa!fãŸlF;€RÁ>úûõë×Â/ÚÄdØ@³ø³N:Àl˜áüŒ?q=,Ð ÜŽWÂÅ¡QÁ©ÿ¸©eJqq9݆⠩ —¸ßí§‹€bS@5'XûNf/ºxÍÙ,,[X¥•eZM´ +¼{³q—ì¥- Y ÐÈc+DÎ;áGX)Ì(ˆVþ–®+üC®dV!˜t³2¬vs-nw qC«Í._{¥-–Mzý£y}n9œ:JÀ)c¢1ñ W®q™„*qÖâÌMG×Ã,K •ESO  ¤ÇH–Ž[Á«ù¹ÌPMÊž×,Ç-wõh1ûÒËÜQ}çD1›è ø‘dœ…‚“iõ˜Wþ2xtËÔ9>m¸2´þÄïÙƒÏ:ùK]À9  À£ŒŒí¾tß-ZÁ©E£féa»- *â0K]ØÐE*R·YÂô¬¤ÜîG^Ür{öõÉëÜB¢ŒÜ‘ûfŸ‹6‹>}œßu³AïŒÕÔ™Út7ñÛÂTÒŽ 
”“ïÏ=|v=k|äÀ¯Y©-/ò½J‹Ýºöa¿5ýœ—Û+eÌ®™Ý¶n¼5Uç/ŸÐwQ||õ²Ïƈˆ¯t8‹È‰YÞö‡ÊR]„fõ[NË麦×-ƒ¹,ëÁl¼-fËŽñ/Ê>}^q£÷æq·§Ÿi?ãÛ›žÏÄbì¦~»™v¥ì}\eÏb“ôV·>~·íùäåê¸R|rá¯ÍhtDY\"²à&HØÌæ‡Xn†ðýúûÙ M+¾>Nê Ñ 7ÆÓ„ß x½sG%ö>~çúj`÷ù;X‹Ñ‰Y)úá²ÜK>ÒzQÇ›îN_1øÐÌ6ë7ïÁVùÈ_ßx_è0«™ÿËÕÝsƒâEÝŽXå…ÚÊ÷§ïïåÁGC²à'};Ž=yc¹_%M7û§ÚB_+ „Å¿·õ¶Žw+õô ŸõZ…³‡…9Ä n3!ø;6ˆ^@_ÀDDCÑ0«p<Âq` P 2^Hÿúw<“¶ñà¶䀣À]à+DXð|!›Þ¹sÙ6BæsçÎ 0 ̺Ø~üñǶmÛâ“p#rt ~b+$]ø6#r ÎV’qäâÅ‹íÛ·GW"ÞÙ9~øá‡ôôtŒ4Ú¹uë¼ÑãtcccOž<`øK?~ü¡Mx+lxDH§ý÷6,‰~|©¤Jh–×çäD"õÒQ ÞâàDò}ÚO»te ˱‚L\’±ëtŠ×D4áшa$6o\tmÚªŸto9öò£}B9Óå5ñ¡™——PÊÅ&'>îuð ˆYòºE'Ÿ¯…xPXa˜…bxš.‚Á÷Íj³Í.Ð%ò“* H!û!yÊÎìm“FNXxø|V7¨ÔvÒÉË;±’Kÿ4ƒ°ÃšÃ–ÃàŠµÑ«Õ€o^uy`ošÒt废–ÓÀyÑUÊßЯfÚÙ§ËF·ØqàÅ÷ /²E.‚jm£Š~®ÊbE‰çVù+a±Áòå‰Â«‚@×v6Uw6ÊÉ+|ÊÜáûmff‚G³¹áóZ/ç}µO,1)¯ÓZæý¬rGä$>ŒöÚлÔψŒ —•||K±ÄN5¨g1¢P zPß[.’:m† mÒvŸYÆ0ªÑøÌ”+͉‰B7ò+ùv·ŽjÍ ÙÈÀÌ”úéB+wßX†+qpjóŒ­Wh¢ŸE­¶fžŸº®ÿ£yç[®H:ã ²yJ¸¯*°dàžÌKã3{|Hí¼6&¼qQÉd¸•…Iwد¨Æ]|xÅZ/ù®ÇŠƒ÷VPö«˜Ì¼r~ûÆJ—F\-¹ÃAf€‹“s;î0ØJcâ4ÅŸJ¢¿6—±æv]d:ÿ$ù×ïÜÞ•?êà\Cqa[&¬.Q‰üú²êÂê.—DˆDa€•W{ÁSiXÛ),©:W›¼•ìñ´”ùFk#rL¦ñ‹‚ÂrµÜ¿wYfãcF‰Ö¥v™‰µ5¯gzóÊH¾Ó^–Úu/²[±œeýO}NMŒÔVa¥“2bQ²—|"Ðx“;½ü7Ã)’&ó¥Ë;m6˜}Rqd€Á‘E!9Ê/˜Ð3§]© >,P](ÃlP`:-¿[lb$ ‹²PøÑ‘ æ3B˜`FUMƒ®Îny-ùv—â‚ò…¬…@ÆðJV¶='—JJÊ‹¼<ߊágQȽìí‡z‘ õeF1[e,sTüvÌ·ŵ*(¯èb§á¡}1*XÈÒn?@b&^”@éJXÖé²…ÿø*,ò|$ŠM=óÝ4ž¾ŒŸ˜,èHÉIYñ;ª" í·ÓqN/l×nî±oöÚ*«f›}¨-fø¤¤ŒS¨ . 
ÐK¦·»'fóà ªÄÛhæHT6ãø¹ÞÌGVŽþ¸ïBe°d؈ÙÇ.äâ)so4!š"φá»Þ¼Œˆüf^‹c¨¬õÍÊ·§¢L5’›Æ|2|—Áû)6Ø©¢¸,^½¾ÓM+胇To¾²ÉêÊðwßT0«ÅÙrÕMQŒkï™=À-­Å_ TÂh+‡$¶[Ý£‰òÖŽ5â yÏ^ý $¢úÂÈ”ngé¸b×¶ÛÙt5•Oħ`° .Ÿ¨2A k„uH _T¸dJhãQ®ãÇ¡ôz#©4YÇH ‹d”I• uïð`Ë^… rúf^Zì¡eOF¦ö<²úÌR½Š·¶‘ßQ'³ ãñ† -{ ®Ü0H[S“2„aJmµ:³áîAœT"þÃʣǕW”hdl– Iñ\Ø)Yý6HVhV-vBèòÀ…v:DöF!…(èÿÿ[oó`p"Ðà FЋ~ú†äàÙ³g#6 ‰« ¼"¥Æ³gÏNŸ>êˆÌÌLP±]¾|` ås¨‡:wî\„ü‚Á9tèÐÝ»w‘Òˆ  5kÖØ±cq –öïß@#äë¡ûâÅ‹ò„µjÕêìÙ³x²vàY CH1þŸv4œ!‘@R¸T!¡½ÇÀä:ã 6±( ¾)tàj¨3a*‘`’€ Ø=Lø)0ØH Aï…˜*V;P_êH,Û‘Ûû隃Ëêç[PV(JVÎKT¯ö›rÏ-Á]Y-.¾;lGÒ/v»9åTdW±œ·ïéJb¨êÛÓ¯íâ³·³p—×*¥ÔÁOT¢pPR<ÎR3 #5¬Yï8MSJÍafÒ»Üã2EËÀuËd„z™7;qoîãq º¦ãAH¥”~w>ZÎíy”+ N=Ö 4¯ÛZê¾ H`†ÌJ›Å8†‘ÜËAYt?Éô12¯Ä]‹ºÿ°úøú,È|¸ÌO¾o¹eýé¹ôÞÈwDoÝ;›žrI52v>~´Ã‹WŽßZ蜟KK–|W!~zôÙ6ÊùÈίž;K<É÷',Ÿ¸´lÓ‘e”šÈÉN¿ÂÒ¹äþДž;ל›5²KšÇîŽFL<ÛvIâ†Ìûs0<š†ü½°A~›§¬“ÓãÓ‘qÛF¿"øG阽3‰„'a,¦|¹pÛã:+ m ɬ–{é,Â1T(s™í!}¸§C& ¬qÇ1‰ÙR€+‘)©Üö†îTI¢FKšB!Yú½A”õm»?3|ió3ËQ¾]Ç éC/¥žïA@™yî !ê»–ßí¹¼×ãøjµV^›Iç$*Ø ×#&Í «n ŸÙêÐÖcÇuØÎ5W?¢¾W³G\¡½Íw ì:ãÔðáDãõ|$,BL? œ‡DHʼnX@“D…EÔyÿ†ËBB¬)`­ÌDˆâUØÁØÁ¾ $›oU•ľ“ljöXÄ4zQ­C·¨ÇãÍŒþ 7]ÏÁ+y½t²agý…™TèבeoÏ9ÙÎRtnçemòQâwÑÝÖxbZï'ù)ÌëIFÒU^€Ëdó ‹·ºý#½Ã'«^\a¬/OüëSqÞÏýHl?º PzˆHªYqôü¹ñþ:Ì»öÑU­ƒuü±X<üK&>3½î3Ó½çíúh0Ç *‘$óѼù5Oò5ºÞY¤_ÿ™·No¤Éìð'ú õMpq€,iŸks°dªz|0¹ë!a½¹Çcq•Qì3of&øroÍÆû÷k×3šû­­œ´RtIo}Ê+rÏ»qÝÝ™”®±ˆ*>¶ÿ'ήas‰¾¢˜v>åÞ©fnÍUjBodö»j~Í„éX‚t~ó¼6G+•o¾X¡ ‹B® º8H€â«:nþ{6h=Cè‹æB¾A€!|`ð4”[ÈÐù÷<s<„íWsW;àD¶oßž‘‘Å24ÀxI€€²ìªU«¯ ¹®€‘:uŽ2!Ñ’q æÛÞ½{§‘U=ºwïÞ½ê-++ТMüB¶õ2 )´. 
é8ݳgO€}h;zô(Ä©~ýú!ȸ}ûö <šý/¿ˆÈ$ Y\¨@ø\L+x@ªŒ16’Ûé©×ä1$Ûì³C)“ºMfyXà“ý¾’#®fúŠOb™$æ7òsê«ÆtbâiB,?)V„D*6±¹0¥|u¸†¬þ«ï¾¯`—ꌆüšû»lj÷ªÜ3Þ »Vã©Íc²a„F &Ddz½~‘#Fgâ…:?Œ|k²0¸Â`…ôE¡çM@ƒhëíTNå‘YöæÝGŸZcDŒY§f¦tÞŠ 'EÜW¯^¨.w˜$ò/] ç4ôÚXl–Õcó½j¡ È}ôجbm3Álr…u*C"p³K‹|F…¤ÃA~—‘˜–=jµªó¥CÈãÚÆ]nÞÊk{ŸT•~¨­m¾š’Zf«­¢„_¸8~­ç‘•{øØÒÊÍ·Ö‚|Ìÿ*cíåŒHv»Ã_¸Z8ã5±ø4fógLž4µF…î«jœ8¯ë$Ú+"¯Œkð•±øD$|ïfù2/Íù®[Š€£A5o‘@(¤Þ2í{Žu¦ñ QP£¦…Þ(ÃÂrKP¨èÜf_Æ5¢¤ˆx¹ÕíÉwZQŠÆ!ºN:}`+Q°)VÀYÈä¶/ ï÷í(­Ê€ÏѲµ×Ó0 [OÛõl &LŸösÏÿ¶žp-~& y;Æòù>1 Rlg:"b+,'>“ï‹‚¨úl+>Î}±ÄºÉG^V‡_™ºH;ãê'9}ÏïõTÒp âÚű,Óß{*™B¡ÝY,‰Q,¾ÕŠ¢(t­6 %ÊR—#V]—ñ‘'%a•ú7”.þƒ-r$—ŒI6Ú¥qíì­·—Œê·Ñ }†3LfâòìA¾’÷A]YÉ(Q[O-+ª°qÄs„,*"N9 Ô^«v¥ O+¬,á½Ëû}úÜ \8l ~Éüï®­=×eL‹ÃûÑáÀ{#Û;7 ZØ}kÎÝ©£{ç¸3‹6ŽÒºl&åc\dNÃ^^Ø{ÉP“Èx[œÛ`©ïp…YÞÛĘl‚ò£w·‘öÍVBK¬ôy‘`‚LíµjÛö¥s­¥¬çõÙf½ü“_i °•ü…ès*`,%#ûÎ:|4š$À€[²äû삜G+è[aVIܰ§Šþˆ\NÏL˜Àø_ð C'±4䱄(z‚qj…Mà³k™‘Ë ‘̱ÌRw¾ƒ©Ë¹›2­íê-MMÌø¦¤«C¥ýÿ ÷xÖȾ1Áì¿81Ü8/ÕÉÍz“Y"l`ó³+ÙŸó^ œ(M$€ÅýÅ·ÚLî³sûËIb&ãœ.®æ!Á7@ŽzŽNøSþ—¤øo:è"à7$w…þ-̓¨ÿ³6À´Á nÈ áP‡Ï0Ĉ¹#FŒÀǃÁ{‡Ð;àP ËbHãîÂ8Ç6~üø1cÆ ‘ž!ôÁ¸ âÔàÁƒC²rè8erªdzœöfX°…×üwè‹Ó—šÿçñHÚ€ƒ…Ï µ• }l–©«ܨ訛Îãµ²ù>½£rÍÓyà1W48X‹+×{>°%ŒÁ‰K£šø?z~æñuŸ%ÈV2X~–˜…\ÿUb„Ó3õP#¢i4®Í¶½§`*Îîpxz÷ƒ`\ÞSí£Úµkñ>dÖEuˆO¶"¢A"\¡Dd1:w |éEE{&ïOÇ_©eÇÏ©XáxzrR'@.qÙüÓ»œ-¶±¥ÚSù«|NÆ…L9ɳ 8Ї¦uùÃ)[þ ’ÌL<)á snôÌìøÓïJ>:dåÀ¤ßß•~!©å¶zù ©ޱ>N´°~¡Õ ⊥&\ÒöMxâÄÒ@*ï|­Rgpº îp•,GÀV]Pè Õƒš')wh­ ;[ê\w e ð\PèAeásÚíÕˆ%©Ðäúà¡F¢¸<`Í»>Jl2§ù!«ü£ËKxŤX]d/áÜÙ·ª$-4Å&q²z'†C·²ÿ]¤›µ§ã¦±§fœ¸ºë¾DŽ&>†ÅÐéˆXØ2V©¿Ø-3¸ýœ0^D)϶ ÇÂj…® 1P¾QÝ>±úGµ&id.­«ŽŽ²¸õs{­tZpÓ£“ ÐHZ=¨ G5vŸÉã–U_ËFz D ±(ö!›Ìh¿kSþ Ío.¹þÍêN7ÂÂkÐÏ—óÐè)»WYaÿ°êÎ@:”øpYÚù¬RQbÒòü¹D‚ª‡i„Ñ«Ô&O¦`ªÌ>ŽÍ#S)\¢ã˜ÒþLÆÅþ¼ÒDþR|wãoƒh;À3 „–°ôL5?m:7ós`}Ç«s¯tÍévfã™±ÐßRD¯ Q}N`K4Q ð 3Oؤ»–~¨ .[ÔmßâþR®2'åt½îŠõ¯©ô2= ‹ «BP妫 £[/É;›yçåÚYÍå=YÕð¶ßCê*ë¿2üd»ù"ׄ¶é»Ÿ¥‚ѱȸ¤õRG„B¡²V”ÎlûC¼Z=¾ïŽŸ£`)ùšÀ¤ :Êgæk¹69 ¼3Ÿ9)qž§”-ü2¼šféˆ-ö2eLtæÎžkÚ>ŒÆýjy§‰N>ß’([R,hÊdL‹1û¯ï‡ãšÛîb‹MM~ûô߸þ·a‹[}c(êb™n©˜Ï°ÌNCú¸Kï+ÞÉë÷v๥EÅà5iߢ3±œYÄì@¹*1…g 2¼[Àä6‰80_9Ü•^uׄ|.@yjÌÆ¸› N4ŨœnÇŒ¦)›~ÏÀGÌ‘êÈÝ0Ã+Üvi„àYßœ—q)7æâåÇtÛ²ÿ'ê4>©ÅœnÀNæ¡€mÜ;{øúÜ sqsêW«Óo ßÞ÷Ô=_B=¿ó¥µw«V“›¤µ¿œ|µã–?=ÚâПí&,m>›Í}_ô&çñdúE>"fG‚§ Õ‚×€ÇÍË=”…~; 
.}®%ëï3‰`Þ•î`Dö¼ZRlXv¯×ª1—þЊ¨ÚOJZ6sXÛ.g»å³Zœæ'ÈÆvHß÷0©aVTâÜ:tú!­ õ€ñÚÿ“·üBâ)p·Zµjÿ‚Xt2$Z`*ðVÞPŸ‡dßr tgCð pņˀ ÀÎíãb죱0Ð3„ZðþØhhÚDU#¡ B-c?Þ¡ƒÿëïÿ ÀXV J‘Ëò ˜”ËŽ~‡ù·¨ÅQ±D‘z¹ë¢gžnºªëÛ¥—Ú§wû1õA/J숤‘hÄÖºàL§ŒüáÄO«Àš‹¸Ž Çës°6#‘^ T@Oe—hhmÁS³ˆ¹ AÒ$÷ÁHªq‚VvÄ®ƒ/fƒ€®W?d0üÓ®µ]×¹6>®©Œ X˜Æ€Õ'1nx4pNÿuϰ©HŒ©Eü˜Î(ÒNÄ2™€-‰ƒaOÌF¬Ñk t A­n‡g9M @Q›ˆH"±-fs«wbä)apv Z”)ÒÚýѲ†ŸÌo—Ýj»©_¡Ñâ%u‹:.Gâ ê¦l½iÈ¡±[Yíß¾³~ ¾µIĵ¢¾*(ý°ôJk4¾¿qa©¬)Ž*µ–*”‘(„æhi¿â5ðÎÜÄÉ ÛðsPU_—¸v^þü1‰ÙûŸ'SN¨J{I8&vxAò©![;þ,h&_ùj~ŸŸÜ0sqɼ¾«Ø>…Ï& Ä̬ûc‹šÍ㸠zs¾Ñ«uè{“kçº ãÑÚà…GÀvϸÔ{aŸÓ9·P" Ì9ò%šª¤­¦ùy ”ܧQ¥P@Ž=ôâ8,™Òcöûs¨²oe ©o£®‘ÖÀ¥ª)¼¨ÔTãs¡ÿV*>Uš£5¨„DIíK9SP°RßÛû\ddØŠÄsB®¸¢˜±¦ïoFÏípNu±9 Áçv­•)f§öú!ýÚpÚEjþE„á­%š‹<&†Î +ã©#Þê^,ŸMg‘“ätÉ[pe}+¸û¸>À•Z'0½(þ³f}5ÅT‡wpÞ˜}vf-O¹0§Íý…ÚÌè|jÓ•N+YbÁ­›Ÿ k4ïè“ua'¡Dÿ@& »çæ>òY>üJ8“&0Á·T”¢ö/] Ö iëµÜu¨Î£·¦·éÆ,žXœub/Ì(¸ç[4¸le“FmT†ÅÁTñ@Jyü’Ä“WíKX¿Q7Úr€|ßg実– %LK¡œhBÞ“> ZœÛÔþÜŒ+}÷?›F—Ÿ„k¢g†v[y,9mÐCÌŸ­³:äíØ°YíÎ ½1ëzý>ïA Qd7½ÈGfÝèz²uð{­Ñ™×'_ëЪHê‡;½öšÛ?Òš´Y4ÓÖó‰Ù(«ÎH(C),UŒŒl±´†¿ŸI ö²Îù’JŽÐ‰Ú‚˜o‹ó‰¤®ÙùKZhr.h|2ïÚ E#oí;:„®&Qøš¬z{ÅÅöà àóBËÿÿL—ßß·/Àð–Úa 1š4bCÒ'ÐÀ qèH%ÜWš—…_ )¬Èh¿¸÷_lÿ‚ðЋãb\ÂÚЕ¡ã0¿£܈_<¿¡ãˆC;ÿÉ/.ù7 ÆnÀôÈГd>T«<±eKãUgÎí³WĬxR®RÌj±Yöµ÷mé[ +nÜе{oÍ7ˆ>¡ŽD€$R„>Žùœd|§ib¿TêQ8õ¾´¥¦Þ$ÕQ‚« .d¼ £„õOë}"šHù{)d# hŠÞQ„"Je¤Ü†zñe_¨ë˜Êõ>&BaàzP« Äðé'Û¯éxuAþ ÍÃ..Ù—8­ë^äŸMîyX®è°W.ŽJi}ÖËÕˆŒ-©@äEÁ9-3¦(¿WYC[a,‰IÌö «3 D‚¯´•±ê´÷ýÐsyKÀP™½ÅpõÝ7æ‚ÁR¶fô^vPͲ†WçÖ ¨c}^úÇ·ÕêOl–—Îi¥ hJ¬ŸdJñ÷ÇkfŽúËï/¤ô‘}V€Ç7éXA`!jꮼxÀ”Q–òÅ1ªš©ý.¸ˆßÔw˜) ñF2¼WúÑ©>¨%ñ™R³AŠI(-v®ß3*eèÑø„h}±»~Lãâ2£*2^_l­)àV:u¢0™Á«·‚¹ÃW™Ýž='VÎ<ÖbnËcV½µ¶²æ´#M³ÇŸJ¾ØlzÿçôA ‰NRª…•dMØhòº¸2ÑÄ–gb¢jID¬7¦çÓ†ßÚr8mq׃¨/™Ýü„Ð-UójèlöôΙ¾@J÷C0^du?îágôÞ‹¿ñ‘µÐøã§5OVðêr‚u* Î̾¾r‰Þ(Wt#;%…U6j³ó"ñ™F(¤K‡¡aÊø`‰¸n åÒ¦7Ãù1\%ôËÄm%<#%l½¢Lj1ûU„ZšÜ#+{ÏbweÕªÔRúÎӳ蜴ƒñˆàwÁ!Z-  |­S`^pvöì¯WÇGÄZ Öp¡¦´à圯²9’s'ÙŽJY‚*åxpøe½³8Z]ýmñqu5ttÀCàÝ}˜G âäÖŽk†yË 'éH©û<¥EÐ[ÑB¡¨iõÄåC®(8§Ã6³ÏF¨ëmn>ÕB¡½@š;½èè-ÉÝ?áTÆõ—oè h¡Œ…H«”Õ>ò¡Ðë^±¸Š‰ñ–(§Î"›­ gÖø¢½At+÷_>/tu!"ããq;.Êkz“Ûî’³ê”W"vGŒZY]O~¬^ºëÆ,\SöÖ>Z霜Ú3{ëåä˜È°ôSS0=æµÝ±îÎ÷Üp˜ý«úÛaaˆƒÈo_^ñ†/çN=U3·¿Åe¶Ä«k±}¨‹âŽ(ÞU¾Í¼ß}v›Ó>›K­ˆB!ê0‘æ'6[0±ý¶ÏÁ—jµàȳ ˜á¹}4k­…|ñõVË»„ 
åWSN'¼šVQãarBÊWÑí«w˜8`ŸTê(ûK[OÐÉ>:od½ÞŽbÃ訓žèÐüOÜþqp#»¼x¡?®!u<. á.„QÁ~CÈ †pŒÛq*„ |!@c„Ðrhûh ‹[B÷âú0»Ðþ_ðÚÐq<ë_Hü‡œŠ»U˜ ôý7 B “yìÚNzÜF¦õ\£÷n¿I‹Õ`öÌn‘{#ùÿbï­£«º¶öáut÷“ǽP ´¸{pwhq'H „1"@p ®Á¥¸;¥¥PƒBqˆ'ÇÝ¿g%wÜqÇûýþú~÷ãv|wšÙgí%sM[s>sU‡ËjEÔòƒ([4‘¸‰{ñ•Åô¡…Tst³¹>?ñ¾ PÙuu3¥TIè¿ü\å `R€$J±ñ‘©h$Êï²Ë8â@b§ÍZ~ýï‰ÝQ7Rk(+-,yÎcñBÄ¡ÓÝs' [zé˜í»©Æë挎ŸØo‘ùCúlýæg)³ö“¸v{²/NÌèy­¦:ÒiqJ=*·nsϧe’‡Œ'$DÝÔgéí%~xŒÅéJºÓ+£ïý¥Ú¥Æ•þ)G:PÈ<È’s¯×UZ\è$Þh!P' B%Ñ)-p•0¢vÍöÉýwÊBø9g–-í5Ïæ'o®GLõ’Y;,÷–s£—ÍYuhÆ¢^[ÐÛÕ{烻Վh•Õ÷§’F¥øK†'\Øâ"Oî *Ͱó6]O‹kIM¥‰ˆü µ;Z,u{=b¾< +ˆ½ØƒzÐÛ€¶Çx»ÙTD¬_fÏ;Çv³ì»;K¬º82X¸•‹.#Š`8Åï€AÌ"rG ‚P/´ƒ`wl[ÊyÝ$ãôàøö($oàSÊ¡þèjÒ¨ƒ”iâÛ ©UVžp.$z¥˜|ÿ×sJ9b²õÂT;êò[ht-æ5ÊåJ‚V%wÇ•Y+;ý^ò±xë_}i#x(Ÿd\¿ Ùîµ·&îìüÔBëÊR/õ˜ÓoGêÍ©¤„^\ϼ·¦àAs:m¦g™«±&bÿ@6ÿºü}qÇ^µ'áve›Ø¨fÑñb]y3˜­‰ý³jø;‹Y¡¸Myåa>^Ã,ÛØîj]yÝs™½È0¡ùÞ Î"”m=“zàæb 6÷׉óÛ®–ÍŠùZQãUíîð%|CIQ´¬Ûúgb®((çK‚Ù=Ž¿nízõçÏw~Ê ¤h?÷§%óÛì[÷hÂÊ.gÖ=ƒ½³~ð.l%> W„]l˜ÊUàY]s$ ðé E¦ä/ÛÌf™^¹'ïlÒ¤)ËMÀp*•¥º¤î— ¢/{ùž¼S[„*ÂÇn…ø7“É]SvÞJ:ùÓÊwl½?u)#Înøy4z¾¨óe:KØûZº…™%ù²æWúÂÞ€*[ÍØÔ˜I8j­ ©¸õ‘!ÓêFÔ3~4Õã5IhrÒçñ7Qõð• TÂHàš %b†Ç-.)VøÁ }H+è'b‡4~{¬ÔÊöON FÕ’.§}†Úv©m.7•}™Ôâp˜2 ÷§àäkÐô 4‰¹÷hÍ•¤.g¬fK¡r Ä@IDATî]š~ñ™5í ,öøj°¿™Ûå0T]RñqßÕi!¯‘‹w°õsW£sý¿Ž©-â+Ž–:‹?T ¤Üňew0)æÐík;£Å>Yl£}êzübÇ=‘J*—ªG¶ÏñŠÊyŒ%ÿÚVh`iý5®Ö¼à£Éosé_3¤¦Ì)³Émå¾RʾÁ³lÄ‹"ĸ 0O~r¥0ج ñjû¹tz ,! î®›=üRÀ[ ¢ráÚÊ2š"2¸y¼D¬ŽôÖ…ººÛù(YM$á7Ô5YÔ)?¨rNï—L·žElrZâo·šì2–.¡ç /ßn '^}XýÅrMm%¯16ܲa¹o žIUu4¨ùË cYE‹[î «+#R °¸Ã‹Æ>}À‚mשÇ"nøüërˇh¦†¸ê*ç[oÁþ›©, Yw.~iÌŽŒK#—µ:®(õåå">c2x4ŠÐÃM˜Êd´¥\NS_˜qøaþÍ”0+£)Á,¹øf¼Ní¸Édw®ít«¦¢A|³óÚ:±ße~øXÀHŽ_Ï ŠlÁ–k×=Z@Ÿ :Ñ}߯¢S§ÇáÅÉ”S½Hd/º!] 
!@xí%!ru\ÿ]úR;*ª­ìFͨƒJ÷ìî+„Z¤W~=¯Ó>™ÚŽ09S•¹3½~Wê÷Ô󌱸½Û!TIkJ]¨Ë«b·˜òÕA&Ôõexƒå{Ç·9ų³’¯Él™mDÂcµ™ì‰ÈäWžaáQ()Ì—÷J²R‘\»v[‚>ÖŒxäöª"5Õô¿a>CˆÎTºü£YÅf<DjÜãÒnk26QJ2æ|ÿGÁï”`b"_Ëé⋤ô5f>„<+¾0®Ï­p€ç˜4¡µ«YÐî)†/ˆŒçcq¡Z*²yôYƒ.bÞÁ.óKP(‰#m‘nã¹öd†D~Qó]Á ¡€+”*R&¡ãpƒÕG ÀM:åм7o~ÆJÅO_}(arïŒG­6ðóúírKKì¼nåU¶û7þó­’|UòŸ¢¯«¾‚\%Gñ²r²j)àÁ†p­²kñnÀ[Q|‹·¸ ?„Å…¸ð9lß*™ )Ž{þ‡í‹ªn¨Òªžòü[ÉöÃey¡£=ˆ´b!)ɇØÛÊÍ ‡Åî>˜Kåq¥¨N«qqÙž˜#£­74H[ze-Ò…‰-Ì}_Ï„„ö¬3 ‰Úkãñ _¨ñªô /ˆCF÷”pµ¯_„(LX>a ˆ¦¨Sf¿~²©Zy…¯ Ïãrü|)¼ËË^*Å‚RniüéÎ[¦=œy¬ Í4¨´Øh—Þ’)öå=™N^è#bî¤F»wÝŸ83fÍ–“ ³†Þ’Ú¼†€œ#6?¸®ŸjÜ9r+|§gêtX{t ÕЀlúem:²xvßbç7.ƒgý•AÙ²ˆEm0ºªZlŸÂiaõVMSÓãp¡–]€XüŒ>ÀG¢R,¼Ø Ûo^›W uÐüC.äÞº"$ÕE AQ©A£®SZbD]™Xj÷;l®‚!©'ú-|Åä/s¨õ°ä&™Ê®7úšÊ›šmŸ_û_„ˆtüå‹]*P *°ªV·Æä·ú²nŒÈnw_ÀÒ÷³óUÖÓ˜¤Á{ü>'ã ½•ŠyÊw†¢Z!u¾»Ù˜êT2cdîÖGô”½ümP ~Á–.k—v|J″n·YÎåÙÿŒì#Ü˶“7Óàó<Ü™ÍkI[Ç'NÂæ|4˜ß¡èdhTć¢]Xh¨-ü]EºZ¸ÞWöÙúë¾ÇËgÛ¶éÒt*Yqõceß’µƒ­Õé^RÞLrFYôíN½Þ)áÅBK‹”—ŸXKX ¤mƒ>·Sô§=P¸áÏ…Ô°E„”K˜Ù♳hQ»¯¡Ü·–G@ZÁÈY¨†Y¡×›Íõ›Ô¬,üì6øÔ¤Ò„“7›9vã–sh¸™„,éµ»´À´÷Á‚Ô.çOõ§Bî™A¹é@sD‰Éö?]†Œ‡1t`ÄV’Óý™¥Üµê‡Ö«\ßÝcu¿SîdÅõÁÐ-\H:ˆY´ºþyi>Ñ:8ö9ÚÚѧE8Ñ~&“gÊÍBDcpxÎ@dPZnü˜øb%ž›ÙâŒÇ§¬«Üfæy4EdipϦ{ëæ Ï\z “Ø>`0R›o®.¨©aË}†€Ã†Š±RŒŒù…%ùÔïT)P+] sº®Øx2™î8<á†p‘‰-Öì>±j4ö7  £†©Šr“YWl¾N qÎé¹kãIðXÛsþޟסôäÌ^i[Ï.£Ž.Üo Óû¦lCxZëµg÷ïèÆ©cwÎ?xݺKó÷Ì˺N¼èº;Ij·#d):ÜáaòïöÊìy¥ScfåZ ý…[ïŽE&µOßõ,aÉÈÅÅf¢âEó>±uLm‹MlqÚ;VoT^þqj~ÈQôs}Ì›€­BùÎVêæƒk¾V©¥\¾òƒùÓ®çSèzé)±¬ìðKiaÑÖ—•+;½¤?ÿe–È£ŒK‘RCbIÈòkQäØÍúD«HέЗ8TRçÃæ?–±ÓcõòÅ2J6˜@+Y:õ@ÆN ==µÓ¦·fS2†^îá##ZN=zmGlûY¹76Ol;m÷ÛéªùT¸³Ü`Ýñx>ÕÐ¥q¨õžO»ô¿sA@ÀrÃß*©çTIÿûþŸÍ¸ÿûvÿ?·€¡B@þkÅ œ Wù¥!1lˆÀ*a\%J1UB/ ›ñ[Úª¨fÌ>ÄÍø9š­’âxQ%‰ñnF#¸þ)žÑí*±Ûª†€Fp?nÀõÏAá'Uüó“¾ ‹ó !•ôôþÀ‰³1Ù–Ÿ iGñÄ}8aúÿÌ‰åØ B‘‡ eåVS5ií ãë´Ÿ+cM½$vÐF*¼à±Ãi6ËéSˆBÔ¥ÁmuZf™(Ùàz7>"©ùM ,&£÷öü㳑‡ƒƒ½qíæ8‚8ò-7³ðÃìž/ì€m þÔ‡“°¾é΄(C ÂëÕ}eúŠŒû•~6hn7eÖ ,.Ûc¢–WMè¨Î•hlåz­<"¦{ÜÅëÙ”À’s`2¸RR·3).´~Õ™Ê*<2«ÞÍ?#ünt]ÜÅ$I¥~RQhŒ4™s¶arÌ ¹È{ºgÖàüÅ×F¢Ê ÁôñÏMæ¶?¶áÒ·`‹9mŸi!XSZqšf(•7Å…%_F½¢õi¶³µïã£uóµŒäø‰”–Úç q”["pŽëC Ò’b;à “»¾itK{_ñ²ì¹£8 Èt^Y5V±%õö@t{RŸ]^0éH"Z‡!X>!€˜nD—ߨj4¢Ž–¿}ú) ¬©„B¤øØžS)Bù:¤¹»®)ê8ˆ[^q5¬Û¸ã`Ú nm”Æä:o©³¦` i=Sî Å.v¹ìÓ†$ 
ÙA·T4U…÷þ°^±°>Re³‡ýEe9‹.®•¬tøç!nCüDjׂ„¼(DŠp?vÎÎ]ˆ‚!ô¡°¨?ws,UpÞù^˜ºicÓ0.x‰9‘ð;ãè4‹.·“Ìh‘™}»GBßc3ïŠNæÌS&»QÃÔ6:œÕI„¾¸ÔÊu1|¦Ax“™;ZB .xs5°Íô(i(Wú¦B$a±Ís·òÍD  ]ÝÿtñÌÎÛ’.Œ\Ü.;éò¸”oN1A <øBf8—d 8´¢2 ›Ö‹C4h@Ô†¦Ê„€¼.F$ãð•‘ýšv³/”QzÃaˆG ªf9ÑÈjRˆ$íüäÙóp'¦ÄË e5[aÊü¯ÿ8 ¡ˆéüW¬Ç*é‹«Dã?'»JþSÁ‹ZÉU÷ü«—ÍVµüÏ{ªÞ¢‘ÿÑNÕo«¾Åë•»ÿãÑÿ|û¯/*yÝ*p>sÙ ððÞ`_„$Tî²2prp,Œs]ØR8Q¼Ð4µÙHU?ÌÍ%N}J÷ˆ²‚ˆ½Ê¾r9.ŠK¸lzœ Øä6?à3²ù0%ˆAPˆøhÊ2¢AbŸŽ_j*)ª.hÇ —ôÞ†’œvÆ}ðV28Žê¡‹6Sþˆçb±€Áá]ÅR”Ê{ì ²¼—Þ ~NC'°Î"bd½þ+—¿ÊŸbÿä]ɦ^5¸×<äû›áuä¼%ÓzlÚ¶{9õ×AQ5“åNGÈ#–Ý ±?êI=/$î“Øg_ÝFµçîéFé"Oô‘­ Y:n(ÞŽvçMÄ[SݨÜè³ìêˆ)§ja×¬î€ îŸ>èøÂ‹]¡Õ.j}â]Í|Ø5¡ÍôÛ]3»?|Uð’„¶e ¢q¤-ÆLÙ‹òõñe&GqÆc㻇ª–£X@{%§>\y˜Ü0mçÏËâ¾Þžýó? &®rfAN¿Ýÿ*y_ªTjYŽÙï°õáá‚Ø~Û8* ÕQdÄXVªT+Hã•Ú­%Ÿ…¾°åW.íp(ûî8p®ŒÙÔŽO97&µÓb㬽³ˆ²ò’>ñ'ŽS²öHwÚ<ÀPqxdÃYqƒŽ¯?;låÀ‹+ïŽ_ÝöК“c¨÷ÒK–ößæ˜Ü3ã‘LJ§›xˬeRp9 ßwYÒ;Í£ÑXÓY}st¡ÑË£’¤–!3 Õ@ ¤wøI¢,°T¶Ã è¿éäjˆ™ÆõZŒl±¨/ÀèNq‡§b-’”u)O/öýIÅ’<ˆÆ¸o·{J4¨¬ô!ÈÈÅU¢â]~zäº1'æ‰A•Ó{ŠŒÅ*åFÌ0’²ý€r³–L©?§´!'c{NgüZ¾I Éé{Ò%àg ¼>×Úz’M»'ôNÙwcüºNÚËK/Ç%O9¿=ákü©Çz€fü*Ûšó9˜œ™­òŽÿ>3¹ÿ¹´ãs©…„›ÈŠ'’e’Z£è$Ûq|óm¸±ÃÉâ t3ºž0»ËÒç Þ••,›t«ÌSìåYö^NIÝG8°›]dzÌôzŸšEhjpðf÷8ïycd¿:}•†l{@õNY7èáüü6“¿ÉÛù×JÆ•S-ãiõ¥oÕ:`³óü\­Àô™ý|t¥A „œ\d\ÙÁ÷ðçÓ-à+2'Ý•r5"²¨¼ßybþN®)„f#oiûÑ Ü³ý܆*ýfÉÀc™ç—Ó„!;H|ï=úO®Új¨úfÕÿiv–ñÔìåŽå\8¿Å‰Au*úƒ@jeD\™;è 2Õ—§a Љ+Hùà'®oîÜtü­+û›W6}îѳ۬@´˜œz}”€‡oÿÛ°MM¯—Þû‚H,œu>ÆÀŠïÌÙß§B“ ?D=´æííCj|RD‰‰œ[T\d'É;‡ÖÊœ…t|Rîþäåp“шV@– ?vðN&]l¨JjÄÀbÊllŽ”ä\}´‹ò/³•hQŠ9h58ÎÝ<Œh4µ¤]þ¦‡“V º~°Ïì‘§×D½Óè ìÉÏÁ‡ƒ=ªõþ­¯ÿ àóòñ8l ùÜ({D„H™‡åÔ­€ßîtÀ3è=TcC½x•Ø—ÛÆB‡6Ÿ£’FÖmhüd›Ù}?/DfåØv\È {ÒLõ:“sx€> ¿hÚlxËU]¬Ý¤A͘f˼!v0 +÷S*:H$*N5#¡HáÙ<0›‘9v }øîa É 5Ö£Ô‹ìQ!±9,)Û_I 2`Árî ÒtKUúP§H ¨¹•m*Ã$(¸Æu‹rÿÌxà ÿÄ^lä‘ôs£(/Ãö > Æ4ñt7"i°eÌcò«Miu8ïÙèù1‰Ž&E d/XRhXµeD^Óc){žþdF\·]ªjá‰Ý®iTJiP<§Ý:ê«@oÞá$N:“øyµÚ ‡\JË'€¢B‹¤þ´‚nNB>…”€M c aÃK~ûø„~àü¼ê×Q ¿Ì”ÖD-?œá{ßrB€%§wÂy,g"C[.ˆ"u«E„“ÖÖ~÷5MÞÌûSŽ·[šÛMáéAc™ƒ Dƒ¡¤ þØ¿>/­ý¥ewiŒØ.ê$€/û­#×eýŒ.Iκ)Mzø'ÂD¬ÚÉí.zÝux°òÒ°íƒþ˜¶«îƱ(q€È/$0,d¬ˆ’l~´hbÛ$¢NWªÒç'lK]‡ƒù2 5ìÚò’XP„ñ¿PÂvÇx_ür?ÿ7ZjJ³Ó«oö"ÂŽ0¹QBG¬n}¶Šr@\p¡ˆIöái“¿:•{£Wò×—ÞŽ©ò$O걟ÈFÌ¿D«¦, ”’ÿ8þ䲦ÃÀƒˆ=ž€{ËwOž”]Úý{ÒÁÇÛèÌØÈÂ[’nMÝ6þðï«AŸ…Å‘HE4¸¼Ûãg$J°cº¥+qp1 ªÌyŒ¤”û«QÌ) 
Á¢ƒ\ƒP*,‹Ûßàjíé¿÷'ŒG*»mk´eíå…Ó;§lû9©J*Ïlõ,íÀhœ´MˆOlÙucżæ Û®lƒª´µ×ùˆ0ÙìÃ-I5:½Tƒaá´ÀOw“¿Ìï ·*¬ !0?‚MÔ¢Z˜Øo‡, DZKÜÕEõG7ËF¥ ¢Š—®¯¡ôby„Htn#î´´Õ/ªQÚå›Fÿôð·Ë¢Z(Oˆiƒ¦ºÊcóð [8).Å€éo!§ÑO/yXq‰.4>Á[0µè1I¾O9Ò/g̉EWgUëPSÇ›³§Å†ñç~ßsIë£bÔJ’•·ûQÆœ3•hš•m¢Ùܘ ã£@j§ûû¯/£Ób Ô¥ —•ÑÝŠ…ÀTò£éõ‹IPüQ•±$PœÐwãoèd£œL¿ÊÍ0ŽjzZ¦Ãª¨¹D.Wjub7ˆ“sút7VVºrÚZg1; T†D‡-·Ëų¬»:ÿã7—¶TA½ôÈ¿ð2N„[I$z2qü¬Ýç÷£q©šo×[Ű\Àþ„¶H ÈñŠ@^%­&B‡€FÀÛ@!(Æä(‰ÆDgíï}ýWÿ›×ˆ8àÆÐÐhÀ‹å4Z„*™µ˜„(^†æþŸË÷àlK€³Ná’oöž¸µ‘rp3™ÑcÿÖãÁ&ô_G?GÀ<˜0UdZÖûš:—mÄzècn£[ ÓJA2vàwù§Á}‡mn³º¤RA…Oq³ØÞŰ&Ù!ªF”ޱýä€`ÓÃZ¨çâr Ú­n5'D/7LL6C2.%&5=Z]SícÅGµ\>oźÛ0äGS™G¨œ³¡Ï÷^'vã}ukT3ÛøL,pö²!‡Š-újáÓûÿZ½uÝ‘öŠ¥¬ ½–…è8:íàøß¸eÉ“þ\q}RÒùqËûç€egÿ-xBä£Öü5rIûóJ~˜Å«ßö - Öæ¥)”âí ÄßúÉìæë5Œ&þbß”¡'n뉦æJ¸eœRY-u+¿Þ?»Sf(?Šã¡qžN¸ÇnJm±Ó¡,J´ qY‡½<3wYç½<;£í^)W\ö±(ãÆ‚„ŽWÕʲ àǤõ¼‘ÑëºÍæÞ7“©°” ÄA£³`í¨#û§ݳÞ) ÎÜÞ¡L¥ßÂNsØÀÍßÿb¤ƒ‰ÝÉ0W¯æS«ËI ¾ÚØï‚ÑV×åÄVAž\À §6´>£S\Mêöøt óF2žhâ‘Mî”ä1‹C$߬ìõ$D©Û8âìœG0KŠ(¨Š•2Lè~k( UEá¤Ìñð½ñy³jQo?¾Š”ÒPÍíç7Ž}N¢2Rà„1†$ù2ˆY!9ŒÊÄJÂíG†»‰=áF£cí´G\Öåtät½ib p¼2}tζ ‹èÚAâAy«,E!þZyR å äC“áùÝ"önFȃ˘r‹üžDTƒª’¦È40c)Ç^°+è"SbfçÝÙäþƒ„ ¿Lnq¬¡=œO„&“5ý»óÏ÷=] 1‚ze5¢0Ȩofë"¿Yþݯ®’ØA›l®¼HÉÖü0F'Ûy!{ïñLJ²hÀÞÃWã¨bÇ'‹†'çä­˜?4Öa¶¶2MqxQM…°fµ6röНgñ5Êà…òq¥È9`Œ,† ߀ïAH‰ÁäVËd>×ý¦]Gld,Κì”y8½z^Zj„Œé<¥º¶Ž¯€ÍñJ—ŒÚƒšÖ±1»eÕ¨[æa;'÷Èê̬(ã†]Y"ŠÆNÁy_§Êõ²[ýX[Ô¸¢¼0£ýi÷yö³Zo,ÈýqeP’ð7à7Õ,Ï>?ŠgÄÓõÍJ?='uгòÂRÚ j y.§õƒçwº_° l¢’Ãdˆ ‡ :mD(¦ ·õÄš!_Ì8ùxëÊ¡{ãòzÐüX«¸1¼ÔÿwX¥ø„´FmÐÁ“Ïß¿@×ÿ ¥ ×!ÓIÈZ:¡í’}ç2qÚ˜8î ‘XñU™ë ¦Ä‰˜(rå¢Ãô-ÿæìoÞ}J&ÿYê,@ÓRºAXj•‘Õ"¹P%¹z¨(Ž#[.ΑæY©–zõÜÅøU~ž€þol‡}–W«ULiÇõq£߸ Œ+ºŸe•3zn’ñØŒ8ÅÀ©ÜÀö„K åW ‰”šŸá/«´™¬D‚ .âà*Œ%dÆwsÕ©«DÎ÷¶›ÕúyM5ÿÕ‡’ê=(Ýcã ˆFªD-?b)D† –l¼ëƒz¨ /UN:Skg_³» ›'7†ŠåcÖöýÁëòÌÝßgY¯Ù÷†¬íóÒjÿ¨VØ­ÄVnv2*EtuòYÿb×£åÚ¯=þdÁ”.ÛöÝNÃÎMnueõƒ~Kzo^qe¬º²¨JØäýže‚åX}v¸Ã·Í²ÃÔjW±„ˆC'´Ýè5ûÚJ¦_®•6ô9´ÃCàã°Îäñ^\ƒ½¹ûñ*ºùÑÕRbû¬D¦Íù—k(¯Ì'}'Ô<*¢oiòá*ˆ "P¨ –>D´“ÊnDž}zNñ¿O§‡PtÔ¬¿£oí¿íùu–Ýãö]Ë0Ò‹wwRlGœAήÇ»nF­²*¤ˆÈÃ\a#ùIƒ­1zû'z‚PIÉvKáÑÛP¢#,Ê úÃ(ÄîhÄ\Q©‰·4 Ë]VI—€>ÎrÔÿ¨ ™†Íë° rX‘ý)@51NÈo€‘‚Ù±­@Ë®ŠW§[åïzýWÿ›W[ôÁ Ê€ãÕ tbXÃ8ÖˆÍJc>Ü”0mAp *QóuË®t§CE–¶=”qq XØigÞÃlÐhv·K[~œ*ÌqaéVÊ£3Úí—† )×€[YÀ×”›²%7G ëSŠà1`¸(cíw*«“ƒ'7 êž8¾ùÕeTAé_‘Qºçee³GÞñú~5¸;þëNw‘™ 
øŸÛ#v3>fh÷É_ÈÚsÊ"dbí‚vûu\T>’äö¿ROef•Í/ %sü‰N³¤ëN€¢e_îœÐñ°ß+K¿Ð‡N+ÌäáCÊùGÁ€{ÆÆLâxÛ-èµ5¢¦lR—xV ‘ÞYzúÄfÊø`4Tšt>™…¾æàÌ×–qqähò„{®ÎGH¨÷›L(9úl;äeŒî:éðƒ]¨4£ßnµªá¿â‘á]§«ÜÕeÖh–I•>ùñ{ٟؿ_y°Óø–ì{Ù?¶ß~*Ò8ø•Ñe—!È.IÌ8ø% HØnÚ¬¬>9ta]©2² }¾Ti VDÕ {ûûËõ}ØŒ¨~ÐéÏèxÁÎó ÊêÆ»Cvôxå18§oiš:æÂÌËÛŸœñͦEm…6Ìä”|ˆëÿ§DêMîqÒãç$u8/FOôëó߉¸nvÛ,UD(…F…Pô’!='1j‘3`n­2øÅœn'øÒBoаáD² Dlþ«>@ È8¸¾Bw¹ÐÀYódÆ2ë‹ÜÍ?Å.puÕÁ ~—‡’½…@'¤¥–ŒYb!k'ßñsÙHA™ÕiKiÜ„,5µm8åQ@[ó»H¤7šJ> IEMUD‡È"&_i=¥Ýú#o) $OE‹} 0‡‡be±ôò‚ª …þLéŸwvu@_©"#!ÿ ʼnã¸ýU®£`‰VÖÁêÌéžµñìâ1÷¹•þØt.ÔCæ?vw(ø „/û,œïHÄ`³b–y4>g~`ËIO¤ÀŽÞ[ƒÎ¬¸Ý^háóÕÊ0_É¿òË夙¶%÷IdȤ"€¯á–…úcä#}ÄÈ)Æú¢Œ7xŠÑîÐr„çÏ¡t[†ü¢ËÜ Râ~Ìþ ¿•fnÙáç;Ò:Zûpu€c­)3â ¾>¶gPP/ª!z.F™%ಡ…÷_¥â» Ç„W!L¥ª*• ޏ ñ-èXágš:Á ´r+á¬C'qú-(œ!FþFa¥£NF$®Ú˜ÇÔ6¹žÈXñZ#“MnTp¶T©Ø~n&ã§¡¡s:Å碯q?!&Mݺw#_Æâ2˜LX™ŒÁ6ý»_ÿ¹yÀ×™¥î2zØã‚º ƒI¤CfÝ•¯?¼ÓF*<¤%ÎYõîÙ•PUçã@ §\/{˜m•¹:ß"†H`³^¿Ë—~(sfßë*œØwo j•W3u/+«ù±€óéÂ;@Ê¢–4©ö•ÓcC¾ ‚-m~ƒT+ØŠèìˆ1„báO4³c»äå™B G™Ô;}×õ*#@sÇ^¬ÛÞ2AyˆÀ„| †ÑÂ7#¹lŸÍcpH4ÂG%w¶i¶4ä ŽkÃäì-‡b¿ë¹v q㲑”ž;4¢z–Foÿ˜…5v’8ða‡[v»÷ƒ*}Køö¯VŸŸÛ/1÷a*ö-5)°ÍÄÄ'@Á6ËHÆvˆ;x“Fö.“³5~zÛ$—Ú'ôEÖ(ïðø QË…îAAàsõËʾ¸Ø-Aˆ; Žs9Qé`Ml³rwþÊ݃¼*}–ùq6D©›ƒ´oÂX•˜<ÙO3qº*¸‰CÖû_û¢„"¶—ågVÀÊs"ÂyÛÅÅ4Á×CåúèæËß]E9‚™¤ Ì[–7eùÐ#A¿€ƒZkÄfÆ™»ÆÐB' ñ…¼²1eO^½®Uû‹PžÓéà*Ø^ŸH£ÖJd¼¿^>ª];šhÚ)//—k”z[9jT0Âêþ Tà~k³™ë…VhË®îúuUG|dp«Õ§nÆSÖ È˜,0À˜ MZ‹òõl¤œfÙ³„-¤â`„˧®mûs,ý‰©ÒÃi'ƒÚ-;ý[Ž9WMË-ýÕÎil°ÙQ¦ßÍs˲ONÅŠÏé{rãã!h<«Ã¡Åùc¨R©ëŒí6÷à tøv2´{܉Ÿ²1'¶’b‰8le»½+|K §Pö9½ÂÛ]dT¶¤“ÃW~s‰!*/ÌìuÚ‰S£™ Ï•DâgÉ$rõízGF½Ža6ɘÚ$wÇ/±È4Õ dMû«%%žìç}‡U#,ÚëðhÊTlÏÏáVÒÍNIm΋]òZªZ¿[ŸIª3å¶r®‚må˜}v—Ò.Éø~âŠ>Çm.‡6,<ÀX^ß© [(Ýr4}TÿØ#·hB %BB:µë{ûáyl…¸ˆœ¨°š^;Saµ›øNh–ÕÙ‹vÆiÃ6à¾Siuúg$JÞ,Ð_€Í39ƒ%kî¦*/ànºœJ¹7˜Îä:D¸þAD³ÉîɘiL²ÃSÁˆ…à@27ÇF¥2½ y ×d_X8µçš÷Ò}Šd‡ ¯E,õ µ‰=ýKnè],GÿcZ~wñÙÚ‘ÙfÑÑK94kÀE×]xêÖš„¯Š8ŠÄ}—Í9žvoZKjy0åû±ÈÕ.r”«TZ68z m©ˆ¤ö^WøÊÓ0¤Éà“ŠÄ^oWnŸk%žj•Ý« ñòWÿ@k±P¾ÃÕA†™(õ×w—2"ŸD!b›2ÚÀÆk9f‘Õ†ÕŠ2%_9/®sªÒYûÓ'ŸD§â‡;]6³Ì,’Jœ”2pIÒõÌq—ñùÑlк¡¤¦¦ŽûÎ?Ìþ†¯)},VTË¿±½W×Ù—ïoò#@–=¬½V—L^ì±Ö@àÿò…üd¥âïÿ/ò€ÿ—'ó½y`JÀ:¬htÿL›6=oÿNAeéÜšÕk@[õXh&+{9Ý¥@ ¬÷ v^Û‰ O¬¼ÞûgcŸ[8‘ÓÑŠ/’½}üf‹È·éïlú$åþ8Š2(%ú'?’Ú f³”Ã#° “†œ\w¦2jƒG·vêÉvðe˜†4À‚£§ÙŸœÊqÀ‘…vž ÑMx¯`ð‰‘Rdžk“áÉ ß:cÚNkÈt±|×i£>=U 
yb2óû>3å}°ñ°ç]rç–ËÐg@èPÞ¾Ì!vÐþ_¬¿;cM/kvç –€•z:~I×{îöœÚq_ö…d:ûÐD`ݱ츿߰ig®lç ?T'öÙlvQpàhBO8Èj£Âå_$öm÷ׂÁeÖÿYÀåg]œRu÷]‡¹rg4ä7#%QpF¡x²q °(J)±tÒT\€ªB˜ Ã0@~8WD1³ˆ³Áø=¨¦‚ˆÌ€FÃË;¼ˆ:|HPÞ¼âð8Êû„¤ÇÔž@Ö%˜)2&¢P¼RÁMg}{V]5½Sö¶q³;äoº=]ÍùòÕáŽËïF¤\2©Gž¿¼ çvòÒ.ù9WúÍïx>édßCß.Ìï¿¢ÇÎäs“AË»îκ;sˆ`"­¦®Æb™ssÄÚ.Wfé]0õ%aÍ^ㇰœ`=:tÓŽ)Á"ˆøôŒ…›l¼¶˜Šj/‰k–‘xyزη¶]ë7¾Åê` >"§Ó*ùX,lÐäVÇäRAòõ9 «ÄÞ§“ºÊ _…­7 iô†âFüÆÈG/ ‚&ñþSËhC  7ª%h‡…ü"W¡ŒÎn.‰T5ÄÒ×®Ö<ýHWLÅÆ–Wæ\î¹±×_z“-½ëO¶6»q…A¯–D¾Ï~8jn̾M˜O/ɳT”|جŽ;ãÏvŸÚ6gÿÏéêãBR›£²ÜuØŠ $Ñ‘W‡dĘÖ\Y”Øv_êÁÉ=¯#¦ÁPbÊÃ×t¹*£l¡'ïîlŸ´Î§—åu[3Êe4³\Êø"faß (ùɘC??¥Cž]Tn 1o»‰Póæ¾D¬ \^'‘p-!á«”§©¬ˆQ}Ç¿tËd|BûÒH+ýU¶ÜÈ)ÚHN_G m'‹c÷q.&<ÙÅ÷`B,ÅFY˜xBŸ ¬ÃÃ5ð%*A=Ôî é?ŒÍˆ9 ‘\ê5S% @œnWü ½)?}+Gñ‘Ê™à•îß[^s]KÄHE‹kX,0ÿeÀ8Ké¾Ë§¼++ ÑF4çŽùfµ\#·Î%°”äø¥ÝTóp‘¹ÝŽeŸNŸR¹Áå¡ÄHg9Näb¡xÇÅ ì ܶhøãœ#-)Çp’•÷£“À|£&Ç4,٤Χ#K×õ›ZÞå@Êždªÿá¢L Ò Î©¡ìÞ Omé•ð1Mï«Rú{á?«÷>‰Àî6päHQs«PñB$ÀÆ2û4«ÅŠÚ:2®ÓòYXÁ~ˆÃ”Õy@j´¥ßæKýk(ür.%êâ´£A¯+ºoJ>‡ŠñµR†ýöëd™øÈ_¯ë}9^Tf¨øŠÕ1ý«”þˆävW¬\£ Ìîš /¿W&ü‚²žýwZÓá²H­P§,l–Ûøg7Gfâ¾ ¨ Ö€(šÔ]xxÓ1ÇîT(†öJ;ñ4ûaQƒó÷›ÜßÇ.¼Ä†ºh‘C)ÿÅvÅ)Œ»ýô°Ý¡¦ÊCJlB'©Áé<íFKüÐÅ Á´¥Ê²³}‰Ú‡J.°±YP¥“:J)Î#dj¤´;^ËZ‡SHEøÈ´zGêP«¿ë›ò5Üš¢Zã®K ¶z$ãÓZ»˜äQõ³gN¬SKh|åƒj{A„¦µ½çV8˜ÏënÍÅùhzë©g»5x3zOó2+?Ì'æÇ¶ß\ìyƒ¸£··„„í%“»ïÜyxä:Q^\è0F^Ôr’èJ“ñ éïì ÆGP‹7°‰î-W'in%(ÇËã.ꜧR괬М®GÃmÎìúù,¾$ —#Žfkg·Íiˆ{Ô>¢ˆç‘4\Óÿ^°L±¹çï!\Á¢¶y‘(ÿî'!ÞJØaÔ –rfôÚ©4«û2팅…¯ÏÓ†|f+4Ƀ®Ãçµ:/d‹·ÞS!Œg@i3­¥ÌbJëaáÞÈ®“ˆ¨[(ïó¸ç}=Yë캹ÍMçcZ§Ý¯ƒ¾Zèt¾ ñÊš³:|SØkeÓÍŸôï™pßÖ ¹uq¹ù3ÑÔ¦¾Dí° D3%ŠEœß&+îôwƒ#;ª¥Râ4‹|qä…¹¡(èh'!=§”IíÄçåðP°hqÛÃ|åk?·dý«­\°à22ºSÂáËéx‘Þ«Àä6®zÖmB)Iêõë°3_SYn$i£ö@%ü~¢"ëŸS%^½œp” 1H9ÈÞO Пj~*>"ÆçEü­—–9|å±õæ¼µ1µÓÆèÏîrAàLo—ÊŽð•ó­¤U6¿˜ §«„JíXH=ÿWä …ƒ•ˆ"1jÀ8Oí°1`$ÕLMvõÄv‚*»^þó–ëKæÌ^Ž¢TrCš²ÉÀfñ8ÒÆ=)·›xA5«Š|.+öñ|QÕ«?ûðÒÍw‹å¨ãÅ@´tÇpÜŌܫAåÌ $2œ©ÅJ$Ìk·hý…•!K9¡Ï"1_÷uºÊ¶ºÙž—¾×’É&¸©”¤TŽ…w+à'"Š×æAêôÕ¼Ó/×ËPöã3{\mê_¨é‹T¨¬_غ,oz^% ÷;Õ«cîjìÊägm#à (\dcÅ&h!8æFmg¯dNûõj|ùáóu‘"òsPà—†Øm€HceÜiK·'Æ‘‰…=Ò][6¡SìÇ•X~XPð¦z¡ÜNª 8‘¡ ð “ïk@ü·Ã!) 
JxBD¤‹LÆÛ„iI…´ÑUTõvI¾¢+ ¥Þx4S÷¸Œ˜›Eôø.Ô$Šô5hXÚ$ç«›…º»fÞûã×öaAÅ€öAßþæ×ð¿y±IeŒÊ uÏ'U±hИ8þ4¸¤¤@;†m+®I¬€—@ÄVÀ"c{󹋻¬Ðzë½~U¡‹ŠœÙq}d¨Öa ®~þÕ§÷|™pï+ÀA²šÜ_t­uÛF™ öo¹Ý}Ãð—|ߺçâ˜Ý›®,¦J¨—Œþ&“òSÈU>QFñø\çã×7¾ ÿÆh--6ÿÁÑ:wÜÉ¡ª+S±àîK`ªaE-9ÉðV™õY-E¨%`!ig2ǘIï÷“ø‘VŸ'±+¡ÒòýLÆí%“;.äI|\³\gn•ÝõgaßmµK¼D&b•iVÌ-—•Øé¸F†bA<+çôÜI 6Õª^‹v=„üá[ª|¾Óúäg ©¥Ž9åç å1_Í«R‹~^„@ÑÒÓ½Öv{èòeWÛ£Ûñ“‚MLí¸u§’Hó¡Tdñ¯¿t`cç³vuYÙkËÊ›3éð*§‡yY-¯¨IÈʳhª&„M2¯ÍKú.¡PÉg 0YL/þ’€B‹à•¶Œ¯À%¿œ¸]6Q[CTn.ð±Ù+OŽIw9ëÒhjg#®Ê@x.ˆPÁ’Loº²º¸9ÇßàSK»­5Š8‚Ú~3gÄZSyiýê;vð¼`ÐÍp½»í¢-  Ä0b±@b´¿ Uk“ûªÙ£Ú&ˆu‘2Ÿo=ºœˆ®D²\ÂQðV i1·ë¦m?%V T&TB¡'šZâd£L.™7$õùÓ—-k¶õ9Ùˆî²è*ûtâáFßÃÒJ"|6„¢Tp\ŽÌ;ã0®ø–ÇÁ–ô:ä}%l¬è`/´/üêÿ«Ò7“º·îgâhi½¹*ceôÐå‡O®$¥° 9¡Vîž¹ˆtM¹SäÓ®kåCü³¶Ë|µØPÈËÛqz}f‹£v›=L]{ü©&hpjß«”ï`»‚‡5ð¶!¯)µeC©³:ö«ï-5'€ i#ç®§£Õ“´ºç<œûýPêî†åTBBj×§ ŠT¤Seu-"Ò©ñ¡­PÄo¢âˆC¤!#J:8v/y_jâòYÚ¤Ž÷¤|fq39OGbª}}háµá›ºÿ)+ì ‹Ý嵿¢)"ÅÖbYr«» ®nKÌG1[1¯ÝÙõwÐX Á>'8´ÚJ]5¼}IæÎÞ¼öÀ,^Þ"­›¼5>'Ò¡Lu1%0k°1hNRªcqèppáC # ¦â¿’ºÚÎ<{a ¦Hˆ|4ðV¶!Á‡‹+Ž…PëÈú§£¨ˆò’i]s/ÆŽï8wÿ0üréÚ,89ÊÁªž z[̽±rk—>·Ô_–qg=g ®boü”K©ÎM†4Šßõj½ò ¡R9"¿? jD×ožÞ»ïÏåT×´‘cs¼IǾÖl1•…苌4áðá(ÑŒ‘¾w¾§}€\©E{”è£F`£QzM(d”_­?•ˆœ“Í‹Ò ä‰¦ %t›‹Lr5 nØ?^&Â.¡µ+å'À´U · ‹ÌH;Ÿ}d]'ºn/¼¯¸ÚJ™ˆ&EúûDÑ‹ÒþAþr_˜=p–F|6šU*¹ÖJF6ˆÍ¿š *ZÛûºÔÚtE£ÛI‹%_žˆª£²}Äü§ $O§—˜õM¤ ÒÜhtÉE(‘F¦ TD4FšLæTƒ!'÷Ú³üDï¼V?Ç]댓o,:2Ä6÷îb›U|eV÷K2Fj6ØäõÂökJH`™÷œ®Ä¸ö¦?븴㱌;Ã¥¡U—[ãçsb®"-pÆÄ­RñmlÛdF([Ôé8iÆ·ZiCê‡oÛ³ÄRPzìx>Õ9€MRÀÐH â5UDx R(‘•ê{Àh[{˜æ8`“^Y{î[èFñý2F7ZX;´q…ÙbÕÙ¦Ë*Ôºð|“OLJÊÊCÅZ¬øKçÝX#l&`)wªÕbè0® RÿþÞ×ð¿yý ìá/%!~£hãÉ¡q1ýl¹”YKhÎé°oV¿³|\ÌÊGWÎùvãÆü9Ø¥}zÌûþÆz„d4«ßšö†„;ÀU7ò+¦0`×[2Z]âñ9!ʨ„î• @Vl<ó¶Ä´fùÛóG®žÓt¯Àšs;•Ü.7âÄ¡gItgV)Œ`Ž&báUæ’b? 
fÕÄ ªKÎý6±øWN„¸~˜´­ÜR‚’¶Ú-=~š”H@åùyH$T÷,81Lœc××TozNh G”ýÍñõo\¾ºò pô_FÖs:½½Y%‘¾³¾üJÜÙo'…¶’´>;ýv‰R%Õ:%B\3,4ÒRâó%aª°×%TúÁ-:ü&øŽv—ƒØ^‶eÁ¬:é}ò¸¨ËÐØ >Hæôùa;ÏÖkcj†ÖÑl‡"Œ¯vª1WÏõOÖŽ|·àÖ@Žœ–eÄØ±$³¾YU›Ý0£Õq1Q¤t9ÐøËíerGš%nè/ªÒðÂX€Ã Ôè ¤wËmZßü¹@ëT¯uä}P¨âòÅÀ-­„ò§÷?Èç áÉÌè}Oäe,FÄJüþöámÝH^Qpåµv9îV«£cZ8á &¢zjÌOl‡¤ÂQº´Ï+¯Ð‘‹°7Élv“ÍÕ&Þí”ÖùaPÀzg|ã•Ûåáìøþ›>[~=ô&S2¹÷ø‡ö BT˜?T=02>Q”*x2ºÙ.Ï¿ÿÚz7ö´ˆ¼1U‚Z@ºˆÉ¢©yÖŸmuÌÄD¶°ù:Îû .‰kr°G_Bö¨=MÐÛ–áÑ OAɆ–Ÿ×ƒ_#© ˜Îv‹O¦β‡Ç&L¥g¨|qe2ÂÙBQ¢³‡²òp4Z@øÑu›Dƒ ?QõÔFjH›âY˜F˜t„çõ{¬´œïïxœðËÀ¦Óñ‚Úm¨"ÁzCS¡¹ì…ƒ¦¯9¸H9§TðቖEÉ €.1(n‰luÏb§²¸ÂT‹©&’ÈþöÞ»ªãû뮹Ü¥EŠ»»—EZ\‚K „`Aƒ»wwZh)•o¡8ÄõºË¹÷}æö÷þƒ®·^×êiV®œ3³gfûþl³Ã"fqfmh½i$õ·°‚P&P; Bë69Fart*Ša‰Mì85›Ÿ‡úµÅøˆ!ê=žntÍà5j¼ðv9ˆ£† >POG­À’»)²N/™Ð 7J[3’'3ú<S@ÊRF5|®â•×ÊT1ëúäMŽOgñÙkª9­F'ó!L­¼~{'¥`hü Ë:á8\î±¹ßÞµzͨÛ(ãüö–ü±æ÷ï(s´“”ûŽ¿Z‰éÏov™--¿Øh:ƒ³¡XÒÏöíîë/ÎRUü6HZ±;®l½Ëíà»lr¬…Ëƒî ®U¤Rpºý|!¿ÂQ¨Ri>šÞ€àÑÑÊ*Ûg)iow•Ç×⾪~:oð¯;îÒèÝÈaéº@¤dR»ÙIF7šwüÑÆ€’‰a¬)zϰYèŠ`$ÌšD~Qà ŸÔ3? „‚ðÆ¨ z‘†IúýÌÞ¾?™]«³4çʺ÷À[U4ºd¢©ú 6X×|ô`r9-7æžšŸ æ¥0GÑBø`kñHíZ_Яû$aÆ8ÜÆ[Ì +êÞòEU¦r¥*vQ«#|­9€¾ËUx Ï’+¸Ä!e`¸jP$D‘!õÔúÉ/À `eë±Ø~ê?壟%É:3 4©!k¹°Ñ!DKI Þ²¶'íÒ*£ äèì€/Tvè羂7×_Mã8Èòsy\åÞßN\n¶@øþóg*ä*ÕbP¬Ü‹Éˆ’ 1ã!Ðt—¾qØÅuφ¯í}wÉÉ®9#[Tfû7h±±û}®P2­ÛÒ7×  ˆáZV@,Ž”€ HaRîÿ9Hl•ïsv¢B ,ûÇO4úXÌÅæH¿C]ñxeáWgsn Áuß•rs ¨ šlý¾ŠÒGOé¿þät*êJÉúÁ¿›Ëœ1ªˆX­ös…p}Û»"®È`a¯k·©¸×âÆW”"I°Ò(GÀ…Qð h$t°âN¢d³rÆž+zar§^«Ý$×#³Â+¶óìr¶†nŒ3(ûÆÓ±©O8~ç]Y™ØÝÌ v”lä6²QÊÇŒP=›ûɈֳònÝu}y¥¨äp±ÐµL-à_|ý'€ÿéÅCC5JTÞËß_bK¹ý¥Ц6‡ ÝÀÃ-ùˆÙáà)1ÂÚ”§ØˆÆ>½å&=7¼ž¤±„‘®ìz’£”2_,/uè¯Ð$¢þ«²7„ÅËòÜëÏ2ózæùrŸ]3½ÝMË!Wiè­°›Ás±kÅÄÌÿ\ì²ßÊÈ:¸WÃKHvž5¹Gºð6~¢Ëëg^~ûþ…V§H?–)U'¼^À)r§€Í—KV£«Œ|ÒËî\3à.WäB·:1ÌÜÓÇ•Îf Z¾oš§Ê•2£Ã¯—Fð3Çç£3“e)øuÙsi ÁÐl“Í FpQ‰C >X† f@ú—“ÅaM¿>ùüD0Œ-QñpN·˜®µÅĹã|å¾°¸h¯×S#¦QI‘Y©Š\}"h®hw AeŽXäv›üAdŒÔî7$ n’T÷yx”jz~{¢îI¤°“S¨å‡˜ùì™|kÒÁŽ[zžv¢Q²Í`‰YÄ`µkHjV­¹2cÁýYÝN‰|Âf1ÍÞ}Hðk¥ h7Æ!‡ÆëJV< ~’ÜlÕÄüi4b*&^€‡h‰$Á‘v¢Ëâ†wÂHXÀã$¢à꣉h`V½üõãÏX˜2¿Ø³`xŽËÃÖÖ¬=·ÇE$ÂÉ8f·Ø©Žâ¥»ËXa”1Q"…lî¢eÅŸqu›C6ðô.º¦!ÞDcj2q4L ¥{dä0¥´ÓAÊÕ®lq"&¦¾[ÄßbŸ^¯Ÿ=à°™óÁ%©<ýÃöç^ò¿Aƒr Ó/ngpæ©ÉC߃æb¡bD—ÙœÏÒpol7<¹õ¹±'µy÷_‰àsZ•úÈð>“NßÙc­|-×*°ogõÚ)eÕXÞꂆ{`Ðï±’úËÍ‹ß{Ç<”Ç÷<\Éc‘Éßd2Âc3Zbcun§avŸ 
VqåÁÇk°Ó`™Â*GÞõlsâ˜=›·|O!‚‹ÆØ£žÝ©8U[¯êguØ,h(B ëþC‹>•¿Ü}‘»ã˜¯’ûôÛÌB¦¦¤µÝˆPË~·3`[÷ÃX ü$g¿ »o€@ývxâÑ;¹8H#{ÎÔø"‚uåJ‰KfÙi®*ŽŸw1[ WÑ­°\ž#ÖŒ,©E.ŽGû°˜èпä|ºîX @àˆ(¢èCC¦t€]jW–/²:á"QÒ–T)) aƒ,éYwNÓ¦8/ÑÚØä6û9¼àÖIW |ŸæŽ3òØj… \Å—¡;ÂûêWÛ>Ì [ ò†Ž‘@î²ñTšØLån ÄS.Y+”_QÊ#ßõÊpûìKfoµ1þĬBvP¬•5›?ü̆ûÃ8zïÄîIôât«ØÒÀœ…I&ÔA¢öœN«š?b›´ª©^ûÞô"B¤¨‘P£¬¸ÂãpfÞ†¤÷¾'f« eÆæºú†ÊOó:å±Ä®™]o{˜M5!œeÎH L¥[ÏYVt<¯×Ä.¿>»kÑ€õën,ÀŠ+, íS"H ƒ,4S»è(#ô‰¥<Ê ˆå^O€ö<øÎ,"E²†ôA˜8̘0Ï ¨0(¢pCÿî‹ÊŠÿ®£¬X Ï׈«;­ÃZ-OSUþN%‰ŒR4žßₚ«^=àZ¤¤ó÷5ÏGˆ£R›]r'– íÊìðœiõ"A¥Óœz¥ÓúA¿2•XÈŸÊ Qü瓪HY‚Kíe|ô£ç³#e‘De²”“ czÏ\¿¼Ê¬üˆ6 †ƒqn':6zØx$L dä]PWdcu7ZØŠôiùPa¸úÓª¿#›'I<Ú—²T†ÌëvÎŽN2꺸•__¾ìúÄe]¶E6H˜}¬34úéò)‹demáï¿ä§€Á üd•£œ„'lM^ø®E{ËN/YÜ3=ûæòËí_¼7|žrmèÚ¡…ø Ç«_v·ïÒ.ùU•o´IÆ“ cûÏÛkJÝèfËúqg^r>È_d(WSLJ€V[ƒÏÊ£Iu-ý}¥Ð/•úµÔ¡ qÀÓÏ9ÐVÝÌîó·=Ù@yeÛ”½WS“r¾ß›6<\ ¦“Üsݦ“éýòBÜÇf;%2{ÄÔ*83<Ú8NËy}n"§§üÔ• Ì÷|½ûŒý £ˆG^f,ÖÉcÈMúÿ²ˆº”ìA2oöãçi?µ>¨€ÌµvÓ³%ÔB…tÄWíUæÔ9‹%58HþÃ…  ÛžÚøëÈ9íöÕ,7 5À_rýèé‰Ä4 =pâñ+ûÍÄA3”Λ¸P+Ç÷ÈQ‡k¼f–ÄüÕ¬º§z·9ìª=÷VÝ,ÌuV“=uî*«%‰w&®ëwrÖ¾~ó¦lßxu8öÏ‚ŽùËX8êü¶ÿͦR ýšª ½„bC±•HrGÑ­ØA¦ M‘$ÖBŸ–Û¨ìóë)Cdk¬;¯¥ìý5™+™ÓxËfÔ•aóȈجu›â¾3½<óëº:(õé›¶÷B&=,X :ÙpsfJ»•–êØõ¬“û͸7fˈGT›‡@‰TwÙú›]±O¶.Ûøh*öÅ¢¾[!WÛöùÃçZ±µÎ(¡X‘=ôºÃÂ0\?î((ÀhùPÝiÎqT‘úÏaEh—¤¦à<CµäoùEÁÆàóÏ2 ªxM>œüMÀnTr)€ŠÃ¥b3c`XW·…5GŽ…ù_ÿ àxñN^Ë –‘æ ¾õì;^A²½çÄq—põA[ú?µX\lìˈ¶•e¥4Ÿß>-7 ~´ž ¾Ûãv¸\%kÙ° Äç ²%ž@pÖ}7Ž:eó—mz4ÌqYÇC5šÖ™y¸ëœv‡s‘ÜÄ©;·×žˆ¸Èäñ é|p®ìà\ ©óƒ°e¢0ƒº_lb@#á`¡k‹e™Än±­¾Ô/æt¾mw¹"E5ÌÞÏju”ÑWiòùݾP½üÁW úÝÞvâ's”A­:ó‡á`m[{ÿ\YbOC 2¼XpI‰ARFû±³HÞ³lz}dãßZMý8&PpCßøÁI‹4hçG*ô+ºï—‰Ô^ Ò¯ÓŒs¿m¯¡£Å ؘ¤Ôôq¦ mæÈ庀ZasZ&õ}/Ù tƒ"¢Hî‹×Ïë é#UAÍ&sºnͽ:kR÷[túTô6Z® #Ê(¥~Z›(.W"1ä:ØÔ~1v¦ÏEã »m†aóavFôåµIS(½ýü°¦€‹BN# vCtS9-³#r²¨ –ð {È5¬Ü10ÐQhÓL[‚޼HÊÔ¢’ ® :†©¤Õ¾* pA% rõ2 cF%Qúã¨/ÄðPÇÛxvn¸¸Ýµ­‡/vuÝ©±3ºåRêA:ÏêJú3Nßšn§ÅjѼorªåÆ>ýf_»½Å.¢•å‚(Š5æuZùr¹B'Xè>`YŒSf‘ÇV¬Ánî¸5i\ïå”»W¦Z¥›•ÿ=vy™è­, £˜52Û¯DÑÕSã#eF~b–ÿÏC´xðN#V-3)µ˜åð!²å¯ “ÇÍç.×e _>*ßÍ)™;8Q¤k0oÐ^…PP^•¤ŽÁge$ÊjwéêŸz-í´S¬ët ÖOýTê¨XÑï|QÁÛÚõšz^ÓûKH!󧈦 ÂUH*Mnw”!îæ ‘\Œ*ªÄÎ8'Ž(gŰº1VùÔëE2=WéØxmᢾ»Ðȳƒý+—´Ú¯¬õu‹™gÜFC™jHPmÏ%þÈ¥'VD†·Z}ú‡ô-‚Âÿ¤ÜüÌE=QÿC)à¶£èMpiÍî¾Çóðù½W„ù„Bxºü6[ÈC}"¸?;f~׃>.ƒ&ÓîêèÔÇ•ÙÉ46¿_'(¯6*}‘|¯ÐÄg{8ʬî 
좜çcp[$žep(wv`b…U€ô¥ü³Æ?½dÕ/£)—·!Õv~Þó ëšß‘ö®üà7_¤ËÃ|‘rBÏ¥F$U+]l­nOXÔ焃kñòÂðÁèJ`©â * ~Dx'\áÕ”à\²í÷iA%hÎT>–¨9ó®uŸßóD‘áw"îèæ½¥”‡°1£›¹Š ?‚" ¬ßâ7·VÉãv-äÍï˜p³5¼F>”Ä(˜—†×òÚᙾ£OÑm“Ä|MLYm?aQÃ]BÛÍØ}püKã~c@e®°Z5²ÒŠ2ޏôÈ£Pw*´¨«pÚ<“únf¹ùó»¯dy`ÖŠäÕ™Öh%Oå°É[Ìõ°XÞ@ ¿Çgc™0’ä3U† ø#µ5´o«ÿôå\õµ(…"r¦¸×®hnì÷õf×·ŠÕ¾N¨ì1û½i]6Áêš303÷bRÿ–“bTqö¶Ú[/©Ó£H'SG–ðMÍÅ‚´ÿ´9¹ ›ÇYm {¡Dkg'3{f±<¢N=¶“Þå,2Ñ´ô»'Œ‘µåñ*\qTQRÊ`q ”žêÒHú]BæÎÜt'‰ÞC*9ï¨HV£ô"î#ÄyH‚¹îšZ—ÅBQ}_S´ó>ÂËó <ï-é4%•­$ã\v?Bök=áʽ*&T.a%‹gg\Li‹o9Dœ(êbá’>§(ÁùA†eaóU¥®×˜½ƒ”.AU©³†?)¿;ö…3¬¤Ftt– l7‘Ü;t|zƒ˜£:Ý«Ú_u-èòn@£:<±„Ìÿö؆s“Aä!]çPâ„®ã3)å?“ÔÎBIÒé¸grïüŒü‰#·n>·€Î‚KFõÚ|üahg‚lÚƒ^>RGÐ.‹WE…Þbm¸úOócºg„pL=~.O A‹½U)@?8´téA¾j[WþfɼË\aa9±1u NGÆ÷÷ŠŠ ¸Zï·½ºefŸ0pîö¾ÌóÓ@áw²A%$ê§=½þú*zúð,^9ŒYà2‘¦Ñ_R=uðÂí§s0x¿ßžui-}ËB|D9”©çºÒÓ'FD);>²¤ºX~‡o­î7è„=, ²›ÃÍåòEl-‹t{ÍY÷Æ$w½¸âÖÀän·5QQÖÂJ_У”IŽÈ-dýïÔ•ÝWn@`÷ƒÏm™±£Ý²jÈ)Ìš'x=±€M—¯ã§V.¢/¿ŸÇâ2ˆR9 „®X"·9ªe œ÷…eùïúG)Às ¶ÝJ*ÞÊÞjÎéÛ¹9èXâ!«»ßÌì3J!·˜-ï ?É9ÚJ—9\‰Î¨N_ÙßN¡¥·ÇÏkzN/Žžßþ®[Q§²V‰’ºv0 ›»¸ïA§ÇZ×Ù£¸º´fƒ¨9]r¦@£¥I—Y´‹º¬-«´$ˆ¿Hêz€Ë1½¯øíØ_éIæc[‡l üÆ>®&fôçIJƒMHH©µ’2ÄÊs^OÀ‹3_™Ýë<Ëa )õ%ËãÚÔ·‚í“*¤áT~ƒ‹á7'ˆ>?|¾˜AãÜY^h3\/6‹í²Ø‚©Mæ HBîÃÙ¾j‚æx”ñ•ÑÃoPÐÄï £wøi žh‘~ÄHæöÙ¤`i×÷9É×Ìk²å.èà& 2Gq0\ÙdL'HS¿7`Ê¿´•TÓPc×âÜFÊâ–žnš9ôZñÛ*®HÎp_fµ>p ÁúqÂÆ™®Z4•Eþ÷Çþ—ŽÃ̱†r_q¼YD+CË*4ÊúÄÅ-//'Šh‰+¸øb£­ƒþZÙµ´ ‡çë—8œÿ<t[}é{ð'zYÁ9*  áð¼ð°êBxPµ œðj‘qƒ—é̵؟D Ü®Y ŸŠ”ÚUä<èäò™(ÍCQØôo˜‹p´rм#”™" “†'¹~U¢X´¶ãغ9œðì’c÷v ‡ã—òwží¢“‡ô"<›‘Éä!ö¬ ï|¶¥°TŠcdh³…gŸçP΋ÉâqÀµÉ®_gM˜T·V£eƒwâ9÷Ò ‡XifÑÒ{á wö(Œo¡Ô•$¨·ò7N m˜ÁµcUñ¸§­5±mðI裬<4Z Þˆ›†ÎúrsÄâó7p5Ë­Á­¦RT3ö©=¶„×T$ƒÁ.•šs:¢:ã°ášöú Ø£Ð+k\Ïòð\ß×:£—ÉEæOÞ‰ÍWIEDj’¡Á˜Œ£”F}®ªóFŸC¥ÒU8a‘úϾÿIj#sŸôi8¸AìÓ›¬ŒeÕ[Þd?OÍöŠ·¦cš‘X0êH)¯€°P~™t~(]JÿÕšƒ/–Ò);É”n9‡žäPRŠáœý‰šý ™Úux?2¸í8ôi‹SÔåÊWu9zîÊÜ?¥FXú,P X´VI‰5‡‰ï*‘h\AOŠËôÓÜ¿‡©ë MIavp$R¥V¦™s¼åº.¿KE6+š8“€K‚gÜé¾`ðÕõ·ûn›v{湉”†èŠæ 8*RPþ.ŠÙñg† Ð6[Êß>¹šWì×xoÜ:Nu™Úy^ÞÍ RYØhšÄ¨¥ú$Îߌnȉ‘IUh´[ç_}Þÿ]ÿ$ ƒƒÎ`·éÂên¹)VÇ2³¥\µÓj+óV*å*Ž—ËýBžä³³Äå” õÔã4ºÛ ¹f„º¡Ò©O(w¹J^"tq«£† KºÚzméà°¡¥}Îen‚Ú¤5Srh%Oµ{Fw]üÍg^}¶mü¨ ÊûÀª +IT=-&ÿmïE‡Î¬£"(kݵÁ8´+Z_È\>þãHŠJ·5¥÷—û· Þ˜óë>¾LŒóÐãÁb,Ò£í÷w_ì!hhúžÄ“úv‹bI—3kŸƒ[ ÌŠa± 
ŒÉ#ž)8a`"Œ…œytÇlÓµ¹ó{lJ»>wu¯¼O–þ=ÙYí÷m½3iÕkGo§R+Q« Ù}îÛ™Ãv¾Þ´p Ù«úžñÙö»—óSŸŒ'“oŒÊè{°ÈëÑÈÃѶ1Œ§yW]€:ÏHu ˜:oæ²y.§oY#|!ï}Ñ›šµ£g÷[+v5“°¤NO ¦. X]B9IpVr ¯ÈX–Ú¥€Á$Ù`òWý@š€‰aÒPùÁëpqQ7a‰¤l±„¤Ì;¼êÚ¸«ÖÀ£6ðWo¡µ_ÕÁB€lOë°W¦å,è½nýET\±½LÀÅò+¼2Z™­‹ä¡¼oßÅ̞׉Ž'•źýÖü[éT¿ÁëØqqdJ¯e»ÎgáŸ:Q‹™Ý.¹øoöÝ]ˆ"˜§(FyÚÙgû¨2$!I½¯»9îoýiöÀ]ã»/>ø8ûôUj ê–|òjv A:ìCearÿó~×`)²Nþëƒõ÷±]†yx>r}ÖÈ^ OÝÌÔjÞ…Ç3®ŽNí•-Š{ÿ¶HH€ª,°ûvŒ¿9ýBÏ"÷ºŽh˜ƒ¦»J)è•%`€3•ò}*—F.n~FÁ•fµ¹,eÂ)øKò $©ûÛÕKÆõ[¨–¶¯,.Œˆ’,tÐÅUèc¥.)ÂÄA¢aB$bÛ+‚R- ì^ G16yWö>:6šŠ¨ñ´ë¦ZCE‘8ML-‰Úym•RÅdU÷ÓL„1ïÙlª¸@‰™ÈUi9@0`Ü#òhçî¾? FvVŸçLÀmv¹„NB¤Îì­0:‘òq6‘ÞßF6t?å på7Øõµ£ºº:ãiŸ¤Îw8ݲ³ ·5.ƒäÚÓó Úe︺xÅW‡ÒO„º<½GæÔžY‰’ÏC› }vño\§,ŠÓÜpMé½ØÇµK…5PBH)‰^("X})ŠB»â-Hº´uïÎɉý’‚ðMᕯ¢¬Ô¡Wv˜îÀjwJÙ?zÙw‡3[=ðÚŠkƒ— Ûiaf–.,^¡œÞ};_óyóÃìçSNâ#™w×PB1仯’÷>I7m€dô9¿¼{ ÚË%á‘JýÚcãj.éys퓞–ÀÇ ÃŸ¢“Óÿíä™6ayQ±MÏÔhñ¥¹ªU˜bÖ™æs»o8?ÊߊNaÈãå!3 ÞŸ¸‘Õ ñ°‘X_1D¥BqÐërºh!:ˆÿÖ_ÿ àzå€}E÷šIo±XŽß\FÕsà*·<•óÓHœ´®WRovXÝóêŠ{èÍþ$ÚØhÊ@__MO”ätø}á¡„U#?¦Üÿ6¥ÏaÊ›ð˜uPˆ©c\6•X]Zø!{ä!—{T(*Y>&¿ÜbbùÅÔœõ“??ÿFÏL$5=Ô"È—¤®6þéôò>˜B2ˆ·^‰ó©ÄÓ³Í%ZTš¾'['ý¥¨]aù#õ§¯éF̵ÇÃìûßñXŒn 5«Ýƒ_¤â”ø—vY–xe&SmE}ŠMŠ‰Ô Póâ^—ÐãŠT0 üÁ'‡NÍXÛ่|%!£ÇC\Ÿ,¢pņ_æ‚Dþ(0†)c"ˆèïœï«ÏÿKŒ¸¾W¦ã7Žâ¢~kSöÛ0ñÙê[í𡂦qYÙr‹VÑg•’Åý±eì57ÇRN*&3ú/Þt3{i¯¼¬3cgwßrôcÊÿÉ-¼k#›úÞ,ªú%ý#0þ´öÇ\nOê£WôÚ¼úfâœ>ɹ?dà~†?³3`áH2føRÎd/ðÅWw[Ôð®>,ÂŒþPG Ü(Hê¾æs'm^}&qJËK»‡ûɪ‘;èÁ.ù|ÐŽˆ#ï\ ȲòÄʲ¡f±Éë÷?aóâÊÏTÜâ#²`Ø¢õÇ×ÑÂV|}—LDÃC;^8)Ü1y̲½û³èCŒ@öt©ˆ²WXc*«ÌD)¤nLÚ‚Øë~%Is&õ̰’ªwT‰³ÍögfÌüf0¨£©D«Ž°“q­Ò¤åua’?èãDËëñ&žœÎüAKjáYHD…‚ã#ÑÜhº´$í×gb»¬w†ŠÍ§ÎëZœœOË·ö+3ﬤ&{,) ü &" G¸•r–Im’ëÆÔ¢œ=áÅ(‚ö ÂÙXµe“Ó JÝj‡@ÃkÑ{Å€#ºpõÜÎÛ7½˜>.åÄQù9 aÙã—¼–` 'p¢³Š‘«F üžÓ|k-EÃ¥­w Øêi+tÀŠm1µÙæ¼›‰Óg¬Þqg…”¯Îîòc•ÑE¯U'ŽŒP(îéJ œ®œ¢dDøC¦èå\®cªmYw¿Nìwbóšú>|ÜZ>˜¾ß4D~hHrl*Äh‹t²]°¯Uã…6¯wÓQ×_„€×ª¹P * ͱqÀ7ødTï‰,;?Ì-F­ît)’aóW˰ØÍ®Í#o|*z Õò’ÿLó*‘Ò î…åAàð…ÒRC¥^ KÀIÎÝ?Lßâ¥õޝ½3'öÊ;va6CZ»©€P»Çàídn-›ž,¤äµì6רûhL™H&'vɦ]ۨ ûŽîÆM¶v»^VZšq{V0cäAz6QÕC‹l3$òDZt\ -1rþ…áNo!Î ›ë@° ½ãdÀ+ôÞïd«òw\˜O—ÕB–u8:ïrÏÃr7>Y‚Sàw1(J¤ Ô$4»=JdtÄ„@ÈŽ'BkùípQ¥;F/Äø·Y6±ÿÌý×¶¡>Gˆ~]b¹¯Ý®%8qaŒŠ2”;Ȉ·ð³ r‹ˆ”Œµeÿœi-r2|·`Ðíw&.êp=çñ×4·Ý±íèÍ[M— gÃo¤´ É¶R; [Æ1¡Ûph$X8µF3/¿3¨´¦\Çž™'zY¯åÝwUY 
˜È64„;™ð¡Þ-аÃ6Bo¡œUb-”Ýæ&µÔÒ¦p}l“`Ý€sŸØ¿ä)¿Ë,”¹Å)v# ^9~¹ûÛõÄþÇøÿ§7üOÿÓäv’?ÝT¨d×~>úu³)y»–þÝMsfýeجµ5Ý[ƒdfǬmO§|×c£´¢‰’ˆÒ®Î§{=t›L4>eÐ…mN§GL-ôÖ´«v¾MÉkù[µÅ䪶:ƒ>4\Ûÿ$š F)uTx hWìIlt¬A”º†³õêV×]vMwX_•Ûí ñk—¶¿Ô0¡ö˜Ú³U:¥ˆ[ ½N]οÓÍõÊy\Ÿ#•ÊïJþ—ùäͽ.»A[@²î‡ñÔ’æ’áí¿xò¸Ž“#Ég/*Nö9ÂØºƒ¼*ªêµRYôøœì0^”Õ'e»®2[D‚&©ÛbtЬªe®ðÊxêeor«yÚÒ8J}<ÄsdçÝtðÁaMÖ¥îkµ¨Ç‰£wçÓs[oˆßBDrÆÀØŠ8°Háî»&¶Èúéåk}xDÆõ^S:ÝØuo ØÙú[¶Dµ0dƒÓ_zøYöè6seÖXÀX* ­Žú¨¯®±³Ý·-ÑóR2“˜˜š‡˜ŠX5ô­Ó{ÞøTXAIxRãÛÏ‹;Ôè ‰Z¿ŒT»+”À HµBn/±P Åå/7wå1‚*?+“­ ¸ÄΓOvcü K6¡}ŽN'Ö¨ÔŸßäŒ=òKÅŸÒÆÒ´3Ë¥r²µ0¯ŠªÊ£?åÑUö¹ó6Ýœºfä•M'g€!–¿ ÒN ‘YNÝþBÔaÁª ±¦æäY=à”„—tCA€Ëvú%"QEù³Za1¦7ñÒÈUƒ.²½Qä-"ļ7d¾7\qô;оOãŒæìF›Û<ä8•;Z^/þŠ´t¡Þ‘ÑŸß~þ¥&¼Ï:´™$LPDT)\Ô÷кcßÎh»O.D{ø7|>;hQOýrkÃF&t<ÈV~ˆ‰Ò¥í™E\_²¸÷¯Í¿ U®@ìê jEAŒIòMÿ©§Ñš%WaHï±’“wöc-<dÉý"§ÔþQ€¿*4oëu||߸N»Ò¢ÏHÆÉT±|8ÕíruT´ÓôkÂ"iÐQ‡¸°K­•e ú`t,l`<Âê}MPü@´8á`wûáÕÄžñbÏkŸ¿O sŒFœ<àÓb'ÓúäíÜ6•eÓ˨íIcíÕø$(*×-‚Z6ꃗòFw^­ ÔÕWÄqí6ŸÙüõ¿˜[`(â© •1%̉½?&Ñ'$þr†«äpkZ@‡õ÷çàžN’ã‘[HUF–«(QRtÈ(„n¹ˆ}§Q§¯¶ =ÉŽÈ6A’þðv“D&n„¦Ný@«5oò?Šr»ì/bYMrûž“á~g‹!B©‡`ùôÓ;ï/¼š%[Þæ´š­Ÿ×䘠®¥Ø÷ç‘gÛ´Ñ|tþ‹ãØŸ8¡ˆ/è<åÒÍ]¾èÆ+;ñtUÁA s¨åÈ¥•—–¤þ*7ïããø5“^R ¿XÞñ±;ücWHkHjMiš¡­a59¦wÉòs‚|AØè/ש%µ6 ¾2O?<×P̸9ƒ\ÕõT±õÍö·iÃÀ† .>Z?³þúmÏP 8.4‰Ù¥ÿîë?üO¯ø ;uæV„p,=3²í§¬Eí³\Z… Æ-Ö €¸ù& ‹ƒ|ß!= uòM,½?R-ׯíyÁ'ô®srÕÑQ¬ÄË1ð*#|İ—lv@¢§Þø2kÂ_Ë΄Ú"ñÈò¶ÇR;r%â@˜N\Û_n¼‡Ç$J´Á€?àA*Œ'K¼Úd~¯ýGÞBež… '6ßM†Ií½3íÊDŒ*¹Ç)]xغþ§¿ •Žž~a÷[9ôë?ýÓ ð}1ÐjY q!+(c˧ì„9& ~¸ãF_°ƒœ&gÝ>tΖ{}®Ã¿R¼{Ê8ÊÉÁ‰Æßë°©ËK­2|åƒ>à\£GdQ½–;©‡ôm—\;ú `¶Ï™5|󲳉n±:<. 
Q/룞ÒÙÿhYò°¥´«±ãï$&/;lð‘{çñuÁOªÌÈ1£./ºGæ¡cˆ29Џ È©m¼Š§æ}N†´O'%6`ŠÉîY‘Á„Ô1¥6Y˜"6©ÇùBçû5ã§tɵ°Eª¨ …peTGñ!–´â»f—“϶ÞÔí×oK<Ó»1µ÷‘´[¾ïòçžÿ%a?0bW¥³0"2‰gŸM¯©Núõa;‡þ¶ .4lÔйÞsz®F mV6¤Ó¼sO6b)vZwPFÞéøuMÕò”é¹Î×-+VÊ×-lwr˜árŸ%/ê‘Ã2×ãùtr!Ú/º]6F¦U¥H¤Ã0’-=ŽîzžLûŒÏGÆuYüé"쨙-ev¿íå{ŒF×üÞ‡Ñæˆa¼B%ð~¨¡«WÿÔ©Gÿ‰ Ñx§þýâëß?ƒÐrüÿçjO†÷ù^J¢§ôHõ3šiÖÁØ ”ôöøº¬v•nKr‹£åªÿ1pX¡²ñªqõ䈽î;O!fpä& œwàÞÆ½G^¼OSó‡ÖMª¯Ž·½·Õ²5ßòÅM] †Á…î¶ä³8›Ä ƒMÙO/ü†~;Y×ò!§22|¯ÈäSýåÿ(ìôTú`ÄaV±{D¢ñ;9©ÖÜ!¯ñu‡A½qÈÿÊ©ÍmÒ£˜ÄÇgx.®ôuÎåaHNù»áhîÀ{u½=P•ÝûŽÝmçÉEr¶Âè!ëÇ?ùÄXƒ.âí˜Òá×› #±°Ú\@uwÄÏí±“#à3F^B°An——~éÆ#[×ô%GÄ­'m¼¢Î)W5'B¶à~gL¶É‡žJA$˜HX .S\ûÅÓ¨ ÏÅ·ðO]Tû:7îg»˜çÓòÝQ}Æ.Ò £Övÿ¹Ž5*¥æ‹º2Q["†ºhôæu‡Át¢‹Û¬iuuéɾQlI¯ôÊðJ³MÀâ ƒ2{ήA¡ë¹J^ p•®-Úï2:YÜOherïó<©Ò\¹ýá’9­ŠÕ P:t<_cW#.ÒmhõœÇG.²cíˆ|‘¶vYU Ü7üH Eó\˜‰6¨jk q”eJ‘²Ø XÞ÷©@¤>"{OVôØ„ØߪmÆ^´=ºÔMÞ& AO Áëj”{ÿ+Êe$©ïú-÷,ï=§¦_¼²ý5¿û‹y=Nx¥(Ö½íAÆÚ†÷_¼dþ cŒ†’0•6«Ó]6º!Ÿ'ö8z½·û,Àq㫃ԛ.³¹DAy»óËCÆ‚‘m¾áÈ»ùßuËÙûS(í Ïo« ññ¶_÷«ºZÌÇæ¯<;0£Ûñ/D5¡NųÂÖõë‚{Í·ÿÓgÓ®h*LàMëzä­ºÚnÿxáq.˜õ²6—²Nà4rr}Ó-„s¶4¹§&íN6wÔvÎÜ£®³†UoSûŒ›A ­^þ¡âÓ Í€5 €î¨å¶Hj{E&gsþM‰ªjllSY¡>- ÷¥": ö—š ÏÏåù€^Á‡aߨ¢‡Mö·|µCKÐ þÞkcý?¬*=+bqƒel»îSø™qÖ –8ŽÜ_rqv ¬±ý*`[À— (T‚Í!B¼þéö¶Ÿ[Ê^¸«QÞ÷¿š=o2 Æ×—t»° ФZ°’qÙi+Üzr}¥ ¯»Â^š}è>HRYžo, ªF8§—(‚U€3A Ùõn£ƒ8PâìG¹Q³¢x^SÅ@XO6µ`tî¾Ø ²ã " ï¨ù^¢u’ïÚmŠ´uã{D5‚ WÔÌ—òâŸ8É ´“èJ µéÈ/u%õGQ™ ©l _|üJX¤§ÇÙO¾Fã77ðvñˈ·+ì#ÓtÍV´Ï_}eù+µAï˜ßbµ½õ@àÖUíàk‰ôÂÔаeÓù<™Ò|þ®w`ø¢íÊèöK¢µë¹¥Œ=ÉÌA1S®}¹ãúBS‡ÐÕ‡µ,³ÙdÖˆE[ﬣ´bÁ VIj™M¤xµo\Ôºj2¦ËšÇ—BBçtx**­K~"û{£µY«}m ˜À€éM~‘Õüdà?Úû ›(P4 ‹û_|aþwý£à‘37÷èÅ1»îæLëž³ó(ÍA¥Z[Éôé wÇШdHUE}"뼓‰ qbçÜ¥O-ï¾Û`$2©ÙlÿeÆWÍÛ}~W$ƒ³‡AÖspF»½µ"k$Âåz.|N!RÌ?ØŽhÚ-“?ò‹-õÚj%j—ÈÂN ¨ à÷ºØŽí¬ëzoу/sú¾]p£Ë¶®ŸbaqµuÃ/߀kû\[Òe»ˆ&kE©dÒ´®·Y¯Ë̺ßocç«ótX9à÷­Oz½‡2;P F°—dù9éXGbUÿß[5üâÇ—ëqâ15oÀP¨ríóp.w퀇¼ E ỏÓÕf1ÞòjSæ7 ö²8iíôýC!ó–v¼l®0d>ŸÕó2úèxYÞŒ«Ï–’Èh_íšKƒætØ’~gØ¢–g£„ Œ×Ïã ø2ßÔ3áK-iB"Œ¬X>!2«Qæ¶<( €Î°§gß“Úsg€09Ê:õ÷ÝûÉû("ãz"Õú7• L쿱~,÷™— ¹S?|þ  I©Ÿ1`GI:Àü0ñ YÔëôÆ›¡Ì8¼Ž †/˜r¡Ïº‘¬f[0Àª2U)„j©Xíj†Óå@k;Ÿ$åî—ýŸm•a:eÀB*}…6V…HïÙgÉ©ëkÃã°Y$…ìp7zèØy&‚e¯nqAˬÉ·Rû\U y6K(´oqËï—£¹ÕêÎ<kX$wÞÅ‘+ûžMEÎb¸ö˜—T”U-ìx?F' 
8r¹¸ÐúQ'“p9¢æwáÒX“½Øp¢|tîù®8Ù]nU•§ÞÿzËàKïušÛåR|D|æ­~óZïS+DK®Hn{‡`¯ALB…-°Y]2¹œ‡¨õß[Åß>NÍÔn9yJÅ"Ëg«Z¡šv&ë˜1Ê”|û{ú]İ1BX`4[‚ƒã9¬m"ת•×qWÛ¿}@t;Ëÿ<õÃVì¥äiºÊvŒ7Ì‹hë´·.³U à.£xƒâÉí°[þeqZN\ÞIeLÈ»‰k0º ™¤yqYEÚÀ ˜]ñÇ’ÜÁ<@8±Ë÷û¯ì¡ö“ní‡ßûí4ÇÇá‰$¨g“ðp¬Eâ(A <Ýçéý.ê|]U,>þZÕ ·E!:¿›ì=:wYïK~kÇ ®~0tMç{I·[vãó¹#¿Ú#¥àS°;©–P…hˆ${ü~“Ù.’Ä•]©]· ˆjtç…rÔŠø” >fáW‡5µÕ6‹]îV9â¥Ý ¦Œ^gŒåf…:lýàkŎόʙûó›Ý-aÌߎŠxZŒÖ[Jøáäø½µ8›]•²TV·-õÁÀ¹ß} “EPÃÐà!HÇ´J ô8J-âP^¹’Üs”Rc·Ú\2û€éµCèÑËÜý/86©Tú[É“0e˨8Õò _M(Wþ1ÏóõŸþ‡W/À#Ž*ô£ýË«P H– Þå*·ÇK𖙬µbIßSNÕû-W“Ày' È” Óë#T¥Óa];íøƒ_îÖø¢MO€Ø Ò”6 ä6%sdXœ¬‘­íúj݈Gø«*Džpù¤ýÀͺ¿Yd~‡Égúáë3†n¦Œ††Ë8ÈŒÆ++žR÷`Þ­éã{%ɸ:»£Q7Ð|f˜JQ;–T.‰ ‡rþbò¬¾©Ù§Òæ~³åà/ÉT}ƃ¯Î(d’í¾£ºy$e²Fß5«6=’s~>5«\¾¦´ûƒðîœër?.¢2ÌÞ¢liϱBMZÇóZÒªÜP­’¨q[Iüäv¢Ô‡y8 ·Ù&1^·³†ª^Nç_übIÎ×ÏJ™BboJê„xŒÌÐA3Ue@i÷õË×q¤ñfô|®g}‘ÔòVµ­,6&¾¢Ô®Ž´[]vÆ—òÃ̓žŽ;Vn·'Q¤s_`Ày­ÿXÞäBúýA­âß—˜1µe÷úƒMÏï›K=~X¡—ÈQTFjÙ"ð·N¨’EFƸ‹ÏHÍ)aEW¤œÿza£Ã9Æøö}úmÓÓñ+;œz©öšþE°$¤:YÚ…1 ˈ/wæÿj`Wô^8S L(áÈe‚¥ùCRœ^u°Ïösν¢F$Õ Æ®Ç·ð3±[âþ'›ƒê÷te¹€P˜·ýðFV€ŽÆ¸˜¥w»oêùÍwg]nÕý.Ÿ-]|¿UÞÈ7F›7AñjúSsZlîõ'€s~nHÎE ¬OHÜÌOWQ£-ð”dç‰T*K›í¨ð¾ÃZ/ízwÍå¾ ×Š^',Låªk“S:ßwØM+ŸvØ<öVâÑØ„èO`µUdþ„5`ÁpÑ}½ÖÊüŽøcyûÜ@¹\­³>ØFè˧‘–Ú_£.`é°#”;£tЩÄ\Œ°Ñä$¡®:qc<1¶¶fÞ©<±ñ\CÙ'"mˆgÑ¡"0)­4—ëäêá¿ ÷6ñ µü„Ù öê£õ³Z¬aän‘J ±ôÚ<Èá`Ùœ¥ôÅÉ‚ÖTE¦ /jræöæ6®;;r}ÿíã¯Í Uè¾p)Ú,Àvº}Z±–ŒÇj;à¸ìŽŠðºaÿ+þC® ßý(‘f)›t“Ȱ΋ÙFyóÈÖVdî„Þ~”HÂ` ²²²aBÍ9»:nþpÞjÝHöÒãõX-ÈÚÉãr%ßuK´UXù.i4¿vRËýÂZ¬J›ùT›-[Ûô"—¥KîtÒÂ6FV€ÑólŸÙj¦±0 I½;{ÜgÕ»÷ï¢ãj;XMƒ´üöËûÝ8þh6Ž'½ÊpNƒÐ³—îï‡ñ¬ëwãç¨Y¬ØøøŠ7Ži3ÆX°©eæÜü¤ #¾i–qì~r‹ø0÷£ÔM±[øhJØeÇÂQÛ¢êé:٪ٵ.¹çu3œ 9¦Œ(;‡œ¾< ¢c4N1ññU(I›¥Ñòí´?7·n³°UÛ'Уènû&^œòÎì>wÛÇM“»Ü}c<¥6ÕÀÎÖ€Mþ«¯ÿð?¼|v€ kÙFð!.ÑGÒ~2kN/_Ù2ëÉý¹ûÇPX>OtO÷rsLÅgVv¹S#q_Ç%’Ñ”-šC2q(.× WF6Kîûå¬«ï¶æ4yQa}¿îòl:9èiÓ{jþå<ìW³¸œÆ“8¤”õšŠp™oÉͶ«§œëéÝnôõ?Žc$ofÒŠ#™ÐuÇ¥Ÿ·ác:/ ’ÌNÊ*>Ry£$[ï§A» póam2õ’¿9 ×…#M:¥ÿ%4n£ÕÉ4[”‘JOöÐcå‚rôFKeB¹D¥à¥ö8(V±å10IBlN@¶ÜM^3è€ù*&v”¶ŽÏÄąż4ü™50ß]îPO„ÇýXô>AªzÝKÎ÷Z9ð¨›X„±ò‰ïâÆØÇLYp¨Â\ÿônIrW–U-ˆfµL óxÀyÀV•©Ê/óy–.RevYTš°÷ocô16{UêÀ+AV`ùˆëªðpLf×c|±¿‘¯Ó¢;:ß0bu”fõ„ý&Nu@ì傌²tô.G)#qHj‡%ìèqEÉRfô>.äëŒ&܇b¾˜Ã•Ll¶J/öxáD„•¢+¼úôƒŒ+ÚÜ?_*åoõ'KÂ]õÍaÀLå0hðºÊdn¿k¤ft+§Åj¨6nd2#Ö/&qˆx…T"좪꼨‹èÔ 
!2!òÁ¨N#pif\ãí¼¦Ë$ÈMíp•GØz…_ñ -à?8úuAå±H¸iü=&hùTPAÂ[#œ&Ög"è„h:øØ{`9ªs]tuîéžœ÷ì¨-!‰MF‘s°1¶±ÁØÆ6 A @Y@ 0˜œ H6!2ƒÁ2`0 ¡°Óì™Ù{:§ûµTÏuªî9ç½z—[õ¨wG»vfz÷L¯^kýéû¿/ f^ýÁ¤ À2…ÒǦzŠ$G‚f•²3p燋@3BqìôãÞ3ͭјΠ­~v‰õÉ!aÎiÏ T>œýã·Ö•]gŸýù¢÷÷dZÜì3Wª0íÜòó?üg8ÂÍþ'׼⦗§Ï=c-÷  à$µYç<²Ýù|ÆO5µÌwOîßøõ——óY8ìÍ8i©ª3×»•ç¸VmlÆË{ =_o(Ä»O=újN¦¹{çƒ7¦zèl¼`•œ5kùõž«îyýÒÅS>HEºËÃîüèÚ Í!7My»é5¬¸lÚÉ+L¹Ñ¤êO½³‹ÅWlô1*h&&jy$¤iŠœ»Û™ûåŽÇ¤•xÝö¸®\Ÿæ}ñ¬Yk.Câu.|S˜ê0ßÁ‚ʋݫ>š†pÎ)ë._sèÝ?bêÑ™ž1ç„'Œ–µô‡¯£ jÑ©q¬ˆ~ô‘r©£sBð Fn÷ý­Üiu–ò¼m3fËÝØ^bçÇÆg)’ ˹zÝj9Îï]˜Ÿáàên;ös\Á¦ú×KÞ;gåéOÿæÅÀvâ›À‡ñvLH¬âž»=XÔ£dÚkÿêš`Êäü£žÆžÀæÁÇ y«™œéŠN²t¿?ïÃr©4÷ÕÓo>éÅkî?}ÖÙ%S]Ža BôÖÿS¡Ú3|¨¿s?,»j•¶3^½díž|áΠüï@¬ìyšÓ4{ãk'^üÚxÌç:ó別,é7Ç_£qdÏõÆ@œää~vìô'^[/ûË‚M6[‡º¢8¸`BëF7if¾ öRÐÎ×I$IÀ ý]|׈ÿÏ¿Ze:ÀdbiݳîwÁÊL‘_y <忬ùÿ~½rÅI«0ÃDGºïø·² NÍzýwÇß1í¤[~¯|è$ŒëÞøÁ‚O.uF¾{ñíŸ^R¥·[›æ]ðW`Šþ7Nª6äŽÔøÒ]¯ýüñw—@OˆESQÛ~8¬Ô½½9˪á‹å;€z2b§XÁÑʶ¶iú³8z]¼>=[õ¾³åoŸR³·.zùÄy¯íc§ÿ¹êƒßö·Ÿlú }Óoß;aö{¿h·æ¼ùsEfV¼9ké+³` Gébn÷M4F”Q_ Y,?Æ^õÊTàK ©^ £5êÀ §3å®H{â’SߌÜâC_î+LŒu$‡ZÛ›t½Ø(A9®¢Œ6ÚÂ_?pÑY7T•¯0¶DA¯Y±zaÏ®‰Š6:Ü,†óáªY[髸?ªw_0ñÊ™ÇÜðË}ç…­]§ýÖ凮[ûa»¬– æ2#_äW=>3"¥ªN³3Ó×ï“‘³n߯Ç]‰|,$5Z¥î\–ÜéϽhÍâq srú³ŽRƒ á v, ‹ÎûiîCýüÙ,±j-ϱX ¦wæþosj’nÆ"NÏ­ÇþuÖé/Ï=õu½{ÓÜŽYøÊÒîxi:Ýžzzå/™‹ºì¯ŽŸÍ™·_uÜïgñD&<éúCVHᎳ¦ü*h§Ý±âžþ˳ßl¼õãó )Õ²M¥¯.^½7×éÎS„l.äp°E~{ôB\x¢ðd­*,ÑѲ«ðwŸ½¾'¶›Yef¼tl,‘œýÈùT9*+¹½‡²ÅèÇþ}î¡ÏÞ|ä‹ðØ=áXôÙ?Ò´’RIé_o|Ü+SI@ï:û¡#RÉt`_ñ“†Oó¾ö¾ìœºò„»Úi}Æ9KÏ;rîÙÞtåÏÿ¢)î™S®¼äüYÇü ¬hŒ|‹Ú$ଆˆ•ùŒ#_4ŸžsÖ²;×,!µlm;‘3¨ûèUt3Å–œ·¦%lÃñ¹qÑ«^Ç£Ý|×óׇüB˜*DÙ4TZ;¥ä/Æ/¸öð%év(ˆÂ‘¥h“_3×8ý7O (èX"¶F¶˜Í!Oÿõæ¢SvıHG è(Hý¾ãÿc€¿åš;Íkeº’Á&»#<ÅîïèÁ¢u£h†>îÆsžŠØ‰Y'<Žß¦F%B^â}Ö¾í/Óîxýš'þrCPãÄš;‰üêX<0®ð”án£uÇgôº¼U"ÓŽX6ÿœqdLËN;m1Xóê]Á €Æ§;Œ9l<‘ÉOX&Ö³ÍÞ¶ãT•ਠ_¥´²Þp CŽ­dæ1+qÌYS§ °šèüÃ?ìDUòÀÚ+o{æá§ø8–_œÊƼüìƒ×yé÷§¼^®Ïœ²CgP%w™¼ÈìÚ!~þý;:q!à>J‘_=ßß ÑBNQÌš‰îD°¯ ö°!ä<>Í.xþdzþ|^Ë/_ÿÎ]»æ~ÿÄy€Ï¿e¢«œ¿ü…Ÿ+Âûá«WÿjÎ+W„#üPq³BÊbÄ)>“T\ôü’L,18°c¸ô¥ v }˜H ìl@‹^¸à¤#Ï»â‚YîVúÊ3þ¼ðÔÃ1ˆÊ‰ë ’¸p<ôÖ¼ÙiC_ b’‘mv9•I.øÁÓ#c›+­\Øâh{ëÀ&Ç÷.¬(~½éÄäüE}ñêSoý`nÁ’w¦Š¢xÇGWßùæ¼¹_ÄÑâƒ/êÂèÅ$%†pþ.E%^ÚvûÆ÷Ï9ò·~Ñk?üÁò%ÏιïݣеR‰ÂXQ~Àº”Ô}ÅÞ.8ì… Û;û{o֔ƾßÛ0ºQkë ¡H°©_<»ßh½È0¯VË•¢i©³Žx`ÕåÏ6[µYG-ó›Û(9Ðù) ÿ w ˜j"˜« 
焃֗l`™1÷¤èeG\4ë´›}hâžùÊ/޼E÷ÛË_¼^Úmï^ Ä|ÿF[ n¢M®ÿÁ££ßvæ_:„q‹}rÉ ùhME „¦ç>|0{ñ «J0¿ÿ`ö'?ÆŠr8¦ ¨ƒ ÛŸ<'8CthgˆïJÈÇ,<ýY¼¸üì×· ÁA„Ìd4§ùè&Þá‰*Ä«YhŠãx×FÕÜumÃl-|éØÿ­à„)4%Ôðé²ÕƒËŒ7{ñÛØìÞòÈåËü gK!>±øíé3^¾ôÕ?þᢃ®ÇœRÐIÅ;®:鉖X7¥6à>ó~»zØÛ~͹sáø®þû£Á<÷È…ß(h‰«N[åŒé7ýìñ釭ˆ¥ø›þò¸Þq¾%l+—Ô"™÷ÚÉXbša­<ý#° Ï;žŸÒFÎðøk;‹¢;9iůÑ.¨?˜ÑJ¹¯»oÄ‘ŽÍ+©’¶}ñ´wЫ?ãŒÛ®8þúË Ø»˜Bí)m6Þ_þÙ%_5—þuÆ“kþùÅë–ÿéâ5Ÿýpù]ÜøÚ‡÷ã ¶Sì?p¬w,´§Ÿƒ;‹äÇ]Å`–jÙÅ7¾r¶móùB³©A…|æºï/QF–l¨~„ߟ-L´f^ü„ÚR•’R`òÌé{( çS×­.þã …]öÁ© èºãSj#¥;^º`\r¿þ|Mðéðü 3ì/ýÓô;ÞøR®†\éþ÷çÝüøÌX3Œ'Ž­Ë¤HÐDfüùå™xàÿ›<í ×ù`gtˆåòLáÂwûàÿ<¾ÍÀjªÍrµþb–Z.±õ¨Õt°bA=¨AfÝûÃÇ?1ﵟÜ|âëÃÿîµC1)/ÿþª``y`«‚±1ÉÅ;÷ùE­}{LÈ´Ó–(š:N9-J'ýѦ [þ9)s5ê_ÖW>3¡Z.Í8r•'úËÿxÙÜ+ï[¸ô—¿›>ë¶Õ7^vðÍa®lMJº‘§ü±\#=K^9€Æ]„É5UŠ[PæÝkéÑk«h¬cÁ¾“w&í\I=P ¾¸ý£ìÄhKp¡×ux*æs.ª9j£ ï:jWþì­í•mcå!86/èì¨í>¶³0Í<γøµ_b÷*)ÄÄí[ªc¨ŽÃË ìâõwŸ9çÂç0—û°œ`áµ”Æ6’΂ ƒcÃ'ó~þö‚?%ÚyÇñ—žôT«]¢½¼åS7¦mlØ\¸ÁMêü¾yu¤¶OaßUS×I¢zgí½*”“¯=îîª×v¢ÆÚ Oâ´×ëê,l.'cvM˜sò'™Ø>—íýÜëÏœqÜ/›Üý—‹§¼nÁ«ß[pBuö¾ H›Üvæ›tTc"÷¼ûk˜Š‹¸ã¦]½÷ë–ËþþŒõŠV¦y?ê½ñø¿ÉIê{.¸ÿ‹yØ‚) šÃ;N‹1‰ÓÁð†ˆÚв‰¾V5 Êp·4‰Àã°Ã‹RIf7nÁcûà¯ùñóËž?¹‡•ŸüôÖS_öÖT=Ʀ]ÃÉSy×1±ähsG棅¦ÒÊD󦯛¾Š¶ªr©ÊP¢·vЦ‚¦˜.-¸FÔFK]dKé\Žüúì[¾¨wñÞæ(k/T4ív{  Ò‰?;,Prĸ]qò ¹pt(uÞx΃¥áJ2<ªÌn¬m˜é­6FLϸù¬ç8Ùž~üÃR¶eÑÆÒ?]½rÍ/ƒl‘ ãwÅ$ôàë ñk¨£CíB&ó_zä5–¾òØûB\캟5uÞ9ÏÆ¥lGj"ÖBº>‘l =0¨—M],ÚñîÏSŠMÕAî¦æÚkß>)ç0yè/‹1ž¿9p¾,N˜s커Ü=kï¿gÂÍÞÿ•ɉï~§ ‡CÚ°ý+ƒUvë¾oöºûÌ:dE[VÝçå„óUaÌVL¨ -[óƒ*&ÁÄÀà» ­›5ý –½yÑ©]·:κfÊ]·¼ö£9¿˜eÇMu´”Æ'nÐCMŽ®xáÀå'~4Ü™œÝ­YoB] QWn9ó½kž:œa…é<ôô篸o ¾Û¿Óª™m4“Q¼x&†ý×g.§yîîãÚaÌ®û-®.ð`à-utË[î¯J,ã¢ÿ¾ ç¼ïÓEt¾þëçç÷=5ÿí3î?þï¦ rÀ¸¡˜­(•»~üÂÐØ–P>4祋ïý `¿o¶q‚ ù0‚{¿†ÂïŽ%Œ;n‘ {ÿuHWr\½fÞtàgmˆôÄUG½2a:¶Gì •Ò ¾ÄϽó‘÷/ÓæàW±þŒˆ&H>ÃÁ5]n&6ïòãÿàoùîY¾žŽ\ǬDŸ¿ÿô^v³"0áÐ/´¸a+a¯_¨£#vâ³|&/õ@0ó–nFÒá §\ùÐûËñ….>xI’‰¥Í‰ËOù0ÚêYtàsŠUî,ô^»ö¼%{}““Å-•~aO£‚Övwú{Dó|CXøæY3O|KKÙ0‘MšúÎ5×b¢ß|ÂzÊ 3ŸúÑ­g¯õÛéû¼MYòX!´w[Ç4Úz˜Îò\fÆAOæ#]Øàæ_ô§œÓ{õ~QuË;tpã‹Je—Ð8CüšñáŠ9†x hU-³bôŠUûÌÓVßp2t¹!Ý*ýîÐí°YnŽ¢¼}ë“?cëñ…ǯ3}Oä©B²¼AT4rí)/Bo´«oÖ2¾-vUÞ”~ºû“©d Œ-bãTpy±éÃbAIÔ$­íÔ‚#GY‹ŽI™ËWºà¤Çæ½vîM'®»ä¥cn9æ‘tü€j£âWÝ•§½×íkè¸3_›Šíø¶“×ÖËåDgþægκ ULœV$·l8Æ1wÜöå´Ž[í6x½ÉÅ嚈®9zXþâýÓríŠGoöÓ*ÎP(7Lýs6žµ¦•g­=ïºófíÌjør 
Ã£“µ57Nuþ꥽o;Èõš¾Oçþs}Ðô…H(sõ‘¶í1QòÖ]úâõóO~5¤Ù/rÓio,9ûU“ÆÐA0cGÝw‡ iÚÛ`Ÿ`K<¾‚ÍK¸9Sÿd«v4DK9Á$›u ¾–ëq.~Ïyë5C©ÚjȲH{Â̵§_yÐïûÇ ¢:Ø­Rðû·Sh´¾v!‹Éïžw'Ýxþ+_|ZØ›_¼úÊ{æ_ Íu2óÄ5KÖ}ÿ’ƒ™¼Ë„àÕÉ#ï/Á¸]zŠU/M¿þ¬µ‹×{í)ÏÏzèô§­™÷ôÙóOšs $© Š¡4ŠQ( ´PfXôÄ4ÜÄé'ÝÕÒª|0ƒ¦ìÀTì 1vÈënÂGÌÊ…sf ΊÏxbÜ[|ÚõfCâú÷›°ï•û>™N¬5.L’¿}þÜàz °ß.{ö'5ÒÈå§= C~yÆÕ÷½|+ Ø´_ÌåÊôªÑ/÷ÌÿÞŸã¿åèç®YwèÝçû¬À‚£SPôJÑ„jðqná±oó ߓꭌÂEÃèükÁˆ,:o¸$¯˜úp®#»¥¸å† 0õ©v[‡e2©l,VÐ4i7 |L’ó÷¾ê±O—¯c¬Àœ ËV¶Y!99XYŒ1<9¬Ø6Ùµ0 Ž×u—®¼é±+~t좬 Ym-¢&nüÅj¨'Ê‘‚(³šið¢8T)]xâ tª©T¶­þhM0Y?ð<æOH†¯D,Ué–,†›nÝ¢L‘¥.YuféuÇÞwý±÷.^°âDЛ@u;h´‰_õæ±ö†œ„ó\ø3”ž¸ò°Ç3‘8_—²BW íÀ øþæX£?±k­Zk–wÿWIçófÆïøí+îþÛôDSJàÏá*±l L¢n ‡|»ïîã»”‚Þ¾};³ã1WŠ¢ðûCÑ4xt¿üñÞÞ^¼~衇¾õÖ[xÑ÷}oÇãÞ{ïíëëÃ[‡vØ›o¾é"?eÃùË—/ßu×]ñÖá‡þþûï·P¹ð<ü!Þzä‘G&L˜ÀqÜqÇ÷Î;ïà˲t>3˜WájþçÊ1ƒÖq'ŒIÿØ{+n\w¡®m•Dó¾uׯy÷¦‡žùÕ=ožÏª¼¨&:I¯¨‡$9Ò´Ú!!*PÖA;c°f’~ÏÍüM'3‰*‡£Fg¤Ù‘³ú¢J; %"E‹Ht Q’“H ä¼ë í½xÿ÷ZïÍG¼›ÕÆc¥íƹpò +øÛ=Sÿ!xšm.9ãEZ㥦Ôï÷Še¡ tõöˆJŠ*ÓnúŠZ8¥‘h‹q%4ûè§#xÚè· ã¥Wò™_K÷½l,y!ôÖ@¼,Šl¥3Öò,Xn™I/8æ'½¾ä™p†|ó«çŠF¢ÀM`Ëñ]¥CäRÇÕ{¼ÐÏí!ÕSB=™—™÷ži¤­B¤ÖÑMv‰·:fí·bá±wÜtÀsy·/GÆç¬^z$ •„†ð[dú¡ j8ÅDE®icw¾ÞÖäǾå;o8ä-^frPuô̈­€-*#=÷7úðqÛQ M3»ÐµÀ=Ê8ƒ4>¶'Ù‘¯‘óêÁOfº«Í­r¬uË[ó–¿v?ìÄHs ½a+ß=¡aY.Jôx×H VÙdv ™m<¯Êì¥G=ª‰a âÈL~éq’Ì ’]Ñ«I1§+é§âf&TŸsö´ ìX|ñÑÏ‘J:I%VœúßtYƒ÷MkÁq VÏô#ÿü«Ã¼à°%¾0aNéÀÄ!‡ßbôÞÍjÍÆ¬×Oò\  2®-ήë[P Cˆc|Æ·Céù¹‡®ûÝÑ«±“ÒC^FízúÍ[äZ8®&„r²Ç90çÅSç<Š'è³ßÝCU¶rfâöSÿ1s¯2ÆÁ)½göAÏ\wÀC{'›±ûó{W7÷Ä{ÿ‡Ÿ\~äøêßî³àº#Ÿºuõ¬9SÇ’¹êÈÅ—0{Á w–ÿÙXú£§Ú_±×žð§»—%w¿8›—•Y¯þ ­6T½õC\‹‘…ôC/ÌyàÙ¥’×3—ý²D.˜z_]uo:æP甸E h[·î:ó;›kçõb ¨5íˆE¢Í,=ᥬ³÷ʃ×ç@ùÙß[sý÷ž–ÌN‰—¿ýÚ×(Ú¯›Ç \ñú$ωتví[Çw% —7í ¥A@EP‘w@׃ûû-=vnìÿ> žÃÀXàÉÎw¡¯ŽW`,vü¿òä»d€7oÞŒ‹ÿôÓO1: Ê| IR­V{á…Î?ÿüùó猌L:õì³Ïþì³Ï`/14Ï<óÌå—_¾xñâüãçœsÞUP¬Û1²wÜqÇ-·ÜrÅW‹Åã?þÈ#„e¥iè\R°Ù?ÿùϯ¼òÊþóŸ'žxâQGõÕW_ñ<«îXTõþ«ApaµÙfà¬ÂˆcÇQIGG$1ÁvP'3ÎY~í”G¬&& YËwÇ꥽ºönõ¹«ÎÎ1Þúÿ0õø]gþ­Z©'èT”D#4Åù±²ñ‡3>óhÓ§Ç:ðé^|œÒAÒ^tK& ñ‘ó¶ªÃh“)ôÉ“ë*à ˜;ÔÌOMÉ}`^=Î#!Ç“äQ’éËù޾߽6^Ž)¼&Џè­skÕ«JÙP§Ú@ÒÁÊFŠ)ÊÌJ´ò™.eÂݎ窙RnÃYpÔ«h ñbÖ¨×ÍŽ7­¢yëk§‰­0ïÃÊsJI{úÇž*Ó­(­òa"ƒýâÆµÓç>: ŒÍn…êa»Y z‚Ù{NX—q:®ßÿ!ì³—¸âú)OvŠ“;Ãéš¾­dl<¤¿Kî? 
Ïþ‚Ç“Ÿ½ú•‹8ðk»‹†_ˆÆÛñmQTj¹uåÝP›öŠ“¾hon_ºç ¦®ùݹ÷<úúíA:r mËkÇ(žQ¼ÀÀàG' ¯+¸wx`ÔUl5þ`cl·ŽïÕÊÞíOäôêÄ(ò«^»°]ii¥|(kùN’ËÕ\à8-Þe<›ñÜq×>w‚ß$­“«å™ZªGÜëú;ÏiIÁ§ß{H!ZðU¿Gž4ïéŸ²Š¼bõÙxéç¿ÔÀsh@ˆbᩆ\¿6VÉ Y(ÞpÐã·uðó=€cF¼å1—¯ÝÀlÚ ÙçjãD­wý‹§3ÍDׇ$5…ñÜî ö{.bÄâvLh…fÞŠ/™ò=ÊÞpè;«Îùt`xdÁ!où å5ü…G=†¸=ø2 ÀÐcp\ZÅÆ$Ê„ýÄÈPÛ4­…[N[däñé]Ë-5Dç9'#zó)/ËaåÉq‰‰×ð„1,XEfÉ1¯Í?ôÝ¢SæÜvrÚêZúêEQ+KÊQ±¡ÑùÏc¢³;Ï7Í«Ÿýa¥Z]ôîÉèr6Þ’#ž^qôó»õÓfT}ÑOÄùNUeGŽl˜‹5÷® Öÿf¿?,?v}}{è–)_Ýuò n½Ù®WjdEÉÈp)(I"LÝ|ö›£ Õ¡uRØ66Nâò[ZÅÉÝ{Ã#¹î¨Ç®þÞã|‹MY9Ü ¡™ÄMŸ˜<«–éâ=˜ ë{‡ŽÕô|dnh[¯{4 P´&”í¢²ŒK•¤¬qû;D.=äîkNz± Ž—åL4šÿõºÃZ¾Ñ´ÇJiúê“£bÎÑeÖÎt„'ïÚu0ù’ä… ´»•5Á‹nHôÙ%‡Ô>Ií­°à°ÁjBVÀÕÇÌr:YvìË5K‹…: ½Ë¦ü ð?œù™ÙRo?þ¯i*qãíaö(nµ:齂LØŽ½Üìèfú¶ØØqªv1þë8 étVÿÝi†a;d©üoçAátßΙþ÷Ÿå±Ç[¸pá¦M›`1@°”pOvÚžžž0Õ±X ƒ~úé§?üðÃ8!¬2BícŽ9Ÿ'H`‡þ›‡Npû˜‚>:lŒ*¹l'¥Pä–wNA*fÖ‘¯£c}×xV2EWGã9˜9Œ1 ÀÍl ¨˜f¹ ™Ûœª $Řo  úJm" nZÐ l²ŽÀÙqXˆŒÀÂ×䑿ņsÀJ"óyã_{³9x.Ý) ’C \¹nEé|>–RêõXXh L´æ¢–ƒm¤&%F5shWwЉôįyPuPˆ¨tR¤·ä€asÇÇ%í@PAæÈ¡ÍD„ i`˜për„‡ŠÃ¸\¯¡ºí¶†k“¤(Da|ÃcF¶×ûRÝÿÍ^üÿü-äDaP`#þãŸàE˜ ˜˜€oû°/©jæßÎã»d€øÃ ¼÷Þ{;‡ ÆugN`ëÖ­ãÇÇëûí·ßNGöÑ-‚WSúÉ'ŸÀ#™Œ1›;wî=÷ÜS*•0ŽxkíÚµpñ:–æ­·ÞŠ·éb¸»ºº^~ùåý÷ß¾v°•+WÞ~ûí6l@Œƒqcvží¾ ž©Ò4Ä<Ǽï¢Á8%a¯®S·”;ºÅÊp…7;²BG­5'©®pÿpÌúM…É‘ü7Ê@G$c:nKwdާØM9uÍLp)D½M³º|oqŸS^ˆqâ0Ø,; l»°å"ppƒ%´<úºÄG, ýÒ Ùœ_%«†cÑ/ªå.n‚n7Û}Ka08 „L±¬Éz|J¹üåž™'ÝR9)*D·™[ú½ßÔ7‚N*I©B«ÚFÞ)(ì×”ç ý̸剆ÌR4'"º¨·þC°ï”Œ±‰‰ÝKutë¡JÚEêߢmí£;^à Y¾ÅG™nÍr A7ÍŠ AÊÕD©®›ûå÷ùftK´º“…bm`ÏÎÝZÃ5ŸæJžÊb"4[ÕŠ²y’Ð嘀Û8¼ØAI±R½!ˆ´ï€ìG+x{¸ÍØÃ6kZ”ĘG»e¹Vƒqi!@›@N 7,ò¦¥ щŸï&÷˜`Bñq/’ª£åƒ[¡Þ&c"b˜ˆg%[^3J²ìoAÞUç3ô8Þ‹6I-#ç‡ÔQŽ9µÙýâ ¶/,š¸Ô%—P-P¦ `E@\ %È58î[8ª|`È ü„£s†×4…2ójzsÖº“n;Myt$̦̖–‹ JÈqã); êmÈ/#9øo8Ó^à'±4ÃÃýÂMØql|.÷X–VìÑ´×z>Õ¹µZƒ»ƒ0?ǧ[V£JQÒ‘¥·7¿”H("çFÔ1pËLÜ ¶L'Ì%-¬(eB_BÉ3DŠãÜø"7 rM`>=p+v%Æ—ë¶Cª×¼?yÙIn6ó,‰d’^£>ÐÓ“ß¼}H"“£t§G Ï öVuÊ<áCr²¬¶2©ñë«Ës‚ã5,·jñŽÈþ#Š’I’J­)2™°i*epA碉ØB’ቡÁ­4¼¡ˆ©ªí\ª{¬]ª™Õ¾pgˆõ‡ŸÉ$–'— ê—"ïQáòHmˆg²²”d|»>°Î® ct™s:"ÿrÄí¡D[o§Â©ÁöÒ*!>Ц 0WcN´>Ù·c¿Ñ"Š“^ZN—Õr'>´^CE“A*Åcš²™­8‰á›N ëaJv«¦$õB%sŒ#‚ ×ÔºCÙBˆÕô&* sþrò’#_rÜ<ÂÒ„’„X½Z¢!º¥†xdl–Ž©¦yU½šàz,x²é/}EPLfÏøÁ•v}³óùnùòð7]bJ7àF÷3l®èÔÑyP€H´m]ÃJ­-ú¢LÇL•KÊ öF•ÑD8_kW#4% 
pÔñ5«ZŒ²nËÛEÚEÑF¶‘÷ÇÇ»67*]¡=ÂF—棃Z3i³âU¬˜ô\ÖC/•‚&¿G„6|Äwš½lèŒe ꔡ1ËMEû\l(´ˆfËhdÓYE«Dc~¹8œ’òª†’³ïs®)šzÌÚFFîÚ=¯_E$¸­Éÿyþ÷ÊNƒ‚|*X·ð;ÿÎóÀÜ`ÇØiw%G£@µ};o/†ÿv¾Ïy–v»8±é¾ûî‹´,îŒ3p4žøá‡pI`21jxƒ…°cÊŸþ9Æx7x IéqãÆÁ¾~ñÅü1^GÈxÅh:“É Ã6#³C|Œð2þ8!l?mvžçŸàùÿüPà@IDAT Ñw‰ÄážxuÅï.{ä_‹‡õV¾x¦2Z]¶ú§7½s,GfØ<Ë$+í tO"•H³¤¡4ã ó©U⡎\™ÑF˜ µåka^ÒM­e79@ÿ\„z ÒT&kœfrªÉª8 ÜP²F»êŒ€Î¹LŠi÷$ó¨úœò¶ê?öˆ§M{»J•2…$¨žN%˜†ó¹à )Õ`Q^òÖ51®»nÚ{ Õ‹9½kçxèáêFŸiy”âQºK9ði=Â{¾ä{ahç8¾‹IÀ•ÃŽ˜®j´'''ë£< íß§ƒµbd\É+I¢¨SµŠ;ä ^‰”Æ¢ˆÎZªß€®"É-£[‘C÷jͨHœüÙð—ñE/&ijÝR”½r«æN”®X§b¨ÕÚOÓªÞŒr™Ó¯†eL“åÆûb4 –n,î^ ™€4úº"²¹Qš>xX…G•Eƒ¥e0Fœ@\RZŽMV ¯íÑ]¤ß±Mëæ ˆ¤w†—”S4WU·3²[³K-º~ÜîG–œ­nP¤‚ ‚• Ý(Ü1âD£É"å8·—OÉŒMÀû #€¦@ùB`˜]Çe5=³F ŸIl.#¿ðÄGGÙBá–ëO¤öj[@6õY\ŽŠwÒ“„Ø×¿w\9.~çCàÙMFc1!8™ŠA°,±|GDVŒjBîª*9ª7#8’µZ¢ gÙ„ÈjƒÍÏ“|Jà⊠42At¨¢¶f¢¦Ý)7!2–QÏʱ®DŽ6-SkØú˜d5L@ ql(#‡«õÍ>Aõ<õK¿™é T qy6Ê.åÁѾd˜JO´àå`BƒSÕÑ%:0'g²RÇ`sT€ÉD8M™ üH"ˆ‘! ç¨åÂ,á ãK ,Rq6 ón«aÇ©´ªLGq:£y¶C¡æÚÕB¼%t7,>D±5´ÙÙg0ìºl„Ú›í…î›zÛ0w íå\‚J•[ÃÈTjÞPTr|»ÁŠÅ|õÅ莰T 7×â¶ÃŒ„Bu¿V#Õ0J­Y‹4"„g}&ì8‰0)8Z¨6àqff¸ªç“»(8 Z ³Cê–¢‘`7 @bí\Ë€1¡<Ó4º™Ž¡Ö¶œÇÏpsk›C«OCL„ÜTÍð´>ZIà·ÑQ)Õ4 ݵU qÞ‡áx-¢Àl×µ&…  «n#Гõ™ƒ!‘JŠr“H$MDiI–܃¼L‡]l« r' 8JÈÑáZ1ÎZª 8Sˆ 'èœƧ~TÕá„Psª6r)Háx\š²¹ôî‹vèf[kzÓ(¹Ö¬‡Þ µt h"ì=ŠúÄOÄbu­QÓA§d³!¤åAäf±×w“Bžþ„Ñ“í´t߀jñÍ– ¼·š]ö­ïk5MGapµŒæ¥ÙU[Xì £]WûrmÇßØÚš ù°o†ýÀµ›DÉ…ÒJ€]¶F;£…!ÿXÑÚžO75.B-Ü#uÇ#(ŽŒlª}5ªƒÖÛ†—0¢‘!¨9èËqKé¦zb‘ø =fâ9¤…T;ÊïÛgëà ÅfK)¶G#ŒÜ™Jcn¤AÁ!Òµ .Ê£r™x= ¼nÀ¢¬ÛAiü@{3z×aêrÉœØ2"$ŒÀ£JʨIÅRpë›*¬Ð91²gy¬IøÒwIô–í^!Y¶PÔê Û))‚ÏÑ1RAö{[4˜rZ<Bµa¨Zµ ”ÄðP+VkúhQð-ˆk7A÷Œ|’ ¬VàcÑ¡„¥i ÇΫs~à@f6Œ…“ ‡ÛæpW<Œ&~,a‘Žï‘ÚOWMÍlK<Ò{(E1 )®0çè®þÁDEÿ_bqþÓÍù¿ûüNtì+,B;XŠ)UU ‚ªDM]Áãÿ— ô÷¿ÿOŒ^|ñÅ€\¡(ûõ×_ïÌÎã ÆF`¼0š@â¿°£pacLqÂbsŽTÜ„¹p|`P1܈zñ @±Þl6a€ñW8woí¶Ûnx‚³ý7˜å‚9!™´±r鹋°]qür_Ëíב–óØé<Á¯“*œö§µ• ÛÿÒÖ¤X!_£ÔƒL2¾°†"¡Fd1…WW7Qƒä-ìÇ‘FÃò† +ˆÏÂn ÀnÇ6±>-§ÅQ H÷èN½Ÿí€‹Ý2ÂÛªdBÇÁÀ¥9uµ“‘s m¶‹ Zh›M†¢ØNŽ •&e!Dü¾la ½Ý5ÑËšù9]çX/ËBN–Ä‘è0*È(¹®àb‘i.CÂ1’mŒCö˜eÑò¸º«ì“z_¿6ó½ŠÓé­ðQ/ ÀC6ÒôÐ-ͨàøRÈ…Ö¬dp~D&Ç…v­V9DëäëPÈM0a4óD´òõý1U+kF‚ôæÇq’1´÷xåö ÊZ T¯[t y{`¡y;)º‘/'HY÷¸Ð£AÄŒ¥šl¥ÊÔk¢VT?5¹Ò 
¹QäyÛ³óÑÜ$q\±±9ŽÜªoä¡Mås2×&#"Ël×V¢å˸´1£˜@"ÂIRZÔÐ|UÙ#¹{Ù½â3=©ÃÑb1=ÛÚB©5À^$4Ó”ØJ‘r{ÄfGÆüu½Þ!uvH‰¦…jh yâOETý›<ÏÖÛƒb›>°ã Á’bFv"Ùmdh`¿ÂnIRÝJŒã*­­;PfÀœQÿŽ}wT <°Á°•aãuµˆE¦Xõë²Üï“Ï$š¤-‰B‘ šŒ^ÔJNHTý#ä3ÑÝÇlÝ@—´ÇŒ‘Îø>-ßû¦¶®3û™¦ KÙc;WæIv´¥"{QR‡(VWÉVݬ"-Ž˜5Êjsüàö瞌+ PÒ™‚®Cp@E¾û›êH6;œú4—«C0’ú¢ô¯ÎXz[ë ÈKþQâ1É"5Ï鋞· 7+É]r”Ñ6Ãtð‡Ù\³¿ˆÐªÖa 42Ö"ßTÝÏɺ ²q2¹å‚˜½3X dŒ Ijl…]Gy ®í¥ù”)&u±/Ô-q¤Tñ\ŽtÖ‰ú6Ð"Z"Šå$EC¡ä:½ú‰ÿ(™Ûâ©4/ä]ê¢RPÙ 5MR¶P¢šK“sÓP¡4—f GAK…ÖGý‘±6ÊÀ}.F»ˆ¡óɧÍ/2¡l>Ü…Ûš¡¢øíj£Õ-9>ÞÁƒØÜ‹‹ã‹e7*ô}YÙ&ŠÈÒÙíÖH‡ ºÞ˜àòß…Û Õ¥4›ÛVÉNÌž‹r)Žx”j 5¾.DC4lƒ×®|TϾ rŸ±V½´r÷ЙfñD’r°FÌ A:Íš¼ËþšŸMdû­L”|Òx'Â'b$—J$LgÌ ‡™Hƒæ8ÇDcÒ÷"á ⢼VM¨Aº¤‚z’ 4Qpƒ |¢Lœ×תCÜÙQÚZBJkÊd±³›$4£u/”´1Q?" í´A Ôªûáö$gk'€„DËîØ¿…_;M,6yœ ¿wfvaº»»w&Pñþ‹ 6å[øÈ§øÎDÀ0Š0´;í(FO€º‚ÕDTŠÁBÌ ÷‰ëÂa@sÊ2Ì6¢X„­È`ã<òùèÓv$Œn5O !:ºƒ)@è0ë[ª0àçÄX”‹”€Ç@%Ê¥`Fˆ„4о|`32Æ(Àɦ«A¡¯íklLˆgL†ÌzºiÕ±—•Z%pFvˆYÝRpÎf£RÀ†lÔ°àZN3Çåu :Ž‚œ‡ƒ6£¼HçX½lG±V±e7™üMµzÇPOD‚Wà5^< Ÿ‘hHÔíšaé`O´Ýf<$ rEo±§S „÷S¬nDÁ;^?éF`X,ŽdI!i˜SW˜£0ÇgI¼Þ,¡‘,¨&0ÿa-S~Œ‹˜†f:†Hq(7≠'×UÔiX´]¢iTµ”B"ë°'Â³Š¤Ô×­¢Ì&€› ñ!Ûת(9“l–Ë¥¥ÄƱ/AvŸkŠP#À åPÜå¹H+!C˜ÂJ…²ƒÍ2f/X–Ûí±´ô¸!-dXn‚Ï¢DšKäÂTxKyk.Ó9T.QDÖl*Bg[ŠËa:jVŠšVÛFW}4UGL°pvš@›D®¦µêX˜ ™p<H`ètL2Ý1«\µˆ. 
¨èÔÓB 8dý>‘#t'ORÛƒ:Àƒ2SUK¸øÕ†aö'Ö•íèpó;Ó0 gÂX5¨]¡<ÏÈñ° óÛ[ƒuÒ"Š ,ˆixÛ H›zâùb¥ˆ’.ÀØ®‘ý&F„DâàfóÆ×= ™pH—Y5»áÀ…'®â£7‰êˆv6üFµUSt .TG¬P5š‡÷QÒ+å@Î9©ûŠn5b8J8ÕjAƒ xË`RbOEUÆK»6þ&n")TošÙ¥â”b[†”o{tM*xrÝ´º‘˜š¶MàÈû¹°i/…}9"3(Œ|ã“*bç.!dÙ#-fùT¹^ B„ª@ý85@Å2lš:tµC&î:ÀeÉ-î6™íÀ5cF#Љ>£¬JÐbq¾fÝðìIg惜o”žƒ*°oôA ËTØ}°‰Š¥†QËúÛåÚ.š¦ø@½ÍL ªïè³æøâ¡´kck¸¬ˆ|DK²ü/€ø¶õí 2ô €gQ©D®¥‘BÛµ¡¤€a‹o+ã‰ÌБšaÇx¥å¨—k™)1<©Ò,ê¤ÑÍ6Ô$ÝXn¨Y‰Ó9h wZ@PÂO‚.ž¢7u÷HØN¬®p ¢¢ŒFMµI”ôíãÓì¶Ê¶ªH· 6-ʪšº‚Ú˜%GX]+뀺Oð:T[Á|Mà šm:”¬éµ0ƒ±êûB”²¨n˜-ÔÚð}ÂlØp xl!%âEUç éHWYŠq1ÝQÍv,”×T‡§û¶1ÜÃò§²Kx¢¦ŽÁ„ 8žÏ†í¢“Ä&-MòšÚ |”?D”üÑ–ÜÞF h±¨„cŠ– Ç"¡êèŸõÛv²tD(i})ê=ØLw±ëBŒ0àkå*..‡8¼+ú)ÓRœQDÄ4﹘çÊ®H¦´J‘MbA‡üqŠnkhšH-¤”ô/;b)SmˆðE¼:Ê "%â(Û±òQAWê‡ÄoT Ã$³gêͳ5F5uGlÊÀš%¤ì°6B·)Ö¯ZÚvCízK&t¹{¯Têižn“a™ÈžÐ3Û!›Ë¸hþ·I“¸cÚÎÏk(T>ÅÄ5N=ÒA ¼¸ ör ã)³ô¶‘MË#‹Pô[V¸8y•SQ@–£ !'´E Òb•@fJ¼mð$6ݨ“¸ïYK’M¸^&* ·*T„ߪWXF¶)^#0q]u³Æë~'r\µélG³]…þ0•ÎÖ1AÁréj< S(U4lf$Í úpy/>g´”ÒÔþQXÓÑœö_A:y=lFG¤ßn­ÚÄb̈ Û·L…Š¸Žªf, i•m(Ð"1œÌ’X¼]ŠÓæ¶:êÒà…ڨăթžÆ£m¦]·ë±D¬Q/¥C´í(è„ä‘Ê6ñ©V\RKÊÝ(oï±Óvà|çvV$wžfÿE† vÏÿÿ¯øðšÿ×Oö¿ó ¯¾ú* ·¯¼ò Œ.fòÝwßíè@½&¹çž{Â[yýõ×a5‘X†qŠj=öÀ8âu%¸^„am…]½Ìªh”’„¶^‚ªæ—²áPDdZöXR†]ÞNëŽ9f Z#¹XCR;‡}Ñóêê•f4ƒkÞnêÈY¡ó²âØø¢1ÄJ´ íÕv#ŸH"B²ˆ‚MÞô°=MfPl‰AXÝÿä¥Ê²Œ.ˆ£Úè‘€e6J!6Ì@¿¨~¡P  ¸È¢ÆÒvÙ4·;ÖG[ƒ‚« (üih“‰'¿s‚ü)º‡©ŠVœ›Ü4«#åía"¨fÙPËoù6š²A7/ª5-¼‘AA…-)ŠÔš¥45½fQݤ+´aNjÃcÇ–(‡ M¯k`$öW1> Ö$Œõm}‘žO*ë…v€FĬI„¯)c9…”F]© :¡±òƒ‘@ϸpÔ¹Á¬\×Ë©œ0Ýe†×œ¦e¶°3ñ D˜xÇEU€VÉ $GÙ(X¶•VŒŠ œhø† ëèO!:HkF@©’æè4ŠÜѤDí Yc¸‚àÀCZv‘q\W!¾ vCĵx€MÉ€ó¼[ÔGÙ§å¤ ëè“:²JÁºØw4²ã7‹æÃÁÐ Š _†Ê8;ÍhGÁMûˆ³‹æ`ˆ…mC6^kµ¨ëÏöS.8¹Í(¼›a1Ãýª3†i† µ˜@{r£l±¶¬Ûn>Þ-²‘r³Ã-·ԵЩë>…þ"’ ‘‡>É–ŠlŠv5¸Á±h´3Vjòäÿƒ½³—¤:×ýªªv—Ý»·âqnA$8A‚»Ï ƒ» ‚» \$I° #Œnm÷®ª–ªû[ÝpNνçÞ?òÀÉ!w×ÓÓÓ»ºzÕª¯V­o}ö¾½!ØßzËU³fø´ÊJä(Þ“ÃUŽXy“dzB[M3õ¶ãßëq§Í±‚(x…?ß(‘ g´ô°?d4%@šÅßüÔÆ[#ð½êö é %3Nú®}îè¯5KŽãÿþM#’úßÖàŒ¤=£VÜ@#RôÀ\tÑE×\s ú“EKZÊxƒ 6 dè®»îúwõ‰~½øâ‹ÑµèÝ{j"ÚÁÛÀOð3œuÖYhq”.…ÂÔ £ƒñ6Ó=ž3gz-~à 7Üwß}€bÆhFwù¯¯‚!æ_yëfl£{ÞzèãmŽÙ€ªµM^ý¥þxÞÓûmyêZ÷\·€RÚ ßØê¯¯<îªôxމê˜Õ%Ç®˜º’,„›v¹Ù(Ú£$>‰¥”ÐT랈£VuRÓî¯e{=ø[óÌSÍšÖ0â º^2Aa¾ } ð¦‰å5‚£žÕ£SUƒ¢ v¯+`4ªm»Õî®úzWÿ4½B³ÝŸ¼:òÜuï&ÝqžÕz³·Ibò)zÌü-ݬ ø£Ðzð 
ïe{Xš·º]ªÇ¥bbFbîx–ôLR–xŽlžÞvÜÙ*Bͨ»mŸß¬™&Ѹxtª©¶ÿV~¿W„œ‚ôj3$¤TÂìÅJzžZ‹šN€Íטž§1…9»û’Slç3>e/ÓžÔ¶`Õ$ÄHÙ,5µ‡“*mr–]2Z¹é}IȈfĦè£M“y€*¦'иÿ™_’Z;€q£JôN\¬i8€ç¾Ý‹–ËE¯âŒ†š‰?܃Q ùëf%âñ-œl ùØÑ@¤e¶ „bY—‘^_·íð¨²õª-ê—(Zl›M*¦#~§_æ=ÜlS`4 Ó sFÏÔ¥Å%ë$×h4°¯¯nšg;IB µ¬hÐ×hÔð<(2´ßIy–N`^˜¡Ž°ßY©ç€sÎãn[ ’cØÊ]ÙYâÌUšù°Ó‡õ`¶ªu»ä0)(2{Ü#×ë”jÅ€‹|mPžZ7έÕn3Ébã2íQåì±½;ö®,}Je«—Ze¨Œ –ò¤î‚€ÎÈåèxþÉMVlÅïõqÍ^2ãì¾b.™rg©]DÌ;×F\¹(–84lÙœ—dè†IÕjöz£ùÚ¨C«F|VD&:ëk/W©]r7m¯êžªêir± Í1¨P¯X•x0h5 ».žÑ“tâómém‹\¡A£Çí&/Hj‚¶BR:!{V‰Q_U'öê™Q¨WÇ›¯Ì®ÀpM»U²«šáj“:e·ãeã mãbÅ=-ݤ>·‹À;«»Õì EÑ9 |#è¦rº ‘üe-k, 4ÐÑ~Ö+yú.ƒ±izE®} ™wÚ*±¿Ë«×ª j¶í ¾áPñBÑü¤0Øañò9(™–±'r‰ÁZ¡Ä§$˜Ì"ù–½Áã„€}ÄÝ«)æM²¤-UJ¸mU’`ìªÞ.•­¶ó”Þ™Ziй&ç½v'pQئAgܬSHÆD‘«:h‡[f ä7FfN™º(õ‰Ói%ÝSId‡W+£‹fÎ!ÁÜu‰D¸R¼p÷„>ɇÅê“™I˜N œ8ñ%Š`=gX®@¾J±_yõ)S‹¥Z<Ü7n¦¼¾À€öõØPpº×훨¦âÎD’¬B)‘F :­@{…±HIêÛ¹¬¡“u*ßìvÁí‹1†Eñï¶/~-gí¬_¿––¾ùF®ºê**}»² œ¥¸×^{áàÌÇsÌ7ÞxÓM7¡hŸ}öYð&ñ' bùêðÃ燗\r uJ bQ.LÍ.!aÀ³.¿üò³Ï>œ'Ÿ|òµ×^#' O›`i=øàƒãÄÆ5Í×Xc h¾Â¹ü½\¯(¤džvCI§Ø¸â˜£³-–Ÿ‰Õ%¾¹wF)Z_±ÉÌÔ¨ô‡AÂR %¬“å ò­aþtULa-«¦þ ëWšˆ¶% )¥F†¤Ç7’ý```¨Ø*W¬ú”À´b­X¶ò!·§?ûdâÄ/º2¿*.zADÑÈ$ËqÁÇ £Y^nV×öeìX–œÀRíƒá¸ËŒ Í(©R™B Î eHîä[mÕòepƒÂvÓS´ŒD¸7ßÊ**$ÕÖK20ïöP)+ðß-³¹©‹jØ$.[lP.˜AK³ )¼Y–Z\nd0¤Á7ÛTâb_Öì|®’b 9,ŒH[Z™6V%¶/¯´©e ÒU¨šÐ¢ü=þ´1žðõ`‡‘h–j÷…z׺æÒæ’pŒ¡; ëè à‡<8iêÒÎ ²û½WÕ(ëijÝjÀï% X®¦†ñøUŒT)¿,¡¹{!5°VhýNL—x$ƒŒpkÇs€©¥š¹V®o(Y…‘ò*¿0Àw hÓb ©‚¿ÐE(Cï’ Ë´Lä5 §£øÊµÜÔÞÕ¦֦䷮7< F:Ng Ôªa›D¬{¢½S¢Cšå“EÉe‰¶¡´-‡jÛ‘Ô”hO¡²¼mWV‹LYš]ÊI Þd¦8Œ“ß ?Ï.)й™|Q!±Â'ÄVd‡ƒžèw‡~ Ã‘,­4-âJ ¹§{4OÈ‹çÓY¯×Ú î¸gJ Î»^ÁëÂ'ReZÎá_h‘œ¡„—è5ñÝ|=Ïòsu±6)#õÔp)E2¾KóæÓÌÕÛëI®]3L™‘®¦X°âùêE†Ï/ùA†KHÔ·ÝwÒ/ÀCaA~KE¯—«m¯3Ø*“!‹á©ñ¤Ã£acÃS3Yºx(ˆªVq|ÕjÍjâ~—å‚Kšïî2 š[/Ë&q€[æó¿½òm"DÅRz08ÕniGE¸­ë™ÊhŸ;‰ <Ö,ÃÜéÖb.-˜¶Æ‹êDt¦c¸¾Púªxp¿åÛ· ˆç;æl7^Ëíïéb²±6ÁŠe'vm7pË’}É·]%ÍâǸ©¹e8±ÿÞŠÅ ¦5öŒ£ž9E7o‹ÖøLÅÌ)htÏû»ïß¾ßo_~ù¹=û~1PlÂj®Ô´>ûš·öÃhiÇZd@Šeê™»œ¥Žõ_üÂñŒ¡yÛ/ЋêyÜáÆŸ«ã§õLs9¢©jšéÒi±¨œ(+TŸÆ‡<ËG–öyâ`Ó”Ëõ¡™v­=Ö^µžoõ‰úp§™´ÜD±EÔ°Ö#HP¤~ Üˬ$”µxÈ}¡Fº4†ÎtùÉ^iùpû´‘‘UmÜj)A gc|¦ºËÈh D½j…¤Ü|<ÐS »‚ÔR«ŠÏ¹o X*VËu§$㉉ñ º‡¢7+82N¥l‚Ïk3¹³Ðޏc¬^A,” ¼($D"£”)‚…Tzt-Çu,e¦‡¯Úª:‡ Pë_ˆO¦N™žnL¯i庥~ÿûJÏzS¦BX!ôrÓ|dO·Z˜Hä–Ù&I˜Žx04Qï ŒU2~wÜl·3­<Å'>—“ÄvÔ[L 
æH[‘æd§N\®•èõMtJ2Ù»|åò€3@Ä Ÿ9Þ09‘Ÿæ¯-ëõ% uLáÁa£@.(ÎU d¢BS¶é¿ÉáÍŒHC­¡·áÃ+ļ2¼T1*¡攄Žò:è$°mµ°u½Ü;#2e¢8Öï•j8ùƒÃ5Ê`"86YàÄNKK]Æ ¤&Â6¬¨Íd•´Kü2¢eµ(N"”D6*@bá2ükíŠWñøü«j©u‘hOH,T612ŽH8P©Mà”ÎTL\‚p^zÑì ÿ@4±¨ðÞÔØÔjÅ6šZÔÕŸn¤ ßvn‰›nÐ%°´ZjªL xDšez+ÏìMŸÌ~öŸ~¹ÌXQ©“…H½~3c,køS½kº2Úsç¯}ë§;©;÷Ï? )‹Ê¸ûíÁHbL_â„ßS)¶r¬L#ž¾^1-èN¸(Ì$]Y3¢ª¯Â™Ú&íÄÔiurjÍìÄJ½¥îñÙ ŸþЯÏîZßçñWõ’?ˆþJ”+R*((]ôö6ÅvêÌç¶„]†£ñÞøg€Ôí:â0Dq)ÄYç¾ùÓH2Ð §ÿi“Ššn,[}úŒŠÈ3—EURQ )»ç÷?˜6ˆãŸùN%8ì÷†ÑÇyïm´ %IXÞÕ‘òù÷ YÖ^r}#CËSíÅ‹§ÐMRë?ˬ‚êrŽ×W•jýáØ*ó Ö †¨cÑR•G3ö¯)ï ™e¶Ã(Úa/$„ƒ2 Qñ‡ÕR- ¹¾ö§õy‡|!꣸Gy`'YÄ8“ÙVžB)¿ÇÛ Âí–Ø[U³œèKnFHj¦ ϹS™ ±h\æ‰é÷ évW <5xÈxô:«¿>^YL8*~P|¶ˆk¨MæBøþ‹ÂÌXù ü +Š+êêpk)k´ìKßòùñ¸ "xûÄD-óÎÀß®=ßüúák’X]›µ«;y§aÔ$³?®fþìîÁ±ÜUÉèft*å@(Zt3£8ѵ‰E¿rp·wìçÈ®Zå°‰»Ò>ÇÐHWó¼±u×ÿ啱՗#£‚üqH\"â ËlBˆÐ¨9¦8â»pǧ+úãòåtsÅÌHýâS<è‚qcÔTªƒS“§nöðDc¤ Mœ±Ó#õ`!8ä|mÅ‚’Hù°ï|a»êocä¨PsÞG‡¬,÷CªVlh#5ñ…¡Œ~Qü´ôˆ ;ãž0± 5»óng35^Lj¢Ü1ªVl˜ã"ŸoÔ2íʧl:¬gõ‘ ÞÛrQõWŸ‡æÑV1ߪØa%çXå˜V;öÑ5”©…ÆgV¬å †‡[YšF¥n+õ¶Zmzë*šî¢î)¦Û«䳎tAMcµ\IËæD†|¥/ê‹ëJq°¿§(2_Ø‹(ð¥¦vT¤'š©uÖ\Y]”SÆ+ž|F¤R\„æ±Kþh?·ýåôçy?{k¤–[ÙHÕ;©ù­s^ÝÉJ8Ï~v=YRü îÎ N‰ÕÜòæÒ:f± èâÁë¾;©…¿‹³î+iI+cµ"µ£þ°Ž³ßê[=ƺÇ3¨½ºäùóÞÞÊJèh…{qò‹ß¯x3`q–#©?g^}Çß·zx\Y¶°ôÁD{Y#\²czÅ™ ¿d§¿H}þ›_½4RZÎÄGœáÆßè ­FÅˆí†øG,­Œ˜ I»VV¯êšnñí2 —¾¼¼ì{l0aŽg›Yl։֢ƚŸ9kÔgƒœsS4ü¡‚×ô¾µ—e—ל2'>ͼUSGJb„0„“ti@fT’쀀¢:cJäàûWoy°k ÃÖ„ÏVZÛתº Á„«â(þyü 5nùæ¦)cXwW²ä„Ãf¶âµ–WR7žüžö–Y&Bn M,.,£:ú‹Úò”–Öµ3ÞÝlÌ9>Þ\Úpæ½1»g¦¿«üýJÅ™)ˆ‰…ù3Í1Um-//jù©Ë«qû‹üb`²Ö°X ´ÅÌØt´ËHj”àíG#ïN 'š¿.”+ÐÚ¯L}a¸ 5%íM8———,×—¨1«ê,}&>œÑ{èC ”e÷Eb<šì«W¥âTIM”/­UÓÈõ0ÑFº³ î§áÈ:ðÖ»ÆâÎQãýRk 0n^·N>y8¢-ihÊŒ@Ü=1B ä.XÓã«k.,öMo)3SG?¿¶Z«·EѪ–*ÍQ³ƒÆèF«o0R^š6G §ÎBªém{z‚+íÑfÀN)™RcTÑôT}¸'ãó;7ÜäG?]Z^d¹ªB3 MH9F4`øŠ£KkK“±~fXÒøËî\Ù›.»azÒËj¹ì*d\©ðZ¾C\qíÓHx°ŒÅ¼NdaóÓ…í%jÐNÛ£ñ©îV(gõ”êþ‰f¼ä4ëáô¸ý9ãÄ(·=J]1‚AdüVoß eW#¢)ÑĨCL^”(ê°{º0%|ËŸ(Ènu/ŸùÀŸèo>w+ˆøIW­Òf÷ ¢¤Q伓kÍNôtWaób óhÕÞ=ãù®Äãd¾Ìœ~à­Žx;-–ž}ðm'ìÖ1{ŸÄ:°R;í~êÛ]xæ¯~WQ«—²à¨]nò ˆ\{deéóø€’VÓmÿĺ)=]s—е8[B«R;â1ê—yIqý+§`OW##Ö`¡ZÅç¹G_÷îðûzãf ¾¼ºB•.ê® Wý£ ÙŒµôÂ××"7¢*ʆ‚)»¸þf5´B$J4hz‹Ô'нC¶¸ç¦mþZ­PÛ/R6Mž(ÍÀc†¨Èû®j„ÜHÑp­j{ÆmO¦b“Nb´KQäãÒ?YõÞo>ܧ,Áüeoï­GsŸäßé['  ÕFŪk<í^é˜Vœ÷Ö–—>·/*¶é·†+ãÌÚmŸá «ž*÷›§¿¾‹ 
íÈ9³¶¬ñN!´²*„xÂò½žHµŽ9âF92ÖŠ—ö~0l†ò×ÿñp®èÎ¥‡-*¿—U‡==VÑ7ž÷,¿ò½=óÁE@’»>Ô¦y’GÛK<äêWAk€Bhºê¦«Z÷äëÞLÍ—¢F<åœ^±?i$'R®EyßÈSq#Šß¿¡ÒWêŸæ_ÖþP‹5 ‘óNmñÈw¬Þ¬kÄÇsÞ¥ÎizÞ¹¼¦ŽÙŸ©½•þÕ}ŸæÿdDÇ/xwS=6˜ÙÊû—.ª¾ÓHL@4»¸ö¶Õà à‡déÈ{7T{›ƒSúV5Fü& w%/¹|“Ú·Þv”[ÉÌâÚ;—|¸SÚ¹XÀãÜüðoù7¯ø`·eÍ¿Nû^>vö/müåÊ÷÷fåËNÕdÚ9£õæÈó³_Þ£Ï^ýö¡—¾·WÁ;væc;7üÖyº»èJˆSŸÙÎÛ v»¢VJV=k–òV©¤–š.Ýué.ÒäLÝ]§K÷Výcuoºê˶¥ãî[@k39(däý‚þ«¦«º ªÍºË¬ºªÇ¬zôL£’i—/zSòØ'W‹·—¢W‹"èR¶AtÝv8Iç VÔÚÒÂBVf£íÏ‚Óû¨ÇV+ûRy׊´ºð¸—71b±ªõù™{î©e<#g½µý 6h÷—ÍD:X™ö-RV+Ìûpûî÷i<ÿÜ"Õ!œZ®ôÁØb=1Ò\”s»¦Zî~+£ ;‡„·Nzi»³ÞÝãÄùÛ‡gú#þž±Öø”èÖú¾wN÷†ÝþP å0›¾j;VBæEØ• ëó-Ëì™Ú\ÞøÈ5¿ Këï›át_ﯫ^¿äˆ×WµÎ‹×X50R ŒÀ, ‚c…Ðx) _eªêˤ­•f°œ·G+ZöÈÇ6‹÷‡œ1åØù›4=ù”¾ä¸—¾¯ÍœkÄŒVë®Ò>ODêªÄ]Y•[ÚrTç}¸ÏÂüÛ翾/ƒ®8f ˆØf ïLûÿnM%LΡqÒ+›€VvìË?˜¨Nà3×ÂD ,‚Gº+J¸Ž~â»ÁpjâÍþŽx wV–Ûþ´â,8bfΟÙ?ýˆG~€gçLÃÑj8Í&5*DÞNªŠÏù€SžøS™¡X:ðºžèû‹>½þ/»ùž{yƱò¨ç¶L;—äÝ‹/ÿëÏFÛeÜ‹æ½õ‹?¿ˆqâ™JÈ 7FØ—“ÿ9+vþ‡ûVt÷[ÐI²z©±Ñ|å|[îÀœrßâk4Òê8'B‚¢QÇüìš›Ÿ>…)rÿMN;¦Þ¼àTé\4ÄœM÷ÜžgÿâLão·~|œLóÃþA'@ $ ê­–[AwLÃu_Ñ¢§â„/¾þɳOÜâÁëæïÎNÏÛF…ú¥¹oýç߬-o»êÙ#eÝ:G:Å ¿8÷úW.” ¿´ÌIy/Š9»=kÖ•ÁÔPüÆj%èóXβˆwx!¼¹ä•-¥î'Û†ÓÉ¡#Ðð¹Å¥ß{åÌ7·¹pÛùjØ>û©]; …N›-qÄçÞ¾àBÃbõ@ÖZCœ±ç-—½u´Ô ¼øù°¸tÛ ªyræÈ?Õ!§­ºüÃ=äUw =y"K¿Û97<}‘ÜÃGoé@çó¯·žKÌ’Þ¯¾ùÔYÇþöª?Ÿ,“Òæ¤oxáï>>WžŽz\×¼#4ÐÅìMoTjΩõMô9ÛL oT+Y™ ìùå•â4åìü¤-öýî1¿·Ã†«Š¶¾äú7Î’r@-q؆wõôL»â¹m¤“ñr¥ ÖI¿ ·øú›9áÄm/ÁÏéö;®yùÙ6E²Õñw¿vƒ¼ Ú!-ÂE©bÿuOxð­ëe#š˜³õõ {fdmg=é…%CÑnr™òÍlJŒÎ˜:e8½êâw—?§.JGì<ëöùW}y›!ƒÑ'Nàá‹_ÚGöŸAÒû)?›wͳshsζ7Ì[p<};sçÛ/„”9wÁ%ÎØñ†ËÈaŒÑBSœ°ó•×?;›K;y»›ì†uí+ÇŸ±Ý—=qøÅ{üáì§ö:cóGâ±ÁYlÂ`˜»íõ—¿tBW§lþ»k^:˜›8wó'ÖXmõÏ—} Ìyïqì/Ϻé—Èa3&.Ûïþt)uÍ«§É?±SðMqþöŸÿà>×í÷§¿É•;¾…ט;8çùŸÊÁVWüL¹ZÓÈã©W”>ƨUç½rÄ;ýþ¼G÷½t¯gÉÈ<ÿ…]îØ¨3_ W&Jˆ"CFQõl sÎ/7&b® ú€·‘À‹PsA×:ÿíž´ým×¾v$8h«cï}þ&9 y„йSR>N¢E< @è8èÎ7½aæa}ñŽƈíš|üùµœwR-bü»FH&våtï¸Ñ.?üÔ¡»z×›wÉÙM¿ÜþÈû_?Ïçîùù>tÓQû]rë³gɬ)Žûù…7þî\9œâäÍþí³û±ÕÓ·¿±+ Lùœ3¡€$£Sä\0€R”¨ (T~BãÌJl|âøí{Ãã'ÏÚö®«ž=ôô­¯/¬ûqnÔð´§Gì˜ß½r3Ñ4PÕ]äŠëâè«ØL @IDATíæÝòØœ¶º©'½nSVZYñÞàÊô'ádÄŽVyûŒBãsO4}˳1¡ÌÞûbW;–έïþi¾œuFº aíŸÑߣBÜbí}ìñ»äóÌSÍSd‰¶8ñþ®ãqÚi«_Ú±™Æ¶IÇô‰üâþ¡X¥Z¯gtU9º†vÕ‹ûa ŠV•|lUIðÃ×ßìß>}Sʫ撻šƒË§ˆ·H2¨Ø÷—Gü~Áí´œq-ëq'•Ž*!FÅY'_vÉígÈy–Ÿ€JÙ)x 
|KSŸÃð"9cÓgú§$VŒ}Û¿ØNHµ<ûùSÿv»dNâWäü¤¬9'Ï¾êÆ«¥œ›âÐŽ¿ë©Ùmo2uoØ·n±Øh¶j‰A׵Ϟ&»G8[ëD–™¦-qÜN'ßøÒoåÌ+ƒ×•C¶Û¯î¼çn™¦Ê~¼³*“®!?W"œBN…\igß1ëß5Å¿¦«ê¨/BÎpË˳‹Ã ßòÕ_ºë¾{ä‘3×EXí1lˆù&¤ˆjºH²(gÿ )f”gajãD¼˜Ò&ÄQ‡œÔìÿäÝáþø”xØŸÏ¥œ5_²ZŠÈµ¢RRdšu ‚EÊ®&ŽaKÍÛâõ/^.eE‡Q·Ù´8}Ï«³ËKÓ’k3 Ж/ôF¼…JéÊG {Gmsã­/ÇñÇþòž´ñ9õÐk‡~` «¥‰ÑY×mi†fûpp[ºG 4Ê¥~GÃH[-o~ …û¦kKW½ ³ñï^}D ðâÒx5ÅÉÛ^_Ï;nûô˜~pX,|[ÀþŽ`OØóŒëŸº¬«Ø™,)þ¦æ² õLOe yLFȈXÁtV`¿Þrn%WŸ6Xž•-ú’¹ h¿·dÿœ9l„‡ Á„œõ²ŽÄ³Æ*1”úÍã³NÚóbˆ uÐhÔ‹Î%”©«qõ¶Çobg$и§îs!…$¤à#ø•N§@±½ã¥kŽß}ö ó¯dAh­¿Øìð'^½C®ÝâÈí/¼íÙså% qÔO/ó9Â8¼*\²‘‚ëƒX´§V«K®…Ta°ozK'§P¥X PÎÝòöì#·=÷¶»P^Ò`PŒŠC6?ÿîWÏ——™gÍþíÊ/†ßv5ý½Á‰•9»iÔRSHBH«V54G¨¯G¢TýÖ#ñÀ• ’—ã‡ýhV<” ¢M¡°¾áóºC=éÍëö„[¡S7\ ôg*-ïpé“«?ß™ãgo~sØ^£<Àw²î­Y™TlIÓ*>üçKe÷º«ps ÚîS/ÿù›/‰šƒn&¿ù¦·oT¦4¹}­(Ûö„=¤~oç ² ö ÛŸhS” ªôû¸MfÛU›øÆ)[\jØ®7˵íŒm¯°÷_w¿â«}gžüËèµÇ­óúASŸdó#é*ûy×ýWÛ~Û>{Ãì/:;9 — ´‰n”¦(y•VW´å~°€é/ß"¨öý·ØËmû3û´ͳÚ²…aŽ‘w¤Xä|dÙàèwà,€Œey§Ãÿþ¢îg$Ìç"­–ŠVCyvÊJVÕäþšàcÓfQ®óN “l¶{_>µÏžq¿ý‘m`3õ2º$%ƒÀq•ògúJ‘…]™(Êq–î¯ø9=YÕàà_o|¶”FÖn"+ö7v³läsÝ«nþÍ>tóeƒ\oç>šYÐúìâʪl°Û Iþ2Ò¯PD~‹ü Y-³íqÛ^jŸÚÿ‡9žwÎXrcüƒóÄüyÉgNñ_'ÅÅËò쾌BYÏ—(á°!±@¯Xm’/KŽN\G>¤Ó6Ý瞘vµÅÉé@çÉ“2 3ö¯7<[Þ_ÎÎåðýëu.–’A>]-±_ç¢#78Ã^e˱‘*ÙESžÊÎÕò#²5®—¦øùB{î÷/?rfçÈOm{Q§ÙOí7¹\¶Ï«h7GáÁ¶·ŽîÅ·lp¯xü‹Fì£gœ'ÏÂgdPŒÛGoz}wdÊ! 
~C·ó5{|<ñk+Ù‡}÷¢“~x=?þ‡7ËAµÌ>éóörÊëžyøzs½þ¬#zÆÎkî/o(#„a)QÆ«òVr9Ü}žŽîxã!NÄžåö!?9Ãâ*øÌaûòÈ_NóþAŽ[Äò®}áºÏœ9õ ÙÏÎT /p©}@ï\F{jãûNËœHvÞþÙôÝdk9{»Îšeœp¥ü¼Ìcݹ¹$þñ“nOòö>GOäí Þ9×°}ÌÚçòàw›’á¼#ö‰ëŸ'IW˜ü¤Ûlgï»ù;m¸ÏE>sHô7³7¼}öF7ùýs¤T9&môÓ¤ÌùᨽÏÔãºW!¿úL6{òw/úò’W؇¬q™!ÜîÝä'«ìÓֻ愩7ÙïÙG»—?aç¸}Ä”ÛÎX{¾¼¹ô“nóÇì9k<}ÊÔ_ö¹ûˆÕí̲3^³RjÖj æÎ0FIÉç°aÒ#œo|CÁcs–G¼ó¹ûç×râ®Ëæ›^CüÿÔ>\nêÌýv=Î:íº®•K9l5Š£äWJ+!µ†PqïÓWvì‡ïuúO^!˜†xè½k¤éVïýÐoîÞOôîs´»l/ˆP¬ Ôr´ ó„èÞj`nCµFÁ"˜¤amqëƒÒغ•%m^Ì\cg›ÜknÒ4áE‚h͵àݻ䊕$9¬%S¬™Ü@¬jŠ)N¬áxŸcáò¿ ï®ryÉѰ»ó²{Ø‘TœËWt# ¾ÍˆÒëh“ñ’’î)­NP‡' ,„%YMz¹BÞçúiœ3ªmž&…ÚL6Péút¹³W,§°‹ ÙR‹:Y wAáѨ®ÔÄYœA¡ó•¢àJurÁ:«x²ò ,û/÷oÝ(ù@­p‡ìà!›‰MÁ =¼BÙæø¨>4Ý{û#×bðÁ«°5%¦«år8òA2P)ì¥K„xÉéÅ\ÖSn¡m‚Õ­vîŽ*ª¡Ô­/7{ÓGŒRåú›µÃ &ä7HQ¸ml¦ÞÞž¶ÕvE¼¼c©ê˜t h Îl¥ FòÕ|4…RÚ œ•„ âwÜW°Èý¦@½x¤ÖZJØo$gPÂEêqçîw q?Ô cÔ†CÒ$7vruÍ…ÏüÛý²%pHÉR¨Í®šø)TQòzÎô:z%|¸G¬0>zèÝ¥_š+âôÜå)b¥ñ7ÙŒ0múÑj| EZ[$Ÿ ™^Ó±àÉM-“÷%Ö±{”!ÉÊÎF')‹áB/Aïê?€Ö´íŽ—Ï))ÀòÊ’|¦â·Ìî\·8f·³oñ2yî)€Ñ+ªÑ™»Ú&“œ˜ÕšÛã§Af“°mZNø-9’¿â‘®Ù3X”Orc¡3¼PÎ5,E›4ÈoÙO| H΃ã¡Çn“]Z)Øæóyó—ÇsÆ®¯ˆ‘lyÕ„§¼ÃòH¾"ùºDo1\[(§Mü+ž¬Ü.–a y…;d07pU2Q¬(¨ {ÃL^wþåršm¥,©€;8òÊ _£,zQŒ&@Š*’ß«@3­=v‰?N¿…^ѾhwÖ`.ˆõ'¬Š/ŠÒͰß%¡Â{KÇý”mC xƒÓƒ B ÌÊIÞE*±É®¢Ð¨E–«âG1–[Õ—œŠ'Û6Å­>í•õ,꣇W½{üž[&»Šn6ÚñÞ>Ìç˽‰˜Âý£V×¶„ý‹û¸ðˆ?IL¡7<­Ù&šJÍ'Ó?/fM6‰ŠÀçHoµÕСȄ BÅ¿-gs&n&tÙ{QvufR7Àº  ñ–\„x¡—rmW-ôÑÐ-ŠJ'%¦¢íí R‘¥oѦlMIÎè†ÛãzüÕû¥Šb³ZÙ\¥hVÍ·y5#OÊWV«”ˆ{úÕˆD$¤Ë”ãPa­y\=+Gk&C8ÃÔR%/;@ã<ªêí÷ïö“SŽþœÏg·.ÞçŽLa ŠŽë_¹„.5H9dcAйÝQÖLÜfįbY‡Ÿ“¥¥ÊõV…Ñ $éCnWgùH­w#-•/@%X¼N-¢Äúv—²r‰¾†nŒl9)*åá`dPÒÿ°ÞvŒbÑÓ»¨æq’ÖðƒD‰îäºx¡´À°ˆ†¤|øLø‚õ 0<è8Aºq»`èŒPô-D6[HôR€ J…p"ZOé>ªÈ,QRM:€‹*àUŽôpiÓEª=*5t5{•Ås'kÉ …,~çþãO×ê ãHä-"L£]¯W£±°pq¡¹ÅÔiý´,7~Ö™¬XS‰Pü>ñÈŸ¯Bål-Ôë—ò䀄ø<õá”äZœº¢¼/; ˜¬5êêâG?¼W*iFTV&ø6Ñ“¼ÿKŽÚùxI#™P f Môô8$ºW›e¸ÁíŒ aÀóN Šâø–o“1à¯û®¬é¦l´çÎßûù-OÌ$ŸUN]õ£Knýø,ÆÍ±ëßuӇʠej×Ûeø Á¬'›y¨”É„!šä·$¢Ã»ÃRêI¼ATG±î€ ½Š¥€™ÌcI¾e=Ô½¹XâªCU)”õËF17¤Yk¶Íˆ/„# ˆ;¹ ó°lºÂn*KT?‹*q8@TxмJ?`aFSò'y½ÚÓ !ÎHAq¹å1‰KFcé<¨/NØ4áE¢œ¨»ùg™5#"ÍáÎ$ÂZCN~xh.F«54Ÿ»¡PPA2ÈY~î›Wüj“xá:érà:Ü$*zärª@G-Lãð×òîìT*ÕjÔIË5WÁŠù^ž ë9»£Kd?™€…L\ex ùkÀ¤ÝÜ .˜ü¡‰|RæÎõ³¦©ÖêAк=§ä`œ 0V±ž÷ø¤±B¸ €&ýZ¨Un;QÕHƒúO§DVìLütGÌç €ØÜ.š¾d\Jš”°æLûŒUØ4­¶ÃÛ¹ ­î” ¨7ágÀ!« 
Øb:);¬lÚ¸g,O¨ƒA#»l5L Çý23€>#ï‚8gë.zèDy•\åî|è}ÏÞÅŠp5ÖfÈ–!/k¬ºâ Ð|F©Bº'Í3}‘~hž”¶j OVQ/è¾/ðtA¦3øyüècÞÚÂ¤Š¹Ð p; &Aï¡Py@Ôš©ñtbYwÖGH‹rr]æ1H­‰‘Jé+ÚÆa·(»s¨-» z9% ‘`ÝÂïE5F§ÖƒŸá‘ˆdÕ¬C“‹§ÖÕ&œ‹Â:ºîe£¡Á\ª´l€ïÖ³tš•†u믒ËüÎjkÄ\ŸH NK2}À‡E˜ƒ1;ÙÁÔïŠ3½ÉºÆÖIÀ¾ƒ'ÔÑΆßtJÊ1ž{ðè©v®¸°°†Êaæòò´Qõ«W¶,(&®Îd Ù=¹3D/ ̾J½ð@iT` íu,¬9<$.yÝ^ê˜ ãHöz¢I i‡|¨%åi•²j€xxÑYëäŒ0,Î\~‹hi?!™ºV³ŽÿšÚ3„¦9Á(a™¤‹•d¤ÏãÒ€8´à‘‡W±¼A& ¬0ÚVF³7mʶ!™ú¤•Ëõ«‡$ “ÔÏ Ý¨4]äTAÕT3Ú‚© !™ÀYh6>ðo1 ÌbC B4™áÄ/_‡#‘Dpºk*u;A¯T V¯H0Û¦Ž{S;äs佈Ilav¶æÃ•Šc»ÉF/Á‰É_h_Ö 2²U¬ä#A0¹‘žL? Åûcµºå ZÅjZ:·?02VìóãEg¸$eBœL­ðC¡ÀM×d€Î%×QÇß~˜×Ir&T<qc—òpøáw÷éÛ™è8–3NÊX:ÐÛ®Z®ÞrÚZÌQw%Úš€ó#SÀ-šo\1¡L Ãz„ìì|y4 Q>ëwãÛúÒaLÞW¯XRûL:oØXUЃ€¸ýåÛì ¨m÷@w”µ5—. ÷†{(´A0Ð&&£}ˆ7Цn™VÀðà®°Evx¼‹‘y¬bv¬iùi̓6tG; Ü¹ÝMÈLÚ»øPXj½ÁŠRÓÌl>K,y¿.ÄÌT‹ãJ ÆEÎÍŸ ºŒ}Ù‚ÏŽ·ŒVÜ/mt¶BºMDfÍåV¬u“‡¬)>¨ºyø¶+¬Xå2LKp|ۖѳ> ¨¤LPEUÕhÀÉ@Ì =½áb= ¤¦z_yûn96öÍÕaÔåö4Ú¦Ò€‡µ³BãÁ¡“îÙ‚ l÷Þ0wÄð˜Và ;˜m0ŒYùº‚þ|©Ió´Y-¥#p§Þ45ŠÄ¹ÝðŒV²PÇÆ£¡°ÀéÏŸM•{zä㈺õ%}ªRâ o. þ-cÖ7 ù`@ÉJCQHqÈQÓyräßÊmRÝ· L¤¾í‚NSûù–‡Ïþ>ü´ÑÁá ‘èY´b‘з¼ïýKäˆçð·ó¯ÜKL?VN†@ûB³ëﮫ5%=lÓ}}õîêú°£ú“‡}!Û ¤nzü$©Îbb¼Ü3„Ú'ŽÈìÒhWò劌Kü?Õ+جÚÚî;õê«·3 f+ÅP°‡‡§¯³§ƒS]Ìvº£ÊÙ³Õ•'mwÇ*ÍcwºGm|úèÈ~2êáÃ:aMíœÆuÉͬåžÉwíƒÓ“X~ÀÚ¡}¾÷y}rކ¨ULàÚ@K1+bô«ÿòÄßÜ{…$@Rü(»#IŒIÌʶ¢k Ô¡¦¥Qx­\27=\’‘ÒíUßLº°!ì§}¢º@™Lèp?0cz½>KU¶3™yÝ~,n‚¥×Tô,xM(ºÙY´ÓA˜äù¡´^Z–/‰¡(ꡊӣáZÀÞcYûÜ`šJ,xL!s=(¡ù•!=9E5*Ò$å'üã­¶­x<Ì8–©W逴–)ðr©kïì+—J¡`Üè˜ ÑÃì‰ý ¬ pÁæ ÄPGд›ã@æJø8›7¡èª]Ń!‘] =`•׆fG#A™TÖôD¢·×¬ÕI4ÑË%o0HF(n2’‰€˜lL\r$û¨(‡šÆë‰@W kŽh<Ô¶LH’\>ÖNm€+ÕòÅFo¿¿*ùò¶2Eâz¨A¼®Š¾j&rl¡Ž5H֪ׅ¬Ñ¢‚‡Ã t¶ c-ÒK‡õ„nB) \™SS…E+ÐÇ ÃŒ†ÆQ¢]:ļÒð‚1ÏÍÍm2rñ;Ó++){‡‡ šyÆÌœO…}¹ÈBL7 ï;˜ÃŒ4†kw¢Žˆ"`0!†…—×›“kUŒB ²õP_ÝÓòs¤ßãÖeÍ 7Í¥¹1¸1éT1‡xxj?.ìr%JF"‰Ã[úúy4,űͰìò⛀Mú$ø1•Rg †»j¤’±ˆÑ¬ÈÁsy…цaÊr4kugr‘:t|*GTpÖ× V/ÚÚh¶x¸žËÍè2yHÀÃ*ÕJA·J½eh M"e¬Ì2ˆÕPB„C\AÔïlTe쥧'ÊbØíÃ#]Õ^9í0HÙãT¢Î@¶–‰ùûUœ @RâÕ†ÂÀhGE1ÚºÏï«ÕkþȬmxbˆ°c…3ÈsHªªˆÞ²½šâÄ›Ž+«ÉÝ ¨Y9芦î‘nv½IÔK™0»ðÁïì`Ы•º—•ªJŠ ½€ª {ƒSÞˆðº˜ß¸ŽŽ¶êŽÞî»ÜÙ½¯ôÿ[¹Mº ¿îÛ†³¯e9‚1B5xß\¡Ã¶¹êÎfÉ §Ê#nB± ÆÔâ<¬XŸ'íqõµ&s%YÉ©ÐÍ0‰&ÀšmgLŠíŽûî™Óëׯ¾Pÿ"ï§-­“öïÞ¹Hš×Q'e§£­:WÏ| 
ø=;È»L†#OºÝ¹w½Ô)ü¥ÝPfVÄ\'é@^µÛµõ‘f¸º"=…ÕŠz64¨Í~~3éUæ̇<¢nZdaŽš×~¶Ãn¯<ÿ3#3¿¢¦Ñ‚<|Et,_Çël<`DØb†*g'BL£<0Ÿ†ævÏåÿZÂ"Cªö†„ÇmÅ4O÷Ø:ïªLMaÏÞä6)I LJàŸ#Iô?GîÿàY-Ú¥©tv‡Ð·*(løñ’Œâ=pÃ:\„ÓXÛzëR˰HÆbΨ+DPО[“7jèÆ·.•?çî}ß5ÏŸ6g—[¯ít©ÉÑ{šAªQ6T.áçetŠõ%ößáÒé¦Ã ÑÐy05Y&ÚjÅp·‰¶ˆ[Ÿ;I¶oˆS·»‹˜Œ¿W[šýd¿¿Üÿи¾ä¡?ÝÂ*ž£„g ˜/¾úÌ Ä3¬GYÁºƒDRYTàhÆ’Â+eµpÁ‹ í«hPþ6õf(ÖÂ;ÜH¬;jFU €KFÑʨ!½[4ÑÕ¾òRºËZ4ø— ÝξɷI LJ`RÿR˜´€¿îÛY•ÚÃïn¶þÖk'×m;+ •Õ6ÃþéöÅ/_$]„køBA?…Y(–] Æ¡ä•S¬Jš¬n©p–䚈£w»ð–[Ï•z4 ŽúÙ·>xÜìmÁ敯ÿºk›’Rcº&´ÝDCÑÀÔKЧ0ÛŸ‰4šîP`æXf<ØEýù3ïÝ+´ ¹ŠtñcB¹¼h_ûÿè„ÿt=JqïÍO{佫¥‚§cN™üDj.’ª—uŸÏÔ—…UVFÞKO _J‡àMУº½¡fÍ’Á8Ÿ¨å-LÝqóÝûô.Û \ÏùVÏײ‰çÿéYj3åÂ÷„7Oþ¸³ éü_šU‰ôŽMn“˜”À¤þ)˜´€ÿ)bÿGOŠ*’‰Îçß|L¢0:Å)ßwék„9´ ;=äg h<Ô[PÚ"‡”* `ö 2·%mbY$`ŠªgLdã¡•ÚñÈ=~;®§ÏþÉsŠêñ=sv¸5£,¹ûÅ«”~lÃ]ݶlo![‰¸ƒ}3"<}»L›ªŠÝ7=ïÉ¿\@;6º¶E¥LÃá -ä5?‘A%=Fø¦QÆ$µi3ž£ƒzMÒÖÈÈ))Ï2“PÉšX ÑCâx1 ¶;:;¾¡*B­uÕKÀÔ½ë»=ýÜSÒw bMX¥Üà¹×Ÿ’mAüj»sxýB©ò;nùŽòþÊÒý;#¸óQjýÉmR“˜”À¿¤&'¸¯ù¶>Q']µƒ&DKY¡ ;˜x\ù“tMÛß®„Ç»Ã÷gemÝòkf¾±û÷Àۼ׆GŠURKIU=.ÎÜñá5Ûß“åzuqÛ£'Ïÿ󥿱co8QJçç=}ÔÝ/_%íCô:˜¤3ÀI†Š^‹Éà%[Ež‹T·Z F¡B‹.'ëÃÖåtø.œ.äa q%.|¨Ø±~™ÿáË‚Š²Ñ¤,•<ç±lÞA±(gÄÒ%+ÓNór®FºF³‚—›ä.üøsB„¬A™ODëø½é”— ¼Ò2à;°Z¢ÁÒ„Bøp*i:“s‚ÿ;íÛ¹+ ÎÉñù5ÏÉæ&%0)ÿ9èä•ýÏéη¿'Ò‰JògБ¯5у¢Ï±¸ö±4õP¡.7 ªš˜Ì i±Z|ó—·2*"^W_k]ö¬åÛHÚ£ qÌ–§EÅt-?M1´“¿{mÿºá•­eY#;´l£)3úRéÄí.®æ~oÉë ?…Ž-å2áx†ÛR¹pÇ)ª!¸+0teiú#–ÞrO%“ñ÷øË &8à0pü(˜êƲðaÎâ›Nh·Üq­ Ðú¥>À뤜¦nèñD,_®ÅƒþZ¥[ð^ûïùØ‚'C¡D—®Äeº¥ÿ¤È– uít€?;šÔÙ©‘eçHn…+)µxS7¼u¿LiæŒì ÍIB{ÙU¶–e9Õ‡Ž%uªi;Éofh(² (¿?ð£¶#]uk»ZðVð;£\[ÃKVö'†7 §ŸxÉÂW„ü½^g|ÏŸÎvûÕr#3ÿ»::˜RÙç¿ï±«šš:œ-Ít£¿îüý[GÿýŽÉÏ“˜”À¤þU$0™„õußIPõ+Õdß@.[uºdfv¯›-€ÀÙzã½_}í‘]Ö9%©M_³w‘%céÞ¦FW‡Ÿ{zxõìXIó™®¡RÖ±-M1Ac5—ðJ®Î^ýÎÉÒ?Œòn‰ƒ~vì½oÝÔµ•¿L'§A7½~’m˜(è.ð†Ü¾–bƒ (r䆮sµšM’£Û÷ðÓ7ã,•'Ͳnàøl'o«-Úvî½/u(ðPäìçEå½eXe8èÉÒ"ù x<“RÞJ½ŽJº§¥½Û9y§«²]^“kÄ®4&ß'%0)‚&“°þ BÿÇO©˜^j× LefTsU#) ÍØÉP úÌ»×H5Vs¶ºüᮑ!Ò²8}ç;®xõðC6;óî—/•ʲ%NÝþFc zûÚèÇjðô-nÎë÷ÿM~»ÂZ,3³æåh( ¢Tò»«µjÀrƒ«e5­ Ê”¬™ÂÔ ‘¿Ìo5vº@º‘˜ªD [p®·áÀ3.‹ÿɦæüªÄ®Á9LÂ÷0F9|U´óI.# Úº»:0ëgw½~& ?ñ‡»¥…îé(MCÚ¨ì!êûí+7 p+°¹ñ§ÜѾr'›ÔÀÿiû?vü§o'ÿ˜”À¤&%ð-–À¤yñ5ß¼f«át‡lb§-±ïöÇüþ¹›3+Eb†Ì1&üYZŽ×¸ãéES3îl¥c>b<;Ž¢ &á’¥7{ovÚ#¯_={ëûÎxxÓ9Û¾<ïÙݺAè9½Ú|cáË4S­¨‡TfÈxŽ ÅB4"ýϺ¡{=([klb<Ð×ç 
6Ô4+ˆ6Ô,5ÌFÝòü(7§C^Þ?Ø'Z`ä‹ý¶<àŽ¿Þ8³†U3å¨à(xà—©R„¥{ï×J£´"æîùûËÚ·<åÐ ¾q”5/¢Î˜¬ô?"|1U’½PvŒgìLj’%§·tüçMZØ_m|ü{Ëç¿ûò«ƒ&ÿŸ”À¤&%ð¯!Iü5ßGSuÖ”)EM¨L!8Ñ+øÑ‰±ðÌüx~ºkÆI?=K1£ŠåœÑ»x4sÖ¿>}í±q3\ï›»õõÍ}ÀOçÜÿμÞàTz–m®:#£­Qdè¶;ãÊ¥—º¡Áf.×Â!/„A.·$»u»ÝÑ$´A×é ðKFµ¿oJ¦Qô¹6:Zbÿb'ÛNÈð\ÎzM€-ØÔ­þÕ ‹ŒïM¨kz;¬x–p-ªÕÔ²Ëãøþ UAKˆB-I][QÀµ¬U xp(Hðì_¸$Ââõwߤ&¤VB2œÛ]h½¯ÌaiåKà‘ÿ¤r;WfDµÉ&·I LJ`Rÿ’ø{ƒã_òÿ»/Ê« Çúñ Q½¾LB&[Åý\§,é¹þ…C ®}ì’ßÎŸå¬ ÎºzßïgÛK?ù“÷FÎ{t· ;zù“'üæé3g„×#Û†iSÜýÆY˜›šŸD*™ZÑB7šÂ=ôCFШ¶DËás‡5˃.–AܶˆCÞËiByà.!pq@q j060äªMR¶€«ÄcŒ $tiø%®ÐÓAµ½p×%°d+oi–7àÁ_,9HšF í ©ƒ„ÓòŠe`܇%Zl6S AÅ„Íßnk2¯J‚õû$¦.67!èÄ\`éV¤Þí¾äùÙP±òõwŠ«wÒðí gò}R“øW–À¤ü5ß]M»ÿx÷B®vöÖ·ÌhnpÕOþÚ „Çlôhë_»ñ²çfó~ø.hÉkÅ~‚ÒÉþM²ž¬•ü áRÓEb’8ø;¬ÝûÃ+6}yÃÈO.ÜæqÃ?QnæîxþLI (§5§P·‹Zí xí²¼ƒŠ¶õz;¼úîó2‡‰c@ó€A4¯¢ÞDOUWÎöö÷F5Ô3Uº†b›@ñ®vµŽ+ÌŠíó*‡üüp;OtúûŠÑØÎçŒgF×ZwÆ5·œC5S3ÛÉ®š±Î ‰^’ZÞ:éI~nñoJyà6áèPµ€Ô$¾CY_­Â?$B2Ö«´¡Rˆ+ë e~©\%ƒMg£Ïì“É^Ý¿å{×î*áÿÝcýGM~š”À¤&%ðí–Àdô×}ÿ0V ±qr×ͽ»õ*ë6ëŠh£‘¦éšHOô©Û²]"õ€‹æ]y‰›•µ–‡*û©–Ü^F] ƒåi»6%NØñ™ë?ÜEš£ù ítß•V®\Ãøf·Ù'ÜÝ~"ÄØ´Xc³ñ~ò¯`È´TdMêQi“P]Vµ¦G&¤QçD”šúbI½ðåA]ÕË» KµÛùj!©¤'pGL“o“˜”À?G“YÐÿ¹ÿƒgE™µ ’Úþ¾ ˜sßÚR*°8nÛ«nü`Ö%»ÜqÖ“‡£ó.ÞjøÐ¦\°Ç¢³ÿñE{=yéû‹>ILÄWçoüØ…ÜcÞVoœùÚÏ¥¾lŠKŸ;S*¥¢8aüø„Ì æ…Ž$± 6ìq(ÖdF4¸•@?qð±¿{þ&¯©]€­nÉŸ+’¿ýŠ*ƒ#Ôeë* g–æ4[uÊ”= K6Õ®¢Æ]À ]È{\ËVq &Áã&ܨ(H°”\RÓˆzÜùüHÌ=D㉞zÞðõxTWCS­F>5G½Q¨—ð¯Ïƒ2çìV£V'͇ÿ¤€ùŠ3r]MüÊ}òg“˜”À¤¾e˜tAÍ7l¿®käj¤·T+]>mç;Ó–]ñô¥zhœøœÄ¥‹·y¼í ž³ë|Ûe_´ýKU_á}ޏûÍÛ‰­žºÅµA»ÿÚíÿÍï÷ÏÝò©ÔÐ2£/ýÀüËPz2“9‘Š …j‹C6=9èlé¾578bó;`_ ô8³àP£Òš¬eJ+×®„×&,•N¬ÐZTûjN§»Ö²ÂðÒ”U¯(®°ÔÐ>1‘–ºòOZ IfQÚPIXCXÚ¤år>ò'ÝnÖb±LÈb³Äþûðä *®VÍ(ú<^|ë~?,©Äw%|½¨Cë‚ïVvå«­cêÊ+âEÓ¼Küå_4ùÿ¤&%0)Q Ø“ÛÿS0õð²¾|µ-» uÏW¯ÿø¢V]Ú²3­jµWÌ<úûçÚã¶=lŸ1ój{¡½Çw´K6{˜qž=jï?u¶½ÄžÛw©=fïºÞÞv³sš¼mçì7Ü×.ÛotŠ½Ô¾ð»7ó[±+¶±üñ6gÉÙö2ûÀ5ϰk¶]µ^ïyÌ[¾¿iŸü£½Ø>vú­sâŸ~eNÏg͸ãÔµ/8tÝ£ì‚m¶Ù vÆ.”íBÕ©Ú+ô¸†Ýg9rú™òtiy:öp½U»Z³ Ýn>~) d N4G7¹®=8[î¨Ú~ÿ"{Ä–/®·^oåóÝÎãºn¶‹RLV•ïÚv®a§ËÖXÝζäU5¥\»ânË“ óÿyk&¿œ”À¤&%ðMKÍoYLHv³Éœdó¹ûç×rÞI ø\X™¦ ï*¦áó»%P…ö¿Ú»;ªzà³w{Í&¤BBQìbW¬ (öÞõïî ˆÁú,OÄ‚}â»`ã!¢AAZèi$Ùlï{ÿŸ¹¿drÝì†$²»wà ËäÜ3çœùµóûžß™3g’¡þt{Å*0&µƒÖ' :»’ «}n0ùþOÎMç»’Esö’þïÎ0c\­þèâ蘭[ðá 
cû“‹~ý-›uŒtÚ\#Ù¸Êî”i0šÆ=ɹ~)mD\•´ÖÌMg¡}&¡7ýÃkó±=ê4¬_:42øÎ£¿4\ÕÑ?’|ì7ǼëÉ_ýØE¯I«Û²ªôœX|ëñjcÒÒ;º±jdx^}{µ_=¥÷ë’ÖÆö´°;ºoÑÉKÀu}c} ^¯'*sÓU…š¡¥ƒ·ŽÖ7%­}{¤»t %ýkFÓö½Ž¬Ziƒ­®®î†öš± c…Ö¦tW¬d°PHÓFý/þõ5áÒ À[Žô!p~äÈ%K`7•@À»¨Ø:Û]9ªª ¯{{†­Hnh®éêòÞ7††º«ÒÅM5ÉÂæ}Ò„§¹ëS¬éºm,N0Ó졽쟜.wòšï¥'¸ýèÙŸhòwîÈR6gdÏtrسa›Ly­’™òÕÔ]Isÿi³·$Ëæ,7µýåKŽO±ycrú ~pÂO^Ÿ‚œJZÚç§€j‹ D¬òÖÐ@ÛB_÷­i¯^ˆÄ±ÎbuM:H«w'£½…t4`ï*wM6®íh[ÔÚXðÒ’æÊQûU~޽ý “k†Ûç%-îÙ÷¸ƒ?]¨¯~ôÒ'¥L¡ $Õ{¦¤r¨ošÓŸ Ì©óÈ#µUµµ…椣gcCSã@_o{˼Àß”rÈžžÜÑQvÓÒïü”K —@.ÝCù*è»Ñ#Ìrl'Ì?øL½G¡#µµµ­¢º,tH–·ñ¬}_ÜTÓÖ·fpŸºeÃŵ w.Øk¯;¯êZ0o¯>[?ŽÌIê;ûª½AÛ8vËÀÊ9óæ6Õ55´v oê[Þ¾¼¾»aS߯šc5­ÉÆÕ› £ÉØžýIOÍ‚áåƒÃ=sï¬ò˜uݼÆÑúšú®kÖßgþa½…Î.yá{^uŸ<ýÇž\_xÀiûÆ|ä‹?:9Yš†¶E›aUY`5<Ú70¯µ5…ÉŽäMO:±Ð=¿½nÑàPÏâ}æ_öÏ‹çî×xö/Ï4”èìjl†Æø®±«*c3ö°ºÁÛ¾I²29å•ÿÓÛº¦k°oúeýƒ}£M·ö]}Á g¥X>”<ꈣ¿÷»óZÚÛÖ×>ýa/½èÿΓOšVP§ßB*-KÿÕ¸¿RN±Êˆ@”½yË´P~äÈ%K`z%0¥« · Ëô²´ÜVpÑÔÜ3ÅÂQŸ;J—H—þ"$Í´Çx¬¢.½XtpríØå§]öÚÏ_ù–¿®ûÕ™|IWÒ}Á•g |{6®'–÷¼ß¦6;bY°ý€GØ¥„ ¼iDhnbEB$RèMåêU&o+ƒæqwÌn'r äÈ%0ë%OAïº ûúúìêØØØ488R[Sc£ãÑ!›&§sË]Ia¤á€e÷ë¾¥Ï>S{6ì½bÑþ×ÞzˆþÞåsØÐwWÓpõº¾[×/h-Ο_·lãÐúÞ¡¾ºú9ó’Úæ¤iÃíwù"¼ÜØÛ;¿0oSïû·:Ø5Ð\=ß B]k{öŸ{hï@±/}p»¨~éð†dQó¾knÙ8˜Ôì½äPóÕ-µK€×†õ#M5ó%úº‡Ó‘A[R¨oé+Tõw Ô#¾9Ö®Ozn/î?gÅÈpmgqÅØknÝ”b䊛›-uN§§ÿå(mn•æT-)9z“ê}†ÒYëb2w…G¼I[ë¼t¶¼6iišŸÜ–o­¯é´S‡…ÕÉÆ;ºÒל¼:evŵ‰ÏFi ØV;`ï)•àø_n™ÿÈ%K —Àî%| únô9Ùtwwwkë0ÑÓÝïyp]]õ·¬9þø÷V¯kÞP÷׬|Ý!ïÚpCOÛм=k nJºV,<èöu[ óo[9¿ª­­¥fC÷šÖdOy¯¶.k­n¬kìÚà3A­ÍmÅ!ß¶áU×Þ‹–ݸöÆÆ¤¡¥ºåÚÖŸ4µ7vÍߣ}Áš¾›çÍ_´aU¡-™×,¸MÆæµ-»¾ëº¾½Võ×uxšÛÛ=xð^ýûM—Ï? 
ÙX{ó@ëº Åµ?ûÃ…Þ,J燒ç?â5+꺩qϪCê:ö­îj½¡ªq¨8§gxÎÆë7]Þ´WòÍ_~ch¨«®¹´weqóˆ GpÚ14Ü0VÛ5K¯?ãˆw· zt=w¤£yÏ%KïèþGÍXaÁè2Ÿ;Ü0²¶  u«7®j]Ú<ÒÚsçu?¿ôüQ³Ð©lâ•p§±0lò)è’4òS.\3!)‚ÎønT:«f£‰¦Æ–z{jkÒUYJ (‡ì.ù¬'¶<뀶C;Wõîß´¼³¯«6i°äHÒP-Q=Ö9º¾¹®82Úß6º¤¦PÓ3ÖUW[×9Üa u±¿¶¦®P(t uÌIÚšëZ:á_RÛÞÔ¾¾oÃ[¯:øÔ§\”¬knòé`{ý¤3{Ms±Õ'ç%sWûDBÃØ;.9踣¿üÙß¼ö}ÏøqqýœSÿ˜ãŽúüg/{³Ð< R!”ƒPë­D¢£ÉÛùæýç{ÙÒ8Õ ³òµÓˆMSô xÄ.¾kû¼^œ.c.Ú úÒâS¿~Æy¯ôêsz—ŽäÌçþøÝß?*] V“¼îñ<ûOLƒj{gÕÙÊ-é¶§¤¥¯§G â}Ê)?r äÈ%0C˜RO7CœUÒmûû= MÄqé1d¿c@PLQÆfŠ;xÒ»¥ŒWÁGGÒg˜Í- u–B+›Î¤¦óµ#Io¡a¤g £µ±¾ch]}u:?ÝbÉPmƒw`×ÛÙ¢o¬c¸º«'Yß—l,F:‡ïòÊÒP}÷P¡ÛçþÂÆ÷ýþá›V ­4…\ßTu{ßMõ¾ë[ô =ÕcCuCv¹6Ã=Ð9º©>©ªo(ö$«S½uIû¼ô¢¤p¬O)ê·.¹„»@ ”FúG{`a ®¶kIæ.Nº ·÷Õ¬êéë–ßï…ª KCŠÌF6ãb)¿tÒØØØêú¤¿Ú–Z6—$¢ê±´Íæä®áëӧηÀ{äŽêkÓ%`¥1×n¼!”|-{Ùjt°´5¥'¾1±]Êk鹲ޮ.cŸxJ߃–ð6š·ý¬,*§…0DîÒˆN4-7Ÿ›„sظÑ; é‹¡ŽÝXï¹O…Uœƒ›¶·½‹Ïùy¦k°®D¢§§ÇúZûÛÛÛï¥Õšêš±â˜’ª‹P7w?“±£&¢‡†J›S½H¯©qšMëªÚ†ºkîZ=zó]ÉíU’Û× ÜZß4Ö1²ºnþHOÍÚªùƒ×]ÕQ\Ó¾§iÚá¶M]5kní¼æ¶~þõ—njº³qùXwõºµ}·ìµ×üM£«A×P[÷ÈÜžÕC7µíS,›WTÍí­íº}àš½—Ìï«]âµVWõŽ®.BÄ»’… ç§€ÚQ¼tQlljHQP@Œàöd¤i`ÍÐ} ›Fk¼Ô4ÉÜô*ìÒñ8p1gl]ms­Bú Ä‘ž°²ªi$iìMƒfk³–ûSiƒLQï`Ò·xMz¯¹¦ ’G>ùAéÊ/#œtˆ0Vkk®Ò$tÉKX ÚRtÛrÇmµ5s9mmžˆ§xl@/c°ž1ÌE3sg¸«wÔÔÔDw€Äè0$j¦þ®œCCËNæÍ›×ÛÛKã<õwž™;äv>MrçJò#$ ;E"v<‘æe²°‡Fl 5šîÕ4:8ØïÌoÝ*ÛÅ©«héÅÉ¡é¾T×ß¿ø;Åÿ+ÿRüòâß/*ÿ·øé¥*þ¡øá9¦ùÿH/}ú€_Y<¾áÜâ¥i•Sîû]ç´úµÅâߋſ?¸ä»ÅË‹ÅϘÿ³tƒªÛÒ°Š7n¾Z¼ºèï´–ó‹,j9mÓUu»¥ø©ý¯Hë^WÚNkc"§ÛuÙzª¯Øq×P·íÆe×-u?´üÒSïÿÓâUÅâÍÅâšbqSqxMº¹Wqd¸8VÚž ›þÆÒ<;WùÛ0Ø?(™nnÕ·ªc£½«zmw5:¼±8Ô[ì+vE•Q·Úd£¬õ%blìec­1;Ëô Œm20–þÖÎæöÓD¥‚ƒ ÍŒ©0ðS©ÄN!]pHëë×ÓhzFäÄÏÝò¼iӦо2ÇnÉcÆTnç! 
Hи ?3Aý;‰üðæk3èãL leŒŒ˜qš;w%ïºké’…»>ÉGÍ¥ç¾âã––6Ú]ë6{øÎ[×¾ùÍÇ­émmžÿ»?\ù¸CŸ1§waÿÆBCa¨j¡ÅóܸۖáÎú–¦íó7t®ªjï\ßsgSã²ÆBCÓHsCSÝÍÝ×7Ìkèîÿò>äÝ¡·¾â´MCuƒ …;šæU/mXëõ—Åçû`ÂüÆe_8û ÏhßóÒ¬êì]ŸìչǢæEk‹U¾rù©¯æ›úzÅgM7?|¨ÐY·°g´­ã ¿:-,[“žÁ¤¡)} H<ŠÝgþúödÙↃWÝÒQÛÐ5\ÕW7¬ØÚó•_ž‘~Û¨ÅÌ|7SI¥i¹–ûBƒ×ˆjM9Wõ%…â†ôûHmµ£½ÕUu£…BcµÍHÒÕ}cE{S7 '£sºZÒ¨zx8iõÔØÞn…ªôköÛJßPÞÒ~z‹Rp®z¥ ƒÆõ:C)•FÞ´Ñ£`¿ÔZHC@©[óœç¿è?ü‘ÌóÑáQÃÞѾ>ßhggg¨Á–þ‘Þ®áR¤(¢KwQnIö>ŠDOzÜ—ÒÍ’W߸àiTz}ñ˜ö÷^ß´ß©iˆ)xÈ–Ð×íwrÝÞZÚÚnÏŠcwÓÝž»‹o?üŒt«çË‹ïÜÿ?‹£ë‹#=ÅMÆ¥M¡ï*…Ž=Å·rBÚ”¸Y›.•q»1ÿÇŸIâµÅ±ëK%Ó ™‡D®ÂÖ‘t»åááýé^Ͷn¾³øÊýOã!§§1ñºâ; J…ü#CcÅá4"õÿ–Xr¤84’î-2)võ‡ÆŠ·ÞùwM¦µ¿ß?JÃÚT&þFGG†ûF7 ÉèééS½w'*–bßRÉL8ÔIDATX–7ÑŸÖLw—®¬Ã-Ú°aC „™M:r¯à­ræD¦yu­¯ýë‡~ø1ÇsÀ}öýÀûOzÄ#ù±}lõšÕfœbÕ‰jJFåþáî.ŸCJ|…ÁBš¤{¤Ûw fý‚š4§=¹«¸&Y‘NÞ®¼]­…÷™c­VÕ¢ª.kH!ôPR·‡&“Ѻ¤Óæ­I±)( WÕ oé]qߥéÛM'{+xt¤° ª¦½ØF =…ºæÞêâÜýºõÄÃs“ÁÚÁþš®5ƒ«ê 'ó`Y:&‚w0¯±qNçà`MÒ44j)V[R]]šNßj˜S_Û6š.jNŠ€&]5ÒÔÜÒ9`+ê-óÏi*Žƒë›ªêJ»J ­“Z›I' ³º¿aIˆ_%Ä-ìï(Ì­¨íòÃd¬{ ŽM:Û:)T[Î"|ù-ðHi8³ån•ñ¯ÏD"Ä‹ úšŠ4RÔMúˆ¾`03±`ØÏÝHVä }q-!ü>qOûr;Ÿ‘çÏ€7ËÙ„ƒ®gZÊÊ•+÷Ûo¿+®¸÷ 76Ôžú‘SÏ:ë¬;î¸EéÁÒ#@ƒý¦¦˜‘>%5Œ6ÂH_ð=…ê¶ö¡þMÕ^±m€`i$YååæAª’àL”é¯Ö'‹kä—Vç'½ÅÁ–¤ÞþÒÕU‚Ôž¤«5™×ä-b ˜!:°(Ù´~¸}n­…êš¼ÕdǪ^Ÿî]Ü“†à*¦QcÏhÒ_ãíÜd`Á𥱵ÓÅþB½uѵխŤ¦w°¯ÝjìÑ>.®kH[VK‘°ÎWDŠÎ_Zê­­5Î(T'µÕ±þY>~«|)˜]ÕQ¿hnmµÉêêþ¡Ú¡ÆÆ¹C=õ^¸*ôôFjšÒƒýë[ý=½m- G’zׯš Bðz‰@ͺµÒþjÑ2<’ ‚_¬p±rFbàŇÅC2K8à€ŽºwÁ:NDKØ´ü{xõ{÷¾÷.!Éä›%²Ðö†n6ÇèRMC­ÀtÏ}÷^µúö¿^ý·ÃîÿšúÂèØpSSæíís½“3â•Úúd´Ï—îMÁžPK†VCYê.ÖÖ•V"ÍK—,‰êÒ9‡fŸC²´†G­ÒJ™-Ué;N ©^jÓ-œSlJ1}ùeóѾ(-á7l\žN=§1®Cåôͤ´RÜJ×M-Nóu¥¦æT—D‚/:ªK£ƒ4U:@õ–#(l©-­½ŠLY¥ÜÒ/,¤Mµ:k’9cuÅB¡©žê±B!i)1Q*ÛhÇd¯–ôGk!}£#ÅyÇD6hôà/½Zaôåv#2_ÂO-X°À¢ #sÊÉ!X€±od÷^’f¨A¬`ØúvsL¹ gè3fç¥QøhºQß0ÏZ•¾ºå[m"„bòœ#ßðƒŸœ•úõ¿ªªÌAM¬¶1 fç†Ý,\â´ó‰œß°QùMqô(ãúq,öÒ}˼yƒÃcipÜа|Ù²›6κ¥ŸíÖH÷ø°PM½í­Š¶‘,q ¢V—ÐŽ¹†ÛSÈêØØÕÖÞX´˜Î{Æv[´d4íËc…¢l~bZù"šBÏËåǤ\ö(« o5a•Y©7:@¯)¦ÒêsR:­­ºå¬ækg‰§ßx_€4“hŸ1ì®rà¢×cSûÐ×À+ `g¥WùåKf>vnK=Ÿa©MªFSï˜þ(Vû×s±”“öö6ˆ<Ð9Ô07©ªÞ²Ô ”˜4uKÄ´<,cø÷o›ðfÂŒðúöJ’ÕtPïhR_[˜ÓÒÊÛzåqÙ²½¢Â°T×4øº@Uâ C5 É@WrôQÇî9¼Ì.ÏÝIï±G¾¦¦¥jÓÀÆ^tÁܹ>ƒN²¦FFYÚXªzkLùïksZ¸ãŽ;–.]ÐkJÖÌ1Þƒ#Ä`i»·d¸Óý²çXc}r»õvËóðÙÀ+Œa7dUÏ-éwÕªUô.ü5â4ko¯¬ç#÷ 
ägÌÎùÃô/ZðÈËQ`—ºNBšþ¤»§“õÒ¦§}ÃÃ"Lm¿ãÓ¥´0-}´?Ë‹íNi[O˜#9¸`Ž˜yÄôÔîÄæöyÉTJŸa Û¯8«¯ZpÇgã"v ™ÕìlŸø™±sxºyŸÏ€}-]þ™zQ|l1yÁQoøÖgù\Rí-Êm­vœŸÚƒUSzL ú®[·.^Eû÷oœ. ÊàL¹’…˜æþ÷¿ÿ‡>ô¡ðªÖûz+é9Ï~nŸ†GªMŠª¶Â­MÖd±oýölKÑԒ檤³wSÁËE¾•àà ÉÜy›_XÚ*êoý9kSñØ/&!—ñÁR³9ÉYËÖ¤„C\3À8õ“;¾·¡/éP1ö%Bé~šˆ“Ên–_0=fÁ] ¯430ì˜åÕÉ;#¥º•þá#½Jb½£­7¬…ZW×Ô Zˆ3¥=‚ŠÙ¶‘–A¶´{ùIé‘þ÷o¿¼Y†Ê. ‰Yµ¯}íkþð‡o[µöáøI'½÷)G>¹§»«µÕ£ˆbOOúi{3ö§o…²†ÑÁÒ;HFjž ¤K/¥s&bÂÒðÍ9ÛIjóÐ.wó_•3ÖBÌÅeIb2ƨ™â§O%JŸÃr5˜â{Vhó™¢Cõ»÷”,Ï@ 1F ú{Ê W ‚gÌÎ#,I7æ1óSÐ¥=÷øRsŽÃTƶ}E;ȘZ¦ëb­»qöæ9ÑLÜ#ZËønÄ(Y^¨´iSd”V6G2+gR*LBŽ„¹*Ý»£¾O¸^ ²ž”w¼Œk¾Fš÷1ü!$x%>H KÅ¢¢Z.ep%ÓU™ KDÝ(in'f2ãgåœÑo4+B2Áì¶ôc ãÁ»«ŽHãEEâÊpš\’S9lf”P Â2ÚÊ‘Æ;Ê3ÔñSf„J*âK:²”£°LGð°¥)Zû DÝìîÓœØ)úqš‰%¸ jñ&ág¹¸üÄ£Kã쿼‘haÚÎA'ªBƒäïÖAÿŽëqÊåÀm–V=—8E[³Îé[ÁᑦÂãj «cðtÁn£WJgÞIWU†pTwdC¨¬@–Р»lëǶãÆ©&àqù÷~¦Öcè–†…ºl}sš»å`3ã†l·™NXd°6æèÀB˜¦£À„Ó2áѶ°¼¹V8è¨Ë¬³ö³b˜0¹ªûA&Œë«D¡£–Ô;ž~ýYmÝkæàʹ'ŸòK•¦+4^N^üÌüQ\R¸\³je˜¤@¤ÃlÂ!’^†ÖåÏTzé§Y%3ýâ;ßûŸ)NÝ7¬ÑŽ¡–õ"¾ü©A¥È¡ä,'àLûRDö—n8ù‘©i\º£8ýšBõ\Ã)]Û<\6\Žòì¶ÜM…—[;¡•Pk2?0Ž€ìgÀ™(¯ìÕÖlÆT_Õ£3írguéŽÊÙQTbÌ@³#è‡1(áj¼ ž™¯ÑU—}øhÝ[±x|"í”Û´’[åSaOR}_!Öô"2 (mŸ~…ÉA·„1!ù¸Ä ôÞ¬»T9ÇM´•;4Ó»³ürÜ gÂÁ`X_ ¬q÷„PÞ`9¿ª«5ÙÕò’S”ÞYúƒ`ü ðÝŽ mÇíŠx¹Ûf)ÍÑ[r²*•%‡ˆVJ³Ð1™¸9¹¸d2:à„æXeLÀ˜ÒÝ2%ä›”(Z9tvè’N›4« Gõ]ðc,d[?PNL–Î8Å$‰0ˆ¸ø/ ÝZÞ£ˆ-? 
C#Ý^æÛ¥²w·Tøn©1> Îòg&‘è'»=Ãr)zr”aÖLãZØì›+Ëf÷é¸d‹•+<ã?1ÅgÁ@ôËœ~kå}³„IPºwäG޾aóŒ³9!xAê„ÐÈ=K°¦…Ëy/oÖUBsV Cñ×~¦š§?´Œƒd;eÿå§\JSPq—ˆzÑÌžÅÁ'ûås‰Ûpš_a°ZåÃqSÐÌ.òOt0°0¶-E¤;44ëNÈA«v;ºæ­É¸´mÛÛ÷c“ùrÈ/o3àriL”Þ96+2\ú¾®¦ŒÖƪü7’Žô7?4ÚÖ&"bòÐÌ(®{KpL“Q¿«ŒÕUæË½rÖ.1z%ÃîàÕå¤fÑT9'ÔCß*ŸòË3ÎÆ"XÀTpZ>b(§ßÕ¬Ãã”ää#GÅ !m¦¹Üzìø‘©‰jødÓŽÌ ^™¬@Ô4 £ú'i…¥ñ+3ÀF;ÒQXƒÒ„£‘,s+3‘Úqú™w8eÄcŠ~CÅ»lÿÓÏ.ôµŽы˿^U)rH¿.G0¶ÉOg˜%7GÀ›wâ(ɬÊVѾ&¾=Î •)JGß {®Ä2#'U‡#ÚdÀ̘ºs×½ƒ~ŒILæ&TwÀŠ¥,³2#{KHë×–Ø7õ\q 'ÅÁ¡*oiðÚšðŽÕV—bßÉÚ)kmK3ü/ä_ÊMvû1ñðÎbe9åΗ)kG³º:ùÄ‚~†¾`û·˜þ«1@έd¬¡d;ôã GHë´ÒÁ ¾ r½Ê[›~Ö&¼c¦)Úwð#Ÿã  0%AË|S€î´P®Ê êáãˆbó´1ú’ä„wwÓ©ø¹³ôoK~™tŒ6vÊþ·mjrBÑ1#m/Ëû+N>«ž~Þõ_Xì[Ç¥ 8u˜5£IµO›Ë*}Óm{òc™a¢ è¡ñRƒŸÒÑ£~ªty•¸”õ?wÄmë2ã³s ²_yâß’Œ-~圯>òÑG´7·?ñÈ#/¾äbc}+úúÒ'Áã´øø¼ ùèëã§žzêþûïÏ]ÚžÓv`Ð%(”ðS&ã~êSŸúË_þ’F]¾ÿýïïµ×^ ö Ox‚mL¢ŠF4û•¯|åˆ#Ž?éIOºøbòIŸ¢ñÎÂxF^PõŒg<ã…/|¡îÃÈ´ú¿üå/xàx|ÚÓžvÑEÅØƒ|ܹçž{ðÁ“U´–Ý«´úÓŸ>ÿùÏ·õ¥üã§Jò¸$¯ÅË„—Ï{Þó~ñ‹_NqDïd²bÅ 9zÔ£è4´égˆë¡}(!<÷¹ÏýÁ~œˆ¥g }Ýzg駸ÿùŸÿQ1|+ÖèôÐCÝYû Lÿ9…ÑñNM6ñŽ>»Szœ~9l nR©mýµ}ßi¨;G±>mwóÍ7ÛAÅÖ’!v(aÇÃåË—“Ï!‡Â’å¨EË(ßüæ7wÁMè⎜KDæ§–åE~6«üŸÿùŸùùÏþ¶ÛnûØÇ>FÜ7Þx£«\[V¦Â_ìï‰O|¢ ±/½ôRôõ«_5Ìÿ¯ÿú/ôó>ÐW7þÒ—¾„ÁÓO?'½êª«‚µÏ|æ3¼Ò·¿ýíüãð[X¹r%‡ëªZ•&‹­¦¿…Xó„ÃG>ò|½ô¥/õ3XÓ?Õ¿øEŸýq•@þïÿþÏ%…ã)ù4ʼn'žèéÚßþö·¨Å*XÂw¾óë®»î´ÓN“VÝ%ÞÐe*áüóŸÿmÜb¤qzöÙgKNh3ì“O>™~±4úÓŸ¶Z ûÔýÑ~”º¹9|…LˆÈ%[G¹¤q_ÚŽZáµ?§ÿœuIã ÊúÉO~ÂÞûÞ÷bÍð"ôÂÚý<çœsþùÏžyæ™è_½zuzÖYgùÉž'³ˆèƒü bÁé ª›ag˜ô§?ý ä Ëað˜ú¾ÊòÅ'dO¦ÇÙ(ÂÇæe—]†k‘(f97gù{ØÃì:ÌÚýdä¼JCkèKD“ù±]ðšÝöHÑ>?vMa¾™1°²gÖÛÞö6Aƒ…{ÜãÞð†7d&¾kw™þZÁú!ˆýîw¿`óÅ/~ñ}ï{ßXÈg½þõ¯Ï:°PàÝï~·’Š3¾üå/!èó÷»ßýN:é$µÖ*M>™WB|€qðû»ßýNoÔq-ë´BÀ×½îuQÀÀë‘|ä[ßúÖ¹$ÈÃc´cÓw¼ã¼¼Š>jùÿñÌÃ%9~ô££ÍhÇY-®93¤,:ôõØÇ>ö¯xEvÓ×¾öµôÖ»dÉ’W¾ò•(ô“Äžò”§¼úÕ¯ÆÅ¾‚樥ܽà/ˆŸB épvÎGy$›Q+ÚÄrˆ+»ãŒ$„tèÏ„Œ£>î$D2`òùÿïÿéæ*ô ™)¼lkÿÁ‹F87½éMñ³ÜØ"g:Ïx$s,VmªÅ€;¸Ðµm¾cM~5ëLØðØ Ì¿XÃ3ö™f4PðÓlùoû[³Vò „ñ[åÀ˜¥£›\sÍ5F™âûíØ?°ÐA?"uÉ*ºÒŒ0릎Zïþð Rk˜\ –üŒn®0â'Óã,’sÅt=DxÕ«^ågh±ÀËL”%Ç45A±y…ÍN»¤0qóc;ë´3áQ‰»@LHh¥eR’G&á=õFZ䘨ÓœsÄÔ¬¤•FÿdôpL±W*_)¼‹büˆ‡dÔãm,„ðJ ;C/ÌJ;3VühÁÕµCPÜݾ¢äcLzuE˜JY¼ðÿþïÿ¾å-o¹à‚ P+¬çŒôd<¢Ÿ3mŽ|<’™Ð8à£Ã0«ôj™*jY1·p`Ÿ !@3]ĸ'û9 jr;¡­gÇs̯~õ«—½ìeVc™<§SÔ’á(#„¢k¢%Hã‘ra°K¤A –T)]Ž«ÎÊ»¤º¦ 
JÀ›òýtIbFšußãŽ;Î*ëæ¨Sæ'ŒåÇÕÐi‡Z ¢_FüdöìÓ»b ©ÃÝϧn *z(^ÂàÙ?Õø,“ƒŸ“é;³BdÎb1 òd_&0ø®`\¯t•é.]º”–Y²ŽoøõÉO~’Z­P‹b™èÆù±õÚ™ðØÚù'¼œgN&p”JÁÊø.ÕP—RWˈ]bQx²¦**_ïŠÊLÐ/q-?åy˜Gzx ï©€ŒSô³N)‡@ÂùÊןµ 3K¨%¨R²2åƒGd#•32AçQ¨@PŽ|94ˆóÌ.8•‰—PzŠÃUôʵ©Ãëç@ˆI° e¤#…3 W½¼©©N{và/zÑ‹ h;öØc}‘ósŸûhvU‚Âÿ:ó\ôëRFæ•HŒ¬¥”Ñ”a-êD¡©0•©æk²öi–ÊPhòFPhÕ::¿÷½ï½ÿýï×»Þ¥V h %ÍîàÍòyjsì¡©…Ìmíß%Y’d08%éɈ™†|Š3nðPŸ^¤ÑïÖFÁN 71®@h|[=*9[äƒ|8Ó¯@ã”è+͆™%^qÅŒß O¢0‹eÏDAbêRŸÞÃí¬Ðæ„GÀŠåî3iQ!f–j€É³P=É¡6úc¬¬œÎôö»o±2J ÞxwqfsæKŸóœç˜Ióì‡?Û }tZÇ,]ÙàT ¤K (LZêÄb¥É‡sÄKÄ€áSŸúÔÕW_me,Rñb¸€ìð¼xÄlD3ŠéÌú§Âªk-2Ã}»êùD1g9ÈÛY™ÈwÞ6'»4Õ ¢¸‡<ä!l˜Ê8k¾I|Àg…1{Œ;r`aá¤ÁÇaßà h%ä+¦ )ŰñšÕ‡n’ÀÕÈ™j¦¶Ó>yªâ±·•½–J)ì9k ½.e8‚HŒà‰i‡§Æ¾’~†&³ì“§’œ®HCΌءV·Æ…þè!múɃ0ÌJ0`¬M¦ÇÙ"Éüô5Šˆá~Í«³UìKcÍÄzîmC³ÊV¼“†ºFT$¦¤3‰mëÇvÖhvÂ#à År÷™áX醽¾QØM7Ý=ÓÙ5î†Y‡¿ûF+ ‹äepç¬7òD–‰ztdÉ „@Þäx¯Äsá]‚šWä¤HÃb`~VNÌÁjž™ÿQK€¾•&^)œlÈÞ+((¹R%juT¯â|àõL“h6œÖ8àÌz>¤º„|nŒ3:Ð EÃ4óÑAy\Å`Ì܆4ˆˆ¢ƒxÑžˆ0c$+ú•/Räà¢î*ËÌ®f§-‘i„Šé޹hêª#ò‰‚B¡TLfŽ£ HÇÒª í¿œµ¸—®$sBó×òTü¤P7:¬çš~âÎûÙÇ1»­³’Y¢2åò'sÆŒÔ`Ó¢BþÇ$ ˜[Îz·tä'—¥¤*—µ³~lû~ Z–È8ÅN'2ÝDsÖ£ƒIS!eû™ùî¾Á Ud ëÌP6£%¼'Oʸù¬0tÝ›½2Ür¿ó<œš«FgýaVÈý4ˆfcaP„»¬ÇnK?-Ç$« +ÝXÿkñdB,¦‹Î”Δ8a"´CãÈø2ØBñ®dÀŒà€H#<uû©°’Êã7\|´¯A ±ì[’ÉxØC&Ì(<ýçðÂqßèË!`-ΖPyöÇÌe³ <†4”÷3,|[û7W¤®öÃ~œ¦ŸÍò;%rp§WfÊ Ú‚)ù 6gËõHP].‡rëÂ^Ê9ò“%G§& ?Ùg¦ —¬á"™:¬|ØA?æ^“ùLžå‰­cŸòÜ<½# ž(>—«’Cñ&7¿3N÷;ÒøL•AyX­4'‹\I»p7agÁZx±o8bÏAæ—5Â)ú2èÀ°Ù"Ÿè™hÖ1Â1M¦ßh1ˆÆ/'¥ÓF¼hÍi¨.¤f!ŠfEé=úÒ8jåDœÖNÜ“ˆŸ `“L$¬%úB0èRèZ#œš«„@ªäcçFac3"„TÄPh-Á5Ö‚΢aÑR¨žBCsP›ÙÆvìúbË$¦ùD­hašÏÁfŒœÀ0î2eQYŒ·ÈU´ƒ©Éô8[äš w„)…±ÑÖ¤£SsYDáP†Ø-3ˆº|ZôßòcÛñj<à ŲC™4=*:a¨(,àJšÄ`j‡Z¬€B@‚u2DýPGE.˜¬îÊ4MŸ†ëÏ GŒw~*@Ú„¤ZܱgÃJj‡(ˆ3Îf‹|pQŽºžŒþ€&aðÁe“OÈ$Î8‘"¢ˆŸÉ¤TŒ5”dÎ7Üš1ÅOaŠƒ?%±Œi%¥!êŒÞ™´LÖ"ᬘ«$íKÄ0.<¾Óä鞸·À ÚØ9–cΙ…ËG<ɇýû©€3ÆÕÅòdömºöÏx´Æ úô¡MVíÖ`;‚{Êò3t$kºóS±Éô8‹ä€—è³ÃÒ¬WÄZøgŠæî0®@è‹Z©‰Ê” {ÞY?ƨ4¸­9á‘OAO(–ÍŒ'…z&µÑwèòx%}Êu¹è;ÚâL—cvåk ”3ÂÃB`üŒD¸-é˜Ý !¨E+ü¬ŠZˆÖf…|è.c!ü¬®›ÉaBýò\º¤ AEáLV! 
òa™d*-‘©xµX#Nœ=ã=ÎÊà;®ÊäÔœ¬p†Tn¬%^‰€™: 2^'ãhËøB§«Š9›åd -gSÆÃìÁ|T‰fgŠÓ¸oP(IÐ"•šèQ&ÓuU5IL¨ÇhjVÈYR^2ÆŸi3st¡¦Œkr ;þ*ëõÁ²óú± ý@ÖȸDÀã’ÿÌ%K —@.\Ó!­aÊtÜ-¿G.\¹r äÈ%P’@À¹!äÈ%K —@.@À3 ôü–¹r äÈ%K àÜr äÈ%K —À H àz~Ë\¹r äÈ%pn¹r äÈ%K`$ð =¿e.\¹r äÈ8·\$[dÔØ A:˱W€2äØÁÇÎ vÈ Û%@:ò•Œ=äD¾;N(ŸU‰½ÊsŽ{¹K´cS‚ÈT]"ö ¢å¸KÜ.Êh!»…ê‘©J–©€ TwDûÙKÙéå )ÄO{#D/ÑrÐ)S³±­DpÎje9y"—@¥I ߈£Ò4’Ós¯–(ŠíÆ€“D¶WXù.Z±¿àQ2vè%8€¤JÔREùh0Û÷'¶RRÅh ’¼m7¥Š¯Ü(ÛZ)/±­nÜ}\Ýlo©(íH£Äí™%­–}µb?©òòvþ³“b¨B¶áTà+ôÆÆŠÑ>ÜU>J16š¶«â„ÔF•üœK B$p…("'#—Àf Kx¨€ ЂCðœÈ?ÀÒ× Á€ JÅ®.Ác9ñ3v׃F±'sdj=¾·QZJú0æ¦Ê¸)$‹oùéTa6ε!˜—Fž³tüÇÎÏ6Ç8@;Í€S-»Wj‚úY«2µ£qüJhSÎѸÖÜT‰øÔ–Ý‘d‚Ù@â=¸”¹*Sùteê%§ê^*¾ð…¾ži|®–Hì½÷ÞÎÒùÈG„€ßùÎwàÔx2aŒŸ`IÌç'@‚pK¢L@zýŒÏ÷ªùΑfp¸}ôÑÿøÇÝÅ¡MßÒxóS³ò¥ï#!)ÒÚ‡Ü2‘AcØø ÊhVu?–@^äH+¬AÕãZéPU´Á‹{©õŽw¼ã‰O|"®•‰[#8Ð72_úÒ—^~ùåˆ1QE#y\’h~ª\ ä\¹ºÉ)»Jà+_ùJ€åÊ•+>øà7¼á ÿüç?A(:ùä“Aé+^ñ b[ñ©± ˆ‚CÉ%&H\Já€^Ѥ/ÔÂ'‡´FŸЬXù÷¿ÿýu×]÷â¿8Ä®5Ga|+&ò#­–¸ÕˆÝ"®ZÓ¦¬:+Ï]u¨…ùA‰œ¨›•Q>žà"À\Õ ¸Å®MSËÑî$â+ËÊ¿ímo{ÙË^vÓM7Ae´eˆçç\(€+P)9I÷^ Ý þ}(Í〫P¼Á*Xº@‘L à“|…]’ ™@š€X;TÌ|²³«À)*‚:uÀï£ý(ôõ)ɸ”|4è.@FË*Jh- €=75ý몳f…¹¾V‰6£€R™ # A›F‚0Li¿ÚjÇ!çŽ;îpwwIJyfÅ_µIPˆWþ±}ì}îsŸ /¼0ˆDv6,ÐB~ä¨@ ä\JÉIºWK [ä o p"ø$Æ6üà=ì\¼xñ÷¾÷½g?ûÙðé~÷»ß‡>ô¡ßüæ7â?x¶Ï>û|ûÛßV,ÅSU“·@  ªuþùçǺèNÅ¡óUW]õ³ŸýìU¯z•4ìtþþ÷¿5’_÷º×¹ äÜÎÕ“N:iéÒ¥.=ðüüç?Ñažfü©O} BLJ<ä!æÌч†îû*{ØÃ¾ô¥/áÑ-`*Â^ò’—¸„>è ƒðïªÐö¸ãŽ[²d‰A F3jEêÇ{¬Þpý©O}êoû[u=®ÆàÓŸþô3Î8#FYˆ¬©üÈ%P¡0HÌ\¹*D"ÈÝà™G°/ùË3 È9á„”‰øïœsÎqõøãç\ܽñÆ–Ÿfb]‚Xï{ßûDÌ_ýêWýüú׿îÒyç—œ0|ºÑÓžö´@å ž¹—wÞy糞õ¬ýöÛO(‚UdxmpÁhð¬³ÎJ*O9å”ý÷ß_¦ôÏþóåË—Ë‘6mîI¶!‚´êý¾÷½ï•¬—ƒmËW_}µÖ »Kx|ó›ß ¼ÿøÇ?þýïÍk^ãÒGTO—ßýîw“†õW¿úÕ±MÄ ”¡¯[û|IäG.Š•@:Û“¹r Tˆü‚˜+Vx ¥@ à1Ÿ w?üá +¡Žh)yóÍ7‹,•TÀOx «.¹ä’€(iAsÆTû á"ÒKˆ>­ÿÊŠýøÇ?VìG?ú‘Ѫ;J ý4øþ÷¿ßO·s È}àJ?—Î=÷Ü(¬€˜Õ9ªÄX!þôÓOWÒ%L‰ÑEÀ‘ïüà?XPëÒí·ß®Ì7¿ùMeütÀé'<á PÄ^tÑE‘éœI,î{Àˆ¶3²by"—@J Ÿ‚ÖÍó#—@¥H [$ B@¯ˆ“×0ñ u /°}Šhâ²eˤ]h:?â g|u€™¿=üðÃ=™æ‡µéé²9[™*š¿•o©×CúPíƒUùÏ|æ3¸‚Ñ÷¼ç==¦²]…|Ìc¶ÝÅ-॥[~ ¾U<ôÐCã1¶4cÿâ¿P viŒ@eq-v.»ì2g·ƒßòÕu6ˆ¡†G¹Bp­)ã’ZÏ{ÞóÌu?þñ·–[ÐoTa” °Iéi÷%=SÖW\q…´ZÙl¶t~ä¨@ ä\JÉIº÷J3üKsc¡“ÅMðË€ÊU™Š‰}£< ¼¹õÖ[…’ ¸ª$Ø3O«˜"z˜ óD¥Jp%`…ãêJË„»ñ~ãß8äCþìg? 
ÝEƒ&«=Fêß*íµ×Z¶ §ã¾r‚´¡Ó€.P´„JEÚ®BMÅ`3ˆ5ÿì§L+S;î¨ð}ï{_Ô"O-xoL‰eðƒ¼à/øâ¿ˆM”¨¨)-„d2$ò#—@eJ ]æG.\•#7’ 5À y߀(`éQ¨Là•!ZàÇ’c1¬‚Ó®Â'ç?üá&fÕUÈ)Då?™`U]l“ ¤;­¾6Ÿ,ÇSØo}ë[oyË[„§ê*ÿ·¿ýMƒ. s3z,–ò–[n1y®(îì¹zõj´ÕEùŸþô'uá+°‡ÍjYŠ%p:ÜBE,k­örÉ÷Ò”´hˆ¿ôÒK­)óÌÛ24‡å Þ¤5Bù‘K b%GÀ«šœ°{£àØN¤9,Éš£@;eàe€p*¼jÕ*e<¸T€ P)ŒÅ&{] è2sk=lSË”r€·ê Sè ´ÜÚUGD¨*Š€ct´ÔK@РÇ2+ѧÀm°ö‡?ü¡¦Ôr@Vçç>÷¹î%`Õ¦ŸÈþË_þ¢$È €¥òcÊZ1†®BkCC $FJú‰ùfÂßùÎwbSü-ß¡ÍiK»)ᔲóS. •@n ª˜œ¬{§ ‰¨9¤ÉÌÊ'x¬Œâ¤-(+ˆŒpY¢ƒ=®vØaÏþó-f¶œ Lz{ê©§z˜ ¿A ,T€)l&Ùº-eÜNôìÝè(ðÕ”2V]¹ô =È#dNYÉ,,ö,öâ‹/¶=Èg>óXì½d=³–l–šmFö1ÇsöÙgÇ’i¼¹d-°c ¦fC ¨Eü6!yÊSžbµ³vä|âŸ0ªPÒCh„Ù®D™k®¹æ´ÓNÃÅQG¥)4€ÒK+llíçç\•+Ý ?r ä¨ À“´ˆaÏcÎ ÌT­?r""Œ¥ÈqU~,3VÌÄ5èUÐqÕª`/CVbULgÓÂYÚt®ZfaXd~ík_‹y]ÿð‡?ÜÛ·ò‘çìm]¡6Œƒà¸j‘6$†ÍѶLŒ8Ÿxâ‰òÑióç>÷9/Óa$Â9¤Å¬/zÑ‹,×Â`ébÑziµŒ^ùÊWÚŠR¾×¨Ä¾Øsaüîw¿‹ÂxwÉFž~šÁv6¶ˆKù9—@eJ ÿƒÞ¹*EPà-1ìmfwáSL>gT‚x ¨Ä1ÑÊ¿eF•¬¤LÕˆVZËbM“ÉàÓOµä«èvV>¶Øð2W@ªŽwDF$\ÍèD€ŸÑ¬òàÏtƒH—P,®Š§#6uÕGÝ:æ¢÷By£,3j)©³ßˆWlœL"#úQz”A€ZAaTÏϹ*S9W¦^rªr L·LP[iå}$x ÅÁdÀótÓ±K÷3¹òÊ+}LBX®™ ìw©É¼R.)—@þ xÊEœß —@åK@Äyä‘G EÀ¢Uè ½*ŸìŒB‘ºÑƒÌr„†"àü1p&Ÿ}¯U}Îx.­º"H¯6aó·`Ì®ÄÖŸò-‡Xfab#]ñTçÞ«%GÀ÷jõçÌç d3·ñ³|Aò¬Q¶ö;K˜?7‘nÕô¬ ?'òÞ)<¾wê=ç:—À¿H ^·Ã¦ºoÍ¢ Ò‹I WÈ.±~ýz,Ä»IÿÂdþ#—@…I €+L!99¹fHÙÛÆö˜´}*Êßî!¢vâ¶7&Ÿ³µcù«À;!¾¼èLHàÿESŠY —ëàIEND®B`‚rocksdb-6.11.4/docs/static/images/tree_example1.png000066400000000000000000000426141370372246700222010ustar00rootroot00000000000000‰PNG  IHDR¦g´óJiCCPICC ProfileX ­YyºÖ"ÿ²ñ7EŽEó·~¬‚‘üã‰ÿ”òOK(€\6ÿɉîE/¢½hzmC›zmAûÐv:þ­³ùšw‚ÿ~ÍiÍ£áІÐ?<*u*Ó*ßÿüúk+RèÐcÇl`b,À$*:‰+bWá@+ª¿’‚ˆšŠªô5ÎÀ‚ÓÚZp÷ÿC V@sÎ-éh¡G¸ çaÛ?4)8ŸX:¸]àG‹ÿ%Ká°Â™Á „€86©-°3` ì ðÞÐë! 
jv€T öƒƒà(e œç@#hm ô€»àx^À±1Þƒ9ð¬ B@˜2‹#’ˆ<¢†è úˆb‹8!žˆ/ŒP‘8d’ŽìGò‘bä$R‹\@® Èmä!ò E¦‘OÈ7 Š!a81‚)Œ2Fc„±Á¸`¶b‚1Û0ɘ L.¦S9‹iÂt`îbž`F0ï1‹(@™PnTUDuPÔõBƒPš‚f£…hZ¶ÂX?BGÐt‹Ç’±"XE8>-±®Xì6l 6[Œ=mÂvaaG±sØŸ8fœN§‹³Âyà‚q ¸L\!®w× çÎî çÆKãµáÜôćá·ãsðÇñ øø‡ø1ü"@à%Èôö !–I8F8K¸N L–˜„Ô̼¨ i … g®1 0¼eXadc”dÔe´g `LbÌc¬bleìgœ`\!²¥‰zDb1•XD¬'v_˜˜˜Ä˜Ö392…2íf*b:Ït‹i”i™ÄA’#™¶âH¹¤Ò Ò3Ò33³³!³s,s.s-óMæaæ%2‹‹KË.––&––YVFVIV#VoÖdÖBÖ‹¬ý¬3lŒlRl&l¶¶¶+lCl‹ìdvUv{öHöö3ì·Ù§8Rf•79ÆÈ(YœlBö'§“«ÈÝä N<§4§gç~Îsœ÷9ç¸8¸4¸Ü¸¹J¸Ú¹F¸Qn)n+îî<îFîAîoë×­ \·o]ýºu_yøy yy²yxžð|ãá5ã ç=ÄÛÌûŠË'ÇçÈ—Àw‚¯›o†Ÿ“¿?6#ÿsŒ€œ€“ÀvJ>EA!A ÁhÁc‚7g„¸… …„ „® M “…õ…C… „¯ ¿á1‰)陵=)z_tELZÌU,M¬Aì•8Q\GïïvVŠÏE_œ¯»ïßï{JeÑÏʯÔoÎßÄÿ¨ÿûÀ‚€é@½ÀüÀ·AzAùASÁzÁ‡ƒ§C B CfBMB‹CçÃ,Ãʾ†Û‡×„¯F¸G4D2DúF^¡rPé]QBQ‰Q£å£3£G¶én;²mŽfC«ŽAb¶Æ´ÄrÂÃs_œLÜž¸Ñxýø’ø¥·„‹‰ì‰Ôľ$¹¤}Io“Í“OmÇn÷ßÞ¹CtGêŽÑF;O¦ )~)»ÄweìšØm±ût*15<õ^šJZ~Úçt÷ôÖ ÁŒÝc{,öÔe²dÒ2‡²6d•íÅî Ý{Ÿú¾cû~fdßÙ¯²¿pÿ÷ÿœ;TXÍ Ê½Ÿ§•wâ þ õàà!ƒC§óÙó“óÇo:ÜT R]ðùˆÏ‘Û……eG‰G㎎Ùµ“8vðØ÷ââ'%Æ% ¥¥ûJ¿8>pÂðD}™`Ùþ²oå¡åOOZœlªª(¬ÄWÆWNV¹UõžÒ9U[ÍW½¿úG µfä´Óé®ZíÚÚ3gòê0uquÓg·œ}pÎô\K½býÉî†ýçÁù¸óï.ø^l´i켨s±þ’ä¥ÒËäËÙMHSRÓ\sHóH‹gËÃ+ÖW:[7´^¾ªtµ¦M´­¤«=ïñZƵÕëÉ×oDߘéîëôé|qÓãæã.Ç®ûÝ6Ý·zÌ{nöõ^¿¥w«í¶îí+wtî4ßÕºÛÔ§Ùwùžæ½Ë÷µî7õk÷·êylõøî»']ŸmyðtêYijùçñÏW^ì~‰{™ýŠíUá°ÀpÅkÙ× #Z#í£¦£}oœß¼ó{?3þ}"c’y²ð­ðÛÚ)µ©¶ióéï6¿›xý~e&óû‡ÒY™ÙK ?öÍyÌMÌÓæW?å,ð.Ô|ָܹè°8ü%òËÊ×ì%Þ¥ÓË:˽ßÜ¿½]IøNø^ôCöGëO›Ÿ/W#WW£)4ÊÚY…OLPŸj`ö€ü"˯œk‘ȱ²³ Õgº Ü-| !!‰1¸™É•äÈìÆâÄêÅæËÂA%grvs³¯ÛÌSÍ;Ï¿Q Kð™°ŠHŠècqy‰tÉWÒº2E²_ä].)q)'©ŒªÙª7j hej\ï¢Û¶QLo¯þ¬¡“ÑeÓD³g:–G­m\lë6Ù“Œ#Ž:w¸LºÝe=Ì=½½h›Ó·ÜZâ}Ò§Ú÷4¥Æ¯Æ¿2àDàÁ ]ÁÔïP»°áòü‘L‘ߨÓQO£{·5Ójb c÷ÄÅÆû'8%%)$¯ÛŽlŸÙ1´³#å쮣»SS#Ó<ÒM2”öðeâ2ç³^î½µïRvùþœœòrsóræ:¿ÿðÞ‚´#; cFùs+¶*ÙPªp\àÓ‰oe“å÷O^¬(ªÜQåwʲZ©†³fåô›Ú;g.ÔŸM;QïÚ`p^öùÂÏÆ©‹.µ\.mÚÝìßbvEº•¡uêjoÛ©öÔk[®kß`»1ÝÑÑYt“ÚeÒÍÛ=ßó ÷â­ÂÛÉw¼ï÷ÉÜc¹·xÿe׃³ ¤< ~ìðDwP|ˆyèëÓÑgÏ;_4¾¬xuhx÷먑­£Öo4ÇDƉãŸ&žM^[5•5þnÓ{¥ÒÌЇòÙðÚs¸¹¡ù³Ÿ2|?-J~aýòãëÜÒÔò›ooVƾOþøðóóêêZüÍ0F¨2:‹íÀeá J \ o{ˆÕL9¤xfKVm6Av&öŸdeNo®î>”W›/„ÿ¤À+!>aG‘\Ñ>q¢„…ä©Û2 ²Ör¹ò…•B”/«¢jöêÇ4Æ´µt:u™68o,Ñ55Œ4j4^6Õ7K7o³xj9mµdƒ·eß$d'm¯ì 
î¨å¤å¬î¢è*éÆïÎê<>z{ÝÛܺåäÖÝÞ›}4}Y}§)]~åþÛ\•ƒˆAÁ7BJBÂÃ"o"Û©Ç¢hѶۤh€ö<æbì¸àx£þ„ω÷“j’Ó¶oÞ¡±“´s2åú®¢Ý1©iúéâ ³{g¶f•ïݳ/<Ûa¿foÎêñÜ[yõóÅço>lP ry]xýhyQê1J±q‰x)¶tâxï‰Ú²ýåÔ“ê•\•KU/N]«®¨I;íWk|F¬­;{ó\U}zCÀyÓ ØÆñ‹Ý—N]Îh h6m‘¸‚¿2Óúäêµ¶ÚöÃ×R®‡ßðì°èÔ¾)ÓÅ×ÍÒƒéYê½5qûÕÁ»ý}·ïõÝïïøàÑÃÁ§ž?~ùdxpdèÍÓñg“Ïß¾x÷ræÕÜðÂë/#Ëo1Éq»‰¤Éš·Ï¦ÙßÙ½?4ólVîcúÜØ'Û…«‹ê_–Ô—¯®˜úú;þJÈ{ÌqÔË€½‚‹Çkáç - )Œ–DâS©9ŒÅŒU”õ';žƒ›,ǹžËŒÛm]ÏnÞ#|çø»†¿ sЍˆÚ‰E‰çJœ“ì—Zá‘5 –ÏQ8¯8¤ôCETÕ\-D=[ã¬æ=­Òz]Ë wé•ê×Ôž0Ê3N1 7õ034—²`´˜²ì¶:akcnËi;²éŒÍ^Ëþ›C»cŠ“®Ó¢óy—pWI××nÅî.,½ž©^º^ ›Ïn Þ*¼uÈû •/âÛFIö[ï÷Õ¿) 6P-p>¨!82D.d*´:, \$üUDq¤•LíÚm¹ ·í&-5F?æ{앸ÄxÍøù„úÄð$餱ä28R¸w ìÌM±Ü…;ŽÉ´ùôöŒì=n™â™³®îÍÚç”-”ýnÿåœÝlr¹sGóÎŒ?dœÏšÿòpmAü“BöÂá£uEIÇÌ‹9‹G᮹ó¸í ¾SeMå'+$+V*«ÎŸÊ®¨18ÍúKíÀ™†ºœ³¡ç,ꥰ ãço^¨lL¿pÉú²n“J³t‹ðžVΫìmìíœ×ø¯KÞPë0íô¼Ó•×ÝÐ3л|[äÎÖ»£÷öök<˜(<¨ÿTî¹âËMÃy#ŸÆvN*Nƒ÷ ‰ŸŒó–¾GÑãÿ«öFßðZTÁº‹Û\a6KÇ’pÿèÀ—õ³G`Ö’Ó÷wÿ@à.‚L°²Ä«BÊ`#Ì3=aµe;ȃÙä˜?N#(³DkÄÙ‰!—`øÂQÄlÂDaa.ažb~ b¨ÁLý=Ü‹Œ±Tl)öv'ó²l\î#^ïÏÅ÷à´ 4Â9Â;i† †j¸ZÉ2F2^`üJÔ'fû™ø˜™.ð$ ©Y€yók–ZV2k ë6o¶Gì6ì=0—é!Û’ŸpR8ç¹Ò¹y¹/¬sZ·ÄSÎkÍû•¯†ßS€Y K0EHWhY¸Ud§¨‰Iì©xµD‚¤µ”„4"="Ó)[WºD…@Eg%3e]UU95)uI iMy-5í :ªëeu%7ˆo”ГÑW6Ð5´6Úbk’gZo6`þÍRÊÊÍ:Û¦Óö‡©}¾Ã˜“Žó¸û¹—y¬zQ6÷lUô.ôE(Q~¯œ{ƒ C®„…÷En¡ÎEï£IÅÜŽÛ– œø89w‡} ç®×©çÓ³öøeîcÎÊ©È:h˜Ï~xüÈ•£yÇBJLŽ‹œX-­è®ª¯.:½çLÜY¯z“óÊ—8šˆ-¸V|ó5Þò&]”žÌ[wÞÜãíw}Xø˜o°æ™ú‹«Ãº#ÇÄ&²Þ¾y§>“<Û27» ²hý5fùØÊÕ#këP AXoЀõgX I@¬! 
„ ‘AŒ‘-H"R€4ÂØÂ1šÌ˜ßßÂÌ¡<¨1‰C{Ь8Ö›óói˜•;Â\¼÷¯ˆÆWâ_ [¥„W "  µ sŒ:Œ©Œ}Db ñ"ÊäÂTM‚I÷™u˜«`Ö›Îò•5’õ-›?Û{0û,G™@>Æ©Ìy‹+æ£Õë\yˆ<7ywñéñýäïÈtšî)sW–àø"9"uOºM¦A¶Z®L¾T¡Dñ„R•r½J«êµaõEMV-mïõqºyê6ÞÑ›1`7Ô5 4.0é1]1W³ ZÖY}°Q³MÜÔmÏë°Í±ÏYÖe¯ëŒ»£G«—Ôæ‚­8ïDŸYJˆßX€àxpXÈ\Xjgd]”iô(-5V"®/!.I<ùñ޽)»–S[Ò“ödá÷ödgä˜ä¢y½‡²Û!>+*+*U<¾XÖ~2«Òé”põÌé«göŸµ«'7¼ºP{1á²e3Ë|ëݶêk{nvZu)÷pßBnÏÜ}~ïnûà hOß>-îþ’øªùµÿ(ã›3ã–Ão©Sßޥ͠vÏ®ÌEÏ-¸|¾ñEþká2ø¼rï‡æÏ’ßñÇFÀø4¬4™Ã¹ ÒA h`Æ^±‚¡,XêCæ0\˜˜ÌL feG áŒ?‰>°±1Ø3Ø1œ œí¸<ÞÿŒ L"4¾Á*JÃ8£ã>Æ×DMb.qšÉŒ©’ÄDÚIúÄÆ< ëS¬Q¬+l9ìâì×9üÈLäVÎ(.y®÷pf§ò¸ñªð±ðÍñ t 6 U —‰”ˆ«¯—h“¼-5$=-‹È ÈoPðQÜ«tYyRU@Í]ý¨Æ - íhkºìB7vëKì3\0ö34³7ï³´·´ñ·]°£ÚO8ú8 »ø¸NºGyüô:¸Ezk‡/ëWà„¾.1J-‡gsé˜ù¸k yIþÛ7îäNYÜý4íZFmfáÞäl×µ\rÞÒ¡áýG–;X’v<¾,âd`¥ï)ŸïZJó9ÕÖóû.iÊjñkÕmcm¹^ß‘xS¯ô\¿µýŽæÝ÷*ú=² ì{t÷ 2¨=ñ´üÙÀ ðRñ•ËpÂë‘úÑŽ7÷ÆÇŸL<œìzÛ8U:½óÏ{Mxb|õ¡f–öQëãÒÜ•ùØOJŸ¦J?Û/bÏÙú•øõÒÒÖeüò¹oNß–VŽ7ø>ú#ý§øÏÎU_züc‚ÔaÍ^É–‡WW¤`­2€‡VWW*VWTÂdã%7"~ýŸCg¦ÿOTÚLG=&»éï_ÿ[ ÑX´Ôû1iTXtXML:com.adobe.xmp 773 166 «{˜o+TIDATxíQHéûïß=¿ ÿƒ¹Ià‡¹Ià½QëEš ¶XŰ‚laëBmÁvá_Z*)•.Ô.¤-Tºxp‰K¬¢.‚ÂAáÐÂ\7)’› üÀÂÿLcæøN2™™d¾sÑLžyæ}žïç}ß™'o&ö›‹‹ ‚ @@@Là¿9X;¤ƒ€€€€LõÆ€€€€Ó  rú€~ÔC   N'€zÈé#úA@@Pa €€€8À?úAÀ2’p¼ÿ!}J<Áo§¦ýnšˆ(þ~Xü~6ê+g%î¤þ*¯Ëˆ–\,KAZ `Ú|—$Éí–/'ØôÀú>^ð£dÓ˱ËÞÁá¾^WµÍüé»Ôj"+U Äåñ\ùÕ×ïs—6{’(ì<¿‡w„šFu4W˜2ßÅt‚NöH$/ne ÈÚYM`}ÈYý µö!ðåÃ~àarvZüÈöBQIÆë÷—Ê¡6jÉ4™6€µ´&滵ýbxtÔC†#EƒN! 
‘98šUêžn•í™ÝÚ}Ðe‡L‡ÈdŽ Bò&pÈDpˆLæ€iðÿ—I§;©¿ ÄëòG'¢åÿª›Ù†c;ÚÔ® `$b‘œ·Ôž$InwýÿË4r‡aŸÍ´2Œ@-Ëd´ tî‰@g!ê™M˜3Žil‘ ³M¦‘ˆ[&ãÜ75Zry|W~õõûœq"ùÛ”DaçùÝp8¼#tý'pãø¢%3 ¸¸‚1Gröt‹ŽíH$?=­L0¦‘+†ì$¦t¾”š\ÜÊVNc¶É4VÎP½òÉTt®‰ íÌÈ“ÎìïwŒœ†l–¹z!YÌTn’Ì©Í4²Û£V.™šgwôõ;¿5=Pé“rYArb¹$1Wݧ¾b6WéRçIKf­6ë‚HÙtd4¶òE ö"-Wk7IÌfÅZöAÀ¶Ø#Y<ýqáõT˜#9{òGžÌÿ ºÝ=.o€œïÒáÌ4êâî{²ùlØïñø'H&'OBf›L£Ž@pÐ&à}·Ç߬ø0gÓX9ƒï5w[>‰¯½"å5f›L#_Çy]WÕɦ﬜ķ薌­.Ò‹n0'$uRúVëˇ2ôÐ+1󮱆o=Á ‡HÅÊ7 —¾Þ¾©©á^÷¥ { `_Œ‘\Èe½óîÜébxté“üùŽŽf¦±Yâéû3í—¯ÆÌ6™Æfá5IØX\œ ¦ÈÛ#~zœ9ã˜FucÚqgî™_‹†|¥5(Ù‘Ù&Өݬ£è«‡¤‚\Ÿì¾{º¸ø4ñá„“<]è÷ôÇÈò‡/„¿§È½Ÿ†¨ÛÓÔ¾Á[oßÞ§Å6èP.—/¿J×YŠóo’ÒÇ;¦Q·@Ix>ºp~#>/(!Ì6™FÝp€“€ÛÛ?1qûá”—ì'v>SæŒc™í1ÙôË•üØölˆ®T˜m2ÕS°SK ÁïË.Ý®.Å LLÝöúôÐìOn—W>æúaž,|8ì÷‘±Ç}ÕREí©Õæ¥ýêžËGß»ü|×Ux×±®Žd7Òù‡ko…|t%•.•¦ËX,Š¢Ëã¹:Õ,$!‰}z¼÷,ZvÕHÝ(, ºxGFéwg&—ÏÄ[!¢{^;ßÅÃ¥}šÓdø“’ÙHøÞúÞ°î@ºtu¿s£õ!Ú'b>W …lžîÉO|º½tõïL(@(ê륛Rûx£cdÿÑRjèñ¬¼>¨í©n³ä®þGÊÒí«#$“ÉdKC(NBúy8f›L#»UXAôïqw”ûf`ìñöÏQùdæŒcK¡xæ»Ç_yfD*ôž`_P^>vë»°èæßo...šÊû—èƒD¬¿8×DÄÚSè»Rý»ÚãØ3HÂV$Fަ¯ûK+:9ÔµMe”ÄÓý— Ëùõƒ·}MFRµ)gÄ42RmY&£M˜@ ãpOyf©'¶ÆŒSMCÌwëF†öúPÜzSßßS£†Å Læ(~%ÄK—*›¬RX—LÕuTJ?]:ˆ'_4[ Q.ÌYÈ4² ¶*“Õ&l Ðqx'‚ÖÌbÚ댘ïV‹&ׇ¬L±AÀ&¤ìç …”Gæl’SÒpˆÌ6C“]EÀ!Á!2YCõ‹ l   N"Ðè÷eNâ­   Î%€zȹ}å    ÔC   N'€zÈé#úA@@Pa €€€8ê!§è@=„1   àt¨‡œ> @@@õÆ€€€€Ó  rú€þæ H9AÈ6z§œé™ÒÈÓ*™‘ÉE¨‡XT`Òù±XZäðìh‡Èìè>Bò&pÈDpˆLæ€iòÿ·g¶#8Œ@¯3ô:D¦3:*›'à‰à™Œq€ÿÏ•&hL ³53÷ú¼ÖçaòàVÐ]c¬l†Ü²Kçk2/kUdÖ Ç>t=­‰PcÇ|ï†Q€z¨zL& ‰¹|ÑUÌü{@’p‹^¿?²½PT’ñúý¥r¨ƒZ2M¦p `-­‰€ùnm¿õáHÑ SHÂF$FŽf•º§[e;Df·vtEÀ!Á!2™£Ï1±À\ýó÷8Ü:ÝÅ!2;½›» 8d"8D&k´`}ˆE6'ÀïíÔÛÐ    À"€zˆE6'@=ä¤Þ†VÔC,*°€€8‰ê!'õ6´‚€€° bQ @@@ÀIP9©·¡@@@€Eõ‹ l   N"€zÈI½ ­   ,¨‡XT`p‹¼ÉYág *LúœN|WB2³•±B(oL€H™­ÊèP^m=kLBû&“N”îôæ1“,˜2öúÏ;Zöûòa?ð09;¬6,йžá_Þ¬‘>wÕæEÇCDH½>®š”)—ýZ(öx{}žKïzŸl:2¹D^j/‹„hzÖhà[déëñ±÷Õö^?É<š\x¹;ük UµèmÊ íÄ=üKò£·×Uø²4¹ðîpøÙˆ_ožíð7E»œ¸°»¼zN_éð¶õfI c/’÷zètwõxìLÄ §‰É…T0¾–ð÷¸z|âŒo'Gýî"_“Kg½Üx;ÁŒ’;ž[JÅ“{Ñ K*ww¿ýsÚd$N¯‡¤¬p^ÈŸ‘sï—Ì`Q]ÁPÐC¤ô7Wä+{`í`3TÛ'RýµþóÎâ•eXͿٞdßÝþ¡dr/Ìß ÇxÇ q~¦É$î¾'›ÏJ‰N D΂¿›yÚ ñK%µ§¾4·ˆX—|‹oÍÔNrÇ±å“øÚ›Ý;/m»¢[2>´ºp¨}ÿ÷åB«hÉmÒD™\âéû3í¿\r«0ûÕTí’°±¸8M‘·mpá7Q»¸3÷€Ì¯EC¾¬Ù=¬#ž‰@ä«ûê]ù/&vk¿vבnû]M"å…r4þóîLdc"¹9­ûæ-äY½ûîén~K ¯å¥i~ 
4[¦$<]8¿_·Á$7U»ÛÛ?1áÿ6˜XI%v¦~½jëì½¶qÓ´gÓ/WòcÛ³!"Ùú¹Ó€ÐÏTã룄ò_ÞÏ=šÌ¹ŽžÑw¶ÛLBµ½Jþ<ÐÛóåã…;˃¿Í¿ã]׿¡T ‰å³±WqÛ~}h"ñ뽇Œ‘OŸÎþ:£ºïÈ×uï5Ç^)xJËûõ_„)‡˜öªq`bê¶Ÿˆ™ýÉíò^=¦Âï’ǽëò©m•C &ɤ3<û4ôxïYômÔZß´IÚ‰gpd„‘~wfrùL¼²Ã‡¾ök—ö)ñɰ¼ÒJ·;‘ð½õ½é>;¨W2ºòoûÈá<>y®û|#?=Ø—£Aí3®à 7eù}›/€òC“Áþ ŸB™ ¯ÚKéuÙYðÖ %YÂ~ℌmÛ¶*Ã7ˆ”Ù]Ú§¨¼¥¨Ì/Æ–6¦¢Oüæv>ê!6oQI!W …l^ìíq)¨n/í¯3¡@††CnI‹.í;€”ÍæIQ  J™L&Ûôû´}ÙIoU+¢1ÔF2i14K‘W÷ Y!_tõ–®€Æ§Þr‹j™­j'¥àró™Ô»3ræ0e2ÜýË÷S*þËs‰ûko‡ÖošçÖ¾Áÿz²¼|˜Ÿ·m1ÄDd8woŸ—A鉟¿õž¥„Dm¸8ÄD¡ "7+/܈o›|×o “ÿñ@äU‚óóL¶¯¯G~ŠÈŠí›‹‹ +âÚ*¦´{7²q#¹y«²:'ef"sòãÔÊæ½·÷ç´‡i¤ÏÂ&bRŠcàÞúæt_å´«¯§_ujë;3dJÂN$¶R##°~°Yó½š#¦îš¡þšx&rG6±Ç¿þ<.¶~3E{U¦<Ο>³E§WsªÛ1HÍ`šŸ±qyh úgM“åKå7Ûí1=êGé­Y@ÒO'—\ÛGf¯‚°$7¶™DeJòµr”}-Ï;dʨì¤&Ÿr¯veó“ÊõªdU— .€€€@gèæßÛwVO [« ²Š<â‚€€Ø…ê!»ôò°Šê!«È#.€€€€] ²KO « ²Š<â‚€€Ø…ê!»ôò°Šê!«È#.€€€€] ²KO «ts=$ ;á™É*´­ÅåOžß³µŒÚr6òüžmI´ ò+â÷lCšæ5É/“ßÓ¼ìÛ‰_&¿gÒ4¯I~™üžæe߆Hü2ù=Û¦yM¶(³Áí%î¤þ*¯Ëˆ–ÿ/øVuémS’$·»éÿ޵X$ç-eÜZxvhÞ6ù“ç÷dgDxSÒ8iæm“?y~϶bªd-ÕÎN‰iåÍ“y²#o þ.æ÷Ô3xódkgœMM„0®` Wf›ü2ù=Ù@ر­ÌTUF]ÚyñËäöÔÊS%H6ðæÉ:Y#»I¶UÝ,·LÂ祈¢máÍS»Î#¼Z“Ùh}Èåñ\ùÕ×ïsœ)s¸q·)¦wÃáH$/ne9Zf¸¸6 Snc&L#e*«IÙÓ-ú¾þé©nýÒ.ͽ²m|• úÚäOž×SúœN|WÊjf+ÓLJWÙIQÍV¦g­ÌL:Qî·™„PîMvw\UÓè$ ;ÏåÝ©´H½™Fkµ³Rbk×—§Š +Œ¤ÕÆ;’ áòd_C˜É·„HÚ™‘/ ô öÝâNõ ¦/—ÌR÷py2ó$¬)CôåY?Bth׈K&?vžm!Ì@ìî°ˆÒ‘õwÃ6a2âbÅ?Bê­ü¾A=äEÇoMOT§I¹¬ 9±r·sÕ}ê+fs•¢‚ÔyÒJZmÖ‘¾{_mïím¿8yýrW¸tÄl¶\a\[Ûv—W啤b¹ñôÇ…×SñäÁÁöã¡ý…»»UEœq$Œ½Hîí}¤ÛDŸG>«å69Ck¹&&ï,eî¯%iJ¿Môµž’;0¾Ü–·Ûñ„xzéJž¥2¥ìîÜR*¾þ‘vܺyh•@ˆ!m6)ž&ž.¥ò/¶ßFCAÃRr{üA¿¼ùŠ»ûd*>Aë>ke~=Þ ÇÑ>ŸÛíŸz6O>eªã¦®;øIºýCÉäÞÑæÛšs˜Fkµ3SRR®ÓÞbžì@í`5À›Ùe]C˜É·„°Ñ„ýÁ ü_°’}«*í4ùÊÊ“9eZÏ“S{ëšDQ9Mg[Fk00‡å@ˆênØ& ê@JŸ{±ªô3ïëuõP];Ùô•“øöÝ’ñ¡Õ…Cz¿ FㄤNJ_%|ù°B†âƒò‘áY××[ñôý‰öW/)ÄÛ75005,¯E²‰;sÈüZ4ä+}‚“Û,ä²ÞùaWît1<ºôI^8ÒŒV‘«sô«¾ðbb7[Z\j½ÍVÔJy!OÈ£IåÛ­ïv>Ëå«Q) »/OÈÔ÷r¯Öfsb½¡qrþ.-ä$Iüò™~'˜WÊyuwèiß zˆT”‘]n £Qæúaž,|8ì÷‘±ÇÊã2òÙjO­6/í—{ôF‰}z¼÷,z5K—–÷ÄÃ¥}ÚÈdø“ÒÔHøÞúÞ°›ÞFó×>Þ ùèB] ªI@E—GùL;<ý¢ŒôùF~z<°ÿ®ô5\£6µ2êˆüàK°?(ÿLppd‚¼NÐ…=O£”¸dÒì„ýÄ ÛVÊ!Jª:£Ä4hÇ3rÿíÑ}Ù!³53÷—GyºNÝ•å•Iïe²B—Ò^5«F‹µ+Y]M‰ö²¾¡Ø,ݬʰ;š×«”uv ÏàÈ!#ýîÌäò™x+dõì—¬TUÖ”iáÓÞ(P;ÆeÛõyV>h=BT@ä@êî°ûn8]¾¡D3΋Õe/µ×h}ˆ^ Ä|®@ Ù<Ý“¿õq{éªï™P P(Ô×K7eôx£cdÿÑRjèñ¬¼.¬í©n³ä®ú‡”ÉXŠ 
¼º?PŸÞV¾t’Ý„ôópxæðÒ :W‡ÁC .mÛÛëq/ñÆ×’ãOphжqž/ÐG··–—ÈÐí@åþyü|tt4¢<©G’yIRV8\^> LõÓ³´©ÝŽaGܽ}^’J¤úEÒq*A}6ªQJ|2KóyùäFœÖÇåÍZ™4 úp–rßz:÷úüñÏß–úÑJº|2¥,ݾ 9Zce2Ùò£l £ÕÚ)ÑÒ5›¢w€UÆK;_Ù×¥ÇD#B^Ì~N½;#7üôRÙ P;ATÚfçɘ2 òäRu×jo¨’t[_˳£6O:eÔó½AžM‘‹!Õ=®A ¶‚(5ξ¶ˆf ]«vÑ^’2?ŽÎÉÏÎÈόス?§=¾‘䋩؃Øjɸ·¾9]ú¥ñO?ú´’ýþÛʃ>LOf›¥¦êþ‘¾ž¦è/äìAlR µ~°Ù§%EzàüòË­º3u¾¥+%ý†¯‡ôû‚òÒ{8ŸŠ=¢!©äo— T}ãSäSŠþ¸ŽÈ×7M–yGA74ÿâ7úeÝ|šmj´b¨Ù3øÛ‹©ÉG±Ò:Ø7ÛóröÚ)qɤ‰ýõ^^ŠVË¡FmªG£1Jþ¦¿L~϶&ÜîÆ[’ÙÿŸkfkfîõy-ø‡Éƒ[Aw=°v°rË.v3ÖäSV $_+GÙ×ò¬±C¦ŒÊn@jò)÷jWv1?y©\¯Ê@„FWoŒ¦GHùšËzéÎzHsù¢«˜ù=ö€$?þà*½~-~d{¡¨pðúý¥rÈvF­äÕÝ§å ™èbÛoÚ5êñ©5’»lÌ3µ3Rw¡¡3¥Áe„¨¯*UKwÖCŠ rocksdb-6.11.4/docs/static/og_image.png000066400000000000000000000423471370372246700177530ustar00rootroot00000000000000‰PNG  IHDRwnS ÄN DiCCPICC ProfileH –wT×ÇßÌl/´]–"eé½·¤.½H•& ËîKYÖe°7D"ŠˆV$(bÀh(+¢Xì "J F•ÌÆõ÷;'ùýNÞw>ó}÷žwçÞûÎ(!a¬@¶P"Žô÷fÆÅ'0ñ½D€6p¸¹¢Ð(¿h€®@_63u’ñ_ àõ-€Z®[„3™éÿïC‘+K€ÂÑ;?—‹r!ÊYù‘LŸD™ž’)c#c1š ʪ2NûÄæú|bOó²…<ÔG–³ˆ—Í“qÊó¤|”‘”‹òü|”o ¬Ÿ%Í üez6Ÿ“ †"Ó%|n:ÊÖ(SÄÑ‘l”ç@ ¤}Å)_±„_€æ ;G´D,HK—0¹&Lgg3€ŸŸÅ—H,Â9ÜLŽ˜Çdçd‹8Â%|úfYP’Õ–‰ÙÑÆÙÑÑÂÖ-ÿçõ›Ÿ½þd½ýäñ2âÏžAŒž/Ú—Ø/ZN-¬)´6[¾h);h[€êÝ/šþ>ä híûê{²yI—HD.VVùùù–>×RVÐÏë:|öü{øêù ¢‚¡[£°R¡Fá„ ”"MÑF1L1[±Tñ°âeÅ'Jx%C%_%žR¡Ò¥óJ#4„¦GcÓ¸´u´:ÚÚ(G7¢Ò3è%ôïè½ôIe%e{ååååSÊC „aÈdd1ÊÇ·ïT4U¼Tø*›TšTT¦Uç¨zªòU‹U›Uoª¾ScªùªeªmUkS{ ŽQ7UPÏWߣ~A}b}Žëîœâ9ÇæÜÕ€5L5"5–iÐèјÒÔÒô×iîÔ<¯9¡ÅÐòÔÊЪÐ:­5®MÓv×hWhŸÑ~ÊTfz1³˜UÌ.椎†N€ŽTg¿N¯ÎŒ®‘î|ݵºÍºôHz,½T½ ½N½I}mýPýåúúw ˆ,ƒtƒÝÓ†F†±† Û Ÿ©-5j4ºoL5ö0^l\k|ÃgÂ2É4ÙmrÍ6u0M7­1í3ƒÍÍf»ÍúͱæÎæBóZóA Š…—EžE£Å°%Ã2Är­e›ås+}««­VÝV­¬³¬ë¬ïÙ(ÙÙ¬µé°ùÝÖÔ–k[c{ÃŽjçg·Ê®Ýî…½™=ß~ýmšC¨Ã‡N‡ŽNŽbÇ&Çq'}§d§]Nƒ,:+œUʺäŒuöv^å|Òù­‹£‹Äå˜Ëo®®™®‡]ŸÌ5šËŸ[7wÄM×ã¶ßmÈéžì¾Ï}ÈCǃãQëñÈSÏ“çYï9æeâ•áuÄë¹·µ·Ø»Å{šíÂ^Á>ëƒøøûûôú*ùÎ÷­ö}è§ë—æ×è7éïà¿Ìÿl6 8`kÀ` f 7°!p2È)hEPW0%8*¸:øQˆiˆ8¤# ÝzžÁ<á¼¶0¶-ìA¸Qøâð#pá5#m"—GvGÑ¢’¢G½ŽöŽ.‹¾7ßx¾t~gŒ|LbLCÌt¬OlyìPœUÜŠ¸«ñêñ‚øö|BLB}ÂÔßÛŒ&:$%ÞZh´°`áåEꋲJ’Oâ$OÆ&Ç&N~Ï ãÔr¦RSv¥LrÙÜÜgIsKÛ–6žî‘^™>!` ª/22öfLg†e̜͊ÍjÎ&d'gŸ* 3…]9Z99ý"3Q‘hh±Ëâí‹'ÅÁâú\(wan»„ŽþLõH¥ë¥Ãyîy5yoòcò( z–˜.Ù´dl©ßÒo—a–q—u.×Y¾fùð 
¯ûWB+SVv®Ò[U¸jtµÿêCkHk2×ü´ÖzmùÚWëb×uj®.Y￾±H®H\4¸ÁuÃÞ˜‚½›ì6íÜô±˜W|¥Äº¤²ä})·ôÊ76ßT}3»9uso™cÙž-¸-Â-·¶zl=T®X¾´|d[è¶Ö fEqÅ«íIÛ/WÚWîÝAÚ!Ý1TRÕ¾Sç–ï«Ó«oÖx×4ïÒØµi×ônÞî=ž{šöjî-ÙûnŸ`ßíýþû[k k+àäx\S×ý-ëÛ†zõú’ú…‡Eêjpjh8¬q¸¬n”6ŽI§¾ökÎ×:úçöŸð8wÝçúÅ7®Þœw³ÿÖü[·‡nón?¹“uçÅݼ»3÷VßÇÞ/~ ð ò¡ÆÃÚŸM~nr:5ì3Üó(êѽîȳ_ry?Zø˜ú¸rL{¬á‰í““ã~ãמ.x:úLôlf¢èWÅ_w=7~þÃož¿õLÆM޾¿˜ý½ô¥Ú˃¯ì_uN…O=|ýzfºøÚ›CoYo»ßž›É_õÁäCÇÇà÷g³ggÿ˜óüÑ` pHYs.#.#x¥?viTXtXML:com.adobe.xmp 2013-11-12T12:25:13 Adobe Photoshop CS6 (Macintosh) 1 300 1 2 2 300 375 1 110 ŘB66IDATxí|EûÇ)$ô@è½7é‚( +*¢(ö¢èk÷µ¡¢bEÅ‚Šï{ï *Šˆ€ "š4‘!!ÿïî’e™Í].—KØùavvÚ>wóÛ§ÎÄU­ZuË–- R@€ €¥J•*€^ƒ. (à¥@€4^jù€bK&>¶=½(PÀ @€2A‚Ë€bLebLР»€ (c$¸ (P ÆP&Æ º (PÀ @¢q\†¡@Ýêjß\YqúvŠVÿ¦bp+ @@K¶‡!³ÍëëÅ;•5]YiÊZ©ß«Og•.¡RÅU¢¨J Ù0¸Pà0§Ç¿À÷7ÌïàèVº±ŸNï-•–vJR¦TÄÊo]£¬,eîQ\MûI½nÖž¬0=· Ža‰)äß½ƒî¼PÇ'“VK»´hI‰O´éØrÍð ãâT²’g7/«Š)¥¤6¡!ÉÜ8|) ŒùÝ'щt×%jÙAJVYøbejêÆM&Mi1iÖêz.I­‘.W5“©=›U<9@“˜Áu@( Ì~?ƒŠe”ö¦’›ËBø—ÝV8>ˆB[5¨[ÚçßÔ:ºÃÊRå3´ÃÓ0KñET4ÉSd Ȧ@ —ɦDöÿ×ôÑã÷H›mˆÉ.´þhJkíš"ŠïŽK–…2®øRBíÎÑô…ÞA> @@¡—qJ@‹¥’µ9KéÛ<â%ÌU¬²;5 1@Ï©º”¤]éÞªA> @@½PfßOáü“´ìc½ô¤’0$y"§Š£èÝäŒMذkñlõ¸@s–ìë*È(àR °Q¦zwèQ¦ymM~N¯<«²•¤ß³Õ1¹N¹©„Æ>¥§é˹Ö*8L)Px(S9U³þ§eŸiÎÕb1ÿkÒÎÕ¯¨séimœFJ•Dvù‚¯É1!(ÕÓ×uÅHKc¤€BQ P&!^ŸŒT‹“¿VÍúhéƒ8ЩBI}>Z>l3/‹l ô@2*©Ÿg¤<üp£eK‹YÎxþ®0ÿx=ö†ÿ^PP  À~($Kvr’Zµ–Y^'Zdɉ…„oû=­÷¢óúàI•m,-°ÝyñˆqR1mÍH¼xlËySÿ¶qê 7϶üeÐì¢ôY¶Ú`ÓXÇ裩٭‚ÿ ABZëÛwêËÉRCMúTmûªÇÚâW¯†˜bA_ØC“ßVÙjÒ;\#Q k,NYLüÍÚ´ÝFa¸›$mÚ¨l”¬o5yó u¿¥ ¦ôPàP£@!ñ2mУjñ™ÞýúÀSð¦sô RÒZi%¥oŸòejóZ›Ê¥î¶x–í*•šqÏ Ç(þ†ÞZJT3U5sºúWJHÖwÓõçŠÿ Á  Àg¦ð'Z¦„¼N¿Ž+ü‘5âre­PÖÏÊúRYß*󗸛6‘.êֳî©ñYß+ëkeMTÖLûC6Õ¸ü]SŸ:³ † (p°SàDKÖ¨¨ÑýWª K»œîúZÃ^-<2ޤ‡[Š!K½‚"&A™qS•Eš±$¦&©Ä‹Á/>G_ƒE)H DEBÒ˸sp¼žyUe*Ú.üéjÕÀ½Sà™»èÆaA¸ö:Ø‘®"Åö\×óÏzÕ×dJvtŠ>¡lØ>Ó`€€‡ ;Ž©v%-ùFª Å?hĽò…µ?K!¤KNÐ /Ki6ã…VØ–â6 À¹°} “Á£·¤ÅgY¦¥õ6_âTÕO_«ÃU…0Ó`ˆ€‡€Ä´|VÌÖ›?éúG ”Íkê´*Ù¦ %`‡“@LÔ1¦hë?‰WÝß²MÓõ×ô]j…M‚DA (P Z ð/Ô¯Fj; ;;•Ä?¥ ­ÚàÆÄçl¤À¢”bgô2`÷ø.ΜËRÿAµW>hùêÌ¿úwYU¡|º‚½©²¿¬àÿ€QPÀ+ú«ƒNMKJÚc)b€üñ@¶òE8âr³úw^9­ïÌÖ×—‡‘¡¯]Ø›"N-š¨_]rª*×°ûÌÒ;ŸhÀÚ\¤€‡ eÖЗêü~¶*d›Æ}{z'Æé²35r°Ê4´£—Ù KŸU¹ ¸W¾Tz|RÖ^ˆ)­¯4xwb­7ï˜R§þvk‹¼Í*Q2ó¹‘3­ Y4ƒv«*áÜRƒÚêÙNƒNWÓ#$ìãôƉ,Ig Ñò&ªß_ÿ ¬) 
ÀáDÂC™ªåtýùºá©¼´Ü^ºÛuï«1&v¿žzp°jµ–6ÚJ„#e*~«ðå­Q<ã˜k3ثʲ%ý½¾È×Û²¨úØI+îkºÀª IÀ—4ÛöTÙfpÐËü­FGYv÷_ðµñ¥ÊåÕ¹¥®ì£nGIéaŽÛ;H0:½7|)3É FPÆG¿ à§@a LébÔOÃ*ñaµ½ü`ZèýÑú“Ë¥ã;hÔUjyŒ"`íCrðÅ響E©åw¼Øâ/ÓŽö(©hV©¢[W¡´­‘vµ0IÚ²1qáœÒõªn)›²Û²j—Õ}«÷mNwÖßâEtt]ÜK}»+ <¢-x”®?¿Wj)•©iƒKº–/Ô·ïê©75uÞ¾¶A. ÀáC‚õ—ai_rº¬rxú®µ8KúÈ”ióBU:Y;c¡§èÐ\#ª[o»ç•ö_/¾0¬D›»_o*¡ïf–›¾ u`ï?K”ʴЄDóT ~¸Ù˜—ÛžÞgî[wΈߖeY»÷(õmÜj=ÄCWZr_À¥˜ýh¨oȧ艇tó휤ÌRúú;}Go«,椀‡% Ö_æŒîsQ·­í­¿0›À,Цڑ¦N—ÆbšÖÑ—êì¾6v CAu‚´âèk³¤ðÏÅŧÍ/ß±éºZUwìÅç®Í}té´¡K¯ Ö‰(0A%*+]¿¤a÷.2siéŒqIE²,áº:ë8=÷‘ŵtM (pÐQÀûÒÙäQOüy[ zï…j¼üY1.•ƒÌáNëeÚ5Õð+Ôƒ¥Èš\f{îüǜ\^kêÑôÀ+ù"zjI]Õ_C/SBUûH6Ô± Ê*¯ûŸ¯7ôñ£îúÇ7#¦‰ÏÚ˳$hwz|Ú˜¸åËÂÜÃ|HLÉ™Þ:¶rׂ7qkƒn8õÏ!=– ¸±"')¡áZµh©Jeµf£~ýS»—«È±úõ#Ý3FïLr™€ö£@Ìx™Æ55þM{S=Nµ?Ã’fq²±D#mÛ®GRó1Eâte?-ùPÆ*P@ Ã*ló#™éqS~O…3™•Vzë–Ä} ËyóI‹¬·òšK0`;š £œ>û®Â9WýÛü’º  Ü~zJJɲqBS-ý÷ܽWgÝ«ËÎW‹~ÄdÓ'ø? @Nˆö·jªnºHÿAra™£|E»ÁÊg­¢"©a½ÿ_þ@7?¡µ˜–ó‘Îî¥QƒU£•­B^gsWâôYQÿ÷AµënR¯?^¸öWËíDT;<ú2Ì •Ó*Y;:ÞÐiöMÏë÷ãø;gYšiæ  À÷P“ú …^‡±*jíªÕ/J¥|#hPà ¡@ ´¿ûè‰[•T=[xa ³>Y™h+¶èƒ7tÓú}Y¾(rBG=t•Ž<ÆÞ±áw»+PÀ ÆrÒZõï²òÌvï&%gY0^0î’á/%(P.&»E\\VB¼%A!žÀíjuuÅeZ´R“Þ²[B+ìš5ªXAË&hé:mبåëµz½Öü­eë´u›6m׎]Öf£ðn›4H °Æ@š-[ÐLD“^ü¯.¾ÞƇ¹pð¥¦ÅG|÷nyRSçDӭۦ㑺o Žëeƒ\ápÔ¨`3ðG§@Á<„’5-Ë楴]Ÿ»05´ò§2úî§Ôñ_Ô»åìyõê v¦úê5pƒ•¿àx½<Ú–ž–Ø<ŽÓ Ï$a/j#ƒò¿—x¨te¤[X³î­Ü ¿7kùfMþUo~fÁ] na¢G™1C4h˜„§K‹Ä"„£IÔÌu×}ü}¾ˆÙ¬®îºLga‹-ËíìEŠÒzëë*ŸN©6ì¹ժﴴ3ÞT\Û¶$ž3ªmÃÚ›¾|þ^Ö®>¥¼=[° ¤ „KÔF\:%Môûd5꿯¯ö õÒH5io³QlŽx§áV¤7ÊùAàƒÌõÀ ´<‰š;IÇ^¡¿£Äsw¤ Pà £(“œœœž¾¿,ÁSœx”c»Ø1,W|Þjè9ºa„߯Eit¢ QÔïԸÔ´…„f„þY½n*Ž`’xÒÈö“'7KÙÒã˜uÖ¾ ^>¡œÞ›Tù¾çÚÿ¸´äE]ÒÊT´=wqÉÔ¬¹e2¶+¥l†Õ' Àä%kålµd/@{ªhqRó§)9]Éô'µÉ¶”oPÅÎê^Ac?u#È8,(°bòœ8Âí·ÛëÇYÞpEuç=þbž»ò6(S\WŸ«¡DQW¶wœB!RÎÆ/„' `_í8w8Ôd_™›ËRñd„«8Â#­ÀkGÆ)¯ÿ{¿Úy·W¯õêÙ£¾-Q"ÓâqHÀ *íµé¯Íâ¸]Ù™Ç'ˆÏñíÕ¿‡ZÖWµU(­x0|¡íNíø[¶è¯-šµH/}hyÊ,ÿÆÖ…;L–‘ÛÔöR3Ö’ž‚Pà°¢@4(Óµ•RŽ”–Ú/s^ã¥tÚ…ú`J¾è6è ¼FeêÚoþEvÏ•5ñûr¼vä-çýÚíè –Pã$˜‘2»0ïƒï6_ß{É^ÈYÉN…êÕjíÐÁ?Ö¬´µj¥–; w³ôÃB$™¤ÅËJ¯\[´a“mVC ¤h¢?&«um ËÒ}õ“ø’’U!EÕËeì&¡¶iÅ:+ì`Èf§ÕõËlíÜl)höì±¶µþf*©¢J Í R@ÌѠÌE¨c‘X¢¤?:_Ó¿·n¿PMÛÛÎ/à Y#Ié»ãï}¯Á7Sg”ÜÑõè qÀÁnû.ìÃFõ뽪ß«,‘ô1b‡‹fÝ}ýK)‹2Åáeöè쎫Þúôï6íV7¨±ÍRå ;Õõáxs“¶gc„=F¸?é»´r­õ •­PëóBÝ Ê 
v0hîϸtB[ãƒP\iºîÙÜ[åX£g'+Щy'Û@³4Û„äTÝ£ø„¬”b A\Jñ Ô '9l ú2 ûTÀ´B¡j_Òö¯¿N]šþ½üµ’³,|°à›â4|¨î|.»fð@€@wFÚ7{”G%áè/J*m†Ö£àÌc:º…î¤c{Ú ÅŸÙø@¸i·3³î;kaý”Ý—ö\‡jÆQÊ%TcÖ\:̔ӄò¢ÚùOü®Ìø<ÉI½BË——[4o e³uÙúj¦;d (P @(g”‰OPœÃM0Ÿ8íŽXÐp¦ß¼ž‚tæöR_ns1ˆBUìË5ö%l‹Ý3~·G4ÙòP§Ù–L„ö×™©Ã°8òâ’ L%µySâ‘·vÙ¼9iÞ#“ªTÛµ×–ìŒêtÈT ¨m12¯<§ËîÙ+¹U‚L@€A<£Ì?[,ßÖÒ©öd¶«z5I{†çšØ&îÖ‹4…EнW<a8^“¾-·m[‘ÞíÖ$šˆ,ã$¦†ìƒîdq!¦¢¾ø¦ü/´zѬ»®µ"œT\sg–Zö[yíJøjVùóë­ÜÛй ×2V³D¼_¦èúQú&`a²)wèý_µjÕ‡z(..nZwÜ•’“ÇÿÞ{ïzOz°Ï(Ãü^ùBÝúÙód%¯×°ëôÚD-ÉÉæ’œ ËÏÒ}WªT=ÛÜó»ÍS¸b}$âƒÏæ—–¯ÛòµÅ,Dpè€$E¼%2á4!Þ(1« Ê`Å—)±;Ží\ Ú¬z¶Ïyà›»Z×ÛdùÂÑfcp-^Kæé¹·ôàËûir²G*Ôÿ±doƒ‰‹6%$$”/_ž·tV–ûä9ôEJy“ãoùÏ?–‡WÚµ 5Þ~iÝ:B`þ‰/Ldfa¾P÷Ûܽ{÷Î;wìÀK-šD?Œf,§_gÄŒŒ ÆÚ¶˜ß ʼ;Yã–Øîù|wëWC³'¨Ï­š8mßl8Ýí¢¨¯RÕ#m­Ê_øñƒ#,wlV›†›ºŸ°tÃúbç³rŸ/ÕâµqmBÙ”L œú¨„ÓQ /hYq{¿cÓâX®®¤FtÑl«…G,«2{§G¸Ã˜·ôüûû&Vø¹ª•T·ŠŽo£¾gé‰ÑÖV{Q§êÕ«ÿþûïüÂÿPœ_ uø]nذaÁ‚S¦LùꫯfÍš“MÔó¨Y³æÂ… ) ó…ºß¦³ì—.]úÇL:õ§Ÿ~â›Ý¸Ñu$3ú6/K—.Mý’%K†Ëi㌘™™ ¨ñ{ûùçŸ?üðCFÜ´)Ç(㘮;C<+͵Y´ª° ‰úé;½û­erjTCýŽ·ðXqHj'+s…€ëu›ÍqÀ¶p¤7v¦’žPó†Ç:Ü?dÚÕý—Z'–8 XA%TÞvÞC)ãêkhEK6îÂÿhéb}û‹F¿¥éó²îÿ KÊêÞZ'wRë†J@eO›]ÖŸý0ú©Ô­[wñâÅQ·_¿~ýÃ?üä“OÚXÓ¸qãùóç{©tçw>Ü[ò/Éׯ_Ñ¢EQO†8çÿýïÏ<ó ï\;)S¦Lääï ¦xäÈ‘(¼m—¿B¨’|EKÎ|V­úJÎÓ=Dñ°àÑÚ¢a8HÜ*hÍt=ñ†®î£*5ìÈig^N[x—ÏåLÛŒ„Ž7už;µþ‘Íxt2Vm‹Í¡s„ útx"xb\ʹ»Y«×èÓ5þsMš‘Íþ8CÊßøDµ¬§“;ëäŽÖ™*‰ðS|ÀY^6øï0íªºôZ½øiô³©]»ö’%K¢oo·\µjÕi§6}úô|öó¯m~¡L>_îWðÊ+¯\rÉ%pn‰?“’’²bÅ xÿ­ÈK€ïcŽ99ò& ŒÃ DÞd_ÍÎCô} µêaŸ=´Õ—UÙb‘.Ô%SVª¤hÌXÝ÷¼¶íҰ˳õ,Ng@šo¿hÖÕ¶ÌÕž#ªmJX¥ ÊÜ)b96‡˜Ú´ÃÚÃeÝ&Í^¬æhþ -\ªÝFWÞn _µ¢jWQ÷V:á(‹y©^ÓzX+vá;ÎËû]xµQ0“È»ÄܧOŸ÷ß? ’dä3jæF .¸ S§N}ûö„©É­³p÷›4i2gΜ-ZäIÕ=ÊlOWëÖ.œçõ·÷|@û Ö¦ð&$ ¡´ÅÂd¬Ð OéÖÑ{O;kQO ¿C*Û ãУµ»w))nÏØK~½ü˜íþ¿ÃöÖ­© /꺧­P ÎÝÆNQ»díã[è i¨6ÒP«½ÒÐ^žDÜãA waÍjíÚµÓ¦Msi$%%ñúª\¹r°ŽfÂæÒ´iÓß~ûͼ\P Ì›7…Kb¢¹0)Aä)[¶lÅŠëÕ«çŸ#…¿üò ëÿ×_õßͱÓÛ_|Ú¸‹â¦D‰¨Š«U«†LgÜ­R¥Êĉ›7on”‡¹4&LÕo ª1ïè®+Ô®¡ÊVµ­B€‹£V®Ð—ïhè3JÃÝ.;Y0.Îs%k÷θ9?”©^a{År»öq4p7ÛUº\Æñõm–.õ“¬ˆD¶†rU4Ù]ÆÿU*¨NU{¤Né¬vM”ˆžÅ‘†ü<‹£o*ŒIYc|÷ÝwguV˜Ñ€XÜÿþ÷¿½zõ2ª}òÉ'ˆ`Fapy`)€žå¾ûî ?‡ZµjsÎ9_|q£FŒšßÿ=èà·²ÕœË~ø9+Ç[na›6m˜OÈ,ûR³fÍý¥—^ÚW6çòak…½ùÃlõ¼JµÎP‹uý-ºç. 
º^í¨Öiºøžý †n€ Kÿ¸±kCªù¬n›ËOéóXÛ Âð8n¢•¶Â%j3€§ÂMEtû@MyVioè‡WtßpuìªD扮}±==ø×ÎU¸ssF+Zn0\â½õîÝûŠ+®0êñc=÷ÜsÂàòÀR†%× ,[¶ì@ý„VÛ¨Œäõ×_7 C]/^<Ô-·|ÆŒ={ö|ì±ÇÜ's×]w%a.c€2Nïìêùå4=úºî§gßÑ´yÊÌiù±YgŒXŒ@‚öìŠû~!¦—"¿,+½iSKKJ¶ÐnT°3.²dé/ŒJ…˜`J~yMÃVÇnJäëpe…-åôh…8µ}Cùe¥}÷öÏ;vèСû—é¶Ûn3J‚ËKÈ¿Pæ‰áìÌ3Ï4&Œj^Ã(Ìñ2ò±®»î:ÌçÞNà‚s”ĽuÜ|ÌPÆí1×̸Ïmu ÖÉY:­¬ºñ¬ö«Ë¥²Y®Ý´¨²âtÍCMjfijœW5™ú}y®}Dz§«q7iªÞ} y–˜<ذaà —Þ‡¸QĤó “BwÞyçž{î1†ö³F…(.o¾ùf£ÕI'd”„º<(óú7öV›ÀÇZÝyUÚ„÷^¹r–1Žy¨Œf,HyòÅ#ÇŒkþýœT+è‰Då5VdC¡¥çoVO„‰yÙV³B¸€Â_Æ;š?CäöÞ òî¾ûnæk×®¹ŠÒy}4D'£I«V­Œ’P—ùÕþ†ê7Lùªõš4Q]aôðüÈTѲöù°(k½i;ËК‹¬DÎ0q,VÅõWš––â÷Þ‹téMÒ[|;8†xù½õÙgŸÁÑx{騱ã[o½å-‰0ZVT 8ãµ…f‹<Ù8ÃŒ•ššŠ9ƒˆ ¸-¼<ðC…sFA1¦a¬n)R„ç ÓzÖJ•*•+WΙ!•7oÞ¼uëVÖ/àá ¿ûíÛ·ã!êu‘€eˆ|wÜqü¹·¹ÑF{‰ø—«¯¾úË/¿4*„¿ìÖ­«¥Aƒá«ÁF]h',÷Þ{oøÊ¹ÞÅ.óí·ß.*¯½öCþ¶-[¶äVŽë­ :ØiĈ¯¾ú*^sÞ»1ϯY³@Ä“Øí9GŸ÷n×Ѓ¤ös`^ÖìG“…ãŒ%ÙR /ÃÇÁ,8¼Ÿ€!2L°‚v.Ô›ßØ• òO«Úúè%Û³Ž˜gn9Ü鮞äÚøéxoyó¼ÛñcI‡oý† âñ•'¸á†¾þúë\!Æ;ÊÌ™3½—Ñå?ÿüsb>ýôS$ÀNÛ¶mñ|ËbŒiüõ?åO #΀ž1ÎŰ|§¥¥EØÿA6ŽƒDH¥´}GB»[:·ÿo§;¬ˆGŒ 'SNO¾kç òOÕýøšÅOâ…è¦D+^b£¡€ÿ?ꨣ¼…nÞ€-·œ ¼Ïƒ>è- •4h‘x¡îŽœã-üÊr,¼^Í”·>|Á‰'žè-qòÏ#¹S CÅ:ÿßÿý_¨V1,Gxôö3…o··$Ÿy^3Æ &râ‰é¬Îö.œŽ ¯”~˜š:}r Åeý8oa·îë÷š´¡ Î2Õ•1_7Í'‰riž§ŸÆ+¹ª­Ž90$Ée†±ºÈoo‘œùÇ®~/aXÔÆp„_Ò-›`®:ãŒ3uñöÓM7ñþ¿ÒêÔ©3fÌo+òOÀSà Œ^M*ÓFÙŒP€7*xË’ÏÝsPýø=†Ž;î8c&Îå5×\ch"€vá#ŒÝPx1”Áª]»vHK<;uж c‹œéùƒõqS`»†Ÿ%ŠB|óŒV}ô‘Qêò,©šåõ¾`H%{l))C©eÒ•È©YeSvïs¥…—A»TV_jò1+ŸšfÄÚs;w®Qâ½DL0Lð>¼«Ãèyäî¢"õöóôÓOùâ-qóð>˜xÜK2,~cKMï]'Ï4œýŸü·",A‘aˆ/¼ð‹/¾˜csV,|Š÷Ö¨Q£ ˆñÞ%Z+ „2nÄ¥Äsµ4GŽ}¼ŒÐ‡«®ÊÃË¿0P¦\i¨”RjYG qäA–Û–#‡Øè"tVô½¶Ò× )`^Gè‡7Õ£rSµr"f•ÃUÐþÚZ´ˆŠ&YûEáorKR»0>NÍj«×™Òoö¨Ÿõä_½(\Ã<ôùçŸoð>D]†§+˜˜¯ó;¡æ(VÔaWŠ0SŠÉ-0ÉËÛh{Ùe—yK¼y\c¼—ä#w2òk‘®bré§m®ÖCƒUôOA…7Ñ’3Kû?øë‡*)@”I)a­s—Ú6ÒåwÛ»‘çà®uÉ;,@î¬Êž!yTãu5ñuu’]˜—ÿg>£Š¼×Ù¼ ƒü5>2,åiv† ‡zâ¥zå•WzŸW+Ckè½KÞˆÇCÅ€;ŒQ'ÇK´^”ª† bHR4Dä¹è¢‹¼=à`â_0Þ ùϣܽñƽý¡Ô1N5¿_L>UBÞÑc’÷ëàsµ1±% ¼›ÁÐ1x¾ü!1ÛwÇwë­·æiÎÞåž§†¹T>¥ù:Ø\Õ±-lpY”Ý$ÔzFMƒZ¤žUíéuUHƒCv?9ýßµ¥*b YfkŽ¡'9÷Ò)qø †Á˜ Ö óܨZ ÙÁ'L}ï-TÂlträ‘Gº…ÄûQ†! 
V<¼žØí-ê ‹çã?6šãà~ÿ7¿pa˜u ÿ7(ÃÊPóû§Äö1¡ÄXeJ˜|ê©§¢’ µâs%ÒÂSÛëƒÉâL• ¶zèävs˜ÖàKQ©ŽTKógé”QB #ôï|\Ü 3™ƒêV„žQXg­ñÙ5ÎÐhwÑ¿xKг¾ñÆÞ’ðy¼Ñ¼p2ök(üq½‘ÛG½Gž÷Kˆè5±—…ïÁ¯õÈÑÚ¾“½ ;f@¡ñFÉçè(¶QG1Œ{^æê3õä(kmš`=Wùqž¤(`¤{¿Þd{ã«âÖ^-³¦iô½ð¾Q/—õ«©]w[ø:å”SJü~Á:¤-æéÁ˲Ïo<ÌP†¸žP¼ ïjïé¨ •Y_k˜Eùʼ@á7h4ñƒˆQbÔp”\«ÁRyßÛ¼GÍö7¹6̱FN†1ä”5ÃFfÔçî?þˆs3[»·b~éßñ+ò"œ 1ÈMxÙ¸õ‘›"A™¼ILíêë—74àõ>JõÔ\´pçÅòXU±ðeý:==Lí³ù'¶·aÀ¸Y­ £òµsŸ*Ç çÆ6è|x°]:„ñïÄJmœìã€úÀm>çbp"G¾¹s×`UõV†zÈÐ_•£¾dÃJo\ŠRlCpQwˆQ‰!пpZ ƒ@&è”Ùý/ê±"lèå[&l$aÛÈ«yÉH+ü°"ñNÌÊôí¢iï¨DEi²>¥—î·%´­¸ÃÔÔâßu×ݪ}ª®zpß‘&½ÚÚ0ÊTÑÏïõÓ‹ü©"©9â{_˜Ùª"ñ`©sûí·J;~ögŠdþhaP zkâ5뽌$oˆH~yÍÀ•""’:ì#çj¢@ ß™Hº2êp$î6(GñKBKúòË/³=Q‡ËB÷Á®×‘<¹qŠv„·gìdžŠÔ„³á` ûíöyÆÑþ°Ý”á Ïôü‘w›cMŒ÷Äm <ظ[ [ÿò ÆÁÕ1WÊ8“ch¥ñ¤þË\P&1A_=¤‹GØZðGÞFJO×Û¯©eu¤ÏCÛ¤ \´ð¦»–VMÑSý°ý3Ü[R­²º¶ÕWjîx-y]Õ !ªÛÈî§ d!›î7ÐAFMâ#ùÑŽäþ˜£ð”õóð~k‹á±»d8é„"»®&•­±U0ÂâDØOäÕØúψÞð7GÞ[®5„:‘+àŒ†á/ýü¬ÿT ¹  †Åû^ÓO/+ýK¢13ZuNWß[5{¡¿7«¤”°.[gà†§K£²^W*§ÍuÛúi¬–½¡‰/éæ[Õ¯wf½ÎÞ=“¿é6{µwÌ࿜)pê©§7Ð „Ñ;•9ÚÝÛ ^­·$|ÞpACäÿEG‘ ¿´Kþ¾†ÊÙ±…–(îçU0èLìþûÑ’î—Bmǵ_¥¼_;Ôø¥`¯¹¡L–>žª©f_58G5OÖà´j­¿Ÿ}%8ûNMX—p–±° Žf~¢Ï¦ï«k®X¼÷×—O(í-ýøªF ÓQ]”Zm”°‚C€ˆ„Ýê03ZçJ·0À Ã:@å\÷m5$&šäi‡4 ½Þ)ålíÂbKo«˜ç‰{ð§‰ÏQ$ZÌ~F¥‹ÛU·j@^™ %4c‚žzFÇŸ¤¤<¾vxÜ y‰`ôÜfw@ïÇYÛz¨„bÒ°±š×#Ö?1À8˜{ËñËò^†É³[°×E…šl°æ¯wcÈeWæ¯ÛFôÇa³‘mlGq¶wû4(·<ŸÄRðD(ª|ŽÅÆÆ1OF„}¨þcÿ«?­£í¼U –’øÿi~Z¨ÑÍòÆUµàC5aWxŸ¥¶} eС‘°åÇ)³@\[#"FvÉ4ªò5´FƒÇeªQÇIŸ†ÿ>áË~Õ¯Óð‰'žðöÀ¾Y‘茼M¢Èãä‚IÈÛ+ÿfãÞ yͬ\Žî¼öiÔG6LËT $üædF'^¢Wò‡›Ž92’æ1F™Ê©ªWKÚl«KÊXnxG¬YkRM3ßPj[áÂÜ1ÌÝ.ëPÍ—Ø£Àðg.ášE…i4á—=|øð0 C°áùríµ×†jâß¹‘±éD¨¶ùñâ#(É`î(ñœâMtòþóŸÕ+ìªal3 :3V„’'±@·ûôìÝG02‘¦±E‘“†¯?¼É>s¼ÌVÕæx3ï… ð Iͬ¨‡îÖæÈļvл£O<Ô¢‚ØZ+ïOÛ02啾L?þÛ~óÜòˆao:öØcáp¢ ÕwˆœýØ)6öÄ«ÐÅ3…Êa)†5 tË1šÙ‘¥ŽÝ×0·Ï˜1ƒþÙß ¯¬©›ß:|êFño–ê)üå„  Æm×{‹CàØ9G^à`ÿJ„*`ÁAX@Ÿmp(øÅ°% Ò"xäí–<{Y%9^òu`Ý÷b™c »Ùц'jü ¥E‚ †fŽ£8…œÁ®§ÀF¤§*N¼`8ôñ†ÞÇi¹l‡¡!rH 3cçÖ°‹uç½Ò|KVÚ•¦¢'äÚªpI/½ÀW€N× Vˆ¨ÑÁS ˆel®á7éΗò5m~^†JŸšßï.üü°üš`ìGa¶bbˆ]lÐüá`Êë—_*sã˜1ã÷ÊLpÜð;¿ûgˆnØ8/Å©Ã2æ€$T¶0G˜QÑ x·æ§Ðƒ Ó0Î0!¾1ÿ…>›˜&c>ìzc+0‡Õ©§½/8¿ñ°‚ ç £—4~W=:,ü§µí ¹óÎ;Ó#B5bz¤‰Cf¨¼åÄgDhÆabÌËœÞÉÖ×V²·tµwV!ó7õ—uÆ)v«¿E.E 4n¦ÙïåbBR07ø!² ‘qöh…çH¨ž°Fñ«24ÁT&BÖ=T+ʸž0Ü[¼Ã,V©[âdà\HF¡{I}|üÝË(2°8›+‚×9çÛÛ¯}b¸ 
ǀ抡˜´s„oÿùÌÃ^ñÎGùì3TsÜaëÂÀ™¿a,õ2õªªAaU´;Sîո/ýÙ%OߨÇØŒLIû0Éb‡ÐßÒRmÞ­šü¶: 2Ÿ=Šk¿ÂïI·Æ¢¢ |DÕ xqaæeIÿN:¸WDhÐE9Må¼ú’ù„¡ýTò þùs¢‹ñ\†™~Êß6×v>çœsBUóO5TÍPåhF`NñcÊb!(T?‘”ûq "až †žc‰2[whøP]3XuOÐÐWs™v©bšp—®¼ÝVÄàƒX¡úùÀÂdjóÍúYWQ—Ú 3¼ëÃê—Åéæ#ÏbÌD[¼ï ;¥Ñ!ÿTÀ:cxµÕ¸ÄÇ%1[êæieò,h[hÈn þ'5F!ÌcJŽŒŒ¿­±¿„Ñ—8Cš`¶›ôž:Ĺ´hðßCÉžÈPX¬8¯ ñŽîŸª÷®‘‡>t»|ùrÔaÀ1{óuÀKFx¶ c›Šýû/‘F‘£ùº±ý£‚›Ã•!Šíßc¬—ñO4TI©âêÒBK×(çÝXb]¨ µ& target) : target_(std::move(target)) {} Status Read(size_t n, Slice* result, char* scratch) override { IOOptions io_opts; IODebugContext dbg; return target_->Read(n, io_opts, result, scratch, &dbg); } Status Skip(uint64_t n) override { return target_->Skip(n); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } Status InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } Status PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) override { IOOptions io_opts; IODebugContext dbg; return target_->PositionedRead(offset, n, io_opts, result, scratch, &dbg); } private: std::unique_ptr target_; }; class CompositeRandomAccessFileWrapper : public RandomAccessFile { public: explicit CompositeRandomAccessFileWrapper( std::unique_ptr& target) : target_(std::move(target)) {} Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { IOOptions io_opts; IODebugContext dbg; return target_->Read(offset, n, io_opts, result, scratch, &dbg); } Status MultiRead(ReadRequest* reqs, size_t num_reqs) override { IOOptions io_opts; IODebugContext dbg; std::vector fs_reqs; Status status; fs_reqs.resize(num_reqs); for (size_t i = 0; i < num_reqs; ++i) { fs_reqs[i].offset = reqs[i].offset; fs_reqs[i].len = reqs[i].len; fs_reqs[i].scratch = reqs[i].scratch; fs_reqs[i].status = IOStatus::OK(); } status = target_->MultiRead(fs_reqs.data(), num_reqs, io_opts, &dbg); for (size_t i = 0; i < num_reqs; ++i) 
{ reqs[i].result = fs_reqs[i].result; reqs[i].status = fs_reqs[i].status; } return status; } Status Prefetch(uint64_t offset, size_t n) override { IOOptions io_opts; IODebugContext dbg; return target_->Prefetch(offset, n, io_opts, &dbg); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); }; void Hint(AccessPattern pattern) override { target_->Hint((FSRandomAccessFile::AccessPattern)pattern); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } Status InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } private: std::unique_ptr target_; }; class CompositeWritableFileWrapper : public WritableFile { public: explicit CompositeWritableFileWrapper(std::unique_ptr& t) : target_(std::move(t)) {} Status Append(const Slice& data) override { IOOptions io_opts; IODebugContext dbg; return target_->Append(data, io_opts, &dbg); } Status PositionedAppend(const Slice& data, uint64_t offset) override { IOOptions io_opts; IODebugContext dbg; return target_->PositionedAppend(data, offset, io_opts, &dbg); } Status Truncate(uint64_t size) override { IOOptions io_opts; IODebugContext dbg; return target_->Truncate(size, io_opts, &dbg); } Status Close() override { IOOptions io_opts; IODebugContext dbg; return target_->Close(io_opts, &dbg); } Status Flush() override { IOOptions io_opts; IODebugContext dbg; return target_->Flush(io_opts, &dbg); } Status Sync() override { IOOptions io_opts; IODebugContext dbg; return target_->Sync(io_opts, &dbg); } Status Fsync() override { IOOptions io_opts; IODebugContext dbg; return target_->Fsync(io_opts, &dbg); } bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return 
target_->GetRequiredBufferAlignment(); } void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { target_->SetWriteLifeTimeHint(hint); } Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { return target_->GetWriteLifeTimeHint(); } uint64_t GetFileSize() override { IOOptions io_opts; IODebugContext dbg; return target_->GetFileSize(io_opts, &dbg); } void SetPreallocationBlockSize(size_t size) override { target_->SetPreallocationBlockSize(size); } void GetPreallocationStatus(size_t* block_size, size_t* last_allocated_block) override { target_->GetPreallocationStatus(block_size, last_allocated_block); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } Status InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } Status RangeSync(uint64_t offset, uint64_t nbytes) override { IOOptions io_opts; IODebugContext dbg; return target_->RangeSync(offset, nbytes, io_opts, &dbg); } void PrepareWrite(size_t offset, size_t len) override { IOOptions io_opts; IODebugContext dbg; target_->PrepareWrite(offset, len, io_opts, &dbg); } Status Allocate(uint64_t offset, uint64_t len) override { IOOptions io_opts; IODebugContext dbg; return target_->Allocate(offset, len, io_opts, &dbg); } std::unique_ptr* target() { return &target_; } private: std::unique_ptr target_; }; class CompositeRandomRWFileWrapper : public RandomRWFile { public: explicit CompositeRandomRWFileWrapper(std::unique_ptr& target) : target_(std::move(target)) {} bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } Status Write(uint64_t offset, const Slice& data) override { IOOptions io_opts; IODebugContext dbg; return target_->Write(offset, data, io_opts, &dbg); } Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { IOOptions io_opts; IODebugContext 
dbg; return target_->Read(offset, n, io_opts, result, scratch, &dbg); } Status Flush() override { IOOptions io_opts; IODebugContext dbg; return target_->Flush(io_opts, &dbg); } Status Sync() override { IOOptions io_opts; IODebugContext dbg; return target_->Sync(io_opts, &dbg); } Status Fsync() override { IOOptions io_opts; IODebugContext dbg; return target_->Fsync(io_opts, &dbg); } Status Close() override { IOOptions io_opts; IODebugContext dbg; return target_->Close(io_opts, &dbg); } private: std::unique_ptr target_; }; class CompositeDirectoryWrapper : public Directory { public: explicit CompositeDirectoryWrapper(std::unique_ptr& target) : target_(std::move(target)) {} Status Fsync() override { IOOptions io_opts; IODebugContext dbg; return target_->Fsync(io_opts, &dbg); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } private: std::unique_ptr target_; }; class CompositeEnvWrapper : public Env { public: // Initialize a CompositeEnvWrapper that delegates all thread/time related // calls to env, and all file operations to fs explicit CompositeEnvWrapper(Env* env, std::shared_ptr fs) : Env(fs), env_target_(env) {} ~CompositeEnvWrapper() {} // Return the target to which this Env forwards all calls Env* env_target() const { return env_target_; } Status RegisterDbPaths(const std::vector& paths) override { return file_system_->RegisterDbPaths(paths); } Status UnregisterDbPaths(const std::vector& paths) override { return file_system_->UnregisterDbPaths(paths); } // The following text is boilerplate that forwards all methods to target() Status NewSequentialFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override { IODebugContext dbg; std::unique_ptr file; Status status; status = file_system_->NewSequentialFile(f, FileOptions(options), &file, &dbg); if (status.ok()) { r->reset(new CompositeSequentialFileWrapper(file)); } return status; } Status NewRandomAccessFile(const std::string& f, 
std::unique_ptr* r, const EnvOptions& options) override { IODebugContext dbg; std::unique_ptr file; Status status; status = file_system_->NewRandomAccessFile(f, FileOptions(options), &file, &dbg); if (status.ok()) { r->reset(new CompositeRandomAccessFileWrapper(file)); } return status; } Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override { IODebugContext dbg; std::unique_ptr file; Status status; status = file_system_->NewWritableFile(f, FileOptions(options), &file, &dbg); if (status.ok()) { r->reset(new CompositeWritableFileWrapper(file)); } return status; } Status ReopenWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { IODebugContext dbg; Status status; std::unique_ptr file; status = file_system_->ReopenWritableFile(fname, FileOptions(options), &file, &dbg); if (status.ok()) { result->reset(new CompositeWritableFileWrapper(file)); } return status; } Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* r, const EnvOptions& options) override { IODebugContext dbg; Status status; std::unique_ptr file; status = file_system_->ReuseWritableFile(fname, old_fname, FileOptions(options), &file, &dbg); if (status.ok()) { r->reset(new CompositeWritableFileWrapper(file)); } return status; } Status NewRandomRWFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { IODebugContext dbg; std::unique_ptr file; Status status; status = file_system_->NewRandomRWFile(fname, FileOptions(options), &file, &dbg); if (status.ok()) { result->reset(new CompositeRandomRWFileWrapper(file)); } return status; } Status NewMemoryMappedFileBuffer( const std::string& fname, std::unique_ptr* result) override { return file_system_->NewMemoryMappedFileBuffer(fname, result); } Status NewDirectory(const std::string& name, std::unique_ptr* result) override { IOOptions io_opts; IODebugContext dbg; std::unique_ptr dir; Status 
status; status = file_system_->NewDirectory(name, io_opts, &dir, &dbg); if (status.ok()) { result->reset(new CompositeDirectoryWrapper(dir)); } return status; } Status FileExists(const std::string& f) override { IOOptions io_opts; IODebugContext dbg; return file_system_->FileExists(f, io_opts, &dbg); } Status GetChildren(const std::string& dir, std::vector* r) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetChildren(dir, io_opts, r, &dbg); } Status GetChildrenFileAttributes( const std::string& dir, std::vector* result) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetChildrenFileAttributes(dir, io_opts, result, &dbg); } Status DeleteFile(const std::string& f) override { IOOptions io_opts; IODebugContext dbg; return file_system_->DeleteFile(f, io_opts, &dbg); } Status Truncate(const std::string& fname, size_t size) override { IOOptions io_opts; IODebugContext dbg; return file_system_->Truncate(fname, size, io_opts, &dbg); } Status CreateDir(const std::string& d) override { IOOptions io_opts; IODebugContext dbg; return file_system_->CreateDir(d, io_opts, &dbg); } Status CreateDirIfMissing(const std::string& d) override { IOOptions io_opts; IODebugContext dbg; return file_system_->CreateDirIfMissing(d, io_opts, &dbg); } Status DeleteDir(const std::string& d) override { IOOptions io_opts; IODebugContext dbg; return file_system_->DeleteDir(d, io_opts, &dbg); } Status GetFileSize(const std::string& f, uint64_t* s) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetFileSize(f, io_opts, s, &dbg); } Status GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetFileModificationTime(fname, io_opts, file_mtime, &dbg); } Status RenameFile(const std::string& s, const std::string& t) override { IOOptions io_opts; IODebugContext dbg; return file_system_->RenameFile(s, t, io_opts, &dbg); } Status LinkFile(const 
std::string& s, const std::string& t) override { IOOptions io_opts; IODebugContext dbg; return file_system_->LinkFile(s, t, io_opts, &dbg); } Status NumFileLinks(const std::string& fname, uint64_t* count) override { IOOptions io_opts; IODebugContext dbg; return file_system_->NumFileLinks(fname, io_opts, count, &dbg); } Status AreFilesSame(const std::string& first, const std::string& second, bool* res) override { IOOptions io_opts; IODebugContext dbg; return file_system_->AreFilesSame(first, second, io_opts, res, &dbg); } Status LockFile(const std::string& f, FileLock** l) override { IOOptions io_opts; IODebugContext dbg; return file_system_->LockFile(f, io_opts, l, &dbg); } Status UnlockFile(FileLock* l) override { IOOptions io_opts; IODebugContext dbg; return file_system_->UnlockFile(l, io_opts, &dbg); } Status GetAbsolutePath(const std::string& db_path, std::string* output_path) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetAbsolutePath(db_path, io_opts, output_path, &dbg); } Status NewLogger(const std::string& fname, std::shared_ptr* result) override { IOOptions io_opts; IODebugContext dbg; return file_system_->NewLogger(fname, io_opts, result, &dbg); } Status IsDirectory(const std::string& path, bool* is_dir) override { IOOptions io_opts; IODebugContext dbg; return file_system_->IsDirectory(path, io_opts, is_dir, &dbg); } #if !defined(OS_WIN) && !defined(ROCKSDB_NO_DYNAMIC_EXTENSION) Status LoadLibrary(const std::string& lib_name, const std::string& search_path, std::shared_ptr* result) override { return env_target_->LoadLibrary(lib_name, search_path, result); } #endif void Schedule(void (*f)(void* arg), void* a, Priority pri, void* tag = nullptr, void (*u)(void* arg) = nullptr) override { return env_target_->Schedule(f, a, pri, tag, u); } int UnSchedule(void* tag, Priority pri) override { return env_target_->UnSchedule(tag, pri); } void StartThread(void (*f)(void*), void* a) override { return env_target_->StartThread(f, a); } void 
WaitForJoin() override { return env_target_->WaitForJoin(); } unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override { return env_target_->GetThreadPoolQueueLen(pri); } Status GetTestDirectory(std::string* path) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetTestDirectory(io_opts, path, &dbg); } uint64_t NowMicros() override { return env_target_->NowMicros(); } uint64_t NowNanos() override { return env_target_->NowNanos(); } uint64_t NowCPUNanos() override { return env_target_->NowCPUNanos(); } void SleepForMicroseconds(int micros) override { env_target_->SleepForMicroseconds(micros); } Status GetHostName(char* name, uint64_t len) override { return env_target_->GetHostName(name, len); } Status GetCurrentTime(int64_t* unix_time) override { return env_target_->GetCurrentTime(unix_time); } void SetBackgroundThreads(int num, Priority pri) override { return env_target_->SetBackgroundThreads(num, pri); } int GetBackgroundThreads(Priority pri) override { return env_target_->GetBackgroundThreads(pri); } Status SetAllowNonOwnerAccess(bool allow_non_owner_access) override { return env_target_->SetAllowNonOwnerAccess(allow_non_owner_access); } void IncBackgroundThreadsIfNeeded(int num, Priority pri) override { return env_target_->IncBackgroundThreadsIfNeeded(num, pri); } void LowerThreadPoolIOPriority(Priority pool = LOW) override { env_target_->LowerThreadPoolIOPriority(pool); } void LowerThreadPoolCPUPriority(Priority pool = LOW) override { env_target_->LowerThreadPoolCPUPriority(pool); } std::string TimeToString(uint64_t time) override { return env_target_->TimeToString(time); } Status GetThreadList(std::vector* thread_list) override { return env_target_->GetThreadList(thread_list); } ThreadStatusUpdater* GetThreadStatusUpdater() const override { return env_target_->GetThreadStatusUpdater(); } uint64_t GetThreadID() const override { return env_target_->GetThreadID(); } std::string GenerateUniqueId() override { return 
env_target_->GenerateUniqueId(); } EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const override { return file_system_->OptimizeForLogRead(FileOptions(env_options)); } EnvOptions OptimizeForManifestRead( const EnvOptions& env_options) const override { return file_system_->OptimizeForManifestRead(FileOptions(env_options)); } EnvOptions OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const override { return file_system_->OptimizeForLogWrite(FileOptions(env_options), db_options); } EnvOptions OptimizeForManifestWrite( const EnvOptions& env_options) const override { return file_system_->OptimizeForManifestWrite(FileOptions(env_options)); } EnvOptions OptimizeForCompactionTableWrite( const EnvOptions& env_options, const ImmutableDBOptions& immutable_ops) const override { return file_system_->OptimizeForCompactionTableWrite( FileOptions(env_options), immutable_ops); } EnvOptions OptimizeForCompactionTableRead( const EnvOptions& env_options, const ImmutableDBOptions& db_options) const override { return file_system_->OptimizeForCompactionTableRead( FileOptions(env_options), db_options); } Status GetFreeSpace(const std::string& path, uint64_t* diskfree) override { IOOptions io_opts; IODebugContext dbg; return file_system_->GetFreeSpace(path, io_opts, diskfree, &dbg); } private: Env* env_target_; }; class LegacySequentialFileWrapper : public FSSequentialFile { public: explicit LegacySequentialFileWrapper( std::unique_ptr&& _target) : target_(std::move(_target)) {} IOStatus Read(size_t n, const IOOptions& /*options*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Read(n, result, scratch)); } IOStatus Skip(uint64_t n) override { return status_to_io_status(target_->Skip(n)); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } IOStatus 
InvalidateCache(size_t offset, size_t length) override { return status_to_io_status(target_->InvalidateCache(offset, length)); } IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& /*options*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) override { return status_to_io_status( target_->PositionedRead(offset, n, result, scratch)); } SequentialFile* target() { return target_.get(); } private: std::unique_ptr target_; }; class LegacyRandomAccessFileWrapper : public FSRandomAccessFile { public: explicit LegacyRandomAccessFileWrapper( std::unique_ptr&& target) : target_(std::move(target)) {} IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*options*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) const override { return status_to_io_status(target_->Read(offset, n, result, scratch)); } IOStatus MultiRead(FSReadRequest* fs_reqs, size_t num_reqs, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { std::vector reqs; Status status; reqs.reserve(num_reqs); for (size_t i = 0; i < num_reqs; ++i) { ReadRequest req; req.offset = fs_reqs[i].offset; req.len = fs_reqs[i].len; req.scratch = fs_reqs[i].scratch; req.status = Status::OK(); reqs.emplace_back(req); } status = target_->MultiRead(reqs.data(), num_reqs); for (size_t i = 0; i < num_reqs; ++i) { fs_reqs[i].result = reqs[i].result; fs_reqs[i].status = status_to_io_status(std::move(reqs[i].status)); } return status_to_io_status(std::move(status)); ; } IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Prefetch(offset, n)); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); }; void Hint(AccessPattern pattern) override { target_->Hint((RandomAccessFile::AccessPattern)pattern); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return 
target_->GetRequiredBufferAlignment(); } IOStatus InvalidateCache(size_t offset, size_t length) override { return status_to_io_status(target_->InvalidateCache(offset, length)); } private: std::unique_ptr target_; }; class LegacyWritableFileWrapper : public FSWritableFile { public: explicit LegacyWritableFileWrapper(std::unique_ptr&& _target) : target_(std::move(_target)) {} IOStatus Append(const Slice& data, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Append(data)); } IOStatus PositionedAppend(const Slice& data, uint64_t offset, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->PositionedAppend(data, offset)); } IOStatus Truncate(uint64_t size, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Truncate(size)); } IOStatus Close(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Close()); } IOStatus Flush(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Flush()); } IOStatus Sync(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Sync()); } IOStatus Fsync(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Fsync()); } bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { target_->SetWriteLifeTimeHint(hint); } Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { return target_->GetWriteLifeTimeHint(); } uint64_t GetFileSize(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return target_->GetFileSize(); } void 
SetPreallocationBlockSize(size_t size) override { target_->SetPreallocationBlockSize(size); } void GetPreallocationStatus(size_t* block_size, size_t* last_allocated_block) override { target_->GetPreallocationStatus(block_size, last_allocated_block); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } IOStatus InvalidateCache(size_t offset, size_t length) override { return status_to_io_status(target_->InvalidateCache(offset, length)); } IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->RangeSync(offset, nbytes)); } void PrepareWrite(size_t offset, size_t len, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { target_->PrepareWrite(offset, len); } IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Allocate(offset, len)); } WritableFile* target() { return target_.get(); } private: std::unique_ptr target_; }; class LegacyRandomRWFileWrapper : public FSRandomRWFile { public: explicit LegacyRandomRWFileWrapper(std::unique_ptr&& target) : target_(std::move(target)) {} bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Write(offset, data)); } IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*options*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) const override { return status_to_io_status(target_->Read(offset, n, result, scratch)); } IOStatus Flush(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Flush()); } IOStatus Sync(const IOOptions& /*options*/, 
IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Sync()); } IOStatus Fsync(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Fsync()); } IOStatus Close(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Close()); } private: std::unique_ptr target_; }; class LegacyDirectoryWrapper : public FSDirectory { public: explicit LegacyDirectoryWrapper(std::unique_ptr&& target) : target_(std::move(target)) {} IOStatus Fsync(const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Fsync()); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } private: std::unique_ptr target_; }; class LegacyFileSystemWrapper : public FileSystem { public: // Initialize an EnvWrapper that delegates all calls to *t explicit LegacyFileSystemWrapper(Env* t) : target_(t) {} ~LegacyFileSystemWrapper() override {} const char* Name() const override { return "Legacy File System"; } // Return the target to which this Env forwards all calls Env* target() const { return target_; } // The following text is boilerplate that forwards all methods to target() IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* /*dbg*/) override { std::unique_ptr file; Status s = target_->NewSequentialFile(f, &file, file_opts); if (s.ok()) { r->reset(new LegacySequentialFileWrapper(std::move(file))); } return status_to_io_status(std::move(s)); } IOStatus NewRandomAccessFile(const std::string& f, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* /*dbg*/) override { std::unique_ptr file; Status s = target_->NewRandomAccessFile(f, &file, file_opts); if (s.ok()) { r->reset(new LegacyRandomAccessFileWrapper(std::move(file))); } return status_to_io_status(std::move(s)); } IOStatus NewWritableFile(const std::string& f, const 
FileOptions& file_opts, std::unique_ptr* r, IODebugContext* /*dbg*/) override { std::unique_ptr file; Status s = target_->NewWritableFile(f, &file, file_opts); if (s.ok()) { r->reset(new LegacyWritableFileWrapper(std::move(file))); } return status_to_io_status(std::move(s)); } IOStatus ReopenWritableFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* /*dbg*/) override { std::unique_ptr file; Status s = target_->ReopenWritableFile(fname, &file, file_opts); if (s.ok()) { result->reset(new LegacyWritableFileWrapper(std::move(file))); } return status_to_io_status(std::move(s)); } IOStatus ReuseWritableFile(const std::string& fname, const std::string& old_fname, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* /*dbg*/) override { std::unique_ptr file; Status s = target_->ReuseWritableFile(fname, old_fname, &file, file_opts); if (s.ok()) { r->reset(new LegacyWritableFileWrapper(std::move(file))); } return status_to_io_status(std::move(s)); } IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* /*dbg*/) override { std::unique_ptr file; Status s = target_->NewRandomRWFile(fname, &file, file_opts); if (s.ok()) { result->reset(new LegacyRandomRWFileWrapper(std::move(file))); } return status_to_io_status(std::move(s)); } IOStatus NewMemoryMappedFileBuffer( const std::string& fname, std::unique_ptr* result) override { return status_to_io_status( target_->NewMemoryMappedFileBuffer(fname, result)); } IOStatus NewDirectory(const std::string& name, const IOOptions& /*io_opts*/, std::unique_ptr* result, IODebugContext* /*dbg*/) override { std::unique_ptr dir; Status s = target_->NewDirectory(name, &dir); if (s.ok()) { result->reset(new LegacyDirectoryWrapper(std::move(dir))); } return status_to_io_status(std::move(s)); } IOStatus FileExists(const std::string& f, const IOOptions& /*io_opts*/, IODebugContext* /*dbg*/) override { return 
status_to_io_status(target_->FileExists(f)); } IOStatus GetChildren(const std::string& dir, const IOOptions& /*io_opts*/, std::vector* r, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->GetChildren(dir, r)); } IOStatus GetChildrenFileAttributes(const std::string& dir, const IOOptions& /*options*/, std::vector* result, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->GetChildrenFileAttributes(dir, result)); } IOStatus DeleteFile(const std::string& f, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->DeleteFile(f)); } IOStatus Truncate(const std::string& fname, size_t size, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->Truncate(fname, size)); } IOStatus CreateDir(const std::string& d, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->CreateDir(d)); } IOStatus CreateDirIfMissing(const std::string& d, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->CreateDirIfMissing(d)); } IOStatus DeleteDir(const std::string& d, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->DeleteDir(d)); } IOStatus GetFileSize(const std::string& f, const IOOptions& /*options*/, uint64_t* s, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->GetFileSize(f, s)); } IOStatus GetFileModificationTime(const std::string& fname, const IOOptions& /*options*/, uint64_t* file_mtime, IODebugContext* /*dbg*/) override { return status_to_io_status( target_->GetFileModificationTime(fname, file_mtime)); } IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& /*options*/, std::string* output_path, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->GetAbsolutePath(db_path, output_path)); } IOStatus RenameFile(const std::string& s, const std::string& 
t, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->RenameFile(s, t)); } IOStatus LinkFile(const std::string& s, const std::string& t, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->LinkFile(s, t)); } IOStatus NumFileLinks(const std::string& fname, const IOOptions& /*options*/, uint64_t* count, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->NumFileLinks(fname, count)); } IOStatus AreFilesSame(const std::string& first, const std::string& second, const IOOptions& /*options*/, bool* res, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->AreFilesSame(first, second, res)); } IOStatus LockFile(const std::string& f, const IOOptions& /*options*/, FileLock** l, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->LockFile(f, l)); } IOStatus UnlockFile(FileLock* l, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->UnlockFile(l)); } IOStatus GetTestDirectory(const IOOptions& /*options*/, std::string* path, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->GetTestDirectory(path)); } IOStatus NewLogger(const std::string& fname, const IOOptions& /*options*/, std::shared_ptr* result, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->NewLogger(fname, result)); } void SanitizeFileOptions(FileOptions* opts) const override { target_->SanitizeEnvOptions(opts); } FileOptions OptimizeForLogRead( const FileOptions& file_options) const override { return target_->OptimizeForLogRead(file_options); } FileOptions OptimizeForManifestRead( const FileOptions& file_options) const override { return target_->OptimizeForManifestRead(file_options); } FileOptions OptimizeForLogWrite(const FileOptions& file_options, const DBOptions& db_options) const override { return target_->OptimizeForLogWrite(file_options, db_options); } FileOptions 
OptimizeForManifestWrite( const FileOptions& file_options) const override { return target_->OptimizeForManifestWrite(file_options); } FileOptions OptimizeForCompactionTableWrite( const FileOptions& file_options, const ImmutableDBOptions& immutable_ops) const override { return target_->OptimizeForCompactionTableWrite(file_options, immutable_ops); } FileOptions OptimizeForCompactionTableRead( const FileOptions& file_options, const ImmutableDBOptions& db_options) const override { return target_->OptimizeForCompactionTableRead(file_options, db_options); } IOStatus GetFreeSpace(const std::string& path, const IOOptions& /*options*/, uint64_t* diskfree, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->GetFreeSpace(path, diskfree)); } IOStatus IsDirectory(const std::string& path, const IOOptions& /*options*/, bool* is_dir, IODebugContext* /*dbg*/) override { return status_to_io_status(target_->IsDirectory(path, is_dir)); } private: Env* target_; }; inline std::unique_ptr NewLegacySequentialFileWrapper( std::unique_ptr& file) { return std::unique_ptr( new LegacySequentialFileWrapper(std::move(file))); } inline std::unique_ptr NewLegacyRandomAccessFileWrapper( std::unique_ptr& file) { return std::unique_ptr( new LegacyRandomAccessFileWrapper(std::move(file))); } inline std::unique_ptr NewLegacyWritableFileWrapper( std::unique_ptr&& file) { return std::unique_ptr( new LegacyWritableFileWrapper(std::move(file))); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/env.cc000066400000000000000000000326541370372246700151460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "rocksdb/env.h" #include #include "env/composite_env_wrapper.h" #include "logging/env_logger.h" #include "memory/arena.h" #include "options/db_options.h" #include "port/port.h" #include "port/sys_time.h" #include "rocksdb/options.h" #include "rocksdb/utilities/object_registry.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { Env::Env() : thread_status_updater_(nullptr) { file_system_ = std::make_shared(this); } Env::Env(std::shared_ptr fs) : thread_status_updater_(nullptr), file_system_(fs) {} Env::~Env() { } Status Env::NewLogger(const std::string& fname, std::shared_ptr* result) { return NewEnvLogger(fname, this, result); } Status Env::LoadEnv(const std::string& value, Env** result) { Env* env = *result; Status s; #ifndef ROCKSDB_LITE s = ObjectRegistry::NewInstance()->NewStaticObject(value, &env); #else s = Status::NotSupported("Cannot load environment in LITE mode: ", value); #endif if (s.ok()) { *result = env; } return s; } Status Env::LoadEnv(const std::string& value, Env** result, std::shared_ptr* guard) { assert(result); Status s; #ifndef ROCKSDB_LITE Env* env = nullptr; std::unique_ptr uniq_guard; std::string err_msg; assert(guard != nullptr); env = ObjectRegistry::NewInstance()->NewObject(value, &uniq_guard, &err_msg); if (!env) { s = Status::NotFound(std::string("Cannot load ") + Env::Type() + ": " + value); env = Env::Default(); } if (s.ok() && uniq_guard) { guard->reset(uniq_guard.release()); *result = guard->get(); } else { *result = env; } #else (void)result; (void)guard; s = Status::NotSupported("Cannot load environment in LITE mode: ", value); #endif return s; } std::string Env::PriorityToString(Env::Priority priority) { switch (priority) { case Env::Priority::BOTTOM: return "Bottom"; case Env::Priority::LOW: return "Low"; case Env::Priority::HIGH: return "High"; case 
Env::Priority::USER: return "User"; case Env::Priority::TOTAL: assert(false); } return "Invalid"; } uint64_t Env::GetThreadID() const { std::hash hasher; return hasher(std::this_thread::get_id()); } Status Env::ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* result, const EnvOptions& options) { Status s = RenameFile(old_fname, fname); if (!s.ok()) { return s; } return NewWritableFile(fname, result, options); } Status Env::GetChildrenFileAttributes(const std::string& dir, std::vector* result) { assert(result != nullptr); std::vector child_fnames; Status s = GetChildren(dir, &child_fnames); if (!s.ok()) { return s; } result->resize(child_fnames.size()); size_t result_size = 0; for (size_t i = 0; i < child_fnames.size(); ++i) { const std::string path = dir + "/" + child_fnames[i]; if (!(s = GetFileSize(path, &(*result)[result_size].size_bytes)).ok()) { if (FileExists(path).IsNotFound()) { // The file may have been deleted since we listed the directory continue; } return s; } (*result)[result_size].name = std::move(child_fnames[i]); result_size++; } result->resize(result_size); return Status::OK(); } SequentialFile::~SequentialFile() { } RandomAccessFile::~RandomAccessFile() { } WritableFile::~WritableFile() { } MemoryMappedFileBuffer::~MemoryMappedFileBuffer() {} Logger::~Logger() {} Status Logger::Close() { if (!closed_) { closed_ = true; return CloseImpl(); } else { return Status::OK(); } } Status Logger::CloseImpl() { return Status::NotSupported(); } FileLock::~FileLock() { } void LogFlush(Logger *info_log) { if (info_log) { info_log->Flush(); } } static void Logv(Logger *info_log, const char* format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::INFO_LEVEL) { info_log->Logv(InfoLogLevel::INFO_LEVEL, format, ap); } } void Log(Logger* info_log, const char* format, ...) 
{ va_list ap; va_start(ap, format); Logv(info_log, format, ap); va_end(ap); } void Logger::Logv(const InfoLogLevel log_level, const char* format, va_list ap) { static const char* kInfoLogLevelNames[5] = { "DEBUG", "INFO", "WARN", "ERROR", "FATAL" }; if (log_level < log_level_) { return; } if (log_level == InfoLogLevel::INFO_LEVEL) { // Doesn't print log level if it is INFO level. // This is to avoid unexpected performance regression after we add // the feature of log level. All the logs before we add the feature // are INFO level. We don't want to add extra costs to those existing // logging. Logv(format, ap); } else if (log_level == InfoLogLevel::HEADER_LEVEL) { LogHeader(format, ap); } else { char new_format[500]; snprintf(new_format, sizeof(new_format) - 1, "[%s] %s", kInfoLogLevelNames[log_level], format); Logv(new_format, ap); } } static void Logv(const InfoLogLevel log_level, Logger *info_log, const char *format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= log_level) { if (log_level == InfoLogLevel::HEADER_LEVEL) { info_log->LogHeader(format, ap); } else { info_log->Logv(log_level, format, ap); } } } void Log(const InfoLogLevel log_level, Logger* info_log, const char* format, ...) { va_list ap; va_start(ap, format); Logv(log_level, info_log, format, ap); va_end(ap); } static void Headerv(Logger *info_log, const char *format, va_list ap) { if (info_log) { info_log->LogHeader(format, ap); } } void Header(Logger* info_log, const char* format, ...) { va_list ap; va_start(ap, format); Headerv(info_log, format, ap); va_end(ap); } static void Debugv(Logger* info_log, const char* format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { info_log->Logv(InfoLogLevel::DEBUG_LEVEL, format, ap); } } void Debug(Logger* info_log, const char* format, ...) 
{ va_list ap; va_start(ap, format); Debugv(info_log, format, ap); va_end(ap); } static void Infov(Logger* info_log, const char* format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::INFO_LEVEL) { info_log->Logv(InfoLogLevel::INFO_LEVEL, format, ap); } } void Info(Logger* info_log, const char* format, ...) { va_list ap; va_start(ap, format); Infov(info_log, format, ap); va_end(ap); } static void Warnv(Logger* info_log, const char* format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::WARN_LEVEL) { info_log->Logv(InfoLogLevel::WARN_LEVEL, format, ap); } } void Warn(Logger* info_log, const char* format, ...) { va_list ap; va_start(ap, format); Warnv(info_log, format, ap); va_end(ap); } static void Errorv(Logger* info_log, const char* format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::ERROR_LEVEL) { info_log->Logv(InfoLogLevel::ERROR_LEVEL, format, ap); } } void Error(Logger* info_log, const char* format, ...) { va_list ap; va_start(ap, format); Errorv(info_log, format, ap); va_end(ap); } static void Fatalv(Logger* info_log, const char* format, va_list ap) { if (info_log && info_log->GetInfoLogLevel() <= InfoLogLevel::FATAL_LEVEL) { info_log->Logv(InfoLogLevel::FATAL_LEVEL, format, ap); } } void Fatal(Logger* info_log, const char* format, ...) { va_list ap; va_start(ap, format); Fatalv(info_log, format, ap); va_end(ap); } void LogFlush(const std::shared_ptr& info_log) { LogFlush(info_log.get()); } void Log(const InfoLogLevel log_level, const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Logv(log_level, info_log.get(), format, ap); va_end(ap); } void Header(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Headerv(info_log.get(), format, ap); va_end(ap); } void Debug(const std::shared_ptr& info_log, const char* format, ...) 
{ va_list ap; va_start(ap, format); Debugv(info_log.get(), format, ap); va_end(ap); } void Info(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Infov(info_log.get(), format, ap); va_end(ap); } void Warn(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Warnv(info_log.get(), format, ap); va_end(ap); } void Error(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Errorv(info_log.get(), format, ap); va_end(ap); } void Fatal(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Fatalv(info_log.get(), format, ap); va_end(ap); } void Log(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Logv(info_log.get(), format, ap); va_end(ap); } Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname, bool should_sync) { LegacyFileSystemWrapper lfsw(env); return WriteStringToFile(&lfsw, data, fname, should_sync); } Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { LegacyFileSystemWrapper lfsw(env); return ReadFileToString(&lfsw, fname, data); } EnvWrapper::~EnvWrapper() { } namespace { // anonymous namespace void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) { env_options->use_mmap_reads = options.allow_mmap_reads; env_options->use_mmap_writes = options.allow_mmap_writes; env_options->use_direct_reads = options.use_direct_reads; env_options->set_fd_cloexec = options.is_fd_close_on_exec; env_options->bytes_per_sync = options.bytes_per_sync; env_options->compaction_readahead_size = options.compaction_readahead_size; env_options->random_access_max_buffer_size = options.random_access_max_buffer_size; env_options->rate_limiter = options.rate_limiter.get(); env_options->writable_file_max_buffer_size = options.writable_file_max_buffer_size; env_options->allow_fallocate = options.allow_fallocate; 
env_options->strict_bytes_per_sync = options.strict_bytes_per_sync; options.env->SanitizeEnvOptions(env_options); } } EnvOptions Env::OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const { EnvOptions optimized_env_options(env_options); optimized_env_options.bytes_per_sync = db_options.wal_bytes_per_sync; optimized_env_options.writable_file_max_buffer_size = db_options.writable_file_max_buffer_size; return optimized_env_options; } EnvOptions Env::OptimizeForManifestWrite(const EnvOptions& env_options) const { return env_options; } EnvOptions Env::OptimizeForLogRead(const EnvOptions& env_options) const { EnvOptions optimized_env_options(env_options); optimized_env_options.use_direct_reads = false; return optimized_env_options; } EnvOptions Env::OptimizeForManifestRead(const EnvOptions& env_options) const { EnvOptions optimized_env_options(env_options); optimized_env_options.use_direct_reads = false; return optimized_env_options; } EnvOptions Env::OptimizeForCompactionTableWrite( const EnvOptions& env_options, const ImmutableDBOptions& db_options) const { EnvOptions optimized_env_options(env_options); optimized_env_options.use_direct_writes = db_options.use_direct_io_for_flush_and_compaction; return optimized_env_options; } EnvOptions Env::OptimizeForCompactionTableRead( const EnvOptions& env_options, const ImmutableDBOptions& db_options) const { EnvOptions optimized_env_options(env_options); optimized_env_options.use_direct_reads = db_options.use_direct_reads; return optimized_env_options; } EnvOptions::EnvOptions(const DBOptions& options) { AssignEnvOptions(this, options); } EnvOptions::EnvOptions() { DBOptions options; AssignEnvOptions(this, options); } Status NewEnvLogger(const std::string& fname, Env* env, std::shared_ptr* result) { EnvOptions options; // TODO: Tune the buffer size. 
options.writable_file_max_buffer_size = 1024 * 1024; std::unique_ptr writable_file; const auto status = env->NewWritableFile(fname, &writable_file, options); if (!status.ok()) { return status; } *result = std::make_shared( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, options, env); return Status::OK(); } const std::shared_ptr& Env::GetFileSystem() const { return file_system_; } #ifdef OS_WIN std::unique_ptr NewCompositeEnv(std::shared_ptr fs) { return std::unique_ptr(new CompositeEnvWrapper(Env::Default(), fs)); } #endif } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/env_basic_test.cc000066400000000000000000000311141370372246700173340ustar00rootroot00000000000000// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. #include #include #include #include #include "env/mock_env.h" #include "rocksdb/env.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { // Normalizes trivial differences across Envs such that these test cases can // run on all Envs. class NormalizingEnvWrapper : public EnvWrapper { public: explicit NormalizingEnvWrapper(Env* base) : EnvWrapper(base) {} // Removes . and .. from directory listing Status GetChildren(const std::string& dir, std::vector* result) override { Status status = EnvWrapper::GetChildren(dir, result); if (status.ok()) { result->erase(std::remove_if(result->begin(), result->end(), [](const std::string& s) { return s == "." || s == ".."; }), result->end()); } return status; } // Removes . and .. 
from directory listing Status GetChildrenFileAttributes( const std::string& dir, std::vector* result) override { Status status = EnvWrapper::GetChildrenFileAttributes(dir, result); if (status.ok()) { result->erase(std::remove_if(result->begin(), result->end(), [](const FileAttributes& fa) { return fa.name == "." || fa.name == ".."; }), result->end()); } return status; } }; class EnvBasicTestWithParam : public testing::Test, public ::testing::WithParamInterface { public: Env* env_; const EnvOptions soptions_; std::string test_dir_; EnvBasicTestWithParam() : env_(GetParam()) { test_dir_ = test::PerThreadDBPath(env_, "env_basic_test"); } void SetUp() override { env_->CreateDirIfMissing(test_dir_); } void TearDown() override { std::vector files; env_->GetChildren(test_dir_, &files); for (const auto& file : files) { // don't know whether it's file or directory, try both. The tests must // only create files or empty directories, so one must succeed, else the // directory's corrupted. Status s = env_->DeleteFile(test_dir_ + "/" + file); if (!s.ok()) { ASSERT_OK(env_->DeleteDir(test_dir_ + "/" + file)); } } } }; class EnvMoreTestWithParam : public EnvBasicTestWithParam {}; static std::unique_ptr def_env(new NormalizingEnvWrapper(Env::Default())); INSTANTIATE_TEST_CASE_P(EnvDefault, EnvBasicTestWithParam, ::testing::Values(def_env.get())); INSTANTIATE_TEST_CASE_P(EnvDefault, EnvMoreTestWithParam, ::testing::Values(def_env.get())); static std::unique_ptr mock_env(new MockEnv(Env::Default())); INSTANTIATE_TEST_CASE_P(MockEnv, EnvBasicTestWithParam, ::testing::Values(mock_env.get())); #ifndef ROCKSDB_LITE static std::unique_ptr mem_env(NewMemEnv(Env::Default())); INSTANTIATE_TEST_CASE_P(MemEnv, EnvBasicTestWithParam, ::testing::Values(mem_env.get())); namespace { // Returns a vector of 0 or 1 Env*, depending whether an Env is registered for // TEST_ENV_URI. 
// // The purpose of returning an empty vector (instead of nullptr) is that gtest // ValuesIn() will skip running tests when given an empty collection. std::vector GetCustomEnvs() { static Env* custom_env; static bool init = false; if (!init) { init = true; const char* uri = getenv("TEST_ENV_URI"); if (uri != nullptr) { Env::LoadEnv(uri, &custom_env); } } std::vector res; if (custom_env != nullptr) { res.emplace_back(custom_env); } return res; } } // anonymous namespace INSTANTIATE_TEST_CASE_P(CustomEnv, EnvBasicTestWithParam, ::testing::ValuesIn(GetCustomEnvs())); INSTANTIATE_TEST_CASE_P(CustomEnv, EnvMoreTestWithParam, ::testing::ValuesIn(GetCustomEnvs())); #endif // ROCKSDB_LITE TEST_P(EnvBasicTestWithParam, Basics) { uint64_t file_size; std::unique_ptr writable_file; std::vector children; // Check that the directory is empty. ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/non_existent")); ASSERT_TRUE(!env_->GetFileSize(test_dir_ + "/non_existent", &file_size).ok()); ASSERT_OK(env_->GetChildren(test_dir_, &children)); ASSERT_EQ(0U, children.size()); // Create a file. ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); ASSERT_OK(writable_file->Close()); writable_file.reset(); // Check that the file exists. ASSERT_OK(env_->FileExists(test_dir_ + "/f")); ASSERT_OK(env_->GetFileSize(test_dir_ + "/f", &file_size)); ASSERT_EQ(0U, file_size); ASSERT_OK(env_->GetChildren(test_dir_, &children)); ASSERT_EQ(1U, children.size()); ASSERT_EQ("f", children[0]); ASSERT_OK(env_->DeleteFile(test_dir_ + "/f")); // Write to the file. ASSERT_OK( env_->NewWritableFile(test_dir_ + "/f1", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("abc")); ASSERT_OK(writable_file->Close()); writable_file.reset(); ASSERT_OK( env_->NewWritableFile(test_dir_ + "/f2", &writable_file, soptions_)); ASSERT_OK(writable_file->Close()); writable_file.reset(); // Check for expected size. 
ASSERT_OK(env_->GetFileSize(test_dir_ + "/f1", &file_size)); ASSERT_EQ(3U, file_size); // Check that renaming works. ASSERT_TRUE( !env_->RenameFile(test_dir_ + "/non_existent", test_dir_ + "/g").ok()); ASSERT_OK(env_->RenameFile(test_dir_ + "/f1", test_dir_ + "/g")); ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/f1")); ASSERT_OK(env_->FileExists(test_dir_ + "/g")); ASSERT_OK(env_->GetFileSize(test_dir_ + "/g", &file_size)); ASSERT_EQ(3U, file_size); // Check that renaming overwriting works ASSERT_OK(env_->RenameFile(test_dir_ + "/f2", test_dir_ + "/g")); ASSERT_OK(env_->GetFileSize(test_dir_ + "/g", &file_size)); ASSERT_EQ(0U, file_size); // Check that opening non-existent file fails. std::unique_ptr seq_file; std::unique_ptr rand_file; ASSERT_TRUE(!env_->NewSequentialFile(test_dir_ + "/non_existent", &seq_file, soptions_) .ok()); ASSERT_TRUE(!seq_file); ASSERT_TRUE(!env_->NewRandomAccessFile(test_dir_ + "/non_existent", &rand_file, soptions_) .ok()); ASSERT_TRUE(!rand_file); // Check that deleting works. ASSERT_TRUE(!env_->DeleteFile(test_dir_ + "/non_existent").ok()); ASSERT_OK(env_->DeleteFile(test_dir_ + "/g")); ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/g")); ASSERT_OK(env_->GetChildren(test_dir_, &children)); ASSERT_EQ(0U, children.size()); ASSERT_TRUE( env_->GetChildren(test_dir_ + "/non_existent", &children).IsNotFound()); } TEST_P(EnvBasicTestWithParam, ReadWrite) { std::unique_ptr writable_file; std::unique_ptr seq_file; std::unique_ptr rand_file; Slice result; char scratch[100]; ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("hello ")); ASSERT_OK(writable_file->Append("world")); ASSERT_OK(writable_file->Close()); writable_file.reset(); // Read sequentially. ASSERT_OK(env_->NewSequentialFile(test_dir_ + "/f", &seq_file, soptions_)); ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello". 
ASSERT_EQ(0, result.compare("hello")); ASSERT_OK(seq_file->Skip(1)); ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world". ASSERT_EQ(0, result.compare("world")); ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF. ASSERT_EQ(0U, result.size()); ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. ASSERT_OK(seq_file->Read(1000, &result, scratch)); ASSERT_EQ(0U, result.size()); // Random reads. ASSERT_OK(env_->NewRandomAccessFile(test_dir_ + "/f", &rand_file, soptions_)); ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world". ASSERT_EQ(0, result.compare("world")); ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello". ASSERT_EQ(0, result.compare("hello")); ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d". ASSERT_EQ(0, result.compare("d")); // Too high offset. ASSERT_TRUE(rand_file->Read(1000, 5, &result, scratch).ok()); } TEST_P(EnvBasicTestWithParam, Misc) { std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile(test_dir_ + "/b", &writable_file, soptions_)); // These are no-ops, but we test they return success. ASSERT_OK(writable_file->Sync()); ASSERT_OK(writable_file->Flush()); ASSERT_OK(writable_file->Close()); writable_file.reset(); } TEST_P(EnvBasicTestWithParam, LargeWrite) { const size_t kWriteSize = 300 * 1024; char* scratch = new char[kWriteSize * 2]; std::string write_data; for (size_t i = 0; i < kWriteSize; ++i) { write_data.append(1, static_cast(i)); } std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("foo")); ASSERT_OK(writable_file->Append(write_data)); ASSERT_OK(writable_file->Close()); writable_file.reset(); std::unique_ptr seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile(test_dir_ + "/f", &seq_file, soptions_)); ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". 
ASSERT_EQ(0, result.compare("foo")); size_t read = 0; std::string read_data; while (read < kWriteSize) { ASSERT_OK(seq_file->Read(kWriteSize - read, &result, scratch)); read_data.append(result.data(), result.size()); read += result.size(); } ASSERT_TRUE(write_data == read_data); delete [] scratch; } TEST_P(EnvMoreTestWithParam, GetModTime) { ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/dir1")); uint64_t mtime1 = 0x0; ASSERT_OK(env_->GetFileModificationTime(test_dir_ + "/dir1", &mtime1)); } TEST_P(EnvMoreTestWithParam, MakeDir) { ASSERT_OK(env_->CreateDir(test_dir_ + "/j")); ASSERT_OK(env_->FileExists(test_dir_ + "/j")); std::vector children; env_->GetChildren(test_dir_, &children); ASSERT_EQ(1U, children.size()); // fail because file already exists ASSERT_TRUE(!env_->CreateDir(test_dir_ + "/j").ok()); ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/j")); ASSERT_OK(env_->DeleteDir(test_dir_ + "/j")); ASSERT_EQ(Status::NotFound(), env_->FileExists(test_dir_ + "/j")); } TEST_P(EnvMoreTestWithParam, GetChildren) { // empty folder returns empty vector std::vector children; std::vector childAttr; ASSERT_OK(env_->CreateDirIfMissing(test_dir_)); ASSERT_OK(env_->GetChildren(test_dir_, &children)); ASSERT_OK(env_->FileExists(test_dir_)); ASSERT_OK(env_->GetChildrenFileAttributes(test_dir_, &childAttr)); ASSERT_EQ(0U, children.size()); ASSERT_EQ(0U, childAttr.size()); // folder with contents returns relative path to test dir ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/niu")); ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/you")); ASSERT_OK(env_->CreateDirIfMissing(test_dir_ + "/guo")); ASSERT_OK(env_->GetChildren(test_dir_, &children)); ASSERT_OK(env_->GetChildrenFileAttributes(test_dir_, &childAttr)); ASSERT_EQ(3U, children.size()); ASSERT_EQ(3U, childAttr.size()); for (auto each : children) { env_->DeleteDir(test_dir_ + "/" + each); } // necessary for default POSIX env // non-exist directory returns IOError ASSERT_OK(env_->DeleteDir(test_dir_)); 
ASSERT_TRUE(!env_->FileExists(test_dir_).ok()); ASSERT_TRUE(!env_->GetChildren(test_dir_, &children).ok()); ASSERT_TRUE(!env_->GetChildrenFileAttributes(test_dir_, &childAttr).ok()); // if dir is a file, returns IOError ASSERT_OK(env_->CreateDir(test_dir_)); std::unique_ptr writable_file; ASSERT_OK( env_->NewWritableFile(test_dir_ + "/file", &writable_file, soptions_)); ASSERT_OK(writable_file->Close()); writable_file.reset(); ASSERT_TRUE(!env_->GetChildren(test_dir_ + "/file", &children).ok()); ASSERT_EQ(0U, children.size()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/env/env_chroot.cc000066400000000000000000000311341370372246700165140ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #if !defined(ROCKSDB_LITE) && !defined(OS_WIN) #include "env/env_chroot.h" #include #include #include #include #include #include #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class ChrootEnv : public EnvWrapper { public: ChrootEnv(Env* base_env, const std::string& chroot_dir) : EnvWrapper(base_env) { #if defined(OS_AIX) char resolvedName[PATH_MAX]; char* real_chroot_dir = realpath(chroot_dir.c_str(), resolvedName); #else char* real_chroot_dir = realpath(chroot_dir.c_str(), nullptr); #endif // chroot_dir must exist so realpath() returns non-nullptr. 
assert(real_chroot_dir != nullptr); chroot_dir_ = real_chroot_dir; #if !defined(OS_AIX) free(real_chroot_dir); #endif } Status RegisterDbPaths(const std::vector& paths) override { std::vector encoded_paths; encoded_paths.reserve(paths.size()); for (auto& path : paths) { auto status_and_enc_path = EncodePathWithNewBasename(path); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } encoded_paths.emplace_back(status_and_enc_path.second); } return EnvWrapper::Env::RegisterDbPaths(encoded_paths); } Status UnregisterDbPaths(const std::vector& paths) override { std::vector encoded_paths; encoded_paths.reserve(paths.size()); for (auto& path : paths) { auto status_and_enc_path = EncodePathWithNewBasename(path); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } encoded_paths.emplace_back(status_and_enc_path.second); } return EnvWrapper::Env::UnregisterDbPaths(encoded_paths); } Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::NewSequentialFile(status_and_enc_path.second, result, options); } Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::NewRandomAccessFile(status_and_enc_path.second, result, options); } Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::NewWritableFile(status_and_enc_path.second, result, options); } Status ReuseWritableFile(const std::string& fname, const 
std::string& old_fname, std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } auto status_and_old_enc_path = EncodePath(old_fname); if (!status_and_old_enc_path.first.ok()) { return status_and_old_enc_path.first; } return EnvWrapper::ReuseWritableFile(status_and_old_enc_path.second, status_and_old_enc_path.second, result, options); } Status NewRandomRWFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::NewRandomRWFile(status_and_enc_path.second, result, options); } Status NewDirectory(const std::string& dir, std::unique_ptr* result) override { auto status_and_enc_path = EncodePathWithNewBasename(dir); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::NewDirectory(status_and_enc_path.second, result); } Status FileExists(const std::string& fname) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::FileExists(status_and_enc_path.second); } Status GetChildren(const std::string& dir, std::vector* result) override { auto status_and_enc_path = EncodePath(dir); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::GetChildren(status_and_enc_path.second, result); } Status GetChildrenFileAttributes( const std::string& dir, std::vector* result) override { auto status_and_enc_path = EncodePath(dir); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::GetChildrenFileAttributes(status_and_enc_path.second, result); } Status DeleteFile(const std::string& fname) override { auto status_and_enc_path = 
EncodePath(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::DeleteFile(status_and_enc_path.second); } Status CreateDir(const std::string& dirname) override { auto status_and_enc_path = EncodePathWithNewBasename(dirname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::CreateDir(status_and_enc_path.second); } Status CreateDirIfMissing(const std::string& dirname) override { auto status_and_enc_path = EncodePathWithNewBasename(dirname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::CreateDirIfMissing(status_and_enc_path.second); } Status DeleteDir(const std::string& dirname) override { auto status_and_enc_path = EncodePath(dirname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::DeleteDir(status_and_enc_path.second); } Status GetFileSize(const std::string& fname, uint64_t* file_size) override { auto status_and_enc_path = EncodePath(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::GetFileSize(status_and_enc_path.second, file_size); } Status GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) override { auto status_and_enc_path = EncodePath(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::GetFileModificationTime(status_and_enc_path.second, file_mtime); } Status RenameFile(const std::string& src, const std::string& dest) override { auto status_and_src_enc_path = EncodePath(src); if (!status_and_src_enc_path.first.ok()) { return status_and_src_enc_path.first; } auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); if (!status_and_dest_enc_path.first.ok()) { return status_and_dest_enc_path.first; } return EnvWrapper::RenameFile(status_and_src_enc_path.second, status_and_dest_enc_path.second); } Status LinkFile(const std::string& src, const 
std::string& dest) override { auto status_and_src_enc_path = EncodePath(src); if (!status_and_src_enc_path.first.ok()) { return status_and_src_enc_path.first; } auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); if (!status_and_dest_enc_path.first.ok()) { return status_and_dest_enc_path.first; } return EnvWrapper::LinkFile(status_and_src_enc_path.second, status_and_dest_enc_path.second); } Status LockFile(const std::string& fname, FileLock** lock) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } // FileLock subclasses may store path (e.g., PosixFileLock stores it). We // can skip stripping the chroot directory from this path because callers // shouldn't use it. return EnvWrapper::LockFile(status_and_enc_path.second, lock); } Status GetTestDirectory(std::string* path) override { // Adapted from PosixEnv's implementation since it doesn't provide a way to // create directory in the chroot. char buf[256]; snprintf(buf, sizeof(buf), "/rocksdbtest-%d", static_cast(geteuid())); *path = buf; // Directory may already exist, so ignore return CreateDir(*path); return Status::OK(); } Status NewLogger(const std::string& fname, std::shared_ptr* result) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::NewLogger(status_and_enc_path.second, result); } Status GetAbsolutePath(const std::string& db_path, std::string* output_path) override { auto status_and_enc_path = EncodePath(db_path); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; } return EnvWrapper::GetAbsolutePath(status_and_enc_path.second, output_path); } private: // Returns status and expanded absolute path including the chroot directory. // Checks whether the provided path breaks out of the chroot. If it returns // non-OK status, the returned path should not be used. 
std::pair EncodePath(const std::string& path) { if (path.empty() || path[0] != '/') { return {Status::InvalidArgument(path, "Not an absolute path"), ""}; } std::pair res; res.second = chroot_dir_ + path; #if defined(OS_AIX) char resolvedName[PATH_MAX]; char* normalized_path = realpath(res.second.c_str(), resolvedName); #else char* normalized_path = realpath(res.second.c_str(), nullptr); #endif if (normalized_path == nullptr) { res.first = Status::NotFound(res.second, strerror(errno)); } else if (strlen(normalized_path) < chroot_dir_.size() || strncmp(normalized_path, chroot_dir_.c_str(), chroot_dir_.size()) != 0) { res.first = Status::IOError(res.second, "Attempted to access path outside chroot"); } else { res.first = Status::OK(); } #if !defined(OS_AIX) free(normalized_path); #endif return res; } // Similar to EncodePath() except assumes the basename in the path hasn't been // created yet. std::pair EncodePathWithNewBasename( const std::string& path) { if (path.empty() || path[0] != '/') { return {Status::InvalidArgument(path, "Not an absolute path"), ""}; } // Basename may be followed by trailing slashes size_t final_idx = path.find_last_not_of('/'); if (final_idx == std::string::npos) { // It's only slashes so no basename to extract return EncodePath(path); } // Pull off the basename temporarily since realname(3) (used by // EncodePath()) requires a path that exists size_t base_sep = path.rfind('/', final_idx); auto status_and_enc_path = EncodePath(path.substr(0, base_sep + 1)); status_and_enc_path.second.append(path.substr(base_sep + 1)); return status_and_enc_path; } std::string chroot_dir_; }; Env* NewChrootEnv(Env* base_env, const std::string& chroot_dir) { if (!base_env->FileExists(chroot_dir).ok()) { return nullptr; } return new ChrootEnv(base_env, chroot_dir); } } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) && !defined(OS_WIN) 
rocksdb-6.11.4/env/env_chroot.h000066400000000000000000000013001370372246700163460ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #if !defined(ROCKSDB_LITE) && !defined(OS_WIN) #include #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { // Returns an Env that translates paths such that the root directory appears to // be chroot_dir. chroot_dir should refer to an existing directory. Env* NewChrootEnv(Env* base_env, const std::string& chroot_dir); } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) && !defined(OS_WIN) rocksdb-6.11.4/env/env_encryption.cc000066400000000000000000001052341370372246700174130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "rocksdb/env_encryption.h" #include #include #include #include #include "monitoring/perf_context_imp.h" #include "util/aligned_buffer.h" #include "util/coding.h" #include "util/random.h" #endif namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE class EncryptedSequentialFile : public SequentialFile { private: std::unique_ptr file_; std::unique_ptr stream_; uint64_t offset_; size_t prefixLength_; public: // Default ctor. Given underlying sequential file is supposed to be at // offset == prefixLength. EncryptedSequentialFile(SequentialFile* f, BlockAccessCipherStream* s, size_t prefixLength) : file_(f), stream_(s), offset_(prefixLength), prefixLength_(prefixLength) { } // Read up to "n" bytes from the file. "scratch[0..n-1]" may be // written by this routine. 
Sets "*result" to the data that was // read (including if fewer than "n" bytes were successfully read). // May set "*result" to point at data in "scratch[0..n-1]", so // "scratch[0..n-1]" must be live when "*result" is used. // If an error was encountered, returns a non-OK status. // // REQUIRES: External synchronization Status Read(size_t n, Slice* result, char* scratch) override { assert(scratch); Status status = file_->Read(n, result, scratch); if (!status.ok()) { return status; } { PERF_TIMER_GUARD(decrypt_data_nanos); status = stream_->Decrypt(offset_, (char*)result->data(), result->size()); } offset_ += result->size(); // We've already ready data from disk, so update // offset_ even if decryption fails. return status; } // Skip "n" bytes from the file. This is guaranteed to be no // slower that reading the same data, but may be faster. // // If end of file is reached, skipping will stop at the end of the // file, and Skip will return OK. // // REQUIRES: External synchronization Status Skip(uint64_t n) override { auto status = file_->Skip(n); if (!status.ok()) { return status; } offset_ += n; return status; } // Indicates the upper layers if the current SequentialFile implementation // uses direct IO. bool use_direct_io() const override { return file_->use_direct_io(); } // Use the returned alignment value to allocate // aligned buffer for Direct I/O size_t GetRequiredBufferAlignment() const override { return file_->GetRequiredBufferAlignment(); } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. 
Status InvalidateCache(size_t offset, size_t length) override { return file_->InvalidateCache(offset + prefixLength_, length); } // Positioned Read for direct I/O // If Direct I/O enabled, offset, n, and scratch should be properly aligned Status PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) override { assert(scratch); offset += prefixLength_; // Skip prefix auto status = file_->PositionedRead(offset, n, result, scratch); if (!status.ok()) { return status; } offset_ = offset + result->size(); { PERF_TIMER_GUARD(decrypt_data_nanos); status = stream_->Decrypt(offset, (char*)result->data(), result->size()); } return status; } }; // A file abstraction for randomly reading the contents of a file. class EncryptedRandomAccessFile : public RandomAccessFile { private: std::unique_ptr file_; std::unique_ptr stream_; size_t prefixLength_; public: EncryptedRandomAccessFile(RandomAccessFile* f, BlockAccessCipherStream* s, size_t prefixLength) : file_(f), stream_(s), prefixLength_(prefixLength) { } // Read up to "n" bytes from the file starting at "offset". // "scratch[0..n-1]" may be written by this routine. Sets "*result" // to the data that was read (including if fewer than "n" bytes were // successfully read). May set "*result" to point at data in // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when // "*result" is used. If an error was encountered, returns a non-OK // status. // // Safe for concurrent use by multiple threads. // If Direct I/O enabled, offset, n, and scratch should be aligned properly. Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { assert(scratch); offset += prefixLength_; auto status = file_->Read(offset, n, result, scratch); if (!status.ok()) { return status; } { PERF_TIMER_GUARD(decrypt_data_nanos); status = stream_->Decrypt(offset, (char*)result->data(), result->size()); } return status; } // Readahead the file starting from offset by n bytes for caching. 
Status Prefetch(uint64_t offset, size_t n) override { //return Status::OK(); return file_->Prefetch(offset + prefixLength_, n); } // Tries to get an unique ID for this file that will be the same each time // the file is opened (and will stay the same while the file is open). // Furthermore, it tries to make this ID at most "max_size" bytes. If such an // ID can be created this function returns the length of the ID and places it // in "id"; otherwise, this function returns 0, in which case "id" // may not have been modified. // // This function guarantees, for IDs from a given environment, two unique ids // cannot be made equal to each other by adding arbitrary bytes to one of // them. That is, no unique ID is the prefix of another. // // This function guarantees that the returned ID will not be interpretable as // a single varint. // // Note: these IDs are only valid for the duration of the process. size_t GetUniqueId(char* id, size_t max_size) const override { return file_->GetUniqueId(id, max_size); }; void Hint(AccessPattern pattern) override { file_->Hint(pattern); } // Indicates the upper layers if the current RandomAccessFile implementation // uses direct IO. bool use_direct_io() const override { return file_->use_direct_io(); } // Use the returned alignment value to allocate // aligned buffer for Direct I/O size_t GetRequiredBufferAlignment() const override { return file_->GetRequiredBufferAlignment(); } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. Status InvalidateCache(size_t offset, size_t length) override { return file_->InvalidateCache(offset + prefixLength_, length); } }; // A file abstraction for sequential writing. The implementation // must provide buffering since callers may append small fragments // at a time to the file. 
class EncryptedWritableFile : public WritableFileWrapper { private: std::unique_ptr file_; std::unique_ptr stream_; size_t prefixLength_; public: // Default ctor. Prefix is assumed to be written already. EncryptedWritableFile(WritableFile* f, BlockAccessCipherStream* s, size_t prefixLength) : WritableFileWrapper(f), file_(f), stream_(s), prefixLength_(prefixLength) { } Status Append(const Slice& data) override { AlignedBuffer buf; Status status; Slice dataToAppend(data); if (data.size() > 0) { auto offset = file_->GetFileSize(); // size including prefix // Encrypt in cloned buffer buf.Alignment(GetRequiredBufferAlignment()); buf.AllocateNewBuffer(data.size()); // TODO (sagar0): Modify AlignedBuffer.Append to allow doing a memmove // so that the next two lines can be replaced with buf.Append(). memmove(buf.BufferStart(), data.data(), data.size()); buf.Size(data.size()); { PERF_TIMER_GUARD(encrypt_data_nanos); status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize()); } if (!status.ok()) { return status; } dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize()); } status = file_->Append(dataToAppend); if (!status.ok()) { return status; } return status; } Status PositionedAppend(const Slice& data, uint64_t offset) override { AlignedBuffer buf; Status status; Slice dataToAppend(data); offset += prefixLength_; if (data.size() > 0) { // Encrypt in cloned buffer buf.Alignment(GetRequiredBufferAlignment()); buf.AllocateNewBuffer(data.size()); memmove(buf.BufferStart(), data.data(), data.size()); buf.Size(data.size()); { PERF_TIMER_GUARD(encrypt_data_nanos); status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize()); } if (!status.ok()) { return status; } dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize()); } status = file_->PositionedAppend(dataToAppend, offset); if (!status.ok()) { return status; } return status; } // Indicates the upper layers if the current WritableFile implementation // uses direct IO. 
bool use_direct_io() const override { return file_->use_direct_io(); } // Use the returned alignment value to allocate // aligned buffer for Direct I/O size_t GetRequiredBufferAlignment() const override { return file_->GetRequiredBufferAlignment(); } /* * Get the size of valid data in the file. */ uint64_t GetFileSize() override { return file_->GetFileSize() - prefixLength_; } // Truncate is necessary to trim the file to the correct size // before closing. It is not always possible to keep track of the file // size due to whole pages writes. The behavior is undefined if called // with other writes to follow. Status Truncate(uint64_t size) override { return file_->Truncate(size + prefixLength_); } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. // This call has no effect on dirty pages in the cache. Status InvalidateCache(size_t offset, size_t length) override { return file_->InvalidateCache(offset + prefixLength_, length); } // Sync a file range with disk. // offset is the starting byte of the file range to be synchronized. // nbytes specifies the length of the range to be synchronized. // This asks the OS to initiate flushing the cached data to disk, // without waiting for completion. // Default implementation does nothing. Status RangeSync(uint64_t offset, uint64_t nbytes) override { return file_->RangeSync(offset + prefixLength_, nbytes); } // PrepareWrite performs any necessary preparation for a write // before the write actually occurs. This allows for pre-allocation // of space on devices where it can result in less file // fragmentation and/or less waste from over-zealous filesystem // pre-allocation. void PrepareWrite(size_t offset, size_t len) override { file_->PrepareWrite(offset + prefixLength_, len); } // Pre-allocates space for a file. 
Status Allocate(uint64_t offset, uint64_t len) override { return file_->Allocate(offset + prefixLength_, len); } }; // A file abstraction for random reading and writing. class EncryptedRandomRWFile : public RandomRWFile { private: std::unique_ptr file_; std::unique_ptr stream_; size_t prefixLength_; public: EncryptedRandomRWFile(RandomRWFile* f, BlockAccessCipherStream* s, size_t prefixLength) : file_(f), stream_(s), prefixLength_(prefixLength) {} // Indicates if the class makes use of direct I/O // If false you must pass aligned buffer to Write() bool use_direct_io() const override { return file_->use_direct_io(); } // Use the returned alignment value to allocate // aligned buffer for Direct I/O size_t GetRequiredBufferAlignment() const override { return file_->GetRequiredBufferAlignment(); } // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. // Pass aligned buffer when use_direct_io() returns true. Status Write(uint64_t offset, const Slice& data) override { AlignedBuffer buf; Status status; Slice dataToWrite(data); offset += prefixLength_; if (data.size() > 0) { // Encrypt in cloned buffer buf.Alignment(GetRequiredBufferAlignment()); buf.AllocateNewBuffer(data.size()); memmove(buf.BufferStart(), data.data(), data.size()); buf.Size(data.size()); { PERF_TIMER_GUARD(encrypt_data_nanos); status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize()); } if (!status.ok()) { return status; } dataToWrite = Slice(buf.BufferStart(), buf.CurrentSize()); } status = file_->Write(offset, dataToWrite); return status; } // Read up to `n` bytes starting from offset `offset` and store them in // result, provided `scratch` size should be at least `n`. // Returns Status::OK() on success. 
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { assert(scratch); offset += prefixLength_; auto status = file_->Read(offset, n, result, scratch); if (!status.ok()) { return status; } { PERF_TIMER_GUARD(decrypt_data_nanos); status = stream_->Decrypt(offset, (char*)result->data(), result->size()); } return status; } Status Flush() override { return file_->Flush(); } Status Sync() override { return file_->Sync(); } Status Fsync() override { return file_->Fsync(); } Status Close() override { return file_->Close(); } }; // EncryptedEnv implements an Env wrapper that adds encryption to files stored on disk. class EncryptedEnv : public EnvWrapper { public: EncryptedEnv(Env* base_env, EncryptionProvider *provider) : EnvWrapper(base_env) { provider_ = provider; } // NewSequentialFile opens a file for sequential reading. Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { result->reset(); if (options.use_mmap_reads) { return Status::InvalidArgument(); } // Open file using underlying Env implementation std::unique_ptr underlying; auto status = EnvWrapper::NewSequentialFile(fname, &underlying, options); if (!status.ok()) { return status; } // Read prefix (if needed) AlignedBuffer prefixBuf; Slice prefixSlice; size_t prefixLength = provider_->GetPrefixLength(); if (prefixLength > 0) { // Read prefix prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); prefixBuf.AllocateNewBuffer(prefixLength); status = underlying->Read(prefixLength, &prefixSlice, prefixBuf.BufferStart()); if (!status.ok()) { return status; } prefixBuf.Size(prefixLength); } // Create cipher stream std::unique_ptr stream; status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); if (!status.ok()) { return status; } (*result) = std::unique_ptr(new EncryptedSequentialFile(underlying.release(), stream.release(), prefixLength)); return Status::OK(); } // NewRandomAccessFile opens a file for 
random read access. Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { result->reset(); if (options.use_mmap_reads) { return Status::InvalidArgument(); } // Open file using underlying Env implementation std::unique_ptr underlying; auto status = EnvWrapper::NewRandomAccessFile(fname, &underlying, options); if (!status.ok()) { return status; } // Read prefix (if needed) AlignedBuffer prefixBuf; Slice prefixSlice; size_t prefixLength = provider_->GetPrefixLength(); if (prefixLength > 0) { // Read prefix prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); prefixBuf.AllocateNewBuffer(prefixLength); status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart()); if (!status.ok()) { return status; } prefixBuf.Size(prefixLength); } // Create cipher stream std::unique_ptr stream; status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); if (!status.ok()) { return status; } (*result) = std::unique_ptr(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength)); return Status::OK(); } // NewWritableFile opens a file for sequential writing. 
Status NewWritableFile(const std::string& fname,
                         std::unique_ptr<WritableFile>* result,
                         const EnvOptions& options) override {
    // mmap writes are rejected with InvalidArgument. When the provider uses a
    // prefix, a fresh one is generated and appended to the new file before the
    // cipher stream is built from it.
    result->reset();
    if (options.use_mmap_writes) {
      return Status::InvalidArgument();
    }
    // Open file using underlying Env implementation
    std::unique_ptr<WritableFile> underlying;
    Status status = EnvWrapper::NewWritableFile(fname, &underlying, options);
    if (!status.ok()) {
      return status;
    }
    // Initialize & write prefix (if needed)
    AlignedBuffer prefixBuf;
    Slice prefixSlice;
    size_t prefixLength = provider_->GetPrefixLength();
    if (prefixLength > 0) {
      // Initialize prefix
      prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
      prefixBuf.AllocateNewBuffer(prefixLength);
      // A failed prefix must not reach disk, so the provider's status is
      // checked rather than dropped.
      status = provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(),
                                          prefixLength);
      if (!status.ok()) {
        return status;
      }
      prefixBuf.Size(prefixLength);
      prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
      // Write prefix
      status = underlying->Append(prefixSlice);
      if (!status.ok()) {
        return status;
      }
    }
    // Create cipher stream
    std::unique_ptr<BlockAccessCipherStream> stream;
    status =
        provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
    if (!status.ok()) {
      return status;
    }
    (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(
        underlying.release(), stream.release(), prefixLength));
    return Status::OK();
  }

  // Create an object that writes to a new file with the specified
  // name.  Deletes any existing file with the same name and creates a
  // new file.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure stores nullptr in *result and
  // returns non-OK.
  //
  // The returned file will only be accessed by one thread at a time.
Status ReopenWritableFile(const std::string& fname,
                            std::unique_ptr<WritableFile>* result,
                            const EnvOptions& options) override {
    // mmap writes are rejected with InvalidArgument. When the provider uses a
    // prefix, a fresh one is generated and appended through the reopened file
    // before the cipher stream is built from it.
    // NOTE(review): a new prefix is appended even if the file already has
    // content - confirm that is intended for reopened files.
    result->reset();
    if (options.use_mmap_writes) {
      return Status::InvalidArgument();
    }
    // Open file using underlying Env implementation
    std::unique_ptr<WritableFile> underlying;
    Status status =
        EnvWrapper::ReopenWritableFile(fname, &underlying, options);
    if (!status.ok()) {
      return status;
    }
    // Initialize & write prefix (if needed)
    AlignedBuffer prefixBuf;
    Slice prefixSlice;
    size_t prefixLength = provider_->GetPrefixLength();
    if (prefixLength > 0) {
      // Initialize prefix
      prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
      prefixBuf.AllocateNewBuffer(prefixLength);
      // A failed prefix must not reach disk, so the provider's status is
      // checked rather than dropped.
      status = provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(),
                                          prefixLength);
      if (!status.ok()) {
        return status;
      }
      prefixBuf.Size(prefixLength);
      prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
      // Write prefix
      status = underlying->Append(prefixSlice);
      if (!status.ok()) {
        return status;
      }
    }
    // Create cipher stream
    std::unique_ptr<BlockAccessCipherStream> stream;
    status =
        provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
    if (!status.ok()) {
      return status;
    }
    (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(
        underlying.release(), stream.release(), prefixLength));
    return Status::OK();
  }

  // Reuse an existing file by renaming it and opening it as writable.
Status ReuseWritableFile(const std::string& fname,
                           const std::string& old_fname,
                           std::unique_ptr<WritableFile>* result,
                           const EnvOptions& options) override {
    // mmap writes are rejected with InvalidArgument. When the provider uses a
    // prefix, a fresh one is generated for `fname` and appended before the
    // cipher stream is built from it.
    result->reset();
    if (options.use_mmap_writes) {
      return Status::InvalidArgument();
    }
    // Open file using underlying Env implementation
    std::unique_ptr<WritableFile> underlying;
    Status status =
        EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options);
    if (!status.ok()) {
      return status;
    }
    // Initialize & write prefix (if needed)
    AlignedBuffer prefixBuf;
    Slice prefixSlice;
    size_t prefixLength = provider_->GetPrefixLength();
    if (prefixLength > 0) {
      // Initialize prefix
      prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
      prefixBuf.AllocateNewBuffer(prefixLength);
      // A failed prefix must not reach disk, so the provider's status is
      // checked rather than dropped.
      status = provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(),
                                          prefixLength);
      if (!status.ok()) {
        return status;
      }
      prefixBuf.Size(prefixLength);
      prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
      // Write prefix
      status = underlying->Append(prefixSlice);
      if (!status.ok()) {
        return status;
      }
    }
    // Create cipher stream
    std::unique_ptr<BlockAccessCipherStream> stream;
    status =
        provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
    if (!status.ok()) {
      return status;
    }
    (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(
        underlying.release(), stream.release(), prefixLength));
    return Status::OK();
  }

  // Open `fname` for random read and write, if file doesn't exist the file
  // will be created.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure returns non-OK.
  //
  // The returned file will only be accessed by one thread at a time.
Status NewRandomRWFile(const std::string& fname,
                         std::unique_ptr<RandomRWFile>* result,
                         const EnvOptions& options) override {
    // mmap reads or writes are rejected with InvalidArgument. An existing file
    // has its prefix read back from offset 0; a new file gets a fresh prefix
    // written at offset 0.
    result->reset();
    if (options.use_mmap_reads || options.use_mmap_writes) {
      return Status::InvalidArgument();
    }
    // Check file exists (must happen before the open below creates it)
    bool isNewFile = !FileExists(fname).ok();

    // Open file using underlying Env implementation
    std::unique_ptr<RandomRWFile> underlying;
    Status status = EnvWrapper::NewRandomRWFile(fname, &underlying, options);
    if (!status.ok()) {
      return status;
    }
    // Read or Initialize & write prefix (if needed)
    AlignedBuffer prefixBuf;
    Slice prefixSlice;
    size_t prefixLength = provider_->GetPrefixLength();
    if (prefixLength > 0) {
      prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
      prefixBuf.AllocateNewBuffer(prefixLength);
      if (!isNewFile) {
        // File already exists, read prefix
        status = underlying->Read(0, prefixLength, &prefixSlice,
                                  prefixBuf.BufferStart());
        if (!status.ok()) {
          return status;
        }
        prefixBuf.Size(prefixLength);
      } else {
        // File is new, initialize & write prefix. A failed prefix must not
        // reach disk, so the provider's status is checked rather than dropped.
        status = provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(),
                                            prefixLength);
        if (!status.ok()) {
          return status;
        }
        prefixBuf.Size(prefixLength);
        prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
        // Write prefix
        status = underlying->Write(0, prefixSlice);
        if (!status.ok()) {
          return status;
        }
      }
    }
    // Create cipher stream
    std::unique_ptr<BlockAccessCipherStream> stream;
    status =
        provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
    if (!status.ok()) {
      return status;
    }
    (*result) = std::unique_ptr<RandomRWFile>(new EncryptedRandomRWFile(
        underlying.release(), stream.release(), prefixLength));
    return Status::OK();
  }

  // Store in *result the attributes of the children of the specified directory.
  // In case the implementation lists the directory prior to iterating the files
  // and files are concurrently deleted, the deleted files will be omitted from
  // result.
  // The name attributes are relative to "dir".
  // Original contents of *results are dropped.
  // Returns OK if "dir" exists and "*result" contains its children.
// NotFound if "dir" does not exist, the calling process does not have // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered Status GetChildrenFileAttributes( const std::string& dir, std::vector* result) override { auto status = EnvWrapper::GetChildrenFileAttributes(dir, result); if (!status.ok()) { return status; } size_t prefixLength = provider_->GetPrefixLength(); for (auto it = std::begin(*result); it!=std::end(*result); ++it) { assert(it->size_bytes >= prefixLength); it->size_bytes -= prefixLength; } return Status::OK(); } // Store the size of fname in *file_size. Status GetFileSize(const std::string& fname, uint64_t* file_size) override { auto status = EnvWrapper::GetFileSize(fname, file_size); if (!status.ok()) { return status; } size_t prefixLength = provider_->GetPrefixLength(); assert(*file_size >= prefixLength); *file_size -= prefixLength; return Status::OK(); } private: EncryptionProvider *provider_; }; // Returns an Env that encrypts data when stored on disk and decrypts data when // read from disk. Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider) { return new EncryptedEnv(base_env, provider); } // Encrypt one or more (partial) blocks of data at the file offset. // Length of data is given in dataSize. Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t dataSize) { // Calculate block index auto blockSize = BlockSize(); uint64_t blockIndex = fileOffset / blockSize; size_t blockOffset = fileOffset % blockSize; std::unique_ptr blockBuffer; std::string scratch; AllocateScratch(scratch); // Encrypt individual blocks. while (1) { char *block = data; size_t n = std::min(dataSize, blockSize - blockOffset); if (n != blockSize) { // We're not encrypting a full block. 
// Copy data to blockBuffer if (!blockBuffer.get()) { // Allocate buffer blockBuffer = std::unique_ptr(new char[blockSize]); } block = blockBuffer.get(); // Copy plain data to block buffer memmove(block + blockOffset, data, n); } auto status = EncryptBlock(blockIndex, block, (char*)scratch.data()); if (!status.ok()) { return status; } if (block != data) { // Copy encrypted data back to `data`. memmove(data, block + blockOffset, n); } dataSize -= n; if (dataSize == 0) { return Status::OK(); } data += n; blockOffset = 0; blockIndex++; } } // Decrypt one or more (partial) blocks of data at the file offset. // Length of data is given in dataSize. Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t dataSize) { // Calculate block index auto blockSize = BlockSize(); uint64_t blockIndex = fileOffset / blockSize; size_t blockOffset = fileOffset % blockSize; std::unique_ptr blockBuffer; std::string scratch; AllocateScratch(scratch); // Decrypt individual blocks. while (1) { char *block = data; size_t n = std::min(dataSize, blockSize - blockOffset); if (n != blockSize) { // We're not decrypting a full block. // Copy data to blockBuffer if (!blockBuffer.get()) { // Allocate buffer blockBuffer = std::unique_ptr(new char[blockSize]); } block = blockBuffer.get(); // Copy encrypted data to block buffer memmove(block + blockOffset, data, n); } auto status = DecryptBlock(blockIndex, block, (char*)scratch.data()); if (!status.ok()) { return status; } if (block != data) { // Copy decrypted data back to `data`. memmove(data, block + blockOffset, n); } // Simply decrementing dataSize by n could cause it to underflow, // which will very likely make it read over the original bounds later assert(dataSize >= n); if (dataSize < n) { return Status::Corruption("Cannot decrypt data at given offset"); } dataSize -= n; if (dataSize == 0) { return Status::OK(); } data += n; blockOffset = 0; blockIndex++; } } // Encrypt a block of data. 
// Length of data is equal to BlockSize(). Status ROT13BlockCipher::Encrypt(char *data) { for (size_t i = 0; i < blockSize_; ++i) { data[i] += 13; } return Status::OK(); } // Decrypt a block of data. // Length of data is equal to BlockSize(). Status ROT13BlockCipher::Decrypt(char *data) { return Encrypt(data); } // Allocate scratch space which is passed to EncryptBlock/DecryptBlock. void CTRCipherStream::AllocateScratch(std::string& scratch) { auto blockSize = cipher_.BlockSize(); scratch.reserve(blockSize); } // Encrypt a block of data at the given block index. // Length of data is equal to BlockSize(); Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char *data, char* scratch) { // Create nonce + counter auto blockSize = cipher_.BlockSize(); memmove(scratch, iv_.data(), blockSize); EncodeFixed64(scratch, blockIndex + initialCounter_); // Encrypt nonce+counter auto status = cipher_.Encrypt(scratch); if (!status.ok()) { return status; } // XOR data with ciphertext. for (size_t i = 0; i < blockSize; i++) { data[i] = data[i] ^ scratch[i]; } return Status::OK(); } // Decrypt a block of data at the given block index. // Length of data is equal to BlockSize(); Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char *data, char* scratch) { // For CTR decryption & encryption are the same return EncryptBlock(blockIndex, data, scratch); } // GetPrefixLength returns the length of the prefix that is added to every file // and used for storing encryption options. // For optimal performance, the prefix length should be a multiple of // the page size. size_t CTREncryptionProvider::GetPrefixLength() { return defaultPrefixLength; } // decodeCTRParameters decodes the initial counter & IV from the given // (plain text) prefix. 
static void decodeCTRParameters(const char *prefix, size_t blockSize, uint64_t &initialCounter, Slice &iv) { // First block contains 64-bit initial counter initialCounter = DecodeFixed64(prefix); // Second block contains IV iv = Slice(prefix + blockSize, blockSize); } // CreateNewPrefix initialized an allocated block of prefix memory // for a new file. Status CTREncryptionProvider::CreateNewPrefix(const std::string& /*fname*/, char* prefix, size_t prefixLength) { // Create & seed rnd. Random rnd((uint32_t)Env::Default()->NowMicros()); // Fill entire prefix block with random values. for (size_t i = 0; i < prefixLength; i++) { prefix[i] = rnd.Uniform(256) & 0xFF; } // Take random data to extract initial counter & IV auto blockSize = cipher_.BlockSize(); uint64_t initialCounter; Slice prefixIV; decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV); // Now populate the rest of the prefix, starting from the third block. PopulateSecretPrefixPart(prefix + (2 * blockSize), prefixLength - (2 * blockSize), blockSize); // Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial // counter & IV unencrypted) CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter); Status status; { PERF_TIMER_GUARD(encrypt_data_nanos); status = cipherStream.Encrypt(0, prefix + (2 * blockSize), prefixLength - (2 * blockSize)); } if (!status.ok()) { return status; } return Status::OK(); } // PopulateSecretPrefixPart initializes the data into a new prefix block // in plain text. // Returns the amount of space (starting from the start of the prefix) // that has been initialized. size_t CTREncryptionProvider::PopulateSecretPrefixPart(char* /*prefix*/, size_t /*prefixLength*/, size_t /*blockSize*/) { // Nothing to do here, put in custom data in override when needed. return 0; } Status CTREncryptionProvider::CreateCipherStream( const std::string& fname, const EnvOptions& options, Slice& prefix, std::unique_ptr* result) { // Read plain text part of prefix. 
auto blockSize = cipher_.BlockSize(); uint64_t initialCounter; Slice iv; decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv); // If the prefix is smaller than twice the block size, we would below read a // very large chunk of the file (and very likely read over the bounds) assert(prefix.size() >= 2 * blockSize); if (prefix.size() < 2 * blockSize) { return Status::Corruption("Unable to read from file " + fname + ": read attempt would read beyond file bounds"); } // Decrypt the encrypted part of the prefix, starting from block 2 (block 0, 1 // with initial counter & IV are unencrypted) CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter); Status status; { PERF_TIMER_GUARD(decrypt_data_nanos); status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize), prefix.size() - (2 * blockSize)); } if (!status.ok()) { return status; } // Create cipher stream return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv, prefix, result); } // CreateCipherStreamFromPrefix creates a block access cipher stream for a file given // given name and options. The given prefix is already decrypted. Status CTREncryptionProvider::CreateCipherStreamFromPrefix( const std::string& /*fname*/, const EnvOptions& /*options*/, uint64_t initialCounter, const Slice& iv, const Slice& /*prefix*/, std::unique_ptr* result) { (*result) = std::unique_ptr( new CTRCipherStream(cipher_, iv.data(), initialCounter)); return Status::OK(); } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/env_hdfs.cc000066400000000000000000000471741370372246700161550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include "rocksdb/env.h" #include "hdfs/env_hdfs.h" #ifdef USE_HDFS #ifndef ROCKSDB_HDFS_FILE_C #define ROCKSDB_HDFS_FILE_C #include #include #include #include #include #include #include "logging/logging.h" #include "rocksdb/status.h" #include "util/string_util.h" #define HDFS_EXISTS 0 #define HDFS_DOESNT_EXIST -1 #define HDFS_SUCCESS 0 // // This file defines an HDFS environment for rocksdb. It uses the libhdfs // api to access HDFS. All HDFS files created by one instance of rocksdb // will reside on the same HDFS cluster. // namespace ROCKSDB_NAMESPACE { namespace { // Log error message static Status IOError(const std::string& context, int err_number) { return (err_number == ENOSPC) ? Status::NoSpace(context, strerror(err_number)) : (err_number == ENOENT) ? Status::PathNotFound(context, strerror(err_number)) : Status::IOError(context, strerror(err_number)); } // assume that there is one global logger for now. It is not thread-safe, // but need not be because the logger is initialized at db-open time. static Logger* mylog = nullptr; // Used for reading a file from HDFS. It implements both sequential-read // access methods as well as random read access methods. 
class HdfsReadableFile : virtual public SequentialFile, virtual public RandomAccessFile { private: hdfsFS fileSys_; std::string filename_; hdfsFile hfile_; public: HdfsReadableFile(hdfsFS fileSys, const std::string& fname) : fileSys_(fileSys), filename_(fname), hfile_(nullptr) { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile opening file %s\n", filename_.c_str()); hfile_ = hdfsOpenFile(fileSys_, filename_.c_str(), O_RDONLY, 0, 0, 0); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile opened file %s hfile_=0x%p\n", filename_.c_str(), hfile_); } virtual ~HdfsReadableFile() { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile closing file %s\n", filename_.c_str()); hdfsCloseFile(fileSys_, hfile_); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile closed file %s\n", filename_.c_str()); hfile_ = nullptr; } bool isValid() { return hfile_ != nullptr; } // sequential access, read data at current offset in file virtual Status Read(size_t n, Slice* result, char* scratch) { Status s; ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile reading %s %ld\n", filename_.c_str(), n); char* buffer = scratch; size_t total_bytes_read = 0; tSize bytes_read = 0; tSize remaining_bytes = (tSize)n; // Read a total of n bytes repeatedly until we hit error or eof while (remaining_bytes > 0) { bytes_read = hdfsRead(fileSys_, hfile_, buffer, remaining_bytes); if (bytes_read <= 0) { break; } assert(bytes_read <= remaining_bytes); total_bytes_read += bytes_read; remaining_bytes -= bytes_read; buffer += bytes_read; } assert(total_bytes_read <= n); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str()); if (bytes_read < 0) { s = IOError(filename_, errno); } else { *result = Slice(scratch, total_bytes_read); } return s; } // random access, read data from specified offset in file virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile preading %s\n", filename_.c_str()); ssize_t bytes_read = 
hdfsPread(fileSys_, hfile_, offset, (void*)scratch, (tSize)n); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile pread %s\n", filename_.c_str()); *result = Slice(scratch, (bytes_read < 0) ? 0 : bytes_read); if (bytes_read < 0) { // An error: return a non-ok status s = IOError(filename_, errno); } return s; } virtual Status Skip(uint64_t n) { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile skip %s\n", filename_.c_str()); // get current offset from file tOffset current = hdfsTell(fileSys_, hfile_); if (current < 0) { return IOError(filename_, errno); } // seek to new offset in file tOffset newoffset = current + n; int val = hdfsSeek(fileSys_, hfile_, newoffset); if (val < 0) { return IOError(filename_, errno); } return Status::OK(); } private: // returns true if we are at the end of file, false otherwise bool feof() { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile feof %s\n", filename_.c_str()); if (hdfsTell(fileSys_, hfile_) == fileSize()) { return true; } return false; } // the current size of the file tOffset fileSize() { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsReadableFile fileSize %s\n", filename_.c_str()); hdfsFileInfo* pFileInfo = hdfsGetPathInfo(fileSys_, filename_.c_str()); tOffset size = 0L; if (pFileInfo != nullptr) { size = pFileInfo->mSize; hdfsFreeFileInfo(pFileInfo, 1); } else { throw HdfsFatalException("fileSize on unknown file " + filename_); } return size; } }; // Appends to an existing file in HDFS. 
class HdfsWritableFile: public WritableFile { private: hdfsFS fileSys_; std::string filename_; hdfsFile hfile_; public: HdfsWritableFile(hdfsFS fileSys, const std::string& fname, const EnvOptions& options) : WritableFile(options), fileSys_(fileSys), filename_(fname), hfile_(nullptr) { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile opening %s\n", filename_.c_str()); hfile_ = hdfsOpenFile(fileSys_, filename_.c_str(), O_WRONLY, 0, 0, 0); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile opened %s\n", filename_.c_str()); assert(hfile_ != nullptr); } virtual ~HdfsWritableFile() { if (hfile_ != nullptr) { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile closing %s\n", filename_.c_str()); hdfsCloseFile(fileSys_, hfile_); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile closed %s\n", filename_.c_str()); hfile_ = nullptr; } } // If the file was successfully created, then this returns true. // Otherwise returns false. bool isValid() { return hfile_ != nullptr; } // The name of the file, mostly needed for debug logging. 
const std::string& getName() { return filename_; } virtual Status Append(const Slice& data) { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile Append %s\n", filename_.c_str()); const char* src = data.data(); size_t left = data.size(); size_t ret = hdfsWrite(fileSys_, hfile_, src, static_cast(left)); ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile Appended %s\n", filename_.c_str()); if (ret != left) { return IOError(filename_, errno); } return Status::OK(); } virtual Status Flush() { return Status::OK(); } virtual Status Sync() { Status s; ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile Sync %s\n", filename_.c_str()); if (hdfsFlush(fileSys_, hfile_) == -1) { return IOError(filename_, errno); } if (hdfsHSync(fileSys_, hfile_) == -1) { return IOError(filename_, errno); } ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile Synced %s\n", filename_.c_str()); return Status::OK(); } // This is used by HdfsLogger to write data to the debug log file virtual Status Append(const char* src, size_t size) { if (hdfsWrite(fileSys_, hfile_, src, static_cast(size)) != static_cast(size)) { return IOError(filename_, errno); } return Status::OK(); } virtual Status Close() { ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile closing %s\n", filename_.c_str()); if (hdfsCloseFile(fileSys_, hfile_) != 0) { return IOError(filename_, errno); } ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsWritableFile closed %s\n", filename_.c_str()); hfile_ = nullptr; return Status::OK(); } }; // The object that implements the debug logs to reside in HDFS. 
// Logger that formats each message with a timestamp and thread id and
// appends it to an HdfsWritableFile.
// NOTE(review): nothing in this class deletes or closes file_ - confirm who
// owns the file object.
class HdfsLogger : public Logger {
 private:
  HdfsWritableFile* file_;
  uint64_t (*gettid_)();  // Return the thread id for the current thread

  // Shared by CloseImpl() and the destructor: logs the close and, if this
  // logger is the file-global mylog, clears that pointer.
  Status HdfsCloseHelper() {
    ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsLogger closed %s\n",
                    file_->getName().c_str());
    if (mylog != nullptr && mylog == this) {
      mylog = nullptr;
    }
    return Status::OK();
  }

 protected:
  virtual Status CloseImpl() override { return HdfsCloseHelper(); }

 public:
  // f: destination file; gettid: callback returning the current thread id.
  HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)())
      : file_(f), gettid_(gettid) {
    ROCKS_LOG_DEBUG(mylog, "[hdfs] HdfsLogger opened %s\n",
                    file_->getName().c_str());
  }

  // Runs the close helper once if the logger was not closed before.
  ~HdfsLogger() override {
    if (!closed_) {
      closed_ = true;
      HdfsCloseHelper();
    }
  }

  using Logger::Logv;
  // Formats "<timestamp> <thread id> <message>" and appends it to file_,
  // terminating the record with a newline when the message has none.
  void Logv(const char* format, va_list ap) override {
    const uint64_t thread_id = (*gettid_)();

    // We try twice: the first time with a fixed-size stack allocated buffer,
    // and the second time with a much larger dynamically allocated buffer.
    char buffer[500];
    for (int iter = 0; iter < 2; iter++) {
      char* base;
      int bufsize;
      if (iter == 0) {
        bufsize = sizeof(buffer);
        base = buffer;
      } else {
        bufsize = 30000;
        base = new char[bufsize];
      }
      char* p = base;
      char* limit = base + bufsize;

      struct timeval now_tv;
      gettimeofday(&now_tv, nullptr);
      const time_t seconds = now_tv.tv_sec;
      struct tm t;
      localtime_r(&seconds, &t);
      p += snprintf(p, limit - p,
                    "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ",
                    t.tm_year + 1900,
                    t.tm_mon + 1,
                    t.tm_mday,
                    t.tm_hour,
                    t.tm_min,
                    t.tm_sec,
                    static_cast(now_tv.tv_usec),
                    static_cast(thread_id));

      // Print the message
      if (p < limit) {
        // Work on a copy so that `ap` is still usable on the second pass.
        va_list backup_ap;
        va_copy(backup_ap, ap);
        p += vsnprintf(p, limit - p, format, backup_ap);
        va_end(backup_ap);
      }

      // Truncate to available space if necessary
      if (p >= limit) {
        if (iter == 0) {
          continue;       // Try again with larger buffer
        } else {
          p = limit - 1;
        }
      }

      // Add newline if necessary
      if (p == base || p[-1] != '\n') {
        *p++ = '\n';
      }

      assert(p <= limit);
      // The statuses returned by Append and Flush are not inspected.
      file_->Append(base, p-base);
      file_->Flush();
      // Only the second-pass buffer is heap allocated.
      if (base != buffer) {
        delete[] base;
      }
      break;
    }
  }
};

} //
namespace // Finally, the hdfs environment const std::string HdfsEnv::kProto = "hdfs://"; const std::string HdfsEnv::pathsep = "/"; // open a file for sequential reading Status HdfsEnv::NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& /*options*/) { result->reset(); HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname); if (f == nullptr || !f->isValid()) { delete f; *result = nullptr; return IOError(fname, errno); } result->reset(dynamic_cast(f)); return Status::OK(); } // open a file for random reading Status HdfsEnv::NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& /*options*/) { result->reset(); HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname); if (f == nullptr || !f->isValid()) { delete f; *result = nullptr; return IOError(fname, errno); } result->reset(dynamic_cast(f)); return Status::OK(); } // create a new file for writing Status HdfsEnv::NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) { result->reset(); Status s; HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname, options); if (f == nullptr || !f->isValid()) { delete f; *result = nullptr; return IOError(fname, errno); } result->reset(dynamic_cast(f)); return Status::OK(); } class HdfsDirectory : public Directory { public: explicit HdfsDirectory(int fd) : fd_(fd) {} ~HdfsDirectory() {} Status Fsync() override { return Status::OK(); } int GetFd() const { return fd_; } private: int fd_; }; Status HdfsEnv::NewDirectory(const std::string& name, std::unique_ptr* result) { int value = hdfsExists(fileSys_, name.c_str()); switch (value) { case HDFS_EXISTS: result->reset(new HdfsDirectory(0)); return Status::OK(); default: // fail if the directory doesn't exist ROCKS_LOG_FATAL(mylog, "NewDirectory hdfsExists call failed"); throw HdfsFatalException("hdfsExists call failed with error " + ToString(value) + " on path " + name + ".\n"); } } Status HdfsEnv::FileExists(const 
std::string& fname) { int value = hdfsExists(fileSys_, fname.c_str()); switch (value) { case HDFS_EXISTS: return Status::OK(); case HDFS_DOESNT_EXIST: return Status::NotFound(); default: // anything else should be an error ROCKS_LOG_FATAL(mylog, "FileExists hdfsExists call failed"); return Status::IOError("hdfsExists call failed with error " + ToString(value) + " on path " + fname + ".\n"); } } Status HdfsEnv::GetChildren(const std::string& path, std::vector* result) { int value = hdfsExists(fileSys_, path.c_str()); switch (value) { case HDFS_EXISTS: { // directory exists int numEntries = 0; hdfsFileInfo* pHdfsFileInfo = 0; pHdfsFileInfo = hdfsListDirectory(fileSys_, path.c_str(), &numEntries); if (numEntries >= 0) { for(int i = 0; i < numEntries; i++) { std::string pathname(pHdfsFileInfo[i].mName); size_t pos = pathname.rfind("/"); if (std::string::npos != pos) { result->push_back(pathname.substr(pos + 1)); } } if (pHdfsFileInfo != nullptr) { hdfsFreeFileInfo(pHdfsFileInfo, numEntries); } } else { // numEntries < 0 indicates error ROCKS_LOG_FATAL(mylog, "hdfsListDirectory call failed with error "); throw HdfsFatalException( "hdfsListDirectory call failed negative error.\n"); } break; } case HDFS_DOESNT_EXIST: // directory does not exist, exit return Status::NotFound(); default: // anything else should be an error ROCKS_LOG_FATAL(mylog, "GetChildren hdfsExists call failed"); throw HdfsFatalException("hdfsExists call failed with error " + ToString(value) + ".\n"); } return Status::OK(); } Status HdfsEnv::DeleteFile(const std::string& fname) { if (hdfsDelete(fileSys_, fname.c_str(), 1) == 0) { return Status::OK(); } return IOError(fname, errno); }; Status HdfsEnv::CreateDir(const std::string& name) { if (hdfsCreateDirectory(fileSys_, name.c_str()) == 0) { return Status::OK(); } return IOError(name, errno); }; Status HdfsEnv::CreateDirIfMissing(const std::string& name) { const int value = hdfsExists(fileSys_, name.c_str()); // Not atomic. 
state might change b/w hdfsExists and CreateDir. switch (value) { case HDFS_EXISTS: return Status::OK(); case HDFS_DOESNT_EXIST: return CreateDir(name); default: // anything else should be an error ROCKS_LOG_FATAL(mylog, "CreateDirIfMissing hdfsExists call failed"); throw HdfsFatalException("hdfsExists call failed with error " + ToString(value) + ".\n"); } }; Status HdfsEnv::DeleteDir(const std::string& name) { return DeleteFile(name); }; Status HdfsEnv::GetFileSize(const std::string& fname, uint64_t* size) { *size = 0L; hdfsFileInfo* pFileInfo = hdfsGetPathInfo(fileSys_, fname.c_str()); if (pFileInfo != nullptr) { *size = pFileInfo->mSize; hdfsFreeFileInfo(pFileInfo, 1); return Status::OK(); } return IOError(fname, errno); } Status HdfsEnv::GetFileModificationTime(const std::string& fname, uint64_t* time) { hdfsFileInfo* pFileInfo = hdfsGetPathInfo(fileSys_, fname.c_str()); if (pFileInfo != nullptr) { *time = static_cast(pFileInfo->mLastMod); hdfsFreeFileInfo(pFileInfo, 1); return Status::OK(); } return IOError(fname, errno); } // The rename is not atomic. HDFS does not allow a renaming if the // target already exists. So, we delete the target before attempting the // rename. Status HdfsEnv::RenameFile(const std::string& src, const std::string& target) { hdfsDelete(fileSys_, target.c_str(), 1); if (hdfsRename(fileSys_, src.c_str(), target.c_str()) == 0) { return Status::OK(); } return IOError(src, errno); } Status HdfsEnv::LockFile(const std::string& /*fname*/, FileLock** lock) { // there isn's a very good way to atomically check and create // a file via libhdfs *lock = nullptr; return Status::OK(); } Status HdfsEnv::UnlockFile(FileLock* /*lock*/) { return Status::OK(); } Status HdfsEnv::NewLogger(const std::string& fname, std::shared_ptr* result) { // EnvOptions is used exclusively for its `strict_bytes_per_sync` value. That // option is only intended for WAL/flush/compaction writes, so turn it off in // the logger. 
EnvOptions options; options.strict_bytes_per_sync = false; HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname, options); if (f == nullptr || !f->isValid()) { delete f; *result = nullptr; return IOError(fname, errno); } HdfsLogger* h = new HdfsLogger(f, &HdfsEnv::gettid); result->reset(h); if (mylog == nullptr) { // mylog = h; // uncomment this for detailed logging } return Status::OK(); } Status HdfsEnv::IsDirectory(const std::string& path, bool* is_dir) { hdfsFileInfo* pFileInfo = hdfsGetPathInfo(fileSys_, path.c_str()); if (pFileInfo != nullptr) { if (is_dir != nullptr) { *is_dir = (pFileInfo->mKind == kObjectKindDirectory); } hdfsFreeFileInfo(pFileInfo, 1); return Status::OK(); } return IOError(path, errno); } // The factory method for creating an HDFS Env Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname) { *hdfs_env = new HdfsEnv(fsname); return Status::OK(); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_HDFS_FILE_C #else // USE_HDFS // dummy placeholders used when HDFS is not available namespace ROCKSDB_NAMESPACE { Status HdfsEnv::NewSequentialFile(const std::string& /*fname*/, std::unique_ptr* /*result*/, const EnvOptions& /*options*/) { return Status::NotSupported("Not compiled with hdfs support"); } Status NewHdfsEnv(Env** /*hdfs_env*/, const std::string& /*fsname*/) { return Status::NotSupported("Not compiled with hdfs support"); } } // namespace ROCKSDB_NAMESPACE #endif rocksdb-6.11.4/env/env_posix.cc000066400000000000000000000403561370372246700163660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors #include #ifndef ROCKSDB_NO_DYNAMIC_EXTENSION #include #endif #include #include #if defined(OS_LINUX) #include #endif #if defined(ROCKSDB_IOURING_PRESENT) #include #endif #include #include #include #include #include #include #include #include #if defined(OS_LINUX) || defined(OS_SOLARIS) || defined(OS_ANDROID) #include #include #include #endif #include #include #include #if defined(ROCKSDB_IOURING_PRESENT) #include #endif #include #include // Get nano time includes #if defined(OS_LINUX) || defined(OS_FREEBSD) #elif defined(__MACH__) #include #include #include #else #include #endif #include #include #include #include "env/composite_env_wrapper.h" #include "env/io_posix.h" #include "logging/logging.h" #include "logging/posix_logger.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/thread_status_updater.h" #include "port/port.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "test_util/sync_point.h" #include "util/coding.h" #include "util/compression_context_cache.h" #include "util/random.h" #include "util/string_util.h" #include "util/thread_local.h" #include "util/threadpool_imp.h" #if !defined(TMPFS_MAGIC) #define TMPFS_MAGIC 0x01021994 #endif #if !defined(XFS_SUPER_MAGIC) #define XFS_SUPER_MAGIC 0x58465342 #endif #if !defined(EXT4_SUPER_MAGIC) #define EXT4_SUPER_MAGIC 0xEF53 #endif namespace ROCKSDB_NAMESPACE { #if defined(OS_WIN) static const std::string kSharedLibExt = ".dll"; static const char kPathSeparator = ';'; #else static const char kPathSeparator = ':'; #if defined(OS_MACOSX) static const std::string kSharedLibExt = ".dylib"; #else static const std::string kSharedLibExt = ".so"; #endif #endif namespace { ThreadStatusUpdater* CreateThreadStatusUpdater() { return new ThreadStatusUpdater(); } #ifndef ROCKSDB_NO_DYNAMIC_EXTENSION class PosixDynamicLibrary : public DynamicLibrary { public: PosixDynamicLibrary(const std::string& name, void* handle) : name_(name), handle_(handle) 
{} ~PosixDynamicLibrary() override { dlclose(handle_); } Status LoadSymbol(const std::string& sym_name, void** func) override { assert(nullptr != func); dlerror(); // Clear any old error *func = dlsym(handle_, sym_name.c_str()); if (*func != nullptr) { return Status::OK(); } else { char* err = dlerror(); return Status::NotFound("Error finding symbol: " + sym_name, err); } } const char* Name() const override { return name_.c_str(); } private: std::string name_; void* handle_; }; #endif // !ROCKSDB_NO_DYNAMIC_EXTENSION class PosixEnv : public CompositeEnvWrapper { public: // This constructor is for constructing non-default Envs, mainly by // NewCompositeEnv(). It allows new instances to share the same // threadpool and other resources as the default Env, while allowing // a non-default FileSystem implementation PosixEnv(const PosixEnv* default_env, std::shared_ptr fs); ~PosixEnv() override { if (this == Env::Default()) { for (const auto tid : threads_to_join_) { pthread_join(tid, nullptr); } for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { thread_pools_[pool_id].JoinAllThreads(); } // Do not delete the thread_status_updater_ in order to avoid the // free after use when Env::Default() is destructed while some other // child threads are still trying to update thread status. All // PosixEnv instances use the same thread_status_updater_, so never // explicitly delete it. } } void SetFD_CLOEXEC(int fd, const EnvOptions* options) { if ((options == nullptr || options->set_fd_cloexec) && fd > 0) { fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); } } #ifndef ROCKSDB_NO_DYNAMIC_EXTENSION // Loads the named library into the result. // If the input name is empty, the current executable is loaded // On *nix systems, a "lib" prefix is added to the name if one is not supplied // Comparably, the appropriate shared library extension is added to the name // if not supplied. 
If search_path is not specified, the shared library will // be loaded using the default path (LD_LIBRARY_PATH) If search_path is // specified, the shared library will be searched for in the directories // provided by the search path Status LoadLibrary(const std::string& name, const std::string& path, std::shared_ptr* result) override { Status status; assert(result != nullptr); if (name.empty()) { void* hndl = dlopen(NULL, RTLD_NOW); if (hndl != nullptr) { result->reset(new PosixDynamicLibrary(name, hndl)); return Status::OK(); } } else { std::string library_name = name; if (library_name.find(kSharedLibExt) == std::string::npos) { library_name = library_name + kSharedLibExt; } #if !defined(OS_WIN) if (library_name.find('/') == std::string::npos && library_name.compare(0, 3, "lib") != 0) { library_name = "lib" + library_name; } #endif if (path.empty()) { void* hndl = dlopen(library_name.c_str(), RTLD_NOW); if (hndl != nullptr) { result->reset(new PosixDynamicLibrary(library_name, hndl)); return Status::OK(); } } else { std::string local_path; std::stringstream ss(path); while (getline(ss, local_path, kPathSeparator)) { if (!path.empty()) { std::string full_name = local_path + "/" + library_name; void* hndl = dlopen(full_name.c_str(), RTLD_NOW); if (hndl != nullptr) { result->reset(new PosixDynamicLibrary(full_name, hndl)); return Status::OK(); } } } } } return Status::IOError( IOErrorMsg("Failed to open shared library: xs", name), dlerror()); } #endif // !ROCKSDB_NO_DYNAMIC_EXTENSION void Schedule(void (*function)(void* arg1), void* arg, Priority pri = LOW, void* tag = nullptr, void (*unschedFunction)(void* arg) = nullptr) override; int UnSchedule(void* arg, Priority pri) override; void StartThread(void (*function)(void* arg), void* arg) override; void WaitForJoin() override; unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override; Status GetThreadList(std::vector* thread_list) override { assert(thread_status_updater_); return 
thread_status_updater_->GetThreadList(thread_list); } static uint64_t gettid(pthread_t tid) { uint64_t thread_id = 0; memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid))); return thread_id; } static uint64_t gettid() { pthread_t tid = pthread_self(); return gettid(tid); } uint64_t GetThreadID() const override { return gettid(pthread_self()); } uint64_t NowMicros() override { struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; } uint64_t NowNanos() override { #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_AIX) struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; #elif defined(OS_SOLARIS) return gethrtime(); #elif defined(__MACH__) clock_serv_t cclock; mach_timespec_t ts; host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); clock_get_time(cclock, &ts); mach_port_deallocate(mach_task_self(), cclock); return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; #else return std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()).count(); #endif } uint64_t NowCPUNanos() override { #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_AIX) || \ (defined(__MACH__) && defined(__MAC_10_12)) struct timespec ts; clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); return static_cast(ts.tv_sec) * 1000000000 + ts.tv_nsec; #endif return 0; } void SleepForMicroseconds(int micros) override { usleep(micros); } Status GetHostName(char* name, uint64_t len) override { int ret = gethostname(name, static_cast(len)); if (ret < 0) { if (errno == EFAULT || errno == EINVAL) { return Status::InvalidArgument(strerror(errno)); } else { return IOError("GetHostName", name, errno); } } return Status::OK(); } Status GetCurrentTime(int64_t* unix_time) override { time_t ret = time(nullptr); if (ret == (time_t) -1) { return IOError("GetCurrentTime", "", errno); } *unix_time = (int64_t) ret; return Status::OK(); } ThreadStatusUpdater* 
GetThreadStatusUpdater() const override { return Env::GetThreadStatusUpdater(); } std::string GenerateUniqueId() override { return Env::GenerateUniqueId(); } // Allow increasing the number of worker threads. void SetBackgroundThreads(int num, Priority pri) override { assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); thread_pools_[pri].SetBackgroundThreads(num); } int GetBackgroundThreads(Priority pri) override { assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); return thread_pools_[pri].GetBackgroundThreads(); } Status SetAllowNonOwnerAccess(bool allow_non_owner_access) override { allow_non_owner_access_ = allow_non_owner_access; return Status::OK(); } // Allow increasing the number of worker threads. void IncBackgroundThreadsIfNeeded(int num, Priority pri) override { assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); thread_pools_[pri].IncBackgroundThreadsIfNeeded(num); } void LowerThreadPoolIOPriority(Priority pool = LOW) override { assert(pool >= Priority::BOTTOM && pool <= Priority::HIGH); #ifdef OS_LINUX thread_pools_[pool].LowerIOPriority(); #else (void)pool; #endif } void LowerThreadPoolCPUPriority(Priority pool = LOW) override { assert(pool >= Priority::BOTTOM && pool <= Priority::HIGH); #ifdef OS_LINUX thread_pools_[pool].LowerCPUPriority(); #else (void)pool; #endif } std::string TimeToString(uint64_t secondsSince1970) override { const time_t seconds = (time_t)secondsSince1970; struct tm t; int maxsize = 64; std::string dummy; dummy.reserve(maxsize); dummy.resize(maxsize); char* p = &dummy[0]; localtime_r(&seconds, &t); snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); return dummy; } private: friend Env* Env::Default(); // Constructs the default Env, a singleton PosixEnv(); // The below 4 members are only used by the default PosixEnv instance. 
// Non-default instances simply maintain references to the backing // members in te default instance std::vector thread_pools_storage_; pthread_mutex_t mu_storage_; std::vector threads_to_join_storage_; bool allow_non_owner_access_storage_; std::vector& thread_pools_; pthread_mutex_t& mu_; std::vector& threads_to_join_; // If true, allow non owner read access for db files. Otherwise, non-owner // has no access to db files. bool& allow_non_owner_access_; }; PosixEnv::PosixEnv() : CompositeEnvWrapper(this, FileSystem::Default()), thread_pools_storage_(Priority::TOTAL), allow_non_owner_access_storage_(true), thread_pools_(thread_pools_storage_), mu_(mu_storage_), threads_to_join_(threads_to_join_storage_), allow_non_owner_access_(allow_non_owner_access_storage_) { ThreadPoolImpl::PthreadCall("mutex_init", pthread_mutex_init(&mu_, nullptr)); for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { thread_pools_[pool_id].SetThreadPriority( static_cast(pool_id)); // This allows later initializing the thread-local-env of each thread. 
thread_pools_[pool_id].SetHostEnv(this); } thread_status_updater_ = CreateThreadStatusUpdater(); } PosixEnv::PosixEnv(const PosixEnv* default_env, std::shared_ptr fs) : CompositeEnvWrapper(this, fs), thread_pools_(default_env->thread_pools_), mu_(default_env->mu_), threads_to_join_(default_env->threads_to_join_), allow_non_owner_access_(default_env->allow_non_owner_access_) { thread_status_updater_ = default_env->thread_status_updater_; } void PosixEnv::Schedule(void (*function)(void* arg1), void* arg, Priority pri, void* tag, void (*unschedFunction)(void* arg)) { assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); thread_pools_[pri].Schedule(function, arg, tag, unschedFunction); } int PosixEnv::UnSchedule(void* arg, Priority pri) { return thread_pools_[pri].UnSchedule(arg); } unsigned int PosixEnv::GetThreadPoolQueueLen(Priority pri) const { assert(pri >= Priority::BOTTOM && pri <= Priority::HIGH); return thread_pools_[pri].GetQueueLen(); } struct StartThreadState { void (*user_function)(void*); void* arg; }; static void* StartThreadWrapper(void* arg) { StartThreadState* state = reinterpret_cast(arg); state->user_function(state->arg); delete state; return nullptr; } void PosixEnv::StartThread(void (*function)(void* arg), void* arg) { pthread_t t; StartThreadState* state = new StartThreadState; state->user_function = function; state->arg = arg; ThreadPoolImpl::PthreadCall( "start thread", pthread_create(&t, nullptr, &StartThreadWrapper, state)); ThreadPoolImpl::PthreadCall("lock", pthread_mutex_lock(&mu_)); threads_to_join_.push_back(t); ThreadPoolImpl::PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } void PosixEnv::WaitForJoin() { for (const auto tid : threads_to_join_) { pthread_join(tid, nullptr); } threads_to_join_.clear(); } } // namespace std::string Env::GenerateUniqueId() { std::string uuid_file = "/proc/sys/kernel/random/uuid"; Status s = FileExists(uuid_file); if (s.ok()) { std::string uuid; s = ReadFileToString(this, uuid_file, &uuid); if 
(s.ok()) { return uuid; } } // Could not read uuid_file - generate uuid using "nanos-random" Random64 r(time(nullptr)); uint64_t random_uuid_portion = r.Uniform(std::numeric_limits::max()); uint64_t nanos_uuid_portion = NowNanos(); char uuid2[200]; snprintf(uuid2, 200, "%lx-%lx", (unsigned long)nanos_uuid_portion, (unsigned long)random_uuid_portion); return uuid2; } // // Default Posix Env // Env* Env::Default() { // The following function call initializes the singletons of ThreadLocalPtr // right before the static default_env. This guarantees default_env will // always being destructed before the ThreadLocalPtr singletons get // destructed as C++ guarantees that the destructions of static variables // is in the reverse order of their constructions. // // Since static members are destructed in the reverse order // of their construction, having this call here guarantees that // the destructor of static PosixEnv will go first, then the // the singletons of ThreadLocalPtr. ThreadLocalPtr::InitSingletons(); CompressionContextCache::InitSingleton(); INIT_SYNC_POINT_SINGLETONS(); static PosixEnv default_env; return &default_env; } std::unique_ptr NewCompositeEnv(std::shared_ptr fs) { PosixEnv* default_env = static_cast(Env::Default()); return std::unique_ptr(new PosixEnv(default_env, fs)); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/env_test.cc000066400000000000000000002050201370372246700161720ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifndef OS_WIN #include #endif #include #include #include #include #include #ifdef OS_LINUX #include #include #include #include #include #endif #ifdef ROCKSDB_FALLOCATE_PRESENT #include #endif #include "env/env_chroot.h" #include "logging/log_buffer.h" #include "port/malloc.h" #include "port/port.h" #include "rocksdb/env.h" #include "test_util/fault_injection_test_env.h" #include "test_util/fault_injection_test_fs.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "util/mutexlock.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { using port::kPageSize; static const int kDelayMicros = 100000; struct Deleter { explicit Deleter(void (*fn)(void*)) : fn_(fn) {} void operator()(void* ptr) { assert(fn_); assert(ptr); (*fn_)(ptr); } void (*fn_)(void*); }; std::unique_ptr NewAligned(const size_t size, const char ch) { char* ptr = nullptr; #ifdef OS_WIN if (nullptr == (ptr = reinterpret_cast(_aligned_malloc(size, kPageSize)))) { return std::unique_ptr(nullptr, Deleter(_aligned_free)); } std::unique_ptr uptr(ptr, Deleter(_aligned_free)); #else if (posix_memalign(reinterpret_cast(&ptr), kPageSize, size) != 0) { return std::unique_ptr(nullptr, Deleter(free)); } std::unique_ptr uptr(ptr, Deleter(free)); #endif memset(uptr.get(), ch, size); return uptr; } class EnvPosixTest : public testing::Test { private: port::Mutex mu_; std::string events_; public: Env* env_; bool direct_io_; EnvPosixTest() : env_(Env::Default()), direct_io_(false) {} }; class EnvPosixTestWithParam : public EnvPosixTest, public ::testing::WithParamInterface> { public: EnvPosixTestWithParam() { std::pair param_pair = GetParam(); env_ = param_pair.first; direct_io_ = param_pair.second; } void WaitThreadPoolsEmpty() { // Wait until the thread pools are empty. 
while (env_->GetThreadPoolQueueLen(Env::Priority::LOW) != 0) { Env::Default()->SleepForMicroseconds(kDelayMicros); } while (env_->GetThreadPoolQueueLen(Env::Priority::HIGH) != 0) { Env::Default()->SleepForMicroseconds(kDelayMicros); } } ~EnvPosixTestWithParam() override { WaitThreadPoolsEmpty(); } }; static void SetBool(void* ptr) { reinterpret_cast*>(ptr)->store(true); } TEST_F(EnvPosixTest, DISABLED_RunImmediately) { for (int pri = Env::BOTTOM; pri < Env::TOTAL; ++pri) { std::atomic called(false); env_->SetBackgroundThreads(1, static_cast(pri)); env_->Schedule(&SetBool, &called, static_cast(pri)); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_TRUE(called.load()); } } TEST_F(EnvPosixTest, RunEventually) { std::atomic called(false); env_->StartThread(&SetBool, &called); env_->WaitForJoin(); ASSERT_TRUE(called.load()); } #ifdef OS_WIN TEST_F(EnvPosixTest, AreFilesSame) { { bool tmp; if (env_->AreFilesSame("", "", &tmp).IsNotSupported()) { fprintf(stderr, "skipping EnvBasicTestWithParam.AreFilesSame due to " "unsupported Env::AreFilesSame\n"); return; } } const EnvOptions soptions; auto* env = Env::Default(); std::string same_file_name = test::PerThreadDBPath(env, "same_file"); std::string same_file_link_name = same_file_name + "_link"; std::unique_ptr same_file; ASSERT_OK(env->NewWritableFile(same_file_name, &same_file, soptions)); same_file->Append("random_data"); ASSERT_OK(same_file->Flush()); same_file.reset(); ASSERT_OK(env->LinkFile(same_file_name, same_file_link_name)); bool result = false; ASSERT_OK(env->AreFilesSame(same_file_name, same_file_link_name, &result)); ASSERT_TRUE(result); } #endif #ifdef OS_LINUX TEST_F(EnvPosixTest, DISABLED_FilePermission) { // Only works for Linux environment if (env_ == Env::Default()) { EnvOptions soptions; std::vector fileNames{ test::PerThreadDBPath(env_, "testfile"), test::PerThreadDBPath(env_, "testfile1")}; std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fileNames[0], &wfile, soptions)); 
// Writes 32 KB of random data to a file, maps it with
// Env::NewMemoryMappedFileBuffer() and checks that the mapping exposes
// exactly the bytes that were written.
TEST_F(EnvPosixTest, MemoryMappedFileBuffer) {
  const int kFileBytes = 1 << 15;  // 32 KB
  std::string expected_data;
  std::string fname = test::PerThreadDBPath(env_, "testfile");
  {
    std::unique_ptr<WritableFile> wfile;
    const EnvOptions soptions;
    ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));

    Random rnd(301);
    test::RandomString(&rnd, kFileBytes, &expected_data);
    ASSERT_OK(wfile->Append(expected_data));
    // Close explicitly so that a failure to persist the data is reported
    // here rather than surfacing later as a content mismatch.
    ASSERT_OK(wfile->Close());
  }

  std::unique_ptr<MemoryMappedFileBuffer> mmap_buffer;
  Status status = env_->NewMemoryMappedFileBuffer(fname, &mmap_buffer);
  // it should be supported at least on linux
#if !defined(OS_LINUX)
  if (status.IsNotSupported()) {
    fprintf(stderr,
            "skipping EnvPosixTest.MemoryMappedFileBuffer due to "
            "unsupported Env::NewMemoryMappedFileBuffer\n");
    return;
  }
#endif  // !defined(OS_LINUX)

  ASSERT_OK(status);
  ASSERT_NE(nullptr, mmap_buffer.get());
  ASSERT_NE(nullptr, mmap_buffer->GetBase());
  ASSERT_EQ(kFileBytes, mmap_buffer->GetLen());
  std::string actual_data(
      reinterpret_cast<const char*>(mmap_buffer->GetBase()),
      mmap_buffer->GetLen());
  ASSERT_EQ(expected_data, actual_data);
}
// Exercises Env::LoadLibrary() with an explicit search path: libraries that
// are not under the path must fail to load, and a "rocksdb" library, when it
// can be found, must also be loadable again through the name it reports.
TEST_F(EnvPosixTest, LoadRocksDBLibraryWithSearchPath) {
  std::shared_ptr<DynamicLibrary> library;

  ASSERT_NOK(env_->LoadLibrary("no-such-library", "/tmp", &library));
  ASSERT_EQ(nullptr, library.get());
  ASSERT_NOK(env_->LoadLibrary("dl", "/tmp", &library));
  ASSERT_EQ(nullptr, library.get());

  Status status = env_->LoadLibrary("rocksdb", "/tmp:./", &library);
  if (status.ok()) {
    ASSERT_NE(nullptr, library.get());
    ASSERT_OK(env_->LoadLibrary(library->Name(), "", &library));
  }

  // getcwd() returns NULL on failure (for example when the path does not fit
  // into the buffer); building a std::string from NULL is undefined
  // behaviour, so check the result before using it.
  char buff[1024];
  const char* cwd_cstr = getcwd(buff, sizeof(buff));
  ASSERT_TRUE(cwd_cstr != nullptr);
  std::string cwd = cwd_cstr;

  status = env_->LoadLibrary("rocksdb", "/tmp:" + cwd, &library);
  if (status.ok()) {
    ASSERT_NE(nullptr, library.get());
    ASSERT_OK(env_->LoadLibrary(library->Name(), "", &library));
  }
}
the right tag */ ASSERT_EQ(1, env_->UnSchedule(&sleeping_task1, Env::Priority::LOW)); // Unblock background thread sleeping_task.WakeUp(); /* Schedule another task */ env_->Schedule(&SetBool, &called); for (int i = 0; i < kDelayMicros; i++) { if (called.load()) { break; } Env::Default()->SleepForMicroseconds(1); } ASSERT_TRUE(called.load()); ASSERT_TRUE(!sleeping_task.IsSleeping() && !sleeping_task1.IsSleeping()); WaitThreadPoolsEmpty(); } // This tests assumes that the last scheduled // task will run last. In fact, in the allotted // sleeping time nothing may actually run or they may // run in any order. The purpose of the test is unclear. #ifndef OS_WIN TEST_P(EnvPosixTestWithParam, RunMany) { env_->SetBackgroundThreads(1, Env::LOW); std::atomic last_id(0); struct CB { std::atomic* last_id_ptr; // Pointer to shared slot int id; // Order# for the execution of this callback CB(std::atomic* p, int i) : last_id_ptr(p), id(i) {} static void Run(void* v) { CB* cb = reinterpret_cast(v); int cur = cb->last_id_ptr->load(); ASSERT_EQ(cb->id - 1, cur); cb->last_id_ptr->store(cb->id); } }; // Schedule in different order than start time CB cb1(&last_id, 1); CB cb2(&last_id, 2); CB cb3(&last_id, 3); CB cb4(&last_id, 4); env_->Schedule(&CB::Run, &cb1); env_->Schedule(&CB::Run, &cb2); env_->Schedule(&CB::Run, &cb3); env_->Schedule(&CB::Run, &cb4); Env::Default()->SleepForMicroseconds(kDelayMicros); int cur = last_id.load(std::memory_order_acquire); ASSERT_EQ(4, cur); WaitThreadPoolsEmpty(); } #endif struct State { port::Mutex mu; int val; int num_running; }; static void ThreadBody(void* arg) { State* s = reinterpret_cast(arg); s->mu.Lock(); s->val += 1; s->num_running -= 1; s->mu.Unlock(); } TEST_P(EnvPosixTestWithParam, StartThread) { State state; state.val = 0; state.num_running = 3; for (int i = 0; i < 3; i++) { env_->StartThread(&ThreadBody, &state); } while (true) { state.mu.Lock(); int num = state.num_running; state.mu.Unlock(); if (num == 0) { break; } 
// Schedules the same number of blocking jobs into the LOW and HIGH priority
// pools and checks, through the reported queue lengths, how many jobs each
// pool picked up. The scenario runs twice: once with the initial pool sizes
// and once after IncBackgroundThreadsIfNeeded() has been called on both pools.
TEST_P(EnvPosixTestWithParam, TwoPools) {
  // Data structures to signal tasks to run.
  port::Mutex mutex;
  port::CondVar cv(&mutex);
  bool should_start = false;

  // Job shared by all tasks scheduled into one pool. Every invocation counts
  // itself as running, blocks until *should_start_ becomes true and then
  // counts itself as finished.
  class CB {
   public:
    CB(const std::string& pool_name, int pool_size, port::Mutex* trigger_mu,
       port::CondVar* trigger_cv, bool* _should_start)
        : mu_(),
          num_running_(0),
          num_finished_(0),
          pool_size_(pool_size),
          pool_name_(pool_name),
          trigger_mu_(trigger_mu),
          trigger_cv_(trigger_cv),
          should_start_(_should_start) {}

    // Trampoline with the signature expected by Env::Schedule().
    static void Run(void* v) {
      CB* cb = reinterpret_cast(v);
      cb->Run();
    }

    void Run() {
      {
        MutexLock l(&mu_);
        num_running_++;
        // make sure we don't have more than pool_size_ jobs running.
        ASSERT_LE(num_running_, pool_size_.load());
      }

      {
        // Block until the test body flips *should_start_ and signals.
        MutexLock l(trigger_mu_);
        while (!(*should_start_)) {
          trigger_cv_->Wait();
        }
      }

      {
        MutexLock l(&mu_);
        num_running_--;
        num_finished_++;
      }
    }

    // Number of invocations that have completed since construction/Reset().
    int NumFinished() {
      MutexLock l(&mu_);
      return num_finished_;
    }

    // Sets the new concurrency limit and clears the finished counter.
    // Note: num_finished_ is written here without taking mu_.
    void Reset(int pool_size) {
      pool_size_.store(pool_size);
      num_finished_ = 0;
    }

   private:
    port::Mutex mu_;  // taken around the num_running_/num_finished_ updates
    int num_running_;
    int num_finished_;
    std::atomic pool_size_;
    std::string pool_name_;  // stored but not read anywhere in this class
    port::Mutex* trigger_mu_;
    port::CondVar* trigger_cv_;
    bool* should_start_;
  };

  const int kLowPoolSize = 2;
  const int kHighPoolSize = 4;
  const int kJobs = 8;

  CB low_pool_job("low", kLowPoolSize, &mutex, &cv, &should_start);
  CB high_pool_job("high", kHighPoolSize, &mutex, &cv, &should_start);

  env_->SetBackgroundThreads(kLowPoolSize);
  env_->SetBackgroundThreads(kHighPoolSize, Env::Priority::HIGH);

  ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
  ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH));

  // schedule same number of jobs in each pool
  for (int i = 0; i < kJobs; i++) {
    env_->Schedule(&CB::Run, &low_pool_job);
    env_->Schedule(&CB::Run, &high_pool_job, Env::Priority::HIGH);
  }
  // Wait a short while for the jobs to be dispatched.
  // The wait gives up after about 100 sleeps; the assertions that follow
  // report the failure if the expected queue lengths were never reached.
  int sleep_count = 0;
  while ((unsigned int)(kJobs - kLowPoolSize) !=
             env_->GetThreadPoolQueueLen(Env::Priority::LOW) ||
         (unsigned int)(kJobs - kHighPoolSize) !=
             env_->GetThreadPoolQueueLen(Env::Priority::HIGH)) {
    env_->SleepForMicroseconds(kDelayMicros);
    if (++sleep_count > 100) {
      break;
    }
  }

  // Each pool is expected to have picked up one job per thread; the rest
  // stay queued.
  ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize),
            env_->GetThreadPoolQueueLen());
  ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize),
            env_->GetThreadPoolQueueLen(Env::Priority::LOW));
  ASSERT_EQ((unsigned int)(kJobs - kHighPoolSize),
            env_->GetThreadPoolQueueLen(Env::Priority::HIGH));

  // Trigger jobs to run.
  {
    MutexLock l(&mutex);
    should_start = true;
    cv.SignalAll();
  }

  // wait for all jobs to finish
  while (low_pool_job.NumFinished() < kJobs ||
         high_pool_job.NumFinished() < kJobs) {
    env_->SleepForMicroseconds(kDelayMicros);
  }

  ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
  ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH));

  // Hold the jobs of the second round until they are triggered again.
  should_start = false;

  // Call IncBackgroundThreadsIfNeeded on both pools: with a smaller value
  // for the low pool and a larger value for the high pool. The assertions
  // further down expect the low pool to still run kLowPoolSize jobs and the
  // high pool to run kHighPoolSize + 1 jobs.
  env_->IncBackgroundThreadsIfNeeded(kLowPoolSize - 1, Env::Priority::LOW);
  env_->IncBackgroundThreadsIfNeeded(kHighPoolSize + 1, Env::Priority::HIGH);
  high_pool_job.Reset(kHighPoolSize + 1);
  low_pool_job.Reset(kLowPoolSize);

  // schedule same number of jobs in each pool
  for (int i = 0; i < kJobs; i++) {
    env_->Schedule(&CB::Run, &low_pool_job);
    env_->Schedule(&CB::Run, &high_pool_job, Env::Priority::HIGH);
  }
  // Wait a short while for the jobs to be dispatched.
  sleep_count = 0;
  while ((unsigned int)(kJobs - kLowPoolSize) !=
             env_->GetThreadPoolQueueLen(Env::Priority::LOW) ||
         (unsigned int)(kJobs - (kHighPoolSize + 1)) !=
             env_->GetThreadPoolQueueLen(Env::Priority::HIGH)) {
    env_->SleepForMicroseconds(kDelayMicros);
    if (++sleep_count > 100) {
      break;
    }
  }
  ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize),
            env_->GetThreadPoolQueueLen());
  ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize),
            env_->GetThreadPoolQueueLen(Env::Priority::LOW));
  ASSERT_EQ((unsigned int)(kJobs - (kHighPoolSize + 1)),
            env_->GetThreadPoolQueueLen(Env::Priority::HIGH));

  // Trigger jobs to run.
  {
    MutexLock l(&mutex);
    should_start = true;
    cv.SignalAll();
  }

  // wait for all jobs to finish
  while (low_pool_job.NumFinished() < kJobs ||
         high_pool_job.NumFinished() < kJobs) {
    env_->SleepForMicroseconds(kDelayMicros);
  }

  // Restore the high pool to its initial size before finishing.
  env_->SetBackgroundThreads(kHighPoolSize, Env::Priority::HIGH);
  WaitThreadPoolsEmpty();
}
Still task 0, 1 running, 2 waiting env_->SetBackgroundThreads(1, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); ASSERT_TRUE(tasks[0].IsSleeping()); ASSERT_TRUE(tasks[1].IsSleeping()); ASSERT_TRUE(!tasks[2].IsSleeping()); // The last task finishes. Task 0 running, 2 waiting. tasks[1].WakeUp(); ASSERT_FALSE(tasks[1].TimedWaitUntilDone(kWaitMicros)); ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); ASSERT_TRUE(tasks[0].IsSleeping()); ASSERT_TRUE(!tasks[1].IsSleeping()); ASSERT_TRUE(!tasks[2].IsSleeping()); // Increase to 5 threads. Task 0 and 2 running. env_->SetBackgroundThreads(5, Env::Priority::HIGH); ASSERT_FALSE(tasks[2].TimedWaitUntilSleeping(kWaitMicros)); ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); ASSERT_TRUE(tasks[0].IsSleeping()); ASSERT_TRUE(!tasks[1].IsSleeping()); ASSERT_TRUE(tasks[2].IsSleeping()); // Change number of threads a couple of times while there is no sufficient // tasks. env_->SetBackgroundThreads(7, Env::Priority::HIGH); tasks[2].WakeUp(); ASSERT_FALSE(tasks[2].TimedWaitUntilDone(kWaitMicros)); ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); env_->SetBackgroundThreads(3, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); env_->SetBackgroundThreads(4, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); env_->SetBackgroundThreads(5, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); env_->SetBackgroundThreads(4, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); Env::Default()->SleepForMicroseconds(kDelayMicros * 50); // Enqueue 5 more tasks. 
Thread pool size now is 4. // Task 0, 3, 4, 5 running;6, 7 waiting. for (size_t i = 3; i < 8; i++) { env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[i], Env::Priority::HIGH); } for (size_t i = 3; i <= 5; i++) { ASSERT_FALSE(tasks[i].TimedWaitUntilSleeping(kWaitMicros)); } ASSERT_EQ(2U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); ASSERT_TRUE(tasks[0].IsSleeping()); ASSERT_TRUE(!tasks[1].IsSleeping()); ASSERT_TRUE(!tasks[2].IsSleeping()); ASSERT_TRUE(tasks[3].IsSleeping()); ASSERT_TRUE(tasks[4].IsSleeping()); ASSERT_TRUE(tasks[5].IsSleeping()); ASSERT_TRUE(!tasks[6].IsSleeping()); ASSERT_TRUE(!tasks[7].IsSleeping()); // Wake up task 0, 3 and 4. Task 5, 6, 7 running. tasks[0].WakeUp(); tasks[3].WakeUp(); tasks[4].WakeUp(); for (size_t i = 5; i < 8; i++) { ASSERT_FALSE(tasks[i].TimedWaitUntilSleeping(kWaitMicros)); } ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); for (size_t i = 5; i < 8; i++) { ASSERT_TRUE(tasks[i].IsSleeping()); } // Shrink back to 1 thread. Still task 5, 6, 7 running env_->SetBackgroundThreads(1, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_TRUE(tasks[5].IsSleeping()); ASSERT_TRUE(tasks[6].IsSleeping()); ASSERT_TRUE(tasks[7].IsSleeping()); // Wake up task 6. Task 5, 7 running tasks[6].WakeUp(); ASSERT_FALSE(tasks[6].TimedWaitUntilDone(kWaitMicros)); ASSERT_TRUE(tasks[5].IsSleeping()); ASSERT_TRUE(!tasks[6].IsSleeping()); ASSERT_TRUE(tasks[7].IsSleeping()); // Wake up threads 7. Task 5 running tasks[7].WakeUp(); ASSERT_FALSE(tasks[7].TimedWaitUntilDone(kWaitMicros)); ASSERT_TRUE(!tasks[7].IsSleeping()); // Enqueue thread 8 and 9. Task 5 running; one of 8, 9 might be running. 
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[8], Env::Priority::HIGH); env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &tasks[9], Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_GT(env_->GetThreadPoolQueueLen(Env::Priority::HIGH), (unsigned int)0); ASSERT_TRUE(!tasks[8].IsSleeping() || !tasks[9].IsSleeping()); // Increase to 4 threads. Task 5, 8, 9 running. env_->SetBackgroundThreads(4, Env::Priority::HIGH); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_EQ((unsigned int)0, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); ASSERT_TRUE(tasks[8].IsSleeping()); ASSERT_TRUE(tasks[9].IsSleeping()); // Shrink to 1 thread env_->SetBackgroundThreads(1, Env::Priority::HIGH); // Wake up thread 9. tasks[9].WakeUp(); ASSERT_FALSE(tasks[9].TimedWaitUntilDone(kWaitMicros)); ASSERT_TRUE(!tasks[9].IsSleeping()); ASSERT_TRUE(tasks[8].IsSleeping()); // Wake up thread 8 tasks[8].WakeUp(); ASSERT_FALSE(tasks[8].TimedWaitUntilDone(kWaitMicros)); ASSERT_TRUE(!tasks[8].IsSleeping()); // Wake up the last thread tasks[5].WakeUp(); ASSERT_FALSE(tasks[5].TimedWaitUntilDone(kWaitMicros)); WaitThreadPoolsEmpty(); } #if (defined OS_LINUX || defined OS_WIN) // Travis doesn't support fallocate or getting unique ID from files for whatever // reason. #ifndef TRAVIS namespace { bool IsSingleVarint(const std::string& s) { Slice slice(s); uint64_t v; if (!GetVarint64(&slice, &v)) { return false; } return slice.size() == 0; } bool IsUniqueIDValid(const std::string& s) { return !s.empty() && !IsSingleVarint(s); } const size_t MAX_ID_SIZE = 100; char temp_id[MAX_ID_SIZE]; } // namespace // Determine whether we can use the FS_IOC_GETVERSION ioctl // on a file in directory DIR. Create a temporary file therein, // try to apply the ioctl (save that result), cleanup and // return the result. Return true if it is supported, and // false if anything fails. 
// Reports whether the FS_IOC_GETVERSION ioctl works for a file created in
// directory `dir`.
//
// A scratch file named "f" is created inside `dir`, the ioctl is attempted on
// it, and the file is removed again. Returns true when the ioctl succeeded
// and false when the file could not be created or the ioctl failed. On
// Windows the function returns true without touching the file system.
bool ioctl_support__FS_IOC_GETVERSION(const std::string& dir) {
#ifdef OS_WIN
  (void)dir;  // unused on this platform
  return true;
#else
  const std::string file = dir + "/f";
  int fd;
  do {
    fd = open(file.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
  } while (fd < 0 && errno == EINTR);  // retry when interrupted by a signal
  if (fd < 0) {
    // The scratch file was not opened, so there is no descriptor to close
    // and no file of ours to remove.
    return false;
  }

  long int version;
  const bool ok = ioctl(fd, FS_IOC_GETVERSION, &version) >= 0;
  close(fd);
  unlink(file.c_str());
  return ok;
#endif
}
if (tmp && strlen(tmp) + strlen(fmt) - 2 + 1 <= sizeof dir_buf) { // use $TEST_IOCTL_FRIENDLY_TMPDIR value candidate_dir_list.push_front(tmp); } for (const std::string& d : candidate_dir_list) { snprintf(dir_buf, sizeof dir_buf, fmt, d.c_str()); if (mkdtemp(dir_buf)) { if (ioctl_support__FS_IOC_GETVERSION(dir_buf)) { dir_ = dir_buf; return; } else { // Diagnose ioctl-related failure only if this is the // directory specified via that envvar. if (tmp && tmp == d) { fprintf(stderr, "TEST_IOCTL_FRIENDLY_TMPDIR-specified directory is " "not suitable: %s\n", d.c_str()); } rmdir(dir_buf); // ignore failure } } else { // mkdtemp failed: diagnose it, but don't give up. fprintf(stderr, "mkdtemp(%s/...) failed: %s\n", d.c_str(), strerror(errno)); } } fprintf(stderr, "failed to find an ioctl-friendly temporary directory;" " specify one via the TEST_IOCTL_FRIENDLY_TMPDIR envvar\n"); std::abort(); #endif } ~IoctlFriendlyTmpdir() { rmdir(dir_.c_str()); } const std::string& name() const { return dir_; } private: std::string dir_; }; #ifndef ROCKSDB_LITE TEST_F(EnvPosixTest, PositionedAppend) { std::unique_ptr writable_file; EnvOptions options; options.use_direct_writes = true; options.use_mmap_writes = false; IoctlFriendlyTmpdir ift; ASSERT_OK(env_->NewWritableFile(ift.name() + "/f", &writable_file, options)); const size_t kBlockSize = 4096; const size_t kDataSize = kPageSize; // Write a page worth of 'a' auto data_ptr = NewAligned(kDataSize, 'a'); Slice data_a(data_ptr.get(), kDataSize); ASSERT_OK(writable_file->PositionedAppend(data_a, 0U)); // Write a page worth of 'b' right after the first sector data_ptr = NewAligned(kDataSize, 'b'); Slice data_b(data_ptr.get(), kDataSize); ASSERT_OK(writable_file->PositionedAppend(data_b, kBlockSize)); ASSERT_OK(writable_file->Close()); // The file now has 1 sector worth of a followed by a page worth of b // Verify the above std::unique_ptr seq_file; ASSERT_OK(env_->NewSequentialFile(ift.name() + "/f", &seq_file, options)); size_t 
scratch_len = kPageSize * 2; std::unique_ptr scratch(new char[scratch_len]); Slice result; ASSERT_OK(seq_file->Read(scratch_len, &result, scratch.get())); ASSERT_EQ(kPageSize + kBlockSize, result.size()); ASSERT_EQ('a', result[kBlockSize - 1]); ASSERT_EQ('b', result[kBlockSize]); } #endif // !ROCKSDB_LITE // `GetUniqueId()` temporarily returns zero on Windows. `BlockBasedTable` can // handle a return value of zero but this test case cannot. #ifndef OS_WIN TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) { // Create file. if (env_ == Env::Default()) { EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; IoctlFriendlyTmpdir ift; std::string fname = ift.name() + "/testfile"; std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); std::unique_ptr file; // Get Unique ID ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); ASSERT_TRUE(id_size > 0); std::string unique_id1(temp_id, id_size); ASSERT_TRUE(IsUniqueIDValid(unique_id1)); // Get Unique ID again ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); ASSERT_TRUE(id_size > 0); std::string unique_id2(temp_id, id_size); ASSERT_TRUE(IsUniqueIDValid(unique_id2)); // Get Unique ID again after waiting some time. env_->SleepForMicroseconds(1000000); ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); ASSERT_TRUE(id_size > 0); std::string unique_id3(temp_id, id_size); ASSERT_TRUE(IsUniqueIDValid(unique_id3)); // Check IDs are the same. 
ASSERT_EQ(unique_id1, unique_id2); ASSERT_EQ(unique_id2, unique_id3); // Delete the file env_->DeleteFile(fname); } } #endif // !defined(OS_WIN) // only works in linux platforms #ifdef ROCKSDB_FALLOCATE_PRESENT TEST_P(EnvPosixTestWithParam, AllocateTest) { if (env_ == Env::Default()) { IoctlFriendlyTmpdir ift; std::string fname = ift.name() + "/preallocate_testfile"; // Try fallocate in a file to see whether the target file system supports // it. // Skip the test if fallocate is not supported. std::string fname_test_fallocate = ift.name() + "/preallocate_testfile_2"; int fd = -1; do { fd = open(fname_test_fallocate.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); } while (fd < 0 && errno == EINTR); ASSERT_GT(fd, 0); int alloc_status = fallocate(fd, 0, 0, 1); int err_number = 0; if (alloc_status != 0) { err_number = errno; fprintf(stderr, "Warning: fallocate() fails, %s\n", strerror(err_number)); } close(fd); ASSERT_OK(env_->DeleteFile(fname_test_fallocate)); if (alloc_status != 0 && err_number == EOPNOTSUPP) { // The filesystem containing the file does not support fallocate return; } EnvOptions soptions; soptions.use_mmap_writes = false; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); // allocate 100 MB size_t kPreallocateSize = 100 * 1024 * 1024; size_t kBlockSize = 512; size_t kDataSize = 1024 * 1024; auto data_ptr = NewAligned(kDataSize, 'A'); Slice data(data_ptr.get(), kDataSize); wfile->SetPreallocationBlockSize(kPreallocateSize); wfile->PrepareWrite(wfile->GetFileSize(), kDataSize); ASSERT_OK(wfile->Append(data)); ASSERT_OK(wfile->Flush()); struct stat f_stat; ASSERT_EQ(stat(fname.c_str(), &f_stat), 0); ASSERT_EQ((unsigned int)kDataSize, f_stat.st_size); // verify that blocks are preallocated // Note here that we don't check the exact number of blocks preallocated -- // we only require that number of allocated blocks is at least what we // expect. 
// Returns true if any string in `ss` is a prefix of another string in `ss`.
// An empty string is a prefix of everything, so its presence alone yields
// true. Only proper, non-empty prefixes of each element are probed.
bool HasPrefix(const std::unordered_set<std::string>& ss) {
  for (auto it = ss.begin(); it != ss.end(); ++it) {
    const std::string& candidate = *it;
    if (candidate.empty()) {
      return true;
    }
    // Walk the proper prefixes from longest to shortest; the order does not
    // affect the boolean result.
    for (size_t len = candidate.size() - 1; len >= 1; --len) {
      if (ss.find(candidate.substr(0, len)) != ss.end()) {
        return true;
      }
    }
  }
  return false;
}
std::unordered_set ids; for (const std::string fname : fnames) { std::unique_ptr file; std::string unique_id; ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); ASSERT_TRUE(id_size > 0); unique_id = std::string(temp_id, id_size); ASSERT_TRUE(IsUniqueIDValid(unique_id)); ASSERT_TRUE(ids.count(unique_id) == 0); ids.insert(unique_id); } // Delete the files for (const std::string fname : fnames) { ASSERT_OK(env_->DeleteFile(fname)); } ASSERT_TRUE(!HasPrefix(ids)); } } TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDDeletes) { if (env_ == Env::Default()) { EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; IoctlFriendlyTmpdir ift; std::string fname = ift.name() + "/" + "testfile"; // Check that after file is deleted we don't get same ID again in a new // file. std::unordered_set ids; for (int i = 0; i < 1000; ++i) { // Create file. { std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); } // Get Unique ID std::string unique_id; { std::unique_ptr file; ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); ASSERT_TRUE(id_size > 0); unique_id = std::string(temp_id, id_size); } ASSERT_TRUE(IsUniqueIDValid(unique_id)); ASSERT_TRUE(ids.count(unique_id) == 0); ids.insert(unique_id); // Delete the file ASSERT_OK(env_->DeleteFile(fname)); } ASSERT_TRUE(!HasPrefix(ids)); } } #endif // !defined(OS_WIN) TEST_P(EnvPosixTestWithParam, MultiRead) { EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; std::string fname = test::PerThreadDBPath(env_, "testfile"); const size_t kSectorSize = 4096; const size_t kNumSectors = 8; // Create file. 
{ std::unique_ptr wfile; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ !defined(OS_AIX) if (soptions.use_direct_writes) { soptions.use_direct_writes = false; } #endif ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); for (size_t i = 0; i < kNumSectors; ++i) { auto data = NewAligned(kSectorSize * 8, static_cast(i + 1)); Slice slice(data.get(), kSectorSize); ASSERT_OK(wfile->Append(slice)); } ASSERT_OK(wfile->Close()); } // More attempts to simulate more partial result sequences. for (uint32_t attempt = 0; attempt < 20; attempt++) { // Random Read Random rnd(301 + attempt); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "PosixRandomAccessFile::MultiRead:io_uring_result", [&](void* arg) { if (attempt > 0) { // No failure in the first attempt. size_t& bytes_read = *static_cast(arg); if (rnd.OneIn(4)) { bytes_read = 0; } else if (rnd.OneIn(3)) { bytes_read = static_cast( rnd.Uniform(static_cast(bytes_read))); } } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); std::unique_ptr file; std::vector reqs(3); std::vector> data; uint64_t offset = 0; for (size_t i = 0; i < reqs.size(); ++i) { reqs[i].offset = offset; offset += 2 * kSectorSize; reqs[i].len = kSectorSize; data.emplace_back(NewAligned(kSectorSize, 0)); reqs[i].scratch = data.back().get(); } #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \ !defined(OS_AIX) if (soptions.use_direct_reads) { soptions.use_direct_reads = false; } #endif ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); ASSERT_OK(file->MultiRead(reqs.data(), reqs.size())); for (size_t i = 0; i < reqs.size(); ++i) { auto buf = NewAligned(kSectorSize * 8, static_cast(i * 2 + 1)); ASSERT_OK(reqs[i].status); ASSERT_EQ(memcmp(reqs[i].scratch, buf.get(), kSectorSize), 0); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(EnvPosixTest, MultiReadNonAlignedLargeNum) { // In this test we don't do aligned read, wo it doesn't work for 
// direct I/O case. EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = false; std::string fname = test::PerThreadDBPath(env_, "testfile"); const size_t kTotalSize = 81920; std::string expected_data; Random rnd(301); test::RandomString(&rnd, kTotalSize, &expected_data); // Create file. { std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); ASSERT_OK(wfile->Append(expected_data)); ASSERT_OK(wfile->Close()); } // More attempts to simulate more partial result sequences. for (uint32_t attempt = 0; attempt < 25; attempt++) { // Right now kIoUringDepth is hard coded as 256, so we need very large // number of keys to cover the case of multiple rounds of submissions. // Right now the test latency is still acceptable. If it ends up with // too long, we can modify the io uring depth with SyncPoint here. const int num_reads = rnd.Uniform(512) + 1; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "PosixRandomAccessFile::MultiRead:io_uring_result", [&](void* arg) { if (attempt > 5) { // Improve partial result rates in second half of the run to // cover the case of repeated partial results. int odd = (attempt < 15) ? num_reads / 2 : 4; // No failure in first several attempts. size_t& bytes_read = *static_cast(arg); if (rnd.OneIn(odd)) { bytes_read = 0; } else if (rnd.OneIn(odd / 2)) { bytes_read = static_cast( rnd.Uniform(static_cast(bytes_read))); } } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); // Generate (offset, len) pairs std::set start_offsets; for (int i = 0; i < num_reads; i++) { int rnd_off; // No repeat offsets. while (start_offsets.find(rnd_off = rnd.Uniform(81920)) != start_offsets.end()) {} start_offsets.insert(rnd_off); } std::vector offsets; std::vector lens; // std::set already sorted the offsets. 
for (int so: start_offsets) { offsets.push_back(so); } for (size_t i = 0; i + 1 < offsets.size(); i++) { lens.push_back(static_cast(rnd.Uniform(static_cast(offsets[i + 1] - offsets[i])) + 1)); } lens.push_back(static_cast(rnd.Uniform(static_cast(kTotalSize - offsets.back())) + 1)); ASSERT_EQ(num_reads, lens.size()); // Create requests std::vector scratches; scratches.reserve(num_reads); std::vector reqs(num_reads); for (size_t i = 0; i < reqs.size(); ++i) { reqs[i].offset = offsets[i]; reqs[i].len = lens[i]; scratches.emplace_back(reqs[i].len, ' '); reqs[i].scratch = const_cast(scratches.back().data()); } // Query the data std::unique_ptr file; ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); ASSERT_OK(file->MultiRead(reqs.data(), reqs.size())); // Validate results for (int i = 0; i < num_reads; ++i) { ASSERT_OK(reqs[i].status); ASSERT_EQ(Slice(expected_data.data() + offsets[i], lens[i]).ToString(true), reqs[i].result.ToString(true)); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } // Only works in linux platforms #ifdef OS_WIN TEST_P(EnvPosixTestWithParam, DISABLED_InvalidateCache) { #else TEST_P(EnvPosixTestWithParam, InvalidateCache) { #endif ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; std::string fname = test::PerThreadDBPath(env_, "testfile"); const size_t kSectorSize = 512; auto data = NewAligned(kSectorSize, 0); Slice slice(data.get(), kSectorSize); // Create file. 
{ std::unique_ptr wfile; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) if (soptions.use_direct_writes) { soptions.use_direct_writes = false; } #endif ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); ASSERT_OK(wfile->Append(slice)); ASSERT_OK(wfile->InvalidateCache(0, 0)); ASSERT_OK(wfile->Close()); } // Random Read { std::unique_ptr file; auto scratch = NewAligned(kSectorSize, 0); Slice result; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) if (soptions.use_direct_reads) { soptions.use_direct_reads = false; } #endif ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); ASSERT_OK(file->Read(0, kSectorSize, &result, scratch.get())); ASSERT_EQ(memcmp(scratch.get(), data.get(), kSectorSize), 0); ASSERT_OK(file->InvalidateCache(0, 11)); ASSERT_OK(file->InvalidateCache(0, 0)); } // Sequential Read { std::unique_ptr file; auto scratch = NewAligned(kSectorSize, 0); Slice result; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) if (soptions.use_direct_reads) { soptions.use_direct_reads = false; } #endif ASSERT_OK(env_->NewSequentialFile(fname, &file, soptions)); if (file->use_direct_io()) { ASSERT_OK(file->PositionedRead(0, kSectorSize, &result, scratch.get())); } else { ASSERT_OK(file->Read(kSectorSize, &result, scratch.get())); } ASSERT_EQ(memcmp(scratch.get(), data.get(), kSectorSize), 0); ASSERT_OK(file->InvalidateCache(0, 11)); ASSERT_OK(file->InvalidateCache(0, 0)); } // Delete the file ASSERT_OK(env_->DeleteFile(fname)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); } #endif // not TRAVIS #endif // OS_LINUX || OS_WIN class TestLogger : public Logger { public: using Logger::Logv; void Logv(const char* format, va_list ap) override { log_count++; char new_format[550]; std::fill_n(new_format, sizeof(new_format), '2'); { va_list backup_ap; va_copy(backup_ap, ap); int n = vsnprintf(new_format, sizeof(new_format) - 1, 
format, backup_ap); // 48 bytes for extra information + bytes allocated // When we have n == -1 there is not a terminating zero expected #ifdef OS_WIN if (n < 0) { char_0_count++; } #endif if (new_format[0] == '[') { // "[DEBUG] " ASSERT_TRUE(n <= 56 + (512 - static_cast(sizeof(struct timeval)))); } else { ASSERT_TRUE(n <= 48 + (512 - static_cast(sizeof(struct timeval)))); } va_end(backup_ap); } for (size_t i = 0; i < sizeof(new_format); i++) { if (new_format[i] == 'x') { char_x_count++; } else if (new_format[i] == '\0') { char_0_count++; } } } int log_count; int char_x_count; int char_0_count; }; TEST_P(EnvPosixTestWithParam, LogBufferTest) { TestLogger test_logger; test_logger.SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); test_logger.log_count = 0; test_logger.char_x_count = 0; test_logger.char_0_count = 0; LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, &test_logger); LogBuffer log_buffer_debug(DEBUG_LEVEL, &test_logger); char bytes200[200]; std::fill_n(bytes200, sizeof(bytes200), '1'); bytes200[sizeof(bytes200) - 1] = '\0'; char bytes600[600]; std::fill_n(bytes600, sizeof(bytes600), '1'); bytes600[sizeof(bytes600) - 1] = '\0'; char bytes9000[9000]; std::fill_n(bytes9000, sizeof(bytes9000), '1'); bytes9000[sizeof(bytes9000) - 1] = '\0'; ROCKS_LOG_BUFFER(&log_buffer, "x%sx", bytes200); ROCKS_LOG_BUFFER(&log_buffer, "x%sx", bytes600); ROCKS_LOG_BUFFER(&log_buffer, "x%sx%sx%sx", bytes200, bytes200, bytes200); ROCKS_LOG_BUFFER(&log_buffer, "x%sx%sx", bytes200, bytes600); ROCKS_LOG_BUFFER(&log_buffer, "x%sx%sx", bytes600, bytes9000); ROCKS_LOG_BUFFER(&log_buffer_debug, "x%sx", bytes200); test_logger.SetInfoLogLevel(DEBUG_LEVEL); ROCKS_LOG_BUFFER(&log_buffer_debug, "x%sx%sx%sx", bytes600, bytes9000, bytes200); ASSERT_EQ(0, test_logger.log_count); log_buffer.FlushBufferToLog(); log_buffer_debug.FlushBufferToLog(); ASSERT_EQ(6, test_logger.log_count); ASSERT_EQ(6, test_logger.char_0_count); ASSERT_EQ(10, test_logger.char_x_count); } class TestLogger2 : public Logger { 
public: explicit TestLogger2(size_t max_log_size) : max_log_size_(max_log_size) {} using Logger::Logv; void Logv(const char* format, va_list ap) override { char new_format[2000]; std::fill_n(new_format, sizeof(new_format), '2'); { va_list backup_ap; va_copy(backup_ap, ap); int n = vsnprintf(new_format, sizeof(new_format) - 1, format, backup_ap); // 48 bytes for extra information + bytes allocated ASSERT_TRUE( n <= 48 + static_cast(max_log_size_ - sizeof(struct timeval))); ASSERT_TRUE(n > static_cast(max_log_size_ - sizeof(struct timeval))); va_end(backup_ap); } } size_t max_log_size_; }; TEST_P(EnvPosixTestWithParam, LogBufferMaxSizeTest) { char bytes9000[9000]; std::fill_n(bytes9000, sizeof(bytes9000), '1'); bytes9000[sizeof(bytes9000) - 1] = '\0'; for (size_t max_log_size = 256; max_log_size <= 1024; max_log_size += 1024 - 256) { TestLogger2 test_logger(max_log_size); test_logger.SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, &test_logger); ROCKS_LOG_BUFFER_MAX_SZ(&log_buffer, max_log_size, "%s", bytes9000); log_buffer.FlushBufferToLog(); } } TEST_P(EnvPosixTestWithParam, Preallocation) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); const std::string src = test::PerThreadDBPath(env_, "testfile"); std::unique_ptr srcfile; EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD) if (soptions.use_direct_writes) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "NewWritableFile:O_DIRECT", [&](void* arg) { int* val = static_cast(arg); *val &= ~O_DIRECT; }); } #endif ASSERT_OK(env_->NewWritableFile(src, &srcfile, soptions)); srcfile->SetPreallocationBlockSize(1024 * 1024); // No writes should mean no preallocation size_t block_size, last_allocated_block; srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); 
ASSERT_EQ(last_allocated_block, 0UL); // Small write should preallocate one block size_t kStrSize = 4096; auto data = NewAligned(kStrSize, 'A'); Slice str(data.get(), kStrSize); srcfile->PrepareWrite(srcfile->GetFileSize(), kStrSize); srcfile->Append(str); srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); ASSERT_EQ(last_allocated_block, 1UL); // Write an entire preallocation block, make sure we increased by two. { auto buf_ptr = NewAligned(block_size, ' '); Slice buf(buf_ptr.get(), block_size); srcfile->PrepareWrite(srcfile->GetFileSize(), block_size); srcfile->Append(buf); srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); ASSERT_EQ(last_allocated_block, 2UL); } // Write five more blocks at once, ensure we're where we need to be. { auto buf_ptr = NewAligned(block_size * 5, ' '); Slice buf = Slice(buf_ptr.get(), block_size * 5); srcfile->PrepareWrite(srcfile->GetFileSize(), buf.size()); srcfile->Append(buf); srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); ASSERT_EQ(last_allocated_block, 7UL); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); } // Test that the two ways to get children file attributes (in bulk or // individually) behave consistently. 
TEST_P(EnvPosixTestWithParam, ConsistentChildrenAttributes) {
  // Creates kNumChildren files where file i holds 4096 * i bytes, then checks
  // that Env::GetChildrenFileAttributes() and Env::GetFileSize() report the
  // same size for each of them.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  EnvOptions soptions;
  soptions.use_direct_reads = soptions.use_direct_writes = direct_io_;
  const int kNumChildren = 10;

#if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD)
  if (soptions.use_direct_writes) {
    // Strip O_DIRECT from the open flags at the sync point. The callback is
    // identical for every file, so it is registered once instead of on every
    // loop iteration.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "NewWritableFile:O_DIRECT", [&](void* arg) {
          int* val = static_cast<int*>(arg);
          *val &= ~O_DIRECT;
        });
  }
#endif

  std::string data;
  for (int i = 0; i < kNumChildren; ++i) {
    const std::string path =
        test::TmpDir(env_) + "/" + "testfile_" + std::to_string(i);
    std::unique_ptr<WritableFile> file;
    ASSERT_OK(env_->NewWritableFile(path, &file, soptions));
    auto buf_ptr = NewAligned(data.size(), 'T');
    Slice buf(buf_ptr.get(), data.size());
    // The size checks below depend on this write having succeeded.
    ASSERT_OK(file->Append(buf));
    // `data` only tracks the size to write: it grows by 4096 per file.
    data.append(std::string(4096, 'T'));
  }

  std::vector<Env::FileAttributes> file_attrs;
  ASSERT_OK(env_->GetChildrenFileAttributes(test::TmpDir(env_), &file_attrs));
  for (int i = 0; i < kNumChildren; ++i) {
    const std::string name = "testfile_" + std::to_string(i);
    const std::string path = test::TmpDir(env_) + "/" + name;

    auto file_attrs_iter = std::find_if(
        file_attrs.begin(), file_attrs.end(),
        [&name](const Env::FileAttributes& fm) { return fm.name == name; });
    ASSERT_TRUE(file_attrs_iter != file_attrs.end());
    uint64_t size;
    ASSERT_OK(env_->GetFileSize(path, &size));
    // Compare unsigned with unsigned.
    const uint64_t expected_size = 4096 * static_cast<uint64_t>(i);
    ASSERT_EQ(size, expected_size);
    ASSERT_EQ(size, file_attrs_iter->size_bytes);
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace();
}

// Test that WritableFileWrapper forwards all calls to WritableFile.
TEST_P(EnvPosixTestWithParam, WritableFileWrapper) { class Base : public WritableFile { public: mutable int *step_; void inc(int x) const { EXPECT_EQ(x, (*step_)++); } explicit Base(int* step) : step_(step) { inc(0); } Status Append(const Slice& /*data*/) override { inc(1); return Status::OK(); } Status PositionedAppend(const Slice& /*data*/, uint64_t /*offset*/) override { inc(2); return Status::OK(); } Status Truncate(uint64_t /*size*/) override { inc(3); return Status::OK(); } Status Close() override { inc(4); return Status::OK(); } Status Flush() override { inc(5); return Status::OK(); } Status Sync() override { inc(6); return Status::OK(); } Status Fsync() override { inc(7); return Status::OK(); } bool IsSyncThreadSafe() const override { inc(8); return true; } bool use_direct_io() const override { inc(9); return true; } size_t GetRequiredBufferAlignment() const override { inc(10); return 0; } void SetIOPriority(Env::IOPriority /*pri*/) override { inc(11); } Env::IOPriority GetIOPriority() override { inc(12); return Env::IOPriority::IO_LOW; } void SetWriteLifeTimeHint(Env::WriteLifeTimeHint /*hint*/) override { inc(13); } Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { inc(14); return Env::WriteLifeTimeHint::WLTH_NOT_SET; } uint64_t GetFileSize() override { inc(15); return 0; } void SetPreallocationBlockSize(size_t /*size*/) override { inc(16); } void GetPreallocationStatus(size_t* /*block_size*/, size_t* /*last_allocated_block*/) override { inc(17); } size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { inc(18); return 0; } Status InvalidateCache(size_t /*offset*/, size_t /*length*/) override { inc(19); return Status::OK(); } Status RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/) override { inc(20); return Status::OK(); } void PrepareWrite(size_t /*offset*/, size_t /*len*/) override { inc(21); } Status Allocate(uint64_t /*offset*/, uint64_t /*len*/) override { inc(22); return Status::OK(); } public: ~Base() override { inc(23); } 
}; class Wrapper : public WritableFileWrapper { public: explicit Wrapper(WritableFile* target) : WritableFileWrapper(target) {} }; int step = 0; { Base b(&step); Wrapper w(&b); w.Append(Slice()); w.PositionedAppend(Slice(), 0); w.Truncate(0); w.Close(); w.Flush(); w.Sync(); w.Fsync(); w.IsSyncThreadSafe(); w.use_direct_io(); w.GetRequiredBufferAlignment(); w.SetIOPriority(Env::IOPriority::IO_HIGH); w.GetIOPriority(); w.SetWriteLifeTimeHint(Env::WriteLifeTimeHint::WLTH_NOT_SET); w.GetWriteLifeTimeHint(); w.GetFileSize(); w.SetPreallocationBlockSize(0); w.GetPreallocationStatus(nullptr, nullptr); w.GetUniqueId(nullptr, 0); w.InvalidateCache(0, 0); w.RangeSync(0, 0); w.PrepareWrite(0, 0); w.Allocate(0, 0); } EXPECT_EQ(24, step); } TEST_P(EnvPosixTestWithParam, PosixRandomRWFile) { const std::string path = test::PerThreadDBPath(env_, "random_rw_file"); env_->DeleteFile(path); std::unique_ptr file; // Cannot open non-existing file. ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions())); // Create the file using WriteableFile { std::unique_ptr wf; ASSERT_OK(env_->NewWritableFile(path, &wf, EnvOptions())); } ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); char buf[10000]; Slice read_res; ASSERT_OK(file->Write(0, "ABCD")); ASSERT_OK(file->Read(0, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ABCD"); ASSERT_OK(file->Write(2, "XXXX")); ASSERT_OK(file->Read(0, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ABXXXX"); ASSERT_OK(file->Write(10, "ZZZ")); ASSERT_OK(file->Read(10, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ZZZ"); ASSERT_OK(file->Write(11, "Y")); ASSERT_OK(file->Read(10, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ZYZ"); ASSERT_OK(file->Write(200, "FFFFF")); ASSERT_OK(file->Read(200, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "FFFFF"); ASSERT_OK(file->Write(205, "XXXX")); ASSERT_OK(file->Read(200, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "FFFFFXXXX"); ASSERT_OK(file->Write(5, "QQQQ")); 
ASSERT_OK(file->Read(0, 9, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ABXXXQQQQ"); ASSERT_OK(file->Read(2, 4, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "XXXQ"); // Close file and reopen it file->Close(); ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); ASSERT_OK(file->Read(0, 9, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ABXXXQQQQ"); ASSERT_OK(file->Read(10, 3, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ZYZ"); ASSERT_OK(file->Read(200, 9, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "FFFFFXXXX"); ASSERT_OK(file->Write(4, "TTTTTTTTTTTTTTTT")); ASSERT_OK(file->Read(0, 10, &read_res, buf)); ASSERT_EQ(read_res.ToString(), "ABXXTTTTTT"); // Clean up env_->DeleteFile(path); } class RandomRWFileWithMirrorString { public: explicit RandomRWFileWithMirrorString(RandomRWFile* _file) : file_(_file) {} void Write(size_t offset, const std::string& data) { // Write to mirror string StringWrite(offset, data); // Write to file Status s = file_->Write(offset, data); ASSERT_OK(s) << s.ToString(); } void Read(size_t offset = 0, size_t n = 1000000) { Slice str_res(nullptr, 0); if (offset < file_mirror_.size()) { size_t str_res_sz = std::min(file_mirror_.size() - offset, n); str_res = Slice(file_mirror_.data() + offset, str_res_sz); StopSliceAtNull(&str_res); } Slice file_res; Status s = file_->Read(offset, n, &file_res, buf_); ASSERT_OK(s) << s.ToString(); StopSliceAtNull(&file_res); ASSERT_EQ(str_res.ToString(), file_res.ToString()) << offset << " " << n; } void SetFile(RandomRWFile* _file) { file_ = _file; } private: void StringWrite(size_t offset, const std::string& src) { if (offset + src.size() > file_mirror_.size()) { file_mirror_.resize(offset + src.size(), '\0'); } char* pos = const_cast(file_mirror_.data() + offset); memcpy(pos, src.data(), src.size()); } void StopSliceAtNull(Slice* slc) { for (size_t i = 0; i < slc->size(); i++) { if ((*slc)[i] == '\0') { *slc = Slice(slc->data(), i); break; } } } char buf_[10000]; RandomRWFile* 
file_; std::string file_mirror_; }; TEST_P(EnvPosixTestWithParam, PosixRandomRWFileRandomized) { const std::string path = test::PerThreadDBPath(env_, "random_rw_file_rand"); env_->DeleteFile(path); std::unique_ptr file; #ifdef OS_LINUX // Cannot open non-existing file. ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions())); #endif // Create the file using WriteableFile { std::unique_ptr wf; ASSERT_OK(env_->NewWritableFile(path, &wf, EnvOptions())); } ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); RandomRWFileWithMirrorString file_with_mirror(file.get()); Random rnd(301); std::string buf; for (int i = 0; i < 10000; i++) { // Genrate random data test::RandomString(&rnd, 10, &buf); // Pick random offset for write size_t write_off = rnd.Next() % 1000; file_with_mirror.Write(write_off, buf); // Pick random offset for read size_t read_off = rnd.Next() % 1000; size_t read_sz = rnd.Next() % 20; file_with_mirror.Read(read_off, read_sz); if (i % 500 == 0) { // Reopen the file every 500 iters ASSERT_OK(env_->NewRandomRWFile(path, &file, EnvOptions())); file_with_mirror.SetFile(file.get()); } } // clean up env_->DeleteFile(path); } class TestEnv : public EnvWrapper { public: explicit TestEnv() : EnvWrapper(Env::Default()), close_count(0) { } class TestLogger : public Logger { public: using Logger::Logv; TestLogger(TestEnv* env_ptr) : Logger() { env = env_ptr; } ~TestLogger() override { if (!closed_) { CloseHelper(); } } void Logv(const char* /*format*/, va_list /*ap*/) override{}; protected: Status CloseImpl() override { return CloseHelper(); } private: Status CloseHelper() { env->CloseCountInc();; return Status::OK(); } TestEnv* env; }; void CloseCountInc() { close_count++; } int GetCloseCount() { return close_count; } Status NewLogger(const std::string& /*fname*/, std::shared_ptr* result) override { result->reset(new TestLogger(this)); return Status::OK(); } private: int close_count; }; class EnvTest : public testing::Test { public: EnvTest() : 
test_directory_(test::PerThreadDBPath("env_test")) {} protected: const std::string test_directory_; }; TEST_F(EnvTest, Close) { TestEnv* env = new TestEnv(); std::shared_ptr logger; Status s; s = env->NewLogger("", &logger); ASSERT_EQ(s, Status::OK()); logger.get()->Close(); ASSERT_EQ(env->GetCloseCount(), 1); // Call Close() again. CloseHelper() should not be called again logger.get()->Close(); ASSERT_EQ(env->GetCloseCount(), 1); logger.reset(); ASSERT_EQ(env->GetCloseCount(), 1); s = env->NewLogger("", &logger); ASSERT_EQ(s, Status::OK()); logger.reset(); ASSERT_EQ(env->GetCloseCount(), 2); delete env; } INSTANTIATE_TEST_CASE_P(DefaultEnvWithoutDirectIO, EnvPosixTestWithParam, ::testing::Values(std::pair(Env::Default(), false))); #if !defined(ROCKSDB_LITE) INSTANTIATE_TEST_CASE_P(DefaultEnvWithDirectIO, EnvPosixTestWithParam, ::testing::Values(std::pair(Env::Default(), true))); #endif // !defined(ROCKSDB_LITE) #if !defined(ROCKSDB_LITE) && !defined(OS_WIN) static std::unique_ptr chroot_env( NewChrootEnv(Env::Default(), test::TmpDir(Env::Default()))); INSTANTIATE_TEST_CASE_P( ChrootEnvWithoutDirectIO, EnvPosixTestWithParam, ::testing::Values(std::pair(chroot_env.get(), false))); INSTANTIATE_TEST_CASE_P( ChrootEnvWithDirectIO, EnvPosixTestWithParam, ::testing::Values(std::pair(chroot_env.get(), true))); #endif // !defined(ROCKSDB_LITE) && !defined(OS_WIN) class EnvFSTestWithParam : public ::testing::Test, public ::testing::WithParamInterface> { public: EnvFSTestWithParam() { bool env_non_null = std::get<0>(GetParam()); bool env_default = std::get<1>(GetParam()); bool fs_default = std::get<2>(GetParam()); env_ = env_non_null ? (env_default ? Env::Default() : nullptr) : nullptr; fs_ = fs_default ? 
FileSystem::Default() : std::make_shared(FileSystem::Default()); if (env_non_null && env_default && !fs_default) { env_ptr_ = NewCompositeEnv(fs_); } if (env_non_null && !env_default && fs_default) { env_ptr_ = std::unique_ptr(new FaultInjectionTestEnv(Env::Default())); fs_.reset(); } if (env_non_null && !env_default && !fs_default) { env_ptr_.reset(new FaultInjectionTestEnv(Env::Default())); composite_env_ptr_.reset(new CompositeEnvWrapper(env_ptr_.get(), fs_)); env_ = composite_env_ptr_.get(); } else { env_ = env_ptr_.get(); } dbname1_ = test::PerThreadDBPath("env_fs_test1"); dbname2_ = test::PerThreadDBPath("env_fs_test2"); } ~EnvFSTestWithParam() = default; Env* env_; std::unique_ptr env_ptr_; std::unique_ptr composite_env_ptr_; std::shared_ptr fs_; std::string dbname1_; std::string dbname2_; }; TEST_P(EnvFSTestWithParam, OptionsTest) { Options opts; opts.env = env_; opts.create_if_missing = true; std::string dbname = dbname1_; if (env_) { if (fs_) { ASSERT_EQ(fs_.get(), env_->GetFileSystem().get()); } else { ASSERT_NE(FileSystem::Default().get(), env_->GetFileSystem().get()); } } for (int i = 0; i < 2; ++i) { DB* db; Status s = DB::Open(opts, dbname, &db); ASSERT_OK(s); WriteOptions wo; db->Put(wo, "a", "a"); db->Flush(FlushOptions()); db->Put(wo, "b", "b"); db->Flush(FlushOptions()); db->CompactRange(CompactRangeOptions(), nullptr, nullptr); std::string val; ASSERT_OK(db->Get(ReadOptions(), "a", &val)); ASSERT_EQ("a", val); ASSERT_OK(db->Get(ReadOptions(), "b", &val)); ASSERT_EQ("b", val); db->Close(); delete db; DestroyDB(dbname, opts); dbname = dbname2_; } } // The parameters are as follows - // 1. True means Options::env is non-null, false means null // 2. True means use Env::Default, false means custom // 3. 
True means use FileSystem::Default, false means custom INSTANTIATE_TEST_CASE_P( EnvFSTest, EnvFSTestWithParam, ::testing::Combine(::testing::Bool(), ::testing::Bool(), ::testing::Bool())); // This test ensures that default Env and those allocated by // NewCompositeEnv() all share the same threadpool TEST_F(EnvTest, MultipleCompositeEnv) { std::shared_ptr fs1 = std::make_shared(FileSystem::Default()); std::shared_ptr fs2 = std::make_shared(FileSystem::Default()); std::unique_ptr env1 = NewCompositeEnv(fs1); std::unique_ptr env2 = NewCompositeEnv(fs2); Env::Default()->SetBackgroundThreads(8, Env::HIGH); Env::Default()->SetBackgroundThreads(16, Env::LOW); ASSERT_EQ(env1->GetBackgroundThreads(Env::LOW), 16); ASSERT_EQ(env1->GetBackgroundThreads(Env::HIGH), 8); ASSERT_EQ(env2->GetBackgroundThreads(Env::LOW), 16); ASSERT_EQ(env2->GetBackgroundThreads(Env::HIGH), 8); } TEST_F(EnvTest, IsDirectory) { Status s = Env::Default()->CreateDirIfMissing(test_directory_); ASSERT_OK(s); const std::string test_sub_dir = test_directory_ + "sub1"; const std::string test_file_path = test_directory_ + "file1"; ASSERT_OK(Env::Default()->CreateDirIfMissing(test_sub_dir)); bool is_dir = false; ASSERT_OK(Env::Default()->IsDirectory(test_sub_dir, &is_dir)); ASSERT_TRUE(is_dir); { std::unique_ptr wfile; s = Env::Default()->GetFileSystem()->NewWritableFile( test_file_path, FileOptions(), &wfile, /*dbg=*/nullptr); ASSERT_OK(s); std::unique_ptr fwriter; fwriter.reset(new WritableFileWriter(std::move(wfile), test_file_path, FileOptions(), Env::Default())); constexpr char buf[] = "test"; s = fwriter->Append(buf); ASSERT_OK(s); } ASSERT_OK(Env::Default()->IsDirectory(test_file_path, &is_dir)); ASSERT_FALSE(is_dir); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/env/file_system.cc000066400000000000000000000106771370372246700167020ustar00rootroot00000000000000// Copyright (c) 2019-present, Facebook, 
Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "env/composite_env_wrapper.h" #include "rocksdb/file_system.h" #include "options/db_options.h" #include "rocksdb/utilities/object_registry.h" namespace ROCKSDB_NAMESPACE { FileSystem::FileSystem() {} FileSystem::~FileSystem() {} Status FileSystem::Load(const std::string& value, std::shared_ptr* result) { Status s; #ifndef ROCKSDB_LITE s = ObjectRegistry::NewInstance()->NewSharedObject(value, result); #else (void)result; s = Status::NotSupported("Cannot load FileSystem in LITE mode: ", value); #endif return s; } IOStatus FileSystem::ReuseWritableFile(const std::string& fname, const std::string& old_fname, const FileOptions& opts, std::unique_ptr* result, IODebugContext* dbg) { IOStatus s = RenameFile(old_fname, fname, opts.io_options, dbg); if (!s.ok()) { return s; } return NewWritableFile(fname, opts, result, dbg); } FileOptions FileSystem::OptimizeForLogRead( const FileOptions& file_options) const { FileOptions optimized_file_options(file_options); optimized_file_options.use_direct_reads = false; return optimized_file_options; } FileOptions FileSystem::OptimizeForManifestRead( const FileOptions& file_options) const { FileOptions optimized_file_options(file_options); optimized_file_options.use_direct_reads = false; return optimized_file_options; } FileOptions FileSystem::OptimizeForLogWrite(const FileOptions& file_options, const DBOptions& db_options) const { FileOptions optimized_file_options(file_options); optimized_file_options.bytes_per_sync = db_options.wal_bytes_per_sync; optimized_file_options.writable_file_max_buffer_size = db_options.writable_file_max_buffer_size; return optimized_file_options; } FileOptions FileSystem::OptimizeForManifestWrite( const FileOptions& file_options) const { return file_options; } FileOptions 
FileSystem::OptimizeForCompactionTableWrite( const FileOptions& file_options, const ImmutableDBOptions& db_options) const { FileOptions optimized_file_options(file_options); optimized_file_options.use_direct_writes = db_options.use_direct_io_for_flush_and_compaction; return optimized_file_options; } FileOptions FileSystem::OptimizeForCompactionTableRead( const FileOptions& file_options, const ImmutableDBOptions& db_options) const { FileOptions optimized_file_options(file_options); optimized_file_options.use_direct_reads = db_options.use_direct_reads; return optimized_file_options; } IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, const std::string& fname, bool should_sync) { std::unique_ptr file; EnvOptions soptions; IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr); if (!s.ok()) { return s; } s = file->Append(data, IOOptions(), nullptr); if (s.ok() && should_sync) { s = file->Sync(IOOptions(), nullptr); } if (!s.ok()) { fs->DeleteFile(fname, IOOptions(), nullptr); } return s; } IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, std::string* data) { FileOptions soptions; data->clear(); std::unique_ptr file; IOStatus s = status_to_io_status( fs->NewSequentialFile(fname, soptions, &file, nullptr)); if (!s.ok()) { return s; } static const int kBufferSize = 8192; char* space = new char[kBufferSize]; while (true) { Slice fragment; s = file->Read(kBufferSize, IOOptions(), &fragment, space, nullptr); if (!s.ok()) { break; } data->append(fragment.data(), fragment.size()); if (fragment.empty()) { break; } } delete[] space; return s; } #ifdef OS_WIN std::shared_ptr FileSystem::Default() { static LegacyFileSystemWrapper default_fs(Env::Default()); static std::shared_ptr default_fs_ptr( &default_fs, [](LegacyFileSystemWrapper*) {}); return default_fs_ptr; } #endif } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/fs_posix.cc000066400000000000000000001023521370372246700162010ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors #include #ifndef ROCKSDB_NO_DYNAMIC_EXTENSION #include #endif #include #include #if defined(OS_LINUX) #include #endif #include #include #include #include #include #include #include #include #if defined(OS_LINUX) || defined(OS_SOLARIS) || defined(OS_ANDROID) #include #include #include #endif #include #include #include #include #include // Get nano time includes #if defined(OS_LINUX) || defined(OS_FREEBSD) #elif defined(__MACH__) #include #include #include #else #include #endif #include #include #include #include "env/composite_env_wrapper.h" #include "env/io_posix.h" #include "logging/logging.h" #include "logging/posix_logger.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/thread_status_updater.h" #include "port/port.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "test_util/sync_point.h" #include "util/coding.h" #include "util/compression_context_cache.h" #include "util/random.h" #include "util/string_util.h" #include "util/thread_local.h" #include "util/threadpool_imp.h" #if !defined(TMPFS_MAGIC) #define TMPFS_MAGIC 0x01021994 #endif #if !defined(XFS_SUPER_MAGIC) #define XFS_SUPER_MAGIC 0x58465342 #endif #if !defined(EXT4_SUPER_MAGIC) #define EXT4_SUPER_MAGIC 0xEF53 #endif namespace ROCKSDB_NAMESPACE { namespace { inline mode_t GetDBFileMode(bool allow_non_owner_access) { return allow_non_owner_access ? 0644 : 0600; } static uint64_t gettid() { return Env::Default()->GetThreadID(); } // list of pathnames that are locked // Only used for error message. 
struct LockHoldingInfo { int64_t acquire_time; uint64_t acquiring_thread; }; static std::map locked_files; static port::Mutex mutex_locked_files; static int LockOrUnlock(int fd, bool lock) { errno = 0; struct flock f; memset(&f, 0, sizeof(f)); f.l_type = (lock ? F_WRLCK : F_UNLCK); f.l_whence = SEEK_SET; f.l_start = 0; f.l_len = 0; // Lock/unlock entire file int value = fcntl(fd, F_SETLK, &f); return value; } class PosixFileLock : public FileLock { public: int fd_; std::string filename; }; int cloexec_flags(int flags, const EnvOptions* options) { // If the system supports opening the file with cloexec enabled, // do so, as this avoids a race condition if a db is opened around // the same time that a child process is forked #ifdef O_CLOEXEC if (options == nullptr || options->set_fd_cloexec) { flags |= O_CLOEXEC; } #else (void)options; #endif return flags; } class PosixFileSystem : public FileSystem { public: PosixFileSystem(); const char* Name() const override { return "Posix File System"; } ~PosixFileSystem() override {} void SetFD_CLOEXEC(int fd, const EnvOptions* options) { if ((options == nullptr || options->set_fd_cloexec) && fd > 0) { fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); } } IOStatus NewSequentialFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) override { result->reset(); int fd = -1; int flags = cloexec_flags(O_RDONLY, &options); FILE* file = nullptr; if (options.use_direct_reads && !options.use_mmap_reads) { #ifdef ROCKSDB_LITE return IOStatus::IOError(fname, "Direct I/O not supported in RocksDB lite"); #endif // !ROCKSDB_LITE #if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS) flags |= O_DIRECT; #endif } do { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); } while (fd < 0 && errno == EINTR); if (fd < 0) { return IOError("While opening a file for sequentially reading", fname, errno); } SetFD_CLOEXEC(fd, 
&options); if (options.use_direct_reads && !options.use_mmap_reads) { #ifdef OS_MACOSX if (fcntl(fd, F_NOCACHE, 1) == -1) { close(fd); return IOError("While fcntl NoCache", fname, errno); } #endif } else { do { IOSTATS_TIMER_GUARD(open_nanos); file = fdopen(fd, "r"); } while (file == nullptr && errno == EINTR); if (file == nullptr) { close(fd); return IOError("While opening file for sequentially read", fname, errno); } } result->reset(new PosixSequentialFile( fname, file, fd, GetLogicalBlockSizeForReadIfNeeded(options, fname, fd), options)); return IOStatus::OK(); } IOStatus NewRandomAccessFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) override { result->reset(); IOStatus s; int fd; int flags = cloexec_flags(O_RDONLY, &options); if (options.use_direct_reads && !options.use_mmap_reads) { #ifdef ROCKSDB_LITE return IOStatus::IOError(fname, "Direct I/O not supported in RocksDB lite"); #endif // !ROCKSDB_LITE #if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS) flags |= O_DIRECT; TEST_SYNC_POINT_CALLBACK("NewRandomAccessFile:O_DIRECT", &flags); #endif } do { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); } while (fd < 0 && errno == EINTR); if (fd < 0) { return IOError("While open a file for random read", fname, errno); } SetFD_CLOEXEC(fd, &options); if (options.use_mmap_reads && sizeof(void*) >= 8) { // Use of mmap for random reads has been removed because it // kills performance when storage is fast. // Use mmap when virtual address-space is plentiful. 
uint64_t size; IOOptions opts; s = GetFileSize(fname, opts, &size, nullptr); if (s.ok()) { void* base = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0); if (base != MAP_FAILED) { result->reset( new PosixMmapReadableFile(fd, fname, base, size, options)); } else { s = IOError("while mmap file for read", fname, errno); close(fd); } } else { close(fd); } } else { if (options.use_direct_reads && !options.use_mmap_reads) { #ifdef OS_MACOSX if (fcntl(fd, F_NOCACHE, 1) == -1) { close(fd); return IOError("while fcntl NoCache", fname, errno); } #endif } result->reset(new PosixRandomAccessFile( fname, fd, GetLogicalBlockSizeForReadIfNeeded(options, fname, fd), options #if defined(ROCKSDB_IOURING_PRESENT) , thread_local_io_urings_.get() #endif )); } return s; } virtual IOStatus OpenWritableFile(const std::string& fname, const FileOptions& options, bool reopen, std::unique_ptr* result, IODebugContext* /*dbg*/) { result->reset(); IOStatus s; int fd = -1; int flags = (reopen) ? (O_CREAT | O_APPEND) : (O_CREAT | O_TRUNC); // Direct IO mode with O_DIRECT flag or F_NOCAHCE (MAC OSX) if (options.use_direct_writes && !options.use_mmap_writes) { // Note: we should avoid O_APPEND here due to ta the following bug: // POSIX requires that opening a file with the O_APPEND flag should // have no affect on the location at which pwrite() writes data. // However, on Linux, if a file is opened with O_APPEND, pwrite() // appends data to the end of the file, regardless of the value of // offset. 
// More info here: https://linux.die.net/man/2/pwrite #ifdef ROCKSDB_LITE return IOStatus::IOError(fname, "Direct I/O not supported in RocksDB lite"); #endif // ROCKSDB_LITE flags |= O_WRONLY; #if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS) flags |= O_DIRECT; #endif TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags); } else if (options.use_mmap_writes) { // non-direct I/O flags |= O_RDWR; } else { flags |= O_WRONLY; } flags = cloexec_flags(flags, &options); do { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); } while (fd < 0 && errno == EINTR); if (fd < 0) { s = IOError("While open a file for appending", fname, errno); return s; } SetFD_CLOEXEC(fd, &options); if (options.use_mmap_writes) { if (!checkedDiskForMmap_) { // this will be executed once in the program's lifetime. // do not use mmapWrite on non ext-3/xfs/tmpfs systems. if (!SupportsFastAllocate(fname)) { forceMmapOff_ = true; } checkedDiskForMmap_ = true; } } if (options.use_mmap_writes && !forceMmapOff_) { result->reset(new PosixMmapFile(fname, fd, page_size_, options)); } else if (options.use_direct_writes && !options.use_mmap_writes) { #ifdef OS_MACOSX if (fcntl(fd, F_NOCACHE, 1) == -1) { close(fd); s = IOError("While fcntl NoCache an opened file for appending", fname, errno); return s; } #elif defined(OS_SOLARIS) if (directio(fd, DIRECTIO_ON) == -1) { if (errno != ENOTTY) { // ZFS filesystems don't support DIRECTIO_ON close(fd); s = IOError("While calling directio()", fname, errno); return s; } } #endif result->reset(new PosixWritableFile( fname, fd, GetLogicalBlockSizeForWriteIfNeeded(options, fname, fd), options)); } else { // disable mmap writes EnvOptions no_mmap_writes_options = options; no_mmap_writes_options.use_mmap_writes = false; result->reset( new PosixWritableFile(fname, fd, GetLogicalBlockSizeForWriteIfNeeded( no_mmap_writes_options, fname, fd), no_mmap_writes_options)); } return s; } IOStatus 
NewWritableFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) override { return OpenWritableFile(fname, options, false, result, dbg); } IOStatus ReopenWritableFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) override { return OpenWritableFile(fname, options, true, result, dbg); } IOStatus ReuseWritableFile(const std::string& fname, const std::string& old_fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) override { result->reset(); IOStatus s; int fd = -1; int flags = 0; // Direct IO mode with O_DIRECT flag or F_NOCAHCE (MAC OSX) if (options.use_direct_writes && !options.use_mmap_writes) { #ifdef ROCKSDB_LITE return IOStatus::IOError(fname, "Direct I/O not supported in RocksDB lite"); #endif // !ROCKSDB_LITE flags |= O_WRONLY; #if !defined(OS_MACOSX) && !defined(OS_OPENBSD) && !defined(OS_SOLARIS) flags |= O_DIRECT; #endif TEST_SYNC_POINT_CALLBACK("NewWritableFile:O_DIRECT", &flags); } else if (options.use_mmap_writes) { // mmap needs O_RDWR mode flags |= O_RDWR; } else { flags |= O_WRONLY; } flags = cloexec_flags(flags, &options); do { IOSTATS_TIMER_GUARD(open_nanos); fd = open(old_fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); } while (fd < 0 && errno == EINTR); if (fd < 0) { s = IOError("while reopen file for write", fname, errno); return s; } SetFD_CLOEXEC(fd, &options); // rename into place if (rename(old_fname.c_str(), fname.c_str()) != 0) { s = IOError("while rename file to " + fname, old_fname, errno); close(fd); return s; } if (options.use_mmap_writes) { if (!checkedDiskForMmap_) { // this will be executed once in the program's lifetime. // do not use mmapWrite on non ext-3/xfs/tmpfs systems. 
if (!SupportsFastAllocate(fname)) { forceMmapOff_ = true; } checkedDiskForMmap_ = true; } } if (options.use_mmap_writes && !forceMmapOff_) { result->reset(new PosixMmapFile(fname, fd, page_size_, options)); } else if (options.use_direct_writes && !options.use_mmap_writes) { #ifdef OS_MACOSX if (fcntl(fd, F_NOCACHE, 1) == -1) { close(fd); s = IOError("while fcntl NoCache for reopened file for append", fname, errno); return s; } #elif defined(OS_SOLARIS) if (directio(fd, DIRECTIO_ON) == -1) { if (errno != ENOTTY) { // ZFS filesystems don't support DIRECTIO_ON close(fd); s = IOError("while calling directio()", fname, errno); return s; } } #endif result->reset(new PosixWritableFile( fname, fd, GetLogicalBlockSizeForWriteIfNeeded(options, fname, fd), options)); } else { // disable mmap writes FileOptions no_mmap_writes_options = options; no_mmap_writes_options.use_mmap_writes = false; result->reset( new PosixWritableFile(fname, fd, GetLogicalBlockSizeForWriteIfNeeded( no_mmap_writes_options, fname, fd), no_mmap_writes_options)); } return s; } IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* /*dbg*/) override { int fd = -1; int flags = cloexec_flags(O_RDWR, &options); while (fd < 0) { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); if (fd < 0) { // Error while opening the file if (errno == EINTR) { continue; } return IOError("While open file for random read/write", fname, errno); } } SetFD_CLOEXEC(fd, &options); result->reset(new PosixRandomRWFile(fname, fd, options)); return IOStatus::OK(); } IOStatus NewMemoryMappedFileBuffer( const std::string& fname, std::unique_ptr* result) override { int fd = -1; IOStatus status; int flags = cloexec_flags(O_RDWR, nullptr); while (fd < 0) { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, 0644); if (fd < 0) { // Error while opening the file if (errno == EINTR) { continue; } status = 
IOError("While open file for raw mmap buffer access", fname, errno); break; } } uint64_t size; if (status.ok()) { IOOptions opts; status = GetFileSize(fname, opts, &size, nullptr); } void* base = nullptr; if (status.ok()) { base = mmap(nullptr, static_cast(size), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (base == MAP_FAILED) { status = IOError("while mmap file for read", fname, errno); } } if (status.ok()) { result->reset( new PosixMemoryMappedFileBuffer(base, static_cast(size))); } if (fd >= 0) { // don't need to keep it open after mmap has been called close(fd); } return status; } IOStatus NewDirectory(const std::string& name, const IOOptions& /*opts*/, std::unique_ptr* result, IODebugContext* /*dbg*/) override { result->reset(); int fd; int flags = cloexec_flags(0, nullptr); { IOSTATS_TIMER_GUARD(open_nanos); fd = open(name.c_str(), flags); } if (fd < 0) { return IOError("While open directory", name, errno); } else { result->reset(new PosixDirectory(fd)); } return IOStatus::OK(); } IOStatus NewLogger(const std::string& fname, const IOOptions& /*opts*/, std::shared_ptr* result, IODebugContext* /*dbg*/) override { FILE* f; { IOSTATS_TIMER_GUARD(open_nanos); f = fopen(fname.c_str(), "w" #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 7) "e" // glibc extension to enable O_CLOEXEC #endif #endif ); } if (f == nullptr) { result->reset(); return status_to_io_status( IOError("when fopen a file for new logger", fname, errno)); } else { int fd = fileno(f); #ifdef ROCKSDB_FALLOCATE_PRESENT fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024); #endif SetFD_CLOEXEC(fd, nullptr); result->reset(new PosixLogger(f, &gettid, Env::Default())); return IOStatus::OK(); } } IOStatus FileExists(const std::string& fname, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { int result = access(fname.c_str(), F_OK); if (result == 0) { return IOStatus::OK(); } int err = errno; switch (err) { case EACCES: case ELOOP: case ENAMETOOLONG: case ENOENT: case ENOTDIR: return 
IOStatus::NotFound(); default: assert(err == EIO || err == ENOMEM); return IOStatus::IOError("Unexpected error(" + ToString(err) + ") accessing file `" + fname + "' "); } } IOStatus GetChildren(const std::string& dir, const IOOptions& /*opts*/, std::vector* result, IODebugContext* /*dbg*/) override { result->clear(); DIR* d = opendir(dir.c_str()); if (d == nullptr) { switch (errno) { case EACCES: case ENOENT: case ENOTDIR: return IOStatus::NotFound(); default: return IOError("While opendir", dir, errno); } } struct dirent* entry; while ((entry = readdir(d)) != nullptr) { result->push_back(entry->d_name); } closedir(d); return IOStatus::OK(); } IOStatus DeleteFile(const std::string& fname, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { IOStatus result; if (unlink(fname.c_str()) != 0) { result = IOError("while unlink() file", fname, errno); } return result; } IOStatus CreateDir(const std::string& name, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { IOStatus result; if (mkdir(name.c_str(), 0755) != 0) { result = IOError("While mkdir", name, errno); } return result; } IOStatus CreateDirIfMissing(const std::string& name, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { IOStatus result; if (mkdir(name.c_str(), 0755) != 0) { if (errno != EEXIST) { result = IOError("While mkdir if missing", name, errno); } else if (!DirExists(name)) { // Check that name is actually a // directory. 
// Message is taken from mkdir result = IOStatus::IOError("`" + name + "' exists but is not a directory"); } } return result; } IOStatus DeleteDir(const std::string& name, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { IOStatus result; if (rmdir(name.c_str()) != 0) { result = IOError("file rmdir", name, errno); } return result; } IOStatus GetFileSize(const std::string& fname, const IOOptions& /*opts*/, uint64_t* size, IODebugContext* /*dbg*/) override { IOStatus s; struct stat sbuf; if (stat(fname.c_str(), &sbuf) != 0) { *size = 0; s = IOError("while stat a file for size", fname, errno); } else { *size = sbuf.st_size; } return s; } IOStatus GetFileModificationTime(const std::string& fname, const IOOptions& /*opts*/, uint64_t* file_mtime, IODebugContext* /*dbg*/) override { struct stat s; if (stat(fname.c_str(), &s) != 0) { return IOError("while stat a file for modification time", fname, errno); } *file_mtime = static_cast(s.st_mtime); return IOStatus::OK(); } IOStatus RenameFile(const std::string& src, const std::string& target, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { IOStatus result; if (rename(src.c_str(), target.c_str()) != 0) { result = IOError("While renaming a file to " + target, src, errno); } return result; } IOStatus LinkFile(const std::string& src, const std::string& target, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { IOStatus result; if (link(src.c_str(), target.c_str()) != 0) { if (errno == EXDEV) { return IOStatus::NotSupported("No cross FS links allowed"); } result = IOError("while link file to " + target, src, errno); } return result; } IOStatus NumFileLinks(const std::string& fname, const IOOptions& /*opts*/, uint64_t* count, IODebugContext* /*dbg*/) override { struct stat s; if (stat(fname.c_str(), &s) != 0) { return IOError("while stat a file for num file links", fname, errno); } *count = static_cast(s.st_nlink); return IOStatus::OK(); } IOStatus AreFilesSame(const std::string& first, const 
std::string& second, const IOOptions& /*opts*/, bool* res, IODebugContext* /*dbg*/) override { struct stat statbuf[2]; if (stat(first.c_str(), &statbuf[0]) != 0) { return IOError("stat file", first, errno); } if (stat(second.c_str(), &statbuf[1]) != 0) { return IOError("stat file", second, errno); } if (major(statbuf[0].st_dev) != major(statbuf[1].st_dev) || minor(statbuf[0].st_dev) != minor(statbuf[1].st_dev) || statbuf[0].st_ino != statbuf[1].st_ino) { *res = false; } else { *res = true; } return IOStatus::OK(); } IOStatus LockFile(const std::string& fname, const IOOptions& /*opts*/, FileLock** lock, IODebugContext* /*dbg*/) override { *lock = nullptr; IOStatus result; LockHoldingInfo lhi; int64_t current_time = 0; // Ignore status code as the time is only used for error message. Env::Default()->GetCurrentTime(¤t_time); lhi.acquire_time = current_time; lhi.acquiring_thread = Env::Default()->GetThreadID(); mutex_locked_files.Lock(); // If it already exists in the locked_files set, then it is already locked, // and fail this lock attempt. Otherwise, insert it into locked_files. // This check is needed because fcntl() does not detect lock conflict // if the fcntl is issued by the same thread that earlier acquired // this lock. // We must do this check *before* opening the file: // Otherwise, we will open a new file descriptor. Locks are associated with // a process, not a file descriptor and when *any* file descriptor is // closed, all locks the process holds for that *file* are released const auto it_success = locked_files.insert({fname, lhi}); if (it_success.second == false) { mutex_locked_files.Unlock(); errno = ENOLCK; LockHoldingInfo& prev_info = it_success.first->second; // Note that the thread ID printed is the same one as the one in // posix logger, but posix logger prints it hex format. 
return IOError("lock hold by current process, acquire time " + ToString(prev_info.acquire_time) + " acquiring thread " + ToString(prev_info.acquiring_thread), fname, errno); } int fd; int flags = cloexec_flags(O_RDWR | O_CREAT, nullptr); { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, 0644); } if (fd < 0) { result = IOError("while open a file for lock", fname, errno); } else if (LockOrUnlock(fd, true) == -1) { // if there is an error in locking, then remove the pathname from // lockedfiles locked_files.erase(fname); result = IOError("While lock file", fname, errno); close(fd); } else { SetFD_CLOEXEC(fd, nullptr); PosixFileLock* my_lock = new PosixFileLock; my_lock->fd_ = fd; my_lock->filename = fname; *lock = my_lock; } mutex_locked_files.Unlock(); return result; } IOStatus UnlockFile(FileLock* lock, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { PosixFileLock* my_lock = reinterpret_cast(lock); IOStatus result; mutex_locked_files.Lock(); // If we are unlocking, then verify that we had locked it earlier, // it should already exist in locked_files. Remove it from locked_files. 
if (locked_files.erase(my_lock->filename) != 1) { errno = ENOLCK; result = IOError("unlock", my_lock->filename, errno); } else if (LockOrUnlock(my_lock->fd_, false) == -1) { result = IOError("unlock", my_lock->filename, errno); } close(my_lock->fd_); delete my_lock; mutex_locked_files.Unlock(); return result; } IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& /*opts*/, std::string* output_path, IODebugContext* /*dbg*/) override { if (!db_path.empty() && db_path[0] == '/') { *output_path = db_path; return IOStatus::OK(); } char the_path[256]; char* ret = getcwd(the_path, 256); if (ret == nullptr) { return IOStatus::IOError(strerror(errno)); } *output_path = ret; return IOStatus::OK(); } IOStatus GetTestDirectory(const IOOptions& /*opts*/, std::string* result, IODebugContext* /*dbg*/) override { const char* env = getenv("TEST_TMPDIR"); if (env && env[0] != '\0') { *result = env; } else { char buf[100]; snprintf(buf, sizeof(buf), "/tmp/rocksdbtest-%d", int(geteuid())); *result = buf; } // Directory may already exist { IOOptions opts; CreateDir(*result, opts, nullptr); } return IOStatus::OK(); } IOStatus GetFreeSpace(const std::string& fname, const IOOptions& /*opts*/, uint64_t* free_space, IODebugContext* /*dbg*/) override { struct statvfs sbuf; if (statvfs(fname.c_str(), &sbuf) < 0) { return IOError("While doing statvfs", fname, errno); } *free_space = ((uint64_t)sbuf.f_bsize * sbuf.f_bfree); return IOStatus::OK(); } IOStatus IsDirectory(const std::string& path, const IOOptions& /*opts*/, bool* is_dir, IODebugContext* /*dbg*/) override { // First open int fd = -1; int flags = cloexec_flags(O_RDONLY, nullptr); { IOSTATS_TIMER_GUARD(open_nanos); fd = open(path.c_str(), flags); } if (fd < 0) { return IOError("While open for IsDirectory()", path, errno); } IOStatus io_s; struct stat sbuf; if (fstat(fd, &sbuf) < 0) { io_s = IOError("While doing stat for IsDirectory()", path, errno); } close(fd); if (io_s.ok() && nullptr != is_dir) { *is_dir = 
S_ISDIR(sbuf.st_mode); } return io_s; } FileOptions OptimizeForLogWrite(const FileOptions& file_options, const DBOptions& db_options) const override { FileOptions optimized = file_options; optimized.use_mmap_writes = false; optimized.use_direct_writes = false; optimized.bytes_per_sync = db_options.wal_bytes_per_sync; // TODO(icanadi) it's faster if fallocate_with_keep_size is false, but it // breaks TransactionLogIteratorStallAtLastRecord unit test. Fix the unit // test and make this false optimized.fallocate_with_keep_size = true; optimized.writable_file_max_buffer_size = db_options.writable_file_max_buffer_size; return optimized; } FileOptions OptimizeForManifestWrite( const FileOptions& file_options) const override { FileOptions optimized = file_options; optimized.use_mmap_writes = false; optimized.use_direct_writes = false; optimized.fallocate_with_keep_size = true; return optimized; } #ifdef OS_LINUX Status RegisterDbPaths(const std::vector& paths) override { return logical_block_size_cache_.RefAndCacheLogicalBlockSize(paths); } Status UnregisterDbPaths(const std::vector& paths) override { logical_block_size_cache_.UnrefAndTryRemoveCachedLogicalBlockSize(paths); return Status::OK(); } #endif private: bool checkedDiskForMmap_; bool forceMmapOff_; // do we override Env options? // Returns true iff the named directory exists and is a directory. 
virtual bool DirExists(const std::string& dname) { struct stat statbuf; if (stat(dname.c_str(), &statbuf) == 0) { return S_ISDIR(statbuf.st_mode); } return false; // stat() failed return false } bool SupportsFastAllocate(const std::string& path) { #ifdef ROCKSDB_FALLOCATE_PRESENT struct statfs s; if (statfs(path.c_str(), &s)) { return false; } switch (s.f_type) { case EXT4_SUPER_MAGIC: return true; case XFS_SUPER_MAGIC: return true; case TMPFS_MAGIC: return true; default: return false; } #else (void)path; return false; #endif } #if defined(ROCKSDB_IOURING_PRESENT) // io_uring instance std::unique_ptr thread_local_io_urings_; #endif size_t page_size_; // If true, allow non owner read access for db files. Otherwise, non-owner // has no access to db files. bool allow_non_owner_access_; #ifdef OS_LINUX static LogicalBlockSizeCache logical_block_size_cache_; #endif static size_t GetLogicalBlockSize(const std::string& fname, int fd); // In non-direct IO mode, this directly returns kDefaultPageSize. // Otherwise call GetLogicalBlockSize. static size_t GetLogicalBlockSizeForReadIfNeeded(const EnvOptions& options, const std::string& fname, int fd); static size_t GetLogicalBlockSizeForWriteIfNeeded(const EnvOptions& options, const std::string& fname, int fd); }; #ifdef OS_LINUX LogicalBlockSizeCache PosixFileSystem::logical_block_size_cache_; #endif size_t PosixFileSystem::GetLogicalBlockSize(const std::string& fname, int fd) { #ifdef OS_LINUX return logical_block_size_cache_.GetLogicalBlockSize(fname, fd); #else (void)fname; return PosixHelper::GetLogicalBlockSizeOfFd(fd); #endif } size_t PosixFileSystem::GetLogicalBlockSizeForReadIfNeeded( const EnvOptions& options, const std::string& fname, int fd) { return options.use_direct_reads ? PosixFileSystem::GetLogicalBlockSize(fname, fd) : kDefaultPageSize; } size_t PosixFileSystem::GetLogicalBlockSizeForWriteIfNeeded( const EnvOptions& options, const std::string& fname, int fd) { return options.use_direct_writes ? 
PosixFileSystem::GetLogicalBlockSize(fname, fd) : kDefaultPageSize; } PosixFileSystem::PosixFileSystem() : checkedDiskForMmap_(false), forceMmapOff_(false), page_size_(getpagesize()), allow_non_owner_access_(true) { #if defined(ROCKSDB_IOURING_PRESENT) // Test whether IOUring is supported, and if it does, create a managing // object for thread local point so that in the future thread-local // io_uring can be created. struct io_uring* new_io_uring = CreateIOUring(); if (new_io_uring != nullptr) { thread_local_io_urings_.reset(new ThreadLocalPtr(DeleteIOUring)); delete new_io_uring; } #endif } } // namespace // // Default Posix FileSystem // std::shared_ptr FileSystem::Default() { static PosixFileSystem default_fs; static std::shared_ptr default_fs_ptr( &default_fs, [](PosixFileSystem*) {}); return default_fs_ptr; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/io_posix.cc000066400000000000000000001317141370372246700162040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifdef ROCKSDB_LIB_IO_POSIX
#include "env/io_posix.h"
#include <errno.h>
#include <fcntl.h>
#include <algorithm>
#if defined(OS_LINUX)
#include <linux/fs.h>
#ifndef FALLOC_FL_KEEP_SIZE
#include <linux/falloc.h>
#endif
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef OS_LINUX
#include <sys/statfs.h>
#include <sys/syscall.h>
#include <sys/sysmacros.h>
#endif
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
#include "rocksdb/slice.h"
#include "test_util/sync_point.h"
#include "util/autovector.h"
#include "util/coding.h"
#include "util/string_util.h"

#if defined(OS_LINUX) && !defined(F_SET_RW_HINT)
#define F_LINUX_SPECIFIC_BASE 1024
#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
#endif

namespace ROCKSDB_NAMESPACE {

// Builds the message prefix for an I/O error: "<context>: <file_name>", or
// just "<context>" when no file name is given.
std::string IOErrorMsg(const std::string& context,
                       const std::string& file_name) {
  if (file_name.empty()) {
    return context;
  }
  return context + ": " + file_name;
}

// Converts an errno value into an IOStatus:
//   ENOSPC -> NoSpace, marked retryable
//   ESTALE -> IOError with subcode kStaleFile (context and file name are not
//             included in this case)
//   ENOENT -> PathNotFound
//   other  -> IOError
// file_name can be left empty if it is unknown.
IOStatus IOError(const std::string& context, const std::string& file_name,
                 int err_number) {
  switch (err_number) {
    case ENOSPC: {
      IOStatus s = IOStatus::NoSpace(IOErrorMsg(context, file_name),
                                     strerror(err_number));
      s.SetRetryable(true);
      return s;
    }
    case ESTALE:
      return IOStatus::IOError(IOStatus::kStaleFile);
    case ENOENT:
      return IOStatus::PathNotFound(IOErrorMsg(context, file_name),
                                    strerror(err_number));
    default:
      return IOStatus::IOError(IOErrorMsg(context, file_name),
                               strerror(err_number));
  }
}

// A wrapper for fadvise, if the platform doesn't support fadvise,
// it will simply return 0.
int Fadvise(int fd, off_t offset, size_t len, int advice) {
#ifdef OS_LINUX
  return posix_fadvise(fd, offset, len, advice);
#else
  (void)fd;
  (void)offset;
  (void)len;
  (void)advice;
  return 0;  // simply do nothing.
#endif
}

namespace {

// On MacOS (and probably *BSD), the posix write and pwrite calls do not support
// buffers larger than 2^31-1 bytes. These two wrappers fix this issue by
// cutting the buffer in 1GB chunks. We use this chunk size to be sure to keep
// the writes aligned.
bool PosixWrite(int fd, const char* buf, size_t nbyte) { const size_t kLimit1Gb = 1UL << 30; const char* src = buf; size_t left = nbyte; while (left != 0) { size_t bytes_to_write = std::min(left, kLimit1Gb); ssize_t done = write(fd, src, bytes_to_write); if (done < 0) { if (errno == EINTR) { continue; } return false; } left -= done; src += done; } return true; } bool PosixPositionedWrite(int fd, const char* buf, size_t nbyte, off_t offset) { const size_t kLimit1Gb = 1UL << 30; const char* src = buf; size_t left = nbyte; while (left != 0) { size_t bytes_to_write = std::min(left, kLimit1Gb); ssize_t done = pwrite(fd, src, bytes_to_write, offset); if (done < 0) { if (errno == EINTR) { continue; } return false; } left -= done; offset += done; src += done; } return true; } #ifdef ROCKSDB_RANGESYNC_PRESENT #if !defined(ZFS_SUPER_MAGIC) // The magic number for ZFS was not exposed until recently. It should be fixed // forever so we can just copy the magic number here. #define ZFS_SUPER_MAGIC 0x2fc12fc1 #endif bool IsSyncFileRangeSupported(int fd) { // The approach taken in this function is to build a blacklist of cases where // we know `sync_file_range` definitely will not work properly despite passing // the compile-time check (`ROCKSDB_RANGESYNC_PRESENT`). If we are unsure, or // if any of the checks fail in unexpected ways, we allow `sync_file_range` to // be used. This way should minimize risk of impacting existing use cases. struct statfs buf; int ret = fstatfs(fd, &buf); assert(ret == 0); if (ret == 0 && buf.f_type == ZFS_SUPER_MAGIC) { // Testing on ZFS showed the writeback did not happen asynchronously when // `sync_file_range` was called, even though it returned success. Avoid it // and use `fdatasync` instead to preserve the contract of `bytes_per_sync`, // even though this'll incur extra I/O for metadata. 
return false; } ret = sync_file_range(fd, 0 /* offset */, 0 /* nbytes */, 0 /* flags */); assert(!(ret == -1 && errno != ENOSYS)); if (ret == -1 && errno == ENOSYS) { // `sync_file_range` is not implemented on all platforms even if // compile-time checks pass and a supported filesystem is in-use. For // example, using ext4 on WSL (Windows Subsystem for Linux), // `sync_file_range()` returns `ENOSYS` // ("Function not implemented"). return false; } // None of the cases on the blacklist matched, so allow `sync_file_range` use. return true; } #undef ZFS_SUPER_MAGIC #endif // ROCKSDB_RANGESYNC_PRESENT } // anonymous namespace /* * DirectIOHelper */ namespace { bool IsSectorAligned(const size_t off, size_t sector_size) { assert((sector_size & (sector_size - 1)) == 0); return (off & (sector_size - 1)) == 0; } #ifndef NDEBUG bool IsSectorAligned(const void* ptr, size_t sector_size) { return uintptr_t(ptr) % sector_size == 0; } #endif } // namespace /* * PosixSequentialFile */ PosixSequentialFile::PosixSequentialFile(const std::string& fname, FILE* file, int fd, size_t logical_block_size, const EnvOptions& options) : filename_(fname), file_(file), fd_(fd), use_direct_io_(options.use_direct_reads), logical_sector_size_(logical_block_size) { assert(!options.use_direct_reads || !options.use_mmap_reads); } PosixSequentialFile::~PosixSequentialFile() { if (!use_direct_io()) { assert(file_); fclose(file_); } else { assert(fd_); close(fd_); } } IOStatus PosixSequentialFile::Read(size_t n, const IOOptions& /*opts*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) { assert(result != nullptr && !use_direct_io()); IOStatus s; size_t r = 0; do { clearerr(file_); r = fread_unlocked(scratch, 1, n, file_); } while (r == 0 && ferror(file_) && errno == EINTR); *result = Slice(scratch, r); if (r < n) { if (feof(file_)) { // We leave status as ok if we hit the end of the file // We also clear the error so that the reads can continue // if a new data is written to the file 
clearerr(file_); } else { // A partial read with an error: return a non-ok status s = IOError("While reading file sequentially", filename_, errno); } } return s; } IOStatus PosixSequentialFile::PositionedRead(uint64_t offset, size_t n, const IOOptions& /*opts*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) { assert(use_direct_io()); assert(IsSectorAligned(offset, GetRequiredBufferAlignment())); assert(IsSectorAligned(n, GetRequiredBufferAlignment())); assert(IsSectorAligned(scratch, GetRequiredBufferAlignment())); IOStatus s; ssize_t r = -1; size_t left = n; char* ptr = scratch; while (left > 0) { r = pread(fd_, ptr, left, static_cast(offset)); if (r <= 0) { if (r == -1 && errno == EINTR) { continue; } break; } ptr += r; offset += r; left -= r; if (!IsSectorAligned(r, GetRequiredBufferAlignment())) { // Bytes reads don't fill sectors. Should only happen at the end // of the file. break; } } if (r < 0) { // An error: return a non-ok status s = IOError( "While pread " + ToString(n) + " bytes from offset " + ToString(offset), filename_, errno); } *result = Slice(scratch, (r < 0) ? 
0 : n - left); return s; } IOStatus PosixSequentialFile::Skip(uint64_t n) { if (fseek(file_, static_cast(n), SEEK_CUR)) { return IOError("While fseek to skip " + ToString(n) + " bytes", filename_, errno); } return IOStatus::OK(); } IOStatus PosixSequentialFile::InvalidateCache(size_t offset, size_t length) { #ifndef OS_LINUX (void)offset; (void)length; return IOStatus::OK(); #else if (!use_direct_io()) { // free OS pages int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); if (ret != 0) { return IOError("While fadvise NotNeeded offset " + ToString(offset) + " len " + ToString(length), filename_, errno); } } return IOStatus::OK(); #endif } /* * PosixRandomAccessFile */ #if defined(OS_LINUX) size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) { if (max_size < kMaxVarint64Length * 3) { return 0; } struct stat buf; int result = fstat(fd, &buf); if (result == -1) { return 0; } long version = 0; result = ioctl(fd, FS_IOC_GETVERSION, &version); TEST_SYNC_POINT_CALLBACK("GetUniqueIdFromFile:FS_IOC_GETVERSION", &result); if (result == -1) { return 0; } uint64_t uversion = (uint64_t)version; char* rid = id; rid = EncodeVarint64(rid, buf.st_dev); rid = EncodeVarint64(rid, buf.st_ino); rid = EncodeVarint64(rid, uversion); assert(rid >= id); return static_cast(rid - id); } #endif #if defined(OS_MACOSX) || defined(OS_AIX) size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) { if (max_size < kMaxVarint64Length * 3) { return 0; } struct stat buf; int result = fstat(fd, &buf); if (result == -1) { return 0; } char* rid = id; rid = EncodeVarint64(rid, buf.st_dev); rid = EncodeVarint64(rid, buf.st_ino); rid = EncodeVarint64(rid, buf.st_gen); assert(rid >= id); return static_cast(rid - id); } #endif #ifdef OS_LINUX std::string RemoveTrailingSlash(const std::string& path) { std::string p = path; if (p.size() > 1 && p.back() == '/') { p.pop_back(); } return p; } Status LogicalBlockSizeCache::RefAndCacheLogicalBlockSize( const 
std::vector& directories) { std::vector dirs; dirs.reserve(directories.size()); for (auto& d : directories) { dirs.emplace_back(RemoveTrailingSlash(d)); } std::map dir_sizes; { ReadLock lock(&cache_mutex_); for (const auto& dir : dirs) { if (cache_.find(dir) == cache_.end()) { dir_sizes.emplace(dir, 0); } } } Status s; for (auto& dir_size : dir_sizes) { s = get_logical_block_size_of_directory_(dir_size.first, &dir_size.second); if (!s.ok()) { return s; } } WriteLock lock(&cache_mutex_); for (const auto& dir : dirs) { auto& v = cache_[dir]; v.ref++; auto dir_size = dir_sizes.find(dir); if (dir_size != dir_sizes.end()) { v.size = dir_size->second; } } return s; } void LogicalBlockSizeCache::UnrefAndTryRemoveCachedLogicalBlockSize( const std::vector& directories) { std::vector dirs; dirs.reserve(directories.size()); for (auto& dir : directories) { dirs.emplace_back(RemoveTrailingSlash(dir)); } WriteLock lock(&cache_mutex_); for (const auto& dir : dirs) { auto it = cache_.find(dir); if (it != cache_.end() && !(--(it->second.ref))) { cache_.erase(it); } } } size_t LogicalBlockSizeCache::GetLogicalBlockSize(const std::string& fname, int fd) { std::string dir = fname.substr(0, fname.find_last_of("/")); if (dir.empty()) { dir = "/"; } { ReadLock lock(&cache_mutex_); auto it = cache_.find(dir); if (it != cache_.end()) { return it->second.size; } } return get_logical_block_size_of_fd_(fd); } #endif Status PosixHelper::GetLogicalBlockSizeOfDirectory(const std::string& directory, size_t* size) { int fd = open(directory.c_str(), O_DIRECTORY | O_RDONLY); if (fd == -1) { close(fd); return Status::IOError("Cannot open directory " + directory); } *size = PosixHelper::GetLogicalBlockSizeOfFd(fd); close(fd); return Status::OK(); } size_t PosixHelper::GetLogicalBlockSizeOfFd(int fd) { #ifdef OS_LINUX struct stat buf; int result = fstat(fd, &buf); if (result == -1) { return kDefaultPageSize; } if (major(buf.st_dev) == 0) { // Unnamed devices (e.g. 
non-device mounts), reserved as null device number. // These don't have an entry in /sys/dev/block/. Return a sensible default. return kDefaultPageSize; } // Reading queue/logical_block_size does not require special permissions. const int kBufferSize = 100; char path[kBufferSize]; char real_path[PATH_MAX + 1]; snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev), minor(buf.st_dev)); if (realpath(path, real_path) == nullptr) { return kDefaultPageSize; } std::string device_dir(real_path); if (!device_dir.empty() && device_dir.back() == '/') { device_dir.pop_back(); } // NOTE: sda3 and nvme0n1p1 do not have a `queue/` subdir, only the parent sda // and nvme0n1 have it. // $ ls -al '/sys/dev/block/8:3' // lrwxrwxrwx. 1 root root 0 Jun 26 01:38 /sys/dev/block/8:3 -> // ../../block/sda/sda3 // $ ls -al '/sys/dev/block/259:4' // lrwxrwxrwx 1 root root 0 Jan 31 16:04 /sys/dev/block/259:4 -> // ../../devices/pci0000:17/0000:17:00.0/0000:18:00.0/nvme/nvme0/nvme0n1/nvme0n1p1 size_t parent_end = device_dir.rfind('/', device_dir.length() - 1); if (parent_end == std::string::npos) { return kDefaultPageSize; } size_t parent_begin = device_dir.rfind('/', parent_end - 1); if (parent_begin == std::string::npos) { return kDefaultPageSize; } std::string parent = device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1); std::string child = device_dir.substr(parent_end + 1, std::string::npos); if (parent != "block" && (child.compare(0, 4, "nvme") || child.find('p') != std::string::npos)) { device_dir = device_dir.substr(0, parent_end); } std::string fname = device_dir + "/queue/logical_block_size"; FILE* fp; size_t size = 0; fp = fopen(fname.c_str(), "r"); if (fp != nullptr) { char* line = nullptr; size_t len = 0; if (getline(&line, &len, fp) != -1) { sscanf(line, "%zu", &size); } free(line); fclose(fp); } if (size != 0 && (size & (size - 1)) == 0) { return size; } #endif (void)fd; return kDefaultPageSize; } /* * PosixRandomAccessFile * * pread() based 
random-access */ PosixRandomAccessFile::PosixRandomAccessFile( const std::string& fname, int fd, size_t logical_block_size, const EnvOptions& options #if defined(ROCKSDB_IOURING_PRESENT) , ThreadLocalPtr* thread_local_io_urings #endif ) : filename_(fname), fd_(fd), use_direct_io_(options.use_direct_reads), logical_sector_size_(logical_block_size) #if defined(ROCKSDB_IOURING_PRESENT) , thread_local_io_urings_(thread_local_io_urings) #endif { assert(!options.use_direct_reads || !options.use_mmap_reads); assert(!options.use_mmap_reads || sizeof(void*) < 8); } PosixRandomAccessFile::~PosixRandomAccessFile() { close(fd_); } IOStatus PosixRandomAccessFile::Read(uint64_t offset, size_t n, const IOOptions& /*opts*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) const { if (use_direct_io()) { assert(IsSectorAligned(offset, GetRequiredBufferAlignment())); assert(IsSectorAligned(n, GetRequiredBufferAlignment())); assert(IsSectorAligned(scratch, GetRequiredBufferAlignment())); } IOStatus s; ssize_t r = -1; size_t left = n; char* ptr = scratch; while (left > 0) { r = pread(fd_, ptr, left, static_cast(offset)); if (r <= 0) { if (r == -1 && errno == EINTR) { continue; } break; } ptr += r; offset += r; left -= r; if (use_direct_io() && r % static_cast(GetRequiredBufferAlignment()) != 0) { // Bytes reads don't fill sectors. Should only happen at the end // of the file. break; } } if (r < 0) { // An error: return a non-ok status s = IOError( "While pread offset " + ToString(offset) + " len " + ToString(n), filename_, errno); } *result = Slice(scratch, (r < 0) ? 
0 : n - left); return s; } IOStatus PosixRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) { if (use_direct_io()) { for (size_t i = 0; i < num_reqs; i++) { assert(IsSectorAligned(reqs[i].offset, GetRequiredBufferAlignment())); assert(IsSectorAligned(reqs[i].len, GetRequiredBufferAlignment())); assert(IsSectorAligned(reqs[i].scratch, GetRequiredBufferAlignment())); } } #if defined(ROCKSDB_IOURING_PRESENT) struct io_uring* iu = nullptr; if (thread_local_io_urings_) { iu = static_cast(thread_local_io_urings_->Get()); if (iu == nullptr) { iu = CreateIOUring(); if (iu != nullptr) { thread_local_io_urings_->Reset(iu); } } } // Init failed, platform doesn't support io_uring. Fall back to // serialized reads if (iu == nullptr) { return FSRandomAccessFile::MultiRead(reqs, num_reqs, options, dbg); } struct WrappedReadRequest { FSReadRequest* req; struct iovec iov; size_t finished_len; explicit WrappedReadRequest(FSReadRequest* r) : req(r), finished_len(0) {} }; autovector req_wraps; autovector incomplete_rq_list; for (size_t i = 0; i < num_reqs; i++) { req_wraps.emplace_back(&reqs[i]); } size_t reqs_off = 0; while (num_reqs > reqs_off || !incomplete_rq_list.empty()) { size_t this_reqs = (num_reqs - reqs_off) + incomplete_rq_list.size(); // If requests exceed depth, split it into batches if (this_reqs > kIoUringDepth) this_reqs = kIoUringDepth; assert(incomplete_rq_list.size() <= this_reqs); for (size_t i = 0; i < this_reqs; i++) { WrappedReadRequest* rep_to_submit; if (i < incomplete_rq_list.size()) { rep_to_submit = incomplete_rq_list[i]; } else { rep_to_submit = &req_wraps[reqs_off++]; } assert(rep_to_submit->req->len > rep_to_submit->finished_len); rep_to_submit->iov.iov_base = rep_to_submit->req->scratch + rep_to_submit->finished_len; rep_to_submit->iov.iov_len = rep_to_submit->req->len - rep_to_submit->finished_len; struct io_uring_sqe* sqe; sqe = io_uring_get_sqe(iu); io_uring_prep_readv( sqe, fd_, 
&rep_to_submit->iov, 1, rep_to_submit->req->offset + rep_to_submit->finished_len); io_uring_sqe_set_data(sqe, rep_to_submit); } incomplete_rq_list.clear(); ssize_t ret = io_uring_submit_and_wait(iu, static_cast(this_reqs)); if (static_cast(ret) != this_reqs) { fprintf(stderr, "ret = %ld this_reqs: %ld\n", (long)ret, (long)this_reqs); } assert(static_cast(ret) == this_reqs); for (size_t i = 0; i < this_reqs; i++) { struct io_uring_cqe* cqe; WrappedReadRequest* req_wrap; // We could use the peek variant here, but this seems safer in terms // of our initial wait not reaping all completions ret = io_uring_wait_cqe(iu, &cqe); assert(!ret); req_wrap = static_cast(io_uring_cqe_get_data(cqe)); FSReadRequest* req = req_wrap->req; if (cqe->res < 0) { req->result = Slice(req->scratch, 0); req->status = IOError("Req failed", filename_, cqe->res); } else { size_t bytes_read = static_cast(cqe->res); TEST_SYNC_POINT_CALLBACK( "PosixRandomAccessFile::MultiRead:io_uring_result", &bytes_read); if (bytes_read == req_wrap->iov.iov_len) { req->result = Slice(req->scratch, req->len); req->status = IOStatus::OK(); } else if (bytes_read == 0) { // cqe->res == 0 can means EOF, or can mean partial results. See // comment // https://github.com/facebook/rocksdb/pull/6441#issuecomment-589843435 // Fall back to pread in this case. if (use_direct_io() && !IsSectorAligned(req_wrap->finished_len, GetRequiredBufferAlignment())) { // Bytes reads don't fill sectors. Should only happen at the end // of the file. 
req->result = Slice(req->scratch, req_wrap->finished_len); req->status = IOStatus::OK(); } else { Slice tmp_slice; req->status = Read(req->offset + req_wrap->finished_len, req->len - req_wrap->finished_len, options, &tmp_slice, req->scratch + req_wrap->finished_len, dbg); req->result = Slice(req->scratch, req_wrap->finished_len + tmp_slice.size()); } } else if (bytes_read < req_wrap->iov.iov_len) { assert(bytes_read > 0); assert(bytes_read + req_wrap->finished_len < req->len); req_wrap->finished_len += bytes_read; incomplete_rq_list.push_back(req_wrap); } else { req->result = Slice(req->scratch, 0); req->status = IOError("Req returned more bytes than requested", filename_, cqe->res); } } io_uring_cqe_seen(iu, cqe); } } return IOStatus::OK(); #else return FSRandomAccessFile::MultiRead(reqs, num_reqs, options, dbg); #endif } IOStatus PosixRandomAccessFile::Prefetch(uint64_t offset, size_t n, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus s; if (!use_direct_io()) { ssize_t r = 0; #ifdef OS_LINUX r = readahead(fd_, offset, n); #endif #ifdef OS_MACOSX radvisory advice; advice.ra_offset = static_cast(offset); advice.ra_count = static_cast(n); r = fcntl(fd_, F_RDADVISE, &advice); #endif if (r == -1) { s = IOError("While prefetching offset " + ToString(offset) + " len " + ToString(n), filename_, errno); } } return s; } #if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX) size_t PosixRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { return PosixHelper::GetUniqueIdFromFile(fd_, id, max_size); } #endif void PosixRandomAccessFile::Hint(AccessPattern pattern) { if (use_direct_io()) { return; } switch (pattern) { case kNormal: Fadvise(fd_, 0, 0, POSIX_FADV_NORMAL); break; case kRandom: Fadvise(fd_, 0, 0, POSIX_FADV_RANDOM); break; case kSequential: Fadvise(fd_, 0, 0, POSIX_FADV_SEQUENTIAL); break; case kWillNeed: Fadvise(fd_, 0, 0, POSIX_FADV_WILLNEED); break; case kWontNeed: Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); break; default: 
assert(false); break; } } IOStatus PosixRandomAccessFile::InvalidateCache(size_t offset, size_t length) { if (use_direct_io()) { return IOStatus::OK(); } #ifndef OS_LINUX (void)offset; (void)length; return IOStatus::OK(); #else // free OS pages int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); if (ret == 0) { return IOStatus::OK(); } return IOError("While fadvise NotNeeded offset " + ToString(offset) + " len " + ToString(length), filename_, errno); #endif } /* * PosixMmapReadableFile * * mmap() based random-access */ // base[0,length-1] contains the mmapped contents of the file. PosixMmapReadableFile::PosixMmapReadableFile(const int fd, const std::string& fname, void* base, size_t length, const EnvOptions& options) : fd_(fd), filename_(fname), mmapped_region_(base), length_(length) { #ifdef NDEBUG (void)options; #endif fd_ = fd_ + 0; // suppress the warning for used variables assert(options.use_mmap_reads); assert(!options.use_direct_reads); } PosixMmapReadableFile::~PosixMmapReadableFile() { int ret = munmap(mmapped_region_, length_); if (ret != 0) { fprintf(stdout, "failed to munmap %p length %" ROCKSDB_PRIszt " \n", mmapped_region_, length_); } close(fd_); } IOStatus PosixMmapReadableFile::Read(uint64_t offset, size_t n, const IOOptions& /*opts*/, Slice* result, char* /*scratch*/, IODebugContext* /*dbg*/) const { IOStatus s; if (offset > length_) { *result = Slice(); return IOError("While mmap read offset " + ToString(offset) + " larger than file length " + ToString(length_), filename_, EINVAL); } else if (offset + n > length_) { n = static_cast(length_ - offset); } *result = Slice(reinterpret_cast(mmapped_region_) + offset, n); return s; } IOStatus PosixMmapReadableFile::InvalidateCache(size_t offset, size_t length) { #ifndef OS_LINUX (void)offset; (void)length; return IOStatus::OK(); #else // free OS pages int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); if (ret == 0) { return IOStatus::OK(); } return IOError("While fadvise not needed. 
Offset " + ToString(offset) + " len" + ToString(length), filename_, errno); #endif } /* * PosixMmapFile * * We preallocate up to an extra megabyte and use memcpy to append new * data to the file. This is safe since we either properly close the * file before reading from it, or for log files, the reading code * knows enough to skip zero suffixes. */ IOStatus PosixMmapFile::UnmapCurrentRegion() { TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0", rocksdb_kill_odds); if (base_ != nullptr) { int munmap_status = munmap(base_, limit_ - base_); if (munmap_status != 0) { return IOError("While munmap", filename_, munmap_status); } file_offset_ += limit_ - base_; base_ = nullptr; limit_ = nullptr; last_sync_ = nullptr; dst_ = nullptr; // Increase the amount we map the next time, but capped at 1MB if (map_size_ < (1 << 20)) { map_size_ *= 2; } } return IOStatus::OK(); } IOStatus PosixMmapFile::MapNewRegion() { #ifdef ROCKSDB_FALLOCATE_PRESENT assert(base_ == nullptr); TEST_KILL_RANDOM("PosixMmapFile::UnmapCurrentRegion:0", rocksdb_kill_odds); // we can't fallocate with FALLOC_FL_KEEP_SIZE here if (allow_fallocate_) { IOSTATS_TIMER_GUARD(allocate_nanos); int alloc_status = fallocate(fd_, 0, file_offset_, map_size_); if (alloc_status != 0) { // fallback to posix_fallocate alloc_status = posix_fallocate(fd_, file_offset_, map_size_); } if (alloc_status != 0) { return IOStatus::IOError("Error allocating space to file : " + filename_ + "Error : " + strerror(alloc_status)); } } TEST_KILL_RANDOM("PosixMmapFile::Append:1", rocksdb_kill_odds); void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, file_offset_); if (ptr == MAP_FAILED) { return IOStatus::IOError("MMap failed on " + filename_); } TEST_KILL_RANDOM("PosixMmapFile::Append:2", rocksdb_kill_odds); base_ = reinterpret_cast(ptr); limit_ = base_ + map_size_; dst_ = base_; last_sync_ = base_; return IOStatus::OK(); #else return IOStatus::NotSupported("This platform doesn't support fallocate()"); 
#endif } IOStatus PosixMmapFile::Msync() { if (dst_ == last_sync_) { return IOStatus::OK(); } // Find the beginnings of the pages that contain the first and last // bytes to be synced. size_t p1 = TruncateToPageBoundary(last_sync_ - base_); size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); last_sync_ = dst_; TEST_KILL_RANDOM("PosixMmapFile::Msync:0", rocksdb_kill_odds); if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { return IOError("While msync", filename_, errno); } return IOStatus::OK(); } PosixMmapFile::PosixMmapFile(const std::string& fname, int fd, size_t page_size, const EnvOptions& options) : filename_(fname), fd_(fd), page_size_(page_size), map_size_(Roundup(65536, page_size)), base_(nullptr), limit_(nullptr), dst_(nullptr), last_sync_(nullptr), file_offset_(0) { #ifdef ROCKSDB_FALLOCATE_PRESENT allow_fallocate_ = options.allow_fallocate; fallocate_with_keep_size_ = options.fallocate_with_keep_size; #else (void)options; #endif assert((page_size & (page_size - 1)) == 0); assert(options.use_mmap_writes); assert(!options.use_direct_writes); } PosixMmapFile::~PosixMmapFile() { if (fd_ >= 0) { PosixMmapFile::Close(IOOptions(), nullptr); } } IOStatus PosixMmapFile::Append(const Slice& data, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { const char* src = data.data(); size_t left = data.size(); while (left > 0) { assert(base_ <= dst_); assert(dst_ <= limit_); size_t avail = limit_ - dst_; if (avail == 0) { IOStatus s = UnmapCurrentRegion(); if (!s.ok()) { return s; } s = MapNewRegion(); if (!s.ok()) { return s; } TEST_KILL_RANDOM("PosixMmapFile::Append:0", rocksdb_kill_odds); } size_t n = (left <= avail) ? 
left : avail; assert(dst_); memcpy(dst_, src, n); dst_ += n; src += n; left -= n; } return IOStatus::OK(); } IOStatus PosixMmapFile::Close(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus s; size_t unused = limit_ - dst_; s = UnmapCurrentRegion(); if (!s.ok()) { s = IOError("While closing mmapped file", filename_, errno); } else if (unused > 0) { // Trim the extra space at the end of the file if (ftruncate(fd_, file_offset_ - unused) < 0) { s = IOError("While ftruncating mmaped file", filename_, errno); } } if (close(fd_) < 0) { if (s.ok()) { s = IOError("While closing mmapped file", filename_, errno); } } fd_ = -1; base_ = nullptr; limit_ = nullptr; return s; } IOStatus PosixMmapFile::Flush(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { return IOStatus::OK(); } IOStatus PosixMmapFile::Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (fdatasync(fd_) < 0) { return IOError("While fdatasync mmapped file", filename_, errno); } return Msync(); } /** * Flush data as well as metadata to stable storage. */ IOStatus PosixMmapFile::Fsync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (fsync(fd_) < 0) { return IOError("While fsync mmaped file", filename_, errno); } return Msync(); } /** * Get the size of valid data in the file. This will not match the * size that is returned from the filesystem because we use mmap * to extend file by map_size every time. 
*/ uint64_t PosixMmapFile::GetFileSize(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { size_t used = dst_ - base_; return file_offset_ + used; } IOStatus PosixMmapFile::InvalidateCache(size_t offset, size_t length) { #ifndef OS_LINUX (void)offset; (void)length; return IOStatus::OK(); #else // free OS pages int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); if (ret == 0) { return IOStatus::OK(); } return IOError("While fadvise NotNeeded mmapped file", filename_, errno); #endif } #ifdef ROCKSDB_FALLOCATE_PRESENT IOStatus PosixMmapFile::Allocate(uint64_t offset, uint64_t len, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { assert(offset <= static_cast(std::numeric_limits::max())); assert(len <= static_cast(std::numeric_limits::max())); TEST_KILL_RANDOM("PosixMmapFile::Allocate:0", rocksdb_kill_odds); int alloc_status = 0; if (allow_fallocate_) { alloc_status = fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, static_cast(offset), static_cast(len)); } if (alloc_status == 0) { return IOStatus::OK(); } else { return IOError( "While fallocate offset " + ToString(offset) + " len " + ToString(len), filename_, errno); } } #endif /* * PosixWritableFile * * Use posix write to write data to a file. 
*/ PosixWritableFile::PosixWritableFile(const std::string& fname, int fd, size_t logical_block_size, const EnvOptions& options) : FSWritableFile(options), filename_(fname), use_direct_io_(options.use_direct_writes), fd_(fd), filesize_(0), logical_sector_size_(logical_block_size) { #ifdef ROCKSDB_FALLOCATE_PRESENT allow_fallocate_ = options.allow_fallocate; fallocate_with_keep_size_ = options.fallocate_with_keep_size; #endif #ifdef ROCKSDB_RANGESYNC_PRESENT sync_file_range_supported_ = IsSyncFileRangeSupported(fd_); #endif // ROCKSDB_RANGESYNC_PRESENT assert(!options.use_mmap_writes); } PosixWritableFile::~PosixWritableFile() { if (fd_ >= 0) { PosixWritableFile::Close(IOOptions(), nullptr); } } IOStatus PosixWritableFile::Append(const Slice& data, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (use_direct_io()) { assert(IsSectorAligned(data.size(), GetRequiredBufferAlignment())); assert(IsSectorAligned(data.data(), GetRequiredBufferAlignment())); } const char* src = data.data(); size_t nbytes = data.size(); if (!PosixWrite(fd_, src, nbytes)) { return IOError("While appending to file", filename_, errno); } filesize_ += nbytes; return IOStatus::OK(); } IOStatus PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (use_direct_io()) { assert(IsSectorAligned(offset, GetRequiredBufferAlignment())); assert(IsSectorAligned(data.size(), GetRequiredBufferAlignment())); assert(IsSectorAligned(data.data(), GetRequiredBufferAlignment())); } assert(offset <= static_cast(std::numeric_limits::max())); const char* src = data.data(); size_t nbytes = data.size(); if (!PosixPositionedWrite(fd_, src, nbytes, static_cast(offset))) { return IOError("While pwrite to file at offset " + ToString(offset), filename_, errno); } filesize_ = offset + nbytes; return IOStatus::OK(); } IOStatus PosixWritableFile::Truncate(uint64_t size, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus s; int r = 
ftruncate(fd_, size); if (r < 0) { s = IOError("While ftruncate file to size " + ToString(size), filename_, errno); } else { filesize_ = size; } return s; } IOStatus PosixWritableFile::Close(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { IOStatus s; size_t block_size; size_t last_allocated_block; GetPreallocationStatus(&block_size, &last_allocated_block); if (last_allocated_block > 0) { // trim the extra space preallocated at the end of the file // NOTE(ljin): we probably don't want to surface failure as an IOError, // but it will be nice to log these errors. int dummy __attribute__((__unused__)); dummy = ftruncate(fd_, filesize_); #if defined(ROCKSDB_FALLOCATE_PRESENT) && defined(FALLOC_FL_PUNCH_HOLE) && \ !defined(TRAVIS) // in some file systems, ftruncate only trims trailing space if the // new file size is smaller than the current size. Calling fallocate // with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused // blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following // filesystems: // XFS (since Linux 2.6.38) // ext4 (since Linux 3.0) // Btrfs (since Linux 3.7) // tmpfs (since Linux 3.5) // We ignore error since failure of this operation does not affect // correctness. // TRAVIS - this code does not work on TRAVIS filesystems. // the FALLOC_FL_KEEP_SIZE option is expected to not change the size // of the file, but it does. Simple strace report will show that. // While we work with Travis-CI team to figure out if this is a // quirk of Docker/AUFS, we will comment this out. struct stat file_stats; int result = fstat(fd_, &file_stats); // After ftruncate, we check whether ftruncate has the correct behavior. 
// If not, we should hack it with FALLOC_FL_PUNCH_HOLE if (result == 0 && (file_stats.st_size + file_stats.st_blksize - 1) / file_stats.st_blksize != file_stats.st_blocks / (file_stats.st_blksize / 512)) { IOSTATS_TIMER_GUARD(allocate_nanos); if (allow_fallocate_) { fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, filesize_, block_size * last_allocated_block - filesize_); } } #endif } if (close(fd_) < 0) { s = IOError("While closing file after writing", filename_, errno); } fd_ = -1; return s; } // write out the cached data to the OS cache IOStatus PosixWritableFile::Flush(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { return IOStatus::OK(); } IOStatus PosixWritableFile::Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (fdatasync(fd_) < 0) { return IOError("While fdatasync", filename_, errno); } return IOStatus::OK(); } IOStatus PosixWritableFile::Fsync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (fsync(fd_) < 0) { return IOError("While fsync", filename_, errno); } return IOStatus::OK(); } bool PosixWritableFile::IsSyncThreadSafe() const { return true; } uint64_t PosixWritableFile::GetFileSize(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { return filesize_; } void PosixWritableFile::SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) { #ifdef OS_LINUX // Suppress Valgrind "Unimplemented functionality" error. 
#ifndef ROCKSDB_VALGRIND_RUN if (hint == write_hint_) { return; } if (fcntl(fd_, F_SET_RW_HINT, &hint) == 0) { write_hint_ = hint; } #else (void)hint; #endif // ROCKSDB_VALGRIND_RUN #else (void)hint; #endif // OS_LINUX } IOStatus PosixWritableFile::InvalidateCache(size_t offset, size_t length) { if (use_direct_io()) { return IOStatus::OK(); } #ifndef OS_LINUX (void)offset; (void)length; return IOStatus::OK(); #else // free OS pages int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); if (ret == 0) { return IOStatus::OK(); } return IOError("While fadvise NotNeeded", filename_, errno); #endif } #ifdef ROCKSDB_FALLOCATE_PRESENT IOStatus PosixWritableFile::Allocate(uint64_t offset, uint64_t len, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { assert(offset <= static_cast(std::numeric_limits::max())); assert(len <= static_cast(std::numeric_limits::max())); TEST_KILL_RANDOM("PosixWritableFile::Allocate:0", rocksdb_kill_odds); IOSTATS_TIMER_GUARD(allocate_nanos); int alloc_status = 0; if (allow_fallocate_) { alloc_status = fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, static_cast(offset), static_cast(len)); } if (alloc_status == 0) { return IOStatus::OK(); } else { return IOError( "While fallocate offset " + ToString(offset) + " len " + ToString(len), filename_, errno); } } #endif IOStatus PosixWritableFile::RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& opts, IODebugContext* dbg) { #ifdef ROCKSDB_RANGESYNC_PRESENT assert(offset <= static_cast(std::numeric_limits::max())); assert(nbytes <= static_cast(std::numeric_limits::max())); if (sync_file_range_supported_) { int ret; if (strict_bytes_per_sync_) { // Specifying `SYNC_FILE_RANGE_WAIT_BEFORE` together with an offset/length // that spans all bytes written so far tells `sync_file_range` to wait for // any outstanding writeback requests to finish before issuing a new one. 
ret = sync_file_range(fd_, 0, static_cast(offset + nbytes), SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE); } else { ret = sync_file_range(fd_, static_cast(offset), static_cast(nbytes), SYNC_FILE_RANGE_WRITE); } if (ret != 0) { return IOError("While sync_file_range returned " + ToString(ret), filename_, errno); } return IOStatus::OK(); } #endif // ROCKSDB_RANGESYNC_PRESENT return FSWritableFile::RangeSync(offset, nbytes, opts, dbg); } #ifdef OS_LINUX size_t PosixWritableFile::GetUniqueId(char* id, size_t max_size) const { return PosixHelper::GetUniqueIdFromFile(fd_, id, max_size); } #endif /* * PosixRandomRWFile */ PosixRandomRWFile::PosixRandomRWFile(const std::string& fname, int fd, const EnvOptions& /*options*/) : filename_(fname), fd_(fd) {} PosixRandomRWFile::~PosixRandomRWFile() { if (fd_ >= 0) { Close(IOOptions(), nullptr); } } IOStatus PosixRandomRWFile::Write(uint64_t offset, const Slice& data, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { const char* src = data.data(); size_t nbytes = data.size(); if (!PosixPositionedWrite(fd_, src, nbytes, static_cast(offset))) { return IOError( "While write random read/write file at offset " + ToString(offset), filename_, errno); } return IOStatus::OK(); } IOStatus PosixRandomRWFile::Read(uint64_t offset, size_t n, const IOOptions& /*opts*/, Slice* result, char* scratch, IODebugContext* /*dbg*/) const { size_t left = n; char* ptr = scratch; while (left > 0) { ssize_t done = pread(fd_, ptr, left, offset); if (done < 0) { // error while reading from file if (errno == EINTR) { // read was interrupted, try again. 
continue; } return IOError("While reading random read/write file offset " + ToString(offset) + " len " + ToString(n), filename_, errno); } else if (done == 0) { // Nothing more to read break; } // Read `done` bytes ptr += done; offset += done; left -= done; } *result = Slice(scratch, n - left); return IOStatus::OK(); } IOStatus PosixRandomRWFile::Flush(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { return IOStatus::OK(); } IOStatus PosixRandomRWFile::Sync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (fdatasync(fd_) < 0) { return IOError("While fdatasync random read/write file", filename_, errno); } return IOStatus::OK(); } IOStatus PosixRandomRWFile::Fsync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (fsync(fd_) < 0) { return IOError("While fsync random read/write file", filename_, errno); } return IOStatus::OK(); } IOStatus PosixRandomRWFile::Close(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { if (close(fd_) < 0) { return IOError("While close random read/write file", filename_, errno); } fd_ = -1; return IOStatus::OK(); } PosixMemoryMappedFileBuffer::~PosixMemoryMappedFileBuffer() { // TODO should have error handling though not much we can do... munmap(this->base_, length_); } /* * PosixDirectory */ PosixDirectory::~PosixDirectory() { close(fd_); } IOStatus PosixDirectory::Fsync(const IOOptions& /*opts*/, IODebugContext* /*dbg*/) { #ifndef OS_AIX if (fsync(fd_) == -1) { return IOError("While fsync", "a directory", errno); } #endif return IOStatus::OK(); } } // namespace ROCKSDB_NAMESPACE #endif rocksdb-6.11.4/env/io_posix.h000066400000000000000000000337101370372246700160430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #if defined(ROCKSDB_IOURING_PRESENT) #include #include #endif #include #include #include #include #include #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/io_status.h" #include "util/mutexlock.h" #include "util/thread_local.h" // For non linux platform, the following macros are used only as place // holder. #if !(defined OS_LINUX) && !(defined CYGWIN) && !(defined OS_AIX) #define POSIX_FADV_NORMAL 0 /* [MC1] no further special treatment */ #define POSIX_FADV_RANDOM 1 /* [MC1] expect random page refs */ #define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */ #define POSIX_FADV_WILLNEED 3 /* [MC1] will need these pages */ #define POSIX_FADV_DONTNEED 4 /* [MC1] don't need these pages */ #endif namespace ROCKSDB_NAMESPACE { std::string IOErrorMsg(const std::string& context, const std::string& file_name); // file_name can be left empty if it is not unkown. IOStatus IOError(const std::string& context, const std::string& file_name, int err_number); class PosixHelper { public: static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size); static size_t GetLogicalBlockSizeOfFd(int fd); static Status GetLogicalBlockSizeOfDirectory(const std::string& directory, size_t* size); }; #ifdef OS_LINUX // Files under a specific directory have the same logical block size. // This class caches the logical block size for the specified directories to // save the CPU cost of computing the size. // Safe for concurrent access from multiple threads without any external // synchronization. 
class LogicalBlockSizeCache { public: LogicalBlockSizeCache( std::function get_logical_block_size_of_fd = PosixHelper::GetLogicalBlockSizeOfFd, std::function get_logical_block_size_of_directory = PosixHelper::GetLogicalBlockSizeOfDirectory) : get_logical_block_size_of_fd_(get_logical_block_size_of_fd), get_logical_block_size_of_directory_( get_logical_block_size_of_directory) {} // Takes the following actions: // 1. Increases reference count of the directories; // 2. If the directory's logical block size is not cached, // compute the buffer size and cache the result. Status RefAndCacheLogicalBlockSize( const std::vector& directories); // Takes the following actions: // 1. Decreases reference count of the directories; // 2. If the reference count of a directory reaches 0, remove the directory // from the cache. void UnrefAndTryRemoveCachedLogicalBlockSize( const std::vector& directories); // Returns the logical block size for the file. // // If the file is under a cached directory, return the cached size. // Otherwise, the size is computed. size_t GetLogicalBlockSize(const std::string& fname, int fd); int GetRefCount(const std::string& dir) { ReadLock lock(&cache_mutex_); auto it = cache_.find(dir); if (it == cache_.end()) { return 0; } return it->second.ref; } size_t Size() const { return cache_.size(); } bool Contains(const std::string& dir) { ReadLock lock(&cache_mutex_); return cache_.find(dir) != cache_.end(); } private: struct CacheValue { CacheValue() : size(0), ref(0) {} // Logical block size of the directory. size_t size; // Reference count of the directory. 
int ref; }; std::function get_logical_block_size_of_fd_; std::function get_logical_block_size_of_directory_; std::map cache_; port::RWMutex cache_mutex_; }; #endif class PosixSequentialFile : public FSSequentialFile { private: std::string filename_; FILE* file_; int fd_; bool use_direct_io_; size_t logical_sector_size_; public: PosixSequentialFile(const std::string& fname, FILE* file, int fd, size_t logical_block_size, const EnvOptions& options); virtual ~PosixSequentialFile(); virtual IOStatus Read(size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) override; virtual IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) override; virtual IOStatus Skip(uint64_t n) override; virtual IOStatus InvalidateCache(size_t offset, size_t length) override; virtual bool use_direct_io() const override { return use_direct_io_; } virtual size_t GetRequiredBufferAlignment() const override { return logical_sector_size_; } }; #if defined(ROCKSDB_IOURING_PRESENT) // io_uring instance queue depth const unsigned int kIoUringDepth = 256; inline void DeleteIOUring(void* p) { struct io_uring* iu = static_cast(p); delete iu; } inline struct io_uring* CreateIOUring() { struct io_uring* new_io_uring = new struct io_uring; int ret = io_uring_queue_init(kIoUringDepth, new_io_uring, 0); if (ret) { delete new_io_uring; new_io_uring = nullptr; } return new_io_uring; } #endif // defined(ROCKSDB_IOURING_PRESENT) class PosixRandomAccessFile : public FSRandomAccessFile { protected: std::string filename_; int fd_; bool use_direct_io_; size_t logical_sector_size_; #if defined(ROCKSDB_IOURING_PRESENT) ThreadLocalPtr* thread_local_io_urings_; #endif public: PosixRandomAccessFile(const std::string& fname, int fd, size_t logical_block_size, const EnvOptions& options #if defined(ROCKSDB_IOURING_PRESENT) , ThreadLocalPtr* thread_local_io_urings #endif ); virtual ~PosixRandomAccessFile(); virtual IOStatus 
Read(uint64_t offset, size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) const override; virtual IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) override; virtual IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& opts, IODebugContext* dbg) override; #if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX) virtual size_t GetUniqueId(char* id, size_t max_size) const override; #endif virtual void Hint(AccessPattern pattern) override; virtual IOStatus InvalidateCache(size_t offset, size_t length) override; virtual bool use_direct_io() const override { return use_direct_io_; } virtual size_t GetRequiredBufferAlignment() const override { return logical_sector_size_; } }; class PosixWritableFile : public FSWritableFile { protected: const std::string filename_; const bool use_direct_io_; int fd_; uint64_t filesize_; size_t logical_sector_size_; #ifdef ROCKSDB_FALLOCATE_PRESENT bool allow_fallocate_; bool fallocate_with_keep_size_; #endif #ifdef ROCKSDB_RANGESYNC_PRESENT // Even if the syscall is present, the filesystem may still not properly // support it, so we need to do a dynamic check too. 
bool sync_file_range_supported_; #endif // ROCKSDB_RANGESYNC_PRESENT public: explicit PosixWritableFile(const std::string& fname, int fd, size_t logical_block_size, const EnvOptions& options); virtual ~PosixWritableFile(); // Need to implement this so the file is truncated correctly // with direct I/O virtual IOStatus Truncate(uint64_t size, const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Append(const Slice& data, const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus PositionedAppend(const Slice& data, uint64_t offset, const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; virtual bool IsSyncThreadSafe() const override; virtual bool use_direct_io() const override { return use_direct_io_; } virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override; virtual uint64_t GetFileSize(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus InvalidateCache(size_t offset, size_t length) override; virtual size_t GetRequiredBufferAlignment() const override { return logical_sector_size_; } #ifdef ROCKSDB_FALLOCATE_PRESENT virtual IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& opts, IODebugContext* dbg) override; #endif virtual IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& opts, IODebugContext* dbg) override; #ifdef OS_LINUX virtual size_t GetUniqueId(char* id, size_t max_size) const override; #endif }; // mmap() based random-access class PosixMmapReadableFile : public FSRandomAccessFile { private: int fd_; std::string filename_; void* mmapped_region_; size_t length_; public: PosixMmapReadableFile(const int fd, const std::string& fname, void* base, size_t length, const 
EnvOptions& options); virtual ~PosixMmapReadableFile(); virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) const override; virtual IOStatus InvalidateCache(size_t offset, size_t length) override; }; class PosixMmapFile : public FSWritableFile { private: std::string filename_; int fd_; size_t page_size_; size_t map_size_; // How much extra memory to map at a time char* base_; // The mapped region char* limit_; // Limit of the mapped region char* dst_; // Where to write next (in range [base_,limit_]) char* last_sync_; // Where have we synced up to uint64_t file_offset_; // Offset of base_ in file #ifdef ROCKSDB_FALLOCATE_PRESENT bool allow_fallocate_; // If false, fallocate calls are bypassed bool fallocate_with_keep_size_; #endif // Roundup x to a multiple of y static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; } size_t TruncateToPageBoundary(size_t s) { s -= (s & (page_size_ - 1)); assert((s % page_size_) == 0); return s; } IOStatus MapNewRegion(); IOStatus UnmapCurrentRegion(); IOStatus Msync(); public: PosixMmapFile(const std::string& fname, int fd, size_t page_size, const EnvOptions& options); ~PosixMmapFile(); // Means Close() will properly take care of truncate // and it does not need any additional information virtual IOStatus Truncate(uint64_t /*size*/, const IOOptions& /*opts*/, IODebugContext* /*dbg*/) override { return IOStatus::OK(); } virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Append(const Slice& data, const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; virtual uint64_t GetFileSize(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus InvalidateCache(size_t offset, 
size_t length) override; #ifdef ROCKSDB_FALLOCATE_PRESENT virtual IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& opts, IODebugContext* dbg) override; #endif }; class PosixRandomRWFile : public FSRandomRWFile { public: explicit PosixRandomRWFile(const std::string& fname, int fd, const EnvOptions& options); virtual ~PosixRandomRWFile(); virtual IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) const override; virtual IOStatus Flush(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Sync(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; virtual IOStatus Close(const IOOptions& opts, IODebugContext* dbg) override; private: const std::string filename_; int fd_; }; struct PosixMemoryMappedFileBuffer : public MemoryMappedFileBuffer { PosixMemoryMappedFileBuffer(void* _base, size_t _length) : MemoryMappedFileBuffer(_base, _length) {} virtual ~PosixMemoryMappedFileBuffer(); }; class PosixDirectory : public FSDirectory { public: explicit PosixDirectory(int fd) : fd_(fd) {} ~PosixDirectory(); virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override; private: int fd_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/io_posix_test.cc000066400000000000000000000102561370372246700172400ustar00rootroot00000000000000// Copyright (c) 2020-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "test_util/testharness.h" #ifdef ROCKSDB_LIB_IO_POSIX #include "env/io_posix.h" namespace ROCKSDB_NAMESPACE { #ifdef OS_LINUX class LogicalBlockSizeCacheTest : public testing::Test {}; // Tests the caching behavior. TEST_F(LogicalBlockSizeCacheTest, Cache) { int ncall = 0; auto get_fd_block_size = [&](int fd) { ncall++; return fd; }; std::map dir_fds{ {"/", 0}, {"/db", 1}, {"/db1", 2}, {"/db2", 3}, }; auto get_dir_block_size = [&](const std::string& dir, size_t* size) { ncall++; *size = dir_fds[dir]; return Status::OK(); }; LogicalBlockSizeCache cache(get_fd_block_size, get_dir_block_size); ASSERT_EQ(0, ncall); ASSERT_EQ(0, cache.Size()); ASSERT_EQ(6, cache.GetLogicalBlockSize("/sst", 6)); ASSERT_EQ(1, ncall); ASSERT_EQ(7, cache.GetLogicalBlockSize("/db/sst1", 7)); ASSERT_EQ(2, ncall); ASSERT_EQ(8, cache.GetLogicalBlockSize("/db/sst2", 8)); ASSERT_EQ(3, ncall); ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/", "/db1/", "/db2"})); ASSERT_EQ(3, cache.Size()); ASSERT_TRUE(cache.Contains("/")); ASSERT_TRUE(cache.Contains("/db1")); ASSERT_TRUE(cache.Contains("/db2")); ASSERT_EQ(6, ncall); // Block size for / is cached. ASSERT_EQ(0, cache.GetLogicalBlockSize("/sst", 6)); ASSERT_EQ(6, ncall); // No cached size for /db. ASSERT_EQ(7, cache.GetLogicalBlockSize("/db/sst1", 7)); ASSERT_EQ(7, ncall); ASSERT_EQ(8, cache.GetLogicalBlockSize("/db/sst2", 8)); ASSERT_EQ(8, ncall); // Block size for /db1 is cached. ASSERT_EQ(2, cache.GetLogicalBlockSize("/db1/sst1", 4)); ASSERT_EQ(8, ncall); ASSERT_EQ(2, cache.GetLogicalBlockSize("/db1/sst2", 5)); ASSERT_EQ(8, ncall); // Block size for /db2 is cached. 
ASSERT_EQ(3, cache.GetLogicalBlockSize("/db2/sst1", 6)); ASSERT_EQ(8, ncall); ASSERT_EQ(3, cache.GetLogicalBlockSize("/db2/sst2", 7)); ASSERT_EQ(8, ncall); ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/db"})); ASSERT_EQ(4, cache.Size()); ASSERT_TRUE(cache.Contains("/")); ASSERT_TRUE(cache.Contains("/db1")); ASSERT_TRUE(cache.Contains("/db2")); ASSERT_TRUE(cache.Contains("/db")); ASSERT_EQ(9, ncall); // Block size for /db is cached. ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst1", 7)); ASSERT_EQ(9, ncall); ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst2", 8)); ASSERT_EQ(9, ncall); } // Tests the reference counting behavior. TEST_F(LogicalBlockSizeCacheTest, Ref) { int ncall = 0; auto get_fd_block_size = [&](int fd) { ncall++; return fd; }; std::map dir_fds{ {"/db", 0}, }; auto get_dir_block_size = [&](const std::string& dir, size_t* size) { ncall++; *size = dir_fds[dir]; return Status::OK(); }; LogicalBlockSizeCache cache(get_fd_block_size, get_dir_block_size); ASSERT_EQ(0, ncall); ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst0", 1)); ASSERT_EQ(1, ncall); ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/db"})); ASSERT_EQ(2, ncall); ASSERT_EQ(1, cache.GetRefCount("/db")); // Block size for /db is cached. Ref count = 1. ASSERT_EQ(0, cache.GetLogicalBlockSize("/db/sst1", 1)); ASSERT_EQ(2, ncall); // Ref count = 2, but won't recompute the cached buffer size. ASSERT_OK(cache.RefAndCacheLogicalBlockSize({"/db"})); ASSERT_EQ(2, cache.GetRefCount("/db")); ASSERT_EQ(2, ncall); // Ref count = 1. cache.UnrefAndTryRemoveCachedLogicalBlockSize({"/db"}); ASSERT_EQ(1, cache.GetRefCount("/db")); // Block size for /db is still cached. ASSERT_EQ(0, cache.GetLogicalBlockSize("/db/sst2", 1)); ASSERT_EQ(2, ncall); // Ref count = 0 and cached buffer size for /db is removed. 
cache.UnrefAndTryRemoveCachedLogicalBlockSize({"/db"}); ASSERT_EQ(0, cache.Size()); ASSERT_EQ(1, cache.GetLogicalBlockSize("/db/sst0", 1)); ASSERT_EQ(3, ncall); } #endif } // namespace ROCKSDB_NAMESPACE #endif int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/env/mock_env.cc000066400000000000000000000510401370372246700161450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "env/mock_env.h" #include #include #include "file/filename.h" #include "port/sys_time.h" #include "util/cast_util.h" #include "util/murmurhash.h" #include "util/random.h" #include "util/rate_limiter.h" namespace ROCKSDB_NAMESPACE { class MemFile { public: explicit MemFile(Env* env, const std::string& fn, bool _is_lock_file = false) : env_(env), fn_(fn), refs_(0), is_lock_file_(_is_lock_file), locked_(false), size_(0), modified_time_(Now()), rnd_(static_cast( MurmurHash(fn.data(), static_cast(fn.size()), 0))), fsynced_bytes_(0) {} // No copying allowed. 
MemFile(const MemFile&) = delete; void operator=(const MemFile&) = delete; void Ref() { MutexLock lock(&mutex_); ++refs_; } bool is_lock_file() const { return is_lock_file_; } bool Lock() { assert(is_lock_file_); MutexLock lock(&mutex_); if (locked_) { return false; } else { locked_ = true; return true; } } void Unlock() { assert(is_lock_file_); MutexLock lock(&mutex_); locked_ = false; } void Unref() { bool do_delete = false; { MutexLock lock(&mutex_); --refs_; assert(refs_ >= 0); if (refs_ <= 0) { do_delete = true; } } if (do_delete) { delete this; } } uint64_t Size() const { return size_; } void Truncate(size_t size) { MutexLock lock(&mutex_); if (size < size_) { data_.resize(size); size_ = size; } } void CorruptBuffer() { if (fsynced_bytes_ >= size_) { return; } uint64_t buffered_bytes = size_ - fsynced_bytes_; uint64_t start = fsynced_bytes_ + rnd_.Uniform(static_cast(buffered_bytes)); uint64_t end = std::min(start + 512, size_.load()); MutexLock lock(&mutex_); for (uint64_t pos = start; pos < end; ++pos) { data_[static_cast(pos)] = static_cast(rnd_.Uniform(256)); } } Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { MutexLock lock(&mutex_); const uint64_t available = Size() - std::min(Size(), offset); size_t offset_ = static_cast(offset); if (n > available) { n = static_cast(available); } if (n == 0) { *result = Slice(); return Status::OK(); } if (scratch) { memcpy(scratch, &(data_[offset_]), n); *result = Slice(scratch, n); } else { *result = Slice(&(data_[offset_]), n); } return Status::OK(); } Status Write(uint64_t offset, const Slice& data) { MutexLock lock(&mutex_); size_t offset_ = static_cast(offset); if (offset + data.size() > data_.size()) { data_.resize(offset_ + data.size()); } data_.replace(offset_, data.size(), data.data(), data.size()); size_ = data_.size(); modified_time_ = Now(); return Status::OK(); } Status Append(const Slice& data) { MutexLock lock(&mutex_); data_.append(data.data(), data.size()); size_ = 
data_.size(); modified_time_ = Now(); return Status::OK(); } Status Fsync() { fsynced_bytes_ = size_.load(); return Status::OK(); } uint64_t ModifiedTime() const { return modified_time_; } private: uint64_t Now() { int64_t unix_time = 0; auto s = env_->GetCurrentTime(&unix_time); assert(s.ok()); return static_cast(unix_time); } // Private since only Unref() should be used to delete it. ~MemFile() { assert(refs_ == 0); } Env* env_; const std::string fn_; mutable port::Mutex mutex_; int refs_; bool is_lock_file_; bool locked_; // Data written into this file, all bytes before fsynced_bytes are // persistent. std::string data_; std::atomic size_; std::atomic modified_time_; Random rnd_; std::atomic fsynced_bytes_; }; namespace { class MockSequentialFile : public SequentialFile { public: explicit MockSequentialFile(MemFile* file) : file_(file), pos_(0) { file_->Ref(); } ~MockSequentialFile() override { file_->Unref(); } Status Read(size_t n, Slice* result, char* scratch) override { Status s = file_->Read(pos_, n, result, scratch); if (s.ok()) { pos_ += result->size(); } return s; } Status Skip(uint64_t n) override { if (pos_ > file_->Size()) { return Status::IOError("pos_ > file_->Size()"); } const uint64_t available = file_->Size() - pos_; if (n > available) { n = available; } pos_ += static_cast(n); return Status::OK(); } private: MemFile* file_; size_t pos_; }; class MockRandomAccessFile : public RandomAccessFile { public: explicit MockRandomAccessFile(MemFile* file) : file_(file) { file_->Ref(); } ~MockRandomAccessFile() override { file_->Unref(); } Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { return file_->Read(offset, n, result, scratch); } private: MemFile* file_; }; class MockRandomRWFile : public RandomRWFile { public: explicit MockRandomRWFile(MemFile* file) : file_(file) { file_->Ref(); } ~MockRandomRWFile() override { file_->Unref(); } Status Write(uint64_t offset, const Slice& data) override { return 
file_->Write(offset, data); } Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { return file_->Read(offset, n, result, scratch); } Status Close() override { return file_->Fsync(); } Status Flush() override { return Status::OK(); } Status Sync() override { return file_->Fsync(); } private: MemFile* file_; }; class MockWritableFile : public WritableFile { public: MockWritableFile(MemFile* file, RateLimiter* rate_limiter) : file_(file), rate_limiter_(rate_limiter) { file_->Ref(); } ~MockWritableFile() override { file_->Unref(); } Status Append(const Slice& data) override { size_t bytes_written = 0; while (bytes_written < data.size()) { auto bytes = RequestToken(data.size() - bytes_written); Status s = file_->Append(Slice(data.data() + bytes_written, bytes)); if (!s.ok()) { return s; } bytes_written += bytes; } return Status::OK(); } Status Truncate(uint64_t size) override { file_->Truncate(static_cast(size)); return Status::OK(); } Status Close() override { return file_->Fsync(); } Status Flush() override { return Status::OK(); } Status Sync() override { return file_->Fsync(); } uint64_t GetFileSize() override { return file_->Size(); } private: inline size_t RequestToken(size_t bytes) { if (rate_limiter_ && io_priority_ < Env::IO_TOTAL) { bytes = std::min( bytes, static_cast(rate_limiter_->GetSingleBurstBytes())); rate_limiter_->Request(bytes, io_priority_); } return bytes; } MemFile* file_; RateLimiter* rate_limiter_; }; class MockEnvDirectory : public Directory { public: Status Fsync() override { return Status::OK(); } }; class MockEnvFileLock : public FileLock { public: explicit MockEnvFileLock(const std::string& fname) : fname_(fname) {} std::string FileName() const { return fname_; } private: const std::string fname_; }; class TestMemLogger : public Logger { private: std::unique_ptr file_; std::atomic_size_t log_size_; static const uint64_t flush_every_seconds_ = 5; std::atomic_uint_fast64_t last_flush_micros_; Env* env_; 
std::atomic flush_pending_; public: TestMemLogger(std::unique_ptr f, Env* env, const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL) : Logger(log_level), file_(std::move(f)), log_size_(0), last_flush_micros_(0), env_(env), flush_pending_(false) {} ~TestMemLogger() override {} void Flush() override { if (flush_pending_) { flush_pending_ = false; } last_flush_micros_ = env_->NowMicros(); } using Logger::Logv; void Logv(const char* format, va_list ap) override { // We try twice: the first time with a fixed-size stack allocated buffer, // and the second time with a much larger dynamically allocated buffer. char buffer[500]; for (int iter = 0; iter < 2; iter++) { char* base; int bufsize; if (iter == 0) { bufsize = sizeof(buffer); base = buffer; } else { bufsize = 30000; base = new char[bufsize]; } char* p = base; char* limit = base + bufsize; struct timeval now_tv; gettimeofday(&now_tv, nullptr); const time_t seconds = now_tv.tv_sec; struct tm t; memset(&t, 0, sizeof(t)); struct tm* ret __attribute__((__unused__)); ret = localtime_r(&seconds, &t); assert(ret); p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec)); // Print the message if (p < limit) { va_list backup_ap; va_copy(backup_ap, ap); p += vsnprintf(p, limit - p, format, backup_ap); va_end(backup_ap); } // Truncate to available space if necessary if (p >= limit) { if (iter == 0) { continue; // Try again with larger buffer } else { p = limit - 1; } } // Add newline if necessary if (p == base || p[-1] != '\n') { *p++ = '\n'; } assert(p <= limit); const size_t write_size = p - base; file_->Append(Slice(base, write_size)); flush_pending_ = true; log_size_ += write_size; uint64_t now_micros = static_cast(now_tv.tv_sec) * 1000000 + now_tv.tv_usec; if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { flush_pending_ = false; last_flush_micros_ = now_micros; } if (base != buffer) { 
delete[] base; } break; } } size_t GetLogFileSize() const override { return log_size_; } }; } // Anonymous namespace MockEnv::MockEnv(Env* base_env) : EnvWrapper(base_env), fake_sleep_micros_(0) {} MockEnv::~MockEnv() { for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i) { i->second->Unref(); } } // Partial implementation of the Env interface. Status MockEnv::NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& /*soptions*/) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); if (file_map_.find(fn) == file_map_.end()) { *result = nullptr; return Status::IOError(fn, "File not found"); } auto* f = file_map_[fn]; if (f->is_lock_file()) { return Status::InvalidArgument(fn, "Cannot open a lock file."); } result->reset(new MockSequentialFile(f)); return Status::OK(); } Status MockEnv::NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& /*soptions*/) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); if (file_map_.find(fn) == file_map_.end()) { *result = nullptr; return Status::IOError(fn, "File not found"); } auto* f = file_map_[fn]; if (f->is_lock_file()) { return Status::InvalidArgument(fn, "Cannot open a lock file."); } result->reset(new MockRandomAccessFile(f)); return Status::OK(); } Status MockEnv::NewRandomRWFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& /*soptions*/) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); if (file_map_.find(fn) == file_map_.end()) { *result = nullptr; return Status::IOError(fn, "File not found"); } auto* f = file_map_[fn]; if (f->is_lock_file()) { return Status::InvalidArgument(fn, "Cannot open a lock file."); } result->reset(new MockRandomRWFile(f)); return Status::OK(); } Status MockEnv::ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* result, const EnvOptions& options) { auto s = RenameFile(old_fname, fname); if (!s.ok()) { return s; } 
result->reset(); return NewWritableFile(fname, result, options); } Status MockEnv::NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& env_options) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); if (file_map_.find(fn) != file_map_.end()) { DeleteFileInternal(fn); } MemFile* file = new MemFile(this, fn, false); file->Ref(); file_map_[fn] = file; result->reset(new MockWritableFile(file, env_options.rate_limiter)); return Status::OK(); } Status MockEnv::NewDirectory(const std::string& /*name*/, std::unique_ptr* result) { result->reset(new MockEnvDirectory()); return Status::OK(); } Status MockEnv::FileExists(const std::string& fname) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); if (file_map_.find(fn) != file_map_.end()) { // File exists return Status::OK(); } // Now also check if fn exists as a dir for (const auto& iter : file_map_) { const std::string& filename = iter.first; if (filename.size() >= fn.size() + 1 && filename[fn.size()] == '/' && Slice(filename).starts_with(Slice(fn))) { return Status::OK(); } } return Status::NotFound(); } Status MockEnv::GetChildren(const std::string& dir, std::vector* result) { auto d = NormalizePath(dir); bool found_dir = false; { MutexLock lock(&mutex_); result->clear(); for (const auto& iter : file_map_) { const std::string& filename = iter.first; if (filename == d) { found_dir = true; } else if (filename.size() >= d.size() + 1 && filename[d.size()] == '/' && Slice(filename).starts_with(Slice(d))) { found_dir = true; size_t next_slash = filename.find('/', d.size() + 1); if (next_slash != std::string::npos) { result->push_back( filename.substr(d.size() + 1, next_slash - d.size() - 1)); } else { result->push_back(filename.substr(d.size() + 1)); } } } } result->erase(std::unique(result->begin(), result->end()), result->end()); return found_dir ? 
Status::OK() : Status::NotFound(); } void MockEnv::DeleteFileInternal(const std::string& fname) { assert(fname == NormalizePath(fname)); const auto& pair = file_map_.find(fname); if (pair != file_map_.end()) { pair->second->Unref(); file_map_.erase(fname); } } Status MockEnv::DeleteFile(const std::string& fname) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); if (file_map_.find(fn) == file_map_.end()) { return Status::IOError(fn, "File not found"); } DeleteFileInternal(fn); return Status::OK(); } Status MockEnv::Truncate(const std::string& fname, size_t size) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); auto iter = file_map_.find(fn); if (iter == file_map_.end()) { return Status::IOError(fn, "File not found"); } iter->second->Truncate(size); return Status::OK(); } Status MockEnv::CreateDir(const std::string& dirname) { auto dn = NormalizePath(dirname); MutexLock lock(&mutex_); if (file_map_.find(dn) == file_map_.end()) { MemFile* file = new MemFile(this, dn, false); file->Ref(); file_map_[dn] = file; } else { return Status::IOError(); } return Status::OK(); } Status MockEnv::CreateDirIfMissing(const std::string& dirname) { CreateDir(dirname); return Status::OK(); } Status MockEnv::DeleteDir(const std::string& dirname) { return DeleteFile(dirname); } Status MockEnv::GetFileSize(const std::string& fname, uint64_t* file_size) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); auto iter = file_map_.find(fn); if (iter == file_map_.end()) { return Status::IOError(fn, "File not found"); } *file_size = iter->second->Size(); return Status::OK(); } Status MockEnv::GetFileModificationTime(const std::string& fname, uint64_t* time) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); auto iter = file_map_.find(fn); if (iter == file_map_.end()) { return Status::IOError(fn, "File not found"); } *time = iter->second->ModifiedTime(); return Status::OK(); } Status MockEnv::RenameFile(const std::string& src, const std::string& dest) { auto s = 
NormalizePath(src); auto t = NormalizePath(dest); MutexLock lock(&mutex_); if (file_map_.find(s) == file_map_.end()) { return Status::IOError(s, "File not found"); } DeleteFileInternal(t); file_map_[t] = file_map_[s]; file_map_.erase(s); return Status::OK(); } Status MockEnv::LinkFile(const std::string& src, const std::string& dest) { auto s = NormalizePath(src); auto t = NormalizePath(dest); MutexLock lock(&mutex_); if (file_map_.find(s) == file_map_.end()) { return Status::IOError(s, "File not found"); } DeleteFileInternal(t); file_map_[t] = file_map_[s]; file_map_[t]->Ref(); // Otherwise it might get deleted when noone uses s return Status::OK(); } Status MockEnv::NewLogger(const std::string& fname, std::shared_ptr* result) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); auto iter = file_map_.find(fn); MemFile* file = nullptr; if (iter == file_map_.end()) { file = new MemFile(this, fn, false); file->Ref(); file_map_[fn] = file; } else { file = iter->second; } std::unique_ptr f(new MockWritableFile(file, nullptr)); result->reset(new TestMemLogger(std::move(f), this)); return Status::OK(); } Status MockEnv::LockFile(const std::string& fname, FileLock** flock) { auto fn = NormalizePath(fname); { MutexLock lock(&mutex_); if (file_map_.find(fn) != file_map_.end()) { if (!file_map_[fn]->is_lock_file()) { return Status::InvalidArgument(fname, "Not a lock file."); } if (!file_map_[fn]->Lock()) { return Status::IOError(fn, "Lock is already held."); } } else { auto* file = new MemFile(this, fn, true); file->Ref(); file->Lock(); file_map_[fn] = file; } } *flock = new MockEnvFileLock(fn); return Status::OK(); } Status MockEnv::UnlockFile(FileLock* flock) { std::string fn = static_cast_with_check(flock)->FileName(); { MutexLock lock(&mutex_); if (file_map_.find(fn) != file_map_.end()) { if (!file_map_[fn]->is_lock_file()) { return Status::InvalidArgument(fn, "Not a lock file."); } file_map_[fn]->Unlock(); } } delete flock; return Status::OK(); } Status 
MockEnv::GetTestDirectory(std::string* path) { *path = "/test"; return Status::OK(); } Status MockEnv::GetCurrentTime(int64_t* unix_time) { auto s = EnvWrapper::GetCurrentTime(unix_time); if (s.ok()) { *unix_time += fake_sleep_micros_.load() / (1000 * 1000); } return s; } uint64_t MockEnv::NowMicros() { return EnvWrapper::NowMicros() + fake_sleep_micros_.load(); } uint64_t MockEnv::NowNanos() { return EnvWrapper::NowNanos() + fake_sleep_micros_.load() * 1000; } Status MockEnv::CorruptBuffer(const std::string& fname) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); auto iter = file_map_.find(fn); if (iter == file_map_.end()) { return Status::IOError(fn, "File not found"); } iter->second->CorruptBuffer(); return Status::OK(); } void MockEnv::FakeSleepForMicroseconds(int64_t micros) { fake_sleep_micros_.fetch_add(micros); } #ifndef ROCKSDB_LITE // This is to maintain the behavior before swithcing from InMemoryEnv to MockEnv Env* NewMemEnv(Env* base_env) { return new MockEnv(base_env); } #else // ROCKSDB_LITE Env* NewMemEnv(Env* /*base_env*/) { return nullptr; } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/mock_env.h000066400000000000000000000077211370372246700160160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include "rocksdb/env.h" #include "rocksdb/status.h" #include "port/port.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { class MemFile; class MockEnv : public EnvWrapper { public: explicit MockEnv(Env* base_env); ~MockEnv() override; // Partial implementation of the Env interface. Status RegisterDbPaths(const std::vector& /*paths*/) override { return Status::OK(); } Status UnregisterDbPaths(const std::vector& /*paths*/) override { return Status::OK(); } Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) override; Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) override; Status NewRandomRWFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* result, const EnvOptions& options) override; Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& env_options) override; Status NewDirectory(const std::string& name, std::unique_ptr* result) override; Status FileExists(const std::string& fname) override; Status GetChildren(const std::string& dir, std::vector* result) override; void DeleteFileInternal(const std::string& fname); Status DeleteFile(const std::string& fname) override; Status Truncate(const std::string& fname, size_t size) override; Status CreateDir(const std::string& dirname) override; Status CreateDirIfMissing(const std::string& dirname) override; Status DeleteDir(const std::string& dirname) override; Status GetFileSize(const std::string& fname, uint64_t* file_size) override; Status GetFileModificationTime(const std::string& fname, uint64_t* time) override; Status RenameFile(const std::string& src, const std::string& target) override; Status LinkFile(const std::string& src, const std::string& target) override; Status 
NewLogger(const std::string& fname, std::shared_ptr* result) override; Status LockFile(const std::string& fname, FileLock** flock) override; Status UnlockFile(FileLock* flock) override; Status GetTestDirectory(std::string* path) override; // Results of these can be affected by FakeSleepForMicroseconds() Status GetCurrentTime(int64_t* unix_time) override; uint64_t NowMicros() override; uint64_t NowNanos() override; Status CorruptBuffer(const std::string& fname); // Doesn't really sleep, just affects output of GetCurrentTime(), NowMicros() // and NowNanos() void FakeSleepForMicroseconds(int64_t micros); private: // Map from filenames to MemFile objects, representing a simple file system. typedef std::map FileSystem; port::Mutex mutex_; FileSystem file_map_; // Protected by mutex_. std::atomic fake_sleep_micros_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/env/mock_env_test.cc000066400000000000000000000053271370372246700172130ustar00rootroot00000000000000// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#include "env/mock_env.h" #include #include #include "rocksdb/env.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { class MockEnvTest : public testing::Test { public: MockEnv* env_; const EnvOptions soptions_; MockEnvTest() : env_(new MockEnv(Env::Default())) { } ~MockEnvTest() override { delete env_; } }; TEST_F(MockEnvTest, Corrupt) { const std::string kGood = "this is a good string, synced to disk"; const std::string kCorrupted = "this part may be corrupted"; const std::string kFileName = "/dir/f"; std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile(kFileName, &writable_file, soptions_)); ASSERT_OK(writable_file->Append(kGood)); ASSERT_TRUE(writable_file->GetFileSize() == kGood.size()); std::string scratch; scratch.resize(kGood.size() + kCorrupted.size() + 16); Slice result; std::unique_ptr rand_file; ASSERT_OK(env_->NewRandomAccessFile(kFileName, &rand_file, soptions_)); ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); ASSERT_EQ(result.compare(kGood), 0); // Sync + corrupt => no change ASSERT_OK(writable_file->Fsync()); ASSERT_OK(dynamic_cast(env_)->CorruptBuffer(kFileName)); result.clear(); ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); ASSERT_EQ(result.compare(kGood), 0); // Add new data and corrupt it ASSERT_OK(writable_file->Append(kCorrupted)); ASSERT_TRUE(writable_file->GetFileSize() == kGood.size() + kCorrupted.size()); result.clear(); ASSERT_OK(rand_file->Read(kGood.size(), kCorrupted.size(), &result, &(scratch[0]))); ASSERT_EQ(result.compare(kCorrupted), 0); // Corrupted ASSERT_OK(dynamic_cast(env_)->CorruptBuffer(kFileName)); result.clear(); ASSERT_OK(rand_file->Read(kGood.size(), kCorrupted.size(), &result, &(scratch[0]))); ASSERT_NE(result.compare(kCorrupted), 0); } TEST_F(MockEnvTest, FakeSleeping) { int64_t now = 0; auto s = env_->GetCurrentTime(&now); ASSERT_OK(s); env_->FakeSleepForMicroseconds(3 * 1000 * 1000); int64_t after_sleep = 0; s = env_->GetCurrentTime(&after_sleep); 
ASSERT_OK(s); auto delta = after_sleep - now; // this will be true unless test runs for 2 seconds ASSERT_TRUE(delta == 3 || delta == 4); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/examples/000077500000000000000000000000001370372246700150635ustar00rootroot00000000000000rocksdb-6.11.4/examples/.gitignore000066400000000000000000000003101370372246700170450ustar00rootroot00000000000000c_simple_example column_families_example compact_files_example compaction_filter_example multi_processes_example optimistic_transaction_example options_file_example simple_example transaction_example rocksdb-6.11.4/examples/Makefile000066400000000000000000000047301370372246700165270ustar00rootroot00000000000000include ../make_config.mk ifndef DISABLE_JEMALLOC ifdef JEMALLOC PLATFORM_CXXFLAGS += -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE endif EXEC_LDFLAGS := $(JEMALLOC_LIB) $(EXEC_LDFLAGS) -lpthread PLATFORM_CXXFLAGS += $(JEMALLOC_INCLUDE) endif ifneq ($(USE_RTTI), 1) CXXFLAGS += -fno-rtti endif .PHONY: clean librocksdb all: simple_example column_families_example compact_files_example c_simple_example optimistic_transaction_example transaction_example compaction_filter_example options_file_example simple_example: librocksdb simple_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) column_families_example: librocksdb column_families_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) compaction_filter_example: librocksdb compaction_filter_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) compact_files_example: librocksdb compact_files_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 
$(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) .c.o: $(CC) $(CFLAGS) -c $< -o $@ -I../include c_simple_example: librocksdb c_simple_example.o $(CXX) $@.o -o$@ ../librocksdb.a $(PLATFORM_LDFLAGS) $(EXEC_LDFLAGS) optimistic_transaction_example: librocksdb optimistic_transaction_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) transaction_example: librocksdb transaction_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) options_file_example: librocksdb options_file_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) multi_processes_example: librocksdb multi_processes_example.cc $(CXX) $(CXXFLAGS) $@.cc -o$@ ../librocksdb.a -I../include -O2 -std=c++11 $(PLATFORM_LDFLAGS) $(PLATFORM_CXXFLAGS) $(EXEC_LDFLAGS) clean: rm -rf ./simple_example ./column_families_example ./compact_files_example ./compaction_filter_example ./c_simple_example c_simple_example.o ./optimistic_transaction_example ./transaction_example ./options_file_example ./multi_processes_example librocksdb: cd .. && $(MAKE) static_lib rocksdb-6.11.4/examples/README.md000066400000000000000000000001701370372246700163400ustar00rootroot000000000000001. Compile RocksDB first by executing `make static_lib` in parent dir 2. Compile all examples: `cd examples/; make all` rocksdb-6.11.4/examples/c_simple_example.c000066400000000000000000000051331370372246700205370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include #include "rocksdb/c.h" #include // sysconf() - get CPU count const char DBPath[] = "/tmp/rocksdb_simple_example"; const char DBBackupPath[] = "/tmp/rocksdb_simple_example_backup"; int main(int argc, char **argv) { rocksdb_t *db; rocksdb_backup_engine_t *be; rocksdb_options_t *options = rocksdb_options_create(); // Optimize RocksDB. This is the easiest way to // get RocksDB to perform well long cpus = sysconf(_SC_NPROCESSORS_ONLN); // get # of online cores rocksdb_options_increase_parallelism(options, (int)(cpus)); rocksdb_options_optimize_level_style_compaction(options, 0); // create the DB if it's not already present rocksdb_options_set_create_if_missing(options, 1); // open DB char *err = NULL; db = rocksdb_open(options, DBPath, &err); assert(!err); // open Backup Engine that we will use for backing up our database be = rocksdb_backup_engine_open(options, DBBackupPath, &err); assert(!err); // Put key-value rocksdb_writeoptions_t *writeoptions = rocksdb_writeoptions_create(); const char key[] = "key"; const char *value = "value"; rocksdb_put(db, writeoptions, key, strlen(key), value, strlen(value) + 1, &err); assert(!err); // Get value rocksdb_readoptions_t *readoptions = rocksdb_readoptions_create(); size_t len; char *returned_value = rocksdb_get(db, readoptions, key, strlen(key), &len, &err); assert(!err); assert(strcmp(returned_value, "value") == 0); free(returned_value); // create new backup in a directory specified by DBBackupPath rocksdb_backup_engine_create_new_backup(be, db, &err); assert(!err); rocksdb_close(db); // If something is wrong, you might want to restore data from last backup rocksdb_restore_options_t *restore_options = rocksdb_restore_options_create(); rocksdb_backup_engine_restore_db_from_latest_backup(be, DBPath, DBPath, restore_options, &err); assert(!err); rocksdb_restore_options_destroy(restore_options); db = rocksdb_open(options, DBPath, &err); assert(!err); // cleanup 
rocksdb_writeoptions_destroy(writeoptions); rocksdb_readoptions_destroy(readoptions); rocksdb_options_destroy(options); rocksdb_backup_engine_close(be); rocksdb_close(db); return 0; } rocksdb-6.11.4/examples/column_families_example.cc000066400000000000000000000040621370372246700222550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include "rocksdb/db.h" #include "rocksdb/slice.h" #include "rocksdb/options.h" using namespace ROCKSDB_NAMESPACE; std::string kDBPath = "/tmp/rocksdb_column_families_example"; int main() { // open DB Options options; options.create_if_missing = true; DB* db; Status s = DB::Open(options, kDBPath, &db); assert(s.ok()); // create column family ColumnFamilyHandle* cf; s = db->CreateColumnFamily(ColumnFamilyOptions(), "new_cf", &cf); assert(s.ok()); // close DB s = db->DestroyColumnFamilyHandle(cf); assert(s.ok()); delete db; // open DB with two column families std::vector column_families; // have to open default column family column_families.push_back(ColumnFamilyDescriptor( kDefaultColumnFamilyName, ColumnFamilyOptions())); // open the new one, too column_families.push_back(ColumnFamilyDescriptor( "new_cf", ColumnFamilyOptions())); std::vector handles; s = DB::Open(DBOptions(), kDBPath, column_families, &handles, &db); assert(s.ok()); // put and get from non-default column family s = db->Put(WriteOptions(), handles[1], Slice("key"), Slice("value")); assert(s.ok()); std::string value; s = db->Get(ReadOptions(), handles[1], Slice("key"), &value); assert(s.ok()); // atomic write WriteBatch batch; batch.Put(handles[0], Slice("key2"), Slice("value2")); batch.Put(handles[1], Slice("key3"), Slice("value3")); batch.Delete(handles[0], Slice("key")); s = db->Write(WriteOptions(), &batch); 
assert(s.ok()); // drop column family s = db->DropColumnFamily(handles[1]); assert(s.ok()); // close db for (auto handle : handles) { s = db->DestroyColumnFamilyHandle(handle); assert(s.ok()); } delete db; return 0; } rocksdb-6.11.4/examples/compact_files_example.cc000066400000000000000000000133451370372246700217230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // An example code demonstrating how to use CompactFiles, EventListener, // and GetColumnFamilyMetaData APIs to implement custom compaction algorithm. #include #include #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/options.h" using namespace ROCKSDB_NAMESPACE; std::string kDBPath = "/tmp/rocksdb_compact_files_example"; struct CompactionTask; // This is an example interface of external-compaction algorithm. // Compaction algorithm can be implemented outside the core-RocksDB // code by using the pluggable compaction APIs that RocksDb provides. class Compactor : public EventListener { public: // Picks and returns a compaction task given the specified DB // and column family. It is the caller's responsibility to // destroy the returned CompactionTask. Returns "nullptr" // if it cannot find a proper compaction task. virtual CompactionTask* PickCompaction( DB* db, const std::string& cf_name) = 0; // Schedule and run the specified compaction task in background. virtual void ScheduleCompaction(CompactionTask *task) = 0; }; // Example structure that describes a compaction task. 
struct CompactionTask { CompactionTask( DB* _db, Compactor* _compactor, const std::string& _column_family_name, const std::vector& _input_file_names, const int _output_level, const CompactionOptions& _compact_options, bool _retry_on_fail) : db(_db), compactor(_compactor), column_family_name(_column_family_name), input_file_names(_input_file_names), output_level(_output_level), compact_options(_compact_options), retry_on_fail(_retry_on_fail) {} DB* db; Compactor* compactor; const std::string& column_family_name; std::vector input_file_names; int output_level; CompactionOptions compact_options; bool retry_on_fail; }; // A simple compaction algorithm that always compacts everything // to the highest level whenever possible. class FullCompactor : public Compactor { public: explicit FullCompactor(const Options options) : options_(options) { compact_options_.compression = options_.compression; compact_options_.output_file_size_limit = options_.target_file_size_base; } // When flush happens, it determines whether to trigger compaction. If // triggered_writes_stop is true, it will also set the retry flag of // compaction-task to true. void OnFlushCompleted( DB* db, const FlushJobInfo& info) override { CompactionTask* task = PickCompaction(db, info.cf_name); if (task != nullptr) { if (info.triggered_writes_stop) { task->retry_on_fail = true; } // Schedule compaction in a different thread. ScheduleCompaction(task); } } // Always pick a compaction which includes all files whenever possible. 
CompactionTask* PickCompaction( DB* db, const std::string& cf_name) override { ColumnFamilyMetaData cf_meta; db->GetColumnFamilyMetaData(&cf_meta); std::vector input_file_names; for (auto level : cf_meta.levels) { for (auto file : level.files) { if (file.being_compacted) { return nullptr; } input_file_names.push_back(file.name); } } return new CompactionTask( db, this, cf_name, input_file_names, options_.num_levels - 1, compact_options_, false); } // Schedule the specified compaction task in background. void ScheduleCompaction(CompactionTask* task) override { options_.env->Schedule(&FullCompactor::CompactFiles, task); } static void CompactFiles(void* arg) { std::unique_ptr task( reinterpret_cast(arg)); assert(task); assert(task->db); Status s = task->db->CompactFiles( task->compact_options, task->input_file_names, task->output_level); printf("CompactFiles() finished with status %s\n", s.ToString().c_str()); if (!s.ok() && !s.IsIOError() && task->retry_on_fail) { // If a compaction task with its retry_on_fail=true failed, // try to schedule another compaction in case the reason // is not an IO error. CompactionTask* new_task = task->compactor->PickCompaction( task->db, task->column_family_name); task->compactor->ScheduleCompaction(new_task); } } private: Options options_; CompactionOptions compact_options_; }; int main() { Options options; options.create_if_missing = true; // Disable RocksDB background compaction. options.compaction_style = kCompactionStyleNone; // Small slowdown and stop trigger for experimental purpose. 
options.level0_slowdown_writes_trigger = 3; options.level0_stop_writes_trigger = 5; options.IncreaseParallelism(5); options.listeners.emplace_back(new FullCompactor(options)); DB* db = nullptr; DestroyDB(kDBPath, options); Status s = DB::Open(options, kDBPath, &db); assert(s.ok()); assert(db); // if background compaction is not working, write will stall // because of options.level0_stop_writes_trigger for (int i = 1000; i < 99999; ++i) { db->Put(WriteOptions(), std::to_string(i), std::string(500, 'a' + (i % 26))); } // verify the values are still there std::string value; for (int i = 1000; i < 99999; ++i) { db->Get(ReadOptions(), std::to_string(i), &value); assert(value == std::string(500, 'a' + (i % 26))); } // close the db. delete db; return 0; } rocksdb-6.11.4/examples/compaction_filter_example.cc000066400000000000000000000060541370372246700226130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include class MyMerge : public ROCKSDB_NAMESPACE::MergeOperator { public: virtual bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { merge_out->new_value.clear(); if (merge_in.existing_value != nullptr) { merge_out->new_value.assign(merge_in.existing_value->data(), merge_in.existing_value->size()); } for (const ROCKSDB_NAMESPACE::Slice& m : merge_in.operand_list) { fprintf(stderr, "Merge(%s)\n", m.ToString().c_str()); // the compaction filter filters out bad values assert(m.ToString() != "bad"); merge_out->new_value.assign(m.data(), m.size()); } return true; } const char* Name() const override { return "MyMerge"; } }; class MyFilter : public ROCKSDB_NAMESPACE::CompactionFilter { public: bool Filter(int level, const ROCKSDB_NAMESPACE::Slice& key, const ROCKSDB_NAMESPACE::Slice& existing_value, std::string* new_value, bool* value_changed) const override { fprintf(stderr, "Filter(%s)\n", key.ToString().c_str()); ++count_; assert(*value_changed == false); return false; } bool FilterMergeOperand( int level, const ROCKSDB_NAMESPACE::Slice& key, const ROCKSDB_NAMESPACE::Slice& existing_value) const override { fprintf(stderr, "FilterMerge(%s)\n", key.ToString().c_str()); ++merge_count_; return existing_value == "bad"; } const char* Name() const override { return "MyFilter"; } mutable int count_ = 0; mutable int merge_count_ = 0; }; int main() { ROCKSDB_NAMESPACE::DB* raw_db; ROCKSDB_NAMESPACE::Status status; MyFilter filter; int ret = system("rm -rf /tmp/rocksmergetest"); if (ret != 0) { fprintf(stderr, "Error deleting /tmp/rocksmergetest, code: %d\n", ret); return ret; } ROCKSDB_NAMESPACE::Options options; options.create_if_missing = true; options.merge_operator.reset(new MyMerge); options.compaction_filter = &filter; status = ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/rocksmergetest", &raw_db); assert(status.ok()); std::unique_ptr db(raw_db); ROCKSDB_NAMESPACE::WriteOptions wopts; 
db->Merge(wopts, "0", "bad"); // This is filtered out db->Merge(wopts, "1", "data1"); db->Merge(wopts, "1", "bad"); db->Merge(wopts, "1", "data2"); db->Merge(wopts, "1", "bad"); db->Merge(wopts, "3", "data3"); db->CompactRange(ROCKSDB_NAMESPACE::CompactRangeOptions(), nullptr, nullptr); fprintf(stderr, "filter.count_ = %d\n", filter.count_); assert(filter.count_ == 0); fprintf(stderr, "filter.merge_count_ = %d\n", filter.merge_count_); assert(filter.merge_count_ == 6); } rocksdb-6.11.4/examples/multi_processes_example.cc000066400000000000000000000274551370372246700223420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // How to use this example // Open two terminals, in one of them, run `./multi_processes_example 0` to // start a process running the primary instance. This will create a new DB in // kDBPath. The process will run for a while inserting keys to the normal // RocksDB database. // Next, go to the other terminal and run `./multi_processes_example 1` to // start a process running the secondary instance. This will create a secondary // instance following the aforementioned primary instance. This process will // run for a while, tailing the logs of the primary. After process with primary // instance exits, this process will keep running until you hit 'CTRL+C'. 
#include #include #include #include #include #include #include #include #if defined(OS_LINUX) #include #include #include #include #include #include #endif // !OS_LINUX #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor; using ROCKSDB_NAMESPACE::ColumnFamilyHandle; using ROCKSDB_NAMESPACE::ColumnFamilyOptions; using ROCKSDB_NAMESPACE::DB; using ROCKSDB_NAMESPACE::FlushOptions; using ROCKSDB_NAMESPACE::Iterator; using ROCKSDB_NAMESPACE::Options; using ROCKSDB_NAMESPACE::ReadOptions; using ROCKSDB_NAMESPACE::Slice; using ROCKSDB_NAMESPACE::Status; using ROCKSDB_NAMESPACE::WriteOptions; const std::string kDBPath = "/tmp/rocksdb_multi_processes_example"; const std::string kPrimaryStatusFile = "/tmp/rocksdb_multi_processes_example_primary_status"; const uint64_t kMaxKey = 600000; const size_t kMaxValueLength = 256; const size_t kNumKeysPerFlush = 1000; const std::vector& GetColumnFamilyNames() { static std::vector column_family_names = { ROCKSDB_NAMESPACE::kDefaultColumnFamilyName, "pikachu"}; return column_family_names; } inline bool IsLittleEndian() { uint32_t x = 1; return *reinterpret_cast(&x) != 0; } static std::atomic& ShouldSecondaryWait() { static std::atomic should_secondary_wait{1}; return should_secondary_wait; } static std::string Key(uint64_t k) { std::string ret; if (IsLittleEndian()) { ret.append(reinterpret_cast(&k), sizeof(k)); } else { char buf[sizeof(k)]; buf[0] = k & 0xff; buf[1] = (k >> 8) & 0xff; buf[2] = (k >> 16) & 0xff; buf[3] = (k >> 24) & 0xff; buf[4] = (k >> 32) & 0xff; buf[5] = (k >> 40) & 0xff; buf[6] = (k >> 48) & 0xff; buf[7] = (k >> 56) & 0xff; ret.append(buf, sizeof(k)); } size_t i = 0, j = ret.size() - 1; while (i < j) { char tmp = ret[i]; ret[i] = ret[j]; ret[j] = tmp; ++i; --j; } return ret; } static uint64_t Key(std::string key) { assert(key.size() == sizeof(uint64_t)); size_t i = 0, j = key.size() - 1; while (i < j) { char tmp = key[i]; key[i] = key[j]; 
key[j] = tmp; ++i; --j; } uint64_t ret = 0; if (IsLittleEndian()) { memcpy(&ret, key.c_str(), sizeof(uint64_t)); } else { const char* buf = key.c_str(); ret |= static_cast(buf[0]); ret |= (static_cast(buf[1]) << 8); ret |= (static_cast(buf[2]) << 16); ret |= (static_cast(buf[3]) << 24); ret |= (static_cast(buf[4]) << 32); ret |= (static_cast(buf[5]) << 40); ret |= (static_cast(buf[6]) << 48); ret |= (static_cast(buf[7]) << 56); } return ret; } static Slice GenerateRandomValue(const size_t max_length, char scratch[]) { size_t sz = 1 + (std::rand() % max_length); int rnd = std::rand(); for (size_t i = 0; i != sz; ++i) { scratch[i] = static_cast(rnd ^ i); } return Slice(scratch, sz); } static bool ShouldCloseDB() { return true; } // TODO: port this example to other systems. It should be straightforward for // POSIX-compliant systems. #if defined(OS_LINUX) void CreateDB() { long my_pid = static_cast(getpid()); Options options; Status s = ROCKSDB_NAMESPACE::DestroyDB(kDBPath, options); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to destroy DB: %s\n", my_pid, s.ToString().c_str()); assert(false); } options.create_if_missing = true; DB* db = nullptr; s = DB::Open(options, kDBPath, &db); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to open DB: %s\n", my_pid, s.ToString().c_str()); assert(false); } std::vector handles; ColumnFamilyOptions cf_opts(options); for (const auto& cf_name : GetColumnFamilyNames()) { if (ROCKSDB_NAMESPACE::kDefaultColumnFamilyName != cf_name) { ColumnFamilyHandle* handle = nullptr; s = db->CreateColumnFamily(cf_opts, cf_name, &handle); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to create CF %s: %s\n", my_pid, cf_name.c_str(), s.ToString().c_str()); assert(false); } handles.push_back(handle); } } fprintf(stdout, "[process %ld] Column families created\n", my_pid); for (auto h : handles) { delete h; } handles.clear(); delete db; } void RunPrimary() { long my_pid = static_cast(getpid()); fprintf(stdout, "[process %ld] Primary 
instance starts\n", my_pid); CreateDB(); std::srand(time(nullptr)); DB* db = nullptr; Options options; options.create_if_missing = false; std::vector column_families; for (const auto& cf_name : GetColumnFamilyNames()) { column_families.push_back(ColumnFamilyDescriptor(cf_name, options)); } std::vector handles; WriteOptions write_opts; char val_buf[kMaxValueLength] = {0}; uint64_t curr_key = 0; while (curr_key < kMaxKey) { Status s; if (nullptr == db) { s = DB::Open(options, kDBPath, column_families, &handles, &db); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to open DB: %s\n", my_pid, s.ToString().c_str()); assert(false); } } assert(nullptr != db); assert(handles.size() == GetColumnFamilyNames().size()); for (auto h : handles) { assert(nullptr != h); for (size_t i = 0; i != kNumKeysPerFlush; ++i) { Slice key = Key(curr_key + static_cast(i)); Slice value = GenerateRandomValue(kMaxValueLength, val_buf); s = db->Put(write_opts, h, key, value); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to insert\n", my_pid); assert(false); } } s = db->Flush(FlushOptions(), h); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to flush\n", my_pid); assert(false); } } curr_key += static_cast(kNumKeysPerFlush); if (ShouldCloseDB()) { for (auto h : handles) { delete h; } handles.clear(); delete db; db = nullptr; } } if (nullptr != db) { for (auto h : handles) { delete h; } handles.clear(); delete db; db = nullptr; } fprintf(stdout, "[process %ld] Finished adding keys\n", my_pid); } void secondary_instance_sigint_handler(int signal) { ShouldSecondaryWait().store(0, std::memory_order_relaxed); fprintf(stdout, "\n"); fflush(stdout); }; void RunSecondary() { ::signal(SIGINT, secondary_instance_sigint_handler); long my_pid = static_cast(getpid()); const std::string kSecondaryPath = "/tmp/rocksdb_multi_processes_example_secondary"; // Create directory if necessary if (nullptr == opendir(kSecondaryPath.c_str())) { int ret = mkdir(kSecondaryPath.c_str(), S_IRWXU | S_IRWXG | 
S_IROTH | S_IXOTH); if (ret < 0) { perror("failed to create directory for secondary instance"); exit(0); } } DB* db = nullptr; Options options; options.create_if_missing = false; options.max_open_files = -1; Status s = DB::OpenAsSecondary(options, kDBPath, kSecondaryPath, &db); if (!s.ok()) { fprintf(stderr, "[process %ld] Failed to open in secondary mode: %s\n", my_pid, s.ToString().c_str()); assert(false); } else { fprintf(stdout, "[process %ld] Secondary instance starts\n", my_pid); } ReadOptions ropts; ropts.verify_checksums = true; ropts.total_order_seek = true; std::vector test_threads; test_threads.emplace_back([&]() { while (1 == ShouldSecondaryWait().load(std::memory_order_relaxed)) { std::unique_ptr iter(db->NewIterator(ropts)); iter->SeekToFirst(); size_t count = 0; for (; iter->Valid(); iter->Next()) { ++count; } } fprintf(stdout, "[process %ld] Range_scan thread finished\n", my_pid); }); test_threads.emplace_back([&]() { std::srand(time(nullptr)); while (1 == ShouldSecondaryWait().load(std::memory_order_relaxed)) { Slice key = Key(std::rand() % kMaxKey); std::string value; db->Get(ropts, key, &value); } fprintf(stdout, "[process %ld] Point lookup thread finished\n", my_pid); }); uint64_t curr_key = 0; while (1 == ShouldSecondaryWait().load(std::memory_order_relaxed)) { s = db->TryCatchUpWithPrimary(); if (!s.ok()) { fprintf(stderr, "[process %ld] error while trying to catch up with " "primary %s\n", my_pid, s.ToString().c_str()); assert(false); } { std::unique_ptr iter(db->NewIterator(ropts)); if (!iter) { fprintf(stderr, "[process %ld] Failed to create iterator\n", my_pid); assert(false); } iter->SeekToLast(); if (iter->Valid()) { uint64_t curr_max_key = Key(iter->key().ToString()); if (curr_max_key != curr_key) { fprintf(stdout, "[process %ld] Observed key %" PRIu64 "\n", my_pid, curr_key); curr_key = curr_max_key; } } } std::this_thread::sleep_for(std::chrono::seconds(1)); } s = db->TryCatchUpWithPrimary(); if (!s.ok()) { fprintf(stderr, "[process 
%ld] error while trying to catch up with " "primary %s\n", my_pid, s.ToString().c_str()); assert(false); } std::vector column_families; for (const auto& cf_name : GetColumnFamilyNames()) { column_families.push_back(ColumnFamilyDescriptor(cf_name, options)); } std::vector handles; DB* verification_db = nullptr; s = DB::OpenForReadOnly(options, kDBPath, column_families, &handles, &verification_db); assert(s.ok()); Iterator* iter1 = verification_db->NewIterator(ropts); iter1->SeekToFirst(); Iterator* iter = db->NewIterator(ropts); iter->SeekToFirst(); for (; iter->Valid() && iter1->Valid(); iter->Next(), iter1->Next()) { if (iter->key().ToString() != iter1->key().ToString()) { fprintf(stderr, "%" PRIu64 "!= %" PRIu64 "\n", Key(iter->key().ToString()), Key(iter1->key().ToString())); assert(false); } else if (iter->value().ToString() != iter1->value().ToString()) { fprintf(stderr, "Value mismatch\n"); assert(false); } } fprintf(stdout, "[process %ld] Verification succeeded\n", my_pid); for (auto& thr : test_threads) { thr.join(); } delete iter; delete iter1; delete db; delete verification_db; } int main(int argc, char** argv) { if (argc < 2) { fprintf(stderr, "%s <0 for primary, 1 for secondary>\n", argv[0]); return 0; } if (atoi(argv[1]) == 0) { RunPrimary(); } else { RunSecondary(); } return 0; } #else // OS_LINUX int main() { fpritnf(stderr, "Not implemented.\n"); return 0; } #endif // !OS_LINUX rocksdb-6.11.4/examples/optimistic_transaction_example.cc000066400000000000000000000117611370372246700237040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/optimistic_transaction_db.h" using namespace ROCKSDB_NAMESPACE; std::string kDBPath = "/tmp/rocksdb_transaction_example"; int main() { // open DB Options options; options.create_if_missing = true; DB* db; OptimisticTransactionDB* txn_db; Status s = OptimisticTransactionDB::Open(options, kDBPath, &txn_db); assert(s.ok()); db = txn_db->GetBaseDB(); WriteOptions write_options; ReadOptions read_options; OptimisticTransactionOptions txn_options; std::string value; //////////////////////////////////////////////////////// // // Simple OptimisticTransaction Example ("Read Committed") // //////////////////////////////////////////////////////// // Start a transaction Transaction* txn = txn_db->BeginTransaction(write_options); assert(txn); // Read a key in this transaction s = txn->Get(read_options, "abc", &value); assert(s.IsNotFound()); // Write a key in this transaction s = txn->Put("abc", "xyz"); assert(s.ok()); // Read a key OUTSIDE this transaction. Does not affect txn. s = db->Get(read_options, "abc", &value); assert(s.IsNotFound()); // Write a key OUTSIDE of this transaction. // Does not affect txn since this is an unrelated key. If we wrote key 'abc' // here, the transaction would fail to commit. 
s = db->Put(write_options, "xyz", "zzz"); assert(s.ok()); s = db->Put(write_options, "abc", "def"); assert(s.ok()); // Commit transaction s = txn->Commit(); assert(s.IsBusy()); delete txn; s = db->Get(read_options, "xyz", &value); assert(s.ok()); assert(value == "zzz"); s = db->Get(read_options, "abc", &value); assert(s.ok()); assert(value == "def"); //////////////////////////////////////////////////////// // // "Repeatable Read" (Snapshot Isolation) Example // -- Using a single Snapshot // //////////////////////////////////////////////////////// // Set a snapshot at start of transaction by setting set_snapshot=true txn_options.set_snapshot = true; txn = txn_db->BeginTransaction(write_options, txn_options); const Snapshot* snapshot = txn->GetSnapshot(); // Write a key OUTSIDE of transaction s = db->Put(write_options, "abc", "xyz"); assert(s.ok()); // Read a key using the snapshot read_options.snapshot = snapshot; s = txn->GetForUpdate(read_options, "abc", &value); assert(s.ok()); assert(value == "def"); // Attempt to commit transaction s = txn->Commit(); // Transaction could not commit since the write outside of the txn conflicted // with the read! assert(s.IsBusy()); delete txn; // Clear snapshot from read options since it is no longer valid read_options.snapshot = nullptr; snapshot = nullptr; s = db->Get(read_options, "abc", &value); assert(s.ok()); assert(value == "xyz"); //////////////////////////////////////////////////////// // // "Read Committed" (Monotonic Atomic Views) Example // --Using multiple Snapshots // //////////////////////////////////////////////////////// // In this example, we set the snapshot multiple times. This is probably // only necessary if you have very strict isolation requirements to // implement. 
// Set a snapshot at start of transaction txn_options.set_snapshot = true; txn = txn_db->BeginTransaction(write_options, txn_options); // Do some reads and writes to key "x" read_options.snapshot = db->GetSnapshot(); s = txn->Get(read_options, "x", &value); assert(s.IsNotFound()); s = txn->Put("x", "x"); assert(s.ok()); // The transaction hasn't committed, so the write is not visible // outside of txn. s = db->Get(read_options, "x", &value); assert(s.IsNotFound()); // Do a write outside of the transaction to key "y" s = db->Put(write_options, "y", "z"); assert(s.ok()); // Set a new snapshot in the transaction txn->SetSnapshot(); read_options.snapshot = db->GetSnapshot(); // Do some reads and writes to key "y" s = txn->GetForUpdate(read_options, "y", &value); assert(s.ok()); assert(value == "z"); txn->Put("y", "y"); // Commit. Since the snapshot was advanced, the write done outside of the // transaction does not prevent this transaction from Committing. s = txn->Commit(); assert(s.ok()); delete txn; // Clear snapshot from read options since it is no longer valid read_options.snapshot = nullptr; // txn is committed, read the latest values. s = db->Get(read_options, "x", &value); assert(s.ok()); assert(value == "x"); s = db->Get(read_options, "y", &value); assert(s.ok()); assert(value == "y"); // Cleanup delete txn_db; DestroyDB(kDBPath, options); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/examples/options_file_example.cc000066400000000000000000000077731370372246700216150ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file demonstrates how to use the utility functions defined in // rocksdb/utilities/options_util.h to open a rocksdb database without // remembering all the rocksdb options. 
#include #include #include #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/table.h" #include "rocksdb/utilities/options_util.h" using namespace ROCKSDB_NAMESPACE; std::string kDBPath = "/tmp/rocksdb_options_file_example"; namespace { // A dummy compaction filter class DummyCompactionFilter : public CompactionFilter { public: virtual ~DummyCompactionFilter() {} virtual bool Filter(int level, const Slice& key, const Slice& existing_value, std::string* new_value, bool* value_changed) const { return false; } virtual const char* Name() const { return "DummyCompactionFilter"; } }; } // namespace int main() { DBOptions db_opt; db_opt.create_if_missing = true; std::vector cf_descs; cf_descs.push_back({kDefaultColumnFamilyName, ColumnFamilyOptions()}); cf_descs.push_back({"new_cf", ColumnFamilyOptions()}); // initialize BlockBasedTableOptions auto cache = NewLRUCache(1 * 1024 * 1024 * 1024); BlockBasedTableOptions bbt_opts; bbt_opts.block_size = 32 * 1024; bbt_opts.block_cache = cache; // initialize column families options std::unique_ptr compaction_filter; compaction_filter.reset(new DummyCompactionFilter()); cf_descs[0].options.table_factory.reset(NewBlockBasedTableFactory(bbt_opts)); cf_descs[0].options.compaction_filter = compaction_filter.get(); cf_descs[1].options.table_factory.reset(NewBlockBasedTableFactory(bbt_opts)); // destroy and open DB DB* db; Status s = DestroyDB(kDBPath, Options(db_opt, cf_descs[0].options)); assert(s.ok()); s = DB::Open(Options(db_opt, cf_descs[0].options), kDBPath, &db); assert(s.ok()); // Create column family, and rocksdb will persist the options. ColumnFamilyHandle* cf; s = db->CreateColumnFamily(ColumnFamilyOptions(), "new_cf", &cf); assert(s.ok()); // close DB delete cf; delete db; // In the following code, we will reopen the rocksdb instance using // the options file stored in the db directory. // Load the options file. 
DBOptions loaded_db_opt; std::vector loaded_cf_descs; ConfigOptions config_options; s = LoadLatestOptions(config_options, kDBPath, &loaded_db_opt, &loaded_cf_descs); assert(s.ok()); assert(loaded_db_opt.create_if_missing == db_opt.create_if_missing); // Initialize pointer options for each column family for (size_t i = 0; i < loaded_cf_descs.size(); ++i) { auto* loaded_bbt_opt = reinterpret_cast( loaded_cf_descs[0].options.table_factory->GetOptions()); // Expect the same as BlockBasedTableOptions will be loaded form file. assert(loaded_bbt_opt->block_size == bbt_opts.block_size); // However, block_cache needs to be manually initialized as documented // in rocksdb/utilities/options_util.h. loaded_bbt_opt->block_cache = cache; } // In addition, as pointer options are initialized with default value, // we need to properly initialized all the pointer options if non-defalut // values are used before calling DB::Open(). assert(loaded_cf_descs[0].options.compaction_filter == nullptr); loaded_cf_descs[0].options.compaction_filter = compaction_filter.get(); // reopen the db using the loaded options. std::vector handles; s = DB::Open(loaded_db_opt, kDBPath, loaded_cf_descs, &handles, &db); assert(s.ok()); // close DB for (auto* handle : handles) { delete handle; } delete db; } rocksdb-6.11.4/examples/rocksdb_option_file_example.ini000066400000000000000000000110651370372246700233200ustar00rootroot00000000000000# This is a RocksDB option file. # # A typical RocksDB options file has four sections, which are # Version section, DBOptions section, at least one CFOptions # section, and one TableOptions section for each column family. # The RocksDB options file in general follows the basic INI # file format with the following extensions / modifications: # # * Escaped characters # We escaped the following characters: # - \n -- line feed - new line # - \r -- carriage return # - \\ -- backslash \ # - \: -- colon symbol : # - \# -- hash tag # # * Comments # We support # style comments. 
Comments can appear at the ending # part of a line. # * Statements # A statement is of the form option_name = value. # Each statement contains a '=', where extra white-spaces # are supported. However, we don't support multi-lined statement. # Furthermore, each line can only contain at most one statement. # * Sections # Sections are of the form [SecitonTitle "SectionArgument"], # where section argument is optional. # * List # We use colon-separated string to represent a list. # For instance, n1:n2:n3:n4 is a list containing four values. # # Below is an example of a RocksDB options file: [Version] rocksdb_version=4.3.0 options_file_version=1.1 [DBOptions] stats_dump_period_sec=600 max_manifest_file_size=18446744073709551615 bytes_per_sync=8388608 delayed_write_rate=2097152 WAL_ttl_seconds=0 WAL_size_limit_MB=0 max_subcompactions=1 wal_dir= wal_bytes_per_sync=0 db_write_buffer_size=0 keep_log_file_num=1000 table_cache_numshardbits=4 max_file_opening_threads=1 writable_file_max_buffer_size=1048576 random_access_max_buffer_size=1048576 use_fsync=false max_total_wal_size=0 max_open_files=-1 skip_stats_update_on_db_open=false max_background_compactions=16 manifest_preallocation_size=4194304 max_background_flushes=7 is_fd_close_on_exec=true max_log_file_size=0 advise_random_on_open=true create_missing_column_families=false paranoid_checks=true delete_obsolete_files_period_micros=21600000000 log_file_time_to_roll=0 compaction_readahead_size=0 create_if_missing=false use_adaptive_mutex=false enable_thread_tracking=false allow_fallocate=true error_if_exists=false recycle_log_file_num=0 skip_log_error_on_recovery=false db_log_dir= new_table_reader_for_compaction_inputs=true allow_mmap_reads=false allow_mmap_writes=false use_direct_reads=false use_direct_writes=false [CFOptions "default"] compaction_style=kCompactionStyleLevel compaction_filter=nullptr num_levels=6 table_factory=BlockBasedTable comparator=leveldb.BytewiseComparator max_sequential_skip_in_iterations=8 
soft_rate_limit=0.000000 max_bytes_for_level_base=1073741824 memtable_prefix_bloom_probes=6 memtable_prefix_bloom_bits=0 memtable_prefix_bloom_huge_page_tlb_size=0 max_successive_merges=0 arena_block_size=16777216 min_write_buffer_number_to_merge=1 target_file_size_multiplier=1 source_compaction_factor=1 max_bytes_for_level_multiplier=8 max_bytes_for_level_multiplier_additional=2:3:5 compaction_filter_factory=nullptr max_write_buffer_number=8 level0_stop_writes_trigger=20 compression=kSnappyCompression level0_file_num_compaction_trigger=4 purge_redundant_kvs_while_flush=true max_write_buffer_size_to_maintain=0 memtable_factory=SkipListFactory max_grandparent_overlap_factor=8 expanded_compaction_factor=25 hard_pending_compaction_bytes_limit=137438953472 inplace_update_num_locks=10000 level_compaction_dynamic_level_bytes=true level0_slowdown_writes_trigger=12 filter_deletes=false verify_checksums_in_compaction=true min_partial_merge_operands=2 paranoid_file_checks=false target_file_size_base=134217728 optimize_filters_for_hits=false merge_operator=PutOperator compression_per_level=kNoCompression:kNoCompression:kNoCompression:kSnappyCompression:kSnappyCompression:kSnappyCompression compaction_measure_io_stats=false prefix_extractor=nullptr bloom_locality=0 write_buffer_size=134217728 disable_auto_compactions=false inplace_update_support=false [TableOptions/BlockBasedTable "default"] format_version=2 whole_key_filtering=true no_block_cache=false checksum=kCRC32c filter_policy=rocksdb.BuiltinBloomFilter block_size_deviation=10 block_size=8192 block_restart_interval=16 cache_index_and_filter_blocks=false pin_l0_filter_and_index_blocks_in_cache=false pin_top_level_index_and_filter=false index_type=kBinarySearch hash_index_allow_collision=true flush_block_policy_factory=FlushBlockBySizePolicyFactory rocksdb-6.11.4/examples/simple_example.cc000066400000000000000000000046041370372246700204020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include "rocksdb/db.h" #include "rocksdb/slice.h" #include "rocksdb/options.h" using namespace ROCKSDB_NAMESPACE; std::string kDBPath = "/tmp/rocksdb_simple_example"; int main() { DB* db; Options options; // Optimize RocksDB. This is the easiest way to get RocksDB to perform well options.IncreaseParallelism(); options.OptimizeLevelStyleCompaction(); // create the DB if it's not already present options.create_if_missing = true; // open DB Status s = DB::Open(options, kDBPath, &db); assert(s.ok()); // Put key-value s = db->Put(WriteOptions(), "key1", "value"); assert(s.ok()); std::string value; // get value s = db->Get(ReadOptions(), "key1", &value); assert(s.ok()); assert(value == "value"); // atomically apply a set of updates { WriteBatch batch; batch.Delete("key1"); batch.Put("key2", value); s = db->Write(WriteOptions(), &batch); } s = db->Get(ReadOptions(), "key1", &value); assert(s.IsNotFound()); db->Get(ReadOptions(), "key2", &value); assert(value == "value"); { PinnableSlice pinnable_val; db->Get(ReadOptions(), db->DefaultColumnFamily(), "key2", &pinnable_val); assert(pinnable_val == "value"); } { std::string string_val; // If it cannot pin the value, it copies the value to its internal buffer. // The intenral buffer could be set during construction. PinnableSlice pinnable_val(&string_val); db->Get(ReadOptions(), db->DefaultColumnFamily(), "key2", &pinnable_val); assert(pinnable_val == "value"); // If the value is not pinned, the internal buffer must have the value. 
assert(pinnable_val.IsPinned() || string_val == "value"); } PinnableSlice pinnable_val; db->Get(ReadOptions(), db->DefaultColumnFamily(), "key1", &pinnable_val); assert(s.IsNotFound()); // Reset PinnableSlice after each use and before each reuse pinnable_val.Reset(); db->Get(ReadOptions(), db->DefaultColumnFamily(), "key2", &pinnable_val); assert(pinnable_val == "value"); pinnable_val.Reset(); // The Slice pointed by pinnable_val is not valid after this point delete db; return 0; } rocksdb-6.11.4/examples/transaction_example.cc000066400000000000000000000124031370372246700214320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" using namespace ROCKSDB_NAMESPACE; std::string kDBPath = "/tmp/rocksdb_transaction_example"; int main() { // open DB Options options; TransactionDBOptions txn_db_options; options.create_if_missing = true; TransactionDB* txn_db; Status s = TransactionDB::Open(options, txn_db_options, kDBPath, &txn_db); assert(s.ok()); WriteOptions write_options; ReadOptions read_options; TransactionOptions txn_options; std::string value; //////////////////////////////////////////////////////// // // Simple Transaction Example ("Read Committed") // //////////////////////////////////////////////////////// // Start a transaction Transaction* txn = txn_db->BeginTransaction(write_options); assert(txn); // Read a key in this transaction s = txn->Get(read_options, "abc", &value); assert(s.IsNotFound()); // Write a key in this transaction s = txn->Put("abc", "def"); assert(s.ok()); // Read a key OUTSIDE this transaction. Does not affect txn. 
s = txn_db->Get(read_options, "abc", &value); assert(s.IsNotFound()); // Write a key OUTSIDE of this transaction. // Does not affect txn since this is an unrelated key. s = txn_db->Put(write_options, "xyz", "zzz"); assert(s.ok()); // Write a key OUTSIDE of this transaction. // Fail because the key conflicts with the key written in txn. s = txn_db->Put(write_options, "abc", "def"); assert(s.subcode() == Status::kLockTimeout); // Value for key "xyz" has been committed, can be read in txn. s = txn->Get(read_options, "xyz", &value); assert(s.ok()); assert(value == "zzz"); // Commit transaction s = txn->Commit(); assert(s.ok()); delete txn; // Value is committed, can be read now. s = txn_db->Get(read_options, "abc", &value); assert(s.ok()); assert(value == "def"); //////////////////////////////////////////////////////// // // "Repeatable Read" (Snapshot Isolation) Example // -- Using a single Snapshot // //////////////////////////////////////////////////////// // Set a snapshot at start of transaction by setting set_snapshot=true txn_options.set_snapshot = true; txn = txn_db->BeginTransaction(write_options, txn_options); const Snapshot* snapshot = txn->GetSnapshot(); // Write a key OUTSIDE of transaction s = txn_db->Put(write_options, "abc", "xyz"); assert(s.ok()); // Read the latest committed value. s = txn->Get(read_options, "abc", &value); assert(s.ok()); assert(value == "xyz"); // Read the snapshotted value. read_options.snapshot = snapshot; s = txn->Get(read_options, "abc", &value); assert(s.ok()); assert(value == "def"); // Attempt to read a key using the snapshot. This will fail since // the previous write outside this txn conflicts with this read. s = txn->GetForUpdate(read_options, "abc", &value); assert(s.IsBusy()); txn->Rollback(); // Snapshot will be released upon deleting the transaction. 
delete txn; // Clear snapshot from read options since it is no longer valid read_options.snapshot = nullptr; snapshot = nullptr; //////////////////////////////////////////////////////// // // "Read Committed" (Monotonic Atomic Views) Example // --Using multiple Snapshots // //////////////////////////////////////////////////////// // In this example, we set the snapshot multiple times. This is probably // only necessary if you have very strict isolation requirements to // implement. // Set a snapshot at start of transaction txn_options.set_snapshot = true; txn = txn_db->BeginTransaction(write_options, txn_options); // Do some reads and writes to key "x" read_options.snapshot = txn_db->GetSnapshot(); s = txn->Get(read_options, "x", &value); assert(s.IsNotFound()); s = txn->Put("x", "x"); assert(s.ok()); // Do a write outside of the transaction to key "y" s = txn_db->Put(write_options, "y", "y1"); assert(s.ok()); // Set a new snapshot in the transaction txn->SetSnapshot(); txn->SetSavePoint(); read_options.snapshot = txn_db->GetSnapshot(); // Do some reads and writes to key "y" // Since the snapshot was advanced, the write done outside of the // transaction does not conflict. s = txn->GetForUpdate(read_options, "y", &value); assert(s.ok()); assert(value == "y1"); s = txn->Put("y", "y2"); assert(s.ok()); // Decide we want to revert the last write from this transaction. txn->RollbackToSavePoint(); // Commit. s = txn->Commit(); assert(s.ok()); delete txn; // Clear snapshot from read options since it is no longer valid read_options.snapshot = nullptr; // db state is at the save point. 
s = txn_db->Get(read_options, "x", &value); assert(s.ok()); assert(value == "x"); s = txn_db->Get(read_options, "y", &value); assert(s.ok()); assert(value == "y1"); // Cleanup delete txn_db; DestroyDB(kDBPath, options); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/file/000077500000000000000000000000001370372246700141645ustar00rootroot00000000000000rocksdb-6.11.4/file/delete_scheduler.cc000066400000000000000000000316541370372246700200040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "file/delete_scheduler.h" #include #include #include #include "file/sst_file_manager_impl.h" #include "logging/logging.h" #include "port/port.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { DeleteScheduler::DeleteScheduler(Env* env, FileSystem* fs, int64_t rate_bytes_per_sec, Logger* info_log, SstFileManagerImpl* sst_file_manager, double max_trash_db_ratio, uint64_t bytes_max_delete_chunk) : env_(env), fs_(fs), total_trash_size_(0), rate_bytes_per_sec_(rate_bytes_per_sec), pending_files_(0), bytes_max_delete_chunk_(bytes_max_delete_chunk), closing_(false), cv_(&mu_), bg_thread_(nullptr), info_log_(info_log), sst_file_manager_(sst_file_manager), max_trash_db_ratio_(max_trash_db_ratio) { assert(sst_file_manager != nullptr); assert(max_trash_db_ratio >= 0); MaybeCreateBackgroundThread(); } DeleteScheduler::~DeleteScheduler() { { InstrumentedMutexLock l(&mu_); closing_ = true; cv_.SignalAll(); } if (bg_thread_) { bg_thread_->join(); } } Status DeleteScheduler::DeleteFile(const std::string& file_path, const std::string& dir_to_sync, const bool force_bg) { Status s; if (rate_bytes_per_sec_.load() <= 0 || (!force_bg && total_trash_size_.load() > 
sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) { // Rate limiting is disabled or trash size makes up more than // max_trash_db_ratio_ (default 25%) of the total DB size TEST_SYNC_POINT("DeleteScheduler::DeleteFile"); s = fs_->DeleteFile(file_path, IOOptions(), nullptr); if (s.ok()) { sst_file_manager_->OnDeleteFile(file_path); ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately, rate_bytes_per_sec %" PRIi64 ", total_trash_size %" PRIu64 " max_trash_db_ratio %lf", file_path.c_str(), rate_bytes_per_sec_.load(), total_trash_size_.load(), max_trash_db_ratio_.load()); RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY); } return s; } // Move file to trash std::string trash_file; s = MarkAsTrash(file_path, &trash_file); ROCKS_LOG_INFO(info_log_, "Mark file: %s as trash -- %s", trash_file.c_str(), s.ToString().c_str()); if (!s.ok()) { ROCKS_LOG_ERROR(info_log_, "Failed to mark %s as trash -- %s", file_path.c_str(), s.ToString().c_str()); s = fs_->DeleteFile(file_path, IOOptions(), nullptr); if (s.ok()) { sst_file_manager_->OnDeleteFile(file_path); ROCKS_LOG_INFO(info_log_, "Deleted file %s immediately", trash_file.c_str()); RecordTick(stats_.get(), FILES_DELETED_IMMEDIATELY); } return s; } RecordTick(stats_.get(), FILES_MARKED_TRASH); // Update the total trash size uint64_t trash_file_size = 0; fs_->GetFileSize(trash_file, IOOptions(), &trash_file_size, nullptr); total_trash_size_.fetch_add(trash_file_size); // Add file to delete queue { InstrumentedMutexLock l(&mu_); queue_.emplace(trash_file, dir_to_sync); pending_files_++; if (pending_files_ == 1) { cv_.SignalAll(); } } return s; } std::map DeleteScheduler::GetBackgroundErrors() { InstrumentedMutexLock l(&mu_); return bg_errors_; } const std::string DeleteScheduler::kTrashExtension = ".trash"; bool DeleteScheduler::IsTrashFile(const std::string& file_path) { return (file_path.size() >= kTrashExtension.size() && file_path.rfind(kTrashExtension) == file_path.size() - kTrashExtension.size()); } Status 
DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm, const std::string& path) { Status s; // Check if there are any files marked as trash in this path std::vector files_in_path; s = env->GetChildren(path, &files_in_path); if (!s.ok()) { return s; } for (const std::string& current_file : files_in_path) { if (!DeleteScheduler::IsTrashFile(current_file)) { // not a trash file, skip continue; } Status file_delete; std::string trash_file = path + "/" + current_file; if (sfm) { // We have an SstFileManager that will schedule the file delete sfm->OnAddFile(trash_file); file_delete = sfm->ScheduleFileDeletion(trash_file, path); } else { // Delete the file immediately file_delete = env->DeleteFile(trash_file); } if (s.ok() && !file_delete.ok()) { s = file_delete; } } return s; } Status DeleteScheduler::MarkAsTrash(const std::string& file_path, std::string* trash_file) { // Sanity check of the path size_t idx = file_path.rfind("/"); if (idx == std::string::npos || idx == file_path.size() - 1) { return Status::InvalidArgument("file_path is corrupted"); } Status s; if (DeleteScheduler::IsTrashFile(file_path)) { // This is already a trash file *trash_file = file_path; return s; } *trash_file = file_path + kTrashExtension; // TODO(tec) : Implement Env::RenameFileIfNotExist and remove // file_move_mu mutex. 
int cnt = 0; InstrumentedMutexLock l(&file_move_mu_); while (true) { s = fs_->FileExists(*trash_file, IOOptions(), nullptr); if (s.IsNotFound()) { // We found a path for our file in trash s = fs_->RenameFile(file_path, *trash_file, IOOptions(), nullptr); break; } else if (s.ok()) { // Name conflict, generate new random suffix *trash_file = file_path + std::to_string(cnt) + kTrashExtension; } else { // Error during FileExists call, we cannot continue break; } cnt++; } if (s.ok()) { sst_file_manager_->OnMoveFile(file_path, *trash_file); } return s; } void DeleteScheduler::BackgroundEmptyTrash() { TEST_SYNC_POINT("DeleteScheduler::BackgroundEmptyTrash"); while (true) { InstrumentedMutexLock l(&mu_); while (queue_.empty() && !closing_) { cv_.Wait(); } if (closing_) { return; } // Delete all files in queue_ uint64_t start_time = env_->NowMicros(); uint64_t total_deleted_bytes = 0; int64_t current_delete_rate = rate_bytes_per_sec_.load(); while (!queue_.empty() && !closing_) { if (current_delete_rate != rate_bytes_per_sec_.load()) { // User changed the delete rate current_delete_rate = rate_bytes_per_sec_.load(); start_time = env_->NowMicros(); total_deleted_bytes = 0; ROCKS_LOG_INFO(info_log_, "rate_bytes_per_sec is changed to %" PRIi64, current_delete_rate); } // Get new file to delete const FileAndDir& fad = queue_.front(); std::string path_in_trash = fad.fname; // We don't need to hold the lock while deleting the file mu_.Unlock(); uint64_t deleted_bytes = 0; bool is_complete = true; // Delete file from trash and update total_penlty value Status s = DeleteTrashFile(path_in_trash, fad.dir, &deleted_bytes, &is_complete); total_deleted_bytes += deleted_bytes; mu_.Lock(); if (is_complete) { queue_.pop(); } if (!s.ok()) { bg_errors_[path_in_trash] = s; } // Apply penalty if necessary uint64_t total_penalty; if (current_delete_rate > 0) { // rate limiting is enabled total_penalty = ((total_deleted_bytes * kMicrosInSecond) / current_delete_rate); ROCKS_LOG_INFO(info_log_, 
"Rate limiting is enabled with penalty %" PRIu64 "after deleting file %s", total_penalty, path_in_trash.c_str()); while (!closing_ && !cv_.TimedWait(start_time + total_penalty)) { } } else { // rate limiting is disabled total_penalty = 0; ROCKS_LOG_INFO(info_log_, "Rate limiting is disabled after deleting file %s", path_in_trash.c_str()); } TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait", &total_penalty); if (is_complete) { pending_files_--; } if (pending_files_ == 0) { // Unblock WaitForEmptyTrash since there are no more files waiting // to be deleted cv_.SignalAll(); } } } } Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, const std::string& dir_to_sync, uint64_t* deleted_bytes, bool* is_complete) { uint64_t file_size; Status s = fs_->GetFileSize(path_in_trash, IOOptions(), &file_size, nullptr); *is_complete = true; TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile"); if (s.ok()) { bool need_full_delete = true; if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) { uint64_t num_hard_links = 2; // We don't have to worry aobut data race between linking a new // file after the number of file link check and ftruncte because // the file is now in trash and no hardlink is supposed to create // to trash files by RocksDB. 
Status my_status = fs_->NumFileLinks(path_in_trash, IOOptions(), &num_hard_links, nullptr); if (my_status.ok()) { if (num_hard_links == 1) { std::unique_ptr wf; my_status = fs_->ReopenWritableFile(path_in_trash, FileOptions(), &wf, nullptr); if (my_status.ok()) { my_status = wf->Truncate(file_size - bytes_max_delete_chunk_, IOOptions(), nullptr); if (my_status.ok()) { TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync"); my_status = wf->Fsync(IOOptions(), nullptr); } } if (my_status.ok()) { *deleted_bytes = bytes_max_delete_chunk_; need_full_delete = false; *is_complete = false; } else { ROCKS_LOG_WARN(info_log_, "Failed to partially delete %s from trash -- %s", path_in_trash.c_str(), my_status.ToString().c_str()); } } else { ROCKS_LOG_INFO(info_log_, "Cannot delete %s slowly through ftruncate from trash " "as it has other links", path_in_trash.c_str()); } } else if (!num_link_error_printed_) { ROCKS_LOG_INFO( info_log_, "Cannot delete files slowly through ftruncate from trash " "as Env::NumFileLinks() returns error: %s", my_status.ToString().c_str()); num_link_error_printed_ = true; } } if (need_full_delete) { s = fs_->DeleteFile(path_in_trash, IOOptions(), nullptr); if (!dir_to_sync.empty()) { std::unique_ptr dir_obj; if (s.ok()) { s = fs_->NewDirectory(dir_to_sync, IOOptions(), &dir_obj, nullptr); } if (s.ok()) { s = dir_obj->Fsync(IOOptions(), nullptr); TEST_SYNC_POINT_CALLBACK( "DeleteScheduler::DeleteTrashFile::AfterSyncDir", reinterpret_cast(const_cast(&dir_to_sync))); } } *deleted_bytes = file_size; sst_file_manager_->OnDeleteFile(path_in_trash); } } if (!s.ok()) { // Error while getting file size or while deleting ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s", path_in_trash.c_str(), s.ToString().c_str()); *deleted_bytes = 0; } else { total_trash_size_.fetch_sub(*deleted_bytes); } return s; } void DeleteScheduler::WaitForEmptyTrash() { InstrumentedMutexLock l(&mu_); while (pending_files_ > 0 && !closing_) { cv_.Wait(); } } void 
DeleteScheduler::MaybeCreateBackgroundThread() { if (bg_thread_ == nullptr && rate_bytes_per_sec_.load() > 0) { bg_thread_.reset( new port::Thread(&DeleteScheduler::BackgroundEmptyTrash, this)); ROCKS_LOG_INFO(info_log_, "Created background thread for deletion scheduler with " "rate_bytes_per_sec: %" PRIi64, rate_bytes_per_sec_.load()); } } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/file/delete_scheduler.h000066400000000000000000000120741370372246700176410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include "monitoring/instrumented_mutex.h" #include "port/port.h" #include "rocksdb/file_system.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Env; class Logger; class SstFileManagerImpl; // DeleteScheduler allows the DB to enforce a rate limit on file deletion, // Instead of deleteing files immediately, files are marked as trash // and deleted in a background thread that apply sleep penlty between deletes // if they are happening in a rate faster than rate_bytes_per_sec, // // Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this // case DeleteScheduler will delete files immediately. 
class DeleteScheduler { public: DeleteScheduler(Env* env, FileSystem* fs, int64_t rate_bytes_per_sec, Logger* info_log, SstFileManagerImpl* sst_file_manager, double max_trash_db_ratio, uint64_t bytes_max_delete_chunk); ~DeleteScheduler(); // Return delete rate limit in bytes per second int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_.load(); } // Set delete rate limit in bytes per second void SetRateBytesPerSecond(int64_t bytes_per_sec) { rate_bytes_per_sec_.store(bytes_per_sec); MaybeCreateBackgroundThread(); } // Mark file as trash directory and schedule it's deletion. If force_bg is // set, it forces the file to always be deleted in the background thread, // except when rate limiting is disabled Status DeleteFile(const std::string& fname, const std::string& dir_to_sync, const bool force_bg = false); // Wait for all files being deleteing in the background to finish or for // destructor to be called. void WaitForEmptyTrash(); // Return a map containing errors that happened in BackgroundEmptyTrash // file_path => error status std::map GetBackgroundErrors(); uint64_t GetTotalTrashSize() { return total_trash_size_.load(); } // Return trash/DB size ratio where new files will be deleted immediately double GetMaxTrashDBRatio() { return max_trash_db_ratio_.load(); } // Update trash/DB size ratio where new files will be deleted immediately void SetMaxTrashDBRatio(double r) { assert(r >= 0); max_trash_db_ratio_.store(r); } static const std::string kTrashExtension; static bool IsTrashFile(const std::string& file_path); // Check if there are any .trash filse in path, and schedule their deletion // Or delete immediately if sst_file_manager is nullptr static Status CleanupDirectory(Env* env, SstFileManagerImpl* sfm, const std::string& path); void SetStatisticsPtr(const std::shared_ptr& stats) { stats_ = stats; } private: Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash); Status DeleteTrashFile(const std::string& path_in_trash, const 
std::string& dir_to_sync, uint64_t* deleted_bytes, bool* is_complete); void BackgroundEmptyTrash(); void MaybeCreateBackgroundThread(); Env* env_; FileSystem* fs_; // total size of trash files std::atomic total_trash_size_; // Maximum number of bytes that should be deleted per second std::atomic rate_bytes_per_sec_; // Mutex to protect queue_, pending_files_, bg_errors_, closing_ InstrumentedMutex mu_; struct FileAndDir { FileAndDir(const std::string& f, const std::string& d) : fname(f), dir(d) {} std::string fname; std::string dir; // empty will be skipped. }; // Queue of trash files that need to be deleted std::queue queue_; // Number of trash files that are waiting to be deleted int32_t pending_files_; uint64_t bytes_max_delete_chunk_; // Errors that happened in BackgroundEmptyTrash (file_path => error) std::map bg_errors_; bool num_link_error_printed_ = false; // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop bool closing_; // Condition variable signaled in these conditions // - pending_files_ value change from 0 => 1 // - pending_files_ value change from 1 => 0 // - closing_ value is set to true InstrumentedCondVar cv_; // Background thread running BackgroundEmptyTrash std::unique_ptr bg_thread_; // Mutex to protect threads from file name conflicts InstrumentedMutex file_move_mu_; Logger* info_log_; SstFileManagerImpl* sst_file_manager_; // If the trash size constitutes for more than this fraction of the total DB // size we will start deleting new files passed to DeleteScheduler // immediately std::atomic max_trash_db_ratio_; static const uint64_t kMicrosInSecond = 1000 * 1000LL; std::shared_ptr stats_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/file/delete_scheduler_test.cc000066400000000000000000000650511370372246700210410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include #include "file/delete_scheduler.h" #include "file/sst_file_manager_impl.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" #ifndef ROCKSDB_LITE namespace ROCKSDB_NAMESPACE { class DeleteSchedulerTest : public testing::Test { public: DeleteSchedulerTest() : env_(Env::Default()) { const int kNumDataDirs = 3; dummy_files_dirs_.reserve(kNumDataDirs); for (size_t i = 0; i < kNumDataDirs; ++i) { dummy_files_dirs_.emplace_back( test::PerThreadDBPath(env_, "delete_scheduler_dummy_data_dir") + ToString(i)); DestroyAndCreateDir(dummy_files_dirs_.back()); } stats_ = ROCKSDB_NAMESPACE::CreateDBStatistics(); } ~DeleteSchedulerTest() override { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); for (const auto& dummy_files_dir : dummy_files_dirs_) { test::DestroyDir(env_, dummy_files_dir); } } void DestroyAndCreateDir(const std::string& dir) { ASSERT_OK(test::DestroyDir(env_, dir)); EXPECT_OK(env_->CreateDir(dir)); } int CountNormalFiles(size_t dummy_files_dirs_idx = 0) { std::vector files_in_dir; EXPECT_OK(env_->GetChildren(dummy_files_dirs_[dummy_files_dirs_idx], &files_in_dir)); int normal_cnt = 0; for (auto& f : files_in_dir) { if (!DeleteScheduler::IsTrashFile(f) && f != "." 
&& f != "..") { normal_cnt++; } } return normal_cnt; } int CountTrashFiles(size_t dummy_files_dirs_idx = 0) { std::vector files_in_dir; EXPECT_OK(env_->GetChildren(dummy_files_dirs_[dummy_files_dirs_idx], &files_in_dir)); int trash_cnt = 0; for (auto& f : files_in_dir) { if (DeleteScheduler::IsTrashFile(f)) { trash_cnt++; } } return trash_cnt; } std::string NewDummyFile(const std::string& file_name, uint64_t size = 1024, size_t dummy_files_dirs_idx = 0) { std::string file_path = dummy_files_dirs_[dummy_files_dirs_idx] + "/" + file_name; std::unique_ptr f; env_->NewWritableFile(file_path, &f, EnvOptions()); std::string data(size, 'A'); EXPECT_OK(f->Append(data)); EXPECT_OK(f->Close()); sst_file_mgr_->OnAddFile(file_path, false); return file_path; } void NewDeleteScheduler() { // Tests in this file are for DeleteScheduler component and don't create any // DBs, so we need to set max_trash_db_ratio to 100% (instead of default // 25%) std::shared_ptr fs(std::make_shared(env_)); sst_file_mgr_.reset( new SstFileManagerImpl(env_, fs, nullptr, rate_bytes_per_sec_, /* max_trash_db_ratio= */ 1.1, 128 * 1024)); delete_scheduler_ = sst_file_mgr_->delete_scheduler(); sst_file_mgr_->SetStatisticsPtr(stats_); } Env* env_; std::vector dummy_files_dirs_; int64_t rate_bytes_per_sec_; DeleteScheduler* delete_scheduler_; std::unique_ptr sst_file_mgr_; std::shared_ptr stats_; }; // Test the basic functionality of DeleteScheduler (Rate Limiting). 
// 1- Create 100 dummy files // 2- Delete the 100 dummy files using DeleteScheduler // --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 3- Wait for DeleteScheduler to delete all files in trash // 4- Verify that BackgroundEmptyTrash used to correct penlties for the files // 5- Make sure that all created files were completely deleted TEST_F(DeleteSchedulerTest, BasicRateLimiting) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::BasicRateLimiting:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); std::vector penalties; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); int dir_synced = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile::AfterSyncDir", [&](void* arg) { dir_synced++; std::string* dir = reinterpret_cast(arg); EXPECT_EQ(dummy_files_dirs_[0], *dir); }); int num_files = 100; // 100 files uint64_t file_size = 1024; // every file is 1 kb std::vector delete_kbs_per_sec = {512, 200, 100, 50, 25}; for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { penalties.clear(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); DestroyAndCreateDir(dummy_files_dirs_[0]); rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; NewDeleteScheduler(); dir_synced = 0; // Create 100 dummy files, every file is 1 Kb std::vector generated_files; for (int i = 0; i < num_files; i++) { std::string file_name = "file" + ToString(i) + ".data"; generated_files.push_back(NewDummyFile(file_name, file_size)); } // Delete dummy files and measure time spent to empty trash for (int i = 0; i < num_files; i++) { ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], dummy_files_dirs_[0])); } ASSERT_EQ(CountNormalFiles(), 0); uint64_t delete_start_time = env_->NowMicros(); 
TEST_SYNC_POINT("DeleteSchedulerTest::BasicRateLimiting:1"); delete_scheduler_->WaitForEmptyTrash(); uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); uint64_t total_files_size = 0; uint64_t expected_penlty = 0; ASSERT_EQ(penalties.size(), num_files); for (int i = 0; i < num_files; i++) { total_files_size += file_size; expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec_); ASSERT_EQ(expected_penlty, penalties[i]); } ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); ASSERT_EQ(num_files, dir_synced); ASSERT_EQ(CountTrashFiles(), 0); ASSERT_EQ(num_files, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(DeleteSchedulerTest, MultiDirectoryDeletionsScheduled) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::MultiDbPathDeletionsScheduled:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1 << 20; // 1MB NewDeleteScheduler(); // Generate dummy files in multiple directories const size_t kNumFiles = dummy_files_dirs_.size(); const size_t kFileSize = 1 << 10; // 1KB std::vector generated_files; for (size_t i = 0; i < kNumFiles; i++) { generated_files.push_back(NewDummyFile("file", kFileSize, i)); ASSERT_EQ(1, CountNormalFiles(i)); } // Mark dummy files as trash for (size_t i = 0; i < kNumFiles; i++) { ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], "")); ASSERT_EQ(0, CountNormalFiles(i)); ASSERT_EQ(1, CountTrashFiles(i)); } TEST_SYNC_POINT("DeleteSchedulerTest::MultiDbPathDeletionsScheduled:1"); delete_scheduler_->WaitForEmptyTrash(); // Verify dummy files eventually got deleted for (size_t i = 0; i < kNumFiles; i++) { ASSERT_EQ(0, CountNormalFiles(i)); 
ASSERT_EQ(0, CountTrashFiles(i)); } ASSERT_EQ(kNumFiles, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } // Same as the BasicRateLimiting test but delete files in multiple threads. // 1- Create 100 dummy files // 2- Delete the 100 dummy files using DeleteScheduler using 10 threads // --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 3- Wait for DeleteScheduler to delete all files in queue // 4- Verify that BackgroundEmptyTrash used to correct penlties for the files // 5- Make sure that all created files were completely deleted TEST_F(DeleteSchedulerTest, RateLimitingMultiThreaded) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::RateLimitingMultiThreaded:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); std::vector penalties; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); int thread_cnt = 10; int num_files = 10; // 10 files per thread uint64_t file_size = 1024; // every file is 1 kb std::vector delete_kbs_per_sec = {512, 200, 100, 50, 25}; for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { penalties.clear(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); DestroyAndCreateDir(dummy_files_dirs_[0]); rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; NewDeleteScheduler(); // Create 100 dummy files, every file is 1 Kb std::vector generated_files; for (int i = 0; i < num_files * thread_cnt; i++) { std::string file_name = "file" + ToString(i) + ".data"; generated_files.push_back(NewDummyFile(file_name, file_size)); } // Delete dummy files using 10 threads and measure time spent to empty trash std::atomic thread_num(0); std::vector threads; std::function delete_thread = [&]() { int idx = 
thread_num.fetch_add(1); int range_start = idx * num_files; int range_end = range_start + num_files; for (int j = range_start; j < range_end; j++) { ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[j], "")); } }; for (int i = 0; i < thread_cnt; i++) { threads.emplace_back(delete_thread); } for (size_t i = 0; i < threads.size(); i++) { threads[i].join(); } uint64_t delete_start_time = env_->NowMicros(); TEST_SYNC_POINT("DeleteSchedulerTest::RateLimitingMultiThreaded:1"); delete_scheduler_->WaitForEmptyTrash(); uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); uint64_t total_files_size = 0; uint64_t expected_penlty = 0; ASSERT_EQ(penalties.size(), num_files * thread_cnt); for (int i = 0; i < num_files * thread_cnt; i++) { total_files_size += file_size; expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec_); ASSERT_EQ(expected_penlty, penalties[i]); } ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); ASSERT_EQ(CountNormalFiles(), 0); ASSERT_EQ(CountTrashFiles(), 0); ASSERT_EQ(num_files * thread_cnt, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } // Disable rate limiting by setting rate_bytes_per_sec_ to 0 and make sure // that when DeleteScheduler delete a file it delete it immediately and don't // move it to trash TEST_F(DeleteSchedulerTest, DisableRateLimiting) { int bg_delete_file = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 0; NewDeleteScheduler(); constexpr int num_files = 10; for (int i = 0; i < num_files; i++) { // Every file we delete will be deleted immediately std::string dummy_file = 
NewDummyFile("dummy.data"); ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file, "")); ASSERT_TRUE(env_->FileExists(dummy_file).IsNotFound()); ASSERT_EQ(CountNormalFiles(), 0); ASSERT_EQ(CountTrashFiles(), 0); } ASSERT_EQ(bg_delete_file, 0); ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(num_files, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } // Testing that moving files to trash with the same name is not a problem // 1- Create 10 files with the same name "conflict.data" // 2- Delete the 10 files using DeleteScheduler // 3- Make sure that trash directory contain 10 files ("conflict.data" x 10) // --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 4- Make sure that files are deleted from trash TEST_F(DeleteSchedulerTest, ConflictNames) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::ConflictNames:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec NewDeleteScheduler(); // Create "conflict.data" and move it to trash 10 times for (int i = 0; i < 10; i++) { std::string dummy_file = NewDummyFile("conflict.data"); ASSERT_OK(delete_scheduler_->DeleteFile(dummy_file, "")); } ASSERT_EQ(CountNormalFiles(), 0); // 10 files ("conflict.data" x 10) in trash ASSERT_EQ(CountTrashFiles(), 10); // Hold BackgroundEmptyTrash TEST_SYNC_POINT("DeleteSchedulerTest::ConflictNames:1"); delete_scheduler_->WaitForEmptyTrash(); ASSERT_EQ(CountTrashFiles(), 0); auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); ASSERT_EQ(10, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } // 1- Create 10 dummy files // 2- Delete the 10 files using DeleteScheduler (move 
them to trsah) // 3- Delete the 10 files directly (using env_->DeleteFile) // --- Hold DeleteScheduler::BackgroundEmptyTrash --- // 4- Make sure that DeleteScheduler failed to delete the 10 files and // reported 10 background errors TEST_F(DeleteSchedulerTest, BackgroundError) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::BackgroundError:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 Mb/sec NewDeleteScheduler(); // Generate 10 dummy files and move them to trash for (int i = 0; i < 10; i++) { std::string file_name = "data_" + ToString(i) + ".data"; ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), "")); } ASSERT_EQ(CountNormalFiles(), 0); ASSERT_EQ(CountTrashFiles(), 10); // Delete 10 files from trash, this will cause background errors in // BackgroundEmptyTrash since we already deleted the files it was // goind to delete for (int i = 0; i < 10; i++) { std::string file_name = "data_" + ToString(i) + ".data.trash"; ASSERT_OK(env_->DeleteFile(dummy_files_dirs_[0] + "/" + file_name)); } // Hold BackgroundEmptyTrash TEST_SYNC_POINT("DeleteSchedulerTest::BackgroundError:1"); delete_scheduler_->WaitForEmptyTrash(); auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 10); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } // 1- Create 10 dummy files // 2- Delete 10 dummy files using DeleteScheduler // 3- Wait for DeleteScheduler to delete all files in queue // 4- Make sure all files in trash directory were deleted // 5- Repeat previous steps 5 times TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) { int bg_delete_file = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); 
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec NewDeleteScheduler(); // Move files to trash, wait for empty trash, start again for (int run = 1; run <= 5; run++) { // Generate 10 dummy files and move them to trash for (int i = 0; i < 10; i++) { std::string file_name = "data_" + ToString(i) + ".data"; ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), "")); } ASSERT_EQ(CountNormalFiles(), 0); delete_scheduler_->WaitForEmptyTrash(); ASSERT_EQ(bg_delete_file, 10 * run); ASSERT_EQ(CountTrashFiles(), 0); auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); ASSERT_EQ(10, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(0, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); } ASSERT_EQ(bg_delete_file, 50); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); } TEST_F(DeleteSchedulerTest, DeletePartialFile) { int bg_delete_file = 0; int bg_fsync = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void*) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec NewDeleteScheduler(); // Should delete in 4 batch ASSERT_OK( delete_scheduler_->DeleteFile(NewDummyFile("data_1", 500 * 1024), "")); ASSERT_OK( delete_scheduler_->DeleteFile(NewDummyFile("data_2", 100 * 1024), "")); // Should delete in 2 batch ASSERT_OK( delete_scheduler_->DeleteFile(NewDummyFile("data_2", 200 * 1024), "")); delete_scheduler_->WaitForEmptyTrash(); auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); ASSERT_EQ(7, bg_delete_file); ASSERT_EQ(4, bg_fsync); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); } #ifdef OS_LINUX TEST_F(DeleteSchedulerTest, NoPartialDeleteWithLink) { int bg_delete_file = 0; 
int bg_fsync = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void*) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec NewDeleteScheduler(); std::string file1 = NewDummyFile("data_1", 500 * 1024); std::string file2 = NewDummyFile("data_2", 100 * 1024); ASSERT_OK(env_->LinkFile(file1, dummy_files_dirs_[0] + "/data_1b")); ASSERT_OK(env_->LinkFile(file2, dummy_files_dirs_[0] + "/data_2b")); // Should delete in 4 batch if there is no hardlink ASSERT_OK(delete_scheduler_->DeleteFile(file1, "")); ASSERT_OK(delete_scheduler_->DeleteFile(file2, "")); delete_scheduler_->WaitForEmptyTrash(); auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); ASSERT_EQ(2, bg_delete_file); ASSERT_EQ(0, bg_fsync); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); } #endif // 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec) // 2- Delete 100 files using DeleteScheduler // 3- Delete the DeleteScheduler (call the destructor while queue is not empty) // 4- Make sure that not all files were deleted from trash and that // DeleteScheduler background thread did not delete all files TEST_F(DeleteSchedulerTest, DestructorWithNonEmptyQueue) { int bg_delete_file = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 1; // 1 Byte / sec NewDeleteScheduler(); for (int i = 0; i < 100; i++) { std::string file_name = "data_" + ToString(i) + ".data"; ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), "")); } // Deleting 100 files will need >28 hours to delete // we 
will delete the DeleteScheduler while delete queue is not empty sst_file_mgr_.reset(); ASSERT_LT(bg_delete_file, 100); ASSERT_GT(CountTrashFiles(), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DeleteSchedulerTest, DISABLED_DynamicRateLimiting1) { std::vector penalties; int bg_delete_file = 0; int fg_delete_file = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::BackgroundEmptyTrash:Wait", [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DeleteSchedulerTest::DynamicRateLimiting1:1", "DeleteScheduler::BackgroundEmptyTrash"}, }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); rate_bytes_per_sec_ = 0; // Disable rate limiting initially NewDeleteScheduler(); int num_files = 10; // 10 files uint64_t file_size = 1024; // every file is 1 kb std::vector delete_kbs_per_sec = {512, 200, 0, 100, 50, -2, 25}; for (size_t t = 0; t < delete_kbs_per_sec.size(); t++) { penalties.clear(); bg_delete_file = 0; fg_delete_file = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); DestroyAndCreateDir(dummy_files_dirs_[0]); rate_bytes_per_sec_ = delete_kbs_per_sec[t] * 1024; delete_scheduler_->SetRateBytesPerSecond(rate_bytes_per_sec_); // Create 100 dummy files, every file is 1 Kb std::vector generated_files; for (int i = 0; i < num_files; i++) { std::string file_name = "file" + ToString(i) + ".data"; generated_files.push_back(NewDummyFile(file_name, file_size)); } // Delete dummy files and measure time spent to empty trash for (int i = 0; i < num_files; i++) { 
ASSERT_OK(delete_scheduler_->DeleteFile(generated_files[i], "")); } ASSERT_EQ(CountNormalFiles(), 0); if (rate_bytes_per_sec_ > 0) { uint64_t delete_start_time = env_->NowMicros(); TEST_SYNC_POINT("DeleteSchedulerTest::DynamicRateLimiting1:1"); delete_scheduler_->WaitForEmptyTrash(); uint64_t time_spent_deleting = env_->NowMicros() - delete_start_time; auto bg_errors = delete_scheduler_->GetBackgroundErrors(); ASSERT_EQ(bg_errors.size(), 0); uint64_t total_files_size = 0; uint64_t expected_penlty = 0; ASSERT_EQ(penalties.size(), num_files); for (int i = 0; i < num_files; i++) { total_files_size += file_size; expected_penlty = ((total_files_size * 1000000) / rate_bytes_per_sec_); ASSERT_EQ(expected_penlty, penalties[i]); } ASSERT_GT(time_spent_deleting, expected_penlty * 0.9); ASSERT_EQ(bg_delete_file, num_files); ASSERT_EQ(fg_delete_file, 0); } else { ASSERT_EQ(penalties.size(), 0); ASSERT_EQ(bg_delete_file, 0); ASSERT_EQ(fg_delete_file, num_files); } ASSERT_EQ(CountTrashFiles(), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } TEST_F(DeleteSchedulerTest, ImmediateDeleteOn25PercDBSize) { int bg_delete_file = 0; int fg_delete_file = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteTrashFile:DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { fg_delete_file++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); int num_files = 100; // 100 files uint64_t file_size = 1024 * 10; // 100 KB as a file size rate_bytes_per_sec_ = 1; // 1 byte per sec (very slow trash delete) NewDeleteScheduler(); delete_scheduler_->SetMaxTrashDBRatio(0.25); std::vector generated_files; for (int i = 0; i < num_files; i++) { std::string file_name = "file" + ToString(i) + ".data"; generated_files.push_back(NewDummyFile(file_name, file_size)); } for (std::string& file_name : generated_files) { 
delete_scheduler_->DeleteFile(file_name, ""); } // When we end up with 26 files in trash we will start // deleting new files immediately ASSERT_EQ(fg_delete_file, 74); ASSERT_EQ(26, stats_->getAndResetTickerCount(FILES_MARKED_TRASH)); ASSERT_EQ(74, stats_->getAndResetTickerCount(FILES_DELETED_IMMEDIATELY)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DeleteSchedulerTest, IsTrashCheck) { // Trash files ASSERT_TRUE(DeleteScheduler::IsTrashFile("x.trash")); ASSERT_TRUE(DeleteScheduler::IsTrashFile(".trash")); ASSERT_TRUE(DeleteScheduler::IsTrashFile("abc.sst.trash")); ASSERT_TRUE(DeleteScheduler::IsTrashFile("/a/b/c/abc..sst.trash")); ASSERT_TRUE(DeleteScheduler::IsTrashFile("log.trash")); ASSERT_TRUE(DeleteScheduler::IsTrashFile("^^^^^.log.trash")); ASSERT_TRUE(DeleteScheduler::IsTrashFile("abc.t.trash")); // Not trash files ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.sst")); ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.txt")); ASSERT_FALSE(DeleteScheduler::IsTrashFile("/a/b/c/abc.sst")); ASSERT_FALSE(DeleteScheduler::IsTrashFile("/a/b/c/abc.sstrash")); ASSERT_FALSE(DeleteScheduler::IsTrashFile("^^^^^.trashh")); ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.ttrash")); ASSERT_FALSE(DeleteScheduler::IsTrashFile(".ttrash")); ASSERT_FALSE(DeleteScheduler::IsTrashFile("abc.trashx")); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else int main(int /*argc*/, char** /*argv*/) { printf("DeleteScheduler is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/file/file_prefetch_buffer.cc000066400000000000000000000127251370372246700206320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "file/file_prefetch_buffer.h" #include #include #include "file/random_access_file_reader.h" #include "monitoring/histogram.h" #include "monitoring/iostats_context_imp.h" #include "port/port.h" #include "test_util/sync_point.h" #include "util/random.h" #include "util/rate_limiter.h" namespace ROCKSDB_NAMESPACE { Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n, bool for_compaction) { if (!enable_ || reader == nullptr) { return Status::OK(); } size_t alignment = reader->file()->GetRequiredBufferAlignment(); size_t offset_ = static_cast(offset); uint64_t rounddown_offset = Rounddown(offset_, alignment); uint64_t roundup_end = Roundup(offset_ + n, alignment); uint64_t roundup_len = roundup_end - rounddown_offset; assert(roundup_len >= alignment); assert(roundup_len % alignment == 0); // Check if requested bytes are in the existing buffer_. // If all bytes exist -- return. // If only a few bytes exist -- reuse them & read only what is really needed. // This is typically the case of incremental reading of data. // If no bytes exist in buffer -- full pread. Status s; uint64_t chunk_offset_in_buffer = 0; uint64_t chunk_len = 0; bool copy_data_to_new_buffer = false; if (buffer_.CurrentSize() > 0 && offset >= buffer_offset_ && offset <= buffer_offset_ + buffer_.CurrentSize()) { if (offset + n <= buffer_offset_ + buffer_.CurrentSize()) { // All requested bytes are already in the buffer. So no need to Read // again. return s; } else { // Only a few requested bytes are in the buffer. memmove those chunk of // bytes to the beginning, and memcpy them back into the new buffer if a // new buffer is created. 
chunk_offset_in_buffer = Rounddown(static_cast(offset - buffer_offset_), alignment); chunk_len = buffer_.CurrentSize() - chunk_offset_in_buffer; assert(chunk_offset_in_buffer % alignment == 0); assert(chunk_len % alignment == 0); assert(chunk_offset_in_buffer + chunk_len <= buffer_offset_ + buffer_.CurrentSize()); if (chunk_len > 0) { copy_data_to_new_buffer = true; } else { // this reset is not necessary, but just to be safe. chunk_offset_in_buffer = 0; } } } // Create a new buffer only if current capacity is not sufficient, and memcopy // bytes from old buffer if needed (i.e., if chunk_len is greater than 0). if (buffer_.Capacity() < roundup_len) { buffer_.Alignment(alignment); buffer_.AllocateNewBuffer(static_cast(roundup_len), copy_data_to_new_buffer, chunk_offset_in_buffer, static_cast(chunk_len)); } else if (chunk_len > 0) { // New buffer not needed. But memmove bytes from tail to the beginning since // chunk_len is greater than 0. buffer_.RefitTail(static_cast(chunk_offset_in_buffer), static_cast(chunk_len)); } Slice result; size_t read_len = static_cast(roundup_len - chunk_len); s = reader->Read(IOOptions(), rounddown_offset + chunk_len, read_len, &result, buffer_.BufferStart() + chunk_len, nullptr, for_compaction); #ifndef NDEBUG if (!s.ok() || result.size() < read_len) { // Fake an IO error to force db_stress fault injection to ignore // truncated read errors IGNORE_STATUS_IF_ERROR(Status::IOError()); } #endif if (s.ok()) { buffer_offset_ = rounddown_offset; buffer_.Size(static_cast(chunk_len) + result.size()); } return s; } bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n, Slice* result, bool for_compaction) { if (track_min_offset_ && offset < min_offset_read_) { min_offset_read_ = static_cast(offset); } if (!enable_ || offset < buffer_offset_) { return false; } // If the buffer contains only a few of the requested bytes: // If readahead is enabled: prefetch the remaining bytes + readadhead bytes // and satisfy the request. 
// If readahead is not enabled: return false. if (offset + n > buffer_offset_ + buffer_.CurrentSize()) { if (readahead_size_ > 0) { assert(file_reader_ != nullptr); assert(max_readahead_size_ >= readahead_size_); Status s; if (for_compaction) { s = Prefetch(file_reader_, offset, std::max(n, readahead_size_), for_compaction); } else { s = Prefetch(file_reader_, offset, n + readahead_size_, for_compaction); } if (!s.ok()) { return false; } readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2); } else { return false; } } uint64_t offset_in_buffer = offset - buffer_offset_; *result = Slice(buffer_.BufferStart() + offset_in_buffer, n); return true; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/file_prefetch_buffer.h000066400000000000000000000102401370372246700204620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include "file/random_access_file_reader.h" #include "port/port.h" #include "rocksdb/env.h" #include "util/aligned_buffer.h" namespace ROCKSDB_NAMESPACE { // FilePrefetchBuffer is a smart buffer to store and read data from a file. class FilePrefetchBuffer { public: // Constructor. // // All arguments are optional. // file_reader : the file reader to use. Can be a nullptr. // readahead_size : the initial readahead size. // max_readahead_size : the maximum readahead size. // If max_readahead_size > readahead_size, the readahead size will be // doubled on every IO until max_readahead_size is hit. // Typically this is set as a multiple of readahead_size. 
// max_readahead_size should be greater than equal to readahead_size. // enable : controls whether reading from the buffer is enabled. // If false, TryReadFromCache() always return false, and we only take stats // for the minimum offset if track_min_offset = true. // track_min_offset : Track the minimum offset ever read and collect stats on // it. Used for adaptable readahead of the file footer/metadata. // // Automatic readhead is enabled for a file if file_reader, readahead_size, // and max_readahead_size are passed in. // If file_reader is a nullptr, setting readadhead_size and max_readahead_size // does not make any sense. So it does nothing. // A user can construct a FilePrefetchBuffer without any arguments, but use // `Prefetch` to load data into the buffer. FilePrefetchBuffer(RandomAccessFileReader* file_reader = nullptr, size_t readadhead_size = 0, size_t max_readahead_size = 0, bool enable = true, bool track_min_offset = false) : buffer_offset_(0), file_reader_(file_reader), readahead_size_(readadhead_size), max_readahead_size_(max_readahead_size), min_offset_read_(port::kMaxSizet), enable_(enable), track_min_offset_(track_min_offset) {} // Load data into the buffer from a file. // reader : the file reader. // offset : the file offset to start reading from. // n : the number of bytes to read. // for_compaction : if prefetch is done for compaction read. Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n, bool for_compaction = false); // Tries returning the data for a file raed from this buffer, if that data is // in the buffer. // It handles tracking the minimum read offset if track_min_offset = true. // It also does the exponential readahead when readadhead_size is set as part // of the constructor. // // offset : the file offset. // n : the number of bytes. // result : output buffer to put the data into. // for_compaction : if cache read is done for compaction read. 
bool TryReadFromCache(uint64_t offset, size_t n, Slice* result, bool for_compaction = false); // The minimum `offset` ever passed to TryReadFromCache(). This will nly be // tracked if track_min_offset = true. size_t min_offset_read() const { return min_offset_read_; } private: AlignedBuffer buffer_; uint64_t buffer_offset_; RandomAccessFileReader* file_reader_; size_t readahead_size_; size_t max_readahead_size_; // The minimum `offset` ever passed to TryReadFromCache(). size_t min_offset_read_; // if false, TryReadFromCache() always return false, and we only take stats // for track_min_offset_ if track_min_offset_ = true bool enable_; // If true, track minimum `offset` ever passed to TryReadFromCache(), which // can be fetched from min_offset_read(). bool track_min_offset_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/file_util.cc000066400000000000000000000144721370372246700164570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include "file/file_util.h" #include #include #include "file/random_access_file_reader.h" #include "file/sequence_file_reader.h" #include "file/sst_file_manager_impl.h" #include "file/writable_file_writer.h" #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { // Utility function to copy a file up to a specified length IOStatus CopyFile(FileSystem* fs, const std::string& source, const std::string& destination, uint64_t size, bool use_fsync) { const FileOptions soptions; IOStatus io_s; std::unique_ptr src_reader; std::unique_ptr dest_writer; { std::unique_ptr srcfile; io_s = fs->NewSequentialFile(source, soptions, &srcfile, nullptr); if (!io_s.ok()) { return io_s; } std::unique_ptr destfile; io_s = fs->NewWritableFile(destination, soptions, &destfile, nullptr); if (!io_s.ok()) { return io_s; } if (size == 0) { // default argument means copy everything io_s = fs->GetFileSize(source, IOOptions(), &size, nullptr); if (!io_s.ok()) { return io_s; } } src_reader.reset(new SequentialFileReader(std::move(srcfile), source)); dest_writer.reset( new WritableFileWriter(std::move(destfile), destination, soptions)); } char buffer[4096]; Slice slice; while (size > 0) { size_t bytes_to_read = std::min(sizeof(buffer), static_cast(size)); io_s = status_to_io_status(src_reader->Read(bytes_to_read, &slice, buffer)); if (!io_s.ok()) { return io_s; } if (slice.size() == 0) { return IOStatus::Corruption("file too small"); } io_s = dest_writer->Append(slice); if (!io_s.ok()) { return io_s; } size -= slice.size(); } return dest_writer->Sync(use_fsync); } // Utility function to create a file with the provided contents IOStatus CreateFile(FileSystem* fs, const std::string& destination, const std::string& contents, bool use_fsync) { const EnvOptions soptions; IOStatus io_s; std::unique_ptr dest_writer; std::unique_ptr destfile; io_s = fs->NewWritableFile(destination, soptions, &destfile, nullptr); if (!io_s.ok()) { return io_s; } dest_writer.reset( new 
WritableFileWriter(std::move(destfile), destination, soptions)); io_s = dest_writer->Append(Slice(contents)); if (!io_s.ok()) { return io_s; } return dest_writer->Sync(use_fsync); } Status DeleteDBFile(const ImmutableDBOptions* db_options, const std::string& fname, const std::string& dir_to_sync, const bool force_bg, const bool force_fg) { #ifndef ROCKSDB_LITE SstFileManagerImpl* sfm = static_cast(db_options->sst_file_manager.get()); if (sfm && !force_fg) { return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg); } else { return db_options->env->DeleteFile(fname); } #else (void)dir_to_sync; (void)force_bg; (void)force_fg; // SstFileManager is not supported in ROCKSDB_LITE // Delete file immediately return db_options->env->DeleteFile(fname); #endif } bool IsWalDirSameAsDBPath(const ImmutableDBOptions* db_options) { bool same = false; assert(!db_options->db_paths.empty()); Status s = db_options->env->AreFilesSame(db_options->wal_dir, db_options->db_paths[0].path, &same); if (s.IsNotSupported()) { same = db_options->wal_dir == db_options->db_paths[0].path; } return same; } IOStatus GenerateOneFileChecksum(FileSystem* fs, const std::string& file_path, FileChecksumGenFactory* checksum_factory, std::string* file_checksum, std::string* file_checksum_func_name, size_t verify_checksums_readahead_size, bool allow_mmap_reads) { if (checksum_factory == nullptr) { return IOStatus::InvalidArgument("Checksum factory is invalid"); } assert(file_checksum != nullptr); assert(file_checksum_func_name != nullptr); FileChecksumGenContext gen_context; std::unique_ptr checksum_generator = checksum_factory->CreateFileChecksumGenerator(gen_context); uint64_t size; IOStatus io_s; std::unique_ptr reader; { std::unique_ptr r_file; io_s = fs->NewRandomAccessFile(file_path, FileOptions(), &r_file, nullptr); if (!io_s.ok()) { return io_s; } io_s = fs->GetFileSize(file_path, IOOptions(), &size, nullptr); if (!io_s.ok()) { return io_s; } reader.reset(new 
RandomAccessFileReader(std::move(r_file), file_path)); } // Found that 256 KB readahead size provides the best performance, based on // experiments, for auto readahead. Experiment data is in PR #3282. size_t default_max_read_ahead_size = 256 * 1024; size_t readahead_size = (verify_checksums_readahead_size != 0) ? verify_checksums_readahead_size : default_max_read_ahead_size; FilePrefetchBuffer prefetch_buffer( reader.get(), readahead_size /* readadhead_size */, readahead_size /* max_readahead_size */, !allow_mmap_reads /* enable */); Slice slice; uint64_t offset = 0; while (size > 0) { size_t bytes_to_read = static_cast(std::min(uint64_t{readahead_size}, size)); if (!prefetch_buffer.TryReadFromCache(offset, bytes_to_read, &slice, false)) { return IOStatus::Corruption("file read failed"); } if (slice.size() == 0) { return IOStatus::Corruption("file too small"); } checksum_generator->Update(slice.data(), slice.size()); size -= slice.size(); offset += slice.size(); } checksum_generator->Finalize(); *file_checksum = checksum_generator->GetChecksum(); *file_checksum_func_name = checksum_generator->Name(); return IOStatus::OK(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/file_util.h000066400000000000000000000040101370372246700163040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include "file/filename.h" #include "options/db_options.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/sst_file_writer.h" #include "rocksdb/status.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { // use_fsync maps to options.use_fsync, which determines the way that // the file is synced after copying. 
extern IOStatus CopyFile(FileSystem* fs, const std::string& source, const std::string& destination, uint64_t size, bool use_fsync); extern IOStatus CreateFile(FileSystem* fs, const std::string& destination, const std::string& contents, bool use_fsync); extern Status DeleteDBFile(const ImmutableDBOptions* db_options, const std::string& fname, const std::string& path_to_sync, const bool force_bg, const bool force_fg); extern bool IsWalDirSameAsDBPath(const ImmutableDBOptions* db_options); extern IOStatus GenerateOneFileChecksum( FileSystem* fs, const std::string& file_path, FileChecksumGenFactory* checksum_factory, std::string* file_checksum, std::string* file_checksum_func_name, size_t verify_checksums_readahead_size, bool allow_mmap_reads); inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, Env* env, IOOptions& opts) { if (!env) { env = Env::Default(); } if (ro.deadline.count()) { std::chrono::microseconds now = std::chrono::microseconds(env->NowMicros()); if (now > ro.deadline) { return IOStatus::TimedOut("Deadline exceeded"); } opts.timeout = ro.deadline - now; } return IOStatus::OK(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/filename.cc000066400000000000000000000344111370372246700162560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "file/filename.h" #include #include #include #include #include "file/writable_file_writer.h" #include "logging/logging.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" #include "util/stop_watch.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { static const std::string kRocksDbTFileExt = "sst"; static const std::string kLevelDbTFileExt = "ldb"; static const std::string kRocksDBBlobFileExt = "blob"; // Given a path, flatten the path name by replacing all chars not in // {[0-9,a-z,A-Z,-,_,.]} with _. And append '_LOG\0' at the end. // Return the number of chars stored in dest not including the trailing '\0'. static size_t GetInfoLogPrefix(const std::string& path, char* dest, int len) { const char suffix[] = "_LOG"; size_t write_idx = 0; size_t i = 0; size_t src_len = path.size(); while (i < src_len && write_idx < len - sizeof(suffix)) { if ((path[i] >= 'a' && path[i] <= 'z') || (path[i] >= '0' && path[i] <= '9') || (path[i] >= 'A' && path[i] <= 'Z') || path[i] == '-' || path[i] == '.' 
|| path[i] == '_'){ dest[write_idx++] = path[i]; } else { if (i > 0) { dest[write_idx++] = '_'; } } i++; } assert(sizeof(suffix) <= len - write_idx); // "\0" is automatically added by snprintf snprintf(dest + write_idx, len - write_idx, suffix); write_idx += sizeof(suffix) - 1; return write_idx; } static std::string MakeFileName(uint64_t number, const char* suffix) { char buf[100]; snprintf(buf, sizeof(buf), "%06llu.%s", static_cast(number), suffix); return buf; } static std::string MakeFileName(const std::string& name, uint64_t number, const char* suffix) { return name + "/" + MakeFileName(number, suffix); } std::string LogFileName(const std::string& name, uint64_t number) { assert(number > 0); return MakeFileName(name, number, "log"); } std::string LogFileName(uint64_t number) { assert(number > 0); return MakeFileName(number, "log"); } std::string BlobFileName(uint64_t number) { assert(number > 0); return MakeFileName(number, kRocksDBBlobFileExt.c_str()); } std::string BlobFileName(const std::string& blobdirname, uint64_t number) { assert(number > 0); return MakeFileName(blobdirname, number, kRocksDBBlobFileExt.c_str()); } std::string BlobFileName(const std::string& dbname, const std::string& blob_dir, uint64_t number) { assert(number > 0); return MakeFileName(dbname + "/" + blob_dir, number, kRocksDBBlobFileExt.c_str()); } std::string ArchivalDirectory(const std::string& dir) { return dir + "/" + ARCHIVAL_DIR; } std::string ArchivedLogFileName(const std::string& name, uint64_t number) { assert(number > 0); return MakeFileName(name + "/" + ARCHIVAL_DIR, number, "log"); } std::string MakeTableFileName(const std::string& path, uint64_t number) { return MakeFileName(path, number, kRocksDbTFileExt.c_str()); } std::string MakeTableFileName(uint64_t number) { return MakeFileName(number, kRocksDbTFileExt.c_str()); } std::string Rocks2LevelTableFileName(const std::string& fullname) { assert(fullname.size() > kRocksDbTFileExt.size() + 1); if (fullname.size() <= 
kRocksDbTFileExt.size() + 1) { return ""; } return fullname.substr(0, fullname.size() - kRocksDbTFileExt.size()) + kLevelDbTFileExt; } uint64_t TableFileNameToNumber(const std::string& name) { uint64_t number = 0; uint64_t base = 1; int pos = static_cast(name.find_last_of('.')); while (--pos >= 0 && name[pos] >= '0' && name[pos] <= '9') { number += (name[pos] - '0') * base; base *= 10; } return number; } std::string TableFileName(const std::vector& db_paths, uint64_t number, uint32_t path_id) { assert(number > 0); std::string path; if (path_id >= db_paths.size()) { path = db_paths.back().path; } else { path = db_paths[path_id].path; } return MakeTableFileName(path, number); } void FormatFileNumber(uint64_t number, uint32_t path_id, char* out_buf, size_t out_buf_size) { if (path_id == 0) { snprintf(out_buf, out_buf_size, "%" PRIu64, number); } else { snprintf(out_buf, out_buf_size, "%" PRIu64 "(path " "%" PRIu32 ")", number, path_id); } } std::string DescriptorFileName(const std::string& dbname, uint64_t number) { assert(number > 0); char buf[100]; snprintf(buf, sizeof(buf), "/MANIFEST-%06llu", static_cast(number)); return dbname + buf; } std::string CurrentFileName(const std::string& dbname) { return dbname + "/CURRENT"; } std::string LockFileName(const std::string& dbname) { return dbname + "/LOCK"; } std::string TempFileName(const std::string& dbname, uint64_t number) { return MakeFileName(dbname, number, kTempFileNameSuffix.c_str()); } InfoLogPrefix::InfoLogPrefix(bool has_log_dir, const std::string& db_absolute_path) { if (!has_log_dir) { const char kInfoLogPrefix[] = "LOG"; // "\0" is automatically added to the end snprintf(buf, sizeof(buf), kInfoLogPrefix); prefix = Slice(buf, sizeof(kInfoLogPrefix) - 1); } else { size_t len = GetInfoLogPrefix(db_absolute_path, buf, sizeof(buf)); prefix = Slice(buf, len); } } std::string InfoLogFileName(const std::string& dbname, const std::string& db_path, const std::string& log_dir) { if (log_dir.empty()) { return dbname + 
"/LOG"; } InfoLogPrefix info_log_prefix(true, db_path); return log_dir + "/" + info_log_prefix.buf; } // Return the name of the old info log file for "dbname". std::string OldInfoLogFileName(const std::string& dbname, uint64_t ts, const std::string& db_path, const std::string& log_dir) { char buf[50]; snprintf(buf, sizeof(buf), "%llu", static_cast(ts)); if (log_dir.empty()) { return dbname + "/LOG.old." + buf; } InfoLogPrefix info_log_prefix(true, db_path); return log_dir + "/" + info_log_prefix.buf + ".old." + buf; } std::string OptionsFileName(const std::string& dbname, uint64_t file_num) { char buffer[256]; snprintf(buffer, sizeof(buffer), "%s%06" PRIu64, kOptionsFileNamePrefix.c_str(), file_num); return dbname + "/" + buffer; } std::string TempOptionsFileName(const std::string& dbname, uint64_t file_num) { char buffer[256]; snprintf(buffer, sizeof(buffer), "%s%06" PRIu64 ".%s", kOptionsFileNamePrefix.c_str(), file_num, kTempFileNameSuffix.c_str()); return dbname + "/" + buffer; } std::string MetaDatabaseName(const std::string& dbname, uint64_t number) { char buf[100]; snprintf(buf, sizeof(buf), "/METADB-%llu", static_cast(number)); return dbname + buf; } std::string IdentityFileName(const std::string& dbname) { return dbname + "/IDENTITY"; } // Owned filenames have the form: // dbname/IDENTITY // dbname/CURRENT // dbname/LOCK // dbname/ // dbname/.old.[0-9]+ // dbname/MANIFEST-[0-9]+ // dbname/[0-9]+.(log|sst|blob) // dbname/METADB-[0-9]+ // dbname/OPTIONS-[0-9]+ // dbname/OPTIONS-[0-9]+.dbtmp // Disregards / at the beginning bool ParseFileName(const std::string& fname, uint64_t* number, FileType* type, WalFileType* log_type) { return ParseFileName(fname, number, "", type, log_type); } bool ParseFileName(const std::string& fname, uint64_t* number, const Slice& info_log_name_prefix, FileType* type, WalFileType* log_type) { Slice rest(fname); if (fname.length() > 1 && fname[0] == '/') { rest.remove_prefix(1); } if (rest == "IDENTITY") { *number = 0; *type = 
kIdentityFile; } else if (rest == "CURRENT") { *number = 0; *type = kCurrentFile; } else if (rest == "LOCK") { *number = 0; *type = kDBLockFile; } else if (info_log_name_prefix.size() > 0 && rest.starts_with(info_log_name_prefix)) { rest.remove_prefix(info_log_name_prefix.size()); if (rest == "" || rest == ".old") { *number = 0; *type = kInfoLogFile; } else if (rest.starts_with(".old.")) { uint64_t ts_suffix; // sizeof also counts the trailing '\0'. rest.remove_prefix(sizeof(".old.") - 1); if (!ConsumeDecimalNumber(&rest, &ts_suffix)) { return false; } *number = ts_suffix; *type = kInfoLogFile; } } else if (rest.starts_with("MANIFEST-")) { rest.remove_prefix(strlen("MANIFEST-")); uint64_t num; if (!ConsumeDecimalNumber(&rest, &num)) { return false; } if (!rest.empty()) { return false; } *type = kDescriptorFile; *number = num; } else if (rest.starts_with("METADB-")) { rest.remove_prefix(strlen("METADB-")); uint64_t num; if (!ConsumeDecimalNumber(&rest, &num)) { return false; } if (!rest.empty()) { return false; } *type = kMetaDatabase; *number = num; } else if (rest.starts_with(kOptionsFileNamePrefix)) { uint64_t ts_suffix; bool is_temp_file = false; rest.remove_prefix(kOptionsFileNamePrefix.size()); const std::string kTempFileNameSuffixWithDot = std::string(".") + kTempFileNameSuffix; if (rest.ends_with(kTempFileNameSuffixWithDot)) { rest.remove_suffix(kTempFileNameSuffixWithDot.size()); is_temp_file = true; } if (!ConsumeDecimalNumber(&rest, &ts_suffix)) { return false; } *number = ts_suffix; *type = is_temp_file ? 
kTempFile : kOptionsFile; } else { // Avoid strtoull() to keep filename format independent of the // current locale bool archive_dir_found = false; if (rest.starts_with(ARCHIVAL_DIR)) { if (rest.size() <= ARCHIVAL_DIR.size()) { return false; } rest.remove_prefix(ARCHIVAL_DIR.size() + 1); // Add 1 to remove / also if (log_type) { *log_type = kArchivedLogFile; } archive_dir_found = true; } uint64_t num; if (!ConsumeDecimalNumber(&rest, &num)) { return false; } if (rest.size() <= 1 || rest[0] != '.') { return false; } rest.remove_prefix(1); Slice suffix = rest; if (suffix == Slice("log")) { *type = kLogFile; if (log_type && !archive_dir_found) { *log_type = kAliveLogFile; } } else if (archive_dir_found) { return false; // Archive dir can contain only log files } else if (suffix == Slice(kRocksDbTFileExt) || suffix == Slice(kLevelDbTFileExt)) { *type = kTableFile; } else if (suffix == Slice(kRocksDBBlobFileExt)) { *type = kBlobFile; } else if (suffix == Slice(kTempFileNameSuffix)) { *type = kTempFile; } else { return false; } *number = num; } return true; } IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, uint64_t descriptor_number, FSDirectory* directory_to_fsync) { // Remove leading "dbname/" and add newline to manifest file name std::string manifest = DescriptorFileName(dbname, descriptor_number); Slice contents = manifest; assert(contents.starts_with(dbname + "/")); contents.remove_prefix(dbname.size() + 1); std::string tmp = TempFileName(dbname, descriptor_number); IOStatus s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true); if (s.ok()) { TEST_KILL_RANDOM("SetCurrentFile:0", rocksdb_kill_odds * REDUCE_ODDS2); s = fs->RenameFile(tmp, CurrentFileName(dbname), IOOptions(), nullptr); TEST_KILL_RANDOM("SetCurrentFile:1", rocksdb_kill_odds * REDUCE_ODDS2); } if (s.ok()) { if (directory_to_fsync != nullptr) { s = directory_to_fsync->Fsync(IOOptions(), nullptr); } } else { fs->DeleteFile(tmp, IOOptions(), nullptr); } return s; } Status 
SetIdentityFile(Env* env, const std::string& dbname, const std::string& db_id) { std::string id; if (db_id.empty()) { id = env->GenerateUniqueId(); } else { id = db_id; } assert(!id.empty()); // Reserve the filename dbname/000000.dbtmp for the temporary identity file std::string tmp = TempFileName(dbname, 0); Status s = WriteStringToFile(env, id, tmp, true); if (s.ok()) { s = env->RenameFile(tmp, IdentityFileName(dbname)); } if (!s.ok()) { env->DeleteFile(tmp); } return s; } IOStatus SyncManifest(Env* env, const ImmutableDBOptions* db_options, WritableFileWriter* file) { TEST_KILL_RANDOM("SyncManifest:0", rocksdb_kill_odds * REDUCE_ODDS2); StopWatch sw(env, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS); return file->Sync(db_options->use_fsync); } Status GetInfoLogFiles(Env* env, const std::string& db_log_dir, const std::string& dbname, std::string* parent_dir, std::vector* info_log_list) { assert(parent_dir != nullptr); assert(info_log_list != nullptr); uint64_t number = 0; FileType type = kLogFile; if (!db_log_dir.empty()) { *parent_dir = db_log_dir; } else { *parent_dir = dbname; } InfoLogPrefix info_log_prefix(!db_log_dir.empty(), dbname); std::vector file_names; Status s = env->GetChildren(*parent_dir, &file_names); if (!s.ok()) { return s; } for (auto& f : file_names) { if (ParseFileName(f, &number, info_log_prefix.prefix, &type) && (type == kInfoLogFile)) { info_log_list->push_back(f); } } return Status::OK(); } std::string NormalizePath(const std::string& path) { std::string dst; for (auto c : path) { if (!dst.empty() && c == kFilePathSeparator && dst.back() == kFilePathSeparator) { continue; } dst.push_back(c); } return dst; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/filename.h000066400000000000000000000170411370372246700161200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // File names used by DB code #pragma once #include #include #include #include #include "options/db_options.h" #include "port/port.h" #include "rocksdb/file_system.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "rocksdb/transaction_log.h" namespace ROCKSDB_NAMESPACE { class Env; class Directory; class WritableFileWriter; #ifdef OS_WIN const char kFilePathSeparator = '\\'; #else const char kFilePathSeparator = '/'; #endif enum FileType { kLogFile, kDBLockFile, kTableFile, kDescriptorFile, kCurrentFile, kTempFile, kInfoLogFile, // Either the current one, or an old one kMetaDatabase, kIdentityFile, kOptionsFile, kBlobFile }; // Return the name of the log file with the specified number // in the db named by "dbname". The result will be prefixed with // "dbname". extern std::string LogFileName(const std::string& dbname, uint64_t number); extern std::string LogFileName(uint64_t number); extern std::string BlobFileName(uint64_t number); extern std::string BlobFileName(const std::string& bdirname, uint64_t number); extern std::string BlobFileName(const std::string& dbname, const std::string& blob_dir, uint64_t number); static const std::string ARCHIVAL_DIR = "archive"; extern std::string ArchivalDirectory(const std::string& dbname); // Return the name of the archived log file with the specified number // in the db named by "dbname". The result will be prefixed with "dbname". 
extern std::string ArchivedLogFileName(const std::string& dbname, uint64_t num); extern std::string MakeTableFileName(const std::string& name, uint64_t number); extern std::string MakeTableFileName(uint64_t number); // Return the name of sstable with LevelDB suffix // created from RocksDB sstable suffixed name extern std::string Rocks2LevelTableFileName(const std::string& fullname); // the reverse function of MakeTableFileName // TODO(yhchiang): could merge this function with ParseFileName() extern uint64_t TableFileNameToNumber(const std::string& name); // Return the name of the sstable with the specified number // in the db named by "dbname". The result will be prefixed with // "dbname". extern std::string TableFileName(const std::vector& db_paths, uint64_t number, uint32_t path_id); // Sufficient buffer size for FormatFileNumber. const size_t kFormatFileNumberBufSize = 38; extern void FormatFileNumber(uint64_t number, uint32_t path_id, char* out_buf, size_t out_buf_size); // Return the name of the descriptor file for the db named by // "dbname" and the specified incarnation number. The result will be // prefixed with "dbname". extern std::string DescriptorFileName(const std::string& dbname, uint64_t number); // Return the name of the current file. This file contains the name // of the current manifest file. The result will be prefixed with // "dbname". extern std::string CurrentFileName(const std::string& dbname); // Return the name of the lock file for the db named by // "dbname". The result will be prefixed with "dbname". extern std::string LockFileName(const std::string& dbname); // Return the name of a temporary file owned by the db named "dbname". // The result will be prefixed with "dbname". extern std::string TempFileName(const std::string& dbname, uint64_t number); // A helper structure for prefix of info log names. 
struct InfoLogPrefix { char buf[260]; Slice prefix; // Prefix with DB absolute path encoded explicit InfoLogPrefix(bool has_log_dir, const std::string& db_absolute_path); // Default Prefix explicit InfoLogPrefix(); }; // Return the name of the info log file for "dbname". extern std::string InfoLogFileName(const std::string& dbname, const std::string& db_path = "", const std::string& log_dir = ""); // Return the name of the old info log file for "dbname". extern std::string OldInfoLogFileName(const std::string& dbname, uint64_t ts, const std::string& db_path = "", const std::string& log_dir = ""); static const std::string kOptionsFileNamePrefix = "OPTIONS-"; static const std::string kTempFileNameSuffix = "dbtmp"; // Return a options file name given the "dbname" and file number. // Format: OPTIONS-[number].dbtmp extern std::string OptionsFileName(const std::string& dbname, uint64_t file_num); // Return a temp options file name given the "dbname" and file number. // Format: OPTIONS-[number] extern std::string TempOptionsFileName(const std::string& dbname, uint64_t file_num); // Return the name to use for a metadatabase. The result will be prefixed with // "dbname". extern std::string MetaDatabaseName(const std::string& dbname, uint64_t number); // Return the name of the Identity file which stores a unique number for the db // that will get regenerated if the db loses all its data and is recreated fresh // either from a backup-image or empty extern std::string IdentityFileName(const std::string& dbname); // If filename is a rocksdb file, store the type of the file in *type. // The number encoded in the filename is stored in *number. If the // filename was successfully parsed, returns true. Else return false. // info_log_name_prefix is the path of info logs. extern bool ParseFileName(const std::string& filename, uint64_t* number, const Slice& info_log_name_prefix, FileType* type, WalFileType* log_type = nullptr); // Same as previous function, but skip info log files. 
extern bool ParseFileName(const std::string& filename, uint64_t* number, FileType* type, WalFileType* log_type = nullptr); // Make the CURRENT file point to the descriptor file with the // specified number. extern IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, uint64_t descriptor_number, FSDirectory* directory_to_fsync); // Make the IDENTITY file for the db extern Status SetIdentityFile(Env* env, const std::string& dbname, const std::string& db_id = {}); // Sync manifest file `file`. extern IOStatus SyncManifest(Env* env, const ImmutableDBOptions* db_options, WritableFileWriter* file); // Return list of file names of info logs in `file_names`. // The list only contains file name. The parent directory name is stored // in `parent_dir`. // `db_log_dir` should be the one as in options.db_log_dir extern Status GetInfoLogFiles(Env* env, const std::string& db_log_dir, const std::string& dbname, std::string* parent_dir, std::vector* file_names); extern std::string NormalizePath(const std::string& path); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/random_access_file_reader.cc000066400000000000000000000256271370372246700216310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "file/random_access_file_reader.h"

#include <algorithm>
#include <mutex>

#include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
#include "table/format.h"
#include "test_util/sync_point.h"
#include "util/random.h"
#include "util/rate_limiter.h"

namespace ROCKSDB_NAMESPACE {

// Reads up to `n` bytes starting at `offset` and points `*result` at them.
//
// Direct IO: the request is widened to the file's required alignment and read
// into an internally allocated aligned buffer. If `aligned_buf` is null the
// requested bytes are copied into `scratch`; otherwise ownership of the
// aligned buffer is handed to `*aligned_buf` and `*result` points into it.
//
// Buffered IO: data is read into `scratch` (or wherever the underlying file
// returns it, e.g. for mmap'd files). When `for_compaction` is set and a rate
// limiter is configured, the read is split into rate-limited chunks.
Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
                                    size_t n, Slice* result, char* scratch,
                                    AlignedBuf* aligned_buf,
                                    bool for_compaction) const {
  (void)aligned_buf;

  TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::Read", nullptr);
  Status s;
  uint64_t elapsed = 0;
  {
    StopWatch sw(env_, stats_, hist_type_,
                 (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
                 true /*delay_enabled*/);
    auto prev_perf_level = GetPerfLevel();
    IOSTATS_TIMER_GUARD(read_nanos);
    if (use_direct_io()) {
#ifndef ROCKSDB_LITE
      size_t alignment = file_->GetRequiredBufferAlignment();
      size_t aligned_offset =
          TruncateToPageBoundary(alignment, static_cast<size_t>(offset));
      size_t offset_advance = static_cast<size_t>(offset) - aligned_offset;
      size_t read_size =
          Roundup(static_cast<size_t>(offset + n), alignment) - aligned_offset;
      AlignedBuffer buf;
      buf.Alignment(alignment);
      buf.AllocateNewBuffer(read_size);
      while (buf.CurrentSize() < read_size) {
        size_t allowed;
        if (for_compaction && rate_limiter_ != nullptr) {
          allowed = rate_limiter_->RequestToken(
              buf.Capacity() - buf.CurrentSize(), buf.Alignment(),
              Env::IOPriority::IO_LOW, stats_, RateLimiter::OpType::kRead);
        } else {
          assert(buf.CurrentSize() == 0);
          allowed = read_size;
        }
        Slice tmp;

        FileOperationInfo::TimePoint start_ts;
        uint64_t orig_offset = 0;
        if (ShouldNotifyListeners()) {
          start_ts = std::chrono::system_clock::now();
          orig_offset = aligned_offset + buf.CurrentSize();
        }

        {
          IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
          // Only user reads are expected to specify a timeout. And user reads
          // are not subjected to rate_limiter and should go through only
          // one iteration of this loop, so we don't need to check and adjust
          // the opts.timeout before calling file_->Read
          assert(!opts.timeout.count() || allowed == read_size);
          s = file_->Read(aligned_offset + buf.CurrentSize(), allowed, opts,
                          &tmp, buf.Destination(), nullptr);
        }
        if (ShouldNotifyListeners()) {
          auto finish_ts = std::chrono::system_clock::now();
          NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, finish_ts,
                                 s);
        }

        buf.Size(buf.CurrentSize() + tmp.size());
        // A failed or short read ends the loop.
        if (!s.ok() || tmp.size() < allowed) {
          break;
        }
      }
      size_t res_len = 0;
      if (s.ok() && offset_advance < buf.CurrentSize()) {
        res_len = std::min(buf.CurrentSize() - offset_advance, n);
        if (aligned_buf == nullptr) {
          buf.Read(scratch, offset_advance, res_len);
        } else {
          scratch = buf.BufferStart() + offset_advance;
          aligned_buf->reset(buf.Release());
        }
      }
      *result = Slice(scratch, res_len);
#endif  // !ROCKSDB_LITE
    } else {
      size_t pos = 0;
      const char* res_scratch = nullptr;
      while (pos < n) {
        size_t allowed;
        if (for_compaction && rate_limiter_ != nullptr) {
          if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
            sw.DelayStart();
          }
          allowed = rate_limiter_->RequestToken(n - pos, 0 /* alignment */,
                                                Env::IOPriority::IO_LOW, stats_,
                                                RateLimiter::OpType::kRead);
          if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
            sw.DelayStop();
          }
        } else {
          allowed = n;
        }
        Slice tmp_result;

#ifndef ROCKSDB_LITE
        FileOperationInfo::TimePoint start_ts;
        if (ShouldNotifyListeners()) {
          start_ts = std::chrono::system_clock::now();
        }
#endif

        {
          IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
          // Only user reads are expected to specify a timeout. And user reads
          // are not subjected to rate_limiter and should go through only
          // one iteration of this loop, so we don't need to check and adjust
          // the opts.timeout before calling file_->Read
          assert(!opts.timeout.count() || allowed == n);
          s = file_->Read(offset + pos, allowed, opts, &tmp_result,
                          scratch + pos, nullptr);
        }
#ifndef ROCKSDB_LITE
        if (ShouldNotifyListeners()) {
          auto finish_ts = std::chrono::system_clock::now();
          NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts,
                                 finish_ts, s);
        }
#endif

        if (res_scratch == nullptr) {
          // we can't simply use `scratch` because reads of mmap'd files return
          // data in a different buffer.
          res_scratch = tmp_result.data();
        } else {
          // make sure chunks are inserted contiguously into `res_scratch`.
          assert(tmp_result.data() == res_scratch + pos);
        }
        pos += tmp_result.size();
        if (!s.ok() || tmp_result.size() < allowed) {
          break;
        }
      }
      *result = Slice(res_scratch, s.ok() ? pos : 0);
    }
    IOSTATS_ADD_IF_POSITIVE(bytes_read, result->size());
    SetPerfLevel(prev_perf_level);
  }
  if (stats_ != nullptr && file_read_hist_ != nullptr) {
    file_read_hist_->Add(elapsed);
  }

  return s;
}

// Returns the end offset (offset + len) of the request.
size_t End(const FSReadRequest& r) {
  return static_cast<size_t>(r.offset) + r.len;
}

// Returns a copy of `r` widened so that both its start and its end fall on a
// multiple of `alignment`. The returned request has no scratch buffer.
FSReadRequest Align(const FSReadRequest& r, size_t alignment) {
  FSReadRequest req;
  req.offset = static_cast<uint64_t>(
      TruncateToPageBoundary(alignment, static_cast<size_t>(r.offset)));
  req.len = Roundup(End(r), alignment) - req.offset;
  req.scratch = nullptr;
  return req;
}

// Try to merge src to dest if they have overlap.
//
// Each request represents an inclusive interval [offset, offset + len].
// If the intervals have overlap, update offset and len to represent the
// merged interval, and return true.
// Otherwise, do nothing and return false.
bool TryMerge(FSReadRequest* dest, const FSReadRequest& src) {
  size_t dest_offset = static_cast<size_t>(dest->offset);
  size_t src_offset = static_cast<size_t>(src.offset);
  size_t dest_end = End(*dest);
  size_t src_end = End(src);
  // Two intervals overlap iff the larger start is not past the smaller end.
  // Both starts must take part in the comparison; comparing dest_offset with
  // itself would merge requests that are separated by a gap.
  if (std::max(dest_offset, src_offset) > std::min(dest_end, src_end)) {
    return false;
  }
  dest->offset = static_cast<uint64_t>(std::min(dest_offset, src_offset));
  dest->len = std::max(dest_end, src_end) - dest->offset;
  return true;
}

// Issues `num_reqs` reads in one call to the underlying file.
//
// In direct IO mode the requests are first aligned and overlapping/adjacent
// aligned requests are merged; the merged requests read into one aligned
// buffer whose ownership is passed to `*aligned_buf`, and each original
// request's `result` is then pointed at its slice of that buffer.
Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
                                         FSReadRequest* read_reqs,
                                         size_t num_reqs,
                                         AlignedBuf* aligned_buf) const {
  (void)aligned_buf;  // suppress warning of unused variable in LITE mode
  assert(num_reqs > 0);
  Status s;
  uint64_t elapsed = 0;
  {
    StopWatch sw(env_, stats_, hist_type_,
                 (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
                 true /*delay_enabled*/);
    auto prev_perf_level = GetPerfLevel();
    IOSTATS_TIMER_GUARD(read_nanos);

    FSReadRequest* fs_reqs = read_reqs;
    size_t num_fs_reqs = num_reqs;
#ifndef ROCKSDB_LITE
    std::vector<FSReadRequest> aligned_reqs;
    if (use_direct_io()) {
      // num_reqs is the max possible size,
      // this can reduce std::vector's internal resize operations.
      aligned_reqs.reserve(num_reqs);
      // Align and merge the read requests.
      size_t alignment = file_->GetRequiredBufferAlignment();
      aligned_reqs.push_back(Align(read_reqs[0], alignment));
      for (size_t i = 1; i < num_reqs; i++) {
        const auto& r = Align(read_reqs[i], alignment);
        if (!TryMerge(&aligned_reqs.back(), r)) {
          aligned_reqs.push_back(r);
        }
      }

      // Allocate aligned buffer and let scratch buffers point to it.
      size_t total_len = 0;
      for (const auto& r : aligned_reqs) {
        total_len += r.len;
      }
      AlignedBuffer buf;
      buf.Alignment(alignment);
      buf.AllocateNewBuffer(total_len);
      char* scratch = buf.BufferStart();
      for (auto& r : aligned_reqs) {
        r.scratch = scratch;
        scratch += r.len;
      }

      aligned_buf->reset(buf.Release());
      fs_reqs = aligned_reqs.data();
      num_fs_reqs = aligned_reqs.size();
    }
#endif  // ROCKSDB_LITE

#ifndef ROCKSDB_LITE
    FileOperationInfo::TimePoint start_ts;
    if (ShouldNotifyListeners()) {
      start_ts = std::chrono::system_clock::now();
    }
#endif  // ROCKSDB_LITE

    {
      IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
      s = file_->MultiRead(fs_reqs, num_fs_reqs, opts, nullptr);
    }

#ifndef ROCKSDB_LITE
    if (use_direct_io()) {
      // Populate results in the unaligned read requests.
      size_t aligned_i = 0;
      for (size_t i = 0; i < num_reqs; i++) {
        auto& r = read_reqs[i];
        // Requests arrive in increasing offset order, so a request that starts
        // past the current aligned request belongs to the next one.
        if (static_cast<size_t>(r.offset) > End(aligned_reqs[aligned_i])) {
          aligned_i++;
        }
        const auto& fs_r = fs_reqs[aligned_i];
        r.status = fs_r.status;
        if (r.status.ok()) {
          uint64_t offset = r.offset - fs_r.offset;
          size_t len = std::min(r.len, static_cast<size_t>(fs_r.len - offset));
          r.result = Slice(fs_r.scratch + offset, len);
        } else {
          r.result = Slice();
        }
      }
    }
#endif  // ROCKSDB_LITE

    for (size_t i = 0; i < num_reqs; ++i) {
#ifndef ROCKSDB_LITE
      if (ShouldNotifyListeners()) {
        auto finish_ts = std::chrono::system_clock::now();
        NotifyOnFileReadFinish(read_reqs[i].offset, read_reqs[i].result.size(),
                               start_ts, finish_ts, read_reqs[i].status);
      }
#endif  // ROCKSDB_LITE
      IOSTATS_ADD_IF_POSITIVE(bytes_read, read_reqs[i].result.size());
    }
    SetPerfLevel(prev_perf_level);
  }
  if (stats_ != nullptr && file_read_hist_ != nullptr) {
    file_read_hist_->Add(elapsed);
  }

  return s;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/file/random_access_file_reader.h000066400000000000000000000117651370372246700214710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/listener.h" #include "rocksdb/rate_limiter.h" #include "util/aligned_buffer.h" namespace ROCKSDB_NAMESPACE { class Statistics; class HistogramImpl; using AlignedBuf = std::unique_ptr; // RandomAccessFileReader is a wrapper on top of Env::RnadomAccessFile. It is // responsible for: // - Handling Buffered and Direct reads appropriately. // - Rate limiting compaction reads. // - Notifying any interested listeners on the completion of a read. // - Updating IO stats. 
class RandomAccessFileReader { private: #ifndef ROCKSDB_LITE void NotifyOnFileReadFinish(uint64_t offset, size_t length, const FileOperationInfo::TimePoint& start_ts, const FileOperationInfo::TimePoint& finish_ts, const Status& status) const { FileOperationInfo info(file_name_, start_ts, finish_ts); info.offset = offset; info.length = length; info.status = status; for (auto& listener : listeners_) { listener->OnFileReadFinish(info); } } #endif // ROCKSDB_LITE bool ShouldNotifyListeners() const { return !listeners_.empty(); } std::unique_ptr file_; std::string file_name_; Env* env_; Statistics* stats_; uint32_t hist_type_; HistogramImpl* file_read_hist_; RateLimiter* rate_limiter_; std::vector> listeners_; public: explicit RandomAccessFileReader( std::unique_ptr&& raf, const std::string& _file_name, Env* _env = nullptr, Statistics* stats = nullptr, uint32_t hist_type = 0, HistogramImpl* file_read_hist = nullptr, RateLimiter* rate_limiter = nullptr, const std::vector>& listeners = {}) : file_(std::move(raf)), file_name_(std::move(_file_name)), env_(_env), stats_(stats), hist_type_(hist_type), file_read_hist_(file_read_hist), rate_limiter_(rate_limiter), listeners_() { #ifndef ROCKSDB_LITE std::for_each(listeners.begin(), listeners.end(), [this](const std::shared_ptr& e) { if (e->ShouldBeNotifiedOnFileIO()) { listeners_.emplace_back(e); } }); #else // !ROCKSDB_LITE (void)listeners; #endif } RandomAccessFileReader(RandomAccessFileReader&& o) ROCKSDB_NOEXCEPT { *this = std::move(o); } RandomAccessFileReader& operator=(RandomAccessFileReader&& o) ROCKSDB_NOEXCEPT { file_ = std::move(o.file_); env_ = std::move(o.env_); stats_ = std::move(o.stats_); hist_type_ = std::move(o.hist_type_); file_read_hist_ = std::move(o.file_read_hist_); rate_limiter_ = std::move(o.rate_limiter_); return *this; } RandomAccessFileReader(const RandomAccessFileReader&) = delete; RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete; // In non-direct IO mode, // 1. 
if using mmap, result is stored in a buffer other than scratch; // 2. if not using mmap, result is stored in the buffer starting from scratch. // // In direct IO mode, an aligned buffer is allocated internally. // 1. If aligned_buf is null, then results are copied to the buffer // starting from scratch; // 2. Otherwise, scratch is not used and can be null, the aligned_buf owns // the internally allocated buffer on return, and the result refers to a // region in aligned_buf. Status Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result, char* scratch, AlignedBuf* aligned_buf, bool for_compaction = false) const; // REQUIRES: // num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing. // In non-direct IO mode, aligned_buf should be null; // In direct IO mode, aligned_buf stores the aligned buffer allocated inside // MultiRead, the result Slices in reqs refer to aligned_buf. Status MultiRead(const IOOptions& opts, FSReadRequest* reqs, size_t num_reqs, AlignedBuf* aligned_buf) const; Status Prefetch(uint64_t offset, size_t n) const { return file_->Prefetch(offset, n, IOOptions(), nullptr); } FSRandomAccessFile* file() { return file_.get(); } std::string file_name() const { return file_name_; } bool use_direct_io() const { return file_->use_direct_io(); } Env* env() const { return env_; } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/random_access_file_reader_test.cc000066400000000000000000000165411370372246700226630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/file_system.h"
#include "file/random_access_file_reader.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"

namespace ROCKSDB_NAMESPACE {

// Fixture that creates a per-thread scratch directory and offers helpers to
// write a file, open it through RandomAccessFileReader, and verify
// MultiRead results against the expected content.
class RandomAccessFileReaderTest : public testing::Test {
 public:
  void SetUp() override {
    test::SetupSyncPointsToMockDirectIO();
    env_ = Env::Default();
    fs_ = FileSystem::Default();
    test_dir_ = test::PerThreadDBPath("random_access_file_reader_test");
    ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr));
    ComputeAndSetAlignment();
  }

  void TearDown() override { EXPECT_OK(test::DestroyDir(env_, test_dir_)); }

  // Creates `fname` in the test directory with exactly `content`.
  void Write(const std::string& fname, const std::string& content) {
    std::unique_ptr<FSWritableFile> f;
    ASSERT_OK(fs_->NewWritableFile(Path(fname), FileOptions(), &f, nullptr));
    ASSERT_OK(f->Append(content, IOOptions(), nullptr));
    ASSERT_OK(f->Close(IOOptions(), nullptr));
  }

  // Opens `fname` with `opts` and wraps it in a RandomAccessFileReader.
  void Read(const std::string& fname, const FileOptions& opts,
            std::unique_ptr<RandomAccessFileReader>* reader) {
    std::string fpath = Path(fname);
    std::unique_ptr<FSRandomAccessFile> f;
    ASSERT_OK(fs_->NewRandomAccessFile(fpath, opts, &f, nullptr));
    (*reader).reset(new RandomAccessFileReader(std::move(f), fpath, env_));
  }

  // Checks that every request succeeded and returned exactly the bytes of
  // `content` in [offset, offset + len).
  void AssertResult(const std::string& content,
                    const std::vector<FSReadRequest>& reqs) {
    for (const auto& r : reqs) {
      ASSERT_OK(r.status);
      ASSERT_EQ(r.len, r.result.size());
      ASSERT_EQ(content.substr(r.offset, r.len), r.result.ToString());
    }
  }

  size_t alignment() const { return alignment_; }

 private:
  Env* env_;
  std::shared_ptr<FileSystem> fs_;
  std::string test_dir_;
  size_t alignment_;

  std::string Path(const std::string& fname) {
    return test_dir_ + "/" + fname;
  }

  // Opens a throw-away empty file just to query the required buffer
  // alignment of the underlying file implementation.
  void ComputeAndSetAlignment() {
    std::string f = "get_alignment";
    Write(f, "");
    std::unique_ptr<RandomAccessFileReader> r;
    Read(f, FileOptions(), &r);
    alignment_ = r->file()->GetRequiredBufferAlignment();
    EXPECT_OK(fs_->DeleteFile(Path(f), IOOptions(), nullptr));
  }
};

// Skip the following tests in lite mode since direct I/O is unsupported.
#ifndef ROCKSDB_LITE

TEST_F(RandomAccessFileReaderTest, ReadDirectIO) {
  std::string fname = "read-direct-io";
  Random rand(0);
  std::string content;
  test::RandomString(&rand, static_cast<int>(alignment()), &content);
  Write(fname, content);

  FileOptions opts;
  opts.use_direct_reads = true;
  std::unique_ptr<RandomAccessFileReader> r;
  Read(fname, opts, &r);
  ASSERT_TRUE(r->use_direct_io());

  size_t offset = alignment() / 2;
  size_t len = alignment() / 3;
  Slice result;
  AlignedBuf buf;
  for (bool for_compaction : {true, false}) {
    ASSERT_OK(r->Read(IOOptions(), offset, len, &result, nullptr, &buf,
                      for_compaction));
    ASSERT_EQ(result.ToString(), content.substr(offset, len));
  }
}

TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) {
  // Creates a file with 3 pages.
  std::string fname = "multi-read-direct-io";
  Random rand(0);
  std::string content;
  test::RandomString(&rand, 3 * static_cast<int>(alignment()), &content);
  Write(fname, content);

  FileOptions opts;
  opts.use_direct_reads = true;
  std::unique_ptr<RandomAccessFileReader> r;
  Read(fname, opts, &r);
  ASSERT_TRUE(r->use_direct_io());

  {
    // Reads 2 blocks in the 1st page.
    // The results should be SharedSlices of the same underlying buffer.
    //
    // Illustration (each x is a 1/4 page)
    // First page: xxxx
    // 1st block:  x
    // 2nd block:    xx
    FSReadRequest r0;
    r0.offset = 0;
    r0.len = alignment() / 4;
    r0.scratch = nullptr;

    FSReadRequest r1;
    r1.offset = alignment() / 2;
    r1.len = alignment() / 2;
    r1.scratch = nullptr;

    std::vector<FSReadRequest> reqs;
    reqs.push_back(std::move(r0));
    reqs.push_back(std::move(r1));
    AlignedBuf aligned_buf;
    ASSERT_OK(
        r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));

    AssertResult(content, reqs);
  }

  {
    // Reads 3 blocks:
    // 1st block in the 1st page;
    // 2nd block from the middle of the 1st page to the middle of the 2nd page;
    // 3rd block in the 2nd page.
    // The results should be SharedSlices of the same underlying buffer.
    //
    // Illustration (each x is a 1/4 page)
    // 2 pages:   xxxxxxxx
    // 1st block: x
    // 2nd block:   xxxx
    // 3rd block:        x
    FSReadRequest r0;
    r0.offset = 0;
    r0.len = alignment() / 4;
    r0.scratch = nullptr;

    FSReadRequest r1;
    r1.offset = alignment() / 2;
    r1.len = alignment();
    r1.scratch = nullptr;

    FSReadRequest r2;
    r2.offset = 2 * alignment() - alignment() / 4;
    r2.len = alignment() / 4;
    r2.scratch = nullptr;

    std::vector<FSReadRequest> reqs;
    reqs.push_back(std::move(r0));
    reqs.push_back(std::move(r1));
    reqs.push_back(std::move(r2));
    AlignedBuf aligned_buf;
    ASSERT_OK(
        r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));

    AssertResult(content, reqs);
  }

  {
    // Reads 3 blocks:
    // 1st block in the middle of the 1st page;
    // 2nd block in the middle of the 2nd page;
    // 3rd block in the middle of the 3rd page.
    // The results should be SharedSlices of the same underlying buffer.
    //
    // Illustration (each x is a 1/4 page)
    // 3 pages:   xxxxxxxxxxxx
    // 1st block:  xx
    // 2nd block:      xx
    // 3rd block:          xx
    FSReadRequest r0;
    r0.offset = alignment() / 4;
    r0.len = alignment() / 2;
    r0.scratch = nullptr;

    FSReadRequest r1;
    r1.offset = alignment() + alignment() / 4;
    r1.len = alignment() / 2;
    r1.scratch = nullptr;

    FSReadRequest r2;
    r2.offset = 2 * alignment() + alignment() / 4;
    r2.len = alignment() / 2;
    r2.scratch = nullptr;

    std::vector<FSReadRequest> reqs;
    reqs.push_back(std::move(r0));
    reqs.push_back(std::move(r1));
    reqs.push_back(std::move(r2));
    AlignedBuf aligned_buf;
    ASSERT_OK(
        r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));

    AssertResult(content, reqs);
  }

  {
    // Reads 2 blocks:
    // 1st block in the middle of the 1st page;
    // 2nd block in the middle of the 3rd page.
    // The results are two different buffers.
    //
    // Illustration (each x is a 1/4 page)
    // 3 pages:   xxxxxxxxxxxx
    // 1st block:  xx
    // 2nd block:          xx
    FSReadRequest r0;
    r0.offset = alignment() / 4;
    r0.len = alignment() / 2;
    r0.scratch = nullptr;

    FSReadRequest r1;
    r1.offset = 2 * alignment() + alignment() / 4;
    r1.len = alignment() / 2;
    r1.scratch = nullptr;

    std::vector<FSReadRequest> reqs;
    reqs.push_back(std::move(r0));
    reqs.push_back(std::move(r1));
    AlignedBuf aligned_buf;
    ASSERT_OK(
        r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf));

    AssertResult(content, reqs);
  }
}

#endif  // ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/file/read_write_util.cc000066400000000000000000000043011370372246700176530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "file/read_write_util.h"

#include <sstream>
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {

// Creates a writable file named `fname` on `fs` and stores it in `*result`.
// Returns the status reported by FileSystem::NewWritableFile.
IOStatus NewWritableFile(FileSystem* fs, const std::string& fname,
                         std::unique_ptr<FSWritableFile>* result,
                         const FileOptions& options) {
  IOStatus s = fs->NewWritableFile(fname, options, result, nullptr);
  TEST_KILL_RANDOM("NewWritableFile:0", rocksdb_kill_odds * REDUCE_ODDS2);
  return s;
}

// Extracts the next line into `*output`, refilling `*iss` from
// `seq_file_reader` in kBufferSize chunks whenever the stream runs out before
// a line terminator is seen.
//
// `*has_data` is read to decide whether the file should be read again and is
// updated to false once a read returns no data or a short chunk. `*result`
// receives the status of the last file read performed.
// Returns true while there is either more file data or a complete line.
bool ReadOneLine(std::istringstream* iss, SequentialFileReader* seq_file_reader,
                 std::string* output, bool* has_data, Status* result) {
  const int kBufferSize = 8192;
  char buffer[kBufferSize + 1];
  Slice input_slice;

  std::string line;
  bool has_complete_line = false;
  while (!has_complete_line) {
    if (std::getline(*iss, line)) {
      // Hitting EOF while extracting means no terminator was seen, so the
      // line may continue in data that has not been read yet.
      has_complete_line = !iss->eof();
    } else {
      has_complete_line = false;
    }
    if (!has_complete_line) {
      // if we're not sure whether we have a complete line,
      // further read from the file.
      if (*has_data) {
        *result = seq_file_reader->Read(kBufferSize, &input_slice, buffer);
      }
      if (input_slice.size() == 0) {
        // meaning we have read all the data
        *has_data = false;
        break;
      } else {
        iss->str(line + input_slice.ToString());
        // reset the internal state of iss so that we can keep reading it.
        iss->clear();
        *has_data = (input_slice.size() == kBufferSize);
        continue;
      }
    }
  }
  *output = line;
  return *has_data || has_complete_line;
}

#ifndef NDEBUG
// Returns true if `off` is a multiple of `sector_size`.
bool IsFileSectorAligned(const size_t off, size_t sector_size) {
  return off % sector_size == 0;
}
#endif  // NDEBUG
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/file/read_write_util.h000066400000000000000000000025021370372246700175160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "file/sequence_file_reader.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" namespace ROCKSDB_NAMESPACE { // Returns a WritableFile. // // env : the Env. // fname : the file name. // result : output arg. A WritableFile based on `fname` returned. // options : the Env Options. extern IOStatus NewWritableFile(FileSystem* fs, const std::string& fname, std::unique_ptr* result, const FileOptions& options); // Read a single line from a file. bool ReadOneLine(std::istringstream* iss, SequentialFileReader* seq_file_reader, std::string* output, bool* has_data, Status* result); #ifndef NDEBUG bool IsFileSectorAligned(const size_t off, size_t sector_size); #endif // NDEBUG } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/readahead_raf.cc000066400000000000000000000137371370372246700172340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "file/readahead_raf.h"

#include <algorithm>
#include <mutex>
#include "file/read_write_util.h"
#include "util/aligned_buffer.h"
#include "util/rate_limiter.h"

namespace ROCKSDB_NAMESPACE {
namespace {
// RandomAccessFile wrapper that serves small reads out of an internal aligned
// buffer, which it refills with `readahead_size_` bytes at a time. Access to
// the buffer is serialized by `lock_`.
class ReadaheadRandomAccessFile : public RandomAccessFile {
 public:
  ReadaheadRandomAccessFile(std::unique_ptr<RandomAccessFile>&& file,
                            size_t readahead_size)
      : file_(std::move(file)),
        alignment_(file_->GetRequiredBufferAlignment()),
        readahead_size_(Roundup(readahead_size, alignment_)),
        buffer_(),
        buffer_offset_(0) {
    buffer_.Alignment(alignment_);
    buffer_.AllocateNewBuffer(readahead_size_);
  }

  ReadaheadRandomAccessFile(const ReadaheadRandomAccessFile&) = delete;

  ReadaheadRandomAccessFile& operator=(const ReadaheadRandomAccessFile&) =
      delete;

  Status Read(uint64_t offset, size_t n, Slice* result,
              char* scratch) const override {
    // Read-ahead only makes sense if we have some slack left after reading
    if (n + alignment_ >= readahead_size_) {
      return file_->Read(offset, n, result, scratch);
    }

    std::unique_lock<std::mutex> lk(lock_);

    size_t cached_len = 0;
    // Check if there is a cache hit, meaning that [offset, offset + n) is
    // either completely or partially in the buffer. If it's completely cached,
    // including end of file case when offset + n is greater than EOF, then
    // return.
    if (TryReadFromCache(offset, n, &cached_len, scratch) &&
        (cached_len == n || buffer_.CurrentSize() < readahead_size_)) {
      // We read exactly what we needed, or we hit end of file - return.
      *result = Slice(scratch, cached_len);
      return Status::OK();
    }
    size_t advanced_offset = static_cast<size_t>(offset + cached_len);
    // In the case of cache hit advanced_offset is already aligned, means that
    // chunk_offset equals to advanced_offset
    size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);

    Status s = ReadIntoBuffer(chunk_offset, readahead_size_);
    if (s.ok()) {
      // The data we need is now in cache, so we can safely read it
      size_t remaining_len;
      TryReadFromCache(advanced_offset, n - cached_len, &remaining_len,
                       scratch + cached_len);
      *result = Slice(scratch, cached_len + remaining_len);
    }
    return s;
  }

  Status Prefetch(uint64_t offset, size_t n) override {
    if (n < readahead_size_) {
      // Don't allow smaller prefetches than the configured `readahead_size_`.
      // `Read()` assumes a smaller prefetch buffer indicates EOF was reached.
      return Status::OK();
    }

    std::unique_lock<std::mutex> lk(lock_);

    size_t offset_ = static_cast<size_t>(offset);
    size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset_);
    if (prefetch_offset == buffer_offset_) {
      return Status::OK();
    }
    return ReadIntoBuffer(prefetch_offset,
                          Roundup(offset_ + n, alignment_) - prefetch_offset);
  }

  size_t GetUniqueId(char* id, size_t max_size) const override {
    return file_->GetUniqueId(id, max_size);
  }

  void Hint(AccessPattern pattern) override { file_->Hint(pattern); }

  Status InvalidateCache(size_t offset, size_t length) override {
    std::unique_lock<std::mutex> lk(lock_);
    buffer_.Clear();
    return file_->InvalidateCache(offset, length);
  }

  bool use_direct_io() const override { return file_->use_direct_io(); }

 private:
  // Tries to read from buffer_ n bytes starting at offset. If anything was read
  // from the cache, it sets cached_len to the number of bytes actually read,
  // copies these number of bytes to scratch and returns true.
  // If nothing was read sets cached_len to 0 and returns false.
  bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
                        char* scratch) const {
    if (offset < buffer_offset_ ||
        offset >= buffer_offset_ + buffer_.CurrentSize()) {
      *cached_len = 0;
      return false;
    }
    uint64_t offset_in_buffer = offset - buffer_offset_;
    *cached_len = std::min(
        buffer_.CurrentSize() - static_cast<size_t>(offset_in_buffer), n);
    memcpy(scratch, buffer_.BufferStart() + offset_in_buffer, *cached_len);
    return true;
  }

  // Reads into buffer_ the next n bytes from file_ starting at offset.
  // Can actually read less if EOF was reached.
  // Returns the status of the read operation on the file.
  Status ReadIntoBuffer(uint64_t offset, size_t n) const {
    if (n > buffer_.Capacity()) {
      n = buffer_.Capacity();
    }
    assert(IsFileSectorAligned(offset, alignment_));
    assert(IsFileSectorAligned(n, alignment_));
    Slice result;
    Status s = file_->Read(offset, n, &result, buffer_.BufferStart());
    if (s.ok()) {
      buffer_offset_ = offset;
      buffer_.Size(result.size());
      assert(result.size() == 0 || buffer_.BufferStart() == result.data());
    }
    return s;
  }

  const std::unique_ptr<RandomAccessFile> file_;
  const size_t alignment_;
  const size_t readahead_size_;

  mutable std::mutex lock_;
  // The buffer storing the prefetched data
  mutable AlignedBuffer buffer_;
  // The offset in file_, corresponding to data stored in buffer_
  mutable uint64_t buffer_offset_;
};
}  // namespace

std::unique_ptr<RandomAccessFile> NewReadaheadRandomAccessFile(
    std::unique_ptr<RandomAccessFile>&& file, size_t readahead_size) {
  std::unique_ptr<RandomAccessFile> result(
      new ReadaheadRandomAccessFile(std::move(file), readahead_size));
  return result;
}
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/file/readahead_raf.h000066400000000000000000000023541370372246700168670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once
#include <atomic>
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {
// This header declares only NewReadaheadRandomAccessFile. The related
// abstractions (SequentialFileReader, RandomAccessFileReader,
// WritableFileWriter, NewWritableFile, ReadOneLine) are not declared here.

// NewReadaheadRandomAccessFile provides a wrapper over RandomAccessFile to
// always prefetch additional data with every read. This is mainly used in
// Compaction Table Readers.
std::unique_ptr<RandomAccessFile> NewReadaheadRandomAccessFile(
    std::unique_ptr<RandomAccessFile>&& file, size_t readahead_size);
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/file/sequence_file_reader.cc000066400000000000000000000207731370372246700206350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "file/sequence_file_reader.h" #include #include #include "file/read_write_util.h" #include "monitoring/histogram.h" #include "monitoring/iostats_context_imp.h" #include "port/port.h" #include "test_util/sync_point.h" #include "util/aligned_buffer.h" #include "util/random.h" #include "util/rate_limiter.h" namespace ROCKSDB_NAMESPACE { Status SequentialFileReader::Read(size_t n, Slice* result, char* scratch) { Status s; if (use_direct_io()) { #ifndef ROCKSDB_LITE size_t offset = offset_.fetch_add(n); size_t alignment = file_->GetRequiredBufferAlignment(); size_t aligned_offset = TruncateToPageBoundary(alignment, offset); size_t offset_advance = offset - aligned_offset; size_t size = Roundup(offset + n, alignment) - aligned_offset; size_t r = 0; AlignedBuffer buf; buf.Alignment(alignment); buf.AllocateNewBuffer(size); Slice tmp; s = file_->PositionedRead(aligned_offset, size, IOOptions(), &tmp, buf.BufferStart(), nullptr); if (s.ok() && offset_advance < tmp.size()) { buf.Size(tmp.size()); r = buf.Read(scratch, offset_advance, std::min(tmp.size() - offset_advance, n)); } *result = Slice(scratch, r); #endif // !ROCKSDB_LITE } else { s = file_->Read(n, IOOptions(), result, scratch, nullptr); } IOSTATS_ADD(bytes_read, result->size()); return s; } Status SequentialFileReader::Skip(uint64_t n) { #ifndef ROCKSDB_LITE if (use_direct_io()) { offset_ += static_cast(n); return Status::OK(); } #endif // !ROCKSDB_LITE return file_->Skip(n); } namespace { // This class wraps a SequentialFile, exposing same API, with the differenece // of being able to prefetch up to readahead_size bytes and then serve them // from memory, avoiding the entire round-trip if, for example, the data for the // file is actually remote. 
class ReadaheadSequentialFile : public FSSequentialFile { public: ReadaheadSequentialFile(std::unique_ptr&& file, size_t readahead_size) : file_(std::move(file)), alignment_(file_->GetRequiredBufferAlignment()), readahead_size_(Roundup(readahead_size, alignment_)), buffer_(), buffer_offset_(0), read_offset_(0) { buffer_.Alignment(alignment_); buffer_.AllocateNewBuffer(readahead_size_); } ReadaheadSequentialFile(const ReadaheadSequentialFile&) = delete; ReadaheadSequentialFile& operator=(const ReadaheadSequentialFile&) = delete; IOStatus Read(size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) override { std::unique_lock lk(lock_); size_t cached_len = 0; // Check if there is a cache hit, meaning that [offset, offset + n) is // either completely or partially in the buffer. If it's completely cached, // including end of file case when offset + n is greater than EOF, then // return. if (TryReadFromCache(n, &cached_len, scratch) && (cached_len == n || buffer_.CurrentSize() < readahead_size_)) { // We read exactly what we needed, or we hit end of file - return. *result = Slice(scratch, cached_len); return IOStatus::OK(); } n -= cached_len; IOStatus s; // Read-ahead only make sense if we have some slack left after reading if (n + alignment_ >= readahead_size_) { s = file_->Read(n, opts, result, scratch + cached_len, dbg); if (s.ok()) { read_offset_ += result->size(); *result = Slice(scratch, cached_len + result->size()); } buffer_.Clear(); return s; } s = ReadIntoBuffer(readahead_size_, opts, dbg); if (s.ok()) { // The data we need is now in cache, so we can safely read it size_t remaining_len; TryReadFromCache(n, &remaining_len, scratch + cached_len); *result = Slice(scratch, cached_len + remaining_len); } return s; } IOStatus Skip(uint64_t n) override { std::unique_lock lk(lock_); IOStatus s = IOStatus::OK(); // First check if we need to skip already cached data if (buffer_.CurrentSize() > 0) { // Do we need to skip beyond cached data? 
if (read_offset_ + n >= buffer_offset_ + buffer_.CurrentSize()) { // Yes. Skip whaterver is in memory and adjust offset accordingly n -= buffer_offset_ + buffer_.CurrentSize() - read_offset_; read_offset_ = buffer_offset_ + buffer_.CurrentSize(); } else { // No. The entire section to be skipped is entirely i cache. read_offset_ += n; n = 0; } } if (n > 0) { // We still need to skip more, so call the file API for skipping s = file_->Skip(n); if (s.ok()) { read_offset_ += n; } buffer_.Clear(); } return s; } IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& opts, Slice* result, char* scratch, IODebugContext* dbg) override { return file_->PositionedRead(offset, n, opts, result, scratch, dbg); } IOStatus InvalidateCache(size_t offset, size_t length) override { std::unique_lock lk(lock_); buffer_.Clear(); return file_->InvalidateCache(offset, length); } bool use_direct_io() const override { return file_->use_direct_io(); } private: // Tries to read from buffer_ n bytes. If anything was read from the cache, it // sets cached_len to the number of bytes actually read, copies these number // of bytes to scratch and returns true. // If nothing was read sets cached_len to 0 and returns false. bool TryReadFromCache(size_t n, size_t* cached_len, char* scratch) { if (read_offset_ < buffer_offset_ || read_offset_ >= buffer_offset_ + buffer_.CurrentSize()) { *cached_len = 0; return false; } uint64_t offset_in_buffer = read_offset_ - buffer_offset_; *cached_len = std::min( buffer_.CurrentSize() - static_cast(offset_in_buffer), n); memcpy(scratch, buffer_.BufferStart() + offset_in_buffer, *cached_len); read_offset_ += *cached_len; return true; } // Reads into buffer_ the next n bytes from file_. // Can actually read less if EOF was reached. // Returns the status of the read operastion on the file. 
IOStatus ReadIntoBuffer(size_t n, const IOOptions& opts, IODebugContext* dbg) { if (n > buffer_.Capacity()) { n = buffer_.Capacity(); } assert(IsFileSectorAligned(n, alignment_)); Slice result; IOStatus s = file_->Read(n, opts, &result, buffer_.BufferStart(), dbg); if (s.ok()) { buffer_offset_ = read_offset_; buffer_.Size(result.size()); assert(result.size() == 0 || buffer_.BufferStart() == result.data()); } return s; } const std::unique_ptr file_; const size_t alignment_; const size_t readahead_size_; std::mutex lock_; // The buffer storing the prefetched data AlignedBuffer buffer_; // The offset in file_, corresponding to data stored in buffer_ uint64_t buffer_offset_; // The offset up to which data was read from file_. In fact, it can be larger // than the actual file size, since the file_->Skip(n) call doesn't return the // actual number of bytes that were skipped, which can be less than n. // This is not a problemm since read_offset_ is monotonically increasing and // its only use is to figure out if next piece of data should be read from // buffer_ or file_ directly. uint64_t read_offset_; }; } // namespace std::unique_ptr SequentialFileReader::NewReadaheadSequentialFile( std::unique_ptr&& file, size_t readahead_size) { if (file->GetRequiredBufferAlignment() >= readahead_size) { // Short-circuit and return the original file if readahead_size is // too small and hence doesn't make sense to be used for prefetching. return std::move(file); } std::unique_ptr result( new ReadaheadSequentialFile(std::move(file), readahead_size)); return result; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/sequence_file_reader.h000066400000000000000000000046631370372246700204770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" namespace ROCKSDB_NAMESPACE { // SequentialFileReader is a wrapper on top of Env::SequentialFile. It handles // Buffered (i.e when page cache is enabled) and Direct (with O_DIRECT / page // cache disabled) reads appropriately, and also updates the IO stats. class SequentialFileReader { private: std::unique_ptr file_; std::string file_name_; std::atomic offset_{0}; // read offset public: explicit SequentialFileReader(std::unique_ptr&& _file, const std::string& _file_name) : file_(std::move(_file)), file_name_(_file_name) {} explicit SequentialFileReader(std::unique_ptr&& _file, const std::string& _file_name, size_t _readahead_size) : file_(NewReadaheadSequentialFile(std::move(_file), _readahead_size)), file_name_(_file_name) {} SequentialFileReader(SequentialFileReader&& o) ROCKSDB_NOEXCEPT { *this = std::move(o); } SequentialFileReader& operator=(SequentialFileReader&& o) ROCKSDB_NOEXCEPT { file_ = std::move(o.file_); return *this; } SequentialFileReader(const SequentialFileReader&) = delete; SequentialFileReader& operator=(const SequentialFileReader&) = delete; Status Read(size_t n, Slice* result, char* scratch); Status Skip(uint64_t n); FSSequentialFile* file() { return file_.get(); } std::string file_name() { return file_name_; } bool use_direct_io() const { return file_->use_direct_io(); } private: // NewReadaheadSequentialFile provides a wrapper over SequentialFile to // always prefetch additional data with every read. 
static std::unique_ptr NewReadaheadSequentialFile( std::unique_ptr&& file, size_t readahead_size); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/sst_file_manager_impl.cc000066400000000000000000000462571370372246700210340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "file/sst_file_manager_impl.h" #include #include #include "db/db_impl/db_impl.h" #include "env/composite_env_wrapper.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/sst_file_manager.h" #include "test_util/sync_point.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr fs, std::shared_ptr logger, int64_t rate_bytes_per_sec, double max_trash_db_ratio, uint64_t bytes_max_delete_chunk) : env_(env), fs_(fs), logger_(logger), total_files_size_(0), in_progress_files_size_(0), compaction_buffer_size_(0), cur_compactions_reserved_size_(0), max_allowed_space_(0), delete_scheduler_(env, fs_.get(), rate_bytes_per_sec, logger.get(), this, max_trash_db_ratio, bytes_max_delete_chunk), cv_(&mu_), closing_(false), bg_thread_(nullptr), reserved_disk_buffer_(0), free_space_trigger_(0), cur_instance_(nullptr) {} SstFileManagerImpl::~SstFileManagerImpl() { Close(); } void SstFileManagerImpl::Close() { { MutexLock l(&mu_); if (closing_) { return; } closing_ = true; cv_.SignalAll(); } if (bg_thread_) { bg_thread_->join(); } } Status SstFileManagerImpl::OnAddFile(const std::string& file_path, bool compaction) { uint64_t file_size; Status s = fs_->GetFileSize(file_path, IOOptions(), &file_size, nullptr); if (s.ok()) { MutexLock l(&mu_); OnAddFileImpl(file_path, file_size, compaction); } TEST_SYNC_POINT("SstFileManagerImpl::OnAddFile"); return s; } Status 
SstFileManagerImpl::OnAddFile(const std::string& file_path, uint64_t file_size, bool compaction) { MutexLock l(&mu_); OnAddFileImpl(file_path, file_size, compaction); TEST_SYNC_POINT("SstFileManagerImpl::OnAddFile"); return Status::OK(); } Status SstFileManagerImpl::OnDeleteFile(const std::string& file_path) { { MutexLock l(&mu_); OnDeleteFileImpl(file_path); } TEST_SYNC_POINT("SstFileManagerImpl::OnDeleteFile"); return Status::OK(); } void SstFileManagerImpl::OnCompactionCompletion(Compaction* c) { MutexLock l(&mu_); uint64_t size_added_by_compaction = 0; for (size_t i = 0; i < c->num_input_levels(); i++) { for (size_t j = 0; j < c->num_input_files(i); j++) { FileMetaData* filemeta = c->input(i, j); size_added_by_compaction += filemeta->fd.GetFileSize(); } } cur_compactions_reserved_size_ -= size_added_by_compaction; auto new_files = c->edit()->GetNewFiles(); for (auto& new_file : new_files) { auto fn = TableFileName(c->immutable_cf_options()->cf_paths, new_file.second.fd.GetNumber(), new_file.second.fd.GetPathId()); if (in_progress_files_.find(fn) != in_progress_files_.end()) { auto tracked_file = tracked_files_.find(fn); assert(tracked_file != tracked_files_.end()); in_progress_files_size_ -= tracked_file->second; in_progress_files_.erase(fn); } } } Status SstFileManagerImpl::OnMoveFile(const std::string& old_path, const std::string& new_path, uint64_t* file_size) { { MutexLock l(&mu_); if (file_size != nullptr) { *file_size = tracked_files_[old_path]; } OnAddFileImpl(new_path, tracked_files_[old_path], false); OnDeleteFileImpl(old_path); } TEST_SYNC_POINT("SstFileManagerImpl::OnMoveFile"); return Status::OK(); } void SstFileManagerImpl::SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) { MutexLock l(&mu_); max_allowed_space_ = max_allowed_space; } void SstFileManagerImpl::SetCompactionBufferSize( uint64_t compaction_buffer_size) { MutexLock l(&mu_); compaction_buffer_size_ = compaction_buffer_size; } bool SstFileManagerImpl::IsMaxAllowedSpaceReached() { 
MutexLock l(&mu_); if (max_allowed_space_ <= 0) { return false; } return total_files_size_ >= max_allowed_space_; } bool SstFileManagerImpl::IsMaxAllowedSpaceReachedIncludingCompactions() { MutexLock l(&mu_); if (max_allowed_space_ <= 0) { return false; } return total_files_size_ + cur_compactions_reserved_size_ >= max_allowed_space_; } bool SstFileManagerImpl::EnoughRoomForCompaction( ColumnFamilyData* cfd, const std::vector& inputs, Status bg_error) { MutexLock l(&mu_); uint64_t size_added_by_compaction = 0; // First check if we even have the space to do the compaction for (size_t i = 0; i < inputs.size(); i++) { for (size_t j = 0; j < inputs[i].size(); j++) { FileMetaData* filemeta = inputs[i][j]; size_added_by_compaction += filemeta->fd.GetFileSize(); } } // Update cur_compactions_reserved_size_ so concurrent compaction // don't max out space size_t needed_headroom = cur_compactions_reserved_size_ + size_added_by_compaction + compaction_buffer_size_; if (max_allowed_space_ != 0 && (needed_headroom + total_files_size_ > max_allowed_space_)) { return false; } // Implement more aggressive checks only if this DB instance has already // seen a NoSpace() error. This is tin order to contain a single potentially // misbehaving DB instance and prevent it from slowing down compactions of // other DB instances if (CheckFreeSpace() && bg_error == Status::NoSpace()) { auto fn = TableFileName(cfd->ioptions()->cf_paths, inputs[0][0]->fd.GetNumber(), inputs[0][0]->fd.GetPathId()); uint64_t free_space = 0; fs_->GetFreeSpace(fn, IOOptions(), &free_space, nullptr); // needed_headroom is based on current size reserved by compactions, // minus any files created by running compactions as they would count // against the reserved size. 
If user didn't specify any compaction // buffer, add reserved_disk_buffer_ that's calculated by default so the // compaction doesn't end up leaving nothing for logs and flush SSTs if (compaction_buffer_size_ == 0) { needed_headroom += reserved_disk_buffer_; } needed_headroom -= in_progress_files_size_; if (free_space < needed_headroom + size_added_by_compaction) { // We hit the condition of not enough disk space ROCKS_LOG_ERROR(logger_, "free space [%" PRIu64 " bytes] is less than " "needed headroom [%" ROCKSDB_PRIszt " bytes]\n", free_space, needed_headroom); return false; } } cur_compactions_reserved_size_ += size_added_by_compaction; // Take a snapshot of cur_compactions_reserved_size_ for when we encounter // a NoSpace error. free_space_trigger_ = cur_compactions_reserved_size_; return true; } uint64_t SstFileManagerImpl::GetCompactionsReservedSize() { MutexLock l(&mu_); return cur_compactions_reserved_size_; } uint64_t SstFileManagerImpl::GetTotalSize() { MutexLock l(&mu_); return total_files_size_; } std::unordered_map SstFileManagerImpl::GetTrackedFiles() { MutexLock l(&mu_); return tracked_files_; } int64_t SstFileManagerImpl::GetDeleteRateBytesPerSecond() { return delete_scheduler_.GetRateBytesPerSecond(); } void SstFileManagerImpl::SetDeleteRateBytesPerSecond(int64_t delete_rate) { return delete_scheduler_.SetRateBytesPerSecond(delete_rate); } double SstFileManagerImpl::GetMaxTrashDBRatio() { return delete_scheduler_.GetMaxTrashDBRatio(); } void SstFileManagerImpl::SetMaxTrashDBRatio(double r) { return delete_scheduler_.SetMaxTrashDBRatio(r); } uint64_t SstFileManagerImpl::GetTotalTrashSize() { return delete_scheduler_.GetTotalTrashSize(); } void SstFileManagerImpl::ReserveDiskBuffer(uint64_t size, const std::string& path) { MutexLock l(&mu_); reserved_disk_buffer_ += size; if (path_.empty()) { path_ = path; } } void SstFileManagerImpl::ClearError() { while (true) { MutexLock l(&mu_); if (closing_) { return; } uint64_t free_space = 0; Status s = 
fs_->GetFreeSpace(path_, IOOptions(), &free_space, nullptr); free_space = max_allowed_space_ > 0 ? std::min(max_allowed_space_, free_space) : free_space; if (s.ok()) { // In case of multi-DB instances, some of them may have experienced a // soft error and some a hard error. In the SstFileManagerImpl, a hard // error will basically override previously reported soft errors. Once // we clear the hard error, we don't keep track of previous errors for // now if (bg_err_.severity() == Status::Severity::kHardError) { if (free_space < reserved_disk_buffer_) { ROCKS_LOG_ERROR(logger_, "free space [%" PRIu64 " bytes] is less than " "required disk buffer [%" PRIu64 " bytes]\n", free_space, reserved_disk_buffer_); ROCKS_LOG_ERROR(logger_, "Cannot clear hard error\n"); s = Status::NoSpace(); } } else if (bg_err_.severity() == Status::Severity::kSoftError) { if (free_space < free_space_trigger_) { ROCKS_LOG_WARN(logger_, "free space [%" PRIu64 " bytes] is less than " "free space for compaction trigger [%" PRIu64 " bytes]\n", free_space, free_space_trigger_); ROCKS_LOG_WARN(logger_, "Cannot clear soft error\n"); s = Status::NoSpace(); } } } // Someone could have called CancelErrorRecovery() and the list could have // become empty, so check again here if (s.ok() && !error_handler_list_.empty()) { auto error_handler = error_handler_list_.front(); // Since we will release the mutex, set cur_instance_ to signal to the // shutdown thread, if it calls // CancelErrorRecovery() the meantime, // to indicate that this DB instance is busy. 
The DB instance is // guaranteed to not be deleted before RecoverFromBGError() returns, // since the ErrorHandler::recovery_in_prog_ flag would be true cur_instance_ = error_handler; mu_.Unlock(); s = error_handler->RecoverFromBGError(); TEST_SYNC_POINT("SstFileManagerImpl::ErrorCleared"); mu_.Lock(); // The DB instance might have been deleted while we were // waiting for the mutex, so check cur_instance_ to make sure its // still non-null if (cur_instance_) { // Check for error again, since the instance may have recovered but // immediately got another error. If that's the case, and the new // error is also a NoSpace() non-fatal error, leave the instance in // the list Status err = cur_instance_->GetBGError(); if (s.ok() && err == Status::NoSpace() && err.severity() < Status::Severity::kFatalError) { s = err; } cur_instance_ = nullptr; } if (s.ok() || s.IsShutdownInProgress() || (!s.ok() && s.severity() >= Status::Severity::kFatalError)) { // If shutdown is in progress, abandon this handler instance // and continue with the others error_handler_list_.pop_front(); } } if (!error_handler_list_.empty()) { // If there are more instances to be recovered, reschedule after 5 // seconds int64_t wait_until = env_->NowMicros() + 5000000; cv_.TimedWait(wait_until); } // Check again for error_handler_list_ empty, as a DB instance shutdown // could have removed it from the queue while we were in timed wait if (error_handler_list_.empty()) { ROCKS_LOG_INFO(logger_, "Clearing error\n"); bg_err_ = Status::OK(); return; } } } void SstFileManagerImpl::StartErrorRecovery(ErrorHandler* handler, Status bg_error) { MutexLock l(&mu_); if (bg_error.severity() == Status::Severity::kSoftError) { if (bg_err_.ok()) { // Setting bg_err_ basically means we're in degraded mode // Assume that all pending compactions will fail similarly. 
The trigger // for clearing this condition is set to current compaction reserved // size, so we stop checking disk space available in // EnoughRoomForCompaction once this much free space is available bg_err_ = bg_error; } } else if (bg_error.severity() == Status::Severity::kHardError) { bg_err_ = bg_error; } else { assert(false); } // If this is the first instance of this error, kick of a thread to poll // and recover from this condition if (error_handler_list_.empty()) { error_handler_list_.push_back(handler); // Release lock before calling join. Its ok to do so because // error_handler_list_ is now non-empty, so no other invocation of this // function will execute this piece of code mu_.Unlock(); if (bg_thread_) { bg_thread_->join(); } // Start a new thread. The previous one would have exited. bg_thread_.reset(new port::Thread(&SstFileManagerImpl::ClearError, this)); mu_.Lock(); } else { // Check if this DB instance is already in the list for (auto iter = error_handler_list_.begin(); iter != error_handler_list_.end(); ++iter) { if ((*iter) == handler) { return; } } error_handler_list_.push_back(handler); } } bool SstFileManagerImpl::CancelErrorRecovery(ErrorHandler* handler) { MutexLock l(&mu_); if (cur_instance_ == handler) { // This instance is currently busy attempting to recover // Nullify it so the recovery thread doesn't attempt to access it again cur_instance_ = nullptr; return false; } for (auto iter = error_handler_list_.begin(); iter != error_handler_list_.end(); ++iter) { if ((*iter) == handler) { error_handler_list_.erase(iter); return true; } } return false; } Status SstFileManagerImpl::ScheduleFileDeletion( const std::string& file_path, const std::string& path_to_sync, const bool force_bg) { TEST_SYNC_POINT_CALLBACK("SstFileManagerImpl::ScheduleFileDeletion", const_cast(&file_path)); return delete_scheduler_.DeleteFile(file_path, path_to_sync, force_bg); } void SstFileManagerImpl::WaitForEmptyTrash() { delete_scheduler_.WaitForEmptyTrash(); } void 
SstFileManagerImpl::OnAddFileImpl(const std::string& file_path, uint64_t file_size, bool compaction) { auto tracked_file = tracked_files_.find(file_path); if (tracked_file != tracked_files_.end()) { // File was added before, we will just update the size assert(!compaction); total_files_size_ -= tracked_file->second; total_files_size_ += file_size; cur_compactions_reserved_size_ -= file_size; } else { total_files_size_ += file_size; if (compaction) { // Keep track of the size of files created by in-progress compactions. // When calculating whether there's enough headroom for new compactions, // this will be subtracted from cur_compactions_reserved_size_. // Otherwise, compactions will be double counted. in_progress_files_size_ += file_size; in_progress_files_.insert(file_path); } } tracked_files_[file_path] = file_size; } void SstFileManagerImpl::OnDeleteFileImpl(const std::string& file_path) { auto tracked_file = tracked_files_.find(file_path); if (tracked_file == tracked_files_.end()) { // File is not tracked assert(in_progress_files_.find(file_path) == in_progress_files_.end()); return; } total_files_size_ -= tracked_file->second; // Check if it belonged to an in-progress compaction if (in_progress_files_.find(file_path) != in_progress_files_.end()) { in_progress_files_size_ -= tracked_file->second; in_progress_files_.erase(file_path); } tracked_files_.erase(tracked_file); } SstFileManager* NewSstFileManager(Env* env, std::shared_ptr info_log, std::string trash_dir, int64_t rate_bytes_per_sec, bool delete_existing_trash, Status* status, double max_trash_db_ratio, uint64_t bytes_max_delete_chunk) { std::shared_ptr fs; if (env == Env::Default()) { fs = FileSystem::Default(); } else { fs.reset(new LegacyFileSystemWrapper(env)); } return NewSstFileManager(env, fs, info_log, trash_dir, rate_bytes_per_sec, delete_existing_trash, status, max_trash_db_ratio, bytes_max_delete_chunk); } SstFileManager* NewSstFileManager(Env* env, std::shared_ptr fs, std::shared_ptr 
info_log, const std::string& trash_dir, int64_t rate_bytes_per_sec, bool delete_existing_trash, Status* status, double max_trash_db_ratio, uint64_t bytes_max_delete_chunk) { SstFileManagerImpl* res = new SstFileManagerImpl(env, fs, info_log, rate_bytes_per_sec, max_trash_db_ratio, bytes_max_delete_chunk); // trash_dir is deprecated and not needed anymore, but if user passed it // we will still remove files in it. Status s; if (delete_existing_trash && trash_dir != "") { std::vector files_in_trash; s = fs->GetChildren(trash_dir, IOOptions(), &files_in_trash, nullptr); if (s.ok()) { for (const std::string& trash_file : files_in_trash) { if (trash_file == "." || trash_file == "..") { continue; } std::string path_in_trash = trash_dir + "/" + trash_file; res->OnAddFile(path_in_trash); Status file_delete = res->ScheduleFileDeletion(path_in_trash, trash_dir); if (s.ok() && !file_delete.ok()) { s = file_delete; } } } } if (status) { *status = s; } return res; } #else SstFileManager* NewSstFileManager(Env* /*env*/, std::shared_ptr /*info_log*/, std::string /*trash_dir*/, int64_t /*rate_bytes_per_sec*/, bool /*delete_existing_trash*/, Status* status, double /*max_trash_db_ratio*/, uint64_t /*bytes_max_delete_chunk*/) { if (status) { *status = Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE"); } return nullptr; } #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/sst_file_manager_impl.h000066400000000000000000000174161370372246700206710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include "port/port.h" #include "db/compaction/compaction.h" #include "db/error_handler.h" #include "file/delete_scheduler.h" #include "rocksdb/file_system.h" #include "rocksdb/sst_file_manager.h" namespace ROCKSDB_NAMESPACE { class Env; class Logger; // SstFileManager is used to track SST files in the DB and control there // deletion rate. // All SstFileManager public functions are thread-safe. class SstFileManagerImpl : public SstFileManager { public: explicit SstFileManagerImpl(Env* env, std::shared_ptr fs, std::shared_ptr logger, int64_t rate_bytes_per_sec, double max_trash_db_ratio, uint64_t bytes_max_delete_chunk); ~SstFileManagerImpl(); // DB will call OnAddFile whenever a new sst file is added. Status OnAddFile(const std::string& file_path, bool compaction = false); // Overload where size of the file is provided by the caller rather than // queried from the filesystem. This is an optimization. Status OnAddFile(const std::string& file_path, uint64_t file_size, bool compaction); // DB will call OnDeleteFile whenever an sst file is deleted. Status OnDeleteFile(const std::string& file_path); // DB will call OnMoveFile whenever an sst file is move to a new path. Status OnMoveFile(const std::string& old_path, const std::string& new_path, uint64_t* file_size = nullptr); // Update the maximum allowed space that should be used by RocksDB, if // the total size of the SST files exceeds max_allowed_space, writes to // RocksDB will fail. // // Setting max_allowed_space to 0 will disable this feature, maximum allowed // space will be infinite (Default value). // // thread-safe. void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override; void SetCompactionBufferSize(uint64_t compaction_buffer_size) override; // Return true if the total size of SST files exceeded the maximum allowed // space usage. // // thread-safe. 
bool IsMaxAllowedSpaceReached() override; bool IsMaxAllowedSpaceReachedIncludingCompactions() override; // Returns true is there is enough (approximate) space for the specified // compaction. Space is approximate because this function conservatively // estimates how much space is currently being used by compactions (i.e. // if a compaction has started, this function bumps the used space by // the full compaction size). bool EnoughRoomForCompaction(ColumnFamilyData* cfd, const std::vector& inputs, Status bg_error); // Bookkeeping so total_file_sizes_ goes back to normal after compaction // finishes void OnCompactionCompletion(Compaction* c); uint64_t GetCompactionsReservedSize(); // Return the total size of all tracked files. uint64_t GetTotalSize() override; // Return a map containing all tracked files and there corresponding sizes. std::unordered_map GetTrackedFiles() override; // Return delete rate limit in bytes per second. virtual int64_t GetDeleteRateBytesPerSecond() override; // Update the delete rate limit in bytes per second. virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override; // Return trash/DB size ratio where new files will be deleted immediately virtual double GetMaxTrashDBRatio() override; // Update trash/DB size ratio where new files will be deleted immediately virtual void SetMaxTrashDBRatio(double ratio) override; // Return the total size of trash files uint64_t GetTotalTrashSize() override; // Called by each DB instance using this sst file manager to reserve // disk buffer space for recovery from out of space errors void ReserveDiskBuffer(uint64_t buffer, const std::string& path); // Set a flag upon encountering disk full. May enqueue the ErrorHandler // instance for background polling and recovery void StartErrorRecovery(ErrorHandler* db, Status bg_error); // Remove the given Errorhandler instance from the recovery queue. 
Its // not guaranteed bool CancelErrorRecovery(ErrorHandler* db); // Mark file as trash and schedule it's deletion. If force_bg is set, it // forces the file to be deleting in the background regardless of DB size, // except when rate limited delete is disabled virtual Status ScheduleFileDeletion(const std::string& file_path, const std::string& dir_to_sync, const bool force_bg = false); // Wait for all files being deleteing in the background to finish or for // destructor to be called. virtual void WaitForEmptyTrash(); DeleteScheduler* delete_scheduler() { return &delete_scheduler_; } // Stop the error recovery background thread. This should be called only // once in the object's lifetime, and before the destructor void Close(); void SetStatisticsPtr(const std::shared_ptr& stats) override { stats_ = stats; delete_scheduler_.SetStatisticsPtr(stats); } private: // REQUIRES: mutex locked void OnAddFileImpl(const std::string& file_path, uint64_t file_size, bool compaction); // REQUIRES: mutex locked void OnDeleteFileImpl(const std::string& file_path); void ClearError(); bool CheckFreeSpace() { return bg_err_.severity() == Status::Severity::kSoftError; } Env* env_; std::shared_ptr fs_; std::shared_ptr logger_; // Mutex to protect tracked_files_, total_files_size_ port::Mutex mu_; // The summation of the sizes of all files in tracked_files_ map uint64_t total_files_size_; // The summation of all output files of in-progress compactions uint64_t in_progress_files_size_; // Compactions should only execute if they can leave at least // this amount of buffer space for logs and flushes uint64_t compaction_buffer_size_; // Estimated size of the current ongoing compactions uint64_t cur_compactions_reserved_size_; // A map containing all tracked files and there sizes // file_path => file_size std::unordered_map tracked_files_; // A set of files belonging to in-progress compactions std::unordered_set in_progress_files_; // The maximum allowed space (in bytes) for sst files. 
uint64_t max_allowed_space_; // DeleteScheduler used to throttle file deletition. DeleteScheduler delete_scheduler_; port::CondVar cv_; // Flag to force error recovery thread to exit bool closing_; // Background error recovery thread std::unique_ptr bg_thread_; // A path in the filesystem corresponding to this SFM. This is used for // calling Env::GetFreeSpace. Posix requires a path in the filesystem std::string path_; // Save the current background error Status bg_err_; // Amount of free disk headroom before allowing recovery from hard errors uint64_t reserved_disk_buffer_; // For soft errors, amount of free disk space before we can allow // compactions to run full throttle. If disk space is below this trigger, // compactions will be gated by free disk space > input size uint64_t free_space_trigger_; // List of database error handler instances tracked by this sst file manager std::list error_handler_list_; // Pointer to ErrorHandler instance that is currently processing recovery ErrorHandler* cur_instance_; std::shared_ptr stats_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/file/writable_file_writer.cc000066400000000000000000000325171370372246700207070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "file/writable_file_writer.h"

// NOTE(review): the header names were missing from the next two directives in
// this copy of the file; <algorithm> (std::min is used below) and <mutex> are
// reconstructions -- confirm against upstream.
#include <algorithm>
#include <mutex>

#include "db/version_edit.h"
#include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
#include "test_util/sync_point.h"
#include "util/random.h"
#include "util/rate_limiter.h"

namespace ROCKSDB_NAMESPACE {

// Appends `data` to the file, going through buf_ whenever possible.
// Marks the writer as having data pending sync, feeds the bytes to the
// checksum generator, and on success advances filesize_ by data.size().
// Returns the first non-OK status produced by a flush or write.
IOStatus WritableFileWriter::Append(const Slice& data) {
  const char* src = data.data();
  size_t left = data.size();
  IOStatus s;
  pending_sync_ = true;

  TEST_KILL_RANDOM("WritableFileWriter::Append:0",
                   rocksdb_kill_odds * REDUCE_ODDS2);

  // Calculate the checksum of appended data
  UpdateFileChecksum(data);

  {
    IOSTATS_TIMER_GUARD(prepare_write_nanos);
    TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
    writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
                                 IOOptions(), nullptr);
  }

  // See whether we need to enlarge the buffer to avoid the flush
  if (buf_.Capacity() - buf_.CurrentSize() < left) {
    for (size_t cap = buf_.Capacity();
         cap < max_buffer_size_;  // There is still room to increase
         cap *= 2) {
      // See whether the next available size is large enough.
      // Buffer will never be increased to more than max_buffer_size_.
      size_t desired_capacity = std::min(cap * 2, max_buffer_size_);
      if (desired_capacity - buf_.CurrentSize() >= left ||
          (use_direct_io() && desired_capacity == max_buffer_size_)) {
        buf_.AllocateNewBuffer(desired_capacity, true);
        break;
      }
    }
  }

  // Flush only when buffered I/O
  if (!use_direct_io() && (buf_.Capacity() - buf_.CurrentSize()) < left) {
    if (buf_.CurrentSize() > 0) {
      s = Flush();
      if (!s.ok()) {
        return s;
      }
    }
    assert(buf_.CurrentSize() == 0);
  }

  // We never write directly to disk with direct I/O on.
  // or we simply use it for its original purpose to accumulate many small
  // chunks
  if (use_direct_io() || (buf_.Capacity() >= left)) {
    while (left > 0) {
      size_t appended = buf_.Append(src, left);
      left -= appended;
      src += appended;

      // The buffer filled up before everything fit: drain it and continue.
      if (left > 0) {
        s = Flush();
        if (!s.ok()) {
          break;
        }
      }
    }
  } else {
    // Writing directly to file bypassing the buffer
    assert(buf_.CurrentSize() == 0);
    s = WriteBuffered(src, left);
  }

  TEST_KILL_RANDOM("WritableFileWriter::Append:1", rocksdb_kill_odds);
  if (s.ok()) {
    filesize_ += data.size();
  }
  return s;
}

// Appends `pad_bytes` bytes of value 0 through buf_, flushing whenever the
// buffer runs out of room, then advances filesize_ by pad_bytes.
// Asserts pad_bytes < kDefaultPageSize.
IOStatus WritableFileWriter::Pad(const size_t pad_bytes) {
  assert(pad_bytes < kDefaultPageSize);
  size_t left = pad_bytes;
  size_t cap = buf_.Capacity() - buf_.CurrentSize();

  // Assume pad_bytes is small compared to buf_ capacity. So we always
  // use buf_ rather than write directly to file in certain cases like
  // Append() does.
  while (left) {
    size_t append_bytes = std::min(cap, left);
    buf_.PadWith(append_bytes, 0);
    left -= append_bytes;
    if (left > 0) {
      IOStatus s = Flush();
      if (!s.ok()) {
        return s;
      }
    }
    cap = buf_.Capacity() - buf_.CurrentSize();
  }
  pending_sync_ = true;
  filesize_ += pad_bytes;
  return IOStatus::OK();
}

// Flushes, (in direct I/O mode) truncates to filesize_ and fsyncs, closes the
// underlying file and releases it. Every step is attempted even after an
// earlier failure; the first non-OK status is the one returned. Calling it
// again after the file has been released returns OK without doing anything.
IOStatus WritableFileWriter::Close() {
  // Do not quit immediately on failure the file MUST be closed
  IOStatus s;

  // Possible to close it twice now as we MUST close
  // in __dtor, simply flushing is not enough
  // Windows when pre-allocating does not fill with zeros
  // also with unbuffered access we also set the end of data.
  if (!writable_file_) {
    return s;
  }

  s = Flush();  // flush cache to OS

  IOStatus interim;
  // In direct I/O mode we write whole pages so
  // we need to let the file know where data ends.
  if (use_direct_io()) {
    interim = writable_file_->Truncate(filesize_, IOOptions(), nullptr);
    if (interim.ok()) {
      interim = writable_file_->Fsync(IOOptions(), nullptr);
    }
    if (!interim.ok() && s.ok()) {
      s = interim;
    }
  }

  TEST_KILL_RANDOM("WritableFileWriter::Close:0", rocksdb_kill_odds);
  interim = writable_file_->Close(IOOptions(), nullptr);
  if (!interim.ok() && s.ok()) {
    s = interim;
  }

  writable_file_.reset();
  TEST_KILL_RANDOM("WritableFileWriter::Close:1", rocksdb_kill_odds);

  // The checksum is only finalized when the whole close sequence succeeded.
  if (s.ok() && checksum_generator_ != nullptr && !checksum_finalized_) {
    checksum_generator_->Finalize();
    checksum_finalized_ = true;
  }

  return s;
}

// write out the cached data to the OS cache or storage if direct I/O
// enabled
IOStatus WritableFileWriter::Flush() {
  IOStatus s;
  TEST_KILL_RANDOM("WritableFileWriter::Flush:0",
                   rocksdb_kill_odds * REDUCE_ODDS2);

  if (buf_.CurrentSize() > 0) {
    if (use_direct_io()) {
#ifndef ROCKSDB_LITE
      if (pending_sync_) {
        s = WriteDirect();
      }
#endif  // !ROCKSDB_LITE
    } else {
      s = WriteBuffered(buf_.BufferStart(), buf_.CurrentSize());
    }
    if (!s.ok()) {
      return s;
    }
  }

  s = writable_file_->Flush(IOOptions(), nullptr);

  if (!s.ok()) {
    return s;
  }

  // sync OS cache to disk for every bytes_per_sync_
  // TODO: give log file and sst file different options (log
  // files could be potentially cached in OS for their whole
  // life time, thus we might not want to flush at all).

  // We try to avoid sync to the last 1MB of data. For two reasons:
  // (1) avoid rewrite the same page that is modified later.
  // (2) for older version of OS, write can block while writing out
  //     the page.
  // Xfs does neighbor page flushing outside of the specified ranges. We
  // need to make sure sync range is far from the write offset.
  if (!use_direct_io() && bytes_per_sync_) {
    const uint64_t kBytesNotSyncRange =
        1024 * 1024;                                // recent 1MB is not synced.
    const uint64_t kBytesAlignWhenSync = 4 * 1024;  // Align 4KB.
    if (filesize_ > kBytesNotSyncRange) {
      uint64_t offset_sync_to = filesize_ - kBytesNotSyncRange;
      offset_sync_to -= offset_sync_to % kBytesAlignWhenSync;
      assert(offset_sync_to >= last_sync_size_);
      if (offset_sync_to > 0 &&
          offset_sync_to - last_sync_size_ >= bytes_per_sync_) {
        s = RangeSync(last_sync_size_, offset_sync_to - last_sync_size_);
        last_sync_size_ = offset_sync_to;
      }
    }
  }

  return s;
}

// Returns the generator's checksum, or kUnknownFileChecksum when no checksum
// generator is configured. Asserts that the checksum has been finalized.
std::string WritableFileWriter::GetFileChecksum() {
  if (checksum_generator_ != nullptr) {
    assert(checksum_finalized_);
    return checksum_generator_->GetChecksum();
  } else {
    return kUnknownFileChecksum;
  }
}

// Returns the generator's name, or kUnknownFileChecksumFuncName when no
// checksum generator is configured.
const char* WritableFileWriter::GetFileChecksumFuncName() const {
  if (checksum_generator_ != nullptr) {
    return checksum_generator_->Name();
  } else {
    return kUnknownFileChecksumFuncName;
  }
}

// Flushes, then (buffered mode only, and only if data is pending) syncs the
// file with Fsync or Sync depending on `use_fsync`. Clears pending_sync_ on
// success.
IOStatus WritableFileWriter::Sync(bool use_fsync) {
  IOStatus s = Flush();
  if (!s.ok()) {
    return s;
  }
  TEST_KILL_RANDOM("WritableFileWriter::Sync:0", rocksdb_kill_odds);
  if (!use_direct_io() && pending_sync_) {
    s = SyncInternal(use_fsync);
    if (!s.ok()) {
      return s;
    }
  }
  TEST_KILL_RANDOM("WritableFileWriter::Sync:1", rocksdb_kill_odds);
  pending_sync_ = false;
  return IOStatus::OK();
}

// Syncs without flushing buf_ first. Returns NotSupported when the underlying
// file reports that syncing is not thread safe.
IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) {
  if (!writable_file_->IsSyncThreadSafe()) {
    return IOStatus::NotSupported(
        "Can't WritableFileWriter::SyncWithoutFlush() because "
        "WritableFile::IsSyncThreadSafe() is false");
  }
  TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:1");
  IOStatus s = SyncInternal(use_fsync);
  TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:2");
  return s;
}

// Calls Fsync() or Sync() on the underlying file under the fsync timers and
// restores the perf level that was in effect on entry.
IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
  IOStatus s;
  IOSTATS_TIMER_GUARD(fsync_nanos);
  TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0");
  auto prev_perf_level = GetPerfLevel();
  IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_);
  if (use_fsync) {
    s = writable_file_->Fsync(IOOptions(), nullptr);
  } else {
    s = writable_file_->Sync(IOOptions(), nullptr);
  }
  SetPerfLevel(prev_perf_level);
  return s;
}
IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) { IOSTATS_TIMER_GUARD(range_sync_nanos); TEST_SYNC_POINT("WritableFileWriter::RangeSync:0"); return writable_file_->RangeSync(offset, nbytes, IOOptions(), nullptr); } // This method writes to disk the specified data and makes use of the rate // limiter if available IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) { IOStatus s; assert(!use_direct_io()); const char* src = data; size_t left = size; while (left > 0) { size_t allowed; if (rate_limiter_ != nullptr) { allowed = rate_limiter_->RequestToken( left, 0 /* alignment */, writable_file_->GetIOPriority(), stats_, RateLimiter::OpType::kWrite); } else { allowed = left; } { IOSTATS_TIMER_GUARD(write_nanos); TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); #ifndef ROCKSDB_LITE FileOperationInfo::TimePoint start_ts; uint64_t old_size = writable_file_->GetFileSize(IOOptions(), nullptr); if (ShouldNotifyListeners()) { start_ts = std::chrono::system_clock::now(); old_size = next_write_offset_; } #endif { auto prev_perf_level = GetPerfLevel(); IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_); s = writable_file_->Append(Slice(src, allowed), IOOptions(), nullptr); SetPerfLevel(prev_perf_level); } #ifndef ROCKSDB_LITE if (ShouldNotifyListeners()) { auto finish_ts = std::chrono::system_clock::now(); NotifyOnFileWriteFinish(old_size, allowed, start_ts, finish_ts, s); } #endif if (!s.ok()) { return s; } } IOSTATS_ADD(bytes_written, allowed); TEST_KILL_RANDOM("WritableFileWriter::WriteBuffered:0", rocksdb_kill_odds); left -= allowed; src += allowed; } buf_.Size(0); return s; } void WritableFileWriter::UpdateFileChecksum(const Slice& data) { if (checksum_generator_ != nullptr) { checksum_generator_->Update(data.data(), data.size()); } } // This flushes the accumulated data in the buffer. We pad data with zeros if // necessary to the whole page. // However, during automatic flushes padding would not be necessary. 
// We always use RateLimiter if available. We move (Refit) any buffer bytes // that are left over the // whole number of pages to be written again on the next flush because we can // only write on aligned // offsets. #ifndef ROCKSDB_LITE IOStatus WritableFileWriter::WriteDirect() { assert(use_direct_io()); IOStatus s; const size_t alignment = buf_.Alignment(); assert((next_write_offset_ % alignment) == 0); // Calculate whole page final file advance if all writes succeed size_t file_advance = TruncateToPageBoundary(alignment, buf_.CurrentSize()); // Calculate the leftover tail, we write it here padded with zeros BUT we // will write // it again in the future either on Close() OR when the current whole page // fills out size_t leftover_tail = buf_.CurrentSize() - file_advance; // Round up and pad buf_.PadToAlignmentWith(0); const char* src = buf_.BufferStart(); uint64_t write_offset = next_write_offset_; size_t left = buf_.CurrentSize(); while (left > 0) { // Check how much is allowed size_t size; if (rate_limiter_ != nullptr) { size = rate_limiter_->RequestToken(left, buf_.Alignment(), writable_file_->GetIOPriority(), stats_, RateLimiter::OpType::kWrite); } else { size = left; } { IOSTATS_TIMER_GUARD(write_nanos); TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); FileOperationInfo::TimePoint start_ts; if (ShouldNotifyListeners()) { start_ts = std::chrono::system_clock::now(); } // direct writes must be positional s = writable_file_->PositionedAppend(Slice(src, size), write_offset, IOOptions(), nullptr); if (ShouldNotifyListeners()) { auto finish_ts = std::chrono::system_clock::now(); NotifyOnFileWriteFinish(write_offset, size, start_ts, finish_ts, s); } if (!s.ok()) { buf_.Size(file_advance + leftover_tail); return s; } } IOSTATS_ADD(bytes_written, size); left -= size; src += size; write_offset += size; assert((next_write_offset_ % alignment) == 0); } if (s.ok()) { // Move the tail to the beginning of the buffer // This never happens during normal Append 
but rather during // explicit call to Flush()/Sync() or Close() buf_.RefitTail(file_advance, leftover_tail); // This is where we start writing next time which may or not be // the actual file size on disk. They match if the buffer size // is a multiple of whole pages otherwise filesize_ is leftover_tail // behind next_write_offset_ += file_advance; } return s; } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/file/writable_file_writer.h000066400000000000000000000140051370372246700205410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "db/version_edit.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_checksum.h" #include "rocksdb/file_system.h" #include "rocksdb/io_status.h" #include "rocksdb/listener.h" #include "rocksdb/rate_limiter.h" #include "test_util/sync_point.h" #include "util/aligned_buffer.h" namespace ROCKSDB_NAMESPACE { class Statistics; // WritableFileWriter is a wrapper on top of Env::WritableFile. It provides // facilities to: // - Handle Buffered and Direct writes. // - Rate limit writes. // - Flush and Sync the data to the underlying filesystem. // - Notify any interested listeners on the completion of a write. // - Update IO stats. 
class WritableFileWriter { private: #ifndef ROCKSDB_LITE void NotifyOnFileWriteFinish(uint64_t offset, size_t length, const FileOperationInfo::TimePoint& start_ts, const FileOperationInfo::TimePoint& finish_ts, const IOStatus& io_status) { FileOperationInfo info(file_name_, start_ts, finish_ts); info.offset = offset; info.length = length; info.status = io_status; for (auto& listener : listeners_) { listener->OnFileWriteFinish(info); } info.status.PermitUncheckedError(); } #endif // ROCKSDB_LITE bool ShouldNotifyListeners() const { return !listeners_.empty(); } void UpdateFileChecksum(const Slice& data); std::unique_ptr writable_file_; std::string file_name_; Env* env_; AlignedBuffer buf_; size_t max_buffer_size_; // Actually written data size can be used for truncate // not counting padding data uint64_t filesize_; #ifndef ROCKSDB_LITE // This is necessary when we use unbuffered access // and writes must happen on aligned offsets // so we need to go back and write that page again uint64_t next_write_offset_; #endif // ROCKSDB_LITE bool pending_sync_; uint64_t last_sync_size_; uint64_t bytes_per_sync_; RateLimiter* rate_limiter_; Statistics* stats_; std::vector> listeners_; std::unique_ptr checksum_generator_; bool checksum_finalized_; public: WritableFileWriter( std::unique_ptr&& file, const std::string& _file_name, const FileOptions& options, Env* env = nullptr, Statistics* stats = nullptr, const std::vector>& listeners = {}, FileChecksumGenFactory* file_checksum_gen_factory = nullptr) : writable_file_(std::move(file)), file_name_(_file_name), env_(env), buf_(), max_buffer_size_(options.writable_file_max_buffer_size), filesize_(0), #ifndef ROCKSDB_LITE next_write_offset_(0), #endif // ROCKSDB_LITE pending_sync_(false), last_sync_size_(0), bytes_per_sync_(options.bytes_per_sync), rate_limiter_(options.rate_limiter), stats_(stats), listeners_(), checksum_generator_(nullptr), checksum_finalized_(false) { 
TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0", reinterpret_cast(max_buffer_size_)); buf_.Alignment(writable_file_->GetRequiredBufferAlignment()); buf_.AllocateNewBuffer(std::min((size_t)65536, max_buffer_size_)); #ifndef ROCKSDB_LITE std::for_each(listeners.begin(), listeners.end(), [this](const std::shared_ptr& e) { if (e->ShouldBeNotifiedOnFileIO()) { listeners_.emplace_back(e); } }); #else // !ROCKSDB_LITE (void)listeners; #endif if (file_checksum_gen_factory != nullptr) { FileChecksumGenContext checksum_gen_context; checksum_gen_context.file_name = _file_name; checksum_generator_ = file_checksum_gen_factory->CreateFileChecksumGenerator( checksum_gen_context); } } WritableFileWriter(const WritableFileWriter&) = delete; WritableFileWriter& operator=(const WritableFileWriter&) = delete; ~WritableFileWriter() { auto s = Close(); s.PermitUncheckedError(); } std::string file_name() const { return file_name_; } IOStatus Append(const Slice& data); IOStatus Pad(const size_t pad_bytes); IOStatus Flush(); IOStatus Close(); IOStatus Sync(bool use_fsync); // Sync only the data that was already Flush()ed. Safe to call concurrently // with Append() and Flush(). If !writable_file_->IsSyncThreadSafe(), // returns NotSupported status. 
IOStatus SyncWithoutFlush(bool use_fsync); uint64_t GetFileSize() const { return filesize_; } IOStatus InvalidateCache(size_t offset, size_t length) { return writable_file_->InvalidateCache(offset, length); } FSWritableFile* writable_file() const { return writable_file_.get(); } bool use_direct_io() { return writable_file_->use_direct_io(); } bool TEST_BufferIsEmpty() { return buf_.CurrentSize() == 0; } void TEST_SetFileChecksumGenerator( FileChecksumGenerator* checksum_generator) { checksum_generator_.reset(checksum_generator); } std::string GetFileChecksum(); const char* GetFileChecksumFuncName() const; private: // Used when os buffering is OFF and we are writing // DMA such as in Direct I/O mode #ifndef ROCKSDB_LITE IOStatus WriteDirect(); #endif // !ROCKSDB_LITE // Normal write IOStatus WriteBuffered(const char* data, size_t size); IOStatus RangeSync(uint64_t offset, uint64_t nbytes); IOStatus SyncInternal(bool use_fsync); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/hdfs/000077500000000000000000000000001370372246700141715ustar00rootroot00000000000000rocksdb-6.11.4/hdfs/README000066400000000000000000000016621370372246700150560ustar00rootroot00000000000000This directory contains the hdfs extensions needed to make rocksdb store files in HDFS. It has been compiled and testing against CDH 4.4 (2.0.0+1475-1.cdh4.4.0.p0.23~precise-cdh4.4.0). The configuration assumes that packages libhdfs0, libhdfs0-dev are installed which basically means that hdfs.h is in /usr/include and libhdfs in /usr/lib The env_hdfs.h file defines the rocksdb objects that are needed to talk to an underlying filesystem. 
If you want to compile rocksdb with hdfs support, please set the following environment variables appropriately (also defined in setup.sh for convenience) USE_HDFS=1 JAVA_HOME=/usr/local/jdk-7u79-64 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/jdk-7u79-64/jre/lib/amd64/server:/usr/local/jdk-7u79-64/jre/lib/amd64/:./snappy/libs make clean all db_bench To run dbbench, set CLASSPATH to include your hadoop distribution db_bench --hdfs="hdfs://hbaseudbperf001.snc1.facebook.com:9000" rocksdb-6.11.4/hdfs/env_hdfs.h000066400000000000000000000300341370372246700161360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include #include #include #include "port/sys_time.h" #include "rocksdb/env.h" #include "rocksdb/status.h" #ifdef USE_HDFS #include namespace ROCKSDB_NAMESPACE { // Thrown during execution when there is an issue with the supplied // arguments. class HdfsUsageException : public std::exception { }; // A simple exception that indicates something went wrong that is not // recoverable. The intention is for the message to be printed (with // nothing else) and the process terminate. class HdfsFatalException : public std::exception { public: explicit HdfsFatalException(const std::string& s) : what_(s) { } virtual ~HdfsFatalException() throw() { } virtual const char* what() const throw() { return what_.c_str(); } private: const std::string what_; }; // // The HDFS environment for rocksdb. This class overrides all the // file/dir access methods and delegates the thread-mgmt methods to the // default posix environment. 
// class HdfsEnv : public Env { public: explicit HdfsEnv(const std::string& fsname) : fsname_(fsname) { posixEnv = Env::Default(); fileSys_ = connectToPath(fsname_); } virtual ~HdfsEnv() { fprintf(stderr, "Destroying HdfsEnv::Default()\n"); hdfsDisconnect(fileSys_); } Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; Status NewDirectory(const std::string& name, std::unique_ptr* result) override; Status FileExists(const std::string& fname) override; Status GetChildren(const std::string& path, std::vector* result) override; Status DeleteFile(const std::string& fname) override; Status CreateDir(const std::string& name) override; Status CreateDirIfMissing(const std::string& name) override; Status DeleteDir(const std::string& name) override; Status GetFileSize(const std::string& fname, uint64_t* size) override; Status GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) override; Status RenameFile(const std::string& src, const std::string& target) override; Status LinkFile(const std::string& /*src*/, const std::string& /*target*/) override { return Status::NotSupported(); // not supported } Status LockFile(const std::string& fname, FileLock** lock) override; Status UnlockFile(FileLock* lock) override; Status NewLogger(const std::string& fname, std::shared_ptr* result) override; Status IsDirectory(const std::string& path, bool* is_dir) override; void Schedule(void (*function)(void* arg), void* arg, Priority pri = LOW, void* tag = nullptr, void (*unschedFunction)(void* arg) = 0) override { posixEnv->Schedule(function, arg, pri, tag, unschedFunction); } int UnSchedule(void* tag, Priority pri) override { return posixEnv->UnSchedule(tag, pri); } void StartThread(void 
(*function)(void* arg), void* arg) override { posixEnv->StartThread(function, arg); } void WaitForJoin() override { posixEnv->WaitForJoin(); } unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override { return posixEnv->GetThreadPoolQueueLen(pri); } Status GetTestDirectory(std::string* path) override { return posixEnv->GetTestDirectory(path); } uint64_t NowMicros() override { return posixEnv->NowMicros(); } void SleepForMicroseconds(int micros) override { posixEnv->SleepForMicroseconds(micros); } Status GetHostName(char* name, uint64_t len) override { return posixEnv->GetHostName(name, len); } Status GetCurrentTime(int64_t* unix_time) override { return posixEnv->GetCurrentTime(unix_time); } Status GetAbsolutePath(const std::string& db_path, std::string* output_path) override { return posixEnv->GetAbsolutePath(db_path, output_path); } void SetBackgroundThreads(int number, Priority pri = LOW) override { posixEnv->SetBackgroundThreads(number, pri); } int GetBackgroundThreads(Priority pri = LOW) override { return posixEnv->GetBackgroundThreads(pri); } void IncBackgroundThreadsIfNeeded(int number, Priority pri) override { posixEnv->IncBackgroundThreadsIfNeeded(number, pri); } std::string TimeToString(uint64_t number) override { return posixEnv->TimeToString(number); } static uint64_t gettid() { assert(sizeof(pthread_t) <= sizeof(uint64_t)); return (uint64_t)pthread_self(); } uint64_t GetThreadID() const override { return HdfsEnv::gettid(); } private: std::string fsname_; // string of the form "hdfs://hostname:port/" hdfsFS fileSys_; // a single FileSystem object for all files Env* posixEnv; // This object is derived from Env, but not from // posixEnv. We have posixnv as an encapsulated // object here so that we can use posix timers, // posix threads, etc. static const std::string kProto; static const std::string pathsep; /** * If the URI is specified of the form hdfs://server:port/path, * then connect to the specified cluster * else connect to default. 
*/ hdfsFS connectToPath(const std::string& uri) { if (uri.empty()) { return nullptr; } if (uri.find(kProto) != 0) { // uri doesn't start with hdfs:// -> use default:0, which is special // to libhdfs. return hdfsConnectNewInstance("default", 0); } const std::string hostport = uri.substr(kProto.length()); std::vector parts; split(hostport, ':', parts); if (parts.size() != 2) { throw HdfsFatalException("Bad uri for hdfs " + uri); } // parts[0] = hosts, parts[1] = port/xxx/yyy std::string host(parts[0]); std::string remaining(parts[1]); int rem = static_cast(remaining.find(pathsep)); std::string portStr = (rem == 0 ? remaining : remaining.substr(0, rem)); tPort port; port = atoi(portStr.c_str()); if (port == 0) { throw HdfsFatalException("Bad host-port for hdfs " + uri); } hdfsFS fs = hdfsConnectNewInstance(host.c_str(), port); return fs; } void split(const std::string &s, char delim, std::vector &elems) { elems.clear(); size_t prev = 0; size_t pos = s.find(delim); while (pos != std::string::npos) { elems.push_back(s.substr(prev, pos)); prev = pos + 1; pos = s.find(delim, prev); } elems.push_back(s.substr(prev, s.size())); } }; } // namespace ROCKSDB_NAMESPACE #else // USE_HDFS namespace ROCKSDB_NAMESPACE { class HdfsEnv : public Env { public: explicit HdfsEnv(const std::string& /*fsname*/) { fprintf(stderr, "You have not build rocksdb with HDFS support\n"); fprintf(stderr, "Please see hdfs/README for details\n"); abort(); } virtual ~HdfsEnv() { } virtual Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; virtual Status NewRandomAccessFile( const std::string& /*fname*/, std::unique_ptr* /*result*/, const EnvOptions& /*options*/) override { return Status::NotSupported(); } virtual Status NewWritableFile(const std::string& /*fname*/, std::unique_ptr* /*result*/, const EnvOptions& /*options*/) override { return Status::NotSupported(); } virtual Status NewDirectory(const std::string& /*name*/, std::unique_ptr* 
/*result*/) override { return Status::NotSupported(); } virtual Status FileExists(const std::string& /*fname*/) override { return Status::NotSupported(); } virtual Status GetChildren(const std::string& /*path*/, std::vector* /*result*/) override { return Status::NotSupported(); } virtual Status DeleteFile(const std::string& /*fname*/) override { return Status::NotSupported(); } virtual Status CreateDir(const std::string& /*name*/) override { return Status::NotSupported(); } virtual Status CreateDirIfMissing(const std::string& /*name*/) override { return Status::NotSupported(); } virtual Status DeleteDir(const std::string& /*name*/) override { return Status::NotSupported(); } virtual Status GetFileSize(const std::string& /*fname*/, uint64_t* /*size*/) override { return Status::NotSupported(); } virtual Status GetFileModificationTime(const std::string& /*fname*/, uint64_t* /*time*/) override { return Status::NotSupported(); } virtual Status RenameFile(const std::string& /*src*/, const std::string& /*target*/) override { return Status::NotSupported(); } virtual Status LinkFile(const std::string& /*src*/, const std::string& /*target*/) override { return Status::NotSupported(); } virtual Status LockFile(const std::string& /*fname*/, FileLock** /*lock*/) override { return Status::NotSupported(); } virtual Status UnlockFile(FileLock* /*lock*/) override { return Status::NotSupported(); } virtual Status NewLogger(const std::string& /*fname*/, std::shared_ptr* /*result*/) override { return Status::NotSupported(); } Status IsDirectory(const std::string& /*path*/, bool* /*is_dir*/) override { return Status::NotSupported(); } virtual void Schedule(void (* /*function*/)(void* arg), void* /*arg*/, Priority /*pri*/ = LOW, void* /*tag*/ = nullptr, void (* /*unschedFunction*/)(void* arg) = 0) override {} virtual int UnSchedule(void* /*tag*/, Priority /*pri*/) override { return 0; } virtual void StartThread(void (* /*function*/)(void* arg), void* /*arg*/) override {} virtual void 
WaitForJoin() override {} virtual unsigned int GetThreadPoolQueueLen( Priority /*pri*/ = LOW) const override { return 0; } virtual Status GetTestDirectory(std::string* /*path*/) override { return Status::NotSupported(); } virtual uint64_t NowMicros() override { return 0; } virtual void SleepForMicroseconds(int /*micros*/) override {} virtual Status GetHostName(char* /*name*/, uint64_t /*len*/) override { return Status::NotSupported(); } virtual Status GetCurrentTime(int64_t* /*unix_time*/) override { return Status::NotSupported(); } virtual Status GetAbsolutePath(const std::string& /*db_path*/, std::string* /*outputpath*/) override { return Status::NotSupported(); } virtual void SetBackgroundThreads(int /*number*/, Priority /*pri*/ = LOW) override {} virtual int GetBackgroundThreads(Priority /*pri*/ = LOW) override { return 0; } virtual void IncBackgroundThreadsIfNeeded(int /*number*/, Priority /*pri*/) override {} virtual std::string TimeToString(uint64_t /*number*/) override { return ""; } virtual uint64_t GetThreadID() const override { return 0; } }; } // namespace ROCKSDB_NAMESPACE #endif // USE_HDFS rocksdb-6.11.4/hdfs/setup.sh000077500000000000000000000010431370372246700156660ustar00rootroot00000000000000# shellcheck disable=SC2148 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
export USE_HDFS=1 export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server:$JAVA_HOME/jre/lib/amd64:$HADOOP_HOME/lib/native export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob` for f in `find /usr/lib/hadoop-hdfs | grep jar`; do export CLASSPATH=$CLASSPATH:$f; done for f in `find /usr/lib/hadoop | grep jar`; do export CLASSPATH=$CLASSPATH:$f; done for f in `find /usr/lib/hadoop/client | grep jar`; do export CLASSPATH=$CLASSPATH:$f; done rocksdb-6.11.4/include/000077500000000000000000000000001370372246700146705ustar00rootroot00000000000000rocksdb-6.11.4/include/rocksdb/000077500000000000000000000000001370372246700163175ustar00rootroot00000000000000rocksdb-6.11.4/include/rocksdb/advanced_options.h000066400000000000000000001000531370372246700220070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "rocksdb/memtablerep.h" #include "rocksdb/universal_compaction.h" namespace ROCKSDB_NAMESPACE { class Slice; class SliceTransform; enum CompressionType : unsigned char; class TablePropertiesCollectorFactory; class TableFactory; struct Options; enum CompactionStyle : char { // level based compaction style kCompactionStyleLevel = 0x0, // Universal compaction style // Not supported in ROCKSDB_LITE. kCompactionStyleUniversal = 0x1, // FIFO compaction style // Not supported in ROCKSDB_LITE kCompactionStyleFIFO = 0x2, // Disable background compaction. Compaction jobs are submitted // via CompactFiles(). 
// Not supported in ROCKSDB_LITE kCompactionStyleNone = 0x3, }; // In Level-based compaction, it Determines which file from a level to be // picked to merge to the next level. We suggest people try // kMinOverlappingRatio first when you tune your database. enum CompactionPri : char { // Slightly prioritize larger files by size compensated by #deletes kByCompensatedSize = 0x0, // First compact files whose data's latest update time is oldest. // Try this if you only update some hot keys in small ranges. kOldestLargestSeqFirst = 0x1, // First compact files whose range hasn't been compacted to the next level // for the longest. If your updates are random across the key space, // write amplification is slightly better with this option. kOldestSmallestSeqFirst = 0x2, // First compact files whose ratio between overlapping size in next level // and its size is the smallest. It in many cases can optimize write // amplification. kMinOverlappingRatio = 0x3, }; struct CompactionOptionsFIFO { // once the total sum of table files reaches this, we will delete the oldest // table file // Default: 1GB uint64_t max_table_files_size; // If true, try to do compaction to compact smaller files into larger ones. // Minimum files to compact follows options.level0_file_num_compaction_trigger // and compaction won't trigger if average compact bytes per del file is // larger than options.write_buffer_size. This is to protect large files // from being compacted again. // Default: false; bool allow_compaction = false; CompactionOptionsFIFO() : max_table_files_size(1 * 1024 * 1024 * 1024) {} CompactionOptionsFIFO(uint64_t _max_table_files_size, bool _allow_compaction) : max_table_files_size(_max_table_files_size), allow_compaction(_allow_compaction) {} }; // Compression options for different compression algorithms like Zlib struct CompressionOptions { // RocksDB's generic default compression level. 
Internally it'll be translated // to the default compression level specific to the library being used (see // comment above `ColumnFamilyOptions::compression`). // // The default value is the max 16-bit int as it'll be written out in OPTIONS // file, which should be portable. const static int kDefaultCompressionLevel = 32767; int window_bits; int level; int strategy; // Maximum size of dictionaries used to prime the compression library. // Enabling dictionary can improve compression ratios when there are // repetitions across data blocks. // // The dictionary is created by sampling the SST file data. If // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's // dictionary generator. Otherwise, the random samples are used directly as // the dictionary. // // When compression dictionary is disabled, we compress and write each block // before buffering data for the next one. When compression dictionary is // enabled, we buffer all SST file data in-memory so we can sample it, as data // can only be compressed and written after the dictionary has been finalized. // So users of this feature may see increased memory usage. // // Default: 0. uint32_t max_dict_bytes; // Maximum size of training data passed to zstd's dictionary trainer. Using // zstd's dictionary trainer can achieve even better compression ratio // improvements than using `max_dict_bytes` alone. // // The training data will be used to generate a dictionary of max_dict_bytes. // // Default: 0. uint32_t zstd_max_train_bytes; // Number of threads for parallel compression. // Parallel compression is enabled only if threads > 1. // THE FEATURE IS STILL EXPERIMENTAL // // This option is valid only when BlockBasedTable is used. // // When parallel compression is enabled, SST size file sizes might be // more inflated compared to the target size, because more data of unknown // compressed size is in flight when compression is parallelized. 
To be // reasonably accurate, this inflation is also estimated by using historical // compression ratio and current bytes inflight. // // Default: 1. uint32_t parallel_threads; // When the compression options are set by the user, it will be set to "true". // For bottommost_compression_opts, to enable it, user must set enabled=true. // Otherwise, bottommost compression will use compression_opts as default // compression options. // // For compression_opts, if compression_opts.enabled=false, it is still // used as compression options for compression process. // // Default: false. bool enabled; CompressionOptions() : window_bits(-14), level(kDefaultCompressionLevel), strategy(0), max_dict_bytes(0), zstd_max_train_bytes(0), parallel_threads(1), enabled(false) {} CompressionOptions(int wbits, int _lev, int _strategy, int _max_dict_bytes, int _zstd_max_train_bytes, int _parallel_threads, bool _enabled) : window_bits(wbits), level(_lev), strategy(_strategy), max_dict_bytes(_max_dict_bytes), zstd_max_train_bytes(_zstd_max_train_bytes), parallel_threads(_parallel_threads), enabled(_enabled) {} }; enum UpdateStatus { // Return status For inplace update callback UPDATE_FAILED = 0, // Nothing to update UPDATED_INPLACE = 1, // Value updated inplace UPDATED = 2, // No inplace update. Merged value set }; struct AdvancedColumnFamilyOptions { // The maximum number of write buffers that are built up in memory. // The default and the minimum number is 2, so that when 1 write buffer // is being flushed to storage, new writes can continue to the other // write buffer. // If max_write_buffer_number > 3, writing will be slowed down to // options.delayed_write_rate if we are writing to the last write buffer // allowed. // // Default: 2 // // Dynamically changeable through SetOptions() API int max_write_buffer_number = 2; // The minimum number of write buffers that will be merged together // before writing to storage. 
If set to 1, then // all write buffers are flushed to L0 as individual files and this increases // read amplification because a get request has to check in all of these // files. Also, an in-memory merge may result in writing lesser // data to storage if there are duplicate records in each of these // individual write buffers. Default: 1 int min_write_buffer_number_to_merge = 1; // DEPRECATED // The total maximum number of write buffers to maintain in memory including // copies of buffers that have already been flushed. Unlike // max_write_buffer_number, this parameter does not affect flushing. // This parameter is being replaced by max_write_buffer_size_to_maintain. // If both parameters are set to non-zero values, this parameter will be // ignored. int max_write_buffer_number_to_maintain = 0; // The total maximum size(bytes) of write buffers to maintain in memory // including copies of buffers that have already been flushed. This parameter // only affects trimming of flushed buffers and does not affect flushing. // This controls the maximum amount of write history that will be available // in memory for conflict checking when Transactions are used. The actual // size of write history (flushed Memtables) might be higher than this limit // if further trimming will reduce write history total size below this // limit. For example, if max_write_buffer_size_to_maintain is set to 64MB, // and there are three flushed Memtables, with sizes of 32MB, 20MB, 20MB. // Because trimming the next Memtable of size 20MB will reduce total memory // usage to 52MB which is below the limit, RocksDB will stop trimming. // // When using an OptimisticTransactionDB: // If this value is too low, some transactions may fail at commit time due // to not being able to determine whether there were any write conflicts. // // When using a TransactionDB: // If Transaction::SetSnapshot is used, TransactionDB will read either // in-memory write buffers or SST files to do write-conflict checking. 
// Increasing this value can reduce the number of reads to SST files // done for conflict detection. // // Setting this value to 0 will cause write buffers to be freed immediately // after they are flushed. If this value is set to -1, // 'max_write_buffer_number * write_buffer_size' will be used. // // Default: // If using a TransactionDB/OptimisticTransactionDB, the default value will // be set to the value of 'max_write_buffer_number * write_buffer_size' // if it is not explicitly set by the user. Otherwise, the default is 0. int64_t max_write_buffer_size_to_maintain = 0; // Allows thread-safe inplace updates. If this is true, there is no way to // achieve point-in-time consistency using snapshot or iterator (assuming // concurrent updates). Hence iterator and multi-get will return results // which are not consistent as of any point-in-time. // If inplace_callback function is not set, // Put(key, new_value) will update inplace the existing_value iff // * key exists in current memtable // * new sizeof(new_value) <= sizeof(existing_value) // * existing_value for that key is a put i.e. kTypeValue // If inplace_callback function is set, check doc for inplace_callback. // Default: false. bool inplace_update_support = false; // Number of locks used for inplace update // Default: 10000, if inplace_update_support = true, else 0. // // Dynamically changeable through SetOptions() API size_t inplace_update_num_locks = 10000; // existing_value - pointer to previous value (from both memtable and sst). // nullptr if key doesn't exist // existing_value_size - pointer to size of existing_value). // nullptr if key doesn't exist // delta_value - Delta value to be merged with the existing_value. // Stored in transaction logs. // merged_value - Set when delta is applied on the previous value. // Applicable only when inplace_update_support is true, // this callback function is called at the time of updating the memtable // as part of a Put operation, lets say Put(key, delta_value). 
It allows the // 'delta_value' specified as part of the Put operation to be merged with // an 'existing_value' of the key in the database. // If the merged value is smaller in size that the 'existing_value', // then this function can update the 'existing_value' buffer inplace and // the corresponding 'existing_value'_size pointer, if it wishes to. // The callback should return UpdateStatus::UPDATED_INPLACE. // In this case. (In this case, the snapshot-semantics of the rocksdb // Iterator is not atomic anymore). // If the merged value is larger in size than the 'existing_value' or the // application does not wish to modify the 'existing_value' buffer inplace, // then the merged value should be returned via *merge_value. It is set by // merging the 'existing_value' and the Put 'delta_value'. The callback should // return UpdateStatus::UPDATED in this case. This merged value will be added // to the memtable. // If merging fails or the application does not wish to take any action, // then the callback should return UpdateStatus::UPDATE_FAILED. // Please remember that the original call from the application is Put(key, // delta_value). So the transaction log (if enabled) will still contain (key, // delta_value). The 'merged_value' is not stored in the transaction log. // Hence the inplace_callback function should be consistent across db reopens. // Default: nullptr UpdateStatus (*inplace_callback)(char* existing_value, uint32_t* existing_value_size, Slice delta_value, std::string* merged_value) = nullptr; // if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, // create prefix bloom for memtable with the size of // write_buffer_size * memtable_prefix_bloom_size_ratio. // If it is larger than 0.25, it is sanitized to 0.25. // // Default: 0 (disable) // // Dynamically changeable through SetOptions() API double memtable_prefix_bloom_size_ratio = 0.0; // Enable whole key bloom filter in memtable. 
Note this will only take effect // if memtable_prefix_bloom_size_ratio is not 0. Enabling whole key filtering // can potentially reduce CPU usage for point-look-ups. // // Default: false (disable) // // Dynamically changeable through SetOptions() API bool memtable_whole_key_filtering = false; // Page size for huge page for the arena used by the memtable. If <=0, it // won't allocate from huge page but from malloc. // Users are responsible to reserve huge pages for it to be allocated. For // example: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt // If there isn't enough free huge page available, it will fall back to // malloc. // // Dynamically changeable through SetOptions() API size_t memtable_huge_page_size = 0; // If non-nullptr, memtable will use the specified function to extract // prefixes for keys, and for each prefix maintain a hint of insert location // to reduce CPU usage for inserting keys with the prefix. Keys out of // domain of the prefix extractor will be insert without using hints. // // Currently only the default skiplist based memtable implements the feature. // All other memtable implementation will ignore the option. It incurs ~250 // additional bytes of memory overhead to store a hint for each prefix. // Also concurrent writes (when allow_concurrent_memtable_write is true) will // ignore the option. // // The option is best suited for workloads where keys will likely to insert // to a location close the last inserted key with the same prefix. // One example could be inserting keys of the form (prefix + timestamp), // and keys of the same prefix always comes in with time order. Another // example would be updating the same key over and over again, in which case // the prefix can be the key itself. // // Default: nullptr (disable) std::shared_ptr memtable_insert_with_hint_prefix_extractor = nullptr; // Control locality of bloom filter probes to improve CPU cache hit rate. 
// This option now only applies to plaintable prefix bloom. This // optimization is turned off when set to 0, and positive number to turn // it on. // Default: 0 uint32_t bloom_locality = 0; // size of one block in arena memory allocation. // If <= 0, a proper value is automatically calculated (usually 1/8 of // writer_buffer_size, rounded up to a multiple of 4KB). // // There are two additional restriction of the specified size: // (1) size should be in the range of [4096, 2 << 30] and // (2) be the multiple of the CPU word (which helps with the memory // alignment). // // We'll automatically check and adjust the size number to make sure it // conforms to the restrictions. // // Default: 0 // // Dynamically changeable through SetOptions() API size_t arena_block_size = 0; // Different levels can have different compression policies. There // are cases where most lower levels would like to use quick compression // algorithms while the higher levels (which have more data) use // compression algorithms that have better compression but could // be slower. This array, if non-empty, should have an entry for // each level of the database; these override the value specified in // the previous field 'compression'. // // NOTICE if level_compaction_dynamic_level_bytes=true, // compression_per_level[0] still determines L0, but other elements // of the array are based on base level (the level L0 files are merged // to), and may not match the level users see from info log for metadata. // If L0 files are merged to level-n, then, for i>0, compression_per_level[i] // determines compaction type for level n+i-1. // For example, if we have three 5 levels, and we determine to merge L0 // data to L4 (which means L1..L3 will be empty), then the new files go to // L4 uses compression type compression_per_level[1]. // If now L0 is merged to L2. 
Data goes to L2 will be compressed // according to compression_per_level[1], L3 using compression_per_level[2] // and L4 using compression_per_level[3]. Compaction for each level can // change when data grows. std::vector compression_per_level; // Number of levels for this database int num_levels = 7; // Soft limit on number of level-0 files. We start slowing down writes at this // point. A value <0 means that no writing slow down will be triggered by // number of files in level-0. // // Default: 20 // // Dynamically changeable through SetOptions() API int level0_slowdown_writes_trigger = 20; // Maximum number of level-0 files. We stop writes at this point. // // Default: 36 // // Dynamically changeable through SetOptions() API int level0_stop_writes_trigger = 36; // Target file size for compaction. // target_file_size_base is per-file size for level-1. // Target file size for level L can be calculated by // target_file_size_base * (target_file_size_multiplier ^ (L-1)) // For example, if target_file_size_base is 2MB and // target_file_size_multiplier is 10, then each file on level-1 will // be 2MB, and each file on level 2 will be 20MB, // and each file on level-3 will be 200MB. // // Default: 64MB. // // Dynamically changeable through SetOptions() API uint64_t target_file_size_base = 64 * 1048576; // By default target_file_size_multiplier is 1, which means // by default files in different levels will have similar size. // // Dynamically changeable through SetOptions() API int target_file_size_multiplier = 1; // If true, RocksDB will pick target size of each level dynamically. // We will pick a base level b >= 1. L0 will be directly merged into level b, // instead of always into level 1. Level 1 to b-1 need to be empty. // We try to pick b and its target size so that // 1. target size is in the range of // (max_bytes_for_level_base / max_bytes_for_level_multiplier, // max_bytes_for_level_base] // 2. 
target size of the last level (level num_levels-1) equals to extra size // of the level. // At the same time max_bytes_for_level_multiplier and // max_bytes_for_level_multiplier_additional are still satisfied. // (When L0 is too large, we make some adjustment. See below.) // // With this option on, from an empty DB, we make last level the base level, // which means merging L0 data into the last level, until it exceeds // max_bytes_for_level_base. And then we make the second last level to be // base level, to start to merge L0 data to second last level, with its // target size to be 1/max_bytes_for_level_multiplier of the last level's // extra size. After the data accumulates more so that we need to move the // base level to the third last one, and so on. // // For example, assume max_bytes_for_level_multiplier=10, num_levels=6, // and max_bytes_for_level_base=10MB. // Target sizes of level 1 to 5 starts with: // [- - - - 10MB] // with base level is level. Target sizes of level 1 to 4 are not applicable // because they will not be used. // Until the size of Level 5 grows to more than 10MB, say 11MB, we make // base target to level 4 and now the targets looks like: // [- - - 1.1MB 11MB] // While data are accumulated, size targets are tuned based on actual data // of level 5. When level 5 has 50MB of data, the target is like: // [- - - 5MB 50MB] // Until level 5's actual size is more than 100MB, say 101MB. Now if we keep // level 4 to be the base level, its target size needs to be 10.1MB, which // doesn't satisfy the target size range. So now we make level 3 the target // size and the target sizes of the levels look like: // [- - 1.01MB 10.1MB 101MB] // In the same way, while level 5 further grows, all levels' targets grow, // like // [- - 5MB 50MB 500MB] // Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the // base level and make levels' target sizes like this: // [- 1.001MB 10.01MB 100.1MB 1001MB] // and go on... 
// // By doing it, we give max_bytes_for_level_multiplier a priority against // max_bytes_for_level_base, for a more predictable LSM tree shape. It is // useful to limit worse case space amplification. // // // If the compaction from L0 is lagged behind, a special mode will be turned // on to prioritize write amplification against max_bytes_for_level_multiplier // or max_bytes_for_level_base. The L0 compaction is lagged behind by looking // at number of L0 files and total L0 size. If number of L0 files is at least // the double of level0_file_num_compaction_trigger, or the total size is // at least max_bytes_for_level_base, this mode is on. The target of L1 grows // to the actual data size in L0, and then determine the target for each level // so that each level will have the same level multiplier. // // For example, when L0 size is 100MB, the size of last level is 1600MB, // max_bytes_for_level_base = 80MB, and max_bytes_for_level_multiplier = 10. // Since L0 size is larger than max_bytes_for_level_base, this is a L0 // compaction backlogged mode. So that the L1 size is determined to be 100MB. // Based on max_bytes_for_level_multiplier = 10, at least 3 non-0 levels will // be needed. The level multiplier will be calculated to be 4 and the three // levels' target to be [100MB, 400MB, 1600MB]. // // In this mode, The number of levels will be no more than the normal mode, // and the level multiplier will be lower. The write amplification will // likely to be reduced. // // // max_bytes_for_level_multiplier_additional is ignored with this flag on. // // Turning this feature on or off for an existing DB can cause unexpected // LSM tree structure so it's not recommended. // // Default: false bool level_compaction_dynamic_level_bytes = false; // Default: 10. // // Dynamically changeable through SetOptions() API double max_bytes_for_level_multiplier = 10; // Different max-size multipliers for different levels. 
// These are multiplied by max_bytes_for_level_multiplier to arrive // at the max-size of each level. // // Default: 1 // // Dynamically changeable through SetOptions() API std::vector max_bytes_for_level_multiplier_additional = std::vector(num_levels, 1); // We try to limit number of bytes in one compaction to be lower than this // threshold. But it's not guaranteed. // Value 0 will be sanitized. // // Default: target_file_size_base * 25 // // Dynamically changeable through SetOptions() API uint64_t max_compaction_bytes = 0; // All writes will be slowed down to at least delayed_write_rate if estimated // bytes needed to be compaction exceed this threshold. // // Default: 64GB // // Dynamically changeable through SetOptions() API uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull; // All writes are stopped if estimated bytes needed to be compaction exceed // this threshold. // // Default: 256GB // // Dynamically changeable through SetOptions() API uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull; // The compaction style. Default: kCompactionStyleLevel CompactionStyle compaction_style = kCompactionStyleLevel; // If level compaction_style = kCompactionStyleLevel, for each level, // which files are prioritized to be picked to compact. // Default: kMinOverlappingRatio CompactionPri compaction_pri = kMinOverlappingRatio; // The options needed to support Universal Style compactions // // Dynamically changeable through SetOptions() API // Dynamic change example: // SetOptions("compaction_options_universal", "{size_ratio=2;}") CompactionOptionsUniversal compaction_options_universal; // The options for FIFO compaction style // // Dynamically changeable through SetOptions() API // Dynamic change example: // SetOptions("compaction_options_fifo", "{max_table_files_size=100;}") CompactionOptionsFIFO compaction_options_fifo; // An iteration->Next() sequentially skips over keys with the same // user-key unless this option is set. 
This number specifies the number // of keys (with the same userkey) that will be sequentially // skipped before a reseek is issued. // // Default: 8 // // Dynamically changeable through SetOptions() API uint64_t max_sequential_skip_in_iterations = 8; // This is a factory that provides MemTableRep objects. // Default: a factory that provides a skip-list-based implementation of // MemTableRep. std::shared_ptr memtable_factory = std::shared_ptr(new SkipListFactory); // Block-based table related options are moved to BlockBasedTableOptions. // Related options that were originally here but now moved include: // no_block_cache // block_cache // block_cache_compressed // block_size // block_size_deviation // block_restart_interval // filter_policy // whole_key_filtering // If you'd like to customize some of these options, you will need to // use NewBlockBasedTableFactory() to construct a new table factory. // This option allows user to collect their own interested statistics of // the tables. // Default: empty vector -- no user-defined statistics collection will be // performed. typedef std::vector> TablePropertiesCollectorFactories; TablePropertiesCollectorFactories table_properties_collector_factories; // Maximum number of successive merge operations on a key in the memtable. // // When a merge operation is added to the memtable and the maximum number of // successive merges is reached, the value of the key will be calculated and // inserted into the memtable instead of the merge operation. This will // ensure that there are never more than max_successive_merges merge // operations in the memtable. // // Default: 0 (disabled) // // Dynamically changeable through SetOptions() API size_t max_successive_merges = 0; // This flag specifies that the implementation should optimize the filters // mainly for cases where keys are found rather than also optimize for keys // missed. 
This would be used in cases where the application knows that // there are very few misses or the performance in the case of misses is not // important. // // For now, this flag allows us to not store filters for the last level i.e // the largest level which contains data of the LSM store. For keys which // are hits, the filters in this level are not useful because we will search // for the data anyway. NOTE: the filters in other levels are still useful // even for key hit because they tell us whether to look in that level or go // to the higher level. // // Default: false bool optimize_filters_for_hits = false; // After writing every SST file, reopen it and read all the keys. // // Default: false // // Dynamically changeable through SetOptions() API bool paranoid_file_checks = false; // In debug mode, RocksDB run consistency checks on the LSM every time the LSM // change (Flush, Compaction, AddFile). These checks are disabled in release // mode, use this option to enable them in release mode as well. // Default: false bool force_consistency_checks = false; // Measure IO stats in compactions and flushes, if true. // // Default: false // // Dynamically changeable through SetOptions() API bool report_bg_io_stats = false; // Files older than TTL will go through the compaction process. // Pre-req: This needs max_open_files to be set to -1. // In Level: Non-bottom-level files older than TTL will go through the // compation process. // In FIFO: Files older than TTL will be deleted. // unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60 // In FIFO, this option will have the same meaning as // periodic_compaction_seconds. Whichever stricter will be used. // 0 means disabling. // UINT64_MAX - 1 (0xfffffffffffffffe) is special flag to allow RocksDB to // pick default. // // Default: 30 days for leveled compaction + block based table. disable // otherwise. 
// // Dynamically changeable through SetOptions() API uint64_t ttl = 0xfffffffffffffffe; // Files older than this value will be picked up for compaction, and // re-written to the same level as they were before. // // A file's age is computed by looking at file_creation_time or creation_time // table properties in order, if they have valid non-zero values; if not, the // age is based on the file's last modified time (given by the underlying // Env). // // Supported in Level and FIFO compaction. // In FIFO compaction, this option has the same meaning as TTL and whichever // stricter will be used. // Pre-req: max_open_file == -1. // unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60 // // Values: // 0: Turn off Periodic compactions. // UINT64_MAX - 1 (i.e 0xfffffffffffffffe): Let RocksDB control this feature // as needed. For now, RocksDB will change this value to 30 days // (i.e 30 * 24 * 60 * 60) so that every file goes through the compaction // process at least once every 30 days if not compacted sooner. // In FIFO compaction, since the option has the same meaning as ttl, // when this value is left default, and ttl is left to 0, 30 days will be // used. Otherwise, min(ttl, periodic_compaction_seconds) will be used. // // Default: UINT64_MAX - 1 (allow RocksDB to auto-tune) // // Dynamically changeable through SetOptions() API uint64_t periodic_compaction_seconds = 0xfffffffffffffffe; // If this option is set then 1 in N blocks are compressed // using a fast (lz4) and slow (zstd) compression algorithm. // The compressibility is reported as stats and the stored // data is left uncompressed (unless compression is also requested). 
uint64_t sample_for_compression = 0; // Create ColumnFamilyOptions with default values for all fields AdvancedColumnFamilyOptions(); // Create ColumnFamilyOptions from Options explicit AdvancedColumnFamilyOptions(const Options& options); // ---------------- OPTIONS NOT SUPPORTED ANYMORE ---------------- // NOT SUPPORTED ANYMORE // This does not do anything anymore. int max_mem_compaction_level; // NOT SUPPORTED ANYMORE -- this options is no longer used // Puts are delayed to options.delayed_write_rate when any level has a // compaction score that exceeds soft_rate_limit. This is ignored when == 0.0. // // Default: 0 (disabled) // // Dynamically changeable through SetOptions() API double soft_rate_limit = 0.0; // NOT SUPPORTED ANYMORE -- this options is no longer used double hard_rate_limit = 0.0; // NOT SUPPORTED ANYMORE -- this options is no longer used unsigned int rate_limit_delay_max_milliseconds = 100; // NOT SUPPORTED ANYMORE // Does not have any effect. bool purge_redundant_kvs_while_flush = true; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/c.h000066400000000000000000002627351370372246700167310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). /* Copyright (c) 2011 The LevelDB Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. See the AUTHORS file for names of contributors. C bindings for rocksdb. May be useful as a stable ABI that can be used by programs that keep rocksdb in a shared library, or for a JNI api. Does not support: . getters for the option types . custom comparators that implement key shortening . capturing post-write-snapshot . 
custom iter, db, env, cache implementations using just the C bindings Some conventions: (1) We expose just opaque struct pointers and functions to clients. This allows us to change internal representations without having to recompile clients. (2) For simplicity, there is no equivalent to the Slice type. Instead, the caller has to pass the pointer and length as separate arguments. (3) Errors are represented by a null-terminated c string. NULL means no error. All operations that can raise an error are passed a "char** errptr" as the last argument. One of the following must be true on entry: *errptr == NULL *errptr points to a malloc()ed null-terminated error message On success, a leveldb routine leaves *errptr unchanged. On failure, leveldb frees the old value of *errptr and set *errptr to a malloc()ed error message. (4) Bools have the type unsigned char (0 == false; rest == true) (5) All of the pointer arguments must be non-NULL. */ #pragma once #ifdef _WIN32 #ifdef ROCKSDB_DLL #ifdef ROCKSDB_LIBRARY_EXPORTS #define ROCKSDB_LIBRARY_API __declspec(dllexport) #else #define ROCKSDB_LIBRARY_API __declspec(dllimport) #endif #else #define ROCKSDB_LIBRARY_API #endif #else #define ROCKSDB_LIBRARY_API #endif #ifdef __cplusplus extern "C" { #endif #include #include #include /* Exported types */ typedef struct rocksdb_t rocksdb_t; typedef struct rocksdb_backup_engine_t rocksdb_backup_engine_t; typedef struct rocksdb_backup_engine_info_t rocksdb_backup_engine_info_t; typedef struct rocksdb_restore_options_t rocksdb_restore_options_t; typedef struct rocksdb_cache_t rocksdb_cache_t; typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t; typedef struct rocksdb_compactionfiltercontext_t rocksdb_compactionfiltercontext_t; typedef struct rocksdb_compactionfilterfactory_t rocksdb_compactionfilterfactory_t; typedef struct rocksdb_comparator_t rocksdb_comparator_t; typedef struct rocksdb_dbpath_t rocksdb_dbpath_t; typedef struct rocksdb_env_t rocksdb_env_t; typedef struct 
rocksdb_fifo_compaction_options_t rocksdb_fifo_compaction_options_t; typedef struct rocksdb_filelock_t rocksdb_filelock_t; typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t; typedef struct rocksdb_flushoptions_t rocksdb_flushoptions_t; typedef struct rocksdb_iterator_t rocksdb_iterator_t; typedef struct rocksdb_logger_t rocksdb_logger_t; typedef struct rocksdb_mergeoperator_t rocksdb_mergeoperator_t; typedef struct rocksdb_options_t rocksdb_options_t; typedef struct rocksdb_compactoptions_t rocksdb_compactoptions_t; typedef struct rocksdb_block_based_table_options_t rocksdb_block_based_table_options_t; typedef struct rocksdb_cuckoo_table_options_t rocksdb_cuckoo_table_options_t; typedef struct rocksdb_randomfile_t rocksdb_randomfile_t; typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; typedef struct rocksdb_seqfile_t rocksdb_seqfile_t; typedef struct rocksdb_slicetransform_t rocksdb_slicetransform_t; typedef struct rocksdb_snapshot_t rocksdb_snapshot_t; typedef struct rocksdb_writablefile_t rocksdb_writablefile_t; typedef struct rocksdb_writebatch_t rocksdb_writebatch_t; typedef struct rocksdb_writebatch_wi_t rocksdb_writebatch_wi_t; typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t; typedef struct rocksdb_universal_compaction_options_t rocksdb_universal_compaction_options_t; typedef struct rocksdb_livefiles_t rocksdb_livefiles_t; typedef struct rocksdb_column_family_handle_t rocksdb_column_family_handle_t; typedef struct rocksdb_envoptions_t rocksdb_envoptions_t; typedef struct rocksdb_ingestexternalfileoptions_t rocksdb_ingestexternalfileoptions_t; typedef struct rocksdb_sstfilewriter_t rocksdb_sstfilewriter_t; typedef struct rocksdb_ratelimiter_t rocksdb_ratelimiter_t; typedef struct rocksdb_perfcontext_t rocksdb_perfcontext_t; typedef struct rocksdb_pinnableslice_t rocksdb_pinnableslice_t; typedef struct rocksdb_transactiondb_options_t rocksdb_transactiondb_options_t; typedef struct rocksdb_transactiondb_t 
rocksdb_transactiondb_t; typedef struct rocksdb_transaction_options_t rocksdb_transaction_options_t; typedef struct rocksdb_optimistictransactiondb_t rocksdb_optimistictransactiondb_t; typedef struct rocksdb_optimistictransaction_options_t rocksdb_optimistictransaction_options_t; typedef struct rocksdb_transaction_t rocksdb_transaction_t; typedef struct rocksdb_checkpoint_t rocksdb_checkpoint_t; typedef struct rocksdb_wal_iterator_t rocksdb_wal_iterator_t; typedef struct rocksdb_wal_readoptions_t rocksdb_wal_readoptions_t; typedef struct rocksdb_memory_consumers_t rocksdb_memory_consumers_t; typedef struct rocksdb_memory_usage_t rocksdb_memory_usage_t; /* DB operations */ extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open( const rocksdb_options_t* options, const char* name, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_with_ttl( const rocksdb_options_t* options, const char* name, int ttl, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_for_read_only( const rocksdb_options_t* options, const char* name, unsigned char error_if_log_file_exist, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_as_secondary( const rocksdb_options_t* options, const char* name, const char* secondary_path, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_backup_engine_t* rocksdb_backup_engine_open( const rocksdb_options_t* options, const char* path, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_create_new_backup( rocksdb_backup_engine_t* be, rocksdb_t* db, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_create_new_backup_flush( rocksdb_backup_engine_t* be, rocksdb_t* db, unsigned char flush_before_backup, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_purge_old_backups( rocksdb_backup_engine_t* be, uint32_t num_backups_to_keep, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_restore_options_t* rocksdb_restore_options_create(); extern ROCKSDB_LIBRARY_API void 
rocksdb_restore_options_destroy( rocksdb_restore_options_t* opt); extern ROCKSDB_LIBRARY_API void rocksdb_restore_options_set_keep_log_files( rocksdb_restore_options_t* opt, int v); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_verify_backup(rocksdb_backup_engine_t* be, uint32_t backup_id, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_restore_db_from_latest_backup( rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, const rocksdb_restore_options_t* restore_options, char** errptr); extern ROCKSDB_LIBRARY_API const rocksdb_backup_engine_info_t* rocksdb_backup_engine_get_backup_info(rocksdb_backup_engine_t* be); extern ROCKSDB_LIBRARY_API int rocksdb_backup_engine_info_count( const rocksdb_backup_engine_info_t* info); extern ROCKSDB_LIBRARY_API int64_t rocksdb_backup_engine_info_timestamp(const rocksdb_backup_engine_info_t* info, int index); extern ROCKSDB_LIBRARY_API uint32_t rocksdb_backup_engine_info_backup_id(const rocksdb_backup_engine_info_t* info, int index); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_backup_engine_info_size(const rocksdb_backup_engine_info_t* info, int index); extern ROCKSDB_LIBRARY_API uint32_t rocksdb_backup_engine_info_number_files( const rocksdb_backup_engine_info_t* info, int index); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_info_destroy( const rocksdb_backup_engine_info_t* info); extern ROCKSDB_LIBRARY_API void rocksdb_backup_engine_close( rocksdb_backup_engine_t* be); extern ROCKSDB_LIBRARY_API rocksdb_checkpoint_t* rocksdb_checkpoint_object_create(rocksdb_t* db, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_checkpoint_create( rocksdb_checkpoint_t* checkpoint, const char* checkpoint_dir, uint64_t log_size_for_flush, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_checkpoint_object_destroy( rocksdb_checkpoint_t* checkpoint); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_column_families( const rocksdb_options_t* options, const char* name, int 
num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_for_read_only_column_families( const rocksdb_options_t* options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, unsigned char error_if_log_file_exist, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_open_as_secondary_column_families( const rocksdb_options_t* options, const char* name, const char* secondary_path, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** colummn_family_handles, char** errptr); extern ROCKSDB_LIBRARY_API char** rocksdb_list_column_families( const rocksdb_options_t* options, const char* name, size_t* lencf, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_list_column_families_destroy( char** list, size_t len); extern ROCKSDB_LIBRARY_API rocksdb_column_family_handle_t* rocksdb_create_column_family(rocksdb_t* db, const rocksdb_options_t* column_family_options, const char* column_family_name, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_drop_column_family( rocksdb_t* db, rocksdb_column_family_handle_t* handle, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_column_family_handle_destroy( rocksdb_column_family_handle_t*); extern ROCKSDB_LIBRARY_API void rocksdb_close(rocksdb_t* db); extern ROCKSDB_LIBRARY_API void rocksdb_put( rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_put_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t 
keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_delete( rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, size_t keylen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_delete_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_delete_range_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_merge( rocksdb_t* db, const rocksdb_writeoptions_t* options, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_merge_cf( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_write( rocksdb_t* db, const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch, char** errptr); /* Returns NULL if not found. A malloc()ed array otherwise. Stores the length of the array in *vallen. */ extern ROCKSDB_LIBRARY_API char* rocksdb_get( rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, size_t keylen, size_t* vallen, char** errptr); extern ROCKSDB_LIBRARY_API char* rocksdb_get_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, size_t* vallen, char** errptr); // if values_list[i] == NULL and errs[i] == NULL, // then we got status.IsNotFound(), which we will not return. // all errors except status status.ok() and status.IsNotFound() are returned. // // errs, values_list and values_list_sizes must be num_keys in length, // allocated by the caller. 
// errs is a list of strings as opposed to the conventional one error, // where errs[i] is the status for retrieval of keys_list[i]. // each non-NULL errs entry is a malloc()ed, null terminated string. // each non-NULL values_list entry is a malloc()ed array, with // the length for each stored in values_list_sizes[i]. extern ROCKSDB_LIBRARY_API void rocksdb_multi_get( rocksdb_t* db, const rocksdb_readoptions_t* options, size_t num_keys, const char* const* keys_list, const size_t* keys_list_sizes, char** values_list, size_t* values_list_sizes, char** errs); extern ROCKSDB_LIBRARY_API void rocksdb_multi_get_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, const rocksdb_column_family_handle_t* const* column_families, size_t num_keys, const char* const* keys_list, const size_t* keys_list_sizes, char** values_list, size_t* values_list_sizes, char** errs); extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_create_iterator( rocksdb_t* db, const rocksdb_readoptions_t* options); extern ROCKSDB_LIBRARY_API rocksdb_wal_iterator_t* rocksdb_get_updates_since( rocksdb_t* db, uint64_t seq_number, const rocksdb_wal_readoptions_t* options, char** errptr ); extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_create_iterator_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family); extern ROCKSDB_LIBRARY_API void rocksdb_create_iterators( rocksdb_t *db, rocksdb_readoptions_t* opts, rocksdb_column_family_handle_t** column_families, rocksdb_iterator_t** iterators, size_t size, char** errptr); extern ROCKSDB_LIBRARY_API const rocksdb_snapshot_t* rocksdb_create_snapshot( rocksdb_t* db); extern ROCKSDB_LIBRARY_API void rocksdb_release_snapshot( rocksdb_t* db, const rocksdb_snapshot_t* snapshot); /* Returns NULL if property name is unknown. Else returns a pointer to a malloc()-ed null-terminated value. 
*/ extern ROCKSDB_LIBRARY_API char* rocksdb_property_value(rocksdb_t* db, const char* propname); /* returns 0 on success, -1 otherwise */ int rocksdb_property_int( rocksdb_t* db, const char* propname, uint64_t *out_val); /* returns 0 on success, -1 otherwise */ int rocksdb_property_int_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* propname, uint64_t *out_val); extern ROCKSDB_LIBRARY_API char* rocksdb_property_value_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* propname); extern ROCKSDB_LIBRARY_API void rocksdb_approximate_sizes( rocksdb_t* db, int num_ranges, const char* const* range_start_key, const size_t* range_start_key_len, const char* const* range_limit_key, const size_t* range_limit_key_len, uint64_t* sizes); extern ROCKSDB_LIBRARY_API void rocksdb_approximate_sizes_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, int num_ranges, const char* const* range_start_key, const size_t* range_start_key_len, const char* const* range_limit_key, const size_t* range_limit_key_len, uint64_t* sizes); extern ROCKSDB_LIBRARY_API void rocksdb_compact_range(rocksdb_t* db, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len); extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len); extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_opt( rocksdb_t* db, rocksdb_compactoptions_t* opt, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len); extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_cf_opt( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, rocksdb_compactoptions_t* opt, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len); extern ROCKSDB_LIBRARY_API void rocksdb_delete_file(rocksdb_t* db, const char* name); extern 
ROCKSDB_LIBRARY_API const rocksdb_livefiles_t* rocksdb_livefiles( rocksdb_t* db); extern ROCKSDB_LIBRARY_API void rocksdb_flush( rocksdb_t* db, const rocksdb_flushoptions_t* options, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_flush_cf( rocksdb_t* db, const rocksdb_flushoptions_t* options, rocksdb_column_family_handle_t* column_family, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_disable_file_deletions(rocksdb_t* db, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_enable_file_deletions( rocksdb_t* db, unsigned char force, char** errptr); /* Management operations */ extern ROCKSDB_LIBRARY_API void rocksdb_destroy_db( const rocksdb_options_t* options, const char* name, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_repair_db( const rocksdb_options_t* options, const char* name, char** errptr); /* Iterator */ extern ROCKSDB_LIBRARY_API void rocksdb_iter_destroy(rocksdb_iterator_t*); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_iter_valid( const rocksdb_iterator_t*); extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek_to_first(rocksdb_iterator_t*); extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek_to_last(rocksdb_iterator_t*); extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek(rocksdb_iterator_t*, const char* k, size_t klen); extern ROCKSDB_LIBRARY_API void rocksdb_iter_seek_for_prev(rocksdb_iterator_t*, const char* k, size_t klen); extern ROCKSDB_LIBRARY_API void rocksdb_iter_next(rocksdb_iterator_t*); extern ROCKSDB_LIBRARY_API void rocksdb_iter_prev(rocksdb_iterator_t*); extern ROCKSDB_LIBRARY_API const char* rocksdb_iter_key( const rocksdb_iterator_t*, size_t* klen); extern ROCKSDB_LIBRARY_API const char* rocksdb_iter_value( const rocksdb_iterator_t*, size_t* vlen); extern ROCKSDB_LIBRARY_API void rocksdb_iter_get_error( const rocksdb_iterator_t*, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_wal_iter_next(rocksdb_wal_iterator_t* iter); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_wal_iter_valid( const 
rocksdb_wal_iterator_t*); extern ROCKSDB_LIBRARY_API void rocksdb_wal_iter_status (const rocksdb_wal_iterator_t* iter, char** errptr) ; extern ROCKSDB_LIBRARY_API rocksdb_writebatch_t* rocksdb_wal_iter_get_batch (const rocksdb_wal_iterator_t* iter, uint64_t* seq) ; extern ROCKSDB_LIBRARY_API uint64_t rocksdb_get_latest_sequence_number (rocksdb_t *db); extern ROCKSDB_LIBRARY_API void rocksdb_wal_iter_destroy (const rocksdb_wal_iterator_t* iter) ; /* Write batch */ extern ROCKSDB_LIBRARY_API rocksdb_writebatch_t* rocksdb_writebatch_create(); extern ROCKSDB_LIBRARY_API rocksdb_writebatch_t* rocksdb_writebatch_create_from( const char* rep, size_t size); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_destroy( rocksdb_writebatch_t*); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_clear(rocksdb_writebatch_t*); extern ROCKSDB_LIBRARY_API int rocksdb_writebatch_count(rocksdb_writebatch_t*); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put(rocksdb_writebatch_t*, const char* key, size_t klen, const char* val, size_t vlen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put_cf( rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_putv( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_putv_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_merge(rocksdb_writebatch_t*, const char* key, size_t klen, const char* val, size_t vlen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_merge_cf( rocksdb_writebatch_t*, 
rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_mergev( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_mergev_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes, int num_values, const char* const* values_list, const size_t* values_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete(rocksdb_writebatch_t*, const char* key, size_t klen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_singledelete( rocksdb_writebatch_t* b, const char* key, size_t klen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_cf( rocksdb_writebatch_t*, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_singledelete_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_deletev( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_deletev_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* keys_list, const size_t* keys_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_range( rocksdb_writebatch_t* b, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_range_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* end_key, size_t end_key_len); extern 
ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_rangev( rocksdb_writebatch_t* b, int num_keys, const char* const* start_keys_list, const size_t* start_keys_list_sizes, const char* const* end_keys_list, const size_t* end_keys_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_delete_rangev_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, int num_keys, const char* const* start_keys_list, const size_t* start_keys_list_sizes, const char* const* end_keys_list, const size_t* end_keys_list_sizes); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_put_log_data( rocksdb_writebatch_t*, const char* blob, size_t len); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_iterate( rocksdb_writebatch_t*, void* state, void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), void (*deleted)(void*, const char* k, size_t klen)); extern ROCKSDB_LIBRARY_API const char* rocksdb_writebatch_data( rocksdb_writebatch_t*, size_t* size); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_set_save_point( rocksdb_writebatch_t*); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_rollback_to_save_point( rocksdb_writebatch_t*, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_pop_save_point( rocksdb_writebatch_t*, char** errptr); /* Write batch with index */ extern ROCKSDB_LIBRARY_API rocksdb_writebatch_wi_t* rocksdb_writebatch_wi_create( size_t reserved_bytes, unsigned char overwrite_keys); extern ROCKSDB_LIBRARY_API rocksdb_writebatch_wi_t* rocksdb_writebatch_wi_create_from( const char* rep, size_t size); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_destroy( rocksdb_writebatch_wi_t*); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_clear(rocksdb_writebatch_wi_t*); extern ROCKSDB_LIBRARY_API int rocksdb_writebatch_wi_count(rocksdb_writebatch_wi_t* b); extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_put(rocksdb_writebatch_wi_t*, const char* key, size_t klen, const char* val, size_t vlen); extern 
ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_put_cf(
    rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family,
    const char* key, size_t klen, const char* val, size_t vlen);

/* The "v" (vectored) entry points take their keys and values as parallel
 * arrays: num_keys pointers in keys_list with the matching lengths in
 * keys_list_sizes, and likewise num_values / values_list / values_list_sizes.
 * NOTE(review): how the pieces are combined into the stored key and value is
 * decided by the implementation, which is not visible in this header. */
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_putv(
    rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list,
    const size_t* keys_list_sizes, int num_values,
    const char* const* values_list, const size_t* values_list_sizes);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_putv_cf(
    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
    int num_keys, const char* const* keys_list, const size_t* keys_list_sizes,
    int num_values, const char* const* values_list,
    const size_t* values_list_sizes);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_merge(
    rocksdb_writebatch_wi_t*, const char* key, size_t klen, const char* val,
    size_t vlen);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_merge_cf(
    rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family,
    const char* key, size_t klen, const char* val, size_t vlen);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_mergev(
    rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list,
    const size_t* keys_list_sizes, int num_values,
    const char* const* values_list, const size_t* values_list_sizes);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_mergev_cf(
    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
    int num_keys, const char* const* keys_list, const size_t* keys_list_sizes,
    int num_values, const char* const* values_list,
    const size_t* values_list_sizes);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete(
    rocksdb_writebatch_wi_t*, const char* key, size_t klen);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_singledelete(
    rocksdb_writebatch_wi_t*, const char* key, size_t klen);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_cf(
    rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family,
    const char* key, size_t klen);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_singledelete_cf(
    rocksdb_writebatch_wi_t*, rocksdb_column_family_handle_t* column_family,
    const char* key, size_t klen);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_deletev(
    rocksdb_writebatch_wi_t* b, int num_keys, const char* const* keys_list,
    const size_t* keys_list_sizes);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_deletev_cf(
    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
    int num_keys, const char* const* keys_list, const size_t* keys_list_sizes);
/* DO NOT USE - rocksdb_writebatch_wi_delete_range is not yet supported */
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_range(
    rocksdb_writebatch_wi_t* b, const char* start_key, size_t start_key_len,
    const char* end_key, size_t end_key_len);
/* DO NOT USE - rocksdb_writebatch_wi_delete_range_cf is not yet supported */
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_range_cf(
    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
    const char* start_key, size_t start_key_len, const char* end_key,
    size_t end_key_len);
/* DO NOT USE - rocksdb_writebatch_wi_delete_rangev is not yet supported */
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_rangev(
    rocksdb_writebatch_wi_t* b, int num_keys,
    const char* const* start_keys_list, const size_t* start_keys_list_sizes,
    const char* const* end_keys_list, const size_t* end_keys_list_sizes);
/* DO NOT USE - rocksdb_writebatch_wi_delete_rangev_cf is not yet supported */
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_delete_rangev_cf(
    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
    int num_keys, const char* const* start_keys_list,
    const size_t* start_keys_list_sizes, const char* const* end_keys_list,
    const size_t* end_keys_list_sizes);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_put_log_data(
    rocksdb_writebatch_wi_t*, const char* blob, size_t len);
/* Takes two callbacks, `put` and `deleted`, plus an opaque `state` pointer.
 * NOTE(review): presumably `state` is forwarded as the first (void*) argument
 * of each callback -- confirm against the implementation. */
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_iterate(
    rocksdb_writebatch_wi_t* b, void* state,
    void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
    void (*deleted)(void*, const char* k, size_t klen));
/* NOTE(review): presumably stores the byte length of the returned buffer in
 * *size; ownership of the returned pointer is not stated here -- confirm. */
extern ROCKSDB_LIBRARY_API const char* rocksdb_writebatch_wi_data(
    rocksdb_writebatch_wi_t* b, size_t* size);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_set_save_point(
    rocksdb_writebatch_wi_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_writebatch_wi_rollback_to_save_point(
    rocksdb_writebatch_wi_t*, char** errptr);
/* Lookups: each returns a char* and takes a `vallen` and an `errptr`
 * out-parameter. NOTE(review): presumably *vallen receives the value length
 * and *errptr an error message on failure; the allocation/ownership rules for
 * both are not visible in this part of the header -- confirm before use. */
extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch(
    rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options,
    const char* key, size_t keylen, size_t* vallen, char** errptr);
extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch_cf(
    rocksdb_writebatch_wi_t* wbwi, const rocksdb_options_t* options,
    rocksdb_column_family_handle_t* column_family, const char* key,
    size_t keylen, size_t* vallen, char** errptr);
extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch_and_db(
    rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db,
    const rocksdb_readoptions_t* options, const char* key, size_t keylen,
    size_t* vallen, char** errptr);
extern ROCKSDB_LIBRARY_API char* rocksdb_writebatch_wi_get_from_batch_and_db_cf(
    rocksdb_writebatch_wi_t* wbwi, rocksdb_t* db,
    const rocksdb_readoptions_t* options,
    rocksdb_column_family_handle_t* column_family, const char* key,
    size_t keylen, size_t* vallen, char** errptr);
extern ROCKSDB_LIBRARY_API void rocksdb_write_writebatch_wi(
    rocksdb_t* db, const rocksdb_writeoptions_t* options,
    rocksdb_writebatch_wi_t* wbwi, char** errptr);
/* NOTE(review): whether the returned iterator takes ownership of
 * base_iterator is not stated in this header -- confirm before destroying
 * either object. */
extern ROCKSDB_LIBRARY_API rocksdb_iterator_t*
rocksdb_writebatch_wi_create_iterator_with_base(
    rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator);
extern ROCKSDB_LIBRARY_API rocksdb_iterator_t*
rocksdb_writebatch_wi_create_iterator_with_base_cf(
    rocksdb_writebatch_wi_t* wbwi, rocksdb_iterator_t* base_iterator,
    rocksdb_column_family_handle_t* cf);

/* Block based table options */

/* Declared with (void) rather than (): in C an empty parameter list leaves
 * the parameters unspecified, so calls would not be type-checked. */
extern ROCKSDB_LIBRARY_API rocksdb_block_based_table_options_t*
rocksdb_block_based_options_create(void);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_destroy(
    rocksdb_block_based_table_options_t* options);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_block_size(
    rocksdb_block_based_table_options_t* options, size_t block_size);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_block_size_deviation(
    rocksdb_block_based_table_options_t* options, int block_size_deviation);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_block_restart_interval(
    rocksdb_block_based_table_options_t* options, int block_restart_interval);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_index_block_restart_interval(
    rocksdb_block_based_table_options_t* options,
    int index_block_restart_interval);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_metadata_block_size(
    rocksdb_block_based_table_options_t* options, uint64_t metadata_block_size);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_partition_filters(
    rocksdb_block_based_table_options_t* options,
    unsigned char partition_filters);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_use_delta_encoding(
    rocksdb_block_based_table_options_t* options,
    unsigned char use_delta_encoding);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_filter_policy(
    rocksdb_block_based_table_options_t* options,
    rocksdb_filterpolicy_t* filter_policy);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_no_block_cache(
    rocksdb_block_based_table_options_t* options, unsigned char no_block_cache);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_block_cache(
    rocksdb_block_based_table_options_t* options, rocksdb_cache_t* block_cache);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_block_cache_compressed(
    rocksdb_block_based_table_options_t* options,
    rocksdb_cache_t* block_cache_compressed);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_whole_key_filtering(
    rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_format_version(
    rocksdb_block_based_table_options_t*, int);
/* Index types accepted by rocksdb_block_based_options_set_index_type. */
enum {
  rocksdb_block_based_table_index_type_binary_search = 0,
  rocksdb_block_based_table_index_type_hash_search = 1,
  rocksdb_block_based_table_index_type_two_level_index_search = 2,
};
extern ROCKSDB_LIBRARY_API void rocksdb_block_based_options_set_index_type(
    rocksdb_block_based_table_options_t*, int); /* uses one of the above enums */
/* Data block index types accepted by
 * rocksdb_block_based_options_set_data_block_index_type. */
enum {
  rocksdb_block_based_table_data_block_index_type_binary_search = 0,
  rocksdb_block_based_table_data_block_index_type_binary_search_and_hash = 1,
};
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_data_block_index_type(
    rocksdb_block_based_table_options_t*, int); /* uses one of the above enums */
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_data_block_hash_ratio(
    rocksdb_block_based_table_options_t* options, double v);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_hash_index_allow_collision(
    rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_cache_index_and_filter_blocks(
    rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_cache_index_and_filter_blocks_with_high_priority(
    rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache(
    rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void
rocksdb_block_based_options_set_pin_top_level_index_and_filter(
    rocksdb_block_based_table_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_block_based_table_factory(
    rocksdb_options_t* opt, rocksdb_block_based_table_options_t* table_options);

/* Cuckoo table options */

extern ROCKSDB_LIBRARY_API rocksdb_cuckoo_table_options_t*
rocksdb_cuckoo_options_create(void);
extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_destroy(
    rocksdb_cuckoo_table_options_t* options);
extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_hash_ratio(
    rocksdb_cuckoo_table_options_t* options, double v);
extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_max_search_depth(
    rocksdb_cuckoo_table_options_t* options, uint32_t v);
extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_cuckoo_block_size(
    rocksdb_cuckoo_table_options_t* options, uint32_t v);
extern ROCKSDB_LIBRARY_API void
rocksdb_cuckoo_options_set_identity_as_first_hash(
    rocksdb_cuckoo_table_options_t* options, unsigned char v);
extern ROCKSDB_LIBRARY_API void rocksdb_cuckoo_options_set_use_module_hash(
    rocksdb_cuckoo_table_options_t* options, unsigned char v);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_cuckoo_table_factory(
    rocksdb_options_t* opt, rocksdb_cuckoo_table_options_t* table_options);

/* Options */

/* Both functions take `count` option names in keys[] with the matching
 * values in values[], and an errptr out-parameter.
 * NOTE(review): presumably these change options on an open DB -- confirm. */
extern ROCKSDB_LIBRARY_API void rocksdb_set_options(
    rocksdb_t* db, int count, const char* const keys[],
    const char* const values[], char** errptr);
extern ROCKSDB_LIBRARY_API void rocksdb_set_options_cf(
    rocksdb_t* db, rocksdb_column_family_handle_t* handle, int count,
    const char* const keys[], const char* const values[], char** errptr);
extern ROCKSDB_LIBRARY_API rocksdb_options_t* rocksdb_options_create(void);
extern ROCKSDB_LIBRARY_API void rocksdb_options_destroy(rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API rocksdb_options_t* rocksdb_options_create_copy(
    rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_increase_parallelism(
    rocksdb_options_t* opt, int total_threads);
extern ROCKSDB_LIBRARY_API void rocksdb_options_optimize_for_point_lookup(
    rocksdb_options_t* opt, uint64_t block_cache_size_mb);
extern ROCKSDB_LIBRARY_API void rocksdb_options_optimize_level_style_compaction(
    rocksdb_options_t* opt, uint64_t memtable_memory_budget);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_optimize_universal_style_compaction(
    rocksdb_options_t* opt, uint64_t memtable_memory_budget);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_ingest_behind(
    rocksdb_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_options_get_allow_ingest_behind(rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_filter(
    rocksdb_options_t*, rocksdb_compactionfilter_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_filter_factory(
    rocksdb_options_t*, rocksdb_compactionfilterfactory_t*);
/* NOTE(review): appears to be the setter paired with
 * rocksdb_options_get_compaction_readahead_size below, although its name
 * lacks the set_ prefix used by its siblings; the name is part of the public
 * API and is kept as is. */
extern ROCKSDB_LIBRARY_API void rocksdb_options_compaction_readahead_size(
    rocksdb_options_t*, size_t);
extern ROCKSDB_LIBRARY_API size_t
rocksdb_options_get_compaction_readahead_size(rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_comparator(
    rocksdb_options_t*, rocksdb_comparator_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_merge_operator(
    rocksdb_options_t*, rocksdb_mergeoperator_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_uint64add_merge_operator(
    rocksdb_options_t*);
/* level_values holds num_levels entries. */
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression_per_level(
    rocksdb_options_t* opt, int* level_values, size_t num_levels);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_create_if_missing(
    rocksdb_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_create_if_missing(
    rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_create_missing_column_families(rocksdb_options_t*,
                                                   unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_options_get_create_missing_column_families(rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_error_if_exists(
    rocksdb_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_error_if_exists(
    rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_paranoid_checks(
    rocksdb_options_t*, unsigned char);
extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_paranoid_checks(
    rocksdb_options_t*);
/* path_values holds num_paths entries. */
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_paths(
    rocksdb_options_t*, const rocksdb_dbpath_t** path_values,
    size_t num_paths);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_env(rocksdb_options_t*,
                                                        rocksdb_env_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_info_log(
    rocksdb_options_t*, rocksdb_logger_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_info_log_level(
    rocksdb_options_t*, int);
extern ROCKSDB_LIBRARY_API int rocksdb_options_get_info_log_level(
    rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_write_buffer_size(
    rocksdb_options_t*, size_t);
extern ROCKSDB_LIBRARY_API size_t
rocksdb_options_get_write_buffer_size(rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_write_buffer_size(
    rocksdb_options_t*, size_t);
extern ROCKSDB_LIBRARY_API size_t
rocksdb_options_get_db_write_buffer_size(rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_open_files(
    rocksdb_options_t*, int);
extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_open_files(
    rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_file_opening_threads(
    rocksdb_options_t*, int);
extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_file_opening_threads(
    rocksdb_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_total_wal_size(
    rocksdb_options_t* opt, uint64_t n);
extern ROCKSDB_LIBRARY_API uint64_t
rocksdb_options_get_max_total_wal_size(rocksdb_options_t* opt);
/* NOTE(review): the four int parameters are unnamed in this declaration;
 * look up their order in the implementation before calling. */
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression_options(
    rocksdb_options_t*, int, int, int, int);
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_compression_options_zstd_max_train_bytes(rocksdb_options_t*,
                                                             int);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bottommost_compression_options(rocksdb_options_t*, int, int, int, int, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( rocksdb_options_t*, int, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor( rocksdb_options_t*, rocksdb_slicetransform_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_num_levels( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_num_levels( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_level0_file_num_compaction_trigger(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_level0_file_num_compaction_trigger(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_level0_slowdown_writes_trigger(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_level0_slowdown_writes_trigger(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_level0_stop_writes_trigger( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_level0_stop_writes_trigger( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_mem_compaction_level( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_target_file_size_base( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_options_get_target_file_size_base(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_target_file_size_multiplier( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_target_file_size_multiplier( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_bytes_for_level_base( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_options_get_max_bytes_for_level_base(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_level_compaction_dynamic_level_bytes(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_level_compaction_dynamic_level_bytes(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_bytes_for_level_multiplier(rocksdb_options_t*, double); extern ROCKSDB_LIBRARY_API double rocksdb_options_get_max_bytes_for_level_multiplier(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_bytes_for_level_multiplier_additional( rocksdb_options_t*, int* level_values, size_t num_levels); extern ROCKSDB_LIBRARY_API void rocksdb_options_enable_statistics( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_skip_stats_update_on_db_open(rocksdb_options_t* opt, unsigned char val); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_skip_stats_update_on_db_open(rocksdb_options_t* opt); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open( rocksdb_options_t* opt, unsigned char val); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open( rocksdb_options_t* opt); /* returns a pointer to a malloc()-ed, null terminated string */ extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string( rocksdb_options_t* opt); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_write_buffer_number( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_write_buffer_number( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_min_write_buffer_number_to_merge(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_write_buffer_number_to_maintain(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API int rocksdb_options_get_max_write_buffer_number_to_maintain(rocksdb_options_t*); extern 
ROCKSDB_LIBRARY_API void rocksdb_options_set_max_write_buffer_size_to_maintain(rocksdb_options_t*, int64_t); extern ROCKSDB_LIBRARY_API int64_t rocksdb_options_get_max_write_buffer_size_to_maintain(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_enable_pipelined_write( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_enable_pipelined_write(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_unordered_write( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_unordered_write( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_subcompactions( rocksdb_options_t*, uint32_t); extern ROCKSDB_LIBRARY_API uint32_t rocksdb_options_get_max_subcompactions(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_background_jobs( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_background_compactions( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_base_background_compactions( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_background_flushes( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_log_file_size( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_log_file_time_to_roll( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_keep_log_file_num( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_recycle_log_file_num( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_soft_rate_limit( rocksdb_options_t*, double); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_hard_rate_limit( rocksdb_options_t*, double); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_soft_pending_compaction_bytes_limit( rocksdb_options_t* opt, size_t v); extern 
ROCKSDB_LIBRARY_API void rocksdb_options_set_hard_pending_compaction_bytes_limit( rocksdb_options_t* opt, size_t v); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_rate_limit_delay_max_milliseconds(rocksdb_options_t*, unsigned int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_manifest_file_size( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_table_cache_numshardbits( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_table_cache_remove_scan_count_limit(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_arena_block_size( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_fsync( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_log_dir( rocksdb_options_t*, const char*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_dir(rocksdb_options_t*, const char*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_WAL_ttl_seconds( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_WAL_size_limit_MB( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_manifest_preallocation_size( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_purge_redundant_kvs_while_flush(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_mmap_reads( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_mmap_writes( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_direct_reads( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_direct_io_for_flush_and_compaction(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_is_fd_close_on_exec( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_skip_log_error_on_recovery( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_stats_dump_period_sec( rocksdb_options_t*, unsigned int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_advise_random_on_open( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_access_hint_on_compaction_start(rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_use_adaptive_mutex( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bytes_per_sync( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_bytes_per_sync( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_writable_file_max_buffer_size(rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_enable_write_thread_adaptive_yield(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_sequential_skip_in_iterations(rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_disable_auto_compactions( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_optimize_filters_for_hits( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_delete_obsolete_files_period_micros(rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_prepare_for_bulk_load( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_vector_rep( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_prefix_bloom_size_ratio( rocksdb_options_t*, double); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_compaction_bytes( rocksdb_options_t*, uint64_t); extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_hash_skip_list_rep( rocksdb_options_t*, size_t, int32_t, int32_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_hash_link_list_rep( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_plain_table_factory( rocksdb_options_t*, uint32_t, int, double, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_min_level_to_compress( rocksdb_options_t* opt, int level); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_huge_page_size( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_successive_merges( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bloom_locality( rocksdb_options_t*, uint32_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_inplace_update_support( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_inplace_update_num_locks( rocksdb_options_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_report_bg_io_stats( rocksdb_options_t*, int); enum { rocksdb_tolerate_corrupted_tail_records_recovery = 0, rocksdb_absolute_consistency_recovery = 1, rocksdb_point_in_time_recovery = 2, rocksdb_skip_any_corrupted_records_recovery = 3 }; extern ROCKSDB_LIBRARY_API void rocksdb_options_set_wal_recovery_mode( rocksdb_options_t*, int); enum { rocksdb_no_compression = 0, rocksdb_snappy_compression = 1, rocksdb_zlib_compression = 2, rocksdb_bz2_compression = 3, rocksdb_lz4_compression = 4, rocksdb_lz4hc_compression = 5, rocksdb_xpress_compression = 6, rocksdb_zstd_compression = 7 }; extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compression( rocksdb_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_bottommost_compression( rocksdb_options_t*, int); enum { rocksdb_level_compaction = 0, rocksdb_universal_compaction = 1, rocksdb_fifo_compaction = 2 }; extern ROCKSDB_LIBRARY_API void rocksdb_options_set_compaction_style( rocksdb_options_t*, int); extern 
ROCKSDB_LIBRARY_API void rocksdb_options_set_universal_compaction_options( rocksdb_options_t*, rocksdb_universal_compaction_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_fifo_compaction_options( rocksdb_options_t* opt, rocksdb_fifo_compaction_options_t* fifo); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_ratelimiter( rocksdb_options_t* opt, rocksdb_ratelimiter_t* limiter); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_atomic_flush( rocksdb_options_t* opt, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_row_cache( rocksdb_options_t* opt, rocksdb_cache_t* cache ); /* RateLimiter */ extern ROCKSDB_LIBRARY_API rocksdb_ratelimiter_t* rocksdb_ratelimiter_create( int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness); extern ROCKSDB_LIBRARY_API void rocksdb_ratelimiter_destroy(rocksdb_ratelimiter_t*); /* PerfContext */ enum { rocksdb_uninitialized = 0, rocksdb_disable = 1, rocksdb_enable_count = 2, rocksdb_enable_time_except_for_mutex = 3, rocksdb_enable_time = 4, rocksdb_out_of_bounds = 5 }; enum { rocksdb_user_key_comparison_count = 0, rocksdb_block_cache_hit_count, rocksdb_block_read_count, rocksdb_block_read_byte, rocksdb_block_read_time, rocksdb_block_checksum_time, rocksdb_block_decompress_time, rocksdb_get_read_bytes, rocksdb_multiget_read_bytes, rocksdb_iter_read_bytes, rocksdb_internal_key_skipped_count, rocksdb_internal_delete_skipped_count, rocksdb_internal_recent_skipped_count, rocksdb_internal_merge_count, rocksdb_get_snapshot_time, rocksdb_get_from_memtable_time, rocksdb_get_from_memtable_count, rocksdb_get_post_process_time, rocksdb_get_from_output_files_time, rocksdb_seek_on_memtable_time, rocksdb_seek_on_memtable_count, rocksdb_next_on_memtable_count, rocksdb_prev_on_memtable_count, rocksdb_seek_child_seek_time, rocksdb_seek_child_seek_count, rocksdb_seek_min_heap_time, rocksdb_seek_max_heap_time, rocksdb_seek_internal_seek_time, rocksdb_find_next_user_entry_time, rocksdb_write_wal_time, 
rocksdb_write_memtable_time, rocksdb_write_delay_time, rocksdb_write_pre_and_post_process_time, rocksdb_db_mutex_lock_nanos, rocksdb_db_condition_wait_nanos, rocksdb_merge_operator_time_nanos, rocksdb_read_index_block_nanos, rocksdb_read_filter_block_nanos, rocksdb_new_table_block_iter_nanos, rocksdb_new_table_iterator_nanos, rocksdb_block_seek_nanos, rocksdb_find_table_nanos, rocksdb_bloom_memtable_hit_count, rocksdb_bloom_memtable_miss_count, rocksdb_bloom_sst_hit_count, rocksdb_bloom_sst_miss_count, rocksdb_key_lock_wait_time, rocksdb_key_lock_wait_count, rocksdb_env_new_sequential_file_nanos, rocksdb_env_new_random_access_file_nanos, rocksdb_env_new_writable_file_nanos, rocksdb_env_reuse_writable_file_nanos, rocksdb_env_new_random_rw_file_nanos, rocksdb_env_new_directory_nanos, rocksdb_env_file_exists_nanos, rocksdb_env_get_children_nanos, rocksdb_env_get_children_file_attributes_nanos, rocksdb_env_delete_file_nanos, rocksdb_env_create_dir_nanos, rocksdb_env_create_dir_if_missing_nanos, rocksdb_env_delete_dir_nanos, rocksdb_env_get_file_size_nanos, rocksdb_env_get_file_modification_time_nanos, rocksdb_env_rename_file_nanos, rocksdb_env_link_file_nanos, rocksdb_env_lock_file_nanos, rocksdb_env_unlock_file_nanos, rocksdb_env_new_logger_nanos, rocksdb_total_metric_count = 68 }; extern ROCKSDB_LIBRARY_API void rocksdb_set_perf_level(int); extern ROCKSDB_LIBRARY_API rocksdb_perfcontext_t* rocksdb_perfcontext_create(); extern ROCKSDB_LIBRARY_API void rocksdb_perfcontext_reset( rocksdb_perfcontext_t* context); extern ROCKSDB_LIBRARY_API char* rocksdb_perfcontext_report( rocksdb_perfcontext_t* context, unsigned char exclude_zero_counters); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_perfcontext_metric( rocksdb_perfcontext_t* context, int metric); extern ROCKSDB_LIBRARY_API void rocksdb_perfcontext_destroy( rocksdb_perfcontext_t* context); /* Compaction Filter */ extern ROCKSDB_LIBRARY_API rocksdb_compactionfilter_t* rocksdb_compactionfilter_create( void* state, void 
(*destructor)(void*), unsigned char (*filter)(void*, int level, const char* key, size_t key_length, const char* existing_value, size_t value_length, char** new_value, size_t* new_value_length, unsigned char* value_changed), const char* (*name)(void*)); extern ROCKSDB_LIBRARY_API void rocksdb_compactionfilter_set_ignore_snapshots( rocksdb_compactionfilter_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_compactionfilter_destroy( rocksdb_compactionfilter_t*); /* Compaction Filter Context */ extern ROCKSDB_LIBRARY_API unsigned char rocksdb_compactionfiltercontext_is_full_compaction( rocksdb_compactionfiltercontext_t* context); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_compactionfiltercontext_is_manual_compaction( rocksdb_compactionfiltercontext_t* context); /* Compaction Filter Factory */ extern ROCKSDB_LIBRARY_API rocksdb_compactionfilterfactory_t* rocksdb_compactionfilterfactory_create( void* state, void (*destructor)(void*), rocksdb_compactionfilter_t* (*create_compaction_filter)( void*, rocksdb_compactionfiltercontext_t* context), const char* (*name)(void*)); extern ROCKSDB_LIBRARY_API void rocksdb_compactionfilterfactory_destroy( rocksdb_compactionfilterfactory_t*); /* Comparator */ extern ROCKSDB_LIBRARY_API rocksdb_comparator_t* rocksdb_comparator_create( void* state, void (*destructor)(void*), int (*compare)(void*, const char* a, size_t alen, const char* b, size_t blen), const char* (*name)(void*)); extern ROCKSDB_LIBRARY_API void rocksdb_comparator_destroy( rocksdb_comparator_t*); /* Filter policy */ extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( void* state, void (*destructor)(void*), char* (*create_filter)(void*, const char* const* key_array, const size_t* key_length_array, int num_keys, size_t* filter_length), unsigned char (*key_may_match)(void*, const char* key, size_t length, const char* filter, size_t filter_length), void (*delete_filter)(void*, const char* filter, size_t filter_length), const char* 
(*name)(void*)); extern ROCKSDB_LIBRARY_API void rocksdb_filterpolicy_destroy( rocksdb_filterpolicy_t*); extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key); extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full(int bits_per_key); /* Merge Operator */ extern ROCKSDB_LIBRARY_API rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( void* state, void (*destructor)(void*), char* (*full_merge)(void*, const char* key, size_t key_length, const char* existing_value, size_t existing_value_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length), char* (*partial_merge)(void*, const char* key, size_t key_length, const char* const* operands_list, const size_t* operands_list_length, int num_operands, unsigned char* success, size_t* new_value_length), void (*delete_value)(void*, const char* value, size_t value_length), const char* (*name)(void*)); extern ROCKSDB_LIBRARY_API void rocksdb_mergeoperator_destroy( rocksdb_mergeoperator_t*); /* Read options */ extern ROCKSDB_LIBRARY_API rocksdb_readoptions_t* rocksdb_readoptions_create(); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_destroy( rocksdb_readoptions_t*); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_verify_checksums( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_fill_cache( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t*, const rocksdb_snapshot_t*); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_iterate_upper_bound( rocksdb_readoptions_t*, const char* key, size_t keylen); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_iterate_lower_bound( rocksdb_readoptions_t*, const char* key, size_t keylen); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_read_tier( rocksdb_readoptions_t*, 
int); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_tailing( rocksdb_readoptions_t*, unsigned char); // The functionality that this option controlled has been removed. extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_managed( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_readahead_size( rocksdb_readoptions_t*, size_t); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_prefix_same_as_start( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_pin_data( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_total_order_seek( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_max_skippable_internal_keys( rocksdb_readoptions_t*, uint64_t); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_background_purge_on_iterator_cleanup( rocksdb_readoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_ignore_range_deletions( rocksdb_readoptions_t*, unsigned char); /* Write options */ extern ROCKSDB_LIBRARY_API rocksdb_writeoptions_t* rocksdb_writeoptions_create(); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_destroy( rocksdb_writeoptions_t*); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_sync( rocksdb_writeoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_disable_WAL( rocksdb_writeoptions_t* opt, int disable); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_ignore_missing_column_families( rocksdb_writeoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_no_slowdown( rocksdb_writeoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_low_pri( rocksdb_writeoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_writeoptions_set_memtable_insert_hint_per_batch(rocksdb_writeoptions_t*, unsigned char); /* Compact range 
options */ extern ROCKSDB_LIBRARY_API rocksdb_compactoptions_t* rocksdb_compactoptions_create(); extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_destroy( rocksdb_compactoptions_t*); extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_exclusive_manual_compaction( rocksdb_compactoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_bottommost_level_compaction( rocksdb_compactoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_change_level( rocksdb_compactoptions_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_compactoptions_set_target_level( rocksdb_compactoptions_t*, int); /* Flush options */ extern ROCKSDB_LIBRARY_API rocksdb_flushoptions_t* rocksdb_flushoptions_create(); extern ROCKSDB_LIBRARY_API void rocksdb_flushoptions_destroy( rocksdb_flushoptions_t*); extern ROCKSDB_LIBRARY_API void rocksdb_flushoptions_set_wait( rocksdb_flushoptions_t*, unsigned char); /* Cache */ extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru( size_t capacity); extern ROCKSDB_LIBRARY_API void rocksdb_cache_destroy(rocksdb_cache_t* cache); extern ROCKSDB_LIBRARY_API void rocksdb_cache_set_capacity( rocksdb_cache_t* cache, size_t capacity); extern ROCKSDB_LIBRARY_API size_t rocksdb_cache_get_usage(rocksdb_cache_t* cache); extern ROCKSDB_LIBRARY_API size_t rocksdb_cache_get_pinned_usage(rocksdb_cache_t* cache); /* DBPath */ extern ROCKSDB_LIBRARY_API rocksdb_dbpath_t* rocksdb_dbpath_create(const char* path, uint64_t target_size); extern ROCKSDB_LIBRARY_API void rocksdb_dbpath_destroy(rocksdb_dbpath_t*); /* Env */ extern ROCKSDB_LIBRARY_API rocksdb_env_t* rocksdb_create_default_env(); extern ROCKSDB_LIBRARY_API rocksdb_env_t* rocksdb_create_mem_env(); extern ROCKSDB_LIBRARY_API void rocksdb_env_set_background_threads( rocksdb_env_t* env, int n); extern ROCKSDB_LIBRARY_API void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n); extern ROCKSDB_LIBRARY_API 
void rocksdb_env_join_all_threads( rocksdb_env_t* env); extern ROCKSDB_LIBRARY_API void rocksdb_env_lower_thread_pool_io_priority(rocksdb_env_t* env); extern ROCKSDB_LIBRARY_API void rocksdb_env_lower_high_priority_thread_pool_io_priority(rocksdb_env_t* env); extern ROCKSDB_LIBRARY_API void rocksdb_env_lower_thread_pool_cpu_priority(rocksdb_env_t* env); extern ROCKSDB_LIBRARY_API void rocksdb_env_lower_high_priority_thread_pool_cpu_priority(rocksdb_env_t* env); extern ROCKSDB_LIBRARY_API void rocksdb_env_destroy(rocksdb_env_t*); extern ROCKSDB_LIBRARY_API rocksdb_envoptions_t* rocksdb_envoptions_create(); extern ROCKSDB_LIBRARY_API void rocksdb_envoptions_destroy( rocksdb_envoptions_t* opt); /* SstFile */ extern ROCKSDB_LIBRARY_API rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create(const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options); extern ROCKSDB_LIBRARY_API rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create_with_comparator( const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options, const rocksdb_comparator_t* comparator); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_open( rocksdb_sstfilewriter_t* writer, const char* name, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_add( rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_put( rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_merge( rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete( rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_finish( rocksdb_sstfilewriter_t* writer, char** errptr); extern 
ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_file_size( rocksdb_sstfilewriter_t* writer, uint64_t* file_size); extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_destroy( rocksdb_sstfilewriter_t* writer); extern ROCKSDB_LIBRARY_API rocksdb_ingestexternalfileoptions_t* rocksdb_ingestexternalfileoptions_create(); extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_set_move_files( rocksdb_ingestexternalfileoptions_t* opt, unsigned char move_files); extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_set_snapshot_consistency( rocksdb_ingestexternalfileoptions_t* opt, unsigned char snapshot_consistency); extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_set_allow_global_seqno( rocksdb_ingestexternalfileoptions_t* opt, unsigned char allow_global_seqno); extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_set_allow_blocking_flush( rocksdb_ingestexternalfileoptions_t* opt, unsigned char allow_blocking_flush); extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_set_ingest_behind( rocksdb_ingestexternalfileoptions_t* opt, unsigned char ingest_behind); extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_destroy( rocksdb_ingestexternalfileoptions_t* opt); extern ROCKSDB_LIBRARY_API void rocksdb_ingest_external_file( rocksdb_t* db, const char* const* file_list, const size_t list_len, const rocksdb_ingestexternalfileoptions_t* opt, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_ingest_external_file_cf( rocksdb_t* db, rocksdb_column_family_handle_t* handle, const char* const* file_list, const size_t list_len, const rocksdb_ingestexternalfileoptions_t* opt, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_try_catch_up_with_primary( rocksdb_t* db, char** errptr); /* SliceTransform */ extern ROCKSDB_LIBRARY_API rocksdb_slicetransform_t* rocksdb_slicetransform_create( void* state, void (*destructor)(void*), char* (*transform)(void*, const char* key, size_t length, size_t* 
dst_length), unsigned char (*in_domain)(void*, const char* key, size_t length), unsigned char (*in_range)(void*, const char* key, size_t length), const char* (*name)(void*)); extern ROCKSDB_LIBRARY_API rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t); extern ROCKSDB_LIBRARY_API rocksdb_slicetransform_t* rocksdb_slicetransform_create_noop(); extern ROCKSDB_LIBRARY_API void rocksdb_slicetransform_destroy( rocksdb_slicetransform_t*); /* Universal Compaction options */ enum { rocksdb_similar_size_compaction_stop_style = 0, rocksdb_total_size_compaction_stop_style = 1 }; extern ROCKSDB_LIBRARY_API rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create(); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_set_size_ratio( rocksdb_universal_compaction_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_set_min_merge_width( rocksdb_universal_compaction_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_set_max_merge_width( rocksdb_universal_compaction_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_set_max_size_amplification_percent( rocksdb_universal_compaction_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_set_compression_size_percent( rocksdb_universal_compaction_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_set_stop_style( rocksdb_universal_compaction_options_t*, int); extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_destroy( rocksdb_universal_compaction_options_t*); extern ROCKSDB_LIBRARY_API rocksdb_fifo_compaction_options_t* rocksdb_fifo_compaction_options_create(); extern ROCKSDB_LIBRARY_API void rocksdb_fifo_compaction_options_set_max_table_files_size( rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size); extern ROCKSDB_LIBRARY_API void rocksdb_fifo_compaction_options_destroy( 
rocksdb_fifo_compaction_options_t* fifo_opts); extern ROCKSDB_LIBRARY_API int rocksdb_livefiles_count( const rocksdb_livefiles_t*); extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_name( const rocksdb_livefiles_t*, int index); extern ROCKSDB_LIBRARY_API int rocksdb_livefiles_level( const rocksdb_livefiles_t*, int index); extern ROCKSDB_LIBRARY_API size_t rocksdb_livefiles_size(const rocksdb_livefiles_t*, int index); extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_smallestkey( const rocksdb_livefiles_t*, int index, size_t* size); extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_largestkey( const rocksdb_livefiles_t*, int index, size_t* size); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_livefiles_entries( const rocksdb_livefiles_t*, int index); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_livefiles_deletions( const rocksdb_livefiles_t*, int index); extern ROCKSDB_LIBRARY_API void rocksdb_livefiles_destroy( const rocksdb_livefiles_t*); /* Utility Helpers */ extern ROCKSDB_LIBRARY_API void rocksdb_get_options_from_string( const rocksdb_options_t* base_options, const char* opts_str, rocksdb_options_t* new_options, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_delete_file_in_range( rocksdb_t* db, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_delete_file_in_range_cf( rocksdb_t* db, rocksdb_column_family_handle_t* column_family, const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len, char** errptr); /* Transactions */ extern ROCKSDB_LIBRARY_API rocksdb_column_family_handle_t* rocksdb_transactiondb_create_column_family( rocksdb_transactiondb_t* txn_db, const rocksdb_options_t* column_family_options, const char* column_family_name, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_transactiondb_t* rocksdb_transactiondb_open( const rocksdb_options_t* options, const rocksdb_transactiondb_options_t* 
txn_db_options, const char* name, char** errptr); rocksdb_transactiondb_t* rocksdb_transactiondb_open_column_families( const rocksdb_options_t* options, const rocksdb_transactiondb_options_t* txn_db_options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr); extern ROCKSDB_LIBRARY_API const rocksdb_snapshot_t* rocksdb_transactiondb_create_snapshot(rocksdb_transactiondb_t* txn_db); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_release_snapshot( rocksdb_transactiondb_t* txn_db, const rocksdb_snapshot_t* snapshot); extern ROCKSDB_LIBRARY_API rocksdb_transaction_t* rocksdb_transaction_begin( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* write_options, const rocksdb_transaction_options_t* txn_options, rocksdb_transaction_t* old_txn); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_commit( rocksdb_transaction_t* txn, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_rollback( rocksdb_transaction_t* txn, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_set_savepoint( rocksdb_transaction_t* txn); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_rollback_to_savepoint( rocksdb_transaction_t* txn, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_destroy( rocksdb_transaction_t* txn); // This snapshot should be freed using rocksdb_free extern ROCKSDB_LIBRARY_API const rocksdb_snapshot_t* rocksdb_transaction_get_snapshot(rocksdb_transaction_t* txn); extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, const char* key, size_t klen, size_t* vlen, char** errptr); extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, 
size_t* vlen, char** errptr); extern ROCKSDB_LIBRARY_API char* rocksdb_transaction_get_for_update( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, const char* key, size_t klen, size_t* vlen, unsigned char exclusive, char** errptr); char* rocksdb_transaction_get_for_update_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, size_t* vlen, unsigned char exclusive, char** errptr); extern ROCKSDB_LIBRARY_API char* rocksdb_transactiondb_get( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, const char* key, size_t klen, size_t* vlen, char** errptr); extern ROCKSDB_LIBRARY_API char* rocksdb_transactiondb_get_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, size_t* vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_put( rocksdb_transaction_t* txn, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_put_cf( rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_put( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_put_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, const char* val, size_t vallen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_write( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_writebatch_t *batch, char** errptr); extern ROCKSDB_LIBRARY_API void 
rocksdb_transaction_merge( rocksdb_transaction_t* txn, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_merge_cf( rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_merge( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_merge_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, const char* val, size_t vlen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_delete( rocksdb_transaction_t* txn, const char* key, size_t klen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_delete_cf( rocksdb_transaction_t* txn, rocksdb_column_family_handle_t* column_family, const char* key, size_t klen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_delete( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, const char* key, size_t klen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_delete_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_transaction_create_iterator(rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options); extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_transaction_create_iterator_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family); extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* 
rocksdb_transactiondb_create_iterator(rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options); extern ROCKSDB_LIBRARY_API rocksdb_iterator_t* rocksdb_transactiondb_create_iterator_cf( rocksdb_transactiondb_t* txn_db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_close( rocksdb_transactiondb_t* txn_db); extern ROCKSDB_LIBRARY_API rocksdb_checkpoint_t* rocksdb_transactiondb_checkpoint_object_create(rocksdb_transactiondb_t* txn_db, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_optimistictransactiondb_t* rocksdb_optimistictransactiondb_open(const rocksdb_options_t* options, const char* name, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_optimistictransactiondb_t* rocksdb_optimistictransactiondb_open_column_families( const rocksdb_options_t* options, const char* name, int num_column_families, const char* const* column_family_names, const rocksdb_options_t* const* column_family_options, rocksdb_column_family_handle_t** column_family_handles, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_t* rocksdb_optimistictransactiondb_get_base_db( rocksdb_optimistictransactiondb_t* otxn_db); extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransactiondb_close_base_db( rocksdb_t* base_db); extern ROCKSDB_LIBRARY_API rocksdb_transaction_t* rocksdb_optimistictransaction_begin( rocksdb_optimistictransactiondb_t* otxn_db, const rocksdb_writeoptions_t* write_options, const rocksdb_optimistictransaction_options_t* otxn_options, rocksdb_transaction_t* old_txn); extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransactiondb_close( rocksdb_optimistictransactiondb_t* otxn_db); /* Transaction Options */ extern ROCKSDB_LIBRARY_API rocksdb_transactiondb_options_t* rocksdb_transactiondb_options_create(); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_destroy( rocksdb_transactiondb_options_t* opt); extern ROCKSDB_LIBRARY_API void 
rocksdb_transactiondb_options_set_max_num_locks( rocksdb_transactiondb_options_t* opt, int64_t max_num_locks); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_set_num_stripes( rocksdb_transactiondb_options_t* opt, size_t num_stripes); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_set_transaction_lock_timeout( rocksdb_transactiondb_options_t* opt, int64_t txn_lock_timeout); extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_options_set_default_lock_timeout( rocksdb_transactiondb_options_t* opt, int64_t default_lock_timeout); extern ROCKSDB_LIBRARY_API rocksdb_transaction_options_t* rocksdb_transaction_options_create(); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_destroy( rocksdb_transaction_options_t* opt); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_set_snapshot( rocksdb_transaction_options_t* opt, unsigned char v); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_deadlock_detect( rocksdb_transaction_options_t* opt, unsigned char v); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_lock_timeout( rocksdb_transaction_options_t* opt, int64_t lock_timeout); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_expiration( rocksdb_transaction_options_t* opt, int64_t expiration); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_deadlock_detect_depth( rocksdb_transaction_options_t* opt, int64_t depth); extern ROCKSDB_LIBRARY_API void rocksdb_transaction_options_set_max_write_batch_size( rocksdb_transaction_options_t* opt, size_t size); extern ROCKSDB_LIBRARY_API rocksdb_optimistictransaction_options_t* rocksdb_optimistictransaction_options_create(); extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransaction_options_destroy( rocksdb_optimistictransaction_options_t* opt); extern ROCKSDB_LIBRARY_API void rocksdb_optimistictransaction_options_set_set_snapshot( rocksdb_optimistictransaction_options_t* opt, unsigned char v); // referring to convention 
(3), this should be used by client // to free memory that was malloc()ed extern ROCKSDB_LIBRARY_API void rocksdb_free(void* ptr); extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_get_pinned( rocksdb_t* db, const rocksdb_readoptions_t* options, const char* key, size_t keylen, char** errptr); extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_get_pinned_cf( rocksdb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, size_t keylen, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_pinnableslice_destroy( rocksdb_pinnableslice_t* v); extern ROCKSDB_LIBRARY_API const char* rocksdb_pinnableslice_value( const rocksdb_pinnableslice_t* t, size_t* vlen); extern ROCKSDB_LIBRARY_API rocksdb_memory_consumers_t* rocksdb_memory_consumers_create(); extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_db( rocksdb_memory_consumers_t* consumers, rocksdb_t* db); extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_cache( rocksdb_memory_consumers_t* consumers, rocksdb_cache_t* cache); extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_destroy( rocksdb_memory_consumers_t* consumers); extern ROCKSDB_LIBRARY_API rocksdb_memory_usage_t* rocksdb_approximate_memory_usage_create(rocksdb_memory_consumers_t* consumers, char** errptr); extern ROCKSDB_LIBRARY_API void rocksdb_approximate_memory_usage_destroy( rocksdb_memory_usage_t* usage); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_approximate_memory_usage_get_mem_table_total( rocksdb_memory_usage_t* memory_usage); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_approximate_memory_usage_get_mem_table_unflushed( rocksdb_memory_usage_t* memory_usage); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_approximate_memory_usage_get_mem_table_readers_total( rocksdb_memory_usage_t* memory_usage); extern ROCKSDB_LIBRARY_API uint64_t rocksdb_approximate_memory_usage_get_cache_total( rocksdb_memory_usage_t* memory_usage); extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_dump_malloc_stats( rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_memtable_whole_key_filtering(rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void rocksdb_cancel_all_background_work( rocksdb_t* db, unsigned char wait); #ifdef __cplusplus } /* end extern "C" */ #endif rocksdb-6.11.4/include/rocksdb/cache.h000066400000000000000000000303551370372246700175410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // A Cache is an interface that maps keys to values. It has internal // synchronization and may be safely accessed concurrently from // multiple threads. It may automatically evict entries to make room // for new entries. Values have a specified charge against the cache // capacity. For example, a cache where the values are variable // length strings, may use the length of the string as the charge for // the string. // // A builtin cache implementation with a least-recently-used eviction // policy is provided. Clients may use their own implementations if // they want something more sophisticated (like scan-resistance, a // custom eviction policy, variable cache sizing, etc.) 
#pragma once #include #include #include #include "rocksdb/memory_allocator.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Cache; struct ConfigOptions; extern const bool kDefaultToAdaptiveMutex; enum CacheMetadataChargePolicy { kDontChargeCacheMetadata, kFullChargeCacheMetadata }; const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy = kFullChargeCacheMetadata; struct LRUCacheOptions { // Capacity of the cache. size_t capacity = 0; // Cache is sharded into 2^num_shard_bits shards, // by hash of key. Refer to NewLRUCache for further // information. int num_shard_bits = -1; // If strict_capacity_limit is set, // insert to the cache will fail when cache is full. bool strict_capacity_limit = false; // Percentage of cache reserved for high priority entries. // If greater than zero, the LRU list will be split into a high-pri // list and a low-pri list. High-pri entries will be insert to the // tail of high-pri list, while low-pri entries will be first inserted to // the low-pri list (the midpoint). This is refered to as // midpoint insertion strategy to make entries never get hit in cache // age out faster. // // See also // BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority. double high_pri_pool_ratio = 0.5; // If non-nullptr will use this allocator instead of system allocator when // allocating memory for cache blocks. Call this method before you start using // the cache! // // Caveat: when the cache is used as block cache, the memory allocator is // ignored when dealing with compression libraries that allocate memory // internally (currently only XPRESS). std::shared_ptr memory_allocator; // Whether to use adaptive mutexes for cache shards. Note that adaptive // mutexes need to be supported by the platform in order for this to have any // effect. The default value is true if RocksDB is compiled with // -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise. 
bool use_adaptive_mutex = kDefaultToAdaptiveMutex; CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy; LRUCacheOptions() {} LRUCacheOptions(size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, double _high_pri_pool_ratio, std::shared_ptr _memory_allocator = nullptr, bool _use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy _metadata_charge_policy = kDefaultCacheMetadataChargePolicy) : capacity(_capacity), num_shard_bits(_num_shard_bits), strict_capacity_limit(_strict_capacity_limit), high_pri_pool_ratio(_high_pri_pool_ratio), memory_allocator(std::move(_memory_allocator)), use_adaptive_mutex(_use_adaptive_mutex), metadata_charge_policy(_metadata_charge_policy) {} }; // Create a new cache with a fixed size capacity. The cache is sharded // to 2^num_shard_bits shards, by hash of the key. The total capacity // is divided and evenly assigned to each shard. If strict_capacity_limit // is set, insert to the cache will fail when cache is full. User can also // set percentage of the cache reserves for high priority entries via // high_pri_pool_pct. // num_shard_bits = -1 means it is automatically determined: every shard // will be at least 512KB and number of shard bits will not exceed 6. extern std::shared_ptr NewLRUCache( size_t capacity, int num_shard_bits = -1, bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5, std::shared_ptr memory_allocator = nullptr, bool use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy); extern std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts); // Similar to NewLRUCache, but create a cache based on CLOCK algorithm with // better concurrent performance in some cases. See util/clock_cache.cc for // more detail. // // Return nullptr if it is not supported. 
extern std::shared_ptr NewClockCache( size_t capacity, int num_shard_bits = -1, bool strict_capacity_limit = false, CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy); class Cache { public: // Depending on implementation, cache entries with high priority could be less // likely to get evicted than low priority entries. enum class Priority { HIGH, LOW }; Cache(std::shared_ptr allocator = nullptr) : memory_allocator_(std::move(allocator)) {} // No copying allowed Cache(const Cache&) = delete; Cache& operator=(const Cache&) = delete; // Creates a new Cache based on the input value string and returns the result. // Currently, this method can be used to create LRUCaches only // @param config_options // @param value The value might be: // - an old-style cache ("1M") -- equivalent to NewLRUCache(1024*1024) // - Name-value option pairs -- "capacity=1M; num_shard_bits=4;" // For the LRUCache, the values are defined in LRUCacheOptions. // @param result The new Cache object // @return OK if the cache was successfully created // @return NotFound if an invalid name was specified in the value // @return InvalidArgument if the options were not valid static Status CreateFromString(const ConfigOptions& config_options, const std::string& value, std::shared_ptr* result); // Destroys all existing entries by calling the "deleter" // function that was passed via the Insert() function. // // @See Insert virtual ~Cache() {} // Opaque handle to an entry stored in the cache. struct Handle {}; // The type of the Cache virtual const char* Name() const = 0; // Insert a mapping from key->value into the cache and assign it // the specified charge against the total cache capacity. // If strict_capacity_limit is true and cache reaches its full capacity, // return Status::Incomplete. // // If handle is not nullptr, returns a handle that corresponds to the // mapping. The caller must call this->Release(handle) when the returned // mapping is no longer needed.
In case of error caller is responsible to // cleanup the value (i.e. calling "deleter"). // // If handle is nullptr, it is as if Release is called immediately after // insert. In case of error value will be cleanup. // // When the inserted entry is no longer needed, the key and // value will be passed to "deleter". virtual Status Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Handle** handle = nullptr, Priority priority = Priority::LOW) = 0; // If the cache has no mapping for "key", returns nullptr. // // Else return a handle that corresponds to the mapping. The caller // must call this->Release(handle) when the returned mapping is no // longer needed. // If stats is not nullptr, relative tickers could be used inside the // function. virtual Handle* Lookup(const Slice& key, Statistics* stats = nullptr) = 0; // Increments the reference count for the handle if it refers to an entry in // the cache. Returns true if refcount was incremented; otherwise, returns // false. // REQUIRES: handle must have been returned by a method on *this. virtual bool Ref(Handle* handle) = 0; /** * Release a mapping returned by a previous Lookup(). A released entry might * still remain in cache in case it is later looked up by others. If * force_erase is set then it also erase it from the cache if there is no * other reference to it. Erasing it should call the deleter function that * was provided when the * entry was inserted. * * Returns true if the entry was also erased. */ // REQUIRES: handle must not have been released yet. // REQUIRES: handle must have been returned by a method on *this. virtual bool Release(Handle* handle, bool force_erase = false) = 0; // Return the value encapsulated in a handle returned by a // successful Lookup(). // REQUIRES: handle must not have been released yet. // REQUIRES: handle must have been returned by a method on *this. 
virtual void* Value(Handle* handle) = 0; // If the cache contains entry for key, erase it. Note that the // underlying entry will be kept around until all existing handles // to it have been released. virtual void Erase(const Slice& key) = 0; // Return a new numeric id. May be used by multiple clients who are // sharding the same cache to partition the key space. Typically the // client will allocate a new id at startup and prepend the id to // its cache keys. virtual uint64_t NewId() = 0; // sets the maximum configured capacity of the cache. When the new // capacity is less than the old capacity and the existing usage is // greater than new capacity, the implementation will do its best job to // purge the released entries from the cache in order to lower the usage virtual void SetCapacity(size_t capacity) = 0; // Set whether to return error on insertion when cache reaches its full // capacity. virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0; // Get the flag whether to return error on insertion when cache reaches its // full capacity. virtual bool HasStrictCapacityLimit() const = 0; // returns the maximum configured capacity of the cache virtual size_t GetCapacity() const = 0; // returns the memory size for the entries residing in the cache. virtual size_t GetUsage() const = 0; // returns the memory size for a specific entry in the cache. virtual size_t GetUsage(Handle* handle) const = 0; // returns the memory size for the entries in use by the system virtual size_t GetPinnedUsage() const = 0; // returns the charge for the specific entry in the cache. virtual size_t GetCharge(Handle* handle) const = 0; // Call this on shutdown if you want to speed it up. Cache will disown // any underlying data and will not free it on delete. This call will leak // memory - call this only if you're shutting down the process. // Any attempts of using cache after this call will fail terribly. // Always delete the DB object before calling this method! 
virtual void DisownData(){ // default implementation is noop } // Apply callback to all entries in the cache // If thread_safe is true, it will also lock the accesses. Otherwise, it will // access the cache without the lock held virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), bool thread_safe) = 0; // Remove all entries. // Prerequisite: no entry is referenced. virtual void EraseUnRefEntries() = 0; virtual std::string GetPrintableOptions() const { return ""; } MemoryAllocator* memory_allocator() const { return memory_allocator_.get(); } private: std::shared_ptr memory_allocator_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/cleanable.h000066400000000000000000000041501370372246700203760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class Cleanable { public: Cleanable(); // No copy constructor and copy assignment allowed. Cleanable(Cleanable&) = delete; Cleanable& operator=(Cleanable&) = delete; ~Cleanable(); // Move constructor and move assignment is allowed. Cleanable(Cleanable&&); Cleanable& operator=(Cleanable&&); // Clients are allowed to register function/arg1/arg2 triples that // will be invoked when this iterator is destroyed. // // Note that unlike all of the preceding methods, this method is // not abstract and therefore clients should not override it. 
typedef void (*CleanupFunction)(void* arg1, void* arg2); void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); void DelegateCleanupsTo(Cleanable* other); // DoCleanup and also resets the pointers for reuse inline void Reset() { DoCleanup(); cleanup_.function = nullptr; cleanup_.next = nullptr; } protected: struct Cleanup { CleanupFunction function; void* arg1; void* arg2; Cleanup* next; }; Cleanup cleanup_; // It also becomes the owner of c void RegisterCleanup(Cleanup* c); private: // Performs all the cleanups. It does not reset the pointers. Making it // private // to prevent misuse inline void DoCleanup() { if (cleanup_.function != nullptr) { (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); for (Cleanup* c = cleanup_.next; c != nullptr;) { (*c->function)(c->arg1, c->arg2); Cleanup* next = c->next; delete c; c = next; } } } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/compaction_filter.h000066400000000000000000000215751370372246700222030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2013 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class Slice; class SliceTransform; // Context information of a compaction run struct CompactionFilterContext { // Does this compaction run include all data files bool is_full_compaction; // Is this compaction requested by the client (true), // or is it occurring as an automatic compaction process bool is_manual_compaction; }; // CompactionFilter allows an application to modify/delete a key-value at // the time of compaction. class CompactionFilter { public: enum ValueType { kValue, kMergeOperand, kBlobIndex, // used internally by BlobDB. }; enum class Decision { kKeep, kRemove, kChangeValue, kRemoveAndSkipUntil, }; enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError }; // Context information of a compaction run struct Context { // Does this compaction run include all data files bool is_full_compaction; // Is this compaction requested by the client (true), // or is it occurring as an automatic compaction process bool is_manual_compaction; // Which column family this compaction is for. uint32_t column_family_id; }; virtual ~CompactionFilter() {} // The compaction process invokes this // method for kv that is being compacted. A return value // of false indicates that the kv should be preserved in the // output of this compaction run and a return value of true // indicates that this key-value should be removed from the // output of the compaction. The application can inspect // the existing value of the key and make decision based on it. // // Key-Values that are results of merge operation during compaction are not // passed into this function. Currently, when you have a mix of Put()s and // Merge()s on a same key, we only guarantee to process the merge operands // through the compaction filters. Put()s might be processed, or might not. 
// // When the value is to be preserved, the application has the option // to modify the existing_value and pass it back through new_value. // value_changed needs to be set to true in this case. // // Note that RocksDB snapshots (i.e. call GetSnapshot() API on a // DB* object) will not guarantee to preserve the state of the DB with // CompactionFilter. Data seen from a snapshot might disappear after a // compaction finishes. If you use snapshots, think twice about whether you // want to use compaction filter and whether you are using it in a safe way. // // If multithreaded compaction is being used *and* a single CompactionFilter // instance was supplied via Options::compaction_filter, this method may be // called from different threads concurrently. The application must ensure // that the call is thread-safe. // // If the CompactionFilter was created by a factory, then it will only ever // be used by a single thread that is doing the compaction run, and this // call does not need to be thread-safe. However, multiple filters may be // in existence and operating concurrently. virtual bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const { return false; } // The compaction process invokes this method on every merge operand. If this // method returns true, the merge operand will be ignored and not written out // in the compaction output // // Note: If you are using a TransactionDB, it is not recommended to implement // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB // may not realize there is a write conflict and may allow a Transaction to // Commit that should have failed. Instead, it is better to implement any // Merge filtering inside the MergeOperator. virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/, const Slice& /*operand*/) const { return false; } // An extended API. Called for both values and merge operands.
// Allows changing value and skipping ranges of keys. // The default implementation uses Filter() and FilterMergeOperand(). // If you're overriding this method, no need to override the other two. // `value_type` indicates whether this key-value corresponds to a normal // value (e.g. written with Put()) or a merge operand (written with Merge()). // // Possible return values: // * kKeep - keep the key-value pair. // * kRemove - remove the key-value pair or merge operand. // * kChangeValue - keep the key and change the value/operand to *new_value. // * kRemoveAndSkipUntil - remove this key-value pair, and also remove // all key-value pairs with key in [key, *skip_until). This range // of keys will be skipped without reading, potentially saving some // IO operations compared to removing the keys one by one. // // *skip_until <= key is treated the same as Decision::kKeep // (since the range [key, *skip_until) is empty). // // Caveats: // - The keys are skipped even if there are snapshots containing them, // i.e. values removed by kRemoveAndSkipUntil can disappear from a // snapshot - beware if you're using TransactionDB or // DB::GetSnapshot(). // - If value for a key was overwritten or merged into (multiple Put()s // or Merge()s), and compaction filter skips this key with // kRemoveAndSkipUntil, it's possible that it will remove only // the new value, exposing the old value that was supposed to be // overwritten. // - Doesn't work with PlainTableFactory in prefix mode. // - If you use kRemoveAndSkipUntil, consider also reducing // compaction_readahead_size option. // // Note: If you are using a TransactionDB, it is not recommended to filter // out or modify merge operands (ValueType::kMergeOperand). // If a merge operation is filtered out, TransactionDB may not realize there // is a write conflict and may allow a Transaction to Commit that should have // failed. Instead, it is better to implement any Merge filtering inside the // MergeOperator. 
virtual Decision FilterV2(int level, const Slice& key, ValueType value_type, const Slice& existing_value, std::string* new_value, std::string* /*skip_until*/) const { switch (value_type) { case ValueType::kValue: { bool value_changed = false; bool rv = Filter(level, key, existing_value, new_value, &value_changed); if (rv) { return Decision::kRemove; } return value_changed ? Decision::kChangeValue : Decision::kKeep; } case ValueType::kMergeOperand: { bool rv = FilterMergeOperand(level, key, existing_value); return rv ? Decision::kRemove : Decision::kKeep; } case ValueType::kBlobIndex: return Decision::kKeep; } assert(false); return Decision::kKeep; } // Internal (BlobDB) use only. Do not override in application code. virtual BlobDecision PrepareBlobOutput(const Slice& /* key */, const Slice& /* existing_value */, std::string* /* new_value */) const { return BlobDecision::kKeep; } // This function is deprecated. Snapshots will always be ignored for // compaction filters, because we realized that not ignoring snapshots doesn't // provide the gurantee we initially thought it would provide. Repeatable // reads will not be guaranteed anyway. If you override the function and // returns false, we will fail the compaction. virtual bool IgnoreSnapshots() const { return true; } // Returns a name that identifies this compaction filter. // The name will be printed to LOG file on start up for diagnosis. virtual const char* Name() const = 0; }; // Each compaction will create a new CompactionFilter allowing the // application to know about different compactions class CompactionFilterFactory { public: virtual ~CompactionFilterFactory() {} virtual std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context) = 0; // Returns a name that identifies this compaction filter factory. 
virtual const char* Name() const = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/compaction_job_stats.h000066400000000000000000000063001370372246700226730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { struct CompactionJobStats { CompactionJobStats() { Reset(); } void Reset(); // Aggregate the CompactionJobStats from another instance with this one void Add(const CompactionJobStats& stats); // the elapsed time of this compaction in microseconds. uint64_t elapsed_micros; // the elapsed CPU time of this compaction in microseconds. uint64_t cpu_micros; // the number of compaction input records. uint64_t num_input_records; // the number of compaction input files. size_t num_input_files; // the number of compaction input files at the output level. size_t num_input_files_at_output_level; // the number of compaction output records. uint64_t num_output_records; // the number of compaction output files. size_t num_output_files; // true if the compaction is a manual compaction bool is_manual_compaction; // the size of the compaction input in bytes. uint64_t total_input_bytes; // the size of the compaction output in bytes. uint64_t total_output_bytes; // number of records being replaced by newer record associated with same key. // this could be a new value or a deletion entry for that key so this field // sums up all updated and deleted keys uint64_t num_records_replaced; // the sum of the uncompressed input keys in bytes. uint64_t total_input_raw_key_bytes; // the sum of the uncompressed input values in bytes. uint64_t total_input_raw_value_bytes; // the number of deletion entries before compaction. 
Deletion entries // can disappear after compaction because they expired uint64_t num_input_deletion_records; // number of deletion records that were found obsolete and discarded // because it is not possible to delete any more keys with this entry // (i.e. all possible deletions resulting from it have been completed) uint64_t num_expired_deletion_records; // number of corrupt keys (ParseInternalKey returned false when applied to // the key) encountered and written out. uint64_t num_corrupt_keys; // Following counters are only populated if // options.report_bg_io_stats = true; // Time spent on file's Append() call. uint64_t file_write_nanos; // Time spent on sync file range. uint64_t file_range_sync_nanos; // Time spent on file fsync. uint64_t file_fsync_nanos; // Time spent on preparing file write (fallocate, etc) uint64_t file_prepare_write_nanos; // 0-terminated strings storing the first 8 bytes of the smallest and // largest key in the output. static const size_t kMaxPrefixLength = 8; std::string smallest_output_key_prefix; std::string largest_output_key_prefix; // number of single-deletes which do not meet a put uint64_t num_single_del_fallthru; // number of single-deletes which meet something other than a put uint64_t num_single_del_mismatch; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/comparator.h000066400000000000000000000120651370372246700206430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class Slice; // A Comparator object provides a total order across slices that are // used as keys in an sstable or a database. A Comparator implementation // must be thread-safe since rocksdb may invoke its methods concurrently // from multiple threads. class Comparator { public: Comparator() : timestamp_size_(0) {} Comparator(size_t ts_sz) : timestamp_size_(ts_sz) {} Comparator(const Comparator& orig) : timestamp_size_(orig.timestamp_size_) {} Comparator& operator=(const Comparator& rhs) { if (this != &rhs) { timestamp_size_ = rhs.timestamp_size_; } return *this; } virtual ~Comparator() {} static const char* Type() { return "Comparator"; } // Three-way comparison. Returns value: // < 0 iff "a" < "b", // == 0 iff "a" == "b", // > 0 iff "a" > "b" // Note that Compare(a, b) also compares timestamp if timestamp size is // non-zero. For the same user key with different timestamps, larger (newer) // timestamp comes first. virtual int Compare(const Slice& a, const Slice& b) const = 0; // Compares two slices for equality. The following invariant should always // hold (and is the default implementation): // Equal(a, b) iff Compare(a, b) == 0 // Overwrite only if equality comparisons can be done more efficiently than // three-way comparisons. virtual bool Equal(const Slice& a, const Slice& b) const { return Compare(a, b) == 0; } // The name of the comparator. Used to check for comparator // mismatches (i.e., a DB created with one comparator is // accessed using a different comparator. // // The client of this package should switch to a new name whenever // the comparator implementation changes in a way that will cause // the relative ordering of any two keys to change. // // Names starting with "rocksdb." are reserved and should not be used // by any clients of this package. 
virtual const char* Name() const = 0; // Advanced functions: these are used to reduce the space requirements // for internal data structures like index blocks. // If *start < limit, changes *start to a short string in [start,limit). // Simple comparator implementations may return with *start unchanged, // i.e., an implementation of this method that does nothing is correct. virtual void FindShortestSeparator(std::string* start, const Slice& limit) const = 0; // Changes *key to a short string >= *key. // Simple comparator implementations may return with *key unchanged, // i.e., an implementation of this method that does nothing is correct. virtual void FindShortSuccessor(std::string* key) const = 0; // if it is a wrapped comparator, may return the root one. // return itself if it is not wrapped. virtual const Comparator* GetRootComparator() const { return this; } // given two keys, determine if t is the successor of s virtual bool IsSameLengthImmediateSuccessor(const Slice& /*s*/, const Slice& /*t*/) const { return false; } // return true if two keys with different byte sequences can be regarded // as equal by this comparator. // The major use case is to determine if DataBlockHashIndex is compatible // with the customized comparator. virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } inline size_t timestamp_size() const { return timestamp_size_; } int CompareWithoutTimestamp(const Slice& a, const Slice& b) const { return CompareWithoutTimestamp(a, /*a_has_ts=*/true, b, /*b_has_ts=*/true); } // For two events e1 and e2 whose timestamps are t1 and t2 respectively, // Returns value: // < 0 iff t1 < t2 // == 0 iff t1 == t2 // > 0 iff t1 > t2 // Note that an all-zero byte array will be the smallest (oldest) timestamp // of the same length.
virtual int CompareTimestamp(const Slice& /*ts1*/, const Slice& /*ts2*/) const { return 0; } virtual int CompareWithoutTimestamp(const Slice& a, bool /*a_has_ts*/, const Slice& b, bool /*b_has_ts*/) const { return Compare(a, b); } private: size_t timestamp_size_; }; // Return a builtin comparator that uses lexicographic byte-wise // ordering. The result remains the property of this module and // must not be deleted. extern const Comparator* BytewiseComparator(); // Return a builtin comparator that uses reverse lexicographic byte-wise // ordering. extern const Comparator* ReverseBytewiseComparator(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/concurrent_task_limiter.h000066400000000000000000000031231370372246700234200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "rocksdb/env.h" #include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { class ConcurrentTaskLimiter { public: virtual ~ConcurrentTaskLimiter() {} // Returns a name that identifies this concurrent task limiter. virtual const std::string& GetName() const = 0; // Set max concurrent tasks. // limit = 0 means no new task allowed. // limit < 0 means no limitation. virtual void SetMaxOutstandingTask(int32_t limit) = 0; // Reset to unlimited max concurrent task. virtual void ResetMaxOutstandingTask() = 0; // Returns current outstanding task count. 
virtual int32_t GetOutstandingTask() const = 0; }; // Create a ConcurrentTaskLimiter that can be shared with mulitple CFs // across RocksDB instances to control concurrent tasks. // // @param name: Name of the limiter. // @param limit: max concurrent tasks. // limit = 0 means no new task allowed. // limit < 0 means no limitation. extern ConcurrentTaskLimiter* NewConcurrentTaskLimiter(const std::string& name, int32_t limit); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/convenience.h000066400000000000000000000553611370372246700207760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "rocksdb/db.h" #include "rocksdb/status.h" #include "rocksdb/table.h" namespace ROCKSDB_NAMESPACE { class Env; struct ColumnFamilyOptions; struct DBOptions; struct Options; // ConfigOptions containing the parameters/controls for // comparing objects and converting to/from strings. // These settings control how the methods // treat errors (e.g. ignore_unknown_objects), the format // of the serialization (e.g. delimiter), and how to compare // options (sanity_level). struct ConfigOptions { // This enum defines the RocksDB options sanity level. enum SanityLevel : unsigned char { kSanityLevelNone = 0x01, // Performs no sanity check at all. // Performs minimum check to ensure the RocksDB instance can be // opened without corrupting / mis-interpreting the data. kSanityLevelLooselyCompatible = 0x02, // Perform exact match sanity check. 
kSanityLevelExactMatch = 0xFF, }; enum Depth { kDepthDefault, // Traverse nested options that are not flagged as "shallow" kDepthShallow, // Do not traverse into any nested options kDepthDetailed, // Traverse nested options, overriding the options shallow // setting }; // When true, any unused options will be ignored and OK will be returned bool ignore_unknown_options = false; // If the strings are escaped (old-style?) bool input_strings_escaped = true; // The separator between options when converting to a string std::string delimiter = ";"; // Controls how to traverse options during print/match stages Depth depth = Depth::kDepthDefault; // Controls how options are serialized // Controls how pedantic the comparison must be for equivalency SanityLevel sanity_level = SanityLevel::kSanityLevelExactMatch; // `file_readahead_size` is used for readahead for the option file. size_t file_readahead_size = 512 * 1024; // The environment to use for this option Env* env = Env::Default(); bool IsShallow() const { return depth == Depth::kDepthShallow; } bool IsDetailed() const { return depth == Depth::kDepthDetailed; } bool IsCheckDisabled() const { return sanity_level == SanityLevel::kSanityLevelNone; } bool IsCheckEnabled(SanityLevel level) const { return (level > SanityLevel::kSanityLevelNone && level <= sanity_level); } }; #ifndef ROCKSDB_LITE // The following set of functions provide a way to construct RocksDB Options // from a string or a string-to-string map. Here're the general rule of // setting option values from strings by type. Some RocksDB types are also // supported in these APIs. Please refer to the comment of the function itself // to find more information about how to config those RocksDB types. // // * Strings: // Strings will be used as values directly without any truncating or // trimming. // // * Booleans: // - "true" or "1" => true // - "false" or "0" => false. 
// [Example]: // - {"optimize_filters_for_hits", "1"} in GetColumnFamilyOptionsFromMap, or // - "optimize_filters_for_hits=true" in GetColumnFamilyOptionsFromString. // // * Integers: // Integers are converted directly from string, in addition to the following // units that we support: // - 'k' or 'K' => 2^10 // - 'm' or 'M' => 2^20 // - 'g' or 'G' => 2^30 // - 't' or 'T' => 2^40 // only for unsigned int with sufficient bits. // [Example]: // - {"arena_block_size", "19G"} in GetColumnFamilyOptionsFromMap, or // - "arena_block_size=19G" in GetColumnFamilyOptionsFromString. // // * Doubles / Floating Points: // Doubles / Floating Points are converted directly from string. Note that // currently we do not support units. // [Example]: // - {"hard_rate_limit", "2.1"} in GetColumnFamilyOptionsFromMap, or // - "hard_rate_limit=2.1" in GetColumnFamilyOptionsFromString. // * Array / Vectors: // An array is specified by a list of values, where ':' is used as // the delimiter to separate each value. // [Example]: // - {"compression_per_level", "kNoCompression:kSnappyCompression"} // in GetColumnFamilyOptionsFromMap, or // - "compression_per_level=kNoCompression:kSnappyCompression" in // GetColumnFamilyOptionsFromString // * Enums: // The valid values of each enum are identical to the names of its constants. // [Example]: // - CompressionType: valid values are "kNoCompression", // "kSnappyCompression", "kZlibCompression", "kBZip2Compression", ... // - CompactionStyle: valid values are "kCompactionStyleLevel", // "kCompactionStyleUniversal", "kCompactionStyleFIFO", and // "kCompactionStyleNone". // // Take a default ColumnFamilyOptions "base_options" in addition to a // map "opts_map" of option name to option value to construct the new // ColumnFamilyOptions "new_options". // // Below are the instructions of how to config some non-primitive-typed // options in ColumnFamilyOptions: // // * table_factory: // table_factory can be configured using our custom nested-option syntax.
//
//   {option_a=value_a; option_b=value_b; option_c=value_c; ... }
//
//   A nested option is enclosed by two curly braces, within which there are
//   multiple option assignments.  Each assignment is of the form
//   "variable_name=value;".
//
//   Currently we support the following types of TableFactory:
//   - BlockBasedTableFactory:
//     Use name "block_based_table_factory" to initialize table_factory with
//     BlockBasedTableFactory.  Its BlockBasedTableOptions can be
//     configured using the nested-option syntax.
//     [Example]:
//     * {"block_based_table_factory", "{block_cache=1M;block_size=4k;}"}
//       is equivalent to assigning table_factory with a BlockBasedTableFactory
//       that has a 1M LRU block-cache with block size equal to 4k:
//         ColumnFamilyOptions cf_opt;
//         BlockBasedTableOptions blk_opt;
//         blk_opt.block_cache = NewLRUCache(1 * 1024 * 1024);
//         blk_opt.block_size = 4 * 1024;
//         cf_opt.table_factory.reset(NewBlockBasedTableFactory(blk_opt));
//   - PlainTableFactory:
//     Use name "plain_table_factory" to initialize table_factory with
//     PlainTableFactory.  Its PlainTableOptions can be configured using
//     the nested-option syntax.
//     [Example]:
//     * {"plain_table_factory", "{user_key_len=66;bloom_bits_per_key=20;}"}
//
// * memtable_factory:
//   Use "memtable" to config memtable_factory.  Here are the supported
//   memtable factories:
//   - SkipList:
//     Pass "skip_list:<lookahead>" to config memtable to use SkipList,
//     or simply "skip_list" to use the default SkipList.
//     [Example]:
//     * {"memtable", "skip_list:5"} is equivalent to setting
//       memtable to SkipListFactory(5).
//   - PrefixHash:
//     Pass "prefix_hash:<hash_bucket_count>" to config memtable
//     to use PrefixHash, or simply "prefix_hash" to use the default
//     PrefixHash.
//     [Example]:
//     * {"memtable", "prefix_hash:1000"} is equivalent to setting
//       memtable to NewHashSkipListRepFactory(1000).
//   - HashLinkedList:
//     Pass "hash_linkedlist:<hash_bucket_count>" to config memtable
//     to use HashLinkedList, or simply "hash_linkedlist" to use the default
//     HashLinkedList.
//     [Example]:
//     * {"memtable", "hash_linkedlist:1000"} is equivalent to
//       setting memtable to NewHashLinkListRepFactory(1000).
//   - VectorRepFactory:
//     Pass "vector:<count>" to config memtable to use VectorRepFactory,
//     or simply "vector" to use the default Vector memtable.
//     [Example]:
//     * {"memtable", "vector:1024"} is equivalent to setting memtable
//       to VectorRepFactory(1024).
//
// * compression_opts:
//   Use "compression_opts" to config compression_opts.  The value format
//   is of the form "<window_bits>:<level>:<strategy>:<max_dict_bytes>".
//   [Example]:
//   * {"compression_opts", "4:5:6:7"} is equivalent to setting:
//       ColumnFamilyOptions cf_opt;
//       cf_opt.compression_opts.window_bits = 4;
//       cf_opt.compression_opts.level = 5;
//       cf_opt.compression_opts.strategy = 6;
//       cf_opt.compression_opts.max_dict_bytes = 7;
//
// The GetColumnFamilyOptionsFromMap(ConfigOptions, ...) should be used; the
// alternative signature may be deprecated in a future release.  The equivalent
// functionality can be achieved by setting the corresponding options in
// the ConfigOptions parameter.
//
// @param config_options controls how the map is processed.
// @param base_options the default options of the output "new_options".
// @param opts_map an option name to value map for specifying how "new_options"
//     should be set.
// @param new_options the resulting options based on "base_options" with the
//     change specified in "opts_map".
// @param input_strings_escaped when set to true, each escaped character
//     prefixed by '\' in the values of the opts_map will be further converted
//     back to the raw string before assigning to the associated options.
// @param ignore_unknown_options when set to true, unknown options are ignored
//     instead of resulting in an unknown-option error.
// @return Status::OK() on success.
Otherwise, a non-ok status indicating // error will be returned, and "new_options" will be set to "base_options". // @return Status::NotFound means the one (or more) of the option name in // the opts_map is not valid for this option // @return Status::NotSupported means we do not know how to parse one of the // value for this option // @return Status::InvalidArgument means the one of the option values is not // valid for this option. Status GetColumnFamilyOptionsFromMap( const ConfigOptions& config_options, const ColumnFamilyOptions& base_options, const std::unordered_map& opts_map, ColumnFamilyOptions* new_options); Status GetColumnFamilyOptionsFromMap( const ColumnFamilyOptions& base_options, const std::unordered_map& opts_map, ColumnFamilyOptions* new_options, bool input_strings_escaped = false, bool ignore_unknown_options = false); // Take a default DBOptions "base_options" in addition to a // map "opts_map" of option name to option value to construct the new // DBOptions "new_options". // // Below are the instructions of how to config some non-primitive-typed // options in DBOptions: // // * rate_limiter_bytes_per_sec: // RateLimiter can be configured directly by specifying its bytes_per_sec. // [Example]: // - Passing {"rate_limiter_bytes_per_sec", "1024"} is equivalent to // passing NewGenericRateLimiter(1024) to rate_limiter_bytes_per_sec. // // The GetDBOptionsFromMap(ConfigOptions, ...) should be used; the // alternative signature may be deprecated in a future release. The equivalent // functionality can be achieved by setting the corresponding options in // the ConfigOptions parameter. // // @param config_options controls how the map is processed. // @param base_options the default options of the output "new_options". // @param opts_map an option name to value map for specifying how "new_options" // should be set. // @param new_options the resulting options based on "base_options" with the // change specified in "opts_map". 
// @param input_strings_escaped when set to true, each escaped characters // prefixed by '\' in the values of the opts_map will be further converted // back to the raw string before assigning to the associated options. // @param ignore_unknown_options when set to true, unknown options are ignored // instead of resulting in an unknown-option error. // @return Status::OK() on success. Otherwise, a non-ok status indicating // error will be returned, and "new_options" will be set to "base_options". // @return Status::NotFound means the one (or more) of the option name in // the opts_map is not valid for this option // @return Status::NotSupported means we do not know how to parse one of the // value for this option // @return Status::InvalidArgument means the one of the option values is not // valid for this option. Status GetDBOptionsFromMap( const ConfigOptions& cfg_options, const DBOptions& base_options, const std::unordered_map& opts_map, DBOptions* new_options); Status GetDBOptionsFromMap( const DBOptions& base_options, const std::unordered_map& opts_map, DBOptions* new_options, bool input_strings_escaped = false, bool ignore_unknown_options = false); // Take a default BlockBasedTableOptions "table_options" in addition to a // map "opts_map" of option name to option value to construct the new // BlockBasedTableOptions "new_table_options". // // Below are the instructions of how to config some non-primitive-typed // options in BlockBasedTableOptions: // // * filter_policy: // We currently only support the following FilterPolicy in the convenience // functions: // - BloomFilter: use "bloomfilter:[bits_per_key]:[use_block_based_builder]" // to specify BloomFilter. The above string is equivalent to calling // NewBloomFilterPolicy(bits_per_key, use_block_based_builder). // [Example]: // - Pass {"filter_policy", "bloomfilter:4:true"} in // GetBlockBasedTableOptionsFromMap to use a BloomFilter with 4-bits // per key and use_block_based_builder enabled. 
//
// * block_cache / block_cache_compressed:
//   We currently only support LRU cache in the GetOptions API.  The LRU
//   cache can be set by directly specifying its size.
//   [Example]:
//   - Passing {"block_cache", "1M"} in GetBlockBasedTableOptionsFromMap is
//     equivalent to setting block_cache using NewLRUCache(1024 * 1024).
//
// The GetBlockBasedTableOptionsFromMap(ConfigOptions, ...) should be used;
// the alternative signature may be deprecated in a future release.  The
// equivalent functionality can be achieved by setting the corresponding
// options in the ConfigOptions parameter.
//
// @param config_options controls how the map is processed.
// @param table_options the default options of the output "new_table_options".
// @param opts_map an option name to value map for specifying how
//     "new_table_options" should be set.
// @param new_table_options the resulting options based on "table_options"
//     with the change specified in "opts_map".
// @param input_strings_escaped when set to true, each escaped character
//     prefixed by '\' in the values of the opts_map will be further converted
//     back to the raw string before assigning to the associated options.
// @param ignore_unknown_options when set to true, unknown options are ignored
//     instead of resulting in an unknown-option error.
// @return Status::OK() on success.  Otherwise, a non-ok status indicating
//     error will be returned, and "new_table_options" will be set to
//     "table_options".
//
// Preferred overload: how the map is processed is taken from the fields of
// "config_options".
Status GetBlockBasedTableOptionsFromMap(
    const ConfigOptions& config_options,
    const BlockBasedTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    BlockBasedTableOptions* new_table_options);
// Legacy overload: escaping and unknown-option handling are passed as explicit
// flags, both defaulting to false.
Status GetBlockBasedTableOptionsFromMap(
    const BlockBasedTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    BlockBasedTableOptions* new_table_options,
    bool input_strings_escaped = false, bool ignore_unknown_options = false);

// Take a default PlainTableOptions "table_options" in addition to a
// map "opts_map" of option name to option value to construct the new
// PlainTableOptions "new_table_options".
//
// The GetPlainTableOptionsFromMap(ConfigOptions, ...) should be used; the
// alternative signature may be deprecated in a future release.  The equivalent
// functionality can be achieved by setting the corresponding options in
// the ConfigOptions parameter.
//
// @param config_options controls how the map is processed.
// @param table_options the default options of the output "new_table_options".
// @param opts_map an option name to value map for specifying how
//     "new_table_options" should be set.
// @param new_table_options the resulting options based on "table_options"
//     with the change specified in "opts_map".
// @param input_strings_escaped when set to true, each escaped character
//     prefixed by '\' in the values of the opts_map will be further converted
//     back to the raw string before assigning to the associated options.
// @param ignore_unknown_options when set to true, unknown options are ignored
//     instead of resulting in an unknown-option error.
// @return Status::OK() on success.  Otherwise, a non-ok status indicating
//     error will be returned, and "new_table_options" will be set to
//     "table_options".
// Preferred overload: how the map is processed is taken from the fields of
// "config_options".
Status GetPlainTableOptionsFromMap(
    const ConfigOptions& config_options, const PlainTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    PlainTableOptions* new_table_options);
// Legacy overload: escaping and unknown-option handling are passed as explicit
// flags, both defaulting to false.
Status GetPlainTableOptionsFromMap(
    const PlainTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    PlainTableOptions* new_table_options, bool input_strings_escaped = false,
    bool ignore_unknown_options = false);

// Take a string representation of option names and values, apply them into the
// base_options, and return the new options as a result.  The string has the
// following format:
//   "write_buffer_size=1024;max_write_buffer_number=2"
// Nested options config is also possible.  For example, you can define
// BlockBasedTableOptions as part of the string for block-based table factory:
//   "write_buffer_size=1024;block_based_table_factory={block_size=4k};"
//   "max_write_buffer_number=2"
//
//
// The GetColumnFamilyOptionsFromString(ConfigOptions, ...) should be used; the
// alternative signature may be deprecated in a future release.  The equivalent
// functionality can be achieved by setting the corresponding options in
// the ConfigOptions parameter.
Status GetColumnFamilyOptionsFromString(const ConfigOptions& config_options, const ColumnFamilyOptions& base_options, const std::string& opts_str, ColumnFamilyOptions* new_options); Status GetColumnFamilyOptionsFromString(const ColumnFamilyOptions& base_options, const std::string& opts_str, ColumnFamilyOptions* new_options); Status GetDBOptionsFromString(const ConfigOptions& config_options, const DBOptions& base_options, const std::string& opts_str, DBOptions* new_options); Status GetDBOptionsFromString(const DBOptions& base_options, const std::string& opts_str, DBOptions* new_options); Status GetStringFromDBOptions(const ConfigOptions& config_options, const DBOptions& db_options, std::string* opts_str); Status GetStringFromDBOptions(std::string* opts_str, const DBOptions& db_options, const std::string& delimiter = "; "); Status GetStringFromColumnFamilyOptions(const ConfigOptions& config_options, const ColumnFamilyOptions& cf_options, std::string* opts_str); Status GetStringFromColumnFamilyOptions(std::string* opts_str, const ColumnFamilyOptions& cf_options, const std::string& delimiter = "; "); Status GetStringFromCompressionType(std::string* compression_str, CompressionType compression_type); std::vector GetSupportedCompressions(); Status GetBlockBasedTableOptionsFromString( const BlockBasedTableOptions& table_options, const std::string& opts_str, BlockBasedTableOptions* new_table_options); Status GetBlockBasedTableOptionsFromString( const ConfigOptions& config_options, const BlockBasedTableOptions& table_options, const std::string& opts_str, BlockBasedTableOptions* new_table_options); Status GetPlainTableOptionsFromString(const PlainTableOptions& table_options, const std::string& opts_str, PlainTableOptions* new_table_options); Status GetPlainTableOptionsFromString(const ConfigOptions& config_options, const PlainTableOptions& table_options, const std::string& opts_str, PlainTableOptions* new_table_options); Status GetMemTableRepFactoryFromString( const 
std::string& opts_str, std::unique_ptr* new_mem_factory); Status GetOptionsFromString(const Options& base_options, const std::string& opts_str, Options* new_options); Status GetOptionsFromString(const ConfigOptions& config_options, const Options& base_options, const std::string& opts_str, Options* new_options); Status StringToMap(const std::string& opts_str, std::unordered_map* opts_map); // Request stopping background work, if wait is true wait until it's done void CancelAllBackgroundWork(DB* db, bool wait = false); // Delete files which are entirely in the given range // Could leave some keys in the range which are in files which are not // entirely in the range. Also leaves L0 files regardless of whether they're // in the range. // Snapshots before the delete might not see the data in the given range. Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end, bool include_end = true); // Delete files in multiple ranges at once // Delete files in a lot of ranges one at a time can be slow, use this API for // better performance in that case. Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end = true); // Verify the checksum of file Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, const std::string& file_path); // Verify the checksum of file Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, const ReadOptions& read_options, const std::string& file_path); #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/db.h000066400000000000000000002271051370372246700170640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include #include #include "rocksdb/iterator.h" #include "rocksdb/listener.h" #include "rocksdb/metadata.h" #include "rocksdb/options.h" #include "rocksdb/snapshot.h" #include "rocksdb/sst_file_writer.h" #include "rocksdb/thread_status.h" #include "rocksdb/transaction_log.h" #include "rocksdb/types.h" #include "rocksdb/version.h" #ifdef _WIN32 // Windows API macro interference #undef DeleteFile #endif #if defined(__GNUC__) || defined(__clang__) #define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__)) #elif _WIN32 #define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated) #endif namespace ROCKSDB_NAMESPACE { struct Options; struct DBOptions; struct ColumnFamilyOptions; struct ReadOptions; struct WriteOptions; struct FlushOptions; struct CompactionOptions; struct CompactRangeOptions; struct TableProperties; struct ExternalSstFileInfo; class WriteBatch; class Env; class EventListener; class StatsHistoryIterator; class TraceWriter; #ifdef ROCKSDB_LITE class CompactionJobInfo; #endif class FileSystem; extern const std::string kDefaultColumnFamilyName; extern const std::string kPersistentStatsColumnFamilyName; struct ColumnFamilyDescriptor { std::string name; ColumnFamilyOptions options; ColumnFamilyDescriptor() : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {} ColumnFamilyDescriptor(const std::string& _name, const ColumnFamilyOptions& _options) : name(_name), options(_options) {} }; class ColumnFamilyHandle { public: virtual ~ColumnFamilyHandle() {} // Returns the name of the column family associated with the current handle. virtual const std::string& GetName() const = 0; // Returns the ID of the column family associated with the current handle. 
virtual uint32_t GetID() const = 0; // Fills "*desc" with the up-to-date descriptor of the column family // associated with this handle. Since it fills "*desc" with the up-to-date // information, this call might internally lock and release DB mutex to // access the up-to-date CF options. In addition, all the pointer-typed // options cannot be referenced any longer than the original options exist. // // Note that this function is not supported in RocksDBLite. virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0; // Returns the comparator of the column family associated with the // current handle. virtual const Comparator* GetComparator() const = 0; }; static const int kMajorVersion = __ROCKSDB_MAJOR__; static const int kMinorVersion = __ROCKSDB_MINOR__; // A range of keys struct Range { Slice start; Slice limit; Range() {} Range(const Slice& s, const Slice& l) : start(s), limit(l) {} }; struct RangePtr { const Slice* start; const Slice* limit; RangePtr() : start(nullptr), limit(nullptr) {} RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) {} }; // It is valid that files_checksums and files_checksum_func_names are both // empty (no checksum informaiton is provided for ingestion). Otherwise, // their sizes should be the same as external_files. The file order should // be the same in three vectors and guaranteed by the caller. struct IngestExternalFileArg { ColumnFamilyHandle* column_family = nullptr; std::vector external_files; IngestExternalFileOptions options; std::vector files_checksums; std::vector files_checksum_func_names; }; struct GetMergeOperandsOptions { int expected_max_number_of_operands = 0; }; // A collections of table properties objects, where // key: is the table's file name. // value: the table properties object of the given table. typedef std::unordered_map> TablePropertiesCollection; // A DB is a persistent, versioned ordered map from keys to values. 
// A DB is safe for concurrent access from multiple threads without // any external synchronization. // DB is an abstract base class with one primary implementation (DBImpl) // and a number of wrapper implementations. class DB { public: // Open the database with the specified "name". // Stores a pointer to a heap-allocated database in *dbptr and returns // OK on success. // Stores nullptr in *dbptr and returns a non-OK status on error. // Caller should delete *dbptr when it is no longer needed. static Status Open(const Options& options, const std::string& name, DB** dbptr); // Open the database for read only. All DB interfaces // that modify data, like put/delete, will return error. // If the db is opened in read only mode, then no compactions // will happen. // // Not supported in ROCKSDB_LITE, in which case the function will // return Status::NotSupported. static Status OpenForReadOnly(const Options& options, const std::string& name, DB** dbptr, bool error_if_log_file_exist = false); // Open the database for read only with column families. When opening DB with // read only, you can specify only a subset of column families in the // database that should be opened. However, you always need to specify default // column family. The default column family name is 'default' and it's stored // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName // // Not supported in ROCKSDB_LITE, in which case the function will // return Status::NotSupported. static Status OpenForReadOnly( const DBOptions& db_options, const std::string& name, const std::vector& column_families, std::vector* handles, DB** dbptr, bool error_if_log_file_exist = false); // The following OpenAsSecondary functions create a secondary instance that // can dynamically tail the MANIFEST of a primary that must have already been // created. User can call TryCatchUpWithPrimary to make the secondary // instance catch up with primary (WAL tailing is NOT supported now) whenever // the user feels necessary. 
Column families created by the primary after the // secondary instance starts are currently ignored by the secondary instance. // Column families opened by secondary and dropped by the primary will be // dropped by secondary as well. However the user of the secondary instance // can still access the data of such dropped column family as long as they // do not destroy the corresponding column family handle. // WAL tailing is not supported at present, but will arrive soon. // // The options argument specifies the options to open the secondary instance. // The name argument specifies the name of the primary db that you have used // to open the primary instance. // The secondary_path argument points to a directory where the secondary // instance stores its info log. // The dbptr is an out-arg corresponding to the opened secondary instance. // The pointer points to a heap-allocated database, and the user should // delete it after use. // Open DB as secondary instance with only the default column family. // Return OK on success, non-OK on failures. static Status OpenAsSecondary(const Options& options, const std::string& name, const std::string& secondary_path, DB** dbptr); // Open DB as secondary instance with column families. You can open a subset // of column families in secondary mode. // The db_options specify the database specific options. // The name argument specifies the name of the primary db that you have used // to open the primary instance. // The secondary_path argument points to a directory where the secondary // instance stores its info log. // The column_families argument specifieds a list of column families to open. // If any of the column families does not exist, the function returns non-OK // status. // The handles is an out-arg corresponding to the opened database column // familiy handles. // The dbptr is an out-arg corresponding to the opened secondary instance. 
// The pointer points to a heap-allocated database, and the caller should // delete it after use. Before deleting the dbptr, the user should also // delete the pointers stored in handles vector. // Return OK on success, on-OK on failures. static Status OpenAsSecondary( const DBOptions& db_options, const std::string& name, const std::string& secondary_path, const std::vector& column_families, std::vector* handles, DB** dbptr); // Open DB with column families. // db_options specify database specific options // column_families is the vector of all column families in the database, // containing column family name and options. You need to open ALL column // families in the database. To get the list of column families, you can use // ListColumnFamilies(). Also, you can open only a subset of column families // for read-only access. // The default column family name is 'default' and it's stored // in ROCKSDB_NAMESPACE::kDefaultColumnFamilyName. // If everything is OK, handles will on return be the same size // as column_families --- handles[i] will be a handle that you // will use to operate on column family column_family[i]. // Before delete DB, you have to close All column families by calling // DestroyColumnFamilyHandle() with all the handles. static Status Open(const DBOptions& db_options, const std::string& name, const std::vector& column_families, std::vector* handles, DB** dbptr); virtual Status Resume() { return Status::NotSupported(); } // Close the DB by releasing resources, closing files etc. This should be // called before calling the destructor so that the caller can get back a // status in case there are any errors. This will not fsync the WAL files. // If syncing is required, the caller must first call SyncWAL(), or Write() // using an empty write batch with WriteOptions.sync=true. // Regardless of the return status, the DB must be freed. // If the return status is Aborted(), closing fails because there is // unreleased snapshot in the system. 
In this case, users can release // the unreleased snapshots and try again and expect it to succeed. For // other status, recalling Close() will be no-op. // If the return status is NotSupported(), then the DB implementation does // cleanup in the destructor virtual Status Close() { return Status::NotSupported(); } // ListColumnFamilies will open the DB specified by argument name // and return the list of all column families in that DB // through column_families argument. The ordering of // column families in column_families is unspecified. static Status ListColumnFamilies(const DBOptions& db_options, const std::string& name, std::vector* column_families); // Abstract class ctor DB() {} // No copying allowed DB(const DB&) = delete; void operator=(const DB&) = delete; virtual ~DB(); // Create a column_family and return the handle of column family // through the argument handle. virtual Status CreateColumnFamily(const ColumnFamilyOptions& options, const std::string& column_family_name, ColumnFamilyHandle** handle); // Bulk create column families with the same column family options. // Return the handles of the column families through the argument handles. // In case of error, the request may succeed partially, and handles will // contain column family handles that it managed to create, and have size // equal to the number of created column families. virtual Status CreateColumnFamilies( const ColumnFamilyOptions& options, const std::vector& column_family_names, std::vector* handles); // Bulk create column families. // Return the handles of the column families through the argument handles. // In case of error, the request may succeed partially, and handles will // contain column family handles that it managed to create, and have size // equal to the number of created column families. virtual Status CreateColumnFamilies( const std::vector& column_families, std::vector* handles); // Drop a column family specified by column_family handle. 
This call // only records a drop record in the manifest and prevents the column // family from flushing and compacting. virtual Status DropColumnFamily(ColumnFamilyHandle* column_family); // Bulk drop column families. This call only records drop records in the // manifest and prevents the column families from flushing and compacting. // In case of error, the request may succeed partially. User may call // ListColumnFamilies to check the result. virtual Status DropColumnFamilies( const std::vector& column_families); // Close a column family specified by column_family handle and destroy // the column family handle specified to avoid double deletion. This call // deletes the column family handle by default. Use this method to // close column family instead of deleting column family handle directly virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family); // Set the database entry for "key" to "value". // If "key" already exists, it will be overwritten. // Returns OK on success, and a non-OK status on error. // Note: consider setting options.sync = true. virtual Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) = 0; virtual Status Put(const WriteOptions& options, const Slice& key, const Slice& value) { return Put(options, DefaultColumnFamily(), key, value); } // Remove the database entry (if any) for "key". Returns OK on // success, and a non-OK status on error. It is not an error if "key" // did not exist in the database. // Note: consider setting options.sync = true. virtual Status Delete(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key) = 0; virtual Status Delete(const WriteOptions& options, const Slice& key) { return Delete(options, DefaultColumnFamily(), key); } // Remove the database entry for "key". Requires that the key exists // and was not overwritten. Returns OK on success, and a non-OK status // on error. 
It is not an error if "key" did not exist in the database. // // If a key is overwritten (by calling Put() multiple times), then the result // of calling SingleDelete() on this key is undefined. SingleDelete() only // behaves correctly if there has been only one Put() for this key since the // previous call to SingleDelete() for this key. // // This feature is currently an experimental performance optimization // for a very specific workload. It is up to the caller to ensure that // SingleDelete is only used for a key that is not deleted using Delete() or // written using Merge(). Mixing SingleDelete operations with Deletes and // Merges can result in undefined behavior. // // Note: consider setting options.sync = true. virtual Status SingleDelete(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key) = 0; virtual Status SingleDelete(const WriteOptions& options, const Slice& key) { return SingleDelete(options, DefaultColumnFamily(), key); } // Removes the database entries in the range ["begin_key", "end_key"), i.e., // including "begin_key" and excluding "end_key". Returns OK on success, and // a non-OK status on error. It is not an error if the database does not // contain any existing data in the range ["begin_key", "end_key"). // // If "end_key" comes before "start_key" according to the user's comparator, // a `Status::InvalidArgument` is returned. // // This feature is now usable in production, with the following caveats: // 1) Accumulating many range tombstones in the memtable will degrade read // performance; this can be avoided by manually flushing occasionally. // 2) Limiting the maximum number of open files in the presence of range // tombstones can degrade read performance. To avoid this problem, set // max_open_files to -1 whenever possible. virtual Status DeleteRange(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key); // Merge the database entry for "key" with "value". 
Returns OK on success, // and a non-OK status on error. The semantics of this operation is // determined by the user provided merge_operator when opening DB. // Note: consider setting options.sync = true. virtual Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) = 0; virtual Status Merge(const WriteOptions& options, const Slice& key, const Slice& value) { return Merge(options, DefaultColumnFamily(), key, value); } // Apply the specified updates to the database. // If `updates` contains no update, WAL will still be synced if // options.sync=true. // Returns OK on success, non-OK on failure. // Note: consider setting options.sync = true. virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0; // If the database contains an entry for "key" store the // corresponding value in *value and return OK. // // If timestamp is enabled and a non-null timestamp pointer is passed in, // timestamp is returned. // // If there is no entry for "key" leave *value unchanged and return // a status for which Status::IsNotFound() returns true. // // May return some other Status on an error. virtual inline Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) { assert(value != nullptr); PinnableSlice pinnable_val(value); assert(!pinnable_val.IsPinned()); auto s = Get(options, column_family, key, &pinnable_val); if (s.ok() && pinnable_val.IsPinned()) { value->assign(pinnable_val.data(), pinnable_val.size()); } // else value is already assigned return s; } virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) = 0; virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) { return Get(options, DefaultColumnFamily(), key, value); } // Get() methods that return timestamp. 
Derived DB classes don't need to worry // about this group of methods if they don't care about timestamp feature. virtual inline Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value, std::string* timestamp) { assert(value != nullptr); PinnableSlice pinnable_val(value); assert(!pinnable_val.IsPinned()); auto s = Get(options, column_family, key, &pinnable_val, timestamp); if (s.ok() && pinnable_val.IsPinned()) { value->assign(pinnable_val.data(), pinnable_val.size()); } // else value is already assigned return s; } virtual Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, PinnableSlice* /*value*/, std::string* /*timestamp*/) { return Status::NotSupported( "Get() that returns timestamp is not implemented."); } virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value, std::string* timestamp) { return Get(options, DefaultColumnFamily(), key, value, timestamp); } // Returns all the merge operands corresponding to the key. If the // number of merge operands in DB is greater than // merge_operands_options.expected_max_number_of_operands // no merge operands are returned and status is Incomplete. Merge operands // returned are in the order of insertion. // merge_operands- Points to an array of at-least // merge_operands_options.expected_max_number_of_operands and the // caller is responsible for allocating it. If the status // returned is Incomplete then number_of_operands will contain // the total number of merge operands found in DB for key. virtual Status GetMergeOperands( const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* merge_operands, GetMergeOperandsOptions* get_merge_operands_options, int* number_of_operands) = 0; // Consistent Get of many keys across column families without the need // for an explicit snapshot. 
NOTE: the implementation of this MultiGet API // does not have the performance benefits of the void-returning MultiGet // functions. // // If keys[i] does not exist in the database, then the i'th returned // status will be one for which Status::IsNotFound() is true, and // (*values)[i] will be set to some arbitrary value (often ""). Otherwise, // the i'th returned status will have Status::ok() true, and (*values)[i] // will store the value associated with keys[i]. // // (*values) will always be resized to be the same size as (keys). // Similarly, the number of returned statuses will be the number of keys. // Note: keys will not be "de-duplicated". Duplicate keys will return // duplicate values in order. virtual std::vector MultiGet( const ReadOptions& options, const std::vector& column_family, const std::vector& keys, std::vector* values) = 0; virtual std::vector MultiGet(const ReadOptions& options, const std::vector& keys, std::vector* values) { return MultiGet( options, std::vector(keys.size(), DefaultColumnFamily()), keys, values); } virtual std::vector MultiGet( const ReadOptions& /*options*/, const std::vector& /*column_family*/, const std::vector& keys, std::vector* /*values*/, std::vector* /*timestamps*/) { return std::vector( keys.size(), Status::NotSupported( "MultiGet() returning timestamps not implemented.")); } virtual std::vector MultiGet(const ReadOptions& options, const std::vector& keys, std::vector* values, std::vector* timestamps) { return MultiGet( options, std::vector(keys.size(), DefaultColumnFamily()), keys, values, timestamps); } // Overloaded MultiGet API that improves performance by batching operations // in the read path for greater efficiency. Currently, only the block based // table format with full filters are supported. Other table formats such // as plain table, block based table with block based filters and // partitioned indexes will still work, but will not get any performance // benefits. 
// Parameters - // options - ReadOptions // column_family - ColumnFamilyHandle* that the keys belong to. All the keys // passed to the API are restricted to a single column family // num_keys - Number of keys to lookup // keys - Pointer to C style array of key Slices with num_keys elements // values - Pointer to C style array of PinnableSlices with num_keys elements // statuses - Pointer to C style array of Status with num_keys elements // sorted_input - If true, it means the input keys are already sorted by key // order, so the MultiGet() API doesn't have to sort them // again. If false, the keys will be copied and sorted // internally by the API - the input array will not be // modified virtual void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool /*sorted_input*/ = false) { std::vector cf; std::vector user_keys; std::vector status; std::vector vals; for (size_t i = 0; i < num_keys; ++i) { cf.emplace_back(column_family); user_keys.emplace_back(keys[i]); } status = MultiGet(options, cf, user_keys, &vals); std::copy(status.begin(), status.end(), statuses); for (auto& value : vals) { values->PinSelf(value); values++; } } virtual void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool /*sorted_input*/ = false) { std::vector cf; std::vector user_keys; std::vector status; std::vector vals; std::vector tss; for (size_t i = 0; i < num_keys; ++i) { cf.emplace_back(column_family); user_keys.emplace_back(keys[i]); } status = MultiGet(options, cf, user_keys, &vals, &tss); std::copy(status.begin(), status.end(), statuses); std::copy(tss.begin(), tss.end(), timestamps); for (auto& value : vals) { values->PinSelf(value); values++; } } // Overloaded MultiGet API that improves performance by batching operations // in the read 
path for greater efficiency. Currently, only the block based // table format with full filters are supported. Other table formats such // as plain table, block based table with block based filters and // partitioned indexes will still work, but will not get any performance // benefits. // Parameters - // options - ReadOptions // column_family - ColumnFamilyHandle* that the keys belong to. All the keys // passed to the API are restricted to a single column family // num_keys - Number of keys to lookup // keys - Pointer to C style array of key Slices with num_keys elements // values - Pointer to C style array of PinnableSlices with num_keys elements // statuses - Pointer to C style array of Status with num_keys elements // sorted_input - If true, it means the input keys are already sorted by key // order, so the MultiGet() API doesn't have to sort them // again. If false, the keys will be copied and sorted // internally by the API - the input array will not be // modified virtual void MultiGet(const ReadOptions& options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, Status* statuses, const bool /*sorted_input*/ = false) { std::vector cf; std::vector user_keys; std::vector status; std::vector vals; for (size_t i = 0; i < num_keys; ++i) { cf.emplace_back(column_families[i]); user_keys.emplace_back(keys[i]); } status = MultiGet(options, cf, user_keys, &vals); std::copy(status.begin(), status.end(), statuses); for (auto& value : vals) { values->PinSelf(value); values++; } } virtual void MultiGet(const ReadOptions& options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool /*sorted_input*/ = false) { std::vector cf; std::vector user_keys; std::vector status; std::vector vals; std::vector tss; for (size_t i = 0; i < num_keys; ++i) { cf.emplace_back(column_families[i]); user_keys.emplace_back(keys[i]); } status = 
MultiGet(options, cf, user_keys, &vals, &tss); std::copy(status.begin(), status.end(), statuses); std::copy(tss.begin(), tss.end(), timestamps); for (auto& value : vals) { values->PinSelf(value); values++; } } // If the key definitely does not exist in the database, then this method // returns false, else true. If the caller wants to obtain value when the key // is found in memory, a bool for 'value_found' must be passed. 'value_found' // will be true on return if value has been set properly. // This check is potentially lighter-weight than invoking DB::Get(). One way // to make this lighter weight is to avoid doing any IOs. // Default implementation here returns true and sets 'value_found' to false virtual bool KeyMayExist(const ReadOptions& /*options*/, ColumnFamilyHandle* /*column_family*/, const Slice& /*key*/, std::string* /*value*/, std::string* /*timestamp*/, bool* value_found = nullptr) { if (value_found != nullptr) { *value_found = false; } return true; } virtual bool KeyMayExist(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value, bool* value_found = nullptr) { return KeyMayExist(options, column_family, key, value, /*timestamp=*/nullptr, value_found); } virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, std::string* value, bool* value_found = nullptr) { return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found); } virtual bool KeyMayExist(const ReadOptions& options, const Slice& key, std::string* value, std::string* timestamp, bool* value_found = nullptr) { return KeyMayExist(options, DefaultColumnFamily(), key, value, timestamp, value_found); } // Return a heap-allocated iterator over the contents of the database. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). // // Caller should delete the iterator when it is no longer needed. 
// The returned iterator should be deleted before this db is deleted. virtual Iterator* NewIterator(const ReadOptions& options, ColumnFamilyHandle* column_family) = 0; virtual Iterator* NewIterator(const ReadOptions& options) { return NewIterator(options, DefaultColumnFamily()); } // Returns iterators from a consistent database state across multiple // column families. Iterators are heap allocated and need to be deleted // before the db is deleted virtual Status NewIterators( const ReadOptions& options, const std::vector& column_families, std::vector* iterators) = 0; // Return a handle to the current DB state. Iterators created with // this handle will all observe a stable snapshot of the current DB // state. The caller must call ReleaseSnapshot(result) when the // snapshot is no longer needed. // // nullptr will be returned if the DB fails to take a snapshot or does // not support snapshot. virtual const Snapshot* GetSnapshot() = 0; // Release a previously acquired snapshot. The caller must not // use "snapshot" after this call. virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0; #ifndef ROCKSDB_LITE // Contains all valid property arguments for GetProperty(). // // NOTE: Property names cannot end in numbers since those are interpreted as // arguments, e.g., see kNumFilesAtLevelPrefix. struct Properties { // "rocksdb.num-files-at-level" - returns string containing the number // of files at level , where is an ASCII representation of a // level number (e.g., "0"). static const std::string kNumFilesAtLevelPrefix; // "rocksdb.compression-ratio-at-level" - returns string containing the // compression ratio of data at level , where is an ASCII // representation of a level number (e.g., "0"). Here, compression // ratio is defined as uncompressed data size / compressed file size. // Returns "-1.0" if no open files at level . 
static const std::string kCompressionRatioAtLevelPrefix; // "rocksdb.stats" - returns a multi-line string containing the data // described by kCFStats followed by the data described by kDBStats. static const std::string kStats; // "rocksdb.sstables" - returns a multi-line string summarizing current // SST files. static const std::string kSSTables; // "rocksdb.cfstats" - Both of "rocksdb.cfstats-no-file-histogram" and // "rocksdb.cf-file-histogram" together. See below for description // of the two. static const std::string kCFStats; // "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with // general columm family stats per-level over db's lifetime ("L"), // aggregated over db's lifetime ("Sum"), and aggregated over the // interval since the last retrieval ("Int"). // It could also be used to return the stats in the format of the map. // In this case there will a pair of string to array of double for // each level as well as for "Sum". "Int" stats will not be affected // when this form of stats are retrieved. static const std::string kCFStatsNoFileHistogram; // "rocksdb.cf-file-histogram" - print out how many file reads to every // level, as well as the histogram of latency of single requests. static const std::string kCFFileHistogram; // "rocksdb.dbstats" - returns a multi-line string with general database // stats, both cumulative (over the db's lifetime) and interval (since // the last retrieval of kDBStats). static const std::string kDBStats; // "rocksdb.levelstats" - returns multi-line string containing the number // of files per level and total size of each level (MB). static const std::string kLevelStats; // "rocksdb.num-immutable-mem-table" - returns number of immutable // memtables that have not yet been flushed. static const std::string kNumImmutableMemTable; // "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable // memtables that have already been flushed. 
static const std::string kNumImmutableMemTableFlushed; // "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is // pending; otherwise, returns 0. static const std::string kMemTableFlushPending; // "rocksdb.num-running-flushes" - returns the number of currently running // flushes. static const std::string kNumRunningFlushes; // "rocksdb.compaction-pending" - returns 1 if at least one compaction is // pending; otherwise, returns 0. static const std::string kCompactionPending; // "rocksdb.num-running-compactions" - returns the number of currently // running compactions. static const std::string kNumRunningCompactions; // "rocksdb.background-errors" - returns accumulated number of background // errors. static const std::string kBackgroundErrors; // "rocksdb.cur-size-active-mem-table" - returns approximate size of active // memtable (bytes). static const std::string kCurSizeActiveMemTable; // "rocksdb.cur-size-all-mem-tables" - returns approximate size of active // and unflushed immutable memtables (bytes). static const std::string kCurSizeAllMemTables; // "rocksdb.size-all-mem-tables" - returns approximate size of active, // unflushed immutable, and pinned immutable memtables (bytes). static const std::string kSizeAllMemTables; // "rocksdb.num-entries-active-mem-table" - returns total number of entries // in the active memtable. static const std::string kNumEntriesActiveMemTable; // "rocksdb.num-entries-imm-mem-tables" - returns total number of entries // in the unflushed immutable memtables. static const std::string kNumEntriesImmMemTables; // "rocksdb.num-deletes-active-mem-table" - returns total number of delete // entries in the active memtable. static const std::string kNumDeletesActiveMemTable; // "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete // entries in the unflushed immutable memtables. 
static const std::string kNumDeletesImmMemTables; // "rocksdb.estimate-num-keys" - returns estimated number of total keys in // the active and unflushed immutable memtables and storage. static const std::string kEstimateNumKeys; // "rocksdb.estimate-table-readers-mem" - returns estimated memory used for // reading SST tables, excluding memory used in block cache (e.g., // filter and index blocks). static const std::string kEstimateTableReadersMem; // "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete // files is enabled; otherwise, returns a non-zero number. static const std::string kIsFileDeletionsEnabled; // "rocksdb.num-snapshots" - returns number of unreleased snapshots of the // database. static const std::string kNumSnapshots; // "rocksdb.oldest-snapshot-time" - returns number representing unix // timestamp of oldest unreleased snapshot. static const std::string kOldestSnapshotTime; // "rocksdb.oldest-snapshot-sequence" - returns number representing // sequence number of oldest unreleased snapshot. static const std::string kOldestSnapshotSequence; // "rocksdb.num-live-versions" - returns number of live versions. `Version` // is an internal data structure. See version_set.h for details. More // live versions often mean more SST files are held from being deleted, // by iterators or unfinished compactions. static const std::string kNumLiveVersions; // "rocksdb.current-super-version-number" - returns number of current LSM // version. It is a uint64_t integer number, incremented after there is // any change to the LSM tree. The number is not preserved after restarting // the DB. After DB restart, it will start from 0 again. static const std::string kCurrentSuperVersionNumber; // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of // live data in bytes. static const std::string kEstimateLiveDataSize; // "rocksdb.min-log-number-to-keep" - return the minimum log number of the // log files that should be kept. 
static const std::string kMinLogNumberToKeep; // "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file // number for an obsolete SST to be kept. The max value of `uint64_t` // will be returned if all obsolete files can be deleted. static const std::string kMinObsoleteSstNumberToKeep; // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST // files. // WARNING: may slow down online queries if there are too many files. static const std::string kTotalSstFilesSize; // "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST // files belong to the latest LSM tree. static const std::string kLiveSstFilesSize; // "rocksdb.base-level" - returns number of level to which L0 data will be // compacted. static const std::string kBaseLevel; // "rocksdb.estimate-pending-compaction-bytes" - returns estimated total // number of bytes compaction needs to rewrite to get all levels down // to under target size. Not valid for other compactions than level- // based. static const std::string kEstimatePendingCompactionBytes; // "rocksdb.aggregated-table-properties" - returns a string representation // of the aggregated table properties of the target column family. static const std::string kAggregatedTableProperties; // "rocksdb.aggregated-table-properties-at-level", same as the previous // one but only returns the aggregated table properties of the // specified level "N" at the target column family. static const std::string kAggregatedTablePropertiesAtLevel; // "rocksdb.actual-delayed-write-rate" - returns the current actual delayed // write rate. 0 means no delay. static const std::string kActualDelayedWriteRate; // "rocksdb.is-write-stopped" - Return 1 if write has been stopped. static const std::string kIsWriteStopped; // "rocksdb.estimate-oldest-key-time" - returns an estimation of // oldest key timestamp in the DB. Currently only available for // FIFO compaction with // compaction_options_fifo.allow_compaction = false. 
static const std::string kEstimateOldestKeyTime; // "rocksdb.block-cache-capacity" - returns block cache capacity. static const std::string kBlockCacheCapacity; // "rocksdb.block-cache-usage" - returns the memory size for the entries // residing in block cache. static const std::string kBlockCacheUsage; // "rocksdb.block-cache-pinned-usage" - returns the memory size for the // entries being pinned. static const std::string kBlockCachePinnedUsage; // "rocksdb.options-statistics" - returns multi-line string // of options.statistics static const std::string kOptionsStatistics; }; #endif /* ROCKSDB_LITE */ // DB implementations can export properties about their state via this method. // If "property" is a valid property understood by this DB implementation (see // Properties struct above for valid options), fills "*value" with its current // value and returns true. Otherwise, returns false. virtual bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property, std::string* value) = 0; virtual bool GetProperty(const Slice& property, std::string* value) { return GetProperty(DefaultColumnFamily(), property, value); } virtual bool GetMapProperty(ColumnFamilyHandle* column_family, const Slice& property, std::map* value) = 0; virtual bool GetMapProperty(const Slice& property, std::map* value) { return GetMapProperty(DefaultColumnFamily(), property, value); } // Similar to GetProperty(), but only works for a subset of properties whose // return value is an integer. Return the value by integer. 
Supported // properties: // "rocksdb.num-immutable-mem-table" // "rocksdb.mem-table-flush-pending" // "rocksdb.compaction-pending" // "rocksdb.background-errors" // "rocksdb.cur-size-active-mem-table" // "rocksdb.cur-size-all-mem-tables" // "rocksdb.size-all-mem-tables" // "rocksdb.num-entries-active-mem-table" // "rocksdb.num-entries-imm-mem-tables" // "rocksdb.num-deletes-active-mem-table" // "rocksdb.num-deletes-imm-mem-tables" // "rocksdb.estimate-num-keys" // "rocksdb.estimate-table-readers-mem" // "rocksdb.is-file-deletions-enabled" // "rocksdb.num-snapshots" // "rocksdb.oldest-snapshot-time" // "rocksdb.num-live-versions" // "rocksdb.current-super-version-number" // "rocksdb.estimate-live-data-size" // "rocksdb.min-log-number-to-keep" // "rocksdb.min-obsolete-sst-number-to-keep" // "rocksdb.total-sst-files-size" // "rocksdb.live-sst-files-size" // "rocksdb.base-level" // "rocksdb.estimate-pending-compaction-bytes" // "rocksdb.num-running-compactions" // "rocksdb.num-running-flushes" // "rocksdb.actual-delayed-write-rate" // "rocksdb.is-write-stopped" // "rocksdb.estimate-oldest-key-time" // "rocksdb.block-cache-capacity" // "rocksdb.block-cache-usage" // "rocksdb.block-cache-pinned-usage" virtual bool GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) = 0; virtual bool GetIntProperty(const Slice& property, uint64_t* value) { return GetIntProperty(DefaultColumnFamily(), property, value); } // Reset internal stats for DB and all column families. // Note this doesn't reset options.statistics as it is not owned by // DB. virtual Status ResetStats() { return Status::NotSupported("Not implemented"); } // Same as GetIntProperty(), but this one returns the aggregated int // property from all column families. 
virtual bool GetAggregatedIntProperty(const Slice& property, uint64_t* value) = 0; // Flags for DB::GetSizeApproximation that specify whether memtable // stats should be included, or file stats approximation or both enum SizeApproximationFlags : uint8_t { NONE = 0, INCLUDE_MEMTABLES = 1 << 0, INCLUDE_FILES = 1 << 1 }; // For each i in [0,n-1], store in "sizes[i]", the approximate // file system space used by keys in "[range[i].start .. range[i].limit)" // in a single column family. // // Note that the returned sizes measure file system space usage, so // if the user data compresses by a factor of ten, the returned // sizes will be one-tenth the size of the corresponding user data size. virtual Status GetApproximateSizes(const SizeApproximationOptions& options, ColumnFamilyHandle* column_family, const Range* ranges, int n, uint64_t* sizes) = 0; // Simpler versions of the GetApproximateSizes() method above. // The include_flags argumenbt must of type DB::SizeApproximationFlags // and can not be NONE. virtual void GetApproximateSizes(ColumnFamilyHandle* column_family, const Range* ranges, int n, uint64_t* sizes, uint8_t include_flags = INCLUDE_FILES) { SizeApproximationOptions options; options.include_memtabtles = (include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0; options.include_files = (include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0; GetApproximateSizes(options, column_family, ranges, n, sizes); } virtual void GetApproximateSizes(const Range* ranges, int n, uint64_t* sizes, uint8_t include_flags = INCLUDE_FILES) { GetApproximateSizes(DefaultColumnFamily(), ranges, n, sizes, include_flags); } // The method is similar to GetApproximateSizes, except it // returns approximate number of records in memtables. 
virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, const Range& range, uint64_t* const count, uint64_t* const size) = 0; virtual void GetApproximateMemTableStats(const Range& range, uint64_t* const count, uint64_t* const size) { GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size); } // Deprecated versions of GetApproximateSizes ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( const Range* range, int n, uint64_t* sizes, bool include_memtable) { uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; if (include_memtable) { include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; } GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags); } ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes( ColumnFamilyHandle* column_family, const Range* range, int n, uint64_t* sizes, bool include_memtable) { uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES; if (include_memtable) { include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES; } GetApproximateSizes(column_family, range, n, sizes, include_flags); } // Compact the underlying storage for the key range [*begin,*end]. // The actual compaction interval might be superset of [*begin, *end]. // In particular, deleted and overwritten versions are discarded, // and the data is rearranged to reduce the cost of operations // needed to access the data. This operation should typically only // be invoked by users who understand the underlying implementation. // // begin==nullptr is treated as a key before all keys in the database. // end==nullptr is treated as a key after all keys in the database. // Therefore the following call will compact the entire database: // db->CompactRange(options, nullptr, nullptr); // Note that after the entire database is compacted, all data are pushed // down to the last level containing any data. 
If the total data size after // compaction is reduced, that level might not be appropriate for hosting all // the files. In this case, client could set options.change_level to true, to // move the files back to the minimum level capable of holding the data set // or a given level (specified by non-negative options.target_level). virtual Status CompactRange(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) = 0; virtual Status CompactRange(const CompactRangeOptions& options, const Slice* begin, const Slice* end) { return CompactRange(options, DefaultColumnFamily(), begin, end); } ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end, bool change_level = false, int target_level = -1, uint32_t target_path_id = 0) { CompactRangeOptions options; options.change_level = change_level; options.target_level = target_level; options.target_path_id = target_path_id; return CompactRange(options, column_family, begin, end); } ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange( const Slice* begin, const Slice* end, bool change_level = false, int target_level = -1, uint32_t target_path_id = 0) { CompactRangeOptions options; options.change_level = change_level; options.target_level = target_level; options.target_path_id = target_path_id; return CompactRange(options, DefaultColumnFamily(), begin, end); } virtual Status SetOptions( ColumnFamilyHandle* /*column_family*/, const std::unordered_map& /*new_options*/) { return Status::NotSupported("Not implemented"); } virtual Status SetOptions( const std::unordered_map& new_options) { return SetOptions(DefaultColumnFamily(), new_options); } virtual Status SetDBOptions( const std::unordered_map& new_options) = 0; // CompactFiles() inputs a list of files specified by file numbers and // compacts them to the specified level. 
Note that the behavior is different // from CompactRange() in that CompactFiles() performs the compaction job // using the CURRENT thread. // // @see GetDataBaseMetaData // @see GetColumnFamilyMetaData virtual Status CompactFiles( const CompactionOptions& compact_options, ColumnFamilyHandle* column_family, const std::vector& input_file_names, const int output_level, const int output_path_id = -1, std::vector* const output_file_names = nullptr, CompactionJobInfo* compaction_job_info = nullptr) = 0; virtual Status CompactFiles( const CompactionOptions& compact_options, const std::vector& input_file_names, const int output_level, const int output_path_id = -1, std::vector* const output_file_names = nullptr, CompactionJobInfo* compaction_job_info = nullptr) { return CompactFiles(compact_options, DefaultColumnFamily(), input_file_names, output_level, output_path_id, output_file_names, compaction_job_info); } // This function will wait until all currently running background processes // finish. After it returns, no background process will be run until // ContinueBackgroundWork is called, once for each preceding OK-returning // call to PauseBackgroundWork. virtual Status PauseBackgroundWork() = 0; virtual Status ContinueBackgroundWork() = 0; // This function will enable automatic compactions for the given column // families if they were previously disabled. The function will first set the // disable_auto_compactions option for each column family to 'false', after // which it will schedule a flush/compaction. // // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API // does NOT schedule a flush/compaction afterwards, and only changes the // parameter itself within the column family option. // virtual Status EnableAutoCompaction( const std::vector& column_family_handles) = 0; virtual void DisableManualCompaction() = 0; virtual void EnableManualCompaction() = 0; // Number of levels used for this DB. 
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0; virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); } // Maximum level to which a new compacted memtable is pushed if it // does not create overlap. virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0; virtual int MaxMemCompactionLevel() { return MaxMemCompactionLevel(DefaultColumnFamily()); } // Number of files in level-0 that would stop writes. virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0; virtual int Level0StopWriteTrigger() { return Level0StopWriteTrigger(DefaultColumnFamily()); } // Get DB name -- the exact same name that was provided as an argument to // DB::Open() virtual const std::string& GetName() const = 0; // Get Env object from the DB virtual Env* GetEnv() const = 0; virtual FileSystem* GetFileSystem() const; // Get DB Options that we use. During the process of opening the // column family, the options provided when calling DB::Open() or // DB::CreateColumnFamily() will have been "sanitized" and transformed // in an implementation-defined manner. virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0; virtual Options GetOptions() const { return GetOptions(DefaultColumnFamily()); } virtual DBOptions GetDBOptions() const = 0; // Flush all mem-table data. // Flush a single column family, even when atomic flush is enabled. To flush // multiple column families, use Flush(options, column_families). virtual Status Flush(const FlushOptions& options, ColumnFamilyHandle* column_family) = 0; virtual Status Flush(const FlushOptions& options) { return Flush(options, DefaultColumnFamily()); } // Flushes multiple column families. // If atomic flush is not enabled, Flush(options, column_families) is // equivalent to calling Flush(options, column_family) multiple times. 
// If atomic flush is enabled, Flush(options, column_families) will flush all // column families specified in 'column_families' up to the latest sequence // number at the time when flush is requested. // Note that RocksDB 5.15 and earlier may not be able to open later versions // with atomic flush enabled. virtual Status Flush( const FlushOptions& options, const std::vector& column_families) = 0; // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL // afterwards. virtual Status FlushWAL(bool /*sync*/) { return Status::NotSupported("FlushWAL not implemented"); } // Sync the WAL. Note that Write() followed by SyncWAL() is not exactly the // same as Write() with sync=true: in the latter case the changes won't be // visible until the sync is done. // Currently only works if allow_mmap_writes = false in Options. virtual Status SyncWAL() = 0; // Lock the WAL. Also flushes the WAL after locking. virtual Status LockWAL() { return Status::NotSupported("LockWAL not implemented"); } // Unlock the WAL. virtual Status UnlockWAL() { return Status::NotSupported("UnlockWAL not implemented"); } // The sequence number of the most recent transaction. virtual SequenceNumber GetLatestSequenceNumber() const = 0; // Instructs DB to preserve deletes with sequence numbers >= passed seqnum. // Has no effect if DBOptions.preserve_deletes is set to false. // This function assumes that user calls this function with monotonically // increasing seqnums (otherwise we can't guarantee that a particular delete // hasn't been already processed); returns true if the value was successfully // updated, false if user attempted to call it with seqnum <= current value. virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0; // Prevent file deletions. Compactions will continue to occur, // but no obsolete files will be deleted. Calling this multiple // times has the same effect as calling it once. 
virtual Status DisableFileDeletions() = 0; // Allow compactions to delete obsolete files. // If force == true, the call to EnableFileDeletions() will guarantee that // file deletions are enabled after the call, even if DisableFileDeletions() // was called multiple times before. // If force == false, EnableFileDeletions will only enable file deletion // after it's been called at least as many times as DisableFileDeletions(), // enabling the two methods to be called by two threads concurrently without // synchronization -- i.e., file deletions will be enabled only after both // threads call EnableFileDeletions() virtual Status EnableFileDeletions(bool force = true) = 0; #ifndef ROCKSDB_LITE // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup // Retrieve the list of all files in the database. The files are // relative to the dbname and are not absolute paths. Despite being relative // paths, the file names begin with "/". The valid size of the manifest file // is returned in manifest_file_size. The manifest file is an ever growing // file, but only the portion specified by manifest_file_size is valid for // this snapshot. Setting flush_memtable to true does Flush before recording // the live files. Setting flush_memtable to false is useful when we don't // want to wait for flush which may have to wait for compaction to complete // taking an indeterminate time. 
// // In case you have multiple column families, even if flush_memtable is true, // you still need to call GetSortedWalFiles after GetLiveFiles to compensate // for new data that arrived to already-flushed column families while other // column families were flushing virtual Status GetLiveFiles(std::vector&, uint64_t* manifest_file_size, bool flush_memtable = true) = 0; // Retrieve the sorted list of all wal files with earliest file first virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; // Retrieve information about the current wal file // // Note that the log might have rolled after this call in which case // the current_log_file would not point to the current log file. // // Additionally, for the sake of optimization current_log_file->StartSequence // would always be set to 0 virtual Status GetCurrentWalFile( std::unique_ptr* current_log_file) = 0; // Retrieves the creation time of the oldest file in the DB. // This API only works if max_open_files = -1; otherwise the // Status returned is Status::NotSupported() // The file creation time is set using the env provided to the DB. // If the DB was created from a very old release then it's possible that // the SST files might not have file_creation_time property and even after // moving to a newer release it's possible that some files never got compacted // and may not have file_creation_time property. In both cases // file_creation_time is considered 0 which means this API will return // creation_time = 0 as there wouldn't be a timestamp lower than 0. virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0; // Note: this API is not yet consistent with WritePrepared transactions. // Sets iter to an iterator that is positioned at a write-batch containing // seq_number. 
If the sequence number is non existent, it returns an iterator // at the first available seq_no after the requested seq_no // Returns Status::OK if iterator is valid // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to // use this API, else the WAL files will get // cleared aggressively and the iterator might keep getting invalid before // an update is read. virtual Status GetUpdatesSince( SequenceNumber seq_number, std::unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options = TransactionLogIterator::ReadOptions()) = 0; // Windows API macro interference #undef DeleteFile // Delete the file name from the db directory and update the internal state to // reflect that. Supports deletion of sst and log files only. 'name' must be // path relative to the db directory. e.g. 000001.sst, /archive/000003.log virtual Status DeleteFile(std::string name) = 0; // Returns a list of all table files with their level, start key // and end key virtual void GetLiveFilesMetaData( std::vector* /*metadata*/) {} // Obtains the meta data of the specified column family of the DB. virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, ColumnFamilyMetaData* /*metadata*/) {} // Get the metadata of the default column family. void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) { GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); } // IngestExternalFile() will load a list of external SST files (1) into the DB // Two primary modes are supported: // - Duplicate keys in the new files will overwrite existing keys (default) // - Duplicate keys will be skipped (set ingest_behind=true) // In the first mode we will try to find the lowest possible level that // the file can fit in, and ingest the file into this level (2). A file that // has a key range that overlaps with the memtable key range will require us // to Flush the memtable first before ingesting the file. 
// In the second mode we will always ingest in the bottommost level (see // docs to IngestExternalFileOptions::ingest_behind). // // (1) External SST files can be created using SstFileWriter // (2) We will try to ingest the files to the lowest possible level // even if the file compression doesn't match the level compression // (3) If IngestExternalFileOptions->ingest_behind is set to true, // we always ingest at the bottommost level, which should be reserved // for this purpose (see DBOptions::allow_ingest_behind flag). virtual Status IngestExternalFile( ColumnFamilyHandle* column_family, const std::vector& external_files, const IngestExternalFileOptions& options) = 0; virtual Status IngestExternalFile( const std::vector& external_files, const IngestExternalFileOptions& options) { return IngestExternalFile(DefaultColumnFamily(), external_files, options); } // IngestExternalFiles() will ingest files for multiple column families, and // record the result atomically to the MANIFEST. // If this function returns OK, all column families' ingestion must succeed. // If this function returns non-OK, or the process crashes, then none of the // files will be ingested into the database after recovery. // Note that it is possible for an application to observe a mixed state during // the execution of this function. If the user performs range scan over the // column families with iterators, iterator on one column family may return // ingested data, while iterator on other column family returns old data. // Users can use snapshot for a consistent view of data. // If your db ingests multiple SST files using this API, i.e. args.size() // > 1, then RocksDB 5.15 and earlier will not be able to open it. // // REQUIRES: each arg corresponds to a different column family: namely, for // 0 <= i < j < len(args), args[i].column_family != args[j].column_family. 
virtual Status IngestExternalFiles( const std::vector& args) = 0; // CreateColumnFamilyWithImport() will create a new column family with // column_family_name and import external SST files specified in metadata into // this column family. // (1) External SST files can be created using SstFileWriter. // (2) External SST files can be exported from a particular column family in // an existing DB. // Option in import_options specifies whether the external files are copied or // moved (default is copy). When option specifies copy, managing files at // external_file_path is caller's responsibility. When option specifies a // move, the call ensures that the specified files at external_file_path are // deleted on successful return and files are not modified on any error // return. // On error return, column family handle returned will be nullptr. // ColumnFamily will be present on successful return and will not be present // on error return. ColumnFamily may be present on any crash during this call. 
virtual Status CreateColumnFamilyWithImport( const ColumnFamilyOptions& options, const std::string& column_family_name, const ImportColumnFamilyOptions& import_options, const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) = 0; virtual Status VerifyChecksum(const ReadOptions& read_options) = 0; virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } // AddFile() is deprecated, please use IngestExternalFile() ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( ColumnFamilyHandle* column_family, const std::vector& file_path_list, bool move_file = false, bool skip_snapshot_check = false) { IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; ifo.allow_blocking_flush = false; return IngestExternalFile(column_family, file_path_list, ifo); } ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( const std::vector& file_path_list, bool move_file = false, bool skip_snapshot_check = false) { IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; ifo.allow_blocking_flush = false; return IngestExternalFile(DefaultColumnFamily(), file_path_list, ifo); } // AddFile() is deprecated, please use IngestExternalFile() ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( ColumnFamilyHandle* column_family, const std::string& file_path, bool move_file = false, bool skip_snapshot_check = false) { IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; ifo.allow_blocking_flush = false; return IngestExternalFile(column_family, {file_path}, ifo); } ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( const std::string& file_path, bool move_file = false, bool skip_snapshot_check = false) { IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = 
false; ifo.allow_blocking_flush = false; return IngestExternalFile(DefaultColumnFamily(), {file_path}, ifo); } // Load table file with information "file_info" into "column_family" ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( ColumnFamilyHandle* column_family, const std::vector& file_info_list, bool move_file = false, bool skip_snapshot_check = false) { std::vector external_files; for (const ExternalSstFileInfo& file_info : file_info_list) { external_files.push_back(file_info.file_path); } IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; ifo.allow_blocking_flush = false; return IngestExternalFile(column_family, external_files, ifo); } ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( const std::vector& file_info_list, bool move_file = false, bool skip_snapshot_check = false) { std::vector external_files; for (const ExternalSstFileInfo& file_info : file_info_list) { external_files.push_back(file_info.file_path); } IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; ifo.allow_blocking_flush = false; return IngestExternalFile(DefaultColumnFamily(), external_files, ifo); } ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info, bool move_file = false, bool skip_snapshot_check = false) { IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; ifo.allow_blocking_flush = false; return IngestExternalFile(column_family, {file_info->file_path}, ifo); } ROCKSDB_DEPRECATED_FUNC virtual Status AddFile( const ExternalSstFileInfo* file_info, bool move_file = false, bool skip_snapshot_check = false) { IngestExternalFileOptions ifo; ifo.move_files = move_file; ifo.snapshot_consistency = !skip_snapshot_check; ifo.allow_global_seqno = false; 
ifo.allow_blocking_flush = false; return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path}, ifo); } #endif // ROCKSDB_LITE // Returns the unique ID which is read from IDENTITY file during the opening // of database by setting in the identity variable // Returns Status::OK if identity could be set properly virtual Status GetDbIdentity(std::string& identity) const = 0; // Returns default column family handle virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0; #ifndef ROCKSDB_LITE virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, TablePropertiesCollection* props) = 0; virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { return GetPropertiesOfAllTables(DefaultColumnFamily(), props); } virtual Status GetPropertiesOfTablesInRange( ColumnFamilyHandle* column_family, const Range* range, std::size_t n, TablePropertiesCollection* props) = 0; virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/, const Slice* /*begin*/, const Slice* /*end*/) { return Status::NotSupported("SuggestCompactRange() is not implemented."); } virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/, int /*target_level*/) { return Status::NotSupported("PromoteL0() is not implemented."); } // Trace DB operations. Use EndTrace() to stop tracing. virtual Status StartTrace(const TraceOptions& /*options*/, std::unique_ptr&& /*trace_writer*/) { return Status::NotSupported("StartTrace() is not implemented."); } virtual Status EndTrace() { return Status::NotSupported("EndTrace() is not implemented."); } // Trace block cache accesses. Use EndBlockCacheTrace() to stop tracing. 
virtual Status StartBlockCacheTrace( const TraceOptions& /*options*/, std::unique_ptr&& /*trace_writer*/) { return Status::NotSupported("StartBlockCacheTrace() is not implemented."); } virtual Status EndBlockCacheTrace() { return Status::NotSupported("EndBlockCacheTrace() is not implemented."); } #endif // ROCKSDB_LITE // Needed for StackableDB virtual DB* GetRootDB() { return this; } // Given a window [start_time, end_time), setup a StatsHistoryIterator // to access stats history. Note the start_time and end_time are epoch // time measured in seconds, and end_time is an exclusive bound. virtual Status GetStatsHistory( uint64_t /*start_time*/, uint64_t /*end_time*/, std::unique_ptr* /*stats_iterator*/) { return Status::NotSupported("GetStatsHistory() is not implemented."); } #ifndef ROCKSDB_LITE // Make the secondary instance catch up with the primary by tailing and // replaying the MANIFEST and WAL of the primary. // Column families created by the primary after the secondary instance starts // will be ignored unless the secondary instance closes and restarts with the // newly created column families. // Column families that exist before secondary instance starts and dropped by // the primary afterwards will be marked as dropped. However, as long as the // secondary instance does not delete the corresponding column family // handles, the data of the column family is still accessible to the // secondary. // TODO: we will support WAL tailing soon. virtual Status TryCatchUpWithPrimary() { return Status::NotSupported("Supported only by secondary instance"); } #endif // !ROCKSDB_LITE }; // Destroy the contents of the specified database. // Be very careful using this method. Status DestroyDB(const std::string& name, const Options& options, const std::vector& column_families = std::vector()); #ifndef ROCKSDB_LITE // If a DB cannot be opened, you may attempt to call this method to // resurrect as much of the contents of the database as possible. 
// Some data may be lost, so be careful when calling this function // on a database that contains important information. // // With this API, we will warn and skip data associated with column families not // specified in column_families. // // @param column_families Descriptors for known column families Status RepairDB(const std::string& dbname, const DBOptions& db_options, const std::vector& column_families); // @param unknown_cf_opts Options for column families encountered during the // repair that were not specified in column_families. Status RepairDB(const std::string& dbname, const DBOptions& db_options, const std::vector& column_families, const ColumnFamilyOptions& unknown_cf_opts); // @param options These options will be used for the database and for ALL column // families encountered during the repair Status RepairDB(const std::string& dbname, const Options& options); #endif } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/db_bench_tool.h000066400000000000000000000006451370372246700212560ustar00rootroot00000000000000// Copyright (c) 2013-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { int db_bench_tool(int argc, char** argv); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/db_dump_tool.h000066400000000000000000000023441370372246700211420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include "rocksdb/db.h" namespace ROCKSDB_NAMESPACE { struct DumpOptions { // Database that will be dumped std::string db_path; // File location that will contain dump output std::string dump_location; // Don't include db information header in the dump bool anonymous = false; }; class DbDumpTool { public: bool Run(const DumpOptions& dump_options, ROCKSDB_NAMESPACE::Options options = ROCKSDB_NAMESPACE::Options()); }; struct UndumpOptions { // Database that we will load the dumped file into std::string db_path; // File location of the dumped file that will be loaded std::string dump_location; // Compact the db after loading the dumped file bool compact_db = false; }; class DbUndumpTool { public: bool Run(const UndumpOptions& undump_options, ROCKSDB_NAMESPACE::Options options = ROCKSDB_NAMESPACE::Options()); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/db_stress_tool.h000066400000000000000000000006461370372246700215230ustar00rootroot00000000000000// Copyright (c) 2013-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { int db_stress_tool(int argc, char** argv); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/env.h000066400000000000000000001745321370372246700172740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // An Env is an interface used by the rocksdb implementation to access // operating system functionality like the filesystem etc. Callers // may wish to provide a custom Env object when opening a database to // get fine grain control; e.g., to rate limit file system operations. // // All Env implementations are safe for concurrent access from // multiple threads without any external synchronization. #pragma once #include #include #include #include #include #include #include #include "rocksdb/status.h" #include "rocksdb/thread_status.h" #ifdef _WIN32 // Windows API macro interference #undef DeleteFile #undef GetCurrentTime #endif #if defined(__GNUC__) || defined(__clang__) #define ROCKSDB_PRINTF_FORMAT_ATTR(format_param, dots_param) \ __attribute__((__format__(__printf__, format_param, dots_param))) #else #define ROCKSDB_PRINTF_FORMAT_ATTR(format_param, dots_param) #endif namespace ROCKSDB_NAMESPACE { class DynamicLibrary; class FileLock; class Logger; class RandomAccessFile; class SequentialFile; class Slice; class WritableFile; class RandomRWFile; class MemoryMappedFileBuffer; class Directory; struct DBOptions; struct ImmutableDBOptions; struct MutableDBOptions; class RateLimiter; class ThreadStatusUpdater; struct ThreadStatus; class FileSystem; const size_t kDefaultPageSize = 4 * 1024; // Options while opening a file to read/write struct EnvOptions { // Construct with default Options EnvOptions(); // Construct from Options explicit EnvOptions(const DBOptions& options); // If true, then use mmap to read data bool use_mmap_reads = false; // If true, then use mmap to write data bool use_mmap_writes = true; // If true, then use O_DIRECT for reading data bool use_direct_reads = false; // If true, then use O_DIRECT for writing data bool use_direct_writes = false; // If false, fallocate() calls are bypassed bool 
allow_fallocate = true; // If true, set the FD_CLOEXEC on open fd. bool set_fd_cloexec = true; // Allows OS to incrementally sync files to disk while they are being // written, in the background. Issue one request for every bytes_per_sync // written. 0 turns it off. // Default: 0 uint64_t bytes_per_sync = 0; // When true, guarantees the file has at most `bytes_per_sync` bytes submitted // for writeback at any given time. // // - If `sync_file_range` is supported it achieves this by waiting for any // prior `sync_file_range`s to finish before proceeding. In this way, // processing (compression, etc.) can proceed uninhibited in the gap // between `sync_file_range`s, and we block only when I/O falls behind. // - Otherwise the `WritableFile::Sync` method is used. Note this mechanism // always blocks, thus preventing the interleaving of I/O and processing. // // Note: Enabling this option does not provide any additional persistence // guarantees, as it may use `sync_file_range`, which does not write out // metadata. // // Default: false bool strict_bytes_per_sync = false; // If true, we will preallocate the file with FALLOC_FL_KEEP_SIZE flag, which // means that file size won't change as part of preallocation. // If false, preallocation will also change the file size. This option will // improve the performance in workloads where you sync the data on every // write. 
By default, we set it to true for MANIFEST writes and false for // WAL writes bool fallocate_with_keep_size = true; // See DBOptions doc size_t compaction_readahead_size = 0; // See DBOptions doc size_t random_access_max_buffer_size = 0; // See DBOptions doc size_t writable_file_max_buffer_size = 1024 * 1024; // If not nullptr, write rate limiting is enabled for flush and compaction RateLimiter* rate_limiter = nullptr; }; class Env { public: struct FileAttributes { // File name std::string name; // Size of file in bytes uint64_t size_bytes; }; Env(); // Construct an Env with a separate FileSystem implementation Env(std::shared_ptr fs); // No copying allowed Env(const Env&) = delete; void operator=(const Env&) = delete; virtual ~Env(); static const char* Type() { return "Environment"; } // Loads the environment specified by the input value into the result static Status LoadEnv(const std::string& value, Env** result); // Loads the environment specified by the input value into the result static Status LoadEnv(const std::string& value, Env** result, std::shared_ptr* guard); // Return a default environment suitable for the current operating // system. Sophisticated users may wish to provide their own Env // implementation instead of relying on this default environment. // // The result of Default() belongs to rocksdb and must never be deleted. static Env* Default(); // See FileSystem::RegisterDbPaths. virtual Status RegisterDbPaths(const std::vector& /*paths*/) { return Status::OK(); } // See FileSystem::UnregisterDbPaths. virtual Status UnregisterDbPaths(const std::vector& /*paths*/) { return Status::OK(); } // Create a brand new sequentially-readable file with the specified name. // On success, stores a pointer to the new file in *result and returns OK. // On failure stores nullptr in *result and returns non-OK. If the file does // not exist, returns a non-OK status. // // The returned file will only be accessed by one thread at a time. 
virtual Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) = 0; // Create a brand new random access read-only file with the // specified name. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. If the file does not exist, returns a non-OK // status. // // The returned file may be concurrently accessed by multiple threads. virtual Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) = 0; // These values match the Linux definition // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56 enum WriteLifeTimeHint { WLTH_NOT_SET = 0, // No hint information set WLTH_NONE, // No hints about write life time WLTH_SHORT, // Data written has a short life time WLTH_MEDIUM, // Data written has a medium life time WLTH_LONG, // Data written has a long life time WLTH_EXTREME, // Data written has an extremely long life time }; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a // new file. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) = 0; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a // new file. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. // // The returned file will only be accessed by one thread at a time. // // NOTE(review): this comment is word-for-word the NewWritableFile comment; // the "deletes any existing file" wording looks copy-pasted for a reopen // call -- presumably existing content is kept; confirm and reword. 
virtual Status ReopenWritableFile(const std::string& /*fname*/, std::unique_ptr* /*result*/, const EnvOptions& /*options*/) { return Status::NotSupported("Env::ReopenWritableFile() not supported."); } // Reuse an existing file by renaming it and opening it as writable. virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* result, const EnvOptions& options); // Open `fname` for random read and write, if file doesn't exist the file // will be created. On success, stores a pointer to the new file in // *result and returns OK. On failure returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual Status NewRandomRWFile(const std::string& /*fname*/, std::unique_ptr* /*result*/, const EnvOptions& /*options*/) { return Status::NotSupported("RandomRWFile is not implemented in this Env"); } // Opens `fname` as a memory-mapped file for read and write (in-place updates // only, i.e., no appends). On success, stores a raw buffer covering the whole // file in `*result`. The file must exist prior to this call. virtual Status NewMemoryMappedFileBuffer( const std::string& /*fname*/, std::unique_ptr* /*result*/) { return Status::NotSupported( "MemoryMappedFileBuffer is not implemented in this Env"); } // Create an object that represents a directory. Will fail if directory // doesn't exist. If the directory exists, it will open the directory // and create a new Directory object. // // On success, stores a pointer to the new Directory in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. virtual Status NewDirectory(const std::string& name, std::unique_ptr* result) = 0; // Returns OK if the named file exists. // NotFound if the named file does not exist, // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. 
// IOError if an IO Error was encountered virtual Status FileExists(const std::string& fname) = 0; // Store in *result the names of the children of the specified directory. // The names are relative to "dir". // Original contents of *results are dropped. // Returns OK if "dir" exists and "*result" contains its children. // NotFound if "dir" does not exist, the calling process does not have // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered virtual Status GetChildren(const std::string& dir, std::vector* result) = 0; // Store in *result the attributes of the children of the specified directory. // In case the implementation lists the directory prior to iterating the files // and files are concurrently deleted, the deleted files will be omitted from // result. // The name attributes are relative to "dir". // Original contents of *results are dropped. // Returns OK if "dir" exists and "*result" contains its children. // NotFound if "dir" does not exist, the calling process does not have // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered virtual Status GetChildrenFileAttributes(const std::string& dir, std::vector* result); // Delete the named file. virtual Status DeleteFile(const std::string& fname) = 0; // Truncate the named file to the specified size. virtual Status Truncate(const std::string& /*fname*/, size_t /*size*/) { return Status::NotSupported("Truncate is not supported for this Env"); } // Create the specified directory. Returns error if directory exists. virtual Status CreateDir(const std::string& dirname) = 0; // Creates directory if missing. Return Ok if it exists, or successful in // Creating. virtual Status CreateDirIfMissing(const std::string& dirname) = 0; // Delete the specified directory. // Many implementations of this function will only delete a directory if it is // empty. 
virtual Status DeleteDir(const std::string& dirname) = 0;

  // Writes the size of `fname` into *file_size.
  virtual Status GetFileSize(const std::string& fname,
                             uint64_t* file_size) = 0;

  // Writes the last modification time of `fname` into *file_mtime.
  virtual Status GetFileModificationTime(const std::string& fname,
                                         uint64_t* file_mtime) = 0;

  // Renames file `src` to `target`.
  virtual Status RenameFile(const std::string& src,
                            const std::string& target) = 0;

  // Hard-links file `src` to `target`.
  // Default implementation: NotSupported.
  virtual Status LinkFile(const std::string& /*src*/,
                          const std::string& /*target*/) {
    return Status::NotSupported("LinkFile is not supported for this Env");
  }

  // Default implementation: NotSupported.
  virtual Status NumFileLinks(const std::string& /*fname*/,
                              uint64_t* /*count*/) {
    return Status::NotSupported(
        "Getting number of file links is not supported for this Env");
  }

  // Default implementation: NotSupported.
  virtual Status AreFilesSame(const std::string& /*first*/,
                              const std::string& /*second*/,
                              bool* /*res*/) {
    return Status::NotSupported("AreFilesSame is not supported for this Env");
  }

  // Locks the specified file; used to keep several processes from opening
  // the same db concurrently.  On failure, stores nullptr in *lock and
  // returns non-OK.
  //
  // On success, stores in *lock a pointer to the object representing the
  // acquired lock and returns OK.  The caller should release it with
  // UnlockFile(*lock).  The lock is released automatically if the process
  // exits.
  //
  // This call never waits: if somebody else already holds the lock it fails
  // immediately.
  //
  // May create the named file if it does not already exist.
  virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;

  // Releases a lock obtained from a previous successful LockFile() call.
  // REQUIRES: lock was returned by a successful LockFile() call
  // REQUIRES: lock has not already been unlocked.
  virtual Status UnlockFile(FileLock* lock) = 0;

  // Opens `lib_name` as a dynamic library.
// If the 'search_path' is specified, breaks the path into its components // based on the appropriate platform separator (";" or ";") and looks for the // library in those directories. If 'search path is not specified, uses the // default library path search mechanism (such as LD_LIBRARY_PATH). On // success, stores a dynamic library in `*result`. virtual Status LoadLibrary(const std::string& /*lib_name*/, const std::string& /*search_path */, std::shared_ptr* /*result*/) { return Status::NotSupported("LoadLibrary is not implemented in this Env"); } // Priority for scheduling job in thread pool enum Priority { BOTTOM, LOW, HIGH, USER, TOTAL }; static std::string PriorityToString(Priority priority); // Priority for requesting bytes in rate limiter scheduler enum IOPriority { IO_LOW = 0, IO_HIGH = 1, IO_TOTAL = 2 }; // Arrange to run "(*function)(arg)" once in a background thread, in // the thread pool specified by pri. By default, jobs go to the 'LOW' // priority thread pool. // "function" may run in an unspecified thread. Multiple functions // added to the same Env may run concurrently in different threads. // I.e., the caller may not assume that background work items are // serialized. // When the UnSchedule function is called, the unschedFunction // registered at the time of Schedule is invoked with arg as a parameter. virtual void Schedule(void (*function)(void* arg), void* arg, Priority pri = LOW, void* tag = nullptr, void (*unschedFunction)(void* arg) = nullptr) = 0; // Arrange to remove jobs for given arg from the queue_ if they are not // already scheduled. Caller is expected to have exclusive lock on arg. virtual int UnSchedule(void* /*arg*/, Priority /*pri*/) { return 0; } // Start a new thread, invoking "function(arg)" within the new thread. // When "function(arg)" returns, the thread will be destroyed. virtual void StartThread(void (*function)(void* arg), void* arg) = 0; // Wait for all threads started by StartThread to terminate. 
virtual void WaitForJoin() {} // Get thread pool queue length for specific thread pool. virtual unsigned int GetThreadPoolQueueLen(Priority /*pri*/ = LOW) const { return 0; } // *path is set to a temporary directory that can be used for testing. It may // or many not have just been created. The directory may or may not differ // between runs of the same process, but subsequent calls will return the // same directory. virtual Status GetTestDirectory(std::string* path) = 0; // Create and returns a default logger (an instance of EnvLogger) for storing // informational messages. Derived classes can overide to provide custom // logger. virtual Status NewLogger(const std::string& fname, std::shared_ptr* result); // Returns the number of micro-seconds since some fixed point in time. // It is often used as system time such as in GenericRateLimiter // and other places so a port needs to return system time in order to work. virtual uint64_t NowMicros() = 0; // Returns the number of nano-seconds since some fixed point in time. Only // useful for computing deltas of time in one run. // Default implementation simply relies on NowMicros. // In platform-specific implementations, NowNanos() should return time points // that are MONOTONIC. virtual uint64_t NowNanos() { return NowMicros() * 1000; } // 0 indicates not supported. virtual uint64_t NowCPUNanos() { return 0; } // Sleep/delay the thread for the prescribed number of micro-seconds. virtual void SleepForMicroseconds(int micros) = 0; // Get the current host name. virtual Status GetHostName(char* name, uint64_t len) = 0; // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC). // Only overwrites *unix_time on success. virtual Status GetCurrentTime(int64_t* unix_time) = 0; // Get full directory name for this db. virtual Status GetAbsolutePath(const std::string& db_path, std::string* output_path) = 0; // The number of background worker threads of a specific thread pool // for this environment. 
'LOW' is the default pool. // default number: 1 virtual void SetBackgroundThreads(int number, Priority pri = LOW) = 0; virtual int GetBackgroundThreads(Priority pri = LOW) = 0; virtual Status SetAllowNonOwnerAccess(bool /*allow_non_owner_access*/) { return Status::NotSupported("Env::SetAllowNonOwnerAccess() not supported."); } // Enlarge number of background worker threads of a specific thread pool // for this environment if it is smaller than specified. 'LOW' is the default // pool. virtual void IncBackgroundThreadsIfNeeded(int number, Priority pri) = 0; // Lower IO priority for threads from the specified pool. virtual void LowerThreadPoolIOPriority(Priority /*pool*/ = LOW) {} // Lower CPU priority for threads from the specified pool. virtual void LowerThreadPoolCPUPriority(Priority /*pool*/ = LOW) {} // Converts seconds-since-Jan-01-1970 to a printable string virtual std::string TimeToString(uint64_t time) = 0; // Generates a unique id that can be used to identify a db virtual std::string GenerateUniqueId(); // OptimizeForLogWrite will create a new EnvOptions object that is a copy of // the EnvOptions in the parameters, but is optimized for reading log files. virtual EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const; // OptimizeForManifestRead will create a new EnvOptions object that is a copy // of the EnvOptions in the parameters, but is optimized for reading manifest // files. virtual EnvOptions OptimizeForManifestRead( const EnvOptions& env_options) const; // OptimizeForLogWrite will create a new EnvOptions object that is a copy of // the EnvOptions in the parameters, but is optimized for writing log files. // Default implementation returns the copy of the same object. virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const; // OptimizeForManifestWrite will create a new EnvOptions object that is a copy // of the EnvOptions in the parameters, but is optimized for writing manifest // files. 
Default implementation returns the copy of the same object. virtual EnvOptions OptimizeForManifestWrite( const EnvOptions& env_options) const; // OptimizeForCompactionTableWrite will create a new EnvOptions object that is // a copy of the EnvOptions in the parameters, but is optimized for writing // table files. virtual EnvOptions OptimizeForCompactionTableWrite( const EnvOptions& env_options, const ImmutableDBOptions& immutable_ops) const; // OptimizeForCompactionTableWrite will create a new EnvOptions object that // is a copy of the EnvOptions in the parameters, but is optimized for reading // table files. virtual EnvOptions OptimizeForCompactionTableRead( const EnvOptions& env_options, const ImmutableDBOptions& db_options) const; // Returns the status of all threads that belong to the current Env. virtual Status GetThreadList(std::vector* /*thread_list*/) { return Status::NotSupported("Env::GetThreadList() not supported."); } // Returns the pointer to ThreadStatusUpdater. This function will be // used in RocksDB internally to update thread status and supports // GetThreadList(). virtual ThreadStatusUpdater* GetThreadStatusUpdater() const { return thread_status_updater_; } // Returns the ID of the current thread. virtual uint64_t GetThreadID() const; // This seems to clash with a macro on Windows, so #undef it here #undef GetFreeSpace // Get the amount of free disk space virtual Status GetFreeSpace(const std::string& /*path*/, uint64_t* /*diskfree*/) { return Status::NotSupported("Env::GetFreeSpace() not supported."); } // Check whether the specified path is a directory virtual Status IsDirectory(const std::string& /*path*/, bool* /*is_dir*/) { return Status::NotSupported("Env::IsDirectory() not supported."); } virtual void SanitizeEnvOptions(EnvOptions* /*env_opts*/) const {} // Get the FileSystem implementation this Env was constructed with. 
It // could be a fully implemented one, or a wrapper class around the Env const std::shared_ptr& GetFileSystem() const; // If you're adding methods here, remember to add them to EnvWrapper too. protected: // The pointer to an internal structure that will update the // status of each thread. ThreadStatusUpdater* thread_status_updater_; // Pointer to the underlying FileSystem implementation std::shared_ptr file_system_; }; // The factory function to construct a ThreadStatusUpdater. Any Env // that supports GetThreadList() feature should call this function in its // constructor to initialize thread_status_updater_. ThreadStatusUpdater* CreateThreadStatusUpdater(); // A file abstraction for reading sequentially through a file class SequentialFile { public: SequentialFile() {} virtual ~SequentialFile(); // Read up to "n" bytes from the file. "scratch[0..n-1]" may be // written by this routine. Sets "*result" to the data that was // read (including if fewer than "n" bytes were successfully read). // May set "*result" to point at data in "scratch[0..n-1]", so // "scratch[0..n-1]" must be live when "*result" is used. // If an error was encountered, returns a non-OK status. // // REQUIRES: External synchronization virtual Status Read(size_t n, Slice* result, char* scratch) = 0; // Skip "n" bytes from the file. This is guaranteed to be no // slower that reading the same data, but may be faster. // // If end of file is reached, skipping will stop at the end of the // file, and Skip will return OK. // // REQUIRES: External synchronization virtual Status Skip(uint64_t n) = 0; // Indicates the upper layers if the current SequentialFile implementation // uses direct IO. virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } // Remove any kind of caching of data from the offset to offset+length // of this file. 
If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. virtual Status InvalidateCache(size_t /*offset*/, size_t /*length*/) { return Status::NotSupported( "SequentialFile::InvalidateCache not supported."); } // Positioned Read for direct I/O // If Direct I/O enabled, offset, n, and scratch should be properly aligned virtual Status PositionedRead(uint64_t /*offset*/, size_t /*n*/, Slice* /*result*/, char* /*scratch*/) { return Status::NotSupported( "SequentialFile::PositionedRead() not supported."); } // If you're adding methods here, remember to add them to // SequentialFileWrapper too. }; // A read IO request structure for use in MultiRead struct ReadRequest { // File offset in bytes uint64_t offset; // Length to read in bytes size_t len; // A buffer that MultiRead() can optionally place data in. It can // ignore this and allocate its own buffer char* scratch; // Output parameter set by MultiRead() to point to the data buffer, and // the number of valid bytes Slice result; // Status of read Status status; }; // A file abstraction for randomly reading the contents of a file. class RandomAccessFile { public: RandomAccessFile() {} virtual ~RandomAccessFile(); // Read up to "n" bytes from the file starting at "offset". // "scratch[0..n-1]" may be written by this routine. Sets "*result" // to the data that was read (including if fewer than "n" bytes were // successfully read). May set "*result" to point at data in // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when // "*result" is used. If an error was encountered, returns a non-OK // status. // // Safe for concurrent use by multiple threads. // If Direct I/O enabled, offset, n, and scratch should be aligned properly. virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const = 0; // Readahead the file starting from offset by n bytes for caching. 
virtual Status Prefetch(uint64_t /*offset*/, size_t /*n*/) { return Status::OK(); } // Read a bunch of blocks as described by reqs. The blocks can // optionally be read in parallel. This is a synchronous call, i.e it // should return after all reads have completed. The reads will be // non-overlapping. If the function return Status is not ok, status of // individual requests will be ignored and return status will be assumed // for all read requests. The function return status is only meant for any // any errors that occur before even processing specific read requests virtual Status MultiRead(ReadRequest* reqs, size_t num_reqs) { assert(reqs != nullptr); for (size_t i = 0; i < num_reqs; ++i) { ReadRequest& req = reqs[i]; req.status = Read(req.offset, req.len, &req.result, req.scratch); } return Status::OK(); } // Tries to get an unique ID for this file that will be the same each time // the file is opened (and will stay the same while the file is open). // Furthermore, it tries to make this ID at most "max_size" bytes. If such an // ID can be created this function returns the length of the ID and places it // in "id"; otherwise, this function returns 0, in which case "id" // may not have been modified. // // This function guarantees, for IDs from a given environment, two unique ids // cannot be made equal to each other by adding arbitrary bytes to one of // them. That is, no unique ID is the prefix of another. // // This function guarantees that the returned ID will not be interpretable as // a single varint. // // Note: these IDs are only valid for the duration of the process. virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; // Default implementation to prevent issues with backwards // compatibility. } enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED }; virtual void Hint(AccessPattern /*pattern*/) {} // Indicates the upper layers if the current RandomAccessFile implementation // uses direct IO. 
virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. virtual Status InvalidateCache(size_t /*offset*/, size_t /*length*/) { return Status::NotSupported( "RandomAccessFile::InvalidateCache not supported."); } // If you're adding methods here, remember to add them to // RandomAccessFileWrapper too. }; // A file abstraction for sequential writing. The implementation // must provide buffering since callers may append small fragments // at a time to the file. class WritableFile { public: WritableFile() : last_preallocated_block_(0), preallocation_block_size_(0), io_priority_(Env::IO_TOTAL), write_hint_(Env::WLTH_NOT_SET), strict_bytes_per_sync_(false) {} explicit WritableFile(const EnvOptions& options) : last_preallocated_block_(0), preallocation_block_size_(0), io_priority_(Env::IO_TOTAL), write_hint_(Env::WLTH_NOT_SET), strict_bytes_per_sync_(options.strict_bytes_per_sync) {} // No copying allowed WritableFile(const WritableFile&) = delete; void operator=(const WritableFile&) = delete; virtual ~WritableFile(); // Append data to the end of the file // Note: A WriteabelFile object must support either Append or // PositionedAppend, so the users cannot mix the two. virtual Status Append(const Slice& data) = 0; // PositionedAppend data to the specified offset. The new EOF after append // must be larger than the previous EOF. This is to be used when writes are // not backed by OS buffers and hence has to always start from the start of // the sector. The implementation thus needs to also rewrite the last // partial sector. // Note: PositionAppend does not guarantee moving the file offset after the // write. 
A WritableFile object must support either Append or // PositionedAppend, so the users cannot mix the two. // // PositionedAppend() can only happen on the page/sector boundaries. For that // reason, if the last write was an incomplete sector we still need to rewind // back to the nearest sector/page and rewrite the portion of it with whatever // we need to add. We need to keep where we stop writing. // // PositionedAppend() can only write whole sectors. For that reason we have to // pad with zeros for the last write and trim the file when closing according // to the position we keep in the previous step. // // PositionedAppend() requires aligned buffer to be passed in. The alignment // required is queried via GetRequiredBufferAlignment() virtual Status PositionedAppend(const Slice& /* data */, uint64_t /* offset */) { return Status::NotSupported( "WritableFile::PositionedAppend() not supported."); } // Truncate is necessary to trim the file to the correct size // before closing. It is not always possible to keep track of the file // size due to whole pages writes. The behavior is undefined if called // with other writes to follow. virtual Status Truncate(uint64_t /*size*/) { return Status::OK(); } virtual Status Close() = 0; virtual Status Flush() = 0; virtual Status Sync() = 0; // sync data /* * Sync data and/or metadata as well. * By default, sync only data. * Override this method for environments where we need to sync * metadata as well. */ virtual Status Fsync() { return Sync(); } // true if Sync() and Fsync() are safe to call concurrently with Append() // and Flush(). virtual bool IsSyncThreadSafe() const { return false; } // Indicates the upper layers if the current WritableFile implementation // uses direct IO. 
virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } /* * Change the priority in rate limiter if rate limiting is enabled. * If rate limiting is not enabled, this call has no effect. */ virtual void SetIOPriority(Env::IOPriority pri) { io_priority_ = pri; } virtual Env::IOPriority GetIOPriority() { return io_priority_; } virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) { write_hint_ = hint; } virtual Env::WriteLifeTimeHint GetWriteLifeTimeHint() { return write_hint_; } /* * Get the size of valid data in the file. */ virtual uint64_t GetFileSize() { return 0; } /* * Get and set the default pre-allocation block size for writes to * this file. If non-zero, then Allocate will be used to extend the * underlying storage of a file (generally via fallocate) if the Env * instance supports it. */ virtual void SetPreallocationBlockSize(size_t size) { preallocation_block_size_ = size; } virtual void GetPreallocationStatus(size_t* block_size, size_t* last_allocated_block) { *last_allocated_block = last_preallocated_block_; *block_size = preallocation_block_size_; } // For documentation, refer to RandomAccessFile::GetUniqueId() virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; // Default implementation to prevent issues with backwards } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. // This call has no effect on dirty pages in the cache. virtual Status InvalidateCache(size_t /*offset*/, size_t /*length*/) { return Status::NotSupported("WritableFile::InvalidateCache not supported."); } // Sync a file range with disk. // offset is the starting byte of the file range to be synchronized. 
// nbytes specifies the length of the range to be synchronized. // This asks the OS to initiate flushing the cached data to disk, // without waiting for completion. // Default implementation does nothing. virtual Status RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/) { if (strict_bytes_per_sync_) { return Sync(); } return Status::OK(); } // PrepareWrite performs any necessary preparation for a write // before the write actually occurs. This allows for pre-allocation // of space on devices where it can result in less file // fragmentation and/or less waste from over-zealous filesystem // pre-allocation. virtual void PrepareWrite(size_t offset, size_t len) { if (preallocation_block_size_ == 0) { return; } // If this write would cross one or more preallocation blocks, // determine what the last preallocation block necessary to // cover this write would be and Allocate to that point. const auto block_size = preallocation_block_size_; size_t new_last_preallocated_block = (offset + len + block_size - 1) / block_size; if (new_last_preallocated_block > last_preallocated_block_) { size_t num_spanned_blocks = new_last_preallocated_block - last_preallocated_block_; Allocate(block_size * last_preallocated_block_, block_size * num_spanned_blocks); last_preallocated_block_ = new_last_preallocated_block; } } // Pre-allocates space for a file. virtual Status Allocate(uint64_t /*offset*/, uint64_t /*len*/) { return Status::OK(); } // If you're adding methods here, remember to add them to // WritableFileWrapper too. protected: size_t preallocation_block_size() { return preallocation_block_size_; } private: size_t last_preallocated_block_; size_t preallocation_block_size_; protected: Env::IOPriority io_priority_; Env::WriteLifeTimeHint write_hint_; const bool strict_bytes_per_sync_; }; // A file abstraction for random reading and writing. 
class RandomRWFile { public: RandomRWFile() {} // No copying allowed RandomRWFile(const RandomRWFile&) = delete; RandomRWFile& operator=(const RandomRWFile&) = delete; virtual ~RandomRWFile() {} // Indicates if the class makes use of direct I/O // If false you must pass aligned buffer to Write() virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. // Pass aligned buffer when use_direct_io() returns true. virtual Status Write(uint64_t offset, const Slice& data) = 0; // Read up to `n` bytes starting from offset `offset` and store them in // result, provided `scratch` size should be at least `n`. // Returns Status::OK() on success. virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const = 0; virtual Status Flush() = 0; virtual Status Sync() = 0; virtual Status Fsync() { return Sync(); } virtual Status Close() = 0; // If you're adding methods here, remember to add them to // RandomRWFileWrapper too. }; // MemoryMappedFileBuffer object represents a memory-mapped file's raw buffer. // Subclasses should release the mapping upon destruction. class MemoryMappedFileBuffer { public: MemoryMappedFileBuffer(void* _base, size_t _length) : base_(_base), length_(_length) {} virtual ~MemoryMappedFileBuffer() = 0; // We do not want to unmap this twice. We can make this class // movable if desired, however, since MemoryMappedFileBuffer(const MemoryMappedFileBuffer&) = delete; MemoryMappedFileBuffer& operator=(const MemoryMappedFileBuffer&) = delete; void* GetBase() const { return base_; } size_t GetLen() const { return length_; } protected: void* base_; const size_t length_; }; // Directory object represents collection of files and implements // filesystem operations that can be executed on directories. 
class Directory { public: virtual ~Directory() {} // Fsync directory. Can be called concurrently from multiple threads. virtual Status Fsync() = 0; virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; } // If you're adding methods here, remember to add them to // DirectoryWrapper too. }; enum InfoLogLevel : unsigned char { DEBUG_LEVEL = 0, INFO_LEVEL, WARN_LEVEL, ERROR_LEVEL, FATAL_LEVEL, HEADER_LEVEL, NUM_INFO_LOG_LEVELS, }; // An interface for writing log messages. class Logger { public: size_t kDoNotSupportGetLogFileSize = (std::numeric_limits::max)(); explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL) : closed_(false), log_level_(log_level) {} // No copying allowed Logger(const Logger&) = delete; void operator=(const Logger&) = delete; virtual ~Logger(); // Close the log file. Must be called before destructor. If the return // status is NotSupported(), it means the implementation does cleanup in // the destructor virtual Status Close(); // Write a header to the log file with the specified format // It is recommended that you log all header information at the start of the // application. But it is not enforced. virtual void LogHeader(const char* format, va_list ap) { // Default implementation does a simple INFO level log write. // Please override as per the logger class requirement. Logv(format, ap); } // Write an entry to the log file with the specified format. virtual void Logv(const char* format, va_list ap) = 0; // Write an entry to the log file with the specified log level // and format. Any log with level under the internal log level // of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be // printed. 
virtual void Logv(const InfoLogLevel log_level, const char* format, va_list ap); virtual size_t GetLogFileSize() const { return kDoNotSupportGetLogFileSize; } // Flush to the OS buffers virtual void Flush() {} virtual InfoLogLevel GetInfoLogLevel() const { return log_level_; } virtual void SetInfoLogLevel(const InfoLogLevel log_level) { log_level_ = log_level; } // If you're adding methods here, remember to add them to LoggerWrapper too. protected: virtual Status CloseImpl(); bool closed_; private: InfoLogLevel log_level_; }; // Identifies a locked file. class FileLock { public: FileLock() {} virtual ~FileLock(); private: // No copying allowed FileLock(const FileLock&) = delete; void operator=(const FileLock&) = delete; }; class DynamicLibrary { public: virtual ~DynamicLibrary() {} // Returns the name of the dynamic library. virtual const char* Name() const = 0; // Loads the symbol for sym_name from the library and updates the input // function. Returns the loaded symbol. template Status LoadFunction(const std::string& sym_name, std::function* function) { assert(nullptr != function); void* ptr = nullptr; Status s = LoadSymbol(sym_name, &ptr); *function = reinterpret_cast(ptr); return s; } // Loads and returns the symbol for sym_name from the library. virtual Status LoadSymbol(const std::string& sym_name, void** func) = 0; }; extern void LogFlush(const std::shared_ptr& info_log); extern void Log(const InfoLogLevel log_level, const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(3, 4); // a set of log functions with different log levels. extern void Header(const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Debug(const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Info(const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Warn(const std::shared_ptr& info_log, const char* format, ...) 
ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Error(const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Fatal(const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); // Log the specified data to *info_log if info_log is non-nullptr. // The default info log level is InfoLogLevel::INFO_LEVEL. extern void Log(const std::shared_ptr& info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void LogFlush(Logger* info_log); extern void Log(const InfoLogLevel log_level, Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(3, 4); // The default info log level is InfoLogLevel::INFO_LEVEL. extern void Log(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); // a set of log functions with different log levels. extern void Header(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Debug(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Info(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Warn(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Error(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); extern void Fatal(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); // A utility routine: write "data" to the named file. extern Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname, bool should_sync = false); // A utility routine: read contents of named file into *data extern Status ReadFileToString(Env* env, const std::string& fname, std::string* data); // Below are helpers for wrapping most of the classes in this file. // They forward all calls to another instance of the class. // Useful when wrapping the default implementations. 
// Typical usage is to inherit your wrapper from *Wrapper, e.g.: // // class MySequentialFileWrapper : public // ROCKSDB_NAMESPACE::SequentialFileWrapper { // public: // MySequentialFileWrapper(ROCKSDB_NAMESPACE::SequentialFile* target): // ROCKSDB_NAMESPACE::SequentialFileWrapper(target) {} // Status Read(size_t n, Slice* result, char* scratch) override { // cout << "Doing a read of size " << n << "!" << endl; // return ROCKSDB_NAMESPACE::SequentialFileWrapper::Read(n, result, // scratch); // } // // All other methods are forwarded to target_ automatically. // }; // // This is often more convenient than inheriting the class directly because // (a) Don't have to override and forward all methods - the Wrapper will // forward everything you're not explicitly overriding. // (b) Don't need to update the wrapper when more methods are added to the // rocksdb class. Unless you actually want to override the behavior. // (And unless rocksdb people forgot to update the *Wrapper class.) // An implementation of Env that forwards all calls to another Env. // May be useful to clients who wish to override just part of the // functionality of another Env. 
class EnvWrapper : public Env { public: // Initialize an EnvWrapper that delegates all calls to *t explicit EnvWrapper(Env* t) : target_(t) {} ~EnvWrapper() override; // Return the target to which this Env forwards all calls Env* target() const { return target_; } // The following text is boilerplate that forwards all methods to target() Status RegisterDbPaths(const std::vector& paths) override { return target_->RegisterDbPaths(paths); } Status UnregisterDbPaths(const std::vector& paths) override { return target_->UnregisterDbPaths(paths); } Status NewSequentialFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override { return target_->NewSequentialFile(f, r, options); } Status NewRandomAccessFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override { return target_->NewRandomAccessFile(f, r, options); } Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override { return target_->NewWritableFile(f, r, options); } Status ReopenWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { return target_->ReopenWritableFile(fname, result, options); } Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* r, const EnvOptions& options) override { return target_->ReuseWritableFile(fname, old_fname, r, options); } Status NewRandomRWFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override { return target_->NewRandomRWFile(fname, result, options); } Status NewMemoryMappedFileBuffer( const std::string& fname, std::unique_ptr* result) override { return target_->NewMemoryMappedFileBuffer(fname, result); } Status NewDirectory(const std::string& name, std::unique_ptr* result) override { return target_->NewDirectory(name, result); } Status FileExists(const std::string& f) override { return target_->FileExists(f); } Status GetChildren(const std::string& dir, std::vector* r) 
override { return target_->GetChildren(dir, r); } Status GetChildrenFileAttributes( const std::string& dir, std::vector* result) override { return target_->GetChildrenFileAttributes(dir, result); } Status DeleteFile(const std::string& f) override { return target_->DeleteFile(f); } Status Truncate(const std::string& fname, size_t size) override { return target_->Truncate(fname, size); } Status CreateDir(const std::string& d) override { return target_->CreateDir(d); } Status CreateDirIfMissing(const std::string& d) override { return target_->CreateDirIfMissing(d); } Status DeleteDir(const std::string& d) override { return target_->DeleteDir(d); } Status GetFileSize(const std::string& f, uint64_t* s) override { return target_->GetFileSize(f, s); } Status GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) override { return target_->GetFileModificationTime(fname, file_mtime); } Status RenameFile(const std::string& s, const std::string& t) override { return target_->RenameFile(s, t); } Status LinkFile(const std::string& s, const std::string& t) override { return target_->LinkFile(s, t); } Status NumFileLinks(const std::string& fname, uint64_t* count) override { return target_->NumFileLinks(fname, count); } Status AreFilesSame(const std::string& first, const std::string& second, bool* res) override { return target_->AreFilesSame(first, second, res); } Status LockFile(const std::string& f, FileLock** l) override { return target_->LockFile(f, l); } Status UnlockFile(FileLock* l) override { return target_->UnlockFile(l); } Status IsDirectory(const std::string& path, bool* is_dir) override { return target_->IsDirectory(path, is_dir); } Status LoadLibrary(const std::string& lib_name, const std::string& search_path, std::shared_ptr* result) override { return target_->LoadLibrary(lib_name, search_path, result); } void Schedule(void (*f)(void* arg), void* a, Priority pri, void* tag = nullptr, void (*u)(void* arg) = nullptr) override { return 
target_->Schedule(f, a, pri, tag, u); } int UnSchedule(void* tag, Priority pri) override { return target_->UnSchedule(tag, pri); } void StartThread(void (*f)(void*), void* a) override { return target_->StartThread(f, a); } void WaitForJoin() override { return target_->WaitForJoin(); } unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const override { return target_->GetThreadPoolQueueLen(pri); } Status GetTestDirectory(std::string* path) override { return target_->GetTestDirectory(path); } Status NewLogger(const std::string& fname, std::shared_ptr* result) override { return target_->NewLogger(fname, result); } uint64_t NowMicros() override { return target_->NowMicros(); } uint64_t NowNanos() override { return target_->NowNanos(); } uint64_t NowCPUNanos() override { return target_->NowCPUNanos(); } void SleepForMicroseconds(int micros) override { target_->SleepForMicroseconds(micros); } Status GetHostName(char* name, uint64_t len) override { return target_->GetHostName(name, len); } Status GetCurrentTime(int64_t* unix_time) override { return target_->GetCurrentTime(unix_time); } Status GetAbsolutePath(const std::string& db_path, std::string* output_path) override { return target_->GetAbsolutePath(db_path, output_path); } void SetBackgroundThreads(int num, Priority pri) override { return target_->SetBackgroundThreads(num, pri); } int GetBackgroundThreads(Priority pri) override { return target_->GetBackgroundThreads(pri); } Status SetAllowNonOwnerAccess(bool allow_non_owner_access) override { return target_->SetAllowNonOwnerAccess(allow_non_owner_access); } void IncBackgroundThreadsIfNeeded(int num, Priority pri) override { return target_->IncBackgroundThreadsIfNeeded(num, pri); } void LowerThreadPoolIOPriority(Priority pool = LOW) override { target_->LowerThreadPoolIOPriority(pool); } void LowerThreadPoolCPUPriority(Priority pool = LOW) override { target_->LowerThreadPoolCPUPriority(pool); } std::string TimeToString(uint64_t time) override { return 
target_->TimeToString(time); } Status GetThreadList(std::vector* thread_list) override { return target_->GetThreadList(thread_list); } ThreadStatusUpdater* GetThreadStatusUpdater() const override { return target_->GetThreadStatusUpdater(); } uint64_t GetThreadID() const override { return target_->GetThreadID(); } std::string GenerateUniqueId() override { return target_->GenerateUniqueId(); } EnvOptions OptimizeForLogRead(const EnvOptions& env_options) const override { return target_->OptimizeForLogRead(env_options); } EnvOptions OptimizeForManifestRead( const EnvOptions& env_options) const override { return target_->OptimizeForManifestRead(env_options); } EnvOptions OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const override { return target_->OptimizeForLogWrite(env_options, db_options); } EnvOptions OptimizeForManifestWrite( const EnvOptions& env_options) const override { return target_->OptimizeForManifestWrite(env_options); } EnvOptions OptimizeForCompactionTableWrite( const EnvOptions& env_options, const ImmutableDBOptions& immutable_ops) const override { return target_->OptimizeForCompactionTableWrite(env_options, immutable_ops); } EnvOptions OptimizeForCompactionTableRead( const EnvOptions& env_options, const ImmutableDBOptions& db_options) const override { return target_->OptimizeForCompactionTableRead(env_options, db_options); } Status GetFreeSpace(const std::string& path, uint64_t* diskfree) override { return target_->GetFreeSpace(path, diskfree); } void SanitizeEnvOptions(EnvOptions* env_opts) const override { target_->SanitizeEnvOptions(env_opts); } private: Env* target_; }; class SequentialFileWrapper : public SequentialFile { public: explicit SequentialFileWrapper(SequentialFile* target) : target_(target) {} Status Read(size_t n, Slice* result, char* scratch) override { return target_->Read(n, result, scratch); } Status Skip(uint64_t n) override { return target_->Skip(n); } bool use_direct_io() const override { return 
target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } Status InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } Status PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) override { return target_->PositionedRead(offset, n, result, scratch); } private: SequentialFile* target_; }; class RandomAccessFileWrapper : public RandomAccessFile { public: explicit RandomAccessFileWrapper(RandomAccessFile* target) : target_(target) {} Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { return target_->Read(offset, n, result, scratch); } Status MultiRead(ReadRequest* reqs, size_t num_reqs) override { return target_->MultiRead(reqs, num_reqs); } Status Prefetch(uint64_t offset, size_t n) override { return target_->Prefetch(offset, n); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } void Hint(AccessPattern pattern) override { target_->Hint(pattern); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } Status InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } private: RandomAccessFile* target_; }; class WritableFileWrapper : public WritableFile { public: explicit WritableFileWrapper(WritableFile* t) : target_(t) {} Status Append(const Slice& data) override { return target_->Append(data); } Status PositionedAppend(const Slice& data, uint64_t offset) override { return target_->PositionedAppend(data, offset); } Status Truncate(uint64_t size) override { return target_->Truncate(size); } Status Close() override { return target_->Close(); } Status Flush() override { return target_->Flush(); } Status Sync() override { return target_->Sync(); } Status Fsync() override 
{ return target_->Fsync(); } bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } void SetIOPriority(Env::IOPriority pri) override { target_->SetIOPriority(pri); } Env::IOPriority GetIOPriority() override { return target_->GetIOPriority(); } void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { target_->SetWriteLifeTimeHint(hint); } Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { return target_->GetWriteLifeTimeHint(); } uint64_t GetFileSize() override { return target_->GetFileSize(); } void SetPreallocationBlockSize(size_t size) override { target_->SetPreallocationBlockSize(size); } void GetPreallocationStatus(size_t* block_size, size_t* last_allocated_block) override { target_->GetPreallocationStatus(block_size, last_allocated_block); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } Status InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } Status RangeSync(uint64_t offset, uint64_t nbytes) override { return target_->RangeSync(offset, nbytes); } void PrepareWrite(size_t offset, size_t len) override { target_->PrepareWrite(offset, len); } Status Allocate(uint64_t offset, uint64_t len) override { return target_->Allocate(offset, len); } private: WritableFile* target_; }; class RandomRWFileWrapper : public RandomRWFile { public: explicit RandomRWFileWrapper(RandomRWFile* target) : target_(target) {} bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } Status Write(uint64_t offset, const Slice& data) override { return target_->Write(offset, data); } Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) 
const override { return target_->Read(offset, n, result, scratch); } Status Flush() override { return target_->Flush(); } Status Sync() override { return target_->Sync(); } Status Fsync() override { return target_->Fsync(); } Status Close() override { return target_->Close(); } private: RandomRWFile* target_; }; class DirectoryWrapper : public Directory { public: explicit DirectoryWrapper(Directory* target) : target_(target) {} Status Fsync() override { return target_->Fsync(); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } private: Directory* target_; }; class LoggerWrapper : public Logger { public: explicit LoggerWrapper(Logger* target) : target_(target) {} Status Close() override { return target_->Close(); } void LogHeader(const char* format, va_list ap) override { return target_->LogHeader(format, ap); } void Logv(const char* format, va_list ap) override { return target_->Logv(format, ap); } void Logv(const InfoLogLevel log_level, const char* format, va_list ap) override { return target_->Logv(log_level, format, ap); } size_t GetLogFileSize() const override { return target_->GetLogFileSize(); } void Flush() override { return target_->Flush(); } InfoLogLevel GetInfoLogLevel() const override { return target_->GetInfoLogLevel(); } void SetInfoLogLevel(const InfoLogLevel log_level) override { return target_->SetInfoLogLevel(log_level); } private: Logger* target_; }; // Returns a new environment that stores its data in memory and delegates // all non-file-storage tasks to base_env. The caller must delete the result // when it is no longer needed. // *base_env must remain live while the result is in use. Env* NewMemEnv(Env* base_env); // Returns a new environment that is used for HDFS environment. 
// This is a factory method for HdfsEnv declared in hdfs/env_hdfs.h Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname); // Returns a new environment that measures function call times for filesystem // operations, reporting results to variables in PerfContext. // This is a factory method for TimedEnv defined in utilities/env_timed.cc. Env* NewTimedEnv(Env* base_env); // Returns an instance of logger that can be used for storing informational // messages. // This is a factory method for EnvLogger declared in logging/env_logging.h Status NewEnvLogger(const std::string& fname, Env* env, std::shared_ptr* result); std::unique_ptr NewCompositeEnv(std::shared_ptr fs); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/env_encryption.h000066400000000000000000000167351370372246700215460ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #if !defined(ROCKSDB_LITE) #include #include "env.h" namespace ROCKSDB_NAMESPACE { class EncryptionProvider; // Returns an Env that encrypts data when stored on disk and decrypts data when // read from disk. Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider); // BlockAccessCipherStream is the base class for any cipher stream that // supports random access at block level (without requiring data from other // blocks). E.g. CTR (Counter operation mode) supports this requirement. class BlockAccessCipherStream { public: virtual ~BlockAccessCipherStream(){}; // BlockSize returns the size of each block supported by this cipher stream. virtual size_t BlockSize() = 0; // Encrypt one or more (partial) blocks of data at the file offset. // Length of data is given in dataSize. 
virtual Status Encrypt(uint64_t fileOffset, char* data, size_t dataSize); // Decrypt one or more (partial) blocks of data at the file offset. // Length of data is given in dataSize. virtual Status Decrypt(uint64_t fileOffset, char* data, size_t dataSize); protected: // Allocate scratch space which is passed to EncryptBlock/DecryptBlock. virtual void AllocateScratch(std::string&) = 0; // Encrypt a block of data at the given block index. // Length of data is equal to BlockSize(); virtual Status EncryptBlock(uint64_t blockIndex, char* data, char* scratch) = 0; // Decrypt a block of data at the given block index. // Length of data is equal to BlockSize(); virtual Status DecryptBlock(uint64_t blockIndex, char* data, char* scratch) = 0; }; // BlockCipher class BlockCipher { public: virtual ~BlockCipher(){}; // BlockSize returns the size of each block supported by this cipher stream. virtual size_t BlockSize() = 0; // Encrypt a block of data. // Length of data is equal to BlockSize(). virtual Status Encrypt(char* data) = 0; // Decrypt a block of data. // Length of data is equal to BlockSize(). virtual Status Decrypt(char* data) = 0; }; // Implements a BlockCipher using ROT13. // // Note: This is a sample implementation of BlockCipher, // it is NOT considered safe and should NOT be used in production. class ROT13BlockCipher : public BlockCipher { private: size_t blockSize_; public: ROT13BlockCipher(size_t blockSize) : blockSize_(blockSize) {} virtual ~ROT13BlockCipher(){}; // BlockSize returns the size of each block supported by this cipher stream. virtual size_t BlockSize() override { return blockSize_; } // Encrypt a block of data. // Length of data is equal to BlockSize(). virtual Status Encrypt(char* data) override; // Decrypt a block of data. // Length of data is equal to BlockSize(). virtual Status Decrypt(char* data) override; }; // CTRCipherStream implements BlockAccessCipherStream using an // Counter operations mode. 
// See https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation // // Note: This is a possible implementation of BlockAccessCipherStream, // it is considered suitable for use. class CTRCipherStream final : public BlockAccessCipherStream { private: BlockCipher& cipher_; std::string iv_; uint64_t initialCounter_; public: CTRCipherStream(BlockCipher& c, const char* iv, uint64_t initialCounter) : cipher_(c), iv_(iv, c.BlockSize()), initialCounter_(initialCounter){}; virtual ~CTRCipherStream(){}; // BlockSize returns the size of each block supported by this cipher stream. virtual size_t BlockSize() override { return cipher_.BlockSize(); } protected: // Allocate scratch space which is passed to EncryptBlock/DecryptBlock. virtual void AllocateScratch(std::string&) override; // Encrypt a block of data at the given block index. // Length of data is equal to BlockSize(); virtual Status EncryptBlock(uint64_t blockIndex, char* data, char* scratch) override; // Decrypt a block of data at the given block index. // Length of data is equal to BlockSize(); virtual Status DecryptBlock(uint64_t blockIndex, char* data, char* scratch) override; }; // The encryption provider is used to create a cipher stream for a specific // file. The returned cipher stream will be used for actual // encryption/decryption actions. class EncryptionProvider { public: virtual ~EncryptionProvider(){}; // GetPrefixLength returns the length of the prefix that is added to every // file and used for storing encryption options. For optimal performance, the // prefix length should be a multiple of the page size. virtual size_t GetPrefixLength() = 0; // CreateNewPrefix initialized an allocated block of prefix memory // for a new file. virtual Status CreateNewPrefix(const std::string& fname, char* prefix, size_t prefixLength) = 0; // CreateCipherStream creates a block access cipher stream for a file given // given name and options. 
virtual Status CreateCipherStream( const std::string& fname, const EnvOptions& options, Slice& prefix, std::unique_ptr* result) = 0; }; // This encryption provider uses a CTR cipher stream, with a given block cipher // and IV. // // Note: This is a possible implementation of EncryptionProvider, // it is considered suitable for use, provided a safe BlockCipher is used. class CTREncryptionProvider : public EncryptionProvider { private: BlockCipher& cipher_; protected: const static size_t defaultPrefixLength = 4096; public: CTREncryptionProvider(BlockCipher& c) : cipher_(c){}; virtual ~CTREncryptionProvider() {} // GetPrefixLength returns the length of the prefix that is added to every // file and used for storing encryption options. For optimal performance, the // prefix length should be a multiple of the page size. virtual size_t GetPrefixLength() override; // CreateNewPrefix initialized an allocated block of prefix memory // for a new file. virtual Status CreateNewPrefix(const std::string& fname, char* prefix, size_t prefixLength) override; // CreateCipherStream creates a block access cipher stream for a file given // given name and options. virtual Status CreateCipherStream( const std::string& fname, const EnvOptions& options, Slice& prefix, std::unique_ptr* result) override; protected: // PopulateSecretPrefixPart initializes the data into a new prefix block // that will be encrypted. This function will store the data in plain text. // It will be encrypted later (before written to disk). // Returns the amount of space (starting from the start of the prefix) // that has been initialized. virtual size_t PopulateSecretPrefixPart(char* prefix, size_t prefixLength, size_t blockSize); // CreateCipherStreamFromPrefix creates a block access cipher stream for a // file given given name and options. The given prefix is already decrypted. 
virtual Status CreateCipherStreamFromPrefix( const std::string& fname, const EnvOptions& options, uint64_t initialCounter, const Slice& iv, const Slice& prefix, std::unique_ptr* result); }; } // namespace ROCKSDB_NAMESPACE #endif // !defined(ROCKSDB_LITE) rocksdb-6.11.4/include/rocksdb/experimental.h000066400000000000000000000022211370372246700211620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/db.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { namespace experimental { // Supported only for Leveled compaction Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end); Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end); // Move all L0 files to target_level skipping compaction. // This operation succeeds only if the files in L0 have disjoint ranges; this // is guaranteed to happen, for instance, if keys are inserted in sorted // order. Furthermore, all levels between 1 and target_level must be empty. // If any of the above condition is violated, InvalidArgument will be // returned. Status PromoteL0(DB* db, ColumnFamilyHandle* column_family, int target_level = 1); } // namespace experimental } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/file_checksum.h000066400000000000000000000110441370372246700212710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2013 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { // The unknown file checksum. constexpr char kUnknownFileChecksum[] = ""; // The unknown sst file checksum function name. constexpr char kUnknownFileChecksumFuncName[] = "Unknown"; struct FileChecksumGenContext { std::string file_name; }; // FileChecksumGenerator is the class to generates the checksum value // for each file when the file is written to the file system. // Implementations may assume that // * Finalize is called at most once during the life of the object // * All calls to Update come before Finalize // * All calls to GetChecksum come after Finalize class FileChecksumGenerator { public: virtual ~FileChecksumGenerator() {} // Update the current result after process the data. For different checksum // functions, the temporal results may be stored and used in Update to // include the new data. virtual void Update(const char* data, size_t n) = 0; // Generate the final results if no further new data will be updated. virtual void Finalize() = 0; // Get the checksum. The result should not be the empty string and may // include arbitrary bytes, including non-printable characters. virtual std::string GetChecksum() const = 0; // Returns a name that identifies the current file checksum function. virtual const char* Name() const = 0; }; // Create the FileChecksumGenerator object for each SST file. class FileChecksumGenFactory { public: virtual ~FileChecksumGenFactory() {} // Create a new FileChecksumGenerator. virtual std::unique_ptr CreateFileChecksumGenerator( const FileChecksumGenContext& context) = 0; // Return the name of this FileChecksumGenFactory. virtual const char* Name() const = 0; }; // FileChecksumList stores the checksum information of a list of files (e.g., // SST files). 
The FileChecksumLIst can be used to store the checksum // information of all SST file getting from the MANIFEST, which are // the checksum information of all valid SST file of a DB instance. It can // also be used to store the checksum information of a list of SST files to // be ingested. class FileChecksumList { public: virtual ~FileChecksumList() {} // Clean the previously stored file checksum information. virtual void reset() = 0; // Get the number of checksums in the checksum list virtual size_t size() const = 0; // Return all the file checksum information being stored in a unordered_map. // File_number is the key, the first part of the value is checksum value, // and the second part of the value is checksum function name. virtual Status GetAllFileChecksums( std::vector* file_numbers, std::vector* checksums, std::vector* checksum_func_names) = 0; // Given the file_number, it searches if the file checksum information is // stored. virtual Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum, std::string* checksum_func_name) = 0; // Insert the checksum information of one file to the FileChecksumList. virtual Status InsertOneFileChecksum( uint64_t file_number, const std::string& checksum, const std::string& checksum_func_name) = 0; // Remove the checksum information of one SST file. virtual Status RemoveOneFileChecksum(uint64_t file_number) = 0; }; // Create a new file checksum list. extern FileChecksumList* NewFileChecksumList(); // Return a shared_ptr of the builtin Crc32c based file checksum generatory // factory object, which can be shared to create the Crc32c based checksum // generator object. // Note: this implementation is compatible with many other crc32c checksum // implementations and uses big-endian encoding of the result, unlike most // other crc32c checksums in RocksDB, which alter the result with // crc32c::Mask and use little-endian encoding. 
extern std::shared_ptr GetFileChecksumGenCrc32cFactory(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/file_system.h000066400000000000000000001620771370372246700210300ustar00rootroot00000000000000// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // A FileSystem is an interface used by the rocksdb implementation to access // storage functionality like the filesystem etc. Callers // may wish to provide a custom FileSystem object when opening a database to // get fine gain control; e.g., to rate limit file system operations. // // All FileSystem implementations are safe for concurrent access from // multiple threads without any external synchronization. // // WARNING: Since this is a new interface, it is expected that there will be // some changes as storage systems are ported over. #pragma once #include #include #include #include #include #include #include #include #include #include "rocksdb/env.h" #include "rocksdb/io_status.h" #include "rocksdb/options.h" #include "rocksdb/thread_status.h" namespace ROCKSDB_NAMESPACE { class FileLock; class FSDirectory; class FSRandomAccessFile; class FSRandomRWFile; class FSSequentialFile; class FSWritableFile; class Logger; class Slice; struct ImmutableDBOptions; struct MutableDBOptions; class RateLimiter; using AccessPattern = RandomAccessFile::AccessPattern; using FileAttributes = Env::FileAttributes; // Priority of an IO request. This is a hint and does not guarantee any // particular QoS. // IO_LOW - Typically background reads/writes such as compaction/flush // IO_HIGH - Typically user reads/synchronous WAL writes enum class IOPriority : uint8_t { kIOLow, kIOHigh, kIOTotal, }; // Type of the data begin read/written. 
It can be passed down as a flag // for the FileSystem implementation to optionally handle different types in // different ways enum class IOType : uint8_t { kData, kFilter, kIndex, kMetadata, kWAL, kManifest, kLog, kUnknown, kInvalid, }; // Per-request options that can be passed down to the FileSystem // implementation. These are hints and are not necessarily guaranteed to be // honored. More hints can be added here in the future to indicate things like // storage media (HDD/SSD) to be used, replication level etc. struct IOOptions { // Timeout for the operation in microseconds std::chrono::microseconds timeout; // Priority - high or low IOPriority prio; // Type of data being read/written IOType type; IOOptions() : timeout(0), prio(IOPriority::kIOLow), type(IOType::kUnknown) {} }; // File scope options that control how a file is opened/created and accessed // while its open. We may add more options here in the future such as // redundancy level, media to use etc. struct FileOptions : EnvOptions { // Embedded IOOptions to control the parameters for any IOs that need // to be issued for the file open/creation IOOptions io_options; FileOptions() : EnvOptions() {} FileOptions(const DBOptions& opts) : EnvOptions(opts) {} FileOptions(const EnvOptions& opts) : EnvOptions(opts) {} FileOptions(const FileOptions& opts) : EnvOptions(opts), io_options(opts.io_options) {} FileOptions& operator=(const FileOptions& opts) = default; }; // A structure to pass back some debugging information from the FileSystem // implementation to RocksDB in case of an IO error struct IODebugContext { // file_path to be filled in by RocksDB in case of an error std::string file_path; // A map of counter names to values - set by the FileSystem implementation std::map counters; // To be set by the FileSystem implementation std::string msg; IODebugContext() {} void AddCounter(std::string& name, uint64_t value) { counters.emplace(name, value); } std::string ToString() { std::ostringstream ss; ss << 
file_path << ", "; for (auto counter : counters) { ss << counter.first << " = " << counter.second << ","; } ss << msg; return ss.str(); } }; // The FileSystem, FSSequentialFile, FSRandomAccessFile, FSWritableFile, // FSRandomRWFileclass, and FSDIrectory classes define the interface between // RocksDB and storage systems, such as Posix filesystems, // remote filesystems etc. // The interface allows for fine grained control of individual IO operations, // such as setting a timeout, prioritization, hints on data placement, // different handling based on type of IO etc. // This is accomplished by passing an instance of IOOptions to every // API call that can potentially perform IO. Additionally, each such API is // passed a pointer to a IODebugContext structure that can be used by the // storage system to include troubleshooting information. The return values // of the APIs is of type IOStatus, which can indicate an error code/sub-code, // as well as metadata about the error such as its scope and whether its // retryable. class FileSystem { public: FileSystem(); // No copying allowed FileSystem(const FileSystem&) = delete; virtual ~FileSystem(); virtual const char* Name() const = 0; static const char* Type() { return "FileSystem"; } // Loads the FileSystem specified by the input value into the result static Status Load(const std::string& value, std::shared_ptr* result); // Return a default fie_system suitable for the current operating // system. Sophisticated users may wish to provide their own Env // implementation instead of relying on this default file_system // // The result of Default() belongs to rocksdb and must never be deleted. static std::shared_ptr Default(); // Handles the event when a new DB or a new ColumnFamily starts using the // specified data paths. // // The data paths might be shared by different DBs or ColumnFamilies, // so RegisterDbPaths might be called with the same data paths. 
// For example, when CreateColumnFamily is called multiple times with the same // data path, RegisterDbPaths will also be called with the same data path. // // If the return status is ok, then the paths must be correspondingly // called in UnregisterDbPaths; // otherwise this method should have no side effect, and UnregisterDbPaths // do not need to be called for the paths. // // Different implementations may take different actions. // By default, it's a no-op and returns Status::OK. virtual Status RegisterDbPaths(const std::vector& /*paths*/) { return Status::OK(); } // Handles the event a DB or a ColumnFamily stops using the specified data // paths. // // It should be called corresponding to each successful RegisterDbPaths. // // Different implementations may take different actions. // By default, it's a no-op and returns Status::OK. virtual Status UnregisterDbPaths(const std::vector& /*paths*/) { return Status::OK(); } // Create a brand new sequentially-readable file with the specified name. // On success, stores a pointer to the new file in *result and returns OK. // On failure stores nullptr in *result and returns non-OK. If the file does // not exist, returns a non-OK status. // // The returned file will only be accessed by one thread at a time. virtual IOStatus NewSequentialFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) = 0; // Create a brand new random access read-only file with the // specified name. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. If the file does not exist, returns a non-OK // status. // // The returned file may be concurrently accessed by multiple threads. 
virtual IOStatus NewRandomAccessFile( const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) = 0; // These values match Linux definition // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56 enum WriteLifeTimeHint { kWLTHNotSet = 0, // No hint information set kWLTHNone, // No hints about write life time kWLTHShort, // Data written has a short life time kWLTHMedium, // Data written has a medium life time kWLTHLong, // Data written has a long life time kWLTHExtreme, // Data written has an extremely long life time }; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a // new file. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual IOStatus NewWritableFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) = 0; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a // new file. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual IOStatus ReopenWritableFile( const std::string& /*fname*/, const FileOptions& /*options*/, std::unique_ptr* /*result*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported(); } // Reuse an existing file by renaming it and opening it as writable. virtual IOStatus ReuseWritableFile(const std::string& fname, const std::string& old_fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg); // Open `fname` for random read and write, if file doesn't exist the file // will be created. 
On success, stores a pointer to the new file in // *result and returns OK. On failure returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual IOStatus NewRandomRWFile(const std::string& /*fname*/, const FileOptions& /*options*/, std::unique_ptr* /*result*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported( "RandomRWFile is not implemented in this FileSystem"); } // Opens `fname` as a memory-mapped file for read and write (in-place updates // only, i.e., no appends). On success, stores a raw buffer covering the whole // file in `*result`. The file must exist prior to this call. virtual IOStatus NewMemoryMappedFileBuffer( const std::string& /*fname*/, std::unique_ptr* /*result*/) { return IOStatus::NotSupported( "MemoryMappedFileBuffer is not implemented in this FileSystem"); } // Create an object that represents a directory. Will fail if directory // doesn't exist. If the directory exists, it will open the directory // and create a new Directory object. // // On success, stores a pointer to the new Directory in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. virtual IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts, std::unique_ptr* result, IODebugContext* dbg) = 0; // Returns OK if the named file exists. // NotFound if the named file does not exist, // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered virtual IOStatus FileExists(const std::string& fname, const IOOptions& options, IODebugContext* dbg) = 0; // Store in *result the names of the children of the specified directory. // The names are relative to "dir". // Original contents of *results are dropped. // Returns OK if "dir" exists and "*result" contains its children. // NotFound if "dir" does not exist, the calling process does not have // permission to access "dir", or if "dir" is invalid. 
// IOError if an IO Error was encountered virtual IOStatus GetChildren(const std::string& dir, const IOOptions& options, std::vector* result, IODebugContext* dbg) = 0; // Store in *result the attributes of the children of the specified directory. // In case the implementation lists the directory prior to iterating the files // and files are concurrently deleted, the deleted files will be omitted from // result. // The name attributes are relative to "dir". // Original contents of *results are dropped. // Returns OK if "dir" exists and "*result" contains its children. // NotFound if "dir" does not exist, the calling process does not have // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered virtual IOStatus GetChildrenFileAttributes( const std::string& dir, const IOOptions& options, std::vector* result, IODebugContext* dbg) { assert(result != nullptr); std::vector child_fnames; IOStatus s = GetChildren(dir, options, &child_fnames, dbg); if (!s.ok()) { return s; } result->resize(child_fnames.size()); size_t result_size = 0; for (size_t i = 0; i < child_fnames.size(); ++i) { const std::string path = dir + "/" + child_fnames[i]; if (!(s = GetFileSize(path, options, &(*result)[result_size].size_bytes, dbg)) .ok()) { if (FileExists(path, options, dbg).IsNotFound()) { // The file may have been deleted since we listed the directory continue; } return s; } (*result)[result_size].name = std::move(child_fnames[i]); result_size++; } result->resize(result_size); return IOStatus::OK(); } // Delete the named file. virtual IOStatus DeleteFile(const std::string& fname, const IOOptions& options, IODebugContext* dbg) = 0; // Truncate the named file to the specified size. virtual IOStatus Truncate(const std::string& /*fname*/, size_t /*size*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported("Truncate is not supported for this FileSystem"); } // Create the specified directory. 
Returns error if directory exists. virtual IOStatus CreateDir(const std::string& dirname, const IOOptions& options, IODebugContext* dbg) = 0; // Creates directory if missing. Return Ok if it exists, or successful in // Creating. virtual IOStatus CreateDirIfMissing(const std::string& dirname, const IOOptions& options, IODebugContext* dbg) = 0; // Delete the specified directory. virtual IOStatus DeleteDir(const std::string& dirname, const IOOptions& options, IODebugContext* dbg) = 0; // Store the size of fname in *file_size. virtual IOStatus GetFileSize(const std::string& fname, const IOOptions& options, uint64_t* file_size, IODebugContext* dbg) = 0; // Store the last modification time of fname in *file_mtime. virtual IOStatus GetFileModificationTime(const std::string& fname, const IOOptions& options, uint64_t* file_mtime, IODebugContext* dbg) = 0; // Rename file src to target. virtual IOStatus RenameFile(const std::string& src, const std::string& target, const IOOptions& options, IODebugContext* dbg) = 0; // Hard Link file src to target. virtual IOStatus LinkFile(const std::string& /*src*/, const std::string& /*target*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported("LinkFile is not supported for this FileSystem"); } virtual IOStatus NumFileLinks(const std::string& /*fname*/, const IOOptions& /*options*/, uint64_t* /*count*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported( "Getting number of file links is not supported for this FileSystem"); } virtual IOStatus AreFilesSame(const std::string& /*first*/, const std::string& /*second*/, const IOOptions& /*options*/, bool* /*res*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported("AreFilesSame is not supported for this FileSystem"); } // Lock the specified file. Used to prevent concurrent access to // the same db by multiple processes. On failure, stores nullptr in // *lock and returns non-OK. 
// // On success, stores a pointer to the object that represents the // acquired lock in *lock and returns OK. The caller should call // UnlockFile(*lock) to release the lock. If the process exits, // the lock will be automatically released. // // If somebody else already holds the lock, finishes immediately // with a failure. I.e., this call does not wait for existing locks // to go away. // // May create the named file if it does not already exist. virtual IOStatus LockFile(const std::string& fname, const IOOptions& options, FileLock** lock, IODebugContext* dbg) = 0; // Release the lock acquired by a previous successful call to LockFile. // REQUIRES: lock was returned by a successful LockFile() call // REQUIRES: lock has not already been unlocked. virtual IOStatus UnlockFile(FileLock* lock, const IOOptions& options, IODebugContext* dbg) = 0; // *path is set to a temporary directory that can be used for testing. It may // or many not have just been created. The directory may or may not differ // between runs of the same process, but subsequent calls will return the // same directory. virtual IOStatus GetTestDirectory(const IOOptions& options, std::string* path, IODebugContext* dbg) = 0; // Create and returns a default logger (an instance of EnvLogger) for storing // informational messages. Derived classes can overide to provide custom // logger. virtual IOStatus NewLogger(const std::string& fname, const IOOptions& io_opts, std::shared_ptr* result, IODebugContext* dbg) = 0; // Get full directory name for this db. virtual IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options, std::string* output_path, IODebugContext* dbg) = 0; // Sanitize the FileOptions. Typically called by a FileOptions/EnvOptions // copy constructor virtual void SanitizeFileOptions(FileOptions* /*opts*/) const {} // OptimizeForLogRead will create a new FileOptions object that is a copy of // the FileOptions in the parameters, but is optimized for reading log files. 
virtual FileOptions OptimizeForLogRead(const FileOptions& file_options) const; // OptimizeForManifestRead will create a new FileOptions object that is a copy // of the FileOptions in the parameters, but is optimized for reading manifest // files. virtual FileOptions OptimizeForManifestRead( const FileOptions& file_options) const; // OptimizeForLogWrite will create a new FileOptions object that is a copy of // the FileOptions in the parameters, but is optimized for writing log files. // Default implementation returns the copy of the same object. virtual FileOptions OptimizeForLogWrite(const FileOptions& file_options, const DBOptions& db_options) const; // OptimizeForManifestWrite will create a new FileOptions object that is a // copy of the FileOptions in the parameters, but is optimized for writing // manifest files. Default implementation returns the copy of the same // object. virtual FileOptions OptimizeForManifestWrite( const FileOptions& file_options) const; // OptimizeForCompactionTableWrite will create a new FileOptions object that // is a copy of the FileOptions in the parameters, but is optimized for // writing table files. virtual FileOptions OptimizeForCompactionTableWrite( const FileOptions& file_options, const ImmutableDBOptions& immutable_ops) const; // OptimizeForCompactionTableRead will create a new FileOptions object that // is a copy of the FileOptions in the parameters, but is optimized for // reading table files. 
virtual FileOptions OptimizeForCompactionTableRead( const FileOptions& file_options, const ImmutableDBOptions& db_options) const; // This seems to clash with a macro on Windows, so #undef it here #ifdef GetFreeSpace #undef GetFreeSpace #endif // Get the amount of free disk space virtual IOStatus GetFreeSpace(const std::string& /*path*/, const IOOptions& /*options*/, uint64_t* /*diskfree*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported(); } virtual IOStatus IsDirectory(const std::string& /*path*/, const IOOptions& options, bool* is_dir, IODebugContext* /*dgb*/) = 0; // If you're adding methods here, remember to add them to EnvWrapper too. private: void operator=(const FileSystem&); }; // A file abstraction for reading sequentially through a file class FSSequentialFile { public: FSSequentialFile() {} virtual ~FSSequentialFile() {} // Read up to "n" bytes from the file. "scratch[0..n-1]" may be // written by this routine. Sets "*result" to the data that was // read (including if fewer than "n" bytes were successfully read). // May set "*result" to point at data in "scratch[0..n-1]", so // "scratch[0..n-1]" must be live when "*result" is used. // If an error was encountered, returns a non-OK status. // // REQUIRES: External synchronization virtual IOStatus Read(size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) = 0; // Skip "n" bytes from the file. This is guaranteed to be no // slower that reading the same data, but may be faster. // // If end of file is reached, skipping will stop at the end of the // file, and Skip will return OK. // // REQUIRES: External synchronization virtual IOStatus Skip(uint64_t n) = 0; // Indicates the upper layers if the current SequentialFile implementation // uses direct IO. 
virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. virtual IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) { return IOStatus::NotSupported("InvalidateCache not supported."); } // Positioned Read for direct I/O // If Direct I/O enabled, offset, n, and scratch should be properly aligned virtual IOStatus PositionedRead(uint64_t /*offset*/, size_t /*n*/, const IOOptions& /*options*/, Slice* /*result*/, char* /*scratch*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported(); } // If you're adding methods here, remember to add them to // SequentialFileWrapper too. }; // A read IO request structure for use in MultiRead struct FSReadRequest { // File offset in bytes uint64_t offset; // Length to read in bytes size_t len; // A buffer that MultiRead() can optionally place data in. It can // ignore this and allocate its own buffer char* scratch; // Output parameter set by MultiRead() to point to the data buffer, and // the number of valid bytes Slice result; // Status of read IOStatus status; }; // A file abstraction for randomly reading the contents of a file. class FSRandomAccessFile { public: FSRandomAccessFile() {} virtual ~FSRandomAccessFile() {} // Read up to "n" bytes from the file starting at "offset". // "scratch[0..n-1]" may be written by this routine. Sets "*result" // to the data that was read (including if fewer than "n" bytes were // successfully read). May set "*result" to point at data in // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when // "*result" is used. If an error was encountered, returns a non-OK // status. 
// // Safe for concurrent use by multiple threads. // If Direct I/O enabled, offset, n, and scratch should be aligned properly. virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const = 0; // Readahead the file starting from offset by n bytes for caching. virtual IOStatus Prefetch(uint64_t /*offset*/, size_t /*n*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return IOStatus::OK(); } // Read a bunch of blocks as described by reqs. The blocks can // optionally be read in parallel. This is a synchronous call, i.e it // should return after all reads have completed. The reads will be // non-overlapping. If the function return Status is not ok, status of // individual requests will be ignored and return status will be assumed // for all read requests. The function return status is only meant for any // any errors that occur before even processing specific read requests virtual IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) { assert(reqs != nullptr); for (size_t i = 0; i < num_reqs; ++i) { FSReadRequest& req = reqs[i]; req.status = Read(req.offset, req.len, options, &req.result, req.scratch, dbg); } return IOStatus::OK(); } // Tries to get an unique ID for this file that will be the same each time // the file is opened (and will stay the same while the file is open). // Furthermore, it tries to make this ID at most "max_size" bytes. If such an // ID can be created this function returns the length of the ID and places it // in "id"; otherwise, this function returns 0, in which case "id" // may not have been modified. // // This function guarantees, for IDs from a given environment, two unique ids // cannot be made equal to each other by adding arbitrary bytes to one of // them. That is, no unique ID is the prefix of another. // // This function guarantees that the returned ID will not be interpretable as // a single varint. 
// // Note: these IDs are only valid for the duration of the process. virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; // Default implementation to prevent issues with backwards // compatibility. }; enum AccessPattern { kNormal, kRandom, kSequential, kWillNeed, kWontNeed }; virtual void Hint(AccessPattern /*pattern*/) {} // Indicates the upper layers if the current RandomAccessFile implementation // uses direct IO. virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. virtual IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) { return IOStatus::NotSupported("InvalidateCache not supported."); } // If you're adding methods here, remember to add them to // RandomAccessFileWrapper too. }; // A file abstraction for sequential writing. The implementation // must provide buffering since callers may append small fragments // at a time to the file. class FSWritableFile { public: FSWritableFile() : last_preallocated_block_(0), preallocation_block_size_(0), io_priority_(Env::IO_TOTAL), write_hint_(Env::WLTH_NOT_SET), strict_bytes_per_sync_(false) {} explicit FSWritableFile(const FileOptions& options) : last_preallocated_block_(0), preallocation_block_size_(0), io_priority_(Env::IO_TOTAL), write_hint_(Env::WLTH_NOT_SET), strict_bytes_per_sync_(options.strict_bytes_per_sync) {} virtual ~FSWritableFile() {} // Append data to the end of the file // Note: A WriteabelFile object must support either Append or // PositionedAppend, so the users cannot mix the two. 
virtual IOStatus Append(const Slice& data, const IOOptions& options, IODebugContext* dbg) = 0; // PositionedAppend data to the specified offset. The new EOF after append // must be larger than the previous EOF. This is to be used when writes are // not backed by OS buffers and hence has to always start from the start of // the sector. The implementation thus needs to also rewrite the last // partial sector. // Note: PositionAppend does not guarantee moving the file offset after the // write. A WritableFile object must support either Append or // PositionedAppend, so the users cannot mix the two. // // PositionedAppend() can only happen on the page/sector boundaries. For that // reason, if the last write was an incomplete sector we still need to rewind // back to the nearest sector/page and rewrite the portion of it with whatever // we need to add. We need to keep where we stop writing. // // PositionedAppend() can only write whole sectors. For that reason we have to // pad with zeros for the last write and trim the file when closing according // to the position we keep in the previous step. // // PositionedAppend() requires aligned buffer to be passed in. The alignment // required is queried via GetRequiredBufferAlignment() virtual IOStatus PositionedAppend(const Slice& /* data */, uint64_t /* offset */, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return IOStatus::NotSupported(); } // Truncate is necessary to trim the file to the correct size // before closing. It is not always possible to keep track of the file // size due to whole pages writes. The behavior is undefined if called // with other writes to follow. 
virtual IOStatus Truncate(uint64_t /*size*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return IOStatus::OK(); } virtual IOStatus Close(const IOOptions& options, IODebugContext* dbg) = 0; virtual IOStatus Flush(const IOOptions& options, IODebugContext* dbg) = 0; virtual IOStatus Sync(const IOOptions& options, IODebugContext* dbg) = 0; // sync data /* * Sync data and/or metadata as well. * By default, sync only data. * Override this method for environments where we need to sync * metadata as well. */ virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) { return Sync(options, dbg); } // true if Sync() and Fsync() are safe to call concurrently with Append() // and Flush(). virtual bool IsSyncThreadSafe() const { return false; } // Indicates the upper layers if the current WritableFile implementation // uses direct IO. virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) { write_hint_ = hint; } virtual void SetIOPriority(Env::IOPriority pri) { io_priority_ = pri; } virtual Env::IOPriority GetIOPriority() { return io_priority_; } virtual Env::WriteLifeTimeHint GetWriteLifeTimeHint() { return write_hint_; } /* * Get the size of valid data in the file. */ virtual uint64_t GetFileSize(const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return 0; } /* * Get and set the default pre-allocation block size for writes to * this file. If non-zero, then Allocate will be used to extend the * underlying storage of a file (generally via fallocate) if the Env * instance supports it. 
*/ virtual void SetPreallocationBlockSize(size_t size) { preallocation_block_size_ = size; } virtual void GetPreallocationStatus(size_t* block_size, size_t* last_allocated_block) { *last_allocated_block = last_preallocated_block_; *block_size = preallocation_block_size_; } // For documentation, refer to RandomAccessFile::GetUniqueId() virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; // Default implementation to prevent issues with backwards } // Remove any kind of caching of data from the offset to offset+length // of this file. If the length is 0, then it refers to the end of file. // If the system is not caching the file contents, then this is a noop. // This call has no effect on dirty pages in the cache. virtual IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) { return IOStatus::NotSupported("InvalidateCache not supported."); } // Sync a file range with disk. // offset is the starting byte of the file range to be synchronized. // nbytes specifies the length of the range to be synchronized. // This asks the OS to initiate flushing the cached data to disk, // without waiting for completion. // Default implementation does nothing. virtual IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/, const IOOptions& options, IODebugContext* dbg) { if (strict_bytes_per_sync_) { return Sync(options, dbg); } return IOStatus::OK(); } // PrepareWrite performs any necessary preparation for a write // before the write actually occurs. This allows for pre-allocation // of space on devices where it can result in less file // fragmentation and/or less waste from over-zealous filesystem // pre-allocation. virtual void PrepareWrite(size_t offset, size_t len, const IOOptions& options, IODebugContext* dbg) { if (preallocation_block_size_ == 0) { return; } // If this write would cross one or more preallocation blocks, // determine what the last preallocation block necessary to // cover this write would be and Allocate to that point. 
const auto block_size = preallocation_block_size_; size_t new_last_preallocated_block = (offset + len + block_size - 1) / block_size; if (new_last_preallocated_block > last_preallocated_block_) { size_t num_spanned_blocks = new_last_preallocated_block - last_preallocated_block_; Allocate(block_size * last_preallocated_block_, block_size * num_spanned_blocks, options, dbg); last_preallocated_block_ = new_last_preallocated_block; } } // Pre-allocates space for a file. virtual IOStatus Allocate(uint64_t /*offset*/, uint64_t /*len*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) { return IOStatus::OK(); } // If you're adding methods here, remember to add them to // WritableFileWrapper too. protected: size_t preallocation_block_size() { return preallocation_block_size_; } private: size_t last_preallocated_block_; size_t preallocation_block_size_; // No copying allowed FSWritableFile(const FSWritableFile&); void operator=(const FSWritableFile&); protected: Env::IOPriority io_priority_; Env::WriteLifeTimeHint write_hint_; const bool strict_bytes_per_sync_; }; // A file abstraction for random reading and writing. class FSRandomRWFile { public: FSRandomRWFile() {} virtual ~FSRandomRWFile() {} // Indicates if the class makes use of direct I/O // If false you must pass aligned buffer to Write() virtual bool use_direct_io() const { return false; } // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. // Pass aligned buffer when use_direct_io() returns true. virtual IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options, IODebugContext* dbg) = 0; // Read up to `n` bytes starting from offset `offset` and store them in // result, provided `scratch` size should be at least `n`. // Returns Status::OK() on success. 
virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const = 0; virtual IOStatus Flush(const IOOptions& options, IODebugContext* dbg) = 0; virtual IOStatus Sync(const IOOptions& options, IODebugContext* dbg) = 0; virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) { return Sync(options, dbg); } virtual IOStatus Close(const IOOptions& options, IODebugContext* dbg) = 0; // If you're adding methods here, remember to add them to // RandomRWFileWrapper too. // No copying allowed FSRandomRWFile(const RandomRWFile&) = delete; FSRandomRWFile& operator=(const RandomRWFile&) = delete; }; // MemoryMappedFileBuffer object represents a memory-mapped file's raw buffer. // Subclasses should release the mapping upon destruction. class FSMemoryMappedFileBuffer { public: FSMemoryMappedFileBuffer(void* _base, size_t _length) : base_(_base), length_(_length) {} virtual ~FSMemoryMappedFileBuffer() = 0; // We do not want to unmap this twice. We can make this class // movable if desired, however, since FSMemoryMappedFileBuffer(const FSMemoryMappedFileBuffer&) = delete; FSMemoryMappedFileBuffer& operator=(const FSMemoryMappedFileBuffer&) = delete; void* GetBase() const { return base_; } size_t GetLen() const { return length_; } protected: void* base_; const size_t length_; }; // Directory object represents collection of files and implements // filesystem operations that can be executed on directories. class FSDirectory { public: virtual ~FSDirectory() {} // Fsync directory. Can be called concurrently from multiple threads. virtual IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) = 0; virtual size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const { return 0; } // If you're adding methods here, remember to add them to // DirectoryWrapper too. }; // Below are helpers for wrapping most of the classes in this file. // They forward all calls to another instance of the class. 
// Useful when wrapping the default implementations.
// Typical usage is to inherit your wrapper from *Wrapper, e.g.:
//
// class MySequentialFileWrapper : public
// ROCKSDB_NAMESPACE::FSSequentialFileWrapper {
//  public:
//   MySequentialFileWrapper(ROCKSDB_NAMESPACE::FSSequentialFile* target):
//     ROCKSDB_NAMESPACE::FSSequentialFileWrapper(target) {}
//   IOStatus Read(size_t n, const IOOptions& options, Slice* result,
//                 char* scratch, IODebugContext* dbg) override {
//     cout << "Doing a read of size " << n << "!" << endl;
//     return ROCKSDB_NAMESPACE::FSSequentialFileWrapper::Read(n, options,
//                                                             result,
//                                                             scratch, dbg);
//   }
//   // All other methods are forwarded to target_ automatically.
// };
//
// This is often more convenient than inheriting the class directly because
// (a) Don't have to override and forward all methods - the Wrapper will
//     forward everything you're not explicitly overriding.
// (b) Don't need to update the wrapper when more methods are added to the
//     rocksdb class. Unless you actually want to override the behavior.
//     (And unless rocksdb people forgot to update the *Wrapper class.)

// An implementation of FileSystem that forwards all calls to another
// FileSystem. May be useful to clients who wish to override just part of the
// functionality of another FileSystem.
class FileSystemWrapper : public FileSystem { public: // Initialize an EnvWrapper that delegates all calls to *t explicit FileSystemWrapper(std::shared_ptr t) : target_(t) {} ~FileSystemWrapper() override {} const char* Name() const override { return target_->Name(); } // Return the target to which this Env forwards all calls FileSystem* target() const { return target_.get(); } // The following text is boilerplate that forwards all methods to target() IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* dbg) override { return target_->NewSequentialFile(f, file_opts, r, dbg); } IOStatus NewRandomAccessFile(const std::string& f, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* dbg) override { return target_->NewRandomAccessFile(f, file_opts, r, dbg); } IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* dbg) override { return target_->NewWritableFile(f, file_opts, r, dbg); } IOStatus ReopenWritableFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) override { return target_->ReopenWritableFile(fname, file_opts, result, dbg); } IOStatus ReuseWritableFile(const std::string& fname, const std::string& old_fname, const FileOptions& file_opts, std::unique_ptr* r, IODebugContext* dbg) override { return target_->ReuseWritableFile(fname, old_fname, file_opts, r, dbg); } IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) override { return target_->NewRandomRWFile(fname, file_opts, result, dbg); } IOStatus NewMemoryMappedFileBuffer( const std::string& fname, std::unique_ptr* result) override { return target_->NewMemoryMappedFileBuffer(fname, result); } IOStatus NewDirectory(const std::string& name, const IOOptions& io_opts, std::unique_ptr* result, IODebugContext* dbg) override { return 
target_->NewDirectory(name, io_opts, result, dbg); } IOStatus FileExists(const std::string& f, const IOOptions& io_opts, IODebugContext* dbg) override { return target_->FileExists(f, io_opts, dbg); } IOStatus GetChildren(const std::string& dir, const IOOptions& io_opts, std::vector* r, IODebugContext* dbg) override { return target_->GetChildren(dir, io_opts, r, dbg); } IOStatus GetChildrenFileAttributes(const std::string& dir, const IOOptions& options, std::vector* result, IODebugContext* dbg) override { return target_->GetChildrenFileAttributes(dir, options, result, dbg); } IOStatus DeleteFile(const std::string& f, const IOOptions& options, IODebugContext* dbg) override { return target_->DeleteFile(f, options, dbg); } IOStatus Truncate(const std::string& fname, size_t size, const IOOptions& options, IODebugContext* dbg) override { return target_->Truncate(fname, size, options, dbg); } IOStatus CreateDir(const std::string& d, const IOOptions& options, IODebugContext* dbg) override { return target_->CreateDir(d, options, dbg); } IOStatus CreateDirIfMissing(const std::string& d, const IOOptions& options, IODebugContext* dbg) override { return target_->CreateDirIfMissing(d, options, dbg); } IOStatus DeleteDir(const std::string& d, const IOOptions& options, IODebugContext* dbg) override { return target_->DeleteDir(d, options, dbg); } IOStatus GetFileSize(const std::string& f, const IOOptions& options, uint64_t* s, IODebugContext* dbg) override { return target_->GetFileSize(f, options, s, dbg); } IOStatus GetFileModificationTime(const std::string& fname, const IOOptions& options, uint64_t* file_mtime, IODebugContext* dbg) override { return target_->GetFileModificationTime(fname, options, file_mtime, dbg); } IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options, std::string* output_path, IODebugContext* dbg) override { return target_->GetAbsolutePath(db_path, options, output_path, dbg); } IOStatus RenameFile(const std::string& s, const 
std::string& t, const IOOptions& options, IODebugContext* dbg) override { return target_->RenameFile(s, t, options, dbg); } IOStatus LinkFile(const std::string& s, const std::string& t, const IOOptions& options, IODebugContext* dbg) override { return target_->LinkFile(s, t, options, dbg); } IOStatus NumFileLinks(const std::string& fname, const IOOptions& options, uint64_t* count, IODebugContext* dbg) override { return target_->NumFileLinks(fname, options, count, dbg); } IOStatus AreFilesSame(const std::string& first, const std::string& second, const IOOptions& options, bool* res, IODebugContext* dbg) override { return target_->AreFilesSame(first, second, options, res, dbg); } IOStatus LockFile(const std::string& f, const IOOptions& options, FileLock** l, IODebugContext* dbg) override { return target_->LockFile(f, options, l, dbg); } IOStatus UnlockFile(FileLock* l, const IOOptions& options, IODebugContext* dbg) override { return target_->UnlockFile(l, options, dbg); } IOStatus GetTestDirectory(const IOOptions& options, std::string* path, IODebugContext* dbg) override { return target_->GetTestDirectory(options, path, dbg); } IOStatus NewLogger(const std::string& fname, const IOOptions& options, std::shared_ptr* result, IODebugContext* dbg) override { return target_->NewLogger(fname, options, result, dbg); } void SanitizeFileOptions(FileOptions* opts) const override { target_->SanitizeFileOptions(opts); } FileOptions OptimizeForLogRead( const FileOptions& file_options) const override { return target_->OptimizeForLogRead(file_options); } FileOptions OptimizeForManifestRead( const FileOptions& file_options) const override { return target_->OptimizeForManifestRead(file_options); } FileOptions OptimizeForLogWrite(const FileOptions& file_options, const DBOptions& db_options) const override { return target_->OptimizeForLogWrite(file_options, db_options); } FileOptions OptimizeForManifestWrite( const FileOptions& file_options) const override { return 
target_->OptimizeForManifestWrite(file_options); } FileOptions OptimizeForCompactionTableWrite( const FileOptions& file_options, const ImmutableDBOptions& immutable_ops) const override { return target_->OptimizeForCompactionTableWrite(file_options, immutable_ops); } FileOptions OptimizeForCompactionTableRead( const FileOptions& file_options, const ImmutableDBOptions& db_options) const override { return target_->OptimizeForCompactionTableRead(file_options, db_options); } IOStatus GetFreeSpace(const std::string& path, const IOOptions& options, uint64_t* diskfree, IODebugContext* dbg) override { return target_->GetFreeSpace(path, options, diskfree, dbg); } IOStatus IsDirectory(const std::string& path, const IOOptions& options, bool* is_dir, IODebugContext* dbg) override { return target_->IsDirectory(path, options, is_dir, dbg); } private: std::shared_ptr target_; }; class FSSequentialFileWrapper : public FSSequentialFile { public: explicit FSSequentialFileWrapper(FSSequentialFile* target) : target_(target) {} IOStatus Read(size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) override { return target_->Read(n, options, result, scratch, dbg); } IOStatus Skip(uint64_t n) override { return target_->Skip(n); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } IOStatus InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) override { return target_->PositionedRead(offset, n, options, result, scratch, dbg); } private: FSSequentialFile* target_; }; class FSRandomAccessFileWrapper : public FSRandomAccessFile { public: explicit FSRandomAccessFileWrapper(FSRandomAccessFile* target) : target_(target) {} IOStatus Read(uint64_t offset, size_t n, 
const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const override { return target_->Read(offset, n, options, result, scratch, dbg); } IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, const IOOptions& options, IODebugContext* dbg) override { return target_->MultiRead(reqs, num_reqs, options, dbg); } IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options, IODebugContext* dbg) override { return target_->Prefetch(offset, n, options, dbg); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); }; void Hint(AccessPattern pattern) override { target_->Hint(pattern); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } IOStatus InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } private: FSRandomAccessFile* target_; }; class FSWritableFileWrapper : public FSWritableFile { public: explicit FSWritableFileWrapper(FSWritableFile* t) : target_(t) {} IOStatus Append(const Slice& data, const IOOptions& options, IODebugContext* dbg) override { return target_->Append(data, options, dbg); } IOStatus PositionedAppend(const Slice& data, uint64_t offset, const IOOptions& options, IODebugContext* dbg) override { return target_->PositionedAppend(data, offset, options, dbg); } IOStatus Truncate(uint64_t size, const IOOptions& options, IODebugContext* dbg) override { return target_->Truncate(size, options, dbg); } IOStatus Close(const IOOptions& options, IODebugContext* dbg) override { return target_->Close(options, dbg); } IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override { return target_->Flush(options, dbg); } IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override { return target_->Sync(options, dbg); } IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override { 
return target_->Fsync(options, dbg); } bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { target_->SetWriteLifeTimeHint(hint); } Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { return target_->GetWriteLifeTimeHint(); } uint64_t GetFileSize(const IOOptions& options, IODebugContext* dbg) override { return target_->GetFileSize(options, dbg); } void SetPreallocationBlockSize(size_t size) override { target_->SetPreallocationBlockSize(size); } void GetPreallocationStatus(size_t* block_size, size_t* last_allocated_block) override { target_->GetPreallocationStatus(block_size, last_allocated_block); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } IOStatus InvalidateCache(size_t offset, size_t length) override { return target_->InvalidateCache(offset, length); } IOStatus RangeSync(uint64_t offset, uint64_t nbytes, const IOOptions& options, IODebugContext* dbg) override { return target_->RangeSync(offset, nbytes, options, dbg); } void PrepareWrite(size_t offset, size_t len, const IOOptions& options, IODebugContext* dbg) override { target_->PrepareWrite(offset, len, options, dbg); } IOStatus Allocate(uint64_t offset, uint64_t len, const IOOptions& options, IODebugContext* dbg) override { return target_->Allocate(offset, len, options, dbg); } private: FSWritableFile* target_; }; class FSRandomRWFileWrapper : public FSRandomRWFile { public: explicit FSRandomRWFileWrapper(FSRandomRWFile* target) : target_(target) {} bool use_direct_io() const override { return target_->use_direct_io(); } size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } IOStatus Write(uint64_t offset, const Slice& data, const 
IOOptions& options, IODebugContext* dbg) override { return target_->Write(offset, data, options, dbg); } IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const override { return target_->Read(offset, n, options, result, scratch, dbg); } IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override { return target_->Flush(options, dbg); } IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override { return target_->Sync(options, dbg); } IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override { return target_->Fsync(options, dbg); } IOStatus Close(const IOOptions& options, IODebugContext* dbg) override { return target_->Close(options, dbg); } private: FSRandomRWFile* target_; }; class FSDirectoryWrapper : public FSDirectory { public: explicit FSDirectoryWrapper(FSDirectory* target) : target_(target) {} IOStatus Fsync(const IOOptions& options, IODebugContext* dbg) override { return target_->Fsync(options, dbg); } size_t GetUniqueId(char* id, size_t max_size) const override { return target_->GetUniqueId(id, max_size); } private: FSDirectory* target_; }; // A utility routine: write "data" to the named file. extern IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, const std::string& fname, bool should_sync = false); // A utility routine: read contents of named file into *data extern IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, std::string* data); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/filter_policy.h000066400000000000000000000210641370372246700213370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // A database can be configured with a custom FilterPolicy object. // This object is responsible for creating a small filter from a set // of keys. These filters are stored in rocksdb and are consulted // automatically by rocksdb to decide whether or not to read some // information from disk. In many cases, a filter can cut down the // number of disk seeks form a handful to a single disk seek per // DB::Get() call. // // Most people will want to use the builtin bloom filter support (see // NewBloomFilterPolicy() below). #pragma once #include #include #include #include #include #include "rocksdb/advanced_options.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Slice; struct BlockBasedTableOptions; struct ConfigOptions; // A class that takes a bunch of keys, then generates filter class FilterBitsBuilder { public: virtual ~FilterBitsBuilder() {} // Add Key to filter, you could use any way to store the key. // Such as: storing hashes or original keys // Keys are in sorted order and duplicated keys are possible. virtual void AddKey(const Slice& key) = 0; // Generate the filter using the keys that are added // The return value of this function would be the filter bits, // The ownership of actual data is set to buf virtual Slice Finish(std::unique_ptr* buf) = 0; // Calculate num of keys that can be added and generate a filter // <= the specified number of bytes. 
#if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4702) // unreachable code #endif virtual int CalculateNumEntry(const uint32_t /*bytes*/) { #ifndef ROCKSDB_LITE throw std::runtime_error("CalculateNumEntry not Implemented"); #else abort(); #endif return 0; } #if defined(_MSC_VER) #pragma warning(pop) #endif }; // A class that checks if a key can be in filter // It should be initialized by Slice generated by BitsBuilder class FilterBitsReader { public: virtual ~FilterBitsReader() {} // Check if the entry match the bits in filter virtual bool MayMatch(const Slice& entry) = 0; // Check if an array of entries match the bits in filter virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) { for (int i = 0; i < num_keys; ++i) { may_match[i] = MayMatch(*keys[i]); } } }; // Contextual information passed to BloomFilterPolicy at filter building time. // Used in overriding FilterPolicy::GetBuilderWithContext(). References other // structs because this is expected to be a temporary, stack-allocated object. struct FilterBuildingContext { // This constructor is for internal use only and subject to change. FilterBuildingContext(const BlockBasedTableOptions& table_options); // Options for the table being built const BlockBasedTableOptions& table_options; // Name of the column family for the table (or empty string if unknown) std::string column_family_name; // The compactions style in effect for the table CompactionStyle compaction_style = kCompactionStyleLevel; // The table level at time of constructing the SST file, or -1 if unknown. // (The table file could later be used at a different level.) int level_at_creation = -1; // An optional logger for reporting errors, warnings, etc. 
Logger* info_log = nullptr; }; // We add a new format of filter block called full filter block // This new interface gives you more space of customization // // For the full filter block, you can plug in your version by implement // the FilterBitsBuilder and FilterBitsReader // // There are two sets of interface in FilterPolicy // Set 1: CreateFilter, KeyMayMatch: used for blockbased filter // Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for // full filter. // Set 1 MUST be implemented correctly, Set 2 is optional // RocksDB would first try using functions in Set 2. if they return nullptr, // it would use Set 1 instead. // You can choose filter type in NewBloomFilterPolicy class FilterPolicy { public: virtual ~FilterPolicy(); // Creates a new FilterPolicy based on the input value string and returns the // result The value might be an ID, and ID with properties, or an old-style // policy string. // The value describes the FilterPolicy being created. // For BloomFilters, value may be a ":"-delimited value of the form: // "bloomfilter:[bits_per_key]:[use_block_based_builder]", // e.g. ""bloomfilter:4:true" // The above string is equivalent to calling NewBloomFilterPolicy(4, true). static Status CreateFromString(const ConfigOptions& config_options, const std::string& value, std::shared_ptr* result); // Return the name of this policy. Note that if the filter encoding // changes in an incompatible way, the name returned by this method // must be changed. Otherwise, old incompatible filters may be // passed to methods of this type. virtual const char* Name() const = 0; // keys[0,n-1] contains a list of keys (potentially with duplicates) // that are ordered according to the user supplied comparator. // Append a filter that summarizes keys[0,n-1] to *dst. // // Warning: do not change the initial contents of *dst. Instead, // append the newly constructed filter to *dst. 
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const = 0; // "filter" contains the data appended by a preceding call to // CreateFilter() on this class. This method must return true if // the key was in the list of keys passed to CreateFilter(). // This method may return true or false if the key was not on the // list, but it should aim to return false with a high probability. virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; // Return a new FilterBitsBuilder for full or partitioned filter blocks, or // nullptr if using block-based filter. // NOTE: This function is only called by GetBuilderWithContext() below for // custom FilterPolicy implementations. Thus, it is not necessary to // override this function if overriding GetBuilderWithContext(). virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; } // A newer variant of GetFilterBitsBuilder that allows a FilterPolicy // to customize the builder for contextual constraints and hints. // (Name changed to avoid triggering -Werror=overloaded-virtual.) // If overriding GetFilterBitsBuilder() suffices, it is not necessary to // override this function. virtual FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const { return GetFilterBitsBuilder(); } // Return a new FilterBitsReader for full or partitioned filter blocks, or // nullptr if using block-based filter. // As here, the input slice should NOT be deleted by FilterPolicy. virtual FilterBitsReader* GetFilterBitsReader( const Slice& /*contents*/) const { return nullptr; } }; // Return a new filter policy that uses a bloom filter with approximately // the specified number of bits per key. // // bits_per_key: average bits allocated per key in bloom filter. A good // choice is 9.9, which yields a filter with ~ 1% false positive rate. // When format_version < 5, the value will be rounded to the nearest // integer. 
Recommend using no more than three decimal digits after the // decimal point, as in 6.667. // // use_block_based_builder: use deprecated block based filter (true) rather // than full or partitioned filter (false). // // Callers must delete the result after any database that is using the // result has been closed. // // Note: if you are using a custom comparator that ignores some parts // of the keys being compared, you must not use NewBloomFilterPolicy() // and must provide your own FilterPolicy that also ignores the // corresponding parts of the keys. For example, if the comparator // ignores trailing spaces, it would be incorrect to use a // FilterPolicy (like NewBloomFilterPolicy) that does not ignore // trailing spaces in keys. extern const FilterPolicy* NewBloomFilterPolicy( double bits_per_key, bool use_block_based_builder = false); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/flush_block_policy.h000066400000000000000000000036571370372246700223550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "rocksdb/table.h" namespace ROCKSDB_NAMESPACE { class Slice; class BlockBuilder; struct Options; // FlushBlockPolicy provides a configurable way to determine when to flush a // block in the block based tables, class FlushBlockPolicy { public: // Keep track of the key/value sequences and return the boolean value to // determine if table builder should flush current data block. virtual bool Update(const Slice& key, const Slice& value) = 0; virtual ~FlushBlockPolicy() {} }; class FlushBlockPolicyFactory { public: // Return the name of the flush block policy. virtual const char* Name() const = 0; // Return a new block flush policy that flushes data blocks by data size. 
// FlushBlockPolicy may need to access the metadata of the data block // builder to determine when to flush the blocks. // // Callers must delete the result after any database that is using the // result has been closed. virtual FlushBlockPolicy* NewFlushBlockPolicy( const BlockBasedTableOptions& table_options, const BlockBuilder& data_block_builder) const = 0; virtual ~FlushBlockPolicyFactory() {} }; class FlushBlockBySizePolicyFactory : public FlushBlockPolicyFactory { public: FlushBlockBySizePolicyFactory() {} const char* Name() const override { return "FlushBlockBySizePolicyFactory"; } FlushBlockPolicy* NewFlushBlockPolicy( const BlockBasedTableOptions& table_options, const BlockBuilder& data_block_builder) const override; static FlushBlockPolicy* NewFlushBlockPolicy( const uint64_t size, const int deviation, const BlockBuilder& data_block_builder); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/io_status.h000066400000000000000000000175431370372246700205140ustar00rootroot00000000000000// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // An IOStatus encapsulates the result of an operation. It may indicate // success, or it may indicate an error with an associated error message. // // Multiple threads can invoke const methods on an IOStatus without // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same IOStatus must use // external synchronization. 
#pragma once #include #include "rocksdb/slice.h" #ifdef OS_WIN #include #endif #include #include "status.h" namespace ROCKSDB_NAMESPACE { class IOStatus : public Status { public: using Code = Status::Code; using SubCode = Status::SubCode; enum IOErrorScope { kIOErrorScopeFileSystem, kIOErrorScopeFile, kIOErrorScopeRange, kIOErrorScopeMax, }; // Create a success status. IOStatus() : IOStatus(kOk, kNone) {} ~IOStatus() {} // Copy the specified status. IOStatus(const IOStatus& s); IOStatus& operator=(const IOStatus& s); IOStatus(IOStatus&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif ; IOStatus& operator=(IOStatus&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif ; bool operator==(const IOStatus& rhs) const; bool operator!=(const IOStatus& rhs) const; void SetRetryable(bool retryable) { retryable_ = retryable; } void SetDataLoss(bool data_loss) { data_loss_ = data_loss; } void SetScope(IOErrorScope scope) { scope_ = scope; } bool GetRetryable() const { return retryable_; } bool GetDataLoss() const { return data_loss_; } IOErrorScope GetScope() const { return scope_; } // Return a success status. static IOStatus OK() { return IOStatus(); } static IOStatus NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kNotSupported, msg, msg2); } static IOStatus NotSupported(SubCode msg = kNone) { return IOStatus(kNotSupported, msg); } // Return error status of an appropriate type. 
static IOStatus NotFound(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kNotFound, msg, msg2); } // Fast path for not found without malloc; static IOStatus NotFound(SubCode msg = kNone) { return IOStatus(kNotFound, msg); } static IOStatus Corruption(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kCorruption, msg, msg2); } static IOStatus Corruption(SubCode msg = kNone) { return IOStatus(kCorruption, msg); } static IOStatus InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kInvalidArgument, msg, msg2); } static IOStatus InvalidArgument(SubCode msg = kNone) { return IOStatus(kInvalidArgument, msg); } static IOStatus IOError(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kIOError, msg, msg2); } static IOStatus IOError(SubCode msg = kNone) { return IOStatus(kIOError, msg); } static IOStatus Busy(SubCode msg = kNone) { return IOStatus(kBusy, msg); } static IOStatus Busy(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kBusy, msg, msg2); } static IOStatus TimedOut(SubCode msg = kNone) { return IOStatus(kTimedOut, msg); } static IOStatus TimedOut(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kTimedOut, msg, msg2); } static IOStatus NoSpace() { return IOStatus(kIOError, kNoSpace); } static IOStatus NoSpace(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kIOError, kNoSpace, msg, msg2); } static IOStatus PathNotFound() { return IOStatus(kIOError, kPathNotFound); } static IOStatus PathNotFound(const Slice& msg, const Slice& msg2 = Slice()) { return IOStatus(kIOError, kPathNotFound, msg, msg2); } // Return a string representation of this status suitable for printing. // Returns the string "OK" for success. 
// std::string ToString() const; private: friend IOStatus status_to_io_status(Status&&); bool retryable_; bool data_loss_; IOErrorScope scope_; explicit IOStatus(Code _code, SubCode _subcode = kNone) : Status(_code, _subcode), retryable_(false), data_loss_(false), scope_(kIOErrorScopeFileSystem) {} IOStatus(Code _code, SubCode _subcode, const Slice& msg, const Slice& msg2); IOStatus(Code _code, const Slice& msg, const Slice& msg2) : IOStatus(_code, kNone, msg, msg2) {} }; inline IOStatus::IOStatus(Code _code, SubCode _subcode, const Slice& msg, const Slice& msg2) : Status(_code, _subcode), retryable_(false), data_loss_(false), scope_(kIOErrorScopeFileSystem) { assert(code_ != kOk); assert(subcode_ != kMaxSubCode); const size_t len1 = msg.size(); const size_t len2 = msg2.size(); const size_t size = len1 + (len2 ? (2 + len2) : 0); char* const result = new char[size + 1]; // +1 for null terminator memcpy(result, msg.data(), len1); if (len2) { result[len1] = ':'; result[len1 + 1] = ' '; memcpy(result + len1 + 2, msg2.data(), len2); } result[size] = '\0'; // null terminator for C style string state_ = result; } inline IOStatus::IOStatus(const IOStatus& s) : Status(s.code_, s.subcode_) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED retryable_ = s.retryable_; data_loss_ = s.data_loss_; scope_ = s.scope_; state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_); } inline IOStatus& IOStatus::operator=(const IOStatus& s) { // The following condition catches both aliasing (when this == &s), // and the common case where both s and *this are ok. if (this != &s) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; checked_ = false; #endif // ROCKSDB_ASSERT_STATUS_CHECKED code_ = s.code_; subcode_ = s.subcode_; retryable_ = s.retryable_; data_loss_ = s.data_loss_; scope_ = s.scope_; delete[] state_; state_ = (s.state_ == nullptr) ? 
nullptr : CopyState(s.state_); } return *this; } inline IOStatus::IOStatus(IOStatus&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif : IOStatus() { *this = std::move(s); } inline IOStatus& IOStatus::operator=(IOStatus&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif { if (this != &s) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; checked_ = false; #endif // ROCKSDB_ASSERT_STATUS_CHECKED code_ = std::move(s.code_); s.code_ = kOk; subcode_ = std::move(s.subcode_); s.subcode_ = kNone; retryable_ = s.retryable_; data_loss_ = s.data_loss_; scope_ = s.scope_; s.scope_ = kIOErrorScopeFileSystem; delete[] state_; state_ = nullptr; std::swap(state_, s.state_); } return *this; } inline bool IOStatus::operator==(const IOStatus& rhs) const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; rhs.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code_ == rhs.code_); } inline bool IOStatus::operator!=(const IOStatus& rhs) const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; rhs.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return !(*this == rhs); } inline IOStatus status_to_io_status(Status&& status) { if (status.ok()) { // Fast path return IOStatus::OK(); } else { const char* state = status.getState(); if (state) { return IOStatus(status.code(), status.subcode(), Slice(state, strlen(status.getState()) + 1), Slice()); } else { return IOStatus(status.code(), status.subcode()); } } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/iostats_context.h000066400000000000000000000031441370372246700217240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include "rocksdb/perf_level.h" // A thread local context for gathering io-stats efficiently and transparently. // Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. namespace ROCKSDB_NAMESPACE { struct IOStatsContext { // reset all io-stats counter to zero void Reset(); std::string ToString(bool exclude_zero_counters = false) const; // the thread pool id uint64_t thread_pool_id; // number of bytes that has been written. uint64_t bytes_written; // number of bytes that has been read. uint64_t bytes_read; // time spent in open() and fopen(). uint64_t open_nanos; // time spent in fallocate(). uint64_t allocate_nanos; // time spent in write() and pwrite(). uint64_t write_nanos; // time spent in read() and pread() uint64_t read_nanos; // time spent in sync_file_range(). uint64_t range_sync_nanos; // time spent in fsync uint64_t fsync_nanos; // time spent in preparing write (fallocate etc). uint64_t prepare_write_nanos; // time spent in Logger::Logv(). uint64_t logger_nanos; // CPU time spent in write() and pwrite() uint64_t cpu_write_nanos; // CPU time spent in read() and pread() uint64_t cpu_read_nanos; }; // Get Thread-local IOStatsContext object pointer IOStatsContext* get_iostats_context(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/iterator.h000066400000000000000000000121351370372246700203230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // An iterator yields a sequence of key/value pairs from a source. // The following class defines the interface. 
Multiple implementations // are provided by this library. In particular, iterators are provided // to access the contents of a Table or a DB. // // Multiple threads can invoke const methods on an Iterator without // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same Iterator must use // external synchronization. #pragma once #include #include "rocksdb/cleanable.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Iterator : public Cleanable { public: Iterator() {} // No copying allowed Iterator(const Iterator&) = delete; void operator=(const Iterator&) = delete; virtual ~Iterator() {} // An iterator is either positioned at a key/value pair, or // not valid. This method returns true iff the iterator is valid. // Always returns false if !status().ok(). virtual bool Valid() const = 0; // Position at the first key in the source. The iterator is Valid() // after this call iff the source is not empty. virtual void SeekToFirst() = 0; // Position at the last key in the source. The iterator is // Valid() after this call iff the source is not empty. // Currently incompatible with user timestamp. virtual void SeekToLast() = 0; // Position at the first key in the source that at or past target. // The iterator is Valid() after this call iff the source contains // an entry that comes at or past target. // All Seek*() methods clear any error status() that the iterator had prior to // the call; after the seek, status() indicates only the error (if any) that // happened during the seek, not any past errors. // Target does not contain timestamp. virtual void Seek(const Slice& target) = 0; // Position at the last key in the source that at or before target. // The iterator is Valid() after this call iff the source contains // an entry that comes at or before target. // Currently incompatible with user timestamp. 
virtual void SeekForPrev(const Slice& target) = 0; // Moves to the next entry in the source. After this call, Valid() is // true iff the iterator was not positioned at the last entry in the source. // REQUIRES: Valid() virtual void Next() = 0; // Moves to the previous entry in the source. After this call, Valid() is // true iff the iterator was not positioned at the first entry in source. // Currently incompatible with user timestamp. // REQUIRES: Valid() virtual void Prev() = 0; // Return the key for the current entry. The underlying storage for // the returned slice is valid only until the next modification of // the iterator. // REQUIRES: Valid() virtual Slice key() const = 0; // Return the value for the current entry. The underlying storage for // the returned slice is valid only until the next modification of // the iterator. // REQUIRES: Valid() virtual Slice value() const = 0; // If an error has occurred, return it. Else return an ok status. // If non-blocking IO is requested and this operation cannot be // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; // If supported, renew the iterator to represent the latest state. The // iterator will be invalidated after the call. Not supported if // ReadOptions.snapshot is given when creating the iterator. virtual Status Refresh() { return Status::NotSupported("Refresh() is not supported"); } // Property "rocksdb.iterator.is-key-pinned": // If returning "1", this means that the Slice returned by key() is valid // as long as the iterator is not deleted. // It is guaranteed to always return "1" if // - Iterator created with ReadOptions::pin_data = true // - DB tables were created with // BlockBasedTableOptions::use_delta_encoding = false. // Property "rocksdb.iterator.super-version-number": // LSM version used by the iterator. The same format as DB Property // kCurrentSuperVersionNumber. See its comment for more information. 
// Property "rocksdb.iterator.internal-key": // Get the user-key portion of the internal key at which the iteration // stopped. virtual Status GetProperty(std::string prop_name, std::string* prop); virtual Slice timestamp() const { assert(false); return Slice(); } }; // Return an empty iterator (yields nothing). extern Iterator* NewEmptyIterator(); // Return an empty iterator with the specified status. extern Iterator* NewErrorIterator(const Status& status); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/ldb_tool.h000066400000000000000000000023411370372246700202660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/db.h" #include "rocksdb/options.h" namespace ROCKSDB_NAMESPACE { // An interface for converting a slice to a readable string class SliceFormatter { public: virtual ~SliceFormatter() {} virtual std::string Format(const Slice& s) const = 0; }; // Options for customizing ldb tool (beyond the DB Options) struct LDBOptions { // Create LDBOptions with default values for all fields LDBOptions(); // Key formatter that converts a slice to a readable string. // Default: Slice::ToString() std::shared_ptr key_formatter; std::string print_help_header = "ldb - RocksDB Tool"; }; class LDBTool { public: void Run( int argc, char** argv, Options db_options = Options(), const LDBOptions& ldb_options = LDBOptions(), const std::vector* column_families = nullptr); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/listener.h000066400000000000000000000446101370372246700203220ustar00rootroot00000000000000// Copyright (c) 2014 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. #pragma once #include #include #include #include #include #include "rocksdb/compaction_job_stats.h" #include "rocksdb/status.h" #include "rocksdb/table_properties.h" namespace ROCKSDB_NAMESPACE { typedef std::unordered_map> TablePropertiesCollection; class DB; class ColumnFamilyHandle; class Status; struct CompactionJobStats; enum CompressionType : unsigned char; enum class TableFileCreationReason { kFlush, kCompaction, kRecovery, kMisc, }; struct TableFileCreationBriefInfo { // the name of the database where the file was created std::string db_name; // the name of the column family where the file was created. std::string cf_name; // the path to the created file. std::string file_path; // the id of the job (which could be flush or compaction) that // created the file. int job_id; // reason of creating the table. TableFileCreationReason reason; }; struct TableFileCreationInfo : public TableFileCreationBriefInfo { TableFileCreationInfo() = default; explicit TableFileCreationInfo(TableProperties&& prop) : table_properties(prop) {} // the size of the file. uint64_t file_size; // Detailed properties of the created file. TableProperties table_properties; // The status indicating whether the creation was successful or not. 
Status status; }; enum class CompactionReason : int { kUnknown = 0, // [Level] number of L0 files > level0_file_num_compaction_trigger kLevelL0FilesNum, // [Level] total size of level > MaxBytesForLevel() kLevelMaxLevelSize, // [Universal] Compacting for size amplification kUniversalSizeAmplification, // [Universal] Compacting for size ratio kUniversalSizeRatio, // [Universal] number of sorted runs > level0_file_num_compaction_trigger kUniversalSortedRunNum, // [FIFO] total size > max_table_files_size kFIFOMaxSize, // [FIFO] reduce number of files. kFIFOReduceNumFiles, // [FIFO] files with creation time < (current_time - interval) kFIFOTtl, // Manual compaction kManualCompaction, // DB::SuggestCompactRange() marked files for compaction kFilesMarkedForCompaction, // [Level] Automatic compaction within bottommost level to cleanup duplicate // versions of same user key, usually due to a released snapshot. kBottommostFiles, // Compaction based on TTL kTtl, // According to the comments in flush_job.cc, RocksDB treats flush as // a level 0 compaction in internal stats. kFlush, // Compaction caused by external sst file ingestion kExternalSstIngestion, // Compaction due to SST file being too old kPeriodicCompaction, // total number of compaction reasons, new reasons must be added above this. 
kNumOfReasons, }; enum class FlushReason : int { kOthers = 0x00, kGetLiveFiles = 0x01, kShutDown = 0x02, kExternalFileIngestion = 0x03, kManualCompaction = 0x04, kWriteBufferManager = 0x05, kWriteBufferFull = 0x06, kTest = 0x07, kDeleteFiles = 0x08, kAutoCompaction = 0x09, kManualFlush = 0x0a, kErrorRecovery = 0xb, }; enum class BackgroundErrorReason { kFlush, kCompaction, kWriteCallback, kMemTable, kManifestWrite, }; enum class WriteStallCondition { kNormal, kDelayed, kStopped, }; struct WriteStallInfo { // the name of the column family std::string cf_name; // state of the write controller struct { WriteStallCondition cur; WriteStallCondition prev; } condition; }; #ifndef ROCKSDB_LITE struct TableFileDeletionInfo { // The name of the database where the file was deleted. std::string db_name; // The path to the deleted file. std::string file_path; // The id of the job which deleted the file. int job_id; // The status indicating whether the deletion was successful or not. Status status; }; struct FileOperationInfo { using TimePoint = std::chrono::time_point; const std::string& path; uint64_t offset; size_t length; const TimePoint& start_timestamp; const TimePoint& finish_timestamp; Status status; FileOperationInfo(const std::string& _path, const TimePoint& start, const TimePoint& finish) : path(_path), start_timestamp(start), finish_timestamp(finish) {} }; struct FlushJobInfo { // the id of the column family uint32_t cf_id; // the name of the column family std::string cf_name; // the path to the newly created file std::string file_path; // the file number of the newly created file uint64_t file_number; // the oldest blob file referenced by the newly created file uint64_t oldest_blob_file_number; // the id of the thread that completed this flush job. uint64_t thread_id; // the job id, which is unique in the same thread. 
int job_id; // If true, then rocksdb is currently slowing-down all writes to prevent // creating too many Level 0 files as compaction seems not able to // catch up the write request speed. This indicates that there are // too many files in Level 0. bool triggered_writes_slowdown; // If true, then rocksdb is currently blocking any writes to prevent // creating more L0 files. This indicates that there are too many // files in level 0. Compactions should try to compact L0 files down // to lower levels as soon as possible. bool triggered_writes_stop; // The smallest sequence number in the newly created file SequenceNumber smallest_seqno; // The largest sequence number in the newly created file SequenceNumber largest_seqno; // Table properties of the table being flushed TableProperties table_properties; FlushReason flush_reason; }; struct CompactionFileInfo { // The level of the file. int level; // The file number of the file. uint64_t file_number; // The file number of the oldest blob file this SST file references. uint64_t oldest_blob_file_number; }; struct CompactionJobInfo { // the id of the column family where the compaction happened. uint32_t cf_id; // the name of the column family where the compaction happened. std::string cf_name; // the status indicating whether the compaction was successful or not. Status status; // the id of the thread that completed this compaction job. uint64_t thread_id; // the job id, which is unique in the same thread. int job_id; // the smallest input level of the compaction. int base_input_level; // the output level of the compaction. int output_level; // The following variables contain information about compaction inputs // and outputs. A file may appear in both the input and output lists // if it was simply moved to a different level. The order of elements // is the same across input_files and input_file_infos; similarly, it is // the same across output_files and output_file_infos. // The names of the compaction input files. 
std::vector input_files; // Additional information about the compaction input files. std::vector input_file_infos; // The names of the compaction output files. std::vector output_files; // Additional information about the compaction output files. std::vector output_file_infos; // Table properties for input and output tables. // The map is keyed by values from input_files and output_files. TablePropertiesCollection table_properties; // Reason to run the compaction CompactionReason compaction_reason; // Compression algorithm used for output files CompressionType compression; // If non-null, this variable stores detailed information // about this compaction. CompactionJobStats stats; }; struct MemTableInfo { // the name of the column family to which memtable belongs std::string cf_name; // Sequence number of the first element that was inserted // into the memtable. SequenceNumber first_seqno; // Sequence number that is guaranteed to be smaller than or equal // to the sequence number of any key that could be inserted into this // memtable. It can then be assumed that any write with a larger(or equal) // sequence number will be present in this memtable or a later memtable. SequenceNumber earliest_seqno; // Total number of entries in memtable uint64_t num_entries; // Total number of deletes in memtable uint64_t num_deletes; }; struct ExternalFileIngestionInfo { // the name of the column family std::string cf_name; // Path of the file outside the DB std::string external_file_path; // Path of the file inside the DB std::string internal_file_path; // The global sequence number assigned to keys in this file SequenceNumber global_seqno; // Table properties of the table being flushed TableProperties table_properties; }; // EventListener class contains a set of callback functions that will // be called when specific RocksDB event happens such as flush. 
It can // be used as a building block for developing custom features such as // stats-collector or external compaction algorithm. // // Note that callback functions should not run for an extended period of // time before the function returns, otherwise RocksDB may be blocked. // For example, it is not suggested to do DB::CompactFiles() (as it may // run for a long while) or issue many of DB::Put() (as Put may be blocked // in certain cases) in the same thread in the EventListener callback. // However, doing DB::CompactFiles() and DB::Put() in another thread is // considered safe. // // [Threading] All EventListener callback will be called using the // actual thread that involves in that specific event. For example, it // is the RocksDB background flush thread that does the actual flush to // call EventListener::OnFlushCompleted(). // // [Locking] All EventListener callbacks are designed to be called without // the current thread holding any DB mutex. This is to prevent potential // deadlock and performance issue when using EventListener callback // in a complex way. class EventListener { public: // A callback function to RocksDB which will be called whenever a // registered RocksDB flushes a file. The default implementation is // no-op. // // Note that the this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. virtual void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& /*flush_job_info*/) {} // A callback function to RocksDB which will be called before a // RocksDB starts to flush memtables. The default implementation is // no-op. // // Note that the this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. 
virtual void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*flush_job_info*/) {} // A callback function for RocksDB which will be called whenever // a SST file is deleted. Different from OnCompactionCompleted and // OnFlushCompleted, this callback is designed for external logging // service and thus only provide string parameters instead // of a pointer to DB. Applications that build logic basic based // on file creations and deletions is suggested to implement // OnFlushCompleted and OnCompactionCompleted. // // Note that if applications would like to use the passed reference // outside this function call, they should make copies from the // returned value. virtual void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) {} // A callback function to RocksDB which will be called before a // RocksDB starts to compact. The default implementation is // no-op. // // Note that the this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. virtual void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& /*ci*/) {} // A callback function for RocksDB which will be called whenever // a registered RocksDB compacts a file. The default implementation // is a no-op. // // Note that this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. // // @param db a pointer to the rocksdb instance which just compacted // a file. // @param ci a reference to a CompactionJobInfo struct. 'ci' is released // after this function is returned, and must be copied if it is needed // outside of this function. virtual void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& /*ci*/) {} // A callback function for RocksDB which will be called whenever // a SST file is created. 
Different from OnCompactionCompleted and // OnFlushCompleted, this callback is designed for external logging // service and thus only provide string parameters instead // of a pointer to DB. Applications that build logic basic based // on file creations and deletions is suggested to implement // OnFlushCompleted and OnCompactionCompleted. // // Historically it will only be called if the file is successfully created. // Now it will also be called on failure case. User can check info.status // to see if it succeeded or not. // // Note that if applications would like to use the passed reference // outside this function call, they should make copies from these // returned value. virtual void OnTableFileCreated(const TableFileCreationInfo& /*info*/) {} // A callback function for RocksDB which will be called before // a SST file is being created. It will follow by OnTableFileCreated after // the creation finishes. // // Note that if applications would like to use the passed reference // outside this function call, they should make copies from these // returned value. virtual void OnTableFileCreationStarted( const TableFileCreationBriefInfo& /*info*/) {} // A callback function for RocksDB which will be called before // a memtable is made immutable. // // Note that the this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. // // Note that if applications would like to use the passed reference // outside this function call, they should make copies from these // returned value. virtual void OnMemTableSealed(const MemTableInfo& /*info*/) {} // A callback function for RocksDB which will be called before // a column family handle is deleted. // // Note that the this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. 
// @param handle is a pointer to the column family handle to be deleted // which will become a dangling pointer after the deletion. virtual void OnColumnFamilyHandleDeletionStarted( ColumnFamilyHandle* /*handle*/) {} // A callback function for RocksDB which will be called after an external // file is ingested using IngestExternalFile. // // Note that the this function will run on the same thread as // IngestExternalFile(), if this function is blocked, IngestExternalFile() // will be blocked from finishing. virtual void OnExternalFileIngested( DB* /*db*/, const ExternalFileIngestionInfo& /*info*/) {} // A callback function for RocksDB which will be called before setting the // background error status to a non-OK value. The new background error status // is provided in `bg_error` and can be modified by the callback. E.g., a // callback can suppress errors by resetting it to Status::OK(), thus // preventing the database from entering read-only mode. We do not provide any // guarantee when failed flushes/compactions will be rescheduled if the user // suppresses an error. // // Note that this function can run on the same threads as flush, compaction, // and user writes. So, it is extremely important not to perform heavy // computations or blocking calls in this function. virtual void OnBackgroundError(BackgroundErrorReason /* reason */, Status* /* bg_error */) {} // A callback function for RocksDB which will be called whenever a change // of superversion triggers a change of the stall conditions. // // Note that the this function must be implemented in a way such that // it should not run for an extended period of time before the function // returns. Otherwise, RocksDB may be blocked. virtual void OnStallConditionsChanged(const WriteStallInfo& /*info*/) {} // A callback function for RocksDB which will be called whenever a file read // operation finishes. 
virtual void OnFileReadFinish(const FileOperationInfo& /* info */) {} // A callback function for RocksDB which will be called whenever a file write // operation finishes. virtual void OnFileWriteFinish(const FileOperationInfo& /* info */) {} // If true, the OnFileReadFinish and OnFileWriteFinish will be called. If // false, then they won't be called. virtual bool ShouldBeNotifiedOnFileIO() { return false; } // A callback function for RocksDB which will be called just before // starting the automatic recovery process for recoverable background // errors, such as NoSpace(). The callback can suppress the automatic // recovery by setting *auto_recovery to false. The database will then // have to be transitioned out of read-only mode by calling DB::Resume() virtual void OnErrorRecoveryBegin(BackgroundErrorReason /* reason */, Status /* bg_error */, bool* /* auto_recovery */) {} // A callback function for RocksDB which will be called once the database // is recovered from read-only mode after an error. When this is called, it // means normal writes to the database can be issued and the user can // initiate any further recovery actions needed virtual void OnErrorRecoveryCompleted(Status /* old_bg_error */) {} virtual ~EventListener() {} }; #else class EventListener {}; struct FlushJobInfo {}; #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/memory_allocator.h000066400000000000000000000062031370372246700220410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/status.h" #include namespace ROCKSDB_NAMESPACE { // MemoryAllocator is an interface that a client can implement to supply custom // memory allocation and deallocation methods. 
// See rocksdb/cache.h for more information.
// All methods should be thread-safe.
class MemoryAllocator {
 public:
  virtual ~MemoryAllocator() = default;

  // Name of the cache allocator, printed in the log
  virtual const char* Name() const = 0;

  // Allocate a block of at least size. Has to be thread-safe.
  virtual void* Allocate(size_t size) = 0;

  // Deallocate previously allocated block. Has to be thread-safe.
  virtual void Deallocate(void* p) = 0;

  // Returns the memory size of the block allocated at p. The default
  // implementation that just returns the original allocation_size is fine.
  virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
    // default implementation just returns the allocation size
    return allocation_size;
  }
};

struct JemallocAllocatorOptions {
  // Jemalloc tcache caches allocations by size class. For each size class,
  // it caches between 20 (for large size classes) to 200 (for small size
  // classes). To reduce tcache memory usage in case the allocator is accessed
  // by a large number of threads, we can control whether to cache an
  // allocation by its size.
  bool limit_tcache_size = false;

  // Lower bound of allocation size to use tcache, if limit_tcache_size=true.
  // When used with block cache, it is recommended to set it to block_size/4.
  size_t tcache_size_lower_bound = 1024;

  // Upper bound of allocation size to use tcache, if limit_tcache_size=true.
  // When used with block cache, it is recommended to set it to block_size.
  size_t tcache_size_upper_bound = 16 * 1024;
};

// Generate memory allocators which allocate through Jemalloc and utilize
// MADV_DONTDUMP through madvise to exclude cache items from core dump.
// Applications can use the allocator with block cache to exclude block cache
// usage from core dump.
//
// Implementation details:
// The JemallocNodumpAllocator creates a dedicated jemalloc arena, and all
// allocations of the JemallocNodumpAllocator are through the same arena.
// The memory allocator hooks memory allocation of the arena, and call // madvice() with MADV_DONTDUMP flag to exclude the piece of memory from // core dump. Side benefit of using single arena would be reduce of jemalloc // metadata for some workload. // // To mitigate mutex contention for using one single arena, jemalloc tcache // (thread-local cache) is enabled to cache unused allocations for future use. // The tcache normally incur 0.5M extra memory usage per-thread. The usage // can be reduce by limitting allocation sizes to cache. extern Status NewJemallocNodumpAllocator( JemallocAllocatorOptions& options, std::shared_ptr* memory_allocator); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/memtablerep.h000066400000000000000000000371061370372246700207740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file contains the interface that must be implemented by any collection // to be used as the backing store for a MemTable. Such a collection must // satisfy the following properties: // (1) It does not store duplicate items. // (2) It uses MemTableRep::KeyComparator to compare items for iteration and // equality. // (3) It can be accessed concurrently by multiple readers and can support // during reads. However, it needn't support multiple concurrent writes. // (4) Items are never deleted. // The liberal use of assertions is encouraged to enforce (1). // // The factory will be passed an MemTableAllocator object when a new MemTableRep // is requested. // // Users can implement their own memtable representations. We include three // types built in: // - SkipListRep: This is the default; it is backed by a skip list. 
// - HashSkipListRep: The memtable rep that is best used for keys that are // structured like "prefix:suffix" where iteration within a prefix is // common and iteration across different prefixes is rare. It is backed by // a hash map where each bucket is a skip list. // - VectorRep: This is backed by an unordered std::vector. On iteration, the // vector is sorted. It is intelligent about sorting; once the MarkReadOnly() // has been called, the vector will only be sorted once. It is optimized for // random-write-heavy workloads. // // The last four implementations are designed for situations in which // iteration over the entire collection is rare since doing so requires all the // keys to be copied into a sorted data structure. #pragma once #include #include #include #include #include namespace ROCKSDB_NAMESPACE { class Arena; class Allocator; class LookupKey; class SliceTransform; class Logger; typedef void* KeyHandle; extern Slice GetLengthPrefixedSlice(const char* data); class MemTableRep { public: // KeyComparator provides a means to compare keys, which are internal keys // concatenated with values. class KeyComparator { public: typedef ROCKSDB_NAMESPACE::Slice DecodedType; virtual DecodedType decode_key(const char* key) const { // The format of key is frozen and can be terated as a part of the API // contract. Refer to MemTable::Add for details. return GetLengthPrefixedSlice(key); } // Compare a and b. Return a negative value if a is less than b, 0 if they // are equal, and a positive value if a is greater than b virtual int operator()(const char* prefix_len_key1, const char* prefix_len_key2) const = 0; virtual int operator()(const char* prefix_len_key, const Slice& key) const = 0; virtual ~KeyComparator() {} }; explicit MemTableRep(Allocator* allocator) : allocator_(allocator) {} // Allocate a buf of len size for storing key. The idea is that a // specific memtable representation knows its underlying data structure // better. 
By allowing it to allocate memory, it can possibly put // correlated stuff in consecutive memory area to make processor // prefetching more efficient. virtual KeyHandle Allocate(const size_t len, char** buf); // Insert key into the collection. (The caller will pack key and value into a // single buffer and pass that in as the parameter to Insert). // REQUIRES: nothing that compares equal to key is currently in the // collection, and no concurrent modifications to the table in progress virtual void Insert(KeyHandle handle) = 0; // Same as ::Insert // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // the already exists. virtual bool InsertKey(KeyHandle handle) { Insert(handle); return true; } // Same as Insert(), but in additional pass a hint to insert location for // the key. If hint points to nullptr, a new hint will be populated. // otherwise the hint will be updated to reflect the last insert location. // // Currently only skip-list based memtable implement the interface. Other // implementations will fallback to Insert() by default. virtual void InsertWithHint(KeyHandle handle, void** /*hint*/) { // Ignore the hint by default. Insert(handle); } // Same as ::InsertWithHint // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // the already exists. virtual bool InsertKeyWithHint(KeyHandle handle, void** hint) { InsertWithHint(handle, hint); return true; } // Same as ::InsertWithHint, but allow concurrnet write // // If hint points to nullptr, a new hint will be allocated on heap, otherwise // the hint will be updated to reflect the last insert location. The hint is // owned by the caller and it is the caller's responsibility to delete the // hint later. // // Currently only skip-list based memtable implement the interface. Other // implementations will fallback to InsertConcurrently() by default. virtual void InsertWithHintConcurrently(KeyHandle handle, void** /*hint*/) { // Ignore the hint by default. 
InsertConcurrently(handle); } // Same as ::InsertWithHintConcurrently // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // the already exists. virtual bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) { InsertWithHintConcurrently(handle, hint); return true; } // Like Insert(handle), but may be called concurrent with other calls // to InsertConcurrently for other handles. // // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // the already exists. virtual void InsertConcurrently(KeyHandle handle); // Same as ::InsertConcurrently // Returns false if MemTableRepFactory::CanHandleDuplicatedKey() is true and // the already exists. virtual bool InsertKeyConcurrently(KeyHandle handle) { InsertConcurrently(handle); return true; } // Returns true iff an entry that compares equal to key is in the collection. virtual bool Contains(const char* key) const = 0; // Notify this table rep that it will no longer be added to. By default, // does nothing. After MarkReadOnly() is called, this table rep will // not be written to (ie No more calls to Allocate(), Insert(), // or any writes done directly to entries accessed through the iterator.) virtual void MarkReadOnly() {} // Notify this table rep that it has been flushed to stable storage. // By default, does nothing. // // Invariant: MarkReadOnly() is called, before MarkFlushed(). // Note that this method if overridden, should not run for an extended period // of time. Otherwise, RocksDB may be blocked. virtual void MarkFlushed() {} // Look up key from the mem table, since the first key in the mem table whose // user_key matches the one given k, call the function callback_func(), with // callback_args directly forwarded as the first parameter, and the mem table // key as the second parameter. If the return value is false, then terminates. // Otherwise, go through the next key. 
// // It's safe for Get() to terminate after having finished all the potential // key for the k.user_key(), or not. // // Default: // Get() function with a default value of dynamically construct an iterator, // seek and call the call back function. virtual void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)); virtual uint64_t ApproximateNumEntries(const Slice& /*start_ikey*/, const Slice& /*end_key*/) { return 0; } // Report an approximation of how much memory has been used other than memory // that was allocated through the allocator. Safe to call from any thread. virtual size_t ApproximateMemoryUsage() = 0; virtual ~MemTableRep() {} // Iteration over the contents of a skip collection class Iterator { public: // Initialize an iterator over the specified collection. // The returned iterator is not valid. // explicit Iterator(const MemTableRep* collection); virtual ~Iterator() {} // Returns true iff the iterator is positioned at a valid node. virtual bool Valid() const = 0; // Returns the key at the current position. // REQUIRES: Valid() virtual const char* key() const = 0; // Advances to the next position. // REQUIRES: Valid() virtual void Next() = 0; // Advances to the previous position. // REQUIRES: Valid() virtual void Prev() = 0; // Advance to the first entry with a key >= target virtual void Seek(const Slice& internal_key, const char* memtable_key) = 0; // retreat to the first entry with a key <= target virtual void SeekForPrev(const Slice& internal_key, const char* memtable_key) = 0; // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. virtual void SeekToFirst() = 0; // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. virtual void SeekToLast() = 0; }; // Return an iterator over the keys in this representation. // arena: If not null, the arena needs to be used to allocate the Iterator. 
// When destroying the iterator, the caller will not call "delete" // but Iterator::~Iterator() directly. The destructor needs to destroy // all the states but those allocated in arena. virtual Iterator* GetIterator(Arena* arena = nullptr) = 0; // Return an iterator that has a special Seek semantics. The result of // a Seek might only include keys with the same prefix as the target key. // arena: If not null, the arena is used to allocate the Iterator. // When destroying the iterator, the caller will not call "delete" // but Iterator::~Iterator() directly. The destructor needs to destroy // all the states but those allocated in arena. virtual Iterator* GetDynamicPrefixIterator(Arena* arena = nullptr) { return GetIterator(arena); } // Return true if the current MemTableRep supports merge operator. // Default: true virtual bool IsMergeOperatorSupported() const { return true; } // Return true if the current MemTableRep supports snapshot // Default: true virtual bool IsSnapshotSupported() const { return true; } protected: // When *key is an internal key concatenated with the value, returns the // user key. 
virtual Slice UserKey(const char* key) const;

  Allocator* allocator_;
};

// This is the base class for all factories that are used by RocksDB to create
// new MemTableRep objects
class MemTableRepFactory {
 public:
  virtual ~MemTableRepFactory() {}

  // Create a new MemTableRep. Implementations must provide this overload.
  virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
                                         Allocator*, const SliceTransform*,
                                         Logger* logger) = 0;

  // Overload that additionally receives the column family id. The default
  // implementation ignores the id and forwards to the four-argument overload.
  virtual MemTableRep* CreateMemTableRep(
      const MemTableRep::KeyComparator& key_cmp, Allocator* allocator,
      const SliceTransform* slice_transform, Logger* logger,
      uint32_t /* column_family_id */) {
    return CreateMemTableRep(key_cmp, allocator, slice_transform, logger);
  }

  virtual const char* Name() const = 0;

  // Return true if the current MemTableRep supports concurrent inserts
  // Default: false
  virtual bool IsInsertConcurrentlySupported() const { return false; }

  // Return true if the current MemTableRep supports detecting duplicate
  // keys at insertion time. If true, then MemTableRep::Insert* returns
  // false if the key already exists.
  // Default: false
  virtual bool CanHandleDuplicatedKey() const { return false; }
};

// This uses a skip list to store keys. It is the default.
//
// Parameters:
//   lookahead: If non-zero, each iterator's seek operation will start the
//     search from the previously visited record (doing at most 'lookahead'
//     steps). This is an optimization for the access pattern including many
//     seeks with consecutive keys.
class SkipListFactory : public MemTableRepFactory { public: explicit SkipListFactory(size_t lookahead = 0) : lookahead_(lookahead) {} using MemTableRepFactory::CreateMemTableRep; virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&, Allocator*, const SliceTransform*, Logger* logger) override; virtual const char* Name() const override { return "SkipListFactory"; } bool IsInsertConcurrentlySupported() const override { return true; } bool CanHandleDuplicatedKey() const override { return true; } private: const size_t lookahead_; }; #ifndef ROCKSDB_LITE // This creates MemTableReps that are backed by an std::vector. On iteration, // the vector is sorted. This is useful for workloads where iteration is very // rare and writes are generally not issued after reads begin. // // Parameters: // count: Passed to the constructor of the underlying std::vector of each // VectorRep. On initialization, the underlying array will be at least count // bytes reserved for usage. class VectorRepFactory : public MemTableRepFactory { const size_t count_; public: explicit VectorRepFactory(size_t count = 0) : count_(count) {} using MemTableRepFactory::CreateMemTableRep; virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&, Allocator*, const SliceTransform*, Logger* logger) override; virtual const char* Name() const override { return "VectorRepFactory"; } }; // This class contains a fixed array of buckets, each // pointing to a skiplist (null if the bucket is empty). 
// bucket_count: number of fixed array buckets // skiplist_height: the max height of the skiplist // skiplist_branching_factor: probabilistic size ratio between adjacent // link lists in the skiplist extern MemTableRepFactory* NewHashSkipListRepFactory( size_t bucket_count = 1000000, int32_t skiplist_height = 4, int32_t skiplist_branching_factor = 4); // The factory is to create memtables based on a hash table: // it contains a fixed array of buckets, each pointing to either a linked list // or a skip list if number of entries inside the bucket exceeds // threshold_use_skiplist. // @bucket_count: number of fixed array buckets // @huge_page_tlb_size: if <=0, allocate the hash table bytes from malloc. // Otherwise from huge page TLB. The user needs to reserve // huge pages for it to be allocated, like: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt // @bucket_entries_logging_threshold: if number of entries in one bucket // exceeds this number, log about it. // @if_log_bucket_dist_when_flash: if true, log distribution of number of // entries when flushing. // @threshold_use_skiplist: a bucket switches to skip list if number of // entries exceed this parameter. extern MemTableRepFactory* NewHashLinkListRepFactory( size_t bucket_count = 50000, size_t huge_page_tlb_size = 0, int bucket_entries_logging_threshold = 4096, bool if_log_bucket_dist_when_flash = true, uint32_t threshold_use_skiplist = 256); #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/merge_operator.h000077500000000000000000000275171370372246700215210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include #include #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { class Slice; class Logger; // The Merge Operator // // Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only // client knows. It could be numeric addition, list append, string // concatenation, edit data structure, ... , anything. // The library, on the other hand, is concerned with the exercise of this // interface, at the right time (during get, iteration, compaction...) // // To use merge, the client needs to provide an object implementing one of // the following interfaces: // a) AssociativeMergeOperator - for most simple semantics (always take // two values, and merge them into one value, which is then put back // into rocksdb); numeric addition and string concatenation are examples; // // b) MergeOperator - the generic class for all the more abstract / complex // operations; one method (FullMergeV2) to merge a Put/Delete value with a // merge operand; and another method (PartialMerge) that merges multiple // operands together. this is especially useful if your key values have // complex structures but you would still like to support client-specific // incremental updates. // // AssociativeMergeOperator is simpler to implement. MergeOperator is simply // more powerful. // // Refer to rocksdb-merge wiki for more details and example implementations. // class MergeOperator { public: virtual ~MergeOperator() {} static const char* Type() { return "MergeOperator"; } // Gives the client a way to express the read -> modify -> write semantics // key: (IN) The key that's associated with this merge operation. 
// Client could multiplex the merge operator based on it // if the key space is partitioned and different subspaces // refer to different types of data which have different // merge operation semantics // existing: (IN) null indicates that the key does not exist before this op // operand_list:(IN) the sequence of merge operations to apply, front() first. // new_value:(OUT) Client is responsible for filling the merge result here. // The string that new_value is pointing to will be empty. // logger: (IN) Client could use this to log errors during merge. // // Return true on success. // All values passed in will be client-specific values. So if this method // returns false, it is because client specified bad data or there was // internal corruption. This will be treated as an error by the library. // // Also make use of the *logger for error messages. virtual bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/, const std::deque& /*operand_list*/, std::string* /*new_value*/, Logger* /*logger*/) const { // deprecated, please use FullMergeV2() assert(false); return false; } struct MergeOperationInput { explicit MergeOperationInput(const Slice& _key, const Slice* _existing_value, const std::vector& _operand_list, Logger* _logger) : key(_key), existing_value(_existing_value), operand_list(_operand_list), logger(_logger) {} // The key associated with the merge operation. const Slice& key; // The existing value of the current key, nullptr means that the // value doesn't exist. const Slice* existing_value; // A list of operands to apply. const std::vector& operand_list; // Logger could be used by client to log any errors that happen during // the merge operation. Logger* logger; }; struct MergeOperationOutput { explicit MergeOperationOutput(std::string& _new_value, Slice& _existing_operand) : new_value(_new_value), existing_operand(_existing_operand) {} // Client is responsible for filling the merge result here. 
std::string& new_value; // If the merge result is one of the existing operands (or existing_value), // client can set this field to the operand (or existing_value) instead of // using new_value. Slice& existing_operand; }; // This function applies a stack of merge operands in chrionological order // on top of an existing value. There are two ways in which this method is // being used: // a) During Get() operation, it used to calculate the final value of a key // b) During compaction, in order to collapse some operands with the based // value. // // Note: The name of the method is somewhat misleading, as both in the cases // of Get() or compaction it may be called on a subset of operands: // K: 0 +1 +2 +7 +4 +5 2 +1 +2 // ^ // | // snapshot // In the example above, Get(K) operation will call FullMerge with a base // value of 2 and operands [+1, +2]. Compaction process might decide to // collapse the beginning of the history up to the snapshot by performing // full Merge with base value of 0 and operands [+1, +2, +7, +4]. virtual bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const; // This function performs merge(left_op, right_op) // when both the operands are themselves merge operation types // that you would have passed to a DB::Merge() call in the same order // (i.e.: DB::Merge(key,left_op), followed by DB::Merge(key,right_op)). // // PartialMerge should combine them into a single merge operation that is // saved into *new_value, and then it should return true. // *new_value should be constructed such that a call to // DB::Merge(key, *new_value) would yield the same result as a call // to DB::Merge(key, left_op) followed by DB::Merge(key, right_op). // // The string that new_value is pointing to will be empty. // // The default implementation of PartialMergeMulti will use this function // as a helper, for backward compatibility. 
Any successor class of // MergeOperator should either implement PartialMerge or PartialMergeMulti, // although implementing PartialMergeMulti is suggested as it is in general // more effective to merge multiple operands at a time instead of two // operands at a time. // // If it is impossible or infeasible to combine the two operations, // leave new_value unchanged and return false. The library will // internally keep track of the operations, and apply them in the // correct order once a base-value (a Put/Delete/End-of-Database) is seen. // // TODO: Presently there is no way to differentiate between error/corruption // and simply "return false". For now, the client should simply return // false in any case it cannot perform partial-merge, regardless of reason. // If there is corruption in the data, handle it in the FullMergeV2() function // and return false there. The default implementation of PartialMerge will // always return false. virtual bool PartialMerge(const Slice& /*key*/, const Slice& /*left_operand*/, const Slice& /*right_operand*/, std::string* /*new_value*/, Logger* /*logger*/) const { return false; } // This function performs merge when all the operands are themselves merge // operation types that you would have passed to a DB::Merge() call in the // same order (front() first) // (i.e. DB::Merge(key, operand_list[0]), followed by // DB::Merge(key, operand_list[1]), ...) // // PartialMergeMulti should combine them into a single merge operation that is // saved into *new_value, and then it should return true. *new_value should // be constructed such that a call to DB::Merge(key, *new_value) would yield // the same result as subquential individual calls to DB::Merge(key, operand) // for each operand in operand_list from front() to back(). // // The string that new_value is pointing to will be empty. // // The PartialMergeMulti function will be called when there are at least two // operands. 
// // In the default implementation, PartialMergeMulti will invoke PartialMerge // multiple times, where each time it only merges two operands. Developers // should either implement PartialMergeMulti, or implement PartialMerge which // is served as the helper function of the default PartialMergeMulti. virtual bool PartialMergeMulti(const Slice& key, const std::deque& operand_list, std::string* new_value, Logger* logger) const; // The name of the MergeOperator. Used to check for MergeOperator // mismatches (i.e., a DB created with one MergeOperator is // accessed using a different MergeOperator) // TODO: the name is currently not stored persistently and thus // no checking is enforced. Client is responsible for providing // consistent MergeOperator between DB opens. virtual const char* Name() const = 0; // Determines whether the PartialMerge can be called with just a single // merge operand. // Override and return true for allowing a single operand. PartialMerge // and PartialMergeMulti should be overridden and implemented // correctly to properly handle a single operand. virtual bool AllowSingleOperand() const { return false; } // Allows to control when to invoke a full merge during Get. // This could be used to limit the number of merge operands that are looked at // during a point lookup, thereby helping in limiting the number of levels to // read from. // Doesn't help with iterators. // // Note: the merge operands are passed to this function in the reversed order // relative to how they were merged (passed to FullMerge or FullMergeV2) // for performance reasons, see also: // https://github.com/facebook/rocksdb/issues/3865 virtual bool ShouldMerge(const std::vector& /*operands*/) const { return false; } }; // The simpler, associative merge operator. 
class AssociativeMergeOperator : public MergeOperator { public: ~AssociativeMergeOperator() override {} // Gives the client a way to express the read -> modify -> write semantics // key: (IN) The key that's associated with this merge operation. // existing_value:(IN) null indicates the key does not exist before this op // value: (IN) the value to update/merge the existing_value with // new_value: (OUT) Client is responsible for filling the merge result // here. The string that new_value is pointing to will be empty. // logger: (IN) Client could use this to log errors during merge. // // Return true on success. // All values passed in will be client-specific values. So if this method // returns false, it is because client specified bad data or there was // internal corruption. The client should assume that this will be treated // as an error by the library. virtual bool Merge(const Slice& key, const Slice* existing_value, const Slice& value, std::string* new_value, Logger* logger) const = 0; private: // Default implementations of the MergeOperator functions bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override; bool PartialMerge(const Slice& key, const Slice& left_operand, const Slice& right_operand, std::string* new_value, Logger* logger) const override; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/metadata.h000066400000000000000000000132751370372246700202600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { struct ColumnFamilyMetaData; struct LevelMetaData; struct SstFileMetaData; // The metadata that describes a column family. 
struct ColumnFamilyMetaData { ColumnFamilyMetaData() : size(0), file_count(0), name("") {} ColumnFamilyMetaData(const std::string& _name, uint64_t _size, const std::vector&& _levels) : size(_size), name(_name), levels(_levels) {} // The size of this column family in bytes, which is equal to the sum of // the file size of its "levels". uint64_t size; // The number of files in this column family. size_t file_count; // The name of the column family. std::string name; // The metadata of all levels in this column family. std::vector levels; }; // The metadata that describes a level. struct LevelMetaData { LevelMetaData(int _level, uint64_t _size, const std::vector&& _files) : level(_level), size(_size), files(_files) {} // The level which this meta data describes. const int level; // The size of this level in bytes, which is equal to the sum of // the file size of its "files". const uint64_t size; // The metadata of all sst files in this level. const std::vector files; }; // The metadata that describes a SST file. 
struct SstFileMetaData { SstFileMetaData() : size(0), file_number(0), smallest_seqno(0), largest_seqno(0), num_reads_sampled(0), being_compacted(false), num_entries(0), num_deletions(0), oldest_blob_file_number(0) {} SstFileMetaData(const std::string& _file_name, uint64_t _file_number, const std::string& _path, size_t _size, SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno, const std::string& _smallestkey, const std::string& _largestkey, uint64_t _num_reads_sampled, bool _being_compacted, uint64_t _oldest_blob_file_number, uint64_t _oldest_ancester_time, uint64_t _file_creation_time, std::string& _file_checksum, std::string& _file_checksum_func_name) : size(_size), name(_file_name), file_number(_file_number), db_path(_path), smallest_seqno(_smallest_seqno), largest_seqno(_largest_seqno), smallestkey(_smallestkey), largestkey(_largestkey), num_reads_sampled(_num_reads_sampled), being_compacted(_being_compacted), num_entries(0), num_deletions(0), oldest_blob_file_number(_oldest_blob_file_number), oldest_ancester_time(_oldest_ancester_time), file_creation_time(_file_creation_time), file_checksum(_file_checksum), file_checksum_func_name(_file_checksum_func_name) {} // File size in bytes. size_t size; // The name of the file. std::string name; // The id of the file. uint64_t file_number; // The full path where the file locates. std::string db_path; SequenceNumber smallest_seqno; // Smallest sequence number in file. SequenceNumber largest_seqno; // Largest sequence number in file. std::string smallestkey; // Smallest user defined key in the file. std::string largestkey; // Largest user defined key in the file. uint64_t num_reads_sampled; // How many times the file is read. bool being_compacted; // true if the file is currently being compacted. uint64_t num_entries; uint64_t num_deletions; uint64_t oldest_blob_file_number; // The id of the oldest blob file // referenced by the file. 
// An SST file may be generated by compactions whose input files may // in turn be generated by earlier compactions. The creation time of the // oldest SST file that is the compaction ancester of this file. // The timestamp is provided Env::GetCurrentTime(). // 0 if the information is not available. uint64_t oldest_ancester_time; // Timestamp when the SST file is created, provided by Env::GetCurrentTime(). // 0 if the information is not available. uint64_t file_creation_time; // The checksum of a SST file, the value is decided by the file content and // the checksum algorithm used for this SST file. The checksum function is // identified by the file_checksum_func_name. If the checksum function is // not specified, file_checksum is "0" by default. std::string file_checksum; // The name of the checksum function used to generate the file checksum // value. If file checksum is not enabled (e.g., sst_file_checksum_func is // null), file_checksum_func_name is UnknownFileChecksumFuncName, which is // "Unknown". std::string file_checksum_func_name; }; // The full set of metadata associated with each SST file. struct LiveFileMetaData : SstFileMetaData { std::string column_family_name; // Name of the column family int level; // Level at which this file resides. LiveFileMetaData() : column_family_name(), level(0) {} }; // Metadata returned as output from ExportColumnFamily() and used as input to // CreateColumnFamiliesWithImport(). struct ExportImportFilesMetaData { std::string db_comparator_name; // Used to safety check at import. std::vector files; // Vector of file metadata. }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/options.h000066400000000000000000002152471370372246700201760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include #include #include "rocksdb/advanced_options.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/file_checksum.h" #include "rocksdb/listener.h" #include "rocksdb/universal_compaction.h" #include "rocksdb/version.h" #include "rocksdb/write_buffer_manager.h" #ifdef max #undef max #endif namespace ROCKSDB_NAMESPACE { class Cache; class CompactionFilter; class CompactionFilterFactory; class Comparator; class ConcurrentTaskLimiter; class Env; enum InfoLogLevel : unsigned char; class SstFileManager; class FilterPolicy; class Logger; class MergeOperator; class Snapshot; class MemTableRepFactory; class RateLimiter; class Slice; class Statistics; class InternalKeyComparator; class WalFilter; class FileSystem; enum class CpuPriority { kIdle = 0, kLow = 1, kNormal = 2, kHigh = 3, }; // DB contents are stored in a set of blocks, each of which holds a // sequence of key,value pairs. Each block may be compressed before // being stored in a file. The following enum describes which // compression method (if any) is used to compress a block. enum CompressionType : unsigned char { // NOTE: do not change the values of existing entries, as these are // part of the persistent format on disk. 
kNoCompression = 0x0, kSnappyCompression = 0x1, kZlibCompression = 0x2, kBZip2Compression = 0x3, kLZ4Compression = 0x4, kLZ4HCCompression = 0x5, kXpressCompression = 0x6, kZSTD = 0x7, // Only use kZSTDNotFinalCompression if you have to use ZSTD lib older than // 0.8.0 or consider a possibility of downgrading the service or copying // the database files to another service running with an older version of // RocksDB that doesn't have kZSTD. Otherwise, you should use kZSTD. We will // eventually remove the option from the public API. kZSTDNotFinalCompression = 0x40, // kDisableCompressionOption is used to disable some compression options. kDisableCompressionOption = 0xff, }; struct Options; struct DbPath; struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions { // The function recovers options to a previous version. Only 4.6 or later // versions are supported. ColumnFamilyOptions* OldDefaults(int rocksdb_major_version = 4, int rocksdb_minor_version = 6); // Some functions that make it easier to optimize RocksDB // Use this if your DB is very small (like under 1GB) and you don't want to // spend lots of memory for memtables. // An optional cache object is passed in to be used as the block cache ColumnFamilyOptions* OptimizeForSmallDb( std::shared_ptr* cache = nullptr); // Use this if you don't need to keep the data sorted, i.e. you'll never use // an iterator, only Put() and Get() API calls // // Not supported in ROCKSDB_LITE ColumnFamilyOptions* OptimizeForPointLookup(uint64_t block_cache_size_mb); // Default values for some parameters in ColumnFamilyOptions are not // optimized for heavy workloads and big datasets, which means you might // observe write stalls under some conditions. 
As a starting point for tuning // RocksDB options, use the following two functions: // * OptimizeLevelStyleCompaction -- optimizes level style compaction // * OptimizeUniversalStyleCompaction -- optimizes universal style compaction // Universal style compaction is focused on reducing Write Amplification // Factor for big data sets, but increases Space Amplification. You can learn // more about the different styles here: // https://github.com/facebook/rocksdb/wiki/Rocksdb-Architecture-Guide // Make sure to also call IncreaseParallelism(), which will provide the // biggest performance gains. // Note: we might use more memory than memtable_memory_budget during high // write rate period // // OptimizeUniversalStyleCompaction is not supported in ROCKSDB_LITE ColumnFamilyOptions* OptimizeLevelStyleCompaction( uint64_t memtable_memory_budget = 512 * 1024 * 1024); ColumnFamilyOptions* OptimizeUniversalStyleCompaction( uint64_t memtable_memory_budget = 512 * 1024 * 1024); // ------------------- // Parameters that affect behavior // Comparator used to define the order of keys in the table. // Default: a comparator that uses lexicographic byte-wise ordering // // REQUIRES: The client must ensure that the comparator supplied // here has the same name and orders keys *exactly* the same as the // comparator provided to previous open calls on the same DB. const Comparator* comparator = BytewiseComparator(); // REQUIRES: The client must provide a merge operator if Merge operation // needs to be accessed. Calling Merge on a DB without a merge operator // would result in Status::NotSupported. The client must ensure that the // merge operator supplied here has the same name and *exactly* the same // semantics as the merge operator provided to previous open calls on // the same DB. The only exception is reserved for upgrade, where a DB // previously without a merge operator is introduced to Merge operation // for the first time. 
It's necessary to specify a merge operator when // opening the DB in this case. // Default: nullptr std::shared_ptr merge_operator = nullptr; // A single CompactionFilter instance to call into during compaction. // Allows an application to modify/delete a key-value during background // compaction. // // If the client requires a new compaction filter to be used for different // compaction runs, it can specify compaction_filter_factory instead of this // option. The client should specify only one of the two. // compaction_filter takes precedence over compaction_filter_factory if // client specifies both. // // If multithreaded compaction is being used, the supplied CompactionFilter // instance may be used from different threads concurrently and so should be // thread-safe. // // Default: nullptr const CompactionFilter* compaction_filter = nullptr; // This is a factory that provides compaction filter objects which allow // an application to modify/delete a key-value during background compaction. // // A new filter will be created on each compaction run. If multithreaded // compaction is being used, each created CompactionFilter will only be used // from a single thread and so does not need to be thread-safe. // // Default: nullptr std::shared_ptr compaction_filter_factory = nullptr; // ------------------- // Parameters that affect performance // Amount of data to build up in memory (backed by an unsorted log // on disk) before converting to a sorted on-disk file. // // Larger values increase performance, especially during bulk loads. // Up to max_write_buffer_number write buffers may be held in memory // at the same time, // so you may wish to adjust this parameter to control memory usage. // Also, a larger write buffer will result in a longer recovery time // the next time the database is opened. // // Note that write_buffer_size is enforced per column family. // See db_write_buffer_size for sharing memory across column families. 
// // Default: 64MB // // Dynamically changeable through SetOptions() API size_t write_buffer_size = 64 << 20; // Compress blocks using the specified compression algorithm. // // Default: kSnappyCompression, if it's supported. If snappy is not linked // with the library, the default is kNoCompression. // // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: // ~200-500MB/s compression // ~400-800MB/s decompression // // Note that these speeds are significantly faster than most // persistent storage speeds, and therefore it is typically never // worth switching to kNoCompression. Even if the input data is // incompressible, the kSnappyCompression implementation will // efficiently detect that and will switch to uncompressed mode. // // If you do not set `compression_opts.level`, or set it to // `CompressionOptions::kDefaultCompressionLevel`, we will attempt to pick the // default corresponding to `compression` as follows: // // - kZSTD: 3 // - kZlibCompression: Z_DEFAULT_COMPRESSION (currently -1) // - kLZ4HCCompression: 0 // - For all others, we do not specify a compression level // // Dynamically changeable through SetOptions() API CompressionType compression; // Compression algorithm that will be used for the bottommost level that // contain files. // // Default: kDisableCompressionOption (Disabled) CompressionType bottommost_compression = kDisableCompressionOption; // different options for compression algorithms used by bottommost_compression // if it is enabled. To enable it, please see the definition of // CompressionOptions. CompressionOptions bottommost_compression_opts; // different options for compression algorithms CompressionOptions compression_opts; // Number of files to trigger level-0 compaction. A value <0 means that // level-0 compaction will not be triggered by number of files at all. 
// // Default: 4 // // Dynamically changeable through SetOptions() API int level0_file_num_compaction_trigger = 4; // If non-nullptr, use the specified function to determine the // prefixes for keys. These prefixes will be placed in the filter. // Depending on the workload, this can reduce the number of read-IOP // cost for scans when a prefix is passed via ReadOptions to // db.NewIterator(). For prefix filtering to work properly, // "prefix_extractor" and "comparator" must be such that the following // properties hold: // // 1) key.starts_with(prefix(key)) // 2) Compare(prefix(key), key) <= 0. // 3) If Compare(k1, k2) <= 0, then Compare(prefix(k1), prefix(k2)) <= 0 // 4) prefix(prefix(key)) == prefix(key) // // Default: nullptr std::shared_ptr prefix_extractor = nullptr; // Control maximum total data size for a level. // max_bytes_for_level_base is the max total for level-1. // Maximum number of bytes for level L can be calculated as // (max_bytes_for_level_base) * (max_bytes_for_level_multiplier ^ (L-1)) // For example, if max_bytes_for_level_base is 200MB, and if // max_bytes_for_level_multiplier is 10, total data size for level-1 // will be 200MB, total file size for level-2 will be 2GB, // and total file size for level-3 will be 20GB. // // Default: 256MB. // // Dynamically changeable through SetOptions() API uint64_t max_bytes_for_level_base = 256 * 1048576; // Deprecated. uint64_t snap_refresh_nanos = 0; // Disable automatic compactions. Manual compactions can still // be issued on this column family // // Dynamically changeable through SetOptions() API bool disable_auto_compactions = false; // This is a factory that provides TableFactory objects. // Default: a block-based table factory that provides a default // implementation of TableBuilder and TableReader with default // BlockBasedTableOptions. std::shared_ptr table_factory; // A list of paths where SST files for this column family // can be put into, with its target size. 
Similar to db_paths, // newer data is placed into paths specified earlier in the // vector while older data gradually moves to paths specified // later in the vector. // Note that, if a path is supplied to multiple column // families, it would have files and total size from all // the column families combined. User should provision for the // total size (from all the column families) in such cases. // // If left empty, db_paths will be used. // Default: empty std::vector cf_paths; // Compaction concurrent thread limiter for the column family. // If non-nullptr, use given concurrent thread limiter to control // the max outstanding compaction tasks. Limiter can be shared with // multiple column families across db instances. // // Default: nullptr std::shared_ptr compaction_thread_limiter = nullptr; // Create ColumnFamilyOptions with default values for all fields ColumnFamilyOptions(); // Create ColumnFamilyOptions from Options explicit ColumnFamilyOptions(const Options& options); void Dump(Logger* log) const; }; // Selects how strictly the write-ahead log (WAL) is checked for corruption // when it is replayed during recovery. The underlying type is char and each // mode has a fixed numeric value (0x00-0x03). enum class WALRecoveryMode : char { // Original levelDB recovery // We tolerate incomplete records in trailing data on all logs // Use case : This is legacy behavior kTolerateCorruptedTailRecords = 0x00, // Recover from clean shutdown // We don't expect to find any corruption in the WAL // Use case : This is ideal for unit tests and rare applications that // require a high consistency guarantee kAbsoluteConsistency = 0x01, // Recover to point-in-time consistency (default) // We stop the WAL playback on discovering a WAL inconsistency // Use case : Ideal for systems that have disk controller cache like // hard disk, SSD without super capacitor that store related data kPointInTimeRecovery = 0x02, // Recovery after a disaster // We ignore any corruption in the WAL and try to salvage as much data as // possible // Use case : Ideal for last ditch effort to recover data or systems that // operate with low grade unrelated data kSkipAnyCorruptedRecords = 0x03, };
struct DbPath { std::string path; uint64_t target_size; // Target size of total files under the path, in bytes. // Default constructor: leaves `path` empty and sets `target_size` to 0. DbPath() : target_size(0) {} // Initializes `path` from `p` and `target_size` from `t`. DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {} }; struct DBOptions { // This function restores the options to their values as of version 4.6 // (the default arguments select major version 4, minor version 6). DBOptions* OldDefaults(int rocksdb_major_version = 4, int rocksdb_minor_version = 6); // Some functions that make it easier to optimize RocksDB // Use this if your DB is very small (like under 1GB) and you don't want to // spend lots of memory for memtables. // An optional cache object can be passed in; the memory of the // memtable is then charged to it. DBOptions* OptimizeForSmallDb(std::shared_ptr* cache = nullptr); #ifndef ROCKSDB_LITE // By default, RocksDB uses only one background thread for flush and // compaction. Calling this function will set it up such that a total of // `total_threads` threads is used. A good value for `total_threads` is the number of // cores. You almost definitely want to call this function if your system is // bottlenecked by RocksDB. DBOptions* IncreaseParallelism(int total_threads = 16); #endif // ROCKSDB_LITE // If true, the database will be created if it is missing. // Default: false bool create_if_missing = false; // If true, missing column families will be automatically created. // Default: false bool create_missing_column_families = false; // If true, an error is raised if the database already exists. // Default: false bool error_if_exists = false; // If true, RocksDB will aggressively check consistency of the data. // Also, if any of the writes to the database fails (Put, Delete, Merge, // Write), the database will switch to read-only mode and fail all other // Write operations. // In most cases you want this to be set to true. // Default: true bool paranoid_checks = true; // Use the specified object to interact with the environment, // e.g. to read/write files, schedule background work, etc.
In the near // future, support for doing storage operations such as read/write files // through env will be deprecated in favor of file_system (see below) // Default: Env::Default() Env* env = Env::Default(); // Use to control write rate of flush and compaction. Flush has higher // priority than compaction. Rate limiting is disabled if nullptr. // If rate limiter is enabled, bytes_per_sync is set to 1MB by default. // Default: nullptr std::shared_ptr rate_limiter = nullptr; // Use to track SST files and control their file deletion rate. // // Features: // - Throttle the deletion rate of the SST files. // - Keep track of the total size of all SST files. // - Set a maximum allowed space limit for SST files that when reached // the DB won't do any further flushes or compactions and will set the // background error. // - Can be shared between multiple dbs. // Limitations: // - Only track and throttle deletes of SST files in // first db_path (db_name if db_paths is empty). // // Default: nullptr std::shared_ptr sst_file_manager = nullptr; // Any internal progress/error information generated by the db will // be written to info_log if it is non-nullptr, or to a file stored // in the same directory as the DB contents if info_log is nullptr. // Default: nullptr std::shared_ptr info_log = nullptr; #ifdef NDEBUG InfoLogLevel info_log_level = INFO_LEVEL; #else InfoLogLevel info_log_level = DEBUG_LEVEL; #endif // NDEBUG // Number of open files that can be used by the DB. You may need to // increase this if your database has a large working set. Value -1 means // files opened are always kept open. You can estimate number of files based // on target_file_size_base and target_file_size_multiplier for level-based // compaction. For universal-style compaction, you can usually set it to -1. // // Default: -1 // // Dynamically changeable through SetDBOptions() API. int max_open_files = -1; // If max_open_files is -1, DB will open all files on DB::Open().
You can // use this option to increase the number of threads used to open the files. // Default: 16 int max_file_opening_threads = 16; // Once write-ahead logs exceed this size, we will start forcing the flush of // column families whose memtables are backed by the oldest live WAL file // (i.e. the ones that are causing all the space amplification). If set to 0 // (default), we will dynamically choose the WAL size limit to be // [sum of all write_buffer_size * max_write_buffer_number] * 4 // This option takes effect only when there are more than one column family as // otherwise the wal size is dictated by the write_buffer_size. // // Default: 0 // // Dynamically changeable through SetDBOptions() API. uint64_t max_total_wal_size = 0; // If non-null, then we should collect metrics about database operations std::shared_ptr statistics = nullptr; // By default, writes to stable storage use fdatasync (on platforms // where this function is available). If this option is true, // fsync is used instead. // // fsync and fdatasync are equally safe for our purposes and fdatasync is // faster, so it is rarely necessary to set this option. It is provided // as a workaround for kernel/filesystem bugs, such as one that affected // fdatasync with ext4 in kernel versions prior to 3.7. bool use_fsync = false; // A list of paths where SST files can be put into, with its target size. // Newer data is placed into paths specified earlier in the vector while // older data gradually moves to paths specified later in the vector. // // For example, you have a flash device with 10GB allocated for the DB, // as well as a hard drive of 2TB, you should config it to be: // [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] // // The system will try to guarantee data under each path is close to but // not larger than the target size. 
But current and future file sizes used // in determining where to place a file are based on best-effort estimation, // which means there is a chance that the actual size under the directory // is slightly more than target size under some workloads. User should give // some buffer room for those cases. // // If none of the paths has sufficient room to place a file, the file will // be placed to the last path anyway, regardless of the target size. // // Placing newer data to earlier paths is also best-efforts. User should // expect user files to be placed in higher levels in some extreme cases. // // If left empty, only one path will be used, which is db_name passed when // opening the DB. // Default: empty std::vector db_paths; // This specifies the info LOG dir. // If it is empty, the log files will be in the same dir as data. // If it is non empty, the log files will be in the specified dir, // and the db data dir's absolute path will be used as the log file // name's prefix. std::string db_log_dir = ""; // This specifies the absolute dir path for write-ahead logs (WAL). // If it is empty, the log files will be in the same dir as data, // dbname is used as the data dir by default // If it is non empty, the log files will be kept in the specified dir. // When destroying the db, // all log files in wal_dir and the dir itself are deleted std::string wal_dir = ""; // The periodicity when obsolete files get deleted. The default // value is 6 hours. The files that get out of scope by compaction // process will still get automatically deleted on every compaction, // regardless of this setting // // Default: 6 hours // // Dynamically changeable through SetDBOptions() API. uint64_t delete_obsolete_files_period_micros = 6ULL * 60 * 60 * 1000000; // Maximum number of concurrent background jobs (compactions and flushes). // // Default: 2 // // Dynamically changeable through SetDBOptions() API.
int max_background_jobs = 2; // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the // value of max_background_jobs. This option is ignored. // // Dynamically changeable through SetDBOptions() API. int base_background_compactions = -1; // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the // value of max_background_jobs. For backwards compatibility we will set // `max_background_jobs = max_background_compactions + max_background_flushes` // in the case where user sets at least one of `max_background_compactions` or // `max_background_flushes` (we replace -1 by 1 in case one option is unset). // // Maximum number of concurrent background compaction jobs, submitted to // the default LOW priority thread pool. // // If you're increasing this, also consider increasing number of threads in // LOW priority thread pool. For more information, see // Env::SetBackgroundThreads // // Default: -1 // // Dynamically changeable through SetDBOptions() API. int max_background_compactions = -1; // This value represents the maximum number of threads that will // concurrently perform a compaction job by breaking it into multiple, // smaller ones that are run simultaneously. // Default: 1 (i.e. no subcompactions) uint32_t max_subcompactions = 1; // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the // value of max_background_jobs. For backwards compatibility we will set // `max_background_jobs = max_background_compactions + max_background_flushes` // in the case where user sets at least one of `max_background_compactions` or // `max_background_flushes`. // // Maximum number of concurrent background memtable flush jobs, submitted by // default to the HIGH priority thread pool. If the HIGH priority thread pool // is configured to have zero threads, flush jobs will share the LOW priority // thread pool with compaction jobs. // // It is important to use both thread pools when the same Env is shared by // multiple db instances. 
Without a separate pool, long running compaction // jobs could potentially block memtable flush jobs of other db instances, // leading to unnecessary Put stalls. // // If you're increasing this, also consider increasing number of threads in // HIGH priority thread pool. For more information, see // Env::SetBackgroundThreads // Default: -1 int max_background_flushes = -1; // Specify the maximal size of the info log file. If the log file // is larger than `max_log_file_size`, a new info log file will // be created. // If max_log_file_size == 0, all logs will be written to one // log file. size_t max_log_file_size = 0; // Time for the info log file to roll (in seconds). // If specified with non-zero value, log file will be rolled // if it has been active longer than `log_file_time_to_roll`. // Default: 0 (disabled) // Not supported in ROCKSDB_LITE mode! size_t log_file_time_to_roll = 0; // Maximal info log files to be kept. // Default: 1000 size_t keep_log_file_num = 1000; // Recycle log files. // If non-zero, we will reuse previously written log files for new // logs, overwriting the old data. The value indicates how many // such files we will keep around at any point in time for later // use. This is more efficient because the blocks are already // allocated and fdatasync does not need to update the inode after // each write. // Default: 0 size_t recycle_log_file_num = 0; // manifest file is rolled over on reaching this limit. // The older manifest file will be deleted. // The default value is 1GB so that the manifest file can grow, but not // reach the limit of storage capacity. uint64_t max_manifest_file_size = 1024 * 1024 * 1024; // Number of shards used for table cache. int table_cache_numshardbits = 6; // NOT SUPPORTED ANYMORE // int table_cache_remove_scan_count_limit; // The following two fields affect how archived logs will be deleted. // 1. If both are set to 0, logs will be deleted asap and will not get into // the archive. // 2.
If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, // WAL files will be checked every 10 min and if total size is greater // than WAL_size_limit_MB, they will be deleted starting with the // earliest until size_limit is met. All empty files will be deleted. // 3. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then // WAL files will be checked every WAL_ttl_seconds / 2 and those that // are older than WAL_ttl_seconds will be deleted. // 4. If both are not 0, WAL files will be checked every 10 min and both // checks will be performed with ttl being first. uint64_t WAL_ttl_seconds = 0; uint64_t WAL_size_limit_MB = 0; // Number of bytes to preallocate (via fallocate) the manifest // files. Default is 4MB, which is reasonable to reduce random IO // as well as prevent overallocation for mounts that preallocate // large amounts of data (such as xfs's allocsize option). size_t manifest_preallocation_size = 4 * 1024 * 1024; // Allow the OS to mmap file for reading sst tables. Default: false bool allow_mmap_reads = false; // Allow the OS to mmap file for writing. // DB::SyncWAL() only works if this is set to false. // Default: false bool allow_mmap_writes = false; // Enable direct I/O mode for read/write // they may or may not improve performance depending on the use case // // Files will be opened in "direct I/O" mode // which means that data r/w from the disk will not be cached or // buffered. The hardware buffer of the devices may however still // be used. Memory mapped files are not impacted by these parameters. // Use O_DIRECT for user and compaction reads. // When true, we also force new_table_reader_for_compaction_inputs to true. // Default: false // Not supported in ROCKSDB_LITE mode! bool use_direct_reads = false; // Use O_DIRECT for writes in background flush and compactions. // Default: false // Not supported in ROCKSDB_LITE mode!
bool use_direct_io_for_flush_and_compaction = false; // If false, fallocate() calls are bypassed bool allow_fallocate = true; // Disable child processes from inheriting open files. Default: true bool is_fd_close_on_exec = true; // NOT SUPPORTED ANYMORE -- this option is no longer used bool skip_log_error_on_recovery = false; // if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec // // Default: 600 (10 min) // // Dynamically changeable through SetDBOptions() API. unsigned int stats_dump_period_sec = 600; // if not zero, dump rocksdb.stats to RocksDB every stats_persist_period_sec // Default: 600 unsigned int stats_persist_period_sec = 600; // If true, automatically persist stats to a hidden column family (column // family name: ___rocksdb_stats_history___) every // stats_persist_period_sec seconds; otherwise, write to an in-memory // struct. User can query through `GetStatsHistory` API. // If user attempts to create a column family with the same name on a DB // which has previously set persist_stats_to_disk to true, the column family // creation will fail, but the hidden column family will survive, as well as // the previously persisted statistics. // When persisting stats to disk, the stat name will be limited to 100 bytes. // Default: false bool persist_stats_to_disk = false; // if not zero, periodically take stats snapshots and store in memory, the // memory size for stats snapshots is capped at stats_history_buffer_size // Default: 1MB size_t stats_history_buffer_size = 1024 * 1024; // If set true, will hint the underlying file system that the file // access pattern is random, when a sst file is opened. // Default: true bool advise_random_on_open = true; // Amount of data to build up in memtables across all column // families before writing to disk. // // This is distinct from write_buffer_size, which enforces a limit // for a single memtable. // // This feature is disabled by default. Specify a non-zero value // to enable it.
// // Default: 0 (disabled) size_t db_write_buffer_size = 0; // The memory usage of memtable will be reported to this object. The same object // can be passed into multiple DBs and it will track the sum of size of all // the DBs. If the total size of all live memtables of all the DBs exceeds // a limit, a flush will be triggered in the next DB to which the next write // is issued. // // If the object is only passed to one DB, the behavior is the same as // db_write_buffer_size. When write_buffer_manager is set, the value set will // override db_write_buffer_size. // // This feature is disabled by default. Specify a non-zero value // to enable it. // // Default: null std::shared_ptr write_buffer_manager = nullptr; // Specify the file access pattern once a compaction is started. // It will be applied to all input files of a compaction. // Default: NORMAL enum AccessHint { NONE, NORMAL, SEQUENTIAL, WILLNEED }; AccessHint access_hint_on_compaction_start = NORMAL; // If true, always create a new file descriptor and new table reader // for compaction inputs. Turning this parameter on may introduce extra // memory usage in the table reader, if it allocates extra memory // for indexes. This will allow file descriptor prefetch options // to be set for compaction input files and not to impact file // descriptors for the same file used by user queries. // Suggest to enable BlockBasedTableOptions.cache_index_and_filter_blocks // for this mode if using block-based table. // // Default: false // This flag has no effect on the behavior of compaction and we plan to delete it // in the future. bool new_table_reader_for_compaction_inputs = false; // If non-zero, we perform bigger reads when doing compaction. If you're // running RocksDB on spinning disks, you should set this to at least 2MB. // That way RocksDB's compaction is doing sequential instead of random reads. // // When non-zero, we also force new_table_reader_for_compaction_inputs to // true.
// // Default: 0 // // Dynamically changeable through SetDBOptions() API. size_t compaction_readahead_size = 0; // This is a maximum buffer size that is used by WinMmapReadableFile in // unbuffered disk I/O mode. We need to maintain an aligned buffer for // reads. We allow the buffer to grow until the specified value and then // for bigger requests allocate one shot buffers. In unbuffered mode we // always bypass read-ahead buffer at ReadaheadRandomAccessFile // When read-ahead is required we then make use of compaction_readahead_size // value and always try to read ahead. With read-ahead we always // pre-allocate buffer to the size instead of growing it up to a limit. // // This option is currently honored only on Windows // // Default: 1 MB // // Special value: 0 - means do not maintain per instance buffer. Allocate // per request buffer and avoid locking. size_t random_access_max_buffer_size = 1024 * 1024; // This is the maximum buffer size that is used by WritableFileWriter. // On Windows, we need to maintain an aligned buffer for writes. // We allow the buffer to grow until its size hits the limit in buffered // IO and fix the buffer size when using direct IO to ensure alignment of // write requests if the logical sector size is unusual // // Default: 1024 * 1024 (1 MB) // // Dynamically changeable through SetDBOptions() API. size_t writable_file_max_buffer_size = 1024 * 1024; // Use adaptive mutex, which spins in the user space before resorting // to kernel. This could reduce context switch when the mutex is not // heavily contended. However, if the mutex is hot, we could end up // wasting spin time. // Default: false bool use_adaptive_mutex = false; // Create DBOptions with default values for all fields DBOptions(); // Create DBOptions from Options explicit DBOptions(const Options& options); void Dump(Logger* log) const; // Allows OS to incrementally sync files to disk while they are being // written, asynchronously, in the background.
This operation can be used // to smooth out write I/Os over time. Users shouldn't rely on it for // persistency guarantee. // Issue one request for every bytes_per_sync written. 0 turns it off. // // You may consider using rate_limiter to regulate write rate to device. // When rate limiter is enabled, it automatically enables bytes_per_sync // to 1MB. // // This option applies to table files // // Default: 0, turned off // // Note: DOES NOT apply to WAL files. See wal_bytes_per_sync instead // Dynamically changeable through SetDBOptions() API. uint64_t bytes_per_sync = 0; // Same as bytes_per_sync, but applies to WAL files // // Default: 0, turned off // // Dynamically changeable through SetDBOptions() API. uint64_t wal_bytes_per_sync = 0; // When true, guarantees WAL files have at most `wal_bytes_per_sync` // bytes submitted for writeback at any given time, and SST files have at most // `bytes_per_sync` bytes pending writeback at any given time. This can be // used to handle cases where processing speed exceeds I/O speed during file // generation, which can lead to a huge sync when the file is finished, even // with `bytes_per_sync` / `wal_bytes_per_sync` properly configured. // // - If `sync_file_range` is supported it achieves this by waiting for any // prior `sync_file_range`s to finish before proceeding. In this way, // processing (compression, etc.) can proceed uninhibited in the gap // between `sync_file_range`s, and we block only when I/O falls behind. // - Otherwise the `WritableFile::Sync` method is used. Note this mechanism // always blocks, thus preventing the interleaving of I/O and processing. // // Note: Enabling this option does not provide any additional persistence // guarantees, as it may use `sync_file_range`, which does not write out // metadata. // // Default: false bool strict_bytes_per_sync = false; // A vector of EventListeners whose callback functions will be called // when specific RocksDB event happens. 
std::vector> listeners; // If true, then the status of the threads involved in this DB will // be tracked and available via GetThreadList() API. // // Default: false bool enable_thread_tracking = false; // The limited write rate to DB if soft_pending_compaction_bytes_limit or // level0_slowdown_writes_trigger is triggered, or we are writing to the // last mem table allowed and we allow more than 3 mem tables. It is // calculated using size of user write requests before compression. // RocksDB may decide to slow down more if the compaction still // gets behind further. // If the value is 0, we will infer a value from `rate_limiter` value // if it is not empty, or 16MB if `rate_limiter` is empty. Note that // if users change the rate in `rate_limiter` after DB is opened, // `delayed_write_rate` won't be adjusted. // // Unit: bytes per second. // // Default: 0 // // Dynamically changeable through SetDBOptions() API. uint64_t delayed_write_rate = 0; // By default, a single write thread queue is maintained. The thread that gets // to the head of the queue becomes the write batch group leader and is responsible // for writing to WAL and memtable for the batch group. // // If enable_pipelined_write is true, separate write thread queues are // maintained for WAL write and memtable write. A write thread first enters the WAL // writer queue and then the memtable writer queue. A pending thread on the WAL // writer queue thus only has to wait for previous writers to finish their // WAL writing but not the memtable writing. Enabling the feature may improve // write throughput and reduce latency of the prepare phase of two-phase // commit. // // Default: false bool enable_pipelined_write = false; // Setting unordered_write to true trades higher write throughput with // relaxing the immutability guarantee of snapshots. This violates the // repeatability one expects from ::Get from a snapshot, as well as // ::MultiGet and Iterator's consistent-point-in-time view property.
// If the application cannot tolerate the relaxed guarantees, it can implement // its own mechanisms to work around that and yet benefit from the higher // throughput. Using TransactionDB with WRITE_PREPARED write policy and // two_write_queues=true is one way to achieve immutable snapshots despite // unordered_write. // // By default, i.e., when it is false, rocksdb does not advance the sequence // number for new snapshots unless all the writes with lower sequence numbers // are already finished. This provides the immutability that we expect from // snapshots. Moreover, since Iterator and MultiGet internally depend on // snapshots, the snapshot immutability results in Iterator and MultiGet // offering consistent-point-in-time view. If set to true, although // Read-Your-Own-Write property is still provided, the snapshot immutability // property is relaxed: the writes issued after the snapshot is obtained (with // larger sequence numbers) will still not be visible to the reads from that // snapshot, however, there still might be pending writes (with lower sequence // number) that will change the state visible to the snapshot after they are // landed to the memtable. // // Default: false bool unordered_write = false; // If true, allow multi-writers to update mem tables in parallel. // Only some memtable_factory-s support concurrent writes; currently it // is implemented only for SkipListFactory. Concurrent memtable writes // are not compatible with inplace_update_support or filter_deletes. // It is strongly recommended to set enable_write_thread_adaptive_yield // if you are going to use this feature. // // Default: true bool allow_concurrent_memtable_write = true; // If true, threads synchronizing with the write batch group leader will // wait for up to write_thread_max_yield_usec before blocking on a mutex. // This can substantially improve throughput for concurrent workloads, // regardless of whether allow_concurrent_memtable_write is enabled.
// // Default: true bool enable_write_thread_adaptive_yield = true; // The maximum limit of number of bytes that are written in a single batch // of WAL or memtable write. It is followed when the leader write size // is larger than 1/8 of this limit. // // Default: 1 MB uint64_t max_write_batch_group_size_bytes = 1 << 20; // The maximum number of microseconds that a write operation will use // a yielding spin loop to coordinate with other write threads before // blocking on a mutex. (Assuming write_thread_slow_yield_usec is // set properly) increasing this value is likely to increase RocksDB // throughput at the expense of increased CPU usage. // // Default: 100 uint64_t write_thread_max_yield_usec = 100; // The latency in microseconds after which a std::this_thread::yield // call (sched_yield on Linux) is considered to be a signal that // other processes or threads would like to use the current core. // Increasing this makes writer threads more likely to take CPU // by spinning, which will show up as an increase in the number of // involuntary context switches. // // Default: 3 uint64_t write_thread_slow_yield_usec = 3; // If true, then DB::Open() will not update the statistics used to optimize // compaction decision by loading table properties from many files. // Turning off this feature will improve DBOpen time especially in // disk environment. // // Default: false bool skip_stats_update_on_db_open = false; // If true, then DB::Open() will not fetch and check sizes of all sst files. // This may significantly speed up startup if there are many sst files, // especially when using non-default Env with expensive GetFileSize(). // We'll still check that all required sst files exist. // If paranoid_checks is false, this option is ignored, and sst files are // not checked at all. 
// // Default: false bool skip_checking_sst_file_sizes_on_db_open = false; // Recovery mode to control the consistency while replaying WAL // Default: kPointInTimeRecovery WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery; // if set to false then recovery will fail when a prepared // transaction is encountered in the WAL bool allow_2pc = false; // A global cache for table-level rows. // Default: nullptr (disabled) // Not supported in ROCKSDB_LITE mode! std::shared_ptr row_cache = nullptr; #ifndef ROCKSDB_LITE // A filter object supplied to be invoked while processing write-ahead-logs // (WALs) during recovery. The filter provides a way to inspect log // records, ignoring a particular record or skipping replay. // The filter is invoked at startup and is invoked from a single-thread // currently. WalFilter* wal_filter = nullptr; #endif // ROCKSDB_LITE // If true, then DB::Open / CreateColumnFamily / DropColumnFamily // / SetOptions will fail if options file is not detected or properly // persisted. // // DEFAULT: false bool fail_if_options_file_error = false; // If true, then print malloc stats together with rocksdb.stats // when printing to LOG. // DEFAULT: false bool dump_malloc_stats = false; // By default RocksDB replays WAL logs and flushes them on DB open, which may // create very small SST files. If this option is enabled, RocksDB will try // to avoid (but does not guarantee to avoid) flushing during recovery. Also, existing // WAL logs will be kept, so that if a crash happens before flush, we still // have logs to recover from. // // DEFAULT: false bool avoid_flush_during_recovery = false; // By default RocksDB will flush all memtables on DB close if there is // unpersisted data (i.e. with WAL disabled). The flush can be skipped to speed up // DB close. Unpersisted data WILL BE LOST. // // DEFAULT: false // // Dynamically changeable through SetDBOptions() API.
bool avoid_flush_during_shutdown = false; // Set this option to true during creation of database if you want // to be able to ingest behind (call IngestExternalFile() skipping keys // that already exist, rather than overwriting matching keys). // Setting this option to true will affect 2 things: // 1) Disable some internal optimizations around SST file compression // 2) Reserve bottom-most level for ingested files only. // 3) Note that num_levels should be >= 3 if this option is turned on. // // DEFAULT: false // Immutable. bool allow_ingest_behind = false; // Needed to support differential snapshots. // If set to true then DB will only process deletes with sequence number // less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts). // Clients are responsible to periodically call this method to advance // the cutoff time. If this method is never called and preserve_deletes // is set to true NO deletes will ever be processed. // At the moment this only keeps normal deletes, SingleDeletes will // not be preserved. // DEFAULT: false // Immutable (TODO: make it dynamically changeable) bool preserve_deletes = false; // If enabled it uses two queues for writes, one for the ones with // disable_memtable and one for the ones that also write to memtable. This // allows the memtable writes not to lag behind other writes. It can be used // to optimize MySQL 2PC in which only the commits, which are serial, write to // memtable. bool two_write_queues = false; // If true WAL is not flushed automatically after each write. Instead it // relies on manual invocation of FlushWAL to write the WAL buffer to its // file. bool manual_wal_flush = false; // If true, RocksDB supports flushing multiple column families and committing // their results atomically to MANIFEST. Note that it is not // necessary to set atomic_flush to true if WAL is always enabled since WAL // allows the database to be restored to the last persistent state in WAL. 
// This option is useful when there are column families with writes NOT // protected by WAL. // For manual flush, application has to specify which column families to // flush atomically in DB::Flush. // For auto-triggered flush, RocksDB atomically flushes ALL column families. // // Currently, any WAL-enabled writes after atomic flush may be replayed // independently if the process crashes later and tries to recover. bool atomic_flush = false; // If true, working thread may avoid doing unnecessary and long-latency // operation (such as deleting obsolete files directly or deleting memtable) // and will instead schedule a background job to do it. // Use it if you're latency-sensitive. // If set to true, takes precedence over // ReadOptions::background_purge_on_iterator_cleanup. bool avoid_unnecessary_blocking_io = false; // Historically DB ID has always been stored in Identity File in DB folder. // If this flag is true, the DB ID is written to Manifest file in addition // to the Identity file. By doing this 2 problems are solved // 1. We don't checksum the Identity file where as Manifest file is. // 2. Since the source of truth for DB is Manifest file DB ID will sit with // the source of truth. Previously the Identity file could be copied // independent of Manifest and that can result in wrong DB ID. // We recommend setting this flag to true. // Default: false bool write_dbid_to_manifest = false; // The number of bytes to prefetch when reading the log. This is mostly useful // for reading a remotely located log, as it can save the number of // round-trips. If 0, then the prefetching is disabled. // // Default: 0 size_t log_readahead_size = 0; // If user does NOT provide the checksum generator factory, the file checksum // will NOT be used. A new file checksum generator object will be created // when a SST file is created. Therefore, each created FileChecksumGenerator // will only be used from a single thread and so does not need to be // thread-safe. 
// // Default: nullptr std::shared_ptr file_checksum_gen_factory = nullptr; // By default, RocksDB recovery fails if any table file referenced in // MANIFEST are missing after scanning the MANIFEST. // Best-efforts recovery is another recovery mode that // tries to restore the database to the most recent point in time without // missing file. // Currently not compatible with atomic flush. Furthermore, WAL files will // not be used for recovery if best_efforts_recovery is true. // Default: false bool best_efforts_recovery = false; }; // Options to control the behavior of a database (passed to DB::Open) struct Options : public DBOptions, public ColumnFamilyOptions { // Create an Options object with default values for all fields. Options() : DBOptions(), ColumnFamilyOptions() {} Options(const DBOptions& db_options, const ColumnFamilyOptions& column_family_options) : DBOptions(db_options), ColumnFamilyOptions(column_family_options) {} // The function recovers options to the option as in version 4.6. Options* OldDefaults(int rocksdb_major_version = 4, int rocksdb_minor_version = 6); void Dump(Logger* log) const; void DumpCFOptions(Logger* log) const; // Some functions that make it easier to optimize RocksDB // Set appropriate parameters for bulk loading. // The reason that this is a function that returns "this" instead of a // constructor is to enable chaining of multiple similar calls in the future. // // All data will be in level 0 without any automatic compaction. // It's recommended to manually call CompactRange(NULL, NULL) before reading // from the database, because otherwise the read can be very slow. Options* PrepareForBulkLoad(); // Use this if your DB is very small (like under 1GB) and you don't want to // spend lots of memory for memtables. Options* OptimizeForSmallDb(); }; // // An application can issue a read request (via Get/Iterators) and specify // if that read should process data that ALREADY resides on a specified cache // level. 
For example, if an application specifies kBlockCacheTier then the // Get call will process data that is already processed in the memtable or // the block cache. It will not page in data from the OS cache or data that // resides in storage. enum ReadTier { kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage kBlockCacheTier = 0x1, // data in memtable or block cache kPersistedTier = 0x2, // persisted data. When WAL is disabled, this option // will skip data in memtable. // Note that this ReadTier currently only supports // Get and MultiGet and does not support iterators. kMemtableTier = 0x3 // data in memtable. used for memtable-only iterators. }; // Options that control read operations struct ReadOptions { // If "snapshot" is non-nullptr, read as of the supplied snapshot // (which must belong to the DB that is being read and which must // not have been released). If "snapshot" is nullptr, use an implicit // snapshot of the state at the beginning of this read operation. // Default: nullptr const Snapshot* snapshot; // `iterate_lower_bound` defines the smallest key at which the backward // iterator can return an entry. Once the bound is passed, Valid() will be // false. `iterate_lower_bound` is inclusive ie the bound value is a valid // entry. // // If prefix_extractor is not null, the Seek target and `iterate_lower_bound` // need to have the same prefix. This is because ordering is not guaranteed // outside of prefix domain. // // Default: nullptr const Slice* iterate_lower_bound; // "iterate_upper_bound" defines the extent upto which the forward iterator // can returns entries. Once the bound is reached, Valid() will be false. // "iterate_upper_bound" is exclusive ie the bound value is // not a valid entry. If prefix_extractor is not null, the Seek target // and iterate_upper_bound need to have the same prefix. // This is because ordering is not guaranteed outside of prefix domain. 
// // Default: nullptr const Slice* iterate_upper_bound; // RocksDB does auto-readahead for iterators on noticing more than two reads // for a table file. The readahead starts at 8KB and doubles on every // additional read upto 256KB. // This option can help if most of the range scans are large, and if it is // determined that a larger readahead than that enabled by auto-readahead is // needed. // Using a large readahead size (> 2MB) can typically improve the performance // of forward iteration on spinning disks. // Default: 0 size_t readahead_size; // A threshold for the number of keys that can be skipped before failing an // iterator seek as incomplete. The default value of 0 should be used to // never fail a request as incomplete, even on skipping too many keys. // Default: 0 uint64_t max_skippable_internal_keys; // Specify if this read request should process data that ALREADY // resides on a particular cache. If the required data is not // found at the specified cache, then Status::Incomplete is returned. // Default: kReadAllTier ReadTier read_tier; // If true, all data read from underlying storage will be // verified against corresponding checksums. // Default: true bool verify_checksums; // Should the "data block"/"index block"" read for this iteration be placed in // block cache? // Callers may wish to set this field to false for bulk scans. // This would help not to the change eviction order of existing items in the // block cache. Default: true bool fill_cache; // Specify to create a tailing iterator -- a special iterator that has a // view of the complete database (i.e. it can also be used to read newly // added data) and is optimized for sequential reads. It will return records // that were inserted into the database after the creation of the iterator. // Default: false // Not supported in ROCKSDB_LITE mode! bool tailing; // This options is not used anymore. It was to turn on a functionality that // has been removed. 
bool managed; // Enable a total order seek regardless of index format (e.g. hash index) // used in the table. Some table format (e.g. plain table) may not support // this option. // If true when calling Get(), we also skip prefix bloom when reading from // block based table. It provides a way to read existing data after // changing implementation of prefix extractor. bool total_order_seek; // When true, by default use total_order_seek = true, and RocksDB can // selectively enable prefix seek mode if won't generate a different result // from total_order_seek, based on seek key, and iterator upper bound. // Not suppported in ROCKSDB_LITE mode, in the way that even with value true // prefix mode is not used. bool auto_prefix_mode; // Enforce that the iterator only iterates over the same prefix as the seek. // This option is effective only for prefix seeks, i.e. prefix_extractor is // non-null for the column family and total_order_seek is false. Unlike // iterate_upper_bound, prefix_same_as_start only works within a prefix // but in both directions. // Default: false bool prefix_same_as_start; // Keep the blocks loaded by the iterator pinned in memory as long as the // iterator is not deleted, If used when reading from tables created with // BlockBasedTableOptions::use_delta_encoding = false, // Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to // return 1. // Default: false bool pin_data; // If true, when PurgeObsoleteFile is called in CleanupIteratorState, we // schedule a background job in the flush job queue and delete obsolete files // in background. // Default: false bool background_purge_on_iterator_cleanup; // If true, keys deleted using the DeleteRange() API will be visible to // readers until they are naturally deleted during compaction. This improves // read performance in DBs with many range deletions. 
// Default: false bool ignore_range_deletions; // A callback to determine whether relevant keys for this scan exist in a // given table based on the table's properties. The callback is passed the // properties of each table during iteration. If the callback returns false, // the table will not be scanned. This option only affects Iterators and has // no impact on point lookups. // Default: empty (every table will be scanned) std::function table_filter; // Needed to support differential snapshots. Has 2 effects: // 1) Iterator will skip all internal keys with seqnum < iter_start_seqnum // 2) if this param > 0 iterator will return INTERNAL keys instead of // user keys; e.g. return tombstones as well. // Default: 0 (don't filter by seqnum, return user keys) SequenceNumber iter_start_seqnum; // Timestamp of operation. Read should return the latest data visible to the // specified timestamp. All timestamps of the same database must be of the // same length and format. The user is responsible for providing a customized // compare function via Comparator to order tuples. // For iterator, iter_start_ts is the lower bound (older) and timestamp // serves as the upper bound. Versions of the same record that fall in // the timestamp range will be returned. If iter_start_ts is nullptr, // only the most recent version visible to timestamp is returned. // The user-specified timestamp feature is still under active development, // and the API is subject to change. const Slice* timestamp; const Slice* iter_start_ts; // Deadline for completing the read request (only Get/MultiGet for now) in us. // It should be set to microseconds since epoch, i.e, gettimeofday or // equivalent plus allowed duration in microseconds. The best way is to use // env->NowMicros() + some timeout. // This is best efforts. 
The call may exceed the deadline if there is IO // involved and the file system doesn't support deadlines, or due to // checking for deadline periodically rather than for every key if // processing a batch std::chrono::microseconds deadline; // It limits the maximum cumulative value size of the keys in batch while // reading through MultiGet. Once the cumulative value size exceeds this // soft limit then all the remaining keys are returned with status Aborted. // // Default: std::numeric_limits::max() uint64_t value_size_soft_limit; ReadOptions(); ReadOptions(bool cksum, bool cache); }; // Options that control write operations struct WriteOptions { // If true, the write will be flushed from the operating system // buffer cache (by calling WritableFile::Sync()) before the write // is considered complete. If this flag is true, writes will be // slower. // // If this flag is false, and the machine crashes, some recent // writes may be lost. Note that if it is just the process that // crashes (i.e., the machine does not reboot), no writes will be // lost even if sync==false. // // In other words, a DB write with sync==false has similar // crash semantics as the "write()" system call. A DB write // with sync==true has similar crash semantics to a "write()" // system call followed by "fdatasync()". // // Default: false bool sync; // If true, writes will not first go to the write ahead log, // and the write may get lost after a crash. The backup engine // relies on write-ahead logs to back up the memtable, so if // you disable write-ahead logs, you must create backups with // flush_before_backup=true to avoid losing unflushed memtable data. // Default: false bool disableWAL; // If true and if user is trying to write to column families that don't exist // (they were dropped), ignore the write (don't return an error). If there // are multiple writes in a WriteBatch, other writes will succeed. 
// Default: false bool ignore_missing_column_families; // If true and we need to wait or sleep for the write request, fails // immediately with Status::Incomplete(). // Default: false bool no_slowdown; // If true, this write request is of lower priority if compaction is // behind. In this case, no_slowdown = true, the request will be cancelled // immediately with Status::Incomplete() returned. Otherwise, it will be // slowed down. The slowdown value is determined by RocksDB to guarantee // it introduces minimum impacts to high priority writes. // // Default: false bool low_pri; // If true, this writebatch will maintain the last insert positions of each // memtable as hints in concurrent write. It can improve write performance // in concurrent writes if keys in one writebatch are sequential. In // non-concurrent writes (when concurrent_memtable_writes is false) this // option will be ignored. // // Default: false bool memtable_insert_hint_per_batch; // Timestamp of write operation, e.g. Put. All timestamps of the same // database must share the same length and format. The user is also // responsible for providing a customized compare function via Comparator to // order tuples. If the user wants to enable timestamp, then // all write operations must be associated with timestamp because RocksDB, as // a single-node storage engine currently has no knowledge of global time, // thus has to rely on the application. // The user-specified timestamp feature is still under active development, // and the API is subject to change. const Slice* timestamp; WriteOptions() : sync(false), disableWAL(false), ignore_missing_column_families(false), no_slowdown(false), low_pri(false), memtable_insert_hint_per_batch(false), timestamp(nullptr) {} }; // Options that control flush operations struct FlushOptions { // If true, the flush will wait until the flush is done. 
// Default: true bool wait; // If true, the flush would proceed immediately even it means writes will // stall for the duration of the flush; if false the operation will wait // until it's possible to do flush w/o causing stall or until required flush // is performed by someone else (foreground call or background thread). // Default: false bool allow_write_stall; FlushOptions() : wait(true), allow_write_stall(false) {} }; // Create a Logger from provided DBOptions extern Status CreateLoggerFromOptions(const std::string& dbname, const DBOptions& options, std::shared_ptr* logger); // CompactionOptions are used in CompactFiles() call. struct CompactionOptions { // Compaction output compression type // Default: snappy // If set to `kDisableCompressionOption`, RocksDB will choose compression type // according to the `ColumnFamilyOptions`, taking into account the output // level if `compression_per_level` is specified. CompressionType compression; // Compaction will create files of size `output_file_size_limit`. // Default: MAX, which means that compaction will create a single file uint64_t output_file_size_limit; // If > 0, it will replace the option in the DBOptions for this compaction. uint32_t max_subcompactions; CompactionOptions() : compression(kSnappyCompression), output_file_size_limit(std::numeric_limits::max()), max_subcompactions(0) {} }; // For level based compaction, we can configure if we want to skip/force // bottommost level compaction. enum class BottommostLevelCompaction { // Skip bottommost level compaction kSkip, // Only compact bottommost level if there is a compaction filter // This is the default option kIfHaveCompactionFilter, // Always compact bottommost level kForce, // Always compact bottommost level but in bottommost level avoid // double-compacting files created in the same compaction kForceOptimized, }; // CompactRangeOptions is used by CompactRange() call. 
struct CompactRangeOptions { // If true, no other compaction will run at the same time as this // manual compaction bool exclusive_manual_compaction = true; // If true, compacted files will be moved to the minimum level capable // of holding the data or given level (specified non-negative target_level). bool change_level = false; // If change_level is true and target_level have non-negative value, compacted // files will be moved to target_level. int target_level = -1; // Compaction outputs will be placed in options.db_paths[target_path_id]. // Behavior is undefined if target_path_id is out of range. uint32_t target_path_id = 0; // By default level based compaction will only compact the bottommost level // if there is a compaction filter BottommostLevelCompaction bottommost_level_compaction = BottommostLevelCompaction::kIfHaveCompactionFilter; // If true, will execute immediately even if doing so would cause the DB to // enter write stall mode. Otherwise, it'll sleep until load is low enough. bool allow_write_stall = false; // If > 0, it will replace the option in the DBOptions for this compaction. uint32_t max_subcompactions = 0; }; // IngestExternalFileOptions is used by IngestExternalFile() struct IngestExternalFileOptions { // Can be set to true to move the files instead of copying them. bool move_files = false; // If set to true, ingestion falls back to copy when move fails. bool failed_move_fall_back_to_copy = true; // If set to false, an ingested file keys could appear in existing snapshots // that where created before the file was ingested. bool snapshot_consistency = true; // If set to false, IngestExternalFile() will fail if the file key range // overlaps with existing keys or tombstones in the DB. bool allow_global_seqno = true; // If set to false and the file key range overlaps with the memtable key range // (memtable flush required), IngestExternalFile will fail. 
bool allow_blocking_flush = true; // Set to true if you would like duplicate keys in the file being ingested // to be skipped rather than overwriting existing data under that key. // Usecase: back-fill of some historical data in the database without // over-writing existing newer version of data. // This option could only be used if the DB has been running // with allow_ingest_behind=true since the dawn of time. // All files will be ingested at the bottommost level with seqno=0. bool ingest_behind = false; // Set to true if you would like to write global_seqno to a given offset in // the external SST file for backward compatibility. Older versions of // RocksDB writes a global_seqno to a given offset within ingested SST files, // and new versions of RocksDB do not. If you ingest an external SST using // new version of RocksDB and would like to be able to downgrade to an // older version of RocksDB, you should set 'write_global_seqno' to true. If // your service is just starting to use the new RocksDB, we recommend that // you set this option to false, which brings two benefits: // 1. No extra random write for global_seqno during ingestion. // 2. Without writing external SST file, it's possible to do checksum. // We have a plan to set this option to false by default in the future. bool write_global_seqno = true; // Set to true if you would like to verify the checksums of each block of the // external SST file before ingestion. // Warning: setting this to true causes slowdown in file ingestion because // the external SST file has to be read. bool verify_checksums_before_ingest = false; // When verify_checksums_before_ingest = true, RocksDB uses default // readahead setting to scan the file while verifying checksums before // ingestion. // Users can override the default value using this option. // Using a large readahead size (> 2MB) can typically improve the performance // of forward iteration on spinning disks. 
size_t verify_checksums_readahead_size = 0; // Set to TRUE if user wants to verify the sst file checksum of ingested // files. The DB checksum function will generate the checksum of each // ingested file (if file_checksum_gen_factory is set) and compare the // checksum function name and checksum with the ingested checksum information. // // If this option is set to True: 1) if DB does not enable checksum // (file_checksum_gen_factory == nullptr), the ingested checksum information // will be ignored; 2) If DB enable the checksum function, we calculate the // sst file checksum after the file is moved or copied and compare the // checksum and checksum name. If checksum or checksum function name does // not match, ingestion will be failed. If the verification is sucessful, // checksum and checksum function name will be stored in Manifest. // If this option is set to FALSE, 1) if DB does not enable checksum, // the ingested checksum information will be ignored; 2) if DB enable the // checksum, we only verify the ingested checksum function name and we // trust the ingested checksum. If the checksum function name matches, we // store the checksum in Manifest. DB does not calculate the checksum during // ingestion. However, if no checksum information is provided with the // ingested files, DB will generate the checksum and store in the Manifest. bool verify_file_checksum = true; }; enum TraceFilterType : uint64_t { // Trace all the operations kTraceFilterNone = 0x0, // Do not trace the get operations kTraceFilterGet = 0x1 << 0, // Do not trace the write operations kTraceFilterWrite = 0x1 << 1 }; // TraceOptions is used for StartTrace struct TraceOptions { // To avoid the trace file size grows large than the storage space, // user can set the max trace file size in Bytes. Default is 64GB uint64_t max_trace_file_size = uint64_t{64} * 1024 * 1024 * 1024; // Specify trace sampling option, i.e. capture one per how many requests. // Default to 1 (capture every request). 
uint64_t sampling_frequency = 1; // Note: The filtering happens before sampling. uint64_t filter = kTraceFilterNone; }; // ImportColumnFamilyOptions is used by ImportColumnFamily() struct ImportColumnFamilyOptions { // Can be set to true to move the files instead of copying them. bool move_files = false; }; // Options used with DB::GetApproximateSizes() struct SizeApproximationOptions { // Defines whether the returned size should include the recently written // data in the mem-tables. If set to false, include_files must be true. bool include_memtabtles = false; // Defines whether the returned size should include data serialized to disk. // If set to false, include_memtabtles must be true. bool include_files = true; // When approximating the files total size that is used to store a keys range // using DB::GetApproximateSizes, allow approximation with an error margin of // up to total_files_size * files_size_error_margin. This allows to take some // shortcuts in files size approximation, resulting in better performance, // while guaranteeing the resulting error is within a reasonable margin. // E.g., if the value is 0.1, then the error margin of the returned files size // approximation will be within 10%. // If the value is non-positive - a more precise yet more CPU intensive // estimation is performed. double files_size_error_margin = -1.0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/perf_context.h000066400000000000000000000236261370372246700212010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "rocksdb/perf_level.h" namespace ROCKSDB_NAMESPACE { // A thread local context for gathering performance counter efficiently // and transparently. 
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. // Break down performance counters by level and store per-level perf context in // PerfContextByLevel struct PerfContextByLevel { // # of times bloom filter has avoided file reads, i.e., negatives. uint64_t bloom_filter_useful = 0; // # of times bloom FullFilter has not avoided the reads. uint64_t bloom_filter_full_positive = 0; // # of times bloom FullFilter has not avoided the reads and data actually // exist. uint64_t bloom_filter_full_true_positive = 0; // total number of user key returned (only include keys that are found, does // not include keys that are deleted or merged without a final put uint64_t user_key_return_count = 0; // total nanos spent on reading data from SST files uint64_t get_from_table_nanos = 0; uint64_t block_cache_hit_count = 0; // total number of block cache hits uint64_t block_cache_miss_count = 0; // total number of block cache misses void Reset(); // reset all performance counters to zero }; struct PerfContext { ~PerfContext(); PerfContext() {} PerfContext(const PerfContext&); PerfContext& operator=(const PerfContext&); PerfContext(PerfContext&&) noexcept; void Reset(); // reset all performance counters to zero std::string ToString(bool exclude_zero_counters = false) const; // enable per level perf context and allocate storage for PerfContextByLevel void EnablePerLevelPerfContext(); // temporarily disable per level perf contxt by setting the flag to false void DisablePerLevelPerfContext(); // free the space for PerfContextByLevel, also disable per level perf context void ClearPerLevelPerfContext(); uint64_t user_key_comparison_count; // total number of user key comparisons uint64_t block_cache_hit_count; // total number of block cache hits uint64_t block_read_count; // total number of block reads (with IO) uint64_t block_read_byte; // total number of bytes from block reads uint64_t block_read_time; // total nanos spent on block reads uint64_t block_cache_index_hit_count; 
// total number of index block hits uint64_t index_block_read_count; // total number of index block reads uint64_t block_cache_filter_hit_count; // total number of filter block hits uint64_t filter_block_read_count; // total number of filter block reads uint64_t compression_dict_block_read_count; // total number of compression // dictionary block reads uint64_t block_checksum_time; // total nanos spent on block checksum uint64_t block_decompress_time; // total nanos spent on block decompression uint64_t get_read_bytes; // bytes for vals returned by Get uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet uint64_t iter_read_bytes; // bytes for keys/vals decoded by iterator // total number of internal keys skipped over during iteration. // There are several reasons for it: // 1. when calling Next(), the iterator is in the position of the previous // key, so that we'll need to skip it. It means this counter will always // be incremented in Next(). // 2. when calling Next(), we need to skip internal entries for the previous // keys that are overwritten. // 3. when calling Next(), Seek() or SeekToFirst(), after previous key // before calling Next(), the seek key in Seek() or the beginning for // SeekToFirst(), there may be one or more deleted keys before the next // valid key that the operation should place the iterator to. We need // to skip both of the tombstone and updates hidden by the tombstones. The // tombstones are not included in this counter, while previous updates // hidden by the tombstones will be included here. // 4. symmetric cases for Prev() and SeekToLast() // internal_recent_skipped_count is not included in this counter. 
// uint64_t internal_key_skipped_count; // Total number of deletes and single deletes skipped over during iteration // When calling Next(), Seek() or SeekToFirst(), after previous position // before calling Next(), the seek key in Seek() or the beginning for // SeekToFirst(), there may be one or more deleted keys before the next valid // key. Every deleted key is counted once. We don't recount here if there are // still older updates invalidated by the tombstones. // uint64_t internal_delete_skipped_count; // How many times iterators skipped over internal keys that are more recent // than the snapshot that iterator is using. // uint64_t internal_recent_skipped_count; // How many values were fed into merge operator by iterators. // uint64_t internal_merge_count; uint64_t get_snapshot_time; // total nanos spent on getting snapshot uint64_t get_from_memtable_time; // total nanos spent on querying memtables uint64_t get_from_memtable_count; // number of mem tables queried // total nanos spent after Get() finds a key uint64_t get_post_process_time; uint64_t get_from_output_files_time; // total nanos reading from output files // total nanos spent on seeking memtable uint64_t seek_on_memtable_time; // number of seeks issued on memtable // (including SeekForPrev but not SeekToFirst and SeekToLast) uint64_t seek_on_memtable_count; // number of Next()s issued on memtable uint64_t next_on_memtable_count; // number of Prev()s issued on memtable uint64_t prev_on_memtable_count; // total nanos spent on seeking child iters uint64_t seek_child_seek_time; // number of seek issued in child iterators uint64_t seek_child_seek_count; uint64_t seek_min_heap_time; // total nanos spent on the merge min heap uint64_t seek_max_heap_time; // total nanos spent on the merge max heap // total nanos spent on seeking the internal entries uint64_t seek_internal_seek_time; // total nanos spent on iterating internal entries to find the next user entry uint64_t find_next_user_entry_time; // This 
group of stats provide a breakdown of time spent by Write(). // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write // are enabled. // // total nanos spent on writing to WAL uint64_t write_wal_time; // total nanos spent on writing to mem tables uint64_t write_memtable_time; // total nanos spent on delaying or throttling write uint64_t write_delay_time; // total nanos spent on switching memtable/wal and scheduling // flushes/compactions. uint64_t write_scheduling_flushes_compactions_time; // total nanos spent on writing a record, excluding the above four things uint64_t write_pre_and_post_process_time; // time spent waiting for other threads of the batch group uint64_t write_thread_wait_nanos; // time spent on acquiring DB mutex. uint64_t db_mutex_lock_nanos; // Time spent on waiting with a condition variable created with DB mutex. uint64_t db_condition_wait_nanos; // Time spent on merge operator. uint64_t merge_operator_time_nanos; // Time spent on reading index block from block cache or SST file uint64_t read_index_block_nanos; // Time spent on reading filter block from block cache or SST file uint64_t read_filter_block_nanos; // Time spent on creating data block iterator uint64_t new_table_block_iter_nanos; // Time spent on creating a iterator of an SST file. uint64_t new_table_iterator_nanos; // Time spent on seeking a key in data/index blocks uint64_t block_seek_nanos; // Time spent on finding or creating a table reader uint64_t find_table_nanos; // total number of mem table bloom hits uint64_t bloom_memtable_hit_count; // total number of mem table bloom misses uint64_t bloom_memtable_miss_count; // total number of SST table bloom hits uint64_t bloom_sst_hit_count; // total number of SST table bloom misses uint64_t bloom_sst_miss_count; // Time spent waiting on key locks in transaction lock manager. uint64_t key_lock_wait_time; // number of times acquiring a lock was blocked by another transaction. 
uint64_t key_lock_wait_count; // Total time spent in Env filesystem operations. These are only populated // when TimedEnv is used. uint64_t env_new_sequential_file_nanos; uint64_t env_new_random_access_file_nanos; uint64_t env_new_writable_file_nanos; uint64_t env_reuse_writable_file_nanos; uint64_t env_new_random_rw_file_nanos; uint64_t env_new_directory_nanos; uint64_t env_file_exists_nanos; uint64_t env_get_children_nanos; uint64_t env_get_children_file_attributes_nanos; uint64_t env_delete_file_nanos; uint64_t env_create_dir_nanos; uint64_t env_create_dir_if_missing_nanos; uint64_t env_delete_dir_nanos; uint64_t env_get_file_size_nanos; uint64_t env_get_file_modification_time_nanos; uint64_t env_rename_file_nanos; uint64_t env_link_file_nanos; uint64_t env_lock_file_nanos; uint64_t env_unlock_file_nanos; uint64_t env_new_logger_nanos; uint64_t get_cpu_nanos; uint64_t iter_next_cpu_nanos; uint64_t iter_prev_cpu_nanos; uint64_t iter_seek_cpu_nanos; // Time spent in encrypting data. Populated when EncryptedEnv is used. uint64_t encrypt_data_nanos; // Time spent in decrypting data. Populated when EncryptedEnv is used. uint64_t decrypt_data_nanos; std::map* level_to_perf_context = nullptr; bool per_level_perf_context_enabled = false; }; // Get Thread-local PerfContext object pointer // if defined(NPERF_CONTEXT), then the pointer is not thread-local PerfContext* get_perf_context(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/perf_level.h000066400000000000000000000024111370372246700206110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { // How much perf stats to collect. Affects perf_context and iostats_context. 
enum PerfLevel : unsigned char { kUninitialized = 0, // unknown setting kDisable = 1, // disable perf stats kEnableCount = 2, // enable only count stats kEnableTimeExceptForMutex = 3, // Other than count stats, also enable time // stats except for mutexes // Other than time, also measure CPU time counters. Still don't measure // time (neither wall time nor CPU time) for mutexes. kEnableTimeAndCPUTimeExceptForMutex = 4, kEnableTime = 5, // enable count and time stats kOutOfBounds = 6 // N.B. Must always be the last value! }; // set the perf stats level for current thread void SetPerfLevel(PerfLevel level); // get current perf stats level for current thread PerfLevel GetPerfLevel(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/persistent_cache.h000066400000000000000000000044361370372246700220220ustar00rootroot00000000000000// Copyright (c) 2013, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { // PersistentCache // // Persistent cache interface for caching IO pages on a persistent medium. The // cache interface is specifically designed for persistent read cache. 
class PersistentCache { public: typedef std::vector> StatsType; virtual ~PersistentCache() {} // Insert to page cache // // page_key Identifier to identify a page uniquely across restarts // data Page data // size Size of the page virtual Status Insert(const Slice& key, const char* data, const size_t size) = 0; // Lookup page cache by page identifier // // page_key Page identifier // buf Buffer where the data should be copied // size Size of the page virtual Status Lookup(const Slice& key, std::unique_ptr* data, size_t* size) = 0; // Is cache storing uncompressed data ? // // True if the cache is configured to store uncompressed data else false virtual bool IsCompressed() = 0; // Return stats as map of {string, double} per-tier // // Persistent cache can be initialized as a tier of caches. The stats are per // tire top-down virtual StatsType Stats() = 0; virtual std::string GetPrintableOptions() const = 0; }; // Factor method to create a new persistent cache Status NewPersistentCache(Env* const env, const std::string& path, const uint64_t size, const std::shared_ptr& log, const bool optimized_for_nvm, std::shared_ptr* cache); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/rate_limiter.h000066400000000000000000000132111370372246700211460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include "rocksdb/env.h" #include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { class RateLimiter { public: enum class OpType { // Limitation: we currently only invoke Request() with OpType::kRead for // compactions when DBOptions::new_table_reader_for_compaction_inputs is set kRead, kWrite, }; enum class Mode { kReadsOnly, kWritesOnly, kAllIo, }; // For API compatibility, default to rate-limiting writes only. explicit RateLimiter(Mode mode = Mode::kWritesOnly) : mode_(mode) {} virtual ~RateLimiter() {} // This API allows user to dynamically change rate limiter's bytes per second. // REQUIRED: bytes_per_second > 0 virtual void SetBytesPerSecond(int64_t bytes_per_second) = 0; // Deprecated. New RateLimiter derived classes should override // Request(const int64_t, const Env::IOPriority, Statistics*) or // Request(const int64_t, const Env::IOPriority, Statistics*, OpType) // instead. // // Request for token for bytes. If this request can not be satisfied, the call // is blocked. Caller is responsible to make sure // bytes <= GetSingleBurstBytes() virtual void Request(const int64_t /*bytes*/, const Env::IOPriority /*pri*/) { assert(false); } // Request for token for bytes and potentially update statistics. If this // request can not be satisfied, the call is blocked. Caller is responsible to // make sure bytes <= GetSingleBurstBytes(). virtual void Request(const int64_t bytes, const Env::IOPriority pri, Statistics* /* stats */) { // For API compatibility, default implementation calls the older API in // which statistics are unsupported. Request(bytes, pri); } // Requests token to read or write bytes and potentially updates statistics. // // If this request can not be satisfied, the call is blocked. Caller is // responsible to make sure bytes <= GetSingleBurstBytes(). 
virtual void Request(const int64_t bytes, const Env::IOPriority pri, Statistics* stats, OpType op_type) { if (IsRateLimited(op_type)) { Request(bytes, pri, stats); } } // Requests token to read or write bytes and potentially updates statistics. // Takes into account GetSingleBurstBytes() and alignment (e.g., in case of // direct I/O) to allocate an appropriate number of bytes, which may be less // than the number of bytes requested. virtual size_t RequestToken(size_t bytes, size_t alignment, Env::IOPriority io_priority, Statistics* stats, RateLimiter::OpType op_type); // Max bytes can be granted in a single burst virtual int64_t GetSingleBurstBytes() const = 0; // Total bytes that go through rate limiter virtual int64_t GetTotalBytesThrough( const Env::IOPriority pri = Env::IO_TOTAL) const = 0; // Total # of requests that go through rate limiter virtual int64_t GetTotalRequests( const Env::IOPriority pri = Env::IO_TOTAL) const = 0; virtual int64_t GetBytesPerSecond() const = 0; virtual bool IsRateLimited(OpType op_type) { if ((mode_ == RateLimiter::Mode::kWritesOnly && op_type == RateLimiter::OpType::kRead) || (mode_ == RateLimiter::Mode::kReadsOnly && op_type == RateLimiter::OpType::kWrite)) { return false; } return true; } protected: Mode GetMode() { return mode_; } private: const Mode mode_; }; // Create a RateLimiter object, which can be shared among RocksDB instances to // control write rate of flush and compaction. // @rate_bytes_per_sec: this is the only parameter you want to set most of the // time. It controls the total write rate of compaction and flush in bytes per // second. Currently, RocksDB does not enforce rate limit for anything other // than flush and compaction, e.g. write to WAL. // @refill_period_us: this controls how often tokens are refilled. For example, // when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to // 100ms, then 1MB is refilled every 100ms internally. 
Larger value can lead to // burstier writes while smaller value introduces more CPU overhead. // The default should work for most cases. // @fairness: RateLimiter accepts high-pri requests and low-pri requests. // A low-pri request is usually blocked in favor of hi-pri request. Currently, // RocksDB assigns low-pri to request from compaction and high-pri to request // from flush. Low-pri requests can get blocked if flush requests come in // continuously. This fairness parameter grants low-pri requests permission by // 1/fairness chance even though high-pri requests exist to avoid starvation. // You should be good by leaving it at default 10. // @mode: Mode indicates which types of operations count against the limit. // @auto_tuned: Enables dynamic adjustment of rate limit within the range // `[rate_bytes_per_sec / 20, rate_bytes_per_sec]`, according to // the recent demand for background I/O. extern RateLimiter* NewGenericRateLimiter( int64_t rate_bytes_per_sec, int64_t refill_period_us = 100 * 1000, int32_t fairness = 10, RateLimiter::Mode mode = RateLimiter::Mode::kWritesOnly, bool auto_tuned = false); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/rocksdb_namespace.h000066400000000000000000000005271370372246700221370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_NAMESPACE #define ROCKSDB_NAMESPACE rocksdb #endif rocksdb-6.11.4/include/rocksdb/slice.h000066400000000000000000000170001370372246700175650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Slice is a simple structure containing a pointer into some external // storage and a size. The user of a Slice must ensure that the slice // is not used after the corresponding external storage has been // deallocated. // // Multiple threads can invoke const methods on a Slice without // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same Slice must use // external synchronization. #pragma once #include #include #include #include #include #ifdef __cpp_lib_string_view #include #endif #include "rocksdb/cleanable.h" namespace ROCKSDB_NAMESPACE { class Slice { public: // Create an empty slice. Slice() : data_(""), size_(0) {} // Create a slice that refers to d[0,n-1]. Slice(const char* d, size_t n) : data_(d), size_(n) {} // Create a slice that refers to the contents of "s" /* implicit */ Slice(const std::string& s) : data_(s.data()), size_(s.size()) {} #ifdef __cpp_lib_string_view // Create a slice that refers to the same contents as "sv" /* implicit */ Slice(std::string_view sv) : data_(sv.data()), size_(sv.size()) {} #endif // Create a slice that refers to s[0,strlen(s)-1] /* implicit */ Slice(const char* s) : data_(s) { size_ = (s == nullptr) ? 0 : strlen(s); } // Create a single slice from SliceParts using buf as storage. // buf must exist as long as the returned Slice exists. Slice(const struct SliceParts& parts, std::string* buf); // Return a pointer to the beginning of the referenced data const char* data() const { return data_; } // Return the length (in bytes) of the referenced data size_t size() const { return size_; } // Return true iff the length of the referenced data is zero bool empty() const { return size_ == 0; } // Return the ith byte in the referenced data. 
// REQUIRES: n < size() char operator[](size_t n) const { assert(n < size()); return data_[n]; } // Change this slice to refer to an empty array void clear() { data_ = ""; size_ = 0; } // Drop the first "n" bytes from this slice. void remove_prefix(size_t n) { assert(n <= size()); data_ += n; size_ -= n; } void remove_suffix(size_t n) { assert(n <= size()); size_ -= n; } // Return a string that contains the copy of the referenced data. // when hex is true, returns a string of twice the length hex encoded (0-9A-F) std::string ToString(bool hex = false) const; #ifdef __cpp_lib_string_view // Return a string_view that references the same data as this slice. std::string_view ToStringView() const { return std::string_view(data_, size_); } #endif // Decodes the current slice interpreted as an hexadecimal string into result, // if successful returns true, if this isn't a valid hex string // (e.g not coming from Slice::ToString(true)) DecodeHex returns false. // This slice is expected to have an even number of 0-9A-F characters // also accepts lowercase (a-f) bool DecodeHex(std::string* result) const; // Three-way comparison. Returns value: // < 0 iff "*this" < "b", // == 0 iff "*this" == "b", // > 0 iff "*this" > "b" int compare(const Slice& b) const; // Return true iff "x" is a prefix of "*this" bool starts_with(const Slice& x) const { return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0)); } bool ends_with(const Slice& x) const { return ((size_ >= x.size_) && (memcmp(data_ + size_ - x.size_, x.data_, x.size_) == 0)); } // Compare two slices and returns the first byte where they differ size_t difference_offset(const Slice& b) const; // private: make these public for rocksdbjni access const char* data_; size_t size_; // Intentionally copyable }; /** * A Slice that can be pinned with some cleanup tasks, which will be run upon * ::Reset() or object destruction, whichever is invoked first. 
This can be used * to avoid memcpy by having the PinnableSlice object referring to the data * that is locked in the memory and release them after the data is consumed. */ class PinnableSlice : public Slice, public Cleanable { public: PinnableSlice() { buf_ = &self_space_; } explicit PinnableSlice(std::string* buf) { buf_ = buf; } PinnableSlice(PinnableSlice&& other); PinnableSlice& operator=(PinnableSlice&& other); // No copy constructor and copy assignment allowed. PinnableSlice(PinnableSlice&) = delete; PinnableSlice& operator=(PinnableSlice&) = delete; inline void PinSlice(const Slice& s, CleanupFunction f, void* arg1, void* arg2) { assert(!pinned_); pinned_ = true; data_ = s.data(); size_ = s.size(); RegisterCleanup(f, arg1, arg2); assert(pinned_); } inline void PinSlice(const Slice& s, Cleanable* cleanable) { assert(!pinned_); pinned_ = true; data_ = s.data(); size_ = s.size(); cleanable->DelegateCleanupsTo(this); assert(pinned_); } inline void PinSelf(const Slice& slice) { assert(!pinned_); buf_->assign(slice.data(), slice.size()); data_ = buf_->data(); size_ = buf_->size(); assert(!pinned_); } inline void PinSelf() { assert(!pinned_); data_ = buf_->data(); size_ = buf_->size(); assert(!pinned_); } void remove_suffix(size_t n) { assert(n <= size()); if (pinned_) { size_ -= n; } else { buf_->erase(size() - n, n); PinSelf(); } } void remove_prefix(size_t n) { assert(n <= size()); if (pinned_) { data_ += n; size_ -= n; } else { buf_->erase(0, n); PinSelf(); } } void Reset() { Cleanable::Reset(); pinned_ = false; size_ = 0; } inline std::string* GetSelf() { return buf_; } inline bool IsPinned() const { return pinned_; } private: friend class PinnableSlice4Test; std::string self_space_; std::string* buf_; bool pinned_ = false; }; // A set of Slices that are virtually concatenated together. 'parts' points // to an array of Slices. The number of elements in the array is 'num_parts'. 
struct SliceParts { SliceParts(const Slice* _parts, int _num_parts) : parts(_parts), num_parts(_num_parts) {} SliceParts() : parts(nullptr), num_parts(0) {} const Slice* parts; int num_parts; }; inline bool operator==(const Slice& x, const Slice& y) { return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); } inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); } inline int Slice::compare(const Slice& b) const { assert(data_ != nullptr && b.data_ != nullptr); const size_t min_len = (size_ < b.size_) ? size_ : b.size_; int r = memcmp(data_, b.data_, min_len); if (r == 0) { if (size_ < b.size_) r = -1; else if (size_ > b.size_) r = +1; } return r; } inline size_t Slice::difference_offset(const Slice& b) const { size_t off = 0; const size_t len = (size_ < b.size_) ? size_ : b.size_; for (; off < len; off++) { if (data_[off] != b.data_[off]) break; } return off; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/slice_transform.h000066400000000000000000000106351370372246700216670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Class for specifying user-defined functions which perform a // transformation on a slice. It is not required that every slice // belong to the domain and/or range of a function. Subclasses should // define InDomain and InRange to determine which slices are in either // of these sets respectively. 
#pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class Slice; /* * A SliceTransform is a generic pluggable way of transforming one string * to another. Its primary use-case is in configuring rocksdb * to store prefix blooms by setting prefix_extractor in * ColumnFamilyOptions. */ class SliceTransform { public: virtual ~SliceTransform(){}; // Return the name of this transformation. virtual const char* Name() const = 0; // Extract a prefix from a specified key. This method is called when // a key is inserted into the db, and the returned slice is used to // create a bloom filter. virtual Slice Transform(const Slice& key) const = 0; // Determine whether the specified key is compatible with the logic // specified in the Transform method. This method is invoked for every // key that is inserted into the db. If this method returns true, // then Transform is called to translate the key to its prefix and // that returned prefix is inserted into the bloom filter. If this // method returns false, then the call to Transform is skipped and // no prefix is inserted into the bloom filters. // // For example, if the Transform method operates on a fixed length // prefix of size 4, then an invocation to InDomain("abc") returns // false because the specified key length(3) is shorter than the // prefix size of 4. // // Wiki documentation here: // https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes // virtual bool InDomain(const Slice& key) const = 0; // This is currently not used and remains here for backward compatibility. virtual bool InRange(const Slice& /*dst*/) const { return false; } // Some SliceTransform will have a full length which can be used to // determine if two keys are consecuitive. Can be disabled by always // returning 0 virtual bool FullLengthEnabled(size_t* /*len*/) const { return false; } // Transform(s)=Transform(`prefix`) for any s with `prefix` as a prefix. 
// // This function is not used by RocksDB, but for users. If users pass // Options by string to RocksDB, they might not know what prefix extractor // they are using. This function is to help users can determine: // if they want to iterate all keys prefixing `prefix`, whether it is // safe to use prefix bloom filter and seek to key `prefix`. // If this function returns true, this means a user can Seek() to a prefix // using the bloom filter. Otherwise, user needs to skip the bloom filter // by setting ReadOptions.total_order_seek = true. // // Here is an example: Suppose we implement a slice transform that returns // the first part of the string after splitting it using delimiter ",": // 1. SameResultWhenAppended("abc,") should return true. If applying prefix // bloom filter using it, all slices matching "abc:.*" will be extracted // to "abc,", so any SST file or memtable containing any of those key // will not be filtered out. // 2. SameResultWhenAppended("abc") should return false. A user will not // guaranteed to see all the keys matching "abc.*" if a user seek to "abc" // against a DB with the same setting. If one SST file only contains // "abcd,e", the file can be filtered out and the key will be invisible. // // i.e., an implementation always returning false is safe. virtual bool SameResultWhenAppended(const Slice& /*prefix*/) const { return false; } }; extern const SliceTransform* NewFixedPrefixTransform(size_t prefix_len); extern const SliceTransform* NewCappedPrefixTransform(size_t cap_len); extern const SliceTransform* NewNoopTransform(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/snapshot.h000066400000000000000000000024101370372246700203240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { class DB; // Abstract handle to particular state of a DB. // A Snapshot is an immutable object and can therefore be safely // accessed from multiple threads without any external synchronization. // // To Create a Snapshot, call DB::GetSnapshot(). // To Destroy a Snapshot, call DB::ReleaseSnapshot(snapshot). class Snapshot { public: // returns Snapshot's sequence number virtual SequenceNumber GetSequenceNumber() const = 0; protected: virtual ~Snapshot(); }; // Simple RAII wrapper class for Snapshot. // Constructing this object will create a snapshot. Destructing will // release the snapshot. class ManagedSnapshot { public: explicit ManagedSnapshot(DB* db); // Instead of creating a snapshot, take ownership of the input snapshot. ManagedSnapshot(DB* db, const Snapshot* _snapshot); ~ManagedSnapshot(); const Snapshot* snapshot(); private: DB* db_; const Snapshot* snapshot_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/sst_dump_tool.h000066400000000000000000000010141370372246700213570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #pragma once #include "rocksdb/options.h" namespace ROCKSDB_NAMESPACE { class SSTDumpTool { public: int Run(int argc, char const* const* argv, Options options = Options()); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/sst_file_manager.h000066400000000000000000000133041370372246700217730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include "rocksdb/file_system.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Env; class Logger; // SstFileManager is used to track SST files in the DB and control their // deletion rate. // All SstFileManager public functions are thread-safe. // SstFileManager is not extensible. class SstFileManager { public: virtual ~SstFileManager() {} // Update the maximum allowed space that should be used by RocksDB, if // the total size of the SST files exceeds max_allowed_space, writes to // RocksDB will fail. // // Setting max_allowed_space to 0 will disable this feature; maximum allowed // space will be infinite (Default value). // // thread-safe. virtual void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) = 0; // Set the amount of buffer room each compaction should be able to leave. // In other words, at its maximum disk space consumption, the compaction // should still leave compaction_buffer_size available on the disk so that // other background functions may continue, such as logging and flushing. virtual void SetCompactionBufferSize(uint64_t compaction_buffer_size) = 0; // Return true if the total size of SST files exceeded the maximum allowed // space usage. // // thread-safe. virtual bool IsMaxAllowedSpaceReached() = 0; // Returns true if the total size of SST files as well as estimated size // of ongoing compactions exceeds the maximums allowed space usage. virtual bool IsMaxAllowedSpaceReachedIncludingCompactions() = 0; // Return the total size of all tracked files. // thread-safe virtual uint64_t GetTotalSize() = 0; // Return a map containing all tracked files and their corresponding sizes. 
// thread-safe virtual std::unordered_map GetTrackedFiles() = 0; // Return delete rate limit in bytes per second. // thread-safe virtual int64_t GetDeleteRateBytesPerSecond() = 0; // Update the delete rate limit in bytes per second. // zero means disable delete rate limiting and delete files immediately // thread-safe virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) = 0; // Return trash/DB size ratio where new files will be deleted immediately // thread-safe virtual double GetMaxTrashDBRatio() = 0; // Update trash/DB size ratio where new files will be deleted immediately // thread-safe virtual void SetMaxTrashDBRatio(double ratio) = 0; // Return the total size of trash files // thread-safe virtual uint64_t GetTotalTrashSize() = 0; // Set the statistics ptr to dump the stat information virtual void SetStatisticsPtr(const std::shared_ptr& stats) = 0; }; // Create a new SstFileManager that can be shared among multiple RocksDB // instances to track SST file and control there deletion rate. // Even though SstFileManager don't track WAL files but it still control // there deletion rate. // // @param env: Pointer to Env object, please see "rocksdb/env.h". // @param fs: Pointer to FileSystem object (rocksdb/file_system.h" // @param info_log: If not nullptr, info_log will be used to log errors. // // == Deletion rate limiting specific arguments == // @param trash_dir: Deprecated, this argument have no effect // @param rate_bytes_per_sec: How many bytes should be deleted per second, If // this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb // in 1 second, we will wait for another 3 seconds before we delete other // files, Set to 0 to disable deletion rate limiting. // This option also affect the delete rate of WAL files in the DB. 
// @param delete_existing_trash: Deprecated, this argument have no effect, but // if user provide trash_dir we will schedule deletes for files in the dir // @param status: If not nullptr, status will contain any errors that happened // during creating the missing trash_dir or deleting existing files in trash. // @param max_trash_db_ratio: If the trash size constitutes for more than this // fraction of the total DB size we will start deleting new files passed to // DeleteScheduler immediately // @param bytes_max_delete_chunk: if a file to delete is larger than delete // chunk, ftruncate the file by this size each time, rather than dropping the // whole file. 0 means to always delete the whole file. If the file has more // than one linked names, the file will be deleted as a whole. Either way, // `rate_bytes_per_sec` will be appreciated. NOTE that with this option, // files already renamed as a trash may be partial, so users should not // directly recover them without checking. extern SstFileManager* NewSstFileManager( Env* env, std::shared_ptr fs, std::shared_ptr info_log = nullptr, const std::string& trash_dir = "", int64_t rate_bytes_per_sec = 0, bool delete_existing_trash = true, Status* status = nullptr, double max_trash_db_ratio = 0.25, uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024); // Same as above, but takes a pointer to a legacy Env object, instead of // Env and FileSystem objects extern SstFileManager* NewSstFileManager( Env* env, std::shared_ptr info_log = nullptr, std::string trash_dir = "", int64_t rate_bytes_per_sec = 0, bool delete_existing_trash = true, Status* status = nullptr, double max_trash_db_ratio = 0.25, uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/sst_file_reader.h000066400000000000000000000025311370372246700216230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/table_properties.h" namespace ROCKSDB_NAMESPACE { // SstFileReader is used to read sst files that are generated by DB or // SstFileWriter. class SstFileReader { public: SstFileReader(const Options& options); ~SstFileReader(); // Prepares to read from the file located at "file_path". Status Open(const std::string& file_path); // Returns a new iterator over the table contents. // Most read options provide the same control as we read from DB. // If "snapshot" is nullptr, the iterator returns only the latest keys. Iterator* NewIterator(const ReadOptions& options); std::shared_ptr GetTableProperties() const; // Verifies whether there is corruption in this table. Status VerifyChecksum(const ReadOptions& /*read_options*/); Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } private: struct Rep; std::unique_ptr rep_; }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/sst_file_writer.h000066400000000000000000000130661370372246700217020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/table_properties.h" #include "rocksdb/types.h" #if defined(__GNUC__) || defined(__clang__) #define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__)) #elif _WIN32 #define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated) #endif namespace ROCKSDB_NAMESPACE { class Comparator; // ExternalSstFileInfo include information about sst files created // using SstFileWriter. struct ExternalSstFileInfo { ExternalSstFileInfo() : file_path(""), smallest_key(""), largest_key(""), smallest_range_del_key(""), largest_range_del_key(""), file_checksum(""), file_checksum_func_name(""), sequence_number(0), file_size(0), num_entries(0), num_range_del_entries(0), version(0) {} ExternalSstFileInfo(const std::string& _file_path, const std::string& _smallest_key, const std::string& _largest_key, SequenceNumber _sequence_number, uint64_t _file_size, int32_t _num_entries, int32_t _version) : file_path(_file_path), smallest_key(_smallest_key), largest_key(_largest_key), smallest_range_del_key(""), largest_range_del_key(""), file_checksum(""), file_checksum_func_name(""), sequence_number(_sequence_number), file_size(_file_size), num_entries(_num_entries), num_range_del_entries(0), version(_version) {} std::string file_path; // external sst file path std::string smallest_key; // smallest user key in file std::string largest_key; // largest user key in file std::string smallest_range_del_key; // smallest range deletion user key in file std::string largest_range_del_key; // largest range deletion user key in file std::string file_checksum; // sst file checksum; std::string file_checksum_func_name; // The name of file checksum function SequenceNumber sequence_number; // sequence number of all keys in file uint64_t file_size; // file size in bytes uint64_t num_entries; // number of entries in file uint64_t num_range_del_entries; // number of range deletion entries in file 
int32_t version; // file version }; // SstFileWriter is used to create sst files that can be added to database later // All keys in files generated by SstFileWriter will have sequence number = 0. class SstFileWriter { public: // User can pass `column_family` to specify that the generated file will // be ingested into this column_family, note that passing nullptr means that // the column_family is unknown. // If invalidate_page_cache is set to true, SstFileWriter will give the OS a // hint that this file pages is not needed every time we write 1MB to the // file. To use the rate limiter an io_priority smaller than IO_TOTAL can be // passed. SstFileWriter(const EnvOptions& env_options, const Options& options, ColumnFamilyHandle* column_family = nullptr, bool invalidate_page_cache = true, Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL, bool skip_filters = false) : SstFileWriter(env_options, options, options.comparator, column_family, invalidate_page_cache, io_priority, skip_filters) {} // Deprecated API SstFileWriter(const EnvOptions& env_options, const Options& options, const Comparator* user_comparator, ColumnFamilyHandle* column_family = nullptr, bool invalidate_page_cache = true, Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL, bool skip_filters = false); ~SstFileWriter(); // Prepare SstFileWriter to write into file located at "file_path". Status Open(const std::string& file_path); // Add a Put key with value to currently opened file (deprecated) // REQUIRES: key is after any previously added key according to comparator. ROCKSDB_DEPRECATED_FUNC Status Add(const Slice& user_key, const Slice& value); // Add a Put key with value to currently opened file // REQUIRES: key is after any previously added key according to comparator. Status Put(const Slice& user_key, const Slice& value); // Add a Merge key with value to currently opened file // REQUIRES: key is after any previously added key according to comparator. 
Status Merge(const Slice& user_key, const Slice& value); // Add a deletion key to currently opened file // REQUIRES: key is after any previously added key according to comparator. Status Delete(const Slice& user_key); // Add a range deletion tombstone to currently opened file Status DeleteRange(const Slice& begin_key, const Slice& end_key); // Finalize writing to sst file and close file. // // An optional ExternalSstFileInfo pointer can be passed to the function // which will be populated with information about the created sst file. Status Finish(ExternalSstFileInfo* file_info = nullptr); // Return the current file size. uint64_t FileSize(); private: void InvalidatePageCache(bool closing); struct Rep; std::unique_ptr rep_; }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/statistics.h000066400000000000000000000505131370372246700206660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { /** * Keep adding ticker's here. * 1. Any ticker should be added before TICKER_ENUM_MAX. * 2. Add a readable string in TickersNameMap below for the newly added ticker. * 3. Add a corresponding enum value to TickerType.java in the java API * 4. 
Add the enum conversions from Java and C++ to portal.h's toJavaTickerType * and toCppTickers */ enum Tickers : uint32_t { // total block cache misses // REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS + // BLOCK_CACHE_FILTER_MISS + // BLOCK_CACHE_DATA_MISS; BLOCK_CACHE_MISS = 0, // total block cache hit // REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT + // BLOCK_CACHE_FILTER_HIT + // BLOCK_CACHE_DATA_HIT; BLOCK_CACHE_HIT, // # of blocks added to block cache. BLOCK_CACHE_ADD, // # of failures when adding blocks to block cache. BLOCK_CACHE_ADD_FAILURES, // # of times cache miss when accessing index block from block cache. BLOCK_CACHE_INDEX_MISS, // # of times cache hit when accessing index block from block cache. BLOCK_CACHE_INDEX_HIT, // # of index blocks added to block cache. BLOCK_CACHE_INDEX_ADD, // # of bytes of index blocks inserted into cache BLOCK_CACHE_INDEX_BYTES_INSERT, // # of bytes of index block erased from cache BLOCK_CACHE_INDEX_BYTES_EVICT, // # of times cache miss when accessing filter block from block cache. BLOCK_CACHE_FILTER_MISS, // # of times cache hit when accessing filter block from block cache. BLOCK_CACHE_FILTER_HIT, // # of filter blocks added to block cache. BLOCK_CACHE_FILTER_ADD, // # of bytes of bloom filter blocks inserted into cache BLOCK_CACHE_FILTER_BYTES_INSERT, // # of bytes of bloom filter block erased from cache BLOCK_CACHE_FILTER_BYTES_EVICT, // # of times cache miss when accessing data block from block cache. BLOCK_CACHE_DATA_MISS, // # of times cache hit when accessing data block from block cache. BLOCK_CACHE_DATA_HIT, // # of data blocks added to block cache. BLOCK_CACHE_DATA_ADD, // # of bytes of data blocks inserted into cache BLOCK_CACHE_DATA_BYTES_INSERT, // # of bytes read from cache. BLOCK_CACHE_BYTES_READ, // # of bytes written into cache. BLOCK_CACHE_BYTES_WRITE, // # of times bloom filter has avoided file reads, i.e., negatives. 
BLOOM_FILTER_USEFUL, // # of times bloom FullFilter has not avoided the reads. BLOOM_FILTER_FULL_POSITIVE, // # of times bloom FullFilter has not avoided the reads and data actually // exist. BLOOM_FILTER_FULL_TRUE_POSITIVE, BLOOM_FILTER_MICROS, // # persistent cache hit PERSISTENT_CACHE_HIT, // # persistent cache miss PERSISTENT_CACHE_MISS, // # total simulation block cache hits SIM_BLOCK_CACHE_HIT, // # total simulation block cache misses SIM_BLOCK_CACHE_MISS, // # of memtable hits. MEMTABLE_HIT, // # of memtable misses. MEMTABLE_MISS, // # of Get() queries served by L0 GET_HIT_L0, // # of Get() queries served by L1 GET_HIT_L1, // # of Get() queries served by L2 and up GET_HIT_L2_AND_UP, /** * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction * There are 4 reasons currently. */ COMPACTION_KEY_DROP_NEWER_ENTRY, // key was written with a newer value. // Also includes keys dropped for range del. COMPACTION_KEY_DROP_OBSOLETE, // The key is obsolete. COMPACTION_KEY_DROP_RANGE_DEL, // key was covered by a range tombstone. COMPACTION_KEY_DROP_USER, // user compaction function has dropped the key. COMPACTION_RANGE_DEL_DROP_OBSOLETE, // all keys in range were deleted. // Deletions obsoleted before bottom level due to file gap optimization. COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, // If a compaction was cancelled in sfm to prevent ENOSPC COMPACTION_CANCELLED, // Number of keys written to the database via the Put and Write call's NUMBER_KEYS_WRITTEN, // Number of Keys read, NUMBER_KEYS_READ, // Number keys updated, if inplace update is enabled NUMBER_KEYS_UPDATED, // The number of uncompressed bytes issued by DB::Put(), DB::Delete(), // DB::Merge(), and DB::Write(). BYTES_WRITTEN, // The number of uncompressed bytes read from DB::Get(). It could be // either from memtables, cache, or table files. // For the number of logical bytes read from DB::MultiGet(), // please use NUMBER_MULTIGET_BYTES_READ. 
BYTES_READ, // The number of calls to seek/next/prev NUMBER_DB_SEEK, NUMBER_DB_NEXT, NUMBER_DB_PREV, // The number of calls to seek/next/prev that returned data NUMBER_DB_SEEK_FOUND, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, // The number of uncompressed bytes read from an iterator. // Includes size of key and value. ITER_BYTES_READ, NO_FILE_CLOSES, NO_FILE_OPENS, NO_FILE_ERRORS, // DEPRECATED Time system had to wait to do LO-L1 compactions STALL_L0_SLOWDOWN_MICROS, // DEPRECATED Time system had to wait to move memtable to L1. STALL_MEMTABLE_COMPACTION_MICROS, // DEPRECATED write throttle because of too many files in L0 STALL_L0_NUM_FILES_MICROS, // Writer has to wait for compaction or flush to finish. STALL_MICROS, // The wait time for db mutex. // Disabled by default. To enable it set stats level to kAll DB_MUTEX_WAIT_MICROS, RATE_LIMIT_DELAY_MILLIS, // DEPRECATED number of iterators currently open NO_ITERATORS, // Number of MultiGet calls, keys read, and bytes read NUMBER_MULTIGET_CALLS, NUMBER_MULTIGET_KEYS_READ, NUMBER_MULTIGET_BYTES_READ, // Number of deletes records that were not required to be // written to storage because key does not exist NUMBER_FILTERED_DELETES, NUMBER_MERGE_FAILURES, // number of times bloom was checked before creating iterator on a // file, and the number of times the check was useful in avoiding // iterator creation (and thus likely IOPs). BLOOM_FILTER_PREFIX_CHECKED, BLOOM_FILTER_PREFIX_USEFUL, // Number of times we had to reseek inside an iteration to skip // over large number of keys with same userkey. NUMBER_OF_RESEEKS_IN_ITERATION, // Record the number of calls to GetUpadtesSince. 
Useful to keep track of // transaction log iterator refreshes GET_UPDATES_SINCE_CALLS, BLOCK_CACHE_COMPRESSED_MISS, // miss in the compressed block cache BLOCK_CACHE_COMPRESSED_HIT, // hit in the compressed block cache // Number of blocks added to compressed block cache BLOCK_CACHE_COMPRESSED_ADD, // Number of failures when adding blocks to compressed block cache BLOCK_CACHE_COMPRESSED_ADD_FAILURES, WAL_FILE_SYNCED, // Number of times WAL sync is done WAL_FILE_BYTES, // Number of bytes written to WAL // Writes can be processed by requesting thread or by the thread at the // head of the writers queue. WRITE_DONE_BY_SELF, WRITE_DONE_BY_OTHER, // Equivalent to writes done for others WRITE_TIMEDOUT, // Number of writes ending up with timed-out. WRITE_WITH_WAL, // Number of Write calls that request WAL COMPACT_READ_BYTES, // Bytes read during compaction COMPACT_WRITE_BYTES, // Bytes written during compaction FLUSH_WRITE_BYTES, // Bytes written during flush // Number of table's properties loaded directly from file, without creating // table reader object. NUMBER_DIRECT_LOAD_TABLE_PROPERTIES, NUMBER_SUPERVERSION_ACQUIRES, NUMBER_SUPERVERSION_RELEASES, NUMBER_SUPERVERSION_CLEANUPS, // # of compressions/decompressions executed NUMBER_BLOCK_COMPRESSED, NUMBER_BLOCK_DECOMPRESSED, NUMBER_BLOCK_NOT_COMPRESSED, MERGE_OPERATION_TOTAL_TIME, FILTER_OPERATION_TOTAL_TIME, // Row cache. ROW_CACHE_HIT, ROW_CACHE_MISS, // Read amplification statistics. // Read amplification can be calculated using this formula // (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) // // REQUIRES: ReadOptions::read_amp_bytes_per_bit to be enabled READ_AMP_ESTIMATE_USEFUL_BYTES, // Estimate of total bytes actually used. READ_AMP_TOTAL_READ_BYTES, // Total size of loaded data blocks. // Number of refill intervals where rate limiter's bytes are fully consumed. 
NUMBER_RATE_LIMITER_DRAINS, // Number of internal keys skipped by Iterator NUMBER_ITER_SKIP, // BlobDB specific stats // # of Put/PutTTL/PutUntil to BlobDB. BLOB_DB_NUM_PUT, // # of Write to BlobDB. BLOB_DB_NUM_WRITE, // # of Get to BlobDB. BLOB_DB_NUM_GET, // # of MultiGet to BlobDB. BLOB_DB_NUM_MULTIGET, // # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. BLOB_DB_NUM_SEEK, // # of Next to BlobDB iterator. BLOB_DB_NUM_NEXT, // # of Prev to BlobDB iterator. BLOB_DB_NUM_PREV, // # of keys written to BlobDB. BLOB_DB_NUM_KEYS_WRITTEN, // # of keys read from BlobDB. BLOB_DB_NUM_KEYS_READ, // # of bytes (key + value) written to BlobDB. BLOB_DB_BYTES_WRITTEN, // # of bytes (keys + value) read from BlobDB. BLOB_DB_BYTES_READ, // # of keys written by BlobDB as non-TTL inlined value. BLOB_DB_WRITE_INLINED, // # of keys written by BlobDB as TTL inlined value. BLOB_DB_WRITE_INLINED_TTL, // # of keys written by BlobDB as non-TTL blob value. BLOB_DB_WRITE_BLOB, // # of keys written by BlobDB as TTL blob value. BLOB_DB_WRITE_BLOB_TTL, // # of bytes written to blob file. BLOB_DB_BLOB_FILE_BYTES_WRITTEN, // # of bytes read from blob file. BLOB_DB_BLOB_FILE_BYTES_READ, // # of times a blob files being synced. BLOB_DB_BLOB_FILE_SYNCED, // # of blob index evicted from base DB by BlobDB compaction filter because // of expiration. BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, // size of blob index evicted from base DB by BlobDB compaction filter // because of expiration. BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, // # of blob index evicted from base DB by BlobDB compaction filter because // of corresponding file deleted. BLOB_DB_BLOB_INDEX_EVICTED_COUNT, // size of blob index evicted from base DB by BlobDB compaction filter // because of corresponding file deleted. BLOB_DB_BLOB_INDEX_EVICTED_SIZE, // # of blob files that were obsoleted by garbage collection. BLOB_DB_GC_NUM_FILES, // # of blob files generated by garbage collection. 
BLOB_DB_GC_NUM_NEW_FILES, // # of BlobDB garbage collection failures. BLOB_DB_GC_FAILURES, // # of keys dropped by BlobDB garbage collection because they had been // overwritten. DEPRECATED. BLOB_DB_GC_NUM_KEYS_OVERWRITTEN, // # of keys dropped by BlobDB garbage collection because of expiration. // DEPRECATED. BLOB_DB_GC_NUM_KEYS_EXPIRED, // # of keys relocated to new blob file by garbage collection. BLOB_DB_GC_NUM_KEYS_RELOCATED, // # of bytes dropped by BlobDB garbage collection because they had been // overwritten. DEPRECATED. BLOB_DB_GC_BYTES_OVERWRITTEN, // # of bytes dropped by BlobDB garbage collection because of expiration. // DEPRECATED. BLOB_DB_GC_BYTES_EXPIRED, // # of bytes relocated to new blob file by garbage collection. BLOB_DB_GC_BYTES_RELOCATED, // # of blob files evicted because of BlobDB is full. BLOB_DB_FIFO_NUM_FILES_EVICTED, // # of keys in the blob files evicted because of BlobDB is full. BLOB_DB_FIFO_NUM_KEYS_EVICTED, // # of bytes in the blob files evicted because of BlobDB is full. BLOB_DB_FIFO_BYTES_EVICTED, // These counters indicate a performance issue in WritePrepared transactions. // We should not seem them ticking them much. // # of times prepare_mutex_ is acquired in the fast path. TXN_PREPARE_MUTEX_OVERHEAD, // # of times old_commit_map_mutex_ is acquired in the fast path. TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD, // # of times we checked a batch for duplicate keys. TXN_DUPLICATE_KEY_OVERHEAD, // # of times snapshot_mutex_ is acquired in the fast path. 
TXN_SNAPSHOT_MUTEX_OVERHEAD, // # of times ::Get returned TryAgain due to expired snapshot seq TXN_GET_TRY_AGAIN, // Number of keys actually found in MultiGet calls (vs number requested by // caller) // NUMBER_MULTIGET_KEYS_READ gives the number requested by caller NUMBER_MULTIGET_KEYS_FOUND, NO_ITERATOR_CREATED, // number of iterators created NO_ITERATOR_DELETED, // number of iterators deleted BLOCK_CACHE_COMPRESSION_DICT_MISS, BLOCK_CACHE_COMPRESSION_DICT_HIT, BLOCK_CACHE_COMPRESSION_DICT_ADD, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, BLOCK_CACHE_COMPRESSION_DICT_BYTES_EVICT, // # of blocks redundantly inserted into block cache. // REQUIRES: BLOCK_CACHE_ADD_REDUNDANT <= BLOCK_CACHE_ADD BLOCK_CACHE_ADD_REDUNDANT, // # of index blocks redundantly inserted into block cache. // REQUIRES: BLOCK_CACHE_INDEX_ADD_REDUNDANT <= BLOCK_CACHE_INDEX_ADD BLOCK_CACHE_INDEX_ADD_REDUNDANT, // # of filter blocks redundantly inserted into block cache. // REQUIRES: BLOCK_CACHE_FILTER_ADD_REDUNDANT <= BLOCK_CACHE_FILTER_ADD BLOCK_CACHE_FILTER_ADD_REDUNDANT, // # of data blocks redundantly inserted into block cache. // REQUIRES: BLOCK_CACHE_DATA_ADD_REDUNDANT <= BLOCK_CACHE_DATA_ADD BLOCK_CACHE_DATA_ADD_REDUNDANT, // # of dict blocks redundantly inserted into block cache. // REQUIRES: BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT // <= BLOCK_CACHE_COMPRESSION_DICT_ADD BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT, // # of files marked as trash by sst file manager and will be deleted // later by background thread. FILES_MARKED_TRASH, // # of files deleted immediately by sst file manger through delete scheduler. FILES_DELETED_IMMEDIATELY, TICKER_ENUM_MAX }; // The order of items listed in Tickers should be the same as // the order listed in TickersNameMap extern const std::vector> TickersNameMap; /** * Keep adding histogram's here. 
* Any histogram should have value less than HISTOGRAM_ENUM_MAX * Add a new Histogram by assigning it the current value of HISTOGRAM_ENUM_MAX * Add a string representation in HistogramsNameMap below * And increment HISTOGRAM_ENUM_MAX * Add a corresponding enum value to HistogramType.java in the java API */ enum Histograms : uint32_t { DB_GET = 0, DB_WRITE, COMPACTION_TIME, COMPACTION_CPU_TIME, SUBCOMPACTION_SETUP_TIME, TABLE_SYNC_MICROS, COMPACTION_OUTFILE_SYNC_MICROS, WAL_FILE_SYNC_MICROS, MANIFEST_FILE_SYNC_MICROS, // TIME SPENT IN IO DURING TABLE OPEN TABLE_OPEN_IO_MICROS, DB_MULTIGET, READ_BLOCK_COMPACTION_MICROS, READ_BLOCK_GET_MICROS, WRITE_RAW_BLOCK_MICROS, STALL_L0_SLOWDOWN_COUNT, STALL_MEMTABLE_COMPACTION_COUNT, STALL_L0_NUM_FILES_COUNT, HARD_RATE_LIMIT_DELAY_COUNT, SOFT_RATE_LIMIT_DELAY_COUNT, NUM_FILES_IN_SINGLE_COMPACTION, DB_SEEK, WRITE_STALL, SST_READ_MICROS, // The number of subcompactions actually scheduled during a compaction NUM_SUBCOMPACTIONS_SCHEDULED, // Value size distribution in each operation BYTES_PER_READ, BYTES_PER_WRITE, BYTES_PER_MULTIGET, // number of bytes compressed/decompressed // number of bytes is when uncompressed; i.e. before/after respectively BYTES_COMPRESSED, BYTES_DECOMPRESSED, COMPRESSION_TIMES_NANOS, DECOMPRESSION_TIMES_NANOS, // Number of merge operands passed to the merge operator in user read // requests. READ_NUM_MERGE_OPERANDS, // BlobDB specific stats // Size of keys written to BlobDB. BLOB_DB_KEY_SIZE, // Size of values written to BlobDB. BLOB_DB_VALUE_SIZE, // BlobDB Put/PutWithTTL/PutUntil/Write latency. BLOB_DB_WRITE_MICROS, // BlobDB Get lagency. BLOB_DB_GET_MICROS, // BlobDB MultiGet latency. BLOB_DB_MULTIGET_MICROS, // BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. BLOB_DB_SEEK_MICROS, // BlobDB Next latency. BLOB_DB_NEXT_MICROS, // BlobDB Prev latency. BLOB_DB_PREV_MICROS, // Blob file write latency. BLOB_DB_BLOB_FILE_WRITE_MICROS, // Blob file read latency. 
BLOB_DB_BLOB_FILE_READ_MICROS, // Blob file sync latency. BLOB_DB_BLOB_FILE_SYNC_MICROS, // BlobDB garbage collection time. DEPRECATED. BLOB_DB_GC_MICROS, // BlobDB compression time. BLOB_DB_COMPRESSION_MICROS, // BlobDB decompression time. BLOB_DB_DECOMPRESSION_MICROS, // Time spent flushing memtable to disk FLUSH_TIME, SST_BATCH_SIZE, HISTOGRAM_ENUM_MAX, }; extern const std::vector> HistogramsNameMap; struct HistogramData { double median; double percentile95; double percentile99; double average; double standard_deviation; // zero-initialize new members since old Statistics::histogramData() // implementations won't write them. double max = 0.0; uint64_t count = 0; uint64_t sum = 0; double min = 0.0; }; // StatsLevel can be used to reduce statistics overhead by skipping certain // types of stats in the stats collection process. // Usage: // options.statistics->set_stats_level(StatsLevel::kExceptTimeForMutex); enum StatsLevel : uint8_t { // Disable timer stats, and skip histogram stats kExceptHistogramOrTimers, // Skip timer stats kExceptTimers, // Collect all stats except time inside mutex lock AND time spent on // compression. kExceptDetailedTimers, // Collect all stats except the counters requiring to get time inside the // mutex lock. kExceptTimeForMutex, // Collect all stats, including measuring duration of mutex operations. // If getting time is expensive on the platform to run, it can // reduce scalability to more threads, especially for writes. kAll, }; // Analyze the performance of a db by providing cumulative stats over time. // Usage: // Options options; // options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); // Status s = DB::Open(options, kDBPath, &db); // ... 
// options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); // HistogramData hist; // options.statistics->histogramData(FLUSH_TIME, &hist); class Statistics { public: virtual ~Statistics() {} static const char* Type() { return "Statistics"; } virtual uint64_t getTickerCount(uint32_t tickerType) const = 0; virtual void histogramData(uint32_t type, HistogramData* const data) const = 0; virtual std::string getHistogramString(uint32_t /*type*/) const { return ""; } virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0; virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0; virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0; virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) { if (get_stats_level() <= StatsLevel::kExceptTimers) { return; } recordInHistogram(histogramType, time); } // The function is here only for backward compatibility reason. // Users implementing their own Statistics class should override // recordInHistogram() instead and leave measureTime() as it is. virtual void measureTime(uint32_t /*histogramType*/, uint64_t /*time*/) { // This is not supposed to be called. assert(false); } virtual void recordInHistogram(uint32_t histogramType, uint64_t time) { // measureTime() is the old and inaccurate function name. // To keep backward compatible. If users implement their own // statistics, which overrides measureTime() but doesn't override // this function. We forward to measureTime(). measureTime(histogramType, time); } // Resets all ticker and histogram stats virtual Status Reset() { return Status::NotSupported("Not implemented"); } // String representation of the statistic object. 
virtual std::string ToString() const { // Do nothing by default return std::string("ToString(): not implemented"); } virtual bool getTickerMap(std::map*) const { // Do nothing by default return false; } // Override this function to disable particular histogram collection virtual bool HistEnabledForType(uint32_t type) const { return type < HISTOGRAM_ENUM_MAX; } void set_stats_level(StatsLevel sl) { stats_level_.store(sl, std::memory_order_relaxed); } StatsLevel get_stats_level() const { return stats_level_.load(std::memory_order_relaxed); } private: std::atomic stats_level_{kExceptDetailedTimers}; }; // Create a concrete DBStatistics object std::shared_ptr CreateDBStatistics(); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/stats_history.h000066400000000000000000000047721370372246700214210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "rocksdb/statistics.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class DBImpl; // StatsHistoryIterator is the main interface for users to programmatically // access statistics snapshots that was automatically stored by RocksDB. // Depending on options, the stats can be in memory or on disk. // The stats snapshots are indexed by time that they were recorded, and each // stats snapshot contains individual stat name and value at the time of // recording. 
// Example: // std::unique_ptr stats_iter; // Status s = db->GetStatsHistory(0 /* start_time */, // env->NowMicros() /* end_time*/, // &stats_iter); // if (s.ok) { // for (; stats_iter->Valid(); stats_iter->Next()) { // uint64_t stats_time = stats_iter->GetStatsTime(); // const std::map& stats_map = // stats_iter->GetStatsMap(); // process(stats_time, stats_map); // } // } class StatsHistoryIterator { public: StatsHistoryIterator() {} virtual ~StatsHistoryIterator() {} virtual bool Valid() const = 0; // Moves to the next stats history record. After this call, Valid() is // true iff the iterator was not positioned at the last entry in the source. // REQUIRES: Valid() virtual void Next() = 0; // Return the time stamp (in seconds) when stats history is recorded. // REQUIRES: Valid() virtual uint64_t GetStatsTime() const = 0; virtual int GetFormatVersion() const { return -1; } // Return the current stats history as an std::map which specifies the // mapping from stats name to stats value . The underlying storage // for the returned map is valid only until the next modification of // the iterator. // REQUIRES: Valid() virtual const std::map& GetStatsMap() const = 0; // If an error has occurred, return it. Else return an ok status. virtual Status status() const = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/status.h000066400000000000000000000435661370372246700200310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // A Status encapsulates the result of an operation. 
It may indicate success, // or it may indicate an error with an associated error message. // // Multiple threads can invoke const methods on a Status without // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same Status must use // external synchronization. // NOTE(review): when ROCKSDB_ASSERT_STATUS_CHECKED is defined, the const // accessors below write the mutable checked_ flag, so the guarantee above // may not hold in that build mode - confirm. #pragma once #ifdef ROCKSDB_ASSERT_STATUS_CHECKED #include #include #endif #include #ifdef ROCKSDB_ASSERT_STATUS_CHECKED #include "port/stack_trace.h" #endif #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { class Status { public: // Create a success status. Status() : code_(kOk), subcode_(kNone), sev_(kNoError), state_(nullptr) {} // Releases the message buffer with delete[]. When // ROCKSDB_ASSERT_STATUS_CHECKED is defined, destroying a Status whose // checked_ flag is still false prints "Failed to check Status" to stderr, // calls port::PrintStack() and aborts. ~Status() { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED if (!checked_) { fprintf(stderr, "Failed to check Status\n"); port::PrintStack(); abort(); } #endif // ROCKSDB_ASSERT_STATUS_CHECKED delete[] state_; } // Copy the specified status. Status(const Status& s); Status& operator=(const Status& s); Status(Status&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif ; Status& operator=(Status&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif ; // Equality and inequality compare only the code; subcode, severity and // message are ignored (see the inline definitions after the class). bool operator==(const Status& rhs) const; bool operator!=(const Status& rhs) const; // In case of intentionally swallowing an error, user must explicitly call // this function. That way we are easily able to search the code to find where // error swallowing occurs.
void PermitUncheckedError() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED } enum Code : unsigned char { kOk = 0, kNotFound = 1, kCorruption = 2, kNotSupported = 3, kInvalidArgument = 4, kIOError = 5, kMergeInProgress = 6, kIncomplete = 7, kShutdownInProgress = 8, kTimedOut = 9, kAborted = 10, kBusy = 11, kExpired = 12, kTryAgain = 13, kCompactionTooLarge = 14, kColumnFamilyDropped = 15, kMaxCode }; Code code() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code_; } enum SubCode : unsigned char { kNone = 0, kMutexTimeout = 1, kLockTimeout = 2, kLockLimit = 3, kNoSpace = 4, kDeadlock = 5, kStaleFile = 6, kMemoryLimit = 7, kSpaceLimit = 8, kPathNotFound = 9, KMergeOperandsInsufficientCapacity = 10, kManualCompactionPaused = 11, kOverwritten = 12, kTxnNotPrepared = 13, kMaxSubCode }; SubCode subcode() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return subcode_; } enum Severity : unsigned char { kNoError = 0, kSoftError = 1, kHardError = 2, kFatalError = 3, kUnrecoverableError = 4, kMaxSeverity }; // Copies the code, subcode and message of s but uses sev as the severity. Status(const Status& s, Severity sev); Severity severity() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return sev_; } // Returns a C style string indicating the message of the Status; the // pointer is nullptr when no message is attached. const char* getState() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return state_; } // Return a success status. static Status OK() { return Status(); } // Successful, though an existing something was overwritten // Note: using variants of OK status for program logic is discouraged, // but it can be useful for communicating statistical information without // changing public APIs. static Status OkOverwritten() { return Status(kOk, kOverwritten); } // Return error status of an appropriate type.
static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kNotFound, msg, msg2); } // Fast path for not found without malloc; static Status NotFound(SubCode msg = kNone) { return Status(kNotFound, msg); } static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kCorruption, msg, msg2); } static Status Corruption(SubCode msg = kNone) { return Status(kCorruption, msg); } static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kNotSupported, msg, msg2); } static Status NotSupported(SubCode msg = kNone) { return Status(kNotSupported, msg); } static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kInvalidArgument, msg, msg2); } static Status InvalidArgument(SubCode msg = kNone) { return Status(kInvalidArgument, msg); } static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kIOError, msg, msg2); } static Status IOError(SubCode msg = kNone) { return Status(kIOError, msg); } static Status MergeInProgress(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kMergeInProgress, msg, msg2); } static Status MergeInProgress(SubCode msg = kNone) { return Status(kMergeInProgress, msg); } static Status Incomplete(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kIncomplete, msg, msg2); } static Status Incomplete(SubCode msg = kNone) { return Status(kIncomplete, msg); } static Status ShutdownInProgress(SubCode msg = kNone) { return Status(kShutdownInProgress, msg); } static Status ShutdownInProgress(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kShutdownInProgress, msg, msg2); } static Status Aborted(SubCode msg = kNone) { return Status(kAborted, msg); } static Status Aborted(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kAborted, msg, msg2); } static Status Busy(SubCode msg = kNone) { return Status(kBusy, msg); } static Status Busy(const Slice& msg, const Slice& msg2
= Slice()) { return Status(kBusy, msg, msg2); } static Status TimedOut(SubCode msg = kNone) { return Status(kTimedOut, msg); } static Status TimedOut(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kTimedOut, msg, msg2); } static Status Expired(SubCode msg = kNone) { return Status(kExpired, msg); } static Status Expired(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kExpired, msg, msg2); } static Status TryAgain(SubCode msg = kNone) { return Status(kTryAgain, msg); } static Status TryAgain(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kTryAgain, msg, msg2); } static Status CompactionTooLarge(SubCode msg = kNone) { return Status(kCompactionTooLarge, msg); } static Status CompactionTooLarge(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kCompactionTooLarge, msg, msg2); } static Status ColumnFamilyDropped(SubCode msg = kNone) { return Status(kColumnFamilyDropped, msg); } static Status ColumnFamilyDropped(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kColumnFamilyDropped, msg, msg2); } static Status NoSpace() { return Status(kIOError, kNoSpace); } static Status NoSpace(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kIOError, kNoSpace, msg, msg2); } static Status MemoryLimit() { return Status(kAborted, kMemoryLimit); } static Status MemoryLimit(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kAborted, kMemoryLimit, msg, msg2); } static Status SpaceLimit() { return Status(kIOError, kSpaceLimit); } static Status SpaceLimit(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kIOError, kSpaceLimit, msg, msg2); } static Status PathNotFound() { return Status(kIOError, kPathNotFound); } static Status PathNotFound(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kIOError, kPathNotFound, msg, msg2); } static Status TxnNotPrepared() { return Status(kInvalidArgument, kTxnNotPrepared); } static Status TxnNotPrepared(const Slice& msg, const Slice&
msg2 = Slice()) { return Status(kInvalidArgument, kTxnNotPrepared, msg, msg2); } // Returns true iff the status indicates success. bool ok() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kOk; } // Returns true iff the status indicates success *with* something // overwritten bool IsOkOverwritten() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kOk && subcode() == kOverwritten; } // Returns true iff the status indicates a NotFound error. bool IsNotFound() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kNotFound; } // Returns true iff the status indicates a Corruption error. bool IsCorruption() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kCorruption; } // Returns true iff the status indicates a NotSupported error. bool IsNotSupported() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kNotSupported; } // Returns true iff the status indicates an InvalidArgument error. bool IsInvalidArgument() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kInvalidArgument; } // Returns true iff the status indicates an IOError. bool IsIOError() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kIOError; } // Returns true iff the status indicates a MergeInProgress.
bool IsMergeInProgress() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kMergeInProgress; } // Returns true iff the status indicates Incomplete bool IsIncomplete() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kIncomplete; } // Returns true iff the status indicates Shutdown In progress bool IsShutdownInProgress() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kShutdownInProgress; } bool IsTimedOut() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kTimedOut; } bool IsAborted() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kAborted; } // Returns true iff the code is kAborted and the subcode is kLockLimit. bool IsLockLimit() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kAborted && subcode() == kLockLimit; } // Returns true iff the status indicates that a resource is Busy and // temporarily could not be acquired. bool IsBusy() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kBusy; } // Returns true iff the code is kBusy and the subcode is kDeadlock. bool IsDeadlock() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kBusy && subcode() == kDeadlock; } // Returns true iff the status indicated that the operation has Expired. bool IsExpired() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kExpired; } // Returns true iff the status indicates a TryAgain error. // This usually means that the operation failed, but may succeed if // re-attempted.
bool IsTryAgain() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kTryAgain; } // Returns true iff the status indicates the proposed compaction is too large bool IsCompactionTooLarge() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kCompactionTooLarge; } // Returns true iff the status indicates Column Family Dropped bool IsColumnFamilyDropped() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return code() == kColumnFamilyDropped; } // Returns true iff the status indicates a NoSpace error // This is caused by an I/O error returning the specific "out of space" // error condition. Stricto sensu, a NoSpace error is an I/O error // with a specific subcode, enabling users to take the appropriate action // if needed bool IsNoSpace() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code() == kIOError) && (subcode() == kNoSpace); } // Returns true iff the status indicates a memory limit error. There may be // cases where we limit the memory used in certain operations (eg. the size // of a write batch) in order to avoid out of memory exceptions. bool IsMemoryLimit() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code() == kAborted) && (subcode() == kMemoryLimit); } // Returns true iff the status indicates a PathNotFound error // This is caused by an I/O error returning the specific "no such file or // directory" error condition.
A PathNotFound error is an I/O error with // a specific subcode, enabling users to take appropriate action if necessary bool IsPathNotFound() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code() == kIOError) && (subcode() == kPathNotFound); } // Returns true iff the status indicates manual compaction paused. This // is caused by a call to PauseManualCompaction bool IsManualCompactionPaused() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code() == kIncomplete) && (subcode() == kManualCompactionPaused); } // Returns true iff the status indicates a TxnNotPrepared error. bool IsTxnNotPrepared() const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code() == kInvalidArgument) && (subcode() == kTxnNotPrepared); } // Return a string representation of this status suitable for printing. // Returns the string "OK" for success. std::string ToString() const; protected: // A nullptr state_ (which is always the case for OK) means the message // is empty. Otherwise state_ owns a buffer that the destructor releases // with delete[]. // NOTE(review): this comment used to describe a length-prefixed layout // (state_[0..3] == length of message, state_[4..] == message), but // getState() documents state_ as a C style string, so that layout looks // stale - confirm against CopyState(). Code code_; SubCode subcode_; Severity sev_; const char* state_; #ifdef ROCKSDB_ASSERT_STATUS_CHECKED mutable bool checked_ = false; #endif // ROCKSDB_ASSERT_STATUS_CHECKED explicit Status(Code _code, SubCode _subcode = kNone) : code_(_code), subcode_(_subcode), sev_(kNoError), state_(nullptr) {} Status(Code _code, SubCode _subcode, const Slice& msg, const Slice& msg2); Status(Code _code, const Slice& msg, const Slice& msg2) : Status(_code, kNone, msg, msg2) {} static const char* CopyState(const char* s); }; inline Status::Status(const Status& s) : code_(s.code_), subcode_(s.subcode_), sev_(s.sev_) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED state_ = (s.state_ == nullptr) ?
nullptr : CopyState(s.state_); } inline Status::Status(const Status& s, Severity sev) : code_(s.code_), subcode_(s.subcode_), sev_(sev) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_); } inline Status& Status::operator=(const Status& s) { if (this != &s) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; checked_ = false; #endif // ROCKSDB_ASSERT_STATUS_CHECKED code_ = s.code_; subcode_ = s.subcode_; sev_ = s.sev_; delete[] state_; state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_); } return *this; } inline Status::Status(Status&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif : Status() { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED *this = std::move(s); } // Move assignment: takes over the message buffer of s and leaves s with // code kOk, subcode kNone, severity kNoError and a null state_. inline Status& Status::operator=(Status&& s) #if !(defined _MSC_VER) || ((defined _MSC_VER) && (_MSC_VER >= 1900)) noexcept #endif { if (this != &s) { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED s.checked_ = true; checked_ = false; #endif // ROCKSDB_ASSERT_STATUS_CHECKED code_ = std::move(s.code_); s.code_ = kOk; subcode_ = std::move(s.subcode_); s.subcode_ = kNone; sev_ = std::move(s.sev_); s.sev_ = kNoError; delete[] state_; state_ = nullptr; std::swap(state_, s.state_); } return *this; } // Compares code_ only; subcode_, sev_ and state_ are not considered. inline bool Status::operator==(const Status& rhs) const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; rhs.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return (code_ == rhs.code_); } inline bool Status::operator!=(const Status& rhs) const { #ifdef ROCKSDB_ASSERT_STATUS_CHECKED checked_ = true; rhs.checked_ = true; #endif // ROCKSDB_ASSERT_STATUS_CHECKED return !(*this == rhs); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/table.h000066400000000000000000000661431370372246700175710ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Currently we support two types of tables: plain table and block-based table. // 1. Block-based table: this is the default table type that we inherited from // LevelDB, which was designed for storing data in hard disk or flash // device. // 2. Plain table: it is one of RocksDB's SST file format optimized // for low query latency on pure-memory or really low-latency media. // // A tutorial of rocksdb table formats is available here: // https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats // // Example code is also available // https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats#wiki-examples #pragma once #include #include #include #include "rocksdb/cache.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { // -- Block-based Table class FilterPolicy; class FlushBlockPolicyFactory; class PersistentCache; class RandomAccessFile; struct TableReaderOptions; struct TableBuilderOptions; class TableBuilder; class TableFactory; class TableReader; class WritableFileWriter; struct ConfigOptions; struct EnvOptions; enum ChecksumType : char { kNoChecksum = 0x0, kCRC32c = 0x1, kxxHash = 0x2, kxxHash64 = 0x3, }; // For advanced user only struct BlockBasedTableOptions { // @flush_block_policy_factory creates the instances of flush block policy. // which provides a configurable way to determine when to flush a block in // the block based tables. If not set, table builder will use the default // block flush policy, which cuts blocks by block size (please refer to // `FlushBlockBySizePolicy`).
std::shared_ptr flush_block_policy_factory; // TODO(kailiu) Temporarily disable this feature by making the default value // to be false. // // TODO(ajkr) we need to update names of variables controlling meta-block // caching as they should now apply to range tombstone and compression // dictionary meta-blocks, in addition to index and filter meta-blocks. // // Indicating if we'd put index/filter blocks to the block cache. // If not specified, each "table reader" object will pre-load index/filter // block during table initialization. bool cache_index_and_filter_blocks = false; // If cache_index_and_filter_blocks is enabled, cache index and filter // blocks with high priority. If set to true, depending on implementation of // block cache, index and filter blocks may be less likely to be evicted // than data blocks. bool cache_index_and_filter_blocks_with_high_priority = true; // if cache_index_and_filter_blocks is true and the below is true, then // filter and index blocks are stored in the cache, but a reference is // held in the "table reader" object so the blocks are pinned and only // evicted from cache when the table reader is freed. bool pin_l0_filter_and_index_blocks_in_cache = false; // If cache_index_and_filter_blocks is true and the below is true, then // the top-level index of partitioned filter and index blocks are stored in // the cache, but a reference is held in the "table reader" object so the // blocks are pinned and only evicted from cache when the table reader is // freed. This is not limited to l0 in LSM tree. bool pin_top_level_index_and_filter = true; // The index type that will be used for this table. enum IndexType : char { // A space efficient index block that is optimized for // binary-search-based index. kBinarySearch = 0x00, // The hash index, if enabled, will do the hash lookup when // `Options.prefix_extractor` is provided. kHashSearch = 0x01, // A two-level index implementation. Both levels are binary search indexes.
kTwoLevelIndexSearch = 0x02, // Like kBinarySearch, but index also contains first key of each block. // This allows iterators to defer reading the block until it's actually // needed. May significantly reduce read amplification of short range scans. // Without it, iterator seek usually reads one block from each level-0 file // and from each level, which may be expensive. // Works best in combination with: // - IndexShorteningMode::kNoShortening, // - custom FlushBlockPolicy to cut blocks at some meaningful boundaries, // e.g. when prefix changes. // Makes the index significantly bigger (2x or more), especially when keys // are long. kBinarySearchWithFirstKey = 0x03, }; IndexType index_type = kBinarySearch; // The index type that will be used for the data block. enum DataBlockIndexType : char { kDataBlockBinarySearch = 0, // traditional block type kDataBlockBinaryAndHash = 1, // additional hash index }; DataBlockIndexType data_block_index_type = kDataBlockBinarySearch; // #entries/#buckets. It is valid only when data_block_index_type is // kDataBlockBinaryAndHash. double data_block_hash_table_util_ratio = 0.75; // This option is now deprecated. No matter what value it is set to, // it will behave as if hash_index_allow_collision=true. bool hash_index_allow_collision = true; // Use the specified checksum type. Newly created table files will be // protected with this checksum type. Old table files will still be readable, // even though they have different checksum type. ChecksumType checksum = kCRC32c; // Disable block cache. If this is set to true, // then no block cache should be used, and the block_cache should // point to a nullptr object. bool no_block_cache = false; // If non-NULL use the specified cache for blocks. // If NULL, rocksdb will automatically create and use an 8MB internal cache.
std::shared_ptr block_cache = nullptr; // If non-NULL use the specified cache for pages read from device // If NULL, no page cache is used std::shared_ptr persistent_cache = nullptr; // If non-NULL use the specified cache for compressed blocks. // If NULL, rocksdb will not use a compressed block cache. // Note: though it looks similar to `block_cache`, RocksDB doesn't put the // same type of object there. std::shared_ptr block_cache_compressed = nullptr; // Approximate size of user data packed per block. Note that the // block size specified here corresponds to uncompressed data. The // actual size of the unit read from disk may be smaller if // compression is enabled. This parameter can be changed dynamically. size_t block_size = 4 * 1024; // This is used to close a block before it reaches the configured // 'block_size'. If the percentage of free space in the current block is less // than this specified number and adding a new record to the block will // exceed the configured block size, then this block will be closed and the // new record will be written to the next block. int block_size_deviation = 10; // Number of keys between restart points for delta encoding of keys. // This parameter can be changed dynamically. Most clients should // leave this parameter alone. The minimum value allowed is 1. Any smaller // value will be silently overwritten with 1. int block_restart_interval = 16; // Same as block_restart_interval but used for the index block. int index_block_restart_interval = 1; // Block size for partitioned metadata. Currently applied to indexes when // kTwoLevelIndexSearch is used and to filters when partition_filters is used. // Note: Since in the current implementation the filters and index partitions // are aligned, an index/filter block is created when either index or filter // block size reaches the specified limit.
// Note: this limit is currently applied to only index blocks; a filter // partition is cut right after an index block is cut // TODO(myabandeh): remove the note above when filter partitions are cut // separately uint64_t metadata_block_size = 4096; // Note: currently this option requires kTwoLevelIndexSearch to be set as // well. // TODO(myabandeh): remove the note above once the limitation is lifted // Use partitioned full filters for each SST file. This option is // incompatible with block-based filters. bool partition_filters = false; // Use delta encoding to compress keys in blocks. // ReadOptions::pin_data requires this option to be disabled. // // Default: true bool use_delta_encoding = true; // If non-nullptr, use the specified filter policy to reduce disk reads. // Many applications will benefit from passing the result of // NewBloomFilterPolicy() here. std::shared_ptr filter_policy = nullptr; // If true, place whole keys in the filter (not just prefixes). // This must generally be true for gets to be efficient. bool whole_key_filtering = true; // Verify that decompressing the compressed block gives back the input. This // is a verification mode that we use to detect bugs in compression // algorithms. bool verify_compression = false; // If used, For every data block we load into memory, we will create a bitmap // of size ((block_size / `read_amp_bytes_per_bit`) / 8) bytes. This bitmap // will be used to figure out the percentage we actually read of the blocks.
// // When this feature is used Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES and // Tickers::READ_AMP_TOTAL_READ_BYTES can be used to calculate the // read amplification using this formula // (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) // // value => memory usage (percentage of loaded blocks memory) // 1 => 12.50 % // 2 => 06.25 % // 4 => 03.12 % // 8 => 01.56 % // 16 => 00.78 % // // Note: This number must be a power of 2, if not it will be sanitized // to be the next lowest power of 2, for example a value of 7 will be // treated as 4, a value of 19 will be treated as 16. // // Default: 0 (disabled) uint32_t read_amp_bytes_per_bit = 0; // We currently have six versions (0-5): // 0 -- This version is currently written out by all RocksDB's versions by // default. Can be read by really old RocksDB's. Doesn't support changing // checksum (default is CRC32). // 1 -- Can be read by RocksDB's versions since 3.0. Supports non-default // checksum, like xxHash. It is written by RocksDB when // BlockBasedTableOptions::checksum is something other than kCRC32c. (version // 0 is silently upconverted) // 2 -- Can be read by RocksDB's versions since 3.10. Changes the way we // encode compressed blocks with LZ4, BZip2 and Zlib compression. If you // don't plan to run RocksDB before version 3.10, you should probably use // this. // 3 -- Can be read by RocksDB's versions since 5.15. Changes the way we // encode the keys in index blocks. If you don't plan to run RocksDB before // version 5.15, you should probably use this. // This option only affects newly written tables. When reading existing // tables, the information about version is read from the footer. // 4 -- Can be read by RocksDB's versions since 5.16. Changes the way we // encode the values in index blocks. If you don't plan to run RocksDB before // version 5.16 and you are using index_block_restart_interval > 1, you should // probably use this as it would reduce the index size.
// This option only affects newly written tables. When reading existing // tables, the information about version is read from the footer. // 5 -- Can be read by RocksDB's versions since 6.6.0. Full and partitioned // filters use a generally faster and more accurate Bloom filter // implementation, with a different schema. // NOTE(review): the default below is 4, so the "written out ... by default" // wording in the description of version 0 looks outdated - confirm. uint32_t format_version = 4; // Store index blocks on disk in compressed format. Changing this option to // false will avoid the overhead of decompression if index blocks are evicted // and read back bool enable_index_compression = true; // Align data blocks on lesser of page size and block size bool block_align = false; // This enum allows trading off increased index size for improved iterator // seek performance in some situations, particularly when block cache is // disabled (ReadOptions::fill_cache = false) and direct IO is // enabled (DBOptions::use_direct_reads = true). // The default mode is the best tradeoff for most use cases. // This option only affects newly written tables. // // The index contains a key separating each pair of consecutive blocks. // Let A be the highest key in one block, B the lowest key in the next block, // and I the index entry separating these two blocks: // [ ... A] I [B ...] // I is allowed to be anywhere in [A, B). // If an iterator is seeked to a key in (A, I], we'll unnecessarily read the // first block, then immediately fall through to the second block. // However, if I=A, this can't happen, and we'll read only the second block. // In kNoShortening mode, we use I=A. In other modes, we use the shortest // key in [A, B), which usually significantly reduces index size. // // There's a similar story for the last index entry, which is an upper bound // of the highest key in the file. If it's shortened and therefore // overestimated, iterator is likely to unnecessarily read the last data block // from each file on each seek. enum class IndexShorteningMode : char { // Use full keys.
kNoShortening, // Shorten index keys between blocks, but use full key for the last index // key, which is the upper bound of the whole file. kShortenSeparators, // Shorten both keys between blocks and key after last block. kShortenSeparatorsAndSuccessor, }; IndexShorteningMode index_shortening = IndexShorteningMode::kShortenSeparators; }; // Table Properties that are specific to block-based table properties. struct BlockBasedTablePropertyNames { // value of this property is a fixed int32 number. static const std::string kIndexType; // value is "1" for true and "0" for false. static const std::string kWholeKeyFiltering; // value is "1" for true and "0" for false. static const std::string kPrefixFiltering; }; // Create default block based table factory. extern TableFactory* NewBlockBasedTableFactory( const BlockBasedTableOptions& table_options = BlockBasedTableOptions()); #ifndef ROCKSDB_LITE enum EncodingType : char { // Always write full keys without any special encoding. kPlain, // Find opportunity to write the same prefix once for multiple rows. // In some cases, when a key follows a previous key with the same prefix, // instead of writing out the full key, it just writes out the size of the // shared prefix, as well as other bytes, to save some bytes. // // When using this option, the user is required to use the same prefix // extractor to make sure the same prefix will be extracted from the same key. // The Name() value of the prefix extractor will be stored in the file. When // reopening the file, the name of the options.prefix_extractor given will be // bitwise compared to the prefix extractors stored in the file. An error // will be returned if the two don't match. kPrefix, }; // Table Properties that are specific to plain table properties.
struct PlainTablePropertyNames { static const std::string kEncodingType; static const std::string kBloomVersion; static const std::string kNumBloomBlocks; }; const uint32_t kPlainTableVariableLength = 0; struct PlainTableOptions { // @user_key_len: plain table has optimization for fix-sized keys, which can // be specified via user_key_len. Alternatively, you can pass // `kPlainTableVariableLength` if your keys have variable // lengths. uint32_t user_key_len = kPlainTableVariableLength; // @bloom_bits_per_key: the number of bits used for bloom filter per prefix. // You may disable it by passing a zero. int bloom_bits_per_key = 10; // @hash_table_ratio: the desired utilization of the hash table used for // prefix hashing. // hash_table_ratio = number of prefixes / #buckets in the // hash table double hash_table_ratio = 0.75; // @index_sparseness: inside each prefix, need to build one index record for // how many keys for binary search inside each hash bucket. // For encoding type kPrefix, the value will be used when // writing to determine an interval to rewrite the full // key. It will also be used as a suggestion and satisfied // when possible. size_t index_sparseness = 16; // @huge_page_tlb_size: if <=0, allocate hash indexes and blooms from malloc. // Otherwise from huge page TLB. The user needs to // reserve huge pages for it to be allocated, like: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt size_t huge_page_tlb_size = 0; // @encoding_type: how to encode the keys. See enum EncodingType above for // the choices. The value will determine how to encode keys // when writing to a new SST file. This value will be stored // inside the SST file which will be used when reading from // the file, which makes it possible for users to choose // different encoding type when reopening a DB. Files with // different encoding types can co-exist in the same DB and // can be read.
EncodingType encoding_type = kPlain; // @full_scan_mode: mode for reading the whole file one record by one without // using the index. bool full_scan_mode = false; // @store_index_in_file: compute plain table index and bloom filter during // file building and store it in file. When reading // file, index will be mmaped instead of recomputation. bool store_index_in_file = false; }; // -- Plain Table with prefix-only seek // For this factory, you need to set Options.prefix_extractor properly to make // it work. Look-up will start with prefix hash lookup for key prefix. Inside // the hash bucket found, a binary search is executed for hash conflicts. // Finally, a linear search is used. extern TableFactory* NewPlainTableFactory( const PlainTableOptions& options = PlainTableOptions()); struct CuckooTablePropertyNames { // The key that is used to fill empty buckets. static const std::string kEmptyKey; // Fixed length of value. static const std::string kValueLength; // Number of hash functions used in Cuckoo Hash. static const std::string kNumHashFunc; // It denotes the number of buckets in a Cuckoo Block. Given a key and a // particular hash function, a Cuckoo Block is a set of consecutive buckets, // where starting bucket id is given by the hash function on the key. In case // of a collision during inserting the key, the builder tries to insert the // key in other locations of the cuckoo block before using the next hash // function. This reduces cache miss during read operation in case of // collision. static const std::string kCuckooBlockSize; // Size of the hash table. Use this number to compute the modulo of hash // function. The actual number of buckets will be kMaxHashTableSize + // kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to // accommodate the Cuckoo Block from end of hash table, due to cache friendly // implementation.
static const std::string kHashTableSize; // Denotes if the key stored in the file is Internal Key (if false) // or User Key only (if true). static const std::string kIsLastLevel; // Indicate if using identity function for the first hash function. static const std::string kIdentityAsFirstHash; // Indicate if using modulo or bitwise AND to calculate hash value static const std::string kUseModuleHash; // Fixed user key length static const std::string kUserKeyLength; }; struct CuckooTableOptions { // Determines the utilization of hash tables. Smaller values // result in larger hash tables with fewer collisions. double hash_table_ratio = 0.9; // A property used by builder to determine the depth to go to // to search for a path to displace elements in case of // collision. See Builder.MakeSpaceForKey method. Higher // values result in more efficient hash tables with fewer // lookups but take more time to build. uint32_t max_search_depth = 100; // In case of collision while inserting, the builder // attempts to insert in the next cuckoo_block_size // locations before skipping over to the next Cuckoo hash // function. This makes lookups more cache friendly in case // of collisions. uint32_t cuckoo_block_size = 5; // If this option is enabled, user key is treated as uint64_t and its value // is used as hash value directly. This option changes builder's behavior. // Readers ignore this option and behave according to what is specified in // the table property. bool identity_as_first_hash = false; // If this option is set to true, modulo is used during hash calculation. // This often yields better space efficiency at the cost of performance. // If this option is set to false, # of entries in table is constrained to be // power of two, and bitwise AND is used to calculate hash, which is faster in // general.
bool use_module_hash = true; }; // Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing extern TableFactory* NewCuckooTableFactory( const CuckooTableOptions& table_options = CuckooTableOptions()); #endif // ROCKSDB_LITE class RandomAccessFileReader; // A base class for table factories. class TableFactory { public: virtual ~TableFactory() {} // The type of the table. // // The client of this package should switch to a new name whenever // the table format implementation changes. // // Names starting with "rocksdb." are reserved and should not be used // by any clients of this package. virtual const char* Name() const = 0; // Returns a Table object table that can fetch data from file specified // in parameter file. It's the caller's responsibility to make sure // file is in the correct format. // // NewTableReader() is called in three places: // (1) TableCache::FindTable() calls the function when table cache miss // and cache the table object returned. // (2) SstFileDumper (for SST Dump) opens the table and dump the table // contents using the iterator of the table. // (3) DBImpl::IngestExternalFile() calls this function to read the contents // of the sst file it's attempting to add // // table_reader_options is a TableReaderOptions which contain all the // needed parameters and configuration to open the table. // file is a file handler to handle the file for the table. // file_size is the physical file size of the file. // table_reader is the output table reader. virtual Status NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache = true) const = 0; // Return a table builder to write to a file for this table type.
// // It is called in several places: // (1) When flushing memtable to a level-0 output file, it creates a table // builder (In DBImpl::WriteLevel0Table(), by calling BuildTable()) // (2) During compaction, it gets the builder for writing compaction output // files in DBImpl::OpenCompactionOutputFile(). // (3) When recovering from transaction logs, it creates a table builder to // write to a level-0 output file (In DBImpl::WriteLevel0TableForRecovery, // by calling BuildTable()) // (4) When running Repairer, it creates a table builder to convert logs to // SST files (In Repairer::ConvertLogToTable() by calling BuildTable()) // // Multiple configured can be accessed from there, including and not limited // to compression options. file is a handle of a writable file. // It is the caller's responsibility to keep the file open and close the file // after closing the table builder. compression_type is the compression type // to use in this table. virtual TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const = 0; // Sanitizes the specified DB Options and ColumnFamilyOptions. // // If the function cannot find a way to sanitize the input DB Options, // a non-ok Status will be returned. virtual Status SanitizeOptions(const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const = 0; // Return a string that contains printable format of table configurations. // RocksDB prints configurations at DB Open(). virtual std::string GetPrintableTableOptions() const = 0; virtual Status GetOptionString(const ConfigOptions& /*config_options*/, std::string* /*opt_string*/) const { return Status::NotSupported( "The table factory doesn't implement GetOptionString()."); } // Returns the raw pointer of the table options that is used by this // TableFactory, or nullptr if this function is not supported. 
// Since the return value is a raw pointer, the TableFactory owns the // pointer and the caller should not delete the pointer. // // In certain case, it is desirable to alter the underlying options when the // TableFactory is not used by any open DB by casting the returned pointer // to the right class. For instance, if BlockBasedTableFactory is used, // then the pointer can be casted to BlockBasedTableOptions. // // Note that changing the underlying TableFactory options while the // TableFactory is currently used by any open DB is undefined behavior. // Developers should use DB::SetOption() instead to dynamically change // options while the DB is open. virtual void* GetOptions() { return nullptr; } // Return is delete range supported virtual bool IsDeleteRangeSupported() const { return false; } }; #ifndef ROCKSDB_LITE // Create a special table factory that can open either of the supported // table formats, based on setting inside the SST files. It should be used to // convert a DB from one table format to another. // @table_factory_to_write: the table factory used when writing to new files. // @block_based_table_factory: block based table factory to use. If NULL, use // a default one. // @plain_table_factory: plain table factory to use. If NULL, use a default one. // @cuckoo_table_factory: cuckoo table factory to use. If NULL, use a default // one. extern TableFactory* NewAdaptiveTableFactory( std::shared_ptr table_factory_to_write = nullptr, std::shared_ptr block_based_table_factory = nullptr, std::shared_ptr plain_table_factory = nullptr, std::shared_ptr cuckoo_table_factory = nullptr); #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/table_properties.h000066400000000000000000000241761370372246700220450ustar00rootroot00000000000000// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. #pragma once #include #include #include #include "rocksdb/status.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { // -- Table Properties // Other than basic table properties, each table may also have the user // collected properties. // The values of the user-collected properties are encoded as raw bytes -- // users have to interpret these values by themselves. // Note: To do prefix seek/scan in `UserCollectedProperties`, you can do // something similar to: // // UserCollectedProperties props = ...; // for (auto pos = props.lower_bound(prefix); // pos != props.end() && pos->first.compare(0, prefix.size(), prefix) == 0; // ++pos) { // ... // } typedef std::map UserCollectedProperties; // table properties' human-readable names in the property block. struct TablePropertiesNames { static const std::string kDataSize; static const std::string kIndexSize; static const std::string kIndexPartitions; static const std::string kTopLevelIndexSize; static const std::string kIndexKeyIsUserKey; static const std::string kIndexValueIsDeltaEncoded; static const std::string kFilterSize; static const std::string kRawKeySize; static const std::string kRawValueSize; static const std::string kNumDataBlocks; static const std::string kNumEntries; static const std::string kDeletedKeys; static const std::string kMergeOperands; static const std::string kNumRangeDeletions; static const std::string kFormatVersion; static const std::string kFixedKeyLen; static const std::string kFilterPolicy; static const std::string kColumnFamilyName; static const std::string kColumnFamilyId; static const std::string kComparator; static const std::string kMergeOperator; static const std::string kPrefixExtractorName; static const std::string kPropertyCollectors; static const std::string kCompression; static const std::string kCompressionOptions; static const std::string kCreationTime; 
static const std::string kOldestKeyTime; static const std::string kFileCreationTime; }; extern const std::string kPropertiesBlock; extern const std::string kCompressionDictBlock; extern const std::string kRangeDelBlock; // `TablePropertiesCollector` provides the mechanism for users to collect // their own properties that they are interested in. This class is essentially // a collection of callback functions that will be invoked during table // building. It is constructed with TablePropertiesCollectorFactory. The methods // don't need to be thread-safe, as we will create exactly one // TablePropertiesCollector object per table and then call it sequentially class TablePropertiesCollector { public: virtual ~TablePropertiesCollector() {} // DEPRECATED: user-defined collectors should implement AddUserKey(), though // this old function still works for backward-compatibility reasons. // Add() will be called when a new key/value pair is inserted into the table. // @params key the user key that is inserted into the table. // @params value the value that is inserted into the table. virtual Status Add(const Slice& /*key*/, const Slice& /*value*/) { return Status::InvalidArgument( "TablePropertiesCollector::Add() deprecated."); } // AddUserKey() will be called when a new key/value pair is inserted into the // table. // @params key the user key that is inserted into the table. // @params value the value that is inserted into the table. virtual Status AddUserKey(const Slice& key, const Slice& value, EntryType /*type*/, SequenceNumber /*seq*/, uint64_t /*file_size*/) { // For backwards-compatibility. return Add(key, value); } // Called after each new block is cut virtual void BlockAdd(uint64_t /* blockRawBytes */, uint64_t /* blockCompressedBytesFast */, uint64_t /* blockCompressedBytesSlow */) { // Nothing to do here. Callback registers can override. return; } // Finish() will be called when a table has already been built and is ready // for writing the properties block. 
// @params properties User will add their collected statistics to // `properties`. virtual Status Finish(UserCollectedProperties* properties) = 0; // Return the human-readable properties, where the key is property name and // the value is the human-readable form of value. virtual UserCollectedProperties GetReadableProperties() const = 0; // The name of the properties collector can be used for debugging purpose. virtual const char* Name() const = 0; // EXPERIMENTAL Return whether the output file should be further compacted virtual bool NeedCompact() const { return false; } }; // Constructs TablePropertiesCollector. Internals create a new // TablePropertiesCollector for each new table class TablePropertiesCollectorFactory { public: struct Context { uint32_t column_family_id; static const uint32_t kUnknownColumnFamily; }; virtual ~TablePropertiesCollectorFactory() {} // has to be thread-safe virtual TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context context) = 0; // The name of the properties collector can be used for debugging purpose. virtual const char* Name() const = 0; // Can be overridden by sub-classes to return the Name, followed by // configuration info that will // be logged to the info log when the // DB is opened virtual std::string ToString() const { return Name(); } }; // TableProperties contains a bunch of read-only properties of its associated // table. struct TableProperties { public: // the total size of all data blocks. uint64_t data_size = 0; // the size of index block. uint64_t index_size = 0; // Total number of index partitions if kTwoLevelIndexSearch is used uint64_t index_partitions = 0; // Size of the top-level index if kTwoLevelIndexSearch is used uint64_t top_level_index_size = 0; // Whether the index key is user key. Otherwise it includes 8 bytes of sequence // number added by internal key format. uint64_t index_key_is_user_key = 0; // Whether delta encoding is used to encode the index values. 
uint64_t index_value_is_delta_encoded = 0; // the size of filter block. uint64_t filter_size = 0; // total raw key size uint64_t raw_key_size = 0; // total raw value size uint64_t raw_value_size = 0; // the number of blocks in this table uint64_t num_data_blocks = 0; // the number of entries in this table uint64_t num_entries = 0; // the number of deletions in the table uint64_t num_deletions = 0; // the number of merge operands in the table uint64_t num_merge_operands = 0; // the number of range deletions in this table uint64_t num_range_deletions = 0; // format version, reserved for backward compatibility uint64_t format_version = 0; // If 0, key is variable length. Otherwise number of bytes for each key. uint64_t fixed_key_len = 0; // ID of column family for this SST file, corresponding to the CF identified // by column_family_name. uint64_t column_family_id = ROCKSDB_NAMESPACE:: TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; // Timestamp of the latest key. 0 means unknown. // TODO(sagar0): Should be changed to latest_key_time ... but don't know the // full implications of backward compatibility. Hence retaining for now. uint64_t creation_time = 0; // Timestamp of the earliest key. 0 means unknown. uint64_t oldest_key_time = 0; // Actual SST file creation time. 0 means unknown. uint64_t file_creation_time = 0; // Name of the column family with which this SST file is associated. // If column family is unknown, `column_family_name` will be an empty string. std::string column_family_name; // The name of the filter policy used in this table. // If no filter policy is used, `filter_policy_name` will be an empty string. std::string filter_policy_name; // The name of the comparator used in this table. std::string comparator_name; // The name of the merge operator used in this table. // If no merge operator is used, `merge_operator_name` will be "nullptr". 
std::string merge_operator_name; // The name of the prefix extractor used in this table // If no prefix extractor is used, `prefix_extractor_name` will be "nullptr". std::string prefix_extractor_name; // The names of the property collectors factories used in this table // separated by commas // {collector_name[1]},{collector_name[2]},{collector_name[3]} .. std::string property_collectors_names; // The compression algo used to compress the SST files. std::string compression_name; // Compression options used to compress the SST files. std::string compression_options; // user collected properties UserCollectedProperties user_collected_properties; UserCollectedProperties readable_properties; // The offset of the value of each property in the file. std::map properties_offsets; // convert this object to a human readable form // @prop_delim: delimiter for each property. std::string ToString(const std::string& prop_delim = "; ", const std::string& kv_delim = "=") const; // Aggregate the numerical member variables of the specified // TableProperties. void Add(const TableProperties& tp); }; // Extra properties // Below is a list of non-basic properties that are collected by database // itself. Especially some properties regarding the internal keys (which // is unknown to `table`). // // DEPRECATED: these properties now belong as TableProperties members. Please // use TableProperties::num_deletions and TableProperties::num_merge_operands, // respectively. extern uint64_t GetDeletedKeys(const UserCollectedProperties& props); extern uint64_t GetMergeOperands(const UserCollectedProperties& props, bool* property_present); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/thread_status.h000066400000000000000000000144401370372246700213450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file defines the structures for exposing run-time status of any // rocksdb-related thread. Such run-time status can be obtained via // GetThreadList() API. // // Note that all thread-status features are still under-development, and // thus APIs and class definitions might be subject to change at this point. // Will remove this comment once the APIs have been finalized. #pragma once #include #include #include #include #include #include #if !defined(ROCKSDB_LITE) && !defined(NROCKSDB_THREAD_STATUS) && \ defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #define ROCKSDB_USING_THREAD_STATUS #endif namespace ROCKSDB_NAMESPACE { // TODO(yhchiang): remove this function once c++14 is available // as std::max will be able to cover this. // Current MS compiler does not support constexpr template struct constexpr_max { static const int result = (A > B) ? A : B; }; // A structure that describes the current status of a thread. // The status of active threads can be fetched using // ROCKSDB_NAMESPACE::GetThreadList(). struct ThreadStatus { // The type of a thread. enum ThreadType : int { HIGH_PRIORITY = 0, // RocksDB BG thread in high-pri thread pool LOW_PRIORITY, // RocksDB BG thread in low-pri thread pool USER, // User thread (Non-RocksDB BG thread) BOTTOM_PRIORITY, // RocksDB BG thread in bottom-pri thread pool NUM_THREAD_TYPES }; // The type used to refer to a thread operation. // A thread operation describes high-level action of a thread. // Examples include compaction and flush. 
enum OperationType : int { OP_UNKNOWN = 0, OP_COMPACTION, OP_FLUSH, NUM_OP_TYPES }; enum OperationStage : int { STAGE_UNKNOWN = 0, STAGE_FLUSH_RUN, STAGE_FLUSH_WRITE_L0, STAGE_COMPACTION_PREPARE, STAGE_COMPACTION_RUN, STAGE_COMPACTION_PROCESS_KV, STAGE_COMPACTION_INSTALL, STAGE_COMPACTION_SYNC_FILE, STAGE_PICK_MEMTABLES_TO_FLUSH, STAGE_MEMTABLE_ROLLBACK, STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS, NUM_OP_STAGES }; enum CompactionPropertyType : int { COMPACTION_JOB_ID = 0, COMPACTION_INPUT_OUTPUT_LEVEL, COMPACTION_PROP_FLAGS, COMPACTION_TOTAL_INPUT_BYTES, COMPACTION_BYTES_READ, COMPACTION_BYTES_WRITTEN, NUM_COMPACTION_PROPERTIES }; enum FlushPropertyType : int { FLUSH_JOB_ID = 0, FLUSH_BYTES_MEMTABLES, FLUSH_BYTES_WRITTEN, NUM_FLUSH_PROPERTIES }; // The maximum number of properties of an operation. // This number should be set to the biggest NUM_XXX_PROPERTIES. static const int kNumOperationProperties = constexpr_max::result; // The type used to refer to a thread state. // A state describes lower-level action of a thread // such as reading / writing a file or waiting for a mutex. enum StateType : int { STATE_UNKNOWN = 0, STATE_MUTEX_WAIT = 1, NUM_STATE_TYPES }; ThreadStatus(const uint64_t _id, const ThreadType _thread_type, const std::string& _db_name, const std::string& _cf_name, const OperationType _operation_type, const uint64_t _op_elapsed_micros, const OperationStage _operation_stage, const uint64_t _op_props[], const StateType _state_type) : thread_id(_id), thread_type(_thread_type), db_name(_db_name), cf_name(_cf_name), operation_type(_operation_type), op_elapsed_micros(_op_elapsed_micros), operation_stage(_operation_stage), state_type(_state_type) { for (int i = 0; i < kNumOperationProperties; ++i) { op_properties[i] = _op_props[i]; } } // A unique ID for the thread. 
const uint64_t thread_id; // The type of the thread; it could be HIGH_PRIORITY, // LOW_PRIORITY, USER, or BOTTOM_PRIORITY const ThreadType thread_type; // The name of the DB instance where the thread is currently // involved with. It would be set to empty string if the thread // is not involved in any DB operation. const std::string db_name; // The name of the column family where the thread is currently involved. // It would be set to empty string if the thread is not involved // in any column family. const std::string cf_name; // The operation (high-level action) that the current thread is involved. const OperationType operation_type; // The elapsed time of the current thread operation in microseconds. const uint64_t op_elapsed_micros; // An integer showing the current stage where the thread is involved // in the current operation. const OperationStage operation_stage; // A list of properties that describe some details about the current // operation. Same field in op_properties[] might have different // meanings for different operations. uint64_t op_properties[kNumOperationProperties]; // The state (lower-level action) that the current thread is involved. const StateType state_type; // The followings are a set of utility functions for interpreting // the information of ThreadStatus static std::string GetThreadTypeName(ThreadType thread_type); // Obtain the name of an operation given its type. static const std::string& GetOperationName(OperationType op_type); static const std::string MicrosToString(uint64_t op_elapsed_time); // Obtain a human-readable string describing the specified operation stage. static const std::string& GetOperationStageName(OperationStage stage); // Obtain the name of the "i"th operation property of the // specified operation. static const std::string& GetOperationPropertyName(OperationType op_type, int i); // Translate the "i"th property of the specified operation given // a property value. 
static std::map InterpretOperationProperties( OperationType op_type, const uint64_t* op_properties); // Obtain the name of a state given its type. static const std::string& GetStateName(StateType state_type); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/threadpool.h000066400000000000000000000040211370372246700206260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { /* * ThreadPool is a component that will spawn N background threads that will * be used to execute scheduled work. The number of background threads could * be modified by calling SetBackgroundThreads(). * */ class ThreadPool { public: virtual ~ThreadPool() {} // Wait for all threads to finish. // Discard those threads that did not start // executing virtual void JoinAllThreads() = 0; // Set the number of background threads that will be executing the // scheduled jobs. virtual void SetBackgroundThreads(int num) = 0; virtual int GetBackgroundThreads() = 0; // Get the number of jobs scheduled in the ThreadPool queue. virtual unsigned int GetQueueLen() const = 0; // Waits for all jobs to complete those // that already started running and those that did not // start yet. 
This ensures that everything that was thrown // on the TP runs even though // we may not have specified enough threads for the amount // of jobs virtual void WaitForJobsAndJoinAllThreads() = 0; // Submit a fire-and-forget job // This allows submitting the same job multiple times virtual void SubmitJob(const std::function&) = 0; // This moves the function in for efficiency virtual void SubmitJob(std::function&&) = 0; }; // NewThreadPool() is a function that could be used to create a ThreadPool // with `num_threads` background threads. extern ThreadPool* NewThreadPool(int num_threads); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/trace_reader_writer.h000066400000000000000000000033051370372246700225050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { // Allow custom implementations of TraceWriter and TraceReader. // By default, RocksDB provides a way to capture the traces to a file using the // factory NewFileTraceWriter(). But users could also choose to export traces to // any other system by providing custom implementations of TraceWriter and // TraceReader. // TraceWriter allows exporting RocksDB traces to any system, one operation at // a time. class TraceWriter { public: TraceWriter() {} virtual ~TraceWriter() {} virtual Status Write(const Slice& data) = 0; virtual Status Close() = 0; virtual uint64_t GetFileSize() = 0; }; // TraceReader allows reading RocksDB traces from any system, one operation at // a time. A RocksDB Replayer could depend on this to replay operations. 
class TraceReader { public: TraceReader() {} virtual ~TraceReader() {} virtual Status Read(std::string* data) = 0; virtual Status Close() = 0; }; // Factory methods to read/write traces from/to a file. Status NewFileTraceWriter(Env* env, const EnvOptions& env_options, const std::string& trace_filename, std::unique_ptr* trace_writer); Status NewFileTraceReader(Env* env, const EnvOptions& env_options, const std::string& trace_filename, std::unique_ptr* trace_reader); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/transaction_log.h000066400000000000000000000073421370372246700216640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/status.h" #include "rocksdb/types.h" #include "rocksdb/write_batch.h" namespace ROCKSDB_NAMESPACE { class LogFile; typedef std::vector> VectorLogPtr; enum WalFileType { /* Indicates that WAL file is in archive directory. WAL files are moved from * the main db directory to archive directory once they are not live and stay * there until cleaned up. Files are cleaned depending on archive size * (Options::WAL_size_limit_MB) and time since last cleaning * (Options::WAL_ttl_seconds). */ kArchivedLogFile = 0, /* Indicates that WAL file is live and resides in the main db directory */ kAliveLogFile = 1 }; class LogFile { public: LogFile() {} virtual ~LogFile() {} // Returns log file's pathname relative to the main db dir // Eg. For a live-log-file = /000003.log // For an archived-log-file = /archive/000003.log virtual std::string PathName() const = 0; // Primary identifier for log file. 
// This is directly proportional to creation time of the log file virtual uint64_t LogNumber() const = 0; // Log file can be either alive or archived virtual WalFileType Type() const = 0; // Starting sequence number of writebatch written in this log file virtual SequenceNumber StartSequence() const = 0; // Size of log file on disk in Bytes virtual uint64_t SizeFileBytes() const = 0; }; struct BatchResult { SequenceNumber sequence = 0; std::unique_ptr writeBatchPtr; // Add empty __ctor and __dtor for the rule of five // However, preserve the original semantics and prohibit copying // as the std::unique_ptr member does not copy. BatchResult() {} ~BatchResult() {} BatchResult(const BatchResult&) = delete; BatchResult& operator=(const BatchResult&) = delete; BatchResult(BatchResult&& bResult) : sequence(std::move(bResult.sequence)), writeBatchPtr(std::move(bResult.writeBatchPtr)) {} BatchResult& operator=(BatchResult&& bResult) { sequence = std::move(bResult.sequence); writeBatchPtr = std::move(bResult.writeBatchPtr); return *this; } }; // A TransactionLogIterator is used to iterate over the transactions in a db. // One run of the iterator is continuous, i.e. the iterator will stop at the // beginning of any gap in sequences class TransactionLogIterator { public: TransactionLogIterator() {} virtual ~TransactionLogIterator() {} // An iterator is either positioned at a WriteBatch or not valid. // This method returns true if the iterator is valid. // Can read data from a valid iterator. virtual bool Valid() = 0; // Moves the iterator to the next WriteBatch. // REQUIRES: Valid() to be true. virtual void Next() = 0; // Returns ok if the iterator is valid. // Returns the Error when something has gone wrong. virtual Status status() = 0; // If valid, returns the current write_batch and the sequence number of the // earliest transaction contained in the batch. // ONLY use if Valid() is true and status() is OK. 
virtual BatchResult GetBatch() = 0; // The read options for TransactionLogIterator. struct ReadOptions { // If true, all data read from underlying storage will be // verified against corresponding checksums. // Default: true bool verify_checksums_; ReadOptions() : verify_checksums_(true) {} explicit ReadOptions(bool verify_checksums) : verify_checksums_(verify_checksums) {} }; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/types.h000066400000000000000000000026731370372246700176440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { // Define all public custom types here. // Represents a sequence number in a WAL file. typedef uint64_t SequenceNumber; const SequenceNumber kMinUnCommittedSeq = 1; // 0 is always committed // User-oriented representation of internal key types. enum EntryType { kEntryPut, kEntryDelete, kEntrySingleDelete, kEntryMerge, kEntryRangeDeletion, kEntryBlobIndex, kEntryOther, }; // tuple. struct FullKey { Slice user_key; SequenceNumber sequence; EntryType type; FullKey() : sequence(0) {} // Intentionally left uninitialized (for speed) FullKey(const Slice& u, const SequenceNumber& seq, EntryType t) : user_key(u), sequence(seq), type(t) {} std::string DebugString(bool hex = false) const; void clear() { user_key.clear(); sequence = 0; type = EntryType::kEntryPut; } }; // Parse slice representing internal key to FullKey // Parsed FullKey is valid for as long as the memory pointed to by // internal_key is alive. 
bool ParseFullKey(const Slice& internal_key, FullKey* result); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/universal_compaction.h000066400000000000000000000065141370372246700227220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include namespace ROCKSDB_NAMESPACE { // // Algorithm used to make a compaction request stop picking new files // into a single compaction run // enum CompactionStopStyle { kCompactionStopStyleSimilarSize, // pick files of similar size kCompactionStopStyleTotalSize // total size of picked files > next file }; class CompactionOptionsUniversal { public: // Percentage flexibility while comparing file size. If the candidate file(s) // size is 1% smaller than the next file's size, then include next file into // this candidate set. // Default: 1 unsigned int size_ratio; // The minimum number of files in a single compaction run. Default: 2 unsigned int min_merge_width; // The maximum number of files in a single compaction run. Default: UINT_MAX unsigned int max_merge_width; // The size amplification is defined as the amount (in percentage) of // additional storage needed to store a single byte of data in the database. // For example, a size amplification of 2% means that a database that // contains 100 bytes of user-data may occupy up to 102 bytes of // physical storage. By this definition, a fully compacted database has // a size amplification of 0%. Rocksdb uses the following heuristic // to calculate size amplification: it assumes that all files excluding // the earliest file contribute to the size amplification. // Default: 200, which means that a 100 byte database could require up to // 300 bytes of storage. 
unsigned int max_size_amplification_percent; // If this option is set to be -1 (the default value), all the output files // will follow compression type specified. // // If this option is not negative, we will try to make sure compressed // size is just above this value. In normal cases, at least this percentage // of data will be compressed. // When we are compacting to a new file, here are the criteria for whether // it needs to be compressed: assuming this is the list of files sorted // by generation time: // A1...An B1...Bm C1...Ct // where A1 is the newest and Ct is the oldest, and we are going to compact // B1...Bm, we calculate the total size of all the files as total_size, as // well as the total size of C1...Ct as total_C, the compaction output file // will be compressed iff // total_C / total_size < this percentage // Default: -1 int compression_size_percent; // The algorithm used to stop picking files into a single compaction run // Default: kCompactionStopStyleTotalSize CompactionStopStyle stop_style; // Option to optimize the universal multi level compaction by enabling // trivial move for non overlapping files. // Default: false bool allow_trivial_move; // Default set of parameters CompactionOptionsUniversal() : size_ratio(1), min_merge_width(2), max_merge_width(UINT_MAX), max_size_amplification_percent(200), compression_size_percent(-1), stop_style(kCompactionStopStyleTotalSize), allow_trivial_move(false) {} }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/000077500000000000000000000000001370372246700203325ustar00rootroot00000000000000rocksdb-6.11.4/include/rocksdb/utilities/backupable_db.h000066400000000000000000000377251370372246700232570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include #include "rocksdb/utilities/stackable_db.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { struct BackupableDBOptions { // Where to keep the backup files. Has to be different than dbname_ // Best to set this to dbname_ + "/backups" // Required std::string backup_dir; // Backup Env object. It will be used for backup file I/O. If it's // nullptr, backups will be written out using DBs Env. If it's // non-nullptr, backup's I/O will be performed using this object. // If you want to have backups on HDFS, use HDFS Env here! // Default: nullptr Env* backup_env; // If share_table_files == true, backup will assume that table files with // same name have the same contents. This enables incremental backups and // avoids unnecessary data copies. // If share_table_files == false, each backup will be on its own and will // not share any data with other backups. // default: true bool share_table_files; // Backup info and error messages will be written to info_log // if non-nullptr. // Default: nullptr Logger* info_log; // If sync == true, we can guarantee you'll get consistent backup even // on a machine crash/reboot. Backup process is slower with sync enabled. // If sync == false, we don't guarantee anything on machine reboot. However, // chances are some of the backups are consistent. // Default: true bool sync; // If true, it will delete whatever backups there are already // Default: false bool destroy_old_data; // If false, we won't backup log files. This option can be useful for backing // up in-memory databases where log file are persisted, but table files are in // memory. 
// Default: true bool backup_log_files; // Max bytes that can be transferred in a second during backup. // If 0, go as fast as you can // Default: 0 uint64_t backup_rate_limit; // Backup rate limiter. Used to control transfer speed for backup. If this is // not null, backup_rate_limit is ignored. // Default: nullptr std::shared_ptr backup_rate_limiter{nullptr}; // Max bytes that can be transferred in a second during restore. // If 0, go as fast as you can // Default: 0 uint64_t restore_rate_limit; // Restore rate limiter. Used to control transfer speed during restore. If // this is not null, restore_rate_limit is ignored. // Default: nullptr std::shared_ptr restore_rate_limiter{nullptr}; // Only used if share_table_files is set to true. If true, will consider that // backups can come from different databases, hence a sst is not uniquely // identifed by its name, but by the triple (file name, crc32c, file length) // Default: false // Note: this is an experimental option, and you'll need to set it manually // *turn it on only if you know what you're doing* bool share_files_with_checksum; // Up to this many background threads will copy files for CreateNewBackup() // and RestoreDBFromBackup() // Default: 1 int max_background_operations; // During backup user can get callback every time next // callback_trigger_interval_size bytes being copied. // Default: 4194304 uint64_t callback_trigger_interval_size; // For BackupEngineReadOnly, Open() will open at most this many of the // latest non-corrupted backups. // // Note: this setting is ignored (behaves like INT_MAX) for any kind of // writable BackupEngine because it would inhibit accounting for shared // files for proper backup deletion, including purging any incompletely // created backups on creation of a new backup. 
// // Default: INT_MAX int max_valid_backups_to_open; void Dump(Logger* logger) const; explicit BackupableDBOptions( const std::string& _backup_dir, Env* _backup_env = nullptr, bool _share_table_files = true, Logger* _info_log = nullptr, bool _sync = true, bool _destroy_old_data = false, bool _backup_log_files = true, uint64_t _backup_rate_limit = 0, uint64_t _restore_rate_limit = 0, int _max_background_operations = 1, uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024, int _max_valid_backups_to_open = INT_MAX) : backup_dir(_backup_dir), backup_env(_backup_env), share_table_files(_share_table_files), info_log(_info_log), sync(_sync), destroy_old_data(_destroy_old_data), backup_log_files(_backup_log_files), backup_rate_limit(_backup_rate_limit), restore_rate_limit(_restore_rate_limit), share_files_with_checksum(false), max_background_operations(_max_background_operations), callback_trigger_interval_size(_callback_trigger_interval_size), max_valid_backups_to_open(_max_valid_backups_to_open) { assert(share_table_files || !share_files_with_checksum); } }; struct CreateBackupOptions { // Flush will always trigger if 2PC is enabled. // If write-ahead logs are disabled, set flush_before_backup=true to // avoid losing unflushed key/value pairs from the memtable. bool flush_before_backup = false; // Callback for reporting progress. std::function progress_callback = []() {}; // If false, background_thread_cpu_priority is ignored. // Otherwise, the cpu priority can be decreased, // if you try to increase the priority, the priority will not change. // The initial priority of the threads is CpuPriority::kNormal, // so you can decrease to priorities lower than kNormal. bool decrease_background_thread_cpu_priority = false; CpuPriority background_thread_cpu_priority = CpuPriority::kNormal; }; struct RestoreOptions { // If true, restore won't overwrite the existing log files in wal_dir. It will // also move all log files from archive directory to wal_dir. 
Use this option // in combination with BackupableDBOptions::backup_log_files = false for // persisting in-memory databases. // Default: false bool keep_log_files; explicit RestoreOptions(bool _keep_log_files = false) : keep_log_files(_keep_log_files) {} }; typedef uint32_t BackupID; struct BackupInfo { BackupID backup_id; int64_t timestamp; uint64_t size; uint32_t number_files; std::string app_metadata; BackupInfo() {} BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size, uint32_t _number_files, const std::string& _app_metadata) : backup_id(_backup_id), timestamp(_timestamp), size(_size), number_files(_number_files), app_metadata(_app_metadata) {} }; class BackupStatistics { public: BackupStatistics() { number_success_backup = 0; number_fail_backup = 0; } BackupStatistics(uint32_t _number_success_backup, uint32_t _number_fail_backup) : number_success_backup(_number_success_backup), number_fail_backup(_number_fail_backup) {} ~BackupStatistics() {} void IncrementNumberSuccessBackup(); void IncrementNumberFailBackup(); uint32_t GetNumberSuccessBackup() const; uint32_t GetNumberFailBackup() const; std::string ToString() const; private: uint32_t number_success_backup; uint32_t number_fail_backup; }; // A backup engine for accessing information about backups and restoring from // them. // BackupEngineReadOnly is not extensible. class BackupEngineReadOnly { public: virtual ~BackupEngineReadOnly() {} static Status Open(const BackupableDBOptions& options, Env* db_env, BackupEngineReadOnly** backup_engine_ptr); // keep for backward compatibility. 
static Status Open(Env* db_env, const BackupableDBOptions& options, BackupEngineReadOnly** backup_engine_ptr) { return BackupEngineReadOnly::Open(options, db_env, backup_engine_ptr); } // Returns info about backups in backup_info // You can GetBackupInfo safely, even with other BackupEngine performing // backups on the same directory virtual void GetBackupInfo(std::vector* backup_info) = 0; // Returns info about corrupt backups in corrupt_backups virtual void GetCorruptedBackups( std::vector* corrupt_backup_ids) = 0; // Restoring DB from backup is NOT safe when there is another BackupEngine // running that might call DeleteBackup() or PurgeOldBackups(). It is caller's // responsibility to synchronize the operation, i.e. don't delete the backup // when you're restoring from it // See also the corresponding doc in BackupEngine virtual Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id, const std::string& db_dir, const std::string& wal_dir) = 0; // keep for backward compatibility. virtual Status RestoreDBFromBackup( BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, const RestoreOptions& options = RestoreOptions()) { return RestoreDBFromBackup(options, backup_id, db_dir, wal_dir); } // See the corresponding doc in BackupEngine virtual Status RestoreDBFromLatestBackup(const RestoreOptions& options, const std::string& db_dir, const std::string& wal_dir) = 0; // keep for backward compatibility. virtual Status RestoreDBFromLatestBackup( const std::string& db_dir, const std::string& wal_dir, const RestoreOptions& options = RestoreOptions()) { return RestoreDBFromLatestBackup(options, db_dir, wal_dir); } // checks that each file exists and that the size of the file matches our // expectations. it does not check file checksum. // // If this BackupEngine created the backup, it compares the files' current // sizes against the number of bytes written to them during creation. 
// Otherwise, it compares the files' current sizes against their sizes when // the BackupEngine was opened. // // Returns Status::OK() if all checks are good virtual Status VerifyBackup(BackupID backup_id) = 0; }; // A backup engine for creating new backups. // BackupEngine is not extensible. class BackupEngine { public: virtual ~BackupEngine() {} // BackupableDBOptions have to be the same as the ones used in previous // BackupEngines for the same backup directory. static Status Open(const BackupableDBOptions& options, Env* db_env, BackupEngine** backup_engine_ptr); // keep for backward compatibility. static Status Open(Env* db_env, const BackupableDBOptions& options, BackupEngine** backup_engine_ptr) { return BackupEngine::Open(options, db_env, backup_engine_ptr); } // same as CreateNewBackup, but stores extra application metadata. virtual Status CreateNewBackupWithMetadata( const CreateBackupOptions& options, DB* db, const std::string& app_metadata) = 0; // keep here for backward compatibility. virtual Status CreateNewBackupWithMetadata( DB* db, const std::string& app_metadata, bool flush_before_backup = false, std::function progress_callback = []() {}) { CreateBackupOptions options; options.flush_before_backup = flush_before_backup; options.progress_callback = progress_callback; return CreateNewBackupWithMetadata(options, db, app_metadata); } // Captures the state of the database in the latest backup // NOT a thread safe call virtual Status CreateNewBackup(const CreateBackupOptions& options, DB* db) { return CreateNewBackupWithMetadata(options, db, ""); } // keep here for backward compatibility. virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false, std::function progress_callback = []() {}) { CreateBackupOptions options; options.flush_before_backup = flush_before_backup; options.progress_callback = progress_callback; return CreateNewBackup(options, db); } // Deletes old backups, keeping latest num_backups_to_keep alive. 
// See also DeleteBackup. virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0; // Deletes a specific backup. If this operation (or PurgeOldBackups) // is not completed due to crash, power failure, etc. the state // will be cleaned up the next time you call DeleteBackup, // PurgeOldBackups, or GarbageCollect. virtual Status DeleteBackup(BackupID backup_id) = 0; // Call this from another thread if you want to stop the backup // that is currently happening. It will return immediatelly, will // not wait for the backup to stop. // The backup will stop ASAP and the call to CreateNewBackup will // return Status::Incomplete(). It will not clean up after itself, but // the state will remain consistent. The state will be cleaned up the // next time you call CreateNewBackup or GarbageCollect. virtual void StopBackup() = 0; // Returns info about backups in backup_info virtual void GetBackupInfo(std::vector* backup_info) = 0; // Returns info about corrupt backups in corrupt_backups virtual void GetCorruptedBackups( std::vector* corrupt_backup_ids) = 0; // restore from backup with backup_id // IMPORTANT -- if options_.share_table_files == true, // options_.share_files_with_checksum == false, you restore DB from some // backup that is not the latest, and you start creating new backups from the // new DB, they will probably fail. // // Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3. // If you add new data to the DB and try creating a new backup now, the // database will diverge from backups 4 and 5 and the new backup will fail. // If you want to create new backup, you will first have to delete backups 4 // and 5. virtual Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id, const std::string& db_dir, const std::string& wal_dir) = 0; // keep for backward compatibility. 
virtual Status RestoreDBFromBackup( BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, const RestoreOptions& options = RestoreOptions()) { return RestoreDBFromBackup(options, backup_id, db_dir, wal_dir); } // restore from the latest backup virtual Status RestoreDBFromLatestBackup(const RestoreOptions& options, const std::string& db_dir, const std::string& wal_dir) = 0; // keep for backward compatibility. virtual Status RestoreDBFromLatestBackup( const std::string& db_dir, const std::string& wal_dir, const RestoreOptions& options = RestoreOptions()) { return RestoreDBFromLatestBackup(options, db_dir, wal_dir); } // checks that each file exists and that the size of the file matches our // expectations. it does not check file checksum. // Returns Status::OK() if all checks are good virtual Status VerifyBackup(BackupID backup_id) = 0; // Will delete any files left over from incomplete creation or deletion of // a backup. This is not normally needed as those operations also clean up // after prior incomplete calls to the same kind of operation (create or // delete). // NOTE: This is not designed to delete arbitrary files added to the backup // directory outside of BackupEngine, and clean-up is always subject to // permissions on and availability of the underlying filesystem. virtual Status GarbageCollect() = 0; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/checkpoint.h000066400000000000000000000051251370372246700226350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // A checkpoint is an openable snapshot of a database at a point in time. 
#pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class DB; class ColumnFamilyHandle; struct LiveFileMetaData; struct ExportImportFilesMetaData; class Checkpoint { public: // Creates a Checkpoint object to be used for creating openable snapshots static Status Create(DB* db, Checkpoint** checkpoint_ptr); // Builds an openable snapshot of RocksDB on the same disk, which // accepts an output directory on the same disk, and under the directory // (1) hard-linked SST files pointing to existing live SST files // SST files will be copied if output directory is on a different filesystem // (2) a copied manifest files and other files // The directory should not already exist and will be created by this API. // The directory will be an absolute path // log_size_for_flush: if the total log file size is equal or larger than // this value, then a flush is triggered for all the column families. The // default value is 0, which means flush is always triggered. If you move // away from the default, the checkpoint may not contain up-to-date data // if WAL writing is not always enabled. // Flush will always trigger if it is 2PC. // sequence_number_ptr: if it is not nullptr, the value it points to will be // set to the DB's sequence number. The default value of this parameter is // nullptr. virtual Status CreateCheckpoint(const std::string& checkpoint_dir, uint64_t log_size_for_flush = 0, uint64_t* sequence_number_ptr = nullptr); // Exports all live SST files of a specified Column Family onto export_dir, // returning SST files information in metadata. // - SST files will be created as hard links when the directory specified // is in the same partition as the db directory, copied otherwise. // - export_dir should not already exist and will be created by this API. // - Always triggers a flush. 
virtual Status ExportColumnFamily(ColumnFamilyHandle* handle, const std::string& export_dir, ExportImportFilesMetaData** metadata); virtual ~Checkpoint() {} }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/convenience.h000066400000000000000000000005471370372246700230050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once // This file was moved to rocksdb/convenience.h" #include "rocksdb/convenience.h" rocksdb-6.11.4/include/rocksdb/utilities/db_ttl.h000066400000000000000000000055111370372246700217550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/db.h" #include "rocksdb/utilities/stackable_db.h" namespace ROCKSDB_NAMESPACE { // Database with TTL support. // // USE-CASES: // This API should be used to open the db when key-values inserted are // meant to be removed from the db in a non-strict 'ttl' amount of time // Therefore, this guarantees that key-values inserted will remain in the // db for >= ttl amount of time and the db will make efforts to remove the // key-values as soon as possible after ttl seconds of their insertion. // // BEHAVIOUR: // TTL is accepted in seconds // (int32_t)Timestamp(creation) is suffixed to values in Put internally // Expired TTL values deleted in compaction only:(Timestamp+ttl=5 // read_only=true opens in the usual read-only mode. 
Compactions will not be // triggered(neither manual nor automatic), so no expired entries removed // // CONSTRAINTS: // Not specifying/passing or non-positive TTL behaves like TTL = infinity // // !!!WARNING!!!: // Calling DB::Open directly to re-open a db created by this API will get // corrupt values(timestamp suffixed) and no ttl effect will be there // during the second Open, so use this API consistently to open the db // Be careful when passing ttl with a small positive value because the // whole database may be deleted in a small amount of time class DBWithTTL : public StackableDB { public: virtual Status CreateColumnFamilyWithTtl( const ColumnFamilyOptions& options, const std::string& column_family_name, ColumnFamilyHandle** handle, int ttl) = 0; static Status Open(const Options& options, const std::string& dbname, DBWithTTL** dbptr, int32_t ttl = 0, bool read_only = false); static Status Open(const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DBWithTTL** dbptr, std::vector ttls, bool read_only = false); virtual void SetTtl(int32_t ttl) = 0; virtual void SetTtl(ColumnFamilyHandle* h, int32_t ttl) = 0; protected: explicit DBWithTTL(DB* db) : StackableDB(db) {} }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/debug.h000066400000000000000000000036331370372246700215760ustar00rootroot00000000000000// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include "rocksdb/db.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { // Data associated with a particular version of a key. A database may internally // store multiple versions of a same user key due to snapshots, compaction not // happening yet, etc. 
struct KeyVersion { KeyVersion() : user_key(""), value(""), sequence(0), type(0) {} KeyVersion(const std::string& _user_key, const std::string& _value, SequenceNumber _sequence, int _type) : user_key(_user_key), value(_value), sequence(_sequence), type(_type) {} std::string user_key; std::string value; SequenceNumber sequence; // TODO(ajkr): we should provide a helper function that converts the int to a // string describing the type for easier debugging. int type; }; // Returns listing of all versions of keys in the provided user key range. // The range is inclusive-inclusive, i.e., [`begin_key`, `end_key`], or // `max_num_ikeys` has been reached. Since all those keys returned will be // copied to memory, if the range covers too many keys, the memory usage // may be huge. `max_num_ikeys` can be used to cap the memory usage. // The result is inserted into the provided vector, `key_versions`. Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key, size_t max_num_ikeys, std::vector* key_versions); Status GetAllKeyVersions(DB* db, ColumnFamilyHandle* cfh, Slice begin_key, Slice end_key, size_t max_num_ikeys, std::vector* key_versions); } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/env_librados.h000066400000000000000000000166351370372246700231650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/status.h" #include "rocksdb/utilities/env_mirror.h" #include namespace ROCKSDB_NAMESPACE { class LibradosWritableFile; class EnvLibrados : public EnvWrapper { public: // Create a brand new sequentially-readable file with the specified name. // On success, stores a pointer to the new file in *result and returns OK. 
// On failure stores nullptr in *result and returns non-OK. If the file does // not exist, returns a non-OK status. // // The returned file will only be accessed by one thread at a time. Status NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; // Create a brand new random access read-only file with the // specified name. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. If the file does not exist, returns a non-OK // status. // // The returned file may be concurrently accessed by multiple threads. Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a // new file. On success, stores a pointer to the new file in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. // // The returned file will only be accessed by one thread at a time. Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) override; // Reuse an existing file by renaming it and opening it as writable. Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* result, const EnvOptions& options) override; // Create an object that represents a directory. Will fail if directory // doesn't exist. If the directory exists, it will open the directory // and create a new Directory object. // // On success, stores a pointer to the new Directory in // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. Status NewDirectory(const std::string& name, std::unique_ptr* result) override; // Returns OK if the named file exists. 
// NotFound if the named file does not exist, // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered Status FileExists(const std::string& fname) override; // Store in *result the names of the children of the specified directory. // The names are relative to "dir". // Original contents of *results are dropped. Status GetChildren(const std::string& dir, std::vector* result); // Delete the named file. Status DeleteFile(const std::string& fname) override; // Create the specified directory. Returns error if directory exists. Status CreateDir(const std::string& dirname) override; // Creates directory if missing. Return Ok if it exists, or successful in // Creating. Status CreateDirIfMissing(const std::string& dirname) override; // Delete the specified directory. Status DeleteDir(const std::string& dirname) override; // Store the size of fname in *file_size. Status GetFileSize(const std::string& fname, uint64_t* file_size) override; // Store the last modification time of fname in *file_mtime. Status GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) override; // Rename file src to target. Status RenameFile(const std::string& src, const std::string& target) override; // Hard Link file src to target. Status LinkFile(const std::string& src, const std::string& target) override; // Lock the specified file. Used to prevent concurrent access to // the same db by multiple processes. On failure, stores nullptr in // *lock and returns non-OK. // // On success, stores a pointer to the object that represents the // acquired lock in *lock and returns OK. The caller should call // UnlockFile(*lock) to release the lock. If the process exits, // the lock will be automatically released. // // If somebody else already holds the lock, finishes immediately // with a failure. I.e., this call does not wait for existing locks // to go away. 
// // May create the named file if it does not already exist. Status LockFile(const std::string& fname, FileLock** lock); // Release the lock acquired by a previous successful call to LockFile. // REQUIRES: lock was returned by a successful LockFile() call // REQUIRES: lock has not already been unlocked. Status UnlockFile(FileLock* lock); // Get full directory name for this db. Status GetAbsolutePath(const std::string& db_path, std::string* output_path); // Generate unique id std::string GenerateUniqueId(); // Get default EnvLibrados static EnvLibrados* Default(); explicit EnvLibrados(const std::string& db_name, const std::string& config_path, const std::string& db_pool); explicit EnvLibrados( const std::string& client_name, // first 3 parameters are // for RADOS client init const std::string& cluster_name, const uint64_t flags, const std::string& db_name, const std::string& config_path, const std::string& db_pool, const std::string& wal_dir, const std::string& wal_pool, const uint64_t write_buffer_size); ~EnvLibrados() { _rados.shutdown(); } private: std::string _client_name; std::string _cluster_name; uint64_t _flags; std::string _db_name; // get from user, readable string; Also used as db_id // for db metadata std::string _config_path; librados::Rados _rados; // RADOS client std::string _db_pool_name; librados::IoCtx _db_pool_ioctx; // IoCtx for connecting db_pool std::string _wal_dir; // WAL dir path std::string _wal_pool_name; librados::IoCtx _wal_pool_ioctx; // IoCtx for connecting wal_pool uint64_t _write_buffer_size; // WritableFile buffer max size /* private function to communicate with rados */ std::string _CreateFid(); Status _GetFid(const std::string& fname, std::string& fid); Status _GetFid(const std::string& fname, std::string& fid, int fid_len); Status _RenameFid(const std::string& old_fname, const std::string& new_fname); Status _AddFid(const std::string& fname, const std::string& fid); Status _DelFid(const std::string& fname); Status 
_GetSubFnames(const std::string& dirname, std::vector* result); librados::IoCtx* _GetIoctx(const std::string& prefix); friend class LibradosWritableFile; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/env_mirror.h000066400000000000000000000133111370372246700226640ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2015, Red Hat, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // MirrorEnv is an Env implementation that mirrors all file-related // operations to two backing Env's (provided at construction time). // Writes are mirrored. For read operations, we do the read from both // backends and assert that the results match. // // This is useful when implementing a new Env and ensuring that the // semantics and behavior are correct (in that they match that of an // existing, stable Env, like the default POSIX one). 
#pragma once #ifndef ROCKSDB_LITE #include #include #include #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { class SequentialFileMirror; class RandomAccessFileMirror; class WritableFileMirror; class EnvMirror : public EnvWrapper { Env *a_, *b_; bool free_a_, free_b_; public: EnvMirror(Env* a, Env* b, bool free_a = false, bool free_b = false) : EnvWrapper(a), a_(a), b_(b), free_a_(free_a), free_b_(free_b) {} ~EnvMirror() { if (free_a_) delete a_; if (free_b_) delete b_; } Status NewSequentialFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override; Status NewRandomAccessFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override; Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override; Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, std::unique_ptr* r, const EnvOptions& options) override; virtual Status NewDirectory(const std::string& name, std::unique_ptr* result) override { std::unique_ptr br; Status as = a_->NewDirectory(name, result); Status bs = b_->NewDirectory(name, &br); assert(as == bs); return as; } Status FileExists(const std::string& f) override { Status as = a_->FileExists(f); Status bs = b_->FileExists(f); assert(as == bs); return as; } #if defined(_MSC_VER) #pragma warning(push) // logical operation on address of string constant #pragma warning(disable : 4130) #endif Status GetChildren(const std::string& dir, std::vector* r) override { std::vector ar, br; Status as = a_->GetChildren(dir, &ar); Status bs = b_->GetChildren(dir, &br); assert(as == bs); std::sort(ar.begin(), ar.end()); std::sort(br.begin(), br.end()); if (!as.ok() || ar != br) { assert(0 == "getchildren results don't match"); } *r = ar; return as; } #if defined(_MSC_VER) #pragma warning(pop) #endif Status DeleteFile(const std::string& f) override { Status as = a_->DeleteFile(f); Status bs = b_->DeleteFile(f); assert(as == bs); return as; } Status 
CreateDir(const std::string& d) override { Status as = a_->CreateDir(d); Status bs = b_->CreateDir(d); assert(as == bs); return as; } Status CreateDirIfMissing(const std::string& d) override { Status as = a_->CreateDirIfMissing(d); Status bs = b_->CreateDirIfMissing(d); assert(as == bs); return as; } Status DeleteDir(const std::string& d) override { Status as = a_->DeleteDir(d); Status bs = b_->DeleteDir(d); assert(as == bs); return as; } Status GetFileSize(const std::string& f, uint64_t* s) override { uint64_t asize, bsize; Status as = a_->GetFileSize(f, &asize); Status bs = b_->GetFileSize(f, &bsize); assert(as == bs); assert(!as.ok() || asize == bsize); *s = asize; return as; } Status GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) override { uint64_t amtime, bmtime; Status as = a_->GetFileModificationTime(fname, &amtime); Status bs = b_->GetFileModificationTime(fname, &bmtime); assert(as == bs); assert(!as.ok() || amtime - bmtime < 10000 || bmtime - amtime < 10000); *file_mtime = amtime; return as; } Status RenameFile(const std::string& s, const std::string& t) override { Status as = a_->RenameFile(s, t); Status bs = b_->RenameFile(s, t); assert(as == bs); return as; } Status LinkFile(const std::string& s, const std::string& t) override { Status as = a_->LinkFile(s, t); Status bs = b_->LinkFile(s, t); assert(as == bs); return as; } class FileLockMirror : public FileLock { public: FileLock *a_, *b_; FileLockMirror(FileLock* a, FileLock* b) : a_(a), b_(b) {} }; Status LockFile(const std::string& f, FileLock** l) override { FileLock *al, *bl; Status as = a_->LockFile(f, &al); Status bs = b_->LockFile(f, &bl); assert(as == bs); if (as.ok()) *l = new FileLockMirror(al, bl); return as; } Status UnlockFile(FileLock* l) override { FileLockMirror* ml = static_cast(l); Status as = a_->UnlockFile(ml->a_); Status bs = b_->UnlockFile(ml->b_); assert(as == bs); delete ml; return as; } }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE 
rocksdb-6.11.4/include/rocksdb/utilities/info_log_finder.h000066400000000000000000000011141370372246700236230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/db.h" #include "rocksdb/options.h" namespace ROCKSDB_NAMESPACE { // This function can be used to list the Information logs, // given the db pointer. Status GetInfoLogList(DB* db, std::vector* info_log_list); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/ldb_cmd.h000066400000000000000000000217021370372246700220710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include #include #include #include #include "rocksdb/convenience.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/ldb_tool.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/db_ttl.h" #include "rocksdb/utilities/ldb_cmd_execute_result.h" namespace ROCKSDB_NAMESPACE { class LDBCommand { public: // Command-line arguments static const std::string ARG_ENV_URI; static const std::string ARG_DB; static const std::string ARG_PATH; static const std::string ARG_SECONDARY_PATH; static const std::string ARG_HEX; static const std::string ARG_KEY_HEX; static const std::string ARG_VALUE_HEX; static const std::string ARG_CF_NAME; static const std::string ARG_TTL; static const std::string ARG_TTL_START; static const std::string ARG_TTL_END; static const std::string ARG_TIMESTAMP; static const std::string ARG_TRY_LOAD_OPTIONS; static const std::string ARG_IGNORE_UNKNOWN_OPTIONS; static const std::string ARG_FROM; static const std::string ARG_TO; static const std::string ARG_MAX_KEYS; static const std::string ARG_BLOOM_BITS; static const std::string ARG_FIX_PREFIX_LEN; static const std::string ARG_COMPRESSION_TYPE; static const std::string ARG_COMPRESSION_MAX_DICT_BYTES; static const std::string ARG_BLOCK_SIZE; static const std::string ARG_AUTO_COMPACTION; static const std::string ARG_DB_WRITE_BUFFER_SIZE; static const std::string ARG_WRITE_BUFFER_SIZE; static const std::string ARG_FILE_SIZE; static const std::string ARG_CREATE_IF_MISSING; static const std::string ARG_NO_VALUE; static const std::string ARG_DISABLE_CONSISTENCY_CHECKS; struct ParsedParams { std::string cmd; std::vector cmd_params; std::map option_map; std::vector flags; }; static LDBCommand* SelectCommand(const ParsedParams& parsed_parms); static LDBCommand* InitFromCmdLineArgs( const std::vector& args, const Options& options, const LDBOptions& ldb_options, const std::vector* 
column_families, const std::function& selector = SelectCommand); static LDBCommand* InitFromCmdLineArgs( int argc, char const* const* argv, const Options& options, const LDBOptions& ldb_options, const std::vector* column_families); bool ValidateCmdLineOptions(); virtual Options PrepareOptionsForOpenDB(); virtual void SetDBOptions(Options options) { options_ = options; } virtual void SetColumnFamilies( const std::vector* column_families) { if (column_families != nullptr) { column_families_ = *column_families; } else { column_families_.clear(); } } void SetLDBOptions(const LDBOptions& ldb_options) { ldb_options_ = ldb_options; } const std::map& TEST_GetOptionMap() { return option_map_; } const std::vector& TEST_GetFlags() { return flags_; } virtual bool NoDBOpen() { return false; } virtual ~LDBCommand() { CloseDB(); } /* Run the command, and return the execute result. */ void Run(); virtual void DoCommand() = 0; LDBCommandExecuteResult GetExecuteState() { return exec_state_; } void ClearPreviousRunState() { exec_state_.Reset(); } // Consider using Slice::DecodeHex directly instead if you don't need the // 0x prefix static std::string HexToString(const std::string& str); // Consider using Slice::ToString(true) directly instead if // you don't need the 0x prefix static std::string StringToHex(const std::string& str); static const char* DELIM; protected: LDBCommandExecuteResult exec_state_; std::string env_uri_; std::string db_path_; // If empty, open DB as primary. If non-empty, open the DB as secondary // with this secondary path. When running against a database opened by // another process, ldb will leave the source directory completely intact. std::string secondary_path_; std::string column_family_name_; DB* db_; DBWithTTL* db_ttl_; std::map cf_handles_; /** * true implies that this command can work if the db is opened in read-only * mode. */ bool is_read_only_; /** If true, the key is input/output as hex in get/put/scan/delete etc. 
*/ bool is_key_hex_; /** If true, the value is input/output as hex in get/put/scan/delete etc. */ bool is_value_hex_; /** If true, the value is treated as timestamp suffixed */ bool is_db_ttl_; // If true, the kvs are output with their insert/modify timestamp in a ttl db bool timestamp_; // If true, try to construct options from DB's option files. bool try_load_options_; // The value passed to options.force_consistency_checks. bool force_consistency_checks_; bool create_if_missing_; /** * Map of options passed on the command-line. */ const std::map option_map_; /** * Flags passed on the command-line. */ const std::vector flags_; /** List of command-line options valid for this command */ const std::vector valid_cmd_line_options_; /** Shared pointer to underlying environment if applicable **/ std::shared_ptr env_guard_; bool ParseKeyValue(const std::string& line, std::string* key, std::string* value, bool is_key_hex, bool is_value_hex); LDBCommand(const std::map& options, const std::vector& flags, bool is_read_only, const std::vector& valid_cmd_line_options); void OpenDB(); void CloseDB(); ColumnFamilyHandle* GetCfHandle(); static std::string PrintKeyValue(const std::string& key, const std::string& value, bool is_key_hex, bool is_value_hex); static std::string PrintKeyValue(const std::string& key, const std::string& value, bool is_hex); /** * Return true if the specified flag is present in the specified flags vector */ static bool IsFlagPresent(const std::vector& flags, const std::string& flag) { return (std::find(flags.begin(), flags.end(), flag) != flags.end()); } static std::string HelpRangeCmdArgs(); /** * A helper function that returns a list of command line options * used by this command. It includes the common options and the ones * passed in. 
*/ static std::vector BuildCmdLineOptions( std::vector options); bool ParseIntOption(const std::map& options, const std::string& option, int& value, LDBCommandExecuteResult& exec_state); bool ParseStringOption(const std::map& options, const std::string& option, std::string* value); /** * Returns the value of the specified option as a boolean. * default_val is used if the option is not found in options. * Throws an exception if the value of the option is not * "true" or "false" (case insensitive). */ bool ParseBooleanOption(const std::map& options, const std::string& option, bool default_val); Options options_; std::vector column_families_; ConfigOptions config_options_; LDBOptions ldb_options_; private: /** * Interpret command line options and flags to determine if the key * should be input/output in hex. */ bool IsKeyHex(const std::map& options, const std::vector& flags); /** * Interpret command line options and flags to determine if the value * should be input/output in hex. */ bool IsValueHex(const std::map& options, const std::vector& flags); /** * Converts val to a boolean. * val must be either true or false (case insensitive). * Otherwise an exception is thrown. */ bool StringToBool(std::string val); }; class LDBCommandRunner { public: static void PrintHelp(const LDBOptions& ldb_options, const char* exec_name, bool to_stderr = true); // Returns the status code to return. 0 is no error. static int RunCommand( int argc, char const* const* argv, Options options, const LDBOptions& ldb_options, const std::vector* column_families); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/ldb_cmd_execute_result.h000066400000000000000000000031701370372246700252100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #ifdef FAILED #undef FAILED #endif namespace ROCKSDB_NAMESPACE { class LDBCommandExecuteResult { public: enum State { EXEC_NOT_STARTED = 0, EXEC_SUCCEED = 1, EXEC_FAILED = 2, }; LDBCommandExecuteResult() : state_(EXEC_NOT_STARTED), message_("") {} LDBCommandExecuteResult(State state, std::string& msg) : state_(state), message_(msg) {} std::string ToString() { std::string ret; switch (state_) { case EXEC_SUCCEED: break; case EXEC_FAILED: ret.append("Failed: "); break; case EXEC_NOT_STARTED: ret.append("Not started: "); } if (!message_.empty()) { ret.append(message_); } return ret; } void Reset() { state_ = EXEC_NOT_STARTED; message_ = ""; } bool IsSucceed() { return state_ == EXEC_SUCCEED; } bool IsNotStarted() { return state_ == EXEC_NOT_STARTED; } bool IsFailed() { return state_ == EXEC_FAILED; } static LDBCommandExecuteResult Succeed(std::string msg) { return LDBCommandExecuteResult(EXEC_SUCCEED, msg); } static LDBCommandExecuteResult Failed(std::string msg) { return LDBCommandExecuteResult(EXEC_FAILED, msg); } private: State state_; std::string message_; bool operator==(const LDBCommandExecuteResult&); bool operator!=(const LDBCommandExecuteResult&); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/leveldb_options.h000066400000000000000000000122421370372246700236740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class Cache; class Comparator; class Env; class FilterPolicy; class Logger; struct Options; class Snapshot; enum CompressionType : unsigned char; // Options to control the behavior of a database (passed to // DB::Open). A LevelDBOptions object can be initialized as though // it were a LevelDB Options object, and then it can be converted into // a RocksDB Options object. struct LevelDBOptions { // ------------------- // Parameters that affect behavior // Comparator used to define the order of keys in the table. // Default: a comparator that uses lexicographic byte-wise ordering // // REQUIRES: The client must ensure that the comparator supplied // here has the same name and orders keys *exactly* the same as the // comparator provided to previous open calls on the same DB. const Comparator* comparator; // If true, the database will be created if it is missing. // Default: false bool create_if_missing; // If true, an error is raised if the database already exists. // Default: false bool error_if_exists; // If true, the implementation will do aggressive checking of the // data it is processing and will stop early if it detects any // errors. This may have unforeseen ramifications: for example, a // corruption of one DB entry may cause a large number of entries to // become unreadable or for the entire DB to become unopenable. // Default: false bool paranoid_checks; // Use the specified object to interact with the environment, // e.g. to read/write files, schedule background work, etc. // Default: Env::Default() Env* env; // Any internal progress/error information generated by the db will // be written to info_log if it is non-NULL, or to a file stored // in the same directory as the DB contents if info_log is NULL. 
// Default: NULL Logger* info_log; // ------------------- // Parameters that affect performance // Amount of data to build up in memory (backed by an unsorted log // on disk) before converting to a sorted on-disk file. // // Larger values increase performance, especially during bulk loads. // Up to two write buffers may be held in memory at the same time, // so you may wish to adjust this parameter to control memory usage. // Also, a larger write buffer will result in a longer recovery time // the next time the database is opened. // // Default: 4MB size_t write_buffer_size; // Number of open files that can be used by the DB. You may need to // increase this if your database has a large working set (budget // one open file per 2MB of working set). // // Default: 1000 int max_open_files; // Control over blocks (user data is stored in a set of blocks, and // a block is the unit of reading from disk). // If non-NULL, use the specified cache for blocks. // If NULL, leveldb will automatically create and use an 8MB internal cache. // Default: NULL Cache* block_cache; // Approximate size of user data packed per block. Note that the // block size specified here corresponds to uncompressed data. The // actual size of the unit read from disk may be smaller if // compression is enabled. This parameter can be changed dynamically. // // Default: 4K size_t block_size; // Number of keys between restart points for delta encoding of keys. // This parameter can be changed dynamically. Most clients should // leave this parameter alone. // // Default: 16 int block_restart_interval; // Compress blocks using the specified compression algorithm. This // parameter can be changed dynamically. // // Default: kSnappyCompression, which gives lightweight but fast // compression. 
// // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: // ~200-500MB/s compression // ~400-800MB/s decompression // Note that these speeds are significantly faster than most // persistent storage speeds, and therefore it is typically never // worth switching to kNoCompression. Even if the input data is // incompressible, the kSnappyCompression implementation will // efficiently detect that and will switch to uncompressed mode. CompressionType compression; // If non-NULL, use the specified filter policy to reduce disk reads. // Many applications will benefit from passing the result of // NewBloomFilterPolicy() here. // // Default: NULL const FilterPolicy* filter_policy; // Create a LevelDBOptions object with default values for all fields. LevelDBOptions(); }; // Converts a LevelDBOptions object into a RocksDB Options object. Options ConvertOptions(const LevelDBOptions& leveldb_options); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/lua/000077500000000000000000000000001370372246700211135ustar00rootroot00000000000000rocksdb-6.11.4/include/rocksdb/utilities/lua/rocks_lua_custom_library.h000066400000000000000000000027461370372246700263750ustar00rootroot00000000000000// Copyright (c) 2016, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifdef LUA // lua headers extern "C" { #include #include #include } namespace ROCKSDB_NAMESPACE { namespace lua { // A class that used to define custom C Library that is callable // from Lua script class RocksLuaCustomLibrary { public: virtual ~RocksLuaCustomLibrary() {} // The name of the C library. This name will also be used as the table // (namespace) in Lua that contains the C library. 
virtual const char* Name() const = 0; // Returns a "static const struct luaL_Reg[]", which includes a list of // C functions. Note that the last entry of this static array must be // {nullptr, nullptr} as required by Lua. // // More details about how to implement Lua C libraries can be found // in the official Lua document http://www.lua.org/pil/26.2.html virtual const struct luaL_Reg* Lib() const = 0; // A function that will be called right after the library has been created // and pushed on the top of the lua_State. This custom setup function // allows developers to put additional table or constant values inside // the same table / namespace. virtual void CustomSetup(lua_State* /*L*/) const {} }; } // namespace lua } // namespace ROCKSDB_NAMESPACE #endif // LUA rocksdb-6.11.4/include/rocksdb/utilities/lua/rocks_lua_util.h000066400000000000000000000027331370372246700243100ustar00rootroot00000000000000// Copyright (c) 2016, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once // lua headers extern "C" { #include #include #include } #ifdef LUA #include #include #include "rocksdb/utilities/lua/rocks_lua_custom_library.h" namespace ROCKSDB_NAMESPACE { namespace lua { class LuaStateWrapper { public: explicit LuaStateWrapper(const std::string& lua_script) { lua_state_ = luaL_newstate(); Init(lua_script, {}); } LuaStateWrapper( const std::string& lua_script, const std::vector>& libraries) { lua_state_ = luaL_newstate(); Init(lua_script, libraries); } lua_State* GetLuaState() const { return lua_state_; } ~LuaStateWrapper() { lua_close(lua_state_); } private: void Init( const std::string& lua_script, const std::vector>& libraries) { if (lua_state_) { luaL_openlibs(lua_state_); for (const auto& library : libraries) { luaL_openlib(lua_state_, library->Name(), library->Lib(), 0); library->CustomSetup(lua_state_); } luaL_dostring(lua_state_, lua_script.c_str()); } } lua_State* lua_state_; }; } // namespace lua } // namespace ROCKSDB_NAMESPACE #endif // LUA rocksdb-6.11.4/include/rocksdb/utilities/memory_util.h000066400000000000000000000031331370372246700230500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #pragma once #include #include #include #include #include "rocksdb/cache.h" #include "rocksdb/db.h" namespace ROCKSDB_NAMESPACE { // Returns the current memory usage of the specified DB instances. class MemoryUtil { public: enum UsageType : int { // Memory usage of all the mem-tables. kMemTableTotal = 0, // Memory usage of those un-flushed mem-tables. kMemTableUnFlushed = 1, // Memory usage of all the table readers. kTableReadersTotal = 2, // Memory usage by Cache. 
kCacheTotal = 3, kNumUsageTypes = 4 }; // Returns the approximate memory usage of different types in the input // list of DBs and Cache set. For instance, in the output map // usage_by_type, usage_by_type[kMemTableTotal] will store the memory // usage of all the mem-tables from all the input rocksdb instances. // // Note that for memory usage inside Cache class, we will // only report the usage of the input "cache_set" without // including those Cache usage inside the input list "dbs" // of DBs. static Status GetApproximateMemoryUsageByType( const std::vector& dbs, const std::unordered_set cache_set, std::map* usage_by_type); }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/object_registry.h000066400000000000000000000165511370372246700237110ustar00rootroot00000000000000// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Logger; // Returns a new T when called with a string. Populates the std::unique_ptr // argument if granting ownership to caller. template using FactoryFunc = std::function*, std::string*)>; class ObjectLibrary { public: // Base class for an Entry in the Registry. 
class Entry { public: virtual ~Entry() {} Entry(const std::string& name) : name_(std::move(name)) {} // Checks to see if the target matches this entry virtual bool matches(const std::string& target) const { return name_ == target; } const std::string& Name() const { return name_; } private: const std::string name_; // The name of the Entry }; // End class Entry // An Entry containing a FactoryFunc for creating new Objects template class FactoryEntry : public Entry { public: FactoryEntry(const std::string& name, FactoryFunc f) : Entry(name), pattern_(std::move(name)), factory_(std::move(f)) {} ~FactoryEntry() override {} bool matches(const std::string& target) const override { return std::regex_match(target, pattern_); } // Creates a new T object. T* NewFactoryObject(const std::string& target, std::unique_ptr* guard, std::string* msg) const { return factory_(target, guard, msg); } private: std::regex pattern_; // The pattern for this entry FactoryFunc factory_; }; // End class FactoryEntry public: // Finds the entry matching the input name and type const Entry* FindEntry(const std::string& type, const std::string& name) const; void Dump(Logger* logger) const; // Registers the factory with the library for the pattern. // If the pattern matches, the factory may be used to create a new object. template const FactoryFunc& Register(const std::string& pattern, const FactoryFunc& factory) { std::unique_ptr entry(new FactoryEntry(pattern, factory)); AddEntry(T::Type(), entry); return factory; } // Returns the default ObjectLibrary static std::shared_ptr& Default(); private: // Adds the input entry to the list for the given type void AddEntry(const std::string& type, std::unique_ptr& entry); // ** FactoryFunctions for this loader, organized by type std::unordered_map>> entries_; }; // The ObjectRegistry is used to register objects that can be created by a // name/pattern at run-time where the specific implementation of the object may // not be known in advance. 
class ObjectRegistry { public: static std::shared_ptr NewInstance(); ObjectRegistry(); void AddLibrary(const std::shared_ptr& library) { libraries_.emplace_back(library); } // Creates a new T using the factory function that was registered with a // pattern that matches the provided "target" string according to // std::regex_match. // // If no registered functions match, returns nullptr. If multiple functions // match, the factory function used is unspecified. // // Populates res_guard with result pointer if caller is granted ownership. template T* NewObject(const std::string& target, std::unique_ptr* guard, std::string* errmsg) { guard->reset(); const auto* basic = FindEntry(T::Type(), target); if (basic != nullptr) { const auto* factory = static_cast*>(basic); return factory->NewFactoryObject(target, guard, errmsg); } else { *errmsg = std::string("Could not load ") + T::Type(); return nullptr; } } // Creates a new unique T using the input factory functions. // Returns OK if a new unique T was successfully created // Returns NotFound if the type/target could not be created // Returns InvalidArgument if the factory return an unguarded object // (meaning it cannot be managed by a unique ptr) template Status NewUniqueObject(const std::string& target, std::unique_ptr* result) { std::string errmsg; T* ptr = NewObject(target, result, &errmsg); if (ptr == nullptr) { return Status::NotFound(errmsg, target); } else if (*result) { return Status::OK(); } else { return Status::InvalidArgument(std::string("Cannot make a unique ") + T::Type() + " from unguarded one ", target); } } // Creates a new shared T using the input factory functions. 
// Returns OK if a new shared T was successfully created // Returns NotFound if the type/target could not be created // Returns InvalidArgument if the factory return an unguarded object // (meaning it cannot be managed by a shared ptr) template Status NewSharedObject(const std::string& target, std::shared_ptr* result) { std::string errmsg; std::unique_ptr guard; T* ptr = NewObject(target, &guard, &errmsg); if (ptr == nullptr) { return Status::NotFound(errmsg, target); } else if (guard) { result->reset(guard.release()); return Status::OK(); } else { return Status::InvalidArgument(std::string("Cannot make a shared ") + T::Type() + " from unguarded one ", target); } } // Creates a new static T using the input factory functions. // Returns OK if a new static T was successfully created // Returns NotFound if the type/target could not be created // Returns InvalidArgument if the factory return a guarded object // (meaning it is managed by a unique ptr) template Status NewStaticObject(const std::string& target, T** result) { std::string errmsg; std::unique_ptr guard; T* ptr = NewObject(target, &guard, &errmsg); if (ptr == nullptr) { return Status::NotFound(errmsg, target); } else if (guard.get()) { return Status::InvalidArgument(std::string("Cannot make a static ") + T::Type() + " from a guarded one ", target); } else { *result = ptr; return Status::OK(); } } // Dump the contents of the registry to the logger void Dump(Logger* logger) const; private: const ObjectLibrary::Entry* FindEntry(const std::string& type, const std::string& name) const; // The set of libraries to search for factories for this registry. // The libraries are searched in reverse order (back to front) when // searching for entries. 
std::vector> libraries_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/optimistic_transaction_db.h000066400000000000000000000070021370372246700257400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/utilities/stackable_db.h" namespace ROCKSDB_NAMESPACE { class Transaction; // Database with Transaction support. // // See optimistic_transaction.h and examples/transaction_example.cc // Options to use when starting an Optimistic Transaction struct OptimisticTransactionOptions { // Setting set_snapshot=true is the same as calling SetSnapshot(). bool set_snapshot = false; // Should be set if the DB has a non-default comparator. // See comment in WriteBatchWithIndex constructor. const Comparator* cmp = BytewiseComparator(); }; enum class OccValidationPolicy { // Validate serially at commit stage, AFTER entering the write-group. // Isolation validation is processed single-threaded(since in the // write-group). // May suffer from high mutex contention, as per this link: // https://github.com/facebook/rocksdb/issues/4402 kValidateSerial = 0, // Validate parallelly before commit stage, BEFORE entering the write-group to // reduce mutex contention. Each txn acquires locks for its write-set // records in some well-defined order. 
kValidateParallel = 1 }; struct OptimisticTransactionDBOptions { OccValidationPolicy validate_policy = OccValidationPolicy::kValidateParallel; // works only if validate_policy == OccValidationPolicy::kValidateParallel uint32_t occ_lock_buckets = (1 << 20); }; class OptimisticTransactionDB : public StackableDB { public: // Open an OptimisticTransactionDB similar to DB::Open(). static Status Open(const Options& options, const std::string& dbname, OptimisticTransactionDB** dbptr); static Status Open(const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, OptimisticTransactionDB** dbptr); static Status Open(const DBOptions& db_options, const OptimisticTransactionDBOptions& occ_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, OptimisticTransactionDB** dbptr); virtual ~OptimisticTransactionDB() {} // Starts a new Transaction. // // Caller is responsible for deleting the returned transaction when no // longer needed. // // If old_txn is not null, BeginTransaction will reuse this Transaction // handle instead of allocating a new one. This is an optimization to avoid // extra allocations when repeatedly creating transactions. virtual Transaction* BeginTransaction( const WriteOptions& write_options, const OptimisticTransactionOptions& txn_options = OptimisticTransactionOptions(), Transaction* old_txn = nullptr) = 0; OptimisticTransactionDB(const OptimisticTransactionDB&) = delete; void operator=(const OptimisticTransactionDB&) = delete; protected: // To Create an OptimisticTransactionDB, call Open() explicit OptimisticTransactionDB(DB* db) : StackableDB(db) {} }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/option_change_migration.h000066400000000000000000000013441370372246700253730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "rocksdb/options.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { // Try to migrate DB created with old_opts to be use new_opts. // Multiple column families is not supported. // It is best-effort. No guarantee to succeed. // A full compaction may be executed. Status OptionChangeMigration(std::string dbname, const Options& old_opts, const Options& new_opts); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/options_util.h000066400000000000000000000131711370372246700232360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // This file contains utility functions for RocksDB Options. #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { struct ConfigOptions; // Constructs the DBOptions and ColumnFamilyDescriptors by loading the // latest RocksDB options file stored in the specified rocksdb database. // // Note that the all the pointer options (except table_factory, which will // be described in more details below) will be initialized with the default // values. Developers can further initialize them after this function call. 
// Below is an example list of pointer options which will be initialized // // * env // * memtable_factory // * compaction_filter_factory // * prefix_extractor // * comparator // * merge_operator // * compaction_filter // // User can also choose to load customized comparator, env, and/or // merge_operator through object registry: // * comparator needs to be registered through Registrar // * env needs to be registered through Registrar // * merge operator needs to be registered through // Registrar>. // // For table_factory, this function further supports deserializing // BlockBasedTableFactory and its BlockBasedTableOptions except the // pointer options of BlockBasedTableOptions (flush_block_policy_factory, // block_cache, and block_cache_compressed), which will be initialized with // default values. Developers can further specify these three options by // casting the return value of TableFactory::GetOptions() to // BlockBasedTableOptions and making necessary changes. // // ignore_unknown_options can be set to true if you want to ignore options // that are from a newer version of the db, esentially for forward // compatibility. // // config_options contains a set of options that controls the processing // of the options. The LoadLatestOptions(ConfigOptions...) should be preferred; // the alternative signature may be deprecated in a future release. The // equivalent functionality can be achieved by setting the corresponding options // in the ConfigOptions parameter. // // examples/options_file_example.cc demonstrates how to use this function // to open a RocksDB instance. // // @return the function returns an OK status when it went successfully. If // the specified "dbpath" does not contain any option file, then a // Status::NotFound will be returned. A return value other than // Status::OK or Status::NotFound indicates there're some error related // to the options file itself. 
// // @see LoadOptionsFromFile Status LoadLatestOptions(const std::string& dbpath, Env* env, DBOptions* db_options, std::vector* cf_descs, bool ignore_unknown_options = false, std::shared_ptr* cache = {}); Status LoadLatestOptions(const ConfigOptions& config_options, const std::string& dbpath, DBOptions* db_options, std::vector* cf_descs, std::shared_ptr* cache = {}); // Similar to LoadLatestOptions, this function constructs the DBOptions // and ColumnFamilyDescriptors based on the specified RocksDB Options file. // // The LoadOptionsFile(ConfigOptions...) should be preferred; // the alternative signature may be deprecated in a future release. The // equivalent functionality can be achieved by setting the corresponding // options in the ConfigOptions parameter. // // @see LoadLatestOptions Status LoadOptionsFromFile(const std::string& options_file_name, Env* env, DBOptions* db_options, std::vector* cf_descs, bool ignore_unknown_options = false, std::shared_ptr* cache = {}); Status LoadOptionsFromFile(const ConfigOptions& config_options, const std::string& options_file_name, DBOptions* db_options, std::vector* cf_descs, std::shared_ptr* cache = {}); // Returns the latest options file name under the specified db path. Status GetLatestOptionsFileName(const std::string& dbpath, Env* env, std::string* options_file_name); // Returns Status::OK if the input DBOptions and ColumnFamilyDescriptors // are compatible with the latest options stored in the specified DB path. // // If the return status is non-ok, it means the specified RocksDB instance // might not be correctly opened with the input set of options. 
Currently, // changing one of the following options will fail the compatibility check: // // * comparator // * prefix_extractor // * table_factory // * merge_operator Status CheckOptionsCompatibility( const std::string& dbpath, Env* env, const DBOptions& db_options, const std::vector& cf_descs, bool ignore_unknown_options = false); Status CheckOptionsCompatibility( const ConfigOptions& config_options, const std::string& dbpath, const DBOptions& db_options, const std::vector& cf_descs); } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/sim_cache.h000066400000000000000000000072461370372246700224270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "rocksdb/cache.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class SimCache; // For instrumentation purpose, use NewSimCache instead of NewLRUCache API // NewSimCache is a wrapper function returning a SimCache instance that can // have additional interface provided in Simcache class besides Cache interface // to predict block cache hit rate without actually allocating the memory. It // can help users tune their current block cache size, and determine how // efficient they are using the memory. 
// // Since GetSimCapacity() returns the capacity for simulutation, it differs from // actual memory usage, which can be estimated as: // sim_capacity * entry_size / (entry_size + block_size), // where 76 <= entry_size <= 104, // BlockBasedTableOptions.block_size = 4096 by default but is configurable, // Therefore, generally the actual memory overhead of SimCache is Less than // sim_capacity * 2% extern std::shared_ptr NewSimCache(std::shared_ptr cache, size_t sim_capacity, int num_shard_bits); extern std::shared_ptr NewSimCache(std::shared_ptr sim_cache, std::shared_ptr cache, int num_shard_bits); class SimCache : public Cache { public: SimCache() {} ~SimCache() override {} const char* Name() const override { return "SimCache"; } // returns the maximum configured capacity of the simcache for simulation virtual size_t GetSimCapacity() const = 0; // simcache doesn't provide internal handler reference to user, so always // PinnedUsage = 0 and the behavior will be not exactly consistent the // with real cache. // returns the memory size for the entries residing in the simcache. virtual size_t GetSimUsage() const = 0; // sets the maximum configured capacity of the simcache. When the new // capacity is less than the old capacity and the existing usage is // greater than new capacity, the implementation will purge old entries // to fit new capapicty. 
virtual void SetSimCapacity(size_t capacity) = 0; // returns the lookup times of simcache virtual uint64_t get_miss_counter() const = 0; // returns the hit times of simcache virtual uint64_t get_hit_counter() const = 0; // reset the lookup and hit counters virtual void reset_counter() = 0; // String representation of the statistics of the simcache virtual std::string ToString() const = 0; // Start storing logs of the cache activity (Add/Lookup) into // a file located at activity_log_file, max_logging_size option can be used to // stop logging to the file automatically after reaching a specific size in // bytes, a values of 0 disable this feature virtual Status StartActivityLogging(const std::string& activity_log_file, Env* env, uint64_t max_logging_size = 0) = 0; // Stop cache activity logging if any virtual void StopActivityLogging() = 0; // Status of cache logging happening in background virtual Status GetActivityLoggingStatus() = 0; private: SimCache(const SimCache&); SimCache& operator=(const SimCache&); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/stackable_db.h000066400000000000000000000400421370372246700231010ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include "rocksdb/db.h" #ifdef _WIN32 // Windows API macro interference #undef DeleteFile #endif namespace ROCKSDB_NAMESPACE { // This class contains APIs to stack rocksdb wrappers.Eg. Stack TTL over base d class StackableDB : public DB { public: // StackableDB take sole ownership of the underlying db. explicit StackableDB(DB* db) : db_(db) {} // StackableDB take shared ownership of the underlying db. 
explicit StackableDB(std::shared_ptr db) : db_(db.get()), shared_db_ptr_(db) {} ~StackableDB() { if (shared_db_ptr_ == nullptr) { delete db_; } else { assert(shared_db_ptr_.get() == db_); } db_ = nullptr; } virtual Status Close() override { return db_->Close(); } virtual DB* GetBaseDB() { return db_; } virtual DB* GetRootDB() override { return db_->GetRootDB(); } virtual Status CreateColumnFamily(const ColumnFamilyOptions& options, const std::string& column_family_name, ColumnFamilyHandle** handle) override { return db_->CreateColumnFamily(options, column_family_name, handle); } virtual Status CreateColumnFamilies( const ColumnFamilyOptions& options, const std::vector& column_family_names, std::vector* handles) override { return db_->CreateColumnFamilies(options, column_family_names, handles); } virtual Status CreateColumnFamilies( const std::vector& column_families, std::vector* handles) override { return db_->CreateColumnFamilies(column_families, handles); } virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override { return db_->DropColumnFamily(column_family); } virtual Status DropColumnFamilies( const std::vector& column_families) override { return db_->DropColumnFamilies(column_families); } virtual Status DestroyColumnFamilyHandle( ColumnFamilyHandle* column_family) override { return db_->DestroyColumnFamilyHandle(column_family); } using DB::Put; virtual Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& val) override { return db_->Put(options, column_family, key, val); } using DB::Get; virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override { return db_->Get(options, column_family, key, value); } using DB::GetMergeOperands; virtual Status GetMergeOperands( const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* slice, GetMergeOperandsOptions* get_merge_operands_options, int* 
number_of_operands) override { return db_->GetMergeOperands(options, column_family, key, slice, get_merge_operands_options, number_of_operands); } using DB::MultiGet; virtual std::vector MultiGet( const ReadOptions& options, const std::vector& column_family, const std::vector& keys, std::vector* values) override { return db_->MultiGet(options, column_family, keys, values); } virtual void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override { return db_->MultiGet(options, column_family, num_keys, keys, values, statuses, sorted_input); } using DB::IngestExternalFile; virtual Status IngestExternalFile( ColumnFamilyHandle* column_family, const std::vector& external_files, const IngestExternalFileOptions& options) override { return db_->IngestExternalFile(column_family, external_files, options); } using DB::IngestExternalFiles; virtual Status IngestExternalFiles( const std::vector& args) override { return db_->IngestExternalFiles(args); } using DB::CreateColumnFamilyWithImport; virtual Status CreateColumnFamilyWithImport( const ColumnFamilyOptions& options, const std::string& column_family_name, const ImportColumnFamilyOptions& import_options, const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) override { return db_->CreateColumnFamilyWithImport(options, column_family_name, import_options, metadata, handle); } virtual Status VerifyChecksum() override { return db_->VerifyChecksum(); } virtual Status VerifyChecksum(const ReadOptions& options) override { return db_->VerifyChecksum(options); } using DB::KeyMayExist; virtual bool KeyMayExist(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value, bool* value_found = nullptr) override { return db_->KeyMayExist(options, column_family, key, value, value_found); } using DB::Delete; virtual Status Delete(const WriteOptions& 
wopts, ColumnFamilyHandle* column_family, const Slice& key) override { return db_->Delete(wopts, column_family, key); } using DB::SingleDelete; virtual Status SingleDelete(const WriteOptions& wopts, ColumnFamilyHandle* column_family, const Slice& key) override { return db_->SingleDelete(wopts, column_family, key); } using DB::Merge; virtual Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) override { return db_->Merge(options, column_family, key, value); } virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override { return db_->Write(opts, updates); } using DB::NewIterator; virtual Iterator* NewIterator(const ReadOptions& opts, ColumnFamilyHandle* column_family) override { return db_->NewIterator(opts, column_family); } virtual Status NewIterators( const ReadOptions& options, const std::vector& column_families, std::vector* iterators) override { return db_->NewIterators(options, column_families, iterators); } virtual const Snapshot* GetSnapshot() override { return db_->GetSnapshot(); } virtual void ReleaseSnapshot(const Snapshot* snapshot) override { return db_->ReleaseSnapshot(snapshot); } using DB::GetMapProperty; using DB::GetProperty; virtual bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property, std::string* value) override { return db_->GetProperty(column_family, property, value); } virtual bool GetMapProperty( ColumnFamilyHandle* column_family, const Slice& property, std::map* value) override { return db_->GetMapProperty(column_family, property, value); } using DB::GetIntProperty; virtual bool GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) override { return db_->GetIntProperty(column_family, property, value); } using DB::GetAggregatedIntProperty; virtual bool GetAggregatedIntProperty(const Slice& property, uint64_t* value) override { return db_->GetAggregatedIntProperty(property, value); } using 
DB::GetApproximateSizes; virtual Status GetApproximateSizes(const SizeApproximationOptions& options, ColumnFamilyHandle* column_family, const Range* r, int n, uint64_t* sizes) override { return db_->GetApproximateSizes(options, column_family, r, n, sizes); } using DB::GetApproximateMemTableStats; virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family, const Range& range, uint64_t* const count, uint64_t* const size) override { return db_->GetApproximateMemTableStats(column_family, range, count, size); } using DB::CompactRange; virtual Status CompactRange(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) override { return db_->CompactRange(options, column_family, begin, end); } using DB::CompactFiles; virtual Status CompactFiles( const CompactionOptions& compact_options, ColumnFamilyHandle* column_family, const std::vector& input_file_names, const int output_level, const int output_path_id = -1, std::vector* const output_file_names = nullptr, CompactionJobInfo* compaction_job_info = nullptr) override { return db_->CompactFiles(compact_options, column_family, input_file_names, output_level, output_path_id, output_file_names, compaction_job_info); } virtual Status PauseBackgroundWork() override { return db_->PauseBackgroundWork(); } virtual Status ContinueBackgroundWork() override { return db_->ContinueBackgroundWork(); } virtual Status EnableAutoCompaction( const std::vector& column_family_handles) override { return db_->EnableAutoCompaction(column_family_handles); } virtual void EnableManualCompaction() override { return db_->EnableManualCompaction(); } virtual void DisableManualCompaction() override { return db_->DisableManualCompaction(); } using DB::NumberLevels; virtual int NumberLevels(ColumnFamilyHandle* column_family) override { return db_->NumberLevels(column_family); } using DB::MaxMemCompactionLevel; virtual int MaxMemCompactionLevel( ColumnFamilyHandle* column_family) override { 
return db_->MaxMemCompactionLevel(column_family); } using DB::Level0StopWriteTrigger; virtual int Level0StopWriteTrigger( ColumnFamilyHandle* column_family) override { return db_->Level0StopWriteTrigger(column_family); } virtual const std::string& GetName() const override { return db_->GetName(); } virtual Env* GetEnv() const override { return db_->GetEnv(); } virtual FileSystem* GetFileSystem() const override { return db_->GetFileSystem(); } using DB::GetOptions; virtual Options GetOptions(ColumnFamilyHandle* column_family) const override { return db_->GetOptions(column_family); } using DB::GetDBOptions; virtual DBOptions GetDBOptions() const override { return db_->GetDBOptions(); } using DB::Flush; virtual Status Flush(const FlushOptions& fopts, ColumnFamilyHandle* column_family) override { return db_->Flush(fopts, column_family); } virtual Status Flush( const FlushOptions& fopts, const std::vector& column_families) override { return db_->Flush(fopts, column_families); } virtual Status SyncWAL() override { return db_->SyncWAL(); } virtual Status FlushWAL(bool sync) override { return db_->FlushWAL(sync); } virtual Status LockWAL() override { return db_->LockWAL(); } virtual Status UnlockWAL() override { return db_->UnlockWAL(); } #ifndef ROCKSDB_LITE virtual Status DisableFileDeletions() override { return db_->DisableFileDeletions(); } virtual Status EnableFileDeletions(bool force) override { return db_->EnableFileDeletions(force); } virtual void GetLiveFilesMetaData( std::vector* metadata) override { db_->GetLiveFilesMetaData(metadata); } virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* cf_meta) override { db_->GetColumnFamilyMetaData(column_family, cf_meta); } using DB::StartBlockCacheTrace; Status StartBlockCacheTrace( const TraceOptions& options, std::unique_ptr&& trace_writer) override { return db_->StartBlockCacheTrace(options, std::move(trace_writer)); } using DB::EndBlockCacheTrace; Status EndBlockCacheTrace() 
override { return db_->EndBlockCacheTrace(); } #endif // ROCKSDB_LITE virtual Status GetLiveFiles(std::vector& vec, uint64_t* mfs, bool flush_memtable = true) override { return db_->GetLiveFiles(vec, mfs, flush_memtable); } virtual SequenceNumber GetLatestSequenceNumber() const override { return db_->GetLatestSequenceNumber(); } virtual bool SetPreserveDeletesSequenceNumber( SequenceNumber seqnum) override { return db_->SetPreserveDeletesSequenceNumber(seqnum); } virtual Status GetSortedWalFiles(VectorLogPtr& files) override { return db_->GetSortedWalFiles(files); } virtual Status GetCurrentWalFile( std::unique_ptr* current_log_file) override { return db_->GetCurrentWalFile(current_log_file); } virtual Status GetCreationTimeOfOldestFile( uint64_t* creation_time) override { return db_->GetCreationTimeOfOldestFile(creation_time); } virtual Status DeleteFile(std::string name) override { return db_->DeleteFile(name); } virtual Status GetDbIdentity(std::string& identity) const override { return db_->GetDbIdentity(identity); } using DB::SetOptions; virtual Status SetOptions(ColumnFamilyHandle* column_family_handle, const std::unordered_map& new_options) override { return db_->SetOptions(column_family_handle, new_options); } virtual Status SetDBOptions( const std::unordered_map& new_options) override { return db_->SetDBOptions(new_options); } using DB::ResetStats; virtual Status ResetStats() override { return db_->ResetStats(); } using DB::GetPropertiesOfAllTables; virtual Status GetPropertiesOfAllTables( ColumnFamilyHandle* column_family, TablePropertiesCollection* props) override { return db_->GetPropertiesOfAllTables(column_family, props); } using DB::GetPropertiesOfTablesInRange; virtual Status GetPropertiesOfTablesInRange( ColumnFamilyHandle* column_family, const Range* range, std::size_t n, TablePropertiesCollection* props) override { return db_->GetPropertiesOfTablesInRange(column_family, range, n, props); } virtual Status GetUpdatesSince( SequenceNumber 
seq_number, std::unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options) override { return db_->GetUpdatesSince(seq_number, iter, read_options); } virtual Status SuggestCompactRange(ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) override { return db_->SuggestCompactRange(column_family, begin, end); } virtual Status PromoteL0(ColumnFamilyHandle* column_family, int target_level) override { return db_->PromoteL0(column_family, target_level); } virtual ColumnFamilyHandle* DefaultColumnFamily() const override { return db_->DefaultColumnFamily(); } #ifndef ROCKSDB_LITE Status TryCatchUpWithPrimary() override { return db_->TryCatchUpWithPrimary(); } #endif // ROCKSDB_LITE protected: DB* db_; std::shared_ptr shared_db_ptr_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/utilities/table_properties_collectors.h000066400000000000000000000072361370372246700263070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/table_properties.h" namespace ROCKSDB_NAMESPACE { // A factory of a table property collector that marks a SST // file as need-compaction when it observe at least "D" deletion // entries in any "N" consecutive entires or the ratio of tombstone // entries in the whole file >= the specified deletion ratio. 
class CompactOnDeletionCollectorFactory : public TablePropertiesCollectorFactory { public: ~CompactOnDeletionCollectorFactory() {} TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context context) override; // Change the value of sliding_window_size "N" // Setting it to 0 disables the delete triggered compaction void SetWindowSize(size_t sliding_window_size) { sliding_window_size_.store(sliding_window_size); } // Change the value of deletion_trigger "D" void SetDeletionTrigger(size_t deletion_trigger) { deletion_trigger_.store(deletion_trigger); } // Change deletion ratio. // @param deletion_ratio, if <= 0 or > 1, disable triggering compaction // based on deletion ratio. void SetDeletionRatio(double deletion_ratio) { deletion_ratio_.store(deletion_ratio); } const char* Name() const override { return "CompactOnDeletionCollector"; } std::string ToString() const override; private: friend std::shared_ptr NewCompactOnDeletionCollectorFactory(size_t sliding_window_size, size_t deletion_trigger, double deletion_ratio); // A factory of a table property collector that marks a SST // file as need-compaction when it observe at least "D" deletion // entries in any "N" consecutive entires, or the ratio of tombstone // entries >= deletion_ratio. // // @param sliding_window_size "N" // @param deletion_trigger "D" // @param deletion_ratio, if <= 0 or > 1, disable triggering compaction // based on deletion ratio. 
CompactOnDeletionCollectorFactory(size_t sliding_window_size, size_t deletion_trigger, double deletion_ratio) : sliding_window_size_(sliding_window_size), deletion_trigger_(deletion_trigger), deletion_ratio_(deletion_ratio) {} std::atomic sliding_window_size_; std::atomic deletion_trigger_; std::atomic deletion_ratio_; }; // Creates a factory of a table property collector that marks a SST // file as need-compaction when it observe at least "D" deletion // entries in any "N" consecutive entires, or the ratio of tombstone // entries >= deletion_ratio. // // @param sliding_window_size "N". Note that this number will be // round up to the smallest multiple of 128 that is no less // than the specified size. // @param deletion_trigger "D". Note that even when "N" is changed, // the specified number for "D" will not be changed. // @param deletion_ratio, if <= 0 or > 1, disable triggering compaction // based on deletion ratio. Disabled by default. extern std::shared_ptr NewCompactOnDeletionCollectorFactory(size_t sliding_window_size, size_t deletion_trigger, double deletion_ratio = 0); } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/transaction.h000066400000000000000000000555261370372246700230450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Iterator; class TransactionDB; class WriteBatchWithIndex; using TransactionName = std::string; using TransactionID = uint64_t; // Provides notification to the caller of SetSnapshotOnNextOperation when // the actual snapshot gets created class TransactionNotifier { public: virtual ~TransactionNotifier() {} // Implement this method to receive notification when a snapshot is // requested via SetSnapshotOnNextOperation. virtual void SnapshotCreated(const Snapshot* newSnapshot) = 0; }; // Provides BEGIN/COMMIT/ROLLBACK transactions. // // To use transactions, you must first create either an OptimisticTransactionDB // or a TransactionDB. See examples/[optimistic_]transaction_example.cc for // more information. // // To create a transaction, use [Optimistic]TransactionDB::BeginTransaction(). // // It is up to the caller to synchronize access to this object. // // See examples/transaction_example.cc for some simple examples. // // TODO(agiardullo): Not yet implemented // -PerfContext statistics // -Support for using Transactions with DBWithTTL class Transaction { public: // No copying allowed Transaction(const Transaction&) = delete; void operator=(const Transaction&) = delete; virtual ~Transaction() {} // If a transaction has a snapshot set, the transaction will ensure that // any keys successfully written(or fetched via GetForUpdate()) have not // been modified outside of this transaction since the time the snapshot was // set. // If a snapshot has not been set, the transaction guarantees that keys have // not been modified since the time each key was first written (or fetched via // GetForUpdate()). // // Using SetSnapshot() will provide stricter isolation guarantees at the // expense of potentially more transaction failures due to conflicts with // other writes. 
// // Calling SetSnapshot() has no effect on keys written before this function // has been called. // // SetSnapshot() may be called multiple times if you would like to change // the snapshot used for different operations in this transaction. // // Calling SetSnapshot will not affect the version of Data returned by Get() // methods. See Transaction::Get() for more details. virtual void SetSnapshot() = 0; // Similar to SetSnapshot(), but will not change the current snapshot // until Put/Merge/Delete/GetForUpdate/MultigetForUpdate is called. // By calling this function, the transaction will essentially call // SetSnapshot() for you right before performing the next write/GetForUpdate. // // Calling SetSnapshotOnNextOperation() will not affect what snapshot is // returned by GetSnapshot() until the next write/GetForUpdate is executed. // // When the snapshot is created the notifier's SnapshotCreated method will // be called so that the caller can get access to the snapshot. // // This is an optimization to reduce the likelihood of conflicts that // could occur in between the time SetSnapshot() is called and the first // write/GetForUpdate operation. Eg, this prevents the following // race-condition: // // txn1->SetSnapshot(); // txn2->Put("A", ...); // txn2->Commit(); // txn1->GetForUpdate(opts, "A", ...); // FAIL! virtual void SetSnapshotOnNextOperation( std::shared_ptr notifier = nullptr) = 0; // Returns the Snapshot created by the last call to SetSnapshot(). // // REQUIRED: The returned Snapshot is only valid up until the next time // SetSnapshot()/SetSnapshotOnNextSavePoint() is called, ClearSnapshot() // is called, or the Transaction is deleted. virtual const Snapshot* GetSnapshot() const = 0; // Clears the current snapshot (i.e. no snapshot will be 'set') // // This removes any snapshot that currently exists or is set to be created // on the next update operation (SetSnapshotOnNextOperation). 
// // Calling ClearSnapshot() has no effect on keys written before this function // has been called. // // If a reference to a snapshot was retrieved via GetSnapshot(), it will no // longer be valid and should be discarded after a call to ClearSnapshot(). virtual void ClearSnapshot() = 0; // Prepare the current transaction for 2PC (two-phase commit) virtual Status Prepare() = 0; // Write all batched keys to the db atomically. // // Returns OK on success. // // May return any error status that could be returned by DB::Write(). // // If this transaction was created by an OptimisticTransactionDB(), // Status::Busy() may be returned if the transaction could not guarantee // that there are no write conflicts. Status::TryAgain() may be returned // if the memtable history size is not large enough // (See max_write_buffer_size_to_maintain). // // If this transaction was created by a TransactionDB(), Status::Expired() // may be returned if this transaction has lived for longer than // TransactionOptions.expiration. Status::TxnNotPrepared() may be returned if // TransactionOptions.skip_prepare is false and Prepare is not called on this // transaction before Commit. virtual Status Commit() = 0; // Discard all batched writes in this transaction. virtual Status Rollback() = 0; // Records the state of the transaction for future calls to // RollbackToSavePoint(). May be called multiple times to set multiple save // points. virtual void SetSavePoint() = 0; // Undoes all operations in this transaction (Put, Merge, Delete, PutLogData) // since the most recent call to SetSavePoint() and removes the most recent // SetSavePoint(). // If there is no previous call to SetSavePoint(), returns Status::NotFound() virtual Status RollbackToSavePoint() = 0; // Pop the most recent save point. // If there is no previous call to SetSavePoint(), Status::NotFound() // will be returned. // Otherwise returns Status::OK(). 
virtual Status PopSavePoint() = 0; // This function is similar to DB::Get() except it will also read pending // changes in this transaction. Currently, this function will return // Status::MergeInProgress if the most recent write to the queried key in // this batch is a Merge. // // If read_options.snapshot is not set, the current version of the key will // be read. Calling SetSnapshot() does not affect the version of the data // returned. // // Note that setting read_options.snapshot will affect what is read from the // DB but will NOT change which keys are read from this transaction (the keys // in this transaction do not yet belong to any snapshot and will be fetched // regardless). virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) = 0; // An overload of the above method that receives a PinnableSlice // For backward compatibility a default implementation is provided virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val) { assert(pinnable_val != nullptr); auto s = Get(options, column_family, key, pinnable_val->GetSelf()); pinnable_val->PinSelf(); return s; } virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) = 0; virtual Status Get(const ReadOptions& options, const Slice& key, PinnableSlice* pinnable_val) { assert(pinnable_val != nullptr); auto s = Get(options, key, pinnable_val->GetSelf()); pinnable_val->PinSelf(); return s; } virtual std::vector MultiGet( const ReadOptions& options, const std::vector& column_family, const std::vector& keys, std::vector* values) = 0; virtual std::vector MultiGet(const ReadOptions& options, const std::vector& keys, std::vector* values) = 0; // Batched version of MultiGet - see DBImpl::MultiGet(). 
Sub-classes are // expected to override this with an implementation that calls // DBImpl::MultiGet() virtual void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool /*sorted_input*/ = false) { for (size_t i = 0; i < num_keys; ++i) { statuses[i] = Get(options, column_family, keys[i], &values[i]); } } // Read this key and ensure that this transaction will only // be able to be committed if this key is not written outside this // transaction after it has first been read (or after the snapshot if a // snapshot is set in this transaction and do_validate is true). If // do_validate is false, ReadOptions::snapshot is expected to be nullptr so // that GetForUpdate returns the latest committed value. The transaction // behavior is the same regardless of whether the key exists or not. // // Note: Currently, this function will return Status::MergeInProgress // if the most recent write to the queried key in this batch is a Merge. // // The values returned by this function are similar to Transaction::Get(). // If value==nullptr, then this function will not read any data, but will // still ensure that this key cannot be written to outside of this // transaction. // // If this transaction was created by an OptimisticTransactionDB, GetForUpdate() // could cause Commit() to fail. Otherwise, it could return any error // that could be returned by DB::Get(). // // If this transaction was created by a TransactionDB, it can return // Status::OK() on success, // Status::Busy() if there is a write conflict, // Status::TimedOut() if a lock could not be acquired, // Status::TryAgain() if the memtable history size is not large enough // (See max_write_buffer_size_to_maintain) // Status::MergeInProgress() if merge operations cannot be resolved. // or other errors if this key could not be read. 
virtual Status GetForUpdate(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value, bool exclusive = true, const bool do_validate = true) = 0; // An overload of the above method that receives a PinnableSlice // For backward compatibility a default implementation is provided virtual Status GetForUpdate(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val, bool exclusive = true, const bool do_validate = true) { if (pinnable_val == nullptr) { std::string* null_str = nullptr; return GetForUpdate(options, column_family, key, null_str, exclusive, do_validate); } else { auto s = GetForUpdate(options, column_family, key, pinnable_val->GetSelf(), exclusive, do_validate); pinnable_val->PinSelf(); return s; } } virtual Status GetForUpdate(const ReadOptions& options, const Slice& key, std::string* value, bool exclusive = true, const bool do_validate = true) = 0; virtual std::vector MultiGetForUpdate( const ReadOptions& options, const std::vector& column_family, const std::vector& keys, std::vector* values) = 0; virtual std::vector MultiGetForUpdate( const ReadOptions& options, const std::vector& keys, std::vector* values) = 0; // Returns an iterator that will iterate on all keys in the default // column family including both keys in the DB and uncommitted keys in this // transaction. // // Setting read_options.snapshot will affect what is read from the // DB but will NOT change which keys are read from this transaction (the keys // in this transaction do not yet belong to any snapshot and will be fetched // regardless). // // Caller is responsible for deleting the returned Iterator. // // The returned iterator is only valid until Commit(), Rollback(), or // RollbackToSavePoint() is called. 
virtual Iterator* GetIterator(const ReadOptions& read_options) = 0; virtual Iterator* GetIterator(const ReadOptions& read_options, ColumnFamilyHandle* column_family) = 0; // Put, Merge, Delete, and SingleDelete behave similarly to the corresponding // functions in WriteBatch, but will also do conflict checking on the // keys being written. // // assume_tracked=true expects the key to be already tracked. More // specifically, it means the key was previously tracked in the same // savepoint, with the same exclusive flag, and at a lower sequence number. // If valid then it skips ValidateSnapshot. Returns error otherwise. // // If this Transaction was created on an OptimisticTransactionDB, these // functions should always return Status::OK(). // // If this Transaction was created on a TransactionDB, the status returned // can be: // Status::OK() on success, // Status::Busy() if there is a write conflict, // Status::TimedOut() if a lock could not be acquired, // Status::TryAgain() if the memtable history size is not large enough // (See max_write_buffer_size_to_maintain) // or other errors on unexpected failures. 
virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value, const bool assume_tracked = false) = 0; virtual Status Put(const Slice& key, const Slice& value) = 0; virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value, const bool assume_tracked = false) = 0; virtual Status Put(const SliceParts& key, const SliceParts& value) = 0; virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value, const bool assume_tracked = false) = 0; virtual Status Merge(const Slice& key, const Slice& value) = 0; virtual Status Delete(ColumnFamilyHandle* column_family, const Slice& key, const bool assume_tracked = false) = 0; virtual Status Delete(const Slice& key) = 0; virtual Status Delete(ColumnFamilyHandle* column_family, const SliceParts& key, const bool assume_tracked = false) = 0; virtual Status Delete(const SliceParts& key) = 0; virtual Status SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, const bool assume_tracked = false) = 0; virtual Status SingleDelete(const Slice& key) = 0; virtual Status SingleDelete(ColumnFamilyHandle* column_family, const SliceParts& key, const bool assume_tracked = false) = 0; virtual Status SingleDelete(const SliceParts& key) = 0; // PutUntracked() will write a Put to the batch of operations to be committed // in this transaction. This write will only happen if this transaction // gets committed successfully. But unlike Transaction::Put(), // no conflict checking will be done for this key. // // If this Transaction was created on a PessimisticTransactionDB, this // function will still acquire locks necessary to make sure this write doesn't // cause conflicts in other transactions and may return Status::Busy(). 
virtual Status PutUntracked(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) = 0; virtual Status PutUntracked(const Slice& key, const Slice& value) = 0; virtual Status PutUntracked(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) = 0; virtual Status PutUntracked(const SliceParts& key, const SliceParts& value) = 0; virtual Status MergeUntracked(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) = 0; virtual Status MergeUntracked(const Slice& key, const Slice& value) = 0; virtual Status DeleteUntracked(ColumnFamilyHandle* column_family, const Slice& key) = 0; virtual Status DeleteUntracked(const Slice& key) = 0; virtual Status DeleteUntracked(ColumnFamilyHandle* column_family, const SliceParts& key) = 0; virtual Status DeleteUntracked(const SliceParts& key) = 0; virtual Status SingleDeleteUntracked(ColumnFamilyHandle* column_family, const Slice& key) = 0; virtual Status SingleDeleteUntracked(const Slice& key) = 0; // Similar to WriteBatch::PutLogData virtual void PutLogData(const Slice& blob) = 0; // By default, all Put/Merge/Delete operations will be indexed in the // transaction so that Get/GetForUpdate/GetIterator can search for these // keys. // // If the caller does not want to fetch the keys about to be written, // they may want to avoid indexing as a performance optimization. // Calling DisableIndexing() will turn off indexing for all future // Put/Merge/Delete operations until EnableIndexing() is called. // // If a key is Put/Merge/Deleted after DisableIndexing is called and then // is fetched via Get/GetForUpdate/GetIterator, the result of the fetch is // undefined. virtual void DisableIndexing() = 0; virtual void EnableIndexing() = 0; // Returns the number of distinct Keys being tracked by this transaction. // If this transaction was created by a TransactionDB, this is the number of // keys that are currently locked by this transaction. 
// If this transaction was created by an OptimisticTransactionDB, this is the // number of keys that need to be checked for conflicts at commit time. virtual uint64_t GetNumKeys() const = 0; // Returns the number of Puts/Deletes/Merges that have been applied to this // transaction so far. virtual uint64_t GetNumPuts() const = 0; virtual uint64_t GetNumDeletes() const = 0; virtual uint64_t GetNumMerges() const = 0; // Returns the elapsed time in milliseconds since this Transaction began. virtual uint64_t GetElapsedTime() const = 0; // Fetch the underlying write batch that contains all pending changes to be // committed. // // Note: You should not write or delete anything from the batch directly and // should only use the functions in the Transaction class to // write to this transaction. virtual WriteBatchWithIndex* GetWriteBatch() = 0; // Change the value of TransactionOptions.lock_timeout (in milliseconds) for // this transaction. // Has no effect on OptimisticTransactions. virtual void SetLockTimeout(int64_t timeout) = 0; // Return the WriteOptions that will be used during Commit() virtual WriteOptions* GetWriteOptions() = 0; // Reset the WriteOptions that will be used during Commit(). virtual void SetWriteOptions(const WriteOptions& write_options) = 0; // If this key was previously fetched in this transaction using // GetForUpdate/MultigetForUpdate(), calling UndoGetForUpdate will tell // the transaction that it no longer needs to do any conflict checking // for this key. // // If a key has been fetched N times via GetForUpdate/MultigetForUpdate(), // then UndoGetForUpdate will only have an effect if it is also called N // times. If this key has been written to in this transaction, // UndoGetForUpdate() will have no effect. // // If SetSavePoint() has been called after the GetForUpdate(), // UndoGetForUpdate() will not have any effect. 
// // If this Transaction was created by an OptimisticTransactionDB, // calling UndoGetForUpdate can affect whether this key is conflict checked // at commit time. // If this Transaction was created by a TransactionDB, // calling UndoGetForUpdate may release any held locks for this key. virtual void UndoGetForUpdate(ColumnFamilyHandle* column_family, const Slice& key) = 0; virtual void UndoGetForUpdate(const Slice& key) = 0; virtual Status RebuildFromWriteBatch(WriteBatch* src_batch) = 0; virtual WriteBatch* GetCommitTimeWriteBatch() = 0; virtual void SetLogNumber(uint64_t log) { log_number_ = log; } virtual uint64_t GetLogNumber() const { return log_number_; } virtual Status SetName(const TransactionName& name) = 0; virtual TransactionName GetName() const { return name_; } virtual TransactionID GetID() const { return 0; } virtual bool IsDeadlockDetect() const { return false; } virtual std::vector GetWaitingTxns( uint32_t* /*column_family_id*/, std::string* /*key*/) const { assert(false); return std::vector(); } enum TransactionState { STARTED = 0, AWAITING_PREPARE = 1, PREPARED = 2, AWAITING_COMMIT = 3, COMMITTED = 4, COMMITED = COMMITTED, // old misspelled name AWAITING_ROLLBACK = 5, ROLLEDBACK = 6, LOCKS_STOLEN = 7, }; TransactionState GetState() const { return txn_state_; } void SetState(TransactionState state) { txn_state_ = state; } // NOTE: Experimental feature // The globally unique id with which the transaction is identified. This id // might or might not be set depending on the implementation. Similarly the // implementation decides the point in lifetime of a transaction at which it // assigns the id. Although currently it is the case, the id is not guaranteed // to remain the same across restarts. 
uint64_t GetId() { return id_; } protected: // Both constructors start the transaction in the STARTED state with log // number 0, so neither member is ever read uninitialized. explicit Transaction(const TransactionDB* /*db*/) : log_number_(0), txn_state_(STARTED) {} Transaction() : log_number_(0), txn_state_(STARTED) {} // the log in which the prepared section for this txn resides // (for two phase commit) uint64_t log_number_; TransactionName name_; // Execution status of the transaction. std::atomic txn_state_; uint64_t id_ = 0; virtual void SetId(uint64_t id) { assert(id_ == 0); id_ = id; } virtual uint64_t GetLastLogNumber() const { return log_number_; } private: friend class PessimisticTransactionDB; friend class WriteUnpreparedTxnDB; friend class TransactionTest_TwoPhaseLogRollingTest_Test; friend class TransactionTest_TwoPhaseLogRollingTest2_Test; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/transaction_db.h000066400000000000000000000323411370372246700235000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/utilities/stackable_db.h" #include "rocksdb/utilities/transaction.h" // Database with Transaction support. // // See transaction.h and examples/transaction_example.cc namespace ROCKSDB_NAMESPACE { class TransactionDBMutexFactory; enum TxnDBWritePolicy { WRITE_COMMITTED = 0, // write only the committed data WRITE_PREPARED, // write data after the prepare phase of 2pc WRITE_UNPREPARED // write data before the prepare phase of 2pc }; const uint32_t kInitialMaxDeadlocks = 5; struct TransactionDBOptions { // Specifies the maximum number of keys that can be locked at the same time // per column family. 
// If the number of locked keys is greater than max_num_locks, transaction // writes (or GetForUpdate) will return an error. // If this value is not positive, no limit will be enforced. int64_t max_num_locks = -1; // Stores the number of latest deadlocks to track uint32_t max_num_deadlocks = kInitialMaxDeadlocks; // Increasing this value will increase the concurrency by dividing the lock // table (per column family) into more sub-tables, each with their own // separate // mutex. size_t num_stripes = 16; // If positive, specifies the default wait timeout in milliseconds when // a transaction attempts to lock a key if not specified by // TransactionOptions::lock_timeout. // // If 0, no waiting is done if a lock cannot instantly be acquired. // If negative, there is no timeout. Not using a timeout is not recommended // as it can lead to deadlocks. Currently, there is no deadlock-detection to // recover // from a deadlock. int64_t transaction_lock_timeout = 1000; // 1 second // If positive, specifies the wait timeout in milliseconds when writing a key // OUTSIDE of a transaction (ie by calling DB::Put(),Merge(),Delete(),Write() // directly). // If 0, no waiting is done if a lock cannot instantly be acquired. // If negative, there is no timeout and will block indefinitely when acquiring // a lock. // // Not using a timeout can lead to deadlocks. Currently, there // is no deadlock-detection to recover from a deadlock. While DB writes // cannot deadlock with other DB writes, they can deadlock with a transaction. // A negative timeout should only be used if all transactions have a small // expiration set. int64_t default_lock_timeout = 1000; // 1 second // If set, the TransactionDB will use this implementation of a mutex and // condition variable for all transaction locking instead of the default // mutex/condvar implementation. std::shared_ptr custom_mutex_factory; // The policy for when to write the data into the DB. 
The default policy is to // write only the committed data (WRITE_COMMITTED). The data could be written // before the commit phase. The DB then needs to provide the mechanisms to // tell apart committed from uncommitted data. TxnDBWritePolicy write_policy = TxnDBWritePolicy::WRITE_COMMITTED; // TODO(myabandeh): remove this option // Note: this is a temporary option as a hot fix in rollback of writeprepared // txns in myrocks. MyRocks uses merge operands for autoinc column id without // however obtaining locks. This breaks the assumption behind the rollback // logic in myrocks. This hack of simply not rolling back merge operands works // for the special way that myrocks uses these operands. bool rollback_merge_operands = false; // If true, the TransactionDB implementation might skip concurrency control // unless it is overridden by TransactionOptions or // TransactionDBWriteOptimizations. This can be used in conjunction with // DBOptions::unordered_write when the TransactionDB is used solely for write // ordering rather than concurrency control. bool skip_concurrency_control = false; // This option is only valid for write unprepared. If a write batch exceeds // this threshold, then the transaction will implicitly flush the currently // pending writes into the database. A value of 0 or less means no limit. int64_t default_write_batch_flush_threshold = 0; private: // 128 entries size_t wp_snapshot_cache_bits = static_cast(7); // 8M entries, 64MB size size_t wp_commit_cache_bits = static_cast(23); // For testing, whether transaction name should be auto-generated or not. This // is useful for write unprepared which requires named transactions. bool autogenerate_name = false; friend class WritePreparedTxnDB; friend class WriteUnpreparedTxn; friend class WritePreparedTransactionTestBase; friend class TransactionTestBase; friend class MySQLStyleTransactionTest; }; struct TransactionOptions { // Setting set_snapshot=true is the same as calling // Transaction::SetSnapshot(). 
bool set_snapshot = false; // Setting to true means that before acquiring locks, this transaction will // check if doing so will cause a deadlock. If so, it will return with // Status::Busy. The user should retry their transaction. bool deadlock_detect = false; // If set, it states that the CommitTimeWriteBatch represents the latest state // of the application, has only one sub-batch, i.e., no duplicate keys, and // meant to be used later during recovery. It enables an optimization to // postpone updating the memtable with CommitTimeWriteBatch to only // SwitchMemtable or recovery. bool use_only_the_last_commit_time_batch_for_recovery = false; // TODO(agiardullo): TransactionDB does not yet support comparators that allow // two non-equal keys to be equivalent. Ie, cmp->Compare(a,b) should only // return 0 if // a.compare(b) returns 0. // If positive, specifies the wait timeout in milliseconds when // a transaction attempts to lock a key. // // If 0, no waiting is done if a lock cannot instantly be acquired. // If negative, TransactionDBOptions::transaction_lock_timeout will be used. int64_t lock_timeout = -1; // Expiration duration in milliseconds. If non-negative, transactions that // last longer than this many milliseconds will fail to commit. If not set, // a forgotten transaction that is never committed, rolled back, or deleted // will never relinquish any locks it holds. This could prevent keys from // being written by other writers. int64_t expiration = -1; // The number of traversals to make during deadlock detection. int64_t deadlock_detect_depth = 50; // The maximum number of bytes used for the write batch. 0 means no limit. size_t max_write_batch_size = 0; // Skip Concurrency Control. This could be as an optimization if the // application knows that the transaction would not have any conflict with // concurrent transactions. 
It could also be used during recovery if (i) // application guarantees no conflict between prepared transactions in the WAL // (ii) application guarantees that recovered transactions will be rolled // back/commit before new transactions start. // Default: false bool skip_concurrency_control = false; // In pessimistic transaction, if this is true, then you can skip Prepare // before Commit, otherwise, you must Prepare before Commit. bool skip_prepare = true; // See TransactionDBOptions::default_write_batch_flush_threshold for // description. If a negative value is specified, then the default value from // TransactionDBOptions is used. int64_t write_batch_flush_threshold = -1; }; // The per-write optimizations that do not involve transactions. TransactionDB // implementation might or might not make use of the specified optimizations. struct TransactionDBWriteOptimizations { // If it is true it means that the application guarantees that the // key-set in the write batch do not conflict with any concurrent transaction // and hence the concurrency control mechanism could be skipped for this // write. bool skip_concurrency_control = false; // If true, the application guarantees that there is no duplicate in the write batch and any employed mechanism to handle // duplicate keys could be skipped. 
bool skip_duplicate_key_check = false; }; struct KeyLockInfo { std::string key; std::vector ids; bool exclusive; }; struct DeadlockInfo { TransactionID m_txn_id; uint32_t m_cf_id; bool m_exclusive; std::string m_waiting_key; }; struct DeadlockPath { std::vector path; bool limit_exceeded; int64_t deadlock_time; explicit DeadlockPath(std::vector path_entry, const int64_t& dl_time) : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {} // empty path, limit exceeded constructor and default constructor explicit DeadlockPath(const int64_t& dl_time = 0, bool limit = false) : path(0), limit_exceeded(limit), deadlock_time(dl_time) {} // True when path is empty and limit_exceeded is false. Declared const so it // can be called on const DeadlockPath objects. bool empty() const { return path.empty() && !limit_exceeded; } }; class TransactionDB : public StackableDB { public: // Optimized version of ::Write that receives additional optimization requests // such as skip_concurrency_control. using StackableDB::Write; virtual Status Write(const WriteOptions& opts, const TransactionDBWriteOptimizations&, WriteBatch* updates) { // The default implementation ignores TransactionDBWriteOptimizations and // falls back to the un-optimized version of ::Write return Write(opts, updates); } // Open a TransactionDB similar to DB::Open(). // Internally calls PrepareWrap() and WrapDB() // If the return status is not ok, then dbptr is set to nullptr. static Status Open(const Options& options, const TransactionDBOptions& txn_db_options, const std::string& dbname, TransactionDB** dbptr); static Status Open(const DBOptions& db_options, const TransactionDBOptions& txn_db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, TransactionDB** dbptr); // Note: PrepareWrap() may change parameters, make copies before the // invocation if needed. static void PrepareWrap(DBOptions* db_options, std::vector* column_families, std::vector* compaction_enabled_cf_indices); // If the return status is not ok, then dbptr will be set to nullptr. 
The // input db parameter might or might not be deleted as a result of the // failure. If it is properly deleted it will be set to nullptr. If the return // status is ok, the ownership of db is transferred to dbptr. static Status WrapDB(DB* db, const TransactionDBOptions& txn_db_options, const std::vector& compaction_enabled_cf_indices, const std::vector& handles, TransactionDB** dbptr); // If the return status is not ok, then dbptr will be set to nullptr. The // input db parameter might or might not be deleted as a result of the // failure. If it is properly deleted it will be set to nullptr. If the return // status is ok, the ownership of db is transferred to dbptr. static Status WrapStackableDB( StackableDB* db, const TransactionDBOptions& txn_db_options, const std::vector& compaction_enabled_cf_indices, const std::vector& handles, TransactionDB** dbptr); // Since the destructor in StackableDB is virtual, this destructor is virtual // too. The root db will be deleted by the base's destructor. ~TransactionDB() override {} // Starts a new Transaction. // // Caller is responsible for deleting the returned transaction when no // longer needed. // // If old_txn is not null, BeginTransaction will reuse this Transaction // handle instead of allocating a new one. This is an optimization to avoid // extra allocations when repeatedly creating transactions. virtual Transaction* BeginTransaction( const WriteOptions& write_options, const TransactionOptions& txn_options = TransactionOptions(), Transaction* old_txn = nullptr) = 0; virtual Transaction* GetTransactionByName(const TransactionName& name) = 0; virtual void GetAllPreparedTransactions(std::vector* trans) = 0; // Returns the set of all locks held. 
// // The mapping is column family id -> KeyLockInfo virtual std::unordered_multimap GetLockStatusData() = 0; virtual std::vector GetDeadlockInfoBuffer() = 0; virtual void SetDeadlockInfoBufferSize(uint32_t target_size) = 0; protected: // To Create an TransactionDB, call Open() // The ownership of db is transferred to the base StackableDB explicit TransactionDB(DB* db) : StackableDB(db) {} // No copying allowed TransactionDB(const TransactionDB&) = delete; void operator=(const TransactionDB&) = delete; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/transaction_db_mutex.h000066400000000000000000000062561370372246700247300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { // TransactionDBMutex and TransactionDBCondVar APIs allows applications to // implement custom mutexes and condition variables to be used by a // TransactionDB when locking keys. // // To open a TransactionDB with a custom TransactionDBMutexFactory, set // TransactionDBOptions.custom_mutex_factory. class TransactionDBMutex { public: virtual ~TransactionDBMutex() {} // Attempt to acquire lock. Return OK on success, or other Status on failure. // If returned status is OK, TransactionDB will eventually call UnLock(). virtual Status Lock() = 0; // Attempt to acquire lock. If timeout is non-negative, operation may be // failed after this many microseconds. // Returns OK on success, // TimedOut if timed out, // or other Status on failure. // If returned status is OK, TransactionDB will eventually call UnLock(). 
virtual Status TryLockFor(int64_t timeout_time) = 0; // Unlock Mutex that was successfully locked by Lock() or TryLockFor() virtual void UnLock() = 0; }; class TransactionDBCondVar { public: virtual ~TransactionDBCondVar() {} // Block current thread until condition variable is notified by a call to // Notify() or NotifyAll(). Wait() will be called with mutex locked. // Returns OK if notified. // Returns non-OK if TransactionDB should stop waiting and fail the operation. // May return OK spuriously even if not notified. virtual Status Wait(std::shared_ptr mutex) = 0; // Block current thread until condition variable is notified by a call to // Notify() or NotifyAll(), or if the timeout is reached. // WaitFor() will be called with mutex locked. // // If timeout is non-negative, operation should be failed after this many // microseconds. // If implementing a custom version of this class, the implementation may // choose to ignore the timeout. // // Returns OK if notified. // Returns TimedOut if timeout is reached. // Returns other status if TransactionDB should otherwise stop waiting and // fail the operation. // May return OK spuriously even if not notified. virtual Status WaitFor(std::shared_ptr mutex, int64_t timeout_time) = 0; // If any threads are waiting on *this, unblock at least one of the // waiting threads. virtual void Notify() = 0; // Unblocks all threads waiting on *this. virtual void NotifyAll() = 0; }; // Factory class that can allocate mutexes and condition variables. class TransactionDBMutexFactory { public: // Create a TransactionDBMutex object. virtual std::shared_ptr AllocateMutex() = 0; // Create a TransactionDBCondVar object. virtual std::shared_ptr AllocateCondVar() = 0; virtual ~TransactionDBMutexFactory() {} }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/utility_db.h000066400000000000000000000020521370372246700226520ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include #include #include "rocksdb/db.h" #include "rocksdb/utilities/db_ttl.h" #include "rocksdb/utilities/stackable_db.h" namespace ROCKSDB_NAMESPACE { // Please don't use this class. It's deprecated class UtilityDB { public: // This function is here only for backwards compatibility. Please use the // functions defined in DBWithTTl (rocksdb/utilities/db_ttl.h) // (deprecated) #if defined(__GNUC__) || defined(__clang__) __attribute__((deprecated)) #elif _WIN32 __declspec(deprecated) #endif static Status OpenTtlDB(const Options& options, const std::string& name, StackableDB** dbptr, int32_t ttl = 0, bool read_only = false); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/utilities/write_batch_with_index.h000066400000000000000000000250201370372246700252170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // A WriteBatchWithIndex with a binary searchable index built for all the keys // inserted. 
#pragma once #ifndef ROCKSDB_LITE #include #include #include #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "rocksdb/write_batch.h" #include "rocksdb/write_batch_base.h" namespace ROCKSDB_NAMESPACE { class ColumnFamilyHandle; class Comparator; class DB; class ReadCallback; struct ReadOptions; struct DBOptions; enum WriteType { kPutRecord, kMergeRecord, kDeleteRecord, kSingleDeleteRecord, kDeleteRangeRecord, kLogDataRecord, kXIDRecord, }; // an entry for Put, Merge, Delete, or SingleDelete entry for write batches. // Used in WBWIIterator. struct WriteEntry { WriteType type; Slice key; Slice value; }; // Iterator of one column family out of a WriteBatchWithIndex. class WBWIIterator { public: virtual ~WBWIIterator() {} virtual bool Valid() const = 0; virtual void SeekToFirst() = 0; virtual void SeekToLast() = 0; virtual void Seek(const Slice& key) = 0; virtual void SeekForPrev(const Slice& key) = 0; virtual void Next() = 0; virtual void Prev() = 0; // the return WriteEntry is only valid until the next mutation of // WriteBatchWithIndex virtual WriteEntry Entry() const = 0; virtual Status status() const = 0; }; // A WriteBatchWithIndex with a binary searchable index built for all the keys // inserted. // In Put(), Merge() Delete(), or SingleDelete(), the same function of the // wrapped will be called. At the same time, indexes will be built. // By calling GetWriteBatch(), a user will get the WriteBatch for the data // they inserted, which can be used for DB::Write(). // A user can call NewIterator() to create an iterator. class WriteBatchWithIndex : public WriteBatchBase { public: // backup_index_comparator: the backup comparator used to compare keys // within the same column family, if column family is not given in the // interface, or we can't find a column family from the column family handle // passed in, backup_index_comparator will be used for the column family. 
// reserved_bytes: reserved bytes in underlying WriteBatch
// max_bytes: maximum size of underlying WriteBatch in bytes
// overwrite_key: if true, overwrite the key in the index when inserting
//                the same key as previously, so iterator will never
//                show two entries with the same key.
explicit WriteBatchWithIndex(
    const Comparator* backup_index_comparator = BytewiseComparator(),
    size_t reserved_bytes = 0, bool overwrite_key = false,
    size_t max_bytes = 0);

~WriteBatchWithIndex() override;
WriteBatchWithIndex(WriteBatchWithIndex&&);
WriteBatchWithIndex& operator=(WriteBatchWithIndex&&);

using WriteBatchBase::Put;
Status Put(ColumnFamilyHandle* column_family, const Slice& key,
           const Slice& value) override;

Status Put(const Slice& key, const Slice& value) override;

using WriteBatchBase::Merge;
Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
             const Slice& value) override;

Status Merge(const Slice& key, const Slice& value) override;

using WriteBatchBase::Delete;
Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
Status Delete(const Slice& key) override;

using WriteBatchBase::SingleDelete;
Status SingleDelete(ColumnFamilyHandle* column_family,
                    const Slice& key) override;
Status SingleDelete(const Slice& key) override;

using WriteBatchBase::DeleteRange;
// Range deletion is not supported by this class: both overloads ignore their
// arguments and return Status::NotSupported.
Status DeleteRange(ColumnFamilyHandle* /* column_family */,
                   const Slice& /* begin_key */,
                   const Slice& /* end_key */) override {
  return Status::NotSupported(
      "DeleteRange unsupported in WriteBatchWithIndex");
}
Status DeleteRange(const Slice& /* begin_key */,
                   const Slice& /* end_key */) override {
  return Status::NotSupported(
      "DeleteRange unsupported in WriteBatchWithIndex");
}

using WriteBatchBase::PutLogData;
Status PutLogData(const Slice& blob) override;

using WriteBatchBase::Clear;
void Clear() override;

using WriteBatchBase::GetWriteBatch;
WriteBatch* GetWriteBatch() override;

// Create an iterator of a column family. User can call iterator.Seek() to
// seek to the first entry at or after a key. Keys will be iterated in the
// order given by index_comparator. For multiple updates on the same key,
// each update will be returned as a separate entry, in the order of update
// time.
//
// The returned iterator should be deleted by the caller.
WBWIIterator* NewIterator(ColumnFamilyHandle* column_family);
// Create an iterator of the default column family.
WBWIIterator* NewIterator();

// Will create a new Iterator that will use WBWIIterator as a delta and
// base_iterator as base.
//
// This function is only supported if the WriteBatchWithIndex was
// constructed with overwrite_key=true.
//
// The returned iterator should be deleted by the caller.
// The base_iterator is now 'owned' by the returned iterator. Deleting the
// returned iterator will also delete the base_iterator.
//
// Updating write batch with the current key of the iterator is not safe.
// We strongly recommend users not to do it. It will invalidate the current
// key() and value() of the iterator. This invalidation happens even before
// the write batch update finishes. The state may recover after Next() is
// called.
Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
                              Iterator* base_iterator,
                              const ReadOptions* opts = nullptr);
// default column family
Iterator* NewIteratorWithBase(Iterator* base_iterator);

// Similar to DB::Get() but will only read the key from this batch.
// If the batch does not have enough data to resolve Merge operations,
// MergeInProgress status may be returned.
Status GetFromBatch(ColumnFamilyHandle* column_family,
                    const DBOptions& options, const Slice& key,
                    std::string* value);

// Similar to previous function but does not require a column_family.
// Note: An InvalidArgument status will be returned if there are any Merge
// operators for this key. Use previous method instead.
// Forwards to the column-family overload with a null column family handle.
Status GetFromBatch(const DBOptions& options, const Slice& key,
                    std::string* value) {
  return GetFromBatch(nullptr, options, key, value);
}

// Similar to DB::Get() but will also read writes from this batch.
//
// This function will query both this batch and the DB and then merge
// the results using the DB's merge operator (if the batch contains any
// merge requests).
//
// Setting read_options.snapshot will affect what is read from the DB
// but will NOT change which keys are read from the batch (the keys in
// this batch do not yet belong to any snapshot and will be fetched
// regardless).
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
                         const Slice& key, std::string* value);

// An overload of the above method that receives a PinnableSlice
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
                         const Slice& key, PinnableSlice* value);

Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
                         ColumnFamilyHandle* column_family, const Slice& key,
                         std::string* value);

// An overload of the above method that receives a PinnableSlice
Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
                         ColumnFamilyHandle* column_family, const Slice& key,
                         PinnableSlice* value);

// Batched lookup over `num_keys` keys; one entry of `values` and `statuses`
// per key.
// NOTE(review): presumably the multi-key counterpart of GetFromBatchAndDB();
// the meaning of `sorted_input` is not documented here -- confirm against the
// implementation.
void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
                            ColumnFamilyHandle* column_family,
                            const size_t num_keys, const Slice* keys,
                            PinnableSlice* values, Status* statuses,
                            bool sorted_input);

// Records the state of the batch for future calls to RollbackToSavePoint().
// May be called multiple times to set multiple save points.
void SetSavePoint() override;

// Remove all entries in this batch (Put, Merge, Delete, SingleDelete,
// PutLogData) since the most recent call to SetSavePoint() and removes the
// most recent save point.
// If there is no previous call to SetSavePoint(), behaves the same as
// Clear().
//
// Calling RollbackToSavePoint invalidates any open iterators on this batch.
// // Returns Status::OK() on success, // Status::NotFound() if no previous call to SetSavePoint(), // or other Status on corruption. Status RollbackToSavePoint() override; // Pop the most recent save point. // If there is no previous call to SetSavePoint(), Status::NotFound() // will be returned. // Otherwise returns Status::OK(). Status PopSavePoint() override; void SetMaxBytes(size_t max_bytes) override; size_t GetDataSize() const; private: friend class PessimisticTransactionDB; friend class WritePreparedTxn; friend class WriteUnpreparedTxn; friend class WriteBatchWithIndex_SubBatchCnt_Test; // Returns the number of sub-batches inside the write batch. A sub-batch // starts right before inserting a key that is a duplicate of a key in the // last sub-batch. size_t SubBatchCnt(); Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, ReadCallback* callback); void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, bool sorted_input, ReadCallback* callback); struct Rep; std::unique_ptr rep; }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/include/rocksdb/version.h000066400000000000000000000012021370372246700201500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #define ROCKSDB_MAJOR 6 #define ROCKSDB_MINOR 11 #define ROCKSDB_PATCH 4 // Do not use these. We made the mistake of declaring macros starting with // double underscore. Now we have to live with our choice. 
We'll deprecate these // at some point #define __ROCKSDB_MAJOR__ ROCKSDB_MAJOR #define __ROCKSDB_MINOR__ ROCKSDB_MINOR #define __ROCKSDB_PATCH__ ROCKSDB_PATCH rocksdb-6.11.4/include/rocksdb/wal_filter.h000066400000000000000000000105471370372246700206270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class WriteBatch; // WALFilter allows an application to inspect write-ahead-log (WAL) // records or modify their processing on recovery. // Please see the details below. class WalFilter { public: enum class WalProcessingOption { // Continue processing as usual kContinueProcessing = 0, // Ignore the current record but continue processing of log(s) kIgnoreCurrentRecord = 1, // Stop replay of logs and discard logs // Logs won't be replayed on subsequent recovery kStopReplay = 2, // Corrupted record detected by filter kCorruptedRecord = 3, // Marker for enum count kWalProcessingOptionMax = 4 }; virtual ~WalFilter() {} // Provide ColumnFamily->LogNumber map to filter // so that filter can determine whether a log number applies to a given // column family (i.e. that log hasn't been flushed to SST already for the // column family). // We also pass in name->id map as only name is known during // recovery (as handles are opened post-recovery). // while write batch callbacks happen in terms of column family id. 
// // @params cf_lognumber_map column_family_id to lognumber map // @params cf_name_id_map column_family_name to column_family_id map virtual void ColumnFamilyLogNumberMap( const std::map& /*cf_lognumber_map*/, const std::map& /*cf_name_id_map*/) {} // LogRecord is invoked for each log record encountered for all the logs // during replay on logs on recovery. This method can be used to: // * inspect the record (using the batch parameter) // * ignoring current record // (by returning WalProcessingOption::kIgnoreCurrentRecord) // * reporting corrupted record // (by returning WalProcessingOption::kCorruptedRecord) // * stop log replay // (by returning kStop replay) - please note that this implies // discarding the logs from current record onwards. // // @params log_number log_number of the current log. // Filter might use this to determine if the log // record is applicable to a certain column family. // @params log_file_name log file name - only for informational purposes // @params batch batch encountered in the log during recovery // @params new_batch new_batch to populate if filter wants to change // the batch (for example to filter some records out, // or alter some records). // Please note that the new batch MUST NOT contain // more records than original, else recovery would // be failed. // @params batch_changed Whether batch was changed by the filter. // It must be set to true if new_batch was populated, // else new_batch has no effect. // @returns Processing option for the current record. // Please see WalProcessingOption enum above for // details. virtual WalProcessingOption LogRecordFound( unsigned long long /*log_number*/, const std::string& /*log_file_name*/, const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed) { // Default implementation falls back to older function for compatibility return LogRecord(batch, new_batch, batch_changed); } // Please see the comments for LogRecord above. 
This function is for // compatibility only and contains a subset of parameters. // New code should use the function above. virtual WalProcessingOption LogRecord(const WriteBatch& /*batch*/, WriteBatch* /*new_batch*/, bool* /*batch_changed*/) const { return WalProcessingOption::kContinueProcessing; } // Returns a name that identifies this WAL filter. // The name will be printed to LOG file on start up for diagnosis. virtual const char* Name() const = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/write_batch.h000066400000000000000000000330771370372246700207750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // WriteBatch holds a collection of updates to apply atomically to a DB. // // The updates are applied in the order in which they are added // to the WriteBatch. For example, the value of "key" will be "v3" // after the following batch is written: // // batch.Put("key", "v1"); // batch.Delete("key"); // batch.Put("key", "v2"); // batch.Put("key", "v3"); // // Multiple threads can invoke const methods on a WriteBatch without // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same WriteBatch must use // external synchronization. 
#pragma once #include #include #include #include #include #include "rocksdb/status.h" #include "rocksdb/write_batch_base.h" namespace ROCKSDB_NAMESPACE { class Slice; class ColumnFamilyHandle; struct SavePoints; struct SliceParts; struct SavePoint { size_t size; // size of rep_ int count; // count of elements in rep_ uint32_t content_flags; SavePoint() : size(0), count(0), content_flags(0) {} SavePoint(size_t _size, int _count, uint32_t _flags) : size(_size), count(_count), content_flags(_flags) {} void clear() { size = 0; count = 0; content_flags = 0; } bool is_cleared() const { return (size | count | content_flags) == 0; } }; class WriteBatch : public WriteBatchBase { public: explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0); explicit WriteBatch(size_t reserved_bytes, size_t max_bytes, size_t ts_sz); ~WriteBatch() override; using WriteBatchBase::Put; // Store the mapping "key->value" in the database. Status Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) override; Status Put(const Slice& key, const Slice& value) override { return Put(nullptr, key, value); } // Variant of Put() that gathers output like writev(2). The key and value // that will be written to the database are concatenations of arrays of // slices. Status Put(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) override; Status Put(const SliceParts& key, const SliceParts& value) override { return Put(nullptr, key, value); } using WriteBatchBase::Delete; // If the database contains a mapping for "key", erase it. Else do nothing. 
Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
// Forwards to the column-family overload with a null column family handle.
Status Delete(const Slice& key) override { return Delete(nullptr, key); }

// variant that takes SliceParts
Status Delete(ColumnFamilyHandle* column_family,
              const SliceParts& key) override;
// Forwards to the column-family overload with a null column family handle.
Status Delete(const SliceParts& key) override { return Delete(nullptr, key); }

using WriteBatchBase::SingleDelete;
// WriteBatch implementation of DB::SingleDelete().  See db.h.
Status SingleDelete(ColumnFamilyHandle* column_family,
                    const Slice& key) override;
// Forwards to the column-family overload with a null column family handle.
Status SingleDelete(const Slice& key) override {
  return SingleDelete(nullptr, key);
}

// variant that takes SliceParts
Status SingleDelete(ColumnFamilyHandle* column_family,
                    const SliceParts& key) override;
// Forwards to the column-family overload with a null column family handle.
Status SingleDelete(const SliceParts& key) override {
  return SingleDelete(nullptr, key);
}

using WriteBatchBase::DeleteRange;
// WriteBatch implementation of DB::DeleteRange().  See db.h.
Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
                   const Slice& end_key) override;
// Forwards to the column-family overload with a null column family handle.
Status DeleteRange(const Slice& begin_key, const Slice& end_key) override {
  return DeleteRange(nullptr, begin_key, end_key);
}

// variant that takes SliceParts
Status DeleteRange(ColumnFamilyHandle* column_family,
                   const SliceParts& begin_key,
                   const SliceParts& end_key) override;
// Forwards to the column-family overload with a null column family handle.
Status DeleteRange(const SliceParts& begin_key,
                   const SliceParts& end_key) override {
  return DeleteRange(nullptr, begin_key, end_key);
}

using WriteBatchBase::Merge;
// Merge "value" with the existing value of "key" in the database.
// "key->merge(existing, value)" Status Merge(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value) override; Status Merge(const Slice& key, const Slice& value) override { return Merge(nullptr, key, value); } // variant that takes SliceParts Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value) override; Status Merge(const SliceParts& key, const SliceParts& value) override { return Merge(nullptr, key, value); } using WriteBatchBase::PutLogData; // Append a blob of arbitrary size to the records in this batch. The blob will // be stored in the transaction log but not in any other file. In particular, // it will not be persisted to the SST files. When iterating over this // WriteBatch, WriteBatch::Handler::LogData will be called with the contents // of the blob as it is encountered. Blobs, puts, deletes, and merges will be // encountered in the same order in which they were inserted. The blob will // NOT consume sequence number(s) and will NOT increase the count of the batch // // Example application: add timestamps to the transaction log for use in // replication. Status PutLogData(const Slice& blob) override; using WriteBatchBase::Clear; // Clear all updates buffered in this batch. void Clear() override; // Records the state of the batch for future calls to RollbackToSavePoint(). // May be called multiple times to set multiple save points. void SetSavePoint() override; // Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the // most recent call to SetSavePoint() and removes the most recent save point. // If there is no previous call to SetSavePoint(), Status::NotFound() // will be returned. // Otherwise returns Status::OK(). Status RollbackToSavePoint() override; // Pop the most recent save point. // If there is no previous call to SetSavePoint(), Status::NotFound() // will be returned. // Otherwise returns Status::OK(). 
Status PopSavePoint() override; // Support for iterating over the contents of a batch. class Handler { public: virtual ~Handler(); // All handler functions in this class provide default implementations so // we won't break existing clients of Handler on a source code level when // adding a new member function. // default implementation will just call Put without column family for // backwards compatibility. If the column family is not default, // the function is noop virtual Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& value) { if (column_family_id == 0) { // Put() historically doesn't return status. We didn't want to be // backwards incompatible so we didn't change the return status // (this is a public API). We do an ordinary get and return Status::OK() Put(key, value); return Status::OK(); } return Status::InvalidArgument( "non-default column family and PutCF not implemented"); } virtual void Put(const Slice& /*key*/, const Slice& /*value*/) {} virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) { if (column_family_id == 0) { Delete(key); return Status::OK(); } return Status::InvalidArgument( "non-default column family and DeleteCF not implemented"); } virtual void Delete(const Slice& /*key*/) {} virtual Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) { if (column_family_id == 0) { SingleDelete(key); return Status::OK(); } return Status::InvalidArgument( "non-default column family and SingleDeleteCF not implemented"); } virtual void SingleDelete(const Slice& /*key*/) {} virtual Status DeleteRangeCF(uint32_t /*column_family_id*/, const Slice& /*begin_key*/, const Slice& /*end_key*/) { return Status::InvalidArgument("DeleteRangeCF not implemented"); } virtual Status MergeCF(uint32_t column_family_id, const Slice& key, const Slice& value) { if (column_family_id == 0) { Merge(key, value); return Status::OK(); } return Status::InvalidArgument( "non-default column family and MergeCF not implemented"); } 
virtual void Merge(const Slice& /*key*/, const Slice& /*value*/) {} virtual Status PutBlobIndexCF(uint32_t /*column_family_id*/, const Slice& /*key*/, const Slice& /*value*/) { return Status::InvalidArgument("PutBlobIndexCF not implemented"); } // The default implementation of LogData does nothing. virtual void LogData(const Slice& blob); virtual Status MarkBeginPrepare(bool = false) { return Status::InvalidArgument("MarkBeginPrepare() handler not defined."); } virtual Status MarkEndPrepare(const Slice& /*xid*/) { return Status::InvalidArgument("MarkEndPrepare() handler not defined."); } virtual Status MarkNoop(bool /*empty_batch*/) { return Status::InvalidArgument("MarkNoop() handler not defined."); } virtual Status MarkRollback(const Slice& /*xid*/) { return Status::InvalidArgument( "MarkRollbackPrepare() handler not defined."); } virtual Status MarkCommit(const Slice& /*xid*/) { return Status::InvalidArgument("MarkCommit() handler not defined."); } // Continue is called by WriteBatch::Iterate. If it returns false, // iteration is halted. Otherwise, it continues iterating. The default // implementation always returns true. virtual bool Continue(); protected: friend class WriteBatchInternal; virtual bool WriteAfterCommit() const { return true; } virtual bool WriteBeforePrepare() const { return false; } }; Status Iterate(Handler* handler) const; // Retrieve the serialized version of this batch. const std::string& Data() const { return rep_; } // Retrieve data size of the batch. 
size_t GetDataSize() const { return rep_.size(); } // Returns the number of updates in the batch uint32_t Count() const; // Returns true if PutCF will be called during Iterate bool HasPut() const; // Returns true if DeleteCF will be called during Iterate bool HasDelete() const; // Returns true if SingleDeleteCF will be called during Iterate bool HasSingleDelete() const; // Returns true if DeleteRangeCF will be called during Iterate bool HasDeleteRange() const; // Returns true if MergeCF will be called during Iterate bool HasMerge() const; // Returns true if MarkBeginPrepare will be called during Iterate bool HasBeginPrepare() const; // Returns true if MarkEndPrepare will be called during Iterate bool HasEndPrepare() const; // Returns trie if MarkCommit will be called during Iterate bool HasCommit() const; // Returns trie if MarkRollback will be called during Iterate bool HasRollback() const; // Assign timestamp to write batch Status AssignTimestamp(const Slice& ts); // Assign timestamps to write batch Status AssignTimestamps(const std::vector& ts_list); using WriteBatchBase::GetWriteBatch; WriteBatch* GetWriteBatch() override { return this; } // Constructor with a serialized string object explicit WriteBatch(const std::string& rep); explicit WriteBatch(std::string&& rep); WriteBatch(const WriteBatch& src); WriteBatch(WriteBatch&& src) noexcept; WriteBatch& operator=(const WriteBatch& src); WriteBatch& operator=(WriteBatch&& src); // marks this point in the WriteBatch as the last record to // be inserted into the WAL, provided the WAL is enabled void MarkWalTerminationPoint(); const SavePoint& GetWalTerminationPoint() const { return wal_term_point_; } void SetMaxBytes(size_t max_bytes) override { max_bytes_ = max_bytes; } private: friend class WriteBatchInternal; friend class LocalSavePoint; // TODO(myabandeh): this is needed for a hack to collapse the write batch and // remove duplicate keys. Remove it when the hack is replaced with a proper // solution. 
friend class WriteBatchWithIndex; std::unique_ptr save_points_; // When sending a WriteBatch through WriteImpl we might want to // specify that only the first x records of the batch be written to // the WAL. SavePoint wal_term_point_; // For HasXYZ. Mutable to allow lazy computation of results mutable std::atomic content_flags_; // Performs deferred computation of content_flags if necessary uint32_t ComputeContentFlags() const; // Maximum size of rep_. size_t max_bytes_; // Is the content of the batch the application's latest state that meant only // to be used for recovery? Refer to // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery for // more details. bool is_latest_persistent_state_ = false; protected: std::string rep_; // See comment in write_batch.cc for the format of rep_ const size_t timestamp_size_; // Intentionally copyable }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/write_batch_base.h000066400000000000000000000125351370372246700217630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class Slice; class Status; class ColumnFamilyHandle; class WriteBatch; struct SliceParts; // Abstract base class that defines the basic interface for a write batch. // See WriteBatch for a basic implementation and WrithBatchWithIndex for an // indexed implementation. class WriteBatchBase { public: virtual ~WriteBatchBase() {} // Store the mapping "key->value" in the database. 
virtual Status Put(ColumnFamilyHandle* column_family, const Slice& key,
                   const Slice& value) = 0;
virtual Status Put(const Slice& key, const Slice& value) = 0;

// Variant of Put() that gathers output like writev(2).  The key and value
// that will be written to the database are concatenations of arrays of
// slices.
virtual Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
                   const SliceParts& value);
virtual Status Put(const SliceParts& key, const SliceParts& value);

// Merge "value" with the existing value of "key" in the database.
// "key->merge(existing, value)"
virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
                     const Slice& value) = 0;
virtual Status Merge(const Slice& key, const Slice& value) = 0;

// variant that takes SliceParts
virtual Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key,
                     const SliceParts& value);
virtual Status Merge(const SliceParts& key, const SliceParts& value);

// If the database contains a mapping for "key", erase it.  Else do nothing.
virtual Status Delete(ColumnFamilyHandle* column_family,
                      const Slice& key) = 0;
virtual Status Delete(const Slice& key) = 0;

// variant that takes SliceParts
virtual Status Delete(ColumnFamilyHandle* column_family,
                      const SliceParts& key);
virtual Status Delete(const SliceParts& key);

// If the database contains a mapping for "key", erase it. Expects that the
// key was not overwritten. Else do nothing.
virtual Status SingleDelete(ColumnFamilyHandle* column_family,
                            const Slice& key) = 0;
virtual Status SingleDelete(const Slice& key) = 0;

// variant that takes SliceParts
virtual Status SingleDelete(ColumnFamilyHandle* column_family,
                            const SliceParts& key);
virtual Status SingleDelete(const SliceParts& key);

// If the database contains mappings in the range ["begin_key", "end_key"),
// erase them. Else do nothing.
virtual Status DeleteRange(ColumnFamilyHandle* column_family,
                           const Slice& begin_key, const Slice& end_key) = 0;
virtual Status DeleteRange(const Slice& begin_key, const Slice& end_key) = 0;

// variant that takes SliceParts
virtual Status DeleteRange(ColumnFamilyHandle* column_family,
                           const SliceParts& begin_key,
                           const SliceParts& end_key);
virtual Status DeleteRange(const SliceParts& begin_key,
                           const SliceParts& end_key);

// Append a blob of arbitrary size to the records in this batch. The blob will
// be stored in the transaction log but not in any other file. In particular,
// it will not be persisted to the SST files. When iterating over this
// WriteBatch, WriteBatch::Handler::LogData will be called with the contents
// of the blob as it is encountered. Blobs, puts, deletes, and merges will be
// encountered in the same order in which they were inserted. The blob will
// NOT consume sequence number(s) and will NOT increase the count of the batch
//
// Example application: add timestamps to the transaction log for use in
// replication.
virtual Status PutLogData(const Slice& blob) = 0;

// Clear all updates buffered in this batch.
virtual void Clear() = 0;

// Convert this batch into a WriteBatch.  This is an abstracted way of
// converting any WriteBatchBase (e.g. WriteBatchWithIndex) into a basic
// WriteBatch.
virtual WriteBatch* GetWriteBatch() = 0;

// Records the state of the batch for future calls to RollbackToSavePoint().
// May be called multiple times to set multiple save points.
virtual void SetSavePoint() = 0;

// Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the
// most recent call to SetSavePoint() and removes the most recent save point.
// If there is no previous call to SetSavePoint(), behaves the same as
// Clear().
virtual Status RollbackToSavePoint() = 0;

// Pop the most recent save point.
// If there is no previous call to SetSavePoint(), Status::NotFound()
// will be returned.
// Otherwise returns Status::OK().
virtual Status PopSavePoint() = 0; // Sets the maximum size of the write batch in bytes. 0 means no limit. virtual void SetMaxBytes(size_t max_bytes) = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/include/rocksdb/write_buffer_manager.h000066400000000000000000000064661370372246700226610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // WriteBufferManager is for managing memory allocation for one or more // MemTables. #pragma once #include #include #include "rocksdb/cache.h" namespace ROCKSDB_NAMESPACE { class WriteBufferManager { public: // _buffer_size = 0 indicates no limit. Memory won't be capped. // memory_usage() won't be valid and ShouldFlush() will always return true. // if `cache` is provided, we'll put dummy entries in the cache and cost // the memory allocated to the cache. It can be used even if _buffer_size = 0. 
explicit WriteBufferManager(size_t _buffer_size, std::shared_ptr cache = {});
// No copying allowed
WriteBufferManager(const WriteBufferManager&) = delete;
WriteBufferManager& operator=(const WriteBufferManager&) = delete;

~WriteBufferManager();

// True when a non-zero buffer size was configured.
bool enabled() const { return buffer_size_ != 0; }

// True when a cache representation (cache_rep_) is present.
bool cost_to_cache() const { return cache_rep_ != nullptr; }

// Current value of the memory_used_ counter (relaxed atomic load).
// Only valid if enabled()
size_t memory_usage() const {
  return memory_used_.load(std::memory_order_relaxed);
}

// Current value of the memory_active_ counter (relaxed atomic load), i.e.
// reserved memory that has not yet been passed to ScheduleFreeMem().
size_t mutable_memtable_memory_usage() const {
  return memory_active_.load(std::memory_order_relaxed);
}

// The configured buffer size; 0 means the manager is not enabled().
size_t buffer_size() const { return buffer_size_; }

// Decides whether a flush should be triggered. Returns true when either
//   (a) mutable memtable usage exceeds mutable_limit_, or
//   (b) total usage has reached buffer_size_ while at least half of
//       buffer_size_ is still held by mutable memtables.
// Always returns false when the manager is not enabled().
// Should only be called from write thread
bool ShouldFlush() const {
  if (enabled()) {
    if (mutable_memtable_memory_usage() > mutable_limit_) {
      return true;
    }
    if (memory_usage() >= buffer_size_ &&
        mutable_memtable_memory_usage() >= buffer_size_ / 2) {
      // If the memory exceeds the buffer size, we trigger more aggressive
      // flush. But if already more than half memory is being flushed,
      // triggering more flush may not help. We will hold it instead.
      return true;
    }
  }
  return false;
}

// Accounts for `mem` newly reserved bytes. With a cache attached the request
// is delegated to ReserveMemWithCache(); otherwise memory_used_ is increased
// directly when enabled(). memory_active_ is increased whenever enabled(),
// in both cases.
void ReserveMem(size_t mem) {
  if (cache_rep_ != nullptr) {
    ReserveMemWithCache(mem);
  } else if (enabled()) {
    memory_used_.fetch_add(mem, std::memory_order_relaxed);
  }
  if (enabled()) {
    memory_active_.fetch_add(mem, std::memory_order_relaxed);
  }
}

// We are in the process of freeing `mem` bytes, so it is not considered
// when checking the soft limit.
// Only memory_active_ is reduced here; memory_used_ is left untouched.
void ScheduleFreeMem(size_t mem) {
  if (enabled()) {
    memory_active_.fetch_sub(mem, std::memory_order_relaxed);
  }
}

// Releases `mem` bytes: delegated to FreeMemWithCache() when a cache is
// attached, otherwise subtracted from memory_used_ when enabled().
// memory_active_ is not modified here.
void FreeMem(size_t mem) {
  if (cache_rep_ != nullptr) {
    FreeMemWithCache(mem);
  } else if (enabled()) {
    memory_used_.fetch_sub(mem, std::memory_order_relaxed);
  }
}

private:
// NOTE(review): template arguments look stripped in this copy of the header
// (`std::shared_ptr cache` above, `std::atomic memory_used_` below) — confirm
// against the upstream file before compiling.
const size_t buffer_size_;
// Threshold that mutable memtable usage is compared against in ShouldFlush().
// NOTE(review): initialised outside this header — confirm how it is derived.
const size_t mutable_limit_;
std::atomic memory_used_;
// Memory that hasn't been scheduled to free.
std::atomic memory_active_; struct CacheRep; std::unique_ptr cache_rep_; void ReserveMemWithCache(size_t mem); void FreeMemWithCache(size_t mem); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/issue_template.md000066400000000000000000000004461370372246700166160ustar00rootroot00000000000000> Note: Please use Issues only for bug reports. For questions, discussions, feature requests, etc. post to dev group: https://groups.google.com/forum/#!forum/rocksdb or https://www.facebook.com/groups/rocksdb.dev ### Expected behavior ### Actual behavior ### Steps to reproduce the behavior rocksdb-6.11.4/java/000077500000000000000000000000001370372246700141665ustar00rootroot00000000000000rocksdb-6.11.4/java/CMakeLists.txt000066400000000000000000000515221370372246700167330ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.4) if(${CMAKE_VERSION} VERSION_LESS "3.11.4") message("Please consider switching to CMake 3.11.4 or newer") endif() set(CMAKE_JAVA_COMPILE_FLAGS -source 7) set(JNI_NATIVE_SOURCES rocksjni/backupablejni.cc rocksjni/backupenginejni.cc rocksjni/cassandra_compactionfilterjni.cc rocksjni/cassandra_value_operator.cc rocksjni/checkpoint.cc rocksjni/clock_cache.cc rocksjni/columnfamilyhandle.cc rocksjni/compaction_filter.cc rocksjni/compaction_filter_factory.cc rocksjni/compaction_filter_factory_jnicallback.cc rocksjni/compaction_job_info.cc rocksjni/compaction_job_stats.cc rocksjni/compaction_options.cc rocksjni/compaction_options_fifo.cc rocksjni/compaction_options_universal.cc rocksjni/compact_range_options.cc rocksjni/comparator.cc rocksjni/comparatorjnicallback.cc rocksjni/compression_options.cc rocksjni/config_options.cc rocksjni/env.cc rocksjni/env_options.cc rocksjni/filter.cc rocksjni/ingest_external_file_options.cc rocksjni/iterator.cc rocksjni/jnicallback.cc rocksjni/loggerjnicallback.cc rocksjni/lru_cache.cc rocksjni/memory_util.cc rocksjni/memtablejni.cc rocksjni/merge_operator.cc rocksjni/native_comparator_wrapper_test.cc 
rocksjni/optimistic_transaction_db.cc rocksjni/optimistic_transaction_options.cc rocksjni/options.cc rocksjni/options_util.cc rocksjni/persistent_cache.cc rocksjni/ratelimiterjni.cc rocksjni/remove_emptyvalue_compactionfilterjni.cc rocksjni/restorejni.cc rocksjni/rocks_callback_object.cc rocksjni/rocksdb_exception_test.cc rocksjni/rocksjni.cc rocksjni/slice.cc rocksjni/snapshot.cc rocksjni/sst_file_manager.cc rocksjni/sst_file_writerjni.cc rocksjni/sst_file_readerjni.cc rocksjni/sst_file_reader_iterator.cc rocksjni/statistics.cc rocksjni/statisticsjni.cc rocksjni/table.cc rocksjni/table_filter.cc rocksjni/table_filter_jnicallback.cc rocksjni/thread_status.cc rocksjni/trace_writer.cc rocksjni/trace_writer_jnicallback.cc rocksjni/transaction.cc rocksjni/transaction_db.cc rocksjni/transaction_db_options.cc rocksjni/transaction_log.cc rocksjni/transaction_notifier.cc rocksjni/transaction_notifier_jnicallback.cc rocksjni/transaction_options.cc rocksjni/ttl.cc rocksjni/wal_filter.cc rocksjni/wal_filter_jnicallback.cc rocksjni/write_batch.cc rocksjni/writebatchhandlerjnicallback.cc rocksjni/write_batch_test.cc rocksjni/write_batch_with_index.cc rocksjni/write_buffer_manager.cc ) set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/AbstractCompactionFilter.java src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java src/main/java/org/rocksdb/AbstractComparator.java src/main/java/org/rocksdb/AbstractImmutableNativeReference.java src/main/java/org/rocksdb/AbstractMutableOptions.java src/main/java/org/rocksdb/AbstractNativeReference.java src/main/java/org/rocksdb/AbstractRocksIterator.java src/main/java/org/rocksdb/AbstractSlice.java src/main/java/org/rocksdb/AbstractTableFilter.java src/main/java/org/rocksdb/AbstractTraceWriter.java src/main/java/org/rocksdb/AbstractTransactionNotifier.java src/main/java/org/rocksdb/AbstractWalFilter.java src/main/java/org/rocksdb/AbstractWriteBatch.java src/main/java/org/rocksdb/AccessHint.java 
src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java src/main/java/org/rocksdb/BackupableDBOptions.java src/main/java/org/rocksdb/BackupEngine.java src/main/java/org/rocksdb/BackupInfo.java src/main/java/org/rocksdb/BlockBasedTableConfig.java src/main/java/org/rocksdb/BloomFilter.java src/main/java/org/rocksdb/BuiltinComparator.java src/main/java/org/rocksdb/Cache.java src/main/java/org/rocksdb/CassandraCompactionFilter.java src/main/java/org/rocksdb/CassandraValueMergeOperator.java src/main/java/org/rocksdb/Checkpoint.java src/main/java/org/rocksdb/ChecksumType.java src/main/java/org/rocksdb/ClockCache.java src/main/java/org/rocksdb/ColumnFamilyDescriptor.java src/main/java/org/rocksdb/ColumnFamilyHandle.java src/main/java/org/rocksdb/ColumnFamilyMetaData.java src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java src/main/java/org/rocksdb/ColumnFamilyOptions.java src/main/java/org/rocksdb/CompactionJobInfo.java src/main/java/org/rocksdb/CompactionJobStats.java src/main/java/org/rocksdb/CompactionOptions.java src/main/java/org/rocksdb/CompactionOptionsFIFO.java src/main/java/org/rocksdb/CompactionOptionsUniversal.java src/main/java/org/rocksdb/CompactionPriority.java src/main/java/org/rocksdb/CompactionReason.java src/main/java/org/rocksdb/CompactRangeOptions.java src/main/java/org/rocksdb/CompactionStopStyle.java src/main/java/org/rocksdb/CompactionStyle.java src/main/java/org/rocksdb/ComparatorOptions.java src/main/java/org/rocksdb/ComparatorType.java src/main/java/org/rocksdb/CompressionOptions.java src/main/java/org/rocksdb/CompressionType.java src/main/java/org/rocksdb/ConfigOptions.java src/main/java/org/rocksdb/DataBlockIndexType.java src/main/java/org/rocksdb/DBOptionsInterface.java src/main/java/org/rocksdb/DBOptions.java src/main/java/org/rocksdb/DbPath.java src/main/java/org/rocksdb/DirectSlice.java src/main/java/org/rocksdb/EncodingType.java 
src/main/java/org/rocksdb/Env.java src/main/java/org/rocksdb/EnvOptions.java src/main/java/org/rocksdb/Experimental.java src/main/java/org/rocksdb/Filter.java src/main/java/org/rocksdb/FlushOptions.java src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java src/main/java/org/rocksdb/HashSkipListMemTableConfig.java src/main/java/org/rocksdb/HdfsEnv.java src/main/java/org/rocksdb/HistogramData.java src/main/java/org/rocksdb/HistogramType.java src/main/java/org/rocksdb/Holder.java src/main/java/org/rocksdb/IndexType.java src/main/java/org/rocksdb/InfoLogLevel.java src/main/java/org/rocksdb/IngestExternalFileOptions.java src/main/java/org/rocksdb/LevelMetaData.java src/main/java/org/rocksdb/LiveFileMetaData.java src/main/java/org/rocksdb/LogFile.java src/main/java/org/rocksdb/Logger.java src/main/java/org/rocksdb/LRUCache.java src/main/java/org/rocksdb/MemoryUsageType.java src/main/java/org/rocksdb/MemoryUtil.java src/main/java/org/rocksdb/MemTableConfig.java src/main/java/org/rocksdb/MergeOperator.java src/main/java/org/rocksdb/MutableColumnFamilyOptions.java src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java src/main/java/org/rocksdb/MutableDBOptions.java src/main/java/org/rocksdb/MutableDBOptionsInterface.java src/main/java/org/rocksdb/MutableOptionKey.java src/main/java/org/rocksdb/MutableOptionValue.java src/main/java/org/rocksdb/NativeComparatorWrapper.java src/main/java/org/rocksdb/NativeLibraryLoader.java src/main/java/org/rocksdb/OperationStage.java src/main/java/org/rocksdb/OperationType.java src/main/java/org/rocksdb/OptimisticTransactionDB.java src/main/java/org/rocksdb/OptimisticTransactionOptions.java src/main/java/org/rocksdb/Options.java src/main/java/org/rocksdb/OptionsUtil.java src/main/java/org/rocksdb/PersistentCache.java src/main/java/org/rocksdb/PlainTableConfig.java src/main/java/org/rocksdb/Priority.java src/main/java/org/rocksdb/Range.java src/main/java/org/rocksdb/RateLimiter.java 
src/main/java/org/rocksdb/RateLimiterMode.java src/main/java/org/rocksdb/ReadOptions.java src/main/java/org/rocksdb/ReadTier.java src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java src/main/java/org/rocksdb/RestoreOptions.java src/main/java/org/rocksdb/ReusedSynchronisationType.java src/main/java/org/rocksdb/RocksCallbackObject.java src/main/java/org/rocksdb/RocksDBException.java src/main/java/org/rocksdb/RocksDB.java src/main/java/org/rocksdb/RocksEnv.java src/main/java/org/rocksdb/RocksIteratorInterface.java src/main/java/org/rocksdb/RocksIterator.java src/main/java/org/rocksdb/RocksMemEnv.java src/main/java/org/rocksdb/RocksMutableObject.java src/main/java/org/rocksdb/RocksObject.java src/main/java/org/rocksdb/SanityLevel.java src/main/java/org/rocksdb/SizeApproximationFlag.java src/main/java/org/rocksdb/SkipListMemTableConfig.java src/main/java/org/rocksdb/Slice.java src/main/java/org/rocksdb/Snapshot.java src/main/java/org/rocksdb/SstFileManager.java src/main/java/org/rocksdb/SstFileMetaData.java src/main/java/org/rocksdb/SstFileWriter.java src/main/java/org/rocksdb/SstFileReader.java src/main/java/org/rocksdb/SstFileReaderIterator.java src/main/java/org/rocksdb/StateType.java src/main/java/org/rocksdb/StatisticsCollectorCallback.java src/main/java/org/rocksdb/StatisticsCollector.java src/main/java/org/rocksdb/Statistics.java src/main/java/org/rocksdb/StatsCollectorInput.java src/main/java/org/rocksdb/StatsLevel.java src/main/java/org/rocksdb/Status.java src/main/java/org/rocksdb/StringAppendOperator.java src/main/java/org/rocksdb/TableFilter.java src/main/java/org/rocksdb/TableProperties.java src/main/java/org/rocksdb/TableFormatConfig.java src/main/java/org/rocksdb/ThreadType.java src/main/java/org/rocksdb/ThreadStatus.java src/main/java/org/rocksdb/TickerType.java src/main/java/org/rocksdb/TimedEnv.java src/main/java/org/rocksdb/TraceOptions.java src/main/java/org/rocksdb/TraceWriter.java src/main/java/org/rocksdb/TransactionalDB.java 
src/main/java/org/rocksdb/TransactionalOptions.java src/main/java/org/rocksdb/TransactionDB.java src/main/java/org/rocksdb/TransactionDBOptions.java src/main/java/org/rocksdb/Transaction.java src/main/java/org/rocksdb/TransactionLogIterator.java src/main/java/org/rocksdb/TransactionOptions.java src/main/java/org/rocksdb/TtlDB.java src/main/java/org/rocksdb/TxnDBWritePolicy.java src/main/java/org/rocksdb/VectorMemTableConfig.java src/main/java/org/rocksdb/WalFileType.java src/main/java/org/rocksdb/WalFilter.java src/main/java/org/rocksdb/WalProcessingOption.java src/main/java/org/rocksdb/WALRecoveryMode.java src/main/java/org/rocksdb/WBWIRocksIterator.java src/main/java/org/rocksdb/WriteBatch.java src/main/java/org/rocksdb/WriteBatchInterface.java src/main/java/org/rocksdb/WriteBatchWithIndex.java src/main/java/org/rocksdb/WriteOptions.java src/main/java/org/rocksdb/WriteBufferManager.java src/main/java/org/rocksdb/util/ByteUtil.java src/main/java/org/rocksdb/util/BytewiseComparator.java src/main/java/org/rocksdb/util/Environment.java src/main/java/org/rocksdb/util/IntComparator.java src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java src/main/java/org/rocksdb/util/SizeUnit.java src/main/java/org/rocksdb/UInt64AddOperator.java ) set(JAVA_TEST_CLASSES src/test/java/org/rocksdb/BackupEngineTest.java src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java src/test/java/org/rocksdb/NativeComparatorWrapperTest.java src/test/java/org/rocksdb/PlatformRandomHelper.java src/test/java/org/rocksdb/RocksDBExceptionTest.java src/test/java/org/rocksdb/RocksNativeLibraryResource.java src/test/java/org/rocksdb/SnapshotTest.java src/test/java/org/rocksdb/WriteBatchTest.java src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java src/test/java/org/rocksdb/util/WriteBatchGetter.java ) include(FindJava) include(UseJava) find_package(JNI) include_directories(${JNI_INCLUDE_DIRS}) include_directories(${PROJECT_SOURCE_DIR}/java) set(JAVA_TEST_LIBDIR 
${PROJECT_SOURCE_DIR}/java/test-libs)

# Locations of the test-only jars. Any jar that is missing is downloaded into
# JAVA_TEST_LIBDIR by the "if(NOT EXISTS ...)" stanzas later in this file;
# JAVA_TMP_JAR is the scratch name each download is written to first.
set(JAVA_TMP_JAR ${JAVA_TEST_LIBDIR}/tmp.jar)
set(JAVA_JUNIT_JAR ${JAVA_TEST_LIBDIR}/junit-4.12.jar)
set(JAVA_HAMCR_JAR ${JAVA_TEST_LIBDIR}/hamcrest-core-1.3.jar)
set(JAVA_MOCKITO_JAR ${JAVA_TEST_LIBDIR}/mockito-all-1.10.19.jar)
set(JAVA_CGLIB_JAR ${JAVA_TEST_LIBDIR}/cglib-2.2.2.jar)
set(JAVA_ASSERTJ_JAR ${JAVA_TEST_LIBDIR}/assertj-core-1.7.1.jar)
set(JAVA_TESTCLASSPATH
  ${JAVA_JUNIT_JAR}
  ${JAVA_HAMCR_JAR}
  ${JAVA_MOCKITO_JAR}
  ${JAVA_CGLIB_JAR}
  ${JAVA_ASSERTJ_JAR})

# Directory that receives the generated JNI headers.
set(JNI_OUTPUT_DIR ${PROJECT_SOURCE_DIR}/java/include)
file(MAKE_DIRECTORY ${JNI_OUTPUT_DIR})

# "NOT <a> VERSION_LESS <b>" is used instead of VERSION_GREATER_EQUAL because
# that operator only exists in CMake 3.7+, while this file declares 3.4 as its
# minimum; with the newer operator the check itself would fail on exactly the
# old CMake versions it is meant to diagnose.
if(NOT ${Java_VERSION_MAJOR} VERSION_LESS "10" AND ${CMAKE_VERSION} VERSION_LESS "3.11.4")
  # Java 10 and newer don't have javah, but the alternative GENERATE_NATIVE_HEADERS requires CMake 3.11.4 or newer
  message(FATAL_ERROR "Detected Java 10 or newer (${Java_VERSION_STRING}), to build with CMake please upgrade CMake to 3.11.4 or newer")

elseif(${CMAKE_VERSION} VERSION_LESS "3.11.4" OR (${Java_VERSION_MINOR} STREQUAL "7" AND ${Java_VERSION_MAJOR} STREQUAL "1"))
  # Old CMake or Java 1.7 prepare the JAR...
  # (JNI headers for this case are produced separately via create_javah.)
  message("Preparing Jar for Java 7")
  add_jar(
      rocksdbjni_classes
      SOURCES
      ${JAVA_MAIN_CLASSES}
      ${JAVA_TEST_CLASSES}
      INCLUDE_JARS ${JAVA_TESTCLASSPATH}
  )

else ()
  # Java 1.8 or newer prepare the JAR...
  # GENERATE_NATIVE_HEADERS makes add_jar emit the JNI headers as well.
  message("Preparing Jar for JDK ${Java_VERSION_STRING}")
  add_jar(
      rocksdbjni_classes
      SOURCES
      ${JAVA_MAIN_CLASSES}
      ${JAVA_TEST_CLASSES}
      INCLUDE_JARS ${JAVA_TESTCLASSPATH}
      GENERATE_NATIVE_HEADERS rocksdbjni_headers DESTINATION ${JNI_OUTPUT_DIR}
  )

endif()

if(NOT EXISTS ${PROJECT_SOURCE_DIR}/java/classes)
  file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/java/classes)
endif()

if(NOT EXISTS ${JAVA_TEST_LIBDIR})
  # file(MAKE_DIRECTORY) takes a list of directories, so a literal "mkdir"
  # argument here would create a stray directory of that name as well.
  file(MAKE_DIRECTORY ${JAVA_TEST_LIBDIR})
endif()

# Base URL the test jars are fetched from; CUSTOM_DEPS_URL overrides the
# default mirror.
if (DEFINED CUSTOM_DEPS_URL)
  set(DEPS_URL ${CUSTOM_DEPS_URL}/)
else ()
  # Using a Facebook AWS account for S3 storage. (maven.org has a history
  # of failing in Travis builds.)
set(DEPS_URL "https://rocksdb-deps.s3-us-west-2.amazonaws.com/jars") endif() if(NOT EXISTS ${JAVA_JUNIT_JAR}) message("Downloading ${JAVA_JUNIT_JAR}") file(DOWNLOAD ${DEPS_URL}/junit-4.12.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) message(FATAL_ERROR "Failed downloading ${JAVA_JUNIT_JAR}: ${error_message}") endif() file(RENAME ${JAVA_TMP_JAR} ${JAVA_JUNIT_JAR}) endif() if(NOT EXISTS ${JAVA_HAMCR_JAR}) message("Downloading ${JAVA_HAMCR_JAR}") file(DOWNLOAD ${DEPS_URL}/hamcrest-core-1.3.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) message(FATAL_ERROR "Failed downloading ${JAVA_HAMCR_JAR}: ${error_message}") endif() file(RENAME ${JAVA_TMP_JAR} ${JAVA_HAMCR_JAR}) endif() if(NOT EXISTS ${JAVA_MOCKITO_JAR}) message("Downloading ${JAVA_MOCKITO_JAR}") file(DOWNLOAD ${DEPS_URL}/mockito-all-1.10.19.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) message(FATAL_ERROR "Failed downloading ${JAVA_MOCKITO_JAR}: ${error_message}") endif() file(RENAME ${JAVA_TMP_JAR} ${JAVA_MOCKITO_JAR}) endif() if(NOT EXISTS ${JAVA_CGLIB_JAR}) message("Downloading ${JAVA_CGLIB_JAR}") file(DOWNLOAD ${DEPS_URL}/cglib-2.2.2.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) message(FATAL_ERROR "Failed downloading ${JAVA_CGLIB_JAR}: ${error_message}") endif() file(RENAME ${JAVA_TMP_JAR} ${JAVA_CGLIB_JAR}) endif() if(NOT EXISTS ${JAVA_ASSERTJ_JAR}) message("Downloading ${JAVA_ASSERTJ_JAR}") file(DOWNLOAD ${DEPS_URL}/assertj-core-1.7.1.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) message(FATAL_ERROR 
"Failed downloading ${JAVA_ASSERTJ_JAR}: ${error_message}") endif() file(RENAME ${JAVA_TMP_JAR} ${JAVA_ASSERTJ_JAR}) endif() if(${CMAKE_VERSION} VERSION_LESS "3.11.4" OR (${Java_VERSION_MINOR} STREQUAL "7" AND ${Java_VERSION_MAJOR} STREQUAL "1")) # Old CMake or Java 1.7 ONLY generate JNI headers, Java 1.8+ JNI is handled in add_jar step above message("Preparing JNI headers for Java 7") set(NATIVE_JAVA_CLASSES org.rocksdb.AbstractCompactionFilter org.rocksdb.AbstractCompactionFilterFactory org.rocksdb.AbstractComparator org.rocksdb.AbstractImmutableNativeReference org.rocksdb.AbstractNativeReference org.rocksdb.AbstractRocksIterator org.rocksdb.AbstractSlice org.rocksdb.AbstractTableFilter org.rocksdb.AbstractTraceWriter org.rocksdb.AbstractTransactionNotifier org.rocksdb.AbstractWalFilter org.rocksdb.BackupableDBOptions org.rocksdb.BackupEngine org.rocksdb.BlockBasedTableConfig org.rocksdb.BloomFilter org.rocksdb.CassandraCompactionFilter org.rocksdb.CassandraValueMergeOperator org.rocksdb.Checkpoint org.rocksdb.ClockCache org.rocksdb.ColumnFamilyHandle org.rocksdb.ColumnFamilyOptions org.rocksdb.CompactionJobInfo org.rocksdb.CompactionJobStats org.rocksdb.CompactionOptions org.rocksdb.CompactionOptionsFIFO org.rocksdb.CompactionOptionsUniversal org.rocksdb.CompactRangeOptions org.rocksdb.ComparatorOptions org.rocksdb.CompressionOptions org.rocksdb.DBOptions org.rocksdb.DirectSlice org.rocksdb.Env org.rocksdb.EnvOptions org.rocksdb.Filter org.rocksdb.FlushOptions org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig org.rocksdb.HdfsEnv org.rocksdb.IngestExternalFileOptions org.rocksdb.Logger org.rocksdb.LRUCache org.rocksdb.MemoryUtil org.rocksdb.MemTableConfig org.rocksdb.NativeComparatorWrapper org.rocksdb.NativeLibraryLoader org.rocksdb.OptimisticTransactionDB org.rocksdb.OptimisticTransactionOptions org.rocksdb.Options org.rocksdb.OptionsUtil org.rocksdb.PersistentCache org.rocksdb.PlainTableConfig org.rocksdb.RateLimiter 
org.rocksdb.ReadOptions org.rocksdb.RemoveEmptyValueCompactionFilter org.rocksdb.RestoreOptions org.rocksdb.RocksCallbackObject org.rocksdb.RocksDB org.rocksdb.RocksEnv org.rocksdb.RocksIterator org.rocksdb.RocksIteratorInterface org.rocksdb.RocksMemEnv org.rocksdb.RocksMutableObject org.rocksdb.RocksObject org.rocksdb.SkipListMemTableConfig org.rocksdb.Slice org.rocksdb.Snapshot org.rocksdb.SstFileManager org.rocksdb.SstFileWriter org.rocksdb.SstFileReader org.rocksdb.SstFileReaderIterator org.rocksdb.Statistics org.rocksdb.StringAppendOperator org.rocksdb.TableFormatConfig org.rocksdb.ThreadStatus org.rocksdb.TimedEnv org.rocksdb.Transaction org.rocksdb.TransactionDB org.rocksdb.TransactionDBOptions org.rocksdb.TransactionLogIterator org.rocksdb.TransactionOptions org.rocksdb.TtlDB org.rocksdb.UInt64AddOperator org.rocksdb.VectorMemTableConfig org.rocksdb.WBWIRocksIterator org.rocksdb.WriteBatch org.rocksdb.WriteBatch.Handler org.rocksdb.WriteBatchInterface org.rocksdb.WriteBatchWithIndex org.rocksdb.WriteOptions org.rocksdb.NativeComparatorWrapperTest org.rocksdb.RocksDBExceptionTest org.rocksdb.SnapshotTest org.rocksdb.WriteBatchTest org.rocksdb.WriteBatchTestInternalHelper org.rocksdb.WriteBufferManager ) create_javah( TARGET rocksdbjni_headers CLASSES ${NATIVE_JAVA_CLASSES} CLASSPATH rocksdbjni_classes ${JAVA_TESTCLASSPATH} OUTPUT_DIR ${JNI_OUTPUT_DIR} ) endif() if(NOT MSVC) set_property(TARGET ${ROCKSDB_STATIC_LIB} PROPERTY POSITION_INDEPENDENT_CODE ON) endif() set(ROCKSDBJNI_STATIC_LIB rocksdbjni${ARTIFACT_SUFFIX}) add_library(${ROCKSDBJNI_STATIC_LIB} ${JNI_NATIVE_SOURCES}) add_dependencies(${ROCKSDBJNI_STATIC_LIB} rocksdbjni_headers) target_link_libraries(${ROCKSDBJNI_STATIC_LIB} ${ROCKSDB_STATIC_LIB} ${ROCKSDB_LIB}) if(NOT MINGW) set(ROCKSDBJNI_SHARED_LIB rocksdbjni-shared${ARTIFACT_SUFFIX}) add_library(${ROCKSDBJNI_SHARED_LIB} SHARED ${JNI_NATIVE_SOURCES}) add_dependencies(${ROCKSDBJNI_SHARED_LIB} rocksdbjni_headers) 
target_link_libraries(${ROCKSDBJNI_SHARED_LIB} ${ROCKSDB_STATIC_LIB} ${ROCKSDB_LIB}) set_target_properties( ${ROCKSDBJNI_SHARED_LIB} PROPERTIES COMPILE_PDB_OUTPUT_DIRECTORY ${CMAKE_CFG_INTDIR} COMPILE_PDB_NAME ${ROCKSDBJNI_STATIC_LIB}.pdb ) endif() rocksdb-6.11.4/java/HISTORY-JAVA.md000066400000000000000000000062341370372246700163750ustar00rootroot00000000000000# RocksJava Change Log ## 3.13 (8/4/2015) ### New Features * Exposed BackupEngine API. * Added CappedPrefixExtractor support. To use such an extractor, simply call useCappedPrefixExtractor in either Options or ColumnFamilyOptions. * Added RemoveEmptyValueCompactionFilter. ## 3.10.0 (3/24/2015) ### New Features * Added compression per level API. * MemEnv is now available in RocksJava via RocksMemEnv class. * lz4 compression is now included in rocksjava static library when running `make rocksdbjavastatic`. ### Public API Changes * Overflowing a size_t when setting rocksdb options now throws an IllegalArgumentException, which removes the necessity for a developer to catch these Exceptions explicitly. * The set and get functions for tableCacheRemoveScanCountLimit are deprecated. ## By 01/31/2015 ### New Features * WriteBatchWithIndex support. * Iterator support for WriteBatch and WriteBatchWithIndex. * GetUpdatesSince support. * Snapshots now carry information about the related sequence number. * TTL DB support. ## By 11/14/2014 ### New Features * Full support for Column Family. * Slice and Comparator support. * Default merge operator support. * RateLimiter support. ## By 06/15/2014 ### New Features * Added basic Java binding for rocksdb::Env such that multiple RocksDB instances can share the same thread pool and environment. * Added RestoreBackupableDB. ## By 05/30/2014 ### Internal Framework Improvement * Added disOwnNativeHandle to RocksObject, which allows a RocksObject to give up the ownership of its native handle. This method is useful when sharing and transferring the ownership of RocksDB C++ resources. 
## By 05/15/2014 ### New Features * Added RocksObject --- the base class of all RocksDB classes which holds some RocksDB resources on the C++ side. * Use environment variable JAVA_HOME in Makefile for RocksJava. ### Public API changes * Renamed org.rocksdb.Iterator to org.rocksdb.RocksIterator to avoid a potential conflict with Java's built-in Iterator. ## By 04/30/2014 ### New Features * Added Java binding for MultiGet. * Added static method RocksDB.loadLibrary(), which loads necessary library files. * Added Java bindings for 60+ rocksdb::Options. * Added Java binding for BloomFilter. * Added Java binding for ReadOptions. * Added Java binding for memtables. * Added Java binding for sst formats. * Added Java binding for RocksDB Iterator which enables sequential scan operation. * Added Java binding for Statistics. * Added Java binding for BackupableDB. ### DB Benchmark * Added filluniquerandom, readseq benchmark. * 70+ command-line options. * Enabled BloomFilter configuration. ## By 04/15/2014 ### New Features * Added Java binding for WriteOptions. * Added Java binding for WriteBatch, which enables batch-write. * Added Java binding for rocksdb::Options. * Added Java binding for block cache. * Added Java version DB Benchmark. ### DB Benchmark * Added readwhilewriting benchmark. ### Internal Framework Improvement * Avoid a potential byte-array-copy between c++ and Java in RocksDB.get. * Added SizeUnit in org.rocksdb.util to store consts like KB and GB. ### 03/28/2014 * RocksJava project started. * Added Java binding for RocksDB, which supports Open, Close, Get and Put. 
rocksdb-6.11.4/java/Makefile000066400000000000000000000301031370372246700156230ustar00rootroot00000000000000NATIVE_JAVA_CLASSES = \ org.rocksdb.AbstractCompactionFilter\ org.rocksdb.AbstractCompactionFilterFactory\ org.rocksdb.AbstractComparator\ org.rocksdb.AbstractSlice\ org.rocksdb.AbstractTableFilter\ org.rocksdb.AbstractTraceWriter\ org.rocksdb.AbstractTransactionNotifier\ org.rocksdb.AbstractWalFilter\ org.rocksdb.BackupEngine\ org.rocksdb.BackupableDBOptions\ org.rocksdb.BlockBasedTableConfig\ org.rocksdb.BloomFilter\ org.rocksdb.Checkpoint\ org.rocksdb.ClockCache\ org.rocksdb.CassandraCompactionFilter\ org.rocksdb.CassandraValueMergeOperator\ org.rocksdb.ColumnFamilyHandle\ org.rocksdb.ColumnFamilyOptions\ org.rocksdb.CompactionJobInfo\ org.rocksdb.CompactionJobStats\ org.rocksdb.CompactionOptions\ org.rocksdb.CompactionOptionsFIFO\ org.rocksdb.CompactionOptionsUniversal\ org.rocksdb.CompactRangeOptions\ org.rocksdb.ComparatorOptions\ org.rocksdb.CompressionOptions\ org.rocksdb.ConfigOptions\ org.rocksdb.DBOptions\ org.rocksdb.DirectSlice\ org.rocksdb.Env\ org.rocksdb.EnvOptions\ org.rocksdb.FlushOptions\ org.rocksdb.Filter\ org.rocksdb.IngestExternalFileOptions\ org.rocksdb.HashLinkedListMemTableConfig\ org.rocksdb.HashSkipListMemTableConfig\ org.rocksdb.HdfsEnv\ org.rocksdb.Logger\ org.rocksdb.LRUCache\ org.rocksdb.MemoryUsageType\ org.rocksdb.MemoryUtil\ org.rocksdb.MergeOperator\ org.rocksdb.NativeComparatorWrapper\ org.rocksdb.OptimisticTransactionDB\ org.rocksdb.OptimisticTransactionOptions\ org.rocksdb.Options\ org.rocksdb.OptionsUtil\ org.rocksdb.PersistentCache\ org.rocksdb.PlainTableConfig\ org.rocksdb.RateLimiter\ org.rocksdb.ReadOptions\ org.rocksdb.RemoveEmptyValueCompactionFilter\ org.rocksdb.RestoreOptions\ org.rocksdb.RocksCallbackObject\ org.rocksdb.RocksDB\ org.rocksdb.RocksEnv\ org.rocksdb.RocksIterator\ org.rocksdb.RocksMemEnv\ org.rocksdb.SkipListMemTableConfig\ org.rocksdb.Slice\ org.rocksdb.SstFileManager\ org.rocksdb.SstFileWriter\ 
org.rocksdb.SstFileReader\ org.rocksdb.SstFileReaderIterator\ org.rocksdb.Statistics\ org.rocksdb.ThreadStatus\ org.rocksdb.TimedEnv\ org.rocksdb.Transaction\ org.rocksdb.TransactionDB\ org.rocksdb.TransactionDBOptions\ org.rocksdb.TransactionOptions\ org.rocksdb.TransactionLogIterator\ org.rocksdb.TtlDB\ org.rocksdb.VectorMemTableConfig\ org.rocksdb.Snapshot\ org.rocksdb.StringAppendOperator\ org.rocksdb.UInt64AddOperator\ org.rocksdb.WriteBatch\ org.rocksdb.WriteBatch.Handler\ org.rocksdb.WriteOptions\ org.rocksdb.WriteBatchWithIndex\ org.rocksdb.WriteBufferManager\ org.rocksdb.WBWIRocksIterator NATIVE_JAVA_TEST_CLASSES = org.rocksdb.RocksDBExceptionTest\ org.rocksdb.NativeComparatorWrapperTest.NativeStringComparatorWrapper\ org.rocksdb.WriteBatchTest\ org.rocksdb.WriteBatchTestInternalHelper ROCKSDB_MAJOR = $(shell egrep "ROCKSDB_MAJOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) ROCKSDB_MINOR = $(shell egrep "ROCKSDB_MINOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) ROCKSDB_PATCH = $(shell egrep "ROCKSDB_PATCH.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) NATIVE_INCLUDE = ./include ARCH := $(shell getconf LONG_BIT) ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH).jar ifeq ($(PLATFORM), OS_MACOSX) ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar endif JAVA_TESTS = \ org.rocksdb.BackupableDBOptionsTest\ org.rocksdb.BackupEngineTest\ org.rocksdb.BlockBasedTableConfigTest\ org.rocksdb.BuiltinComparatorTest\ org.rocksdb.util.BytewiseComparatorTest\ org.rocksdb.util.BytewiseComparatorIntTest\ org.rocksdb.CheckPointTest\ org.rocksdb.ClockCacheTest\ org.rocksdb.ColumnFamilyOptionsTest\ org.rocksdb.ColumnFamilyTest\ org.rocksdb.CompactionFilterFactoryTest\ org.rocksdb.CompactionJobInfoTest\ org.rocksdb.CompactionJobStatsTest\ org.rocksdb.CompactionOptionsTest\ org.rocksdb.CompactionOptionsFIFOTest\ org.rocksdb.CompactionOptionsUniversalTest\ 
org.rocksdb.CompactionPriorityTest\ org.rocksdb.CompactionStopStyleTest\ org.rocksdb.ComparatorOptionsTest\ org.rocksdb.CompressionOptionsTest\ org.rocksdb.CompressionTypesTest\ org.rocksdb.DBOptionsTest\ org.rocksdb.DirectSliceTest\ org.rocksdb.util.EnvironmentTest\ org.rocksdb.EnvOptionsTest\ org.rocksdb.HdfsEnvTest\ org.rocksdb.IngestExternalFileOptionsTest\ org.rocksdb.util.IntComparatorTest\ org.rocksdb.util.JNIComparatorTest\ org.rocksdb.FilterTest\ org.rocksdb.FlushTest\ org.rocksdb.InfoLogLevelTest\ org.rocksdb.KeyMayExistTest\ org.rocksdb.LoggerTest\ org.rocksdb.LRUCacheTest\ org.rocksdb.MemoryUtilTest\ org.rocksdb.MemTableTest\ org.rocksdb.MergeTest\ org.rocksdb.MixedOptionsTest\ org.rocksdb.MutableColumnFamilyOptionsTest\ org.rocksdb.MutableDBOptionsTest\ org.rocksdb.NativeComparatorWrapperTest\ org.rocksdb.NativeLibraryLoaderTest\ org.rocksdb.OptimisticTransactionTest\ org.rocksdb.OptimisticTransactionDBTest\ org.rocksdb.OptimisticTransactionOptionsTest\ org.rocksdb.OptionsUtilTest\ org.rocksdb.OptionsTest\ org.rocksdb.PlainTableConfigTest\ org.rocksdb.RateLimiterTest\ org.rocksdb.ReadOnlyTest\ org.rocksdb.ReadOptionsTest\ org.rocksdb.util.ReverseBytewiseComparatorIntTest\ org.rocksdb.RocksDBTest\ org.rocksdb.RocksDBExceptionTest\ org.rocksdb.DefaultEnvTest\ org.rocksdb.RocksIteratorTest\ org.rocksdb.RocksMemEnvTest\ org.rocksdb.util.SizeUnitTest\ org.rocksdb.SliceTest\ org.rocksdb.SnapshotTest\ org.rocksdb.SstFileManagerTest\ org.rocksdb.SstFileWriterTest\ org.rocksdb.SstFileReaderTest\ org.rocksdb.TableFilterTest\ org.rocksdb.TimedEnvTest\ org.rocksdb.TransactionTest\ org.rocksdb.TransactionDBTest\ org.rocksdb.TransactionOptionsTest\ org.rocksdb.TransactionDBOptionsTest\ org.rocksdb.TransactionLogIteratorTest\ org.rocksdb.TtlDBTest\ org.rocksdb.StatisticsTest\ org.rocksdb.StatisticsCollectorTest\ org.rocksdb.WalFilterTest\ org.rocksdb.WALRecoveryModeTest\ org.rocksdb.WriteBatchHandlerTest\ org.rocksdb.WriteBatchTest\ 
org.rocksdb.WriteBatchThreadedTest\ org.rocksdb.WriteOptionsTest\ org.rocksdb.WriteBatchWithIndexTest MAIN_SRC = src/main/java TEST_SRC = src/test/java OUTPUT = target MAIN_CLASSES = $(OUTPUT)/classes TEST_CLASSES = $(OUTPUT)/test-classes JAVADOC = $(OUTPUT)/apidocs BENCHMARK_MAIN_SRC = benchmark/src/main/java BENCHMARK_OUTPUT = benchmark/target BENCHMARK_MAIN_CLASSES = $(BENCHMARK_OUTPUT)/classes SAMPLES_MAIN_SRC = samples/src/main/java SAMPLES_OUTPUT = samples/target SAMPLES_MAIN_CLASSES = $(SAMPLES_OUTPUT)/classes JAVA_TEST_LIBDIR = test-libs JAVA_JUNIT_JAR = $(JAVA_TEST_LIBDIR)/junit-4.12.jar JAVA_HAMCR_JAR = $(JAVA_TEST_LIBDIR)/hamcrest-core-1.3.jar JAVA_MOCKITO_JAR = $(JAVA_TEST_LIBDIR)/mockito-all-1.10.19.jar JAVA_CGLIB_JAR = $(JAVA_TEST_LIBDIR)/cglib-2.2.2.jar JAVA_ASSERTJ_JAR = $(JAVA_TEST_LIBDIR)/assertj-core-1.7.1.jar JAVA_TESTCLASSPATH = $(JAVA_JUNIT_JAR):$(JAVA_HAMCR_JAR):$(JAVA_MOCKITO_JAR):$(JAVA_CGLIB_JAR):$(JAVA_ASSERTJ_JAR) MVN_LOCAL = ~/.m2/repository # Set the default JAVA_ARGS to "" for DEBUG_LEVEL=0 JAVA_ARGS ?= JAVAC_ARGS ?= # When debugging add -Xcheck:jni to the java args ifneq ($(DEBUG_LEVEL),0) JAVA_ARGS += -ea -Xcheck:jni JAVAC_ARGS += -Xlint:deprecation -Xlint:unchecked endif # Using a Facebook AWS account for S3 storage. (maven.org has a history # of failing in Travis builds.) 
# Compile the main Java sources into $(MAIN_CLASSES) and generate the JNI
# headers into $(NATIVE_INCLUDE).
#
# Header generation depends on what `java -version` reports:
#   * output matching "1.7.0": javac is run without -h, and the headers are
#     produced afterwards by a separate `javah` invocation;
#   * anything else: javac is run with `-h $(NATIVE_INCLUDE)` so the headers
#     are emitted during compilation and `javah` is not invoked.
java:
	$(AM_V_GEN)mkdir -p $(MAIN_CLASSES)
ifeq ($(shell java -version 2>&1 | grep 1.7.0 > /dev/null; printf $$?), 0)
	$(AM_V_at)javac $(JAVAC_ARGS) -d $(MAIN_CLASSES)\
		$(MAIN_SRC)/org/rocksdb/util/*.java\
		$(MAIN_SRC)/org/rocksdb/*.java
else
	$(AM_V_at)javac $(JAVAC_ARGS) -h $(NATIVE_INCLUDE) -d $(MAIN_CLASSES)\
		$(MAIN_SRC)/org/rocksdb/util/*.java\
		$(MAIN_SRC)/org/rocksdb/*.java
endif
# NOTE(review): HISTORY-CPP.md is copied and then removed immediately, so the
# pair of commands below has no lasting effect -- confirm whether a step that
# used the copy was dropped.
	$(AM_V_at)@cp ../HISTORY.md ./HISTORY-CPP.md
	$(AM_V_at)@rm -f ./HISTORY-CPP.md
ifeq ($(shell java -version 2>&1 | grep 1.7.0 > /dev/null; printf $$?), 0)
	$(AM_V_at)javah -cp $(MAIN_CLASSES) -d $(NATIVE_INCLUDE) -jni $(NATIVE_JAVA_CLASSES)
endif
# Make sure every jar on $(JAVA_TESTCLASSPATH) exists in $(JAVA_TEST_LIBDIR).
#
# For each jar the recipe tries, in order, and stops at the first success:
#   1. `test -s`  -- the jar is already present and non-empty;
#   2. `cp`       -- copy it from the local Maven repository $(MVN_LOCAL);
#   3. `curl`     -- download it from $(DEPS_URL).
#
# NOTE(review): curl is invoked with --insecure, i.e. without TLS certificate
# verification, for jars that are later placed on the test classpath --
# confirm this is still required.
resolve_test_deps:
	test -d "$(JAVA_TEST_LIBDIR)" || mkdir -p "$(JAVA_TEST_LIBDIR)"
	test -s "$(JAVA_JUNIT_JAR)" || cp $(MVN_LOCAL)/junit/junit/4.12/junit-4.12.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output $(JAVA_JUNIT_JAR) --location $(DEPS_URL)/junit-4.12.jar
	test -s "$(JAVA_HAMCR_JAR)" || cp $(MVN_LOCAL)/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output $(JAVA_HAMCR_JAR) --location $(DEPS_URL)/hamcrest-core-1.3.jar
	test -s "$(JAVA_MOCKITO_JAR)" || cp $(MVN_LOCAL)/org/mockito/mockito-all/1.10.19/mockito-all-1.10.19.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output "$(JAVA_MOCKITO_JAR)" --location $(DEPS_URL)/mockito-all-1.10.19.jar
	test -s "$(JAVA_CGLIB_JAR)" || cp $(MVN_LOCAL)/cglib/cglib/2.2.2/cglib-2.2.2.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output "$(JAVA_CGLIB_JAR)" --location $(DEPS_URL)/cglib-2.2.2.jar
	test -s "$(JAVA_ASSERTJ_JAR)" || cp $(MVN_LOCAL)/org/assertj/assertj-core/1.7.1/assertj-core-1.7.1.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output "$(JAVA_ASSERTJ_JAR)" --location $(DEPS_URL)/assertj-core-1.7.1.jar
$$?),0) $(AM_V_at)javac $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -d $(TEST_CLASSES)\ $(TEST_SRC)/org/rocksdb/test/*.java\ $(TEST_SRC)/org/rocksdb/util/*.java\ $(TEST_SRC)/org/rocksdb/*.java $(AM_V_at)javah -cp $(MAIN_CLASSES):$(TEST_CLASSES) -d $(NATIVE_INCLUDE) -jni $(NATIVE_JAVA_TEST_CLASSES) else $(AM_V_at)javac $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE) -d $(TEST_CLASSES)\ $(TEST_SRC)/org/rocksdb/test/*.java\ $(TEST_SRC)/org/rocksdb/util/*.java\ $(TEST_SRC)/org/rocksdb/*.java endif test: java java_test run_test run_test: java $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(JAVA_TESTS) db_bench: java $(AM_V_GEN)mkdir -p $(BENCHMARK_MAIN_CLASSES) $(AM_V_at)javac $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(BENCHMARK_MAIN_CLASSES) $(BENCHMARK_MAIN_SRC)/org/rocksdb/benchmark/*.java rocksdb-6.11.4/java/RELEASE.md000066400000000000000000000047741370372246700156040ustar00rootroot00000000000000## Cross-building RocksDB can be built as a single self contained cross-platform JAR. The cross-platform jar can be used on any 64-bit OSX system, 32-bit Linux system, or 64-bit Linux system. Building a cross-platform JAR requires: * [Docker](https://www.docker.com/docker-community) * A Mac OSX machine that can compile RocksDB. * Java 7 set as JAVA_HOME. Once you have these items, run this make command from RocksDB's root source directory: make jclean clean rocksdbjavastaticreleasedocker This command will build RocksDB natively on OSX, and will then spin up docker containers to build RocksDB for 32-bit and 64-bit Linux with glibc, and 32-bit and 64-bit Linux with musl libc. 
You can find all native binaries and JARs in the java/target directory upon completion:

    librocksdbjni-linux32.so
    librocksdbjni-linux64.so
    librocksdbjni-linux64-musl.so
    librocksdbjni-linux32-musl.so
    librocksdbjni-osx.jnilib
    rocksdbjni-x.y.z-javadoc.jar
    rocksdbjni-x.y.z-linux32.jar
    rocksdbjni-x.y.z-linux64.jar
    rocksdbjni-x.y.z-linux64-musl.jar
    rocksdbjni-x.y.z-linux32-musl.jar
    rocksdbjni-x.y.z-osx.jar
    rocksdbjni-x.y.z-sources.jar
    rocksdbjni-x.y.z.jar

Where x.y.z is the built version number of RocksDB.

## Maven publication

Set ~/.m2/settings.xml to contain:

    <server>
      <id>sonatype-nexus-staging</id>
      <username>your-sonatype-jira-username</username>
      <password>your-sonatype-jira-password</password>
    </server>

From RocksDB's root directory, first build the Java static JARs:

    make jclean clean rocksdbjavastaticpublish

This command will [stage the JAR artifacts on the Sonatype staging repository](http://central.sonatype.org/pages/manual-staging-bundle-creation-and-deployment.html).

To release the staged artifacts:

1. Go to [https://oss.sonatype.org/#stagingRepositories](https://oss.sonatype.org/#stagingRepositories) and search for "rocksdb" in the upper right-hand search box.
2. Select the rocksdb staging repository, and inspect its contents.
3. If all is well, follow [these steps](https://oss.sonatype.org/#stagingRepositories) to close the repository and release it.

After the release has occurred, the artifacts will be synced to Maven central within 24-48 hours.
rocksdb-6.11.4/java/benchmark/000077500000000000000000000000001370372246700161205ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/000077500000000000000000000000001370372246700167075ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/main/000077500000000000000000000000001370372246700176335ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/main/java/000077500000000000000000000000001370372246700205545ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/main/java/org/000077500000000000000000000000001370372246700213435ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/main/java/org/rocksdb/000077500000000000000000000000001370372246700227725ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/main/java/org/rocksdb/benchmark/000077500000000000000000000000001370372246700247245ustar00rootroot00000000000000rocksdb-6.11.4/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java000066400000000000000000001632231370372246700277360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). /** * Copyright (C) 2011 the original author or authors. * See the notice.md file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Counters and timestamps collected while a benchmark task runs.
 *
 * <p>All timestamps are raw {@link System#nanoTime()} values. An instance is
 * either filled in by a single task or used as an accumulator that other
 * instances are folded into via {@link #merge(Stats)}.
 */
class Stats {
  int id_;
  long start_;
  long finish_;
  double seconds_;
  long done_;
  long found_;
  long lastOpTime_;
  long nextReport_;
  long bytes_;
  StringBuilder message_;
  boolean excludeFromMerge_;

  // TODO(yhchiang): use the following arguments:
  //   (Long)Flag.stats_interval
  //   (Integer)Flag.stats_per_interval
  /**
   * Creates an empty record whose start, finish and last-op timestamps are
   * all set to the current time.
   *
   * @param id identifier printed in progress messages.
   */
  Stats(int id) {
    id_ = id;
    nextReport_ = 100;
    done_ = 0;
    bytes_ = 0;
    seconds_ = 0;
    start_ = System.nanoTime();
    lastOpTime_ = start_;
    finish_ = start_;
    found_ = 0;
    message_ = new StringBuilder("");
    excludeFromMerge_ = false;
  }

  /**
   * Folds {@code other} into this record: counters are summed, the time
   * window is widened to cover both records, and the message of the first
   * merged record that has one is kept.
   *
   * <p>Records flagged with {@link #setExcludeFromMerge()} are ignored.
   *
   * @param other the record to fold in; it is never modified.
   */
  void merge(final Stats other) {
    if (other.excludeFromMerge_) {
      return;
    }

    done_ += other.done_;
    found_ += other.found_;
    bytes_ += other.bytes_;
    seconds_ += other.seconds_;
    if (other.start_ < start_) start_ = other.start_;
    if (other.finish_ > finish_) finish_ = other.finish_;

    // Just keep the messages from one thread. Copy the text rather than
    // adopting other's StringBuilder: sharing the buffer would make a later
    // addMessage() on this record silently change other's message as well.
    if (message_.length() == 0) {
      message_.append(other.message_);
    }
  }

  /** Records the finish time and derives the elapsed seconds from it. */
  void stop() {
    finish_ = System.nanoTime();
    seconds_ = (double) (finish_ - start_) * 1e-9;
  }

  /**
   * Appends {@code msg} to the message, separated from any existing text by
   * a single space.
   */
  void addMessage(String msg) {
    if (message_.length() > 0) {
      message_.append(" ");
    }
    message_.append(msg);
  }

  void setId(int id) { id_ = id; }

  void setExcludeFromMerge() { excludeFromMerge_ = true; }

  /**
   * Accounts for one completed operation and prints a progress line to
   * stderr each time the operation count reaches the next report threshold.
   *
   * @param bytes number of bytes the operation processed.
   */
  void finishedSingleOp(int bytes) {
    done_++;
    lastOpTime_ = System.nanoTime();
    bytes_ += bytes;
    if (done_ >= nextReport_) {
      // The reporting step grows with the threshold so that long runs do not
      // flood the terminal.
      if (nextReport_ < 1000) {
        nextReport_ += 100;
      } else if (nextReport_ < 5000) {
        nextReport_ += 500;
      } else if (nextReport_ < 10000) {
        nextReport_ += 1000;
      } else if (nextReport_ < 50000) {
        nextReport_ += 5000;
      } else if (nextReport_ < 100000) {
        nextReport_ += 10000;
      } else if (nextReport_ < 500000) {
        nextReport_ += 50000;
      } else {
        nextReport_ += 100000;
      }
      System.err.printf("... Task %s finished %d ops%30s\r", id_, done_, "");
    }
  }

  /**
   * Prints a one-line summary (latency, throughput, optional MB/s and
   * message) to stdout.
   *
   * @param name label printed at the start of the line.
   */
  void report(String name) {
    // Pretend at least one op was done in case we are running a benchmark
    // that does not call FinishedSingleOp().
    if (done_ < 1) done_ = 1;

    StringBuilder extra = new StringBuilder("");
    if (bytes_ > 0) {
      // Rate is computed on actual elapsed time, not the sum of per-thread
      // elapsed times.
      double elapsed = (finish_ - start_) * 1e-9;
      extra.append(String.format("%6.1f MB/s",
          (bytes_ / 1048576.0) / elapsed));
    }
    extra.append(message_.toString());
    double elapsed = (finish_ - start_);
    double throughput = (double) done_ / (elapsed * 1e-9);

    System.out.format("%-12s : %11.3f micros/op %d ops/sec;%s%s\n",
        name, (elapsed * 1e-6) / done_,
        (long) throughput, (extra.length() == 0 ? "" : " "), extra.toString());
  }
}
public BenchmarkTask( int tid, long randSeed, long numEntries, long keyRange) { tid_ = tid; rand_ = new Random(randSeed + tid * 1000); numEntries_ = numEntries; keyRange_ = keyRange; stats_ = new Stats(tid); } @Override public Stats call() throws RocksDBException { stats_.start_ = System.nanoTime(); runTask(); stats_.finish_ = System.nanoTime(); return stats_; } abstract protected void runTask() throws RocksDBException; protected int tid_; protected Random rand_; protected long numEntries_; protected long keyRange_; protected Stats stats_; protected void getFixedKey(byte[] key, long sn) { generateKeyFromLong(key, sn); } protected void getRandomKey(byte[] key, long range) { generateKeyFromLong(key, Math.abs(rand_.nextLong() % range)); } } abstract class WriteTask extends BenchmarkTask { public WriteTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch) { super(tid, randSeed, numEntries, keyRange); writeOpt_ = writeOpt; entriesPerBatch_ = entriesPerBatch; maxWritesPerSecond_ = -1; } public WriteTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) { super(tid, randSeed, numEntries, keyRange); writeOpt_ = writeOpt; entriesPerBatch_ = entriesPerBatch; maxWritesPerSecond_ = maxWritesPerSecond; } @Override public void runTask() throws RocksDBException { if (numEntries_ != DbBenchmark.this.num_) { stats_.message_.append(String.format(" (%d ops)", numEntries_)); } byte[] key = new byte[keySize_]; byte[] value = new byte[valueSize_]; try { if (entriesPerBatch_ == 1) { for (long i = 0; i < numEntries_; ++i) { getKey(key, i, keyRange_); DbBenchmark.this.gen_.generate(value); db_.put(writeOpt_, key, value); stats_.finishedSingleOp(keySize_ + valueSize_); writeRateControl(i); if (isFinished()) { return; } } } else { for (long i = 0; i < numEntries_; i += entriesPerBatch_) { WriteBatch batch = new WriteBatch(); for (long j = 0; j < entriesPerBatch_; 
j++) { getKey(key, i + j, keyRange_); DbBenchmark.this.gen_.generate(value); batch.put(key, value); stats_.finishedSingleOp(keySize_ + valueSize_); } db_.write(writeOpt_, batch); batch.dispose(); writeRateControl(i); if (isFinished()) { return; } } } } catch (InterruptedException e) { // thread has been terminated. } } protected void writeRateControl(long writeCount) throws InterruptedException { if (maxWritesPerSecond_ <= 0) return; long minInterval = writeCount * TimeUnit.SECONDS.toNanos(1) / maxWritesPerSecond_; long interval = System.nanoTime() - stats_.start_; if (minInterval - interval > TimeUnit.MILLISECONDS.toNanos(1)) { TimeUnit.NANOSECONDS.sleep(minInterval - interval); } } abstract protected void getKey(byte[] key, long id, long range); protected WriteOptions writeOpt_; protected long entriesPerBatch_; protected long maxWritesPerSecond_; } class WriteSequentialTask extends WriteTask { public WriteSequentialTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch) { super(tid, randSeed, numEntries, keyRange, writeOpt, entriesPerBatch); } public WriteSequentialTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) { super(tid, randSeed, numEntries, keyRange, writeOpt, entriesPerBatch, maxWritesPerSecond); } @Override protected void getKey(byte[] key, long id, long range) { getFixedKey(key, id); } } class WriteRandomTask extends WriteTask { public WriteRandomTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch) { super(tid, randSeed, numEntries, keyRange, writeOpt, entriesPerBatch); } public WriteRandomTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) { super(tid, randSeed, numEntries, keyRange, writeOpt, entriesPerBatch, maxWritesPerSecond); } @Override protected void getKey(byte[] key, long id, long 
range) { getRandomKey(key, range); } } class WriteUniqueRandomTask extends WriteTask { static final int MAX_BUFFER_SIZE = 10000000; public WriteUniqueRandomTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch) { super(tid, randSeed, numEntries, keyRange, writeOpt, entriesPerBatch); initRandomKeySequence(); } public WriteUniqueRandomTask( int tid, long randSeed, long numEntries, long keyRange, WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) { super(tid, randSeed, numEntries, keyRange, writeOpt, entriesPerBatch, maxWritesPerSecond); initRandomKeySequence(); } @Override protected void getKey(byte[] key, long id, long range) { generateKeyFromLong(key, nextUniqueRandom()); } protected void initRandomKeySequence() { bufferSize_ = MAX_BUFFER_SIZE; if (bufferSize_ > keyRange_) { bufferSize_ = (int) keyRange_; } currentKeyCount_ = bufferSize_; keyBuffer_ = new long[MAX_BUFFER_SIZE]; for (int k = 0; k < bufferSize_; ++k) { keyBuffer_[k] = k; } } /** * Semi-randomly return the next unique key. It is guaranteed to be * fully random if keyRange_ <= MAX_BUFFER_SIZE. */ long nextUniqueRandom() { if (bufferSize_ == 0) { System.err.println("bufferSize_ == 0."); return 0; } int r = rand_.nextInt(bufferSize_); // randomly pick one from the keyBuffer long randKey = keyBuffer_[r]; if (currentKeyCount_ < keyRange_) { // if we have not yet inserted all keys, insert next new key to [r]. keyBuffer_[r] = currentKeyCount_++; } else { // move the last element to [r] and decrease the size by 1. 
keyBuffer_[r] = keyBuffer_[--bufferSize_]; } return randKey; } int bufferSize_; long currentKeyCount_; long[] keyBuffer_; } class ReadRandomTask extends BenchmarkTask { public ReadRandomTask( int tid, long randSeed, long numEntries, long keyRange) { super(tid, randSeed, numEntries, keyRange); } @Override public void runTask() throws RocksDBException { byte[] key = new byte[keySize_]; byte[] value = new byte[valueSize_]; for (long i = 0; i < numEntries_; i++) { getRandomKey(key, keyRange_); int len = db_.get(key, value); if (len != RocksDB.NOT_FOUND) { stats_.found_++; stats_.finishedSingleOp(keySize_ + valueSize_); } else { stats_.finishedSingleOp(keySize_); } if (isFinished()) { return; } } } } class ReadSequentialTask extends BenchmarkTask { public ReadSequentialTask( int tid, long randSeed, long numEntries, long keyRange) { super(tid, randSeed, numEntries, keyRange); } @Override public void runTask() throws RocksDBException { RocksIterator iter = db_.newIterator(); long i; for (iter.seekToFirst(), i = 0; iter.isValid() && i < numEntries_; iter.next(), ++i) { stats_.found_++; stats_.finishedSingleOp(iter.key().length + iter.value().length); if (isFinished()) { iter.dispose(); return; } } iter.dispose(); } } public DbBenchmark(Map flags) throws Exception { benchmarks_ = (List) flags.get(Flag.benchmarks); num_ = (Integer) flags.get(Flag.num); threadNum_ = (Integer) flags.get(Flag.threads); reads_ = (Integer) (flags.get(Flag.reads) == null ? 
flags.get(Flag.num) : flags.get(Flag.reads)); keySize_ = (Integer) flags.get(Flag.key_size); valueSize_ = (Integer) flags.get(Flag.value_size); compressionRatio_ = (Double) flags.get(Flag.compression_ratio); useExisting_ = (Boolean) flags.get(Flag.use_existing_db); randSeed_ = (Long) flags.get(Flag.seed); databaseDir_ = (String) flags.get(Flag.db); writesPerSeconds_ = (Integer) flags.get(Flag.writes_per_second); memtable_ = (String) flags.get(Flag.memtablerep); maxWriteBufferNumber_ = (Integer) flags.get(Flag.max_write_buffer_number); prefixSize_ = (Integer) flags.get(Flag.prefix_size); keysPerPrefix_ = (Integer) flags.get(Flag.keys_per_prefix); hashBucketCount_ = (Long) flags.get(Flag.hash_bucket_count); usePlainTable_ = (Boolean) flags.get(Flag.use_plain_table); useMemenv_ = (Boolean) flags.get(Flag.use_mem_env); flags_ = flags; finishLock_ = new Object(); // options.setPrefixSize((Integer)flags_.get(Flag.prefix_size)); // options.setKeysPerPrefix((Long)flags_.get(Flag.keys_per_prefix)); compressionType_ = (String) flags.get(Flag.compression_type); compression_ = CompressionType.NO_COMPRESSION; try { if (compressionType_!=null) { final CompressionType compressionType = CompressionType.getCompressionType(compressionType_); if (compressionType != null && compressionType != CompressionType.NO_COMPRESSION) { System.loadLibrary(compressionType.getLibraryName()); } } } catch (UnsatisfiedLinkError e) { System.err.format("Unable to load %s library:%s%n" + "No compression is used.%n", compressionType_, e.toString()); compressionType_ = "none"; } gen_ = new RandomGenerator(randSeed_, compressionRatio_); } private void prepareReadOptions(ReadOptions options) { options.setVerifyChecksums((Boolean)flags_.get(Flag.verify_checksum)); options.setTailing((Boolean)flags_.get(Flag.use_tailing_iterator)); } private void prepareWriteOptions(WriteOptions options) { options.setSync((Boolean)flags_.get(Flag.sync)); options.setDisableWAL((Boolean)flags_.get(Flag.disable_wal)); } private 
void prepareOptions(Options options) throws RocksDBException { if (!useExisting_) { options.setCreateIfMissing(true); } else { options.setCreateIfMissing(false); } if (useMemenv_) { options.setEnv(new RocksMemEnv(Env.getDefault())); } switch (memtable_) { case "skip_list": options.setMemTableConfig(new SkipListMemTableConfig()); break; case "vector": options.setMemTableConfig(new VectorMemTableConfig()); break; case "hash_linkedlist": options.setMemTableConfig( new HashLinkedListMemTableConfig() .setBucketCount(hashBucketCount_)); options.useFixedLengthPrefixExtractor(prefixSize_); break; case "hash_skiplist": case "prefix_hash": options.setMemTableConfig( new HashSkipListMemTableConfig() .setBucketCount(hashBucketCount_)); options.useFixedLengthPrefixExtractor(prefixSize_); break; default: System.err.format( "unable to detect the specified memtable, " + "use the default memtable factory %s%n", options.memTableFactoryName()); break; } if (usePlainTable_) { options.setTableFormatConfig( new PlainTableConfig().setKeySize(keySize_)); } else { BlockBasedTableConfig table_options = new BlockBasedTableConfig(); table_options.setBlockSize((Long)flags_.get(Flag.block_size)) .setBlockCacheSize((Long)flags_.get(Flag.cache_size)) .setCacheNumShardBits( (Integer)flags_.get(Flag.cache_numshardbits)); options.setTableFormatConfig(table_options); } options.setWriteBufferSize( (Long)flags_.get(Flag.write_buffer_size)); options.setMaxWriteBufferNumber( (Integer)flags_.get(Flag.max_write_buffer_number)); options.setMaxBackgroundCompactions( (Integer)flags_.get(Flag.max_background_compactions)); options.getEnv().setBackgroundThreads( (Integer)flags_.get(Flag.max_background_compactions)); options.setMaxBackgroundFlushes( (Integer)flags_.get(Flag.max_background_flushes)); options.setMaxBackgroundJobs((Integer) flags_.get(Flag.max_background_jobs)); options.setMaxOpenFiles( (Integer)flags_.get(Flag.open_files)); options.setUseFsync( (Boolean)flags_.get(Flag.use_fsync)); 
options.setWalDir( (String)flags_.get(Flag.wal_dir)); options.setDeleteObsoleteFilesPeriodMicros( (Integer)flags_.get(Flag.delete_obsolete_files_period_micros)); options.setTableCacheNumshardbits( (Integer)flags_.get(Flag.table_cache_numshardbits)); options.setAllowMmapReads( (Boolean)flags_.get(Flag.mmap_read)); options.setAllowMmapWrites( (Boolean)flags_.get(Flag.mmap_write)); options.setAdviseRandomOnOpen( (Boolean)flags_.get(Flag.advise_random_on_open)); options.setUseAdaptiveMutex( (Boolean)flags_.get(Flag.use_adaptive_mutex)); options.setBytesPerSync( (Long)flags_.get(Flag.bytes_per_sync)); options.setBloomLocality( (Integer)flags_.get(Flag.bloom_locality)); options.setMinWriteBufferNumberToMerge( (Integer)flags_.get(Flag.min_write_buffer_number_to_merge)); options.setMemtablePrefixBloomSizeRatio((Double) flags_.get(Flag.memtable_bloom_size_ratio)); options.setNumLevels( (Integer)flags_.get(Flag.num_levels)); options.setTargetFileSizeBase( (Integer)flags_.get(Flag.target_file_size_base)); options.setTargetFileSizeMultiplier((Integer)flags_.get(Flag.target_file_size_multiplier)); options.setMaxBytesForLevelBase( (Integer)flags_.get(Flag.max_bytes_for_level_base)); options.setMaxBytesForLevelMultiplier((Double) flags_.get(Flag.max_bytes_for_level_multiplier)); options.setLevelZeroStopWritesTrigger( (Integer)flags_.get(Flag.level0_stop_writes_trigger)); options.setLevelZeroSlowdownWritesTrigger( (Integer)flags_.get(Flag.level0_slowdown_writes_trigger)); options.setLevelZeroFileNumCompactionTrigger( (Integer)flags_.get(Flag.level0_file_num_compaction_trigger)); options.setMaxCompactionBytes( (Long) flags_.get(Flag.max_compaction_bytes)); options.setDisableAutoCompactions( (Boolean)flags_.get(Flag.disable_auto_compactions)); options.setMaxSuccessiveMerges( (Integer)flags_.get(Flag.max_successive_merges)); options.setWalTtlSeconds((Long)flags_.get(Flag.wal_ttl_seconds)); options.setWalSizeLimitMB((Long)flags_.get(Flag.wal_size_limit_MB)); 
if(flags_.get(Flag.java_comparator) != null) { options.setComparator( (AbstractComparator)flags_.get(Flag.java_comparator)); } /* TODO(yhchiang): enable the following parameters options.setCompressionType((String)flags_.get(Flag.compression_type)); options.setCompressionLevel((Integer)flags_.get(Flag.compression_level)); options.setMinLevelToCompress((Integer)flags_.get(Flag.min_level_to_compress)); options.setHdfs((String)flags_.get(Flag.hdfs)); // env options.setStatistics((Boolean)flags_.get(Flag.statistics)); options.setUniversalSizeRatio( (Integer)flags_.get(Flag.universal_size_ratio)); options.setUniversalMinMergeWidth( (Integer)flags_.get(Flag.universal_min_merge_width)); options.setUniversalMaxMergeWidth( (Integer)flags_.get(Flag.universal_max_merge_width)); options.setUniversalMaxSizeAmplificationPercent( (Integer)flags_.get(Flag.universal_max_size_amplification_percent)); options.setUniversalCompressionSizePercent( (Integer)flags_.get(Flag.universal_compression_size_percent)); // TODO(yhchiang): add RocksDB.openForReadOnly() to enable Flag.readonly // TODO(yhchiang): enable Flag.merge_operator by switch options.setAccessHintOnCompactionStart( (String)flags_.get(Flag.compaction_fadvice)); // available values of fadvice are "NONE", "NORMAL", "SEQUENTIAL", "WILLNEED" for fadvice */ } private void run() throws RocksDBException { if (!useExisting_) { destroyDb(); } Options options = new Options(); prepareOptions(options); open(options); printHeader(options); for (String benchmark : benchmarks_) { List> tasks = new ArrayList>(); List> bgTasks = new ArrayList>(); WriteOptions writeOpt = new WriteOptions(); prepareWriteOptions(writeOpt); ReadOptions readOpt = new ReadOptions(); prepareReadOptions(readOpt); int currentTaskId = 0; boolean known = true; switch (benchmark) { case "fillseq": tasks.add(new WriteSequentialTask( currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); break; case "fillbatch": tasks.add( new WriteSequentialTask(currentTaskId++, randSeed_, 
num_, num_, writeOpt, 1000)); break; case "fillrandom": tasks.add(new WriteRandomTask( currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); break; case "filluniquerandom": tasks.add(new WriteUniqueRandomTask( currentTaskId++, randSeed_, num_, num_, writeOpt, 1)); break; case "fillsync": writeOpt.setSync(true); tasks.add(new WriteRandomTask( currentTaskId++, randSeed_, num_ / 1000, num_ / 1000, writeOpt, 1)); break; case "readseq": for (int t = 0; t < threadNum_; ++t) { tasks.add(new ReadSequentialTask( currentTaskId++, randSeed_, reads_ / threadNum_, num_)); } break; case "readrandom": for (int t = 0; t < threadNum_; ++t) { tasks.add(new ReadRandomTask( currentTaskId++, randSeed_, reads_ / threadNum_, num_)); } break; case "readwhilewriting": WriteTask writeTask = new WriteRandomTask( -1, randSeed_, Long.MAX_VALUE, num_, writeOpt, 1, writesPerSeconds_); writeTask.stats_.setExcludeFromMerge(); bgTasks.add(writeTask); for (int t = 0; t < threadNum_; ++t) { tasks.add(new ReadRandomTask( currentTaskId++, randSeed_, reads_ / threadNum_, num_)); } break; case "readhot": for (int t = 0; t < threadNum_; ++t) { tasks.add(new ReadRandomTask( currentTaskId++, randSeed_, reads_ / threadNum_, num_ / 100)); } break; case "delete": destroyDb(); open(options); break; default: known = false; System.err.println("Unknown benchmark: " + benchmark); break; } if (known) { ExecutorService executor = Executors.newCachedThreadPool(); ExecutorService bgExecutor = Executors.newCachedThreadPool(); try { // measure only the main executor time List> bgResults = new ArrayList>(); for (Callable bgTask : bgTasks) { bgResults.add(bgExecutor.submit(bgTask)); } start(); List> results = executor.invokeAll(tasks); executor.shutdown(); boolean finished = executor.awaitTermination(10, TimeUnit.SECONDS); if (!finished) { System.out.format( "Benchmark %s was not finished before timeout.", benchmark); executor.shutdownNow(); } setFinished(true); bgExecutor.shutdown(); finished = 
bgExecutor.awaitTermination(10, TimeUnit.SECONDS); if (!finished) { System.out.format( "Benchmark %s was not finished before timeout.", benchmark); bgExecutor.shutdownNow(); } stop(benchmark, results, currentTaskId); } catch (InterruptedException e) { System.err.println(e); } } writeOpt.dispose(); readOpt.dispose(); } options.dispose(); db_.close(); } private void printHeader(Options options) { int kKeySize = 16; System.out.printf("Keys: %d bytes each\n", kKeySize); System.out.printf("Values: %d bytes each (%d bytes after compression)\n", valueSize_, (int) (valueSize_ * compressionRatio_ + 0.5)); System.out.printf("Entries: %d\n", num_); System.out.printf("RawSize: %.1f MB (estimated)\n", ((double)(kKeySize + valueSize_) * num_) / SizeUnit.MB); System.out.printf("FileSize: %.1f MB (estimated)\n", (((kKeySize + valueSize_ * compressionRatio_) * num_) / SizeUnit.MB)); System.out.format("Memtable Factory: %s%n", options.memTableFactoryName()); System.out.format("Prefix: %d bytes%n", prefixSize_); System.out.format("Compression: %s%n", compressionType_); printWarnings(); System.out.printf("------------------------------------------------\n"); } void printWarnings() { boolean assertsEnabled = false; assert assertsEnabled = true; // Intentional side effect!!! 
if (assertsEnabled) { System.out.printf( "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); } } private void open(Options options) throws RocksDBException { System.out.println("Using database directory: " + databaseDir_); db_ = RocksDB.open(options, databaseDir_); } private void start() { setFinished(false); startTime_ = System.nanoTime(); } private void stop( String benchmark, List> results, int concurrentThreads) { long endTime = System.nanoTime(); double elapsedSeconds = 1.0d * (endTime - startTime_) / TimeUnit.SECONDS.toNanos(1); Stats stats = new Stats(-1); int taskFinishedCount = 0; for (Future result : results) { if (result.isDone()) { try { Stats taskStats = result.get(3, TimeUnit.SECONDS); if (!result.isCancelled()) { taskFinishedCount++; } stats.merge(taskStats); } catch (Exception e) { // then it's not successful, the output will indicate this } } } String extra = ""; if (benchmark.indexOf("read") >= 0) { extra = String.format(" %d / %d found; ", stats.found_, stats.done_); } else { extra = String.format(" %d ops done; ", stats.done_); } System.out.printf( "%-16s : %11.5f micros/op; %6.1f MB/s;%s %d / %d task(s) finished.\n", benchmark, elapsedSeconds / stats.done_ * 1e6, (stats.bytes_ / 1048576.0) / elapsedSeconds, extra, taskFinishedCount, concurrentThreads); } public void generateKeyFromLong(byte[] slice, long n) { assert(n >= 0); int startPos = 0; if (keysPerPrefix_ > 0) { long numPrefix = (num_ + keysPerPrefix_ - 1) / keysPerPrefix_; long prefix = n % numPrefix; int bytesToFill = Math.min(prefixSize_, 8); for (int i = 0; i < bytesToFill; ++i) { slice[i] = (byte) (prefix % 256); prefix /= 256; } for (int i = 8; i < bytesToFill; ++i) { slice[i] = '0'; } startPos = bytesToFill; } for (int i = slice.length - 1; i >= startPos; --i) { slice[i] = (byte) ('0' + (n % 10)); n /= 10; } } private void destroyDb() { if (db_ != null) { db_.close(); } // TODO(yhchiang): develop our own FileUtil // FileUtil.deleteDir(databaseDir_); } private void 
printStats() { } static void printHelp() { System.out.println("usage:"); for (Flag flag : Flag.values()) { System.out.format(" --%s%n\t%s%n", flag.name(), flag.desc()); if (flag.getDefaultValue() != null) { System.out.format("\tDEFAULT: %s%n", flag.getDefaultValue().toString()); } } } public static void main(String[] args) throws Exception { Map flags = new EnumMap(Flag.class); for (Flag flag : Flag.values()) { if (flag.getDefaultValue() != null) { flags.put(flag, flag.getDefaultValue()); } } for (String arg : args) { boolean valid = false; if (arg.equals("--help") || arg.equals("-h")) { printHelp(); System.exit(0); } if (arg.startsWith("--")) { try { String[] parts = arg.substring(2).split("="); if (parts.length >= 1) { Flag key = Flag.valueOf(parts[0]); if (key != null) { Object value = null; if (parts.length >= 2) { value = key.parseValue(parts[1]); } flags.put(key, value); valid = true; } } } catch (Exception e) { } } if (!valid) { System.err.println("Invalid argument " + arg); System.exit(1); } } new DbBenchmark(flags).run(); } private enum Flag { benchmarks(Arrays.asList("fillseq", "readrandom", "fillrandom"), "Comma-separated list of operations to run in the specified order\n" + "\tActual benchmarks:\n" + "\t\tfillseq -- write N values in sequential key order in async mode.\n" + "\t\tfillrandom -- write N values in random key order in async mode.\n" + "\t\tfillbatch -- write N/1000 batch where each batch has 1000 values\n" + "\t\t in sequential key order in sync mode.\n" + "\t\tfillsync -- write N/100 values in random key order in sync mode.\n" + "\t\tfill100K -- write N/1000 100K values in random order in async mode.\n" + "\t\treadseq -- read N times sequentially.\n" + "\t\treadrandom -- read N times in random order.\n" + "\t\treadhot -- read N times in random order from 1% section of DB.\n" + "\t\treadwhilewriting -- measure the read performance of multiple readers\n" + "\t\t with a bg single writer. 
The write rate of the bg\n" + "\t\t is capped by --writes_per_second.\n" + "\tMeta Operations:\n" + "\t\tdelete -- delete DB") { @Override public Object parseValue(String value) { return new ArrayList(Arrays.asList(value.split(","))); } }, compression_ratio(0.5d, "Arrange to generate values that shrink to this fraction of\n" + "\ttheir original size after compression.") { @Override public Object parseValue(String value) { return Double.parseDouble(value); } }, use_existing_db(false, "If true, do not destroy the existing database. If you set this\n" + "\tflag and also specify a benchmark that wants a fresh database,\n" + "\tthat benchmark will fail.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, num(1000000, "Number of key/values to place in database.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, threads(1, "Number of concurrent threads to run.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, reads(null, "Number of read operations to do. If negative, do --nums reads.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, key_size(16, "The size of each key in bytes.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, value_size(100, "The size of each value in bytes.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, write_buffer_size(4L * SizeUnit.MB, "Number of bytes to buffer in memtable before compacting\n" + "\t(initialized to default value by 'main'.)") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, max_write_buffer_number(2, "The number of in-memory memtables. 
Each memtable is of size\n" + "\twrite_buffer_size.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, prefix_size(0, "Controls the prefix size for HashSkipList, HashLinkedList,\n" + "\tand plain table.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, keys_per_prefix(0, "Controls the average number of keys generated\n" + "\tper prefix, 0 means no special handling of the prefix,\n" + "\ti.e. use the prefix comes with the generated random number.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, memtablerep("skip_list", "The memtable format. Available options are\n" + "\tskip_list,\n" + "\tvector,\n" + "\thash_linkedlist,\n" + "\thash_skiplist (prefix_hash.)") { @Override public Object parseValue(String value) { return value; } }, hash_bucket_count(SizeUnit.MB, "The number of hash buckets used in the hash-bucket-based\n" + "\tmemtables. Memtables that currently support this argument are\n" + "\thash_linkedlist and hash_skiplist.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, writes_per_second(10000, "The write-rate of the background writer used in the\n" + "\t`readwhilewriting` benchmark. 
Non-positive number indicates\n" + "\tusing an unbounded write-rate in `readwhilewriting` benchmark.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, use_plain_table(false, "Use plain-table sst format.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, cache_size(-1L, "Number of bytes to use as a cache of uncompressed data.\n" + "\tNegative means use default settings.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, seed(0L, "Seed base for random number generators.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, num_levels(7, "The total number of levels.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, numdistinct(1000L, "Number of distinct keys to use. Used in RandomWithVerify to\n" + "\tread/write on fewer keys so that gets are more likely to find the\n" + "\tkey and puts are more likely to update the same key.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, merge_keys(-1L, "Number of distinct keys to use for MergeRandom and\n" + "\tReadRandomMergeRandom.\n" + "\tIf negative, there will be FLAGS_num keys.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, bloom_locality(0,"Control bloom filter probes locality.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, duration(0,"Time in seconds for the random-ops tests to run.\n" + "\tWhen 0 then num & reads determine the test duration.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, num_multi_db(0, "Number of DBs used in the benchmark. 
0 means single DB.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, histogram(false,"Print histogram of operation timings.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, min_write_buffer_number_to_merge( defaultOptions_.minWriteBufferNumberToMerge(), "The minimum number of write buffers that will be merged together\n" + "\tbefore writing to storage. This is cheap because it is an\n" + "\tin-memory merge. If this feature is not enabled, then all these\n" + "\twrite buffers are flushed to L0 as separate files and this\n" + "\tincreases read amplification because a get request has to check\n" + "\tin all of these files. Also, an in-memory merge may result in\n" + "\twriting less data to storage if there are duplicate records\n" + "\tin each of these individual write buffers.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, max_background_compactions( defaultOptions_.maxBackgroundCompactions(), "The maximum number of concurrent background compactions\n" + "\tthat can occur in parallel.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, max_background_flushes( defaultOptions_.maxBackgroundFlushes(), "The maximum number of concurrent background flushes\n" + "\tthat can occur in parallel.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, max_background_jobs(defaultOptions_.maxBackgroundJobs(), "The maximum number of concurrent background jobs\n" + "\tthat can occur in parallel.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, /* TODO(yhchiang): enable the following compaction_style((int32_t) defaultOptions_.compactionStyle(), "style of compaction: level-based vs universal.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } },*/ universal_size_ratio(0, "Percentage flexibility while 
comparing file size\n" + "\t(for universal compaction only).") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, universal_min_merge_width(0,"The minimum number of files in a\n" + "\tsingle compaction run (for universal compaction only).") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, universal_max_merge_width(0,"The max number of files to compact\n" + "\tin universal style compaction.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, universal_max_size_amplification_percent(0, "The max size amplification for universal style compaction.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, universal_compression_size_percent(-1, "The percentage of the database to compress for universal\n" + "\tcompaction. -1 means compress everything.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, block_size(defaultBlockBasedTableOptions_.blockSize(), "Number of bytes in a block.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, compressed_cache_size(-1L, "Number of bytes to use as a cache of compressed data.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, open_files(defaultOptions_.maxOpenFiles(), "Maximum number of files to keep open at the same time\n" + "\t(use default if == 0)") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, bloom_bits(-1,"Bloom filter bits per key. 
Negative means\n" + "\tuse default settings.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, memtable_bloom_size_ratio(0.0d, "Ratio of memtable used by the bloom filter.\n" + "\t0 means no bloom filter.") { @Override public Object parseValue(String value) { return Double.parseDouble(value); } }, cache_numshardbits(-1,"Number of shards for the block cache\n" + "\tis 2 ** cache_numshardbits. Negative means use default settings.\n" + "\tThis is applied only if FLAGS_cache_size is non-negative.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, verify_checksum(false,"Verify checksum for every block read\n" + "\tfrom storage.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, statistics(false,"Database statistics.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, writes(-1L, "Number of write operations to do. If negative, do\n" + "\t--num reads.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, sync(false,"Sync all writes to disk.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, use_fsync(false,"If true, issue fsync instead of fdatasync.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, disable_wal(false,"If true, do not write WAL for write.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, wal_dir("", "If not empty, use the given dir for WAL.") { @Override public Object parseValue(String value) { return value; } }, target_file_size_base(2 * 1048576,"Target file size at level-1") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, target_file_size_multiplier(1, "A multiplier to compute target level-N file size (N >= 2)") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, 
max_bytes_for_level_base(10 * 1048576, "Max bytes for level-1") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, max_bytes_for_level_multiplier(10.0d, "A multiplier to compute max bytes for level-N (N >= 2)") { @Override public Object parseValue(String value) { return Double.parseDouble(value); } }, level0_stop_writes_trigger(12,"Number of files in level-0\n" + "\tthat will trigger put stop.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, level0_slowdown_writes_trigger(8,"Number of files in level-0\n" + "\tthat will slow down writes.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, level0_file_num_compaction_trigger(4,"Number of files in level-0\n" + "\twhen compactions start.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, readwritepercent(90,"Ratio of reads to reads/writes (expressed\n" + "\tas percentage) for the ReadRandomWriteRandom workload. The\n" + "\tdefault value 90 means 90% operations out of all reads and writes\n" + "\toperations are reads. In other words, 9 gets for every 1 put.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, mergereadpercent(70,"Ratio of merges to merges&reads (expressed\n" + "\tas percentage) for the ReadRandomMergeRandom workload. The\n" + "\tdefault value 70 means 70% out of all read and merge operations\n" + "\tare merges. In other words, 7 merges for every 3 gets.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, deletepercent(2,"Percentage of deletes out of reads/writes/\n" + "\tdeletes (used in RandomWithVerify only). 
RandomWithVerify\n" + "\tcalculates writepercent as (100 - FLAGS_readwritepercent -\n" + "\tdeletepercent), so deletepercent must be smaller than (100 -\n" + "\tFLAGS_readwritepercent)") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, delete_obsolete_files_period_micros(0,"Option to delete\n" + "\tobsolete files periodically. 0 means that obsolete files are\n" + "\tdeleted after every compaction run.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, compression_type("snappy", "Algorithm used to compress the database.") { @Override public Object parseValue(String value) { return value; } }, compression_level(-1, "Compression level. For zlib this should be -1 for the\n" + "\tdefault level, or between 0 and 9.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, min_level_to_compress(-1,"If non-negative, compression starts\n" + "\tfrom this level. Levels with number < min_level_to_compress are\n" + "\tnot compressed. Otherwise, apply compression_type to\n" + "\tall levels.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, table_cache_numshardbits(4,"") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, stats_interval(0L, "Stats are reported every N operations when\n" + "\tthis is greater than zero. 
When 0 the interval grows over time.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, stats_per_interval(0,"Reports additional stats per interval when\n" + "\tthis is greater than 0.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, perf_level(0,"Level of perf collection.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, soft_rate_limit(0.0d,"") { @Override public Object parseValue(String value) { return Double.parseDouble(value); } }, hard_rate_limit(0.0d,"When not equal to 0 this make threads\n" + "\tsleep at each stats reporting interval until the compaction\n" + "\tscore for all levels is less than or equal to this value.") { @Override public Object parseValue(String value) { return Double.parseDouble(value); } }, rate_limit_delay_max_milliseconds(1000, "When hard_rate_limit is set then this is the max time a put will\n" + "\tbe stalled.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, max_compaction_bytes(0L, "Limit number of bytes in one compaction to be lower than this\n" + "\threshold. 
But it's not guaranteed.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, readonly(false,"Run read only benchmarks.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, disable_auto_compactions(false,"Do not auto trigger compactions.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, wal_ttl_seconds(0L,"Set the TTL for the WAL Files in seconds.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, wal_size_limit_MB(0L,"Set the size limit for the WAL Files\n" + "\tin MB.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, /* TODO(yhchiang): enable the following direct_reads(rocksdb::EnvOptions().use_direct_reads, "Allow direct I/O reads.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, direct_writes(rocksdb::EnvOptions().use_direct_reads, "Allow direct I/O reads.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, */ mmap_read(false, "Allow reads to occur via mmap-ing files.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, mmap_write(false, "Allow writes to occur via mmap-ing files.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, advise_random_on_open(defaultOptions_.adviseRandomOnOpen(), "Advise random access on table file open.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, compaction_fadvice("NORMAL", "Access pattern advice when a file is compacted.") { @Override public Object parseValue(String value) { return value; } }, use_tailing_iterator(false, "Use tailing iterator to access a series of keys instead of get.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, use_adaptive_mutex(defaultOptions_.useAdaptiveMutex(), "Use adaptive mutex.") { @Override 
public Object parseValue(String value) { return parseBoolean(value); } }, bytes_per_sync(defaultOptions_.bytesPerSync(), "Allows OS to incrementally sync files to disk while they are\n" + "\tbeing written, in the background. Issue one request for every\n" + "\tbytes_per_sync written. 0 turns it off.") { @Override public Object parseValue(String value) { return Long.parseLong(value); } }, filter_deletes(false," On true, deletes use bloom-filter and drop\n" + "\tthe delete if key not present.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, max_successive_merges(0,"Maximum number of successive merge\n" + "\toperations on a key in the memtable.") { @Override public Object parseValue(String value) { return Integer.parseInt(value); } }, db(getTempDir("rocksdb-jni"), "Use the db with the following name.") { @Override public Object parseValue(String value) { return value; } }, use_mem_env(false, "Use RocksMemEnv instead of default filesystem based\n" + "environment.") { @Override public Object parseValue(String value) { return parseBoolean(value); } }, java_comparator(null, "Class name of a Java Comparator to use instead\n" + "\tof the default C++ ByteWiseComparatorImpl. 
Must be available on\n" + "\tthe classpath") { @Override protected Object parseValue(final String value) { try { final ComparatorOptions copt = new ComparatorOptions(); final Class clsComparator = (Class)Class.forName(value); final Constructor cstr = clsComparator.getConstructor(ComparatorOptions.class); return cstr.newInstance(copt); } catch(final ClassNotFoundException cnfe) { throw new IllegalArgumentException("Java Comparator '" + value + "'" + " not found on the classpath", cnfe); } catch(final NoSuchMethodException nsme) { throw new IllegalArgumentException("Java Comparator '" + value + "'" + " does not have a public ComparatorOptions constructor", nsme); } catch(final IllegalAccessException | InstantiationException | InvocationTargetException ie) { throw new IllegalArgumentException("Unable to construct Java" + " Comparator '" + value + "'", ie); } } }; private Flag(Object defaultValue, String desc) { defaultValue_ = defaultValue; desc_ = desc; } public Object getDefaultValue() { return defaultValue_; } public String desc() { return desc_; } public boolean parseBoolean(String value) { if (value.equals("1")) { return true; } else if (value.equals("0")) { return false; } return Boolean.parseBoolean(value); } protected abstract Object parseValue(String value); private final Object defaultValue_; private final String desc_; } private final static String DEFAULT_TEMP_DIR = "/tmp"; private static String getTempDir(final String dirName) { try { return Files.createTempDirectory(dirName).toAbsolutePath().toString(); } catch(final IOException ioe) { System.err.println("Unable to create temp directory, defaulting to: " + DEFAULT_TEMP_DIR); return DEFAULT_TEMP_DIR + File.pathSeparator + dirName; } } private static class RandomGenerator { private final byte[] data_; private int dataLength_; private int position_; private double compressionRatio_; Random rand_; private RandomGenerator(long seed, double compressionRatio) { // We use a limited amount of data over and over 
again and ensure // that it is larger than the compression window (32KB), and also byte[] value = new byte[100]; // large enough to serve all typical value sizes we want to write. rand_ = new Random(seed); dataLength_ = value.length * 10000; data_ = new byte[dataLength_]; compressionRatio_ = compressionRatio; int pos = 0; while (pos < dataLength_) { compressibleBytes(value); System.arraycopy(value, 0, data_, pos, Math.min(value.length, dataLength_ - pos)); pos += value.length; } } private void compressibleBytes(byte[] value) { int baseLength = value.length; if (compressionRatio_ < 1.0d) { baseLength = (int) (compressionRatio_ * value.length + 0.5); } if (baseLength <= 0) { baseLength = 1; } int pos; for (pos = 0; pos < baseLength; ++pos) { value[pos] = (byte) (' ' + rand_.nextInt(95)); // ' ' .. '~' } while (pos < value.length) { System.arraycopy(value, 0, value, pos, Math.min(baseLength, value.length - pos)); pos += baseLength; } } private void generate(byte[] value) { if (position_ + value.length > data_.length) { position_ = 0; assert(value.length <= data_.length); } position_ += value.length; System.arraycopy(data_, position_ - value.length, value, 0, value.length); } } boolean isFinished() { synchronized(finishLock_) { return isFinished_; } } void setFinished(boolean flag) { synchronized(finishLock_) { isFinished_ = flag; } } RocksDB db_; final List benchmarks_; final int num_; final int reads_; final int keySize_; final int valueSize_; final int threadNum_; final int writesPerSeconds_; final long randSeed_; final boolean useExisting_; final String databaseDir_; double compressionRatio_; RandomGenerator gen_; long startTime_; // env boolean useMemenv_; // memtable related final int maxWriteBufferNumber_; final int prefixSize_; final int keysPerPrefix_; final String memtable_; final long hashBucketCount_; // sst format related boolean usePlainTable_; Object finishLock_; boolean isFinished_; Map flags_; // as the scope of a static member equals to the scope of 
the problem, // we let its c++ pointer to be disposed in its finalizer. static Options defaultOptions_ = new Options(); static BlockBasedTableConfig defaultBlockBasedTableOptions_ = new BlockBasedTableConfig(); String compressionType_; CompressionType compression_; } rocksdb-6.11.4/java/crossbuild/000077500000000000000000000000001370372246700163375ustar00rootroot00000000000000rocksdb-6.11.4/java/crossbuild/Vagrantfile000066400000000000000000000030071370372246700205240ustar00rootroot00000000000000# -*- mode: ruby -*- # vi: set ft=ruby : # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! VAGRANTFILE_API_VERSION = "2" Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.define "linux32" do |linux32| linux32.vm.box = "bento/centos-6.10-i386" linux32.vm.provision :shell, path: "build-linux-centos.sh" end config.vm.define "linux64" do |linux64| linux64.vm.box = "bento/centos-6.10" linux64.vm.provision :shell, path: "build-linux-centos.sh" end config.vm.define "linux32-musl" do |musl32| musl32.vm.box = "alpine/alpine32" musl32.vm.box_version = "3.6.0" musl32.vm.provision :shell, path: "build-linux-alpine.sh" end config.vm.define "linux64-musl" do |musl64| musl64.vm.box = "generic/alpine36" ## Should use the alpine/alpine64 box, but this issue needs to be fixed first - https://github.com/hashicorp/vagrant/issues/11218 # musl64.vm.box = "alpine/alpine64" # musl64.vm.box_version = "3.6.0" musl64.vm.provision :shell, path: "build-linux-alpine.sh" end config.vm.provider "virtualbox" do |v| v.memory = 2048 v.cpus = 4 v.customize ["modifyvm", :id, "--nictype1", "virtio" ] end if Vagrant.has_plugin?("vagrant-cachier") config.cache.scope = :box end if Vagrant.has_plugin?("vagrant-vbguest") config.vbguest.no_install = true end config.vm.synced_folder "../target", "/rocksdb-build" config.vm.synced_folder "../..", "/rocksdb", type: "rsync" config.vm.boot_timeout = 1200 end 
rocksdb-6.11.4/java/crossbuild/build-linux-alpine.sh000077500000000000000000000030561370372246700224040ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. set -e # update Alpine with latest versions echo '@edge http://nl.alpinelinux.org/alpine/edge/main' >> /etc/apk/repositories echo '@community http://nl.alpinelinux.org/alpine/edge/community' >> /etc/apk/repositories apk update apk upgrade # install CA certificates apk add ca-certificates # install build tools apk add \ build-base \ coreutils \ file \ git \ perl \ automake \ autoconf \ cmake # install tool dependencies for building RocksDB static library apk add \ curl \ bash \ wget \ tar \ openssl # install RocksDB dependencies apk add \ snappy snappy-dev \ zlib zlib-dev \ bzip2 bzip2-dev \ lz4 lz4-dev \ zstd zstd-dev \ linux-headers \ jemalloc jemalloc-dev # install OpenJDK7 apk add openjdk7 \ && apk add java-cacerts \ && rm /usr/lib/jvm/java-1.7-openjdk/jre/lib/security/cacerts \ && ln -s /etc/ssl/certs/java/cacerts /usr/lib/jvm/java-1.7-openjdk/jre/lib/security/cacerts # cleanup rm -rf /var/cache/apk/* # puts javac in the PATH export JAVA_HOME=/usr/lib/jvm/java-1.7-openjdk export PATH=/usr/lib/jvm/java-1.7-openjdk/bin:$PATH # gflags from source cd /tmp &&\ git clone -b v2.0 --single-branch https://github.com/gflags/gflags.git &&\ cd gflags &&\ ./configure --prefix=/usr && make && make install &&\ rm -rf /tmp/* # build rocksdb cd /rocksdb make jclean clean PORTABLE=1 make -j8 rocksdbjavastatic cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build rocksdb-6.11.4/java/crossbuild/build-linux-centos.sh000077500000000000000000000034611370372246700224270ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
set -e

# remove fixed releasever variable present in the hanscode boxes
sudo rm -f /etc/yum/vars/releasever

# enable EPEL
sudo yum -y install epel-release

# install all required packages for rocksdb that are available through yum
sudo yum -y install openssl java-1.7.0-openjdk-devel zlib-devel bzip2-devel lz4-devel snappy-devel libzstd-devel jemalloc-devel cmake3

# set up cmake3 as cmake binary
sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake 10 --slave /usr/local/bin/ctest ctest /usr/bin/ctest --slave /usr/local/bin/cpack cpack /usr/bin/cpack --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake
sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake3 20 --slave /usr/local/bin/ctest ctest /usr/bin/ctest3 --slave /usr/local/bin/cpack cpack /usr/bin/cpack3 --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake3

# install gcc/g++ 4.8.2 from tru/devtools-2
sudo wget -O /etc/yum.repos.d/devtools-2.repo https://people.centos.org/tru/devtools-2/devtools-2.repo
sudo yum -y install devtoolset-2-binutils devtoolset-2-gcc devtoolset-2-gcc-c++

# install gflags (built with the devtoolset-2 compiler)
wget https://github.com/gflags/gflags/archive/v2.0.tar.gz -O gflags-2.0.tar.gz
tar xvfz gflags-2.0.tar.gz
cd gflags-2.0
scl enable devtoolset-2 ./configure
scl enable devtoolset-2 make
sudo make install
# append /usr/local/lib without creating an empty (= current directory) entry
# when LD_LIBRARY_PATH was previously unset
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/lib

# set java home so we can build rocksdb jars
export JAVA_HOME=/usr/lib/jvm/java-1.7.0
# the JDK executables live in $JAVA_HOME/bin, not in $JAVA_HOME itself
export PATH=$JAVA_HOME/bin:/usr/local/bin:$PATH

# build rocksdb
cd /rocksdb
scl enable devtoolset-2 'make clean-not-downloaded'
scl enable devtoolset-2 'PORTABLE=1 make -j8 rocksdbjavastatic'
cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build
cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build
rocksdb-6.11.4/java/crossbuild/build-linux.sh000077500000000000000000000010421370372246700211270ustar00rootroot00000000000000#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# install all required packages for rocksdb sudo apt-get update sudo apt-get -y install git make gcc g++ libgflags-dev libsnappy-dev zlib1g-dev libbz2-dev default-jdk # set java home so we can build rocksdb jars export JAVA_HOME=$(echo /usr/lib/jvm/java-7-openjdk*) cd /rocksdb make jclean clean make -j 4 rocksdbjavastatic cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build sudo shutdown -h now rocksdb-6.11.4/java/crossbuild/docker-build-linux-alpine.sh000077500000000000000000000006661370372246700236550ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. set -e #set -x # just in-case this is run outside Docker mkdir -p /rocksdb-local-build rm -rf /rocksdb-local-build/* cp -r /rocksdb-host/* /rocksdb-local-build cd /rocksdb-local-build make clean-not-downloaded PORTABLE=1 make rocksdbjavastatic cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar /rocksdb-java-target rocksdb-6.11.4/java/crossbuild/docker-build-linux-centos.sh000077500000000000000000000016601370372246700236730ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
set -e #set -x # just in-case this is run outside Docker mkdir -p /rocksdb-local-build rm -rf /rocksdb-local-build/* cp -r /rocksdb-host/* /rocksdb-local-build cd /rocksdb-local-build # Use scl devtoolset if available if hash scl 2>/dev/null; then if scl --list | grep -q 'devtoolset-7'; then # CentOS 7+ scl enable devtoolset-7 'make clean-not-downloaded' scl enable devtoolset-7 'PORTABLE=1 make -j2 rocksdbjavastatic' elif scl --list | grep -q 'devtoolset-2'; then # CentOS 5 or 6 scl enable devtoolset-2 'make clean-not-downloaded' scl enable devtoolset-2 'PORTABLE=1 make -j2 rocksdbjavastatic' else echo "Could not find devtoolset" exit 1; fi else make clean-not-downloaded PORTABLE=1 make -j2 rocksdbjavastatic fi cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar /rocksdb-java-target rocksdb-6.11.4/java/jdb_bench.sh000077500000000000000000000007141370372246700164250ustar00rootroot00000000000000# shellcheck disable=SC2148 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. PLATFORM=64 if [ `getconf LONG_BIT` != "64" ] then PLATFORM=32 fi ROCKS_JAR=`find target -name rocksdbjni*.jar` echo "Running benchmark in $PLATFORM-Bit mode." # shellcheck disable=SC2068 java -server -d$PLATFORM -XX:NewSize=4m -XX:+AggressiveOpts -Djava.library.path=target -cp "${ROCKS_JAR}:benchmark/target/classes" org.rocksdb.benchmark.DbBenchmark $@ rocksdb-6.11.4/java/jmh/000077500000000000000000000000001370372246700147445ustar00rootroot00000000000000rocksdb-6.11.4/java/jmh/LICENSE-HEADER.txt000066400000000000000000000003721370372246700175170ustar00rootroot00000000000000Copyright (c) 2011-present, Facebook, Inc. All rights reserved. This source code is licensed under both the GPLv2 (found in the COPYING file in the root directory) and Apache 2.0 License (found in the LICENSE.Apache file in the root directory). 
rocksdb-6.11.4/java/jmh/README.md000066400000000000000000000012771370372246700162320ustar00rootroot00000000000000# JMH Benchmarks for RocksJava These are micro-benchmarks for RocksJava functionality, using [JMH (Java Microbenchmark Harness)](https://openjdk.java.net/projects/code-tools/jmh/). ## Compiling **Note**: This uses a specific build of RocksDB that is set in the `<version>` element of the `dependencies` section of the `pom.xml` file. If you are testing local changes you should build and install a SNAPSHOT version of rocksdbjni, and update the `pom.xml` file of rocksdbjni-jmh to test with it. ```bash $ mvn package ``` ## Running ```bash $ java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar ``` NOTE: you can append `-help` to the command above to see all of the JMH runtime options. rocksdb-6.11.4/java/jmh/pom.xml000066400000000000000000000124461370372246700162700ustar00rootroot00000000000000 4.0.0 org.rocksdb rocksdbjni-jmh 1.0-SNAPSHOT http://rocksdb.org/ rocksdbjni-jmh JMH Benchmarks for RocksDB Java API Facebook, Inc. https://www.facebook.com Apache License 2.0 http://www.apache.org/licenses/LICENSE-2.0.html repo GNU General Public License, version 2 http://www.gnu.org/licenses/gpl-2.0.html repo scm:git:git://github.com/facebook/rocksdb.git scm:git:git@github.com:facebook/rocksdb.git http://github.com/facebook/rocksdb/ 1.7 1.7 UTF-8 1.22 benchmarks org.rocksdb rocksdbjni 6.6.0-SNAPSHOT org.openjdk.jmh jmh-core ${jmh.version} org.openjdk.jmh jmh-generator-annprocess ${jmh.version} provided org.apache.maven.plugins maven-compiler-plugin 3.8.1 ${project.build.source} ${project.build.target} ${project.build.sourceEncoding} com.mycila license-maven-plugin 3.0 true
LICENSE-HEADER.txt
true true true pom.xml ${project.build.sourceEncoding}
org.apache.maven.plugins maven-shade-plugin 3.2.1 package shade ${project.artifactId}-${project.version}-${uberjar.name} org.openjdk.jmh.Main *:* META-INF/*.SF META-INF/*.DSA META-INF/*.RSA
/**
 * Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 *  This source code is licensed under both the GPLv2 (found in the
 *  COPYING file in the root directory) and Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory).
 */
package org.rocksdb.jmh;

import org.openjdk.jmh.annotations.*;
import org.rocksdb.*;
import org.rocksdb.util.BytewiseComparator;
import org.rocksdb.util.FileUtils;
import org.rocksdb.util.ReverseBytewiseComparator;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;

import static org.rocksdb.util.KVUtils.ba;

/**
 * Benchmarks {@code put} against a database opened with the comparator
 * configuration encoded in {@link #comparatorName}.
 *
 * A {@code java_*} name is decoded by substring: (non-)direct buffer,
 * reused buffer size or no reuse, and the synchronisation type.
 */
@State(Scope.Benchmark)
public class ComparatorBenchmarks {

  /** Marker that precedes the reused buffer size inside a comparator name. */
  private static final String REUSED_MARKER = "_reused-";

  @Param({
      "native_bytewise",
      "native_reverse_bytewise",

      "java_bytewise_non-direct_reused-64_adaptive-mutex",
      "java_bytewise_non-direct_reused-64_non-adaptive-mutex",
      "java_bytewise_non-direct_reused-64_thread-local",
      "java_bytewise_direct_reused-64_adaptive-mutex",
      "java_bytewise_direct_reused-64_non-adaptive-mutex",
      "java_bytewise_direct_reused-64_thread-local",
      "java_bytewise_non-direct_no-reuse",
      "java_bytewise_direct_no-reuse",

      "java_reverse_bytewise_non-direct_reused-64_adaptive-mutex",
      "java_reverse_bytewise_non-direct_reused-64_non-adaptive-mutex",
      "java_reverse_bytewise_non-direct_reused-64_thread-local",
      "java_reverse_bytewise_direct_reused-64_adaptive-mutex",
      "java_reverse_bytewise_direct_reused-64_non-adaptive-mutex",
      "java_reverse_bytewise_direct_reused-64_thread-local",
      "java_reverse_bytewise_non-direct_no-reuse",
      "java_reverse_bytewise_direct_no-reuse"
  })
  public String comparatorName;

  Path dbDir;
  ComparatorOptions comparatorOptions;
  AbstractComparator comparator;
  Options options;
  RocksDB db;

  /**
   * Creates a temporary database directory and opens the database with the
   * comparator selected by {@link #comparatorName}.
   *
   * @throws IllegalArgumentException if the name is neither one of the two
   *     native names nor prefixed with {@code java_}
   */
  @Setup(Level.Trial)
  public void setup() throws IOException, RocksDBException {
    RocksDB.loadLibrary();

    dbDir = Files.createTempDirectory("rocksjava-comparator-benchmarks");
    options = new Options().setCreateIfMissing(true);

    if ("native_bytewise".equals(comparatorName)) {
      options.setComparator(BuiltinComparator.BYTEWISE_COMPARATOR);
    } else if ("native_reverse_bytewise".equals(comparatorName)) {
      options.setComparator(BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR);
    } else if (comparatorName.startsWith("java_")) {
      configureJavaComparator();
      options.setComparator(comparator);
    } else {
      throw new IllegalArgumentException("Unknown comparatorName: " + comparatorName);
    }

    db = RocksDB.open(options, dbDir.toAbsolutePath().toString());
  }

  /**
   * Fills {@link #comparatorOptions} and {@link #comparator} from the
   * fragments of a {@code java_*} comparator name.
   */
  private void configureJavaComparator() {
    comparatorOptions = new ComparatorOptions();

    // "non-direct" is tested first because it also contains "direct"
    if (comparatorName.contains("non-direct")) {
      comparatorOptions.setUseDirectBuffer(false);
    } else if (comparatorName.contains("direct")) {
      comparatorOptions.setUseDirectBuffer(true);
    }

    if (comparatorName.contains("no-reuse")) {
      comparatorOptions.setMaxReusedBufferSize(-1);
    } else {
      final int markerAt = comparatorName.indexOf(REUSED_MARKER);
      if (markerAt > -1) {
        // the size is the text between the marker and the next underscore
        final int sizeFrom = markerAt + REUSED_MARKER.length();
        final int sizeTo = comparatorName.indexOf('_', sizeFrom);
        final String size = comparatorName.substring(sizeFrom, sizeTo);
        comparatorOptions.setMaxReusedBufferSize(Integer.parseInt(size));
      }
    }

    // "non-adaptive-mutex" is tested first because it also contains "adaptive-mutex"
    if (comparatorName.contains("non-adaptive-mutex")) {
      comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.MUTEX);
    } else if (comparatorName.contains("adaptive-mutex")) {
      comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.ADAPTIVE_MUTEX);
    } else if (comparatorName.contains("thread-local")) {
      comparatorOptions.setReusedSynchronisationType(ReusedSynchronisationType.THREAD_LOCAL);
    }

    if (comparatorName.startsWith("java_bytewise")) {
      comparator = new BytewiseComparator(comparatorOptions);
    } else if (comparatorName.startsWith("java_reverse_bytewise")) {
      comparator = new ReverseBytewiseComparator(comparatorOptions);
    }
  }

  /**
   * Closes the database, then the comparator and its options when a Java
   * comparator was used, then the options, and finally deletes the
   * database directory.
   */
  @TearDown(Level.Trial)
  public void cleanup() throws IOException {
    db.close();
    if (comparator != null) {
      comparator.close();
    }
    if (comparatorOptions != null) {
      comparatorOptions.close();
    }
    options.close();
    FileUtils.delete(dbDir);
  }

  /** Benchmark-scoped source of increasing integers used to build keys. */
  @State(Scope.Benchmark)
  public static class Counter {
    private final AtomicInteger count = new AtomicInteger();

    /** @return the current value, which is then incremented by one */
    public int next() {
      return count.getAndIncrement();
    }
  }

  /** Stores {@code key<i>} to {@code value<i>} for the next counter value. */
  @Benchmark
  public void put(final Counter counter) throws RocksDBException {
    final int n = counter.next();
    final byte[] key = ba("key" + n);
    final byte[] value = ba("value" + n);
    db.put(key, value);
  }
}
/**
 * Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 *  This source code is licensed under both the GPLv2 (found in the
 *  COPYING file in the root directory) and Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory).
 */
package org.rocksdb.jmh;

import org.openjdk.jmh.annotations.*;
import org.rocksdb.*;
import org.rocksdb.util.FileUtils;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import static org.rocksdb.util.KVUtils.ba;

/**
 * Benchmarks {@code get} against a database pre-populated with
 * {@link #keyCount} entries in every open column family.
 *
 * NOTE(review): the generic type arguments were missing from the reviewed
 * text and have been restored from how the collections are used - confirm.
 */
@State(Scope.Benchmark)
public class GetBenchmarks {

  @Param({
      "no_column_family",
      "1_column_family",
      "20_column_families",
      "100_column_families"
  })
  String columnFamilyTestType;

  @Param("100000")
  int keyCount;

  Path dbDir;
  DBOptions options;
  int cfs = 0;  // number of column families
  private AtomicInteger cfHandlesIdx;
  ColumnFamilyHandle[] cfHandles;
  RocksDB db;
  private final AtomicInteger keyIndex = new AtomicInteger();

  /**
   * Opens a fresh database with the default column family plus {@link #cfs}
   * additional ones, stores {@code key<j>} to {@code value<j>} for
   * {@code 0 <= j < keyCount} in each of them, and flushes.
   */
  @Setup(Level.Trial)
  public void setup() throws IOException, RocksDBException {
    RocksDB.loadLibrary();

    dbDir = Files.createTempDirectory("rocksjava-get-benchmarks");

    options = new DBOptions()
        .setCreateIfMissing(true)
        .setCreateMissingColumnFamilies(true);

    final List<ColumnFamilyDescriptor> cfDescriptors = new ArrayList<>();
    cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));

    if ("1_column_family".equals(columnFamilyTestType)) {
      cfs = 1;
    } else if ("20_column_families".equals(columnFamilyTestType)) {
      cfs = 20;
    } else if ("100_column_families".equals(columnFamilyTestType)) {
      cfs = 100;
    }

    if (cfs > 0) {
      cfHandlesIdx = new AtomicInteger(1);
      for (int i = 1; i <= cfs; i++) {
        cfDescriptors.add(new ColumnFamilyDescriptor(ba("cf" + i)));
      }
    }

    final List<ColumnFamilyHandle> cfHandlesList = new ArrayList<>(cfDescriptors.size());
    db = RocksDB.open(
        options,
        dbDir.toAbsolutePath().toString(),
        cfDescriptors,
        cfHandlesList);
    cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]);

    // Store initial data for retrieving via get.
    // Every handle is seeded (cfs + 1 of them, the default included) because
    // getColumnFamily() may return any index in [0, cfs]. Seeding only
    // indices [0, cfs) left "no_column_family" and "1_column_family"
    // reading from empty column families.
    for (final ColumnFamilyHandle cfHandle : cfHandles) {
      for (int j = 0; j < keyCount; j++) {
        db.put(cfHandle, ba("key" + j), ba("value" + j));
      }
    }

    try (final FlushOptions flushOptions = new FlushOptions()
            .setWaitForFlush(true)) {
      db.flush(flushOptions);
    }
  }

  /**
   * Closes all column family handles, the database and its options, then
   * deletes the database directory.
   */
  @TearDown(Level.Trial)
  public void cleanup() throws IOException {
    for (final ColumnFamilyHandle cfHandle : cfHandles) {
      cfHandle.close();
    }
    db.close();
    options.close();
    FileUtils.delete(dbDir);
  }

  /**
   * Chooses the column family for the next operation.
   *
   * @return the default handle (index 0) when there are no extra column
   *     families, index 1 when there is exactly one, and otherwise the
   *     handle at the next counter position; once the counter passes
   *     {@code cfs} it is reset to 1 and index 0 is returned for that call.
   */
  private ColumnFamilyHandle getColumnFamily() {
    if (cfs == 0) {
      return cfHandles[0];
    } else if (cfs == 1) {
      return cfHandles[1];
    } else {
      int idx = cfHandlesIdx.getAndIncrement();
      if (idx > cfs) {
        cfHandlesIdx.set(1); // doesn't ensure a perfect distribution, but it's ok
        idx = 0;
      }
      return cfHandles[idx];
    }
  }

  /**
   * Takes the next position in the index.
   *
   * The index wraps to 0 before it reaches {@link #keyCount}; the update is
   * retried with compare-and-set until it succeeds.
   *
   * @return the position claimed by this call
   */
  private int next() {
    int idx;
    int nextIdx;
    while (true) {
      idx = keyIndex.get();
      nextIdx = idx + 1;
      if (nextIdx >= keyCount) {
        nextIdx = 0;
      }

      if (keyIndex.compareAndSet(idx, nextIdx)) {
        break;
      }
    }
    return idx;
  }

  /** Reads the value of the next key from the next column family. */
  @Benchmark
  public byte[] get() throws RocksDBException {
    final int keyIdx = next();
    return db.get(getColumnFamily(), ba("key" + keyIdx));
  }
}
/**
 * Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 *  This source code is licensed under both the GPLv2 (found in the
 *  COPYING file in the root directory) and Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory).
 */
package org.rocksdb.jmh;

import org.openjdk.jmh.annotations.*;
import org.rocksdb.*;
import org.rocksdb.util.FileUtils;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import static org.rocksdb.util.KVUtils.ba;
import static org.rocksdb.util.KVUtils.keys;

/**
 * Benchmarks {@code multiGetAsList} with batches of {@link #multiGetSize}
 * keys against a database pre-populated with {@link #keyCount} entries in
 * every open column family.
 *
 * NOTE(review): the generic type arguments were missing from the reviewed
 * text and have been restored from how the collections are used - confirm.
 */
@State(Scope.Benchmark)
public class MultiGetBenchmarks {

  @Param({
      "no_column_family",
      "1_column_family",
      "20_column_families",
      "100_column_families"
  })
  String columnFamilyTestType;

  @Param("100000")
  int keyCount;

  @Param({
          "10",
          "100",
          "1000",
          "10000",
  })
  int multiGetSize;

  Path dbDir;
  DBOptions options;
  int cfs = 0;  // number of column families
  private AtomicInteger cfHandlesIdx;
  ColumnFamilyHandle[] cfHandles;
  RocksDB db;
  private final AtomicInteger keyIndex = new AtomicInteger();

  /**
   * Opens a fresh database with the default column family plus {@link #cfs}
   * additional ones, stores {@code key<j>} to {@code value<j>} for
   * {@code 0 <= j < keyCount} in each of them, and flushes.
   */
  @Setup(Level.Trial)
  public void setup() throws IOException, RocksDBException {
    RocksDB.loadLibrary();

    dbDir = Files.createTempDirectory("rocksjava-multiget-benchmarks");

    options = new DBOptions()
        .setCreateIfMissing(true)
        .setCreateMissingColumnFamilies(true);

    final List<ColumnFamilyDescriptor> cfDescriptors = new ArrayList<>();
    cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));

    if ("1_column_family".equals(columnFamilyTestType)) {
      cfs = 1;
    } else if ("20_column_families".equals(columnFamilyTestType)) {
      cfs = 20;
    } else if ("100_column_families".equals(columnFamilyTestType)) {
      cfs = 100;
    }

    if (cfs > 0) {
      cfHandlesIdx = new AtomicInteger(1);
      for (int i = 1; i <= cfs; i++) {
        cfDescriptors.add(new ColumnFamilyDescriptor(ba("cf" + i)));
      }
    }

    final List<ColumnFamilyHandle> cfHandlesList = new ArrayList<>(cfDescriptors.size());
    db = RocksDB.open(
        options,
        dbDir.toAbsolutePath().toString(),
        cfDescriptors,
        cfHandlesList);
    cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]);

    // Store initial data for retrieving via multiGet.
    // Every handle is seeded, the default one included. Seeding only
    // indices [0, cfs) stored nothing at all for "no_column_family", so the
    // benchmark measured lookups of missing keys.
    for (final ColumnFamilyHandle cfHandle : cfHandles) {
      for (int j = 0; j < keyCount; j++) {
        db.put(cfHandle, ba("key" + j), ba("value" + j));
      }
    }

    try (final FlushOptions flushOptions = new FlushOptions()
            .setWaitForFlush(true)) {
      db.flush(flushOptions);
    }
  }

  /**
   * Closes all column family handles, the database and its options, then
   * deletes the database directory.
   */
  @TearDown(Level.Trial)
  public void cleanup() throws IOException {
    for (final ColumnFamilyHandle cfHandle : cfHandles) {
      cfHandle.close();
    }
    db.close();
    options.close();
    FileUtils.delete(dbDir);
  }

  /**
   * Chooses a column family handle.
   *
   * @return the default handle (index 0) when there are no extra column
   *     families, index 1 when there is exactly one, and otherwise the
   *     handle at the next counter position; once the counter passes
   *     {@code cfs} it is reset to 1 and index 0 is returned for that call.
   */
  private ColumnFamilyHandle getColumnFamily() {
    if (cfs == 0) {
      return cfHandles[0];
    }  else if (cfs == 1) {
      return cfHandles[1];
    } else {
      int idx = cfHandlesIdx.getAndIncrement();
      if (idx > cfs) {
        cfHandlesIdx.set(1); // doesn't ensure a perfect distribution, but it's ok
        idx = 0;
      }
      return cfHandles[idx];
    }
  }

  /**
   * Reserves the next {@code inc} positions in the index.
   *
   * When advancing would reach {@code limit}, the stored index is reset to
   * {@code inc} instead; the update is retried with compare-and-set until
   * it succeeds.
   *
   * @param inc the number by which to increment the index
   * @param limit the limit for the index
   * @return the index before {@code inc} is added, or -1 when the value
   *     stored for the next caller is itself not below {@code limit}
   */
  private int next(final int inc, final int limit) {
    int idx;
    int nextIdx;
    while (true) {
      idx = keyIndex.get();
      nextIdx = idx + inc;
      if (nextIdx >= limit) {
          nextIdx = inc;
      }

      if (keyIndex.compareAndSet(idx, nextIdx)) {
        break;
      }
    }

    if (nextIdx >= limit) {
      return -1;
    } else {
      return idx;
    }
  }

  /**
   * Looks up the next batch of {@link #multiGetSize} keys in one call.
   *
   * NOTE(review): no column family handle is passed, so this presumably
   * reads the default column family only - confirm.
   */
  @Benchmark
  public List<byte[]> multiGet10() throws RocksDBException {
    final int fromKeyIdx = next(multiGetSize, keyCount);
    final List<byte[]> keys = keys(fromKeyIdx, fromKeyIdx + multiGetSize);
    return db.multiGetAsList(keys);
  }
}
/**
 * Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 *  This source code is licensed under both the GPLv2 (found in the
 *  COPYING file in the root directory) and Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory).
 */
package org.rocksdb.jmh;

import org.openjdk.jmh.annotations.*;
import org.rocksdb.*;
import org.rocksdb.util.FileUtils;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import static org.rocksdb.util.KVUtils.ba;

/**
 * Benchmarks {@code put} against a database opened with the number of
 * column families selected by {@link #columnFamilyTestType}.
 *
 * NOTE(review): the generic type arguments were missing from the reviewed
 * text and have been restored from how the collections are used - confirm.
 */
@State(Scope.Benchmark)
public class PutBenchmarks {

  @Param({
      "no_column_family",
      "1_column_family",
      "20_column_families",
      "100_column_families"
  })
  String columnFamilyTestType;

  Path dbDir;
  DBOptions options;
  int cfs = 0;  // number of column families
  private AtomicInteger cfHandlesIdx;
  ColumnFamilyHandle[] cfHandles;
  RocksDB db;

  /**
   * Opens a fresh database with the default column family plus {@link #cfs}
   * additional ones named {@code cf1..cf<cfs>}.
   */
  @Setup(Level.Trial)
  public void setup() throws IOException, RocksDBException {
    RocksDB.loadLibrary();

    dbDir = Files.createTempDirectory("rocksjava-put-benchmarks");

    options = new DBOptions()
        .setCreateIfMissing(true)
        .setCreateMissingColumnFamilies(true);

    final List<ColumnFamilyDescriptor> cfDescriptors = new ArrayList<>();
    cfDescriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));

    if ("1_column_family".equals(columnFamilyTestType)) {
      cfs = 1;
    } else if ("20_column_families".equals(columnFamilyTestType)) {
      cfs = 20;
    } else if ("100_column_families".equals(columnFamilyTestType)) {
      cfs = 100;
    }

    if (cfs > 0) {
      cfHandlesIdx = new AtomicInteger(1);
      for (int i = 1; i <= cfs; i++) {
        cfDescriptors.add(new ColumnFamilyDescriptor(ba("cf" + i)));
      }
    }

    final List<ColumnFamilyHandle> cfHandlesList = new ArrayList<>(cfDescriptors.size());
    db = RocksDB.open(
        options,
        dbDir.toAbsolutePath().toString(),
        cfDescriptors,
        cfHandlesList);
    cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]);
  }

  /**
   * Closes all column family handles, the database and its options, then
   * deletes the database directory.
   */
  @TearDown(Level.Trial)
  public void cleanup() throws IOException {
    for (final ColumnFamilyHandle cfHandle : cfHandles) {
      cfHandle.close();
    }
    db.close();
    options.close();
    FileUtils.delete(dbDir);
  }

  /**
   * Chooses the column family for the next operation.
   *
   * @return the default handle (index 0) when there are no extra column
   *     families, index 1 when there is exactly one, and otherwise the
   *     handle at the next counter position; once the counter passes
   *     {@code cfs} it is reset to 1 and index 0 is returned for that call.
   */
  private ColumnFamilyHandle getColumnFamily() {
    if (cfs == 0) {
      return cfHandles[0];
    }  else if (cfs == 1) {
      return cfHandles[1];
    } else {
      int idx = cfHandlesIdx.getAndIncrement();
      if (idx > cfs) {
        cfHandlesIdx.set(1); // doesn't ensure a perfect distribution, but it's ok
        idx = 0;
      }
      return cfHandles[idx];
    }
  }

  /** Benchmark-scoped source of increasing integers used to build keys. */
  @State(Scope.Benchmark)
  public static class Counter {
    private final AtomicInteger count = new AtomicInteger();

    /** @return the current value, which is then incremented by one */
    public int next() {
      return count.getAndIncrement();
    }
  }

  /**
   * Stores {@code key<i>} to {@code value<i>} in the next column family.
   *
   * Uses this class's own {@link Counter}; it previously borrowed the
   * identical state class from {@code ComparatorBenchmarks}, leaving the
   * nested class above unused and coupling the two benchmarks.
   */
  @Benchmark
  public void put(final Counter counter) throws RocksDBException {
    final int i = counter.next();
    db.put(getColumnFamily(), ba("key" + i), ba("value" + i));
  }
}
/**
 * Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 *  This source code is licensed under both the GPLv2 (found in the
 *  COPYING file in the root directory) and Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory).
 */
package org.rocksdb.util;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;

/**
 * File system helpers for the benchmarks.
 */
public final class FileUtils {
  // The type argument is required: with a raw SimpleFileVisitor the
  // Path-typed methods of DeleteDirVisitor would not override anything.
  private static final SimpleFileVisitor<Path> DELETE_DIR_VISITOR = new DeleteDirVisitor();

  /**
   * Deletes a path from the filesystem
   *
   * If the path is a directory its contents
   * will be recursively deleted before it itself
   * is deleted.
   *
   * Note that removal of a directory is not an atomic-operation
   * and so if an error occurs during removal, some of the directories
   * descendants may have already been removed
   *
   * A path that does not exist is silently ignored.
   *
   * @param path the path to delete.
   *
   * @throws IOException if an error occurs whilst removing a file or directory
   */
  public static void delete(final Path path) throws IOException {
    if (!Files.isDirectory(path)) {
      Files.deleteIfExists(path);
    } else {
      Files.walkFileTree(path, DELETE_DIR_VISITOR);
    }
  }

  /**
   * Visitor that deletes every file it visits, and every directory once
   * all of its entries have been visited.
   */
  private static class DeleteDirVisitor extends SimpleFileVisitor<Path> {
    @Override
    public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
      Files.deleteIfExists(file);
      return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult postVisitDirectory(final Path dir, final IOException exc) throws IOException {
      // a failure while iterating the directory aborts the walk
      if (exc != null) {
        throw exc;
      }

      Files.deleteIfExists(dir);
      return FileVisitResult.CONTINUE;
    }
  }
}
/**
 * Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 *  This source code is licensed under both the GPLv2 (found in the
 *  COPYING file in the root directory) and Apache 2.0 License
 *  (found in the LICENSE.Apache file in the root directory).
 */
package org.rocksdb.util;

import java.util.ArrayList;
import java.util.List;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * Helpers for building benchmark keys and values.
 */
public final class KVUtils {

  /**
   * Get a byte array from a string.
   *
   * Assumes UTF-8 encoding
   *
   * @param string the string
   *
   * @return the bytes.
   */
  public static byte[] ba(final String string) {
    return string.getBytes(UTF_8);
  }

  /**
   * Get a string from a byte array.
   *
   * Assumes UTF-8 encoding
   *
   * @param bytes the bytes
   *
   * @return the string.
   */
  public static String str(final byte[] bytes) {
    return new String(bytes, UTF_8);
  }

  /**
   * Get a list of keys named {@code key<i>} for every {@code i} in the
   * half-open range {@code [from, to)}, i.e. {@code keyFrom..key(To-1)}.
   *
   * @param from the index of the first key (inclusive)
   * @param to the index after the last key (exclusive)
   *
   * @return the list of keys; empty when {@code to <= from}
   */
  public static List<byte[]> keys(final int from, final int to) {
    // Clamp the capacity hint: a negative value would make the ArrayList
    // constructor throw, although an inverted range simply has no keys.
    final List<byte[]> keys = new ArrayList<>(Math.max(0, to - from));
    for (int i = from; i < to; i++) {
      keys.add(ba("key" + i));
    }
    return keys;
  }
}
project.version = version // Set version to be set as jar name project.build.finalName = project.artifactId + "-" + version junit junit 4.12 test org.assertj assertj-core 1.7.1 test org.mockito mockito-all 1.10.19 test rocksdb-6.11.4/java/rocksjni/000077500000000000000000000000001370372246700160105ustar00rootroot00000000000000rocksdb-6.11.4/java/rocksjni/backupablejni.cc000066400000000000000000000303161370372246700211140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::BackupEnginge and // ROCKSDB_NAMESPACE::BackupableDBOptions methods from Java side. #include #include #include #include #include #include "include/org_rocksdb_BackupableDBOptions.h" #include "rocksdb/utilities/backupable_db.h" #include "rocksjni/portal.h" /////////////////////////////////////////////////////////////////////////// // BackupDBOptions /* * Class: org_rocksdb_BackupableDBOptions * Method: newBackupableDBOptions * Signature: (Ljava/lang/String;)J */ jlong Java_org_rocksdb_BackupableDBOptions_newBackupableDBOptions( JNIEnv* env, jclass /*jcls*/, jstring jpath) { const char* cpath = env->GetStringUTFChars(jpath, nullptr); if (cpath == nullptr) { // exception thrown: OutOfMemoryError return 0; } auto* bopt = new ROCKSDB_NAMESPACE::BackupableDBOptions(cpath); env->ReleaseStringUTFChars(jpath, cpath); return reinterpret_cast(bopt); } /* * Class: org_rocksdb_BackupableDBOptions * Method: backupDir * Signature: (J)Ljava/lang/String; */ jstring Java_org_rocksdb_BackupableDBOptions_backupDir(JNIEnv* env, jobject /*jopt*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return env->NewStringUTF(bopt->backup_dir.c_str()); } /* * Class: 
org_rocksdb_BackupableDBOptions * Method: setBackupEnv * Signature: (JJ)V */ void Java_org_rocksdb_BackupableDBOptions_setBackupEnv( JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle, jlong jrocks_env_handle) { auto* bopt = reinterpret_cast(jhandle); auto* rocks_env = reinterpret_cast(jrocks_env_handle); bopt->backup_env = rocks_env; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setShareTableFiles * Signature: (JZ)V */ void Java_org_rocksdb_BackupableDBOptions_setShareTableFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { auto* bopt = reinterpret_cast(jhandle); bopt->share_table_files = flag; } /* * Class: org_rocksdb_BackupableDBOptions * Method: shareTableFiles * Signature: (J)Z */ jboolean Java_org_rocksdb_BackupableDBOptions_shareTableFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->share_table_files; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setInfoLog * Signature: (JJ)V */ void Java_org_rocksdb_BackupableDBOptions_setInfoLog(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong /*jlogger_handle*/) { auto* bopt = reinterpret_cast(jhandle); auto* sptr_logger = reinterpret_cast*>( jhandle); bopt->info_log = sptr_logger->get(); } /* * Class: org_rocksdb_BackupableDBOptions * Method: setSync * Signature: (JZ)V */ void Java_org_rocksdb_BackupableDBOptions_setSync(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { auto* bopt = reinterpret_cast(jhandle); bopt->sync = flag; } /* * Class: org_rocksdb_BackupableDBOptions * Method: sync * Signature: (J)Z */ jboolean Java_org_rocksdb_BackupableDBOptions_sync(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->sync; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setDestroyOldData * Signature: (JZ)V */ void Java_org_rocksdb_BackupableDBOptions_setDestroyOldData(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { auto* bopt = 
reinterpret_cast(jhandle); bopt->destroy_old_data = flag; } /* * Class: org_rocksdb_BackupableDBOptions * Method: destroyOldData * Signature: (J)Z */ jboolean Java_org_rocksdb_BackupableDBOptions_destroyOldData(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->destroy_old_data; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setBackupLogFiles * Signature: (JZ)V */ void Java_org_rocksdb_BackupableDBOptions_setBackupLogFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { auto* bopt = reinterpret_cast(jhandle); bopt->backup_log_files = flag; } /* * Class: org_rocksdb_BackupableDBOptions * Method: backupLogFiles * Signature: (J)Z */ jboolean Java_org_rocksdb_BackupableDBOptions_backupLogFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->backup_log_files; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setBackupRateLimit * Signature: (JJ)V */ void Java_org_rocksdb_BackupableDBOptions_setBackupRateLimit( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jbackup_rate_limit) { auto* bopt = reinterpret_cast(jhandle); bopt->backup_rate_limit = jbackup_rate_limit; } /* * Class: org_rocksdb_BackupableDBOptions * Method: backupRateLimit * Signature: (J)J */ jlong Java_org_rocksdb_BackupableDBOptions_backupRateLimit(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->backup_rate_limit; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setBackupRateLimiter * Signature: (JJ)V */ void Java_org_rocksdb_BackupableDBOptions_setBackupRateLimiter( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jrate_limiter_handle) { auto* bopt = reinterpret_cast(jhandle); auto* sptr_rate_limiter = reinterpret_cast*>( jrate_limiter_handle); bopt->backup_rate_limiter = *sptr_rate_limiter; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setRestoreRateLimit * Signature: (JJ)V 
*/ void Java_org_rocksdb_BackupableDBOptions_setRestoreRateLimit( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jrestore_rate_limit) { auto* bopt = reinterpret_cast(jhandle); bopt->restore_rate_limit = jrestore_rate_limit; } /* * Class: org_rocksdb_BackupableDBOptions * Method: restoreRateLimit * Signature: (J)J */ jlong Java_org_rocksdb_BackupableDBOptions_restoreRateLimit(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->restore_rate_limit; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setRestoreRateLimiter * Signature: (JJ)V */ void Java_org_rocksdb_BackupableDBOptions_setRestoreRateLimiter( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jrate_limiter_handle) { auto* bopt = reinterpret_cast(jhandle); auto* sptr_rate_limiter = reinterpret_cast*>( jrate_limiter_handle); bopt->restore_rate_limiter = *sptr_rate_limiter; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setShareFilesWithChecksum * Signature: (JZ)V */ void Java_org_rocksdb_BackupableDBOptions_setShareFilesWithChecksum( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { auto* bopt = reinterpret_cast(jhandle); bopt->share_files_with_checksum = flag; } /* * Class: org_rocksdb_BackupableDBOptions * Method: shareFilesWithChecksum * Signature: (J)Z */ jboolean Java_org_rocksdb_BackupableDBOptions_shareFilesWithChecksum( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return bopt->share_files_with_checksum; } /* * Class: org_rocksdb_BackupableDBOptions * Method: setMaxBackgroundOperations * Signature: (JI)V */ void Java_org_rocksdb_BackupableDBOptions_setMaxBackgroundOperations( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint max_background_operations) { auto* bopt = reinterpret_cast(jhandle); bopt->max_background_operations = static_cast(max_background_operations); } /* * Class: org_rocksdb_BackupableDBOptions * Method: maxBackgroundOperations * 
Signature: (J)I */ jint Java_org_rocksdb_BackupableDBOptions_maxBackgroundOperations( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return static_cast(bopt->max_background_operations); } /* * Class: org_rocksdb_BackupableDBOptions * Method: setCallbackTriggerIntervalSize * Signature: (JJ)V */ void Java_org_rocksdb_BackupableDBOptions_setCallbackTriggerIntervalSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jcallback_trigger_interval_size) { auto* bopt = reinterpret_cast(jhandle); bopt->callback_trigger_interval_size = static_cast(jcallback_trigger_interval_size); } /* * Class: org_rocksdb_BackupableDBOptions * Method: callbackTriggerIntervalSize * Signature: (J)J */ jlong Java_org_rocksdb_BackupableDBOptions_callbackTriggerIntervalSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); return static_cast(bopt->callback_trigger_interval_size); } /* * Class: org_rocksdb_BackupableDBOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_BackupableDBOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); assert(bopt != nullptr); delete bopt; } rocksdb-6.11.4/java/rocksjni/backupenginejni.cc000066400000000000000000000222601370372246700214550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::BackupEngine methods from the Java side. 
#include #include #include "include/org_rocksdb_BackupEngine.h" #include "rocksdb/utilities/backupable_db.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_BackupEngine * Method: open * Signature: (JJ)J */ jlong Java_org_rocksdb_BackupEngine_open(JNIEnv* env, jclass /*jcls*/, jlong env_handle, jlong backupable_db_options_handle) { auto* rocks_env = reinterpret_cast(env_handle); auto* backupable_db_options = reinterpret_cast( backupable_db_options_handle); ROCKSDB_NAMESPACE::BackupEngine* backup_engine; auto status = ROCKSDB_NAMESPACE::BackupEngine::Open( rocks_env, *backupable_db_options, &backup_engine); if (status.ok()) { return reinterpret_cast(backup_engine); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); return 0; } } /* * Class: org_rocksdb_BackupEngine * Method: createNewBackup * Signature: (JJZ)V */ void Java_org_rocksdb_BackupEngine_createNewBackup( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jlong db_handle, jboolean jflush_before_backup) { auto* db = reinterpret_cast(db_handle); auto* backup_engine = reinterpret_cast(jbe_handle); auto status = backup_engine->CreateNewBackup( db, static_cast(jflush_before_backup)); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: createNewBackupWithMetadata * Signature: (JJLjava/lang/String;Z)V */ void Java_org_rocksdb_BackupEngine_createNewBackupWithMetadata( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jlong db_handle, jstring japp_metadata, jboolean jflush_before_backup) { auto* db = reinterpret_cast(db_handle); auto* backup_engine = reinterpret_cast(jbe_handle); jboolean has_exception = JNI_FALSE; std::string app_metadata = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, japp_metadata, &has_exception); if (has_exception == JNI_TRUE) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Could not copy jstring to std::string"); return; } auto status = 
backup_engine->CreateNewBackupWithMetadata( db, app_metadata, static_cast(jflush_before_backup)); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: getBackupInfo * Signature: (J)Ljava/util/List; */ jobject Java_org_rocksdb_BackupEngine_getBackupInfo(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle) { auto* backup_engine = reinterpret_cast(jbe_handle); std::vector backup_infos; backup_engine->GetBackupInfo(&backup_infos); return ROCKSDB_NAMESPACE::BackupInfoListJni::getBackupInfo(env, backup_infos); } /* * Class: org_rocksdb_BackupEngine * Method: getCorruptedBackups * Signature: (J)[I */ jintArray Java_org_rocksdb_BackupEngine_getCorruptedBackups(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle) { auto* backup_engine = reinterpret_cast(jbe_handle); std::vector backup_ids; backup_engine->GetCorruptedBackups(&backup_ids); // store backupids in int array std::vector int_backup_ids(backup_ids.begin(), backup_ids.end()); // Store ints in java array // Its ok to loose precision here (64->32) jsize ret_backup_ids_size = static_cast(backup_ids.size()); jintArray ret_backup_ids = env->NewIntArray(ret_backup_ids_size); if (ret_backup_ids == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetIntArrayRegion(ret_backup_ids, 0, ret_backup_ids_size, int_backup_ids.data()); return ret_backup_ids; } /* * Class: org_rocksdb_BackupEngine * Method: garbageCollect * Signature: (J)V */ void Java_org_rocksdb_BackupEngine_garbageCollect(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle) { auto* backup_engine = reinterpret_cast(jbe_handle); auto status = backup_engine->GarbageCollect(); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: purgeOldBackups * Signature: (JI)V */ void Java_org_rocksdb_BackupEngine_purgeOldBackups(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint 
jnum_backups_to_keep) { auto* backup_engine = reinterpret_cast(jbe_handle); auto status = backup_engine->PurgeOldBackups( static_cast(jnum_backups_to_keep)); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: deleteBackup * Signature: (JI)V */ void Java_org_rocksdb_BackupEngine_deleteBackup(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint jbackup_id) { auto* backup_engine = reinterpret_cast(jbe_handle); auto status = backup_engine->DeleteBackup( static_cast(jbackup_id)); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: restoreDbFromBackup * Signature: (JILjava/lang/String;Ljava/lang/String;J)V */ void Java_org_rocksdb_BackupEngine_restoreDbFromBackup( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint jbackup_id, jstring jdb_dir, jstring jwal_dir, jlong jrestore_options_handle) { auto* backup_engine = reinterpret_cast(jbe_handle); const char* db_dir = env->GetStringUTFChars(jdb_dir, nullptr); if (db_dir == nullptr) { // exception thrown: OutOfMemoryError return; } const char* wal_dir = env->GetStringUTFChars(jwal_dir, nullptr); if (wal_dir == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_dir, db_dir); return; } auto* restore_options = reinterpret_cast( jrestore_options_handle); auto status = backup_engine->RestoreDBFromBackup( static_cast(jbackup_id), db_dir, wal_dir, *restore_options); env->ReleaseStringUTFChars(jwal_dir, wal_dir); env->ReleaseStringUTFChars(jdb_dir, db_dir); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: restoreDbFromLatestBackup * Signature: (JLjava/lang/String;Ljava/lang/String;J)V */ void Java_org_rocksdb_BackupEngine_restoreDbFromLatestBackup( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jstring jdb_dir, jstring jwal_dir, jlong 
jrestore_options_handle) { auto* backup_engine = reinterpret_cast(jbe_handle); const char* db_dir = env->GetStringUTFChars(jdb_dir, nullptr); if (db_dir == nullptr) { // exception thrown: OutOfMemoryError return; } const char* wal_dir = env->GetStringUTFChars(jwal_dir, nullptr); if (wal_dir == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_dir, db_dir); return; } auto* restore_options = reinterpret_cast( jrestore_options_handle); auto status = backup_engine->RestoreDBFromLatestBackup(db_dir, wal_dir, *restore_options); env->ReleaseStringUTFChars(jwal_dir, wal_dir); env->ReleaseStringUTFChars(jdb_dir, db_dir); if (status.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } /* * Class: org_rocksdb_BackupEngine * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_BackupEngine_disposeInternal(JNIEnv* /*env*/, jobject /*jbe*/, jlong jbe_handle) { auto* be = reinterpret_cast(jbe_handle); assert(be != nullptr); delete be; } rocksdb-6.11.4/java/rocksjni/cassandra_compactionfilterjni.cc000066400000000000000000000017501370372246700244040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "include/org_rocksdb_CassandraCompactionFilter.h" #include "utilities/cassandra/cassandra_compaction_filter.h" /* * Class: org_rocksdb_CassandraCompactionFilter * Method: createNewCassandraCompactionFilter0 * Signature: (ZI)J */ jlong Java_org_rocksdb_CassandraCompactionFilter_createNewCassandraCompactionFilter0( JNIEnv* /*env*/, jclass /*jcls*/, jboolean purge_ttl_on_expiration, jint gc_grace_period_in_seconds) { auto* compaction_filter = new ROCKSDB_NAMESPACE::cassandra::CassandraCompactionFilter( purge_ttl_on_expiration, gc_grace_period_in_seconds); // set the native handle to our native compaction filter return reinterpret_cast(compaction_filter); } rocksdb-6.11.4/java/rocksjni/cassandra_value_operator.cc000066400000000000000000000031211370372246700233620ustar00rootroot00000000000000// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include #include #include "include/org_rocksdb_CassandraValueMergeOperator.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "rocksjni/portal.h" #include "utilities/cassandra/merge_operator.h" /* * Class: org_rocksdb_CassandraValueMergeOperator * Method: newSharedCassandraValueMergeOperator * Signature: (II)J */ jlong Java_org_rocksdb_CassandraValueMergeOperator_newSharedCassandraValueMergeOperator( JNIEnv* /*env*/, jclass /*jclazz*/, jint gcGracePeriodInSeconds, jint operands_limit) { auto* op = new std::shared_ptr( new ROCKSDB_NAMESPACE::cassandra::CassandraValueMergeOperator( gcGracePeriodInSeconds, operands_limit)); return reinterpret_cast(op); } /* * Class: org_rocksdb_CassandraValueMergeOperator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CassandraValueMergeOperator_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* op = reinterpret_cast*>( jhandle); delete op; } rocksdb-6.11.4/java/rocksjni/checkpoint.cc000066400000000000000000000045341370372246700204540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::Checkpoint methods from Java side. 
#include #include #include #include #include "include/org_rocksdb_Checkpoint.h" #include "rocksdb/db.h" #include "rocksdb/utilities/checkpoint.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_Checkpoint * Method: newCheckpoint * Signature: (J)J */ jlong Java_org_rocksdb_Checkpoint_newCheckpoint(JNIEnv* /*env*/, jclass /*jclazz*/, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::Checkpoint* checkpoint; ROCKSDB_NAMESPACE::Checkpoint::Create(db, &checkpoint); return reinterpret_cast(checkpoint); } /* * Class: org_rocksdb_Checkpoint * Method: dispose * Signature: (J)V */ void Java_org_rocksdb_Checkpoint_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* checkpoint = reinterpret_cast(jhandle); assert(checkpoint != nullptr); delete checkpoint; } /* * Class: org_rocksdb_Checkpoint * Method: createCheckpoint * Signature: (JLjava/lang/String;)V */ void Java_org_rocksdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, jlong jcheckpoint_handle, jstring jcheckpoint_path) { const char* checkpoint_path = env->GetStringUTFChars(jcheckpoint_path, 0); if (checkpoint_path == nullptr) { // exception thrown: OutOfMemoryError return; } auto* checkpoint = reinterpret_cast(jcheckpoint_handle); ROCKSDB_NAMESPACE::Status s = checkpoint->CreateCheckpoint(checkpoint_path); env->ReleaseStringUTFChars(jcheckpoint_path, checkpoint_path); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } rocksdb-6.11.4/java/rocksjni/clock_cache.cc000066400000000000000000000026721370372246700205440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::ClockCache. 
#include #include "cache/clock_cache.h" #include "include/org_rocksdb_ClockCache.h" /* * Class: org_rocksdb_ClockCache * Method: newClockCache * Signature: (JIZ)J */ jlong Java_org_rocksdb_ClockCache_newClockCache( JNIEnv* /*env*/, jclass /*jcls*/, jlong jcapacity, jint jnum_shard_bits, jboolean jstrict_capacity_limit) { auto* sptr_clock_cache = new std::shared_ptr( ROCKSDB_NAMESPACE::NewClockCache( static_cast(jcapacity), static_cast(jnum_shard_bits), static_cast(jstrict_capacity_limit))); return reinterpret_cast(sptr_clock_cache); } /* * Class: org_rocksdb_ClockCache * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_ClockCache_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_clock_cache = reinterpret_cast*>(jhandle); delete sptr_clock_cache; // delete std::shared_ptr } rocksdb-6.11.4/java/rocksjni/columnfamilyhandle.cc000066400000000000000000000051011370372246700221670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::ColumnFamilyHandle. 
#include #include #include #include "include/org_rocksdb_ColumnFamilyHandle.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_ColumnFamilyHandle * Method: getName * Signature: (J)[B */ jbyteArray Java_org_rocksdb_ColumnFamilyHandle_getName(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); std::string cf_name = cfh->GetName(); return ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, cf_name); } /* * Class: org_rocksdb_ColumnFamilyHandle * Method: getID * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyHandle_getID(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); const int32_t id = cfh->GetID(); return static_cast(id); } /* * Class: org_rocksdb_ColumnFamilyHandle * Method: getDescriptor * Signature: (J)Lorg/rocksdb/ColumnFamilyDescriptor; */ jobject Java_org_rocksdb_ColumnFamilyHandle_getDescriptor(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::ColumnFamilyDescriptor desc; ROCKSDB_NAMESPACE::Status s = cfh->GetDescriptor(&desc); if (s.ok()) { return ROCKSDB_NAMESPACE::ColumnFamilyDescriptorJni::construct(env, &desc); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } } /* * Class: org_rocksdb_ColumnFamilyHandle * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_ColumnFamilyHandle_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); assert(cfh != nullptr); delete cfh; } rocksdb-6.11.4/java/rocksjni/compact_range_options.cc000066400000000000000000000153271370372246700227040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactRangeOptions. #include #include "include/org_rocksdb_CompactRangeOptions.h" #include "rocksdb/options.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_CompactRangeOptions * Method: newCompactRangeOptions * Signature: ()J */ jlong Java_org_rocksdb_CompactRangeOptions_newCompactRangeOptions( JNIEnv* /*env*/, jclass /*jclazz*/) { auto* options = new ROCKSDB_NAMESPACE::CompactRangeOptions(); return reinterpret_cast(options); } /* * Class: org_rocksdb_CompactRangeOptions * Method: exclusiveManualCompaction * Signature: (J)Z */ jboolean Java_org_rocksdb_CompactRangeOptions_exclusiveManualCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->exclusive_manual_compaction); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setExclusiveManualCompaction * Signature: (JZ)V */ void Java_org_rocksdb_CompactRangeOptions_setExclusiveManualCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean exclusive_manual_compaction) { auto* options = reinterpret_cast(jhandle); options->exclusive_manual_compaction = static_cast(exclusive_manual_compaction); } /* * Class: org_rocksdb_CompactRangeOptions * Method: bottommostLevelCompaction * Signature: (J)I */ jint Java_org_rocksdb_CompactRangeOptions_bottommostLevelCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::BottommostLevelCompactionJni:: toJavaBottommostLevelCompaction(options->bottommost_level_compaction); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setBottommostLevelCompaction * Signature: (JI)V */ void Java_org_rocksdb_CompactRangeOptions_setBottommostLevelCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint bottommost_level_compaction) { auto* options = reinterpret_cast(jhandle); options->bottommost_level_compaction = 
ROCKSDB_NAMESPACE::BottommostLevelCompactionJni:: toCppBottommostLevelCompaction(bottommost_level_compaction); } /* * Class: org_rocksdb_CompactRangeOptions * Method: changeLevel * Signature: (J)Z */ jboolean Java_org_rocksdb_CompactRangeOptions_changeLevel (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->change_level); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setChangeLevel * Signature: (JZ)V */ void Java_org_rocksdb_CompactRangeOptions_setChangeLevel (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean change_level) { auto* options = reinterpret_cast(jhandle); options->change_level = static_cast(change_level); } /* * Class: org_rocksdb_CompactRangeOptions * Method: targetLevel * Signature: (J)I */ jint Java_org_rocksdb_CompactRangeOptions_targetLevel (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->target_level); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setTargetLevel * Signature: (JI)V */ void Java_org_rocksdb_CompactRangeOptions_setTargetLevel (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint target_level) { auto* options = reinterpret_cast(jhandle); options->target_level = static_cast(target_level); } /* * Class: org_rocksdb_CompactRangeOptions * Method: targetPathId * Signature: (J)I */ jint Java_org_rocksdb_CompactRangeOptions_targetPathId (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->target_path_id); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setTargetPathId * Signature: (JI)V */ void Java_org_rocksdb_CompactRangeOptions_setTargetPathId (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint target_path_id) { auto* options = reinterpret_cast(jhandle); options->target_path_id = static_cast(target_path_id); } /* * Class: org_rocksdb_CompactRangeOptions * Method: allowWriteStall * 
Signature: (J)Z */ jboolean Java_org_rocksdb_CompactRangeOptions_allowWriteStall (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->allow_write_stall); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setAllowWriteStall * Signature: (JZ)V */ void Java_org_rocksdb_CompactRangeOptions_setAllowWriteStall (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean allow_write_stall) { auto* options = reinterpret_cast(jhandle); options->allow_write_stall = static_cast(allow_write_stall); } /* * Class: org_rocksdb_CompactRangeOptions * Method: maxSubcompactions * Signature: (J)I */ jint Java_org_rocksdb_CompactRangeOptions_maxSubcompactions (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->max_subcompactions); } /* * Class: org_rocksdb_CompactRangeOptions * Method: setMaxSubcompactions * Signature: (JI)V */ void Java_org_rocksdb_CompactRangeOptions_setMaxSubcompactions (JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint max_subcompactions) { auto* options = reinterpret_cast(jhandle); options->max_subcompactions = static_cast(max_subcompactions); } /* * Class: org_rocksdb_CompactRangeOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompactRangeOptions_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = reinterpret_cast(jhandle); delete options; } rocksdb-6.11.4/java/rocksjni/compaction_filter.cc000066400000000000000000000020031370372246700220130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionFilter. 
#include #include "include/org_rocksdb_AbstractCompactionFilter.h" #include "rocksdb/compaction_filter.h" // /* * Class: org_rocksdb_AbstractCompactionFilter * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_AbstractCompactionFilter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* cf = reinterpret_cast(handle); assert(cf != nullptr); delete cf; } // rocksdb-6.11.4/java/rocksjni/compaction_filter_factory.cc000066400000000000000000000026251370372246700235540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionFilterFactory. #include #include #include "include/org_rocksdb_AbstractCompactionFilterFactory.h" #include "rocksjni/compaction_filter_factory_jnicallback.h" /* * Class: org_rocksdb_AbstractCompactionFilterFactory * Method: createNewCompactionFilterFactory0 * Signature: ()J */ jlong Java_org_rocksdb_AbstractCompactionFilterFactory_createNewCompactionFilterFactory0( JNIEnv* env, jobject jobj) { auto* cff = new ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback(env, jobj); auto* ptr_sptr_cff = new std::shared_ptr< ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback>(cff); return reinterpret_cast(ptr_sptr_cff); } /* * Class: org_rocksdb_AbstractCompactionFilterFactory * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_AbstractCompactionFilterFactory_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* ptr_sptr_cff = reinterpret_cast< std::shared_ptr*>( jhandle); delete ptr_sptr_cff; } rocksdb-6.11.4/java/rocksjni/compaction_filter_factory_jnicallback.cc000066400000000000000000000050671370372246700260740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, 
Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionFilterFactory. #include "rocksjni/compaction_filter_factory_jnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { CompactionFilterFactoryJniCallback::CompactionFilterFactoryJniCallback( JNIEnv* env, jobject jcompaction_filter_factory) : JniCallback(env, jcompaction_filter_factory) { // Note: The name of a CompactionFilterFactory will not change during // it's lifetime, so we cache it in a global var jmethodID jname_method_id = AbstractCompactionFilterFactoryJni::getNameMethodId(env); if(jname_method_id == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } jstring jname = (jstring)env->CallObjectMethod(m_jcallback_obj, jname_method_id); if(env->ExceptionCheck()) { // exception thrown return; } jboolean has_exception = JNI_FALSE; m_name = JniUtil::copyString(env, jname, &has_exception); // also releases jname if (has_exception == JNI_TRUE) { // exception thrown return; } m_jcreate_compaction_filter_methodid = AbstractCompactionFilterFactoryJni::getCreateCompactionFilterMethodId(env); if(m_jcreate_compaction_filter_methodid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } } const char* CompactionFilterFactoryJniCallback::Name() const { return m_name.get(); } std::unique_ptr CompactionFilterFactoryJniCallback::CreateCompactionFilter( const CompactionFilter::Context& context) { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); jlong addr_compaction_filter = env->CallLongMethod(m_jcallback_obj, m_jcreate_compaction_filter_methodid, static_cast(context.is_full_compaction), 
static_cast(context.is_manual_compaction)); if(env->ExceptionCheck()) { // exception thrown from CallLongMethod env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return nullptr; } auto* cff = reinterpret_cast(addr_compaction_filter); releaseJniEnv(attached_thread); return std::unique_ptr(cff); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/java/rocksjni/compaction_filter_factory_jnicallback.h000066400000000000000000000023061370372246700257270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionFilterFactory. #ifndef JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_ #define JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_ #include #include #include "rocksdb/compaction_filter.h" #include "rocksjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { class CompactionFilterFactoryJniCallback : public JniCallback, public CompactionFilterFactory { public: CompactionFilterFactoryJniCallback( JNIEnv* env, jobject jcompaction_filter_factory); virtual std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context); virtual const char* Name() const; private: std::unique_ptr m_name; jmethodID m_jcreate_compaction_filter_methodid; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_COMPACTION_FILTER_FACTORY_JNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/compaction_job_info.cc000066400000000000000000000163041370372246700223240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionJobInfo. #include #include "include/org_rocksdb_CompactionJobInfo.h" #include "rocksdb/listener.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_CompactionJobInfo * Method: newCompactionJobInfo * Signature: ()J */ jlong Java_org_rocksdb_CompactionJobInfo_newCompactionJobInfo( JNIEnv*, jclass) { auto* compact_job_info = new ROCKSDB_NAMESPACE::CompactionJobInfo(); return reinterpret_cast(compact_job_info); } /* * Class: org_rocksdb_CompactionJobInfo * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompactionJobInfo_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); delete compact_job_info; } /* * Class: org_rocksdb_CompactionJobInfo * Method: columnFamilyName * Signature: (J)[B */ jbyteArray Java_org_rocksdb_CompactionJobInfo_columnFamilyName( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, compact_job_info->cf_name); } /* * Class: org_rocksdb_CompactionJobInfo * Method: status * Signature: (J)Lorg/rocksdb/Status; */ jobject Java_org_rocksdb_CompactionJobInfo_status( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::StatusJni::construct(env, compact_job_info->status); } /* * Class: org_rocksdb_CompactionJobInfo * Method: threadId * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobInfo_threadId( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return static_cast(compact_job_info->thread_id); } /* * Class: org_rocksdb_CompactionJobInfo * Method: jobId * Signature: (J)I */ jint 
Java_org_rocksdb_CompactionJobInfo_jobId( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return static_cast(compact_job_info->job_id); } /* * Class: org_rocksdb_CompactionJobInfo * Method: baseInputLevel * Signature: (J)I */ jint Java_org_rocksdb_CompactionJobInfo_baseInputLevel( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return static_cast(compact_job_info->base_input_level); } /* * Class: org_rocksdb_CompactionJobInfo * Method: outputLevel * Signature: (J)I */ jint Java_org_rocksdb_CompactionJobInfo_outputLevel( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return static_cast(compact_job_info->output_level); } /* * Class: org_rocksdb_CompactionJobInfo * Method: inputFiles * Signature: (J)[Ljava/lang/String; */ jobjectArray Java_org_rocksdb_CompactionJobInfo_inputFiles( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings( env, &compact_job_info->input_files); } /* * Class: org_rocksdb_CompactionJobInfo * Method: outputFiles * Signature: (J)[Ljava/lang/String; */ jobjectArray Java_org_rocksdb_CompactionJobInfo_outputFiles( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings( env, &compact_job_info->output_files); } /* * Class: org_rocksdb_CompactionJobInfo * Method: tableProperties * Signature: (J)Ljava/util/Map; */ jobject Java_org_rocksdb_CompactionJobInfo_tableProperties( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); auto* map = &compact_job_info->table_properties; jobject jhash_map = ROCKSDB_NAMESPACE::HashMapJni::construct( env, static_cast(map->size())); if (jhash_map == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const std::string, std::shared_ptr, jobject, jobject> 
fn_map_kv = [env](const std::pair< const std::string, std::shared_ptr>& kv) { jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &(kv.first), false); if (env->ExceptionCheck()) { // an error occurred return std::unique_ptr>(nullptr); } jobject jtable_properties = ROCKSDB_NAMESPACE::TablePropertiesJni::fromCppTableProperties( env, *(kv.second.get())); if (env->ExceptionCheck()) { // an error occurred env->DeleteLocalRef(jkey); return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair(static_cast(jkey), jtable_properties)); }; if (!ROCKSDB_NAMESPACE::HashMapJni::putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { // exception occurred return nullptr; } return jhash_map; } /* * Class: org_rocksdb_CompactionJobInfo * Method: compactionReason * Signature: (J)B */ jbyte Java_org_rocksdb_CompactionJobInfo_compactionReason( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionReasonJni::toJavaCompactionReason( compact_job_info->compaction_reason); } /* * Class: org_rocksdb_CompactionJobInfo * Method: compression * Signature: (J)B */ jbyte Java_org_rocksdb_CompactionJobInfo_compression( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( compact_job_info->compression); } /* * Class: org_rocksdb_CompactionJobInfo * Method: stats * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobInfo_stats( JNIEnv *, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); auto* stats = new ROCKSDB_NAMESPACE::CompactionJobStats(); stats->Add(compact_job_info->stats); return reinterpret_cast(stats); } rocksdb-6.11.4/java/rocksjni/compaction_job_stats.cc000066400000000000000000000252401370372246700225260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionJobStats. #include #include "include/org_rocksdb_CompactionJobStats.h" #include "rocksdb/compaction_job_stats.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_CompactionJobStats * Method: newCompactionJobStats * Signature: ()J */ jlong Java_org_rocksdb_CompactionJobStats_newCompactionJobStats( JNIEnv*, jclass) { auto* compact_job_stats = new ROCKSDB_NAMESPACE::CompactionJobStats(); return reinterpret_cast(compact_job_stats); } /* * Class: org_rocksdb_CompactionJobStats * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompactionJobStats_disposeInternal( JNIEnv *, jobject, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); delete compact_job_stats; } /* * Class: org_rocksdb_CompactionJobStats * Method: reset * Signature: (J)V */ void Java_org_rocksdb_CompactionJobStats_reset( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); compact_job_stats->Reset(); } /* * Class: org_rocksdb_CompactionJobStats * Method: add * Signature: (JJ)V */ void Java_org_rocksdb_CompactionJobStats_add( JNIEnv*, jclass, jlong jhandle, jlong jother_handle) { auto* compact_job_stats = reinterpret_cast(jhandle); auto* other_compact_job_stats = reinterpret_cast(jother_handle); compact_job_stats->Add(*other_compact_job_stats); } /* * Class: org_rocksdb_CompactionJobStats * Method: elapsedMicros * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_elapsedMicros( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast(compact_job_stats->elapsed_micros); } /* * Class: org_rocksdb_CompactionJobStats * Method: numInputRecords * Signature: (J)J */ jlong 
Java_org_rocksdb_CompactionJobStats_numInputRecords( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast(compact_job_stats->num_input_records); } /* * Class: org_rocksdb_CompactionJobStats * Method: numInputFiles * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numInputFiles( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast(compact_job_stats->num_input_files); } /* * Class: org_rocksdb_CompactionJobStats * Method: numInputFilesAtOutputLevel * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numInputFilesAtOutputLevel( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_input_files_at_output_level); } /* * Class: org_rocksdb_CompactionJobStats * Method: numOutputRecords * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numOutputRecords( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_output_records); } /* * Class: org_rocksdb_CompactionJobStats * Method: numOutputFiles * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numOutputFiles( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_output_files); } /* * Class: org_rocksdb_CompactionJobStats * Method: isManualCompaction * Signature: (J)Z */ jboolean Java_org_rocksdb_CompactionJobStats_isManualCompaction( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); if (compact_job_stats->is_manual_compaction) { return JNI_TRUE; } else { return JNI_FALSE; } } /* * Class: org_rocksdb_CompactionJobStats * Method: totalInputBytes * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_totalInputBytes( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return 
static_cast( compact_job_stats->total_input_bytes); } /* * Class: org_rocksdb_CompactionJobStats * Method: totalOutputBytes * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_totalOutputBytes( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->total_output_bytes); } /* * Class: org_rocksdb_CompactionJobStats * Method: numRecordsReplaced * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numRecordsReplaced( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_records_replaced); } /* * Class: org_rocksdb_CompactionJobStats * Method: totalInputRawKeyBytes * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_totalInputRawKeyBytes( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->total_input_raw_key_bytes); } /* * Class: org_rocksdb_CompactionJobStats * Method: totalInputRawValueBytes * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_totalInputRawValueBytes( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->total_input_raw_value_bytes); } /* * Class: org_rocksdb_CompactionJobStats * Method: numInputDeletionRecords * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numInputDeletionRecords( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_input_deletion_records); } /* * Class: org_rocksdb_CompactionJobStats * Method: numExpiredDeletionRecords * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numExpiredDeletionRecords( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_expired_deletion_records); } /* * Class: org_rocksdb_CompactionJobStats * 
Method: numCorruptKeys * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numCorruptKeys( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_corrupt_keys); } /* * Class: org_rocksdb_CompactionJobStats * Method: fileWriteNanos * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_fileWriteNanos( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->file_write_nanos); } /* * Class: org_rocksdb_CompactionJobStats * Method: fileRangeSyncNanos * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_fileRangeSyncNanos( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->file_range_sync_nanos); } /* * Class: org_rocksdb_CompactionJobStats * Method: fileFsyncNanos * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_fileFsyncNanos( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->file_fsync_nanos); } /* * Class: org_rocksdb_CompactionJobStats * Method: filePrepareWriteNanos * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_filePrepareWriteNanos( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->file_prepare_write_nanos); } /* * Class: org_rocksdb_CompactionJobStats * Method: smallestOutputKeyPrefix * Signature: (J)[B */ jbyteArray Java_org_rocksdb_CompactionJobStats_smallestOutputKeyPrefix( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, compact_job_stats->smallest_output_key_prefix); } /* * Class: org_rocksdb_CompactionJobStats * Method: largestOutputKeyPrefix * Signature: (J)[B */ jbyteArray Java_org_rocksdb_CompactionJobStats_largestOutputKeyPrefix( JNIEnv* 
env, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, compact_job_stats->largest_output_key_prefix); } /* * Class: org_rocksdb_CompactionJobStats * Method: numSingleDelFallthru * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numSingleDelFallthru( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_single_del_fallthru); } /* * Class: org_rocksdb_CompactionJobStats * Method: numSingleDelMismatch * Signature: (J)J */ jlong Java_org_rocksdb_CompactionJobStats_numSingleDelMismatch( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); return static_cast( compact_job_stats->num_single_del_mismatch); } rocksdb-6.11.4/java/rocksjni/compaction_options.cc000066400000000000000000000067521370372246700222400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionOptions. 
#include #include "include/org_rocksdb_CompactionOptions.h" #include "rocksdb/options.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_CompactionOptions * Method: newCompactionOptions * Signature: ()J */ jlong Java_org_rocksdb_CompactionOptions_newCompactionOptions( JNIEnv*, jclass) { auto* compact_opts = new ROCKSDB_NAMESPACE::CompactionOptions(); return reinterpret_cast(compact_opts); } /* * Class: org_rocksdb_CompactionOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompactionOptions_disposeInternal( JNIEnv *, jobject, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); delete compact_opts; } /* * Class: org_rocksdb_CompactionOptions * Method: compression * Signature: (J)B */ jbyte Java_org_rocksdb_CompactionOptions_compression( JNIEnv*, jclass, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( compact_opts->compression); } /* * Class: org_rocksdb_CompactionOptions * Method: setCompression * Signature: (JB)V */ void Java_org_rocksdb_CompactionOptions_setCompression( JNIEnv*, jclass, jlong jhandle, jbyte jcompression_type_value) { auto* compact_opts = reinterpret_cast(jhandle); compact_opts->compression = ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( jcompression_type_value); } /* * Class: org_rocksdb_CompactionOptions * Method: outputFileSizeLimit * Signature: (J)J */ jlong Java_org_rocksdb_CompactionOptions_outputFileSizeLimit( JNIEnv*, jclass, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); return static_cast( compact_opts->output_file_size_limit); } /* * Class: org_rocksdb_CompactionOptions * Method: setOutputFileSizeLimit * Signature: (JJ)V */ void Java_org_rocksdb_CompactionOptions_setOutputFileSizeLimit( JNIEnv*, jclass, jlong jhandle, jlong joutput_file_size_limit) { auto* compact_opts = reinterpret_cast(jhandle); compact_opts->output_file_size_limit = static_cast(joutput_file_size_limit); } 
/* * Class: org_rocksdb_CompactionOptions * Method: maxSubcompactions * Signature: (J)I */ jint Java_org_rocksdb_CompactionOptions_maxSubcompactions( JNIEnv*, jclass, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); return static_cast( compact_opts->max_subcompactions); } /* * Class: org_rocksdb_CompactionOptions * Method: setMaxSubcompactions * Signature: (JI)V */ void Java_org_rocksdb_CompactionOptions_setMaxSubcompactions( JNIEnv*, jclass, jlong jhandle, jint jmax_subcompactions) { auto* compact_opts = reinterpret_cast(jhandle); compact_opts->max_subcompactions = static_cast(jmax_subcompactions); } rocksdb-6.11.4/java/rocksjni/compaction_options_fifo.cc000066400000000000000000000050561370372246700232370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionOptionsFIFO. 
#include #include "include/org_rocksdb_CompactionOptionsFIFO.h" #include "rocksdb/advanced_options.h" /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: newCompactionOptionsFIFO * Signature: ()J */ jlong Java_org_rocksdb_CompactionOptionsFIFO_newCompactionOptionsFIFO( JNIEnv*, jclass) { const auto* opt = new ROCKSDB_NAMESPACE::CompactionOptionsFIFO(); return reinterpret_cast(opt); } /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: setMaxTableFilesSize * Signature: (JJ)V */ void Java_org_rocksdb_CompactionOptionsFIFO_setMaxTableFilesSize( JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { auto* opt = reinterpret_cast(jhandle); opt->max_table_files_size = static_cast(jmax_table_files_size); } /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: maxTableFilesSize * Signature: (J)J */ jlong Java_org_rocksdb_CompactionOptionsFIFO_maxTableFilesSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_table_files_size); } /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: setAllowCompaction * Signature: (JZ)V */ void Java_org_rocksdb_CompactionOptionsFIFO_setAllowCompaction( JNIEnv*, jobject, jlong jhandle, jboolean allow_compaction) { auto* opt = reinterpret_cast(jhandle); opt->allow_compaction = static_cast(allow_compaction); } /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: allowCompaction * Signature: (J)Z */ jboolean Java_org_rocksdb_CompactionOptionsFIFO_allowCompaction( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_compaction); } /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompactionOptionsFIFO_disposeInternal( JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast(jhandle); } rocksdb-6.11.4/java/rocksjni/compaction_options_universal.cc000066400000000000000000000152211370372246700243170ustar00rootroot00000000000000// Copyright (c) 2011-present, 
Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionOptionsUniversal. #include #include "include/org_rocksdb_CompactionOptionsUniversal.h" #include "rocksdb/advanced_options.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: newCompactionOptionsUniversal * Signature: ()J */ jlong Java_org_rocksdb_CompactionOptionsUniversal_newCompactionOptionsUniversal( JNIEnv*, jclass) { const auto* opt = new ROCKSDB_NAMESPACE::CompactionOptionsUniversal(); return reinterpret_cast(opt); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: setSizeRatio * Signature: (JI)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setSizeRatio( JNIEnv*, jobject, jlong jhandle, jint jsize_ratio) { auto* opt = reinterpret_cast(jhandle); opt->size_ratio = static_cast(jsize_ratio); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: sizeRatio * Signature: (J)I */ jint Java_org_rocksdb_CompactionOptionsUniversal_sizeRatio( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->size_ratio); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: setMinMergeWidth * Signature: (JI)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setMinMergeWidth( JNIEnv*, jobject, jlong jhandle, jint jmin_merge_width) { auto* opt = reinterpret_cast(jhandle); opt->min_merge_width = static_cast(jmin_merge_width); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: minMergeWidth * Signature: (J)I */ jint Java_org_rocksdb_CompactionOptionsUniversal_minMergeWidth( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->min_merge_width); } /* * Class: 
org_rocksdb_CompactionOptionsUniversal * Method: setMaxMergeWidth * Signature: (JI)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setMaxMergeWidth( JNIEnv*, jobject, jlong jhandle, jint jmax_merge_width) { auto* opt = reinterpret_cast(jhandle); opt->max_merge_width = static_cast(jmax_merge_width); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: maxMergeWidth * Signature: (J)I */ jint Java_org_rocksdb_CompactionOptionsUniversal_maxMergeWidth( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_merge_width); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: setMaxSizeAmplificationPercent * Signature: (JI)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setMaxSizeAmplificationPercent( JNIEnv*, jobject, jlong jhandle, jint jmax_size_amplification_percent) { auto* opt = reinterpret_cast(jhandle); opt->max_size_amplification_percent = static_cast(jmax_size_amplification_percent); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: maxSizeAmplificationPercent * Signature: (J)I */ jint Java_org_rocksdb_CompactionOptionsUniversal_maxSizeAmplificationPercent( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_size_amplification_percent); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: setCompressionSizePercent * Signature: (JI)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setCompressionSizePercent( JNIEnv*, jobject, jlong jhandle, jint jcompression_size_percent) { auto* opt = reinterpret_cast(jhandle); opt->compression_size_percent = static_cast(jcompression_size_percent); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: compressionSizePercent * Signature: (J)I */ jint Java_org_rocksdb_CompactionOptionsUniversal_compressionSizePercent( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->compression_size_percent); } /* * Class: 
org_rocksdb_CompactionOptionsUniversal * Method: setStopStyle * Signature: (JB)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setStopStyle( JNIEnv*, jobject, jlong jhandle, jbyte jstop_style_value) { auto* opt = reinterpret_cast(jhandle); opt->stop_style = ROCKSDB_NAMESPACE::CompactionStopStyleJni::toCppCompactionStopStyle( jstop_style_value); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: stopStyle * Signature: (J)B */ jbyte Java_org_rocksdb_CompactionOptionsUniversal_stopStyle( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionStopStyleJni::toJavaCompactionStopStyle( opt->stop_style); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: setAllowTrivialMove * Signature: (JZ)V */ void Java_org_rocksdb_CompactionOptionsUniversal_setAllowTrivialMove( JNIEnv*, jobject, jlong jhandle, jboolean jallow_trivial_move) { auto* opt = reinterpret_cast(jhandle); opt->allow_trivial_move = static_cast(jallow_trivial_move); } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: allowTrivialMove * Signature: (J)Z */ jboolean Java_org_rocksdb_CompactionOptionsUniversal_allowTrivialMove( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return opt->allow_trivial_move; } /* * Class: org_rocksdb_CompactionOptionsUniversal * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompactionOptionsUniversal_disposeInternal( JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast( jhandle); } rocksdb-6.11.4/java/rocksjni/comparator.cc000066400000000000000000000034411370372246700204700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Comparator. #include #include #include #include #include #include "include/org_rocksdb_AbstractComparator.h" #include "include/org_rocksdb_NativeComparatorWrapper.h" #include "rocksjni/comparatorjnicallback.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_AbstractComparator * Method: createNewComparator * Signature: (J)J */ jlong Java_org_rocksdb_AbstractComparator_createNewComparator( JNIEnv* env, jobject jcomparator, jlong copt_handle) { auto* copt = reinterpret_cast( copt_handle); auto* c = new ROCKSDB_NAMESPACE::ComparatorJniCallback(env, jcomparator, copt); return reinterpret_cast(c); } /* * Class: org_rocksdb_AbstractComparator * Method: usingDirectBuffers * Signature: (J)Z */ jboolean Java_org_rocksdb_AbstractComparator_usingDirectBuffers( JNIEnv*, jobject, jlong jhandle) { auto* c = reinterpret_cast(jhandle); return static_cast(c->m_options->direct_buffer); } /* * Class: org_rocksdb_NativeComparatorWrapper * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_NativeComparatorWrapper_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jcomparator_handle) { auto* comparator = reinterpret_cast(jcomparator_handle); delete comparator; } rocksdb-6.11.4/java/rocksjni/comparatorjnicallback.cc000066400000000000000000000513341370372246700226520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Comparator. 
#include "rocksjni/comparatorjnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { ComparatorJniCallback::ComparatorJniCallback( JNIEnv* env, jobject jcomparator, const ComparatorJniCallbackOptions* options) : JniCallback(env, jcomparator), m_options(options) { // cache the AbstractComparatorJniBridge class as we will reuse it many times for each callback m_abstract_comparator_jni_bridge_clazz = static_cast(env->NewGlobalRef(AbstractComparatorJniBridge::getJClass(env))); // Note: The name of a Comparator will not change during it's lifetime, // so we cache it in a global var jmethodID jname_mid = AbstractComparatorJni::getNameMethodId(env); if (jname_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } jstring js_name = (jstring)env->CallObjectMethod(m_jcallback_obj, jname_mid); if (env->ExceptionCheck()) { // exception thrown return; } jboolean has_exception = JNI_FALSE; m_name = JniUtil::copyString(env, js_name, &has_exception); // also releases jsName if (has_exception == JNI_TRUE) { // exception thrown return; } // cache the ByteBuffer class as we will reuse it many times for each callback m_jbytebuffer_clazz = static_cast(env->NewGlobalRef(ByteBufferJni::getJClass(env))); m_jcompare_mid = AbstractComparatorJniBridge::getCompareInternalMethodId( env, m_abstract_comparator_jni_bridge_clazz); if (m_jcompare_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } m_jshortest_mid = AbstractComparatorJniBridge::getFindShortestSeparatorInternalMethodId( env, m_abstract_comparator_jni_bridge_clazz); if (m_jshortest_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } m_jshort_mid = AbstractComparatorJniBridge::getFindShortSuccessorInternalMethodId(env, m_abstract_comparator_jni_bridge_clazz); if (m_jshort_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } // do we need reusable buffers? 
if (m_options->max_reused_buffer_size > -1) { if (m_options->reused_synchronisation_type == ReusedSynchronisationType::THREAD_LOCAL) { // buffers reused per thread UnrefHandler unref = [](void* ptr) { ThreadLocalBuf* tlb = reinterpret_cast(ptr); jboolean attached_thread = JNI_FALSE; JNIEnv* _env = JniUtil::getJniEnv(tlb->jvm, &attached_thread); if (_env != nullptr) { if (tlb->direct_buffer) { void* buf = _env->GetDirectBufferAddress(tlb->jbuf); delete[] static_cast(buf); } _env->DeleteGlobalRef(tlb->jbuf); JniUtil::releaseJniEnv(tlb->jvm, attached_thread); } }; m_tl_buf_a = new ThreadLocalPtr(unref); m_tl_buf_b = new ThreadLocalPtr(unref); m_jcompare_buf_a = nullptr; m_jcompare_buf_b = nullptr; m_jshortest_buf_start = nullptr; m_jshortest_buf_limit = nullptr; m_jshort_buf_key = nullptr; } else { //buffers reused and shared across threads const bool adaptive = m_options->reused_synchronisation_type == ReusedSynchronisationType::ADAPTIVE_MUTEX; mtx_compare = std::unique_ptr(new port::Mutex(adaptive)); mtx_shortest = std::unique_ptr(new port::Mutex(adaptive)); mtx_short = std::unique_ptr(new port::Mutex(adaptive)); m_jcompare_buf_a = env->NewGlobalRef(ByteBufferJni::construct( env, m_options->direct_buffer, m_options->max_reused_buffer_size, m_jbytebuffer_clazz)); if (m_jcompare_buf_a == nullptr) { // exception thrown: OutOfMemoryError return; } m_jcompare_buf_b = env->NewGlobalRef(ByteBufferJni::construct( env, m_options->direct_buffer, m_options->max_reused_buffer_size, m_jbytebuffer_clazz)); if (m_jcompare_buf_b == nullptr) { // exception thrown: OutOfMemoryError return; } m_jshortest_buf_start = env->NewGlobalRef(ByteBufferJni::construct( env, m_options->direct_buffer, m_options->max_reused_buffer_size, m_jbytebuffer_clazz)); if (m_jshortest_buf_start == nullptr) { // exception thrown: OutOfMemoryError return; } m_jshortest_buf_limit = env->NewGlobalRef(ByteBufferJni::construct( env, m_options->direct_buffer, m_options->max_reused_buffer_size, 
m_jbytebuffer_clazz)); if (m_jshortest_buf_limit == nullptr) { // exception thrown: OutOfMemoryError return; } m_jshort_buf_key = env->NewGlobalRef(ByteBufferJni::construct( env, m_options->direct_buffer, m_options->max_reused_buffer_size, m_jbytebuffer_clazz)); if (m_jshort_buf_key == nullptr) { // exception thrown: OutOfMemoryError return; } m_tl_buf_a = nullptr; m_tl_buf_b = nullptr; } } else { m_jcompare_buf_a = nullptr; m_jcompare_buf_b = nullptr; m_jshortest_buf_start = nullptr; m_jshortest_buf_limit = nullptr; m_jshort_buf_key = nullptr; m_tl_buf_a = nullptr; m_tl_buf_b = nullptr; } } ComparatorJniCallback::~ComparatorJniCallback() { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); env->DeleteGlobalRef(m_abstract_comparator_jni_bridge_clazz); env->DeleteGlobalRef(m_jbytebuffer_clazz); if (m_jcompare_buf_a != nullptr) { if (m_options->direct_buffer) { void* buf = env->GetDirectBufferAddress(m_jcompare_buf_a); delete[] static_cast(buf); } env->DeleteGlobalRef(m_jcompare_buf_a); } if (m_jcompare_buf_b != nullptr) { if (m_options->direct_buffer) { void* buf = env->GetDirectBufferAddress(m_jcompare_buf_b); delete[] static_cast(buf); } env->DeleteGlobalRef(m_jcompare_buf_b); } if (m_jshortest_buf_start != nullptr) { if (m_options->direct_buffer) { void* buf = env->GetDirectBufferAddress(m_jshortest_buf_start); delete[] static_cast(buf); } env->DeleteGlobalRef(m_jshortest_buf_start); } if (m_jshortest_buf_limit != nullptr) { if (m_options->direct_buffer) { void* buf = env->GetDirectBufferAddress(m_jshortest_buf_limit); delete[] static_cast(buf); } env->DeleteGlobalRef(m_jshortest_buf_limit); } if (m_jshort_buf_key != nullptr) { if (m_options->direct_buffer) { void* buf = env->GetDirectBufferAddress(m_jshort_buf_key); delete[] static_cast(buf); } env->DeleteGlobalRef(m_jshort_buf_key); } if (m_tl_buf_a != nullptr) { delete m_tl_buf_a; } if (m_tl_buf_b != nullptr) { delete m_tl_buf_b; } 
releaseJniEnv(attached_thread); } const char* ComparatorJniCallback::Name() const { return m_name.get(); } int ComparatorJniCallback::Compare(const Slice& a, const Slice& b) const { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); const bool reuse_jbuf_a = static_cast(a.size()) <= m_options->max_reused_buffer_size; const bool reuse_jbuf_b = static_cast(b.size()) <= m_options->max_reused_buffer_size; MaybeLockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); jobject jcompare_buf_a = GetBuffer(env, a, reuse_jbuf_a, m_tl_buf_a, m_jcompare_buf_a); if (jcompare_buf_a == nullptr) { // exception occurred MaybeUnlockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return 0; } jobject jcompare_buf_b = GetBuffer(env, b, reuse_jbuf_b, m_tl_buf_b, m_jcompare_buf_b); if (jcompare_buf_b == nullptr) { // exception occurred if (!reuse_jbuf_a) { DeleteBuffer(env, jcompare_buf_a); } MaybeUnlockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return 0; } jint result = env->CallStaticIntMethod( m_abstract_comparator_jni_bridge_clazz, m_jcompare_mid, m_jcallback_obj, jcompare_buf_a, reuse_jbuf_a ? a.size() : -1, jcompare_buf_b, reuse_jbuf_b ? 
b.size() : -1); if (env->ExceptionCheck()) { // exception thrown from CallIntMethod env->ExceptionDescribe(); // print out exception to stderr result = 0; // we could not get a result from java callback so use 0 } if (!reuse_jbuf_a) { DeleteBuffer(env, jcompare_buf_a); } if (!reuse_jbuf_b) { DeleteBuffer(env, jcompare_buf_b); } MaybeUnlockForReuse(mtx_compare, reuse_jbuf_a || reuse_jbuf_b); releaseJniEnv(attached_thread); return result; } void ComparatorJniCallback::FindShortestSeparator( std::string* start, const Slice& limit) const { if (start == nullptr) { return; } jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); const bool reuse_jbuf_start = static_cast(start->length()) <= m_options->max_reused_buffer_size; const bool reuse_jbuf_limit = static_cast(limit.size()) <= m_options->max_reused_buffer_size; MaybeLockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); Slice sstart(start->data(), start->length()); jobject j_start_buf = GetBuffer(env, sstart, reuse_jbuf_start, m_tl_buf_a, m_jshortest_buf_start); if (j_start_buf == nullptr) { // exception occurred MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } jobject j_limit_buf = GetBuffer(env, limit, reuse_jbuf_limit, m_tl_buf_b, m_jshortest_buf_limit); if (j_limit_buf == nullptr) { // exception occurred if (!reuse_jbuf_start) { DeleteBuffer(env, j_start_buf); } MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } jint jstart_len = env->CallStaticIntMethod( m_abstract_comparator_jni_bridge_clazz, m_jshortest_mid, m_jcallback_obj, j_start_buf, reuse_jbuf_start ? start->length() : -1, j_limit_buf, reuse_jbuf_limit ? 
limit.size() : -1); if (env->ExceptionCheck()) { // exception thrown from CallIntMethod env->ExceptionDescribe(); // print out exception to stderr } else if (static_cast(jstart_len) != start->length()) { // start buffer has changed in Java, so update `start` with the result bool copy_from_non_direct = false; if (reuse_jbuf_start) { // reused a buffer if (m_options->direct_buffer) { // reused direct buffer void* start_buf = env->GetDirectBufferAddress(j_start_buf); if (start_buf == nullptr) { if (!reuse_jbuf_start) { DeleteBuffer(env, j_start_buf); } if (!reuse_jbuf_limit) { DeleteBuffer(env, j_limit_buf); } MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Unable to get Direct Buffer Address"); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } start->assign(static_cast(start_buf), jstart_len); } else { // reused non-direct buffer copy_from_non_direct = true; } } else { // there was a new buffer if (m_options->direct_buffer) { // it was direct... 
don't forget to potentially truncate the `start` string start->resize(jstart_len); } else { // it was non-direct copy_from_non_direct = true; } } if (copy_from_non_direct) { jbyteArray jarray = ByteBufferJni::array(env, j_start_buf, m_jbytebuffer_clazz); if (jarray == nullptr) { if (!reuse_jbuf_start) { DeleteBuffer(env, j_start_buf); } if (!reuse_jbuf_limit) { DeleteBuffer(env, j_limit_buf); } MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } jboolean has_exception = JNI_FALSE; JniUtil::byteString(env, jarray, [start, jstart_len](const char* data, const size_t) { return start->assign(data, static_cast(jstart_len)); }, &has_exception); env->DeleteLocalRef(jarray); if (has_exception == JNI_TRUE) { if (!reuse_jbuf_start) { DeleteBuffer(env, j_start_buf); } if (!reuse_jbuf_limit) { DeleteBuffer(env, j_limit_buf); } env->ExceptionDescribe(); // print out exception to stderr MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); releaseJniEnv(attached_thread); return; } } } if (!reuse_jbuf_start) { DeleteBuffer(env, j_start_buf); } if (!reuse_jbuf_limit) { DeleteBuffer(env, j_limit_buf); } MaybeUnlockForReuse(mtx_shortest, reuse_jbuf_start || reuse_jbuf_limit); releaseJniEnv(attached_thread); } void ComparatorJniCallback::FindShortSuccessor( std::string* key) const { if (key == nullptr) { return; } jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); const bool reuse_jbuf_key = static_cast(key->length()) <= m_options->max_reused_buffer_size; MaybeLockForReuse(mtx_short, reuse_jbuf_key); Slice skey(key->data(), key->length()); jobject j_key_buf = GetBuffer(env, skey, reuse_jbuf_key, m_tl_buf_a, m_jshort_buf_key); if (j_key_buf == nullptr) { // exception occurred MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); env->ExceptionDescribe(); // print out exception to stderr 
releaseJniEnv(attached_thread); return; } jint jkey_len = env->CallStaticIntMethod( m_abstract_comparator_jni_bridge_clazz, m_jshort_mid, m_jcallback_obj, j_key_buf, reuse_jbuf_key ? key->length() : -1); if (env->ExceptionCheck()) { // exception thrown from CallStaticIntMethod if (!reuse_jbuf_key) { DeleteBuffer(env, j_key_buf); } MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } if (static_cast(jkey_len) != key->length()) { // the length reported by Java differs from the original, so update `key` with the result bool copy_from_non_direct = false; if (reuse_jbuf_key) { // reused a buffer if (m_options->direct_buffer) { // reused direct buffer void* key_buf = env->GetDirectBufferAddress(j_key_buf); if (key_buf == nullptr) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Unable to get Direct Buffer Address"); if (!reuse_jbuf_key) { DeleteBuffer(env, j_key_buf); } MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } key->assign(static_cast(key_buf), jkey_len); } else { // reused non-direct buffer copy_from_non_direct = true; } } else { // there was a new buffer if (m_options->direct_buffer) { // it was direct... 
don't forget to potentially truncate the `key` string key->resize(jkey_len); } else { // it was non-direct copy_from_non_direct = true; } } if (copy_from_non_direct) { jbyteArray jarray = ByteBufferJni::array(env, j_key_buf, m_jbytebuffer_clazz); if (jarray == nullptr) { if (!reuse_jbuf_key) { DeleteBuffer(env, j_key_buf); } MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } jboolean has_exception = JNI_FALSE; JniUtil::byteString(env, jarray, [key, jkey_len](const char* data, const size_t) { return key->assign(data, static_cast(jkey_len)); }, &has_exception); env->DeleteLocalRef(jarray); if (has_exception == JNI_TRUE) { if (!reuse_jbuf_key) { DeleteBuffer(env, j_key_buf); } MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } } } if (!reuse_jbuf_key) { DeleteBuffer(env, j_key_buf); } MaybeUnlockForReuse(mtx_short, reuse_jbuf_key); releaseJniEnv(attached_thread); } /* Locks `mutex` when `cond` is true, unless the reused buffers are thread-local. */ inline void ComparatorJniCallback::MaybeLockForReuse( const std::unique_ptr& mutex, const bool cond) const { // no need to lock if using thread_local if (m_options->reused_synchronisation_type != ReusedSynchronisationType::THREAD_LOCAL && cond) { mutex.get()->Lock(); } } /* Unlocks `mutex` under the same condition that MaybeLockForReuse locks it. */ inline void ComparatorJniCallback::MaybeUnlockForReuse( const std::unique_ptr& mutex, const bool cond) const { // no need to unlock if using thread_local if (m_options->reused_synchronisation_type != ReusedSynchronisationType::THREAD_LOCAL && cond) { mutex.get()->Unlock(); } } /* Selects the Java buffer used to hand `src` to a callback: a reused (thread-local or class member) buffer when `reuse_buffer` is true, otherwise a new buffer. Returns nullptr if an exception occurred. */ jobject ComparatorJniCallback::GetBuffer(JNIEnv* env, const Slice& src, bool reuse_buffer, ThreadLocalPtr* tl_buf, jobject jreuse_buffer) const { if (reuse_buffer) { if (m_options->reused_synchronisation_type == ReusedSynchronisationType::THREAD_LOCAL) { // reuse thread-local buffer ThreadLocalBuf* tlb = reinterpret_cast(tl_buf->Get()); if (tlb == nullptr) { // thread-local 
buffer has not yet been created, so create it jobject jtl_buf = env->NewGlobalRef(ByteBufferJni::construct( env, m_options->direct_buffer, m_options->max_reused_buffer_size, m_jbytebuffer_clazz)); if (jtl_buf == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } tlb = new ThreadLocalBuf(m_jvm, m_options->direct_buffer, jtl_buf); tl_buf->Reset(tlb); } return ReuseBuffer(env, src, tlb->jbuf); } else { // reuse class member buffer return ReuseBuffer(env, src, jreuse_buffer); } } else { // new buffer return NewBuffer(env, src); } } jobject ComparatorJniCallback::ReuseBuffer( JNIEnv* env, const Slice& src, jobject jreuse_buffer) const { // we can reuse the buffer if (m_options->direct_buffer) { // copy into direct buffer void* buf = env->GetDirectBufferAddress(jreuse_buffer); if (buf == nullptr) { // either memory region is undefined, given object is not a direct java.nio.Buffer, or JNI access to direct buffers is not supported by this virtual machine. ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Unable to get Direct Buffer Address"); return nullptr; } memcpy(buf, src.data(), src.size()); } else { // copy into non-direct buffer const jbyteArray jarray = ByteBufferJni::array(env, jreuse_buffer, m_jbytebuffer_clazz); if (jarray == nullptr) { // exception occurred return nullptr; } env->SetByteArrayRegion(jarray, 0, static_cast(src.size()), const_cast(reinterpret_cast(src.data()))); if (env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(jarray); return nullptr; } env->DeleteLocalRef(jarray); } return jreuse_buffer; } jobject ComparatorJniCallback::NewBuffer(JNIEnv* env, const Slice& src) const { // we need a new buffer jobject jbuf = ByteBufferJni::constructWith(env, m_options->direct_buffer, src.data(), src.size(), m_jbytebuffer_clazz); if (jbuf == nullptr) { // exception occurred return nullptr; } return jbuf; } void ComparatorJniCallback::DeleteBuffer(JNIEnv* env, jobject jbuffer) const { env->DeleteLocalRef(jbuffer); } } // 
namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/java/rocksjni/comparatorjnicallback.h000066400000000000000000000124271370372246700225140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Comparator #ifndef JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ #define JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ #include #include #include #include "rocksjni/jnicallback.h" #include "rocksdb/comparator.h" #include "rocksdb/slice.h" #include "port/port.h" #include "util/thread_local.h" namespace ROCKSDB_NAMESPACE { enum ReusedSynchronisationType { /** * Standard mutex. */ MUTEX, /** * Use adaptive mutex, which spins in the user space before resorting * to kernel. This could reduce context switch when the mutex is not * heavily contended. However, if the mutex is hot, we could end up * wasting spin time. */ ADAPTIVE_MUTEX, /** * There is a reused buffer per-thread. */ THREAD_LOCAL }; struct ComparatorJniCallbackOptions { // Set the synchronisation type used to guard the reused buffers. // Only used if max_reused_buffer_size > 0. // Default: ADAPTIVE_MUTEX ReusedSynchronisationType reused_synchronisation_type = ReusedSynchronisationType::ADAPTIVE_MUTEX; // Indicates if a direct byte buffer (i.e. outside of the normal // garbage-collected heap) is used for the callbacks to Java, // as opposed to a non-direct byte buffer which is a wrapper around // an on-heap byte[]. // Default: true bool direct_buffer = true; // Maximum size of a buffer (in bytes) that will be reused. // Comparators will use 5 of these buffers, // so the retained memory size will be 5 * max_reused_buffer_size. 
// When a buffer is needed for transferring data to a callback, // if it requires no more than max_reused_buffer_size bytes, then an // existing buffer will be reused, else a new buffer will be // allocated just for that callback. -1 to disable. // Default: 64 bytes int32_t max_reused_buffer_size = 64; }; /** * This class acts as a bridge between C++ * and Java. The methods in this class will be * called back from the RocksDB storage engine (C++); * we then call back to the appropriate Java method. * This enables Comparators to be implemented in Java. * * The design of this Comparator caches the Java ByteBuffer * objects that are used in the compare, findShortestSeparator * and findShortSuccessor method callbacks. Instead of creating new objects for each callback * of those functions, by reusing those buffers we are a lot * faster; Unfortunately this means that we have to * introduce independent locking in regions of each of those methods * via the mutexes mtx_compare, mtx_shortest and mtx_short respectively * (no locking is performed when the reused buffers are thread-local). */ class ComparatorJniCallback : public JniCallback, public Comparator { public: ComparatorJniCallback( JNIEnv* env, jobject jcomparator, const ComparatorJniCallbackOptions* options); ~ComparatorJniCallback(); virtual const char* Name() const; virtual int Compare(const Slice& a, const Slice& b) const; virtual void FindShortestSeparator( std::string* start, const Slice& limit) const; virtual void FindShortSuccessor(std::string* key) const; const ComparatorJniCallbackOptions* m_options; private: struct ThreadLocalBuf { ThreadLocalBuf(JavaVM* _jvm, bool _direct_buffer, jobject _jbuf) : jvm(_jvm), direct_buffer(_direct_buffer), jbuf(_jbuf) {} JavaVM* jvm; bool direct_buffer; jobject jbuf; }; inline void MaybeLockForReuse(const std::unique_ptr& mutex, const bool cond) const; inline void MaybeUnlockForReuse(const std::unique_ptr& mutex, const bool cond) const; jobject GetBuffer(JNIEnv* env, const Slice& src, bool reuse_buffer, ThreadLocalPtr* tl_buf, jobject jreuse_buffer) const; jobject 
ReuseBuffer(JNIEnv* env, const Slice& src, jobject jreuse_buffer) const; jobject NewBuffer(JNIEnv* env, const Slice& src) const; void DeleteBuffer(JNIEnv* env, jobject jbuffer) const; // used for synchronisation in compare method std::unique_ptr mtx_compare; // used for synchronisation in findShortestSeparator method std::unique_ptr mtx_shortest; // used for synchronisation in findShortSuccessor method std::unique_ptr mtx_short; std::unique_ptr m_name; jclass m_abstract_comparator_jni_bridge_clazz; // TODO(AR) could we make this static somehow? jclass m_jbytebuffer_clazz; // TODO(AR) we could cache this globally for the entire VM if we switch more APIs to use ByteBuffer // TODO(AR) could we make this static somehow? jmethodID m_jcompare_mid; // TODO(AR) could we make this static somehow? jmethodID m_jshortest_mid; // TODO(AR) could we make this static somehow? jmethodID m_jshort_mid; // TODO(AR) could we make this static somehow? jobject m_jcompare_buf_a; jobject m_jcompare_buf_b; jobject m_jshortest_buf_start; jobject m_jshortest_buf_limit; jobject m_jshort_buf_key; ThreadLocalPtr* m_tl_buf_a; ThreadLocalPtr* m_tl_buf_b; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_COMPARATORJNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/compression_options.cc000066400000000000000000000117211370372246700224350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompressionOptions. 
#include #include "include/org_rocksdb_CompressionOptions.h" #include "rocksdb/advanced_options.h" /* * Class: org_rocksdb_CompressionOptions * Method: newCompressionOptions * Signature: ()J */ jlong Java_org_rocksdb_CompressionOptions_newCompressionOptions( JNIEnv*, jclass) { const auto* opt = new ROCKSDB_NAMESPACE::CompressionOptions(); return reinterpret_cast(opt); } /* * Class: org_rocksdb_CompressionOptions * Method: setWindowBits * Signature: (JI)V */ void Java_org_rocksdb_CompressionOptions_setWindowBits( JNIEnv*, jobject, jlong jhandle, jint jwindow_bits) { auto* opt = reinterpret_cast(jhandle); opt->window_bits = static_cast(jwindow_bits); } /* * Class: org_rocksdb_CompressionOptions * Method: windowBits * Signature: (J)I */ jint Java_org_rocksdb_CompressionOptions_windowBits( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->window_bits); } /* * Class: org_rocksdb_CompressionOptions * Method: setLevel * Signature: (JI)V */ void Java_org_rocksdb_CompressionOptions_setLevel( JNIEnv*, jobject, jlong jhandle, jint jlevel) { auto* opt = reinterpret_cast(jhandle); opt->level = static_cast(jlevel); } /* * Class: org_rocksdb_CompressionOptions * Method: level * Signature: (J)I */ jint Java_org_rocksdb_CompressionOptions_level( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->level); } /* * Class: org_rocksdb_CompressionOptions * Method: setStrategy * Signature: (JI)V */ void Java_org_rocksdb_CompressionOptions_setStrategy( JNIEnv*, jobject, jlong jhandle, jint jstrategy) { auto* opt = reinterpret_cast(jhandle); opt->strategy = static_cast(jstrategy); } /* * Class: org_rocksdb_CompressionOptions * Method: strategy * Signature: (J)I */ jint Java_org_rocksdb_CompressionOptions_strategy( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->strategy); } /* * Class: org_rocksdb_CompressionOptions * Method: setMaxDictBytes * 
Signature: (JI)V */ void Java_org_rocksdb_CompressionOptions_setMaxDictBytes( JNIEnv*, jobject, jlong jhandle, jint jmax_dict_bytes) { auto* opt = reinterpret_cast(jhandle); opt->max_dict_bytes = static_cast(jmax_dict_bytes); } /* * Class: org_rocksdb_CompressionOptions * Method: maxDictBytes * Signature: (J)I */ jint Java_org_rocksdb_CompressionOptions_maxDictBytes( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_dict_bytes); } /* * Class: org_rocksdb_CompressionOptions * Method: setZstdMaxTrainBytes * Signature: (JI)V */ void Java_org_rocksdb_CompressionOptions_setZstdMaxTrainBytes( JNIEnv*, jobject, jlong jhandle, jint jzstd_max_train_bytes) { auto* opt = reinterpret_cast(jhandle); opt->zstd_max_train_bytes = static_cast(jzstd_max_train_bytes); } /* * Class: org_rocksdb_CompressionOptions * Method: zstdMaxTrainBytes * Signature: (J)I */ jint Java_org_rocksdb_CompressionOptions_zstdMaxTrainBytes( JNIEnv *, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->zstd_max_train_bytes); } /* * Class: org_rocksdb_CompressionOptions * Method: setEnabled * Signature: (JZ)V */ void Java_org_rocksdb_CompressionOptions_setEnabled( JNIEnv*, jobject, jlong jhandle, jboolean jenabled) { auto* opt = reinterpret_cast(jhandle); opt->enabled = jenabled == JNI_TRUE; } /* * Class: org_rocksdb_CompressionOptions * Method: enabled * Signature: (J)Z */ jboolean Java_org_rocksdb_CompressionOptions_enabled( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enabled); } /* * Class: org_rocksdb_CompressionOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_CompressionOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast(jhandle); } rocksdb-6.11.4/java/rocksjni/config_options.cc000066400000000000000000000057751370372246700213550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::ConfigOptions methods // from Java side. #include #include "include/org_rocksdb_ConfigOptions.h" #include "rocksdb/convenience.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_ConfigOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_ConfigOptions_disposeInternal(JNIEnv *, jobject, jlong jhandle) { auto *co = reinterpret_cast(jhandle); assert(co != nullptr); delete co; } /* * Class: org_rocksdb_ConfigOptions * Method: newConfigOptions * Signature: ()J */ jlong Java_org_rocksdb_ConfigOptions_newConfigOptions(JNIEnv *, jclass) { auto *cfg_opt = new ROCKSDB_NAMESPACE::ConfigOptions(); return reinterpret_cast(cfg_opt); } /* * Class: org_rocksdb_ConfigOptions * Method: setDelimiter * Signature: (JLjava/lang/String;)V */ void Java_org_rocksdb_ConfigOptions_setDelimiter(JNIEnv *env, jclass, jlong handle, jstring s) { auto *cfg_opt = reinterpret_cast(handle); const char *delim = env->GetStringUTFChars(s, nullptr); if (delim == nullptr) { // exception thrown: OutOfMemoryError return; } cfg_opt->delimiter = delim; env->ReleaseStringUTFChars(s, delim); } /* * Class: org_rocksdb_ConfigOptions * Method: setIgnoreUnknownOptions * Signature: (JZ)V */ void Java_org_rocksdb_ConfigOptions_setIgnoreUnknownOptions(JNIEnv *, jclass, jlong handle, jboolean b) { auto *cfg_opt = reinterpret_cast(handle); cfg_opt->ignore_unknown_options = static_cast(b); } /* * Class: org_rocksdb_ConfigOptions * Method: setInputStringsEscaped * Signature: (JZ)V */ void Java_org_rocksdb_ConfigOptions_setInputStringsEscaped(JNIEnv *, jclass, jlong handle, jboolean b) { auto *cfg_opt = reinterpret_cast(handle); cfg_opt->input_strings_escaped = static_cast(b); } /* * 
Class: org_rocksdb_ConfigOptions * Method: setSanityLevel * Signature: (JI)V */ void Java_org_rocksdb_ConfigOptions_setSanityLevel(JNIEnv *, jclass, jlong handle, jbyte level) { auto *cfg_opt = reinterpret_cast(handle); cfg_opt->sanity_level = ROCKSDB_NAMESPACE::SanityLevelJni::toCppSanityLevel(level); } rocksdb-6.11.4/java/rocksjni/env.cc000066400000000000000000000160111370372246700171060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::Env methods from Java side. #include #include #include "portal.h" #include "rocksdb/env.h" #include "include/org_rocksdb_Env.h" #include "include/org_rocksdb_HdfsEnv.h" #include "include/org_rocksdb_RocksEnv.h" #include "include/org_rocksdb_RocksMemEnv.h" #include "include/org_rocksdb_TimedEnv.h" /* * Class: org_rocksdb_Env * Method: getDefaultEnvInternal * Signature: ()J */ jlong Java_org_rocksdb_Env_getDefaultEnvInternal( JNIEnv*, jclass) { return reinterpret_cast(ROCKSDB_NAMESPACE::Env::Default()); } /* * Class: org_rocksdb_RocksEnv * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RocksEnv_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); delete e; } /* * Class: org_rocksdb_Env * Method: setBackgroundThreads * Signature: (JIB)V */ void Java_org_rocksdb_Env_setBackgroundThreads( JNIEnv*, jobject, jlong jhandle, jint jnum, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); rocks_env->SetBackgroundThreads( static_cast(jnum), ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); } /* * Class: org_rocksdb_Env * Method: getBackgroundThreads * Signature: (JB)I */ jint 
Java_org_rocksdb_Env_getBackgroundThreads( JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); const int num = rocks_env->GetBackgroundThreads( ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); return static_cast(num); } /* * Class: org_rocksdb_Env * Method: getThreadPoolQueueLen * Signature: (JB)I */ jint Java_org_rocksdb_Env_getThreadPoolQueueLen( JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); const int queue_len = rocks_env->GetThreadPoolQueueLen( ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); return static_cast(queue_len); } /* * Class: org_rocksdb_Env * Method: incBackgroundThreadsIfNeeded * Signature: (JIB)V */ void Java_org_rocksdb_Env_incBackgroundThreadsIfNeeded( JNIEnv*, jobject, jlong jhandle, jint jnum, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); rocks_env->IncBackgroundThreadsIfNeeded( static_cast(jnum), ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); } /* * Class: org_rocksdb_Env * Method: lowerThreadPoolIOPriority * Signature: (JB)V */ void Java_org_rocksdb_Env_lowerThreadPoolIOPriority( JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); rocks_env->LowerThreadPoolIOPriority( ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); } /* * Class: org_rocksdb_Env * Method: lowerThreadPoolCPUPriority * Signature: (JB)V */ void Java_org_rocksdb_Env_lowerThreadPoolCPUPriority( JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); rocks_env->LowerThreadPoolCPUPriority( ROCKSDB_NAMESPACE::PriorityJni::toCppPriority(jpriority_value)); } /* * Class: org_rocksdb_Env * Method: getThreadList * Signature: (J)[Lorg/rocksdb/ThreadStatus; */ jobjectArray Java_org_rocksdb_Env_getThreadList( JNIEnv* env, jobject, jlong jhandle) { auto* rocks_env = reinterpret_cast(jhandle); 
std::vector thread_status; ROCKSDB_NAMESPACE::Status s = rocks_env->GetThreadList(&thread_status); if (!s.ok()) { // error, throw exception ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } // object[] const jsize len = static_cast(thread_status.size()); jobjectArray jthread_status = env->NewObjectArray( len, ROCKSDB_NAMESPACE::ThreadStatusJni::getJClass(env), nullptr); if (jthread_status == nullptr) { // an exception occurred return nullptr; } for (jsize i = 0; i < len; ++i) { jobject jts = ROCKSDB_NAMESPACE::ThreadStatusJni::construct(env, &(thread_status[i])); env->SetObjectArrayElement(jthread_status, i, jts); if (env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(jthread_status); return nullptr; } } return jthread_status; } /* * Class: org_rocksdb_RocksMemEnv * Method: createMemEnv * Signature: (J)J */ jlong Java_org_rocksdb_RocksMemEnv_createMemEnv( JNIEnv*, jclass, jlong jbase_env_handle) { auto* base_env = reinterpret_cast(jbase_env_handle); return reinterpret_cast(ROCKSDB_NAMESPACE::NewMemEnv(base_env)); } /* * Class: org_rocksdb_RocksMemEnv * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RocksMemEnv_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); delete e; } /* * Class: org_rocksdb_HdfsEnv * Method: createHdfsEnv * Signature: (Ljava/lang/String;)J */ jlong Java_org_rocksdb_HdfsEnv_createHdfsEnv( JNIEnv* env, jclass, jstring jfsname) { jboolean has_exception = JNI_FALSE; auto fsname = ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jfsname, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return 0; } ROCKSDB_NAMESPACE::Env* hdfs_env; ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::NewHdfsEnv(&hdfs_env, fsname); if (!s.ok()) { // error occurred ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } return reinterpret_cast(hdfs_env); } /* * Class: org_rocksdb_HdfsEnv * Method: disposeInternal * 
Signature: (J)V */ void Java_org_rocksdb_HdfsEnv_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); delete e; } /* * Class: org_rocksdb_TimedEnv * Method: createTimedEnv * Signature: (J)J */ jlong Java_org_rocksdb_TimedEnv_createTimedEnv( JNIEnv*, jclass, jlong jbase_env_handle) { auto* base_env = reinterpret_cast(jbase_env_handle); return reinterpret_cast(ROCKSDB_NAMESPACE::NewTimedEnv(base_env)); } /* * Class: org_rocksdb_TimedEnv * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_TimedEnv_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); delete e; } rocksdb-6.11.4/java/rocksjni/env_options.cc000066400000000000000000000202551370372246700206660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::EnvOptions methods // from Java side. 
#include #include "include/org_rocksdb_EnvOptions.h" #include "rocksdb/env.h" #define ENV_OPTIONS_SET_BOOL(_jhandle, _opt) \ reinterpret_cast(_jhandle)->_opt = \ static_cast(_opt) #define ENV_OPTIONS_SET_SIZE_T(_jhandle, _opt) \ reinterpret_cast(_jhandle)->_opt = \ static_cast(_opt) #define ENV_OPTIONS_SET_UINT64_T(_jhandle, _opt) \ reinterpret_cast(_jhandle)->_opt = \ static_cast(_opt) #define ENV_OPTIONS_GET(_jhandle, _opt) \ reinterpret_cast(_jhandle)->_opt /* * Class: org_rocksdb_EnvOptions * Method: newEnvOptions * Signature: ()J */ jlong Java_org_rocksdb_EnvOptions_newEnvOptions__( JNIEnv*, jclass) { auto *env_opt = new ROCKSDB_NAMESPACE::EnvOptions(); return reinterpret_cast(env_opt); } /* * Class: org_rocksdb_EnvOptions * Method: newEnvOptions * Signature: (J)J */ jlong Java_org_rocksdb_EnvOptions_newEnvOptions__J( JNIEnv*, jclass, jlong jdboptions_handle) { auto *db_options = reinterpret_cast(jdboptions_handle); auto *env_opt = new ROCKSDB_NAMESPACE::EnvOptions(*db_options); return reinterpret_cast(env_opt); } /* * Class: org_rocksdb_EnvOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_EnvOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto *eo = reinterpret_cast(jhandle); assert(eo != nullptr); delete eo; } /* * Class: org_rocksdb_EnvOptions * Method: setUseMmapReads * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setUseMmapReads( JNIEnv*, jobject, jlong jhandle, jboolean use_mmap_reads) { ENV_OPTIONS_SET_BOOL(jhandle, use_mmap_reads); } /* * Class: org_rocksdb_EnvOptions * Method: useMmapReads * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_useMmapReads( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_mmap_reads); } /* * Class: org_rocksdb_EnvOptions * Method: setUseMmapWrites * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setUseMmapWrites( JNIEnv*, jobject, jlong jhandle, jboolean use_mmap_writes) { ENV_OPTIONS_SET_BOOL(jhandle, use_mmap_writes); } /* * Class: 
org_rocksdb_EnvOptions * Method: useMmapWrites * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_useMmapWrites( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_mmap_writes); } /* * Class: org_rocksdb_EnvOptions * Method: setUseDirectReads * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setUseDirectReads( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_reads) { ENV_OPTIONS_SET_BOOL(jhandle, use_direct_reads); } /* * Class: org_rocksdb_EnvOptions * Method: useDirectReads * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_useDirectReads( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_direct_reads); } /* * Class: org_rocksdb_EnvOptions * Method: setUseDirectWrites * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setUseDirectWrites( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_writes) { ENV_OPTIONS_SET_BOOL(jhandle, use_direct_writes); } /* * Class: org_rocksdb_EnvOptions * Method: useDirectWrites * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_useDirectWrites( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_direct_writes); } /* * Class: org_rocksdb_EnvOptions * Method: setAllowFallocate * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setAllowFallocate( JNIEnv*, jobject, jlong jhandle, jboolean allow_fallocate) { ENV_OPTIONS_SET_BOOL(jhandle, allow_fallocate); } /* * Class: org_rocksdb_EnvOptions * Method: allowFallocate * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_allowFallocate( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, allow_fallocate); } /* * Class: org_rocksdb_EnvOptions * Method: setSetFdCloexec * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setSetFdCloexec( JNIEnv*, jobject, jlong jhandle, jboolean set_fd_cloexec) { ENV_OPTIONS_SET_BOOL(jhandle, set_fd_cloexec); } /* * Class: org_rocksdb_EnvOptions * Method: setFdCloexec * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_setFdCloexec( 
JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, set_fd_cloexec); } /* * Class: org_rocksdb_EnvOptions * Method: setBytesPerSync * Signature: (JJ)V */ void Java_org_rocksdb_EnvOptions_setBytesPerSync( JNIEnv*, jobject, jlong jhandle, jlong bytes_per_sync) { ENV_OPTIONS_SET_UINT64_T(jhandle, bytes_per_sync); } /* * Class: org_rocksdb_EnvOptions * Method: bytesPerSync * Signature: (J)J */ jlong Java_org_rocksdb_EnvOptions_bytesPerSync( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, bytes_per_sync); } /* * Class: org_rocksdb_EnvOptions * Method: setFallocateWithKeepSize * Signature: (JZ)V */ void Java_org_rocksdb_EnvOptions_setFallocateWithKeepSize( JNIEnv*, jobject, jlong jhandle, jboolean fallocate_with_keep_size) { ENV_OPTIONS_SET_BOOL(jhandle, fallocate_with_keep_size); } /* * Class: org_rocksdb_EnvOptions * Method: fallocateWithKeepSize * Signature: (J)Z */ jboolean Java_org_rocksdb_EnvOptions_fallocateWithKeepSize( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, fallocate_with_keep_size); } /* * Class: org_rocksdb_EnvOptions * Method: setCompactionReadaheadSize * Signature: (JJ)V */ void Java_org_rocksdb_EnvOptions_setCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong compaction_readahead_size) { ENV_OPTIONS_SET_SIZE_T(jhandle, compaction_readahead_size); } /* * Class: org_rocksdb_EnvOptions * Method: compactionReadaheadSize * Signature: (J)J */ jlong Java_org_rocksdb_EnvOptions_compactionReadaheadSize( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, compaction_readahead_size); } /* * Class: org_rocksdb_EnvOptions * Method: setRandomAccessMaxBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_EnvOptions_setRandomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong random_access_max_buffer_size) { ENV_OPTIONS_SET_SIZE_T(jhandle, random_access_max_buffer_size); } /* * Class: org_rocksdb_EnvOptions * Method: randomAccessMaxBufferSize * Signature: (J)J */ jlong 
Java_org_rocksdb_EnvOptions_randomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, random_access_max_buffer_size); } /* * Class: org_rocksdb_EnvOptions * Method: setWritableFileMaxBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_EnvOptions_setWritableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong writable_file_max_buffer_size) { ENV_OPTIONS_SET_SIZE_T(jhandle, writable_file_max_buffer_size); } /* * Class: org_rocksdb_EnvOptions * Method: writableFileMaxBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_EnvOptions_writableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, writable_file_max_buffer_size); } /* * Class: org_rocksdb_EnvOptions * Method: setRateLimiter * Signature: (JJ)V */ void Java_org_rocksdb_EnvOptions_setRateLimiter( JNIEnv*, jobject, jlong jhandle, jlong rl_handle) { auto *sptr_rate_limiter = reinterpret_cast *>( rl_handle); auto *env_opt = reinterpret_cast(jhandle); env_opt->rate_limiter = sptr_rate_limiter->get(); } rocksdb-6.11.4/java/rocksjni/filter.cc000066400000000000000000000027521370372246700176120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::FilterPolicy. 
#include #include #include #include #include "include/org_rocksdb_BloomFilter.h" #include "include/org_rocksdb_Filter.h" #include "rocksdb/filter_policy.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_BloomFilter * Method: createBloomFilter * Signature: (DZ)J */ jlong Java_org_rocksdb_BloomFilter_createNewBloomFilter( JNIEnv* /*env*/, jclass /*jcls*/, jdouble bits_per_key, jboolean use_block_base_builder) { auto* sptr_filter = new std::shared_ptr( ROCKSDB_NAMESPACE::NewBloomFilterPolicy(bits_per_key, use_block_base_builder)); return reinterpret_cast(sptr_filter); } /* * Class: org_rocksdb_Filter * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_Filter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( jhandle); delete handle; // delete std::shared_ptr } rocksdb-6.11.4/java/rocksjni/ingest_external_file_options.cc000066400000000000000000000150001370372246700242600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::FilterPolicy. 
#include #include "include/org_rocksdb_IngestExternalFileOptions.h" #include "rocksdb/options.h" /* * Class: org_rocksdb_IngestExternalFileOptions * Method: newIngestExternalFileOptions * Signature: ()J */ jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__( JNIEnv*, jclass) { auto* options = new ROCKSDB_NAMESPACE::IngestExternalFileOptions(); return reinterpret_cast(options); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: newIngestExternalFileOptions * Signature: (ZZZZ)J */ jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__ZZZZ( JNIEnv*, jclass, jboolean jmove_files, jboolean jsnapshot_consistency, jboolean jallow_global_seqno, jboolean jallow_blocking_flush) { auto* options = new ROCKSDB_NAMESPACE::IngestExternalFileOptions(); options->move_files = static_cast(jmove_files); options->snapshot_consistency = static_cast(jsnapshot_consistency); options->allow_global_seqno = static_cast(jallow_global_seqno); options->allow_blocking_flush = static_cast(jallow_blocking_flush); return reinterpret_cast(options); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: moveFiles * Signature: (J)Z */ jboolean Java_org_rocksdb_IngestExternalFileOptions_moveFiles( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->move_files); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: setMoveFiles * Signature: (JZ)V */ void Java_org_rocksdb_IngestExternalFileOptions_setMoveFiles( JNIEnv*, jobject, jlong jhandle, jboolean jmove_files) { auto* options = reinterpret_cast(jhandle); options->move_files = static_cast(jmove_files); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: snapshotConsistency * Signature: (J)Z */ jboolean Java_org_rocksdb_IngestExternalFileOptions_snapshotConsistency( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->snapshot_consistency); } /* * Class: 
org_rocksdb_IngestExternalFileOptions * Method: setSnapshotConsistency * Signature: (JZ)V */ void Java_org_rocksdb_IngestExternalFileOptions_setSnapshotConsistency( JNIEnv*, jobject, jlong jhandle, jboolean jsnapshot_consistency) { auto* options = reinterpret_cast(jhandle); options->snapshot_consistency = static_cast(jsnapshot_consistency); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: allowGlobalSeqNo * Signature: (J)Z */ jboolean Java_org_rocksdb_IngestExternalFileOptions_allowGlobalSeqNo( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->allow_global_seqno); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: setAllowGlobalSeqNo * Signature: (JZ)V */ void Java_org_rocksdb_IngestExternalFileOptions_setAllowGlobalSeqNo( JNIEnv*, jobject, jlong jhandle, jboolean jallow_global_seqno) { auto* options = reinterpret_cast(jhandle); options->allow_global_seqno = static_cast(jallow_global_seqno); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: allowBlockingFlush * Signature: (J)Z */ jboolean Java_org_rocksdb_IngestExternalFileOptions_allowBlockingFlush( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return static_cast(options->allow_blocking_flush); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: setAllowBlockingFlush * Signature: (JZ)V */ void Java_org_rocksdb_IngestExternalFileOptions_setAllowBlockingFlush( JNIEnv*, jobject, jlong jhandle, jboolean jallow_blocking_flush) { auto* options = reinterpret_cast(jhandle); options->allow_blocking_flush = static_cast(jallow_blocking_flush); } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: ingestBehind * Signature: (J)Z */ jboolean Java_org_rocksdb_IngestExternalFileOptions_ingestBehind( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return options->ingest_behind == JNI_TRUE; } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: 
setIngestBehind * Signature: (JZ)V */ void Java_org_rocksdb_IngestExternalFileOptions_setIngestBehind( JNIEnv*, jobject, jlong jhandle, jboolean jingest_behind) { auto* options = reinterpret_cast(jhandle); options->ingest_behind = jingest_behind == JNI_TRUE; } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: writeGlobalSeqno * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_IngestExternalFileOptions_writeGlobalSeqno( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return options->write_global_seqno == JNI_TRUE; } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: setWriteGlobalSeqno * Signature: (JZ)V */ JNIEXPORT void JNICALL Java_org_rocksdb_IngestExternalFileOptions_setWriteGlobalSeqno( JNIEnv*, jobject, jlong jhandle, jboolean jwrite_global_seqno) { auto* options = reinterpret_cast(jhandle); options->write_global_seqno = jwrite_global_seqno == JNI_TRUE; } /* * Class: org_rocksdb_IngestExternalFileOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_IngestExternalFileOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); delete options; } rocksdb-6.11.4/java/rocksjni/iterator.cc000066400000000000000000000217551370372246700201620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::Iterator methods from Java side. 
#include #include #include #include #include "include/org_rocksdb_RocksIterator.h" #include "rocksdb/iterator.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_RocksIterator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RocksIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); assert(it != nullptr); delete it; } /* * Class: org_rocksdb_RocksIterator * Method: isValid0 * Signature: (J)Z */ jboolean Java_org_rocksdb_RocksIterator_isValid0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle)->Valid(); } /* * Class: org_rocksdb_RocksIterator * Method: seekToFirst0 * Signature: (J)V */ void Java_org_rocksdb_RocksIterator_seekToFirst0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToFirst(); } /* * Class: org_rocksdb_RocksIterator * Method: seekToLast0 * Signature: (J)V */ void Java_org_rocksdb_RocksIterator_seekToLast0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToLast(); } /* * Class: org_rocksdb_RocksIterator * Method: next0 * Signature: (J)V */ void Java_org_rocksdb_RocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* * Class: org_rocksdb_RocksIterator * Method: prev0 * Signature: (J)V */ void Java_org_rocksdb_RocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Prev(); } /* * Class: org_rocksdb_RocksIterator * Method: refresh0 * Signature: (J)V */ void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->Refresh(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_RocksIterator * Method: seek0 * Signature: (J[BI)V */ void Java_org_rocksdb_RocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray 
jtarget, jint jtarget_len) { jbyte* target = env->GetByteArrayElements(jtarget, nullptr); if (target == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), jtarget_len); auto* it = reinterpret_cast(handle); it->Seek(target_slice); env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); } /* * Class: org_rocksdb_RocksIterator * Method: seekDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ void Java_org_rocksdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { it->Seek(target_slice); }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_RocksIterator * Method: seekForPrevDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ void Java_org_rocksdb_RocksIterator_seekForPrevDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); auto seekPrev = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { it->SeekForPrev(target_slice); }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seekPrev, env, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_RocksIterator * Method: seekForPrev0 * Signature: (J[BI)V */ void Java_org_rocksdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { jbyte* target = env->GetByteArrayElements(jtarget, nullptr); if (target == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), jtarget_len); auto* it = reinterpret_cast(handle); it->SeekForPrev(target_slice); env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); } /* * Class: org_rocksdb_RocksIterator * Method: status0 * Signature: (J)V */ void 
Java_org_rocksdb_RocksIterator_status0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->status(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_RocksIterator * Method: key0 * Signature: (J)[B */ jbyteArray Java_org_rocksdb_RocksIterator_key0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice key_slice = it->key(); jbyteArray jkey = env->NewByteArray(static_cast(key_slice.size())); if (jkey == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion( jkey, 0, static_cast(key_slice.size()), const_cast(reinterpret_cast(key_slice.data()))); return jkey; } /* * Class: org_rocksdb_RocksIterator * Method: keyDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ jint Java_org_rocksdb_RocksIterator_keyDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice key_slice = it->key(); return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, key_slice, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_RocksIterator * Method: value0 * Signature: (J)[B */ jbyteArray Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice value_slice = it->value(); jbyteArray jkeyValue = env->NewByteArray(static_cast(value_slice.size())); if (jkeyValue == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion( jkeyValue, 0, static_cast(value_slice.size()), const_cast(reinterpret_cast(value_slice.data()))); return jkeyValue; } /* * Class: org_rocksdb_RocksIterator * Method: valueDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ jint Java_org_rocksdb_RocksIterator_valueDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, 
jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice value_slice = it->value(); return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, value_slice, jtarget, jtarget_off, jtarget_len); } rocksdb-6.11.4/java/rocksjni/jnicallback.cc000066400000000000000000000031151370372246700205540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject #include #include "rocksjni/jnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { JniCallback::JniCallback(JNIEnv* env, jobject jcallback_obj) { // Note: jcallback_obj may be accessed by multiple threads, // so we ref the jvm not the env const jint rs = env->GetJavaVM(&m_jvm); if(rs != JNI_OK) { // exception thrown return; } // Note: we may want to access the Java callback object instance // across multiple method calls, so we create a global ref assert(jcallback_obj != nullptr); m_jcallback_obj = env->NewGlobalRef(jcallback_obj); if(jcallback_obj == nullptr) { // exception thrown: OutOfMemoryError return; } } JNIEnv* JniCallback::getJniEnv(jboolean* attached) const { return JniUtil::getJniEnv(m_jvm, attached); } void JniCallback::releaseJniEnv(jboolean& attached) const { JniUtil::releaseJniEnv(m_jvm, attached); } JniCallback::~JniCallback() { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); if (m_jcallback_obj != nullptr) { env->DeleteGlobalRef(m_jcallback_obj); } releaseJniEnv(attached_thread); } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/java/rocksjni/jnicallback.h000066400000000000000000000016241370372246700204210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject #ifndef JAVA_ROCKSJNI_JNICALLBACK_H_ #define JAVA_ROCKSJNI_JNICALLBACK_H_ #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { class JniCallback { public: JniCallback(JNIEnv* env, jobject jcallback_obj); virtual ~JniCallback(); protected: JavaVM* m_jvm; jobject m_jcallback_obj; JNIEnv* getJniEnv(jboolean* attached) const; void releaseJniEnv(jboolean& attached) const; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_JNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/loggerjnicallback.cc000066400000000000000000000214351370372246700217610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Logger. 
#include "include/org_rocksdb_Logger.h" #include #include #include "rocksjni/loggerjnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { LoggerJniCallback::LoggerJniCallback(JNIEnv* env, jobject jlogger) : JniCallback(env, jlogger) { m_jLogMethodId = LoggerJni::getLogMethodId(env); if (m_jLogMethodId == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } jobject jdebug_level = InfoLogLevelJni::DEBUG_LEVEL(env); if (jdebug_level == nullptr) { // exception thrown: NoSuchFieldError, ExceptionInInitializerError // or OutOfMemoryError return; } m_jdebug_level = env->NewGlobalRef(jdebug_level); if (m_jdebug_level == nullptr) { // exception thrown: OutOfMemoryError return; } jobject jinfo_level = InfoLogLevelJni::INFO_LEVEL(env); if (jinfo_level == nullptr) { // exception thrown: NoSuchFieldError, ExceptionInInitializerError // or OutOfMemoryError return; } m_jinfo_level = env->NewGlobalRef(jinfo_level); if (m_jinfo_level == nullptr) { // exception thrown: OutOfMemoryError return; } jobject jwarn_level = InfoLogLevelJni::WARN_LEVEL(env); if (jwarn_level == nullptr) { // exception thrown: NoSuchFieldError, ExceptionInInitializerError // or OutOfMemoryError return; } m_jwarn_level = env->NewGlobalRef(jwarn_level); if (m_jwarn_level == nullptr) { // exception thrown: OutOfMemoryError return; } jobject jerror_level = InfoLogLevelJni::ERROR_LEVEL(env); if (jerror_level == nullptr) { // exception thrown: NoSuchFieldError, ExceptionInInitializerError // or OutOfMemoryError return; } m_jerror_level = env->NewGlobalRef(jerror_level); if (m_jerror_level == nullptr) { // exception thrown: OutOfMemoryError return; } jobject jfatal_level = InfoLogLevelJni::FATAL_LEVEL(env); if (jfatal_level == nullptr) { // exception thrown: NoSuchFieldError, ExceptionInInitializerError // or OutOfMemoryError return; } m_jfatal_level = env->NewGlobalRef(jfatal_level); if (m_jfatal_level == nullptr) { // exception thrown: OutOfMemoryError return; } 
jobject jheader_level = InfoLogLevelJni::HEADER_LEVEL(env); if (jheader_level == nullptr) { // exception thrown: NoSuchFieldError, ExceptionInInitializerError // or OutOfMemoryError return; } m_jheader_level = env->NewGlobalRef(jheader_level); if (m_jheader_level == nullptr) { // exception thrown: OutOfMemoryError return; } } void LoggerJniCallback::Logv(const char* /*format*/, va_list /*ap*/) { // We implement this method because it is virtual but we don't // use it because we need to know about the log level. } void LoggerJniCallback::Logv(const InfoLogLevel log_level, const char* format, va_list ap) { if (GetInfoLogLevel() <= log_level) { // determine InfoLogLevel java enum instance jobject jlog_level; switch (log_level) { case ROCKSDB_NAMESPACE::InfoLogLevel::DEBUG_LEVEL: jlog_level = m_jdebug_level; break; case ROCKSDB_NAMESPACE::InfoLogLevel::INFO_LEVEL: jlog_level = m_jinfo_level; break; case ROCKSDB_NAMESPACE::InfoLogLevel::WARN_LEVEL: jlog_level = m_jwarn_level; break; case ROCKSDB_NAMESPACE::InfoLogLevel::ERROR_LEVEL: jlog_level = m_jerror_level; break; case ROCKSDB_NAMESPACE::InfoLogLevel::FATAL_LEVEL: jlog_level = m_jfatal_level; break; case ROCKSDB_NAMESPACE::InfoLogLevel::HEADER_LEVEL: jlog_level = m_jheader_level; break; default: jlog_level = m_jfatal_level; break; } assert(format != nullptr); const std::unique_ptr msg = format_str(format, ap); // pass msg to java callback handler jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); jstring jmsg = env->NewStringUTF(msg.get()); if (jmsg == nullptr) { // unable to construct string if (env->ExceptionCheck()) { env->ExceptionDescribe(); // print out exception to stderr } releaseJniEnv(attached_thread); return; } if (env->ExceptionCheck()) { // exception thrown: OutOfMemoryError env->ExceptionDescribe(); // print out exception to stderr env->DeleteLocalRef(jmsg); releaseJniEnv(attached_thread); return; } env->CallVoidMethod(m_jcallback_obj, 
m_jLogMethodId, jlog_level, jmsg); if (env->ExceptionCheck()) { // exception thrown env->ExceptionDescribe(); // print out exception to stderr env->DeleteLocalRef(jmsg); releaseJniEnv(attached_thread); return; } env->DeleteLocalRef(jmsg); releaseJniEnv(attached_thread); } } std::unique_ptr LoggerJniCallback::format_str(const char* format, va_list ap) const { va_list ap_copy; va_copy(ap_copy, ap); const size_t required = vsnprintf(nullptr, 0, format, ap_copy) + 1; // Extra space for '\0' va_end(ap_copy); std::unique_ptr buf(new char[required]); va_copy(ap_copy, ap); vsnprintf(buf.get(), required, format, ap_copy); va_end(ap_copy); return buf; } LoggerJniCallback::~LoggerJniCallback() { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); if (m_jdebug_level != nullptr) { env->DeleteGlobalRef(m_jdebug_level); } if (m_jinfo_level != nullptr) { env->DeleteGlobalRef(m_jinfo_level); } if (m_jwarn_level != nullptr) { env->DeleteGlobalRef(m_jwarn_level); } if (m_jerror_level != nullptr) { env->DeleteGlobalRef(m_jerror_level); } if (m_jfatal_level != nullptr) { env->DeleteGlobalRef(m_jfatal_level); } if (m_jheader_level != nullptr) { env->DeleteGlobalRef(m_jheader_level); } releaseJniEnv(attached_thread); } } // namespace ROCKSDB_NAMESPACE /* * Class: org_rocksdb_Logger * Method: createNewLoggerOptions * Signature: (J)J */ jlong Java_org_rocksdb_Logger_createNewLoggerOptions(JNIEnv* env, jobject jobj, jlong joptions) { auto* sptr_logger = new std::shared_ptr( new ROCKSDB_NAMESPACE::LoggerJniCallback(env, jobj)); // set log level auto* options = reinterpret_cast(joptions); sptr_logger->get()->SetInfoLogLevel(options->info_log_level); return reinterpret_cast(sptr_logger); } /* * Class: org_rocksdb_Logger * Method: createNewLoggerDbOptions * Signature: (J)J */ jlong Java_org_rocksdb_Logger_createNewLoggerDbOptions(JNIEnv* env, jobject jobj, jlong jdb_options) { auto* sptr_logger = new std::shared_ptr( new 
ROCKSDB_NAMESPACE::LoggerJniCallback(env, jobj)); // set log level auto* db_options = reinterpret_cast(jdb_options); sptr_logger->get()->SetInfoLogLevel(db_options->info_log_level); return reinterpret_cast(sptr_logger); } /* * Class: org_rocksdb_Logger * Method: setInfoLogLevel * Signature: (JB)V */ void Java_org_rocksdb_Logger_setInfoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jbyte jlog_level) { auto* handle = reinterpret_cast*>( jhandle); handle->get()->SetInfoLogLevel( static_cast(jlog_level)); } /* * Class: org_rocksdb_Logger * Method: infoLogLevel * Signature: (J)B */ jbyte Java_org_rocksdb_Logger_infoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( jhandle); return static_cast(handle->get()->GetInfoLogLevel()); } /* * Class: org_rocksdb_Logger * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_Logger_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( jhandle); delete handle; // delete std::shared_ptr } rocksdb-6.11.4/java/rocksjni/loggerjnicallback.h000066400000000000000000000031301370372246700216130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Logger #ifndef JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_ #define JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_ #include #include #include #include "rocksjni/jnicallback.h" #include "port/port.h" #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { class LoggerJniCallback : public JniCallback, public Logger { public: LoggerJniCallback(JNIEnv* env, jobject jLogger); ~LoggerJniCallback(); using Logger::SetInfoLogLevel; using Logger::GetInfoLogLevel; // Write an entry to the log file with the specified format. virtual void Logv(const char* format, va_list ap); // Write an entry to the log file with the specified log level // and format. Any log with level under the internal log level // of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be // printed. virtual void Logv(const InfoLogLevel log_level, const char* format, va_list ap); private: jmethodID m_jLogMethodId; jobject m_jdebug_level; jobject m_jinfo_level; jobject m_jwarn_level; jobject m_jerror_level; jobject m_jfatal_level; jobject m_jheader_level; std::unique_ptr format_str(const char* format, va_list ap) const; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_LOGGERJNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/lru_cache.cc000066400000000000000000000032251370372246700202460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::LRUCache. 
#include #include "cache/lru_cache.h" #include "include/org_rocksdb_LRUCache.h" /* * Class: org_rocksdb_LRUCache * Method: newLRUCache * Signature: (JIZD)J */ jlong Java_org_rocksdb_LRUCache_newLRUCache(JNIEnv* /*env*/, jclass /*jcls*/, jlong jcapacity, jint jnum_shard_bits, jboolean jstrict_capacity_limit, jdouble jhigh_pri_pool_ratio) { auto* sptr_lru_cache = new std::shared_ptr( ROCKSDB_NAMESPACE::NewLRUCache( static_cast(jcapacity), static_cast(jnum_shard_bits), static_cast(jstrict_capacity_limit), static_cast(jhigh_pri_pool_ratio))); return reinterpret_cast(sptr_lru_cache); } /* * Class: org_rocksdb_LRUCache * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_LRUCache_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_lru_cache = reinterpret_cast*>(jhandle); delete sptr_lru_cache; // delete std::shared_ptr } rocksdb-6.11.4/java/rocksjni/memory_util.cc000066400000000000000000000076161370372246700206760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include #include #include #include #include "include/org_rocksdb_MemoryUtil.h" #include "rocksjni/portal.h" #include "rocksdb/utilities/memory_util.h" /* * Class: org_rocksdb_MemoryUtil * Method: getApproximateMemoryUsageByType * Signature: ([J[J)Ljava/util/Map; */ jobject Java_org_rocksdb_MemoryUtil_getApproximateMemoryUsageByType( JNIEnv *env, jclass /*jclazz*/, jlongArray jdb_handles, jlongArray jcache_handles) { std::vector dbs; jsize db_handle_count = env->GetArrayLength(jdb_handles); if(db_handle_count > 0) { jlong *ptr_jdb_handles = env->GetLongArrayElements(jdb_handles, nullptr); if (ptr_jdb_handles == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < db_handle_count; i++) { dbs.push_back( reinterpret_cast(ptr_jdb_handles[i])); } env->ReleaseLongArrayElements(jdb_handles, ptr_jdb_handles, JNI_ABORT); } std::unordered_set cache_set; jsize cache_handle_count = env->GetArrayLength(jcache_handles); if(cache_handle_count > 0) { jlong *ptr_jcache_handles = env->GetLongArrayElements(jcache_handles, nullptr); if (ptr_jcache_handles == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < cache_handle_count; i++) { auto *cache_ptr = reinterpret_cast *>( ptr_jcache_handles[i]); cache_set.insert(cache_ptr->get()); } env->ReleaseLongArrayElements(jcache_handles, ptr_jcache_handles, JNI_ABORT); } std::map usage_by_type; if (ROCKSDB_NAMESPACE::MemoryUtil::GetApproximateMemoryUsageByType( dbs, cache_set, &usage_by_type) != ROCKSDB_NAMESPACE::Status::OK()) { // Non-OK status return nullptr; } jobject jusage_by_type = ROCKSDB_NAMESPACE::HashMapJni::construct( env, static_cast(usage_by_type.size())); if (jusage_by_type == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const ROCKSDB_NAMESPACE::MemoryUtil::UsageType, const uint64_t, jobject, jobject> fn_map_kv = [env]( const std::pair &pair) { // Construct key const jobject jusage_type = 
ROCKSDB_NAMESPACE::ByteJni::valueOf( env, ROCKSDB_NAMESPACE::MemoryUsageTypeJni::toJavaMemoryUsageType( pair.first)); if (jusage_type == nullptr) { // an error occurred return std::unique_ptr>(nullptr); } // Construct value const jobject jusage_value = ROCKSDB_NAMESPACE::LongJni::valueOf(env, pair.second); if (jusage_value == nullptr) { // an error occurred return std::unique_ptr>(nullptr); } // Construct and return pointer to pair of jobjects return std::unique_ptr>( new std::pair(jusage_type, jusage_value)); }; if (!ROCKSDB_NAMESPACE::HashMapJni::putAll(env, jusage_by_type, usage_by_type.begin(), usage_by_type.end(), fn_map_kv)) { // exception occcurred jusage_by_type = nullptr; } return jusage_by_type; } rocksdb-6.11.4/java/rocksjni/memtablejni.cc000066400000000000000000000072311370372246700206110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for MemTables. 
#include "include/org_rocksdb_HashLinkedListMemTableConfig.h" #include "include/org_rocksdb_HashSkipListMemTableConfig.h" #include "include/org_rocksdb_SkipListMemTableConfig.h" #include "include/org_rocksdb_VectorMemTableConfig.h" #include "rocksdb/memtablerep.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_HashSkipListMemTableConfig * Method: newMemTableFactoryHandle * Signature: (JII)J */ jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( JNIEnv* env, jobject /*jobj*/, jlong jbucket_count, jint jheight, jint jbranching_factor) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jbucket_count); if (s.ok()) { return reinterpret_cast(ROCKSDB_NAMESPACE::NewHashSkipListRepFactory( static_cast(jbucket_count), static_cast(jheight), static_cast(jbranching_factor))); } ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); return 0; } /* * Class: org_rocksdb_HashLinkedListMemTableConfig * Method: newMemTableFactoryHandle * Signature: (JJIZI)J */ jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( JNIEnv* env, jobject /*jobj*/, jlong jbucket_count, jlong jhuge_page_tlb_size, jint jbucket_entries_logging_threshold, jboolean jif_log_bucket_dist_when_flash, jint jthreshold_use_skiplist) { ROCKSDB_NAMESPACE::Status statusBucketCount = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jbucket_count); ROCKSDB_NAMESPACE::Status statusHugePageTlb = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jhuge_page_tlb_size); if (statusBucketCount.ok() && statusHugePageTlb.ok()) { return reinterpret_cast(ROCKSDB_NAMESPACE::NewHashLinkListRepFactory( static_cast(jbucket_count), static_cast(jhuge_page_tlb_size), static_cast(jbucket_entries_logging_threshold), static_cast(jif_log_bucket_dist_when_flash), static_cast(jthreshold_use_skiplist))); } ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( env, !statusBucketCount.ok() ? 
statusBucketCount : statusHugePageTlb); return 0; } /* * Class: org_rocksdb_VectorMemTableConfig * Method: newMemTableFactoryHandle * Signature: (J)J */ jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle( JNIEnv* env, jobject /*jobj*/, jlong jreserved_size) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jreserved_size); if (s.ok()) { return reinterpret_cast(new ROCKSDB_NAMESPACE::VectorRepFactory( static_cast(jreserved_size))); } ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); return 0; } /* * Class: org_rocksdb_SkipListMemTableConfig * Method: newMemTableFactoryHandle0 * Signature: (J)J */ jlong Java_org_rocksdb_SkipListMemTableConfig_newMemTableFactoryHandle0( JNIEnv* env, jobject /*jobj*/, jlong jlookahead) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jlookahead); if (s.ok()) { return reinterpret_cast(new ROCKSDB_NAMESPACE::SkipListFactory( static_cast(jlookahead))); } ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); return 0; } rocksdb-6.11.4/java/rocksjni/merge_operator.cc000066400000000000000000000055251370372246700213400ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2014, Vlad Balan (vlad.gm@gmail.com). All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::MergeOperator. 
#include #include #include #include #include #include "include/org_rocksdb_StringAppendOperator.h" #include "include/org_rocksdb_UInt64AddOperator.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "rocksjni/portal.h" #include "utilities/merge_operators.h" /* * Class: org_rocksdb_StringAppendOperator * Method: newSharedStringAppendOperator * Signature: (C)J */ jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator( JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) { auto* sptr_string_append_op = new std::shared_ptr( ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator( (char)jdelim)); return reinterpret_cast(sptr_string_append_op); } /* * Class: org_rocksdb_StringAppendOperator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_StringAppendOperator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_string_append_op = reinterpret_cast*>( jhandle); delete sptr_string_append_op; // delete std::shared_ptr } /* * Class: org_rocksdb_UInt64AddOperator * Method: newSharedUInt64AddOperator * Signature: ()J */ jlong Java_org_rocksdb_UInt64AddOperator_newSharedUInt64AddOperator( JNIEnv* /*env*/, jclass /*jclazz*/) { auto* sptr_uint64_add_op = new std::shared_ptr( ROCKSDB_NAMESPACE::MergeOperators::CreateUInt64AddOperator()); return reinterpret_cast(sptr_uint64_add_op); } /* * Class: org_rocksdb_UInt64AddOperator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_UInt64AddOperator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_uint64_add_op = reinterpret_cast*>( jhandle); delete sptr_uint64_add_op; // delete std::shared_ptr } rocksdb-6.11.4/java/rocksjni/native_comparator_wrapper_test.cc000066400000000000000000000026121370372246700246340ustar00rootroot00000000000000// Copyright 
(c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include "rocksdb/comparator.h" #include "rocksdb/slice.h" #include "include/org_rocksdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h" namespace ROCKSDB_NAMESPACE { class NativeComparatorWrapperTestStringComparator : public Comparator { const char* Name() const { return "NativeComparatorWrapperTestStringComparator"; } int Compare(const Slice& a, const Slice& b) const { return a.ToString().compare(b.ToString()); } void FindShortestSeparator(std::string* /*start*/, const Slice& /*limit*/) const { return; } void FindShortSuccessor(std::string* /*key*/) const { return; } }; } // namespace ROCKSDB_NAMESPACE /* * Class: org_rocksdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper * Method: newStringComparator * Signature: ()J */ jlong Java_org_rocksdb_NativeComparatorWrapperTest_00024NativeStringComparatorWrapper_newStringComparator( JNIEnv* /*env*/, jobject /*jobj*/) { auto* comparator = new ROCKSDB_NAMESPACE::NativeComparatorWrapperTestStringComparator(); return reinterpret_cast(comparator); } rocksdb-6.11.4/java/rocksjni/optimistic_transaction_db.cc000066400000000000000000000241551370372246700235640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::TransactionDB. 
#include #include "include/org_rocksdb_OptimisticTransactionDB.h" #include "rocksdb/options.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_OptimisticTransactionDB * Method: open * Signature: (JLjava/lang/String;)J */ jlong Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2( JNIEnv* env, jclass, jlong joptions_handle, jstring jdb_path) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return 0; } auto* options = reinterpret_cast(joptions_handle); ROCKSDB_NAMESPACE::OptimisticTransactionDB* otdb = nullptr; ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::OptimisticTransactionDB::Open(*options, db_path, &otdb); env->ReleaseStringUTFChars(jdb_path, db_path); if (s.ok()) { return reinterpret_cast(otdb); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: open * Signature: (JLjava/lang/String;[[B[J)[J */ jlongArray Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jdb_options_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options_handles) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } std::vector column_families; const jsize len_cols = env->GetArrayLength(jcolumn_names); if (len_cols > 0) { if (env->EnsureLocalCapacity(len_cols) != 0) { // out of memory env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } jlong* jco = env->GetLongArrayElements(jcolumn_options_handles, nullptr); if (jco == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } for (int i = 0; i < len_cols; i++) { const jobject jcn = 
env->GetObjectArrayElement(jcolumn_names, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } const jbyteArray jcn_ba = reinterpret_cast(jcn); const jsize jcf_name_len = env->GetArrayLength(jcn_ba); if (env->EnsureLocalCapacity(jcf_name_len) != 0) { // out of memory env->DeleteLocalRef(jcn); env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } jbyte* jcf_name = env->GetByteArrayElements(jcn_ba, nullptr); if (jcf_name == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jcn); env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } const std::string cf_name(reinterpret_cast(jcf_name), jcf_name_len); const ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = reinterpret_cast(jco[i]); column_families.push_back( ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); env->ReleaseByteArrayElements(jcn_ba, jcf_name, JNI_ABORT); env->DeleteLocalRef(jcn); } env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); } auto* db_options = reinterpret_cast(jdb_options_handle); std::vector handles; ROCKSDB_NAMESPACE::OptimisticTransactionDB* otdb = nullptr; const ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::OptimisticTransactionDB::Open( *db_options, db_path, column_families, &handles, &otdb); env->ReleaseStringUTFChars(jdb_path, db_path); // check if open operation was successful if (s.ok()) { const jsize resultsLen = 1 + len_cols; // db handle + column family handles std::unique_ptr results = std::unique_ptr(new jlong[resultsLen]); results[0] = reinterpret_cast(otdb); for (int i = 1; i <= len_cols; i++) { results[i] = reinterpret_cast(handles[i - 1]); } jlongArray jresults = 
env->NewLongArray(resultsLen); if (jresults == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException return nullptr; } return jresults; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_OptimisticTransactionDB_disposeInternal( JNIEnv *, jobject, jlong jhandle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); assert(optimistic_txn_db != nullptr); delete optimistic_txn_db; } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: closeDatabase * Signature: (J)V */ void Java_org_rocksdb_OptimisticTransactionDB_closeDatabase( JNIEnv* env, jclass, jlong jhandle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); assert(optimistic_txn_db != nullptr); ROCKSDB_NAMESPACE::Status s = optimistic_txn_db->Close(); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: beginTransaction * Signature: (JJ)J */ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction(*write_options); return reinterpret_cast(txn); } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: beginTransaction * Signature: (JJJ)J */ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJJ( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jwrite_options_handle, jlong joptimistic_txn_options_handle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* optimistic_txn_options = 
reinterpret_cast( joptimistic_txn_options_handle); ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction( *write_options, *optimistic_txn_options); return reinterpret_cast(txn); } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: beginTransaction_withOld * Signature: (JJJ)J */ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jold_txn_handle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* old_txn = reinterpret_cast(jold_txn_handle); ROCKSDB_NAMESPACE::OptimisticTransactionOptions optimistic_txn_options; ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction( *write_options, optimistic_txn_options, old_txn); // RocksJava relies on the assumption that // we do not allocate a new Transaction object // when providing an old_optimistic_txn assert(txn == old_txn); return reinterpret_cast(txn); } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: beginTransaction_withOld * Signature: (JJJJ)J */ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong joptimistic_txn_options_handle, jlong jold_txn_handle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* optimistic_txn_options = reinterpret_cast( joptimistic_txn_options_handle); auto* old_txn = reinterpret_cast(jold_txn_handle); ROCKSDB_NAMESPACE::Transaction* txn = optimistic_txn_db->BeginTransaction( *write_options, *optimistic_txn_options, old_txn); // RocksJava relies on the assumption that // we do not allocate a new Transaction object // when providing an old_optimisic_txn assert(txn == old_txn); return reinterpret_cast(txn); } /* * Class: org_rocksdb_OptimisticTransactionDB * Method: getBaseDB * Signature: (J)J */ jlong 
Java_org_rocksdb_OptimisticTransactionDB_getBaseDB( JNIEnv*, jobject, jlong jhandle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); return reinterpret_cast(optimistic_txn_db->GetBaseDB()); } rocksdb-6.11.4/java/rocksjni/optimistic_transaction_options.cc000066400000000000000000000047551370372246700246760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::OptimisticTransactionOptions. #include #include "include/org_rocksdb_OptimisticTransactionOptions.h" #include "rocksdb/comparator.h" #include "rocksdb/utilities/optimistic_transaction_db.h" /* * Class: org_rocksdb_OptimisticTransactionOptions * Method: newOptimisticTransactionOptions * Signature: ()J */ jlong Java_org_rocksdb_OptimisticTransactionOptions_newOptimisticTransactionOptions( JNIEnv* /*env*/, jclass /*jcls*/) { ROCKSDB_NAMESPACE::OptimisticTransactionOptions* opts = new ROCKSDB_NAMESPACE::OptimisticTransactionOptions(); return reinterpret_cast(opts); } /* * Class: org_rocksdb_OptimisticTransactionOptions * Method: isSetSnapshot * Signature: (J)Z */ jboolean Java_org_rocksdb_OptimisticTransactionOptions_isSetSnapshot( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast( jhandle); return opts->set_snapshot; } /* * Class: org_rocksdb_OptimisticTransactionOptions * Method: setSetSnapshot * Signature: (JZ)V */ void Java_org_rocksdb_OptimisticTransactionOptions_setSetSnapshot( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jset_snapshot) { auto* opts = reinterpret_cast( jhandle); opts->set_snapshot = jset_snapshot; } /* * Class: org_rocksdb_OptimisticTransactionOptions * Method: setComparator * Signature: (JJ)V */ void 
Java_org_rocksdb_OptimisticTransactionOptions_setComparator( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jcomparator_handle) { auto* opts = reinterpret_cast( jhandle); opts->cmp = reinterpret_cast(jcomparator_handle); } /* * Class: org_rocksdb_OptimisticTransactionOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_OptimisticTransactionOptions_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast( jhandle); } rocksdb-6.11.4/java/rocksjni/options.cc000066400000000000000000006644631370372246700200350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Options. #include #include #include #include #include #include "include/org_rocksdb_ColumnFamilyOptions.h" #include "include/org_rocksdb_ComparatorOptions.h" #include "include/org_rocksdb_DBOptions.h" #include "include/org_rocksdb_FlushOptions.h" #include "include/org_rocksdb_Options.h" #include "include/org_rocksdb_ReadOptions.h" #include "include/org_rocksdb_WriteOptions.h" #include "rocksjni/comparatorjnicallback.h" #include "rocksjni/portal.h" #include "rocksjni/statisticsjni.h" #include "rocksjni/table_filter_jnicallback.h" #include "rocksdb/comparator.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "utilities/merge_operators.h" /* * Class: org_rocksdb_Options * Method: newOptions * Signature: ()J */ jlong Java_org_rocksdb_Options_newOptions__( JNIEnv*, jclass) { auto* op = new 
ROCKSDB_NAMESPACE::Options(); return reinterpret_cast(op); } /* * Class: org_rocksdb_Options * Method: newOptions * Signature: (JJ)J */ jlong Java_org_rocksdb_Options_newOptions__JJ( JNIEnv*, jclass, jlong jdboptions, jlong jcfoptions) { auto* dbOpt = reinterpret_cast(jdboptions); auto* cfOpt = reinterpret_cast( jcfoptions); auto* op = new ROCKSDB_NAMESPACE::Options(*dbOpt, *cfOpt); return reinterpret_cast(op); } /* * Class: org_rocksdb_Options * Method: copyOptions * Signature: (J)J */ jlong Java_org_rocksdb_Options_copyOptions( JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::Options( *(reinterpret_cast(jhandle))); return reinterpret_cast(new_opt); } /* * Class: org_rocksdb_Options * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_Options_disposeInternal( JNIEnv*, jobject, jlong handle) { auto* op = reinterpret_cast(handle); assert(op != nullptr); delete op; } /* * Class: org_rocksdb_Options * Method: setIncreaseParallelism * Signature: (JI)V */ void Java_org_rocksdb_Options_setIncreaseParallelism( JNIEnv*, jobject, jlong jhandle, jint totalThreads) { reinterpret_cast(jhandle)->IncreaseParallelism( static_cast(totalThreads)); } /* * Class: org_rocksdb_Options * Method: setCreateIfMissing * Signature: (JZ)V */ void Java_org_rocksdb_Options_setCreateIfMissing( JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle)->create_if_missing = flag; } /* * Class: org_rocksdb_Options * Method: createIfMissing * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_createIfMissing( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_if_missing; } /* * Class: org_rocksdb_Options * Method: setCreateMissingColumnFamilies * Signature: (JZ)V */ void Java_org_rocksdb_Options_setCreateMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle) ->create_missing_column_families = flag; } /* * Class: org_rocksdb_Options * Method: createMissingColumnFamilies * 
Signature: (J)Z */ jboolean Java_org_rocksdb_Options_createMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_missing_column_families; } /* * Class: org_rocksdb_Options * Method: setComparatorHandle * Signature: (JI)V */ void Java_org_rocksdb_Options_setComparatorHandle__JI( JNIEnv*, jobject, jlong jhandle, jint builtinComparator) { switch (builtinComparator) { case 1: reinterpret_cast(jhandle)->comparator = ROCKSDB_NAMESPACE::ReverseBytewiseComparator(); break; default: reinterpret_cast(jhandle)->comparator = ROCKSDB_NAMESPACE::BytewiseComparator(); break; } } /* * Class: org_rocksdb_Options * Method: setComparatorHandle * Signature: (JJB)V */ void Java_org_rocksdb_Options_setComparatorHandle__JJB( JNIEnv*, jobject, jlong jopt_handle, jlong jcomparator_handle, jbyte jcomparator_type) { ROCKSDB_NAMESPACE::Comparator* comparator = nullptr; switch (jcomparator_type) { // JAVA_COMPARATOR case 0x0: comparator = reinterpret_cast( jcomparator_handle); break; // JAVA_NATIVE_COMPARATOR_WRAPPER case 0x1: comparator = reinterpret_cast(jcomparator_handle); break; } auto* opt = reinterpret_cast(jopt_handle); opt->comparator = comparator; } /* * Class: org_rocksdb_Options * Method: setMergeOperatorName * Signature: (JJjava/lang/String)V */ void Java_org_rocksdb_Options_setMergeOperatorName( JNIEnv* env, jobject, jlong jhandle, jstring jop_name) { const char* op_name = env->GetStringUTFChars(jop_name, nullptr); if (op_name == nullptr) { // exception thrown: OutOfMemoryError return; } auto* options = reinterpret_cast(jhandle); options->merge_operator = ROCKSDB_NAMESPACE::MergeOperators::CreateFromStringId(op_name); env->ReleaseStringUTFChars(jop_name, op_name); } /* * Class: org_rocksdb_Options * Method: setMergeOperator * Signature: (JJjava/lang/String)V */ void Java_org_rocksdb_Options_setMergeOperator( JNIEnv*, jobject, jlong jhandle, jlong mergeOperatorHandle) { reinterpret_cast(jhandle)->merge_operator = *(reinterpret_cast*>( 
mergeOperatorHandle)); } /* * Class: org_rocksdb_Options * Method: setCompactionFilterHandle * Signature: (JJ)V */ void Java_org_rocksdb_Options_setCompactionFilterHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilter_handle) { reinterpret_cast(jopt_handle) ->compaction_filter = reinterpret_cast( jcompactionfilter_handle); } /* * Class: org_rocksdb_Options * Method: setCompactionFilterFactoryHandle * Signature: (JJ)V */ void JNICALL Java_org_rocksdb_Options_setCompactionFilterFactoryHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilterfactory_handle) { auto* cff_factory = reinterpret_cast< std::shared_ptr*>( jcompactionfilterfactory_handle); reinterpret_cast(jopt_handle) ->compaction_filter_factory = *cff_factory; } /* * Class: org_rocksdb_Options * Method: setWriteBufferSize * Signature: (JJ)I */ void Java_org_rocksdb_Options_setWriteBufferSize( JNIEnv* env, jobject, jlong jhandle, jlong jwrite_buffer_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jwrite_buffer_size); if (s.ok()) { reinterpret_cast(jhandle)->write_buffer_size = jwrite_buffer_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: setWriteBufferManager * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWriteBufferManager( JNIEnv*, jobject, jlong joptions_handle, jlong jwrite_buffer_manager_handle) { auto* write_buffer_manager = reinterpret_cast*>( jwrite_buffer_manager_handle); reinterpret_cast(joptions_handle) ->write_buffer_manager = *write_buffer_manager; } /* * Class: org_rocksdb_Options * Method: writeBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_writeBufferSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_buffer_size; } /* * Class: org_rocksdb_Options * Method: setMaxWriteBufferNumber * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxWriteBufferNumber( JNIEnv*, jobject, jlong jhandle, jint 
jmax_write_buffer_number) { reinterpret_cast(jhandle) ->max_write_buffer_number = jmax_write_buffer_number; } /* * Class: org_rocksdb_Options * Method: setStatistics * Signature: (JJ)V */ void Java_org_rocksdb_Options_setStatistics( JNIEnv*, jobject, jlong jhandle, jlong jstatistics_handle) { auto* opt = reinterpret_cast(jhandle); auto* pSptr = reinterpret_cast*>( jstatistics_handle); opt->statistics = *pSptr; } /* * Class: org_rocksdb_Options * Method: statistics * Signature: (J)J */ jlong Java_org_rocksdb_Options_statistics( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); std::shared_ptr sptr = opt->statistics; if (sptr == nullptr) { return 0; } else { std::shared_ptr* pSptr = new std::shared_ptr(sptr); return reinterpret_cast(pSptr); } } /* * Class: org_rocksdb_Options * Method: maxWriteBufferNumber * Signature: (J)I */ jint Java_org_rocksdb_Options_maxWriteBufferNumber( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number; } /* * Class: org_rocksdb_Options * Method: errorIfExists * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_errorIfExists( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->error_if_exists; } /* * Class: org_rocksdb_Options * Method: setErrorIfExists * Signature: (JZ)V */ void Java_org_rocksdb_Options_setErrorIfExists( JNIEnv*, jobject, jlong jhandle, jboolean error_if_exists) { reinterpret_cast(jhandle)->error_if_exists = static_cast(error_if_exists); } /* * Class: org_rocksdb_Options * Method: paranoidChecks * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_paranoidChecks( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_checks; } /* * Class: org_rocksdb_Options * Method: setParanoidChecks * Signature: (JZ)V */ void Java_org_rocksdb_Options_setParanoidChecks( JNIEnv*, jobject, jlong jhandle, jboolean paranoid_checks) { reinterpret_cast(jhandle)->paranoid_checks = static_cast(paranoid_checks); } /* * Class: 
org_rocksdb_Options * Method: setEnv * Signature: (JJ)V */ void Java_org_rocksdb_Options_setEnv( JNIEnv*, jobject, jlong jhandle, jlong jenv) { reinterpret_cast(jhandle)->env = reinterpret_cast(jenv); } /* * Class: org_rocksdb_Options * Method: setMaxTotalWalSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxTotalWalSize( JNIEnv*, jobject, jlong jhandle, jlong jmax_total_wal_size) { reinterpret_cast(jhandle)->max_total_wal_size = static_cast(jmax_total_wal_size); } /* * Class: org_rocksdb_Options * Method: maxTotalWalSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxTotalWalSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_total_wal_size; } /* * Class: org_rocksdb_Options * Method: maxOpenFiles * Signature: (J)I */ jint Java_org_rocksdb_Options_maxOpenFiles( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->max_open_files; } /* * Class: org_rocksdb_Options * Method: setMaxOpenFiles * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxOpenFiles( JNIEnv*, jobject, jlong jhandle, jint max_open_files) { reinterpret_cast(jhandle)->max_open_files = static_cast(max_open_files); } /* * Class: org_rocksdb_Options * Method: setMaxFileOpeningThreads * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxFileOpeningThreads( JNIEnv*, jobject, jlong jhandle, jint jmax_file_opening_threads) { reinterpret_cast(jhandle) ->max_file_opening_threads = static_cast(jmax_file_opening_threads); } /* * Class: org_rocksdb_Options * Method: maxFileOpeningThreads * Signature: (J)I */ jint Java_org_rocksdb_Options_maxFileOpeningThreads( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_file_opening_threads); } /* * Class: org_rocksdb_Options * Method: useFsync * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_useFsync( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->use_fsync; } /* * Class: org_rocksdb_Options * Method: setUseFsync * 
Signature: (JZ)V */ void Java_org_rocksdb_Options_setUseFsync( JNIEnv*, jobject, jlong jhandle, jboolean use_fsync) { reinterpret_cast(jhandle)->use_fsync = static_cast(use_fsync); } /* * Class: org_rocksdb_Options * Method: setDbPaths * Signature: (J[Ljava/lang/String;[J)V */ void Java_org_rocksdb_Options_setDbPaths( JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { std::vector db_paths; jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, nullptr); if (ptr_jtarget_size == nullptr) { // exception thrown: OutOfMemoryError return; } jboolean has_exception = JNI_FALSE; const jsize len = env->GetArrayLength(jpaths); for (jsize i = 0; i < len; i++) { jobject jpath = reinterpret_cast(env->GetObjectArrayElement(jpaths, i)); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } std::string path = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, static_cast(jpath), &has_exception); env->DeleteLocalRef(jpath); if (has_exception == JNI_TRUE) { env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } jlong jtarget_size = ptr_jtarget_size[i]; db_paths.push_back( ROCKSDB_NAMESPACE::DbPath(path, static_cast(jtarget_size))); } env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); auto* opt = reinterpret_cast(jhandle); opt->db_paths = db_paths; } /* * Class: org_rocksdb_Options * Method: dbPathsLen * Signature: (J)J */ jlong Java_org_rocksdb_Options_dbPathsLen( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_paths.size()); } /* * Class: org_rocksdb_Options * Method: dbPaths * Signature: (J[Ljava/lang/String;[J)V */ void Java_org_rocksdb_Options_dbPaths( JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, nullptr); if 
(ptr_jtarget_size == nullptr) { // exception thrown: OutOfMemoryError return; } auto* opt = reinterpret_cast(jhandle); const jsize len = env->GetArrayLength(jpaths); for (jsize i = 0; i < len; i++) { ROCKSDB_NAMESPACE::DbPath db_path = opt->db_paths[i]; jstring jpath = env->NewStringUTF(db_path.path.c_str()); if (jpath == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } env->SetObjectArrayElement(jpaths, i, jpath); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jpath); env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } ptr_jtarget_size[i] = static_cast(db_path.target_size); } env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_COMMIT); } /* * Class: org_rocksdb_Options * Method: dbLogDir * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_Options_dbLogDir( JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle) ->db_log_dir.c_str()); } /* * Class: org_rocksdb_Options * Method: setDbLogDir * Signature: (JLjava/lang/String)V */ void Java_org_rocksdb_Options_setDbLogDir( JNIEnv* env, jobject, jlong jhandle, jstring jdb_log_dir) { const char* log_dir = env->GetStringUTFChars(jdb_log_dir, nullptr); if (log_dir == nullptr) { // exception thrown: OutOfMemoryError return; } reinterpret_cast(jhandle)->db_log_dir.assign( log_dir); env->ReleaseStringUTFChars(jdb_log_dir, log_dir); } /* * Class: org_rocksdb_Options * Method: walDir * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_Options_walDir( JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle)->wal_dir.c_str()); } /* * Class: org_rocksdb_Options * Method: setWalDir * Signature: (JLjava/lang/String)V */ void Java_org_rocksdb_Options_setWalDir( JNIEnv* env, jobject, jlong jhandle, jstring jwal_dir) { const char* wal_dir = 
env->GetStringUTFChars(jwal_dir, nullptr); if (wal_dir == nullptr) { // exception thrown: OutOfMemoryError return; } reinterpret_cast(jhandle)->wal_dir.assign( wal_dir); env->ReleaseStringUTFChars(jwal_dir, wal_dir); } /* * Class: org_rocksdb_Options * Method: deleteObsoleteFilesPeriodMicros * Signature: (J)J */ jlong Java_org_rocksdb_Options_deleteObsoleteFilesPeriodMicros( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros; } /* * Class: org_rocksdb_Options * Method: setDeleteObsoleteFilesPeriodMicros * Signature: (JJ)V */ void Java_org_rocksdb_Options_setDeleteObsoleteFilesPeriodMicros( JNIEnv*, jobject, jlong jhandle, jlong micros) { reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros = static_cast(micros); } /* * Class: org_rocksdb_Options * Method: setBaseBackgroundCompactions * Signature: (JI)V */ void Java_org_rocksdb_Options_setBaseBackgroundCompactions( JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle) ->base_background_compactions = static_cast(max); } /* * Class: org_rocksdb_Options * Method: baseBackgroundCompactions * Signature: (J)I */ jint Java_org_rocksdb_Options_baseBackgroundCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->base_background_compactions; } /* * Class: org_rocksdb_Options * Method: maxBackgroundCompactions * Signature: (J)I */ jint Java_org_rocksdb_Options_maxBackgroundCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_compactions; } /* * Class: org_rocksdb_Options * Method: setMaxBackgroundCompactions * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxBackgroundCompactions( JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle) ->max_background_compactions = static_cast(max); } /* * Class: org_rocksdb_Options * Method: setMaxSubcompactions * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxSubcompactions( JNIEnv*, jobject, jlong jhandle, 
jint max) { reinterpret_cast(jhandle)->max_subcompactions = static_cast(max); } /* * Class: org_rocksdb_Options * Method: maxSubcompactions * Signature: (J)I */ jint Java_org_rocksdb_Options_maxSubcompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_subcompactions; } /* * Class: org_rocksdb_Options * Method: maxBackgroundFlushes * Signature: (J)I */ jint Java_org_rocksdb_Options_maxBackgroundFlushes( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_flushes; } /* * Class: org_rocksdb_Options * Method: setMaxBackgroundFlushes * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxBackgroundFlushes( JNIEnv*, jobject, jlong jhandle, jint max_background_flushes) { reinterpret_cast(jhandle) ->max_background_flushes = static_cast(max_background_flushes); } /* * Class: org_rocksdb_Options * Method: maxBackgroundJobs * Signature: (J)I */ jint Java_org_rocksdb_Options_maxBackgroundJobs( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_jobs; } /* * Class: org_rocksdb_Options * Method: setMaxBackgroundJobs * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxBackgroundJobs( JNIEnv*, jobject, jlong jhandle, jint max_background_jobs) { reinterpret_cast(jhandle)->max_background_jobs = static_cast(max_background_jobs); } /* * Class: org_rocksdb_Options * Method: maxLogFileSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxLogFileSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_log_file_size; } /* * Class: org_rocksdb_Options * Method: setMaxLogFileSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxLogFileSize( JNIEnv* env, jobject, jlong jhandle, jlong max_log_file_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(max_log_file_size); if (s.ok()) { reinterpret_cast(jhandle)->max_log_file_size = max_log_file_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * 
Class: org_rocksdb_Options * Method: logFileTimeToRoll * Signature: (J)J */ jlong Java_org_rocksdb_Options_logFileTimeToRoll( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->log_file_time_to_roll; } /* * Class: org_rocksdb_Options * Method: setLogFileTimeToRoll * Signature: (JJ)V */ void Java_org_rocksdb_Options_setLogFileTimeToRoll( JNIEnv* env, jobject, jlong jhandle, jlong log_file_time_to_roll) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( log_file_time_to_roll); if (s.ok()) { reinterpret_cast(jhandle) ->log_file_time_to_roll = log_file_time_to_roll; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: keepLogFileNum * Signature: (J)J */ jlong Java_org_rocksdb_Options_keepLogFileNum( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->keep_log_file_num; } /* * Class: org_rocksdb_Options * Method: setKeepLogFileNum * Signature: (JJ)V */ void Java_org_rocksdb_Options_setKeepLogFileNum( JNIEnv* env, jobject, jlong jhandle, jlong keep_log_file_num) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(keep_log_file_num); if (s.ok()) { reinterpret_cast(jhandle)->keep_log_file_num = keep_log_file_num; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: recycleLogFileNum * Signature: (J)J */ jlong Java_org_rocksdb_Options_recycleLogFileNum( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->recycle_log_file_num; } /* * Class: org_rocksdb_Options * Method: setRecycleLogFileNum * Signature: (JJ)V */ void Java_org_rocksdb_Options_setRecycleLogFileNum( JNIEnv* env, jobject, jlong jhandle, jlong recycle_log_file_num) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( recycle_log_file_num); if (s.ok()) { reinterpret_cast(jhandle) ->recycle_log_file_num = recycle_log_file_num; } else { 
ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: maxManifestFileSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxManifestFileSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_manifest_file_size; } /* * Class: org_rocksdb_Options * Method: memTableFactoryName * Signature: (J)Ljava/lang/String; */ jstring Java_org_rocksdb_Options_memTableFactoryName( JNIEnv* env, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::MemTableRepFactory* tf = opt->memtable_factory.get(); // Should never be nullptr. // Default memtable factory is SkipListFactory assert(tf); // temporary fix for the historical typo in the factory name if (strcmp(tf->Name(), "HashLinkListRepFactory") == 0) { return env->NewStringUTF("HashLinkedListRepFactory"); } return env->NewStringUTF(tf->Name()); } /* * Class: org_rocksdb_Options * Method: setMaxManifestFileSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxManifestFileSize( JNIEnv*, jobject, jlong jhandle, jlong max_manifest_file_size) { reinterpret_cast(jhandle) ->max_manifest_file_size = static_cast(max_manifest_file_size); } /* * Class: org_rocksdb_Options * Method: setMemTableFactory * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMemTableFactory( JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { reinterpret_cast(jhandle) ->memtable_factory.reset( reinterpret_cast( jfactory_handle)); } /* * Class: org_rocksdb_Options * Method: setRateLimiter * Signature: (JJ)V */ void Java_org_rocksdb_Options_setRateLimiter( JNIEnv*, jobject, jlong jhandle, jlong jrate_limiter_handle) { std::shared_ptr* pRateLimiter = reinterpret_cast*>( jrate_limiter_handle); reinterpret_cast(jhandle)->rate_limiter = *pRateLimiter; } /* * Class: org_rocksdb_Options * Method: setSstFileManager * Signature: (JJ)V */ void Java_org_rocksdb_Options_setSstFileManager( JNIEnv*, jobject, jlong jhandle, jlong jsst_file_manager_handle) { auto* sptr_sst_file_manager = reinterpret_cast*>( 
jsst_file_manager_handle); reinterpret_cast(jhandle)->sst_file_manager = *sptr_sst_file_manager; } /* * Class: org_rocksdb_Options * Method: setLogger * Signature: (JJ)V */ void Java_org_rocksdb_Options_setLogger( JNIEnv*, jobject, jlong jhandle, jlong jlogger_handle) { std::shared_ptr* pLogger = reinterpret_cast*>( jlogger_handle); reinterpret_cast(jhandle)->info_log = *pLogger; } /* * Class: org_rocksdb_Options * Method: setInfoLogLevel * Signature: (JB)V */ void Java_org_rocksdb_Options_setInfoLogLevel( JNIEnv*, jobject, jlong jhandle, jbyte jlog_level) { reinterpret_cast(jhandle)->info_log_level = static_cast(jlog_level); } /* * Class: org_rocksdb_Options * Method: infoLogLevel * Signature: (J)B */ jbyte Java_org_rocksdb_Options_infoLogLevel( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle)->info_log_level); } /* * Class: org_rocksdb_Options * Method: tableCacheNumshardbits * Signature: (J)I */ jint Java_org_rocksdb_Options_tableCacheNumshardbits( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->table_cache_numshardbits; } /* * Class: org_rocksdb_Options * Method: setTableCacheNumshardbits * Signature: (JI)V */ void Java_org_rocksdb_Options_setTableCacheNumshardbits( JNIEnv*, jobject, jlong jhandle, jint table_cache_numshardbits) { reinterpret_cast(jhandle) ->table_cache_numshardbits = static_cast(table_cache_numshardbits); } /* * Method: useFixedLengthPrefixExtractor * Signature: (JI)V */ void Java_org_rocksdb_Options_useFixedLengthPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewFixedPrefixTransform( static_cast(jprefix_length))); } /* * Method: useCappedPrefixExtractor * Signature: (JI)V */ void Java_org_rocksdb_Options_useCappedPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewCappedPrefixTransform( 
static_cast(jprefix_length))); } /* * Class: org_rocksdb_Options * Method: walTtlSeconds * Signature: (J)J */ jlong Java_org_rocksdb_Options_walTtlSeconds( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_ttl_seconds; } /* * Class: org_rocksdb_Options * Method: setWalTtlSeconds * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWalTtlSeconds( JNIEnv*, jobject, jlong jhandle, jlong WAL_ttl_seconds) { reinterpret_cast(jhandle)->WAL_ttl_seconds = static_cast(WAL_ttl_seconds); } /* * Class: org_rocksdb_Options * Method: walSizeLimitMB * Signature: (J)J */ jlong Java_org_rocksdb_Options_walSizeLimitMB( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_size_limit_MB; } /* * Class: org_rocksdb_Options * Method: setWalSizeLimitMB * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWalSizeLimitMB( JNIEnv*, jobject, jlong jhandle, jlong WAL_size_limit_MB) { reinterpret_cast(jhandle)->WAL_size_limit_MB = static_cast(WAL_size_limit_MB); } /* * Class: org_rocksdb_Options * Method: manifestPreallocationSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_manifestPreallocationSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->manifest_preallocation_size; } /* * Class: org_rocksdb_Options * Method: setManifestPreallocationSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setManifestPreallocationSize( JNIEnv* env, jobject, jlong jhandle, jlong preallocation_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( preallocation_size); if (s.ok()) { reinterpret_cast(jhandle) ->manifest_preallocation_size = preallocation_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: setTableFactory * Signature: (JJ)V */ void Java_org_rocksdb_Options_setTableFactory( JNIEnv*, jobject, jlong jhandle, jlong jtable_factory_handle) { auto* options = reinterpret_cast(jhandle); auto* table_factory = reinterpret_cast(jtable_factory_handle); 
options->table_factory.reset(table_factory); } /* * Class: org_rocksdb_Options * Method: allowMmapReads * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_allowMmapReads( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_reads; } /* * Class: org_rocksdb_Options * Method: setAllowMmapReads * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAllowMmapReads( JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_reads) { reinterpret_cast(jhandle)->allow_mmap_reads = static_cast(allow_mmap_reads); } /* * Class: org_rocksdb_Options * Method: allowMmapWrites * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_allowMmapWrites( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_writes; } /* * Class: org_rocksdb_Options * Method: setAllowMmapWrites * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAllowMmapWrites( JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_writes) { reinterpret_cast(jhandle)->allow_mmap_writes = static_cast(allow_mmap_writes); } /* * Class: org_rocksdb_Options * Method: useDirectReads * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_useDirectReads( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_reads; } /* * Class: org_rocksdb_Options * Method: setUseDirectReads * Signature: (JZ)V */ void Java_org_rocksdb_Options_setUseDirectReads( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_reads) { reinterpret_cast(jhandle)->use_direct_reads = static_cast(use_direct_reads); } /* * Class: org_rocksdb_Options * Method: useDirectIoForFlushAndCompaction * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_useDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_io_for_flush_and_compaction; } /* * Class: org_rocksdb_Options * Method: setUseDirectIoForFlushAndCompaction * Signature: (JZ)V */ void Java_org_rocksdb_Options_setUseDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong 
jhandle, jboolean use_direct_io_for_flush_and_compaction) { reinterpret_cast(jhandle) ->use_direct_io_for_flush_and_compaction = static_cast(use_direct_io_for_flush_and_compaction); } /* * Class: org_rocksdb_Options * Method: setAllowFAllocate * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAllowFAllocate( JNIEnv*, jobject, jlong jhandle, jboolean jallow_fallocate) { reinterpret_cast(jhandle)->allow_fallocate = static_cast(jallow_fallocate); } /* * Class: org_rocksdb_Options * Method: allowFAllocate * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_allowFAllocate( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_fallocate); } /* * Class: org_rocksdb_Options * Method: isFdCloseOnExec * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_isFdCloseOnExec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->is_fd_close_on_exec; } /* * Class: org_rocksdb_Options * Method: setIsFdCloseOnExec * Signature: (JZ)V */ void Java_org_rocksdb_Options_setIsFdCloseOnExec( JNIEnv*, jobject, jlong jhandle, jboolean is_fd_close_on_exec) { reinterpret_cast(jhandle)->is_fd_close_on_exec = static_cast(is_fd_close_on_exec); } /* * Class: org_rocksdb_Options * Method: statsDumpPeriodSec * Signature: (J)I */ jint Java_org_rocksdb_Options_statsDumpPeriodSec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_dump_period_sec; } /* * Class: org_rocksdb_Options * Method: setStatsDumpPeriodSec * Signature: (JI)V */ void Java_org_rocksdb_Options_setStatsDumpPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_dump_period_sec) { reinterpret_cast(jhandle) ->stats_dump_period_sec = static_cast(jstats_dump_period_sec); } /* * Class: org_rocksdb_Options * Method: statsPersistPeriodSec * Signature: (J)I */ jint Java_org_rocksdb_Options_statsPersistPeriodSec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_persist_period_sec; } /* * Class: org_rocksdb_Options * 
Method: setStatsPersistPeriodSec * Signature: (JI)V */ void Java_org_rocksdb_Options_setStatsPersistPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_persist_period_sec) { reinterpret_cast(jhandle) ->stats_persist_period_sec = static_cast(jstats_persist_period_sec); } /* * Class: org_rocksdb_Options * Method: statsHistoryBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_statsHistoryBufferSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_history_buffer_size; } /* * Class: org_rocksdb_Options * Method: setStatsHistoryBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setStatsHistoryBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jstats_history_buffer_size) { reinterpret_cast(jhandle) ->stats_history_buffer_size = static_cast(jstats_history_buffer_size); } /* * Class: org_rocksdb_Options * Method: adviseRandomOnOpen * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_adviseRandomOnOpen( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->advise_random_on_open; } /* * Class: org_rocksdb_Options * Method: setAdviseRandomOnOpen * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAdviseRandomOnOpen( JNIEnv*, jobject, jlong jhandle, jboolean advise_random_on_open) { reinterpret_cast(jhandle) ->advise_random_on_open = static_cast(advise_random_on_open); } /* * Class: org_rocksdb_Options * Method: setDbWriteBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setDbWriteBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jdb_write_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); } /* * Class: org_rocksdb_Options * Method: dbWriteBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_dbWriteBufferSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_write_buffer_size); } /* * Class: org_rocksdb_Options * Method: setAccessHintOnCompactionStart * 
Signature: (JB)V */ void Java_org_rocksdb_Options_setAccessHintOnCompactionStart( JNIEnv*, jobject, jlong jhandle, jbyte jaccess_hint_value) { auto* opt = reinterpret_cast(jhandle); opt->access_hint_on_compaction_start = ROCKSDB_NAMESPACE::AccessHintJni::toCppAccessHint(jaccess_hint_value); } /* * Class: org_rocksdb_Options * Method: accessHintOnCompactionStart * Signature: (J)B */ jbyte Java_org_rocksdb_Options_accessHintOnCompactionStart( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::AccessHintJni::toJavaAccessHint( opt->access_hint_on_compaction_start); } /* * Class: org_rocksdb_Options * Method: setNewTableReaderForCompactionInputs * Signature: (JZ)V */ void Java_org_rocksdb_Options_setNewTableReaderForCompactionInputs( JNIEnv*, jobject, jlong jhandle, jboolean jnew_table_reader_for_compaction_inputs) { auto* opt = reinterpret_cast(jhandle); opt->new_table_reader_for_compaction_inputs = static_cast(jnew_table_reader_for_compaction_inputs); } /* * Class: org_rocksdb_Options * Method: newTableReaderForCompactionInputs * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_newTableReaderForCompactionInputs( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->new_table_reader_for_compaction_inputs); } /* * Class: org_rocksdb_Options * Method: setCompactionReadaheadSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_readahead_size) { auto* opt = reinterpret_cast(jhandle); opt->compaction_readahead_size = static_cast(jcompaction_readahead_size); } /* * Class: org_rocksdb_Options * Method: compactionReadaheadSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_compactionReadaheadSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->compaction_readahead_size); } /* * Class: org_rocksdb_Options * Method: setRandomAccessMaxBufferSize * 
Signature: (JJ)V */ void Java_org_rocksdb_Options_setRandomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jrandom_access_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->random_access_max_buffer_size = static_cast(jrandom_access_max_buffer_size); } /* * Class: org_rocksdb_Options * Method: randomAccessMaxBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_randomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->random_access_max_buffer_size); } /* * Class: org_rocksdb_Options * Method: setWritableFileMaxBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWritableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jwritable_file_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->writable_file_max_buffer_size = static_cast(jwritable_file_max_buffer_size); } /* * Class: org_rocksdb_Options * Method: writableFileMaxBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_writableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->writable_file_max_buffer_size); } /* * Class: org_rocksdb_Options * Method: useAdaptiveMutex * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_useAdaptiveMutex( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_adaptive_mutex; } /* * Class: org_rocksdb_Options * Method: setUseAdaptiveMutex * Signature: (JZ)V */ void Java_org_rocksdb_Options_setUseAdaptiveMutex( JNIEnv*, jobject, jlong jhandle, jboolean use_adaptive_mutex) { reinterpret_cast(jhandle)->use_adaptive_mutex = static_cast(use_adaptive_mutex); } /* * Class: org_rocksdb_Options * Method: bytesPerSync * Signature: (J)J */ jlong Java_org_rocksdb_Options_bytesPerSync( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->bytes_per_sync; } /* * Class: org_rocksdb_Options * Method: setBytesPerSync * Signature: (JJ)V */ void 
Java_org_rocksdb_Options_setBytesPerSync( JNIEnv*, jobject, jlong jhandle, jlong bytes_per_sync) { reinterpret_cast(jhandle)->bytes_per_sync = static_cast(bytes_per_sync); } /* * Class: org_rocksdb_Options * Method: setWalBytesPerSync * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWalBytesPerSync( JNIEnv*, jobject, jlong jhandle, jlong jwal_bytes_per_sync) { reinterpret_cast(jhandle)->wal_bytes_per_sync = static_cast(jwal_bytes_per_sync); } /* * Class: org_rocksdb_Options * Method: walBytesPerSync * Signature: (J)J */ jlong Java_org_rocksdb_Options_walBytesPerSync( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->wal_bytes_per_sync); } /* * Class: org_rocksdb_Options * Method: setStrictBytesPerSync * Signature: (JZ)V */ void Java_org_rocksdb_Options_setStrictBytesPerSync( JNIEnv*, jobject, jlong jhandle, jboolean jstrict_bytes_per_sync) { reinterpret_cast(jhandle) ->strict_bytes_per_sync = jstrict_bytes_per_sync == JNI_TRUE; } /* * Class: org_rocksdb_Options * Method: strictBytesPerSync * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_strictBytesPerSync( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->strict_bytes_per_sync); } /* * Class: org_rocksdb_Options * Method: setEnableThreadTracking * Signature: (JZ)V */ void Java_org_rocksdb_Options_setEnableThreadTracking( JNIEnv*, jobject, jlong jhandle, jboolean jenable_thread_tracking) { auto* opt = reinterpret_cast(jhandle); opt->enable_thread_tracking = static_cast(jenable_thread_tracking); } /* * Class: org_rocksdb_Options * Method: enableThreadTracking * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_enableThreadTracking( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_thread_tracking); } /* * Class: org_rocksdb_Options * Method: setDelayedWriteRate * Signature: (JJ)V */ void Java_org_rocksdb_Options_setDelayedWriteRate( JNIEnv*, jobject, 
jlong jhandle, jlong jdelayed_write_rate) { auto* opt = reinterpret_cast(jhandle); opt->delayed_write_rate = static_cast(jdelayed_write_rate); } /* * Class: org_rocksdb_Options * Method: delayedWriteRate * Signature: (J)J */ jlong Java_org_rocksdb_Options_delayedWriteRate( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->delayed_write_rate); } /* * Class: org_rocksdb_Options * Method: setEnablePipelinedWrite * Signature: (JZ)V */ void Java_org_rocksdb_Options_setEnablePipelinedWrite( JNIEnv*, jobject, jlong jhandle, jboolean jenable_pipelined_write) { auto* opt = reinterpret_cast(jhandle); opt->enable_pipelined_write = jenable_pipelined_write == JNI_TRUE; } /* * Class: org_rocksdb_Options * Method: enablePipelinedWrite * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_enablePipelinedWrite( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_pipelined_write); } /* * Class: org_rocksdb_Options * Method: setUnorderedWrite * Signature: (JZ)V */ void Java_org_rocksdb_Options_setUnorderedWrite( JNIEnv*, jobject, jlong jhandle, jboolean unordered_write) { reinterpret_cast(jhandle)->unordered_write = static_cast(unordered_write); } /* * Class: org_rocksdb_Options * Method: unorderedWrite * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_unorderedWrite( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->unordered_write; } /* * Class: org_rocksdb_Options * Method: setAllowConcurrentMemtableWrite * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAllowConcurrentMemtableWrite( JNIEnv*, jobject, jlong jhandle, jboolean allow) { reinterpret_cast(jhandle) ->allow_concurrent_memtable_write = static_cast(allow); } /* * Class: org_rocksdb_Options * Method: allowConcurrentMemtableWrite * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_allowConcurrentMemtableWrite( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) 
->allow_concurrent_memtable_write; } /* * Class: org_rocksdb_Options * Method: setEnableWriteThreadAdaptiveYield * Signature: (JZ)V */ void Java_org_rocksdb_Options_setEnableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle, jboolean yield) { reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield = static_cast(yield); } /* * Class: org_rocksdb_Options * Method: enableWriteThreadAdaptiveYield * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_enableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield; } /* * Class: org_rocksdb_Options * Method: setWriteThreadMaxYieldUsec * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWriteThreadMaxYieldUsec( JNIEnv*, jobject, jlong jhandle, jlong max) { reinterpret_cast(jhandle) ->write_thread_max_yield_usec = static_cast(max); } /* * Class: org_rocksdb_Options * Method: writeThreadMaxYieldUsec * Signature: (J)J */ jlong Java_org_rocksdb_Options_writeThreadMaxYieldUsec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_max_yield_usec; } /* * Class: org_rocksdb_Options * Method: setWriteThreadSlowYieldUsec * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWriteThreadSlowYieldUsec( JNIEnv*, jobject, jlong jhandle, jlong slow) { reinterpret_cast(jhandle) ->write_thread_slow_yield_usec = static_cast(slow); } /* * Class: org_rocksdb_Options * Method: writeThreadSlowYieldUsec * Signature: (J)J */ jlong Java_org_rocksdb_Options_writeThreadSlowYieldUsec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_slow_yield_usec; } /* * Class: org_rocksdb_Options * Method: setSkipStatsUpdateOnDbOpen * Signature: (JZ)V */ void Java_org_rocksdb_Options_setSkipStatsUpdateOnDbOpen( JNIEnv*, jobject, jlong jhandle, jboolean jskip_stats_update_on_db_open) { auto* opt = reinterpret_cast(jhandle); opt->skip_stats_update_on_db_open = static_cast(jskip_stats_update_on_db_open); } /* * 
Class: org_rocksdb_Options * Method: skipStatsUpdateOnDbOpen * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_skipStatsUpdateOnDbOpen( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_stats_update_on_db_open); } /* * Class: org_rocksdb_Options * Method: setSkipCheckingSstFileSizesOnDbOpen * Signature: (JZ)V */ void Java_org_rocksdb_Options_setSkipCheckingSstFileSizesOnDbOpen( JNIEnv*, jobject, jlong jhandle, jboolean jskip_checking_sst_file_sizes_on_db_open) { auto* opt = reinterpret_cast(jhandle); opt->skip_checking_sst_file_sizes_on_db_open = static_cast(jskip_checking_sst_file_sizes_on_db_open); } /* * Class: org_rocksdb_Options * Method: skipCheckingSstFileSizesOnDbOpen * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_skipCheckingSstFileSizesOnDbOpen( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_checking_sst_file_sizes_on_db_open); } /* * Class: org_rocksdb_Options * Method: setWalRecoveryMode * Signature: (JB)V */ void Java_org_rocksdb_Options_setWalRecoveryMode( JNIEnv*, jobject, jlong jhandle, jbyte jwal_recovery_mode_value) { auto* opt = reinterpret_cast(jhandle); opt->wal_recovery_mode = ROCKSDB_NAMESPACE::WALRecoveryModeJni::toCppWALRecoveryMode( jwal_recovery_mode_value); } /* * Class: org_rocksdb_Options * Method: walRecoveryMode * Signature: (J)B */ jbyte Java_org_rocksdb_Options_walRecoveryMode( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::WALRecoveryModeJni::toJavaWALRecoveryMode( opt->wal_recovery_mode); } /* * Class: org_rocksdb_Options * Method: setAllow2pc * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAllow2pc( JNIEnv*, jobject, jlong jhandle, jboolean jallow_2pc) { auto* opt = reinterpret_cast(jhandle); opt->allow_2pc = static_cast(jallow_2pc); } /* * Class: org_rocksdb_Options * Method: allow2pc * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_allow2pc( 
JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_2pc); } /* * Class: org_rocksdb_Options * Method: setRowCache * Signature: (JJ)V */ void Java_org_rocksdb_Options_setRowCache( JNIEnv*, jobject, jlong jhandle, jlong jrow_cache_handle) { auto* opt = reinterpret_cast(jhandle); auto* row_cache = reinterpret_cast*>( jrow_cache_handle); opt->row_cache = *row_cache; } /* * Class: org_rocksdb_Options * Method: setWalFilter * Signature: (JJ)V */ void Java_org_rocksdb_Options_setWalFilter( JNIEnv*, jobject, jlong jhandle, jlong jwal_filter_handle) { auto* opt = reinterpret_cast(jhandle); auto* wal_filter = reinterpret_cast( jwal_filter_handle); opt->wal_filter = wal_filter; } /* * Class: org_rocksdb_Options * Method: setFailIfOptionsFileError * Signature: (JZ)V */ void Java_org_rocksdb_Options_setFailIfOptionsFileError( JNIEnv*, jobject, jlong jhandle, jboolean jfail_if_options_file_error) { auto* opt = reinterpret_cast(jhandle); opt->fail_if_options_file_error = static_cast(jfail_if_options_file_error); } /* * Class: org_rocksdb_Options * Method: failIfOptionsFileError * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_failIfOptionsFileError( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->fail_if_options_file_error); } /* * Class: org_rocksdb_Options * Method: setDumpMallocStats * Signature: (JZ)V */ void Java_org_rocksdb_Options_setDumpMallocStats( JNIEnv*, jobject, jlong jhandle, jboolean jdump_malloc_stats) { auto* opt = reinterpret_cast(jhandle); opt->dump_malloc_stats = static_cast(jdump_malloc_stats); } /* * Class: org_rocksdb_Options * Method: dumpMallocStats * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_dumpMallocStats( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->dump_malloc_stats); } /* * Class: org_rocksdb_Options * Method: setAvoidFlushDuringRecovery * Signature: (JZ)V */ void 
Java_org_rocksdb_Options_setAvoidFlushDuringRecovery( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_recovery) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_recovery = static_cast(javoid_flush_during_recovery); } /* * Class: org_rocksdb_Options * Method: avoidFlushDuringRecovery * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_avoidFlushDuringRecovery( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_recovery); } /* * Class: org_rocksdb_Options * Method: setAvoidFlushDuringShutdown * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAvoidFlushDuringShutdown( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_shutdown) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_shutdown = static_cast(javoid_flush_during_shutdown); } /* * Class: org_rocksdb_Options * Method: avoidFlushDuringShutdown * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_avoidFlushDuringShutdown( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_shutdown); } /* * Class: org_rocksdb_Options * Method: setAllowIngestBehind * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAllowIngestBehind( JNIEnv*, jobject, jlong jhandle, jboolean jallow_ingest_behind) { auto* opt = reinterpret_cast(jhandle); opt->allow_ingest_behind = jallow_ingest_behind == JNI_TRUE; } /* * Class: org_rocksdb_Options * Method: allowIngestBehind * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_allowIngestBehind( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_ingest_behind); } /* * Class: org_rocksdb_Options * Method: setPreserveDeletes * Signature: (JZ)V */ void Java_org_rocksdb_Options_setPreserveDeletes( JNIEnv*, jobject, jlong jhandle, jboolean jpreserve_deletes) { auto* opt = reinterpret_cast(jhandle); opt->preserve_deletes = jpreserve_deletes == JNI_TRUE; } 
/* * Class: org_rocksdb_Options * Method: preserveDeletes * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_preserveDeletes( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->preserve_deletes); } /* * Class: org_rocksdb_Options * Method: setTwoWriteQueues * Signature: (JZ)V */ void Java_org_rocksdb_Options_setTwoWriteQueues( JNIEnv*, jobject, jlong jhandle, jboolean jtwo_write_queues) { auto* opt = reinterpret_cast(jhandle); opt->two_write_queues = jtwo_write_queues == JNI_TRUE; } /* * Class: org_rocksdb_Options * Method: twoWriteQueues * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_twoWriteQueues( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->two_write_queues); } /* * Class: org_rocksdb_Options * Method: setManualWalFlush * Signature: (JZ)V */ void Java_org_rocksdb_Options_setManualWalFlush( JNIEnv*, jobject, jlong jhandle, jboolean jmanual_wal_flush) { auto* opt = reinterpret_cast(jhandle); opt->manual_wal_flush = jmanual_wal_flush == JNI_TRUE; } /* * Class: org_rocksdb_Options * Method: manualWalFlush * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_manualWalFlush( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->manual_wal_flush); } /* * Class: org_rocksdb_Options * Method: setAtomicFlush * Signature: (JZ)V */ void Java_org_rocksdb_Options_setAtomicFlush( JNIEnv*, jobject, jlong jhandle, jboolean jatomic_flush) { auto* opt = reinterpret_cast(jhandle); opt->atomic_flush = jatomic_flush == JNI_TRUE; } /* * Class: org_rocksdb_Options * Method: atomicFlush * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_atomicFlush( JNIEnv *, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->atomic_flush); } /* * Method: tableFactoryName * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_Options_tableFactoryName( JNIEnv* env, jobject, jlong jhandle) { auto* opt 
= reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TableFactory* tf = opt->table_factory.get(); // Should never be nullptr. // NOTE(review): tf is the table factory, not the memtable factory; presumably a default table factory is always set - confirm assert(tf); return env->NewStringUTF(tf->Name()); } /* * Class: org_rocksdb_Options * Method: minWriteBufferNumberToMerge * Signature: (J)I */ jint Java_org_rocksdb_Options_minWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge; } /* * Class: org_rocksdb_Options * Method: setMinWriteBufferNumberToMerge * Signature: (JI)V */ void Java_org_rocksdb_Options_setMinWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle, jint jmin_write_buffer_number_to_merge) { reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge = static_cast(jmin_write_buffer_number_to_merge); } /* * Class: org_rocksdb_Options * Method: maxWriteBufferNumberToMaintain * Signature: (J)I */ jint Java_org_rocksdb_Options_maxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number_to_maintain; } /* * Class: org_rocksdb_Options * Method: setMaxWriteBufferNumberToMaintain * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number_to_maintain) { reinterpret_cast(jhandle) ->max_write_buffer_number_to_maintain = static_cast(jmax_write_buffer_number_to_maintain); } /* * Class: org_rocksdb_Options * Method: setCompressionType * Signature: (JB)V */ void Java_org_rocksdb_Options_setCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* opts = reinterpret_cast(jhandle); opts->compression = ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( jcompression_type_value); } /* * Class: org_rocksdb_Options * Method: compressionType * Signature: (J)B */ jbyte Java_org_rocksdb_Options_compressionType( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); 
return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( opts->compression); } /** * Helper method to convert a Java byte array of compression levels * to a C++ vector of ROCKSDB_NAMESPACE::CompressionType * * @param env A pointer to the Java environment * @param jcompression_levels A reference to a java byte array * where each byte indicates a compression level * * @return A std::unique_ptr to the vector, or std::unique_ptr(nullptr) if a JNI * exception occurs */ std::unique_ptr> rocksdb_compression_vector_helper(JNIEnv* env, jbyteArray jcompression_levels) { jsize len = env->GetArrayLength(jcompression_levels); jbyte* jcompression_level = env->GetByteArrayElements(jcompression_levels, nullptr); if (jcompression_level == nullptr) { // exception thrown: OutOfMemoryError return std::unique_ptr>(); } auto* compression_levels = new std::vector(); std::unique_ptr> uptr_compression_levels(compression_levels); for (jsize i = 0; i < len; i++) { jbyte jcl = jcompression_level[i]; compression_levels->push_back( static_cast(jcl)); } env->ReleaseByteArrayElements(jcompression_levels, jcompression_level, JNI_ABORT); return uptr_compression_levels; } /** * Helper method to convert a C++ vector of ROCKSDB_NAMESPACE::CompressionType * to a Java byte array of compression levels * * @param env A pointer to the Java environment * @param jcompression_levels A reference to a java byte array * where each byte indicates a compression level * * @return A jbytearray or nullptr if an exception occurs */ jbyteArray rocksdb_compression_list_helper( JNIEnv* env, std::vector compression_levels) { const size_t len = compression_levels.size(); jbyte* jbuf = new jbyte[len]; for (size_t i = 0; i < len; i++) { jbuf[i] = compression_levels[i]; } // insert in java array jbyteArray jcompression_levels = env->NewByteArray(static_cast(len)); if (jcompression_levels == nullptr) { // exception thrown: OutOfMemoryError delete[] jbuf; return nullptr; } 
env->SetByteArrayRegion(jcompression_levels, 0, static_cast(len), jbuf); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jcompression_levels); delete[] jbuf; return nullptr; } delete[] jbuf; return jcompression_levels; } /* * Class: org_rocksdb_Options * Method: setCompressionPerLevel * Signature: (J[B)V */ void Java_org_rocksdb_Options_setCompressionPerLevel( JNIEnv* env, jobject, jlong jhandle, jbyteArray jcompressionLevels) { auto uptr_compression_levels = rocksdb_compression_vector_helper(env, jcompressionLevels); if (!uptr_compression_levels) { // exception occurred return; } auto* options = reinterpret_cast(jhandle); options->compression_per_level = *(uptr_compression_levels.get()); } /* * Class: org_rocksdb_Options * Method: compressionPerLevel * Signature: (J)[B */ jbyteArray Java_org_rocksdb_Options_compressionPerLevel( JNIEnv* env, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return rocksdb_compression_list_helper(env, options->compression_per_level); } /* * Class: org_rocksdb_Options * Method: setBottommostCompressionType * Signature: (JB)V */ void Java_org_rocksdb_Options_setBottommostCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* options = reinterpret_cast(jhandle); options->bottommost_compression = ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( jcompression_type_value); } /* * Class: org_rocksdb_Options * Method: bottommostCompressionType * Signature: (J)B */ jbyte Java_org_rocksdb_Options_bottommostCompressionType( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( options->bottommost_compression); } /* * Class: org_rocksdb_Options * Method: setBottommostCompressionOptions * Signature: (JJ)V */ void Java_org_rocksdb_Options_setBottommostCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong 
jbottommost_compression_options_handle) { auto* options = reinterpret_cast(jhandle); auto* bottommost_compression_options = reinterpret_cast( jbottommost_compression_options_handle); options->bottommost_compression_opts = *bottommost_compression_options; } /* * Class: org_rocksdb_Options * Method: setCompressionOptions * Signature: (JJ)V */ void Java_org_rocksdb_Options_setCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong jcompression_options_handle) { auto* options = reinterpret_cast(jhandle); auto* compression_options = reinterpret_cast( jcompression_options_handle); options->compression_opts = *compression_options; } /* * Class: org_rocksdb_Options * Method: setCompactionStyle * Signature: (JB)V */ void Java_org_rocksdb_Options_setCompactionStyle( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_style) { auto* options = reinterpret_cast(jhandle); options->compaction_style = ROCKSDB_NAMESPACE::CompactionStyleJni::toCppCompactionStyle( jcompaction_style); } /* * Class: org_rocksdb_Options * Method: compactionStyle * Signature: (J)B */ jbyte Java_org_rocksdb_Options_compactionStyle( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionStyleJni::toJavaCompactionStyle( options->compaction_style); } /* * Class: org_rocksdb_Options * Method: setMaxTableFilesSizeFIFO * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size = static_cast(jmax_table_files_size); } /* * Class: org_rocksdb_Options * Method: maxTableFilesSizeFIFO * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size; } /* * Class: org_rocksdb_Options * Method: numLevels * Signature: (J)I */ jint Java_org_rocksdb_Options_numLevels( JNIEnv*, jobject, 
jlong jhandle) { return reinterpret_cast(jhandle)->num_levels; } /* * Class: org_rocksdb_Options * Method: setNumLevels * Signature: (JI)V */ void Java_org_rocksdb_Options_setNumLevels( JNIEnv*, jobject, jlong jhandle, jint jnum_levels) { reinterpret_cast(jhandle)->num_levels = static_cast(jnum_levels); } /* * Class: org_rocksdb_Options * Method: levelZeroFileNumCompactionTrigger * Signature: (J)I */ jint Java_org_rocksdb_Options_levelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* * Class: org_rocksdb_Options * Method: setLevelZeroFileNumCompactionTrigger * Signature: (JI)V */ void Java_org_rocksdb_Options_setLevelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = static_cast(jlevel0_file_num_compaction_trigger); } /* * Class: org_rocksdb_Options * Method: levelZeroSlowdownWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_Options_levelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* * Class: org_rocksdb_Options * Method: setLevelSlowdownWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_Options_setLevelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = static_cast(jlevel0_slowdown_writes_trigger); } /* * Class: org_rocksdb_Options * Method: levelZeroStopWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_Options_levelZeroStopWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* * Class: org_rocksdb_Options * Method: setLevelStopWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_Options_setLevelZeroStopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint 
jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = static_cast(jlevel0_stop_writes_trigger); } /* * Class: org_rocksdb_Options * Method: targetFileSizeBase * Signature: (J)J */ jlong Java_org_rocksdb_Options_targetFileSizeBase( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_base; } /* * Class: org_rocksdb_Options * Method: setTargetFileSizeBase * Signature: (JJ)V */ void Java_org_rocksdb_Options_setTargetFileSizeBase( JNIEnv*, jobject, jlong jhandle, jlong jtarget_file_size_base) { reinterpret_cast(jhandle) ->target_file_size_base = static_cast(jtarget_file_size_base); } /* * Class: org_rocksdb_Options * Method: targetFileSizeMultiplier * Signature: (J)I */ jint Java_org_rocksdb_Options_targetFileSizeMultiplier( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_multiplier; } /* * Class: org_rocksdb_Options * Method: setTargetFileSizeMultiplier * Signature: (JI)V */ void Java_org_rocksdb_Options_setTargetFileSizeMultiplier( JNIEnv*, jobject, jlong jhandle, jint jtarget_file_size_multiplier) { reinterpret_cast(jhandle) ->target_file_size_multiplier = static_cast(jtarget_file_size_multiplier); } /* * Class: org_rocksdb_Options * Method: maxBytesForLevelBase * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxBytesForLevelBase( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_base; } /* * Class: org_rocksdb_Options * Method: setMaxBytesForLevelBase * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxBytesForLevelBase( JNIEnv*, jobject, jlong jhandle, jlong jmax_bytes_for_level_base) { reinterpret_cast(jhandle) ->max_bytes_for_level_base = static_cast(jmax_bytes_for_level_base); } /* * Class: org_rocksdb_Options * Method: levelCompactionDynamicLevelBytes * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_levelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) 
->level_compaction_dynamic_level_bytes; } /* * Class: org_rocksdb_Options * Method: setLevelCompactionDynamicLevelBytes * Signature: (JZ)V */ void Java_org_rocksdb_Options_setLevelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle, jboolean jenable_dynamic_level_bytes) { reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes = (jenable_dynamic_level_bytes); } /* * Class: org_rocksdb_Options * Method: maxBytesForLevelMultiplier * Signature: (J)D */ jdouble Java_org_rocksdb_Options_maxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier; } /* * Class: org_rocksdb_Options * Method: setMaxBytesForLevelMultiplier * Signature: (JD)V */ void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle, jdouble jmax_bytes_for_level_multiplier) { reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier = static_cast(jmax_bytes_for_level_multiplier); } /* * Class: org_rocksdb_Options * Method: maxCompactionBytes * Signature: (J)I */ jlong Java_org_rocksdb_Options_maxCompactionBytes( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle) ->max_compaction_bytes); } /* * Class: org_rocksdb_Options * Method: setMaxCompactionBytes * Signature: (JI)V */ void Java_org_rocksdb_Options_setMaxCompactionBytes( JNIEnv*, jobject, jlong jhandle, jlong jmax_compaction_bytes) { reinterpret_cast(jhandle)->max_compaction_bytes = static_cast(jmax_compaction_bytes); } /* * Class: org_rocksdb_Options * Method: arenaBlockSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_arenaBlockSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->arena_block_size; } /* * Class: org_rocksdb_Options * Method: setArenaBlockSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setArenaBlockSize( JNIEnv* env, jobject, jlong jhandle, jlong jarena_block_size) { auto s = 
ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jarena_block_size); if (s.ok()) { reinterpret_cast(jhandle)->arena_block_size = jarena_block_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: disableAutoCompactions * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_disableAutoCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->disable_auto_compactions; } /* * Class: org_rocksdb_Options * Method: setDisableAutoCompactions * Signature: (JZ)V */ void Java_org_rocksdb_Options_setDisableAutoCompactions( JNIEnv*, jobject, jlong jhandle, jboolean jdisable_auto_compactions) { reinterpret_cast(jhandle) ->disable_auto_compactions = static_cast(jdisable_auto_compactions); } /* * Class: org_rocksdb_Options * Method: maxSequentialSkipInIterations * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations; } /* * Class: org_rocksdb_Options * Method: setMaxSequentialSkipInIterations * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle, jlong jmax_sequential_skip_in_iterations) { reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations = static_cast(jmax_sequential_skip_in_iterations); } /* * Class: org_rocksdb_Options * Method: inplaceUpdateSupport * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_inplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_support; } /* * Class: org_rocksdb_Options * Method: setInplaceUpdateSupport * Signature: (JZ)V */ void Java_org_rocksdb_Options_setInplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle, jboolean jinplace_update_support) { reinterpret_cast(jhandle) ->inplace_update_support = static_cast(jinplace_update_support); } /* * Class: org_rocksdb_Options * Method: 
inplaceUpdateNumLocks * Signature: (J)J */ jlong Java_org_rocksdb_Options_inplaceUpdateNumLocks( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_num_locks; } /* * Class: org_rocksdb_Options * Method: setInplaceUpdateNumLocks * Signature: (JJ)V */ void Java_org_rocksdb_Options_setInplaceUpdateNumLocks( JNIEnv* env, jobject, jlong jhandle, jlong jinplace_update_num_locks) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jinplace_update_num_locks); if (s.ok()) { reinterpret_cast(jhandle) ->inplace_update_num_locks = jinplace_update_num_locks; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: memtablePrefixBloomSizeRatio * Signature: (J)I */ jdouble Java_org_rocksdb_Options_memtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_prefix_bloom_size_ratio; } /* * Class: org_rocksdb_Options * Method: setMemtablePrefixBloomSizeRatio * Signature: (JI)V */ void Java_org_rocksdb_Options_setMemtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle, jdouble jmemtable_prefix_bloom_size_ratio) { reinterpret_cast(jhandle) ->memtable_prefix_bloom_size_ratio = static_cast(jmemtable_prefix_bloom_size_ratio); } /* * Class: org_rocksdb_Options * Method: bloomLocality * Signature: (J)I */ jint Java_org_rocksdb_Options_bloomLocality( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->bloom_locality; } /* * Class: org_rocksdb_Options * Method: setBloomLocality * Signature: (JI)V */ void Java_org_rocksdb_Options_setBloomLocality( JNIEnv*, jobject, jlong jhandle, jint jbloom_locality) { reinterpret_cast(jhandle)->bloom_locality = static_cast(jbloom_locality); } /* * Class: org_rocksdb_Options * Method: maxSuccessiveMerges * Signature: (J)J */ jlong Java_org_rocksdb_Options_maxSuccessiveMerges( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_successive_merges; } /* * 
Class: org_rocksdb_Options * Method: setMaxSuccessiveMerges * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMaxSuccessiveMerges( JNIEnv* env, jobject, jlong jhandle, jlong jmax_successive_merges) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmax_successive_merges); if (s.ok()) { reinterpret_cast(jhandle) ->max_successive_merges = jmax_successive_merges; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: optimizeFiltersForHits * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_optimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->optimize_filters_for_hits; } /* * Class: org_rocksdb_Options * Method: setOptimizeFiltersForHits * Signature: (JZ)V */ void Java_org_rocksdb_Options_setOptimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle, jboolean joptimize_filters_for_hits) { reinterpret_cast(jhandle) ->optimize_filters_for_hits = static_cast(joptimize_filters_for_hits); } /* * Class: org_rocksdb_Options * Method: optimizeForSmallDb * Signature: (J)V */ void Java_org_rocksdb_Options_optimizeForSmallDb( JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle)->OptimizeForSmallDb(); } /* * Class: org_rocksdb_Options * Method: optimizeForPointLookup * Signature: (JJ)V */ void Java_org_rocksdb_Options_optimizeForPointLookup( JNIEnv*, jobject, jlong jhandle, jlong block_cache_size_mb) { reinterpret_cast(jhandle) ->OptimizeForPointLookup(block_cache_size_mb); } /* * Class: org_rocksdb_Options * Method: optimizeLevelStyleCompaction * Signature: (JJ)V */ void Java_org_rocksdb_Options_optimizeLevelStyleCompaction( JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeLevelStyleCompaction(memtable_memory_budget); } /* * Class: org_rocksdb_Options * Method: optimizeUniversalStyleCompaction * Signature: (JJ)V */ void Java_org_rocksdb_Options_optimizeUniversalStyleCompaction( JNIEnv*, 
jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeUniversalStyleCompaction(memtable_memory_budget); } /* * Class: org_rocksdb_Options * Method: prepareForBulkLoad * Signature: (J)V */ void Java_org_rocksdb_Options_prepareForBulkLoad( JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle)->PrepareForBulkLoad(); } /* * Class: org_rocksdb_Options * Method: memtableHugePageSize * Signature: (J)J */ jlong Java_org_rocksdb_Options_memtableHugePageSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_huge_page_size; } /* * Class: org_rocksdb_Options * Method: setMemtableHugePageSize * Signature: (JJ)V */ void Java_org_rocksdb_Options_setMemtableHugePageSize( JNIEnv* env, jobject, jlong jhandle, jlong jmemtable_huge_page_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmemtable_huge_page_size); if (s.ok()) { reinterpret_cast(jhandle) ->memtable_huge_page_size = jmemtable_huge_page_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Options * Method: softPendingCompactionBytesLimit * Signature: (J)J */ jlong Java_org_rocksdb_Options_softPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->soft_pending_compaction_bytes_limit; } /* * Class: org_rocksdb_Options * Method: setSoftPendingCompactionBytesLimit * Signature: (JJ)V */ void Java_org_rocksdb_Options_setSoftPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jsoft_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) ->soft_pending_compaction_bytes_limit = static_cast(jsoft_pending_compaction_bytes_limit); } /* * Class: org_rocksdb_Options * Method: softHardCompactionBytesLimit * Signature: (J)J */ jlong Java_org_rocksdb_Options_hardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->hard_pending_compaction_bytes_limit; } /* * Class: 
org_rocksdb_Options * Method: setHardPendingCompactionBytesLimit * Signature: (JJ)V */ void Java_org_rocksdb_Options_setHardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jhard_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) ->hard_pending_compaction_bytes_limit = static_cast(jhard_pending_compaction_bytes_limit); } /* * Class: org_rocksdb_Options * Method: level0FileNumCompactionTrigger * Signature: (J)I */ jint Java_org_rocksdb_Options_level0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* * Class: org_rocksdb_Options * Method: setLevel0FileNumCompactionTrigger * Signature: (JI)V */ void Java_org_rocksdb_Options_setLevel0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = static_cast(jlevel0_file_num_compaction_trigger); } /* * Class: org_rocksdb_Options * Method: level0SlowdownWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_Options_level0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* * Class: org_rocksdb_Options * Method: setLevel0SlowdownWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_Options_setLevel0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = static_cast(jlevel0_slowdown_writes_trigger); } /* * Class: org_rocksdb_Options * Method: level0StopWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_Options_level0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* * Class: org_rocksdb_Options * Method: setLevel0StopWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_Options_setLevel0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint 
jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = static_cast(jlevel0_stop_writes_trigger); } /* * Class: org_rocksdb_Options * Method: maxBytesForLevelMultiplierAdditional * Signature: (J)[I */ jintArray Java_org_rocksdb_Options_maxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle) { auto mbflma = reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier_additional; const size_t size = mbflma.size(); jint* additionals = new jint[size]; for (size_t i = 0; i < size; i++) { additionals[i] = static_cast(mbflma[i]); } jsize jlen = static_cast(size); jintArray result = env->NewIntArray(jlen); if (result == nullptr) { // exception thrown: OutOfMemoryError delete[] additionals; return nullptr; } env->SetIntArrayRegion(result, 0, jlen, additionals); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(result); delete[] additionals; return nullptr; } delete[] additionals; return result; } /* * Class: org_rocksdb_Options * Method: setMaxBytesForLevelMultiplierAdditional * Signature: (J[I)V */ void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle, jintArray jmax_bytes_for_level_multiplier_additional) { jsize len = env->GetArrayLength(jmax_bytes_for_level_multiplier_additional); jint* additionals = env->GetIntArrayElements( jmax_bytes_for_level_multiplier_additional, nullptr); if (additionals == nullptr) { // exception thrown: OutOfMemoryError return; } auto* opt = reinterpret_cast(jhandle); opt->max_bytes_for_level_multiplier_additional.clear(); for (jsize i = 0; i < len; i++) { opt->max_bytes_for_level_multiplier_additional.push_back( static_cast(additionals[i])); } env->ReleaseIntArrayElements(jmax_bytes_for_level_multiplier_additional, additionals, JNI_ABORT); } /* * Class: org_rocksdb_Options * Method: paranoidFileChecks * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_paranoidFileChecks( JNIEnv*, jobject, 
jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_file_checks; } /* * Class: org_rocksdb_Options * Method: setParanoidFileChecks * Signature: (JZ)V */ void Java_org_rocksdb_Options_setParanoidFileChecks( JNIEnv*, jobject, jlong jhandle, jboolean jparanoid_file_checks) { reinterpret_cast(jhandle)->paranoid_file_checks = static_cast(jparanoid_file_checks); } /* * Class: org_rocksdb_Options * Method: setCompactionPriority * Signature: (JB)V */ void Java_org_rocksdb_Options_setCompactionPriority( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_priority_value) { auto* opts = reinterpret_cast(jhandle); opts->compaction_pri = ROCKSDB_NAMESPACE::CompactionPriorityJni::toCppCompactionPriority( jcompaction_priority_value); } /* * Class: org_rocksdb_Options * Method: compactionPriority * Signature: (J)B */ jbyte Java_org_rocksdb_Options_compactionPriority( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionPriorityJni::toJavaCompactionPriority( opts->compaction_pri); } /* * Class: org_rocksdb_Options * Method: setReportBgIoStats * Signature: (JZ)V */ void Java_org_rocksdb_Options_setReportBgIoStats( JNIEnv*, jobject, jlong jhandle, jboolean jreport_bg_io_stats) { auto* opts = reinterpret_cast(jhandle); opts->report_bg_io_stats = static_cast(jreport_bg_io_stats); } /* * Class: org_rocksdb_Options * Method: reportBgIoStats * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_reportBgIoStats( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->report_bg_io_stats); } /* * Class: org_rocksdb_Options * Method: setTtl * Signature: (JJ)V */ void Java_org_rocksdb_Options_setTtl( JNIEnv*, jobject, jlong jhandle, jlong jttl) { auto* opts = reinterpret_cast(jhandle); opts->ttl = static_cast(jttl); } /* * Class: org_rocksdb_Options * Method: ttl * Signature: (J)J */ jlong Java_org_rocksdb_Options_ttl( JNIEnv*, jobject, jlong jhandle) { auto* opts = 
reinterpret_cast(jhandle); return static_cast(opts->ttl); } /* * Class: org_rocksdb_Options * Method: setCompactionOptionsUniversal * Signature: (JJ)V */ void Java_org_rocksdb_Options_setCompactionOptionsUniversal( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_universal_handle) { auto* opts = reinterpret_cast(jhandle); auto* opts_uni = reinterpret_cast( jcompaction_options_universal_handle); opts->compaction_options_universal = *opts_uni; } /* * Class: org_rocksdb_Options * Method: setCompactionOptionsFIFO * Signature: (JJ)V */ void Java_org_rocksdb_Options_setCompactionOptionsFIFO( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_fifo_handle) { auto* opts = reinterpret_cast(jhandle); auto* opts_fifo = reinterpret_cast( jcompaction_options_fifo_handle); opts->compaction_options_fifo = *opts_fifo; } /* * Class: org_rocksdb_Options * Method: setForceConsistencyChecks * Signature: (JZ)V */ void Java_org_rocksdb_Options_setForceConsistencyChecks( JNIEnv*, jobject, jlong jhandle, jboolean jforce_consistency_checks) { auto* opts = reinterpret_cast(jhandle); opts->force_consistency_checks = static_cast(jforce_consistency_checks); } /* * Class: org_rocksdb_Options * Method: forceConsistencyChecks * Signature: (J)Z */ jboolean Java_org_rocksdb_Options_forceConsistencyChecks( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->force_consistency_checks); } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::ColumnFamilyOptions /* * Class: org_rocksdb_ColumnFamilyOptions * Method: newColumnFamilyOptions * Signature: ()J */ jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptions( JNIEnv*, jclass) { auto* op = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(); return reinterpret_cast(op); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: copyColumnFamilyOptions * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_copyColumnFamilyOptions( 
JNIEnv*, jclass, jlong jhandle) {
  // Copy-construct a new native object from the one behind jhandle.
  auto new_opt = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(
      *(reinterpret_cast<ROCKSDB_NAMESPACE::ColumnFamilyOptions*>(jhandle)));
  return reinterpret_cast<jlong>(new_opt);
}

/*
 * Class:     org_rocksdb_ColumnFamilyOptions
 * Method:    newColumnFamilyOptionsFromOptions
 * Signature: (J)J
 */
jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptionsFromOptions(
    JNIEnv*, jclass, jlong joptions_handle) {
  // Build a ColumnFamilyOptions from the Options object behind the handle.
  auto new_opt = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(
      *reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(joptions_handle));
  return reinterpret_cast<jlong>(new_opt);
}

/*
 * Class:     org_rocksdb_ColumnFamilyOptions
 * Method:    getColumnFamilyOptionsFromProps
 * Signature: (JLjava/lang/String;)J
 *
 * Returns the handle of a newly allocated ColumnFamilyOptions parsed from
 * the option string, or 0 if the string could not be read or parsed.
 */
jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__JLjava_lang_String_2(
    JNIEnv* env, jclass, jlong cfg_handle, jstring jopt_string) {
  const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr);
  if (opt_string == nullptr) {
    // exception thrown: OutOfMemoryError
    return 0;
  }
  auto* config_options =
      reinterpret_cast<ROCKSDB_NAMESPACE::ConfigOptions*>(cfg_handle);
  auto* cf_options = new ROCKSDB_NAMESPACE::ColumnFamilyOptions();
  ROCKSDB_NAMESPACE::Status status =
      ROCKSDB_NAMESPACE::GetColumnFamilyOptionsFromString(
          *config_options, ROCKSDB_NAMESPACE::ColumnFamilyOptions(),
          opt_string, cf_options);

  env->ReleaseStringUTFChars(jopt_string, opt_string);

  // Check if ColumnFamilyOptions creation was possible.
  jlong ret_value = 0;
  if (status.ok()) {
    ret_value = reinterpret_cast<jlong>(cf_options);
  } else {
    // if operation failed the ColumnFamilyOptions need to be deleted
    // again to prevent a memory leak.
delete cf_options; } return ret_value; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: getColumnFamilyOptionsFromProps * Signature: (Ljava/util/String;)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__Ljava_lang_String_2( JNIEnv* env, jclass, jstring jopt_string) { const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); if (opt_string == nullptr) { // exception thrown: OutOfMemoryError return 0; } auto* cf_options = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(); ROCKSDB_NAMESPACE::Status status = ROCKSDB_NAMESPACE::GetColumnFamilyOptionsFromString( ROCKSDB_NAMESPACE::ColumnFamilyOptions(), opt_string, cf_options); env->ReleaseStringUTFChars(jopt_string, opt_string); // Check if ColumnFamilyOptions creation was possible. jlong ret_value = 0; if (status.ok()) { ret_value = reinterpret_cast(cf_options); } else { // if operation failed the ColumnFamilyOptions need to be deleted // again to prevent a memory leak. delete cf_options; } return ret_value; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_ColumnFamilyOptions_disposeInternal( JNIEnv*, jobject, jlong handle) { auto* cfo = reinterpret_cast(handle); assert(cfo != nullptr); delete cfo; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: optimizeForSmallDb * Signature: (J)V */ void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb( JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle) ->OptimizeForSmallDb(); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: optimizeForPointLookup * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_optimizeForPointLookup( JNIEnv*, jobject, jlong jhandle, jlong block_cache_size_mb) { reinterpret_cast(jhandle) ->OptimizeForPointLookup(block_cache_size_mb); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: optimizeLevelStyleCompaction * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_optimizeLevelStyleCompaction( 
JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeLevelStyleCompaction(memtable_memory_budget); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: optimizeUniversalStyleCompaction * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_optimizeUniversalStyleCompaction( JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeUniversalStyleCompaction(memtable_memory_budget); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setComparatorHandle * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JI( JNIEnv*, jobject, jlong jhandle, jint builtinComparator) { switch (builtinComparator) { case 1: reinterpret_cast(jhandle) ->comparator = ROCKSDB_NAMESPACE::ReverseBytewiseComparator(); break; default: reinterpret_cast(jhandle) ->comparator = ROCKSDB_NAMESPACE::BytewiseComparator(); break; } } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setComparatorHandle * Signature: (JJB)V */ void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JJB( JNIEnv*, jobject, jlong jopt_handle, jlong jcomparator_handle, jbyte jcomparator_type) { ROCKSDB_NAMESPACE::Comparator* comparator = nullptr; switch (jcomparator_type) { // JAVA_COMPARATOR case 0x0: comparator = reinterpret_cast( jcomparator_handle); break; // JAVA_NATIVE_COMPARATOR_WRAPPER case 0x1: comparator = reinterpret_cast(jcomparator_handle); break; } auto* opt = reinterpret_cast(jopt_handle); opt->comparator = comparator; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMergeOperatorName * Signature: (JJjava/lang/String)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperatorName( JNIEnv* env, jobject, jlong jhandle, jstring jop_name) { auto* options = reinterpret_cast(jhandle); const char* op_name = env->GetStringUTFChars(jop_name, nullptr); if (op_name == nullptr) { // exception thrown: OutOfMemoryError return; } options->merge_operator = 
ROCKSDB_NAMESPACE::MergeOperators::CreateFromStringId(op_name); env->ReleaseStringUTFChars(jop_name, op_name); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMergeOperator * Signature: (JJjava/lang/String)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperator( JNIEnv*, jobject, jlong jhandle, jlong mergeOperatorHandle) { reinterpret_cast(jhandle) ->merge_operator = *(reinterpret_cast*>( mergeOperatorHandle)); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompactionFilterHandle * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilter_handle) { reinterpret_cast(jopt_handle) ->compaction_filter = reinterpret_cast( jcompactionfilter_handle); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompactionFilterFactoryHandle * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilterfactory_handle) { auto* cff_factory = reinterpret_cast< std::shared_ptr*>( jcompactionfilterfactory_handle); reinterpret_cast(jopt_handle) ->compaction_filter_factory = *cff_factory; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setWriteBufferSize * Signature: (JJ)I */ void Java_org_rocksdb_ColumnFamilyOptions_setWriteBufferSize( JNIEnv* env, jobject, jlong jhandle, jlong jwrite_buffer_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jwrite_buffer_size); if (s.ok()) { reinterpret_cast(jhandle) ->write_buffer_size = jwrite_buffer_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: writeBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_writeBufferSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_buffer_size; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxWriteBufferNumber * 
Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumber( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number) { reinterpret_cast(jhandle) ->max_write_buffer_number = jmax_write_buffer_number; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxWriteBufferNumber * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumber( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number; } /* * Method: setMemTableFactory * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMemTableFactory( JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { reinterpret_cast(jhandle) ->memtable_factory.reset( reinterpret_cast( jfactory_handle)); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: memTableFactoryName * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_ColumnFamilyOptions_memTableFactoryName( JNIEnv* env, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::MemTableRepFactory* tf = opt->memtable_factory.get(); // Should never be nullptr. 
// Default memtable factory is SkipListFactory assert(tf); // temporarly fix for the historical typo if (strcmp(tf->Name(), "HashLinkListRepFactory") == 0) { return env->NewStringUTF("HashLinkedListRepFactory"); } return env->NewStringUTF(tf->Name()); } /* * Method: useFixedLengthPrefixExtractor * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_useFixedLengthPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewFixedPrefixTransform( static_cast(jprefix_length))); } /* * Method: useCappedPrefixExtractor * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_useCappedPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewCappedPrefixTransform( static_cast(jprefix_length))); } /* * Method: setTableFactory * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setTableFactory( JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { reinterpret_cast(jhandle) ->table_factory.reset( reinterpret_cast(jfactory_handle)); } /* * Method: tableFactoryName * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_ColumnFamilyOptions_tableFactoryName( JNIEnv* env, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TableFactory* tf = opt->table_factory.get(); // Should never be nullptr. 
// Default memtable factory is SkipListFactory assert(tf); return env->NewStringUTF(tf->Name()); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: minWriteBufferNumberToMerge * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_minWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMinWriteBufferNumberToMerge * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMinWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle, jint jmin_write_buffer_number_to_merge) { reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge = static_cast(jmin_write_buffer_number_to_merge); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxWriteBufferNumberToMaintain * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number_to_maintain; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxWriteBufferNumberToMaintain * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number_to_maintain) { reinterpret_cast(jhandle) ->max_write_buffer_number_to_maintain = static_cast(jmax_write_buffer_number_to_maintain); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompressionType * Signature: (JB)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* cf_opts = reinterpret_cast(jhandle); cf_opts->compression = ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( jcompression_type_value); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: compressionType * Signature: (J)B */ jbyte Java_org_rocksdb_ColumnFamilyOptions_compressionType( JNIEnv*, jobject, jlong jhandle) { auto* cf_opts 
= reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( cf_opts->compression); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompressionPerLevel * Signature: (J[B)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompressionPerLevel( JNIEnv* env, jobject, jlong jhandle, jbyteArray jcompressionLevels) { auto* options = reinterpret_cast(jhandle); auto uptr_compression_levels = rocksdb_compression_vector_helper(env, jcompressionLevels); if (!uptr_compression_levels) { // exception occurred return; } options->compression_per_level = *(uptr_compression_levels.get()); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: compressionPerLevel * Signature: (J)[B */ jbyteArray Java_org_rocksdb_ColumnFamilyOptions_compressionPerLevel( JNIEnv* env, jobject, jlong jhandle) { auto* cf_options = reinterpret_cast(jhandle); return rocksdb_compression_list_helper(env, cf_options->compression_per_level); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setBottommostCompressionType * Signature: (JB)V */ void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* cf_options = reinterpret_cast(jhandle); cf_options->bottommost_compression = ROCKSDB_NAMESPACE::CompressionTypeJni::toCppCompressionType( jcompression_type_value); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: bottommostCompressionType * Signature: (J)B */ jbyte Java_org_rocksdb_ColumnFamilyOptions_bottommostCompressionType( JNIEnv*, jobject, jlong jhandle) { auto* cf_options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( cf_options->bottommost_compression); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setBottommostCompressionOptions * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong jbottommost_compression_options_handle) { 
auto* cf_options = reinterpret_cast(jhandle); auto* bottommost_compression_options = reinterpret_cast( jbottommost_compression_options_handle); cf_options->bottommost_compression_opts = *bottommost_compression_options; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompressionOptions * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong jcompression_options_handle) { auto* cf_options = reinterpret_cast(jhandle); auto* compression_options = reinterpret_cast( jcompression_options_handle); cf_options->compression_opts = *compression_options; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompactionStyle * Signature: (JB)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionStyle( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_style) { auto* cf_options = reinterpret_cast(jhandle); cf_options->compaction_style = ROCKSDB_NAMESPACE::CompactionStyleJni::toCppCompactionStyle( jcompaction_style); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: compactionStyle * Signature: (J)B */ jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionStyle( JNIEnv*, jobject, jlong jhandle) { auto* cf_options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionStyleJni::toJavaCompactionStyle( cf_options->compaction_style); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxTableFilesSizeFIFO * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size = static_cast(jmax_table_files_size); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxTableFilesSizeFIFO * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_maxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size; } /* * Class: 
org_rocksdb_ColumnFamilyOptions * Method: numLevels * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_numLevels( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->num_levels; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setNumLevels * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setNumLevels( JNIEnv*, jobject, jlong jhandle, jint jnum_levels) { reinterpret_cast(jhandle) ->num_levels = static_cast(jnum_levels); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: levelZeroFileNumCompactionTrigger * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevelZeroFileNumCompactionTrigger * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = static_cast(jlevel0_file_num_compaction_trigger); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: levelZeroSlowdownWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevelSlowdownWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = static_cast(jlevel0_slowdown_writes_trigger); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: levelZeroStopWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroStopWritesTrigger( JNIEnv*, jobject, 
jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevelStopWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroStopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = static_cast(jlevel0_stop_writes_trigger); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: targetFileSizeBase * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeBase( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_base; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setTargetFileSizeBase * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeBase( JNIEnv*, jobject, jlong jhandle, jlong jtarget_file_size_base) { reinterpret_cast(jhandle) ->target_file_size_base = static_cast(jtarget_file_size_base); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: targetFileSizeMultiplier * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeMultiplier( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_multiplier; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setTargetFileSizeMultiplier * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeMultiplier( JNIEnv*, jobject, jlong jhandle, jint jtarget_file_size_multiplier) { reinterpret_cast(jhandle) ->target_file_size_multiplier = static_cast(jtarget_file_size_multiplier); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxBytesForLevelBase * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelBase( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_base; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxBytesForLevelBase * Signature: (JJ)V */ void 
Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelBase( JNIEnv*, jobject, jlong jhandle, jlong jmax_bytes_for_level_base) { reinterpret_cast(jhandle) ->max_bytes_for_level_base = static_cast(jmax_bytes_for_level_base); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: levelCompactionDynamicLevelBytes * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_levelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevelCompactionDynamicLevelBytes * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle, jboolean jenable_dynamic_level_bytes) { reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes = (jenable_dynamic_level_bytes); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxBytesForLevelMultiplier * Signature: (J)D */ jdouble Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxBytesForLevelMultiplier * Signature: (JD)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle, jdouble jmax_bytes_for_level_multiplier) { reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier = static_cast(jmax_bytes_for_level_multiplier); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxCompactionBytes * Signature: (J)I */ jlong Java_org_rocksdb_ColumnFamilyOptions_maxCompactionBytes( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle) ->max_compaction_bytes); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxCompactionBytes * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxCompactionBytes( JNIEnv*, jobject, jlong jhandle, jlong 
jmax_compaction_bytes) { reinterpret_cast(jhandle) ->max_compaction_bytes = static_cast(jmax_compaction_bytes); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: arenaBlockSize * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_arenaBlockSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->arena_block_size; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setArenaBlockSize * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setArenaBlockSize( JNIEnv* env, jobject, jlong jhandle, jlong jarena_block_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jarena_block_size); if (s.ok()) { reinterpret_cast(jhandle) ->arena_block_size = jarena_block_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: disableAutoCompactions * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_disableAutoCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->disable_auto_compactions; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setDisableAutoCompactions * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setDisableAutoCompactions( JNIEnv*, jobject, jlong jhandle, jboolean jdisable_auto_compactions) { reinterpret_cast(jhandle) ->disable_auto_compactions = static_cast(jdisable_auto_compactions); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxSequentialSkipInIterations * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_maxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxSequentialSkipInIterations * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle, jlong jmax_sequential_skip_in_iterations) { reinterpret_cast(jhandle) 
->max_sequential_skip_in_iterations = static_cast(jmax_sequential_skip_in_iterations); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: inplaceUpdateSupport * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_inplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_support; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setInplaceUpdateSupport * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle, jboolean jinplace_update_support) { reinterpret_cast(jhandle) ->inplace_update_support = static_cast(jinplace_update_support); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: inplaceUpdateNumLocks * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_inplaceUpdateNumLocks( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_num_locks; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setInplaceUpdateNumLocks * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateNumLocks( JNIEnv* env, jobject, jlong jhandle, jlong jinplace_update_num_locks) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jinplace_update_num_locks); if (s.ok()) { reinterpret_cast(jhandle) ->inplace_update_num_locks = jinplace_update_num_locks; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: memtablePrefixBloomSizeRatio * Signature: (J)I */ jdouble Java_org_rocksdb_ColumnFamilyOptions_memtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_prefix_bloom_size_ratio; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMemtablePrefixBloomSizeRatio * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMemtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle, jdouble jmemtable_prefix_bloom_size_ratio) { 
reinterpret_cast(jhandle) ->memtable_prefix_bloom_size_ratio = static_cast(jmemtable_prefix_bloom_size_ratio); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: bloomLocality * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_bloomLocality( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->bloom_locality; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setBloomLocality * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setBloomLocality( JNIEnv*, jobject, jlong jhandle, jint jbloom_locality) { reinterpret_cast(jhandle) ->bloom_locality = static_cast(jbloom_locality); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxSuccessiveMerges * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_maxSuccessiveMerges( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_successive_merges; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxSuccessiveMerges * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxSuccessiveMerges( JNIEnv* env, jobject, jlong jhandle, jlong jmax_successive_merges) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmax_successive_merges); if (s.ok()) { reinterpret_cast(jhandle) ->max_successive_merges = jmax_successive_merges; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: optimizeFiltersForHits * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_optimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->optimize_filters_for_hits; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setOptimizeFiltersForHits * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setOptimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle, jboolean joptimize_filters_for_hits) { reinterpret_cast(jhandle) ->optimize_filters_for_hits = static_cast(joptimize_filters_for_hits); } /* * 
Class: org_rocksdb_ColumnFamilyOptions * Method: memtableHugePageSize * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_memtableHugePageSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_huge_page_size; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMemtableHugePageSize * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMemtableHugePageSize( JNIEnv* env, jobject, jlong jhandle, jlong jmemtable_huge_page_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmemtable_huge_page_size); if (s.ok()) { reinterpret_cast(jhandle) ->memtable_huge_page_size = jmemtable_huge_page_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: softPendingCompactionBytesLimit * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_softPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->soft_pending_compaction_bytes_limit; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setSoftPendingCompactionBytesLimit * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setSoftPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jsoft_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) ->soft_pending_compaction_bytes_limit = static_cast(jsoft_pending_compaction_bytes_limit); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: softHardCompactionBytesLimit * Signature: (J)J */ jlong Java_org_rocksdb_ColumnFamilyOptions_hardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->hard_pending_compaction_bytes_limit; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setHardPendingCompactionBytesLimit * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setHardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jhard_pending_compaction_bytes_limit) { 
reinterpret_cast(jhandle) ->hard_pending_compaction_bytes_limit = static_cast(jhard_pending_compaction_bytes_limit); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: level0FileNumCompactionTrigger * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_level0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevel0FileNumCompactionTrigger * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevel0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = static_cast(jlevel0_file_num_compaction_trigger); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: level0SlowdownWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_level0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevel0SlowdownWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevel0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = static_cast(jlevel0_slowdown_writes_trigger); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: level0StopWritesTrigger * Signature: (J)I */ jint Java_org_rocksdb_ColumnFamilyOptions_level0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setLevel0StopWritesTrigger * Signature: (JI)V */ void Java_org_rocksdb_ColumnFamilyOptions_setLevel0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = 
static_cast(jlevel0_stop_writes_trigger); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: maxBytesForLevelMultiplierAdditional * Signature: (J)[I */ jintArray Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle) { auto mbflma = reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier_additional; const size_t size = mbflma.size(); jint* additionals = new jint[size]; for (size_t i = 0; i < size; i++) { additionals[i] = static_cast(mbflma[i]); } jsize jlen = static_cast(size); jintArray result = env->NewIntArray(jlen); if (result == nullptr) { // exception thrown: OutOfMemoryError delete[] additionals; return nullptr; } env->SetIntArrayRegion(result, 0, jlen, additionals); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(result); delete[] additionals; return nullptr; } delete[] additionals; return result; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setMaxBytesForLevelMultiplierAdditional * Signature: (J[I)V */ void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle, jintArray jmax_bytes_for_level_multiplier_additional) { jsize len = env->GetArrayLength(jmax_bytes_for_level_multiplier_additional); jint* additionals = env->GetIntArrayElements(jmax_bytes_for_level_multiplier_additional, 0); if (additionals == nullptr) { // exception thrown: OutOfMemoryError return; } auto* cf_opt = reinterpret_cast(jhandle); cf_opt->max_bytes_for_level_multiplier_additional.clear(); for (jsize i = 0; i < len; i++) { cf_opt->max_bytes_for_level_multiplier_additional.push_back( static_cast(additionals[i])); } env->ReleaseIntArrayElements(jmax_bytes_for_level_multiplier_additional, additionals, JNI_ABORT); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: paranoidFileChecks * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_paranoidFileChecks( JNIEnv*, jobject, jlong jhandle) 
{ return reinterpret_cast(jhandle) ->paranoid_file_checks; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setParanoidFileChecks * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setParanoidFileChecks( JNIEnv*, jobject, jlong jhandle, jboolean jparanoid_file_checks) { reinterpret_cast(jhandle) ->paranoid_file_checks = static_cast(jparanoid_file_checks); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompactionPriority * Signature: (JB)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionPriority( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_priority_value) { auto* cf_opts = reinterpret_cast(jhandle); cf_opts->compaction_pri = ROCKSDB_NAMESPACE::CompactionPriorityJni::toCppCompactionPriority( jcompaction_priority_value); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: compactionPriority * Signature: (J)B */ jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionPriority( JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionPriorityJni::toJavaCompactionPriority( cf_opts->compaction_pri); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setReportBgIoStats * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setReportBgIoStats( JNIEnv*, jobject, jlong jhandle, jboolean jreport_bg_io_stats) { auto* cf_opts = reinterpret_cast(jhandle); cf_opts->report_bg_io_stats = static_cast(jreport_bg_io_stats); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: reportBgIoStats * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_reportBgIoStats( JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); return static_cast(cf_opts->report_bg_io_stats); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setTtl * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setTtl( JNIEnv*, jobject, jlong jhandle, jlong jttl) { auto* cf_opts = reinterpret_cast(jhandle); cf_opts->ttl = static_cast(jttl); } /* * 
Class: org_rocksdb_ColumnFamilyOptions * Method: ttl * Signature: (J)J */ JNIEXPORT jlong JNICALL Java_org_rocksdb_ColumnFamilyOptions_ttl( JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); return static_cast(cf_opts->ttl); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompactionOptionsUniversal * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsUniversal( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_universal_handle) { auto* cf_opts = reinterpret_cast(jhandle); auto* opts_uni = reinterpret_cast( jcompaction_options_universal_handle); cf_opts->compaction_options_universal = *opts_uni; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setCompactionOptionsFIFO * Signature: (JJ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsFIFO( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_fifo_handle) { auto* cf_opts = reinterpret_cast(jhandle); auto* opts_fifo = reinterpret_cast( jcompaction_options_fifo_handle); cf_opts->compaction_options_fifo = *opts_fifo; } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: setForceConsistencyChecks * Signature: (JZ)V */ void Java_org_rocksdb_ColumnFamilyOptions_setForceConsistencyChecks( JNIEnv*, jobject, jlong jhandle, jboolean jforce_consistency_checks) { auto* cf_opts = reinterpret_cast(jhandle); cf_opts->force_consistency_checks = static_cast(jforce_consistency_checks); } /* * Class: org_rocksdb_ColumnFamilyOptions * Method: forceConsistencyChecks * Signature: (J)Z */ jboolean Java_org_rocksdb_ColumnFamilyOptions_forceConsistencyChecks( JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); return static_cast(cf_opts->force_consistency_checks); } ///////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DBOptions /* * Class: org_rocksdb_DBOptions * Method: newDBOptions * Signature: ()J */ jlong Java_org_rocksdb_DBOptions_newDBOptions( JNIEnv*, jclass) { 
auto* dbop = new ROCKSDB_NAMESPACE::DBOptions();
  return reinterpret_cast<jlong>(dbop);
}

/*
 * Class:     org_rocksdb_DBOptions
 * Method:    copyDBOptions
 * Signature: (J)J
 *
 * Copy-constructs a new heap DBOptions from the one behind jhandle and
 * returns the new object's address as a handle.
 */
jlong Java_org_rocksdb_DBOptions_copyDBOptions(
    JNIEnv*, jclass, jlong jhandle) {
  auto new_opt = new ROCKSDB_NAMESPACE::DBOptions(
      *(reinterpret_cast<ROCKSDB_NAMESPACE::DBOptions*>(jhandle)));
  return reinterpret_cast<jlong>(new_opt);
}

/*
 * Class:     org_rocksdb_DBOptions
 * Method:    newDBOptionsFromOptions
 * Signature: (J)J
 *
 * Constructs a new heap DBOptions from the object behind joptions_handle and
 * returns its address as a handle.
 */
jlong Java_org_rocksdb_DBOptions_newDBOptionsFromOptions(
    JNIEnv*, jclass, jlong joptions_handle) {
  // NOTE(review): the handle is assumed to address a
  // ROCKSDB_NAMESPACE::Options (per the method name) - confirm.
  auto new_opt = new ROCKSDB_NAMESPACE::DBOptions(
      *reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(joptions_handle));
  return reinterpret_cast<jlong>(new_opt);
}

/*
 * Class:     org_rocksdb_DBOptions
 * Method:    getDBOptionsFromProps
 * Signature: (JLjava/lang/String;)J
 *
 * Parses jopt_string with GetDBOptionsFromString using the configuration
 * object behind config_handle. Returns a handle to a new DBOptions on
 * success, or 0 if the string could not be obtained or parsing failed.
 */
jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__JLjava_lang_String_2(
    JNIEnv* env, jclass, jlong config_handle, jstring jopt_string) {
  const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr);
  if (opt_string == nullptr) {
    // exception thrown: OutOfMemoryError
    return 0;
  }

  // NOTE(review): pointee type inferred from the first argument of
  // GetDBOptionsFromString - confirm it is ConfigOptions.
  auto* config_options =
      reinterpret_cast<ROCKSDB_NAMESPACE::ConfigOptions*>(config_handle);
  auto* db_options = new ROCKSDB_NAMESPACE::DBOptions();
  ROCKSDB_NAMESPACE::Status status = ROCKSDB_NAMESPACE::GetDBOptionsFromString(
      *config_options, ROCKSDB_NAMESPACE::DBOptions(), opt_string, db_options);

  env->ReleaseStringUTFChars(jopt_string, opt_string);

  // Check if DBOptions creation was possible.
  jlong ret_value = 0;
  if (status.ok()) {
    ret_value = reinterpret_cast<jlong>(db_options);
  } else {
    // if operation failed the DBOptions need to be deleted
    // again to prevent a memory leak.
delete db_options; } return ret_value; } /* * Class: org_rocksdb_DBOptions * Method: getDBOptionsFromProps * Signature: (Ljava/util/String;)J */ jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__Ljava_lang_String_2( JNIEnv* env, jclass, jstring jopt_string) { const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); if (opt_string == nullptr) { // exception thrown: OutOfMemoryError return 0; } auto* db_options = new ROCKSDB_NAMESPACE::DBOptions(); ROCKSDB_NAMESPACE::Status status = ROCKSDB_NAMESPACE::GetDBOptionsFromString( ROCKSDB_NAMESPACE::DBOptions(), opt_string, db_options); env->ReleaseStringUTFChars(jopt_string, opt_string); // Check if DBOptions creation was possible. jlong ret_value = 0; if (status.ok()) { ret_value = reinterpret_cast(db_options); } else { // if operation failed the DBOptions need to be deleted // again to prevent a memory leak. delete db_options; } return ret_value; } /* * Class: org_rocksdb_DBOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_DBOptions_disposeInternal( JNIEnv*, jobject, jlong handle) { auto* dbo = reinterpret_cast(handle); assert(dbo != nullptr); delete dbo; } /* * Class: org_rocksdb_DBOptions * Method: optimizeForSmallDb * Signature: (J)V */ void Java_org_rocksdb_DBOptions_optimizeForSmallDb( JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle) ->OptimizeForSmallDb(); } /* * Class: org_rocksdb_DBOptions * Method: setEnv * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setEnv( JNIEnv*, jobject, jlong jhandle, jlong jenv_handle) { reinterpret_cast(jhandle)->env = reinterpret_cast(jenv_handle); } /* * Class: org_rocksdb_DBOptions * Method: setIncreaseParallelism * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setIncreaseParallelism( JNIEnv*, jobject, jlong jhandle, jint totalThreads) { reinterpret_cast(jhandle)->IncreaseParallelism( static_cast(totalThreads)); } /* * Class: org_rocksdb_DBOptions * Method: setCreateIfMissing * Signature: (JZ)V */ void 
Java_org_rocksdb_DBOptions_setCreateIfMissing( JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle)->create_if_missing = flag; } /* * Class: org_rocksdb_DBOptions * Method: createIfMissing * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_createIfMissing( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_if_missing; } /* * Class: org_rocksdb_DBOptions * Method: setCreateMissingColumnFamilies * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setCreateMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle) ->create_missing_column_families = flag; } /* * Class: org_rocksdb_DBOptions * Method: createMissingColumnFamilies * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_createMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_missing_column_families; } /* * Class: org_rocksdb_DBOptions * Method: setErrorIfExists * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setErrorIfExists( JNIEnv*, jobject, jlong jhandle, jboolean error_if_exists) { reinterpret_cast(jhandle)->error_if_exists = static_cast(error_if_exists); } /* * Class: org_rocksdb_DBOptions * Method: errorIfExists * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_errorIfExists( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->error_if_exists; } /* * Class: org_rocksdb_DBOptions * Method: setParanoidChecks * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setParanoidChecks( JNIEnv*, jobject, jlong jhandle, jboolean paranoid_checks) { reinterpret_cast(jhandle)->paranoid_checks = static_cast(paranoid_checks); } /* * Class: org_rocksdb_DBOptions * Method: paranoidChecks * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_paranoidChecks( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_checks; } /* * Class: org_rocksdb_DBOptions * Method: setRateLimiter * Signature: (JJ)V */ void 
Java_org_rocksdb_DBOptions_setRateLimiter( JNIEnv*, jobject, jlong jhandle, jlong jrate_limiter_handle) { std::shared_ptr* pRateLimiter = reinterpret_cast*>( jrate_limiter_handle); reinterpret_cast(jhandle)->rate_limiter = *pRateLimiter; } /* * Class: org_rocksdb_DBOptions * Method: setSstFileManager * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setSstFileManager( JNIEnv*, jobject, jlong jhandle, jlong jsst_file_manager_handle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jsst_file_manager_handle); reinterpret_cast(jhandle)->sst_file_manager = *sptr_sst_file_manager; } /* * Class: org_rocksdb_DBOptions * Method: setLogger * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setLogger( JNIEnv*, jobject, jlong jhandle, jlong jlogger_handle) { std::shared_ptr* pLogger = reinterpret_cast*>( jlogger_handle); reinterpret_cast(jhandle)->info_log = *pLogger; } /* * Class: org_rocksdb_DBOptions * Method: setInfoLogLevel * Signature: (JB)V */ void Java_org_rocksdb_DBOptions_setInfoLogLevel( JNIEnv*, jobject, jlong jhandle, jbyte jlog_level) { reinterpret_cast(jhandle)->info_log_level = static_cast(jlog_level); } /* * Class: org_rocksdb_DBOptions * Method: infoLogLevel * Signature: (J)B */ jbyte Java_org_rocksdb_DBOptions_infoLogLevel( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle)->info_log_level); } /* * Class: org_rocksdb_DBOptions * Method: setMaxTotalWalSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setMaxTotalWalSize( JNIEnv*, jobject, jlong jhandle, jlong jmax_total_wal_size) { reinterpret_cast(jhandle)->max_total_wal_size = static_cast(jmax_total_wal_size); } /* * Class: org_rocksdb_DBOptions * Method: maxTotalWalSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_maxTotalWalSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_total_wal_size; } /* * Class: org_rocksdb_DBOptions * Method: setMaxOpenFiles * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setMaxOpenFiles( 
JNIEnv*, jobject, jlong jhandle, jint max_open_files) { reinterpret_cast(jhandle)->max_open_files = static_cast(max_open_files); } /* * Class: org_rocksdb_DBOptions * Method: maxOpenFiles * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_maxOpenFiles( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_open_files; } /* * Class: org_rocksdb_DBOptions * Method: setMaxFileOpeningThreads * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setMaxFileOpeningThreads( JNIEnv*, jobject, jlong jhandle, jint jmax_file_opening_threads) { reinterpret_cast(jhandle) ->max_file_opening_threads = static_cast(jmax_file_opening_threads); } /* * Class: org_rocksdb_DBOptions * Method: maxFileOpeningThreads * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_maxFileOpeningThreads( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_file_opening_threads); } /* * Class: org_rocksdb_DBOptions * Method: setStatistics * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setStatistics( JNIEnv*, jobject, jlong jhandle, jlong jstatistics_handle) { auto* opt = reinterpret_cast(jhandle); auto* pSptr = reinterpret_cast*>( jstatistics_handle); opt->statistics = *pSptr; } /* * Class: org_rocksdb_DBOptions * Method: statistics * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_statistics( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); std::shared_ptr sptr = opt->statistics; if (sptr == nullptr) { return 0; } else { std::shared_ptr* pSptr = new std::shared_ptr(sptr); return reinterpret_cast(pSptr); } } /* * Class: org_rocksdb_DBOptions * Method: setUseFsync * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setUseFsync( JNIEnv*, jobject, jlong jhandle, jboolean use_fsync) { reinterpret_cast(jhandle)->use_fsync = static_cast(use_fsync); } /* * Class: org_rocksdb_DBOptions * Method: useFsync * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_useFsync( JNIEnv*, jobject, jlong jhandle) { 
return reinterpret_cast(jhandle)->use_fsync; } /* * Class: org_rocksdb_DBOptions * Method: setDbPaths * Signature: (J[Ljava/lang/String;[J)V */ void Java_org_rocksdb_DBOptions_setDbPaths( JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { std::vector db_paths; jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, nullptr); if (ptr_jtarget_size == nullptr) { // exception thrown: OutOfMemoryError return; } jboolean has_exception = JNI_FALSE; const jsize len = env->GetArrayLength(jpaths); for (jsize i = 0; i < len; i++) { jobject jpath = reinterpret_cast(env->GetObjectArrayElement(jpaths, i)); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } std::string path = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, static_cast(jpath), &has_exception); env->DeleteLocalRef(jpath); if (has_exception == JNI_TRUE) { env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } jlong jtarget_size = ptr_jtarget_size[i]; db_paths.push_back( ROCKSDB_NAMESPACE::DbPath(path, static_cast(jtarget_size))); } env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); auto* opt = reinterpret_cast(jhandle); opt->db_paths = db_paths; } /* * Class: org_rocksdb_DBOptions * Method: dbPathsLen * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_dbPathsLen( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_paths.size()); } /* * Class: org_rocksdb_DBOptions * Method: dbPaths * Signature: (J[Ljava/lang/String;[J)V */ void Java_org_rocksdb_DBOptions_dbPaths( JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { jlong* ptr_jtarget_size = env->GetLongArrayElements(jtarget_sizes, nullptr); if (ptr_jtarget_size == nullptr) { // exception thrown: OutOfMemoryError return; } auto* opt = reinterpret_cast(jhandle); const jsize 
len = env->GetArrayLength(jpaths); for (jsize i = 0; i < len; i++) { ROCKSDB_NAMESPACE::DbPath db_path = opt->db_paths[i]; jstring jpath = env->NewStringUTF(db_path.path.c_str()); if (jpath == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } env->SetObjectArrayElement(jpaths, i, jpath); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jpath); env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_ABORT); return; } ptr_jtarget_size[i] = static_cast(db_path.target_size); } env->ReleaseLongArrayElements(jtarget_sizes, ptr_jtarget_size, JNI_COMMIT); } /* * Class: org_rocksdb_DBOptions * Method: setDbLogDir * Signature: (JLjava/lang/String)V */ void Java_org_rocksdb_DBOptions_setDbLogDir( JNIEnv* env, jobject, jlong jhandle, jstring jdb_log_dir) { const char* log_dir = env->GetStringUTFChars(jdb_log_dir, nullptr); if (log_dir == nullptr) { // exception thrown: OutOfMemoryError return; } reinterpret_cast(jhandle)->db_log_dir.assign( log_dir); env->ReleaseStringUTFChars(jdb_log_dir, log_dir); } /* * Class: org_rocksdb_DBOptions * Method: dbLogDir * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_DBOptions_dbLogDir( JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle) ->db_log_dir.c_str()); } /* * Class: org_rocksdb_DBOptions * Method: setWalDir * Signature: (JLjava/lang/String)V */ void Java_org_rocksdb_DBOptions_setWalDir( JNIEnv* env, jobject, jlong jhandle, jstring jwal_dir) { const char* wal_dir = env->GetStringUTFChars(jwal_dir, 0); reinterpret_cast(jhandle)->wal_dir.assign( wal_dir); env->ReleaseStringUTFChars(jwal_dir, wal_dir); } /* * Class: org_rocksdb_DBOptions * Method: walDir * Signature: (J)Ljava/lang/String */ jstring Java_org_rocksdb_DBOptions_walDir( JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle) ->wal_dir.c_str()); } 
/* * Class: org_rocksdb_DBOptions * Method: setDeleteObsoleteFilesPeriodMicros * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setDeleteObsoleteFilesPeriodMicros( JNIEnv*, jobject, jlong jhandle, jlong micros) { reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros = static_cast(micros); } /* * Class: org_rocksdb_DBOptions * Method: deleteObsoleteFilesPeriodMicros * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_deleteObsoleteFilesPeriodMicros( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros; } /* * Class: org_rocksdb_DBOptions * Method: setBaseBackgroundCompactions * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setBaseBackgroundCompactions( JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle) ->base_background_compactions = static_cast(max); } /* * Class: org_rocksdb_DBOptions * Method: baseBackgroundCompactions * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_baseBackgroundCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->base_background_compactions; } /* * Class: org_rocksdb_DBOptions * Method: setMaxBackgroundCompactions * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setMaxBackgroundCompactions( JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle) ->max_background_compactions = static_cast(max); } /* * Class: org_rocksdb_DBOptions * Method: maxBackgroundCompactions * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_maxBackgroundCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_compactions; } /* * Class: org_rocksdb_DBOptions * Method: setMaxSubcompactions * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setMaxSubcompactions( JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle)->max_subcompactions = static_cast(max); } /* * Class: org_rocksdb_DBOptions * Method: maxSubcompactions * Signature: (J)I */ jint 
Java_org_rocksdb_DBOptions_maxSubcompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_subcompactions; } /* * Class: org_rocksdb_DBOptions * Method: setMaxBackgroundFlushes * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setMaxBackgroundFlushes( JNIEnv*, jobject, jlong jhandle, jint max_background_flushes) { reinterpret_cast(jhandle) ->max_background_flushes = static_cast(max_background_flushes); } /* * Class: org_rocksdb_DBOptions * Method: maxBackgroundFlushes * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_maxBackgroundFlushes( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_flushes; } /* * Class: org_rocksdb_DBOptions * Method: setMaxBackgroundJobs * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setMaxBackgroundJobs( JNIEnv*, jobject, jlong jhandle, jint max_background_jobs) { reinterpret_cast(jhandle) ->max_background_jobs = static_cast(max_background_jobs); } /* * Class: org_rocksdb_DBOptions * Method: maxBackgroundJobs * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_maxBackgroundJobs( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_jobs; } /* * Class: org_rocksdb_DBOptions * Method: setMaxLogFileSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setMaxLogFileSize( JNIEnv* env, jobject, jlong jhandle, jlong max_log_file_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(max_log_file_size); if (s.ok()) { reinterpret_cast(jhandle) ->max_log_file_size = max_log_file_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_DBOptions * Method: maxLogFileSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_maxLogFileSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_log_file_size; } /* * Class: org_rocksdb_DBOptions * Method: setLogFileTimeToRoll * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setLogFileTimeToRoll( 
JNIEnv* env, jobject, jlong jhandle, jlong log_file_time_to_roll) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( log_file_time_to_roll); if (s.ok()) { reinterpret_cast(jhandle) ->log_file_time_to_roll = log_file_time_to_roll; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_DBOptions * Method: logFileTimeToRoll * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_logFileTimeToRoll( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->log_file_time_to_roll; } /* * Class: org_rocksdb_DBOptions * Method: setKeepLogFileNum * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setKeepLogFileNum( JNIEnv* env, jobject, jlong jhandle, jlong keep_log_file_num) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(keep_log_file_num); if (s.ok()) { reinterpret_cast(jhandle) ->keep_log_file_num = keep_log_file_num; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_DBOptions * Method: keepLogFileNum * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_keepLogFileNum( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->keep_log_file_num; } /* * Class: org_rocksdb_DBOptions * Method: setRecycleLogFileNum * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setRecycleLogFileNum( JNIEnv* env, jobject, jlong jhandle, jlong recycle_log_file_num) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( recycle_log_file_num); if (s.ok()) { reinterpret_cast(jhandle) ->recycle_log_file_num = recycle_log_file_num; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_DBOptions * Method: recycleLogFileNum * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_recycleLogFileNum( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->recycle_log_file_num; } /* * Class: org_rocksdb_DBOptions * Method: setMaxManifestFileSize * Signature: (JJ)V */ 
void Java_org_rocksdb_DBOptions_setMaxManifestFileSize( JNIEnv*, jobject, jlong jhandle, jlong max_manifest_file_size) { reinterpret_cast(jhandle) ->max_manifest_file_size = static_cast(max_manifest_file_size); } /* * Class: org_rocksdb_DBOptions * Method: maxManifestFileSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_maxManifestFileSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_manifest_file_size; } /* * Class: org_rocksdb_DBOptions * Method: setTableCacheNumshardbits * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setTableCacheNumshardbits( JNIEnv*, jobject, jlong jhandle, jint table_cache_numshardbits) { reinterpret_cast(jhandle) ->table_cache_numshardbits = static_cast(table_cache_numshardbits); } /* * Class: org_rocksdb_DBOptions * Method: tableCacheNumshardbits * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_tableCacheNumshardbits( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->table_cache_numshardbits; } /* * Class: org_rocksdb_DBOptions * Method: setWalTtlSeconds * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWalTtlSeconds( JNIEnv*, jobject, jlong jhandle, jlong WAL_ttl_seconds) { reinterpret_cast(jhandle)->WAL_ttl_seconds = static_cast(WAL_ttl_seconds); } /* * Class: org_rocksdb_DBOptions * Method: walTtlSeconds * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_walTtlSeconds( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_ttl_seconds; } /* * Class: org_rocksdb_DBOptions * Method: setWalSizeLimitMB * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWalSizeLimitMB( JNIEnv*, jobject, jlong jhandle, jlong WAL_size_limit_MB) { reinterpret_cast(jhandle)->WAL_size_limit_MB = static_cast(WAL_size_limit_MB); } /* * Class: org_rocksdb_DBOptions * Method: walTtlSeconds * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_walSizeLimitMB( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_size_limit_MB; } /* * Class: 
org_rocksdb_DBOptions * Method: setManifestPreallocationSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setManifestPreallocationSize( JNIEnv* env, jobject, jlong jhandle, jlong preallocation_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( preallocation_size); if (s.ok()) { reinterpret_cast(jhandle) ->manifest_preallocation_size = preallocation_size; } else { ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_DBOptions * Method: manifestPreallocationSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_manifestPreallocationSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->manifest_preallocation_size; } /* * Class: org_rocksdb_DBOptions * Method: useDirectReads * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_useDirectReads( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_reads; } /* * Class: org_rocksdb_DBOptions * Method: setUseDirectReads * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setUseDirectReads( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_reads) { reinterpret_cast(jhandle)->use_direct_reads = static_cast(use_direct_reads); } /* * Class: org_rocksdb_DBOptions * Method: useDirectIoForFlushAndCompaction * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_useDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_io_for_flush_and_compaction; } /* * Class: org_rocksdb_DBOptions * Method: setUseDirectReads * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setUseDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_io_for_flush_and_compaction) { reinterpret_cast(jhandle) ->use_direct_io_for_flush_and_compaction = static_cast(use_direct_io_for_flush_and_compaction); } /* * Class: org_rocksdb_DBOptions * Method: setAllowFAllocate * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAllowFAllocate( 
JNIEnv*, jobject, jlong jhandle, jboolean jallow_fallocate) { reinterpret_cast(jhandle)->allow_fallocate = static_cast(jallow_fallocate); } /* * Class: org_rocksdb_DBOptions * Method: allowFAllocate * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_allowFAllocate( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_fallocate); } /* * Class: org_rocksdb_DBOptions * Method: setAllowMmapReads * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAllowMmapReads( JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_reads) { reinterpret_cast(jhandle)->allow_mmap_reads = static_cast(allow_mmap_reads); } /* * Class: org_rocksdb_DBOptions * Method: allowMmapReads * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_allowMmapReads( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_reads; } /* * Class: org_rocksdb_DBOptions * Method: setAllowMmapWrites * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAllowMmapWrites( JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_writes) { reinterpret_cast(jhandle)->allow_mmap_writes = static_cast(allow_mmap_writes); } /* * Class: org_rocksdb_DBOptions * Method: allowMmapWrites * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_allowMmapWrites( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_writes; } /* * Class: org_rocksdb_DBOptions * Method: setIsFdCloseOnExec * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setIsFdCloseOnExec( JNIEnv*, jobject, jlong jhandle, jboolean is_fd_close_on_exec) { reinterpret_cast(jhandle) ->is_fd_close_on_exec = static_cast(is_fd_close_on_exec); } /* * Class: org_rocksdb_DBOptions * Method: isFdCloseOnExec * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_isFdCloseOnExec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->is_fd_close_on_exec; } /* * Class: org_rocksdb_DBOptions * Method: setStatsDumpPeriodSec * Signature: (JI)V */ 
void Java_org_rocksdb_DBOptions_setStatsDumpPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_dump_period_sec) { reinterpret_cast(jhandle) ->stats_dump_period_sec = static_cast(jstats_dump_period_sec); } /* * Class: org_rocksdb_DBOptions * Method: statsDumpPeriodSec * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_statsDumpPeriodSec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_dump_period_sec; } /* * Class: org_rocksdb_DBOptions * Method: setStatsPersistPeriodSec * Signature: (JI)V */ void Java_org_rocksdb_DBOptions_setStatsPersistPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_persist_period_sec) { reinterpret_cast(jhandle) ->stats_persist_period_sec = static_cast(jstats_persist_period_sec); } /* * Class: org_rocksdb_DBOptions * Method: statsPersistPeriodSec * Signature: (J)I */ jint Java_org_rocksdb_DBOptions_statsPersistPeriodSec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_persist_period_sec; } /* * Class: org_rocksdb_DBOptions * Method: setStatsHistoryBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setStatsHistoryBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jstats_history_buffer_size) { reinterpret_cast(jhandle) ->stats_history_buffer_size = static_cast(jstats_history_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: statsHistoryBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_statsHistoryBufferSize( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_history_buffer_size; } /* * Class: org_rocksdb_DBOptions * Method: setAdviseRandomOnOpen * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAdviseRandomOnOpen( JNIEnv*, jobject, jlong jhandle, jboolean advise_random_on_open) { reinterpret_cast(jhandle) ->advise_random_on_open = static_cast(advise_random_on_open); } /* * Class: org_rocksdb_DBOptions * Method: adviseRandomOnOpen * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_adviseRandomOnOpen( 
JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->advise_random_on_open; } /* * Class: org_rocksdb_DBOptions * Method: setDbWriteBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setDbWriteBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jdb_write_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: setWriteBufferManager * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWriteBufferManager( JNIEnv*, jobject, jlong jdb_options_handle, jlong jwrite_buffer_manager_handle) { auto* write_buffer_manager = reinterpret_cast*>( jwrite_buffer_manager_handle); reinterpret_cast(jdb_options_handle) ->write_buffer_manager = *write_buffer_manager; } /* * Class: org_rocksdb_DBOptions * Method: dbWriteBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_dbWriteBufferSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_write_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: setAccessHintOnCompactionStart * Signature: (JB)V */ void Java_org_rocksdb_DBOptions_setAccessHintOnCompactionStart( JNIEnv*, jobject, jlong jhandle, jbyte jaccess_hint_value) { auto* opt = reinterpret_cast(jhandle); opt->access_hint_on_compaction_start = ROCKSDB_NAMESPACE::AccessHintJni::toCppAccessHint(jaccess_hint_value); } /* * Class: org_rocksdb_DBOptions * Method: accessHintOnCompactionStart * Signature: (J)B */ jbyte Java_org_rocksdb_DBOptions_accessHintOnCompactionStart( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::AccessHintJni::toJavaAccessHint( opt->access_hint_on_compaction_start); } /* * Class: org_rocksdb_DBOptions * Method: setNewTableReaderForCompactionInputs * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setNewTableReaderForCompactionInputs( JNIEnv*, jobject, jlong jhandle, jboolean 
jnew_table_reader_for_compaction_inputs) { auto* opt = reinterpret_cast(jhandle); opt->new_table_reader_for_compaction_inputs = static_cast(jnew_table_reader_for_compaction_inputs); } /* * Class: org_rocksdb_DBOptions * Method: newTableReaderForCompactionInputs * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_newTableReaderForCompactionInputs( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->new_table_reader_for_compaction_inputs); } /* * Class: org_rocksdb_DBOptions * Method: setCompactionReadaheadSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_readahead_size) { auto* opt = reinterpret_cast(jhandle); opt->compaction_readahead_size = static_cast(jcompaction_readahead_size); } /* * Class: org_rocksdb_DBOptions * Method: compactionReadaheadSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_compactionReadaheadSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->compaction_readahead_size); } /* * Class: org_rocksdb_DBOptions * Method: setRandomAccessMaxBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setRandomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jrandom_access_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->random_access_max_buffer_size = static_cast(jrandom_access_max_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: randomAccessMaxBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_randomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->random_access_max_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: setWritableFileMaxBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWritableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jwritable_file_max_buffer_size) { auto* opt = 
reinterpret_cast(jhandle); opt->writable_file_max_buffer_size = static_cast(jwritable_file_max_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: writableFileMaxBufferSize * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_writableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->writable_file_max_buffer_size); } /* * Class: org_rocksdb_DBOptions * Method: setUseAdaptiveMutex * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setUseAdaptiveMutex( JNIEnv*, jobject, jlong jhandle, jboolean use_adaptive_mutex) { reinterpret_cast(jhandle)->use_adaptive_mutex = static_cast(use_adaptive_mutex); } /* * Class: org_rocksdb_DBOptions * Method: useAdaptiveMutex * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_useAdaptiveMutex( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_adaptive_mutex; } /* * Class: org_rocksdb_DBOptions * Method: setBytesPerSync * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setBytesPerSync( JNIEnv*, jobject, jlong jhandle, jlong bytes_per_sync) { reinterpret_cast(jhandle)->bytes_per_sync = static_cast(bytes_per_sync); } /* * Class: org_rocksdb_DBOptions * Method: bytesPerSync * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_bytesPerSync( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->bytes_per_sync; } /* * Class: org_rocksdb_DBOptions * Method: setWalBytesPerSync * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWalBytesPerSync( JNIEnv*, jobject, jlong jhandle, jlong jwal_bytes_per_sync) { reinterpret_cast(jhandle)->wal_bytes_per_sync = static_cast(jwal_bytes_per_sync); } /* * Class: org_rocksdb_DBOptions * Method: walBytesPerSync * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_walBytesPerSync( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->wal_bytes_per_sync); } /* * Class: org_rocksdb_DBOptions * Method: setStrictBytesPerSync * Signature: 
(JZ)V */ void Java_org_rocksdb_DBOptions_setStrictBytesPerSync( JNIEnv*, jobject, jlong jhandle, jboolean jstrict_bytes_per_sync) { reinterpret_cast(jhandle) ->strict_bytes_per_sync = jstrict_bytes_per_sync == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: strictBytesPerSync * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_strictBytesPerSync( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle) ->strict_bytes_per_sync); } /* * Class: org_rocksdb_DBOptions * Method: setDelayedWriteRate * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setDelayedWriteRate( JNIEnv*, jobject, jlong jhandle, jlong jdelayed_write_rate) { auto* opt = reinterpret_cast(jhandle); opt->delayed_write_rate = static_cast(jdelayed_write_rate); } /* * Class: org_rocksdb_DBOptions * Method: delayedWriteRate * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_delayedWriteRate( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->delayed_write_rate); } /* * Class: org_rocksdb_DBOptions * Method: setEnablePipelinedWrite * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setEnablePipelinedWrite( JNIEnv*, jobject, jlong jhandle, jboolean jenable_pipelined_write) { auto* opt = reinterpret_cast(jhandle); opt->enable_pipelined_write = jenable_pipelined_write == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: enablePipelinedWrite * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_enablePipelinedWrite( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_pipelined_write); } /* * Class: org_rocksdb_DBOptions * Method: setUnorderedWrite * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setUnorderedWrite( JNIEnv*, jobject, jlong jhandle, jboolean junordered_write) { auto* opt = reinterpret_cast(jhandle); opt->unordered_write = junordered_write == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: unorderedWrite * Signature: (J)Z */ jboolean 
Java_org_rocksdb_DBOptions_unorderedWrite( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->unordered_write); } /* * Class: org_rocksdb_DBOptions * Method: setEnableThreadTracking * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setEnableThreadTracking( JNIEnv*, jobject, jlong jhandle, jboolean jenable_thread_tracking) { auto* opt = reinterpret_cast(jhandle); opt->enable_thread_tracking = jenable_thread_tracking == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: enableThreadTracking * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_enableThreadTracking( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_thread_tracking); } /* * Class: org_rocksdb_DBOptions * Method: setAllowConcurrentMemtableWrite * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAllowConcurrentMemtableWrite( JNIEnv*, jobject, jlong jhandle, jboolean allow) { reinterpret_cast(jhandle) ->allow_concurrent_memtable_write = static_cast(allow); } /* * Class: org_rocksdb_DBOptions * Method: allowConcurrentMemtableWrite * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_allowConcurrentMemtableWrite( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_concurrent_memtable_write; } /* * Class: org_rocksdb_DBOptions * Method: setEnableWriteThreadAdaptiveYield * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setEnableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle, jboolean yield) { reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield = static_cast(yield); } /* * Class: org_rocksdb_DBOptions * Method: enableWriteThreadAdaptiveYield * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_enableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield; } /* * Class: org_rocksdb_DBOptions * Method: setWriteThreadMaxYieldUsec * Signature: (JJ)V */ void 
Java_org_rocksdb_DBOptions_setWriteThreadMaxYieldUsec( JNIEnv*, jobject, jlong jhandle, jlong max) { reinterpret_cast(jhandle) ->write_thread_max_yield_usec = static_cast(max); } /* * Class: org_rocksdb_DBOptions * Method: writeThreadMaxYieldUsec * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_writeThreadMaxYieldUsec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_max_yield_usec; } /* * Class: org_rocksdb_DBOptions * Method: setWriteThreadSlowYieldUsec * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWriteThreadSlowYieldUsec( JNIEnv*, jobject, jlong jhandle, jlong slow) { reinterpret_cast(jhandle) ->write_thread_slow_yield_usec = static_cast(slow); } /* * Class: org_rocksdb_DBOptions * Method: writeThreadSlowYieldUsec * Signature: (J)J */ jlong Java_org_rocksdb_DBOptions_writeThreadSlowYieldUsec( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_slow_yield_usec; } /* * Class: org_rocksdb_DBOptions * Method: setSkipStatsUpdateOnDbOpen * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setSkipStatsUpdateOnDbOpen( JNIEnv*, jobject, jlong jhandle, jboolean jskip_stats_update_on_db_open) { auto* opt = reinterpret_cast(jhandle); opt->skip_stats_update_on_db_open = static_cast(jskip_stats_update_on_db_open); } /* * Class: org_rocksdb_DBOptions * Method: skipStatsUpdateOnDbOpen * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_skipStatsUpdateOnDbOpen( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_stats_update_on_db_open); } /* * Class: org_rocksdb_DBOptions * Method: setSkipCheckingSstFileSizesOnDbOpen * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setSkipCheckingSstFileSizesOnDbOpen( JNIEnv*, jobject, jlong jhandle, jboolean jskip_checking_sst_file_sizes_on_db_open) { auto* opt = reinterpret_cast(jhandle); opt->skip_checking_sst_file_sizes_on_db_open = static_cast(jskip_checking_sst_file_sizes_on_db_open); } /* * 
Class: org_rocksdb_DBOptions * Method: skipCheckingSstFileSizesOnDbOpen * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_skipCheckingSstFileSizesOnDbOpen( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_checking_sst_file_sizes_on_db_open); } /* * Class: org_rocksdb_DBOptions * Method: setWalRecoveryMode * Signature: (JB)V */ void Java_org_rocksdb_DBOptions_setWalRecoveryMode( JNIEnv*, jobject, jlong jhandle, jbyte jwal_recovery_mode_value) { auto* opt = reinterpret_cast(jhandle); opt->wal_recovery_mode = ROCKSDB_NAMESPACE::WALRecoveryModeJni::toCppWALRecoveryMode( jwal_recovery_mode_value); } /* * Class: org_rocksdb_DBOptions * Method: walRecoveryMode * Signature: (J)B */ jbyte Java_org_rocksdb_DBOptions_walRecoveryMode( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::WALRecoveryModeJni::toJavaWALRecoveryMode( opt->wal_recovery_mode); } /* * Class: org_rocksdb_DBOptions * Method: setAllow2pc * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAllow2pc( JNIEnv*, jobject, jlong jhandle, jboolean jallow_2pc) { auto* opt = reinterpret_cast(jhandle); opt->allow_2pc = static_cast(jallow_2pc); } /* * Class: org_rocksdb_DBOptions * Method: allow2pc * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_allow2pc( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_2pc); } /* * Class: org_rocksdb_DBOptions * Method: setRowCache * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setRowCache( JNIEnv*, jobject, jlong jhandle, jlong jrow_cache_handle) { auto* opt = reinterpret_cast(jhandle); auto* row_cache = reinterpret_cast*>( jrow_cache_handle); opt->row_cache = *row_cache; } /* * Class: org_rocksdb_DBOptions * Method: setWalFilter * Signature: (JJ)V */ void Java_org_rocksdb_DBOptions_setWalFilter( JNIEnv*, jobject, jlong jhandle, jlong jwal_filter_handle) { auto* opt = reinterpret_cast(jhandle); auto* 
wal_filter = reinterpret_cast( jwal_filter_handle); opt->wal_filter = wal_filter; } /* * Class: org_rocksdb_DBOptions * Method: setFailIfOptionsFileError * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setFailIfOptionsFileError( JNIEnv*, jobject, jlong jhandle, jboolean jfail_if_options_file_error) { auto* opt = reinterpret_cast(jhandle); opt->fail_if_options_file_error = static_cast(jfail_if_options_file_error); } /* * Class: org_rocksdb_DBOptions * Method: failIfOptionsFileError * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_failIfOptionsFileError( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->fail_if_options_file_error); } /* * Class: org_rocksdb_DBOptions * Method: setDumpMallocStats * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setDumpMallocStats( JNIEnv*, jobject, jlong jhandle, jboolean jdump_malloc_stats) { auto* opt = reinterpret_cast(jhandle); opt->dump_malloc_stats = static_cast(jdump_malloc_stats); } /* * Class: org_rocksdb_DBOptions * Method: dumpMallocStats * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_dumpMallocStats( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->dump_malloc_stats); } /* * Class: org_rocksdb_DBOptions * Method: setAvoidFlushDuringRecovery * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAvoidFlushDuringRecovery( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_recovery) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_recovery = static_cast(javoid_flush_during_recovery); } /* * Class: org_rocksdb_DBOptions * Method: avoidFlushDuringRecovery * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_avoidFlushDuringRecovery( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_recovery); } /* * Class: org_rocksdb_DBOptions * Method: setAllowIngestBehind * Signature: (JZ)V */ void 
Java_org_rocksdb_DBOptions_setAllowIngestBehind( JNIEnv*, jobject, jlong jhandle, jboolean jallow_ingest_behind) { auto* opt = reinterpret_cast(jhandle); opt->allow_ingest_behind = jallow_ingest_behind == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: allowIngestBehind * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_allowIngestBehind( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_ingest_behind); } /* * Class: org_rocksdb_DBOptions * Method: setPreserveDeletes * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setPreserveDeletes( JNIEnv*, jobject, jlong jhandle, jboolean jpreserve_deletes) { auto* opt = reinterpret_cast(jhandle); opt->preserve_deletes = jpreserve_deletes == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: preserveDeletes * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_preserveDeletes( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->preserve_deletes); } /* * Class: org_rocksdb_DBOptions * Method: setTwoWriteQueues * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setTwoWriteQueues( JNIEnv*, jobject, jlong jhandle, jboolean jtwo_write_queues) { auto* opt = reinterpret_cast(jhandle); opt->two_write_queues = jtwo_write_queues == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: twoWriteQueues * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_twoWriteQueues( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->two_write_queues); } /* * Class: org_rocksdb_DBOptions * Method: setManualWalFlush * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setManualWalFlush( JNIEnv*, jobject, jlong jhandle, jboolean jmanual_wal_flush) { auto* opt = reinterpret_cast(jhandle); opt->manual_wal_flush = jmanual_wal_flush == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: manualWalFlush * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_manualWalFlush( 
JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->manual_wal_flush); } /* * Class: org_rocksdb_DBOptions * Method: setAtomicFlush * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAtomicFlush( JNIEnv*, jobject, jlong jhandle, jboolean jatomic_flush) { auto* opt = reinterpret_cast(jhandle); opt->atomic_flush = jatomic_flush == JNI_TRUE; } /* * Class: org_rocksdb_DBOptions * Method: atomicFlush * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_atomicFlush( JNIEnv *, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->atomic_flush); } /* * Class: org_rocksdb_DBOptions * Method: setAvoidFlushDuringShutdown * Signature: (JZ)V */ void Java_org_rocksdb_DBOptions_setAvoidFlushDuringShutdown( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_shutdown) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_shutdown = static_cast(javoid_flush_during_shutdown); } /* * Class: org_rocksdb_DBOptions * Method: avoidFlushDuringShutdown * Signature: (J)Z */ jboolean Java_org_rocksdb_DBOptions_avoidFlushDuringShutdown( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_shutdown); } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::WriteOptions /* * Class: org_rocksdb_WriteOptions * Method: newWriteOptions * Signature: ()J */ jlong Java_org_rocksdb_WriteOptions_newWriteOptions( JNIEnv*, jclass) { auto* op = new ROCKSDB_NAMESPACE::WriteOptions(); return reinterpret_cast(op); } /* * Class: org_rocksdb_WriteOptions * Method: copyWriteOptions * Signature: (J)J */ jlong Java_org_rocksdb_WriteOptions_copyWriteOptions( JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::WriteOptions( *(reinterpret_cast(jhandle))); return reinterpret_cast(new_opt); } /* * Class: org_rocksdb_WriteOptions * Method: disposeInternal * Signature: ()V */ void 
Java_org_rocksdb_WriteOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* write_options = reinterpret_cast(jhandle); assert(write_options != nullptr); delete write_options; } /* * Class: org_rocksdb_WriteOptions * Method: setSync * Signature: (JZ)V */ void Java_org_rocksdb_WriteOptions_setSync( JNIEnv*, jobject, jlong jhandle, jboolean jflag) { reinterpret_cast(jhandle)->sync = jflag; } /* * Class: org_rocksdb_WriteOptions * Method: sync * Signature: (J)Z */ jboolean Java_org_rocksdb_WriteOptions_sync( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->sync; } /* * Class: org_rocksdb_WriteOptions * Method: setDisableWAL * Signature: (JZ)V */ void Java_org_rocksdb_WriteOptions_setDisableWAL( JNIEnv*, jobject, jlong jhandle, jboolean jflag) { reinterpret_cast(jhandle)->disableWAL = jflag; } /* * Class: org_rocksdb_WriteOptions * Method: disableWAL * Signature: (J)Z */ jboolean Java_org_rocksdb_WriteOptions_disableWAL( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->disableWAL; } /* * Class: org_rocksdb_WriteOptions * Method: setIgnoreMissingColumnFamilies * Signature: (JZ)V */ void Java_org_rocksdb_WriteOptions_setIgnoreMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle, jboolean jignore_missing_column_families) { reinterpret_cast(jhandle) ->ignore_missing_column_families = static_cast(jignore_missing_column_families); } /* * Class: org_rocksdb_WriteOptions * Method: ignoreMissingColumnFamilies * Signature: (J)Z */ jboolean Java_org_rocksdb_WriteOptions_ignoreMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->ignore_missing_column_families; } /* * Class: org_rocksdb_WriteOptions * Method: setNoSlowdown * Signature: (JZ)V */ void Java_org_rocksdb_WriteOptions_setNoSlowdown( JNIEnv*, jobject, jlong jhandle, jboolean jno_slowdown) { reinterpret_cast(jhandle)->no_slowdown = static_cast(jno_slowdown); } /* * Class: org_rocksdb_WriteOptions * Method: noSlowdown * Signature: 
(J)Z */ jboolean Java_org_rocksdb_WriteOptions_noSlowdown( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->no_slowdown; } /* * Class: org_rocksdb_WriteOptions * Method: setLowPri * Signature: (JZ)V */ void Java_org_rocksdb_WriteOptions_setLowPri( JNIEnv*, jobject, jlong jhandle, jboolean jlow_pri) { reinterpret_cast(jhandle)->low_pri = static_cast(jlow_pri); } /* * Class: org_rocksdb_WriteOptions * Method: lowPri * Signature: (J)Z */ jboolean Java_org_rocksdb_WriteOptions_lowPri( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->low_pri; } ///////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::ReadOptions /* * Class: org_rocksdb_ReadOptions * Method: newReadOptions * Signature: ()J */ jlong Java_org_rocksdb_ReadOptions_newReadOptions__( JNIEnv*, jclass) { auto* read_options = new ROCKSDB_NAMESPACE::ReadOptions(); return reinterpret_cast(read_options); } /* * Class: org_rocksdb_ReadOptions * Method: newReadOptions * Signature: (ZZ)J */ jlong Java_org_rocksdb_ReadOptions_newReadOptions__ZZ( JNIEnv*, jclass, jboolean jverify_checksums, jboolean jfill_cache) { auto* read_options = new ROCKSDB_NAMESPACE::ReadOptions( static_cast(jverify_checksums), static_cast(jfill_cache)); return reinterpret_cast(read_options); } /* * Class: org_rocksdb_ReadOptions * Method: copyReadOptions * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_copyReadOptions( JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::ReadOptions( *(reinterpret_cast(jhandle))); return reinterpret_cast(new_opt); } /* * Class: org_rocksdb_ReadOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_ReadOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* read_options = reinterpret_cast(jhandle); assert(read_options != nullptr); delete read_options; } /* * Class: org_rocksdb_ReadOptions * Method: setVerifyChecksums * Signature: (JZ)V */ void 
Java_org_rocksdb_ReadOptions_setVerifyChecksums( JNIEnv*, jobject, jlong jhandle, jboolean jverify_checksums) { reinterpret_cast(jhandle)->verify_checksums = static_cast(jverify_checksums); } /* * Class: org_rocksdb_ReadOptions * Method: verifyChecksums * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_verifyChecksums( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->verify_checksums; } /* * Class: org_rocksdb_ReadOptions * Method: setFillCache * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setFillCache( JNIEnv*, jobject, jlong jhandle, jboolean jfill_cache) { reinterpret_cast(jhandle)->fill_cache = static_cast(jfill_cache); } /* * Class: org_rocksdb_ReadOptions * Method: fillCache * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_fillCache( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->fill_cache; } /* * Class: org_rocksdb_ReadOptions * Method: setTailing * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setTailing( JNIEnv*, jobject, jlong jhandle, jboolean jtailing) { reinterpret_cast(jhandle)->tailing = static_cast(jtailing); } /* * Class: org_rocksdb_ReadOptions * Method: tailing * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_tailing( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->tailing; } /* * Class: org_rocksdb_ReadOptions * Method: managed * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_managed( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->managed; } /* * Class: org_rocksdb_ReadOptions * Method: setManaged * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setManaged( JNIEnv*, jobject, jlong jhandle, jboolean jmanaged) { reinterpret_cast(jhandle)->managed = static_cast(jmanaged); } /* * Class: org_rocksdb_ReadOptions * Method: totalOrderSeek * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_totalOrderSeek( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->total_order_seek; } /* * 
Class: org_rocksdb_ReadOptions * Method: setTotalOrderSeek * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setTotalOrderSeek( JNIEnv*, jobject, jlong jhandle, jboolean jtotal_order_seek) { reinterpret_cast(jhandle)->total_order_seek = static_cast(jtotal_order_seek); } /* * Class: org_rocksdb_ReadOptions * Method: prefixSameAsStart * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_prefixSameAsStart( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->prefix_same_as_start; } /* * Class: org_rocksdb_ReadOptions * Method: setPrefixSameAsStart * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setPrefixSameAsStart( JNIEnv*, jobject, jlong jhandle, jboolean jprefix_same_as_start) { reinterpret_cast(jhandle) ->prefix_same_as_start = static_cast(jprefix_same_as_start); } /* * Class: org_rocksdb_ReadOptions * Method: pinData * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_pinData( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->pin_data; } /* * Class: org_rocksdb_ReadOptions * Method: setPinData * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setPinData( JNIEnv*, jobject, jlong jhandle, jboolean jpin_data) { reinterpret_cast(jhandle)->pin_data = static_cast(jpin_data); } /* * Class: org_rocksdb_ReadOptions * Method: backgroundPurgeOnIteratorCleanup * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_backgroundPurgeOnIteratorCleanup( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->background_purge_on_iterator_cleanup); } /* * Class: org_rocksdb_ReadOptions * Method: setBackgroundPurgeOnIteratorCleanup * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setBackgroundPurgeOnIteratorCleanup( JNIEnv*, jobject, jlong jhandle, jboolean jbackground_purge_on_iterator_cleanup) { auto* opt = reinterpret_cast(jhandle); opt->background_purge_on_iterator_cleanup = static_cast(jbackground_purge_on_iterator_cleanup); } /* * Class: 
org_rocksdb_ReadOptions * Method: readaheadSize * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_readaheadSize( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->readahead_size); } /* * Class: org_rocksdb_ReadOptions * Method: setReadaheadSize * Signature: (JJ)V */ void Java_org_rocksdb_ReadOptions_setReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jreadahead_size) { auto* opt = reinterpret_cast(jhandle); opt->readahead_size = static_cast(jreadahead_size); } /* * Class: org_rocksdb_ReadOptions * Method: maxSkippableInternalKeys * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_maxSkippableInternalKeys( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_skippable_internal_keys); } /* * Class: org_rocksdb_ReadOptions * Method: setMaxSkippableInternalKeys * Signature: (JJ)V */ void Java_org_rocksdb_ReadOptions_setMaxSkippableInternalKeys( JNIEnv*, jobject, jlong jhandle, jlong jmax_skippable_internal_keys) { auto* opt = reinterpret_cast(jhandle); opt->max_skippable_internal_keys = static_cast(jmax_skippable_internal_keys); } /* * Class: org_rocksdb_ReadOptions * Method: ignoreRangeDeletions * Signature: (J)Z */ jboolean Java_org_rocksdb_ReadOptions_ignoreRangeDeletions( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->ignore_range_deletions); } /* * Class: org_rocksdb_ReadOptions * Method: setIgnoreRangeDeletions * Signature: (JZ)V */ void Java_org_rocksdb_ReadOptions_setIgnoreRangeDeletions( JNIEnv*, jobject, jlong jhandle, jboolean jignore_range_deletions) { auto* opt = reinterpret_cast(jhandle); opt->ignore_range_deletions = static_cast(jignore_range_deletions); } /* * Class: org_rocksdb_ReadOptions * Method: setSnapshot * Signature: (JJ)V */ void Java_org_rocksdb_ReadOptions_setSnapshot( JNIEnv*, jobject, jlong jhandle, jlong jsnapshot) { reinterpret_cast(jhandle)->snapshot = 
reinterpret_cast(jsnapshot); } /* * Class: org_rocksdb_ReadOptions * Method: snapshot * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_snapshot( JNIEnv*, jobject, jlong jhandle) { auto& snapshot = reinterpret_cast(jhandle)->snapshot; return reinterpret_cast(snapshot); } /* * Class: org_rocksdb_ReadOptions * Method: readTier * Signature: (J)B */ jbyte Java_org_rocksdb_ReadOptions_readTier( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle)->read_tier); } /* * Class: org_rocksdb_ReadOptions * Method: setReadTier * Signature: (JB)V */ void Java_org_rocksdb_ReadOptions_setReadTier( JNIEnv*, jobject, jlong jhandle, jbyte jread_tier) { reinterpret_cast(jhandle)->read_tier = static_cast(jread_tier); } /* * Class: org_rocksdb_ReadOptions * Method: setIterateUpperBound * Signature: (JJ)I */ void Java_org_rocksdb_ReadOptions_setIterateUpperBound( JNIEnv*, jobject, jlong jhandle, jlong jupper_bound_slice_handle) { reinterpret_cast(jhandle) ->iterate_upper_bound = reinterpret_cast(jupper_bound_slice_handle); } /* * Class: org_rocksdb_ReadOptions * Method: iterateUpperBound * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_iterateUpperBound( JNIEnv*, jobject, jlong jhandle) { auto& upper_bound_slice_handle = reinterpret_cast(jhandle) ->iterate_upper_bound; return reinterpret_cast(upper_bound_slice_handle); } /* * Class: org_rocksdb_ReadOptions * Method: setIterateLowerBound * Signature: (JJ)I */ void Java_org_rocksdb_ReadOptions_setIterateLowerBound( JNIEnv*, jobject, jlong jhandle, jlong jlower_bound_slice_handle) { reinterpret_cast(jhandle) ->iterate_lower_bound = reinterpret_cast(jlower_bound_slice_handle); } /* * Class: org_rocksdb_ReadOptions * Method: iterateLowerBound * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_iterateLowerBound( JNIEnv*, jobject, jlong jhandle) { auto& lower_bound_slice_handle = reinterpret_cast(jhandle) ->iterate_lower_bound; return reinterpret_cast(lower_bound_slice_handle); } /* * Class: 
org_rocksdb_ReadOptions * Method: setTableFilter * Signature: (JJ)V */ void Java_org_rocksdb_ReadOptions_setTableFilter( JNIEnv*, jobject, jlong jhandle, jlong jjni_table_filter_handle) { auto* opt = reinterpret_cast(jhandle); auto* jni_table_filter = reinterpret_cast( jjni_table_filter_handle); opt->table_filter = jni_table_filter->GetTableFilterFunction(); } /* * Class: org_rocksdb_ReadOptions * Method: setIterStartSeqnum * Signature: (JJ)V */ void Java_org_rocksdb_ReadOptions_setIterStartSeqnum( JNIEnv*, jobject, jlong jhandle, jlong jiter_start_seqnum) { auto* opt = reinterpret_cast(jhandle); opt->iter_start_seqnum = static_cast(jiter_start_seqnum); } /* * Class: org_rocksdb_ReadOptions * Method: iterStartSeqnum * Signature: (J)J */ jlong Java_org_rocksdb_ReadOptions_iterStartSeqnum( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->iter_start_seqnum); } ///////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::ComparatorOptions /* * Class: org_rocksdb_ComparatorOptions * Method: newComparatorOptions * Signature: ()J */ jlong Java_org_rocksdb_ComparatorOptions_newComparatorOptions( JNIEnv*, jclass) { auto* comparator_opt = new ROCKSDB_NAMESPACE::ComparatorJniCallbackOptions(); return reinterpret_cast(comparator_opt); } /* * Class: org_rocksdb_ComparatorOptions * Method: reusedSynchronisationType * Signature: (J)B */ jbyte Java_org_rocksdb_ComparatorOptions_reusedSynchronisationType( JNIEnv *, jobject, jlong jhandle) { auto* comparator_opt = reinterpret_cast( jhandle); return ROCKSDB_NAMESPACE::ReusedSynchronisationTypeJni:: toJavaReusedSynchronisationType( comparator_opt->reused_synchronisation_type); } /* * Class: org_rocksdb_ComparatorOptions * Method: setReusedSynchronisationType * Signature: (JB)V */ void Java_org_rocksdb_ComparatorOptions_setReusedSynchronisationType( JNIEnv*, jobject, jlong jhandle, jbyte jreused_synhcronisation_type) { auto* comparator_opt = 
reinterpret_cast( jhandle); comparator_opt->reused_synchronisation_type = ROCKSDB_NAMESPACE::ReusedSynchronisationTypeJni:: toCppReusedSynchronisationType(jreused_synhcronisation_type); } /* * Class: org_rocksdb_ComparatorOptions * Method: useDirectBuffer * Signature: (J)Z */ jboolean Java_org_rocksdb_ComparatorOptions_useDirectBuffer( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast( jhandle) ->direct_buffer); } /* * Class: org_rocksdb_ComparatorOptions * Method: setUseDirectBuffer * Signature: (JZ)V */ void Java_org_rocksdb_ComparatorOptions_setUseDirectBuffer( JNIEnv*, jobject, jlong jhandle, jboolean jdirect_buffer) { reinterpret_cast(jhandle) ->direct_buffer = jdirect_buffer == JNI_TRUE; } /* * Class: org_rocksdb_ComparatorOptions * Method: maxReusedBufferSize * Signature: (J)I */ jint Java_org_rocksdb_ComparatorOptions_maxReusedBufferSize( JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast( jhandle) ->max_reused_buffer_size); } /* * Class: org_rocksdb_ComparatorOptions * Method: setMaxReusedBufferSize * Signature: (JI)V */ void Java_org_rocksdb_ComparatorOptions_setMaxReusedBufferSize( JNIEnv*, jobject, jlong jhandle, jint jmax_reused_buffer_size) { reinterpret_cast(jhandle) ->max_reused_buffer_size = static_cast(jmax_reused_buffer_size); } /* * Class: org_rocksdb_ComparatorOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_ComparatorOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* comparator_opt = reinterpret_cast( jhandle); assert(comparator_opt != nullptr); delete comparator_opt; } ///////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::FlushOptions /* * Class: org_rocksdb_FlushOptions * Method: newFlushOptions * Signature: ()J */ jlong Java_org_rocksdb_FlushOptions_newFlushOptions( JNIEnv*, jclass) { auto* flush_opt = new ROCKSDB_NAMESPACE::FlushOptions(); return reinterpret_cast(flush_opt); } /* * Class: org_rocksdb_FlushOptions * 
Method: setWaitForFlush * Signature: (JZ)V */ void Java_org_rocksdb_FlushOptions_setWaitForFlush( JNIEnv*, jobject, jlong jhandle, jboolean jwait) { reinterpret_cast(jhandle)->wait = static_cast(jwait); } /* * Class: org_rocksdb_FlushOptions * Method: waitForFlush * Signature: (J)Z */ jboolean Java_org_rocksdb_FlushOptions_waitForFlush( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->wait; } /* * Class: org_rocksdb_FlushOptions * Method: setAllowWriteStall * Signature: (JZ)V */ void Java_org_rocksdb_FlushOptions_setAllowWriteStall( JNIEnv*, jobject, jlong jhandle, jboolean jallow_write_stall) { auto* flush_options = reinterpret_cast(jhandle); flush_options->allow_write_stall = jallow_write_stall == JNI_TRUE; } /* * Class: org_rocksdb_FlushOptions * Method: allowWriteStall * Signature: (J)Z */ jboolean Java_org_rocksdb_FlushOptions_allowWriteStall( JNIEnv*, jobject, jlong jhandle) { auto* flush_options = reinterpret_cast(jhandle); return static_cast(flush_options->allow_write_stall); } /* * Class: org_rocksdb_FlushOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_FlushOptions_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* flush_opt = reinterpret_cast(jhandle); assert(flush_opt != nullptr); delete flush_opt; } rocksdb-6.11.4/java/rocksjni/options_util.cc000066400000000000000000000156451370372246700210620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::OptionsUtil methods from Java side. 
#include #include #include "include/org_rocksdb_OptionsUtil.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/utilities/options_util.h" #include "rocksjni/portal.h" void build_column_family_descriptor_list( JNIEnv* env, jobject jcfds, std::vector& cf_descs) { jmethodID add_mid = ROCKSDB_NAMESPACE::ListJni::getListAddMethodId(env); if (add_mid == nullptr) { // exception occurred accessing method return; } // Column family descriptor for (ROCKSDB_NAMESPACE::ColumnFamilyDescriptor& cfd : cf_descs) { // Construct a ColumnFamilyDescriptor java object jobject jcfd = ROCKSDB_NAMESPACE::ColumnFamilyDescriptorJni::construct(env, &cfd); if (env->ExceptionCheck()) { // exception occurred constructing object if (jcfd != nullptr) { env->DeleteLocalRef(jcfd); } return; } // Add the object to java list. jboolean rs = env->CallBooleanMethod(jcfds, add_mid, jcfd); if (env->ExceptionCheck() || rs == JNI_FALSE) { // exception occurred calling method, or could not add if (jcfd != nullptr) { env->DeleteLocalRef(jcfd); } return; } } } /* * Class: org_rocksdb_OptionsUtil * Method: loadLatestOptions * Signature: (Ljava/lang/String;JLjava/util/List;Z)V */ void Java_org_rocksdb_OptionsUtil_loadLatestOptions__Ljava_lang_String_2JJLjava_util_List_2Z( JNIEnv* env, jclass /*jcls*/, jstring jdbpath, jlong jenv_handle, jlong jdb_opts_handle, jobject jcfds, jboolean ignore_unknown_options) { jboolean has_exception = JNI_FALSE; auto db_path = ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdbpath, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } std::vector cf_descs; ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::LoadLatestOptions( db_path, reinterpret_cast(jenv_handle), reinterpret_cast(jdb_opts_handle), &cf_descs, ignore_unknown_options); if (!s.ok()) { // error, raise an exception ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } else { build_column_family_descriptor_list(env, jcfds, cf_descs); } } /* * Class: 
org_rocksdb_OptionsUtil * Method: loadLatestOptions_1 * Signature: (JLjava/lang/String;JLjava/util/List;)V */ void Java_org_rocksdb_OptionsUtil_loadLatestOptions__JLjava_lang_String_2JLjava_util_List_2( JNIEnv* env, jclass /*jcls*/, jlong cfg_handle, jstring jdbpath, jlong jdb_opts_handle, jobject jcfds) { jboolean has_exception = JNI_FALSE; auto db_path = ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdbpath, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } std::vector cf_descs; auto* config_options = reinterpret_cast(cfg_handle); auto* db_options = reinterpret_cast(jdb_opts_handle); ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::LoadLatestOptions( *config_options, db_path, db_options, &cf_descs); if (!s.ok()) { // error, raise an exception ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } else { build_column_family_descriptor_list(env, jcfds, cf_descs); } } /* * Class: org_rocksdb_OptionsUtil * Method: loadOptionsFromFile * Signature: (Ljava/lang/String;JJLjava/util/List;Z)V */ void Java_org_rocksdb_OptionsUtil_loadOptionsFromFile__Ljava_lang_String_2JJLjava_util_List_2Z( JNIEnv* env, jclass /*jcls*/, jstring jopts_file_name, jlong jenv_handle, jlong jdb_opts_handle, jobject jcfds, jboolean ignore_unknown_options) { jboolean has_exception = JNI_FALSE; auto opts_file_name = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, jopts_file_name, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } std::vector cf_descs; ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::LoadOptionsFromFile( opts_file_name, reinterpret_cast(jenv_handle), reinterpret_cast(jdb_opts_handle), &cf_descs, ignore_unknown_options); if (!s.ok()) { // error, raise an exception ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } else { build_column_family_descriptor_list(env, jcfds, cf_descs); } } /* * Class: org_rocksdb_OptionsUtil * Method: loadOptionsFromFile * Signature: (JLjava/lang/String;JLjava/util/List;)V */ void 
Java_org_rocksdb_OptionsUtil_loadOptionsFromFile__JLjava_lang_String_2JLjava_util_List_2( JNIEnv* env, jclass /*jcls*/, jlong cfg_handle, jstring jopts_file_name, jlong jdb_opts_handle, jobject jcfds) { jboolean has_exception = JNI_FALSE; auto opts_file_name = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, jopts_file_name, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } std::vector cf_descs; auto* config_options = reinterpret_cast(cfg_handle); auto* db_options = reinterpret_cast(jdb_opts_handle); ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::LoadOptionsFromFile( *config_options, opts_file_name, db_options, &cf_descs); if (!s.ok()) { // error, raise an exception ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } else { build_column_family_descriptor_list(env, jcfds, cf_descs); } } /* * Class: org_rocksdb_OptionsUtil * Method: getLatestOptionsFileName * Signature: (Ljava/lang/String;J)Ljava/lang/String; */ jstring Java_org_rocksdb_OptionsUtil_getLatestOptionsFileName( JNIEnv* env, jclass /*jcls*/, jstring jdbpath, jlong jenv_handle) { jboolean has_exception = JNI_FALSE; auto db_path = ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdbpath, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return nullptr; } std::string options_file_name; ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::GetLatestOptionsFileName( db_path, reinterpret_cast(jenv_handle), &options_file_name); if (!s.ok()) { // error, raise an exception ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } else { return env->NewStringUTF(options_file_name.c_str()); } } rocksdb-6.11.4/java/rocksjni/persistent_cache.cc000066400000000000000000000037041370372246700216460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::PersistentCache. #include #include #include "include/org_rocksdb_PersistentCache.h" #include "rocksdb/persistent_cache.h" #include "loggerjnicallback.h" #include "portal.h" /* * Class: org_rocksdb_PersistentCache * Method: newPersistentCache * Signature: (JLjava/lang/String;JJZ)J */ jlong Java_org_rocksdb_PersistentCache_newPersistentCache( JNIEnv* env, jclass, jlong jenv_handle, jstring jpath, jlong jsz, jlong jlogger_handle, jboolean joptimized_for_nvm) { auto* rocks_env = reinterpret_cast(jenv_handle); jboolean has_exception = JNI_FALSE; std::string path = ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jpath, &has_exception); if (has_exception == JNI_TRUE) { return 0; } auto* logger = reinterpret_cast*>( jlogger_handle); auto* cache = new std::shared_ptr(nullptr); ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::NewPersistentCache( rocks_env, path, static_cast(jsz), *logger, static_cast(joptimized_for_nvm), cache); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } return reinterpret_cast(cache); } /* * Class: org_rocksdb_PersistentCache * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_PersistentCache_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* cache = reinterpret_cast*>( jhandle); delete cache; // delete std::shared_ptr } rocksdb-6.11.4/java/rocksjni/portal.h000066400000000000000000007622411370372246700174760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// This file is designed for caching those frequently used IDs and provide // efficient portal (i.e, a set of static functions) to access java code // from c++. #ifndef JAVA_ROCKSJNI_PORTAL_H_ #define JAVA_ROCKSJNI_PORTAL_H_ #include #include #include #include #include #include #include #include #include #include #include #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/filter_policy.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/utilities/backupable_db.h" #include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "rocksjni/compaction_filter_factory_jnicallback.h" #include "rocksjni/comparatorjnicallback.h" #include "rocksjni/loggerjnicallback.h" #include "rocksjni/table_filter_jnicallback.h" #include "rocksjni/trace_writer_jnicallback.h" #include "rocksjni/transaction_notifier_jnicallback.h" #include "rocksjni/wal_filter_jnicallback.h" #include "rocksjni/writebatchhandlerjnicallback.h" // Remove macro on windows #ifdef DELETE #undef DELETE #endif namespace ROCKSDB_NAMESPACE { class JavaClass { public: /** * Gets and initializes a Java Class * * @param env A pointer to the Java environment * @param jclazz_name The fully qualified JNI name of the Java Class * e.g. 
"java/lang/String" * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env, const char* jclazz_name) { jclass jclazz = env->FindClass(jclazz_name); assert(jclazz != nullptr); return jclazz; } }; // Native class template template class RocksDBNativeClass : public JavaClass { }; // Native class template for sub-classes of RocksMutableObject template class NativeRocksMutableObject : public RocksDBNativeClass { public: /** * Gets the Java Method ID for the * RocksMutableObject#setNativeHandle(long, boolean) method * * @param env A pointer to the Java environment * @return The Java Method ID or nullptr the RocksMutableObject class cannot * be accessed, or if one of the NoSuchMethodError, * ExceptionInInitializerError or OutOfMemoryError exceptions is thrown */ static jmethodID getSetNativeHandleMethod(JNIEnv* env) { static jclass jclazz = DERIVED::getJClass(env); if(jclazz == nullptr) { return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "setNativeHandle", "(JZ)V"); assert(mid != nullptr); return mid; } /** * Sets the C++ object pointer handle in the Java object * * @param env A pointer to the Java environment * @param jobj The Java object on which to set the pointer handle * @param ptr The C++ object pointer * @param java_owns_handle JNI_TRUE if ownership of the C++ object is * managed by the Java object * * @return true if a Java exception is pending, false otherwise */ static bool setHandle(JNIEnv* env, jobject jobj, PTR ptr, jboolean java_owns_handle) { assert(jobj != nullptr); static jmethodID mid = getSetNativeHandleMethod(env); if(mid == nullptr) { return true; // signal exception } env->CallVoidMethod(jobj, mid, reinterpret_cast(ptr), java_owns_handle); if(env->ExceptionCheck()) { return true; // signal exception } return false; } }; // Java Exception template template class 
JavaException : public JavaClass { public: /** * Create and throw a java exception with the provided message * * @param env A pointer to the Java environment * @param msg The message for the exception * * @return true if an exception was thrown, false otherwise */ static bool ThrowNew(JNIEnv* env, const std::string& msg) { jclass jclazz = DERIVED::getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class std::cerr << "JavaException::ThrowNew - Error: unexpected exception!" << std::endl; return env->ExceptionCheck(); } const jint rs = env->ThrowNew(jclazz, msg.c_str()); if(rs != JNI_OK) { // exception could not be thrown std::cerr << "JavaException::ThrowNew - Fatal: could not throw exception!" << std::endl; return env->ExceptionCheck(); } return true; } }; // The portal class for java.lang.IllegalArgumentException class IllegalArgumentExceptionJni : public JavaException { public: /** * Get the Java Class java.lang.IllegalArgumentException * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaException::getJClass(env, "java/lang/IllegalArgumentException"); } /** * Create and throw a Java IllegalArgumentException with the provided status * * If s.ok() == true, then this function will not throw any exception. * * @param env A pointer to the Java environment * @param s The status for the exception * * @return true if an exception was thrown, false otherwise */ static bool ThrowNew(JNIEnv* env, const Status& s) { assert(!s.ok()); if (s.ok()) { return false; } // get the IllegalArgumentException class jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class std::cerr << "IllegalArgumentExceptionJni::ThrowNew/class - Error: unexpected exception!" 
<< std::endl; return env->ExceptionCheck(); } return JavaException::ThrowNew(env, s.ToString()); } }; // The portal class for org.rocksdb.Status.Code class CodeJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.Status.Code * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/Status$Code"); } /** * Get the Java Method: Status.Code#getValue * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getValueMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "getValue", "()b"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.Status.SubCode class SubCodeJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.Status.SubCode * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/Status$SubCode"); } /** * Get the Java Method: Status.SubCode#getValue * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getValueMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "getValue", "()b"); assert(mid != nullptr); return mid; } 
static ROCKSDB_NAMESPACE::Status::SubCode toCppSubCode( const jbyte jsub_code) { switch (jsub_code) { case 0x0: return ROCKSDB_NAMESPACE::Status::SubCode::kNone; case 0x1: return ROCKSDB_NAMESPACE::Status::SubCode::kMutexTimeout; case 0x2: return ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout; case 0x3: return ROCKSDB_NAMESPACE::Status::SubCode::kLockLimit; case 0x4: return ROCKSDB_NAMESPACE::Status::SubCode::kNoSpace; case 0x5: return ROCKSDB_NAMESPACE::Status::SubCode::kDeadlock; case 0x6: return ROCKSDB_NAMESPACE::Status::SubCode::kStaleFile; case 0x7: return ROCKSDB_NAMESPACE::Status::SubCode::kMemoryLimit; case 0x7F: default: return ROCKSDB_NAMESPACE::Status::SubCode::kNone; } } }; // The portal class for org.rocksdb.Status class StatusJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.Status * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/Status"); } /** * Get the Java Method: Status#getCode * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getCodeMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "getCode", "()Lorg/rocksdb/Status$Code;"); assert(mid != nullptr); return mid; } /** * Get the Java Method: Status#getSubCode * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getSubCodeMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return 
nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "getSubCode", "()Lorg/rocksdb/Status$SubCode;"); assert(mid != nullptr); return mid; } /** * Get the Java Method: Status#getState * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getStateMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "getState", "()Ljava/lang/String;"); assert(mid != nullptr); return mid; } /** * Create a new Java org.rocksdb.Status object with the same properties as * the provided C++ ROCKSDB_NAMESPACE::Status object * * @param env A pointer to the Java environment * @param status The ROCKSDB_NAMESPACE::Status object * * @return A reference to a Java org.rocksdb.Status object, or nullptr * if an an exception occurs */ static jobject construct(JNIEnv* env, const Status& status) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(BBLjava/lang/String;)V"); if(mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } // convert the Status state for Java jstring jstate = nullptr; if (status.getState() != nullptr) { const char* const state = status.getState(); jstate = env->NewStringUTF(state); if(env->ExceptionCheck()) { if(jstate != nullptr) { env->DeleteLocalRef(jstate); } return nullptr; } } jobject jstatus = env->NewObject(jclazz, mid, toJavaStatusCode(status.code()), toJavaStatusSubCode(status.subcode()), jstate); if(env->ExceptionCheck()) { // exception occurred if(jstate != nullptr) { env->DeleteLocalRef(jstate); } return nullptr; } if(jstate != nullptr) { env->DeleteLocalRef(jstate); } return jstatus; } // Returns the equivalent org.rocksdb.Status.Code for the provided // C++ 
ROCKSDB_NAMESPACE::Status::Code enum static jbyte toJavaStatusCode(const ROCKSDB_NAMESPACE::Status::Code& code) { switch (code) { case ROCKSDB_NAMESPACE::Status::Code::kOk: return 0x0; case ROCKSDB_NAMESPACE::Status::Code::kNotFound: return 0x1; case ROCKSDB_NAMESPACE::Status::Code::kCorruption: return 0x2; case ROCKSDB_NAMESPACE::Status::Code::kNotSupported: return 0x3; case ROCKSDB_NAMESPACE::Status::Code::kInvalidArgument: return 0x4; case ROCKSDB_NAMESPACE::Status::Code::kIOError: return 0x5; case ROCKSDB_NAMESPACE::Status::Code::kMergeInProgress: return 0x6; case ROCKSDB_NAMESPACE::Status::Code::kIncomplete: return 0x7; case ROCKSDB_NAMESPACE::Status::Code::kShutdownInProgress: return 0x8; case ROCKSDB_NAMESPACE::Status::Code::kTimedOut: return 0x9; case ROCKSDB_NAMESPACE::Status::Code::kAborted: return 0xA; case ROCKSDB_NAMESPACE::Status::Code::kBusy: return 0xB; case ROCKSDB_NAMESPACE::Status::Code::kExpired: return 0xC; case ROCKSDB_NAMESPACE::Status::Code::kTryAgain: return 0xD; case ROCKSDB_NAMESPACE::Status::Code::kColumnFamilyDropped: return 0xE; default: return 0x7F; // undefined } } // Returns the equivalent org.rocksdb.Status.SubCode for the provided // C++ ROCKSDB_NAMESPACE::Status::SubCode enum static jbyte toJavaStatusSubCode( const ROCKSDB_NAMESPACE::Status::SubCode& subCode) { switch (subCode) { case ROCKSDB_NAMESPACE::Status::SubCode::kNone: return 0x0; case ROCKSDB_NAMESPACE::Status::SubCode::kMutexTimeout: return 0x1; case ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout: return 0x2; case ROCKSDB_NAMESPACE::Status::SubCode::kLockLimit: return 0x3; case ROCKSDB_NAMESPACE::Status::SubCode::kNoSpace: return 0x4; case ROCKSDB_NAMESPACE::Status::SubCode::kDeadlock: return 0x5; case ROCKSDB_NAMESPACE::Status::SubCode::kStaleFile: return 0x6; case ROCKSDB_NAMESPACE::Status::SubCode::kMemoryLimit: return 0x7; default: return 0x7F; // undefined } } static std::unique_ptr toCppStatus( const jbyte jcode_value, const jbyte jsub_code_value) { 
std::unique_ptr status; switch (jcode_value) { case 0x0: //Ok status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK())); break; case 0x1: //NotFound status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::NotFound( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0x2: //Corruption status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Corruption( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0x3: //NotSupported status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status( ROCKSDB_NAMESPACE::Status::NotSupported( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( jsub_code_value)))); break; case 0x4: //InvalidArgument status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status( ROCKSDB_NAMESPACE::Status::InvalidArgument( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( jsub_code_value)))); break; case 0x5: //IOError status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::IOError( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0x6: //MergeInProgress status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status( ROCKSDB_NAMESPACE::Status::MergeInProgress( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( jsub_code_value)))); break; case 0x7: //Incomplete status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Incomplete( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0x8: //ShutdownInProgress status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status( ROCKSDB_NAMESPACE::Status::ShutdownInProgress( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( jsub_code_value)))); break; case 0x9: //TimedOut status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::TimedOut( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0xA: //Aborted status = std::unique_ptr( new 
ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Aborted( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0xB: //Busy status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Busy( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0xC: //Expired status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::Expired( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0xD: //TryAgain status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::TryAgain( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode(jsub_code_value)))); break; case 0xE: // ColumnFamilyDropped status = std::unique_ptr( new ROCKSDB_NAMESPACE::Status( ROCKSDB_NAMESPACE::Status::ColumnFamilyDropped( ROCKSDB_NAMESPACE::SubCodeJni::toCppSubCode( jsub_code_value)))); break; case 0x7F: default: return nullptr; } return status; } // Returns the equivalent ROCKSDB_NAMESPACE::Status for the Java // org.rocksdb.Status static std::unique_ptr toCppStatus( JNIEnv* env, const jobject jstatus) { jmethodID mid_code = getCodeMethod(env); if (mid_code == nullptr) { // exception occurred return nullptr; } jobject jcode = env->CallObjectMethod(jstatus, mid_code); if (env->ExceptionCheck()) { // exception occurred return nullptr; } jmethodID mid_code_value = ROCKSDB_NAMESPACE::CodeJni::getValueMethod(env); if (mid_code_value == nullptr) { // exception occurred return nullptr; } jbyte jcode_value = env->CallByteMethod(jcode, mid_code_value); if (env->ExceptionCheck()) { // exception occurred if (jcode != nullptr) { env->DeleteLocalRef(jcode); } return nullptr; } jmethodID mid_subCode = getSubCodeMethod(env); if (mid_subCode == nullptr) { // exception occurred return nullptr; } jobject jsubCode = env->CallObjectMethod(jstatus, mid_subCode); if (env->ExceptionCheck()) { // exception occurred if (jcode != nullptr) { env->DeleteLocalRef(jcode); } return nullptr; } jbyte 
jsub_code_value = 0x0; // None if (jsubCode != nullptr) { jmethodID mid_subCode_value = ROCKSDB_NAMESPACE::SubCodeJni::getValueMethod(env); if (mid_subCode_value == nullptr) { // exception occurred return nullptr; } jsub_code_value = env->CallByteMethod(jsubCode, mid_subCode_value); if (env->ExceptionCheck()) { // exception occurred if (jcode != nullptr) { env->DeleteLocalRef(jcode); } return nullptr; } } jmethodID mid_state = getStateMethod(env); if (mid_state == nullptr) { // exception occurred return nullptr; } jobject jstate = env->CallObjectMethod(jstatus, mid_state); if (env->ExceptionCheck()) { // exception occurred if (jsubCode != nullptr) { env->DeleteLocalRef(jsubCode); } if (jcode != nullptr) { env->DeleteLocalRef(jcode); } return nullptr; } std::unique_ptr status = toCppStatus(jcode_value, jsub_code_value); // delete all local refs if (jstate != nullptr) { env->DeleteLocalRef(jstate); } if (jsubCode != nullptr) { env->DeleteLocalRef(jsubCode); } if (jcode != nullptr) { env->DeleteLocalRef(jcode); } return status; } }; // The portal class for org.rocksdb.RocksDBException class RocksDBExceptionJni : public JavaException { public: /** * Get the Java Class org.rocksdb.RocksDBException * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaException::getJClass(env, "org/rocksdb/RocksDBException"); } /** * Create and throw a Java RocksDBException with the provided message * * @param env A pointer to the Java environment * @param msg The message for the exception * * @return true if an exception was thrown, false otherwise */ static bool ThrowNew(JNIEnv* env, const std::string& msg) { return JavaException::ThrowNew(env, msg); } /** * Create and throw a Java RocksDBException with the provided status * * If s->ok() == true, then 
this function will not throw any exception. * * @param env A pointer to the Java environment * @param s The status for the exception * * @return true if an exception was thrown, false otherwise */ static bool ThrowNew(JNIEnv* env, std::unique_ptr& s) { return ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, *(s.get())); } /** * Create and throw a Java RocksDBException with the provided status * * If s.ok() == true, then this function will not throw any exception. * * @param env A pointer to the Java environment * @param s The status for the exception * * @return true if an exception was thrown, false otherwise */ static bool ThrowNew(JNIEnv* env, const Status& s) { if (s.ok()) { return false; } // get the RocksDBException class jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class std::cerr << "RocksDBExceptionJni::ThrowNew/class - Error: unexpected exception!" << std::endl; return env->ExceptionCheck(); } // get the constructor of org.rocksdb.RocksDBException jmethodID mid = env->GetMethodID(jclazz, "", "(Lorg/rocksdb/Status;)V"); if(mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError std::cerr << "RocksDBExceptionJni::ThrowNew/cstr - Error: unexpected exception!" << std::endl; return env->ExceptionCheck(); } // get the Java status object jobject jstatus = StatusJni::construct(env, s); if(jstatus == nullptr) { // exception occcurred std::cerr << "RocksDBExceptionJni::ThrowNew/StatusJni - Error: unexpected exception!" << std::endl; return env->ExceptionCheck(); } // construct the RocksDBException jthrowable rocksdb_exception = reinterpret_cast(env->NewObject(jclazz, mid, jstatus)); if(env->ExceptionCheck()) { if(jstatus != nullptr) { env->DeleteLocalRef(jstatus); } if(rocksdb_exception != nullptr) { env->DeleteLocalRef(rocksdb_exception); } std::cerr << "RocksDBExceptionJni::ThrowNew/NewObject - Error: unexpected exception!" 
<< std::endl; return true; } // throw the RocksDBException const jint rs = env->Throw(rocksdb_exception); if(rs != JNI_OK) { // exception could not be thrown std::cerr << "RocksDBExceptionJni::ThrowNew - Fatal: could not throw exception!" << std::endl; if(jstatus != nullptr) { env->DeleteLocalRef(jstatus); } if(rocksdb_exception != nullptr) { env->DeleteLocalRef(rocksdb_exception); } return env->ExceptionCheck(); } if(jstatus != nullptr) { env->DeleteLocalRef(jstatus); } if(rocksdb_exception != nullptr) { env->DeleteLocalRef(rocksdb_exception); } return true; } /** * Create and throw a Java RocksDBException with the provided message * and status * * If s.ok() == true, then this function will not throw any exception. * * @param env A pointer to the Java environment * @param msg The message for the exception * @param s The status for the exception * * @return true if an exception was thrown, false otherwise */ static bool ThrowNew(JNIEnv* env, const std::string& msg, const Status& s) { assert(!s.ok()); if (s.ok()) { return false; } // get the RocksDBException class jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class std::cerr << "RocksDBExceptionJni::ThrowNew/class - Error: unexpected exception!" << std::endl; return env->ExceptionCheck(); } // get the constructor of org.rocksdb.RocksDBException jmethodID mid = env->GetMethodID(jclazz, "", "(Ljava/lang/String;Lorg/rocksdb/Status;)V"); if(mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError std::cerr << "RocksDBExceptionJni::ThrowNew/cstr - Error: unexpected exception!" << std::endl; return env->ExceptionCheck(); } jstring jmsg = env->NewStringUTF(msg.c_str()); if(jmsg == nullptr) { // exception thrown: OutOfMemoryError std::cerr << "RocksDBExceptionJni::ThrowNew/msg - Error: unexpected exception!" 
<< std::endl; return env->ExceptionCheck(); } // get the Java status object jobject jstatus = StatusJni::construct(env, s); if(jstatus == nullptr) { // exception occcurred std::cerr << "RocksDBExceptionJni::ThrowNew/StatusJni - Error: unexpected exception!" << std::endl; if(jmsg != nullptr) { env->DeleteLocalRef(jmsg); } return env->ExceptionCheck(); } // construct the RocksDBException jthrowable rocksdb_exception = reinterpret_cast(env->NewObject(jclazz, mid, jmsg, jstatus)); if(env->ExceptionCheck()) { if(jstatus != nullptr) { env->DeleteLocalRef(jstatus); } if(jmsg != nullptr) { env->DeleteLocalRef(jmsg); } if(rocksdb_exception != nullptr) { env->DeleteLocalRef(rocksdb_exception); } std::cerr << "RocksDBExceptionJni::ThrowNew/NewObject - Error: unexpected exception!" << std::endl; return true; } // throw the RocksDBException const jint rs = env->Throw(rocksdb_exception); if(rs != JNI_OK) { // exception could not be thrown std::cerr << "RocksDBExceptionJni::ThrowNew - Fatal: could not throw exception!" 
<< std::endl; if(jstatus != nullptr) { env->DeleteLocalRef(jstatus); } if(jmsg != nullptr) { env->DeleteLocalRef(jmsg); } if(rocksdb_exception != nullptr) { env->DeleteLocalRef(rocksdb_exception); } return env->ExceptionCheck(); } if(jstatus != nullptr) { env->DeleteLocalRef(jstatus); } if(jmsg != nullptr) { env->DeleteLocalRef(jmsg); } if(rocksdb_exception != nullptr) { env->DeleteLocalRef(rocksdb_exception); } return true; } /** * Get the Java Method: RocksDBException#getStatus * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getStatusMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "getStatus", "()Lorg/rocksdb/Status;"); assert(mid != nullptr); return mid; } static std::unique_ptr toCppStatus( JNIEnv* env, jthrowable jrocksdb_exception) { if(!env->IsInstanceOf(jrocksdb_exception, getJClass(env))) { // not an instance of RocksDBException return nullptr; } // get the java status object jmethodID mid = getStatusMethod(env); if(mid == nullptr) { // exception occurred accessing class or method return nullptr; } jobject jstatus = env->CallObjectMethod(jrocksdb_exception, mid); if(env->ExceptionCheck()) { // exception occurred return nullptr; } if(jstatus == nullptr) { return nullptr; // no status available } return ROCKSDB_NAMESPACE::StatusJni::toCppStatus(env, jstatus); } }; // The portal class for java.util.List class ListJni : public JavaClass { public: /** * Get the Java Class java.util.List * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getListClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/util/List"); } 
/** * Get the Java Class java.util.ArrayList * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getArrayListClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/util/ArrayList"); } /** * Get the Java Class java.util.Iterator * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getIteratorClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/util/Iterator"); } /** * Get the Java Method: List#iterator * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getIteratorMethod(JNIEnv* env) { jclass jlist_clazz = getListClass(env); if(jlist_clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jlist_clazz, "iterator", "()Ljava/util/Iterator;"); assert(mid != nullptr); return mid; } /** * Get the Java Method: Iterator#hasNext * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getHasNextMethod(JNIEnv* env) { jclass jiterator_clazz = getIteratorClass(env); if(jiterator_clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jiterator_clazz, "hasNext", "()Z"); assert(mid != nullptr); return mid; } /** * Get the Java Method: Iterator#next * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getNextMethod(JNIEnv* env) { jclass 
jiterator_clazz = getIteratorClass(env); if(jiterator_clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jiterator_clazz, "next", "()Ljava/lang/Object;"); assert(mid != nullptr); return mid; } /** * Get the Java Method: ArrayList constructor * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getArrayListConstructorMethodId(JNIEnv* env) { jclass jarray_list_clazz = getArrayListClass(env); if(jarray_list_clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jarray_list_clazz, "", "(I)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: List#add * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getListAddMethodId(JNIEnv* env) { jclass jlist_clazz = getListClass(env); if(jlist_clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jlist_clazz, "add", "(Ljava/lang/Object;)Z"); assert(mid != nullptr); return mid; } }; // The portal class for java.lang.Byte class ByteJni : public JavaClass { public: /** * Get the Java Class java.lang.Byte * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/lang/Byte"); } /** * Get the Java Class byte[] * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass 
getArrayJClass(JNIEnv* env) { return JavaClass::getJClass(env, "[B"); } /** * Creates a new 2-dimensional Java Byte Array byte[][] * * @param env A pointer to the Java environment * @param len The size of the first dimension * * @return A reference to the Java byte[][] or nullptr if an exception occurs */ static jobjectArray new2dByteArray(JNIEnv* env, const jsize len) { jclass clazz = getArrayJClass(env); if(clazz == nullptr) { // exception occurred accessing class return nullptr; } return env->NewObjectArray(len, clazz, nullptr); } /** * Get the Java Method: Byte#byteValue * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retrieved */ static jmethodID getByteValueMethod(JNIEnv* env) { jclass clazz = getJClass(env); if(clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(clazz, "byteValue", "()B"); assert(mid != nullptr); return mid; } /** * Calls the Java Method: Byte#valueOf, returning a constructed Byte jobject * * @param env A pointer to the Java environment * * @return A constructing Byte object or nullptr if the class or method id could not * be retrieved, or an exception occurred */ static jobject valueOf(JNIEnv* env, jbyte jprimitive_byte) { jclass clazz = getJClass(env); if (clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetStaticMethodID(clazz, "valueOf", "(B)Ljava/lang/Byte;"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } const jobject jbyte_obj = env->CallStaticObjectMethod(clazz, mid, jprimitive_byte); if (env->ExceptionCheck()) { // exception occurred return nullptr; } return jbyte_obj; } }; // The portal class for java.nio.ByteBuffer class ByteBufferJni : public JavaClass { public: /** * Get the Java Class java.nio.ByteBuffer * * @param env A pointer to the Java environment * * @return The Java 
Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/nio/ByteBuffer"); } /** * Get the Java Method: ByteBuffer#allocate * * @param env A pointer to the Java environment * @param jbytebuffer_clazz if you have a reference to a ByteBuffer class, or nullptr * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getAllocateMethodId(JNIEnv* env, jclass jbytebuffer_clazz = nullptr) { const jclass jclazz = jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz; if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetStaticMethodID( jclazz, "allocate", "(I)Ljava/nio/ByteBuffer;"); assert(mid != nullptr); return mid; } /** * Get the Java Method: ByteBuffer#array * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getArrayMethodId(JNIEnv* env, jclass jbytebuffer_clazz = nullptr) { const jclass jclazz = jbytebuffer_clazz == nullptr ? 
getJClass(env) : jbytebuffer_clazz; if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "array", "()[B"); assert(mid != nullptr); return mid; } static jobject construct( JNIEnv* env, const bool direct, const size_t capacity, jclass jbytebuffer_clazz = nullptr) { return constructWith(env, direct, nullptr, capacity, jbytebuffer_clazz); } static jobject constructWith(JNIEnv* env, const bool direct, const char* buf, const size_t capacity, jclass jbytebuffer_clazz = nullptr) { if (direct) { bool allocated = false; if (buf == nullptr) { buf = new char[capacity]; allocated = true; } jobject jbuf = env->NewDirectByteBuffer(const_cast(buf), static_cast(capacity)); if (jbuf == nullptr) { // exception occurred if (allocated) { delete[] static_cast(buf); } return nullptr; } return jbuf; } else { const jclass jclazz = jbytebuffer_clazz == nullptr ? getJClass(env) : jbytebuffer_clazz; if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } const jmethodID jmid_allocate = getAllocateMethodId(env, jbytebuffer_clazz); if (jmid_allocate == nullptr) { // exception occurred accessing class, or NoSuchMethodException or OutOfMemoryError return nullptr; } const jobject jbuf = env->CallStaticObjectMethod( jclazz, jmid_allocate, static_cast(capacity)); if (env->ExceptionCheck()) { // exception occurred return nullptr; } // set buffer data? 
if (buf != nullptr) { jbyteArray jarray = array(env, jbuf, jbytebuffer_clazz); if (jarray == nullptr) { // exception occurred env->DeleteLocalRef(jbuf); return nullptr; } jboolean is_copy = JNI_FALSE; jbyte* ja = reinterpret_cast( env->GetPrimitiveArrayCritical(jarray, &is_copy)); if (ja == nullptr) { // exception occurred env->DeleteLocalRef(jarray); env->DeleteLocalRef(jbuf); return nullptr; } memcpy(ja, const_cast(buf), capacity); env->ReleasePrimitiveArrayCritical(jarray, ja, 0); env->DeleteLocalRef(jarray); } return jbuf; } } static jbyteArray array(JNIEnv* env, const jobject& jbyte_buffer, jclass jbytebuffer_clazz = nullptr) { const jmethodID mid = getArrayMethodId(env, jbytebuffer_clazz); if (mid == nullptr) { // exception occurred accessing class, or NoSuchMethodException or OutOfMemoryError return nullptr; } const jobject jarray = env->CallObjectMethod(jbyte_buffer, mid); if (env->ExceptionCheck()) { // exception occurred return nullptr; } return static_cast(jarray); } }; // The portal class for java.lang.Integer class IntegerJni : public JavaClass { public: /** * Get the Java Class java.lang.Integer * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/lang/Integer"); } static jobject valueOf(JNIEnv* env, jint jprimitive_int) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetStaticMethodID(jclazz, "valueOf", "(I)Ljava/lang/Integer;"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } const jobject jinteger_obj = env->CallStaticObjectMethod(jclazz, mid, jprimitive_int); if (env->ExceptionCheck()) { // exception occurred return nullptr; } return jinteger_obj; 
} }; // The portal class for java.lang.Long class LongJni : public JavaClass { public: /** * Get the Java Class java.lang.Long * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/lang/Long"); } static jobject valueOf(JNIEnv* env, jlong jprimitive_long) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetStaticMethodID(jclazz, "valueOf", "(J)Ljava/lang/Long;"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } const jobject jlong_obj = env->CallStaticObjectMethod(jclazz, mid, jprimitive_long); if (env->ExceptionCheck()) { // exception occurred return nullptr; } return jlong_obj; } }; // The portal class for java.lang.StringBuilder class StringBuilderJni : public JavaClass { public: /** * Get the Java Class java.lang.StringBuilder * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/lang/StringBuilder"); } /** * Get the Java Method: StringBuilder#append * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getListAddMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "append", "(Ljava/lang/String;)Ljava/lang/StringBuilder;"); assert(mid != nullptr); return mid; } /** * 
Appends a C-style string to a StringBuilder * * @param env A pointer to the Java environment * @param jstring_builder Reference to a java.lang.StringBuilder * @param c_str A C-style string to append to the StringBuilder * * @return A reference to the updated StringBuilder, or a nullptr if * an exception occurs */ static jobject append(JNIEnv* env, jobject jstring_builder, const char* c_str) { jmethodID mid = getListAddMethodId(env); if(mid == nullptr) { // exception occurred accessing class or method return nullptr; } jstring new_value_str = env->NewStringUTF(c_str); if(new_value_str == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } jobject jresult_string_builder = env->CallObjectMethod(jstring_builder, mid, new_value_str); if(env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(new_value_str); return nullptr; } return jresult_string_builder; } }; // various utility functions for working with RocksDB and JNI class JniUtil { public: /** * Detect if jlong overflows size_t * * @param jvalue the jlong value * * @return */ inline static Status check_if_jlong_fits_size_t(const jlong& jvalue) { Status s = Status::OK(); if (static_cast(jvalue) > std::numeric_limits::max()) { s = Status::InvalidArgument(Slice("jlong overflows 32 bit value.")); } return s; } /** * Obtains a reference to the JNIEnv from * the JVM * * If the current thread is not attached to the JavaVM * then it will be attached so as to retrieve the JNIEnv * * If a thread is attached, it must later be manually * released by calling JavaVM::DetachCurrentThread. 
* This can be handled by always matching calls to this * function with calls to {@link JniUtil::releaseJniEnv(JavaVM*, jboolean)} * * @param jvm (IN) A pointer to the JavaVM instance * @param attached (OUT) A pointer to a boolean which * will be set to JNI_TRUE if we had to attach the thread * * @return A pointer to the JNIEnv or nullptr if a fatal error * occurs and the JNIEnv cannot be retrieved */ static JNIEnv* getJniEnv(JavaVM* jvm, jboolean* attached) { assert(jvm != nullptr); JNIEnv *env; const jint env_rs = jvm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_6); if(env_rs == JNI_OK) { // current thread is already attached, return the JNIEnv *attached = JNI_FALSE; return env; } else if(env_rs == JNI_EDETACHED) { // current thread is not attached, attempt to attach const jint rs_attach = jvm->AttachCurrentThread(reinterpret_cast(&env), NULL); if(rs_attach == JNI_OK) { *attached = JNI_TRUE; return env; } else { // error, could not attach the thread std::cerr << "JniUtil::getJniEnv - Fatal: could not attach current thread to JVM!" 
<< std::endl; return nullptr; } } else if(env_rs == JNI_EVERSION) { // error, JDK does not support JNI_VERSION_1_6+ std::cerr << "JniUtil::getJniEnv - Fatal: JDK does not support JNI_VERSION_1_6" << std::endl; return nullptr; } else { std::cerr << "JniUtil::getJniEnv - Fatal: Unknown error: env_rs=" << env_rs << std::endl; return nullptr; } } /** * Counterpart to {@link JniUtil::getJniEnv(JavaVM*, jboolean*)} * * Detachess the current thread from the JVM if it was previously * attached * * @param jvm (IN) A pointer to the JavaVM instance * @param attached (IN) JNI_TRUE if we previously had to attach the thread * to the JavaVM to get the JNIEnv */ static void releaseJniEnv(JavaVM* jvm, jboolean& attached) { assert(jvm != nullptr); if(attached == JNI_TRUE) { const jint rs_detach = jvm->DetachCurrentThread(); assert(rs_detach == JNI_OK); if(rs_detach != JNI_OK) { std::cerr << "JniUtil::getJniEnv - Warn: Unable to detach current thread from JVM!" << std::endl; } } } /** * Copies a Java String[] to a C++ std::vector * * @param env (IN) A pointer to the java environment * @param jss (IN) The Java String array to copy * @param has_exception (OUT) will be set to JNI_TRUE * if an OutOfMemoryError or ArrayIndexOutOfBoundsException * exception occurs * * @return A std::vector containing copies of the Java strings */ static std::vector copyStrings(JNIEnv* env, jobjectArray jss, jboolean* has_exception) { return ROCKSDB_NAMESPACE::JniUtil::copyStrings( env, jss, env->GetArrayLength(jss), has_exception); } /** * Copies a Java String[] to a C++ std::vector * * @param env (IN) A pointer to the java environment * @param jss (IN) The Java String array to copy * @param jss_len (IN) The length of the Java String array to copy * @param has_exception (OUT) will be set to JNI_TRUE * if an OutOfMemoryError or ArrayIndexOutOfBoundsException * exception occurs * * @return A std::vector containing copies of the Java strings */ static std::vector copyStrings(JNIEnv* env, jobjectArray jss, 
const jsize jss_len, jboolean* has_exception) { std::vector strs; strs.reserve(jss_len); for (jsize i = 0; i < jss_len; i++) { jobject js = env->GetObjectArrayElement(jss, i); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException *has_exception = JNI_TRUE; return strs; } jstring jstr = static_cast(js); const char* str = env->GetStringUTFChars(jstr, nullptr); if(str == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(js); *has_exception = JNI_TRUE; return strs; } strs.push_back(std::string(str)); env->ReleaseStringUTFChars(jstr, str); env->DeleteLocalRef(js); } *has_exception = JNI_FALSE; return strs; } /** * Copies a jstring to a C-style null-terminated byte string * and releases the original jstring * * The jstring is copied as UTF-8 * * If an exception occurs, then JNIEnv::ExceptionCheck() * will have been called * * @param env (IN) A pointer to the java environment * @param js (IN) The java string to copy * @param has_exception (OUT) will be set to JNI_TRUE * if an OutOfMemoryError exception occurs * * @return A pointer to the copied string, or a * nullptr if has_exception == JNI_TRUE */ static std::unique_ptr copyString(JNIEnv* env, jstring js, jboolean* has_exception) { const char *utf = env->GetStringUTFChars(js, nullptr); if(utf == nullptr) { // exception thrown: OutOfMemoryError env->ExceptionCheck(); *has_exception = JNI_TRUE; return nullptr; } else if(env->ExceptionCheck()) { // exception thrown env->ReleaseStringUTFChars(js, utf); *has_exception = JNI_TRUE; return nullptr; } const jsize utf_len = env->GetStringUTFLength(js); std::unique_ptr str(new char[utf_len + 1]); // Note: + 1 is needed for the c_str null terminator std::strcpy(str.get(), utf); env->ReleaseStringUTFChars(js, utf); *has_exception = JNI_FALSE; return str; } /** * Copies a jstring to a std::string * and releases the original jstring * * If an exception occurs, then JNIEnv::ExceptionCheck() * will have been called * * @param env (IN) A 
pointer to the java environment * @param js (IN) The java string to copy * @param has_exception (OUT) will be set to JNI_TRUE * if an OutOfMemoryError exception occurs * * @return A std:string copy of the jstring, or an * empty std::string if has_exception == JNI_TRUE */ static std::string copyStdString(JNIEnv* env, jstring js, jboolean* has_exception) { const char *utf = env->GetStringUTFChars(js, nullptr); if(utf == nullptr) { // exception thrown: OutOfMemoryError env->ExceptionCheck(); *has_exception = JNI_TRUE; return std::string(); } else if(env->ExceptionCheck()) { // exception thrown env->ReleaseStringUTFChars(js, utf); *has_exception = JNI_TRUE; return std::string(); } std::string name(utf); env->ReleaseStringUTFChars(js, utf); *has_exception = JNI_FALSE; return name; } /** * Copies bytes from a std::string to a jByteArray * * @param env A pointer to the java environment * @param bytes The bytes to copy * * @return the Java byte[], or nullptr if an exception occurs * * @throws RocksDBException thrown * if memory size to copy exceeds general java specific array size limitation. 
*/ static jbyteArray copyBytes(JNIEnv* env, std::string bytes) { return createJavaByteArrayWithSizeCheck(env, bytes.c_str(), bytes.size()); } /** * Given a Java byte[][] which is an array of java.lang.Strings * where each String is a byte[], the passed function `string_fn` * will be called on each String, the result is the collected by * calling the passed function `collector_fn` * * @param env (IN) A pointer to the java environment * @param jbyte_strings (IN) A Java array of Strings expressed as bytes * @param string_fn (IN) A transform function to call for each String * @param collector_fn (IN) A collector which is called for the result * of each `string_fn` * @param has_exception (OUT) will be set to JNI_TRUE * if an ArrayIndexOutOfBoundsException or OutOfMemoryError * exception occurs */ template static void byteStrings(JNIEnv* env, jobjectArray jbyte_strings, std::function string_fn, std::function collector_fn, jboolean *has_exception) { const jsize jlen = env->GetArrayLength(jbyte_strings); for(jsize i = 0; i < jlen; i++) { jobject jbyte_string_obj = env->GetObjectArrayElement(jbyte_strings, i); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException *has_exception = JNI_TRUE; // signal error return; } jbyteArray jbyte_string_ary = reinterpret_cast(jbyte_string_obj); T result = byteString(env, jbyte_string_ary, string_fn, has_exception); env->DeleteLocalRef(jbyte_string_obj); if(*has_exception == JNI_TRUE) { // exception thrown: OutOfMemoryError return; } collector_fn(i, result); } *has_exception = JNI_FALSE; } /** * Given a Java String which is expressed as a Java Byte Array byte[], * the passed function `string_fn` will be called on the String * and the result returned * * @param env (IN) A pointer to the java environment * @param jbyte_string_ary (IN) A Java String expressed in bytes * @param string_fn (IN) A transform function to call on the String * @param has_exception (OUT) will be set to JNI_TRUE * if an OutOfMemoryError 
exception occurs */ template static T byteString(JNIEnv* env, jbyteArray jbyte_string_ary, std::function string_fn, jboolean* has_exception) { const jsize jbyte_string_len = env->GetArrayLength(jbyte_string_ary); return byteString(env, jbyte_string_ary, jbyte_string_len, string_fn, has_exception); } /** * Given a Java String which is expressed as a Java Byte Array byte[], * the passed function `string_fn` will be called on the String * and the result returned * * @param env (IN) A pointer to the java environment * @param jbyte_string_ary (IN) A Java String expressed in bytes * @param jbyte_string_len (IN) The length of the Java String * expressed in bytes * @param string_fn (IN) A transform function to call on the String * @param has_exception (OUT) will be set to JNI_TRUE * if an OutOfMemoryError exception occurs */ template static T byteString(JNIEnv* env, jbyteArray jbyte_string_ary, const jsize jbyte_string_len, std::function string_fn, jboolean* has_exception) { jbyte* jbyte_string = env->GetByteArrayElements(jbyte_string_ary, nullptr); if(jbyte_string == nullptr) { // exception thrown: OutOfMemoryError *has_exception = JNI_TRUE; return nullptr; // signal error } T result = string_fn(reinterpret_cast(jbyte_string), jbyte_string_len); env->ReleaseByteArrayElements(jbyte_string_ary, jbyte_string, JNI_ABORT); *has_exception = JNI_FALSE; return result; } /** * Converts a std::vector to a Java byte[][] where each Java String * is expressed as a Java Byte Array byte[]. 
* * @param env A pointer to the java environment * @param strings A vector of Strings * * @return A Java array of Strings expressed as bytes, * or nullptr if an exception is thrown */ static jobjectArray stringsBytes(JNIEnv* env, std::vector strings) { jclass jcls_ba = ByteJni::getArrayJClass(env); if(jcls_ba == nullptr) { // exception occurred return nullptr; } const jsize len = static_cast(strings.size()); jobjectArray jbyte_strings = env->NewObjectArray(len, jcls_ba, nullptr); if(jbyte_strings == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < len; i++) { std::string *str = &strings[i]; const jsize str_len = static_cast(str->size()); jbyteArray jbyte_string_ary = env->NewByteArray(str_len); if(jbyte_string_ary == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jbyte_strings); return nullptr; } env->SetByteArrayRegion( jbyte_string_ary, 0, str_len, const_cast(reinterpret_cast(str->c_str()))); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jbyte_string_ary); env->DeleteLocalRef(jbyte_strings); return nullptr; } env->SetObjectArrayElement(jbyte_strings, i, jbyte_string_ary); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException // or ArrayStoreException env->DeleteLocalRef(jbyte_string_ary); env->DeleteLocalRef(jbyte_strings); return nullptr; } env->DeleteLocalRef(jbyte_string_ary); } return jbyte_strings; } /** * Converts a std::vector to a Java String[]. 
* * @param env A pointer to the java environment * @param strings A vector of Strings * * @return A Java array of Strings, * or nullptr if an exception is thrown */ static jobjectArray toJavaStrings(JNIEnv* env, const std::vector* strings) { jclass jcls_str = env->FindClass("java/lang/String"); if(jcls_str == nullptr) { // exception occurred return nullptr; } const jsize len = static_cast(strings->size()); jobjectArray jstrings = env->NewObjectArray(len, jcls_str, nullptr); if(jstrings == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < len; i++) { const std::string *str = &((*strings)[i]); jstring js = ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, str); if (js == nullptr) { env->DeleteLocalRef(jstrings); return nullptr; } env->SetObjectArrayElement(jstrings, i, js); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException // or ArrayStoreException env->DeleteLocalRef(js); env->DeleteLocalRef(jstrings); return nullptr; } } return jstrings; } /** * Creates a Java UTF String from a C++ std::string * * @param env A pointer to the java environment * @param string the C++ std::string * @param treat_empty_as_null true if empty strings should be treated as null * * @return the Java UTF string, or nullptr if the provided string * is null (or empty and treat_empty_as_null is set), or if an * exception occurs allocating the Java String. */ static jstring toJavaString(JNIEnv* env, const std::string* string, const bool treat_empty_as_null = false) { if (string == nullptr) { return nullptr; } if (treat_empty_as_null && string->empty()) { return nullptr; } return env->NewStringUTF(string->c_str()); } /** * Copies bytes to a new jByteArray with the check of java array size limitation. 
* * @param bytes pointer to memory to copy to a new jByteArray * @param size number of bytes to copy * * @return the Java byte[], or nullptr if an exception occurs * * @throws RocksDBException thrown * if memory size to copy exceeds general java array size limitation to avoid overflow. */ static jbyteArray createJavaByteArrayWithSizeCheck(JNIEnv* env, const char* bytes, const size_t size) { // Limitation for java array size is vm specific // In general it cannot exceed Integer.MAX_VALUE (2^31 - 1) // Current HotSpot VM limitation for array size is Integer.MAX_VALUE - 5 (2^31 - 1 - 5) // It means that the next call to env->NewByteArray can still end with // OutOfMemoryError("Requested array size exceeds VM limit") coming from VM static const size_t MAX_JARRAY_SIZE = (static_cast(1)) << 31; if(size > MAX_JARRAY_SIZE) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Requested array size exceeds VM limit"); return nullptr; } const jsize jlen = static_cast(size); jbyteArray jbytes = env->NewByteArray(jlen); if(jbytes == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion(jbytes, 0, jlen, const_cast(reinterpret_cast(bytes))); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jbytes); return nullptr; } return jbytes; } /** * Copies bytes from a ROCKSDB_NAMESPACE::Slice to a jByteArray * * @param env A pointer to the java environment * @param bytes The bytes to copy * * @return the Java byte[] or nullptr if an exception occurs * * @throws RocksDBException thrown * if memory size to copy exceeds general java specific array size * limitation. */ static jbyteArray copyBytes(JNIEnv* env, const Slice& bytes) { return createJavaByteArrayWithSizeCheck(env, bytes.data(), bytes.size()); } /* * Helper for operations on a key and value * for example WriteBatch->Put * * TODO(AR) could be used for RocksDB->Put etc. 
*/ static std::unique_ptr kv_op( std::function op, JNIEnv* env, jobject /*jobj*/, jbyteArray jkey, jint jkey_len, jbyteArray jvalue, jint jvalue_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if(env->ExceptionCheck()) { // exception thrown: OutOfMemoryError return nullptr; } jbyte* value = env->GetByteArrayElements(jvalue, nullptr); if(env->ExceptionCheck()) { // exception thrown: OutOfMemoryError if(key != nullptr) { env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), jvalue_len); auto status = op(key_slice, value_slice); if(value != nullptr) { env->ReleaseByteArrayElements(jvalue, value, JNI_ABORT); } if(key != nullptr) { env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } return std::unique_ptr( new ROCKSDB_NAMESPACE::Status(status)); } /* * Helper for operations on a key * for example WriteBatch->Delete * * TODO(AR) could be used for RocksDB->Delete etc. 
*/ static std::unique_ptr k_op( std::function op, JNIEnv* env, jobject /*jobj*/, jbyteArray jkey, jint jkey_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if(env->ExceptionCheck()) { // exception thrown: OutOfMemoryError return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); auto status = op(key_slice); if(key != nullptr) { env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } return std::unique_ptr( new ROCKSDB_NAMESPACE::Status(status)); } /* * Helper for operations on a value * for example WriteBatchWithIndex->GetFromBatch */ static jbyteArray v_op(std::function op, JNIEnv* env, jbyteArray jkey, jint jkey_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if(env->ExceptionCheck()) { // exception thrown: OutOfMemoryError return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); std::string value; ROCKSDB_NAMESPACE::Status s = op(key_slice, &value); if(key != nullptr) { env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } if (s.IsNotFound()) { return nullptr; } if (s.ok()) { jbyteArray jret_value = env->NewByteArray(static_cast(value.size())); if(jret_value == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion(jret_value, 0, static_cast(value.size()), const_cast(reinterpret_cast(value.c_str()))); if(env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException if(jret_value != nullptr) { env->DeleteLocalRef(jret_value); } return nullptr; } return jret_value; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } /** * Creates a vector of C++ pointers from * a Java array of C++ pointer addresses. * * @param env (IN) A pointer to the java environment * @param pointers (IN) A Java array of C++ pointer addresses * @param has_exception (OUT) will be set to JNI_TRUE * if an ArrayIndexOutOfBoundsException or OutOfMemoryError * exception occurs. * * @return A vector of C++ pointers. 
*/ template static std::vector fromJPointers( JNIEnv* env, jlongArray jptrs, jboolean *has_exception) { const jsize jptrs_len = env->GetArrayLength(jptrs); std::vector ptrs; jlong* jptr = env->GetLongArrayElements(jptrs, nullptr); if (jptr == nullptr) { // exception thrown: OutOfMemoryError *has_exception = JNI_TRUE; return ptrs; } ptrs.reserve(jptrs_len); for (jsize i = 0; i < jptrs_len; i++) { ptrs.push_back(reinterpret_cast(jptr[i])); } env->ReleaseLongArrayElements(jptrs, jptr, JNI_ABORT); return ptrs; } /** * Creates a Java array of C++ pointer addresses * from a vector of C++ pointers. * * @param env (IN) A pointer to the java environment * @param pointers (IN) A vector of C++ pointers * @param has_exception (OUT) will be set to JNI_TRUE * if an ArrayIndexOutOfBoundsException or OutOfMemoryError * exception occurs * * @return Java array of C++ pointer addresses. */ template static jlongArray toJPointers(JNIEnv* env, const std::vector &pointers, jboolean *has_exception) { const jsize len = static_cast(pointers.size()); std::unique_ptr results(new jlong[len]); std::transform(pointers.begin(), pointers.end(), results.get(), [](T* pointer) -> jlong { return reinterpret_cast(pointer); }); jlongArray jpointers = env->NewLongArray(len); if (jpointers == nullptr) { // exception thrown: OutOfMemoryError *has_exception = JNI_TRUE; return nullptr; } env->SetLongArrayRegion(jpointers, 0, len, results.get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException *has_exception = JNI_TRUE; env->DeleteLocalRef(jpointers); return nullptr; } *has_exception = JNI_FALSE; return jpointers; } /* * Helper for operations on a key and value * for example WriteBatch->Put * * TODO(AR) could be extended to cover returning ROCKSDB_NAMESPACE::Status * from `op` and used for RocksDB->Put etc. 
*/
  static void kv_op_direct(std::function<void(ROCKSDB_NAMESPACE::Slice&,
                                              ROCKSDB_NAMESPACE::Slice&)>
                               op,
                           JNIEnv* env, jobject jkey, jint jkey_off,
                           jint jkey_len, jobject jval, jint jval_off,
                           jint jval_len) {
    // A range is accepted only if offset and length are non-negative and
    // offset + length fits in the buffer. The sum is formed in jlong so two
    // large jint values cannot overflow before the comparison.
    char* key = reinterpret_cast<char*>(env->GetDirectBufferAddress(jkey));
    if (key == nullptr || jkey_off < 0 || jkey_len < 0 ||
        env->GetDirectBufferCapacity(jkey) <
            (static_cast<jlong>(jkey_off) + static_cast<jlong>(jkey_len))) {
      ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(
          env, "Invalid key argument");
      return;
    }

    char* value = reinterpret_cast<char*>(env->GetDirectBufferAddress(jval));
    if (value == nullptr || jval_off < 0 || jval_len < 0 ||
        env->GetDirectBufferCapacity(jval) <
            (static_cast<jlong>(jval_off) + static_cast<jlong>(jval_len))) {
      ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(
          env, "Invalid value argument");
      return;
    }

    key += jkey_off;
    value += jval_off;

    ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len);
    ROCKSDB_NAMESPACE::Slice value_slice(value, jval_len);

    op(key_slice, value_slice);
  }

  /*
   * Helper for operations on a key and value
   * for example WriteBatch->Delete
   *
   * TODO(AR) could be extended to cover returning ROCKSDB_NAMESPACE::Status
   * from `op` and used for RocksDB->Delete etc.
*/ static void k_op_direct(std::function op, JNIEnv* env, jobject jkey, jint jkey_off, jint jkey_len) { char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); if (key == nullptr || env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Invalid key argument"); return; } key += jkey_off; ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); return op(key_slice); } template static jint copyToDirect(JNIEnv* env, T& source, jobject jtarget, jint jtarget_off, jint jtarget_len) { char* target = reinterpret_cast(env->GetDirectBufferAddress(jtarget)); if (target == nullptr || env->GetDirectBufferCapacity(jtarget) < (jtarget_off + jtarget_len)) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Invalid target argument"); return 0; } target += jtarget_off; const jint cvalue_len = static_cast(source.size()); const jint length = std::min(jtarget_len, cvalue_len); memcpy(target, source.data(), length); return cvalue_len; } }; class MapJni : public JavaClass { public: /** * Get the Java Class java.util.Map * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/util/Map"); } /** * Get the Java Method: Map#put * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMapPutMethodId(JNIEnv* env) { jclass jlist_clazz = getJClass(env); if(jlist_clazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jlist_clazz, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); assert(mid != nullptr); return mid; } }; class HashMapJni : public JavaClass { public: /** * Get the Java Class 
java.util.HashMap * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "java/util/HashMap"); } /** * Create a new Java java.util.HashMap object. * * @param env A pointer to the Java environment * * @return A reference to a Java java.util.HashMap object, or * nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, const uint32_t initial_capacity = 16) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(I)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jobject jhash_map = env->NewObject(jclazz, mid, static_cast(initial_capacity)); if (env->ExceptionCheck()) { return nullptr; } return jhash_map; } /** * A function which maps a std::pair to a std::pair * * @return Either a pointer to a std::pair, or nullptr * if an error occurs during the mapping */ template using FnMapKV = std::function> (const std::pair&)>; // template ::value_type, std::pair>::value, int32_t>::type = 0> // static void putAll(JNIEnv* env, const jobject jhash_map, I iterator, const FnMapKV &fn_map_kv) { /** * Returns true if it succeeds, false if an error occurs */ template static bool putAll(JNIEnv* env, const jobject jhash_map, iterator_type iterator, iterator_type end, const FnMapKV &fn_map_kv) { const jmethodID jmid_put = ROCKSDB_NAMESPACE::MapJni::getMapPutMethodId(env); if (jmid_put == nullptr) { return false; } for (auto it = iterator; it != end; ++it) { const std::unique_ptr> result = fn_map_kv(*it); if (result == nullptr) { // an error occurred during fn_map_kv return false; } env->CallObjectMethod(jhash_map, jmid_put, result->first, result->second); if 
(env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(result->second); env->DeleteLocalRef(result->first); return false; } // release local references env->DeleteLocalRef(result->second); env->DeleteLocalRef(result->first); } return true; } /** * Creates a java.util.Map from a std::map * * @param env A pointer to the Java environment * @param map the Cpp map * * @return a reference to the Java java.util.Map object, or nullptr if an exception occcurred */ static jobject fromCppMap(JNIEnv* env, const std::map* map) { if (map == nullptr) { return nullptr; } jobject jhash_map = construct(env, static_cast(map->size())); if (jhash_map == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const std::string, const std::string, jobject, jobject> fn_map_kv = [env](const std::pair& kv) { jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &(kv.first), false); if (env->ExceptionCheck()) { // an error occurred return std::unique_ptr>(nullptr); } jstring jvalue = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &(kv.second), true); if (env->ExceptionCheck()) { // an error occurred env->DeleteLocalRef(jkey); return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair( static_cast(jkey), static_cast(jvalue))); }; if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { // exception occurred return nullptr; } return jhash_map; } /** * Creates a java.util.Map from a std::map * * @param env A pointer to the Java environment * @param map the Cpp map * * @return a reference to the Java java.util.Map object, or nullptr if an exception occcurred */ static jobject fromCppMap(JNIEnv* env, const std::map* map) { if (map == nullptr) { return nullptr; } if (map == nullptr) { return nullptr; } jobject jhash_map = construct(env, static_cast(map->size())); if (jhash_map == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const std::string, const uint32_t, 
jobject, jobject> fn_map_kv = [env](const std::pair& kv) { jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &(kv.first), false); if (env->ExceptionCheck()) { // an error occurred return std::unique_ptr>(nullptr); } jobject jvalue = ROCKSDB_NAMESPACE::IntegerJni::valueOf( env, static_cast(kv.second)); if (env->ExceptionCheck()) { // an error occurred env->DeleteLocalRef(jkey); return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair(static_cast(jkey), jvalue)); }; if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { // exception occurred return nullptr; } return jhash_map; } /** * Creates a java.util.Map from a std::map * * @param env A pointer to the Java environment * @param map the Cpp map * * @return a reference to the Java java.util.Map object, or nullptr if an exception occcurred */ static jobject fromCppMap(JNIEnv* env, const std::map* map) { if (map == nullptr) { return nullptr; } jobject jhash_map = construct(env, static_cast(map->size())); if (jhash_map == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const std::string, const uint64_t, jobject, jobject> fn_map_kv = [env](const std::pair& kv) { jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &(kv.first), false); if (env->ExceptionCheck()) { // an error occurred return std::unique_ptr>(nullptr); } jobject jvalue = ROCKSDB_NAMESPACE::LongJni::valueOf( env, static_cast(kv.second)); if (env->ExceptionCheck()) { // an error occurred env->DeleteLocalRef(jkey); return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair(static_cast(jkey), jvalue)); }; if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { // exception occurred return nullptr; } return jhash_map; } /** * Creates a java.util.Map from a std::map * * @param env A pointer to the Java environment * @param map the Cpp map * * @return a reference to the Java java.util.Map object, or nullptr if an exception occcurred */ 
static jobject fromCppMap(JNIEnv* env, const std::map* map) { if (map == nullptr) { return nullptr; } jobject jhash_map = construct(env, static_cast(map->size())); if (jhash_map == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV fn_map_kv = [env](const std::pair& kv) { jobject jkey = ROCKSDB_NAMESPACE::IntegerJni::valueOf( env, static_cast(kv.first)); if (env->ExceptionCheck()) { // an error occurred return std::unique_ptr>(nullptr); } jobject jvalue = ROCKSDB_NAMESPACE::LongJni::valueOf( env, static_cast(kv.second)); if (env->ExceptionCheck()) { // an error occurred env->DeleteLocalRef(jkey); return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair(static_cast(jkey), jvalue)); }; if (!putAll(env, jhash_map, map->begin(), map->end(), fn_map_kv)) { // exception occurred return nullptr; } return jhash_map; } }; // The portal class for org.rocksdb.RocksDB class RocksDBJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.RocksDB * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksDB"); } }; // The portal class for org.rocksdb.Options class OptionsJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.Options * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/Options"); } }; // The portal class for org.rocksdb.DBOptions class DBOptionsJni : public RocksDBNativeClass { public: /** * Get the Java 
Class org.rocksdb.DBOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/DBOptions"); } }; // The portal class for org.rocksdb.ColumnFamilyOptions class ColumnFamilyOptionsJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.ColumnFamilyOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/ColumnFamilyOptions"); } /** * Create a new Java org.rocksdb.ColumnFamilyOptions object with the same * properties as the provided C++ ROCKSDB_NAMESPACE::ColumnFamilyOptions * object * * @param env A pointer to the Java environment * @param cfoptions A pointer to ROCKSDB_NAMESPACE::ColumnFamilyOptions object * * @return A reference to a Java org.rocksdb.ColumnFamilyOptions object, or * nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, const ColumnFamilyOptions* cfoptions) { auto* cfo = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(*cfoptions); jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(J)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jobject jcfd = env->NewObject(jclazz, mid, reinterpret_cast(cfo)); if (env->ExceptionCheck()) { return nullptr; } return jcfd; } }; // The portal class for org.rocksdb.WriteOptions class WriteOptionsJni : public RocksDBNativeClass { public: /** * Get the Java 
Class org.rocksdb.WriteOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteOptions"); } }; // The portal class for org.rocksdb.ReadOptions class ReadOptionsJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.ReadOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/ReadOptions"); } }; // The portal class for org.rocksdb.WriteBatch class WriteBatchJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.WriteBatch * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch"); } /** * Create a new Java org.rocksdb.WriteBatch object * * @param env A pointer to the Java environment * @param wb A pointer to ROCKSDB_NAMESPACE::WriteBatch object * * @return A reference to a Java org.rocksdb.WriteBatch object, or * nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, const WriteBatch* wb) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(J)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; 
} jobject jwb = env->NewObject(jclazz, mid, reinterpret_cast(wb)); if (env->ExceptionCheck()) { return nullptr; } return jwb; } }; // The portal class for org.rocksdb.WriteBatch.Handler class WriteBatchHandlerJni : public RocksDBNativeClass< const ROCKSDB_NAMESPACE::WriteBatchHandlerJniCallback*, WriteBatchHandlerJni> { public: /** * Get the Java Class org.rocksdb.WriteBatch.Handler * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch$Handler"); } /** * Get the Java Method: WriteBatch.Handler#put * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getPutCfMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "put", "(I[B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#put * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getPutMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "put", "([B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#merge * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMergeCfMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing 
class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "merge", "(I[B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#merge * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMergeMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "merge", "([B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#delete * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getDeleteCfMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "delete", "(I[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#delete * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getDeleteMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "delete", "([B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#singleDelete * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getSingleDeleteCfMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "singleDelete", "(I[B)V"); assert(mid != 
nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#singleDelete * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getSingleDeleteMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "singleDelete", "([B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#deleteRange * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getDeleteRangeCfMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "deleteRange", "(I[B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#deleteRange * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getDeleteRangeMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "deleteRange", "([B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#logData * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getLogDataMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "logData", "([B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: 
WriteBatch.Handler#putBlobIndex * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getPutBlobIndexCfMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "putBlobIndex", "(I[B[B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#markBeginPrepare * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMarkBeginPrepareMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "markBeginPrepare", "()V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#markEndPrepare * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMarkEndPrepareMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "markEndPrepare", "([B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#markNoop * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMarkNoopMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "markNoop", "(Z)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#markRollback * * @param env A pointer to 
the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMarkRollbackMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "markRollback", "([B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#markCommit * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getMarkCommitMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "markCommit", "([B)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: WriteBatch.Handler#shouldContinue * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getContinueMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "shouldContinue", "()Z"); assert(mid != nullptr); return mid; } }; class WriteBatchSavePointJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.WriteBatch.SavePoint * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/WriteBatch$SavePoint"); } /** * Get the Java Method: HistogramData constructor * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could 
not * be retieved */ static jmethodID getConstructorMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "", "(JJJ)V"); assert(mid != nullptr); return mid; } /** * Create a new Java org.rocksdb.WriteBatch.SavePoint object * * @param env A pointer to the Java environment * @param savePoint A pointer to ROCKSDB_NAMESPACE::WriteBatch::SavePoint * object * * @return A reference to a Java org.rocksdb.WriteBatch.SavePoint object, or * nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, const SavePoint &save_point) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = getConstructorMethodId(env); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jobject jsave_point = env->NewObject(jclazz, mid, static_cast(save_point.size), static_cast(save_point.count), static_cast(save_point.content_flags)); if (env->ExceptionCheck()) { return nullptr; } return jsave_point; } }; // The portal class for org.rocksdb.WriteBatchWithIndex class WriteBatchWithIndexJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.WriteBatchWithIndex * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatchWithIndex"); } }; // The portal class for org.rocksdb.HistogramData class HistogramDataJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.HistogramData * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, 
NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/HistogramData"); } /** * Get the Java Method: HistogramData constructor * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getConstructorMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "", "(DDDDDDJJD)V"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.BackupableDBOptions class BackupableDBOptionsJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.BackupableDBOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/BackupableDBOptions"); } }; // The portal class for org.rocksdb.BackupEngine class BackupEngineJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.BackupableEngine * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/BackupEngine"); } }; // The portal class for org.rocksdb.RocksIterator class IteratorJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.RocksIterator * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * 
ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksIterator"); } }; // The portal class for org.rocksdb.Filter class FilterJni : public RocksDBNativeClass< std::shared_ptr*, FilterJni> { public: /** * Get the Java Class org.rocksdb.Filter * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/Filter"); } }; // The portal class for org.rocksdb.ColumnFamilyHandle class ColumnFamilyHandleJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.ColumnFamilyHandle * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/ColumnFamilyHandle"); } }; // The portal class for org.rocksdb.FlushOptions class FlushOptionsJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.FlushOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/FlushOptions"); } }; // The portal class for org.rocksdb.ComparatorOptions class ComparatorOptionsJni : public RocksDBNativeClass< ROCKSDB_NAMESPACE::ComparatorJniCallbackOptions*, 
ComparatorOptionsJni> { public: /** * Get the Java Class org.rocksdb.ComparatorOptions * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/ComparatorOptions"); } }; // The portal class for org.rocksdb.AbstractCompactionFilterFactory class AbstractCompactionFilterFactoryJni : public RocksDBNativeClass< const ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback*, AbstractCompactionFilterFactoryJni> { public: /** * Get the Java Class org.rocksdb.AbstractCompactionFilterFactory * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractCompactionFilterFactory"); } /** * Get the Java Method: AbstractCompactionFilterFactory#name * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getNameMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "name", "()Ljava/lang/String;"); assert(mid != nullptr); return mid; } /** * Get the Java Method: AbstractCompactionFilterFactory#createCompactionFilter * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getCreateCompactionFilterMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred 
accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "createCompactionFilter", "(ZZ)J"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.AbstractTransactionNotifier class AbstractTransactionNotifierJni : public RocksDBNativeClass< const ROCKSDB_NAMESPACE::TransactionNotifierJniCallback*, AbstractTransactionNotifierJni> { public: static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractTransactionNotifier"); } // Get the java method `snapshotCreated` // of org.rocksdb.AbstractTransactionNotifier. static jmethodID getSnapshotCreatedMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "snapshotCreated", "(J)V"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.AbstractComparatorJniBridge class AbstractComparatorJniBridge : public JavaClass { public: /** * Get the Java Class org.rocksdb.AbstractComparatorJniBridge * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/AbstractComparatorJniBridge"); } /** * Get the Java Method: Comparator#compareInternal * * @param env A pointer to the Java environment * @param jclazz the AbstractComparatorJniBridge class * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getCompareInternalMethodId(JNIEnv* env, jclass jclazz) { static jmethodID mid = env->GetStaticMethodID(jclazz, "compareInternal", "(Lorg/rocksdb/AbstractComparator;Ljava/nio/ByteBuffer;ILjava/nio/ByteBuffer;I)I"); assert(mid != nullptr); return mid; } /** * Get the Java Method: 
Comparator#findShortestSeparatorInternal * * @param env A pointer to the Java environment * @param jclazz the AbstractComparatorJniBridge class * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getFindShortestSeparatorInternalMethodId(JNIEnv* env, jclass jclazz) { static jmethodID mid = env->GetStaticMethodID(jclazz, "findShortestSeparatorInternal", "(Lorg/rocksdb/AbstractComparator;Ljava/nio/ByteBuffer;ILjava/nio/ByteBuffer;I)I"); assert(mid != nullptr); return mid; } /** * Get the Java Method: Comparator#findShortSuccessorInternal * * @param env A pointer to the Java environment * @param jclazz the AbstractComparatorJniBridge class * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getFindShortSuccessorInternalMethodId(JNIEnv* env, jclass jclazz) { static jmethodID mid = env->GetStaticMethodID(jclazz, "findShortSuccessorInternal", "(Lorg/rocksdb/AbstractComparator;Ljava/nio/ByteBuffer;I)I"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.AbstractComparator class AbstractComparatorJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.AbstractComparator * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractComparator"); } /** * Get the Java Method: Comparator#name * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getNameMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = 
env->GetMethodID(jclazz, "name", "()Ljava/lang/String;"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.AbstractSlice class AbstractSliceJni : public NativeRocksMutableObject { public: /** * Get the Java Class org.rocksdb.AbstractSlice * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractSlice"); } }; // The portal class for org.rocksdb.Slice class SliceJni : public NativeRocksMutableObject { public: /** * Get the Java Class org.rocksdb.Slice * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/Slice"); } /** * Constructs a Slice object * * @param env A pointer to the Java environment * * @return A reference to a Java Slice object, or a nullptr if an * exception occurs */ static jobject construct0(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "", "()V"); if(mid == nullptr) { // exception occurred accessing method return nullptr; } jobject jslice = env->NewObject(jclazz, mid); if(env->ExceptionCheck()) { return nullptr; } return jslice; } }; // The portal class for org.rocksdb.DirectSlice class DirectSliceJni : public NativeRocksMutableObject { public: /** * Get the Java Class org.rocksdb.DirectSlice * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, 
NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/DirectSlice"); } /** * Constructs a DirectSlice object * * @param env A pointer to the Java environment * * @return A reference to a Java DirectSlice object, or a nullptr if an * exception occurs */ static jobject construct0(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "", "()V"); if(mid == nullptr) { // exception occurred accessing method return nullptr; } jobject jdirect_slice = env->NewObject(jclazz, mid); if(env->ExceptionCheck()) { return nullptr; } return jdirect_slice; } }; // The portal class for org.rocksdb.BackupInfo class BackupInfoJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.BackupInfo * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/BackupInfo"); } /** * Constructs a BackupInfo object * * @param env A pointer to the Java environment * @param backup_id id of the backup * @param timestamp timestamp of the backup * @param size size of the backup * @param number_files number of files related to the backup * @param app_metadata application specific metadata * * @return A reference to a Java BackupInfo object, or a nullptr if an * exception occurs */ static jobject construct0(JNIEnv* env, uint32_t backup_id, int64_t timestamp, uint64_t size, uint32_t number_files, const std::string& app_metadata) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = 
env->GetMethodID(jclazz, "", "(IJJILjava/lang/String;)V"); if(mid == nullptr) { // exception occurred accessing method return nullptr; } jstring japp_metadata = nullptr; if (app_metadata != nullptr) { japp_metadata = env->NewStringUTF(app_metadata.c_str()); if (japp_metadata == nullptr) { // exception occurred creating java string return nullptr; } } jobject jbackup_info = env->NewObject(jclazz, mid, backup_id, timestamp, size, number_files, japp_metadata); if(env->ExceptionCheck()) { env->DeleteLocalRef(japp_metadata); return nullptr; } return jbackup_info; } }; class BackupInfoListJni { public: /** * Converts a C++ std::vector object to * a Java ArrayList object * * @param env A pointer to the Java environment * @param backup_infos A vector of BackupInfo * * @return Either a reference to a Java ArrayList object, or a nullptr * if an exception occurs */ static jobject getBackupInfo(JNIEnv* env, std::vector backup_infos) { jclass jarray_list_clazz = ROCKSDB_NAMESPACE::ListJni::getArrayListClass(env); if(jarray_list_clazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID cstr_mid = ROCKSDB_NAMESPACE::ListJni::getArrayListConstructorMethodId(env); if(cstr_mid == nullptr) { // exception occurred accessing method return nullptr; } jmethodID add_mid = ROCKSDB_NAMESPACE::ListJni::getListAddMethodId(env); if(add_mid == nullptr) { // exception occurred accessing method return nullptr; } // create java list jobject jbackup_info_handle_list = env->NewObject(jarray_list_clazz, cstr_mid, backup_infos.size()); if(env->ExceptionCheck()) { // exception occurred constructing object return nullptr; } // insert in java list auto end = backup_infos.end(); for (auto it = backup_infos.begin(); it != end; ++it) { auto backup_info = *it; jobject obj = ROCKSDB_NAMESPACE::BackupInfoJni::construct0( env, backup_info.backup_id, backup_info.timestamp, backup_info.size, backup_info.number_files, backup_info.app_metadata); if(env->ExceptionCheck()) { // exception 
occurred constructing object if(obj != nullptr) { env->DeleteLocalRef(obj); } if(jbackup_info_handle_list != nullptr) { env->DeleteLocalRef(jbackup_info_handle_list); } return nullptr; } jboolean rs = env->CallBooleanMethod(jbackup_info_handle_list, add_mid, obj); if(env->ExceptionCheck() || rs == JNI_FALSE) { // exception occurred calling method, or could not add if(obj != nullptr) { env->DeleteLocalRef(obj); } if(jbackup_info_handle_list != nullptr) { env->DeleteLocalRef(jbackup_info_handle_list); } return nullptr; } } return jbackup_info_handle_list; } }; // The portal class for org.rocksdb.WBWIRocksIterator class WBWIRocksIteratorJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.WBWIRocksIterator * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator"); } /** * Get the Java Field: WBWIRocksIterator#entry * * @param env A pointer to the Java environment * * @return The Java Field ID or nullptr if the class or field id could not * be retieved */ static jfieldID getWriteEntryField(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jfieldID fid = env->GetFieldID(jclazz, "entry", "Lorg/rocksdb/WBWIRocksIterator$WriteEntry;"); assert(fid != nullptr); return fid; } /** * Gets the value of the WBWIRocksIterator#entry * * @param env A pointer to the Java environment * @param jwbwi_rocks_iterator A reference to a WBWIIterator * * @return A reference to a Java WBWIRocksIterator.WriteEntry object, or * a nullptr if an exception occurs */ static jobject getWriteEntry(JNIEnv* env, jobject jwbwi_rocks_iterator) { assert(jwbwi_rocks_iterator != nullptr); jfieldID jwrite_entry_field = 
getWriteEntryField(env); if(jwrite_entry_field == nullptr) { // exception occurred accessing the field return nullptr; } jobject jwe = env->GetObjectField(jwbwi_rocks_iterator, jwrite_entry_field); assert(jwe != nullptr); return jwe; } }; // The portal class for org.rocksdb.WBWIRocksIterator.WriteType class WriteTypeJni : public JavaClass { public: /** * Get the PUT enum field value of WBWIRocksIterator.WriteType * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject PUT(JNIEnv* env) { return getEnum(env, "PUT"); } /** * Get the MERGE enum field value of WBWIRocksIterator.WriteType * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject MERGE(JNIEnv* env) { return getEnum(env, "MERGE"); } /** * Get the DELETE enum field value of WBWIRocksIterator.WriteType * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject DELETE(JNIEnv* env) { return getEnum(env, "DELETE"); } /** * Get the LOG enum field value of WBWIRocksIterator.WriteType * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject LOG(JNIEnv* env) { return getEnum(env, "LOG"); } // Returns the equivalent org.rocksdb.WBWIRocksIterator.WriteType for the // provided C++ ROCKSDB_NAMESPACE::WriteType enum static jbyte toJavaWriteType(const ROCKSDB_NAMESPACE::WriteType& writeType) { switch (writeType) { case ROCKSDB_NAMESPACE::WriteType::kPutRecord: return 0x0; case ROCKSDB_NAMESPACE::WriteType::kMergeRecord: return 0x1; case ROCKSDB_NAMESPACE::WriteType::kDeleteRecord: return 0x2; case ROCKSDB_NAMESPACE::WriteType::kSingleDeleteRecord: 
return 0x3; case ROCKSDB_NAMESPACE::WriteType::kDeleteRangeRecord: return 0x4; case ROCKSDB_NAMESPACE::WriteType::kLogDataRecord: return 0x5; case ROCKSDB_NAMESPACE::WriteType::kXIDRecord: return 0x6; default: return 0x7F; // undefined } } private: /** * Get the Java Class org.rocksdb.WBWIRocksIterator.WriteType * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator$WriteType"); } /** * Get an enum field of org.rocksdb.WBWIRocksIterator.WriteType * * @param env A pointer to the Java environment * @param name The name of the enum field * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject getEnum(JNIEnv* env, const char name[]) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jfieldID jfid = env->GetStaticFieldID(jclazz, name, "Lorg/rocksdb/WBWIRocksIterator$WriteType;"); if(env->ExceptionCheck()) { // exception occurred while getting field return nullptr; } else if(jfid == nullptr) { return nullptr; } jobject jwrite_type = env->GetStaticObjectField(jclazz, jfid); assert(jwrite_type != nullptr); return jwrite_type; } }; // The portal class for org.rocksdb.WBWIRocksIterator.WriteEntry class WriteEntryJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.WBWIRocksIterator.WriteEntry * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator$WriteEntry"); 
} }; // The portal class for org.rocksdb.InfoLogLevel class InfoLogLevelJni : public JavaClass { public: /** * Get the DEBUG_LEVEL enum field value of InfoLogLevel * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject DEBUG_LEVEL(JNIEnv* env) { return getEnum(env, "DEBUG_LEVEL"); } /** * Get the INFO_LEVEL enum field value of InfoLogLevel * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject INFO_LEVEL(JNIEnv* env) { return getEnum(env, "INFO_LEVEL"); } /** * Get the WARN_LEVEL enum field value of InfoLogLevel * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject WARN_LEVEL(JNIEnv* env) { return getEnum(env, "WARN_LEVEL"); } /** * Get the ERROR_LEVEL enum field value of InfoLogLevel * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject ERROR_LEVEL(JNIEnv* env) { return getEnum(env, "ERROR_LEVEL"); } /** * Get the FATAL_LEVEL enum field value of InfoLogLevel * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject FATAL_LEVEL(JNIEnv* env) { return getEnum(env, "FATAL_LEVEL"); } /** * Get the HEADER_LEVEL enum field value of InfoLogLevel * * @param env A pointer to the Java environment * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject HEADER_LEVEL(JNIEnv* env) { return getEnum(env, "HEADER_LEVEL"); } private: /** * Get the Java Class org.rocksdb.InfoLogLevel * * @param env A pointer to the 
Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/InfoLogLevel"); } /** * Get an enum field of org.rocksdb.InfoLogLevel * * @param env A pointer to the Java environment * @param name The name of the enum field * * @return A reference to the enum field value or a nullptr if * the enum field value could not be retrieved */ static jobject getEnum(JNIEnv* env, const char name[]) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jfieldID jfid = env->GetStaticFieldID(jclazz, name, "Lorg/rocksdb/InfoLogLevel;"); if(env->ExceptionCheck()) { // exception occurred while getting field return nullptr; } else if(jfid == nullptr) { return nullptr; } jobject jinfo_log_level = env->GetStaticObjectField(jclazz, jfid); assert(jinfo_log_level != nullptr); return jinfo_log_level; } }; // The portal class for org.rocksdb.Logger class LoggerJni : public RocksDBNativeClass< std::shared_ptr*, LoggerJni> { public: /** * Get the Java Class org/rocksdb/Logger * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/Logger"); } /** * Get the Java Method: Logger#log * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getLogMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "log", 
"(Lorg/rocksdb/InfoLogLevel;Ljava/lang/String;)V"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.TransactionLogIterator.BatchResult class BatchResultJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.TransactionLogIterator.BatchResult * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/TransactionLogIterator$BatchResult"); } /** * Create a new Java org.rocksdb.TransactionLogIterator.BatchResult object * with the same properties as the provided C++ ROCKSDB_NAMESPACE::BatchResult * object * * @param env A pointer to the Java environment * @param batch_result The ROCKSDB_NAMESPACE::BatchResult object * * @return A reference to a Java * org.rocksdb.TransactionLogIterator.BatchResult object, * or nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, ROCKSDB_NAMESPACE::BatchResult& batch_result) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID( jclazz, "", "(JJ)V"); if(mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jobject jbatch_result = env->NewObject(jclazz, mid, batch_result.sequence, batch_result.writeBatchPtr.get()); if(jbatch_result == nullptr) { // exception thrown: InstantiationException or OutOfMemoryError return nullptr; } batch_result.writeBatchPtr.release(); return jbatch_result; } }; // The portal class for org.rocksdb.BottommostLevelCompaction class BottommostLevelCompactionJni { public: // Returns the equivalent org.rocksdb.BottommostLevelCompaction for the // provided C++ ROCKSDB_NAMESPACE::BottommostLevelCompaction enum static jint toJavaBottommostLevelCompaction( 
const ROCKSDB_NAMESPACE::BottommostLevelCompaction& bottommost_level_compaction) { switch(bottommost_level_compaction) { case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kSkip: return 0x0; case ROCKSDB_NAMESPACE::BottommostLevelCompaction:: kIfHaveCompactionFilter: return 0x1; case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForce: return 0x2; case ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized: return 0x3; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::BottommostLevelCompaction // enum for the provided Java org.rocksdb.BottommostLevelCompaction static ROCKSDB_NAMESPACE::BottommostLevelCompaction toCppBottommostLevelCompaction(jint bottommost_level_compaction) { switch(bottommost_level_compaction) { case 0x0: return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kSkip; case 0x1: return ROCKSDB_NAMESPACE::BottommostLevelCompaction:: kIfHaveCompactionFilter; case 0x2: return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForce; case 0x3: return ROCKSDB_NAMESPACE::BottommostLevelCompaction::kForceOptimized; default: // undefined/default return ROCKSDB_NAMESPACE::BottommostLevelCompaction:: kIfHaveCompactionFilter; } } }; // The portal class for org.rocksdb.CompactionStopStyle class CompactionStopStyleJni { public: // Returns the equivalent org.rocksdb.CompactionStopStyle for the provided // C++ ROCKSDB_NAMESPACE::CompactionStopStyle enum static jbyte toJavaCompactionStopStyle( const ROCKSDB_NAMESPACE::CompactionStopStyle& compaction_stop_style) { switch(compaction_stop_style) { case ROCKSDB_NAMESPACE::CompactionStopStyle:: kCompactionStopStyleSimilarSize: return 0x0; case ROCKSDB_NAMESPACE::CompactionStopStyle:: kCompactionStopStyleTotalSize: return 0x1; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionStopStyle enum for // the provided Java org.rocksdb.CompactionStopStyle static ROCKSDB_NAMESPACE::CompactionStopStyle toCppCompactionStopStyle( jbyte 
jcompaction_stop_style) { switch(jcompaction_stop_style) { case 0x0: return ROCKSDB_NAMESPACE::CompactionStopStyle:: kCompactionStopStyleSimilarSize; case 0x1: return ROCKSDB_NAMESPACE::CompactionStopStyle:: kCompactionStopStyleTotalSize; default: // undefined/default return ROCKSDB_NAMESPACE::CompactionStopStyle:: kCompactionStopStyleSimilarSize; } } }; // The portal class for org.rocksdb.CompressionType class CompressionTypeJni { public: // Returns the equivalent org.rocksdb.CompressionType for the provided // C++ ROCKSDB_NAMESPACE::CompressionType enum static jbyte toJavaCompressionType( const ROCKSDB_NAMESPACE::CompressionType& compression_type) { switch(compression_type) { case ROCKSDB_NAMESPACE::CompressionType::kNoCompression: return 0x0; case ROCKSDB_NAMESPACE::CompressionType::kSnappyCompression: return 0x1; case ROCKSDB_NAMESPACE::CompressionType::kZlibCompression: return 0x2; case ROCKSDB_NAMESPACE::CompressionType::kBZip2Compression: return 0x3; case ROCKSDB_NAMESPACE::CompressionType::kLZ4Compression: return 0x4; case ROCKSDB_NAMESPACE::CompressionType::kLZ4HCCompression: return 0x5; case ROCKSDB_NAMESPACE::CompressionType::kXpressCompression: return 0x6; case ROCKSDB_NAMESPACE::CompressionType::kZSTD: return 0x7; case ROCKSDB_NAMESPACE::CompressionType::kDisableCompressionOption: default: return 0x7F; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompressionType enum for the // provided Java org.rocksdb.CompressionType static ROCKSDB_NAMESPACE::CompressionType toCppCompressionType( jbyte jcompression_type) { switch(jcompression_type) { case 0x0: return ROCKSDB_NAMESPACE::CompressionType::kNoCompression; case 0x1: return ROCKSDB_NAMESPACE::CompressionType::kSnappyCompression; case 0x2: return ROCKSDB_NAMESPACE::CompressionType::kZlibCompression; case 0x3: return ROCKSDB_NAMESPACE::CompressionType::kBZip2Compression; case 0x4: return ROCKSDB_NAMESPACE::CompressionType::kLZ4Compression; case 0x5: return 
ROCKSDB_NAMESPACE::CompressionType::kLZ4HCCompression; case 0x6: return ROCKSDB_NAMESPACE::CompressionType::kXpressCompression; case 0x7: return ROCKSDB_NAMESPACE::CompressionType::kZSTD; case 0x7F: default: return ROCKSDB_NAMESPACE::CompressionType::kDisableCompressionOption; } } }; // The portal class for org.rocksdb.CompactionPriority class CompactionPriorityJni { public: // Returns the equivalent org.rocksdb.CompactionPriority for the provided // C++ ROCKSDB_NAMESPACE::CompactionPri enum static jbyte toJavaCompactionPriority( const ROCKSDB_NAMESPACE::CompactionPri& compaction_priority) { switch(compaction_priority) { case ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize: return 0x0; case ROCKSDB_NAMESPACE::CompactionPri::kOldestLargestSeqFirst: return 0x1; case ROCKSDB_NAMESPACE::CompactionPri::kOldestSmallestSeqFirst: return 0x2; case ROCKSDB_NAMESPACE::CompactionPri::kMinOverlappingRatio: return 0x3; default: return 0x0; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionPri enum for the // provided Java org.rocksdb.CompactionPriority static ROCKSDB_NAMESPACE::CompactionPri toCppCompactionPriority( jbyte jcompaction_priority) { switch(jcompaction_priority) { case 0x0: return ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize; case 0x1: return ROCKSDB_NAMESPACE::CompactionPri::kOldestLargestSeqFirst; case 0x2: return ROCKSDB_NAMESPACE::CompactionPri::kOldestSmallestSeqFirst; case 0x3: return ROCKSDB_NAMESPACE::CompactionPri::kMinOverlappingRatio; default: // undefined/default return ROCKSDB_NAMESPACE::CompactionPri::kByCompensatedSize; } } }; // The portal class for org.rocksdb.AccessHint class AccessHintJni { public: // Returns the equivalent org.rocksdb.AccessHint for the provided // C++ ROCKSDB_NAMESPACE::DBOptions::AccessHint enum static jbyte toJavaAccessHint( const ROCKSDB_NAMESPACE::DBOptions::AccessHint& access_hint) { switch(access_hint) { case ROCKSDB_NAMESPACE::DBOptions::AccessHint::NONE: return 0x0; case 
ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL: return 0x1; case ROCKSDB_NAMESPACE::DBOptions::AccessHint::SEQUENTIAL: return 0x2; case ROCKSDB_NAMESPACE::DBOptions::AccessHint::WILLNEED: return 0x3; default: // undefined/default return 0x1; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::DBOptions::AccessHint enum // for the provided Java org.rocksdb.AccessHint static ROCKSDB_NAMESPACE::DBOptions::AccessHint toCppAccessHint( jbyte jaccess_hint) { switch(jaccess_hint) { case 0x0: return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NONE; case 0x1: return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL; case 0x2: return ROCKSDB_NAMESPACE::DBOptions::AccessHint::SEQUENTIAL; case 0x3: return ROCKSDB_NAMESPACE::DBOptions::AccessHint::WILLNEED; default: // undefined/default return ROCKSDB_NAMESPACE::DBOptions::AccessHint::NORMAL; } } }; // The portal class for org.rocksdb.WALRecoveryMode class WALRecoveryModeJni { public: // Returns the equivalent org.rocksdb.WALRecoveryMode for the provided // C++ ROCKSDB_NAMESPACE::WALRecoveryMode enum static jbyte toJavaWALRecoveryMode( const ROCKSDB_NAMESPACE::WALRecoveryMode& wal_recovery_mode) { switch(wal_recovery_mode) { case ROCKSDB_NAMESPACE::WALRecoveryMode::kTolerateCorruptedTailRecords: return 0x0; case ROCKSDB_NAMESPACE::WALRecoveryMode::kAbsoluteConsistency: return 0x1; case ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery: return 0x2; case ROCKSDB_NAMESPACE::WALRecoveryMode::kSkipAnyCorruptedRecords: return 0x3; default: // undefined/default return 0x2; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::WALRecoveryMode enum for the // provided Java org.rocksdb.WALRecoveryMode static ROCKSDB_NAMESPACE::WALRecoveryMode toCppWALRecoveryMode( jbyte jwal_recovery_mode) { switch(jwal_recovery_mode) { case 0x0: return ROCKSDB_NAMESPACE::WALRecoveryMode:: kTolerateCorruptedTailRecords; case 0x1: return ROCKSDB_NAMESPACE::WALRecoveryMode::kAbsoluteConsistency; case 0x2: return 
ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery; case 0x3: return ROCKSDB_NAMESPACE::WALRecoveryMode::kSkipAnyCorruptedRecords; default: // undefined/default return ROCKSDB_NAMESPACE::WALRecoveryMode::kPointInTimeRecovery; } } }; // The portal class for org.rocksdb.TickerType class TickerTypeJni { public: // Returns the equivalent org.rocksdb.TickerType for the provided // C++ ROCKSDB_NAMESPACE::Tickers enum static jbyte toJavaTickerType(const ROCKSDB_NAMESPACE::Tickers& tickers) { switch(tickers) { case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS: return 0x0; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_HIT: return 0x1; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD: return 0x2; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD_FAILURES: return 0x3; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_MISS: return 0x4; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_HIT: return 0x5; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_ADD: return 0x6; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_INSERT: return 0x7; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_EVICT: return 0x8; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_MISS: return 0x9; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_HIT: return 0xA; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_ADD: return 0xB; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_INSERT: return 0xC; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_EVICT: return 0xD; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_MISS: return 0xE; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_HIT: return 0xF; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_ADD: return 0x10; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_BYTES_INSERT: return 0x11; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_READ: return 0x12; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_WRITE: return 0x13; case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_USEFUL: return 0x14; case 
ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_HIT:
        return 0x15;
      case ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_MISS:
        return 0x16;
      case ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_HIT:
        return 0x17;
      case ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_MISS:
        return 0x18;
      case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_HIT:
        return 0x19;
      case ROCKSDB_NAMESPACE::Tickers::MEMTABLE_MISS:
        return 0x1A;
      case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L0:
        return 0x1B;
      case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L1:
        return 0x1C;
      case ROCKSDB_NAMESPACE::Tickers::GET_HIT_L2_AND_UP:
        return 0x1D;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_NEWER_ENTRY:
        return 0x1E;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_OBSOLETE:
        return 0x1F;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_RANGE_DEL:
        return 0x20;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_USER:
        return 0x21;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_RANGE_DEL_DROP_OBSOLETE:
        return 0x22;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_WRITTEN:
        return 0x23;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_READ:
        return 0x24;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_UPDATED:
        return 0x25;
      case ROCKSDB_NAMESPACE::Tickers::BYTES_WRITTEN:
        return 0x26;
      case ROCKSDB_NAMESPACE::Tickers::BYTES_READ:
        return 0x27;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK:
        return 0x28;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT:
        return 0x29;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV:
        return 0x2A;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK_FOUND:
        return 0x2B;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT_FOUND:
        return 0x2C;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV_FOUND:
        return 0x2D;
      case ROCKSDB_NAMESPACE::Tickers::ITER_BYTES_READ:
        return 0x2E;
      case ROCKSDB_NAMESPACE::Tickers::NO_FILE_CLOSES:
        return 0x2F;
      case ROCKSDB_NAMESPACE::Tickers::NO_FILE_OPENS:
        return 0x30;
      case ROCKSDB_NAMESPACE::Tickers::NO_FILE_ERRORS:
        return 0x31;
      case ROCKSDB_NAMESPACE::Tickers::STALL_L0_SLOWDOWN_MICROS:
        return 0x32;
      case ROCKSDB_NAMESPACE::Tickers::STALL_MEMTABLE_COMPACTION_MICROS:
        return 0x33;
      case ROCKSDB_NAMESPACE::Tickers::STALL_L0_NUM_FILES_MICROS:
        return 0x34;
      case ROCKSDB_NAMESPACE::Tickers::STALL_MICROS:
        return 0x35;
      case ROCKSDB_NAMESPACE::Tickers::DB_MUTEX_WAIT_MICROS:
        return 0x36;
      case ROCKSDB_NAMESPACE::Tickers::RATE_LIMIT_DELAY_MILLIS:
        return 0x37;
      case ROCKSDB_NAMESPACE::Tickers::NO_ITERATORS:
        return 0x38;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_CALLS:
        return 0x39;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_READ:
        return 0x3A;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_BYTES_READ:
        return 0x3B;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_FILTERED_DELETES:
        return 0x3C;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_MERGE_FAILURES:
        return 0x3D;
      case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_CHECKED:
        return 0x3E;
      case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_USEFUL:
        return 0x3F;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_OF_RESEEKS_IN_ITERATION:
        return 0x40;
      case ROCKSDB_NAMESPACE::Tickers::GET_UPDATES_SINCE_CALLS:
        return 0x41;
      case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_MISS:
        return 0x42;
      case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_HIT:
        return 0x43;
      case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_ADD:
        return 0x44;
      case ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_ADD_FAILURES:
        return 0x45;
      case ROCKSDB_NAMESPACE::Tickers::WAL_FILE_SYNCED:
        return 0x46;
      case ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES:
        return 0x47;
      case ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_SELF:
        return 0x48;
      case ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_OTHER:
        return 0x49;
      case ROCKSDB_NAMESPACE::Tickers::WRITE_TIMEDOUT:
        return 0x4A;
      case ROCKSDB_NAMESPACE::Tickers::WRITE_WITH_WAL:
        return 0x4B;
      case ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES:
        return 0x4C;
      case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES:
        return 0x4D;
      case ROCKSDB_NAMESPACE::Tickers::FLUSH_WRITE_BYTES:
        return 0x4E;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_DIRECT_LOAD_TABLE_PROPERTIES:
        return 0x4F;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_ACQUIRES:
        return 0x50;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_RELEASES:
        return 0x51;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_CLEANUPS:
        return 0x52;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_COMPRESSED:
        return 0x53;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_DECOMPRESSED:
        return 0x54;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_NOT_COMPRESSED:
        return 0x55;
      case ROCKSDB_NAMESPACE::Tickers::MERGE_OPERATION_TOTAL_TIME:
        return 0x56;
      case ROCKSDB_NAMESPACE::Tickers::FILTER_OPERATION_TOTAL_TIME:
        return 0x57;
      case ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_HIT:
        return 0x58;
      case ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_MISS:
        return 0x59;
      case ROCKSDB_NAMESPACE::Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES:
        return 0x5A;
      case ROCKSDB_NAMESPACE::Tickers::READ_AMP_TOTAL_READ_BYTES:
        return 0x5B;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_RATE_LIMITER_DRAINS:
        return 0x5C;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_ITER_SKIP:
        return 0x5D;
      case ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND:
        return 0x5E;
      case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED:
        // -0x01 to fixate the new value that incorrectly changed
        // TICKER_ENUM_MAX. (0x5F, the next value in sequence, is the one
        // returned for TICKER_ENUM_MAX at the end of this switch.)
        return -0x01;
      case ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED:
        return 0x60;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE:
        return 0x61;
      case ROCKSDB_NAMESPACE::Tickers::COMPACTION_CANCELLED:
        return 0x62;
      case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_POSITIVE:
        return 0x63;
      case ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_TRUE_POSITIVE:
        return 0x64;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PUT:
        return 0x65;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_WRITE:
        return 0x66;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_GET:
        return 0x67;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_MULTIGET:
        return 0x68;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_SEEK:
        return 0x69;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_NEXT:
        return 0x6A;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PREV:
        return 0x6B;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_WRITTEN:
        return 0x6C;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_READ:
        return 0x6D;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_WRITTEN:
        return 0x6E;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_READ:
        return 0x6F;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED:
        return 0x70;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED_TTL:
        return 0x71;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB:
        return 0x72;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB_TTL:
        return 0x73;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_WRITTEN:
        return 0x74;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_READ:
        return 0x75;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_SYNCED:
        return 0x76;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_COUNT:
        return 0x77;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_SIZE:
        return 0x78;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_COUNT:
        return 0x79;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_SIZE:
        return 0x7A;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_FILES:
        return 0x7B;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_NEW_FILES:
        return 0x7C;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_FAILURES:
        return 0x7D;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_OVERWRITTEN:
        return 0x7E;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_EXPIRED:
        return 0x7F;
      // From here on the mapping counts downwards through negative values,
      // starting at -0x02 (-0x01 is already taken by NO_ITERATOR_CREATED).
      // NOTE(review): presumably because 0x7F is the largest value a signed
      // 8-bit jbyte can hold - confirm against the Java TickerType enum.
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_RELOCATED:
        return -0x02;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_OVERWRITTEN:
        return -0x03;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_EXPIRED:
        return -0x04;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_RELOCATED:
        return -0x05;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_FILES_EVICTED:
        return -0x06;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_KEYS_EVICTED:
        return -0x07;
      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_BYTES_EVICTED:
        return -0x08;
      case ROCKSDB_NAMESPACE::Tickers::TXN_PREPARE_MUTEX_OVERHEAD:
        return -0x09;
      case ROCKSDB_NAMESPACE::Tickers::TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD:
        return -0x0A;
      case ROCKSDB_NAMESPACE::Tickers::TXN_DUPLICATE_KEY_OVERHEAD:
        return -0x0B;
      case ROCKSDB_NAMESPACE::Tickers::TXN_SNAPSHOT_MUTEX_OVERHEAD:
        return -0x0C;
      case ROCKSDB_NAMESPACE::Tickers::TXN_GET_TRY_AGAIN:
        return -0x0D;
      case ROCKSDB_NAMESPACE::Tickers::FILES_MARKED_TRASH:
        return -0x0E;
      case ROCKSDB_NAMESPACE::Tickers::FILES_DELETED_IMMEDIATELY:
        return -0X0F;
      case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
        // 0x5F for backwards compatibility on current minor version.
        return 0x5F;

      default:
        // undefined/default: unrecognised tickers are reported as 0x0,
        // the same value that BLOCK_CACHE_MISS maps to
        return 0x0;
    }
  }

  // Returns the equivalent C++ ROCKSDB_NAMESPACE::Tickers enum for the
  // provided Java org.rocksdb.TickerType
  //
  // This is the inverse of the toJavaTickerType table: the values run from
  // 0x0 to 0x7F and then continue from -0x02 down to -0x0F, with -0x01 and
  // 0x5F as the two special cases commented below. Any other value maps to
  // BLOCK_CACHE_MISS.
  static ROCKSDB_NAMESPACE::Tickers toCppTickers(jbyte jticker_type) {
    switch(jticker_type) {
      case 0x0:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS;
      case 0x1:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_HIT;
      case 0x2:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD;
      case 0x3:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_ADD_FAILURES;
      case 0x4:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_MISS;
      case 0x5:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_HIT;
      case 0x6:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_ADD;
      case 0x7:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_INSERT;
      case 0x8:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_INDEX_BYTES_EVICT;
      case 0x9:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_MISS;
      case 0xA:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_HIT;
      case 0xB:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_ADD;
      case 0xC:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_INSERT;
      case 0xD:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_FILTER_BYTES_EVICT;
      case 0xE:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_MISS;
      case 0xF:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_HIT;
      case 0x10:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_ADD;
      case 0x11:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_DATA_BYTES_INSERT;
      case 0x12:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_READ;
      case 0x13:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_BYTES_WRITE;
      case 0x14:
        return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_USEFUL;
      case 0x15:
        return ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_HIT;
      case 0x16:
        return ROCKSDB_NAMESPACE::Tickers::PERSISTENT_CACHE_MISS;
      case 0x17:
        return ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_HIT;
      case 0x18:
        return ROCKSDB_NAMESPACE::Tickers::SIM_BLOCK_CACHE_MISS;
      case 0x19:
        return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_HIT;
      case 0x1A:
        return ROCKSDB_NAMESPACE::Tickers::MEMTABLE_MISS;
      case 0x1B:
        return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L0;
      case 0x1C:
        return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L1;
      case 0x1D:
        return ROCKSDB_NAMESPACE::Tickers::GET_HIT_L2_AND_UP;
      case 0x1E:
        return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_NEWER_ENTRY;
      case 0x1F:
        return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_OBSOLETE;
      case 0x20:
        return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_RANGE_DEL;
      case 0x21:
        return ROCKSDB_NAMESPACE::Tickers::COMPACTION_KEY_DROP_USER;
      case 0x22:
        return ROCKSDB_NAMESPACE::Tickers::COMPACTION_RANGE_DEL_DROP_OBSOLETE;
      case 0x23:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_WRITTEN;
      case 0x24:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_READ;
      case 0x25:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_KEYS_UPDATED;
      case 0x26:
        return ROCKSDB_NAMESPACE::Tickers::BYTES_WRITTEN;
      case 0x27:
        return ROCKSDB_NAMESPACE::Tickers::BYTES_READ;
      case 0x28:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK;
      case 0x29:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT;
      case 0x2A:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV;
      case 0x2B:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_SEEK_FOUND;
      case 0x2C:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_NEXT_FOUND;
      case 0x2D:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DB_PREV_FOUND;
      case 0x2E:
        return ROCKSDB_NAMESPACE::Tickers::ITER_BYTES_READ;
      case 0x2F:
        return ROCKSDB_NAMESPACE::Tickers::NO_FILE_CLOSES;
      case 0x30:
        return ROCKSDB_NAMESPACE::Tickers::NO_FILE_OPENS;
      case 0x31:
        return ROCKSDB_NAMESPACE::Tickers::NO_FILE_ERRORS;
      case 0x32:
        return ROCKSDB_NAMESPACE::Tickers::STALL_L0_SLOWDOWN_MICROS;
      case 0x33:
        return ROCKSDB_NAMESPACE::Tickers::STALL_MEMTABLE_COMPACTION_MICROS;
      case 0x34:
        return ROCKSDB_NAMESPACE::Tickers::STALL_L0_NUM_FILES_MICROS;
      case 0x35:
        return ROCKSDB_NAMESPACE::Tickers::STALL_MICROS;
      case 0x36:
        return ROCKSDB_NAMESPACE::Tickers::DB_MUTEX_WAIT_MICROS;
      case 0x37:
        return ROCKSDB_NAMESPACE::Tickers::RATE_LIMIT_DELAY_MILLIS;
      case 0x38:
        return ROCKSDB_NAMESPACE::Tickers::NO_ITERATORS;
      case 0x39:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_CALLS;
      case 0x3A:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_READ;
      case 0x3B:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_BYTES_READ;
      case 0x3C:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_FILTERED_DELETES;
      case 0x3D:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_MERGE_FAILURES;
      case 0x3E:
        return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_CHECKED;
      case 0x3F:
        return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_PREFIX_USEFUL;
      case 0x40:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_OF_RESEEKS_IN_ITERATION;
      case 0x41:
        return ROCKSDB_NAMESPACE::Tickers::GET_UPDATES_SINCE_CALLS;
      case 0x42:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_MISS;
      case 0x43:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_HIT;
      case 0x44:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_ADD;
      case 0x45:
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_COMPRESSED_ADD_FAILURES;
      case 0x46:
        return ROCKSDB_NAMESPACE::Tickers::WAL_FILE_SYNCED;
      case 0x47:
        return ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES;
      case 0x48:
        return ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_SELF;
      case 0x49:
        return ROCKSDB_NAMESPACE::Tickers::WRITE_DONE_BY_OTHER;
      case 0x4A:
        return ROCKSDB_NAMESPACE::Tickers::WRITE_TIMEDOUT;
      case 0x4B:
        return ROCKSDB_NAMESPACE::Tickers::WRITE_WITH_WAL;
      case 0x4C:
        return ROCKSDB_NAMESPACE::Tickers::COMPACT_READ_BYTES;
      case 0x4D:
        return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES;
      case 0x4E:
        return ROCKSDB_NAMESPACE::Tickers::FLUSH_WRITE_BYTES;
      case 0x4F:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_DIRECT_LOAD_TABLE_PROPERTIES;
      case 0x50:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_ACQUIRES;
      case 0x51:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_RELEASES;
      case 0x52:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_SUPERVERSION_CLEANUPS;
      case 0x53:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_COMPRESSED;
      case 0x54:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_DECOMPRESSED;
      case 0x55:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_BLOCK_NOT_COMPRESSED;
      case 0x56:
        return ROCKSDB_NAMESPACE::Tickers::MERGE_OPERATION_TOTAL_TIME;
      case 0x57:
        return ROCKSDB_NAMESPACE::Tickers::FILTER_OPERATION_TOTAL_TIME;
      case 0x58:
        return ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_HIT;
      case 0x59:
        return ROCKSDB_NAMESPACE::Tickers::ROW_CACHE_MISS;
      case 0x5A:
        return ROCKSDB_NAMESPACE::Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES;
      case 0x5B:
        return ROCKSDB_NAMESPACE::Tickers::READ_AMP_TOTAL_READ_BYTES;
      case 0x5C:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_RATE_LIMITER_DRAINS;
      case 0x5D:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_ITER_SKIP;
      case 0x5E:
        return ROCKSDB_NAMESPACE::Tickers::NUMBER_MULTIGET_KEYS_FOUND;
      case -0x01:
        // -0x01 to fixate the new value that incorrectly changed
        // TICKER_ENUM_MAX. (0x5F is handled at the end of this switch.)
        return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_CREATED;
      case 0x60:
        return ROCKSDB_NAMESPACE::Tickers::NO_ITERATOR_DELETED;
      case 0x61:
        return ROCKSDB_NAMESPACE::Tickers::
            COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE;
      case 0x62:
        return ROCKSDB_NAMESPACE::Tickers::COMPACTION_CANCELLED;
      case 0x63:
        return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_POSITIVE;
      case 0x64:
        return ROCKSDB_NAMESPACE::Tickers::BLOOM_FILTER_FULL_TRUE_POSITIVE;
      case 0x65:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PUT;
      case 0x66:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_WRITE;
      case 0x67:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_GET;
      case 0x68:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_MULTIGET;
      case 0x69:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_SEEK;
      case 0x6A:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_NEXT;
      case 0x6B:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_PREV;
      case 0x6C:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_WRITTEN;
      case 0x6D:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_NUM_KEYS_READ;
      case 0x6E:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_WRITTEN;
      case 0x6F:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BYTES_READ;
      case 0x70:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED;
      case 0x71:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_INLINED_TTL;
      case 0x72:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB;
      case 0x73:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_WRITE_BLOB_TTL;
      case 0x74:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_WRITTEN;
      case 0x75:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_BYTES_READ;
      case 0x76:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_FILE_SYNCED;
      case 0x77:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_COUNT;
      case 0x78:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EXPIRED_SIZE;
      case 0x79:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_COUNT;
      case 0x7A:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_BLOB_INDEX_EVICTED_SIZE;
      case 0x7B:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_FILES;
      case 0x7C:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_NEW_FILES;
      case 0x7D:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_FAILURES;
      case 0x7E:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_OVERWRITTEN;
      case 0x7F:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_EXPIRED;
      // Negative values continue the table after 0x7F, starting at -0x02
      // (-0x01 is handled above).
      case -0x02:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_NUM_KEYS_RELOCATED;
      case -0x03:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_OVERWRITTEN;
      case -0x04:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_EXPIRED;
      case -0x05:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_GC_BYTES_RELOCATED;
      case -0x06:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_FILES_EVICTED;
      case -0x07:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_NUM_KEYS_EVICTED;
      case -0x08:
        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_FIFO_BYTES_EVICTED;
      case -0x09:
        return ROCKSDB_NAMESPACE::Tickers::TXN_PREPARE_MUTEX_OVERHEAD;
      case -0x0A:
        return ROCKSDB_NAMESPACE::Tickers::TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD;
      case -0x0B:
        return ROCKSDB_NAMESPACE::Tickers::TXN_DUPLICATE_KEY_OVERHEAD;
      case -0x0C:
        return ROCKSDB_NAMESPACE::Tickers::TXN_SNAPSHOT_MUTEX_OVERHEAD;
      case -0x0D:
        return ROCKSDB_NAMESPACE::Tickers::TXN_GET_TRY_AGAIN;
      case -0x0E:
        return ROCKSDB_NAMESPACE::Tickers::FILES_MARKED_TRASH;
      case -0x0F:
        return ROCKSDB_NAMESPACE::Tickers::FILES_DELETED_IMMEDIATELY;
      case 0x5F:
        // 0x5F for backwards compatibility on current minor version.
        return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX;

      default:
        // undefined/default: unrecognised values fall back to
        // BLOCK_CACHE_MISS
        return ROCKSDB_NAMESPACE::Tickers::BLOCK_CACHE_MISS;
    }
  }
};

// The portal class for org.rocksdb.HistogramType
class HistogramTypeJni {
 public:
  // Returns the equivalent org.rocksdb.HistogramType for the provided
  // C++ ROCKSDB_NAMESPACE::Histograms enum
  static jbyte toJavaHistogramsType(
      const ROCKSDB_NAMESPACE::Histograms& histograms) {
    switch(histograms) {
      case ROCKSDB_NAMESPACE::Histograms::DB_GET:
        return 0x0;
      case ROCKSDB_NAMESPACE::Histograms::DB_WRITE:
        return 0x1;
      case ROCKSDB_NAMESPACE::Histograms::COMPACTION_TIME:
        return 0x2;
      case ROCKSDB_NAMESPACE::Histograms::SUBCOMPACTION_SETUP_TIME:
        return 0x3;
      case ROCKSDB_NAMESPACE::Histograms::TABLE_SYNC_MICROS:
        return 0x4;
      case ROCKSDB_NAMESPACE::Histograms::COMPACTION_OUTFILE_SYNC_MICROS:
        return 0x5;
      case ROCKSDB_NAMESPACE::Histograms::WAL_FILE_SYNC_MICROS:
        return 0x6;
      case ROCKSDB_NAMESPACE::Histograms::MANIFEST_FILE_SYNC_MICROS:
        return 0x7;
      case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_IO_MICROS:
        return 0x8;
      case ROCKSDB_NAMESPACE::Histograms::DB_MULTIGET:
        return 0x9;
      case ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_COMPACTION_MICROS:
        return 0xA;
      case ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_GET_MICROS:
        return 0xB;
      case ROCKSDB_NAMESPACE::Histograms::WRITE_RAW_BLOCK_MICROS:
        return 0xC;
      case ROCKSDB_NAMESPACE::Histograms::STALL_L0_SLOWDOWN_COUNT:
        return 0xD;
      case ROCKSDB_NAMESPACE::Histograms::STALL_MEMTABLE_COMPACTION_COUNT:
        return 0xE;
      case ROCKSDB_NAMESPACE::Histograms::STALL_L0_NUM_FILES_COUNT:
        return 0xF;
      case
ROCKSDB_NAMESPACE::Histograms::HARD_RATE_LIMIT_DELAY_COUNT: return 0x10; case ROCKSDB_NAMESPACE::Histograms::SOFT_RATE_LIMIT_DELAY_COUNT: return 0x11; case ROCKSDB_NAMESPACE::Histograms::NUM_FILES_IN_SINGLE_COMPACTION: return 0x12; case ROCKSDB_NAMESPACE::Histograms::DB_SEEK: return 0x13; case ROCKSDB_NAMESPACE::Histograms::WRITE_STALL: return 0x14; case ROCKSDB_NAMESPACE::Histograms::SST_READ_MICROS: return 0x15; case ROCKSDB_NAMESPACE::Histograms::NUM_SUBCOMPACTIONS_SCHEDULED: return 0x16; case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_READ: return 0x17; case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_WRITE: return 0x18; case ROCKSDB_NAMESPACE::Histograms::BYTES_PER_MULTIGET: return 0x19; case ROCKSDB_NAMESPACE::Histograms::BYTES_COMPRESSED: return 0x1A; case ROCKSDB_NAMESPACE::Histograms::BYTES_DECOMPRESSED: return 0x1B; case ROCKSDB_NAMESPACE::Histograms::COMPRESSION_TIMES_NANOS: return 0x1C; case ROCKSDB_NAMESPACE::Histograms::DECOMPRESSION_TIMES_NANOS: return 0x1D; case ROCKSDB_NAMESPACE::Histograms::READ_NUM_MERGE_OPERANDS: return 0x1E; // 0x20 to skip 0x1F so TICKER_ENUM_MAX remains unchanged for minor version compatibility. 
case ROCKSDB_NAMESPACE::Histograms::FLUSH_TIME: return 0x20; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_KEY_SIZE: return 0x21; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_VALUE_SIZE: return 0x22; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_WRITE_MICROS: return 0x23; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GET_MICROS: return 0x24; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_MULTIGET_MICROS: return 0x25; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_SEEK_MICROS: return 0x26; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_NEXT_MICROS: return 0x27; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_PREV_MICROS: return 0x28; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS: return 0x29; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_READ_MICROS: return 0x2A; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_SYNC_MICROS: return 0x2B; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GC_MICROS: return 0x2C; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS: return 0x2D; case ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS: return 0x2E; case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: // 0x1F for backwards compatibility on current minor version. 
return 0x1F; default: // undefined/default return 0x0; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::Histograms enum for the // provided Java org.rocksdb.HistogramsType static ROCKSDB_NAMESPACE::Histograms toCppHistograms(jbyte jhistograms_type) { switch(jhistograms_type) { case 0x0: return ROCKSDB_NAMESPACE::Histograms::DB_GET; case 0x1: return ROCKSDB_NAMESPACE::Histograms::DB_WRITE; case 0x2: return ROCKSDB_NAMESPACE::Histograms::COMPACTION_TIME; case 0x3: return ROCKSDB_NAMESPACE::Histograms::SUBCOMPACTION_SETUP_TIME; case 0x4: return ROCKSDB_NAMESPACE::Histograms::TABLE_SYNC_MICROS; case 0x5: return ROCKSDB_NAMESPACE::Histograms::COMPACTION_OUTFILE_SYNC_MICROS; case 0x6: return ROCKSDB_NAMESPACE::Histograms::WAL_FILE_SYNC_MICROS; case 0x7: return ROCKSDB_NAMESPACE::Histograms::MANIFEST_FILE_SYNC_MICROS; case 0x8: return ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_IO_MICROS; case 0x9: return ROCKSDB_NAMESPACE::Histograms::DB_MULTIGET; case 0xA: return ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_COMPACTION_MICROS; case 0xB: return ROCKSDB_NAMESPACE::Histograms::READ_BLOCK_GET_MICROS; case 0xC: return ROCKSDB_NAMESPACE::Histograms::WRITE_RAW_BLOCK_MICROS; case 0xD: return ROCKSDB_NAMESPACE::Histograms::STALL_L0_SLOWDOWN_COUNT; case 0xE: return ROCKSDB_NAMESPACE::Histograms::STALL_MEMTABLE_COMPACTION_COUNT; case 0xF: return ROCKSDB_NAMESPACE::Histograms::STALL_L0_NUM_FILES_COUNT; case 0x10: return ROCKSDB_NAMESPACE::Histograms::HARD_RATE_LIMIT_DELAY_COUNT; case 0x11: return ROCKSDB_NAMESPACE::Histograms::SOFT_RATE_LIMIT_DELAY_COUNT; case 0x12: return ROCKSDB_NAMESPACE::Histograms::NUM_FILES_IN_SINGLE_COMPACTION; case 0x13: return ROCKSDB_NAMESPACE::Histograms::DB_SEEK; case 0x14: return ROCKSDB_NAMESPACE::Histograms::WRITE_STALL; case 0x15: return ROCKSDB_NAMESPACE::Histograms::SST_READ_MICROS; case 0x16: return ROCKSDB_NAMESPACE::Histograms::NUM_SUBCOMPACTIONS_SCHEDULED; case 0x17: return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_READ; case 0x18: return 
ROCKSDB_NAMESPACE::Histograms::BYTES_PER_WRITE; case 0x19: return ROCKSDB_NAMESPACE::Histograms::BYTES_PER_MULTIGET; case 0x1A: return ROCKSDB_NAMESPACE::Histograms::BYTES_COMPRESSED; case 0x1B: return ROCKSDB_NAMESPACE::Histograms::BYTES_DECOMPRESSED; case 0x1C: return ROCKSDB_NAMESPACE::Histograms::COMPRESSION_TIMES_NANOS; case 0x1D: return ROCKSDB_NAMESPACE::Histograms::DECOMPRESSION_TIMES_NANOS; case 0x1E: return ROCKSDB_NAMESPACE::Histograms::READ_NUM_MERGE_OPERANDS; // 0x20 to skip 0x1F so TICKER_ENUM_MAX remains unchanged for minor version compatibility. case 0x20: return ROCKSDB_NAMESPACE::Histograms::FLUSH_TIME; case 0x21: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_KEY_SIZE; case 0x22: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_VALUE_SIZE; case 0x23: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_WRITE_MICROS; case 0x24: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GET_MICROS; case 0x25: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_MULTIGET_MICROS; case 0x26: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_SEEK_MICROS; case 0x27: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_NEXT_MICROS; case 0x28: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_PREV_MICROS; case 0x29: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS; case 0x2A: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_READ_MICROS; case 0x2B: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_BLOB_FILE_SYNC_MICROS; case 0x2C: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_GC_MICROS; case 0x2D: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_COMPRESSION_MICROS; case 0x2E: return ROCKSDB_NAMESPACE::Histograms::BLOB_DB_DECOMPRESSION_MICROS; case 0x1F: // 0x1F for backwards compatibility on current minor version. 
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; default: // undefined/default return ROCKSDB_NAMESPACE::Histograms::DB_GET; } } }; // The portal class for org.rocksdb.StatsLevel class StatsLevelJni { public: // Returns the equivalent org.rocksdb.StatsLevel for the provided // C++ ROCKSDB_NAMESPACE::StatsLevel enum static jbyte toJavaStatsLevel( const ROCKSDB_NAMESPACE::StatsLevel& stats_level) { switch(stats_level) { case ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers: return 0x0; case ROCKSDB_NAMESPACE::StatsLevel::kExceptTimeForMutex: return 0x1; case ROCKSDB_NAMESPACE::StatsLevel::kAll: return 0x2; default: // undefined/default return 0x0; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::StatsLevel enum for the // provided Java org.rocksdb.StatsLevel static ROCKSDB_NAMESPACE::StatsLevel toCppStatsLevel(jbyte jstats_level) { switch(jstats_level) { case 0x0: return ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers; case 0x1: return ROCKSDB_NAMESPACE::StatsLevel::kExceptTimeForMutex; case 0x2: return ROCKSDB_NAMESPACE::StatsLevel::kAll; default: // undefined/default return ROCKSDB_NAMESPACE::StatsLevel::kExceptDetailedTimers; } } }; // The portal class for org.rocksdb.RateLimiterMode class RateLimiterModeJni { public: // Returns the equivalent org.rocksdb.RateLimiterMode for the provided // C++ ROCKSDB_NAMESPACE::RateLimiter::Mode enum static jbyte toJavaRateLimiterMode( const ROCKSDB_NAMESPACE::RateLimiter::Mode& rate_limiter_mode) { switch(rate_limiter_mode) { case ROCKSDB_NAMESPACE::RateLimiter::Mode::kReadsOnly: return 0x0; case ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly: return 0x1; case ROCKSDB_NAMESPACE::RateLimiter::Mode::kAllIo: return 0x2; default: // undefined/default return 0x1; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::RateLimiter::Mode enum for // the provided Java org.rocksdb.RateLimiterMode static ROCKSDB_NAMESPACE::RateLimiter::Mode toCppRateLimiterMode( jbyte jrate_limiter_mode) { switch(jrate_limiter_mode) 
{ case 0x0: return ROCKSDB_NAMESPACE::RateLimiter::Mode::kReadsOnly; case 0x1: return ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly; case 0x2: return ROCKSDB_NAMESPACE::RateLimiter::Mode::kAllIo; default: // undefined/default return ROCKSDB_NAMESPACE::RateLimiter::Mode::kWritesOnly; } } }; // The portal class for org.rocksdb.MemoryUsageType class MemoryUsageTypeJni { public: // Returns the equivalent org.rocksdb.MemoryUsageType for the provided // C++ ROCKSDB_NAMESPACE::MemoryUtil::UsageType enum static jbyte toJavaMemoryUsageType( const ROCKSDB_NAMESPACE::MemoryUtil::UsageType& usage_type) { switch (usage_type) { case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableTotal: return 0x0; case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableUnFlushed: return 0x1; case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kTableReadersTotal: return 0x2; case ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kCacheTotal: return 0x3; default: // undefined: use kNumUsageTypes return 0x4; } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::MemoryUtil::UsageType enum for // the provided Java org.rocksdb.MemoryUsageType static ROCKSDB_NAMESPACE::MemoryUtil::UsageType toCppMemoryUsageType( jbyte usage_type) { switch (usage_type) { case 0x0: return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableTotal; case 0x1: return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kMemTableUnFlushed; case 0x2: return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kTableReadersTotal; case 0x3: return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kCacheTotal; default: // undefined/default: use kNumUsageTypes return ROCKSDB_NAMESPACE::MemoryUtil::UsageType::kNumUsageTypes; } } }; // The portal class for org.rocksdb.Transaction class TransactionJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.Transaction * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or 
ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/Transaction"); } /** * Create a new Java org.rocksdb.Transaction.WaitingTransactions object * * @param env A pointer to the Java environment * @param jtransaction A Java org.rocksdb.Transaction object * @param column_family_id The id of the column family * @param key The key * @param transaction_ids The transaction ids * * @return A reference to a Java * org.rocksdb.Transaction.WaitingTransactions object, * or nullptr if an an exception occurs */ static jobject newWaitingTransactions(JNIEnv* env, jobject jtransaction, const uint32_t column_family_id, const std::string &key, const std::vector &transaction_ids) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID( jclazz, "newWaitingTransactions", "(JLjava/lang/String;[J)Lorg/rocksdb/Transaction$WaitingTransactions;"); if(mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jstring jkey = env->NewStringUTF(key.c_str()); if(jkey == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } const size_t len = transaction_ids.size(); jlongArray jtransaction_ids = env->NewLongArray(static_cast(len)); if(jtransaction_ids == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jkey); return nullptr; } jlong *body = env->GetLongArrayElements(jtransaction_ids, nullptr); if(body == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jkey); env->DeleteLocalRef(jtransaction_ids); return nullptr; } for(size_t i = 0; i < len; ++i) { body[i] = static_cast(transaction_ids[i]); } env->ReleaseLongArrayElements(jtransaction_ids, body, 0); jobject jwaiting_transactions = env->CallObjectMethod(jtransaction, mid, static_cast(column_family_id), jkey, jtransaction_ids); if(env->ExceptionCheck()) { // exception thrown: 
InstantiationException or OutOfMemoryError env->DeleteLocalRef(jkey); env->DeleteLocalRef(jtransaction_ids); return nullptr; } return jwaiting_transactions; } }; // The portal class for org.rocksdb.TransactionDB class TransactionDBJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.TransactionDB * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/TransactionDB"); } /** * Create a new Java org.rocksdb.TransactionDB.DeadlockInfo object * * @param env A pointer to the Java environment * @param jtransaction A Java org.rocksdb.Transaction object * @param column_family_id The id of the column family * @param key The key * @param transaction_ids The transaction ids * * @return A reference to a Java * org.rocksdb.Transaction.WaitingTransactions object, * or nullptr if an an exception occurs */ static jobject newDeadlockInfo( JNIEnv* env, jobject jtransaction_db, const ROCKSDB_NAMESPACE::TransactionID transaction_id, const uint32_t column_family_id, const std::string& waiting_key, const bool exclusive) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID( jclazz, "newDeadlockInfo", "(JJLjava/lang/String;Z)Lorg/rocksdb/TransactionDB$DeadlockInfo;"); if(mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jstring jwaiting_key = env->NewStringUTF(waiting_key.c_str()); if(jwaiting_key == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } // resolve the column family id to a ColumnFamilyHandle jobject jdeadlock_info = env->CallObjectMethod(jtransaction_db, mid, transaction_id, static_cast(column_family_id), jwaiting_key, exclusive); 
if(env->ExceptionCheck()) { // exception thrown: InstantiationException or OutOfMemoryError env->DeleteLocalRef(jwaiting_key); return nullptr; } return jdeadlock_info; } }; // The portal class for org.rocksdb.TxnDBWritePolicy class TxnDBWritePolicyJni { public: // Returns the equivalent org.rocksdb.TxnDBWritePolicy for the provided // C++ ROCKSDB_NAMESPACE::TxnDBWritePolicy enum static jbyte toJavaTxnDBWritePolicy( const ROCKSDB_NAMESPACE::TxnDBWritePolicy& txndb_write_policy) { switch (txndb_write_policy) { case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED: return 0x0; case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_PREPARED: return 0x1; case ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_UNPREPARED: return 0x2; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::TxnDBWritePolicy enum for the // provided Java org.rocksdb.TxnDBWritePolicy static ROCKSDB_NAMESPACE::TxnDBWritePolicy toCppTxnDBWritePolicy( jbyte jtxndb_write_policy) { switch (jtxndb_write_policy) { case 0x0: return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED; case 0x1: return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_PREPARED; case 0x2: return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_UNPREPARED; default: // undefined/default return ROCKSDB_NAMESPACE::TxnDBWritePolicy::WRITE_COMMITTED; } } }; // The portal class for org.rocksdb.TransactionDB.KeyLockInfo class KeyLockInfoJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.TransactionDB.KeyLockInfo * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$KeyLockInfo"); } /** * Create a new Java org.rocksdb.TransactionDB.KeyLockInfo object * with the same properties as the provided C++ 
ROCKSDB_NAMESPACE::KeyLockInfo * object * * @param env A pointer to the Java environment * @param key_lock_info The ROCKSDB_NAMESPACE::KeyLockInfo object * * @return A reference to a Java * org.rocksdb.TransactionDB.KeyLockInfo object, * or nullptr if an an exception occurs */ static jobject construct( JNIEnv* env, const ROCKSDB_NAMESPACE::KeyLockInfo& key_lock_info) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID( jclazz, "", "(Ljava/lang/String;[JZ)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jstring jkey = env->NewStringUTF(key_lock_info.key.c_str()); if (jkey == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } const jsize jtransaction_ids_len = static_cast(key_lock_info.ids.size()); jlongArray jtransactions_ids = env->NewLongArray(jtransaction_ids_len); if (jtransactions_ids == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jkey); return nullptr; } const jobject jkey_lock_info = env->NewObject(jclazz, mid, jkey, jtransactions_ids, key_lock_info.exclusive); if(jkey_lock_info == nullptr) { // exception thrown: InstantiationException or OutOfMemoryError env->DeleteLocalRef(jtransactions_ids); env->DeleteLocalRef(jkey); return nullptr; } return jkey_lock_info; } }; // The portal class for org.rocksdb.TransactionDB.DeadlockInfo class DeadlockInfoJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.TransactionDB.DeadlockInfo * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env,"org/rocksdb/TransactionDB$DeadlockInfo"); } }; // The portal class for org.rocksdb.TransactionDB.DeadlockPath class 
DeadlockPathJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.TransactionDB.DeadlockPath * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockPath"); } /** * Create a new Java org.rocksdb.TransactionDB.DeadlockPath object * * @param env A pointer to the Java environment * * @return A reference to a Java * org.rocksdb.TransactionDB.DeadlockPath object, * or nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, const jobjectArray jdeadlock_infos, const bool limit_exceeded) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID( jclazz, "", "([LDeadlockInfo;Z)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } const jobject jdeadlock_path = env->NewObject(jclazz, mid, jdeadlock_infos, limit_exceeded); if(jdeadlock_path == nullptr) { // exception thrown: InstantiationException or OutOfMemoryError return nullptr; } return jdeadlock_path; } }; class AbstractTableFilterJni : public RocksDBNativeClass< const ROCKSDB_NAMESPACE::TableFilterJniCallback*, AbstractTableFilterJni> { public: /** * Get the Java Method: TableFilter#filter(TableProperties) * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getFilterMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "filter", "(Lorg/rocksdb/TableProperties;)Z"); assert(mid != nullptr); return mid; } private: static jclass 
getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/TableFilter"); } }; class TablePropertiesJni : public JavaClass { public: /** * Create a new Java org.rocksdb.TableProperties object. * * @param env A pointer to the Java environment * @param table_properties A Cpp table properties object * * @return A reference to a Java org.rocksdb.TableProperties object, or * nullptr if an an exception occurs */ static jobject fromCppTableProperties( JNIEnv* env, const ROCKSDB_NAMESPACE::TableProperties& table_properties) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(JJJJJJJJJJJJJJJJJJJ[BLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/util/Map;Ljava/util/Map;Ljava/util/Map;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jbyteArray jcolumn_family_name = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, table_properties.column_family_name); if (jcolumn_family_name == nullptr) { // exception occurred creating java string return nullptr; } jstring jfilter_policy_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &table_properties.filter_policy_name, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); return nullptr; } jstring jcomparator_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &table_properties.comparator_name, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); return nullptr; } jstring jmerge_operator_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &table_properties.merge_operator_name, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); 
env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); return nullptr; } jstring jprefix_extractor_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &table_properties.prefix_extractor_name, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); env->DeleteLocalRef(jmerge_operator_name); return nullptr; } jstring jproperty_collectors_names = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &table_properties.property_collectors_names, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); env->DeleteLocalRef(jmerge_operator_name); env->DeleteLocalRef(jprefix_extractor_name); return nullptr; } jstring jcompression_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &table_properties.compression_name, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); env->DeleteLocalRef(jmerge_operator_name); env->DeleteLocalRef(jprefix_extractor_name); env->DeleteLocalRef(jproperty_collectors_names); return nullptr; } // Map jobject juser_collected_properties = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap( env, &table_properties.user_collected_properties); if (env->ExceptionCheck()) { // exception occurred creating java map env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); env->DeleteLocalRef(jmerge_operator_name); env->DeleteLocalRef(jprefix_extractor_name); env->DeleteLocalRef(jproperty_collectors_names); env->DeleteLocalRef(jcompression_name); return nullptr; } // Map jobject jreadable_properties = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap( env, 
&table_properties.readable_properties); if (env->ExceptionCheck()) { // exception occurred creating java map env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); env->DeleteLocalRef(jmerge_operator_name); env->DeleteLocalRef(jprefix_extractor_name); env->DeleteLocalRef(jproperty_collectors_names); env->DeleteLocalRef(jcompression_name); env->DeleteLocalRef(juser_collected_properties); return nullptr; } // Map jobject jproperties_offsets = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap( env, &table_properties.properties_offsets); if (env->ExceptionCheck()) { // exception occurred creating java map env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfilter_policy_name); env->DeleteLocalRef(jcomparator_name); env->DeleteLocalRef(jmerge_operator_name); env->DeleteLocalRef(jprefix_extractor_name); env->DeleteLocalRef(jproperty_collectors_names); env->DeleteLocalRef(jcompression_name); env->DeleteLocalRef(juser_collected_properties); env->DeleteLocalRef(jreadable_properties); return nullptr; } jobject jtable_properties = env->NewObject(jclazz, mid, static_cast(table_properties.data_size), static_cast(table_properties.index_size), static_cast(table_properties.index_partitions), static_cast(table_properties.top_level_index_size), static_cast(table_properties.index_key_is_user_key), static_cast(table_properties.index_value_is_delta_encoded), static_cast(table_properties.filter_size), static_cast(table_properties.raw_key_size), static_cast(table_properties.raw_value_size), static_cast(table_properties.num_data_blocks), static_cast(table_properties.num_entries), static_cast(table_properties.num_deletions), static_cast(table_properties.num_merge_operands), static_cast(table_properties.num_range_deletions), static_cast(table_properties.format_version), static_cast(table_properties.fixed_key_len), static_cast(table_properties.column_family_id), static_cast(table_properties.creation_time), 
static_cast(table_properties.oldest_key_time), jcolumn_family_name, jfilter_policy_name, jcomparator_name, jmerge_operator_name, jprefix_extractor_name, jproperty_collectors_names, jcompression_name, juser_collected_properties, jreadable_properties, jproperties_offsets ); if (env->ExceptionCheck()) { return nullptr; } return jtable_properties; } private: static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/TableProperties"); } }; class ColumnFamilyDescriptorJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.ColumnFamilyDescriptor * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyDescriptor"); } /** * Create a new Java org.rocksdb.ColumnFamilyDescriptor object with the same * properties as the provided C++ ROCKSDB_NAMESPACE::ColumnFamilyDescriptor * object * * @param env A pointer to the Java environment * @param cfd A pointer to ROCKSDB_NAMESPACE::ColumnFamilyDescriptor object * * @return A reference to a Java org.rocksdb.ColumnFamilyDescriptor object, or * nullptr if an an exception occurs */ static jobject construct(JNIEnv* env, ColumnFamilyDescriptor* cfd) { jbyteArray jcf_name = JniUtil::copyBytes(env, cfd->name); jobject cfopts = ColumnFamilyOptionsJni::construct(env, &(cfd->options)); jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "([BLorg/rocksdb/ColumnFamilyOptions;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError env->DeleteLocalRef(jcf_name); return nullptr; } jobject jcfd = env->NewObject(jclazz, mid, jcf_name, cfopts); if (env->ExceptionCheck()) { 
env->DeleteLocalRef(jcf_name); return nullptr; } return jcfd; } /** * Get the Java Method: ColumnFamilyDescriptor#columnFamilyName * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getColumnFamilyNameMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID(jclazz, "columnFamilyName", "()[B"); assert(mid != nullptr); return mid; } /** * Get the Java Method: ColumnFamilyDescriptor#columnFamilyOptions * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getColumnFamilyOptionsMethod(JNIEnv* env) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "columnFamilyOptions", "()Lorg/rocksdb/ColumnFamilyOptions;"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.IndexType class IndexTypeJni { public: // Returns the equivalent org.rocksdb.IndexType for the provided // C++ ROCKSDB_NAMESPACE::IndexType enum static jbyte toJavaIndexType( const ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType& index_type) { switch (index_type) { case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::kBinarySearch: return 0x0; case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType::kHashSearch: return 0x1; case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kTwoLevelIndexSearch: return 0x2; case ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kBinarySearchWithFirstKey: return 0x3; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::IndexType enum for the // provided Java org.rocksdb.IndexType static ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType toCppIndexType( 
jbyte jindex_type) { switch (jindex_type) { case 0x0: return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kBinarySearch; case 0x1: return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kHashSearch; case 0x2: return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kTwoLevelIndexSearch; case 0x3: return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kBinarySearchWithFirstKey; default: // undefined/default return ROCKSDB_NAMESPACE::BlockBasedTableOptions::IndexType:: kBinarySearch; } } }; // The portal class for org.rocksdb.DataBlockIndexType class DataBlockIndexTypeJni { public: // Returns the equivalent org.rocksdb.DataBlockIndexType for the provided // C++ ROCKSDB_NAMESPACE::DataBlockIndexType enum static jbyte toJavaDataBlockIndexType( const ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType& index_type) { switch (index_type) { case ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: kDataBlockBinarySearch: return 0x0; case ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: kDataBlockBinaryAndHash: return 0x1; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::DataBlockIndexType enum for // the provided Java org.rocksdb.DataBlockIndexType static ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType toCppDataBlockIndexType(jbyte jindex_type) { switch (jindex_type) { case 0x0: return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: kDataBlockBinarySearch; case 0x1: return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: kDataBlockBinaryAndHash; default: // undefined/default return ROCKSDB_NAMESPACE::BlockBasedTableOptions::DataBlockIndexType:: kDataBlockBinarySearch; } } }; // The portal class for org.rocksdb.ChecksumType class ChecksumTypeJni { public: // Returns the equivalent org.rocksdb.ChecksumType for the provided // C++ ROCKSDB_NAMESPACE::ChecksumType enum static jbyte toJavaChecksumType( const 
ROCKSDB_NAMESPACE::ChecksumType& checksum_type) { switch (checksum_type) { case ROCKSDB_NAMESPACE::ChecksumType::kNoChecksum: return 0x0; case ROCKSDB_NAMESPACE::ChecksumType::kCRC32c: return 0x1; case ROCKSDB_NAMESPACE::ChecksumType::kxxHash: return 0x2; case ROCKSDB_NAMESPACE::ChecksumType::kxxHash64: return 0x3; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ChecksumType enum for the // provided Java org.rocksdb.ChecksumType static ROCKSDB_NAMESPACE::ChecksumType toCppChecksumType( jbyte jchecksum_type) { switch (jchecksum_type) { case 0x0: return ROCKSDB_NAMESPACE::ChecksumType::kNoChecksum; case 0x1: return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; case 0x2: return ROCKSDB_NAMESPACE::ChecksumType::kxxHash; case 0x3: return ROCKSDB_NAMESPACE::ChecksumType::kxxHash64; default: // undefined/default return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; } } }; // The portal class for org.rocksdb.Priority class PriorityJni { public: // Returns the equivalent org.rocksdb.Priority for the provided // C++ ROCKSDB_NAMESPACE::Env::Priority enum static jbyte toJavaPriority( const ROCKSDB_NAMESPACE::Env::Priority& priority) { switch (priority) { case ROCKSDB_NAMESPACE::Env::Priority::BOTTOM: return 0x0; case ROCKSDB_NAMESPACE::Env::Priority::LOW: return 0x1; case ROCKSDB_NAMESPACE::Env::Priority::HIGH: return 0x2; case ROCKSDB_NAMESPACE::Env::Priority::TOTAL: return 0x3; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::env::Priority enum for the // provided Java org.rocksdb.Priority static ROCKSDB_NAMESPACE::Env::Priority toCppPriority(jbyte jpriority) { switch (jpriority) { case 0x0: return ROCKSDB_NAMESPACE::Env::Priority::BOTTOM; case 0x1: return ROCKSDB_NAMESPACE::Env::Priority::LOW; case 0x2: return ROCKSDB_NAMESPACE::Env::Priority::HIGH; case 0x3: return ROCKSDB_NAMESPACE::Env::Priority::TOTAL; default: // undefined/default return ROCKSDB_NAMESPACE::Env::Priority::LOW; } } }; // The portal 
class for org.rocksdb.ThreadType class ThreadTypeJni { public: // Returns the equivalent org.rocksdb.ThreadType for the provided // C++ ROCKSDB_NAMESPACE::ThreadStatus::ThreadType enum static jbyte toJavaThreadType( const ROCKSDB_NAMESPACE::ThreadStatus::ThreadType& thread_type) { switch (thread_type) { case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::HIGH_PRIORITY: return 0x0; case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY: return 0x1; case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::USER: return 0x2; case ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::BOTTOM_PRIORITY: return 0x3; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::ThreadType enum // for the provided Java org.rocksdb.ThreadType static ROCKSDB_NAMESPACE::ThreadStatus::ThreadType toCppThreadType( jbyte jthread_type) { switch (jthread_type) { case 0x0: return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::HIGH_PRIORITY; case 0x1: return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY; case 0x2: return ThreadStatus::ThreadType::USER; case 0x3: return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::BOTTOM_PRIORITY; default: // undefined/default return ROCKSDB_NAMESPACE::ThreadStatus::ThreadType::LOW_PRIORITY; } } }; // The portal class for org.rocksdb.OperationType class OperationTypeJni { public: // Returns the equivalent org.rocksdb.OperationType for the provided // C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationType enum static jbyte toJavaOperationType( const ROCKSDB_NAMESPACE::ThreadStatus::OperationType& operation_type) { switch (operation_type) { case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN: return 0x0; case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_COMPACTION: return 0x1; case ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_FLUSH: return 0x2; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationType // enum for the provided Java 
org.rocksdb.OperationType static ROCKSDB_NAMESPACE::ThreadStatus::OperationType toCppOperationType( jbyte joperation_type) { switch (joperation_type) { case 0x0: return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN; case 0x1: return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_COMPACTION; case 0x2: return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_FLUSH; default: // undefined/default return ROCKSDB_NAMESPACE::ThreadStatus::OperationType::OP_UNKNOWN; } } }; // The portal class for org.rocksdb.OperationStage class OperationStageJni { public: // Returns the equivalent org.rocksdb.OperationStage for the provided // C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationStage enum static jbyte toJavaOperationStage( const ROCKSDB_NAMESPACE::ThreadStatus::OperationStage& operation_stage) { switch (operation_stage) { case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN: return 0x0; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_FLUSH_RUN: return 0x1; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_FLUSH_WRITE_L0: return 0x2; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_PREPARE: return 0x3; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_RUN: return 0x4; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_PROCESS_KV: return 0x5; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_INSTALL: return 0x6; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_SYNC_FILE: return 0x7; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_PICK_MEMTABLES_TO_FLUSH: return 0x8; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_MEMTABLE_ROLLBACK: return 0x9; case ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS: return 0xA; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::OperationStage // enum for the provided Java 
org.rocksdb.OperationStage static ROCKSDB_NAMESPACE::ThreadStatus::OperationStage toCppOperationStage( jbyte joperation_stage) { switch (joperation_stage) { case 0x0: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN; case 0x1: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_FLUSH_RUN; case 0x2: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_FLUSH_WRITE_L0; case 0x3: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_PREPARE; case 0x4: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_RUN; case 0x5: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_PROCESS_KV; case 0x6: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_INSTALL; case 0x7: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_COMPACTION_SYNC_FILE; case 0x8: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_PICK_MEMTABLES_TO_FLUSH; case 0x9: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_MEMTABLE_ROLLBACK; case 0xA: return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage:: STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS; default: // undefined/default return ROCKSDB_NAMESPACE::ThreadStatus::OperationStage::STAGE_UNKNOWN; } } }; // The portal class for org.rocksdb.StateType class StateTypeJni { public: // Returns the equivalent org.rocksdb.StateType for the provided // C++ ROCKSDB_NAMESPACE::ThreadStatus::StateType enum static jbyte toJavaStateType( const ROCKSDB_NAMESPACE::ThreadStatus::StateType& state_type) { switch (state_type) { case ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN: return 0x0; case ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_MUTEX_WAIT: return 0x1; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ThreadStatus::StateType enum // for the provided Java org.rocksdb.StateType static ROCKSDB_NAMESPACE::ThreadStatus::StateType toCppStateType( jbyte jstate_type) { 
switch (jstate_type) { case 0x0: return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN; case 0x1: return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_MUTEX_WAIT; default: // undefined/default return ROCKSDB_NAMESPACE::ThreadStatus::StateType::STATE_UNKNOWN; } } }; // The portal class for org.rocksdb.ThreadStatus class ThreadStatusJni : public JavaClass { public: /** * Get the Java Class org.rocksdb.ThreadStatus * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/ThreadStatus"); } /** * Create a new Java org.rocksdb.ThreadStatus object with the same * properties as the provided C++ ROCKSDB_NAMESPACE::ThreadStatus object * * @param env A pointer to the Java environment * @param thread_status A pointer to ROCKSDB_NAMESPACE::ThreadStatus object * * @return A reference to a Java org.rocksdb.ColumnFamilyOptions object, or * nullptr if an an exception occurs */ static jobject construct( JNIEnv* env, const ROCKSDB_NAMESPACE::ThreadStatus* thread_status) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(JBLjava/lang/String;Ljava/lang/String;BJB[JB)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jstring jdb_name = JniUtil::toJavaString(env, &(thread_status->db_name), true); if (env->ExceptionCheck()) { // an error occurred return nullptr; } jstring jcf_name = JniUtil::toJavaString(env, &(thread_status->cf_name), true); if (env->ExceptionCheck()) { // an error occurred env->DeleteLocalRef(jdb_name); return nullptr; } // long[] const jsize len = static_cast( ROCKSDB_NAMESPACE::ThreadStatus::kNumOperationProperties); 
jlongArray joperation_properties = env->NewLongArray(len); if (joperation_properties == nullptr) { // an exception occurred env->DeleteLocalRef(jdb_name); env->DeleteLocalRef(jcf_name); return nullptr; } jlong *body = env->GetLongArrayElements(joperation_properties, nullptr); if (body == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jdb_name); env->DeleteLocalRef(jcf_name); env->DeleteLocalRef(joperation_properties); return nullptr; } for (size_t i = 0; i < len; ++i) { body[i] = static_cast(thread_status->op_properties[i]); } env->ReleaseLongArrayElements(joperation_properties, body, 0); jobject jcfd = env->NewObject(jclazz, mid, static_cast(thread_status->thread_id), ThreadTypeJni::toJavaThreadType(thread_status->thread_type), jdb_name, jcf_name, OperationTypeJni::toJavaOperationType(thread_status->operation_type), static_cast(thread_status->op_elapsed_micros), OperationStageJni::toJavaOperationStage(thread_status->operation_stage), joperation_properties, StateTypeJni::toJavaStateType(thread_status->state_type)); if (env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(jdb_name); env->DeleteLocalRef(jcf_name); env->DeleteLocalRef(joperation_properties); return nullptr; } // cleanup env->DeleteLocalRef(jdb_name); env->DeleteLocalRef(jcf_name); env->DeleteLocalRef(joperation_properties); return jcfd; } }; // The portal class for org.rocksdb.CompactionStyle class CompactionStyleJni { public: // Returns the equivalent org.rocksdb.CompactionStyle for the provided // C++ ROCKSDB_NAMESPACE::CompactionStyle enum static jbyte toJavaCompactionStyle( const ROCKSDB_NAMESPACE::CompactionStyle& compaction_style) { switch (compaction_style) { case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel: return 0x0; case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleUniversal: return 0x1; case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO: return 0x2; case ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone: return 0x3; 
default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionStyle enum for the // provided Java org.rocksdb.CompactionStyle static ROCKSDB_NAMESPACE::CompactionStyle toCppCompactionStyle( jbyte jcompaction_style) { switch (jcompaction_style) { case 0x0: return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel; case 0x1: return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleUniversal; case 0x2: return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO; case 0x3: return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone; default: // undefined/default return ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleLevel; } } }; // The portal class for org.rocksdb.CompactionReason class CompactionReasonJni { public: // Returns the equivalent org.rocksdb.CompactionReason for the provided // C++ ROCKSDB_NAMESPACE::CompactionReason enum static jbyte toJavaCompactionReason( const ROCKSDB_NAMESPACE::CompactionReason& compaction_reason) { switch (compaction_reason) { case ROCKSDB_NAMESPACE::CompactionReason::kUnknown: return 0x0; case ROCKSDB_NAMESPACE::CompactionReason::kLevelL0FilesNum: return 0x1; case ROCKSDB_NAMESPACE::CompactionReason::kLevelMaxLevelSize: return 0x2; case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeAmplification: return 0x3; case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeRatio: return 0x4; case ROCKSDB_NAMESPACE::CompactionReason::kUniversalSortedRunNum: return 0x5; case ROCKSDB_NAMESPACE::CompactionReason::kFIFOMaxSize: return 0x6; case ROCKSDB_NAMESPACE::CompactionReason::kFIFOReduceNumFiles: return 0x7; case ROCKSDB_NAMESPACE::CompactionReason::kFIFOTtl: return 0x8; case ROCKSDB_NAMESPACE::CompactionReason::kManualCompaction: return 0x9; case ROCKSDB_NAMESPACE::CompactionReason::kFilesMarkedForCompaction: return 0x10; case ROCKSDB_NAMESPACE::CompactionReason::kBottommostFiles: return 0x0A; case ROCKSDB_NAMESPACE::CompactionReason::kTtl: return 0x0B; case 
ROCKSDB_NAMESPACE::CompactionReason::kFlush: return 0x0C; case ROCKSDB_NAMESPACE::CompactionReason::kExternalSstIngestion: return 0x0D; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::CompactionReason enum for the // provided Java org.rocksdb.CompactionReason static ROCKSDB_NAMESPACE::CompactionReason toCppCompactionReason( jbyte jcompaction_reason) { switch (jcompaction_reason) { case 0x0: return ROCKSDB_NAMESPACE::CompactionReason::kUnknown; case 0x1: return ROCKSDB_NAMESPACE::CompactionReason::kLevelL0FilesNum; case 0x2: return ROCKSDB_NAMESPACE::CompactionReason::kLevelMaxLevelSize; case 0x3: return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeAmplification; case 0x4: return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSizeRatio; case 0x5: return ROCKSDB_NAMESPACE::CompactionReason::kUniversalSortedRunNum; case 0x6: return ROCKSDB_NAMESPACE::CompactionReason::kFIFOMaxSize; case 0x7: return ROCKSDB_NAMESPACE::CompactionReason::kFIFOReduceNumFiles; case 0x8: return ROCKSDB_NAMESPACE::CompactionReason::kFIFOTtl; case 0x9: return ROCKSDB_NAMESPACE::CompactionReason::kManualCompaction; case 0x10: return ROCKSDB_NAMESPACE::CompactionReason::kFilesMarkedForCompaction; case 0x0A: return ROCKSDB_NAMESPACE::CompactionReason::kBottommostFiles; case 0x0B: return ROCKSDB_NAMESPACE::CompactionReason::kTtl; case 0x0C: return ROCKSDB_NAMESPACE::CompactionReason::kFlush; case 0x0D: return ROCKSDB_NAMESPACE::CompactionReason::kExternalSstIngestion; default: // undefined/default return ROCKSDB_NAMESPACE::CompactionReason::kUnknown; } } }; // The portal class for org.rocksdb.WalFileType class WalFileTypeJni { public: // Returns the equivalent org.rocksdb.WalFileType for the provided // C++ ROCKSDB_NAMESPACE::WalFileType enum static jbyte toJavaWalFileType( const ROCKSDB_NAMESPACE::WalFileType& wal_file_type) { switch (wal_file_type) { case ROCKSDB_NAMESPACE::WalFileType::kArchivedLogFile: return 0x0; case 
ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile: return 0x1; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::WalFileType enum for the // provided Java org.rocksdb.WalFileType static ROCKSDB_NAMESPACE::WalFileType toCppWalFileType(jbyte jwal_file_type) { switch (jwal_file_type) { case 0x0: return ROCKSDB_NAMESPACE::WalFileType::kArchivedLogFile; case 0x1: return ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile; default: // undefined/default return ROCKSDB_NAMESPACE::WalFileType::kAliveLogFile; } } }; class LogFileJni : public JavaClass { public: /** * Create a new Java org.rocksdb.LogFile object. * * @param env A pointer to the Java environment * @param log_file A Cpp log file object * * @return A reference to a Java org.rocksdb.LogFile object, or * nullptr if an an exception occurs */ static jobject fromCppLogFile(JNIEnv* env, ROCKSDB_NAMESPACE::LogFile* log_file) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(Ljava/lang/String;JBJJ)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } std::string path_name = log_file->PathName(); jstring jpath_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &path_name, true); if (env->ExceptionCheck()) { // exception occurred creating java string return nullptr; } jobject jlog_file = env->NewObject( jclazz, mid, jpath_name, static_cast(log_file->LogNumber()), ROCKSDB_NAMESPACE::WalFileTypeJni::toJavaWalFileType(log_file->Type()), static_cast(log_file->StartSequence()), static_cast(log_file->SizeFileBytes())); if (env->ExceptionCheck()) { env->DeleteLocalRef(jpath_name); return nullptr; } // cleanup env->DeleteLocalRef(jpath_name); return jlog_file; } static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/LogFile"); } }; class LiveFileMetaDataJni : public JavaClass { public: /** * Create a new 
Java org.rocksdb.LiveFileMetaData object. * * @param env A pointer to the Java environment * @param live_file_meta_data A Cpp live file meta data object * * @return A reference to a Java org.rocksdb.LiveFileMetaData object, or * nullptr if an an exception occurs */ static jobject fromCppLiveFileMetaData( JNIEnv* env, ROCKSDB_NAMESPACE::LiveFileMetaData* live_file_meta_data) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "([BILjava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jbyteArray jcolumn_family_name = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, live_file_meta_data->column_family_name); if (jcolumn_family_name == nullptr) { // exception occurred creating java byte array return nullptr; } jstring jfile_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &live_file_meta_data->name, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); return nullptr; } jstring jpath = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &live_file_meta_data->db_path, true); if (env->ExceptionCheck()) { // exception occurred creating java string env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfile_name); return nullptr; } jbyteArray jsmallest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, live_file_meta_data->smallestkey); if (jsmallest_key == nullptr) { // exception occurred creating java byte array env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); return nullptr; } jbyteArray jlargest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, live_file_meta_data->largestkey); if (jlargest_key == nullptr) { // exception occurred creating java byte array env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfile_name); 
env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); return nullptr; } jobject jlive_file_meta_data = env->NewObject(jclazz, mid, jcolumn_family_name, static_cast(live_file_meta_data->level), jfile_name, jpath, static_cast(live_file_meta_data->size), static_cast(live_file_meta_data->smallest_seqno), static_cast(live_file_meta_data->largest_seqno), jsmallest_key, jlargest_key, static_cast(live_file_meta_data->num_reads_sampled), static_cast(live_file_meta_data->being_compacted), static_cast(live_file_meta_data->num_entries), static_cast(live_file_meta_data->num_deletions) ); if (env->ExceptionCheck()) { env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); return nullptr; } // cleanup env->DeleteLocalRef(jcolumn_family_name); env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); return jlive_file_meta_data; } static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/LiveFileMetaData"); } }; class SstFileMetaDataJni : public JavaClass { public: /** * Create a new Java org.rocksdb.SstFileMetaData object. 
* * @param env A pointer to the Java environment * @param sst_file_meta_data A Cpp sst file meta data object * * @return A reference to a Java org.rocksdb.SstFileMetaData object, or * nullptr if an an exception occurs */ static jobject fromCppSstFileMetaData( JNIEnv* env, const ROCKSDB_NAMESPACE::SstFileMetaData* sst_file_meta_data) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(Ljava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jstring jfile_name = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &sst_file_meta_data->name, true); if (jfile_name == nullptr) { // exception occurred creating java byte array return nullptr; } jstring jpath = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &sst_file_meta_data->db_path, true); if (jpath == nullptr) { // exception occurred creating java byte array env->DeleteLocalRef(jfile_name); return nullptr; } jbyteArray jsmallest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, sst_file_meta_data->smallestkey); if (jsmallest_key == nullptr) { // exception occurred creating java byte array env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); return nullptr; } jbyteArray jlargest_key = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, sst_file_meta_data->largestkey); if (jlargest_key == nullptr) { // exception occurred creating java byte array env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); return nullptr; } jobject jsst_file_meta_data = env->NewObject(jclazz, mid, jfile_name, jpath, static_cast(sst_file_meta_data->size), static_cast(sst_file_meta_data->smallest_seqno), static_cast(sst_file_meta_data->largest_seqno), jsmallest_key, jlargest_key, static_cast(sst_file_meta_data->num_reads_sampled), static_cast(sst_file_meta_data->being_compacted), 
static_cast(sst_file_meta_data->num_entries), static_cast(sst_file_meta_data->num_deletions) ); if (env->ExceptionCheck()) { env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); return nullptr; } // cleanup env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); return jsst_file_meta_data; } static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/SstFileMetaData"); } }; class LevelMetaDataJni : public JavaClass { public: /** * Create a new Java org.rocksdb.LevelMetaData object. * * @param env A pointer to the Java environment * @param level_meta_data A Cpp level meta data object * * @return A reference to a Java org.rocksdb.LevelMetaData object, or * nullptr if an an exception occurs */ static jobject fromCppLevelMetaData( JNIEnv* env, const ROCKSDB_NAMESPACE::LevelMetaData* level_meta_data) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(IJ[Lorg/rocksdb/SstFileMetaData;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } const jsize jlen = static_cast(level_meta_data->files.size()); jobjectArray jfiles = env->NewObjectArray(jlen, SstFileMetaDataJni::getJClass(env), nullptr); if (jfiles == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } jsize i = 0; for (auto it = level_meta_data->files.begin(); it != level_meta_data->files.end(); ++it) { jobject jfile = SstFileMetaDataJni::fromCppSstFileMetaData(env, &(*it)); if (jfile == nullptr) { // exception occurred env->DeleteLocalRef(jfiles); return nullptr; } env->SetObjectArrayElement(jfiles, i++, jfile); } jobject jlevel_meta_data = env->NewObject(jclazz, mid, static_cast(level_meta_data->level), static_cast(level_meta_data->size), jfiles ); if 
(env->ExceptionCheck()) { env->DeleteLocalRef(jfiles); return nullptr; } // cleanup env->DeleteLocalRef(jfiles); return jlevel_meta_data; } static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/LevelMetaData"); } }; class ColumnFamilyMetaDataJni : public JavaClass { public: /** * Create a new Java org.rocksdb.ColumnFamilyMetaData object. * * @param env A pointer to the Java environment * @param column_famly_meta_data A Cpp live file meta data object * * @return A reference to a Java org.rocksdb.ColumnFamilyMetaData object, or * nullptr if an an exception occurs */ static jobject fromCppColumnFamilyMetaData( JNIEnv* env, const ROCKSDB_NAMESPACE::ColumnFamilyMetaData* column_famly_meta_data) { jclass jclazz = getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = env->GetMethodID(jclazz, "", "(JJ[B[Lorg/rocksdb/LevelMetaData;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; } jbyteArray jname = ROCKSDB_NAMESPACE::JniUtil::copyBytes( env, column_famly_meta_data->name); if (jname == nullptr) { // exception occurred creating java byte array return nullptr; } const jsize jlen = static_cast(column_famly_meta_data->levels.size()); jobjectArray jlevels = env->NewObjectArray(jlen, LevelMetaDataJni::getJClass(env), nullptr); if(jlevels == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jname); return nullptr; } jsize i = 0; for (auto it = column_famly_meta_data->levels.begin(); it != column_famly_meta_data->levels.end(); ++it) { jobject jlevel = LevelMetaDataJni::fromCppLevelMetaData(env, &(*it)); if (jlevel == nullptr) { // exception occurred env->DeleteLocalRef(jname); env->DeleteLocalRef(jlevels); return nullptr; } env->SetObjectArrayElement(jlevels, i++, jlevel); } jobject jcolumn_family_meta_data = env->NewObject(jclazz, mid, static_cast(column_famly_meta_data->size), 
static_cast(column_famly_meta_data->file_count), jname, jlevels ); if (env->ExceptionCheck()) { env->DeleteLocalRef(jname); env->DeleteLocalRef(jlevels); return nullptr; } // cleanup env->DeleteLocalRef(jname); env->DeleteLocalRef(jlevels); return jcolumn_family_meta_data; } static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyMetaData"); } }; // The portal class for org.rocksdb.AbstractTraceWriter class AbstractTraceWriterJni : public RocksDBNativeClass< const ROCKSDB_NAMESPACE::TraceWriterJniCallback*, AbstractTraceWriterJni> { public: /** * Get the Java Class org.rocksdb.AbstractTraceWriter * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractTraceWriter"); } /** * Get the Java Method: AbstractTraceWriter#write * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getWriteProxyMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "writeProxy", "(J)S"); assert(mid != nullptr); return mid; } /** * Get the Java Method: AbstractTraceWriter#closeWriter * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getCloseWriterProxyMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "closeWriterProxy", "()S"); assert(mid != nullptr); return mid; } /** * Get the Java Method: 
AbstractTraceWriter#getFileSize * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getGetFileSizeMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "getFileSize", "()J"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.AbstractWalFilter class AbstractWalFilterJni : public RocksDBNativeClass { public: /** * Get the Java Class org.rocksdb.AbstractWalFilter * * @param env A pointer to the Java environment * * @return The Java Class or nullptr if one of the * ClassFormatError, ClassCircularityError, NoClassDefFoundError, * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractWalFilter"); } /** * Get the Java Method: AbstractWalFilter#columnFamilyLogNumberMap * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getColumnFamilyLogNumberMapMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "columnFamilyLogNumberMap", "(Ljava/util/Map;Ljava/util/Map;)V"); assert(mid != nullptr); return mid; } /** * Get the Java Method: AbstractTraceWriter#logRecordFoundProxy * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getLogRecordFoundProxyMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "logRecordFoundProxy", 
"(JLjava/lang/String;JJ)S"); assert(mid != nullptr); return mid; } /** * Get the Java Method: AbstractTraceWriter#name * * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not * be retieved */ static jmethodID getNameMethodId(JNIEnv* env) { jclass jclazz = getJClass(env); if(jclazz == nullptr) { // exception occurred accessing class return nullptr; } static jmethodID mid = env->GetMethodID( jclazz, "name", "()Ljava/lang/String;"); assert(mid != nullptr); return mid; } }; // The portal class for org.rocksdb.WalProcessingOption class WalProcessingOptionJni { public: // Returns the equivalent org.rocksdb.WalProcessingOption for the provided // C++ ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption enum static jbyte toJavaWalProcessingOption( const ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption& wal_processing_option) { switch (wal_processing_option) { case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: kContinueProcessing: return 0x0; case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: kIgnoreCurrentRecord: return 0x1; case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kStopReplay: return 0x2; case ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kCorruptedRecord: return 0x3; default: return 0x7F; // undefined } } // Returns the equivalent C++ // ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption enum for the provided // Java org.rocksdb.WalProcessingOption static ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption toCppWalProcessingOption(jbyte jwal_processing_option) { switch (jwal_processing_option) { case 0x0: return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: kContinueProcessing; case 0x1: return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: kIgnoreCurrentRecord; case 0x2: return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption::kStopReplay; case 0x3: return ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: kCorruptedRecord; default: // undefined/default return 
ROCKSDB_NAMESPACE::WalFilter::WalProcessingOption:: kCorruptedRecord; } } }; // The portal class for org.rocksdb.ReusedSynchronisationType class ReusedSynchronisationTypeJni { public: // Returns the equivalent org.rocksdb.ReusedSynchronisationType for the // provided C++ ROCKSDB_NAMESPACE::ReusedSynchronisationType enum static jbyte toJavaReusedSynchronisationType( const ROCKSDB_NAMESPACE::ReusedSynchronisationType& reused_synchronisation_type) { switch(reused_synchronisation_type) { case ROCKSDB_NAMESPACE::ReusedSynchronisationType::MUTEX: return 0x0; case ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX: return 0x1; case ROCKSDB_NAMESPACE::ReusedSynchronisationType::THREAD_LOCAL: return 0x2; default: return 0x7F; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ReusedSynchronisationType // enum for the provided Java org.rocksdb.ReusedSynchronisationType static ROCKSDB_NAMESPACE::ReusedSynchronisationType toCppReusedSynchronisationType(jbyte reused_synchronisation_type) { switch(reused_synchronisation_type) { case 0x0: return ROCKSDB_NAMESPACE::ReusedSynchronisationType::MUTEX; case 0x1: return ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX; case 0x2: return ROCKSDB_NAMESPACE::ReusedSynchronisationType::THREAD_LOCAL; default: // undefined/default return ROCKSDB_NAMESPACE::ReusedSynchronisationType::ADAPTIVE_MUTEX; } } }; // The portal class for org.rocksdb.SanityLevel class SanityLevelJni { public: // Returns the equivalent org.rocksdb.SanityLevel for the provided // C++ ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel enum static jbyte toJavaSanityLevel( const ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel &sanity_level) { switch (sanity_level) { case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel::kSanityLevelNone: return 0x0; case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel:: kSanityLevelLooselyCompatible: return 0x1; case ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel:: kSanityLevelExactMatch: return -0x01; default: 
return -0x01; // undefined } } // Returns the equivalent C++ ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel enum for // the provided Java org.rocksdb.SanityLevel static ROCKSDB_NAMESPACE::ConfigOptions::SanityLevel toCppSanityLevel( jbyte sanity_level) { switch (sanity_level) { case 0x0: return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelNone; case 0x1: return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelLooselyCompatible; default: // undefined/default return ROCKSDB_NAMESPACE::ConfigOptions::kSanityLevelExactMatch; } } }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_PORTAL_H_ rocksdb-6.11.4/java/rocksjni/ratelimiterjni.cc000066400000000000000000000105561370372246700213500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for RateLimiter. 
#include "include/org_rocksdb_RateLimiter.h" #include "rocksdb/rate_limiter.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_RateLimiter * Method: newRateLimiterHandle * Signature: (JJIBZ)J */ jlong Java_org_rocksdb_RateLimiter_newRateLimiterHandle( JNIEnv* /*env*/, jclass /*jclazz*/, jlong jrate_bytes_per_second, jlong jrefill_period_micros, jint jfairness, jbyte jrate_limiter_mode, jboolean jauto_tune) { auto rate_limiter_mode = ROCKSDB_NAMESPACE::RateLimiterModeJni::toCppRateLimiterMode( jrate_limiter_mode); auto* sptr_rate_limiter = new std::shared_ptr( ROCKSDB_NAMESPACE::NewGenericRateLimiter( static_cast(jrate_bytes_per_second), static_cast(jrefill_period_micros), static_cast(jfairness), rate_limiter_mode, jauto_tune)); return reinterpret_cast(sptr_rate_limiter); } /* * Class: org_rocksdb_RateLimiter * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RateLimiter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( jhandle); delete handle; // delete std::shared_ptr } /* * Class: org_rocksdb_RateLimiter * Method: setBytesPerSecond * Signature: (JJ)V */ void Java_org_rocksdb_RateLimiter_setBytesPerSecond(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong jbytes_per_second) { reinterpret_cast*>(handle) ->get() ->SetBytesPerSecond(jbytes_per_second); } /* * Class: org_rocksdb_RateLimiter * Method: getBytesPerSecond * Signature: (J)J */ jlong Java_org_rocksdb_RateLimiter_getBytesPerSecond(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( handle) ->get() ->GetBytesPerSecond(); } /* * Class: org_rocksdb_RateLimiter * Method: request * Signature: (JJ)V */ void Java_org_rocksdb_RateLimiter_request(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong jbytes) { reinterpret_cast*>(handle) ->get() ->Request(jbytes, ROCKSDB_NAMESPACE::Env::IO_TOTAL); } /* * Class: org_rocksdb_RateLimiter * Method: getSingleBurstBytes * Signature: (J)J */ jlong 
Java_org_rocksdb_RateLimiter_getSingleBurstBytes(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( handle) ->get() ->GetSingleBurstBytes(); } /* * Class: org_rocksdb_RateLimiter * Method: getTotalBytesThrough * Signature: (J)J */ jlong Java_org_rocksdb_RateLimiter_getTotalBytesThrough(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( handle) ->get() ->GetTotalBytesThrough(); } /* * Class: org_rocksdb_RateLimiter * Method: getTotalRequests * Signature: (J)J */ jlong Java_org_rocksdb_RateLimiter_getTotalRequests(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( handle) ->get() ->GetTotalRequests(); } rocksdb-6.11.4/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc000066400000000000000000000016211370372246700262120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include "include/org_rocksdb_RemoveEmptyValueCompactionFilter.h" #include "utilities/compaction_filters/remove_emptyvalue_compactionfilter.h" /* * Class: org_rocksdb_RemoveEmptyValueCompactionFilter * Method: createNewRemoveEmptyValueCompactionFilter0 * Signature: ()J */ jlong Java_org_rocksdb_RemoveEmptyValueCompactionFilter_createNewRemoveEmptyValueCompactionFilter0( JNIEnv* /*env*/, jclass /*jcls*/) { auto* compaction_filter = new ROCKSDB_NAMESPACE::RemoveEmptyValueCompactionFilter(); // set the native handle to our native compaction filter return reinterpret_cast(compaction_filter); } rocksdb-6.11.4/java/rocksjni/restorejni.cc000066400000000000000000000025211370372246700205030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::RestoreOptions methods // from Java side. #include #include #include #include #include "include/org_rocksdb_RestoreOptions.h" #include "rocksdb/utilities/backupable_db.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_RestoreOptions * Method: newRestoreOptions * Signature: (Z)J */ jlong Java_org_rocksdb_RestoreOptions_newRestoreOptions( JNIEnv* /*env*/, jclass /*jcls*/, jboolean keep_log_files) { auto* ropt = new ROCKSDB_NAMESPACE::RestoreOptions(keep_log_files); return reinterpret_cast(ropt); } /* * Class: org_rocksdb_RestoreOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RestoreOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* ropt = reinterpret_cast(jhandle); assert(ropt); delete ropt; } rocksdb-6.11.4/java/rocksjni/rocks_callback_object.cc000066400000000000000000000024311370372246700226020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject #include #include "include/org_rocksdb_RocksCallbackObject.h" #include "jnicallback.h" /* * Class: org_rocksdb_RocksCallbackObject * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RocksCallbackObject_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { // TODO(AR) is deleting from the super class JniCallback OK, or must we delete // the subclass? 
Example hierarchies: // 1) Comparator -> BaseComparatorJniCallback + JniCallback -> // DirectComparatorJniCallback 2) Comparator -> BaseComparatorJniCallback + // JniCallback -> ComparatorJniCallback // I think this is okay, as Comparator and JniCallback both have virtual // destructors... delete reinterpret_cast(handle); } rocksdb-6.11.4/java/rocksjni/rocksdb_exception_test.cc000066400000000000000000000053141370372246700230660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include "include/org_rocksdb_RocksDBExceptionTest.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_RocksDBExceptionTest * Method: raiseException * Signature: ()V */ void Java_org_rocksdb_RocksDBExceptionTest_raiseException(JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, std::string("test message")); } /* * Class: org_rocksdb_RocksDBExceptionTest * Method: raiseExceptionWithStatusCode * Signature: ()V */ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "test message", ROCKSDB_NAMESPACE::Status::NotSupported()); } /* * Class: org_rocksdb_RocksDBExceptionTest * Method: raiseExceptionNoMsgWithStatusCode * Signature: ()V */ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::NotSupported()); } /* * Class: org_rocksdb_RocksDBExceptionTest * Method: raiseExceptionWithStatusCodeSubCode * Signature: ()V */ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeSubCode( JNIEnv* env, jobject 
/*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "test message", ROCKSDB_NAMESPACE::Status::TimedOut( ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout)); } /* * Class: org_rocksdb_RocksDBExceptionTest * Method: raiseExceptionNoMsgWithStatusCodeSubCode * Signature: ()V */ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCodeSubCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::TimedOut( ROCKSDB_NAMESPACE::Status::SubCode::kLockTimeout)); } /* * Class: org_rocksdb_RocksDBExceptionTest * Method: raiseExceptionWithStatusCodeState * Signature: ()V */ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeState( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::Slice state("test state"); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "test message", ROCKSDB_NAMESPACE::Status::NotSupported(state)); } rocksdb-6.11.4/java/rocksjni/rocksjni.cc000066400000000000000000003357601370372246700201570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::DB methods from Java side. 
#include #include #include #include #include #include #include #include #include #include "include/org_rocksdb_RocksDB.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/options.h" #include "rocksdb/types.h" #include "rocksjni/portal.h" #ifdef min #undef min #endif jlong rocksdb_open_helper(JNIEnv* env, jlong jopt_handle, jstring jdb_path, std::function open_fn) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return 0; } auto* opt = reinterpret_cast(jopt_handle); ROCKSDB_NAMESPACE::DB* db = nullptr; ROCKSDB_NAMESPACE::Status s = open_fn(*opt, db_path, &db); env->ReleaseStringUTFChars(jdb_path, db_path); if (s.ok()) { return reinterpret_cast(db); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } } /* * Class: org_rocksdb_RocksDB * Method: open * Signature: (JLjava/lang/String;)J */ jlong Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path) { return rocksdb_open_helper(env, jopt_handle, jdb_path, (ROCKSDB_NAMESPACE::Status(*)( const ROCKSDB_NAMESPACE::Options&, const std::string&, ROCKSDB_NAMESPACE::DB**)) & ROCKSDB_NAMESPACE::DB::Open); } /* * Class: org_rocksdb_RocksDB * Method: openROnly * Signature: (JLjava/lang/String;)J */ jlong Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path) { return rocksdb_open_helper( env, jopt_handle, jdb_path, [](const ROCKSDB_NAMESPACE::Options& options, const std::string& db_path, ROCKSDB_NAMESPACE::DB** db) { return ROCKSDB_NAMESPACE::DB::OpenForReadOnly(options, db_path, db); }); } jlongArray rocksdb_open_helper( JNIEnv* env, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options, std::function&, std::vector*, ROCKSDB_NAMESPACE::DB**)> open_fn) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); 
if (db_path == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } const jsize len_cols = env->GetArrayLength(jcolumn_names); jlong* jco = env->GetLongArrayElements(jcolumn_options, nullptr); if (jco == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } std::vector column_families; jboolean has_exception = JNI_FALSE; ROCKSDB_NAMESPACE::JniUtil::byteStrings( env, jcolumn_names, [](const char* str_data, const size_t str_len) { return std::string(str_data, str_len); }, [&jco, &column_families](size_t idx, std::string cf_name) { ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = reinterpret_cast(jco[idx]); column_families.push_back( ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); }, &has_exception); env->ReleaseLongArrayElements(jcolumn_options, jco, JNI_ABORT); if (has_exception == JNI_TRUE) { // exception occurred env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } auto* opt = reinterpret_cast(jopt_handle); std::vector cf_handles; ROCKSDB_NAMESPACE::DB* db = nullptr; ROCKSDB_NAMESPACE::Status s = open_fn(*opt, db_path, column_families, &cf_handles, &db); // we have now finished with db_path env->ReleaseStringUTFChars(jdb_path, db_path); // check if open operation was successful if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } const jsize resultsLen = 1 + len_cols; // db handle + column family handles std::unique_ptr results = std::unique_ptr(new jlong[resultsLen]); results[0] = reinterpret_cast(db); for (int i = 1; i <= len_cols; i++) { results[i] = reinterpret_cast(cf_handles[i - 1]); } jlongArray jresults = env->NewLongArray(resultsLen); if (jresults == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresults); return nullptr; } 
return jresults; } /* * Class: org_rocksdb_RocksDB * Method: openROnly * Signature: (JLjava/lang/String;[[B[J)[J */ jlongArray Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options) { return rocksdb_open_helper( env, jopt_handle, jdb_path, jcolumn_names, jcolumn_options, [](const ROCKSDB_NAMESPACE::DBOptions& options, const std::string& db_path, const std::vector& column_families, std::vector* handles, ROCKSDB_NAMESPACE::DB** db) { return ROCKSDB_NAMESPACE::DB::OpenForReadOnly( options, db_path, column_families, handles, db); }); } /* * Class: org_rocksdb_RocksDB * Method: open * Signature: (JLjava/lang/String;[[B[J)[J */ jlongArray Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options) { return rocksdb_open_helper( env, jopt_handle, jdb_path, jcolumn_names, jcolumn_options, (ROCKSDB_NAMESPACE::Status(*)( const ROCKSDB_NAMESPACE::DBOptions&, const std::string&, const std::vector&, std::vector*, ROCKSDB_NAMESPACE::DB**)) & ROCKSDB_NAMESPACE::DB::Open); } /* * Class: org_rocksdb_RocksDB * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_RocksDB_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* db = reinterpret_cast(jhandle); assert(db != nullptr); delete db; } /* * Class: org_rocksdb_RocksDB * Method: closeDatabase * Signature: (J)V */ void Java_org_rocksdb_RocksDB_closeDatabase( JNIEnv* env, jclass, jlong jhandle) { auto* db = reinterpret_cast(jhandle); assert(db != nullptr); ROCKSDB_NAMESPACE::Status s = db->Close(); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_RocksDB * Method: listColumnFamilies * Signature: (JLjava/lang/String;)[[B */ jobjectArray Java_org_rocksdb_RocksDB_listColumnFamilies( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path) { 
std::vector column_family_names; const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } auto* opt = reinterpret_cast(jopt_handle); ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DB::ListColumnFamilies( *opt, db_path, &column_family_names); env->ReleaseStringUTFChars(jdb_path, db_path); jobjectArray jcolumn_family_names = ROCKSDB_NAMESPACE::JniUtil::stringsBytes(env, column_family_names); return jcolumn_family_names; } /* * Class: org_rocksdb_RocksDB * Method: createColumnFamily * Signature: (J[BIJ)J */ jlong Java_org_rocksdb_RocksDB_createColumnFamily( JNIEnv* env, jobject, jlong jhandle, jbyteArray jcf_name, jint jcf_name_len, jlong jcf_options_handle) { auto* db = reinterpret_cast(jhandle); jboolean has_exception = JNI_FALSE; const std::string cf_name = ROCKSDB_NAMESPACE::JniUtil::byteString( env, jcf_name, jcf_name_len, [](const char* str, const size_t len) { return std::string(str, len); }, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return 0; } auto* cf_options = reinterpret_cast( jcf_options_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; ROCKSDB_NAMESPACE::Status s = db->CreateColumnFamily(*cf_options, cf_name, &cf_handle); if (!s.ok()) { // error occurred ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } return reinterpret_cast(cf_handle); } /* * Class: org_rocksdb_RocksDB * Method: createColumnFamilies * Signature: (JJ[[B)[J */ jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__JJ_3_3B( JNIEnv* env, jobject, jlong jhandle, jlong jcf_options_handle, jobjectArray jcf_names) { auto* db = reinterpret_cast(jhandle); auto* cf_options = reinterpret_cast( jcf_options_handle); jboolean has_exception = JNI_FALSE; std::vector cf_names; ROCKSDB_NAMESPACE::JniUtil::byteStrings( env, jcf_names, [](const char* str, const size_t len) { return std::string(str, len); }, [&cf_names](const size_t, std::string str) { 
cf_names.push_back(str); }, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return nullptr; } std::vector cf_handles; ROCKSDB_NAMESPACE::Status s = db->CreateColumnFamilies(*cf_options, cf_names, &cf_handles); if (!s.ok()) { // error occurred ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } jlongArray jcf_handles = ROCKSDB_NAMESPACE::JniUtil::toJPointers< ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, cf_handles, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return nullptr; } return jcf_handles; } /* * Class: org_rocksdb_RocksDB * Method: createColumnFamilies * Signature: (J[J[[B)[J */ jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__J_3J_3_3B( JNIEnv* env, jobject, jlong jhandle, jlongArray jcf_options_handles, jobjectArray jcf_names) { auto* db = reinterpret_cast(jhandle); const jsize jlen = env->GetArrayLength(jcf_options_handles); std::vector cf_descriptors; cf_descriptors.reserve(jlen); jboolean jcf_options_handles_is_copy = JNI_FALSE; jlong *jcf_options_handles_elems = env->GetLongArrayElements(jcf_options_handles, &jcf_options_handles_is_copy); if(jcf_options_handles_elems == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } // extract the column family descriptors jboolean has_exception = JNI_FALSE; for (jsize i = 0; i < jlen; i++) { auto* cf_options = reinterpret_cast( jcf_options_handles_elems[i]); jbyteArray jcf_name = static_cast( env->GetObjectArrayElement(jcf_names, i)); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->ReleaseLongArrayElements(jcf_options_handles, jcf_options_handles_elems, JNI_ABORT); return nullptr; } const std::string cf_name = ROCKSDB_NAMESPACE::JniUtil::byteString( env, jcf_name, [](const char* str, const size_t len) { return std::string(str, len); }, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred env->DeleteLocalRef(jcf_name); 
env->ReleaseLongArrayElements(jcf_options_handles, jcf_options_handles_elems, JNI_ABORT); return nullptr; } cf_descriptors.push_back( ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); env->DeleteLocalRef(jcf_name); } std::vector cf_handles; ROCKSDB_NAMESPACE::Status s = db->CreateColumnFamilies(cf_descriptors, &cf_handles); env->ReleaseLongArrayElements(jcf_options_handles, jcf_options_handles_elems, JNI_ABORT); if (!s.ok()) { // error occurred ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } jlongArray jcf_handles = ROCKSDB_NAMESPACE::JniUtil::toJPointers< ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, cf_handles, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return nullptr; } return jcf_handles; } /* * Class: org_rocksdb_RocksDB * Method: dropColumnFamily * Signature: (JJ)V; */ void Java_org_rocksdb_RocksDB_dropColumnFamily( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto* cf_handle = reinterpret_cast(jcf_handle); ROCKSDB_NAMESPACE::Status s = db_handle->DropColumnFamily(cf_handle); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: dropColumnFamilies * Signature: (J[J)V */ void Java_org_rocksdb_RocksDB_dropColumnFamilies( JNIEnv* env, jobject, jlong jdb_handle, jlongArray jcolumn_family_handles) { auto* db_handle = reinterpret_cast(jdb_handle); std::vector cf_handles; if (jcolumn_family_handles != nullptr) { const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); if (jcfh == nullptr) { // exception thrown: OutOfMemoryError return; } for (jsize i = 0; i < len_cols; i++) { auto* cf_handle = reinterpret_cast(jcfh[i]); cf_handles.push_back(cf_handle); } env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); } ROCKSDB_NAMESPACE::Status s = 
db_handle->DropColumnFamilies(cf_handles); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Put /** * @return true if the put succeeded, false if a Java Exception was thrown */ bool rocksdb_put_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] key; return false; } jbyte* value = new jbyte[jval_len]; env->GetByteArrayRegion(jval, jval_off, jval_len, value); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] value; delete[] key; return false; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), jval_len); ROCKSDB_NAMESPACE::Status s; if (cf_handle != nullptr) { s = db->Put(write_options, cf_handle, key_slice, value_slice); } else { // backwards compatibility s = db->Put(write_options, key_slice, value_slice); } // cleanup delete[] value; delete[] key; if (s.ok()) { return true; } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return false; } } /* * Class: org_rocksdb_RocksDB * Method: put * Signature: (J[BII[BII)V */ void Java_org_rocksdb_RocksDB_put__J_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); rocksdb_put_helper(env, db, default_write_options, nullptr, jkey, jkey_off, jkey_len, jval, 
jval_off, jval_len); } /* * Class: org_rocksdb_RocksDB * Method: put * Signature: (J[BII[BIIJ)V */ void Java_org_rocksdb_RocksDB_put__J_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_put_helper(env, db, default_write_options, cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: put * Signature: (JJ[BII[BII)V */ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); rocksdb_put_helper(env, db, *write_options, nullptr, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } /* * Class: org_rocksdb_RocksDB * Method: put * Signature: (JJ[BII[BIIJ)V */ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_put_helper(env, db, *write_options, cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid 
ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: putDirect * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ void Java_org_rocksdb_RocksDB_putDirect( JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto put = [&env, &db, &cf_handle, &write_options]( ROCKSDB_NAMESPACE::Slice& key, ROCKSDB_NAMESPACE::Slice& value) { ROCKSDB_NAMESPACE::Status s; if (cf_handle == nullptr) { s = db->Put(*write_options, key, value); } else { s = db->Put(*write_options, cf_handle, key, value); } if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); }; ROCKSDB_NAMESPACE::JniUtil::kv_op_direct(put, env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Delete() /** * @return true if the delete succeeded, false if a Java Exception was thrown */ bool rocksdb_delete_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] key; return false; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); ROCKSDB_NAMESPACE::Status s; if (cf_handle != nullptr) { s = db->Delete(write_options, cf_handle, key_slice); } else { // backwards compatibility s = db->Delete(write_options, key_slice); } // cleanup delete[] key; if (s.ok()) { return true; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 
false; } /* * Class: org_rocksdb_RocksDB * Method: delete * Signature: (J[BII)V */ void Java_org_rocksdb_RocksDB_delete__J_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); rocksdb_delete_helper(env, db, default_write_options, nullptr, jkey, jkey_off, jkey_len); } /* * Class: org_rocksdb_RocksDB * Method: delete * Signature: (J[BIIJ)V */ void Java_org_rocksdb_RocksDB_delete__J_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_delete_helper(env, db, default_write_options, cf_handle, jkey, jkey_off, jkey_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: delete * Signature: (JJ[BII)V */ void Java_org_rocksdb_RocksDB_delete__JJ_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_off, jint jkey_len) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options); rocksdb_delete_helper(env, db, *write_options, nullptr, jkey, jkey_off, jkey_len); } /* * Class: org_rocksdb_RocksDB * Method: delete * Signature: (JJ[BIIJ)V */ void Java_org_rocksdb_RocksDB_delete__JJ_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { 
rocksdb_delete_helper(env, db, *write_options, cf_handle, jkey, jkey_off, jkey_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::SingleDelete() /** * @return true if the single delete succeeded, false if a Java Exception * was thrown */ bool rocksdb_single_delete_helper( JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jkey, jint jkey_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return false; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); ROCKSDB_NAMESPACE::Status s; if (cf_handle != nullptr) { s = db->SingleDelete(write_options, cf_handle, key_slice); } else { // backwards compatibility s = db->SingleDelete(write_options, key_slice); } // trigger java unref on key and value. // by passing JNI_ABORT, it will simply release the reference without // copying the result back to the java byte array. 
env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); if (s.ok()) { return true; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return false; } /* * Class: org_rocksdb_RocksDB * Method: singleDelete * Signature: (J[BI)V */ void Java_org_rocksdb_RocksDB_singleDelete__J_3BI( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_len) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); rocksdb_single_delete_helper(env, db, default_write_options, nullptr, jkey, jkey_len); } /* * Class: org_rocksdb_RocksDB * Method: singleDelete * Signature: (J[BIJ)V */ void Java_org_rocksdb_RocksDB_singleDelete__J_3BIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_single_delete_helper(env, db, default_write_options, cf_handle, jkey, jkey_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: singleDelete * Signature: (JJ[BIJ)V */ void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BI( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_len) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options); rocksdb_single_delete_helper(env, db, *write_options, nullptr, jkey, jkey_len); } /* * Class: org_rocksdb_RocksDB * Method: singleDelete * Signature: (JJ[BIJ)V */ void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = 
reinterpret_cast(jwrite_options); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_single_delete_helper(env, db, *write_options, cf_handle, jkey, jkey_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::DeleteRange() /** * @return true if the delete range succeeded, false if a Java Exception * was thrown */ bool rocksdb_delete_range_helper( JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { jbyte* begin_key = new jbyte[jbegin_key_len]; env->GetByteArrayRegion(jbegin_key, jbegin_key_off, jbegin_key_len, begin_key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] begin_key; return false; } ROCKSDB_NAMESPACE::Slice begin_key_slice(reinterpret_cast(begin_key), jbegin_key_len); jbyte* end_key = new jbyte[jend_key_len]; env->GetByteArrayRegion(jend_key, jend_key_off, jend_key_len, end_key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] begin_key; delete[] end_key; return false; } ROCKSDB_NAMESPACE::Slice end_key_slice(reinterpret_cast(end_key), jend_key_len); ROCKSDB_NAMESPACE::Status s = db->DeleteRange(write_options, cf_handle, begin_key_slice, end_key_slice); // cleanup delete[] begin_key; delete[] end_key; if (s.ok()) { return true; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return false; } /* * Class: org_rocksdb_RocksDB * Method: deleteRange * Signature: (J[BII[BII)V */ void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin_key, jint jbegin_key_off, jint 
jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); rocksdb_delete_range_helper(env, db, default_write_options, nullptr, jbegin_key, jbegin_key_off, jbegin_key_len, jend_key, jend_key_off, jend_key_len); } jint rocksdb_get_helper_direct( JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::ReadOptions& read_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* column_family_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len, bool* has_exception) { static const int kNotFound = -1; static const int kStatusError = -2; static const int kArgumentError = -3; char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); if (key == nullptr) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Invalid key argument (argument is not a valid direct ByteBuffer)"); *has_exception = true; return kArgumentError; } if (env->GetDirectBufferCapacity(jkey) < (jkey_off + jkey_len)) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Invalid key argument. Capacity is less than requested region (offset " "+ length)."); *has_exception = true; return kArgumentError; } char* value = reinterpret_cast(env->GetDirectBufferAddress(jval)); if (value == nullptr) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Invalid value argument (argument is not a valid direct ByteBuffer)"); *has_exception = true; return kArgumentError; } if (env->GetDirectBufferCapacity(jval) < (jval_off + jval_len)) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "Invalid value argument. 
Capacity is less than requested region " "(offset + length)."); *has_exception = true; return kArgumentError; } key += jkey_off; value += jval_off; ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); // TODO(yhchiang): we might save one memory allocation here by adding // a DB::Get() function which takes preallocated jbyte* as input. std::string cvalue; ROCKSDB_NAMESPACE::Status s; if (column_family_handle != nullptr) { s = db->Get(read_options, column_family_handle, key_slice, &cvalue); } else { // backwards compatibility s = db->Get(read_options, key_slice, &cvalue); } if (s.IsNotFound()) { *has_exception = false; return kNotFound; } else if (!s.ok()) { *has_exception = true; // Here since we are throwing a Java exception from c++ side. // As a result, c++ does not know calling this function will in fact // throwing an exception. As a result, the execution flow will // not stop here, and codes after this throw will still be // executed. ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); // Return a dummy const value to avoid compilation error, although // java side might not have a chance to get the return value :) return kStatusError; } const jint cvalue_len = static_cast(cvalue.size()); const jint length = std::min(jval_len, cvalue_len); memcpy(value, cvalue.c_str(), length); *has_exception = false; return cvalue_len; } /* * Class: org_rocksdb_RocksDB * Method: deleteRange * Signature: (J[BII[BIIJ)V */ void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_delete_range_helper(env, db, default_write_options, cf_handle, jbegin_key, jbegin_key_off, 
jbegin_key_len, jend_key, jend_key_off, jend_key_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: deleteRange * Signature: (JJ[BII[BII)V */ void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options); rocksdb_delete_range_helper(env, db, *write_options, nullptr, jbegin_key, jbegin_key_off, jbegin_key_len, jend_key, jend_key_off, jend_key_len); } /* * Class: org_rocksdb_RocksDB * Method: deleteRange * Signature: (JJ[BII[BIIJ)V */ void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_delete_range_helper(env, db, *write_options, cf_handle, jbegin_key, jbegin_key_off, jbegin_key_len, jend_key, jend_key_off, jend_key_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: getDirect * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I */ jint Java_org_rocksdb_RocksDB_getDirect(JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jropt_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto* ro_opt = 
reinterpret_cast(jropt_handle); auto* cf_handle = reinterpret_cast(jcf_handle); bool has_exception = false; return rocksdb_get_helper_direct( env, db_handle, ro_opt == nullptr ? ROCKSDB_NAMESPACE::ReadOptions() : *ro_opt, cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len, &has_exception); } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Merge /** * @return true if the merge succeeded, false if a Java Exception was thrown */ bool rocksdb_merge_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::WriteOptions& write_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] key; return false; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); jbyte* value = new jbyte[jval_len]; env->GetByteArrayRegion(jval, jval_off, jval_len, value); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] value; delete[] key; return false; } ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), jval_len); ROCKSDB_NAMESPACE::Status s; if (cf_handle != nullptr) { s = db->Merge(write_options, cf_handle, key_slice, value_slice); } else { s = db->Merge(write_options, key_slice, value_slice); } // cleanup delete[] value; delete[] key; if (s.ok()) { return true; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return false; } /* * Class: org_rocksdb_RocksDB * Method: merge * Signature: (J[BII[BII)V */ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { auto* db = reinterpret_cast(jdb_handle); static const 
ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); rocksdb_merge_helper(env, db, default_write_options, nullptr, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } /* * Class: org_rocksdb_RocksDB * Method: merge * Signature: (J[BII[BIIJ)V */ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { rocksdb_merge_helper(env, db, default_write_options, cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } /* * Class: org_rocksdb_RocksDB * Method: merge * Signature: (JJ[BII[BII)V */ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); rocksdb_merge_helper(env, db, *write_options, nullptr, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } /* * Class: org_rocksdb_RocksDB * Method: merge * Signature: (JJ[BII[BIIJ)V */ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { 
rocksdb_merge_helper(env, db, *write_options, cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); } } jlong rocksdb_iterator_helper( ROCKSDB_NAMESPACE::DB* db, ROCKSDB_NAMESPACE::ReadOptions read_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle) { ROCKSDB_NAMESPACE::Iterator* iterator = nullptr; if (cf_handle != nullptr) { iterator = db->NewIterator(read_options, cf_handle); } else { iterator = db->NewIterator(read_options); } return reinterpret_cast(iterator); } /* * Class: org_rocksdb_RocksDB * Method: deleteDirect * Signature: (JJLjava/nio/ByteBuffer;IIJ)V */ void Java_org_rocksdb_RocksDB_deleteDirect(JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options, jobject jkey, jint jkey_offset, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options); auto* cf_handle = reinterpret_cast(jcf_handle); auto remove = [&env, &db, &write_options, &cf_handle](ROCKSDB_NAMESPACE::Slice& key) { ROCKSDB_NAMESPACE::Status s; if (cf_handle == nullptr) { s = db->Delete(*write_options, key); } else { s = db->Delete(*write_options, cf_handle, key); } if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(remove, env, jkey, jkey_offset, jkey_len); } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Write /* * Class: org_rocksdb_RocksDB * Method: write0 * Signature: (JJJ)V */ void Java_org_rocksdb_RocksDB_write0( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jlong jwb_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* wb = reinterpret_cast(jwb_handle); ROCKSDB_NAMESPACE::Status s = db->Write(*write_options, wb); if (!s.ok()) { 
ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: write1 * Signature: (JJJ)V */ void Java_org_rocksdb_RocksDB_write1( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jlong jwbwi_handle) { auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* wbwi = reinterpret_cast(jwbwi_handle); auto* wb = wbwi->GetWriteBatch(); ROCKSDB_NAMESPACE::Status s = db->Write(*write_options, wb); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Get jbyteArray rocksdb_get_helper( JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::ReadOptions& read_opt, ROCKSDB_NAMESPACE::ColumnFamilyHandle* column_family_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] key; return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); std::string value; ROCKSDB_NAMESPACE::Status s; if (column_family_handle != nullptr) { s = db->Get(read_opt, column_family_handle, key_slice, &value); } else { // backwards compatibility s = db->Get(read_opt, key_slice, &value); } // cleanup delete[] key; if (s.IsNotFound()) { return nullptr; } if (s.ok()) { jbyteArray jret_value = ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, value); if (jret_value == nullptr) { // exception occurred return nullptr; } return jret_value; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (J[BII)[B */ jbyteArray Java_org_rocksdb_RocksDB_get__J_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { return rocksdb_get_helper( env, 
reinterpret_cast(jdb_handle), ROCKSDB_NAMESPACE::ReadOptions(), nullptr, jkey, jkey_off, jkey_len); } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (J[BIIJ)[B */ jbyteArray Java_org_rocksdb_RocksDB_get__J_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { auto db_handle = reinterpret_cast(jdb_handle); auto cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { return rocksdb_get_helper(env, db_handle, ROCKSDB_NAMESPACE::ReadOptions(), cf_handle, jkey, jkey_off, jkey_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); return nullptr; } } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (JJ[BII)[B */ jbyteArray Java_org_rocksdb_RocksDB_get__JJ_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { return rocksdb_get_helper( env, reinterpret_cast(jdb_handle), *reinterpret_cast(jropt_handle), nullptr, jkey, jkey_off, jkey_len); } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (JJ[BIIJ)[B */ jbyteArray Java_org_rocksdb_RocksDB_get__JJ_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto& ro_opt = *reinterpret_cast(jropt_handle); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { return rocksdb_get_helper( env, db_handle, ro_opt, cf_handle, jkey, jkey_off, jkey_len); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); return nullptr; } } jint rocksdb_get_helper( JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::ReadOptions& read_options, ROCKSDB_NAMESPACE::ColumnFamilyHandle* column_family_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, 
jbyteArray jval, jint jval_off, jint jval_len, bool* has_exception) { static const int kNotFound = -1; static const int kStatusError = -2; jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: OutOfMemoryError delete[] key; *has_exception = true; return kStatusError; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); // TODO(yhchiang): we might save one memory allocation here by adding // a DB::Get() function which takes preallocated jbyte* as input. std::string cvalue; ROCKSDB_NAMESPACE::Status s; if (column_family_handle != nullptr) { s = db->Get(read_options, column_family_handle, key_slice, &cvalue); } else { // backwards compatibility s = db->Get(read_options, key_slice, &cvalue); } // cleanup delete[] key; if (s.IsNotFound()) { *has_exception = false; return kNotFound; } else if (!s.ok()) { *has_exception = true; // Here since we are throwing a Java exception from c++ side. // As a result, c++ does not know calling this function will in fact // throwing an exception. As a result, the execution flow will // not stop here, and codes after this throw will still be // executed. 
ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); // Return a dummy const value to avoid compilation error, although // java side might not have a chance to get the return value :) return kStatusError; } const jint cvalue_len = static_cast(cvalue.size()); const jint length = std::min(jval_len, cvalue_len); env->SetByteArrayRegion( jval, jval_off, length, const_cast(reinterpret_cast(cvalue.c_str()))); if (env->ExceptionCheck()) { // exception thrown: OutOfMemoryError *has_exception = true; return kStatusError; } *has_exception = false; return cvalue_len; } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (J[BII[BII)I */ jint Java_org_rocksdb_RocksDB_get__J_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { bool has_exception = false; return rocksdb_get_helper( env, reinterpret_cast(jdb_handle), ROCKSDB_NAMESPACE::ReadOptions(), nullptr, jkey, jkey_off, jkey_len, jval, jval_off, jval_len, &has_exception); } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (J[BII[BIIJ)I */ jint Java_org_rocksdb_RocksDB_get__J_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { bool has_exception = false; return rocksdb_get_helper(env, db_handle, ROCKSDB_NAMESPACE::ReadOptions(), cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len, &has_exception); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); // will never be evaluated return 0; } } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (JJ[BII[BII)I */ jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray 
jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { bool has_exception = false; return rocksdb_get_helper( env, reinterpret_cast(jdb_handle), *reinterpret_cast(jropt_handle), nullptr, jkey, jkey_off, jkey_len, jval, jval_off, jval_len, &has_exception); } /* * Class: org_rocksdb_RocksDB * Method: get * Signature: (JJ[BII[BIIJ)I */ jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto& ro_opt = *reinterpret_cast(jropt_handle); auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { bool has_exception = false; return rocksdb_get_helper(env, db_handle, ro_opt, cf_handle, jkey, jkey_off, jkey_len, jval, jval_off, jval_len, &has_exception); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); // will never be evaluated return 0; } } inline void multi_get_helper_release_keys( JNIEnv* env, std::vector>& keys_to_free) { auto end = keys_to_free.end(); for (auto it = keys_to_free.begin(); it != end; ++it) { delete[] it->first; env->DeleteLocalRef(it->second); } keys_to_free.clear(); } /** * cf multi get * * @return byte[][] of values or nullptr if an exception occurs */ jobjectArray multi_get_helper(JNIEnv* env, jobject, ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::ReadOptions& rOpt, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens, jlongArray jcolumn_family_handles) { std::vector cf_handles; if (jcolumn_family_handles != nullptr) { const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); if (jcfh == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < len_cols; i++) { auto* 
cf_handle = reinterpret_cast(jcfh[i]); cf_handles.push_back(cf_handle); } env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); } const jsize len_keys = env->GetArrayLength(jkeys); if (env->EnsureLocalCapacity(len_keys) != 0) { // exception thrown: OutOfMemoryError return nullptr; } jint* jkey_off = env->GetIntArrayElements(jkey_offs, nullptr); if (jkey_off == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } jint* jkey_len = env->GetIntArrayElements(jkey_lens, nullptr); if (jkey_len == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); return nullptr; } std::vector keys; std::vector> keys_to_free; for (jsize i = 0; i < len_keys; i++) { jobject jkey = env->GetObjectArrayElement(jkeys, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); multi_get_helper_release_keys(env, keys_to_free); return nullptr; } jbyteArray jkey_ba = reinterpret_cast(jkey); const jint len_key = jkey_len[i]; jbyte* key = new jbyte[len_key]; env->GetByteArrayRegion(jkey_ba, jkey_off[i], len_key, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] key; env->DeleteLocalRef(jkey); env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); multi_get_helper_release_keys(env, keys_to_free); return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), len_key); keys.push_back(key_slice); keys_to_free.push_back(std::pair(key, jkey)); } // cleanup jkey_off and jken_len env->ReleaseIntArrayElements(jkey_lens, jkey_len, JNI_ABORT); env->ReleaseIntArrayElements(jkey_offs, jkey_off, JNI_ABORT); std::vector values; std::vector s; if (cf_handles.size() == 0) { s = db->MultiGet(rOpt, keys, &values); } else { s = db->MultiGet(rOpt, 
cf_handles, keys, &values); } // free up allocated byte arrays multi_get_helper_release_keys(env, keys_to_free); // prepare the results jobjectArray jresults = ROCKSDB_NAMESPACE::ByteJni::new2dByteArray( env, static_cast(s.size())); if (jresults == nullptr) { // exception occurred return nullptr; } // TODO(AR) it is not clear to me why EnsureLocalCapacity is needed for the // loop as we cleanup references with env->DeleteLocalRef(jentry_value); if (env->EnsureLocalCapacity(static_cast(s.size())) != 0) { // exception thrown: OutOfMemoryError return nullptr; } // add to the jresults for (std::vector::size_type i = 0; i != s.size(); i++) { if (s[i].ok()) { std::string* value = &values[i]; const jsize jvalue_len = static_cast(value->size()); jbyteArray jentry_value = env->NewByteArray(jvalue_len); if (jentry_value == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion( jentry_value, 0, static_cast(jvalue_len), const_cast(reinterpret_cast(value->c_str()))); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jentry_value); return nullptr; } env->SetObjectArrayElement(jresults, static_cast(i), jentry_value); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jentry_value); return nullptr; } env->DeleteLocalRef(jentry_value); } } return jresults; } /* * Class: org_rocksdb_RocksDB * Method: multiGet * Signature: (J[[B[I[I)[[B */ jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I( JNIEnv* env, jobject jdb, jlong jdb_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens) { return multi_get_helper( env, jdb, reinterpret_cast(jdb_handle), ROCKSDB_NAMESPACE::ReadOptions(), jkeys, jkey_offs, jkey_lens, nullptr); } /* * Class: org_rocksdb_RocksDB * Method: multiGet * Signature: (J[[B[I[I[J)[[B */ jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I_3J( JNIEnv* env, jobject jdb, jlong jdb_handle, jobjectArray 
jkeys, jintArray jkey_offs, jintArray jkey_lens, jlongArray jcolumn_family_handles) { return multi_get_helper(env, jdb, reinterpret_cast(jdb_handle), ROCKSDB_NAMESPACE::ReadOptions(), jkeys, jkey_offs, jkey_lens, jcolumn_family_handles); } /* * Class: org_rocksdb_RocksDB * Method: multiGet * Signature: (JJ[[B[I[I)[[B */ jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I( JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens) { return multi_get_helper( env, jdb, reinterpret_cast(jdb_handle), *reinterpret_cast(jropt_handle), jkeys, jkey_offs, jkey_lens, nullptr); } /* * Class: org_rocksdb_RocksDB * Method: multiGet * Signature: (JJ[[B[I[I[J)[[B */ jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J( JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens, jlongArray jcolumn_family_handles) { return multi_get_helper( env, jdb, reinterpret_cast(jdb_handle), *reinterpret_cast(jropt_handle), jkeys, jkey_offs, jkey_lens, jcolumn_family_handles); } ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::KeyMayExist bool key_may_exist_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len, bool* has_exception, std::string* value, bool* value_found) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } ROCKSDB_NAMESPACE::ReadOptions read_opts = jread_opts_handle == 0 ? 
ROCKSDB_NAMESPACE::ReadOptions() : *(reinterpret_cast( jread_opts_handle)); jbyte* key = new jbyte[jkey_len]; env->GetByteArrayRegion(jkey, jkey_offset, jkey_len, key); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete[] key; *has_exception = true; return false; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); const bool exists = db->KeyMayExist( read_opts, cf_handle, key_slice, value, value_found); // cleanup delete[] key; return exists; } /* * Class: org_rocksdb_RocksDB * Method: keyMayExist * Signature: (JJJ[BII)Z */ jboolean Java_org_rocksdb_RocksDB_keyMayExist( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { bool has_exception = false; std::string value; bool value_found = false; const bool exists = key_may_exist_helper( env, jdb_handle, jcf_handle, jread_opts_handle, jkey, jkey_offset, jkey_len, &has_exception, &value, &value_found); if (has_exception) { // java exception already raised return false; } return static_cast(exists); } /* * Class: org_rocksdb_RocksDB * Method: keyMayExistFoundValue * Signature: (JJJ[BII)[[B */ jobjectArray Java_org_rocksdb_RocksDB_keyMayExistFoundValue( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { bool has_exception = false; std::string value; bool value_found = false; const bool exists = key_may_exist_helper( env, jdb_handle, jcf_handle, jread_opts_handle, jkey, jkey_offset, jkey_len, &has_exception, &value, &value_found); if (has_exception) { // java exception already raised return nullptr; } jbyte result_flags[1]; if (!exists) { result_flags[0] = 0; } else if (!value_found) { result_flags[0] = 1; } else { // found result_flags[0] = 2; } jobjectArray jresults = ROCKSDB_NAMESPACE::ByteJni::new2dByteArray(env, 2); if (jresults == nullptr) { // exception occurred return nullptr; } // prepare the 
result flag jbyteArray jresult_flags = env->NewByteArray(1); if (jresult_flags == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion(jresult_flags, 0, 1, result_flags); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresult_flags); return nullptr; } env->SetObjectArrayElement(jresults, 0, jresult_flags); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresult_flags); return nullptr; } env->DeleteLocalRef(jresult_flags); if (result_flags[0] == 2) { // set the value const jsize jvalue_len = static_cast(value.size()); jbyteArray jresult_value = env->NewByteArray(jvalue_len); if (jresult_value == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion(jresult_value, 0, jvalue_len, const_cast(reinterpret_cast(value.data()))); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresult_value); return nullptr; } env->SetObjectArrayElement(jresults, 1, jresult_value); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresult_value); return nullptr; } env->DeleteLocalRef(jresult_value); } return jresults; } /* * Class: org_rocksdb_RocksDB * Method: iterator * Signature: (J)J */ jlong Java_org_rocksdb_RocksDB_iterator__J( JNIEnv*, jobject, jlong db_handle) { auto* db = reinterpret_cast(db_handle); return rocksdb_iterator_helper(db, ROCKSDB_NAMESPACE::ReadOptions(), nullptr); } /* * Class: org_rocksdb_RocksDB * Method: iterator * Signature: (JJ)J */ jlong Java_org_rocksdb_RocksDB_iterator__JJ( JNIEnv*, jobject, jlong db_handle, jlong jread_options_handle) { auto* db = reinterpret_cast(db_handle); auto& read_options = *reinterpret_cast(jread_options_handle); return rocksdb_iterator_helper(db, read_options, nullptr); } /* * Class: org_rocksdb_RocksDB * Method: iteratorCF * Signature: (JJ)J */ jlong 
Java_org_rocksdb_RocksDB_iteratorCF__JJ( JNIEnv*, jobject, jlong db_handle, jlong jcf_handle) { auto* db = reinterpret_cast(db_handle); auto* cf_handle = reinterpret_cast(jcf_handle); return rocksdb_iterator_helper(db, ROCKSDB_NAMESPACE::ReadOptions(), cf_handle); } /* * Class: org_rocksdb_RocksDB * Method: iteratorCF * Signature: (JJJ)J */ jlong Java_org_rocksdb_RocksDB_iteratorCF__JJJ( JNIEnv*, jobject, jlong db_handle, jlong jcf_handle, jlong jread_options_handle) { auto* db = reinterpret_cast(db_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto& read_options = *reinterpret_cast(jread_options_handle); return rocksdb_iterator_helper(db, read_options, cf_handle); } /* * Class: org_rocksdb_RocksDB * Method: iterators * Signature: (J[JJ)[J */ jlongArray Java_org_rocksdb_RocksDB_iterators( JNIEnv* env, jobject, jlong db_handle, jlongArray jcolumn_family_handles, jlong jread_options_handle) { auto* db = reinterpret_cast(db_handle); auto& read_options = *reinterpret_cast(jread_options_handle); std::vector cf_handles; if (jcolumn_family_handles != nullptr) { const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); if (jcfh == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < len_cols; i++) { auto* cf_handle = reinterpret_cast(jcfh[i]); cf_handles.push_back(cf_handle); } env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); } std::vector iterators; ROCKSDB_NAMESPACE::Status s = db->NewIterators(read_options, cf_handles, &iterators); if (s.ok()) { jlongArray jLongArray = env->NewLongArray(static_cast(iterators.size())); if (jLongArray == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (std::vector::size_type i = 0; i < iterators.size(); i++) { env->SetLongArrayRegion( jLongArray, static_cast(i), 1, const_cast(reinterpret_cast(&iterators[i]))); if (env->ExceptionCheck()) { // exception thrown: 
ArrayIndexOutOfBoundsException env->DeleteLocalRef(jLongArray); return nullptr; } } return jLongArray; } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } } /* * Method: getSnapshot * Signature: (J)J */ jlong Java_org_rocksdb_RocksDB_getSnapshot( JNIEnv*, jobject, jlong db_handle) { auto* db = reinterpret_cast(db_handle); const ROCKSDB_NAMESPACE::Snapshot* snapshot = db->GetSnapshot(); return reinterpret_cast(snapshot); } /* * Method: releaseSnapshot * Signature: (JJ)V */ void Java_org_rocksdb_RocksDB_releaseSnapshot( JNIEnv*, jobject, jlong db_handle, jlong snapshot_handle) { auto* db = reinterpret_cast(db_handle); auto* snapshot = reinterpret_cast(snapshot_handle); db->ReleaseSnapshot(snapshot); } /* * Class: org_rocksdb_RocksDB * Method: getProperty * Signature: (JJLjava/lang/String;I)Ljava/lang/String; */ jstring Java_org_rocksdb_RocksDB_getProperty( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jstring jproperty, jint jproperty_len) { const char* property = env->GetStringUTFChars(jproperty, nullptr); if (property == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } std::string property_value; bool retCode = db->GetProperty(cf_handle, property_name, &property_value); env->ReleaseStringUTFChars(jproperty, property); if (retCode) { return env->NewStringUTF(property_value.c_str()); } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::NotFound()); return nullptr; } /* * Class: org_rocksdb_RocksDB * Method: getMapProperty * Signature: (JJLjava/lang/String;I)Ljava/util/Map; */ jobject Java_org_rocksdb_RocksDB_getMapProperty( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jstring jproperty, jint 
jproperty_len) { const char* property = env->GetStringUTFChars(jproperty, nullptr); if (property == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } std::map property_value; bool retCode = db->GetMapProperty(cf_handle, property_name, &property_value); env->ReleaseStringUTFChars(jproperty, property); if (retCode) { return ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &property_value); } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::NotFound()); return nullptr; } /* * Class: org_rocksdb_RocksDB * Method: getLongProperty * Signature: (JJLjava/lang/String;I)J */ jlong Java_org_rocksdb_RocksDB_getLongProperty( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jstring jproperty, jint jproperty_len) { const char* property = env->GetStringUTFChars(jproperty, nullptr); if (property == nullptr) { // exception thrown: OutOfMemoryError return 0; } ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } uint64_t property_value; bool retCode = db->GetIntProperty(cf_handle, property_name, &property_value); env->ReleaseStringUTFChars(jproperty, property); if (retCode) { return property_value; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::NotFound()); return 0; } /* * Class: org_rocksdb_RocksDB * Method: resetStats * Signature: (J)V */ void Java_org_rocksdb_RocksDB_resetStats( JNIEnv *, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); db->ResetStats(); } /* * Class: org_rocksdb_RocksDB 
* Method: getAggregatedLongProperty * Signature: (JLjava/lang/String;I)J */ jlong Java_org_rocksdb_RocksDB_getAggregatedLongProperty( JNIEnv* env, jobject, jlong db_handle, jstring jproperty, jint jproperty_len) { const char* property = env->GetStringUTFChars(jproperty, nullptr); if (property == nullptr) { return 0; } ROCKSDB_NAMESPACE::Slice property_name(property, jproperty_len); auto* db = reinterpret_cast(db_handle); uint64_t property_value = 0; bool retCode = db->GetAggregatedIntProperty(property_name, &property_value); env->ReleaseStringUTFChars(jproperty, property); if (retCode) { return property_value; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::NotFound()); return 0; } /* * Class: org_rocksdb_RocksDB * Method: getApproximateSizes * Signature: (JJ[JB)[J */ jlongArray Java_org_rocksdb_RocksDB_getApproximateSizes( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlongArray jrange_slice_handles, jbyte jinclude_flags) { const jsize jlen = env->GetArrayLength(jrange_slice_handles); const size_t range_count = jlen / 2; jboolean jranges_is_copy = JNI_FALSE; jlong* jranges = env->GetLongArrayElements(jrange_slice_handles, &jranges_is_copy); if (jranges == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } auto ranges = std::unique_ptr( new ROCKSDB_NAMESPACE::Range[range_count]); size_t range_offset = 0; for (jsize i = 0; i < jlen; ++i) { auto* start = reinterpret_cast(jranges[i]); auto* limit = reinterpret_cast(jranges[++i]); ranges.get()[range_offset++] = ROCKSDB_NAMESPACE::Range(*start, *limit); } auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } auto sizes = std::unique_ptr(new uint64_t[range_count]); db->GetApproximateSizes(cf_handle, ranges.get(), static_cast(range_count), sizes.get(), static_cast(jinclude_flags)); // release LongArrayElements 
env->ReleaseLongArrayElements(jrange_slice_handles, jranges, JNI_ABORT); // prepare results auto results = std::unique_ptr(new jlong[range_count]); for (size_t i = 0; i < range_count; ++i) { results.get()[i] = static_cast(sizes.get()[i]); } const jsize jrange_count = jlen / 2; jlongArray jresults = env->NewLongArray(jrange_count); if (jresults == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jresults, 0, jrange_count, results.get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresults); return nullptr; } return jresults; } /* * Class: org_rocksdb_RocksDB * Method: getApproximateMemTableStats * Signature: (JJJJ)[J */ jlongArray Java_org_rocksdb_RocksDB_getApproximateMemTableStats( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jstartHandle, jlong jlimitHandle) { auto* start = reinterpret_cast(jstartHandle); auto* limit = reinterpret_cast(jlimitHandle); const ROCKSDB_NAMESPACE::Range range(*start, *limit); auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } uint64_t count = 0; uint64_t sizes = 0; db->GetApproximateMemTableStats(cf_handle, range, &count, &sizes); // prepare results jlong results[2] = { static_cast(count), static_cast(sizes)}; const jsize jcount = static_cast(count); jlongArray jsizes = env->NewLongArray(jcount); if (jsizes == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jsizes, 0, jcount, results); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jsizes); return nullptr; } return jsizes; } /* * Class: org_rocksdb_RocksDB * Method: compactRange * Signature: (J[BI[BIJJ)V */ void Java_org_rocksdb_RocksDB_compactRange( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin, jint 
jbegin_len, jbyteArray jend, jint jend_len, jlong jcompact_range_opts_handle, jlong jcf_handle) { jboolean has_exception = JNI_FALSE; std::string str_begin; if (jbegin_len > 0) { str_begin = ROCKSDB_NAMESPACE::JniUtil::byteString( env, jbegin, jbegin_len, [](const char* str, const size_t len) { return std::string(str, len); }, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } } std::string str_end; if (jend_len > 0) { str_end = ROCKSDB_NAMESPACE::JniUtil::byteString( env, jend, jend_len, [](const char* str, const size_t len) { return std::string(str, len); }, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } } ROCKSDB_NAMESPACE::CompactRangeOptions* compact_range_opts = nullptr; if (jcompact_range_opts_handle == 0) { // NOTE: we DO own the pointer! compact_range_opts = new ROCKSDB_NAMESPACE::CompactRangeOptions(); } else { // NOTE: we do NOT own the pointer! compact_range_opts = reinterpret_cast( jcompact_range_opts_handle); } auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } ROCKSDB_NAMESPACE::Status s; if (jbegin_len > 0 || jend_len > 0) { const ROCKSDB_NAMESPACE::Slice begin(str_begin); const ROCKSDB_NAMESPACE::Slice end(str_end); s = db->CompactRange(*compact_range_opts, cf_handle, &begin, &end); } else { s = db->CompactRange(*compact_range_opts, cf_handle, nullptr, nullptr); } if (jcompact_range_opts_handle == 0) { delete compact_range_opts; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_RocksDB * Method: setOptions * Signature: (JJ[Ljava/lang/String;[Ljava/lang/String;)V */ void Java_org_rocksdb_RocksDB_setOptions( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jobjectArray jkeys, jobjectArray jvalues) { const jsize len = env->GetArrayLength(jkeys); assert(len == env->GetArrayLength(jvalues)); 
std::unordered_map options_map; for (jsize i = 0; i < len; i++) { jobject jobj_key = env->GetObjectArrayElement(jkeys, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException return; } jobject jobj_value = env->GetObjectArrayElement(jvalues, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jobj_key); return; } jboolean has_exception = JNI_FALSE; std::string s_key = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, reinterpret_cast(jobj_key), &has_exception); if (has_exception == JNI_TRUE) { // exception occurred env->DeleteLocalRef(jobj_value); env->DeleteLocalRef(jobj_key); return; } std::string s_value = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, reinterpret_cast(jobj_value), &has_exception); if (has_exception == JNI_TRUE) { // exception occurred env->DeleteLocalRef(jobj_value); env->DeleteLocalRef(jobj_key); return; } options_map[s_key] = s_value; env->DeleteLocalRef(jobj_key); env->DeleteLocalRef(jobj_value); } auto* db = reinterpret_cast(jdb_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto s = db->SetOptions(cf_handle, options_map); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: setDBOptions * Signature: (J[Ljava/lang/String;[Ljava/lang/String;)V */ void Java_org_rocksdb_RocksDB_setDBOptions( JNIEnv* env, jobject, jlong jdb_handle, jobjectArray jkeys, jobjectArray jvalues) { const jsize len = env->GetArrayLength(jkeys); assert(len == env->GetArrayLength(jvalues)); std::unordered_map options_map; for (jsize i = 0; i < len; i++) { jobject jobj_key = env->GetObjectArrayElement(jkeys, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException return; } jobject jobj_value = env->GetObjectArrayElement(jvalues, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jobj_key); return; } jboolean has_exception = JNI_FALSE; 
std::string s_key = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, reinterpret_cast(jobj_key), &has_exception); if (has_exception == JNI_TRUE) { // exception occurred env->DeleteLocalRef(jobj_value); env->DeleteLocalRef(jobj_key); return; } std::string s_value = ROCKSDB_NAMESPACE::JniUtil::copyStdString( env, reinterpret_cast(jobj_value), &has_exception); if (has_exception == JNI_TRUE) { // exception occurred env->DeleteLocalRef(jobj_value); env->DeleteLocalRef(jobj_key); return; } options_map[s_key] = s_value; env->DeleteLocalRef(jobj_key); env->DeleteLocalRef(jobj_value); } auto* db = reinterpret_cast(jdb_handle); auto s = db->SetDBOptions(options_map); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: compactFiles * Signature: (JJJ[Ljava/lang/String;IIJ)[Ljava/lang/String; */ jobjectArray Java_org_rocksdb_RocksDB_compactFiles( JNIEnv* env, jobject, jlong jdb_handle, jlong jcompaction_opts_handle, jlong jcf_handle, jobjectArray jinput_file_names, jint joutput_level, jint joutput_path_id, jlong jcompaction_job_info_handle) { jboolean has_exception = JNI_FALSE; const std::vector input_file_names = ROCKSDB_NAMESPACE::JniUtil::copyStrings(env, jinput_file_names, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return nullptr; } auto* compaction_opts = reinterpret_cast( jcompaction_opts_handle); auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } ROCKSDB_NAMESPACE::CompactionJobInfo* compaction_job_info = nullptr; if (jcompaction_job_info_handle != 0) { compaction_job_info = reinterpret_cast( jcompaction_job_info_handle); } std::vector output_file_names; auto s = db->CompactFiles(*compaction_opts, cf_handle, input_file_names, static_cast(joutput_level), static_cast(joutput_path_id), &output_file_names, compaction_job_info); 
if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings(env, &output_file_names); } /* * Class: org_rocksdb_RocksDB * Method: cancelAllBackgroundWork * Signature: (JZ)V */ void Java_org_rocksdb_RocksDB_cancelAllBackgroundWork( JNIEnv*, jobject, jlong jdb_handle, jboolean jwait) { auto* db = reinterpret_cast(jdb_handle); rocksdb::CancelAllBackgroundWork(db, jwait); } /* * Class: org_rocksdb_RocksDB * Method: pauseBackgroundWork * Signature: (J)V */ void Java_org_rocksdb_RocksDB_pauseBackgroundWork( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->PauseBackgroundWork(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: continueBackgroundWork * Signature: (J)V */ void Java_org_rocksdb_RocksDB_continueBackgroundWork( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->ContinueBackgroundWork(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: enableAutoCompaction * Signature: (J[J)V */ void Java_org_rocksdb_RocksDB_enableAutoCompaction( JNIEnv* env, jobject, jlong jdb_handle, jlongArray jcf_handles) { auto* db = reinterpret_cast(jdb_handle); jboolean has_exception = JNI_FALSE; const std::vector cf_handles = ROCKSDB_NAMESPACE::JniUtil::fromJPointers< ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, jcf_handles, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } db->EnableAutoCompaction(cf_handles); } /* * Class: org_rocksdb_RocksDB * Method: numberLevels * Signature: (JJ)I */ jint Java_org_rocksdb_RocksDB_numberLevels( JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { 
cf_handle = reinterpret_cast(jcf_handle); } return static_cast(db->NumberLevels(cf_handle)); } /* * Class: org_rocksdb_RocksDB * Method: maxMemCompactionLevel * Signature: (JJ)I */ jint Java_org_rocksdb_RocksDB_maxMemCompactionLevel( JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } return static_cast(db->MaxMemCompactionLevel(cf_handle)); } /* * Class: org_rocksdb_RocksDB * Method: level0StopWriteTrigger * Signature: (JJ)I */ jint Java_org_rocksdb_RocksDB_level0StopWriteTrigger( JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } return static_cast(db->Level0StopWriteTrigger(cf_handle)); } /* * Class: org_rocksdb_RocksDB * Method: getName * Signature: (J)Ljava/lang/String; */ jstring Java_org_rocksdb_RocksDB_getName( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); std::string name = db->GetName(); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, false); } /* * Class: org_rocksdb_RocksDB * Method: getEnv * Signature: (J)J */ jlong Java_org_rocksdb_RocksDB_getEnv( JNIEnv*, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); return reinterpret_cast(db->GetEnv()); } /* * Class: org_rocksdb_RocksDB * Method: flush * Signature: (JJ[J)V */ void Java_org_rocksdb_RocksDB_flush( JNIEnv* env, jobject, jlong jdb_handle, jlong jflush_opts_handle, jlongArray jcf_handles) { auto* db = reinterpret_cast(jdb_handle); auto* flush_opts = reinterpret_cast(jflush_opts_handle); std::vector cf_handles; if (jcf_handles == nullptr) { cf_handles.push_back(db->DefaultColumnFamily()); } else { jboolean has_exception 
= JNI_FALSE; cf_handles = ROCKSDB_NAMESPACE::JniUtil::fromJPointers< ROCKSDB_NAMESPACE::ColumnFamilyHandle>(env, jcf_handles, &has_exception); if (has_exception) { // exception occurred return; } } auto s = db->Flush(*flush_opts, cf_handles); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: flushWal * Signature: (JZ)V */ void Java_org_rocksdb_RocksDB_flushWal( JNIEnv* env, jobject, jlong jdb_handle, jboolean jsync) { auto* db = reinterpret_cast(jdb_handle); auto s = db->FlushWAL(jsync == JNI_TRUE); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: syncWal * Signature: (J)V */ void Java_org_rocksdb_RocksDB_syncWal( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->SyncWAL(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: getLatestSequenceNumber * Signature: (J)V */ jlong Java_org_rocksdb_RocksDB_getLatestSequenceNumber( JNIEnv*, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); return db->GetLatestSequenceNumber(); } /* * Class: org_rocksdb_RocksDB * Method: setPreserveDeletesSequenceNumber * Signature: (JJ)Z */ jboolean JNICALL Java_org_rocksdb_RocksDB_setPreserveDeletesSequenceNumber( JNIEnv*, jobject, jlong jdb_handle, jlong jseq_number) { auto* db = reinterpret_cast(jdb_handle); if (db->SetPreserveDeletesSequenceNumber( static_cast(jseq_number))) { return JNI_TRUE; } else { return JNI_FALSE; } } /* * Class: org_rocksdb_RocksDB * Method: disableFileDeletions * Signature: (J)V */ void Java_org_rocksdb_RocksDB_disableFileDeletions( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::Status s = db->DisableFileDeletions(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: 
enableFileDeletions * Signature: (JZ)V */ void Java_org_rocksdb_RocksDB_enableFileDeletions( JNIEnv* env, jobject, jlong jdb_handle, jboolean jforce) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::Status s = db->EnableFileDeletions(jforce); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: getLiveFiles * Signature: (JZ)[Ljava/lang/String; */ jobjectArray Java_org_rocksdb_RocksDB_getLiveFiles( JNIEnv* env, jobject, jlong jdb_handle, jboolean jflush_memtable) { auto* db = reinterpret_cast(jdb_handle); std::vector live_files; uint64_t manifest_file_size = 0; auto s = db->GetLiveFiles( live_files, &manifest_file_size, jflush_memtable == JNI_TRUE); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } // append the manifest_file_size to the vector // for passing back to java live_files.push_back(std::to_string(manifest_file_size)); return ROCKSDB_NAMESPACE::JniUtil::toJavaStrings(env, &live_files); } /* * Class: org_rocksdb_RocksDB * Method: getSortedWalFiles * Signature: (J)[Lorg/rocksdb/LogFile; */ jobjectArray Java_org_rocksdb_RocksDB_getSortedWalFiles( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); std::vector> sorted_wal_files; auto s = db->GetSortedWalFiles(sorted_wal_files); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } // convert to Java type const jsize jlen = static_cast(sorted_wal_files.size()); jobjectArray jsorted_wal_files = env->NewObjectArray( jlen, ROCKSDB_NAMESPACE::LogFileJni::getJClass(env), nullptr); if(jsorted_wal_files == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } jsize i = 0; for (auto it = sorted_wal_files.begin(); it != sorted_wal_files.end(); ++it) { jobject jlog_file = ROCKSDB_NAMESPACE::LogFileJni::fromCppLogFile(env, it->get()); if (jlog_file == nullptr) { // exception occurred env->DeleteLocalRef(jsorted_wal_files); 
return nullptr; } env->SetObjectArrayElement(jsorted_wal_files, i++, jlog_file); if (env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(jlog_file); env->DeleteLocalRef(jsorted_wal_files); return nullptr; } env->DeleteLocalRef(jlog_file); } return jsorted_wal_files; } /* * Class: org_rocksdb_RocksDB * Method: getUpdatesSince * Signature: (JJ)J */ jlong Java_org_rocksdb_RocksDB_getUpdatesSince( JNIEnv* env, jobject, jlong jdb_handle, jlong jsequence_number) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::SequenceNumber sequence_number = static_cast(jsequence_number); std::unique_ptr iter; ROCKSDB_NAMESPACE::Status s = db->GetUpdatesSince(sequence_number, &iter); if (s.ok()) { return reinterpret_cast(iter.release()); } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } /* * Class: org_rocksdb_RocksDB * Method: deleteFile * Signature: (JLjava/lang/String;)V */ void Java_org_rocksdb_RocksDB_deleteFile( JNIEnv* env, jobject, jlong jdb_handle, jstring jname) { auto* db = reinterpret_cast(jdb_handle); jboolean has_exception = JNI_FALSE; std::string name = ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jname, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } db->DeleteFile(name); } /* * Class: org_rocksdb_RocksDB * Method: getLiveFilesMetaData * Signature: (J)[Lorg/rocksdb/LiveFileMetaData; */ jobjectArray Java_org_rocksdb_RocksDB_getLiveFilesMetaData( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); std::vector live_files_meta_data; db->GetLiveFilesMetaData(&live_files_meta_data); // convert to Java type const jsize jlen = static_cast(live_files_meta_data.size()); jobjectArray jlive_files_meta_data = env->NewObjectArray( jlen, ROCKSDB_NAMESPACE::LiveFileMetaDataJni::getJClass(env), nullptr); if(jlive_files_meta_data == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } jsize i = 0; for (auto it = live_files_meta_data.begin(); it != 
live_files_meta_data.end(); ++it) { jobject jlive_file_meta_data = ROCKSDB_NAMESPACE::LiveFileMetaDataJni::fromCppLiveFileMetaData(env, &(*it)); if (jlive_file_meta_data == nullptr) { // exception occurred env->DeleteLocalRef(jlive_files_meta_data); return nullptr; } env->SetObjectArrayElement(jlive_files_meta_data, i++, jlive_file_meta_data); if (env->ExceptionCheck()) { // exception occurred env->DeleteLocalRef(jlive_file_meta_data); env->DeleteLocalRef(jlive_files_meta_data); return nullptr; } env->DeleteLocalRef(jlive_file_meta_data); } return jlive_files_meta_data; } /* * Class: org_rocksdb_RocksDB * Method: getColumnFamilyMetaData * Signature: (JJ)Lorg/rocksdb/ColumnFamilyMetaData; */ jobject Java_org_rocksdb_RocksDB_getColumnFamilyMetaData( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } ROCKSDB_NAMESPACE::ColumnFamilyMetaData cf_metadata; db->GetColumnFamilyMetaData(cf_handle, &cf_metadata); return ROCKSDB_NAMESPACE::ColumnFamilyMetaDataJni:: fromCppColumnFamilyMetaData(env, &cf_metadata); } /* * Class: org_rocksdb_RocksDB * Method: ingestExternalFile * Signature: (JJ[Ljava/lang/String;IJ)V */ void Java_org_rocksdb_RocksDB_ingestExternalFile( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jobjectArray jfile_path_list, jint jfile_path_list_len, jlong jingest_external_file_options_handle) { jboolean has_exception = JNI_FALSE; std::vector file_path_list = ROCKSDB_NAMESPACE::JniUtil::copyStrings( env, jfile_path_list, jfile_path_list_len, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return; } auto* db = reinterpret_cast(jdb_handle); auto* column_family = reinterpret_cast(jcf_handle); auto* ifo = reinterpret_cast( jingest_external_file_options_handle); ROCKSDB_NAMESPACE::Status s = 
db->IngestExternalFile(column_family, file_path_list, *ifo); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: verifyChecksum * Signature: (J)V */ void Java_org_rocksdb_RocksDB_verifyChecksum( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->VerifyChecksum(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: getDefaultColumnFamily * Signature: (J)J */ jlong Java_org_rocksdb_RocksDB_getDefaultColumnFamily( JNIEnv*, jobject, jlong jdb_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto* cf_handle = db_handle->DefaultColumnFamily(); return reinterpret_cast(cf_handle); } /* * Class: org_rocksdb_RocksDB * Method: getPropertiesOfAllTables * Signature: (JJ)Ljava/util/Map; */ jobject Java_org_rocksdb_RocksDB_getPropertiesOfAllTables( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } ROCKSDB_NAMESPACE::TablePropertiesCollection table_properties_collection; auto s = db->GetPropertiesOfAllTables(cf_handle, &table_properties_collection); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } // convert to Java type jobject jhash_map = ROCKSDB_NAMESPACE::HashMapJni::construct( env, static_cast(table_properties_collection.size())); if (jhash_map == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const std::string, const std::shared_ptr, jobject, jobject> fn_map_kv = [env](const std::pair>& kv) { jstring jkey = ROCKSDB_NAMESPACE::JniUtil::toJavaString( env, &(kv.first), false); if (env->ExceptionCheck()) { // an error occurred return std::unique_ptr>(nullptr); } jobject jtable_properties = 
ROCKSDB_NAMESPACE::TablePropertiesJni::fromCppTableProperties( env, *(kv.second.get())); if (jtable_properties == nullptr) { // an error occurred env->DeleteLocalRef(jkey); return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair( static_cast(jkey), static_cast(jtable_properties))); }; if (!ROCKSDB_NAMESPACE::HashMapJni::putAll( env, jhash_map, table_properties_collection.begin(), table_properties_collection.end(), fn_map_kv)) { // exception occurred return nullptr; } return jhash_map; } /* * Class: org_rocksdb_RocksDB * Method: getPropertiesOfTablesInRange * Signature: (JJ[J)Ljava/util/Map; */ jobject Java_org_rocksdb_RocksDB_getPropertiesOfTablesInRange( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlongArray jrange_slice_handles) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } const jsize jlen = env->GetArrayLength(jrange_slice_handles); jboolean jrange_slice_handles_is_copy = JNI_FALSE; jlong *jrange_slice_handle = env->GetLongArrayElements( jrange_slice_handles, &jrange_slice_handles_is_copy); if (jrange_slice_handle == nullptr) { // exception occurred return nullptr; } const size_t ranges_len = static_cast(jlen / 2); auto ranges = std::unique_ptr( new ROCKSDB_NAMESPACE::Range[ranges_len]); for (jsize i = 0, j = 0; i < jlen; ++i) { auto* start = reinterpret_cast(jrange_slice_handle[i]); auto* limit = reinterpret_cast(jrange_slice_handle[++i]); ranges[j++] = ROCKSDB_NAMESPACE::Range(*start, *limit); } ROCKSDB_NAMESPACE::TablePropertiesCollection table_properties_collection; auto s = db->GetPropertiesOfTablesInRange( cf_handle, ranges.get(), ranges_len, &table_properties_collection); if (!s.ok()) { // error occurred env->ReleaseLongArrayElements(jrange_slice_handles, jrange_slice_handle, JNI_ABORT); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } 
// cleanup env->ReleaseLongArrayElements(jrange_slice_handles, jrange_slice_handle, JNI_ABORT); return jrange_slice_handles; } /* * Class: org_rocksdb_RocksDB * Method: suggestCompactRange * Signature: (JJ)[J */ jlongArray Java_org_rocksdb_RocksDB_suggestCompactRange( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } auto* begin = new ROCKSDB_NAMESPACE::Slice(); auto* end = new ROCKSDB_NAMESPACE::Slice(); auto s = db->SuggestCompactRange(cf_handle, begin, end); if (!s.ok()) { // error occurred delete begin; delete end; ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } jlongArray jslice_handles = env->NewLongArray(2); if (jslice_handles == nullptr) { // exception thrown: OutOfMemoryError delete begin; delete end; return nullptr; } jlong slice_handles[2]; slice_handles[0] = reinterpret_cast(begin); slice_handles[1] = reinterpret_cast(end); env->SetLongArrayRegion(jslice_handles, 0, 2, slice_handles); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException delete begin; delete end; env->DeleteLocalRef(jslice_handles); return nullptr; } return jslice_handles; } /* * Class: org_rocksdb_RocksDB * Method: promoteL0 * Signature: (JJI)V */ void Java_org_rocksdb_RocksDB_promoteL0( JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle, jint jtarget_level) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; if (jcf_handle == 0) { cf_handle = db->DefaultColumnFamily(); } else { cf_handle = reinterpret_cast(jcf_handle); } db->PromoteL0(cf_handle, static_cast(jtarget_level)); } /* * Class: org_rocksdb_RocksDB * Method: startTrace * Signature: (JJJ)V */ void Java_org_rocksdb_RocksDB_startTrace( JNIEnv* env, jobject, jlong jdb_handle, jlong jmax_trace_file_size, jlong 
jtrace_writer_jnicallback_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::TraceOptions trace_options; trace_options.max_trace_file_size = static_cast(jmax_trace_file_size); // transfer ownership of trace writer from Java to C++ auto trace_writer = std::unique_ptr( reinterpret_cast( jtrace_writer_jnicallback_handle)); auto s = db->StartTrace(trace_options, std::move(trace_writer)); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: endTrace * Signature: (J)V */ JNIEXPORT void JNICALL Java_org_rocksdb_RocksDB_endTrace( JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->EndTrace(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_RocksDB * Method: destroyDB * Signature: (Ljava/lang/String;J)V */ void Java_org_rocksdb_RocksDB_destroyDB( JNIEnv* env, jclass, jstring jdb_path, jlong joptions_handle) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return; } auto* options = reinterpret_cast(joptions_handle); if (options == nullptr) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument("Invalid Options.")); } ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DestroyDB(db_path, *options); env->ReleaseStringUTFChars(jdb_path, db_path); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } bool get_slice_helper(JNIEnv* env, jobjectArray ranges, jsize index, std::unique_ptr& slice, std::vector>& ranges_to_free) { jobject jArray = env->GetObjectArrayElement(ranges, index); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException return false; } if (jArray == nullptr) { return true; } jbyteArray jba = reinterpret_cast(jArray); jsize len_ba = env->GetArrayLength(jba); ranges_to_free.push_back(std::unique_ptr(new 
jbyte[len_ba])); env->GetByteArrayRegion(jba, 0, len_ba, ranges_to_free.back().get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jArray); return false; } env->DeleteLocalRef(jArray); slice.reset(new ROCKSDB_NAMESPACE::Slice( reinterpret_cast(ranges_to_free.back().get()), len_ba)); return true; } /* * Class: org_rocksdb_RocksDB * Method: deleteFilesInRanges * Signature: (JJLjava/util/List;Z)V */ JNIEXPORT void JNICALL Java_org_rocksdb_RocksDB_deleteFilesInRanges( JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jcf_handle, jobjectArray ranges, jboolean include_end) { jsize length = env->GetArrayLength(ranges); std::vector rangesVector; std::vector> slices; std::vector> ranges_to_free; for (jsize i = 0; (i + 1) < length; i += 2) { slices.push_back(std::unique_ptr()); if (!get_slice_helper(env, ranges, i, slices.back(), ranges_to_free)) { // exception thrown return; } slices.push_back(std::unique_ptr()); if (!get_slice_helper(env, ranges, i + 1, slices.back(), ranges_to_free)) { // exception thrown return; } rangesVector.push_back(ROCKSDB_NAMESPACE::RangePtr( slices[slices.size() - 2].get(), slices[slices.size() - 1].get())); } auto* db = reinterpret_cast(jdb_handle); auto* column_family = reinterpret_cast(jcf_handle); ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DeleteFilesInRanges( db, column_family == nullptr ? db->DefaultColumnFamily() : column_family, rangesVector.data(), rangesVector.size(), include_end); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } rocksdb-6.11.4/java/rocksjni/slice.cc000066400000000000000000000270741370372246700174300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Slice. #include #include #include #include #include "include/org_rocksdb_AbstractSlice.h" #include "include/org_rocksdb_DirectSlice.h" #include "include/org_rocksdb_Slice.h" #include "rocksdb/slice.h" #include "rocksjni/portal.h" // /* * Class: org_rocksdb_Slice * Method: createNewSlice0 * Signature: ([BI)J */ jlong Java_org_rocksdb_Slice_createNewSlice0(JNIEnv* env, jclass /*jcls*/, jbyteArray data, jint offset) { const jsize dataSize = env->GetArrayLength(data); const int len = dataSize - offset; // NOTE: buf will be deleted in the Java_org_rocksdb_Slice_disposeInternalBuf // method jbyte* buf = new jbyte[len]; env->GetByteArrayRegion(data, offset, len, buf); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException return 0; } const auto* slice = new ROCKSDB_NAMESPACE::Slice((const char*)buf, len); return reinterpret_cast(slice); } /* * Class: org_rocksdb_Slice * Method: createNewSlice1 * Signature: ([B)J */ jlong Java_org_rocksdb_Slice_createNewSlice1(JNIEnv* env, jclass /*jcls*/, jbyteArray data) { jbyte* ptrData = env->GetByteArrayElements(data, nullptr); if (ptrData == nullptr) { // exception thrown: OutOfMemoryError return 0; } const int len = env->GetArrayLength(data) + 1; // NOTE: buf will be deleted in the Java_org_rocksdb_Slice_disposeInternalBuf // method char* buf = new char[len]; memcpy(buf, ptrData, len - 1); buf[len - 1] = '\0'; const auto* slice = new ROCKSDB_NAMESPACE::Slice(buf, len - 1); env->ReleaseByteArrayElements(data, ptrData, JNI_ABORT); return reinterpret_cast(slice); } /* * Class: org_rocksdb_Slice * Method: data0 * Signature: (J)[B */ jbyteArray Java_org_rocksdb_Slice_data0(JNIEnv* env, jobject /*jobj*/, jlong handle) { const auto* slice = reinterpret_cast(handle); const jsize len = static_cast(slice->size()); const jbyteArray data = env->NewByteArray(len); if (data == nullptr) { // exception thrown: OutOfMemoryError return 
nullptr; } env->SetByteArrayRegion( data, 0, len, const_cast(reinterpret_cast(slice->data()))); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(data); return nullptr; } return data; } /* * Class: org_rocksdb_Slice * Method: clear0 * Signature: (JZJ)V */ void Java_org_rocksdb_Slice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jboolean shouldRelease, jlong internalBufferOffset) { auto* slice = reinterpret_cast(handle); if (shouldRelease == JNI_TRUE) { const char* buf = slice->data_ - internalBufferOffset; delete[] buf; } slice->clear(); } /* * Class: org_rocksdb_Slice * Method: removePrefix0 * Signature: (JI)V */ void Java_org_rocksdb_Slice_removePrefix0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jint length) { auto* slice = reinterpret_cast(handle); slice->remove_prefix(length); } /* * Class: org_rocksdb_Slice * Method: disposeInternalBuf * Signature: (JJ)V */ void Java_org_rocksdb_Slice_disposeInternalBuf(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong internalBufferOffset) { const auto* slice = reinterpret_cast(handle); const char* buf = slice->data_ - internalBufferOffset; delete[] buf; } // // (data_addr); const auto* slice = new ROCKSDB_NAMESPACE::Slice(ptrData, length); return reinterpret_cast(slice); } /* * Class: org_rocksdb_DirectSlice * Method: createNewDirectSlice1 * Signature: (Ljava/nio/ByteBuffer;)J */ jlong Java_org_rocksdb_DirectSlice_createNewDirectSlice1(JNIEnv* env, jclass /*jcls*/, jobject data) { void* data_addr = env->GetDirectBufferAddress(data); if (data_addr == nullptr) { // error: memory region is undefined, given object is not a direct // java.nio.Buffer, or JNI access to direct buffers is not supported by JVM ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Could not access DirectBuffer")); return 0; } const auto* ptrData = reinterpret_cast(data_addr); const auto* slice = new 
ROCKSDB_NAMESPACE::Slice(ptrData); return reinterpret_cast(slice); } /* * Class: org_rocksdb_DirectSlice * Method: data0 * Signature: (J)Ljava/lang/Object; */ jobject Java_org_rocksdb_DirectSlice_data0(JNIEnv* env, jobject /*jobj*/, jlong handle) { const auto* slice = reinterpret_cast(handle); return env->NewDirectByteBuffer(const_cast(slice->data()), slice->size()); } /* * Class: org_rocksdb_DirectSlice * Method: get0 * Signature: (JI)B */ jbyte Java_org_rocksdb_DirectSlice_get0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jint offset) { const auto* slice = reinterpret_cast(handle); return (*slice)[offset]; } /* * Class: org_rocksdb_DirectSlice * Method: clear0 * Signature: (JZJ)V */ void Java_org_rocksdb_DirectSlice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jboolean shouldRelease, jlong internalBufferOffset) { auto* slice = reinterpret_cast(handle); if (shouldRelease == JNI_TRUE) { const char* buf = slice->data_ - internalBufferOffset; delete[] buf; } slice->clear(); } /* * Class: org_rocksdb_DirectSlice * Method: removePrefix0 * Signature: (JI)V */ void Java_org_rocksdb_DirectSlice_removePrefix0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jint length) { auto* slice = reinterpret_cast(handle); slice->remove_prefix(length); } /* * Class: org_rocksdb_DirectSlice * Method: disposeInternalBuf * Signature: (JJ)V */ void Java_org_rocksdb_DirectSlice_disposeInternalBuf( JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong internalBufferOffset) { const auto* slice = reinterpret_cast(handle); const char* buf = slice->data_ - internalBufferOffset; delete[] buf; } // rocksdb-6.11.4/java/rocksjni/snapshot.cc000066400000000000000000000016221370372246700201570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the "bridge" between Java and C++. #include #include #include #include "include/org_rocksdb_Snapshot.h" #include "rocksdb/db.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_Snapshot * Method: getSequenceNumber * Signature: (J)J */ jlong Java_org_rocksdb_Snapshot_getSequenceNumber(JNIEnv* /*env*/, jobject /*jobj*/, jlong jsnapshot_handle) { auto* snapshot = reinterpret_cast(jsnapshot_handle); return snapshot->GetSequenceNumber(); } rocksdb-6.11.4/java/rocksjni/sst_file_manager.cc000066400000000000000000000213071370372246700216240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::SstFileManager methods // from Java side. 
#include #include #include "include/org_rocksdb_SstFileManager.h" #include "rocksdb/sst_file_manager.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_SstFileManager * Method: newSstFileManager * Signature: (JJJDJ)J */ jlong Java_org_rocksdb_SstFileManager_newSstFileManager( JNIEnv* jnienv, jclass /*jcls*/, jlong jenv_handle, jlong jlogger_handle, jlong jrate_bytes, jdouble jmax_trash_db_ratio, jlong jmax_delete_chunk_bytes) { auto* env = reinterpret_cast(jenv_handle); ROCKSDB_NAMESPACE::Status s; ROCKSDB_NAMESPACE::SstFileManager* sst_file_manager = nullptr; if (jlogger_handle != 0) { auto* sptr_logger = reinterpret_cast*>( jlogger_handle); sst_file_manager = ROCKSDB_NAMESPACE::NewSstFileManager( env, *sptr_logger, "", jrate_bytes, true, &s, jmax_trash_db_ratio, jmax_delete_chunk_bytes); } else { sst_file_manager = ROCKSDB_NAMESPACE::NewSstFileManager( env, nullptr, "", jrate_bytes, true, &s, jmax_trash_db_ratio, jmax_delete_chunk_bytes); } if (!s.ok()) { if (sst_file_manager != nullptr) { delete sst_file_manager; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(jnienv, s); } auto* sptr_sst_file_manager = new std::shared_ptr(sst_file_manager); return reinterpret_cast(sptr_sst_file_manager); } /* * Class: org_rocksdb_SstFileManager * Method: setMaxAllowedSpaceUsage * Signature: (JJ)V */ void Java_org_rocksdb_SstFileManager_setMaxAllowedSpaceUsage( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_allowed_space) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); sptr_sst_file_manager->get()->SetMaxAllowedSpaceUsage(jmax_allowed_space); } /* * Class: org_rocksdb_SstFileManager * Method: setCompactionBufferSize * Signature: (JJ)V */ void Java_org_rocksdb_SstFileManager_setCompactionBufferSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jcompaction_buffer_size) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); sptr_sst_file_manager->get()->SetCompactionBufferSize( jcompaction_buffer_size); } /* * Class: 
org_rocksdb_SstFileManager * Method: isMaxAllowedSpaceReached * Signature: (J)Z */ jboolean Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReached( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); return sptr_sst_file_manager->get()->IsMaxAllowedSpaceReached(); } /* * Class: org_rocksdb_SstFileManager * Method: isMaxAllowedSpaceReachedIncludingCompactions * Signature: (J)Z */ jboolean Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReachedIncludingCompactions( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); return sptr_sst_file_manager->get() ->IsMaxAllowedSpaceReachedIncludingCompactions(); } /* * Class: org_rocksdb_SstFileManager * Method: getTotalSize * Signature: (J)J */ jlong Java_org_rocksdb_SstFileManager_getTotalSize(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); return sptr_sst_file_manager->get()->GetTotalSize(); } /* * Class: org_rocksdb_SstFileManager * Method: getTrackedFiles * Signature: (J)Ljava/util/Map; */ jobject Java_org_rocksdb_SstFileManager_getTrackedFiles(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); auto tracked_files = sptr_sst_file_manager->get()->GetTrackedFiles(); // TODO(AR) could refactor to share code with // ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, tracked_files); const jobject jtracked_files = ROCKSDB_NAMESPACE::HashMapJni::construct( env, static_cast(tracked_files.size())); if (jtracked_files == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV fn_map_kv = [env](const std::pair& pair) { const jstring jtracked_file_path = env->NewStringUTF(pair.first.c_str()); if (jtracked_file_path == nullptr) { // an error occurred return std::unique_ptr>(nullptr); } const jobject jtracked_file_size = ROCKSDB_NAMESPACE::LongJni::valueOf(env, 
pair.second); if (jtracked_file_size == nullptr) { // an error occurred return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair(jtracked_file_path, jtracked_file_size)); }; if (!ROCKSDB_NAMESPACE::HashMapJni::putAll(env, jtracked_files, tracked_files.begin(), tracked_files.end(), fn_map_kv)) { // exception occcurred return nullptr; } return jtracked_files; } /* * Class: org_rocksdb_SstFileManager * Method: getDeleteRateBytesPerSecond * Signature: (J)J */ jlong Java_org_rocksdb_SstFileManager_getDeleteRateBytesPerSecond( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); return sptr_sst_file_manager->get()->GetDeleteRateBytesPerSecond(); } /* * Class: org_rocksdb_SstFileManager * Method: setDeleteRateBytesPerSecond * Signature: (JJ)V */ void Java_org_rocksdb_SstFileManager_setDeleteRateBytesPerSecond( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdelete_rate) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); sptr_sst_file_manager->get()->SetDeleteRateBytesPerSecond(jdelete_rate); } /* * Class: org_rocksdb_SstFileManager * Method: getMaxTrashDBRatio * Signature: (J)D */ jdouble Java_org_rocksdb_SstFileManager_getMaxTrashDBRatio(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); return sptr_sst_file_manager->get()->GetMaxTrashDBRatio(); } /* * Class: org_rocksdb_SstFileManager * Method: setMaxTrashDBRatio * Signature: (JD)V */ void Java_org_rocksdb_SstFileManager_setMaxTrashDBRatio(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jdouble jratio) { auto* sptr_sst_file_manager = reinterpret_cast*>( jhandle); sptr_sst_file_manager->get()->SetMaxTrashDBRatio(jratio); } /* * Class: org_rocksdb_SstFileManager * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_SstFileManager_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( 
jhandle); delete sptr_sst_file_manager; } rocksdb-6.11.4/java/rocksjni/sst_file_reader_iterator.cc000066400000000000000000000224521370372246700233670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::Iterator methods from Java side. #include #include #include #include "include/org_rocksdb_SstFileReaderIterator.h" #include "rocksdb/iterator.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_SstFileReaderIterator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); assert(it != nullptr); delete it; } /* * Class: org_rocksdb_SstFileReaderIterator * Method: isValid0 * Signature: (J)Z */ jboolean Java_org_rocksdb_SstFileReaderIterator_isValid0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle)->Valid(); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: seekToFirst0 * Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_seekToFirst0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToFirst(); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: seekToLast0 * Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_seekToLast0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToLast(); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: next0 * Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: prev0 * 
Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Prev(); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: seek0 * Signature: (J[BI)V */ void Java_org_rocksdb_SstFileReaderIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { jbyte* target = env->GetByteArrayElements(jtarget, nullptr); if (target == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), jtarget_len); auto* it = reinterpret_cast(handle); it->Seek(target_slice); env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: seekForPrev0 * Signature: (J[BI)V */ void Java_org_rocksdb_SstFileReaderIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { jbyte* target = env->GetByteArrayElements(jtarget, nullptr); if (target == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), jtarget_len); auto* it = reinterpret_cast(handle); it->SeekForPrev(target_slice); env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: status0 * Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_status0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->status(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: key0 * Signature: (J)[B */ jbyteArray Java_org_rocksdb_SstFileReaderIterator_key0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice key_slice = it->key(); jbyteArray jkey = env->NewByteArray(static_cast(key_slice.size())); if (jkey == nullptr) { // 
exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion( jkey, 0, static_cast(key_slice.size()), const_cast(reinterpret_cast(key_slice.data()))); return jkey; } /* * Class: org_rocksdb_SstFileReaderIterator * Method: value0 * Signature: (J)[B */ jbyteArray Java_org_rocksdb_SstFileReaderIterator_value0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice value_slice = it->value(); jbyteArray jkeyValue = env->NewByteArray(static_cast(value_slice.size())); if (jkeyValue == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion(jkeyValue, 0, static_cast(value_slice.size()), const_cast(reinterpret_cast(value_slice.data()))); return jkeyValue; } /* * Class: org_rocksdb_SstFileReaderIterator * Method: keyDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ jint Java_org_rocksdb_SstFileReaderIterator_keyDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice key_slice = it->key(); return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, key_slice, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: valueDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ jint Java_org_rocksdb_SstFileReaderIterator_valueDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice value_slice = it->value(); return ROCKSDB_NAMESPACE::JniUtil::copyToDirect(env, value_slice, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: seekDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ void Java_org_rocksdb_SstFileReaderIterator_seekDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); auto seek = 
[&it](ROCKSDB_NAMESPACE::Slice& target_slice) { it->Seek(target_slice); }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: seekForPrevDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ void Java_org_rocksdb_SstFileReaderIterator_seekForPrevDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); auto seekPrev = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { it->SeekForPrev(target_slice); }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seekPrev, env, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_SstFileReaderIterator * Method: refresh0 * Signature: (J)V */ void Java_org_rocksdb_SstFileReaderIterator_refresh0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->Refresh(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } rocksdb-6.11.4/java/rocksjni/sst_file_readerjni.cc000066400000000000000000000100361370372246700221520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::SstFileReader methods // from Java side. 
#include #include #include "include/org_rocksdb_SstFileReader.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/sst_file_reader.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_SstFileReader * Method: newSstFileReader * Signature: (J)J */ jlong Java_org_rocksdb_SstFileReader_newSstFileReader(JNIEnv * /*env*/, jclass /*jcls*/, jlong joptions) { auto *options = reinterpret_cast(joptions); ROCKSDB_NAMESPACE::SstFileReader *sst_file_reader = new ROCKSDB_NAMESPACE::SstFileReader(*options); return reinterpret_cast(sst_file_reader); } /* * Class: org_rocksdb_SstFileReader * Method: open * Signature: (JLjava/lang/String;)V */ void Java_org_rocksdb_SstFileReader_open(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jstring jfile_path) { const char *file_path = env->GetStringUTFChars(jfile_path, nullptr); if (file_path == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Open( file_path); env->ReleaseStringUTFChars(jfile_path, file_path); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileReader * Method: newIterator * Signature: (JJ)J */ jlong Java_org_rocksdb_SstFileReader_newIterator(JNIEnv * /*env*/, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle) { auto *sst_file_reader = reinterpret_cast(jhandle); auto *read_options = reinterpret_cast(jread_options_handle); return reinterpret_cast(sst_file_reader->NewIterator(*read_options)); } /* * Class: org_rocksdb_SstFileReader * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_SstFileReader_disposeInternal(JNIEnv * /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); } /* * Class: org_rocksdb_SstFileReader * Method: verifyChecksum * Signature: (J)V */ void Java_org_rocksdb_SstFileReader_verifyChecksum(JNIEnv *env, jobject /*jobj*/, jlong jhandle) { auto *sst_file_reader = 
reinterpret_cast(jhandle); auto s = sst_file_reader->VerifyChecksum(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileReader * Method: getTableProperties * Signature: (J)J */ jobject Java_org_rocksdb_SstFileReader_getTableProperties(JNIEnv *env, jobject /*jobj*/, jlong jhandle) { auto *sst_file_reader = reinterpret_cast(jhandle); std::shared_ptr tp = sst_file_reader->GetTableProperties(); jobject jtable_properties = ROCKSDB_NAMESPACE::TablePropertiesJni::fromCppTableProperties( env, *(tp.get())); return jtable_properties; } rocksdb-6.11.4/java/rocksjni/sst_file_writerjni.cc000066400000000000000000000252201370372246700222250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling C++ ROCKSDB_NAMESPACE::SstFileWriter methods // from Java side. 
#include #include #include "include/org_rocksdb_SstFileWriter.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/sst_file_writer.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_SstFileWriter * Method: newSstFileWriter * Signature: (JJJB)J */ jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJJB( JNIEnv * /*env*/, jclass /*jcls*/, jlong jenvoptions, jlong joptions, jlong jcomparator_handle, jbyte jcomparator_type) { ROCKSDB_NAMESPACE::Comparator *comparator = nullptr; switch (jcomparator_type) { // JAVA_COMPARATOR case 0x0: comparator = reinterpret_cast( jcomparator_handle); break; // JAVA_NATIVE_COMPARATOR_WRAPPER case 0x1: comparator = reinterpret_cast(jcomparator_handle); break; } auto *env_options = reinterpret_cast(jenvoptions); auto *options = reinterpret_cast(joptions); ROCKSDB_NAMESPACE::SstFileWriter *sst_file_writer = new ROCKSDB_NAMESPACE::SstFileWriter(*env_options, *options, comparator); return reinterpret_cast(sst_file_writer); } /* * Class: org_rocksdb_SstFileWriter * Method: newSstFileWriter * Signature: (JJ)J */ jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJ(JNIEnv * /*env*/, jclass /*jcls*/, jlong jenvoptions, jlong joptions) { auto *env_options = reinterpret_cast(jenvoptions); auto *options = reinterpret_cast(joptions); ROCKSDB_NAMESPACE::SstFileWriter *sst_file_writer = new ROCKSDB_NAMESPACE::SstFileWriter(*env_options, *options); return reinterpret_cast(sst_file_writer); } /* * Class: org_rocksdb_SstFileWriter * Method: open * Signature: (JLjava/lang/String;)V */ void Java_org_rocksdb_SstFileWriter_open(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jstring jfile_path) { const char *file_path = env->GetStringUTFChars(jfile_path, nullptr); if (file_path == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Open( file_path); env->ReleaseStringUTFChars(jfile_path, file_path); if (!s.ok()) { 
ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: put * Signature: (JJJ)V */ void Java_org_rocksdb_SstFileWriter_put__JJJ(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jlong jkey_handle, jlong jvalue_handle) { auto *key_slice = reinterpret_cast(jkey_handle); auto *value_slice = reinterpret_cast(jvalue_handle); ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Put( *key_slice, *value_slice); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: put * Signature: (JJJ)V */ void Java_org_rocksdb_SstFileWriter_put__J_3B_3B(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jbyteArray jval) { jbyte *key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), env->GetArrayLength(jkey)); jbyte *value = env->GetByteArrayElements(jval, nullptr); if (value == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); return; } ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), env->GetArrayLength(jval)); ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Put( key_slice, value_slice); env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); env->ReleaseByteArrayElements(jval, value, JNI_ABORT); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V */ void Java_org_rocksdb_SstFileWriter_putDirect(JNIEnv *env, jobject /*jdb*/, jlong jdb_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len) { auto *writer = reinterpret_cast(jdb_handle); auto put = [&env, &writer](ROCKSDB_NAMESPACE::Slice &key, ROCKSDB_NAMESPACE::Slice &value) { ROCKSDB_NAMESPACE::Status s = writer->Put(key, 
value); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); }; ROCKSDB_NAMESPACE::JniUtil::kv_op_direct(put, env, jkey, jkey_off, jkey_len, jval, jval_off, jval_len); } /* * Class: org_rocksdb_SstFileWriter * Method: fileSize * Signature: (J)J */ jlong Java_org_rocksdb_SstFileWriter_fileSize(JNIEnv * /*env*/, jobject /*jdb*/, jlong jdb_handle) { auto *writer = reinterpret_cast(jdb_handle); return static_cast(writer->FileSize()); } /* * Class: org_rocksdb_SstFileWriter * Method: merge * Signature: (JJJ)V */ void Java_org_rocksdb_SstFileWriter_merge__JJJ(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jlong jkey_handle, jlong jvalue_handle) { auto *key_slice = reinterpret_cast(jkey_handle); auto *value_slice = reinterpret_cast(jvalue_handle); ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Merge( *key_slice, *value_slice); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: merge * Signature: (J[B[B)V */ void Java_org_rocksdb_SstFileWriter_merge__J_3B_3B(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jbyteArray jval) { jbyte *key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), env->GetArrayLength(jkey)); jbyte *value = env->GetByteArrayElements(jval, nullptr); if (value == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); return; } ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), env->GetArrayLength(jval)); ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Merge( key_slice, value_slice); env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); env->ReleaseByteArrayElements(jval, value, JNI_ABORT); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: delete * Signature: (JJJ)V */ void 
Java_org_rocksdb_SstFileWriter_delete__J_3B(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey) { jbyte *key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), env->GetArrayLength(jkey)); ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Delete( key_slice); env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: delete * Signature: (JJJ)V */ void Java_org_rocksdb_SstFileWriter_delete__JJ(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jlong jkey_handle) { auto *key_slice = reinterpret_cast(jkey_handle); ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Delete( *key_slice); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: finish * Signature: (J)V */ void Java_org_rocksdb_SstFileWriter_finish(JNIEnv *env, jobject /*jobj*/, jlong jhandle) { ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Finish(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_SstFileWriter * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_SstFileWriter_disposeInternal(JNIEnv * /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); } rocksdb-6.11.4/java/rocksjni/statistics.cc000066400000000000000000000202431370372246700205120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::Statistics methods from Java side. 
#include #include #include #include "include/org_rocksdb_Statistics.h" #include "rocksdb/statistics.h" #include "rocksjni/portal.h" #include "rocksjni/statisticsjni.h" /* * Class: org_rocksdb_Statistics * Method: newStatistics * Signature: ()J */ jlong Java_org_rocksdb_Statistics_newStatistics__( JNIEnv* env, jclass jcls) { return Java_org_rocksdb_Statistics_newStatistics___3BJ( env, jcls, nullptr, 0); } /* * Class: org_rocksdb_Statistics * Method: newStatistics * Signature: (J)J */ jlong Java_org_rocksdb_Statistics_newStatistics__J( JNIEnv* env, jclass jcls, jlong jother_statistics_handle) { return Java_org_rocksdb_Statistics_newStatistics___3BJ( env, jcls, nullptr, jother_statistics_handle); } /* * Class: org_rocksdb_Statistics * Method: newStatistics * Signature: ([B)J */ jlong Java_org_rocksdb_Statistics_newStatistics___3B( JNIEnv* env, jclass jcls, jbyteArray jhistograms) { return Java_org_rocksdb_Statistics_newStatistics___3BJ( env, jcls, jhistograms, 0); } /* * Class: org_rocksdb_Statistics * Method: newStatistics * Signature: ([BJ)J */ jlong Java_org_rocksdb_Statistics_newStatistics___3BJ( JNIEnv* env, jclass, jbyteArray jhistograms, jlong jother_statistics_handle) { std::shared_ptr* pSptr_other_statistics = nullptr; if (jother_statistics_handle > 0) { pSptr_other_statistics = reinterpret_cast*>( jother_statistics_handle); } std::set histograms; if (jhistograms != nullptr) { const jsize len = env->GetArrayLength(jhistograms); if (len > 0) { jbyte* jhistogram = env->GetByteArrayElements(jhistograms, nullptr); if (jhistogram == nullptr) { // exception thrown: OutOfMemoryError return 0; } for (jsize i = 0; i < len; i++) { const ROCKSDB_NAMESPACE::Histograms histogram = ROCKSDB_NAMESPACE::HistogramTypeJni::toCppHistograms(jhistogram[i]); histograms.emplace(histogram); } env->ReleaseByteArrayElements(jhistograms, jhistogram, JNI_ABORT); } } std::shared_ptr sptr_other_statistics = nullptr; if (pSptr_other_statistics != nullptr) { sptr_other_statistics = 
*pSptr_other_statistics; } auto* pSptr_statistics = new std::shared_ptr( new ROCKSDB_NAMESPACE::StatisticsJni(sptr_other_statistics, histograms)); return reinterpret_cast(pSptr_statistics); } /* * Class: org_rocksdb_Statistics * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_Statistics_disposeInternal( JNIEnv*, jobject, jlong jhandle) { if (jhandle > 0) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); delete pSptr_statistics; } } /* * Class: org_rocksdb_Statistics * Method: statsLevel * Signature: (J)B */ jbyte Java_org_rocksdb_Statistics_statsLevel( JNIEnv*, jobject, jlong jhandle) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); return ROCKSDB_NAMESPACE::StatsLevelJni::toJavaStatsLevel( pSptr_statistics->get()->get_stats_level()); } /* * Class: org_rocksdb_Statistics * Method: setStatsLevel * Signature: (JB)V */ void Java_org_rocksdb_Statistics_setStatsLevel( JNIEnv*, jobject, jlong jhandle, jbyte jstats_level) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); auto stats_level = ROCKSDB_NAMESPACE::StatsLevelJni::toCppStatsLevel(jstats_level); pSptr_statistics->get()->set_stats_level(stats_level); } /* * Class: org_rocksdb_Statistics * Method: getTickerCount * Signature: (JB)J */ jlong Java_org_rocksdb_Statistics_getTickerCount( JNIEnv*, jobject, jlong jhandle, jbyte jticker_type) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); auto ticker = ROCKSDB_NAMESPACE::TickerTypeJni::toCppTickers(jticker_type); uint64_t count = pSptr_statistics->get()->getTickerCount(ticker); return static_cast(count); } /* * Class: org_rocksdb_Statistics * Method: getAndResetTickerCount * Signature: (JB)J */ jlong Java_org_rocksdb_Statistics_getAndResetTickerCount( JNIEnv*, jobject, jlong jhandle, jbyte jticker_type) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); auto ticker = 
ROCKSDB_NAMESPACE::TickerTypeJni::toCppTickers(jticker_type); return pSptr_statistics->get()->getAndResetTickerCount(ticker); } /* * Class: org_rocksdb_Statistics * Method: getHistogramData * Signature: (JB)Lorg/rocksdb/HistogramData; */ jobject Java_org_rocksdb_Statistics_getHistogramData( JNIEnv* env, jobject, jlong jhandle, jbyte jhistogram_type) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); // TODO(AR) perhaps better to construct a Java Object Wrapper that // uses ptr to C++ `new HistogramData` ROCKSDB_NAMESPACE::HistogramData data; auto histogram = ROCKSDB_NAMESPACE::HistogramTypeJni::toCppHistograms(jhistogram_type); pSptr_statistics->get()->histogramData( static_cast(histogram), &data); jclass jclazz = ROCKSDB_NAMESPACE::HistogramDataJni::getJClass(env); if (jclazz == nullptr) { // exception occurred accessing class return nullptr; } jmethodID mid = ROCKSDB_NAMESPACE::HistogramDataJni::getConstructorMethodId(env); if (mid == nullptr) { // exception occurred accessing method return nullptr; } return env->NewObject(jclazz, mid, data.median, data.percentile95, data.percentile99, data.average, data.standard_deviation, data.max, data.count, data.sum, data.min); } /* * Class: org_rocksdb_Statistics * Method: getHistogramString * Signature: (JB)Ljava/lang/String; */ jstring Java_org_rocksdb_Statistics_getHistogramString( JNIEnv* env, jobject, jlong jhandle, jbyte jhistogram_type) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); auto histogram = ROCKSDB_NAMESPACE::HistogramTypeJni::toCppHistograms(jhistogram_type); auto str = pSptr_statistics->get()->getHistogramString(histogram); return env->NewStringUTF(str.c_str()); } /* * Class: org_rocksdb_Statistics * Method: reset * Signature: (J)V */ void Java_org_rocksdb_Statistics_reset( JNIEnv* env, jobject, jlong jhandle) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); 
ROCKSDB_NAMESPACE::Status s = pSptr_statistics->get()->Reset(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Statistics * Method: toString * Signature: (J)Ljava/lang/String; */ jstring Java_org_rocksdb_Statistics_toString( JNIEnv* env, jobject, jlong jhandle) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); assert(pSptr_statistics != nullptr); auto str = pSptr_statistics->get()->ToString(); return env->NewStringUTF(str.c_str()); } rocksdb-6.11.4/java/rocksjni/statisticsjni.cc000066400000000000000000000017551370372246700212220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Statistics #include "rocksjni/statisticsjni.h" namespace ROCKSDB_NAMESPACE { StatisticsJni::StatisticsJni(std::shared_ptr stats) : StatisticsImpl(stats), m_ignore_histograms() {} StatisticsJni::StatisticsJni(std::shared_ptr stats, const std::set ignore_histograms) : StatisticsImpl(stats), m_ignore_histograms(ignore_histograms) {} bool StatisticsJni::HistEnabledForType(uint32_t type) const { if (type >= HISTOGRAM_ENUM_MAX) { return false; } if (m_ignore_histograms.count(type) > 0) { return false; } return true; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/java/rocksjni/statisticsjni.h000066400000000000000000000020001370372246700210440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Statistics #ifndef JAVA_ROCKSJNI_STATISTICSJNI_H_ #define JAVA_ROCKSJNI_STATISTICSJNI_H_ #include #include #include #include "rocksdb/statistics.h" #include "monitoring/statistics.h" namespace ROCKSDB_NAMESPACE { class StatisticsJni : public StatisticsImpl { public: StatisticsJni(std::shared_ptr stats); StatisticsJni(std::shared_ptr stats, const std::set ignore_histograms); virtual bool HistEnabledForType(uint32_t type) const override; private: const std::set m_ignore_histograms; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_STATISTICSJNI_H_ rocksdb-6.11.4/java/rocksjni/table.cc000066400000000000000000000156321370372246700174150ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Options. 
#include "rocksdb/table.h"
#include <jni.h>
#include "include/org_rocksdb_BlockBasedTableConfig.h"
#include "include/org_rocksdb_PlainTableConfig.h"
#include "portal.h"
#include "rocksdb/cache.h"
#include "rocksdb/filter_policy.h"

/*
 * Class:     org_rocksdb_PlainTableConfig
 * Method:    newTableFactoryHandle
 * Signature: (IIDIIBZZ)J
 *
 * Copies the arguments into a PlainTableOptions and returns the address of
 * the factory created by NewPlainTableFactory().
 */
jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle(
    JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size,
    jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness,
    jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode,
    jboolean jstore_index_in_file) {
  ROCKSDB_NAMESPACE::PlainTableOptions options =
      ROCKSDB_NAMESPACE::PlainTableOptions();
  options.user_key_len = jkey_size;
  options.bloom_bits_per_key = jbloom_bits_per_key;
  options.hash_table_ratio = jhash_table_ratio;
  options.index_sparseness = jindex_sparseness;
  options.huge_page_tlb_size = jhuge_page_tlb_size;
  options.encoding_type =
      static_cast<ROCKSDB_NAMESPACE::EncodingType>(jencoding_type);
  options.full_scan_mode = jfull_scan_mode;
  options.store_index_in_file = jstore_index_in_file;
  return reinterpret_cast<jlong>(
      ROCKSDB_NAMESPACE::NewPlainTableFactory(options));
}

/*
 * Class:     org_rocksdb_BlockBasedTableConfig
 * Method:    newTableFactoryHandle
 * Signature: (ZZZZBBDBZJJJJIIIJZZJZZIIZZJIJI)J
 *
 * Copies the arguments into a BlockBasedTableOptions and returns the address
 * of the factory created by NewBlockBasedTableFactory().
 *
 * Cache selection: an explicit cache handle wins; otherwise a new LRU cache
 * is built from the size / shard-bits arguments. For the block cache a
 * negative size disables the cache; for the compressed cache only a positive
 * size creates one. Handle arguments use 0 to mean "none".
 */
jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle(
    JNIEnv*, jobject, jboolean jcache_index_and_filter_blocks,
    jboolean jcache_index_and_filter_blocks_with_high_priority,
    jboolean jpin_l0_filter_and_index_blocks_in_cache,
    jboolean jpin_top_level_index_and_filter, jbyte jindex_type_value,
    jbyte jdata_block_index_type_value,
    jdouble jdata_block_hash_table_util_ratio, jbyte jchecksum_type_value,
    jboolean jno_block_cache, jlong jblock_cache_handle,
    jlong jpersistent_cache_handle, jlong jblock_cache_compressed_handle,
    jlong jblock_size, jint jblock_size_deviation, jint jblock_restart_interval,
    jint jindex_block_restart_interval, jlong jmetadata_block_size,
    jboolean jpartition_filters, jboolean juse_delta_encoding,
    jlong jfilter_policy_handle, jboolean jwhole_key_filtering,
    jboolean jverify_compression, jint jread_amp_bytes_per_bit,
    jint jformat_version, jboolean jenable_index_compression,
    jboolean jblock_align, jlong jblock_cache_size,
    jint jblock_cache_num_shard_bits, jlong jblock_cache_compressed_size,
    jint jblock_cache_compressed_num_shard_bits) {
  ROCKSDB_NAMESPACE::BlockBasedTableOptions options;
  options.cache_index_and_filter_blocks =
      static_cast<bool>(jcache_index_and_filter_blocks);
  options.cache_index_and_filter_blocks_with_high_priority =
      static_cast<bool>(jcache_index_and_filter_blocks_with_high_priority);
  options.pin_l0_filter_and_index_blocks_in_cache =
      static_cast<bool>(jpin_l0_filter_and_index_blocks_in_cache);
  options.pin_top_level_index_and_filter =
      static_cast<bool>(jpin_top_level_index_and_filter);
  options.index_type =
      ROCKSDB_NAMESPACE::IndexTypeJni::toCppIndexType(jindex_type_value);
  options.data_block_index_type =
      ROCKSDB_NAMESPACE::DataBlockIndexTypeJni::toCppDataBlockIndexType(
          jdata_block_index_type_value);
  options.data_block_hash_table_util_ratio =
      static_cast<double>(jdata_block_hash_table_util_ratio);
  options.checksum = ROCKSDB_NAMESPACE::ChecksumTypeJni::toCppChecksumType(
      jchecksum_type_value);

  // --- uncompressed block cache ---
  options.no_block_cache = static_cast<bool>(jno_block_cache);
  if (options.no_block_cache) {
    options.block_cache = nullptr;
  } else {
    // Handles are addresses: test against 0, not the sign, so an address
    // that reads as a negative jlong is still honoured.
    if (jblock_cache_handle != 0) {
      std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *pCache =
          reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *>(
              jblock_cache_handle);
      options.block_cache = *pCache;
    } else if (jblock_cache_size >= 0) {
      if (jblock_cache_num_shard_bits > 0) {
        options.block_cache = ROCKSDB_NAMESPACE::NewLRUCache(
            static_cast<size_t>(jblock_cache_size),
            static_cast<int>(jblock_cache_num_shard_bits));
      } else {
        options.block_cache = ROCKSDB_NAMESPACE::NewLRUCache(
            static_cast<size_t>(jblock_cache_size));
      }
    } else {
      // Negative size and no handle: run without a block cache.
      options.no_block_cache = true;
      options.block_cache = nullptr;
    }
  }

  // --- persistent cache ---
  if (jpersistent_cache_handle != 0) {
    std::shared_ptr<ROCKSDB_NAMESPACE::PersistentCache> *pCache =
        reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::PersistentCache> *>(
            jpersistent_cache_handle);
    options.persistent_cache = *pCache;
  }

  // --- compressed block cache ---
  if (jblock_cache_compressed_handle != 0) {
    std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *pCache =
        reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::Cache> *>(
            jblock_cache_compressed_handle);
    options.block_cache_compressed = *pCache;
  } else if (jblock_cache_compressed_size > 0) {
    if (jblock_cache_compressed_num_shard_bits > 0) {
      options.block_cache_compressed = ROCKSDB_NAMESPACE::NewLRUCache(
          static_cast<size_t>(jblock_cache_compressed_size),
          static_cast<int>(jblock_cache_compressed_num_shard_bits));
    } else {
      options.block_cache_compressed = ROCKSDB_NAMESPACE::NewLRUCache(
          static_cast<size_t>(jblock_cache_compressed_size));
    }
  }

  options.block_size = static_cast<size_t>(jblock_size);
  options.block_size_deviation = static_cast<int>(jblock_size_deviation);
  options.block_restart_interval = static_cast<int>(jblock_restart_interval);
  options.index_block_restart_interval =
      static_cast<int>(jindex_block_restart_interval);
  options.metadata_block_size = static_cast<uint64_t>(jmetadata_block_size);
  options.partition_filters = static_cast<bool>(jpartition_filters);
  options.use_delta_encoding = static_cast<bool>(juse_delta_encoding);

  // --- filter policy ---
  if (jfilter_policy_handle != 0) {
    std::shared_ptr<ROCKSDB_NAMESPACE::FilterPolicy> *pFilterPolicy =
        reinterpret_cast<std::shared_ptr<ROCKSDB_NAMESPACE::FilterPolicy> *>(
            jfilter_policy_handle);
    options.filter_policy = *pFilterPolicy;
  }

  options.whole_key_filtering = static_cast<bool>(jwhole_key_filtering);
  options.verify_compression = static_cast<bool>(jverify_compression);
  options.read_amp_bytes_per_bit =
      static_cast<uint32_t>(jread_amp_bytes_per_bit);
  options.format_version = static_cast<uint32_t>(jformat_version);
  options.enable_index_compression =
      static_cast<bool>(jenable_index_compression);
  options.block_align = static_cast<bool>(jblock_align);

  return reinterpret_cast<jlong>(
      ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(options));
}
rocksdb-6.11.4/java/rocksjni/table_filter.cc000066400000000000000000000015641370372246700207610ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // org.rocksdb.AbstractTableFilter. #include #include #include "include/org_rocksdb_AbstractTableFilter.h" #include "rocksjni/table_filter_jnicallback.h" /* * Class: org_rocksdb_AbstractTableFilter * Method: createNewTableFilter * Signature: ()J */ jlong Java_org_rocksdb_AbstractTableFilter_createNewTableFilter( JNIEnv* env, jobject jtable_filter) { auto* table_filter_jnicallback = new ROCKSDB_NAMESPACE::TableFilterJniCallback(env, jtable_filter); return reinterpret_cast(table_filter_jnicallback); } rocksdb-6.11.4/java/rocksjni/table_filter_jnicallback.cc000066400000000000000000000045431370372246700232760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TableFilter. 
#include "rocksjni/table_filter_jnicallback.h"
#include "rocksjni/portal.h"

namespace ROCKSDB_NAMESPACE {

// Resolves the Java filter method and builds the std::function that forwards
// each TableProperties to it. If the method id cannot be resolved the
// constructor returns early and the function object is left empty.
TableFilterJniCallback::TableFilterJniCallback(
    JNIEnv* env, jobject jtable_filter)
    : JniCallback(env, jtable_filter) {
  m_jfilter_methodid =
      AbstractTableFilterJni::getFilterMethod(env);
  if (m_jfilter_methodid == nullptr) {
    // exception thrown: NoSuchMethodException or OutOfMemoryError
    return;
  }

  // create the function reference
  /*
  Note the JNI ENV must be obtained/release
  on each call to the function itself as
  it may be called from multiple threads
  */
  m_table_filter_function =
      [this](const ROCKSDB_NAMESPACE::TableProperties& table_properties) {
        jboolean attached_thread = JNI_FALSE;
        JNIEnv* thread_env = getJniEnv(&attached_thread);
        assert(thread_env != nullptr);

        // create a Java TableProperties object
        jobject jtable_properties = TablePropertiesJni::fromCppTableProperties(
            thread_env, table_properties);
        if (jtable_properties == nullptr) {
          // exception thrown from fromCppTableProperties
          thread_env->ExceptionDescribe();  // print out exception to stderr
          releaseJniEnv(attached_thread);
          return false;
        }

        jboolean result = thread_env->CallBooleanMethod(
            m_jcallback_obj, m_jfilter_methodid, jtable_properties);
        if (thread_env->ExceptionCheck()) {
          // exception thrown from CallBooleanMethod
          thread_env->DeleteLocalRef(jtable_properties);
          thread_env->ExceptionDescribe();  // print out exception to stderr
          releaseJniEnv(attached_thread);
          return false;
        }

        // ok... cleanup and then return
        // Drop the local reference on the success path too, matching the
        // exception path, so repeated calls do not pile up local refs.
        thread_env->DeleteLocalRef(jtable_properties);
        releaseJniEnv(attached_thread);
        return static_cast<bool>(result);
      };
}

// Returns a copy of the stored filter function (empty if construction failed
// to resolve the Java method).
std::function<bool(const ROCKSDB_NAMESPACE::TableProperties&)>
TableFilterJniCallback::GetTableFilterFunction() {
  return m_table_filter_function;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/java/rocksjni/table_filter_jnicallback.h000066400000000000000000000021211370372246700231260ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TableFilter. #ifndef JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_ #define JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_ #include #include #include #include "rocksdb/table_properties.h" #include "rocksjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { class TableFilterJniCallback : public JniCallback { public: TableFilterJniCallback( JNIEnv* env, jobject jtable_filter); std::function GetTableFilterFunction(); private: jmethodID m_jfilter_methodid; std::function m_table_filter_function; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_TABLE_FILTER_JNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/thread_status.cc000066400000000000000000000105251370372246700211740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::ThreadStatus methods from Java side. 
#include #include "portal.h" #include "include/org_rocksdb_ThreadStatus.h" #include "rocksdb/thread_status.h" /* * Class: org_rocksdb_ThreadStatus * Method: getThreadTypeName * Signature: (B)Ljava/lang/String; */ jstring Java_org_rocksdb_ThreadStatus_getThreadTypeName( JNIEnv* env, jclass, jbyte jthread_type_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetThreadTypeName( ROCKSDB_NAMESPACE::ThreadTypeJni::toCppThreadType(jthread_type_value)); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); } /* * Class: org_rocksdb_ThreadStatus * Method: getOperationName * Signature: (B)Ljava/lang/String; */ jstring Java_org_rocksdb_ThreadStatus_getOperationName( JNIEnv* env, jclass, jbyte joperation_type_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationName( ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( joperation_type_value)); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); } /* * Class: org_rocksdb_ThreadStatus * Method: microsToStringNative * Signature: (J)Ljava/lang/String; */ jstring Java_org_rocksdb_ThreadStatus_microsToStringNative( JNIEnv* env, jclass, jlong jmicros) { auto str = ROCKSDB_NAMESPACE::ThreadStatus::MicrosToString( static_cast(jmicros)); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &str, true); } /* * Class: org_rocksdb_ThreadStatus * Method: getOperationStageName * Signature: (B)Ljava/lang/String; */ jstring Java_org_rocksdb_ThreadStatus_getOperationStageName( JNIEnv* env, jclass, jbyte joperation_stage_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationStageName( ROCKSDB_NAMESPACE::OperationStageJni::toCppOperationStage( joperation_stage_value)); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); } /* * Class: org_rocksdb_ThreadStatus * Method: getOperationPropertyName * Signature: (BI)Ljava/lang/String; */ jstring Java_org_rocksdb_ThreadStatus_getOperationPropertyName( JNIEnv* env, jclass, jbyte joperation_type_value, jint jindex) { auto name = 
ROCKSDB_NAMESPACE::ThreadStatus::GetOperationPropertyName( ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( joperation_type_value), static_cast(jindex)); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); } /* * Class: org_rocksdb_ThreadStatus * Method: interpretOperationProperties * Signature: (B[J)Ljava/util/Map; */ jobject Java_org_rocksdb_ThreadStatus_interpretOperationProperties( JNIEnv* env, jclass, jbyte joperation_type_value, jlongArray joperation_properties) { //convert joperation_properties const jsize len = env->GetArrayLength(joperation_properties); const std::unique_ptr op_properties(new uint64_t[len]); jlong* jop = env->GetLongArrayElements(joperation_properties, nullptr); if (jop == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } for (jsize i = 0; i < len; i++) { op_properties[i] = static_cast(jop[i]); } env->ReleaseLongArrayElements(joperation_properties, jop, JNI_ABORT); // call the function auto result = ROCKSDB_NAMESPACE::ThreadStatus::InterpretOperationProperties( ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( joperation_type_value), op_properties.get()); jobject jresult = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &result); if (env->ExceptionCheck()) { // exception occurred return nullptr; } return jresult; } /* * Class: org_rocksdb_ThreadStatus * Method: getStateName * Signature: (B)Ljava/lang/String; */ jstring Java_org_rocksdb_ThreadStatus_getStateName( JNIEnv* env, jclass, jbyte jstate_type_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetStateName( ROCKSDB_NAMESPACE::StateTypeJni::toCppStateType(jstate_type_value)); return ROCKSDB_NAMESPACE::JniUtil::toJavaString(env, &name, true); } rocksdb-6.11.4/java/rocksjni/trace_writer.cc000066400000000000000000000014751370372246700210200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionFilterFactory. #include #include "include/org_rocksdb_AbstractTraceWriter.h" #include "rocksjni/trace_writer_jnicallback.h" /* * Class: org_rocksdb_AbstractTraceWriter * Method: createNewTraceWriter * Signature: ()J */ jlong Java_org_rocksdb_AbstractTraceWriter_createNewTraceWriter( JNIEnv* env, jobject jobj) { auto* trace_writer = new ROCKSDB_NAMESPACE::TraceWriterJniCallback(env, jobj); return reinterpret_cast(trace_writer); } rocksdb-6.11.4/java/rocksjni/trace_writer_jnicallback.cc000066400000000000000000000070271370372246700233340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TraceWriter. 
#include "rocksjni/trace_writer_jnicallback.h"
#include "rocksjni/portal.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// Converts the packed jshort returned by the Java proxy methods into a C++
// Status: the high byte carries the status code, the low byte the sub-code.
Status UnpackProxyStatus(const jshort jstatus) {
  const jbyte jcode_value = (jstatus >> 8) & 0xFF;
  const jbyte jsub_code_value = jstatus & 0xFF;

  std::unique_ptr<Status> s =
      StatusJni::toCppStatus(jcode_value, jsub_code_value);
  if (s == nullptr) {
    // NOTE(review): StatusJni::toCppStatus is defined outside this file; this
    // guard avoids dereferencing an empty pointer should it not recognise the
    // code it was given - confirm against its implementation.
    return Status::IOError(
        "Unknown status code returned from AbstractTraceWriter");
  }
  return Status(*s);
}

}  // namespace

// Binds to the Java AbstractTraceWriter instance and looks up the three proxy
// method IDs used by Write(), Close() and GetFileSize().
//
// All IDs start out as nullptr so that, if a lookup fails and the constructor
// returns early (with a Java exception pending), the remaining members are
// not left indeterminate.
TraceWriterJniCallback::TraceWriterJniCallback(JNIEnv* env,
                                               jobject jtrace_writer)
    : JniCallback(env, jtrace_writer),
      m_jwrite_proxy_methodid(nullptr),
      m_jclose_writer_proxy_methodid(nullptr),
      m_jget_file_size_methodid(nullptr) {
  m_jwrite_proxy_methodid = AbstractTraceWriterJni::getWriteProxyMethodId(env);
  if (m_jwrite_proxy_methodid == nullptr) {
    // exception thrown: NoSuchMethodException or OutOfMemoryError
    return;
  }

  m_jclose_writer_proxy_methodid =
      AbstractTraceWriterJni::getCloseWriterProxyMethodId(env);
  if (m_jclose_writer_proxy_methodid == nullptr) {
    // exception thrown: NoSuchMethodException or OutOfMemoryError
    return;
  }

  m_jget_file_size_methodid =
      AbstractTraceWriterJni::getGetFileSizeMethodId(env);
  if (m_jget_file_size_methodid == nullptr) {
    // exception thrown: NoSuchMethodException or OutOfMemoryError
    return;
  }
}

// Forwards `data` to the Java writer's write proxy method.
//
// Returns IOError if no JNI environment can be obtained or if the Java call
// raises an exception (which is printed to stderr); otherwise returns the
// Status decoded from the proxy's packed return value.
Status TraceWriterJniCallback::Write(const Slice& data) {
  jboolean attached_thread = JNI_FALSE;
  JNIEnv* env = getJniEnv(&attached_thread);
  if (env == nullptr) {
    return Status::IOError("Unable to attach JNI Environment");
  }

  // The address of the slice is handed over as a Java long. It is converted
  // to jlong explicitly because Call<Type>Method is variadic: a raw pointer
  // argument would only have the right width where pointers are 64 bits wide.
  jshort jstatus =
      env->CallShortMethod(m_jcallback_obj, m_jwrite_proxy_methodid,
                           reinterpret_cast<jlong>(&data));

  if (env->ExceptionCheck()) {
    // exception thrown from CallShortMethod
    env->ExceptionDescribe();  // print out exception to stderr
    releaseJniEnv(attached_thread);
    return Status::IOError(
        "Unable to call AbstractTraceWriter#writeProxy(long)");
  }

  // unpack status code and status sub-code from jstatus
  const Status result = UnpackProxyStatus(jstatus);

  releaseJniEnv(attached_thread);

  return result;
}

// Asks the Java writer to close itself via its close proxy method.
//
// Error handling mirrors Write(): IOError when the JNI environment cannot be
// obtained or the Java call throws, otherwise the decoded proxy status.
Status TraceWriterJniCallback::Close() {
  jboolean attached_thread = JNI_FALSE;
  JNIEnv* env = getJniEnv(&attached_thread);
  if (env == nullptr) {
    return Status::IOError("Unable to attach JNI Environment");
  }

  jshort jstatus =
      env->CallShortMethod(m_jcallback_obj, m_jclose_writer_proxy_methodid);

  if (env->ExceptionCheck()) {
    // exception thrown from CallShortMethod
    env->ExceptionDescribe();  // print out exception to stderr
    releaseJniEnv(attached_thread);
    return Status::IOError(
        "Unable to call AbstractTraceWriter#closeWriterProxy()");
  }

  // unpack status code and status sub-code from jstatus
  const Status result = UnpackProxyStatus(jstatus);

  releaseJniEnv(attached_thread);

  return result;
}

// Returns the value reported by the Java writer's file-size method.
//
// Returns 0 when the JNI environment cannot be obtained or when the Java call
// throws (the exception is printed to stderr).
uint64_t TraceWriterJniCallback::GetFileSize() {
  jboolean attached_thread = JNI_FALSE;
  JNIEnv* env = getJniEnv(&attached_thread);
  if (env == nullptr) {
    return 0;
  }

  jlong jfile_size =
      env->CallLongMethod(m_jcallback_obj, m_jget_file_size_methodid);

  if (env->ExceptionCheck()) {
    // exception thrown from CallLongMethod
    env->ExceptionDescribe();  // print out exception to stderr
    releaseJniEnv(attached_thread);
    return 0;
  }

  releaseJniEnv(attached_thread);

  return static_cast<uint64_t>(jfile_size);
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/java/rocksjni/trace_writer_jnicallback.h000066400000000000000000000021241370372246700231670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// This file implements the callback "bridge" between Java and C++ for
// ROCKSDB_NAMESPACE::TraceWriter.
#ifndef JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_ #define JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_ #include #include #include "rocksdb/trace_reader_writer.h" #include "rocksjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { class TraceWriterJniCallback : public JniCallback, public TraceWriter { public: TraceWriterJniCallback( JNIEnv* env, jobject jtrace_writer); virtual Status Write(const Slice& data); virtual Status Close(); virtual uint64_t GetFileSize(); private: jmethodID m_jwrite_proxy_methodid; jmethodID m_jclose_writer_proxy_methodid; jmethodID m_jget_file_size_methodid; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_TRACE_WRITER_JNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/transaction.cc000066400000000000000000001733661370372246700206640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::Transaction. 
#include #include #include "include/org_rocksdb_Transaction.h" #include "rocksdb/utilities/transaction.h" #include "rocksjni/portal.h" using namespace std::placeholders; #if defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4503) // identifier' : decorated name length // exceeded, name was truncated #endif /* * Class: org_rocksdb_Transaction * Method: setSnapshot * Signature: (J)V */ void Java_org_rocksdb_Transaction_setSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->SetSnapshot(); } /* * Class: org_rocksdb_Transaction * Method: setSnapshotOnNextOperation * Signature: (J)V */ void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__J( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->SetSnapshotOnNextOperation(nullptr); } /* * Class: org_rocksdb_Transaction * Method: setSnapshotOnNextOperation * Signature: (JJ)V */ void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__JJ( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jtxn_notifier_handle) { auto* txn = reinterpret_cast(jhandle); auto* txn_notifier = reinterpret_cast< std::shared_ptr*>( jtxn_notifier_handle); txn->SetSnapshotOnNextOperation(*txn_notifier); } /* * Class: org_rocksdb_Transaction * Method: getSnapshot * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); const ROCKSDB_NAMESPACE::Snapshot* snapshot = txn->GetSnapshot(); return reinterpret_cast(snapshot); } /* * Class: org_rocksdb_Transaction * Method: clearSnapshot * Signature: (J)V */ void Java_org_rocksdb_Transaction_clearSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->ClearSnapshot(); } /* * Class: org_rocksdb_Transaction * Method: prepare * Signature: (J)V */ void Java_org_rocksdb_Transaction_prepare(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { 
auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->Prepare(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Transaction * Method: commit * Signature: (J)V */ void Java_org_rocksdb_Transaction_commit(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->Commit(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Transaction * Method: rollback * Signature: (J)V */ void Java_org_rocksdb_Transaction_rollback(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->Rollback(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Transaction * Method: setSavePoint * Signature: (J)V */ void Java_org_rocksdb_Transaction_setSavePoint(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->SetSavePoint(); } /* * Class: org_rocksdb_Transaction * Method: rollbackToSavePoint * Signature: (J)V */ void Java_org_rocksdb_Transaction_rollbackToSavePoint(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->RollbackToSavePoint(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } typedef std::function FnGet; // TODO(AR) consider refactoring to share this between here and rocksjni.cc jbyteArray txn_get_helper(JNIEnv* env, const FnGet& fn_get, const jlong& jread_options_handle, const jbyteArray& jkey, const jint& jkey_part_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_part_len); auto* read_options = reinterpret_cast(jread_options_handle); std::string value; ROCKSDB_NAMESPACE::Status s = 
fn_get(*read_options, key_slice, &value); // trigger java unref on key. // by passing JNI_ABORT, it will simply release the reference without // copying the result back to the java byte array. env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); if (s.IsNotFound()) { return nullptr; } if (s.ok()) { jbyteArray jret_value = env->NewByteArray(static_cast(value.size())); if (jret_value == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion(jret_value, 0, static_cast(value.size()), const_cast(reinterpret_cast(value.c_str()))); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException return nullptr; } return jret_value; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } /* * Class: org_rocksdb_Transaction * Method: get * Signature: (JJ[BIJ)[B */ jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnGet fn_get = std::bind( &ROCKSDB_NAMESPACE::Transaction::Get, txn, _1, column_family_handle, _2, _3); return txn_get_helper(env, fn_get, jread_options_handle, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: get * Signature: (JJ[BI)[B */ jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); FnGet fn_get = std::bind( &ROCKSDB_NAMESPACE::Transaction::Get, txn, _1, _2, _3); return txn_get_helper(env, fn_get, jread_options_handle, jkey, jkey_part_len); } // TODO(AR) consider refactoring to share this between here and rocksjni.cc // used by txn_multi_get_helper below std::vector txn_column_families_helper( JNIEnv* env, jlongArray jcolumn_family_handles, bool* has_exception) { 
std::vector cf_handles; if (jcolumn_family_handles != nullptr) { const jsize len_cols = env->GetArrayLength(jcolumn_family_handles); if (len_cols > 0) { if (env->EnsureLocalCapacity(len_cols) != 0) { // out of memory *has_exception = JNI_TRUE; return std::vector(); } jlong* jcfh = env->GetLongArrayElements(jcolumn_family_handles, nullptr); if (jcfh == nullptr) { // exception thrown: OutOfMemoryError *has_exception = JNI_TRUE; return std::vector(); } for (int i = 0; i < len_cols; i++) { auto* cf_handle = reinterpret_cast(jcfh[i]); cf_handles.push_back(cf_handle); } env->ReleaseLongArrayElements(jcolumn_family_handles, jcfh, JNI_ABORT); } } return cf_handles; } typedef std::function( const ROCKSDB_NAMESPACE::ReadOptions&, const std::vector&, std::vector*)> FnMultiGet; void free_parts( JNIEnv* env, std::vector>& parts_to_free) { for (auto& value : parts_to_free) { jobject jk; jbyteArray jk_ba; jbyte* jk_val; std::tie(jk_ba, jk_val, jk) = value; env->ReleaseByteArrayElements(jk_ba, jk_val, JNI_ABORT); env->DeleteLocalRef(jk); } } // TODO(AR) consider refactoring to share this between here and rocksjni.cc // cf multi get jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get, const jlong& jread_options_handle, const jobjectArray& jkey_parts) { const jsize len_key_parts = env->GetArrayLength(jkey_parts); if (env->EnsureLocalCapacity(len_key_parts) != 0) { // out of memory return nullptr; } std::vector key_parts; std::vector> key_parts_to_free; for (int i = 0; i < len_key_parts; i++) { const jobject jk = env->GetObjectArrayElement(jkey_parts, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException free_parts(env, key_parts_to_free); return nullptr; } jbyteArray jk_ba = reinterpret_cast(jk); const jsize len_key = env->GetArrayLength(jk_ba); if (env->EnsureLocalCapacity(len_key) != 0) { // out of memory env->DeleteLocalRef(jk); free_parts(env, key_parts_to_free); return nullptr; } jbyte* jk_val = 
env->GetByteArrayElements(jk_ba, nullptr); if (jk_val == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jk); free_parts(env, key_parts_to_free); return nullptr; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(jk_val), len_key); key_parts.push_back(key_slice); key_parts_to_free.push_back(std::make_tuple(jk_ba, jk_val, jk)); } auto* read_options = reinterpret_cast(jread_options_handle); std::vector value_parts; std::vector s = fn_multi_get(*read_options, key_parts, &value_parts); // free up allocated byte arrays free_parts(env, key_parts_to_free); // prepare the results const jclass jcls_ba = env->FindClass("[B"); jobjectArray jresults = env->NewObjectArray(static_cast(s.size()), jcls_ba, nullptr); if (jresults == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } // add to the jresults for (std::vector::size_type i = 0; i != s.size(); i++) { if (s[i].ok()) { jbyteArray jentry_value = env->NewByteArray(static_cast(value_parts[i].size())); if (jentry_value == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion( jentry_value, 0, static_cast(value_parts[i].size()), const_cast(reinterpret_cast(value_parts[i].c_str()))); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jentry_value); return nullptr; } env->SetObjectArrayElement(jresults, static_cast(i), jentry_value); env->DeleteLocalRef(jentry_value); } } return jresults; } /* * Class: org_rocksdb_Transaction * Method: multiGet * Signature: (JJ[[B[J)[[B */ jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B_3J( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts, jlongArray jcolumn_family_handles) { bool has_exception = false; const std::vector column_family_handles = txn_column_families_helper( env, jcolumn_family_handles, &has_exception); if (has_exception) { // exception thrown: OutOfMemoryError return nullptr; } auto* txn = 
reinterpret_cast(jhandle); FnMultiGet fn_multi_get = std::bind ( ROCKSDB_NAMESPACE::Transaction::*)( const ROCKSDB_NAMESPACE::ReadOptions&, const std::vector&, const std::vector&, std::vector*)>( &ROCKSDB_NAMESPACE::Transaction::MultiGet, txn, _1, column_family_handles, _2, _3); return txn_multi_get_helper(env, fn_multi_get, jread_options_handle, jkey_parts); } /* * Class: org_rocksdb_Transaction * Method: multiGet * Signature: (JJ[[B)[[B */ jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts) { auto* txn = reinterpret_cast(jhandle); FnMultiGet fn_multi_get = std::bind ( ROCKSDB_NAMESPACE::Transaction::*)( const ROCKSDB_NAMESPACE::ReadOptions&, const std::vector&, std::vector*)>( &ROCKSDB_NAMESPACE::Transaction::MultiGet, txn, _1, _2, _3); return txn_multi_get_helper(env, fn_multi_get, jread_options_handle, jkey_parts); } /* * Class: org_rocksdb_Transaction * Method: getForUpdate * Signature: (JJ[BIJZZ)[B */ jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIJZZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, jboolean jexclusive, jboolean jdo_validate) { auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); auto* txn = reinterpret_cast(jhandle); FnGet fn_get_for_update = std::bind( &ROCKSDB_NAMESPACE::Transaction::GetForUpdate, txn, _1, column_family_handle, _2, _3, jexclusive, jdo_validate); return txn_get_helper(env, fn_get_for_update, jread_options_handle, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: getForUpdate * Signature: (JJ[BIZZ)[B */ jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIZZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_part_len, jboolean jexclusive, jboolean jdo_validate) { auto* txn = reinterpret_cast(jhandle); FnGet 
fn_get_for_update = std::bind( &ROCKSDB_NAMESPACE::Transaction::GetForUpdate, txn, _1, _2, _3, jexclusive, jdo_validate); return txn_get_helper(env, fn_get_for_update, jread_options_handle, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: multiGetForUpdate * Signature: (JJ[[B[J)[[B */ jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B_3J( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts, jlongArray jcolumn_family_handles) { bool has_exception = false; const std::vector column_family_handles = txn_column_families_helper( env, jcolumn_family_handles, &has_exception); if (has_exception) { // exception thrown: OutOfMemoryError return nullptr; } auto* txn = reinterpret_cast(jhandle); FnMultiGet fn_multi_get_for_update = std::bind (ROCKSDB_NAMESPACE::Transaction::*)( const ROCKSDB_NAMESPACE::ReadOptions&, const std::vector&, const std::vector&, std::vector*)>( &ROCKSDB_NAMESPACE::Transaction::MultiGetForUpdate, txn, _1, column_family_handles, _2, _3); return txn_multi_get_helper(env, fn_multi_get_for_update, jread_options_handle, jkey_parts); } /* * Class: org_rocksdb_Transaction * Method: multiGetForUpdate * Signature: (JJ[[B)[[B */ jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts) { auto* txn = reinterpret_cast(jhandle); FnMultiGet fn_multi_get_for_update = std::bind (ROCKSDB_NAMESPACE::Transaction::*)( const ROCKSDB_NAMESPACE::ReadOptions&, const std::vector&, std::vector*)>( &ROCKSDB_NAMESPACE::Transaction::MultiGetForUpdate, txn, _1, _2, _3); return txn_multi_get_helper(env, fn_multi_get_for_update, jread_options_handle, jkey_parts); } /* * Class: org_rocksdb_Transaction * Method: getIterator * Signature: (JJ)J */ jlong Java_org_rocksdb_Transaction_getIterator__JJ(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle) { auto* txn = 
reinterpret_cast(jhandle); auto* read_options = reinterpret_cast(jread_options_handle); return reinterpret_cast(txn->GetIterator(*read_options)); } /* * Class: org_rocksdb_Transaction * Method: getIterator * Signature: (JJJ)J */ jlong Java_org_rocksdb_Transaction_getIterator__JJJ( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* read_options = reinterpret_cast(jread_options_handle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); return reinterpret_cast( txn->GetIterator(*read_options, column_family_handle)); } typedef std::function FnWriteKV; // TODO(AR) consider refactoring to share this between here and rocksjni.cc void txn_write_kv_helper(JNIEnv* env, const FnWriteKV& fn_write_kv, const jbyteArray& jkey, const jint& jkey_part_len, const jbyteArray& jval, const jint& jval_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } jbyte* value = env->GetByteArrayElements(jval, nullptr); if (value == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_part_len); ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), jval_len); ROCKSDB_NAMESPACE::Status s = fn_write_kv(key_slice, value_slice); // trigger java unref on key. // by passing JNI_ABORT, it will simply release the reference without // copying the result back to the java byte array. 
env->ReleaseByteArrayElements(jval, value, JNI_ABORT); env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_Transaction * Method: put * Signature: (J[BI[BIJZ)V */ void Java_org_rocksdb_Transaction_put__J_3BI_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKV fn_put = std::bind(&ROCKSDB_NAMESPACE::Transaction::Put, txn, column_family_handle, _1, _2, jassume_tracked); txn_write_kv_helper(env, fn_put, jkey, jkey_part_len, jval, jval_len); } /* * Class: org_rocksdb_Transaction * Method: put * Signature: (J[BI[BI)V */ void Java_org_rocksdb_Transaction_put__J_3BI_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKV fn_put = std::bind( &ROCKSDB_NAMESPACE::Transaction::Put, txn, _1, _2); txn_write_kv_helper(env, fn_put, jkey, jkey_part_len, jval, jval_len); } typedef std::function FnWriteKVParts; // TODO(AR) consider refactoring to share this between here and rocksjni.cc void txn_write_kv_parts_helper(JNIEnv* env, const FnWriteKVParts& fn_write_kv_parts, const jobjectArray& jkey_parts, const jint& jkey_parts_len, const jobjectArray& jvalue_parts, const jint& jvalue_parts_len) { #ifndef DEBUG (void) jvalue_parts_len; #else assert(jkey_parts_len == jvalue_parts_len); #endif auto key_parts = std::vector(); auto value_parts = std::vector(); auto jparts_to_free = std::vector>(); // convert java key_parts/value_parts byte[][] to Slice(s) for (jsize i = 0; i < jkey_parts_len; ++i) { const jobject jobj_key_part = env->GetObjectArrayElement(jkey_parts, i); if (env->ExceptionCheck()) { // exception thrown: 
ArrayIndexOutOfBoundsException free_parts(env, jparts_to_free); return; } const jobject jobj_value_part = env->GetObjectArrayElement(jvalue_parts, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jobj_key_part); free_parts(env, jparts_to_free); return; } const jbyteArray jba_key_part = reinterpret_cast(jobj_key_part); const jsize jkey_part_len = env->GetArrayLength(jba_key_part); if (env->EnsureLocalCapacity(jkey_part_len) != 0) { // out of memory env->DeleteLocalRef(jobj_value_part); env->DeleteLocalRef(jobj_key_part); free_parts(env, jparts_to_free); return; } jbyte* jkey_part = env->GetByteArrayElements(jba_key_part, nullptr); if (jkey_part == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jobj_value_part); env->DeleteLocalRef(jobj_key_part); free_parts(env, jparts_to_free); return; } const jbyteArray jba_value_part = reinterpret_cast(jobj_value_part); const jsize jvalue_part_len = env->GetArrayLength(jba_value_part); if (env->EnsureLocalCapacity(jvalue_part_len) != 0) { // out of memory env->DeleteLocalRef(jobj_value_part); env->DeleteLocalRef(jobj_key_part); free_parts(env, jparts_to_free); return; } jbyte* jvalue_part = env->GetByteArrayElements(jba_value_part, nullptr); if (jvalue_part == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseByteArrayElements(jba_value_part, jvalue_part, JNI_ABORT); env->DeleteLocalRef(jobj_value_part); env->DeleteLocalRef(jobj_key_part); free_parts(env, jparts_to_free); return; } jparts_to_free.push_back( std::make_tuple(jba_key_part, jkey_part, jobj_key_part)); jparts_to_free.push_back( std::make_tuple(jba_value_part, jvalue_part, jobj_value_part)); key_parts.push_back(ROCKSDB_NAMESPACE::Slice( reinterpret_cast(jkey_part), jkey_part_len)); value_parts.push_back(ROCKSDB_NAMESPACE::Slice( reinterpret_cast(jvalue_part), jvalue_part_len)); } // call the write_multi function ROCKSDB_NAMESPACE::Status s = fn_write_kv_parts( 
ROCKSDB_NAMESPACE::SliceParts(key_parts.data(), (int)key_parts.size()), ROCKSDB_NAMESPACE::SliceParts(value_parts.data(), (int)value_parts.size())); // cleanup temporary memory free_parts(env, jparts_to_free); // return if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_Transaction * Method: put * Signature: (J[[BI[[BIJZ)V */ void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKVParts fn_put_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::Put, txn, column_family_handle, _1, _2, jassume_tracked); txn_write_kv_parts_helper(env, fn_put_parts, jkey_parts, jkey_parts_len, jvalue_parts, jvalue_parts_len); } /* * Class: org_rocksdb_Transaction * Method: put * Signature: (J[[BI[[BI)V */ void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKVParts fn_put_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::Put, txn, _1, _2); txn_write_kv_parts_helper(env, fn_put_parts, jkey_parts, jkey_parts_len, jvalue_parts, jvalue_parts_len); } /* * Class: org_rocksdb_Transaction * Method: merge * Signature: (J[BI[BIJZ)V */ void Java_org_rocksdb_Transaction_merge__J_3BI_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKV fn_merge = std::bind(&ROCKSDB_NAMESPACE::Transaction::Merge, txn, 
column_family_handle, _1, _2, jassume_tracked); txn_write_kv_helper(env, fn_merge, jkey, jkey_part_len, jval, jval_len); } /* * Class: org_rocksdb_Transaction * Method: merge * Signature: (J[BI[BI)V */ void Java_org_rocksdb_Transaction_merge__J_3BI_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKV fn_merge = std::bind( &ROCKSDB_NAMESPACE::Transaction::Merge, txn, _1, _2); txn_write_kv_helper(env, fn_merge, jkey, jkey_part_len, jval, jval_len); } typedef std::function FnWriteK; // TODO(AR) consider refactoring to share this between here and rocksjni.cc void txn_write_k_helper(JNIEnv* env, const FnWriteK& fn_write_k, const jbyteArray& jkey, const jint& jkey_part_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_part_len); ROCKSDB_NAMESPACE::Status s = fn_write_k(key_slice); // trigger java unref on key. // by passing JNI_ABORT, it will simply release the reference without // copying the result back to the java byte array. 
env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_Transaction * Method: delete * Signature: (J[BIJZ)V */ void Java_org_rocksdb_Transaction_delete__J_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteK fn_delete = std::bind( &ROCKSDB_NAMESPACE::Transaction::Delete, txn, column_family_handle, _1, jassume_tracked); txn_write_k_helper(env, fn_delete, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: delete * Signature: (J[BI)V */ void Java_org_rocksdb_Transaction_delete__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); FnWriteK fn_delete = std::bind( &ROCKSDB_NAMESPACE::Transaction::Delete, txn, _1); txn_write_k_helper(env, fn_delete, jkey, jkey_part_len); } typedef std::function FnWriteKParts; // TODO(AR) consider refactoring to share this between here and rocksjni.cc void txn_write_k_parts_helper(JNIEnv* env, const FnWriteKParts& fn_write_k_parts, const jobjectArray& jkey_parts, const jint& jkey_parts_len) { std::vector key_parts; std::vector> jkey_parts_to_free; // convert java key_parts byte[][] to Slice(s) for (jint i = 0; i < jkey_parts_len; ++i) { const jobject jobj_key_part = env->GetObjectArrayElement(jkey_parts, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException free_parts(env, jkey_parts_to_free); return; } const jbyteArray jba_key_part = reinterpret_cast(jobj_key_part); const jsize jkey_part_len = env->GetArrayLength(jba_key_part); if (env->EnsureLocalCapacity(jkey_part_len) != 0) { // out of memory env->DeleteLocalRef(jobj_key_part); free_parts(env, jkey_parts_to_free); return; } jbyte* jkey_part = 
env->GetByteArrayElements(jba_key_part, nullptr); if (jkey_part == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jobj_key_part); free_parts(env, jkey_parts_to_free); return; } jkey_parts_to_free.push_back(std::tuple( jba_key_part, jkey_part, jobj_key_part)); key_parts.push_back(ROCKSDB_NAMESPACE::Slice( reinterpret_cast(jkey_part), jkey_part_len)); } // call the write_multi function ROCKSDB_NAMESPACE::Status s = fn_write_k_parts( ROCKSDB_NAMESPACE::SliceParts(key_parts.data(), (int)key_parts.size())); // cleanup temporary memory free_parts(env, jkey_parts_to_free); // return if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_Transaction * Method: delete * Signature: (J[[BIJZ)V */ void Java_org_rocksdb_Transaction_delete__J_3_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKParts fn_delete_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::Delete, txn, column_family_handle, _1, jassume_tracked); txn_write_k_parts_helper(env, fn_delete_parts, jkey_parts, jkey_parts_len); } /* * Class: org_rocksdb_Transaction * Method: delete * Signature: (J[[BI)V */ void Java_org_rocksdb_Transaction_delete__J_3_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKParts fn_delete_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::Delete, txn, _1); txn_write_k_parts_helper(env, fn_delete_parts, jkey_parts, jkey_parts_len); } /* * Class: org_rocksdb_Transaction * Method: singleDelete * Signature: (J[BIJZ)V */ void Java_org_rocksdb_Transaction_singleDelete__J_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, jboolean 
jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteK fn_single_delete = std::bind( &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, column_family_handle, _1, jassume_tracked); txn_write_k_helper(env, fn_single_delete, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: singleDelete * Signature: (J[BI)V */ void Java_org_rocksdb_Transaction_singleDelete__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); FnWriteK fn_single_delete = std::bind( &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, _1); txn_write_k_helper(env, fn_single_delete, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: singleDelete * Signature: (J[[BIJZ)V */ void Java_org_rocksdb_Transaction_singleDelete__J_3_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKParts fn_single_delete_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, column_family_handle, _1, jassume_tracked); txn_write_k_parts_helper(env, fn_single_delete_parts, jkey_parts, jkey_parts_len); } /* * Class: org_rocksdb_Transaction * Method: singleDelete * Signature: (J[[BI)V */ void Java_org_rocksdb_Transaction_singleDelete__J_3_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKParts fn_single_delete_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::SingleDelete, txn, _1); txn_write_k_parts_helper(env, fn_single_delete_parts, jkey_parts, jkey_parts_len); } /* * Class: org_rocksdb_Transaction * Method: putUntracked * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BIJ( 
JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKV fn_put_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, column_family_handle, _1, _2); txn_write_kv_helper(env, fn_put_untracked, jkey, jkey_part_len, jval, jval_len); } /* * Class: org_rocksdb_Transaction * Method: putUntracked * Signature: (J[BI[BI)V */ void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKV fn_put_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, _1, _2); txn_write_kv_helper(env, fn_put_untracked, jkey, jkey_part_len, jval, jval_len); } /* * Class: org_rocksdb_Transaction * Method: putUntracked * Signature: (J[[BI[[BIJ)V */ void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKVParts fn_put_parts_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, column_family_handle, _1, _2); txn_write_kv_parts_helper(env, fn_put_parts_untracked, jkey_parts, jkey_parts_len, jvalue_parts, jvalue_parts_len); } /* * Class: org_rocksdb_Transaction * Method: putUntracked * Signature: (J[[BI[[BI)V */ void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKVParts 
fn_put_parts_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, _1, _2); txn_write_kv_parts_helper(env, fn_put_parts_untracked, jkey_parts, jkey_parts_len, jvalue_parts, jvalue_parts_len); } /* * Class: org_rocksdb_Transaction * Method: mergeUntracked * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_Transaction_mergeUntracked__J_3BI_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKV fn_merge_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::MergeUntracked, txn, column_family_handle, _1, _2); txn_write_kv_helper(env, fn_merge_untracked, jkey, jkey_part_len, jval, jval_len); } /* * Class: org_rocksdb_Transaction * Method: mergeUntracked * Signature: (J[BI[BI)V */ void Java_org_rocksdb_Transaction_mergeUntracked__J_3BI_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKV fn_merge_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::MergeUntracked, txn, _1, _2); txn_write_kv_helper(env, fn_merge_untracked, jkey, jkey_part_len, jval, jval_len); } /* * Class: org_rocksdb_Transaction * Method: deleteUntracked * Signature: (J[BIJ)V */ void Java_org_rocksdb_Transaction_deleteUntracked__J_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteK fn_delete_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, column_family_handle, _1); txn_write_k_helper(env, fn_delete_untracked, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: deleteUntracked * Signature: (J[BI)V */ void 
Java_org_rocksdb_Transaction_deleteUntracked__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); FnWriteK fn_delete_untracked = std::bind( &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, _1); txn_write_k_helper(env, fn_delete_untracked, jkey, jkey_part_len); } /* * Class: org_rocksdb_Transaction * Method: deleteUntracked * Signature: (J[[BIJ)V */ void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); FnWriteKParts fn_delete_untracked_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, column_family_handle, _1); txn_write_k_parts_helper(env, fn_delete_untracked_parts, jkey_parts, jkey_parts_len); } /* * Class: org_rocksdb_Transaction * Method: deleteUntracked * Signature: (J[[BI)V */ void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len) { auto* txn = reinterpret_cast(jhandle); FnWriteKParts fn_delete_untracked_parts = std::bind( &ROCKSDB_NAMESPACE::Transaction::DeleteUntracked, txn, _1); txn_write_k_parts_helper(env, fn_delete_untracked_parts, jkey_parts, jkey_parts_len); } /* * Class: org_rocksdb_Transaction * Method: putLogData * Signature: (J[BI)V */ void Java_org_rocksdb_Transaction_putLogData(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_part_len); txn->PutLogData(key_slice); // trigger java unref on key. 
// by passing JNI_ABORT, it will simply release the reference without // copying the result back to the java byte array. env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } /* * Class: org_rocksdb_Transaction * Method: disableIndexing * Signature: (J)V */ void Java_org_rocksdb_Transaction_disableIndexing(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->DisableIndexing(); } /* * Class: org_rocksdb_Transaction * Method: enableIndexing * Signature: (J)V */ void Java_org_rocksdb_Transaction_enableIndexing(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->EnableIndexing(); } /* * Class: org_rocksdb_Transaction * Method: getNumKeys * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getNumKeys(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetNumKeys(); } /* * Class: org_rocksdb_Transaction * Method: getNumPuts * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getNumPuts(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetNumPuts(); } /* * Class: org_rocksdb_Transaction * Method: getNumDeletes * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getNumDeletes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetNumDeletes(); } /* * Class: org_rocksdb_Transaction * Method: getNumMerges * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getNumMerges(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetNumMerges(); } /* * Class: org_rocksdb_Transaction * Method: getElapsedTime * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getElapsedTime(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetElapsedTime(); } /* * Class: org_rocksdb_Transaction * Method: getWriteBatch * Signature: (J)J */ 
jlong Java_org_rocksdb_Transaction_getWriteBatch(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return reinterpret_cast(txn->GetWriteBatch()); } /* * Class: org_rocksdb_Transaction * Method: setLockTimeout * Signature: (JJ)V */ void Java_org_rocksdb_Transaction_setLockTimeout(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jlock_timeout) { auto* txn = reinterpret_cast(jhandle); txn->SetLockTimeout(jlock_timeout); } /* * Class: org_rocksdb_Transaction * Method: getWriteOptions * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getWriteOptions(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return reinterpret_cast(txn->GetWriteOptions()); } /* * Class: org_rocksdb_Transaction * Method: setWriteOptions * Signature: (JJ)V */ void Java_org_rocksdb_Transaction_setWriteOptions(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jwrite_options_handle) { auto* txn = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); txn->SetWriteOptions(*write_options); } /* * Class: org_rocksdb_Transaction * Method: undo * Signature: (J[BIJ)V */ void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_part_len); txn->UndoGetForUpdate(column_family_handle, key_slice); env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } /* * Class: org_rocksdb_Transaction * Method: undoGetForUpdate * Signature: (J[BI)V */ void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint 
jkey_part_len) { auto* txn = reinterpret_cast(jhandle); jbyte* key = env->GetByteArrayElements(jkey, nullptr); if (key == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_part_len); txn->UndoGetForUpdate(key_slice); env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); } /* * Class: org_rocksdb_Transaction * Method: rebuildFromWriteBatch * Signature: (JJ)V */ void Java_org_rocksdb_Transaction_rebuildFromWriteBatch( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jwrite_batch_handle) { auto* txn = reinterpret_cast(jhandle); auto* write_batch = reinterpret_cast(jwrite_batch_handle); ROCKSDB_NAMESPACE::Status s = txn->RebuildFromWriteBatch(write_batch); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Transaction * Method: getCommitTimeWriteBatch * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getCommitTimeWriteBatch(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return reinterpret_cast(txn->GetCommitTimeWriteBatch()); } /* * Class: org_rocksdb_Transaction * Method: setLogNumber * Signature: (JJ)V */ void Java_org_rocksdb_Transaction_setLogNumber(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jlog_number) { auto* txn = reinterpret_cast(jhandle); txn->SetLogNumber(jlog_number); } /* * Class: org_rocksdb_Transaction * Method: getLogNumber * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getLogNumber(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetLogNumber(); } /* * Class: org_rocksdb_Transaction * Method: setName * Signature: (JLjava/lang/String;)V */ void Java_org_rocksdb_Transaction_setName(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jstring jname) { auto* txn = reinterpret_cast(jhandle); const char* name = env->GetStringUTFChars(jname, nullptr); if (name == nullptr) { // exception thrown: OutOfMemoryError return; } 
ROCKSDB_NAMESPACE::Status s = txn->SetName(name); env->ReleaseStringUTFChars(jname, name); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_Transaction * Method: getName * Signature: (J)Ljava/lang/String; */ jstring Java_org_rocksdb_Transaction_getName(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TransactionName name = txn->GetName(); return env->NewStringUTF(name.data()); } /* * Class: org_rocksdb_Transaction * Method: getID * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getID(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TransactionID id = txn->GetID(); return static_cast(id); } /* * Class: org_rocksdb_Transaction * Method: isDeadlockDetect * Signature: (J)Z */ jboolean Java_org_rocksdb_Transaction_isDeadlockDetect(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return static_cast(txn->IsDeadlockDetect()); } /* * Class: org_rocksdb_Transaction * Method: getWaitingTxns * Signature: (J)Lorg/rocksdb/Transaction/WaitingTransactions; */ jobject Java_org_rocksdb_Transaction_getWaitingTxns(JNIEnv* env, jobject jtransaction_obj, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); uint32_t column_family_id; std::string key; std::vector waiting_txns = txn->GetWaitingTxns(&column_family_id, &key); jobject jwaiting_txns = ROCKSDB_NAMESPACE::TransactionJni::newWaitingTransactions( env, jtransaction_obj, column_family_id, key, waiting_txns); return jwaiting_txns; } /* * Class: org_rocksdb_Transaction * Method: getState * Signature: (J)B */ jbyte Java_org_rocksdb_Transaction_getState(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Transaction::TransactionState txn_status = txn->GetState(); switch (txn_status) { case ROCKSDB_NAMESPACE::Transaction::TransactionState::STARTED: return 0x0; 
case ROCKSDB_NAMESPACE::Transaction::TransactionState::AWAITING_PREPARE: return 0x1; case ROCKSDB_NAMESPACE::Transaction::TransactionState::PREPARED: return 0x2; case ROCKSDB_NAMESPACE::Transaction::TransactionState::AWAITING_COMMIT: return 0x3; case ROCKSDB_NAMESPACE::Transaction::TransactionState::COMMITTED: return 0x4; case ROCKSDB_NAMESPACE::Transaction::TransactionState::AWAITING_ROLLBACK: return 0x5; case ROCKSDB_NAMESPACE::Transaction::TransactionState::ROLLEDBACK: return 0x6; case ROCKSDB_NAMESPACE::Transaction::TransactionState::LOCKS_STOLEN: return 0x7; } assert(false); return static_cast(-1); } /* * Class: org_rocksdb_Transaction * Method: getId * Signature: (J)J */ jlong Java_org_rocksdb_Transaction_getId(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); uint64_t id = txn->GetId(); return static_cast(id); } /* * Class: org_rocksdb_Transaction * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_Transaction_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); } rocksdb-6.11.4/java/rocksjni/transaction_db.cc000066400000000000000000000407341370372246700213210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::TransactionDB. 
#include #include #include #include #include "include/org_rocksdb_TransactionDB.h" #include "rocksdb/options.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_TransactionDB * Method: open * Signature: (JJLjava/lang/String;)J */ jlong Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2( JNIEnv* env, jclass, jlong joptions_handle, jlong jtxn_db_options_handle, jstring jdb_path) { auto* options = reinterpret_cast(joptions_handle); auto* txn_db_options = reinterpret_cast( jtxn_db_options_handle); ROCKSDB_NAMESPACE::TransactionDB* tdb = nullptr; const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return 0; } ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::TransactionDB::Open( *options, *txn_db_options, db_path, &tdb); env->ReleaseStringUTFChars(jdb_path, db_path); if (s.ok()) { return reinterpret_cast(tdb); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } } /* * Class: org_rocksdb_TransactionDB * Method: open * Signature: (JJLjava/lang/String;[[B[J)[J */ jlongArray Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jdb_options_handle, jlong jtxn_db_options_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options_handles) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } const jsize len_cols = env->GetArrayLength(jcolumn_names); if (env->EnsureLocalCapacity(len_cols) != 0) { // out of memory env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } jlong* jco = env->GetLongArrayElements(jcolumn_options_handles, nullptr); if (jco == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } std::vector column_families; for (int i = 0; i < 
len_cols; i++) { const jobject jcn = env->GetObjectArrayElement(jcolumn_names, i); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } const jbyteArray jcn_ba = reinterpret_cast(jcn); jbyte* jcf_name = env->GetByteArrayElements(jcn_ba, nullptr); if (jcf_name == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jcn); env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } const int jcf_name_len = env->GetArrayLength(jcn_ba); if (env->EnsureLocalCapacity(jcf_name_len) != 0) { // out of memory env->ReleaseByteArrayElements(jcn_ba, jcf_name, JNI_ABORT); env->DeleteLocalRef(jcn); env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } const std::string cf_name(reinterpret_cast(jcf_name), jcf_name_len); const ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = reinterpret_cast(jco[i]); column_families.push_back( ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); env->ReleaseByteArrayElements(jcn_ba, jcf_name, JNI_ABORT); env->DeleteLocalRef(jcn); } env->ReleaseLongArrayElements(jcolumn_options_handles, jco, JNI_ABORT); auto* db_options = reinterpret_cast(jdb_options_handle); auto* txn_db_options = reinterpret_cast( jtxn_db_options_handle); std::vector handles; ROCKSDB_NAMESPACE::TransactionDB* tdb = nullptr; const ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::TransactionDB::Open( *db_options, *txn_db_options, db_path, column_families, &handles, &tdb); // check if open operation was successful if (s.ok()) { const jsize resultsLen = 1 + len_cols; // db handle + column family handles std::unique_ptr results = std::unique_ptr(new jlong[resultsLen]); results[0] = reinterpret_cast(tdb); for (int i = 1; i <= len_cols; 
i++) { results[i] = reinterpret_cast(handles[i - 1]); } jlongArray jresults = env->NewLongArray(resultsLen); if (jresults == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresults); return nullptr; } return jresults; } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return nullptr; } } /* * Class: org_rocksdb_TransactionDB * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_TransactionDB_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); assert(txn_db != nullptr); delete txn_db; } /* * Class: org_rocksdb_TransactionDB * Method: closeDatabase * Signature: (J)V */ void Java_org_rocksdb_TransactionDB_closeDatabase( JNIEnv* env, jclass, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); assert(txn_db != nullptr); ROCKSDB_NAMESPACE::Status s = txn_db->Close(); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_TransactionDB * Method: beginTransaction * Signature: (JJ)J */ jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle) { auto* txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); ROCKSDB_NAMESPACE::Transaction* txn = txn_db->BeginTransaction(*write_options); return reinterpret_cast(txn); } /* * Class: org_rocksdb_TransactionDB * Method: beginTransaction * Signature: (JJJ)J */ jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jtxn_options_handle) { auto* txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* txn_options = reinterpret_cast( jtxn_options_handle); ROCKSDB_NAMESPACE::Transaction* txn = 
txn_db->BeginTransaction(*write_options, *txn_options); return reinterpret_cast(txn); } /* * Class: org_rocksdb_TransactionDB * Method: beginTransaction_withOld * Signature: (JJJ)J */ jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jold_txn_handle) { auto* txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* old_txn = reinterpret_cast(jold_txn_handle); ROCKSDB_NAMESPACE::TransactionOptions txn_options; ROCKSDB_NAMESPACE::Transaction* txn = txn_db->BeginTransaction(*write_options, txn_options, old_txn); // RocksJava relies on the assumption that // we do not allocate a new Transaction object // when providing an old_txn assert(txn == old_txn); return reinterpret_cast(txn); } /* * Class: org_rocksdb_TransactionDB * Method: beginTransaction_withOld * Signature: (JJJJ)J */ jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jtxn_options_handle, jlong jold_txn_handle) { auto* txn_db = reinterpret_cast(jhandle); auto* write_options = reinterpret_cast(jwrite_options_handle); auto* txn_options = reinterpret_cast( jtxn_options_handle); auto* old_txn = reinterpret_cast(jold_txn_handle); ROCKSDB_NAMESPACE::Transaction* txn = txn_db->BeginTransaction(*write_options, *txn_options, old_txn); // RocksJava relies on the assumption that // we do not allocate a new Transaction object // when providing an old_txn assert(txn == old_txn); return reinterpret_cast(txn); } /* * Class: org_rocksdb_TransactionDB * Method: getTransactionByName * Signature: (JLjava/lang/String;)J */ jlong Java_org_rocksdb_TransactionDB_getTransactionByName( JNIEnv* env, jobject, jlong jhandle, jstring jname) { auto* txn_db = reinterpret_cast(jhandle); const char* name = env->GetStringUTFChars(jname, nullptr); if (name == nullptr) { // exception thrown: OutOfMemoryError return 0; } 
ROCKSDB_NAMESPACE::Transaction* txn = txn_db->GetTransactionByName(name); env->ReleaseStringUTFChars(jname, name); return reinterpret_cast(txn); } /* * Class: org_rocksdb_TransactionDB * Method: getAllPreparedTransactions * Signature: (J)[J */ jlongArray Java_org_rocksdb_TransactionDB_getAllPreparedTransactions( JNIEnv* env, jobject, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); std::vector txns; txn_db->GetAllPreparedTransactions(&txns); const size_t size = txns.size(); assert(size < UINT32_MAX); // does it fit in a jint? const jsize len = static_cast(size); std::vector tmp(len); for (jsize i = 0; i < len; ++i) { tmp[i] = reinterpret_cast(txns[i]); } jlongArray jtxns = env->NewLongArray(len); if (jtxns == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jtxns, 0, len, tmp.data()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jtxns); return nullptr; } return jtxns; } /* * Class: org_rocksdb_TransactionDB * Method: getLockStatusData * Signature: (J)Ljava/util/Map; */ jobject Java_org_rocksdb_TransactionDB_getLockStatusData( JNIEnv* env, jobject, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); const std::unordered_multimap lock_status_data = txn_db->GetLockStatusData(); const jobject jlock_status_data = ROCKSDB_NAMESPACE::HashMapJni::construct( env, static_cast(lock_status_data.size())); if (jlock_status_data == nullptr) { // exception occurred return nullptr; } const ROCKSDB_NAMESPACE::HashMapJni::FnMapKV< const int32_t, const ROCKSDB_NAMESPACE::KeyLockInfo, jobject, jobject> fn_map_kv = [env](const std::pair& pair) { const jobject jlong_column_family_id = ROCKSDB_NAMESPACE::LongJni::valueOf(env, pair.first); if (jlong_column_family_id == nullptr) { // an error occurred return std::unique_ptr>(nullptr); } const jobject jkey_lock_info = ROCKSDB_NAMESPACE::KeyLockInfoJni::construct(env, pair.second); if (jkey_lock_info == nullptr) { // an error 
occurred return std::unique_ptr>(nullptr); } return std::unique_ptr>( new std::pair(jlong_column_family_id, jkey_lock_info)); }; if (!ROCKSDB_NAMESPACE::HashMapJni::putAll( env, jlock_status_data, lock_status_data.begin(), lock_status_data.end(), fn_map_kv)) { // exception occcurred return nullptr; } return jlock_status_data; } /* * Class: org_rocksdb_TransactionDB * Method: getDeadlockInfoBuffer * Signature: (J)[Lorg/rocksdb/TransactionDB/DeadlockPath; */ jobjectArray Java_org_rocksdb_TransactionDB_getDeadlockInfoBuffer( JNIEnv* env, jobject jobj, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); const std::vector deadlock_info_buffer = txn_db->GetDeadlockInfoBuffer(); const jsize deadlock_info_buffer_len = static_cast(deadlock_info_buffer.size()); jobjectArray jdeadlock_info_buffer = env->NewObjectArray( deadlock_info_buffer_len, ROCKSDB_NAMESPACE::DeadlockPathJni::getJClass(env), nullptr); if (jdeadlock_info_buffer == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } jsize jdeadlock_info_buffer_offset = 0; auto buf_end = deadlock_info_buffer.end(); for (auto buf_it = deadlock_info_buffer.begin(); buf_it != buf_end; ++buf_it) { const ROCKSDB_NAMESPACE::DeadlockPath deadlock_path = *buf_it; const std::vector deadlock_infos = deadlock_path.path; const jsize deadlock_infos_len = static_cast(deadlock_info_buffer.size()); jobjectArray jdeadlock_infos = env->NewObjectArray( deadlock_infos_len, ROCKSDB_NAMESPACE::DeadlockInfoJni::getJClass(env), nullptr); if (jdeadlock_infos == nullptr) { // exception thrown: OutOfMemoryError env->DeleteLocalRef(jdeadlock_info_buffer); return nullptr; } jsize jdeadlock_infos_offset = 0; auto infos_end = deadlock_infos.end(); for (auto infos_it = deadlock_infos.begin(); infos_it != infos_end; ++infos_it) { const ROCKSDB_NAMESPACE::DeadlockInfo deadlock_info = *infos_it; const jobject jdeadlock_info = ROCKSDB_NAMESPACE::TransactionDBJni::newDeadlockInfo( env, jobj, deadlock_info.m_txn_id, deadlock_info.m_cf_id, 
deadlock_info.m_waiting_key, deadlock_info.m_exclusive); if (jdeadlock_info == nullptr) { // exception occcurred env->DeleteLocalRef(jdeadlock_info_buffer); return nullptr; } env->SetObjectArrayElement(jdeadlock_infos, jdeadlock_infos_offset++, jdeadlock_info); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException or // ArrayStoreException env->DeleteLocalRef(jdeadlock_info); env->DeleteLocalRef(jdeadlock_info_buffer); return nullptr; } } const jobject jdeadlock_path = ROCKSDB_NAMESPACE::DeadlockPathJni::construct( env, jdeadlock_infos, deadlock_path.limit_exceeded); if (jdeadlock_path == nullptr) { // exception occcurred env->DeleteLocalRef(jdeadlock_info_buffer); return nullptr; } env->SetObjectArrayElement(jdeadlock_info_buffer, jdeadlock_info_buffer_offset++, jdeadlock_path); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException or ArrayStoreException env->DeleteLocalRef(jdeadlock_path); env->DeleteLocalRef(jdeadlock_info_buffer); return nullptr; } } return jdeadlock_info_buffer; } /* * Class: org_rocksdb_TransactionDB * Method: setDeadlockInfoBufferSize * Signature: (JI)V */ void Java_org_rocksdb_TransactionDB_setDeadlockInfoBufferSize( JNIEnv*, jobject, jlong jhandle, jint jdeadlock_info_buffer_size) { auto* txn_db = reinterpret_cast(jhandle); txn_db->SetDeadlockInfoBufferSize(jdeadlock_info_buffer_size); } rocksdb-6.11.4/java/rocksjni/transaction_db_options.cc000066400000000000000000000133031370372246700230640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::TransactionDBOptions. 
#include #include "include/org_rocksdb_TransactionDBOptions.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_TransactionDBOptions * Method: newTransactionDBOptions * Signature: ()J */ jlong Java_org_rocksdb_TransactionDBOptions_newTransactionDBOptions( JNIEnv* /*env*/, jclass /*jcls*/) { ROCKSDB_NAMESPACE::TransactionDBOptions* opts = new ROCKSDB_NAMESPACE::TransactionDBOptions(); return reinterpret_cast(opts); } /* * Class: org_rocksdb_TransactionDBOptions * Method: getMaxNumLocks * Signature: (J)J */ jlong Java_org_rocksdb_TransactionDBOptions_getMaxNumLocks(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->max_num_locks; } /* * Class: org_rocksdb_TransactionDBOptions * Method: setMaxNumLocks * Signature: (JJ)V */ void Java_org_rocksdb_TransactionDBOptions_setMaxNumLocks( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_num_locks) { auto* opts = reinterpret_cast(jhandle); opts->max_num_locks = jmax_num_locks; } /* * Class: org_rocksdb_TransactionDBOptions * Method: getNumStripes * Signature: (J)J */ jlong Java_org_rocksdb_TransactionDBOptions_getNumStripes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->num_stripes; } /* * Class: org_rocksdb_TransactionDBOptions * Method: setNumStripes * Signature: (JJ)V */ void Java_org_rocksdb_TransactionDBOptions_setNumStripes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jnum_stripes) { auto* opts = reinterpret_cast(jhandle); opts->num_stripes = jnum_stripes; } /* * Class: org_rocksdb_TransactionDBOptions * Method: getTransactionLockTimeout * Signature: (J)J */ jlong Java_org_rocksdb_TransactionDBOptions_getTransactionLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->transaction_lock_timeout; } /* * Class: org_rocksdb_TransactionDBOptions * Method: setTransactionLockTimeout * 
Signature: (JJ)V */ void Java_org_rocksdb_TransactionDBOptions_setTransactionLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jtransaction_lock_timeout) { auto* opts = reinterpret_cast(jhandle); opts->transaction_lock_timeout = jtransaction_lock_timeout; } /* * Class: org_rocksdb_TransactionDBOptions * Method: getDefaultLockTimeout * Signature: (J)J */ jlong Java_org_rocksdb_TransactionDBOptions_getDefaultLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->default_lock_timeout; } /* * Class: org_rocksdb_TransactionDBOptions * Method: setDefaultLockTimeout * Signature: (JJ)V */ void Java_org_rocksdb_TransactionDBOptions_setDefaultLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdefault_lock_timeout) { auto* opts = reinterpret_cast(jhandle); opts->default_lock_timeout = jdefault_lock_timeout; } /* * Class: org_rocksdb_TransactionDBOptions * Method: getWritePolicy * Signature: (J)B */ jbyte Java_org_rocksdb_TransactionDBOptions_getWritePolicy(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::TxnDBWritePolicyJni::toJavaTxnDBWritePolicy( opts->write_policy); } /* * Class: org_rocksdb_TransactionDBOptions * Method: setWritePolicy * Signature: (JB)V */ void Java_org_rocksdb_TransactionDBOptions_setWritePolicy(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jbyte jwrite_policy) { auto* opts = reinterpret_cast(jhandle); opts->write_policy = ROCKSDB_NAMESPACE::TxnDBWritePolicyJni::toCppTxnDBWritePolicy( jwrite_policy); } /* * Class: org_rocksdb_TransactionDBOptions * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_TransactionDBOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); } rocksdb-6.11.4/java/rocksjni/transaction_log.cc000066400000000000000000000055211370372246700215100ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::Iterator methods from Java side. #include #include #include #include "include/org_rocksdb_TransactionLogIterator.h" #include "rocksdb/transaction_log.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_TransactionLogIterator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_TransactionLogIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { delete reinterpret_cast(handle); } /* * Class: org_rocksdb_TransactionLogIterator * Method: isValid * Signature: (J)Z */ jboolean Java_org_rocksdb_TransactionLogIterator_isValid(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle) ->Valid(); } /* * Class: org_rocksdb_TransactionLogIterator * Method: next * Signature: (J)V */ void Java_org_rocksdb_TransactionLogIterator_next(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* * Class: org_rocksdb_TransactionLogIterator * Method: status * Signature: (J)V */ void Java_org_rocksdb_TransactionLogIterator_status(JNIEnv* env, jobject /*jobj*/, jlong handle) { ROCKSDB_NAMESPACE::Status s = reinterpret_cast(handle) ->status(); if (!s.ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } /* * Class: org_rocksdb_TransactionLogIterator * Method: getBatch * Signature: (J)Lorg/rocksdb/TransactionLogIterator$BatchResult */ jobject Java_org_rocksdb_TransactionLogIterator_getBatch(JNIEnv* env, jobject /*jobj*/, jlong handle) { ROCKSDB_NAMESPACE::BatchResult batch_result = reinterpret_cast(handle) ->GetBatch(); return ROCKSDB_NAMESPACE::BatchResultJni::construct(env, batch_result); } 
rocksdb-6.11.4/java/rocksjni/transaction_notifier.cc000066400000000000000000000031401370372246700225410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::TransactionNotifier. #include #include "include/org_rocksdb_AbstractTransactionNotifier.h" #include "rocksjni/transaction_notifier_jnicallback.h" /* * Class: org_rocksdb_AbstractTransactionNotifier * Method: createNewTransactionNotifier * Signature: ()J */ jlong Java_org_rocksdb_AbstractTransactionNotifier_createNewTransactionNotifier( JNIEnv* env, jobject jobj) { auto* transaction_notifier = new ROCKSDB_NAMESPACE::TransactionNotifierJniCallback(env, jobj); auto* sptr_transaction_notifier = new std::shared_ptr( transaction_notifier); return reinterpret_cast(sptr_transaction_notifier); } /* * Class: org_rocksdb_AbstractTransactionNotifier * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_AbstractTransactionNotifier_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { // TODO(AR) refactor to use JniCallback::JniCallback // when https://github.com/facebook/rocksdb/pull/1241/ is merged std::shared_ptr* handle = reinterpret_cast< std::shared_ptr*>( jhandle); delete handle; } rocksdb-6.11.4/java/rocksjni/transaction_notifier_jnicallback.cc000066400000000000000000000025471370372246700250700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TransactionNotifier. #include "rocksjni/transaction_notifier_jnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { TransactionNotifierJniCallback::TransactionNotifierJniCallback(JNIEnv* env, jobject jtransaction_notifier) : JniCallback(env, jtransaction_notifier) { // we cache the method id for the JNI callback m_jsnapshot_created_methodID = AbstractTransactionNotifierJni::getSnapshotCreatedMethodId(env); } void TransactionNotifierJniCallback::SnapshotCreated( const Snapshot* newSnapshot) { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); assert(env != nullptr); env->CallVoidMethod(m_jcallback_obj, m_jsnapshot_created_methodID, reinterpret_cast(newSnapshot)); if(env->ExceptionCheck()) { // exception thrown from CallVoidMethod env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } releaseJniEnv(attached_thread); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/java/rocksjni/transaction_notifier_jnicallback.h000066400000000000000000000030001370372246700247130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TransactionNotifier. #ifndef JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_ #define JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_ #include #include "rocksdb/utilities/transaction.h" #include "rocksjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { /** * This class acts as a bridge between C++ * and Java. 
The methods in this class will be * called back from the RocksDB TransactionDB or OptimisticTransactionDB (C++), * we then callback to the appropriate Java method * this enables TransactionNotifier to be implemented in Java. * * Unlike RocksJava's Comparator JNI Callback, we do not attempt * to reduce Java object allocations by caching the Snapshot object * presented to the callback. This could be revisited in future * if performance is lacking. */ class TransactionNotifierJniCallback: public JniCallback, public TransactionNotifier { public: TransactionNotifierJniCallback(JNIEnv* env, jobject jtransaction_notifier); virtual void SnapshotCreated(const Snapshot* newSnapshot); private: jmethodID m_jsnapshot_created_methodID; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_TRANSACTION_NOTIFIER_JNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/transaction_options.cc000066400000000000000000000144471370372246700224310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ // for ROCKSDB_NAMESPACE::TransactionOptions. 
#include #include "include/org_rocksdb_TransactionOptions.h" #include "rocksdb/utilities/transaction_db.h" /* * Class: org_rocksdb_TransactionOptions * Method: newTransactionOptions * Signature: ()J */ jlong Java_org_rocksdb_TransactionOptions_newTransactionOptions( JNIEnv* /*env*/, jclass /*jcls*/) { auto* opts = new ROCKSDB_NAMESPACE::TransactionOptions(); return reinterpret_cast(opts); } /* * Class: org_rocksdb_TransactionOptions * Method: isSetSnapshot * Signature: (J)Z */ jboolean Java_org_rocksdb_TransactionOptions_isSetSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->set_snapshot; } /* * Class: org_rocksdb_TransactionOptions * Method: setSetSnapshot * Signature: (JZ)V */ void Java_org_rocksdb_TransactionOptions_setSetSnapshot( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jset_snapshot) { auto* opts = reinterpret_cast(jhandle); opts->set_snapshot = jset_snapshot; } /* * Class: org_rocksdb_TransactionOptions * Method: isDeadlockDetect * Signature: (J)Z */ jboolean Java_org_rocksdb_TransactionOptions_isDeadlockDetect(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->deadlock_detect; } /* * Class: org_rocksdb_TransactionOptions * Method: setDeadlockDetect * Signature: (JZ)V */ void Java_org_rocksdb_TransactionOptions_setDeadlockDetect( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jdeadlock_detect) { auto* opts = reinterpret_cast(jhandle); opts->deadlock_detect = jdeadlock_detect; } /* * Class: org_rocksdb_TransactionOptions * Method: getLockTimeout * Signature: (J)J */ jlong Java_org_rocksdb_TransactionOptions_getLockTimeout(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->lock_timeout; } /* * Class: org_rocksdb_TransactionOptions * Method: setLockTimeout * Signature: (JJ)V */ void Java_org_rocksdb_TransactionOptions_setLockTimeout(JNIEnv* /*env*/, jobject 
/*jobj*/, jlong jhandle, jlong jlock_timeout) { auto* opts = reinterpret_cast(jhandle); opts->lock_timeout = jlock_timeout; } /* * Class: org_rocksdb_TransactionOptions * Method: getExpiration * Signature: (J)J */ jlong Java_org_rocksdb_TransactionOptions_getExpiration(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->expiration; } /* * Class: org_rocksdb_TransactionOptions * Method: setExpiration * Signature: (JJ)V */ void Java_org_rocksdb_TransactionOptions_setExpiration(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jexpiration) { auto* opts = reinterpret_cast(jhandle); opts->expiration = jexpiration; } /* * Class: org_rocksdb_TransactionOptions * Method: getDeadlockDetectDepth * Signature: (J)J */ jlong Java_org_rocksdb_TransactionOptions_getDeadlockDetectDepth( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->deadlock_detect_depth; } /* * Class: org_rocksdb_TransactionOptions * Method: setDeadlockDetectDepth * Signature: (JJ)V */ void Java_org_rocksdb_TransactionOptions_setDeadlockDetectDepth( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdeadlock_detect_depth) { auto* opts = reinterpret_cast(jhandle); opts->deadlock_detect_depth = jdeadlock_detect_depth; } /* * Class: org_rocksdb_TransactionOptions * Method: getMaxWriteBatchSize * Signature: (J)J */ jlong Java_org_rocksdb_TransactionOptions_getMaxWriteBatchSize(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return opts->max_write_batch_size; } /* * Class: org_rocksdb_TransactionOptions * Method: setMaxWriteBatchSize * Signature: (JJ)V */ void Java_org_rocksdb_TransactionOptions_setMaxWriteBatchSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_write_batch_size) { auto* opts = reinterpret_cast(jhandle); opts->max_write_batch_size = jmax_write_batch_size; } /* * Class: org_rocksdb_TransactionOptions * Method: 
disposeInternal * Signature: (J)V */ void Java_org_rocksdb_TransactionOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); } rocksdb-6.11.4/java/rocksjni/ttl.cc000066400000000000000000000152231370372246700171250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::TtlDB methods. // from Java side. #include #include #include #include #include #include #include "include/org_rocksdb_TtlDB.h" #include "rocksdb/utilities/db_ttl.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_TtlDB * Method: open * Signature: (JLjava/lang/String;IZ)J */ jlong Java_org_rocksdb_TtlDB_open( JNIEnv* env, jclass, jlong joptions_handle, jstring jdb_path, jint jttl, jboolean jread_only) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return 0; } auto* opt = reinterpret_cast(joptions_handle); ROCKSDB_NAMESPACE::DBWithTTL* db = nullptr; ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::DBWithTTL::Open(*opt, db_path, &db, jttl, jread_only); env->ReleaseStringUTFChars(jdb_path, db_path); // as TTLDB extends RocksDB on the java side, we can reuse // the RocksDB portal here. 
if (s.ok()) { return reinterpret_cast(db); } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } } /* * Class: org_rocksdb_TtlDB * Method: openCF * Signature: (JLjava/lang/String;[[B[J[IZ)[J */ jlongArray Java_org_rocksdb_TtlDB_openCF( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options, jintArray jttls, jboolean jread_only) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { // exception thrown: OutOfMemoryError return 0; } const jsize len_cols = env->GetArrayLength(jcolumn_names); jlong* jco = env->GetLongArrayElements(jcolumn_options, nullptr); if (jco == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } std::vector column_families; jboolean has_exception = JNI_FALSE; ROCKSDB_NAMESPACE::JniUtil::byteStrings( env, jcolumn_names, [](const char* str_data, const size_t str_len) { return std::string(str_data, str_len); }, [&jco, &column_families](size_t idx, std::string cf_name) { ROCKSDB_NAMESPACE::ColumnFamilyOptions* cf_options = reinterpret_cast(jco[idx]); column_families.push_back( ROCKSDB_NAMESPACE::ColumnFamilyDescriptor(cf_name, *cf_options)); }, &has_exception); env->ReleaseLongArrayElements(jcolumn_options, jco, JNI_ABORT); if (has_exception == JNI_TRUE) { // exception occurred env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } std::vector ttl_values; jint* jttlv = env->GetIntArrayElements(jttls, nullptr); if (jttlv == nullptr) { // exception thrown: OutOfMemoryError env->ReleaseStringUTFChars(jdb_path, db_path); return nullptr; } const jsize len_ttls = env->GetArrayLength(jttls); for (jsize i = 0; i < len_ttls; i++) { ttl_values.push_back(jttlv[i]); } env->ReleaseIntArrayElements(jttls, jttlv, JNI_ABORT); auto* opt = reinterpret_cast(jopt_handle); std::vector handles; ROCKSDB_NAMESPACE::DBWithTTL* db = nullptr; ROCKSDB_NAMESPACE::Status s = 
ROCKSDB_NAMESPACE::DBWithTTL::Open( *opt, db_path, column_families, &handles, &db, ttl_values, jread_only); // we have now finished with db_path env->ReleaseStringUTFChars(jdb_path, db_path); // check if open operation was successful if (s.ok()) { const jsize resultsLen = 1 + len_cols; // db handle + column family handles std::unique_ptr results = std::unique_ptr(new jlong[resultsLen]); results[0] = reinterpret_cast(db); for (int i = 1; i <= len_cols; i++) { results[i] = reinterpret_cast(handles[i - 1]); } jlongArray jresults = env->NewLongArray(resultsLen); if (jresults == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetLongArrayRegion(jresults, 0, resultsLen, results.get()); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresults); return nullptr; } return jresults; } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return NULL; } } /* * Class: org_rocksdb_TtlDB * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_TtlDB_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* ttl_db = reinterpret_cast(jhandle); assert(ttl_db != nullptr); delete ttl_db; } /* * Class: org_rocksdb_TtlDB * Method: closeDatabase * Signature: (J)V */ void Java_org_rocksdb_TtlDB_closeDatabase( JNIEnv* /* env */, jclass, jlong /* jhandle */) { // auto* ttl_db = reinterpret_cast(jhandle); // assert(ttl_db != nullptr); // ROCKSDB_NAMESPACE::Status s = ttl_db->Close(); // ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); //TODO(AR) this is disabled until https://github.com/facebook/rocksdb/issues/4818 is resolved! 
} /* * Class: org_rocksdb_TtlDB * Method: createColumnFamilyWithTtl * Signature: (JLorg/rocksdb/ColumnFamilyDescriptor;[BJI)J; */ jlong Java_org_rocksdb_TtlDB_createColumnFamilyWithTtl( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jcolumn_name, jlong jcolumn_options, jint jttl) { jbyte* cfname = env->GetByteArrayElements(jcolumn_name, nullptr); if (cfname == nullptr) { // exception thrown: OutOfMemoryError return 0; } const jsize len = env->GetArrayLength(jcolumn_name); auto* cfOptions = reinterpret_cast( jcolumn_options); auto* db_handle = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* handle; ROCKSDB_NAMESPACE::Status s = db_handle->CreateColumnFamilyWithTtl( *cfOptions, std::string(reinterpret_cast(cfname), len), &handle, jttl); env->ReleaseByteArrayElements(jcolumn_name, cfname, 0); if (s.ok()) { return reinterpret_cast(handle); } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); return 0; } rocksdb-6.11.4/java/rocksjni/wal_filter.cc000066400000000000000000000014351370372246700204520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::WalFilter. 
#include #include "include/org_rocksdb_AbstractWalFilter.h" #include "rocksjni/wal_filter_jnicallback.h" /* * Class: org_rocksdb_AbstractWalFilter * Method: createNewWalFilter * Signature: ()J */ jlong Java_org_rocksdb_AbstractWalFilter_createNewWalFilter( JNIEnv* env, jobject jobj) { auto* wal_filter = new ROCKSDB_NAMESPACE::WalFilterJniCallback(env, jobj); return reinterpret_cast(wal_filter); } rocksdb-6.11.4/java/rocksjni/wal_filter_jnicallback.cc000066400000000000000000000112331370372246700227640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::WalFilter. #include "rocksjni/wal_filter_jnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { WalFilterJniCallback::WalFilterJniCallback( JNIEnv* env, jobject jwal_filter) : JniCallback(env, jwal_filter) { // Note: The name of a WalFilter will not change during it's lifetime, // so we cache it in a global var jmethodID jname_mid = AbstractWalFilterJni::getNameMethodId(env); if(jname_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } jstring jname = (jstring)env->CallObjectMethod(m_jcallback_obj, jname_mid); if(env->ExceptionCheck()) { // exception thrown return; } jboolean has_exception = JNI_FALSE; m_name = JniUtil::copyString(env, jname, &has_exception); // also releases jname if (has_exception == JNI_TRUE) { // exception thrown return; } m_column_family_log_number_map_mid = AbstractWalFilterJni::getColumnFamilyLogNumberMapMethodId(env); if(m_column_family_log_number_map_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } m_log_record_found_proxy_mid = 
AbstractWalFilterJni::getLogRecordFoundProxyMethodId(env); if(m_log_record_found_proxy_mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return; } } void WalFilterJniCallback::ColumnFamilyLogNumberMap( const std::map& cf_lognumber_map, const std::map& cf_name_id_map) { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); if (env == nullptr) { return; } jobject jcf_lognumber_map = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &cf_lognumber_map); if (jcf_lognumber_map == nullptr) { // exception occurred env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return; } jobject jcf_name_id_map = ROCKSDB_NAMESPACE::HashMapJni::fromCppMap(env, &cf_name_id_map); if (jcf_name_id_map == nullptr) { // exception occurred env->ExceptionDescribe(); // print out exception to stderr env->DeleteLocalRef(jcf_lognumber_map); releaseJniEnv(attached_thread); return; } env->CallVoidMethod(m_jcallback_obj, m_column_family_log_number_map_mid, jcf_lognumber_map, jcf_name_id_map); env->DeleteLocalRef(jcf_lognumber_map); env->DeleteLocalRef(jcf_name_id_map); if(env->ExceptionCheck()) { // exception thrown from CallVoidMethod env->ExceptionDescribe(); // print out exception to stderr } releaseJniEnv(attached_thread); } WalFilter::WalProcessingOption WalFilterJniCallback::LogRecordFound( unsigned long long log_number, const std::string& log_file_name, const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed) { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); if (env == nullptr) { return WalFilter::WalProcessingOption::kCorruptedRecord; } jstring jlog_file_name = JniUtil::toJavaString(env, &log_file_name); if (jlog_file_name == nullptr) { // exception occcurred env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return WalFilter::WalProcessingOption::kCorruptedRecord; } jshort jlog_record_found_result = 
env->CallShortMethod(m_jcallback_obj, m_log_record_found_proxy_mid, static_cast(log_number), jlog_file_name, reinterpret_cast(&batch), reinterpret_cast(new_batch)); env->DeleteLocalRef(jlog_file_name); if (env->ExceptionCheck()) { // exception thrown from CallShortMethod env->ExceptionDescribe(); // print out exception to stderr releaseJniEnv(attached_thread); return WalFilter::WalProcessingOption::kCorruptedRecord; } // unpack WalProcessingOption and batch_changed from jlog_record_found_result jbyte jwal_processing_option_value = (jlog_record_found_result >> 8) & 0xFF; jbyte jbatch_changed_value = jlog_record_found_result & 0xFF; releaseJniEnv(attached_thread); *batch_changed = jbatch_changed_value == JNI_TRUE; return WalProcessingOptionJni::toCppWalProcessingOption( jwal_processing_option_value); } const char* WalFilterJniCallback::Name() const { return m_name.get(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/java/rocksjni/wal_filter_jnicallback.h000066400000000000000000000026321370372246700226310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::WalFilter. 
#ifndef JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_ #define JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_ #include #include #include #include #include "rocksdb/wal_filter.h" #include "rocksjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { class WalFilterJniCallback : public JniCallback, public WalFilter { public: WalFilterJniCallback( JNIEnv* env, jobject jwal_filter); virtual void ColumnFamilyLogNumberMap( const std::map& cf_lognumber_map, const std::map& cf_name_id_map); virtual WalFilter::WalProcessingOption LogRecordFound( unsigned long long log_number, const std::string& log_file_name, const WriteBatch& batch, WriteBatch* new_batch, bool* batch_changed); virtual const char* Name() const; private: std::unique_ptr m_name; jmethodID m_column_family_log_number_map_mid; jmethodID m_log_record_found_proxy_mid; }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_WAL_FILTER_JNICALLBACK_H_ rocksdb-6.11.4/java/rocksjni/write_batch.cc000066400000000000000000000560341370372246700206220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::WriteBatch methods from Java side. 
#include #include "db/memtable.h" #include "db/write_batch_internal.h" #include "include/org_rocksdb_WriteBatch.h" #include "include/org_rocksdb_WriteBatch_Handler.h" #include "logging/logging.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/status.h" #include "rocksdb/write_batch.h" #include "rocksdb/write_buffer_manager.h" #include "rocksjni/portal.h" #include "rocksjni/writebatchhandlerjnicallback.h" #include "table/scoped_arena_iterator.h" /* * Class: org_rocksdb_WriteBatch * Method: newWriteBatch * Signature: (I)J */ jlong Java_org_rocksdb_WriteBatch_newWriteBatch__I(JNIEnv* /*env*/, jclass /*jcls*/, jint jreserved_bytes) { auto* wb = new ROCKSDB_NAMESPACE::WriteBatch(static_cast(jreserved_bytes)); return reinterpret_cast(wb); } /* * Class: org_rocksdb_WriteBatch * Method: newWriteBatch * Signature: ([BI)J */ jlong Java_org_rocksdb_WriteBatch_newWriteBatch___3BI(JNIEnv* env, jclass /*jcls*/, jbyteArray jserialized, jint jserialized_length) { jboolean has_exception = JNI_FALSE; std::string serialized = ROCKSDB_NAMESPACE::JniUtil::byteString( env, jserialized, jserialized_length, [](const char* str, const size_t len) { return std::string(str, len); }, &has_exception); if (has_exception == JNI_TRUE) { // exception occurred return 0; } auto* wb = new ROCKSDB_NAMESPACE::WriteBatch(serialized); return reinterpret_cast(wb); } /* * Class: org_rocksdb_WriteBatch * Method: count0 * Signature: (J)I */ jint Java_org_rocksdb_WriteBatch_count0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return static_cast(wb->Count()); } /* * Class: org_rocksdb_WriteBatch * Method: clear0 * Signature: (J)V */ void Java_org_rocksdb_WriteBatch_clear0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); wb->Clear(); } /* * Class: org_rocksdb_WriteBatch * Method: setSavePoint0 * Signature: (J)V */ void 
Java_org_rocksdb_WriteBatch_setSavePoint0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); wb->SetSavePoint(); } /* * Class: org_rocksdb_WriteBatch * Method: rollbackToSavePoint0 * Signature: (J)V */ void Java_org_rocksdb_WriteBatch_rollbackToSavePoint0(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto s = wb->RollbackToSavePoint(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_WriteBatch * Method: popSavePoint * Signature: (J)V */ void Java_org_rocksdb_WriteBatch_popSavePoint(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto s = wb->PopSavePoint(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_WriteBatch * Method: setMaxBytes * Signature: (JJ)V */ void Java_org_rocksdb_WriteBatch_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle, jlong jmax_bytes) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); wb->SetMaxBytes(static_cast(jmax_bytes)); } /* * Class: org_rocksdb_WriteBatch * Method: put * Signature: (J[BI[BI)V */ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto put = [&wb](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Put(key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: put * Signature: (J[BI[BIJ)V */ void 
Java_org_rocksdb_WriteBatch_put__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Put(cf_handle, key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ void Java_org_rocksdb_WriteBatch_putDirect(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jobject jval, jint jval_offset, jint jval_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); auto put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key, ROCKSDB_NAMESPACE::Slice& value) { if (cf_handle == nullptr) { wb->Put(key, value); } else { wb->Put(cf_handle, key, value); } }; ROCKSDB_NAMESPACE::JniUtil::kv_op_direct( put, env, jkey, jkey_offset, jkey_len, jval, jval_offset, jval_len); } /* * Class: org_rocksdb_WriteBatch * Method: merge * Signature: (J[BI[BI)V */ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto merge = [&wb](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Merge(key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, jentry_value, 
jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: merge * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto merge = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wb->Merge(cf_handle, key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: delete * Signature: (J[BI)V */ void Java_org_rocksdb_WriteBatch_delete__J_3BI(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto remove = [&wb](ROCKSDB_NAMESPACE::Slice key) { return wb->Delete(key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: delete * Signature: (J[BIJ)V */ void Java_org_rocksdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { return wb->Delete(cf_handle, key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(remove, 
env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: singleDelete * Signature: (J[BI)V */ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto single_delete = [&wb](ROCKSDB_NAMESPACE::Slice key) { return wb->SingleDelete(key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: singleDelete * Signature: (J[BIJ)V */ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto single_delete = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { return wb->SingleDelete(cf_handle, key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: removeDirect * Signature: (JLjava/nio/ByteBuffer;IIJ)V */ void Java_org_rocksdb_WriteBatch_removeDirect(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key) { if (cf_handle == nullptr) { wb->Delete(key); } else { wb->Delete(cf_handle, key); } }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(remove, env, 
jkey, jkey_offset, jkey_len); } /* * Class: org_rocksdb_WriteBatch * Method: deleteRange * Signature: (J[BI[BI)V */ void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto deleteRange = [&wb](ROCKSDB_NAMESPACE::Slice beginKey, ROCKSDB_NAMESPACE::Slice endKey) { return wb->DeleteRange(beginKey, endKey); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, jbegin_key_len, jend_key, jend_key_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: deleteRange * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto deleteRange = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice beginKey, ROCKSDB_NAMESPACE::Slice endKey) { return wb->DeleteRange(cf_handle, beginKey, endKey); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, jbegin_key_len, jend_key, jend_key_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: putLogData * Signature: (J[BI)V */ void Java_org_rocksdb_WriteBatch_putLogData(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jblob, jint jblob_len) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto putLogData = [&wb](ROCKSDB_NAMESPACE::Slice blob) { return wb->PutLogData(blob); }; std::unique_ptr status = 
ROCKSDB_NAMESPACE::JniUtil::k_op(putLogData, env, jobj, jblob, jblob_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatch * Method: iterate * Signature: (JJ)V */ void Java_org_rocksdb_WriteBatch_iterate(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jlong handlerHandle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); ROCKSDB_NAMESPACE::Status s = wb->Iterate( reinterpret_cast( handlerHandle)); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_WriteBatch * Method: data * Signature: (J)[B */ jbyteArray Java_org_rocksdb_WriteBatch_data(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto data = wb->Data(); return ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, data); } /* * Class: org_rocksdb_WriteBatch * Method: getDataSize * Signature: (J)J */ jlong Java_org_rocksdb_WriteBatch_getDataSize(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto data_size = wb->GetDataSize(); return static_cast(data_size); } /* * Class: org_rocksdb_WriteBatch * Method: hasPut * Signature: (J)Z */ jboolean Java_org_rocksdb_WriteBatch_hasPut(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasPut(); } /* * Class: org_rocksdb_WriteBatch * Method: hasDelete * Signature: (J)Z */ jboolean Java_org_rocksdb_WriteBatch_hasDelete(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasDelete(); } /* * Class: org_rocksdb_WriteBatch * Method: hasSingleDelete * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasSingleDelete( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = 
reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasSingleDelete(); } /* * Class: org_rocksdb_WriteBatch * Method: hasDeleteRange * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasDeleteRange( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasDeleteRange(); } /* * Class: org_rocksdb_WriteBatch * Method: hasMerge * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasMerge( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasMerge(); } /* * Class: org_rocksdb_WriteBatch * Method: hasBeginPrepare * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasBeginPrepare( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasBeginPrepare(); } /* * Class: org_rocksdb_WriteBatch * Method: hasEndPrepare * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasEndPrepare( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasEndPrepare(); } /* * Class: org_rocksdb_WriteBatch * Method: hasCommit * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasCommit( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasCommit(); } /* * Class: org_rocksdb_WriteBatch * Method: hasRollback * Signature: (J)Z */ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasRollback( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return wb->HasRollback(); } /* * Class: org_rocksdb_WriteBatch * Method: markWalTerminationPoint * Signature: (J)V */ void 
Java_org_rocksdb_WriteBatch_markWalTerminationPoint(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); wb->MarkWalTerminationPoint(); } /* * Class: org_rocksdb_WriteBatch * Method: getWalTerminationPoint * Signature: (J)Lorg/rocksdb/WriteBatch/SavePoint; */ jobject Java_org_rocksdb_WriteBatch_getWalTerminationPoint(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto save_point = wb->GetWalTerminationPoint(); return ROCKSDB_NAMESPACE::WriteBatchSavePointJni::construct(env, save_point); } /* * Class: org_rocksdb_WriteBatch * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_WriteBatch_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* wb = reinterpret_cast(handle); assert(wb != nullptr); delete wb; } /* * Class: org_rocksdb_WriteBatch_Handler * Method: createNewHandler0 * Signature: ()J */ jlong Java_org_rocksdb_WriteBatch_00024Handler_createNewHandler0(JNIEnv* env, jobject jobj) { auto* wbjnic = new ROCKSDB_NAMESPACE::WriteBatchHandlerJniCallback(env, jobj); return reinterpret_cast(wbjnic); } rocksdb-6.11.4/java/rocksjni/write_batch_test.cc000066400000000000000000000157711370372246700216640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::WriteBatch methods testing from Java side. 
#include #include "db/memtable.h" #include "db/write_batch_internal.h" #include "include/org_rocksdb_WriteBatch.h" #include "include/org_rocksdb_WriteBatchTest.h" #include "include/org_rocksdb_WriteBatchTestInternalHelper.h" #include "include/org_rocksdb_WriteBatch_Handler.h" #include "options/cf_options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/status.h" #include "rocksdb/write_batch.h" #include "rocksdb/write_buffer_manager.h" #include "rocksjni/portal.h" #include "table/scoped_arena_iterator.h" #include "test_util/testharness.h" #include "util/string_util.h" /* * Class: org_rocksdb_WriteBatchTest * Method: getContents * Signature: (J)[B */ jbyteArray Java_org_rocksdb_WriteBatchTest_getContents(JNIEnv* env, jclass /*jclazz*/, jlong jwb_handle) { auto* b = reinterpret_cast(jwb_handle); assert(b != nullptr); // todo: Currently the following code is directly copied from // db/write_bench_test.cc. It could be implemented in java once // all the necessary components can be accessed via jni api. 
ROCKSDB_NAMESPACE::InternalKeyComparator cmp( ROCKSDB_NAMESPACE::BytewiseComparator()); auto factory = std::make_shared(); ROCKSDB_NAMESPACE::Options options; ROCKSDB_NAMESPACE::WriteBufferManager wb(options.db_write_buffer_size); options.memtable_factory = factory; ROCKSDB_NAMESPACE::MemTable* mem = new ROCKSDB_NAMESPACE::MemTable( cmp, ROCKSDB_NAMESPACE::ImmutableCFOptions(options), ROCKSDB_NAMESPACE::MutableCFOptions(options), &wb, ROCKSDB_NAMESPACE::kMaxSequenceNumber, 0 /* column_family_id */); mem->Ref(); std::string state; ROCKSDB_NAMESPACE::ColumnFamilyMemTablesDefault cf_mems_default(mem); ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::WriteBatchInternal::InsertInto(b, &cf_mems_default, nullptr, nullptr); unsigned int count = 0; ROCKSDB_NAMESPACE::Arena arena; ROCKSDB_NAMESPACE::ScopedArenaIterator iter( mem->NewIterator(ROCKSDB_NAMESPACE::ReadOptions(), &arena)); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ROCKSDB_NAMESPACE::ParsedInternalKey ikey; ikey.clear(); bool parsed = ROCKSDB_NAMESPACE::ParseInternalKey(iter->key(), &ikey); if (!parsed) { assert(parsed); } switch (ikey.type) { case ROCKSDB_NAMESPACE::kTypeValue: state.append("Put("); state.append(ikey.user_key.ToString()); state.append(", "); state.append(iter->value().ToString()); state.append(")"); count++; break; case ROCKSDB_NAMESPACE::kTypeMerge: state.append("Merge("); state.append(ikey.user_key.ToString()); state.append(", "); state.append(iter->value().ToString()); state.append(")"); count++; break; case ROCKSDB_NAMESPACE::kTypeDeletion: state.append("Delete("); state.append(ikey.user_key.ToString()); state.append(")"); count++; break; case ROCKSDB_NAMESPACE::kTypeSingleDeletion: state.append("SingleDelete("); state.append(ikey.user_key.ToString()); state.append(")"); count++; break; case ROCKSDB_NAMESPACE::kTypeRangeDeletion: state.append("DeleteRange("); state.append(ikey.user_key.ToString()); state.append(", "); state.append(iter->value().ToString()); state.append(")"); 
count++; break; case ROCKSDB_NAMESPACE::kTypeLogData: state.append("LogData("); state.append(ikey.user_key.ToString()); state.append(")"); count++; break; default: assert(false); state.append("Err:Expected("); state.append(std::to_string(ikey.type)); state.append(")"); count++; break; } state.append("@"); state.append(ROCKSDB_NAMESPACE::NumberToString(ikey.sequence)); } if (!s.ok()) { state.append(s.ToString()); } else if (ROCKSDB_NAMESPACE::WriteBatchInternal::Count(b) != count) { state.append("Err:CountMismatch(expected="); state.append( std::to_string(ROCKSDB_NAMESPACE::WriteBatchInternal::Count(b))); state.append(", actual="); state.append(std::to_string(count)); state.append(")"); } delete mem->Unref(); jbyteArray jstate = env->NewByteArray(static_cast(state.size())); if (jstate == nullptr) { // exception thrown: OutOfMemoryError return nullptr; } env->SetByteArrayRegion( jstate, 0, static_cast(state.size()), const_cast(reinterpret_cast(state.c_str()))); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jstate); return nullptr; } return jstate; } /* * Class: org_rocksdb_WriteBatchTestInternalHelper * Method: setSequence * Signature: (JJ)V */ void Java_org_rocksdb_WriteBatchTestInternalHelper_setSequence( JNIEnv* /*env*/, jclass /*jclazz*/, jlong jwb_handle, jlong jsn) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); ROCKSDB_NAMESPACE::WriteBatchInternal::SetSequence( wb, static_cast(jsn)); } /* * Class: org_rocksdb_WriteBatchTestInternalHelper * Method: sequence * Signature: (J)J */ jlong Java_org_rocksdb_WriteBatchTestInternalHelper_sequence(JNIEnv* /*env*/, jclass /*jclazz*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); return static_cast( ROCKSDB_NAMESPACE::WriteBatchInternal::Sequence(wb)); } /* * Class: org_rocksdb_WriteBatchTestInternalHelper * Method: append * Signature: (JJ)V */ void Java_org_rocksdb_WriteBatchTestInternalHelper_append(JNIEnv* 
/*env*/, jclass /*jclazz*/, jlong jwb_handle_1, jlong jwb_handle_2) { auto* wb1 = reinterpret_cast(jwb_handle_1); assert(wb1 != nullptr); auto* wb2 = reinterpret_cast(jwb_handle_2); assert(wb2 != nullptr); ROCKSDB_NAMESPACE::WriteBatchInternal::Append(wb1, wb2); } rocksdb-6.11.4/java/rocksjni/write_batch_with_index.cc000066400000000000000000000750701370372246700230450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ and enables // calling c++ ROCKSDB_NAMESPACE::WriteBatchWithIndex methods from Java side. #include "rocksdb/utilities/write_batch_with_index.h" #include "include/org_rocksdb_WBWIRocksIterator.h" #include "include/org_rocksdb_WriteBatchWithIndex.h" #include "rocksdb/comparator.h" #include "rocksjni/portal.h" /* * Class: org_rocksdb_WriteBatchWithIndex * Method: newWriteBatchWithIndex * Signature: ()J */ jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__( JNIEnv* /*env*/, jclass /*jcls*/) { auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex(); return reinterpret_cast(wbwi); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: newWriteBatchWithIndex * Signature: (Z)J */ jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__Z( JNIEnv* /*env*/, jclass /*jcls*/, jboolean joverwrite_key) { auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex( ROCKSDB_NAMESPACE::BytewiseComparator(), 0, static_cast(joverwrite_key)); return reinterpret_cast(wbwi); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: newWriteBatchWithIndex * Signature: (JBIZ)J */ jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__JBIZ( JNIEnv* /*env*/, jclass /*jcls*/, jlong jfallback_index_comparator_handle, jbyte jcomparator_type, jint 
jreserved_bytes, jboolean joverwrite_key) { ROCKSDB_NAMESPACE::Comparator* fallback_comparator = nullptr; switch (jcomparator_type) { // JAVA_COMPARATOR case 0x0: fallback_comparator = reinterpret_cast( jfallback_index_comparator_handle); break; // JAVA_NATIVE_COMPARATOR_WRAPPER case 0x1: fallback_comparator = reinterpret_cast( jfallback_index_comparator_handle); break; } auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex( fallback_comparator, static_cast(jreserved_bytes), static_cast(joverwrite_key)); return reinterpret_cast(wbwi); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: count0 * Signature: (J)I */ jint Java_org_rocksdb_WriteBatchWithIndex_count0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); return static_cast(wbwi->GetWriteBatch()->Count()); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: put * Signature: (J[BI[BI)V */ void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto put = [&wbwi](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wbwi->Put(key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: put * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto put = 
[&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wbwi->Put(cf_handle, key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(put, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_putDirect( JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jobject jval, jint jval_offset, jint jval_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); auto put = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key, ROCKSDB_NAMESPACE::Slice& value) { if (cf_handle == nullptr) { wb->Put(key, value); } else { wb->Put(cf_handle, key, value); } }; ROCKSDB_NAMESPACE::JniUtil::kv_op_direct( put, env, jkey, jkey_offset, jkey_len, jval, jval_offset, jval_len); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: merge * Signature: (J[BI[BI)V */ void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto merge = [&wbwi](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wbwi->Merge(key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: merge * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BIJ( JNIEnv* env, jobject 
jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto merge = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key, ROCKSDB_NAMESPACE::Slice value) { return wbwi->Merge(cf_handle, key, value); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(merge, env, jobj, jkey, jkey_len, jentry_value, jentry_value_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: delete * Signature: (J[BI)V */ void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BI(JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto remove = [&wbwi](ROCKSDB_NAMESPACE::Slice key) { return wbwi->Delete(key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: delete * Signature: (J[BIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto remove = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { return wbwi->Delete(cf_handle, key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(remove, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: singleDelete * 
Signature: (J[BI)V */ void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto single_delete = [&wbwi](ROCKSDB_NAMESPACE::Slice key) { return wbwi->SingleDelete(key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: singleDelete * Signature: (J[BIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto single_delete = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice key) { return wbwi->SingleDelete(cf_handle, key); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(single_delete, env, jobj, jkey, jkey_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: removeDirect * Signature: (JLjava/nio/ByteBuffer;IIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_removeDirect( JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); auto remove = [&wb, &cf_handle](ROCKSDB_NAMESPACE::Slice& key) { if (cf_handle == nullptr) { wb->Delete(key); } else { wb->Delete(cf_handle, key); } }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(remove, env, jkey, jkey_offset, jkey_len); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: deleteRange * Signature: (J[BI[BI)V */ 
void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto deleteRange = [&wbwi](ROCKSDB_NAMESPACE::Slice beginKey, ROCKSDB_NAMESPACE::Slice endKey) { return wbwi->DeleteRange(beginKey, endKey); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, jbegin_key_len, jend_key, jend_key_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: deleteRange * Signature: (J[BI[BIJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto* cf_handle = reinterpret_cast(jcf_handle); assert(cf_handle != nullptr); auto deleteRange = [&wbwi, &cf_handle](ROCKSDB_NAMESPACE::Slice beginKey, ROCKSDB_NAMESPACE::Slice endKey) { return wbwi->DeleteRange(cf_handle, beginKey, endKey); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::kv_op(deleteRange, env, jobj, jbegin_key, jbegin_key_len, jend_key, jend_key_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: putLogData * Signature: (J[BI)V */ void Java_org_rocksdb_WriteBatchWithIndex_putLogData(JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jblob, jint jblob_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto putLogData = [&wbwi](ROCKSDB_NAMESPACE::Slice blob) { return wbwi->PutLogData(blob); }; std::unique_ptr status = ROCKSDB_NAMESPACE::JniUtil::k_op(putLogData, env, jobj, 
jblob, jblob_len); if (status != nullptr && !status->ok()) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); } } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: clear * Signature: (J)V */ void Java_org_rocksdb_WriteBatchWithIndex_clear0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); wbwi->Clear(); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: setSavePoint0 * Signature: (J)V */ void Java_org_rocksdb_WriteBatchWithIndex_setSavePoint0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); wbwi->SetSavePoint(); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: rollbackToSavePoint0 * Signature: (J)V */ void Java_org_rocksdb_WriteBatchWithIndex_rollbackToSavePoint0( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto s = wbwi->RollbackToSavePoint(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: popSavePoint * Signature: (J)V */ void Java_org_rocksdb_WriteBatchWithIndex_popSavePoint(JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto s = wbwi->PopSavePoint(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: setMaxBytes * Signature: (JJ)V */ void Java_org_rocksdb_WriteBatchWithIndex_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle, jlong jmax_bytes) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); wbwi->SetMaxBytes(static_cast(jmax_bytes)); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: getWriteBatch * Signature: (J)Lorg/rocksdb/WriteBatch; */ jobject Java_org_rocksdb_WriteBatchWithIndex_getWriteBatch(JNIEnv* 
env, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); assert(wbwi != nullptr); auto* wb = wbwi->GetWriteBatch(); // TODO(AR) is the `wb` object owned by us? return ROCKSDB_NAMESPACE::WriteBatchJni::construct(env, wb); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: iterator0 * Signature: (J)J */ jlong Java_org_rocksdb_WriteBatchWithIndex_iterator0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* wbwi_iterator = wbwi->NewIterator(); return reinterpret_cast(wbwi_iterator); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: iterator1 * Signature: (JJ)J */ jlong Java_org_rocksdb_WriteBatchWithIndex_iterator1(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto* wbwi_iterator = wbwi->NewIterator(cf_handle); return reinterpret_cast(wbwi_iterator); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: iteratorWithBase * Signature: (JJJJ)J */ jlong Java_org_rocksdb_WriteBatchWithIndex_iteratorWithBase( JNIEnv*, jobject, jlong jwbwi_handle, jlong jcf_handle, jlong jbase_iterator_handle, jlong jread_opts_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto* base_iterator = reinterpret_cast(jbase_iterator_handle); ROCKSDB_NAMESPACE::ReadOptions* read_opts = jread_opts_handle == 0 ? 
nullptr : reinterpret_cast( jread_opts_handle); auto* iterator = wbwi->NewIteratorWithBase(cf_handle, base_iterator, read_opts); return reinterpret_cast(iterator); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: getFromBatch * Signature: (JJ[BI)[B */ jbyteArray JNICALL Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BI( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdbopt_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* dbopt = reinterpret_cast(jdbopt_handle); auto getter = [&wbwi, &dbopt](const ROCKSDB_NAMESPACE::Slice& key, std::string* value) { return wbwi->GetFromBatch(*dbopt, key, value); }; return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: getFromBatch * Signature: (JJ[BIJ)[B */ jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdbopt_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* dbopt = reinterpret_cast(jdbopt_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto getter = [&wbwi, &cf_handle, &dbopt](const ROCKSDB_NAMESPACE::Slice& key, std::string* value) { return wbwi->GetFromBatch(cf_handle, *dbopt, key, value); }; return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: getFromBatchAndDB * Signature: (JJJ[BI)[B */ jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BI( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdb_handle, jlong jreadopt_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* db = reinterpret_cast(jdb_handle); auto* readopt = reinterpret_cast(jreadopt_handle); auto getter = [&wbwi, &db, &readopt](const ROCKSDB_NAMESPACE::Slice& key, std::string* value) { return wbwi->GetFromBatchAndDB(db, *readopt, key, 
value); }; return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: getFromBatchAndDB * Signature: (JJJ[BIJ)[B */ jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdb_handle, jlong jreadopt_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); auto* db = reinterpret_cast(jdb_handle); auto* readopt = reinterpret_cast(jreadopt_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto getter = [&wbwi, &db, &cf_handle, &readopt]( const ROCKSDB_NAMESPACE::Slice& key, std::string* value) { return wbwi->GetFromBatchAndDB(db, *readopt, cf_handle, key, value); }; return ROCKSDB_NAMESPACE::JniUtil::v_op(getter, env, jkey, jkey_len); } /* * Class: org_rocksdb_WriteBatchWithIndex * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_WriteBatchWithIndex_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* wbwi = reinterpret_cast(handle); assert(wbwi != nullptr); delete wbwi; } /* WBWIRocksIterator below */ /* * Class: org_rocksdb_WBWIRocksIterator * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_WBWIRocksIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); assert(it != nullptr); delete it; } /* * Class: org_rocksdb_WBWIRocksIterator * Method: isValid0 * Signature: (J)Z */ jboolean Java_org_rocksdb_WBWIRocksIterator_isValid0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle)->Valid(); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: seekToFirst0 * Signature: (J)V */ void Java_org_rocksdb_WBWIRocksIterator_seekToFirst0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToFirst(); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: seekToLast0 * Signature: (J)V */ void 
Java_org_rocksdb_WBWIRocksIterator_seekToLast0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToLast(); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: next0 * Signature: (J)V */ void Java_org_rocksdb_WBWIRocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: prev0 * Signature: (J)V */ void Java_org_rocksdb_WBWIRocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Prev(); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: seek0 * Signature: (J[BI)V */ void Java_org_rocksdb_WBWIRocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { auto* it = reinterpret_cast(handle); jbyte* target = env->GetByteArrayElements(jtarget, nullptr); if (target == nullptr) { // exception thrown: OutOfMemoryError return; } ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), jtarget_len); it->Seek(target_slice); env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: seekDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ void Java_org_rocksdb_WBWIRocksIterator_seekDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); auto seek = [&it](ROCKSDB_NAMESPACE::Slice& target_slice) { it->Seek(target_slice); }; ROCKSDB_NAMESPACE::JniUtil::k_op_direct(seek, env, jtarget, jtarget_off, jtarget_len); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: seekForPrev0 * Signature: (J[BI)V */ void Java_org_rocksdb_WBWIRocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { auto* it = reinterpret_cast(handle); jbyte* target = env->GetByteArrayElements(jtarget, nullptr); if (target == nullptr) { // exception thrown: OutOfMemoryError return; } 
ROCKSDB_NAMESPACE::Slice target_slice(reinterpret_cast(target), jtarget_len); it->SeekForPrev(target_slice); env->ReleaseByteArrayElements(jtarget, target, JNI_ABORT); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: status0 * Signature: (J)V */ void Java_org_rocksdb_WBWIRocksIterator_status0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->status(); if (s.ok()) { return; } ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_WBWIRocksIterator * Method: entry1 * Signature: (J)[J */ jlongArray Java_org_rocksdb_WBWIRocksIterator_entry1(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); const ROCKSDB_NAMESPACE::WriteEntry& we = it->Entry(); jlong results[3]; // set the type of the write entry results[0] = ROCKSDB_NAMESPACE::WriteTypeJni::toJavaWriteType(we.type); // NOTE: key_slice and value_slice will be freed by // org.rocksdb.DirectSlice#close auto* key_slice = new ROCKSDB_NAMESPACE::Slice(we.key.data(), we.key.size()); results[1] = reinterpret_cast(key_slice); if (we.type == ROCKSDB_NAMESPACE::kDeleteRecord || we.type == ROCKSDB_NAMESPACE::kSingleDeleteRecord || we.type == ROCKSDB_NAMESPACE::kLogDataRecord) { // set native handle of value slice to null if no value available results[2] = 0; } else { auto* value_slice = new ROCKSDB_NAMESPACE::Slice(we.value.data(), we.value.size()); results[2] = reinterpret_cast(value_slice); } jlongArray jresults = env->NewLongArray(3); if (jresults == nullptr) { // exception thrown: OutOfMemoryError if (results[2] != 0) { auto* value_slice = reinterpret_cast(results[2]); delete value_slice; } delete key_slice; return nullptr; } env->SetLongArrayRegion(jresults, 0, 3, results); if (env->ExceptionCheck()) { // exception thrown: ArrayIndexOutOfBoundsException env->DeleteLocalRef(jresults); if (results[2] != 0) { auto* value_slice = reinterpret_cast(results[2]); delete value_slice; } delete 
key_slice; return nullptr; } return jresults; } /* * Class: org_rocksdb_WBWIRocksIterator * Method: refresh0 * Signature: (J)V */ void Java_org_rocksdb_WBWIRocksIterator_refresh0(JNIEnv* env) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::Status::NotSupported("Refresh() is not supported"); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } rocksdb-6.11.4/java/rocksjni/write_buffer_manager.cc000066400000000000000000000027601370372246700225010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include "include/org_rocksdb_WriteBufferManager.h" #include "rocksdb/cache.h" #include "rocksdb/write_buffer_manager.h" /* * Class: org_rocksdb_WriteBufferManager * Method: newWriteBufferManager * Signature: (JJ)J */ jlong Java_org_rocksdb_WriteBufferManager_newWriteBufferManager( JNIEnv* /*env*/, jclass /*jclazz*/, jlong jbuffer_size, jlong jcache_handle) { auto* cache_ptr = reinterpret_cast*>( jcache_handle); auto* write_buffer_manager = new std::shared_ptr( std::make_shared(jbuffer_size, *cache_ptr)); return reinterpret_cast(write_buffer_manager); } /* * Class: org_rocksdb_WriteBufferManager * Method: disposeInternal * Signature: (J)V */ void Java_org_rocksdb_WriteBufferManager_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* write_buffer_manager = reinterpret_cast*>( jhandle); assert(write_buffer_manager != nullptr); delete write_buffer_manager; } rocksdb-6.11.4/java/rocksjni/writebatchhandlerjnicallback.cc000066400000000000000000000413701370372246700241740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Comparator. #include "rocksjni/writebatchhandlerjnicallback.h" #include "rocksjni/portal.h" namespace ROCKSDB_NAMESPACE { WriteBatchHandlerJniCallback::WriteBatchHandlerJniCallback( JNIEnv* env, jobject jWriteBatchHandler) : JniCallback(env, jWriteBatchHandler), m_env(env) { m_jPutCfMethodId = WriteBatchHandlerJni::getPutCfMethodId(env); if(m_jPutCfMethodId == nullptr) { // exception thrown return; } m_jPutMethodId = WriteBatchHandlerJni::getPutMethodId(env); if(m_jPutMethodId == nullptr) { // exception thrown return; } m_jMergeCfMethodId = WriteBatchHandlerJni::getMergeCfMethodId(env); if(m_jMergeCfMethodId == nullptr) { // exception thrown return; } m_jMergeMethodId = WriteBatchHandlerJni::getMergeMethodId(env); if(m_jMergeMethodId == nullptr) { // exception thrown return; } m_jDeleteCfMethodId = WriteBatchHandlerJni::getDeleteCfMethodId(env); if(m_jDeleteCfMethodId == nullptr) { // exception thrown return; } m_jDeleteMethodId = WriteBatchHandlerJni::getDeleteMethodId(env); if(m_jDeleteMethodId == nullptr) { // exception thrown return; } m_jSingleDeleteCfMethodId = WriteBatchHandlerJni::getSingleDeleteCfMethodId(env); if(m_jSingleDeleteCfMethodId == nullptr) { // exception thrown return; } m_jSingleDeleteMethodId = WriteBatchHandlerJni::getSingleDeleteMethodId(env); if(m_jSingleDeleteMethodId == nullptr) { // exception thrown return; } m_jDeleteRangeCfMethodId = WriteBatchHandlerJni::getDeleteRangeCfMethodId(env); if (m_jDeleteRangeCfMethodId == nullptr) { // exception thrown return; } m_jDeleteRangeMethodId = WriteBatchHandlerJni::getDeleteRangeMethodId(env); if (m_jDeleteRangeMethodId == nullptr) { // exception thrown return; } m_jLogDataMethodId = 
WriteBatchHandlerJni::getLogDataMethodId(env); if(m_jLogDataMethodId == nullptr) { // exception thrown return; } m_jPutBlobIndexCfMethodId = WriteBatchHandlerJni::getPutBlobIndexCfMethodId(env); if(m_jPutBlobIndexCfMethodId == nullptr) { // exception thrown return; } m_jMarkBeginPrepareMethodId = WriteBatchHandlerJni::getMarkBeginPrepareMethodId(env); if(m_jMarkBeginPrepareMethodId == nullptr) { // exception thrown return; } m_jMarkEndPrepareMethodId = WriteBatchHandlerJni::getMarkEndPrepareMethodId(env); if(m_jMarkEndPrepareMethodId == nullptr) { // exception thrown return; } m_jMarkNoopMethodId = WriteBatchHandlerJni::getMarkNoopMethodId(env); if(m_jMarkNoopMethodId == nullptr) { // exception thrown return; } m_jMarkRollbackMethodId = WriteBatchHandlerJni::getMarkRollbackMethodId(env); if(m_jMarkRollbackMethodId == nullptr) { // exception thrown return; } m_jMarkCommitMethodId = WriteBatchHandlerJni::getMarkCommitMethodId(env); if(m_jMarkCommitMethodId == nullptr) { // exception thrown return; } m_jContinueMethodId = WriteBatchHandlerJni::getContinueMethodId(env); if(m_jContinueMethodId == nullptr) { // exception thrown return; } } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::PutCF( uint32_t column_family_id, const Slice& key, const Slice& value) { auto put = [this, column_family_id] ( jbyteArray j_key, jbyteArray j_value) { m_env->CallVoidMethod( m_jcallback_obj, m_jPutCfMethodId, static_cast(column_family_id), j_key, j_value); }; auto status = WriteBatchHandlerJniCallback::kv_op(key, value, put); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? 
} else { return ROCKSDB_NAMESPACE::Status(*status); } } void WriteBatchHandlerJniCallback::Put(const Slice& key, const Slice& value) { auto put = [this] ( jbyteArray j_key, jbyteArray j_value) { m_env->CallVoidMethod( m_jcallback_obj, m_jPutMethodId, j_key, j_value); }; WriteBatchHandlerJniCallback::kv_op(key, value, put); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MergeCF( uint32_t column_family_id, const Slice& key, const Slice& value) { auto merge = [this, column_family_id] ( jbyteArray j_key, jbyteArray j_value) { m_env->CallVoidMethod( m_jcallback_obj, m_jMergeCfMethodId, static_cast(column_family_id), j_key, j_value); }; auto status = WriteBatchHandlerJniCallback::kv_op(key, value, merge); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? } else { return ROCKSDB_NAMESPACE::Status(*status); } } void WriteBatchHandlerJniCallback::Merge(const Slice& key, const Slice& value) { auto merge = [this] ( jbyteArray j_key, jbyteArray j_value) { m_env->CallVoidMethod( m_jcallback_obj, m_jMergeMethodId, j_key, j_value); }; WriteBatchHandlerJniCallback::kv_op(key, value, merge); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::DeleteCF( uint32_t column_family_id, const Slice& key) { auto remove = [this, column_family_id] (jbyteArray j_key) { m_env->CallVoidMethod( m_jcallback_obj, m_jDeleteCfMethodId, static_cast(column_family_id), j_key); }; auto status = WriteBatchHandlerJniCallback::k_op(key, remove); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? 
} else { return ROCKSDB_NAMESPACE::Status(*status); } } void WriteBatchHandlerJniCallback::Delete(const Slice& key) { auto remove = [this] (jbyteArray j_key) { m_env->CallVoidMethod( m_jcallback_obj, m_jDeleteMethodId, j_key); }; WriteBatchHandlerJniCallback::k_op(key, remove); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::SingleDeleteCF( uint32_t column_family_id, const Slice& key) { auto singleDelete = [this, column_family_id] (jbyteArray j_key) { m_env->CallVoidMethod( m_jcallback_obj, m_jSingleDeleteCfMethodId, static_cast(column_family_id), j_key); }; auto status = WriteBatchHandlerJniCallback::k_op(key, singleDelete); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? } else { return ROCKSDB_NAMESPACE::Status(*status); } } void WriteBatchHandlerJniCallback::SingleDelete(const Slice& key) { auto singleDelete = [this] (jbyteArray j_key) { m_env->CallVoidMethod( m_jcallback_obj, m_jSingleDeleteMethodId, j_key); }; WriteBatchHandlerJniCallback::k_op(key, singleDelete); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::DeleteRangeCF( uint32_t column_family_id, const Slice& beginKey, const Slice& endKey) { auto deleteRange = [this, column_family_id] ( jbyteArray j_beginKey, jbyteArray j_endKey) { m_env->CallVoidMethod( m_jcallback_obj, m_jDeleteRangeCfMethodId, static_cast(column_family_id), j_beginKey, j_endKey); }; auto status = WriteBatchHandlerJniCallback::kv_op(beginKey, endKey, deleteRange); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? 
} else { return ROCKSDB_NAMESPACE::Status(*status); } } void WriteBatchHandlerJniCallback::DeleteRange(const Slice& beginKey, const Slice& endKey) { auto deleteRange = [this] ( jbyteArray j_beginKey, jbyteArray j_endKey) { m_env->CallVoidMethod( m_jcallback_obj, m_jDeleteRangeMethodId, j_beginKey, j_endKey); }; WriteBatchHandlerJniCallback::kv_op(beginKey, endKey, deleteRange); } void WriteBatchHandlerJniCallback::LogData(const Slice& blob) { auto logData = [this] (jbyteArray j_blob) { m_env->CallVoidMethod( m_jcallback_obj, m_jLogDataMethodId, j_blob); }; WriteBatchHandlerJniCallback::k_op(blob, logData); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::PutBlobIndexCF( uint32_t column_family_id, const Slice& key, const Slice& value) { auto putBlobIndex = [this, column_family_id] ( jbyteArray j_key, jbyteArray j_value) { m_env->CallVoidMethod( m_jcallback_obj, m_jPutBlobIndexCfMethodId, static_cast(column_family_id), j_key, j_value); }; auto status = WriteBatchHandlerJniCallback::kv_op(key, value, putBlobIndex); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? 
} else { return ROCKSDB_NAMESPACE::Status(*status); } } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkBeginPrepare( bool unprepare) { #ifndef DEBUG (void) unprepare; #else assert(!unprepare); #endif m_env->CallVoidMethod(m_jcallback_obj, m_jMarkBeginPrepareMethodId); // check for Exception, in-particular RocksDBException if (m_env->ExceptionCheck()) { // exception thrown jthrowable exception = m_env->ExceptionOccurred(); std::unique_ptr status = ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); if (status == nullptr) { // unkown status or exception occurred extracting status m_env->ExceptionDescribe(); return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) probably need a // better error code here } else { m_env->ExceptionClear(); // clear the exception, as we have extracted the status return ROCKSDB_NAMESPACE::Status(*status); } } return ROCKSDB_NAMESPACE::Status::OK(); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkEndPrepare( const Slice& xid) { auto markEndPrepare = [this] ( jbyteArray j_xid) { m_env->CallVoidMethod( m_jcallback_obj, m_jMarkEndPrepareMethodId, j_xid); }; auto status = WriteBatchHandlerJniCallback::k_op(xid, markEndPrepare); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? 
} else { return ROCKSDB_NAMESPACE::Status(*status); } } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkNoop( bool empty_batch) { m_env->CallVoidMethod(m_jcallback_obj, m_jMarkNoopMethodId, static_cast(empty_batch)); // check for Exception, in-particular RocksDBException if (m_env->ExceptionCheck()) { // exception thrown jthrowable exception = m_env->ExceptionOccurred(); std::unique_ptr status = ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); if (status == nullptr) { // unkown status or exception occurred extracting status m_env->ExceptionDescribe(); return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) probably need a // better error code here } else { m_env->ExceptionClear(); // clear the exception, as we have extracted the status return ROCKSDB_NAMESPACE::Status(*status); } } return ROCKSDB_NAMESPACE::Status::OK(); } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkRollback( const Slice& xid) { auto markRollback = [this] ( jbyteArray j_xid) { m_env->CallVoidMethod( m_jcallback_obj, m_jMarkRollbackMethodId, j_xid); }; auto status = WriteBatchHandlerJniCallback::k_op(xid, markRollback); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? } else { return ROCKSDB_NAMESPACE::Status(*status); } } ROCKSDB_NAMESPACE::Status WriteBatchHandlerJniCallback::MarkCommit( const Slice& xid) { auto markCommit = [this] ( jbyteArray j_xid) { m_env->CallVoidMethod( m_jcallback_obj, m_jMarkCommitMethodId, j_xid); }; auto status = WriteBatchHandlerJniCallback::k_op(xid, markCommit); if(status == nullptr) { return ROCKSDB_NAMESPACE::Status::OK(); // TODO(AR) what to do if there is // an Exception but we don't know // the ROCKSDB_NAMESPACE::Status? 
} else { return ROCKSDB_NAMESPACE::Status(*status); } } bool WriteBatchHandlerJniCallback::Continue() { jboolean jContinue = m_env->CallBooleanMethod( m_jcallback_obj, m_jContinueMethodId); if(m_env->ExceptionCheck()) { // exception thrown m_env->ExceptionDescribe(); } return static_cast(jContinue == JNI_TRUE); } std::unique_ptr WriteBatchHandlerJniCallback::kv_op( const Slice& key, const Slice& value, std::function kvFn) { const jbyteArray j_key = JniUtil::copyBytes(m_env, key); if (j_key == nullptr) { // exception thrown if (m_env->ExceptionCheck()) { m_env->ExceptionDescribe(); } return nullptr; } const jbyteArray j_value = JniUtil::copyBytes(m_env, value); if (j_value == nullptr) { // exception thrown if (m_env->ExceptionCheck()) { m_env->ExceptionDescribe(); } if (j_key != nullptr) { m_env->DeleteLocalRef(j_key); } return nullptr; } kvFn(j_key, j_value); // check for Exception, in-particular RocksDBException if (m_env->ExceptionCheck()) { if (j_value != nullptr) { m_env->DeleteLocalRef(j_value); } if (j_key != nullptr) { m_env->DeleteLocalRef(j_key); } // exception thrown jthrowable exception = m_env->ExceptionOccurred(); std::unique_ptr status = ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); if (status == nullptr) { // unkown status or exception occurred extracting status m_env->ExceptionDescribe(); return nullptr; } else { m_env->ExceptionClear(); // clear the exception, as we have extracted the status return status; } } if (j_value != nullptr) { m_env->DeleteLocalRef(j_value); } if (j_key != nullptr) { m_env->DeleteLocalRef(j_key); } // all OK return std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK())); } std::unique_ptr WriteBatchHandlerJniCallback::k_op( const Slice& key, std::function kFn) { const jbyteArray j_key = JniUtil::copyBytes(m_env, key); if (j_key == nullptr) { // exception thrown if (m_env->ExceptionCheck()) { m_env->ExceptionDescribe(); } return nullptr; } kFn(j_key); // check for Exception, 
in-particular RocksDBException if (m_env->ExceptionCheck()) { if (j_key != nullptr) { m_env->DeleteLocalRef(j_key); } // exception thrown jthrowable exception = m_env->ExceptionOccurred(); std::unique_ptr status = ROCKSDB_NAMESPACE::RocksDBExceptionJni::toCppStatus(m_env, exception); if (status == nullptr) { // unkown status or exception occurred extracting status m_env->ExceptionDescribe(); return nullptr; } else { m_env->ExceptionClear(); // clear the exception, as we have extracted the status return status; } } if (j_key != nullptr) { m_env->DeleteLocalRef(j_key); } // all OK return std::unique_ptr( new ROCKSDB_NAMESPACE::Status(ROCKSDB_NAMESPACE::Status::OK())); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/java/rocksjni/writebatchhandlerjnicallback.h000066400000000000000000000065641370372246700240440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::WriteBatch::Handler. #ifndef JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ #define JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ #include #include #include #include "rocksjni/jnicallback.h" #include "rocksdb/write_batch.h" namespace ROCKSDB_NAMESPACE { /** * This class acts as a bridge between C++ * and Java. The methods in this class will be * called back from the RocksDB storage engine (C++) * which calls the appropriate Java method. * This enables Write Batch Handlers to be implemented in Java. 
*/ class WriteBatchHandlerJniCallback : public JniCallback, public WriteBatch::Handler { public: WriteBatchHandlerJniCallback( JNIEnv* env, jobject jWriteBackHandler); Status PutCF(uint32_t column_family_id, const Slice& key, const Slice& value); void Put(const Slice& key, const Slice& value); Status MergeCF(uint32_t column_family_id, const Slice& key, const Slice& value); void Merge(const Slice& key, const Slice& value); Status DeleteCF(uint32_t column_family_id, const Slice& key); void Delete(const Slice& key); Status SingleDeleteCF(uint32_t column_family_id, const Slice& key); void SingleDelete(const Slice& key); Status DeleteRangeCF(uint32_t column_family_id, const Slice& beginKey, const Slice& endKey); void DeleteRange(const Slice& beginKey, const Slice& endKey); void LogData(const Slice& blob); Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key, const Slice& value); Status MarkBeginPrepare(bool); Status MarkEndPrepare(const Slice& xid); Status MarkNoop(bool empty_batch); Status MarkRollback(const Slice& xid); Status MarkCommit(const Slice& xid); bool Continue(); private: JNIEnv* m_env; jmethodID m_jPutCfMethodId; jmethodID m_jPutMethodId; jmethodID m_jMergeCfMethodId; jmethodID m_jMergeMethodId; jmethodID m_jDeleteCfMethodId; jmethodID m_jDeleteMethodId; jmethodID m_jSingleDeleteCfMethodId; jmethodID m_jSingleDeleteMethodId; jmethodID m_jDeleteRangeCfMethodId; jmethodID m_jDeleteRangeMethodId; jmethodID m_jLogDataMethodId; jmethodID m_jPutBlobIndexCfMethodId; jmethodID m_jMarkBeginPrepareMethodId; jmethodID m_jMarkEndPrepareMethodId; jmethodID m_jMarkNoopMethodId; jmethodID m_jMarkRollbackMethodId; jmethodID m_jMarkCommitMethodId; jmethodID m_jContinueMethodId; /** * @return A pointer to a ROCKSDB_NAMESPACE::Status or nullptr if an * unexpected exception occurred */ std::unique_ptr kv_op( const Slice& key, const Slice& value, std::function kvFn); /** * @return A pointer to a ROCKSDB_NAMESPACE::Status or nullptr if an * unexpected exception 
occurred */ std::unique_ptr k_op( const Slice& key, std::function kFn); }; } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_WRITEBATCHHANDLERJNICALLBACK_H_ rocksdb-6.11.4/java/samples/000077500000000000000000000000001370372246700156325ustar00rootroot00000000000000rocksdb-6.11.4/java/samples/src/000077500000000000000000000000001370372246700164215ustar00rootroot00000000000000rocksdb-6.11.4/java/samples/src/main/000077500000000000000000000000001370372246700173455ustar00rootroot00000000000000rocksdb-6.11.4/java/samples/src/main/java/000077500000000000000000000000001370372246700202665ustar00rootroot00000000000000rocksdb-6.11.4/java/samples/src/main/java/OptimisticTransactionSample.java000066400000000000000000000144241370372246700266320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
import org.rocksdb.*; import static java.nio.charset.StandardCharsets.UTF_8; /** * Demonstrates using Transactions on an OptimisticTransactionDB with * varying isolation guarantees */ public class OptimisticTransactionSample { private static final String dbPath = "/tmp/rocksdb_optimistic_transaction_example"; public static final void main(final String args[]) throws RocksDBException { try(final Options options = new Options() .setCreateIfMissing(true); final OptimisticTransactionDB txnDb = OptimisticTransactionDB.open(options, dbPath)) { try (final WriteOptions writeOptions = new WriteOptions(); final ReadOptions readOptions = new ReadOptions()) { //////////////////////////////////////////////////////// // // Simple OptimisticTransaction Example ("Read Committed") // //////////////////////////////////////////////////////// readCommitted(txnDb, writeOptions, readOptions); //////////////////////////////////////////////////////// // // "Repeatable Read" (Snapshot Isolation) Example // -- Using a single Snapshot // //////////////////////////////////////////////////////// repeatableRead(txnDb, writeOptions, readOptions); //////////////////////////////////////////////////////// // // "Read Committed" (Monotonic Atomic Views) Example // --Using multiple Snapshots // //////////////////////////////////////////////////////// readCommitted_monotonicAtomicViews(txnDb, writeOptions, readOptions); } } } /** * Demonstrates "Read Committed" isolation */ private static void readCommitted(final OptimisticTransactionDB txnDb, final WriteOptions writeOptions, final ReadOptions readOptions) throws RocksDBException { final byte key1[] = "abc".getBytes(UTF_8); final byte value1[] = "def".getBytes(UTF_8); final byte key2[] = "xyz".getBytes(UTF_8); final byte value2[] = "zzz".getBytes(UTF_8); // Start a transaction try(final Transaction txn = txnDb.beginTransaction(writeOptions)) { // Read a key in this transaction byte[] value = txn.get(readOptions, key1); assert(value == null); // Write 
a key in this transaction txn.put(key1, value1); // Read a key OUTSIDE this transaction. Does not affect txn. value = txnDb.get(readOptions, key1); assert(value == null); // Write a key OUTSIDE of this transaction. // Does not affect txn since this is an unrelated key. // If we wrote key 'abc' here, the transaction would fail to commit. txnDb.put(writeOptions, key2, value2); // Commit transaction txn.commit(); } } /** * Demonstrates "Repeatable Read" (Snapshot Isolation) isolation */ private static void repeatableRead(final OptimisticTransactionDB txnDb, final WriteOptions writeOptions, final ReadOptions readOptions) throws RocksDBException { final byte key1[] = "ghi".getBytes(UTF_8); final byte value1[] = "jkl".getBytes(UTF_8); // Set a snapshot at start of transaction by setting setSnapshot(true) try(final OptimisticTransactionOptions txnOptions = new OptimisticTransactionOptions().setSetSnapshot(true); final Transaction txn = txnDb.beginTransaction(writeOptions, txnOptions)) { final Snapshot snapshot = txn.getSnapshot(); // Write a key OUTSIDE of transaction txnDb.put(writeOptions, key1, value1); // Read a key using the snapshot. readOptions.setSnapshot(snapshot); final byte[] value = txn.getForUpdate(readOptions, key1, true); assert(value == value1); try { // Attempt to commit transaction txn.commit(); throw new IllegalStateException(); } catch(final RocksDBException e) { // Transaction could not commit since the write outside of the txn // conflicted with the read! assert(e.getStatus().getCode() == Status.Code.Busy); } txn.rollback(); } finally { // Clear snapshot from read options since it is no longer valid readOptions.setSnapshot(null); } } /** * Demonstrates "Read Committed" (Monotonic Atomic Views) isolation * * In this example, we set the snapshot multiple times. This is probably * only necessary if you have very strict isolation requirements to * implement. 
*/ private static void readCommitted_monotonicAtomicViews( final OptimisticTransactionDB txnDb, final WriteOptions writeOptions, final ReadOptions readOptions) throws RocksDBException { final byte keyX[] = "x".getBytes(UTF_8); final byte valueX[] = "x".getBytes(UTF_8); final byte keyY[] = "y".getBytes(UTF_8); final byte valueY[] = "y".getBytes(UTF_8); try (final OptimisticTransactionOptions txnOptions = new OptimisticTransactionOptions().setSetSnapshot(true); final Transaction txn = txnDb.beginTransaction(writeOptions, txnOptions)) { // Do some reads and writes to key "x" Snapshot snapshot = txnDb.getSnapshot(); readOptions.setSnapshot(snapshot); byte[] value = txn.get(readOptions, keyX); txn.put(valueX, valueX); // Do a write outside of the transaction to key "y" txnDb.put(writeOptions, keyY, valueY); // Set a new snapshot in the transaction txn.setSnapshot(); snapshot = txnDb.getSnapshot(); readOptions.setSnapshot(snapshot); // Do some reads and writes to key "y" // Since the snapshot was advanced, the write done outside of the // transaction does not conflict. value = txn.getForUpdate(readOptions, keyY, true); txn.put(keyY, valueY); // Commit. Since the snapshot was advanced, the write done outside of the // transaction does not prevent this transaction from Committing. txn.commit(); } finally { // Clear snapshot from read options since it is no longer valid readOptions.setSnapshot(null); } } } rocksdb-6.11.4/java/samples/src/main/java/RocksDBColumnFamilySample.java000066400000000000000000000051761370372246700261130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
import org.rocksdb.*; import java.util.ArrayList; import java.util.List; public class RocksDBColumnFamilySample { static { RocksDB.loadLibrary(); } public static void main(final String[] args) throws RocksDBException { if (args.length < 1) { System.out.println( "usage: RocksDBColumnFamilySample db_path"); System.exit(-1); } final String db_path = args[0]; System.out.println("RocksDBColumnFamilySample"); try(final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, db_path)) { assert(db != null); // create column family try(final ColumnFamilyHandle columnFamilyHandle = db.createColumnFamily( new ColumnFamilyDescriptor("new_cf".getBytes(), new ColumnFamilyOptions()))) { assert (columnFamilyHandle != null); } } // open DB with two column families final List columnFamilyDescriptors = new ArrayList<>(); // have to open default column family columnFamilyDescriptors.add(new ColumnFamilyDescriptor( RocksDB.DEFAULT_COLUMN_FAMILY, new ColumnFamilyOptions())); // open the new one, too columnFamilyDescriptors.add(new ColumnFamilyDescriptor( "new_cf".getBytes(), new ColumnFamilyOptions())); final List columnFamilyHandles = new ArrayList<>(); try(final DBOptions options = new DBOptions(); final RocksDB db = RocksDB.open(options, db_path, columnFamilyDescriptors, columnFamilyHandles)) { assert(db != null); try { // put and get from non-default column family db.put(columnFamilyHandles.get(0), new WriteOptions(), "key".getBytes(), "value".getBytes()); // atomic write try (final WriteBatch wb = new WriteBatch()) { wb.put(columnFamilyHandles.get(0), "key2".getBytes(), "value2".getBytes()); wb.put(columnFamilyHandles.get(1), "key3".getBytes(), "value3".getBytes()); wb.remove(columnFamilyHandles.get(0), "key".getBytes()); db.write(new WriteOptions(), wb); } // drop column family db.dropColumnFamily(columnFamilyHandles.get(1)); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } 
rocksdb-6.11.4/java/samples/src/main/java/RocksDBSample.java000066400000000000000000000263551370372246700235750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). import java.lang.IllegalArgumentException; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.ArrayList; import org.rocksdb.*; import org.rocksdb.util.SizeUnit; public class RocksDBSample { static { RocksDB.loadLibrary(); } public static void main(final String[] args) { if (args.length < 1) { System.out.println("usage: RocksDBSample db_path"); System.exit(-1); } final String db_path = args[0]; final String db_path_not_found = db_path + "_not_found"; System.out.println("RocksDBSample"); try (final Options options = new Options(); final Filter bloomFilter = new BloomFilter(10); final ReadOptions readOptions = new ReadOptions() .setFillCache(false); final Statistics stats = new Statistics(); final RateLimiter rateLimiter = new RateLimiter(10000000,10000, 10)) { try (final RocksDB db = RocksDB.open(options, db_path_not_found)) { assert (false); } catch (final RocksDBException e) { System.out.format("Caught the expected exception -- %s\n", e); } try { options.setCreateIfMissing(true) .setStatistics(stats) .setWriteBufferSize(8 * SizeUnit.KB) .setMaxWriteBufferNumber(3) .setMaxBackgroundCompactions(10) .setCompressionType(CompressionType.SNAPPY_COMPRESSION) .setCompactionStyle(CompactionStyle.UNIVERSAL); } catch (final IllegalArgumentException e) { assert (false); } assert (options.createIfMissing() == true); assert (options.writeBufferSize() == 8 * SizeUnit.KB); assert (options.maxWriteBufferNumber() == 3); assert (options.maxBackgroundCompactions() == 10); assert (options.compressionType() == CompressionType.SNAPPY_COMPRESSION); assert 
(options.compactionStyle() == CompactionStyle.UNIVERSAL); assert (options.memTableFactoryName().equals("SkipListFactory")); options.setMemTableConfig( new HashSkipListMemTableConfig() .setHeight(4) .setBranchingFactor(4) .setBucketCount(2000000)); assert (options.memTableFactoryName().equals("HashSkipListRepFactory")); options.setMemTableConfig( new HashLinkedListMemTableConfig() .setBucketCount(100000)); assert (options.memTableFactoryName().equals("HashLinkedListRepFactory")); options.setMemTableConfig( new VectorMemTableConfig().setReservedSize(10000)); assert (options.memTableFactoryName().equals("VectorRepFactory")); options.setMemTableConfig(new SkipListMemTableConfig()); assert (options.memTableFactoryName().equals("SkipListFactory")); options.setTableFormatConfig(new PlainTableConfig()); // Plain-Table requires mmap read options.setAllowMmapReads(true); assert (options.tableFactoryName().equals("PlainTable")); options.setRateLimiter(rateLimiter); final BlockBasedTableConfig table_options = new BlockBasedTableConfig(); table_options.setBlockCacheSize(64 * SizeUnit.KB) .setFilter(bloomFilter) .setCacheNumShardBits(6) .setBlockSizeDeviation(5) .setBlockRestartInterval(10) .setCacheIndexAndFilterBlocks(true) .setHashIndexAllowCollision(false) .setBlockCacheCompressedSize(64 * SizeUnit.KB) .setBlockCacheCompressedNumShardBits(10); assert (table_options.blockCacheSize() == 64 * SizeUnit.KB); assert (table_options.cacheNumShardBits() == 6); assert (table_options.blockSizeDeviation() == 5); assert (table_options.blockRestartInterval() == 10); assert (table_options.cacheIndexAndFilterBlocks() == true); assert (table_options.hashIndexAllowCollision() == false); assert (table_options.blockCacheCompressedSize() == 64 * SizeUnit.KB); assert (table_options.blockCacheCompressedNumShardBits() == 10); options.setTableFormatConfig(table_options); assert (options.tableFactoryName().equals("BlockBasedTable")); try (final RocksDB db = RocksDB.open(options, db_path)) { 
db.put("hello".getBytes(), "world".getBytes()); final byte[] value = db.get("hello".getBytes()); assert ("world".equals(new String(value))); final String str = db.getProperty("rocksdb.stats"); assert (str != null && !str.equals("")); } catch (final RocksDBException e) { System.out.format("[ERROR] caught the unexpected exception -- %s\n", e); assert (false); } try (final RocksDB db = RocksDB.open(options, db_path)) { db.put("hello".getBytes(), "world".getBytes()); byte[] value = db.get("hello".getBytes()); System.out.format("Get('hello') = %s\n", new String(value)); for (int i = 1; i <= 9; ++i) { for (int j = 1; j <= 9; ++j) { db.put(String.format("%dx%d", i, j).getBytes(), String.format("%d", i * j).getBytes()); } } for (int i = 1; i <= 9; ++i) { for (int j = 1; j <= 9; ++j) { System.out.format("%s ", new String(db.get( String.format("%dx%d", i, j).getBytes()))); } System.out.println(""); } // write batch test try (final WriteOptions writeOpt = new WriteOptions()) { for (int i = 10; i <= 19; ++i) { try (final WriteBatch batch = new WriteBatch()) { for (int j = 10; j <= 19; ++j) { batch.put(String.format("%dx%d", i, j).getBytes(), String.format("%d", i * j).getBytes()); } db.write(writeOpt, batch); } } } for (int i = 10; i <= 19; ++i) { for (int j = 10; j <= 19; ++j) { assert (new String( db.get(String.format("%dx%d", i, j).getBytes())).equals( String.format("%d", i * j))); System.out.format("%s ", new String(db.get( String.format("%dx%d", i, j).getBytes()))); } System.out.println(""); } value = db.get("1x1".getBytes()); assert (value != null); value = db.get("world".getBytes()); assert (value == null); value = db.get(readOptions, "world".getBytes()); assert (value == null); final byte[] testKey = "asdf".getBytes(); final byte[] testValue = "asdfghjkl;'?> insufficientArray.length); len = db.get("asdfjkl;".getBytes(), enoughArray); assert (len == RocksDB.NOT_FOUND); len = db.get(testKey, enoughArray); assert (len == testValue.length); len = db.get(readOptions, 
testKey, insufficientArray); assert (len > insufficientArray.length); len = db.get(readOptions, "asdfjkl;".getBytes(), enoughArray); assert (len == RocksDB.NOT_FOUND); len = db.get(readOptions, testKey, enoughArray); assert (len == testValue.length); db.remove(testKey); len = db.get(testKey, enoughArray); assert (len == RocksDB.NOT_FOUND); // repeat the test with WriteOptions try (final WriteOptions writeOpts = new WriteOptions()) { writeOpts.setSync(true); writeOpts.setDisableWAL(true); db.put(writeOpts, testKey, testValue); len = db.get(testKey, enoughArray); assert (len == testValue.length); assert (new String(testValue).equals( new String(enoughArray, 0, len))); } try { for (final TickerType statsType : TickerType.values()) { if (statsType != TickerType.TICKER_ENUM_MAX) { stats.getTickerCount(statsType); } } System.out.println("getTickerCount() passed."); } catch (final Exception e) { System.out.println("Failed in call to getTickerCount()"); assert (false); //Should never reach here. } try { for (final HistogramType histogramType : HistogramType.values()) { if (histogramType != HistogramType.HISTOGRAM_ENUM_MAX) { HistogramData data = stats.getHistogramData(histogramType); } } System.out.println("getHistogramData() passed."); } catch (final Exception e) { System.out.println("Failed in call to getHistogramData()"); assert (false); //Should never reach here. 
} try (final RocksIterator iterator = db.newIterator()) { boolean seekToFirstPassed = false; for (iterator.seekToFirst(); iterator.isValid(); iterator.next()) { iterator.status(); assert (iterator.key() != null); assert (iterator.value() != null); seekToFirstPassed = true; } if (seekToFirstPassed) { System.out.println("iterator seekToFirst tests passed."); } boolean seekToLastPassed = false; for (iterator.seekToLast(); iterator.isValid(); iterator.prev()) { iterator.status(); assert (iterator.key() != null); assert (iterator.value() != null); seekToLastPassed = true; } if (seekToLastPassed) { System.out.println("iterator seekToLastPassed tests passed."); } iterator.seekToFirst(); iterator.seek(iterator.key()); assert (iterator.key() != null); assert (iterator.value() != null); System.out.println("iterator seek test passed."); } System.out.println("iterator tests passed."); final List keys = new ArrayList<>(); try (final RocksIterator iterator = db.newIterator()) { for (iterator.seekToLast(); iterator.isValid(); iterator.prev()) { keys.add(iterator.key()); } } Map values = db.multiGet(keys); assert (values.size() == keys.size()); for (final byte[] value1 : values.values()) { assert (value1 != null); } values = db.multiGet(new ReadOptions(), keys); assert (values.size() == keys.size()); for (final byte[] value1 : values.values()) { assert (value1 != null); } } catch (final RocksDBException e) { System.err.println(e); } } } } rocksdb-6.11.4/java/samples/src/main/java/TransactionSample.java000066400000000000000000000141761370372246700245710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
import org.rocksdb.*; import static java.nio.charset.StandardCharsets.UTF_8; /** * Demonstrates using Transactions on a TransactionDB with * varying isolation guarantees */ public class TransactionSample { private static final String dbPath = "/tmp/rocksdb_transaction_example"; public static final void main(final String args[]) throws RocksDBException { try(final Options options = new Options() .setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB txnDb = TransactionDB.open(options, txnDbOptions, dbPath)) { try (final WriteOptions writeOptions = new WriteOptions(); final ReadOptions readOptions = new ReadOptions()) { //////////////////////////////////////////////////////// // // Simple Transaction Example ("Read Committed") // //////////////////////////////////////////////////////// readCommitted(txnDb, writeOptions, readOptions); //////////////////////////////////////////////////////// // // "Repeatable Read" (Snapshot Isolation) Example // -- Using a single Snapshot // //////////////////////////////////////////////////////// repeatableRead(txnDb, writeOptions, readOptions); //////////////////////////////////////////////////////// // // "Read Committed" (Monotonic Atomic Views) Example // --Using multiple Snapshots // //////////////////////////////////////////////////////// readCommitted_monotonicAtomicViews(txnDb, writeOptions, readOptions); } } } /** * Demonstrates "Read Committed" isolation */ private static void readCommitted(final TransactionDB txnDb, final WriteOptions writeOptions, final ReadOptions readOptions) throws RocksDBException { final byte key1[] = "abc".getBytes(UTF_8); final byte value1[] = "def".getBytes(UTF_8); final byte key2[] = "xyz".getBytes(UTF_8); final byte value2[] = "zzz".getBytes(UTF_8); // Start a transaction try(final Transaction txn = txnDb.beginTransaction(writeOptions)) { // Read a key in this transaction byte[] value = txn.get(readOptions, key1); assert(value == 
null); // Write a key in this transaction txn.put(key1, value1); // Read a key OUTSIDE this transaction. Does not affect txn. value = txnDb.get(readOptions, key1); assert(value == null); // Write a key OUTSIDE of this transaction. // Does not affect txn since this is an unrelated key. // If we wrote key 'abc' here, the transaction would fail to commit. txnDb.put(writeOptions, key2, value2); // Commit transaction txn.commit(); } } /** * Demonstrates "Repeatable Read" (Snapshot Isolation) isolation */ private static void repeatableRead(final TransactionDB txnDb, final WriteOptions writeOptions, final ReadOptions readOptions) throws RocksDBException { final byte key1[] = "ghi".getBytes(UTF_8); final byte value1[] = "jkl".getBytes(UTF_8); // Set a snapshot at start of transaction by setting setSnapshot(true) try(final TransactionOptions txnOptions = new TransactionOptions() .setSetSnapshot(true); final Transaction txn = txnDb.beginTransaction(writeOptions, txnOptions)) { final Snapshot snapshot = txn.getSnapshot(); // Write a key OUTSIDE of transaction txnDb.put(writeOptions, key1, value1); // Attempt to read a key using the snapshot. This will fail since // the previous write outside this txn conflicts with this read. readOptions.setSnapshot(snapshot); try { final byte[] value = txn.getForUpdate(readOptions, key1, true); throw new IllegalStateException(); } catch(final RocksDBException e) { assert(e.getStatus().getCode() == Status.Code.Busy); } txn.rollback(); } finally { // Clear snapshot from read options since it is no longer valid readOptions.setSnapshot(null); } } /** * Demonstrates "Read Committed" (Monotonic Atomic Views) isolation * * In this example, we set the snapshot multiple times. This is probably * only necessary if you have very strict isolation requirements to * implement. 
*/ private static void readCommitted_monotonicAtomicViews( final TransactionDB txnDb, final WriteOptions writeOptions, final ReadOptions readOptions) throws RocksDBException { final byte keyX[] = "x".getBytes(UTF_8); final byte valueX[] = "x".getBytes(UTF_8); final byte keyY[] = "y".getBytes(UTF_8); final byte valueY[] = "y".getBytes(UTF_8); try (final TransactionOptions txnOptions = new TransactionOptions() .setSetSnapshot(true); final Transaction txn = txnDb.beginTransaction(writeOptions, txnOptions)) { // Do some reads and writes to key "x" Snapshot snapshot = txnDb.getSnapshot(); readOptions.setSnapshot(snapshot); byte[] value = txn.get(readOptions, keyX); txn.put(valueX, valueX); // Do a write outside of the transaction to key "y" txnDb.put(writeOptions, keyY, valueY); // Set a new snapshot in the transaction txn.setSnapshot(); txn.setSavePoint(); snapshot = txnDb.getSnapshot(); readOptions.setSnapshot(snapshot); // Do some reads and writes to key "y" // Since the snapshot was advanced, the write done outside of the // transaction does not conflict. value = txn.getForUpdate(readOptions, keyY, true); txn.put(keyY, valueY); // Decide we want to revert the last write from this transaction. txn.rollbackToSavePoint(); // Commit. 
txn.commit(); } finally { // Clear snapshot from read options since it is no longer valid readOptions.setSnapshot(null); } } } rocksdb-6.11.4/java/src/000077500000000000000000000000001370372246700147555ustar00rootroot00000000000000rocksdb-6.11.4/java/src/main/000077500000000000000000000000001370372246700157015ustar00rootroot00000000000000rocksdb-6.11.4/java/src/main/java/000077500000000000000000000000001370372246700166225ustar00rootroot00000000000000rocksdb-6.11.4/java/src/main/java/org/000077500000000000000000000000001370372246700174115ustar00rootroot00000000000000rocksdb-6.11.4/java/src/main/java/org/rocksdb/000077500000000000000000000000001370372246700210405ustar00rootroot00000000000000rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java000066400000000000000000000034041370372246700266320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * A CompactionFilter allows an application to modify/delete a key-value at * the time of compaction. 
* * At present we just permit an overriding Java class to wrap a C++ * implementation */ public abstract class AbstractCompactionFilter> extends RocksObject { public static class Context { private final boolean fullCompaction; private final boolean manualCompaction; public Context(final boolean fullCompaction, final boolean manualCompaction) { this.fullCompaction = fullCompaction; this.manualCompaction = manualCompaction; } /** * Does this compaction run include all data files * * @return true if this is a full compaction run */ public boolean isFullCompaction() { return fullCompaction; } /** * Is this compaction requested by the client, * or is it occurring as an automatic compaction process * * @return true if the compaction was initiated by the client */ public boolean isManualCompaction() { return manualCompaction; } } protected AbstractCompactionFilter(final long nativeHandle) { super(nativeHandle); } /** * Deletes underlying C++ compaction pointer. * * Note that this function should be called only after all * RocksDB instances referencing the compaction filter are closed. * Otherwise an undefined behavior will occur. */ @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java000066400000000000000000000047631370372246700301730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; /** * Each compaction will create a new {@link AbstractCompactionFilter} * allowing the application to know about different compactions * * @param The concrete type of the compaction filter */ public abstract class AbstractCompactionFilterFactory> extends RocksCallbackObject { public AbstractCompactionFilterFactory() { super(null); } @Override protected long initializeNative(final long... nativeParameterHandles) { return createNewCompactionFilterFactory0(); } /** * Called from JNI, see compaction_filter_factory_jnicallback.cc * * @param fullCompaction {@link AbstractCompactionFilter.Context#fullCompaction} * @param manualCompaction {@link AbstractCompactionFilter.Context#manualCompaction} * * @return native handle of the CompactionFilter */ private long createCompactionFilter(final boolean fullCompaction, final boolean manualCompaction) { final T filter = createCompactionFilter( new AbstractCompactionFilter.Context(fullCompaction, manualCompaction)); // CompactionFilterFactory::CreateCompactionFilter returns a std::unique_ptr // which therefore has ownership of the underlying native object filter.disOwnNativeHandle(); return filter.nativeHandle_; } /** * Create a new compaction filter * * @param context The context describing the need for a new compaction filter * * @return A new instance of {@link AbstractCompactionFilter} */ public abstract T createCompactionFilter( final AbstractCompactionFilter.Context context); /** * A name which identifies this compaction filter * * The name will be printed to the LOG file on start up for diagnosis * * @return name which identifies this compaction filter. 
*/ public abstract String name(); /** * We override {@link RocksCallbackObject#disposeInternal()} * as disposing of a rocksdb::AbstractCompactionFilterFactory requires * a slightly different approach as it is a std::shared_ptr */ @Override protected void disposeInternal() { disposeInternal(nativeHandle_); } private native long createNewCompactionFilterFactory0(); private native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractComparator.java000066400000000000000000000075021370372246700255020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * Comparators are used by RocksDB to determine * the ordering of keys. * * Implementations of Comparators in Java should extend this class. */ public abstract class AbstractComparator extends RocksCallbackObject { AbstractComparator() { super(); } protected AbstractComparator(final ComparatorOptions copt) { super(copt.nativeHandle_); } @Override protected long initializeNative(final long... nativeParameterHandles) { return createNewComparator(nativeParameterHandles[0]); } /** * Get the type of this comparator. * * Used for determining the correct C++ cast in native code. * * @return The type of the comparator. */ ComparatorType getComparatorType() { return ComparatorType.JAVA_COMPARATOR; } /** * The name of the comparator. Used to check for comparator * mismatches (i.e., a DB created with one comparator is * accessed using a different comparator). * * A new name should be used whenever * the comparator implementation changes in a way that will cause * the relative ordering of any two keys to change. * * Names starting with "rocksdb." are reserved and should not be used. 
* * @return The name of this comparator implementation */ public abstract String name(); /** * Three-way key comparison. Implementations should provide a *
total order * on keys that might be passed to it. * * The implementation may modify the {@code ByteBuffer}s passed in, though * it would be unconventional to modify the "limit" or any of the * underlying bytes. As a callback, RocksJava will ensure that {@code a} * is a different instance from {@code b}. * * @param a buffer containing the first key in its "remaining" elements * @param b buffer containing the second key in its "remaining" elements * * @return Should return either: * 1) < 0 if "a" < "b" * 2) == 0 if "a" == "b" * 3) > 0 if "a" > "b" */ public abstract int compare(final ByteBuffer a, final ByteBuffer b); /** *

Used to reduce the space requirements * for internal data structures like index blocks.

* *

If start < limit, you may modify start which is a * shorter string in [start, limit).

* * If you modify start, it is expected that you set the byte buffer so that * a subsequent read of start.remaining() bytes from start.position() * to start.limit() will obtain the new start value. * *

Simple comparator implementations may return with start unchanged. * i.e., an implementation of this method that does nothing is correct.

* * @param start the start * @param limit the limit */ public void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { // no-op } /** *

Used to reduce the space requirements * for internal data structures like index blocks.

* *

You may change key to a shorter key (key1) where * key1 ≥ key.

* *

Simple comparator implementations may return the key unchanged. * i.e., an implementation of * this method that does nothing is correct.

* * @param key the key */ public void findShortSuccessor(final ByteBuffer key) { // no-op } public final boolean usingDirectBuffers() { return usingDirectBuffers(nativeHandle_); } private native boolean usingDirectBuffers(final long nativeHandle); private native long createNewComparator(final long comparatorOptionsHandle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java000066400000000000000000000100621370372246700272530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * This class is intentionally private, * it holds methods which are called * from C++ to interact with a Comparator * written in Java. * * Placing these bridge methods in this * class keeps the API of the * {@link org.rocksdb.AbstractComparator} clean. */ class AbstractComparatorJniBridge { /** * Only called from JNI. * * Simply a bridge to calling * {@link AbstractComparator#compare(ByteBuffer, ByteBuffer)}, * which ensures that the byte buffer lengths are correct * before and after the call. 
* * @param comparator the comparator object on which to * call {@link AbstractComparator#compare(ByteBuffer, ByteBuffer)} * @param a buffer access to first key * @param aLen the length of the a key, * may be smaller than the buffer {@code a} * @param b buffer access to second key * @param bLen the length of the b key, * may be smaller than the buffer {@code b} * * @return the result of the comparison */ private static int compareInternal( final AbstractComparator comparator, final ByteBuffer a, final int aLen, final ByteBuffer b, final int bLen) { if (aLen != -1) { a.mark(); a.limit(aLen); } if (bLen != -1) { b.mark(); b.limit(bLen); } final int c = comparator.compare(a, b); if (aLen != -1) { a.reset(); } if (bLen != -1) { b.reset(); } return c; } /** * Only called from JNI. * * Simply a bridge to calling * {@link AbstractComparator#findShortestSeparator(ByteBuffer, ByteBuffer)}, * which ensures that the byte buffer lengths are correct * before the call. * * @param comparator the comparator object on which to * call {@link AbstractComparator#findShortestSeparator(ByteBuffer, ByteBuffer)} * @param start buffer access to the start key * @param startLen the length of the start key, * may be smaller than the buffer {@code start} * @param limit buffer access to the limit key * @param limitLen the length of the limit key, * may be smaller than the buffer {@code limit} * * @return either {@code startLen} if the start key is unchanged, otherwise * the new length of the start key */ private static int findShortestSeparatorInternal( final AbstractComparator comparator, final ByteBuffer start, final int startLen, final ByteBuffer limit, final int limitLen) { if (startLen != -1) { start.limit(startLen); } if (limitLen != -1) { limit.limit(limitLen); } comparator.findShortestSeparator(start, limit); return start.remaining(); } /** * Only called from JNI. 
*
   * Simply a bridge to calling
   * {@link AbstractComparator#findShortSuccessor(ByteBuffer)},
   * which ensures that the byte buffer length is correct
   * before the call.
   *
   * @param comparator the comparator object on which to
   *     call {@link AbstractComparator#findShortSuccessor(ByteBuffer)}
   * @param key buffer access to the key
   * @param keyLen the length of the key,
   *     may be smaller than the buffer {@code key};
   *     {@code -1} leaves the limit of {@code key} untouched
   *
   * @return either keyLen if the key is unchanged, otherwise the new length of the key
   */
  private static int findShortSuccessorInternal(
      final AbstractComparator comparator,
      final ByteBuffer key, final int keyLen) {
    // Cap the buffer at the real key length before handing it over.
    if (keyLen != -1) {
      key.limit(keyLen);
    }
    comparator.findShortSuccessor(key);
    // Whatever is left between position and limit is the (possibly new) key.
    return key.remaining();
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java000066400000000000000000000041361370372246700303000ustar00rootroot00000000000000// Copyright (c) 2016, Facebook, Inc. All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Offers functionality for implementations of
 * {@link AbstractNativeReference} which have an immutable reference to the
 * underlying native C++ object
 */
//@ThreadSafe
public abstract class AbstractImmutableNativeReference
    extends AbstractNativeReference {

  /**
   * A flag indicating whether the current {@code AbstractNativeReference} is
   * responsible to free the underlying C++ object
   */
  protected final AtomicBoolean owningHandle_;

  /**
   * Records the initial ownership state.
   *
   * @param owningHandle true if this object starts out responsible for
   *     freeing the underlying C++ object
   */
  protected AbstractImmutableNativeReference(final boolean owningHandle) {
    this.owningHandle_ = new AtomicBoolean(owningHandle);
  }

  /**
   * @return the current value of the ownership flag
   */
  @Override
  public boolean isOwningHandle() {
    return owningHandle_.get();
  }

  /**
   * Releases this {@code AbstractNativeReference} from the responsibility of
   * freeing the underlying native C++ object
   *

* This will prevent the object from attempting to delete the underlying * native object in its finalizer. This must be used when another object * takes over ownership of the native object or both will attempt to delete * the underlying object when garbage collected. *

* When {@code disOwnNativeHandle()} is called, {@code dispose()} will * subsequently take no action. As a result, incorrect use of this function * may cause a memory leak. *

*
   * @see #dispose()
   */
  protected final void disOwnNativeHandle() {
    // After this, close() finds the flag already false and does nothing.
    owningHandle_.set(false);
  }

  /**
   * Calls {@link #disposeInternal()} if, and only if, this object still
   * owns its handle, and gives up ownership in the same atomic step.
   */
  @Override
  public void close() {
    // compareAndSet flips true -> false atomically, so only the first
    // caller that still sees ownership reaches disposeInternal().
    if (owningHandle_.compareAndSet(true, false)) {
      disposeInternal();
    }
  }

  /**
   * The helper function of {@link AbstractImmutableNativeReference#dispose()}
   * which all subclasses of {@code AbstractImmutableNativeReference} must
   * implement to release their underlying native C++ objects.
   */
  protected abstract void disposeInternal();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractMutableOptions.java000066400000000000000000000173101370372246700263360ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

import java.util.*;

public abstract class AbstractMutableOptions {

  // Separates one "key=value" pair from the next in toString().
  protected static final String KEY_VALUE_PAIR_SEPARATOR = ";";
  // Separates a key from its value in toString().
  protected static final char KEY_VALUE_SEPARATOR = '=';
  // Separates the elements of an int array value when parsing.
  static final String INT_ARRAY_INT_SEPARATOR = ",";

  // keys[i] is paired with values[i]; toString() iterates over keys.length.
  protected final String[] keys;
  private final String[] values;

  /**
   * User must use builder pattern, or parser.
   *
   * @param keys the keys
   * @param values the values
   */
  protected AbstractMutableOptions(final String[] keys, final String[] values) {
    // The arrays are stored as passed in; no defensive copy is made.
    this.keys = keys;
    this.values = values;
  }

  /**
   * @return the backing array of keys (not a copy)
   */
  String[] getKeys() {
    return keys;
  }

  /**
   * @return the backing array of values (not a copy)
   */
  String[] getValues() {
    return values;
  }

  /**
   * Returns a string representation of MutableOptions which
   * is suitable for consumption by {@code #parse(String)}.
*
   * @return String representation of MutableOptions
   */
  @Override
  public String toString() {
    final StringBuilder buffer = new StringBuilder();
    for(int i = 0; i < keys.length; i++) {
      buffer
          .append(keys[i])
          .append(KEY_VALUE_SEPARATOR)
          .append(values[i]);

      // No trailing separator after the last pair.
      if(i + 1 < keys.length) {
        buffer.append(KEY_VALUE_PAIR_SEPARATOR);
      }
    }
    return buffer.toString();
  }

  /**
   * Base class for builders of {@link AbstractMutableOptions} sub-classes.
   * Option values are collected in insertion order and type-checked against
   * the value type declared by their key.
   *
   * @param <T> the type of options produced by {@link #build()}
   * @param <U> the concrete builder type, returned by the setters
   * @param <K> the type of the option keys
   */
  public static abstract class AbstractMutableOptionsBuilder<
      T extends AbstractMutableOptions,
      U extends AbstractMutableOptionsBuilder<T, U, K>,
      K extends MutableOptionKey> {

    // LinkedHashMap: build() emits the options in the order they were set.
    private final Map<K, MutableOptionValue<?>> options = new LinkedHashMap<>();

    /**
     * @return this builder, typed as the concrete builder class
     */
    protected abstract U self();

    /**
     * Get all of the possible keys
     *
     * @return A map of all keys, indexed by name.
     */
    protected abstract Map<String, K> allKeys();

    /**
     * Construct a sub-class instance of {@link AbstractMutableOptions}.
     *
     * @param keys the keys
     * @param values the values
     *
     * @return an instance of the options.
     */
    protected abstract T build(final String[] keys, final String[] values);

    /**
     * Builds the options from everything that has been set so far.
     *
     * @return an instance of the options.
     */
    public T build() {
      final String keys[] = new String[options.size()];
      final String values[] = new String[options.size()];

      int i = 0;
      for (final Map.Entry<K, MutableOptionValue<?>> option : options.entrySet()) {
        keys[i] = option.getKey().name();
        values[i] = option.getValue().asString();
        i++;
      }

      return build(keys, values);
    }

    /**
     * Looks up the value stored for {@code key}.
     *
     * @param key the option key
     *
     * @return the stored value, never null
     *
     * @throws NoSuchElementException if the key has not been set
     */
    private MutableOptionValue<?> requireValue(final K key)
        throws NoSuchElementException {
      final MutableOptionValue<?> value = options.get(key);
      if (value == null) {
        throw new NoSuchElementException(key.name() + " has not been set");
      }
      return value;
    }

    /**
     * @param key the option key, must be of DOUBLE value type
     * @param value the value to store
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if the key is not of DOUBLE value type
     */
    protected U setDouble(
        final K key, final double value) {
      if (key.getValueType() != MutableOptionKey.ValueType.DOUBLE) {
        throw new IllegalArgumentException(
            key + " does not accept a double value");
      }
      options.put(key, MutableOptionValue.fromDouble(value));
      return self();
    }

    /**
     * @param key the option key
     *
     * @return the stored value converted via {@code asDouble()}
     *
     * @throws NoSuchElementException if the key has not been set
     * @throws NumberFormatException propagated from {@code asDouble()}
     */
    protected double getDouble(final K key)
        throws NoSuchElementException, NumberFormatException {
      return requireValue(key).asDouble();
    }

    /**
     * @param key the option key, must be of LONG value type
     * @param value the value to store
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if the key is not of LONG value type
     */
    protected U setLong(
        final K key, final long value) {
      if(key.getValueType() != MutableOptionKey.ValueType.LONG) {
        throw new IllegalArgumentException(
            key + " does not accept a long value");
      }
      options.put(key, MutableOptionValue.fromLong(value));
      return self();
    }

    /**
     * @param key the option key
     *
     * @return the stored value converted via {@code asLong()}
     *
     * @throws NoSuchElementException if the key has not been set
     * @throws NumberFormatException propagated from {@code asLong()}
     */
    protected long getLong(final K key)
        throws NoSuchElementException, NumberFormatException {
      return requireValue(key).asLong();
    }

    /**
     * @param key the option key, must be of INT value type
     * @param value the value to store
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if the key is not of INT value type
     */
    protected U setInt(
        final K key, final int value) {
      if(key.getValueType() != MutableOptionKey.ValueType.INT) {
        throw new IllegalArgumentException(
            key + " does not accept an integer value");
      }
      options.put(key, MutableOptionValue.fromInt(value));
      return self();
    }

    /**
     * @param key the option key
     *
     * @return the stored value converted via {@code asInt()}
     *
     * @throws NoSuchElementException if the key has not been set
     * @throws NumberFormatException propagated from {@code asInt()}
     */
    protected int getInt(final K key)
        throws NoSuchElementException, NumberFormatException {
      return requireValue(key).asInt();
    }

    /**
     * @param key the option key, must be of BOOLEAN value type
     * @param value the value to store
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if the key is not of BOOLEAN value type
     */
    protected U setBoolean(
        final K key, final boolean value) {
      if(key.getValueType() != MutableOptionKey.ValueType.BOOLEAN) {
        throw new IllegalArgumentException(
            key + " does not accept a boolean value");
      }
      options.put(key, MutableOptionValue.fromBoolean(value));
      return self();
    }

    /**
     * @param key the option key
     *
     * @return the stored value converted via {@code asBoolean()}
     *
     * @throws NoSuchElementException if the key has not been set
     * @throws NumberFormatException propagated from {@code asBoolean()}
     */
    protected boolean getBoolean(final K key)
        throws NoSuchElementException, NumberFormatException {
      return requireValue(key).asBoolean();
    }

    /**
     * @param key the option key, must be of INT_ARRAY value type
     * @param value the value to store
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if the key is not of INT_ARRAY
     *     value type
     */
    protected U setIntArray(
        final K key, final int[] value) {
      if(key.getValueType() != MutableOptionKey.ValueType.INT_ARRAY) {
        throw new IllegalArgumentException(
            key + " does not accept an int array value");
      }
      options.put(key, MutableOptionValue.fromIntArray(value));
      return self();
    }

    /**
     * @param key the option key
     *
     * @return the stored value converted via {@code asIntArray()}
     *
     * @throws NoSuchElementException if the key has not been set
     * @throws NumberFormatException propagated from {@code asIntArray()}
     */
    protected int[] getIntArray(final K key)
        throws NoSuchElementException, NumberFormatException {
      return requireValue(key).asIntArray();
    }

    /**
     * @param key the option key, must be of ENUM value type
     * @param value the value to store
     * @param <N> the enum type
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if the key is not of ENUM value type
     */
    protected <N extends Enum<N>> U setEnum(
        final K key, final N value) {
      if(key.getValueType() != MutableOptionKey.ValueType.ENUM) {
        throw new IllegalArgumentException(
            key + " does not accept a Enum value");
      }
      options.put(key, MutableOptionValue.fromEnum(value));
      return self();
    }

    /**
     * @param key the option key
     * @param <N> the enum type the caller expects; the cast is unchecked
     *
     * @return the stored enum object
     *
     * @throws NoSuchElementException if the key has not been set, or the
     *     stored value is not an enum value
     */
    @SuppressWarnings("unchecked")
    protected <N extends Enum<N>> N getEnum(final K key)
        throws NoSuchElementException, NumberFormatException {
      final MutableOptionValue<?> value = requireValue(key);

      if (!(value instanceof MutableOptionValue.MutableOptionEnumValue)) {
        throw new NoSuchElementException(key.name() + " is not of Enum type");
      }

      return ((MutableOptionValue.MutableOptionEnumValue<N>) value).asObject();
    }

    /**
     * Parses {@code valueStr} according to the value type of the key named
     * {@code keyStr} and stores the result.
     *
     * @param keyStr the name of the option, as found in {@link #allKeys()}
     * @param valueStr the textual value
     *
     * @return this builder
     *
     * @throws IllegalArgumentException if {@code keyStr} is not a known key,
     *     or the value cannot be parsed (a {@link NumberFormatException} is
     *     an {@code IllegalArgumentException})
     * @throws IllegalStateException if the key's value type is not one that
     *     this method parses (DOUBLE, LONG, INT, BOOLEAN, INT_ARRAY)
     */
    public U fromString(
        final String keyStr, final String valueStr)
        throws IllegalArgumentException {
      Objects.requireNonNull(keyStr);
      Objects.requireNonNull(valueStr);

      final K key = allKeys().get(keyStr);
      if (key == null) {
        // Previously surfaced as a NullPointerException from the switch.
        throw new IllegalArgumentException(
            "Unknown mutable option key: " + keyStr);
      }

      switch(key.getValueType()) {
        case DOUBLE:
          return setDouble(key, Double.parseDouble(valueStr));

        case LONG:
          return setLong(key, Long.parseLong(valueStr));

        case INT:
          return setInt(key, Integer.parseInt(valueStr));

        case BOOLEAN:
          return setBoolean(key, Boolean.parseBoolean(valueStr));

        case INT_ARRAY:
          final String[] strInts = valueStr
              .trim().split(INT_ARRAY_INT_SEPARATOR);
          // split() never returns null; it returns an empty array when the
          // input consists solely of separators.
          if(strInts.length == 0) {
            throw new IllegalArgumentException(
                "int array value is not correctly formatted");
          }

          final int value[] = new int[strInts.length];
          int i = 0;
          for(final String strInt : strInts) {
            value[i++] = Integer.parseInt(strInt);
          }
          return setIntArray(key, value);
      }

      throw new IllegalStateException(
          key + " has unknown value type: " + key.getValueType());
    }
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractNativeReference.java000066400000000000000000000051751370372246700264440ustar00rootroot00000000000000// Copyright (c) 2016, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * AbstractNativeReference is the base-class of all RocksDB classes that have * a pointer to a native C++ {@code rocksdb} object. *

* AbstractNativeReference has the {@link AbstractNativeReference#dispose()} * method, which frees its associated C++ object.

*

* This function should be called manually, however, if required it will be * called automatically during the regular Java GC process via * {@link AbstractNativeReference#finalize()}.

*

* Note - Java can only see the long member variable (which is the C++ pointer * value to the native object), as such it does not know the real size of the * object and therefore may assign a low GC priority for it; So it is strongly * suggested that you manually dispose of objects when you are finished with * them.

*/ public abstract class AbstractNativeReference implements AutoCloseable { /** * Returns true if we are responsible for freeing the underlying C++ object * * @return true if we are responsible to free the C++ object * @see #dispose() */ protected abstract boolean isOwningHandle(); /** * Frees the underlying C++ object *

* It is strong recommended that the developer calls this after they * have finished using the object.

*

* Note, that once an instance of {@link AbstractNativeReference} has been * disposed, calling any of its functions will lead to undefined * behavior.

*/ @Override public abstract void close(); /** * @deprecated Instead use {@link AbstractNativeReference#close()} */ @Deprecated public final void dispose() { close(); } /** * Simply calls {@link AbstractNativeReference#dispose()} to free * any underlying C++ object reference which has not yet been manually * released. * * @deprecated You should not rely on GC of Rocks objects, and instead should * either call {@link AbstractNativeReference#close()} manually or make * use of some sort of ARM (Automatic Resource Management) such as * Java 7's try-with-resources * statement */ @Override @Deprecated protected void finalize() throws Throwable { if(isOwningHandle()) { //TODO(AR) log a warning message... developer should have called close() } dispose(); super.finalize(); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractRocksIterator.java000066400000000000000000000077721370372246700261770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * Base class implementation for Rocks Iterators * in the Java API * *

Multiple threads can invoke const methods on a RocksIterator without * external synchronization, but if any of the threads may call a * non-const method, all threads accessing the same RocksIterator must use * external synchronization.

* * @param

The type of the Parent Object from which the Rocks Iterator was * created. This is used by disposeInternal to avoid double-free * issues with the underlying C++ object. * @see org.rocksdb.RocksObject */ public abstract class AbstractRocksIterator

extends RocksObject implements RocksIteratorInterface { final P parent_; protected AbstractRocksIterator(final P parent, final long nativeHandle) { super(nativeHandle); // parent must point to a valid RocksDB instance. assert (parent != null); // RocksIterator must hold a reference to the related parent instance // to guarantee that while a GC cycle starts RocksIterator instances // are freed prior to parent instances. parent_ = parent; } @Override public boolean isValid() { assert (isOwningHandle()); return isValid0(nativeHandle_); } @Override public void seekToFirst() { assert (isOwningHandle()); seekToFirst0(nativeHandle_); } @Override public void seekToLast() { assert (isOwningHandle()); seekToLast0(nativeHandle_); } @Override public void seek(byte[] target) { assert (isOwningHandle()); seek0(nativeHandle_, target, target.length); } @Override public void seekForPrev(byte[] target) { assert (isOwningHandle()); seekForPrev0(nativeHandle_, target, target.length); } @Override public void seek(ByteBuffer target) { assert (isOwningHandle() && target.isDirect()); seekDirect0(nativeHandle_, target, target.position(), target.remaining()); target.position(target.limit()); } @Override public void seekForPrev(ByteBuffer target) { assert (isOwningHandle() && target.isDirect()); seekForPrevDirect0(nativeHandle_, target, target.position(), target.remaining()); target.position(target.limit()); } @Override public void next() { assert (isOwningHandle()); next0(nativeHandle_); } @Override public void prev() { assert (isOwningHandle()); prev0(nativeHandle_); } @Override public void refresh() throws RocksDBException { assert (isOwningHandle()); refresh0(nativeHandle_); } @Override public void status() throws RocksDBException { assert (isOwningHandle()); status0(nativeHandle_); } /** *

Deletes underlying C++ iterator pointer.

* *

Note: the underlying handle can only be safely deleted if the parent * instance related to a certain RocksIterator is still valid and initialized. * Therefore {@code disposeInternal()} checks if the parent is initialized * before freeing the native handle.

*/
  @Override
  protected void disposeInternal() {
    // Free the native iterator only while the parent still owns its handle.
    if (parent_.isOwningHandle()) {
      disposeInternal(nativeHandle_);
    }
  }

  // Primitives implemented by subclasses. Each public iterator operation of
  // this class delegates to the matching method below, passing nativeHandle_
  // as the handle argument.
  abstract boolean isValid0(long handle);
  abstract void seekToFirst0(long handle);
  abstract void seekToLast0(long handle);
  abstract void next0(long handle);
  abstract void prev0(long handle);
  abstract void refresh0(long handle) throws RocksDBException;
  abstract void seek0(long handle, byte[] target, int targetLen);
  abstract void seekForPrev0(long handle, byte[] target, int targetLen);
  // The *Direct0 variants receive a ByteBuffer plus the offset and length of
  // the key within it.
  abstract void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen);
  abstract void seekForPrevDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen);
  abstract void status0(long handle) throws RocksDBException;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractSlice.java000066400000000000000000000120351370372246700244270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Slices are used by RocksDB to provide
 * efficient access to keys and values.
 *
 * This class is package private, implementers
 * should extend either of the public abstract classes:
 *   @see org.rocksdb.Slice
 *   @see org.rocksdb.DirectSlice
 *
 * Regarding the lifecycle of Java Slices in RocksDB:
 *   At present when you configure a Comparator from Java, it creates an
 *   instance of a C++ BaseComparatorJniCallback subclass and
 *   passes that to RocksDB as the comparator. That subclass of
 *   BaseComparatorJniCallback creates the Java
 *   @see org.rocksdb.AbstractSlice subclass Objects. When you dispose
 *   the Java @see org.rocksdb.AbstractComparator subclass, it disposes the
 *   C++ BaseComparatorJniCallback subclass, which in turn destroys the
 *   Java @see org.rocksdb.AbstractSlice subclass Objects.
*/ public abstract class AbstractSlice extends RocksMutableObject { protected AbstractSlice() { super(); } protected AbstractSlice(final long nativeHandle) { super(nativeHandle); } /** * Returns the data of the slice. * * @return The slice data. Note, the type of access is * determined by the subclass * @see org.rocksdb.AbstractSlice#data0(long) */ public T data() { return data0(getNativeHandle()); } /** * Access to the data is provided by the * subtype as it needs to handle the * generic typing. * * @param handle The address of the underlying * native object. * * @return Java typed access to the data. */ protected abstract T data0(long handle); /** * Drops the specified {@code n} * number of bytes from the start * of the backing slice * * @param n The number of bytes to drop */ public abstract void removePrefix(final int n); /** * Clears the backing slice */ public abstract void clear(); /** * Return the length (in bytes) of the data. * * @return The length in bytes. */ public int size() { return size0(getNativeHandle()); } /** * Return true if the length of the * data is zero. * * @return true if there is no data, false otherwise. */ public boolean empty() { return empty0(getNativeHandle()); } /** * Creates a string representation of the data * * @param hex When true, the representation * will be encoded in hexadecimal. * * @return The string representation of the data. */ public String toString(final boolean hex) { return toString0(getNativeHandle(), hex); } @Override public String toString() { return toString(false); } /** * Three-way key comparison * * @param other A slice to compare against * * @return Should return either: * 1) < 0 if this < other * 2) == 0 if this == other * 3) > 0 if this > other */ public int compare(final AbstractSlice other) { assert (other != null); if(!isOwningHandle()) { return other.isOwningHandle() ? 
-1 : 0; } else { if(!other.isOwningHandle()) { return 1; } else { return compare0(getNativeHandle(), other.getNativeHandle()); } } } @Override public int hashCode() { return toString().hashCode(); } /** * If other is a slice object, then * we defer to {@link #compare(AbstractSlice) compare} * to check equality, otherwise we return false. * * @param other Object to test for equality * * @return true when {@code this.compare(other) == 0}, * false otherwise. */ @Override public boolean equals(final Object other) { if (other != null && other instanceof AbstractSlice) { return compare((AbstractSlice)other) == 0; } else { return false; } } /** * Determines whether this slice starts with * another slice * * @param prefix Another slice which may of may not * be a prefix of this slice. * * @return true when this slice starts with the * {@code prefix} slice */ public boolean startsWith(final AbstractSlice prefix) { if (prefix != null) { return startsWith0(getNativeHandle(), prefix.getNativeHandle()); } else { return false; } } protected native static long createNewSliceFromString(final String str); private native int size0(long handle); private native boolean empty0(long handle); private native String toString0(long handle, boolean hex); private native int compare0(long handle, long otherHandle); private native boolean startsWith0(long handle, long otherHandle); /** * Deletes underlying C++ slice pointer. * Note that this function should be called only after all * RocksDB instances referencing the slice are closed. * Otherwise an undefined behavior will occur. */ @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractTableFilter.java000066400000000000000000000007201370372246700255630ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** * Base class for Table Filters. 
*/ public abstract class AbstractTableFilter extends RocksCallbackObject implements TableFilter { protected AbstractTableFilter() { super(); } @Override protected long initializeNative(final long... nativeParameterHandles) { return createNewTableFilter(); } private native long createNewTableFilter(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractTraceWriter.java000066400000000000000000000043321370372246700256240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Base class for TraceWriters. */ public abstract class AbstractTraceWriter extends RocksCallbackObject implements TraceWriter { @Override protected long initializeNative(final long... nativeParameterHandles) { return createNewTraceWriter(); } /** * Called from JNI, proxy for {@link TraceWriter#write(Slice)}. * * @param sliceHandle the native handle of the slice (which we do not own) * * @return short (2 bytes) where the first byte is the * {@link Status.Code#getValue()} and the second byte is the * {@link Status.SubCode#getValue()}. */ private short writeProxy(final long sliceHandle) { try { write(new Slice(sliceHandle)); return statusToShort(Status.Code.Ok, Status.SubCode.None); } catch (final RocksDBException e) { return statusToShort(e.getStatus()); } } /** * Called from JNI, proxy for {@link TraceWriter#closeWriter()}. * * @return short (2 bytes) where the first byte is the * {@link Status.Code#getValue()} and the second byte is the * {@link Status.SubCode#getValue()}. 
*/ private short closeWriterProxy() { try { closeWriter(); return statusToShort(Status.Code.Ok, Status.SubCode.None); } catch (final RocksDBException e) { return statusToShort(e.getStatus()); } } private static short statusToShort(/*@Nullable*/ final Status status) { final Status.Code code = status != null && status.getCode() != null ? status.getCode() : Status.Code.IOError; final Status.SubCode subCode = status != null && status.getSubCode() != null ? status.getSubCode() : Status.SubCode.None; return statusToShort(code, subCode); } private static short statusToShort(final Status.Code code, final Status.SubCode subCode) { short result = (short)(code.getValue() << 8); return (short)(result | subCode.getValue()); } private native long createNewTraceWriter(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java000066400000000000000000000031311370372246700273520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Provides notification to the caller of SetSnapshotOnNextOperation when * the actual snapshot gets created */ public abstract class AbstractTransactionNotifier extends RocksCallbackObject { protected AbstractTransactionNotifier() { super(); } /** * Implement this method to receive notification when a snapshot is * requested via {@link Transaction#setSnapshotOnNextOperation()}. * * @param newSnapshot the snapshot that has been created. */ public abstract void snapshotCreated(final Snapshot newSnapshot); /** * This is intentionally private as it is the callback hook * from JNI */ private void snapshotCreated(final long snapshotHandle) { snapshotCreated(new Snapshot(snapshotHandle)); } @Override protected long initializeNative(final long... 
nativeParameterHandles) {
    // The supplied handles are not used; the native notifier is created
    // without arguments.
    return createNewTransactionNotifier();
  }

  private native long createNewTransactionNotifier();

  /**
   * Deletes underlying C++ TransactionNotifier pointer.
   *
   * Note that this function should be called only after all
   * Transactions referencing this notifier are closed.
   * Otherwise an undefined behavior will occur.
   */
  @Override
  protected void disposeInternal() {
    disposeInternal(nativeHandle_);
  }
  protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractWalFilter.java000066400000000000000000000034451370372246700252660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Base class for WAL Filters.
 */
public abstract class AbstractWalFilter
    extends RocksCallbackObject implements WalFilter {

  /**
   * Creates the native WAL filter; the supplied handles are not used.
   */
  @Override
  protected long initializeNative(final long... nativeParameterHandles) {
    return createNewWalFilter();
  }

  /**
   * Called from JNI, proxy for
   *     {@link WalFilter#logRecordFound(long, String, WriteBatch, WriteBatch)}.
   *
   * @param logNumber the log handle.
   * @param logFileName the log file name
   * @param batchHandle the native handle of a WriteBatch (which we do not own)
   * @param newBatchHandle the native handle of a
   *     new WriteBatch (which we do not own)
   *
   * @return short (2 bytes) where the first byte is the
   *     {@link WalFilter.LogRecordFoundResult#walProcessingOption}
   *     and the second byte is the
   *     {@link WalFilter.LogRecordFoundResult#batchChanged}.
*/
  private short logRecordFoundProxy(final long logNumber,
      final String logFileName, final long batchHandle,
      final long newBatchHandle) {
    // Wrap both native handles before handing them to the filter.
    final WriteBatch batch = new WriteBatch(batchHandle);
    final WriteBatch newBatch = new WriteBatch(newBatchHandle);
    return logRecordFoundResultToShort(
        logRecordFound(logNumber, logFileName, batch, newBatch));
  }

  /**
   * Packs a result into a short: the processing option value is shifted
   * into the upper 8 bits and the lowest bit is set when the batch changed.
   *
   * @param logRecordFoundResult the result to pack
   *
   * @return the packed result
   */
  private static short logRecordFoundResultToShort(
      final LogRecordFoundResult logRecordFoundResult) {
    final short optionBits =
        (short) (logRecordFoundResult.walProcessingOption.getValue() << 8);
    final int changedBit = logRecordFoundResult.batchChanged ? 1 : 0;
    return (short) (optionBits | changedBit);
  }

  private native long createNewWalFilter();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AbstractWriteBatch.java000066400000000000000000000157441370372246700254360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import java.nio.ByteBuffer;

/**
 * Common base for write batch implementations. Every public operation
 * forwards to an abstract, handle-based counterpart declared at the bottom
 * of this class, passing {@code nativeHandle_} as the first argument.
 */
public abstract class AbstractWriteBatch extends RocksObject
    implements WriteBatchInterface {

  /**
   * @param nativeHandle handle passed through to the {@link RocksObject}
   *     constructor
   */
  protected AbstractWriteBatch(final long nativeHandle) {
    super(nativeHandle);
  }

  /** Delegates to {@link #count0(long)}. */
  @Override
  public int count() {
    final int entries = count0(nativeHandle_);
    return entries;
  }

  /** Forwards the key and value, with their array lengths, to the handle-based put. */
  @Override
  public void put(byte[] key, byte[] value) throws RocksDBException {
    final int keyLen = key.length;
    final int valueLen = value.length;
    put(nativeHandle_, key, keyLen, value, valueLen);
  }

  /** Same as {@link #put(byte[], byte[])} but also passes the column family's native handle. */
  @Override
  public void put(ColumnFamilyHandle columnFamilyHandle, byte[] key,
      byte[] value) throws RocksDBException {
    final int keyLen = key.length;
    final int valueLen = value.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    put(nativeHandle_, key, keyLen, value, valueLen, cfHandle);
  }

  /** Forwards the key and value, with their array lengths, to the handle-based merge. */
  @Override
  public void merge(byte[] key, byte[] value) throws RocksDBException {
    final int keyLen = key.length;
    final int valueLen = value.length;
    merge(nativeHandle_, key, keyLen, value, valueLen);
  }

  /** Same as {@link #merge(byte[], byte[])} but also passes the column family's native handle. */
  @Override
  public void merge(ColumnFamilyHandle columnFamilyHandle, byte[] key,
      byte[] value) throws RocksDBException {
    final int keyLen = key.length;
    final int valueLen = value.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    merge(nativeHandle_, key, keyLen, value, valueLen, cfHandle);
  }

  /** Deprecated; performs the same handle-based delete call as {@link #delete(byte[])}. */
  @Override
  @Deprecated
  public void remove(byte[] key) throws RocksDBException {
    final int keyLen = key.length;
    delete(nativeHandle_, key, keyLen);
  }

  /**
   * Deprecated; performs the same handle-based delete call as
   * {@link #delete(ColumnFamilyHandle, byte[])}.
   */
  @Override
  @Deprecated
  public void remove(ColumnFamilyHandle columnFamilyHandle, byte[] key)
      throws RocksDBException {
    final int keyLen = key.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    delete(nativeHandle_, key, keyLen, cfHandle);
  }

  /**
   * Puts the remaining bytes of {@code key} and {@code value}. Both buffers
   * are asserted to be direct. A column family handle of 0 is passed to
   * {@code putDirect}. After the call each buffer's position is moved to
   * its limit.
   *
   * @param key direct buffer holding the key
   * @param value direct buffer holding the value
   *
   * @throws RocksDBException declared by {@code putDirect}
   */
  public void put(ByteBuffer key, ByteBuffer value) throws RocksDBException {
    assert key.isDirect() && value.isDirect();
    final int keyOffset = key.position();
    final int keyLength = key.remaining();
    final int valueOffset = value.position();
    final int valueLength = value.remaining();
    putDirect(nativeHandle_, key, keyOffset, keyLength, value, valueOffset,
        valueLength, 0);
    // Mark both buffers as fully consumed.
    key.position(key.limit());
    value.position(value.limit());
  }

  /**
   * Same as {@link #put(ByteBuffer, ByteBuffer)} but passes the native
   * handle of {@code columnFamilyHandle} instead of 0.
   */
  @Override
  public void put(ColumnFamilyHandle columnFamilyHandle, ByteBuffer key,
      ByteBuffer value) throws RocksDBException {
    assert key.isDirect() && value.isDirect();
    final int keyOffset = key.position();
    final int keyLength = key.remaining();
    final int valueOffset = value.position();
    final int valueLength = value.remaining();
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    putDirect(nativeHandle_, key, keyOffset, keyLength, value, valueOffset,
        valueLength, cfHandle);
    // Mark both buffers as fully consumed.
    key.position(key.limit());
    value.position(value.limit());
  }

  /** Forwards the key and its array length to the handle-based delete. */
  @Override
  public void delete(byte[] key) throws RocksDBException {
    final int keyLen = key.length;
    delete(nativeHandle_, key, keyLen);
  }

  /** Same as {@link #delete(byte[])} but also passes the column family's native handle. */
  @Override
  public void delete(ColumnFamilyHandle columnFamilyHandle, byte[] key)
      throws RocksDBException {
    final int keyLen = key.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    delete(nativeHandle_, key, keyLen, cfHandle);
  }

  /** Forwards the key and its array length to the handle-based singleDelete. */
  @Override
  public void singleDelete(byte[] key) throws RocksDBException {
    final int keyLen = key.length;
    singleDelete(nativeHandle_, key, keyLen);
  }

  /** Same as {@link #singleDelete(byte[])} but also passes the column family's native handle. */
  @Override
  public void singleDelete(ColumnFamilyHandle columnFamilyHandle, byte[] key)
      throws RocksDBException {
    final int keyLen = key.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    singleDelete(nativeHandle_, key, keyLen, cfHandle);
  }

  /** Forwards both keys, with their array lengths, to the handle-based deleteRange. */
  @Override
  public void deleteRange(byte[] beginKey, byte[] endKey)
      throws RocksDBException {
    final int beginKeyLen = beginKey.length;
    final int endKeyLen = endKey.length;
    deleteRange(nativeHandle_, beginKey, beginKeyLen, endKey, endKeyLen);
  }

  /** Same as {@link #deleteRange(byte[], byte[])} but also passes the column family's native handle. */
  @Override
  public void deleteRange(ColumnFamilyHandle columnFamilyHandle,
      byte[] beginKey, byte[] endKey) throws RocksDBException {
    final int beginKeyLen = beginKey.length;
    final int endKeyLen = endKey.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    deleteRange(nativeHandle_, beginKey, beginKeyLen, endKey, endKeyLen,
        cfHandle);
  }

  /**
   * Removes the key held in the remaining bytes of {@code key}. A column
   * family handle of 0 is passed to {@code removeDirect}. After the call
   * the buffer's position is moved to its limit.
   *
   * @param key buffer holding the key
   *
   * @throws RocksDBException declared by {@code removeDirect}
   */
  public void remove(ByteBuffer key) throws RocksDBException {
    final int keyOffset = key.position();
    final int keyLength = key.remaining();
    removeDirect(nativeHandle_, key, keyOffset, keyLength, 0);
    key.position(key.limit());
  }

  /**
   * Same as {@link #remove(ByteBuffer)} but passes the native handle of
   * {@code columnFamilyHandle} instead of 0.
   */
  @Override
  public void remove(ColumnFamilyHandle columnFamilyHandle, ByteBuffer key)
      throws RocksDBException {
    final int keyOffset = key.position();
    final int keyLength = key.remaining();
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    removeDirect(nativeHandle_, key, keyOffset, keyLength, cfHandle);
    key.position(key.limit());
  }

  /** Forwards the blob and its array length to the handle-based putLogData. */
  @Override
  public void putLogData(byte[] blob) throws RocksDBException {
    final int blobLen = blob.length;
    putLogData(nativeHandle_, blob, blobLen);
  }

  /** Delegates to {@link #clear0(long)}. */
  @Override
  public void clear() {
    clear0(nativeHandle_);
  }

  /** Delegates to {@link #setSavePoint0(long)}. */
  @Override
  public void setSavePoint() {
    setSavePoint0(nativeHandle_);
  }

  /** Delegates to {@link #rollbackToSavePoint0(long)}. */
  @Override
  public void rollbackToSavePoint() throws RocksDBException {
    rollbackToSavePoint0(nativeHandle_);
  }

  /** Delegates to {@link #popSavePoint(long)}. */
  @Override
  public void popSavePoint() throws RocksDBException {
    popSavePoint(nativeHandle_);
  }

  /** Delegates to {@link #setMaxBytes(long, long)}. */
  @Override
  public void setMaxBytes(final long maxBytes) {
    setMaxBytes(nativeHandle_, maxBytes);
  }

  /** Delegates to {@link #getWriteBatch(long)}. */
  @Override
  public WriteBatch getWriteBatch() {
    final WriteBatch batch = getWriteBatch(nativeHandle_);
    return batch;
  }

  // Handle-based counterparts that the public methods above delegate to;
  // subclasses supply the implementations.

  abstract int count0(final long handle);

  abstract void put(final long handle, final byte[] key, final int keyLen,
      final byte[] value, final int valueLen) throws RocksDBException;

  abstract void put(final long handle, final byte[] key, final int keyLen,
      final byte[] value, final int valueLen, final long cfHandle)
      throws RocksDBException;

  abstract void putDirect(final long handle, final ByteBuffer key,
      final int keyOffset, final int keyLength, final ByteBuffer value,
      final int valueOffset, final int valueLength, final long cfHandle)
      throws RocksDBException;

  abstract void merge(final long handle, final byte[] key, final int keyLen,
      final byte[] value, final int valueLen) throws RocksDBException;

  abstract void merge(final long handle, final byte[] key, final int keyLen,
      final byte[] value, final int valueLen, final long cfHandle)
      throws RocksDBException;

  abstract void delete(final long handle, final byte[] key,
      final int keyLen) throws RocksDBException;

  abstract void delete(final long handle, final byte[] key,
      final int keyLen, final long cfHandle) throws RocksDBException;

  abstract void singleDelete(final long handle, final byte[] key,
      final int keyLen) throws RocksDBException;

  abstract void singleDelete(final long handle, final byte[] key,
      final int keyLen, final long cfHandle) throws RocksDBException;

  abstract void removeDirect(final long handle, final ByteBuffer key,
      final int keyOffset, final int keyLength, final long cfHandle)
      throws RocksDBException;

  abstract void deleteRange(final long handle, final byte[] beginKey,
      final int beginKeyLen, final byte[] endKey, final int endKeyLen)
      throws RocksDBException;

  abstract void deleteRange(final long handle, final byte[] beginKey,
      final int beginKeyLen, final byte[] endKey, final int endKeyLen,
      final long cfHandle) throws RocksDBException;

  abstract void putLogData(final long handle, final byte[] blob,
      final int blobLen) throws RocksDBException;

  abstract void clear0(final long handle);

  abstract void setSavePoint0(final long handle);

  abstract void rollbackToSavePoint0(final long handle);

  abstract void popSavePoint(final long handle) throws RocksDBException;

  abstract void setMaxBytes(final long handle, long maxBytes);

  abstract WriteBatch getWriteBatch(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AccessHint.java000066400000000000000000000025011370372246700237250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * File access pattern once a compaction has started */ public enum AccessHint { NONE((byte)0x0), NORMAL((byte)0x1), SEQUENTIAL((byte)0x2), WILLNEED((byte)0x3); private final byte value; AccessHint(final byte value) { this.value = value; } /** *

Returns the byte value of this enumeration value.

*
   * @return the byte stored for this constant
   */
  public byte getValue() {
    return this.value;
  }

  /**
   *

Gets the AccessHint enumeration value that corresponds * to the byte identifier passed to this method.

* * @param byteIdentifier of AccessHint. * * @return AccessHint instance. * * @throws IllegalArgumentException if the access hint for the byteIdentifier * cannot be found */ public static AccessHint getAccessHint(final byte byteIdentifier) { for (final AccessHint accessHint : AccessHint.values()) { if (accessHint.getValue() == byteIdentifier) { return accessHint; } } throw new IllegalArgumentException( "Illegal value provided for AccessHint."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java000066400000000000000000000421551370372246700311340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.List; /** * Advanced Column Family Options which are not * mutable (i.e. present in {@link AdvancedMutableColumnFamilyOptionsInterface} * * Taken from include/rocksdb/advanced_options.h */ public interface AdvancedColumnFamilyOptionsInterface< T extends AdvancedColumnFamilyOptionsInterface> { /** * The minimum number of write buffers that will be merged together * before writing to storage. If set to 1, then * all write buffers are flushed to L0 as individual files and this increases * read amplification because a get request has to check in all of these * files. Also, an in-memory merge may result in writing lesser * data to storage if there are duplicate records in each of these * individual write buffers. Default: 1 * * @param minWriteBufferNumberToMerge the minimum number of write buffers * that will be merged together. * @return the reference to the current options. */ T setMinWriteBufferNumberToMerge( int minWriteBufferNumberToMerge); /** * The minimum number of write buffers that will be merged together * before writing to storage. 
If set to 1, then
   * all write buffers are flushed to L0 as individual files, which increases
   * read amplification because a get request has to check all of these
   * files. Also, an in-memory merge may result in writing less
   * data to storage if there are duplicate records in each of these
   * individual write buffers. Default: 1
   *
   * @return the minimum number of write buffers that will be merged together.
   */
  int minWriteBufferNumberToMerge();

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed. Unlike
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()},
   * this parameter does not affect flushing.
   * This controls the minimum amount of write history that will be available
   * in memory for conflict checking when Transactions are used.
   *
   * When using an OptimisticTransactionDB:
   * If this value is too low, some transactions may fail at commit time due
   * to not being able to determine whether there were any write conflicts.
   *
   * When using a TransactionDB:
   * If Transaction::SetSnapshot is used, TransactionDB will read either
   * in-memory write buffers or SST files to do write-conflict checking.
   * Increasing this value can reduce the number of reads to SST files
   * done for conflict detection.
   *
   * Setting this value to 0 will cause write buffers to be freed immediately
   * after they are flushed.
   * If this value is set to -1,
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * will be used.
   *
   * Default:
   * If using a TransactionDB/OptimisticTransactionDB, the default value will
   * be set to the value of
   * {@link AdvancedMutableColumnFamilyOptionsInterface#maxWriteBufferNumber()}
   * if it is not explicitly set by the user. Otherwise, the default is 0.
   *
   * @param maxWriteBufferNumberToMaintain The maximum number of write
   *     buffers to maintain
   *
   * @return the reference to the current options.
*/
  T setMaxWriteBufferNumberToMaintain(
      int maxWriteBufferNumberToMaintain);

  /**
   * The total maximum number of write buffers to maintain in memory including
   * copies of buffers that have already been flushed.
   *
   * @return the maximum number of write buffers to maintain
   */
  int maxWriteBufferNumberToMaintain();

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   * Put(key, new_value) will update inplace the existing_value iff
   *   * key exists in current memtable
   *   * new sizeof(new_value) ≤ sizeof(existing_value)
   *   * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check doc for inplace_callback.
   * Default: false.
   *
   * @param inplaceUpdateSupport true if thread-safe inplace updates
   *     are allowed.
   * @return the reference to the current options.
   */
  T setInplaceUpdateSupport(
      boolean inplaceUpdateSupport);

  /**
   * Allows thread-safe inplace updates.
   * If inplace_callback function is not set,
   * Put(key, new_value) will update inplace the existing_value iff
   *   * key exists in current memtable
   *   * new sizeof(new_value) ≤ sizeof(existing_value)
   *   * existing_value for that key is a put i.e. kTypeValue
   * If inplace_callback function is set, check doc for inplace_callback.
   * Default: false.
   *
   * @return true if thread-safe inplace updates are allowed.
   */
  boolean inplaceUpdateSupport();

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads but should be used with care since it can cause
   * a higher false positive rate.
   * Default: 0
   *
   * @param bloomLocality the level of locality of bloom-filter probes.
* @return the reference to the current options.
   */
  T setBloomLocality(int bloomLocality);

  /**
   * Control locality of bloom filter probes to improve cache miss rate.
   * This option only applies to memtable prefix bloom and plaintable
   * prefix bloom. It essentially limits the max number of cache lines each
   * bloom filter check can touch.
   * This optimization is turned off when set to 0. The number should never
   * be greater than the number of probes. This option can boost performance
   * for in-memory workloads but should be used with care since it can cause
   * a higher false positive rate.
   * Default: 0
   *
   * @return the level of locality of bloom-filter probes.
   * @see #setBloomLocality(int)
   */
  int bloomLocality();

  /**
   *

Different levels can have different compression * policies. There are cases where most lower levels * would like to use quick compression algorithms while * the higher levels (which have more data) use * compression algorithms that have better compression * but could be slower. This array, if non-empty, should * have an entry for each level of the database; * these override the value specified in the previous * field 'compression'.

* * NOTICE *

If {@code level_compaction_dynamic_level_bytes=true}, * {@code compression_per_level[0]} still determines {@code L0}, * but other elements of the array are based on base level * (the level {@code L0} files are merged to), and may not * match the level users see from info log for metadata. *

*

If {@code L0} files are merged to {@code level - n}, * then, for {@code i>0}, {@code compression_per_level[i]} * determines the compression type for level {@code n+i-1}.

* * Example *

For example, if we have 5 levels, and we decide to * merge {@code L0} data to {@code L4} (which means {@code L1..L3} * will be empty), then the new files that go to {@code L4} use * compression type {@code compression_per_level[1]}.

* *

If {@code L0} is now merged to {@code L2}, data that goes to * {@code L2} will be compressed according to * {@code compression_per_level[1]}, {@code L3} using * {@code compression_per_level[2]} and {@code L4} using * {@code compression_per_level[3]}. The compression used for each * level can change as data grows.

* *

Default: empty

*
   * @param compressionLevels list of
   *     {@link org.rocksdb.CompressionType} instances.
   *
   * @return the reference to the current options.
   */
  T setCompressionPerLevel(
      List compressionLevels);

  /**
   *

Returns the currently set per-level * {@link org.rocksdb.CompressionType} instances.

* *

See: {@link #setCompressionPerLevel(java.util.List)}

*
   * @return list of {@link org.rocksdb.CompressionType}
   *     instances.
   */
  List compressionPerLevel();

  /**
   * Set the number of levels for this database.
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @param numLevels the number of levels.
   * @return the reference to the current options.
   */
  T setNumLevels(int numLevels);

  /**
   * If level-styled compaction is used, then this number determines
   * the total number of levels.
   *
   * @return the number of levels.
   */
  int numLevels();

  /**
   *

If {@code true}, RocksDB will pick the target size of each level * dynamically. We will pick a base level b >= 1. L0 will be * directly merged into level b, instead of always into level 1. * Levels 1 to b-1 need to be empty. We try to pick b and its target * size so that:

* *
    *
  1. target size is in the range of * (max_bytes_for_level_base / max_bytes_for_level_multiplier, * max_bytes_for_level_base]
  2. *
  3. target size of the last level (level num_levels-1) equals to extra size * of the level.
  4. *
* *

At the same time max_bytes_for_level_multiplier and * max_bytes_for_level_multiplier_additional are still satisfied.

* *

With this option on, starting from an empty DB, we make the last level the base * level, which means merging L0 data into the last level, until it exceeds * max_bytes_for_level_base. Then we make the second-to-last level the * base level and start merging L0 data into it, with its * target size being {@code 1/max_bytes_for_level_multiplier} of the last * level's extra size. As more data accumulates, we * move the base level to the third-to-last level, and so on.

* *

Example

* *

For example, assume {@code max_bytes_for_level_multiplier=10}, * {@code num_levels=6}, and {@code max_bytes_for_level_base=10MB}.

* *

Target sizes of level 1 to 5 starts with:

* {@code [- - - - 10MB]} *

with level 5 as the base level. Target sizes of levels 1 to 4 are not applicable * because they will not be used. * Once the size of level 5 grows to more than 10MB, say 11MB, we make * level 4 the base level and the targets now look like:

* {@code [- - - 1.1MB 11MB]} *

While data are accumulated, size targets are tuned based on actual data * of level 5. When level 5 has 50MB of data, the target is like:

* {@code [- - - 5MB 50MB]} *

This continues until level 5's actual size is more than 100MB, say 101MB. If we * kept level 4 as the base level, its target size would need to be 10.1MB, * which doesn't satisfy the target size range. So now we make level 3 * the base level and the target sizes of the levels look like:

* {@code [- - 1.01MB 10.1MB 101MB]} *

In the same way, while level 5 further grows, all levels' targets grow, * like

* {@code [- - 5MB 50MB 500MB]} *

Until level 5 exceeds 1000MB and becomes 1001MB, we make level 2 the * base level and make levels' target sizes like this:

* {@code [- 1.001MB 10.01MB 100.1MB 1001MB]} *

and go on...

* *

By doing this, we give {@code max_bytes_for_level_multiplier} priority * over {@code max_bytes_for_level_base}, for a more predictable LSM tree * shape. It is useful for limiting worst-case space amplification.

* *

{@code max_bytes_for_level_multiplier_additional} is ignored with * this flag on.

* *

Turning this feature on or off for an existing DB can cause unexpected * LSM tree structure so it's not recommended.

* *

Caution: this option is experimental

* *

Default: false

*
   * @param enableLevelCompactionDynamicLevelBytes boolean value indicating
   *     if {@code LevelCompactionDynamicLevelBytes} shall be enabled.
   * @return the reference to the current options.
   */
  // The two literals are concatenated, so the first must end with a space;
  // without it the message read "can causeunexpected".
  @Experimental("Turning this feature on or off for an existing DB can cause " +
      "unexpected LSM tree structure so it's not recommended")
  T setLevelCompactionDynamicLevelBytes(
      boolean enableLevelCompactionDynamicLevelBytes);

  /**
   *

Return if {@code LevelCompactionDynamicLevelBytes} is enabled. *

* *

For further information see * {@link #setLevelCompactionDynamicLevelBytes(boolean)}

*
   * @return boolean value indicating if
   *     {@code levelCompactionDynamicLevelBytes} is enabled.
   */
  @Experimental("Caution: this option is experimental")
  boolean levelCompactionDynamicLevelBytes();

  /**
   * Maximum size of each compaction (not guaranteed)
   *
   * @param maxCompactionBytes the compaction size limit
   * @return the reference to the current options.
   */
  T setMaxCompactionBytes(
      long maxCompactionBytes);

  /**
   * Control maximum size of each compaction (not guaranteed)
   *
   * @return compaction size threshold
   */
  long maxCompactionBytes();

  // NOTE(review): unlike the other setters shown here, this one is declared
  // to return ColumnFamilyOptionsInterface rather than T - confirm whether
  // that is intentional.
  /**
   * Set compaction style for DB.
   *
   * Default: LEVEL.
   *
   * @param compactionStyle Compaction style.
   * @return the reference to the current options.
   */
  ColumnFamilyOptionsInterface setCompactionStyle(
      CompactionStyle compactionStyle);

  /**
   * Compaction style for DB.
   *
   * @return Compaction style.
   */
  CompactionStyle compactionStyle();

  /**
   * If level {@link #compactionStyle()} == {@link CompactionStyle#LEVEL},
   * for each level, which files are prioritized to be picked to compact.
   *
   * Default: {@link CompactionPriority#ByCompensatedSize}
   *
   * @param compactionPriority The compaction priority
   *
   * @return the reference to the current options.
   */
  T setCompactionPriority(
      CompactionPriority compactionPriority);

  /**
   * Get the compaction priority if level compaction
   * is used for all levels
   *
   * @return The compaction priority
   */
  CompactionPriority compactionPriority();

  /**
   * Set the options needed to support Universal Style compactions
   *
   * @param compactionOptionsUniversal The Universal Style compaction options
   *
   * @return the reference to the current options.
*/
  T setCompactionOptionsUniversal(
      CompactionOptionsUniversal compactionOptionsUniversal);

  /**
   * The options needed to support Universal Style compactions
   *
   * @return The Universal Style compaction options
   */
  CompactionOptionsUniversal compactionOptionsUniversal();

  /**
   * Set the options for FIFO compaction style
   *
   * @param compactionOptionsFIFO The FIFO compaction options
   *
   * @return the reference to the current options.
   */
  T setCompactionOptionsFIFO(
      CompactionOptionsFIFO compactionOptionsFIFO);

  /**
   * The options for FIFO compaction style
   *
   * @return The FIFO compaction options
   */
  CompactionOptionsFIFO compactionOptionsFIFO();

  /**
   *

This flag specifies that the implementation should optimize the filters * mainly for cases where keys are found rather than also optimize for keys * missed. This would be used in cases where the application knows that * there are very few misses or the performance in the case of misses is not * important.

* *

For now, this flag allows us to not store filters for the last level, i.e. * the largest level which contains data of the LSM store. For keys which * are hits, the filters in this level are not useful because we will search * for the data anyway.

* *

NOTE: the filters in other levels are still useful * even for key hits because they tell us whether to look in that level or go * to the higher level.

* *

Default: false

*
   * @param optimizeFiltersForHits boolean value indicating whether this
   *     flag should be set.
   * @return the reference to the current options.
   */
  T setOptimizeFiltersForHits(
      boolean optimizeFiltersForHits);

  /**
   *

Returns the current state of the {@code optimize_filters_for_hits} * setting.

*
   * @return boolean value indicating if the flag
   *     {@code optimize_filters_for_hits} was set.
   */
  boolean optimizeFiltersForHits();

  /**
   * In debug mode, RocksDB runs consistency checks on the LSM every time the
   * LSM changes (Flush, Compaction, AddFile). These checks are disabled in
   * release mode; use this option to enable them in release mode as well.
   *
   * Default: false
   *
   * @param forceConsistencyChecks true to force consistency checks
   *
   * @return the reference to the current options.
   */
  T setForceConsistencyChecks(
      boolean forceConsistencyChecks);

  /**
   * In debug mode, RocksDB runs consistency checks on the LSM every time the
   * LSM changes (Flush, Compaction, AddFile). These checks are disabled in
   * release mode.
   *
   * @return true if consistency checks are enforced
   */
  boolean forceConsistencyChecks();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java000066400000000000000000000353161370372246700324470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Advanced Column Family Options which are mutable * * Taken from include/rocksdb/advanced_options.h * and MutableCFOptions in util/cf_options.h */ public interface AdvancedMutableColumnFamilyOptionsInterface< T extends AdvancedMutableColumnFamilyOptionsInterface> { /** * The maximum number of write buffers that are built up in memory. * The default is 2, so that when 1 write buffer is being flushed to * storage, new writes can continue to the other write buffer. * Default: 2 * * @param maxWriteBufferNumber maximum number of write buffers. * @return the instance of the current options. */ T setMaxWriteBufferNumber( int maxWriteBufferNumber); /** * Returns maximum number of write buffers.
*
   * @return maximum number of write buffers.
   * @see #setMaxWriteBufferNumber(int)
   */
  int maxWriteBufferNumber();

  /**
   * Number of locks used for inplace update
   * Default: 10000, if inplace_update_support = true, else 0.
   *
   * @param inplaceUpdateNumLocks the number of locks used for
   *     inplace updates.
   * @return the reference to the current options.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *     while overflowing the underlying platform specific value.
   */
  T setInplaceUpdateNumLocks(
      long inplaceUpdateNumLocks);

  /**
   * Number of locks used for inplace update
   * Default: 10000, if inplace_update_support = true, else 0.
   *
   * @return the number of locks used for inplace update.
   */
  long inplaceUpdateNumLocks();

  /**
   * If prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
   * create prefix bloom for memtable with the size of
   * write_buffer_size * memtable_prefix_bloom_size_ratio.
   * If it is larger than 0.25, it is sanitized to 0.25.
   *
   * Default: 0 (disable)
   *
   * @param memtablePrefixBloomSizeRatio The ratio
   * @return the reference to the current options.
   */
  T setMemtablePrefixBloomSizeRatio(
      double memtablePrefixBloomSizeRatio);

  /**
   * If prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0,
   * create prefix bloom for memtable with the size of
   * write_buffer_size * memtable_prefix_bloom_size_ratio.
   * If it is larger than 0.25, it is sanitized to 0.25.
   *
   * Default: 0 (disable)
   *
   * @return the ratio
   */
  double memtablePrefixBloomSizeRatio();

  /**
   * Page size for huge page TLB for bloom in memtable. If ≤ 0, do not
   * allocate from huge page TLB but from malloc.
   * Need to reserve huge pages for it to be allocated. For example:
   *     sysctl -w vm.nr_hugepages=20
   * See linux doc Documentation/vm/hugetlbpage.txt
   *
   * @param memtableHugePageSize The page size of the huge
   *     page tlb
   * @return the reference to the current options.
*/
  T setMemtableHugePageSize(
      long memtableHugePageSize);

  /**
   * Page size for huge page TLB for bloom in memtable. If ≤ 0, do not
   * allocate from huge page TLB but from malloc.
   * Need to reserve huge pages for it to be allocated. For example:
   *     sysctl -w vm.nr_hugepages=20
   * See linux doc Documentation/vm/hugetlbpage.txt
   *
   * @return The page size of the huge page tlb
   */
  long memtableHugePageSize();

  /**
   * The size of one block in arena memory allocation.
   * If ≤ 0, a proper value is automatically calculated (usually 1/10 of
   * write_buffer_size).
   *
   * There are two additional restrictions on the specified size:
   * (1) size should be in the range of [4096, 2 << 30] and
   * (2) be a multiple of the CPU word (which helps with the memory
   * alignment).
   *
   * We'll automatically check and adjust the size number to make sure it
   * conforms to the restrictions.
   * Default: 0
   *
   * @param arenaBlockSize the size of an arena block
   * @return the reference to the current options.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *   while overflowing the underlying platform specific value.
   */
  T setArenaBlockSize(long arenaBlockSize);

  /**
   * The size of one block in arena memory allocation.
   * If ≤ 0, a proper value is automatically calculated (usually 1/10 of
   * write_buffer_size).
   *
   * There are two additional restrictions on the specified size:
   * (1) size should be in the range of [4096, 2 << 30] and
   * (2) be a multiple of the CPU word (which helps with the memory
   * alignment).
   *
   * We'll automatically check and adjust the size number to make sure it
   * conforms to the restrictions.
   * Default: 0
   *
   * @return the size of an arena block
   */
  long arenaBlockSize();

  /**
   * Soft limit on number of level-0 files. We start slowing down writes at this
   * point. A value < 0 means that no writing slow down will be triggered by
   * number of files in level-0.
*
   * @param level0SlowdownWritesTrigger The soft limit on the number of
   *   level-0 files
   * @return the reference to the current options.
   */
  T setLevel0SlowdownWritesTrigger(
      int level0SlowdownWritesTrigger);

  /**
   * Soft limit on number of level-0 files. We start slowing down writes at this
   * point. A value < 0 means that no writing slow down will be triggered by
   * number of files in level-0.
   *
   * @return The soft limit on the number of
   *   level-0 files
   */
  int level0SlowdownWritesTrigger();

  /**
   * Maximum number of level-0 files. We stop writes at this point.
   *
   * @param level0StopWritesTrigger The maximum number of level-0 files
   * @return the reference to the current options.
   */
  T setLevel0StopWritesTrigger(
      int level0StopWritesTrigger);

  /**
   * Maximum number of level-0 files. We stop writes at this point.
   *
   * @return The maximum number of level-0 files
   */
  int level0StopWritesTrigger();

  /**
   * The target file size for compaction.
   * This targetFileSizeBase determines a level-1 file size.
   * Target file size for level L can be calculated by
   * targetFileSizeBase * (targetFileSizeMultiplier ^ (L-1))
   * For example, if targetFileSizeBase is 2MB and
   * target_file_size_multiplier is 10, then each file on level-1 will
   * be 2MB, and each file on level 2 will be 20MB,
   * and each file on level-3 will be 200MB.
   * By default targetFileSizeBase is 64MB.
   *
   * @param targetFileSizeBase the target size of a level-1 file.
   * @return the reference to the current options.
   *
   * @see #setTargetFileSizeMultiplier(int)
   */
  T setTargetFileSizeBase(
      long targetFileSizeBase);

  /**
   * The target file size for compaction.
   * This targetFileSizeBase determines a level-1 file size.
   * Target file size for level L can be calculated by
   * targetFileSizeBase * (targetFileSizeMultiplier ^ (L-1))
   * For example, if targetFileSizeBase is 2MB and
   * target_file_size_multiplier is 10, then each file on level-1 will
   * be 2MB, and each file on level 2 will be 20MB,
   * and each file on level-3 will be 200MB.
* By default targetFileSizeBase is 64MB.
   *
   * @return the target size of a level-1 file.
   *
   * @see #targetFileSizeMultiplier()
   */
  long targetFileSizeBase();

  /**
   * targetFileSizeMultiplier defines the size ratio between a
   * level-(L+1) file and level-L file.
   * By default target_file_size_multiplier is 1, meaning
   * files in different levels have the same target.
   *
   * @param multiplier the size ratio between a level-(L+1) file
   *     and level-L file.
   * @return the reference to the current options.
   */
  T setTargetFileSizeMultiplier(
      int multiplier);

  /**
   * targetFileSizeMultiplier defines the size ratio between a
   * level-(L+1) file and level-L file.
   * By default targetFileSizeMultiplier is 1, meaning
   * files in different levels have the same target.
   *
   * @return the size ratio between a level-(L+1) file and level-L file.
   */
  int targetFileSizeMultiplier();

  /**
   * The ratio between the total size of level-(L+1) files and the total
   * size of level-L files for all L.
   * DEFAULT: 10
   *
   * @param multiplier the ratio between the total size of level-(L+1)
   *     files and the total size of level-L files for all L.
   * @return the reference to the current options.
   *
   * See {@link MutableColumnFamilyOptionsInterface#setMaxBytesForLevelBase(long)}
   */
  T setMaxBytesForLevelMultiplier(double multiplier);

  /**
   * The ratio between the total size of level-(L+1) files and the total
   * size of level-L files for all L.
   * DEFAULT: 10
   *
   * @return the ratio between the total size of level-(L+1) files and
   *     the total size of level-L files for all L.
   *
   * See {@link MutableColumnFamilyOptionsInterface#maxBytesForLevelBase()}
   */
  double maxBytesForLevelMultiplier();

  /**
   * Different max-size multipliers for different levels.
   * These are multiplied by max_bytes_for_level_multiplier to arrive
   * at the max-size of each level.
   *
   * Default: 1
   *
   * @param maxBytesForLevelMultiplierAdditional The max-size multipliers
   *   for each level
   * @return the reference to the current options.
*/ T setMaxBytesForLevelMultiplierAdditional( int[] maxBytesForLevelMultiplierAdditional); /** * Different max-size multipliers for different levels. * These are multiplied by max_bytes_for_level_multiplier to arrive * at the max-size of each level. * * Default: 1 * * @return The max-size multipliers for each level */ int[] maxBytesForLevelMultiplierAdditional(); /** * All writes will be slowed down to at least delayed_write_rate if the * estimated bytes needing compaction exceed this threshold. * * Default: 64GB * * @param softPendingCompactionBytesLimit The soft limit to impose on * compaction * @return the reference to the current options. */ T setSoftPendingCompactionBytesLimit( long softPendingCompactionBytesLimit); /** * All writes will be slowed down to at least delayed_write_rate if the * estimated bytes needing compaction exceed this threshold. * * Default: 64GB * * @return The soft limit to impose on compaction */ long softPendingCompactionBytesLimit(); /** * All writes are stopped if the estimated bytes needing compaction exceed * this threshold. * * Default: 256GB * * @param hardPendingCompactionBytesLimit The hard limit to impose on * compaction * @return the reference to the current options. */ T setHardPendingCompactionBytesLimit( long hardPendingCompactionBytesLimit); /** * All writes are stopped if the estimated bytes needing compaction exceed * this threshold. * * Default: 256GB * * @return The hard limit to impose on compaction */ long hardPendingCompactionBytesLimit(); /** * An iteration->Next() sequentially skips over keys with the same * user-key unless this option is set. This number specifies the number * of keys (with the same userkey) that will be sequentially * skipped before a reseek is issued. * Default: 8 * * @param maxSequentialSkipInIterations the number of keys that could * be skipped in an iteration. * @return the reference to the current options.
*/ T setMaxSequentialSkipInIterations( long maxSequentialSkipInIterations); /** * An iteration->Next() sequentially skips over keys with the same * user-key unless this option is set. This number specifies the number * of keys (with the same userkey) that will be sequentially * skipped before a reseek is issued. * Default: 8 * * @return the number of keys could be skipped in a iteration. */ long maxSequentialSkipInIterations(); /** * Maximum number of successive merge operations on a key in the memtable. * * When a merge operation is added to the memtable and the maximum number of * successive merges is reached, the value of the key will be calculated and * inserted into the memtable instead of the merge operation. This will * ensure that there are never more than max_successive_merges merge * operations in the memtable. * * Default: 0 (disabled) * * @param maxSuccessiveMerges the maximum number of successive merges. * @return the reference to the current options. * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms * while overflowing the underlying platform specific value. */ T setMaxSuccessiveMerges( long maxSuccessiveMerges); /** * Maximum number of successive merge operations on a key in the memtable. * * When a merge operation is added to the memtable and the maximum number of * successive merges is reached, the value of the key will be calculated and * inserted into the memtable instead of the merge operation. This will * ensure that there are never more than max_successive_merges merge * operations in the memtable. * * Default: 0 (disabled) * * @return the maximum number of successive merges. */ long maxSuccessiveMerges(); /** * After writing every SST file, reopen it and read all the keys. * * Default: false * * @param paranoidFileChecks true to enable paranoid file checks * @return the reference to the current options. 
*/ T setParanoidFileChecks( boolean paranoidFileChecks); /** * After writing every SST file, reopen it and read all the keys. * * Default: false * * @return true if paranoid file checks are enabled */ boolean paranoidFileChecks(); /** * Measure IO stats in compactions and flushes, if true. * * Default: false * * @param reportBgIoStats true to enable reporting * @return the reference to the current options. */ T setReportBgIoStats( boolean reportBgIoStats); /** * Determine whether IO stats in compactions and flushes are being measured * * @return true if reporting is enabled */ boolean reportBgIoStats(); /** * Non-bottom-level files older than TTL will go through the compaction * process. This needs {@link MutableDBOptionsInterface#maxOpenFiles()} to be * set to -1. * * Enabled only for level compaction for now. * * Default: 0 (disabled) * * Dynamically changeable through * {@link RocksDB#setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)}. * * @param ttl the time-to-live. * * @return the reference to the current options. */ T setTtl(final long ttl); /** * Get the TTL for Non-bottom-level files that will go through the compaction * process. * * See {@link #setTtl(long)}. * * @return the time-to-live. */ long ttl(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/BackupEngine.java000066400000000000000000000240321370372246700242370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.List; /** * BackupEngine allows you to backup * and restore the database * * Be aware, that `new BackupEngine` takes time proportional to the amount * of backups. So if you have a slow filesystem to backup (like HDFS) * and you have a lot of backups then restoring can take some time. 
* That's why we recommend to limit the number of backups.
 * Also we recommend to keep BackupEngine alive and not to recreate it every
 * time you need to do a backup.
 */
public class BackupEngine extends RocksObject implements AutoCloseable {

  /**
   * Wraps an already opened native backup engine.
   *
   * @param nativeHandle handle passed on to the {@link RocksObject}
   *     constructor
   */
  protected BackupEngine(final long nativeHandle) {
    super(nativeHandle);
  }

  /**
   * Opens a new Backup Engine
   *
   * @param env The environment that the backup engine should operate within
   * @param options Any options for the backup engine
   *
   * @return A new BackupEngine instance
   * @throws RocksDBException thrown if the backup engine could not be opened
   */
  public static BackupEngine open(final Env env,
      final BackupableDBOptions options) throws RocksDBException {
    // Open the native engine first, then wrap the handle it returns.
    final long engineHandle = open(env.nativeHandle_, options.nativeHandle_);
    return new BackupEngine(engineHandle);
  }

  /**
   * Captures the state of the database in the latest backup
   *
   * Just a convenience for {@link #createNewBackup(RocksDB, boolean)} with
   * the flushBeforeBackup parameter set to false
   *
   * @param db The database to backup
   *
   * Note - This method is not thread safe
   *
   * @throws RocksDBException thrown if a new backup could not be created
   */
  public void createNewBackup(final RocksDB db) throws RocksDBException {
    final boolean flushBeforeBackup = false;
    createNewBackup(db, flushBeforeBackup);
  }

  /**
   * Captures the state of the database in the latest backup
   *
   * @param db The database to backup
   * @param flushBeforeBackup When true, the Backup Engine will first issue a
   *                          memtable flush and only then copy the DB files to
   *                          the backup directory. Doing so will prevent log
   *                          files from being copied to the backup directory
   *                          (since flush will delete them).
   *                          When false, the Backup Engine will not issue a
   *                          flush before starting the backup. In that case,
   *                          the backup will also include log files
   *                          corresponding to live memtables. If writes have
   *                          been performed with the write ahead log disabled,
   *                          set flushBeforeBackup to true to prevent those
   *                          writes from being lost.
Otherwise, the backup will * always be consistent with the current state of the * database regardless of the flushBeforeBackup * parameter. * * Note - This method is not thread safe * * @throws RocksDBException thrown if a new backup could not be created */ public void createNewBackup( final RocksDB db, final boolean flushBeforeBackup) throws RocksDBException { assert (isOwningHandle()); createNewBackup(nativeHandle_, db.nativeHandle_, flushBeforeBackup); } /** * Captures the state of the database in the latest backup along with * application specific metadata. * * @param db The database to backup * @param metadata Application metadata * @param flushBeforeBackup When true, the Backup Engine will first issue a * memtable flush and only then copy the DB files to * the backup directory. Doing so will prevent log * files from being copied to the backup directory * (since flush will delete them). * When false, the Backup Engine will not issue a * flush before starting the backup. In that case, * the backup will also include log files * corresponding to live memtables. If writes have * been performed with the write ahead log disabled, * set flushBeforeBackup to true to prevent those * writes from being lost. Otherwise, the backup will * always be consistent with the current state of the * database regardless of the flushBeforeBackup * parameter. * * Note - This method is not thread safe * * @throws RocksDBException thrown if a new backup could not be created */ public void createNewBackupWithMetadata(final RocksDB db, final String metadata, final boolean flushBeforeBackup) throws RocksDBException { assert (isOwningHandle()); createNewBackupWithMetadata(nativeHandle_, db.nativeHandle_, metadata, flushBeforeBackup); } /** * Gets information about the available * backups * * @return A list of information about each available backup */ public List getBackupInfo() { assert (isOwningHandle()); return getBackupInfo(nativeHandle_); } /** *

Returns a list of corrupted backup ids. If there * is no corrupted backup the method will return an * empty list.

*
   * @return array of backup ids as int ids.
   */
  public int[] getCorruptedBackups() {
    assert isOwningHandle();
    final int[] corruptedBackupIds = getCorruptedBackups(nativeHandle_);
    return corruptedBackupIds;
  }

  /**
   *

Will delete all the files we don't need anymore. It will * do the full scan of the files/ directory and delete all the * files that are not referenced.

* * @throws RocksDBException thrown if error happens in underlying * native library. */ public void garbageCollect() throws RocksDBException { assert(isOwningHandle()); garbageCollect(nativeHandle_); } /** * Deletes old backups, keeping just the latest numBackupsToKeep * * @param numBackupsToKeep The latest n backups to keep * * @throws RocksDBException thrown if the old backups could not be deleted */ public void purgeOldBackups( final int numBackupsToKeep) throws RocksDBException { assert (isOwningHandle()); purgeOldBackups(nativeHandle_, numBackupsToKeep); } /** * Deletes a backup * * @param backupId The id of the backup to delete * * @throws RocksDBException thrown if the backup could not be deleted */ public void deleteBackup(final int backupId) throws RocksDBException { assert (isOwningHandle()); deleteBackup(nativeHandle_, backupId); } /** * Restore the database from a backup * * IMPORTANT: if options.share_table_files == true and you restore the DB * from some backup that is not the latest, and you start creating new * backups from the new DB, they will probably fail! * * Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3. * If you add new data to the DB and try creating a new backup now, the * database will diverge from backups 4 and 5 and the new backup will fail. * If you want to create new backup, you will first have to delete backups 4 * and 5. * * @param backupId The id of the backup to restore * @param dbDir The directory to restore the backup to, i.e. 
where your * database is * @param walDir The location of the log files for your database, * often the same as dbDir * @param restoreOptions Options for controlling the restore * * @throws RocksDBException thrown if the database could not be restored */ public void restoreDbFromBackup( final int backupId, final String dbDir, final String walDir, final RestoreOptions restoreOptions) throws RocksDBException { assert (isOwningHandle()); restoreDbFromBackup(nativeHandle_, backupId, dbDir, walDir, restoreOptions.nativeHandle_); } /** * Restore the database from the latest backup * * @param dbDir The directory to restore the backup to, i.e. where your * database is * @param walDir The location of the log files for your database, often the * same as dbDir * @param restoreOptions Options for controlling the restore * * @throws RocksDBException thrown if the database could not be restored */ public void restoreDbFromLatestBackup( final String dbDir, final String walDir, final RestoreOptions restoreOptions) throws RocksDBException { assert (isOwningHandle()); restoreDbFromLatestBackup(nativeHandle_, dbDir, walDir, restoreOptions.nativeHandle_); } private native static long open(final long env, final long backupableDbOptions) throws RocksDBException; private native void createNewBackup(final long handle, final long dbHandle, final boolean flushBeforeBackup) throws RocksDBException; private native void createNewBackupWithMetadata(final long handle, final long dbHandle, final String metadata, final boolean flushBeforeBackup) throws RocksDBException; private native List getBackupInfo(final long handle); private native int[] getCorruptedBackups(final long handle); private native void garbageCollect(final long handle) throws RocksDBException; private native void purgeOldBackups(final long handle, final int numBackupsToKeep) throws RocksDBException; private native void deleteBackup(final long handle, final int backupId) throws RocksDBException; private native void 
restoreDbFromBackup(final long handle, final int backupId, final String dbDir, final String walDir, final long restoreOptionsHandle) throws RocksDBException; private native void restoreDbFromLatestBackup(final long handle, final String dbDir, final String walDir, final long restoreOptionsHandle) throws RocksDBException; @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/BackupInfo.java000066400000000000000000000032361370372246700237300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Instances of this class describe a Backup made by * {@link org.rocksdb.BackupEngine}. */ public class BackupInfo { /** * Package private constructor used to create instances * of BackupInfo by {@link org.rocksdb.BackupEngine} * * @param backupId id of backup * @param timestamp timestamp of backup * @param size size of backup * @param numberFiles number of files related to this backup. */ BackupInfo(final int backupId, final long timestamp, final long size, final int numberFiles, final String app_metadata) { backupId_ = backupId; timestamp_ = timestamp; size_ = size; numberFiles_ = numberFiles; app_metadata_ = app_metadata; } /** * * @return the backup id. */ public int backupId() { return backupId_; } /** * * @return the timestamp of the backup. */ public long timestamp() { return timestamp_; } /** * * @return the size of the backup */ public long size() { return size_; } /** * * @return the number of files of this backup. 
*/ public int numberFiles() { return numberFiles_; } /** * * @return the associated application metadata, or null */ public String appMetadata() { return app_metadata_; } private int backupId_; private long timestamp_; private long size_; private int numberFiles_; private String app_metadata_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/BackupableDBOptions.java000066400000000000000000000354371370372246700255320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.io.File; /** *

BackupableDBOptions to control the behavior of a backupable database. * It will be used during the creation of a {@link org.rocksdb.BackupEngine}. *

*

Note that dispose() must be called before an Options instance * becomes out-of-scope to release the allocated memory in c++.

* * @see org.rocksdb.BackupEngine */ public class BackupableDBOptions extends RocksObject { private Env backupEnv = null; private Logger infoLog = null; private RateLimiter backupRateLimiter = null; private RateLimiter restoreRateLimiter = null; /** *

BackupableDBOptions constructor.

* * @param path Where to keep the backup files. Has to be different than db * name. Best to set this to {@code db name_ + "/backups"} * @throws java.lang.IllegalArgumentException if illegal path is used. */ public BackupableDBOptions(final String path) { super(newBackupableDBOptions(ensureWritableFile(path))); } private static String ensureWritableFile(final String path) { final File backupPath = path == null ? null : new File(path); if (backupPath == null || !backupPath.isDirectory() || !backupPath.canWrite()) { throw new IllegalArgumentException("Illegal path provided."); } else { return path; } } /** *

Returns the path to the BackupableDB directory.

*
   * @return the path to the BackupableDB directory.
   */
  public String backupDir() {
    assert isOwningHandle();
    return backupDir(nativeHandle_);
  }

  /**
   * Backup Env object. It will be used for backup file I/O. If it's
   * null, backups will be written out using DBs Env. Otherwise
   * backup's I/O will be performed using this object.
   *
   * If you want to have backups on HDFS, use HDFS Env here!
   *
   * Default: null
   *
   * @param env The environment to use
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setBackupEnv(final Env env) {
    assert isOwningHandle();
    // NOTE(review): env is dereferenced here, so passing null throws a
    // NullPointerException even though null is the documented default.
    final long envHandle = env.nativeHandle_;
    setBackupEnv(nativeHandle_, envHandle);
    // Remember the Java object so backupEnv() can return it.
    backupEnv = env;
    return this;
  }

  /**
   * Backup Env object. It will be used for backup file I/O. If it's
   * null, backups will be written out using DBs Env. Otherwise
   * backup's I/O will be performed using this object.
   *
   * If you want to have backups on HDFS, use HDFS Env here!
   *
   * Default: null
   *
   * @return The environment in use
   */
  public Env backupEnv() {
    return backupEnv;
  }

  /**
   *

Share table files between backups.

* * @param shareTableFiles If {@code share_table_files == true}, backup will * assume that table files with same name have the same contents. This * enables incremental backups and avoids unnecessary data copies. If * {@code share_table_files == false}, each backup will be on its own and * will not share any data with other backups. * *

Default: true

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setShareTableFiles(
      final boolean shareTableFiles) {
    assert isOwningHandle();
    // Forward the flag to the native options object.
    setShareTableFiles(nativeHandle_, shareTableFiles);
    return this;
  }

  /**
   *

Share table files between backups.

*
   * @return boolean value indicating if SST files will be shared between
   *     backups.
   */
  public boolean shareTableFiles() {
    assert isOwningHandle();
    return shareTableFiles(nativeHandle_);
  }

  /**
   * Set the logger to use for Backup info and error messages
   *
   * @param logger The logger to use for the backup
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setInfoLog(final Logger logger) {
    assert isOwningHandle();
    final long loggerHandle = logger.nativeHandle_;
    setInfoLog(nativeHandle_, loggerHandle);
    // Remember the Java object so infoLog() can return it.
    infoLog = logger;
    return this;
  }

  /**
   * Get the logger used for Backup info and error messages
   *
   * Default: null
   *
   * @return The logger in use for the backup
   */
  public Logger infoLog() {
    return infoLog;
  }

  /**
   *

Set synchronous backups.

* * @param sync If {@code sync == true}, we can guarantee you'll get consistent * backup even on a machine crash/reboot. Backup process is slower with sync * enabled. If {@code sync == false}, we don't guarantee anything on machine * reboot. However, chances are some of the backups are consistent. * *

Default: true

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setSync(final boolean sync) {
    assert isOwningHandle();
    // Forward the flag to the native options object.
    setSync(nativeHandle_, sync);
    return this;
  }

  /**
   *

Are synchronous backups activated.

*
   * @return boolean value if synchronous backups are configured.
   */
  public boolean sync() {
    assert isOwningHandle();
    final boolean syncEnabled = sync(nativeHandle_);
    return syncEnabled;
  }

  /**
   *

Set if old data will be destroyed.

* * @param destroyOldData If true, it will delete whatever backups there are * already. * *

Default: false

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setDestroyOldData(
      final boolean destroyOldData) {
    assert isOwningHandle();
    // Forward the flag to the native options object.
    setDestroyOldData(nativeHandle_, destroyOldData);
    return this;
  }

  /**
   *

Returns if old data will be destroyed while performing new backups.

*
   * @return boolean value indicating if old data will be destroyed.
   */
  public boolean destroyOldData() {
    assert isOwningHandle();
    final boolean destroysOldData = destroyOldData(nativeHandle_);
    return destroysOldData;
  }

  /**
   *

Set if log files shall be persisted.

* * @param backupLogFiles If false, we won't backup log files. This option can * be useful for backing up in-memory databases where log file are * persisted, but table files are in memory. * *

Default: true

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setBackupLogFiles(
      final boolean backupLogFiles) {
    assert isOwningHandle();
    // Forward the flag to the native options object.
    setBackupLogFiles(nativeHandle_, backupLogFiles);
    return this;
  }

  /**
   *

Return information if log files shall be persisted.

*
   * @return boolean value indicating if log files will be persisted.
   */
  public boolean backupLogFiles() {
    assert isOwningHandle();
    final boolean logFilesBackedUp = backupLogFiles(nativeHandle_);
    return logFilesBackedUp;
  }

  /**
   *

Set backup rate limit.

* * @param backupRateLimit Max bytes that can be transferred in a second during * backup. If 0 or negative, then go as fast as you can. * *

Default: 0

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setBackupRateLimit(long backupRateLimit) {
    assert isOwningHandle();
    // Zero and negative values both mean "unlimited"; pass them on as 0.
    final long effectiveLimit = Math.max(0L, backupRateLimit);
    setBackupRateLimit(nativeHandle_, effectiveLimit);
    return this;
  }

  /**
   *

Return backup rate limit which describes the max bytes that can be * transferred in a second during backup.

*
   * @return numerical value describing the backup transfer limit in bytes per
   *     second.
   */
  public long backupRateLimit() {
    assert isOwningHandle();
    return backupRateLimit(nativeHandle_);
  }

  /**
   * Backup rate limiter. Used to control transfer speed for backup. If this is
   * not null, {@link #backupRateLimit()} is ignored.
   *
   * Default: null
   *
   * @param backupRateLimiter The rate limiter to use for the backup
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setBackupRateLimiter(
      final RateLimiter backupRateLimiter) {
    assert isOwningHandle();
    final long limiterHandle = backupRateLimiter.nativeHandle_;
    setBackupRateLimiter(nativeHandle_, limiterHandle);
    // Remember the Java object so backupRateLimiter() can return it.
    this.backupRateLimiter = backupRateLimiter;
    return this;
  }

  /**
   * Backup rate limiter. Used to control transfer speed for backup. If this is
   * not null, {@link #backupRateLimit()} is ignored.
   *
   * Default: null
   *
   * @return The rate limiter in use for the backup
   */
  public RateLimiter backupRateLimiter() {
    assert isOwningHandle();
    return backupRateLimiter;
  }

  /**
   *

Set restore rate limit.

* * @param restoreRateLimit Max bytes that can be transferred in a second * during restore. If 0 or negative, then go as fast as you can. * *

Default: 0

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setRestoreRateLimit(long restoreRateLimit) {
    assert isOwningHandle();
    // Zero and negative values both mean "unlimited"; pass them on as 0.
    final long effectiveLimit = Math.max(0L, restoreRateLimit);
    setRestoreRateLimit(nativeHandle_, effectiveLimit);
    return this;
  }

  /**
   *

Return restore rate limit which describes the max bytes that can be * transferred in a second during restore.

*
   * @return numerical value describing the restore transfer limit in bytes per
   *     second.
   */
  public long restoreRateLimit() {
    assert isOwningHandle();
    return restoreRateLimit(nativeHandle_);
  }

  /**
   * Restore rate limiter. Used to control transfer speed during restore. If
   * this is not null, {@link #restoreRateLimit()} is ignored.
   *
   * Default: null
   *
   * @param restoreRateLimiter The rate limiter to use during restore
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setRestoreRateLimiter(
      final RateLimiter restoreRateLimiter) {
    assert isOwningHandle();
    final long limiterHandle = restoreRateLimiter.nativeHandle_;
    setRestoreRateLimiter(nativeHandle_, limiterHandle);
    // Remember the Java object so restoreRateLimiter() can return it.
    this.restoreRateLimiter = restoreRateLimiter;
    return this;
  }

  /**
   * Restore rate limiter. Used to control transfer speed during restore. If
   * this is not null, {@link #restoreRateLimit()} is ignored.
   *
   * Default: null
   *
   * @return The rate limiter in use during restore
   */
  public RateLimiter restoreRateLimiter() {
    assert isOwningHandle();
    return restoreRateLimiter;
  }

  /**
   *

Only used if share_table_files is set to true. If true, will consider * that backups can come from different databases, hence a sst is not uniquely * identified by its name, but by the triple (file name, crc32, file length) *

* * @param shareFilesWithChecksum boolean value indicating if SST files are * stored using the triple (file name, crc32, file length) and not its name. * *

Note: this is an experimental option, and you'll need to set it manually. * Turn it on only if you know what you're doing.

* *

Default: false

*
   * @return instance of current BackupableDBOptions.
   */
  public BackupableDBOptions setShareFilesWithChecksum(
      final boolean shareFilesWithChecksum) {
    assert isOwningHandle();
    // Forward the flag to the native options object.
    setShareFilesWithChecksum(nativeHandle_, shareFilesWithChecksum);
    return this;
  }

  /**
   *

Returns if share files with checksum is active.

* * @return boolean value indicating if share files with checksum * is active. */ public boolean shareFilesWithChecksum() { assert(isOwningHandle()); return shareFilesWithChecksum(nativeHandle_); } /** * Up to this many background threads will copy files for * {@link BackupEngine#createNewBackup(RocksDB, boolean)} and * {@link BackupEngine#restoreDbFromBackup(int, String, String, RestoreOptions)} * * Default: 1 * * @param maxBackgroundOperations The maximum number of background threads * @return instance of current BackupableDBOptions. */ public BackupableDBOptions setMaxBackgroundOperations( final int maxBackgroundOperations) { assert(isOwningHandle()); setMaxBackgroundOperations(nativeHandle_, maxBackgroundOperations); return this; } /** * Up to this many background threads will copy files for * {@link BackupEngine#createNewBackup(RocksDB, boolean)} and * {@link BackupEngine#restoreDbFromBackup(int, String, String, RestoreOptions)} * * Default: 1 * * @return The maximum number of background threads */ public int maxBackgroundOperations() { assert(isOwningHandle()); return maxBackgroundOperations(nativeHandle_); } /** * During backup user can get callback every time next * {@link #callbackTriggerIntervalSize()} bytes being copied. * * Default: 4194304 * * @param callbackTriggerIntervalSize The interval size for the * callback trigger * @return instance of current BackupableDBOptions. */ public BackupableDBOptions setCallbackTriggerIntervalSize( final long callbackTriggerIntervalSize) { assert(isOwningHandle()); setCallbackTriggerIntervalSize(nativeHandle_, callbackTriggerIntervalSize); return this; } /** * During backup user can get callback every time next * {@link #callbackTriggerIntervalSize()} bytes being copied. 
*
   * Default: 4194304
   *
   * @return The interval size for the callback trigger
   */
  public long callbackTriggerIntervalSize() {
    assert isOwningHandle();
    final long intervalSize = callbackTriggerIntervalSize(nativeHandle_);
    return intervalSize;
  }

  // Native (JNI) methods backing this class.
  private native static long newBackupableDBOptions(final String path);
  private native String backupDir(long handle);
  private native void setBackupEnv(final long handle, final long envHandle);
  private native void setShareTableFiles(long handle, boolean flag);
  private native boolean shareTableFiles(long handle);
  private native void setInfoLog(final long handle, final long infoLogHandle);
  private native void setSync(long handle, boolean flag);
  private native boolean sync(long handle);
  private native void setDestroyOldData(long handle, boolean flag);
  private native boolean destroyOldData(long handle);
  private native void setBackupLogFiles(long handle, boolean flag);
  private native boolean backupLogFiles(long handle);
  private native void setBackupRateLimit(long handle, long rateLimit);
  private native long backupRateLimit(long handle);
  private native void setBackupRateLimiter(long handle, long rateLimiterHandle);
  private native void setRestoreRateLimit(long handle, long rateLimit);
  private native long restoreRateLimit(long handle);
  private native void setRestoreRateLimiter(final long handle,
      final long rateLimiterHandle);
  private native void setShareFilesWithChecksum(long handle, boolean flag);
  private native boolean shareFilesWithChecksum(long handle);
  private native void setMaxBackgroundOperations(final long handle,
      final int maxBackgroundOperations);
  private native int maxBackgroundOperations(final long handle);
  private native void setCallbackTriggerIntervalSize(final long handle,
      long callbackTriggerIntervalSize);
  private native long callbackTriggerIntervalSize(final long handle);
  @Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java000066400000000000000000000747601370372246700260100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The config for block-based table sst format. * * BlockBasedTable is RocksDB's default SST file format. */ //TODO(AR) should be renamed BlockBasedTableOptions public class BlockBasedTableConfig extends TableFormatConfig { public BlockBasedTableConfig() { //TODO(AR) flushBlockPolicyFactory cacheIndexAndFilterBlocks = false; cacheIndexAndFilterBlocksWithHighPriority = false; pinL0FilterAndIndexBlocksInCache = false; pinTopLevelIndexAndFilter = true; indexType = IndexType.kBinarySearch; dataBlockIndexType = DataBlockIndexType.kDataBlockBinarySearch; dataBlockHashTableUtilRatio = 0.75; checksumType = ChecksumType.kCRC32c; noBlockCache = false; blockCache = null; persistentCache = null; blockCacheCompressed = null; blockSize = 4 * 1024; blockSizeDeviation = 10; blockRestartInterval = 16; indexBlockRestartInterval = 1; metadataBlockSize = 4096; partitionFilters = false; useDeltaEncoding = true; filterPolicy = null; wholeKeyFiltering = true; verifyCompression = true; readAmpBytesPerBit = 0; formatVersion = 2; enableIndexCompression = true; blockAlign = false; // NOTE: ONLY used if blockCache == null blockCacheSize = 8 * 1024 * 1024; blockCacheNumShardBits = 0; // NOTE: ONLY used if blockCacheCompressed == null blockCacheCompressedSize = 0; blockCacheCompressedNumShardBits = 0; } /** * Indicating if we'd put index/filter blocks to the block cache. * If not specified, each "table reader" object will pre-load index/filter * block during table initialization. * * @return if index and filter blocks should be put in block cache.
*/ public boolean cacheIndexAndFilterBlocks() { return cacheIndexAndFilterBlocks; } /** * Indicating if we'd put index/filter blocks to the block cache. * If not specified, each "table reader" object will pre-load index/filter * block during table initialization. * * @param cacheIndexAndFilterBlocks if index and filter blocks should be put in block cache. * @return the reference to the current config. */ public BlockBasedTableConfig setCacheIndexAndFilterBlocks( final boolean cacheIndexAndFilterBlocks) { this.cacheIndexAndFilterBlocks = cacheIndexAndFilterBlocks; return this; } /** * Indicates if index and filter blocks will be treated as high-priority in the block cache. * See note below about applicability. If not specified, defaults to false. * * @return if index and filter blocks will be treated as high-priority. */ public boolean cacheIndexAndFilterBlocksWithHighPriority() { return cacheIndexAndFilterBlocksWithHighPriority; } /** * If true, cache index and filter blocks with high priority. If set to true, * depending on implementation of block cache, index and filter blocks may be * less likely to be evicted than data blocks. * * @param cacheIndexAndFilterBlocksWithHighPriority if index and filter blocks * will be treated as high-priority. * @return the reference to the current config. */ public BlockBasedTableConfig setCacheIndexAndFilterBlocksWithHighPriority( final boolean cacheIndexAndFilterBlocksWithHighPriority) { this.cacheIndexAndFilterBlocksWithHighPriority = cacheIndexAndFilterBlocksWithHighPriority; return this; } /** * Indicating if we'd like to pin L0 index/filter blocks to the block cache. If not specified, defaults to false. * * @return if L0 index and filter blocks should be pinned to the block cache. */ public boolean pinL0FilterAndIndexBlocksInCache() { return pinL0FilterAndIndexBlocksInCache; } /** * Indicating if we'd like to pin L0 index/filter blocks to the block cache. If not specified, defaults to false.
* * @param pinL0FilterAndIndexBlocksInCache pin blocks in block cache * @return the reference to the current config. */ public BlockBasedTableConfig setPinL0FilterAndIndexBlocksInCache( final boolean pinL0FilterAndIndexBlocksInCache) { this.pinL0FilterAndIndexBlocksInCache = pinL0FilterAndIndexBlocksInCache; return this; } /** * Indicates if top-level index and filter blocks should be pinned. * * @return if top-level index and filter blocks should be pinned. */ public boolean pinTopLevelIndexAndFilter() { return pinTopLevelIndexAndFilter; } /** * If cacheIndexAndFilterBlocks is true and the below is true, then * the top-level index of partitioned filter and index blocks are stored in * the cache, but a reference is held in the "table reader" object so the * blocks are pinned and only evicted from cache when the table reader is * freed. This is not limited to l0 in LSM tree. * * @param pinTopLevelIndexAndFilter if top-level index and filter blocks should be pinned. * @return the reference to the current config. */ public BlockBasedTableConfig setPinTopLevelIndexAndFilter(final boolean pinTopLevelIndexAndFilter) { this.pinTopLevelIndexAndFilter = pinTopLevelIndexAndFilter; return this; } /** * Get the index type. * * @return the currently set index type */ public IndexType indexType() { return indexType; } /** * Sets the index type to used with this table. * * @param indexType {@link org.rocksdb.IndexType} value * @return the reference to the current option. */ public BlockBasedTableConfig setIndexType( final IndexType indexType) { this.indexType = indexType; return this; } /** * Get the data block index type. * * @return the currently set data block index type */ public DataBlockIndexType dataBlockIndexType() { return dataBlockIndexType; } /** * Sets the data block index type to used with this table. * * @param dataBlockIndexType {@link org.rocksdb.DataBlockIndexType} value * @return the reference to the current option. 
*/ public BlockBasedTableConfig setDataBlockIndexType( final DataBlockIndexType dataBlockIndexType) { this.dataBlockIndexType = dataBlockIndexType; return this; } /** * Get the #entries/#buckets. It is valid only when {@link #dataBlockIndexType()} is * {@link DataBlockIndexType#kDataBlockBinaryAndHash}. * * @return the #entries/#buckets. */ public double dataBlockHashTableUtilRatio() { return dataBlockHashTableUtilRatio; } /** * Set the #entries/#buckets. It is valid only when {@link #dataBlockIndexType()} is * {@link DataBlockIndexType#kDataBlockBinaryAndHash}. * * @param dataBlockHashTableUtilRatio #entries/#buckets * @return the reference to the current option. */ public BlockBasedTableConfig setDataBlockHashTableUtilRatio( final double dataBlockHashTableUtilRatio) { this.dataBlockHashTableUtilRatio = dataBlockHashTableUtilRatio; return this; } /** * Get the checksum type to be used with this table. * * @return the currently set checksum type */ public ChecksumType checksumType() { return checksumType; } /** * Sets * * @param checksumType {@link org.rocksdb.ChecksumType} value. * @return the reference to the current option. */ public BlockBasedTableConfig setChecksumType( final ChecksumType checksumType) { this.checksumType = checksumType; return this; } /** * Determine if the block cache is disabled. * * @return if block cache is disabled */ public boolean noBlockCache() { return noBlockCache; } /** * Disable block cache. If this is set to true, * then no block cache should be used, and the {@link #setBlockCache(Cache)} * should point to a {@code null} object. * * Default: false * * @param noBlockCache if use block cache * @return the reference to the current config. */ public BlockBasedTableConfig setNoBlockCache(final boolean noBlockCache) { this.noBlockCache = noBlockCache; return this; } /** * Use the specified cache for blocks. * When not null this take precedence even if the user sets a block cache size. 
* * {@link org.rocksdb.Cache} should not be disposed before options instances * using this cache is disposed. * * {@link org.rocksdb.Cache} instance can be re-used in multiple options * instances. * * @param blockCache {@link org.rocksdb.Cache} Cache java instance * (e.g. LRUCache). * * @return the reference to the current config. */ public BlockBasedTableConfig setBlockCache(final Cache blockCache) { this.blockCache = blockCache; return this; } /** * Use the specified persistent cache. * * If {@code !null} use the specified cache for pages read from device, * otherwise no page cache is used. * * @param persistentCache the persistent cache * * @return the reference to the current config. */ public BlockBasedTableConfig setPersistentCache( final PersistentCache persistentCache) { this.persistentCache = persistentCache; return this; } /** * Use the specified cache for compressed blocks. * * If {@code null}, RocksDB will not use a compressed block cache. * * Note: though it looks similar to {@link #setBlockCache(Cache)}, RocksDB * doesn't put the same type of object there. * * {@link org.rocksdb.Cache} should not be disposed before options instances * using this cache is disposed. * * {@link org.rocksdb.Cache} instance can be re-used in multiple options * instances. * * @param blockCacheCompressed {@link org.rocksdb.Cache} Cache java instance * (e.g. LRUCache). * * @return the reference to the current config. */ public BlockBasedTableConfig setBlockCacheCompressed( final Cache blockCacheCompressed) { this.blockCacheCompressed = blockCacheCompressed; return this; } /** * Get the approximate size of user data packed per block. * * @return block size in bytes */ public long blockSize() { return blockSize; } /** * Approximate size of user data packed per block. Note that the * block size specified here corresponds to uncompressed data. The * actual size of the unit read from disk may be smaller if * compression is enabled. This parameter can be changed dynamically. 
* Default: 4K * * @param blockSize block size in bytes * @return the reference to the current config. */ public BlockBasedTableConfig setBlockSize(final long blockSize) { this.blockSize = blockSize; return this; } /** * @return the hash table ratio. */ public int blockSizeDeviation() { return blockSizeDeviation; } /** * This is used to close a block before it reaches the configured * {@link #blockSize()}. If the percentage of free space in the current block * is less than this specified number and adding a new record to the block * will exceed the configured block size, then this block will be closed and * the new record will be written to the next block. * * Default is 10. * * @param blockSizeDeviation the deviation to block size allowed * @return the reference to the current config. */ public BlockBasedTableConfig setBlockSizeDeviation( final int blockSizeDeviation) { this.blockSizeDeviation = blockSizeDeviation; return this; } /** * Get the block restart interval. * * @return block restart interval */ public int blockRestartInterval() { return blockRestartInterval; } /** * Set the block restart interval. * * @param restartInterval block restart interval. * @return the reference to the current config. */ public BlockBasedTableConfig setBlockRestartInterval( final int restartInterval) { blockRestartInterval = restartInterval; return this; } /** * Get the index block restart interval. * * @return index block restart interval */ public int indexBlockRestartInterval() { return indexBlockRestartInterval; } /** * Set the index block restart interval * * @param restartInterval index block restart interval. * @return the reference to the current config. */ public BlockBasedTableConfig setIndexBlockRestartInterval( final int restartInterval) { indexBlockRestartInterval = restartInterval; return this; } /** * Get the block size for partitioned metadata. * * @return block size for partitioned metadata. 
*/ public long metadataBlockSize() { return metadataBlockSize; } /** * Set block size for partitioned metadata. * * @param metadataBlockSize Partitioned metadata block size. * @return the reference to the current config. */ public BlockBasedTableConfig setMetadataBlockSize( final long metadataBlockSize) { this.metadataBlockSize = metadataBlockSize; return this; } /** * Indicates if we're using partitioned filters. * * @return if we're using partition filters. */ public boolean partitionFilters() { return partitionFilters; } /** * Use partitioned full filters for each SST file. This option is incompatible * with block-based filters. * * Defaults to false. * * @param partitionFilters use partition filters. * @return the reference to the current config. */ public BlockBasedTableConfig setPartitionFilters(final boolean partitionFilters) { this.partitionFilters = partitionFilters; return this; } /** * Determine if delta encoding is being used to compress block keys. * * @return true if delta encoding is enabled, false otherwise. */ public boolean useDeltaEncoding() { return useDeltaEncoding; } /** * Use delta encoding to compress keys in blocks. * * NOTE: {@link ReadOptions#pinData()} requires this option to be disabled. * * Default: true * * @param useDeltaEncoding true to enable delta encoding * * @return the reference to the current config. */ public BlockBasedTableConfig setUseDeltaEncoding( final boolean useDeltaEncoding) { this.useDeltaEncoding = useDeltaEncoding; return this; } /** * Get the filter policy. * * @return the current filter policy. */ public Filter filterPolicy() { return filterPolicy; } /** * Use the specified filter policy to reduce disk reads. * * {@link org.rocksdb.Filter} should not be disposed before options instances * using this filter is disposed. If {@link Filter#dispose()} function is not * called, then filter object will be GC'd automatically. * * {@link org.rocksdb.Filter} instance can be re-used in multiple options * instances. 
* * @param filterPolicy {@link org.rocksdb.Filter} Filter Policy java instance. * @return the reference to the current config. */ public BlockBasedTableConfig setFilterPolicy( final Filter filterPolicy) { this.filterPolicy = filterPolicy; return this; } /** * Set the filter. * * @param filter the filter * @return the reference to the current config. * * @deprecated Use {@link #setFilterPolicy(Filter)} */ @Deprecated public BlockBasedTableConfig setFilter( final Filter filter) { return setFilterPolicy(filter); } /** * Determine if whole keys as opposed to prefixes are placed in the filter. * * @return if whole key filtering is enabled */ public boolean wholeKeyFiltering() { return wholeKeyFiltering; } /** * If true, place whole keys in the filter (not just prefixes). * This must generally be true for gets to be efficient. * Default: true * * @param wholeKeyFiltering if enable whole key filtering * @return the reference to the current config. */ public BlockBasedTableConfig setWholeKeyFiltering( final boolean wholeKeyFiltering) { this.wholeKeyFiltering = wholeKeyFiltering; return this; } /** * Returns true when compression verification is enabled. * * See {@link #setVerifyCompression(boolean)}. * * @return true if compression verification is enabled. */ public boolean verifyCompression() { return verifyCompression; } /** * Verify that decompressing the compressed block gives back the input. This * is a verification mode that we use to detect bugs in compression * algorithms. * * @param verifyCompression true to enable compression verification. * * @return the reference to the current config. */ public BlockBasedTableConfig setVerifyCompression( final boolean verifyCompression) { this.verifyCompression = verifyCompression; return this; } /** * Get the Read amplification bytes per-bit. * * See {@link #setReadAmpBytesPerBit(int)}. * * @return the bytes per-bit. 
*/ public int readAmpBytesPerBit() { return readAmpBytesPerBit; } /** * Set the Read amplification bytes per-bit. * * If used, For every data block we load into memory, we will create a bitmap * of size ((block_size / `read_amp_bytes_per_bit`) / 8) bytes. This bitmap * will be used to figure out the percentage we actually read of the blocks. * * When this feature is used Tickers::READ_AMP_ESTIMATE_USEFUL_BYTES and * Tickers::READ_AMP_TOTAL_READ_BYTES can be used to calculate the * read amplification using this formula * (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES) * * value => memory usage (percentage of loaded blocks memory) * 1 => 12.50 % * 2 => 06.25 % * 4 => 03.12 % * 8 => 01.56 % * 16 => 00.78 % * * Note: This number must be a power of 2, if not it will be sanitized * to be the next lowest power of 2, for example a value of 7 will be * treated as 4, a value of 19 will be treated as 16. * * Default: 0 (disabled) * * @param readAmpBytesPerBit the bytes per-bit * * @return the reference to the current config. */ public BlockBasedTableConfig setReadAmpBytesPerBit(final int readAmpBytesPerBit) { this.readAmpBytesPerBit = readAmpBytesPerBit; return this; } /** * Get the format version. * See {@link #setFormatVersion(int)}. * * @return the currently configured format version. */ public int formatVersion() { return formatVersion; } /** *

We currently have five versions:

* *
    *
  • 0 - This version is currently written * out by all RocksDB's versions by default. Can be read by really old * RocksDB's. Doesn't support changing checksum (default is CRC32).
  • *
  • 1 - Can be read by RocksDB's versions since 3.0. * Supports non-default checksum, like xxHash. It is written by RocksDB when * BlockBasedTableOptions::checksum is something other than kCRC32c. (version * 0 is silently upconverted)
  • *
  • 2 - Can be read by RocksDB's versions since 3.10. * Changes the way we encode compressed blocks with LZ4, BZip2 and Zlib * compression. If you don't plan to run RocksDB before version 3.10, * you should probably use this.
  • *
  • 3 - Can be read by RocksDB's versions since 5.15. Changes the way we * encode the keys in index blocks. If you don't plan to run RocksDB before * version 5.15, you should probably use this. * This option only affects newly written tables. When reading existing * tables, the information about version is read from the footer.
  • *
  • 4 - Can be read by RocksDB's versions since 5.16. Changes the way we * encode the values in index blocks. If you don't plan to run RocksDB before * version 5.16 and you are using index_block_restart_interval > 1, you should * probably use this as it would reduce the index size.
  • *
*

This option only affects newly written tables. When reading existing * tables, the information about version is read from the footer.

* * @param formatVersion integer representing the version to be used. * * @return the reference to the current option. */ public BlockBasedTableConfig setFormatVersion( final int formatVersion) { assert(formatVersion >= 0 && formatVersion <= 4); this.formatVersion = formatVersion; return this; } /** * Determine if index compression is enabled. * * See {@link #setEnableIndexCompression(boolean)}. * * @return true if index compression is enabled, false otherwise */ public boolean enableIndexCompression() { return enableIndexCompression; } /** * Store index blocks on disk in compressed format. * * Changing this option to false will avoid the overhead of decompression * if index blocks are evicted and read back. * * @param enableIndexCompression true to enable index compression, * false to disable * * @return the reference to the current option. */ public BlockBasedTableConfig setEnableIndexCompression( final boolean enableIndexCompression) { this.enableIndexCompression = enableIndexCompression; return this; } /** * Determines whether data blocks are aligned on the lesser of page size * and block size. * * @return true if data blocks are aligned on the lesser of page size * and block size. */ public boolean blockAlign() { return blockAlign; } /** * Set whether data blocks should be aligned on the lesser of page size * and block size. * * @param blockAlign true to align data blocks on the lesser of page size * and block size. * * @return the reference to the current option. */ public BlockBasedTableConfig setBlockAlign(final boolean blockAlign) { this.blockAlign = blockAlign; return this; } /** * Get the size of the cache in bytes that will be used by RocksDB. * * @return block cache size in bytes */ @Deprecated public long blockCacheSize() { return blockCacheSize; } /** * Set the size of the cache in bytes that will be used by RocksDB. * If cacheSize is negative, then cache will not be used. 
* DEFAULT: 8M * * @param blockCacheSize block cache size in bytes * @return the reference to the current config. * * @deprecated Use {@link #setBlockCache(Cache)}. */ @Deprecated public BlockBasedTableConfig setBlockCacheSize(final long blockCacheSize) { this.blockCacheSize = blockCacheSize; return this; } /** * Returns the number of shard bits used in the block cache. * The resulting number of shards would be 2 ^ (returned value). * Any negative number means use default settings. * * @return the number of shard bits used in the block cache. */ @Deprecated public int cacheNumShardBits() { return blockCacheNumShardBits; } /** * Controls the number of shards for the block cache. * This is applied only if cacheSize is set to non-negative. * * @param blockCacheNumShardBits the number of shard bits. The resulting * number of shards would be 2 ^ numShardBits. Any negative * number means use default settings." * @return the reference to the current option. * * @deprecated Use {@link #setBlockCache(Cache)}. */ @Deprecated public BlockBasedTableConfig setCacheNumShardBits( final int blockCacheNumShardBits) { this.blockCacheNumShardBits = blockCacheNumShardBits; return this; } /** * Size of compressed block cache. If 0, then block_cache_compressed is set * to null. * * @return size of compressed block cache. */ @Deprecated public long blockCacheCompressedSize() { return blockCacheCompressedSize; } /** * Size of compressed block cache. If 0, then block_cache_compressed is set * to null. * * @param blockCacheCompressedSize of compressed block cache. * @return the reference to the current config. * * @deprecated Use {@link #setBlockCacheCompressed(Cache)}. */ @Deprecated public BlockBasedTableConfig setBlockCacheCompressedSize( final long blockCacheCompressedSize) { this.blockCacheCompressedSize = blockCacheCompressedSize; return this; } /** * Controls the number of shards for the block compressed cache. * This is applied only if blockCompressedCacheSize is set to non-negative. 
* * @return numShardBits the number of shard bits. The resulting * number of shards would be 2 ^ numShardBits. Any negative * number means use default settings. */ @Deprecated public int blockCacheCompressedNumShardBits() { return blockCacheCompressedNumShardBits; } /** * Controls the number of shards for the block compressed cache. * This is applied only if blockCompressedCacheSize is set to non-negative. * * @param blockCacheCompressedNumShardBits the number of shard bits. The resulting * number of shards would be 2 ^ numShardBits. Any negative * number means use default settings." * @return the reference to the current option. * * @deprecated Use {@link #setBlockCacheCompressed(Cache)}. */ @Deprecated public BlockBasedTableConfig setBlockCacheCompressedNumShardBits( final int blockCacheCompressedNumShardBits) { this.blockCacheCompressedNumShardBits = blockCacheCompressedNumShardBits; return this; } /** * Influence the behavior when kHashSearch is used. * if false, stores a precise prefix to block range mapping * if true, does not store prefix and allows prefix hash collision * (less memory consumption) * * @return if hash collisions should be allowed. * * @deprecated This option is now deprecated. No matter what value it * is set to, it will behave as * if {@link #hashIndexAllowCollision()} == true. */ @Deprecated public boolean hashIndexAllowCollision() { return true; } /** * Influence the behavior when kHashSearch is used. * if false, stores a precise prefix to block range mapping * if true, does not store prefix and allows prefix hash collision * (less memory consumption) * * @param hashIndexAllowCollision points out if hash collisions should be allowed. * * @return the reference to the current config. * * @deprecated This option is now deprecated. No matter what value it * is set to, it will behave as * if {@link #hashIndexAllowCollision()} == true. 
*/ @Deprecated public BlockBasedTableConfig setHashIndexAllowCollision( final boolean hashIndexAllowCollision) { // no-op return this; } @Override protected long newTableFactoryHandle() { final long filterPolicyHandle; if (filterPolicy != null) { filterPolicyHandle = filterPolicy.nativeHandle_; } else { filterPolicyHandle = 0; } final long blockCacheHandle; if (blockCache != null) { blockCacheHandle = blockCache.nativeHandle_; } else { blockCacheHandle = 0; } final long persistentCacheHandle; if (persistentCache != null) { persistentCacheHandle = persistentCache.nativeHandle_; } else { persistentCacheHandle = 0; } final long blockCacheCompressedHandle; if (blockCacheCompressed != null) { blockCacheCompressedHandle = blockCacheCompressed.nativeHandle_; } else { blockCacheCompressedHandle = 0; } return newTableFactoryHandle(cacheIndexAndFilterBlocks, cacheIndexAndFilterBlocksWithHighPriority, pinL0FilterAndIndexBlocksInCache, pinTopLevelIndexAndFilter, indexType.getValue(), dataBlockIndexType.getValue(), dataBlockHashTableUtilRatio, checksumType.getValue(), noBlockCache, blockCacheHandle, persistentCacheHandle, blockCacheCompressedHandle, blockSize, blockSizeDeviation, blockRestartInterval, indexBlockRestartInterval, metadataBlockSize, partitionFilters, useDeltaEncoding, filterPolicyHandle, wholeKeyFiltering, verifyCompression, readAmpBytesPerBit, formatVersion, enableIndexCompression, blockAlign, blockCacheSize, blockCacheNumShardBits, blockCacheCompressedSize, blockCacheCompressedNumShardBits); } private native long newTableFactoryHandle( final boolean cacheIndexAndFilterBlocks, final boolean cacheIndexAndFilterBlocksWithHighPriority, final boolean pinL0FilterAndIndexBlocksInCache, final boolean pinTopLevelIndexAndFilter, final byte indexTypeValue, final byte dataBlockIndexTypeValue, final double dataBlockHashTableUtilRatio, final byte checksumTypeValue, final boolean noBlockCache, final long blockCacheHandle, final long persistentCacheHandle, final long 
blockCacheCompressedHandle, final long blockSize, final int blockSizeDeviation, final int blockRestartInterval, final int indexBlockRestartInterval, final long metadataBlockSize, final boolean partitionFilters, final boolean useDeltaEncoding, final long filterPolicyHandle, final boolean wholeKeyFiltering, final boolean verifyCompression, final int readAmpBytesPerBit, final int formatVersion, final boolean enableIndexCompression, final boolean blockAlign, @Deprecated final long blockCacheSize, @Deprecated final int blockCacheNumShardBits, @Deprecated final long blockCacheCompressedSize, @Deprecated final int blockCacheCompressedNumShardBits ); //TODO(AR) flushBlockPolicyFactory private boolean cacheIndexAndFilterBlocks; private boolean cacheIndexAndFilterBlocksWithHighPriority; private boolean pinL0FilterAndIndexBlocksInCache; private boolean pinTopLevelIndexAndFilter; private IndexType indexType; private DataBlockIndexType dataBlockIndexType; private double dataBlockHashTableUtilRatio; private ChecksumType checksumType; private boolean noBlockCache; private Cache blockCache; private PersistentCache persistentCache; private Cache blockCacheCompressed; private long blockSize; private int blockSizeDeviation; private int blockRestartInterval; private int indexBlockRestartInterval; private long metadataBlockSize; private boolean partitionFilters; private boolean useDeltaEncoding; private Filter filterPolicy; private boolean wholeKeyFiltering; private boolean verifyCompression; private int readAmpBytesPerBit; private int formatVersion; private boolean enableIndexCompression; private boolean blockAlign; // NOTE: ONLY used if blockCache == null @Deprecated private long blockCacheSize; @Deprecated private int blockCacheNumShardBits; // NOTE: ONLY used if blockCacheCompressed == null @Deprecated private long blockCacheCompressedSize; @Deprecated private int blockCacheCompressedNumShardBits; } 
rocksdb-6.11.4/java/src/main/java/org/rocksdb/BloomFilter.java000066400000000000000000000051671370372246700241320ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Bloom filter policy that uses a bloom filter with approximately * the specified number of bits per key. * *

* Note: if you are using a custom comparator that ignores some parts * of the keys being compared, you must not use this {@code BloomFilter} * and must provide your own FilterPolicy that also ignores the * corresponding parts of the keys. For example, if the comparator * ignores trailing spaces, it would be incorrect to use a * FilterPolicy (like {@code BloomFilter}) that does not ignore * trailing spaces in keys.

*/ public class BloomFilter extends Filter { private static final double DEFAULT_BITS_PER_KEY = 10.0; private static final boolean DEFAULT_MODE = true; /** * BloomFilter constructor * *

* Callers must delete the result after any database that is using the * result has been closed.

*/ public BloomFilter() { this(DEFAULT_BITS_PER_KEY, DEFAULT_MODE); } /** * BloomFilter constructor * *

* bits_per_key: bits per key in bloom filter. A good value for bits_per_key * is 9.9, which yields a filter with ~ 1% false positive rate. *

*

* Callers must delete the result after any database that is using the * result has been closed.

* * @param bitsPerKey number of bits to use */ public BloomFilter(final double bitsPerKey) { this(bitsPerKey, DEFAULT_MODE); } /** * BloomFilter constructor * *

* bits_per_key: bits per key in bloom filter. A good value for bits_per_key * is 10, which yields a filter with ~ 1% false positive rate. *

default bits_per_key: 10

* *

use_block_based_builder: use block based filter rather than full filter. * If you want to builder full filter, it needs to be set to false. *

*

default mode: block based filter

*

* Callers must delete the result after any database that is using the * result has been closed.

* * @param bitsPerKey number of bits to use * @param useBlockBasedMode use block based mode or full filter mode */ public BloomFilter(final double bitsPerKey, final boolean useBlockBasedMode) { super(createNewBloomFilter(bitsPerKey, useBlockBasedMode)); } private native static long createNewBloomFilter(final double bitsKeyKey, final boolean useBlockBasedMode); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/BuiltinComparator.java000066400000000000000000000011361370372246700253420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Builtin RocksDB comparators * *
    * <ol>
 *   <li>BYTEWISE_COMPARATOR - Sorts all keys in ascending bytewise
 *   order.</li>
 *   <li>REVERSE_BYTEWISE_COMPARATOR - Sorts all keys in descending bytewise
 *   order</li>
 * </ol>
 */
public enum BuiltinComparator {
  BYTEWISE_COMPARATOR, REVERSE_BYTEWISE_COMPARATOR
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/Cache.java000066400000000000000000000006331370372246700227100ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * Abstract base class for cache objects that are backed by a native handle.
 */
public abstract class Cache extends RocksObject {
  /**
   * Forwards the given native handle to the {@link RocksObject} constructor.
   *
   * @param nativeHandle handle of the native cache object
   */
  protected Cache(final long nativeHandle) {
    super(nativeHandle);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java000066400000000000000000000014141370372246700267650ustar00rootroot00000000000000
// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * Just a Java wrapper around CassandraCompactionFilter implemented in C++
 */
public class CassandraCompactionFilter
    extends AbstractCompactionFilter {
  /**
   * Creates the native filter and passes its handle to the superclass.
   *
   * Both arguments are forwarded unchanged to the native constructor; their
   * exact semantics are defined by the C++ implementation.
   *
   * @param purgeTtlOnExpiration presumably whether expired TTL entries are
   *     purged - NOTE(review): confirm against the C++ filter.
   * @param gcGracePeriodInSeconds grace period in seconds, as the name
   *     suggests - NOTE(review): confirm against the C++ filter.
   */
  public CassandraCompactionFilter(boolean purgeTtlOnExpiration, int gcGracePeriodInSeconds) {
    super(createNewCassandraCompactionFilter0(purgeTtlOnExpiration, gcGracePeriodInSeconds));
  }

  // Native constructor; its return value is handed to the superclass.
  private native static long createNewCassandraCompactionFilter0(
      boolean purgeTtlOnExpiration, int gcGracePeriodInSeconds);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java000066400000000000000000000017371370372246700273030ustar00rootroot00000000000000
// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * Merge operator for cassandra wide column values: it merges two such
 * values into one.
 */
public class CassandraValueMergeOperator extends MergeOperator {
  /**
   * Creates the operator with an operands limit of 0.
   *
   * @param gcGracePeriodInSeconds forwarded to the native constructor
   */
  public CassandraValueMergeOperator(int gcGracePeriodInSeconds) {
    this(gcGracePeriodInSeconds, 0);
  }

  /**
   * Creates the operator.
   *
   * @param gcGracePeriodInSeconds forwarded to the native constructor
   * @param operandsLimit forwarded to the native constructor as its limit
   */
  public CassandraValueMergeOperator(int gcGracePeriodInSeconds, int operandsLimit) {
    super(newSharedCassandraValueMergeOperator(gcGracePeriodInSeconds, operandsLimit));
  }

  private native static long newSharedCassandraValueMergeOperator(
      int gcGracePeriodInSeconds, int limit);

  @Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/Checkpoint.java000066400000000000000000000042531370372246700237760ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * Checkpoint support: a checkpoint is a persistent snapshot of a
 * RocksDB database.
 */
public class Checkpoint extends RocksObject {

  // The database this checkpoint object was created for.
  private final RocksDB db_;

  private Checkpoint(final RocksDB db) {
    super(newCheckpoint(db.nativeHandle_));
    this.db_ = db;
  }

  /**
   * Factory for Checkpoint objects, which are used to create open-able
   * snapshots.
   *
   * @param db {@link RocksDB} instance.
   * @return a Checkpoint instance.
   *
   * @throws java.lang.IllegalArgumentException if {@link RocksDB}
   *     instance is null.
   * @throws java.lang.IllegalStateException if {@link RocksDB}
   *     instance is not initialized.
   */
  public static Checkpoint create(final RocksDB db) {
    // Reject a missing database first, then one that does not own its handle.
    if (db == null) {
      throw new IllegalArgumentException(
          "RocksDB instance shall not be null.");
    }
    if (!db.isOwningHandle()) {
      throw new IllegalStateException(
          "RocksDB instance must be initialized.");
    }
    return new Checkpoint(db);
  }

  /**
   * <p>Builds an open-able snapshot of RocksDB on the same disk, which
   * accepts an output directory on the same disk, and under the directory
   * (1) hard-linked SST files pointing to existing live SST files
   * (2) a copied manifest files and other files</p>
   *
   * @param checkpointPath path to the folder where the snapshot is going
   *     to be stored.
   * @throws RocksDBException thrown if an error occurs within the native
   *     part of the library.
   */
  public void createCheckpoint(final String checkpointPath)
      throws RocksDBException {
    createCheckpoint(nativeHandle_, checkpointPath);
  }

  private static native long newCheckpoint(long dbHandle);

  @Override protected final native void disposeInternal(final long handle);

  private native void createCheckpoint(long handle, String checkpointPath)
      throws RocksDBException;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ChecksumType.java000066400000000000000000000014261370372246700243120ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * The checksum types that can be used with BlockBasedTable.
 */
public enum ChecksumType {
  /**
   * Not implemented yet.
   */
  kNoChecksum((byte) 0),
  /**
   * CRC32 Checksum
   */
  kCRC32c((byte) 1),
  /**
   * XX Hash
   */
  kxxHash((byte) 2);

  // Byte representation of this constant.
  private final byte value_;

  private ChecksumType(byte value) {
    this.value_ = value;
  }

  /**
   * Gives the byte value associated with this enumeration value.
   *
   * @return byte representation
   */
  public byte getValue() {
    return this.value_;
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ClockCache.java000066400000000000000000000045241370372246700236670ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; /** * Similar to {@link LRUCache}, but based on the CLOCK algorithm with * better concurrent performance in some cases */ public class ClockCache extends Cache { /** * Create a new cache with a fixed size capacity. * * @param capacity The fixed size capacity of the cache */ public ClockCache(final long capacity) { super(newClockCache(capacity, -1, false)); } /** * Create a new cache with a fixed size capacity. The cache is sharded * to 2^numShardBits shards, by hash of the key. The total capacity * is divided and evenly assigned to each shard. * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. * * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key */ public ClockCache(final long capacity, final int numShardBits) { super(newClockCache(capacity, numShardBits, false)); } /** * Create a new cache with a fixed size capacity. The cache is sharded * to 2^numShardBits shards, by hash of the key. The total capacity * is divided and evenly assigned to each shard. If strictCapacityLimit * is set, insert to the cache will fail when cache is full. * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. 
* * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key * @param strictCapacityLimit insert to the cache will fail when cache is full */ public ClockCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit) { super(newClockCache(capacity, numShardBits, strictCapacityLimit)); } private native static long newClockCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java000066400000000000000000000054151370372246700263460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Arrays; /** *

Describes a column family with a * name and respective Options.

*/ public class ColumnFamilyDescriptor { /** *

Creates a new Column Family using a name and default * options.

*
   * @param columnFamilyName name of column family.
   * @since 3.10.0
   */
  public ColumnFamilyDescriptor(final byte[] columnFamilyName) {
    // Delegates to the two-argument constructor, pairing the name with a
    // freshly constructed ColumnFamilyOptions instance.
    // NOTE(review): the options object created here is only reachable through
    // getOptions(); confirm who is expected to release it.
    this(columnFamilyName, new ColumnFamilyOptions());
  }

  /**
   *

Creates a new Column Family using a name and custom * options.

* * @param columnFamilyName name of column family. * @param columnFamilyOptions options to be used with * column family. * @since 3.10.0 */ public ColumnFamilyDescriptor(final byte[] columnFamilyName, final ColumnFamilyOptions columnFamilyOptions) { columnFamilyName_ = columnFamilyName; columnFamilyOptions_ = columnFamilyOptions; } /** * Retrieve name of column family. * * @return column family name. * @since 3.10.0 */ public byte[] getName() { return columnFamilyName_; } /** * Retrieve name of column family. * * @return column family name. * @since 3.10.0 * * @deprecated Use {@link #getName()} instead. */ @Deprecated public byte[] columnFamilyName() { return getName(); } /** * Retrieve assigned options instance. * * @return Options instance assigned to this instance. */ public ColumnFamilyOptions getOptions() { return columnFamilyOptions_; } /** * Retrieve assigned options instance. * * @return Options instance assigned to this instance. * * @deprecated Use {@link #getOptions()} instead. */ @Deprecated public ColumnFamilyOptions columnFamilyOptions() { return getOptions(); } @Override public boolean equals(final Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } final ColumnFamilyDescriptor that = (ColumnFamilyDescriptor) o; return Arrays.equals(columnFamilyName_, that.columnFamilyName_) && columnFamilyOptions_.nativeHandle_ == that.columnFamilyOptions_.nativeHandle_; } @Override public int hashCode() { int result = (int) (columnFamilyOptions_.nativeHandle_ ^ (columnFamilyOptions_.nativeHandle_ >>> 32)); result = 31 * result + Arrays.hashCode(columnFamilyName_); return result; } private final byte[] columnFamilyName_; private final ColumnFamilyOptions columnFamilyOptions_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java000066400000000000000000000075651370372246700254330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Arrays; import java.util.Objects; /** * ColumnFamilyHandle class to hold handles to underlying rocksdb * ColumnFamily Pointers. */ public class ColumnFamilyHandle extends RocksObject { ColumnFamilyHandle(final RocksDB rocksDB, final long nativeHandle) { super(nativeHandle); // rocksDB must point to a valid RocksDB instance; assert(rocksDB != null); // ColumnFamilyHandle must hold a reference to the related RocksDB instance // to guarantee that while a GC cycle starts ColumnFamilyHandle instances // are freed prior to RocksDB instances. this.rocksDB_ = rocksDB; } /** * Gets the name of the Column Family. * * @return The name of the Column Family. * * @throws RocksDBException if an error occurs whilst retrieving the name. */ public byte[] getName() throws RocksDBException { assert(isOwningHandle() || isDefaultColumnFamily()); return getName(nativeHandle_); } /** * Gets the ID of the Column Family. * * @return the ID of the Column Family. */ public int getID() { assert(isOwningHandle() || isDefaultColumnFamily()); return getID(nativeHandle_); } /** * Gets the up-to-date descriptor of the column family * associated with this handle. Since it fills "*desc" with the up-to-date * information, this call might internally lock and release DB mutex to * access the up-to-date CF options. In addition, all the pointer-typed * options cannot be referenced any longer than the original options exist. * * Note that this function is not supported in RocksDBLite. * * @return the up-to-date descriptor. * * @throws RocksDBException if an error occurs whilst retrieving the * descriptor. 
*/
  public ColumnFamilyDescriptor getDescriptor() throws RocksDBException {
    // Only a handle that still owns its native object, or the default column
    // family handle, may be dereferenced on the native side.
    assert(isOwningHandle() || isDefaultColumnFamily());
    return getDescriptor(nativeHandle_);
  }

  /**
   * Two handles are equal when they refer to the same native RocksDB instance
   * and report the same column family ID and the same name (compared by
   * content).
   *
   * @param o the object to compare with.
   *
   * @return true if {@code o} is an equal ColumnFamilyHandle.
   *
   * @throws RuntimeException if the name cannot be retrieved from the native
   *     side.
   */
  @Override
  public boolean equals(final Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    final ColumnFamilyHandle that = (ColumnFamilyHandle) o;
    try {
      return rocksDB_.nativeHandle_ == that.rocksDB_.nativeHandle_ &&
          getID() == that.getID() &&
          Arrays.equals(getName(), that.getName());
    } catch (RocksDBException e) {
      throw new RuntimeException("Cannot compare column family handles", e);
    }
  }

  /**
   * Computes a hash code that is consistent with {@link #equals(Object)}.
   *
   * The name is a {@code byte[]}, so it must be hashed by content with
   * {@link Arrays#hashCode(byte[])}. Passing the array to
   * {@link Objects#hash(Object...)} would hash it by identity, and two equal
   * handles could then end up with different hash codes.
   *
   * @return the hash code of this handle.
   *
   * @throws RuntimeException if the name cannot be retrieved from the native
   *     side.
   */
  @Override
  public int hashCode() {
    try {
      // Content-based hash of the name, matching the Arrays.equals() used in
      // equals().
      final int nameHash = Arrays.hashCode(getName());
      final int result = Objects.hash(getID(), rocksDB_.nativeHandle_);
      return 31 * result + nameHash;
    } catch (RocksDBException e) {
      throw new RuntimeException("Cannot calculate hash code of column family handle", e);
    }
  }

  /**
   * Checks whether this handle is the default column family handle of the
   * RocksDB instance it belongs to, by comparing native handle values.
   *
   * @return true if this handle has the same native handle as the default
   *     column family of {@code rocksDB_}.
   */
  protected boolean isDefaultColumnFamily() {
    return nativeHandle_ == rocksDB_.getDefaultColumnFamily().nativeHandle_;
  }

  /**
   *

Deletes the underlying C++ column family handle pointer.

* *

Note: the underlying handle can only be safely deleted if the RocksDB * instance related to a certain ColumnFamilyHandle is still valid and * initialized. Therefore {@code disposeInternal()} checks if the RocksDB is * initialized before freeing the native handle.

*/ @Override protected void disposeInternal() { if(rocksDB_.isOwningHandle()) { disposeInternal(nativeHandle_); } } private native byte[] getName(final long handle) throws RocksDBException; private native int getID(final long handle); private native ColumnFamilyDescriptor getDescriptor(final long handle) throws RocksDBException; @Override protected final native void disposeInternal(final long handle); private final RocksDB rocksDB_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java000066400000000000000000000027721370372246700257130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Arrays; import java.util.List; /** * The metadata that describes a column family. */ public class ColumnFamilyMetaData { private final long size; private final long fileCount; private final byte[] name; private final LevelMetaData[] levels; /** * Called from JNI C++ */ private ColumnFamilyMetaData( final long size, final long fileCount, final byte[] name, final LevelMetaData[] levels) { this.size = size; this.fileCount = fileCount; this.name = name; this.levels = levels; } /** * The size of this column family in bytes, which is equal to the sum of * the file size of its {@link #levels()}. * * @return the size of this column family */ public long size() { return size; } /** * The number of files in this column family. * * @return the number of files */ public long fileCount() { return fileCount; } /** * The name of the column family. * * @return the name */ public byte[] name() { return name; } /** * The metadata of all levels in this column family. 
* * @return the levels metadata */ public List levels() { return Arrays.asList(levels); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java000066400000000000000000001022161370372246700256600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.ArrayList; import java.util.List; import java.util.Properties; /** * ColumnFamilyOptions to control the behavior of a database. It will be used * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). * * If {@link #dispose()} function is not called, then it will be GC'd * automatically and native resources will be released as part of the process. */ public class ColumnFamilyOptions extends RocksObject implements ColumnFamilyOptionsInterface, MutableColumnFamilyOptionsInterface { static { RocksDB.loadLibrary(); } /** * Construct ColumnFamilyOptions. * * This constructor will create (by allocating a block of memory) * an {@code rocksdb::ColumnFamilyOptions} in the c++ side. */ public ColumnFamilyOptions() { super(newColumnFamilyOptions()); } /** * Copy constructor for ColumnFamilyOptions. * * NOTE: This does a shallow copy, which means comparator, merge_operator, compaction_filter, * compaction_filter_factory and other pointers will be cloned! * * @param other The ColumnFamilyOptions to copy. 
*/ public ColumnFamilyOptions(ColumnFamilyOptions other) { super(copyColumnFamilyOptions(other.nativeHandle_)); this.memTableConfig_ = other.memTableConfig_; this.tableFormatConfig_ = other.tableFormatConfig_; this.comparator_ = other.comparator_; this.compactionFilter_ = other.compactionFilter_; this.compactionFilterFactory_ = other.compactionFilterFactory_; this.compactionOptionsUniversal_ = other.compactionOptionsUniversal_; this.compactionOptionsFIFO_ = other.compactionOptionsFIFO_; this.bottommostCompressionOptions_ = other.bottommostCompressionOptions_; this.compressionOptions_ = other.compressionOptions_; } /** * Constructor from Options * * @param options The options. */ public ColumnFamilyOptions(final Options options) { super(newColumnFamilyOptionsFromOptions(options.nativeHandle_)); } /** *

Constructor to be used by * {@link #getColumnFamilyOptionsFromProps(java.util.Properties)}, * {@link ColumnFamilyDescriptor#getOptions()} * and also called via JNI.

*
   * @param handle native handle to ColumnFamilyOptions instance.
   */
  ColumnFamilyOptions(final long handle) {
    // Wraps an already existing native handle; this constructor itself does
    // not call any native allocation function.
    super(handle);
  }

  /**
   *

Method to get an options instance by using pre-configured * property values. If one or more values are undefined in * the context of RocksDB, the method will return a null * value.

* *

Note: Property keys can be derived from * getter methods within the options class. Example: the method * {@code writeBufferSize()} has a property key: * {@code write_buffer_size}.

*
   * @param properties {@link java.util.Properties} instance.
   *
   * @return {@link org.rocksdb.ColumnFamilyOptions instance}
   *     or null.
   *
   * @throws java.lang.IllegalArgumentException if null or empty
   *     {@link Properties} instance is passed to the method call.
   */
  public static ColumnFamilyOptions getColumnFamilyOptionsFromProps(
      final Properties properties) {
    // The properties are first flattened into an option string, which the
    // native side then turns into a handle.
    final long handle =
        getColumnFamilyOptionsFromProps(Options.getOptionStringFromProps(properties));

    // A zero handle means no native options object was produced.
    if (handle == 0) {
      return null;
    }
    return new ColumnFamilyOptions(handle);
  }

  /**
   *

Method to get an options instance by using pre-configured * property values. If one or more values are undefined in * the context of RocksDB, the method will return a null * value.

* *

Note: Property keys can be derived from * getter methods within the options class. Example: the method * {@code writeBufferSize()} has a property key: * {@code write_buffer_size}.

* * @param cfgOpts ConfigOptions controlling how the properties are parsed. * @param properties {@link java.util.Properties} instance. * * @return {@link org.rocksdb.ColumnFamilyOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty * {@link Properties} instance is passed to the method call. */ public static ColumnFamilyOptions getColumnFamilyOptionsFromProps( final ConfigOptions cfgOpts, final Properties properties) { ColumnFamilyOptions columnFamilyOptions = null; final long handle = getColumnFamilyOptionsFromProps( cfgOpts.nativeHandle_, Options.getOptionStringFromProps(properties)); if (handle != 0){ columnFamilyOptions = new ColumnFamilyOptions(handle); } return columnFamilyOptions; } @Override public ColumnFamilyOptions optimizeForSmallDb() { optimizeForSmallDb(nativeHandle_); return this; } @Override public ColumnFamilyOptions optimizeForPointLookup( final long blockCacheSizeMb) { optimizeForPointLookup(nativeHandle_, blockCacheSizeMb); return this; } @Override public ColumnFamilyOptions optimizeLevelStyleCompaction() { optimizeLevelStyleCompaction(nativeHandle_, DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); return this; } @Override public ColumnFamilyOptions optimizeLevelStyleCompaction( final long memtableMemoryBudget) { optimizeLevelStyleCompaction(nativeHandle_, memtableMemoryBudget); return this; } @Override public ColumnFamilyOptions optimizeUniversalStyleCompaction() { optimizeUniversalStyleCompaction(nativeHandle_, DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); return this; } @Override public ColumnFamilyOptions optimizeUniversalStyleCompaction( final long memtableMemoryBudget) { optimizeUniversalStyleCompaction(nativeHandle_, memtableMemoryBudget); return this; } @Override public ColumnFamilyOptions setComparator( final BuiltinComparator builtinComparator) { assert(isOwningHandle()); setComparatorHandle(nativeHandle_, builtinComparator.ordinal()); return this; } @Override public ColumnFamilyOptions setComparator( 
final AbstractComparator comparator) { assert (isOwningHandle()); setComparatorHandle(nativeHandle_, comparator.nativeHandle_, comparator.getComparatorType().getValue()); comparator_ = comparator; return this; } @Override public ColumnFamilyOptions setMergeOperatorName(final String name) { assert (isOwningHandle()); if (name == null) { throw new IllegalArgumentException( "Merge operator name must not be null."); } setMergeOperatorName(nativeHandle_, name); return this; } @Override public ColumnFamilyOptions setMergeOperator( final MergeOperator mergeOperator) { setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); return this; } @Override public ColumnFamilyOptions setCompactionFilter( final AbstractCompactionFilter> compactionFilter) { setCompactionFilterHandle(nativeHandle_, compactionFilter.nativeHandle_); compactionFilter_ = compactionFilter; return this; } @Override public AbstractCompactionFilter> compactionFilter() { assert (isOwningHandle()); return compactionFilter_; } @Override public ColumnFamilyOptions setCompactionFilterFactory(final AbstractCompactionFilterFactory> compactionFilterFactory) { assert (isOwningHandle()); setCompactionFilterFactoryHandle(nativeHandle_, compactionFilterFactory.nativeHandle_); compactionFilterFactory_ = compactionFilterFactory; return this; } @Override public AbstractCompactionFilterFactory> compactionFilterFactory() { assert (isOwningHandle()); return compactionFilterFactory_; } @Override public ColumnFamilyOptions setWriteBufferSize(final long writeBufferSize) { assert(isOwningHandle()); setWriteBufferSize(nativeHandle_, writeBufferSize); return this; } @Override public long writeBufferSize() { assert(isOwningHandle()); return writeBufferSize(nativeHandle_); } @Override public ColumnFamilyOptions setMaxWriteBufferNumber( final int maxWriteBufferNumber) { assert(isOwningHandle()); setMaxWriteBufferNumber(nativeHandle_, maxWriteBufferNumber); return this; } @Override public int maxWriteBufferNumber() { 
assert(isOwningHandle()); return maxWriteBufferNumber(nativeHandle_); } @Override public ColumnFamilyOptions setMinWriteBufferNumberToMerge( final int minWriteBufferNumberToMerge) { setMinWriteBufferNumberToMerge(nativeHandle_, minWriteBufferNumberToMerge); return this; } @Override public int minWriteBufferNumberToMerge() { return minWriteBufferNumberToMerge(nativeHandle_); } @Override public ColumnFamilyOptions useFixedLengthPrefixExtractor(final int n) { assert(isOwningHandle()); useFixedLengthPrefixExtractor(nativeHandle_, n); return this; } @Override public ColumnFamilyOptions useCappedPrefixExtractor(final int n) { assert(isOwningHandle()); useCappedPrefixExtractor(nativeHandle_, n); return this; } @Override public ColumnFamilyOptions setCompressionType( final CompressionType compressionType) { setCompressionType(nativeHandle_, compressionType.getValue()); return this; } @Override public CompressionType compressionType() { return CompressionType.getCompressionType(compressionType(nativeHandle_)); } @Override public ColumnFamilyOptions setCompressionPerLevel( final List compressionLevels) { final byte[] byteCompressionTypes = new byte[ compressionLevels.size()]; for (int i = 0; i < compressionLevels.size(); i++) { byteCompressionTypes[i] = compressionLevels.get(i).getValue(); } setCompressionPerLevel(nativeHandle_, byteCompressionTypes); return this; } @Override public List compressionPerLevel() { final byte[] byteCompressionTypes = compressionPerLevel(nativeHandle_); final List compressionLevels = new ArrayList<>(); for (final Byte byteCompressionType : byteCompressionTypes) { compressionLevels.add(CompressionType.getCompressionType( byteCompressionType)); } return compressionLevels; } @Override public ColumnFamilyOptions setBottommostCompressionType( final CompressionType bottommostCompressionType) { setBottommostCompressionType(nativeHandle_, bottommostCompressionType.getValue()); return this; } @Override public CompressionType bottommostCompressionType() { 
return CompressionType.getCompressionType( bottommostCompressionType(nativeHandle_)); } @Override public ColumnFamilyOptions setBottommostCompressionOptions( final CompressionOptions bottommostCompressionOptions) { setBottommostCompressionOptions(nativeHandle_, bottommostCompressionOptions.nativeHandle_); this.bottommostCompressionOptions_ = bottommostCompressionOptions; return this; } @Override public CompressionOptions bottommostCompressionOptions() { return this.bottommostCompressionOptions_; } @Override public ColumnFamilyOptions setCompressionOptions( final CompressionOptions compressionOptions) { setCompressionOptions(nativeHandle_, compressionOptions.nativeHandle_); this.compressionOptions_ = compressionOptions; return this; } @Override public CompressionOptions compressionOptions() { return this.compressionOptions_; } @Override public ColumnFamilyOptions setNumLevels(final int numLevels) { setNumLevels(nativeHandle_, numLevels); return this; } @Override public int numLevels() { return numLevels(nativeHandle_); } @Override public ColumnFamilyOptions setLevelZeroFileNumCompactionTrigger( final int numFiles) { setLevelZeroFileNumCompactionTrigger( nativeHandle_, numFiles); return this; } @Override public int levelZeroFileNumCompactionTrigger() { return levelZeroFileNumCompactionTrigger(nativeHandle_); } @Override public ColumnFamilyOptions setLevelZeroSlowdownWritesTrigger( final int numFiles) { setLevelZeroSlowdownWritesTrigger(nativeHandle_, numFiles); return this; } @Override public int levelZeroSlowdownWritesTrigger() { return levelZeroSlowdownWritesTrigger(nativeHandle_); } @Override public ColumnFamilyOptions setLevelZeroStopWritesTrigger(final int numFiles) { setLevelZeroStopWritesTrigger(nativeHandle_, numFiles); return this; } @Override public int levelZeroStopWritesTrigger() { return levelZeroStopWritesTrigger(nativeHandle_); } @Override public ColumnFamilyOptions setTargetFileSizeBase( final long targetFileSizeBase) { 
setTargetFileSizeBase(nativeHandle_, targetFileSizeBase); return this; } @Override public long targetFileSizeBase() { return targetFileSizeBase(nativeHandle_); } @Override public ColumnFamilyOptions setTargetFileSizeMultiplier( final int multiplier) { setTargetFileSizeMultiplier(nativeHandle_, multiplier); return this; } @Override public int targetFileSizeMultiplier() { return targetFileSizeMultiplier(nativeHandle_); } @Override public ColumnFamilyOptions setMaxBytesForLevelBase( final long maxBytesForLevelBase) { setMaxBytesForLevelBase(nativeHandle_, maxBytesForLevelBase); return this; } @Override public long maxBytesForLevelBase() { return maxBytesForLevelBase(nativeHandle_); } @Override public ColumnFamilyOptions setLevelCompactionDynamicLevelBytes( final boolean enableLevelCompactionDynamicLevelBytes) { setLevelCompactionDynamicLevelBytes(nativeHandle_, enableLevelCompactionDynamicLevelBytes); return this; } @Override public boolean levelCompactionDynamicLevelBytes() { return levelCompactionDynamicLevelBytes(nativeHandle_); } @Override public ColumnFamilyOptions setMaxBytesForLevelMultiplier(final double multiplier) { setMaxBytesForLevelMultiplier(nativeHandle_, multiplier); return this; } @Override public double maxBytesForLevelMultiplier() { return maxBytesForLevelMultiplier(nativeHandle_); } @Override public ColumnFamilyOptions setMaxCompactionBytes(final long maxCompactionBytes) { setMaxCompactionBytes(nativeHandle_, maxCompactionBytes); return this; } @Override public long maxCompactionBytes() { return maxCompactionBytes(nativeHandle_); } @Override public ColumnFamilyOptions setArenaBlockSize( final long arenaBlockSize) { setArenaBlockSize(nativeHandle_, arenaBlockSize); return this; } @Override public long arenaBlockSize() { return arenaBlockSize(nativeHandle_); } @Override public ColumnFamilyOptions setDisableAutoCompactions( final boolean disableAutoCompactions) { setDisableAutoCompactions(nativeHandle_, disableAutoCompactions); return this; } 
@Override public boolean disableAutoCompactions() { return disableAutoCompactions(nativeHandle_); } @Override public ColumnFamilyOptions setCompactionStyle( final CompactionStyle compactionStyle) { setCompactionStyle(nativeHandle_, compactionStyle.getValue()); return this; } @Override public CompactionStyle compactionStyle() { return CompactionStyle.fromValue(compactionStyle(nativeHandle_)); } @Override public ColumnFamilyOptions setMaxTableFilesSizeFIFO( final long maxTableFilesSize) { assert(maxTableFilesSize > 0); // unsigned native type assert(isOwningHandle()); setMaxTableFilesSizeFIFO(nativeHandle_, maxTableFilesSize); return this; } @Override public long maxTableFilesSizeFIFO() { return maxTableFilesSizeFIFO(nativeHandle_); } @Override public ColumnFamilyOptions setMaxSequentialSkipInIterations( final long maxSequentialSkipInIterations) { setMaxSequentialSkipInIterations(nativeHandle_, maxSequentialSkipInIterations); return this; } @Override public long maxSequentialSkipInIterations() { return maxSequentialSkipInIterations(nativeHandle_); } @Override public MemTableConfig memTableConfig() { return this.memTableConfig_; } @Override public ColumnFamilyOptions setMemTableConfig( final MemTableConfig memTableConfig) { setMemTableFactory( nativeHandle_, memTableConfig.newMemTableFactoryHandle()); this.memTableConfig_ = memTableConfig; return this; } @Override public String memTableFactoryName() { assert(isOwningHandle()); return memTableFactoryName(nativeHandle_); } @Override public TableFormatConfig tableFormatConfig() { return this.tableFormatConfig_; } @Override public ColumnFamilyOptions setTableFormatConfig( final TableFormatConfig tableFormatConfig) { setTableFactory(nativeHandle_, tableFormatConfig.newTableFactoryHandle()); this.tableFormatConfig_ = tableFormatConfig; return this; } @Override public String tableFactoryName() { assert(isOwningHandle()); return tableFactoryName(nativeHandle_); } @Override public ColumnFamilyOptions setInplaceUpdateSupport( 
final boolean inplaceUpdateSupport) { setInplaceUpdateSupport(nativeHandle_, inplaceUpdateSupport); return this; } @Override public boolean inplaceUpdateSupport() { return inplaceUpdateSupport(nativeHandle_); } @Override public ColumnFamilyOptions setInplaceUpdateNumLocks( final long inplaceUpdateNumLocks) { setInplaceUpdateNumLocks(nativeHandle_, inplaceUpdateNumLocks); return this; } @Override public long inplaceUpdateNumLocks() { return inplaceUpdateNumLocks(nativeHandle_); } @Override public ColumnFamilyOptions setMemtablePrefixBloomSizeRatio( final double memtablePrefixBloomSizeRatio) { setMemtablePrefixBloomSizeRatio(nativeHandle_, memtablePrefixBloomSizeRatio); return this; } @Override public double memtablePrefixBloomSizeRatio() { return memtablePrefixBloomSizeRatio(nativeHandle_); } @Override public ColumnFamilyOptions setBloomLocality(int bloomLocality) { setBloomLocality(nativeHandle_, bloomLocality); return this; } @Override public int bloomLocality() { return bloomLocality(nativeHandle_); } @Override public ColumnFamilyOptions setMaxSuccessiveMerges( final long maxSuccessiveMerges) { setMaxSuccessiveMerges(nativeHandle_, maxSuccessiveMerges); return this; } @Override public long maxSuccessiveMerges() { return maxSuccessiveMerges(nativeHandle_); } @Override public ColumnFamilyOptions setOptimizeFiltersForHits( final boolean optimizeFiltersForHits) { setOptimizeFiltersForHits(nativeHandle_, optimizeFiltersForHits); return this; } @Override public boolean optimizeFiltersForHits() { return optimizeFiltersForHits(nativeHandle_); } @Override public ColumnFamilyOptions setMemtableHugePageSize( long memtableHugePageSize) { setMemtableHugePageSize(nativeHandle_, memtableHugePageSize); return this; } @Override public long memtableHugePageSize() { return memtableHugePageSize(nativeHandle_); } @Override public ColumnFamilyOptions setSoftPendingCompactionBytesLimit(long softPendingCompactionBytesLimit) { setSoftPendingCompactionBytesLimit(nativeHandle_, 
softPendingCompactionBytesLimit); return this; } @Override public long softPendingCompactionBytesLimit() { return softPendingCompactionBytesLimit(nativeHandle_); } @Override public ColumnFamilyOptions setHardPendingCompactionBytesLimit(long hardPendingCompactionBytesLimit) { setHardPendingCompactionBytesLimit(nativeHandle_, hardPendingCompactionBytesLimit); return this; } @Override public long hardPendingCompactionBytesLimit() { return hardPendingCompactionBytesLimit(nativeHandle_); } @Override public ColumnFamilyOptions setLevel0FileNumCompactionTrigger(int level0FileNumCompactionTrigger) { setLevel0FileNumCompactionTrigger(nativeHandle_, level0FileNumCompactionTrigger); return this; } @Override public int level0FileNumCompactionTrigger() { return level0FileNumCompactionTrigger(nativeHandle_); } @Override public ColumnFamilyOptions setLevel0SlowdownWritesTrigger(int level0SlowdownWritesTrigger) { setLevel0SlowdownWritesTrigger(nativeHandle_, level0SlowdownWritesTrigger); return this; } @Override public int level0SlowdownWritesTrigger() { return level0SlowdownWritesTrigger(nativeHandle_); } @Override public ColumnFamilyOptions setLevel0StopWritesTrigger(int level0StopWritesTrigger) { setLevel0StopWritesTrigger(nativeHandle_, level0StopWritesTrigger); return this; } @Override public int level0StopWritesTrigger() { return level0StopWritesTrigger(nativeHandle_); } @Override public ColumnFamilyOptions setMaxBytesForLevelMultiplierAdditional(int[] maxBytesForLevelMultiplierAdditional) { setMaxBytesForLevelMultiplierAdditional(nativeHandle_, maxBytesForLevelMultiplierAdditional); return this; } @Override public int[] maxBytesForLevelMultiplierAdditional() { return maxBytesForLevelMultiplierAdditional(nativeHandle_); } @Override public ColumnFamilyOptions setParanoidFileChecks(boolean paranoidFileChecks) { setParanoidFileChecks(nativeHandle_, paranoidFileChecks); return this; } @Override public boolean paranoidFileChecks() { return paranoidFileChecks(nativeHandle_); } 
@Override public ColumnFamilyOptions setMaxWriteBufferNumberToMaintain( final int maxWriteBufferNumberToMaintain) { setMaxWriteBufferNumberToMaintain( nativeHandle_, maxWriteBufferNumberToMaintain); return this; } @Override public int maxWriteBufferNumberToMaintain() { return maxWriteBufferNumberToMaintain(nativeHandle_); } @Override public ColumnFamilyOptions setCompactionPriority( final CompactionPriority compactionPriority) { setCompactionPriority(nativeHandle_, compactionPriority.getValue()); return this; } @Override public CompactionPriority compactionPriority() { return CompactionPriority.getCompactionPriority( compactionPriority(nativeHandle_)); } @Override public ColumnFamilyOptions setReportBgIoStats(final boolean reportBgIoStats) { setReportBgIoStats(nativeHandle_, reportBgIoStats); return this; } @Override public boolean reportBgIoStats() { return reportBgIoStats(nativeHandle_); } @Override public ColumnFamilyOptions setTtl(final long ttl) { setTtl(nativeHandle_, ttl); return this; } @Override public long ttl() { return ttl(nativeHandle_); } @Override public ColumnFamilyOptions setCompactionOptionsUniversal( final CompactionOptionsUniversal compactionOptionsUniversal) { setCompactionOptionsUniversal(nativeHandle_, compactionOptionsUniversal.nativeHandle_); this.compactionOptionsUniversal_ = compactionOptionsUniversal; return this; } @Override public CompactionOptionsUniversal compactionOptionsUniversal() { return this.compactionOptionsUniversal_; } @Override public ColumnFamilyOptions setCompactionOptionsFIFO(final CompactionOptionsFIFO compactionOptionsFIFO) { setCompactionOptionsFIFO(nativeHandle_, compactionOptionsFIFO.nativeHandle_); this.compactionOptionsFIFO_ = compactionOptionsFIFO; return this; } @Override public CompactionOptionsFIFO compactionOptionsFIFO() { return this.compactionOptionsFIFO_; } @Override public ColumnFamilyOptions setForceConsistencyChecks(final boolean forceConsistencyChecks) { setForceConsistencyChecks(nativeHandle_, 
forceConsistencyChecks); return this; } @Override public boolean forceConsistencyChecks() { return forceConsistencyChecks(nativeHandle_); } private static native long getColumnFamilyOptionsFromProps( final long cfgHandle, String optString); private static native long getColumnFamilyOptionsFromProps(final String optString); private static native long newColumnFamilyOptions(); private static native long copyColumnFamilyOptions(final long handle); private static native long newColumnFamilyOptionsFromOptions( final long optionsHandle); @Override protected final native void disposeInternal(final long handle); private native void optimizeForSmallDb(final long handle); private native void optimizeForPointLookup(long handle, long blockCacheSizeMb); private native void optimizeLevelStyleCompaction(long handle, long memtableMemoryBudget); private native void optimizeUniversalStyleCompaction(long handle, long memtableMemoryBudget); private native void setComparatorHandle(long handle, int builtinComparator); private native void setComparatorHandle(long optHandle, long comparatorHandle, byte comparatorType); private native void setMergeOperatorName(long handle, String name); private native void setMergeOperator(long handle, long mergeOperatorHandle); private native void setCompactionFilterHandle(long handle, long compactionFilterHandle); private native void setCompactionFilterFactoryHandle(long handle, long compactionFilterFactoryHandle); private native void setWriteBufferSize(long handle, long writeBufferSize) throws IllegalArgumentException; private native long writeBufferSize(long handle); private native void setMaxWriteBufferNumber( long handle, int maxWriteBufferNumber); private native int maxWriteBufferNumber(long handle); private native void setMinWriteBufferNumberToMerge( long handle, int minWriteBufferNumberToMerge); private native int minWriteBufferNumberToMerge(long handle); private native void setCompressionType(long handle, byte compressionType); private native 
byte compressionType(long handle); private native void setCompressionPerLevel(long handle, byte[] compressionLevels); private native byte[] compressionPerLevel(long handle); private native void setBottommostCompressionType(long handle, byte bottommostCompressionType); private native byte bottommostCompressionType(long handle); private native void setBottommostCompressionOptions(final long handle, final long bottommostCompressionOptionsHandle); private native void setCompressionOptions(long handle, long compressionOptionsHandle); private native void useFixedLengthPrefixExtractor( long handle, int prefixLength); private native void useCappedPrefixExtractor( long handle, int prefixLength); private native void setNumLevels( long handle, int numLevels); private native int numLevels(long handle); private native void setLevelZeroFileNumCompactionTrigger( long handle, int numFiles); private native int levelZeroFileNumCompactionTrigger(long handle); private native void setLevelZeroSlowdownWritesTrigger( long handle, int numFiles); private native int levelZeroSlowdownWritesTrigger(long handle); private native void setLevelZeroStopWritesTrigger( long handle, int numFiles); private native int levelZeroStopWritesTrigger(long handle); private native void setTargetFileSizeBase( long handle, long targetFileSizeBase); private native long targetFileSizeBase(long handle); private native void setTargetFileSizeMultiplier( long handle, int multiplier); private native int targetFileSizeMultiplier(long handle); private native void setMaxBytesForLevelBase( long handle, long maxBytesForLevelBase); private native long maxBytesForLevelBase(long handle); private native void setLevelCompactionDynamicLevelBytes( long handle, boolean enableLevelCompactionDynamicLevelBytes); private native boolean levelCompactionDynamicLevelBytes( long handle); private native void setMaxBytesForLevelMultiplier(long handle, double multiplier); private native double maxBytesForLevelMultiplier(long handle); private 
native void setMaxCompactionBytes(long handle, long maxCompactionBytes); private native long maxCompactionBytes(long handle); private native void setArenaBlockSize( long handle, long arenaBlockSize) throws IllegalArgumentException; private native long arenaBlockSize(long handle); private native void setDisableAutoCompactions( long handle, boolean disableAutoCompactions); private native boolean disableAutoCompactions(long handle); private native void setCompactionStyle(long handle, byte compactionStyle); private native byte compactionStyle(long handle); private native void setMaxTableFilesSizeFIFO( long handle, long max_table_files_size); private native long maxTableFilesSizeFIFO(long handle); private native void setMaxSequentialSkipInIterations( long handle, long maxSequentialSkipInIterations); private native long maxSequentialSkipInIterations(long handle); private native void setMemTableFactory(long handle, long factoryHandle); private native String memTableFactoryName(long handle); private native void setTableFactory(long handle, long factoryHandle); private native String tableFactoryName(long handle); private native void setInplaceUpdateSupport( long handle, boolean inplaceUpdateSupport); private native boolean inplaceUpdateSupport(long handle); private native void setInplaceUpdateNumLocks( long handle, long inplaceUpdateNumLocks) throws IllegalArgumentException; private native long inplaceUpdateNumLocks(long handle); private native void setMemtablePrefixBloomSizeRatio( long handle, double memtablePrefixBloomSizeRatio); private native double memtablePrefixBloomSizeRatio(long handle); private native void setBloomLocality( long handle, int bloomLocality); private native int bloomLocality(long handle); private native void setMaxSuccessiveMerges( long handle, long maxSuccessiveMerges) throws IllegalArgumentException; private native long maxSuccessiveMerges(long handle); private native void setOptimizeFiltersForHits(long handle, boolean optimizeFiltersForHits); 
private native boolean optimizeFiltersForHits(long handle); private native void setMemtableHugePageSize(long handle, long memtableHugePageSize); private native long memtableHugePageSize(long handle); private native void setSoftPendingCompactionBytesLimit(long handle, long softPendingCompactionBytesLimit); private native long softPendingCompactionBytesLimit(long handle); private native void setHardPendingCompactionBytesLimit(long handle, long hardPendingCompactionBytesLimit); private native long hardPendingCompactionBytesLimit(long handle); private native void setLevel0FileNumCompactionTrigger(long handle, int level0FileNumCompactionTrigger); private native int level0FileNumCompactionTrigger(long handle); private native void setLevel0SlowdownWritesTrigger(long handle, int level0SlowdownWritesTrigger); private native int level0SlowdownWritesTrigger(long handle); private native void setLevel0StopWritesTrigger(long handle, int level0StopWritesTrigger); private native int level0StopWritesTrigger(long handle); private native void setMaxBytesForLevelMultiplierAdditional(long handle, int[] maxBytesForLevelMultiplierAdditional); private native int[] maxBytesForLevelMultiplierAdditional(long handle); private native void setParanoidFileChecks(long handle, boolean paranoidFileChecks); private native boolean paranoidFileChecks(long handle); private native void setMaxWriteBufferNumberToMaintain(final long handle, final int maxWriteBufferNumberToMaintain); private native int maxWriteBufferNumberToMaintain(final long handle); private native void setCompactionPriority(final long handle, final byte compactionPriority); private native byte compactionPriority(final long handle); private native void setReportBgIoStats(final long handle, final boolean reportBgIoStats); private native boolean reportBgIoStats(final long handle); private native void setTtl(final long handle, final long ttl); private native long ttl(final long handle); private native void setCompactionOptionsUniversal(final 
long handle, final long compactionOptionsUniversalHandle); private native void setCompactionOptionsFIFO(final long handle, final long compactionOptionsFIFOHandle); private native void setForceConsistencyChecks(final long handle, final boolean forceConsistencyChecks); private native boolean forceConsistencyChecks(final long handle); // instance variables // NOTE: If you add new member variables, please update the copy constructor above! private MemTableConfig memTableConfig_; private TableFormatConfig tableFormatConfig_; private AbstractComparator comparator_; private AbstractCompactionFilter> compactionFilter_; private AbstractCompactionFilterFactory> compactionFilterFactory_; private CompactionOptionsUniversal compactionOptionsUniversal_; private CompactionOptionsFIFO compactionOptionsFIFO_; private CompressionOptions bottommostCompressionOptions_; private CompressionOptions compressionOptions_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java000066400000000000000000000365531370372246700275130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public interface ColumnFamilyOptionsInterface> extends AdvancedColumnFamilyOptionsInterface { /** * Use this if your DB is very small (like under 1GB) and you don't want to * spend lots of memory for memtables. * * @return the instance of the current object. */ T optimizeForSmallDb(); /** * Use this if you don't need to keep the data sorted, i.e. you'll never use * an iterator, only Put() and Get() API calls * * @param blockCacheSizeMb Block cache size in MB * @return the instance of the current object. */ T optimizeForPointLookup(long blockCacheSizeMb); /** *

<p>Default values for some parameters in ColumnFamilyOptions are not * optimized for heavy workloads and big datasets, which means you might * observe write stalls under some conditions. As a starting point for tuning * RocksDB options, use the following for level style compaction.</p>

* *

<p>Make sure to also call IncreaseParallelism(), which will provide the * biggest performance gains.</p>

*

<p>Note: we might use more memory than memtable_memory_budget during * periods of high write rate.</p>

* * @return the instance of the current object. */ T optimizeLevelStyleCompaction(); /** *

<p>Default values for some parameters in ColumnFamilyOptions are not * optimized for heavy workloads and big datasets, which means you might * observe write stalls under some conditions. As a starting point for tuning * RocksDB options, use the following for level style compaction.</p>

* *

<p>Make sure to also call IncreaseParallelism(), which will provide the * biggest performance gains.</p>

*

<p>Note: we might use more memory than memtable_memory_budget during * periods of high write rate.</p>

* * @param memtableMemoryBudget memory budget in bytes * @return the instance of the current object. */ T optimizeLevelStyleCompaction( long memtableMemoryBudget); /** *

<p>Default values for some parameters in ColumnFamilyOptions are not * optimized for heavy workloads and big datasets, which means you might * observe write stalls under some conditions. As a starting point for tuning * RocksDB options, use the following for universal style compaction.</p>

* *

<p>Universal style compaction is focused on reducing Write Amplification * Factor for big data sets, but increases Space Amplification.</p>

* *

<p>Make sure to also call IncreaseParallelism(), which will provide the * biggest performance gains.</p>

* *

<p>Note: we might use more memory than memtable_memory_budget during * periods of high write rate.</p>

* * @return the instance of the current object. */ T optimizeUniversalStyleCompaction(); /** *

<p>Default values for some parameters in ColumnFamilyOptions are not * optimized for heavy workloads and big datasets, which means you might * observe write stalls under some conditions. As a starting point for tuning * RocksDB options, use the following for universal style compaction.</p>

* *

<p>Universal style compaction is focused on reducing Write Amplification * Factor for big data sets, but increases Space Amplification.</p>

* *

<p>Make sure to also call IncreaseParallelism(), which will provide the * biggest performance gains.</p>

* *

<p>Note: we might use more memory than memtable_memory_budget during * periods of high write rate.</p>

* * @param memtableMemoryBudget memory budget in bytes * @return the instance of the current object. */ T optimizeUniversalStyleCompaction( long memtableMemoryBudget); /** * Set {@link BuiltinComparator} to be used with RocksDB. * * Note: Comparator can be set once upon database creation. * * Default: BytewiseComparator. * @param builtinComparator a {@link BuiltinComparator} type. * @return the instance of the current object. */ T setComparator( BuiltinComparator builtinComparator); /** * Use the specified comparator for key ordering. * * Comparator should not be disposed before options instances using this comparator is * disposed. If dispose() function is not called, then comparator object will be * GC'd automatically. * * Comparator instance can be re-used in multiple options instances. * * @param comparator java instance. * @return the instance of the current object. */ T setComparator( AbstractComparator comparator); /** *

<p>Set the merge operator to be used for merging two merge operands * of the same key. The merge function is invoked during * compaction and at lookup time, if multiple key/value pairs belonging * to the same key are found in the database.</p>

* * @param name the name of the merge function, as defined by * the MergeOperators factory (see utilities/MergeOperators.h) * The merge function is specified by name and must be one of the * standard merge operators provided by RocksDB. The available * operators are "put", "uint64add", "stringappend" and "stringappendtest". * @return the instance of the current object. */ T setMergeOperatorName(String name); /** *

<p>Set the merge operator to be used for merging two different key/value * pairs that share the same key. The merge function is invoked during * compaction and at lookup time, if multiple key/value pairs belonging * to the same key are found in the database.</p>

* * @param mergeOperator {@link MergeOperator} instance. * @return the instance of the current object. */ T setMergeOperator(MergeOperator mergeOperator); /** * A single CompactionFilter instance to call into during compaction. * Allows an application to modify/delete a key-value during background * compaction. * * If the client requires a new compaction filter to be used for different * compaction runs, it can specify call * {@link #setCompactionFilterFactory(AbstractCompactionFilterFactory)} * instead. * * The client should specify only set one of the two. * {@link #setCompactionFilter(AbstractCompactionFilter)} takes precedence * over {@link #setCompactionFilterFactory(AbstractCompactionFilterFactory)} * if the client specifies both. * * If multithreaded compaction is being used, the supplied CompactionFilter * instance may be used from different threads concurrently and so should be thread-safe. * * @param compactionFilter {@link AbstractCompactionFilter} instance. * @return the instance of the current object. */ T setCompactionFilter( final AbstractCompactionFilter> compactionFilter); /** * Accessor for the CompactionFilter instance in use. * * @return Reference to the CompactionFilter, or null if one hasn't been set. */ AbstractCompactionFilter> compactionFilter(); /** * This is a factory that provides {@link AbstractCompactionFilter} objects * which allow an application to modify/delete a key-value during background * compaction. * * A new filter will be created on each compaction run. If multithreaded * compaction is being used, each created CompactionFilter will only be used * from a single thread and so does not need to be thread-safe. * * @param compactionFilterFactory {@link AbstractCompactionFilterFactory} instance. * @return the instance of the current object. */ T setCompactionFilterFactory( final AbstractCompactionFilterFactory> compactionFilterFactory); /** * Accessor for the CompactionFilterFactory instance in use. 
* * @return Reference to the CompactionFilterFactory, or null if one hasn't been set. */ AbstractCompactionFilterFactory> compactionFilterFactory(); /** * This prefix-extractor uses the first n bytes of a key as its prefix. * * In some hash-based memtable representation such as HashLinkedList * and HashSkipList, prefixes are used to partition the keys into * several buckets. Prefix extractor is used to specify how to * extract the prefix given a key. * * @param n use the first n bytes of a key as its prefix. * @return the reference to the current option. */ T useFixedLengthPrefixExtractor(int n); /** * Same as fixed length prefix extractor, except that when slice is * shorter than the fixed length, it will use the full key. * * @param n use the first n bytes of a key as its prefix. * @return the reference to the current option. */ T useCappedPrefixExtractor(int n); /** * Number of files to trigger level-0 compaction. A value < 0 means that * level-0 compaction will not be triggered by number of files at all. * Default: 4 * * @param numFiles the number of files in level-0 to trigger compaction. * @return the reference to the current option. */ T setLevelZeroFileNumCompactionTrigger( int numFiles); /** * The number of files in level 0 to trigger compaction from level-0 to * level-1. A value < 0 means that level-0 compaction will not be * triggered by number of files at all. * Default: 4 * * @return the number of files in level 0 to trigger compaction. */ int levelZeroFileNumCompactionTrigger(); /** * Soft limit on number of level-0 files. We start slowing down writes at this * point. A value < 0 means that no writing slow down will be triggered by * number of files in level-0. * * @param numFiles soft limit on number of level-0 files. * @return the reference to the current option. */ T setLevelZeroSlowdownWritesTrigger( int numFiles); /** * Soft limit on the number of level-0 files. We start slowing down writes * at this point. 
A value < 0 means that no writing slow down will be * triggered by number of files in level-0. * * @return the soft limit on the number of level-0 files. */ int levelZeroSlowdownWritesTrigger(); /** * Maximum number of level-0 files. We stop writes at this point. * * @param numFiles the hard limit of the number of level-0 files. * @return the reference to the current option. */ T setLevelZeroStopWritesTrigger(int numFiles); /** * Maximum number of level-0 files. We stop writes at this point. * * @return the hard limit of the number of level-0 file. */ int levelZeroStopWritesTrigger(); /** * The ratio between the total size of level-(L+1) files and the total * size of level-L files for all L. * DEFAULT: 10 * * @param multiplier the ratio between the total size of level-(L+1) * files and the total size of level-L files for all L. * @return the reference to the current option. */ T setMaxBytesForLevelMultiplier( double multiplier); /** * The ratio between the total size of level-(L+1) files and the total * size of level-L files for all L. * DEFAULT: 10 * * @return the ratio between the total size of level-(L+1) files and * the total size of level-L files for all L. */ double maxBytesForLevelMultiplier(); /** * FIFO compaction option. * The oldest table file will be deleted * once the sum of table files reaches this size. * The default value is 1GB (1 * 1024 * 1024 * 1024). * * @param maxTableFilesSize the size limit of the total sum of table files. * @return the instance of the current object. */ T setMaxTableFilesSizeFIFO( long maxTableFilesSize); /** * FIFO compaction option. * The oldest table file will be deleted * once the sum of table files reaches this size. * The default value is 1GB (1 * 1024 * 1024 * 1024). * * @return the size limit of the total sum of table files. */ long maxTableFilesSizeFIFO(); /** * Get the config for mem-table. * * @return the mem-table config. */ MemTableConfig memTableConfig(); /** * Set the config for mem-table. 
* * @param memTableConfig the mem-table config. * @return the instance of the current object. * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms * while overflowing the underlying platform specific value. */ T setMemTableConfig(MemTableConfig memTableConfig); /** * Returns the name of the current mem table representation. * Memtable format can be set using setTableFormatConfig. * * @return the name of the currently-used memtable factory. * @see #setTableFormatConfig(org.rocksdb.TableFormatConfig) */ String memTableFactoryName(); /** * Get the config for table format. * * @return the table format config. */ TableFormatConfig tableFormatConfig(); /** * Set the config for table format. * * @param config the table format config. * @return the reference of the current options. */ T setTableFormatConfig(TableFormatConfig config); /** * @return the name of the currently used table factory. */ String tableFactoryName(); /** * Compression algorithm that will be used for the bottommost level that * contain files. If level-compaction is used, this option will only affect * levels after base level. * * Default: {@link CompressionType#DISABLE_COMPRESSION_OPTION} * * @param bottommostCompressionType The compression type to use for the * bottommost level * * @return the reference of the current options. */ T setBottommostCompressionType( final CompressionType bottommostCompressionType); /** * Compression algorithm that will be used for the bottommost level that * contain files. If level-compaction is used, this option will only affect * levels after base level. * * Default: {@link CompressionType#DISABLE_COMPRESSION_OPTION} * * @return The compression type used for the bottommost level */ CompressionType bottommostCompressionType(); /** * Set the options for compression algorithms used by * {@link #bottommostCompressionType()} if it is enabled. * * To enable it, please see the definition of * {@link CompressionOptions}. 
* * @param compressionOptions the bottom most compression options. * * @return the reference of the current options. */ T setBottommostCompressionOptions( final CompressionOptions compressionOptions); /** * Get the bottom most compression options. * * See {@link #setBottommostCompressionOptions(CompressionOptions)}. * * @return the bottom most compression options. */ CompressionOptions bottommostCompressionOptions(); /** * Set the different options for compression algorithms * * @param compressionOptions The compression options * * @return the reference of the current options. */ T setCompressionOptions( CompressionOptions compressionOptions); /** * Get the different options for compression algorithms * * @return The compression options */ CompressionOptions compressionOptions(); /** * Default memtable memory budget used with the following methods: * *
<ol> * <li>{@link #optimizeLevelStyleCompaction()}</li> * <li>{@link #optimizeUniversalStyleCompaction()}</li> * </ol>
*/ long DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET = 512 * 1024 * 1024; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactRangeOptions.java000066400000000000000000000201421370372246700256210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * CompactRangeOptions is used by CompactRange() call. In the documentation of the methods "the compaction" refers to * any compaction that is using this CompactRangeOptions. */ public class CompactRangeOptions extends RocksObject { private final static byte VALUE_kSkip = 0; private final static byte VALUE_kIfHaveCompactionFilter = 1; private final static byte VALUE_kForce = 2; // For level based compaction, we can configure if we want to skip/force bottommost level compaction. // The order of this neum MUST follow the C++ layer. See BottommostLevelCompaction in db/options.h public enum BottommostLevelCompaction { /** * Skip bottommost level compaction */ kSkip((byte)VALUE_kSkip), /** * Only compact bottommost level if there is a compaction filter. This is the default option */ kIfHaveCompactionFilter(VALUE_kIfHaveCompactionFilter), /** * Always compact bottommost level */ kForce(VALUE_kForce); private final byte value; BottommostLevelCompaction(final byte value) { this.value = value; } /** *

<p>Returns the byte value of the enumeration value.</p>

* * @return byte representation */ public byte getValue() { return value; } /** * Returns the BottommostLevelCompaction for the given C++ rocks enum value. * @param bottommostLevelCompaction The value of the BottommostLevelCompaction * @return BottommostLevelCompaction instance, or null if none matches */ public static BottommostLevelCompaction fromRocksId(final int bottommostLevelCompaction) { switch (bottommostLevelCompaction) { case VALUE_kSkip: return kSkip; case VALUE_kIfHaveCompactionFilter: return kIfHaveCompactionFilter; case VALUE_kForce: return kForce; default: return null; } } } /** * Construct CompactRangeOptions. */ public CompactRangeOptions() { super(newCompactRangeOptions()); } /** * Returns whether the compaction is exclusive or other compactions may run concurrently at the same time. * * @return true if exclusive, false if concurrent */ public boolean exclusiveManualCompaction() { return exclusiveManualCompaction(nativeHandle_); } /** * Sets whether the compaction is exclusive or other compaction are allowed run concurrently at the same time. * * @param exclusiveCompaction true if compaction should be exclusive * @return This CompactRangeOptions */ public CompactRangeOptions setExclusiveManualCompaction(final boolean exclusiveCompaction) { setExclusiveManualCompaction(nativeHandle_, exclusiveCompaction); return this; } /** * Returns whether compacted files will be moved to the minimum level capable of holding the data or given level * (specified non-negative target_level). * @return true, if compacted files will be moved to the minimum level */ public boolean changeLevel() { return changeLevel(nativeHandle_); } /** * Whether compacted files will be moved to the minimum level capable of holding the data or given level * (specified non-negative target_level). 
* * @param changeLevel If true, compacted files will be moved to the minimum level * @return This CompactRangeOptions */ public CompactRangeOptions setChangeLevel(final boolean changeLevel) { setChangeLevel(nativeHandle_, changeLevel); return this; } /** * If change_level is true and target_level have non-negative value, compacted files will be moved to target_level. * @return The target level for the compacted files */ public int targetLevel() { return targetLevel(nativeHandle_); } /** * If change_level is true and target_level have non-negative value, compacted files will be moved to target_level. * * @param targetLevel target level for the compacted files * @return This CompactRangeOptions */ public CompactRangeOptions setTargetLevel(final int targetLevel) { setTargetLevel(nativeHandle_, targetLevel); return this; } /** * target_path_id for compaction output. Compaction outputs will be placed in options.db_paths[target_path_id]. * * @return target_path_id */ public int targetPathId() { return targetPathId(nativeHandle_); } /** * Compaction outputs will be placed in options.db_paths[target_path_id]. Behavior is undefined if target_path_id is * out of range. 
* * @param targetPathId target path id * @return This CompactRangeOptions */ public CompactRangeOptions setTargetPathId(final int targetPathId) { setTargetPathId(nativeHandle_, targetPathId); return this; } /** * Returns the policy for compacting the bottommost level * @return The BottommostLevelCompaction policy */ public BottommostLevelCompaction bottommostLevelCompaction() { return BottommostLevelCompaction.fromRocksId(bottommostLevelCompaction(nativeHandle_)); } /** * Sets the policy for compacting the bottommost level * * @param bottommostLevelCompaction The policy for compacting the bottommost level * @return This CompactRangeOptions */ public CompactRangeOptions setBottommostLevelCompaction(final BottommostLevelCompaction bottommostLevelCompaction) { setBottommostLevelCompaction(nativeHandle_, bottommostLevelCompaction.getValue()); return this; } /** * If true, compaction will execute immediately even if doing so would cause the DB to * enter write stall mode. Otherwise, it'll sleep until load is low enough. * @return true if compaction will execute immediately */ public boolean allowWriteStall() { return allowWriteStall(nativeHandle_); } /** * If true, compaction will execute immediately even if doing so would cause the DB to * enter write stall mode. Otherwise, it'll sleep until load is low enough. 
* * @return This CompactRangeOptions * @param allowWriteStall true if compaction should execute immediately */ public CompactRangeOptions setAllowWriteStall(final boolean allowWriteStall) { setAllowWriteStall(nativeHandle_, allowWriteStall); return this; } /** * If > 0, it will replace the option in the DBOptions for this compaction * @return number of subcompactions */ public int maxSubcompactions() { return maxSubcompactions(nativeHandle_); } /** * If > 0, it will replace the option in the DBOptions for this compaction * * @param maxSubcompactions number of subcompactions * @return This CompactRangeOptions */ public CompactRangeOptions setMaxSubcompactions(final int maxSubcompactions) { setMaxSubcompactions(nativeHandle_, maxSubcompactions); return this; } private native static long newCompactRangeOptions(); @Override protected final native void disposeInternal(final long handle); private native boolean exclusiveManualCompaction(final long handle); private native void setExclusiveManualCompaction(final long handle, final boolean exclusive_manual_compaction); private native boolean changeLevel(final long handle); private native void setChangeLevel(final long handle, final boolean changeLevel); private native int targetLevel(final long handle); private native void setTargetLevel(final long handle, final int targetLevel); private native int targetPathId(final long handle); private native void setTargetPathId(final long handle, final int targetPathId); private native int bottommostLevelCompaction(final long handle); private native void setBottommostLevelCompaction(final long handle, final int bottommostLevelCompaction); private native boolean allowWriteStall(final long handle); private native void setAllowWriteStall(final long handle, final boolean allowWriteStall); private native void setMaxSubcompactions(final long handle, final int maxSubcompactions); private native int maxSubcompactions(final long handle); } 
rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionJobInfo.java000066400000000000000000000100551370372246700252470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Information about a compaction job.
 *
 * Every accessor forwards to a static native method, passing this object's
 * native handle.
 */
public class CompactionJobInfo extends RocksObject {

  public CompactionJobInfo() {
    super(newCompactionJobInfo());
  }

  /**
   * Private as called from JNI C++
   *
   * @param nativeHandle the handle of the native object to wrap
   */
  private CompactionJobInfo(final long nativeHandle) {
    super(nativeHandle);
  }

  /**
   * Get the name of the column family where the compaction happened.
   *
   * @return the name of the column family
   */
  public byte[] columnFamilyName() {
    return columnFamilyName(nativeHandle_);
  }

  /**
   * Get the status indicating whether the compaction was successful or not.
   *
   * @return the status
   */
  public Status status() {
    return status(nativeHandle_);
  }

  /**
   * Get the id of the thread that completed this compaction job.
   *
   * @return the id of the thread
   */
  public long threadId() {
    return threadId(nativeHandle_);
  }

  /**
   * Get the job id, which is unique in the same thread.
   *
   * @return the job id
   */
  public int jobId() {
    return jobId(nativeHandle_);
  }

  /**
   * Get the smallest input level of the compaction.
   *
   * @return the input level
   */
  public int baseInputLevel() {
    return baseInputLevel(nativeHandle_);
  }

  /**
   * Get the output level of the compaction.
   *
   * @return the output level
   */
  public int outputLevel() {
    return outputLevel(nativeHandle_);
  }

  /**
   * Get the names of the compaction input files.
   *
   * @return the names of the input files, as a fixed-size list backed by
   *     the array returned from the native call.
   */
  public List<String> inputFiles() {
    return Arrays.asList(inputFiles(nativeHandle_));
  }

  /**
   * Get the names of the compaction output files.
   *
   * @return the names of the output files, as a fixed-size list backed by
   *     the array returned from the native call.
   */
  public List<String> outputFiles() {
    return Arrays.asList(outputFiles(nativeHandle_));
  }

  /**
   * Get the table properties for the input and output tables.
   *
   * The map is keyed by values from {@link #inputFiles()} and
   * {@link #outputFiles()}.
   *
   * NOTE(review): the return type is a raw {@code Map}; the keys are
   * presumably {@code String} file names, but the value type is not visible
   * here - confirm and restore the type arguments.
   *
   * @return the table properties
   */
  public Map tableProperties() {
    return tableProperties(nativeHandle_);
  }

  /**
   * Get the Reason for running the compaction.
   *
   * @return the reason.
   */
  public CompactionReason compactionReason() {
    return CompactionReason.fromValue(compactionReason(nativeHandle_));
  }

  /**
   * Get the compression algorithm used for output files.
   *
   * @return the compression algorithm
   */
  public CompressionType compression() {
    return CompressionType.getCompressionType(compression(nativeHandle_));
  }

  /**
   * Get detailed information about this compaction.
   *
   * @return the detailed information, or null if not available.
   */
  public /* @Nullable */ CompactionJobStats stats() {
    final long statsHandle = stats(nativeHandle_);
    // A zero handle from the native side means no stats are available.
    if (statsHandle == 0) {
      return null;
    }

    return new CompactionJobStats(statsHandle);
  }

  /* JNI bindings; the accessors above pass nativeHandle_ as handle. */
  private static native long newCompactionJobInfo();
  @Override protected native void disposeInternal(final long handle);

  private static native byte[] columnFamilyName(final long handle);
  private static native Status status(final long handle);
  private static native long threadId(final long handle);
  private static native int jobId(final long handle);
  private static native int baseInputLevel(final long handle);
  private static native int outputLevel(final long handle);
  private static native String[] inputFiles(final long handle);
  private static native String[] outputFiles(final long handle);
  private static native Map tableProperties(
      final long handle);
  private static native byte compactionReason(final long handle);
  private static native byte compression(final long handle);
  private static native long stats(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionJobStats.java000066400000000000000000000211401370372246700254470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Statistics describing a compaction job. * * Every accessor forwards to a static native method, passing this object's * native handle. */ public class CompactionJobStats extends RocksObject { public CompactionJobStats() { super(newCompactionJobStats()); } /** * Wraps an existing native handle. * * Package-private rather than private: besides being called from JNI C++, * it is used by {@link CompactionJobInfo#stats()}. * * @param nativeHandle the handle of the native object to wrap */ CompactionJobStats(final long nativeHandle) { super(nativeHandle); } /** * Reset the stats. */ public void reset() { reset(nativeHandle_); } /** * Aggregate the CompactionJobStats from another instance with this one. * * @param compactionJobStats another instance of stats. */ public void add(final CompactionJobStats compactionJobStats) { add(nativeHandle_, compactionJobStats.nativeHandle_); } /** * Get the elapsed time of this compaction in microseconds. * * @return the elapsed time of this compaction in microseconds. */ public long elapsedMicros() { return elapsedMicros(nativeHandle_); } /** * Get the number of compaction input records. * * @return the number of compaction input records. */ public long numInputRecords() { return numInputRecords(nativeHandle_); } /** * Get the number of compaction input files. * * @return the number of compaction input files. */ public long numInputFiles() { return numInputFiles(nativeHandle_); } /** * Get the number of compaction input files at the output level. * * @return the number of compaction input files at the output level. */ public long numInputFilesAtOutputLevel() { return numInputFilesAtOutputLevel(nativeHandle_); } /** * Get the number of compaction output records. * * @return the number of compaction output records. */ public long numOutputRecords() { return numOutputRecords(nativeHandle_); } /** * Get the number of compaction output files.
* * @return the number of compaction output files. */ public long numOutputFiles() { return numOutputFiles(nativeHandle_); } /** * Determine if the compaction is a manual compaction. * * @return true if the compaction is a manual compaction, false otherwise. */ public boolean isManualCompaction() { return isManualCompaction(nativeHandle_); } /** * Get the size of the compaction input in bytes. * * @return the size of the compaction input in bytes. */ public long totalInputBytes() { return totalInputBytes(nativeHandle_); } /** * Get the size of the compaction output in bytes. * * @return the size of the compaction output in bytes. */ public long totalOutputBytes() { return totalOutputBytes(nativeHandle_); } /** * Get the number of records being replaced by newer record associated * with same key. * * This could be a new value or a deletion entry for that key so this field * sums up all updated and deleted keys. * * @return the number of records being replaced by newer record associated * with same key. */ public long numRecordsReplaced() { return numRecordsReplaced(nativeHandle_); } /** * Get the sum of the uncompressed input keys in bytes. * * @return the sum of the uncompressed input keys in bytes. */ public long totalInputRawKeyBytes() { return totalInputRawKeyBytes(nativeHandle_); } /** * Get the sum of the uncompressed input values in bytes. * * @return the sum of the uncompressed input values in bytes. */ public long totalInputRawValueBytes() { return totalInputRawValueBytes(nativeHandle_); } /** * Get the number of deletion entries before compaction. * * Deletion entries can disappear after compaction because they expired. * * @return the number of deletion entries before compaction. */ public long numInputDeletionRecords() { return numInputDeletionRecords(nativeHandle_); } /** * Get the number of deletion records that were found obsolete and discarded * because it is not possible to delete any more keys with this entry. * (i.e.
all possible deletions resulting from it have been completed) * * @return the number of deletion records that were found obsolete and * discarded. */ public long numExpiredDeletionRecords() { return numExpiredDeletionRecords(nativeHandle_); } /** * Get the number of corrupt keys (ParseInternalKey returned false when * applied to the key) encountered and written out. * * @return the number of corrupt keys. */ public long numCorruptKeys() { return numCorruptKeys(nativeHandle_); } /** * Get the Time spent on file's Append() call. * * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. * * @return the Time spent on file's Append() call. */ public long fileWriteNanos() { return fileWriteNanos(nativeHandle_); } /** * Get the Time spent on sync file range. * * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. * * @return the Time spent on sync file range. */ public long fileRangeSyncNanos() { return fileRangeSyncNanos(nativeHandle_); } /** * Get the Time spent on file fsync. * * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. * * @return the Time spent on file fsync. */ public long fileFsyncNanos() { return fileFsyncNanos(nativeHandle_); } /** * Get the Time spent on preparing file write (fallocate, etc) * * Only populated if {@link ColumnFamilyOptions#reportBgIoStats()} is set. * * @return the Time spent on preparing file write (fallocate, etc). */ public long filePrepareWriteNanos() { return filePrepareWriteNanos(nativeHandle_); } /** * Get the smallest output key prefix. * * @return the smallest output key prefix. */ public byte[] smallestOutputKeyPrefix() { return smallestOutputKeyPrefix(nativeHandle_); } /** * Get the largest output key prefix. * * @return the largest output key prefix. */ public byte[] largestOutputKeyPrefix() { return largestOutputKeyPrefix(nativeHandle_); } /** * Get the number of single-deletes which do not meet a put. * * @return number of single-deletes which do not meet a put.
*/ @Experimental("Performance optimization for a very specific workload") public long numSingleDelFallthru() { return numSingleDelFallthru(nativeHandle_); } /** * Get the number of single-deletes which meet something other than a put. * * @return the number of single-deletes which meet something other than a put. */ @Experimental("Performance optimization for a very specific workload") public long numSingleDelMismatch() { return numSingleDelMismatch(nativeHandle_); } private static native long newCompactionJobStats(); @Override protected native void disposeInternal(final long handle); private static native void reset(final long handle); private static native void add(final long handle, final long compactionJobStatsHandle); private static native long elapsedMicros(final long handle); private static native long numInputRecords(final long handle); private static native long numInputFiles(final long handle); private static native long numInputFilesAtOutputLevel(final long handle); private static native long numOutputRecords(final long handle); private static native long numOutputFiles(final long handle); private static native boolean isManualCompaction(final long handle); private static native long totalInputBytes(final long handle); private static native long totalOutputBytes(final long handle); private static native long numRecordsReplaced(final long handle); private static native long totalInputRawKeyBytes(final long handle); private static native long totalInputRawValueBytes(final long handle); private static native long numInputDeletionRecords(final long handle); private static native long numExpiredDeletionRecords(final long handle); private static native long numCorruptKeys(final long handle); private static native long fileWriteNanos(final long handle); private static native long fileRangeSyncNanos(final long handle); private static native long fileFsyncNanos(final long handle); private static native long filePrepareWriteNanos(final long handle); private static
native byte[] smallestOutputKeyPrefix(final long handle); private static native byte[] largestOutputKeyPrefix(final long handle); private static native long numSingleDelFallthru(final long handle); private static native long numSingleDelMismatch(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionOptions.java000066400000000000000000000073701370372246700253620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.List; /** * CompactionOptions are used in * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, CompactionJobInfo)} * calls. */ public class CompactionOptions extends RocksObject { public CompactionOptions() { super(newCompactionOptions()); } /** * Get the compaction output compression type. * * See {@link #setCompression(CompressionType)}. * * @return the compression type. */ public CompressionType compression() { return CompressionType.getCompressionType( compression(nativeHandle_)); } /** * Set the compaction output compression type. * * Default: snappy * * If set to {@link CompressionType#DISABLE_COMPRESSION_OPTION}, * RocksDB will choose compression type according to the * {@link ColumnFamilyOptions#compressionType()}, taking into account * the output level if {@link ColumnFamilyOptions#compressionPerLevel()} * is specified. * * @param compression the compression type to use for compaction output. * * @return this CompactionOptions instance, to allow call chaining. */ public CompactionOptions setCompression(final CompressionType compression) { setCompression(nativeHandle_, compression.getValue()); return this; } /** * Get the compaction output file size limit. * * See {@link #setOutputFileSizeLimit(long)}. * * @return the file size limit.
*/ public long outputFileSizeLimit() { return outputFileSizeLimit(nativeHandle_); } /** * Compaction will create files of size {@link #outputFileSizeLimit()}. * * Default: 2^64-1, which means that compaction will create a single file * * NOTE(review): 2^64-1 does not fit in a signed Java {@code long}; * presumably the default reads back as -1 from * {@link #outputFileSizeLimit()} - confirm. * * @param outputFileSizeLimit the size limit * * @return this CompactionOptions instance, to allow call chaining. */ public CompactionOptions setOutputFileSizeLimit( final long outputFileSizeLimit) { setOutputFileSizeLimit(nativeHandle_, outputFileSizeLimit); return this; } /** * Get the maximum number of threads that will concurrently perform a * compaction job. * * @return the maximum number of threads. */ public int maxSubcompactions() { return maxSubcompactions(nativeHandle_); } /** * This value represents the maximum number of threads that will * concurrently perform a compaction job by breaking it into multiple, * smaller ones that are run simultaneously. * * Default: 0 (i.e. no subcompactions) * * If > 0, it will replace the option in * {@link DBOptions#maxSubcompactions()} for this compaction. * * @param maxSubcompactions The maximum number of threads that will * concurrently perform a compaction job * * @return this CompactionOptions instance, to allow call chaining.
*/ public CompactionOptions setMaxSubcompactions(final int maxSubcompactions) { setMaxSubcompactions(nativeHandle_, maxSubcompactions); return this; } /* JNI bindings; the accessors above pass nativeHandle_ as handle. */ private static native long newCompactionOptions(); @Override protected final native void disposeInternal(final long handle); private static native byte compression(final long handle); private static native void setCompression(final long handle, final byte compressionTypeValue); private static native long outputFileSizeLimit(final long handle); private static native void setOutputFileSizeLimit(final long handle, final long outputFileSizeLimit); private static native int maxSubcompactions(final long handle); private static native void setMaxSubcompactions(final long handle, final int maxSubcompactions); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java000066400000000000000000000051231370372246700260200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Options for FIFO Compaction */ public class CompactionOptionsFIFO extends RocksObject { public CompactionOptionsFIFO() { super(newCompactionOptionsFIFO()); } /** * Once the total sum of table files reaches this, we will delete the oldest * table file * * Default: 1GB * * @param maxTableFilesSize The maximum size of the table files * * @return the reference to the current options.
*/ public CompactionOptionsFIFO setMaxTableFilesSize( final long maxTableFilesSize) { setMaxTableFilesSize(nativeHandle_, maxTableFilesSize); return this; } /** * Once the total sum of table files reaches this, we will delete the oldest * table file * * Default: 1GB * * @return max table file size in bytes */ public long maxTableFilesSize() { return maxTableFilesSize(nativeHandle_); } /** * If true, try to do compaction to compact smaller files into larger ones. * Minimum files to compact follows options.level0_file_num_compaction_trigger * and compaction won't trigger if average compact bytes per del file is * larger than options.write_buffer_size. This is to protect large files * from being compacted again. * * Default: false * * @param allowCompaction true to allow intra-L0 compaction * * @return the reference to the current options. */ public CompactionOptionsFIFO setAllowCompaction( final boolean allowCompaction) { setAllowCompaction(nativeHandle_, allowCompaction); return this; } /** * Check if intra-L0 compaction is enabled. * When enabled, we try to compact smaller files into larger ones. * * See {@link #setAllowCompaction(boolean)}. * * Default: false * * @return true if intra-L0 compaction is enabled, false otherwise. */ public boolean allowCompaction() { return allowCompaction(nativeHandle_); } /* JNI bindings; the accessors above pass nativeHandle_ as handle. */ private native static long newCompactionOptionsFIFO(); @Override protected final native void disposeInternal(final long handle); private native void setMaxTableFilesSize(final long handle, final long maxTableFilesSize); private native long maxTableFilesSize(final long handle); private native void setAllowCompaction(final long handle, final boolean allowCompaction); private native boolean allowCompaction(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java000066400000000000000000000225431370372246700272520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Options for Universal Compaction */ public class CompactionOptionsUniversal extends RocksObject { public CompactionOptionsUniversal() { super(newCompactionOptionsUniversal()); } /** * Percentage flexibility while comparing file size. If the candidate file(s) * size is 1% smaller than the next file's size, then include next file into * this candidate set. * * Default: 1 * * @param sizeRatio The size ratio to use * * @return the reference to the current options. */ public CompactionOptionsUniversal setSizeRatio(final int sizeRatio) { setSizeRatio(nativeHandle_, sizeRatio); return this; } /** * Percentage flexibility while comparing file size. If the candidate file(s) * size is 1% smaller than the next file's size, then include next file into * this candidate set. * * Default: 1 * * @return The size ratio in use */ public int sizeRatio() { return sizeRatio(nativeHandle_); } /** * The minimum number of files in a single compaction run. * * Default: 2 * * @param minMergeWidth minimum number of files in a single compaction run * * @return the reference to the current options. */ public CompactionOptionsUniversal setMinMergeWidth(final int minMergeWidth) { setMinMergeWidth(nativeHandle_, minMergeWidth); return this; } /** * The minimum number of files in a single compaction run. * * Default: 2 * * @return minimum number of files in a single compaction run */ public int minMergeWidth() { return minMergeWidth(nativeHandle_); } /** * The maximum number of files in a single compaction run. * * Default: {@link Long#MAX_VALUE} * * NOTE(review): the parameter is an {@code int}, so {@link Long#MAX_VALUE} * cannot be the literal default; confirm the actual default against the * native option. * * @param maxMergeWidth maximum number of files in a single compaction run * * @return the reference to the current options.
*/ public CompactionOptionsUniversal setMaxMergeWidth(final int maxMergeWidth) { setMaxMergeWidth(nativeHandle_, maxMergeWidth); return this; } /** * The maximum number of files in a single compaction run. * * Default: {@link Long#MAX_VALUE} * * NOTE(review): the return type is {@code int}, so {@link Long#MAX_VALUE} * cannot be the literal default; confirm the actual default against the * native option. * * @return maximum number of files in a single compaction run */ public int maxMergeWidth() { return maxMergeWidth(nativeHandle_); } /** * The size amplification is defined as the amount (in percentage) of * additional storage needed to store a single byte of data in the database. * For example, a size amplification of 2% means that a database that * contains 100 bytes of user-data may occupy up to 102 bytes of * physical storage. By this definition, a fully compacted database has * a size amplification of 0%. Rocksdb uses the following heuristic * to calculate size amplification: it assumes that all files excluding * the earliest file contribute to the size amplification. * * Default: 200, which means that a 100 byte database could require up to * 300 bytes of storage. * * @param maxSizeAmplificationPercent the amount of additional storage needed * (as a percentage) to store a single byte in the database * * @return the reference to the current options. */ public CompactionOptionsUniversal setMaxSizeAmplificationPercent( final int maxSizeAmplificationPercent) { setMaxSizeAmplificationPercent(nativeHandle_, maxSizeAmplificationPercent); return this; } /** * The size amplification is defined as the amount (in percentage) of * additional storage needed to store a single byte of data in the database. * For example, a size amplification of 2% means that a database that * contains 100 bytes of user-data may occupy up to 102 bytes of * physical storage. By this definition, a fully compacted database has * a size amplification of 0%. Rocksdb uses the following heuristic * to calculate size amplification: it assumes that all files excluding * the earliest file contribute to the size amplification.
* * Default: 200, which means that a 100 byte database could require up to * 300 bytes of storage. * * @return the amount of additional storage needed (as a percentage) to store * a single byte in the database */ public int maxSizeAmplificationPercent() { return maxSizeAmplificationPercent(nativeHandle_); } /** * If this option is set to be -1 (the default value), all the output files * will follow compression type specified. * * If this option is not negative, we will try to make sure compressed * size is just above this value. In normal cases, at least this percentage * of data will be compressed. * * When we are compacting to a new file, here is the criteria whether * it needs to be compressed: assuming here are the list of files sorted * by generation time: * A1...An B1...Bm C1...Ct * where A1 is the newest and Ct is the oldest, and we are going to compact * B1...Bm, we calculate the total size of all the files as total_size, as * well as the total size of C1...Ct as total_C, the compaction output file * will be compressed iff * total_C / total_size < this percentage * * Default: -1 * * @param compressionSizePercent percentage of size for compression * * @return the reference to the current options. */ public CompactionOptionsUniversal setCompressionSizePercent( final int compressionSizePercent) { setCompressionSizePercent(nativeHandle_, compressionSizePercent); return this; } /** * If this option is set to be -1 (the default value), all the output files * will follow compression type specified. * * If this option is not negative, we will try to make sure compressed * size is just above this value. In normal cases, at least this percentage * of data will be compressed.
* * When we are compacting to a new file, here is the criteria whether * it needs to be compressed: assuming here are the list of files sorted * by generation time: * A1...An B1...Bm C1...Ct * where A1 is the newest and Ct is the oldest, and we are going to compact * B1...Bm, we calculate the total size of all the files as total_size, as * well as the total size of C1...Ct as total_C, the compaction output file * will be compressed iff * total_C / total_size < this percentage * * Default: -1 * * @return percentage of size for compression */ public int compressionSizePercent() { return compressionSizePercent(nativeHandle_); } /** * The algorithm used to stop picking files into a single compaction run * * Default: {@link CompactionStopStyle#CompactionStopStyleTotalSize} * * @param compactionStopStyle The compaction algorithm * * @return the reference to the current options. */ public CompactionOptionsUniversal setStopStyle( final CompactionStopStyle compactionStopStyle) { setStopStyle(nativeHandle_, compactionStopStyle.getValue()); return this; } /** * The algorithm used to stop picking files into a single compaction run * * Default: {@link CompactionStopStyle#CompactionStopStyleTotalSize} * * @return The compaction algorithm */ public CompactionStopStyle stopStyle() { return CompactionStopStyle.getCompactionStopStyle(stopStyle(nativeHandle_)); } /** * Option to optimize the universal multi level compaction by enabling * trivial move for non overlapping files. * * Default: false * * @param allowTrivialMove true if trivial move is allowed * * @return the reference to the current options. */ public CompactionOptionsUniversal setAllowTrivialMove( final boolean allowTrivialMove) { setAllowTrivialMove(nativeHandle_, allowTrivialMove); return this; } /** * Option to optimize the universal multi level compaction by enabling * trivial move for non overlapping files.
* * Default: false * * @return true if trivial move is allowed */ public boolean allowTrivialMove() { return allowTrivialMove(nativeHandle_); } /* JNI bindings; the accessors above pass nativeHandle_ as handle. */ private native static long newCompactionOptionsUniversal(); @Override protected final native void disposeInternal(final long handle); private native void setSizeRatio(final long handle, final int sizeRatio); private native int sizeRatio(final long handle); private native void setMinMergeWidth( final long handle, final int minMergeWidth); private native int minMergeWidth(final long handle); private native void setMaxMergeWidth( final long handle, final int maxMergeWidth); private native int maxMergeWidth(final long handle); private native void setMaxSizeAmplificationPercent( final long handle, final int maxSizeAmplificationPercent); private native int maxSizeAmplificationPercent(final long handle); private native void setCompressionSizePercent( final long handle, final int compressionSizePercent); private native int compressionSizePercent(final long handle); private native void setStopStyle( final long handle, final byte stopStyle); private native byte stopStyle(final long handle); private native void setAllowTrivialMove( final long handle, final boolean allowTrivialMove); private native boolean allowTrivialMove(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionPriority.java000066400000000000000000000037651370372246700255540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Compaction Priorities */ public enum CompactionPriority { /** * Slightly Prioritize larger files by size compensated by #deletes */ ByCompensatedSize((byte)0x0), /** * First compact files whose data's latest update time is oldest.
* Try this if you only update some hot keys in small ranges. */ OldestLargestSeqFirst((byte)0x1), /** * First compact files whose range hasn't been compacted to the next level * for the longest. If your updates are random across the key space, * write amplification is slightly better with this option. */ OldestSmallestSeqFirst((byte)0x2), /** * First compact files whose ratio between overlapping size in next level * and its size is the smallest. It in many cases can optimize write * amplification. */ MinOverlappingRatio((byte)0x3); private final byte value; CompactionPriority(final byte value) { this.value = value; } /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value; } /** * Get CompactionPriority by byte value. * * Scans all constants for one whose byte value equals the argument; * never returns null. * * @param value byte representation of CompactionPriority. * * @return the matching {@link org.rocksdb.CompactionPriority} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ public static CompactionPriority getCompactionPriority(final byte value) { for (final CompactionPriority compactionPriority : CompactionPriority.values()) { if (compactionPriority.getValue() == value){ return compactionPriority; } } throw new IllegalArgumentException( "Illegal value provided for CompactionPriority."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionReason.java000066400000000000000000000051051370372246700251500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; /** * The reason a compaction was run. * * Each constant carries the byte value used as its internal representation; * see {@link #fromValue(byte)}. */ public enum CompactionReason { kUnknown((byte)0x0), /** * [Level] number of L0 files > level0_file_num_compaction_trigger */ kLevelL0FilesNum((byte)0x1), /** * [Level] total size of level > MaxBytesForLevel() */ kLevelMaxLevelSize((byte)0x2), /** * [Universal] Compacting for size amplification */ kUniversalSizeAmplification((byte)0x3), /** * [Universal] Compacting for size ratio */ kUniversalSizeRatio((byte)0x4), /** * [Universal] number of sorted runs > level0_file_num_compaction_trigger */ kUniversalSortedRunNum((byte)0x5), /** * [FIFO] total size > max_table_files_size */ kFIFOMaxSize((byte)0x6), /** * [FIFO] reduce number of files. */ kFIFOReduceNumFiles((byte)0x7), /** * [FIFO] files with creation time < (current_time - interval) */ kFIFOTtl((byte)0x8), /** * Manual compaction */ kManualCompaction((byte)0x9), /** * DB::SuggestCompactRange() marked files for compaction * * NOTE(review): 0x10 (decimal 16) breaks the otherwise sequential * numbering (0x9 is followed by 0x0A below). Presumably it has to match * the value produced by the JNI layer - confirm before changing it. */ kFilesMarkedForCompaction((byte)0x10), /** * [Level] Automatic compaction within bottommost level to cleanup duplicate * versions of same user key, usually due to a released snapshot. */ kBottommostFiles((byte)0x0A), /** * Compaction based on TTL */ kTtl((byte)0x0B), /** * According to the comments in flush_job.cc, RocksDB treats flush as * a level 0 compaction in internal stats. */ kFlush((byte)0x0C), /** * Compaction caused by external sst file ingestion */ kExternalSstIngestion((byte)0x0D); private final byte value; CompactionReason(final byte value) { this.value = value; } /** * Get the internal representation value. * * @return the internal representation value */ byte getValue() { return value; } /** * Get the CompactionReason from the internal representation value. * * @param value the internal representation value. * * @return the compaction reason. * * @throws IllegalArgumentException if the value is unknown.
*/ static CompactionReason fromValue(final byte value) { for (final CompactionReason compactionReason : CompactionReason.values()) { if(compactionReason.value == value) { return compactionReason; } } throw new IllegalArgumentException( "Illegal value provided for CompactionReason: " + value); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionStopStyle.java000066400000000000000000000025211370372246700256660ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** * Algorithm used to make a compaction request stop picking new files * into a single compaction run */ public enum CompactionStopStyle { /** * Pick files of similar size */ CompactionStopStyleSimilarSize((byte)0x0), /** * Total size of picked files > next file */ CompactionStopStyleTotalSize((byte)0x1); private final byte value; CompactionStopStyle(final byte value) { this.value = value; } /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value; } /** * Get CompactionStopStyle by byte value. * * Scans all constants for one whose byte value equals the argument; * never returns null. * * @param value byte representation of CompactionStopStyle. * * @return the matching {@link org.rocksdb.CompactionStopStyle} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ public static CompactionStopStyle getCompactionStopStyle(final byte value) { for (final CompactionStopStyle compactionStopStyle : CompactionStopStyle.values()) { if (compactionStopStyle.getValue() == value){ return compactionStopStyle; } } throw new IllegalArgumentException( "Illegal value provided for CompactionStopStyle."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompactionStyle.java000066400000000000000000000047671370372246700250360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.List;

/**
 * Enum CompactionStyle
 *
 * RocksDB supports different styles of compaction. Available
 * compaction styles can be chosen using this enumeration.
 *
 * <ol>
 *   <li><strong>LEVEL</strong> - Level based Compaction style</li>
 *   <li><strong>UNIVERSAL</strong> - Universal Compaction Style is a
 *   compaction style, targeting the use cases requiring lower write
 *   amplification, trading off read amplification and space
 *   amplification.</li>
 *   <li><strong>FIFO</strong> - FIFO compaction style is the simplest
 *   compaction strategy. It is suited for keeping event log data with
 *   very low overhead (query log for example). It periodically deletes
 *   the old data, so it's basically a TTL compaction style.</li>
 *   <li><strong>NONE</strong> - Disable background compaction.
 *   Compaction jobs are submitted via
 *   {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, CompactionJobInfo)}.</li>
 * </ol>
 *
 * @see <a
 * href="https://github.com/facebook/rocksdb/wiki/Universal-Compaction">
 * Universal Compaction</a>
 * @see <a
 * href="https://github.com/facebook/rocksdb/wiki/FIFO-compaction-style">
 * FIFO Compaction</a>
 */
public enum CompactionStyle {
  LEVEL((byte) 0x0),
  UNIVERSAL((byte) 0x1),
  FIFO((byte) 0x2),
  NONE((byte) 0x3);

  private final byte value;

  CompactionStyle(final byte value) {
    this.value = value;
  }

  /**
   * Get the internal representation value.
   *
   * @return the internal representation value.
   */
  //TODO(AR) should be made package-private
  public byte getValue() {
    return value;
  }

  /**
   * Look up the Compaction style for an internal representation value.
   *
   * @param value the internal representation value.
   *
   * @return the Compaction style whose byte code equals {@code value}
   *
   * @throws IllegalArgumentException if the value does not match a
   *     CompactionStyle
   */
  static CompactionStyle fromValue(final byte value)
      throws IllegalArgumentException {
    final CompactionStyle[] candidates = CompactionStyle.values();
    for (int idx = 0; idx < candidates.length; idx++) {
      final CompactionStyle candidate = candidates[idx];
      if (candidate.value == value) {
        return candidate;
      }
    }
    throw new IllegalArgumentException("Unknown value for CompactionStyle: "
        + value);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ComparatorOptions.java000066400000000000000000000113651370372246700253740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * This class controls the behaviour
 * of Java implementations of
 * AbstractComparator
 *
 * Note that dispose() must be called before a ComparatorOptions
 * instance becomes out-of-scope to release the allocated memory in C++.
 */
public class ComparatorOptions extends RocksObject {

  /**
   * Creates a new set of comparator options backed by a freshly
   * allocated native object.
   */
  public ComparatorOptions() {
    super(newComparatorOptions());
  }

  /**
   * Get the synchronisation type used to guard the reused buffers.
   * Only used if {@link #maxReusedBufferSize()} &gt; 0
   * Default: {@link ReusedSynchronisationType#ADAPTIVE_MUTEX}
   *
   * @return the synchronisation type
   */
  public ReusedSynchronisationType reusedSynchronisationType() {
    assert isOwningHandle();
    final byte typeValue = reusedSynchronisationType(nativeHandle_);
    return ReusedSynchronisationType.getReusedSynchronisationType(typeValue);
  }

  /**
   * Set the synchronisation type used to guard the reused buffers.
   * Only used if {@link #maxReusedBufferSize()} &gt; 0
   * Default: {@link ReusedSynchronisationType#ADAPTIVE_MUTEX}
   *
   * @param reusedSynchronisationType the synchronisation type
   *
   * @return the reference to the current comparator options.
   */
  public ComparatorOptions setReusedSynchronisationType(
      final ReusedSynchronisationType reusedSynchronisationType) {
    assert isOwningHandle();
    final byte typeValue = reusedSynchronisationType.getValue();
    setReusedSynchronisationType(nativeHandle_, typeValue);
    return this;
  }

  /**
   * Indicates if a direct byte buffer (i.e. outside of the normal
   * garbage-collected heap) is used, as opposed to a non-direct byte buffer
   * which is a wrapper around an on-heap byte[].
   *
   * Default: true
   *
   * @return true if a direct byte buffer will be used, false otherwise
   */
  public boolean useDirectBuffer() {
    assert isOwningHandle();
    final boolean direct = useDirectBuffer(nativeHandle_);
    return direct;
  }

  /**
   * Controls whether a direct byte buffer (i.e. outside of the normal
   * garbage-collected heap) is used, as opposed to a non-direct byte buffer
   * which is a wrapper around an on-heap byte[].
   *
   * Default: true
   *
   * @param useDirectBuffer true if a direct byte buffer should be used,
   *     false otherwise
   * @return the reference to the current comparator options.
   */
  public ComparatorOptions setUseDirectBuffer(final boolean useDirectBuffer) {
    assert isOwningHandle();
    setUseDirectBuffer(nativeHandle_, useDirectBuffer);
    return this;
  }

  /**
   * Maximum size of a buffer (in bytes) that will be reused.
   * Comparators will use 5 of these buffers,
   * so the retained memory size will be 5 * max_reused_buffer_size.
   * When a buffer is needed for transferring data to a callback,
   * if it requires less than {@code maxReusedBufferSize}, then an
   * existing buffer will be reused, else a new buffer will be
   * allocated just for that callback.
   *
   * Default: 64 bytes
   *
   * @return the maximum size of a buffer which is reused,
   *     or 0 if reuse is disabled
   */
  public int maxReusedBufferSize() {
    assert isOwningHandle();
    final int size = maxReusedBufferSize(nativeHandle_);
    return size;
  }

  /**
   * Sets the maximum size of a buffer (in bytes) that will be reused.
   * Comparators will use 5 of these buffers,
   * so the retained memory size will be 5 * max_reused_buffer_size.
   * When a buffer is needed for transferring data to a callback,
   * if it requires less than {@code maxReusedBufferSize}, then an
   * existing buffer will be reused, else a new buffer will be
   * allocated just for that callback.
   *
   * Default: 64 bytes
   *
   * @param maxReusedBufferSize the maximum size for a buffer to reuse, or 0 to
   *     disable reuse
   *
   * @return the reference to the current comparator options.
   */
  public ComparatorOptions setMaxReusedBufferSize(final int maxReusedBufferSize) {
    assert isOwningHandle();
    setMaxReusedBufferSize(nativeHandle_, maxReusedBufferSize);
    return this;
  }

  // Native counterparts; each operates on the native object identified by handle.
  private native static long newComparatorOptions();
  private native byte reusedSynchronisationType(final long handle);
  private native void setReusedSynchronisationType(final long handle,
      final byte reusedSynchronisationType);
  private native boolean useDirectBuffer(final long handle);
  private native void setUseDirectBuffer(final long handle,
      final boolean useDirectBuffer);
  private native int maxReusedBufferSize(final long handle);
  private native void setMaxReusedBufferSize(final long handle,
      final int maxReuseBufferSize);
  @Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ComparatorType.java000066400000000000000000000024321370372246700246550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.
All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Identifies a kind of comparator by a single-byte code.
 */
enum ComparatorType {
  JAVA_COMPARATOR((byte)0x0),
  JAVA_NATIVE_COMPARATOR_WRAPPER((byte)0x1);

  private final byte value;

  ComparatorType(final byte value) {
    this.value = value;
  }

  /**
   * <p>Returns the byte value of the enumerations value.</p>
   *
   * @return byte representation
   */
  byte getValue() {
    return value;
  }

  /**
   * <p>Get the ComparatorType enumeration value by
   * passing the byte identifier to this method.</p>
   *
   * @param byteIdentifier of ComparatorType.
   *
   * @return ComparatorType instance.
   *
   * @throws IllegalArgumentException if the comparator type for the byteIdentifier
   *     cannot be found
   */
  static ComparatorType getComparatorType(final byte byteIdentifier) {
    for (final ComparatorType comparatorType : ComparatorType.values()) {
      if (comparatorType.getValue() == byteIdentifier) {
        return comparatorType;
      }
    }

    // Report the rejected byte so a Java/native mismatch can be diagnosed.
    throw new IllegalArgumentException(
        "Illegal value provided for ComparatorType: " + byteIdentifier);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompressionOptions.java000066400000000000000000000114641370372246700255660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Options for Compression
 */
public class CompressionOptions extends RocksObject {

  /**
   * Creates a new set of compression options backed by a freshly
   * allocated native object.
   */
  public CompressionOptions() {
    super(newCompressionOptions());
  }

  /**
   * Set the window bits value on the native compression options.
   *
   * @param windowBits the window bits value
   *
   * @return the reference to the current options
   */
  public CompressionOptions setWindowBits(final int windowBits) {
    setWindowBits(nativeHandle_, windowBits);
    return this;
  }

  /**
   * Get the window bits value from the native compression options.
   *
   * @return the window bits value
   */
  public int windowBits() {
    return windowBits(nativeHandle_);
  }

  /**
   * Set the compression level on the native compression options.
   *
   * @param level the compression level
   *
   * @return the reference to the current options
   */
  public CompressionOptions setLevel(final int level) {
    setLevel(nativeHandle_, level);
    return this;
  }

  /**
   * Get the compression level from the native compression options.
   *
   * @return the compression level
   */
  public int level() {
    return level(nativeHandle_);
  }

  /**
   * Set the compression strategy on the native compression options.
   *
   * @param strategy the compression strategy
   *
   * @return the reference to the current options
   */
  public CompressionOptions setStrategy(final int strategy) {
    setStrategy(nativeHandle_, strategy);
    return this;
  }

  /**
   * Get the compression strategy from the native compression options.
   *
   * @return the compression strategy
   */
  public int strategy() {
    return strategy(nativeHandle_);
  }

  /**
   * Maximum size of dictionary used to prime the compression library. Currently
   * this dictionary will be constructed by sampling the first output file in a
   * subcompaction when the target level is bottommost. This dictionary will be
   * loaded into the compression library before compressing/uncompressing each
   * data block of subsequent files in the subcompaction. Effectively, this
   * improves compression ratios when there are repetitions across data blocks.
   *
   * A value of 0 indicates the feature is disabled.
   *
   * Default: 0.
   *
   * @param maxDictBytes Maximum bytes to use for the dictionary
   *
   * @return the reference to the current options
   */
  public CompressionOptions setMaxDictBytes(final int maxDictBytes) {
    setMaxDictBytes(nativeHandle_, maxDictBytes);
    return this;
  }

  /**
   * Maximum size of dictionary used to prime the compression library.
   *
   * @return The maximum bytes to use for the dictionary
   */
  public int maxDictBytes() {
    return maxDictBytes(nativeHandle_);
  }

  /**
   * Maximum size of training data passed to zstd's dictionary trainer. Using
   * zstd's dictionary trainer can achieve even better compression ratio
   * improvements than using {@link #setMaxDictBytes(int)} alone.
   *
   * The training data will be used to generate a dictionary
   * of {@link #maxDictBytes()}.
   *
   * Default: 0.
   *
   * @param zstdMaxTrainBytes Maximum bytes to use for training ZStd.
   *
   * @return the reference to the current options
   */
  public CompressionOptions setZStdMaxTrainBytes(final int zstdMaxTrainBytes) {
    setZstdMaxTrainBytes(nativeHandle_, zstdMaxTrainBytes);
    return this;
  }

  /**
   * Maximum size of training data passed to zstd's dictionary trainer.
   *
   * @return Maximum bytes to use for training ZStd
   */
  public int zstdMaxTrainBytes() {
    return zstdMaxTrainBytes(nativeHandle_);
  }

  /**
   * When the compression options are set by the user, it will be set to "true".
   * For bottommost_compression_opts, to enable it, user must set enabled=true.
   * Otherwise, bottommost compression will use compression_opts as default
   * compression options.
   *
   * For compression_opts, if compression_opts.enabled=false, it is still
   * used as compression options for compression process.
   *
   * Default: false.
   *
   * @param enabled true to use these compression options
   *     for the bottommost_compression_opts, false otherwise
   *
   * @return the reference to the current options
   */
  public CompressionOptions setEnabled(final boolean enabled) {
    setEnabled(nativeHandle_, enabled);
    return this;
  }

  /**
   * Determine whether these compression options
   * are used for the bottommost_compression_opts.
   *
   * @return true if these compression options are used
   *     for the bottommost_compression_opts, false otherwise
   */
  public boolean enabled() {
    return enabled(nativeHandle_);
  }

  // Native counterparts; each operates on the native object identified by handle.
  private native static long newCompressionOptions();
  @Override protected final native void disposeInternal(final long handle);

  private native void setWindowBits(final long handle, final int windowBits);
  private native int windowBits(final long handle);
  private native void setLevel(final long handle, final int level);
  private native int level(final long handle);
  private native void setStrategy(final long handle, final int strategy);
  private native int strategy(final long handle);
  private native void setMaxDictBytes(final long handle, final int maxDictBytes);
  private native int maxDictBytes(final long handle);
  private native void setZstdMaxTrainBytes(final long handle,
      final int zstdMaxTrainBytes);
  private native int zstdMaxTrainBytes(final long handle);
  private native void setEnabled(final long handle, final boolean enabled);
  private native boolean enabled(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/CompressionType.java000066400000000000000000000056331370372246700250550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Enum CompressionType
 *
 * <p>DB contents are stored in a set of blocks, each of which holds a
 * sequence of key,value pairs. Each block may be compressed before
 * being stored in a file. The following enum describes which
 * compression method (if any) is used to compress a block.</p>
 */
public enum CompressionType {

  NO_COMPRESSION((byte) 0x0, null),
  SNAPPY_COMPRESSION((byte) 0x1, "snappy"),
  ZLIB_COMPRESSION((byte) 0x2, "z"),
  BZLIB2_COMPRESSION((byte) 0x3, "bzip2"),
  LZ4_COMPRESSION((byte) 0x4, "lz4"),
  LZ4HC_COMPRESSION((byte) 0x5, "lz4hc"),
  XPRESS_COMPRESSION((byte) 0x6, "xpress"),
  ZSTD_COMPRESSION((byte)0x7, "zstd"),
  DISABLE_COMPRESSION_OPTION((byte)0x7F, null);

  /**
   * <p>Get the CompressionType enumeration value by
   * passing the library name to this method.</p>
   *
   * <p>If library cannot be found the enumeration
   * value {@code NO_COMPRESSION} will be returned.</p>
   *
   * @param libraryName compression library name.
   *
   * @return CompressionType instance.
   */
  public static CompressionType getCompressionType(String libraryName) {
    if (libraryName != null) {
      for (CompressionType compressionType : CompressionType.values()) {
        // String.equals(null) is false, so constants without a library name
        // (NO_COMPRESSION, DISABLE_COMPRESSION_OPTION) never match here.
        if (libraryName.equals(compressionType.getLibraryName())) {
          return compressionType;
        }
      }
    }
    return CompressionType.NO_COMPRESSION;
  }

  /**
   * <p>Get the CompressionType enumeration value by
   * passing the byte identifier to this method.</p>
   *
   * @param byteIdentifier of CompressionType.
   *
   * @return CompressionType instance.
   *
   * @throws IllegalArgumentException If CompressionType cannot be found for the
   *   provided byteIdentifier
   */
  public static CompressionType getCompressionType(byte byteIdentifier) {
    for (final CompressionType compressionType : CompressionType.values()) {
      if (compressionType.getValue() == byteIdentifier) {
        return compressionType;
      }
    }

    // Report the rejected byte so a Java/native mismatch can be diagnosed.
    throw new IllegalArgumentException(
        "Illegal value provided for CompressionType: " + byteIdentifier);
  }

  /**
   * <p>Returns the byte value of the enumerations value.</p>
   *
   * @return byte representation
   */
  public byte getValue() {
    return value_;
  }

  /**
   * <p>Returns the library name of the compression type
   * identified by the enumeration value.</p>
   *
   * @return library name, or null for constants declared without one
   */
  public String getLibraryName() {
    return libraryName_;
  }

  CompressionType(final byte value, final String libraryName) {
    value_ = value;
    libraryName_ = libraryName;
  }

  private final byte value_;
  private final String libraryName_;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ConfigOptions.java000066400000000000000000000026651370372246700244750ustar00rootroot00000000000000package org.rocksdb;

/**
 * Options controlling how option strings are processed, backed by a
 * native object.
 */
public class ConfigOptions extends RocksObject {
  static {
    RocksDB.loadLibrary();
  }

  /**
   * Construct with default Options
   */
  public ConfigOptions() {
    super(newConfigOptions());
  }

  /**
   * Set the delimiter on the native config options.
   *
   * @param delimiter the delimiter string
   *
   * @return the reference to the current options
   */
  public ConfigOptions setDelimiter(final String delimiter) {
    setDelimiter(nativeHandle_, delimiter);
    return this;
  }

  /**
   * Set whether unknown options are ignored.
   *
   * @param ignore the flag value passed to the native config options
   *
   * @return the reference to the current options
   */
  public ConfigOptions setIgnoreUnknownOptions(final boolean ignore) {
    setIgnoreUnknownOptions(nativeHandle_, ignore);
    return this;
  }

  /**
   * Set the Env on the native config options.
   *
   * @param env the environment; must not be null because its native handle
   *     is read here
   *
   * @return the reference to the current options
   */
  public ConfigOptions setEnv(final Env env) {
    setEnv(nativeHandle_, env.nativeHandle_);
    return this;
  }

  /**
   * Set whether input strings are escaped.
   *
   * @param escaped the flag value passed to the native config options
   *
   * @return the reference to the current options
   */
  public ConfigOptions setInputStringsEscaped(final boolean escaped) {
    setInputStringsEscaped(nativeHandle_, escaped);
    return this;
  }

  /**
   * Set the sanity level on the native config options.
   *
   * @param level the sanity level; must not be null because its byte value
   *     is read here
   *
   * @return the reference to the current options
   */
  public ConfigOptions setSanityLevel(final SanityLevel level) {
    setSanityLevel(nativeHandle_, level.getValue());
    return this;
  }

  @Override protected final native void disposeInternal(final long handle);

  // Native counterparts; each operates on the native object identified by handle.
  private native static long newConfigOptions();
  private native static void setEnv(final long handle, final long envHandle);
  private native static void setDelimiter(final long handle, final String delimiter);
  private native static void setIgnoreUnknownOptions(final long handle, final boolean ignore);
  private native static void setInputStringsEscaped(final long handle, final boolean escaped);
  private native static void setSanityLevel(final long handle, final byte level);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/DBOptions.java000066400000000000000000001241451370372246700235530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.file.Paths; import java.util.*; /** * DBOptions to control the behavior of a database. It will be used * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). * * If {@link #dispose()} function is not called, then it will be GC'd * automatically and native resources will be released as part of the process. */ public class DBOptions extends RocksObject implements DBOptionsInterface, MutableDBOptionsInterface { static { RocksDB.loadLibrary(); } /** * Construct DBOptions. * * This constructor will create (by allocating a block of memory) * an {@code rocksdb::DBOptions} in the c++ side. */ public DBOptions() { super(newDBOptions()); numShardBits_ = DEFAULT_NUM_SHARD_BITS; } /** * Copy constructor for DBOptions. * * NOTE: This does a shallow copy, which means env, rate_limiter, sst_file_manager, * info_log and other pointers will be cloned! * * @param other The DBOptions to copy. */ public DBOptions(DBOptions other) { super(copyDBOptions(other.nativeHandle_)); this.env_ = other.env_; this.numShardBits_ = other.numShardBits_; this.rateLimiter_ = other.rateLimiter_; this.rowCache_ = other.rowCache_; this.walFilter_ = other.walFilter_; this.writeBufferManager_ = other.writeBufferManager_; } /** * Constructor from Options * * @param options The options. */ public DBOptions(final Options options) { super(newDBOptionsFromOptions(options.nativeHandle_)); } /** *

Method to get a options instance by using pre-configured * property values. If one or many values are undefined in * the context of RocksDB the method will return a null * value.

* *

Note: Property keys can be derived from * getter methods within the options class. Example: the method * {@code allowMmapReads()} has a property key: * {@code allow_mmap_reads}.

* * @param cfgOpts The ConfigOptions to control how the string is processed. * @param properties {@link java.util.Properties} instance. * * @return {@link org.rocksdb.DBOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty * {@link java.util.Properties} instance is passed to the method call. */ public static DBOptions getDBOptionsFromProps( final ConfigOptions cfgOpts, final Properties properties) { DBOptions dbOptions = null; final String optionsString = Options.getOptionStringFromProps(properties); final long handle = getDBOptionsFromProps(cfgOpts.nativeHandle_, optionsString); if (handle != 0) { dbOptions = new DBOptions(handle); } return dbOptions; } /** *

Method to get a options instance by using pre-configured * property values. If one or many values are undefined in * the context of RocksDB the method will return a null * value.

* *

Note: Property keys can be derived from * getter methods within the options class. Example: the method * {@code allowMmapReads()} has a property key: * {@code allow_mmap_reads}.

* * @param properties {@link java.util.Properties} instance. * * @return {@link org.rocksdb.DBOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty * {@link java.util.Properties} instance is passed to the method call. */ public static DBOptions getDBOptionsFromProps(final Properties properties) { DBOptions dbOptions = null; final String optionsString = Options.getOptionStringFromProps(properties); final long handle = getDBOptionsFromProps(optionsString); if (handle != 0) { dbOptions = new DBOptions(handle); } return dbOptions; } @Override public DBOptions optimizeForSmallDb() { optimizeForSmallDb(nativeHandle_); return this; } @Override public DBOptions setIncreaseParallelism( final int totalThreads) { assert(isOwningHandle()); setIncreaseParallelism(nativeHandle_, totalThreads); return this; } @Override public DBOptions setCreateIfMissing(final boolean flag) { assert(isOwningHandle()); setCreateIfMissing(nativeHandle_, flag); return this; } @Override public boolean createIfMissing() { assert(isOwningHandle()); return createIfMissing(nativeHandle_); } @Override public DBOptions setCreateMissingColumnFamilies( final boolean flag) { assert(isOwningHandle()); setCreateMissingColumnFamilies(nativeHandle_, flag); return this; } @Override public boolean createMissingColumnFamilies() { assert(isOwningHandle()); return createMissingColumnFamilies(nativeHandle_); } @Override public DBOptions setErrorIfExists( final boolean errorIfExists) { assert(isOwningHandle()); setErrorIfExists(nativeHandle_, errorIfExists); return this; } @Override public boolean errorIfExists() { assert(isOwningHandle()); return errorIfExists(nativeHandle_); } @Override public DBOptions setParanoidChecks( final boolean paranoidChecks) { assert(isOwningHandle()); setParanoidChecks(nativeHandle_, paranoidChecks); return this; } @Override public boolean paranoidChecks() { assert(isOwningHandle()); return paranoidChecks(nativeHandle_); } @Override public DBOptions 
setEnv(final Env env) { setEnv(nativeHandle_, env.nativeHandle_); this.env_ = env; return this; } @Override public Env getEnv() { return env_; } @Override public DBOptions setRateLimiter(final RateLimiter rateLimiter) { assert(isOwningHandle()); rateLimiter_ = rateLimiter; setRateLimiter(nativeHandle_, rateLimiter.nativeHandle_); return this; } @Override public DBOptions setSstFileManager(final SstFileManager sstFileManager) { assert(isOwningHandle()); setSstFileManager(nativeHandle_, sstFileManager.nativeHandle_); return this; } @Override public DBOptions setLogger(final Logger logger) { assert(isOwningHandle()); setLogger(nativeHandle_, logger.nativeHandle_); return this; } @Override public DBOptions setInfoLogLevel( final InfoLogLevel infoLogLevel) { assert(isOwningHandle()); setInfoLogLevel(nativeHandle_, infoLogLevel.getValue()); return this; } @Override public InfoLogLevel infoLogLevel() { assert(isOwningHandle()); return InfoLogLevel.getInfoLogLevel( infoLogLevel(nativeHandle_)); } @Override public DBOptions setMaxOpenFiles( final int maxOpenFiles) { assert(isOwningHandle()); setMaxOpenFiles(nativeHandle_, maxOpenFiles); return this; } @Override public int maxOpenFiles() { assert(isOwningHandle()); return maxOpenFiles(nativeHandle_); } @Override public DBOptions setMaxFileOpeningThreads(final int maxFileOpeningThreads) { assert(isOwningHandle()); setMaxFileOpeningThreads(nativeHandle_, maxFileOpeningThreads); return this; } @Override public int maxFileOpeningThreads() { assert(isOwningHandle()); return maxFileOpeningThreads(nativeHandle_); } @Override public DBOptions setMaxTotalWalSize( final long maxTotalWalSize) { assert(isOwningHandle()); setMaxTotalWalSize(nativeHandle_, maxTotalWalSize); return this; } @Override public long maxTotalWalSize() { assert(isOwningHandle()); return maxTotalWalSize(nativeHandle_); } @Override public DBOptions setStatistics(final Statistics statistics) { assert(isOwningHandle()); setStatistics(nativeHandle_, 
statistics.nativeHandle_); return this; } @Override public Statistics statistics() { assert(isOwningHandle()); final long statisticsNativeHandle = statistics(nativeHandle_); if(statisticsNativeHandle == 0) { return null; } else { return new Statistics(statisticsNativeHandle); } } @Override public DBOptions setUseFsync( final boolean useFsync) { assert(isOwningHandle()); setUseFsync(nativeHandle_, useFsync); return this; } @Override public boolean useFsync() { assert(isOwningHandle()); return useFsync(nativeHandle_); } @Override public DBOptions setDbPaths(final Collection dbPaths) { assert(isOwningHandle()); final int len = dbPaths.size(); final String[] paths = new String[len]; final long[] targetSizes = new long[len]; int i = 0; for(final DbPath dbPath : dbPaths) { paths[i] = dbPath.path.toString(); targetSizes[i] = dbPath.targetSize; i++; } setDbPaths(nativeHandle_, paths, targetSizes); return this; } @Override public List dbPaths() { final int len = (int)dbPathsLen(nativeHandle_); if(len == 0) { return Collections.emptyList(); } else { final String[] paths = new String[len]; final long[] targetSizes = new long[len]; dbPaths(nativeHandle_, paths, targetSizes); final List dbPaths = new ArrayList<>(); for(int i = 0; i < len; i++) { dbPaths.add(new DbPath(Paths.get(paths[i]), targetSizes[i])); } return dbPaths; } } @Override public DBOptions setDbLogDir( final String dbLogDir) { assert(isOwningHandle()); setDbLogDir(nativeHandle_, dbLogDir); return this; } @Override public String dbLogDir() { assert(isOwningHandle()); return dbLogDir(nativeHandle_); } @Override public DBOptions setWalDir( final String walDir) { assert(isOwningHandle()); setWalDir(nativeHandle_, walDir); return this; } @Override public String walDir() { assert(isOwningHandle()); return walDir(nativeHandle_); } @Override public DBOptions setDeleteObsoleteFilesPeriodMicros( final long micros) { assert(isOwningHandle()); setDeleteObsoleteFilesPeriodMicros(nativeHandle_, micros); return this; } 
@Override
  public long deleteObsoleteFilesPeriodMicros() {
    assert(isOwningHandle());
    return deleteObsoleteFilesPeriodMicros(nativeHandle_);
  }

  // ---------------------------------------------------------------------
  // Every accessor below follows the same shape: assert that this object
  // still owns its native handle, then forward to the private native
  // method of the same name, passing nativeHandle_ as the first argument.
  // Setters return `this` so calls can be chained (the deprecated
  // setBaseBackgroundCompactions is the one void exception).
  // ---------------------------------------------------------------------

  @Override
  public DBOptions setMaxBackgroundJobs(final int maxBackgroundJobs) {
    assert(isOwningHandle());
    setMaxBackgroundJobs(nativeHandle_, maxBackgroundJobs);
    return this;
  }

  @Override
  public int maxBackgroundJobs() {
    assert(isOwningHandle());
    return maxBackgroundJobs(nativeHandle_);
  }

  @Override
  @Deprecated
  public void setBaseBackgroundCompactions(
      final int baseBackgroundCompactions) {
    assert(isOwningHandle());
    setBaseBackgroundCompactions(nativeHandle_, baseBackgroundCompactions);
  }

  @Override
  public int baseBackgroundCompactions() {
    assert(isOwningHandle());
    return baseBackgroundCompactions(nativeHandle_);
  }

  @Override
  @Deprecated
  public DBOptions setMaxBackgroundCompactions(
      final int maxBackgroundCompactions) {
    assert(isOwningHandle());
    setMaxBackgroundCompactions(nativeHandle_, maxBackgroundCompactions);
    return this;
  }

  @Override
  @Deprecated
  public int maxBackgroundCompactions() {
    assert(isOwningHandle());
    return maxBackgroundCompactions(nativeHandle_);
  }

  @Override
  public DBOptions setMaxSubcompactions(final int maxSubcompactions) {
    assert(isOwningHandle());
    setMaxSubcompactions(nativeHandle_, maxSubcompactions);
    return this;
  }

  @Override
  public int maxSubcompactions() {
    assert(isOwningHandle());
    return maxSubcompactions(nativeHandle_);
  }

  @Override
  @Deprecated
  public DBOptions setMaxBackgroundFlushes(
      final int maxBackgroundFlushes) {
    assert(isOwningHandle());
    setMaxBackgroundFlushes(nativeHandle_, maxBackgroundFlushes);
    return this;
  }

  @Override
  @Deprecated
  public int maxBackgroundFlushes() {
    assert(isOwningHandle());
    return maxBackgroundFlushes(nativeHandle_);
  }

  @Override
  public DBOptions setMaxLogFileSize(final long maxLogFileSize) {
    assert(isOwningHandle());
    setMaxLogFileSize(nativeHandle_, maxLogFileSize);
    return this;
  }

  @Override
  public long maxLogFileSize() {
    assert(isOwningHandle());
    return maxLogFileSize(nativeHandle_);
  }

  @Override
  public DBOptions setLogFileTimeToRoll(
      final long logFileTimeToRoll) {
    assert(isOwningHandle());
    setLogFileTimeToRoll(nativeHandle_, logFileTimeToRoll);
    return this;
  }

  @Override
  public long logFileTimeToRoll() {
    assert(isOwningHandle());
    return logFileTimeToRoll(nativeHandle_);
  }

  @Override
  public DBOptions setKeepLogFileNum(
      final long keepLogFileNum) {
    assert(isOwningHandle());
    setKeepLogFileNum(nativeHandle_, keepLogFileNum);
    return this;
  }

  @Override
  public long keepLogFileNum() {
    assert(isOwningHandle());
    return keepLogFileNum(nativeHandle_);
  }

  @Override
  public DBOptions setRecycleLogFileNum(final long recycleLogFileNum) {
    assert(isOwningHandle());
    setRecycleLogFileNum(nativeHandle_, recycleLogFileNum);
    return this;
  }

  @Override
  public long recycleLogFileNum() {
    assert(isOwningHandle());
    return recycleLogFileNum(nativeHandle_);
  }

  @Override
  public DBOptions setMaxManifestFileSize(
      final long maxManifestFileSize) {
    assert(isOwningHandle());
    setMaxManifestFileSize(nativeHandle_, maxManifestFileSize);
    return this;
  }

  @Override
  public long maxManifestFileSize() {
    assert(isOwningHandle());
    return maxManifestFileSize(nativeHandle_);
  }

  @Override
  public DBOptions setTableCacheNumshardbits(
      final int tableCacheNumshardbits) {
    assert(isOwningHandle());
    setTableCacheNumshardbits(nativeHandle_, tableCacheNumshardbits);
    return this;
  }

  @Override
  public int tableCacheNumshardbits() {
    assert(isOwningHandle());
    return tableCacheNumshardbits(nativeHandle_);
  }

  @Override
  public DBOptions setWalTtlSeconds(
      final long walTtlSeconds) {
    assert(isOwningHandle());
    setWalTtlSeconds(nativeHandle_, walTtlSeconds);
    return this;
  }

  @Override
  public long walTtlSeconds() {
    assert(isOwningHandle());
    return walTtlSeconds(nativeHandle_);
  }

  @Override
  public DBOptions setWalSizeLimitMB(
      final long sizeLimitMB) {
    assert(isOwningHandle());
    setWalSizeLimitMB(nativeHandle_, sizeLimitMB);
    return this;
  }

  @Override
  public long walSizeLimitMB() {
    assert(isOwningHandle());
    return walSizeLimitMB(nativeHandle_);
  }

  @Override
  public DBOptions setManifestPreallocationSize(
      final long size) {
    assert(isOwningHandle());
    setManifestPreallocationSize(nativeHandle_, size);
    return this;
  }

  @Override
  public long manifestPreallocationSize() {
    assert(isOwningHandle());
    return manifestPreallocationSize(nativeHandle_);
  }

  @Override
  public DBOptions setAllowMmapReads(
      final boolean allowMmapReads) {
    assert(isOwningHandle());
    setAllowMmapReads(nativeHandle_, allowMmapReads);
    return this;
  }

  @Override
  public boolean allowMmapReads() {
    assert(isOwningHandle());
    return allowMmapReads(nativeHandle_);
  }

  @Override
  public DBOptions setAllowMmapWrites(
      final boolean allowMmapWrites) {
    assert(isOwningHandle());
    setAllowMmapWrites(nativeHandle_, allowMmapWrites);
    return this;
  }

  @Override
  public boolean allowMmapWrites() {
    assert(isOwningHandle());
    return allowMmapWrites(nativeHandle_);
  }

  @Override
  public DBOptions setUseDirectReads(
      final boolean useDirectReads) {
    assert(isOwningHandle());
    setUseDirectReads(nativeHandle_, useDirectReads);
    return this;
  }

  @Override
  public boolean useDirectReads() {
    assert(isOwningHandle());
    return useDirectReads(nativeHandle_);
  }

  @Override
  public DBOptions setUseDirectIoForFlushAndCompaction(
      final boolean useDirectIoForFlushAndCompaction) {
    assert(isOwningHandle());
    setUseDirectIoForFlushAndCompaction(nativeHandle_,
        useDirectIoForFlushAndCompaction);
    return this;
  }

  @Override
  public boolean useDirectIoForFlushAndCompaction() {
    assert(isOwningHandle());
    return useDirectIoForFlushAndCompaction(nativeHandle_);
  }

  @Override
  public DBOptions setAllowFAllocate(final boolean allowFAllocate) {
    assert(isOwningHandle());
    setAllowFAllocate(nativeHandle_, allowFAllocate);
    return this;
  }

  @Override
  public boolean allowFAllocate() {
    assert(isOwningHandle());
    return allowFAllocate(nativeHandle_);
  }

  @Override
  public DBOptions setIsFdCloseOnExec(
      final boolean isFdCloseOnExec) {
    assert(isOwningHandle());
    setIsFdCloseOnExec(nativeHandle_, isFdCloseOnExec);
    return this;
  }

  @Override
  public boolean isFdCloseOnExec() {
    assert(isOwningHandle());
    return isFdCloseOnExec(nativeHandle_);
  }

  @Override
  public DBOptions setStatsDumpPeriodSec(
      final int statsDumpPeriodSec) {
    assert(isOwningHandle());
    setStatsDumpPeriodSec(nativeHandle_, statsDumpPeriodSec);
    return this;
  }

  @Override
  public int statsDumpPeriodSec() {
    assert(isOwningHandle());
    return statsDumpPeriodSec(nativeHandle_);
  }

  @Override
  public DBOptions setStatsPersistPeriodSec(
      final int statsPersistPeriodSec) {
    assert(isOwningHandle());
    setStatsPersistPeriodSec(nativeHandle_, statsPersistPeriodSec);
    return this;
  }

  @Override
  public int statsPersistPeriodSec() {
    assert(isOwningHandle());
    return statsPersistPeriodSec(nativeHandle_);
  }

  @Override
  public DBOptions setStatsHistoryBufferSize(
      final long statsHistoryBufferSize) {
    assert(isOwningHandle());
    setStatsHistoryBufferSize(nativeHandle_, statsHistoryBufferSize);
    return this;
  }

  @Override
  public long statsHistoryBufferSize() {
    assert(isOwningHandle());
    return statsHistoryBufferSize(nativeHandle_);
  }

  @Override
  public DBOptions setAdviseRandomOnOpen(
      final boolean adviseRandomOnOpen) {
    assert(isOwningHandle());
    setAdviseRandomOnOpen(nativeHandle_, adviseRandomOnOpen);
    return this;
  }

  @Override
  public boolean adviseRandomOnOpen() {
    // Same ownership guard as the sibling accessors.
    assert(isOwningHandle());
    return adviseRandomOnOpen(nativeHandle_);
  }

  @Override
  public DBOptions setDbWriteBufferSize(final long dbWriteBufferSize) {
    assert(isOwningHandle());
    setDbWriteBufferSize(nativeHandle_, dbWriteBufferSize);
    return this;
  }

  @Override
  public DBOptions setWriteBufferManager(final WriteBufferManager writeBufferManager) {
    assert(isOwningHandle());
    setWriteBufferManager(nativeHandle_, writeBufferManager.nativeHandle_);
    // Keep a Java-side reference; the getter below returns it directly.
    this.writeBufferManager_ = writeBufferManager;
    return this;
  }

  @Override
  public WriteBufferManager writeBufferManager() {
    assert(isOwningHandle());
    return this.writeBufferManager_;
  }

  @Override
  public long dbWriteBufferSize() {
    assert(isOwningHandle());
    return dbWriteBufferSize(nativeHandle_);
  }

  @Override
  public DBOptions setAccessHintOnCompactionStart(final AccessHint accessHint) {
    assert(isOwningHandle());
    // The native side receives the enum's byte value.
    setAccessHintOnCompactionStart(nativeHandle_, accessHint.getValue());
    return this;
  }

  @Override
  public AccessHint accessHintOnCompactionStart() {
    assert(isOwningHandle());
    return AccessHint.getAccessHint(accessHintOnCompactionStart(nativeHandle_));
  }

  @Override
  public DBOptions setNewTableReaderForCompactionInputs(
      final boolean newTableReaderForCompactionInputs) {
    assert(isOwningHandle());
    setNewTableReaderForCompactionInputs(nativeHandle_,
        newTableReaderForCompactionInputs);
    return this;
  }

  @Override
  public boolean newTableReaderForCompactionInputs() {
    assert(isOwningHandle());
    return newTableReaderForCompactionInputs(nativeHandle_);
  }

  @Override
  public DBOptions setCompactionReadaheadSize(final long compactionReadaheadSize) {
    assert(isOwningHandle());
    setCompactionReadaheadSize(nativeHandle_, compactionReadaheadSize);
    return this;
  }

  @Override
  public long compactionReadaheadSize() {
    assert(isOwningHandle());
    return compactionReadaheadSize(nativeHandle_);
  }

  @Override
  public DBOptions setRandomAccessMaxBufferSize(final long randomAccessMaxBufferSize) {
    assert(isOwningHandle());
    setRandomAccessMaxBufferSize(nativeHandle_, randomAccessMaxBufferSize);
    return this;
  }

  @Override
  public long randomAccessMaxBufferSize() {
    assert(isOwningHandle());
    return randomAccessMaxBufferSize(nativeHandle_);
  }

  @Override
  public DBOptions setWritableFileMaxBufferSize(final long writableFileMaxBufferSize) {
    assert(isOwningHandle());
    setWritableFileMaxBufferSize(nativeHandle_, writableFileMaxBufferSize);
    return this;
  }

  @Override
  public long writableFileMaxBufferSize() {
    assert(isOwningHandle());
    return writableFileMaxBufferSize(nativeHandle_);
  }

  @Override
  public DBOptions setUseAdaptiveMutex(
      final boolean useAdaptiveMutex) {
    assert(isOwningHandle());
    setUseAdaptiveMutex(nativeHandle_, useAdaptiveMutex);
    return this;
  }

  @Override
  public boolean useAdaptiveMutex() {
    assert(isOwningHandle());
    return useAdaptiveMutex(nativeHandle_);
  }

  @Override
  public DBOptions setBytesPerSync(
      final long bytesPerSync) {
    assert(isOwningHandle());
    setBytesPerSync(nativeHandle_, bytesPerSync);
    return this;
  }

  @Override
  public long bytesPerSync() {
    // Same ownership guard as the sibling accessors.
    assert(isOwningHandle());
    return bytesPerSync(nativeHandle_);
  }

  @Override
  public DBOptions setWalBytesPerSync(final long walBytesPerSync) {
    assert(isOwningHandle());
    setWalBytesPerSync(nativeHandle_, walBytesPerSync);
    return this;
  }

  @Override
  public long walBytesPerSync() {
    assert(isOwningHandle());
    return walBytesPerSync(nativeHandle_);
  }

  @Override
  public DBOptions setStrictBytesPerSync(final boolean strictBytesPerSync) {
    assert(isOwningHandle());
    setStrictBytesPerSync(nativeHandle_, strictBytesPerSync);
    return this;
  }

  @Override
  public boolean strictBytesPerSync() {
    assert(isOwningHandle());
    return strictBytesPerSync(nativeHandle_);
  }

  //TODO(AR) NOW
//  @Override
//  public DBOptions setListeners(final List listeners) {
//    assert(isOwningHandle());
//    final long[] eventListenerHandlers = new long[listeners.size()];
//    for (int i = 0; i < eventListenerHandlers.length; i++) {
//      eventListenerHandlers[i] = listeners.get(i).nativeHandle_;
//    }
//    setEventListeners(nativeHandle_, eventListenerHandlers);
//    return this;
//  }
//
//  @Override
//  public Collection listeners() {
//    assert(isOwningHandle());
//    final long[] eventListenerHandlers = listeners(nativeHandle_);
//    if (eventListenerHandlers == null || eventListenerHandlers.length == 0) {
//      return Collections.emptyList();
//    }
//
//    final List eventListeners = new ArrayList<>();
//    for (final long eventListenerHandle : eventListenerHandlers) {
//      eventListeners.add(new EventListener(eventListenerHandle)); //TODO(AR) check ownership is set to false!
//    }
//    return eventListeners;
//  }

  // ---------------------------------------------------------------------
  // Every accessor below follows the same shape: assert that this object
  // still owns its native handle, then forward to the private native
  // method of the same name, passing nativeHandle_ as the first argument.
  // Setters return `this` so calls can be chained.
  // ---------------------------------------------------------------------

  @Override
  public DBOptions setEnableThreadTracking(final boolean enableThreadTracking) {
    assert(isOwningHandle());
    setEnableThreadTracking(nativeHandle_, enableThreadTracking);
    return this;
  }

  @Override
  public boolean enableThreadTracking() {
    assert(isOwningHandle());
    return enableThreadTracking(nativeHandle_);
  }

  @Override
  public DBOptions setDelayedWriteRate(final long delayedWriteRate) {
    assert(isOwningHandle());
    setDelayedWriteRate(nativeHandle_, delayedWriteRate);
    return this;
  }

  @Override
  public long delayedWriteRate(){
    // Same ownership guard as the sibling accessors.
    assert(isOwningHandle());
    return delayedWriteRate(nativeHandle_);
  }

  @Override
  public DBOptions setEnablePipelinedWrite(final boolean enablePipelinedWrite) {
    assert(isOwningHandle());
    setEnablePipelinedWrite(nativeHandle_, enablePipelinedWrite);
    return this;
  }

  @Override
  public boolean enablePipelinedWrite() {
    assert(isOwningHandle());
    return enablePipelinedWrite(nativeHandle_);
  }

  @Override
  public DBOptions setUnorderedWrite(final boolean unorderedWrite) {
    assert(isOwningHandle());
    setUnorderedWrite(nativeHandle_, unorderedWrite);
    return this;
  }

  @Override
  public boolean unorderedWrite() {
    assert(isOwningHandle());
    return unorderedWrite(nativeHandle_);
  }

  @Override
  public DBOptions setAllowConcurrentMemtableWrite(
      final boolean allowConcurrentMemtableWrite) {
    assert(isOwningHandle());
    setAllowConcurrentMemtableWrite(nativeHandle_,
        allowConcurrentMemtableWrite);
    return this;
  }

  @Override
  public boolean allowConcurrentMemtableWrite() {
    assert(isOwningHandle());
    return allowConcurrentMemtableWrite(nativeHandle_);
  }

  @Override
  public DBOptions setEnableWriteThreadAdaptiveYield(
      final boolean enableWriteThreadAdaptiveYield) {
    assert(isOwningHandle());
    setEnableWriteThreadAdaptiveYield(nativeHandle_,
        enableWriteThreadAdaptiveYield);
    return this;
  }

  @Override
  public boolean enableWriteThreadAdaptiveYield() {
    assert(isOwningHandle());
    return enableWriteThreadAdaptiveYield(nativeHandle_);
  }

  @Override
  public DBOptions setWriteThreadMaxYieldUsec(final long writeThreadMaxYieldUsec) {
    assert(isOwningHandle());
    setWriteThreadMaxYieldUsec(nativeHandle_, writeThreadMaxYieldUsec);
    return this;
  }

  @Override
  public long writeThreadMaxYieldUsec() {
    assert(isOwningHandle());
    return writeThreadMaxYieldUsec(nativeHandle_);
  }

  @Override
  public DBOptions setWriteThreadSlowYieldUsec(final long writeThreadSlowYieldUsec) {
    assert(isOwningHandle());
    setWriteThreadSlowYieldUsec(nativeHandle_, writeThreadSlowYieldUsec);
    return this;
  }

  @Override
  public long writeThreadSlowYieldUsec() {
    assert(isOwningHandle());
    return writeThreadSlowYieldUsec(nativeHandle_);
  }

  @Override
  public DBOptions setSkipStatsUpdateOnDbOpen(final boolean skipStatsUpdateOnDbOpen) {
    assert(isOwningHandle());
    setSkipStatsUpdateOnDbOpen(nativeHandle_, skipStatsUpdateOnDbOpen);
    return this;
  }

  @Override
  public boolean skipStatsUpdateOnDbOpen() {
    assert(isOwningHandle());
    return skipStatsUpdateOnDbOpen(nativeHandle_);
  }

  @Override
  public DBOptions setWalRecoveryMode(final WALRecoveryMode walRecoveryMode) {
    assert(isOwningHandle());
    // The native side receives the enum's byte value.
    setWalRecoveryMode(nativeHandle_, walRecoveryMode.getValue());
    return this;
  }

  @Override
  public WALRecoveryMode walRecoveryMode() {
    assert(isOwningHandle());
    return WALRecoveryMode.getWALRecoveryMode(walRecoveryMode(nativeHandle_));
  }

  @Override
  public DBOptions setAllow2pc(final boolean allow2pc) {
    assert(isOwningHandle());
    setAllow2pc(nativeHandle_, allow2pc);
    return this;
  }

  @Override
  public boolean allow2pc() {
    assert(isOwningHandle());
    return allow2pc(nativeHandle_);
  }

  @Override
  public DBOptions setRowCache(final Cache rowCache) {
    assert(isOwningHandle());
    setRowCache(nativeHandle_, rowCache.nativeHandle_);
    // Keep a Java-side reference; the getter below returns it directly.
    this.rowCache_ = rowCache;
    return this;
  }

  @Override
  public Cache rowCache() {
    assert(isOwningHandle());
    return this.rowCache_;
  }

  @Override
  public DBOptions setWalFilter(final AbstractWalFilter walFilter) {
    assert(isOwningHandle());
    setWalFilter(nativeHandle_, walFilter.nativeHandle_);
    // Keep a Java-side reference; the getter below returns it directly.
    this.walFilter_ = walFilter;
    return this;
  }

  @Override
  public WalFilter walFilter() {
    assert(isOwningHandle());
    return this.walFilter_;
  }

  @Override
  public DBOptions setFailIfOptionsFileError(final boolean failIfOptionsFileError) {
    assert(isOwningHandle());
    setFailIfOptionsFileError(nativeHandle_, failIfOptionsFileError);
    return this;
  }

  @Override
  public boolean failIfOptionsFileError() {
    assert(isOwningHandle());
    return failIfOptionsFileError(nativeHandle_);
  }

  @Override
  public DBOptions setDumpMallocStats(final boolean dumpMallocStats) {
    assert(isOwningHandle());
    setDumpMallocStats(nativeHandle_, dumpMallocStats);
    return this;
  }

  @Override
  public boolean dumpMallocStats() {
    assert(isOwningHandle());
    return dumpMallocStats(nativeHandle_);
  }

  @Override
  public DBOptions setAvoidFlushDuringRecovery(final boolean avoidFlushDuringRecovery) {
    assert(isOwningHandle());
    setAvoidFlushDuringRecovery(nativeHandle_, avoidFlushDuringRecovery);
    return this;
  }

  @Override
  public boolean avoidFlushDuringRecovery() {
    assert(isOwningHandle());
    return avoidFlushDuringRecovery(nativeHandle_);
  }

  @Override
  public DBOptions setAvoidFlushDuringShutdown(final boolean avoidFlushDuringShutdown) {
    assert(isOwningHandle());
    setAvoidFlushDuringShutdown(nativeHandle_, avoidFlushDuringShutdown);
    return this;
  }

  @Override
  public boolean avoidFlushDuringShutdown() {
    assert(isOwningHandle());
    return avoidFlushDuringShutdown(nativeHandle_);
  }

  @Override
  public DBOptions setAllowIngestBehind(final boolean allowIngestBehind) {
    assert(isOwningHandle());
    setAllowIngestBehind(nativeHandle_, allowIngestBehind);
    return this;
  }

  @Override
  public boolean allowIngestBehind() {
    assert(isOwningHandle());
    return allowIngestBehind(nativeHandle_);
  }

  @Override
  public DBOptions setPreserveDeletes(final boolean preserveDeletes) {
    assert(isOwningHandle());
    setPreserveDeletes(nativeHandle_, preserveDeletes);
    return this;
  }

  @Override
  public boolean preserveDeletes() {
    assert(isOwningHandle());
    return preserveDeletes(nativeHandle_);
  }

  @Override
  public DBOptions setTwoWriteQueues(final boolean twoWriteQueues) {
    assert(isOwningHandle());
    setTwoWriteQueues(nativeHandle_, twoWriteQueues);
    return this;
  }

  @Override
  public boolean twoWriteQueues() {
    assert(isOwningHandle());
    return twoWriteQueues(nativeHandle_);
  }

  @Override
  public DBOptions setManualWalFlush(final boolean manualWalFlush) {
    assert(isOwningHandle());
    setManualWalFlush(nativeHandle_, manualWalFlush);
    return this;
  }

  @Override
  public boolean manualWalFlush() {
    assert(isOwningHandle());
    return manualWalFlush(nativeHandle_);
  }

  @Override
  public DBOptions setAtomicFlush(final boolean atomicFlush) {
    assert(isOwningHandle());
    setAtomicFlush(nativeHandle_, atomicFlush);
    return this;
  }

  @Override
  public boolean atomicFlush() {
    assert(isOwningHandle());
    return atomicFlush(nativeHandle_);
  }

  // NOTE(review): not referenced in the visible part of this class;
  // presumably a default shard-bit count for callers elsewhere - confirm.
  static final int DEFAULT_NUM_SHARD_BITS = -1;

  /**
   *

<p>Private constructor to be used by * {@link #getDBOptionsFromProps(java.util.Properties)}</p>

*
   * @param nativeHandle native handle to DBOptions instance.
   */
  private DBOptions(final long nativeHandle) {
    // Hands the handle to the superclass constructor.
    super(nativeHandle);
  }

  // ---------------------------------------------------------------------
  // Static native factories. Each returns a long that is used as a native
  // handle (see the private constructor above).
  // ---------------------------------------------------------------------
  private static native long getDBOptionsFromProps(long cfgHandle, String optString);
  private static native long getDBOptionsFromProps(String optString);

  private static native long newDBOptions();
  private static native long copyDBOptions(final long handle);
  private static native long newDBOptionsFromOptions(final long optionsHandle);
  @Override protected final native void disposeInternal(final long handle);

  // ---------------------------------------------------------------------
  // Native counterparts of the public accessors. The public method of the
  // same name passes nativeHandle_ as the first argument (`handle`).
  // Enum-valued options cross this boundary as `byte`, and referenced
  // native objects as `long` handles.
  // ---------------------------------------------------------------------
  private native void optimizeForSmallDb(final long handle);
  private native void setIncreaseParallelism(long handle, int totalThreads);
  private native void setCreateIfMissing(long handle, boolean flag);
  private native boolean createIfMissing(long handle);
  private native void setCreateMissingColumnFamilies(
      long handle, boolean flag);
  private native boolean createMissingColumnFamilies(long handle);
  private native void setEnv(long handle, long envHandle);
  private native void setErrorIfExists(long handle, boolean errorIfExists);
  private native boolean errorIfExists(long handle);
  private native void setParanoidChecks(
      long handle, boolean paranoidChecks);
  private native boolean paranoidChecks(long handle);
  private native void setRateLimiter(long handle,
      long rateLimiterHandle);
  private native void setSstFileManager(final long handle,
      final long sstFileManagerHandle);
  private native void setLogger(long handle,
      long loggerHandle);
  private native void setInfoLogLevel(long handle, byte logLevel);
  private native byte infoLogLevel(long handle);
  private native void setMaxOpenFiles(long handle, int maxOpenFiles);
  private native int maxOpenFiles(long handle);
  private native void setMaxFileOpeningThreads(final long handle,
      final int maxFileOpeningThreads);
  private native int maxFileOpeningThreads(final long handle);
  private native void setMaxTotalWalSize(long handle,
      long maxTotalWalSize);
  private native long maxTotalWalSize(long handle);
  private native void setStatistics(final long handle, final long statisticsHandle);
  private native long statistics(final long handle);
  private native boolean useFsync(long handle);
  private native void setUseFsync(long handle, boolean useFsync);
  // setDbPaths/dbPaths exchange the paths as two parallel arrays;
  // dbPathsLen reports how many entries the caller must allocate.
  private native void setDbPaths(final long handle, final String[] paths,
      final long[] targetSizes);
  private native long dbPathsLen(final long handle);
  private native void dbPaths(final long handle, final String[] paths,
      final long[] targetSizes);
  private native void setDbLogDir(long handle, String dbLogDir);
  private native String dbLogDir(long handle);
  private native void setWalDir(long handle, String walDir);
  private native String walDir(long handle);
  private native void setDeleteObsoleteFilesPeriodMicros(
      long handle, long micros);
  private native long deleteObsoleteFilesPeriodMicros(long handle);
  private native void setBaseBackgroundCompactions(long handle,
      int baseBackgroundCompactions);
  private native int baseBackgroundCompactions(long handle);
  private native void setMaxBackgroundCompactions(
      long handle, int maxBackgroundCompactions);
  private native int maxBackgroundCompactions(long handle);
  private native void setMaxSubcompactions(long handle, int maxSubcompactions);
  private native int maxSubcompactions(long handle);
  private native void setMaxBackgroundFlushes(
      long handle, int maxBackgroundFlushes);
  private native int maxBackgroundFlushes(long handle);
  private native void setMaxBackgroundJobs(long handle, int maxBackgroundJobs);
  private native int maxBackgroundJobs(long handle);
  private native void setMaxLogFileSize(long handle, long maxLogFileSize)
      throws IllegalArgumentException;
  private native long maxLogFileSize(long handle);
  private native void setLogFileTimeToRoll(
      long handle, long logFileTimeToRoll) throws IllegalArgumentException;
  private native long logFileTimeToRoll(long handle);
  private native void setKeepLogFileNum(long handle, long keepLogFileNum)
      throws IllegalArgumentException;
  private native long keepLogFileNum(long handle);
  private native void setRecycleLogFileNum(long handle, long recycleLogFileNum);
  private native long recycleLogFileNum(long handle);
  private native void setMaxManifestFileSize(
      long handle, long maxManifestFileSize);
  private native long maxManifestFileSize(long handle);
  private native void setTableCacheNumshardbits(
      long handle, int tableCacheNumshardbits);
  private native int tableCacheNumshardbits(long handle);
  private native void setWalTtlSeconds(long handle, long walTtlSeconds);
  private native long walTtlSeconds(long handle);
  private native void setWalSizeLimitMB(long handle, long sizeLimitMB);
  private native long walSizeLimitMB(long handle);
  private native void setManifestPreallocationSize(
      long handle, long size) throws IllegalArgumentException;
  private native long manifestPreallocationSize(long handle);
  private native void setUseDirectReads(long handle, boolean useDirectReads);
  private native boolean useDirectReads(long handle);
  private native void setUseDirectIoForFlushAndCompaction(
      long handle, boolean useDirectIoForFlushAndCompaction);
  private native boolean useDirectIoForFlushAndCompaction(long handle);
  private native void setAllowFAllocate(final long handle,
      final boolean allowFAllocate);
  private native boolean allowFAllocate(final long handle);
  private native void setAllowMmapReads(
      long handle, boolean allowMmapReads);
  private native boolean allowMmapReads(long handle);
  private native void setAllowMmapWrites(
      long handle, boolean allowMmapWrites);
  private native boolean allowMmapWrites(long handle);
  private native void setIsFdCloseOnExec(
      long handle, boolean isFdCloseOnExec);
  private native boolean isFdCloseOnExec(long handle);
  private native void setStatsDumpPeriodSec(
      long handle, int statsDumpPeriodSec);
  private native int statsDumpPeriodSec(long handle);
  private native void setStatsPersistPeriodSec(
      final long handle, final int statsPersistPeriodSec);
  private native int statsPersistPeriodSec(
      final long handle);
  private native void setStatsHistoryBufferSize(
      final long handle, final long statsHistoryBufferSize);
  private native long statsHistoryBufferSize(
      final long handle);
  private native void setAdviseRandomOnOpen(
      long handle, boolean adviseRandomOnOpen);
  private native boolean adviseRandomOnOpen(long handle);
  private native void setDbWriteBufferSize(final long handle,
      final long dbWriteBufferSize);
  // No native getter for the write buffer manager is declared here; the
  // public getter returns the Java-side field instead.
  private native void setWriteBufferManager(final long dbOptionsHandle,
      final long writeBufferManagerHandle);
  private native long dbWriteBufferSize(final long handle);
  private native void setAccessHintOnCompactionStart(final long handle,
      final byte accessHintOnCompactionStart);
  private native byte accessHintOnCompactionStart(final long handle);
  private native void setNewTableReaderForCompactionInputs(final long handle,
      final boolean newTableReaderForCompactionInputs);
  private native boolean newTableReaderForCompactionInputs(final long handle);
  private native void setCompactionReadaheadSize(final long handle,
      final long compactionReadaheadSize);
  private native long compactionReadaheadSize(final long handle);
  private native void setRandomAccessMaxBufferSize(final long handle,
      final long randomAccessMaxBufferSize);
  private native long randomAccessMaxBufferSize(final long handle);
  private native void setWritableFileMaxBufferSize(final long handle,
      final long writableFileMaxBufferSize);
  private native long writableFileMaxBufferSize(final long handle);
  private native void setUseAdaptiveMutex(
      long handle, boolean useAdaptiveMutex);
  private native boolean useAdaptiveMutex(long handle);
  private native void setBytesPerSync(
      long handle, long bytesPerSync);
  private native long bytesPerSync(long handle);
  private native void setWalBytesPerSync(long handle, long walBytesPerSync);
  private native long walBytesPerSync(long handle);
  private native void setStrictBytesPerSync(
      final long handle, final boolean strictBytesPerSync);
  private native boolean strictBytesPerSync(
      final long handle);
  private native void setEnableThreadTracking(long handle,
      boolean enableThreadTracking);
  private native boolean enableThreadTracking(long handle);
  private native void setDelayedWriteRate(long handle, long delayedWriteRate);
  private native long delayedWriteRate(long handle);
  private native void setEnablePipelinedWrite(final long handle,
      final boolean enablePipelinedWrite);
  private native boolean enablePipelinedWrite(final long handle);
  private native void setUnorderedWrite(final long handle,
      final boolean unorderedWrite);
  private native boolean unorderedWrite(final long handle);
  private native void setAllowConcurrentMemtableWrite(long handle,
      boolean allowConcurrentMemtableWrite);
  private native boolean allowConcurrentMemtableWrite(long handle);
  private native void setEnableWriteThreadAdaptiveYield(long handle,
      boolean enableWriteThreadAdaptiveYield);
  private native boolean enableWriteThreadAdaptiveYield(long handle);
  private native void setWriteThreadMaxYieldUsec(long handle,
      long writeThreadMaxYieldUsec);
  private native long writeThreadMaxYieldUsec(long handle);
  private native void setWriteThreadSlowYieldUsec(long handle,
      long writeThreadSlowYieldUsec);
  private native long writeThreadSlowYieldUsec(long handle);
  private native void setSkipStatsUpdateOnDbOpen(final long handle,
      final boolean skipStatsUpdateOnDbOpen);
  private native boolean skipStatsUpdateOnDbOpen(final long handle);
  private native void setWalRecoveryMode(final long handle,
      final byte walRecoveryMode);
  private native byte walRecoveryMode(final long handle);
  private native void setAllow2pc(final long handle,
      final boolean allow2pc);
  private native boolean allow2pc(final long handle);
  // Row cache and WAL filter have setters only; the public getters return
  // the Java-side fields.
  private native void setRowCache(final long handle,
      final long rowCacheHandle);
  private native void setWalFilter(final long handle,
      final long walFilterHandle);
  private native void setFailIfOptionsFileError(final long handle,
      final boolean failIfOptionsFileError);
  private native boolean failIfOptionsFileError(final long handle);
private native void setDumpMallocStats(final long handle,
      final boolean dumpMallocStats);
  private native boolean dumpMallocStats(final long handle);
  private native void setAvoidFlushDuringRecovery(final long handle,
      final boolean avoidFlushDuringRecovery);
  private native boolean avoidFlushDuringRecovery(final long handle);
  private native void setAvoidFlushDuringShutdown(final long handle,
      final boolean avoidFlushDuringShutdown);
  private native boolean avoidFlushDuringShutdown(final long handle);
  private native void setAllowIngestBehind(final long handle,
      final boolean allowIngestBehind);
  private native boolean allowIngestBehind(final long handle);
  private native void setPreserveDeletes(final long handle,
      final boolean preserveDeletes);
  private native boolean preserveDeletes(final long handle);
  private native void setTwoWriteQueues(final long handle,
      final boolean twoWriteQueues);
  private native boolean twoWriteQueues(final long handle);
  private native void setManualWalFlush(final long handle,
      final boolean manualWalFlush);
  private native boolean manualWalFlush(final long handle);
  private native void setAtomicFlush(final long handle,
      final boolean atomicFlush);
  private native boolean atomicFlush(final long handle);

  // instance variables
  // NOTE: If you add new member variables, please update the copy constructor above!
  // These hold the Java-side references recorded by the matching setters
  // (setEnv, setRateLimiter, setRowCache, setWalFilter,
  // setWriteBufferManager).
  private Env env_;
  // NOTE(review): numShardBits_ is not read or written in the visible part
  // of this class - confirm whether it is still needed.
  private int numShardBits_;
  private RateLimiter rateLimiter_;
  private Cache rowCache_;
  private WalFilter walFilter_;
  private WriteBufferManager writeBufferManager_;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/DBOptionsInterface.java000066400000000000000000001534531370372246700254000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import java.util.Collection;
import java.util.List;

/**
 * Database-level options, declared as a self-typed interface.
 *
 * @param <T> the concrete options type; the setters return it so that
 *     calls can be chained.
 */
public interface DBOptionsInterface<T extends DBOptionsInterface<T>> {
  /**
   * Use this if your DB is very small (like under 1GB) and you don't want to
   * spend lots of memory for memtables.
   *
   * @return the instance of the current object.
   */
  T optimizeForSmallDb();

  /**
   * Use the specified object to interact with the environment,
   * e.g. to read/write files, schedule background work, etc.
   * Default: {@link Env#getDefault()}
   *
   * @param env {@link Env} instance.
   * @return the instance of the current Options.
   */
  T setEnv(final Env env);

  /**
   * Returns the {@link Env} instance that was set.
   *
   * @return {@link Env} instance set in the options.
   */
  Env getEnv();

  /**
   *

<p>By default, RocksDB uses only one background thread for flush and * compaction. Calling this function will set it up such that a total of * {@code totalThreads} threads is used.</p>

* *

<p>You almost definitely want to call this function if your system is * bottlenecked by RocksDB.</p>

*
   * @param totalThreads The total number of threads to be used by RocksDB.
   *     A good value is the number of cores.
   *
   * @return the instance of the current Options
   */
  T setIncreaseParallelism(int totalThreads);

  /**
   * If this value is set to true, then the database will be created
   * if it is missing during {@code RocksDB.open()}.
   * Default: false
   *
   * @param flag a flag indicating whether to create the specified database
   *     if it is missing when
   *     {@link RocksDB#open(org.rocksdb.Options, String)} is called.
   * @return the instance of the current Options
   * @see RocksDB#open(org.rocksdb.Options, String)
   */
  T setCreateIfMissing(boolean flag);

  /**
   * Return true if the create_if_missing flag is set to true.
   * If true, the database will be created if it is missing.
   *
   * @return true if the createIfMissing option is set to true.
   * @see #setCreateIfMissing(boolean)
   */
  boolean createIfMissing();

  /**
   *

If true, missing column families will be automatically created

* *

Default: false

* * @param flag a flag indicating if missing column families shall be * created automatically. * @return true if missing column families shall be created automatically * on open. */ T setCreateMissingColumnFamilies(boolean flag); /** * Return true if the create_missing_column_families flag is set * to true. If true column families be created if missing. * * @return true if the createMissingColumnFamilies is set to * true. * @see #setCreateMissingColumnFamilies(boolean) */ boolean createMissingColumnFamilies(); /** * If true, an error will be thrown during RocksDB.open() if the * database already exists. * Default: false * * @param errorIfExists if true, an exception will be thrown * during {@code RocksDB.open()} if the database already exists. * @return the reference to the current option. * @see RocksDB#open(org.rocksdb.Options, String) */ T setErrorIfExists(boolean errorIfExists); /** * If true, an error will be thrown during RocksDB.open() if the * database already exists. * * @return if true, an error is raised when the specified database * already exists before open. */ boolean errorIfExists(); /** * If true, the implementation will do aggressive checking of the * data it is processing and will stop early if it detects any * errors. This may have unforeseen ramifications: for example, a * corruption of one DB entry may cause a large number of entries to * become unreadable or for the entire DB to become unopenable. * If any of the writes to the database fails (Put, Delete, Merge, Write), * the database will switch to read-only mode and fail all other * Write operations. * Default: true * * @param paranoidChecks a flag to indicate whether paranoid-check * is on. * @return the reference to the current option. */ T setParanoidChecks(boolean paranoidChecks); /** * If true, the implementation will do aggressive checking of the * data it is processing and will stop early if it detects any * errors. 
This may have unforeseen ramifications: for example, a * corruption of one DB entry may cause a large number of entries to * become unreadable or for the entire DB to become unopenable. * If any of the writes to the database fails (Put, Delete, Merge, Write), * the database will switch to read-only mode and fail all other * Write operations. * * @return a boolean indicating whether paranoid-check is on. */ boolean paranoidChecks(); /** * Use to control write rate of flush and compaction. Flush has higher * priority than compaction. Rate limiting is disabled if nullptr. * Default: nullptr * * @param rateLimiter {@link org.rocksdb.RateLimiter} instance. * @return the instance of the current object. * * @since 3.10.0 */ T setRateLimiter(RateLimiter rateLimiter); /** * Use to track SST files and control their file deletion rate. * * Features: * - Throttle the deletion rate of the SST files. * - Keep track the total size of all SST files. * - Set a maximum allowed space limit for SST files that when reached * the DB wont do any further flushes or compactions and will set the * background error. * - Can be shared between multiple dbs. * * Limitations: * - Only track and throttle deletes of SST files in * first db_path (db_name if db_paths is empty). * * @param sstFileManager The SST File Manager for the db. * @return the instance of the current object. */ T setSstFileManager(SstFileManager sstFileManager); /** *

Any internal progress/error information generated by * the db will be written to the Logger if it is non-nullptr, * or to a file stored in the same directory as the DB * contents if info_log is nullptr.

* *

Default: nullptr

* * @param logger {@link Logger} instance. * @return the instance of the current object. */ T setLogger(Logger logger); /** *

Sets the RocksDB log level. Default level is INFO

* * @param infoLogLevel log level to set. * @return the instance of the current object. */ T setInfoLogLevel(InfoLogLevel infoLogLevel); /** *

Returns currently set log level.

* @return {@link org.rocksdb.InfoLogLevel} instance. */ InfoLogLevel infoLogLevel(); /** * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open * all files on DB::Open(). You can use this option to increase the number * of threads used to open the files. * * Default: 16 * * @param maxFileOpeningThreads the maximum number of threads to use to * open files * * @return the reference to the current options. */ T setMaxFileOpeningThreads(int maxFileOpeningThreads); /** * If {@link MutableDBOptionsInterface#maxOpenFiles()} is -1, DB will open all * files on DB::Open(). You can use this option to increase the number of * threads used to open the files. * * Default: 16 * * @return the maximum number of threads to use to open files */ int maxFileOpeningThreads(); /** *

Sets the statistics object which collects metrics about database operations. * Statistics objects should not be shared between DB instances as * it does not use any locks to prevent concurrent updates.

* * @param statistics The statistics to set * * @return the instance of the current object. * * @see RocksDB#open(org.rocksdb.Options, String) */ T setStatistics(final Statistics statistics); /** *

Returns statistics object.

* * @return the instance of the statistics object or null if there is no * statistics object. * * @see #setStatistics(Statistics) */ Statistics statistics(); /** *

If true, then every store to stable storage will issue a fsync.

*

If false, then every store to stable storage will issue a fdatasync. * This parameter should be set to true while storing data to * filesystem like ext3 that can lose files after a reboot.

*

Default: false

* * @param useFsync a boolean flag to specify whether to use fsync * @return the instance of the current object. */ T setUseFsync(boolean useFsync); /** *

If true, then every store to stable storage will issue a fsync.

*

If false, then every store to stable storage will issue a fdatasync. * This parameter should be set to true while storing data to * filesystem like ext3 that can lose files after a reboot.

* * @return boolean value indicating if fsync is used. */ boolean useFsync(); /** * A list of paths where SST files can be put into, with its target size. * Newer data is placed into paths specified earlier in the vector while * older data gradually moves to paths specified later in the vector. * * For example, you have a flash device with 10GB allocated for the DB, * as well as a hard drive of 2TB, you should config it to be: * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] * * The system will try to guarantee data under each path is close to but * not larger than the target size. But current and future file sizes used * by determining where to place a file are based on best-effort estimation, * which means there is a chance that the actual size under the directory * is slightly more than target size under some workloads. User should give * some buffer room for those cases. * * If none of the paths has sufficient room to place a file, the file will * be placed to the last path anyway, despite to the target size. * * Placing newer data to earlier paths is also best-efforts. User should * expect user files to be placed in higher levels in some extreme cases. * * If left empty, only one path will be used, which is db_name passed when * opening the DB. * * Default: empty * * @param dbPaths the paths and target sizes * * @return the reference to the current options */ T setDbPaths(final Collection dbPaths); /** * A list of paths where SST files can be put into, with its target size. * Newer data is placed into paths specified earlier in the vector while * older data gradually moves to paths specified later in the vector. * * For example, you have a flash device with 10GB allocated for the DB, * as well as a hard drive of 2TB, you should config it to be: * [{"/flash_path", 10GB}, {"/hard_drive", 2TB}] * * The system will try to guarantee data under each path is close to but * not larger than the target size. 
But current and future file sizes used * by determining where to place a file are based on best-effort estimation, * which means there is a chance that the actual size under the directory * is slightly more than target size under some workloads. User should give * some buffer room for those cases. * * If none of the paths has sufficient room to place a file, the file will * be placed to the last path anyway, despite to the target size. * * Placing newer data to earlier paths is also best-efforts. User should * expect user files to be placed in higher levels in some extreme cases. * * If left empty, only one path will be used, which is db_name passed when * opening the DB. * * Default: {@link java.util.Collections#emptyList()} * * @return dbPaths the paths and target sizes */ List dbPaths(); /** * This specifies the info LOG dir. * If it is empty, the log files will be in the same dir as data. * If it is non empty, the log files will be in the specified dir, * and the db data dir's absolute path will be used as the log file * name's prefix. * * @param dbLogDir the path to the info log directory * @return the instance of the current object. */ T setDbLogDir(String dbLogDir); /** * Returns the directory of info log. * * If it is empty, the log files will be in the same dir as data. * If it is non empty, the log files will be in the specified dir, * and the db data dir's absolute path will be used as the log file * name's prefix. * * @return the path to the info log directory */ String dbLogDir(); /** * This specifies the absolute dir path for write-ahead logs (WAL). * If it is empty, the log files will be in the same dir as data, * dbname is used as the data dir by default * If it is non empty, the log files will be in kept the specified dir. * When destroying the db, * all log files in wal_dir and the dir itself is deleted * * @param walDir the path to the write-ahead-log directory. * @return the instance of the current object. 
*/ T setWalDir(String walDir); /** * Returns the path to the write-ahead-logs (WAL) directory. * * If it is empty, the log files will be in the same dir as data, * dbname is used as the data dir by default. * If it is non empty, the log files will be kept in the specified dir. * When destroying the db, * all log files in wal_dir and the dir itself are deleted. * * @return the path to the write-ahead-logs (WAL) directory. */ String walDir(); /** * The periodicity when obsolete files get deleted. The default * value is 6 hours. The files that get out of scope by compaction * process will still be automatically deleted on every compaction, * regardless of this setting. * * @param micros the time interval in microseconds * @return the instance of the current object. */ T setDeleteObsoleteFilesPeriodMicros(long micros); /** * The periodicity when obsolete files get deleted. The default * value is 6 hours. The files that get out of scope by compaction * process will still be automatically deleted on every compaction, * regardless of this setting. * * @return the time interval in microseconds when obsolete files will be deleted. */ long deleteObsoleteFilesPeriodMicros(); /** * This value represents the maximum number of threads that will * concurrently perform a compaction job by breaking it into multiple, * smaller ones that are run simultaneously. * Default: 1 (i.e. no subcompactions) * * @param maxSubcompactions The maximum number of threads that will * concurrently perform a compaction job * * @return the instance of the current object. */ T setMaxSubcompactions(int maxSubcompactions); /** * This value represents the maximum number of threads that will * concurrently perform a compaction job by breaking it into multiple, * smaller ones that are run simultaneously. * Default: 1 (i.e.
no subcompactions) * * @return The maximum number of threads that will concurrently perform a * compaction job */ int maxSubcompactions(); /** * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the * value of max_background_jobs. For backwards compatibility we will set * `max_background_jobs = max_background_compactions + max_background_flushes` * in the case where user sets at least one of `max_background_compactions` or * `max_background_flushes`. * * Specifies the maximum number of concurrent background flush jobs. * If you're increasing this, also consider increasing number of threads in * HIGH priority thread pool. For more information, see * Default: -1 * * @param maxBackgroundFlushes number of max concurrent flush jobs * @return the instance of the current object. * * @see RocksEnv#setBackgroundThreads(int) * @see RocksEnv#setBackgroundThreads(int, Priority) * @see MutableDBOptionsInterface#maxBackgroundCompactions() * * @deprecated Use {@link MutableDBOptionsInterface#setMaxBackgroundJobs(int)} */ @Deprecated T setMaxBackgroundFlushes(int maxBackgroundFlushes); /** * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the * value of max_background_jobs. For backwards compatibility we will set * `max_background_jobs = max_background_compactions + max_background_flushes` * in the case where user sets at least one of `max_background_compactions` or * `max_background_flushes`. * * Returns the maximum number of concurrent background flush jobs. * If you're increasing this, also consider increasing number of threads in * HIGH priority thread pool. For more information, see * Default: -1 * * @return the maximum number of concurrent background flush jobs. * @see RocksEnv#setBackgroundThreads(int) * @see RocksEnv#setBackgroundThreads(int, Priority) */ @Deprecated int maxBackgroundFlushes(); /** * Specifies the maximum size of a info log file. 
If the current log file * is larger than `max_log_file_size`, a new info log file will * be created. * If 0, all logs will be written to one log file. * * @param maxLogFileSize the maximum size of a info log file. * @return the instance of the current object. * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms * while overflowing the underlying platform specific value. */ T setMaxLogFileSize(long maxLogFileSize); /** * Returns the maximum size of a info log file. If the current log file * is larger than this size, a new info log file will be created. * If 0, all logs will be written to one log file. * * @return the maximum size of the info log file. */ long maxLogFileSize(); /** * Specifies the time interval for the info log file to roll (in seconds). * If specified with non-zero value, log file will be rolled * if it has been active longer than `log_file_time_to_roll`. * Default: 0 (disabled) * * @param logFileTimeToRoll the time interval in seconds. * @return the instance of the current object. * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms * while overflowing the underlying platform specific value. */ T setLogFileTimeToRoll(long logFileTimeToRoll); /** * Returns the time interval for the info log file to roll (in seconds). * If specified with non-zero value, log file will be rolled * if it has been active longer than `log_file_time_to_roll`. * Default: 0 (disabled) * * @return the time interval in seconds. */ long logFileTimeToRoll(); /** * Specifies the maximum number of info log files to be kept. * Default: 1000 * * @param keepLogFileNum the maximum number of info log files to be kept. * @return the instance of the current object. * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms * while overflowing the underlying platform specific value. */ T setKeepLogFileNum(long keepLogFileNum); /** * Returns the maximum number of info log files to be kept. 
* Default: 1000 * * @return the maximum number of info log files to be kept. */ long keepLogFileNum(); /** * Recycle log files. * * If non-zero, we will reuse previously written log files for new * logs, overwriting the old data. The value indicates how many * such files we will keep around at any point in time for later * use. * * This is more efficient because the blocks are already * allocated and fdatasync does not need to update the inode after * each write. * * Default: 0 * * @param recycleLogFileNum the number of log files to keep for recycling * * @return the reference to the current options */ T setRecycleLogFileNum(long recycleLogFileNum); /** * Recycle log files. * * If non-zero, we will reuse previously written log files for new * logs, overwriting the old data. The value indicates how many * such files we will keep around at any point in time for later * use. * * This is more efficient because the blocks are already * allocated and fdatasync does not need to update the inode after * each write. * * Default: 0 * * @return the number of log files kept for recycling */ long recycleLogFileNum(); /** * Manifest file is rolled over on reaching this limit. * The older manifest file will be deleted. * The default value is 1GB so that the manifest file can grow, but not * reach the limit of storage capacity. * * @param maxManifestFileSize the size limit of a manifest file. * @return the instance of the current object. */ T setMaxManifestFileSize(long maxManifestFileSize); /** * Manifest file is rolled over on reaching this limit. * The older manifest file will be deleted. * The default value is 1GB so that the manifest file can grow, but not * reach the limit of storage capacity. * * @return the size limit of a manifest file. */ long maxManifestFileSize(); /** * Number of shards used for table cache. * * @param tableCacheNumshardbits the number of shards * @return the instance of the current object.
*/ T setTableCacheNumshardbits(int tableCacheNumshardbits); /** * Number of shards used for table cache. * * @return the number of shards used for table cache. * @see #setTableCacheNumshardbits(int) */ int tableCacheNumshardbits(); /** * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs * will be deleted. *
    *
  1. If both set to 0, logs will be deleted asap and will not get into * the archive.
  2. *
  3. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, * WAL files will be checked every 10 min and if total size is greater * than WAL_size_limit_MB, they will be deleted starting with the * earliest until size_limit is met. All empty files will be deleted.
  4. *
  5. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then * WAL files will be checked every WAL_ttl_seconds / 2 and those that * are older than WAL_ttl_seconds will be deleted.
  6. *
  7. If both are not 0, WAL files will be checked every 10 min and both * checks will be performed with ttl being first.
  8. *
* * @param walTtlSeconds the ttl seconds * @return the instance of the current object. * @see #setWalSizeLimitMB(long) */ T setWalTtlSeconds(long walTtlSeconds); /** * WalTtlSeconds() and walSizeLimitMB() affect how archived logs * will be deleted. *
    *
  1. If both set to 0, logs will be deleted asap and will not get into * the archive.
  2. *
  3. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, * WAL files will be checked every 10 min and if total size is greater * than WAL_size_limit_MB, they will be deleted starting with the * earliest until size_limit is met. All empty files will be deleted.
  4. *
  5. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then * WAL files will be checked every WAL_ttl_seconds / 2 and those that * are older than WAL_ttl_seconds will be deleted.
  6. *
  7. If both are not 0, WAL files will be checked every 10 min and both * checks will be performed with ttl being first.
  8. *
* * @return the wal-ttl seconds * @see #walSizeLimitMB() */ long walTtlSeconds(); /** * WalTtlSeconds() and walSizeLimitMB() affect how archived logs * will be deleted. *
    *
  1. If both set to 0, logs will be deleted asap and will not get into * the archive.
  2. *
  3. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, * WAL files will be checked every 10 min and if total size is greater * than WAL_size_limit_MB, they will be deleted starting with the * earliest until size_limit is met. All empty files will be deleted.
  4. *
  5. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then * WAL files will be checked every WAL_ttl_seconds / 2 and those that * are older than WAL_ttl_seconds will be deleted.
  6. *
  7. If both are not 0, WAL files will be checked every 10 min and both * checks will be performed with ttl being first.
  8. *
* * @param sizeLimitMB size limit in mega-bytes. * @return the instance of the current object. * @see #setWalSizeLimitMB(long) */ T setWalSizeLimitMB(long sizeLimitMB); /** * {@link #walTtlSeconds()} and {@code #walSizeLimitMB()} affect how archived logs * will be deleted. *
    *
  1. If both set to 0, logs will be deleted asap and will not get into * the archive.
  2. *
  3. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, * WAL files will be checked every 10 min and if total size is greater * than WAL_size_limit_MB, they will be deleted starting with the * earliest until size_limit is met. All empty files will be deleted.
  4. *
  5. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then * WAL files will be checked every WAL_ttl_seconds / 2 and those that * are older than WAL_ttl_seconds will be deleted.
  6. *
  7. If both are not 0, WAL files will be checked every 10 min and both * checks will be performed with ttl being first.
  8. *
* @return size limit in mega-bytes. * @see #walSizeLimitMB() */ long walSizeLimitMB(); /** * Number of bytes to preallocate (via fallocate) the manifest * files. Default is 4mb, which is reasonable to reduce random IO * as well as prevent overallocation for mounts that preallocate * large amounts of data (such as xfs's allocsize option). * * @param size the size in byte * @return the instance of the current object. * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms * while overflowing the underlying platform specific value. */ T setManifestPreallocationSize(long size); /** * Number of bytes to preallocate (via fallocate) the manifest * files. Default is 4mb, which is reasonable to reduce random IO * as well as prevent overallocation for mounts that preallocate * large amounts of data (such as xfs's allocsize option). * * @return size in bytes. */ long manifestPreallocationSize(); /** * Enable the OS to use direct I/O for reading sst tables. * Default: false * * @param useDirectReads if true, then direct read is enabled * @return the instance of the current object. */ T setUseDirectReads(boolean useDirectReads); /** * Enable the OS to use direct I/O for reading sst tables. * Default: false * * @return if true, then direct reads are enabled */ boolean useDirectReads(); /** * Enable the OS to use direct reads and writes in flush and * compaction * Default: false * * @param useDirectIoForFlushAndCompaction if true, then direct * I/O will be enabled for background flush and compactions * @return the instance of the current object. 
*/ T setUseDirectIoForFlushAndCompaction(boolean useDirectIoForFlushAndCompaction); /** * Enable the OS to use direct reads and writes in flush and * compaction * * @return if true, then direct I/O is enabled for flush and * compaction */ boolean useDirectIoForFlushAndCompaction(); /** * Whether fallocate calls are allowed * * @param allowFAllocate false if fallocate() calls are bypassed * * @return the reference to the current options. */ T setAllowFAllocate(boolean allowFAllocate); /** * Whether fallocate calls are allowed * * @return false if fallocate() calls are bypassed */ boolean allowFAllocate(); /** * Allow the OS to mmap files for reading sst tables. * Default: false * * @param allowMmapReads true if mmap reads are allowed. * @return the instance of the current object. */ T setAllowMmapReads(boolean allowMmapReads); /** * Allow the OS to mmap files for reading sst tables. * Default: false * * @return true if mmap reads are allowed. */ boolean allowMmapReads(); /** * Allow the OS to mmap files for writing. Default: false * * @param allowMmapWrites true if mmap writes are allowed. * @return the instance of the current object. */ T setAllowMmapWrites(boolean allowMmapWrites); /** * Allow the OS to mmap files for writing. Default: false * * @return true if mmap writes are allowed. */ boolean allowMmapWrites(); /** * Disable child processes from inheriting open files. Default: true * * @param isFdCloseOnExec true if child process inheriting open * files is disabled. * @return the instance of the current object. */ T setIsFdCloseOnExec(boolean isFdCloseOnExec); /** * Disable child processes from inheriting open files. Default: true * * @return true if child process inheriting open files is disabled. */ boolean isFdCloseOnExec(); /** * If set true, will hint the underlying file system that the file * access pattern is random, when a sst file is opened. * Default: true * * @param adviseRandomOnOpen true if hinting random access is on. * @return the instance of the current object.
*/ T setAdviseRandomOnOpen(boolean adviseRandomOnOpen); /** * If set true, will hint the underlying file system that the file * access pattern is random, when a sst file is opened. * Default: true * * @return true if hinting random access is on. */ boolean adviseRandomOnOpen(); /** * Amount of data to build up in memtables across all column * families before writing to disk. * * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()}, * which enforces a limit for a single memtable. * * This feature is disabled by default. Specify a non-zero value * to enable it. * * Default: 0 (disabled) * * @param dbWriteBufferSize the size of the write buffer * * @return the reference to the current options. */ T setDbWriteBufferSize(long dbWriteBufferSize); /** * Use passed {@link WriteBufferManager} to control memory usage across * multiple column families and/or DB instances. * * Check * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager * for more details on when to use it * * @param writeBufferManager The WriteBufferManager to use * @return the reference of the current options. */ T setWriteBufferManager(final WriteBufferManager writeBufferManager); /** * Reference to {@link WriteBufferManager} used by it.
* * Default: null (Disabled) * * @return a reference to WriteBufferManager */ WriteBufferManager writeBufferManager(); /** * Amount of data to build up in memtables across all column * families before writing to disk. * * This is distinct from {@link ColumnFamilyOptions#writeBufferSize()}, * which enforces a limit for a single memtable. * * This feature is disabled by default. Specify a non-zero value * to enable it. * * Default: 0 (disabled) * * @return the size of the write buffer */ long dbWriteBufferSize(); /** * Specify the file access pattern once a compaction is started. * It will be applied to all input files of a compaction. * * Default: {@link AccessHint#NORMAL} * * @param accessHint The access hint * * @return the reference to the current options. */ T setAccessHintOnCompactionStart(final AccessHint accessHint); /** * Specify the file access pattern once a compaction is started. * It will be applied to all input files of a compaction. * * Default: {@link AccessHint#NORMAL} * * @return The access hint */ AccessHint accessHintOnCompactionStart(); /** * If true, always create a new file descriptor and new table reader * for compaction inputs. Turn this parameter on may introduce extra * memory usage in the table reader, if it allocates extra memory * for indexes. This will allow file descriptor prefetch options * to be set for compaction input files and not to impact file * descriptors for the same file used by user queries. * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()} * for this mode if using block-based table. * * Default: false * * @param newTableReaderForCompactionInputs true if a new file descriptor and * table reader should be created for compaction inputs * * @return the reference to the current options. */ T setNewTableReaderForCompactionInputs( boolean newTableReaderForCompactionInputs); /** * If true, always create a new file descriptor and new table reader * for compaction inputs. 
Turn this parameter on may introduce extra * memory usage in the table reader, if it allocates extra memory * for indexes. This will allow file descriptor prefetch options * to be set for compaction input files and not to impact file * descriptors for the same file used by user queries. * Suggest to enable {@link BlockBasedTableConfig#cacheIndexAndFilterBlocks()} * for this mode if using block-based table. * * Default: false * * @return true if a new file descriptor and table reader are created for * compaction inputs */ boolean newTableReaderForCompactionInputs(); /** * This is a maximum buffer size that is used by WinMmapReadableFile in * unbuffered disk I/O mode. We need to maintain an aligned buffer for * reads. We allow the buffer to grow until the specified value and then * for bigger requests allocate one shot buffers. In unbuffered mode we * always bypass read-ahead buffer at ReadaheadRandomAccessFile * When read-ahead is required we then make use of * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and * always try to read ahead. * With read-ahead we always pre-allocate buffer to the size instead of * growing it up to a limit. * * This option is currently honored only on Windows * * Default: 1 Mb * * Special value: 0 - means do not maintain per instance buffer. Allocate * per request buffer and avoid locking. * * @param randomAccessMaxBufferSize the maximum size of the random access * buffer * * @return the reference to the current options. */ T setRandomAccessMaxBufferSize(long randomAccessMaxBufferSize); /** * This is a maximum buffer size that is used by WinMmapReadableFile in * unbuffered disk I/O mode. We need to maintain an aligned buffer for * reads. We allow the buffer to grow until the specified value and then * for bigger requests allocate one shot buffers. 
In unbuffered mode we * always bypass read-ahead buffer at ReadaheadRandomAccessFile * When read-ahead is required we then make use of * {@link MutableDBOptionsInterface#compactionReadaheadSize()} value and * always try to read ahead. With read-ahead we always pre-allocate buffer * to the size instead of growing it up to a limit. * * This option is currently honored only on Windows * * Default: 1 Mb * * Special value: 0 - means do not maintain per instance buffer. Allocate * per request buffer and avoid locking. * * @return the maximum size of the random access buffer */ long randomAccessMaxBufferSize(); /** * Use adaptive mutex, which spins in the user space before resorting * to kernel. This could reduce context switch when the mutex is not * heavily contended. However, if the mutex is hot, we could end up * wasting spin time. * Default: false * * @param useAdaptiveMutex true if adaptive mutex is used. * @return the instance of the current object. */ T setUseAdaptiveMutex(boolean useAdaptiveMutex); /** * Use adaptive mutex, which spins in the user space before resorting * to kernel. This could reduce context switch when the mutex is not * heavily contended. However, if the mutex is hot, we could end up * wasting spin time. * Default: false * * @return true if adaptive mutex is used. */ boolean useAdaptiveMutex(); //TODO(AR) NOW // /** // * Sets the {@link EventListener}s whose callback functions // * will be called when specific RocksDB event happens. // * // * @param listeners the listeners who should be notified on various events. // * // * @return the instance of the current object. // */ // T setListeners(final List listeners); // // /** // * Gets the {@link EventListener}s whose callback functions // * will be called when specific RocksDB event happens. // * // * @return a collection of Event listeners. // */ // Collection listeners(); /** * If true, then the status of the threads involved in this DB will * be tracked and available via GetThreadList() API. 
* * Default: false * * @param enableThreadTracking true to enable tracking * * @return the reference to the current options. */ T setEnableThreadTracking(boolean enableThreadTracking); /** * If true, then the status of the threads involved in this DB will * be tracked and available via GetThreadList() API. * * Default: false * * @return true if tracking is enabled */ boolean enableThreadTracking(); /** * By default, a single write thread queue is maintained. The thread gets * to the head of the queue becomes write batch group leader and responsible * for writing to WAL and memtable for the batch group. * * If {@link #enablePipelinedWrite()} is true, separate write thread queue is * maintained for WAL write and memtable write. A write thread first enter WAL * writer queue and then memtable writer queue. Pending thread on the WAL * writer queue thus only have to wait for previous writers to finish their * WAL writing but not the memtable writing. Enabling the feature may improve * write throughput and reduce latency of the prepare phase of two-phase * commit. * * Default: false * * @param enablePipelinedWrite true to enabled pipelined writes * * @return the reference to the current options. */ T setEnablePipelinedWrite(final boolean enablePipelinedWrite); /** * Returns true if pipelined writes are enabled. * See {@link #setEnablePipelinedWrite(boolean)}. * * @return true if pipelined writes are enabled, false otherwise. */ boolean enablePipelinedWrite(); /** * Setting {@link #unorderedWrite()} to true trades higher write throughput with * relaxing the immutability guarantee of snapshots. This violates the * repeatability one expects from ::Get from a snapshot, as well as * ::MultiGet and Iterator's consistent-point-in-time view property. * If the application cannot tolerate the relaxed guarantees, it can implement * its own mechanisms to work around that and yet benefit from the higher * throughput. 
Using TransactionDB with WRITE_PREPARED write policy and
   * {@link #twoWriteQueues()} true is one way to achieve immutable snapshots despite
   * unordered_write.
   *
   * By default, i.e., when it is false, rocksdb does not advance the sequence
   * number for new snapshots unless all the writes with lower sequence numbers
   * are already finished. This provides the immutability that we expect from
   * snapshots. Moreover, since Iterator and MultiGet internally depend on
   * snapshots, the snapshot immutability results in Iterator and MultiGet
   * offering a consistent-point-in-time view. If set to true, although the
   * Read-Your-Own-Write property is still provided, the snapshot immutability
   * property is relaxed: the writes issued after the snapshot is obtained (with
   * larger sequence numbers) will still not be visible to the reads from that
   * snapshot, however, there still might be pending writes (with lower sequence
   * number) that will change the state visible to the snapshot after they
   * land in the memtable.
   *
   * @param unorderedWrite true to enable unordered writes
   *
   * @return the reference to the current options.
   */
  T setUnorderedWrite(final boolean unorderedWrite);

  /**
   * Returns true if unordered writes are enabled.
   * See {@link #setUnorderedWrite(boolean)}.
   *
   * @return true if unordered writes are enabled, false otherwise.
   */
  boolean unorderedWrite();

  /**
   * If true, allow multi-writers to update mem tables in parallel.
   * Only some memtable factories support concurrent writes; currently it
   * is implemented only for SkipListFactory. Concurrent memtable writes
   * are not compatible with inplace_update_support or filter_deletes.
   * It is strongly recommended to set
   * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use
   * this feature.
   * Default: true
   *
   * @param allowConcurrentMemtableWrite true to enable concurrent writes
   *     for the memtable
   *
   * @return the reference to the current options.
*/ T setAllowConcurrentMemtableWrite(boolean allowConcurrentMemtableWrite); /** * If true, allow multi-writers to update mem tables in parallel. * Only some memtable factorys support concurrent writes; currently it * is implemented only for SkipListFactory. Concurrent memtable writes * are not compatible with inplace_update_support or filter_deletes. * It is strongly recommended to set * {@link #setEnableWriteThreadAdaptiveYield(boolean)} if you are going to use * this feature. * Default: true * * @return true if concurrent writes are enabled for the memtable */ boolean allowConcurrentMemtableWrite(); /** * If true, threads synchronizing with the write batch group leader will * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a * mutex. This can substantially improve throughput for concurrent workloads, * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled. * Default: true * * @param enableWriteThreadAdaptiveYield true to enable adaptive yield for the * write threads * * @return the reference to the current options. */ T setEnableWriteThreadAdaptiveYield( boolean enableWriteThreadAdaptiveYield); /** * If true, threads synchronizing with the write batch group leader will * wait for up to {@link #writeThreadMaxYieldUsec()} before blocking on a * mutex. This can substantially improve throughput for concurrent workloads, * regardless of whether {@link #allowConcurrentMemtableWrite()} is enabled. * Default: true * * @return true if adaptive yield is enabled * for the writing threads */ boolean enableWriteThreadAdaptiveYield(); /** * The maximum number of microseconds that a write operation will use * a yielding spin loop to coordinate with other write threads before * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is * set properly) increasing this value is likely to increase RocksDB * throughput at the expense of increased CPU usage. 
* Default: 100 * * @param writeThreadMaxYieldUsec maximum number of microseconds * * @return the reference to the current options. */ T setWriteThreadMaxYieldUsec(long writeThreadMaxYieldUsec); /** * The maximum number of microseconds that a write operation will use * a yielding spin loop to coordinate with other write threads before * blocking on a mutex. (Assuming {@link #writeThreadSlowYieldUsec()} is * set properly) increasing this value is likely to increase RocksDB * throughput at the expense of increased CPU usage. * Default: 100 * * @return the maximum number of microseconds */ long writeThreadMaxYieldUsec(); /** * The latency in microseconds after which a std::this_thread::yield * call (sched_yield on Linux) is considered to be a signal that * other processes or threads would like to use the current core. * Increasing this makes writer threads more likely to take CPU * by spinning, which will show up as an increase in the number of * involuntary context switches. * Default: 3 * * @param writeThreadSlowYieldUsec the latency in microseconds * * @return the reference to the current options. */ T setWriteThreadSlowYieldUsec(long writeThreadSlowYieldUsec); /** * The latency in microseconds after which a std::this_thread::yield * call (sched_yield on Linux) is considered to be a signal that * other processes or threads would like to use the current core. * Increasing this makes writer threads more likely to take CPU * by spinning, which will show up as an increase in the number of * involuntary context switches. * Default: 3 * * @return writeThreadSlowYieldUsec the latency in microseconds */ long writeThreadSlowYieldUsec(); /** * If true, then DB::Open() will not update the statistics used to optimize * compaction decision by loading table properties from many files. * Turning off this feature will improve DBOpen time especially in * disk environment. 
* * Default: false * * @param skipStatsUpdateOnDbOpen true if updating stats will be skipped * * @return the reference to the current options. */ T setSkipStatsUpdateOnDbOpen(boolean skipStatsUpdateOnDbOpen); /** * If true, then DB::Open() will not update the statistics used to optimize * compaction decision by loading table properties from many files. * Turning off this feature will improve DBOpen time especially in * disk environment. * * Default: false * * @return true if updating stats will be skipped */ boolean skipStatsUpdateOnDbOpen(); /** * Recovery mode to control the consistency while replaying WAL * * Default: {@link WALRecoveryMode#PointInTimeRecovery} * * @param walRecoveryMode The WAL recover mode * * @return the reference to the current options. */ T setWalRecoveryMode(WALRecoveryMode walRecoveryMode); /** * Recovery mode to control the consistency while replaying WAL * * Default: {@link WALRecoveryMode#PointInTimeRecovery} * * @return The WAL recover mode */ WALRecoveryMode walRecoveryMode(); /** * if set to false then recovery will fail when a prepared * transaction is encountered in the WAL * * Default: false * * @param allow2pc true if two-phase-commit is enabled * * @return the reference to the current options. */ T setAllow2pc(boolean allow2pc); /** * if set to false then recovery will fail when a prepared * transaction is encountered in the WAL * * Default: false * * @return true if two-phase-commit is enabled */ boolean allow2pc(); /** * A global cache for table-level rows. * * Default: null (disabled) * * @param rowCache The global row cache * * @return the reference to the current options. */ T setRowCache(final Cache rowCache); /** * A global cache for table-level rows. * * Default: null (disabled) * * @return The global row cache */ Cache rowCache(); /** * A filter object supplied to be invoked while processing write-ahead-logs * (WALs) during recovery. 
The filter provides a way to inspect log * records, ignoring a particular record or skipping replay. * The filter is invoked at startup and is invoked from a single-thread * currently. * * @param walFilter the filter for processing WALs during recovery. * * @return the reference to the current options. */ T setWalFilter(final AbstractWalFilter walFilter); /** * Get's the filter for processing WALs during recovery. * See {@link #setWalFilter(AbstractWalFilter)}. * * @return the filter used for processing WALs during recovery. */ WalFilter walFilter(); /** * If true, then DB::Open / CreateColumnFamily / DropColumnFamily * / SetOptions will fail if options file is not detected or properly * persisted. * * DEFAULT: false * * @param failIfOptionsFileError true if we should fail if there is an error * in the options file * * @return the reference to the current options. */ T setFailIfOptionsFileError(boolean failIfOptionsFileError); /** * If true, then DB::Open / CreateColumnFamily / DropColumnFamily * / SetOptions will fail if options file is not detected or properly * persisted. * * DEFAULT: false * * @return true if we should fail if there is an error in the options file */ boolean failIfOptionsFileError(); /** * If true, then print malloc stats together with rocksdb.stats * when printing to LOG. * * DEFAULT: false * * @param dumpMallocStats true if malloc stats should be printed to LOG * * @return the reference to the current options. */ T setDumpMallocStats(boolean dumpMallocStats); /** * If true, then print malloc stats together with rocksdb.stats * when printing to LOG. * * DEFAULT: false * * @return true if malloc stats should be printed to LOG */ boolean dumpMallocStats(); /** * By default RocksDB replay WAL logs and flush them on DB open, which may * create very small SST files. If this option is enabled, RocksDB will try * to avoid (but not guarantee not to) flush during recovery. 
Also, existing * WAL logs will be kept, so that if crash happened before flush, we still * have logs to recover from. * * DEFAULT: false * * @param avoidFlushDuringRecovery true to try to avoid (but not guarantee * not to) flush during recovery * * @return the reference to the current options. */ T setAvoidFlushDuringRecovery(boolean avoidFlushDuringRecovery); /** * By default RocksDB replay WAL logs and flush them on DB open, which may * create very small SST files. If this option is enabled, RocksDB will try * to avoid (but not guarantee not to) flush during recovery. Also, existing * WAL logs will be kept, so that if crash happened before flush, we still * have logs to recover from. * * DEFAULT: false * * @return true to try to avoid (but not guarantee not to) flush during * recovery */ boolean avoidFlushDuringRecovery(); /** * Set this option to true during creation of database if you want * to be able to ingest behind (call IngestExternalFile() skipping keys * that already exist, rather than overwriting matching keys). * Setting this option to true will affect 2 things: * 1) Disable some internal optimizations around SST file compression * 2) Reserve bottom-most level for ingested files only. * 3) Note that num_levels should be >= 3 if this option is turned on. * * DEFAULT: false * * @param allowIngestBehind true to allow ingest behind, false to disallow. * * @return the reference to the current options. */ T setAllowIngestBehind(final boolean allowIngestBehind); /** * Returns true if ingest behind is allowed. * See {@link #setAllowIngestBehind(boolean)}. * * @return true if ingest behind is allowed, false otherwise. */ boolean allowIngestBehind(); /** * Needed to support differential snapshots. * If set to true then DB will only process deletes with sequence number * less than what was set by SetPreserveDeletesSequenceNumber(uint64_t ts). * Clients are responsible to periodically call this method to advance * the cutoff time. 
If this method is never called and preserve_deletes * is set to true NO deletes will ever be processed. * At the moment this only keeps normal deletes, SingleDeletes will * not be preserved. * * DEFAULT: false * * @param preserveDeletes true to preserve deletes. * * @return the reference to the current options. */ T setPreserveDeletes(final boolean preserveDeletes); /** * Returns true if deletes are preserved. * See {@link #setPreserveDeletes(boolean)}. * * @return true if deletes are preserved, false otherwise. */ boolean preserveDeletes(); /** * If enabled it uses two queues for writes, one for the ones with * disable_memtable and one for the ones that also write to memtable. This * allows the memtable writes not to lag behind other writes. It can be used * to optimize MySQL 2PC in which only the commits, which are serial, write to * memtable. * * DEFAULT: false * * @param twoWriteQueues true to enable two write queues, false otherwise. * * @return the reference to the current options. */ T setTwoWriteQueues(final boolean twoWriteQueues); /** * Returns true if two write queues are enabled. * * @return true if two write queues are enabled, false otherwise. */ boolean twoWriteQueues(); /** * If true WAL is not flushed automatically after each write. Instead it * relies on manual invocation of FlushWAL to write the WAL buffer to its * file. * * DEFAULT: false * * @param manualWalFlush true to set disable automatic WAL flushing, * false otherwise. * * @return the reference to the current options. */ T setManualWalFlush(final boolean manualWalFlush); /** * Returns true if automatic WAL flushing is disabled. * See {@link #setManualWalFlush(boolean)}. * * @return true if automatic WAL flushing is disabled, false otherwise. */ boolean manualWalFlush(); /** * If true, RocksDB supports flushing multiple column families and committing * their results atomically to MANIFEST. 
Note that it is not * necessary to set atomic_flush to true if WAL is always enabled since WAL * allows the database to be restored to the last persistent state in WAL. * This option is useful when there are column families with writes NOT * protected by WAL. * For manual flush, application has to specify which column families to * flush atomically in {@link RocksDB#flush(FlushOptions, List)}. * For auto-triggered flush, RocksDB atomically flushes ALL column families. * * Currently, any WAL-enabled writes after atomic flush may be replayed * independently if the process crashes later and tries to recover. * * @param atomicFlush true to enable atomic flush of multiple column families. * * @return the reference to the current options. */ T setAtomicFlush(final boolean atomicFlush); /** * Determine if atomic flush of multiple column families is enabled. * * See {@link #setAtomicFlush(boolean)}. * * @return true if atomic flush is enabled. */ boolean atomicFlush(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/DataBlockIndexType.java000066400000000000000000000012611370372246700253610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * DataBlockIndexType used in conjunction with BlockBasedTable.
 *
 * Every constant is bound to a single byte value, which is handed back by
 * {@link #getValue()}.
 */
public enum DataBlockIndexType {

  /**
   * traditional block type (byte value 0x0)
   */
  kDataBlockBinarySearch((byte) 0x0),

  /**
   * additional hash index (byte value 0x1)
   */
  kDataBlockBinaryAndHash((byte) 0x1);

  /** Byte value this constant was constructed with. */
  private final byte value;

  /**
   * @param code the byte value to associate with the constant
   */
  DataBlockIndexType(final byte code) {
    this.value = code;
  }

  /**
   * Returns the byte value associated with this constant.
   * Package-private: not part of the public API.
   *
   * @return the byte value given at construction
   */
  byte getValue() {
    return this.value;
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/DbPath.java000066400000000000000000000021141370372246700230430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.file.Path; /** * Tuple of database path and target size */ public class DbPath { final Path path; final long targetSize; public DbPath(final Path path, final long targetSize) { this.path = path; this.targetSize = targetSize; } @Override public boolean equals(final Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } final DbPath dbPath = (DbPath) o; if (targetSize != dbPath.targetSize) { return false; } return path != null ? path.equals(dbPath.path) : dbPath.path == null; } @Override public int hashCode() { int result = path != null ? path.hashCode() : 0; result = 31 * result + (int) (targetSize ^ (targetSize >>> 32)); return result; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/DirectSlice.java000066400000000000000000000076461370372246700241120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * Base class for slices which will receive direct * ByteBuffer based access to the underlying data. * * ByteBuffer backed slices typically perform better with * larger keys and values. 
When using smaller keys and * values consider using @see org.rocksdb.Slice */ public class DirectSlice extends AbstractSlice { public final static DirectSlice NONE = new DirectSlice(); /** * Indicates whether we have to free the memory pointed to by the Slice */ private final boolean internalBuffer; private volatile boolean cleared = false; private volatile long internalBufferOffset = 0; /** * Called from JNI to construct a new Java DirectSlice * without an underlying C++ object set * at creation time. * * Note: You should be aware that it is intentionally marked as * package-private. This is so that developers cannot construct their own * default DirectSlice objects (at present). As developers cannot construct * their own DirectSlice objects through this, they are not creating * underlying C++ DirectSlice objects, and so there is nothing to free * (dispose) from Java. */ DirectSlice() { super(); this.internalBuffer = false; } /** * Constructs a slice * where the data is taken from * a String. 
* * @param str The string */ public DirectSlice(final String str) { super(createNewSliceFromString(str)); this.internalBuffer = true; } /** * Constructs a slice where the data is * read from the provided * ByteBuffer up to a certain length * * @param data The buffer containing the data * @param length The length of the data to use for the slice */ public DirectSlice(final ByteBuffer data, final int length) { super(createNewDirectSlice0(ensureDirect(data), length)); this.internalBuffer = false; } /** * Constructs a slice where the data is * read from the provided * ByteBuffer * * @param data The bugger containing the data */ public DirectSlice(final ByteBuffer data) { super(createNewDirectSlice1(ensureDirect(data))); this.internalBuffer = false; } private static ByteBuffer ensureDirect(final ByteBuffer data) { if(!data.isDirect()) { throw new IllegalArgumentException("The ByteBuffer must be direct"); } return data; } /** * Retrieves the byte at a specific offset * from the underlying data * * @param offset The (zero-based) offset of the byte to retrieve * * @return the requested byte */ public byte get(final int offset) { return get0(getNativeHandle(), offset); } @Override public void clear() { clear0(getNativeHandle(), !cleared && internalBuffer, internalBufferOffset); cleared = true; } @Override public void removePrefix(final int n) { removePrefix0(getNativeHandle(), n); this.internalBufferOffset += n; } @Override protected void disposeInternal() { final long nativeHandle = getNativeHandle(); if(!cleared && internalBuffer) { disposeInternalBuf(nativeHandle, internalBufferOffset); } disposeInternal(nativeHandle); } private native static long createNewDirectSlice0(final ByteBuffer data, final int length); private native static long createNewDirectSlice1(final ByteBuffer data); @Override protected final native ByteBuffer data0(long handle); private native byte get0(long handle, int offset); private native void clear0(long handle, boolean internalBuffer, long 
internalBufferOffset); private native void removePrefix0(long handle, int length); private native void disposeInternalBuf(final long handle, long internalBufferOffset); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/EncodingType.java000066400000000000000000000034501370372246700242750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * EncodingType * *

The value will determine how to encode keys * when writing to a new SST file.

* *

This value is stored inside the SST file and is used when reading from
 * the file, which makes it possible for users to choose a different
 * encoding type when reopening a DB. Files with different encoding types
 * can co-exist in the same DB and can all be read.

*/ public enum EncodingType { /** * Always write full keys without any special encoding. */ kPlain((byte) 0), /** *

Looks for opportunities to write a shared prefix only once for multiple rows.
 * In some cases, when a key follows a previous key with the same prefix,
 * instead of writing out the full key, it just writes out the size of the
 * shared prefix, as well as the other bytes, to save some bytes.

* *

When using this option, the user is required to use the same prefix * extractor to make sure the same prefix will be extracted from the same key. * The Name() value of the prefix extractor will be stored in the file. When * reopening the file, the name of the options.prefix_extractor given will be * bitwise compared to the prefix extractors stored in the file. An error * will be returned if the two don't match.

*/ kPrefix((byte) 1); /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value_; } private EncodingType(byte value) { value_ = value; } private final byte value_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Env.java000066400000000000000000000121761370372246700224420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Arrays; import java.util.List; /** * Base class for all Env implementations in RocksDB. */ public abstract class Env extends RocksObject { static { RocksDB.loadLibrary(); } private static final Env DEFAULT_ENV = new RocksEnv(getDefaultEnvInternal()); static { /** * The Ownership of the Default Env belongs to C++ * and so we disown the native handle here so that * we cannot accidentally free it from Java. */ DEFAULT_ENV.disOwnNativeHandle(); } /** *

Returns the default environment suitable for the current operating * system.

* *

The result of {@code getDefault()} is a singleton whose ownership * belongs to rocksdb c++. As a result, the returned RocksEnv will not * have the ownership of its c++ resource, and calling its dispose()/close() * will be no-op.

* * @return the default {@link org.rocksdb.RocksEnv} instance. */ public static Env getDefault() { return DEFAULT_ENV; } /** *

Sets the number of background worker threads of the flush pool * for this environment.

*

Default number: 1

* * @param number the number of threads * * @return current {@link RocksEnv} instance. */ public Env setBackgroundThreads(final int number) { return setBackgroundThreads(number, Priority.LOW); } /** *

Gets the number of background worker threads of the pool * for this environment.

* * @param priority the priority id of a specified thread pool. * * @return the number of threads. */ public int getBackgroundThreads(final Priority priority) { return getBackgroundThreads(nativeHandle_, priority.getValue()); } /** *

Sets the number of background worker threads of the specified thread * pool for this environment.

* * @param number the number of threads * @param priority the priority id of a specified thread pool. * *

Default number: 1

* @return current {@link RocksEnv} instance. */ public Env setBackgroundThreads(final int number, final Priority priority) { setBackgroundThreads(nativeHandle_, number, priority.getValue()); return this; } /** *

Returns the length of the queue associated with the specified * thread pool.

* * @param priority the priority id of a specified thread pool. * * @return the thread pool queue length. */ public int getThreadPoolQueueLen(final Priority priority) { return getThreadPoolQueueLen(nativeHandle_, priority.getValue()); } /** * Enlarge number of background worker threads of a specific thread pool * for this environment if it is smaller than specified. 'LOW' is the default * pool. * * @param number the number of threads. * @param priority the priority id of a specified thread pool. * * @return current {@link RocksEnv} instance. */ public Env incBackgroundThreadsIfNeeded(final int number, final Priority priority) { incBackgroundThreadsIfNeeded(nativeHandle_, number, priority.getValue()); return this; } /** * Lower IO priority for threads from the specified pool. * * @param priority the priority id of a specified thread pool. * * @return current {@link RocksEnv} instance. */ public Env lowerThreadPoolIOPriority(final Priority priority) { lowerThreadPoolIOPriority(nativeHandle_, priority.getValue()); return this; } /** * Lower CPU priority for threads from the specified pool. * * @param priority the priority id of a specified thread pool. * * @return current {@link RocksEnv} instance. */ public Env lowerThreadPoolCPUPriority(final Priority priority) { lowerThreadPoolCPUPriority(nativeHandle_, priority.getValue()); return this; } /** * Returns the status of all threads that belong to the current Env. * * @return the status of all threads belong to this env. * * @throws RocksDBException if the thread list cannot be acquired. 
*/ public List getThreadList() throws RocksDBException { return Arrays.asList(getThreadList(nativeHandle_)); } Env(final long nativeHandle) { super(nativeHandle); } private static native long getDefaultEnvInternal(); private native void setBackgroundThreads( final long handle, final int number, final byte priority); private native int getBackgroundThreads(final long handle, final byte priority); private native int getThreadPoolQueueLen(final long handle, final byte priority); private native void incBackgroundThreadsIfNeeded(final long handle, final int number, final byte priority); private native void lowerThreadPoolIOPriority(final long handle, final byte priority); private native void lowerThreadPoolCPUPriority(final long handle, final byte priority); private native ThreadStatus[] getThreadList(final long handle) throws RocksDBException; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/EnvOptions.java000066400000000000000000000250211370372246700240070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Options while opening a file to read/write */ public class EnvOptions extends RocksObject { static { RocksDB.loadLibrary(); } /** * Construct with default Options */ public EnvOptions() { super(newEnvOptions()); } /** * Construct from {@link DBOptions}. * * @param dbOptions the database options. */ public EnvOptions(final DBOptions dbOptions) { super(newEnvOptions(dbOptions.nativeHandle_)); } /** * Enable/Disable memory mapped reads. * * Default: false * * @param useMmapReads true to enable memory mapped reads, false to disable. * * @return the reference to these options. 
*/ public EnvOptions setUseMmapReads(final boolean useMmapReads) { setUseMmapReads(nativeHandle_, useMmapReads); return this; } /** * Determine if memory mapped reads are in-use. * * @return true if memory mapped reads are in-use, false otherwise. */ public boolean useMmapReads() { assert(isOwningHandle()); return useMmapReads(nativeHandle_); } /** * Enable/Disable memory mapped Writes. * * Default: true * * @param useMmapWrites true to enable memory mapped writes, false to disable. * * @return the reference to these options. */ public EnvOptions setUseMmapWrites(final boolean useMmapWrites) { setUseMmapWrites(nativeHandle_, useMmapWrites); return this; } /** * Determine if memory mapped writes are in-use. * * @return true if memory mapped writes are in-use, false otherwise. */ public boolean useMmapWrites() { assert(isOwningHandle()); return useMmapWrites(nativeHandle_); } /** * Enable/Disable direct reads, i.e. {@code O_DIRECT}. * * Default: false * * @param useDirectReads true to enable direct reads, false to disable. * * @return the reference to these options. */ public EnvOptions setUseDirectReads(final boolean useDirectReads) { setUseDirectReads(nativeHandle_, useDirectReads); return this; } /** * Determine if direct reads are in-use. * * @return true if direct reads are in-use, false otherwise. */ public boolean useDirectReads() { assert(isOwningHandle()); return useDirectReads(nativeHandle_); } /** * Enable/Disable direct writes, i.e. {@code O_DIRECT}. * * Default: false * * @param useDirectWrites true to enable direct writes, false to disable. * * @return the reference to these options. */ public EnvOptions setUseDirectWrites(final boolean useDirectWrites) { setUseDirectWrites(nativeHandle_, useDirectWrites); return this; } /** * Determine if direct writes are in-use. * * @return true if direct writes are in-use, false otherwise. 
*/
  public boolean useDirectWrites() {
    assert(isOwningHandle());
    return useDirectWrites(nativeHandle_);
  }

  /**
   * Enable/Disable fallocate calls.
   *
   * Default: true
   *
   * If false, {@code fallocate()} calls are bypassed.
   *
   * @param allowFallocate true to enable fallocate calls, false to disable.
   *
   * @return the reference to these options.
   */
  public EnvOptions setAllowFallocate(final boolean allowFallocate) {
    setAllowFallocate(nativeHandle_, allowFallocate);
    return this;
  }

  /**
   * Determine if fallocate calls are used.
   *
   * @return true if fallocate calls are used, false otherwise.
   */
  public boolean allowFallocate() {
    assert(isOwningHandle());
    return allowFallocate(nativeHandle_);
  }

  /**
   * Enable/Disable the {@code FD_CLOEXEC} bit when opening file descriptors.
   *
   * Default: true
   *
   * @param setFdCloexec true to enable the {@code FD_CLOEXEC} bit,
   *     false to disable.
   *
   * @return the reference to these options.
   */
  public EnvOptions setSetFdCloexec(final boolean setFdCloexec) {
    setSetFdCloexec(nativeHandle_, setFdCloexec);
    return this;
  }

  /**
   * Determine if the {@code FD_CLOEXEC} bit is set when opening file
   * descriptors.
   *
   * @return true if the {@code FD_CLOEXEC} bit is enabled, false otherwise.
   */
  public boolean setFdCloexec() {
    assert(isOwningHandle());
    return setFdCloexec(nativeHandle_);
  }

  /**
   * Allows OS to incrementally sync files to disk while they are being
   * written, in the background. Issue one request for every
   * {@code bytesPerSync} written.
   *
   * Default: 0
   *
   * @param bytesPerSync 0 to disable, otherwise the number of bytes.
   *
   * @return the reference to these options.
   */
  public EnvOptions setBytesPerSync(final long bytesPerSync) {
    setBytesPerSync(nativeHandle_, bytesPerSync);
    return this;
  }

  /**
   * Get the number of incremental bytes per sync written in the background.
   *
   * @return 0 if disabled, otherwise the number of bytes.
*/ public long bytesPerSync() { assert(isOwningHandle()); return bytesPerSync(nativeHandle_); } /** * If true, we will preallocate the file with {@code FALLOC_FL_KEEP_SIZE} * flag, which means that file size won't change as part of preallocation. * If false, preallocation will also change the file size. This option will * improve the performance in workloads where you sync the data on every * write. By default, we set it to true for MANIFEST writes and false for * WAL writes * * @param fallocateWithKeepSize true to preallocate, false otherwise. * * @return the reference to these options. */ public EnvOptions setFallocateWithKeepSize( final boolean fallocateWithKeepSize) { setFallocateWithKeepSize(nativeHandle_, fallocateWithKeepSize); return this; } /** * Determine if file is preallocated. * * @return true if the file is preallocated, false otherwise. */ public boolean fallocateWithKeepSize() { assert(isOwningHandle()); return fallocateWithKeepSize(nativeHandle_); } /** * See {@link DBOptions#setCompactionReadaheadSize(long)}. * * @param compactionReadaheadSize the compaction read-ahead size. * * @return the reference to these options. */ public EnvOptions setCompactionReadaheadSize( final long compactionReadaheadSize) { setCompactionReadaheadSize(nativeHandle_, compactionReadaheadSize); return this; } /** * See {@link DBOptions#compactionReadaheadSize()}. * * @return the compaction read-ahead size. */ public long compactionReadaheadSize() { assert(isOwningHandle()); return compactionReadaheadSize(nativeHandle_); } /** * See {@link DBOptions#setRandomAccessMaxBufferSize(long)}. * * @param randomAccessMaxBufferSize the max buffer size for random access. * * @return the reference to these options. */ public EnvOptions setRandomAccessMaxBufferSize( final long randomAccessMaxBufferSize) { setRandomAccessMaxBufferSize(nativeHandle_, randomAccessMaxBufferSize); return this; } /** * See {@link DBOptions#randomAccessMaxBufferSize()}. 
*
   * @return the max buffer size for random access.
   */
  public long randomAccessMaxBufferSize() {
    assert(isOwningHandle());
    return randomAccessMaxBufferSize(nativeHandle_);
  }

  /**
   * See {@link DBOptions#setWritableFileMaxBufferSize(long)}.
   *
   * @param writableFileMaxBufferSize the max buffer size.
   *
   * @return the reference to these options.
   */
  public EnvOptions setWritableFileMaxBufferSize(
      final long writableFileMaxBufferSize) {
    setWritableFileMaxBufferSize(nativeHandle_, writableFileMaxBufferSize);
    return this;
  }

  /**
   * See {@link DBOptions#writableFileMaxBufferSize()}.
   *
   * @return the max buffer size.
   */
  public long writableFileMaxBufferSize() {
    assert(isOwningHandle());
    return writableFileMaxBufferSize(nativeHandle_);
  }

  /**
   * Set the write rate limiter for flush and compaction.
   *
   * @param rateLimiter the rate limiter; must not be null, because its
   *     native handle is read here.
   *
   * @return the reference to these options.
   */
  public EnvOptions setRateLimiter(final RateLimiter rateLimiter) {
    // Keep the Java wrapper in a field so the rateLimiter() getter can hand
    // it back without going through native code.
    this.rateLimiter = rateLimiter;
    setRateLimiter(nativeHandle_, rateLimiter.nativeHandle_);
    return this;
  }

  /**
   * Get the write rate limiter for flush and compaction.
   *
   * @return the rate limiter.
*/ public RateLimiter rateLimiter() { assert(isOwningHandle()); return rateLimiter; } private native static long newEnvOptions(); private native static long newEnvOptions(final long dboptions_handle); @Override protected final native void disposeInternal(final long handle); private native void setUseMmapReads(final long handle, final boolean useMmapReads); private native boolean useMmapReads(final long handle); private native void setUseMmapWrites(final long handle, final boolean useMmapWrites); private native boolean useMmapWrites(final long handle); private native void setUseDirectReads(final long handle, final boolean useDirectReads); private native boolean useDirectReads(final long handle); private native void setUseDirectWrites(final long handle, final boolean useDirectWrites); private native boolean useDirectWrites(final long handle); private native void setAllowFallocate(final long handle, final boolean allowFallocate); private native boolean allowFallocate(final long handle); private native void setSetFdCloexec(final long handle, final boolean setFdCloexec); private native boolean setFdCloexec(final long handle); private native void setBytesPerSync(final long handle, final long bytesPerSync); private native long bytesPerSync(final long handle); private native void setFallocateWithKeepSize( final long handle, final boolean fallocateWithKeepSize); private native boolean fallocateWithKeepSize(final long handle); private native void setCompactionReadaheadSize( final long handle, final long compactionReadaheadSize); private native long compactionReadaheadSize(final long handle); private native void setRandomAccessMaxBufferSize( final long handle, final long randomAccessMaxBufferSize); private native long randomAccessMaxBufferSize(final long handle); private native void setWritableFileMaxBufferSize( final long handle, final long writableFileMaxBufferSize); private native long writableFileMaxBufferSize(final long handle); private native void setRateLimiter(final 
long handle, final long rateLimiterHandle); private RateLimiter rateLimiter; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Experimental.java000066400000000000000000000013741370372246700243450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.lang.annotation.ElementType; import java.lang.annotation.Documented; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** * Marks a feature as experimental, meaning that it is likely * to change or even be removed/re-engineered in the future */ @Documented @Retention(RetentionPolicy.SOURCE) @Target({ElementType.TYPE, ElementType.METHOD}) public @interface Experimental { String value(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Filter.java000066400000000000000000000021511370372246700231270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Filters are stored in rocksdb and are consulted automatically * by rocksdb to decide whether or not to read some * information from disk. In many cases, a filter can cut down the * number of disk seeks form a handful to a single disk seek per * DB::Get() call. */ //TODO(AR) should be renamed FilterPolicy public abstract class Filter extends RocksObject { protected Filter(final long nativeHandle) { super(nativeHandle); } /** * Deletes underlying C++ filter pointer. * * Note that this function should be called only after all * RocksDB instances referencing the filter are closed. 
* Otherwise an undefined behavior will occur. */ @Override protected void disposeInternal() { disposeInternal(nativeHandle_); } @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/FlushOptions.java000066400000000000000000000052431370372246700243440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * FlushOptions to be passed to flush operations of * {@link org.rocksdb.RocksDB}. */ public class FlushOptions extends RocksObject { static { RocksDB.loadLibrary(); } /** * Construct a new instance of FlushOptions. */ public FlushOptions(){ super(newFlushOptions()); } /** * Set if the flush operation shall block until it terminates. * * @param waitForFlush boolean value indicating if the flush * operations waits for termination of the flush process. * * @return instance of current FlushOptions. */ public FlushOptions setWaitForFlush(final boolean waitForFlush) { assert(isOwningHandle()); setWaitForFlush(nativeHandle_, waitForFlush); return this; } /** * Wait for flush to finished. * * @return boolean value indicating if the flush operation * waits for termination of the flush process. */ public boolean waitForFlush() { assert(isOwningHandle()); return waitForFlush(nativeHandle_); } /** * Set to true so that flush would proceeds immediately even it it means * writes will stall for the duration of the flush. * * Set to false so that the operation will wait until it's possible to do * the flush without causing stall or until required flush is performed by * someone else (foreground call or background thread). * * Default: false * * @param allowWriteStall true to allow writes to stall for flush, false * otherwise. 
* * @return instance of current FlushOptions. */ public FlushOptions setAllowWriteStall(final boolean allowWriteStall) { assert(isOwningHandle()); setAllowWriteStall(nativeHandle_, allowWriteStall); return this; } /** * Returns true if writes are allowed to stall for flushes to complete, false * otherwise. * * @return true if writes are allowed to stall for flushes */ public boolean allowWriteStall() { assert(isOwningHandle()); return allowWriteStall(nativeHandle_); } private native static long newFlushOptions(); @Override protected final native void disposeInternal(final long handle); private native void setWaitForFlush(final long handle, final boolean wait); private native boolean waitForFlush(final long handle); private native void setAllowWriteStall(final long handle, final boolean allowWriteStall); private native boolean allowWriteStall(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java000066400000000000000000000127321370372246700273130ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** * The config for hash linked list memtable representation * Such memtable contains a fix-sized array of buckets, where * each bucket points to a sorted singly-linked * list (or null if the bucket is empty). * * Note that since this mem-table representation relies on the * key prefix, it is required to invoke one of the usePrefixExtractor * functions to specify how to extract key prefix given a key. * If proper prefix-extractor is not set, then RocksDB will * use the default memtable representation (SkipList) instead * and post a warning in the LOG. 
*/
public class HashLinkedListMemTableConfig extends MemTableConfig {
  /** Default number of buckets in the fixed-size bucket array. */
  public static final long DEFAULT_BUCKET_COUNT = 50000;
  /** Default huge page TLB size. */
  public static final long DEFAULT_HUGE_PAGE_TLB_SIZE = 0;
  /** Default number of entries in one bucket before logging starts. */
  public static final int DEFAULT_BUCKET_ENTRIES_LOG_THRES = 4096;
  /** Default for logging the entry distribution on flush. */
  public static final boolean DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH = true;
  /** Default entries-per-bucket threshold for the SkipList switch. */
  public static final int DEFAULT_THRESHOLD_USE_SKIPLIST = 256;
  /**
   * Misspelled legacy name of {@link #DEFAULT_THRESHOLD_USE_SKIPLIST}.
   * Kept with the same value so existing callers keep compiling; new code
   * should use the correctly spelled constant.
   */
  public static final int DEFAUL_THRESHOLD_USE_SKIPLIST =
      DEFAULT_THRESHOLD_USE_SKIPLIST;

  /**
   * HashLinkedListMemTableConfig constructor; every setting starts at its
   * {@code DEFAULT_*} value.
   */
  public HashLinkedListMemTableConfig() {
    bucketCount_ = DEFAULT_BUCKET_COUNT;
    hugePageTlbSize_ = DEFAULT_HUGE_PAGE_TLB_SIZE;
    bucketEntriesLoggingThreshold_ = DEFAULT_BUCKET_ENTRIES_LOG_THRES;
    ifLogBucketDistWhenFlush_ = DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH;
    thresholdUseSkiplist_ = DEFAULT_THRESHOLD_USE_SKIPLIST;
  }

  /**
   * Set the number of buckets in the fixed-size array used
   * in the hash linked-list mem-table.
   *
   * @param count the number of hash buckets.
   * @return the reference to the current HashLinkedListMemTableConfig.
   */
  public HashLinkedListMemTableConfig setBucketCount(
      final long count) {
    bucketCount_ = count;
    return this;
  }

  /**
   * Returns the number of buckets that will be used in the memtable
   * created based on this config.
   *
   * @return the number of buckets
   */
  public long bucketCount() {
    return bucketCount_;
  }

  /**
   *

<p>Set the size of huge tlb or allocate the hashtable bytes from
   * malloc if {@code size <= 0}.</p>
   *
   * <p>The user needs to reserve huge pages for it to be allocated,
   * like: {@code sysctl -w vm.nr_hugepages=20}</p>
   *
   * <p>See Linux Documentation/vm/hugetlbpage.txt</p>

*
   * @param size if set to {@code <= 0} hashtable bytes from malloc
   * @return the reference to the current HashLinkedListMemTableConfig.
   */
  public HashLinkedListMemTableConfig setHugePageTlbSize(
      final long size) {
    hugePageTlbSize_ = size;
    return this;
  }

  /**
   * Returns the size value of hugePageTlbSize.
   *
   * @return the hugePageTlbSize.
   */
  public long hugePageTlbSize() {
    return hugePageTlbSize_;
  }

  /**
   * If the number of entries in one bucket exceeds this setting, log
   * about it.
   *
   * @param threshold - number of entries in a single bucket before
   *     logging starts.
   * @return the reference to the current HashLinkedListMemTableConfig.
   */
  public HashLinkedListMemTableConfig
      setBucketEntriesLoggingThreshold(final int threshold) {
    bucketEntriesLoggingThreshold_ = threshold;
    return this;
  }

  /**
   * Returns the maximum number of entries in one bucket before
   * logging starts.
   *
   * @return maximum number of entries in one bucket before logging
   *     starts.
   */
  public int bucketEntriesLoggingThreshold() {
    return bucketEntriesLoggingThreshold_;
  }

  /**
   * If true the distribution of number of entries will be logged.
   *
   * @param logDistribution - boolean parameter indicating if number
   *     of entry distribution shall be logged.
   * @return the reference to the current HashLinkedListMemTableConfig.
   */
  public HashLinkedListMemTableConfig
      setIfLogBucketDistWhenFlush(final boolean logDistribution) {
    ifLogBucketDistWhenFlush_ = logDistribution;
    return this;
  }

  /**
   * Returns information about logging the distribution of
   * number of entries on flush.
   *
   * @return if distribution of number of entries shall be logged.
   */
  public boolean ifLogBucketDistWhenFlush() {
    return ifLogBucketDistWhenFlush_;
  }

  /**
   * Set maximum number of entries in one bucket. Exceeding this val
   * leads to a switch from LinkedList to SkipList.
   *
   * @param threshold maximum number of entries before SkipList is
   *     used.
   * @return the reference to the current HashLinkedListMemTableConfig.
*/ public HashLinkedListMemTableConfig setThresholdUseSkiplist(final int threshold) { thresholdUseSkiplist_ = threshold; return this; } /** * Returns entries per bucket threshold before LinkedList is * replaced by SkipList usage for that bucket. * * @return entries per bucket threshold before SkipList is used. */ public int thresholdUseSkiplist() { return thresholdUseSkiplist_; } @Override protected long newMemTableFactoryHandle() { return newMemTableFactoryHandle(bucketCount_, hugePageTlbSize_, bucketEntriesLoggingThreshold_, ifLogBucketDistWhenFlush_, thresholdUseSkiplist_); } private native long newMemTableFactoryHandle(long bucketCount, long hugePageTlbSize, int bucketEntriesLoggingThreshold, boolean ifLogBucketDistWhenFlush, int thresholdUseSkiplist) throws IllegalArgumentException; private long bucketCount_; private long hugePageTlbSize_; private int bucketEntriesLoggingThreshold_; private boolean ifLogBucketDistWhenFlush_; private int thresholdUseSkiplist_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java000066400000000000000000000057711370372246700270200ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** * The config for hash skip-list mem-table representation. * Such mem-table representation contains a fix-sized array of * buckets, where each bucket points to a skiplist (or null if the * bucket is empty). * * Note that since this mem-table representation relies on the * key prefix, it is required to invoke one of the usePrefixExtractor * functions to specify how to extract key prefix given a key. * If proper prefix-extractor is not set, then RocksDB will * use the default memtable representation (SkipList) instead * and post a warning in the LOG. 
*/
public class HashSkipListMemTableConfig extends MemTableConfig {
  // Defaults applied by the no-argument constructor below.
  public static final int DEFAULT_BUCKET_COUNT = 1000000;
  public static final int DEFAULT_BRANCHING_FACTOR = 4;
  public static final int DEFAULT_HEIGHT = 4;

  /**
   * HashSkipListMemTableConfig constructor; bucket count, branching factor
   * and height start at their {@code DEFAULT_*} values.
   */
  public HashSkipListMemTableConfig() {
    bucketCount_ = DEFAULT_BUCKET_COUNT;
    branchingFactor_ = DEFAULT_BRANCHING_FACTOR;
    height_ = DEFAULT_HEIGHT;
  }

  /**
   * Set the number of hash buckets used in the hash skiplist memtable.
   * Default = 1000000.
   *
   * @param count the number of hash buckets used in the hash
   *    skiplist memtable.
   * @return the reference to the current HashSkipListMemTableConfig.
   */
  public HashSkipListMemTableConfig setBucketCount(
      final long count) {
    bucketCount_ = count;
    return this;
  }

  /**
   * Returns the configured number of hash buckets.
   *
   * @return the number of hash buckets
   */
  public long bucketCount() {
    return bucketCount_;
  }

  /**
   * Set the height of the skip list. Default = 4.
   *
   * @param height height to set.
   *
   * @return the reference to the current HashSkipListMemTableConfig.
   */
  public HashSkipListMemTableConfig setHeight(final int height) {
    height_ = height;
    return this;
  }

  /**
   * Returns the configured skip list height.
   *
   * @return the height of the skip list.
   */
  public int height() {
    return height_;
  }

  /**
   * Set the branching factor used in the hash skip-list memtable.
   * This factor controls the probabilistic size ratio between adjacent
   * links in the skip list.
   *
   * @param bf the probabilistic size ratio between adjacent link
   *     lists in the skip list.
   * @return the reference to the current HashSkipListMemTableConfig.
   */
  public HashSkipListMemTableConfig setBranchingFactor(
      final int bf) {
    branchingFactor_ = bf;
    return this;
  }

  /**
   * @return branching factor, the probabilistic size ratio between
   *     adjacent links in the skip list.
*/ public int branchingFactor() { return branchingFactor_; } @Override protected long newMemTableFactoryHandle() { return newMemTableFactoryHandle( bucketCount_, height_, branchingFactor_); } private native long newMemTableFactoryHandle( long bucketCount, int height, int branchingFactor) throws IllegalArgumentException; private long bucketCount_; private int branchingFactor_; private int height_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/HdfsEnv.java000066400000000000000000000014641370372246700232450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * HDFS environment. */ public class HdfsEnv extends Env { /**

<p>Creates a new environment that is used for HDFS environment.</p>
   *
   * <p>The caller must delete the result when it is
   * no longer needed.</p>

* * @param fsName the HDFS as a string in the form "hdfs://hostname:port/" */ public HdfsEnv(final String fsName) { super(createHdfsEnv(fsName)); } private static native long createHdfsEnv(final String fsName); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/HistogramData.java000066400000000000000000000035541370372246700244410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public class HistogramData { private final double median_; private final double percentile95_; private final double percentile99_; private final double average_; private final double standardDeviation_; private final double max_; private final long count_; private final long sum_; private final double min_; public HistogramData(final double median, final double percentile95, final double percentile99, final double average, final double standardDeviation) { this(median, percentile95, percentile99, average, standardDeviation, 0.0, 0, 0, 0.0); } public HistogramData(final double median, final double percentile95, final double percentile99, final double average, final double standardDeviation, final double max, final long count, final long sum, final double min) { median_ = median; percentile95_ = percentile95; percentile99_ = percentile99; average_ = average; standardDeviation_ = standardDeviation; min_ = min; max_ = max; count_ = count; sum_ = sum; } public double getMedian() { return median_; } public double getPercentile95() { return percentile95_; } public double getPercentile99() { return percentile99_; } public double getAverage() { return average_; } public double getStandardDeviation() { return standardDeviation_; } public double getMax() { return max_; } public 
long getCount() { return count_; } public long getSum() { return sum_; } public double getMin() { return min_; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/HistogramType.java000066400000000000000000000077621370372246700245160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public enum HistogramType { DB_GET((byte) 0x0), DB_WRITE((byte) 0x1), COMPACTION_TIME((byte) 0x2), SUBCOMPACTION_SETUP_TIME((byte) 0x3), TABLE_SYNC_MICROS((byte) 0x4), COMPACTION_OUTFILE_SYNC_MICROS((byte) 0x5), WAL_FILE_SYNC_MICROS((byte) 0x6), MANIFEST_FILE_SYNC_MICROS((byte) 0x7), /** * TIME SPENT IN IO DURING TABLE OPEN. */ TABLE_OPEN_IO_MICROS((byte) 0x8), DB_MULTIGET((byte) 0x9), READ_BLOCK_COMPACTION_MICROS((byte) 0xA), READ_BLOCK_GET_MICROS((byte) 0xB), WRITE_RAW_BLOCK_MICROS((byte) 0xC), STALL_L0_SLOWDOWN_COUNT((byte) 0xD), STALL_MEMTABLE_COMPACTION_COUNT((byte) 0xE), STALL_L0_NUM_FILES_COUNT((byte) 0xF), HARD_RATE_LIMIT_DELAY_COUNT((byte) 0x10), SOFT_RATE_LIMIT_DELAY_COUNT((byte) 0x11), NUM_FILES_IN_SINGLE_COMPACTION((byte) 0x12), DB_SEEK((byte) 0x13), WRITE_STALL((byte) 0x14), SST_READ_MICROS((byte) 0x15), /** * The number of subcompactions actually scheduled during a compaction. */ NUM_SUBCOMPACTIONS_SCHEDULED((byte) 0x16), /** * Value size distribution in each operation. */ BYTES_PER_READ((byte) 0x17), BYTES_PER_WRITE((byte) 0x18), BYTES_PER_MULTIGET((byte) 0x19), /** * number of bytes compressed. */ BYTES_COMPRESSED((byte) 0x1A), /** * number of bytes decompressed. * * number of bytes is when uncompressed; i.e. 
before/after respectively */ BYTES_DECOMPRESSED((byte) 0x1B), COMPRESSION_TIMES_NANOS((byte) 0x1C), DECOMPRESSION_TIMES_NANOS((byte) 0x1D), READ_NUM_MERGE_OPERANDS((byte) 0x1E), /** * Time spent flushing memtable to disk. */ FLUSH_TIME((byte) 0x20), /** * Size of keys written to BlobDB. */ BLOB_DB_KEY_SIZE((byte) 0x21), /** * Size of values written to BlobDB. */ BLOB_DB_VALUE_SIZE((byte) 0x22), /** * BlobDB Put/PutWithTTL/PutUntil/Write latency. */ BLOB_DB_WRITE_MICROS((byte) 0x23), /** * BlobDB Get lagency. */ BLOB_DB_GET_MICROS((byte) 0x24), /** * BlobDB MultiGet latency. */ BLOB_DB_MULTIGET_MICROS((byte) 0x25), /** * BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. */ BLOB_DB_SEEK_MICROS((byte) 0x26), /** * BlobDB Next latency. */ BLOB_DB_NEXT_MICROS((byte) 0x27), /** * BlobDB Prev latency. */ BLOB_DB_PREV_MICROS((byte) 0x28), /** * Blob file write latency. */ BLOB_DB_BLOB_FILE_WRITE_MICROS((byte) 0x29), /** * Blob file read latency. */ BLOB_DB_BLOB_FILE_READ_MICROS((byte) 0x2A), /** * Blob file sync latency. */ BLOB_DB_BLOB_FILE_SYNC_MICROS((byte) 0x2B), /** * BlobDB garbage collection time. */ BLOB_DB_GC_MICROS((byte) 0x2C), /** * BlobDB compression time. */ BLOB_DB_COMPRESSION_MICROS((byte) 0x2D), /** * BlobDB decompression time. */ BLOB_DB_DECOMPRESSION_MICROS((byte) 0x2E), // 0x1F for backwards compatibility on current minor version. HISTOGRAM_ENUM_MAX((byte) 0x1F); private final byte value; HistogramType(final byte value) { this.value = value; } /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value; } /** * Get Histogram type by byte value. * * @param value byte representation of HistogramType. * * @return {@link org.rocksdb.HistogramType} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. 
*/ public static HistogramType getHistogramType(final byte value) { for (final HistogramType histogramType : HistogramType.values()) { if (histogramType.getValue() == value) { return histogramType; } } throw new IllegalArgumentException( "Illegal value provided for HistogramType."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Holder.java000066400000000000000000000017251370372246700231250ustar00rootroot00000000000000// Copyright (c) 2016, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Simple instance reference wrapper. */ public class Holder { private /* @Nullable */ T value; /** * Constructs a new Holder with null instance. */ public Holder() { } /** * Constructs a new Holder. * * @param value the instance or null */ public Holder(/* @Nullable */ final T value) { this.value = value; } /** * Get the instance reference. * * @return value the instance reference or null */ public /* @Nullable */ T getValue() { return value; } /** * Set the instance reference. * * @param value the instance reference or null */ public void setValue(/* @Nullable */ final T value) { this.value = value; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/IndexType.java000066400000000000000000000017631370372246700236230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * IndexType used in conjunction with BlockBasedTable. */ public enum IndexType { /** * A space efficient index block that is optimized for * binary-search-based index. 
*/ kBinarySearch((byte) 0), /** * The hash index, if enabled, will do the hash lookup when * {@code Options.prefix_extractor} is provided. */ kHashSearch((byte) 1), /** * A two-level index implementation. Both levels are binary search indexes. */ kTwoLevelIndexSearch((byte) 2); /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value_; } IndexType(byte value) { value_ = value; } private final byte value_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/InfoLogLevel.java000066400000000000000000000022131370372246700242260ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** * RocksDB log levels. */ public enum InfoLogLevel { DEBUG_LEVEL((byte)0), INFO_LEVEL((byte)1), WARN_LEVEL((byte)2), ERROR_LEVEL((byte)3), FATAL_LEVEL((byte)4), HEADER_LEVEL((byte)5), NUM_INFO_LOG_LEVELS((byte)6); private final byte value_; private InfoLogLevel(final byte value) { value_ = value; } /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value_; } /** * Get InfoLogLevel by byte value. * * @param value byte representation of InfoLogLevel. * * @return {@link org.rocksdb.InfoLogLevel} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ public static InfoLogLevel getInfoLogLevel(final byte value) { for (final InfoLogLevel infoLogLevel : InfoLogLevel.values()) { if (infoLogLevel.getValue() == value) { return infoLogLevel; } } throw new IllegalArgumentException( "Illegal value provided for InfoLogLevel."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java000066400000000000000000000202201370372246700270070ustar00rootroot00000000000000package org.rocksdb; // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). import java.util.List; /** * IngestExternalFileOptions is used by * {@link RocksDB#ingestExternalFile(ColumnFamilyHandle, List, IngestExternalFileOptions)}. */ public class IngestExternalFileOptions extends RocksObject { public IngestExternalFileOptions() { super(newIngestExternalFileOptions()); } /** * @param moveFiles {@link #setMoveFiles(boolean)} * @param snapshotConsistency {@link #setSnapshotConsistency(boolean)} * @param allowGlobalSeqNo {@link #setAllowGlobalSeqNo(boolean)} * @param allowBlockingFlush {@link #setAllowBlockingFlush(boolean)} */ public IngestExternalFileOptions(final boolean moveFiles, final boolean snapshotConsistency, final boolean allowGlobalSeqNo, final boolean allowBlockingFlush) { super(newIngestExternalFileOptions(moveFiles, snapshotConsistency, allowGlobalSeqNo, allowBlockingFlush)); } /** * Can be set to true to move the files instead of copying them. * * @return true if files will be moved */ public boolean moveFiles() { return moveFiles(nativeHandle_); } /** * Can be set to true to move the files instead of copying them. * * @param moveFiles true if files should be moved instead of copied * * @return the reference to the current IngestExternalFileOptions. */ public IngestExternalFileOptions setMoveFiles(final boolean moveFiles) { setMoveFiles(nativeHandle_, moveFiles); return this; } /** * If set to false, an ingested file keys could appear in existing snapshots * that where created before the file was ingested. * * @return true if snapshot consistency is assured */ public boolean snapshotConsistency() { return snapshotConsistency(nativeHandle_); } /** * If set to false, an ingested file keys could appear in existing snapshots * that where created before the file was ingested. 
*
   * @param snapshotConsistency true if snapshot consistency is required
   *
   * @return the reference to the current IngestExternalFileOptions.
   */
  public IngestExternalFileOptions setSnapshotConsistency(
      final boolean snapshotConsistency) {
    setSnapshotConsistency(nativeHandle_, snapshotConsistency);
    return this;
  }

  /**
   * If set to false, {@link RocksDB#ingestExternalFile(ColumnFamilyHandle, List, IngestExternalFileOptions)}
   * will fail if the file key range overlaps with existing keys or tombstones in the DB.
   *
   * @return the current value of the flag; false means ingestion fails on
   *     such an overlap.
   */
  public boolean allowGlobalSeqNo() {
    return allowGlobalSeqNo(nativeHandle_);
  }

  /**
   * If set to false, {@link RocksDB#ingestExternalFile(ColumnFamilyHandle, List, IngestExternalFileOptions)}
   * will fail if the file key range overlaps with existing keys or tombstones in the DB.
   *
   * @param allowGlobalSeqNo false to make ingestion fail on such an overlap,
   *     true otherwise.
   *
   * @return the reference to the current IngestExternalFileOptions.
   */
  public IngestExternalFileOptions setAllowGlobalSeqNo(
      final boolean allowGlobalSeqNo) {
    setAllowGlobalSeqNo(nativeHandle_, allowGlobalSeqNo);
    return this;
  }

  /**
   * If set to false and the file key range overlaps with the memtable key range
   * (memtable flush required), IngestExternalFile will fail.
   *
   * @return true if blocking flushes may occur
   */
  public boolean allowBlockingFlush() {
    return allowBlockingFlush(nativeHandle_);
  }

  /**
   * If set to false and the file key range overlaps with the memtable key range
   * (memtable flush required), IngestExternalFile will fail.
   *
   * @param allowBlockingFlush true if blocking flushes are allowed
   *
   * @return the reference to the current IngestExternalFileOptions.
   */
  public IngestExternalFileOptions setAllowBlockingFlush(
      final boolean allowBlockingFlush) {
    setAllowBlockingFlush(nativeHandle_, allowBlockingFlush);
    return this;
  }

  /**
   * Returns true if duplicate keys in the file being ingested are
   * to be skipped rather than overwriting existing data under that key.
*
   * @return true if duplicate keys in the file being ingested are to be
   *     skipped, false otherwise.
   */
  public boolean ingestBehind() {
    return ingestBehind(nativeHandle_);
  }

  /**
   * Set to true if you would like duplicate keys in the file being ingested
   * to be skipped rather than overwriting existing data under that key.
   *
   * Use case: back-fill of some historical data in the database without
   * over-writing existing newer version of data.
   *
   * This option can only be used if the DB has been running
   * with DBOptions#allowIngestBehind() == true since the dawn of time.
   *
   * All files will be ingested at the bottommost level with seqno=0.
   *
   * Default: false
   *
   * @param ingestBehind true if you would like duplicate keys in the file being
   *     ingested to be skipped.
   *
   * @return the reference to the current IngestExternalFileOptions.
   */
  public IngestExternalFileOptions setIngestBehind(final boolean ingestBehind) {
    setIngestBehind(nativeHandle_, ingestBehind);
    return this;
  }

  /**
   * Returns true if the global_seqno is written to a given offset
   * in the external SST file for backward compatibility.
   *
   * See {@link #setWriteGlobalSeqno(boolean)}.
   *
   * @return true if the global_seqno is written to a given offset,
   *     false otherwise.
   */
  public boolean writeGlobalSeqno() {
    return writeGlobalSeqno(nativeHandle_);
  }

  /**
   * Set to true if you would like to write the global_seqno to a given offset
   * in the external SST file for backward compatibility.
   *
   * Older versions of RocksDB write the global_seqno to a given offset within
   * the ingested SST files, and new versions of RocksDB do not.
   *
   * If you ingest an external SST using new version of RocksDB and would like
   * to be able to downgrade to an older version of RocksDB, you should set
   * {@link #writeGlobalSeqno()} to true.
   *
   * If your service is just starting to use the new RocksDB, we recommend that
   * you set this option to false, which brings two benefits:
   * 1. No extra random write for global_seqno during ingestion.
   * 2.
Without writing external SST file, it's possible to do checksum. * * We have a plan to set this option to false by default in the future. * * Default: true * * @param writeGlobalSeqno true to write the gloal_seqno to a given offset, * false otherwise * * @return the reference to the current IngestExternalFileOptions. */ public IngestExternalFileOptions setWriteGlobalSeqno( final boolean writeGlobalSeqno) { setWriteGlobalSeqno(nativeHandle_, writeGlobalSeqno); return this; } private native static long newIngestExternalFileOptions(); private native static long newIngestExternalFileOptions( final boolean moveFiles, final boolean snapshotConsistency, final boolean allowGlobalSeqNo, final boolean allowBlockingFlush); @Override protected final native void disposeInternal(final long handle); private native boolean moveFiles(final long handle); private native void setMoveFiles(final long handle, final boolean move_files); private native boolean snapshotConsistency(final long handle); private native void setSnapshotConsistency(final long handle, final boolean snapshotConsistency); private native boolean allowGlobalSeqNo(final long handle); private native void setAllowGlobalSeqNo(final long handle, final boolean allowGloablSeqNo); private native boolean allowBlockingFlush(final long handle); private native void setAllowBlockingFlush(final long handle, final boolean allowBlockingFlush); private native boolean ingestBehind(final long handle); private native void setIngestBehind(final long handle, final boolean ingestBehind); private native boolean writeGlobalSeqno(final long handle); private native void setWriteGlobalSeqno(final long handle, final boolean writeGlobalSeqNo); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/LRUCache.java000066400000000000000000000065771370372246700233100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Least Recently Used Cache */ public class LRUCache extends Cache { /** * Create a new cache with a fixed size capacity * * @param capacity The fixed size capacity of the cache */ public LRUCache(final long capacity) { this(capacity, -1, false, 0.0); } /** * Create a new cache with a fixed size capacity. The cache is sharded * to 2^numShardBits shards, by hash of the key. The total capacity * is divided and evenly assigned to each shard. * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. * * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key */ public LRUCache(final long capacity, final int numShardBits) { super(newLRUCache(capacity, numShardBits, false,0.0)); } /** * Create a new cache with a fixed size capacity. The cache is sharded * to 2^numShardBits shards, by hash of the key. The total capacity * is divided and evenly assigned to each shard. If strictCapacityLimit * is set, insert to the cache will fail when cache is full. * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. * * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key * @param strictCapacityLimit insert to the cache will fail when cache is full */ public LRUCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit) { super(newLRUCache(capacity, numShardBits, strictCapacityLimit,0.0)); } /** * Create a new cache with a fixed size capacity. The cache is sharded * to 2^numShardBits shards, by hash of the key. 
The total capacity * is divided and evenly assigned to each shard. If strictCapacityLimit * is set, insert to the cache will fail when cache is full. User can also * set percentage of the cache reserves for high priority entries via * highPriPoolRatio. * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. * * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key * @param strictCapacityLimit insert to the cache will fail when cache is full * @param highPriPoolRatio percentage of the cache reserves for high priority * entries */ public LRUCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit, final double highPriPoolRatio) { super(newLRUCache(capacity, numShardBits, strictCapacityLimit, highPriPoolRatio)); } private native static long newLRUCache(final long capacity, final int numShardBits, final boolean strictCapacityLimit, final double highPriPoolRatio); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/LevelMetaData.java000066400000000000000000000023151370372246700243540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Arrays; import java.util.List; /** * The metadata that describes a level. 
*/ public class LevelMetaData { private final int level; private final long size; private final SstFileMetaData[] files; /** * Called from JNI C++ */ private LevelMetaData(final int level, final long size, final SstFileMetaData[] files) { this.level = level; this.size = size; this.files = files; } /** * The level which this meta data describes. * * @return the level */ public int level() { return level; } /** * The size of this level in bytes, which is equal to the sum of * the file size of its {@link #files()}. * * @return the size */ public long size() { return size; } /** * The metadata of all sst files in this level. * * @return the metadata of the files */ public List files() { return Arrays.asList(files); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/LiveFileMetaData.java000066400000000000000000000027311370372246700250060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The full set of metadata associated with each SST file. */ public class LiveFileMetaData extends SstFileMetaData { private final byte[] columnFamilyName; private final int level; /** * Called from JNI C++ */ private LiveFileMetaData( final byte[] columnFamilyName, final int level, final String fileName, final String path, final long size, final long smallestSeqno, final long largestSeqno, final byte[] smallestKey, final byte[] largestKey, final long numReadsSampled, final boolean beingCompacted, final long numEntries, final long numDeletions) { super(fileName, path, size, smallestSeqno, largestSeqno, smallestKey, largestKey, numReadsSampled, beingCompacted, numEntries, numDeletions); this.columnFamilyName = columnFamilyName; this.level = level; } /** * Get the name of the column family. 
* * @return the name of the column family */ public byte[] columnFamilyName() { return columnFamilyName; } /** * Get the level at which this file resides. * * @return the level at which the file resides. */ public int level() { return level; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/LogFile.java000066400000000000000000000034621370372246700232310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public class LogFile { private final String pathName; private final long logNumber; private final WalFileType type; private final long startSequence; private final long sizeFileBytes; /** * Called from JNI C++ */ private LogFile(final String pathName, final long logNumber, final byte walFileTypeValue, final long startSequence, final long sizeFileBytes) { this.pathName = pathName; this.logNumber = logNumber; this.type = WalFileType.fromValue(walFileTypeValue); this.startSequence = startSequence; this.sizeFileBytes = sizeFileBytes; } /** * Returns log file's pathname relative to the main db dir * Eg. For a live-log-file = /000003.log * For an archived-log-file = /archive/000003.log * * @return log file's pathname */ public String pathName() { return pathName; } /** * Primary identifier for log file. * This is directly proportional to creation time of the log file * * @return the log number */ public long logNumber() { return logNumber; } /** * Log file can be either alive or archived. * * @return the type of the log file. */ public WalFileType type() { return type; } /** * Starting sequence number of writebatch written in this log file. * * @return the stating sequence number */ public long startSequence() { return startSequence; } /** * Size of log file on disk in Bytes. 
* * @return size of log file */ public long sizeFileBytes() { return sizeFileBytes; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Logger.java000066400000000000000000000075051370372246700231310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** *

This class provides a custom logger functionality * in Java which wraps {@code RocksDB} logging facilities. *

* *

Using this class RocksDB can log with common * Java logging APIs like Log4j or Slf4j without keeping * database logs in the filesystem.

* * Performance *

There are certain performance penalties using a Java * {@code Logger} implementation within production code. *

* *

* A log level can be set using {@link org.rocksdb.Options} or * {@link Logger#setInfoLogLevel(InfoLogLevel)}. The set log level * influences the underlying native code. Each log message is * checked against the set log level and if the log level is more * verbose than the set log level, native allocations will be made * and data structures are allocated. *

* *

Every log message which will be emitted by native code will * trigger expensive native to Java transitions. So the preferred * setting for production use is either * {@link org.rocksdb.InfoLogLevel#ERROR_LEVEL} or * {@link org.rocksdb.InfoLogLevel#FATAL_LEVEL}. *

*/
public abstract class Logger extends RocksCallbackObject {

  // Discriminator passed as the second native parameter handle; it tells
  // initializeNative which kind of options object the first handle refers to.
  private final static long WITH_OPTIONS = 0;
  private final static long WITH_DBOPTIONS = 1;

  /**
   * <p>Creates a logger bound to an {@link org.rocksdb.Options} instance.</p>
   *
   * <p><strong>Important:</strong> the log level set within
   * the {@link org.rocksdb.Options} instance will be used as
   * maximum log level of RocksDB.</p>
   *
   * @param options {@link org.rocksdb.Options} instance.
   */
  public Logger(final Options options) {
    super(options.nativeHandle_, WITH_OPTIONS);
  }

  /**
   * <p>Creates a logger bound to a {@link org.rocksdb.DBOptions} instance.</p>
   *
   * <p><strong>Important:</strong> the log level set within
   * the {@link org.rocksdb.DBOptions} instance will be used
   * as maximum log level of RocksDB.</p>
   *
   * @param dboptions {@link org.rocksdb.DBOptions} instance.
   */
  public Logger(final DBOptions dboptions) {
    super(dboptions.nativeHandle_, WITH_DBOPTIONS);
  }

  /**
   * Creates the native logger. The constructors pass two values: index 0 is
   * the native handle of the options object, index 1 is the discriminator
   * ({@code WITH_OPTIONS} or {@code WITH_DBOPTIONS}).
   *
   * @param nativeParameterHandles the options handle followed by the
   *     discriminator.
   *
   * @return the value returned by the matching native factory method.
   *
   * @throws IllegalArgumentException if the discriminator is neither
   *     {@code WITH_OPTIONS} nor {@code WITH_DBOPTIONS}.
   */
  @Override
  protected long initializeNative(long... nativeParameterHandles) {
    final long optionsKind = nativeParameterHandles[1];
    if (optionsKind == WITH_OPTIONS) {
      return createNewLoggerOptions(nativeParameterHandles[0]);
    }
    if (optionsKind == WITH_DBOPTIONS) {
      return createNewLoggerDbOptions(nativeParameterHandles[0]);
    }
    throw new IllegalArgumentException();
  }

  /**
   * Sets the {@link org.rocksdb.InfoLogLevel} of this logger.
   *
   * @param infoLogLevel {@link org.rocksdb.InfoLogLevel} instance.
   */
  public void setInfoLogLevel(final InfoLogLevel infoLogLevel) {
    final byte levelValue = infoLogLevel.getValue();
    setInfoLogLevel(nativeHandle_, levelValue);
  }

  /**
   * Returns the log level of this logger.
   *
   * @return {@link org.rocksdb.InfoLogLevel} instance.
   */
  public InfoLogLevel infoLogLevel() {
    final byte levelValue = infoLogLevel(nativeHandle_);
    return InfoLogLevel.getInfoLogLevel(levelValue);
  }

  /**
   * Receives one log message; implemented by subclasses.
   *
   * @param infoLogLevel the level of the message.
   * @param logMsg the message text.
   */
  protected abstract void log(InfoLogLevel infoLogLevel,
      String logMsg);

  protected native long createNewLoggerOptions(
      long options);
  protected native long createNewLoggerDbOptions(
      long dbOptions);
  protected native void setInfoLogLevel(long handle,
      byte infoLogLevel);
  protected native byte infoLogLevel(long handle);

  /**
   * We override {@link RocksCallbackObject#disposeInternal()}
   * as disposing of a rocksdb::LoggerJniCallback requires
   * a slightly different approach as it is a std::shared_ptr
   */
  @Override
  protected void disposeInternal() {
    disposeInternal(nativeHandle_);
  }

  private native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MemTableConfig.java000066400000000000000000000021531370372246700245200ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * MemTableConfig is used to configure the internal mem-table of a RocksDB.
 * Each memtable is required to have one such sub-class to allow
 * Java developers to use it.
 *
 * To make RocksDB use a specific MemTable format, its associated
 * MemTableConfig should be properly set and passed into Options
 * via Options.setMemTableConfig(), and the db opened using that Options.
 *
 * @see Options
 */
public abstract class MemTableConfig {
  /**
   * This function should only be called by Options.setMemTableConfig(),
   * which will create a c++ shared-pointer to the c++ MemTableRepFactory
   * that is associated with the Java MemTableConfig.
   *
   * @see Options#setMemTableConfig(MemTableConfig)
   *
   * @return native handle address to native memory table instance.
   */
  abstract protected long newMemTableFactoryHandle();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MemoryUsageType.java000066400000000000000000000033751370372246700250120ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * MemoryUsageType
 *
 *

The value will be used as a key to indicate the type of memory usage * described

*/ public enum MemoryUsageType { /** * Memory usage of all the mem-tables. */ kMemTableTotal((byte) 0), /** * Memory usage of those un-flushed mem-tables. */ kMemTableUnFlushed((byte) 1), /** * Memory usage of all the table readers. */ kTableReadersTotal((byte) 2), /** * Memory usage by Cache. */ kCacheTotal((byte) 3), /** * Max usage types - copied to keep 1:1 with native. */ kNumUsageTypes((byte) 4); /** * Returns the byte value of the enumerations value * * @return byte representation */ public byte getValue() { return value_; } /** *

Get the MemoryUsageType enumeration value by * passing the byte identifier to this method.

* * @param byteIdentifier of MemoryUsageType. * * @return MemoryUsageType instance. * * @throws IllegalArgumentException if the usage type for the byteIdentifier * cannot be found */ public static MemoryUsageType getMemoryUsageType(final byte byteIdentifier) { for (final MemoryUsageType memoryUsageType : MemoryUsageType.values()) { if (memoryUsageType.getValue() == byteIdentifier) { return memoryUsageType; } } throw new IllegalArgumentException( "Illegal value provided for MemoryUsageType."); } MemoryUsageType(byte value) { value_ = value; } private final byte value_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/MemoryUtil.java000066400000000000000000000045201370372246700240120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.*; /** * JNI passthrough for MemoryUtil. */ public class MemoryUtil { /** *

Returns the approximate memory usage of different types in the input * list of DBs and Cache set. For instance, in the output map the key * kMemTableTotal will be associated with the memory * usage of all the mem-tables from all the input rocksdb instances.

* *

Note that for memory usage inside Cache class, we will * only report the usage of the input "cache_set" without * including those Cache usage inside the input list "dbs" * of DBs.

* * @param dbs List of dbs to collect memory usage for. * @param caches Set of caches to collect memory usage for. * @return Map from {@link MemoryUsageType} to memory usage as a {@link Long}. */ public static Map getApproximateMemoryUsageByType(final List dbs, final Set caches) { int dbCount = (dbs == null) ? 0 : dbs.size(); int cacheCount = (caches == null) ? 0 : caches.size(); long[] dbHandles = new long[dbCount]; long[] cacheHandles = new long[cacheCount]; if (dbCount > 0) { ListIterator dbIter = dbs.listIterator(); while (dbIter.hasNext()) { dbHandles[dbIter.nextIndex()] = dbIter.next().nativeHandle_; } } if (cacheCount > 0) { // NOTE: This index handling is super ugly but I couldn't get a clean way to track both the // index and the iterator simultaneously within a Set. int i = 0; for (Cache cache : caches) { cacheHandles[i] = cache.nativeHandle_; i++; } } Map byteOutput = getApproximateMemoryUsageByType(dbHandles, cacheHandles); Map output = new HashMap<>(); for(Map.Entry longEntry : byteOutput.entrySet()) { output.put(MemoryUsageType.getMemoryUsageType(longEntry.getKey()), longEntry.getValue()); } return output; } private native static Map getApproximateMemoryUsageByType(final long[] dbHandles, final long[] cacheHandles); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/MergeOperator.java000066400000000000000000000012411370372246700244540ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2014, Vlad Balan (vlad.gm@gmail.com). All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * MergeOperator holds an operator to be applied when compacting * two merge operands held under the same key in order to obtain a single * value. 
*/
public abstract class MergeOperator extends RocksObject {
  protected MergeOperator(final long nativeHandle) {
    super(nativeHandle);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java000066400000000000000000000345141370372246700271770ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.*;

public class MutableColumnFamilyOptions
    extends AbstractMutableOptions {

  /**
   * User must use builder pattern, or parser.
   *
   * @param keys the keys
   * @param values the values
   *
   * See {@link #builder()} and {@link #parse(String)}.
   */
  private MutableColumnFamilyOptions(final String[] keys,
      final String[] values) {
    super(keys, values);
  }

  /**
   * Creates a builder which allows you
   * to set MutableColumnFamilyOptions in a fluent
   * manner
   *
   * @return A builder for MutableColumnFamilyOptions
   */
  public static MutableColumnFamilyOptionsBuilder builder() {
    return new MutableColumnFamilyOptionsBuilder();
  }

  /**
   * Parses a String representation of MutableColumnFamilyOptions
   *
   * The format is: key1=value1;key2=value2;key3=value3 etc
   *
   * For int[] values, each int should be separated by a comma, e.g.
   *
   * key1=value1;intArrayKey1=1,2,3
   *
   * @param str The string representation of the mutable column family options
   *
   * @return A builder for the mutable column family options
   *
   * @throws NullPointerException if {@code str} is null
   * @throws IllegalArgumentException if a pair has no key, no separator or
   *     no value
   */
  public static MutableColumnFamilyOptionsBuilder parse(final String str) {
    Objects.requireNonNull(str);

    final MutableColumnFamilyOptionsBuilder builder =
        new MutableColumnFamilyOptionsBuilder();

    final String[] options = str.trim().split(KEY_VALUE_PAIR_SEPARATOR);
    for (final String option : options) {
      final int equalsOffset = option.indexOf(KEY_VALUE_SEPARATOR);
      // <= 0 covers both "no separator" (-1) and "empty key" (0), so the key
      // extracted below is always non-empty and needs no further check.
      if (equalsOffset <= 0) {
        throw new IllegalArgumentException(
            "options string has an invalid key=value pair");
      }

      final String key = option.substring(0, equalsOffset);

      final String value = option.substring(equalsOffset + 1);
      if (value.isEmpty()) {
        throw new IllegalArgumentException("options string is invalid");
      }

      builder.fromString(key, value);
    }

    return builder;
  }

  private interface MutableColumnFamilyOptionKey extends MutableOptionKey {}

  public enum MemtableOption implements MutableColumnFamilyOptionKey {
    write_buffer_size(ValueType.LONG),
    arena_block_size(ValueType.LONG),
    memtable_prefix_bloom_size_ratio(ValueType.DOUBLE),
    @Deprecated memtable_prefix_bloom_bits(ValueType.INT),
    @Deprecated memtable_prefix_bloom_probes(ValueType.INT),
    memtable_huge_page_size(ValueType.LONG),
    max_successive_merges(ValueType.LONG),
    @Deprecated filter_deletes(ValueType.BOOLEAN),
    max_write_buffer_number(ValueType.INT),
    inplace_update_num_locks(ValueType.LONG);

    private final ValueType valueType;
    MemtableOption(final ValueType valueType) {
      this.valueType = valueType;
    }

    @Override
    public ValueType getValueType() {
      return valueType;
    }
  }

  public enum CompactionOption implements MutableColumnFamilyOptionKey {
    disable_auto_compactions(ValueType.BOOLEAN),
    @Deprecated soft_rate_limit(ValueType.DOUBLE),
    soft_pending_compaction_bytes_limit(ValueType.LONG),
    @Deprecated hard_rate_limit(ValueType.DOUBLE),
    hard_pending_compaction_bytes_limit(ValueType.LONG),
    level0_file_num_compaction_trigger(ValueType.INT),
    level0_slowdown_writes_trigger(ValueType.INT),
    level0_stop_writes_trigger(ValueType.INT),
    max_compaction_bytes(ValueType.LONG),
    target_file_size_base(ValueType.LONG),
    target_file_size_multiplier(ValueType.INT),
    max_bytes_for_level_base(ValueType.LONG),
    // DOUBLE, not INT: the builder reads and writes this key exclusively via
    // setDouble()/getDouble() with a double argument, so the declared value
    // type has to agree with those accessors.
    max_bytes_for_level_multiplier(ValueType.DOUBLE),
    max_bytes_for_level_multiplier_additional(ValueType.INT_ARRAY),
    ttl(ValueType.LONG);

    private final ValueType valueType;
    CompactionOption(final ValueType valueType) {
      this.valueType = valueType;
    }

    @Override
    public ValueType getValueType() {
      return valueType;
    }
  }

  public enum MiscOption implements MutableColumnFamilyOptionKey {
    max_sequential_skip_in_iterations(ValueType.LONG),
    paranoid_file_checks(ValueType.BOOLEAN),
    report_bg_io_stats(ValueType.BOOLEAN),
    compression_type(ValueType.ENUM);

    private final ValueType valueType;
    MiscOption(final ValueType valueType) {
      this.valueType = valueType;
    }

    @Override
    public ValueType getValueType() {
      return valueType;
    }
  }

  /**
   * Fluent builder for {@link MutableColumnFamilyOptions}. Each setter stores
   * its value under the matching option key and returns this builder; each
   * getter reads the value back from the same key.
   */
  public static class MutableColumnFamilyOptionsBuilder
      extends AbstractMutableOptionsBuilder<MutableColumnFamilyOptions, MutableColumnFamilyOptionsBuilder, MutableColumnFamilyOptionKey>
      implements MutableColumnFamilyOptionsInterface<MutableColumnFamilyOptionsBuilder> {

    // Lookup from option name to key, across all three option enums.
    private final static Map<String, MutableColumnFamilyOptionKey> ALL_KEYS_LOOKUP = new HashMap<>();
    static {
      for (final MutableColumnFamilyOptionKey key : MemtableOption.values()) {
        ALL_KEYS_LOOKUP.put(key.name(), key);
      }

      for (final MutableColumnFamilyOptionKey key : CompactionOption.values()) {
        ALL_KEYS_LOOKUP.put(key.name(), key);
      }

      for (final MutableColumnFamilyOptionKey key : MiscOption.values()) {
        ALL_KEYS_LOOKUP.put(key.name(), key);
      }
    }

    private MutableColumnFamilyOptionsBuilder() {
      super();
    }

    @Override
    protected MutableColumnFamilyOptionsBuilder self() {
      return this;
    }

    @Override
    protected Map<String, MutableColumnFamilyOptionKey> allKeys() {
      return ALL_KEYS_LOOKUP;
    }

    @Override
    protected MutableColumnFamilyOptions build(final String[] keys,
        final String[] values) {
      return new MutableColumnFamilyOptions(keys, values);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setWriteBufferSize(
        final long writeBufferSize) {
      return setLong(MemtableOption.write_buffer_size, writeBufferSize);
    }

    @Override
    public long writeBufferSize() {
      return getLong(MemtableOption.write_buffer_size);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setArenaBlockSize(
        final long arenaBlockSize) {
      return setLong(MemtableOption.arena_block_size, arenaBlockSize);
    }

    @Override
    public long arenaBlockSize() {
      return getLong(MemtableOption.arena_block_size);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMemtablePrefixBloomSizeRatio(
        final double memtablePrefixBloomSizeRatio) {
      return setDouble(MemtableOption.memtable_prefix_bloom_size_ratio,
          memtablePrefixBloomSizeRatio);
    }

    @Override
    public double memtablePrefixBloomSizeRatio() {
      return getDouble(MemtableOption.memtable_prefix_bloom_size_ratio);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMemtableHugePageSize(
        final long memtableHugePageSize) {
      return setLong(MemtableOption.memtable_huge_page_size,
          memtableHugePageSize);
    }

    @Override
    public long memtableHugePageSize() {
      return getLong(MemtableOption.memtable_huge_page_size);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxSuccessiveMerges(
        final long maxSuccessiveMerges) {
      return setLong(MemtableOption.max_successive_merges, maxSuccessiveMerges);
    }

    @Override
    public long maxSuccessiveMerges() {
      return getLong(MemtableOption.max_successive_merges);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxWriteBufferNumber(
        final int maxWriteBufferNumber) {
      return setInt(MemtableOption.max_write_buffer_number,
          maxWriteBufferNumber);
    }

    @Override
    public int maxWriteBufferNumber() {
      return getInt(MemtableOption.max_write_buffer_number);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setInplaceUpdateNumLocks(
        final long inplaceUpdateNumLocks) {
      return setLong(MemtableOption.inplace_update_num_locks,
          inplaceUpdateNumLocks);
    }

    @Override
    public long inplaceUpdateNumLocks() {
      return getLong(MemtableOption.inplace_update_num_locks);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setDisableAutoCompactions(
        final boolean disableAutoCompactions) {
      return setBoolean(CompactionOption.disable_auto_compactions,
          disableAutoCompactions);
    }

    @Override
    public boolean disableAutoCompactions() {
      return getBoolean(CompactionOption.disable_auto_compactions);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setSoftPendingCompactionBytesLimit(
        final long softPendingCompactionBytesLimit) {
      return setLong(CompactionOption.soft_pending_compaction_bytes_limit,
          softPendingCompactionBytesLimit);
    }

    @Override
    public long softPendingCompactionBytesLimit() {
      return getLong(CompactionOption.soft_pending_compaction_bytes_limit);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setHardPendingCompactionBytesLimit(
        final long hardPendingCompactionBytesLimit) {
      return setLong(CompactionOption.hard_pending_compaction_bytes_limit,
          hardPendingCompactionBytesLimit);
    }

    @Override
    public long hardPendingCompactionBytesLimit() {
      return getLong(CompactionOption.hard_pending_compaction_bytes_limit);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setLevel0FileNumCompactionTrigger(
        final int level0FileNumCompactionTrigger) {
      return setInt(CompactionOption.level0_file_num_compaction_trigger,
          level0FileNumCompactionTrigger);
    }

    @Override
    public int level0FileNumCompactionTrigger() {
      return getInt(CompactionOption.level0_file_num_compaction_trigger);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setLevel0SlowdownWritesTrigger(
        final int level0SlowdownWritesTrigger) {
      return setInt(CompactionOption.level0_slowdown_writes_trigger,
          level0SlowdownWritesTrigger);
    }

    @Override
    public int level0SlowdownWritesTrigger() {
      return getInt(CompactionOption.level0_slowdown_writes_trigger);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setLevel0StopWritesTrigger(
        final int level0StopWritesTrigger) {
      return setInt(CompactionOption.level0_stop_writes_trigger,
          level0StopWritesTrigger);
    }

    @Override
    public int level0StopWritesTrigger() {
      return getInt(CompactionOption.level0_stop_writes_trigger);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxCompactionBytes(final long maxCompactionBytes) {
      return setLong(CompactionOption.max_compaction_bytes, maxCompactionBytes);
    }

    @Override
    public long maxCompactionBytes() {
      return getLong(CompactionOption.max_compaction_bytes);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setTargetFileSizeBase(
        final long targetFileSizeBase) {
      return setLong(CompactionOption.target_file_size_base,
          targetFileSizeBase);
    }

    @Override
    public long targetFileSizeBase() {
      return getLong(CompactionOption.target_file_size_base);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setTargetFileSizeMultiplier(
        final int targetFileSizeMultiplier) {
      return setInt(CompactionOption.target_file_size_multiplier,
          targetFileSizeMultiplier);
    }

    @Override
    public int targetFileSizeMultiplier() {
      return getInt(CompactionOption.target_file_size_multiplier);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelBase(
        final long maxBytesForLevelBase) {
      return setLong(CompactionOption.max_bytes_for_level_base,
          maxBytesForLevelBase);
    }

    @Override
    public long maxBytesForLevelBase() {
      return getLong(CompactionOption.max_bytes_for_level_base);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelMultiplier(
        final double maxBytesForLevelMultiplier) {
      return setDouble(CompactionOption.max_bytes_for_level_multiplier, maxBytesForLevelMultiplier);
    }

    @Override
    public double maxBytesForLevelMultiplier() {
      return getDouble(CompactionOption.max_bytes_for_level_multiplier);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelMultiplierAdditional(
        final int[] maxBytesForLevelMultiplierAdditional) {
      return setIntArray(
          CompactionOption.max_bytes_for_level_multiplier_additional,
          maxBytesForLevelMultiplierAdditional);
    }

    @Override
    public int[] maxBytesForLevelMultiplierAdditional() {
      return getIntArray(
          CompactionOption.max_bytes_for_level_multiplier_additional);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setMaxSequentialSkipInIterations(
        final long maxSequentialSkipInIterations) {
      return setLong(MiscOption.max_sequential_skip_in_iterations,
          maxSequentialSkipInIterations);
    }

    @Override
    public long maxSequentialSkipInIterations() {
      return getLong(MiscOption.max_sequential_skip_in_iterations);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setParanoidFileChecks(
        final boolean paranoidFileChecks) {
      return setBoolean(MiscOption.paranoid_file_checks, paranoidFileChecks);
    }

    @Override
    public boolean paranoidFileChecks() {
      return getBoolean(MiscOption.paranoid_file_checks);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setCompressionType(
        final CompressionType compressionType) {
      return setEnum(MiscOption.compression_type, compressionType);
    }

    @Override
    public CompressionType compressionType() {
      return (CompressionType)getEnum(MiscOption.compression_type);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setReportBgIoStats(
        final boolean reportBgIoStats) {
      return setBoolean(MiscOption.report_bg_io_stats, reportBgIoStats);
    }

    @Override
    public boolean reportBgIoStats() {
      return getBoolean(MiscOption.report_bg_io_stats);
    }

    @Override
    public MutableColumnFamilyOptionsBuilder setTtl(final long ttl) {
      return setLong(CompactionOption.ttl, ttl);
    }

    @Override
    public long ttl() {
      return getLong(CompactionOption.ttl);
    }
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java000066400000000000000000000127521370372246700310200ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * Basic column family options, each declared as a setter/getter pair.
 * The advanced options are inherited from
 * {@link AdvancedMutableColumnFamilyOptionsInterface}.
 *
 * @param <T> the concrete options type returned by the fluent setters
 */
public interface MutableColumnFamilyOptionsInterface<
    T extends MutableColumnFamilyOptionsInterface<T>>
    extends AdvancedMutableColumnFamilyOptionsInterface<T> {
  /**
   * Amount of data to build up in memory (backed by an unsorted log
   * on disk) before converting to a sorted on-disk file.
   *
   * Larger values increase performance, especially during bulk loads.
   * Up to {@code max_write_buffer_number} write buffers may be held in memory
   * at the same time, so you may wish to adjust this parameter
   * to control memory usage.
   *
   * Also, a larger write buffer will result in a longer recovery time
   * the next time the database is opened.
   *
   * Default: 64MB
   * @param writeBufferSize the size of write buffer.
   * @return the instance of the current object.
   * @throws java.lang.IllegalArgumentException thrown on 32-Bit platforms
   *   while overflowing the underlying platform specific value.
   */
  MutableColumnFamilyOptionsInterface<T> setWriteBufferSize(long writeBufferSize);

  /**
   * Returns the size of the write buffer.
   *
   * @return size of write buffer.
   * @see #setWriteBufferSize(long)
   */
  long writeBufferSize();

  /**
   * Disable automatic compactions. Manual compactions can still
   * be issued on this column family
   *
   * @param disableAutoCompactions true if auto-compactions are disabled.
   * @return the reference to the current option.
   */
  MutableColumnFamilyOptionsInterface<T> setDisableAutoCompactions(
      boolean disableAutoCompactions);

  /**
   * Disable automatic compactions. Manual compactions can still
   * be issued on this column family
   *
   * @return true if auto-compactions are disabled.
   */
  boolean disableAutoCompactions();

  /**
   * Number of files to trigger level-0 compaction. A value &lt; 0 means that
   * level-0 compaction will not be triggered by number of files at all.
   *
   * Default: 4
   *
   * @param level0FileNumCompactionTrigger The number of files to trigger
   *   level-0 compaction
   * @return the reference to the current option.
   */
  MutableColumnFamilyOptionsInterface<T> setLevel0FileNumCompactionTrigger(
      int level0FileNumCompactionTrigger);

  /**
   * Number of files to trigger level-0 compaction. A value &lt; 0 means that
   * level-0 compaction will not be triggered by number of files at all.
   *
   * Default: 4
   *
   * @return The number of files to trigger
   */
  int level0FileNumCompactionTrigger();

  /**
   * We try to limit number of bytes in one compaction to be lower than this
   * threshold. But it's not guaranteed.
   * Value 0 will be sanitized.
   *
   * @param maxCompactionBytes max bytes in a compaction
   * @return the reference to the current option.
   * @see #maxCompactionBytes()
   */
  MutableColumnFamilyOptionsInterface<T> setMaxCompactionBytes(long maxCompactionBytes);

  /**
   * We try to limit number of bytes in one compaction to be lower than this
   * threshold. But it's not guaranteed.
   * Value 0 will be sanitized.
   *
   * @return the maximum number of bytes in a compaction.
   * @see #setMaxCompactionBytes(long)
   */
  long maxCompactionBytes();

  /**
   * The upper-bound of the total size of level-1 files in bytes.
   * Maximum number of bytes for level L can be calculated as
   * (maxBytesForLevelBase) * (maxBytesForLevelMultiplier ^ (L-1))
   * For example, if maxBytesForLevelBase is 200MB, and if
   * max_bytes_for_level_multiplier is 10, total data size for level-1
   * will be 200MB, total file size for level-2 will be 2GB,
   * and total file size for level-3 will be 20GB.
   * by default 'maxBytesForLevelBase' is 256MB.
   *
   * @param maxBytesForLevelBase maximum bytes for level base.
   *
   * @return the reference to the current option.
   *
   * @see AdvancedMutableColumnFamilyOptionsInterface#setMaxBytesForLevelMultiplier(double)
   */
  T setMaxBytesForLevelBase(
      long maxBytesForLevelBase);

  /**
   * The upper-bound of the total size of level-1 files in bytes.
   * Maximum number of bytes for level L can be calculated as
   * (maxBytesForLevelBase) * (maxBytesForLevelMultiplier ^ (L-1))
   * For example, if maxBytesForLevelBase is 200MB, and if
   * max_bytes_for_level_multiplier is 10, total data size for level-1
   * will be 200MB, total file size for level-2 will be 2GB,
   * and total file size for level-3 will be 20GB.
   * by default 'maxBytesForLevelBase' is 256MB.
   *
   * @return the upper-bound of the total size of level-1 files
   *     in bytes.
   *
   * @see AdvancedMutableColumnFamilyOptionsInterface#maxBytesForLevelMultiplier()
   */
  long maxBytesForLevelBase();

  /**
   * Compress blocks using the specified compression algorithm. This
   * parameter can be changed dynamically.
   *
   * Default: SNAPPY_COMPRESSION, which gives lightweight but fast compression.
   *
   * @param compressionType Compression Type.
   * @return the reference to the current option.
   */
  T setCompressionType(
      CompressionType compressionType);

  /**
   * Compress blocks using the specified compression algorithm. This
   * parameter can be changed dynamically.
   *
   * Default: SNAPPY_COMPRESSION, which gives lightweight but fast compression.
   *
   * @return Compression type.
   */
  CompressionType compressionType();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MutableDBOptions.java000066400000000000000000000221131370372246700250550ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
 * A set of DB-wide options held as parallel key/value string arrays.
 * Instances are created through {@link #builder()} or {@link #parse(String)}.
 */
public class MutableDBOptions extends AbstractMutableOptions {

  /**
   * User must use builder pattern, or parser.
   * See {@link #builder()} and {@link #parse(String)}.
   *
   * @param keys the keys
   * @param values the values
   */
  private MutableDBOptions(final String[] keys, final String[] values) {
    super(keys, values);
  }

  /**
   * Creates a builder which allows you
   * to set MutableDBOptions in a fluent
   * manner
   *
   * @return A builder for MutableDBOptions
   */
  public static MutableDBOptionsBuilder builder() {
    return new MutableDBOptionsBuilder();
  }

  /**
   * Parses a String representation of MutableDBOptions
   *
   * The format is: key1=value1;key2=value2;key3=value3 etc
   *
   * For int[] values, each int should be separated by a comma, e.g.
   *
   * key1=value1;intArrayKey1=1,2,3
   *
   * @param str The string representation of the mutable db options
   *
   * @return A builder for the mutable db options
   *
   * @throws NullPointerException if {@code str} is null
   * @throws IllegalArgumentException if any pair has no separator, an empty
   *     key, or an empty value
   */
  public static MutableDBOptionsBuilder parse(final String str) {
    Objects.requireNonNull(str);

    final MutableDBOptionsBuilder builder =
        new MutableDBOptionsBuilder();

    final String[] options = str.trim().split(KEY_VALUE_PAIR_SEPARATOR);
    for (final String option : options) {
      final int equalsOffset = option.indexOf(KEY_VALUE_SEPARATOR);
      // -1 means the separator is missing; 0 means the key is empty.
      if (equalsOffset <= 0) {
        throw new IllegalArgumentException(
            "options string has an invalid key=value pair");
      }

      // The key cannot be empty here because equalsOffset > 0, so no
      // separate emptiness check is needed.
      final String key = option.substring(0, equalsOffset);

      final String value = option.substring(equalsOffset + 1);
      if (value.isEmpty()) {
        throw new IllegalArgumentException("options string is invalid");
      }

      builder.fromString(key, value);
    }

    return builder;
  }

  /** Marker type for the option keys accepted by {@link MutableDBOptionsBuilder}. */
  private interface MutableDBOptionKey extends MutableOptionKey {}

  /** The DB options this class can carry, each tagged with its value type. */
  public enum DBOption implements MutableDBOptionKey {
    max_background_jobs(ValueType.INT),
    base_background_compactions(ValueType.INT),
    max_background_compactions(ValueType.INT),
    avoid_flush_during_shutdown(ValueType.BOOLEAN),
    writable_file_max_buffer_size(ValueType.LONG),
    delayed_write_rate(ValueType.LONG),
    max_total_wal_size(ValueType.LONG),
    delete_obsolete_files_period_micros(ValueType.LONG),
    stats_dump_period_sec(ValueType.INT),
    stats_persist_period_sec(ValueType.INT),
    stats_history_buffer_size(ValueType.LONG),
    max_open_files(ValueType.INT),
    bytes_per_sync(ValueType.LONG),
    wal_bytes_per_sync(ValueType.LONG),
    strict_bytes_per_sync(ValueType.BOOLEAN),
    compaction_readahead_size(ValueType.LONG);

    private final ValueType valueType;
    DBOption(final ValueType valueType) {
      this.valueType = valueType;
    }

    @Override
    public ValueType getValueType() {
      return valueType;
    }
  }

  /**
   * Fluent builder for {@link MutableDBOptions}; every setter stores its
   * value under the matching {@link DBOption} key.
   */
  // NOTE(review): the type arguments below were missing from this copy of the
  // file and are reconstructed from usage (build() returns MutableDBOptions,
  // self() returns this builder, allKeys() maps names to MutableDBOptionKey).
  // Confirm the parameter order against AbstractMutableOptionsBuilder.
  public static class MutableDBOptionsBuilder
      extends AbstractMutableOptionsBuilder<MutableDBOptions, MutableDBOptionsBuilder, MutableDBOptionKey>
      implements MutableDBOptionsInterface<MutableDBOptionsBuilder> {

    // Lookup from option name to its key, filled once from DBOption.values().
    private final static Map<String, MutableDBOptionKey> ALL_KEYS_LOOKUP = new HashMap<>();
    static {
      for (final MutableDBOptionKey key : DBOption.values()) {
        ALL_KEYS_LOOKUP.put(key.name(), key);
      }
    }

    private MutableDBOptionsBuilder() {
      super();
    }

    @Override
    protected MutableDBOptionsBuilder self() {
      return this;
    }

    @Override
    protected Map<String, MutableDBOptionKey> allKeys() {
      return ALL_KEYS_LOOKUP;
    }

    @Override
    protected MutableDBOptions build(final String[] keys,
        final String[] values) {
      return new MutableDBOptions(keys, values);
    }

    @Override
    public MutableDBOptionsBuilder setMaxBackgroundJobs(
        final int maxBackgroundJobs) {
      return setInt(DBOption.max_background_jobs, maxBackgroundJobs);
    }

    @Override
    public int maxBackgroundJobs() {
      return getInt(DBOption.max_background_jobs);
    }

    @Override
    @Deprecated
    public void setBaseBackgroundCompactions(
        final int baseBackgroundCompactions) {
      setInt(DBOption.base_background_compactions,
          baseBackgroundCompactions);
    }

    @Override
    public int baseBackgroundCompactions() {
      return getInt(DBOption.base_background_compactions);
    }

    @Override
    @Deprecated
    public MutableDBOptionsBuilder setMaxBackgroundCompactions(
        final int maxBackgroundCompactions) {
      return setInt(DBOption.max_background_compactions,
          maxBackgroundCompactions);
    }

    @Override
    @Deprecated
    public int maxBackgroundCompactions() {
      return getInt(DBOption.max_background_compactions);
    }

    @Override
    public MutableDBOptionsBuilder setAvoidFlushDuringShutdown(
        final boolean avoidFlushDuringShutdown) {
      return setBoolean(DBOption.avoid_flush_during_shutdown,
          avoidFlushDuringShutdown);
    }

    @Override
    public boolean avoidFlushDuringShutdown() {
      return getBoolean(DBOption.avoid_flush_during_shutdown);
    }

    @Override
    public MutableDBOptionsBuilder setWritableFileMaxBufferSize(
        final long writableFileMaxBufferSize) {
      return setLong(DBOption.writable_file_max_buffer_size,
          writableFileMaxBufferSize);
    }

    @Override
    public long writableFileMaxBufferSize() {
      return getLong(DBOption.writable_file_max_buffer_size);
    }

    @Override
    public MutableDBOptionsBuilder setDelayedWriteRate(
        final long delayedWriteRate) {
      return setLong(DBOption.delayed_write_rate,
          delayedWriteRate);
    }

    @Override
    public long delayedWriteRate() {
      return getLong(DBOption.delayed_write_rate);
    }

    @Override
    public MutableDBOptionsBuilder setMaxTotalWalSize(
        final long maxTotalWalSize) {
      return setLong(DBOption.max_total_wal_size, maxTotalWalSize);
    }

    @Override
    public long maxTotalWalSize() {
      return getLong(DBOption.max_total_wal_size);
    }

    @Override
    public MutableDBOptionsBuilder setDeleteObsoleteFilesPeriodMicros(
        final long micros) {
      return setLong(DBOption.delete_obsolete_files_period_micros, micros);
    }

    @Override
    public long deleteObsoleteFilesPeriodMicros() {
      return getLong(DBOption.delete_obsolete_files_period_micros);
    }

    @Override
    public MutableDBOptionsBuilder setStatsDumpPeriodSec(
        final int statsDumpPeriodSec) {
      return setInt(DBOption.stats_dump_period_sec, statsDumpPeriodSec);
    }

    @Override
    public int statsDumpPeriodSec() {
      return getInt(DBOption.stats_dump_period_sec);
    }

    @Override
    public MutableDBOptionsBuilder setStatsPersistPeriodSec(
        final int statsPersistPeriodSec) {
      return setInt(DBOption.stats_persist_period_sec, statsPersistPeriodSec);
    }

    @Override
    public int statsPersistPeriodSec() {
      return getInt(DBOption.stats_persist_period_sec);
    }

    @Override
    public MutableDBOptionsBuilder setStatsHistoryBufferSize(
        final long statsHistoryBufferSize) {
      return setLong(DBOption.stats_history_buffer_size, statsHistoryBufferSize);
    }

    @Override
    public long statsHistoryBufferSize() {
      return getLong(DBOption.stats_history_buffer_size);
    }

    @Override
    public MutableDBOptionsBuilder setMaxOpenFiles(final int maxOpenFiles) {
      return setInt(DBOption.max_open_files, maxOpenFiles);
    }

    @Override
    public int maxOpenFiles() {
      return getInt(DBOption.max_open_files);
    }

    @Override
    public MutableDBOptionsBuilder setBytesPerSync(final long bytesPerSync) {
      return setLong(DBOption.bytes_per_sync, bytesPerSync);
    }

    @Override
    public long bytesPerSync() {
      return getLong(DBOption.bytes_per_sync);
    }

    @Override
    public MutableDBOptionsBuilder setWalBytesPerSync(
        final long walBytesPerSync) {
      return setLong(DBOption.wal_bytes_per_sync, walBytesPerSync);
    }

    @Override
    public long walBytesPerSync() {
      return getLong(DBOption.wal_bytes_per_sync);
    }

    @Override
    public MutableDBOptionsBuilder setStrictBytesPerSync(
        final boolean strictBytesPerSync) {
      return setBoolean(DBOption.strict_bytes_per_sync, strictBytesPerSync);
    }

    @Override
    public boolean strictBytesPerSync() {
      return getBoolean(DBOption.strict_bytes_per_sync);
    }

    @Override
    public MutableDBOptionsBuilder setCompactionReadaheadSize(
        final long compactionReadaheadSize) {
      return setLong(DBOption.compaction_readahead_size,
          compactionReadaheadSize);
    }

    @Override
    public long compactionReadaheadSize() {
      return getLong(DBOption.compaction_readahead_size);
    }
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java000066400000000000000000000367321370372246700267120ustar00rootroot00000000000000
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

/**
 * DB-wide options, each declared as a setter/getter pair.
 *
 * @param <T> the concrete options type returned by the fluent setters
 */
public interface MutableDBOptionsInterface<T extends MutableDBOptionsInterface<T>> {
  /**
   * Specifies the maximum number of concurrent background jobs (both flushes
   * and compactions combined).
   * Default: 2
   *
   * @param maxBackgroundJobs number of max concurrent background jobs
   * @return the instance of the current object.
   */
  T setMaxBackgroundJobs(int maxBackgroundJobs);

  /**
   * Returns the maximum number of concurrent background jobs (both flushes
   * and compactions combined).
   * Default: 2
   *
   * @return the maximum number of concurrent background jobs.
   */
  int maxBackgroundJobs();

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. This option is ignored.
   *
   * Suggested number of concurrent background compaction jobs, submitted to
   * the default LOW priority thread pool.
   * Default: -1
   *
   * @param baseBackgroundCompactions Suggested number of background compaction
   *     jobs
   *
   * @deprecated Use {@link #setMaxBackgroundJobs(int)}
   */
  @Deprecated
  void setBaseBackgroundCompactions(int baseBackgroundCompactions);

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. This option is ignored.
   *
   * Suggested number of concurrent background compaction jobs, submitted to
   * the default LOW priority thread pool.
   * Default: -1
   *
   * @return Suggested number of background compaction jobs
   */
  int baseBackgroundCompactions();

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where user sets at least one of `max_background_compactions` or
   * `max_background_flushes` (we replace -1 by 1 in case one option is unset).
   *
   * Specifies the maximum number of concurrent background compaction jobs,
   * submitted to the default LOW priority thread pool.
   * If you're increasing this, also consider increasing number of threads in
   * LOW priority thread pool. For more information, see
   * {@link RocksEnv#setBackgroundThreads(int, Priority)}.
   * Default: -1
   *
   * @param maxBackgroundCompactions the maximum number of background
   *     compaction jobs.
   * @return the instance of the current object.
   *
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   * @see DBOptionsInterface#maxBackgroundFlushes()
   * @deprecated Use {@link #setMaxBackgroundJobs(int)}
   */
  @Deprecated
  T setMaxBackgroundCompactions(int maxBackgroundCompactions);

  /**
   * NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the
   * value of max_background_jobs. For backwards compatibility we will set
   * `max_background_jobs = max_background_compactions + max_background_flushes`
   * in the case where user sets at least one of `max_background_compactions` or
   * `max_background_flushes` (we replace -1 by 1 in case one option is unset).
   *
   * Returns the maximum number of concurrent background compaction jobs,
   * submitted to the default LOW priority thread pool.
   * When increasing this number, we may also want to consider increasing
   * number of threads in LOW priority thread pool.
   * Default: -1
   *
   * @return the maximum number of concurrent background compaction jobs.
   * @see RocksEnv#setBackgroundThreads(int)
   * @see RocksEnv#setBackgroundThreads(int, Priority)
   *
   * @deprecated Use {@link #setMaxBackgroundJobs(int)}
   */
  @Deprecated
  int maxBackgroundCompactions();

  /**
   * By default RocksDB will flush all memtables on DB close if there are
   * unpersisted data (i.e. with WAL disabled) The flush can be skipped to
   * speed up DB close. Unpersisted data WILL BE LOST.
   *
   * DEFAULT: false
   *
   * Dynamically changeable through
   * {@link RocksDB#setDBOptions(MutableDBOptions)}
   * API.
   *
   * @param avoidFlushDuringShutdown true if we should avoid flush during
   *     shutdown
   *
   * @return the reference to the current options.
   */
  T setAvoidFlushDuringShutdown(boolean avoidFlushDuringShutdown);

  /**
   * By default RocksDB will flush all memtables on DB close if there are
   * unpersisted data (i.e. with WAL disabled) The flush can be skipped to
   * speed up DB close. Unpersisted data WILL BE LOST.
   *
   * DEFAULT: false
   *
   * Dynamically changeable through
   * {@link RocksDB#setDBOptions(MutableDBOptions)}
   * API.
   *
   * @return true if we should avoid flush during shutdown
   */
  boolean avoidFlushDuringShutdown();

  /**
   * This is the maximum buffer size that is used by WritableFileWriter.
   * On Windows, we need to maintain an aligned buffer for writes.
   * We allow the buffer to grow until its size hits the limit.
   *
   * Default: 1024 * 1024 (1 MB)
   *
   * @param writableFileMaxBufferSize the maximum buffer size
   *
   * @return the reference to the current options.
   */
  T setWritableFileMaxBufferSize(long writableFileMaxBufferSize);

  /**
   * This is the maximum buffer size that is used by WritableFileWriter.
   * On Windows, we need to maintain an aligned buffer for writes.
   * We allow the buffer to grow until its size hits the limit.
   *
   * Default: 1024 * 1024 (1 MB)
   *
   * @return the maximum buffer size
   */
  long writableFileMaxBufferSize();

  /**
   * The limited write rate to DB if
   * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or
   * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered,
   * or we are writing to the last mem table allowed and we allow more than 3
   * mem tables. It is calculated using size of user write requests before
   * compression. RocksDB may decide to slow down more if the compaction still
   * gets behind further.
   * If the value is 0, we will infer a value from `rate_limiter` value
   * if it is not empty, or 16MB if `rate_limiter` is empty. Note that
   * if users change the rate in `rate_limiter` after DB is opened,
   * `delayed_write_rate` won't be adjusted.
   *
   * Unit: bytes per second.
   *
   * Default: 0
   *
   * Dynamically changeable through {@link RocksDB#setDBOptions(MutableDBOptions)}.
   *
   * @param delayedWriteRate the rate in bytes per second
   *
   * @return the reference to the current options.
   */
  T setDelayedWriteRate(long delayedWriteRate);

  /**
   * The limited write rate to DB if
   * {@link ColumnFamilyOptions#softPendingCompactionBytesLimit()} or
   * {@link ColumnFamilyOptions#level0SlowdownWritesTrigger()} is triggered,
   * or we are writing to the last mem table allowed and we allow more than 3
   * mem tables. It is calculated using size of user write requests before
   * compression. RocksDB may decide to slow down more if the compaction still
   * gets behind further.
   * If the value is 0, we will infer a value from `rate_limiter` value
   * if it is not empty, or 16MB if `rate_limiter` is empty. Note that
   * if users change the rate in `rate_limiter` after DB is opened,
   * `delayed_write_rate` won't be adjusted.
   *
   * Unit: bytes per second.
   *
   * Default: 0
   *
   * Dynamically changeable through {@link RocksDB#setDBOptions(MutableDBOptions)}.
   *
   * @return the rate in bytes per second
   */
  long delayedWriteRate();

  /**
   * <p>Once write-ahead logs exceed this size, we will start forcing the
   * flush of column families whose memtables are backed by the oldest live
   * WAL file (i.e. the ones that are causing all the space amplification).
   * </p>
   * <p>If set to 0 (default), we will dynamically choose the WAL size limit to
   * be [sum of all write_buffer_size * max_write_buffer_number] * 2</p>
   * <p>This option takes effect only when there are more than one column family as
   * otherwise the wal size is dictated by the write_buffer_size.</p>
   * <p>Default: 0</p>
   *
   * @param maxTotalWalSize max total wal size.
   * @return the instance of the current object.
   */
  T setMaxTotalWalSize(long maxTotalWalSize);

  /**
   * <p>Returns the max total wal size. Once write-ahead logs exceed this size,
   * we will start forcing the flush of column families whose memtables are
   * backed by the oldest live WAL file (i.e. the ones that are causing all
   * the space amplification).</p>
   *
   * <p>If set to 0 (default), we will dynamically choose the WAL size limit
   * to be [sum of all write_buffer_size * max_write_buffer_number] * 2
   * </p>
   *
   * @return max total wal size
   */
  long maxTotalWalSize();

  /**
   * The periodicity when obsolete files get deleted. The default
   * value is 6 hours. The files that get out of scope by compaction
   * process will still get automatically deleted on every compaction,
   * regardless of this setting
   *
   * @param micros the time interval in micros
   * @return the instance of the current object.
   */
  T setDeleteObsoleteFilesPeriodMicros(long micros);

  /**
   * The periodicity when obsolete files get deleted. The default
   * value is 6 hours. The files that get out of scope by compaction
   * process will still get automatically deleted on every compaction,
   * regardless of this setting
   *
   * @return the time interval in micros when obsolete files will be deleted.
   */
  long deleteObsoleteFilesPeriodMicros();

  /**
   * If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
   * Default: 600 (10 minutes)
   *
   * @param statsDumpPeriodSec time interval in seconds.
   * @return the instance of the current object.
   */
  T setStatsDumpPeriodSec(int statsDumpPeriodSec);

  /**
   * If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec
   * Default: 600 (10 minutes)
   *
   * @return time interval in seconds.
   */
  int statsDumpPeriodSec();

  /**
   * If not zero, dump rocksdb.stats to RocksDB every
   * {@code statsPersistPeriodSec}
   *
   * Default: 600
   *
   * @param statsPersistPeriodSec time interval in seconds.
   * @return the instance of the current object.
   */
  T setStatsPersistPeriodSec(int statsPersistPeriodSec);

  /**
   * If not zero, dump rocksdb.stats to RocksDB every
   * {@code statsPersistPeriodSec}
   *
   * @return time interval in seconds.
   */
  int statsPersistPeriodSec();

  /**
   * If not zero, periodically take stats snapshots and store in memory, the
   * memory size for stats snapshots is capped at {@code statsHistoryBufferSize}
   *
   * Default: 1MB
   *
   * @param statsHistoryBufferSize the size of the buffer.
   * @return the instance of the current object.
   */
  T setStatsHistoryBufferSize(long statsHistoryBufferSize);

  /**
   * If not zero, periodically take stats snapshots and store in memory, the
   * memory size for stats snapshots is capped at {@code statsHistoryBufferSize}
   *
   * @return the size of the buffer.
   */
  long statsHistoryBufferSize();

  /**
   * Number of open files that can be used by the DB.  You may need to
   * increase this if your database has a large working set. Value -1 means
   * files opened are always kept open. You can estimate number of files based
   * on {@code target_file_size_base} and {@code target_file_size_multiplier}
   * for level-based compaction. For universal-style compaction, you can usually
   * set it to -1.
   * Default: -1
   *
   * @param maxOpenFiles the maximum number of open files.
   * @return the instance of the current object.
   */
  T setMaxOpenFiles(int maxOpenFiles);

  /**
   * Number of open files that can be used by the DB.  You may need to
   * increase this if your database has a large working set. Value -1 means
   * files opened are always kept open. You can estimate number of files based
   * on {@code target_file_size_base} and {@code target_file_size_multiplier}
   * for level-based compaction. For universal-style compaction, you can usually
   * set it to -1.
   * Default: -1
   *
   * @return the maximum number of open files.
   */
  int maxOpenFiles();

  /**
   * Allows OS to incrementally sync files to disk while they are being
   * written, asynchronously, in the background.
   * Issue one request for every bytes_per_sync written. 0 turns it off.
   * Default: 0
   *
   * @param bytesPerSync size in bytes
   * @return the instance of the current object.
   */
  T setBytesPerSync(long bytesPerSync);

  /**
   * Allows OS to incrementally sync files to disk while they are being
   * written, asynchronously, in the background.
   * Issue one request for every bytes_per_sync written. 0 turns it off.
   * Default: 0
   *
   * @return size in bytes
   */
  long bytesPerSync();

  /**
   * Same as {@link #setBytesPerSync(long)} , but applies to WAL files
   *
   * Default: 0, turned off
   *
   * @param walBytesPerSync size in bytes
   * @return the instance of the current object.
   */
  T setWalBytesPerSync(long walBytesPerSync);

  /**
   * Same as {@link #bytesPerSync()} , but applies to WAL files
   *
   * Default: 0, turned off
   *
   * @return size in bytes
   */
  long walBytesPerSync();

  /**
   * When true, guarantees WAL files have at most {@link #walBytesPerSync()}
   * bytes submitted for writeback at any given time, and SST files have at most
   * {@link #bytesPerSync()} bytes pending writeback at any given time. This
   * can be used to handle cases where processing speed exceeds I/O speed
   * during file generation, which can lead to a huge sync when the file is
   * finished, even with {@link #bytesPerSync()} / {@link #walBytesPerSync()}
   * properly configured.
   *
   *   - If `sync_file_range` is supported it achieves this by waiting for any
   *     prior `sync_file_range`s to finish before proceeding. In this way,
   *     processing (compression, etc.) can proceed uninhibited in the gap
   *     between `sync_file_range`s, and we block only when I/O falls
   *     behind.
   *   - Otherwise the `WritableFile::Sync` method is used. Note this mechanism
   *     always blocks, thus preventing the interleaving of I/O and processing.
   *
   * Note: Enabling this option does not provide any additional persistence
   * guarantees, as it may use `sync_file_range`, which does not write out
   * metadata.
   *
   * Default: false
   *
   * @param strictBytesPerSync true to enable the strict bytes-per-sync
   *     guarantee, false to disable it
   * @return the instance of the current object.
   */
  T setStrictBytesPerSync(boolean strictBytesPerSync);

  /**
   * Returns whether the strict bytes-per-sync guarantee is enabled.
   *
   * See {@link #setStrictBytesPerSync(boolean)}
   *
   * @return true if the strict bytes-per-sync guarantee is enabled.
   */
  boolean strictBytesPerSync();

  /**
   * If non-zero, we perform bigger reads when doing compaction. If you're
   * running RocksDB on spinning disks, you should set this to at least 2MB.
   *
   * That way RocksDB's compaction is doing sequential instead of random reads.
   * When non-zero, we also force
   * {@link DBOptionsInterface#newTableReaderForCompactionInputs()} to true.
   *
   * Default: 0
   *
   * @param compactionReadaheadSize The compaction read-ahead size
   *
   * @return the reference to the current options.
   */
  T setCompactionReadaheadSize(long compactionReadaheadSize);

  /**
   * If non-zero, we perform bigger reads when doing compaction. If you're
   * running RocksDB on spinning disks, you should set this to at least 2MB.
   *
   * That way RocksDB's compaction is doing sequential instead of random reads.
   * When non-zero, we also force
   * {@link DBOptionsInterface#newTableReaderForCompactionInputs()} to true.
   *
   * Default: 0
   *
   * @return The compaction read-ahead size
   */
  long compactionReadaheadSize();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MutableOptionKey.java000066400000000000000000000004161370372246700251370ustar00rootroot00000000000000
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

/**
 * A named option key together with the type of value it holds.
 */
public interface MutableOptionKey {
  /** The value types an option key can declare. */
  enum ValueType {
    DOUBLE,
    LONG,
    INT,
    BOOLEAN,
    INT_ARRAY,
    ENUM
  }

  /**
   * @return the name of this option key.
   */
  String name();

  /**
   * @return the type of value this key holds.
   */
  ValueType getValueType();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/MutableOptionValue.java000066400000000000000000000221711370372246700254650ustar00rootroot00000000000000
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb; import static org.rocksdb.AbstractMutableOptions.INT_ARRAY_INT_SEPARATOR; public abstract class MutableOptionValue { abstract double asDouble() throws NumberFormatException; abstract long asLong() throws NumberFormatException; abstract int asInt() throws NumberFormatException; abstract boolean asBoolean() throws IllegalStateException; abstract int[] asIntArray() throws IllegalStateException; abstract String asString(); abstract T asObject(); private static abstract class MutableOptionValueObject extends MutableOptionValue { protected final T value; private MutableOptionValueObject(final T value) { this.value = value; } @Override T asObject() { return value; } } static MutableOptionValue fromString(final String s) { return new MutableOptionStringValue(s); } static MutableOptionValue fromDouble(final double d) { return new MutableOptionDoubleValue(d); } static MutableOptionValue fromLong(final long d) { return new MutableOptionLongValue(d); } static MutableOptionValue fromInt(final int i) { return new MutableOptionIntValue(i); } static MutableOptionValue fromBoolean(final boolean b) { return new MutableOptionBooleanValue(b); } static MutableOptionValue fromIntArray(final int[] ix) { return new MutableOptionIntArrayValue(ix); } static > MutableOptionValue fromEnum(final N value) { return new MutableOptionEnumValue<>(value); } static class MutableOptionStringValue extends MutableOptionValueObject { MutableOptionStringValue(final String value) { super(value); } @Override double asDouble() throws NumberFormatException { return Double.parseDouble(value); } @Override long asLong() throws NumberFormatException { return Long.parseLong(value); } @Override int asInt() throws NumberFormatException { return Integer.parseInt(value); } @Override boolean asBoolean() throws IllegalStateException { return Boolean.parseBoolean(value); } @Override int[] asIntArray() throws IllegalStateException { throw new IllegalStateException("String is not applicable as int[]"); 
} @Override String asString() { return value; } } static class MutableOptionDoubleValue extends MutableOptionValue { private final double value; MutableOptionDoubleValue(final double value) { this.value = value; } @Override double asDouble() { return value; } @Override long asLong() throws NumberFormatException { return Double.valueOf(value).longValue(); } @Override int asInt() throws NumberFormatException { if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { throw new NumberFormatException( "double value lies outside the bounds of int"); } return Double.valueOf(value).intValue(); } @Override boolean asBoolean() throws IllegalStateException { throw new IllegalStateException( "double is not applicable as boolean"); } @Override int[] asIntArray() throws IllegalStateException { if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { throw new NumberFormatException( "double value lies outside the bounds of int"); } return new int[] { Double.valueOf(value).intValue() }; } @Override String asString() { return String.valueOf(value); } @Override Double asObject() { return value; } } static class MutableOptionLongValue extends MutableOptionValue { private final long value; MutableOptionLongValue(final long value) { this.value = value; } @Override double asDouble() { if(value > Double.MAX_VALUE || value < Double.MIN_VALUE) { throw new NumberFormatException( "long value lies outside the bounds of int"); } return Long.valueOf(value).doubleValue(); } @Override long asLong() throws NumberFormatException { return value; } @Override int asInt() throws NumberFormatException { if(value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { throw new NumberFormatException( "long value lies outside the bounds of int"); } return Long.valueOf(value).intValue(); } @Override boolean asBoolean() throws IllegalStateException { throw new IllegalStateException( "long is not applicable as boolean"); } @Override int[] asIntArray() throws IllegalStateException { if(value > 
Integer.MAX_VALUE || value < Integer.MIN_VALUE) { throw new NumberFormatException( "long value lies outside the bounds of int"); } return new int[] { Long.valueOf(value).intValue() }; } @Override String asString() { return String.valueOf(value); } @Override Long asObject() { return value; } } static class MutableOptionIntValue extends MutableOptionValue { private final int value; MutableOptionIntValue(final int value) { this.value = value; } @Override double asDouble() { if(value > Double.MAX_VALUE || value < Double.MIN_VALUE) { throw new NumberFormatException("int value lies outside the bounds of int"); } return Integer.valueOf(value).doubleValue(); } @Override long asLong() throws NumberFormatException { return value; } @Override int asInt() throws NumberFormatException { return value; } @Override boolean asBoolean() throws IllegalStateException { throw new IllegalStateException("int is not applicable as boolean"); } @Override int[] asIntArray() throws IllegalStateException { return new int[] { value }; } @Override String asString() { return String.valueOf(value); } @Override Integer asObject() { return value; } } static class MutableOptionBooleanValue extends MutableOptionValue { private final boolean value; MutableOptionBooleanValue(final boolean value) { this.value = value; } @Override double asDouble() { throw new NumberFormatException("boolean is not applicable as double"); } @Override long asLong() throws NumberFormatException { throw new NumberFormatException("boolean is not applicable as Long"); } @Override int asInt() throws NumberFormatException { throw new NumberFormatException("boolean is not applicable as int"); } @Override boolean asBoolean() { return value; } @Override int[] asIntArray() throws IllegalStateException { throw new IllegalStateException("boolean is not applicable as int[]"); } @Override String asString() { return String.valueOf(value); } @Override Boolean asObject() { return value; } } static class MutableOptionIntArrayValue extends 
MutableOptionValueObject { MutableOptionIntArrayValue(final int[] value) { super(value); } @Override double asDouble() { throw new NumberFormatException("int[] is not applicable as double"); } @Override long asLong() throws NumberFormatException { throw new NumberFormatException("int[] is not applicable as Long"); } @Override int asInt() throws NumberFormatException { throw new NumberFormatException("int[] is not applicable as int"); } @Override boolean asBoolean() { throw new NumberFormatException("int[] is not applicable as boolean"); } @Override int[] asIntArray() throws IllegalStateException { return value; } @Override String asString() { final StringBuilder builder = new StringBuilder(); for(int i = 0; i < value.length; i++) { builder.append(i); if(i + 1 < value.length) { builder.append(INT_ARRAY_INT_SEPARATOR); } } return builder.toString(); } } static class MutableOptionEnumValue> extends MutableOptionValueObject { MutableOptionEnumValue(final T value) { super(value); } @Override double asDouble() throws NumberFormatException { throw new NumberFormatException("Enum is not applicable as double"); } @Override long asLong() throws NumberFormatException { throw new NumberFormatException("Enum is not applicable as long"); } @Override int asInt() throws NumberFormatException { throw new NumberFormatException("Enum is not applicable as int"); } @Override boolean asBoolean() throws IllegalStateException { throw new NumberFormatException("Enum is not applicable as boolean"); } @Override int[] asIntArray() throws IllegalStateException { throw new NumberFormatException("Enum is not applicable as int[]"); } @Override String asString() { return value.name(); } } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java000066400000000000000000000034541370372246700265300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * A simple abstraction to allow a Java class to wrap a custom comparator * implemented in C++. * * The native comparator must directly extend rocksdb::Comparator. */ public abstract class NativeComparatorWrapper extends AbstractComparator { @Override final ComparatorType getComparatorType() { return ComparatorType.JAVA_NATIVE_COMPARATOR_WRAPPER; } @Override public final String name() { throw new IllegalStateException("This should not be called. " + "Implementation is in Native code"); } @Override public final int compare(final ByteBuffer s1, final ByteBuffer s2) { throw new IllegalStateException("This should not be called. " + "Implementation is in Native code"); } @Override public final void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { throw new IllegalStateException("This should not be called. " + "Implementation is in Native code"); } @Override public final void findShortSuccessor(final ByteBuffer key) { throw new IllegalStateException("This should not be called. " + "Implementation is in Native code"); } /** * We override {@link RocksCallbackObject#disposeInternal()} * as disposing of a native rocksdb::Comparator extension requires * a slightly different approach as it is not really a RocksCallbackObject */ @Override protected void disposeInternal() { disposeInternal(nativeHandle_); } private native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/NativeLibraryLoader.java000066400000000000000000000101711370372246700256050ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
package org.rocksdb;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

import org.rocksdb.util.Environment;

/**
 * This class is used to load the RocksDB shared library from within the jar.
 * The shared library is extracted to a temp folder and loaded from there.
 *
 * Loading is attempted in three steps (see {@link #loadLibrary(String)}):
 * the shared library name via {@link System#loadLibrary(String)}, then the
 * JNI library name via {@link System#loadLibrary(String)}, and finally
 * extraction of the JNI library file from the classpath.
 */
public class NativeLibraryLoader {
  //singleton
  private static final NativeLibraryLoader instance = new NativeLibraryLoader();
  // set once the library has been loaded through loadLibraryFromJar
  private static boolean initialized = false;

  private static final String sharedLibraryName = Environment.getSharedLibraryName("rocksdb");
  private static final String jniLibraryName = Environment.getJniLibraryName("rocksdb");
  private static final String jniLibraryFileName = Environment.getJniLibraryFileName("rocksdb");
  private static final String tempFilePrefix = "librocksdbjni";
  private static final String tempFileSuffix = Environment.getJniLibraryExtension();

  /**
   * Get a reference to the NativeLibraryLoader
   *
   * @return The NativeLibraryLoader
   */
  public static NativeLibraryLoader getInstance() {
    return instance;
  }

  /**
   * Firstly attempts to load the library from <i>java.library.path</i>,
   * if that fails then it falls back to extracting
   * the library from the classpath
   * {@link org.rocksdb.NativeLibraryLoader#loadLibraryFromJar(java.lang.String)}
   *
   * @param tmpDir A temporary directory to use
   *   to copy the native library to when loading from the classpath.
   *   If null, or the empty string, we rely on Java's
   *   {@link java.io.File#createTempFile(String, String)}
   *   function to provide a temporary location.
   *   The temporary file will be registered for deletion
   *   on exit.
   *
   * @throws java.io.IOException if a filesystem operation fails.
   */
  public synchronized void loadLibrary(final String tmpDir) throws IOException {
    try {
      // 1) shared library name
      System.loadLibrary(sharedLibraryName);
    } catch(final UnsatisfiedLinkError ule1) {
      try {
        // 2) JNI library name
        System.loadLibrary(jniLibraryName);
      } catch(final UnsatisfiedLinkError ule2) {
        // 3) extract from the classpath and load from the temp file
        loadLibraryFromJar(tmpDir);
      }
    }
  }

  /**
   * Attempts to extract the native RocksDB library
   * from the classpath and load it
   *
   * Does nothing when a previous call already loaded the library.
   * NOTE(review): this method is not itself synchronized; only calls that
   * arrive through {@link #loadLibrary(String)} hold the instance lock
   * while the {@code initialized} flag is checked and set.
   *
   * @param tmpDir A temporary directory to use
   *   to copy the native library to. If null,
   *   or the empty string, we rely on Java's
   *   {@link java.io.File#createTempFile(String, String)}
   *   function to provide a temporary location.
   *   The temporary file will be registered for deletion
   *   on exit.
   *
   * @throws java.io.IOException if a filesystem operation fails.
   */
  void loadLibraryFromJar(final String tmpDir)
      throws IOException {
    if (!initialized) {
      System.load(loadLibraryFromJarToTemp(tmpDir).getAbsolutePath());
      initialized = true;
    }
  }

  /**
   * Copies the JNI library file from the classpath into a temporary file
   * and returns that file; the file is registered for deletion on exit.
   *
   * @param tmpDir directory to create the file in; if null or empty a file
   *   is created via {@link java.io.File#createTempFile(String, String)}
   *
   * @return the temporary file holding the copied library
   *
   * @throws java.io.IOException if a filesystem operation fails.
   * @throws RuntimeException if a pre-existing file in {@code tmpDir} cannot
   *   be removed, the file cannot be created or does not exist afterwards,
   *   or the library resource is not found on the classpath
   */
  File loadLibraryFromJarToTemp(final String tmpDir)
          throws IOException {
    final File temp;
    if (tmpDir == null || tmpDir.isEmpty()) {
      temp = File.createTempFile(tempFilePrefix, tempFileSuffix);
    } else {
      temp = new File(tmpDir, jniLibraryFileName);
      // replace any stale copy left in the caller-supplied directory
      if (temp.exists() && !temp.delete()) {
        throw new RuntimeException("File: " + temp.getAbsolutePath()
            + " already exists and cannot be removed.");
      }
      if (!temp.createNewFile()) {
        throw new RuntimeException("File: " + temp.getAbsolutePath()
            + " could not be created.");
      }
    }

    if (!temp.exists()) {
      throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist.");
    } else {
      temp.deleteOnExit();
    }

    // attempt to copy the library from the Jar file to the temp destination
    try (final InputStream is = getClass().getClassLoader().
      getResourceAsStream(jniLibraryFileName)) {
      if (is == null) {
        throw new RuntimeException(jniLibraryFileName + " was not found inside JAR.");
      } else {
        Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
      }
    }

    return temp;
  }

  /**
   * Private constructor to disallow instantiation
   */
  private NativeLibraryLoader() {
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/OperationStage.java000066400000000000000000000031411370372246700246260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * The operation stage.
 *
 * Each constant carries the byte used as its internal representation.
 */
public enum OperationStage {
  STAGE_UNKNOWN((byte)0x0),
  STAGE_FLUSH_RUN((byte)0x1),
  STAGE_FLUSH_WRITE_L0((byte)0x2),
  STAGE_COMPACTION_PREPARE((byte)0x3),
  STAGE_COMPACTION_RUN((byte)0x4),
  STAGE_COMPACTION_PROCESS_KV((byte)0x5),
  STAGE_COMPACTION_INSTALL((byte)0x6),
  STAGE_COMPACTION_SYNC_FILE((byte)0x7),
  STAGE_PICK_MEMTABLES_TO_FLUSH((byte)0x8),
  STAGE_MEMTABLE_ROLLBACK((byte)0x9),
  STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS((byte)0xA);

  private final byte value;

  OperationStage(final byte value) {
    this.value = value;
  }

  /**
   * Get the internal representation value.
   *
   * @return the internal representation value.
   */
  byte getValue() {
    return value;
  }

  /**
   * Get the Operation stage from the internal representation value.
   *
   * @param value the internal representation value.
   *
   * @return the operation stage
   *
   * @throws IllegalArgumentException if the value does not match
   *     an OperationStage
   */
  static OperationStage fromValue(final byte value)
      throws IllegalArgumentException {
    // linear scan over all constants; the loop variable holds an
    // OperationStage despite being named threadType
    for (final OperationStage threadType : OperationStage.values()) {
      if (threadType.value == value) {
        return threadType;
      }
    }
    throw new IllegalArgumentException(
        "Unknown value for OperationStage: " + value);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/OperationType.java000066400000000000000000000026031370372246700245060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * The type used to refer to a thread operation.
 *
 * A thread operation describes high-level action of a thread,
 * examples include compaction and flush.
 *
 * Each constant carries the byte used as its internal representation.
 */
public enum OperationType {
  OP_UNKNOWN((byte)0x0),
  OP_COMPACTION((byte)0x1),
  OP_FLUSH((byte)0x2);

  private final byte value;

  OperationType(final byte value) {
    this.value = value;
  }

  /**
   * Get the internal representation value.
   *
   * @return the internal representation value.
   */
  byte getValue() {
    return value;
  }

  /**
   * Get the Operation type from the internal representation value.
   *
   * @param value the internal representation value.
   *
   * @return the operation type
   *
   * @throws IllegalArgumentException if the value does not match
   *     an OperationType
   */
  static OperationType fromValue(final byte value)
      throws IllegalArgumentException {
    // linear scan over all constants; the loop variable holds an
    // OperationType despite being named threadType
    for (final OperationType threadType : OperationType.values()) {
      if (threadType.value == value) {
        return threadType;
      }
    }
    throw new IllegalArgumentException(
        "Unknown value for OperationType: " + value);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java000066400000000000000000000176311370372246700264530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.List;

/**
 * Database with Transaction support.
 *
 * NOTE(review): the raw {@code TransactionalDB} and {@code List} types in
 * this copy look like generic type arguments were lost in extraction -
 * confirm against the upstream file before compiling.
 */
public class OptimisticTransactionDB extends RocksDB
    implements TransactionalDB {

  /**
   * Private constructor.
   *
   * @param nativeHandle The native handle of the C++ OptimisticTransactionDB
   *     object
   */
  private OptimisticTransactionDB(final long nativeHandle) {
    super(nativeHandle);
  }

  /**
   * Open an OptimisticTransactionDB similar to
   * {@link RocksDB#open(Options, String)}.
   *
   * @param options {@link org.rocksdb.Options} instance.
   * @param path the path to the rocksdb.
   *
   * @return a newly constructed {@link OptimisticTransactionDB} instance;
   *     this method does not return null.
   *
   * @throws RocksDBException if an error occurs whilst opening the database.
   */
  public static OptimisticTransactionDB open(final Options options,
      final String path) throws RocksDBException {
    final OptimisticTransactionDB otdb = new OptimisticTransactionDB(open(
        options.nativeHandle_, path));

    // when non-default Options is used, keeping an Options reference
    // in RocksDB can prevent Java to GC during the life-time of
    // the currently-created RocksDB.
    otdb.storeOptionsInstance(options);

    return otdb;
  }

  /**
   * Open an OptimisticTransactionDB similar to
   * {@link RocksDB#open(DBOptions, String, List, List)}.
   *
   * @param dbOptions {@link org.rocksdb.DBOptions} instance.
   * @param path the path to the rocksdb.
   * @param columnFamilyDescriptors list of column family descriptors
   * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances
   *
   * @return a newly constructed {@link OptimisticTransactionDB} instance;
   *     this method does not return null.
   *
   * @throws RocksDBException if an error occurs whilst opening the database.
   */
  public static OptimisticTransactionDB open(final DBOptions dbOptions,
      final String path,
      final List columnFamilyDescriptors,
      final List columnFamilyHandles)
      throws RocksDBException {

    // flatten the descriptors into parallel arrays of names and
    // native option handles for the native call
    final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][];
    final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()];
    for (int i = 0; i < columnFamilyDescriptors.size(); i++) {
      final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors
          .get(i);
      cfNames[i] = cfDescriptor.getName();
      cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_;
    }

    // handles[0] is the database handle, handles[1..] are used below as the
    // column family handles
    final long[] handles = open(dbOptions.nativeHandle_, path, cfNames,
        cfOptionHandles);
    final OptimisticTransactionDB otdb =
        new OptimisticTransactionDB(handles[0]);

    // when non-default Options is used, keeping an Options reference
    // in RocksDB can prevent Java to GC during the life-time of
    // the currently-created RocksDB.
    otdb.storeOptionsInstance(dbOptions);

    for (int i = 1; i < handles.length; i++) {
      columnFamilyHandles.add(new ColumnFamilyHandle(otdb, handles[i]));
    }

    return otdb;
  }

  /**
   * This is similar to {@link #close()} except that it
   * throws an exception if any error occurs.
   *
   * This will not fsync the WAL files.
   * If syncing is required, the caller must first call {@link #syncWal()}
   * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch
   * with {@link WriteOptions#setSync(boolean)} set to true.
   *
   * See also {@link #close()}.
   *
   * @throws RocksDBException if an error occurs whilst closing.
   */
  public void closeE() throws RocksDBException {
    // only the caller that flips the ownership flag performs the close
    if (owningHandle_.compareAndSet(true, false)) {
      try {
        closeDatabase(nativeHandle_);
      } finally {
        // always release the native object, even if closing failed
        disposeInternal();
      }
    }
  }

  /**
   * This is similar to {@link #closeE()} except that it
   * silently ignores any errors.
   *
   * This will not fsync the WAL files.
   * If syncing is required, the caller must first call {@link #syncWal()}
   * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch
   * with {@link WriteOptions#setSync(boolean)} set to true.
   *
   * See also {@link #close()}.
   */
  @Override
  public void close() {
    // only the caller that flips the ownership flag performs the close
    if (owningHandle_.compareAndSet(true, false)) {
      try {
        closeDatabase(nativeHandle_);
      } catch (final RocksDBException e) {
        // silently ignore the error report
      } finally {
        // always release the native object, even if closing failed
        disposeInternal();
      }
    }
  }

  @Override
  public Transaction beginTransaction(final WriteOptions writeOptions) {
    return new Transaction(this, beginTransaction(nativeHandle_,
        writeOptions.nativeHandle_));
  }

  @Override
  public Transaction beginTransaction(final WriteOptions writeOptions,
      final OptimisticTransactionOptions optimisticTransactionOptions) {
    return new Transaction(this, beginTransaction(nativeHandle_,
        writeOptions.nativeHandle_,
        optimisticTransactionOptions.nativeHandle_));
  }

  // TODO(AR) consider having beginTransaction(... oldTransaction) set a
  // reference count inside Transaction, so that we can always call
  // Transaction#close but the object is only disposed when there are as many
  // closes as beginTransaction. Makes the try-with-resources paradigm easier for
  // java developers

  @Override
  public Transaction beginTransaction(final WriteOptions writeOptions,
      final Transaction oldTransaction) {
    final long jtxn_handle = beginTransaction_withOld(nativeHandle_,
        writeOptions.nativeHandle_, oldTransaction.nativeHandle_);

    // RocksJava relies on the assumption that
    // we do not allocate a new Transaction object
    // when providing an old_txn
    assert(jtxn_handle == oldTransaction.nativeHandle_);

    return oldTransaction;
  }

  @Override
  public Transaction beginTransaction(final WriteOptions writeOptions,
      final OptimisticTransactionOptions optimisticTransactionOptions,
      final Transaction oldTransaction) {
    final long jtxn_handle = beginTransaction_withOld(nativeHandle_,
        writeOptions.nativeHandle_, optimisticTransactionOptions.nativeHandle_,
        oldTransaction.nativeHandle_);

    // RocksJava relies on the assumption that
    // we do not allocate a new Transaction object
    // when providing an old_txn
    assert(jtxn_handle == oldTransaction.nativeHandle_);

    return oldTransaction;
  }

  /**
   * Get the underlying database that was opened.
   *
   * The returned wrapper has disowned its native handle (see the
   * {@code disOwnNativeHandle()} call below).
   *
   * @return The underlying database that was opened.
   */
  public RocksDB getBaseDB() {
    final RocksDB db = new RocksDB(getBaseDB(nativeHandle_));
    db.disOwnNativeHandle();
    return db;
  }

  @Override protected final native void disposeInternal(final long handle);

  protected static native long open(final long optionsHandle,
      final String path) throws RocksDBException;
  protected static native long[] open(final long handle, final String path,
      final byte[][] columnFamilyNames, final long[] columnFamilyOptions);
  private native static void closeDatabase(final long handle)
      throws RocksDBException;
  private native long beginTransaction(final long handle,
      final long writeOptionsHandle);
  private native long beginTransaction(final long handle,
      final long writeOptionsHandle,
      final long optimisticTransactionOptionsHandle);
  private native long beginTransaction_withOld(final long handle,
      final long writeOptionsHandle, final long oldTransactionHandle);
  private native long beginTransaction_withOld(final long handle,
      final long writeOptionsHandle,
      final long optimisticTransactionOptionsHandle,
      final long oldTransactionHandle);
  private native long getBaseDB(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java000066400000000000000000000033301370372246700276100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; public class OptimisticTransactionOptions extends RocksObject implements TransactionalOptions { public OptimisticTransactionOptions() { super(newOptimisticTransactionOptions()); } @Override public boolean isSetSnapshot() { assert(isOwningHandle()); return isSetSnapshot(nativeHandle_); } @Override public OptimisticTransactionOptions setSetSnapshot( final boolean setSnapshot) { assert(isOwningHandle()); setSetSnapshot(nativeHandle_, setSnapshot); return this; } /** * Should be set if the DB has a non-default comparator. * See comment in * {@link WriteBatchWithIndex#WriteBatchWithIndex(AbstractComparator, int, boolean)} * constructor. * * @param comparator The comparator to use for the transaction. * * @return this OptimisticTransactionOptions instance */ public OptimisticTransactionOptions setComparator( final AbstractComparator comparator) { assert(isOwningHandle()); setComparator(nativeHandle_, comparator.nativeHandle_); return this; } private native static long newOptimisticTransactionOptions(); private native boolean isSetSnapshot(final long handle); private native void setSetSnapshot(final long handle, final boolean setSnapshot); private native void setComparator(final long handle, final long comparatorHandle); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Options.java000066400000000000000000002054501370372246700233440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.file.Paths; import java.util.*; /** * Options to control the behavior of a database. It will be used * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). 
* * If {@link #dispose()} function is not called, then it will be GC'd * automatically and native resources will be released as part of the process. */ public class Options extends RocksObject implements DBOptionsInterface, MutableDBOptionsInterface, ColumnFamilyOptionsInterface, MutableColumnFamilyOptionsInterface { static { RocksDB.loadLibrary(); } /** * Converts the input properties into a Options-style formatted string * @param properties The set of properties to convert * @return The Options-style representation of those properties. */ public static String getOptionStringFromProps(final Properties properties) { if (properties == null || properties.size() == 0) { throw new IllegalArgumentException("Properties value must contain at least one value."); } StringBuilder stringBuilder = new StringBuilder(); for (final String name : properties.stringPropertyNames()) { stringBuilder.append(name); stringBuilder.append("="); stringBuilder.append(properties.getProperty(name)); stringBuilder.append(";"); } return stringBuilder.toString(); } /** * Construct options for opening a RocksDB. * * This constructor will create (by allocating a block of memory) * an {@code rocksdb::Options} in the c++ side. */ public Options() { super(newOptions()); env_ = Env.getDefault(); } /** * Construct options for opening a RocksDB. Reusing database options * and column family options. * * @param dbOptions {@link org.rocksdb.DBOptions} instance * @param columnFamilyOptions {@link org.rocksdb.ColumnFamilyOptions} * instance */ public Options(final DBOptions dbOptions, final ColumnFamilyOptions columnFamilyOptions) { super(newOptions(dbOptions.nativeHandle_, columnFamilyOptions.nativeHandle_)); env_ = Env.getDefault(); } /** * Copy constructor for ColumnFamilyOptions. * * NOTE: This does a shallow copy, which means comparator, merge_operator * and other pointers will be cloned! * * @param other The Options to copy. 
*/ public Options(Options other) { super(copyOptions(other.nativeHandle_)); this.env_ = other.env_; this.memTableConfig_ = other.memTableConfig_; this.tableFormatConfig_ = other.tableFormatConfig_; this.rateLimiter_ = other.rateLimiter_; this.comparator_ = other.comparator_; this.compactionFilter_ = other.compactionFilter_; this.compactionFilterFactory_ = other.compactionFilterFactory_; this.compactionOptionsUniversal_ = other.compactionOptionsUniversal_; this.compactionOptionsFIFO_ = other.compactionOptionsFIFO_; this.compressionOptions_ = other.compressionOptions_; this.rowCache_ = other.rowCache_; this.writeBufferManager_ = other.writeBufferManager_; } @Override public Options setIncreaseParallelism(final int totalThreads) { assert(isOwningHandle()); setIncreaseParallelism(nativeHandle_, totalThreads); return this; } @Override public Options setCreateIfMissing(final boolean flag) { assert(isOwningHandle()); setCreateIfMissing(nativeHandle_, flag); return this; } @Override public Options setCreateMissingColumnFamilies(final boolean flag) { assert(isOwningHandle()); setCreateMissingColumnFamilies(nativeHandle_, flag); return this; } @Override public Options setEnv(final Env env) { assert(isOwningHandle()); setEnv(nativeHandle_, env.nativeHandle_); env_ = env; return this; } @Override public Env getEnv() { return env_; } /** *

Set appropriate parameters for bulk loading. * The reason that this is a function that returns "this" instead of a * constructor is to enable chaining of multiple similar calls in the future. *

* *

All data will be in level 0 without any automatic compaction. * It's recommended to manually call CompactRange(NULL, NULL) before reading * from the database, because otherwise the read can be very slow.

* * @return the instance of the current Options. */ public Options prepareForBulkLoad() { prepareForBulkLoad(nativeHandle_); return this; } @Override public boolean createIfMissing() { assert(isOwningHandle()); return createIfMissing(nativeHandle_); } @Override public boolean createMissingColumnFamilies() { assert(isOwningHandle()); return createMissingColumnFamilies(nativeHandle_); } @Override public Options optimizeForSmallDb() { optimizeForSmallDb(nativeHandle_); return this; } @Override public Options optimizeForPointLookup( long blockCacheSizeMb) { optimizeForPointLookup(nativeHandle_, blockCacheSizeMb); return this; } @Override public Options optimizeLevelStyleCompaction() { optimizeLevelStyleCompaction(nativeHandle_, DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); return this; } @Override public Options optimizeLevelStyleCompaction( long memtableMemoryBudget) { optimizeLevelStyleCompaction(nativeHandle_, memtableMemoryBudget); return this; } @Override public Options optimizeUniversalStyleCompaction() { optimizeUniversalStyleCompaction(nativeHandle_, DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET); return this; } @Override public Options optimizeUniversalStyleCompaction( final long memtableMemoryBudget) { optimizeUniversalStyleCompaction(nativeHandle_, memtableMemoryBudget); return this; } @Override public Options setComparator(final BuiltinComparator builtinComparator) { assert(isOwningHandle()); setComparatorHandle(nativeHandle_, builtinComparator.ordinal()); return this; } @Override public Options setComparator( final AbstractComparator comparator) { assert(isOwningHandle()); setComparatorHandle(nativeHandle_, comparator.nativeHandle_, comparator.getComparatorType().getValue()); comparator_ = comparator; return this; } @Override public Options setMergeOperatorName(final String name) { assert(isOwningHandle()); if (name == null) { throw new IllegalArgumentException( "Merge operator name must not be null."); } setMergeOperatorName(nativeHandle_, name); return this; } 
@Override public Options setMergeOperator(final MergeOperator mergeOperator) { setMergeOperator(nativeHandle_, mergeOperator.nativeHandle_); return this; } @Override public Options setCompactionFilter( final AbstractCompactionFilter> compactionFilter) { setCompactionFilterHandle(nativeHandle_, compactionFilter.nativeHandle_); compactionFilter_ = compactionFilter; return this; } @Override public AbstractCompactionFilter> compactionFilter() { assert (isOwningHandle()); return compactionFilter_; } @Override public Options setCompactionFilterFactory(final AbstractCompactionFilterFactory> compactionFilterFactory) { assert (isOwningHandle()); setCompactionFilterFactoryHandle(nativeHandle_, compactionFilterFactory.nativeHandle_); compactionFilterFactory_ = compactionFilterFactory; return this; } @Override public AbstractCompactionFilterFactory> compactionFilterFactory() { assert (isOwningHandle()); return compactionFilterFactory_; } @Override public Options setWriteBufferSize(final long writeBufferSize) { assert(isOwningHandle()); setWriteBufferSize(nativeHandle_, writeBufferSize); return this; } @Override public long writeBufferSize() { assert(isOwningHandle()); return writeBufferSize(nativeHandle_); } @Override public Options setMaxWriteBufferNumber(final int maxWriteBufferNumber) { assert(isOwningHandle()); setMaxWriteBufferNumber(nativeHandle_, maxWriteBufferNumber); return this; } @Override public int maxWriteBufferNumber() { assert(isOwningHandle()); return maxWriteBufferNumber(nativeHandle_); } @Override public boolean errorIfExists() { assert(isOwningHandle()); return errorIfExists(nativeHandle_); } @Override public Options setErrorIfExists(final boolean errorIfExists) { assert(isOwningHandle()); setErrorIfExists(nativeHandle_, errorIfExists); return this; } @Override public boolean paranoidChecks() { assert(isOwningHandle()); return paranoidChecks(nativeHandle_); } @Override public Options setParanoidChecks(final boolean paranoidChecks) { 
assert(isOwningHandle()); setParanoidChecks(nativeHandle_, paranoidChecks); return this; } @Override public int maxOpenFiles() { assert(isOwningHandle()); return maxOpenFiles(nativeHandle_); } @Override public Options setMaxFileOpeningThreads(final int maxFileOpeningThreads) { assert(isOwningHandle()); setMaxFileOpeningThreads(nativeHandle_, maxFileOpeningThreads); return this; } @Override public int maxFileOpeningThreads() { assert(isOwningHandle()); return maxFileOpeningThreads(nativeHandle_); } @Override public Options setMaxTotalWalSize(final long maxTotalWalSize) { assert(isOwningHandle()); setMaxTotalWalSize(nativeHandle_, maxTotalWalSize); return this; } @Override public long maxTotalWalSize() { assert(isOwningHandle()); return maxTotalWalSize(nativeHandle_); } @Override public Options setMaxOpenFiles(final int maxOpenFiles) { assert(isOwningHandle()); setMaxOpenFiles(nativeHandle_, maxOpenFiles); return this; } @Override public boolean useFsync() { assert(isOwningHandle()); return useFsync(nativeHandle_); } @Override public Options setUseFsync(final boolean useFsync) { assert(isOwningHandle()); setUseFsync(nativeHandle_, useFsync); return this; } @Override public Options setDbPaths(final Collection dbPaths) { assert(isOwningHandle()); final int len = dbPaths.size(); final String paths[] = new String[len]; final long targetSizes[] = new long[len]; int i = 0; for(final DbPath dbPath : dbPaths) { paths[i] = dbPath.path.toString(); targetSizes[i] = dbPath.targetSize; i++; } setDbPaths(nativeHandle_, paths, targetSizes); return this; } @Override public List dbPaths() { final int len = (int)dbPathsLen(nativeHandle_); if(len == 0) { return Collections.emptyList(); } else { final String paths[] = new String[len]; final long targetSizes[] = new long[len]; dbPaths(nativeHandle_, paths, targetSizes); final List dbPaths = new ArrayList<>(); for(int i = 0; i < len; i++) { dbPaths.add(new DbPath(Paths.get(paths[i]), targetSizes[i])); } return dbPaths; } } @Override 
public String dbLogDir() {
  assert isOwningHandle();
  return dbLogDir(nativeHandle_);
}

@Override
public Options setDbLogDir(final String dbLogDir) {
  assert isOwningHandle();
  setDbLogDir(nativeHandle_, dbLogDir);
  return this;
}

@Override
public String walDir() {
  assert isOwningHandle();
  return walDir(nativeHandle_);
}

@Override
public Options setWalDir(final String walDir) {
  assert isOwningHandle();
  setWalDir(nativeHandle_, walDir);
  return this;
}

@Override
public long deleteObsoleteFilesPeriodMicros() {
  assert isOwningHandle();
  return deleteObsoleteFilesPeriodMicros(nativeHandle_);
}

@Override
public Options setDeleteObsoleteFilesPeriodMicros(final long micros) {
  assert isOwningHandle();
  setDeleteObsoleteFilesPeriodMicros(nativeHandle_, micros);
  return this;
}

@Override
@Deprecated
public int maxBackgroundCompactions() {
  assert isOwningHandle();
  return maxBackgroundCompactions(nativeHandle_);
}

@Override
public Options setStatistics(final Statistics statistics) {
  assert isOwningHandle();
  setStatistics(nativeHandle_, statistics.nativeHandle_);
  return this;
}

// Wraps the native statistics handle in a new Statistics object on every
// call; a zero handle is reported as null.
@Override
public Statistics statistics() {
  assert isOwningHandle();
  final long handle = statistics(nativeHandle_);
  return handle == 0 ? null : new Statistics(handle);
}

// Unlike the other setters in this group, this one returns void.
@Override
@Deprecated
public void setBaseBackgroundCompactions(
    final int baseBackgroundCompactions) {
  assert isOwningHandle();
  setBaseBackgroundCompactions(nativeHandle_, baseBackgroundCompactions);
}

@Override
public int baseBackgroundCompactions() {
  assert isOwningHandle();
  return baseBackgroundCompactions(nativeHandle_);
}

@Override
@Deprecated
public Options setMaxBackgroundCompactions(
    final int maxBackgroundCompactions) {
  assert isOwningHandle();
  setMaxBackgroundCompactions(nativeHandle_, maxBackgroundCompactions);
  return this;
}

@Override
public Options setMaxSubcompactions(final int maxSubcompactions) {
  assert(isOwningHandle());
setMaxSubcompactions(nativeHandle_, maxSubcompactions); return this; } @Override public int maxSubcompactions() { assert(isOwningHandle()); return maxSubcompactions(nativeHandle_); } @Override @Deprecated public int maxBackgroundFlushes() { assert(isOwningHandle()); return maxBackgroundFlushes(nativeHandle_); } @Override @Deprecated public Options setMaxBackgroundFlushes( final int maxBackgroundFlushes) { assert(isOwningHandle()); setMaxBackgroundFlushes(nativeHandle_, maxBackgroundFlushes); return this; } @Override public int maxBackgroundJobs() { assert(isOwningHandle()); return maxBackgroundJobs(nativeHandle_); } @Override public Options setMaxBackgroundJobs(final int maxBackgroundJobs) { assert(isOwningHandle()); setMaxBackgroundJobs(nativeHandle_, maxBackgroundJobs); return this; } @Override public long maxLogFileSize() { assert(isOwningHandle()); return maxLogFileSize(nativeHandle_); } @Override public Options setMaxLogFileSize(final long maxLogFileSize) { assert(isOwningHandle()); setMaxLogFileSize(nativeHandle_, maxLogFileSize); return this; } @Override public long logFileTimeToRoll() { assert(isOwningHandle()); return logFileTimeToRoll(nativeHandle_); } @Override public Options setLogFileTimeToRoll(final long logFileTimeToRoll) { assert(isOwningHandle()); setLogFileTimeToRoll(nativeHandle_, logFileTimeToRoll); return this; } @Override public long keepLogFileNum() { assert(isOwningHandle()); return keepLogFileNum(nativeHandle_); } @Override public Options setKeepLogFileNum(final long keepLogFileNum) { assert(isOwningHandle()); setKeepLogFileNum(nativeHandle_, keepLogFileNum); return this; } @Override public Options setRecycleLogFileNum(final long recycleLogFileNum) { assert(isOwningHandle()); setRecycleLogFileNum(nativeHandle_, recycleLogFileNum); return this; } @Override public long recycleLogFileNum() { assert(isOwningHandle()); return recycleLogFileNum(nativeHandle_); } @Override public long maxManifestFileSize() { assert(isOwningHandle()); return 
maxManifestFileSize(nativeHandle_); }

@Override
public Options setMaxManifestFileSize(final long maxManifestFileSize) {
  assert (isOwningHandle());
  setMaxManifestFileSize(nativeHandle_, maxManifestFileSize);
  return this;
}

@Override
public Options setMaxTableFilesSizeFIFO(final long maxTableFilesSize) {
  // unsigned native type
  assert (maxTableFilesSize > 0);
  assert (isOwningHandle());
  setMaxTableFilesSizeFIFO(nativeHandle_, maxTableFilesSize);
  return this;
}

@Override
public long maxTableFilesSizeFIFO() {
  // Same handle check as the matching setter.
  assert (isOwningHandle());
  return maxTableFilesSizeFIFO(nativeHandle_);
}

@Override
public int tableCacheNumshardbits() {
  assert (isOwningHandle());
  return tableCacheNumshardbits(nativeHandle_);
}

@Override
public Options setTableCacheNumshardbits(final int tableCacheNumshardbits) {
  assert (isOwningHandle());
  setTableCacheNumshardbits(nativeHandle_, tableCacheNumshardbits);
  return this;
}

@Override
public long walTtlSeconds() {
  assert (isOwningHandle());
  return walTtlSeconds(nativeHandle_);
}

@Override
public Options setWalTtlSeconds(final long walTtlSeconds) {
  assert (isOwningHandle());
  setWalTtlSeconds(nativeHandle_, walTtlSeconds);
  return this;
}

@Override
public long walSizeLimitMB() {
  assert (isOwningHandle());
  return walSizeLimitMB(nativeHandle_);
}

@Override
public Options setWalSizeLimitMB(final long sizeLimitMB) {
  assert (isOwningHandle());
  setWalSizeLimitMB(nativeHandle_, sizeLimitMB);
  return this;
}

@Override
public long manifestPreallocationSize() {
  assert (isOwningHandle());
  return manifestPreallocationSize(nativeHandle_);
}

@Override
public Options setManifestPreallocationSize(final long size) {
  assert (isOwningHandle());
  setManifestPreallocationSize(nativeHandle_, size);
  return this;
}

@Override
public Options setUseDirectReads(final boolean useDirectReads) {
  assert (isOwningHandle());
  setUseDirectReads(nativeHandle_, useDirectReads);
  return this;
}

@Override
public boolean useDirectReads() {
  assert (isOwningHandle());
  return useDirectReads(nativeHandle_);
}

@Override
public Options
setUseDirectIoForFlushAndCompaction( final boolean useDirectIoForFlushAndCompaction) { assert(isOwningHandle()); setUseDirectIoForFlushAndCompaction(nativeHandle_, useDirectIoForFlushAndCompaction); return this; } @Override public boolean useDirectIoForFlushAndCompaction() { assert(isOwningHandle()); return useDirectIoForFlushAndCompaction(nativeHandle_); } @Override public Options setAllowFAllocate(final boolean allowFAllocate) { assert(isOwningHandle()); setAllowFAllocate(nativeHandle_, allowFAllocate); return this; } @Override public boolean allowFAllocate() { assert(isOwningHandle()); return allowFAllocate(nativeHandle_); } @Override public boolean allowMmapReads() { assert(isOwningHandle()); return allowMmapReads(nativeHandle_); } @Override public Options setAllowMmapReads(final boolean allowMmapReads) { assert(isOwningHandle()); setAllowMmapReads(nativeHandle_, allowMmapReads); return this; } @Override public boolean allowMmapWrites() { assert(isOwningHandle()); return allowMmapWrites(nativeHandle_); } @Override public Options setAllowMmapWrites(final boolean allowMmapWrites) { assert(isOwningHandle()); setAllowMmapWrites(nativeHandle_, allowMmapWrites); return this; } @Override public boolean isFdCloseOnExec() { assert(isOwningHandle()); return isFdCloseOnExec(nativeHandle_); } @Override public Options setIsFdCloseOnExec(final boolean isFdCloseOnExec) { assert(isOwningHandle()); setIsFdCloseOnExec(nativeHandle_, isFdCloseOnExec); return this; } @Override public int statsDumpPeriodSec() { assert(isOwningHandle()); return statsDumpPeriodSec(nativeHandle_); } @Override public Options setStatsDumpPeriodSec(final int statsDumpPeriodSec) { assert(isOwningHandle()); setStatsDumpPeriodSec(nativeHandle_, statsDumpPeriodSec); return this; } @Override public Options setStatsPersistPeriodSec( final int statsPersistPeriodSec) { assert(isOwningHandle()); setStatsPersistPeriodSec(nativeHandle_, statsPersistPeriodSec); return this; } @Override public int 
statsPersistPeriodSec() {
  assert (isOwningHandle());
  return statsPersistPeriodSec(nativeHandle_);
}

@Override
public Options setStatsHistoryBufferSize(final long statsHistoryBufferSize) {
  assert (isOwningHandle());
  setStatsHistoryBufferSize(nativeHandle_, statsHistoryBufferSize);
  return this;
}

@Override
public long statsHistoryBufferSize() {
  assert (isOwningHandle());
  return statsHistoryBufferSize(nativeHandle_);
}

@Override
public boolean adviseRandomOnOpen() {
  // Same handle check as the matching setter.
  assert (isOwningHandle());
  return adviseRandomOnOpen(nativeHandle_);
}

@Override
public Options setAdviseRandomOnOpen(final boolean adviseRandomOnOpen) {
  assert (isOwningHandle());
  setAdviseRandomOnOpen(nativeHandle_, adviseRandomOnOpen);
  return this;
}

@Override
public Options setDbWriteBufferSize(final long dbWriteBufferSize) {
  assert (isOwningHandle());
  setDbWriteBufferSize(nativeHandle_, dbWriteBufferSize);
  return this;
}

// Passes the manager's native handle to the native options object and stores
// the Java object in writeBufferManager_.
@Override
public Options setWriteBufferManager(
    final WriteBufferManager writeBufferManager) {
  assert (isOwningHandle());
  setWriteBufferManager(nativeHandle_, writeBufferManager.nativeHandle_);
  this.writeBufferManager_ = writeBufferManager;
  return this;
}

// Returns the Java-side reference stored by setWriteBufferManager.
@Override
public WriteBufferManager writeBufferManager() {
  assert (isOwningHandle());
  return this.writeBufferManager_;
}

@Override
public long dbWriteBufferSize() {
  assert (isOwningHandle());
  return dbWriteBufferSize(nativeHandle_);
}

// The enum crosses the JNI boundary as the value from AccessHint.getValue().
@Override
public Options setAccessHintOnCompactionStart(final AccessHint accessHint) {
  assert (isOwningHandle());
  setAccessHintOnCompactionStart(nativeHandle_, accessHint.getValue());
  return this;
}

@Override
public AccessHint accessHintOnCompactionStart() {
  assert (isOwningHandle());
  return AccessHint.getAccessHint(accessHintOnCompactionStart(nativeHandle_));
}

@Override
public Options setNewTableReaderForCompactionInputs(
    final boolean newTableReaderForCompactionInputs) {
  assert (isOwningHandle());
  setNewTableReaderForCompactionInputs(
      nativeHandle_, newTableReaderForCompactionInputs);
  return this;
}

@Override
public boolean
newTableReaderForCompactionInputs() { assert(isOwningHandle()); return newTableReaderForCompactionInputs(nativeHandle_); } @Override public Options setCompactionReadaheadSize(final long compactionReadaheadSize) { assert(isOwningHandle()); setCompactionReadaheadSize(nativeHandle_, compactionReadaheadSize); return this; } @Override public long compactionReadaheadSize() { assert(isOwningHandle()); return compactionReadaheadSize(nativeHandle_); } @Override public Options setRandomAccessMaxBufferSize(final long randomAccessMaxBufferSize) { assert(isOwningHandle()); setRandomAccessMaxBufferSize(nativeHandle_, randomAccessMaxBufferSize); return this; } @Override public long randomAccessMaxBufferSize() { assert(isOwningHandle()); return randomAccessMaxBufferSize(nativeHandle_); } @Override public Options setWritableFileMaxBufferSize(final long writableFileMaxBufferSize) { assert(isOwningHandle()); setWritableFileMaxBufferSize(nativeHandle_, writableFileMaxBufferSize); return this; } @Override public long writableFileMaxBufferSize() { assert(isOwningHandle()); return writableFileMaxBufferSize(nativeHandle_); } @Override public boolean useAdaptiveMutex() { assert(isOwningHandle()); return useAdaptiveMutex(nativeHandle_); } @Override public Options setUseAdaptiveMutex(final boolean useAdaptiveMutex) { assert(isOwningHandle()); setUseAdaptiveMutex(nativeHandle_, useAdaptiveMutex); return this; } @Override public long bytesPerSync() { return bytesPerSync(nativeHandle_); } @Override public Options setBytesPerSync(final long bytesPerSync) { assert(isOwningHandle()); setBytesPerSync(nativeHandle_, bytesPerSync); return this; } @Override public Options setWalBytesPerSync(final long walBytesPerSync) { assert(isOwningHandle()); setWalBytesPerSync(nativeHandle_, walBytesPerSync); return this; } @Override public long walBytesPerSync() { assert(isOwningHandle()); return walBytesPerSync(nativeHandle_); } @Override public Options setStrictBytesPerSync(final boolean strictBytesPerSync) { 
assert(isOwningHandle()); setStrictBytesPerSync(nativeHandle_, strictBytesPerSync); return this; } @Override public boolean strictBytesPerSync() { assert(isOwningHandle()); return strictBytesPerSync(nativeHandle_); } @Override public Options setEnableThreadTracking(final boolean enableThreadTracking) { assert(isOwningHandle()); setEnableThreadTracking(nativeHandle_, enableThreadTracking); return this; } @Override public boolean enableThreadTracking() { assert(isOwningHandle()); return enableThreadTracking(nativeHandle_); } @Override public Options setDelayedWriteRate(final long delayedWriteRate) { assert(isOwningHandle()); setDelayedWriteRate(nativeHandle_, delayedWriteRate); return this; } @Override public long delayedWriteRate(){ return delayedWriteRate(nativeHandle_); } @Override public Options setEnablePipelinedWrite(final boolean enablePipelinedWrite) { setEnablePipelinedWrite(nativeHandle_, enablePipelinedWrite); return this; } @Override public boolean enablePipelinedWrite() { return enablePipelinedWrite(nativeHandle_); } @Override public Options setUnorderedWrite(final boolean unorderedWrite) { setUnorderedWrite(nativeHandle_, unorderedWrite); return this; } @Override public boolean unorderedWrite() { return unorderedWrite(nativeHandle_); } @Override public Options setAllowConcurrentMemtableWrite( final boolean allowConcurrentMemtableWrite) { setAllowConcurrentMemtableWrite(nativeHandle_, allowConcurrentMemtableWrite); return this; } @Override public boolean allowConcurrentMemtableWrite() { return allowConcurrentMemtableWrite(nativeHandle_); } @Override public Options setEnableWriteThreadAdaptiveYield( final boolean enableWriteThreadAdaptiveYield) { setEnableWriteThreadAdaptiveYield(nativeHandle_, enableWriteThreadAdaptiveYield); return this; } @Override public boolean enableWriteThreadAdaptiveYield() { return enableWriteThreadAdaptiveYield(nativeHandle_); } @Override public Options setWriteThreadMaxYieldUsec(final long writeThreadMaxYieldUsec) { 
setWriteThreadMaxYieldUsec(nativeHandle_, writeThreadMaxYieldUsec); return this; } @Override public long writeThreadMaxYieldUsec() { return writeThreadMaxYieldUsec(nativeHandle_); } @Override public Options setWriteThreadSlowYieldUsec(final long writeThreadSlowYieldUsec) { setWriteThreadSlowYieldUsec(nativeHandle_, writeThreadSlowYieldUsec); return this; } @Override public long writeThreadSlowYieldUsec() { return writeThreadSlowYieldUsec(nativeHandle_); } @Override public Options setSkipStatsUpdateOnDbOpen(final boolean skipStatsUpdateOnDbOpen) { assert(isOwningHandle()); setSkipStatsUpdateOnDbOpen(nativeHandle_, skipStatsUpdateOnDbOpen); return this; } @Override public boolean skipStatsUpdateOnDbOpen() { assert(isOwningHandle()); return skipStatsUpdateOnDbOpen(nativeHandle_); } @Override public Options setWalRecoveryMode(final WALRecoveryMode walRecoveryMode) { assert(isOwningHandle()); setWalRecoveryMode(nativeHandle_, walRecoveryMode.getValue()); return this; } @Override public WALRecoveryMode walRecoveryMode() { assert(isOwningHandle()); return WALRecoveryMode.getWALRecoveryMode(walRecoveryMode(nativeHandle_)); } @Override public Options setAllow2pc(final boolean allow2pc) { assert(isOwningHandle()); setAllow2pc(nativeHandle_, allow2pc); return this; } @Override public boolean allow2pc() { assert(isOwningHandle()); return allow2pc(nativeHandle_); } @Override public Options setRowCache(final Cache rowCache) { assert(isOwningHandle()); setRowCache(nativeHandle_, rowCache.nativeHandle_); this.rowCache_ = rowCache; return this; } @Override public Cache rowCache() { assert(isOwningHandle()); return this.rowCache_; } @Override public Options setWalFilter(final AbstractWalFilter walFilter) { assert(isOwningHandle()); setWalFilter(nativeHandle_, walFilter.nativeHandle_); this.walFilter_ = walFilter; return this; } @Override public WalFilter walFilter() { assert(isOwningHandle()); return this.walFilter_; } @Override public Options setFailIfOptionsFileError(final boolean 
failIfOptionsFileError) { assert(isOwningHandle()); setFailIfOptionsFileError(nativeHandle_, failIfOptionsFileError); return this; } @Override public boolean failIfOptionsFileError() { assert(isOwningHandle()); return failIfOptionsFileError(nativeHandle_); } @Override public Options setDumpMallocStats(final boolean dumpMallocStats) { assert(isOwningHandle()); setDumpMallocStats(nativeHandle_, dumpMallocStats); return this; } @Override public boolean dumpMallocStats() { assert(isOwningHandle()); return dumpMallocStats(nativeHandle_); } @Override public Options setAvoidFlushDuringRecovery(final boolean avoidFlushDuringRecovery) { assert(isOwningHandle()); setAvoidFlushDuringRecovery(nativeHandle_, avoidFlushDuringRecovery); return this; } @Override public boolean avoidFlushDuringRecovery() { assert(isOwningHandle()); return avoidFlushDuringRecovery(nativeHandle_); } @Override public Options setAvoidFlushDuringShutdown(final boolean avoidFlushDuringShutdown) { assert(isOwningHandle()); setAvoidFlushDuringShutdown(nativeHandle_, avoidFlushDuringShutdown); return this; } @Override public boolean avoidFlushDuringShutdown() { assert(isOwningHandle()); return avoidFlushDuringShutdown(nativeHandle_); } @Override public Options setAllowIngestBehind(final boolean allowIngestBehind) { assert(isOwningHandle()); setAllowIngestBehind(nativeHandle_, allowIngestBehind); return this; } @Override public boolean allowIngestBehind() { assert(isOwningHandle()); return allowIngestBehind(nativeHandle_); } @Override public Options setPreserveDeletes(final boolean preserveDeletes) { assert(isOwningHandle()); setPreserveDeletes(nativeHandle_, preserveDeletes); return this; } @Override public boolean preserveDeletes() { assert(isOwningHandle()); return preserveDeletes(nativeHandle_); } @Override public Options setTwoWriteQueues(final boolean twoWriteQueues) { assert(isOwningHandle()); setTwoWriteQueues(nativeHandle_, twoWriteQueues); return this; } @Override public boolean twoWriteQueues() { 
assert(isOwningHandle()); return twoWriteQueues(nativeHandle_); } @Override public Options setManualWalFlush(final boolean manualWalFlush) { assert(isOwningHandle()); setManualWalFlush(nativeHandle_, manualWalFlush); return this; } @Override public boolean manualWalFlush() { assert(isOwningHandle()); return manualWalFlush(nativeHandle_); } @Override public MemTableConfig memTableConfig() { return this.memTableConfig_; } @Override public Options setMemTableConfig(final MemTableConfig config) { memTableConfig_ = config; setMemTableFactory(nativeHandle_, config.newMemTableFactoryHandle()); return this; } @Override public Options setRateLimiter(final RateLimiter rateLimiter) { assert(isOwningHandle()); rateLimiter_ = rateLimiter; setRateLimiter(nativeHandle_, rateLimiter.nativeHandle_); return this; } @Override public Options setSstFileManager(final SstFileManager sstFileManager) { assert(isOwningHandle()); setSstFileManager(nativeHandle_, sstFileManager.nativeHandle_); return this; } @Override public Options setLogger(final Logger logger) { assert(isOwningHandle()); setLogger(nativeHandle_, logger.nativeHandle_); return this; } @Override public Options setInfoLogLevel(final InfoLogLevel infoLogLevel) { assert(isOwningHandle()); setInfoLogLevel(nativeHandle_, infoLogLevel.getValue()); return this; } @Override public InfoLogLevel infoLogLevel() { assert(isOwningHandle()); return InfoLogLevel.getInfoLogLevel( infoLogLevel(nativeHandle_)); } @Override public String memTableFactoryName() { assert(isOwningHandle()); return memTableFactoryName(nativeHandle_); } @Override public TableFormatConfig tableFormatConfig() { return this.tableFormatConfig_; } @Override public Options setTableFormatConfig(final TableFormatConfig config) { tableFormatConfig_ = config; setTableFactory(nativeHandle_, config.newTableFactoryHandle()); return this; } @Override public String tableFactoryName() { assert(isOwningHandle()); return tableFactoryName(nativeHandle_); } @Override public Options 
useFixedLengthPrefixExtractor(final int n) { assert(isOwningHandle()); useFixedLengthPrefixExtractor(nativeHandle_, n); return this; } @Override public Options useCappedPrefixExtractor(final int n) { assert(isOwningHandle()); useCappedPrefixExtractor(nativeHandle_, n); return this; } @Override public CompressionType compressionType() { return CompressionType.getCompressionType(compressionType(nativeHandle_)); } @Override public Options setCompressionPerLevel( final List compressionLevels) { final byte[] byteCompressionTypes = new byte[ compressionLevels.size()]; for (int i = 0; i < compressionLevels.size(); i++) { byteCompressionTypes[i] = compressionLevels.get(i).getValue(); } setCompressionPerLevel(nativeHandle_, byteCompressionTypes); return this; } @Override public List compressionPerLevel() { final byte[] byteCompressionTypes = compressionPerLevel(nativeHandle_); final List compressionLevels = new ArrayList<>(); for (final Byte byteCompressionType : byteCompressionTypes) { compressionLevels.add(CompressionType.getCompressionType( byteCompressionType)); } return compressionLevels; } @Override public Options setCompressionType(CompressionType compressionType) { setCompressionType(nativeHandle_, compressionType.getValue()); return this; } @Override public Options setBottommostCompressionType( final CompressionType bottommostCompressionType) { setBottommostCompressionType(nativeHandle_, bottommostCompressionType.getValue()); return this; } @Override public CompressionType bottommostCompressionType() { return CompressionType.getCompressionType( bottommostCompressionType(nativeHandle_)); } @Override public Options setBottommostCompressionOptions( final CompressionOptions bottommostCompressionOptions) { setBottommostCompressionOptions(nativeHandle_, bottommostCompressionOptions.nativeHandle_); this.bottommostCompressionOptions_ = bottommostCompressionOptions; return this; } @Override public CompressionOptions bottommostCompressionOptions() { return 
this.bottommostCompressionOptions_; }

// Passes the options' native handle to the native options object and stores
// the Java object in compressionOptions_.
@Override
public Options setCompressionOptions(
    final CompressionOptions compressionOptions) {
  setCompressionOptions(nativeHandle_, compressionOptions.nativeHandle_);
  compressionOptions_ = compressionOptions;
  return this;
}

// Returns the Java-side reference stored by setCompressionOptions.
@Override
public CompressionOptions compressionOptions() {
  return compressionOptions_;
}

@Override
public CompactionStyle compactionStyle() {
  final byte styleValue = compactionStyle(nativeHandle_);
  return CompactionStyle.fromValue(styleValue);
}

@Override
public Options setCompactionStyle(final CompactionStyle compactionStyle) {
  final byte styleValue = compactionStyle.getValue();
  setCompactionStyle(nativeHandle_, styleValue);
  return this;
}

// --- Level and trigger settings: each accessor forwards to the native method
// of the same name.

@Override
public int numLevels() {
  return numLevels(nativeHandle_);
}

@Override
public Options setNumLevels(int numLevels) {
  setNumLevels(nativeHandle_, numLevels);
  return this;
}

@Override
public int levelZeroFileNumCompactionTrigger() {
  return levelZeroFileNumCompactionTrigger(nativeHandle_);
}

@Override
public Options setLevelZeroFileNumCompactionTrigger(final int numFiles) {
  setLevelZeroFileNumCompactionTrigger(nativeHandle_, numFiles);
  return this;
}

@Override
public int levelZeroSlowdownWritesTrigger() {
  return levelZeroSlowdownWritesTrigger(nativeHandle_);
}

@Override
public Options setLevelZeroSlowdownWritesTrigger(final int numFiles) {
  setLevelZeroSlowdownWritesTrigger(nativeHandle_, numFiles);
  return this;
}

@Override
public int levelZeroStopWritesTrigger() {
  return levelZeroStopWritesTrigger(nativeHandle_);
}

@Override
public Options setLevelZeroStopWritesTrigger(final int numFiles) {
  setLevelZeroStopWritesTrigger(nativeHandle_, numFiles);
  return this;
}

@Override
public long targetFileSizeBase() {
  return targetFileSizeBase(nativeHandle_);
}

@Override
public Options setTargetFileSizeBase(long targetFileSizeBase) {
  setTargetFileSizeBase(nativeHandle_, targetFileSizeBase);
  return this;
}

@Override
public int targetFileSizeMultiplier() {
  return targetFileSizeMultiplier(nativeHandle_);
}

@Override
public Options
setTargetFileSizeMultiplier(int multiplier) { setTargetFileSizeMultiplier(nativeHandle_, multiplier); return this; } @Override public Options setMaxBytesForLevelBase(final long maxBytesForLevelBase) { setMaxBytesForLevelBase(nativeHandle_, maxBytesForLevelBase); return this; } @Override public long maxBytesForLevelBase() { return maxBytesForLevelBase(nativeHandle_); } @Override public Options setLevelCompactionDynamicLevelBytes( final boolean enableLevelCompactionDynamicLevelBytes) { setLevelCompactionDynamicLevelBytes(nativeHandle_, enableLevelCompactionDynamicLevelBytes); return this; } @Override public boolean levelCompactionDynamicLevelBytes() { return levelCompactionDynamicLevelBytes(nativeHandle_); } @Override public double maxBytesForLevelMultiplier() { return maxBytesForLevelMultiplier(nativeHandle_); } @Override public Options setMaxBytesForLevelMultiplier(final double multiplier) { setMaxBytesForLevelMultiplier(nativeHandle_, multiplier); return this; } @Override public long maxCompactionBytes() { return maxCompactionBytes(nativeHandle_); } @Override public Options setMaxCompactionBytes(final long maxCompactionBytes) { setMaxCompactionBytes(nativeHandle_, maxCompactionBytes); return this; } @Override public long arenaBlockSize() { return arenaBlockSize(nativeHandle_); } @Override public Options setArenaBlockSize(final long arenaBlockSize) { setArenaBlockSize(nativeHandle_, arenaBlockSize); return this; } @Override public boolean disableAutoCompactions() { return disableAutoCompactions(nativeHandle_); } @Override public Options setDisableAutoCompactions( final boolean disableAutoCompactions) { setDisableAutoCompactions(nativeHandle_, disableAutoCompactions); return this; } @Override public long maxSequentialSkipInIterations() { return maxSequentialSkipInIterations(nativeHandle_); } @Override public Options setMaxSequentialSkipInIterations( final long maxSequentialSkipInIterations) { setMaxSequentialSkipInIterations(nativeHandle_, 
maxSequentialSkipInIterations); return this; } @Override public boolean inplaceUpdateSupport() { return inplaceUpdateSupport(nativeHandle_); } @Override public Options setInplaceUpdateSupport( final boolean inplaceUpdateSupport) { setInplaceUpdateSupport(nativeHandle_, inplaceUpdateSupport); return this; } @Override public long inplaceUpdateNumLocks() { return inplaceUpdateNumLocks(nativeHandle_); } @Override public Options setInplaceUpdateNumLocks( final long inplaceUpdateNumLocks) { setInplaceUpdateNumLocks(nativeHandle_, inplaceUpdateNumLocks); return this; } @Override public double memtablePrefixBloomSizeRatio() { return memtablePrefixBloomSizeRatio(nativeHandle_); } @Override public Options setMemtablePrefixBloomSizeRatio(final double memtablePrefixBloomSizeRatio) { setMemtablePrefixBloomSizeRatio(nativeHandle_, memtablePrefixBloomSizeRatio); return this; } @Override public int bloomLocality() { return bloomLocality(nativeHandle_); } @Override public Options setBloomLocality(final int bloomLocality) { setBloomLocality(nativeHandle_, bloomLocality); return this; } @Override public long maxSuccessiveMerges() { return maxSuccessiveMerges(nativeHandle_); } @Override public Options setMaxSuccessiveMerges(long maxSuccessiveMerges) { setMaxSuccessiveMerges(nativeHandle_, maxSuccessiveMerges); return this; } @Override public int minWriteBufferNumberToMerge() { return minWriteBufferNumberToMerge(nativeHandle_); } @Override public Options setMinWriteBufferNumberToMerge( final int minWriteBufferNumberToMerge) { setMinWriteBufferNumberToMerge(nativeHandle_, minWriteBufferNumberToMerge); return this; } @Override public Options setOptimizeFiltersForHits( final boolean optimizeFiltersForHits) { setOptimizeFiltersForHits(nativeHandle_, optimizeFiltersForHits); return this; } @Override public boolean optimizeFiltersForHits() { return optimizeFiltersForHits(nativeHandle_); } @Override public Options setMemtableHugePageSize( long memtableHugePageSize) { 
setMemtableHugePageSize(nativeHandle_, memtableHugePageSize); return this; } /* Accessors in this section each forward to the same-named private native method, passing nativeHandle_ as the first argument; every setter returns this so calls can be chained. */ @Override public long memtableHugePageSize() { return memtableHugePageSize(nativeHandle_); } @Override public Options setSoftPendingCompactionBytesLimit(long softPendingCompactionBytesLimit) { setSoftPendingCompactionBytesLimit(nativeHandle_, softPendingCompactionBytesLimit); return this; } @Override public long softPendingCompactionBytesLimit() { return softPendingCompactionBytesLimit(nativeHandle_); } @Override public Options setHardPendingCompactionBytesLimit(long hardPendingCompactionBytesLimit) { setHardPendingCompactionBytesLimit(nativeHandle_, hardPendingCompactionBytesLimit); return this; } @Override public long hardPendingCompactionBytesLimit() { return hardPendingCompactionBytesLimit(nativeHandle_); } @Override public Options setLevel0FileNumCompactionTrigger(int level0FileNumCompactionTrigger) { setLevel0FileNumCompactionTrigger(nativeHandle_, level0FileNumCompactionTrigger); return this; } @Override public int level0FileNumCompactionTrigger() { return level0FileNumCompactionTrigger(nativeHandle_); } @Override public Options setLevel0SlowdownWritesTrigger(int level0SlowdownWritesTrigger) { setLevel0SlowdownWritesTrigger(nativeHandle_, level0SlowdownWritesTrigger); return this; } @Override public int level0SlowdownWritesTrigger() { return level0SlowdownWritesTrigger(nativeHandle_); } @Override public Options setLevel0StopWritesTrigger(int level0StopWritesTrigger) { setLevel0StopWritesTrigger(nativeHandle_, level0StopWritesTrigger); return this; } @Override public int level0StopWritesTrigger() { return level0StopWritesTrigger(nativeHandle_); } @Override public Options setMaxBytesForLevelMultiplierAdditional(int[] maxBytesForLevelMultiplierAdditional) { setMaxBytesForLevelMultiplierAdditional(nativeHandle_, maxBytesForLevelMultiplierAdditional); return this; } @Override public int[] maxBytesForLevelMultiplierAdditional() { return maxBytesForLevelMultiplierAdditional(nativeHandle_);
} @Override public Options setParanoidFileChecks(boolean paranoidFileChecks) { setParanoidFileChecks(nativeHandle_, paranoidFileChecks); return this; } @Override public boolean paranoidFileChecks() { return paranoidFileChecks(nativeHandle_); } @Override public Options setMaxWriteBufferNumberToMaintain( final int maxWriteBufferNumberToMaintain) { setMaxWriteBufferNumberToMaintain( nativeHandle_, maxWriteBufferNumberToMaintain); return this; } @Override public int maxWriteBufferNumberToMaintain() { return maxWriteBufferNumberToMaintain(nativeHandle_); } /* The setter hands the enum's getValue() result to the native method; the getter maps the native value back through CompactionPriority.getCompactionPriority. */ @Override public Options setCompactionPriority( final CompactionPriority compactionPriority) { setCompactionPriority(nativeHandle_, compactionPriority.getValue()); return this; } @Override public CompactionPriority compactionPriority() { return CompactionPriority.getCompactionPriority( compactionPriority(nativeHandle_)); } @Override public Options setReportBgIoStats(final boolean reportBgIoStats) { setReportBgIoStats(nativeHandle_, reportBgIoStats); return this; } @Override public boolean reportBgIoStats() { return reportBgIoStats(nativeHandle_); } @Override public Options setTtl(final long ttl) { setTtl(nativeHandle_, ttl); return this; } @Override public long ttl() { return ttl(nativeHandle_); } /* Passes the argument's native handle down and also stores the Java object in compactionOptionsUniversal_; the getter returns that stored reference instead of calling native code (presumably to keep the Java wrapper reachable - confirm). */ @Override public Options setCompactionOptionsUniversal( final CompactionOptionsUniversal compactionOptionsUniversal) { setCompactionOptionsUniversal(nativeHandle_, compactionOptionsUniversal.nativeHandle_); this.compactionOptionsUniversal_ = compactionOptionsUniversal; return this; } @Override public CompactionOptionsUniversal compactionOptionsUniversal() { return this.compactionOptionsUniversal_; } /* Same pattern as setCompactionOptionsUniversal, using the compactionOptionsFIFO_ field. */ @Override public Options setCompactionOptionsFIFO(final CompactionOptionsFIFO compactionOptionsFIFO) { setCompactionOptionsFIFO(nativeHandle_, compactionOptionsFIFO.nativeHandle_); this.compactionOptionsFIFO_ = compactionOptionsFIFO; return this; } @Override public CompactionOptionsFIFO compactionOptionsFIFO() { return
this.compactionOptionsFIFO_; } @Override public Options setForceConsistencyChecks(final boolean forceConsistencyChecks) { setForceConsistencyChecks(nativeHandle_, forceConsistencyChecks); return this; } @Override public boolean forceConsistencyChecks() { return forceConsistencyChecks(nativeHandle_); } @Override public Options setAtomicFlush(final boolean atomicFlush) { setAtomicFlush(nativeHandle_, atomicFlush); return this; } @Override public boolean atomicFlush() { return atomicFlush(nativeHandle_); } /* Native (JNI) declarations follow. The static newOptions/copyOptions methods return a long; the instance natives take a long handle as their first argument (the accessors above pass nativeHandle_). Presumably the long is a native pointer - confirm in the JNI sources. */ private native static long newOptions(); private native static long newOptions(long dbOptHandle, long cfOptHandle); private native static long copyOptions(long handle); @Override protected final native void disposeInternal(final long handle); private native void setEnv(long optHandle, long envHandle); private native void prepareForBulkLoad(long handle); // DB native handles private native void setIncreaseParallelism(long handle, int totalThreads); private native void setCreateIfMissing(long handle, boolean flag); private native boolean createIfMissing(long handle); private native void setCreateMissingColumnFamilies( long handle, boolean flag); private native boolean createMissingColumnFamilies(long handle); private native void setErrorIfExists(long handle, boolean errorIfExists); private native boolean errorIfExists(long handle); private native void setParanoidChecks( long handle, boolean paranoidChecks); private native boolean paranoidChecks(long handle); private native void setRateLimiter(long handle, long rateLimiterHandle); private native void setSstFileManager(final long handle, final long sstFileManagerHandle); private native void setLogger(long handle, long loggerHandle); private native void setInfoLogLevel(long handle, byte logLevel); private native byte infoLogLevel(long handle); private native void setMaxOpenFiles(long handle, int maxOpenFiles); private native int maxOpenFiles(long handle); private native void setMaxTotalWalSize(long handle, long
maxTotalWalSize); private native void setMaxFileOpeningThreads(final long handle, final int maxFileOpeningThreads); private native int maxFileOpeningThreads(final long handle); private native long maxTotalWalSize(long handle); private native void setStatistics(final long handle, final long statisticsHandle); private native long statistics(final long handle); private native boolean useFsync(long handle); private native void setUseFsync(long handle, boolean useFsync); private native void setDbPaths(final long handle, final String[] paths, final long[] targetSizes); private native long dbPathsLen(final long handle); private native void dbPaths(final long handle, final String[] paths, final long[] targetSizes); private native void setDbLogDir(long handle, String dbLogDir); private native String dbLogDir(long handle); private native void setWalDir(long handle, String walDir); private native String walDir(long handle); private native void setDeleteObsoleteFilesPeriodMicros( long handle, long micros); private native long deleteObsoleteFilesPeriodMicros(long handle); private native void setBaseBackgroundCompactions(long handle, int baseBackgroundCompactions); private native int baseBackgroundCompactions(long handle); private native void setMaxBackgroundCompactions( long handle, int maxBackgroundCompactions); private native int maxBackgroundCompactions(long handle); private native void setMaxSubcompactions(long handle, int maxSubcompactions); private native int maxSubcompactions(long handle); private native void setMaxBackgroundFlushes( long handle, int maxBackgroundFlushes); private native int maxBackgroundFlushes(long handle); /* NOTE(review): the parameter name "maxMaxBackgroundJobs" looks like a typo for maxBackgroundJobs; it has no effect on a native declaration. */ private native void setMaxBackgroundJobs(long handle, int maxMaxBackgroundJobs); private native int maxBackgroundJobs(long handle); /* Several setters below declare "throws IllegalArgumentException"; presumably it is raised from native code for values the native side cannot represent - confirm in the JNI implementation. */ private native void setMaxLogFileSize(long handle, long maxLogFileSize) throws IllegalArgumentException; private native long maxLogFileSize(long handle); private native void setLogFileTimeToRoll( long handle, long
logFileTimeToRoll) throws IllegalArgumentException; private native long logFileTimeToRoll(long handle); private native void setKeepLogFileNum(long handle, long keepLogFileNum) throws IllegalArgumentException; private native long keepLogFileNum(long handle); private native void setRecycleLogFileNum(long handle, long recycleLogFileNum); private native long recycleLogFileNum(long handle); private native void setMaxManifestFileSize( long handle, long maxManifestFileSize); private native long maxManifestFileSize(long handle); private native void setMaxTableFilesSizeFIFO( long handle, long maxTableFilesSize); private native long maxTableFilesSizeFIFO(long handle); private native void setTableCacheNumshardbits( long handle, int tableCacheNumshardbits); private native int tableCacheNumshardbits(long handle); private native void setWalTtlSeconds(long handle, long walTtlSeconds); private native long walTtlSeconds(long handle); private native void setWalSizeLimitMB(long handle, long sizeLimitMB); private native long walSizeLimitMB(long handle); private native void setManifestPreallocationSize( long handle, long size) throws IllegalArgumentException; private native long manifestPreallocationSize(long handle); private native void setUseDirectReads(long handle, boolean useDirectReads); private native boolean useDirectReads(long handle); private native void setUseDirectIoForFlushAndCompaction( long handle, boolean useDirectIoForFlushAndCompaction); private native boolean useDirectIoForFlushAndCompaction(long handle); private native void setAllowFAllocate(final long handle, final boolean allowFAllocate); private native boolean allowFAllocate(final long handle); private native void setAllowMmapReads( long handle, boolean allowMmapReads); private native boolean allowMmapReads(long handle); private native void setAllowMmapWrites( long handle, boolean allowMmapWrites); private native boolean allowMmapWrites(long handle); private native void setIsFdCloseOnExec( long handle, boolean
isFdCloseOnExec); private native boolean isFdCloseOnExec(long handle); private native void setStatsDumpPeriodSec( long handle, int statsDumpPeriodSec); private native int statsDumpPeriodSec(long handle); private native void setStatsPersistPeriodSec( final long handle, final int statsPersistPeriodSec); private native int statsPersistPeriodSec( final long handle); private native void setStatsHistoryBufferSize( final long handle, final long statsHistoryBufferSize); private native long statsHistoryBufferSize( final long handle); private native void setAdviseRandomOnOpen( long handle, boolean adviseRandomOnOpen); private native boolean adviseRandomOnOpen(long handle); private native void setDbWriteBufferSize(final long handle, final long dbWriteBufferSize); private native void setWriteBufferManager(final long handle, final long writeBufferManagerHandle); private native long dbWriteBufferSize(final long handle); private native void setAccessHintOnCompactionStart(final long handle, final byte accessHintOnCompactionStart); private native byte accessHintOnCompactionStart(final long handle); private native void setNewTableReaderForCompactionInputs(final long handle, final boolean newTableReaderForCompactionInputs); private native boolean newTableReaderForCompactionInputs(final long handle); private native void setCompactionReadaheadSize(final long handle, final long compactionReadaheadSize); private native long compactionReadaheadSize(final long handle); private native void setRandomAccessMaxBufferSize(final long handle, final long randomAccessMaxBufferSize); private native long randomAccessMaxBufferSize(final long handle); private native void setWritableFileMaxBufferSize(final long handle, final long writableFileMaxBufferSize); private native long writableFileMaxBufferSize(final long handle); private native void setUseAdaptiveMutex( long handle, boolean useAdaptiveMutex); private native boolean useAdaptiveMutex(long handle); private native void setBytesPerSync( long handle,
long bytesPerSync); private native long bytesPerSync(long handle); private native void setWalBytesPerSync(long handle, long walBytesPerSync); private native long walBytesPerSync(long handle); private native void setStrictBytesPerSync( final long handle, final boolean strictBytesPerSync); private native boolean strictBytesPerSync( final long handle); private native void setEnableThreadTracking(long handle, boolean enableThreadTracking); private native boolean enableThreadTracking(long handle); private native void setDelayedWriteRate(long handle, long delayedWriteRate); private native long delayedWriteRate(long handle); private native void setEnablePipelinedWrite(final long handle, final boolean pipelinedWrite); private native boolean enablePipelinedWrite(final long handle); private native void setUnorderedWrite(final long handle, final boolean unorderedWrite); private native boolean unorderedWrite(final long handle); private native void setAllowConcurrentMemtableWrite(long handle, boolean allowConcurrentMemtableWrite); private native boolean allowConcurrentMemtableWrite(long handle); private native void setEnableWriteThreadAdaptiveYield(long handle, boolean enableWriteThreadAdaptiveYield); private native boolean enableWriteThreadAdaptiveYield(long handle); private native void setWriteThreadMaxYieldUsec(long handle, long writeThreadMaxYieldUsec); private native long writeThreadMaxYieldUsec(long handle); private native void setWriteThreadSlowYieldUsec(long handle, long writeThreadSlowYieldUsec); private native long writeThreadSlowYieldUsec(long handle); private native void setSkipStatsUpdateOnDbOpen(final long handle, final boolean skipStatsUpdateOnDbOpen); private native boolean skipStatsUpdateOnDbOpen(final long handle); private native void setWalRecoveryMode(final long handle, final byte walRecoveryMode); private native byte walRecoveryMode(final long handle); private native void setAllow2pc(final long handle, final boolean allow2pc); private native boolean
allow2pc(final long handle); private native void setRowCache(final long handle, final long rowCacheHandle); private native void setWalFilter(final long handle, final long walFilterHandle); private native void setFailIfOptionsFileError(final long handle, final boolean failIfOptionsFileError); private native boolean failIfOptionsFileError(final long handle); private native void setDumpMallocStats(final long handle, final boolean dumpMallocStats); private native boolean dumpMallocStats(final long handle); private native void setAvoidFlushDuringRecovery(final long handle, final boolean avoidFlushDuringRecovery); private native boolean avoidFlushDuringRecovery(final long handle); private native void setAvoidFlushDuringShutdown(final long handle, final boolean avoidFlushDuringShutdown); private native boolean avoidFlushDuringShutdown(final long handle); private native void setAllowIngestBehind(final long handle, final boolean allowIngestBehind); private native boolean allowIngestBehind(final long handle); private native void setPreserveDeletes(final long handle, final boolean preserveDeletes); private native boolean preserveDeletes(final long handle); private native void setTwoWriteQueues(final long handle, final boolean twoWriteQueues); private native boolean twoWriteQueues(final long handle); private native void setManualWalFlush(final long handle, final boolean manualWalFlush); private native boolean manualWalFlush(final long handle); // CF native handles private native void optimizeForSmallDb(final long handle); private native void optimizeForPointLookup(long handle, long blockCacheSizeMb); private native void optimizeLevelStyleCompaction(long handle, long memtableMemoryBudget); private native void optimizeUniversalStyleCompaction(long handle, long memtableMemoryBudget); /* Two setComparatorHandle overloads: per the parameter names, one takes an int identifying a built-in comparator, the other a comparator handle plus a byte type tag. */ private native void setComparatorHandle(long handle, int builtinComparator); private native void setComparatorHandle(long optHandle, long comparatorHandle, byte comparatorType); private native void
setMergeOperatorName( long handle, String name); private native void setMergeOperator( long handle, long mergeOperatorHandle); private native void setCompactionFilterHandle( long handle, long compactionFilterHandle); private native void setCompactionFilterFactoryHandle( long handle, long compactionFilterFactoryHandle); private native void setWriteBufferSize(long handle, long writeBufferSize) throws IllegalArgumentException; private native long writeBufferSize(long handle); private native void setMaxWriteBufferNumber( long handle, int maxWriteBufferNumber); private native int maxWriteBufferNumber(long handle); private native void setMinWriteBufferNumberToMerge( long handle, int minWriteBufferNumberToMerge); private native int minWriteBufferNumberToMerge(long handle); private native void setCompressionType(long handle, byte compressionType); private native byte compressionType(long handle); private native void setCompressionPerLevel(long handle, byte[] compressionLevels); private native byte[] compressionPerLevel(long handle); private native void setBottommostCompressionType(long handle, byte bottommostCompressionType); private native byte bottommostCompressionType(long handle); private native void setBottommostCompressionOptions(final long handle, final long bottommostCompressionOptionsHandle); private native void setCompressionOptions(long handle, long compressionOptionsHandle); private native void useFixedLengthPrefixExtractor( long handle, int prefixLength); private native void useCappedPrefixExtractor( long handle, int prefixLength); private native void setNumLevels( long handle, int numLevels); private native int numLevels(long handle); /* NOTE(review): the LevelZero* natives declared here and the Level0* natives declared further down are separate declarations with parallel signatures. The Level0* ones back the public level0* accessors; the callers of LevelZero* are outside this chunk. */ private native void setLevelZeroFileNumCompactionTrigger( long handle, int numFiles); private native int levelZeroFileNumCompactionTrigger(long handle); private native void setLevelZeroSlowdownWritesTrigger( long handle, int numFiles); private native int levelZeroSlowdownWritesTrigger(long handle); private native void
setLevelZeroStopWritesTrigger( long handle, int numFiles); private native int levelZeroStopWritesTrigger(long handle); private native void setTargetFileSizeBase( long handle, long targetFileSizeBase); private native long targetFileSizeBase(long handle); private native void setTargetFileSizeMultiplier( long handle, int multiplier); private native int targetFileSizeMultiplier(long handle); private native void setMaxBytesForLevelBase( long handle, long maxBytesForLevelBase); private native long maxBytesForLevelBase(long handle); private native void setLevelCompactionDynamicLevelBytes( long handle, boolean enableLevelCompactionDynamicLevelBytes); private native boolean levelCompactionDynamicLevelBytes( long handle); private native void setMaxBytesForLevelMultiplier(long handle, double multiplier); private native double maxBytesForLevelMultiplier(long handle); private native void setMaxCompactionBytes(long handle, long maxCompactionBytes); private native long maxCompactionBytes(long handle); private native void setArenaBlockSize( long handle, long arenaBlockSize) throws IllegalArgumentException; private native long arenaBlockSize(long handle); private native void setDisableAutoCompactions( long handle, boolean disableAutoCompactions); private native boolean disableAutoCompactions(long handle); private native void setCompactionStyle(long handle, byte compactionStyle); private native byte compactionStyle(long handle); private native void setMaxSequentialSkipInIterations( long handle, long maxSequentialSkipInIterations); private native long maxSequentialSkipInIterations(long handle); private native void setMemTableFactory(long handle, long factoryHandle); private native String memTableFactoryName(long handle); private native void setTableFactory(long handle, long factoryHandle); private native String tableFactoryName(long handle); private native void setInplaceUpdateSupport( long handle, boolean inplaceUpdateSupport); private native boolean inplaceUpdateSupport(long
handle); private native void setInplaceUpdateNumLocks( long handle, long inplaceUpdateNumLocks) throws IllegalArgumentException; private native long inplaceUpdateNumLocks(long handle); private native void setMemtablePrefixBloomSizeRatio( long handle, double memtablePrefixBloomSizeRatio); private native double memtablePrefixBloomSizeRatio(long handle); private native void setBloomLocality( long handle, int bloomLocality); private native int bloomLocality(long handle); private native void setMaxSuccessiveMerges( long handle, long maxSuccessiveMerges) throws IllegalArgumentException; private native long maxSuccessiveMerges(long handle); private native void setOptimizeFiltersForHits(long handle, boolean optimizeFiltersForHits); private native boolean optimizeFiltersForHits(long handle); private native void setMemtableHugePageSize(long handle, long memtableHugePageSize); private native long memtableHugePageSize(long handle); private native void setSoftPendingCompactionBytesLimit(long handle, long softPendingCompactionBytesLimit); private native long softPendingCompactionBytesLimit(long handle); private native void setHardPendingCompactionBytesLimit(long handle, long hardPendingCompactionBytesLimit); private native long hardPendingCompactionBytesLimit(long handle); private native void setLevel0FileNumCompactionTrigger(long handle, int level0FileNumCompactionTrigger); private native int level0FileNumCompactionTrigger(long handle); private native void setLevel0SlowdownWritesTrigger(long handle, int level0SlowdownWritesTrigger); private native int level0SlowdownWritesTrigger(long handle); private native void setLevel0StopWritesTrigger(long handle, int level0StopWritesTrigger); private native int level0StopWritesTrigger(long handle); private native void setMaxBytesForLevelMultiplierAdditional(long handle, int[] maxBytesForLevelMultiplierAdditional); private native int[] maxBytesForLevelMultiplierAdditional(long handle); private native void setParanoidFileChecks(long handle,
boolean paranoidFileChecks); private native boolean paranoidFileChecks(long handle); private native void setMaxWriteBufferNumberToMaintain(final long handle, final int maxWriteBufferNumberToMaintain); private native int maxWriteBufferNumberToMaintain(final long handle); private native void setCompactionPriority(final long handle, final byte compactionPriority); private native byte compactionPriority(final long handle); private native void setReportBgIoStats(final long handle, final boolean reportBgIoStats); private native boolean reportBgIoStats(final long handle); private native void setTtl(final long handle, final long ttl); private native long ttl(final long handle); /* The two compaction-options setters have no native getter; the public getters return the Java-side fields instead. */ private native void setCompactionOptionsUniversal(final long handle, final long compactionOptionsUniversalHandle); private native void setCompactionOptionsFIFO(final long handle, final long compactionOptionsFIFOHandle); private native void setForceConsistencyChecks(final long handle, final boolean forceConsistencyChecks); private native boolean forceConsistencyChecks(final long handle); private native void setAtomicFlush(final long handle, final boolean atomicFlush); private native boolean atomicFlush(final long handle); // instance variables // NOTE: If you add new member variables, please update the copy constructor above!
private Env env_; private MemTableConfig memTableConfig_; private TableFormatConfig tableFormatConfig_; private RateLimiter rateLimiter_; private AbstractComparator comparator_; private AbstractCompactionFilter> compactionFilter_; private AbstractCompactionFilterFactory> compactionFilterFactory_; private CompactionOptionsUniversal compactionOptionsUniversal_; private CompactionOptionsFIFO compactionOptionsFIFO_; private CompressionOptions bottommostCompressionOptions_; private CompressionOptions compressionOptions_; private Cache rowCache_; private WalFilter walFilter_; private WriteBufferManager writeBufferManager_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/OptionsUtil.java000066400000000000000000000200121370372246700241670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.ArrayList; import java.util.List; public class OptionsUtil { /** * A static method to construct the DBOptions and ColumnFamilyDescriptors by * loading the latest RocksDB options file stored in the specified rocksdb * database. * * Note that the all the pointer options (except table_factory, which will * be described in more details below) will be initialized with the default * values. Developers can further initialize them after this function call. * Below is an example list of pointer options which will be initialized. 
* * - env * - memtable_factory * - compaction_filter_factory * - prefix_extractor * - comparator * - merge_operator * - compaction_filter * * For table_factory, this function further supports deserializing * BlockBasedTableFactory and its BlockBasedTableOptions except the * pointer options of BlockBasedTableOptions (flush_block_policy_factory, * block_cache, and block_cache_compressed), which will be initialized with * default values. Developers can further specify these three options by * casting the return value of TableFactoroy::GetOptions() to * BlockBasedTableOptions and making necessary changes. * * @param dbPath the path to the RocksDB. * @param env {@link org.rocksdb.Env} instance. * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be * filled and returned. * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be * returned. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static void loadLatestOptions(String dbPath, Env env, DBOptions dbOptions, List cfDescs) throws RocksDBException { loadLatestOptions(dbPath, env, dbOptions, cfDescs, false); } /** * @param dbPath the path to the RocksDB. * @param env {@link org.rocksdb.Env} instance. * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be * filled and returned. * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be * returned. * @param ignoreUnknownOptions this flag can be set to true if you want to * ignore options that are from a newer version of the db, essentially for * forward compatibility. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ public static void loadLatestOptions(String dbPath, Env env, DBOptions dbOptions, List cfDescs, boolean ignoreUnknownOptions) throws RocksDBException { loadLatestOptions( dbPath, env.nativeHandle_, dbOptions.nativeHandle_, cfDescs, ignoreUnknownOptions); } /** * Similar to LoadLatestOptions, this function constructs the DBOptions * and ColumnFamilyDescriptors based on the specified RocksDB Options file. * See LoadLatestOptions above. * * @param dbPath the path to the RocksDB. * @param configOptions {@link org.rocksdb.ConfigOptions} instance. * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be * filled and returned. * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be * returned. * @throws RocksDBException thrown if error happens in underlying * native library. */ public static void loadLatestOptions(ConfigOptions configOptions, String dbPath, DBOptions dbOptions, List cfDescs) throws RocksDBException { loadLatestOptions(configOptions.nativeHandle_, dbPath, dbOptions.nativeHandle_, cfDescs); } /** * Similar to LoadLatestOptions, this function constructs the DBOptions * and ColumnFamilyDescriptors based on the specified RocksDB Options file. * See LoadLatestOptions above. * * @param optionsFileName the RocksDB options file path. * @param env {@link org.rocksdb.Env} instance. * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be * filled and returned. * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be * returned. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static void loadOptionsFromFile(String optionsFileName, Env env, DBOptions dbOptions, List cfDescs) throws RocksDBException { loadOptionsFromFile(optionsFileName, env, dbOptions, cfDescs, false); } /** * @param optionsFileName the RocksDB options file path. * @param env {@link org.rocksdb.Env} instance. * @param dbOptions {@link org.rocksdb.DBOptions} instance. 
This will be * filled and returned. * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be * returned. * @param ignoreUnknownOptions this flag can be set to true if you want to * ignore options that are from a newer version of the db, esentially for * forward compatibility. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static void loadOptionsFromFile(String optionsFileName, Env env, DBOptions dbOptions, List cfDescs, boolean ignoreUnknownOptions) throws RocksDBException { loadOptionsFromFile( optionsFileName, env.nativeHandle_, dbOptions.nativeHandle_, cfDescs, ignoreUnknownOptions); } /** * Similar to LoadLatestOptions, this function constructs the DBOptions * and ColumnFamilyDescriptors based on the specified RocksDB Options file. * See LoadLatestOptions above. * * @param optionsFileName the RocksDB options file path. * @param configOptions {@link org.rocksdb.ConfigOptions} instance. * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be * filled and returned. * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be * returned. * @throws RocksDBException thrown if error happens in underlying * native library. */ public static void loadOptionsFromFile(ConfigOptions configOptions, String optionsFileName, DBOptions dbOptions, List cfDescs) throws RocksDBException { loadOptionsFromFile( configOptions.nativeHandle_, optionsFileName, dbOptions.nativeHandle_, cfDescs); } /** * Returns the latest options file name under the specified RocksDB path. * * @param dbPath the path to the RocksDB. * @param env {@link org.rocksdb.Env} instance. * @return the latest options file name under the db path. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static String getLatestOptionsFileName(String dbPath, Env env) throws RocksDBException { return getLatestOptionsFileName(dbPath, env.nativeHandle_); } /** * Private constructor. 
* This class has only static methods and shouldn't be instantiated. */ private OptionsUtil() {} // native methods private native static void loadLatestOptions(String dbPath, long envHandle, long dbOptionsHandle, List cfDescs, boolean ignoreUnknownOptions) throws RocksDBException; private native static void loadLatestOptions(long cfgHandle, String dbPath, long dbOptionsHandle, List cfDescs) throws RocksDBException; private native static void loadOptionsFromFile(String optionsFileName, long envHandle, long dbOptionsHandle, List cfDescs, boolean ignoreUnknownOptions) throws RocksDBException; private native static void loadOptionsFromFile(long cfgHandle, String optionsFileName, long dbOptionsHandle, List cfDescs) throws RocksDBException; private native static String getLatestOptionsFileName(String dbPath, long envHandle) throws RocksDBException; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/PersistentCache.java000066400000000000000000000020041370372246700247630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Persistent cache for caching IO pages on a persistent medium. The * cache is specifically designed for persistent read cache. 
*/ public class PersistentCache extends RocksObject { /** * Creates the native persistent cache via newPersistentCache and hands the returned handle to the RocksObject constructor. * * @param env environment whose nativeHandle_ is passed to the native constructor; it is dereferenced, so null causes a NullPointerException. * @param path passed through to the native constructor - presumably the cache location; confirm in the JNI sources. * @param size passed through to the native constructor - presumably the cache capacity; units are not visible here. * @param logger logger whose nativeHandle_ is passed to the native constructor; it is dereferenced, so null causes a NullPointerException. * @param optimizedForNvm passed through unchanged to the native constructor. * * @throws RocksDBException declared by the native constructor. */ public PersistentCache(final Env env, final String path, final long size, final Logger logger, final boolean optimizedForNvm) throws RocksDBException { super(newPersistentCache(env.nativeHandle_, path, size, logger.nativeHandle_, optimizedForNvm)); } /* Native factory; returns the long handle that the constructor forwards to super(...). */ private native static long newPersistentCache(final long envHandle, final String path, final long size, final long loggerHandle, final boolean optimizedForNvm) throws RocksDBException; /* Native disposal hook overriding the superclass declaration; receives the handle to dispose. */ @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/PlainTableConfig.java000066400000000000000000000166531370372246700250570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The config for plain table sst format. * *

PlainTable is RocksDB's SST file format optimized for low query * latency on pure-memory or really low-latency media.

* *

It also supports the prefix hash feature.

*/ public class PlainTableConfig extends TableFormatConfig { /** Key-size value that marks user keys as variable-length; also the key size the no-arg constructor assigns. */ public static final int VARIABLE_LENGTH = 0; /** The DEFAULT_* constants below are the values the no-arg constructor assigns to the corresponding fields. */ public static final int DEFAULT_BLOOM_BITS_PER_KEY = 10; public static final double DEFAULT_HASH_TABLE_RATIO = 0.75; public static final int DEFAULT_INDEX_SPARSENESS = 16; public static final int DEFAULT_HUGE_TLB_SIZE = 0; public static final EncodingType DEFAULT_ENCODING_TYPE = EncodingType.kPlain; public static final boolean DEFAULT_FULL_SCAN_MODE = false; public static final boolean DEFAULT_STORE_INDEX_IN_FILE = false; /** Creates a config with the key size set to VARIABLE_LENGTH and every other field set to its DEFAULT_* constant. */ public PlainTableConfig() { keySize_ = VARIABLE_LENGTH; bloomBitsPerKey_ = DEFAULT_BLOOM_BITS_PER_KEY; hashTableRatio_ = DEFAULT_HASH_TABLE_RATIO; indexSparseness_ = DEFAULT_INDEX_SPARSENESS; hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE; encodingType_ = DEFAULT_ENCODING_TYPE; fullScanMode_ = DEFAULT_FULL_SCAN_MODE; storeIndexInFile_ = DEFAULT_STORE_INDEX_IN_FILE; } /** *

Set the length of the user key. If it is set to be * VARIABLE_LENGTH, then it indicates the user keys are * of variable length.

* *

Otherwise, all the keys need to have the same length * in bytes.

* *

DEFAULT: VARIABLE_LENGTH

* * @param keySize the length of the user key. * @return the reference to the current config. */ public PlainTableConfig setKeySize(int keySize) { keySize_ = keySize; return this; } /** * @return the specified size of the user key. If VARIABLE_LENGTH, * then it indicates variable-length key. */ public int keySize() { return keySize_; } /** * Set the number of bits per key used by the internal bloom filter * in the plain table sst format. * * @param bitsPerKey the number of bits per key for bloom filer. * @return the reference to the current config. */ public PlainTableConfig setBloomBitsPerKey(int bitsPerKey) { bloomBitsPerKey_ = bitsPerKey; return this; } /** * @return the number of bits per key used for the bloom filter. */ public int bloomBitsPerKey() { return bloomBitsPerKey_; } /** * hashTableRatio is the desired utilization of the hash table used * for prefix hashing. The ideal ratio would be the number of * prefixes / the number of hash buckets. If this value is set to * zero, then hash table will not be used. * * @param ratio the hash table ratio. * @return the reference to the current config. */ public PlainTableConfig setHashTableRatio(double ratio) { hashTableRatio_ = ratio; return this; } /** * @return the hash table ratio. */ public double hashTableRatio() { return hashTableRatio_; } /** * Index sparseness determines the index interval for keys inside the * same prefix. This number is equal to the maximum number of linear * search required after hash and binary search. If it's set to 0, * then each key will be indexed. * * @param sparseness the index sparseness. * @return the reference to the current config. */ public PlainTableConfig setIndexSparseness(int sparseness) { indexSparseness_ = sparseness; return this; } /** * @return the index sparseness. */ public long indexSparseness() { return indexSparseness_; } /** *

huge_page_tlb_size: if ≤0, allocate hash indexes and blooms * from malloc otherwise from huge page TLB.

* *

The user needs to reserve huge pages for it to be allocated, * like: {@code sysctl -w vm.nr_hugepages=20}

* *

See linux doc Documentation/vm/hugetlbpage.txt

*
   * @param hugePageTlbSize huge page tlb size
   * @return the reference to the current config.
   */
  public PlainTableConfig setHugePageTlbSize(int hugePageTlbSize) {
    this.hugePageTlbSize_ = hugePageTlbSize;
    return this;
  }

  /**
   * Returns the value for huge page tlb size.
   *
   * @return the huge page tlb size currently stored in this config.
   */
  public int hugePageTlbSize() {
    return hugePageTlbSize_;
  }

  /**
   * Sets the encoding type.
   *
   *

This setting determines how to encode * the keys. See enum {@link EncodingType} for * the choices.

* *

The value will determine how to encode keys * when writing to a new SST file. This value will be stored * inside the SST file which will be used when reading from * the file, which makes it possible for users to choose * different encoding type when reopening a DB. Files with * different encoding types can co-exist in the same DB and * can be read.

*
   * @param encodingType {@link org.rocksdb.EncodingType} value.
   * @return the reference to the current config.
   */
  public PlainTableConfig setEncodingType(EncodingType encodingType) {
    this.encodingType_ = encodingType;
    return this;
  }

  /**
   * Returns the active EncodingType.
   *
   * @return currently set encoding type
   */
  public EncodingType encodingType() {
    return encodingType_;
  }

  /**
   * Set full scan mode. If true, the whole file will be read
   * one record at a time without using the index.
   *
   * @param fullScanMode boolean value indicating if full
   *     scan mode shall be enabled.
   * @return the reference to the current config.
   */
  public PlainTableConfig setFullScanMode(boolean fullScanMode) {
    this.fullScanMode_ = fullScanMode;
    return this;
  }

  /**
   * Returns whether full scan mode is active.
   *
   * @return boolean value indicating if the full scan mode is
   *     enabled.
   */
  public boolean fullScanMode() {
    return fullScanMode_;
  }

  /**
   *

If set to true: compute plain table index and bloom * filter during file building and store it in file. * When reading file, index will be mmaped instead * of doing recomputation.

*
   * @param storeIndexInFile value indicating if index shall
   *     be stored in a file
   * @return the reference to the current config.
   */
  public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) {
    this.storeIndexInFile_ = storeIndexInFile;
    return this;
  }

  /**
   * Return a boolean value indicating if index shall be stored
   * in a file.
   *
   * @return currently set value for store index in file.
   */
  public boolean storeIndexInFile() {
    return storeIndexInFile_;
  }

  /**
   * Passes every stored setting to the native factory call; the encoding
   * type is passed as its byte value.
   *
   * @return the value returned by the native call.
   */
  @Override protected long newTableFactoryHandle() {
    return newTableFactoryHandle(keySize_, bloomBitsPerKey_,
        hashTableRatio_, indexSparseness_, hugePageTlbSize_,
        encodingType_.getValue(), fullScanMode_,
        storeIndexInFile_);
  }

  private native long newTableFactoryHandle(
      int keySize, int bloomBitsPerKey,
      double hashTableRatio, int indexSparseness,
      int hugePageTlbSize, byte encodingType,
      boolean fullScanMode, boolean storeIndexInFile);

  // Settings written by the constructor and the setters of this class.
  private int keySize_;
  private int bloomBitsPerKey_;
  private double hashTableRatio_;
  private int indexSparseness_;
  private int hugePageTlbSize_;
  private EncodingType encodingType_;
  private boolean fullScanMode_;
  private boolean storeIndexInFile_;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/Priority.java000066400000000000000000000022451370372246700235270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * The Thread Pool priority.
 */
public enum Priority {
  BOTTOM((byte) 0x0),
  LOW((byte) 0x1),
  HIGH((byte)0x2),
  TOTAL((byte)0x3);

  // Byte identifier associated with this constant.
  private final byte value;

  Priority(final byte value) {
    this.value = value;
  }

  /**
   *

Returns the byte value of the enumeration value.

* * @return byte representation */ byte getValue() { return value; } /** * Get Priority by byte value. * * @param value byte representation of Priority. * * @return {@link org.rocksdb.Priority} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ static Priority getPriority(final byte value) { for (final Priority priority : Priority.values()) { if (priority.getValue() == value){ return priority; } } throw new IllegalArgumentException("Illegal value provided for Priority."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Range.java000066400000000000000000000007571370372246700227500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Range from start to limit. */ public class Range { final Slice start; final Slice limit; public Range(final Slice start, final Slice limit) { this.start = start; this.limit = limit; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RateLimiter.java000066400000000000000000000227631370372246700241360ustar00rootroot00000000000000// Copyright (c) 2015, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * RateLimiter, which is used to control write rate of flush and * compaction. 
*
 * @since 3.10.0
 */
public class RateLimiter extends RocksObject {
  // Refill period in microseconds: 100 * 1000 us = 100 ms.
  public static final long DEFAULT_REFILL_PERIOD_MICROS = 100 * 1000;
  public static final int DEFAULT_FAIRNESS = 10;
  public static final RateLimiterMode DEFAULT_MODE =
      RateLimiterMode.WRITES_ONLY;
  public static final boolean DEFAULT_AUTOTUNE = false;

  /**
   * RateLimiter constructor; every other setting takes its DEFAULT_* value.
   *
   * @param rateBytesPerSecond this is the only parameter you want to set
   *     most of the time. It controls the total write rate of compaction
   *     and flush in bytes per second. Currently, RocksDB does not enforce
   *     rate limit for anything other than flush and compaction, e.g. write to
   *     WAL.
   */
  public RateLimiter(final long rateBytesPerSecond) {
    this(rateBytesPerSecond, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS,
        DEFAULT_MODE, DEFAULT_AUTOTUNE);
  }

  /**
   * RateLimiter constructor; fairness, mode and auto-tune take their
   * DEFAULT_* values.
   *
   * @param rateBytesPerSecond this is the only parameter you want to set
   *     most of the time. It controls the total write rate of compaction
   *     and flush in bytes per second. Currently, RocksDB does not enforce
   *     rate limit for anything other than flush and compaction, e.g. write to
   *     WAL.
   * @param refillPeriodMicros this controls how often tokens are refilled. For
   *     example,
   *     when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to
   *     100ms, then 1MB is refilled every 100ms internally. Larger value can
   *     lead to burstier writes while smaller value introduces more CPU
   *     overhead. The default of 100,000 microseconds (100ms) should work for
   *     most cases.
   */
  public RateLimiter(final long rateBytesPerSecond,
      final long refillPeriodMicros) {
    this(rateBytesPerSecond, refillPeriodMicros, DEFAULT_FAIRNESS, DEFAULT_MODE,
        DEFAULT_AUTOTUNE);
  }

  /**
   * RateLimiter constructor; mode and auto-tune take their DEFAULT_* values.
   *
   * @param rateBytesPerSecond this is the only parameter you want to set
   *     most of the time. It controls the total write rate of compaction
   *     and flush in bytes per second. Currently, RocksDB does not enforce
   *     rate limit for anything other than flush and compaction, e.g. write to
   *     WAL.
   * @param refillPeriodMicros this controls how often tokens are refilled. For
   *     example,
   *     when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to
   *     100ms, then 1MB is refilled every 100ms internally. Larger value can
   *     lead to burstier writes while smaller value introduces more CPU
   *     overhead. The default of 100,000 microseconds (100ms) should work for
   *     most cases.
   * @param fairness RateLimiter accepts high-pri requests and low-pri requests.
   *     A low-pri request is usually blocked in favor of hi-pri request.
   *     Currently, RocksDB assigns low-pri to request from compaction and
   *     high-pri to request from flush. Low-pri requests can get blocked if
   *     flush requests come in continuously. This fairness parameter grants
   *     low-pri requests permission by fairness chance even though high-pri
   *     requests exist to avoid starvation.
   *     You should be good by leaving it at default 10.
   */
  public RateLimiter(final long rateBytesPerSecond,
      final long refillPeriodMicros, final int fairness) {
    this(rateBytesPerSecond, refillPeriodMicros, fairness, DEFAULT_MODE,
        DEFAULT_AUTOTUNE);
  }

  /**
   * RateLimiter constructor; auto-tune takes its DEFAULT_AUTOTUNE value.
   *
   * @param rateBytesPerSecond this is the only parameter you want to set
   *     most of the time. It controls the total write rate of compaction
   *     and flush in bytes per second. Currently, RocksDB does not enforce
   *     rate limit for anything other than flush and compaction, e.g. write to
   *     WAL.
   * @param refillPeriodMicros this controls how often tokens are refilled. For
   *     example,
   *     when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to
   *     100ms, then 1MB is refilled every 100ms internally. Larger value can
   *     lead to burstier writes while smaller value introduces more CPU
   *     overhead. The default of 100,000 microseconds (100ms) should work for
   *     most cases.
   * @param fairness RateLimiter accepts high-pri requests and low-pri requests.
   *     A low-pri request is usually blocked in favor of hi-pri request.
   *     Currently, RocksDB assigns low-pri to request from compaction and
   *     high-pri to request from flush. Low-pri requests can get blocked if
   *     flush requests come in continuously. This fairness parameter grants
   *     low-pri requests permission by fairness chance even though high-pri
   *     requests exist to avoid starvation.
   *     You should be good by leaving it at default 10.
   * @param rateLimiterMode indicates which types of operations count against
   *     the limit.
   */
  public RateLimiter(final long rateBytesPerSecond,
      final long refillPeriodMicros, final int fairness,
      final RateLimiterMode rateLimiterMode) {
    this(rateBytesPerSecond, refillPeriodMicros, fairness, rateLimiterMode,
        DEFAULT_AUTOTUNE);
  }

  /**
   * RateLimiter constructor taking every setting explicitly; the other
   * constructors delegate to this one.
   *
   * @param rateBytesPerSecond this is the only parameter you want to set
   *     most of the time. It controls the total write rate of compaction
   *     and flush in bytes per second. Currently, RocksDB does not enforce
   *     rate limit for anything other than flush and compaction, e.g. write to
   *     WAL.
   * @param refillPeriodMicros this controls how often tokens are refilled. For
   *     example,
   *     when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to
   *     100ms, then 1MB is refilled every 100ms internally. Larger value can
   *     lead to burstier writes while smaller value introduces more CPU
   *     overhead. The default of 100,000 microseconds (100ms) should work for
   *     most cases.
   * @param fairness RateLimiter accepts high-pri requests and low-pri requests.
   *     A low-pri request is usually blocked in favor of hi-pri request.
   *     Currently, RocksDB assigns low-pri to request from compaction and
   *     high-pri to request from flush. Low-pri requests can get blocked if
   *     flush requests come in continuously. This fairness parameter grants
   *     low-pri requests permission by fairness chance even though high-pri
   *     requests exist to avoid starvation.
   *     You should be good by leaving it at default 10.
   * @param rateLimiterMode indicates which types of operations count against
   *     the limit. Dereferenced unconditionally, so it must not be null.
   * @param autoTune Enables dynamic adjustment of rate limit within the range
   *     {@code [rate_bytes_per_sec / 20, rate_bytes_per_sec]}, according to
   *     the recent demand for background I/O.
   */
  public RateLimiter(final long rateBytesPerSecond,
      final long refillPeriodMicros, final int fairness,
      final RateLimiterMode rateLimiterMode, final boolean autoTune) {
    super(newRateLimiterHandle(rateBytesPerSecond,
        refillPeriodMicros, fairness, rateLimiterMode.getValue(), autoTune));
  }

  /**
   *

This API allows user to dynamically change rate limiter's bytes per second. * REQUIRED: bytes_per_second > 0

*
   * @param bytesPerSecond bytes per second.
   */
  public void setBytesPerSecond(final long bytesPerSecond) {
    assert isOwningHandle();
    setBytesPerSecond(nativeHandle_, bytesPerSecond);
  }

  /**
   * Asks the native rate limiter for its current rate.
   *
   * @return bytes per second.
   */
  public long getBytesPerSecond() {
    assert isOwningHandle();
    final long bytesPerSecond = getBytesPerSecond(nativeHandle_);
    return bytesPerSecond;
  }

  /**
   *

Request for token to write bytes. If this request can not be satisfied, * the call is blocked. Caller is responsible to make sure * {@code bytes < GetSingleBurstBytes()}.

*
   * @param bytes requested bytes.
   */
  public void request(final long bytes) {
    assert isOwningHandle();
    request(nativeHandle_, bytes);
  }

  /**
   *

Max bytes that can be granted in a single burst.

*
   * @return max bytes can be granted in a single burst.
   */
  public long getSingleBurstBytes() {
    assert isOwningHandle();
    final long singleBurstBytes = getSingleBurstBytes(nativeHandle_);
    return singleBurstBytes;
  }

  /**
   *

Total bytes that go through rate limiter.

*
   * @return total bytes that go through rate limiter.
   */
  public long getTotalBytesThrough() {
    assert isOwningHandle();
    final long totalBytes = getTotalBytesThrough(nativeHandle_);
    return totalBytes;
  }

  /**
   *

Total # of requests that go through rate limiter.

* * @return total # of requests that go through rate limiter. */ public long getTotalRequests() { assert(isOwningHandle()); return getTotalRequests(nativeHandle_); } private static native long newRateLimiterHandle(final long rateBytesPerSecond, final long refillPeriodMicros, final int fairness, final byte rateLimiterMode, final boolean autoTune); @Override protected final native void disposeInternal(final long handle); private native void setBytesPerSecond(final long handle, final long bytesPerSecond); private native long getBytesPerSecond(final long handle); private native void request(final long handle, final long bytes); private native long getSingleBurstBytes(final long handle); private native long getTotalBytesThrough(final long handle); private native long getTotalRequests(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RateLimiterMode.java000066400000000000000000000026071370372246700247360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Mode for {@link RateLimiter#RateLimiter(long, long, int, RateLimiterMode)}. */ public enum RateLimiterMode { READS_ONLY((byte)0x0), WRITES_ONLY((byte)0x1), ALL_IO((byte)0x2); private final byte value; RateLimiterMode(final byte value) { this.value = value; } /** *

Returns the byte value of the enumeration value.

*
   * @return byte representation
   */
  public byte getValue() {
    return this.value;
  }

  /**
   *

Get the RateLimiterMode enumeration value by * passing the byte identifier to this method.

* * @param byteIdentifier of RateLimiterMode. * * @return AccessHint instance. * * @throws IllegalArgumentException if the access hint for the byteIdentifier * cannot be found */ public static RateLimiterMode getRateLimiterMode(final byte byteIdentifier) { for (final RateLimiterMode rateLimiterMode : RateLimiterMode.values()) { if (rateLimiterMode.getValue() == byteIdentifier) { return rateLimiterMode; } } throw new IllegalArgumentException( "Illegal value provided for RateLimiterMode."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/ReadOptions.java000066400000000000000000000526071370372246700241440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The class that controls the get behavior. * * Note that dispose() must be called before an Options instance * become out-of-scope to release the allocated memory in c++. */ public class ReadOptions extends RocksObject { public ReadOptions() { super(newReadOptions()); } /** * @param verifyChecksums verification will be performed on every read * when set to true * @param fillCache if true, then fill-cache behavior will be performed. */ public ReadOptions(final boolean verifyChecksums, final boolean fillCache) { super(newReadOptions(verifyChecksums, fillCache)); } /** * Copy constructor. * * NOTE: This does a shallow copy, which means snapshot, iterate_upper_bound * and other pointers will be cloned! * * @param other The ReadOptions to copy. */ public ReadOptions(ReadOptions other) { super(copyReadOptions(other.nativeHandle_)); this.iterateLowerBoundSlice_ = other.iterateLowerBoundSlice_; this.iterateUpperBoundSlice_ = other.iterateUpperBoundSlice_; } /** * If true, all data read from underlying storage will be * verified against corresponding checksums. 
* Default: true * * @return true if checksum verification is on. */ public boolean verifyChecksums() { assert(isOwningHandle()); return verifyChecksums(nativeHandle_); } /** * If true, all data read from underlying storage will be * verified against corresponding checksums. * Default: true * * @param verifyChecksums if true, then checksum verification * will be performed on every read. * @return the reference to the current ReadOptions. */ public ReadOptions setVerifyChecksums( final boolean verifyChecksums) { assert(isOwningHandle()); setVerifyChecksums(nativeHandle_, verifyChecksums); return this; } // TODO(yhchiang): this option seems to be block-based table only. // move this to a better place? /** * Fill the cache when loading the block-based sst formated db. * Callers may wish to set this field to false for bulk scans. * Default: true * * @return true if the fill-cache behavior is on. */ public boolean fillCache() { assert(isOwningHandle()); return fillCache(nativeHandle_); } /** * Fill the cache when loading the block-based sst formatted db. * Callers may wish to set this field to false for bulk scans. * Default: true * * @param fillCache if true, then fill-cache behavior will be * performed. * @return the reference to the current ReadOptions. */ public ReadOptions setFillCache(final boolean fillCache) { assert(isOwningHandle()); setFillCache(nativeHandle_, fillCache); return this; } /** * Returns the currently assigned Snapshot instance. * * @return the Snapshot assigned to this instance. If no Snapshot * is assigned null. */ public Snapshot snapshot() { assert(isOwningHandle()); long snapshotHandle = snapshot(nativeHandle_); if (snapshotHandle != 0) { return new Snapshot(snapshotHandle); } return null; } /** *

If "snapshot" is non-null, read as of the supplied snapshot * (which must belong to the DB that is being read and which must * not have been released). If "snapshot" is null, use an implicit * snapshot of the state at the beginning of this read operation.

*

Default: null

* * @param snapshot {@link Snapshot} instance * @return the reference to the current ReadOptions. */ public ReadOptions setSnapshot(final Snapshot snapshot) { assert(isOwningHandle()); if (snapshot != null) { setSnapshot(nativeHandle_, snapshot.nativeHandle_); } else { setSnapshot(nativeHandle_, 0l); } return this; } /** * Returns the current read tier. * * @return the read tier in use, by default {@link ReadTier#READ_ALL_TIER} */ public ReadTier readTier() { assert(isOwningHandle()); return ReadTier.getReadTier(readTier(nativeHandle_)); } /** * Specify if this read request should process data that ALREADY * resides on a particular cache. If the required data is not * found at the specified cache, then {@link RocksDBException} is thrown. * * @param readTier {@link ReadTier} instance * @return the reference to the current ReadOptions. */ public ReadOptions setReadTier(final ReadTier readTier) { assert(isOwningHandle()); setReadTier(nativeHandle_, readTier.getValue()); return this; } /** * Specify to create a tailing iterator -- a special iterator that has a * view of the complete database (i.e. it can also be used to read newly * added data) and is optimized for sequential reads. It will return records * that were inserted into the database after the creation of the iterator. * Default: false * * Not supported in {@code ROCKSDB_LITE} mode! * * @return true if tailing iterator is enabled. */ public boolean tailing() { assert(isOwningHandle()); return tailing(nativeHandle_); } /** * Specify to create a tailing iterator -- a special iterator that has a * view of the complete database (i.e. it can also be used to read newly * added data) and is optimized for sequential reads. It will return records * that were inserted into the database after the creation of the iterator. * Default: false * Not supported in ROCKSDB_LITE mode! * * @param tailing if true, then tailing iterator will be enabled. * @return the reference to the current ReadOptions. 
*/ public ReadOptions setTailing(final boolean tailing) { assert(isOwningHandle()); setTailing(nativeHandle_, tailing); return this; } /** * Returns whether managed iterators will be used. * * @return the setting of whether managed iterators will be used, * by default false * * @deprecated This options is not used anymore. */ @Deprecated public boolean managed() { assert(isOwningHandle()); return managed(nativeHandle_); } /** * Specify to create a managed iterator -- a special iterator that * uses less resources by having the ability to free its underlying * resources on request. * * @param managed if true, then managed iterators will be enabled. * @return the reference to the current ReadOptions. * * @deprecated This options is not used anymore. */ @Deprecated public ReadOptions setManaged(final boolean managed) { assert(isOwningHandle()); setManaged(nativeHandle_, managed); return this; } /** * Returns whether a total seek order will be used * * @return the setting of whether a total seek order will be used */ public boolean totalOrderSeek() { assert(isOwningHandle()); return totalOrderSeek(nativeHandle_); } /** * Enable a total order seek regardless of index format (e.g. hash index) * used in the table. Some table format (e.g. plain table) may not support * this option. * * @param totalOrderSeek if true, then total order seek will be enabled. * @return the reference to the current ReadOptions. */ public ReadOptions setTotalOrderSeek(final boolean totalOrderSeek) { assert(isOwningHandle()); setTotalOrderSeek(nativeHandle_, totalOrderSeek); return this; } /** * Returns whether the iterator only iterates over the same prefix as the seek * * @return the setting of whether the iterator only iterates over the same * prefix as the seek, default is false */ public boolean prefixSameAsStart() { assert(isOwningHandle()); return prefixSameAsStart(nativeHandle_); } /** * Enforce that the iterator only iterates over the same prefix as the seek. 
* This option is effective only for prefix seeks, i.e. prefix_extractor is * non-null for the column family and {@link #totalOrderSeek()} is false. * Unlike iterate_upper_bound, {@link #setPrefixSameAsStart(boolean)} only * works within a prefix but in both directions. * * @param prefixSameAsStart if true, then the iterator only iterates over the * same prefix as the seek * @return the reference to the current ReadOptions. */ public ReadOptions setPrefixSameAsStart(final boolean prefixSameAsStart) { assert(isOwningHandle()); setPrefixSameAsStart(nativeHandle_, prefixSameAsStart); return this; } /** * Returns whether the blocks loaded by the iterator will be pinned in memory * * @return the setting of whether the blocks loaded by the iterator will be * pinned in memory */ public boolean pinData() { assert(isOwningHandle()); return pinData(nativeHandle_); } /** * Keep the blocks loaded by the iterator pinned in memory as long as the * iterator is not deleted, If used when reading from tables created with * BlockBasedTableOptions::use_delta_encoding = false, * Iterator's property "rocksdb.iterator.is-key-pinned" is guaranteed to * return 1. * * @param pinData if true, the blocks loaded by the iterator will be pinned * @return the reference to the current ReadOptions. */ public ReadOptions setPinData(final boolean pinData) { assert(isOwningHandle()); setPinData(nativeHandle_, pinData); return this; } /** * If true, when PurgeObsoleteFile is called in CleanupIteratorState, we * schedule a background job in the flush job queue and delete obsolete files * in background. * * Default: false * * @return true when PurgeObsoleteFile is called in CleanupIteratorState */ public boolean backgroundPurgeOnIteratorCleanup() { assert(isOwningHandle()); return backgroundPurgeOnIteratorCleanup(nativeHandle_); } /** * If true, when PurgeObsoleteFile is called in CleanupIteratorState, we * schedule a background job in the flush job queue and delete obsolete files * in background. 
* * Default: false * * @param backgroundPurgeOnIteratorCleanup true when PurgeObsoleteFile is * called in CleanupIteratorState * @return the reference to the current ReadOptions. */ public ReadOptions setBackgroundPurgeOnIteratorCleanup( final boolean backgroundPurgeOnIteratorCleanup) { assert(isOwningHandle()); setBackgroundPurgeOnIteratorCleanup(nativeHandle_, backgroundPurgeOnIteratorCleanup); return this; } /** * If non-zero, NewIterator will create a new table reader which * performs reads of the given size. Using a large size (> 2MB) can * improve the performance of forward iteration on spinning disks. * * Default: 0 * * @return The readahead size is bytes */ public long readaheadSize() { assert(isOwningHandle()); return readaheadSize(nativeHandle_); } /** * If non-zero, NewIterator will create a new table reader which * performs reads of the given size. Using a large size (> 2MB) can * improve the performance of forward iteration on spinning disks. * * Default: 0 * * @param readaheadSize The readahead size is bytes * @return the reference to the current ReadOptions. */ public ReadOptions setReadaheadSize(final long readaheadSize) { assert(isOwningHandle()); setReadaheadSize(nativeHandle_, readaheadSize); return this; } /** * A threshold for the number of keys that can be skipped before failing an * iterator seek as incomplete. * * @return the number of keys that can be skipped * before failing an iterator seek as incomplete. */ public long maxSkippableInternalKeys() { assert(isOwningHandle()); return maxSkippableInternalKeys(nativeHandle_); } /** * A threshold for the number of keys that can be skipped before failing an * iterator seek as incomplete. The default value of 0 should be used to * never fail a request as incomplete, even on skipping too many keys. * * Default: 0 * * @param maxSkippableInternalKeys the number of keys that can be skipped * before failing an iterator seek as incomplete. * * @return the reference to the current ReadOptions. 
*/
  public ReadOptions setMaxSkippableInternalKeys(
      final long maxSkippableInternalKeys) {
    assert(isOwningHandle());
    setMaxSkippableInternalKeys(nativeHandle_, maxSkippableInternalKeys);
    return this;
  }

  /**
   * If true, keys deleted using the DeleteRange() API will be visible to
   * readers until they are naturally deleted during compaction. This improves
   * read performance in DBs with many range deletions.
   *
   * Default: false
   *
   * @return true if keys deleted using the DeleteRange() API will be visible
   */
  public boolean ignoreRangeDeletions() {
    assert(isOwningHandle());
    return ignoreRangeDeletions(nativeHandle_);
  }

  /**
   * If true, keys deleted using the DeleteRange() API will be visible to
   * readers until they are naturally deleted during compaction. This improves
   * read performance in DBs with many range deletions.
   *
   * Default: false
   *
   * @param ignoreRangeDeletions true if keys deleted using the DeleteRange()
   *     API should be visible
   * @return the reference to the current ReadOptions.
   */
  public ReadOptions setIgnoreRangeDeletions(final boolean ignoreRangeDeletions) {
    assert(isOwningHandle());
    setIgnoreRangeDeletions(nativeHandle_, ignoreRangeDeletions);
    return this;
  }

  /**
   * Defines the smallest key at which the backward
   * iterator can return an entry. Once the bound is passed,
   * {@link RocksIterator#isValid()} will be false.
   *
   * The lower bound is inclusive i.e. the bound value is a valid
   * entry.
   *
   * If prefix_extractor is not null, the Seek target and `iterate_lower_bound`
   * need to have the same prefix. This is because ordering is not guaranteed
   * outside of prefix domain.
   *
   * Passing null clears a previously set bound.
   *
   * Default: null
   *
   * @param iterateLowerBound Slice representing the lower bound, or null
   *     for no lower bound
   * @return the reference to the current ReadOptions.
   */
  public ReadOptions setIterateLowerBound(final Slice iterateLowerBound) {
    assert(isOwningHandle());
    // A zero handle stands for "no bound", mirroring how iterateLowerBound()
    // interprets a zero handle coming back from the native side.
    final long lowerBoundSliceHandle =
        iterateLowerBound == null ? 0L : iterateLowerBound.getNativeHandle();
    setIterateLowerBound(nativeHandle_, lowerBoundSliceHandle);
    // Hold onto a reference so it doesn't get garbage collected out from under us.
    // Assigned after the native update so the previous slice stays reachable
    // until the native side no longer refers to it.
    iterateLowerBoundSlice_ = iterateLowerBound;
    return this;
  }

  /**
   * Returns the smallest key at which the backward
   * iterator can return an entry.
   *
   * The lower bound is inclusive i.e. the bound value is a valid entry.
   *
   * @return the smallest key, or null if there is no lower bound defined.
   */
  public Slice iterateLowerBound() {
    assert(isOwningHandle());
    final long lowerBoundSliceHandle = iterateLowerBound(nativeHandle_);
    if (lowerBoundSliceHandle != 0) {
      // Disown the new slice - it's owned by the C++ side of the JNI boundary
      // from the perspective of this method.
      return new Slice(lowerBoundSliceHandle, false);
    }
    return null;
  }

  /**
   * Defines the extent up to which the forward iterator
   * can return entries. Once the bound is reached,
   * {@link RocksIterator#isValid()} will be false.
   *
   * The upper bound is exclusive i.e. the bound value is not a valid entry.
   *
   * If prefix_extractor is not null, the Seek target and iterate_upper_bound
   * need to have the same prefix. This is because ordering is not guaranteed
   * outside of prefix domain.
   *
   * Passing null clears a previously set bound.
   *
   * Default: null
   *
   * @param iterateUpperBound Slice representing the upper bound, or null
   *     for no upper bound
   * @return the reference to the current ReadOptions.
   */
  public ReadOptions setIterateUpperBound(final Slice iterateUpperBound) {
    assert(isOwningHandle());
    // A zero handle stands for "no bound", mirroring how iterateUpperBound()
    // interprets a zero handle coming back from the native side.
    final long upperBoundSliceHandle =
        iterateUpperBound == null ? 0L : iterateUpperBound.getNativeHandle();
    setIterateUpperBound(nativeHandle_, upperBoundSliceHandle);
    // Hold onto a reference so it doesn't get garbage collected out from under us.
    // Assigned after the native update so the previous slice stays reachable
    // until the native side no longer refers to it.
    iterateUpperBoundSlice_ = iterateUpperBound;
    return this;
  }

  /**
   * Returns the largest key at which the forward
   * iterator can return an entry.
   *
   * The upper bound is exclusive i.e. the bound value is not a valid entry.
   *
   * @return the largest key, or null if there is no upper bound defined.
   */
  public Slice iterateUpperBound() {
    assert(isOwningHandle());
    final long upperBoundSliceHandle = iterateUpperBound(nativeHandle_);
    if (upperBoundSliceHandle != 0) {
      // Disown the new slice - it's owned by the C++ side of the JNI boundary
      // from the perspective of this method.
      return new Slice(upperBoundSliceHandle, false);
    }
    return null;
  }

  /**
   * A callback to determine whether relevant keys for this scan exist in a
   * given table based on the table's properties. The callback is passed the
   * properties of each table during iteration. If the callback returns false,
   * the table will not be scanned. This option only affects Iterators and has
   * no impact on point lookups.
   *
   * Default: null (every table will be scanned)
   *
   * @param tableFilter the table filter for the callback; it is dereferenced
   *     unconditionally, so it must not be null.
   *
   * @return the reference to the current ReadOptions.
   */
  public ReadOptions setTableFilter(final AbstractTableFilter tableFilter) {
    assert(isOwningHandle());
    setTableFilter(nativeHandle_, tableFilter.nativeHandle_);
    return this;
  }

  /**
   * Needed to support differential snapshots. Has 2 effects:
   * 1) Iterator will skip all internal keys with seqnum < iter_start_seqnum
   * 2) if this param > 0 iterator will return INTERNAL keys instead of user
   *    keys; e.g. return tombstones as well.
   *
   * Default: 0 (don't filter by seqnum, return user keys)
   *
   * @param startSeqnum the starting sequence number.
   *
   * @return the reference to the current ReadOptions.
   */
  public ReadOptions setIterStartSeqnum(final long startSeqnum) {
    assert(isOwningHandle());
    setIterStartSeqnum(nativeHandle_, startSeqnum);
    return this;
  }

  /**
   * Returns the starting Sequence Number of any iterator.
   * See {@link #setIterStartSeqnum(long)}.
   *
   * @return the starting sequence number of any iterator.
   */
  public long iterStartSeqnum() {
    assert(isOwningHandle());
    return iterStartSeqnum(nativeHandle_);
  }

  // instance variables
  // NOTE: If you add new member variables, please update the copy constructor above!
//
  // Hold a reference to any iterate lower or upper bound that was set on this
  // object until we're destroyed or it's overwritten. That way the caller can
  // freely leave scope without us losing the Java Slice object, which during
  // close() would also reap its associated rocksdb::Slice native object since
  // it's possibly (likely) to be an owning handle.
  private Slice iterateLowerBoundSlice_;
  private Slice iterateUpperBoundSlice_;

  // Native (JNI) method declarations.
  private native static long newReadOptions();
  private native static long newReadOptions(final boolean verifyChecksums,
      final boolean fillCache);
  private native static long copyReadOptions(long handle);
  @Override protected final native void disposeInternal(final long handle);

  private native boolean verifyChecksums(long handle);
  private native void setVerifyChecksums(long handle, boolean verifyChecksums);
  private native boolean fillCache(long handle);
  private native void setFillCache(long handle, boolean fillCache);
  private native long snapshot(long handle);
  private native void setSnapshot(long handle, long snapshotHandle);
  private native byte readTier(long handle);
  private native void setReadTier(long handle, byte readTierValue);
  private native boolean tailing(long handle);
  private native void setTailing(long handle, boolean tailing);
  private native boolean managed(long handle);
  private native void setManaged(long handle, boolean managed);
  private native boolean totalOrderSeek(long handle);
  private native void setTotalOrderSeek(long handle, boolean totalOrderSeek);
  private native boolean prefixSameAsStart(long handle);
  private native void setPrefixSameAsStart(long handle, boolean prefixSameAsStart);
  private native boolean pinData(long handle);
  private native void setPinData(long handle, boolean pinData);
  private native boolean backgroundPurgeOnIteratorCleanup(final long handle);
  private native void setBackgroundPurgeOnIteratorCleanup(final long handle,
      final boolean backgroundPurgeOnIteratorCleanup);
  private native long readaheadSize(final long handle);
  private native void setReadaheadSize(final long handle,
      final long readaheadSize);
  private native long maxSkippableInternalKeys(final long handle);
  private native void setMaxSkippableInternalKeys(final long handle,
      final long maxSkippableInternalKeys);
  private native boolean ignoreRangeDeletions(final long handle);
  private native void setIgnoreRangeDeletions(final long handle,
      final boolean ignoreRangeDeletions);
  private native void setIterateUpperBound(final long handle,
      final long upperBoundSliceHandle);
  private native long iterateUpperBound(final long handle);
  private native void setIterateLowerBound(final long handle,
      final long lowerBoundSliceHandle);
  private native long iterateLowerBound(final long handle);
  private native void setTableFilter(final long handle,
      final long tableFilterHandle);
  private native void setIterStartSeqnum(final long handle, final long seqNum);
  private native long iterStartSeqnum(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ReadTier.java000066400000000000000000000023351370372246700234050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * RocksDB {@link ReadOptions} read tiers.
 */
public enum ReadTier {
  READ_ALL_TIER((byte)0),
  BLOCK_CACHE_TIER((byte)1),
  PERSISTED_TIER((byte)2),
  MEMTABLE_TIER((byte)3);

  // byte representation of this tier, as passed to the enum constructor
  private final byte value;

  ReadTier(final byte value) {
    this.value = value;
  }

  /**
   * Returns the byte value of the enumeration's value
   *
   * @return byte representation
   */
  public byte getValue() {
    return value;
  }

  /**
   * Get ReadTier by byte value.
   *
   * @param value byte representation of ReadTier.
   *
   * @return the {@link org.rocksdb.ReadTier} instance whose byte value
   *     equals the argument.
   * @throws java.lang.IllegalArgumentException if an invalid
   *     value is provided.
*/ public static ReadTier getReadTier(final byte value) { for (final ReadTier readTier : ReadTier.values()) { if (readTier.getValue() == value){ return readTier; } } throw new IllegalArgumentException("Illegal value provided for ReadTier."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java000066400000000000000000000012051370372246700303350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Just a Java wrapper around EmptyValueCompactionFilter implemented in C++ */ public class RemoveEmptyValueCompactionFilter extends AbstractCompactionFilter { public RemoveEmptyValueCompactionFilter() { super(createNewRemoveEmptyValueCompactionFilter0()); } private native static long createNewRemoveEmptyValueCompactionFilter0(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RestoreOptions.java000066400000000000000000000021611370372246700247020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * RestoreOptions to control the behavior of restore. * * Note that dispose() must be called before this instance become out-of-scope * to release the allocated memory in c++. * */ public class RestoreOptions extends RocksObject { /** * Constructor * * @param keepLogFiles If true, restore won't overwrite the existing log files * in wal_dir. It will also move all log files from archive directory to * wal_dir. Use this option in combination with * BackupableDBOptions::backup_log_files = false for persisting in-memory * databases. 
*   Default: false
   */
  public RestoreOptions(final boolean keepLogFiles) {
    super(newRestoreOptions(keepLogFiles));
  }

  // Native (JNI) method declarations.
  private native static long newRestoreOptions(boolean keepLogFiles);
  @Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java000066400000000000000000000034351370372246700271140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * Determines the type of synchronisation primitive used
 * in native code.
 */
public enum ReusedSynchronisationType {
  /**
   * Standard mutex.
   */
  MUTEX((byte)0x0),

  /**
   * Use adaptive mutex, which spins in the user space before resorting
   * to kernel. This could reduce context switch when the mutex is not
   * heavily contended. However, if the mutex is hot, we could end up
   * wasting spin time.
   */
  ADAPTIVE_MUTEX((byte)0x1),

  /**
   * There is a reused buffer per-thread.
   */
  THREAD_LOCAL((byte)0x2);

  // byte representation of this constant, as passed to the enum constructor
  private final byte value;

  ReusedSynchronisationType(final byte value) {
    this.value = value;
  }

  /**
   * Returns the byte value of the enumeration's value
   *
   * @return byte representation
   */
  public byte getValue() {
    return value;
  }

  /**
   * Get ReusedSynchronisationType by byte value.
   *
   * @param value byte representation of ReusedSynchronisationType.
   *
   * @return {@link org.rocksdb.ReusedSynchronisationType} instance.
   * @throws java.lang.IllegalArgumentException if an invalid
   *     value is provided.
*/ public static ReusedSynchronisationType getReusedSynchronisationType( final byte value) { for (final ReusedSynchronisationType reusedSynchronisationType : ReusedSynchronisationType.values()) { if (reusedSynchronisationType.getValue() == value) { return reusedSynchronisationType; } } throw new IllegalArgumentException( "Illegal value provided for ReusedSynchronisationType."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksCallbackObject.java000066400000000000000000000032211370372246700255260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * RocksCallbackObject is similar to {@link RocksObject} but varies * in its construction as it is designed for Java objects which have functions * which are called from C++ via JNI. * * RocksCallbackObject is the base-class any RocksDB classes that acts as a * callback from some underlying underlying native C++ {@code rocksdb} object. * * The use of {@code RocksObject} should always be preferred over * {@link RocksCallbackObject} if callbacks are not required. */ public abstract class RocksCallbackObject extends AbstractImmutableNativeReference { protected final long nativeHandle_; protected RocksCallbackObject(final long... nativeParameterHandles) { super(true); this.nativeHandle_ = initializeNative(nativeParameterHandles); } /** * Construct the Native C++ object which will callback * to our object methods * * @param nativeParameterHandles An array of native handles for any parameter * objects that are needed during construction * * @return The native handle of the C++ object which will callback to us */ protected abstract long initializeNative( final long... 
nativeParameterHandles); /** * Deletes underlying C++ native callback object pointer */ @Override protected void disposeInternal() { disposeInternal(nativeHandle_); } private native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksDB.java000066400000000000000000005420701370372246700232020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import org.rocksdb.util.Environment; /** * A RocksDB is a persistent ordered map from keys to values. It is safe for * concurrent access from multiple threads without any external synchronization. * All methods of this class could potentially throw RocksDBException, which * indicates sth wrong at the RocksDB library side and the call failed. */ public class RocksDB extends RocksObject { public static final byte[] DEFAULT_COLUMN_FAMILY = "default".getBytes(); public static final int NOT_FOUND = -1; private enum LibraryState { NOT_LOADED, LOADING, LOADED } private static AtomicReference libraryLoaded = new AtomicReference<>(LibraryState.NOT_LOADED); static { RocksDB.loadLibrary(); } /** * Loads the necessary library files. * Calling this method twice will have no effect. * By default the method extracts the shared library for loading at * java.io.tmpdir, however, you can override this temporary location by * setting the environment variable ROCKSDB_SHAREDLIB_DIR. 
*/ public static void loadLibrary() { if (libraryLoaded.get() == LibraryState.LOADED) { return; } if (libraryLoaded.compareAndSet(LibraryState.NOT_LOADED, LibraryState.LOADING)) { final String tmpDir = System.getenv("ROCKSDB_SHAREDLIB_DIR"); // loading possibly necessary libraries. for (final CompressionType compressionType : CompressionType.values()) { try { if (compressionType.getLibraryName() != null) { System.loadLibrary(compressionType.getLibraryName()); } } catch (UnsatisfiedLinkError e) { // since it may be optional, we ignore its loading failure here. } } try { NativeLibraryLoader.getInstance().loadLibrary(tmpDir); } catch (IOException e) { libraryLoaded.set(LibraryState.NOT_LOADED); throw new RuntimeException("Unable to load the RocksDB shared library", e); } libraryLoaded.set(LibraryState.LOADED); return; } while (libraryLoaded.get() == LibraryState.LOADING) { try { Thread.sleep(10); } catch(final InterruptedException e) { //ignore } } } /** * Tries to load the necessary library files from the given list of * directories. * * @param paths a list of strings where each describes a directory * of a library. */ public static void loadLibrary(final List paths) { if (libraryLoaded.get() == LibraryState.LOADED) { return; } if (libraryLoaded.compareAndSet(LibraryState.NOT_LOADED, LibraryState.LOADING)) { for (final CompressionType compressionType : CompressionType.values()) { if (compressionType.equals(CompressionType.NO_COMPRESSION)) { continue; } for (final String path : paths) { try { System.load(path + "/" + Environment.getSharedLibraryFileName( compressionType.getLibraryName())); break; } catch (UnsatisfiedLinkError e) { // since they are optional, we ignore loading fails. 
} } } boolean success = false; UnsatisfiedLinkError err = null; for (final String path : paths) { try { System.load(path + "/" + Environment.getJniLibraryFileName("rocksdbjni")); success = true; break; } catch (UnsatisfiedLinkError e) { err = e; } } if (!success) { libraryLoaded.set(LibraryState.NOT_LOADED); throw err; } libraryLoaded.set(LibraryState.LOADED); return; } while (libraryLoaded.get() == LibraryState.LOADING) { try { Thread.sleep(10); } catch(final InterruptedException e) { //ignore } } } /** * Private constructor. * * @param nativeHandle The native handle of the C++ RocksDB object */ protected RocksDB(final long nativeHandle) { super(nativeHandle); } /** * The factory constructor of RocksDB that opens a RocksDB instance given * the path to the database using the default options w/ createIfMissing * set to true. * * @param path the path to the rocksdb. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. * @see Options#setCreateIfMissing(boolean) */ public static RocksDB open(final String path) throws RocksDBException { final Options options = new Options(); options.setCreateIfMissing(true); return open(options, path); } /** * The factory constructor of RocksDB that opens a RocksDB instance given * the path to the database using the specified options and db path and a list * of column family names. *

* If opened in read write mode every existing column family name must be * passed within the list to this method.

*

* If opened in read-only mode only a subset of existing column families must * be passed to this method.

*

* Options instance *should* not be disposed before all DBs using this options * instance have been closed. If user doesn't call options dispose explicitly, * then this options instance will be GC'd automatically

*

* ColumnFamily handles are disposed when the RocksDB instance is disposed. *

* * @param path the path to the rocksdb. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances * on open. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. * @see DBOptions#setCreateIfMissing(boolean) */ public static RocksDB open(final String path, final List columnFamilyDescriptors, final List columnFamilyHandles) throws RocksDBException { final DBOptions options = new DBOptions(); return open(options, path, columnFamilyDescriptors, columnFamilyHandles); } /** * The factory constructor of RocksDB that opens a RocksDB instance given * the path to the database using the specified options and db path. * *

* Options instance *should* not be disposed before all DBs using this options * instance have been closed. If user doesn't call options dispose explicitly, * then this options instance will be GC'd automatically.

*

* Options instance can be re-used to open multiple DBs if DB statistics is * not used. If DB statistics are required, then its recommended to open DB * with new Options instance as underlying native statistics instance does not * use any locks to prevent concurrent updates.

* * @param options {@link org.rocksdb.Options} instance. * @param path the path to the rocksdb. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. * * @see Options#setCreateIfMissing(boolean) */ public static RocksDB open(final Options options, final String path) throws RocksDBException { // when non-default Options is used, keeping an Options reference // in RocksDB can prevent Java to GC during the life-time of // the currently-created RocksDB. final RocksDB db = new RocksDB(open(options.nativeHandle_, path)); db.storeOptionsInstance(options); return db; } /** * The factory constructor of RocksDB that opens a RocksDB instance given * the path to the database using the specified options and db path and a list * of column family names. *

* If opened in read write mode every existing column family name must be * passed within the list to this method.

*

* If opened in read-only mode only a subset of existing column families must * be passed to this method.

*

* Options instance *should* not be disposed before all DBs using this options * instance have been closed. If user doesn't call options dispose explicitly, * then this options instance will be GC'd automatically.

*

* Options instance can be re-used to open multiple DBs if DB statistics is * not used. If DB statistics are required, then its recommended to open DB * with new Options instance as underlying native statistics instance does not * use any locks to prevent concurrent updates.

*

* ColumnFamily handles are disposed when the RocksDB instance is disposed. *

* * @param options {@link org.rocksdb.DBOptions} instance. * @param path the path to the rocksdb. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances * on open. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. * * @see DBOptions#setCreateIfMissing(boolean) */ public static RocksDB open(final DBOptions options, final String path, final List columnFamilyDescriptors, final List columnFamilyHandles) throws RocksDBException { final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; for (int i = 0; i < columnFamilyDescriptors.size(); i++) { final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors .get(i); cfNames[i] = cfDescriptor.getName(); cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; } final long[] handles = open(options.nativeHandle_, path, cfNames, cfOptionHandles); final RocksDB db = new RocksDB(handles[0]); db.storeOptionsInstance(options); for (int i = 1; i < handles.length; i++) { columnFamilyHandles.add(new ColumnFamilyHandle(db, handles[i])); } return db; } /** * The factory constructor of RocksDB that opens a RocksDB instance in * Read-Only mode given the path to the database using the default * options. * * @param path the path to the RocksDB. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static RocksDB openReadOnly(final String path) throws RocksDBException { // This allows to use the rocksjni default Options instead of // the c++ one. 
Options options = new Options(); return openReadOnly(options, path); } /** * The factory constructor of RocksDB that opens a RocksDB instance in * Read-Only mode given the path to the database using the default * options. * * @param path the path to the RocksDB. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances * on open. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static RocksDB openReadOnly(final String path, final List columnFamilyDescriptors, final List columnFamilyHandles) throws RocksDBException { // This allows to use the rocksjni default Options instead of // the c++ one. final DBOptions options = new DBOptions(); return openReadOnly(options, path, columnFamilyDescriptors, columnFamilyHandles); } /** * The factory constructor of RocksDB that opens a RocksDB instance in * Read-Only mode given the path to the database using the specified * options and db path. * * Options instance *should* not be disposed before all DBs using this options * instance have been closed. If user doesn't call options dispose explicitly, * then this options instance will be GC'd automatically. * * @param options {@link Options} instance. * @param path the path to the RocksDB. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static RocksDB openReadOnly(final Options options, final String path) throws RocksDBException { // when non-default Options is used, keeping an Options reference // in RocksDB can prevent Java to GC during the life-time of // the currently-created RocksDB. 
final RocksDB db = new RocksDB(openROnly(options.nativeHandle_, path)); db.storeOptionsInstance(options); return db; } /** * The factory constructor of RocksDB that opens a RocksDB instance in * Read-Only mode given the path to the database using the specified * options and db path. * *

This open method allows to open RocksDB using a subset of available * column families

*

Options instance *should* not be disposed before all DBs using this * options instance have been closed. If user doesn't call options dispose * explicitly,then this options instance will be GC'd automatically.

* * @param options {@link DBOptions} instance. * @param path the path to the RocksDB. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances * on open. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static RocksDB openReadOnly(final DBOptions options, final String path, final List columnFamilyDescriptors, final List columnFamilyHandles) throws RocksDBException { // when non-default Options is used, keeping an Options reference // in RocksDB can prevent Java to GC during the life-time of // the currently-created RocksDB. final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; for (int i = 0; i < columnFamilyDescriptors.size(); i++) { final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors .get(i); cfNames[i] = cfDescriptor.getName(); cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; } final long[] handles = openROnly(options.nativeHandle_, path, cfNames, cfOptionHandles); final RocksDB db = new RocksDB(handles[0]); db.storeOptionsInstance(options); for (int i = 1; i < handles.length; i++) { columnFamilyHandles.add(new ColumnFamilyHandle(db, handles[i])); } return db; } /** * This is similar to {@link #close()} except that it * throws an exception if any error occurs. * * This will not fsync the WAL files. * If syncing is required, the caller must first call {@link #syncWal()} * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch * with {@link WriteOptions#setSync(boolean)} set to true. * * See also {@link #close()}. * * @throws RocksDBException if an error occurs whilst closing. 
*/ public void closeE() throws RocksDBException { if (owningHandle_.compareAndSet(true, false)) { try { closeDatabase(nativeHandle_); } finally { disposeInternal(); } } } /** * This is similar to {@link #closeE()} except that it * silently ignores any errors. * * This will not fsync the WAL files. * If syncing is required, the caller must first call {@link #syncWal()} * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch * with {@link WriteOptions#setSync(boolean)} set to true. * * See also {@link #close()}. */ @Override public void close() { if (owningHandle_.compareAndSet(true, false)) { try { closeDatabase(nativeHandle_); } catch (final RocksDBException e) { // silently ignore the error report } finally { disposeInternal(); } } } /** * Static method to determine all available column families for a * rocksdb database identified by path * * @param options Options for opening the database * @param path Absolute path to rocksdb database * @return List<byte[]> List containing the column family names * * @throws RocksDBException thrown if error happens in underlying * native library. */ public static List listColumnFamilies(final Options options, final String path) throws RocksDBException { return Arrays.asList(RocksDB.listColumnFamilies(options.nativeHandle_, path)); } /** * Creates a new column family with the name columnFamilyName and * allocates a ColumnFamilyHandle within an internal structure. * The ColumnFamilyHandle is automatically disposed with DB disposal. * * @param columnFamilyDescriptor column family to be created. * @return {@link org.rocksdb.ColumnFamilyHandle} instance. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
  public ColumnFamilyHandle createColumnFamily(
      final ColumnFamilyDescriptor columnFamilyDescriptor)
      throws RocksDBException {
    // fetch the name once so that the array and the length passed to the
    // native call always belong together
    final byte[] cfName = columnFamilyDescriptor.getName();
    return new ColumnFamilyHandle(this, createColumnFamily(nativeHandle_,
        cfName, cfName.length,
        columnFamilyDescriptor.getOptions().nativeHandle_));
  }

  /**
   * Bulk create column families with the same column family options.
   *
   * @param columnFamilyOptions the options for the column families.
   * @param columnFamilyNames the names of the column families.
   *
   * @return the handles to the newly created column families.
   *
   * @throws RocksDBException if an error occurs whilst creating
   *     the column families
   */
  public List<ColumnFamilyHandle> createColumnFamilies(
      final ColumnFamilyOptions columnFamilyOptions,
      final List<byte[]> columnFamilyNames) throws RocksDBException {
    final byte[][] cfNames = columnFamilyNames.toArray(
        new byte[0][]);
    final long[] cfHandles = createColumnFamilies(nativeHandle_,
        columnFamilyOptions.nativeHandle_, cfNames);
    final List<ColumnFamilyHandle> columnFamilyHandles =
        new ArrayList<>(cfHandles.length);
    for (final long cfHandle : cfHandles) {
      columnFamilyHandles.add(new ColumnFamilyHandle(this, cfHandle));
    }
    return columnFamilyHandles;
  }

  /**
   * Bulk create column families, each with the options carried by its own
   * descriptor.
   *
   * @param columnFamilyDescriptors the descriptions of the column families.
   *
   * @return the handles to the newly created column families.
* * @throws RocksDBException if an error occurs whilst creating * the column families */ public List createColumnFamilies( final List columnFamilyDescriptors) throws RocksDBException { final long[] cfOptsHandles = new long[columnFamilyDescriptors.size()]; final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; for (int i = 0; i < columnFamilyDescriptors.size(); i++) { final ColumnFamilyDescriptor columnFamilyDescriptor = columnFamilyDescriptors.get(i); cfOptsHandles[i] = columnFamilyDescriptor.getOptions().nativeHandle_; cfNames[i] = columnFamilyDescriptor.getName(); } final long[] cfHandles = createColumnFamilies(nativeHandle_, cfOptsHandles, cfNames); final List columnFamilyHandles = new ArrayList<>(cfHandles.length); for (int i = 0; i < cfHandles.length; i++) { columnFamilyHandles.add(new ColumnFamilyHandle(this, cfHandles[i])); } return columnFamilyHandles; } /** * Drops the column family specified by {@code columnFamilyHandle}. This call * only records a drop record in the manifest and prevents the column * family from flushing and compacting. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void dropColumnFamily(final ColumnFamilyHandle columnFamilyHandle) throws RocksDBException { dropColumnFamily(nativeHandle_, columnFamilyHandle.nativeHandle_); } // Bulk drop column families. This call only records drop records in the // manifest and prevents the column families from flushing and compacting. // In case of error, the request may succeed partially. User may call // ListColumnFamilies to check the result. 
public void dropColumnFamilies( final List columnFamilies) throws RocksDBException { final long[] cfHandles = new long[columnFamilies.size()]; for (int i = 0; i < columnFamilies.size(); i++) { cfHandles[i] = columnFamilies.get(i).nativeHandle_; } dropColumnFamilies(nativeHandle_, cfHandles); } //TODO(AR) what about DestroyColumnFamilyHandle /** * Set the database entry for "key" to "value". * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void put(final byte[] key, final byte[] value) throws RocksDBException { put(nativeHandle_, key, 0, key.length, value, 0, value.length); } /** * Set the database entry for "key" to "value". * * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the value associated with the specified key * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if errors happens in underlying native * library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void put(final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); put(nativeHandle_, key, offset, len, value, vOffset, vLen); } /** * Set the database entry for "key" to "value" in the specified * column family. 
* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * throws IllegalArgumentException if column family is not present * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { put(nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_); } /** * Set the database entry for "key" to "value" in the specified * column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must * be non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the value associated with the specified key * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if errors happens in underlying native * library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); put(nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } /** * Set the database entry for "key" to "value". * * @param writeOpts {@link org.rocksdb.WriteOptions} instance. 
* @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void put(final WriteOptions writeOpts, final byte[] key, final byte[] value) throws RocksDBException { put(nativeHandle_, writeOpts.nativeHandle_, key, 0, key.length, value, 0, value.length); } /** * Set the database entry for "key" to "value". * * @param writeOpts {@link org.rocksdb.WriteOptions} instance. * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the value associated with the specified key * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void put(final WriteOptions writeOpts, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); put(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen); } /** * Set the database entry for "key" to "value" for the specified * column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpts {@link org.rocksdb.WriteOptions} instance. * @param key the specified key to be inserted. * @param value the value associated with the specified key. 
* * throws IllegalArgumentException if column family is not present * * @throws RocksDBException thrown if error happens in underlying * native library. * @see IllegalArgumentException */ public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, final byte[] key, final byte[] value) throws RocksDBException { put(nativeHandle_, writeOpts.nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_); } /** * Set the database entry for "key" to "value" for the specified * column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpts {@link org.rocksdb.WriteOptions} instance. * @param key the specified key to be inserted. Position and limit is used. * Supports direct buffer only. * @param value the value associated with the specified key. Position and limit is used. * Supports direct buffer only. * * throws IllegalArgumentException if column family is not present * * @throws RocksDBException thrown if error happens in underlying * native library. * @see IllegalArgumentException */ public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { assert key.isDirect() && value.isDirect(); putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); key.position(key.limit()); value.position(value.limit()); } /** * Set the database entry for "key" to "value". * * @param writeOpts {@link org.rocksdb.WriteOptions} instance. * @param key the specified key to be inserted. Position and limit is used. * Supports direct buffer only. * @param value the value associated with the specified key. Position and limit is used. * Supports direct buffer only. 
* * throws IllegalArgumentException if column family is not present * * @throws RocksDBException thrown if error happens in underlying * native library. * @see IllegalArgumentException */ public void put(final WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { assert key.isDirect() && value.isDirect(); putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, value.position(), value.remaining(), 0); key.position(key.limit()); value.position(value.limit()); } /** * Set the database entry for "key" to "value" for the specified * column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpts {@link org.rocksdb.WriteOptions} instance. * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the value associated with the specified key * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); put(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } /** * Remove the database entry (if any) for "key". 
Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. * * @deprecated Use {@link #delete(byte[])} */ @Deprecated public void remove(final byte[] key) throws RocksDBException { delete(key); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final byte[] key) throws RocksDBException { delete(nativeHandle_, key, 0, key.length); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param key Key to delete within database * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be * non-negative and no larger than ("key".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final byte[] key, final int offset, final int len) throws RocksDBException { delete(nativeHandle_, key, offset, len); } /** * Remove the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. 
* * @deprecated Use {@link #delete(ColumnFamilyHandle, byte[])} */ @Deprecated public void remove(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { delete(columnFamilyHandle, key); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { delete(nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key Key to delete within database * @param offset the offset of the "key" array to be used, * must be non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("value".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len) throws RocksDBException { delete(nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); } /** * Remove the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. 
* * @deprecated Use {@link #delete(WriteOptions, byte[])} */ @Deprecated public void remove(final WriteOptions writeOpt, final byte[] key) throws RocksDBException { delete(writeOpt, key); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final WriteOptions writeOpt, final byte[] key) throws RocksDBException { delete(nativeHandle_, writeOpt.nativeHandle_, key, 0, key.length); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be * non-negative and no larger than ("key".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final WriteOptions writeOpt, final byte[] key, final int offset, final int len) throws RocksDBException { delete(nativeHandle_, writeOpt.nativeHandle_, key, offset, len); } /** * Remove the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. 
* * @deprecated Use {@link #delete(ColumnFamilyHandle, WriteOptions, byte[])} */ @Deprecated public void remove(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, final byte[] key) throws RocksDBException { delete(columnFamilyHandle, writeOpt, key); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, final byte[] key) throws RocksDBException { delete(nativeHandle_, writeOpt.nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be * non-negative and no larger than ("key".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, final byte[] key, final int offset, final int len) throws RocksDBException { delete(nativeHandle_, writeOpt.nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); } /** * Get the value associated with the specified key within column family. 
* * @param opt {@link org.rocksdb.ReadOptions} instance. * @param key the key to retrieve the value. It is using position and limit. * Supports direct buffer only. * @param value the out-value to receive the retrieved value. * It is using position and limit. Limit is set according to value size. * Supports direct buffer only. * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final ReadOptions opt, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { assert key.isDirect() && value.isDirect(); int result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), key.remaining(), value, value.position(), value.remaining(), 0); if (result != NOT_FOUND) { value.limit(Math.min(value.limit(), value.position() + result)); } key.position(key.limit()); return result; } /** * Get the value associated with the specified key within column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param opt {@link org.rocksdb.ReadOptions} instance. * @param key the key to retrieve the value. It is using position and limit. * Supports direct buffer only. * @param value the out-value to receive the retrieved value. * It is using position and limit. Limit is set according to value size. * Supports direct buffer only. * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. 
RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions opt, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { assert key.isDirect() && value.isDirect(); int result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), key.remaining(), value, value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); if (result != NOT_FOUND) { value.limit(Math.min(value.limit(), value.position() + result)); } key.position(key.limit()); return result; } /** * Remove the database entry for {@code key}. Requires that the key exists * and was not overwritten. It is not an error if the key did not exist * in the database. * * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple * times), then the result of calling SingleDelete() on this key is undefined. * SingleDelete() only behaves correctly if there has been only one Put() * for this key since the previous call to SingleDelete() for this key. * * This feature is currently an experimental performance optimization * for a very specific workload. It is up to the caller to ensure that * SingleDelete is only used for a key that is not deleted using Delete() or * written using Merge(). Mixing SingleDelete operations with Deletes and * Merges can result in undefined behavior. * * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, key, key.length); } /** * Remove the database entry for {@code key}. Requires that the key exists * and was not overwritten. It is not an error if the key did not exist * in the database. 
* * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple * times), then the result of calling SingleDelete() on this key is undefined. * SingleDelete() only behaves correctly if there has been only one Put() * for this key since the previous call to SingleDelete() for this key. * * This feature is currently an experimental performance optimization * for a very specific workload. It is up to the caller to ensure that * SingleDelete is only used for a key that is not deleted using Delete() or * written using Merge(). Mixing SingleDelete operations with Deletes and * Merges can result in undefined behavior. * * @param columnFamilyHandle The column family to delete the key from * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } /** * Remove the database entry for {@code key}. Requires that the key exists * and was not overwritten. It is not an error if the key did not exist * in the database. * * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple * times), then the result of calling SingleDelete() on this key is undefined. * SingleDelete() only behaves correctly if there has been only one Put() * for this key since the previous call to SingleDelete() for this key. * * This feature is currently an experimental performance optimization * for a very specific workload. It is up to the caller to ensure that * SingleDelete is only used for a key that is not deleted using Delete() or * written using Merge(). Mixing SingleDelete operations with Deletes and * Merges can result in undefined behavior. * * Note: consider setting {@link WriteOptions#setSync(boolean)} true. 
* * @param writeOpt Write options for the delete * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final WriteOptions writeOpt, final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, writeOpt.nativeHandle_, key, key.length); } /** * Remove the database entry for {@code key}. Requires that the key exists * and was not overwritten. It is not an error if the key did not exist * in the database. * * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple * times), then the result of calling SingleDelete() on this key is undefined. * SingleDelete() only behaves correctly if there has been only one Put() * for this key since the previous call to SingleDelete() for this key. * * This feature is currently an experimental performance optimization * for a very specific workload. It is up to the caller to ensure that * SingleDelete is only used for a key that is not deleted using Delete() or * written using Merge(). Mixing SingleDelete operations with Deletes and * Merges can result in undefined behavior. * * Note: consider setting {@link WriteOptions#setSync(boolean)} true. * * @param columnFamilyHandle The column family to delete the key from * @param writeOpt Write options for the delete * @param key Key to delete within database * * @throws RocksDBException thrown if error happens in underlying * native library. */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, writeOpt.nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } /** * Removes the database entries in the range ["beginKey", "endKey"), i.e., * including "beginKey" and excluding "endKey". 
a non-OK status on error. It * is not an error if no keys exist in the range ["beginKey", "endKey"). * * Delete the database entry (if any) for "key". Returns OK on success, and a * non-OK status on error. It is not an error if "key" did not exist in the * database. * * @param beginKey First key to delete within database (inclusive) * @param endKey Last key to delete within database (exclusive) * * @throws RocksDBException thrown if error happens in underlying native * library. */ public void deleteRange(final byte[] beginKey, final byte[] endKey) throws RocksDBException { deleteRange(nativeHandle_, beginKey, 0, beginKey.length, endKey, 0, endKey.length); } /** * Removes the database entries in the range ["beginKey", "endKey"), i.e., * including "beginKey" and excluding "endKey". a non-OK status on error. It * is not an error if no keys exist in the range ["beginKey", "endKey"). * * Delete the database entry (if any) for "key". Returns OK on success, and a * non-OK status on error. It is not an error if "key" did not exist in the * database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance * @param beginKey First key to delete within database (inclusive) * @param endKey Last key to delete within database (exclusive) * * @throws RocksDBException thrown if error happens in underlying native * library. */ public void deleteRange(final ColumnFamilyHandle columnFamilyHandle, final byte[] beginKey, final byte[] endKey) throws RocksDBException { deleteRange(nativeHandle_, beginKey, 0, beginKey.length, endKey, 0, endKey.length, columnFamilyHandle.nativeHandle_); } /** * Removes the database entries in the range ["beginKey", "endKey"), i.e., * including "beginKey" and excluding "endKey". a non-OK status on error. It * is not an error if no keys exist in the range ["beginKey", "endKey"). * * Delete the database entry (if any) for "key". Returns OK on success, and a * non-OK status on error. 
It is not an error if "key" did not exist in the * database. * * @param writeOpt WriteOptions to be used with delete operation * @param beginKey First key to delete within database (inclusive) * @param endKey Last key to delete within database (exclusive) * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void deleteRange(final WriteOptions writeOpt, final byte[] beginKey, final byte[] endKey) throws RocksDBException { deleteRange(nativeHandle_, writeOpt.nativeHandle_, beginKey, 0, beginKey.length, endKey, 0, endKey.length); } /** * Removes the database entries in the range ["beginKey", "endKey"), i.e., * including "beginKey" and excluding "endKey". a non-OK status on error. It * is not an error if no keys exist in the range ["beginKey", "endKey"). * * Delete the database entry (if any) for "key". Returns OK on success, and a * non-OK status on error. It is not an error if "key" did not exist in the * database. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance * @param writeOpt WriteOptions to be used with delete operation * @param beginKey First key to delete within database (included) * @param endKey Last key to delete within database (excluded) * * @throws RocksDBException thrown if error happens in underlying native * library. */ public void deleteRange(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, final byte[] beginKey, final byte[] endKey) throws RocksDBException { deleteRange(nativeHandle_, writeOpt.nativeHandle_, beginKey, 0, beginKey.length, endKey, 0, endKey.length, columnFamilyHandle.nativeHandle_); } /** * Add merge operand for key/value pair. * * @param key the specified key to be merged. * @param value the value to be merged with the current value for the * specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
  public void merge(final byte[] key, final byte[] value)
      throws RocksDBException {
    // Whole arrays are merged, hence the zero offsets below.
    final int keyLength = key.length;
    final int valueLength = value.length;
    merge(nativeHandle_, key, 0, keyLength, value, 0, valueLength);
  }

  /**
   * Add merge operand for key/value pair, using sub-ranges of the supplied
   * arrays.
   *
   * @param key the specified key to be merged.
   * @param offset the offset of the "key" array to be used, must be
   *     non-negative and no larger than "key".length
   * @param len the length of the "key" array to be used, must be non-negative
   *     and no larger than ("key".length - offset)
   * @param value the value to be merged with the current value for the
   *     specified key.
   * @param vOffset the offset of the "value" array to be used, must be
   *     non-negative and no larger than "value".length
   * @param vLen the length of the "value" array to be used, must be
   *     non-negative and no larger than ("value".length - vOffset)
   *
   * @throws RocksDBException thrown if error happens in underlying
   *     native library.
   * @throws IndexOutOfBoundsException if an offset or length is out of bounds
   */
  public void merge(final byte[] key, final int offset, final int len,
      final byte[] value, final int vOffset, final int vLen)
      throws RocksDBException {
    // Key range is validated first, then the value range.
    final int keySize = key.length;
    checkBounds(offset, len, keySize);
    final int valueSize = value.length;
    checkBounds(vOffset, vLen, valueSize);

    merge(nativeHandle_, key, offset, len, value, vOffset, vLen);
  }

  /**
   * Add merge operand for key/value pair in a ColumnFamily.
   *
   * @param columnFamilyHandle {@link ColumnFamilyHandle} instance
   * @param key the specified key to be merged.
   * @param value the value to be merged with the current value for
   *     the specified key.
   *
   * @throws RocksDBException thrown if error happens in underlying
   *     native library.
   */
  public void merge(final ColumnFamilyHandle columnFamilyHandle,
      final byte[] key, final byte[] value) throws RocksDBException {
    // Whole arrays are merged, hence the zero offsets below.
    final int keyLength = key.length;
    final int valueLength = value.length;
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    merge(nativeHandle_, key, 0, keyLength, value, 0, valueLength, cfHandle);
  }

  /**
   * Add merge operand for key/value pair in a ColumnFamily.
* * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param key the specified key to be merged. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the value to be merged with the current value for * the specified key. * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * must be non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void merge(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); merge(nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } /** * Add merge operand for key/value pair. * * @param writeOpts {@link WriteOptions} for this write. * @param key the specified key to be merged. * @param value the value to be merged with the current value for * the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void merge(final WriteOptions writeOpts, final byte[] key, final byte[] value) throws RocksDBException { merge(nativeHandle_, writeOpts.nativeHandle_, key, 0, key.length, value, 0, value.length); } /** * Add merge operand for key/value pair. * * @param writeOpts {@link WriteOptions} for this write. * @param key the specified key to be merged. 
* @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("value".length - offset) * @param value the value to be merged with the current value for * the specified key. * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void merge(final WriteOptions writeOpts, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); merge(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database. It is using position and limit. * Supports direct buffer only. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final WriteOptions writeOpt, final ByteBuffer key) throws RocksDBException { assert key.isDirect(); deleteDirect(nativeHandle_, writeOpt.nativeHandle_, key, key.position(), key.remaining(), 0); key.position(key.limit()); } /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. 
* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database. It is using position and limit. * Supports direct buffer only. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, final ByteBuffer key) throws RocksDBException { assert key.isDirect(); deleteDirect(nativeHandle_, writeOpt.nativeHandle_, key, key.position(), key.remaining(), columnFamilyHandle.nativeHandle_); key.position(key.limit()); } /** * Add merge operand for key/value pair. * * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param writeOpts {@link WriteOptions} for this write. * @param key the specified key to be merged. * @param value the value to be merged with the current value for the * specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void merge(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, final byte[] key, final byte[] value) throws RocksDBException { merge(nativeHandle_, writeOpts.nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_); } /** * Add merge operand for key/value pair. * * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param writeOpts {@link WriteOptions} for this write. * @param key the specified key to be merged. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the value to be merged with the current value for * the specified key. 
* @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IndexOutOfBoundsException if an offset or length is out of bounds */ public void merge( final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); merge(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } /** * Apply the specified updates to the database. * * @param writeOpts WriteOptions instance * @param updates WriteBatch instance * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void write(final WriteOptions writeOpts, final WriteBatch updates) throws RocksDBException { write0(nativeHandle_, writeOpts.nativeHandle_, updates.nativeHandle_); } /** * Apply the specified updates to the database. * * @param writeOpts WriteOptions instance * @param updates WriteBatchWithIndex instance * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void write(final WriteOptions writeOpts, final WriteBatchWithIndex updates) throws RocksDBException { write1(nativeHandle_, writeOpts.nativeHandle_, updates.nativeHandle_); } // TODO(AR) we should improve the #get() API, returning -1 (RocksDB.NOT_FOUND) is not very nice // when we could communicate better status into, also the C++ code show that -2 could be returned /** * Get the value associated with the specified key within column family* * * @param key the key to retrieve the value. 
* @param value the out-value to receive the retrieved value. * * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final byte[] key, final byte[] value) throws RocksDBException { return get(nativeHandle_, key, 0, key.length, value, 0, value.length); } /** * Get the value associated with the specified key within column family* * * @param key the key to retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the out-value to receive the retrieved value. * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "value".length * @param vLen the length of the "value" array to be used, must be * non-negative and and no larger than ("value".length - offset) * * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
public int get(final byte[] key, final int offset, final int len,
    final byte[] value, final int vOffset, final int vLen)
    throws RocksDBException {
  // Both slices are validated before anything is handed to native code.
  final int keyCapacity = key.length;
  checkBounds(offset, len, keyCapacity);
  final int valueCapacity = value.length;
  checkBounds(vOffset, vLen, valueCapacity);
  final int valueSize =
      get(nativeHandle_, key, offset, len, value, vOffset, vLen);
  return valueSize;
}

/**
 * Get the value associated with the specified key within a column family.
 *
 * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
 *     instance
 * @param key the key to retrieve the value.
 * @param value the out-value to receive the retrieved value.
 * @return The size of the actual value that matches the specified
 *     {@code key} in byte. If the return value is greater than the
 *     length of {@code value}, then it indicates that the size of the
 *     input buffer {@code value} is insufficient and partial result will
 *     be returned. RocksDB.NOT_FOUND will be returned if the value is not
 *     found.
 *
 * @throws RocksDBException thrown if error happens in underlying
 *     native library.
 */
public int get(final ColumnFamilyHandle columnFamilyHandle,
    final byte[] key, final byte[] value)
    throws RocksDBException, IllegalArgumentException {
  final int keyLength = key.length;
  final int valueCapacity = value.length;
  final int valueSize = get(nativeHandle_, key, 0, keyLength,
      value, 0, valueCapacity, columnFamilyHandle.nativeHandle_);
  return valueSize;
}

/**
 * Get the value associated with the specified key within column family.
 *
 * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
 *     instance
 * @param key the key to retrieve the value.
 * @param offset the offset of the "key" array to be used, must be
 *     non-negative and no larger than "key".length
 * @param len the length of the "key" array to be used, must be non-negative
 *     and no larger than ("key".length - offset)
 * @param value the out-value to receive the retrieved value.
* @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException, IllegalArgumentException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); return get(nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } /** * Get the value associated with the specified key. * * @param opt {@link org.rocksdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final ReadOptions opt, final byte[] key, final byte[] value) throws RocksDBException { return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, value, 0, value.length); } /** * Get the value associated with the specified key. 
* * @param opt {@link org.rocksdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param value the out-value to receive the retrieved value. * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, must be * non-negative and no larger than ("value".length - offset) * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final ReadOptions opt, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len, value, vOffset, vLen); } /** * Get the value associated with the specified key within column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param opt {@link org.rocksdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. 
RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public int get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions opt, final byte[] key, final byte[] value) throws RocksDBException { return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_); } /** * Get the value associated with the specified key within column family. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param opt {@link org.rocksdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be * non-negative and and no larger than ("key".length - offset) * @param value the out-value to receive the retrieved value. * @param vOffset the offset of the "value" array to be used, must be * non-negative and no longer than "key".length * @param vLen the length of the "value" array to be used, and must be * non-negative and no larger than ("value".length - offset) * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. RocksDB.NOT_FOUND will be returned if the value not * found. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ public int get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions opt, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { checkBounds(offset, len, key.length); checkBounds(vOffset, vLen, value.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * * @param key the key retrieve the value. * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] get(final byte[] key) throws RocksDBException { return get(nativeHandle_, key, 0, key.length); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * * @param key the key retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] get(final byte[] key, final int offset, final int len) throws RocksDBException { checkBounds(offset, len, key.length); return get(nativeHandle_, key, offset, len); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. 
null will be * returned if the specified key is not found. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { return get(nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len) throws RocksDBException { checkBounds(offset, len, key.length); return get(nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * * @param key the key retrieve the value. * @param opt Read options. * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. 
* * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] get(final ReadOptions opt, final byte[] key) throws RocksDBException { return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * * @param key the key retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than ("key".length - offset) * @param opt Read options. * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] get(final ReadOptions opt, final byte[] key, final int offset, final int len) throws RocksDBException { checkBounds(offset, len, key.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len); } /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @param opt Read options. * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
public byte[] get(final ColumnFamilyHandle columnFamilyHandle,
    final ReadOptions opt, final byte[] key) throws RocksDBException {
  final int keyLength = key.length;
  final byte[] found = get(nativeHandle_, opt.nativeHandle_, key, 0,
      keyLength, columnFamilyHandle.nativeHandle_);
  return found;
}

/**
 * Simplified get within a column family and with read options: returns a
 * newly allocated byte array holding the value stored under the given key
 * slice, or null when the key is not found.
 *
 * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
 *     instance
 * @param opt Read options.
 * @param key the array holding the key to retrieve the value for.
 * @param offset the offset of the "key" array to be used, must be
 *     non-negative and no larger than "key".length
 * @param len the length of the "key" array to be used, must be non-negative
 *     and no larger than ("key".length - offset)
 * @return a byte array storing the value associated with the input key if
 *     any. null if it does not find the specified key.
 *
 * @throws RocksDBException thrown if error happens in underlying
 *     native library.
 */
public byte[] get(final ColumnFamilyHandle columnFamilyHandle,
    final ReadOptions opt, final byte[] key, final int offset, final int len)
    throws RocksDBException {
  final int keyCapacity = key.length;
  checkBounds(offset, len, keyCapacity);
  final byte[] found = get(nativeHandle_, opt.nativeHandle_, key, offset,
      len, columnFamilyHandle.nativeHandle_);
  return found;
}

/**
 * Returns a map of keys for which values were found in DB.
 *
 * @param keys List of keys for which values need to be retrieved.
 * @return Map where key of map is the key passed by user and value for map
 *     entry is the corresponding value in DB.
 *
 * @throws RocksDBException thrown if error happens in underlying
 *     native library.
 *
 * @deprecated Consider {@link #multiGetAsList(List)} instead.
*/
@Deprecated
public Map<byte[], byte[]> multiGet(final List<byte[]> keys)
    throws RocksDBException {
  assert(keys.size() != 0);

  // Flatten the keys into the parallel arrays the native call expects.
  final byte[][] keysArray = keys.toArray(new byte[0][]);
  // Offsets stay at zero: every key is passed in full.
  final int[] keyOffsets = new int[keysArray.length];
  final int[] keyLengths = new int[keysArray.length];
  for (int i = 0; i < keyLengths.length; i++) {
    keyLengths[i] = keysArray[i].length;
  }

  final byte[][] values = multiGet(nativeHandle_, keysArray, keyOffsets,
      keyLengths);

  final Map<byte[], byte[]> keyValueMap =
      new HashMap<>(computeCapacityHint(values.length));
  for (int i = 0; i < values.length; i++) {
    // A null slot means no value was found; such keys are left out.
    if (values[i] != null) {
      // keysArray[i] is the same object as keys.get(i), without the cost of
      // a positional lookup on lists that are not random-access.
      keyValueMap.put(keysArray[i], values[i]);
    }
  }
  return keyValueMap;
}

/**
 * Returns a map of keys for which values were found in DB.
 *

* Note: Every key needs to have a related column family name in * {@code columnFamilyHandleList}. *

* * @param columnFamilyHandleList {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param keys List of keys for which values need to be retrieved. * @return Map where key of map is the key passed by user and value for map * entry is the corresponding value in DB. * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IllegalArgumentException thrown if the size of passed keys is not * equal to the amount of passed column family handles. * * @deprecated Consider {@link #multiGetAsList(List, List)} instead. */ @Deprecated public Map multiGet( final List columnFamilyHandleList, final List keys) throws RocksDBException, IllegalArgumentException { assert(keys.size() != 0); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.size() != columnFamilyHandleList.size()) { throw new IllegalArgumentException( "For each key there must be a ColumnFamilyHandle."); } final long[] cfHandles = new long[columnFamilyHandleList.size()]; for (int i = 0; i < columnFamilyHandleList.size(); i++) { cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; } final byte[][] keysArray = keys.toArray(new byte[0][]); final int keyOffsets[] = new int[keysArray.length]; final int keyLengths[] = new int[keysArray.length]; for(int i = 0; i < keyLengths.length; i++) { keyLengths[i] = keysArray[i].length; } final byte[][] values = multiGet(nativeHandle_, keysArray, keyOffsets, keyLengths, cfHandles); final Map keyValueMap = new HashMap<>(computeCapacityHint(values.length)); for(int i = 0; i < values.length; i++) { if (values[i] == null) { continue; } keyValueMap.put(keys.get(i), values[i]); } return keyValueMap; } /** * Returns a map of keys for which values were found in DB. * * @param opt Read options. * @param keys of keys for which values need to be retrieved. 
* @return Map where key of map is the key passed by user and value for map * entry is the corresponding value in DB. * * @throws RocksDBException thrown if error happens in underlying * native library. * * @deprecated Consider {@link #multiGetAsList(ReadOptions, List)} instead. */ @Deprecated public Map multiGet(final ReadOptions opt, final List keys) throws RocksDBException { assert(keys.size() != 0); final byte[][] keysArray = keys.toArray(new byte[0][]); final int keyOffsets[] = new int[keysArray.length]; final int keyLengths[] = new int[keysArray.length]; for(int i = 0; i < keyLengths.length; i++) { keyLengths[i] = keysArray[i].length; } final byte[][] values = multiGet(nativeHandle_, opt.nativeHandle_, keysArray, keyOffsets, keyLengths); final Map keyValueMap = new HashMap<>(computeCapacityHint(values.length)); for(int i = 0; i < values.length; i++) { if(values[i] == null) { continue; } keyValueMap.put(keys.get(i), values[i]); } return keyValueMap; } /** * Returns a map of keys for which values were found in DB. *

* Note: Every key needs to have a related column family name in * {@code columnFamilyHandleList}. *

* * @param opt Read options. * @param columnFamilyHandleList {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * @return Map where key of map is the key passed by user and value for map * entry is the corresponding value in DB. * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IllegalArgumentException thrown if the size of passed keys is not * equal to the amount of passed column family handles. * * @deprecated Consider {@link #multiGetAsList(ReadOptions, List, List)} * instead. */ @Deprecated public Map multiGet(final ReadOptions opt, final List columnFamilyHandleList, final List keys) throws RocksDBException { assert(keys.size() != 0); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.size()!=columnFamilyHandleList.size()){ throw new IllegalArgumentException( "For each key there must be a ColumnFamilyHandle."); } final long[] cfHandles = new long[columnFamilyHandleList.size()]; for (int i = 0; i < columnFamilyHandleList.size(); i++) { cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; } final byte[][] keysArray = keys.toArray(new byte[0][]); final int keyOffsets[] = new int[keysArray.length]; final int keyLengths[] = new int[keysArray.length]; for(int i = 0; i < keyLengths.length; i++) { keyLengths[i] = keysArray[i].length; } final byte[][] values = multiGet(nativeHandle_, opt.nativeHandle_, keysArray, keyOffsets, keyLengths, cfHandles); final Map keyValueMap = new HashMap<>(computeCapacityHint(values.length)); for(int i = 0; i < values.length; i++) { if(values[i] == null) { continue; } keyValueMap.put(keys.get(i), values[i]); } return keyValueMap; } /** * Takes a list of keys, and returns a list of values for the given list of * keys. List will contain null for keys which could not be found. 
* * @param keys List of keys for which values need to be retrieved. * @return List of values for the given list of keys. List will contain * null for keys which could not be found. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public List multiGetAsList(final List keys) throws RocksDBException { assert(keys.size() != 0); final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); final int keyOffsets[] = new int[keysArray.length]; final int keyLengths[] = new int[keysArray.length]; for(int i = 0; i < keyLengths.length; i++) { keyLengths[i] = keysArray[i].length; } return Arrays.asList(multiGet(nativeHandle_, keysArray, keyOffsets, keyLengths)); } /** * Returns a list of values for the given list of keys. List will contain * null for keys which could not be found. *

* Note: Every key needs to have a related column family name in * {@code columnFamilyHandleList}. *

* * @param columnFamilyHandleList {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param keys List of keys for which values need to be retrieved. * @return List of values for the given list of keys. List will contain * null for keys which could not be found. * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IllegalArgumentException thrown if the size of passed keys is not * equal to the amount of passed column family handles. */ public List multiGetAsList( final List columnFamilyHandleList, final List keys) throws RocksDBException, IllegalArgumentException { assert(keys.size() != 0); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.size() != columnFamilyHandleList.size()) { throw new IllegalArgumentException( "For each key there must be a ColumnFamilyHandle."); } final long[] cfHandles = new long[columnFamilyHandleList.size()]; for (int i = 0; i < columnFamilyHandleList.size(); i++) { cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; } final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); final int keyOffsets[] = new int[keysArray.length]; final int keyLengths[] = new int[keysArray.length]; for(int i = 0; i < keyLengths.length; i++) { keyLengths[i] = keysArray[i].length; } return Arrays.asList(multiGet(nativeHandle_, keysArray, keyOffsets, keyLengths, cfHandles)); } /** * Returns a list of values for the given list of keys. List will contain * null for keys which could not be found. * * @param opt Read options. * @param keys of keys for which values need to be retrieved. * @return List of values for the given list of keys. List will contain * null for keys which could not be found. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
public List<byte[]> multiGetAsList(final ReadOptions opt,
    final List<byte[]> keys) throws RocksDBException {
  assert(keys.size() != 0);

  // Flatten the keys into the parallel arrays the native call expects.
  final byte[][] keysArray = keys.toArray(new byte[keys.size()][]);
  // Offsets stay at zero: every key is passed in full.
  final int[] keyOffsets = new int[keysArray.length];
  final int[] keyLengths = new int[keysArray.length];
  for (int i = 0; i < keyLengths.length; i++) {
    keyLengths[i] = keysArray[i].length;
  }

  // The result is a fixed-size list view over the array of values.
  return Arrays.asList(multiGet(nativeHandle_, opt.nativeHandle_,
      keysArray, keyOffsets, keyLengths));
}

/**
 * Returns a list of values for the given list of keys. List will contain
 * null for keys which could not be found.
 *

* Note: Every key needs to have a related column family name in * {@code columnFamilyHandleList}. *

* * @param opt Read options. * @param columnFamilyHandleList {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * @return List of values for the given list of keys. List will contain * null for keys which could not be found. * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IllegalArgumentException thrown if the size of passed keys is not * equal to the amount of passed column family handles. */ public List multiGetAsList(final ReadOptions opt, final List columnFamilyHandleList, final List keys) throws RocksDBException { assert(keys.size() != 0); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.size()!=columnFamilyHandleList.size()){ throw new IllegalArgumentException( "For each key there must be a ColumnFamilyHandle."); } final long[] cfHandles = new long[columnFamilyHandleList.size()]; for (int i = 0; i < columnFamilyHandleList.size(); i++) { cfHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; } final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); final int keyOffsets[] = new int[keysArray.length]; final int keyLengths[] = new int[keysArray.length]; for(int i = 0; i < keyLengths.length; i++) { keyLengths[i] = keysArray[i].length; } return Arrays.asList(multiGet(nativeHandle_, opt.nativeHandle_, keysArray, keyOffsets, keyLengths, cfHandles)); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(byte[])}. One way to make this lighter weight is to avoid * doing any IOs. 
* * @param key byte array of a key to search for * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. */ public boolean keyMayExist(final byte[] key, /* @Nullable */ final Holder valueHolder) { return keyMayExist(key, 0, key.length, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(byte[], int, int)}. One way to make this lighter weight is to * avoid doing any IOs. * * @param key byte array of a key to search for * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than "key".length * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. */ public boolean keyMayExist(final byte[] key, final int offset, final int len, /* @Nullable */ final Holder valueHolder) { return keyMayExist((ColumnFamilyHandle)null, key, offset, len, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. 
* * This check is potentially lighter-weight than invoking * {@link #get(ColumnFamilyHandle,byte[])}. One way to make this lighter * weight is to avoid doing any IOs. * * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param key byte array of a key to search for * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. */ public boolean keyMayExist( final ColumnFamilyHandle columnFamilyHandle, final byte[] key, /* @Nullable */ final Holder valueHolder) { return keyMayExist(columnFamilyHandle, key, 0, key.length, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(ColumnFamilyHandle, byte[], int, int)}. One way to make this * lighter weight is to avoid doing any IOs. * * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param key byte array of a key to search for * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than "key".length * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. 
*/ public boolean keyMayExist( final ColumnFamilyHandle columnFamilyHandle, final byte[] key, int offset, int len, /* @Nullable */ final Holder valueHolder) { return keyMayExist(columnFamilyHandle, null, key, offset, len, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(ReadOptions, byte[])}. One way to make this * lighter weight is to avoid doing any IOs. * * @param readOptions {@link ReadOptions} instance * @param key byte array of a key to search for * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. */ public boolean keyMayExist( final ReadOptions readOptions, final byte[] key, /* @Nullable */ final Holder valueHolder) { return keyMayExist(readOptions, key, 0, key.length, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(ReadOptions, byte[], int, int)}. One way to make this * lighter weight is to avoid doing any IOs. 
* * @param readOptions {@link ReadOptions} instance * @param key byte array of a key to search for * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than "key".length * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. */ public boolean keyMayExist( final ReadOptions readOptions, final byte[] key, final int offset, final int len, /* @Nullable */ final Holder valueHolder) { return keyMayExist(null, readOptions, key, offset, len, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(ColumnFamilyHandle, ReadOptions, byte[])}. One way to make this * lighter weight is to avoid doing any IOs. * * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param readOptions {@link ReadOptions} instance * @param key byte array of a key to search for * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. 
*/ public boolean keyMayExist( final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, final byte[] key, /* @Nullable */ final Holder valueHolder) { return keyMayExist(columnFamilyHandle, readOptions, key, 0, key.length, valueHolder); } /** * If the key definitely does not exist in the database, then this method * returns null, else it returns an instance of KeyMayExistResult * * If the caller wants to obtain value when the key * is found in memory, then {@code valueHolder} must be set. * * This check is potentially lighter-weight than invoking * {@link #get(ColumnFamilyHandle, ReadOptions, byte[], int, int)}. * One way to make this lighter weight is to avoid doing any IOs. * * @param columnFamilyHandle {@link ColumnFamilyHandle} instance * @param readOptions {@link ReadOptions} instance * @param key byte array of a key to search for * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length * @param len the length of the "key" array to be used, must be non-negative * and no larger than "key".length * @param valueHolder non-null to retrieve the value if it is found, or null * if the value is not needed. If non-null, upon return of the function, * the {@code value} will be set if it could be retrieved. * * @return false if the key definitely does not exist in the database, * otherwise true. */ public boolean keyMayExist( final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, final byte[] key, final int offset, final int len, /* @Nullable */ final Holder valueHolder) { checkBounds(offset, len, key.length); if (valueHolder == null) { return keyMayExist(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, readOptions == null ? 0 : readOptions.nativeHandle_, key, offset, len); } else { final byte[][] result = keyMayExistFoundValue( nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, readOptions == null ? 
0 : readOptions.nativeHandle_, key, offset, len); if (result[0][0] == 0x0) { valueHolder.setValue(null); return false; } else if (result[0][0] == 0x1) { valueHolder.setValue(null); return true; } else { valueHolder.setValue(result[1]); return true; } } } /** *

Return a heap-allocated iterator over the contents of the * database. The result of newIterator() is initially invalid * (caller must call one of the Seek methods on the iterator * before using it).

* *

Caller should close the iterator when it is no longer needed. * The returned iterator should be closed before this db is closed. *

*
   * @return a new {@link RocksIterator} bound to this database instance.
   */
  public RocksIterator newIterator() {
    // Obtain the native iterator first, then wrap it for Java callers.
    final long iteratorHandle = iterator(nativeHandle_);
    return new RocksIterator(this, iteratorHandle);
  }

  /**
   *

Return a heap-allocated iterator over the contents of the * database. The result of newIterator() is initially invalid * (caller must call one of the Seek methods on the iterator * before using it).

* *

Caller should close the iterator when it is no longer needed. * The returned iterator should be closed before this db is closed. *

*
   * @param readOptions {@link ReadOptions} instance whose native handle is
   *     passed to the native iterator factory.
   * @return a new {@link RocksIterator} bound to this database instance.
   */
  public RocksIterator newIterator(final ReadOptions readOptions) {
    final long iteratorHandle =
        iterator(nativeHandle_, readOptions.nativeHandle_);
    return new RocksIterator(this, iteratorHandle);
  }

  /**
   *

Return a heap-allocated iterator over the contents of the * database. The result of newIterator() is initially invalid * (caller must call one of the Seek methods on the iterator * before using it).

* *

Caller should close the iterator when it is no longer needed. * The returned iterator should be closed before this db is closed. *

*
   * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
   *     instance to iterate over
   * @return a new {@link RocksIterator} bound to this database instance.
   */
  public RocksIterator newIterator(
      final ColumnFamilyHandle columnFamilyHandle) {
    final long iteratorHandle =
        iteratorCF(nativeHandle_, columnFamilyHandle.nativeHandle_);
    return new RocksIterator(this, iteratorHandle);
  }

  /**
   *

Return a heap-allocated iterator over the contents of the * database. The result of newIterator() is initially invalid * (caller must call one of the Seek methods on the iterator * before using it).

* *

Caller should close the iterator when it is no longer needed. * The returned iterator should be closed before this db is closed. *

* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param readOptions {@link ReadOptions} instance. * @return instance of iterator object. */ public RocksIterator newIterator(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions) { return new RocksIterator(this, iteratorCF(nativeHandle_, columnFamilyHandle.nativeHandle_, readOptions.nativeHandle_)); } /** * Returns iterators from a consistent database state across multiple * column families. Iterators are heap allocated and need to be deleted * before the db is deleted * * @param columnFamilyHandleList {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @return {@link java.util.List} containing {@link org.rocksdb.RocksIterator} * instances * * @throws RocksDBException thrown if error happens in underlying * native library. */ public List newIterators( final List columnFamilyHandleList) throws RocksDBException { return newIterators(columnFamilyHandleList, new ReadOptions()); } /** * Returns iterators from a consistent database state across multiple * column families. Iterators are heap allocated and need to be deleted * before the db is deleted * * @param columnFamilyHandleList {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param readOptions {@link ReadOptions} instance. * @return {@link java.util.List} containing {@link org.rocksdb.RocksIterator} * instances * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ public List newIterators( final List columnFamilyHandleList, final ReadOptions readOptions) throws RocksDBException { final long[] columnFamilyHandles = new long[columnFamilyHandleList.size()]; for (int i = 0; i < columnFamilyHandleList.size(); i++) { columnFamilyHandles[i] = columnFamilyHandleList.get(i).nativeHandle_; } final long[] iteratorRefs = iterators(nativeHandle_, columnFamilyHandles, readOptions.nativeHandle_); final List iterators = new ArrayList<>( columnFamilyHandleList.size()); for (int i=0; iReturn a handle to the current DB state. Iterators created with * this handle will all observe a stable snapshot of the current DB * state. The caller must call ReleaseSnapshot(result) when the * snapshot is no longer needed.

* *

nullptr will be returned if the DB fails to take a snapshot or does * not support snapshot.

*
   * @return a {@link Snapshot} instance, or null when the native call yields
   *     a zero handle
   */
  public Snapshot getSnapshot() {
    final long handle = getSnapshot(nativeHandle_);
    // A zero handle means no snapshot was produced.
    return handle == 0 ? null : new Snapshot(handle);
  }

  /**
   * Release a previously acquired snapshot.
   *
   * The caller must not use "snapshot" after this call.
   *
   * @param snapshot {@link Snapshot} instance; a null argument is ignored
   */
  public void releaseSnapshot(final Snapshot snapshot) {
    if (snapshot == null) {
      return;
    }
    releaseSnapshot(nativeHandle_, snapshot.nativeHandle_);
  }

  /**
   * DB implementations can export properties about their state
   * via this method on a per column family level.
   *
   *

If {@code property} is a valid property understood by this DB * implementation, fills {@code value} with its current value and * returns true. Otherwise returns false.

* *

Valid property names include: *

    *
  • "rocksdb.num-files-at-level<N>" - return the number of files at * level <N>, where <N> is an ASCII representation of a level * number (e.g. "0").
  • *
  • "rocksdb.stats" - returns a multi-line string that describes statistics * about the internal operation of the DB.
  • *
  • "rocksdb.sstables" - returns a multi-line string that describes all * of the sstables that make up the db contents.
  • *
* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param property to be fetched. See above for examples * @return property value * * @throws RocksDBException thrown if error happens in underlying * native library. */ public String getProperty( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final String property) throws RocksDBException { return getProperty(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, property, property.length()); } /** * DB implementations can export properties about their state * via this method. If "property" is a valid property understood by this * DB implementation, fills "*value" with its current value and returns * true. Otherwise returns false. * *

Valid property names include: *

    *
  • "rocksdb.num-files-at-level<N>" - return the number of files at * level <N>, where <N> is an ASCII representation of a level * number (e.g. "0").
  • *
  • "rocksdb.stats" - returns a multi-line string that describes statistics * about the internal operation of the DB.
  • *
  • "rocksdb.sstables" - returns a multi-line string that describes all * of the sstables that make up the db contents.
  • *
* * @param property to be fetched. See above for examples * @return property value * * @throws RocksDBException thrown if error happens in underlying * native library. */ public String getProperty(final String property) throws RocksDBException { return getProperty(null, property); } /** * Gets a property map. * * @param property to be fetched. * * @return the property map * * @throws RocksDBException if an error happens in the underlying native code. */ public Map getMapProperty(final String property) throws RocksDBException { return getMapProperty(null, property); } /** * Gets a property map. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param property to be fetched. * * @return the property map * * @throws RocksDBException if an error happens in the underlying native code. */ public Map getMapProperty( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final String property) throws RocksDBException { return getMapProperty(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, property, property.length()); } /** *

Similar to GetProperty(), but only works for a subset of properties * whose return value is a numerical value. Return the value as long.

* *

Note: As the returned property is of type * {@code uint64_t} on C++ side the returning value can be negative * because Java supports in Java 7 only signed long values.

* *

Java 7: To mitigate the problem of the non * existent unsigned long type, values should be encapsulated using * {@link java.math.BigInteger} to reflect the correct value. The correct * behavior is guaranteed if {@code 2^64} is added to negative values.

* *

Java 8: In Java 8 the value should be treated as * unsigned long using provided methods of type {@link Long}.

* * @param property to be fetched. * * @return numerical property value. * * @throws RocksDBException if an error happens in the underlying native code. */ public long getLongProperty(final String property) throws RocksDBException { return getLongProperty(null, property); } /** *

Similar to GetProperty(), but only works for a subset of properties * whose return value is a numerical value. Return the value as long.

* *

Note: As the returned property is of type * {@code uint64_t} on C++ side the returning value can be negative * because Java supports in Java 7 only signed long values.

* *

Java 7: To mitigate the problem of the non * existent unsigned long type, values should be encapsulated using * {@link java.math.BigInteger} to reflect the correct value. The correct * behavior is guaranteed if {@code 2^64} is added to negative values.

* *

Java 8: In Java 8 the value should be treated as * unsigned long using provided methods of type {@link Long}.

* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family * @param property to be fetched. * * @return numerical property value * * @throws RocksDBException if an error happens in the underlying native code. */ public long getLongProperty( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final String property) throws RocksDBException { return getLongProperty(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, property, property.length()); } /** * Reset internal stats for DB and all column families. * * Note this doesn't reset {@link Options#statistics()} as it is not * owned by DB. * * @throws RocksDBException if an error occurs whilst reseting the stats */ public void resetStats() throws RocksDBException { resetStats(nativeHandle_); } /** *

Return sum of the getLongProperty of all the column families

* *

Note: As the returned property is of type * {@code uint64_t} on C++ side the returning value can be negative * because Java supports in Java 7 only signed long values.

* *

Java 7: To mitigate the problem of the non * existent unsigned long type, values should be encapsulated using * {@link java.math.BigInteger} to reflect the correct value. The correct * behavior is guaranteed if {@code 2^64} is added to negative values.

* *

Java 8: In Java 8 the value should be treated as * unsigned long using provided methods of type {@link Long}.

* * @param property to be fetched. * * @return numerical property value * * @throws RocksDBException if an error happens in the underlying native code. */ public long getAggregatedLongProperty(final String property) throws RocksDBException { return getAggregatedLongProperty(nativeHandle_, property, property.length()); } /** * Get the approximate file system space used by keys in each range. * * Note that the returned sizes measure file system space usage, so * if the user data compresses by a factor of ten, the returned * sizes will be one-tenth the size of the corresponding user data size. * * If {@code sizeApproximationFlags} defines whether the returned size * should include the recently written data in the mem-tables (if * the mem-table type supports it), data serialized to disk, or both. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family * @param ranges the ranges over which to approximate sizes * @param sizeApproximationFlags flags to determine what to include in the * approximation. * * @return the sizes */ public long[] getApproximateSizes( /*@Nullable*/ final ColumnFamilyHandle columnFamilyHandle, final List ranges, final SizeApproximationFlag... sizeApproximationFlags) { byte flags = 0x0; for (final SizeApproximationFlag sizeApproximationFlag : sizeApproximationFlags) { flags |= sizeApproximationFlag.getValue(); } return getApproximateSizes(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, toRangeSliceHandles(ranges), flags); } /** * Get the approximate file system space used by keys in each range for * the default column family. * * Note that the returned sizes measure file system space usage, so * if the user data compresses by a factor of ten, the returned * sizes will be one-tenth the size of the corresponding user data size. 
* * If {@code sizeApproximationFlags} defines whether the returned size * should include the recently written data in the mem-tables (if * the mem-table type supports it), data serialized to disk, or both. * * @param ranges the ranges over which to approximate sizes * @param sizeApproximationFlags flags to determine what to include in the * approximation. * * @return the sizes. */ public long[] getApproximateSizes(final List ranges, final SizeApproximationFlag... sizeApproximationFlags) { return getApproximateSizes(null, ranges, sizeApproximationFlags); } public static class CountAndSize { public final long count; public final long size; public CountAndSize(final long count, final long size) { this.count = count; this.size = size; } } /** * This method is similar to * {@link #getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)}, * except that it returns approximate number of records and size in memtables. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family * @param range the ranges over which to get the memtable stats * * @return the count and size for the range */ public CountAndSize getApproximateMemTableStats( /*@Nullable*/ final ColumnFamilyHandle columnFamilyHandle, final Range range) { final long[] result = getApproximateMemTableStats(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, range.start.getNativeHandle(), range.limit.getNativeHandle()); return new CountAndSize(result[0], result[1]); } /** * This method is similar to * {@link #getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)}, * except that it returns approximate number of records and size in memtables. * * @param range the ranges over which to get the memtable stats * * @return the count and size for the range */ public CountAndSize getApproximateMemTableStats( final Range range) { return getApproximateMemTableStats(null, range); } /** *

Range compaction of database.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

See also

*
    *
  • {@link #compactRange(boolean, int, int)}
  • *
  • {@link #compactRange(byte[], byte[])}
  • *
  • {@link #compactRange(byte[], byte[], boolean, int, int)}
  • *
* * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void compactRange() throws RocksDBException { compactRange(null); } /** *

Range compaction of column family.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

See also

*
    *
  • * {@link #compactRange(ColumnFamilyHandle, boolean, int, int)} *
  • *
  • * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} *
  • *
  • * {@link #compactRange(ColumnFamilyHandle, byte[], byte[], * boolean, int, int)} *
  • *
* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family. * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void compactRange( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) throws RocksDBException { compactRange(nativeHandle_, null, -1, null, -1, 0, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** *

Range compaction of database.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

See also

*
    *
  • {@link #compactRange()}
  • *
  • {@link #compactRange(boolean, int, int)}
  • *
  • {@link #compactRange(byte[], byte[], boolean, int, int)}
  • *
* * @param begin start of key range (included in range) * @param end end of key range (excluded from range) * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void compactRange(final byte[] begin, final byte[] end) throws RocksDBException { compactRange(null, begin, end); } /** *

Range compaction of column family.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

See also

*
    *
  • {@link #compactRange(ColumnFamilyHandle)}
  • *
  • * {@link #compactRange(ColumnFamilyHandle, boolean, int, int)} *
  • *
  • * {@link #compactRange(ColumnFamilyHandle, byte[], byte[], * boolean, int, int)} *
  • *
* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param begin start of key range (included in range) * @param end end of key range (excluded from range) * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void compactRange( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final byte[] begin, final byte[] end) throws RocksDBException { compactRange(nativeHandle_, begin, begin == null ? -1 : begin.length, end, end == null ? -1 : end.length, 0, columnFamilyHandle == null ? 0: columnFamilyHandle.nativeHandle_); } /** *

Range compaction of database.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

Compaction outputs should be placed in options.db_paths * [target_path_id]. Behavior is undefined if target_path_id is * out of range.

* *

See also

*
    *
  • {@link #compactRange()}
  • *
  • {@link #compactRange(byte[], byte[])}
  • *
  • {@link #compactRange(byte[], byte[], boolean, int, int)}
  • *
* * @deprecated Use {@link #compactRange(ColumnFamilyHandle, byte[], byte[], CompactRangeOptions)} instead * * @param changeLevel reduce level after compaction * @param targetLevel target level to compact to * @param targetPathId the target path id of output path * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ @Deprecated public void compactRange(final boolean changeLevel, final int targetLevel, final int targetPathId) throws RocksDBException { compactRange(null, changeLevel, targetLevel, targetPathId); } /** *

Range compaction of column family.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

Compaction outputs should be placed in options.db_paths * [target_path_id]. Behavior is undefined if target_path_id is * out of range.

* *

See also

*
    *
  • {@link #compactRange(ColumnFamilyHandle)}
  • *
  • * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} *
  • *
  • * {@link #compactRange(ColumnFamilyHandle, byte[], byte[], * boolean, int, int)} *
  • *
* * @deprecated Use {@link #compactRange(ColumnFamilyHandle, byte[], byte[], CompactRangeOptions)} instead * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param changeLevel reduce level after compaction * @param targetLevel target level to compact to * @param targetPathId the target path id of output path * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ @Deprecated public void compactRange( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final boolean changeLevel, final int targetLevel, final int targetPathId) throws RocksDBException { final CompactRangeOptions options = new CompactRangeOptions(); options.setChangeLevel(changeLevel); options.setTargetLevel(targetLevel); options.setTargetPathId(targetPathId); compactRange(nativeHandle_, null, -1, null, -1, options.nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** *

Range compaction of database.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

Compaction outputs should be placed in options.db_paths * [target_path_id]. Behavior is undefined if target_path_id is * out of range.

* *

See also

*
    *
  • {@link #compactRange()}
  • *
  • {@link #compactRange(boolean, int, int)}
  • *
  • {@link #compactRange(byte[], byte[])}
  • *
* * @deprecated Use {@link #compactRange(ColumnFamilyHandle, byte[], byte[], CompactRangeOptions)} * instead * * @param begin start of key range (included in range) * @param end end of key range (excluded from range) * @param changeLevel reduce level after compaction * @param targetLevel target level to compact to * @param targetPathId the target path id of output path * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ @Deprecated public void compactRange(final byte[] begin, final byte[] end, final boolean changeLevel, final int targetLevel, final int targetPathId) throws RocksDBException { compactRange(null, begin, end, changeLevel, targetLevel, targetPathId); } /** *

Range compaction of column family.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* *

Compaction outputs should be placed in options.db_paths * [target_path_id]. Behavior is undefined if target_path_id is * out of range.

* *

See also

*
    *
  • {@link #compactRange(ColumnFamilyHandle)}
  • *
  • * {@link #compactRange(ColumnFamilyHandle, boolean, int, int)} *
  • *
  • * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} *
  • *
* * @deprecated Use {@link #compactRange(ColumnFamilyHandle, byte[], byte[], CompactRangeOptions)} instead * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance. * @param begin start of key range (included in range) * @param end end of key range (excluded from range) * @param changeLevel reduce level after compaction * @param targetLevel target level to compact to * @param targetPathId the target path id of output path * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ @Deprecated public void compactRange( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final byte[] begin, final byte[] end, final boolean changeLevel, final int targetLevel, final int targetPathId) throws RocksDBException { final CompactRangeOptions options = new CompactRangeOptions(); options.setChangeLevel(changeLevel); options.setTargetLevel(targetLevel); options.setTargetPathId(targetPathId); compactRange(nativeHandle_, begin, begin == null ? -1 : begin.length, end, end == null ? -1 : end.length, options.nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** *

Range compaction of column family.

*

Note: After the database has been compacted, * all data will have been pushed down to the last level containing * any data.

* * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance. * @param begin start of key range (included in range) * @param end end of key range (excluded from range) * @param compactRangeOptions options for the compaction * * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void compactRange( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final byte[] begin, final byte[] end, final CompactRangeOptions compactRangeOptions) throws RocksDBException { compactRange(nativeHandle_, begin, begin == null ? -1 : begin.length, end, end == null ? -1 : end.length, compactRangeOptions.nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** * Change the options for the column family handle. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param mutableColumnFamilyOptions the options. * * @throws RocksDBException if an error occurs whilst setting the options */ public void setOptions( /* @Nullable */final ColumnFamilyHandle columnFamilyHandle, final MutableColumnFamilyOptions mutableColumnFamilyOptions) throws RocksDBException { setOptions(nativeHandle_, columnFamilyHandle.nativeHandle_, mutableColumnFamilyOptions.getKeys(), mutableColumnFamilyOptions.getValues()); } /** * Change the options for the default column family handle. * * @param mutableColumnFamilyOptions the options. * * @throws RocksDBException if an error occurs whilst setting the options */ public void setOptions( final MutableColumnFamilyOptions mutableColumnFamilyOptions) throws RocksDBException { setOptions(null, mutableColumnFamilyOptions); } /** * Set the options for the column family handle. * * @param mutableDBoptions the options. 
* * @throws RocksDBException if an error occurs whilst setting the options */ public void setDBOptions(final MutableDBOptions mutableDBoptions) throws RocksDBException { setDBOptions(nativeHandle_, mutableDBoptions.getKeys(), mutableDBoptions.getValues()); } /** * Takes a list of files specified by file names and * compacts them to the specified level. * * Note that the behavior is different from * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} * in that CompactFiles() performs the compaction job using the CURRENT * thread. * * @param compactionOptions compaction options * @param inputFileNames the name of the files to compact * @param outputLevel the level to which they should be compacted * @param outputPathId the id of the output path, or -1 * @param compactionJobInfo the compaction job info, this parameter * will be updated with the info from compacting the files, * can just be null if you don't need it. * * @return the list of compacted files * * @throws RocksDBException if an error occurs during compaction */ public List compactFiles( final CompactionOptions compactionOptions, final List inputFileNames, final int outputLevel, final int outputPathId, /* @Nullable */ final CompactionJobInfo compactionJobInfo) throws RocksDBException { return compactFiles(compactionOptions, null, inputFileNames, outputLevel, outputPathId, compactionJobInfo); } /** * Takes a list of files specified by file names and * compacts them to the specified level. * * Note that the behavior is different from * {@link #compactRange(ColumnFamilyHandle, byte[], byte[])} * in that CompactFiles() performs the compaction job using the CURRENT * thread. 
* * @param compactionOptions compaction options * @param columnFamilyHandle columnFamilyHandle, or null for the * default column family * @param inputFileNames the name of the files to compact * @param outputLevel the level to which they should be compacted * @param outputPathId the id of the output path, or -1 * @param compactionJobInfo the compaction job info, this parameter * will be updated with the info from compacting the files, * can just be null if you don't need it. * * @return the list of compacted files * * @throws RocksDBException if an error occurs during compaction */ public List compactFiles( final CompactionOptions compactionOptions, /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle, final List inputFileNames, final int outputLevel, final int outputPathId, /* @Nullable */ final CompactionJobInfo compactionJobInfo) throws RocksDBException { return Arrays.asList(compactFiles(nativeHandle_, compactionOptions.nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, inputFileNames.toArray(new String[0]), outputLevel, outputPathId, compactionJobInfo == null ? 0 : compactionJobInfo.nativeHandle_)); } /** * This function will cancel all currently running background processes. * * @param wait if true, wait for all background work to be cancelled before * returning. * */ public void cancelAllBackgroundWork(boolean wait) { cancelAllBackgroundWork(nativeHandle_, wait); } /** * This function will wait until all currently running background processes * finish. 
After it returns, no background process will be run until * {@link #continueBackgroundWork()} is called * * @throws RocksDBException if an error occurs when pausing background work */ public void pauseBackgroundWork() throws RocksDBException { pauseBackgroundWork(nativeHandle_); } /** * Resumes background work which was suspended by * previously calling {@link #pauseBackgroundWork()} * * @throws RocksDBException if an error occurs when resuming background work */ public void continueBackgroundWork() throws RocksDBException { continueBackgroundWork(nativeHandle_); } /** * Enable automatic compactions for the given column * families if they were previously disabled. * * The function will first set the * {@link ColumnFamilyOptions#disableAutoCompactions()} option for each * column family to false, after which it will schedule a flush/compaction. * * NOTE: Setting disableAutoCompactions to 'false' through * {@link #setOptions(ColumnFamilyHandle, MutableColumnFamilyOptions)} * does NOT schedule a flush/compaction afterwards, and only changes the * parameter itself within the column family option. * * @param columnFamilyHandles the column family handles * * @throws RocksDBException if an error occurs whilst enabling auto-compaction */ public void enableAutoCompaction( final List columnFamilyHandles) throws RocksDBException { enableAutoCompaction(nativeHandle_, toNativeHandleList(columnFamilyHandles)); } /** * Number of levels used for this DB. * * @return the number of levels */ public int numberLevels() { return numberLevels(null); } /** * Number of levels used for a column family in this DB. * * @param columnFamilyHandle the column family handle, or null * for the default column family * * @return the number of levels */ public int numberLevels(/* @Nullable */final ColumnFamilyHandle columnFamilyHandle) { return numberLevels(nativeHandle_, columnFamilyHandle == null ? 
0 : columnFamilyHandle.nativeHandle_); } /** * Maximum level to which a new compacted memtable is pushed if it * does not create overlap. * * @return the maximum level */ public int maxMemCompactionLevel() { return maxMemCompactionLevel(null); } /** * Maximum level to which a new compacted memtable is pushed if it * does not create overlap. * * @param columnFamilyHandle the column family handle * * @return the maximum level */ public int maxMemCompactionLevel( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) { return maxMemCompactionLevel(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** * Number of files in level-0 that would stop writes. * * @return the number of files */ public int level0StopWriteTrigger() { return level0StopWriteTrigger(null); } /** * Number of files in level-0 that would stop writes. * * @param columnFamilyHandle the column family handle * * @return the number of files */ public int level0StopWriteTrigger( /* @Nullable */final ColumnFamilyHandle columnFamilyHandle) { return level0StopWriteTrigger(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** * Get DB name -- the exact same name that was provided as an argument to * as path to {@link #open(Options, String)}. * * @return the DB name */ public String getName() { return getName(nativeHandle_); } /** * Get the Env object from the DB * * @return the env */ public Env getEnv() { final long envHandle = getEnv(nativeHandle_); if (envHandle == Env.getDefault().nativeHandle_) { return Env.getDefault(); } else { final Env env = new RocksEnv(envHandle); env.disOwnNativeHandle(); // we do not own the Env! return env; } } /** *

Flush all memory table data.

* *

Note: it must be ensured that the FlushOptions instance * is not GC'ed before this method finishes. If the wait parameter is * set to false, flush processing is asynchronous.

* * @param flushOptions {@link org.rocksdb.FlushOptions} instance. * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void flush(final FlushOptions flushOptions) throws RocksDBException { flush(flushOptions, (List) null); } /** *

Flush all memory table data.

* *

Note: it must be ensured that the FlushOptions instance * is not GC'ed before this method finishes. If the wait parameter is * set to false, flush processing is asynchronous.

* * @param flushOptions {@link org.rocksdb.FlushOptions} instance. * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance. * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void flush(final FlushOptions flushOptions, /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) throws RocksDBException { flush(flushOptions, columnFamilyHandle == null ? null : Arrays.asList(columnFamilyHandle)); } /** * Flushes multiple column families. * * If atomic flush is not enabled, this is equivalent to calling * {@link #flush(FlushOptions, ColumnFamilyHandle)} multiple times. * * If atomic flush is enabled, this will flush all column families * specified up to the latest sequence number at the time when flush is * requested. * * @param flushOptions {@link org.rocksdb.FlushOptions} instance. * @param columnFamilyHandles column family handles. * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ public void flush(final FlushOptions flushOptions, /* @Nullable */ final List columnFamilyHandles) throws RocksDBException { flush(nativeHandle_, flushOptions.nativeHandle_, toNativeHandleList(columnFamilyHandles)); } /** * Flush the WAL memory buffer to the file. If {@code sync} is true, * it calls {@link #syncWal()} afterwards. * * @param sync true to also fsync to disk. * * @throws RocksDBException if an error occurs whilst flushing */ public void flushWal(final boolean sync) throws RocksDBException { flushWal(nativeHandle_, sync); } /** * Sync the WAL. * * Note that {@link #write(WriteOptions, WriteBatch)} followed by * {@link #syncWal()} is not exactly the same as * {@link #write(WriteOptions, WriteBatch)} with * {@link WriteOptions#sync()} set to true; In the latter case the changes * won't be visible until the sync is done. * * Currently only works if {@link Options#allowMmapWrites()} is set to false. 
* * @throws RocksDBException if an error occurs whilst syncing */ public void syncWal() throws RocksDBException { syncWal(nativeHandle_); } /** *

The sequence number of the most recent transaction.

*
   * @return sequence number of the most
   *     recent transaction.
   */
  public long getLatestSequenceNumber() {
    final long latestSequenceNumber = getLatestSequenceNumber(nativeHandle_);
    return latestSequenceNumber;
  }

  /**
   * Instructs DB to preserve deletes with sequence numbers
   * {@code >= sequenceNumber}.
   *
   * Has no effect if DBOptions#preserveDeletes() is set to false.
   *
   * This function assumes that user calls this function with monotonically
   * increasing seqnums (otherwise we can't guarantee that a particular delete
   * hasn't been already processed).
   *
   * @param sequenceNumber the minimum sequence number to preserve
   *
   * @return true if the value was successfully updated,
   *     false if user attempted to call if with
   *     {@code sequenceNumber <= current value}.
   */
  public boolean setPreserveDeletesSequenceNumber(final long sequenceNumber) {
    final boolean updated =
        setPreserveDeletesSequenceNumber(nativeHandle_, sequenceNumber);
    return updated;
  }

  /**
   *

Prevent file deletions. Compactions will continue to occur,
   * but no obsolete files will be deleted. Calling this multiple
   * times has the same effect as calling it once.

*
   * @throws RocksDBException thrown if operation was not performed
   *     successfully.
   */
  public void disableFileDeletions()
      throws RocksDBException {
    // Thin wrapper: forwards this database's native handle.
    disableFileDeletions(nativeHandle_);
  }

  /**
   *

Allow compactions to delete obsolete files. * If force == true, the call to EnableFileDeletions() * will guarantee that file deletions are enabled after * the call, even if DisableFileDeletions() was called * multiple times before.

* *

If force == false, EnableFileDeletions will only * enable file deletion after it's been called at least * as many times as DisableFileDeletions(), enabling * the two methods to be called by two threads * concurrently without synchronization * -- i.e., file deletions will be enabled only after both * threads call EnableFileDeletions()

* * @param force boolean value described above. * * @throws RocksDBException thrown if operation was not performed * successfully. */ public void enableFileDeletions(final boolean force) throws RocksDBException { enableFileDeletions(nativeHandle_, force); } public static class LiveFiles { /** * The valid size of the manifest file. The manifest file is an ever growing * file, but only the portion specified here is valid for this snapshot. */ public final long manifestFileSize; /** * The files are relative to the {@link #getName()} and are not * absolute paths. Despite being relative paths, the file names begin * with "/". */ public final List files; LiveFiles(final long manifestFileSize, final List files) { this.manifestFileSize = manifestFileSize; this.files = files; } } /** * Retrieve the list of all files in the database after flushing the memtable. * * See {@link #getLiveFiles(boolean)}. * * @return the live files * * @throws RocksDBException if an error occurs whilst retrieving the list * of live files */ public LiveFiles getLiveFiles() throws RocksDBException { return getLiveFiles(true); } /** * Retrieve the list of all files in the database. * * In case you have multiple column families, even if {@code flushMemtable} * is true, you still need to call {@link #getSortedWalFiles()} * after {@link #getLiveFiles(boolean)} to compensate for new data that * arrived to already-flushed column families while other column families * were flushing. * * NOTE: Calling {@link #getLiveFiles(boolean)} followed by * {@link #getSortedWalFiles()} can generate a lossless backup. * * @param flushMemtable set to true to flush before recoding the live * files. Setting to false is useful when we don't want to wait for flush * which may have to wait for compaction to complete taking an * indeterminate time. 
* * @return the live files * * @throws RocksDBException if an error occurs whilst retrieving the list * of live files */ public LiveFiles getLiveFiles(final boolean flushMemtable) throws RocksDBException { final String[] result = getLiveFiles(nativeHandle_, flushMemtable); if (result == null) { return null; } final String[] files = Arrays.copyOf(result, result.length - 1); final long manifestFileSize = Long.parseLong(result[result.length - 1]); return new LiveFiles(manifestFileSize, Arrays.asList(files)); } /** * Retrieve the sorted list of all wal files with earliest file first. * * @return the log files * * @throws RocksDBException if an error occurs whilst retrieving the list * of sorted WAL files */ public List getSortedWalFiles() throws RocksDBException { final LogFile[] logFiles = getSortedWalFiles(nativeHandle_); return Arrays.asList(logFiles); } /** *

Returns an iterator that is positioned at a write-batch containing * seq_number. If the sequence number is non existent, it returns an iterator * at the first available seq_no after the requested seq_no.

* *

Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to * use this api, else the WAL files will get * cleared aggressively and the iterator might keep getting invalid before * an update is read.

* * @param sequenceNumber sequence number offset * * @return {@link org.rocksdb.TransactionLogIterator} instance. * * @throws org.rocksdb.RocksDBException if iterator cannot be retrieved * from native-side. */ public TransactionLogIterator getUpdatesSince(final long sequenceNumber) throws RocksDBException { return new TransactionLogIterator( getUpdatesSince(nativeHandle_, sequenceNumber)); } /** * Delete the file name from the db directory and update the internal state to * reflect that. Supports deletion of sst and log files only. 'name' must be * path relative to the db directory. eg. 000001.sst, /archive/000003.log * * @param name the file name * * @throws RocksDBException if an error occurs whilst deleting the file */ public void deleteFile(final String name) throws RocksDBException { deleteFile(nativeHandle_, name); } /** * Gets a list of all table files metadata. * * @return table files metadata. */ public List getLiveFilesMetaData() { return Arrays.asList(getLiveFilesMetaData(nativeHandle_)); } /** * Obtains the meta data of the specified column family of the DB. * * @param columnFamilyHandle the column family * * @return the column family metadata */ public ColumnFamilyMetaData getColumnFamilyMetaData( /* @Nullable */ final ColumnFamilyHandle columnFamilyHandle) { return getColumnFamilyMetaData(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } /** * Obtains the meta data of the default column family of the DB. * * @return the column family metadata */ public ColumnFamilyMetaData GetColumnFamilyMetaData() { return getColumnFamilyMetaData(null); } /** * ingestExternalFile will load a list of external SST files (1) into the DB * We will try to find the lowest possible level that the file can fit in, and * ingest the file into this level (2). A file that have a key range that * overlap with the memtable key range will require us to Flush the memtable * first before ingesting the file. 
* * (1) External SST files can be created using {@link SstFileWriter} * (2) We will try to ingest the files to the lowest possible level * even if the file compression doesn't match the level compression * * @param filePathList The list of files to ingest * @param ingestExternalFileOptions the options for the ingestion * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void ingestExternalFile(final List filePathList, final IngestExternalFileOptions ingestExternalFileOptions) throws RocksDBException { ingestExternalFile(nativeHandle_, getDefaultColumnFamily().nativeHandle_, filePathList.toArray(new String[0]), filePathList.size(), ingestExternalFileOptions.nativeHandle_); } /** * ingestExternalFile will load a list of external SST files (1) into the DB * We will try to find the lowest possible level that the file can fit in, and * ingest the file into this level (2). A file that have a key range that * overlap with the memtable key range will require us to Flush the memtable * first before ingesting the file. * * (1) External SST files can be created using {@link SstFileWriter} * (2) We will try to ingest the files to the lowest possible level * even if the file compression doesn't match the level compression * * @param columnFamilyHandle The column family for the ingested files * @param filePathList The list of files to ingest * @param ingestExternalFileOptions the options for the ingestion * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
  public void ingestExternalFile(final ColumnFamilyHandle columnFamilyHandle,
      final List filePathList,
      final IngestExternalFileOptions ingestExternalFileOptions)
      throws RocksDBException {
    final long cfHandle = columnFamilyHandle.nativeHandle_;
    ingestExternalFile(
        nativeHandle_,
        cfHandle,
        filePathList.toArray(new String[0]),
        filePathList.size(),
        ingestExternalFileOptions.nativeHandle_);
  }

  /**
   * Verify checksum
   *
   * @throws RocksDBException if the checksum is not valid
   */
  public void verifyChecksum() throws RocksDBException {
    verifyChecksum(nativeHandle_);
  }

  /**
   * Gets the handle for the default column family
   *
   * @return The handle of the default column family; the returned wrapper
   *     has been told not to own its native handle
   */
  public ColumnFamilyHandle getDefaultColumnFamily() {
    final long nativeCfHandle = getDefaultColumnFamily(nativeHandle_);
    final ColumnFamilyHandle defaultCf =
        new ColumnFamilyHandle(this, nativeCfHandle);
    defaultCf.disOwnNativeHandle();
    return defaultCf;
  }

  /**
   * Get the properties of all tables.
   *
   * @param columnFamilyHandle the column family handle, or null for the default
   *     column family.
   *
   * @return the properties
   *
   * @throws RocksDBException if an error occurs whilst getting the properties
   */
  public Map getPropertiesOfAllTables(
      /* @Nullable */final ColumnFamilyHandle columnFamilyHandle)
      throws RocksDBException {
    final long cfHandle =
        columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_;
    return getPropertiesOfAllTables(nativeHandle_, cfHandle);
  }

  /**
   * Get the properties of all tables in the default column family.
   *
   * @return the properties
   *
   * @throws RocksDBException if an error occurs whilst getting the properties
   */
  public Map getPropertiesOfAllTables() throws RocksDBException {
    return getPropertiesOfAllTables(null);
  }

  /**
   * Get the properties of tables in range.
   *
   * @param columnFamilyHandle the column family handle, or null for the default
   *     column family.
* @param ranges the ranges over which to get the table properties * * @return the properties * * @throws RocksDBException if an error occurs whilst getting the properties */ public Map getPropertiesOfTablesInRange( /* @Nullable */final ColumnFamilyHandle columnFamilyHandle, final List ranges) throws RocksDBException { return getPropertiesOfTablesInRange(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, toRangeSliceHandles(ranges)); } /** * Get the properties of tables in range for the default column family. * * @param ranges the ranges over which to get the table properties * * @return the properties * * @throws RocksDBException if an error occurs whilst getting the properties */ public Map getPropertiesOfTablesInRange( final List ranges) throws RocksDBException { return getPropertiesOfTablesInRange(null, ranges); } /** * Suggest the range to compact. * * @param columnFamilyHandle the column family handle, or null for the default * column family. * * @return the suggested range. * * @throws RocksDBException if an error occurs whilst suggesting the range */ public Range suggestCompactRange( /* @Nullable */final ColumnFamilyHandle columnFamilyHandle) throws RocksDBException { final long[] rangeSliceHandles = suggestCompactRange(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); return new Range(new Slice(rangeSliceHandles[0]), new Slice(rangeSliceHandles[1])); } /** * Suggest the range to compact for the default column family. * * @return the suggested range. * * @throws RocksDBException if an error occurs whilst suggesting the range */ public Range suggestCompactRange() throws RocksDBException { return suggestCompactRange(null); } /** * Promote L0. * * @param columnFamilyHandle the column family handle, * or null for the default column family. 
* @param targetLevel the target level for L0 * * @throws RocksDBException if an error occurs whilst promoting L0 */ public void promoteL0( /* @Nullable */final ColumnFamilyHandle columnFamilyHandle, final int targetLevel) throws RocksDBException { promoteL0(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, targetLevel); } /** * Promote L0 for the default column family. * * @param targetLevel the target level for L0 * * @throws RocksDBException if an error occurs whilst promoting L0 */ public void promoteL0(final int targetLevel) throws RocksDBException { promoteL0(null, targetLevel); } /** * Trace DB operations. * * Use {@link #endTrace()} to stop tracing. * * @param traceOptions the options * @param traceWriter the trace writer * * @throws RocksDBException if an error occurs whilst starting the trace */ public void startTrace(final TraceOptions traceOptions, final AbstractTraceWriter traceWriter) throws RocksDBException { startTrace(nativeHandle_, traceOptions.getMaxTraceFileSize(), traceWriter.nativeHandle_); /** * NOTE: {@link #startTrace(long, long, long) transfers the ownership * from Java to C++, so we must disown the native handle here. */ traceWriter.disOwnNativeHandle(); } /** * Stop tracing DB operations. * * See {@link #startTrace(TraceOptions, AbstractTraceWriter)} * * @throws RocksDBException if an error occurs whilst ending the trace */ public void endTrace() throws RocksDBException { endTrace(nativeHandle_); } /** * Delete files in multiple ranges at once. * Delete files in a lot of ranges one at a time can be slow, use this API for * better performance in that case. * * @param columnFamily - The column family for operation (null for default) * @param includeEnd - Whether ranges should include end * @param ranges - pairs of ranges (from1, to1, from2, to2, ...) * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/
  public void deleteFilesInRanges(final ColumnFamilyHandle columnFamily,
      final List<byte[]> ranges, final boolean includeEnd)
      throws RocksDBException {
    // Nothing to delete for an empty list.
    if (ranges.size() == 0) {
      return;
    }
    // Keys come in (from, to) pairs, so an odd count is a caller error.
    if ((ranges.size() % 2) != 0) {
      throw new IllegalArgumentException("Ranges size needs to be multiple of 2 "
          + "(from1, to1, from2, to2, ...), but is " + ranges.size());
    }

    final byte[][] rangesArray = ranges.toArray(new byte[ranges.size()][]);

    deleteFilesInRanges(nativeHandle_,
        columnFamily == null ? 0 : columnFamily.nativeHandle_,
        rangesArray, includeEnd);
  }

  /**
   * Static method to destroy the contents of the specified database.
   * Be very careful using this method.
   *
   * @param path the path to the Rocksdb database.
   * @param options {@link org.rocksdb.Options} instance.
   *
   * @throws RocksDBException thrown if error happens in underlying
   *     native library.
   */
  public static void destroyDB(final String path, final Options options)
      throws RocksDBException {
    destroyDB(path, options.nativeHandle_);
  }

  /**
   * Collects the native handle of every object in the list, in list order.
   *
   * @param objectList the objects whose handles are wanted, may be null
   *
   * @return an array of native handles, or null when {@code objectList}
   *     is null
   */
  private /* @Nullable */ long[] toNativeHandleList(
      /* @Nullable */ final List<? extends RocksObject> objectList) {
    if (objectList == null) {
      return null;
    }
    final int len = objectList.size();
    final long[] handleList = new long[len];
    for (int i = 0; i < len; i++) {
      handleList[i] = objectList.get(i).nativeHandle_;
    }
    return handleList;
  }

  /**
   * Flattens a list of ranges into an array of slice handles laid out as
   * (start0, limit0, start1, limit1, ...).
   *
   * @param ranges the ranges to flatten
   *
   * @return an array holding two native handles per range
   */
  private static long[] toRangeSliceHandles(final List<Range> ranges) {
    final long[] rangeSliceHandles = new long[ranges.size() * 2];
    int j = 0;
    for (final Range range : ranges) {
      rangeSliceHandles[j++] = range.start.getNativeHandle();
      rangeSliceHandles[j++] = range.limit.getNativeHandle();
    }
    return rangeSliceHandles;
  }

  /**
   * Remembers the given options instance in the {@code options_} field.
   *
   * @param options the options instance to store
   */
  protected void storeOptionsInstance(DBOptionsInterface options) {
    options_ = options;
  }

  /**
   * Validates that {@code [offset, offset + len)} lies inside an array of
   * {@code size} elements.
   *
   * @param offset start index of the region
   * @param len length of the region
   * @param size length of the containing array
   *
   * @throws IndexOutOfBoundsException if any of offset, len, offset + len or
   *     size - (offset + len) is negative
   */
  private static void checkBounds(int offset, int len, int size) {
    // OR-ing the four values leaves the sign bit set if any one of them is
    // negative, so a single comparison covers every failure case.
    if ((offset | len | (offset + len) | (size - (offset + len))) < 0) {
      throw new IndexOutOfBoundsException(
          String.format("offset(%d), len(%d), size(%d)", offset, len, size));
    }
  }

  private static int
computeCapacityHint(final int estimatedNumberOfItems) { // Default load factor for HashMap is 0.75, so N * 1.5 will be at the load // limit. We add +1 for a buffer. return (int)Math.ceil(estimatedNumberOfItems * 1.5 + 1.0); } // native methods private native static long open(final long optionsHandle, final String path) throws RocksDBException; /** * @param optionsHandle Native handle pointing to an Options object * @param path The directory path for the database files * @param columnFamilyNames An array of column family names * @param columnFamilyOptions An array of native handles pointing to * ColumnFamilyOptions objects * * @return An array of native handles, [0] is the handle of the RocksDB object * [1..1+n] are handles of the ColumnFamilyReferences * * @throws RocksDBException thrown if the database could not be opened */ private native static long[] open(final long optionsHandle, final String path, final byte[][] columnFamilyNames, final long[] columnFamilyOptions) throws RocksDBException; private native static long openROnly(final long optionsHandle, final String path) throws RocksDBException; /** * @param optionsHandle Native handle pointing to an Options object * @param path The directory path for the database files * @param columnFamilyNames An array of column family names * @param columnFamilyOptions An array of native handles pointing to * ColumnFamilyOptions objects * * @return An array of native handles, [0] is the handle of the RocksDB object * [1..1+n] are handles of the ColumnFamilyReferences * * @throws RocksDBException thrown if the database could not be opened */ private native static long[] openROnly(final long optionsHandle, final String path, final byte[][] columnFamilyNames, final long[] columnFamilyOptions ) throws RocksDBException; @Override protected native void disposeInternal(final long handle); private native static void closeDatabase(final long handle) throws RocksDBException; private native static byte[][] listColumnFamilies(final 
long optionsHandle, final String path) throws RocksDBException; private native long createColumnFamily(final long handle, final byte[] columnFamilyName, final int columnFamilyNamelen, final long columnFamilyOptions) throws RocksDBException; private native long[] createColumnFamilies(final long handle, final long columnFamilyOptionsHandle, final byte[][] columnFamilyNames) throws RocksDBException; private native long[] createColumnFamilies(final long handle, final long columnFamilyOptionsHandles[], final byte[][] columnFamilyNames) throws RocksDBException; private native void dropColumnFamily( final long handle, final long cfHandle) throws RocksDBException; private native void dropColumnFamilies(final long handle, final long[] cfHandles) throws RocksDBException; //TODO(AR) best way to express DestroyColumnFamilyHandle? ...maybe in ColumnFamilyHandle? private native void put(final long handle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, int valueLength) throws RocksDBException; private native void put(final long handle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength, final long cfHandle) throws RocksDBException; private native void put(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; private native void put(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength, final long cfHandle) throws RocksDBException; private native void delete(final long handle, final byte[] key, final int keyOffset, final int keyLength) throws RocksDBException; private native void delete(final long handle, final byte[] key, final int keyOffset, final int keyLength, final long cfHandle) throws 
RocksDBException; private native void delete(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength) throws RocksDBException; private native void delete(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength, final long cfHandle) throws RocksDBException; private native void singleDelete( final long handle, final byte[] key, final int keyLen) throws RocksDBException; private native void singleDelete( final long handle, final byte[] key, final int keyLen, final long cfHandle) throws RocksDBException; private native void singleDelete( final long handle, final long writeOptHandle, final byte[] key, final int keyLen) throws RocksDBException; private native void singleDelete( final long handle, final long writeOptHandle, final byte[] key, final int keyLen, final long cfHandle) throws RocksDBException; private native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, final int endKeyOffset, final int endKeyLength) throws RocksDBException; private native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, final int endKeyOffset, final int endKeyLength, final long cfHandle) throws RocksDBException; private native void deleteRange(final long handle, final long writeOptHandle, final byte[] beginKey, final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, final int endKeyOffset, final int endKeyLength) throws RocksDBException; private native void deleteRange( final long handle, final long writeOptHandle, final byte[] beginKey, final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, final int endKeyOffset, final int endKeyLength, final long cfHandle) throws RocksDBException; private native void merge(final long handle, final byte[] key, final int keyOffset, final int keyLength, final 
byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; private native void merge(final long handle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength, final long cfHandle) throws RocksDBException; private native void merge(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; private native void merge(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength, final long cfHandle) throws RocksDBException; private native void write0(final long handle, final long writeOptHandle, final long wbHandle) throws RocksDBException; private native void write1(final long handle, final long writeOptHandle, final long wbwiHandle) throws RocksDBException; private native int get(final long handle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; private native int get(final long handle, final byte[] key, final int keyOffset, final int keyLength, byte[] value, final int valueOffset, final int valueLength, final long cfHandle) throws RocksDBException; private native int get(final long handle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; private native int get(final long handle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength, final long cfHandle) throws RocksDBException; private native byte[] get(final long handle, byte[] key, final int keyOffset, final int keyLength) throws RocksDBException; 
private native byte[] get(final long handle, final byte[] key, final int keyOffset, final int keyLength, final long cfHandle) throws RocksDBException; private native byte[] get(final long handle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength) throws RocksDBException; private native byte[] get(final long handle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength, final long cfHandle) throws RocksDBException; private native byte[][] multiGet(final long dbHandle, final byte[][] keys, final int[] keyOffsets, final int[] keyLengths); private native byte[][] multiGet(final long dbHandle, final byte[][] keys, final int[] keyOffsets, final int[] keyLengths, final long[] columnFamilyHandles); private native byte[][] multiGet(final long dbHandle, final long rOptHandle, final byte[][] keys, final int[] keyOffsets, final int[] keyLengths); private native byte[][] multiGet(final long dbHandle, final long rOptHandle, final byte[][] keys, final int[] keyOffsets, final int[] keyLengths, final long[] columnFamilyHandles); private native boolean keyMayExist( final long handle, final long cfHandle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength); private native byte[][] keyMayExistFoundValue( final long handle, final long cfHandle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength); private native void putDirect(long handle, long writeOptHandle, ByteBuffer key, int keyOffset, int keyLength, ByteBuffer value, int valueOffset, int valueLength, long cfHandle) throws RocksDBException; private native long iterator(final long handle); private native long iterator(final long handle, final long readOptHandle); private native long iteratorCF(final long handle, final long cfHandle); private native long iteratorCF(final long handle, final long cfHandle, final long readOptHandle); private native long[] iterators(final long handle, final long[] 
columnFamilyHandles, final long readOptHandle) throws RocksDBException; private native long getSnapshot(final long nativeHandle); private native void releaseSnapshot( final long nativeHandle, final long snapshotHandle); private native String getProperty(final long nativeHandle, final long cfHandle, final String property, final int propertyLength) throws RocksDBException; private native Map getMapProperty(final long nativeHandle, final long cfHandle, final String property, final int propertyLength) throws RocksDBException; private native int getDirect(long handle, long readOptHandle, ByteBuffer key, int keyOffset, int keyLength, ByteBuffer value, int valueOffset, int valueLength, long cfHandle) throws RocksDBException; private native void deleteDirect(long handle, long optHandle, ByteBuffer key, int keyOffset, int keyLength, long cfHandle) throws RocksDBException; private native long getLongProperty(final long nativeHandle, final long cfHandle, final String property, final int propertyLength) throws RocksDBException; private native void resetStats(final long nativeHandle) throws RocksDBException; private native long getAggregatedLongProperty(final long nativeHandle, final String property, int propertyLength) throws RocksDBException; private native long[] getApproximateSizes(final long nativeHandle, final long columnFamilyHandle, final long[] rangeSliceHandles, final byte includeFlags); private final native long[] getApproximateMemTableStats( final long nativeHandle, final long columnFamilyHandle, final long rangeStartSliceHandle, final long rangeLimitSliceHandle); private native void compactRange(final long handle, /* @Nullable */ final byte[] begin, final int beginLen, /* @Nullable */ final byte[] end, final int endLen, final long compactRangeOptHandle, final long cfHandle) throws RocksDBException; private native void setOptions(final long handle, final long cfHandle, final String[] keys, final String[] values) throws RocksDBException; private native void 
setDBOptions(final long handle, final String[] keys, final String[] values) throws RocksDBException; private native String[] compactFiles(final long handle, final long compactionOptionsHandle, final long columnFamilyHandle, final String[] inputFileNames, final int outputLevel, final int outputPathId, final long compactionJobInfoHandle) throws RocksDBException; private native void cancelAllBackgroundWork(final long handle, final boolean wait); private native void pauseBackgroundWork(final long handle) throws RocksDBException; private native void continueBackgroundWork(final long handle) throws RocksDBException; private native void enableAutoCompaction(final long handle, final long[] columnFamilyHandles) throws RocksDBException; private native int numberLevels(final long handle, final long columnFamilyHandle); private native int maxMemCompactionLevel(final long handle, final long columnFamilyHandle); private native int level0StopWriteTrigger(final long handle, final long columnFamilyHandle); private native String getName(final long handle); private native long getEnv(final long handle); private native void flush(final long handle, final long flushOptHandle, /* @Nullable */ final long[] cfHandles) throws RocksDBException; private native void flushWal(final long handle, final boolean sync) throws RocksDBException; private native void syncWal(final long handle) throws RocksDBException; private native long getLatestSequenceNumber(final long handle); private native boolean setPreserveDeletesSequenceNumber(final long handle, final long sequenceNumber); private native void disableFileDeletions(long handle) throws RocksDBException; private native void enableFileDeletions(long handle, boolean force) throws RocksDBException; private native String[] getLiveFiles(final long handle, final boolean flushMemtable) throws RocksDBException; private native LogFile[] getSortedWalFiles(final long handle) throws RocksDBException; private native long getUpdatesSince(final long handle, 
final long sequenceNumber) throws RocksDBException; private native void deleteFile(final long handle, final String name) throws RocksDBException; private native LiveFileMetaData[] getLiveFilesMetaData(final long handle); private native ColumnFamilyMetaData getColumnFamilyMetaData( final long handle, final long columnFamilyHandle); private native void ingestExternalFile(final long handle, final long columnFamilyHandle, final String[] filePathList, final int filePathListLen, final long ingestExternalFileOptionsHandle) throws RocksDBException; private native void verifyChecksum(final long handle) throws RocksDBException; private native long getDefaultColumnFamily(final long handle); private native Map getPropertiesOfAllTables( final long handle, final long columnFamilyHandle) throws RocksDBException; private native Map getPropertiesOfTablesInRange( final long handle, final long columnFamilyHandle, final long[] rangeSliceHandles); private native long[] suggestCompactRange(final long handle, final long columnFamilyHandle) throws RocksDBException; private native void promoteL0(final long handle, final long columnFamilyHandle, final int tragetLevel) throws RocksDBException; private native void startTrace(final long handle, final long maxTraceFileSize, final long traceWriterHandle) throws RocksDBException; private native void endTrace(final long handle) throws RocksDBException; private native void deleteFilesInRanges(long handle, long cfHandle, final byte[][] ranges, boolean include_end) throws RocksDBException; private native static void destroyDB(final String path, final long optionsHandle) throws RocksDBException; protected DBOptionsInterface options_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksDBException.java000066400000000000000000000023171370372246700250540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * A RocksDBException encapsulates the error of an operation. This exception * type is used to describe an internal error from the c++ rocksdb library. */ public class RocksDBException extends Exception { /* @Nullable */ private final Status status; /** * The private construct used by a set of public static factory method. * * @param msg the specified error message. */ public RocksDBException(final String msg) { this(msg, null); } public RocksDBException(final String msg, final Status status) { super(msg); this.status = status; } public RocksDBException(final Status status) { super(status.getState() != null ? status.getState() : status.getCodeString()); this.status = status; } /** * Get the status returned from RocksDB * * @return The status reported by RocksDB, or null if no status is available */ public Status getStatus() { return status; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksEnv.java000066400000000000000000000021331370372246700234340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** *

A RocksEnv is an interface used by the rocksdb implementation to access * operating system functionality like the filesystem etc.

* *

All Env implementations are safe for concurrent access from * multiple threads without any external synchronization.

*/ public class RocksEnv extends Env { /** *

Package-private constructor that uses the specified native handle * to construct a RocksEnv.

* *

Note that the ownership of the input handle * belongs to the caller, and the newly created RocksEnv will not take * the ownership of the input handle. As a result, calling * {@code dispose()} of the created RocksEnv will be no-op.

* * @param handle the native handle, forwarded unchanged to the {@code Env} * superclass constructor.
*/ RocksEnv(final long handle) { super(handle); }
/* Declared native: the Java side supplies no body for the disposal hook. */
@Override protected native final void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksIterator.java000066400000000000000000000112111370372246700244720ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** *

An iterator that yields a sequence of key/value pairs from a source. * Multiple implementations are provided by this library. * In particular, iterators are provided * to access the contents of a Table or a DB.

* *

Multiple threads can invoke const methods on a RocksIterator without * external synchronization, but if any of the threads may call a * non-const method, all threads accessing the same RocksIterator must use * external synchronization.

* * @see org.rocksdb.RocksObject */
public class RocksIterator extends AbstractRocksIterator {

  /**
   * Wraps a native iterator; both arguments are handed straight to the
   * superclass constructor.
   *
   * @param rocksDB the database instance passed to the superclass.
   * @param nativeHandle the native handle passed to the superclass.
   */
  protected RocksIterator(RocksDB rocksDB, long nativeHandle) {
    super(rocksDB, nativeHandle);
  }

  /**
   * Returns the key of the entry the iterator is positioned at. The
   * underlying storage for the returned slice is valid only until the
   * next modification of the iterator.
   *
   * REQUIRES: {@link #isValid()}
   *
   * @return key for the current entry.
   */
  public byte[] key() {
    assert isOwningHandle();
    final byte[] currentKey = key0(nativeHandle_);
    return currentKey;
  }

  /**
   * Copies the key of the current entry into a caller-supplied buffer. The
   * underlying storage is valid only until the next modification of the
   * iterator.
   *
   * REQUIRES: {@link #isValid()}
   *
   * @param key the out-value to receive the retrieved key.
   *     It is using position and limit. Limit is set according to key size.
   *     Supports direct buffer only.
   * @return The size of the actual key. If the returned size is greater than
   *     the length of {@code key}, then it indicates that the size of the
   *     input buffer {@code key} is insufficient and partial result will
   *     be returned.
   */
  public int key(ByteBuffer key) {
    assert isOwningHandle();
    assert key.isDirect();
    final int keySize = keyDirect0(nativeHandle_, key, key.position(), key.remaining());
    // The limit is only ever pulled in, never pushed out past its current value.
    final int endOfKey = key.position() + keySize;
    key.limit(Math.min(endOfKey, key.limit()));
    return keySize;
  }

  /**
   * Returns the value of the entry the iterator is positioned at. The
   * underlying storage for the returned slice is valid only until the
   * next modification of the iterator.
   *
   * REQUIRES: {@link #isValid()}
   *
   * @return value for the current entry.
   */
  public byte[] value() {
    assert isOwningHandle();
    final byte[] currentValue = value0(nativeHandle_);
    return currentValue;
  }

  /**
   * Copies the value of the current entry into a caller-supplied buffer. The
   * underlying storage is valid only until the next modification of the
   * iterator.
   *
   * REQUIRES: {@link #isValid()}
   *
   * @param value the out-value to receive the retrieved value.
   *     It is using position and limit. Limit is set according to value size.
   *     Supports direct buffer only.
   * @return The size of the actual value. If the returned size is greater
   *     than the length of {@code value}, then it indicates that the size of
   *     the input buffer {@code value} is insufficient and partial result
   *     will be returned.
   */
  public int value(ByteBuffer value) {
    assert isOwningHandle();
    assert value.isDirect();
    final int valueSize =
        valueDirect0(nativeHandle_, value, value.position(), value.remaining());
    // The limit is only ever pulled in, never pushed out past its current value.
    final int endOfValue = value.position() + valueSize;
    value.limit(Math.min(endOfValue, value.limit()));
    return valueSize;
  }

  @Override protected final native void disposeInternal(final long handle);
  @Override final native boolean isValid0(long handle);
  @Override final native void seekToFirst0(long handle);
  @Override final native void seekToLast0(long handle);
  @Override final native void next0(long handle);
  @Override final native void prev0(long handle);
  @Override final native void refresh0(long handle);
  @Override final native void seek0(long handle, byte[] target, int targetLen);
  @Override final native void seekForPrev0(long handle, byte[] target, int targetLen);
  @Override
  final native void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen);
  @Override
  final native void seekForPrevDirect0(
      long handle, ByteBuffer target, int targetOffset, int targetLen);
  @Override final native void status0(long handle) throws RocksDBException;

  private native byte[] key0(long handle);
  private native byte[] value0(long handle);
  private native int keyDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen);
  private native int valueDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksIteratorInterface.java000066400000000000000000000101561370372246700263220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** *

Defines the interface for an Iterator which provides * access to data one entry at a time. Multiple implementations * are provided by this library. In particular, iterators are provided * to access the contents of a DB and Write Batch.

* *

Multiple threads can invoke const methods on a RocksIterator without * external synchronization, but if any of the threads may call a * non-const method, all threads accessing the same RocksIterator must use * external synchronization.

* * @see org.rocksdb.RocksObject */
public interface RocksIteratorInterface {

  /**
   * An iterator is either positioned at an entry, or
   * not valid. This method returns true if the iterator is valid.
   *
   * @return true if iterator is valid.
   */
  boolean isValid();

  /**
   * Position at the first entry in the source. The iterator is valid
   * (see {@link #isValid()}) after this call if the source is not empty.
   */
  void seekToFirst();

  /**
   * Position at the last entry in the source. The iterator is
   * valid after this call if the source is not empty.
   */
  void seekToLast();

  /**
   * Position at the first entry in the source whose key is at or
   * past target.
   *
   * The iterator is valid after this call if the source contains
   * a key that comes at or past target.
   *
   * @param target byte array describing a key or a
   *     key prefix to seek for.
   */
  void seek(byte[] target);

  /**
   * Position at the last entry in the source whose key is at or
   * before target.
   *
   * The iterator is valid after this call if the source contains
   * a key that comes at or before target.
   *
   * @param target byte array describing a key or a
   *     key prefix to seek for.
   */
  void seekForPrev(byte[] target);

  /**
   * Position at the first entry in the source whose key is at or
   * past target.
   *
   * The iterator is valid after this call if the source contains
   * a key that comes at or past target.
   *
   * @param target byte buffer describing a key or a
   *     key prefix to seek for. Supports direct buffer only.
   */
  void seek(ByteBuffer target);

  /**
   * Position at the last key that is less than or equal to the target key.
   *
   * The iterator is valid after this call if the source contains
   * a key that comes at or before target.
   *
   * @param target byte buffer describing a key or a
   *     key prefix to seek for. Supports direct buffer only.
   */
  void seekForPrev(ByteBuffer target);

  /**
   * Moves to the next entry in the source. After this call,
   * {@link #isValid()} is true if the iterator was not positioned at the
   * last entry in the source.
   *
   * REQUIRES: {@link #isValid()}
   */
  void next();

  /**
   * Moves to the previous entry in the source. After this call,
   * {@link #isValid()} is true if the iterator was not positioned at the
   * first entry in the source.
   *
   * REQUIRES: {@link #isValid()}
   */
  void prev();

  /**
   * Checks the status of the iterator. This method returns nothing; an
   * error is reported through the declared {@link RocksDBException}.
   * If non-blocking IO is requested and this operation cannot be
   * satisfied without doing some IO, the reported status is
   * Status::Incomplete().
   *
   * @throws RocksDBException thrown if error happens in underlying
   *     native library.
   */
  void status() throws RocksDBException;

  /**
   * If supported, renew the iterator to represent the latest state. The iterator will be
   * invalidated after the call. Not supported if {@link ReadOptions#setSnapshot(Snapshot)} was
   * specified when creating the iterator.
   *
   * @throws RocksDBException thrown if the operation is not supported or an error happens in the
   *     underlying native library
   */
  void refresh() throws RocksDBException;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksMemEnv.java000066400000000000000000000021071370372246700240740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Memory environment. */ //TODO(AR) rename to MemEnv public class RocksMemEnv extends Env { /** *

Creates a new environment that stores its data * in memory and delegates all non-file-storage tasks to * {@code baseEnv}.

* *

The caller must delete the result when it is * no longer needed.

* * @param baseEnv the base environment, * must remain live while the result is in use. */
public RocksMemEnv(final Env baseEnv) {
  /* The new native environment is created from baseEnv's native handle. */
  super(createMemEnv(baseEnv.nativeHandle_));
}

/**
 * Creates an in-memory environment on top of {@link Env#getDefault()}.
 *
 * @deprecated Use {@link #RocksMemEnv(Env)}.
 */
@Deprecated
public RocksMemEnv() {
  this(Env.getDefault());
}

/*
 * Native factory: receives the base Env's native handle; its result is the
 * handle given to the Env superclass constructor.
 */
private static native long createMemEnv(final long baseEnvHandle);

/* Declared native: the Java side supplies no body for the disposal hook. */
@Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksMutableObject.java000066400000000000000000000051061370372246700254270ustar00rootroot00000000000000// Copyright (c) 2016, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * RocksMutableObject is an implementation of {@link AbstractNativeReference} * whose reference to the underlying native C++ object can change. * *

The use of {@code RocksMutableObject} should be kept to a minimum, as it * has synchronization overheads and introduces complexity. Instead it is * recommended to use {@link RocksObject} where possible.

*/
public abstract class RocksMutableObject extends AbstractNativeReference {

  /**
   * A mutable reference to the value of the C++ pointer pointing to some
   * underlying native RocksDB C++ object.
   */
  private long nativeHandle_;

  /** True while this Java object is responsible for disposing the native object. */
  private boolean owningHandle_;

  /** Creates an instance with no native handle and no ownership. */
  protected RocksMutableObject() {
  }

  /**
   * Creates an instance that owns the given native object.
   *
   * @param nativeHandle The C++ pointer to the native object
   */
  protected RocksMutableObject(final long nativeHandle) {
    this.owningHandle_ = true;
    this.nativeHandle_ = nativeHandle;
  }

  /**
   * Closes the existing handle, and changes the handle to the new handle
   *
   * @param newNativeHandle The C++ pointer to the new native object
   * @param owningNativeHandle true if we own the new native object
   */
  public synchronized void resetNativeHandle(final long newNativeHandle,
      final boolean owningNativeHandle) {
    // Release whatever is currently held before adopting the replacement.
    close();
    setNativeHandle(newNativeHandle, owningNativeHandle);
  }

  /**
   * Sets the handle (C++ pointer) of the underlying C++ native object
   *
   * @param nativeHandle The C++ pointer to the native object
   * @param owningNativeHandle true if we own the native object
   */
  public synchronized void setNativeHandle(final long nativeHandle,
      final boolean owningNativeHandle) {
    this.owningHandle_ = owningNativeHandle;
    this.nativeHandle_ = nativeHandle;
  }

  @Override
  protected synchronized boolean isOwningHandle() {
    return this.owningHandle_;
  }

  /**
   * Gets the value of the C++ pointer pointing to the underlying
   * native C++ object
   *
   * @return the pointer value for the native object
   */
  protected synchronized long getNativeHandle() {
    final long handle = this.nativeHandle_;
    assert (handle != 0);
    return handle;
  }

  /**
   * Disposes the native object if, and only if, it is currently owned, then
   * drops both the ownership flag and the stored handle.
   */
  @Override
  public synchronized final void close() {
    if (!isOwningHandle()) {
      return;
    }
    disposeInternal();
    this.owningHandle_ = false;
    this.nativeHandle_ = 0;
  }

  /** Disposes the native object referenced by the currently stored handle. */
  protected void disposeInternal() {
    disposeInternal(nativeHandle_);
  }

  protected abstract void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/RocksObject.java000066400000000000000000000024271370372246700241200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * RocksObject is an implementation of {@link AbstractNativeReference} which * has an immutable and therefore thread-safe reference to the underlying * native C++ RocksDB object. *

* RocksObject is the base-class of almost all RocksDB classes that have a * pointer to some underlying native C++ {@code rocksdb} object.

*

* The use of {@code RocksObject} should always be preferred over * {@link RocksMutableObject}.

*/
public abstract class RocksObject extends AbstractImmutableNativeReference {

  /**
   * An immutable reference to the value of the C++ pointer pointing to some
   * underlying native RocksDB C++ object.
   */
  protected final long nativeHandle_;

  /**
   * Stores the native handle; the superclass is constructed with
   * {@code true}.
   *
   * @param nativeHandle The C++ pointer to the native object
   */
  protected RocksObject(final long nativeHandle) {
    super(true);
    nativeHandle_ = nativeHandle;
  }

  /**
   * Deletes underlying C++ object pointer.
   */
  @Override
  protected void disposeInternal() {
    final long handle = nativeHandle_;
    disposeInternal(handle);
  }

  protected abstract void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/SanityLevel.java000066400000000000000000000017371370372246700241520ustar00rootroot00000000000000package org.rocksdb;

public enum SanityLevel {
  NONE((byte) 0x0),
  LOOSELY_COMPATIBLE((byte) 0x1),
  EXACT_MATCH((byte) 0xFF);

  private final byte value;

  SanityLevel(final byte value) {
    this.value = value;
  }

  /**
   * Get the internal representation value.
   *
   * @return the internal representation value.
   */
  // TODO(AR) should be made package-private
  public byte getValue() {
    return this.value;
  }

  /**
   * Get the SanityLevel from the internal representation value.
   *
   * @param value the internal representation value.
   *
   * @return the SanityLevel
   *
   * @throws IllegalArgumentException if the value does not match a
   *     SanityLevel
   */
  static SanityLevel fromValue(final byte value) throws IllegalArgumentException {
    final SanityLevel[] candidates = SanityLevel.values();
    for (int i = 0; i < candidates.length; i++) {
      final SanityLevel candidate = candidates[i];
      if (candidate.value == value) {
        return candidate;
      }
    }
    throw new IllegalArgumentException("Unknown value for SanityLevel: " + value);
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/SizeApproximationFlag.java000066400000000000000000000012661370372246700261670ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
package org.rocksdb;

import java.util.List;

/**
 * Flags for
 * {@link RocksDB#getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)}
 * that specify whether memtable stats should be included,
 * or file stats approximation or both.
 */
public enum SizeApproximationFlag {
  NONE((byte)0x0),
  INCLUDE_MEMTABLES((byte)0x1),
  INCLUDE_FILES((byte)0x2);

  private final byte value;

  SizeApproximationFlag(final byte value) {
    this.value = value;
  }

  /**
   * Get the internal byte representation.
   *
   * @return the internal representation.
   */
  byte getValue() {
    return this.value;
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java000066400000000000000000000024331370372246700262040ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

/**
 * The config for skip-list memtable representation.
 */
public class SkipListMemTableConfig extends MemTableConfig {

  public static final long DEFAULT_LOOKAHEAD = 0;

  private long lookahead_;

  /**
   * SkipListMemTableConfig constructor; the lookahead starts at
   * {@link #DEFAULT_LOOKAHEAD}.
   */
  public SkipListMemTableConfig() {
    this.lookahead_ = DEFAULT_LOOKAHEAD;
  }

  /**
   * Sets lookahead for SkipList
   *
   * @param lookahead If non-zero, each iterator's seek operation
   *     will start the search from the previously visited record
   *     (doing at most 'lookahead' steps). This is an
   *     optimization for the access pattern including many
   *     seeks with consecutive keys.
   * @return the current instance of SkipListMemTableConfig
   */
  public SkipListMemTableConfig setLookahead(final long lookahead) {
    this.lookahead_ = lookahead;
    return this;
  }

  /**
   * Returns the currently set lookahead value.
   *
   * @return lookahead value
   */
  public long lookahead() {
    return this.lookahead_;
  }

  /** Creates the native memtable factory from the current lookahead value. */
  @Override
  protected long newMemTableFactoryHandle() {
    final long factoryHandle = newMemTableFactoryHandle0(this.lookahead_);
    return factoryHandle;
  }

  private native long newMemTableFactoryHandle0(long lookahead)
      throws IllegalArgumentException;
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/Slice.java000066400000000000000000000102231370372246700227400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** *

Base class for slices which will receive * byte[] based access to the underlying data.

* *

byte[] backed slices typically perform better with * small keys and values. When using larger keys and * values consider using {@link org.rocksdb.DirectSlice}

*/
public class Slice extends AbstractSlice {

  /**
   * Indicates whether we have to free the memory pointed to by the Slice
   */
  private volatile boolean cleared;

  /** Running total of the bytes removed via {@link #removePrefix(int)}. */
  private volatile long internalBufferOffset = 0;

  /**
   * Called from JNI to construct a new Java Slice
   * without an underlying C++ object set
   * at creation time.
   *
   * This constructor only invokes the superclass constructor and is
   * private, so developers cannot construct their own default Slice objects
   * (at present). As they cannot construct Slice objects through this, they
   * are not creating underlying C++ Slice objects, and so there is nothing
   * to free (dispose) from Java.
   *
   * NOTE(review): earlier documentation stated that
   * {@link org.rocksdb.RocksObject#disOwnNativeHandle()} is called from this
   * constructor; no such call is visible here - confirm against the
   * superclass.
   */
  @SuppressWarnings("unused")
  private Slice() {
    super();
  }

  /**
   * Package-private Slice constructor which is used to construct
   * Slice instances from C++ side. As the reference to this
   * object is also managed from C++ side the handle will be disowned.
   *
   * @param nativeHandle address of native instance.
   */
  Slice(final long nativeHandle) {
    this(nativeHandle, false);
  }

  /**
   * Package-private Slice constructor which is used to construct
   * Slice instances using a handle.
   *
   * @param nativeHandle address of native instance.
   * @param owningNativeHandle true if the Java side owns the memory pointed to
   *     by this reference, false if ownership belongs to the C++ side
   */
  Slice(final long nativeHandle, final boolean owningNativeHandle) {
    super();
    setNativeHandle(nativeHandle, owningNativeHandle);
  }

  /**
   * Constructs a slice where the data is taken from
   * a String.
   *
   * @param str String value.
   */
  public Slice(final String str) {
    super(createNewSliceFromString(str));
  }

  /**
   * Constructs a slice where the data is a copy of
   * the byte array from a specific offset.
   *
   * @param data byte array.
   * @param offset offset within the byte array.
   */
  public Slice(final byte[] data, final int offset) {
    super(createNewSlice0(data, offset));
  }

  /**
   * Constructs a slice where the data is a copy of
   * the byte array.
   *
   * @param data byte array.
   */
  public Slice(final byte[] data) {
    super(createNewSlice1(data));
  }

  @Override
  public void clear() {
    // The native side is told whether the internal buffer has not been cleared yet.
    clear0(getNativeHandle(), !cleared, internalBufferOffset);
    cleared = true;
  }

  @Override
  public void removePrefix(final int n) {
    removePrefix0(getNativeHandle(), n);
    // Remember how far the slice has advanced; passed on to clear0/disposeInternalBuf.
    this.internalBufferOffset += n;
  }

  /**
   * Deletes underlying C++ slice pointer
   * and any buffered data.
   *
   * Note that this function should be called only after all
   * RocksDB instances referencing the slice are closed.
   * Otherwise an undefined behavior will occur.
   */
  @Override
  protected void disposeInternal() {
    final long nativeHandle = getNativeHandle();
    if (!cleared) {
      // The buffer is only disposed here when clear() has not already been called.
      disposeInternalBuf(nativeHandle, internalBufferOffset);
    }
    super.disposeInternal(nativeHandle);
  }

  @Override protected final native byte[] data0(long handle);
  // The second argument is the offset supplied by Slice(byte[], int).
  private native static long createNewSlice0(final byte[] data,
      final int offset);
  private native static long createNewSlice1(final byte[] data);
  private native void clear0(long handle, boolean internalBuffer,
      long internalBufferOffset);
  private native void removePrefix0(long handle, int length);
  private native void disposeInternalBuf(final long handle,
      long internalBufferOffset);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/Snapshot.java000066400000000000000000000020501370372246700234770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Snapshot of database
 */
public class Snapshot extends RocksObject {

  /**
   * Wraps a native snapshot and immediately disowns the handle.
   *
   * @param nativeHandle address of the native snapshot.
   */
  Snapshot(final long nativeHandle) {
    super(nativeHandle);

    // The pointer to the snapshot is always released
    // by the database instance.
    disOwnNativeHandle();
  }

  /**
   * Return the associated sequence number;
   *
   * @return the associated sequence number of
   *     this snapshot.
   */
  public long getSequenceNumber() {
    return getSequenceNumber(nativeHandle_);
  }

  @Override
  protected final void disposeInternal(final long handle) {
    // Nothing to release, we never own the pointer for a
    // Snapshot. The pointer to the snapshot is released
    // by the database instance.
  }

  private native long getSequenceNumber(long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/SstFileManager.java000066400000000000000000000230371370372246700245540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Map; /** * SstFileManager is used to track SST files in the DB and control their * deletion rate. * * All SstFileManager public functions are thread-safe. * * SstFileManager is not extensible. */ //@ThreadSafe public final class SstFileManager extends RocksObject { public static final long RATE_BYTES_PER_SEC_DEFAULT = 0; public static final boolean DELETE_EXISTING_TRASH_DEFAULT = true; public static final double MAX_TRASH_DB_RATION_DEFAULT = 0.25; public static final long BYTES_MAX_DELETE_CHUNK_DEFAULT = 64 * 1024 * 1024; /** * Create a new SstFileManager that can be shared among multiple RocksDB * instances to track SST file and control there deletion rate. * * @param env the environment. * * @throws RocksDBException thrown if error happens in underlying native library. */ public SstFileManager(final Env env) throws RocksDBException { this(env, null); } /** * Create a new SstFileManager that can be shared among multiple RocksDB * instances to track SST file and control there deletion rate. * * @param env the environment. * @param logger if not null, the logger will be used to log errors. * * @throws RocksDBException thrown if error happens in underlying native library. */ public SstFileManager(final Env env, /*@Nullable*/ final Logger logger) throws RocksDBException { this(env, logger, RATE_BYTES_PER_SEC_DEFAULT); } /** * Create a new SstFileManager that can be shared among multiple RocksDB * instances to track SST file and control there deletion rate. * * @param env the environment. * @param logger if not null, the logger will be used to log errors. 
* * == Deletion rate limiting specific arguments == * @param rateBytesPerSec how many bytes should be deleted per second, If * this value is set to 1024 (1 Kb / sec) and we deleted a file of size * 4 Kb in 1 second, we will wait for another 3 seconds before we delete * other files, Set to 0 to disable deletion rate limiting. * * @throws RocksDBException thrown if error happens in underlying native library. */ public SstFileManager(final Env env, /*@Nullable*/ final Logger logger, final long rateBytesPerSec) throws RocksDBException { this(env, logger, rateBytesPerSec, MAX_TRASH_DB_RATION_DEFAULT); } /** * Create a new SstFileManager that can be shared among multiple RocksDB * instances to track SST file and control there deletion rate. * * @param env the environment. * @param logger if not null, the logger will be used to log errors. * * == Deletion rate limiting specific arguments == * @param rateBytesPerSec how many bytes should be deleted per second, If * this value is set to 1024 (1 Kb / sec) and we deleted a file of size * 4 Kb in 1 second, we will wait for another 3 seconds before we delete * other files, Set to 0 to disable deletion rate limiting. * @param maxTrashDbRatio if the trash size constitutes for more than this * fraction of the total DB size we will start deleting new files passed * to DeleteScheduler immediately. * * @throws RocksDBException thrown if error happens in underlying native library. */ public SstFileManager(final Env env, /*@Nullable*/ final Logger logger, final long rateBytesPerSec, final double maxTrashDbRatio) throws RocksDBException { this(env, logger, rateBytesPerSec, maxTrashDbRatio, BYTES_MAX_DELETE_CHUNK_DEFAULT); } /** * Create a new SstFileManager that can be shared among multiple RocksDB * instances to track SST file and control there deletion rate. * * @param env the environment. * @param logger if not null, the logger will be used to log errors. 
* * == Deletion rate limiting specific arguments == * @param rateBytesPerSec how many bytes should be deleted per second, If * this value is set to 1024 (1 Kb / sec) and we deleted a file of size * 4 Kb in 1 second, we will wait for another 3 seconds before we delete * other files, Set to 0 to disable deletion rate limiting. * @param maxTrashDbRatio if the trash size constitutes for more than this * fraction of the total DB size we will start deleting new files passed * to DeleteScheduler immediately. * @param bytesMaxDeleteChunk if a single file is larger than delete chunk, * ftruncate the file by this size each time, rather than dropping the whole * file. 0 means to always delete the whole file. * * @throws RocksDBException thrown if error happens in underlying native library. */ public SstFileManager(final Env env, /*@Nullable*/final Logger logger, final long rateBytesPerSec, final double maxTrashDbRatio, final long bytesMaxDeleteChunk) throws RocksDBException { super(newSstFileManager(env.nativeHandle_, logger != null ? logger.nativeHandle_ : 0, rateBytesPerSec, maxTrashDbRatio, bytesMaxDeleteChunk)); } /** * Update the maximum allowed space that should be used by RocksDB, if * the total size of the SST files exceeds {@code maxAllowedSpace}, writes to * RocksDB will fail. * * Setting {@code maxAllowedSpace} to 0 will disable this feature; * maximum allowed space will be infinite (Default value). * * @param maxAllowedSpace the maximum allowed space that should be used by * RocksDB. */ public void setMaxAllowedSpaceUsage(final long maxAllowedSpace) { setMaxAllowedSpaceUsage(nativeHandle_, maxAllowedSpace); } /** * Set the amount of buffer room each compaction should be able to leave. * In other words, at its maximum disk space consumption, the compaction * should still leave {@code compactionBufferSize} available on the disk so * that other background functions may continue, such as logging and flushing. 
* * @param compactionBufferSize the amount of buffer room each compaction * should be able to leave. */ public void setCompactionBufferSize(final long compactionBufferSize) { setCompactionBufferSize(nativeHandle_, compactionBufferSize); } /** * Determines if the total size of SST files exceeded the maximum allowed * space usage. * * @return true when the maximum allows space usage has been exceeded. */ public boolean isMaxAllowedSpaceReached() { return isMaxAllowedSpaceReached(nativeHandle_); } /** * Determines if the total size of SST files as well as estimated size * of ongoing compactions exceeds the maximums allowed space usage. * * @return true when the total size of SST files as well as estimated size * of ongoing compactions exceeds the maximums allowed space usage. */ public boolean isMaxAllowedSpaceReachedIncludingCompactions() { return isMaxAllowedSpaceReachedIncludingCompactions(nativeHandle_); } /** * Get the total size of all tracked files. * * @return the total size of all tracked files. */ public long getTotalSize() { return getTotalSize(nativeHandle_); } /** * Gets all tracked files and their corresponding sizes. * * @return a map containing all tracked files and there corresponding sizes. */ public Map getTrackedFiles() { return getTrackedFiles(nativeHandle_); } /** * Gets the delete rate limit. * * @return the delete rate limit (in bytes per second). */ public long getDeleteRateBytesPerSecond() { return getDeleteRateBytesPerSecond(nativeHandle_); } /** * Set the delete rate limit. * * Zero means disable delete rate limiting and delete files immediately. * * @param deleteRate the delete rate limit (in bytes per second). */ public void setDeleteRateBytesPerSecond(final long deleteRate) { setDeleteRateBytesPerSecond(nativeHandle_, deleteRate); } /** * Get the trash/DB size ratio where new files will be deleted immediately. * * @return the trash/DB size ratio. 
*/ public double getMaxTrashDBRatio() { return getMaxTrashDBRatio(nativeHandle_); } /** * Set the trash/DB size ratio where new files will be deleted immediately. * * @param ratio the trash/DB size ratio. */ public void setMaxTrashDBRatio(final double ratio) { setMaxTrashDBRatio(nativeHandle_, ratio); } private native static long newSstFileManager(final long handle, final long logger_handle, final long rateBytesPerSec, final double maxTrashDbRatio, final long bytesMaxDeleteChunk) throws RocksDBException; private native void setMaxAllowedSpaceUsage(final long handle, final long maxAllowedSpace); private native void setCompactionBufferSize(final long handle, final long compactionBufferSize); private native boolean isMaxAllowedSpaceReached(final long handle); private native boolean isMaxAllowedSpaceReachedIncludingCompactions( final long handle); private native long getTotalSize(final long handle); private native Map getTrackedFiles(final long handle); private native long getDeleteRateBytesPerSecond(final long handle); private native void setDeleteRateBytesPerSecond(final long handle, final long deleteRate); private native double getMaxTrashDBRatio(final long handle); private native void setMaxTrashDBRatio(final long handle, final double ratio); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/SstFileMetaData.java000066400000000000000000000073731370372246700246670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The metadata that describes a SST file. 
*/
public class SstFileMetaData {
  // File identity.
  private final String fileName;
  private final String path;
  private final long size;

  // Sequence-number and key range covered by the file.
  private final long smallestSeqno;
  private final long largestSeqno;
  private final byte[] smallestKey;
  private final byte[] largestKey;

  // Usage and content counters.
  private final long numReadsSampled;
  private final boolean beingCompacted;
  private final long numEntries;
  private final long numDeletions;

  /**
   * Called from JNI C++
   *
   * Stores every argument as-is; the key arrays are kept by reference,
   * not copied.
   *
   * @param fileName the file name
   * @param path the file path
   * @param size the size of the file
   * @param smallestSeqno the smallest sequence number
   * @param largestSeqno the largest sequence number
   * @param smallestKey the smallest key
   * @param largestKey the largest key
   * @param numReadsSampled the number of reads sampled
   * @param beingCompacted true if the file is being compacted, false otherwise
   * @param numEntries the number of entries
   * @param numDeletions the number of deletions
   */
  protected SstFileMetaData(
      final String fileName,
      final String path,
      final long size,
      final long smallestSeqno,
      final long largestSeqno,
      final byte[] smallestKey,
      final byte[] largestKey,
      final long numReadsSampled,
      final boolean beingCompacted,
      final long numEntries,
      final long numDeletions) {
    this.fileName = fileName;
    this.path = path;
    this.size = size;
    this.smallestSeqno = smallestSeqno;
    this.largestSeqno = largestSeqno;
    this.smallestKey = smallestKey;
    this.largestKey = largestKey;
    this.numReadsSampled = numReadsSampled;
    this.beingCompacted = beingCompacted;
    this.numEntries = numEntries;
    this.numDeletions = numDeletions;
  }

  /**
   * Get the name of the file.
   *
   * @return the name of the file.
   */
  public String fileName() {
    return fileName;
  }

  /**
   * Get the full path where the file is located.
   *
   * @return the full path
   */
  public String path() {
    return path;
  }

  /**
   * Get the file size in bytes.
   *
   * @return file size
   */
  public long size() {
    return size;
  }

  /**
   * Get the smallest sequence number in file.
* * @return the smallest sequence number */ public long smallestSeqno() { return smallestSeqno; } /** * Get the largest sequence number in file. * * @return the largest sequence number */ public long largestSeqno() { return largestSeqno; } /** * Get the smallest user defined key in the file. * * @return the smallest user defined key */ public byte[] smallestKey() { return smallestKey; } /** * Get the largest user defined key in the file. * * @return the largest user defined key */ public byte[] largestKey() { return largestKey; } /** * Get the number of times the file has been read. * * @return the number of times the file has been read */ public long numReadsSampled() { return numReadsSampled; } /** * Returns true if the file is currently being compacted. * * @return true if the file is currently being compacted, false otherwise. */ public boolean beingCompacted() { return beingCompacted; } /** * Get the number of entries. * * @return the number of entries. */ public long numEntries() { return numEntries; } /** * Get the number of deletions. * * @return the number of deletions. */ public long numDeletions() { return numDeletions; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/SstFileReader.java000066400000000000000000000051271370372246700244040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public class SstFileReader extends RocksObject { static { RocksDB.loadLibrary(); } public SstFileReader(final Options options) { super(newSstFileReader(options.nativeHandle_)); } /** * Returns an iterator that will iterate on all keys in the default * column family including both keys in the DB and uncommitted keys in this * transaction. 
* * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read * from the DB but will NOT change which keys are read from this transaction * (the keys in this transaction do not yet belong to any snapshot and will be * fetched regardless). * * Caller is responsible for deleting the returned Iterator. * * @param readOptions Read options. * * @return instance of iterator object. */ public SstFileReaderIterator newIterator(final ReadOptions readOptions) { assert (isOwningHandle()); long iter = newIterator(nativeHandle_, readOptions.nativeHandle_); return new SstFileReaderIterator(this, iter); } /** * Prepare SstFileReader to read a file. * * @param filePath the location of file * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void open(final String filePath) throws RocksDBException { open(nativeHandle_, filePath); } /** * Verify checksum * * @throws RocksDBException if the checksum is not valid */ public void verifyChecksum() throws RocksDBException { verifyChecksum(nativeHandle_); } /** * Get the properties of the table. 
* * @return the properties * * @throws RocksDBException if an error occurs whilst getting the table * properties */ public TableProperties getTableProperties() throws RocksDBException { return getTableProperties(nativeHandle_); } @Override protected final native void disposeInternal(final long handle); private native long newIterator(final long handle, final long readOptionsHandle); private native void open(final long handle, final String filePath) throws RocksDBException; private native static long newSstFileReader(final long optionsHandle); private native void verifyChecksum(final long handle) throws RocksDBException; private native TableProperties getTableProperties(final long handle) throws RocksDBException; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/SstFileReaderIterator.java000066400000000000000000000112631370372246700261140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** *

<p>An iterator that yields a sequence of key/value pairs from a source. * Multiple implementations are provided by this library. * In particular, iterators are provided * to access the contents of a Table or a DB.</p>

* *

<p>Multiple threads can invoke const methods on a RocksIterator without * external synchronization, but if any of the threads may call a * non-const method, all threads accessing the same RocksIterator must use * external synchronization.</p>

*
 * @see RocksObject
 */
public class SstFileReaderIterator extends AbstractRocksIterator<SstFileReader> {
  /**
   * Wraps a native iterator handle together with the reader it came from.
   * Both arguments are passed straight to the superclass constructor.
   *
   * @param reader the {@link SstFileReader} that created this iterator
   * @param nativeHandle handle of the underlying native iterator
   */
  protected SstFileReaderIterator(SstFileReader reader, long nativeHandle) {
    super(reader, nativeHandle);
  }

  /**
   *

<p>Return the key for the current entry. The underlying storage for * the returned slice is valid only until the next modification of * the iterator.</p>

* *

<p>REQUIRES: {@link #isValid()}</p>

*
   * @return key for the current entry.
   */
  public byte[] key() {
    assert (isOwningHandle());
    final byte[] currentKey = key0(nativeHandle_);
    return currentKey;
  }

  /**
   *

<p>Return the key for the current entry. The underlying storage for * the returned slice is valid only until the next modification of * the iterator.</p>

* *

<p>REQUIRES: {@link #isValid()}</p>

*
   * @param key the out-value to receive the retrieved key.
   *     It is using position and limit. Limit is set according to key size.
   *     Supports direct buffer only.
   * @return The size of the actual key. If the returned size is greater than the
   *     length of {@code key}, then it indicates that the size of the
   *     input buffer {@code key} is insufficient and partial result will
   *     be returned.
   */
  public int key(ByteBuffer key) {
    assert (isOwningHandle() && key.isDirect());
    final int keySize =
        keyDirect0(nativeHandle_, key, key.position(), key.remaining());
    // Shrink the limit to the end of the key, never growing it past the
    // buffer's current limit.
    final int newLimit = Math.min(key.position() + keySize, key.limit());
    key.limit(newLimit);
    return keySize;
  }

  /**
   *

<p>Return the value for the current entry. The underlying storage for * the returned slice is valid only until the next modification of * the iterator.</p>

* *

<p>REQUIRES: {@link #isValid()}</p>

* @return value for the current entry.
   */
  public byte[] value() {
    assert (isOwningHandle());
    final byte[] currentValue = value0(nativeHandle_);
    return currentValue;
  }

  /**
   *

<p>Return the value for the current entry. The underlying storage for * the returned slice is valid only until the next modification of * the iterator.</p>

* *

<p>REQUIRES: {@link #isValid()}</p>

* * @param value the out-value to receive the retrieved value. * It is using position and limit. Limit is set according to value size. * Supports direct buffer only. * @return The size of the actual value. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. */ public int value(ByteBuffer value) { assert (isOwningHandle() && value.isDirect()); int result = valueDirect0(nativeHandle_, value, value.position(), value.remaining()); value.limit(Math.min(value.position() + result, value.limit())); return result; } @Override protected final native void disposeInternal(final long handle); @Override final native boolean isValid0(long handle); @Override final native void seekToFirst0(long handle); @Override final native void seekToLast0(long handle); @Override final native void next0(long handle); @Override final native void prev0(long handle); @Override final native void refresh0(long handle) throws RocksDBException; @Override final native void seek0(long handle, byte[] target, int targetLen); @Override final native void seekForPrev0(long handle, byte[] target, int targetLen); @Override final native void status0(long handle) throws RocksDBException; private native byte[] key0(long handle); private native byte[] value0(long handle); private native int keyDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen); private native int valueDirect0(long handle, ByteBuffer buffer, int bufferOffset, int bufferLen); @Override final native void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen); @Override final native void seekForPrevDirect0( long handle, ByteBuffer target, int targetOffset, int targetLen); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/SstFileWriter.java000066400000000000000000000221231370372246700244510ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * SstFileWriter is used to create sst files that can be added to the * database later. All keys in files generated by SstFileWriter will have * sequence number = 0. */ public class SstFileWriter extends RocksObject { static { RocksDB.loadLibrary(); } /** * SstFileWriter Constructor. * * @param envOptions {@link org.rocksdb.EnvOptions} instance. * @param options {@link org.rocksdb.Options} instance. * @param comparator the comparator to specify the ordering of keys. * * @deprecated Use {@link #SstFileWriter(EnvOptions, Options)}. * Passing an explicit comparator is deprecated in lieu of passing the * comparator as part of options. Use the other constructor instead. */ @Deprecated public SstFileWriter(final EnvOptions envOptions, final Options options, final AbstractComparator comparator) { super(newSstFileWriter( envOptions.nativeHandle_, options.nativeHandle_, comparator.nativeHandle_, comparator.getComparatorType().getValue())); } /** * SstFileWriter Constructor. * * @param envOptions {@link org.rocksdb.EnvOptions} instance. * @param options {@link org.rocksdb.Options} instance. */ public SstFileWriter(final EnvOptions envOptions, final Options options) { super(newSstFileWriter( envOptions.nativeHandle_, options.nativeHandle_)); } /** * Prepare SstFileWriter to write to a file. * * @param filePath the location of file * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void open(final String filePath) throws RocksDBException { open(nativeHandle_, filePath); } /** * Add a Put key with value to currently opened file. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. 
* * @throws RocksDBException thrown if error happens in underlying * native library. * * @deprecated Use {@link #put(Slice, Slice)} */ @Deprecated public void add(final Slice key, final Slice value) throws RocksDBException { put(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); } /** * Add a Put key with value to currently opened file. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. * * @deprecated Use {@link #put(DirectSlice, DirectSlice)} */ @Deprecated public void add(final DirectSlice key, final DirectSlice value) throws RocksDBException { put(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); } /** * Add a Put key with value to currently opened file. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void put(final Slice key, final Slice value) throws RocksDBException { put(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); } /** * Add a Put key with value to currently opened file. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void put(final DirectSlice key, final DirectSlice value) throws RocksDBException { put(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); } /** * Add a Put key with value to currently opened file. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ public void put(final ByteBuffer key, final ByteBuffer value) throws RocksDBException { assert key.isDirect() && value.isDirect(); putDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), value.remaining()); key.position(key.limit()); value.position(value.limit()); } /** * Add a Put key with value to currently opened file. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void put(final byte[] key, final byte[] value) throws RocksDBException { put(nativeHandle_, key, value); } /** * Add a Merge key with value to currently opened file. * * @param key the specified key to be merged. * @param value the value to be merged with the current value for * the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void merge(final Slice key, final Slice value) throws RocksDBException { merge(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); } /** * Add a Merge key with value to currently opened file. * * @param key the specified key to be merged. * @param value the value to be merged with the current value for * the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void merge(final byte[] key, final byte[] value) throws RocksDBException { merge(nativeHandle_, key, value); } /** * Add a Merge key with value to currently opened file. * * @param key the specified key to be merged. * @param value the value to be merged with the current value for * the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void merge(final DirectSlice key, final DirectSlice value) throws RocksDBException { merge(nativeHandle_, key.getNativeHandle(), value.getNativeHandle()); } /** * Add a deletion key to currently opened file. 
* * @param key the specified key to be deleted. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final Slice key) throws RocksDBException { delete(nativeHandle_, key.getNativeHandle()); } /** * Add a deletion key to currently opened file. * * @param key the specified key to be deleted. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final DirectSlice key) throws RocksDBException { delete(nativeHandle_, key.getNativeHandle()); } /** * Add a deletion key to currently opened file. * * @param key the specified key to be deleted. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void delete(final byte[] key) throws RocksDBException { delete(nativeHandle_, key); } /** * Finish the process and close the sst file. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public void finish() throws RocksDBException { finish(nativeHandle_); } /** * Return the current file size. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ public long fileSize() throws RocksDBException { return fileSize(nativeHandle_); } private native static long newSstFileWriter( final long envOptionsHandle, final long optionsHandle, final long userComparatorHandle, final byte comparatorType); private native static long newSstFileWriter(final long envOptionsHandle, final long optionsHandle); private native void open(final long handle, final String filePath) throws RocksDBException; private native void put(final long handle, final long keyHandle, final long valueHandle) throws RocksDBException; private native void put(final long handle, final byte[] key, final byte[] value) throws RocksDBException; private native void putDirect(long handle, ByteBuffer key, int keyOffset, int keyLength, ByteBuffer value, int valueOffset, int valueLength) throws RocksDBException; private native long fileSize(long handle) throws RocksDBException; private native void merge(final long handle, final long keyHandle, final long valueHandle) throws RocksDBException; private native void merge(final long handle, final byte[] key, final byte[] value) throws RocksDBException; private native void delete(final long handle, final long keyHandle) throws RocksDBException; private native void delete(final long handle, final byte[] key) throws RocksDBException; private native void finish(final long handle) throws RocksDBException; @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/StateType.java000066400000000000000000000025201370372246700236240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The type used to refer to a thread state. 
* * A state describes lower-level action of a thread * such as reading / writing a file or waiting for a mutex. */ public enum StateType { STATE_UNKNOWN((byte)0x0), STATE_MUTEX_WAIT((byte)0x1); private final byte value; StateType(final byte value) { this.value = value; } /** * Get the internal representation value. * * @return the internal representation value. */ byte getValue() { return value; } /** * Get the State type from the internal representation value. * * @param value the internal representation value. * * @return the state type * * @throws IllegalArgumentException if the value does not match * a StateType */ static StateType fromValue(final byte value) throws IllegalArgumentException { for (final StateType threadType : StateType.values()) { if (threadType.value == value) { return threadType; } } throw new IllegalArgumentException( "Unknown value for StateType: " + value); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Statistics.java000066400000000000000000000113011370372246700240310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.EnumSet; /** * Statistics to analyze the performance of a db. Pointer for statistics object * is managed by Options class. */ public class Statistics extends RocksObject { public Statistics() { super(newStatistics()); } public Statistics(final Statistics otherStatistics) { super(newStatistics(otherStatistics.nativeHandle_)); } public Statistics(final EnumSet ignoreHistograms) { super(newStatistics(toArrayValues(ignoreHistograms))); } public Statistics(final EnumSet ignoreHistograms, final Statistics otherStatistics) { super(newStatistics(toArrayValues(ignoreHistograms), otherStatistics.nativeHandle_)); } /** * Intentionally package-private. 
* * Used from {@link DBOptions#statistics()} * * @param existingStatisticsHandle The C++ pointer to an existing statistics object */ Statistics(final long existingStatisticsHandle) { super(existingStatisticsHandle); } private static byte[] toArrayValues(final EnumSet histogramTypes) { final byte[] values = new byte[histogramTypes.size()]; int i = 0; for(final HistogramType histogramType : histogramTypes) { values[i++] = histogramType.getValue(); } return values; } /** * Gets the current stats level. * * @return The stats level. */ public StatsLevel statsLevel() { return StatsLevel.getStatsLevel(statsLevel(nativeHandle_)); } /** * Sets the stats level. * * @param statsLevel The stats level to set. */ public void setStatsLevel(final StatsLevel statsLevel) { setStatsLevel(nativeHandle_, statsLevel.getValue()); } /** * Get the count for a ticker. * * @param tickerType The ticker to get the count for * * @return The count for the ticker */ public long getTickerCount(final TickerType tickerType) { assert(isOwningHandle()); return getTickerCount(nativeHandle_, tickerType.getValue()); } /** * Get the count for a ticker and reset the tickers count. * * @param tickerType The ticker to get the count for * * @return The count for the ticker */ public long getAndResetTickerCount(final TickerType tickerType) { assert(isOwningHandle()); return getAndResetTickerCount(nativeHandle_, tickerType.getValue()); } /** * Gets the histogram data for a particular histogram. * * @param histogramType The histogram to retrieve the data for * * @return The histogram data */ public HistogramData getHistogramData(final HistogramType histogramType) { assert(isOwningHandle()); return getHistogramData(nativeHandle_, histogramType.getValue()); } /** * Gets a string representation of a particular histogram. 
* * @param histogramType The histogram to retrieve the data for * * @return A string representation of the histogram data */ public String getHistogramString(final HistogramType histogramType) { assert(isOwningHandle()); return getHistogramString(nativeHandle_, histogramType.getValue()); } /** * Resets all ticker and histogram stats. * * @throws RocksDBException if an error occurs when resetting the statistics. */ public void reset() throws RocksDBException { assert(isOwningHandle()); reset(nativeHandle_); } /** * String representation of the statistic object. */ @Override public String toString() { assert(isOwningHandle()); return toString(nativeHandle_); } private native static long newStatistics(); private native static long newStatistics(final long otherStatisticsHandle); private native static long newStatistics(final byte[] ignoreHistograms); private native static long newStatistics(final byte[] ignoreHistograms, final long otherStatisticsHandle); @Override protected final native void disposeInternal(final long handle); private native byte statsLevel(final long handle); private native void setStatsLevel(final long handle, final byte statsLevel); private native long getTickerCount(final long handle, final byte tickerType); private native long getAndResetTickerCount(final long handle, final byte tickerType); private native HistogramData getHistogramData(final long handle, final byte histogramType); private native String getHistogramString(final long handle, final byte histogramType); private native void reset(final long nativeHandle) throws RocksDBException; private native String toString(final long nativeHandle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/StatisticsCollector.java000066400000000000000000000077661370372246700257240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.List; import java.util.concurrent.Executors; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; /** *

Helper class to collect DB statistics periodically at a period specified in * constructor. Callback function (provided in constructor) is called with * every statistics collection.

* *

Caller should call start() to start statistics collection. Shutdown() should * be called to stop stats collection and should be called before statistics ( * provided in constructor) reference has been disposed.

*/ public class StatisticsCollector { private final List _statsCollectorInputList; private final ExecutorService _executorService; private final int _statsCollectionInterval; private volatile boolean _isRunning = true; /** * Constructor for statistics collector. * * @param statsCollectorInputList List of statistics collector input. * @param statsCollectionIntervalInMilliSeconds Statistics collection time * period (specified in milliseconds). */ public StatisticsCollector( final List statsCollectorInputList, final int statsCollectionIntervalInMilliSeconds) { _statsCollectorInputList = statsCollectorInputList; _statsCollectionInterval = statsCollectionIntervalInMilliSeconds; _executorService = Executors.newSingleThreadExecutor(); } public void start() { _executorService.submit(collectStatistics()); } /** * Shuts down statistics collector. * * @param shutdownTimeout Time in milli-seconds to wait for shutdown before * killing the collection process. * @throws java.lang.InterruptedException thrown if Threads are interrupted. */ public void shutDown(final int shutdownTimeout) throws InterruptedException { _isRunning = false; _executorService.shutdownNow(); // Wait for collectStatistics runnable to finish so that disposal of // statistics does not cause any exceptions to be thrown. 
_executorService.awaitTermination(shutdownTimeout, TimeUnit.MILLISECONDS); } private Runnable collectStatistics() { return new Runnable() { @Override public void run() { while (_isRunning) { try { if(Thread.currentThread().isInterrupted()) { break; } for(final StatsCollectorInput statsCollectorInput : _statsCollectorInputList) { Statistics statistics = statsCollectorInput.getStatistics(); StatisticsCollectorCallback statsCallback = statsCollectorInput.getCallback(); // Collect ticker data for(final TickerType ticker : TickerType.values()) { if(ticker != TickerType.TICKER_ENUM_MAX) { final long tickerValue = statistics.getTickerCount(ticker); statsCallback.tickerCallback(ticker, tickerValue); } } // Collect histogram data for(final HistogramType histogramType : HistogramType.values()) { if(histogramType != HistogramType.HISTOGRAM_ENUM_MAX) { final HistogramData histogramData = statistics.getHistogramData(histogramType); statsCallback.histogramCallback(histogramType, histogramData); } } } Thread.sleep(_statsCollectionInterval); } catch (final InterruptedException e) { Thread.currentThread().interrupt(); break; } catch (final Exception e) { throw new RuntimeException("Error while calculating statistics", e); } } } }; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java000066400000000000000000000021441370372246700273220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Callback interface provided to StatisticsCollector. * * Thread safety: * StatisticsCollector doesn't make any guarantees about thread safety. 
* If the same reference of StatisticsCollectorCallback is passed to multiple
 * StatisticsCollector references, then it is the responsibility of the
 * user to make StatisticsCollectorCallback's implementation thread-safe.
 *
 */
public interface StatisticsCollectorCallback {
  /**
   * Receives the value of one ticker.
   *
   * @param tickerType Ticker type.
   * @param tickerCount Value of ticker type.
   */
  void tickerCallback(TickerType tickerType, long tickerCount);

  /**
   * Receives the data of one histogram.
   *
   * @param histType Histogram type.
   * @param histData Histogram data.
   */
  void histogramCallback(HistogramType histType, HistogramData histData);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/StatsCollectorInput.java000066400000000000000000000020261370372246700256700ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Pairs one DB statistics instance with the callback that should receive
 * the values collected from it.
 */
public class StatsCollectorInput {
  private final Statistics _stats;
  private final StatisticsCollectorCallback _callback;

  /**
   * Constructor for StatsCollectorInput.
   *
   * @param statistics Reference of DB statistics.
   * @param statsCallback Reference of statistics callback interface.
   */
  public StatsCollectorInput(final Statistics statistics,
      final StatisticsCollectorCallback statsCallback) {
    this._stats = statistics;
    this._callback = statsCallback;
  }

  /**
   * Returns the statistics reference given to the constructor.
   *
   * @return the statistics reference
   */
  public Statistics getStatistics() {
    return this._stats;
  }

  /**
   * Returns the callback reference given to the constructor.
   *
   * @return the statistics callback
   */
  public StatisticsCollectorCallback getCallback() {
    return this._callback;
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/StatsLevel.java000066400000000000000000000034251370372246700237750ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The level of Statistics to report. */ public enum StatsLevel { /** * Collect all stats except time inside mutex lock AND time spent on * compression. */ EXCEPT_DETAILED_TIMERS((byte) 0x0), /** * Collect all stats except the counters requiring to get time inside the * mutex lock. */ EXCEPT_TIME_FOR_MUTEX((byte) 0x1), /** * Collect all stats, including measuring duration of mutex operations. * * If getting time is expensive on the platform to run, it can * reduce scalability to more threads, especially for writes. */ ALL((byte) 0x2); private final byte value; StatsLevel(final byte value) { this.value = value; } /** *

Returns the byte value of the enumerations value.

* * @return byte representation */ public byte getValue() { return value; } /** * Get StatsLevel by byte value. * * @param value byte representation of StatsLevel. * * @return {@link org.rocksdb.StatsLevel} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ public static StatsLevel getStatsLevel(final byte value) { for (final StatsLevel statsLevel : StatsLevel.values()) { if (statsLevel.getValue() == value){ return statsLevel; } } throw new IllegalArgumentException( "Illegal value provided for StatsLevel."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/Status.java000066400000000000000000000071401370372246700231700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Represents the status returned by a function call in RocksDB. 
*
 * Currently only used with {@link RocksDBException} when the
 * status is not {@link Code#Ok}
 */
public class Status {
  private final Code code;
  /* @Nullable */
  private final SubCode subCode;
  /* @Nullable */
  private final String state;

  /**
   * Creates a status from already-decoded values.
   *
   * @param code the status code
   * @param subCode the status sub-code, may be null
   * @param state the state string, may be null
   */
  public Status(final Code code, final SubCode subCode, final String state) {
    this.code = code;
    this.subCode = subCode;
    this.state = state;
  }

  /**
   * Intentionally private as this will be called from JNI.
   * Decodes the byte values and delegates to the public constructor.
   */
  private Status(final byte code, final byte subCode, final String state) {
    this(Code.getCode(code), SubCode.getSubCode(subCode), state);
  }

  /**
   * @return the status code
   */
  public Code getCode() {
    return code;
  }

  /**
   * @return the status sub-code
   */
  public SubCode getSubCode() {
    return subCode;
  }

  /**
   * @return the state string
   */
  public String getState() {
    return state;
  }

  /**
   * Renders the code name, followed by the sub-code name in parentheses
   * when a sub-code other than {@link SubCode#None} is present.
   *
   * @return the code name, optionally suffixed with "(subCodeName)"
   */
  public String getCodeString() {
    final String codeName = code.name();
    if (subCode == null || subCode == SubCode.None) {
      return codeName;
    }
    return codeName + "(" + subCode.name() + ")";
  }

  // should stay in sync with /include/rocksdb/status.h:Code and /java/rocksjni/portal.h:toJavaStatusCode
  public enum Code {
    Ok((byte) 0x0),
    NotFound((byte) 0x1),
    Corruption((byte) 0x2),
    NotSupported((byte) 0x3),
    InvalidArgument((byte) 0x4),
    IOError((byte) 0x5),
    MergeInProgress((byte) 0x6),
    Incomplete((byte) 0x7),
    ShutdownInProgress((byte) 0x8),
    TimedOut((byte) 0x9),
    Aborted((byte) 0xA),
    Busy((byte) 0xB),
    Expired((byte) 0xC),
    TryAgain((byte) 0xD),
    Undefined((byte) 0x7F);

    private final byte value;

    Code(final byte value) {
      this.value = value;
    }

    /**
     * Looks up the Code whose byte value equals the given one.
     *
     * @param value byte representation of the code
     *
     * @return the matching Code
     * @throws IllegalArgumentException if no Code has this value
     */
    public static Code getCode(final byte value) {
      final Code[] all = Code.values();
      for (int idx = 0; idx < all.length; idx++) {
        if (all[idx].value == value) {
          return all[idx];
        }
      }
      throw new IllegalArgumentException(
          "Illegal value provided for Code (" + value + ").");
    }

    /**
     * Returns the byte value of this enumeration constant.
     *
     * @return byte representation
     */
    public byte getValue() {
      return value;
    }
  }

  // should stay in sync with /include/rocksdb/status.h:SubCode and /java/rocksjni/portal.h:toJavaStatusSubCode
  public enum SubCode {
    None((byte) 0x0),
    MutexTimeout((byte) 0x1),
    LockTimeout((byte) 0x2),
    LockLimit((byte) 0x3),
    NoSpace((byte) 0x4),
    Deadlock((byte) 0x5),
    StaleFile((byte) 0x6),
    MemoryLimit((byte) 0x7),
    Undefined((byte) 0x7F);

    private final byte value;

    SubCode(final byte value) {
      this.value = value;
    }

    /**
     * Looks up the SubCode whose byte value equals the given one.
     *
     * @param value byte representation of the sub-code
     *
     * @return the matching SubCode
     * @throws IllegalArgumentException if no SubCode has this value
     */
    public static SubCode getSubCode(final byte value) {
      final SubCode[] all = SubCode.values();
      for (int idx = 0; idx < all.length; idx++) {
        if (all[idx].value == value) {
          return all[idx];
        }
      }
      throw new IllegalArgumentException(
          "Illegal value provided for SubCode (" + value + ").");
    }

    /**
     * Returns the byte value of this enumeration constant.
     *
     * @return byte representation
     */
    public byte getValue() {
      return value;
    }
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/StringAppendOperator.java000066400000000000000000000015061370372246700260170ustar00rootroot00000000000000
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// Copyright (c) 2014, Vlad Balan (vlad.gm@gmail.com). All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * StringAppendOperator is a merge operator that concatenates
 * two strings.
 */
public class StringAppendOperator extends MergeOperator {
  /**
   * Creates the operator with ',' as the delimiter.
   */
  public StringAppendOperator() {
    this(',');
  }

  /**
   * Creates the operator with the given delimiter.
   *
   * @param delim the delimiter handed to the native operator
   */
  public StringAppendOperator(char delim) {
    super(newSharedStringAppendOperator(delim));
  }

  private native static long newSharedStringAppendOperator(final char delim);
  @Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/TableFilter.java000066400000000000000000000013061370372246700241000ustar00rootroot00000000000000
// Copyright (c) Facebook, Inc.
and its affiliates. All Rights Reserved.
package org.rocksdb;

/**
 * Filter for iterating a table.
 */
public interface TableFilter {

  /**
   * A callback to determine whether relevant keys for this scan exist in a
   * given table based on the table's properties. The callback is passed the
   * properties of each table during iteration. If the callback returns false,
   * the table will not be scanned. This option only affects Iterators and has
   * no impact on point lookups.
   *
   * @param tableProperties the table properties.
   *
   * @return true if the table should be scanned, false otherwise.
   */
  boolean filter(final TableProperties tableProperties);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/TableFormatConfig.java000066400000000000000000000016761370372246700252430ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

/**
 * TableFormatConfig is used to configure the internal Table format of a RocksDB.
 * To make a RocksDB use a specific Table format, its associated
 * TableFormatConfig should be properly set and passed into Options via
 * Options.setTableFormatConfig(), and the db opened using that Options.
 */
public abstract class TableFormatConfig {
  /**
   * <p>This function should only be called by Options.setTableFormatConfig(),
   * which will create a c++ shared-pointer to the c++ TableFactory
   * that is associated with the Java TableFormatConfig.</p>
   *
   * @return native handle address to native table instance.
   */
  abstract protected long newTableFactoryHandle();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/TableProperties.java000066400000000000000000000231561370372246700250160ustar00rootroot00000000000000
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

import java.util.Map;

/**
 * TableProperties contains read-only properties of its associated
 * table.
 */
public class TableProperties {
  // All fields are assigned once by the private constructor and exposed
  // through the getters below.
  private final long dataSize;
  private final long indexSize;
  private final long indexPartitions;
  private final long topLevelIndexSize;
  private final long indexKeyIsUserKey;
  private final long indexValueIsDeltaEncoded;
  private final long filterSize;
  private final long rawKeySize;
  private final long rawValueSize;
  private final long numDataBlocks;
  private final long numEntries;
  private final long numDeletions;
  private final long numMergeOperands;
  private final long numRangeDeletions;
  private final long formatVersion;
  private final long fixedKeyLen;
  private final long columnFamilyId;
  private final long creationTime;
  private final long oldestKeyTime;
  private final byte[] columnFamilyName;
  private final String filterPolicyName;
  private final String comparatorName;
  private final String mergeOperatorName;
  private final String prefixExtractorName;
  private final String propertyCollectorsNames;
  private final String compressionName;
  // NOTE(review): the three maps are declared as raw types here; their
  // key/value type arguments are not visible in this text - confirm.
  private final Map userCollectedProperties;
  private final Map readableProperties;
  private final Map propertiesOffsets;

  /**
   * Access is private as this will only be constructed from
   * C++ via JNI.
*/
  private TableProperties(final long dataSize, final long indexSize,
      final long indexPartitions, final long topLevelIndexSize,
      final long indexKeyIsUserKey, final long indexValueIsDeltaEncoded,
      final long filterSize, final long rawKeySize, final long rawValueSize,
      final long numDataBlocks, final long numEntries, final long numDeletions,
      final long numMergeOperands, final long numRangeDeletions,
      final long formatVersion, final long fixedKeyLen,
      final long columnFamilyId, final long creationTime,
      final long oldestKeyTime, final byte[] columnFamilyName,
      final String filterPolicyName, final String comparatorName,
      final String mergeOperatorName, final String prefixExtractorName,
      final String propertyCollectorsNames, final String compressionName,
      final Map userCollectedProperties,
      final Map readableProperties,
      final Map propertiesOffsets) {
    // Plain field-by-field copy; the parameter list is looked up by
    // signature from native code, so its order and types must not change.
    this.dataSize = dataSize;
    this.indexSize = indexSize;
    this.indexPartitions = indexPartitions;
    this.topLevelIndexSize = topLevelIndexSize;
    this.indexKeyIsUserKey = indexKeyIsUserKey;
    this.indexValueIsDeltaEncoded = indexValueIsDeltaEncoded;
    this.filterSize = filterSize;
    this.rawKeySize = rawKeySize;
    this.rawValueSize = rawValueSize;
    this.numDataBlocks = numDataBlocks;
    this.numEntries = numEntries;
    this.numDeletions = numDeletions;
    this.numMergeOperands = numMergeOperands;
    this.numRangeDeletions = numRangeDeletions;
    this.formatVersion = formatVersion;
    this.fixedKeyLen = fixedKeyLen;
    this.columnFamilyId = columnFamilyId;
    this.creationTime = creationTime;
    this.oldestKeyTime = oldestKeyTime;
    this.columnFamilyName = columnFamilyName;
    this.filterPolicyName = filterPolicyName;
    this.comparatorName = comparatorName;
    this.mergeOperatorName = mergeOperatorName;
    this.prefixExtractorName = prefixExtractorName;
    this.propertyCollectorsNames = propertyCollectorsNames;
    this.compressionName = compressionName;
    this.userCollectedProperties = userCollectedProperties;
    this.readableProperties = readableProperties;
    this.propertiesOffsets = propertiesOffsets;
  }

  /**
   * Get the total size of all data blocks.
   *
   * @return the total size of all data blocks.
   */
  public long getDataSize() {
    return dataSize;
  }

  /**
   * Get the size of index block.
   *
   * @return the size of index block.
   */
  public long getIndexSize() {
    return indexSize;
  }

  /**
   * Get the total number of index partitions
   * if {@link IndexType#kTwoLevelIndexSearch} is used.
   *
   * @return the total number of index partitions.
   */
  public long getIndexPartitions() {
    return indexPartitions;
  }

  /**
   * Size of the top-level index
   * if {@link IndexType#kTwoLevelIndexSearch} is used.
   *
   * @return the size of the top-level index.
   */
  public long getTopLevelIndexSize() {
    return topLevelIndexSize;
  }

  /**
   * Whether the index key is user key.
   * Otherwise it includes 8 byte of sequence
   * number added by internal key format.
   *
   * @return the flag as a long value. NOTE(review): presumably non-zero
   *     means the index key is the user key - confirm against the native side.
   */
  public long getIndexKeyIsUserKey() {
    return indexKeyIsUserKey;
  }

  /**
   * Whether delta encoding is used to encode the index values.
   *
   * @return whether delta encoding is used to encode the index values,
   *     as a long value. NOTE(review): presumably non-zero means true - confirm.
   */
  public long getIndexValueIsDeltaEncoded() {
    return indexValueIsDeltaEncoded;
  }

  /**
   * Get the size of filter block.
   *
   * @return the size of filter block.
   */
  public long getFilterSize() {
    return filterSize;
  }

  /**
   * Get the total raw key size.
   *
   * @return the total raw key size.
   */
  public long getRawKeySize() {
    return rawKeySize;
  }

  /**
   * Get the total raw value size.
   *
   * @return the total raw value size.
   */
  public long getRawValueSize() {
    return rawValueSize;
  }

  /**
   * Get the number of blocks in this table.
   *
   * @return the number of blocks in this table.
   */
  public long getNumDataBlocks() {
    return numDataBlocks;
  }

  /**
   * Get the number of entries in this table.
   *
   * @return the number of entries in this table.
   */
  public long getNumEntries() {
    return numEntries;
  }

  /**
   * Get the number of deletions in the table.
   *
   * @return the number of deletions in the table.
   */
  public long getNumDeletions() {
    return numDeletions;
  }

  /**
   * Get the number of merge operands in the table.
   *
   * @return the number of merge operands in the table.
   */
  public long getNumMergeOperands() {
    return numMergeOperands;
  }

  /**
   * Get the number of range deletions in this table.
   *
   * @return the number of range deletions in this table.
   */
  public long getNumRangeDeletions() {
    return numRangeDeletions;
  }

  /**
   * Get the format version, reserved for backward compatibility.
   *
   * @return the format version.
   */
  public long getFormatVersion() {
    return formatVersion;
  }

  /**
   * Get the length of the keys.
   *
   * @return 0 when the key is variable length, otherwise number of
   *     bytes for each key.
   */
  public long getFixedKeyLen() {
    return fixedKeyLen;
  }

  /**
   * Get the ID of column family for this SST file,
   * corresponding to the column family identified by
   * {@link #getColumnFamilyName()}.
   *
   * @return the id of the column family.
   */
  public long getColumnFamilyId() {
    return columnFamilyId;
  }

  /**
   * The time when the SST file was created.
   * Since SST files are immutable, this is equivalent
   * to last modified time.
   *
   * @return the created time.
   */
  public long getCreationTime() {
    return creationTime;
  }

  /**
   * Get the timestamp of the earliest key.
   *
   * @return 0 means unknown, otherwise the timestamp.
   */
  public long getOldestKeyTime() {
    return oldestKeyTime;
  }

  /**
   * Get the name of the column family with which this
   * SST file is associated.
   *
   * @return the name of the column family, or null if the
   *     column family is unknown.
   */
  /*@Nullable*/ public byte[] getColumnFamilyName() {
    return columnFamilyName;
  }

  /**
   * Get the name of the filter policy used in this table.
   *
   * @return the name of the filter policy, or null if
   *     no filter policy is used.
   */
  /*@Nullable*/ public String getFilterPolicyName() {
    return filterPolicyName;
  }

  /**
   * Get the name of the comparator used in this table.
   *
   * @return the name of the comparator.
   */
  public String getComparatorName() {
    return comparatorName;
  }

  /**
   * Get the name of the merge operator used in this table.
   *
   * @return the name of the merge operator, or null if no merge operator
   *     is used.
   */
  /*@Nullable*/ public String getMergeOperatorName() {
    return mergeOperatorName;
  }

  /**
   * Get the name of the prefix extractor used in this table.
   *
   * @return the name of the prefix extractor, or null if no prefix
   *     extractor is used.
   */
  /*@Nullable*/ public String getPrefixExtractorName() {
    return prefixExtractorName;
  }

  /**
   * Get the names of the property collectors factories used in this table.
   *
   * @return the names of the property collector factories separated
   *     by commas, e.g. {collector_name[1]},{collector_name[2]},...
   */
  public String getPropertyCollectorsNames() {
    return propertyCollectorsNames;
  }

  /**
   * Get the name of the compression algorithm used to compress the SST files.
   *
   * @return the name of the compression algorithm.
   */
  public String getCompressionName() {
    return compressionName;
  }

  /**
   * Get the user collected properties.
   *
   * @return the user collected properties.
   */
  public Map getUserCollectedProperties() {
    return userCollectedProperties;
  }

  /**
   * Get the readable properties.
   *
   * @return the readable properties.
   */
  public Map getReadableProperties() {
    return readableProperties;
  }

  /**
   * The offset of the value of each property in the file.
   *
   * @return the offset of each property.
   */
  public Map getPropertiesOffsets() {
    return propertiesOffsets;
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ThreadStatus.java000066400000000000000000000146361370372246700243300ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import java.util.Map;

/**
 * Immutable snapshot describing one thread: its id and type, the DB and
 * column family names, and the operation, stage, properties and state
 * reported for it. Instances are created from C++ via JNI.
 */
public class ThreadStatus {
  private final long threadId;
  private final ThreadType threadType;
  private final String dbName;
  private final String cfName;
  private final OperationType operationType;
  private final long operationElapsedTime; // microseconds
  private final OperationStage operationStage;
  private final long[] operationProperties;
  private final StateType stateType;

  /**
   * Invoked from C++ via JNI. The byte arguments are decoded into their
   * enum counterparts through the respective {@code fromValue} methods.
   */
  private ThreadStatus(final long threadId,
      final byte threadTypeValue,
      final String dbName,
      final String cfName,
      final byte operationTypeValue,
      final long operationElapsedTime,
      final byte operationStageValue,
      final long[] operationProperties,
      final byte stateTypeValue) {
    this.threadId = threadId;
    this.threadType = ThreadType.fromValue(threadTypeValue);
    this.dbName = dbName;
    this.cfName = cfName;
    this.operationType = OperationType.fromValue(operationTypeValue);
    this.operationElapsedTime = operationElapsedTime;
    this.operationStage = OperationStage.fromValue(operationStageValue);
    this.operationProperties = operationProperties;
    this.stateType = StateType.fromValue(stateTypeValue);
  }

  /**
   * Get the unique ID of the thread.
   *
   * @return the thread id
   */
  public long getThreadId() {
    return threadId;
  }

  /**
   * Get the type of the thread.
   *
   * @return the type of the thread.
   */
  public ThreadType getThreadType() {
    return threadType;
  }

  /**
   * The name of the DB instance that the thread is currently
   * involved with.
   *
   * @return the name of the db, or null if the thread is not involved
   *     in any DB operation.
   */
  /* @Nullable */ public String getDbName() {
    return dbName;
  }

  /**
   * The name of the Column Family that the thread is currently
   * involved with.
   *
   * @return the name of the column family, or null if the thread is not
   *     involved in any column family operation.
   */
  /* @Nullable */ public String getCfName() {
    return cfName;
  }

  /**
   * Get the operation (high-level action) that the current thread is involved
   * with.
   *
   * @return the operation
   */
  public OperationType getOperationType() {
    return operationType;
  }

  /**
   * Get the elapsed time of the current thread operation in microseconds.
   *
   * @return the elapsed time
   */
  public long getOperationElapsedTime() {
    return operationElapsedTime;
  }

  /**
   * Get the current stage where the thread is involved in the current
   * operation.
   *
   * @return the current stage of the current operation
   */
  public OperationStage getOperationStage() {
    return operationStage;
  }

  /**
   * Get the list of properties that describe some details about the current
   * operation.
   *
   * Each field might have different meanings for different operations.
   *
   * @return the properties (the internal array itself, not a copy)
   */
  public long[] getOperationProperties() {
    return operationProperties;
  }

  /**
   * Get the state (lower-level action) that the current thread is involved
   * with.
   *
   * @return the state
   */
  public StateType getStateType() {
    return stateType;
  }

  /**
   * Get the name of the thread type.
   *
   * @param threadType the thread type
   *
   * @return the name of the thread type.
   */
  public static String getThreadTypeName(final ThreadType threadType) {
    return getThreadTypeName(threadType.getValue());
  }

  /**
   * Get the name of an operation given its type.
   *
   * @param operationType the type of operation.
   *
   * @return the name of the operation.
   */
  public static String getOperationName(final OperationType operationType) {
    return getOperationName(operationType.getValue());
  }

  /**
   * Converts an elapsed time to a string via the native layer.
   *
   * @param operationElapsedTime the elapsed time; presumably in
   *     microseconds, as the method name suggests
   *
   * @return the string produced by the native layer
   */
  public static String microsToString(final long operationElapsedTime) {
    return microsToStringNative(operationElapsedTime);
  }

  /**
   * Obtain a human-readable string describing the specified operation stage.
   *
   * @param operationStage the stage of the operation.
   *
   * @return the description of the operation stage.
   */
  public static String getOperationStageName(
      final OperationStage operationStage) {
    return getOperationStageName(operationStage.getValue());
  }

  /**
   * Obtain the name of the "i"th operation property of the
   * specified operation.
   *
   * @param operationType the operation type.
   * @param i the index of the operation property.
   *
   * @return the name of the operation property
   */
  public static String getOperationPropertyName(
      final OperationType operationType, final int i) {
    return getOperationPropertyName(operationType.getValue(), i);
  }

  /**
   * Translate the properties of the specified operation given
   * their property values.
   *
   * @param operationType the operation type.
   * @param operationProperties the operation properties.
   *
   * @return the property values.
   */
  public static Map interpretOperationProperties(
      final OperationType operationType, final long[] operationProperties) {
    return interpretOperationProperties(operationType.getValue(),
        operationProperties);
  }

  /**
   * Obtain the name of a state given its type.
   *
   * @param stateType the state type.
   *
   * @return the name of the state.
   */
  public static String getStateName(final StateType stateType) {
    return getStateName(stateType.getValue());
  }

  // Native entry points. Names and signatures are part of the JNI contract.
  private static native String getThreadTypeName(final byte threadTypeValue);
  private static native String getOperationName(final byte operationTypeValue);
  private static native String microsToStringNative(
      final long operationElapsedTime);
  private static native String getOperationStageName(
      final byte operationStageTypeValue);
  private static native String getOperationPropertyName(
      final byte operationTypeValue, final int i);
  // Return type and method name must be separate tokens; fused together the
  // declaration has no return type and the public wrapper above has no target.
  // NOTE(review): Map is raw here; its type arguments are not visible - confirm.
  private static native Map interpretOperationProperties(
      final byte operationTypeValue, final long[] operationProperties);
  private static native String getStateName(final byte stateTypeValue);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/ThreadType.java000066400000000000000000000027321370372246700237600ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; /** * The type of a thread. */ public enum ThreadType { /** * RocksDB BG thread in high-pri thread pool. */ HIGH_PRIORITY((byte)0x0), /** * RocksDB BG thread in low-pri thread pool. */ LOW_PRIORITY((byte)0x1), /** * User thread (Non-RocksDB BG thread). */ USER((byte)0x2), /** * RocksDB BG thread in bottom-pri thread pool */ BOTTOM_PRIORITY((byte)0x3); private final byte value; ThreadType(final byte value) { this.value = value; } /** * Get the internal representation value. * * @return the internal representation value. */ byte getValue() { return value; } /** * Get the Thread type from the internal representation value. * * @param value the internal representation value. * * @return the thread type * * @throws IllegalArgumentException if the value does not match a ThreadType */ static ThreadType fromValue(final byte value) throws IllegalArgumentException { for (final ThreadType threadType : ThreadType.values()) { if (threadType.value == value) { return threadType; } } throw new IllegalArgumentException("Unknown value for ThreadType: " + value); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TickerType.java000066400000000000000000000442111370372246700237700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The logical mapping of tickers defined in rocksdb::Tickers. * * Java byte value mappings don't align 1:1 to the c++ values. c++ rocksdb::Tickers enumeration type * is uint32_t and java org.rocksdb.TickerType is byte, this causes mapping issues when * rocksdb::Tickers value is greater then 127 (0x7F) for jbyte jni interface as range greater is not * available. 
Without breaking interface in minor versions, value mappings for
 * org.rocksdb.TickerType leverage full byte range [-128 (-0x80), (0x7F)]. Newer tickers added
 * should descend into negative values until TICKER_ENUM_MAX reaches -128 (-0x80).
 */
public enum TickerType {

  /**
   * total block cache misses
   *
   * REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS +
   *     BLOCK_CACHE_FILTER_MISS +
   *     BLOCK_CACHE_DATA_MISS;
   */
  BLOCK_CACHE_MISS((byte) 0x0),

  /**
   * total block cache hit
   *
   * REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT +
   *     BLOCK_CACHE_FILTER_HIT +
   *     BLOCK_CACHE_DATA_HIT;
   */
  BLOCK_CACHE_HIT((byte) 0x1),

  BLOCK_CACHE_ADD((byte) 0x2),

  /**
   * # of failures when adding blocks to block cache.
   */
  BLOCK_CACHE_ADD_FAILURES((byte) 0x3),

  /**
   * # of times cache miss when accessing index block from block cache.
   */
  BLOCK_CACHE_INDEX_MISS((byte) 0x4),

  /**
   * # of times cache hit when accessing index block from block cache.
   */
  BLOCK_CACHE_INDEX_HIT((byte) 0x5),

  /**
   * # of index blocks added to block cache.
   */
  BLOCK_CACHE_INDEX_ADD((byte) 0x6),

  /**
   * # of bytes of index blocks inserted into cache
   */
  BLOCK_CACHE_INDEX_BYTES_INSERT((byte) 0x7),

  /**
   * # of bytes of index block erased from cache
   */
  BLOCK_CACHE_INDEX_BYTES_EVICT((byte) 0x8),

  /**
   * # of times cache miss when accessing filter block from block cache.
   */
  BLOCK_CACHE_FILTER_MISS((byte) 0x9),

  /**
   * # of times cache hit when accessing filter block from block cache.
   */
  BLOCK_CACHE_FILTER_HIT((byte) 0xA),

  /**
   * # of filter blocks added to block cache.
   */
  BLOCK_CACHE_FILTER_ADD((byte) 0xB),

  /**
   * # of bytes of bloom filter blocks inserted into cache
   */
  BLOCK_CACHE_FILTER_BYTES_INSERT((byte) 0xC),

  /**
   * # of bytes of bloom filter block erased from cache
   */
  BLOCK_CACHE_FILTER_BYTES_EVICT((byte) 0xD),

  /**
   * # of times cache miss when accessing data block from block cache.
   */
  BLOCK_CACHE_DATA_MISS((byte) 0xE),

  /**
   * # of times cache hit when accessing data block from block cache.
   */
  BLOCK_CACHE_DATA_HIT((byte) 0xF),

  /**
   * # of data blocks added to block cache.
   */
  BLOCK_CACHE_DATA_ADD((byte) 0x10),

  /**
   * # of bytes of data blocks inserted into cache
   */
  BLOCK_CACHE_DATA_BYTES_INSERT((byte) 0x11),

  /**
   * # of bytes read from cache.
   */
  BLOCK_CACHE_BYTES_READ((byte) 0x12),

  /**
   * # of bytes written into cache.
   */
  BLOCK_CACHE_BYTES_WRITE((byte) 0x13),

  /**
   * # of times bloom filter has avoided file reads.
   */
  BLOOM_FILTER_USEFUL((byte) 0x14),

  /**
   * # persistent cache hit
   */
  PERSISTENT_CACHE_HIT((byte) 0x15),

  /**
   * # persistent cache miss
   */
  PERSISTENT_CACHE_MISS((byte) 0x16),

  /**
   * # total simulation block cache hits
   */
  SIM_BLOCK_CACHE_HIT((byte) 0x17),

  /**
   * # total simulation block cache misses
   */
  SIM_BLOCK_CACHE_MISS((byte) 0x18),

  /**
   * # of memtable hits.
   */
  MEMTABLE_HIT((byte) 0x19),

  /**
   * # of memtable misses.
   */
  MEMTABLE_MISS((byte) 0x1A),

  /**
   * # of Get() queries served by L0
   */
  GET_HIT_L0((byte) 0x1B),

  /**
   * # of Get() queries served by L1
   */
  GET_HIT_L1((byte) 0x1C),

  /**
   * # of Get() queries served by L2 and up
   */
  GET_HIT_L2_AND_UP((byte) 0x1D),

  /**
   * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
   * There are 4 reasons currently.
   */

  /**
   * key was written with a newer value.
   */
  COMPACTION_KEY_DROP_NEWER_ENTRY((byte) 0x1E),

  /**
   * Also includes keys dropped for range del.
   * The key is obsolete.
   */
  COMPACTION_KEY_DROP_OBSOLETE((byte) 0x1F),

  /**
   * key was covered by a range tombstone.
   */
  COMPACTION_KEY_DROP_RANGE_DEL((byte) 0x20),

  /**
   * User compaction function has dropped the key.
   */
  COMPACTION_KEY_DROP_USER((byte) 0x21),

  /**
   * all keys in range were deleted.
   */
  COMPACTION_RANGE_DEL_DROP_OBSOLETE((byte) 0x22),

  /**
   * Number of keys written to the database via the Put and Write calls.
   */
  NUMBER_KEYS_WRITTEN((byte) 0x23),

  /**
   * Number of Keys read.
   */
  NUMBER_KEYS_READ((byte) 0x24),

  /**
   * Number keys updated, if inplace update is enabled
   */
  NUMBER_KEYS_UPDATED((byte) 0x25),

  /**
   * The number of uncompressed bytes issued by DB::Put(), DB::Delete(),
   * DB::Merge(), and DB::Write().
   */
  BYTES_WRITTEN((byte) 0x26),

  /**
   * The number of uncompressed bytes read from DB::Get(). It could be
   * either from memtables, cache, or table files.
   *
   * For the number of logical bytes read from DB::MultiGet(),
   * please use {@link #NUMBER_MULTIGET_BYTES_READ}.
   */
  BYTES_READ((byte) 0x27),

  /**
   * The number of calls to seek.
   */
  NUMBER_DB_SEEK((byte) 0x28),

  /**
   * The number of calls to next.
   */
  NUMBER_DB_NEXT((byte) 0x29),

  /**
   * The number of calls to prev.
   */
  NUMBER_DB_PREV((byte) 0x2A),

  /**
   * The number of calls to seek that returned data.
   */
  NUMBER_DB_SEEK_FOUND((byte) 0x2B),

  /**
   * The number of calls to next that returned data.
   */
  NUMBER_DB_NEXT_FOUND((byte) 0x2C),

  /**
   * The number of calls to prev that returned data.
   */
  NUMBER_DB_PREV_FOUND((byte) 0x2D),

  /**
   * The number of uncompressed bytes read from an iterator.
   * Includes size of key and value.
   */
  ITER_BYTES_READ((byte) 0x2E),

  NO_FILE_CLOSES((byte) 0x2F),

  NO_FILE_OPENS((byte) 0x30),

  NO_FILE_ERRORS((byte) 0x31),

  /**
   * Time system had to wait to do L0-L1 compactions.
   *
   * @deprecated
   */
  @Deprecated
  STALL_L0_SLOWDOWN_MICROS((byte) 0x32),

  /**
   * Time system had to wait to move memtable to L1.
   *
   * @deprecated
   */
  @Deprecated
  STALL_MEMTABLE_COMPACTION_MICROS((byte) 0x33),

  /**
   * write throttle because of too many files in L0.
   *
   * @deprecated
   */
  @Deprecated
  STALL_L0_NUM_FILES_MICROS((byte) 0x34),

  /**
   * Writer has to wait for compaction or flush to finish.
   */
  STALL_MICROS((byte) 0x35),

  /**
   * The wait time for db mutex.
   *
   * Disabled by default. To enable it set stats level to {@link StatsLevel#ALL}
   */
  DB_MUTEX_WAIT_MICROS((byte) 0x36),

  RATE_LIMIT_DELAY_MILLIS((byte) 0x37),

  /**
   * Number of iterators created.
   *
   */
  NO_ITERATORS((byte) 0x38),

  /**
   * Number of MultiGet calls.
   */
  NUMBER_MULTIGET_CALLS((byte) 0x39),

  /**
   * Number of MultiGet keys read.
   */
  NUMBER_MULTIGET_KEYS_READ((byte) 0x3A),

  /**
   * Number of MultiGet bytes read.
   */
  NUMBER_MULTIGET_BYTES_READ((byte) 0x3B),

  /**
   * Number of deletes records that were not required to be
   * written to storage because key does not exist.
   */
  NUMBER_FILTERED_DELETES((byte) 0x3C),

  NUMBER_MERGE_FAILURES((byte) 0x3D),

  /**
   * Number of times bloom was checked before creating iterator on a
   * file, and the number of times the check was useful in avoiding
   * iterator creation (and thus likely IOPs).
   */
  BLOOM_FILTER_PREFIX_CHECKED((byte) 0x3E),

  BLOOM_FILTER_PREFIX_USEFUL((byte) 0x3F),

  /**
   * Number of times we had to reseek inside an iteration to skip
   * over large number of keys with same userkey.
   */
  NUMBER_OF_RESEEKS_IN_ITERATION((byte) 0x40),

  /**
   * Record the number of calls to {@link RocksDB#getUpdatesSince(long)}. Useful to keep track of
   * transaction log iterator refreshes.
   */
  GET_UPDATES_SINCE_CALLS((byte) 0x41),

  /**
   * Miss in the compressed block cache.
   */
  BLOCK_CACHE_COMPRESSED_MISS((byte) 0x42),

  /**
   * Hit in the compressed block cache.
   */
  BLOCK_CACHE_COMPRESSED_HIT((byte) 0x43),

  /**
   * Number of blocks added to compressed block cache.
   */
  BLOCK_CACHE_COMPRESSED_ADD((byte) 0x44),

  /**
   * Number of failures when adding blocks to compressed block cache.
   */
  BLOCK_CACHE_COMPRESSED_ADD_FAILURES((byte) 0x45),

  /**
   * Number of times WAL sync is done.
   */
  WAL_FILE_SYNCED((byte) 0x46),

  /**
   * Number of bytes written to WAL.
   */
  WAL_FILE_BYTES((byte) 0x47),

  /**
   * Writes can be processed by requesting thread or by the thread at the
   * head of the writers queue.
   */
  WRITE_DONE_BY_SELF((byte) 0x48),

  /**
   * Equivalent to writes done for others.
   */
  WRITE_DONE_BY_OTHER((byte) 0x49),

  /**
   * Number of writes ending up with timed-out.
   */
  WRITE_TIMEDOUT((byte) 0x4A),

  /**
   * Number of Write calls that request WAL.
   */
  WRITE_WITH_WAL((byte) 0x4B),

  /**
   * Bytes read during compaction.
   */
  COMPACT_READ_BYTES((byte) 0x4C),

  /**
   * Bytes written during compaction.
   */
  COMPACT_WRITE_BYTES((byte) 0x4D),

  /**
   * Bytes written during flush.
   */
  FLUSH_WRITE_BYTES((byte) 0x4E),

  /**
   * Number of table's properties loaded directly from file, without creating
   * table reader object.
   */
  NUMBER_DIRECT_LOAD_TABLE_PROPERTIES((byte) 0x4F),

  NUMBER_SUPERVERSION_ACQUIRES((byte) 0x50),

  NUMBER_SUPERVERSION_RELEASES((byte) 0x51),

  NUMBER_SUPERVERSION_CLEANUPS((byte) 0x52),

  /**
   * # of compressions/decompressions executed
   */
  NUMBER_BLOCK_COMPRESSED((byte) 0x53),

  NUMBER_BLOCK_DECOMPRESSED((byte) 0x54),

  NUMBER_BLOCK_NOT_COMPRESSED((byte) 0x55),

  MERGE_OPERATION_TOTAL_TIME((byte) 0x56),

  FILTER_OPERATION_TOTAL_TIME((byte) 0x57),

  /**
   * Row cache.
   */
  ROW_CACHE_HIT((byte) 0x58),

  ROW_CACHE_MISS((byte) 0x59),

  /**
   * Read amplification statistics.
   *
   * Read amplification can be calculated using this formula
   * (READ_AMP_TOTAL_READ_BYTES / READ_AMP_ESTIMATE_USEFUL_BYTES)
   *
   * REQUIRES: ReadOptions::read_amp_bytes_per_bit to be enabled
   */

  /**
   * Estimate of total bytes actually used.
   */
  READ_AMP_ESTIMATE_USEFUL_BYTES((byte) 0x5A),

  /**
   * Total size of loaded data blocks.
   */
  READ_AMP_TOTAL_READ_BYTES((byte) 0x5B),

  /**
   * Number of refill intervals where rate limiter's bytes are fully consumed.
   */
  NUMBER_RATE_LIMITER_DRAINS((byte) 0x5C),

  /**
   * Number of internal skipped during iteration
   */
  NUMBER_ITER_SKIP((byte) 0x5D),

  /**
   * Number of MultiGet keys found (vs number requested)
   */
  NUMBER_MULTIGET_KEYS_FOUND((byte) 0x5E),

  // -0x01 to fixate the new value that incorrectly changed TICKER_ENUM_MAX
  // (0x5F, the value that would follow 0x5E in sequence, is the one
  // TICKER_ENUM_MAX carries at the end of this enum.)
  /**
   * Number of iterators created.
   */
  NO_ITERATOR_CREATED((byte) -0x01),

  /**
   * Number of iterators deleted.
   */
  NO_ITERATOR_DELETED((byte) 0x60),

  /**
   * Deletions obsoleted before bottom level due to file gap optimization.
   */
  COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE((byte) 0x61),

  /**
   * If a compaction was cancelled in sfm to prevent ENOSPC
   */
  COMPACTION_CANCELLED((byte) 0x62),

  /**
   * # of times bloom FullFilter has not avoided the reads.
   */
  BLOOM_FILTER_FULL_POSITIVE((byte) 0x63),

  /**
   * # of times bloom FullFilter has not avoided the reads and data actually
   * exist.
   */
  BLOOM_FILTER_FULL_TRUE_POSITIVE((byte) 0x64),

  /**
   * BlobDB specific stats
   * # of Put/PutTTL/PutUntil to BlobDB.
   */
  BLOB_DB_NUM_PUT((byte) 0x65),

  /**
   * # of Write to BlobDB.
   */
  BLOB_DB_NUM_WRITE((byte) 0x66),

  /**
   * # of Get to BlobDB.
   */
  BLOB_DB_NUM_GET((byte) 0x67),

  /**
   * # of MultiGet to BlobDB.
   */
  BLOB_DB_NUM_MULTIGET((byte) 0x68),

  /**
   * # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator.
   */
  BLOB_DB_NUM_SEEK((byte) 0x69),

  /**
   * # of Next to BlobDB iterator.
   */
  BLOB_DB_NUM_NEXT((byte) 0x6A),

  /**
   * # of Prev to BlobDB iterator.
   */
  BLOB_DB_NUM_PREV((byte) 0x6B),

  /**
   * # of keys written to BlobDB.
   */
  BLOB_DB_NUM_KEYS_WRITTEN((byte) 0x6C),

  /**
   * # of keys read from BlobDB.
   */
  BLOB_DB_NUM_KEYS_READ((byte) 0x6D),

  /**
   * # of bytes (key + value) written to BlobDB.
   */
  BLOB_DB_BYTES_WRITTEN((byte) 0x6E),

  /**
   * # of bytes (keys + value) read from BlobDB.
   */
  BLOB_DB_BYTES_READ((byte) 0x6F),

  /**
   * # of keys written by BlobDB as non-TTL inlined value.
   */
  BLOB_DB_WRITE_INLINED((byte) 0x70),

  /**
   * # of keys written by BlobDB as TTL inlined value.
   */
  BLOB_DB_WRITE_INLINED_TTL((byte) 0x71),

  /**
   * # of keys written by BlobDB as non-TTL blob value.
   */
  BLOB_DB_WRITE_BLOB((byte) 0x72),

  /**
   * # of keys written by BlobDB as TTL blob value.
   */
  BLOB_DB_WRITE_BLOB_TTL((byte) 0x73),

  /**
   * # of bytes written to blob file.
   */
  BLOB_DB_BLOB_FILE_BYTES_WRITTEN((byte) 0x74),

  /**
   * # of bytes read from blob file.
   */
  BLOB_DB_BLOB_FILE_BYTES_READ((byte) 0x75),

  /**
   * # of times a blob files being synced.
   */
  BLOB_DB_BLOB_FILE_SYNCED((byte) 0x76),

  /**
   * # of blob index evicted from base DB by BlobDB compaction filter because
   * of expiration.
   */
  BLOB_DB_BLOB_INDEX_EXPIRED_COUNT((byte) 0x77),

  /**
   * Size of blob index evicted from base DB by BlobDB compaction filter
   * because of expiration.
   */
  BLOB_DB_BLOB_INDEX_EXPIRED_SIZE((byte) 0x78),

  /**
   * # of blob index evicted from base DB by BlobDB compaction filter because
   * of corresponding file deleted.
   */
  BLOB_DB_BLOB_INDEX_EVICTED_COUNT((byte) 0x79),

  /**
   * Size of blob index evicted from base DB by BlobDB compaction filter
   * because of corresponding file deleted.
   */
  BLOB_DB_BLOB_INDEX_EVICTED_SIZE((byte) 0x7A),

  /**
   * # of blob files being garbage collected.
   */
  BLOB_DB_GC_NUM_FILES((byte) 0x7B),

  /**
   * # of blob files generated by garbage collection.
   */
  BLOB_DB_GC_NUM_NEW_FILES((byte) 0x7C),

  /**
   * # of BlobDB garbage collection failures.
   */
  BLOB_DB_GC_FAILURES((byte) 0x7D),

  /**
   * # of keys drop by BlobDB garbage collection because they had been
   * overwritten.
   */
  BLOB_DB_GC_NUM_KEYS_OVERWRITTEN((byte) 0x7E),

  /**
   * # of keys drop by BlobDB garbage collection because of expiration.
   */
  BLOB_DB_GC_NUM_KEYS_EXPIRED((byte) 0x7F),

  // 0x7F is the largest positive byte; the constants below continue into the
  // negative range, as described in the class-level comment.
  /**
   * # of keys relocated to new blob file by garbage collection.
   */
  BLOB_DB_GC_NUM_KEYS_RELOCATED((byte) -0x02),

  /**
   * # of bytes drop by BlobDB garbage collection because they had been
   * overwritten.
   */
  BLOB_DB_GC_BYTES_OVERWRITTEN((byte) -0x03),

  /**
   * # of bytes drop by BlobDB garbage collection because of expiration.
   */
  BLOB_DB_GC_BYTES_EXPIRED((byte) -0x04),

  /**
   * # of bytes relocated to new blob file by garbage collection.
   */
  BLOB_DB_GC_BYTES_RELOCATED((byte) -0x05),

  /**
   * # of blob files evicted because of BlobDB is full.
   */
  BLOB_DB_FIFO_NUM_FILES_EVICTED((byte) -0x06),

  /**
   * # of keys in the blob files evicted because of BlobDB is full.
   */
  BLOB_DB_FIFO_NUM_KEYS_EVICTED((byte) -0x07),

  /**
   * # of bytes in the blob files evicted because of BlobDB is full.
   */
  BLOB_DB_FIFO_BYTES_EVICTED((byte) -0x08),

  /**
   * These counters indicate a performance issue in WritePrepared transactions.
   * We should not see them ticking much.
   * # of times prepare_mutex_ is acquired in the fast path.
   */
  TXN_PREPARE_MUTEX_OVERHEAD((byte) -0x09),

  /**
   * # of times old_commit_map_mutex_ is acquired in the fast path.
   */
  TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD((byte) -0x0A),

  /**
   * # of times we checked a batch for duplicate keys.
   */
  TXN_DUPLICATE_KEY_OVERHEAD((byte) -0x0B),

  /**
   * # of times snapshot_mutex_ is acquired in the fast path.
   */
  TXN_SNAPSHOT_MUTEX_OVERHEAD((byte) -0x0C),

  /**
   * # of times ::Get returned TryAgain due to expired snapshot seq
   */
  TXN_GET_TRY_AGAIN((byte) -0x0D),

  /**
   * # of files marked as trash by delete scheduler
   */
  FILES_MARKED_TRASH((byte) -0x0E),

  /**
   * # of files deleted immediately by delete scheduler
   */
  FILES_DELETED_IMMEDIATELY((byte) -0x0f),

  // NOTE(review): presumably a sentinel rather than a real ticker; its value
  // 0x5F is fixed and does not track the number of constants above — confirm
  // against the native mapping before changing it.
  TICKER_ENUM_MAX((byte) 0x5F);

  private final byte value;

  TickerType(final byte value) {
    this.value = value;
  }

  /**
   * Returns the byte value of the enumerations value
   *
   * @return byte representation
   */
  public byte getValue() {
    return value;
  }

  /**
   * Get Ticker type by byte value.
   *
   * The lookup is a linear scan over {@link #values()} and returns the first
   * constant whose byte value matches.
   *
   * @param value byte representation of TickerType.
   *
   * @return {@link org.rocksdb.TickerType} instance.
   * @throws java.lang.IllegalArgumentException if an invalid
   *     value is provided.
   */
  public static TickerType getTickerType(final byte value) {
    for (final TickerType tickerType : TickerType.values()) {
      if (tickerType.getValue() == value) {
        return tickerType;
      }
    }
    throw new IllegalArgumentException(
        "Illegal value provided for TickerType.");
  }
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/TimedEnv.java000066400000000000000000000016641370372246700234250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Timed environment.
 */
public class TimedEnv extends Env {

  /**
   *

<p>Creates a new environment that measures function call times for
 * filesystem operations, reporting results to variables in PerfContext.</p>
 *
 *
 * <p>The caller must delete the result when it is
 * no longer needed.</p>

* * @param baseEnv the base environment, * must remain live while the result is in use. */ public TimedEnv(final Env baseEnv) { super(createTimedEnv(baseEnv.nativeHandle_)); } private static native long createTimedEnv(final long baseEnvHandle); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TraceOptions.java000066400000000000000000000016201370372246700243140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * TraceOptions is used for * {@link RocksDB#startTrace(TraceOptions, AbstractTraceWriter)}. */ public class TraceOptions { private final long maxTraceFileSize; public TraceOptions() { this.maxTraceFileSize = 64 * 1024 * 1024 * 1024; // 64 GB } public TraceOptions(final long maxTraceFileSize) { this.maxTraceFileSize = maxTraceFileSize; } /** * To avoid the trace file size grows large than the storage space, * user can set the max trace file size in Bytes. Default is 64GB * * @return the max trace size */ public long getMaxTraceFileSize() { return maxTraceFileSize; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TraceWriter.java000066400000000000000000000016021370372246700241350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * TraceWriter allows exporting RocksDB traces to any system, * one operation at a time. */ public interface TraceWriter { /** * Write the data. * * @param data the data * * @throws RocksDBException if an error occurs whilst writing. 
*/
  void write(final Slice data) throws RocksDBException;

  /**
   * Close the writer.
   *
   * @throws RocksDBException if an error occurs whilst closing the writer.
   */
  void closeWriter() throws RocksDBException;

  /**
   * Get the size of the file that this writer is writing to.
   *
   * @return the file size (the unit is not stated here; presumably bytes —
   *     TODO confirm)
   */
  long getFileSize();
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/Transaction.java000066400000000000000000002422641370372246700242020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.List;

/**
 * Provides BEGIN/COMMIT/ROLLBACK transactions.
 *
 * To use transactions, you must first create either an
 * {@link OptimisticTransactionDB} or a {@link TransactionDB}
 *
 * To create a transaction, use
 * {@link OptimisticTransactionDB#beginTransaction(org.rocksdb.WriteOptions)} or
 * {@link TransactionDB#beginTransaction(org.rocksdb.WriteOptions)}
 *
 * It is up to the caller to synchronize access to this object.
 *
 * See samples/src/main/java/OptimisticTransactionSample.java and
 * samples/src/main/java/TransactionSample.java for some simple
 * examples.
*/
public class Transaction extends RocksObject {

  // The database this transaction was begun on (see the constructor).
  private final RocksDB parent;

  /**
   * Intentionally package private
   * as this is called from
   * {@link OptimisticTransactionDB#beginTransaction(org.rocksdb.WriteOptions)}
   * or {@link TransactionDB#beginTransaction(org.rocksdb.WriteOptions)}
   *
   * @param parent This must be either {@link TransactionDB} or
   *     {@link OptimisticTransactionDB}
   * @param transactionHandle The native handle to the underlying C++
   *     transaction object
   */
  Transaction(final RocksDB parent, final long transactionHandle) {
    super(transactionHandle);
    this.parent = parent;
  }

  /**
   * If a transaction has a snapshot set, the transaction will ensure that
   * any keys successfully written (or fetched via {@link #getForUpdate}) have
   * not been modified outside of this transaction since the time the snapshot
   * was set.
   *
   * If a snapshot has not been set, the transaction guarantees that keys have
   * not been modified since the time each key was first written (or fetched via
   * {@link #getForUpdate}).
   *
   * Using {@link #setSnapshot()} will provide stricter isolation guarantees
   * at the expense of potentially more transaction failures due to conflicts
   * with other writes.
   *
   * Calling {@link #setSnapshot()} has no effect on keys written before this
   * function has been called.
   *
   * {@link #setSnapshot()} may be called multiple times if you would like to
   * change the snapshot used for different operations in this transaction.
   *
   * Calling {@link #setSnapshot()} will not affect the version of Data returned
   * by get(...) methods. See {@link #get} for more details.
   */
  public void setSnapshot() {
    assert(isOwningHandle());
    setSnapshot(nativeHandle_);
  }

  /**
   * Similar to {@link #setSnapshot()}, but will not change the current snapshot
   * until put/merge/delete/getForUpdate/multiGetForUpdate is called.
   * By calling this function, the transaction will essentially call
   * {@link #setSnapshot()} for you right before performing the next
   * write/getForUpdate.
   *
   * Calling {@link #setSnapshotOnNextOperation()} will not affect what
   * snapshot is returned by {@link #getSnapshot} until the next
   * write/getForUpdate is executed.
   *
   * When the snapshot is created the notifier's snapshotCreated method will
   * be called so that the caller can get access to the snapshot.
   *
   * This is an optimization to reduce the likelihood of conflicts that
   * could occur in between the time {@link #setSnapshot()} is called and the
   * first write/getForUpdate operation. i.e. this prevents the following
   * race-condition:
   *
   *   txn1->setSnapshot();
   *   txn2->put("A", ...);
   *   txn2->commit();
   *   txn1->getForUpdate(opts, "A", ...);
   *   FAIL!
   */
  public void setSnapshotOnNextOperation() {
    assert(isOwningHandle());
    setSnapshotOnNextOperation(nativeHandle_);
  }

  /**
   * Similar to {@link #setSnapshot()}, but will not change the current snapshot
   * until put/merge/delete/getForUpdate/multiGetForUpdate is called.
   * By calling this function, the transaction will essentially call
   * {@link #setSnapshot()} for you right before performing the next
   * write/getForUpdate.
   *
   * Calling {@link #setSnapshotOnNextOperation()} will not affect what
   * snapshot is returned by {@link #getSnapshot} until the next
   * write/getForUpdate is executed.
   *
   * When the snapshot is created the
   * {@link AbstractTransactionNotifier#snapshotCreated(Snapshot)} method will
   * be called so that the caller can get access to the snapshot.
   *
   * This is an optimization to reduce the likelihood of conflicts that
   * could occur in between the time {@link #setSnapshot()} is called and the
   * first write/getForUpdate operation. i.e. this prevents the following
   * race-condition:
   *
   *   txn1->setSnapshot();
   *   txn2->put("A", ...);
   *   txn2->commit();
   *   txn1->getForUpdate(opts, "A", ...);
   *   FAIL!
   *
   * @param transactionNotifier A handler for receiving snapshot notifications
   *     for the transaction
   *
   */
  public void setSnapshotOnNextOperation(
      final AbstractTransactionNotifier transactionNotifier) {
    assert(isOwningHandle());
    setSnapshotOnNextOperation(nativeHandle_, transactionNotifier.nativeHandle_);
  }

  /**
   * Returns the Snapshot created by the last call to {@link #setSnapshot()}.
   *
   * REQUIRED: The returned Snapshot is only valid up until the next time
   * {@link #setSnapshot()}/{@link #setSnapshotOnNextOperation()} is called,
   * {@link #clearSnapshot()} is called, or the Transaction is deleted.
   *
   * @return The snapshot or null if there is no snapshot
   */
  public Snapshot getSnapshot() {
    assert(isOwningHandle());
    final long snapshotNativeHandle = getSnapshot(nativeHandle_);
    // A zero native handle is how the native side reports "no snapshot".
    if(snapshotNativeHandle == 0) {
      return null;
    } else {
      final Snapshot snapshot = new Snapshot(snapshotNativeHandle);
      return snapshot;
    }
  }

  /**
   * Clears the current snapshot (i.e. no snapshot will be 'set')
   *
   * This removes any snapshot that currently exists or is set to be created
   * on the next update operation ({@link #setSnapshotOnNextOperation()}).
   *
   * Calling {@link #clearSnapshot()} has no effect on keys written before this
   * function has been called.
   *
   * If a reference to a snapshot was retrieved via {@link #getSnapshot()}, it
   * will no longer be valid and should be discarded after a call to
   * {@link #clearSnapshot()}.
   */
  public void clearSnapshot() {
    assert(isOwningHandle());
    clearSnapshot(nativeHandle_);
  }

  /**
   * Prepare the current transaction for 2PC
   *
   * Note that, unlike the neighbouring methods, this one is package private.
   *
   * @throws RocksDBException if the native prepare call reports an error
   */
  void prepare() throws RocksDBException {
    //TODO(AR) consider a Java'ish version of this function, which returns an AutoCloseable (commit)
    assert(isOwningHandle());
    prepare(nativeHandle_);
  }

  /**
   * Write all batched keys to the db atomically.
   *
   * Returns OK on success.
   *
   * May return any error status that could be returned by DB:Write().
   *
   * If this transaction was created by an {@link OptimisticTransactionDB}
   * Status::Busy() may be returned if the transaction could not guarantee
   * that there are no write conflicts. Status::TryAgain() may be returned
   * if the memtable history size is not large enough
   * (See max_write_buffer_number_to_maintain).
   *
   * If this transaction was created by a {@link TransactionDB},
   * Status::Expired() may be returned if this transaction has lived for
   * longer than {@link TransactionOptions#getExpiration()}.
   *
   * @throws RocksDBException if an error occurs when committing the transaction
   */
  public void commit() throws RocksDBException {
    assert(isOwningHandle());
    commit(nativeHandle_);
  }

  /**
   * Discard all batched writes in this transaction.
   *
   * @throws RocksDBException if an error occurs when rolling back the transaction
   */
  public void rollback() throws RocksDBException {
    assert(isOwningHandle());
    rollback(nativeHandle_);
  }

  /**
   * Records the state of the transaction for future calls to
   * {@link #rollbackToSavePoint()}.
   *
   * May be called multiple times to set multiple save points.
   *
   * @throws RocksDBException if an error occurs whilst setting a save point
   */
  public void setSavePoint() throws RocksDBException {
    assert(isOwningHandle());
    setSavePoint(nativeHandle_);
  }

  /**
   * Undo all operations in this transaction (put, merge, delete, putLogData)
   * since the most recent call to {@link #setSavePoint()} and removes the most
   * recent {@link #setSavePoint()}.
   *
   * If there is no previous call to {@link #setSavePoint()},
   * returns Status::NotFound()
   *
   * @throws RocksDBException if an error occurs when rolling back to a save point
   */
  public void rollbackToSavePoint() throws RocksDBException {
    assert(isOwningHandle());
    rollbackToSavePoint(nativeHandle_);
  }

  /**
   * This function is similar to
   * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])} except it will
   * also read pending changes in this transaction.
   * Currently, this function will return Status::MergeInProgress if the most
   * recent write to the queried key in this batch is a Merge.
   *
   * If {@link ReadOptions#snapshot()} is not set, the current version of the
   * key will be read. Calling {@link #setSnapshot()} does not affect the
   * version of the data returned.
   *
   * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect
   * what is read from the DB but will NOT change which keys are read from this
   * transaction (the keys in this transaction do not yet belong to any snapshot
   * and will be fetched regardless).
   *
   * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance
   * @param readOptions Read options.
   * @param key the key to retrieve the value for.
   *
   * @return a byte array storing the value associated with the input key if
   *     any. null if it does not find the specified key.
   *
   * @throws RocksDBException thrown if error happens in underlying native
   *     library.
   */
  public byte[] get(final ColumnFamilyHandle columnFamilyHandle,
      final ReadOptions readOptions, final byte[] key)
      throws RocksDBException {
    assert(isOwningHandle());
    return get(nativeHandle_, readOptions.nativeHandle_, key, key.length,
        columnFamilyHandle.nativeHandle_);
  }

  /**
   * This function is similar to
   * {@link RocksDB#get(ReadOptions, byte[])} except it will
   * also read pending changes in this transaction.
   * Currently, this function will return Status::MergeInProgress if the most
   * recent write to the queried key in this batch is a Merge.
   *
   * If {@link ReadOptions#snapshot()} is not set, the current version of the
   * key will be read. Calling {@link #setSnapshot()} does not affect the
   * version of the data returned.
   *
   * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect
   * what is read from the DB but will NOT change which keys are read from this
   * transaction (the keys in this transaction do not yet belong to any snapshot
   * and will be fetched regardless).
   *
   * @param readOptions Read options.
* @param key the key to retrieve the value for. * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying native * library. */ public byte[] get(final ReadOptions readOptions, final byte[] key) throws RocksDBException { assert(isOwningHandle()); return get(nativeHandle_, readOptions.nativeHandle_, key, key.length); } /** * This function is similar to * {@link RocksDB#multiGet(ReadOptions, List, List)} except it will * also read pending changes in this transaction. * Currently, this function will return Status::MergeInProgress if the most * recent write to the queried key in this batch is a Merge. * * If {@link ReadOptions#snapshot()} is not set, the current version of the * key will be read. Calling {@link #setSnapshot()} does not affect the * version of the data returned. * * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect * what is read from the DB but will NOT change which keys are read from this * transaction (the keys in this transaction do not yet belong to any snapshot * and will be fetched regardless). * * @param readOptions Read options. * @param columnFamilyHandles {@link java.util.List} containing * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * * @return Array of values, one for each key * * @throws RocksDBException thrown if error happens in underlying * native library. * @throws IllegalArgumentException thrown if the size of passed keys is not * equal to the amount of passed column family handles. */ public byte[][] multiGet(final ReadOptions readOptions, final List columnFamilyHandles, final byte[][] keys) throws RocksDBException { assert(isOwningHandle()); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. 
if (keys.length != columnFamilyHandles.size()) { throw new IllegalArgumentException( "For each key there must be a ColumnFamilyHandle."); } if(keys.length == 0) { return new byte[0][0]; } final long[] cfHandles = new long[columnFamilyHandles.size()]; for (int i = 0; i < columnFamilyHandles.size(); i++) { cfHandles[i] = columnFamilyHandles.get(i).nativeHandle_; } return multiGet(nativeHandle_, readOptions.nativeHandle_, keys, cfHandles); } /** * This function is similar to * {@link RocksDB#multiGet(ReadOptions, List)} except it will * also read pending changes in this transaction. * Currently, this function will return Status::MergeInProgress if the most * recent write to the queried key in this batch is a Merge. * * If {@link ReadOptions#snapshot()} is not set, the current version of the * key will be read. Calling {@link #setSnapshot()} does not affect the * version of the data returned. * * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect * what is read from the DB but will NOT change which keys are read from this * transaction (the keys in this transaction do not yet belong to any snapshot * and will be fetched regardless). * * @param readOptions Read options.= * {@link org.rocksdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * * @return Array of values, one for each key * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[][] multiGet(final ReadOptions readOptions, final byte[][] keys) throws RocksDBException { assert(isOwningHandle()); if(keys.length == 0) { return new byte[0][0]; } return multiGet(nativeHandle_, readOptions.nativeHandle_, keys); } /** * Read this key and ensure that this transaction will only * be able to be committed if this key is not written outside this * transaction after it has first been read (or after the snapshot if a * snapshot is set in this transaction). 
The transaction behavior is the * same regardless of whether the key exists or not. * * Note: Currently, this function will return Status::MergeInProgress * if the most recent write to the queried key in this batch is a Merge. * * The values returned by this function are similar to * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])}. * If value==nullptr, then this function will not read any data, but will * still ensure that this key cannot be written to by outside of this * transaction. * * If this transaction was created by an {@link OptimisticTransactionDB}, * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} * could cause {@link #commit()} to fail. Otherwise, it could return any error * that could be returned by * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])}. * * If this transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * {@link Status.Code#MergeInProgress} if merge operations cannot be * resolved. * * @param readOptions Read options. * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the key to retrieve the value for. * @param exclusive true if the transaction should have exclusive access to * the key, otherwise false for shared access. * @param doValidate true if it should validate the snapshot before doing the read * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ public byte[] getForUpdate(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean exclusive, final boolean doValidate) throws RocksDBException { assert (isOwningHandle()); return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, exclusive, doValidate); } /** * Same as * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean, boolean)} * with doValidate=true. * * @param readOptions Read options. * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the key to retrieve the value for. * @param exclusive true if the transaction should have exclusive access to * the key, otherwise false for shared access. * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] getForUpdate(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean exclusive) throws RocksDBException { assert(isOwningHandle()); return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, exclusive, true /*doValidate*/); } /** * Read this key and ensure that this transaction will only * be able to be committed if this key is not written outside this * transaction after it has first been read (or after the snapshot if a * snapshot is set in this transaction). The transaction behavior is the * same regardless of whether the key exists or not. * * Note: Currently, this function will return Status::MergeInProgress * if the most recent write to the queried key in this batch is a Merge. * * The values returned by this function are similar to * {@link RocksDB#get(ReadOptions, byte[])}. 
* If value==nullptr, then this function will not read any data, but will * still ensure that this key cannot be written to by outside of this * transaction. * * If this transaction was created on an {@link OptimisticTransactionDB}, * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} * could cause {@link #commit()} to fail. Otherwise, it could return any error * that could be returned by * {@link RocksDB#get(ReadOptions, byte[])}. * * If this transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * {@link Status.Code#MergeInProgress} if merge operations cannot be * resolved. * * @param readOptions Read options. * @param key the key to retrieve the value for. * @param exclusive true if the transaction should have exclusive access to * the key, otherwise false for shared access. * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[] getForUpdate(final ReadOptions readOptions, final byte[] key, final boolean exclusive) throws RocksDBException { assert(isOwningHandle()); return getForUpdate( nativeHandle_, readOptions.nativeHandle_, key, key.length, exclusive, true /*doValidate*/); } /** * A multi-key version of * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}. * * * @param readOptions Read options. * @param columnFamilyHandles {@link org.rocksdb.ColumnFamilyHandle} * instances * @param keys the keys to retrieve the values for. 
* * @return Array of values, one for each key * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[][] multiGetForUpdate(final ReadOptions readOptions, final List columnFamilyHandles, final byte[][] keys) throws RocksDBException { assert(isOwningHandle()); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.length != columnFamilyHandles.size()){ throw new IllegalArgumentException( "For each key there must be a ColumnFamilyHandle."); } if(keys.length == 0) { return new byte[0][0]; } final long[] cfHandles = new long[columnFamilyHandles.size()]; for (int i = 0; i < columnFamilyHandles.size(); i++) { cfHandles[i] = columnFamilyHandles.get(i).nativeHandle_; } return multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, keys, cfHandles); } /** * A multi-key version of {@link #getForUpdate(ReadOptions, byte[], boolean)}. * * * @param readOptions Read options. * @param keys the keys to retrieve the values for. * * @return Array of values, one for each key * * @throws RocksDBException thrown if error happens in underlying * native library. */ public byte[][] multiGetForUpdate(final ReadOptions readOptions, final byte[][] keys) throws RocksDBException { assert(isOwningHandle()); if(keys.length == 0) { return new byte[0][0]; } return multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, keys); } /** * Returns an iterator that will iterate on all keys in the default * column family including both keys in the DB and uncommitted keys in this * transaction. * * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read * from the DB but will NOT change which keys are read from this transaction * (the keys in this transaction do not yet belong to any snapshot and will be * fetched regardless). * * Caller is responsible for deleting the returned Iterator. 
* * The returned iterator is only valid until {@link #commit()}, * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. * * @param readOptions Read options. * * @return instance of iterator object. */ public RocksIterator getIterator(final ReadOptions readOptions) { assert(isOwningHandle()); return new RocksIterator(parent, getIterator(nativeHandle_, readOptions.nativeHandle_)); } /** * Returns an iterator that will iterate on all keys in the default * column family including both keys in the DB and uncommitted keys in this * transaction. * * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read * from the DB but will NOT change which keys are read from this transaction * (the keys in this transaction do not yet belong to any snapshot and will be * fetched regardless). * * Caller is responsible for calling {@link RocksIterator#close()} on * the returned Iterator. * * The returned iterator is only valid until {@link #commit()}, * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. * * @param readOptions Read options. * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * * @return instance of iterator object. */ public RocksIterator getIterator(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle) { assert(isOwningHandle()); return new RocksIterator(parent, getIterator(nativeHandle_, readOptions.nativeHandle_, columnFamilyHandle.nativeHandle_)); } /** * Similar to {@link RocksDB#put(ColumnFamilyHandle, byte[], byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. 
* * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to put the key/value into * @param key the specified key to be inserted. * @param value the value associated with the specified key. * @param assumeTracked true when it is expected that the key is already * tracked. More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); put(nativeHandle_, key, key.length, value, value.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to {@link #put(ColumnFamilyHandle, byte[], byte[], boolean)} * but with {@code assumeTracked = false}. * * Will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. 
See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to put the key/value into * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); put(nativeHandle_, key, key.length, value, value.length, columnFamilyHandle.nativeHandle_, false); } /** * Similar to {@link RocksDB#put(byte[], byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void put(final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); put(nativeHandle_, key, key.length, value, value.length); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #put(ColumnFamilyHandle, byte[], byte[])} but allows * you to specify the key and value in several parts that will be * concatenated together. 
* * @param columnFamilyHandle The column family to put the key/value into * @param keyParts the specified key to be inserted. * @param valueParts the value associated with the specified key. * @param assumeTracked true when it is expected that the key is already * tracked. More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts, final byte[][] valueParts, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); put(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to {@link #put(ColumnFamilyHandle, byte[][], byte[][], boolean)} * but with with {@code assumeTracked = false}. * * Allows you to specify the key and value in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to put the key/value into * @param keyParts the specified key to be inserted. * @param valueParts the value associated with the specified key. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts, final byte[][] valueParts) throws RocksDBException { assert(isOwningHandle()); put(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length, columnFamilyHandle.nativeHandle_, false); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #put(byte[], byte[])} but allows * you to specify the key and value in several parts that will be * concatenated together * * @param keyParts the specified key to be inserted. * @param valueParts the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void put(final byte[][] keyParts, final byte[][] valueParts) throws RocksDBException { assert(isOwningHandle()); put(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length); } /** * Similar to {@link RocksDB#merge(ColumnFamilyHandle, byte[], byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to merge the key/value into * @param key the specified key to be merged. * @param value the value associated with the specified key. 
* @param assumeTracked true when it is expected that the key is already * tracked. More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void merge(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); merge(nativeHandle_, key, key.length, value, value.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to {@link #merge(ColumnFamilyHandle, byte[], byte[], boolean)} * but with {@code assumeTracked = false}. * * Will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to merge the key/value into * @param key the specified key to be merged. * @param value the value associated with the specified key. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void merge(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); merge(nativeHandle_, key, key.length, value, value.length, columnFamilyHandle.nativeHandle_, false); } /** * Similar to {@link RocksDB#merge(byte[], byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param key the specified key to be merged. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void merge(final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); merge(nativeHandle_, key, key.length, value, value.length); } /** * Similar to {@link RocksDB#delete(ColumnFamilyHandle, byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. 
* * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to delete the key/value from * @param key the specified key to be deleted. * @param assumeTracked true when it is expected that the key is already * tracked. More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); delete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to {@link #delete(ColumnFamilyHandle, byte[], boolean)} * but with {@code assumeTracked = false}. * * Will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. 
See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to delete the key/value from * @param key the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { assert(isOwningHandle()); delete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, /*assumeTracked*/ false); } /** * Similar to {@link RocksDB#delete(byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param key the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void delete(final byte[] key) throws RocksDBException { assert(isOwningHandle()); delete(nativeHandle_, key, key.length); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #delete(ColumnFamilyHandle, byte[])} but allows * you to specify the key in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to delete the key/value from * @param keyParts the specified key to be deleted. * @param assumeTracked true when it is expected that the key is already * tracked. 
More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); delete(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to{@link #delete(ColumnFamilyHandle, byte[][], boolean)} * but with {@code assumeTracked = false}. * * Allows you to specify the key in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to delete the key/value from * @param keyParts the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void delete(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); delete(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_, false); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #delete(byte[])} but allows * you to specify key the in several parts that will be * concatenated together. 
* * @param keyParts the specified key to be deleted * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void delete(final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); delete(nativeHandle_, keyParts, keyParts.length); } /** * Similar to {@link RocksDB#singleDelete(ColumnFamilyHandle, byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to delete the key/value from * @param key the specified key to be deleted. * @param assumeTracked true when it is expected that the key is already * tracked. More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to {@link #singleDelete(ColumnFamilyHandle, byte[], boolean)} * but with {@code assumeTracked = false}. * * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. * * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param columnFamilyHandle The column family to delete the key/value from * @param key the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, false); } /** * Similar to {@link RocksDB#singleDelete(byte[])}, but * will also perform conflict checking on the keys be written. * * If this Transaction was created on an {@link OptimisticTransactionDB}, * these functions should always succeed. 
* * If this Transaction was created on a {@link TransactionDB}, an * {@link RocksDBException} may be thrown with an accompanying {@link Status} * when: * {@link Status.Code#Busy} if there is a write conflict, * {@link Status.Code#TimedOut} if a lock could not be acquired, * {@link Status.Code#TryAgain} if the memtable history size is not large * enough. See * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} * * @param key the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final byte[] key) throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, key, key.length); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #singleDelete(ColumnFamilyHandle, byte[])} but allows * you to specify the key in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to delete the key/value from * @param keyParts the specified key to be deleted. * @param assumeTracked true when it is expected that the key is already * tracked. More specifically, it means the the key was previous tracked * in the same savepoint, with the same exclusive flag, and at a lower * sequence number. If valid then it skips ValidateSnapshot, * throws an error otherwise. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); singleDelete(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_, assumeTracked); } /** * Similar to{@link #singleDelete(ColumnFamilyHandle, byte[][], boolean)} * but with {@code assumeTracked = false}. * * Allows you to specify the key in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to delete the key/value from * @param keyParts the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_, false); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #singleDelete(byte[])} but allows * you to specify the key in several parts that will be * concatenated together. * * @param keyParts the specified key to be deleted. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ @Experimental("Performance optimization for a very specific workload") public void singleDelete(final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, keyParts, keyParts.length); } /** * Similar to {@link RocksDB#put(ColumnFamilyHandle, byte[], byte[])}, * but operates on the transactions write batch. This write will only happen * if this transaction gets committed successfully. * * Unlike {@link #put(ColumnFamilyHandle, byte[], byte[])} no conflict * checking will be performed for this key. * * If this Transaction was created on a {@link TransactionDB}, this function * will still acquire locks necessary to make sure this write doesn't cause * conflicts in other transactions; This may cause a {@link RocksDBException} * with associated {@link Status.Code#Busy}. * * @param columnFamilyHandle The column family to put the key/value into * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void putUntracked(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); putUntracked(nativeHandle_, key, key.length, value, value.length, columnFamilyHandle.nativeHandle_); } /** * Similar to {@link RocksDB#put(byte[], byte[])}, * but operates on the transactions write batch. This write will only happen * if this transaction gets committed successfully. * * Unlike {@link #put(byte[], byte[])} no conflict * checking will be performed for this key. 
* * If this Transaction was created on a {@link TransactionDB}, this function * will still acquire locks necessary to make sure this write doesn't cause * conflicts in other transactions; This may cause a {@link RocksDBException} * with associated {@link Status.Code#Busy}. * * @param key the specified key to be inserted. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void putUntracked(final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); putUntracked(nativeHandle_, key, key.length, value, value.length); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #putUntracked(ColumnFamilyHandle, byte[], byte[])} but * allows you to specify the key and value in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to put the key/value into * @param keyParts the specified key to be inserted. * @param valueParts the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void putUntracked(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts, final byte[][] valueParts) throws RocksDBException { assert(isOwningHandle()); putUntracked(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length, columnFamilyHandle.nativeHandle_); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #putUntracked(byte[], byte[])} but * allows you to specify the key and value in several parts that will be * concatenated together. * * @param keyParts the specified key to be inserted. * @param valueParts the value associated with the specified key. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void putUntracked(final byte[][] keyParts, final byte[][] valueParts) throws RocksDBException { assert(isOwningHandle()); putUntracked(nativeHandle_, keyParts, keyParts.length, valueParts, valueParts.length); } /** * Similar to {@link RocksDB#merge(ColumnFamilyHandle, byte[], byte[])}, * but operates on the transactions write batch. This write will only happen * if this transaction gets committed successfully. * * Unlike {@link #merge(ColumnFamilyHandle, byte[], byte[])} no conflict * checking will be performed for this key. * * If this Transaction was created on a {@link TransactionDB}, this function * will still acquire locks necessary to make sure this write doesn't cause * conflicts in other transactions; This may cause a {@link RocksDBException} * with associated {@link Status.Code#Busy}. * * @param columnFamilyHandle The column family to merge the key/value into * @param key the specified key to be merged. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void mergeUntracked(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { mergeUntracked(nativeHandle_, key, key.length, value, value.length, columnFamilyHandle.nativeHandle_); } /** * Similar to {@link RocksDB#merge(byte[], byte[])}, * but operates on the transactions write batch. This write will only happen * if this transaction gets committed successfully. * * Unlike {@link #merge(byte[], byte[])} no conflict * checking will be performed for this key. 
* * If this Transaction was created on a {@link TransactionDB}, this function * will still acquire locks necessary to make sure this write doesn't cause * conflicts in other transactions; This may cause a {@link RocksDBException} * with associated {@link Status.Code#Busy}. * * @param key the specified key to be merged. * @param value the value associated with the specified key. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void mergeUntracked(final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); mergeUntracked(nativeHandle_, key, key.length, value, value.length); } /** * Similar to {@link RocksDB#delete(ColumnFamilyHandle, byte[])}, * but operates on the transactions write batch. This write will only happen * if this transaction gets committed successfully. * * Unlike {@link #delete(ColumnFamilyHandle, byte[])} no conflict * checking will be performed for this key. * * If this Transaction was created on a {@link TransactionDB}, this function * will still acquire locks necessary to make sure this write doesn't cause * conflicts in other transactions; This may cause a {@link RocksDBException} * with associated {@link Status.Code#Busy}. * * @param columnFamilyHandle The column family to delete the key/value from * @param key the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void deleteUntracked(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) throws RocksDBException { assert(isOwningHandle()); deleteUntracked(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } /** * Similar to {@link RocksDB#delete(byte[])}, * but operates on the transactions write batch. This write will only happen * if this transaction gets committed successfully. 
* * Unlike {@link #delete(byte[])} no conflict * checking will be performed for this key. * * If this Transaction was created on a {@link TransactionDB}, this function * will still acquire locks necessary to make sure this write doesn't cause * conflicts in other transactions; This may cause a {@link RocksDBException} * with associated {@link Status.Code#Busy}. * * @param key the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void deleteUntracked(final byte[] key) throws RocksDBException { assert(isOwningHandle()); deleteUntracked(nativeHandle_, key, key.length); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #deleteUntracked(ColumnFamilyHandle, byte[])} but allows * you to specify the key in several parts that will be * concatenated together. * * @param columnFamilyHandle The column family to delete the key/value from * @param keyParts the specified key to be deleted. * * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void deleteUntracked(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); deleteUntracked(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #deleteUntracked(byte[])} but allows * you to specify the key in several parts that will be * concatenated together. * * @param keyParts the specified key to be deleted. 
* * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ public void deleteUntracked(final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); deleteUntracked(nativeHandle_, keyParts, keyParts.length); } /** * Similar to {@link WriteBatch#putLogData(byte[])} * * @param blob binary object to be inserted */ public void putLogData(final byte[] blob) { assert(isOwningHandle()); putLogData(nativeHandle_, blob, blob.length); } /** * By default, all put/merge/delete operations will be indexed in the * transaction so that get/getForUpdate/getIterator can search for these * keys. * * If the caller does not want to fetch the keys about to be written, * they may want to avoid indexing as a performance optimization. * Calling {@link #disableIndexing()} will turn off indexing for all future * put/merge/delete operations until {@link #enableIndexing()} is called. * * If a key is put/merge/deleted after {@link #disableIndexing()} is called * and then is fetched via get/getForUpdate/getIterator, the result of the * fetch is undefined. */ public void disableIndexing() { assert(isOwningHandle()); disableIndexing(nativeHandle_); } /** * Re-enables indexing after a previous call to {@link #disableIndexing()} */ public void enableIndexing() { assert(isOwningHandle()); enableIndexing(nativeHandle_); } /** * Returns the number of distinct Keys being tracked by this transaction. * If this transaction was created by a {@link TransactionDB}, this is the * number of keys that are currently locked by this transaction. * If this transaction was created by an {@link OptimisticTransactionDB}, * this is the number of keys that need to be checked for conflicts at commit * time. 
* * @return the number of distinct Keys being tracked by this transaction */ public long getNumKeys() { assert(isOwningHandle()); return getNumKeys(nativeHandle_); } /** * Returns the number of puts that have been applied to this * transaction so far. * * @return the number of puts that have been applied to this transaction */ public long getNumPuts() { assert(isOwningHandle()); return getNumPuts(nativeHandle_); } /** * Returns the number of deletes that have been applied to this * transaction so far. * * @return the number of deletes that have been applied to this transaction */ public long getNumDeletes() { assert(isOwningHandle()); return getNumDeletes(nativeHandle_); } /** * Returns the number of merges that have been applied to this * transaction so far. * * @return the number of merges that have been applied to this transaction */ public long getNumMerges() { assert(isOwningHandle()); return getNumMerges(nativeHandle_); } /** * Returns the elapsed time in milliseconds since this Transaction began. * * @return the elapsed time in milliseconds since this transaction began. */ public long getElapsedTime() { assert(isOwningHandle()); return getElapsedTime(nativeHandle_); } /** * Fetch the underlying write batch that contains all pending changes to be * committed. * * Note: You should not write or delete anything from the batch directly and * should only use the functions in the {@link Transaction} class to * write to this transaction. * * @return The write batch */ public WriteBatchWithIndex getWriteBatch() { assert(isOwningHandle()); final WriteBatchWithIndex writeBatchWithIndex = new WriteBatchWithIndex(getWriteBatch(nativeHandle_)); return writeBatchWithIndex; } /** * Change the value of {@link TransactionOptions#getLockTimeout()} * (in milliseconds) for this transaction. * * Has no effect on OptimisticTransactions. * * @param lockTimeout the timeout (in milliseconds) for locks used by this * transaction. 
*/
  public void setLockTimeout(final long lockTimeout) {
    assert isOwningHandle();
    final long handle = nativeHandle_;
    setLockTimeout(handle, lockTimeout);
  }

  /**
   * Return the WriteOptions that will be used during {@link #commit()}.
   *
   * @return the WriteOptions that will be used
   */
  public WriteOptions getWriteOptions() {
    assert isOwningHandle();
    // Wrap the native write-options handle in a Java object.
    return new WriteOptions(getWriteOptions(nativeHandle_));
  }

  /**
   * Reset the WriteOptions that will be used during {@link #commit()}.
   *
   * @param writeOptions The new WriteOptions
   */
  public void setWriteOptions(final WriteOptions writeOptions) {
    assert isOwningHandle();
    final long writeOptionsHandle = writeOptions.nativeHandle_;
    setWriteOptions(nativeHandle_, writeOptionsHandle);
  }

  /**
   * If this key was previously fetched in this transaction using
   * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}/
   * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, calling
   * {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} will tell
   * the transaction that it no longer needs to do any conflict checking
   * for this key.
   *
   * If a key has been fetched N times via
   * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)}/
   * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, then
   * {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} will only have an
   * effect if it is also called N times. If this key has been written to in
   * this transaction, {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])}
   * will have no effect.
   *
   * If {@link #setSavePoint()} has been called after the
   * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)},
   * {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} will not have any
   * effect.
   *
   * If this Transaction was created by an {@link OptimisticTransactionDB},
   * calling {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} can affect
   * whether this key is conflict checked at commit time.
* If this Transaction was created by a {@link TransactionDB}, * calling {@link #undoGetForUpdate(ColumnFamilyHandle, byte[])} may release * any held locks for this key. * * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} * instance * @param key the key to retrieve the value for. */ public void undoGetForUpdate(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) { assert(isOwningHandle()); undoGetForUpdate(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } /** * If this key was previously fetched in this transaction using * {@link #getForUpdate(ReadOptions, byte[], boolean)}/ * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, calling * {@link #undoGetForUpdate(byte[])} will tell * the transaction that it no longer needs to do any conflict checking * for this key. * * If a key has been fetched N times via * {@link #getForUpdate(ReadOptions, byte[], boolean)}/ * {@link #multiGetForUpdate(ReadOptions, List, byte[][])}, then * {@link #undoGetForUpdate(byte[])} will only have an * effect if it is also called N times. If this key has been written to in * this transaction, {@link #undoGetForUpdate(byte[])} * will have no effect. * * If {@link #setSavePoint()} has been called after the * {@link #getForUpdate(ReadOptions, byte[], boolean)}, * {@link #undoGetForUpdate(byte[])} will not have any * effect. * * If this Transaction was created by an {@link OptimisticTransactionDB}, * calling {@link #undoGetForUpdate(byte[])} can affect * whether this key is conflict checked at commit time. * If this Transaction was created by a {@link TransactionDB}, * calling {@link #undoGetForUpdate(byte[])} may release * any held locks for this key. * * @param key the key to retrieve the value for. 
*/
  public void undoGetForUpdate(final byte[] key) {
    assert isOwningHandle();
    final int keyLength = key.length;
    undoGetForUpdate(nativeHandle_, key, keyLength);
  }

  /**
   * Adds the keys from the WriteBatch to the transaction
   *
   * @param writeBatch The write batch to read from
   *
   * @throws RocksDBException if an error occurs whilst rebuilding from the
   *     write batch.
   */
  public void rebuildFromWriteBatch(final WriteBatch writeBatch)
      throws RocksDBException {
    assert isOwningHandle();
    final long writeBatchHandle = writeBatch.nativeHandle_;
    rebuildFromWriteBatch(nativeHandle_, writeBatchHandle);
  }

  /**
   * Get the Commit time Write Batch.
   *
   * @return the commit time write batch.
   */
  public WriteBatch getCommitTimeWriteBatch() {
    assert isOwningHandle();
    // Wrap the native write-batch handle in a Java object.
    return new WriteBatch(getCommitTimeWriteBatch(nativeHandle_));
  }

  /**
   * Set the log number.
   *
   * @param logNumber the log number
   */
  public void setLogNumber(final long logNumber) {
    assert isOwningHandle();
    final long handle = nativeHandle_;
    setLogNumber(handle, logNumber);
  }

  /**
   * Get the log number.
   *
   * @return the log number
   */
  public long getLogNumber() {
    assert isOwningHandle();
    final long logNumber = getLogNumber(nativeHandle_);
    return logNumber;
  }

  /**
   * Set the name of the transaction.
   *
   * @param transactionName the name of the transaction
   *
   * @throws RocksDBException if an error occurs when setting the transaction
   *     name.
   */
  public void setName(final String transactionName) throws RocksDBException {
    assert isOwningHandle();
    final long handle = nativeHandle_;
    setName(handle, transactionName);
  }

  /**
   * Get the name of the transaction.
   *
   * @return the name of the transaction
   */
  public String getName() {
    assert isOwningHandle();
    final String transactionName = getName(nativeHandle_);
    return transactionName;
  }

  /**
   * Get the ID of the transaction.
   *
   * @return the ID of the transaction.
   */
  public long getID() {
    assert isOwningHandle();
    final long transactionId = getID(nativeHandle_);
    return transactionId;
  }

  /**
   * Determine if a deadlock has been detected.
   *
   * @return true if a deadlock has been detected.
*/
  public boolean isDeadlockDetect() {
    assert isOwningHandle();
    final boolean deadlockDetect = isDeadlockDetect(nativeHandle_);
    return deadlockDetect;
  }

  /**
   * Get the list of waiting transactions.
   *
   * @return The list of waiting transactions.
   */
  public WaitingTransactions getWaitingTxns() {
    assert isOwningHandle();
    final WaitingTransactions waiting = getWaitingTxns(nativeHandle_);
    return waiting;
  }

  /**
   * Get the execution status of the transaction.
   *
   * NOTE: The execution status of an Optimistic Transaction
   * never changes. This is only useful for non-optimistic transactions!
   *
   * @return The execution status of the transaction
   */
  public TransactionState getState() {
    assert isOwningHandle();
    final byte stateValue = getState(nativeHandle_);
    return TransactionState.getTransactionState(stateValue);
  }

  /**
   * The globally unique id with which the transaction is identified. This id
   * might or might not be set depending on the implementation. Similarly the
   * implementation decides the point in lifetime of a transaction at which it
   * assigns the id. Although currently it is the case, the id is not guaranteed
   * to remain the same across restarts.
   *
   * @return the transaction id.
   */
  @Experimental("NOTE: Experimental feature")
  public long getId() {
    assert isOwningHandle();
    final long id = getId(nativeHandle_);
    return id;
  }

  public enum TransactionState {
    STARTED((byte)0),
    AWAITING_PREPARE((byte)1),
    PREPARED((byte)2),
    AWAITING_COMMIT((byte)3),
    COMMITTED((byte)4),
    AWAITING_ROLLBACK((byte)5),
    ROLLEDBACK((byte)6),
    LOCKS_STOLEN((byte)7);

    /*
     * Keep old misspelled variable as alias
     * Tip from https://stackoverflow.com/a/37092410/454544
     */
    public static final TransactionState COMMITED = COMMITTED;

    private final byte value;

    TransactionState(final byte value) {
      this.value = value;
    }

    /**
     * Get TransactionState by byte value.
     *
     * @param value byte representation of TransactionState.
     *
     * @return the {@link org.rocksdb.Transaction.TransactionState} whose
     *     byte value equals {@code value}.
     * @throws java.lang.IllegalArgumentException if an invalid
     *     value is provided.
     */
    public static TransactionState getTransactionState(final byte value) {
      final TransactionState[] states = TransactionState.values();
      for (int i = 0; i < states.length; i++) {
        final TransactionState candidate = states[i];
        if (candidate.value == value) {
          return candidate;
        }
      }
      throw new IllegalArgumentException(
          "Illegal value provided for TransactionState.");
    }
  }

  /**
   * Called from C++ native method {@link #getWaitingTxns(long)}
   * to construct a WaitingTransactions object.
   *
   * @param columnFamilyId The id of the {@link ColumnFamilyHandle}
   * @param key The key
   * @param transactionIds The transaction ids
   *
   * @return The waiting transactions
   */
  private WaitingTransactions newWaitingTransactions(
      final long columnFamilyId, final String key,
      final long[] transactionIds) {
    final WaitingTransactions waiting =
        new WaitingTransactions(columnFamilyId, key, transactionIds);
    return waiting;
  }

  public static class WaitingTransactions {
    private final long columnFamilyId;
    private final String key;
    private final long[] transactionIds;

    private WaitingTransactions(final long columnFamilyId, final String key,
        final long[] transactionIds) {
      this.columnFamilyId = columnFamilyId;
      this.key = key;
      this.transactionIds = transactionIds;
    }

    /**
     * Get the Column Family ID.
     *
     * @return The column family ID
     */
    public long getColumnFamilyId() {
      return this.columnFamilyId;
    }

    /**
     * Get the key on which the transactions are waiting.
     *
     * @return The key
     */
    public String getKey() {
      return this.key;
    }

    /**
     * Get the IDs of the waiting transactions.
*
     * Note: the array held by this object is returned directly, without
     * copying.
     *
     * @return The IDs of the waiting transactions
     */
    public long[] getTransactionIds() {
      return transactionIds;
    }
  }

  // Native method declarations backing the public wrappers of this class.
  // The wrappers visible in this file pass nativeHandle_ as `handle`, and pass
  // each array argument together with its length.

  // Snapshot, prepare/commit/rollback and savepoint operations.
  private native void setSnapshot(final long handle);
  private native void setSnapshotOnNextOperation(final long handle);
  private native void setSnapshotOnNextOperation(final long handle,
      final long transactionNotifierHandle);
  private native long getSnapshot(final long handle);
  private native void clearSnapshot(final long handle);
  private native void prepare(final long handle) throws RocksDBException;
  private native void commit(final long handle) throws RocksDBException;
  private native void rollback(final long handle) throws RocksDBException;
  private native void setSavePoint(final long handle) throws RocksDBException;
  private native void rollbackToSavePoint(final long handle)
      throws RocksDBException;

  // Reads: get / multiGet / getForUpdate / multiGetForUpdate / getIterator.
  private native byte[] get(final long handle, final long readOptionsHandle,
      final byte key[], final int keyLength, final long columnFamilyHandle)
      throws RocksDBException;
  private native byte[] get(final long handle, final long readOptionsHandle,
      final byte key[], final int keyLen) throws RocksDBException;
  private native byte[][] multiGet(final long handle,
      final long readOptionsHandle, final byte[][] keys,
      final long[] columnFamilyHandles) throws RocksDBException;
  private native byte[][] multiGet(final long handle,
      final long readOptionsHandle, final byte[][] keys)
      throws RocksDBException;
  private native byte[] getForUpdate(final long handle,
      final long readOptionsHandle, final byte key[], final int keyLength,
      final long columnFamilyHandle, final boolean exclusive,
      final boolean doValidate) throws RocksDBException;
  private native byte[] getForUpdate(final long handle,
      final long readOptionsHandle, final byte key[], final int keyLen,
      final boolean exclusive, final boolean doValidate)
      throws RocksDBException;
  private native byte[][] multiGetForUpdate(final long handle,
      final long readOptionsHandle, final byte[][] keys,
      final long[] columnFamilyHandles) throws RocksDBException;
  private native byte[][] multiGetForUpdate(final long handle,
      final long readOptionsHandle, final byte[][] keys)
      throws RocksDBException;
  private native long getIterator(final long handle,
      final long readOptionsHandle);
  private native long getIterator(final long handle,
      final long readOptionsHandle, final long columnFamilyHandle);

  // Writes: put / merge / delete / singleDelete.
  private native void put(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength,
      final long columnFamilyHandle, final boolean assumeTracked)
      throws RocksDBException;
  private native void put(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength)
      throws RocksDBException;
  private native void put(final long handle, final byte[][] keys,
      final int keysLength, final byte[][] values, final int valuesLength,
      final long columnFamilyHandle, final boolean assumeTracked)
      throws RocksDBException;
  private native void put(final long handle, final byte[][] keys,
      final int keysLength, final byte[][] values, final int valuesLength)
      throws RocksDBException;
  private native void merge(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength,
      final long columnFamilyHandle, final boolean assumeTracked)
      throws RocksDBException;
  private native void merge(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength)
      throws RocksDBException;
  private native void delete(final long handle, final byte[] key,
      final int keyLength, final long columnFamilyHandle,
      final boolean assumeTracked) throws RocksDBException;
  private native void delete(final long handle, final byte[] key,
      final int keyLength) throws RocksDBException;
  private native void delete(final long handle, final byte[][] keys,
      final int keysLength, final long columnFamilyHandle,
      final boolean assumeTracked) throws RocksDBException;
  private native void delete(final long handle, final byte[][] keys,
      final int keysLength) throws RocksDBException;
  private native void singleDelete(final long handle, final byte[] key,
      final int keyLength, final long columnFamilyHandle,
      final boolean assumeTracked) throws RocksDBException;
  private native void singleDelete(final long handle, final byte[] key,
      final int keyLength) throws RocksDBException;
  private native void singleDelete(final long handle, final byte[][] keys,
      final int keysLength, final long columnFamilyHandle,
      final boolean assumeTracked) throws RocksDBException;
  private native void singleDelete(final long handle, final byte[][] keys,
      final int keysLength) throws RocksDBException;

  // Untracked writes: putUntracked / mergeUntracked / deleteUntracked.
  private native void putUntracked(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength,
      final long columnFamilyHandle) throws RocksDBException;
  private native void putUntracked(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength)
      throws RocksDBException;
  private native void putUntracked(final long handle, final byte[][] keys,
      final int keysLength, final byte[][] values, final int valuesLength,
      final long columnFamilyHandle) throws RocksDBException;
  private native void putUntracked(final long handle, final byte[][] keys,
      final int keysLength, final byte[][] values, final int valuesLength)
      throws RocksDBException;
  private native void mergeUntracked(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength,
      final long columnFamilyHandle) throws RocksDBException;
  private native void mergeUntracked(final long handle, final byte[] key,
      final int keyLength, final byte[] value, final int valueLength)
      throws RocksDBException;
  private native void deleteUntracked(final long handle, final byte[] key,
      final int keyLength, final long columnFamilyHandle)
      throws RocksDBException;
  private native void deleteUntracked(final long handle, final byte[] key,
      final int keyLength) throws RocksDBException;
  private native void deleteUntracked(final long handle, final byte[][] keys,
      final int keysLength, final long columnFamilyHandle)
      throws RocksDBException;
  private native void deleteUntracked(final long handle, final byte[][] keys,
      final int keysLength) throws RocksDBException;

  // Log data, indexing control, counters and option accessors.
  private native void putLogData(final long handle, final byte[] blob,
      final int blobLength);
  private native void disableIndexing(final long handle);
  private native void enableIndexing(final long handle);
  private native long getNumKeys(final long handle);
  private native long getNumPuts(final long handle);
  private native long getNumDeletes(final long handle);
  private native long getNumMerges(final long handle);
  private native long getElapsedTime(final long handle);
  // The returned long is wrapped in a WriteBatchWithIndex by getWriteBatch().
  private native long getWriteBatch(final long handle);
  private native void setLockTimeout(final long handle, final long lockTimeout);
  // The returned long is wrapped in a WriteOptions by getWriteOptions().
  private native long getWriteOptions(final long handle);
  private native void setWriteOptions(final long handle,
      final long writeOptionsHandle);
  private native void undoGetForUpdate(final long handle, final byte[] key,
      final int keyLength, final long columnFamilyHandle);
  private native void undoGetForUpdate(final long handle, final byte[] key,
      final int keyLength);
  private native void rebuildFromWriteBatch(final long handle,
      final long writeBatchHandle) throws RocksDBException;
  // The returned long is wrapped in a WriteBatch by getCommitTimeWriteBatch().
  private native long getCommitTimeWriteBatch(final long handle);
  private native void setLogNumber(final long handle, final long logNumber);
  private native long getLogNumber(final long handle);
  private native void setName(final long handle, final String name)
      throws RocksDBException;
  private native String getName(final long handle);
  private native long getID(final long handle);
  private native boolean isDeadlockDetect(final long handle);
  private native WaitingTransactions getWaitingTxns(final long handle);
  // The returned byte is mapped by TransactionState.getTransactionState(byte).
  private native byte getState(final long handle);
  private native long getId(final long handle);

  @Override protected final native void disposeInternal(final long handle);
}
rocksdb-6.11.4/java/src/main/java/org/rocksdb/TransactionDB.java000066400000000000000000000314211370372246700243770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Database with Transaction support
 */
public class TransactionDB extends RocksDB
    implements TransactionalDB {

  private TransactionDBOptions transactionDbOptions_;

  /**
   * Private constructor.
   *
   * @param nativeHandle The native handle of the C++ TransactionDB object
   */
  private TransactionDB(final long nativeHandle) {
    super(nativeHandle);
  }

  /**
   * Open a TransactionDB, similar to {@link RocksDB#open(Options, String)}.
   *
   * @param options {@link org.rocksdb.Options} instance.
   * @param transactionDbOptions {@link org.rocksdb.TransactionDBOptions}
   *     instance.
   * @param path the path to the rocksdb.
   *
   * @return a new {@link TransactionDB} instance wrapping the handle
   *     returned by the native open call.
   *
   * @throws RocksDBException if an error occurs whilst opening the database.
   */
  public static TransactionDB open(final Options options,
      final TransactionDBOptions transactionDbOptions, final String path)
      throws RocksDBException {
    final TransactionDB tdb = new TransactionDB(open(options.nativeHandle_,
        transactionDbOptions.nativeHandle_, path));

    // When non-default Options are used, keeping a reference to the Options
    // inside the database object prevents them from being garbage collected
    // during the lifetime of the newly created database.
    tdb.storeOptionsInstance(options);
    tdb.storeTransactionDbOptions(transactionDbOptions);

    return tdb;
  }

  /**
   * Open a TransactionDB, similar to
   * {@link RocksDB#open(DBOptions, String, List, List)}.
* * @param dbOptions {@link org.rocksdb.DBOptions} instance. * @param transactionDbOptions {@link org.rocksdb.TransactionDBOptions} * instance. * @param path the path to the rocksdb. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances * * @return a {@link TransactionDB} instance on success, null if the specified * {@link TransactionDB} can not be opened. * * @throws RocksDBException if an error occurs whilst opening the database. */ public static TransactionDB open(final DBOptions dbOptions, final TransactionDBOptions transactionDbOptions, final String path, final List columnFamilyDescriptors, final List columnFamilyHandles) throws RocksDBException { final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; for (int i = 0; i < columnFamilyDescriptors.size(); i++) { final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors .get(i); cfNames[i] = cfDescriptor.getName(); cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; } final long[] handles = open(dbOptions.nativeHandle_, transactionDbOptions.nativeHandle_, path, cfNames, cfOptionHandles); final TransactionDB tdb = new TransactionDB(handles[0]); // when non-default Options is used, keeping an Options reference // in RocksDB can prevent Java to GC during the life-time of // the currently-created RocksDB. tdb.storeOptionsInstance(dbOptions); tdb.storeTransactionDbOptions(transactionDbOptions); for (int i = 1; i < handles.length; i++) { columnFamilyHandles.add(new ColumnFamilyHandle(tdb, handles[i])); } return tdb; } /** * This is similar to {@link #close()} except that it * throws an exception if any error occurs. * * This will not fsync the WAL files. 
* If syncing is required, the caller must first call {@link #syncWal()} * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch * with {@link WriteOptions#setSync(boolean)} set to true. * * See also {@link #close()}. * * @throws RocksDBException if an error occurs whilst closing. */ public void closeE() throws RocksDBException { if (owningHandle_.compareAndSet(true, false)) { try { closeDatabase(nativeHandle_); } finally { disposeInternal(); } } } /** * This is similar to {@link #closeE()} except that it * silently ignores any errors. * * This will not fsync the WAL files. * If syncing is required, the caller must first call {@link #syncWal()} * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch * with {@link WriteOptions#setSync(boolean)} set to true. * * See also {@link #close()}. */ @Override public void close() { if (owningHandle_.compareAndSet(true, false)) { try { closeDatabase(nativeHandle_); } catch (final RocksDBException e) { // silently ignore the error report } finally { disposeInternal(); } } } @Override public Transaction beginTransaction(final WriteOptions writeOptions) { return new Transaction(this, beginTransaction(nativeHandle_, writeOptions.nativeHandle_)); } @Override public Transaction beginTransaction(final WriteOptions writeOptions, final TransactionOptions transactionOptions) { return new Transaction(this, beginTransaction(nativeHandle_, writeOptions.nativeHandle_, transactionOptions.nativeHandle_)); } // TODO(AR) consider having beingTransaction(... oldTransaction) set a // reference count inside Transaction, so that we can always call // Transaction#close but the object is only disposed when there are as many // closes as beginTransaction. 
Makes the try-with-resources paradigm easier for // java developers @Override public Transaction beginTransaction(final WriteOptions writeOptions, final Transaction oldTransaction) { final long jtxnHandle = beginTransaction_withOld(nativeHandle_, writeOptions.nativeHandle_, oldTransaction.nativeHandle_); // RocksJava relies on the assumption that // we do not allocate a new Transaction object // when providing an old_txn assert(jtxnHandle == oldTransaction.nativeHandle_); return oldTransaction; } @Override public Transaction beginTransaction(final WriteOptions writeOptions, final TransactionOptions transactionOptions, final Transaction oldTransaction) { final long jtxn_handle = beginTransaction_withOld(nativeHandle_, writeOptions.nativeHandle_, transactionOptions.nativeHandle_, oldTransaction.nativeHandle_); // RocksJava relies on the assumption that // we do not allocate a new Transaction object // when providing an old_txn assert(jtxn_handle == oldTransaction.nativeHandle_); return oldTransaction; } public Transaction getTransactionByName(final String transactionName) { final long jtxnHandle = getTransactionByName(nativeHandle_, transactionName); if(jtxnHandle == 0) { return null; } final Transaction txn = new Transaction(this, jtxnHandle); // this instance doesn't own the underlying C++ object txn.disOwnNativeHandle(); return txn; } public List getAllPreparedTransactions() { final long[] jtxnHandles = getAllPreparedTransactions(nativeHandle_); final List txns = new ArrayList<>(); for(final long jtxnHandle : jtxnHandles) { final Transaction txn = new Transaction(this, jtxnHandle); // this instance doesn't own the underlying C++ object txn.disOwnNativeHandle(); txns.add(txn); } return txns; } public static class KeyLockInfo { private final String key; private final long[] transactionIDs; private final boolean exclusive; public KeyLockInfo(final String key, final long transactionIDs[], final boolean exclusive) { this.key = key; this.transactionIDs = transactionIDs; 
this.exclusive = exclusive; } /** * Get the key. * * @return the key */ public String getKey() { return key; } /** * Get the Transaction IDs. * * @return the Transaction IDs. */ public long[] getTransactionIDs() { return transactionIDs; } /** * Get the Lock status. * * @return true if the lock is exclusive, false if the lock is shared. */ public boolean isExclusive() { return exclusive; } } /** * Returns map of all locks held. * * @return a map of all the locks held. */ public Map getLockStatusData() { return getLockStatusData(nativeHandle_); } /** * Called from C++ native method {@link #getDeadlockInfoBuffer(long)} * to construct a DeadlockInfo object. * * @param transactionID The transaction id * @param columnFamilyId The id of the {@link ColumnFamilyHandle} * @param waitingKey the key that we are waiting on * @param exclusive true if the lock is exclusive, false if the lock is shared * * @return The waiting transactions */ private DeadlockInfo newDeadlockInfo( final long transactionID, final long columnFamilyId, final String waitingKey, final boolean exclusive) { return new DeadlockInfo(transactionID, columnFamilyId, waitingKey, exclusive); } public static class DeadlockInfo { private final long transactionID; private final long columnFamilyId; private final String waitingKey; private final boolean exclusive; private DeadlockInfo(final long transactionID, final long columnFamilyId, final String waitingKey, final boolean exclusive) { this.transactionID = transactionID; this.columnFamilyId = columnFamilyId; this.waitingKey = waitingKey; this.exclusive = exclusive; } /** * Get the Transaction ID. * * @return the transaction ID */ public long getTransactionID() { return transactionID; } /** * Get the Column Family ID. * * @return The column family ID */ public long getColumnFamilyId() { return columnFamilyId; } /** * Get the key that we are waiting on. 
* * @return the key that we are waiting on */ public String getWaitingKey() { return waitingKey; } /** * Get the Lock status. * * @return true if the lock is exclusive, false if the lock is shared. */ public boolean isExclusive() { return exclusive; } } public static class DeadlockPath { final DeadlockInfo[] path; final boolean limitExceeded; public DeadlockPath(final DeadlockInfo[] path, final boolean limitExceeded) { this.path = path; this.limitExceeded = limitExceeded; } public boolean isEmpty() { return path.length == 0 && !limitExceeded; } } public DeadlockPath[] getDeadlockInfoBuffer() { return getDeadlockInfoBuffer(nativeHandle_); } public void setDeadlockInfoBufferSize(final int targetSize) { setDeadlockInfoBufferSize(nativeHandle_, targetSize); } private void storeTransactionDbOptions( final TransactionDBOptions transactionDbOptions) { this.transactionDbOptions_ = transactionDbOptions; } @Override protected final native void disposeInternal(final long handle); private static native long open(final long optionsHandle, final long transactionDbOptionsHandle, final String path) throws RocksDBException; private static native long[] open(final long dbOptionsHandle, final long transactionDbOptionsHandle, final String path, final byte[][] columnFamilyNames, final long[] columnFamilyOptions); private native static void closeDatabase(final long handle) throws RocksDBException; private native long beginTransaction(final long handle, final long writeOptionsHandle); private native long beginTransaction(final long handle, final long writeOptionsHandle, final long transactionOptionsHandle); private native long beginTransaction_withOld(final long handle, final long writeOptionsHandle, final long oldTransactionHandle); private native long beginTransaction_withOld(final long handle, final long writeOptionsHandle, final long transactionOptionsHandle, final long oldTransactionHandle); private native long getTransactionByName(final long handle, final String name); private 
native long[] getAllPreparedTransactions(final long handle); private native Map getLockStatusData( final long handle); private native DeadlockPath[] getDeadlockInfoBuffer(final long handle); private native void setDeadlockInfoBufferSize(final long handle, final int targetSize); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TransactionDBOptions.java000066400000000000000000000171761370372246700257660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public class TransactionDBOptions extends RocksObject { public TransactionDBOptions() { super(newTransactionDBOptions()); } /** * Specifies the maximum number of keys that can be locked at the same time * per column family. * * If the number of locked keys is greater than {@link #getMaxNumLocks()}, * transaction writes (or GetForUpdate) will return an error. * * @return The maximum number of keys that can be locked */ public long getMaxNumLocks() { assert(isOwningHandle()); return getMaxNumLocks(nativeHandle_); } /** * Specifies the maximum number of keys that can be locked at the same time * per column family. * * If the number of locked keys is greater than {@link #getMaxNumLocks()}, * transaction writes (or GetForUpdate) will return an error. * * @param maxNumLocks The maximum number of keys that can be locked; * If this value is not positive, no limit will be enforced. 
* * @return this TransactionDBOptions instance */ public TransactionDBOptions setMaxNumLocks(final long maxNumLocks) { assert(isOwningHandle()); setMaxNumLocks(nativeHandle_, maxNumLocks); return this; } /** * The number of sub-tables per lock table (per column family) * * @return The number of sub-tables */ public long getNumStripes() { assert(isOwningHandle()); return getNumStripes(nativeHandle_); } /** * Increasing this value will increase the concurrency by dividing the lock * table (per column family) into more sub-tables, each with their own * separate mutex. * * Default: 16 * * @param numStripes The number of sub-tables * * @return this TransactionDBOptions instance */ public TransactionDBOptions setNumStripes(final long numStripes) { assert(isOwningHandle()); setNumStripes(nativeHandle_, numStripes); return this; } /** * The default wait timeout in milliseconds when * a transaction attempts to lock a key if not specified by * {@link TransactionOptions#setLockTimeout(long)} * * If 0, no waiting is done if a lock cannot instantly be acquired. * If negative, there is no timeout. * * @return the default wait timeout in milliseconds */ public long getTransactionLockTimeout() { assert(isOwningHandle()); return getTransactionLockTimeout(nativeHandle_); } /** * If positive, specifies the default wait timeout in milliseconds when * a transaction attempts to lock a key if not specified by * {@link TransactionOptions#setLockTimeout(long)} * * If 0, no waiting is done if a lock cannot instantly be acquired. * If negative, there is no timeout. Not using a timeout is not recommended * as it can lead to deadlocks. Currently, there is no deadlock-detection to * recover from a deadlock. 
* * Default: 1000 * * @param transactionLockTimeout the default wait timeout in milliseconds * * @return this TransactionDBOptions instance */ public TransactionDBOptions setTransactionLockTimeout( final long transactionLockTimeout) { assert(isOwningHandle()); setTransactionLockTimeout(nativeHandle_, transactionLockTimeout); return this; } /** * The wait timeout in milliseconds when writing a key * OUTSIDE of a transaction (ie by calling {@link RocksDB#put}, * {@link RocksDB#merge}, {@link RocksDB#delete} or {@link RocksDB#write} * directly). * * If 0, no waiting is done if a lock cannot instantly be acquired. * If negative, there is no timeout and will block indefinitely when acquiring * a lock. * * @return the timeout in milliseconds when writing a key OUTSIDE of a * transaction */ public long getDefaultLockTimeout() { assert(isOwningHandle()); return getDefaultLockTimeout(nativeHandle_); } /** * If positive, specifies the wait timeout in milliseconds when writing a key * OUTSIDE of a transaction (ie by calling {@link RocksDB#put}, * {@link RocksDB#merge}, {@link RocksDB#delete} or {@link RocksDB#write} * directly). * * If 0, no waiting is done if a lock cannot instantly be acquired. * If negative, there is no timeout and will block indefinitely when acquiring * a lock. * * Not using a timeout can lead to deadlocks. Currently, there * is no deadlock-detection to recover from a deadlock. While DB writes * cannot deadlock with other DB writes, they can deadlock with a transaction. * A negative timeout should only be used if all transactions have a small * expiration set. 
* * Default: 1000 * * @param defaultLockTimeout the timeout in milliseconds when writing a key * OUTSIDE of a transaction * @return this TransactionDBOptions instance */ public TransactionDBOptions setDefaultLockTimeout( final long defaultLockTimeout) { assert(isOwningHandle()); setDefaultLockTimeout(nativeHandle_, defaultLockTimeout); return this; } // /** // * If set, the {@link TransactionDB} will use this implementation of a mutex // * and condition variable for all transaction locking instead of the default // * mutex/condvar implementation. // * // * @param transactionDbMutexFactory the mutex factory for the transactions // * // * @return this TransactionDBOptions instance // */ // public TransactionDBOptions setCustomMutexFactory( // final TransactionDBMutexFactory transactionDbMutexFactory) { // // } /** * The policy for when to write the data into the DB. The default policy is to * write only the committed data {@link TxnDBWritePolicy#WRITE_COMMITTED}. * The data could be written before the commit phase. The DB then needs to * provide the mechanisms to tell apart committed from uncommitted data. * * @return The write policy. */ public TxnDBWritePolicy getWritePolicy() { assert(isOwningHandle()); return TxnDBWritePolicy.getTxnDBWritePolicy(getWritePolicy(nativeHandle_)); } /** * The policy for when to write the data into the DB. The default policy is to * write only the committed data {@link TxnDBWritePolicy#WRITE_COMMITTED}. * The data could be written before the commit phase. The DB then needs to * provide the mechanisms to tell apart committed from uncommitted data. * * @param writePolicy The write policy. 
* * @return this TransactionDBOptions instance */ public TransactionDBOptions setWritePolicy( final TxnDBWritePolicy writePolicy) { assert(isOwningHandle()); setWritePolicy(nativeHandle_, writePolicy.getValue()); return this; } private native static long newTransactionDBOptions(); private native long getMaxNumLocks(final long handle); private native void setMaxNumLocks(final long handle, final long maxNumLocks); private native long getNumStripes(final long handle); private native void setNumStripes(final long handle, final long numStripes); private native long getTransactionLockTimeout(final long handle); private native void setTransactionLockTimeout(final long handle, final long transactionLockTimeout); private native long getDefaultLockTimeout(final long handle); private native void setDefaultLockTimeout(final long handle, final long transactionLockTimeout); private native byte getWritePolicy(final long handle); private native void setWritePolicy(final long handle, final byte writePolicy); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TransactionLogIterator.java000066400000000000000000000061301370372246700263440ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** *

 * <p>A TransactionLogIterator is used to iterate over the transactions in a db.
 * One run of the iterator is continuous, i.e. the iterator will stop at the
 * beginning of any gap in sequences.</p>
 */
public class TransactionLogIterator extends RocksObject {

  /**
   * <p>An iterator is either positioned at a WriteBatch
   * or not valid. This method returns true if the iterator
   * is valid. Can read data from a valid iterator.</p>
   *
   * @return true if iterator position is valid.
   */
  public boolean isValid() {
    return isValid(nativeHandle_);
  }

  /**
   * <p>Moves the iterator to the next WriteBatch.
   * <strong>REQUIRES</strong>: Valid() to be true.</p>
   */
  public void next() {
    next(nativeHandle_);
  }

  /**
   * <p>Throws RocksDBException if something went wrong.</p>
   *
   * @throws org.rocksdb.RocksDBException if something went
   *     wrong in the underlying C++ code.
   */
  public void status() throws RocksDBException {
    status(nativeHandle_);
  }

  /**
   * <p>If iterator position is valid, return the current
   * write_batch and the sequence number of the earliest
   * transaction contained in the batch.</p>
   *
   * <p>ONLY use if Valid() is true and status() is OK.</p>
   *
   * @return {@link org.rocksdb.TransactionLogIterator.BatchResult}
   *     instance.
   */
  public BatchResult getBatch() {
    // Only enforced when JVM assertions are enabled (-ea).
    assert(isValid());
    return getBatch(nativeHandle_);
  }

  /**
   * <p>TransactionLogIterator constructor.</p>
   *
   * @param nativeHandle address to native address.
   */
  TransactionLogIterator(final long nativeHandle) {
    super(nativeHandle);
  }

  /**
   * <p>BatchResult represents a data structure returned
   * by a TransactionLogIterator containing a sequence
   * number and a {@link WriteBatch} instance.</p>
   */
  public static final class BatchResult {
    /**
     * <p>Constructor of BatchResult class.</p>
     *
     * @param sequenceNumber related to this BatchResult instance.
     * @param nativeHandle to {@link org.rocksdb.WriteBatch}
     *     native instance.
     */
    public BatchResult(final long sequenceNumber,
        final long nativeHandle) {
      sequenceNumber_ = sequenceNumber;
      // NOTE(review): the meaning of the `true` argument is defined by the
      // WriteBatch(long, boolean) constructor, which is not visible here --
      // presumably it marks the wrapper as owning the native handle; confirm.
      writeBatch_ = new WriteBatch(nativeHandle, true);
    }

    /**
     * <p>Return sequence number related to this BatchResult.</p>
     *
     * @return Sequence number.
     */
    public long sequenceNumber() {
      return sequenceNumber_;
    }

    /**
     * <p>Return contained {@link org.rocksdb.WriteBatch}
     * instance</p>

* * @return {@link org.rocksdb.WriteBatch} instance. */ public WriteBatch writeBatch() { return writeBatch_; } private final long sequenceNumber_; private final WriteBatch writeBatch_; } @Override protected final native void disposeInternal(final long handle); private native boolean isValid(long handle); private native void next(long handle); private native void status(long handle) throws RocksDBException; private native BatchResult getBatch(long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TransactionOptions.java000066400000000000000000000142431370372246700255500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public class TransactionOptions extends RocksObject implements TransactionalOptions { public TransactionOptions() { super(newTransactionOptions()); } @Override public boolean isSetSnapshot() { assert(isOwningHandle()); return isSetSnapshot(nativeHandle_); } @Override public TransactionOptions setSetSnapshot(final boolean setSnapshot) { assert(isOwningHandle()); setSetSnapshot(nativeHandle_, setSnapshot); return this; } /** * True means that before acquiring locks, this transaction will * check if doing so will cause a deadlock. If so, it will return with * {@link Status.Code#Busy}. The user should retry their transaction. * * @return true if a deadlock is detected. */ public boolean isDeadlockDetect() { assert(isOwningHandle()); return isDeadlockDetect(nativeHandle_); } /** * Setting to true means that before acquiring locks, this transaction will * check if doing so will cause a deadlock. If so, it will return with * {@link Status.Code#Busy}. The user should retry their transaction. * * @param deadlockDetect true if we should detect deadlocks. 
* * @return this TransactionOptions instance */ public TransactionOptions setDeadlockDetect(final boolean deadlockDetect) { assert(isOwningHandle()); setDeadlockDetect(nativeHandle_, deadlockDetect); return this; } /** * The wait timeout in milliseconds when a transaction attempts to lock a key. * * If 0, no waiting is done if a lock cannot instantly be acquired. * If negative, {@link TransactionDBOptions#getTransactionLockTimeout(long)} * will be used * * @return the lock timeout in milliseconds */ public long getLockTimeout() { assert(isOwningHandle()); return getLockTimeout(nativeHandle_); } /** * If positive, specifies the wait timeout in milliseconds when * a transaction attempts to lock a key. * * If 0, no waiting is done if a lock cannot instantly be acquired. * If negative, {@link TransactionDBOptions#getTransactionLockTimeout(long)} * will be used * * Default: -1 * * @param lockTimeout the lock timeout in milliseconds * * @return this TransactionOptions instance */ public TransactionOptions setLockTimeout(final long lockTimeout) { assert(isOwningHandle()); setLockTimeout(nativeHandle_, lockTimeout); return this; } /** * Expiration duration in milliseconds. * * If non-negative, transactions that last longer than this many milliseconds * will fail to commit. If not set, a forgotten transaction that is never * committed, rolled back, or deleted will never relinquish any locks it * holds. This could prevent keys from being written by other writers. * * @return expiration the expiration duration in milliseconds */ public long getExpiration() { assert(isOwningHandle()); return getExpiration(nativeHandle_); } /** * Expiration duration in milliseconds. * * If non-negative, transactions that last longer than this many milliseconds * will fail to commit. If not set, a forgotten transaction that is never * committed, rolled back, or deleted will never relinquish any locks it * holds. This could prevent keys from being written by other writers. 
* * Default: -1 * * @param expiration the expiration duration in milliseconds * * @return this TransactionOptions instance */ public TransactionOptions setExpiration(final long expiration) { assert(isOwningHandle()); setExpiration(nativeHandle_, expiration); return this; } /** * Gets the number of traversals to make during deadlock detection. * * @return the number of traversals to make during * deadlock detection */ public long getDeadlockDetectDepth() { return getDeadlockDetectDepth(nativeHandle_); } /** * Sets the number of traversals to make during deadlock detection. * * Default: 50 * * @param deadlockDetectDepth the number of traversals to make during * deadlock detection * * @return this TransactionOptions instance */ public TransactionOptions setDeadlockDetectDepth( final long deadlockDetectDepth) { setDeadlockDetectDepth(nativeHandle_, deadlockDetectDepth); return this; } /** * Get the maximum number of bytes that may be used for the write batch. * * @return the maximum number of bytes, 0 means no limit. */ public long getMaxWriteBatchSize() { return getMaxWriteBatchSize(nativeHandle_); } /** * Set the maximum number of bytes that may be used for the write batch. * * @param maxWriteBatchSize the maximum number of bytes, 0 means no limit. 
* * @return this TransactionOptions instance */ public TransactionOptions setMaxWriteBatchSize(final long maxWriteBatchSize) { setMaxWriteBatchSize(nativeHandle_, maxWriteBatchSize); return this; } private native static long newTransactionOptions(); private native boolean isSetSnapshot(final long handle); private native void setSetSnapshot(final long handle, final boolean setSnapshot); private native boolean isDeadlockDetect(final long handle); private native void setDeadlockDetect(final long handle, final boolean deadlockDetect); private native long getLockTimeout(final long handle); private native void setLockTimeout(final long handle, final long lockTimeout); private native long getExpiration(final long handle); private native void setExpiration(final long handle, final long expiration); private native long getDeadlockDetectDepth(final long handle); private native void setDeadlockDetectDepth(final long handle, final long deadlockDetectDepth); private native long getMaxWriteBatchSize(final long handle); private native void setMaxWriteBatchSize(final long handle, final long maxWriteBatchSize); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TransactionalDB.java000066400000000000000000000046651370372246700247260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; interface TransactionalDB extends AutoCloseable { /** * Starts a new Transaction. * * Caller is responsible for calling {@link #close()} on the returned * transaction when it is no longer needed. * * @param writeOptions Any write options for the transaction * @return a new transaction */ Transaction beginTransaction(final WriteOptions writeOptions); /** * Starts a new Transaction. 
* * Caller is responsible for calling {@link #close()} on the returned * transaction when it is no longer needed. * * @param writeOptions Any write options for the transaction * @param transactionOptions Any options for the transaction * @return a new transaction */ Transaction beginTransaction(final WriteOptions writeOptions, final T transactionOptions); /** * Starts a new Transaction. * * Caller is responsible for calling {@link #close()} on the returned * transaction when it is no longer needed. * * @param writeOptions Any write options for the transaction * @param oldTransaction this Transaction will be reused instead of allocating * a new one. This is an optimization to avoid extra allocations * when repeatedly creating transactions. * @return The oldTransaction which has been reinitialized as a new * transaction */ Transaction beginTransaction(final WriteOptions writeOptions, final Transaction oldTransaction); /** * Starts a new Transaction. * * Caller is responsible for calling {@link #close()} on the returned * transaction when it is no longer needed. * * @param writeOptions Any write options for the transaction * @param transactionOptions Any options for the transaction * @param oldTransaction this Transaction will be reused instead of allocating * a new one. This is an optimization to avoid extra allocations * when repeatedly creating transactions. * @return The oldTransaction which has been reinitialized as a new * transaction */ Transaction beginTransaction(final WriteOptions writeOptions, final T transactionOptions, final Transaction oldTransaction); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TransactionalOptions.java000066400000000000000000000015521370372246700260640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; interface TransactionalOptions> extends AutoCloseable { /** * True indicates snapshots will be set, just like if * {@link Transaction#setSnapshot()} had been called * * @return whether a snapshot will be set */ boolean isSetSnapshot(); /** * Setting the setSnapshot to true is the same as calling * {@link Transaction#setSnapshot()}. * * Default: false * * @param setSnapshot Whether to set a snapshot * * @return this TransactionalOptions instance */ T setSetSnapshot(final boolean setSnapshot); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TtlDB.java000066400000000000000000000211461370372246700226600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.List; /** * Database with TTL support. * *

 * <p><strong>Use case</strong></p>
 * <p>This API should be used to open the db when key-values inserted are
 * meant to be removed from the db in a non-strict 'ttl' amount of time
 * Therefore, this guarantees that key-values inserted will remain in the
 * db for &gt;= ttl amount of time and the db will make efforts to remove the
 * key-values as soon as possible after ttl seconds of their insertion.
 * </p>
 *
 * <p><strong>Behaviour</strong></p>
 * <p>TTL is accepted in seconds
 * (int32_t)Timestamp(creation) is suffixed to values in Put internally
 * Expired TTL values deleted in compaction only:(Timestamp+ttl&lt;time_now)
 * Get/Iterator may return expired entries(compaction not run on them yet)
 * Different TTL may be used during different Opens
 * </p>
 *
 * <p><strong>Example</strong></p>
 * <ul>
 * <li>Open1 at t=0 with ttl=4 and insert k1,k2, close at t=2</li>
 * <li>Open2 at t=3 with ttl=5. Now k1,k2 should be deleted at t&gt;=5</li>
 * </ul>
 *
 * <p>
 * read_only=true opens in the usual read-only mode. Compactions will not be
 * triggered(neither manual nor automatic), so no expired entries removed
 * </p>
 *
 * <p><strong>Constraints</strong></p>
 * <p>Not specifying/passing or non-positive TTL behaves
 * like TTL = infinity</p>
 *
 * <p><strong>!!!WARNING!!!</strong></p>
 * <p>Calling DB::Open directly to re-open a db created by this API will get
 * corrupt values(timestamp suffixed) and no ttl effect will be there
 * during the second Open, so use this API consistently to open the db
 * Be careful when passing ttl with a small positive value because the
 * whole database may be deleted in a small amount of time.</p>
 */
public class TtlDB extends RocksDB {

  /**
   * <p>Opens a TtlDB.</p>
   *
   * <p>Database is opened in read-write mode without default TTL.</p>
   *
   * @param options {@link org.rocksdb.Options} instance.
   * @param db_path path to database.
   *
   * @return TtlDB instance.
   *
   * @throws RocksDBException thrown if an error occurs within the native
   *     part of the library.
   */
  public static TtlDB open(final Options options, final String db_path)
      throws RocksDBException {
    // Delegates with ttl = 0 and readOnly = false.
    return open(options, db_path, 0, false);
  }

  /**
   * <p>Opens a TtlDB.</p>
   *
   * @param options {@link org.rocksdb.Options} instance.
   * @param db_path path to database.
   * @param ttl time to live for new entries.
   * @param readOnly boolean value indicating if the database is
   *     opened read-only.
   *
   * @return TtlDB instance.
   *
   * @throws RocksDBException thrown if an error occurs within the native
   *     part of the library.
   */
  public static TtlDB open(final Options options, final String db_path,
      final int ttl, final boolean readOnly) throws RocksDBException {
    return new TtlDB(open(options.nativeHandle_, db_path, ttl, readOnly));
  }

  /**
   * <p>Opens a TtlDB.</p>

* * @param options {@link org.rocksdb.Options} instance. * @param db_path path to database. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances * on open. * @param ttlValues time to live values per column family handle * @param readOnly boolean value indicating if database if db is * opened read-only. * * @return TtlDB instance. * * @throws RocksDBException thrown if an error occurs within the native * part of the library. * @throws java.lang.IllegalArgumentException when there is not a ttl value * per given column family handle. */ public static TtlDB open(final DBOptions options, final String db_path, final List columnFamilyDescriptors, final List columnFamilyHandles, final List ttlValues, final boolean readOnly) throws RocksDBException { if (columnFamilyDescriptors.size() != ttlValues.size()) { throw new IllegalArgumentException("There must be a ttl value per column" + "family handle."); } final byte[][] cfNames = new byte[columnFamilyDescriptors.size()][]; final long[] cfOptionHandles = new long[columnFamilyDescriptors.size()]; for (int i = 0; i < columnFamilyDescriptors.size(); i++) { final ColumnFamilyDescriptor cfDescriptor = columnFamilyDescriptors.get(i); cfNames[i] = cfDescriptor.getName(); cfOptionHandles[i] = cfDescriptor.getOptions().nativeHandle_; } final int ttlVals[] = new int[ttlValues.size()]; for(int i = 0; i < ttlValues.size(); i++) { ttlVals[i] = ttlValues.get(i); } final long[] handles = openCF(options.nativeHandle_, db_path, cfNames, cfOptionHandles, ttlVals, readOnly); final TtlDB ttlDB = new TtlDB(handles[0]); for (int i = 1; i < handles.length; i++) { columnFamilyHandles.add(new ColumnFamilyHandle(ttlDB, handles[i])); } return ttlDB; } /** *

Close the TtlDB instance and release resource.

* * This is similar to {@link #close()} except that it * throws an exception if any error occurs. * * This will not fsync the WAL files. * If syncing is required, the caller must first call {@link #syncWal()} * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch * with {@link WriteOptions#setSync(boolean)} set to true. * * See also {@link #close()}. * * @throws RocksDBException if an error occurs whilst closing. */ public void closeE() throws RocksDBException { if (owningHandle_.compareAndSet(true, false)) { try { closeDatabase(nativeHandle_); } finally { disposeInternal(); } } } /** *

Close the TtlDB instance and release resource.

* * * This will not fsync the WAL files. * If syncing is required, the caller must first call {@link #syncWal()} * or {@link #write(WriteOptions, WriteBatch)} using an empty write batch * with {@link WriteOptions#setSync(boolean)} set to true. * * See also {@link #close()}. */ @Override public void close() { if (owningHandle_.compareAndSet(true, false)) { try { closeDatabase(nativeHandle_); } catch (final RocksDBException e) { // silently ignore the error report } finally { disposeInternal(); } } } /** *

Creates a new ttl based column family with a name defined * in given ColumnFamilyDescriptor and allocates a * ColumnFamilyHandle within an internal structure.

* *

The ColumnFamilyHandle is automatically disposed with DB * disposal.

* * @param columnFamilyDescriptor column family to be created. * @param ttl TTL to set for this column family. * * @return {@link org.rocksdb.ColumnFamilyHandle} instance. * * @throws RocksDBException thrown if error happens in underlying * native library. */ public ColumnFamilyHandle createColumnFamilyWithTtl( final ColumnFamilyDescriptor columnFamilyDescriptor, final int ttl) throws RocksDBException { return new ColumnFamilyHandle(this, createColumnFamilyWithTtl(nativeHandle_, columnFamilyDescriptor.getName(), columnFamilyDescriptor.getOptions().nativeHandle_, ttl)); } /** *

A protected constructor that will be used in the static * factory method * {@link #open(Options, String, int, boolean)} * and * {@link #open(DBOptions, String, java.util.List, java.util.List, * java.util.List, boolean)}. *

* * @param nativeHandle The native handle of the C++ TtlDB object */ protected TtlDB(final long nativeHandle) { super(nativeHandle); } @Override protected native void disposeInternal(final long handle); private native static long open(final long optionsHandle, final String db_path, final int ttl, final boolean readOnly) throws RocksDBException; private native static long[] openCF(final long optionsHandle, final String db_path, final byte[][] columnFamilyNames, final long[] columnFamilyOptions, final int[] ttlValues, final boolean readOnly) throws RocksDBException; private native long createColumnFamilyWithTtl(final long handle, final byte[] columnFamilyName, final long columnFamilyOptions, int ttl) throws RocksDBException; private native static void closeDatabase(final long handle) throws RocksDBException; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java000066400000000000000000000030571370372246700250620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The transaction db write policy. */ public enum TxnDBWritePolicy { /** * Write only the committed data. */ WRITE_COMMITTED((byte)0x00), /** * Write data after the prepare phase of 2pc. */ WRITE_PREPARED((byte)0x1), /** * Write data before the prepare phase of 2pc. */ WRITE_UNPREPARED((byte)0x2); private byte value; TxnDBWritePolicy(final byte value) { this.value = value; } /** *

Returns the byte value of the enumerations value.

*
 * @return the byte this constant was constructed with.
 */
public byte getValue() {
  return this.value;
}

/**
 *

Get the TxnDBWritePolicy enumeration value by * passing the byte identifier to this method.

* * @param byteIdentifier of TxnDBWritePolicy. * * @return TxnDBWritePolicy instance. * * @throws IllegalArgumentException If TxnDBWritePolicy cannot be found for * the provided byteIdentifier */ public static TxnDBWritePolicy getTxnDBWritePolicy(final byte byteIdentifier) { for (final TxnDBWritePolicy txnDBWritePolicy : TxnDBWritePolicy.values()) { if (txnDBWritePolicy.getValue() == byteIdentifier) { return txnDBWritePolicy; } } throw new IllegalArgumentException( "Illegal value provided for TxnDBWritePolicy."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/UInt64AddOperator.java000066400000000000000000000012141370372246700250570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Uint64AddOperator is a merge operator that accumlates a long * integer value. */ public class UInt64AddOperator extends MergeOperator { public UInt64AddOperator() { super(newSharedUInt64AddOperator()); } private native static long newSharedUInt64AddOperator(); @Override protected final native void disposeInternal(final long handle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/VectorMemTableConfig.java000066400000000000000000000022671370372246700257110ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb; /** * The config for vector memtable representation. */ public class VectorMemTableConfig extends MemTableConfig { public static final int DEFAULT_RESERVED_SIZE = 0; /** * VectorMemTableConfig constructor */ public VectorMemTableConfig() { reservedSize_ = DEFAULT_RESERVED_SIZE; } /** * Set the initial size of the vector that will be used * by the memtable created based on this config. * * @param size the initial size of the vector. 
* @return the reference to the current config. */ public VectorMemTableConfig setReservedSize(final int size) { reservedSize_ = size; return this; } /** * Returns the initial size of the vector used by the memtable * created based on this config. * * @return the initial size of the vector. */ public int reservedSize() { return reservedSize_; } @Override protected long newMemTableFactoryHandle() { return newMemTableFactoryHandle(reservedSize_); } private native long newMemTableFactoryHandle(long reservedSize) throws IllegalArgumentException; private int reservedSize_; } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WALRecoveryMode.java000066400000000000000000000044461370372246700246620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * The WAL Recover Mode */ public enum WALRecoveryMode { /** * Original levelDB recovery * * We tolerate incomplete record in trailing data on all logs * Use case : This is legacy behavior (default) */ TolerateCorruptedTailRecords((byte)0x00), /** * Recover from clean shutdown * * We don't expect to find any corruption in the WAL * Use case : This is ideal for unit tests and rare applications that * can require high consistency guarantee */ AbsoluteConsistency((byte)0x01), /** * Recover to point-in-time consistency * We stop the WAL playback on discovering WAL inconsistency * Use case : Ideal for systems that have disk controller cache like * hard disk, SSD without super capacitor that store related data */ PointInTimeRecovery((byte)0x02), /** * Recovery after a disaster * We ignore any corruption in the WAL and try to salvage as much data as * possible * Use case : Ideal for last ditch effort to recover data or systems that * operate with low grade unrelated data */ 
SkipAnyCorruptedRecords((byte)0x03);

// Byte identifier of this constant; assigned once in the constructor.
private final byte value;

/**
 * Creates a recovery mode constant with the given byte identifier.
 *
 * @param value the byte identifier of this constant.
 */
WALRecoveryMode(final byte value) {
  this.value = value;
}

/**
 *

Returns the byte value of the enumerations value.

*
 * @return the byte this constant was constructed with.
 */
public byte getValue() {
  return this.value;
}

/**
 *

Get the WALRecoveryMode enumeration value by * passing the byte identifier to this method.

* * @param byteIdentifier of WALRecoveryMode. * * @return WALRecoveryMode instance. * * @throws IllegalArgumentException If WALRecoveryMode cannot be found for the * provided byteIdentifier */ public static WALRecoveryMode getWALRecoveryMode(final byte byteIdentifier) { for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) { if (walRecoveryMode.getValue() == byteIdentifier) { return walRecoveryMode; } } throw new IllegalArgumentException( "Illegal value provided for WALRecoveryMode."); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WBWIRocksIterator.java000066400000000000000000000130571370372246700251750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; public class WBWIRocksIterator extends AbstractRocksIterator { private final WriteEntry entry = new WriteEntry(); protected WBWIRocksIterator(final WriteBatchWithIndex wbwi, final long nativeHandle) { super(wbwi, nativeHandle); } /** * Get the current entry * * The WriteEntry is only valid * until the iterator is repositioned. * If you want to keep the WriteEntry across iterator * movements, you must make a copy of its data! 
* * Note - This method is not thread-safe with respect to the WriteEntry * as it performs a non-atomic update across the fields of the WriteEntry * * @return The WriteEntry of the current entry */ public WriteEntry entry() { assert(isOwningHandle()); final long ptrs[] = entry1(nativeHandle_); entry.type = WriteType.fromId((byte)ptrs[0]); entry.key.resetNativeHandle(ptrs[1], ptrs[1] != 0); entry.value.resetNativeHandle(ptrs[2], ptrs[2] != 0); return entry; } @Override protected final native void disposeInternal(final long handle); @Override final native boolean isValid0(long handle); @Override final native void seekToFirst0(long handle); @Override final native void seekToLast0(long handle); @Override final native void next0(long handle); @Override final native void prev0(long handle); @Override final native void refresh0(final long handle) throws RocksDBException; @Override final native void seek0(long handle, byte[] target, int targetLen); @Override final native void seekForPrev0(long handle, byte[] target, int targetLen); @Override final native void status0(long handle) throws RocksDBException; @Override final native void seekDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen); private native long[] entry1(final long handle); /** * Enumeration of the Write operation * that created the record in the Write Batch */ public enum WriteType { PUT((byte)0x0), MERGE((byte)0x1), DELETE((byte)0x2), SINGLE_DELETE((byte)0x3), DELETE_RANGE((byte)0x4), LOG((byte)0x5), XID((byte)0x6); final byte id; WriteType(final byte id) { this.id = id; } public static WriteType fromId(final byte id) { for(final WriteType wt : WriteType.values()) { if(id == wt.id) { return wt; } } throw new IllegalArgumentException("No WriteType with id=" + id); } } @Override public void close() { entry.close(); super.close(); } /** * Represents an entry returned by * {@link org.rocksdb.WBWIRocksIterator#entry()} * * It is worth noting that a WriteEntry with * the type {@link 
org.rocksdb.WBWIRocksIterator.WriteType#DELETE} * or {@link org.rocksdb.WBWIRocksIterator.WriteType#LOG} * will not have a value. */ public static class WriteEntry implements AutoCloseable { WriteType type = null; final DirectSlice key; final DirectSlice value; /** * Intentionally private as this * should only be instantiated in * this manner by the outer WBWIRocksIterator * class; The class members are then modified * by calling {@link org.rocksdb.WBWIRocksIterator#entry()} */ private WriteEntry() { key = new DirectSlice(); value = new DirectSlice(); } public WriteEntry(final WriteType type, final DirectSlice key, final DirectSlice value) { this.type = type; this.key = key; this.value = value; } /** * Returns the type of the Write Entry * * @return the WriteType of the WriteEntry */ public WriteType getType() { return type; } /** * Returns the key of the Write Entry * * @return The slice containing the key * of the WriteEntry */ public DirectSlice getKey() { return key; } /** * Returns the value of the Write Entry * * @return The slice containing the value of * the WriteEntry or null if the WriteEntry has * no value */ public DirectSlice getValue() { if(!value.isOwningHandle()) { return null; //TODO(AR) migrate to JDK8 java.util.Optional#empty() } else { return value; } } /** * Generates a hash code for the Write Entry. NOTE: The hash code is based * on the string representation of the key, so it may not work correctly * with exotic custom comparators. * * @return The hash code for the Write Entry */ @Override public int hashCode() { return (key == null) ? 
0 : key.hashCode(); } @Override public boolean equals(final Object other) { if(other == null) { return false; } else if (this == other) { return true; } else if(other instanceof WriteEntry) { final WriteEntry otherWriteEntry = (WriteEntry)other; return type.equals(otherWriteEntry.type) && key.equals(otherWriteEntry.key) && value.equals(otherWriteEntry.value); } else { return false; } } @Override public void close() { value.close(); key.close(); } } @Override void seekForPrevDirect0(long handle, ByteBuffer target, int targetOffset, int targetLen) { throw new IllegalAccessError("Not implemented"); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WalFileType.java000066400000000000000000000027671370372246700241040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public enum WalFileType { /** * Indicates that WAL file is in archive directory. WAL files are moved from * the main db directory to archive directory once they are not live and stay * there until cleaned up. Files are cleaned depending on archive size * (Options::WAL_size_limit_MB) and time since last cleaning * (Options::WAL_ttl_seconds). */ kArchivedLogFile((byte)0x0), /** * Indicates that WAL file is live and resides in the main db directory */ kAliveLogFile((byte)0x1); private final byte value; WalFileType(final byte value) { this.value = value; } /** * Get the internal representation value. * * @return the internal representation value */ byte getValue() { return value; } /** * Get the WalFileType from the internal representation value. * * @return the wal file type. * * @throws IllegalArgumentException if the value is unknown. 
*/ static WalFileType fromValue(final byte value) { for (final WalFileType walFileType : WalFileType.values()) { if(walFileType.value == value) { return walFileType; } } throw new IllegalArgumentException( "Illegal value provided for WalFileType: " + value); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WalFilter.java000066400000000000000000000065311370372246700236010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Map; /** * WALFilter allows an application to inspect write-ahead-log (WAL) * records or modify their processing on recovery. */ public interface WalFilter { /** * Provide ColumnFamily->LogNumber map to filter * so that filter can determine whether a log number applies to a given * column family (i.e. that log hasn't been flushed to SST already for the * column family). * * We also pass in name>id map as only name is known during * recovery (as handles are opened post-recovery). * while write batch callbacks happen in terms of column family id. * * @param cfLognumber column_family_id to lognumber map * @param cfNameId column_family_name to column_family_id map */ void columnFamilyLogNumberMap(final Map cfLognumber, final Map cfNameId); /** * LogRecord is invoked for each log record encountered for all the logs * during replay on logs on recovery. This method can be used to: * * inspect the record (using the batch parameter) * * ignoring current record * (by returning WalProcessingOption::kIgnoreCurrentRecord) * * reporting corrupted record * (by returning WalProcessingOption::kCorruptedRecord) * * stop log replay * (by returning kStop replay) - please note that this implies * discarding the logs from current record onwards. * * @param logNumber log number of the current log. 
* Filter might use this to determine if the log * record is applicable to a certain column family. * @param logFileName log file name - only for informational purposes * @param batch batch encountered in the log during recovery * @param newBatch new batch to populate if filter wants to change * the batch (for example to filter some records out, or alter some * records). Please note that the new batch MUST NOT contain * more records than original, else recovery would be failed. * * @return Processing option for the current record. */ LogRecordFoundResult logRecordFound(final long logNumber, final String logFileName, final WriteBatch batch, final WriteBatch newBatch); class LogRecordFoundResult { public static LogRecordFoundResult CONTINUE_UNCHANGED = new LogRecordFoundResult(WalProcessingOption.CONTINUE_PROCESSING, false); final WalProcessingOption walProcessingOption; final boolean batchChanged; /** * @param walProcessingOption the processing option * @param batchChanged Whether batch was changed by the filter. * It must be set to true if newBatch was populated, * else newBatch has no effect. */ public LogRecordFoundResult(final WalProcessingOption walProcessingOption, final boolean batchChanged) { this.walProcessingOption = walProcessingOption; this.batchChanged = batchChanged; } } /** * Returns a name that identifies this WAL filter. * The name will be printed to LOG file on start up for diagnosis. * * @return the name */ String name(); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WalProcessingOption.java000066400000000000000000000024771370372246700256660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public enum WalProcessingOption { /** * Continue processing as usual. 
*/ CONTINUE_PROCESSING((byte)0x0), /** * Ignore the current record but continue processing of log(s). */ IGNORE_CURRENT_RECORD((byte)0x1), /** * Stop replay of logs and discard logs. * Logs won't be replayed on subsequent recovery. */ STOP_REPLAY((byte)0x2), /** * Corrupted record detected by filter. */ CORRUPTED_RECORD((byte)0x3); private final byte value; WalProcessingOption(final byte value) { this.value = value; } /** * Get the internal representation. * * @return the internal representation. */ byte getValue() { return value; } public static WalProcessingOption fromValue(final byte value) { for (final WalProcessingOption walProcessingOption : WalProcessingOption.values()) { if (walProcessingOption.value == value) { return walProcessingOption; } } throw new IllegalArgumentException( "Illegal value provided for WalProcessingOption: " + value); } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WriteBatch.java000066400000000000000000000312311370372246700237370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * WriteBatch holds a collection of updates to apply atomically to a DB. * * The updates are applied in the order in which they are added * to the WriteBatch. For example, the value of "key" will be "v3" * after the following batch is written: * * batch.put("key", "v1"); * batch.remove("key"); * batch.put("key", "v2"); * batch.put("key", "v3"); * * Multiple threads can invoke const methods on a WriteBatch without * external synchronization, but if any of the threads may call a * non-const method, all threads accessing the same WriteBatch must use * external synchronization. */ public class WriteBatch extends AbstractWriteBatch { /** * Constructs a WriteBatch instance. 
*/ public WriteBatch() { this(0); } /** * Constructs a WriteBatch instance with a given size. * * @param reserved_bytes reserved size for WriteBatch */ public WriteBatch(final int reserved_bytes) { super(newWriteBatch(reserved_bytes)); } /** * Constructs a WriteBatch instance from a serialized representation * as returned by {@link #data()}. * * @param serialized the serialized representation. */ public WriteBatch(final byte[] serialized) { super(newWriteBatch(serialized, serialized.length)); } /** * Support for iterating over the contents of a batch. * * @param handler A handler that is called back for each * update present in the batch * * @throws RocksDBException If we cannot iterate over the batch */ public void iterate(final Handler handler) throws RocksDBException { iterate(nativeHandle_, handler.nativeHandle_); } /** * Retrieve the serialized version of this batch. * * @return the serialized representation of this write batch. * * @throws RocksDBException if an error occurs whilst retrieving * the serialized batch data. */ public byte[] data() throws RocksDBException { return data(nativeHandle_); } /** * Retrieve data size of the batch. * * @return the serialized data size of the batch. */ public long getDataSize() { return getDataSize(nativeHandle_); } /** * Returns true if Put will be called during Iterate. * * @return true if Put will be called during Iterate. */ public boolean hasPut() { return hasPut(nativeHandle_); } /** * Returns true if Delete will be called during Iterate. * * @return true if Delete will be called during Iterate. */ public boolean hasDelete() { return hasDelete(nativeHandle_); } /** * Returns true if SingleDelete will be called during Iterate. * * @return true if SingleDelete will be called during Iterate. */ public boolean hasSingleDelete() { return hasSingleDelete(nativeHandle_); } /** * Returns true if DeleteRange will be called during Iterate. * * @return true if DeleteRange will be called during Iterate. 
*/ public boolean hasDeleteRange() { return hasDeleteRange(nativeHandle_); } /** * Returns true if Merge will be called during Iterate. * * @return true if Merge will be called during Iterate. */ public boolean hasMerge() { return hasMerge(nativeHandle_); } /** * Returns true if MarkBeginPrepare will be called during Iterate. * * @return true if MarkBeginPrepare will be called during Iterate. */ public boolean hasBeginPrepare() { return hasBeginPrepare(nativeHandle_); } /** * Returns true if MarkEndPrepare will be called during Iterate. * * @return true if MarkEndPrepare will be called during Iterate. */ public boolean hasEndPrepare() { return hasEndPrepare(nativeHandle_); } /** * Returns true if MarkCommit will be called during Iterate. * * @return true if MarkCommit will be called during Iterate. */ public boolean hasCommit() { return hasCommit(nativeHandle_); } /** * Returns true if MarkRollback will be called during Iterate. * * @return true if MarkRollback will be called during Iterate. */ public boolean hasRollback() { return hasRollback(nativeHandle_); } @Override public WriteBatch getWriteBatch() { return this; } /** * Marks this point in the WriteBatch as the last record to * be inserted into the WAL, provided the WAL is enabled. */ public void markWalTerminationPoint() { markWalTerminationPoint(nativeHandle_); } /** * Gets the WAL termination point. * * See {@link #markWalTerminationPoint()} * * @return the WAL termination point */ public SavePoint getWalTerminationPoint() { return getWalTerminationPoint(nativeHandle_); } @Override WriteBatch getWriteBatch(final long handle) { return this; } /** *

Package-private WriteBatch constructor which is used to construct
 * WriteBatch instances from the C++ side. As the reference to this
 * object is also managed from the C++ side, the handle will be disowned.

*
 * @param nativeHandle address of native instance.
 */
WriteBatch(final long nativeHandle) {
  // Pass false for owningNativeHandle, so the two-argument constructor
  // disowns the native handle.
  this(nativeHandle, false);
}

/**
 *

Package-private WriteBatch constructor which is used to construct
 * WriteBatch instances.

* * @param nativeHandle address of native instance. * @param owningNativeHandle whether to own this reference from the C++ side or not */ WriteBatch(final long nativeHandle, final boolean owningNativeHandle) { super(nativeHandle); if(!owningNativeHandle) disOwnNativeHandle(); } @Override protected final native void disposeInternal(final long handle); @Override final native int count0(final long handle); @Override final native void put(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen); @Override final native void put(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen, final long cfHandle); @Override final native void putDirect(final long handle, final ByteBuffer key, final int keyOffset, final int keyLength, final ByteBuffer value, final int valueOffset, final int valueLength, final long cfHandle); @Override final native void merge(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen); @Override final native void merge(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen, final long cfHandle); @Override final native void delete(final long handle, final byte[] key, final int keyLen) throws RocksDBException; @Override final native void delete(final long handle, final byte[] key, final int keyLen, final long cfHandle) throws RocksDBException; @Override final native void singleDelete(final long handle, final byte[] key, final int keyLen) throws RocksDBException; @Override final native void singleDelete(final long handle, final byte[] key, final int keyLen, final long cfHandle) throws RocksDBException; @Override final native void removeDirect(final long handle, final ByteBuffer key, final int keyOffset, final int keyLength, final long cfHandle) throws RocksDBException; @Override final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, final byte[] endKey, final int 
endKeyLen); @Override final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, final byte[] endKey, final int endKeyLen, final long cfHandle); @Override final native void putLogData(final long handle, final byte[] blob, final int blobLen) throws RocksDBException; @Override final native void clear0(final long handle); @Override final native void setSavePoint0(final long handle); @Override final native void rollbackToSavePoint0(final long handle); @Override final native void popSavePoint(final long handle) throws RocksDBException; @Override final native void setMaxBytes(final long nativeHandle, final long maxBytes); private native static long newWriteBatch(final int reserved_bytes); private native static long newWriteBatch(final byte[] serialized, final int serializedLength); private native void iterate(final long handle, final long handlerHandle) throws RocksDBException; private native byte[] data(final long nativeHandle) throws RocksDBException; private native long getDataSize(final long nativeHandle); private native boolean hasPut(final long nativeHandle); private native boolean hasDelete(final long nativeHandle); private native boolean hasSingleDelete(final long nativeHandle); private native boolean hasDeleteRange(final long nativeHandle); private native boolean hasMerge(final long nativeHandle); private native boolean hasBeginPrepare(final long nativeHandle); private native boolean hasEndPrepare(final long nativeHandle); private native boolean hasCommit(final long nativeHandle); private native boolean hasRollback(final long nativeHandle); private native void markWalTerminationPoint(final long nativeHandle); private native SavePoint getWalTerminationPoint(final long nativeHandle); /** * Handler callback for iterating over the contents of a batch. */ public static abstract class Handler extends RocksCallbackObject { public Handler() { super(null); } @Override protected long initializeNative(final long... 
nativeParameterHandles) { return createNewHandler0(); } public abstract void put(final int columnFamilyId, final byte[] key, final byte[] value) throws RocksDBException; public abstract void put(final byte[] key, final byte[] value); public abstract void merge(final int columnFamilyId, final byte[] key, final byte[] value) throws RocksDBException; public abstract void merge(final byte[] key, final byte[] value); public abstract void delete(final int columnFamilyId, final byte[] key) throws RocksDBException; public abstract void delete(final byte[] key); public abstract void singleDelete(final int columnFamilyId, final byte[] key) throws RocksDBException; public abstract void singleDelete(final byte[] key); public abstract void deleteRange(final int columnFamilyId, final byte[] beginKey, final byte[] endKey) throws RocksDBException; public abstract void deleteRange(final byte[] beginKey, final byte[] endKey); public abstract void logData(final byte[] blob); public abstract void putBlobIndex(final int columnFamilyId, final byte[] key, final byte[] value) throws RocksDBException; public abstract void markBeginPrepare() throws RocksDBException; public abstract void markEndPrepare(final byte[] xid) throws RocksDBException; public abstract void markNoop(final boolean emptyBatch) throws RocksDBException; public abstract void markRollback(final byte[] xid) throws RocksDBException; public abstract void markCommit(final byte[] xid) throws RocksDBException; /** * shouldContinue is called by the underlying iterator * {@link WriteBatch#iterate(Handler)}. If it returns false, * iteration is halted. Otherwise, it continues * iterating. The default implementation always * returns true. * * @return boolean value indicating if the * iteration is halted. */ public boolean shouldContinue() { return true; } private native long createNewHandler0(); } /** * A structure for describing the save point in the Write Batch. 
*/ public static class SavePoint { private long size; private long count; private long contentFlags; public SavePoint(final long size, final long count, final long contentFlags) { this.size = size; this.count = count; this.contentFlags = contentFlags; } public void clear() { this.size = 0; this.count = 0; this.contentFlags = 0; } /** * Get the size of the serialized representation. * * @return the size of the serialized representation. */ public long getSize() { return size; } /** * Get the number of elements. * * @return the number of elements. */ public long getCount() { return count; } /** * Get the content flags. * * @return the content flags. */ public long getContentFlags() { return contentFlags; } public boolean isCleared() { return (size | count | contentFlags) == 0; } } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WriteBatchInterface.java000066400000000000000000000300201370372246700255530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** *

Defines the interface for a Write Batch which * holds a collection of updates to apply atomically to a DB.

*/
public interface WriteBatchInterface {
  /**
   * Returns the number of updates in the batch.
   *
   * @return number of items in WriteBatch
   */
  int count();

  /**
   * Store the mapping "key->value" in the database.
   *
   * @param key the specified key to be inserted.
   * @param value the value associated with the specified key.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void put(byte[] key, byte[] value) throws RocksDBException;

  /**
   * Store the mapping "key->value" within given column
   * family.
   *
   * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
   *     instance
   * @param key the specified key to be inserted.
   * @param value the value associated with the specified key.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void put(ColumnFamilyHandle columnFamilyHandle,
      byte[] key, byte[] value) throws RocksDBException;

  /**
   * Store the mapping "key->value" in the database.
   *
   * @param key the specified key to be inserted. The buffer's position and
   *     limit are used. Supports direct buffer only.
   * @param value the value associated with the specified key. The buffer's
   *     position and limit are used. Supports direct buffer only.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void put(ByteBuffer key, ByteBuffer value) throws RocksDBException;

  /**
   * Store the mapping "key->value" within given column
   * family.
   *
   * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
   *     instance
   * @param key the specified key to be inserted. The buffer's position and
   *     limit are used. Supports direct buffer only.
   * @param value the value associated with the specified key. The buffer's
   *     position and limit are used. Supports direct buffer only.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void put(ColumnFamilyHandle columnFamilyHandle, ByteBuffer key, ByteBuffer value)
      throws RocksDBException;

  /**
   * Merge "value" with the existing value of "key" in the database.
   * "key->merge(existing, value)"
   *
   * @param key the specified key to be merged.
   * @param value the value to be merged with the current value for
   *     the specified key.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void merge(byte[] key, byte[] value) throws RocksDBException;

  /**
   * Merge "value" with the existing value of "key" in given column family.
   * "key->merge(existing, value)"
   *
   * @param columnFamilyHandle {@link ColumnFamilyHandle} instance
   * @param key the specified key to be merged.
   * @param value the value to be merged with the current value for
   *     the specified key.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void merge(ColumnFamilyHandle columnFamilyHandle,
      byte[] key, byte[] value) throws RocksDBException;

  /**
   * If the database contains a mapping for "key", erase it. Else do nothing.
   *
   * @param key Key to delete within database
   *
   * @deprecated Use {@link #delete(byte[])}
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  @Deprecated
  void remove(byte[] key) throws RocksDBException;

  /**
   * If column family contains a mapping for "key", erase it. Else do nothing.
   *
   * @param columnFamilyHandle {@link ColumnFamilyHandle} instance
   * @param key Key to delete within database
   *
   * @deprecated Use {@link #delete(ColumnFamilyHandle, byte[])}
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  @Deprecated
  void remove(ColumnFamilyHandle columnFamilyHandle, byte[] key)
      throws RocksDBException;

  /**
   * If the database contains a mapping for "key", erase it. Else do nothing.
   *
   * @param key Key to delete within database
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void delete(byte[] key) throws RocksDBException;

  /**
   * If column family contains a mapping for "key", erase it. Else do nothing.
   *
   * @param columnFamilyHandle {@link ColumnFamilyHandle} instance
   * @param key Key to delete within database
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void delete(ColumnFamilyHandle columnFamilyHandle, byte[] key)
      throws RocksDBException;

  /**
   * Remove the database entry for {@code key}. Requires that the key exists
   * and was not overwritten. It is not an error if the key did not exist
   * in the database.
   *
   * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple
   * times), then the result of calling SingleDelete() on this key is undefined.
   * SingleDelete() only behaves correctly if there has been only one Put()
   * for this key since the previous call to SingleDelete() for this key.
   *
   * This feature is currently an experimental performance optimization
   * for a very specific workload. It is up to the caller to ensure that
   * SingleDelete is only used for a key that is not deleted using Delete() or
   * written using Merge(). Mixing SingleDelete operations with Deletes and
   * Merges can result in undefined behavior.
   *
   * @param key Key to delete within database
   *
   * @throws RocksDBException thrown if error happens in underlying
   *     native library.
   */
  @Experimental("Performance optimization for a very specific workload")
  void singleDelete(final byte[] key) throws RocksDBException;

  /**
   * Remove the database entry for {@code key}. Requires that the key exists
   * and was not overwritten. It is not an error if the key did not exist
   * in the database.
   *
   * If a key is overwritten (by calling {@link #put(byte[], byte[])} multiple
   * times), then the result of calling SingleDelete() on this key is undefined.
   * SingleDelete() only behaves correctly if there has been only one Put()
   * for this key since the previous call to SingleDelete() for this key.
   *
   * This feature is currently an experimental performance optimization
   * for a very specific workload. It is up to the caller to ensure that
   * SingleDelete is only used for a key that is not deleted using Delete() or
   * written using Merge(). Mixing SingleDelete operations with Deletes and
   * Merges can result in undefined behavior.
   *
   * @param columnFamilyHandle The column family to delete the key from
   * @param key Key to delete within database
   *
   * @throws RocksDBException thrown if error happens in underlying
   *     native library.
   */
  @Experimental("Performance optimization for a very specific workload")
  void singleDelete(final ColumnFamilyHandle columnFamilyHandle,
      final byte[] key) throws RocksDBException;

  /**
   * If the database contains a mapping for "key", erase it. Else do nothing.
   *
   * @param key Key to delete within database. The buffer's position and
   *     limit are used. Supports direct buffer only.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void remove(ByteBuffer key) throws RocksDBException;

  /**
   * If column family contains a mapping for "key", erase it. Else do nothing.
   *
   * @param columnFamilyHandle {@link ColumnFamilyHandle} instance
   * @param key Key to delete within database. The buffer's position and
   *     limit are used. Supports direct buffer only.
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void remove(ColumnFamilyHandle columnFamilyHandle, ByteBuffer key)
      throws RocksDBException;

  /**
   * Removes the database entries in the range ["beginKey", "endKey"), i.e.,
   * including "beginKey" and excluding "endKey". It is not an error if no
   * keys exist in the range ["beginKey", "endKey").
   *
   * Failures are reported by throwing {@link RocksDBException}.
   *
   * @param beginKey
   *          First key to delete within database (included)
   * @param endKey
   *          Last key to delete within database (excluded)
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void deleteRange(byte[] beginKey, byte[] endKey) throws RocksDBException;

  /**
   * Removes the entries of the given column family in the range
   * ["beginKey", "endKey"), i.e., including "beginKey" and excluding
   * "endKey". It is not an error if no keys exist in the range
   * ["beginKey", "endKey").
   *
   * Failures are reported by throwing {@link RocksDBException}.
   *
   * @param columnFamilyHandle {@link ColumnFamilyHandle} instance
   * @param beginKey
   *          First key to delete within database (included)
   * @param endKey
   *          Last key to delete within database (excluded)
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void deleteRange(ColumnFamilyHandle columnFamilyHandle, byte[] beginKey,
      byte[] endKey) throws RocksDBException;

  /**
   * Append a blob of arbitrary size to the records in this batch. The blob will
   * be stored in the transaction log but not in any other file. In particular,
   * it will not be persisted to the SST files. When iterating over this
   * WriteBatch, WriteBatch::Handler::LogData will be called with the contents
   * of the blob as it is encountered. Blobs, puts, deletes, and merges will be
   * encountered in the same order in which they were inserted. The blob will
   * NOT consume sequence number(s) and will NOT increase the count of the batch
   *
   * Example application: add timestamps to the transaction log for use in
   * replication.
   *
   * @param blob binary object to be inserted
   * @throws RocksDBException thrown if error happens in underlying native library.
   */
  void putLogData(byte[] blob) throws RocksDBException;

  /**
   * Clear all updates buffered in this batch
   */
  void clear();

  /**
   * Records the state of the batch for future calls to RollbackToSavePoint().
   * May be called multiple times to set multiple save points.
   */
  void setSavePoint();

  /**
   * Remove all entries in this batch (Put, Merge, Delete, PutLogData) since
   * the most recent call to SetSavePoint() and removes the most recent save
   * point.
   *
   * @throws RocksDBException if there is no previous call to SetSavePoint()
   */
  void rollbackToSavePoint() throws RocksDBException;

  /**
   * Pop the most recent save point.
   *
   * That is to say that it removes the last save point,
   * which was set by {@link #setSavePoint()}.
   *
   * @throws RocksDBException If there is no previous call to
   *     {@link #setSavePoint()}, an exception with
   *     {@link Status.Code#NotFound} will be thrown.
   */
  void popSavePoint() throws RocksDBException;

  /**
   * Set the maximum size of the write batch.
   *
   * @param maxBytes the maximum size in bytes.
   */
  void setMaxBytes(long maxBytes);

  /**
   * Get the underlying Write Batch.
   *
   * @return the underlying WriteBatch.
   */
  WriteBatch getWriteBatch();
} rocksdb-6.11.4/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java000066400000000000000000000375171370372246700256000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.ByteBuffer; /** * Similar to {@link org.rocksdb.WriteBatch} but with a binary searchable * index built for all the keys inserted. * * Calling put, merge, remove or putLogData calls the same function * as with {@link org.rocksdb.WriteBatch} whilst also building an index. * * A user can call {@link org.rocksdb.WriteBatchWithIndex#newIterator()} to * create an iterator over the write batch or * {@link org.rocksdb.WriteBatchWithIndex#newIteratorWithBase(org.rocksdb.RocksIterator)} * to get an iterator for the database with Read-Your-Own-Writes like capability */ public class WriteBatchWithIndex extends AbstractWriteBatch { /** * Creates a WriteBatchWithIndex where no bytes * are reserved up-front, bytewise comparison is * used for fallback key comparisons, * and duplicate keys operations are retained */ public WriteBatchWithIndex() { super(newWriteBatchWithIndex()); } /** * Creates a WriteBatchWithIndex where no bytes * are reserved up-front, bytewise comparison is * used for fallback key comparisons, and duplicate key * assignment is determined by the constructor argument * * @param overwriteKey if true, overwrite the key in the index when * inserting a duplicate key, in this way an iterator will never * show two entries with the same key. */ public WriteBatchWithIndex(final boolean overwriteKey) { super(newWriteBatchWithIndex(overwriteKey)); } /** * Creates a WriteBatchWithIndex * * @param fallbackIndexComparator We fallback to this comparator * to compare keys within a column family if we cannot determine * the column family and so look up it's comparator. 
* * @param reservedBytes reserved bytes in underlying WriteBatch * * @param overwriteKey if true, overwrite the key in the index when * inserting a duplicate key, in this way an iterator will never * show two entries with the same key. */ public WriteBatchWithIndex( final AbstractComparator fallbackIndexComparator, final int reservedBytes, final boolean overwriteKey) { super(newWriteBatchWithIndex(fallbackIndexComparator.nativeHandle_, fallbackIndexComparator.getComparatorType().getValue(), reservedBytes, overwriteKey)); } /** *

Private WriteBatchWithIndex constructor which is used to construct * WriteBatchWithIndex instances from C++ side. As the reference to this * object is also managed from C++ side the handle will be disowned.

* * @param nativeHandle address of native instance. */ WriteBatchWithIndex(final long nativeHandle) { super(nativeHandle); disOwnNativeHandle(); } /** * Create an iterator of a column family. User can call * {@link org.rocksdb.RocksIteratorInterface#seek(byte[])} to * search to the next entry of or after a key. Keys will be iterated in the * order given by index_comparator. For multiple updates on the same key, * each update will be returned as a separate entry, in the order of update * time. * * @param columnFamilyHandle The column family to iterate over * @return An iterator for the Write Batch contents, restricted to the column * family */ public WBWIRocksIterator newIterator( final ColumnFamilyHandle columnFamilyHandle) { return new WBWIRocksIterator(this, iterator1(nativeHandle_, columnFamilyHandle.nativeHandle_)); } /** * Create an iterator of the default column family. User can call * {@link org.rocksdb.RocksIteratorInterface#seek(byte[])} to * search to the next entry of or after a key. Keys will be iterated in the * order given by index_comparator. For multiple updates on the same key, * each update will be returned as a separate entry, in the order of update * time. * * @return An iterator for the Write Batch contents */ public WBWIRocksIterator newIterator() { return new WBWIRocksIterator(this, iterator0(nativeHandle_)); } /** * Provides Read-Your-Own-Writes like functionality by * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} * as a delta and baseIterator as a base * * Updating write batch with the current key of the iterator is not safe. * We strongly recommand users not to do it. It will invalidate the current * key() and value() of the iterator. This invalidation happens even before * the write batch update finishes. The state may recover after Next() is * called. * * @param columnFamilyHandle The column family to iterate over * @param baseIterator The base iterator, * e.g. 
{@link org.rocksdb.RocksDB#newIterator()} * @return An iterator which shows a view comprised of both the database * point-in-time from baseIterator and modifications made in this write batch. */ public RocksIterator newIteratorWithBase( final ColumnFamilyHandle columnFamilyHandle, final RocksIterator baseIterator) { return newIteratorWithBase(columnFamilyHandle, baseIterator, null); } /** * Provides Read-Your-Own-Writes like functionality by * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} * as a delta and baseIterator as a base * * Updating write batch with the current key of the iterator is not safe. * We strongly recommand users not to do it. It will invalidate the current * key() and value() of the iterator. This invalidation happens even before * the write batch update finishes. The state may recover after Next() is * called. * * @param columnFamilyHandle The column family to iterate over * @param baseIterator The base iterator, * e.g. {@link org.rocksdb.RocksDB#newIterator()} * @param readOptions the read options, or null * @return An iterator which shows a view comprised of both the database * point-in-time from baseIterator and modifications made in this write batch. */ public RocksIterator newIteratorWithBase(final ColumnFamilyHandle columnFamilyHandle, final RocksIterator baseIterator, /* @Nullable */ final ReadOptions readOptions) { final RocksIterator iterator = new RocksIterator(baseIterator.parent_, iteratorWithBase(nativeHandle_, columnFamilyHandle.nativeHandle_, baseIterator.nativeHandle_, readOptions == null ? 0 : readOptions.nativeHandle_)); // when the iterator is deleted it will also delete the baseIterator baseIterator.disOwnNativeHandle(); return iterator; } /** * Provides Read-Your-Own-Writes like functionality by * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} * as a delta and baseIterator as a base. Operates on the default column * family. 
* * @param baseIterator The base iterator, * e.g. {@link org.rocksdb.RocksDB#newIterator()} * @return An iterator which shows a view comprised of both the database * point-in-timefrom baseIterator and modifications made in this write batch. */ public RocksIterator newIteratorWithBase(final RocksIterator baseIterator) { return newIteratorWithBase(baseIterator.parent_.getDefaultColumnFamily(), baseIterator, null); } /** * Provides Read-Your-Own-Writes like functionality by * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} * as a delta and baseIterator as a base. Operates on the default column * family. * * @param baseIterator The base iterator, * e.g. {@link org.rocksdb.RocksDB#newIterator()} * @param readOptions the read options, or null * @return An iterator which shows a view comprised of both the database * point-in-timefrom baseIterator and modifications made in this write batch. */ public RocksIterator newIteratorWithBase(final RocksIterator baseIterator, /* @Nullable */ final ReadOptions readOptions) { return newIteratorWithBase( baseIterator.parent_.getDefaultColumnFamily(), baseIterator, readOptions); } /** * Similar to {@link RocksDB#get(ColumnFamilyHandle, byte[])} but will only * read the key from this batch. * * @param columnFamilyHandle The column family to retrieve the value from * @param options The database options to use * @param key The key to read the value for * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException if the batch does not have enough data to resolve * Merge operations, MergeInProgress status may be returned. 
*/ public byte[] getFromBatch(final ColumnFamilyHandle columnFamilyHandle, final DBOptions options, final byte[] key) throws RocksDBException { return getFromBatch(nativeHandle_, options.nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } /** * Similar to {@link RocksDB#get(byte[])} but will only * read the key from this batch. * * @param options The database options to use * @param key The key to read the value for * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. * * @throws RocksDBException if the batch does not have enough data to resolve * Merge operations, MergeInProgress status may be returned. */ public byte[] getFromBatch(final DBOptions options, final byte[] key) throws RocksDBException { return getFromBatch(nativeHandle_, options.nativeHandle_, key, key.length); } /** * Similar to {@link RocksDB#get(ColumnFamilyHandle, byte[])} but will also * read writes from this batch. * * This function will query both this batch and the DB and then merge * the results using the DB's merge operator (if the batch contains any * merge requests). * * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is * read from the DB but will NOT change which keys are read from the batch * (the keys in this batch do not yet belong to any snapshot and will be * fetched regardless). * * @param db The Rocks database * @param columnFamilyHandle The column family to retrieve the value from * @param options The read options to use * @param key The key to read the value for * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. 
* * @throws RocksDBException if the value for the key cannot be read */ public byte[] getFromBatchAndDB(final RocksDB db, final ColumnFamilyHandle columnFamilyHandle, final ReadOptions options, final byte[] key) throws RocksDBException { return getFromBatchAndDB(nativeHandle_, db.nativeHandle_, options.nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } /** * Similar to {@link RocksDB#get(byte[])} but will also * read writes from this batch. * * This function will query both this batch and the DB and then merge * the results using the DB's merge operator (if the batch contains any * merge requests). * * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is * read from the DB but will NOT change which keys are read from the batch * (the keys in this batch do not yet belong to any snapshot and will be * fetched regardless). * * @param db The Rocks database * @param options The read options to use * @param key The key to read the value for * * @return a byte array storing the value associated with the input key if * any. null if it does not find the specified key. 
* * @throws RocksDBException if the value for the key cannot be read */ public byte[] getFromBatchAndDB(final RocksDB db, final ReadOptions options, final byte[] key) throws RocksDBException { return getFromBatchAndDB(nativeHandle_, db.nativeHandle_, options.nativeHandle_, key, key.length); } @Override protected final native void disposeInternal(final long handle); @Override final native int count0(final long handle); @Override final native void put(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen); @Override final native void put(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen, final long cfHandle); @Override final native void putDirect(final long handle, final ByteBuffer key, final int keyOffset, final int keyLength, final ByteBuffer value, final int valueOffset, final int valueLength, final long cfHandle); @Override final native void merge(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen); @Override final native void merge(final long handle, final byte[] key, final int keyLen, final byte[] value, final int valueLen, final long cfHandle); @Override final native void delete(final long handle, final byte[] key, final int keyLen) throws RocksDBException; @Override final native void delete(final long handle, final byte[] key, final int keyLen, final long cfHandle) throws RocksDBException; @Override final native void singleDelete(final long handle, final byte[] key, final int keyLen) throws RocksDBException; @Override final native void singleDelete(final long handle, final byte[] key, final int keyLen, final long cfHandle) throws RocksDBException; @Override final native void removeDirect(final long handle, final ByteBuffer key, final int keyOffset, final int keyLength, final long cfHandle) throws RocksDBException; // DO NOT USE - `WriteBatchWithIndex::deleteRange` is not yet supported @Override final native void deleteRange(final long 
handle, final byte[] beginKey, final int beginKeyLen, final byte[] endKey, final int endKeyLen); // DO NOT USE - `WriteBatchWithIndex::deleteRange` is not yet supported @Override final native void deleteRange(final long handle, final byte[] beginKey, final int beginKeyLen, final byte[] endKey, final int endKeyLen, final long cfHandle); @Override final native void putLogData(final long handle, final byte[] blob, final int blobLen) throws RocksDBException; @Override final native void clear0(final long handle); @Override final native void setSavePoint0(final long handle); @Override final native void rollbackToSavePoint0(final long handle); @Override final native void popSavePoint(final long handle) throws RocksDBException; @Override final native void setMaxBytes(final long nativeHandle, final long maxBytes); @Override final native WriteBatch getWriteBatch(final long handle); private native static long newWriteBatchWithIndex(); private native static long newWriteBatchWithIndex(final boolean overwriteKey); private native static long newWriteBatchWithIndex( final long fallbackIndexComparatorHandle, final byte comparatorType, final int reservedBytes, final boolean overwriteKey); private native long iterator0(final long handle); private native long iterator1(final long handle, final long cfHandle); private native long iteratorWithBase(final long handle, final long baseIteratorHandle, final long cfHandle, final long readOptionsHandle); private native byte[] getFromBatch(final long handle, final long optHandle, final byte[] key, final int keyLen); private native byte[] getFromBatch(final long handle, final long optHandle, final byte[] key, final int keyLen, final long cfHandle); private native byte[] getFromBatchAndDB(final long handle, final long dbHandle, final long readOptHandle, final byte[] key, final int keyLen); private native byte[] getFromBatchAndDB(final long handle, final long dbHandle, final long readOptHandle, final byte[] key, final int keyLen, final long 
cfHandle); } rocksdb-6.11.4/java/src/main/java/org/rocksdb/WriteBufferManager.java000066400000000000000000000023061370372246700254230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Java wrapper over native write_buffer_manager class
 */
public class WriteBufferManager extends RocksObject {
  // Load the RocksDB native library when this class is initialized, before
  // any of the native methods below can be called.
  static {
    RocksDB.loadLibrary();
  }

  /**
   * Construct a new instance of WriteBufferManager.
   *
   * Check
   * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager
   * for more details on when to use it
   *
   * @param bufferSizeBytes buffer size (in bytes) to use for native write_buffer_manager
   * @param cache cache whose memory should be bounded by this write buffer manager.
   *     Must not be null: its native handle is read here.
   */
  public WriteBufferManager(final long bufferSizeBytes, final Cache cache){
    super(newWriteBufferManager(bufferSizeBytes, cache.nativeHandle_));
  }

  // Native factory; its result is handed to the RocksObject constructor as
  // this object's native handle.
  private native static long newWriteBufferManager(final long bufferSizeBytes, final long cacheHandle);

  // Native disposal hook for the given handle.
  @Override
  protected native void disposeInternal(final long handle);
} rocksdb-6.11.4/java/src/main/java/org/rocksdb/WriteOptions.java000066400000000000000000000170571370372246700243630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

/**
 * Options that control write operations.
 *
 * Note that developers should call WriteOptions.dispose() to release the
 * c++ side memory before a WriteOptions instance runs out of scope.
 */
public class WriteOptions extends RocksObject {
  /**
   * Construct WriteOptions instance.
  public WriteOptions() {
    // Allocate a new native object via newWriteOptions() and pass its handle
    // to the superclass constructor.
    super(newWriteOptions());
  }

  // TODO(AR) consider ownership
  /**
   * Wraps an existing native handle and then calls
   * {@code disOwnNativeHandle()}.
   *
   * NOTE(review): presumably this means the wrapper will not release the
   * native object itself; confirm against the superclass.
   *
   * @param nativeHandle handle of an already existing native object.
   */
  WriteOptions(final long nativeHandle) {
    super(nativeHandle);
    disOwnNativeHandle();
  }

  /**
   * Copy constructor for WriteOptions.
   *
   * The new instance is backed by the handle returned from
   * {@code copyWriteOptions} for {@code other}'s native handle.
   *
   * NOTE: This is documented as a shallow copy, i.e. any pointers held by
   * the native options would be shared rather than duplicated.
   * NOTE(review): the earlier wording listed comparator, merge_operator and
   * compaction filter members and looks copied from ColumnFamilyOptions;
   * confirm what the native copy actually shares.
   *
   * @param other The WriteOptions to copy.
   */
  public WriteOptions(WriteOptions other) {
    super(copyWriteOptions(other.nativeHandle_));
  }

  /**
   * If true, the write will be flushed from the operating system
   * buffer cache (by calling WritableFile::Sync()) before the write
   * is considered complete. If this flag is true, writes will be
   * slower.
   *
   * If this flag is false, and the machine crashes, some recent
   * writes may be lost. Note that if it is just the process that
   * crashes (i.e., the machine does not reboot), no writes will be
   * lost even if sync==false.
   *
   * In other words, a DB write with sync==false has similar
   * crash semantics as the "write()" system call. A DB write
   * with sync==true has similar crash semantics to a "write()"
   * system call followed by "fdatasync()".
   *
   * Default: false
   *
   * @param flag a boolean flag to indicate whether a write
   *     should be synchronized.
   * @return the instance of the current WriteOptions.
   */
  public WriteOptions setSync(final boolean flag) {
    setSync(nativeHandle_, flag);
    return this;
  }

  /**
   * Returns the current value of the sync flag.
   *
   * If true, the write will be flushed from the operating system
   * buffer cache (by calling WritableFile::Sync()) before the write
   * is considered complete. If this flag is true, writes will be
   * slower.
   *
   * If this flag is false, and the machine crashes, some recent
   * writes may be lost. Note that if it is just the process that
   * crashes (i.e., the machine does not reboot), no writes will be
   * lost even if sync==false.
   *
   * In other words, a DB write with sync==false has similar
   * crash semantics as the "write()" system call. A DB write
   * with sync==true has similar crash semantics to a "write()"
   * system call followed by "fdatasync()".
   *
   * @return boolean value indicating if sync is active.
   */
  public boolean sync() {
    return sync(nativeHandle_);
  }

  /**
   * If true, writes will not first go to the write ahead log,
   * and the write may get lost after a crash. The backup engine
   * relies on write-ahead logs to back up the memtable, so if
   * you disable write-ahead logs, you must create backups with
   * flush_before_backup=true to avoid losing unflushed memtable data.
   *
   * @param flag a boolean flag to specify whether to disable
   *     write-ahead-log on writes.
   * @return the instance of the current WriteOptions.
   */
  public WriteOptions setDisableWAL(final boolean flag) {
    setDisableWAL(nativeHandle_, flag);
    return this;
  }

  /**
   * Returns whether the write-ahead log is disabled for writes using
   * these options.
   *
   * If true, writes will not first go to the write ahead log,
   * and the write may get lost after a crash. The backup engine
   * relies on write-ahead logs to back up the memtable, so if
   * you disable write-ahead logs, you must create backups with
   * flush_before_backup=true to avoid losing unflushed memtable data.
   *
   * @return boolean value indicating if WAL is disabled.
   */
  public boolean disableWAL() {
    return disableWAL(nativeHandle_);
  }

  /**
   * If true and if user is trying to write to column families that don't exist
   * (they were dropped), ignore the write (don't return an error). If there
   * are multiple writes in a WriteBatch, other writes will succeed.
   *
   * Default: false
   *
   * @param ignoreMissingColumnFamilies true to ignore writes to column families
   *     which don't exist
   * @return the instance of the current WriteOptions.
   */
  public WriteOptions setIgnoreMissingColumnFamilies(
      final boolean ignoreMissingColumnFamilies) {
    setIgnoreMissingColumnFamilies(nativeHandle_, ignoreMissingColumnFamilies);
    return this;
  }

  /**
   * If true and if user is trying to write to column families that don't exist
   * (they were dropped), ignore the write (don't return an error). If there
   * are multiple writes in a WriteBatch, other writes will succeed.
   *
   * Default: false
   *
   * @return true if writes to column families which don't exist are ignored
   */
  public boolean ignoreMissingColumnFamilies() {
    return ignoreMissingColumnFamilies(nativeHandle_);
  }

  /**
   * If true and we need to wait or sleep for the write request, fails
   * immediately with {@link Status.Code#Incomplete}.
   *
   * @param noSlowdown true to fail write requests if we need to wait or sleep
   * @return the instance of the current WriteOptions.
   */
  public WriteOptions setNoSlowdown(final boolean noSlowdown) {
    setNoSlowdown(nativeHandle_, noSlowdown);
    return this;
  }

  /**
   * If true and we need to wait or sleep for the write request, fails
   * immediately with {@link Status.Code#Incomplete}.
   *
   * @return true when write requests are failed if we need to wait or sleep
   */
  public boolean noSlowdown() {
    return noSlowdown(nativeHandle_);
  }

  /**
   * If true, this write request is of lower priority if compaction is
   * behind. In that case, if {@link #noSlowdown()} == true, the request
   * will be cancelled immediately with {@link Status.Code#Incomplete} returned.
   * Otherwise, it will be slowed down. The slowdown value is determined by
   * RocksDB to guarantee it introduces minimum impacts to high priority writes.
   *
   * Default: false
   *
   * @param lowPri true if the write request should be of lower priority than
   *     compactions which are behind.
   *
   * @return the instance of the current WriteOptions.
   */
  public WriteOptions setLowPri(final boolean lowPri) {
    setLowPri(nativeHandle_, lowPri);
    return this;
  }
/**
 * Small byte-level helpers shared by the Java comparators.
 */
public class ByteUtil {

  /**
   * Encodes a String as UTF-8.
   *
   * @param str the string to encode
   *
   * @return the UTF-8 encoded bytes of {@code str}.
   */
  public static byte[] bytes(final String str) {
    final byte[] encoded = str.getBytes(UTF_8);
    return encoded;
  }

  /**
   * Compares the first {@code count} bytes of two buffers as unsigned
   * values, in the manner of C's memcmp.
   *
   * Bytes are read with absolute indices {@code 0..count-1}; the position
   * of neither buffer is consulted or changed.
   *
   * @param x the first value to compare with
   * @param y the second value to compare against
   * @param count the number of bytes to compare
   *
   * @return zero if the compared bytes are identical, otherwise the
   *     difference between the first pair of unsigned bytes that differ
   *     (negative if {@code x} sorts first, positive if {@code y} does)
   */
  public static int memcmp(final ByteBuffer x, final ByteBuffer y,
      final int count) {
    int offset = 0;
    while (offset < count) {
      // Mask to 0..255 so that bytes >= 0x80 compare as unsigned.
      final int delta = (x.get(offset) & 0xff) - (y.get(offset) & 0xff);
      if (delta != 0) {
        return delta;
      }
      offset++;
    }
    return 0;
  }
}
  /**
   * Attempts to shorten {@code start} in place to a key that is still
   * strictly less than {@code limit} in unsigned bytewise order.
   *
   * When shortening is possible, one byte of {@code start} is incremented
   * and {@code start}'s limit is set just past that byte. In every other
   * case {@code start} is left untouched. {@code limit} is only read.
   *
   * Bytes are accessed with absolute indices starting at 0 while lengths are
   * taken from {@code remaining()}.
   * NOTE(review): the two only agree when both buffers have position 0;
   * confirm that callers guarantee this.
   *
   * @param start the lower key; may be modified in place
   * @param limit the upper key
   */
  @Override
  public void findShortestSeparator(final ByteBuffer start,
      final ByteBuffer limit) {
    // Find length of common prefix
    final int minLength = Math.min(start.remaining(), limit.remaining());
    int diffIndex = 0;
    while (diffIndex < minLength &&
        start.get(diffIndex) == limit.get(diffIndex)) {
      diffIndex++;
    }

    if (diffIndex >= minLength) {
      // Do not shorten if one string is a prefix of the other
    } else {
      // Compare the first differing bytes as unsigned values.
      final int startByte = start.get(diffIndex) & 0xff;
      final int limitByte = limit.get(diffIndex) & 0xff;
      if (startByte >= limitByte) {
        // Cannot shorten since limit is smaller than start or start is
        // already the shortest possible.
        return;
      }
      assert(startByte < limitByte);

      // Bumping the differing byte keeps start below limit when either
      // limit has further bytes after diffIndex, or the bumped byte is
      // still smaller than limit's byte.
      if (diffIndex < limit.remaining() - 1 || startByte + 1 < limitByte) {
        start.put(diffIndex, (byte)((start.get(diffIndex) & 0xff) + 1));
        start.limit(diffIndex + 1);
      } else {
        //     v
        // A A 1 A A A
        // A A 2
        //
        // Incrementing the current byte will make start bigger than limit, we
        // will skip this byte, and find the first non 0xFF byte in start and
        // increment it.
        diffIndex++;

        while (diffIndex < start.remaining()) {
          // Keep moving until we find the first non 0xFF byte to
          // increment it
          if ((start.get(diffIndex) & 0xff) < 0xff) {
            start.put(diffIndex, (byte)((start.get(diffIndex) & 0xff) + 1));
            start.limit(diffIndex + 1);
            break;
          }
          diffIndex++;
        }
      }
      // Sanity check on duplicates so the assertion cannot disturb the
      // state of the caller's buffers.
      assert(compare(start.duplicate(), limit.duplicate()) < 0);
    }
  }
/**
 * Detects the operating system, CPU architecture and libc flavour of the
 * running JVM and derives the file names of the JNI shared library from
 * them.
 */
public class Environment {
  // Lower-cased with Locale.ROOT: the default-locale toLowerCase() maps 'I'
  // to a dotless 'ı' under Turkish/Azeri locales, which would make
  // "WINDOWS" or "AIX" fail the contains("win") / contains("aix") checks.
  // NOTE(review): fields are left non-final exactly as before; confirm
  // nothing overrides them reflectively before tightening.
  private static String OS =
      System.getProperty("os.name").toLowerCase(java.util.Locale.ROOT);
  private static String ARCH =
      System.getProperty("os.arch").toLowerCase(java.util.Locale.ROOT);
  private static boolean MUSL_LIBC;

  static {
    // Probe for musl by checking what /usr/bin/env is linked against.
    // Any failure to run the probe is treated as "not musl".
    boolean musl = false;
    Process p = null;
    try {
      p = new ProcessBuilder("/usr/bin/env", "sh", "-c",
          "ldd /usr/bin/env | grep -q musl").start();
      musl = p.waitFor() == 0;
    } catch (final IOException e) {
      musl = false;
    } catch (final InterruptedException e) {
      // Preserve the interrupt for whoever triggered class initialisation.
      Thread.currentThread().interrupt();
      musl = false;
    } finally {
      if (p != null) {
        // Releases the child's stdin/stdout/stderr pipes promptly.
        p.destroy();
      }
    }
    MUSL_LIBC = musl;
  }

  /** @return true if the architecture name contains "aarch64". */
  public static boolean isAarch64() {
    return ARCH.contains("aarch64");
  }

  /** @return true if the architecture name contains "ppc". */
  public static boolean isPowerPC() {
    return ARCH.contains("ppc");
  }

  /** @return true if the architecture name contains "s390x". */
  public static boolean isS390x() {
    return ARCH.contains("s390x");
  }

  /** @return true if the OS name contains "win". */
  public static boolean isWindows() {
    return (OS.contains("win"));
  }

  /** @return true if the OS name contains "freebsd". */
  public static boolean isFreeBSD() {
    return (OS.contains("freebsd"));
  }

  /** @return true if the OS name contains "mac". */
  public static boolean isMac() {
    return (OS.contains("mac"));
  }

  /** @return true if the OS name contains "aix". */
  public static boolean isAix() {
    return OS.contains("aix");
  }

  /** @return true if the OS name contains "nix" or "nux". */
  public static boolean isUnix() {
    return OS.contains("nix") ||
        OS.contains("nux");
  }

  /** @return the result of the musl probe run at class initialisation. */
  public static boolean isMuslLibc() {
    return MUSL_LIBC;
  }

  /** @return true if the OS name contains "sunos". */
  public static boolean isSolaris() {
    return OS.contains("sunos");
  }

  /** @return true if the OS name contains "openbsd". */
  public static boolean isOpenBSD() {
    return (OS.contains("openbsd"));
  }

  /**
   * @return true if the architecture name contains "sparcv9", or contains
   *     "64" anywhere after its first character.
   */
  public static boolean is64Bit() {
    if (ARCH.indexOf("sparcv9") >= 0) {
      return true;
    }
    // Strictly greater than zero, as before: a leading "64" does not count.
    return (ARCH.indexOf("64") > 0);
  }

  /**
   * @param name the base library name
   * @return {@code name} with "jni" appended.
   */
  public static String getSharedLibraryName(final String name) {
    return name + "jni";
  }

  /**
   * @param name the base library name
   * @return "lib" + the shared library name + the platform's shared-library
   *     suffix.
   */
  public static String getSharedLibraryFileName(final String name) {
    return appendLibOsSuffix("lib" + getSharedLibraryName(name), true);
  }

  /**
   * Get the name of the libc implementation
   *
   * @return the name of the implementation,
   *    or null if the default for that platform (e.g. glibc on Linux).
   */
  public static /* @Nullable */ String getLibcName() {
    if (isMuslLibc()) {
      return "musl";
    } else {
      return null;
    }
  }

  /** @return "-" followed by the libc name, or "" for the platform default. */
  private static String getLibcPostfix() {
    final String libcName = getLibcName();
    if (libcName == null) {
      return "";
    }
    return "-" + libcName;
  }

  /**
   * Builds the platform-specific JNI library name (without "lib" prefix or
   * file extension).
   *
   * @param name the base library name
   * @return the platform-qualified library name
   * @throws UnsupportedOperationException if no naming rule matches the
   *     detected OS/architecture
   */
  public static String getJniLibraryName(final String name) {
    if (isUnix()) {
      final String arch = is64Bit() ? "64" : "32";
      if (isPowerPC() || isAarch64()) {
        return String.format("%sjni-linux-%s%s", name, ARCH, getLibcPostfix());
      } else if (isS390x()) {
        return String.format("%sjni-linux%s", name, ARCH);
      } else {
        return String.format("%sjni-linux%s%s", name, arch, getLibcPostfix());
      }
    } else if (isMac()) {
      return String.format("%sjni-osx", name);
    } else if (isFreeBSD()) {
      return String.format("%sjni-freebsd%s", name, is64Bit() ? "64" : "32");
    } else if (isAix() && is64Bit()) {
      return String.format("%sjni-aix64", name);
    } else if (isSolaris()) {
      final String arch = is64Bit() ? "64" : "32";
      return String.format("%sjni-solaris%s", name, arch);
    } else if (isWindows() && is64Bit()) {
      return String.format("%sjni-win64", name);
    } else if (isOpenBSD()) {
      return String.format("%sjni-openbsd%s", name, is64Bit() ? "64" : "32");
    }

    throw new UnsupportedOperationException(String.format("Cannot determine JNI library name for ARCH='%s' OS='%s' name='%s'", ARCH, OS, name));
  }

  /**
   * @param name the base library name
   * @return "lib" + the JNI library name + the platform's JNI suffix.
   */
  public static String getJniLibraryFileName(final String name) {
    return appendLibOsSuffix("lib" + getJniLibraryName(name), false);
  }

  /**
   * Appends the library file extension for the detected OS.
   *
   * @param libraryFileName file name without extension
   * @param shared on macOS selects ".dylib" (true) or ".jnilib" (false);
   *     ignored on the other platforms
   * @return the file name with extension
   * @throws UnsupportedOperationException if the OS is not recognised
   */
  private static String appendLibOsSuffix(final String libraryFileName, final boolean shared) {
    if (isUnix() || isAix() || isSolaris() || isFreeBSD() || isOpenBSD()) {
      return libraryFileName + ".so";
    } else if (isMac()) {
      return libraryFileName + (shared ? ".dylib" : ".jnilib");
    } else if (isWindows()) {
      return libraryFileName + ".dll";
    }
    throw new UnsupportedOperationException();
  }

  /** @return ".dll" on Windows, ".jnilib" on macOS, ".so" otherwise. */
  public static String getJniLibraryExtension() {
    if (isWindows()) {
      return ".dll";
    }
    return (isMac()) ? ".jnilib" : ".so";
  }
}
*/ public final class IntComparator extends AbstractComparator { public IntComparator(final ComparatorOptions copt) { super(copt); } @Override public String name() { return "rocksdb.java.IntComparator"; } @Override public int compare(final ByteBuffer a, final ByteBuffer b) { return compareIntKeys(a, b); } /** * Compares integer keys * so that they are in ascending order * * @param a 4-bytes representing an integer key * @param b 4-bytes representing an integer key * * @return negative if a < b, 0 if a == b, positive otherwise */ private final int compareIntKeys(final ByteBuffer a, final ByteBuffer b) { final int iA = a.getInt(); final int iB = b.getInt(); // protect against int key calculation overflow final long diff = (long)iA - iB; final int result; if (diff < Integer.MIN_VALUE) { result = Integer.MIN_VALUE; } else if(diff > Integer.MAX_VALUE) { result = Integer.MAX_VALUE; } else { result = (int)diff; } return result; } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java000066400000000000000000000052241370372246700300420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb.util; import org.rocksdb.AbstractComparator; import org.rocksdb.BuiltinComparator; import org.rocksdb.ComparatorOptions; import org.rocksdb.Slice; import java.nio.ByteBuffer; /** * This is a Java Native implementation of the C++ * equivalent ReverseBytewiseComparatorImpl using {@link Slice} * * The performance of Comparators implemented in Java is always * less than their C++ counterparts due to the bridging overhead, * as such you likely don't want to use this apart from benchmarking * and you most likely instead wanted * {@link BuiltinComparator#REVERSE_BYTEWISE_COMPARATOR} */ public final class ReverseBytewiseComparator extends AbstractComparator { public ReverseBytewiseComparator(final ComparatorOptions copt) { super(copt); } @Override public String name() { return "rocksdb.java.ReverseBytewiseComparator"; } @Override public int compare(final ByteBuffer a, final ByteBuffer b) { return -BytewiseComparator._compare(a, b); } @Override public void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { // Find length of common prefix final int minLength = Math.min(start.remaining(), limit.remaining()); int diffIndex = 0; while (diffIndex < minLength && start.get(diffIndex) == limit.get(diffIndex)) { diffIndex++; } assert(diffIndex <= minLength); if (diffIndex == minLength) { // Do not shorten if one string is a prefix of the other // // We could handle cases like: // V // A A 2 X Y // A A 2 // in a similar way as BytewiseComparator::FindShortestSeparator(). // We keep it simple by not implementing it. We can come back to it // later when needed. } else { final int startByte = start.get(diffIndex) & 0xff; final int limitByte = limit.get(diffIndex) & 0xff; if (startByte > limitByte && diffIndex < start.remaining() - 1) { // Case like // V // A A 3 A A // A A 1 B B // // or // v // A A 2 A A // A A 1 B B // In this case "AA2" will be good. 
//#ifndef NDEBUG // std::string old_start = *start; //#endif start.limit(diffIndex + 1); //#ifndef NDEBUG // assert(old_start >= *start); //#endif assert(BytewiseComparator._compare(start.duplicate(), limit.duplicate()) > 0); } } } } rocksdb-6.11.4/java/src/main/java/org/rocksdb/util/SizeUnit.java000066400000000000000000000010371370372246700244330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; public class SizeUnit { public static final long KB = 1024L; public static final long MB = KB * KB; public static final long GB = KB * MB; public static final long TB = KB * GB; public static final long PB = KB * TB; private SizeUnit() {} } rocksdb-6.11.4/java/src/test/000077500000000000000000000000001370372246700157345ustar00rootroot00000000000000rocksdb-6.11.4/java/src/test/java/000077500000000000000000000000001370372246700166555ustar00rootroot00000000000000rocksdb-6.11.4/java/src/test/java/org/000077500000000000000000000000001370372246700174445ustar00rootroot00000000000000rocksdb-6.11.4/java/src/test/java/org/rocksdb/000077500000000000000000000000001370372246700210735ustar00rootroot00000000000000rocksdb-6.11.4/java/src/test/java/org/rocksdb/AbstractTransactionTest.java000066400000000000000000000740261370372246700265600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; /** * Base class of {@link TransactionTest} and {@link OptimisticTransactionTest} */ public abstract class AbstractTransactionTest { protected final static byte[] TXN_TEST_COLUMN_FAMILY = "txn_test_cf" .getBytes(); protected static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); public abstract DBContainer startDb() throws RocksDBException; @Test public void setSnapshot() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.setSnapshot(); } } @Test public void setSnapshotOnNextOperation() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.setSnapshotOnNextOperation(); txn.put("key1".getBytes(), "value1".getBytes()); } } @Test public void setSnapshotOnNextOperation_transactionNotifier() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { try(final TestTransactionNotifier notifier = new TestTransactionNotifier()) { txn.setSnapshotOnNextOperation(notifier); txn.put("key1".getBytes(), "value1".getBytes()); txn.setSnapshotOnNextOperation(notifier); txn.put("key2".getBytes(), "value2".getBytes()); assertThat(notifier.getCreatedSnapshots().size()).isEqualTo(2); } } } @Test public void getSnapshot() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.setSnapshot(); final Snapshot snapshot = 
txn.getSnapshot(); assertThat(snapshot.isOwningHandle()).isFalse(); } } @Test public void getSnapshot_null() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { final Snapshot snapshot = txn.getSnapshot(); assertThat(snapshot).isNull(); } } @Test public void clearSnapshot() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.setSnapshot(); txn.clearSnapshot(); } } @Test public void clearSnapshot_none() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.clearSnapshot(); } } @Test public void commit() throws RocksDBException { final byte k1[] = "rollback-key1".getBytes(UTF_8); final byte v1[] = "rollback-value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb()) { try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); txn.commit(); } try(final ReadOptions readOptions = new ReadOptions(); final Transaction txn2 = dbContainer.beginTransaction()) { assertThat(txn2.get(readOptions, k1)).isEqualTo(v1); } } } @Test public void rollback() throws RocksDBException { final byte k1[] = "rollback-key1".getBytes(UTF_8); final byte v1[] = "rollback-value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb()) { try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); txn.rollback(); } try(final ReadOptions readOptions = new ReadOptions(); final Transaction txn2 = dbContainer.beginTransaction()) { assertThat(txn2.get(readOptions, k1)).isNull(); } } } @Test public void savePoint() throws RocksDBException { final byte k1[] = "savePoint-key1".getBytes(UTF_8); final byte v1[] = "savePoint-value1".getBytes(UTF_8); final byte k2[] = "savePoint-key2".getBytes(UTF_8); final byte v2[] = "savePoint-value2".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final 
ReadOptions readOptions = new ReadOptions()) { try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); txn.setSavePoint(); txn.put(k2, v2); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); assertThat(txn.get(readOptions, k2)).isEqualTo(v2); txn.rollbackToSavePoint(); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); assertThat(txn.get(readOptions, k2)).isNull(); txn.commit(); } try(final Transaction txn2 = dbContainer.beginTransaction()) { assertThat(txn2.get(readOptions, k1)).isEqualTo(v1); assertThat(txn2.get(readOptions, k2)).isNull(); } } } @Test public void getPut_cf() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); assertThat(txn.get(testCf, readOptions, k1)).isNull(); txn.put(testCf, k1, v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); } } @Test public void getPut() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.get(readOptions, k1)).isNull(); txn.put(k1, v1); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); } } @Test public void multiGetPut_cf() throws RocksDBException { final byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = 
dbContainer.getTestColumnFamily(); final List cfList = Arrays.asList(testCf, testCf); assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(new byte[][] { null, null }); txn.put(testCf, keys[0], values[0]); txn.put(testCf, keys[1], values[1]); assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); } } @Test public void multiGetPut() throws RocksDBException { final byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.multiGet(readOptions, keys)).isEqualTo(new byte[][] { null, null }); txn.put(keys[0], values[0]); txn.put(keys[1], values[1]); assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); } } @Test public void getForUpdate_cf() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isNull(); txn.put(testCf, k1, v1); assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); } } @Test public void getForUpdate() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getForUpdate(readOptions, k1, true)).isNull(); txn.put(k1, v1); assertThat(txn.getForUpdate(readOptions, k1, true)).isEqualTo(v1); } } @Test public void multiGetForUpdate_cf() throws RocksDBException { final 
byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); final List cfList = Arrays.asList(testCf, testCf); assertThat(txn.multiGetForUpdate(readOptions, cfList, keys)) .isEqualTo(new byte[][] { null, null }); txn.put(testCf, keys[0], values[0]); txn.put(testCf, keys[1], values[1]); assertThat(txn.multiGetForUpdate(readOptions, cfList, keys)) .isEqualTo(values); } } @Test public void multiGetForUpdate() throws RocksDBException { final byte keys[][] = new byte[][]{ "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][]{ "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; try (final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.multiGetForUpdate(readOptions, keys)).isEqualTo(new byte[][]{null, null}); txn.put(keys[0], values[0]); txn.put(keys[1], values[1]); assertThat(txn.multiGetForUpdate(readOptions, keys)).isEqualTo(values); } } @Test public void getIterator() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); txn.put(k1, v1); try(final RocksIterator iterator = txn.getIterator(readOptions)) { iterator.seek(k1); assertThat(iterator.isValid()).isTrue(); assertThat(iterator.key()).isEqualTo(k1); assertThat(iterator.value()).isEqualTo(v1); } } } @Test public void getIterator_cf() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final ReadOptions 
readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); txn.put(testCf, k1, v1); try(final RocksIterator iterator = txn.getIterator(readOptions, testCf)) { iterator.seek(k1); assertThat(iterator.isValid()).isTrue(); assertThat(iterator.key()).isEqualTo(k1); assertThat(iterator.value()).isEqualTo(v1); } } } @Test public void merge_cf() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.merge(testCf, k1, v1); } } @Test public void merge() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.merge(k1, v1); } } @Test public void delete_cf() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, k1, v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); txn.delete(testCf, k1); assertThat(txn.get(testCf, readOptions, k1)).isNull(); } } @Test public void delete() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); assertThat(txn.get(readOptions, 
k1)).isEqualTo(v1); txn.delete(k1); assertThat(txn.get(readOptions, k1)).isNull(); } } @Test public void delete_parts_cf() throws RocksDBException { final byte keyParts[][] = new byte[][] { "ke".getBytes(UTF_8), "y1".getBytes(UTF_8)}; final byte valueParts[][] = new byte[][] { "val".getBytes(UTF_8), "ue1".getBytes(UTF_8)}; final byte[] key = concat(keyParts); final byte[] value = concat(valueParts); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, keyParts, valueParts); assertThat(txn.get(testCf, readOptions, key)).isEqualTo(value); txn.delete(testCf, keyParts); assertThat(txn.get(testCf, readOptions, key)) .isNull(); } } @Test public void delete_parts() throws RocksDBException { final byte keyParts[][] = new byte[][] { "ke".getBytes(UTF_8), "y1".getBytes(UTF_8)}; final byte valueParts[][] = new byte[][] { "val".getBytes(UTF_8), "ue1".getBytes(UTF_8)}; final byte[] key = concat(keyParts); final byte[] value = concat(valueParts); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(keyParts, valueParts); assertThat(txn.get(readOptions, key)).isEqualTo(value); txn.delete(keyParts); assertThat(txn.get(readOptions, key)).isNull(); } } @Test public void getPutUntracked_cf() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); assertThat(txn.get(testCf, readOptions, k1)).isNull(); txn.putUntracked(testCf, k1, v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); } } @Test public void 
getPutUntracked() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.get(readOptions, k1)).isNull(); txn.putUntracked(k1, v1); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); } } @Test public void multiGetPutUntracked_cf() throws RocksDBException { final byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); final List cfList = Arrays.asList(testCf, testCf); assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(new byte[][] { null, null }); txn.putUntracked(testCf, keys[0], values[0]); txn.putUntracked(testCf, keys[1], values[1]); assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); } } @Test public void multiGetPutUntracked() throws RocksDBException { final byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.multiGet(readOptions, keys)).isEqualTo(new byte[][] { null, null }); txn.putUntracked(keys[0], values[0]); txn.putUntracked(keys[1], values[1]); assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); } } @Test public void mergeUntracked_cf() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer 
dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.mergeUntracked(testCf, k1, v1); } } @Test public void mergeUntracked() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.mergeUntracked(k1, v1); } } @Test public void deleteUntracked_cf() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, k1, v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); txn.deleteUntracked(testCf, k1); assertThat(txn.get(testCf, readOptions, k1)).isNull(); } } @Test public void deleteUntracked() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); txn.deleteUntracked(k1); assertThat(txn.get(readOptions, k1)).isNull(); } } @Test public void deleteUntracked_parts_cf() throws RocksDBException { final byte keyParts[][] = new byte[][] { "ke".getBytes(UTF_8), "y1".getBytes(UTF_8)}; final byte valueParts[][] = new byte[][] { "val".getBytes(UTF_8), "ue1".getBytes(UTF_8)}; final byte[] key = concat(keyParts); final byte[] value = concat(valueParts); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = 
dbContainer.getTestColumnFamily(); txn.put(testCf, keyParts, valueParts); assertThat(txn.get(testCf, readOptions, key)).isEqualTo(value); txn.deleteUntracked(testCf, keyParts); assertThat(txn.get(testCf, readOptions, key)).isNull(); } } @Test public void deleteUntracked_parts() throws RocksDBException { final byte keyParts[][] = new byte[][] { "ke".getBytes(UTF_8), "y1".getBytes(UTF_8)}; final byte valueParts[][] = new byte[][] { "val".getBytes(UTF_8), "ue1".getBytes(UTF_8)}; final byte[] key = concat(keyParts); final byte[] value = concat(valueParts); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(keyParts, valueParts); assertThat(txn.get(readOptions, key)).isEqualTo(value); txn.deleteUntracked(keyParts); assertThat(txn.get(readOptions, key)).isNull(); } } @Test public void putLogData() throws RocksDBException { final byte[] blob = "blobby".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.putLogData(blob); } } @Test public void enabledDisableIndexing() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.disableIndexing(); txn.enableIndexing(); txn.disableIndexing(); txn.enableIndexing(); } } @Test public void numKeys() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); final byte k2[] = "key2".getBytes(UTF_8); final byte v2[] = "value2".getBytes(UTF_8); final byte k3[] = "key3".getBytes(UTF_8); final byte v3[] = "value3".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(k1, v1); txn.put(testCf, k2, v2); txn.merge(k3, v3); txn.delete(testCf, k2); 
assertThat(txn.getNumKeys()).isEqualTo(3); assertThat(txn.getNumPuts()).isEqualTo(2); assertThat(txn.getNumMerges()).isEqualTo(1); assertThat(txn.getNumDeletes()).isEqualTo(1); } } @Test public void elapsedTime() throws RocksDBException, InterruptedException { final long preStartTxnTime = System.currentTimeMillis(); try (final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { Thread.sleep(2); final long txnElapsedTime = txn.getElapsedTime(); assertThat(txnElapsedTime).isLessThan(System.currentTimeMillis() - preStartTxnTime); assertThat(txnElapsedTime).isGreaterThan(0); } } @Test public void getWriteBatch() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); final WriteBatchWithIndex writeBatch = txn.getWriteBatch(); assertThat(writeBatch).isNotNull(); assertThat(writeBatch.isOwningHandle()).isFalse(); assertThat(writeBatch.count()).isEqualTo(1); } } @Test public void setLockTimeout() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.setLockTimeout(1000); } } @Test public void writeOptions() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final WriteOptions writeOptions = new WriteOptions() .setDisableWAL(true) .setSync(true); final Transaction txn = dbContainer.beginTransaction(writeOptions)) { txn.put(k1, v1); WriteOptions txnWriteOptions = txn.getWriteOptions(); assertThat(txnWriteOptions).isNotNull(); assertThat(txnWriteOptions.isOwningHandle()).isFalse(); assertThat(txnWriteOptions).isNotSameAs(writeOptions); assertThat(txnWriteOptions.disableWAL()).isTrue(); assertThat(txnWriteOptions.sync()).isTrue(); 
txn.setWriteOptions(txnWriteOptions.setSync(false)); txnWriteOptions = txn.getWriteOptions(); assertThat(txnWriteOptions).isNotNull(); assertThat(txnWriteOptions.isOwningHandle()).isFalse(); assertThat(txnWriteOptions).isNotSameAs(writeOptions); assertThat(txnWriteOptions.disableWAL()).isTrue(); assertThat(txnWriteOptions.sync()).isFalse(); } } @Test public void undoGetForUpdate_cf() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isNull(); txn.put(testCf, k1, v1); assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); txn.undoGetForUpdate(testCf, k1); } } @Test public void undoGetForUpdate() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getForUpdate(readOptions, k1, true)).isNull(); txn.put(k1, v1); assertThat(txn.getForUpdate(readOptions, k1, true)).isEqualTo(v1); txn.undoGetForUpdate(k1); } } @Test public void rebuildFromWriteBatch() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); final byte k2[] = "key2".getBytes(UTF_8); final byte v2[] = "value2".getBytes(UTF_8); final byte k3[] = "key3".getBytes(UTF_8); final byte v3[] = "value3".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); 
assertThat(txn.getNumKeys()).isEqualTo(1); try(final WriteBatch writeBatch = new WriteBatch()) { writeBatch.put(k2, v2); writeBatch.put(k3, v3); txn.rebuildFromWriteBatch(writeBatch); assertThat(txn.get(readOptions, k1)).isEqualTo(v1); assertThat(txn.get(readOptions, k2)).isEqualTo(v2); assertThat(txn.get(readOptions, k3)).isEqualTo(v3); assertThat(txn.getNumKeys()).isEqualTo(3); } } } @Test public void getCommitTimeWriteBatch() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); final WriteBatch writeBatch = txn.getCommitTimeWriteBatch(); assertThat(writeBatch).isNotNull(); assertThat(writeBatch.isOwningHandle()).isFalse(); assertThat(writeBatch.count()).isEqualTo(0); } } @Test public void logNumber() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getLogNumber()).isEqualTo(0); final long logNumber = rand.nextLong(); txn.setLogNumber(logNumber); assertThat(txn.getLogNumber()).isEqualTo(logNumber); } } private static byte[] concat(final byte[][] bufs) { int resultLength = 0; for(final byte[] buf : bufs) { resultLength += buf.length; } final byte[] result = new byte[resultLength]; int resultOffset = 0; for(final byte[] buf : bufs) { final int srcLength = buf.length; System.arraycopy(buf, 0, result, resultOffset, srcLength); resultOffset += srcLength; } return result; } private static class TestTransactionNotifier extends AbstractTransactionNotifier { private final List createdSnapshots = new ArrayList<>(); @Override public void snapshotCreated(final Snapshot newSnapshot) { createdSnapshots.add(newSnapshot); } public List getCreatedSnapshots() { return createdSnapshots; } } protected static abstract class DBContainer implements AutoCloseable { protected final WriteOptions writeOptions; 
protected final List columnFamilyHandles; protected final ColumnFamilyOptions columnFamilyOptions; protected final DBOptions options; public DBContainer(final WriteOptions writeOptions, final List columnFamilyHandles, final ColumnFamilyOptions columnFamilyOptions, final DBOptions options) { this.writeOptions = writeOptions; this.columnFamilyHandles = columnFamilyHandles; this.columnFamilyOptions = columnFamilyOptions; this.options = options; } public abstract Transaction beginTransaction(); public abstract Transaction beginTransaction( final WriteOptions writeOptions); public ColumnFamilyHandle getTestColumnFamily() { return columnFamilyHandles.get(1); } @Override public abstract void close(); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/BackupEngineTest.java000066400000000000000000000226721370372246700251420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.List; import java.util.concurrent.ThreadLocalRandom; import static org.assertj.core.api.Assertions.assertThat; public class BackupEngineTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Rule public TemporaryFolder backupFolder = new TemporaryFolder(); @Test public void backupDb() throws RocksDBException { // Open empty database. 
try(final Options opt = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // Fill database with some test values prepareDatabase(db); // Create two backups try(final BackupableDBOptions bopt = new BackupableDBOptions( backupFolder.getRoot().getAbsolutePath()); final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { be.createNewBackup(db, false); be.createNewBackup(db, true); verifyNumberOfValidBackups(be, 2); } } } @Test public void deleteBackup() throws RocksDBException { // Open empty database. try(final Options opt = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // Fill database with some test values prepareDatabase(db); // Create two backups try(final BackupableDBOptions bopt = new BackupableDBOptions( backupFolder.getRoot().getAbsolutePath()); final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { be.createNewBackup(db, false); be.createNewBackup(db, true); final List backupInfo = verifyNumberOfValidBackups(be, 2); // Delete the first backup be.deleteBackup(backupInfo.get(0).backupId()); final List newBackupInfo = verifyNumberOfValidBackups(be, 1); // The second backup must remain. assertThat(newBackupInfo.get(0).backupId()). isEqualTo(backupInfo.get(1).backupId()); } } } @Test public void purgeOldBackups() throws RocksDBException { // Open empty database. 
try(final Options opt = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // Fill database with some test values prepareDatabase(db); // Create four backups try(final BackupableDBOptions bopt = new BackupableDBOptions( backupFolder.getRoot().getAbsolutePath()); final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { be.createNewBackup(db, false); be.createNewBackup(db, true); be.createNewBackup(db, true); be.createNewBackup(db, true); final List backupInfo = verifyNumberOfValidBackups(be, 4); // Delete everything except the latest backup be.purgeOldBackups(1); final List newBackupInfo = verifyNumberOfValidBackups(be, 1); // The latest backup must remain. assertThat(newBackupInfo.get(0).backupId()). isEqualTo(backupInfo.get(3).backupId()); } } } @Test public void restoreLatestBackup() throws RocksDBException { try(final Options opt = new Options().setCreateIfMissing(true)) { // Open empty database. RocksDB db = null; try { db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()); // Fill database with some test values prepareDatabase(db); try (final BackupableDBOptions bopt = new BackupableDBOptions( backupFolder.getRoot().getAbsolutePath()); final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { be.createNewBackup(db, true); verifyNumberOfValidBackups(be, 1); db.put("key1".getBytes(), "valueV2".getBytes()); db.put("key2".getBytes(), "valueV2".getBytes()); be.createNewBackup(db, true); verifyNumberOfValidBackups(be, 2); db.put("key1".getBytes(), "valueV3".getBytes()); db.put("key2".getBytes(), "valueV3".getBytes()); assertThat(new String(db.get("key1".getBytes()))).endsWith("V3"); assertThat(new String(db.get("key2".getBytes()))).endsWith("V3"); db.close(); db = null; verifyNumberOfValidBackups(be, 2); // restore db from latest backup try(final RestoreOptions ropts = new RestoreOptions(false)) { be.restoreDbFromLatestBackup(dbFolder.getRoot().getAbsolutePath(), 
dbFolder.getRoot().getAbsolutePath(), ropts); } // Open database again. db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()); // Values must have suffix V2 because of restoring latest backup. assertThat(new String(db.get("key1".getBytes()))).endsWith("V2"); assertThat(new String(db.get("key2".getBytes()))).endsWith("V2"); } } finally { if(db != null) { db.close(); } } } } @Test public void restoreFromBackup() throws RocksDBException { try(final Options opt = new Options().setCreateIfMissing(true)) { RocksDB db = null; try { // Open empty database. db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()); // Fill database with some test values prepareDatabase(db); try (final BackupableDBOptions bopt = new BackupableDBOptions( backupFolder.getRoot().getAbsolutePath()); final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { be.createNewBackup(db, true); verifyNumberOfValidBackups(be, 1); db.put("key1".getBytes(), "valueV2".getBytes()); db.put("key2".getBytes(), "valueV2".getBytes()); be.createNewBackup(db, true); verifyNumberOfValidBackups(be, 2); db.put("key1".getBytes(), "valueV3".getBytes()); db.put("key2".getBytes(), "valueV3".getBytes()); assertThat(new String(db.get("key1".getBytes()))).endsWith("V3"); assertThat(new String(db.get("key2".getBytes()))).endsWith("V3"); //close the database db.close(); db = null; //restore the backup final List backupInfo = verifyNumberOfValidBackups(be, 2); // restore db from first backup be.restoreDbFromBackup(backupInfo.get(0).backupId(), dbFolder.getRoot().getAbsolutePath(), dbFolder.getRoot().getAbsolutePath(), new RestoreOptions(false)); // Open database again. db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()); // Values must have suffix V2 because of restoring latest backup. 
assertThat(new String(db.get("key1".getBytes()))).endsWith("V1"); assertThat(new String(db.get("key2".getBytes()))).endsWith("V1"); } } finally { if(db != null) { db.close(); } } } } @Test public void backupDbWithMetadata() throws RocksDBException { // Open empty database. try (final Options opt = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // Fill database with some test values prepareDatabase(db); // Create two backups try (final BackupableDBOptions bopt = new BackupableDBOptions(backupFolder.getRoot().getAbsolutePath()); final BackupEngine be = BackupEngine.open(opt.getEnv(), bopt)) { final String metadata = String.valueOf(ThreadLocalRandom.current().nextInt()); be.createNewBackupWithMetadata(db, metadata, true); final List backupInfoList = verifyNumberOfValidBackups(be, 1); assertThat(backupInfoList.get(0).appMetadata()).isEqualTo(metadata); } } } /** * Verify backups. * * @param be {@link BackupEngine} instance. * @param expectedNumberOfBackups numerical value * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ private List verifyNumberOfValidBackups(final BackupEngine be, final int expectedNumberOfBackups) throws RocksDBException { // Verify that backups exist assertThat(be.getCorruptedBackups().length). isEqualTo(0); be.garbageCollect(); final List backupInfo = be.getBackupInfo(); assertThat(backupInfo.size()). isEqualTo(expectedNumberOfBackups); return backupInfo; } /** * Fill database with some test values. * * @param db {@link RocksDB} instance. * @throws RocksDBException thrown if an error occurs within the native * part of the library. 
*/ private void prepareDatabase(final RocksDB db) throws RocksDBException { db.put("key1".getBytes(), "valueV1".getBytes()); db.put("key2".getBytes(), "valueV1".getBytes()); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java000066400000000000000000000243131370372246700264140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import static org.assertj.core.api.Assertions.assertThat; import java.util.Random; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; public class BackupableDBOptionsTest { private final static String ARBITRARY_PATH = System.getProperty("java.io.tmpdir"); @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public ExpectedException exception = ExpectedException.none(); public static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void backupDir() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { assertThat(backupableDBOptions.backupDir()). isEqualTo(ARBITRARY_PATH); } } @Test public void env() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { assertThat(backupableDBOptions.backupEnv()). 
isNull(); try(final Env env = new RocksMemEnv(Env.getDefault())) { backupableDBOptions.setBackupEnv(env); assertThat(backupableDBOptions.backupEnv()) .isEqualTo(env); } } } @Test public void shareTableFiles() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final boolean value = rand.nextBoolean(); backupableDBOptions.setShareTableFiles(value); assertThat(backupableDBOptions.shareTableFiles()). isEqualTo(value); } } @Test public void infoLog() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { assertThat(backupableDBOptions.infoLog()). isNull(); try(final Options options = new Options(); final Logger logger = new Logger(options){ @Override protected void log(InfoLogLevel infoLogLevel, String logMsg) { } }) { backupableDBOptions.setInfoLog(logger); assertThat(backupableDBOptions.infoLog()) .isEqualTo(logger); } } } @Test public void sync() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final boolean value = rand.nextBoolean(); backupableDBOptions.setSync(value); assertThat(backupableDBOptions.sync()).isEqualTo(value); } } @Test public void destroyOldData() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH);) { final boolean value = rand.nextBoolean(); backupableDBOptions.setDestroyOldData(value); assertThat(backupableDBOptions.destroyOldData()). isEqualTo(value); } } @Test public void backupLogFiles() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final boolean value = rand.nextBoolean(); backupableDBOptions.setBackupLogFiles(value); assertThat(backupableDBOptions.backupLogFiles()). 
isEqualTo(value); } } @Test public void backupRateLimit() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final long value = Math.abs(rand.nextLong()); backupableDBOptions.setBackupRateLimit(value); assertThat(backupableDBOptions.backupRateLimit()). isEqualTo(value); // negative will be mapped to 0 backupableDBOptions.setBackupRateLimit(-1); assertThat(backupableDBOptions.backupRateLimit()). isEqualTo(0); } } @Test public void backupRateLimiter() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { assertThat(backupableDBOptions.backupEnv()). isNull(); try(final RateLimiter backupRateLimiter = new RateLimiter(999)) { backupableDBOptions.setBackupRateLimiter(backupRateLimiter); assertThat(backupableDBOptions.backupRateLimiter()) .isEqualTo(backupRateLimiter); } } } @Test public void restoreRateLimit() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final long value = Math.abs(rand.nextLong()); backupableDBOptions.setRestoreRateLimit(value); assertThat(backupableDBOptions.restoreRateLimit()). isEqualTo(value); // negative will be mapped to 0 backupableDBOptions.setRestoreRateLimit(-1); assertThat(backupableDBOptions.restoreRateLimit()). isEqualTo(0); } } @Test public void restoreRateLimiter() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { assertThat(backupableDBOptions.backupEnv()). 
isNull(); try(final RateLimiter restoreRateLimiter = new RateLimiter(911)) { backupableDBOptions.setRestoreRateLimiter(restoreRateLimiter); assertThat(backupableDBOptions.restoreRateLimiter()) .isEqualTo(restoreRateLimiter); } } } @Test public void shareFilesWithChecksum() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { boolean value = rand.nextBoolean(); backupableDBOptions.setShareFilesWithChecksum(value); assertThat(backupableDBOptions.shareFilesWithChecksum()). isEqualTo(value); } } @Test public void maxBackgroundOperations() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final int value = rand.nextInt(); backupableDBOptions.setMaxBackgroundOperations(value); assertThat(backupableDBOptions.maxBackgroundOperations()). isEqualTo(value); } } @Test public void callbackTriggerIntervalSize() { try (final BackupableDBOptions backupableDBOptions = new BackupableDBOptions(ARBITRARY_PATH)) { final long value = rand.nextLong(); backupableDBOptions.setCallbackTriggerIntervalSize(value); assertThat(backupableDBOptions.callbackTriggerIntervalSize()). 
isEqualTo(value);
    }
  }

  /**
   * Expects constructing the options with a {@code null} path to raise
   * {@link IllegalArgumentException}.
   */
  @Test
  public void failBackupDirIsNull() {
    exception.expect(IllegalArgumentException.class);
    try (final BackupableDBOptions opts = new BackupableDBOptions(null)) {
      //no-op
    }
  }

  // Each test below obtains an already-closed options object from
  // setupUninitializedBackupableDBOptions(exception), which also registers
  // the expected exception, and then calls one accessor on it.

  @Test
  public void failBackupDirIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.backupDir();
    }
  }

  @Test
  public void failSetShareTableFilesIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setShareTableFiles(true);
    }
  }

  @Test
  public void failShareTableFilesIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.shareTableFiles();
    }
  }

  @Test
  public void failSetSyncIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setSync(true);
    }
  }

  @Test
  public void failSyncIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.sync();
    }
  }

  @Test
  public void failSetDestroyOldDataIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setDestroyOldData(true);
    }
  }

  @Test
  public void failDestroyOldDataIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.destroyOldData();
    }
  }

  @Test
  public void failSetBackupLogFilesIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setBackupLogFiles(true);
    }
  }

  @Test
  public void failBackupLogFilesIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.backupLogFiles();
    }
  }

  @Test
  public void failSetBackupRateLimitIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setBackupRateLimit(1);
    }
  }

  @Test
  public void failBackupRateLimitIfDisposed() {
    try (final
BackupableDBOptions options =
             setupUninitializedBackupableDBOptions(exception)) {
      options.backupRateLimit();
    }
  }

  @Test
  public void failSetRestoreRateLimitIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setRestoreRateLimit(1);
    }
  }

  @Test
  public void failRestoreRateLimitIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.restoreRateLimit();
    }
  }

  @Test
  public void failSetShareFilesWithChecksumIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.setShareFilesWithChecksum(true);
    }
  }

  @Test
  public void failShareFilesWithChecksumIfDisposed() {
    try (final BackupableDBOptions disposed =
             setupUninitializedBackupableDBOptions(exception)) {
      disposed.shareFilesWithChecksum();
    }
  }

  /**
   * Builds an options object, closes it immediately, and registers
   * {@link AssertionError} as the expected failure on the given rule.
   *
   * @param exception the rule on which the expectation is registered
   * @return the already-closed options object
   */
  private BackupableDBOptions setupUninitializedBackupableDBOptions(
      ExpectedException exception) {
    final BackupableDBOptions closedOptions =
        new BackupableDBOptions(ARBITRARY_PATH);
    closedOptions.close();
    exception.expect(AssertionError.class);
    return closedOptions;
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java000066400000000000000000000342261370372246700266740ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.nio.charset.StandardCharsets;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link BlockBasedTableConfig}: each option is set and read back
 * through its getter, and the config is installed into {@link Options} as the
 * table format config.
 */
public class BlockBasedTableConfigTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  @Test
  public void cacheIndexAndFilterBlocks() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setCacheIndexAndFilterBlocks(true);
    assertThat(blockBasedTableConfig.cacheIndexAndFilterBlocks()).
        isTrue();
  }

  @Test
  public void cacheIndexAndFilterBlocksWithHighPriority() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setCacheIndexAndFilterBlocksWithHighPriority(true);
    assertThat(blockBasedTableConfig.cacheIndexAndFilterBlocksWithHighPriority()).
        isTrue();
  }

  @Test
  public void pinL0FilterAndIndexBlocksInCache() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setPinL0FilterAndIndexBlocksInCache(true);
    assertThat(blockBasedTableConfig.pinL0FilterAndIndexBlocksInCache()).
        isTrue();
  }

  @Test
  public void pinTopLevelIndexAndFilter() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setPinTopLevelIndexAndFilter(false);
    assertThat(blockBasedTableConfig.pinTopLevelIndexAndFilter()).
        isFalse();
  }

  @Test
  public void indexType() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    assertThat(IndexType.values().length).isEqualTo(3);
    blockBasedTableConfig.setIndexType(IndexType.kHashSearch);
    // Assert on the value itself: assertThat(boolean) without a chained
    // isTrue() never fails, so the previous form verified nothing.
    assertThat(blockBasedTableConfig.indexType()).isEqualTo(IndexType.kHashSearch);
    assertThat(IndexType.valueOf("kBinarySearch")).isNotNull();
    blockBasedTableConfig.setIndexType(IndexType.valueOf("kBinarySearch"));
    assertThat(blockBasedTableConfig.indexType()).isEqualTo(IndexType.kBinarySearch);
  }

  @Test
  public void dataBlockIndexType() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinaryAndHash);
    assertThat(blockBasedTableConfig.dataBlockIndexType())
        .isEqualTo(DataBlockIndexType.kDataBlockBinaryAndHash);
    blockBasedTableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinarySearch);
    assertThat(blockBasedTableConfig.dataBlockIndexType())
        .isEqualTo(DataBlockIndexType.kDataBlockBinarySearch);
  }

  @Test
  public void checksumType() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    assertThat(ChecksumType.values().length).isEqualTo(3);
    assertThat(ChecksumType.valueOf("kxxHash")).
        isEqualTo(ChecksumType.kxxHash);
    blockBasedTableConfig.setChecksumType(ChecksumType.kNoChecksum);
    blockBasedTableConfig.setChecksumType(ChecksumType.kxxHash);
    // The last value written must be the one read back.
    assertThat(blockBasedTableConfig.checksumType()).isEqualTo(ChecksumType.kxxHash);
  }

  @Test
  public void noBlockCache() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setNoBlockCache(true);
    assertThat(blockBasedTableConfig.noBlockCache()).isTrue();
  }

  @Test
  public void blockCache() {
    try (
        final Cache cache = new LRUCache(17 * 1024 * 1024);
        final Options options = new Options().setTableFormatConfig(
            new BlockBasedTableConfig().setBlockCache(cache))) {
      assertThat(options.tableFactoryName()).isEqualTo("BlockBasedTable");
    }
  }

  /**
   * Opens eight databases that share one block cache and one statistics
   * object, and checks that the BLOCK_CACHE_ADD ticker grows by one per
   * database after a put/flush/get cycle.
   */
  @Test
  public void blockCacheIntegration() throws RocksDBException {
    try (final Cache cache = new LRUCache(8 * 1024 * 1024);
         final Statistics statistics = new Statistics()) {
      for (int shard = 0; shard < 8; shard++) {
        // FlushOptions is declared as a resource so that it is closed on
        // every iteration instead of being leaked.
        try (final Options options =
                 new Options()
                     .setCreateIfMissing(true)
                     .setStatistics(statistics)
                     .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache));
             final FlushOptions flushOptions = new FlushOptions();
             final RocksDB db =
                 RocksDB.open(options, dbFolder.getRoot().getAbsolutePath() + "/" + shard)) {
          final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8);
          final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8);

          db.put(key, value);
          db.flush(flushOptions);
          db.get(key);

          assertThat(statistics.getTickerCount(TickerType.BLOCK_CACHE_ADD)).isEqualTo(shard + 1);
        }
      }
    }
  }

  @Test
  public void persistentCache() throws RocksDBException {
    try (final DBOptions dbOptions = new DBOptions().
        setInfoLogLevel(InfoLogLevel.INFO_LEVEL).
        setCreateIfMissing(true);
         final Logger logger = new Logger(dbOptions) {
           @Override
           protected void log(final InfoLogLevel infoLogLevel, final String logMsg) {
             System.out.println(infoLogLevel.name() + ": " + logMsg);
           }
         }) {
      try (final PersistentCache persistentCache =
               new PersistentCache(Env.getDefault(),
                   dbFolder.getRoot().getPath(), 1024 * 1024 * 100, logger, false);
           final Options options = new Options().setTableFormatConfig(
               new BlockBasedTableConfig().setPersistentCache(persistentCache))) {
        assertThat(options.tableFactoryName()).isEqualTo("BlockBasedTable");
      }
    }
  }

  @Test
  public void blockCacheCompressed() {
    try (final Cache cache = new LRUCache(17 * 1024 * 1024);
         final Options options = new Options().setTableFormatConfig(
             new BlockBasedTableConfig().setBlockCacheCompressed(cache))) {
      assertThat(options.tableFactoryName()).isEqualTo("BlockBasedTable");
    }
  }

  /**
   * Same shape as {@link #blockCacheIntegration()} but with only a compressed
   * block cache configured; asserts on the BLOCK_CACHE_COMPRESSED_ADD ticker.
   * Currently disabled, see the linked issue.
   */
  @Ignore("See issue: https://github.com/facebook/rocksdb/issues/4822")
  @Test
  public void blockCacheCompressedIntegration() throws RocksDBException {
    // Four distinct keys; previously all four variables held "some-key1".
    final byte[] key1 = "some-key1".getBytes(StandardCharsets.UTF_8);
    final byte[] key2 = "some-key2".getBytes(StandardCharsets.UTF_8);
    final byte[] key3 = "some-key3".getBytes(StandardCharsets.UTF_8);
    final byte[] key4 = "some-key4".getBytes(StandardCharsets.UTF_8);
    final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8);

    try (final Cache compressedCache = new LRUCache(8 * 1024 * 1024);
         final Statistics statistics = new Statistics()) {

      final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig()
          .setNoBlockCache(true)
          .setBlockCache(null)
          .setBlockCacheCompressed(compressedCache)
          .setFormatVersion(4);

      try (final Options options = new Options()
               .setCreateIfMissing(true)
               .setStatistics(statistics)
               .setTableFormatConfig(blockBasedTableConfig)) {

        for (int shard = 0; shard < 8; shard++) {
          try (final FlushOptions flushOptions = new FlushOptions();
               final WriteOptions writeOptions = new WriteOptions();
               final ReadOptions readOptions = new ReadOptions();
               final RocksDB db =
                   RocksDB.open(options, dbFolder.getRoot().getAbsolutePath() + "/" + shard)) {

            db.put(writeOptions, key1, value);
            db.put(writeOptions, key2, value);
            db.put(writeOptions, key3, value);
            db.put(writeOptions, key4, value);
            db.flush(flushOptions);

            db.get(readOptions, key1);
            db.get(readOptions, key2);
            db.get(readOptions, key3);
            db.get(readOptions, key4);

            assertThat(statistics.getTickerCount(TickerType.BLOCK_CACHE_COMPRESSED_ADD))
                .isEqualTo(shard + 1);
          }
        }
      }
    }
  }

  @Test
  public void blockSize() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockSize(10);
    assertThat(blockBasedTableConfig.blockSize()).isEqualTo(10);
  }

  @Test
  public void blockSizeDeviation() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockSizeDeviation(12);
    assertThat(blockBasedTableConfig.blockSizeDeviation()).
        isEqualTo(12);
  }

  @Test
  public void blockRestartInterval() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockRestartInterval(15);
    assertThat(blockBasedTableConfig.blockRestartInterval()).
        isEqualTo(15);
  }

  @Test
  public void indexBlockRestartInterval() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setIndexBlockRestartInterval(15);
    assertThat(blockBasedTableConfig.indexBlockRestartInterval()).
        isEqualTo(15);
  }

  @Test
  public void metadataBlockSize() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setMetadataBlockSize(1024);
    assertThat(blockBasedTableConfig.metadataBlockSize()).
        isEqualTo(1024);
  }

  @Test
  public void partitionFilters() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setPartitionFilters(true);
    assertThat(blockBasedTableConfig.partitionFilters()).
        isTrue();
  }

  @Test
  public void useDeltaEncoding() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setUseDeltaEncoding(false);
    assertThat(blockBasedTableConfig.useDeltaEncoding()).
        isFalse();
  }

  @Test
  public void blockBasedTableWithFilterPolicy() {
    try(final Options options = new Options()
        .setTableFormatConfig(new BlockBasedTableConfig()
            .setFilterPolicy(new BloomFilter(10)))) {
      assertThat(options.tableFactoryName()).
          isEqualTo("BlockBasedTable");
    }
  }

  @Test
  public void blockBasedTableWithoutFilterPolicy() {
    try(final Options options = new Options().setTableFormatConfig(
        new BlockBasedTableConfig().setFilterPolicy(null))) {
      assertThat(options.tableFactoryName()).
          isEqualTo("BlockBasedTable");
    }
  }

  @Test
  public void wholeKeyFiltering() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setWholeKeyFiltering(false);
    assertThat(blockBasedTableConfig.wholeKeyFiltering()).
        isFalse();
  }

  @Test
  public void verifyCompression() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setVerifyCompression(true);
    assertThat(blockBasedTableConfig.verifyCompression()).
        isTrue();
  }

  @Test
  public void readAmpBytesPerBit() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setReadAmpBytesPerBit(2);
    assertThat(blockBasedTableConfig.readAmpBytesPerBit()).
        isEqualTo(2);
  }

  @Test
  public void formatVersion() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    for (int version = 0; version < 5; version++) {
      blockBasedTableConfig.setFormatVersion(version);
      assertThat(blockBasedTableConfig.formatVersion()).isEqualTo(version);
    }
  }

  @Test(expected = AssertionError.class)
  public void formatVersionFailNegative() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setFormatVersion(-1);
  }

  @Test(expected = AssertionError.class)
  public void formatVersionFailIllegalVersion() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setFormatVersion(99);
  }

  @Test
  public void enableIndexCompression() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setEnableIndexCompression(false);
    assertThat(blockBasedTableConfig.enableIndexCompression()).
        isFalse();
  }

  @Test
  public void blockAlign() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockAlign(true);
    assertThat(blockBasedTableConfig.blockAlign()).
        isTrue();
  }

  @Deprecated
  @Test
  public void hashIndexAllowCollision() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setHashIndexAllowCollision(false);
    assertThat(blockBasedTableConfig.hashIndexAllowCollision()).
        isTrue();  // NOTE: setHashIndexAllowCollision should do nothing!
  }

  @Deprecated
  @Test
  public void blockCacheSize() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockCacheSize(8 * 1024);
    assertThat(blockBasedTableConfig.blockCacheSize()).
        isEqualTo(8 * 1024);
  }

  @Deprecated
  @Test
  public void blockCacheNumShardBits() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setCacheNumShardBits(5);
    assertThat(blockBasedTableConfig.cacheNumShardBits()).
        isEqualTo(5);
  }

  @Deprecated
  @Test
  public void blockCacheCompressedSize() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockCacheCompressedSize(40);
    assertThat(blockBasedTableConfig.blockCacheCompressedSize()).
        isEqualTo(40);
  }

  @Deprecated
  @Test
  public void blockCacheCompressedNumShardBits() {
    final BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig();
    blockBasedTableConfig.setBlockCacheCompressedNumShardBits(4);
    assertThat(blockBasedTableConfig.blockCacheCompressedNumShardBits()).
        isEqualTo(4);
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java000066400000000000000000000124731370372246700262430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.nio.charset.StandardCharsets;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests iteration order and seek behaviour of a database opened with each
 * {@link BuiltinComparator}, plus the enum's ordinals and names.
 */
public class BuiltinComparatorTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /** Encodes a test string with an explicit charset instead of the platform default. */
  private static byte[] bytes(final String text) {
    return text.getBytes(StandardCharsets.UTF_8);
  }

  /** Stores the three entries abc1..abc3, each with a value equal to its key. */
  private static void putSampleEntries(final RocksDB rocksDb) throws RocksDBException {
    rocksDb.put(bytes("abc1"), bytes("abc1"));
    rocksDb.put(bytes("abc2"), bytes("abc2"));
    rocksDb.put(bytes("abc3"), bytes("abc3"));
  }

  /**
   * Asserts that the iterator is positioned on a valid entry whose key and
   * value both equal {@code expected}.
   */
  private static void assertPositionedAt(final RocksIterator rocksIterator,
      final String expected) {
    assertThat(rocksIterator.isValid()).isTrue();
    assertThat(rocksIterator.key()).isEqualTo(bytes(expected));
    assertThat(rocksIterator.value()).isEqualTo(bytes(expected));
  }

  @Test
  public void builtinForwardComparator()
      throws RocksDBException {
    try (final Options options = new Options()
        .setCreateIfMissing(true)
        .setComparator(BuiltinComparator.BYTEWISE_COMPARATOR);
         final RocksDB rocksDb = RocksDB.open(options,
             dbFolder.getRoot().getAbsolutePath())
    ) {
      putSampleEntries(rocksDb);

      try (final RocksIterator rocksIterator = rocksDb.newIterator()) {
        // Iterate over keys using a iterator
        rocksIterator.seekToFirst();
        assertPositionedAt(rocksIterator, "abc1");
        rocksIterator.next();
        assertPositionedAt(rocksIterator, "abc2");
        rocksIterator.next();
        assertPositionedAt(rocksIterator, "abc3");
        rocksIterator.next();
        assertThat(rocksIterator.isValid()).isFalse();
        // Get last one
        rocksIterator.seekToLast();
        assertPositionedAt(rocksIterator, "abc3");
        // Seek for abc
        rocksIterator.seek(bytes("abc"));
        assertPositionedAt(rocksIterator, "abc1");
      }
    }
  }

  @Test
  public void builtinReverseComparator()
      throws RocksDBException {
    try (final Options options = new Options()
        .setCreateIfMissing(true)
        .setComparator(BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR);
         final RocksDB rocksDb = RocksDB.open(options,
             dbFolder.getRoot().getAbsolutePath())
    ) {
      putSampleEntries(rocksDb);

      try (final RocksIterator rocksIterator = rocksDb.newIterator()) {
        // Iterate over keys using a iterator
        rocksIterator.seekToFirst();
        assertPositionedAt(rocksIterator, "abc3");
        rocksIterator.next();
        assertPositionedAt(rocksIterator, "abc2");
        rocksIterator.next();
        assertPositionedAt(rocksIterator, "abc1");
        rocksIterator.next();
        assertThat(rocksIterator.isValid()).isFalse();
        // Get last one
        rocksIterator.seekToLast();
        assertPositionedAt(rocksIterator, "abc1");
        // Will be invalid because abc is after abc1
        rocksIterator.seek(bytes("abc"));
        assertThat(rocksIterator.isValid()).isFalse();
        // Will be abc3 because the next one after abc999
        // is abc3
        rocksIterator.seek(bytes("abc999"));
        // Validity is now asserted before key()/value() are read, as for
        // every other position in this test.
        assertPositionedAt(rocksIterator, "abc3");
      }
    }
  }

  @Test
  public void builtinComparatorEnum(){
    assertThat(BuiltinComparator.BYTEWISE_COMPARATOR.ordinal())
        .isEqualTo(0);
    assertThat(
        BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR.ordinal())
        .isEqualTo(1);
    assertThat(BuiltinComparator.values().length).isEqualTo(2);
    assertThat(BuiltinComparator.valueOf("BYTEWISE_COMPARATOR")).
        isEqualTo(BuiltinComparator.BYTEWISE_COMPARATOR);
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/CheckPointTest.java000066400000000000000000000051131370372246700246250ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.nio.charset.StandardCharsets;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests creating checkpoints of an open database with {@link Checkpoint} and
 * reopening them, plus the failure cases of {@code Checkpoint.create} and
 * {@code createCheckpoint}.
 */
public class CheckPointTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  @Rule
  public TemporaryFolder checkpointFolder = new TemporaryFolder();

  /** Encodes a test string with an explicit charset instead of the platform default. */
  private static byte[] bytes(final String text) {
    return text.getBytes(StandardCharsets.UTF_8);
  }

  /**
   * Takes one checkpoint after the first put and another after the second,
   * then opens each checkpoint and checks which keys it contains.
   */
  @Test
  public void checkPoint() throws RocksDBException {
    try (final Options options = new Options().
        setCreateIfMissing(true)) {

      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath())) {
        db.put(bytes("key"), bytes("value"));
        try (final Checkpoint checkpoint = Checkpoint.create(db)) {
          checkpoint.createCheckpoint(checkpointFolder.
              getRoot().getAbsolutePath() + "/snapshot1");
          db.put(bytes("key2"), bytes("value2"));
          checkpoint.createCheckpoint(checkpointFolder.
              getRoot().getAbsolutePath() + "/snapshot2");
        }
      }

      try (final RocksDB db = RocksDB.open(options,
          checkpointFolder.getRoot().getAbsolutePath() +
              "/snapshot1")) {
        assertThat(new String(db.get(bytes("key")), StandardCharsets.UTF_8)).
            isEqualTo("value");
        assertThat(db.get(bytes("key2"))).isNull();
      }

      try (final RocksDB db = RocksDB.open(options,
          checkpointFolder.getRoot().getAbsolutePath() +
              "/snapshot2")) {
        assertThat(new String(db.get(bytes("key")), StandardCharsets.UTF_8)).
            isEqualTo("value");
        assertThat(new String(db.get(bytes("key2")), StandardCharsets.UTF_8)).
            isEqualTo("value2");
      }
    }
  }

  @Test(expected = IllegalArgumentException.class)
  public void failIfDbIsNull() {
    try (final Checkpoint checkpoint = Checkpoint.create(null)) {
      // no-op: create(null) is expected to throw
    }
  }

  @Test(expected = IllegalStateException.class)
  public void failIfDbNotInitialized() throws RocksDBException {
    try (final RocksDB db = RocksDB.open(
        dbFolder.getRoot().getAbsolutePath())) {
      // Close first so that create() is handed an already-closed database.
      db.close();
      Checkpoint.create(db);
    }
  }

  @Test(expected = RocksDBException.class)
  public void failWithIllegalPath() throws RocksDBException {
    try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath());
         final Checkpoint checkpoint = Checkpoint.create(db)) {
      checkpoint.createCheckpoint("/Z:///:\\C:\\TZ/-");
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/ClockCacheTest.java000066400000000000000000000012271370372246700245570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import org.junit.Test;

/**
 * Smoke test: a {@link ClockCache} can be constructed and closed.
 */
public class ClockCacheTest {

  static {
    RocksDB.loadLibrary();
  }

  @Test
  public void newClockCache() {
    final long capacity = 1000;
    final int numShardBits = 16;
    final boolean strictCapacityLimit = true;
    try(final Cache clockCache = new ClockCache(capacity,
        numShardBits, strictCapacityLimit)) {
      //no op
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java000066400000000000000000000534551370372246700265650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; import java.util.ArrayList; import java.util.List; import java.util.Properties; import java.util.Random; import static org.assertj.core.api.Assertions.assertThat; public class ColumnFamilyOptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); public static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void copyConstructor() { ColumnFamilyOptions origOpts = new ColumnFamilyOptions(); origOpts.setNumLevels(rand.nextInt(8)); origOpts.setTargetFileSizeMultiplier(rand.nextInt(100)); origOpts.setLevel0StopWritesTrigger(rand.nextInt(50)); ColumnFamilyOptions copyOpts = new ColumnFamilyOptions(origOpts); assertThat(origOpts.numLevels()).isEqualTo(copyOpts.numLevels()); assertThat(origOpts.targetFileSizeMultiplier()).isEqualTo(copyOpts.targetFileSizeMultiplier()); assertThat(origOpts.level0StopWritesTrigger()).isEqualTo(copyOpts.level0StopWritesTrigger()); } @Test public void getColumnFamilyOptionsFromProps() { Properties properties = new Properties(); properties.put("write_buffer_size", "112"); properties.put("max_write_buffer_number", "13"); try (final ColumnFamilyOptions opt = ColumnFamilyOptions. getColumnFamilyOptionsFromProps(properties)) { // setup sample properties assertThat(opt).isNotNull(); assertThat(String.valueOf(opt.writeBufferSize())). isEqualTo(properties.get("write_buffer_size")); assertThat(String.valueOf(opt.maxWriteBufferNumber())). 
isEqualTo(properties.get("max_write_buffer_number")); } } @Test public void getColumnFamilyOptionsFromPropsWithIgnoreIllegalValue() { // setup sample properties final Properties properties = new Properties(); properties.put("tomato", "1024"); properties.put("burger", "2"); properties.put("write_buffer_size", "112"); properties.put("max_write_buffer_number", "13"); try (final ConfigOptions cfgOpts = new ConfigOptions().setIgnoreUnknownOptions(true); final ColumnFamilyOptions opt = ColumnFamilyOptions.getColumnFamilyOptionsFromProps(cfgOpts, properties)) { // setup sample properties assertThat(opt).isNotNull(); assertThat(String.valueOf(opt.writeBufferSize())) .isEqualTo(properties.get("write_buffer_size")); assertThat(String.valueOf(opt.maxWriteBufferNumber())) .isEqualTo(properties.get("max_write_buffer_number")); } } @Test public void failColumnFamilyOptionsFromPropsWithIllegalValue() { // setup sample properties final Properties properties = new Properties(); properties.put("tomato", "1024"); properties.put("burger", "2"); try (final ColumnFamilyOptions opt = ColumnFamilyOptions.getColumnFamilyOptionsFromProps(properties)) { assertThat(opt).isNull(); } } @Test(expected = IllegalArgumentException.class) public void failColumnFamilyOptionsFromPropsWithNullValue() { try (final ColumnFamilyOptions opt = ColumnFamilyOptions.getColumnFamilyOptionsFromProps(null)) { } } @Test(expected = IllegalArgumentException.class) public void failColumnFamilyOptionsFromPropsWithEmptyProps() { try (final ColumnFamilyOptions opt = ColumnFamilyOptions.getColumnFamilyOptionsFromProps( new Properties())) { } } @Test public void writeBufferSize() throws RocksDBException { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setWriteBufferSize(longValue); assertThat(opt.writeBufferSize()).isEqualTo(longValue); } } @Test public void maxWriteBufferNumber() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int 
intValue = rand.nextInt(); opt.setMaxWriteBufferNumber(intValue); assertThat(opt.maxWriteBufferNumber()).isEqualTo(intValue); } } @Test public void minWriteBufferNumberToMerge() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setMinWriteBufferNumberToMerge(intValue); assertThat(opt.minWriteBufferNumberToMerge()).isEqualTo(intValue); } } @Test public void numLevels() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setNumLevels(intValue); assertThat(opt.numLevels()).isEqualTo(intValue); } } @Test public void levelZeroFileNumCompactionTrigger() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setLevelZeroFileNumCompactionTrigger(intValue); assertThat(opt.levelZeroFileNumCompactionTrigger()).isEqualTo(intValue); } } @Test public void levelZeroSlowdownWritesTrigger() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setLevelZeroSlowdownWritesTrigger(intValue); assertThat(opt.levelZeroSlowdownWritesTrigger()).isEqualTo(intValue); } } @Test public void levelZeroStopWritesTrigger() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setLevelZeroStopWritesTrigger(intValue); assertThat(opt.levelZeroStopWritesTrigger()).isEqualTo(intValue); } } @Test public void targetFileSizeBase() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setTargetFileSizeBase(longValue); assertThat(opt.targetFileSizeBase()).isEqualTo(longValue); } } @Test public void targetFileSizeMultiplier() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setTargetFileSizeMultiplier(intValue); assertThat(opt.targetFileSizeMultiplier()).isEqualTo(intValue); } } @Test public void maxBytesForLevelBase() 
{ try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setMaxBytesForLevelBase(longValue); assertThat(opt.maxBytesForLevelBase()).isEqualTo(longValue); } } @Test public void levelCompactionDynamicLevelBytes() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final boolean boolValue = rand.nextBoolean(); opt.setLevelCompactionDynamicLevelBytes(boolValue); assertThat(opt.levelCompactionDynamicLevelBytes()) .isEqualTo(boolValue); } } @Test public void maxBytesForLevelMultiplier() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final double doubleValue = rand.nextDouble(); opt.setMaxBytesForLevelMultiplier(doubleValue); assertThat(opt.maxBytesForLevelMultiplier()).isEqualTo(doubleValue); } } @Test public void maxBytesForLevelMultiplierAdditional() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue1 = rand.nextInt(); final int intValue2 = rand.nextInt(); final int[] ints = new int[]{intValue1, intValue2}; opt.setMaxBytesForLevelMultiplierAdditional(ints); assertThat(opt.maxBytesForLevelMultiplierAdditional()).isEqualTo(ints); } } @Test public void maxCompactionBytes() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setMaxCompactionBytes(longValue); assertThat(opt.maxCompactionBytes()).isEqualTo(longValue); } } @Test public void softPendingCompactionBytesLimit() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setSoftPendingCompactionBytesLimit(longValue); assertThat(opt.softPendingCompactionBytesLimit()).isEqualTo(longValue); } } @Test public void hardPendingCompactionBytesLimit() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setHardPendingCompactionBytesLimit(longValue); assertThat(opt.hardPendingCompactionBytesLimit()).isEqualTo(longValue); } } @Test 
public void level0FileNumCompactionTrigger() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setLevel0FileNumCompactionTrigger(intValue); assertThat(opt.level0FileNumCompactionTrigger()).isEqualTo(intValue); } } @Test public void level0SlowdownWritesTrigger() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setLevel0SlowdownWritesTrigger(intValue); assertThat(opt.level0SlowdownWritesTrigger()).isEqualTo(intValue); } } @Test public void level0StopWritesTrigger() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setLevel0StopWritesTrigger(intValue); assertThat(opt.level0StopWritesTrigger()).isEqualTo(intValue); } } @Test public void arenaBlockSize() throws RocksDBException { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setArenaBlockSize(longValue); assertThat(opt.arenaBlockSize()).isEqualTo(longValue); } } @Test public void disableAutoCompactions() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final boolean boolValue = rand.nextBoolean(); opt.setDisableAutoCompactions(boolValue); assertThat(opt.disableAutoCompactions()).isEqualTo(boolValue); } } @Test public void maxSequentialSkipInIterations() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setMaxSequentialSkipInIterations(longValue); assertThat(opt.maxSequentialSkipInIterations()).isEqualTo(longValue); } } @Test public void inplaceUpdateSupport() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final boolean boolValue = rand.nextBoolean(); opt.setInplaceUpdateSupport(boolValue); assertThat(opt.inplaceUpdateSupport()).isEqualTo(boolValue); } } @Test public void inplaceUpdateNumLocks() throws RocksDBException { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) 
{ final long longValue = rand.nextLong(); opt.setInplaceUpdateNumLocks(longValue); assertThat(opt.inplaceUpdateNumLocks()).isEqualTo(longValue); } } @Test public void memtablePrefixBloomSizeRatio() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final double doubleValue = rand.nextDouble(); opt.setMemtablePrefixBloomSizeRatio(doubleValue); assertThat(opt.memtablePrefixBloomSizeRatio()).isEqualTo(doubleValue); } } @Test public void memtableHugePageSize() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setMemtableHugePageSize(longValue); assertThat(opt.memtableHugePageSize()).isEqualTo(longValue); } } @Test public void bloomLocality() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final int intValue = rand.nextInt(); opt.setBloomLocality(intValue); assertThat(opt.bloomLocality()).isEqualTo(intValue); } } @Test public void maxSuccessiveMerges() throws RocksDBException { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final long longValue = rand.nextLong(); opt.setMaxSuccessiveMerges(longValue); assertThat(opt.maxSuccessiveMerges()).isEqualTo(longValue); } } @Test public void optimizeFiltersForHits() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final boolean aBoolean = rand.nextBoolean(); opt.setOptimizeFiltersForHits(aBoolean); assertThat(opt.optimizeFiltersForHits()).isEqualTo(aBoolean); } } @Test public void memTable() throws RocksDBException { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { opt.setMemTableConfig(new HashLinkedListMemTableConfig()); assertThat(opt.memTableFactoryName()). 
isEqualTo("HashLinkedListRepFactory"); } } @Test public void comparator() throws RocksDBException { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { opt.setComparator(BuiltinComparator.BYTEWISE_COMPARATOR); } } @Test public void linkageOfPrepMethods() { try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { options.optimizeUniversalStyleCompaction(); options.optimizeUniversalStyleCompaction(4000); options.optimizeLevelStyleCompaction(); options.optimizeLevelStyleCompaction(3000); options.optimizeForPointLookup(10); options.optimizeForSmallDb(); } } @Test public void shouldSetTestPrefixExtractor() { try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { options.useFixedLengthPrefixExtractor(100); options.useFixedLengthPrefixExtractor(10); } } @Test public void shouldSetTestCappedPrefixExtractor() { try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { options.useCappedPrefixExtractor(100); options.useCappedPrefixExtractor(10); } } @Test public void compressionTypes() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions()) { for (final CompressionType compressionType : CompressionType.values()) { columnFamilyOptions.setCompressionType(compressionType); assertThat(columnFamilyOptions.compressionType()). isEqualTo(compressionType); assertThat(CompressionType.valueOf("NO_COMPRESSION")). 
isEqualTo(CompressionType.NO_COMPRESSION); } } } @Test public void compressionPerLevel() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions()) { assertThat(columnFamilyOptions.compressionPerLevel()).isEmpty(); List compressionTypeList = new ArrayList<>(); for (int i = 0; i < columnFamilyOptions.numLevels(); i++) { compressionTypeList.add(CompressionType.NO_COMPRESSION); } columnFamilyOptions.setCompressionPerLevel(compressionTypeList); compressionTypeList = columnFamilyOptions.compressionPerLevel(); for (CompressionType compressionType : compressionTypeList) { assertThat(compressionType).isEqualTo( CompressionType.NO_COMPRESSION); } } } @Test public void differentCompressionsPerLevel() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions()) { columnFamilyOptions.setNumLevels(3); assertThat(columnFamilyOptions.compressionPerLevel()).isEmpty(); List compressionTypeList = new ArrayList<>(); compressionTypeList.add(CompressionType.BZLIB2_COMPRESSION); compressionTypeList.add(CompressionType.SNAPPY_COMPRESSION); compressionTypeList.add(CompressionType.LZ4_COMPRESSION); columnFamilyOptions.setCompressionPerLevel(compressionTypeList); compressionTypeList = columnFamilyOptions.compressionPerLevel(); assertThat(compressionTypeList.size()).isEqualTo(3); assertThat(compressionTypeList). 
containsExactly( CompressionType.BZLIB2_COMPRESSION, CompressionType.SNAPPY_COMPRESSION, CompressionType.LZ4_COMPRESSION); } } @Test public void bottommostCompressionType() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions()) { assertThat(columnFamilyOptions.bottommostCompressionType()) .isEqualTo(CompressionType.DISABLE_COMPRESSION_OPTION); for (final CompressionType compressionType : CompressionType.values()) { columnFamilyOptions.setBottommostCompressionType(compressionType); assertThat(columnFamilyOptions.bottommostCompressionType()) .isEqualTo(compressionType); } } } @Test public void bottommostCompressionOptions() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); final CompressionOptions bottommostCompressionOptions = new CompressionOptions() .setMaxDictBytes(123)) { columnFamilyOptions.setBottommostCompressionOptions( bottommostCompressionOptions); assertThat(columnFamilyOptions.bottommostCompressionOptions()) .isEqualTo(bottommostCompressionOptions); assertThat(columnFamilyOptions.bottommostCompressionOptions() .maxDictBytes()).isEqualTo(123); } } @Test public void compressionOptions() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); final CompressionOptions compressionOptions = new CompressionOptions() .setMaxDictBytes(123)) { columnFamilyOptions.setCompressionOptions(compressionOptions); assertThat(columnFamilyOptions.compressionOptions()) .isEqualTo(compressionOptions); assertThat(columnFamilyOptions.compressionOptions().maxDictBytes()) .isEqualTo(123); } } @Test public void compactionStyles() { try (final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions()) { for (final CompactionStyle compactionStyle : CompactionStyle.values()) { columnFamilyOptions.setCompactionStyle(compactionStyle); assertThat(columnFamilyOptions.compactionStyle()). isEqualTo(compactionStyle); assertThat(CompactionStyle.valueOf("FIFO")). 
isEqualTo(CompactionStyle.FIFO); } } } @Test public void maxTableFilesSizeFIFO() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { long longValue = rand.nextLong(); // Size has to be positive longValue = (longValue < 0) ? -longValue : longValue; longValue = (longValue == 0) ? longValue + 1 : longValue; opt.setMaxTableFilesSizeFIFO(longValue); assertThat(opt.maxTableFilesSizeFIFO()). isEqualTo(longValue); } } @Test public void maxWriteBufferNumberToMaintain() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { int intValue = rand.nextInt(); // Size has to be positive intValue = (intValue < 0) ? -intValue : intValue; intValue = (intValue == 0) ? intValue + 1 : intValue; opt.setMaxWriteBufferNumberToMaintain(intValue); assertThat(opt.maxWriteBufferNumberToMaintain()). isEqualTo(intValue); } } @Test public void compactionPriorities() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { for (final CompactionPriority compactionPriority : CompactionPriority.values()) { opt.setCompactionPriority(compactionPriority); assertThat(opt.compactionPriority()). isEqualTo(compactionPriority); } } } @Test public void reportBgIoStats() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final boolean booleanValue = true; opt.setReportBgIoStats(booleanValue); assertThat(opt.reportBgIoStats()). isEqualTo(booleanValue); } } @Test public void ttl() { try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { options.setTtl(1000 * 60); assertThat(options.ttl()). isEqualTo(1000 * 60); } } @Test public void compactionOptionsUniversal() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions(); final CompactionOptionsUniversal optUni = new CompactionOptionsUniversal() .setCompressionSizePercent(7)) { opt.setCompactionOptionsUniversal(optUni); assertThat(opt.compactionOptionsUniversal()). 
isEqualTo(optUni); assertThat(opt.compactionOptionsUniversal().compressionSizePercent()) .isEqualTo(7); } } @Test public void compactionOptionsFIFO() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions(); final CompactionOptionsFIFO optFifo = new CompactionOptionsFIFO() .setMaxTableFilesSize(2000)) { opt.setCompactionOptionsFIFO(optFifo); assertThat(opt.compactionOptionsFIFO()). isEqualTo(optFifo); assertThat(opt.compactionOptionsFIFO().maxTableFilesSize()) .isEqualTo(2000); } } @Test public void forceConsistencyChecks() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { final boolean booleanValue = true; opt.setForceConsistencyChecks(booleanValue); assertThat(opt.forceConsistencyChecks()). isEqualTo(booleanValue); } } @Test public void compactionFilter() { try(final ColumnFamilyOptions options = new ColumnFamilyOptions(); final RemoveEmptyValueCompactionFilter cf = new RemoveEmptyValueCompactionFilter()) { options.setCompactionFilter(cf); assertThat(options.compactionFilter()).isEqualTo(cf); } } @Test public void compactionFilterFactory() { try(final ColumnFamilyOptions options = new ColumnFamilyOptions(); final RemoveEmptyValueCompactionFilterFactory cff = new RemoveEmptyValueCompactionFilterFactory()) { options.setCompactionFilterFactory(cff); assertThat(options.compactionFilterFactory()).isEqualTo(cff); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/ColumnFamilyTest.java000066400000000000000000000716551370372246700252130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import java.util.*;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for creating, listing, opening, reading, writing and dropping
 * column families through {@link RocksDB}.
 *
 * Each test opens its database under the {@link TemporaryFolder} rule.
 * Native option objects are created in try-with-resources blocks so that
 * they are closed even when an assertion fails.
 */
public class ColumnFamilyTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /** Absolute path of the directory in which the test database is opened. */
  private String dbPath() {
    return dbFolder.getRoot().getAbsolutePath();
  }

  /** Closes every handle in the given list. */
  private static void closeAll(final List<ColumnFamilyHandle> handles) {
    for (final ColumnFamilyHandle handle : handles) {
      handle.close();
    }
  }

  @Test
  public void columnFamilyDescriptorName() throws RocksDBException {
    final byte[] cfName = "some_name".getBytes(UTF_8);

    try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) {
      final ColumnFamilyDescriptor cfDescriptor =
          new ColumnFamilyDescriptor(cfName, cfOptions);
      assertThat(cfDescriptor.getName()).isEqualTo(cfName);
    }
  }

  @Test
  public void columnFamilyDescriptorOptions() throws RocksDBException {
    final byte[] cfName = "some_name".getBytes(UTF_8);

    try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()
             .setCompressionType(CompressionType.BZLIB2_COMPRESSION)) {
      final ColumnFamilyDescriptor cfDescriptor =
          new ColumnFamilyDescriptor(cfName, cfOptions);
      assertThat(cfDescriptor.getOptions().compressionType())
          .isEqualTo(CompressionType.BZLIB2_COMPRESSION);
    }
  }

  @Test
  public void listColumnFamilies() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath())) {
      // Test listColumnFamilies
      final List<byte[]> columnFamilyNames =
          RocksDB.listColumnFamilies(options, dbPath());
      assertThat(columnFamilyNames).isNotNull();
      assertThat(columnFamilyNames.size()).isEqualTo(1);
      assertThat(new String(columnFamilyNames.get(0), UTF_8)).isEqualTo("default");
    }
  }

  @Test
  public void defaultColumnFamily() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath())) {
      final ColumnFamilyHandle cfh = db.getDefaultColumnFamily();
      try {
        assertThat(cfh).isNotNull();

        assertThat(cfh.getName()).isEqualTo("default".getBytes(UTF_8));
        assertThat(cfh.getID()).isEqualTo(0);
        assertThat(cfh.getDescriptor().getName()).isEqualTo("default".getBytes(UTF_8));

        final byte[] key = "key".getBytes(UTF_8);
        final byte[] value = "value".getBytes(UTF_8);

        db.put(cfh, key, value);

        final byte[] actualValue = db.get(cfh, key);
        assertThat(actualValue).isEqualTo(value);
      } finally {
        cfh.close();
      }
    }
  }

  @Test
  public void createColumnFamily() throws RocksDBException {
    final byte[] cfName = "new_cf".getBytes(UTF_8);

    // cfOptions is declared first so that it is closed last, after the
    // database that uses it.
    try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions();
         final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath())) {
      final ColumnFamilyDescriptor cfDescriptor =
          new ColumnFamilyDescriptor(cfName, cfOptions);

      final ColumnFamilyHandle columnFamilyHandle = db.createColumnFamily(cfDescriptor);
      try {
        assertThat(columnFamilyHandle.getName()).isEqualTo(cfName);
        assertThat(columnFamilyHandle.getID()).isEqualTo(1);

        final ColumnFamilyDescriptor latestDescriptor = columnFamilyHandle.getDescriptor();
        assertThat(latestDescriptor.getName()).isEqualTo(cfName);

        final List<byte[]> columnFamilyNames =
            RocksDB.listColumnFamilies(options, dbPath());
        assertThat(columnFamilyNames).isNotNull();
        assertThat(columnFamilyNames.size()).isEqualTo(2);
        assertThat(new String(columnFamilyNames.get(0), UTF_8)).isEqualTo("default");
        assertThat(new String(columnFamilyNames.get(1), UTF_8)).isEqualTo("new_cf");
      } finally {
        columnFamilyHandle.close();
      }
    }
  }

  @Test
  public void openWithColumnFamilies() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));

    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    // Test open database with column family names
    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList);
         final WriteOptions writeOptions = new WriteOptions();
         final ReadOptions readOptions = new ReadOptions()) {
      try {
        assertThat(columnFamilyHandleList.size()).isEqualTo(2);
        final ColumnFamilyHandle defaultCf = columnFamilyHandleList.get(0);
        final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1);

        db.put("dfkey1".getBytes(UTF_8), "dfvalue".getBytes(UTF_8));
        db.put(defaultCf, "dfkey2".getBytes(UTF_8), "dfvalue".getBytes(UTF_8));
        db.put(newCf, "newcfkey1".getBytes(UTF_8), "newcfvalue".getBytes(UTF_8));

        final String retVal =
            new String(db.get(newCf, "newcfkey1".getBytes(UTF_8)), UTF_8);
        assertThat(retVal).isEqualTo("newcfvalue");

        // A key written without a handle must not be visible through new_cf.
        assertThat(db.get(newCf, "dfkey1".getBytes(UTF_8))).isNull();

        db.delete(newCf, "newcfkey1".getBytes(UTF_8));
        assertThat(db.get(newCf, "newcfkey1".getBytes(UTF_8))).isNull();

        db.delete(defaultCf, writeOptions, "dfkey2".getBytes(UTF_8));
        assertThat(db.get(defaultCf, readOptions, "dfkey2".getBytes(UTF_8))).isNull();
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void getWithOutValueAndCf() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    // Test open database with column family names
    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList);
         final WriteOptions writeOptions = new WriteOptions();
         final ReadOptions readOptions = new ReadOptions()) {
      try {
        final ColumnFamilyHandle defaultCf = columnFamilyHandleList.get(0);

        db.put(defaultCf, writeOptions, "key1".getBytes(UTF_8), "value".getBytes(UTF_8));
        db.put("key2".getBytes(UTF_8), "12345678".getBytes(UTF_8));

        final byte[] outValue = new byte[5];

        // not found value
        int getResult = db.get("keyNotFound".getBytes(UTF_8), outValue);
        assertThat(getResult).isEqualTo(RocksDB.NOT_FOUND);

        // found value which fits in outValue
        getResult = db.get(defaultCf, "key1".getBytes(UTF_8), outValue);
        assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND);
        assertThat(outValue).isEqualTo("value".getBytes(UTF_8));

        // found value which fits partially
        getResult = db.get(defaultCf, readOptions, "key2".getBytes(UTF_8), outValue);
        assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND);
        assertThat(outValue).isEqualTo("12345".getBytes(UTF_8));
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void createWriteDropColumnFamily() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    // tmpCfOptions is declared first so that it is closed last.
    try (final ColumnFamilyOptions tmpCfOptions = new ColumnFamilyOptions();
         final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      ColumnFamilyHandle tmpColumnFamilyHandle = null;
      try {
        tmpColumnFamilyHandle = db.createColumnFamily(
            new ColumnFamilyDescriptor("tmpCF".getBytes(UTF_8), tmpCfOptions));
        db.put(tmpColumnFamilyHandle, "key".getBytes(UTF_8), "value".getBytes(UTF_8));
        db.dropColumnFamily(tmpColumnFamilyHandle);
        // Dropping must not release the Java handle; it is closed below.
        assertThat(tmpColumnFamilyHandle.isOwningHandle()).isTrue();
      } finally {
        if (tmpColumnFamilyHandle != null) {
          tmpColumnFamilyHandle.close();
        }
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void createWriteDropColumnFamilies() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    // Both option objects are declared first so that they are closed last.
    try (final ColumnFamilyOptions tmpCfOptions = new ColumnFamilyOptions();
         final ColumnFamilyOptions tmpCfOptions2 = new ColumnFamilyOptions();
         final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      ColumnFamilyHandle tmpColumnFamilyHandle = null;
      ColumnFamilyHandle tmpColumnFamilyHandle2 = null;
      try {
        tmpColumnFamilyHandle = db.createColumnFamily(
            new ColumnFamilyDescriptor("tmpCF".getBytes(UTF_8), tmpCfOptions));
        tmpColumnFamilyHandle2 = db.createColumnFamily(
            new ColumnFamilyDescriptor("tmpCF2".getBytes(UTF_8), tmpCfOptions2));
        db.put(tmpColumnFamilyHandle, "key".getBytes(UTF_8), "value".getBytes(UTF_8));
        db.put(tmpColumnFamilyHandle2, "key".getBytes(UTF_8), "value".getBytes(UTF_8));
        db.dropColumnFamilies(Arrays.asList(tmpColumnFamilyHandle, tmpColumnFamilyHandle2));
        // Dropping must not release the Java handles; they are closed below.
        assertThat(tmpColumnFamilyHandle.isOwningHandle()).isTrue();
        assertThat(tmpColumnFamilyHandle2.isOwningHandle()).isTrue();
      } finally {
        if (tmpColumnFamilyHandle != null) {
          tmpColumnFamilyHandle.close();
        }
        if (tmpColumnFamilyHandle2 != null) {
          tmpColumnFamilyHandle2.close();
        }
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void writeBatch() throws RocksDBException {
    try (final StringAppendOperator stringAppendOperator = new StringAppendOperator();
         final ColumnFamilyOptions defaultCfOptions = new ColumnFamilyOptions()
             .setMergeOperator(stringAppendOperator)) {
      final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
          new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, defaultCfOptions),
          new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
      final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

      try (final DBOptions options = new DBOptions()
               .setCreateIfMissing(true)
               .setCreateMissingColumnFamilies(true);
           final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
               columnFamilyHandleList);
           final WriteBatch writeBatch = new WriteBatch();
           final WriteOptions writeOpt = new WriteOptions()) {
        try {
          final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1);

          writeBatch.put("key".getBytes(UTF_8), "value".getBytes(UTF_8));
          writeBatch.put(db.getDefaultColumnFamily(),
              "mergeKey".getBytes(UTF_8), "merge".getBytes(UTF_8));
          writeBatch.merge(db.getDefaultColumnFamily(),
              "mergeKey".getBytes(UTF_8), "merge".getBytes(UTF_8));
          writeBatch.put(newCf, "newcfkey".getBytes(UTF_8), "value".getBytes(UTF_8));
          writeBatch.put(newCf, "newcfkey2".getBytes(UTF_8), "value2".getBytes(UTF_8));
          writeBatch.delete("xyz".getBytes(UTF_8));
          writeBatch.delete(newCf, "xyz".getBytes(UTF_8));
          db.write(writeOpt, writeBatch);

          // The original wrapped "== null" in assertThat(...) with no
          // terminal check, which verified nothing.
          assertThat(db.get(newCf, "xyz".getBytes(UTF_8))).isNull();
          assertThat(new String(db.get(newCf, "newcfkey".getBytes(UTF_8)), UTF_8))
              .isEqualTo("value");
          assertThat(new String(db.get(newCf, "newcfkey2".getBytes(UTF_8)), UTF_8))
              .isEqualTo("value2");
          assertThat(new String(db.get("key".getBytes(UTF_8)), UTF_8)).isEqualTo("value");
          // check if key is merged
          assertThat(new String(
              db.get(db.getDefaultColumnFamily(), "mergeKey".getBytes(UTF_8)), UTF_8))
              .isEqualTo("merge,merge");
        } finally {
          closeAll(columnFamilyHandleList);
        }
      }
    }
  }

  @Test
  public void iteratorOnColumnFamily() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      try {
        final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1);

        db.put(newCf, "newcfkey".getBytes(UTF_8), "value".getBytes(UTF_8));
        db.put(newCf, "newcfkey2".getBytes(UTF_8), "value2".getBytes(UTF_8));

        try (final RocksIterator rocksIterator = db.newIterator(newCf)) {
          rocksIterator.seekToFirst();

          final Map<String, String> refMap = new HashMap<>();
          refMap.put("newcfkey", "value");
          refMap.put("newcfkey2", "value2");

          int i = 0;
          while (rocksIterator.isValid()) {
            i++;
            assertThat(refMap.get(new String(rocksIterator.key(), UTF_8)))
                .isEqualTo(new String(rocksIterator.value(), UTF_8));
            rocksIterator.next();
          }
          assertThat(i).isEqualTo(2);
        }
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void multiGet() throws RocksDBException {
    // Exercises the same calls as multiGetAsList().
    assertMultiGetAsListReturnsBothValues();
  }

  @Test
  public void multiGetAsList() throws RocksDBException {
    assertMultiGetAsListReturnsBothValues();
  }

  /**
   * Writes one key into each of two column families and checks that
   * multiGetAsList returns both values, with and without ReadOptions.
   */
  private void assertMultiGetAsListReturnsBothValues() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList);
         final ReadOptions readOptions = new ReadOptions()) {
      try {
        db.put(columnFamilyHandleList.get(0),
            "key".getBytes(UTF_8), "value".getBytes(UTF_8));
        db.put(columnFamilyHandleList.get(1),
            "newcfkey".getBytes(UTF_8), "value".getBytes(UTF_8));

        // keys.get(i) is looked up through columnFamilyHandleList.get(i).
        final List<byte[]> keys = Arrays.asList(
            "key".getBytes(UTF_8), "newcfkey".getBytes(UTF_8));

        List<byte[]> retValues = db.multiGetAsList(columnFamilyHandleList, keys);
        assertThat(retValues.size()).isEqualTo(2);
        assertThat(new String(retValues.get(0), UTF_8)).isEqualTo("value");
        assertThat(new String(retValues.get(1), UTF_8)).isEqualTo("value");

        retValues = db.multiGetAsList(readOptions, columnFamilyHandleList, keys);
        assertThat(retValues.size()).isEqualTo(2);
        assertThat(new String(retValues.get(0), UTF_8)).isEqualTo("value");
        assertThat(new String(retValues.get(1), UTF_8)).isEqualTo("value");
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void properties() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      try {
        final ColumnFamilyHandle defaultCf = columnFamilyHandleList.get(0);
        final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1);

        assertThat(db.getProperty("rocksdb.estimate-num-keys")).isNotNull();
        assertThat(db.getLongProperty(defaultCf, "rocksdb.estimate-num-keys"))
            .isGreaterThanOrEqualTo(0);
        assertThat(db.getProperty("rocksdb.stats")).isNotNull();
        assertThat(db.getProperty(defaultCf, "rocksdb.sstables")).isNotNull();
        assertThat(db.getProperty(newCf, "rocksdb.estimate-num-keys")).isNotNull();
        assertThat(db.getProperty(newCf, "rocksdb.stats")).isNotNull();
        assertThat(db.getProperty(newCf, "rocksdb.sstables")).isNotNull();
        assertThat(db.getAggregatedLongProperty("rocksdb.estimate-num-keys"))
            .isNotNull();
        assertThat(db.getAggregatedLongProperty("rocksdb.estimate-num-keys"))
            .isGreaterThanOrEqualTo(0);
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void iterators() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      List<RocksIterator> iterators = null;
      try {
        iterators = db.newIterators(columnFamilyHandleList);
        assertThat(iterators.size()).isEqualTo(2);

        // This test writes no data itself; the loops below only check
        // whatever entries the iterators happen to yield.
        RocksIterator iter = iterators.get(0);
        iter.seekToFirst();
        final Map<String, String> defRefMap = new HashMap<>();
        defRefMap.put("dfkey1", "dfvalue");
        defRefMap.put("key", "value");
        while (iter.isValid()) {
          assertThat(defRefMap.get(new String(iter.key(), UTF_8)))
              .isEqualTo(new String(iter.value(), UTF_8));
          iter.next();
        }

        // iterate over new_cf key/value pairs
        final Map<String, String> cfRefMap = new HashMap<>();
        cfRefMap.put("newcfkey", "value");
        cfRefMap.put("newcfkey2", "value2");
        iter = iterators.get(1);
        iter.seekToFirst();
        while (iter.isValid()) {
          assertThat(cfRefMap.get(new String(iter.key(), UTF_8)))
              .isEqualTo(new String(iter.value(), UTF_8));
          iter.next();
        }
      } finally {
        if (iterators != null) {
          for (final RocksIterator rocksIterator : iterators) {
            rocksIterator.close();
          }
        }
        closeAll(columnFamilyHandleList);
      }
    }
  }

  // NOTE(review): unlike the tests above, the four "fail*" tests below open
  // the database without setCreateMissingColumnFamilies(true) although
  // "new_cf" is requested. Confirm that the expected RocksDBException is
  // raised by the operation under test rather than by RocksDB.open().

  @Test(expected = RocksDBException.class)
  public void failPutDisposedCF() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      try {
        db.dropColumnFamily(columnFamilyHandleList.get(1));
        db.put(columnFamilyHandleList.get(1),
            "key".getBytes(UTF_8), "value".getBytes(UTF_8));
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test(expected = RocksDBException.class)
  public void failRemoveDisposedCF() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      try {
        db.dropColumnFamily(columnFamilyHandleList.get(1));
        db.delete(columnFamilyHandleList.get(1), "key".getBytes(UTF_8));
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test(expected = RocksDBException.class)
  public void failGetDisposedCF() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      try {
        db.dropColumnFamily(columnFamilyHandleList.get(1));
        db.get(columnFamilyHandleList.get(1), "key".getBytes(UTF_8));
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test(expected = RocksDBException.class)
  public void failMultiGetWithoutCorrectNumberOfCF() throws RocksDBException {
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath(), cfDescriptors,
             columnFamilyHandleList)) {
      try {
        final List<byte[]> keys = new ArrayList<>();
        keys.add("key".getBytes(UTF_8));
        keys.add("newcfkey".getBytes(UTF_8));

        // Deliberately empty: two keys but no column family handles.
        final List<ColumnFamilyHandle> cfCustomList = new ArrayList<>();
        db.multiGetAsList(cfCustomList, keys);
      } finally {
        closeAll(columnFamilyHandleList);
      }
    }
  }

  @Test
  public void testByteCreateFolumnFamily() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath())) {
      // Column family names consisting of a single raw byte each.
      final byte[] b0 = new byte[]{(byte) 0x00};
      final byte[] b1 = new byte[]{(byte) 0x01};
      final byte[] b2 = new byte[]{(byte) 0x02};

      ColumnFamilyHandle cf1 = null;
      ColumnFamilyHandle cf2 = null;
      ColumnFamilyHandle cf3 = null;
      try {
        cf1 = db.createColumnFamily(new ColumnFamilyDescriptor(b0));
        cf2 = db.createColumnFamily(new ColumnFamilyDescriptor(b1));
        final List<byte[]> families = RocksDB.listColumnFamilies(options, dbPath());
        assertThat(families).contains("default".getBytes(UTF_8), b0, b1);
        cf3 = db.createColumnFamily(new ColumnFamilyDescriptor(b2));
      } finally {
        if (cf1 != null) {
          cf1.close();
        }
        if (cf2 != null) {
          cf2.close();
        }
        if (cf3 != null) {
          cf3.close();
        }
      }
    }
  }

  @Test
  public void testCFNamesWithZeroBytes() throws RocksDBException {
    ColumnFamilyHandle cf1 = null;
    ColumnFamilyHandle cf2 = null;
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath())) {
      try {
        // Names that start with, or consist only of, zero bytes.
        final byte[] b0 = new byte[]{0, 0};
        final byte[] b1 = new byte[]{0, 1};
        cf1 = db.createColumnFamily(new ColumnFamilyDescriptor(b0));
        cf2 = db.createColumnFamily(new ColumnFamilyDescriptor(b1));
        final List<byte[]> families = RocksDB.listColumnFamilies(options, dbPath());
        assertThat(families).contains("default".getBytes(UTF_8), b0, b1);
      } finally {
        if (cf1 != null) {
          cf1.close();
        }
        if (cf2 != null) {
          cf2.close();
        }
      }
    }
  }

  @Test
  public void testCFNameSimplifiedChinese() throws RocksDBException {
    ColumnFamilyHandle columnFamilyHandle = null;
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath())) {
      try {
        // Encoded explicitly as UTF-8 so the name does not depend on the
        // platform default charset.
        final byte[] simplifiedChinese = "\u7b80\u4f53\u5b57".getBytes(UTF_8);
        columnFamilyHandle = db.createColumnFamily(
            new ColumnFamilyDescriptor(simplifiedChinese));

        final List<byte[]> families = RocksDB.listColumnFamilies(options, dbPath());
        assertThat(families).contains("default".getBytes(UTF_8), simplifiedChinese);
      } finally {
        if (columnFamilyHandle != null) {
          columnFamilyHandle.close();
        }
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java000066400000000000000000000060361370372246700265220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import org.rocksdb.CompactRangeOptions.BottommostLevelCompaction; import static org.assertj.core.api.Assertions.assertThat; public class CompactRangeOptionsTest { static { RocksDB.loadLibrary(); } @Test public void exclusiveManualCompaction() { CompactRangeOptions opt = new CompactRangeOptions(); boolean value = false; opt.setExclusiveManualCompaction(value); assertThat(opt.exclusiveManualCompaction()).isEqualTo(value); value = true; opt.setExclusiveManualCompaction(value); assertThat(opt.exclusiveManualCompaction()).isEqualTo(value); } @Test public void bottommostLevelCompaction() { CompactRangeOptions opt = new CompactRangeOptions(); BottommostLevelCompaction value = BottommostLevelCompaction.kSkip; opt.setBottommostLevelCompaction(value); assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); value = BottommostLevelCompaction.kForce; opt.setBottommostLevelCompaction(value); assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); value = BottommostLevelCompaction.kIfHaveCompactionFilter; opt.setBottommostLevelCompaction(value); assertThat(opt.bottommostLevelCompaction()).isEqualTo(value); } @Test public void changeLevel() { CompactRangeOptions opt = new CompactRangeOptions(); boolean value = false; opt.setChangeLevel(value); assertThat(opt.changeLevel()).isEqualTo(value); value = true; opt.setChangeLevel(value); assertThat(opt.changeLevel()).isEqualTo(value); } @Test public void targetLevel() { CompactRangeOptions opt = new CompactRangeOptions(); int value = 2; opt.setTargetLevel(value); 
assertThat(opt.targetLevel()).isEqualTo(value); value = 3; opt.setTargetLevel(value); assertThat(opt.targetLevel()).isEqualTo(value); } @Test public void targetPathId() { CompactRangeOptions opt = new CompactRangeOptions(); int value = 2; opt.setTargetPathId(value); assertThat(opt.targetPathId()).isEqualTo(value); value = 3; opt.setTargetPathId(value); assertThat(opt.targetPathId()).isEqualTo(value); } @Test public void allowWriteStall() { CompactRangeOptions opt = new CompactRangeOptions(); boolean value = false; opt.setAllowWriteStall(value); assertThat(opt.allowWriteStall()).isEqualTo(value); value = true; opt.setAllowWriteStall(value); assertThat(opt.allowWriteStall()).isEqualTo(value); } @Test public void maxSubcompactions() { CompactRangeOptions opt = new CompactRangeOptions(); int value = 2; opt.setMaxSubcompactions(value); assertThat(opt.maxSubcompactions()).isEqualTo(value); value = 3; opt.setMaxSubcompactions(value); assertThat(opt.maxSubcompactions()).isEqualTo(value); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java000066400000000000000000000045021370372246700273710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Checks that a compaction filter factory set on {@link ColumnFamilyOptions}
 * takes effect when the column family is compacted.
 */
public class CompactionFilterFactoryTest {

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /**
   * Opens a database with a "new_cf" column family configured with a
   * {@link RemoveEmptyValueCompactionFilterFactory}, writes one key with a
   * non-empty value and one key with an empty value, compacts the column
   * family, and asserts that the first key is still readable while
   * {@code keyMayExist} reports false for the second.
   */
  @Test
  public void columnFamilyOptions_setCompactionFilterFactory()
      throws RocksDBException {
    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RemoveEmptyValueCompactionFilterFactory compactionFilterFactory
             = new RemoveEmptyValueCompactionFilterFactory();
         final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions()
             .setCompactionFilterFactory(compactionFilterFactory)) {

      // Parameterized lists: cfHandles.get(1) has to yield a ColumnFamilyHandle.
      final List<ColumnFamilyDescriptor> cfNames = Arrays.asList(
          new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
          new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts));
      final List<ColumnFamilyHandle> cfHandles = new ArrayList<>();

      try (final RocksDB rocksDb = RocksDB.open(options,
               dbFolder.getRoot().getAbsolutePath(), cfNames, cfHandles)) {
        try {
          final ColumnFamilyHandle newCf = cfHandles.get(1);

          final byte[] key1 = "key1".getBytes();
          final byte[] key2 = "key2".getBytes();
          final byte[] value1 = "value1".getBytes();
          final byte[] value2 = new byte[0];

          rocksDb.put(newCf, key1, value1);
          rocksDb.put(newCf, key2, value2);

          rocksDb.compactRange(newCf);

          assertThat(rocksDb.get(newCf, key1)).isEqualTo(value1);
          final boolean exists = rocksDb.keyMayExist(newCf, key2, null);
          assertThat(exists).isFalse();
        } finally {
          // Handles are closed before the database is closed by the enclosing try.
          for (final ColumnFamilyHandle cfHandle : cfHandles) {
            cfHandle.close();
          }
        }
      }
    }
  }
}
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionJobInfoTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void columnFamilyName() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.columnFamilyName()) .isEmpty(); } } @Test public void status() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.status().getCode()) .isEqualTo(Status.Code.Ok); } } @Test public void threadId() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.threadId()) .isEqualTo(0); } } @Test public void jobId() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.jobId()) .isEqualTo(0); } } @Test public void baseInputLevel() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.baseInputLevel()) .isEqualTo(0); } } @Test public void outputLevel() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.outputLevel()) .isEqualTo(0); } } @Test public void inputFiles() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.inputFiles()) .isEmpty(); } } @Test public void outputFiles() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.outputFiles()) .isEmpty(); } } @Test public void tableProperties() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { 
assertThat(compactionJobInfo.tableProperties()) .isEmpty(); } } @Test public void compactionReason() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.compactionReason()) .isEqualTo(CompactionReason.kUnknown); } } @Test public void compression() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.compression()) .isEqualTo(CompressionType.NO_COMPRESSION); } } @Test public void stats() { try (final CompactionJobInfo compactionJobInfo = new CompactionJobInfo()) { assertThat(compactionJobInfo.stats()) .isNotNull(); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java000066400000000000000000000134431370372246700263510ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionJobStatsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void reset() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { compactionJobStats.reset(); assertThat(compactionJobStats.elapsedMicros()).isEqualTo(0); } } @Test public void add() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats(); final CompactionJobStats otherCompactionJobStats = new CompactionJobStats()) { compactionJobStats.add(otherCompactionJobStats); } } @Test public void elapsedMicros() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.elapsedMicros()).isEqualTo(0); } } @Test public void numInputRecords() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numInputRecords()).isEqualTo(0); } } @Test public void numInputFiles() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numInputFiles()).isEqualTo(0); } } @Test public void numInputFilesAtOutputLevel() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numInputFilesAtOutputLevel()).isEqualTo(0); } } @Test public void numOutputRecords() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numOutputRecords()).isEqualTo(0); } } @Test public void numOutputFiles() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numOutputFiles()).isEqualTo(0); } } @Test public void isManualCompaction() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { 
assertThat(compactionJobStats.isManualCompaction()).isFalse(); } } @Test public void totalInputBytes() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.totalInputBytes()).isEqualTo(0); } } @Test public void totalOutputBytes() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.totalOutputBytes()).isEqualTo(0); } } @Test public void numRecordsReplaced() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numRecordsReplaced()).isEqualTo(0); } } @Test public void totalInputRawKeyBytes() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.totalInputRawKeyBytes()).isEqualTo(0); } } @Test public void totalInputRawValueBytes() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.totalInputRawValueBytes()).isEqualTo(0); } } @Test public void numInputDeletionRecords() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numInputDeletionRecords()).isEqualTo(0); } } @Test public void numExpiredDeletionRecords() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numExpiredDeletionRecords()).isEqualTo(0); } } @Test public void numCorruptKeys() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numCorruptKeys()).isEqualTo(0); } } @Test public void fileWriteNanos() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.fileWriteNanos()).isEqualTo(0); } } @Test public void fileRangeSyncNanos() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.fileRangeSyncNanos()).isEqualTo(0); } } @Test public void 
fileFsyncNanos() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.fileFsyncNanos()).isEqualTo(0); } } @Test public void filePrepareWriteNanos() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.filePrepareWriteNanos()).isEqualTo(0); } } @Test public void smallestOutputKeyPrefix() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.smallestOutputKeyPrefix()).isEmpty(); } } @Test public void largestOutputKeyPrefix() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.largestOutputKeyPrefix()).isEmpty(); } } @Test public void numSingleDelFallthru() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numSingleDelFallthru()).isEqualTo(0); } } @Test public void numSingleDelMismatch() { try (final CompactionJobStats compactionJobStats = new CompactionJobStats()) { assertThat(compactionJobStats.numSingleDelMismatch()).isEqualTo(0); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java000066400000000000000000000017541370372246700267210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionOptionsFIFOTest { static { RocksDB.loadLibrary(); } @Test public void maxTableFilesSize() { final long size = 500 * 1024 * 1026; try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { opt.setMaxTableFilesSize(size); assertThat(opt.maxTableFilesSize()).isEqualTo(size); } } @Test public void allowCompaction() { final boolean allowCompaction = true; try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { opt.setAllowCompaction(allowCompaction); assertThat(opt.allowCompaction()).isEqualTo(allowCompaction); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionOptionsTest.java000066400000000000000000000033051370372246700262470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionOptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void compression() { try (final CompactionOptions compactionOptions = new CompactionOptions()) { assertThat(compactionOptions.compression()) .isEqualTo(CompressionType.SNAPPY_COMPRESSION); compactionOptions.setCompression(CompressionType.NO_COMPRESSION); assertThat(compactionOptions.compression()) .isEqualTo(CompressionType.NO_COMPRESSION); } } @Test public void outputFileSizeLimit() { final long mb250 = 1024 * 1024 * 250; try (final CompactionOptions compactionOptions = new CompactionOptions()) { assertThat(compactionOptions.outputFileSizeLimit()) .isEqualTo(-1); compactionOptions.setOutputFileSizeLimit(mb250); assertThat(compactionOptions.outputFileSizeLimit()) .isEqualTo(mb250); } } @Test public void maxSubcompactions() { try (final CompactionOptions compactionOptions = new CompactionOptions()) { assertThat(compactionOptions.maxSubcompactions()) .isEqualTo(0); compactionOptions.setMaxSubcompactions(9); assertThat(compactionOptions.maxSubcompactions()) .isEqualTo(9); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java000066400000000000000000000050271370372246700301430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionOptionsUniversalTest { static { RocksDB.loadLibrary(); } @Test public void sizeRatio() { final int sizeRatio = 4; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { opt.setSizeRatio(sizeRatio); assertThat(opt.sizeRatio()).isEqualTo(sizeRatio); } } @Test public void minMergeWidth() { final int minMergeWidth = 3; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { opt.setMinMergeWidth(minMergeWidth); assertThat(opt.minMergeWidth()).isEqualTo(minMergeWidth); } } @Test public void maxMergeWidth() { final int maxMergeWidth = Integer.MAX_VALUE - 1234; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { opt.setMaxMergeWidth(maxMergeWidth); assertThat(opt.maxMergeWidth()).isEqualTo(maxMergeWidth); } } @Test public void maxSizeAmplificationPercent() { final int maxSizeAmplificationPercent = 150; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { opt.setMaxSizeAmplificationPercent(maxSizeAmplificationPercent); assertThat(opt.maxSizeAmplificationPercent()).isEqualTo(maxSizeAmplificationPercent); } } @Test public void compressionSizePercent() { final int compressionSizePercent = 500; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { opt.setCompressionSizePercent(compressionSizePercent); assertThat(opt.compressionSizePercent()).isEqualTo(compressionSizePercent); } } @Test public void stopStyle() { final CompactionStopStyle stopStyle = CompactionStopStyle.CompactionStopStyleSimilarSize; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { opt.setStopStyle(stopStyle); assertThat(opt.stopStyle()).isEqualTo(stopStyle); } } @Test public void allowTrivialMove() { final boolean allowTrivialMove = true; try(final CompactionOptionsUniversal opt = new CompactionOptionsUniversal()) { 
opt.setAllowTrivialMove(allowTrivialMove); assertThat(opt.allowTrivialMove()).isEqualTo(allowTrivialMove); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionPriorityTest.java000066400000000000000000000017221370372246700264360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionPriorityTest { @Test(expected = IllegalArgumentException.class) public void failIfIllegalByteValueProvided() { CompactionPriority.getCompactionPriority((byte) -1); } @Test public void getCompactionPriority() { assertThat(CompactionPriority.getCompactionPriority( CompactionPriority.OldestLargestSeqFirst.getValue())) .isEqualTo(CompactionPriority.OldestLargestSeqFirst); } @Test public void valueOf() { assertThat(CompactionPriority.valueOf("OldestSmallestSeqFirst")). isEqualTo(CompactionPriority.OldestSmallestSeqFirst); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java000066400000000000000000000017721370372246700265700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompactionStopStyleTest { @Test(expected = IllegalArgumentException.class) public void failIfIllegalByteValueProvided() { CompactionStopStyle.getCompactionStopStyle((byte) -1); } @Test public void getCompactionStopStyle() { assertThat(CompactionStopStyle.getCompactionStopStyle( CompactionStopStyle.CompactionStopStyleTotalSize.getValue())) .isEqualTo(CompactionStopStyle.CompactionStopStyleTotalSize); } @Test public void valueOf() { assertThat(CompactionStopStyle.valueOf("CompactionStopStyleSimilarSize")). isEqualTo(CompactionStopStyle.CompactionStopStyleSimilarSize); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java000066400000000000000000000035531370372246700262670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class ComparatorOptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void reusedSynchronisationType() { try(final ComparatorOptions copt = new ComparatorOptions()) { copt.setReusedSynchronisationType(ReusedSynchronisationType.MUTEX); assertThat(copt.reusedSynchronisationType()) .isEqualTo(ReusedSynchronisationType.MUTEX); copt.setReusedSynchronisationType(ReusedSynchronisationType.ADAPTIVE_MUTEX); assertThat(copt.reusedSynchronisationType()) .isEqualTo(ReusedSynchronisationType.ADAPTIVE_MUTEX); copt.setReusedSynchronisationType(ReusedSynchronisationType.THREAD_LOCAL); assertThat(copt.reusedSynchronisationType()) .isEqualTo(ReusedSynchronisationType.THREAD_LOCAL); } } @Test public void useDirectBuffer() { try(final ComparatorOptions copt = new ComparatorOptions()) { copt.setUseDirectBuffer(true); assertThat(copt.useDirectBuffer()).isTrue(); copt.setUseDirectBuffer(false); assertThat(copt.useDirectBuffer()).isFalse(); } } @Test public void maxReusedBufferSize() { try(final ComparatorOptions copt = new ComparatorOptions()) { copt.setMaxReusedBufferSize(12345); assertThat(copt.maxReusedBufferSize()).isEqualTo(12345); copt.setMaxReusedBufferSize(-1); assertThat(copt.maxReusedBufferSize()).isEqualTo(-1); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompressionOptionsTest.java000066400000000000000000000035431370372246700264600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class CompressionOptionsTest { static { RocksDB.loadLibrary(); } @Test public void windowBits() { final int windowBits = 7; try(final CompressionOptions opt = new CompressionOptions()) { opt.setWindowBits(windowBits); assertThat(opt.windowBits()).isEqualTo(windowBits); } } @Test public void level() { final int level = 6; try(final CompressionOptions opt = new CompressionOptions()) { opt.setLevel(level); assertThat(opt.level()).isEqualTo(level); } } @Test public void strategy() { final int strategy = 2; try(final CompressionOptions opt = new CompressionOptions()) { opt.setStrategy(strategy); assertThat(opt.strategy()).isEqualTo(strategy); } } @Test public void maxDictBytes() { final int maxDictBytes = 999; try(final CompressionOptions opt = new CompressionOptions()) { opt.setMaxDictBytes(maxDictBytes); assertThat(opt.maxDictBytes()).isEqualTo(maxDictBytes); } } @Test public void zstdMaxTrainBytes() { final int zstdMaxTrainBytes = 999; try(final CompressionOptions opt = new CompressionOptions()) { opt.setZStdMaxTrainBytes(zstdMaxTrainBytes); assertThat(opt.zstdMaxTrainBytes()).isEqualTo(zstdMaxTrainBytes); } } @Test public void enabled() { try(final CompressionOptions opt = new CompressionOptions()) { assertThat(opt.enabled()).isFalse(); opt.setEnabled(true); assertThat(opt.enabled()).isTrue(); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/CompressionTypesTest.java000066400000000000000000000011661370372246700261300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Checks the lookup of {@link CompressionType} values by library name.
 */
public class CompressionTypesTest {

  /**
   * A compression type that reports a library name must be found again when
   * it is looked up by that name.
   *
   * Previously the result of {@code equals(...)} was discarded, so this test
   * could never fail on a wrong mapping.
   */
  @Test
  public void getCompressionType() {
    for (final CompressionType compressionType : CompressionType.values()) {
      final String libraryName = compressionType.getLibraryName();
      final CompressionType resolved =
          CompressionType.getCompressionType(libraryName);
      if (libraryName != null) {
        assertThat(resolved).isEqualTo(compressionType);
      }
      // NOTE(review): for a null library name the lookup is still exercised
      // (it must not throw), but the expected result is not asserted because
      // more than one type may have no library name - confirm and tighten.
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/DBOptionsTest.java000066400000000000000000000572671370372246700244600ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Test;

import java.nio.file.Paths;
import java.util.*;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link DBOptions}.
 *
 * Most tests follow the same pattern: set a value through the setter and
 * verify that the matching getter returns the same value. Every
 * {@link DBOptions} instance is created in a try-with-resources statement so
 * that it is closed when the test ends.
 */
public class DBOptionsTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  /** Source of the random values used by the round-trip tests. */
  public static final Random rand = PlatformRandomHelper.
      getPlatformSpecificRandomFactory();

  /**
   * A copy created through the copy constructor must report the same values
   * as the instance it was created from.
   */
  @Test
  public void copyConstructor() {
    // both instances are closed here; they used to be left open
    try (final DBOptions origOpts = new DBOptions()) {
      origOpts.setCreateIfMissing(rand.nextBoolean());
      origOpts.setAllow2pc(rand.nextBoolean());
      origOpts.setMaxBackgroundJobs(rand.nextInt(10));

      try (final DBOptions copyOpts = new DBOptions(origOpts)) {
        assertThat(origOpts.createIfMissing()).isEqualTo(copyOpts.createIfMissing());
        assertThat(origOpts.allow2pc()).isEqualTo(copyOpts.allow2pc());
        // maxBackgroundJobs is the value that was actually set above
        assertThat(origOpts.maxBackgroundJobs()).isEqualTo(
            copyOpts.maxBackgroundJobs());
        assertThat(origOpts.baseBackgroundCompactions()).isEqualTo(
            copyOpts.baseBackgroundCompactions());
      }
    }
  }

  /** Known property names produce options carrying the property values. */
  @Test
  public void getDBOptionsFromProps() {
    // setup sample properties
    final Properties properties = new Properties();
    properties.put("allow_mmap_reads", "true");
    properties.put("bytes_per_sync", "13");
    try (final DBOptions opt = DBOptions.getDBOptionsFromProps(properties)) {
      assertThat(opt).isNotNull();
      assertThat(String.valueOf(opt.allowMmapReads())).
          isEqualTo(properties.get("allow_mmap_reads"));
      assertThat(String.valueOf(opt.bytesPerSync())).
          isEqualTo(properties.get("bytes_per_sync"));
    }
  }

  /** Unknown property names yield {@code null} rather than options. */
  @Test
  public void failDBOptionsFromPropsWithIllegalValue() {
    // setup sample properties
    final Properties properties = new Properties();
    properties.put("tomato", "1024");
    properties.put("burger", "2");
    try (final DBOptions opt = DBOptions.getDBOptionsFromProps(properties)) {
      assertThat(opt).isNull();
    }
  }

  /** Passing {@code null} properties is rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void failDBOptionsFromPropsWithNullValue() {
    try (final DBOptions opt = DBOptions.getDBOptionsFromProps(null)) {
      //no-op
    }
  }

  /** Passing empty properties is rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void failDBOptionsFromPropsWithEmptyProps() {
    try (final DBOptions opt = DBOptions.getDBOptionsFromProps(
        new Properties())) {
      //no-op
    }
  }

  /** Only checks that the call links and runs; nothing is asserted. */
  @Test
  public void linkageOfPrepMethods() {
    try (final DBOptions opt = new DBOptions()) {
      opt.optimizeForSmallDb();
    }
  }

  /** The very same Env instance that was set is returned. */
  @Test
  public void env() {
    try (final DBOptions opt = new DBOptions();
         final Env env = Env.getDefault()) {
      opt.setEnv(env);
      assertThat(opt.getEnv()).isSameAs(env);
    }
  }

  /** Only checks that the call runs; nothing is asserted. */
  @Test
  public void setIncreaseParallelism() {
    try (final DBOptions opt = new DBOptions()) {
      final int threads = Runtime.getRuntime().availableProcessors() * 2;
      opt.setIncreaseParallelism(threads);
    }
  }

  @Test
  public void createIfMissing() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setCreateIfMissing(boolValue);
      assertThat(opt.createIfMissing()).isEqualTo(boolValue);
    }
  }

  @Test
  public void createMissingColumnFamilies() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setCreateMissingColumnFamilies(boolValue);
      assertThat(opt.createMissingColumnFamilies()).isEqualTo(boolValue);
    }
  }

  @Test
  public void errorIfExists() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setErrorIfExists(boolValue);
      assertThat(opt.errorIfExists()).isEqualTo(boolValue);
    }
  }

  @Test
  public void paranoidChecks() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setParanoidChecks(boolValue);
      assertThat(opt.paranoidChecks()).isEqualTo(boolValue);
    }
  }

  @Test
  public void maxTotalWalSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setMaxTotalWalSize(longValue);
      assertThat(opt.maxTotalWalSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void maxOpenFiles() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setMaxOpenFiles(intValue);
      assertThat(opt.maxOpenFiles()).isEqualTo(intValue);
    }
  }

  @Test
  public void maxFileOpeningThreads() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setMaxFileOpeningThreads(intValue);
      assertThat(opt.maxFileOpeningThreads()).isEqualTo(intValue);
    }
  }

  @Test
  public void useFsync() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseFsync(boolValue);
      assertThat(opt.useFsync()).isEqualTo(boolValue);
    }
  }

  /** dbPaths is empty by default and returns the list that was set. */
  @Test
  public void dbPaths() {
    final List<DbPath> dbPaths = new ArrayList<>();
    dbPaths.add(new DbPath(Paths.get("/a"), 10));
    dbPaths.add(new DbPath(Paths.get("/b"), 100));
    dbPaths.add(new DbPath(Paths.get("/c"), 1000));

    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.dbPaths()).isEqualTo(Collections.emptyList());

      opt.setDbPaths(dbPaths);

      assertThat(opt.dbPaths()).isEqualTo(dbPaths);
    }
  }

  @Test
  public void dbLogDir() {
    try (final DBOptions opt = new DBOptions()) {
      final String str = "path/to/DbLogDir";
      opt.setDbLogDir(str);
      assertThat(opt.dbLogDir()).isEqualTo(str);
    }
  }

  @Test
  public void walDir() {
    try (final DBOptions opt = new DBOptions()) {
      final String str = "path/to/WalDir";
      opt.setWalDir(str);
      assertThat(opt.walDir()).isEqualTo(str);
    }
  }

  @Test
  public void deleteObsoleteFilesPeriodMicros() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setDeleteObsoleteFilesPeriodMicros(longValue);
      assertThat(opt.deleteObsoleteFilesPeriodMicros()).isEqualTo(longValue);
    }
  }

  // "deprecation" is the warning name the compiler recognises
  @SuppressWarnings("deprecation")
  @Test
  public void baseBackgroundCompactions() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setBaseBackgroundCompactions(intValue);
      assertThat(opt.baseBackgroundCompactions()).
          isEqualTo(intValue);
    }
  }

  @SuppressWarnings("deprecation")
  @Test
  public void maxBackgroundCompactions() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setMaxBackgroundCompactions(intValue);
      assertThat(opt.maxBackgroundCompactions()).isEqualTo(intValue);
    }
  }

  @Test
  public void maxSubcompactions() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setMaxSubcompactions(intValue);
      assertThat(opt.maxSubcompactions()).
          isEqualTo(intValue);
    }
  }

  @SuppressWarnings("deprecation")
  @Test
  public void maxBackgroundFlushes() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setMaxBackgroundFlushes(intValue);
      assertThat(opt.maxBackgroundFlushes()).isEqualTo(intValue);
    }
  }

  @Test
  public void maxBackgroundJobs() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setMaxBackgroundJobs(intValue);
      assertThat(opt.maxBackgroundJobs()).isEqualTo(intValue);
    }
  }

  @Test
  public void maxLogFileSize() throws RocksDBException {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setMaxLogFileSize(longValue);
      assertThat(opt.maxLogFileSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void logFileTimeToRoll() throws RocksDBException {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setLogFileTimeToRoll(longValue);
      assertThat(opt.logFileTimeToRoll()).isEqualTo(longValue);
    }
  }

  @Test
  public void keepLogFileNum() throws RocksDBException {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setKeepLogFileNum(longValue);
      assertThat(opt.keepLogFileNum()).isEqualTo(longValue);
    }
  }

  @Test
  public void recycleLogFileNum() throws RocksDBException {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setRecycleLogFileNum(longValue);
      assertThat(opt.recycleLogFileNum()).isEqualTo(longValue);
    }
  }

  @Test
  public void maxManifestFileSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setMaxManifestFileSize(longValue);
      assertThat(opt.maxManifestFileSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void tableCacheNumshardbits() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setTableCacheNumshardbits(intValue);
      assertThat(opt.tableCacheNumshardbits()).isEqualTo(intValue);
    }
  }

  @Test
  public void walSizeLimitMB() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setWalSizeLimitMB(longValue);
      assertThat(opt.walSizeLimitMB()).isEqualTo(longValue);
    }
  }

  @Test
  public void walTtlSeconds() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setWalTtlSeconds(longValue);
      assertThat(opt.walTtlSeconds()).isEqualTo(longValue);
    }
  }

  @Test
  public void manifestPreallocationSize() throws RocksDBException {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setManifestPreallocationSize(longValue);
      assertThat(opt.manifestPreallocationSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void useDirectReads() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseDirectReads(boolValue);
      assertThat(opt.useDirectReads()).isEqualTo(boolValue);
    }
  }

  @Test
  public void useDirectIoForFlushAndCompaction() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseDirectIoForFlushAndCompaction(boolValue);
      assertThat(opt.useDirectIoForFlushAndCompaction()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowFAllocate() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowFAllocate(boolValue);
      assertThat(opt.allowFAllocate()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowMmapReads() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowMmapReads(boolValue);
      assertThat(opt.allowMmapReads()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowMmapWrites() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowMmapWrites(boolValue);
      assertThat(opt.allowMmapWrites()).isEqualTo(boolValue);
    }
  }

  @Test
  public void isFdCloseOnExec() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setIsFdCloseOnExec(boolValue);
      assertThat(opt.isFdCloseOnExec()).isEqualTo(boolValue);
    }
  }

  @Test
  public void statsDumpPeriodSec() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setStatsDumpPeriodSec(intValue);
      assertThat(opt.statsDumpPeriodSec()).isEqualTo(intValue);
    }
  }

  @Test
  public void statsPersistPeriodSec() {
    try (final DBOptions opt = new DBOptions()) {
      final int intValue = rand.nextInt();
      opt.setStatsPersistPeriodSec(intValue);
      assertThat(opt.statsPersistPeriodSec()).isEqualTo(intValue);
    }
  }

  @Test
  public void statsHistoryBufferSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setStatsHistoryBufferSize(longValue);
      assertThat(opt.statsHistoryBufferSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void adviseRandomOnOpen() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAdviseRandomOnOpen(boolValue);
      assertThat(opt.adviseRandomOnOpen()).isEqualTo(boolValue);
    }
  }

  @Test
  public void dbWriteBufferSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setDbWriteBufferSize(longValue);
      assertThat(opt.dbWriteBufferSize()).isEqualTo(longValue);
    }
  }

  /** The write buffer manager that was set is returned. */
  @Test
  public void setWriteBufferManager() throws RocksDBException {
    try (final DBOptions opt = new DBOptions();
         final Cache cache = new LRUCache(1 * 1024 * 1024);
         final WriteBufferManager writeBufferManager = new WriteBufferManager(2000L, cache)) {
      opt.setWriteBufferManager(writeBufferManager);
      assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager);
    }
  }

  /** Same as above, with a buffer size of zero. */
  @Test
  public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException {
    try (final DBOptions opt = new DBOptions();
         final Cache cache = new LRUCache(1 * 1024 * 1024);
         final WriteBufferManager writeBufferManager = new WriteBufferManager(0L, cache)) {
      opt.setWriteBufferManager(writeBufferManager);
      assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager);
    }
  }

  @Test
  public void accessHintOnCompactionStart() {
    try (final DBOptions opt = new DBOptions()) {
      final AccessHint accessHint = AccessHint.SEQUENTIAL;
      opt.setAccessHintOnCompactionStart(accessHint);
      assertThat(opt.accessHintOnCompactionStart()).isEqualTo(accessHint);
    }
  }

  @Test
  public void newTableReaderForCompactionInputs() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setNewTableReaderForCompactionInputs(boolValue);
      assertThat(opt.newTableReaderForCompactionInputs()).isEqualTo(boolValue);
    }
  }

  @Test
  public void compactionReadaheadSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setCompactionReadaheadSize(longValue);
      assertThat(opt.compactionReadaheadSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void randomAccessMaxBufferSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setRandomAccessMaxBufferSize(longValue);
      assertThat(opt.randomAccessMaxBufferSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void writableFileMaxBufferSize() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setWritableFileMaxBufferSize(longValue);
      assertThat(opt.writableFileMaxBufferSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void useAdaptiveMutex() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseAdaptiveMutex(boolValue);
      assertThat(opt.useAdaptiveMutex()).isEqualTo(boolValue);
    }
  }

  @Test
  public void bytesPerSync() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setBytesPerSync(longValue);
      assertThat(opt.bytesPerSync()).isEqualTo(longValue);
    }
  }

  @Test
  public void walBytesPerSync() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setWalBytesPerSync(longValue);
      assertThat(opt.walBytesPerSync()).isEqualTo(longValue);
    }
  }

  /** strictBytesPerSync starts out false and can be switched to true. */
  @Test
  public void strictBytesPerSync() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.strictBytesPerSync()).isFalse();
      opt.setStrictBytesPerSync(true);
      assertThat(opt.strictBytesPerSync()).isTrue();
    }
  }

  @Test
  public void enableThreadTracking() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setEnableThreadTracking(boolValue);
      assertThat(opt.enableThreadTracking()).isEqualTo(boolValue);
    }
  }

  @Test
  public void delayedWriteRate() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setDelayedWriteRate(longValue);
      assertThat(opt.delayedWriteRate()).isEqualTo(longValue);
    }
  }

  /** enablePipelinedWrite starts out false and can be switched to true. */
  @Test
  public void enablePipelinedWrite() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.enablePipelinedWrite()).isFalse();
      opt.setEnablePipelinedWrite(true);
      assertThat(opt.enablePipelinedWrite()).isTrue();
    }
  }

  /** unorderedWrite starts out false and can be switched to true. */
  @Test
  public void unordredWrite() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.unorderedWrite()).isFalse();
      opt.setUnorderedWrite(true);
      assertThat(opt.unorderedWrite()).isTrue();
    }
  }

  @Test
  public void allowConcurrentMemtableWrite() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowConcurrentMemtableWrite(boolValue);
      assertThat(opt.allowConcurrentMemtableWrite()).isEqualTo(boolValue);
    }
  }

  @Test
  public void enableWriteThreadAdaptiveYield() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setEnableWriteThreadAdaptiveYield(boolValue);
      assertThat(opt.enableWriteThreadAdaptiveYield()).isEqualTo(boolValue);
    }
  }

  @Test
  public void writeThreadMaxYieldUsec() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setWriteThreadMaxYieldUsec(longValue);
      assertThat(opt.writeThreadMaxYieldUsec()).isEqualTo(longValue);
    }
  }

  @Test
  public void writeThreadSlowYieldUsec() {
    try (final DBOptions opt = new DBOptions()) {
      final long longValue = rand.nextLong();
      opt.setWriteThreadSlowYieldUsec(longValue);
      assertThat(opt.writeThreadSlowYieldUsec()).isEqualTo(longValue);
    }
  }

  @Test
  public void skipStatsUpdateOnDbOpen() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setSkipStatsUpdateOnDbOpen(boolValue);
      assertThat(opt.skipStatsUpdateOnDbOpen()).isEqualTo(boolValue);
    }
  }

  /** Every WAL recovery mode is returned as it was set. */
  @Test
  public void walRecoveryMode() {
    try (final DBOptions opt = new DBOptions()) {
      for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) {
        opt.setWalRecoveryMode(walRecoveryMode);
        assertThat(opt.walRecoveryMode()).isEqualTo(walRecoveryMode);
      }
    }
  }

  @Test
  public void allow2pc() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllow2pc(boolValue);
      assertThat(opt.allow2pc()).isEqualTo(boolValue);
    }
  }

  /** rowCache is null by default and accepts both cache implementations. */
  @Test
  public void rowCache() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.rowCache()).isNull();

      try (final Cache lruCache = new LRUCache(1000)) {
        opt.setRowCache(lruCache);
        assertThat(opt.rowCache()).isEqualTo(lruCache);
      }

      try (final Cache clockCache = new ClockCache(1000)) {
        opt.setRowCache(clockCache);
        assertThat(opt.rowCache()).isEqualTo(clockCache);
      }
    }
  }

  /** walFilter is null by default and returns the filter that was set. */
  @Test
  public void walFilter() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.walFilter()).isNull();

      try (final AbstractWalFilter walFilter = new AbstractWalFilter() {
        @Override
        public void columnFamilyLogNumberMap(
            final Map cfLognumber,
            final Map cfNameId) {
          // no-op
        }

        @Override
        public LogRecordFoundResult logRecordFound(final long logNumber,
            final String logFileName, final WriteBatch batch,
            final WriteBatch newBatch) {
          return new LogRecordFoundResult(
              WalProcessingOption.CONTINUE_PROCESSING, false);
        }

        @Override
        public String name() {
          return "test-wal-filter";
        }
      }) {
        opt.setWalFilter(walFilter);
        assertThat(opt.walFilter()).isEqualTo(walFilter);
      }
    }
  }

  @Test
  public void failIfOptionsFileError() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setFailIfOptionsFileError(boolValue);
      assertThat(opt.failIfOptionsFileError()).isEqualTo(boolValue);
    }
  }

  @Test
  public void dumpMallocStats() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setDumpMallocStats(boolValue);
      assertThat(opt.dumpMallocStats()).isEqualTo(boolValue);
    }
  }

  @Test
  public void avoidFlushDuringRecovery() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAvoidFlushDuringRecovery(boolValue);
      assertThat(opt.avoidFlushDuringRecovery()).isEqualTo(boolValue);
    }
  }

  @Test
  public void avoidFlushDuringShutdown() {
    try (final DBOptions opt = new DBOptions()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAvoidFlushDuringShutdown(boolValue);
      assertThat(opt.avoidFlushDuringShutdown()).isEqualTo(boolValue);
    }
  }

  /** allowIngestBehind starts out false and can be switched to true. */
  @Test
  public void allowIngestBehind() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.allowIngestBehind()).isFalse();
      opt.setAllowIngestBehind(true);
      assertThat(opt.allowIngestBehind()).isTrue();
    }
  }

  /** preserveDeletes starts out false and can be switched to true. */
  @Test
  public void preserveDeletes() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.preserveDeletes()).isFalse();
      opt.setPreserveDeletes(true);
      assertThat(opt.preserveDeletes()).isTrue();
    }
  }

  /** twoWriteQueues starts out false and can be switched to true. */
  @Test
  public void twoWriteQueues() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.twoWriteQueues()).isFalse();
      opt.setTwoWriteQueues(true);
      assertThat(opt.twoWriteQueues()).isTrue();
    }
  }

  /** manualWalFlush starts out false and can be switched to true. */
  @Test
  public void manualWalFlush() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.manualWalFlush()).isFalse();
      opt.setManualWalFlush(true);
      assertThat(opt.manualWalFlush()).isTrue();
    }
  }

  /** atomicFlush starts out false and can be switched to true. */
  @Test
  public void atomicFlush() {
    try (final DBOptions opt = new DBOptions()) {
      assertThat(opt.atomicFlush()).isFalse();
      opt.setAtomicFlush(true);
      assertThat(opt.atomicFlush()).isTrue();
    }
  }

  /**
   * Rate limiters built with either constructor can be set; nothing is
   * asserted. Both limiters are closed with the options.
   */
  @Test
  public void rateLimiter() {
    try (final DBOptions options = new DBOptions();
         final DBOptions anotherOptions = new DBOptions();
         final RateLimiter rateLimiter = new RateLimiter(1000, 100 * 1000, 1);
         // the second limiter used to be created inline and never closed
         final RateLimiter anotherRateLimiter = new RateLimiter(1000)) {
      options.setRateLimiter(rateLimiter);
      // Test with parameter initialization
      anotherOptions.setRateLimiter(anotherRateLimiter);
    }
  }

  /** Only checks that an SstFileManager can be set; nothing is asserted. */
  @Test
  public void sstFileManager() throws RocksDBException {
    try (final DBOptions options = new DBOptions();
         final SstFileManager sstFileManager =
             new SstFileManager(Env.getDefault())) {
      options.setSstFileManager(sstFileManager);
    }
  }

  /** statistics is null by default and non-null once it has been set. */
  @Test
  public void statistics() {
    try (final DBOptions options = new DBOptions()) {
      final Statistics statistics = options.statistics();
      assertThat(statistics).isNull();
    }

    try (final Statistics statistics = new Statistics();
         final DBOptions options = new DBOptions().setStatistics(statistics);
         final Statistics stats = options.statistics()) {
      assertThat(stats).isNotNull();
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/DefaultEnvTest.java000066400000000000000000000072561370372246700246450ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.util.Collection;
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for the default {@link Env} as returned by
 * {@code RocksEnv.getDefault()}.
 */
public class DefaultEnvTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /**
   * The number of background threads set for a pool is reported back; the
   * setter without a priority is checked against the LOW pool.
   */
  @Test
  public void backgroundThreads() {
    try (final Env env = RocksEnv.getDefault()) {
      env.setBackgroundThreads(5, Priority.BOTTOM);
      assertThat(env.getBackgroundThreads(Priority.BOTTOM)).isEqualTo(5);

      env.setBackgroundThreads(5);
      assertThat(env.getBackgroundThreads(Priority.LOW)).isEqualTo(5);

      env.setBackgroundThreads(5, Priority.LOW);
      assertThat(env.getBackgroundThreads(Priority.LOW)).isEqualTo(5);

      env.setBackgroundThreads(5, Priority.HIGH);
      assertThat(env.getBackgroundThreads(Priority.HIGH)).isEqualTo(5);
    }
  }

  /** The queue length of every thread pool is zero. */
  @Test
  public void threadPoolQueueLen() {
    final Priority[] pools = {Priority.BOTTOM, Priority.LOW, Priority.HIGH};

    try (final Env env = RocksEnv.getDefault()) {
      for (final Priority pool : pools) {
        assertThat(env.getThreadPoolQueueLen(pool)).isEqualTo(0);
      }
    }
  }

  /** After asking for 20 threads, every pool reports at least 20. */
  @Test
  public void incBackgroundThreadsIfNeeded() {
    final Priority[] pools = {Priority.BOTTOM, Priority.LOW, Priority.HIGH};

    try (final Env env = RocksEnv.getDefault()) {
      for (final Priority pool : pools) {
        env.incBackgroundThreadsIfNeeded(20, pool);
        assertThat(env.getBackgroundThreads(pool)).isGreaterThanOrEqualTo(20);
      }
    }
  }

  /** Only checks that the call runs for every pool; nothing is asserted. */
  @Test
  public void lowerThreadPoolIOPriority() {
    final Priority[] pools = {Priority.BOTTOM, Priority.LOW, Priority.HIGH};

    try (final Env env = RocksEnv.getDefault()) {
      for (final Priority pool : pools) {
        env.lowerThreadPoolIOPriority(pool);
      }
    }
  }

  /** Only checks that the call runs for every pool; nothing is asserted. */
  @Test
  public void lowerThreadPoolCPUPriority() {
    final Priority[] pools = {Priority.BOTTOM, Priority.LOW, Priority.HIGH};

    try (final Env env = RocksEnv.getDefault()) {
      for (final Priority pool : pools) {
        env.lowerThreadPoolCPUPriority(pool);
      }
    }
  }

  /** The thread list of the default Env is not empty. */
  @Test
  public void threadList() throws RocksDBException {
    try (final Env env = RocksEnv.getDefault()) {
      final Collection threads = env.getThreadList();
      assertThat(threads.size()).isGreaterThan(0);
    }
  }

  /** The thread list is not empty while a database using the Env is open. */
  @Test
  public void threadList_integration() throws RocksDBException {
    try (final Env env = RocksEnv.getDefault();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true)
             .setEnv(env)) {
      final String dbPath = dbFolder.getRoot().getAbsolutePath();

      // open database
      try (final RocksDB db = RocksDB.open(options, dbPath)) {
        final List threads = env.getThreadList();
        assertThat(threads.size()).isGreaterThan(0);
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/DirectSliceTest.java000066400000000000000000000060111370372246700247660ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Test;

import java.nio.ByteBuffer;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link DirectSlice}.
 */
public class DirectSliceTest {
  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  /** Exercises toString, clear, get and removePrefix on string slices. */
  @Test
  public void directSlice() {
    try (final DirectSlice first = new DirectSlice("abc");
         final DirectSlice second = new DirectSlice("abc")) {
      assertThat(first.toString()).isEqualTo("abc");
      // clear first slice
      first.clear();
      assertThat(first.toString()).isEmpty();
      // get first char in otherslice
      assertThat(second.get(0)).isEqualTo("a".getBytes()[0]);
      // remove prefix
      second.removePrefix(1);
      assertThat(second.toString()).isEqualTo("bc");
    }
  }

  /** A slice over a zero-terminated direct buffer yields the text. */
  @Test
  public void directSliceWithByteBuffer() {
    final byte[] text = "Some text".getBytes();
    final ByteBuffer direct = ByteBuffer.allocateDirect(text.length + 1);
    direct.put(text);
    direct.put(text.length, (byte)0);

    try (final DirectSlice slice = new DirectSlice(direct)) {
      assertThat(slice.toString()).isEqualTo("Some text");
    }
  }

  /** A slice with an explicit length covers only that many bytes. */
  @Test
  public void directSliceWithByteBufferAndLength() {
    final byte[] text = "Some text".getBytes();
    final ByteBuffer direct = ByteBuffer.allocateDirect(text.length);
    direct.put(text);

    try (final DirectSlice slice = new DirectSlice(direct, 4)) {
      assertThat(slice.toString()).isEqualTo("Some");
    }
  }

  /** A buffer that wraps a byte array is rejected. */
  @Test(expected = IllegalArgumentException.class)
  public void directSliceInitWithoutDirectAllocation() {
    final byte[] text = "Some text".getBytes();
    final ByteBuffer wrapped = ByteBuffer.wrap(text);
    try (final DirectSlice slice = new DirectSlice(wrapped)) {
      //no-op
    }
  }

  /** A wrapping buffer is rejected by the length constructor as well. */
  @Test(expected = IllegalArgumentException.class)
  public void directSlicePrefixInitWithoutDirectAllocation() {
    final byte[] text = "Some text".getBytes();
    final ByteBuffer wrapped = ByteBuffer.wrap(text);
    try (final DirectSlice slice = new DirectSlice(wrapped, 4)) {
      //no-op
    }
  }

  /** clear empties the slice and may be called a second time. */
  @Test
  public void directSliceClear() {
    try (final DirectSlice slice = new DirectSlice("abc")) {
      assertThat(slice.toString()).isEqualTo("abc");
      slice.clear();
      assertThat(slice.toString()).isEmpty();
      slice.clear();  // make sure we don't double-free
    }
  }

  /** removePrefix(1) drops the first character. */
  @Test
  public void directSliceRemovePrefix() {
    try (final DirectSlice slice = new DirectSlice("abc")) {
      assertThat(slice.toString()).isEqualTo("abc");
      slice.removePrefix(1);
      assertThat(slice.toString()).isEqualTo("bc");
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/EnvOptionsTest.java000066400000000000000000000111401370372246700246770ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Test;

import java.util.Random;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Setter/getter round-trip checks for {@link EnvOptions}.
 */
public class EnvOptionsTest {
  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  public static final Random rand = PlatformRandomHelper.getPlatformSpecificRandomFactory();

  /** EnvOptions built from DBOptions report the DBOptions' readahead size. */
  @Test
  public void dbOptionsConstructor() {
    final long compactionReadaheadSize = 4 * 1024 * 1024;
    try (final DBOptions dbOptions = new DBOptions()
             .setCompactionReadaheadSize(compactionReadaheadSize);
         final EnvOptions opts = new EnvOptions(dbOptions)) {
      assertThat(opts.compactionReadaheadSize())
          .isEqualTo(compactionReadaheadSize);
    }
  }

  @Test
  public void useMmapReads() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setUseMmapReads(expected);
      assertThat(opts.useMmapReads()).isEqualTo(expected);
    }
  }

  @Test
  public void useMmapWrites() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setUseMmapWrites(expected);
      assertThat(opts.useMmapWrites()).isEqualTo(expected);
    }
  }

  @Test
  public void useDirectReads() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setUseDirectReads(expected);
      assertThat(opts.useDirectReads()).isEqualTo(expected);
    }
  }

  @Test
  public void useDirectWrites() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setUseDirectWrites(expected);
      assertThat(opts.useDirectWrites()).isEqualTo(expected);
    }
  }

  @Test
  public void allowFallocate() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setAllowFallocate(expected);
      assertThat(opts.allowFallocate()).isEqualTo(expected);
    }
  }

  @Test
  public void setFdCloexecs() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setSetFdCloexec(expected);
      assertThat(opts.setFdCloexec()).isEqualTo(expected);
    }
  }

  @Test
  public void bytesPerSync() {
    try (final EnvOptions opts = new EnvOptions()) {
      final long expected = rand.nextLong();
      opts.setBytesPerSync(expected);
      assertThat(opts.bytesPerSync()).isEqualTo(expected);
    }
  }

  @Test
  public void fallocateWithKeepSize() {
    try (final EnvOptions opts = new EnvOptions()) {
      final boolean expected = rand.nextBoolean();
      opts.setFallocateWithKeepSize(expected);
      assertThat(opts.fallocateWithKeepSize()).isEqualTo(expected);
    }
  }

  @Test
  public void compactionReadaheadSize() {
    try (final EnvOptions opts = new EnvOptions()) {
      final int expected = rand.nextInt();
      opts.setCompactionReadaheadSize(expected);
      assertThat(opts.compactionReadaheadSize()).isEqualTo(expected);
    }
  }

  @Test
  public void randomAccessMaxBufferSize() {
    try (final EnvOptions opts = new EnvOptions()) {
      final int expected = rand.nextInt();
      opts.setRandomAccessMaxBufferSize(expected);
      assertThat(opts.randomAccessMaxBufferSize()).isEqualTo(expected);
    }
  }

  @Test
  public void writableFileMaxBufferSize() {
    try (final EnvOptions opts = new EnvOptions()) {
      final int expected = rand.nextInt();
      opts.setWritableFileMaxBufferSize(expected);
      assertThat(opts.writableFileMaxBufferSize()).isEqualTo(expected);
    }
  }

  /** The rate limiter that was set last is the one returned. */
  @Test
  public void rateLimiter() {
    try (final EnvOptions opts = new EnvOptions();
         final RateLimiter firstLimiter = new RateLimiter(1000, 100 * 1000, 1)) {
      opts.setRateLimiter(firstLimiter);
      assertThat(opts.rateLimiter()).isEqualTo(firstLimiter);

      try (final RateLimiter secondLimiter = new RateLimiter(1000)) {
        opts.setRateLimiter(secondLimiter);
        assertThat(opts.rateLimiter()).isEqualTo(secondLimiter);
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/FilterTest.java000066400000000000000000000022661370372246700240310ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; public class FilterTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void filter() { // new Bloom filter final BlockBasedTableConfig blockConfig = new BlockBasedTableConfig(); try(final Options options = new Options()) { try(final Filter bloomFilter = new BloomFilter()) { blockConfig.setFilterPolicy(bloomFilter); options.setTableFormatConfig(blockConfig); } try(final Filter bloomFilter = new BloomFilter(10)) { blockConfig.setFilterPolicy(bloomFilter); options.setTableFormatConfig(blockConfig); } try(final Filter bloomFilter = new BloomFilter(10, false)) { blockConfig.setFilterPolicy(bloomFilter); options.setTableFormatConfig(blockConfig); } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/FlushOptionsTest.java000066400000000000000000000016721370372246700252410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Checks the initial value and the setter/getter round trip of the two
 * boolean flags on {@link FlushOptions}.
 */
public class FlushOptionsTest {

  /** waitForFlush starts out true and reads back false after being cleared. */
  @Test
  public void waitForFlush() {
    try (final FlushOptions opts = new FlushOptions()) {
      final boolean initial = opts.waitForFlush();
      assertThat(initial).isTrue();

      opts.setWaitForFlush(false);
      final boolean updated = opts.waitForFlush();
      assertThat(updated).isFalse();
    }
  }

  /** allowWriteStall starts out false and reads back true after being set. */
  @Test
  public void allowWriteStall() {
    try (final FlushOptions opts = new FlushOptions()) {
      final boolean initial = opts.allowWriteStall();
      assertThat(initial).isFalse();

      opts.setAllowWriteStall(true);
      final boolean updated = opts.allowWriteStall();
      assertThat(updated).isTrue();
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/FlushTest.java000066400000000000000000000033351370372246700236630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Verifies that an explicit {@code RocksDB#flush} empties the active
 * memtable, as reported by the "rocksdb.num-entries-active-mem-table"
 * property.
 */
public class FlushTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  @Test
  public void flush() throws RocksDBException {
    final String activeEntriesProperty = "rocksdb.num-entries-active-mem-table";
    final String[][] entries = {
        {"key1", "value1"},
        {"key2", "value2"},
        {"key3", "value3"},
        {"key4", "value4"},
    };

    try (final Options dbOptions = new Options()
             .setCreateIfMissing(true)
             .setMaxWriteBufferNumber(10)
             .setMinWriteBufferNumberToMerge(10);
         final WriteOptions walDisabled = new WriteOptions().setDisableWAL(true);
         final FlushOptions blockingFlush = new FlushOptions().setWaitForFlush(true)) {
      assertThat(blockingFlush.waitForFlush()).isTrue();

      final String dbPath = dbFolder.getRoot().getAbsolutePath();
      try (final RocksDB db = RocksDB.open(dbOptions, dbPath)) {
        // Write the four entries with the WAL disabled.
        for (final String[] entry : entries) {
          db.put(walDisabled, entry[0].getBytes(), entry[1].getBytes());
        }
        assertThat(db.getProperty(activeEntriesProperty)).isEqualTo("4");

        // After the flush the active memtable must report no entries.
        db.flush(blockingFlush);
        assertThat(db.getProperty(activeEntriesProperty)).isEqualTo("0");
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/HdfsEnvTest.java000066400000000000000000000027101370372246700241330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import static java.nio.charset.StandardCharsets.UTF_8; public class HdfsEnvTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); // expect org.rocksdb.RocksDBException: Not compiled with hdfs support @Test(expected = RocksDBException.class) public void construct() throws RocksDBException { try (final Env env = new HdfsEnv("hdfs://localhost:5000")) { // no-op } } // expect org.rocksdb.RocksDBException: Not compiled with hdfs support @Test(expected = RocksDBException.class) public void construct_integration() throws RocksDBException { try (final Env env = new HdfsEnv("hdfs://localhost:5000"); final Options options = new Options() .setCreateIfMissing(true) .setEnv(env); ) { try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getPath())) { db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/InfoLogLevelTest.java000066400000000000000000000070141370372246700251250ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.rocksdb.util.Environment;

import java.io.IOException;

import static java.nio.file.Files.readAllBytes;
import static java.nio.file.Paths.get;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link InfoLogLevel}: how the configured level affects what is
 * written to the database LOG file, plus the enum's lookup helpers.
 */
public class InfoLogLevelTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /** With default options, a put followed by a flush leaves content in the LOG after its header. */
  @Test
  public void testInfoLogLevel() throws RocksDBException, IOException {
    // FlushOptions is managed by try-with-resources so it is closed when the test ends.
    try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath());
         final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {
      db.put("key".getBytes(), "value".getBytes());
      db.flush(flushOptions);
      assertThat(getLogContentsWithoutHeader()).isNotEmpty();
    }
  }

  /** With FATAL_LEVEL set on Options, the LOG is expected to be empty after its header. */
  @Test
  public void testFatalLogLevel() throws RocksDBException, IOException {
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL);
         final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) {
      assertThat(options.infoLogLevel()).isEqualTo(InfoLogLevel.FATAL_LEVEL);
      db.put("key".getBytes(), "value".getBytes());
      // As InfoLogLevel is set to FATAL_LEVEL, here we expect the log
      // content to be empty.
      assertThat(getLogContentsWithoutHeader()).isEmpty();
    }
  }

  /** Same as {@link #testFatalLogLevel()}, but the level is set on DBOptions and carried into Options. */
  @Test
  public void testFatalLogLevelWithDBOptions() throws RocksDBException, IOException {
    // The ColumnFamilyOptions handed to the Options constructor is declared as
    // its own resource so it is closed (after the Options built from it).
    try (final DBOptions dbOptions = new DBOptions()
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL);
         final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions();
         final Options options = new Options(dbOptions, cfOptions)
             .setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) {
      assertThat(dbOptions.infoLogLevel()).isEqualTo(InfoLogLevel.FATAL_LEVEL);
      assertThat(options.infoLogLevel()).isEqualTo(InfoLogLevel.FATAL_LEVEL);
      db.put("key".getBytes(), "value".getBytes());
      assertThat(getLogContentsWithoutHeader()).isEmpty();
    }
  }

  @Test(expected = IllegalArgumentException.class)
  public void failIfIllegalByteValueProvided() {
    InfoLogLevel.getInfoLogLevel((byte) -1);
  }

  @Test
  public void valueOf() {
    assertThat(InfoLogLevel.valueOf("DEBUG_LEVEL")).isEqualTo(InfoLogLevel.DEBUG_LEVEL);
  }

  /**
   * Read LOG file contents into String, dropping the header.
   *
   * The header is taken to end at the last line containing "DB pointer";
   * everything after that line is returned, each line terminated by the
   * separator used for splitting. If no line contains the marker, the result
   * is the empty string.
   *
   * @return LOG file contents after the header, as String.
   * @throws IOException if file is not found.
   */
  private String getLogContentsWithoutHeader() throws IOException {
    final String separator = Environment.isWindows()
        ? "\n" : System.getProperty("line.separator");
    final String[] lines = new String(readAllBytes(get(
        dbFolder.getRoot().getAbsolutePath() + "/LOG"))).split(separator);

    // Scan backwards so that the LAST header marker wins.
    int firstNonHeader = lines.length;
    for (int i = lines.length - 1; i >= 0; --i) {
      if (lines[i].contains("DB pointer")) {
        firstNonHeader = i + 1;
        break;
      }
    }

    final StringBuilder builder = new StringBuilder();
    for (int i = firstNonHeader; i < lines.length; ++i) {
      builder.append(lines[i]).append(separator);
    }
    return builder.toString();
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java000066400000000000000000000070661370372246700277170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Test;

import java.util.Random;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Construction and getter/setter round-trip tests for
 * {@link IngestExternalFileOptions}.
 */
public class IngestExternalFileOptionsTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  public static final Random rand =
      PlatformRandomHelper.getPlatformSpecificRandomFactory();

  /** The no-argument constructor yields a usable instance. */
  @Test
  public void createExternalSstFileInfoWithoutParameters() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      assertThat(ingestOptions).isNotNull();
    }
  }

  /** Every flag passed to the four-argument constructor is readable through its getter. */
  @Test
  public void createExternalSstFileInfoWithParameters() {
    final boolean expectedMove = rand.nextBoolean();
    final boolean expectedSnapshot = rand.nextBoolean();
    final boolean expectedGlobalSeqNo = rand.nextBoolean();
    final boolean expectedBlockingFlush = rand.nextBoolean();

    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions(
             expectedMove, expectedSnapshot, expectedGlobalSeqNo, expectedBlockingFlush)) {
      assertThat(ingestOptions).isNotNull();
      assertThat(ingestOptions.moveFiles()).isEqualTo(expectedMove);
      assertThat(ingestOptions.snapshotConsistency()).isEqualTo(expectedSnapshot);
      assertThat(ingestOptions.allowGlobalSeqNo()).isEqualTo(expectedGlobalSeqNo);
      assertThat(ingestOptions.allowBlockingFlush()).isEqualTo(expectedBlockingFlush);
    }
  }

  @Test
  public void moveFiles() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      final boolean expected = rand.nextBoolean();
      ingestOptions.setMoveFiles(expected);
      assertThat(ingestOptions.moveFiles()).isEqualTo(expected);
    }
  }

  @Test
  public void snapshotConsistency() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      final boolean expected = rand.nextBoolean();
      ingestOptions.setSnapshotConsistency(expected);
      assertThat(ingestOptions.snapshotConsistency()).isEqualTo(expected);
    }
  }

  @Test
  public void allowGlobalSeqNo() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      final boolean expected = rand.nextBoolean();
      ingestOptions.setAllowGlobalSeqNo(expected);
      assertThat(ingestOptions.allowGlobalSeqNo()).isEqualTo(expected);
    }
  }

  @Test
  public void allowBlockingFlush() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      final boolean expected = rand.nextBoolean();
      ingestOptions.setAllowBlockingFlush(expected);
      assertThat(ingestOptions.allowBlockingFlush()).isEqualTo(expected);
    }
  }

  /** ingestBehind reads false on a fresh instance and true after being set. */
  @Test
  public void ingestBehind() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      final boolean initial = ingestOptions.ingestBehind();
      assertThat(initial).isFalse();
      ingestOptions.setIngestBehind(true);
      assertThat(ingestOptions.ingestBehind()).isTrue();
    }
  }

  /** writeGlobalSeqno reads true on a fresh instance and false after being cleared. */
  @Test
  public void writeGlobalSeqno() {
    try (final IngestExternalFileOptions ingestOptions = new IngestExternalFileOptions()) {
      final boolean initial = ingestOptions.writeGlobalSeqno();
      assertThat(initial).isTrue();
      ingestOptions.setWriteGlobalSeqno(false);
      assertThat(ingestOptions.writeGlobalSeqno()).isFalse();
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/KeyMayExistTest.java000066400000000000000000000166371370372246700250270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Exercises the {@code RocksDB#keyMayExist} overloads used below: with and
 * without a column family, with and without {@link ReadOptions}, with whole
 * and sliced keys, and with a {@link Holder} or {@code null} for the value.
 */
public class KeyMayExistTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  @Test
  public void keyMayExist() throws RocksDBException {
    // Explicit type arguments: the enhanced for loop over ColumnFamilyHandle
    // and new String(holder.getValue(), UTF_8) need them to type-check.
    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor("new_cf".getBytes(UTF_8)));
    final List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();

    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options,
             dbFolder.getRoot().getAbsolutePath(),
             cfDescriptors, columnFamilyHandleList)) {
      try {
        assertThat(columnFamilyHandleList.size()).isEqualTo(2);
        final ColumnFamilyHandle defaultCf = columnFamilyHandleList.get(0);
        final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1);

        final byte[] key = "key".getBytes(UTF_8);
        db.put(key, "value".getBytes(UTF_8));

        // Test without column family
        final Holder<byte[]> holder = new Holder<>();
        boolean exists = db.keyMayExist(key, holder);
        assertThat(exists).isTrue();
        assertThat(holder.getValue()).isNotNull();
        assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value");

        exists = db.keyMayExist(key, null);
        assertThat(exists).isTrue();

        // Slice key: the real key is the middle part of a larger buffer,
        // addressed through (offset, len).
        final StringBuilder builder = new StringBuilder("prefix");
        final int offset = builder.toString().length();
        builder.append("slice key 0");
        final int len = builder.toString().length() - offset;
        builder.append("suffix");

        final byte[] sliceKey = builder.toString().getBytes(UTF_8);
        final byte[] sliceValue = "slice value 0".getBytes(UTF_8);
        db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length);

        exists = db.keyMayExist(sliceKey, offset, len, holder);
        assertThat(exists).isTrue();
        assertThat(holder.getValue()).isNotNull();
        assertThat(holder.getValue()).isEqualTo(sliceValue);

        exists = db.keyMayExist(sliceKey, offset, len, null);
        assertThat(exists).isTrue();

        // Test without column family but with readOptions
        try (final ReadOptions readOptions = new ReadOptions()) {
          exists = db.keyMayExist(readOptions, key, holder);
          assertThat(exists).isTrue();
          assertThat(holder.getValue()).isNotNull();
          assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value");

          exists = db.keyMayExist(readOptions, key, null);
          assertThat(exists).isTrue();

          exists = db.keyMayExist(readOptions, sliceKey, offset, len, holder);
          assertThat(exists).isTrue();
          assertThat(holder.getValue()).isNotNull();
          assertThat(holder.getValue()).isEqualTo(sliceValue);

          exists = db.keyMayExist(readOptions, sliceKey, offset, len, null);
          assertThat(exists).isTrue();
        }

        // Test with column family
        exists = db.keyMayExist(defaultCf, key, holder);
        assertThat(exists).isTrue();
        assertThat(holder.getValue()).isNotNull();
        assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value");

        exists = db.keyMayExist(defaultCf, key, null);
        assertThat(exists).isTrue();

        // Test slice key with column family
        exists = db.keyMayExist(defaultCf, sliceKey, offset, len, holder);
        assertThat(exists).isTrue();
        assertThat(holder.getValue()).isNotNull();
        assertThat(holder.getValue()).isEqualTo(sliceValue);

        exists = db.keyMayExist(defaultCf, sliceKey, offset, len, null);
        assertThat(exists).isTrue();

        // Test with column family and readOptions
        try (final ReadOptions readOptions = new ReadOptions()) {
          exists = db.keyMayExist(defaultCf, readOptions, key, holder);
          assertThat(exists).isTrue();
          assertThat(holder.getValue()).isNotNull();
          assertThat(new String(holder.getValue(), UTF_8)).isEqualTo("value");

          exists = db.keyMayExist(defaultCf, readOptions, key, null);
          assertThat(exists).isTrue();

          // Test slice key with column family and read options
          exists = db.keyMayExist(defaultCf, readOptions, sliceKey, offset, len, holder);
          assertThat(exists).isTrue();
          assertThat(holder.getValue()).isNotNull();
          assertThat(holder.getValue()).isEqualTo(sliceValue);

          exists = db.keyMayExist(defaultCf, readOptions, sliceKey, offset, len, null);
          assertThat(exists).isTrue();
        }

        // KeyMayExist in CF1 must return null value
        exists = db.keyMayExist(newCf, key, holder);
        assertThat(exists).isFalse();
        assertThat(holder.getValue()).isNull();
        exists = db.keyMayExist(newCf, key, null);
        assertThat(exists).isFalse();

        // slice key
        exists = db.keyMayExist(newCf, sliceKey, 1, 3, holder);
        assertThat(exists).isFalse();
        assertThat(holder.getValue()).isNull();
        exists = db.keyMayExist(newCf, sliceKey, 1, 3, null);
        assertThat(exists).isFalse();
      } finally {
        // Handles are closed before the database (try-with-resources closes db afterwards).
        for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) {
          columnFamilyHandle.close();
        }
      }
    }
  }

  /** A value that is not valid UTF-8 must come back byte-for-byte from both get and keyMayExist. */
  @Test
  public void keyMayExistNonUnicodeString() throws RocksDBException {
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) {
      final byte[] key = "key".getBytes(UTF_8);
      final byte[] value = { (byte) 0x80 }; // invalid unicode code-point
      db.put(key, value);

      final byte[] buf = new byte[10];
      final int read = db.get(key, buf);
      assertThat(read).isEqualTo(1);
      assertThat(buf).startsWith(value);

      final Holder<byte[]> holder = new Holder<>();
      boolean exists = db.keyMayExist("key".getBytes(UTF_8), holder);
      assertThat(exists).isTrue();
      assertThat(holder.getValue()).isNotNull();
      assertThat(holder.getValue()).isEqualTo(value);

      exists = db.keyMayExist("key".getBytes(UTF_8), null);
      assertThat(exists).isTrue();
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/LRUCacheTest.java000066400000000000000000000013101370372246700241570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.Test;

/** Smoke test: an {@link LRUCache} can be constructed with all four parameters and closed. */
public class LRUCacheTest {

  static {
    RocksDB.loadLibrary();
  }

  @Test
  public void newLRUCache() {
    final long capacity = 1000;
    final int numShardBits = 16;
    final boolean strictCapacityLimit = true;
    // NOTE(review): a "ratio" of 5 looks out of range for a pool ratio —
    // confirm against the LRUCache contract whether this value is intentional.
    final double highPriPoolRatio = 5;
    try (final Cache lruCache = new LRUCache(capacity,
        numShardBits, strictCapacityLimit, highPriPoolRatio)) {
      //no op
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/LoggerTest.java000066400000000000000000000200141370372246700240120ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for custom {@link Logger} implementations: which messages reach the
 * callback at a given {@link InfoLogLevel}, and changing the level on the
 * logger itself.
 */
public class LoggerTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /**
   * Creates a logger with the max log level passed by {@code options}; every
   * received message is checked to be non-null and non-empty and is counted
   * in {@code counter}.
   */
  private static Logger newCountingLogger(final Options options,
      final AtomicInteger counter) {
    return new Logger(options) {
      @Override
      protected void log(final InfoLogLevel infoLogLevel, final String logMsg) {
        assertThat(logMsg).isNotNull();
        assertThat(logMsg.length()).isGreaterThan(0);
        counter.incrementAndGet();
      }
    };
  }

  /** Same as {@link #newCountingLogger(Options, AtomicInteger)}, for {@link DBOptions}. */
  private static Logger newCountingLogger(final DBOptions options,
      final AtomicInteger counter) {
    return new Logger(options) {
      @Override
      protected void log(final InfoLogLevel infoLogLevel, final String logMsg) {
        assertThat(logMsg).isNotNull();
        assertThat(logMsg.length()).isGreaterThan(0);
        counter.incrementAndGet();
      }
    };
  }

  @Test
  public void customLogger() throws RocksDBException {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    try (final Options options = new Options()
             .setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter)) {
      // Set custom logger to options
      options.setLogger(logger);

      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath())) {
        // there should be more than zero received log messages in
        // debug level.
        assertThat(logMessageCounter.get()).isGreaterThan(0);
      }
    }
  }

  @Test
  public void warnLogger() throws RocksDBException {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    try (final Options options = new Options()
             .setInfoLogLevel(InfoLogLevel.WARN_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter)) {
      // Set custom logger to options
      options.setLogger(logger);

      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath())) {
        // there should be zero messages
        // using warn level as log level.
        assertThat(logMessageCounter.get()).isEqualTo(0);
      }
    }
  }

  @Test
  public void fatalLogger() throws RocksDBException {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    try (final Options options = new Options()
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter)) {
      // Set custom logger to options
      options.setLogger(logger);

      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath())) {
        // there should be zero messages
        // using fatal level as log level.
        assertThat(logMessageCounter.get()).isEqualTo(0);
      }
    }
  }

  @Test
  public void dbOptionsLogger() throws RocksDBException {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    try (final DBOptions options = new DBOptions()
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter)) {
      // Set custom logger to options
      options.setLogger(logger);

      // Explicit type arguments: the enhanced for loop below iterates the
      // handles as ColumnFamilyHandle.
      final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
          new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
      final List<ColumnFamilyHandle> cfHandles = new ArrayList<>();

      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath(),
          cfDescriptors, cfHandles)) {
        try {
          // there should be zero messages
          // using fatal level as log level.
          assertThat(logMessageCounter.get()).isEqualTo(0);
        } finally {
          for (final ColumnFamilyHandle columnFamilyHandle : cfHandles) {
            columnFamilyHandle.close();
          }
        }
      }
    }
  }

  @Test
  public void setWarnLogLevel() {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    try (final Options options = new Options()
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter)) {
      assertThat(logger.infoLogLevel()).isEqualTo(InfoLogLevel.FATAL_LEVEL);
      logger.setInfoLogLevel(InfoLogLevel.WARN_LEVEL);
      assertThat(logger.infoLogLevel()).isEqualTo(InfoLogLevel.WARN_LEVEL);
    }
  }

  @Test
  public void setInfoLogLevel() {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    try (final Options options = new Options()
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter)) {
      assertThat(logger.infoLogLevel()).isEqualTo(InfoLogLevel.FATAL_LEVEL);
      logger.setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL);
      assertThat(logger.infoLogLevel()).isEqualTo(InfoLogLevel.DEBUG_LEVEL);
    }
  }

  @Test
  public void changeLogLevelAtRuntime() throws RocksDBException {
    final AtomicInteger logMessageCounter = new AtomicInteger();
    // FlushOptions is managed here so it is closed when the test ends.
    try (final Options options = new Options()
             .setInfoLogLevel(InfoLogLevel.FATAL_LEVEL)
             .setCreateIfMissing(true);
         final Logger logger = newCountingLogger(options, logMessageCounter);
         final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {
      // Set custom logger to options
      options.setLogger(logger);

      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath())) {
        // there should be zero messages
        // using fatal level as log level.
        assertThat(logMessageCounter.get()).isEqualTo(0);

        // change log level to debug level
        logger.setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL);

        db.put("key".getBytes(), "value".getBytes());
        db.flush(flushOptions);

        // messages shall be received due to previous actions.
        assertThat(logMessageCounter.get()).isNotEqualTo(0);
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/MemTableTest.java000066400000000000000000000077071370372246700242770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class MemTableTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void hashSkipListMemTable() throws RocksDBException { try(final Options options = new Options()) { // Test HashSkipListMemTableConfig HashSkipListMemTableConfig memTableConfig = new HashSkipListMemTableConfig(); assertThat(memTableConfig.bucketCount()). isEqualTo(1000000); memTableConfig.setBucketCount(2000000); assertThat(memTableConfig.bucketCount()). isEqualTo(2000000); assertThat(memTableConfig.height()). isEqualTo(4); memTableConfig.setHeight(5); assertThat(memTableConfig.height()). isEqualTo(5); assertThat(memTableConfig.branchingFactor()). isEqualTo(4); memTableConfig.setBranchingFactor(6); assertThat(memTableConfig.branchingFactor()). isEqualTo(6); options.setMemTableConfig(memTableConfig); } } @Test public void skipListMemTable() throws RocksDBException { try(final Options options = new Options()) { SkipListMemTableConfig skipMemTableConfig = new SkipListMemTableConfig(); assertThat(skipMemTableConfig.lookahead()). isEqualTo(0); skipMemTableConfig.setLookahead(20); assertThat(skipMemTableConfig.lookahead()). isEqualTo(20); options.setMemTableConfig(skipMemTableConfig); } } @Test public void hashLinkedListMemTable() throws RocksDBException { try(final Options options = new Options()) { HashLinkedListMemTableConfig hashLinkedListMemTableConfig = new HashLinkedListMemTableConfig(); assertThat(hashLinkedListMemTableConfig.bucketCount()). isEqualTo(50000); hashLinkedListMemTableConfig.setBucketCount(100000); assertThat(hashLinkedListMemTableConfig.bucketCount()). 
isEqualTo(100000); assertThat(hashLinkedListMemTableConfig.hugePageTlbSize()). isEqualTo(0); hashLinkedListMemTableConfig.setHugePageTlbSize(1); assertThat(hashLinkedListMemTableConfig.hugePageTlbSize()). isEqualTo(1); assertThat(hashLinkedListMemTableConfig. bucketEntriesLoggingThreshold()). isEqualTo(4096); hashLinkedListMemTableConfig. setBucketEntriesLoggingThreshold(200); assertThat(hashLinkedListMemTableConfig. bucketEntriesLoggingThreshold()). isEqualTo(200); assertThat(hashLinkedListMemTableConfig. ifLogBucketDistWhenFlush()).isTrue(); hashLinkedListMemTableConfig. setIfLogBucketDistWhenFlush(false); assertThat(hashLinkedListMemTableConfig. ifLogBucketDistWhenFlush()).isFalse(); assertThat(hashLinkedListMemTableConfig. thresholdUseSkiplist()). isEqualTo(256); hashLinkedListMemTableConfig.setThresholdUseSkiplist(29); assertThat(hashLinkedListMemTableConfig. thresholdUseSkiplist()). isEqualTo(29); options.setMemTableConfig(hashLinkedListMemTableConfig); } } @Test public void vectorMemTable() throws RocksDBException { try(final Options options = new Options()) { VectorMemTableConfig vectorMemTableConfig = new VectorMemTableConfig(); assertThat(vectorMemTableConfig.reservedSize()). isEqualTo(0); vectorMemTableConfig.setReservedSize(123); assertThat(vectorMemTableConfig.reservedSize()). isEqualTo(123); options.setMemTableConfig(vectorMemTableConfig); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/MemoryUtilTest.java000066400000000000000000000140501370372246700247040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.nio.charset.StandardCharsets;
import java.util.*;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@code MemoryUtil.getApproximateMemoryUsageByType}, comparing its
 * per-type figures with the corresponding aggregated database properties.
 */
public class MemoryUtilTest {

  private static final String MEMTABLE_SIZE = "rocksdb.size-all-mem-tables";
  private static final String UNFLUSHED_MEMTABLE_SIZE = "rocksdb.cur-size-all-mem-tables";
  private static final String TABLE_READERS = "rocksdb.estimate-table-readers-mem";

  private final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8);
  private final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8);

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule public TemporaryFolder dbFolder1 = new TemporaryFolder();
  @Rule public TemporaryFolder dbFolder2 = new TemporaryFolder();

  /**
   * Test MemoryUtil.getApproximateMemoryUsageByType before and after a put + get
   */
  @Test
  public void getApproximateMemoryUsageByType() throws RocksDBException {
    try (final Cache cache = new LRUCache(8 * 1024 * 1024);
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache));
         final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true);
         final RocksDB db = RocksDB.open(options, dbFolder1.getRoot().getAbsolutePath())) {

      // Explicit type arguments: the numeric assertions on the map values
      // below need the value type to be Long.
      final List<RocksDB> dbs = new ArrayList<>();
      dbs.add(db);
      final Set<Cache> caches = new HashSet<>();
      caches.add(cache);

      // Before any write: figures match the properties and the cache is empty.
      Map<MemoryUsageType, Long> usage =
          MemoryUtil.getApproximateMemoryUsageByType(dbs, caches);

      assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo(
          db.getAggregatedLongProperty(MEMTABLE_SIZE));
      assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo(
          db.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE));
      assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo(
          db.getAggregatedLongProperty(TABLE_READERS));
      assertThat(usage.get(MemoryUsageType.kCacheTotal)).isEqualTo(0);

      db.put(key, value);
      db.flush(flushOptions);
      db.get(key);

      // After put + flush + get: every figure is non-zero and still matches.
      usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches);
      assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isGreaterThan(0);
      assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo(
          db.getAggregatedLongProperty(MEMTABLE_SIZE));
      assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isGreaterThan(0);
      assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo(
          db.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE));
      assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isGreaterThan(0);
      assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo(
          db.getAggregatedLongProperty(TABLE_READERS));
      assertThat(usage.get(MemoryUsageType.kCacheTotal)).isGreaterThan(0);
    }
  }

  /**
   * Test MemoryUtil.getApproximateMemoryUsageByType with null inputs
   */
  @Test
  public void getApproximateMemoryUsageByTypeNulls() throws RocksDBException {
    final Map<MemoryUsageType, Long> usage =
        MemoryUtil.getApproximateMemoryUsageByType(null, null);

    // No entry is expected for any usage type.
    assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isNull();
    assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isNull();
    assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isNull();
    assertThat(usage.get(MemoryUsageType.kCacheTotal)).isNull();
  }

  /**
   * Test MemoryUtil.getApproximateMemoryUsageByType with two DBs and two caches
   */
  @Test
  public void getApproximateMemoryUsageByTypeMultiple() throws RocksDBException {
    try (final Cache cache1 = new LRUCache(1 * 1024 * 1024);
         final Options options1 = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache1));
         final RocksDB db1 = RocksDB.open(options1, dbFolder1.getRoot().getAbsolutePath());
         final Cache cache2 = new LRUCache(1 * 1024 * 1024);
         final Options options2 = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache2));
         final RocksDB db2 = RocksDB.open(options2, dbFolder2.getRoot().getAbsolutePath());
         final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) {

      final List<RocksDB> dbs = new ArrayList<>();
      dbs.add(db1);
      dbs.add(db2);
      final Set<Cache> caches = new HashSet<>();
      caches.add(cache1);
      caches.add(cache2);

      for (final RocksDB db : dbs) {
        db.put(key, value);
        db.flush(flushOptions);
        db.get(key);
      }

      // Each figure must equal the sum of the two databases' properties.
      final Map<MemoryUsageType, Long> usage =
          MemoryUtil.getApproximateMemoryUsageByType(dbs, caches);
      assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo(
          db1.getAggregatedLongProperty(MEMTABLE_SIZE)
              + db2.getAggregatedLongProperty(MEMTABLE_SIZE));
      assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo(
          db1.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)
              + db2.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE));
      assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo(
          db1.getAggregatedLongProperty(TABLE_READERS)
              + db2.getAggregatedLongProperty(TABLE_READERS));
      assertThat(usage.get(MemoryUsageType.kCacheTotal)).isGreaterThan(0);
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/MergeTest.java000066400000000000000000000372431370372246700236460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.ArrayList; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import static org.assertj.core.api.Assertions.assertThat; public class MergeTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void stringOption() throws InterruptedException, RocksDBException { try (final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperatorName("stringappend"); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // writing aa under key db.put("key".getBytes(), "aa".getBytes()); // merge bb under key db.merge("key".getBytes(), "bb".getBytes()); final byte[] value = db.get("key".getBytes()); final String strValue = new String(value); assertThat(strValue).isEqualTo("aa,bb"); } } private byte[] longToByteArray(long l) { ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE); buf.putLong(l); return buf.array(); } private long longFromByteArray(byte[] a) { ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE); buf.put(a); buf.flip(); return buf.getLong(); } @Test public void uint64AddOption() throws InterruptedException, RocksDBException { try (final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperatorName("uint64add"); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // writing (long)100 under key db.put("key".getBytes(), longToByteArray(100)); // merge (long)1 under key db.merge("key".getBytes(), longToByteArray(1)); final byte[] value = db.get("key".getBytes()); final long longValue = longFromByteArray(value); assertThat(longValue).isEqualTo(101); } } @Test public void cFStringOption() throws InterruptedException, RocksDBException { try (final 
ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() .setMergeOperatorName("stringappend"); final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() .setMergeOperatorName("stringappend") ) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt2) ); final List columnFamilyHandleList = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { try { // writing aa under key db.put(columnFamilyHandleList.get(1), "cfkey".getBytes(), "aa".getBytes()); // merge bb under key db.merge(columnFamilyHandleList.get(1), "cfkey".getBytes(), "bb".getBytes()); byte[] value = db.get(columnFamilyHandleList.get(1), "cfkey".getBytes()); String strValue = new String(value); assertThat(strValue).isEqualTo("aa,bb"); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandleList) { handle.close(); } } } } } @Test public void cFUInt64AddOption() throws InterruptedException, RocksDBException { try (final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() .setMergeOperatorName("uint64add"); final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() .setMergeOperatorName("uint64add") ) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt2) ); final List columnFamilyHandleList = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { try { // writing (long)100 under key db.put(columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(100)); // merge (long)1 
under key db.merge(columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(1)); byte[] value = db.get(columnFamilyHandleList.get(1), "cfkey".getBytes()); long longValue = longFromByteArray(value); assertThat(longValue).isEqualTo(101); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandleList) { handle.close(); } } } } } @Test public void operatorOption() throws InterruptedException, RocksDBException { try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperator(stringAppendOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // Writing aa under key db.put("key".getBytes(), "aa".getBytes()); // Writing bb under key db.merge("key".getBytes(), "bb".getBytes()); final byte[] value = db.get("key".getBytes()); final String strValue = new String(value); assertThat(strValue).isEqualTo("aa,bb"); } } @Test public void uint64AddOperatorOption() throws InterruptedException, RocksDBException { try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperator(uint64AddOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // Writing (long)100 under key db.put("key".getBytes(), longToByteArray(100)); // Writing (long)1 under key db.merge("key".getBytes(), longToByteArray(1)); final byte[] value = db.get("key".getBytes()); final long longValue = longFromByteArray(value); assertThat(longValue).isEqualTo(101); } } @Test public void cFOperatorOption() throws InterruptedException, RocksDBException { try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() .setMergeOperator(stringAppendOperator); final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() .setMergeOperator(stringAppendOperator) ) { final List 
cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2) ); final List columnFamilyHandleList = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList) ) { try { // writing aa under key db.put(columnFamilyHandleList.get(1), "cfkey".getBytes(), "aa".getBytes()); // merge bb under key db.merge(columnFamilyHandleList.get(1), "cfkey".getBytes(), "bb".getBytes()); byte[] value = db.get(columnFamilyHandleList.get(1), "cfkey".getBytes()); String strValue = new String(value); // Test also with createColumnFamily try (final ColumnFamilyOptions cfHandleOpts = new ColumnFamilyOptions() .setMergeOperator(stringAppendOperator); final ColumnFamilyHandle cfHandle = db.createColumnFamily( new ColumnFamilyDescriptor("new_cf2".getBytes(), cfHandleOpts)) ) { // writing xx under cfkey2 db.put(cfHandle, "cfkey2".getBytes(), "xx".getBytes()); // merge yy under cfkey2 db.merge(cfHandle, new WriteOptions(), "cfkey2".getBytes(), "yy".getBytes()); value = db.get(cfHandle, "cfkey2".getBytes()); String strValueTmpCf = new String(value); assertThat(strValue).isEqualTo("aa,bb"); assertThat(strValueTmpCf).isEqualTo("xx,yy"); } } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test public void cFUInt64AddOperatorOption() throws InterruptedException, RocksDBException { try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() .setMergeOperator(uint64AddOperator); final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() .setMergeOperator(uint64AddOperator) ) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, 
cfOpt1), new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2) ); final List columnFamilyHandleList = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList) ) { try { // writing (long)100 under key db.put(columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(100)); // merge (long)1 under key db.merge(columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(1)); byte[] value = db.get(columnFamilyHandleList.get(1), "cfkey".getBytes()); long longValue = longFromByteArray(value); // Test also with createColumnFamily try (final ColumnFamilyOptions cfHandleOpts = new ColumnFamilyOptions() .setMergeOperator(uint64AddOperator); final ColumnFamilyHandle cfHandle = db.createColumnFamily( new ColumnFamilyDescriptor("new_cf2".getBytes(), cfHandleOpts)) ) { // writing (long)200 under cfkey2 db.put(cfHandle, "cfkey2".getBytes(), longToByteArray(200)); // merge (long)50 under cfkey2 db.merge(cfHandle, new WriteOptions(), "cfkey2".getBytes(), longToByteArray(50)); value = db.get(cfHandle, "cfkey2".getBytes()); long longValueTmpCf = longFromByteArray(value); assertThat(longValue).isEqualTo(101); assertThat(longValueTmpCf).isEqualTo(250); } } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test public void operatorGcBehaviour() throws RocksDBException { try (final StringAppendOperator stringAppendOperator = new StringAppendOperator()) { try (final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperator(stringAppendOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } // test reuse try (final Options opt = new Options() .setMergeOperator(stringAppendOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { 
//no-op } // test param init try (final StringAppendOperator stringAppendOperator2 = new StringAppendOperator(); final Options opt = new Options() .setMergeOperator(stringAppendOperator2); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } // test replace one with another merge operator instance try (final Options opt = new Options() .setMergeOperator(stringAppendOperator); final StringAppendOperator newStringAppendOperator = new StringAppendOperator()) { opt.setMergeOperator(newStringAppendOperator); try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } } } } @Test public void uint64AddOperatorGcBehaviour() throws RocksDBException { try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator()) { try (final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperator(uint64AddOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } // test reuse try (final Options opt = new Options() .setMergeOperator(uint64AddOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } // test param init try (final UInt64AddOperator uint64AddOperator2 = new UInt64AddOperator(); final Options opt = new Options() .setMergeOperator(uint64AddOperator2); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } // test replace one with another merge operator instance try (final Options opt = new Options() .setMergeOperator(uint64AddOperator); final UInt64AddOperator newUInt64AddOperator = new UInt64AddOperator()) { opt.setMergeOperator(newUInt64AddOperator); try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { //no-op } } } } @Test public void emptyStringInSetMergeOperatorByName() { try (final Options opt = new Options() .setMergeOperatorName(""); final ColumnFamilyOptions cOpt = new ColumnFamilyOptions() .setMergeOperatorName("")) { //no-op } } 
@Test(expected = IllegalArgumentException.class) public void nullStringInSetMergeOperatorByNameOptions() { try (final Options opt = new Options()) { opt.setMergeOperatorName(null); } } @Test(expected = IllegalArgumentException.class) public void nullStringInSetMergeOperatorByNameColumnFamilyOptions() { try (final ColumnFamilyOptions opt = new ColumnFamilyOptions()) { opt.setMergeOperatorName(null); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/MixedOptionsTest.java000066400000000000000000000037771370372246700252360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class MixedOptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void mixedOptionsTest(){ // Set a table factory and check the names try(final Filter bloomFilter = new BloomFilter(); final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions() .setTableFormatConfig( new BlockBasedTableConfig().setFilterPolicy(bloomFilter)) ) { assertThat(cfOptions.tableFactoryName()).isEqualTo( "BlockBasedTable"); cfOptions.setTableFormatConfig(new PlainTableConfig()); assertThat(cfOptions.tableFactoryName()).isEqualTo("PlainTable"); // Initialize a dbOptions object from cf options and // db options try (final DBOptions dbOptions = new DBOptions(); final Options options = new Options(dbOptions, cfOptions)) { assertThat(options.tableFactoryName()).isEqualTo("PlainTable"); // Free instances } } // Test Optimize for statements try(final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions()) { cfOptions.optimizeUniversalStyleCompaction(); 
cfOptions.optimizeLevelStyleCompaction(); cfOptions.optimizeForPointLookup(1024); try(final Options options = new Options()) { options.optimizeLevelStyleCompaction(); options.optimizeLevelStyleCompaction(400); options.optimizeUniversalStyleCompaction(); options.optimizeUniversalStyleCompaction(400); options.optimizeForPointLookup(1024); options.prepareForBulkLoad(); } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java000066400000000000000000000060251370372246700300660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import org.rocksdb.MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder; import java.util.NoSuchElementException; import static org.assertj.core.api.Assertions.assertThat; public class MutableColumnFamilyOptionsTest { @Test public void builder() { final MutableColumnFamilyOptionsBuilder builder = MutableColumnFamilyOptions.builder(); builder .setWriteBufferSize(10) .setInplaceUpdateNumLocks(5) .setDisableAutoCompactions(true) .setParanoidFileChecks(true); assertThat(builder.writeBufferSize()).isEqualTo(10); assertThat(builder.inplaceUpdateNumLocks()).isEqualTo(5); assertThat(builder.disableAutoCompactions()).isEqualTo(true); assertThat(builder.paranoidFileChecks()).isEqualTo(true); } @Test(expected = NoSuchElementException.class) public void builder_getWhenNotSet() { final MutableColumnFamilyOptionsBuilder builder = MutableColumnFamilyOptions.builder(); builder.writeBufferSize(); } @Test public void builder_build() { final MutableColumnFamilyOptions options = MutableColumnFamilyOptions .builder() .setWriteBufferSize(10) .setParanoidFileChecks(true) .build(); assertThat(options.getKeys().length).isEqualTo(2); 
assertThat(options.getValues().length).isEqualTo(2); assertThat(options.getKeys()[0]) .isEqualTo( MutableColumnFamilyOptions.MemtableOption.write_buffer_size.name()); assertThat(options.getValues()[0]).isEqualTo("10"); assertThat(options.getKeys()[1]) .isEqualTo( MutableColumnFamilyOptions.MiscOption.paranoid_file_checks.name()); assertThat(options.getValues()[1]).isEqualTo("true"); } @Test public void mutableColumnFamilyOptions_toString() { final String str = MutableColumnFamilyOptions .builder() .setWriteBufferSize(10) .setInplaceUpdateNumLocks(5) .setDisableAutoCompactions(true) .setParanoidFileChecks(true) .build() .toString(); assertThat(str).isEqualTo("write_buffer_size=10;inplace_update_num_locks=5;" + "disable_auto_compactions=true;paranoid_file_checks=true"); } @Test public void mutableColumnFamilyOptions_parse() { final String str = "write_buffer_size=10;inplace_update_num_locks=5;" + "disable_auto_compactions=true;paranoid_file_checks=true"; final MutableColumnFamilyOptionsBuilder builder = MutableColumnFamilyOptions.parse(str); assertThat(builder.writeBufferSize()).isEqualTo(10); assertThat(builder.inplaceUpdateNumLocks()).isEqualTo(5); assertThat(builder.disableAutoCompactions()).isEqualTo(true); assertThat(builder.paranoidFileChecks()).isEqualTo(true); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java000066400000000000000000000052621370372246700257560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Test; import org.rocksdb.MutableDBOptions.MutableDBOptionsBuilder; import java.util.NoSuchElementException; import static org.assertj.core.api.Assertions.assertThat; public class MutableDBOptionsTest { @Test public void builder() { final MutableDBOptionsBuilder builder = MutableDBOptions.builder(); builder .setBytesPerSync(1024 * 1024 * 7) .setMaxBackgroundJobs(5) .setAvoidFlushDuringShutdown(false); assertThat(builder.bytesPerSync()).isEqualTo(1024 * 1024 * 7); assertThat(builder.maxBackgroundJobs()).isEqualTo(5); assertThat(builder.avoidFlushDuringShutdown()).isEqualTo(false); } @Test(expected = NoSuchElementException.class) public void builder_getWhenNotSet() { final MutableDBOptionsBuilder builder = MutableDBOptions.builder(); builder.bytesPerSync(); } @Test public void builder_build() { final MutableDBOptions options = MutableDBOptions .builder() .setBytesPerSync(1024 * 1024 * 7) .setMaxBackgroundJobs(5) .build(); assertThat(options.getKeys().length).isEqualTo(2); assertThat(options.getValues().length).isEqualTo(2); assertThat(options.getKeys()[0]) .isEqualTo( MutableDBOptions.DBOption.bytes_per_sync.name()); assertThat(options.getValues()[0]).isEqualTo("7340032"); assertThat(options.getKeys()[1]) .isEqualTo( MutableDBOptions.DBOption.max_background_jobs.name()); assertThat(options.getValues()[1]).isEqualTo("5"); } @Test public void mutableDBOptions_toString() { final String str = MutableDBOptions .builder() .setMaxOpenFiles(99) .setDelayedWriteRate(789) .setAvoidFlushDuringShutdown(true) .setStrictBytesPerSync(true) .build() .toString(); assertThat(str).isEqualTo("max_open_files=99;delayed_write_rate=789;" + "avoid_flush_during_shutdown=true;strict_bytes_per_sync=true"); } @Test public void mutableDBOptions_parse() { final String str = "max_open_files=99;delayed_write_rate=789;" + "avoid_flush_during_shutdown=true"; final MutableDBOptionsBuilder builder = MutableDBOptions.parse(str); 
assertThat(builder.maxOpenFiles()).isEqualTo(99); assertThat(builder.delayedWriteRate()).isEqualTo(789); assertThat(builder.avoidFlushDuringShutdown()).isEqualTo(true); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java000066400000000000000000000054241370372246700274220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.*; import java.util.Comparator; import static org.junit.Assert.assertEquals; public class NativeComparatorWrapperTest { @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); private static final Random random = new Random(); @Test public void rountrip() throws RocksDBException { final String dbPath = dbFolder.getRoot().getAbsolutePath(); final int ITERATIONS = 1_000; final String[] storedKeys = new String[ITERATIONS]; try (final NativeStringComparatorWrapper comparator = new NativeStringComparatorWrapper(); final Options opt = new Options() .setCreateIfMissing(true) .setComparator(comparator)) { // store random integer keys try (final RocksDB db = RocksDB.open(opt, dbPath)) { for (int i = 0; i < ITERATIONS; i++) { final String strKey = randomString(); final byte key[] = strKey.getBytes(); // does key already exist (avoid duplicates) if (i > 0 && db.get(key) != null) { i--; // generate a different key } else { db.put(key, "value".getBytes()); storedKeys[i] = strKey; } } } // sort the stored keys into ascending alpha-numeric order Arrays.sort(storedKeys, new Comparator() { @Override public int compare(final String o1, final String o2) { return o1.compareTo(o2); } }); // re-open db and read from start to end // string keys should be in ascending // order try 
(final RocksDB db = RocksDB.open(opt, dbPath); final RocksIterator it = db.newIterator()) { int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { final String strKey = new String(it.key()); assertEquals(storedKeys[count++], strKey); } } } } private String randomString() { final char[] chars = new char[12]; for(int i = 0; i < 12; i++) { final int letterCode = random.nextInt(24); final char letter = (char) (((int) 'a') + letterCode); chars[i] = letter; } return String.copyValueOf(chars); } public static class NativeStringComparatorWrapper extends NativeComparatorWrapper { @Override protected long initializeNative(final long... nativeParameterHandles) { return newStringComparator(); } private native long newStringComparator(); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java000066400000000000000000000026221370372246700265020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.rocksdb.util.Environment; import java.io.File; import java.io.IOException; import java.nio.file.*; import static org.assertj.core.api.Assertions.assertThat; public class NativeLibraryLoaderTest { @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); @Test public void tempFolder() throws IOException { NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( temporaryFolder.getRoot().getAbsolutePath()); final Path path = Paths.get(temporaryFolder.getRoot().getAbsolutePath(), Environment.getJniLibraryFileName("rocksdb")); assertThat(Files.exists(path)).isTrue(); assertThat(Files.isReadable(path)).isTrue(); } @Test public void overridesExistingLibrary() throws IOException { File first = NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( temporaryFolder.getRoot().getAbsolutePath()); NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( temporaryFolder.getRoot().getAbsolutePath()); assertThat(first.exists()).isTrue(); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java000066400000000000000000000113071370372246700273400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static org.assertj.core.api.Assertions.assertThat; public class OptimisticTransactionDBTest { @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void open() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final OptimisticTransactionDB otdb = OptimisticTransactionDB.open(options, dbFolder.getRoot().getAbsolutePath())) { assertThat(otdb).isNotNull(); } } @Test public void open_columnFamilies() throws RocksDBException { try(final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final ColumnFamilyOptions myCfOpts = new ColumnFamilyOptions()) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("myCf".getBytes(), myCfOpts)); final List columnFamilyHandles = new ArrayList<>(); try (final OptimisticTransactionDB otdb = OptimisticTransactionDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { assertThat(otdb).isNotNull(); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void beginTransaction() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( options, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions()) { try(final Transaction txn = otdb.beginTransaction(writeOptions)) { assertThat(txn).isNotNull(); } } } @Test public void beginTransaction_transactionOptions() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final OptimisticTransactionDB otdb = 
OptimisticTransactionDB.open( options, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions(); final OptimisticTransactionOptions optimisticTxnOptions = new OptimisticTransactionOptions()) { try(final Transaction txn = otdb.beginTransaction(writeOptions, optimisticTxnOptions)) { assertThat(txn).isNotNull(); } } } @Test public void beginTransaction_withOld() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( options, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions()) { try(final Transaction txn = otdb.beginTransaction(writeOptions)) { final Transaction txnReused = otdb.beginTransaction(writeOptions, txn); assertThat(txnReused).isSameAs(txn); } } } @Test public void beginTransaction_withOld_transactionOptions() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final OptimisticTransactionDB otdb = OptimisticTransactionDB.open( options, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions(); final OptimisticTransactionOptions optimisticTxnOptions = new OptimisticTransactionOptions()) { try(final Transaction txn = otdb.beginTransaction(writeOptions)) { final Transaction txnReused = otdb.beginTransaction(writeOptions, optimisticTxnOptions, txn); assertThat(txnReused).isSameAs(txn); } } } @Test public void baseDB() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final OptimisticTransactionDB otdb = OptimisticTransactionDB.open(options, dbFolder.getRoot().getAbsolutePath())) { assertThat(otdb).isNotNull(); final RocksDB db = otdb.getBaseDB(); assertThat(db).isNotNull(); assertThat(db.isOwningHandle()).isFalse(); } } } 
rocksdb-6.11.4/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java000066400000000000000000000023151370372246700305050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import org.rocksdb.util.BytewiseComparator; import java.util.Random; import static org.assertj.core.api.Assertions.assertThat; public class OptimisticTransactionOptionsTest { private static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void setSnapshot() { try (final OptimisticTransactionOptions opt = new OptimisticTransactionOptions()) { final boolean boolValue = rand.nextBoolean(); opt.setSetSnapshot(boolValue); assertThat(opt.isSetSnapshot()).isEqualTo(boolValue); } } @Test public void comparator() { try (final OptimisticTransactionOptions opt = new OptimisticTransactionOptions(); final ComparatorOptions copt = new ComparatorOptions() .setUseDirectBuffer(true); final AbstractComparator comparator = new BytewiseComparator(copt)) { opt.setComparator(comparator); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java000066400000000000000000000276111370372246700271370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;

/**
 * Transaction tests run against an {@link OptimisticTransactionDB}.
 *
 * The conflict tests share one scenario: a first transaction seeds the
 * key(s), then {@code txn3} reads them "for update" while {@code txn2}
 * overwrites and commits one of them. {@code txn3.commit()} is then expected
 * to fail with {@code Status.Code.Busy}, unless the read was undone first.
 */
public class OptimisticTransactionTest extends AbstractTransactionTest {

  /**
   * getForUpdate on a key in the test column family, followed by a competing
   * committed write, must make the reader's commit fail with Busy.
   *
   * @throws RocksDBException on unexpected database errors
   */
  @Test
  public void getForUpdate_cf_conflict() throws RocksDBException {
    final byte[] k1 = "key1".getBytes(UTF_8);
    final byte[] v1 = "value1".getBytes(UTF_8);
    final byte[] v12 = "value12".getBytes(UTF_8);
    try(final DBContainer dbContainer = startDb();
        final ReadOptions readOptions = new ReadOptions()) {
      final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily();

      try(final Transaction txn = dbContainer.beginTransaction()) {
        txn.put(testCf, k1, v1);
        assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1);
        txn.commit();
      }

      try(final Transaction txn2 = dbContainer.beginTransaction()) {
        try(final Transaction txn3 = dbContainer.beginTransaction()) {
          assertThat(txn3.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1);

          // NOTE: txn2 updates k1, during txn3
          txn2.put(testCf, k1, v12);
          assertThat(txn2.get(testCf, readOptions, k1)).isEqualTo(v12);
          txn2.commit();

          try {
            txn3.commit(); // should cause an exception!
          } catch(final RocksDBException e) {
            assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy);
            return;
          }
        }
      }

      fail("Expected an exception for put after getForUpdate from conflicting " +
          "transactions");
    }
  }

  /**
   * Same scenario as {@link #getForUpdate_cf_conflict()} using the overloads
   * that take no column family handle.
   *
   * @throws RocksDBException on unexpected database errors
   */
  @Test
  public void getForUpdate_conflict() throws RocksDBException {
    final byte[] k1 = "key1".getBytes(UTF_8);
    final byte[] v1 = "value1".getBytes(UTF_8);
    final byte[] v12 = "value12".getBytes(UTF_8);
    try(final DBContainer dbContainer = startDb();
        final ReadOptions readOptions = new ReadOptions()) {

      try(final Transaction txn = dbContainer.beginTransaction()) {
        txn.put(k1, v1);
        assertThat(txn.get(readOptions, k1)).isEqualTo(v1);
        txn.commit();
      }

      try(final Transaction txn2 = dbContainer.beginTransaction()) {
        try(final Transaction txn3 = dbContainer.beginTransaction()) {
          assertThat(txn3.getForUpdate(readOptions, k1, true)).isEqualTo(v1);

          // NOTE: txn2 updates k1, during txn3
          txn2.put(k1, v12);
          assertThat(txn2.get(readOptions, k1)).isEqualTo(v12);
          txn2.commit();

          try {
            txn3.commit(); // should cause an exception!
          } catch(final RocksDBException e) {
            assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy);
            return;
          }
        }
      }

      fail("Expected an exception for put after getForUpdate from conflicting " +
          "transactions");
    }
  }

  /**
   * multiGetForUpdate over two keys in the test column family; a competing
   * committed write to the first key must make the reader's commit fail with
   * Busy.
   *
   * @throws RocksDBException on unexpected database errors
   */
  @Test
  public void multiGetForUpdate_cf_conflict() throws RocksDBException {
    final byte[][] keys = new byte[][] {
        "key1".getBytes(UTF_8),
        "key2".getBytes(UTF_8)};
    final byte[][] values = new byte[][] {
        "value1".getBytes(UTF_8),
        "value2".getBytes(UTF_8)};
    final byte[] otherValue = "otherValue".getBytes(UTF_8);

    try(final DBContainer dbContainer = startDb();
        final ReadOptions readOptions = new ReadOptions()) {
      final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily();
      // one column family handle per key
      final List<ColumnFamilyHandle> cfList = Arrays.asList(testCf, testCf);

      try(final Transaction txn = dbContainer.beginTransaction()) {
        txn.put(testCf, keys[0], values[0]);
        txn.put(testCf, keys[1], values[1]);
        assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values);
        txn.commit();
      }

      try(final Transaction txn2 = dbContainer.beginTransaction()) {
        try(final Transaction txn3 = dbContainer.beginTransaction()) {
          assertThat(txn3.multiGetForUpdate(readOptions, cfList, keys))
              .isEqualTo(values);

          // NOTE: txn2 updates k1, during txn3
          txn2.put(testCf, keys[0], otherValue);
          assertThat(txn2.get(testCf, readOptions, keys[0]))
              .isEqualTo(otherValue);
          txn2.commit();

          try {
            txn3.commit(); // should cause an exception!
          } catch(final RocksDBException e) {
            assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy);
            return;
          }
        }
      }

      fail("Expected an exception for put after getForUpdate from conflicting " +
          "transactions");
    }
  }

  /**
   * Same scenario as {@link #multiGetForUpdate_cf_conflict()} using the
   * overloads that take no column family handles.
   *
   * @throws RocksDBException on unexpected database errors
   */
  @Test
  public void multiGetForUpdate_conflict() throws RocksDBException {
    final byte[][] keys = new byte[][] {
        "key1".getBytes(UTF_8),
        "key2".getBytes(UTF_8)};
    final byte[][] values = new byte[][] {
        "value1".getBytes(UTF_8),
        "value2".getBytes(UTF_8)};
    final byte[] otherValue = "otherValue".getBytes(UTF_8);

    try(final DBContainer dbContainer = startDb();
        final ReadOptions readOptions = new ReadOptions()) {
      try(final Transaction txn = dbContainer.beginTransaction()) {
        txn.put(keys[0], values[0]);
        txn.put(keys[1], values[1]);
        assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values);
        txn.commit();
      }

      try(final Transaction txn2 = dbContainer.beginTransaction()) {
        try(final Transaction txn3 = dbContainer.beginTransaction()) {
          assertThat(txn3.multiGetForUpdate(readOptions, keys))
              .isEqualTo(values);

          // NOTE: txn2 updates k1, during txn3
          txn2.put(keys[0], otherValue);
          assertThat(txn2.get(readOptions, keys[0]))
              .isEqualTo(otherValue);
          txn2.commit();

          try {
            txn3.commit(); // should cause an exception!
          } catch(final RocksDBException e) {
            assertThat(e.getStatus().getCode()).isSameAs(Status.Code.Busy);
            return;
          }
        }
      }

      fail("Expected an exception for put after getForUpdate from conflicting " +
          "transactions");
    }
  }

  /**
   * After undoGetForUpdate on the key (test column family), a competing
   * committed write must no longer prevent the reader from committing.
   *
   * @throws RocksDBException if any commit fails, which fails the test
   */
  @Test
  public void undoGetForUpdate_cf_conflict() throws RocksDBException {
    final byte[] k1 = "key1".getBytes(UTF_8);
    final byte[] v1 = "value1".getBytes(UTF_8);
    final byte[] v12 = "value12".getBytes(UTF_8);
    try(final DBContainer dbContainer = startDb();
        final ReadOptions readOptions = new ReadOptions()) {
      final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily();

      try(final Transaction txn = dbContainer.beginTransaction()) {
        txn.put(testCf, k1, v1);
        assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1);
        txn.commit();
      }

      try(final Transaction txn2 = dbContainer.beginTransaction()) {
        try(final Transaction txn3 = dbContainer.beginTransaction()) {
          assertThat(txn3.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1);

          // undo the getForUpdate
          txn3.undoGetForUpdate(testCf, k1);

          // NOTE: txn2 updates k1, during txn3
          txn2.put(testCf, k1, v12);
          assertThat(txn2.get(testCf, readOptions, k1)).isEqualTo(v12);
          txn2.commit();

          // should not cause an exception
          // because we undid the getForUpdate above!
          txn3.commit();
        }
      }
    }
  }

  /**
   * Same scenario as {@link #undoGetForUpdate_cf_conflict()} using the
   * overloads that take no column family handle.
   *
   * @throws RocksDBException if any commit fails, which fails the test
   */
  @Test
  public void undoGetForUpdate_conflict() throws RocksDBException {
    final byte[] k1 = "key1".getBytes(UTF_8);
    final byte[] v1 = "value1".getBytes(UTF_8);
    final byte[] v12 = "value12".getBytes(UTF_8);
    try(final DBContainer dbContainer = startDb();
        final ReadOptions readOptions = new ReadOptions()) {

      try(final Transaction txn = dbContainer.beginTransaction()) {
        txn.put(k1, v1);
        assertThat(txn.get(readOptions, k1)).isEqualTo(v1);
        txn.commit();
      }

      try(final Transaction txn2 = dbContainer.beginTransaction()) {
        try(final Transaction txn3 = dbContainer.beginTransaction()) {
          assertThat(txn3.getForUpdate(readOptions, k1, true)).isEqualTo(v1);

          // undo the getForUpdate
          txn3.undoGetForUpdate(k1);

          // NOTE: txn2 updates k1, during txn3
          txn2.put(k1, v12);
          assertThat(txn2.get(readOptions, k1)).isEqualTo(v12);
          txn2.commit();

          // should not cause an exception
          // because we undid the getForUpdate above!
          txn3.commit();
        }
      }
    }
  }

  /**
   * A fresh transaction has an empty name, and setName must be rejected with
   * {@code Status.Code.InvalidArgument}.
   *
   * @throws RocksDBException on unexpected database errors
   */
  @Test
  public void name() throws RocksDBException {
    try(final DBContainer dbContainer = startDb();
        final Transaction txn = dbContainer.beginTransaction()) {
      assertThat(txn.getName()).isEmpty();
      final String name = "my-transaction-" + rand.nextLong();

      try {
        txn.setName(name);
      } catch(final RocksDBException e) {
        // The previous form, assertThat(code == InvalidArgument), built an
        // assertion object but never evaluated it, so any status code passed.
        assertThat(e.getStatus().getCode())
            .isSameAs(Status.Code.InvalidArgument);
        return;
      }

      fail("Optimistic transactions cannot be named.");
    }
  }

  /**
   * Opens an {@link OptimisticTransactionDB} under {@code dbFolder} with the
   * default column family plus {@code TXN_TEST_COLUMN_FAMILY}, and wraps it
   * together with every native object created here in a container.
   *
   * @return the container that owns the opened database and its options
   * @throws RocksDBException if the database cannot be opened; the options
   *     created so far are closed before the exception is rethrown
   */
  @Override
  public OptimisticTransactionDBContainer startDb()
      throws RocksDBException {
    final DBOptions options = new DBOptions()
        .setCreateIfMissing(true)
        .setCreateMissingColumnFamilies(true);

    final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions();
    final List<ColumnFamilyDescriptor> columnFamilyDescriptors =
        Arrays.asList(
            new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
            new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY,
                columnFamilyOptions));
    // filled in by open() below
    final List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>();

    final OptimisticTransactionDB optimisticTxnDb;
    try {
      optimisticTxnDb = OptimisticTransactionDB.open(
          options, dbFolder.getRoot().getAbsolutePath(),
          columnFamilyDescriptors, columnFamilyHandles);
    } catch(final RocksDBException e) {
      // no container will exist to release these, so do it here
      columnFamilyOptions.close();
      options.close();
      throw e;
    }

    final WriteOptions writeOptions = new WriteOptions();
    final OptimisticTransactionOptions optimisticTxnOptions =
        new OptimisticTransactionOptions();

    return new OptimisticTransactionDBContainer(optimisticTxnOptions,
        writeOptions, columnFamilyHandles, optimisticTxnDb, columnFamilyOptions,
        options);
  }

  /**
   * {@code DBContainer} backed by an {@link OptimisticTransactionDB}; every
   * transaction it begins uses the same {@link OptimisticTransactionOptions}.
   */
  private static class OptimisticTransactionDBContainer
      extends DBContainer {

    private final OptimisticTransactionOptions optimisticTxnOptions;
    private final OptimisticTransactionDB optimisticTxnDb;

    public OptimisticTransactionDBContainer(
        final OptimisticTransactionOptions optimisticTxnOptions,
        final WriteOptions writeOptions,
        final List<ColumnFamilyHandle> columnFamilyHandles,
        final OptimisticTransactionDB optimisticTxnDb,
        final ColumnFamilyOptions columnFamilyOptions,
        final DBOptions options) {
      super(writeOptions, columnFamilyHandles, columnFamilyOptions,
          options);
      this.optimisticTxnOptions = optimisticTxnOptions;
      this.optimisticTxnDb = optimisticTxnDb;
    }

    /** Begins a transaction with the container's own write options. */
    @Override
    public Transaction beginTransaction() {
      return optimisticTxnDb.beginTransaction(writeOptions,
          optimisticTxnOptions);
    }

    /**
     * Begins a transaction with caller-supplied write options.
     *
     * @param writeOptions used instead of the container's own write options
     */
    @Override
    public Transaction beginTransaction(final WriteOptions writeOptions) {
      return optimisticTxnDb.beginTransaction(writeOptions,
          optimisticTxnOptions);
    }

    /**
     * Closes the transaction options, write options, column family handles,
     * the database and finally the DB options, in that order.
     */
    @Override
    public void close() {
      // NOTE(review): the columnFamilyOptions handed to the constructor is
      // not closed here; confirm whether another owner releases it.
      optimisticTxnOptions.close();
      writeOptions.close();
      for(final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) {
        columnFamilyHandle.close();
      }
      optimisticTxnDb.close();
      options.close();
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/OptionsTest.java000066400000000000000000001153301370372246700242340ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.nio.file.Paths; import java.util.*; import org.junit.ClassRule; import org.junit.Test; import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; import static org.assertj.core.api.Assertions.assertThat; public class OptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); public static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void copyConstructor() { Options origOpts = new Options(); origOpts.setNumLevels(rand.nextInt(8)); origOpts.setTargetFileSizeMultiplier(rand.nextInt(100)); origOpts.setLevel0StopWritesTrigger(rand.nextInt(50)); Options copyOpts = new Options(origOpts); assertThat(origOpts.numLevels()).isEqualTo(copyOpts.numLevels()); assertThat(origOpts.targetFileSizeMultiplier()).isEqualTo(copyOpts.targetFileSizeMultiplier()); assertThat(origOpts.level0StopWritesTrigger()).isEqualTo(copyOpts.level0StopWritesTrigger()); } @Test public void setIncreaseParallelism() { try (final Options opt = new Options()) { final int threads = Runtime.getRuntime().availableProcessors() * 2; opt.setIncreaseParallelism(threads); } } @Test public void writeBufferSize() throws RocksDBException { try (final Options opt = new Options()) { final long longValue = rand.nextLong(); opt.setWriteBufferSize(longValue); assertThat(opt.writeBufferSize()).isEqualTo(longValue); } } @Test public void maxWriteBufferNumber() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxWriteBufferNumber(intValue); assertThat(opt.maxWriteBufferNumber()).isEqualTo(intValue); } } @Test public void minWriteBufferNumberToMerge() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); 
opt.setMinWriteBufferNumberToMerge(intValue);
      assertThat(opt.minWriteBufferNumberToMerge()).isEqualTo(intValue);
    }
  }

  // The tests below share one shape: create an Options in try-with-resources,
  // write a random value through a setter and assert that the matching getter
  // returns exactly that value.

  @Test
  public void numLevels() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setNumLevels(intValue);
      assertThat(opt.numLevels()).isEqualTo(intValue);
    }
  }

  @Test
  public void levelZeroFileNumCompactionTrigger() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setLevelZeroFileNumCompactionTrigger(intValue);
      assertThat(opt.levelZeroFileNumCompactionTrigger()).isEqualTo(intValue);
    }
  }

  @Test
  public void levelZeroSlowdownWritesTrigger() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setLevelZeroSlowdownWritesTrigger(intValue);
      assertThat(opt.levelZeroSlowdownWritesTrigger()).isEqualTo(intValue);
    }
  }

  @Test
  public void levelZeroStopWritesTrigger() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setLevelZeroStopWritesTrigger(intValue);
      assertThat(opt.levelZeroStopWritesTrigger()).isEqualTo(intValue);
    }
  }

  @Test
  public void targetFileSizeBase() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setTargetFileSizeBase(longValue);
      assertThat(opt.targetFileSizeBase()).isEqualTo(longValue);
    }
  }

  @Test
  public void targetFileSizeMultiplier() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setTargetFileSizeMultiplier(intValue);
      assertThat(opt.targetFileSizeMultiplier()).isEqualTo(intValue);
    }
  }

  @Test
  public void maxBytesForLevelBase() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMaxBytesForLevelBase(longValue);
      assertThat(opt.maxBytesForLevelBase()).isEqualTo(longValue);
    }
  }

  @Test
  public void levelCompactionDynamicLevelBytes() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setLevelCompactionDynamicLevelBytes(boolValue);
      assertThat(opt.levelCompactionDynamicLevelBytes())
          .isEqualTo(boolValue);
    }
  }

  @Test
  public void maxBytesForLevelMultiplier() {
    try (final Options opt = new Options()) {
      final double doubleValue = rand.nextDouble();
      opt.setMaxBytesForLevelMultiplier(doubleValue);
      assertThat(opt.maxBytesForLevelMultiplier()).isEqualTo(doubleValue);
    }
  }

  // Array-valued option: two random ints are written and the same array
  // contents are expected back.
  @Test
  public void maxBytesForLevelMultiplierAdditional() {
    try (final Options opt = new Options()) {
      final int intValue1 = rand.nextInt();
      final int intValue2 = rand.nextInt();
      final int[] ints = new int[]{intValue1, intValue2};
      opt.setMaxBytesForLevelMultiplierAdditional(ints);
      assertThat(opt.maxBytesForLevelMultiplierAdditional()).isEqualTo(ints);
    }
  }

  @Test
  public void maxCompactionBytes() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMaxCompactionBytes(longValue);
      assertThat(opt.maxCompactionBytes()).isEqualTo(longValue);
    }
  }

  @Test
  public void softPendingCompactionBytesLimit() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setSoftPendingCompactionBytesLimit(longValue);
      assertThat(opt.softPendingCompactionBytesLimit()).isEqualTo(longValue);
    }
  }

  @Test
  public void hardPendingCompactionBytesLimit() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setHardPendingCompactionBytesLimit(longValue);
      assertThat(opt.hardPendingCompactionBytesLimit()).isEqualTo(longValue);
    }
  }

  @Test
  public void level0FileNumCompactionTrigger() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setLevel0FileNumCompactionTrigger(intValue);
      assertThat(opt.level0FileNumCompactionTrigger()).isEqualTo(intValue);
    }
  }

  @Test
  public void level0SlowdownWritesTrigger() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setLevel0SlowdownWritesTrigger(intValue);
      assertThat(opt.level0SlowdownWritesTrigger()).isEqualTo(intValue);
    }
  }

  @Test
  public void level0StopWritesTrigger() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setLevel0StopWritesTrigger(intValue);
      assertThat(opt.level0StopWritesTrigger()).isEqualTo(intValue);
    }
  }

  @Test
  public void arenaBlockSize() throws RocksDBException {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setArenaBlockSize(longValue);
      assertThat(opt.arenaBlockSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void disableAutoCompactions() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setDisableAutoCompactions(boolValue);
      assertThat(opt.disableAutoCompactions()).isEqualTo(boolValue);
    }
  }

  @Test
  public void maxSequentialSkipInIterations() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMaxSequentialSkipInIterations(longValue);
      assertThat(opt.maxSequentialSkipInIterations()).isEqualTo(longValue);
    }
  }

  @Test
  public void inplaceUpdateSupport() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setInplaceUpdateSupport(boolValue);
      assertThat(opt.inplaceUpdateSupport()).isEqualTo(boolValue);
    }
  }

  @Test
  public void inplaceUpdateNumLocks() throws RocksDBException {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setInplaceUpdateNumLocks(longValue);
      assertThat(opt.inplaceUpdateNumLocks()).isEqualTo(longValue);
    }
  }

  @Test
  public void memtablePrefixBloomSizeRatio() {
    try (final Options opt = new Options()) {
      final double doubleValue = rand.nextDouble();
      opt.setMemtablePrefixBloomSizeRatio(doubleValue);
      assertThat(opt.memtablePrefixBloomSizeRatio()).isEqualTo(doubleValue);
    }
  }

  @Test
  public void memtableHugePageSize() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMemtableHugePageSize(longValue);
      assertThat(opt.memtableHugePageSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void bloomLocality() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setBloomLocality(intValue);
      assertThat(opt.bloomLocality()).isEqualTo(intValue);
    }
  }

  @Test
  public void maxSuccessiveMerges() throws RocksDBException {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMaxSuccessiveMerges(longValue);
      assertThat(opt.maxSuccessiveMerges()).isEqualTo(longValue);
    }
  }

  @Test
  public void optimizeFiltersForHits() {
    try (final Options opt = new Options()) {
      final boolean aBoolean = rand.nextBoolean();
      opt.setOptimizeFiltersForHits(aBoolean);
      assertThat(opt.optimizeFiltersForHits()).isEqualTo(aBoolean);
    }
  }

  @Test
  public void createIfMissing() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setCreateIfMissing(boolValue);
      assertThat(opt.createIfMissing()).
          isEqualTo(boolValue);
    }
  }

  @Test
  public void createMissingColumnFamilies() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setCreateMissingColumnFamilies(boolValue);
      assertThat(opt.createMissingColumnFamilies()).
          isEqualTo(boolValue);
    }
  }

  @Test
  public void errorIfExists() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setErrorIfExists(boolValue);
      assertThat(opt.errorIfExists()).isEqualTo(boolValue);
    }
  }

  @Test
  public void paranoidChecks() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setParanoidChecks(boolValue);
      assertThat(opt.paranoidChecks()).
          isEqualTo(boolValue);
    }
  }

  @Test
  public void maxTotalWalSize() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMaxTotalWalSize(longValue);
      assertThat(opt.maxTotalWalSize()).
isEqualTo(longValue); } } @Test public void maxOpenFiles() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxOpenFiles(intValue); assertThat(opt.maxOpenFiles()).isEqualTo(intValue); } } @Test public void maxFileOpeningThreads() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxFileOpeningThreads(intValue); assertThat(opt.maxFileOpeningThreads()).isEqualTo(intValue); } } @Test public void useFsync() { try (final Options opt = new Options()) { final boolean boolValue = rand.nextBoolean(); opt.setUseFsync(boolValue); assertThat(opt.useFsync()).isEqualTo(boolValue); } } @Test public void dbPaths() { final List dbPaths = new ArrayList<>(); dbPaths.add(new DbPath(Paths.get("/a"), 10)); dbPaths.add(new DbPath(Paths.get("/b"), 100)); dbPaths.add(new DbPath(Paths.get("/c"), 1000)); try (final Options opt = new Options()) { assertThat(opt.dbPaths()).isEqualTo(Collections.emptyList()); opt.setDbPaths(dbPaths); assertThat(opt.dbPaths()).isEqualTo(dbPaths); } } @Test public void dbLogDir() { try (final Options opt = new Options()) { final String str = "path/to/DbLogDir"; opt.setDbLogDir(str); assertThat(opt.dbLogDir()).isEqualTo(str); } } @Test public void walDir() { try (final Options opt = new Options()) { final String str = "path/to/WalDir"; opt.setWalDir(str); assertThat(opt.walDir()).isEqualTo(str); } } @Test public void deleteObsoleteFilesPeriodMicros() { try (final Options opt = new Options()) { final long longValue = rand.nextLong(); opt.setDeleteObsoleteFilesPeriodMicros(longValue); assertThat(opt.deleteObsoleteFilesPeriodMicros()). isEqualTo(longValue); } } @SuppressWarnings("deprecated") @Test public void baseBackgroundCompactions() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setBaseBackgroundCompactions(intValue); assertThat(opt.baseBackgroundCompactions()). 
isEqualTo(intValue); } } @SuppressWarnings("deprecated") @Test public void maxBackgroundCompactions() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxBackgroundCompactions(intValue); assertThat(opt.maxBackgroundCompactions()). isEqualTo(intValue); } } @Test public void maxSubcompactions() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxSubcompactions(intValue); assertThat(opt.maxSubcompactions()). isEqualTo(intValue); } } @SuppressWarnings("deprecated") @Test public void maxBackgroundFlushes() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxBackgroundFlushes(intValue); assertThat(opt.maxBackgroundFlushes()). isEqualTo(intValue); } } @Test public void maxBackgroundJobs() { try (final Options opt = new Options()) { final int intValue = rand.nextInt(); opt.setMaxBackgroundJobs(intValue); assertThat(opt.maxBackgroundJobs()).isEqualTo(intValue); } } @Test public void maxLogFileSize() throws RocksDBException { try (final Options opt = new Options()) { final long longValue = rand.nextLong(); opt.setMaxLogFileSize(longValue); assertThat(opt.maxLogFileSize()).isEqualTo(longValue); } } @Test public void logFileTimeToRoll() throws RocksDBException { try (final Options opt = new Options()) { final long longValue = rand.nextLong(); opt.setLogFileTimeToRoll(longValue); assertThat(opt.logFileTimeToRoll()). 
isEqualTo(longValue);
    }
  }

  // The tests below share one shape: create an Options in try-with-resources,
  // write a random value through a setter and assert that the matching getter
  // returns exactly that value.

  @Test
  public void keepLogFileNum() throws RocksDBException {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setKeepLogFileNum(longValue);
      assertThat(opt.keepLogFileNum()).isEqualTo(longValue);
    }
  }

  @Test
  public void recycleLogFileNum() throws RocksDBException {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setRecycleLogFileNum(longValue);
      assertThat(opt.recycleLogFileNum()).isEqualTo(longValue);
    }
  }

  @Test
  public void maxManifestFileSize() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setMaxManifestFileSize(longValue);
      assertThat(opt.maxManifestFileSize()).
          isEqualTo(longValue);
    }
  }

  @Test
  public void tableCacheNumshardbits() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setTableCacheNumshardbits(intValue);
      assertThat(opt.tableCacheNumshardbits()).
          isEqualTo(intValue);
    }
  }

  @Test
  public void walSizeLimitMB() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setWalSizeLimitMB(longValue);
      assertThat(opt.walSizeLimitMB()).isEqualTo(longValue);
    }
  }

  @Test
  public void walTtlSeconds() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setWalTtlSeconds(longValue);
      assertThat(opt.walTtlSeconds()).isEqualTo(longValue);
    }
  }

  @Test
  public void manifestPreallocationSize() throws RocksDBException {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setManifestPreallocationSize(longValue);
      assertThat(opt.manifestPreallocationSize()).
          isEqualTo(longValue);
    }
  }

  @Test
  public void useDirectReads() {
    try(final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseDirectReads(boolValue);
      assertThat(opt.useDirectReads()).isEqualTo(boolValue);
    }
  }

  @Test
  public void useDirectIoForFlushAndCompaction() {
    try(final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseDirectIoForFlushAndCompaction(boolValue);
      assertThat(opt.useDirectIoForFlushAndCompaction()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowFAllocate() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowFAllocate(boolValue);
      assertThat(opt.allowFAllocate()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowMmapReads() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowMmapReads(boolValue);
      assertThat(opt.allowMmapReads()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowMmapWrites() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowMmapWrites(boolValue);
      assertThat(opt.allowMmapWrites()).isEqualTo(boolValue);
    }
  }

  @Test
  public void isFdCloseOnExec() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setIsFdCloseOnExec(boolValue);
      assertThat(opt.isFdCloseOnExec()).isEqualTo(boolValue);
    }
  }

  @Test
  public void statsDumpPeriodSec() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setStatsDumpPeriodSec(intValue);
      assertThat(opt.statsDumpPeriodSec()).isEqualTo(intValue);
    }
  }

  @Test
  public void statsPersistPeriodSec() {
    try (final Options opt = new Options()) {
      final int intValue = rand.nextInt();
      opt.setStatsPersistPeriodSec(intValue);
      assertThat(opt.statsPersistPeriodSec()).isEqualTo(intValue);
    }
  }

  @Test
  public void statsHistoryBufferSize() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
opt.setStatsHistoryBufferSize(longValue); assertThat(opt.statsHistoryBufferSize()).isEqualTo(longValue); } } @Test public void adviseRandomOnOpen() { try (final Options opt = new Options()) { final boolean boolValue = rand.nextBoolean(); opt.setAdviseRandomOnOpen(boolValue); assertThat(opt.adviseRandomOnOpen()).isEqualTo(boolValue); } } @Test public void dbWriteBufferSize() { try (final Options opt = new Options()) { final long longValue = rand.nextLong(); opt.setDbWriteBufferSize(longValue); assertThat(opt.dbWriteBufferSize()).isEqualTo(longValue); } } @Test public void setWriteBufferManager() throws RocksDBException { try (final Options opt = new Options(); final Cache cache = new LRUCache(1 * 1024 * 1024); final WriteBufferManager writeBufferManager = new WriteBufferManager(2000l, cache)) { opt.setWriteBufferManager(writeBufferManager); assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); } } @Test public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException { try (final Options opt = new Options(); final Cache cache = new LRUCache(1 * 1024 * 1024); final WriteBufferManager writeBufferManager = new WriteBufferManager(0l, cache)) { opt.setWriteBufferManager(writeBufferManager); assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); } } @Test public void accessHintOnCompactionStart() { try (final Options opt = new Options()) { final AccessHint accessHint = AccessHint.SEQUENTIAL; opt.setAccessHintOnCompactionStart(accessHint); assertThat(opt.accessHintOnCompactionStart()).isEqualTo(accessHint); } } @Test public void newTableReaderForCompactionInputs() { try (final Options opt = new Options()) { final boolean boolValue = rand.nextBoolean(); opt.setNewTableReaderForCompactionInputs(boolValue); assertThat(opt.newTableReaderForCompactionInputs()).isEqualTo(boolValue); } } @Test public void compactionReadaheadSize() { try (final Options opt = new Options()) { final long longValue = rand.nextLong(); 
opt.setCompactionReadaheadSize(longValue);
      assertThat(opt.compactionReadaheadSize()).isEqualTo(longValue);
    }
  }

  // Most tests below share one shape: create an Options in
  // try-with-resources, write a value through a setter and assert that the
  // matching getter returns it. Tests that first assert an initial value
  // (isFalse()/isNull()) additionally pin what a fresh Options reports.

  @Test
  public void randomAccessMaxBufferSize() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setRandomAccessMaxBufferSize(longValue);
      assertThat(opt.randomAccessMaxBufferSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void writableFileMaxBufferSize() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setWritableFileMaxBufferSize(longValue);
      assertThat(opt.writableFileMaxBufferSize()).isEqualTo(longValue);
    }
  }

  @Test
  public void useAdaptiveMutex() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setUseAdaptiveMutex(boolValue);
      assertThat(opt.useAdaptiveMutex()).isEqualTo(boolValue);
    }
  }

  @Test
  public void bytesPerSync() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setBytesPerSync(longValue);
      assertThat(opt.bytesPerSync()).isEqualTo(longValue);
    }
  }

  @Test
  public void walBytesPerSync() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setWalBytesPerSync(longValue);
      assertThat(opt.walBytesPerSync()).isEqualTo(longValue);
    }
  }

  @Test
  public void strictBytesPerSync() {
    try (final Options opt = new Options()) {
      assertThat(opt.strictBytesPerSync()).isFalse();
      opt.setStrictBytesPerSync(true);
      assertThat(opt.strictBytesPerSync()).isTrue();
    }
  }

  @Test
  public void enableThreadTracking() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setEnableThreadTracking(boolValue);
      assertThat(opt.enableThreadTracking()).isEqualTo(boolValue);
    }
  }

  @Test
  public void delayedWriteRate() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setDelayedWriteRate(longValue);
      assertThat(opt.delayedWriteRate()).isEqualTo(longValue);
    }
  }

  @Test
  public void enablePipelinedWrite() {
    try(final Options opt = new Options()) {
      assertThat(opt.enablePipelinedWrite()).isFalse();
      opt.setEnablePipelinedWrite(true);
      assertThat(opt.enablePipelinedWrite()).isTrue();
    }
  }

  // NOTE(review): the method name looks like a misspelling of
  // "unorderedWrite"; left unchanged here.
  @Test
  public void unordredWrite() {
    try(final Options opt = new Options()) {
      assertThat(opt.unorderedWrite()).isFalse();
      opt.setUnorderedWrite(true);
      assertThat(opt.unorderedWrite()).isTrue();
    }
  }

  @Test
  public void allowConcurrentMemtableWrite() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllowConcurrentMemtableWrite(boolValue);
      assertThat(opt.allowConcurrentMemtableWrite()).isEqualTo(boolValue);
    }
  }

  @Test
  public void enableWriteThreadAdaptiveYield() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setEnableWriteThreadAdaptiveYield(boolValue);
      assertThat(opt.enableWriteThreadAdaptiveYield()).isEqualTo(boolValue);
    }
  }

  @Test
  public void writeThreadMaxYieldUsec() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setWriteThreadMaxYieldUsec(longValue);
      assertThat(opt.writeThreadMaxYieldUsec()).isEqualTo(longValue);
    }
  }

  @Test
  public void writeThreadSlowYieldUsec() {
    try (final Options opt = new Options()) {
      final long longValue = rand.nextLong();
      opt.setWriteThreadSlowYieldUsec(longValue);
      assertThat(opt.writeThreadSlowYieldUsec()).isEqualTo(longValue);
    }
  }

  @Test
  public void skipStatsUpdateOnDbOpen() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setSkipStatsUpdateOnDbOpen(boolValue);
      assertThat(opt.skipStatsUpdateOnDbOpen()).isEqualTo(boolValue);
    }
  }

  // Round-trips every WALRecoveryMode constant on the same Options instance.
  @Test
  public void walRecoveryMode() {
    try (final Options opt = new Options()) {
      for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) {
        opt.setWalRecoveryMode(walRecoveryMode);
        assertThat(opt.walRecoveryMode()).isEqualTo(walRecoveryMode);
      }
    }
  }

  @Test
  public void allow2pc() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAllow2pc(boolValue);
      assertThat(opt.allow2pc()).isEqualTo(boolValue);
    }
  }

  // rowCache() is null on a fresh Options; afterwards an LRUCache and then a
  // ClockCache are set in turn and each must be returned by the getter.
  @Test
  public void rowCache() {
    try (final Options opt = new Options()) {
      assertThat(opt.rowCache()).isNull();

      try(final Cache lruCache = new LRUCache(1000)) {
        opt.setRowCache(lruCache);
        assertThat(opt.rowCache()).isEqualTo(lruCache);
      }

      try(final Cache clockCache = new ClockCache(1000)) {
        opt.setRowCache(clockCache);
        assertThat(opt.rowCache()).isEqualTo(clockCache);
      }
    }
  }

  // walFilter() is null on a fresh Options; an anonymous AbstractWalFilter
  // whose callbacks do nothing of interest is then set and read back.
  @Test
  public void walFilter() {
    try (final Options opt = new Options()) {
      assertThat(opt.walFilter()).isNull();

      try (final AbstractWalFilter walFilter = new AbstractWalFilter() {
        // NOTE(review): raw Map parameters; the type arguments are not
        // visible here, confirm them against AbstractWalFilter.
        @Override
        public void columnFamilyLogNumberMap(
            final Map cfLognumber,
            final Map cfNameId) {
          // no-op
        }

        @Override
        public LogRecordFoundResult logRecordFound(final long logNumber,
            final String logFileName, final WriteBatch batch,
            final WriteBatch newBatch) {
          return new LogRecordFoundResult(
              WalProcessingOption.CONTINUE_PROCESSING, false);
        }

        @Override
        public String name() {
          return "test-wal-filter";
        }
      }) {
        opt.setWalFilter(walFilter);
        assertThat(opt.walFilter()).isEqualTo(walFilter);
      }
    }
  }

  @Test
  public void failIfOptionsFileError() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setFailIfOptionsFileError(boolValue);
      assertThat(opt.failIfOptionsFileError()).isEqualTo(boolValue);
    }
  }

  @Test
  public void dumpMallocStats() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setDumpMallocStats(boolValue);
      assertThat(opt.dumpMallocStats()).isEqualTo(boolValue);
    }
  }

  @Test
  public void avoidFlushDuringRecovery() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAvoidFlushDuringRecovery(boolValue);
      assertThat(opt.avoidFlushDuringRecovery()).isEqualTo(boolValue);
    }
  }

  @Test
  public void avoidFlushDuringShutdown() {
    try (final Options opt = new Options()) {
      final boolean boolValue = rand.nextBoolean();
      opt.setAvoidFlushDuringShutdown(boolValue);
      assertThat(opt.avoidFlushDuringShutdown()).isEqualTo(boolValue);
    }
  }

  @Test
  public void allowIngestBehind() {
    try (final Options opt = new Options()) {
      assertThat(opt.allowIngestBehind()).isFalse();
      opt.setAllowIngestBehind(true);
      assertThat(opt.allowIngestBehind()).isTrue();
    }
  }

  @Test
  public void preserveDeletes() {
    try (final Options opt = new Options()) {
      assertThat(opt.preserveDeletes()).isFalse();
      opt.setPreserveDeletes(true);
      assertThat(opt.preserveDeletes()).isTrue();
    }
  }

  @Test
  public void twoWriteQueues() {
    try (final Options opt = new Options()) {
      assertThat(opt.twoWriteQueues()).isFalse();
      opt.setTwoWriteQueues(true);
      assertThat(opt.twoWriteQueues()).isTrue();
    }
  }

  @Test
  public void manualWalFlush() {
    try (final Options opt = new Options()) {
      assertThat(opt.manualWalFlush()).isFalse();
      opt.setManualWalFlush(true);
      assertThat(opt.manualWalFlush()).isTrue();
    }
  }

  @Test
  public void atomicFlush() {
    try (final Options opt = new Options()) {
      assertThat(opt.atomicFlush()).isFalse();
      opt.setAtomicFlush(true);
      assertThat(opt.atomicFlush()).isTrue();
    }
  }

  // getEnv() must return the very instance passed to setEnv (isSameAs).
  // The Env from Env.getDefault() is a try-with-resources resource here, so
  // close() is called on it when the block exits.
  @Test
  public void env() {
    try (final Options options = new Options();
         final Env env = Env.getDefault()) {
      options.setEnv(env);
      assertThat(options.getEnv()).isSameAs(env);
    }
  }

  // Calls each optimize*/prepare* helper once; nothing is asserted, the
  // calls only have to complete.
  @Test
  public void linkageOfPrepMethods() {
    try (final Options options = new Options()) {
      options.optimizeUniversalStyleCompaction();
      options.optimizeUniversalStyleCompaction(4000);
      options.optimizeLevelStyleCompaction();
      options.optimizeLevelStyleCompaction(3000);
      options.optimizeForPointLookup(10);
      options.optimizeForSmallDb();
      options.prepareForBulkLoad();
    }
  }

  @Test
  public void compressionTypes() {
    try (final Options options = new Options()) {
      for (final CompressionType compressionType :
          CompressionType.values()) {
        options.setCompressionType(compressionType);
        assertThat(options.compressionType()).
            isEqualTo(compressionType);
        assertThat(CompressionType.valueOf("NO_COMPRESSION")).
isEqualTo(CompressionType.NO_COMPRESSION); } } } @Test public void compressionPerLevel() { try (final Options options = new Options()) { assertThat(options.compressionPerLevel()).isEmpty(); List compressionTypeList = new ArrayList<>(); for (int i = 0; i < options.numLevels(); i++) { compressionTypeList.add(CompressionType.NO_COMPRESSION); } options.setCompressionPerLevel(compressionTypeList); compressionTypeList = options.compressionPerLevel(); for (final CompressionType compressionType : compressionTypeList) { assertThat(compressionType).isEqualTo( CompressionType.NO_COMPRESSION); } } } @Test public void differentCompressionsPerLevel() { try (final Options options = new Options()) { options.setNumLevels(3); assertThat(options.compressionPerLevel()).isEmpty(); List compressionTypeList = new ArrayList<>(); compressionTypeList.add(CompressionType.BZLIB2_COMPRESSION); compressionTypeList.add(CompressionType.SNAPPY_COMPRESSION); compressionTypeList.add(CompressionType.LZ4_COMPRESSION); options.setCompressionPerLevel(compressionTypeList); compressionTypeList = options.compressionPerLevel(); assertThat(compressionTypeList.size()).isEqualTo(3); assertThat(compressionTypeList). 
containsExactly( CompressionType.BZLIB2_COMPRESSION, CompressionType.SNAPPY_COMPRESSION, CompressionType.LZ4_COMPRESSION); } } @Test public void bottommostCompressionType() { try (final Options options = new Options()) { assertThat(options.bottommostCompressionType()) .isEqualTo(CompressionType.DISABLE_COMPRESSION_OPTION); for (final CompressionType compressionType : CompressionType.values()) { options.setBottommostCompressionType(compressionType); assertThat(options.bottommostCompressionType()) .isEqualTo(compressionType); } } } @Test public void bottommostCompressionOptions() { try (final Options options = new Options(); final CompressionOptions bottommostCompressionOptions = new CompressionOptions() .setMaxDictBytes(123)) { options.setBottommostCompressionOptions(bottommostCompressionOptions); assertThat(options.bottommostCompressionOptions()) .isEqualTo(bottommostCompressionOptions); assertThat(options.bottommostCompressionOptions().maxDictBytes()) .isEqualTo(123); } } @Test public void compressionOptions() { try (final Options options = new Options(); final CompressionOptions compressionOptions = new CompressionOptions() .setMaxDictBytes(123)) { options.setCompressionOptions(compressionOptions); assertThat(options.compressionOptions()) .isEqualTo(compressionOptions); assertThat(options.compressionOptions().maxDictBytes()) .isEqualTo(123); } } @Test public void compactionStyles() { try (final Options options = new Options()) { for (final CompactionStyle compactionStyle : CompactionStyle.values()) { options.setCompactionStyle(compactionStyle); assertThat(options.compactionStyle()). isEqualTo(compactionStyle); assertThat(CompactionStyle.valueOf("FIFO")). isEqualTo(CompactionStyle.FIFO); } } } @Test public void maxTableFilesSizeFIFO() { try (final Options opt = new Options()) { long longValue = rand.nextLong(); // Size has to be positive longValue = (longValue < 0) ? -longValue : longValue; longValue = (longValue == 0) ? 
longValue + 1 : longValue; opt.setMaxTableFilesSizeFIFO(longValue); assertThat(opt.maxTableFilesSizeFIFO()). isEqualTo(longValue); } } @Test public void rateLimiter() { try (final Options options = new Options(); final Options anotherOptions = new Options(); final RateLimiter rateLimiter = new RateLimiter(1000, 100 * 1000, 1)) { options.setRateLimiter(rateLimiter); // Test with parameter initialization anotherOptions.setRateLimiter( new RateLimiter(1000)); } } @Test public void sstFileManager() throws RocksDBException { try (final Options options = new Options(); final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) { options.setSstFileManager(sstFileManager); } } @Test public void shouldSetTestPrefixExtractor() { try (final Options options = new Options()) { options.useFixedLengthPrefixExtractor(100); options.useFixedLengthPrefixExtractor(10); } } @Test public void shouldSetTestCappedPrefixExtractor() { try (final Options options = new Options()) { options.useCappedPrefixExtractor(100); options.useCappedPrefixExtractor(10); } } @Test public void shouldTestMemTableFactoryName() throws RocksDBException { try (final Options options = new Options()) { options.setMemTableConfig(new VectorMemTableConfig()); assertThat(options.memTableFactoryName()). isEqualTo("VectorRepFactory"); options.setMemTableConfig( new HashLinkedListMemTableConfig()); assertThat(options.memTableFactoryName()). 
isEqualTo("HashLinkedListRepFactory"); } } @Test public void statistics() { try(final Options options = new Options()) { final Statistics statistics = options.statistics(); assertThat(statistics).isNull(); } try(final Statistics statistics = new Statistics(); final Options options = new Options().setStatistics(statistics); final Statistics stats = options.statistics()) { assertThat(stats).isNotNull(); } } @Test public void maxWriteBufferNumberToMaintain() { try (final Options options = new Options()) { int intValue = rand.nextInt(); // Size has to be positive intValue = (intValue < 0) ? -intValue : intValue; intValue = (intValue == 0) ? intValue + 1 : intValue; options.setMaxWriteBufferNumberToMaintain(intValue); assertThat(options.maxWriteBufferNumberToMaintain()). isEqualTo(intValue); } } @Test public void compactionPriorities() { try (final Options options = new Options()) { for (final CompactionPriority compactionPriority : CompactionPriority.values()) { options.setCompactionPriority(compactionPriority); assertThat(options.compactionPriority()). isEqualTo(compactionPriority); } } } @Test public void reportBgIoStats() { try (final Options options = new Options()) { final boolean booleanValue = true; options.setReportBgIoStats(booleanValue); assertThat(options.reportBgIoStats()). isEqualTo(booleanValue); } } @Test public void ttl() { try (final Options options = new Options()) { options.setTtl(1000 * 60); assertThat(options.ttl()). isEqualTo(1000 * 60); } } @Test public void compactionOptionsUniversal() { try (final Options options = new Options(); final CompactionOptionsUniversal optUni = new CompactionOptionsUniversal() .setCompressionSizePercent(7)) { options.setCompactionOptionsUniversal(optUni); assertThat(options.compactionOptionsUniversal()). 
isEqualTo(optUni); assertThat(options.compactionOptionsUniversal().compressionSizePercent()) .isEqualTo(7); } } @Test public void compactionOptionsFIFO() { try (final Options options = new Options(); final CompactionOptionsFIFO optFifo = new CompactionOptionsFIFO() .setMaxTableFilesSize(2000)) { options.setCompactionOptionsFIFO(optFifo); assertThat(options.compactionOptionsFIFO()). isEqualTo(optFifo); assertThat(options.compactionOptionsFIFO().maxTableFilesSize()) .isEqualTo(2000); } } @Test public void forceConsistencyChecks() { try (final Options options = new Options()) { final boolean booleanValue = true; options.setForceConsistencyChecks(booleanValue); assertThat(options.forceConsistencyChecks()). isEqualTo(booleanValue); } } @Test public void compactionFilter() { try(final Options options = new Options(); final RemoveEmptyValueCompactionFilter cf = new RemoveEmptyValueCompactionFilter()) { options.setCompactionFilter(cf); assertThat(options.compactionFilter()).isEqualTo(cf); } } @Test public void compactionFilterFactory() { try(final Options options = new Options(); final RemoveEmptyValueCompactionFilterFactory cff = new RemoveEmptyValueCompactionFilterFactory()) { options.setCompactionFilterFactory(cff); assertThat(options.compactionFilterFactory()).isEqualTo(cff); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/OptionsUtilTest.java000066400000000000000000000127761370372246700251040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.util.*;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link OptionsUtil}: a database is created with known options,
 * closed, and the persisted options are loaded back and compared with the
 * values that were written.
 */
public class OptionsUtilTest {
  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule public TemporaryFolder dbFolder = new TemporaryFolder();

  /** Selects which {@link OptionsUtil} loader {@link #verifyOptions(TestAPI)} exercises. */
  enum TestAPI { LOAD_LATEST_OPTIONS, LOAD_OPTIONS_FROM_FILE }

  /** Round-trips options through {@code OptionsUtil.loadLatestOptions}. */
  @Test
  public void loadLatestOptions() throws RocksDBException {
    verifyOptions(TestAPI.LOAD_LATEST_OPTIONS);
  }

  /** Round-trips options through {@code OptionsUtil.loadOptionsFromFile}. */
  @Test
  public void loadOptionsFromFile() throws RocksDBException {
    verifyOptions(TestAPI.LOAD_OPTIONS_FROM_FILE);
  }

  /**
   * Creates a database and checks that the reported latest options file name
   * is non-null and starts with {@code "OPTIONS-"}.
   */
  @Test
  public void getLatestOptionsFileName() throws RocksDBException {
    final String dbPath = dbFolder.getRoot().getAbsolutePath();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, dbPath)) {
      assertThat(db).isNotNull();
    }

    final String fName = OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault());
    assertThat(fName).isNotNull();
    // AssertJ rather than the `assert` keyword: the keyword is skipped entirely
    // unless the JVM is started with -ea, so the check could silently not run.
    assertThat(fName).startsWith("OPTIONS-");
    // System.out.println("latest options fileName: " + fName);
  }

  /**
   * Creates a database with a second column family ("new_cf"), closes it,
   * loads the options back through the API selected by {@code apiType} and
   * asserts that the DB options and both column families' options match
   * what was written.
   *
   * @param apiType which {@link OptionsUtil} loader to use
   * @throws RocksDBException if opening the database or loading options fails
   */
  private void verifyOptions(final TestAPI apiType) throws RocksDBException {
    final String dbPath = dbFolder.getRoot().getAbsolutePath();
    final byte[] secondCFName = "new_cf".getBytes();

    // Every options object created by this test is held in try-with-resources
    // so it is closed even when an assertion fails part-way through.
    try (final Options options = new Options()
             .setCreateIfMissing(true)
             .setParanoidChecks(false)
             .setMaxOpenFiles(478)
             .setDelayedWriteRate(1234567L);
         final ColumnFamilyOptions baseDefaultCFOpts = new ColumnFamilyOptions();
         final ColumnFamilyOptions baseSecondCFOpts = new ColumnFamilyOptions()
             .setWriteBufferSize(70 * 1024)
             .setMaxWriteBufferNumber(7)
             .setMaxBytesForLevelBase(53 * 1024 * 1024)
             .setLevel0FileNumCompactionTrigger(3)
             .setLevel0SlowdownWritesTrigger(51)
             .setBottommostCompressionType(CompressionType.ZSTD_COMPRESSION);
         final DBOptions dbOptions = new DBOptions()) {

      // Create a database with a new column family
      try (final RocksDB db = RocksDB.open(options, dbPath)) {
        assertThat(db).isNotNull();

        // create column family
        try (final ColumnFamilyHandle columnFamilyHandle =
                 db.createColumnFamily(
                     new ColumnFamilyDescriptor(secondCFName, baseSecondCFOpts))) {
          assertThat(columnFamilyHandle).isNotNull();
        }
      }

      // Read the options back and verify
      // NOTE(review): the ColumnFamilyOptions carried by the loaded descriptors
      // are not closed here, same as before; confirm ownership before adding
      // cleanup for them.
      final List<ColumnFamilyDescriptor> cfDescs = new ArrayList<>();
      String path = dbPath;
      if (apiType == TestAPI.LOAD_LATEST_OPTIONS) {
        OptionsUtil.loadLatestOptions(path, Env.getDefault(), dbOptions, cfDescs, false);
      } else if (apiType == TestAPI.LOAD_OPTIONS_FROM_FILE) {
        path = dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault());
        OptionsUtil.loadOptionsFromFile(path, Env.getDefault(), dbOptions, cfDescs, false);
      }

      assertThat(dbOptions.createIfMissing()).isEqualTo(options.createIfMissing());
      assertThat(dbOptions.paranoidChecks()).isEqualTo(options.paranoidChecks());
      assertThat(dbOptions.maxOpenFiles()).isEqualTo(options.maxOpenFiles());
      assertThat(dbOptions.delayedWriteRate()).isEqualTo(options.delayedWriteRate());

      assertThat(cfDescs.size()).isEqualTo(2);
      assertThat(cfDescs.get(0)).isNotNull();
      assertThat(cfDescs.get(1)).isNotNull();
      assertThat(cfDescs.get(0).getName()).isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY);
      assertThat(cfDescs.get(1).getName()).isEqualTo(secondCFName);

      // The default column family was created with default options.
      final ColumnFamilyOptions defaultCFOpts = cfDescs.get(0).getOptions();
      assertThat(defaultCFOpts.writeBufferSize()).isEqualTo(baseDefaultCFOpts.writeBufferSize());
      assertThat(defaultCFOpts.maxWriteBufferNumber())
          .isEqualTo(baseDefaultCFOpts.maxWriteBufferNumber());
      assertThat(defaultCFOpts.maxBytesForLevelBase())
          .isEqualTo(baseDefaultCFOpts.maxBytesForLevelBase());
      assertThat(defaultCFOpts.level0FileNumCompactionTrigger())
          .isEqualTo(baseDefaultCFOpts.level0FileNumCompactionTrigger());
      assertThat(defaultCFOpts.level0SlowdownWritesTrigger())
          .isEqualTo(baseDefaultCFOpts.level0SlowdownWritesTrigger());
      assertThat(defaultCFOpts.bottommostCompressionType())
          .isEqualTo(baseDefaultCFOpts.bottommostCompressionType());

      // The second column family was created with the customised options.
      final ColumnFamilyOptions secondCFOpts = cfDescs.get(1).getOptions();
      assertThat(secondCFOpts.writeBufferSize()).isEqualTo(baseSecondCFOpts.writeBufferSize());
      assertThat(secondCFOpts.maxWriteBufferNumber())
          .isEqualTo(baseSecondCFOpts.maxWriteBufferNumber());
      assertThat(secondCFOpts.maxBytesForLevelBase())
          .isEqualTo(baseSecondCFOpts.maxBytesForLevelBase());
      assertThat(secondCFOpts.level0FileNumCompactionTrigger())
          .isEqualTo(baseSecondCFOpts.level0FileNumCompactionTrigger());
      assertThat(secondCFOpts.level0SlowdownWritesTrigger())
          .isEqualTo(baseSecondCFOpts.level0SlowdownWritesTrigger());
      assertThat(secondCFOpts.bottommostCompressionType())
          .isEqualTo(baseSecondCFOpts.bottommostCompressionType());
    }
  }
}
// rocksdb-6.11.4/java/src/test/java/org/rocksdb/PlainTableConfigTest.java000066400000000000000000000051711370372246700257430ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Setter/getter round-trip tests for {@link PlainTableConfig}, plus a check
 * that installing it on {@link Options} reports the "PlainTable" factory name.
 */
public class PlainTableConfigTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Test
  public void keySize() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setKeySize(5);
    assertThat(plainTableConfig.keySize()).
        isEqualTo(5);
  }

  @Test
  public void bloomBitsPerKey() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setBloomBitsPerKey(11);
    assertThat(plainTableConfig.bloomBitsPerKey()).
        isEqualTo(11);
  }

  @Test
  public void hashTableRatio() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setHashTableRatio(0.95);
    assertThat(plainTableConfig.hashTableRatio()).
        isEqualTo(0.95);
  }

  @Test
  public void indexSparseness() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setIndexSparseness(18);
    assertThat(plainTableConfig.indexSparseness()).
        isEqualTo(18);
  }

  @Test
  public void hugePageTlbSize() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setHugePageTlbSize(1);
    assertThat(plainTableConfig.hugePageTlbSize()).
        isEqualTo(1);
  }

  @Test
  public void encodingType() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setEncodingType(EncodingType.kPrefix);
    assertThat(plainTableConfig.encodingType()).isEqualTo(
        EncodingType.kPrefix);
  }

  @Test
  public void fullScanMode() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setFullScanMode(true);
    assertThat(plainTableConfig.fullScanMode()).isTrue();
  }

  @Test
  public void storeIndexInFile() {
    PlainTableConfig plainTableConfig = new PlainTableConfig();
    plainTableConfig.setStoreIndexInFile(true);
    assertThat(plainTableConfig.storeIndexInFile()).
        isTrue();
  }

  /** Installing a PlainTableConfig makes Options report the "PlainTable" table factory. */
  @Test
  public void plainTableConfig() {
    try(final Options opt = new Options()) {
      final PlainTableConfig plainTableConfig = new PlainTableConfig();
      opt.setTableFormatConfig(plainTableConfig);
      assertThat(opt.tableFactoryName()).isEqualTo("PlainTable");
    }
  }
}
// rocksdb-6.11.4/java/src/test/java/org/rocksdb/PlatformRandomHelper.java000066400000000000000000000032251370372246700260250ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import java.util.Random;

/**
 * Supplies a {@link Random} whose {@code nextLong()} range suits the
 * bitness (32 vs 64 bit) detected for the current platform.
 */
public class PlatformRandomHelper {

  /**
   * Reports whether the operating system looks like a 64-bit one.
   *
   * <p>When the {@code os.name} property contains "Windows" the answer is
   * whether the {@code ProgramFiles(x86)} environment variable is set;
   * otherwise it is whether the {@code os.arch} property contains "64".
   *
   * @return {@code true} if the operating system is detected as 64 bit.
   */
  public static boolean isOs64Bit() {
    final String osName = System.getProperty("os.name");
    if (osName.contains("Windows")) {
      return System.getenv("ProgramFiles(x86)") != null;
    }
    return System.getProperty("os.arch").contains("64");
  }

  /**
   * Creates the Random implementation matching the detected platform.
   *
   * @return a plain {@link java.util.Random} on 64-bit systems, otherwise an
   *     instance whose {@code nextLong()} is limited to the positive int range.
   */
  public static Random getPlatformSpecificRandomFactory() {
    return isOs64Bit() ? new Random() : new Random32Bit();
  }

  /**
   * Random variant that overrides {@code nextLong} so the values it
   * produces fit in size_t. This workaround is necessary because there
   * is no unsigned_int &lt; Java 8.
   */
  private static class Random32Bit extends Random {
    @Override
    public long nextLong() {
      // Draw from [0, Integer.MAX_VALUE) and widen to long.
      final int bounded = this.nextInt(Integer.MAX_VALUE);
      return bounded;
    }
  }

  /** Not instantiable: static utility methods only. */
  private PlatformRandomHelper() {
  }
}
// rocksdb-6.11.4/java/src/test/java/org/rocksdb/RateLimiterTest.java000066400000000000000000000041701370372246700250210ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; import static org.rocksdb.RateLimiter.*; public class RateLimiterTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void bytesPerSecond() { try(final RateLimiter rateLimiter = new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { assertThat(rateLimiter.getBytesPerSecond()).isGreaterThan(0); rateLimiter.setBytesPerSecond(2000); assertThat(rateLimiter.getBytesPerSecond()).isGreaterThan(0); } } @Test public void getSingleBurstBytes() { try(final RateLimiter rateLimiter = new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { assertThat(rateLimiter.getSingleBurstBytes()).isEqualTo(100); } } @Test public void getTotalBytesThrough() { try(final RateLimiter rateLimiter = new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { assertThat(rateLimiter.getTotalBytesThrough()).isEqualTo(0); } } @Test public void getTotalRequests() { try(final RateLimiter rateLimiter = new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS, DEFAULT_MODE, DEFAULT_AUTOTUNE)) { assertThat(rateLimiter.getTotalRequests()).isEqualTo(0); } } @Test public void autoTune() { try(final RateLimiter rateLimiter = new RateLimiter(1000, DEFAULT_REFILL_PERIOD_MICROS, DEFAULT_FAIRNESS, DEFAULT_MODE, true)) { assertThat(rateLimiter.getBytesPerSecond()).isGreaterThan(0); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/ReadOnlyTest.java000066400000000000000000000262521370372246700243220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static org.assertj.core.api.Assertions.assertThat; public class ReadOnlyTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void readOnlyOpen() throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { db.put("key".getBytes(), "value".getBytes()); try (final RocksDB db2 = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath())) { assertThat("value"). 
isEqualTo(new String(db2.get("key".getBytes()))); } } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = new ArrayList<>(); cfDescriptors.add(new ColumnFamilyDescriptor( RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts)); final List columnFamilyHandleList = new ArrayList<>(); try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { try (final ColumnFamilyOptions newCfOpts = new ColumnFamilyOptions(); final ColumnFamilyOptions newCf2Opts = new ColumnFamilyOptions() ) { columnFamilyHandleList.add(db.createColumnFamily( new ColumnFamilyDescriptor("new_cf".getBytes(), newCfOpts))); columnFamilyHandleList.add(db.createColumnFamily( new ColumnFamilyDescriptor("new_cf2".getBytes(), newCf2Opts))); db.put(columnFamilyHandleList.get(2), "key2".getBytes(), "value2".getBytes()); final List readOnlyColumnFamilyHandleList = new ArrayList<>(); try (final RocksDB db2 = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList)) { try (final ColumnFamilyOptions newCfOpts2 = new ColumnFamilyOptions(); final ColumnFamilyOptions newCf2Opts2 = new ColumnFamilyOptions() ) { assertThat(db2.get("key2".getBytes())).isNull(); assertThat(db2.get(readOnlyColumnFamilyHandleList.get(0), "key2".getBytes())). 
isNull(); cfDescriptors.clear(); cfDescriptors.add( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, newCfOpts2)); cfDescriptors.add(new ColumnFamilyDescriptor("new_cf2".getBytes(), newCf2Opts2)); final List readOnlyColumnFamilyHandleList2 = new ArrayList<>(); try (final RocksDB db3 = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList2)) { try { assertThat(new String(db3.get( readOnlyColumnFamilyHandleList2.get(1), "key2".getBytes()))).isEqualTo("value2"); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList2) { columnFamilyHandle.close(); } } } } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test(expected = RocksDBException.class) public void failToWriteInReadOnly() throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true)) { try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { //no-op } } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) ); final List readOnlyColumnFamilyHandleList = new ArrayList<>(); try (final RocksDB rDb = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList)) { try { // test that put fails in readonly mode rDb.put("key".getBytes(), "value".getBytes()); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test(expected = RocksDBException.class) public void failToCFWriteInReadOnly() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = 
RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { //no-op } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) ); final List readOnlyColumnFamilyHandleList = new ArrayList<>(); try (final RocksDB rDb = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList)) { try { rDb.put(readOnlyColumnFamilyHandleList.get(0), "key".getBytes(), "value".getBytes()); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test(expected = RocksDBException.class) public void failToRemoveInReadOnly() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { //no-op } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) ); final List readOnlyColumnFamilyHandleList = new ArrayList<>(); try (final RocksDB rDb = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList)) { try { rDb.delete("key".getBytes()); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test(expected = RocksDBException.class) public void failToCFRemoveInReadOnly() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { //no-op } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) ); final List readOnlyColumnFamilyHandleList = new 
ArrayList<>(); try (final RocksDB rDb = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList)) { try { rDb.delete(readOnlyColumnFamilyHandleList.get(0), "key".getBytes()); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test(expected = RocksDBException.class) public void failToWriteBatchReadOnly() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { //no-op } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) ); final List readOnlyColumnFamilyHandleList = new ArrayList<>(); try (final RocksDB rDb = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList); final WriteBatch wb = new WriteBatch(); final WriteOptions wOpts = new WriteOptions()) { try { wb.put("key".getBytes(), "value".getBytes()); rDb.write(wOpts, wb); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } } @Test(expected = RocksDBException.class) public void failToCFWriteBatchReadOnly() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { //no-op } try (final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpts) ); final List readOnlyColumnFamilyHandleList = new ArrayList<>(); try (final RocksDB rDb = RocksDB.openReadOnly( dbFolder.getRoot().getAbsolutePath(), cfDescriptors, readOnlyColumnFamilyHandleList); final WriteBatch wb = new WriteBatch(); final 
WriteOptions wOpts = new WriteOptions()) { try { wb.put(readOnlyColumnFamilyHandleList.get(0), "key".getBytes(), "value".getBytes()); rDb.write(wOpts, wb); } finally { for (final ColumnFamilyHandle columnFamilyHandle : readOnlyColumnFamilyHandleList) { columnFamilyHandle.close(); } } } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/ReadOptionsTest.java000066400000000000000000000212771370372246700250360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Arrays; import java.util.Random; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import static org.assertj.core.api.Assertions.assertThat; public class ReadOptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public ExpectedException exception = ExpectedException.none(); @Test public void altConstructor() { try (final ReadOptions opt = new ReadOptions(true, true)) { assertThat(opt.verifyChecksums()).isTrue(); assertThat(opt.fillCache()).isTrue(); } } @Test public void copyConstructor() { try (final ReadOptions opt = new ReadOptions()) { opt.setVerifyChecksums(false); opt.setFillCache(false); opt.setIterateUpperBound(buildRandomSlice()); opt.setIterateLowerBound(buildRandomSlice()); try (final ReadOptions other = new ReadOptions(opt)) { assertThat(opt.verifyChecksums()).isEqualTo(other.verifyChecksums()); assertThat(opt.fillCache()).isEqualTo(other.fillCache()); assertThat(Arrays.equals(opt.iterateUpperBound().data(), other.iterateUpperBound().data())).isTrue(); assertThat(Arrays.equals(opt.iterateLowerBound().data(), other.iterateLowerBound().data())).isTrue(); } } } @Test public void 
verifyChecksum() { try (final ReadOptions opt = new ReadOptions()) { final Random rand = new Random(); final boolean boolValue = rand.nextBoolean(); opt.setVerifyChecksums(boolValue); assertThat(opt.verifyChecksums()).isEqualTo(boolValue); } } @Test public void fillCache() { try (final ReadOptions opt = new ReadOptions()) { final Random rand = new Random(); final boolean boolValue = rand.nextBoolean(); opt.setFillCache(boolValue); assertThat(opt.fillCache()).isEqualTo(boolValue); } } @Test public void tailing() { try (final ReadOptions opt = new ReadOptions()) { final Random rand = new Random(); final boolean boolValue = rand.nextBoolean(); opt.setTailing(boolValue); assertThat(opt.tailing()).isEqualTo(boolValue); } } @Test public void snapshot() { try (final ReadOptions opt = new ReadOptions()) { opt.setSnapshot(null); assertThat(opt.snapshot()).isNull(); } } @Test public void readTier() { try (final ReadOptions opt = new ReadOptions()) { opt.setReadTier(ReadTier.BLOCK_CACHE_TIER); assertThat(opt.readTier()).isEqualTo(ReadTier.BLOCK_CACHE_TIER); } } @SuppressWarnings("deprecated") @Test public void managed() { try (final ReadOptions opt = new ReadOptions()) { opt.setManaged(true); assertThat(opt.managed()).isTrue(); } } @Test public void totalOrderSeek() { try (final ReadOptions opt = new ReadOptions()) { opt.setTotalOrderSeek(true); assertThat(opt.totalOrderSeek()).isTrue(); } } @Test public void prefixSameAsStart() { try (final ReadOptions opt = new ReadOptions()) { opt.setPrefixSameAsStart(true); assertThat(opt.prefixSameAsStart()).isTrue(); } } @Test public void pinData() { try (final ReadOptions opt = new ReadOptions()) { opt.setPinData(true); assertThat(opt.pinData()).isTrue(); } } @Test public void backgroundPurgeOnIteratorCleanup() { try (final ReadOptions opt = new ReadOptions()) { opt.setBackgroundPurgeOnIteratorCleanup(true); assertThat(opt.backgroundPurgeOnIteratorCleanup()).isTrue(); } } @Test public void readaheadSize() { try (final ReadOptions opt = 
new ReadOptions()) { final Random rand = new Random(); final long longValue = rand.nextLong(); opt.setReadaheadSize(longValue); assertThat(opt.readaheadSize()).isEqualTo(longValue); } } @Test public void ignoreRangeDeletions() { try (final ReadOptions opt = new ReadOptions()) { opt.setIgnoreRangeDeletions(true); assertThat(opt.ignoreRangeDeletions()).isTrue(); } } @Test public void iterateUpperBound() { try (final ReadOptions opt = new ReadOptions()) { Slice upperBound = buildRandomSlice(); opt.setIterateUpperBound(upperBound); assertThat(Arrays.equals(upperBound.data(), opt.iterateUpperBound().data())).isTrue(); } } @Test public void iterateUpperBoundNull() { try (final ReadOptions opt = new ReadOptions()) { assertThat(opt.iterateUpperBound()).isNull(); } } @Test public void iterateLowerBound() { try (final ReadOptions opt = new ReadOptions()) { Slice lowerBound = buildRandomSlice(); opt.setIterateLowerBound(lowerBound); assertThat(Arrays.equals(lowerBound.data(), opt.iterateLowerBound().data())).isTrue(); } } @Test public void iterateLowerBoundNull() { try (final ReadOptions opt = new ReadOptions()) { assertThat(opt.iterateLowerBound()).isNull(); } } @Test public void tableFilter() { try (final ReadOptions opt = new ReadOptions(); final AbstractTableFilter allTablesFilter = new AllTablesFilter()) { opt.setTableFilter(allTablesFilter); } } @Test public void iterStartSeqnum() { try (final ReadOptions opt = new ReadOptions()) { assertThat(opt.iterStartSeqnum()).isEqualTo(0); opt.setIterStartSeqnum(10); assertThat(opt.iterStartSeqnum()).isEqualTo(10); } } @Test public void failSetVerifyChecksumUninitialized() { try (final ReadOptions readOptions = setupUninitializedReadOptions(exception)) { readOptions.setVerifyChecksums(true); } } @Test public void failVerifyChecksumUninitialized() { try (final ReadOptions readOptions = setupUninitializedReadOptions(exception)) { readOptions.verifyChecksums(); } } @Test public void failSetFillCacheUninitialized() { try (final 
ReadOptions readOptions =
             setupUninitializedReadOptions(exception)) {
      readOptions.setFillCache(true);
    }
  }

  /** Reading fillCache from a closed instance must raise the expected error. */
  @Test
  public void failFillCacheUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.fillCache();
    }
  }

  /** Setting tailing on a closed instance must raise the expected error. */
  @Test
  public void failSetTailingUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.setTailing(true);
    }
  }

  /** Reading tailing from a closed instance must raise the expected error. */
  @Test
  public void failTailingUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.tailing();
    }
  }

  /** Setting a snapshot on a closed instance must raise the expected error. */
  @Test
  public void failSetSnapshotUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.setSnapshot(null);
    }
  }

  /** Reading the snapshot of a closed instance must raise the expected error. */
  @Test
  public void failSnapshotUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.snapshot();
    }
  }

  /** Setting the upper bound on a closed instance must raise the expected error. */
  @Test
  public void failSetIterateUpperBoundUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.setIterateUpperBound(null);
    }
  }

  /** Reading the upper bound of a closed instance must raise the expected error. */
  @Test
  public void failIterateUpperBoundUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.iterateUpperBound();
    }
  }

  /** Setting the lower bound on a closed instance must raise the expected error. */
  @Test
  public void failSetIterateLowerBoundUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.setIterateLowerBound(null);
    }
  }

  /** Reading the lower bound of a closed instance must raise the expected error. */
  @Test
  public void failIterateLowerBoundUninitialized() {
    try (final ReadOptions closedOptions =
             setupUninitializedReadOptions(exception)) {
      closedOptions.iterateLowerBound();
    }
  }

  /**
   * Creates a ReadOptions, closes it immediately, and registers
   * {@link AssertionError} as the expected failure on the supplied rule.
   *
   * @param exception the rule on which the expected error is registered
   * @return the already-closed ReadOptions
   */
  private ReadOptions setupUninitializedReadOptions(
      ExpectedException exception) {
    final ReadOptions closed = new ReadOptions();
    closed.close();
    exception.expect(AssertionError.class);
    return closed;
  }

  private Slice buildRandomSlice() {
    final Random rand = new Random();
    byte[] sliceBytes = new byte[rand.nextInt(100) + 1];
    rand.nextBytes(sliceBytes);
return new Slice(sliceBytes); } private static class AllTablesFilter extends AbstractTableFilter { @Override public boolean filter(final TableProperties tableProperties) { return true; } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java000066400000000000000000000073131370372246700257500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import org.rocksdb.Status.Code; import org.rocksdb.Status.SubCode; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; public class RocksDBExceptionTest { @Test public void exception() { try { raiseException(); } catch(final RocksDBException e) { assertThat(e.getStatus()).isNull(); assertThat(e.getMessage()).isEqualTo("test message"); return; } fail(); } @Test public void exceptionWithStatusCode() { try { raiseExceptionWithStatusCode(); } catch(final RocksDBException e) { assertThat(e.getStatus()).isNotNull(); assertThat(e.getStatus().getCode()).isEqualTo(Code.NotSupported); assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.None); assertThat(e.getStatus().getState()).isNull(); assertThat(e.getMessage()).isEqualTo("test message"); return; } fail(); } @Test public void exceptionNoMsgWithStatusCode() { try { raiseExceptionNoMsgWithStatusCode(); } catch(final RocksDBException e) { assertThat(e.getStatus()).isNotNull(); assertThat(e.getStatus().getCode()).isEqualTo(Code.NotSupported); assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.None); assertThat(e.getStatus().getState()).isNull(); assertThat(e.getMessage()).isEqualTo(Code.NotSupported.name()); return; } fail(); } @Test public void exceptionWithStatusCodeSubCode() { try { raiseExceptionWithStatusCodeSubCode(); } catch(final 
RocksDBException e) { assertThat(e.getStatus()).isNotNull(); assertThat(e.getStatus().getCode()).isEqualTo(Code.TimedOut); assertThat(e.getStatus().getSubCode()) .isEqualTo(Status.SubCode.LockTimeout); assertThat(e.getStatus().getState()).isNull(); assertThat(e.getMessage()).isEqualTo("test message"); return; } fail(); } @Test public void exceptionNoMsgWithStatusCodeSubCode() { try { raiseExceptionNoMsgWithStatusCodeSubCode(); } catch(final RocksDBException e) { assertThat(e.getStatus()).isNotNull(); assertThat(e.getStatus().getCode()).isEqualTo(Code.TimedOut); assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.LockTimeout); assertThat(e.getStatus().getState()).isNull(); assertThat(e.getMessage()).isEqualTo(Code.TimedOut.name() + "(" + SubCode.LockTimeout.name() + ")"); return; } fail(); } @Test public void exceptionWithStatusCodeState() { try { raiseExceptionWithStatusCodeState(); } catch(final RocksDBException e) { assertThat(e.getStatus()).isNotNull(); assertThat(e.getStatus().getCode()).isEqualTo(Code.NotSupported); assertThat(e.getStatus().getSubCode()).isEqualTo(SubCode.None); assertThat(e.getStatus().getState()).isNotNull(); assertThat(e.getMessage()).isEqualTo("test message"); return; } fail(); } private native void raiseException() throws RocksDBException; private native void raiseExceptionWithStatusCode() throws RocksDBException; private native void raiseExceptionNoMsgWithStatusCode() throws RocksDBException; private native void raiseExceptionWithStatusCodeSubCode() throws RocksDBException; private native void raiseExceptionNoMsgWithStatusCodeSubCode() throws RocksDBException; private native void raiseExceptionWithStatusCodeState() throws RocksDBException; } rocksdb-6.11.4/java/src/test/java/org/rocksdb/RocksDBTest.java000066400000000000000000001766571370372246700241130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.*; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import java.nio.ByteBuffer; import java.util.*; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; public class RocksDBTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); public static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void open() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { assertThat(db).isNotNull(); } } @Test public void open_opt() throws RocksDBException { try (final Options opt = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { assertThat(db).isNotNull(); } } @Test public void openWhenOpen() throws RocksDBException { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db1 = RocksDB.open(dbPath)) { try (final RocksDB db2 = RocksDB.open(dbPath)) { fail("Should have thrown an exception when opening the same db twice"); } catch (final RocksDBException e) { assertThat(e.getStatus().getCode()).isEqualTo(Status.Code.IOError); assertThat(e.getStatus().getSubCode()).isEqualTo(Status.SubCode.None); assertThat(e.getStatus().getState()).contains("lock "); } } } @Test public void createColumnFamily() throws RocksDBException { final byte[] col1Name = "col1".getBytes(UTF_8); try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() ) { try (final 
ColumnFamilyHandle col1 =
               db.createColumnFamily(new ColumnFamilyDescriptor(col1Name, cfOpts))) {
        assertThat(col1).isNotNull();
        assertThat(col1.getName()).isEqualTo(col1Name);
      }
    }

    // Re-open with both families listed and check the second handle's name.
    // The element type is explicit: a raw List does not expose getName() and
    // cannot be iterated as ColumnFamilyHandle.
    final List<ColumnFamilyHandle> cfHandles = new ArrayList<>();
    try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(),
        Arrays.asList(
            new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
            new ColumnFamilyDescriptor(col1Name)),
        cfHandles)) {
      try {
        assertThat(cfHandles.size()).isEqualTo(2);
        assertThat(cfHandles.get(1)).isNotNull();
        assertThat(cfHandles.get(1).getName()).isEqualTo(col1Name);
      } finally {
        for (final ColumnFamilyHandle cfHandle : cfHandles) {
          cfHandle.close();
        }
      }
    }
  }

  /**
   * Creates two column families by name in one call, then re-opens the
   * database listing them and checks the returned handles' names.
   */
  @Test
  public void createColumnFamilies() throws RocksDBException {
    final byte[] col1Name = "col1".getBytes(UTF_8);
    final byte[] col2Name = "col2".getBytes(UTF_8);

    // Explicit element type; see the for-each loops below.
    List<ColumnFamilyHandle> cfHandles;
    try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath());
         final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions()
    ) {
      cfHandles =
          db.createColumnFamilies(cfOpts, Arrays.asList(col1Name, col2Name));
      try {
        assertThat(cfHandles).isNotNull();
        assertThat(cfHandles.size()).isEqualTo(2);
        assertThat(cfHandles.get(0).getName()).isEqualTo(col1Name);
        assertThat(cfHandles.get(1).getName()).isEqualTo(col2Name);
      } finally {
        for (final ColumnFamilyHandle cfHandle : cfHandles) {
          cfHandle.close();
        }
      }
    }

    cfHandles = new ArrayList<>();
    try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(),
        Arrays.asList(
            new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
            new ColumnFamilyDescriptor(col1Name),
            new ColumnFamilyDescriptor(col2Name)),
        cfHandles)) {
      try {
        assertThat(cfHandles.size()).isEqualTo(3);
        assertThat(cfHandles.get(1)).isNotNull();
        assertThat(cfHandles.get(1).getName()).isEqualTo(col1Name);
        assertThat(cfHandles.get(2)).isNotNull();
        assertThat(cfHandles.get(2).getName()).isEqualTo(col2Name);
      } finally {
        for (final ColumnFamilyHandle cfHandle : cfHandles) {
          cfHandle.close();
        }
      }
    }
  }

  @Test
  public void
createColumnFamiliesfromDescriptors() throws RocksDBException { final byte[] col1Name = "col1".getBytes(UTF_8); final byte[] col2Name = "col2".getBytes(UTF_8); List cfHandles; try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() ) { cfHandles = db.createColumnFamilies(Arrays.asList( new ColumnFamilyDescriptor(col1Name, cfOpts), new ColumnFamilyDescriptor(col2Name, cfOpts))); try { assertThat(cfHandles).isNotNull(); assertThat(cfHandles.size()).isEqualTo(2); assertThat(cfHandles.get(0).getName()).isEqualTo(col1Name); assertThat(cfHandles.get(1).getName()).isEqualTo(col2Name); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } cfHandles = new ArrayList<>(); try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath(), Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor(col1Name), new ColumnFamilyDescriptor(col2Name)), cfHandles)) { try { assertThat(cfHandles.size()).isEqualTo(3); assertThat(cfHandles.get(1)).isNotNull(); assertThat(cfHandles.get(1).getName()).isEqualTo(col1Name); assertThat(cfHandles.get(2)).isNotNull(); assertThat(cfHandles.get(2).getName()).isEqualTo(col2Name); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } } @Test public void put() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final WriteOptions opt = new WriteOptions(); final ReadOptions optr = new ReadOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put(opt, "key2".getBytes(), "12345678".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo( "value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "12345678".getBytes()); ByteBuffer key = ByteBuffer.allocateDirect(12); ByteBuffer value = ByteBuffer.allocateDirect(12); key.position(4); key.put("key3".getBytes()); 
key.position(4).limit(8); value.position(4); value.put("val3".getBytes()); value.position(4).limit(8); db.put(opt, key, value); assertThat(key.position()).isEqualTo(8); assertThat(key.limit()).isEqualTo(8); assertThat(value.position()).isEqualTo(8); assertThat(value.limit()).isEqualTo(8); key.position(4); ByteBuffer result = ByteBuffer.allocateDirect(12); assertThat(db.get(optr, key, result)).isEqualTo(4); assertThat(result.position()).isEqualTo(0); assertThat(result.limit()).isEqualTo(4); assertThat(key.position()).isEqualTo(8); assertThat(key.limit()).isEqualTo(8); byte[] tmp = new byte[4]; result.get(tmp); assertThat(tmp).isEqualTo("val3".getBytes()); key.position(4); result.clear().position(9); assertThat(db.get(optr, key, result)).isEqualTo(4); assertThat(result.position()).isEqualTo(9); assertThat(result.limit()).isEqualTo(12); assertThat(key.position()).isEqualTo(8); assertThat(key.limit()).isEqualTo(8); byte[] tmp2 = new byte[3]; result.get(tmp2); assertThat(tmp2).isEqualTo("val".getBytes()); // put Segment key3 = sliceSegment("key3"); Segment key4 = sliceSegment("key4"); Segment value0 = sliceSegment("value 0"); Segment value1 = sliceSegment("value 1"); db.put(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); db.put(opt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); // compare Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); } } private static Segment sliceSegment(String key) { ByteBuffer rawKey = ByteBuffer.allocate(key.length() + 4); rawKey.put((byte)0); rawKey.put((byte)0); rawKey.put(key.getBytes()); return new Segment(rawKey.array(), 2, key.length()); } private static class Segment { final byte[] data; final int offset; final int len; public boolean isSamePayload(byte[] value) { if (value == null) { return false; } if (value.length != len) { return false; } for (int i = 0; i < 
value.length; i++) { if (data[i + offset] != value[i]) { return false; } } return true; } public Segment(byte[] value, int offset, int len) { this.data = value; this.offset = offset; this.len = len; } } @Test public void write() throws RocksDBException { try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); final Options options = new Options() .setMergeOperator(stringAppendOperator) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); final WriteOptions opts = new WriteOptions()) { try (final WriteBatch wb1 = new WriteBatch()) { wb1.put("key1".getBytes(), "aa".getBytes()); wb1.merge("key1".getBytes(), "bb".getBytes()); try (final WriteBatch wb2 = new WriteBatch()) { wb2.put("key2".getBytes(), "xx".getBytes()); wb2.merge("key2".getBytes(), "yy".getBytes()); db.write(opts, wb1); db.write(opts, wb2); } } assertThat(db.get("key1".getBytes())).isEqualTo( "aa,bb".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "xx,yy".getBytes()); } } @Test public void getWithOutValue() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); byte[] outValue = new byte[5]; // not found value int getResult = db.get("keyNotFound".getBytes(), outValue); assertThat(getResult).isEqualTo(RocksDB.NOT_FOUND); // found value which fits in outValue getResult = db.get("key1".getBytes(), outValue); assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); assertThat(outValue).isEqualTo("value".getBytes()); // found value which fits partially getResult = db.get("key2".getBytes(), outValue); assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); assertThat(outValue).isEqualTo("12345".getBytes()); } } @Test public void getWithOutValueReadOptions() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final ReadOptions 
rOpt = new ReadOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); byte[] outValue = new byte[5]; // not found value int getResult = db.get(rOpt, "keyNotFound".getBytes(), outValue); assertThat(getResult).isEqualTo(RocksDB.NOT_FOUND); // found value which fits in outValue getResult = db.get(rOpt, "key1".getBytes(), outValue); assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); assertThat(outValue).isEqualTo("value".getBytes()); // found value which fits partially getResult = db.get(rOpt, "key2".getBytes(), outValue); assertThat(getResult).isNotEqualTo(RocksDB.NOT_FOUND); assertThat(outValue).isEqualTo("12345".getBytes()); } } @Rule public ExpectedException thrown = ExpectedException.none(); @Test public void getOutOfArrayMaxSizeValue() throws RocksDBException { final int numberOfValueSplits = 10; final int splitSize = Integer.MAX_VALUE / numberOfValueSplits; Runtime runtime = Runtime.getRuntime(); long neededMemory = ((long)(splitSize)) * (((long)numberOfValueSplits) + 3); boolean isEnoughMemory = runtime.maxMemory() - runtime.totalMemory() > neededMemory; Assume.assumeTrue(isEnoughMemory); final byte[] valueSplit = new byte[splitSize]; final byte[] key = "key".getBytes(); thrown.expect(RocksDBException.class); thrown.expectMessage("Requested array size exceeds VM limit"); // merge (numberOfValueSplits + 1) valueSplit's to get value size exceeding Integer.MAX_VALUE try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperator(stringAppendOperator); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { db.put(key, valueSplit); for (int i = 0; i < numberOfValueSplits; i++) { db.merge(key, valueSplit); } db.get(key); } } @SuppressWarnings("deprecated") @Test public void multiGet() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final 
ReadOptions rOpt = new ReadOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); List lookupKeys = new ArrayList<>(); lookupKeys.add("key1".getBytes()); lookupKeys.add("key2".getBytes()); Map results = db.multiGet(lookupKeys); assertThat(results).isNotNull(); assertThat(results.values()).isNotNull(); assertThat(results.values()). contains("value".getBytes(), "12345678".getBytes()); // test same method with ReadOptions results = db.multiGet(rOpt, lookupKeys); assertThat(results).isNotNull(); assertThat(results.values()).isNotNull(); assertThat(results.values()). contains("value".getBytes(), "12345678".getBytes()); // remove existing key lookupKeys.remove("key2".getBytes()); // add non existing key lookupKeys.add("key3".getBytes()); results = db.multiGet(lookupKeys); assertThat(results).isNotNull(); assertThat(results.values()).isNotNull(); assertThat(results.values()). contains("value".getBytes()); // test same call with readOptions results = db.multiGet(rOpt, lookupKeys); assertThat(results).isNotNull(); assertThat(results.values()).isNotNull(); assertThat(results.values()). contains("value".getBytes()); } } @Test public void multiGetAsList() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final ReadOptions rOpt = new ReadOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); List lookupKeys = new ArrayList<>(); lookupKeys.add("key1".getBytes()); lookupKeys.add("key2".getBytes()); List results = db.multiGetAsList(lookupKeys); assertThat(results).isNotNull(); assertThat(results).hasSize(lookupKeys.size()); assertThat(results). containsExactly("value".getBytes(), "12345678".getBytes()); // test same method with ReadOptions results = db.multiGetAsList(rOpt, lookupKeys); assertThat(results).isNotNull(); assertThat(results). 
contains("value".getBytes(), "12345678".getBytes()); // remove existing key lookupKeys.remove(1); // add non existing key lookupKeys.add("key3".getBytes()); results = db.multiGetAsList(lookupKeys); assertThat(results).isNotNull(); assertThat(results). containsExactly("value".getBytes(), null); // test same call with readOptions results = db.multiGetAsList(rOpt, lookupKeys); assertThat(results).isNotNull(); assertThat(results).contains("value".getBytes()); } } @Test public void merge() throws RocksDBException { try (final StringAppendOperator stringAppendOperator = new StringAppendOperator(); final Options opt = new Options() .setCreateIfMissing(true) .setMergeOperator(stringAppendOperator); final WriteOptions wOpt = new WriteOptions(); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()) ) { db.put("key1".getBytes(), "value".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo( "value".getBytes()); // merge key1 with another value portion db.merge("key1".getBytes(), "value2".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo( "value,value2".getBytes()); // merge key1 with another value portion db.merge(wOpt, "key1".getBytes(), "value3".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo( "value,value2,value3".getBytes()); // merge on non existent key shall insert the value db.merge(wOpt, "key2".getBytes(), "xxxx".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "xxxx".getBytes()); Segment key3 = sliceSegment("key3"); Segment key4 = sliceSegment("key4"); Segment value0 = sliceSegment("value 0"); Segment value1 = sliceSegment("value 1"); db.merge(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); db.merge(wOpt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); // compare Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); } } @Test public void 
delete() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final WriteOptions wOpt = new WriteOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); db.put("key3".getBytes(), "33".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo( "value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "12345678".getBytes()); assertThat(db.get("key3".getBytes())).isEqualTo("33".getBytes()); db.delete("key1".getBytes()); db.delete(wOpt, "key2".getBytes()); ByteBuffer key = ByteBuffer.allocateDirect(16); key.put("key3".getBytes()).flip(); db.delete(wOpt, key); assertThat(key.position()).isEqualTo(4); assertThat(key.limit()).isEqualTo(4); assertThat(db.get("key1".getBytes())).isNull(); assertThat(db.get("key2".getBytes())).isNull(); Segment key3 = sliceSegment("key3"); Segment key4 = sliceSegment("key4"); db.put("key3".getBytes(), "key3 value".getBytes()); db.put("key4".getBytes(), "key4 value".getBytes()); db.delete(key3.data, key3.offset, key3.len); db.delete(wOpt, key4.data, key4.offset, key4.len); assertThat(db.get("key3".getBytes())).isNull(); assertThat(db.get("key4".getBytes())).isNull(); } } @Test public void singleDelete() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final WriteOptions wOpt = new WriteOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo( "value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "12345678".getBytes()); db.singleDelete("key1".getBytes()); db.singleDelete(wOpt, "key2".getBytes()); assertThat(db.get("key1".getBytes())).isNull(); assertThat(db.get("key2".getBytes())).isNull(); } } @Test public void singleDelete_nonExisting() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final WriteOptions 
wOpt = new WriteOptions()) { db.singleDelete("key1".getBytes()); db.singleDelete(wOpt, "key2".getBytes()); assertThat(db.get("key1".getBytes())).isNull(); assertThat(db.get("key2".getBytes())).isNull(); } } @Test public void deleteRange() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); db.put("key3".getBytes(), "abcdefg".getBytes()); db.put("key4".getBytes(), "xyz".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); db.deleteRange("key2".getBytes(), "key4".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); assertThat(db.get("key2".getBytes())).isNull(); assertThat(db.get("key3".getBytes())).isNull(); assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); } } @Test public void getIntProperty() throws RocksDBException { try ( final Options options = new Options() .setCreateIfMissing(true) .setMaxWriteBufferNumber(10) .setMinWriteBufferNumberToMerge(10); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); final WriteOptions wOpt = new WriteOptions().setDisableWAL(true) ) { db.put(wOpt, "key1".getBytes(), "value1".getBytes()); db.put(wOpt, "key2".getBytes(), "value2".getBytes()); db.put(wOpt, "key3".getBytes(), "value3".getBytes()); db.put(wOpt, "key4".getBytes(), "value4".getBytes()); assertThat(db.getLongProperty("rocksdb.num-entries-active-mem-table")) .isGreaterThan(0); assertThat(db.getLongProperty("rocksdb.cur-size-active-mem-table")) .isGreaterThan(0); } } @Test public void fullCompactRange() throws RocksDBException { try (final Options opt = new Options(). setCreateIfMissing(true). 
setDisableAutoCompactions(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(4). setWriteBufferSize(100 << 10). setLevelZeroFileNumCompactionTrigger(3). setTargetFileSizeBase(200 << 10). setTargetFileSizeMultiplier(1). setMaxBytesForLevelBase(500 << 10). setMaxBytesForLevelMultiplier(1). setDisableAutoCompactions(false); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // fill database with key/value pairs byte[] b = new byte[10000]; for (int i = 0; i < 200; i++) { rand.nextBytes(b); db.put((String.valueOf(i)).getBytes(), b); } db.compactRange(); } } @Test public void fullCompactRangeColumnFamily() throws RocksDBException { try ( final DBOptions opt = new DBOptions(). setCreateIfMissing(true). setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). setDisableAutoCompactions(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(4). setWriteBufferSize(100 << 10). setLevelZeroFileNumCompactionTrigger(3). setTargetFileSizeBase(200 << 10). setTargetFileSizeMultiplier(1). setMaxBytesForLevelBase(500 << 10). setMaxBytesForLevelMultiplier(1). 
setDisableAutoCompactions(false) ) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts)); // open database final List columnFamilyHandles = new ArrayList<>(); try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { // fill database with key/value pairs byte[] b = new byte[10000]; for (int i = 0; i < 200; i++) { rand.nextBytes(b); db.put(columnFamilyHandles.get(1), String.valueOf(i).getBytes(), b); } db.compactRange(columnFamilyHandles.get(1)); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void compactRangeWithKeys() throws RocksDBException { try (final Options opt = new Options(). setCreateIfMissing(true). setDisableAutoCompactions(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(4). setWriteBufferSize(100 << 10). setLevelZeroFileNumCompactionTrigger(3). setTargetFileSizeBase(200 << 10). setTargetFileSizeMultiplier(1). setMaxBytesForLevelBase(500 << 10). setMaxBytesForLevelMultiplier(1). setDisableAutoCompactions(false); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // fill database with key/value pairs byte[] b = new byte[10000]; for (int i = 0; i < 200; i++) { rand.nextBytes(b); db.put((String.valueOf(i)).getBytes(), b); } db.compactRange("0".getBytes(), "201".getBytes()); } } @Test public void compactRangeWithKeysReduce() throws RocksDBException { try ( final Options opt = new Options(). setCreateIfMissing(true). setDisableAutoCompactions(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(4). setWriteBufferSize(100 << 10). setLevelZeroFileNumCompactionTrigger(3). setTargetFileSizeBase(200 << 10). setTargetFileSizeMultiplier(1). setMaxBytesForLevelBase(500 << 10). setMaxBytesForLevelMultiplier(1). 
setDisableAutoCompactions(false); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { // fill database with key/value pairs byte[] b = new byte[10000]; for (int i = 0; i < 200; i++) { rand.nextBytes(b); db.put((String.valueOf(i)).getBytes(), b); } db.flush(new FlushOptions().setWaitForFlush(true)); try (final CompactRangeOptions compactRangeOpts = new CompactRangeOptions() .setChangeLevel(true) .setTargetLevel(-1) .setTargetPathId(0)) { db.compactRange(null, "0".getBytes(), "201".getBytes(), compactRangeOpts); } } } @Test public void compactRangeWithKeysColumnFamily() throws RocksDBException { try (final DBOptions opt = new DBOptions(). setCreateIfMissing(true). setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). setDisableAutoCompactions(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(4). setWriteBufferSize(100 << 10). setLevelZeroFileNumCompactionTrigger(3). setTargetFileSizeBase(200 << 10). setTargetFileSizeMultiplier(1). setMaxBytesForLevelBase(500 << 10). setMaxBytesForLevelMultiplier(1). 
setDisableAutoCompactions(false) ) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) ); // open database final List columnFamilyHandles = new ArrayList<>(); try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { // fill database with key/value pairs byte[] b = new byte[10000]; for (int i = 0; i < 200; i++) { rand.nextBytes(b); db.put(columnFamilyHandles.get(1), String.valueOf(i).getBytes(), b); } db.compactRange(columnFamilyHandles.get(1), "0".getBytes(), "201".getBytes()); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void compactRangeWithKeysReduceColumnFamily() throws RocksDBException { try (final DBOptions opt = new DBOptions(). setCreateIfMissing(true). setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). setDisableAutoCompactions(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(4). setWriteBufferSize(100 << 10). setLevelZeroFileNumCompactionTrigger(3). setTargetFileSizeBase(200 << 10). setTargetFileSizeMultiplier(1). setMaxBytesForLevelBase(500 << 10). setMaxBytesForLevelMultiplier(1). 
setDisableAutoCompactions(false) ) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) ); final List columnFamilyHandles = new ArrayList<>(); // open database try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try (final CompactRangeOptions compactRangeOpts = new CompactRangeOptions() .setChangeLevel(true) .setTargetLevel(-1) .setTargetPathId(0)) { // fill database with key/value pairs byte[] b = new byte[10000]; for (int i = 0; i < 200; i++) { rand.nextBytes(b); db.put(columnFamilyHandles.get(1), String.valueOf(i).getBytes(), b); } db.compactRange(columnFamilyHandles.get(1), "0".getBytes(), "201".getBytes(), compactRangeOpts); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void compactRangeToLevel() throws RocksDBException, InterruptedException { final int NUM_KEYS_PER_L0_FILE = 100; final int KEY_SIZE = 20; final int VALUE_SIZE = 300; final int L0_FILE_SIZE = NUM_KEYS_PER_L0_FILE * (KEY_SIZE + VALUE_SIZE); final int NUM_L0_FILES = 10; final int TEST_SCALE = 5; final int KEY_INTERVAL = 100; try (final Options opt = new Options(). setCreateIfMissing(true). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(5). // a slightly bigger write buffer than L0 file // so that we can ensure manual flush always // go before background flush happens. setWriteBufferSize(L0_FILE_SIZE * 2). // Disable auto L0 -> L1 compaction setLevelZeroFileNumCompactionTrigger(20). setTargetFileSizeBase(L0_FILE_SIZE * 100). setTargetFileSizeMultiplier(1). // To disable auto compaction setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100). setMaxBytesForLevelMultiplier(2). 
setDisableAutoCompactions(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath()) ) { // fill database with key/value pairs byte[] value = new byte[VALUE_SIZE]; int int_key = 0; for (int round = 0; round < 5; ++round) { int initial_key = int_key; for (int f = 1; f <= NUM_L0_FILES; ++f) { for (int i = 0; i < NUM_KEYS_PER_L0_FILE; ++i) { int_key += KEY_INTERVAL; rand.nextBytes(value); db.put(String.format("%020d", int_key).getBytes(), value); } db.flush(new FlushOptions().setWaitForFlush(true)); // Make sure we do create one more L0 files. assertThat( db.getProperty("rocksdb.num-files-at-level0")). isEqualTo("" + f); } // Compact all L0 files we just created db.compactRange( String.format("%020d", initial_key).getBytes(), String.format("%020d", int_key - 1).getBytes()); // Making sure there isn't any L0 files. assertThat( db.getProperty("rocksdb.num-files-at-level0")). isEqualTo("0"); // Making sure there are some L1 files. // Here we only use != 0 instead of a specific number // as we don't want the test make any assumption on // how compaction works. assertThat( db.getProperty("rocksdb.num-files-at-level1")). isNotEqualTo("0"); // Because we only compacted those keys we issued // in this round, there shouldn't be any L1 -> L2 // compaction. So we expect zero L2 files here. assertThat( db.getProperty("rocksdb.num-files-at-level2")). isEqualTo("0"); } } } @Test public void deleteFilesInRange() throws RocksDBException, InterruptedException { final int KEY_SIZE = 20; final int VALUE_SIZE = 1000; final int FILE_SIZE = 64000; final int NUM_FILES = 10; final int KEY_INTERVAL = 10000; /* * Intention of these options is to end up reliably with 10 files * we will be deleting using deleteFilesInRange. 
* It is writing roughly number of keys that will fit in 10 files (target size) * It is writing interleaved so that files from memory on L0 will overlap * Then compaction cleans everything and we should end up with 10 files */ try (final Options opt = new Options() .setCreateIfMissing(true) .setCompressionType(CompressionType.NO_COMPRESSION) .setTargetFileSizeBase(FILE_SIZE) .setWriteBufferSize(FILE_SIZE / 2) .setDisableAutoCompactions(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { int records = FILE_SIZE / (KEY_SIZE + VALUE_SIZE); // fill database with key/value pairs byte[] value = new byte[VALUE_SIZE]; int key_init = 0; for (int o = 0; o < NUM_FILES; ++o) { int int_key = key_init++; for (int i = 0; i < records; ++i) { int_key += KEY_INTERVAL; rand.nextBytes(value); db.put(String.format("%020d", int_key).getBytes(), value); } } db.flush(new FlushOptions().setWaitForFlush(true)); db.compactRange(); // Make sure we do create one more L0 files. assertThat(db.getProperty("rocksdb.num-files-at-level0")).isEqualTo("0"); // Should be 10, but we are OK with asserting +- 2 int files = Integer.parseInt(db.getProperty("rocksdb.num-files-at-level1")); assertThat(files).isBetween(8, 12); // Delete lower 60% (roughly). 
Result should be 5, but we are OK with asserting +- 2 // Important is that we know something was deleted (JNI call did something) // Exact assertions are done in C++ unit tests db.deleteFilesInRanges(null, Arrays.asList(null, String.format("%020d", records * KEY_INTERVAL * 6 / 10).getBytes()), false); files = Integer.parseInt(db.getProperty("rocksdb.num-files-at-level1")); assertThat(files).isBetween(3, 7); } } @Test public void compactRangeToLevelColumnFamily() throws RocksDBException { final int NUM_KEYS_PER_L0_FILE = 100; final int KEY_SIZE = 20; final int VALUE_SIZE = 300; final int L0_FILE_SIZE = NUM_KEYS_PER_L0_FILE * (KEY_SIZE + VALUE_SIZE); final int NUM_L0_FILES = 10; final int TEST_SCALE = 5; final int KEY_INTERVAL = 100; try (final DBOptions opt = new DBOptions(). setCreateIfMissing(true). setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions(). setCompactionStyle(CompactionStyle.LEVEL). setNumLevels(5). // a slightly bigger write buffer than L0 file // so that we can ensure manual flush always // go before background flush happens. setWriteBufferSize(L0_FILE_SIZE * 2). // Disable auto L0 -> L1 compaction setLevelZeroFileNumCompactionTrigger(20). setTargetFileSizeBase(L0_FILE_SIZE * 100). setTargetFileSizeMultiplier(1). // To disable auto compaction setMaxBytesForLevelBase(NUM_L0_FILES * L0_FILE_SIZE * 100). setMaxBytesForLevelMultiplier(2). 
setDisableAutoCompactions(true) ) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) ); final List columnFamilyHandles = new ArrayList<>(); // open database try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { // fill database with key/value pairs byte[] value = new byte[VALUE_SIZE]; int int_key = 0; for (int round = 0; round < 5; ++round) { int initial_key = int_key; for (int f = 1; f <= NUM_L0_FILES; ++f) { for (int i = 0; i < NUM_KEYS_PER_L0_FILE; ++i) { int_key += KEY_INTERVAL; rand.nextBytes(value); db.put(columnFamilyHandles.get(1), String.format("%020d", int_key).getBytes(), value); } db.flush(new FlushOptions().setWaitForFlush(true), columnFamilyHandles.get(1)); // Make sure we do create one more L0 files. assertThat( db.getProperty(columnFamilyHandles.get(1), "rocksdb.num-files-at-level0")). isEqualTo("" + f); } // Compact all L0 files we just created db.compactRange( columnFamilyHandles.get(1), String.format("%020d", initial_key).getBytes(), String.format("%020d", int_key - 1).getBytes()); // Making sure there isn't any L0 files. assertThat( db.getProperty(columnFamilyHandles.get(1), "rocksdb.num-files-at-level0")). isEqualTo("0"); // Making sure there are some L1 files. // Here we only use != 0 instead of a specific number // as we don't want the test make any assumption on // how compaction works. assertThat( db.getProperty(columnFamilyHandles.get(1), "rocksdb.num-files-at-level1")). isNotEqualTo("0"); // Because we only compacted those keys we issued // in this round, there shouldn't be any L1 -> L2 // compaction. So we expect zero L2 files here. assertThat( db.getProperty(columnFamilyHandles.get(1), "rocksdb.num-files-at-level2")). 
isEqualTo("0"); } } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void continueBackgroundWorkAfterCancelAllBackgroundWork() throws RocksDBException { final int KEY_SIZE = 20; final int VALUE_SIZE = 300; try (final DBOptions opt = new DBOptions(). setCreateIfMissing(true). setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() ) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) ); final List columnFamilyHandles = new ArrayList<>(); // open the database try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { db.cancelAllBackgroundWork(true); try { db.put(new byte[KEY_SIZE], new byte[VALUE_SIZE]); db.flush(new FlushOptions().setWaitForFlush(true)); fail("Expected RocksDBException to be thrown if we attempt to trigger a flush after" + " all background work is cancelled."); } catch (RocksDBException ignored) { } } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void cancelAllBackgroundWorkTwice() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()) ) { // Cancel all background work synchronously db.cancelAllBackgroundWork(true); // Cancel all background work asynchronously db.cancelAllBackgroundWork(false); } } @Test public void pauseContinueBackgroundWork() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()) ) { db.pauseBackgroundWork(); db.continueBackgroundWork(); db.pauseBackgroundWork(); db.continueBackgroundWork(); } } @Test public void 
enableDisableFileDeletions() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()) ) { db.disableFileDeletions(); db.enableFileDeletions(false); db.disableFileDeletions(); db.enableFileDeletions(true); } } @Test public void setOptions() throws RocksDBException { try (final DBOptions options = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() .setWriteBufferSize(4096)) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts)); // open database final List columnFamilyHandles = new ArrayList<>(); try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { final MutableColumnFamilyOptions mutableOptions = MutableColumnFamilyOptions.builder() .setWriteBufferSize(2048) .build(); db.setOptions(columnFamilyHandles.get(1), mutableOptions); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void destroyDB() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.put("key1".getBytes(), "value".getBytes()); } assertThat(dbFolder.getRoot().exists() && dbFolder.getRoot().listFiles().length != 0) .isTrue(); RocksDB.destroyDB(dbPath, options); assertThat(dbFolder.getRoot().exists() && dbFolder.getRoot().listFiles().length != 0) .isFalse(); } } @Test(expected = RocksDBException.class) public void destroyDBFailIfOpen() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { String dbPath = 
dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { // Fails as the db is open and locked. RocksDB.destroyDB(dbPath, options); } } } @Test public void getApproximateSizes() throws RocksDBException { final byte key1[] = "key1".getBytes(UTF_8); final byte key2[] = "key2".getBytes(UTF_8); final byte key3[] = "key3".getBytes(UTF_8); try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.put(key1, key1); db.put(key2, key2); db.put(key3, key3); final long[] sizes = db.getApproximateSizes( Arrays.asList( new Range(new Slice(key1), new Slice(key1)), new Range(new Slice(key2), new Slice(key3)) ), SizeApproximationFlag.INCLUDE_FILES, SizeApproximationFlag.INCLUDE_MEMTABLES); assertThat(sizes.length).isEqualTo(2); assertThat(sizes[0]).isEqualTo(0); assertThat(sizes[1]).isGreaterThanOrEqualTo(1); } } } @Test public void getApproximateMemTableStats() throws RocksDBException { final byte key1[] = "key1".getBytes(UTF_8); final byte key2[] = "key2".getBytes(UTF_8); final byte key3[] = "key3".getBytes(UTF_8); try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.put(key1, key1); db.put(key2, key2); db.put(key3, key3); final RocksDB.CountAndSize stats = db.getApproximateMemTableStats( new Range(new Slice(key1), new Slice(key3))); assertThat(stats).isNotNull(); assertThat(stats.count).isGreaterThan(1); assertThat(stats.size).isGreaterThan(1); } } } @Ignore("TODO(AR) re-enable when ready!") @Test public void compactFiles() throws RocksDBException { final int kTestKeySize = 16; final int kTestValueSize = 984; final int kEntrySize = kTestKeySize + kTestValueSize; final int kEntriesPerBuffer = 100; final int writeBufferSize = kEntrySize * kEntriesPerBuffer; final byte[] cfName 
= "pikachu".getBytes(UTF_8); try (final Options options = new Options() .setCreateIfMissing(true) .setWriteBufferSize(writeBufferSize) .setCompactionStyle(CompactionStyle.LEVEL) .setTargetFileSizeBase(writeBufferSize) .setMaxBytesForLevelBase(writeBufferSize * 2) .setLevel0StopWritesTrigger(2) .setMaxBytesForLevelMultiplier(2) .setCompressionType(CompressionType.NO_COMPRESSION) .setMaxSubcompactions(4)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath); final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(options)) { db.createColumnFamily(new ColumnFamilyDescriptor(cfName, cfOptions)).close(); } try (final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(options)) { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOptions), new ColumnFamilyDescriptor(cfName, cfOptions) ); final List cfHandles = new ArrayList<>(); try (final DBOptions dbOptions = new DBOptions(options); final RocksDB db = RocksDB.open(dbOptions, dbPath, cfDescriptors, cfHandles); ) { try (final FlushOptions flushOptions = new FlushOptions() .setWaitForFlush(true) .setAllowWriteStall(true); final CompactionOptions compactionOptions = new CompactionOptions()) { final Random rnd = new Random(301); for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { final byte[] value = new byte[kTestValueSize]; rnd.nextBytes(value); db.put(cfHandles.get(1), Integer.toString(key).getBytes(UTF_8), value); } db.flush(flushOptions, cfHandles); final RocksDB.LiveFiles liveFiles = db.getLiveFiles(); final List compactedFiles = db.compactFiles(compactionOptions, cfHandles.get(1), liveFiles.files, 1, -1, null); assertThat(compactedFiles).isNotEmpty(); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } } } } @Test public void enableAutoCompaction() throws RocksDBException { try (final DBOptions options = new DBOptions() .setCreateIfMissing(true)) { 
final List cfDescs = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) ); final List cfHandles = new ArrayList<>(); final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { try { db.enableAutoCompaction(cfHandles); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } } } @Test public void numberLevels() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { assertThat(db.numberLevels()).isEqualTo(7); } } } @Test public void maxMemCompactionLevel() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { assertThat(db.maxMemCompactionLevel()).isEqualTo(0); } } } @Test public void level0StopWriteTrigger() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { assertThat(db.level0StopWriteTrigger()).isEqualTo(36); } } } @Test public void getName() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { assertThat(db.getName()).isEqualTo(dbPath); } } } @Test public void getEnv() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { assertThat(db.getEnv()).isEqualTo(Env.getDefault()); } } } @Test public void flush() throws RocksDBException { try (final 
Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath); final FlushOptions flushOptions = new FlushOptions()) { db.flush(flushOptions); } } } @Test public void flushWal() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.flushWal(true); } } } @Test public void syncWal() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.syncWal(); } } } @Test public void setPreserveDeletesSequenceNumber() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { assertThat(db.setPreserveDeletesSequenceNumber(db.getLatestSequenceNumber())) .isFalse(); } } } @Test public void getLiveFiles() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { final RocksDB.LiveFiles livefiles = db.getLiveFiles(true); assertThat(livefiles).isNotNull(); assertThat(livefiles.manifestFileSize).isEqualTo(13); assertThat(livefiles.files.size()).isEqualTo(3); assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT"); assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000001"); assertThat(livefiles.files.get(2)).isEqualTo("/OPTIONS-000005"); } } } @Test public void getSortedWalFiles() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final 
RocksDB db = RocksDB.open(options, dbPath)) { db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); final List logFiles = db.getSortedWalFiles(); assertThat(logFiles).isNotNull(); assertThat(logFiles.size()).isEqualTo(1); assertThat(logFiles.get(0).type()) .isEqualTo(WalFileType.kAliveLogFile); } } } @Test public void deleteFile() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.deleteFile("unknown"); } } } @Test public void getLiveFilesMetaData() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); final List liveFilesMetaData = db.getLiveFilesMetaData(); assertThat(liveFilesMetaData).isEmpty(); } } } @Test public void getColumnFamilyMetaData() throws RocksDBException { try (final DBOptions options = new DBOptions() .setCreateIfMissing(true)) { final List cfDescs = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) ); final List cfHandles = new ArrayList<>(); final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); try { final ColumnFamilyMetaData cfMetadata = db.getColumnFamilyMetaData(cfHandles.get(0)); assertThat(cfMetadata).isNotNull(); assertThat(cfMetadata.name()).isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); assertThat(cfMetadata.levels().size()).isEqualTo(7); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } } } @Test public void verifyChecksum() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { final String dbPath = 
dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath)) { db.verifyChecksum(); } } } @Test public void getPropertiesOfAllTables() throws RocksDBException { try (final DBOptions options = new DBOptions() .setCreateIfMissing(true)) { final List cfDescs = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) ); final List cfHandles = new ArrayList<>(); final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); try { final Map properties = db.getPropertiesOfAllTables(cfHandles.get(0)); assertThat(properties).isNotNull(); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } } } @Test public void getPropertiesOfTablesInRange() throws RocksDBException { try (final DBOptions options = new DBOptions() .setCreateIfMissing(true)) { final List cfDescs = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY) ); final List cfHandles = new ArrayList<>(); final String dbPath = dbFolder.getRoot().getAbsolutePath(); try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); db.put(cfHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); db.put(cfHandles.get(0), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); try { final Range range = new Range( new Slice("key1".getBytes(UTF_8)), new Slice("key3".getBytes(UTF_8))); final Map properties = db.getPropertiesOfTablesInRange( cfHandles.get(0), Arrays.asList(range)); assertThat(properties).isNotNull(); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } } } } } @Test public void suggestCompactRange() throws RocksDBException { try (final DBOptions options = new DBOptions() .setCreateIfMissing(true)) { final List cfDescs = Arrays.asList( new 
ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)
      );
      // Typed list: the handles are iterated and passed as
      // ColumnFamilyHandle below, which a raw List cannot satisfy.
      final List<ColumnFamilyHandle> cfHandles = new ArrayList<>();
      final String dbPath = dbFolder.getRoot().getAbsolutePath();
      try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) {
        try {
          // the puts are inside the try so the handles are closed even if
          // one throws
          db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8));
          db.put(cfHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8));
          db.put(cfHandles.get(0), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8));
          final Range range = db.suggestCompactRange(cfHandles.get(0));
          assertThat(range).isNotNull();
        } finally {
          for (final ColumnFamilyHandle cfHandle : cfHandles) {
            cfHandle.close();
          }
        }
      }
    }
  }

  /**
   * Calls {@code promoteL0(2)} on a freshly created database; passes if the
   * call does not throw.
   *
   * @throws RocksDBException if opening the database or the promotion fails.
   */
  @Test
  public void promoteL0() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true)) {
      final String dbPath = dbFolder.getRoot().getAbsolutePath();
      try (final RocksDB db = RocksDB.open(options, dbPath)) {
        db.promoteL0(2);
      }
    }
  }

  /**
   * Starts a trace into an InMemoryTraceWriter, performs one put, ends the
   * trace and expects the writer to have received at least one write.
   *
   * @throws RocksDBException if opening the database, tracing or the put
   *     fails.
   */
  @Test
  public void startTrace() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true)) {
      final String dbPath = dbFolder.getRoot().getAbsolutePath();
      try (final RocksDB db = RocksDB.open(options, dbPath)) {
        final TraceOptions traceOptions = new TraceOptions();

        try (final InMemoryTraceWriter traceWriter = new InMemoryTraceWriter()) {
          db.startTrace(traceOptions, traceWriter);

          db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8));

          db.endTrace();

          final List<byte[]> writes = traceWriter.getWrites();
          assertThat(writes.size()).isGreaterThan(0);
        }
      }
    }
  }

  /**
   * Opens a database with a "new_cf" column family and applies
   * MutableDBOptions (bytesPerSync and avoidFlushDuringShutdown) through
   * {@code setDBOptions}; passes if the call does not throw.
   *
   * @throws RocksDBException if opening the database or setting the options
   *     fails.
   */
  @Test
  public void setDBOptions() throws RocksDBException {
    try (final DBOptions options = new DBOptions()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions()
             .setWriteBufferSize(4096)) {

      final List<ColumnFamilyDescriptor> columnFamilyDescriptors =
          Arrays.asList(
              new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
              new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts));

      // open database
      final List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>();
      try (final RocksDB db = RocksDB.open(options,
          dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) {
        try {
          final MutableDBOptions mutableOptions =
              MutableDBOptions.builder()
                  .setBytesPerSync(1024 * 1027 * 7)
                  .setAvoidFlushDuringShutdown(false)
                  .build();

          db.setDBOptions(mutableOptions);
        } finally {
          for (final ColumnFamilyHandle handle : columnFamilyHandles) {
            handle.close();
          }
        }
      }
    }
  }

  /**
   * Trace writer that keeps a copy of every slice it is given in an
   * in-memory list, so a test can inspect what was traced.
   */
  private static class InMemoryTraceWriter extends AbstractTraceWriter {
    // Typed as byte[] so getFileSize() can read each element's length.
    private final List<byte[]> writes = new ArrayList<>();
    private volatile boolean closed = false;

    /**
     * Stores a copy of the slice's bytes; does nothing once the writer has
     * been closed.
     *
     * @param slice the trace data to record.
     */
    @Override
    public void write(final Slice slice) {
      if (closed) {
        return;
      }
      // copy the array returned by data() rather than keeping a reference
      // to it
      final byte[] data = slice.data();
      writes.add(data.clone());
    }

    /** Marks the writer closed; later calls to write are ignored. */
    @Override
    public void closeWriter() {
      closed = true;
    }

    /**
     * @return the total number of bytes recorded so far.
     */
    @Override
    public long getFileSize() {
      long size = 0;
      for (final byte[] write : writes) {
        size += write.length;
      }
      return size;
    }

    /**
     * @return the recorded writes; this is the internal list, not a copy.
     */
    public List<byte[]> getWrites() {
      return writes;
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/RocksIteratorTest.java000066400000000000000000000203401370372246700253700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import static org.assertj.core.api.Assertions.assertThat;

import java.nio.ByteBuffer;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/**
 * Tests for {@link RocksIterator}: positioning calls, reads into direct
 * {@link ByteBuffer}s, {@code refresh()}, and iterating after the column
 * family handle the iterator was created from has been closed.
 */
public class RocksIteratorTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /**
   * Writes key1/key2 and then runs the four iterator scenarios in order;
   * each scenario opens and closes its own iterator on the same database.
   */
  @Test
  public void rocksIterator() throws RocksDBException {
    try (final Options opts = new Options()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(opts,
             dbFolder.getRoot().getAbsolutePath())) {
      db.put("key1".getBytes(), "value1".getBytes());
      db.put("key2".getBytes(), "value2".getBytes());

      verifyTraversalAndDirectBuffers(db);
      verifySeek(db);
      verifySeekForPrev(db);
      verifyRefresh(db);
    }
  }

  /**
   * Scenario 1: next/prev/seekToFirst/seekToLast plus the ByteBuffer
   * overloads of key(), value(), seek() and seekForPrev().
   */
  private static void verifyTraversalAndDirectBuffers(final RocksDB db)
      throws RocksDBException {
    try (final RocksIterator it = db.newIterator()) {
      it.seekToFirst();
      assertThat(it.isValid()).isTrue();
      assertThat(it.key()).isEqualTo("key1".getBytes());
      assertThat(it.value()).isEqualTo("value1".getBytes());

      // Buffers smaller than the entry: the calls are expected to report the
      // full lengths (4 and 6) while only two bytes become readable.
      final ByteBuffer smallKey = ByteBuffer.allocateDirect(2);
      final ByteBuffer smallValue = ByteBuffer.allocateDirect(2);
      assertThat(it.key(smallKey)).isEqualTo(4);
      assertThat(it.value(smallValue)).isEqualTo(6);

      assertThat(smallKey.position()).isEqualTo(0);
      assertThat(smallKey.limit()).isEqualTo(2);
      assertThat(smallValue.position()).isEqualTo(0);
      assertThat(smallValue.limit()).isEqualTo(2);

      final byte[] keyPrefix = new byte[2];
      smallKey.get(keyPrefix);
      assertThat(keyPrefix).isEqualTo("ke".getBytes());
      final byte[] valuePrefix = new byte[2];
      smallValue.get(valuePrefix);
      assertThat(valuePrefix).isEqualTo("va".getBytes());

      // Buffers larger than the entry: limit is expected to shrink to the
      // entry length.
      final ByteBuffer largeKey = ByteBuffer.allocateDirect(12);
      final ByteBuffer largeValue = ByteBuffer.allocateDirect(12);
      assertThat(it.key(largeKey)).isEqualTo(4);
      assertThat(it.value(largeValue)).isEqualTo(6);
      assertThat(largeKey.position()).isEqualTo(0);
      assertThat(largeKey.limit()).isEqualTo(4);
      assertThat(largeValue.position()).isEqualTo(0);
      assertThat(largeValue.limit()).isEqualTo(6);

      final byte[] fullKey = new byte[4];
      largeKey.get(fullKey);
      assertThat(fullKey).isEqualTo("key1".getBytes());
      final byte[] fullValue = new byte[6];
      largeValue.get(fullValue);
      assertThat(fullValue).isEqualTo("value1".getBytes());

      // Step forward to the second entry, then off the end.
      it.next();
      assertEntry(it, "key2", "value2");

      it.next();
      assertThat(it.isValid()).isFalse();

      // Last entry, one step back.
      it.seekToLast();
      it.prev();
      assertEntry(it, "key1", "value1");

      it.seekToFirst();
      it.seekToLast();
      assertEntry(it, "key2", "value2");

      it.status();

      // seek(ByteBuffer): the assertions expect the buffer to be consumed
      // (position == limit == 4) afterwards.
      refill(largeKey, "key1".getBytes());
      it.seek(largeKey);
      assertThat(it.isValid()).isTrue();
      assertThat(it.value()).isEqualTo("value1".getBytes());
      assertThat(largeKey.position()).isEqualTo(4);
      assertThat(largeKey.limit()).isEqualTo(4);

      // seekForPrev(ByteBuffer): same expectations on the buffer.
      refill(largeKey, "key2".getBytes());
      it.seekForPrev(largeKey);
      assertThat(it.isValid()).isTrue();
      assertThat(it.value()).isEqualTo("value2".getBytes());
      assertThat(largeKey.position()).isEqualTo(4);
      assertThat(largeKey.limit()).isEqualTo(4);
    }
  }

  /** Scenario 2: seek(byte[]) lands on the first key at or after the target. */
  private static void verifySeek(final RocksDB db) throws RocksDBException {
    // {target, expected key}
    final String[][] hits = {
        {"key0", "key1"},
        {"key1", "key1"},
        {"key1.5", "key2"},
        {"key2", "key2"}
    };

    try (final RocksIterator it = db.newIterator()) {
      for (final String[] hit : hits) {
        it.seek(hit[0].getBytes());
        assertThat(it.isValid()).isTrue();
        assertThat(it.key()).isEqualTo(hit[1].getBytes());
      }

      it.seek("key3".getBytes());
      assertThat(it.isValid()).isFalse();
    }
  }

  /** Scenario 3: seekForPrev(byte[]) lands on the last key at or before the target. */
  private static void verifySeekForPrev(final RocksDB db)
      throws RocksDBException {
    // {target, expected key}
    final String[][] hits = {
        {"key1", "key1"},
        {"key1.5", "key1"},
        {"key2", "key2"},
        {"key3", "key2"}
    };

    try (final RocksIterator it = db.newIterator()) {
      it.seekForPrev("key0".getBytes());
      assertThat(it.isValid()).isFalse();

      for (final String[] hit : hits) {
        it.seekForPrev(hit[0].getBytes());
        assertThat(it.isValid()).isTrue();
        assertThat(it.key()).isEqualTo(hit[1].getBytes());
      }
    }
  }

  /**
   * Scenario 4: an exhausted iterator does not see a later put until
   * refresh() is called; afterwards key3 follows the last key seen before.
   */
  private static void verifyRefresh(final RocksDB db) throws RocksDBException {
    try (final RocksIterator it = db.newIterator()) {
      it.seekToFirst();
      assertThat(it.isValid()).isTrue();

      // Walk to the end, remembering the last key visited.
      byte[] lastSeen = it.key();
      it.next();
      while (it.isValid()) {
        lastSeen = it.key();
        it.next();
      }

      db.put("key3".getBytes(), "value3".getBytes());
      assertThat(it.isValid()).isFalse();

      it.refresh();
      it.seek(lastSeen);
      assertThat(it.isValid()).isTrue();

      it.next();
      assertThat(it.isValid()).isTrue();
      assertThat(it.key()).isEqualTo("key3".getBytes());
    }
  }

  /** Asserts the iterator is valid and positioned on the given key/value. */
  private static void assertEntry(final RocksIterator it, final String key,
      final String value) {
    assertThat(it.isValid()).isTrue();
    assertThat(it.key()).isEqualTo(key.getBytes());
    assertThat(it.value()).isEqualTo(value.getBytes());
  }

  /** Resets {@code buf} and makes {@code content} its readable bytes. */
  private static void refill(final ByteBuffer buf, final byte[] content) {
    buf.clear();
    buf.put(content);
    buf.flip();
  }

  /**
   * An iterator must stay usable after the column family handle it was
   * created from has been closed (and, in the last case, dropped).
   */
  @Test
  public void rocksIteratorReleaseAfterCfClose() throws RocksDBException {
    try (final Options opts = new Options()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true);
         final RocksDB db = RocksDB.open(opts,
             this.dbFolder.getRoot().getAbsolutePath())) {
      db.put("key".getBytes(), "value".getBytes());

      // Test case: release iterator after default CF close
      try (final RocksIterator it = db.newIterator()) {
        // In fact, calling close() on default CF has no effect
        db.getDefaultColumnFamily().close();

        it.seekToFirst();
        assertEntry(it, "key", "value");
      }

      // Test case: release iterator after custom CF close
      final ColumnFamilyHandle closedCf =
          db.createColumnFamily(new ColumnFamilyDescriptor("cf1".getBytes()));
      db.put(closedCf, "key1".getBytes(), "value1".getBytes());

      try (final RocksIterator it = db.newIterator(closedCf)) {
        closedCf.close();

        it.seekToFirst();
        assertEntry(it, "key1", "value1");
      }

      // Test case: release iterator after custom CF drop & close
      final ColumnFamilyHandle droppedCf =
          db.createColumnFamily(new ColumnFamilyDescriptor("cf2".getBytes()));
      db.put(droppedCf, "key2".getBytes(), "value2".getBytes());

      try (final RocksIterator it = db.newIterator(droppedCf)) {
        db.dropColumnFamily(droppedCf);
        droppedCf.close();

        it.seekToFirst();
        assertEntry(it, "key2", "value2");
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/RocksMemEnvTest.java000066400000000000000000000105571370372246700247770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class RocksMemEnvTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Test public void memEnvFillAndReopen() throws RocksDBException { final byte[][] keys = { "aaa".getBytes(), "bbb".getBytes(), "ccc".getBytes() }; final byte[][] values = { "foo".getBytes(), "bar".getBytes(), "baz".getBytes() }; try (final Env env = new RocksMemEnv(Env.getDefault()); final Options options = new Options() .setCreateIfMissing(true) .setEnv(env); final FlushOptions flushOptions = new FlushOptions() .setWaitForFlush(true); ) { try (final RocksDB db = RocksDB.open(options, "dir/db")) { // write key/value pairs using MemEnv for (int i = 0; i < keys.length; i++) { db.put(keys[i], values[i]); } // read key/value pairs using MemEnv for (int i = 0; i < keys.length; i++) { assertThat(db.get(keys[i])).isEqualTo(values[i]); } // Check iterator access try (final RocksIterator iterator = db.newIterator()) { iterator.seekToFirst(); for (int i = 0; i < keys.length; i++) { assertThat(iterator.isValid()).isTrue(); assertThat(iterator.key()).isEqualTo(keys[i]); assertThat(iterator.value()).isEqualTo(values[i]); iterator.next(); } // reached end of database assertThat(iterator.isValid()).isFalse(); } // flush db.flush(flushOptions); // read key/value pairs after flush using MemEnv for (int i = 0; i < keys.length; i++) { assertThat(db.get(keys[i])).isEqualTo(values[i]); } } options.setCreateIfMissing(false); // After reopen the values shall still be in the mem env. // as long as the env is not freed. 
try (final RocksDB db = RocksDB.open(options, "dir/db")) { // read key/value pairs using MemEnv for (int i = 0; i < keys.length; i++) { assertThat(db.get(keys[i])).isEqualTo(values[i]); } } } } @Test public void multipleDatabaseInstances() throws RocksDBException { // db - keys final byte[][] keys = { "aaa".getBytes(), "bbb".getBytes(), "ccc".getBytes() }; // otherDb - keys final byte[][] otherKeys = { "111".getBytes(), "222".getBytes(), "333".getBytes() }; // values final byte[][] values = { "foo".getBytes(), "bar".getBytes(), "baz".getBytes() }; try (final Env env = new RocksMemEnv(Env.getDefault()); final Options options = new Options() .setCreateIfMissing(true) .setEnv(env); final RocksDB db = RocksDB.open(options, "dir/db"); final RocksDB otherDb = RocksDB.open(options, "dir/otherDb") ) { // write key/value pairs using MemEnv // to db and to otherDb. for (int i = 0; i < keys.length; i++) { db.put(keys[i], values[i]); otherDb.put(otherKeys[i], values[i]); } // verify key/value pairs after flush using MemEnv for (int i = 0; i < keys.length; i++) { // verify db assertThat(db.get(otherKeys[i])).isNull(); assertThat(db.get(keys[i])).isEqualTo(values[i]); // verify otherDb assertThat(otherDb.get(keys[i])).isNull(); assertThat(otherDb.get(otherKeys[i])).isEqualTo(values[i]); } } } @Test(expected = RocksDBException.class) public void createIfMissingFalse() throws RocksDBException { try (final Env env = new RocksMemEnv(Env.getDefault()); final Options options = new Options() .setCreateIfMissing(false) .setEnv(env); final RocksDB db = RocksDB.open(options, "db/dir")) { // shall throw an exception because db dir does not // exist. } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java000066400000000000000000000010071370372246700272210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.rules.ExternalResource;

/**
 * JUnit resource that loads the RocksDB JNI library before the tests of the
 * class it is attached to are run.
 */
public class RocksNativeLibraryResource extends ExternalResource {
  /** Calls {@link RocksDB#loadLibrary()}. */
  @Override
  protected void before() {
    RocksDB.loadLibrary();
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/SliceTest.java000066400000000000000000000045631370372246700236450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Test;

import static org.assertj.core.api.Assertions.assertThat;

/** Tests for the basic operations of {@link Slice}. */
public class SliceTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  /** Covers the String, byte[] and byte[]-with-offset constructors. */
  @Test
  public void slice() {
    try (final Slice fromString = new Slice("testSlice")) {
      assertThat(fromString.empty()).isFalse();
      assertThat(fromString.size()).isEqualTo(9);
      assertThat(fromString.data()).isEqualTo("testSlice".getBytes());
    }

    try (final Slice fromBytes = new Slice("otherSlice".getBytes())) {
      final byte[] content = fromBytes.data();
      assertThat(content).isEqualTo("otherSlice".getBytes());
    }

    // Offset 5 skips "other", leaving "Slice".
    try (final Slice fromOffset = new Slice("otherSlice".getBytes(), 5)) {
      final byte[] content = fromOffset.data();
      assertThat(content).isEqualTo("Slice".getBytes());
    }
  }

  /** clear() empties the slice and may be called twice. */
  @Test
  public void sliceClear() {
    try (final Slice s = new Slice("abc")) {
      final String before = s.toString();
      assertThat(before).isEqualTo("abc");

      s.clear();
      final String after = s.toString();
      assertThat(after).isEmpty();

      s.clear();  // make sure we don't double-free
    }
  }

  /** removePrefix(1) drops the first byte. */
  @Test
  public void sliceRemovePrefix() {
    try (final Slice s = new Slice("abc")) {
      final String before = s.toString();
      assertThat(before).isEqualTo("abc");

      s.removePrefix(1);
      final String after = s.toString();
      assertThat(after).isEqualTo("bc");
    }
  }

  /** Slices with equal content are equal and share a hash code. */
  @Test
  public void sliceEquals() {
    try (final Slice left = new Slice("abc");
         final Slice right = new Slice("abc")) {
      final boolean sameContent = left.equals(right);
      assertThat(sameContent).isTrue();

      final boolean sameHash = left.hashCode() == right.hashCode();
      assertThat(sameHash).isTrue();
    }
  }

  /** startsWith() accepts a real prefix and rejects a non-prefix. */
  @Test
  public void sliceStartWith() {
    try (final Slice subject = new Slice("matchpoint");
         final Slice prefix = new Slice("mat");
         final Slice other = new Slice("nomatch")) {
      final boolean matchesPrefix = subject.startsWith(prefix);
      final boolean matchesOther = subject.startsWith(other);
      assertThat(matchesPrefix).isTrue();
      assertThat(matchesOther).isFalse();
    }
  }

  /** toString() round-trips the content; toString(true) is non-empty. */
  @Test
  public void sliceToString() {
    try (final Slice s = new Slice("stringTest")) {
      final String plain = s.toString();
      assertThat(plain).isEqualTo("stringTest");

      final String hex = s.toString(true);
      assertThat(hex).isNotEqualTo("");
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/SnapshotTest.java000066400000000000000000000150331370372246700243770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link Snapshot}: point reads and iteration through a
 * {@link ReadOptions} that carries a snapshot, compared against reads of
 * the current database state.
 *
 * The order of puts, snapshot creation, reads and release in each test is
 * what is being tested; do not reorder statements.
 */
public class SnapshotTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /**
   * Takes a snapshot after one put and checks that reads through it keep
   * returning the state at snapshot time while the database is modified,
   * and that clearing the snapshot on the ReadOptions restores current reads.
   */
  @Test
  public void snapshots() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options,
             dbFolder.getRoot().getAbsolutePath())) {
      db.put("key".getBytes(), "value".getBytes());
      // Get new Snapshot of database
      try (final Snapshot snapshot = db.getSnapshot()) {
        // exactly one write has happened so far, so the test expects
        // sequence number 1
        assertThat(snapshot.getSequenceNumber()).isGreaterThan(0);
        assertThat(snapshot.getSequenceNumber()).isEqualTo(1);
        try (final ReadOptions readOptions = new ReadOptions()) {
          // set snapshot in ReadOptions
          readOptions.setSnapshot(snapshot);

          // retrieve key value pair
          assertThat(new String(db.get("key".getBytes()))).
              isEqualTo("value");
          // retrieve key value pair created before
          // the snapshot was made
          assertThat(new String(db.get(readOptions,
              "key".getBytes()))).isEqualTo("value");
          // add new key/value pair
          db.put("newkey".getBytes(), "newvalue".getBytes());
          // using no snapshot the latest db entries
          // will be taken into account
          assertThat(new String(db.get("newkey".getBytes()))).
              isEqualTo("newvalue");
          // snapshot was created before newkey
          assertThat(db.get(readOptions, "newkey".getBytes())).
              isNull();
          // Retrieve snapshot from read options
          try (final Snapshot sameSnapshot = readOptions.snapshot()) {
            readOptions.setSnapshot(sameSnapshot);
            // results must be the same with new Snapshot
            // instance using the same native pointer
            assertThat(new String(db.get(readOptions,
                "key".getBytes()))).isEqualTo("value");
            // update key value pair to newvalue
            db.put("key".getBytes(), "newvalue".getBytes());
            // read with previously created snapshot will
            // read previous version of key value pair
            assertThat(new String(db.get(readOptions,
                "key".getBytes()))).isEqualTo("value");
            // read for newkey using the snapshot must be
            // null
            assertThat(db.get(readOptions, "newkey".getBytes())).
                isNull();
            // setting null to snapshot in ReadOptions leads
            // to no Snapshot being used.
            readOptions.setSnapshot(null);
            assertThat(new String(db.get(readOptions,
                "newkey".getBytes()))).isEqualTo("newvalue");
            // release Snapshot
            // NOTE(review): this runs only if every assertion above passed;
            // it is not in a finally block.
            db.releaseSnapshot(snapshot);
          }
        }
      }
    }
  }

  /**
   * An iterator created with snapshot ReadOptions must see only "key",
   * while a plain iterator also sees "key2" written after the snapshot.
   */
  @Test
  public void iteratorWithSnapshot() throws RocksDBException {
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options,
             dbFolder.getRoot().getAbsolutePath())) {
      db.put("key".getBytes(), "value".getBytes());

      // Get new Snapshot of database
      // set snapshot in ReadOptions
      try (final Snapshot snapshot = db.getSnapshot();
           final ReadOptions readOptions =
               new ReadOptions().setSnapshot(snapshot)) {
        // written after the snapshot, so only visible without it
        db.put("key2".getBytes(), "value2".getBytes());

        // iterate over current state of db
        try (final RocksIterator iterator = db.newIterator()) {
          iterator.seekToFirst();
          assertThat(iterator.isValid()).isTrue();
          assertThat(iterator.key()).isEqualTo("key".getBytes());
          iterator.next();
          assertThat(iterator.isValid()).isTrue();
          assertThat(iterator.key()).isEqualTo("key2".getBytes());
          iterator.next();
          assertThat(iterator.isValid()).isFalse();
        }

        // iterate using a snapshot
        try (final RocksIterator snapshotIterator =
                 db.newIterator(readOptions)) {
          snapshotIterator.seekToFirst();
          assertThat(snapshotIterator.isValid()).isTrue();
          assertThat(snapshotIterator.key()).isEqualTo("key".getBytes());
          snapshotIterator.next();
          assertThat(snapshotIterator.isValid()).isFalse();
        }

        // release Snapshot
        db.releaseSnapshot(snapshot);
      }
    }
  }

  /**
   * Same check as {@link #iteratorWithSnapshot()}, but both iterators are
   * created through the overloads that take the default column family handle.
   */
  @Test
  public void iteratorWithSnapshotOnColumnFamily() throws RocksDBException {
    try (final Options options = new Options()
        .setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options,
             dbFolder.getRoot().getAbsolutePath())) {

      db.put("key".getBytes(), "value".getBytes());

      // Get new Snapshot of database
      // set snapshot in ReadOptions
      try (final Snapshot snapshot = db.getSnapshot();
           final ReadOptions readOptions = new ReadOptions()
               .setSnapshot(snapshot)) {
        // written after the snapshot, so only visible without it
        db.put("key2".getBytes(), "value2".getBytes());

        // iterate over current state of column family
        try (final RocksIterator iterator = db.newIterator(
            db.getDefaultColumnFamily())) {
          iterator.seekToFirst();
          assertThat(iterator.isValid()).isTrue();
          assertThat(iterator.key()).isEqualTo("key".getBytes());
          iterator.next();
          assertThat(iterator.isValid()).isTrue();
          assertThat(iterator.key()).isEqualTo("key2".getBytes());
          iterator.next();
          assertThat(iterator.isValid()).isFalse();
        }

        // iterate using a snapshot on default column family
        try (final RocksIterator snapshotIterator = db.newIterator(
            db.getDefaultColumnFamily(), readOptions)) {
          snapshotIterator.seekToFirst();
          assertThat(snapshotIterator.isValid()).isTrue();
          assertThat(snapshotIterator.key()).isEqualTo("key".getBytes());
          snapshotIterator.next();
          assertThat(snapshotIterator.isValid()).isFalse();

          // release Snapshot
          // NOTE(review): unlike iteratorWithSnapshot(), the release happens
          // while snapshotIterator is still open.
          db.releaseSnapshot(snapshot);
        }
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/SstFileManagerTest.java000066400000000000000000000047621370372246700254530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import org.junit.Test;

import java.util.Collections;

import static org.assertj.core.api.Assertions.*;

/** Tests for the getters and setters of {@link SstFileManager}. */
public class SstFileManagerTest {

  /** A fresh manager with a 64 MiB limit has not reached that limit. */
  @Test
  public void maxAllowedSpaceUsage() throws RocksDBException {
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      sstFileManager.setMaxAllowedSpaceUsage(1024 * 1024 * 64);
      assertThat(sstFileManager.isMaxAllowedSpaceReached()).isFalse();
      assertThat(sstFileManager.isMaxAllowedSpaceReachedIncludingCompactions()).isFalse();
    }
  }

  /** Setting a compaction buffer size does not by itself report the limit as reached. */
  @Test
  public void compactionBufferSize() throws RocksDBException {
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      sstFileManager.setCompactionBufferSize(1024 * 1024 * 10);
      assertThat(sstFileManager.isMaxAllowedSpaceReachedIncludingCompactions()).isFalse();
    }
  }

  /** A fresh manager tracks zero bytes. */
  @Test
  public void totalSize() throws RocksDBException {
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      assertThat(sstFileManager.getTotalSize()).isEqualTo(0);
    }
  }

  /** A fresh manager tracks no files. */
  @Test
  public void trackedFiles() throws RocksDBException {
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      assertThat(sstFileManager.getTrackedFiles()).isEqualTo(Collections.emptyMap());
    }
  }

  /** The delete rate starts at its default and reflects a later set. */
  @Test
  public void deleteRateBytesPerSecond() throws RocksDBException {
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      assertThat(sstFileManager.getDeleteRateBytesPerSecond()).isEqualTo(SstFileManager.RATE_BYTES_PER_SEC_DEFAULT);
      final long ratePerSecond = 1024 * 1024 * 52;
      sstFileManager.setDeleteRateBytesPerSecond(ratePerSecond);
      assertThat(sstFileManager.getDeleteRateBytesPerSecond()).isEqualTo(ratePerSecond);
    }
  }

  /** The trash/DB ratio starts at its default and reflects a later set. */
  @Test
  public void maxTrashDBRatio() throws RocksDBException {
    try (final SstFileManager sstFileManager = new SstFileManager(Env.getDefault())) {
      assertThat(sstFileManager.getMaxTrashDBRatio()).isEqualTo(SstFileManager.MAX_TRASH_DB_RATION_DEFAULT);
      final double trashRatio = 0.2;
      sstFileManager.setMaxTrashDBRatio(trashRatio);
      assertThat(sstFileManager.getMaxTrashDBRatio()).isEqualTo(trashRatio);
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/SstFileReaderTest.java000066400000000000000000000117741370372246700253040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.rocksdb.util.BytewiseComparator;

/**
 * Tests {@link SstFileReader} against an SST file produced by
 * {@link SstFileWriter}.
 */
public class SstFileReaderTest {
  private static final String SST_FILE_NAME = "test.sst";

  /** One key/value pair plus the writer operation that should store it. */
  class KeyValueWithOp {
    KeyValueWithOp(String key, String value, OpType opType) {
      this.key = key;
      this.value = value;
      this.opType = opType;
    }

    String getKey() {
      return key;
    }

    String getValue() {
      return value;
    }

    OpType getOpType() {
      return opType;
    }

    private String key;
    private String value;
    private OpType opType;
  }

  @Rule public TemporaryFolder parentFolder = new TemporaryFolder();

  /** Writer operations; the *_BYTES variants use the byte[] overloads. */
  enum OpType { PUT, PUT_BYTES, MERGE, MERGE_BYTES, DELETE, DELETE_BYTES }

  /**
   * Writes {@code keyValues}, in order, to a new SST file in the temporary
   * folder.
   *
   * Every native object created here is held in try-with-resources so that
   * it is closed even when a write or the temp-file creation throws.
   *
   * @param keyValues entries to write, each with the operation to apply
   * @return the finished SST file
   * @throws IOException if the temporary file cannot be created
   * @throws RocksDBException if the writer rejects an operation
   */
  private File newSstFile(final List<KeyValueWithOp> keyValues)
      throws IOException, RocksDBException {
    try (final EnvOptions envOptions = new EnvOptions();
         final StringAppendOperator stringAppendOperator = new StringAppendOperator();
         final Options options = new Options().setMergeOperator(stringAppendOperator);
         final SstFileWriter sstFileWriter = new SstFileWriter(envOptions, options)) {
      final File sstFile = parentFolder.newFile(SST_FILE_NAME);
      sstFileWriter.open(sstFile.getAbsolutePath());

      for (final KeyValueWithOp keyValue : keyValues) {
        final byte[] keyBytes = keyValue.getKey().getBytes();
        final byte[] valueBytes = keyValue.getValue().getBytes();
        // Slices own native memory; scope them to one iteration.
        try (final Slice keySlice = new Slice(keyValue.getKey());
             final Slice valueSlice = new Slice(keyValue.getValue())) {
          switch (keyValue.getOpType()) {
            case PUT:
              sstFileWriter.put(keySlice, valueSlice);
              break;
            case PUT_BYTES:
              sstFileWriter.put(keyBytes, valueBytes);
              break;
            case MERGE:
              sstFileWriter.merge(keySlice, valueSlice);
              break;
            case MERGE_BYTES:
              sstFileWriter.merge(keyBytes, valueBytes);
              break;
            case DELETE:
              sstFileWriter.delete(keySlice);
              break;
            case DELETE_BYTES:
              sstFileWriter.delete(keyBytes);
              break;
            default:
              fail("Unsupported op type");
          }
        }
      }

      sstFileWriter.finish();
      return sstFile;
    }
  }

  /**
   * Writes a single-entry SST file and reads it back through the byte[]
   * and direct-ByteBuffer iterator accessors.
   */
  @Test
  public void readSstFile() throws RocksDBException, IOException {
    final List<KeyValueWithOp> keyValues = new ArrayList<>();
    keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT));

    final File sstFile = newSstFile(keyValues);
    try (final StringAppendOperator stringAppendOperator = new StringAppendOperator();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setMergeOperator(stringAppendOperator);
         final SstFileReader reader = new SstFileReader(options)) {
      // Open the sst file and iterator
      reader.open(sstFile.getAbsolutePath());

      // The iterator is declared after (and so closed before) the
      // ReadOptions it was created with.
      try (final ReadOptions readOptions = new ReadOptions();
           final SstFileReaderIterator iterator = reader.newIterator(readOptions)) {
        // Use the iterator to read sst file
        iterator.seekToFirst();

        // Verify Checksum
        reader.verifyChecksum();

        // Verify Table Properties (expected value first, per JUnit convention)
        assertEquals(1, reader.getTableProperties().getNumEntries());

        // Check key and value
        assertThat(iterator.key()).isEqualTo("key1".getBytes());
        assertThat(iterator.value()).isEqualTo("value1".getBytes());

        // seek(ByteBuffer): the buffer is expected to be fully consumed.
        final ByteBuffer direct = ByteBuffer.allocateDirect(128);
        direct.put("key1".getBytes()).flip();
        iterator.seek(direct);
        assertThat(direct.position()).isEqualTo(4);
        assertThat(direct.limit()).isEqualTo(4);

        assertThat(iterator.isValid()).isTrue();
        assertThat(iterator.key()).isEqualTo("key1".getBytes());
        assertThat(iterator.value()).isEqualTo("value1".getBytes());

        // key(ByteBuffer) / value(ByteBuffer) return the entry length and
        // fill the buffer from its start.
        direct.clear();
        assertThat(iterator.key(direct)).isEqualTo("key1".getBytes().length);
        byte[] dst = new byte["key1".getBytes().length];
        direct.get(dst);
        assertThat(new String(dst)).isEqualTo("key1");

        direct.clear();
        assertThat(iterator.value(direct)).isEqualTo("value1".getBytes().length);
        dst = new byte["value1".getBytes().length];
        direct.get(dst);
        assertThat(new String(dst)).isEqualTo("value1");
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/SstFileWriterTest.java000066400000000000000000000223341370372246700253500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.rocksdb.util.BytewiseComparator;

/**
 * Tests {@link SstFileWriter}: generating SST files with the native and the
 * Java bytewise comparator, and ingesting a generated file into a database.
 */
public class SstFileWriterTest {
  private static final String SST_FILE_NAME = "test.sst";
  private static final String DB_DIRECTORY_NAME = "test_db";

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule public TemporaryFolder parentFolder = new TemporaryFolder();

  /**
   * Writer operations; *_BYTES use the byte[] overloads and PUT_DIRECT the
   * direct-ByteBuffer overload.
   */
  enum OpType { PUT, PUT_BYTES, PUT_DIRECT, MERGE, MERGE_BYTES, DELETE, DELETE_BYTES }

  /** One key/value pair plus the writer operation that should store it. */
  class KeyValueWithOp {
    KeyValueWithOp(String key, String value, OpType opType) {
      this.key = key;
      this.value = value;
      this.opType = opType;
    }

    String getKey() {
      return key;
    }

    String getValue() {
      return value;
    }

    OpType getOpType() {
      return opType;
    }

    private String key;
    private String value;
    private OpType opType;
  }

  /**
   * Writes {@code keyValues}, in order, to a new SST file in the temporary
   * folder.
   *
   * All native objects are held in try-with-resources so they are closed
   * even if a write or an assertion fails. The comparator resources are
   * {@code null} when the native comparator is used; try-with-resources
   * skips null resources.
   *
   * @param keyValues entries to write, each with the operation to apply
   * @param useJavaBytewiseComparator if true, configure the options with the
   *     Java {@link BytewiseComparator} instead of the native default
   * @return the finished SST file
   * @throws IOException if the temporary file cannot be created
   * @throws RocksDBException if the writer rejects an operation
   */
  private File newSstFile(final List<KeyValueWithOp> keyValues,
      boolean useJavaBytewiseComparator) throws IOException, RocksDBException {
    try (final EnvOptions envOptions = new EnvOptions();
         final StringAppendOperator stringAppendOperator = new StringAppendOperator();
         final ComparatorOptions comparatorOptions = useJavaBytewiseComparator
             ? new ComparatorOptions().setUseDirectBuffer(false)
             : null;
         final BytewiseComparator comparator = useJavaBytewiseComparator
             ? new BytewiseComparator(comparatorOptions)
             : null;
         final Options options = new Options().setMergeOperator(stringAppendOperator)) {
      if (comparator != null) {
        options.setComparator(comparator);
      }

      try (final SstFileWriter sstFileWriter = new SstFileWriter(envOptions, options)) {
        final File sstFile = parentFolder.newFile(SST_FILE_NAME);
        sstFileWriter.open(sstFile.getAbsolutePath());
        assertThat(sstFileWriter.fileSize()).isEqualTo(0);

        for (final KeyValueWithOp keyValue : keyValues) {
          writeEntry(sstFileWriter, keyValue);
        }

        sstFileWriter.finish();
        assertThat(sstFileWriter.fileSize()).isGreaterThan(100);
        return sstFile;
      }
    }
  }

  /**
   * Applies one entry to the writer using the overload selected by its
   * {@link OpType}. For PUT_DIRECT the buffers are expected to be fully
   * consumed afterwards.
   */
  private void writeEntry(final SstFileWriter sstFileWriter,
      final KeyValueWithOp keyValue) throws RocksDBException {
    final byte[] keyBytes = keyValue.getKey().getBytes();
    final byte[] valueBytes = keyValue.getValue().getBytes();

    final ByteBuffer keyDirect = ByteBuffer.allocateDirect(keyBytes.length);
    keyDirect.put(keyBytes);
    keyDirect.flip();
    final ByteBuffer valueDirect = ByteBuffer.allocateDirect(valueBytes.length);
    valueDirect.put(valueBytes);
    valueDirect.flip();

    // Slices own native memory; close them even if the write throws.
    try (final Slice keySlice = new Slice(keyValue.getKey());
         final Slice valueSlice = new Slice(keyValue.getValue())) {
      switch (keyValue.getOpType()) {
        case PUT:
          sstFileWriter.put(keySlice, valueSlice);
          break;
        case PUT_BYTES:
          sstFileWriter.put(keyBytes, valueBytes);
          break;
        case PUT_DIRECT:
          sstFileWriter.put(keyDirect, valueDirect);
          assertThat(keyDirect.position()).isEqualTo(keyBytes.length);
          assertThat(keyDirect.limit()).isEqualTo(keyBytes.length);
          assertThat(valueDirect.position()).isEqualTo(valueBytes.length);
          assertThat(valueDirect.limit()).isEqualTo(valueBytes.length);
          break;
        case MERGE:
          sstFileWriter.merge(keySlice, valueSlice);
          break;
        case MERGE_BYTES:
          sstFileWriter.merge(keyBytes, valueBytes);
          break;
        case DELETE:
          sstFileWriter.delete(keySlice);
          break;
        case DELETE_BYTES:
          sstFileWriter.delete(keyBytes);
          break;
        default:
          fail("Unsupported op type");
      }
    }
  }

  /** Generating a file with the Java bytewise comparator must succeed. */
  @Test
  public void generateSstFileWithJavaComparator()
      throws RocksDBException, IOException {
    final List<KeyValueWithOp> keyValues = new ArrayList<>();
    keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key3", "value3", OpType.MERGE));
    keyValues.add(new KeyValueWithOp("key4", "value4", OpType.MERGE));
    keyValues.add(new KeyValueWithOp("key5", "", OpType.DELETE));

    newSstFile(keyValues, true);
  }

  /** Generating a file with the native comparator must succeed. */
  @Test
  public void generateSstFileWithNativeComparator()
      throws RocksDBException, IOException {
    final List<KeyValueWithOp> keyValues = new ArrayList<>();
    keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key3", "value3", OpType.MERGE));
    keyValues.add(new KeyValueWithOp("key4", "value4", OpType.MERGE));
    keyValues.add(new KeyValueWithOp("key5", "", OpType.DELETE));

    newSstFile(keyValues, false);
  }

  /**
   * Ingests a generated file into the default column family and checks that
   * put/merge entries are readable and deleted keys are absent.
   */
  @Test
  public void ingestSstFile() throws RocksDBException, IOException {
    final List<KeyValueWithOp> keyValues = new ArrayList<>();
    keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT_DIRECT));
    keyValues.add(new KeyValueWithOp("key3", "value3", OpType.PUT_BYTES));
    keyValues.add(new KeyValueWithOp("key4", "value4", OpType.MERGE));
    keyValues.add(new KeyValueWithOp("key5", "value5", OpType.MERGE_BYTES));
    keyValues.add(new KeyValueWithOp("key6", "", OpType.DELETE));
    keyValues.add(new KeyValueWithOp("key7", "", OpType.DELETE));

    final File sstFile = newSstFile(keyValues, false);
    final File dbFolder = parentFolder.newFolder(DB_DIRECTORY_NAME);
    try (final StringAppendOperator stringAppendOperator = new StringAppendOperator();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setMergeOperator(stringAppendOperator);
         final RocksDB db = RocksDB.open(options, dbFolder.getAbsolutePath());
         final IngestExternalFileOptions ingestExternalFileOptions =
             new IngestExternalFileOptions()) {
      db.ingestExternalFile(Arrays.asList(sstFile.getAbsolutePath()),
          ingestExternalFileOptions);

      assertThat(db.get("key1".getBytes())).isEqualTo("value1".getBytes());
      assertThat(db.get("key2".getBytes())).isEqualTo("value2".getBytes());
      assertThat(db.get("key3".getBytes())).isEqualTo("value3".getBytes());
      assertThat(db.get("key4".getBytes())).isEqualTo("value4".getBytes());
      assertThat(db.get("key5".getBytes())).isEqualTo("value5".getBytes());
      assertThat(db.get("key6".getBytes())).isNull();
      assertThat(db.get("key7".getBytes())).isNull();
    }
  }

  /** Same as {@link #ingestSstFile()}, but into a newly created column family. */
  @Test
  public void ingestSstFile_cf() throws RocksDBException, IOException {
    final List<KeyValueWithOp> keyValues = new ArrayList<>();
    keyValues.add(new KeyValueWithOp("key1", "value1", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key2", "value2", OpType.PUT));
    keyValues.add(new KeyValueWithOp("key3", "value3", OpType.MERGE));
    keyValues.add(new KeyValueWithOp("key4", "", OpType.DELETE));

    final File sstFile = newSstFile(keyValues, false);
    final File dbFolder = parentFolder.newFolder(DB_DIRECTORY_NAME);
    try (final StringAppendOperator stringAppendOperator = new StringAppendOperator();
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setCreateMissingColumnFamilies(true)
             .setMergeOperator(stringAppendOperator);
         final RocksDB db = RocksDB.open(options, dbFolder.getAbsolutePath());
         final IngestExternalFileOptions ingestExternalFileOptions =
             new IngestExternalFileOptions()) {

      try (final ColumnFamilyOptions cf_opts = new ColumnFamilyOptions()
               .setMergeOperator(stringAppendOperator);
           final ColumnFamilyHandle cf_handle = db.createColumnFamily(
               new ColumnFamilyDescriptor("new_cf".getBytes(), cf_opts))) {

        db.ingestExternalFile(cf_handle,
            Arrays.asList(sstFile.getAbsolutePath()),
            ingestExternalFileOptions);

        assertThat(db.get(cf_handle,
            "key1".getBytes())).isEqualTo("value1".getBytes());
        assertThat(db.get(cf_handle,
            "key2".getBytes())).isEqualTo("value2".getBytes());
        assertThat(db.get(cf_handle,
            "key3".getBytes())).isEqualTo("value3".getBytes());
        assertThat(db.get(cf_handle,
            "key4".getBytes())).isNull();
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java000066400000000000000000000032751370372246700266060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import java.util.Collections; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import static org.assertj.core.api.Assertions.assertThat; public class StatisticsCollectorTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void statisticsCollector() throws InterruptedException, RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() .setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { try(final Statistics stats = opt.statistics()) { final StatsCallbackMock callback = new StatsCallbackMock(); final StatsCollectorInput statsInput = new StatsCollectorInput(stats, callback); final StatisticsCollector statsCollector = new StatisticsCollector( Collections.singletonList(statsInput), 100); statsCollector.start(); Thread.sleep(1000); assertThat(callback.tickerCallbackCount).isGreaterThan(0); assertThat(callback.histCallbackCount).isGreaterThan(0); statsCollector.shutDown(1000); } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/StatisticsTest.java000066400000000000000000000131751370372246700247370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.nio.charset.StandardCharsets; import static org.assertj.core.api.Assertions.assertThat; public class StatisticsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void statsLevel() throws RocksDBException { final Statistics statistics = new Statistics(); statistics.setStatsLevel(StatsLevel.ALL); assertThat(statistics.statsLevel()).isEqualTo(StatsLevel.ALL); } @Test public void getTickerCount() throws RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() .setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); db.put(key, value); for(int i = 0; i < 10; i++) { db.get(key); } assertThat(statistics.getTickerCount(TickerType.BYTES_READ)).isGreaterThan(0); } } @Test public void getAndResetTickerCount() throws RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() .setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); db.put(key, value); for(int i = 0; i < 10; i++) { db.get(key); } final long read = statistics.getAndResetTickerCount(TickerType.BYTES_READ); 
assertThat(read).isGreaterThan(0); final long readAfterReset = statistics.getTickerCount(TickerType.BYTES_READ); assertThat(readAfterReset).isLessThan(read); } } @Test public void getHistogramData() throws RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() .setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); db.put(key, value); for(int i = 0; i < 10; i++) { db.get(key); } final HistogramData histogramData = statistics.getHistogramData(HistogramType.BYTES_PER_READ); assertThat(histogramData).isNotNull(); assertThat(histogramData.getAverage()).isGreaterThan(0); assertThat(histogramData.getMedian()).isGreaterThan(0); assertThat(histogramData.getPercentile95()).isGreaterThan(0); assertThat(histogramData.getPercentile99()).isGreaterThan(0); assertThat(histogramData.getStandardDeviation()).isEqualTo(0.00); assertThat(histogramData.getMax()).isGreaterThan(0); assertThat(histogramData.getCount()).isGreaterThan(0); assertThat(histogramData.getSum()).isGreaterThan(0); assertThat(histogramData.getMin()).isGreaterThan(0); } } @Test public void getHistogramString() throws RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() .setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); for(int i = 0; i < 10; i++) { db.put(key, value); } assertThat(statistics.getHistogramString(HistogramType.BYTES_PER_WRITE)).isNotNull(); } } @Test public void reset() throws RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() 
.setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); db.put(key, value); for(int i = 0; i < 10; i++) { db.get(key); } final long read = statistics.getTickerCount(TickerType.BYTES_READ); assertThat(read).isGreaterThan(0); statistics.reset(); final long readAfterReset = statistics.getTickerCount(TickerType.BYTES_READ); assertThat(readAfterReset).isLessThan(read); } } @Test public void ToString() throws RocksDBException { try (final Statistics statistics = new Statistics(); final Options opt = new Options() .setStatistics(statistics) .setCreateIfMissing(true); final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { assertThat(statistics.toString()).isNotNull(); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/StatsCallbackMock.java000066400000000000000000000012161370372246700252630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; public class StatsCallbackMock implements StatisticsCollectorCallback { public int tickerCallbackCount = 0; public int histCallbackCount = 0; public void tickerCallback(TickerType tickerType, long tickerCount) { tickerCallbackCount++; } public void histogramCallback(HistogramType histType, HistogramData histData) { histCallbackCount++; } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TableFilterTest.java000066400000000000000000000073421370372246700250010ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
package org.rocksdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; public class TableFilterTest { @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void readOptions() throws RocksDBException { try (final DBOptions opt = new DBOptions(). setCreateIfMissing(true). setCreateMissingColumnFamilies(true); final ColumnFamilyOptions new_cf_opts = new ColumnFamilyOptions() ) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new_cf_opts) ); final List columnFamilyHandles = new ArrayList<>(); // open database try (final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try (final CfNameCollectionTableFilter cfNameCollectingTableFilter = new CfNameCollectionTableFilter(); final FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true); final ReadOptions readOptions = new ReadOptions().setTableFilter(cfNameCollectingTableFilter)) { db.put(columnFamilyHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); db.put(columnFamilyHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); db.put(columnFamilyHandles.get(0), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); db.put(columnFamilyHandles.get(1), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); db.put(columnFamilyHandles.get(1), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); db.put(columnFamilyHandles.get(1), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); db.flush(flushOptions, columnFamilyHandles); try (final RocksIterator iterator = db.newIterator(columnFamilyHandles.get(0), readOptions)) { iterator.seekToFirst(); while (iterator.isValid()) { 
iterator.key(); iterator.value(); iterator.next(); } } try (final RocksIterator iterator = db.newIterator(columnFamilyHandles.get(1), readOptions)) { iterator.seekToFirst(); while (iterator.isValid()) { iterator.key(); iterator.value(); iterator.next(); } } assertThat(cfNameCollectingTableFilter.cfNames.size()).isEqualTo(2); assertThat(cfNameCollectingTableFilter.cfNames.get(0)) .isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); assertThat(cfNameCollectingTableFilter.cfNames.get(1)) .isEqualTo("new_cf".getBytes(UTF_8)); } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { columnFamilyHandle.close(); } } } } } private static class CfNameCollectionTableFilter extends AbstractTableFilter { private final List cfNames = new ArrayList<>(); @Override public boolean filter(final TableProperties tableProperties) { cfNames.add(tableProperties.getColumnFamilyName()); return true; } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TimedEnvTest.java000066400000000000000000000023451370372246700243150ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import static java.nio.charset.StandardCharsets.UTF_8; public class TimedEnvTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void construct() throws RocksDBException { try (final Env env = new TimedEnv(Env.getDefault())) { // no-op } } @Test public void construct_integration() throws RocksDBException { try (final Env env = new TimedEnv(Env.getDefault()); final Options options = new Options() .setCreateIfMissing(true) .setEnv(env); ) { try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getPath())) { db.put("key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java000066400000000000000000000036611370372246700266530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import java.util.Random; import static org.assertj.core.api.Assertions.assertThat; public class TransactionDBOptionsTest { private static final Random rand = PlatformRandomHelper. 
getPlatformSpecificRandomFactory(); @Test public void maxNumLocks() { try (final TransactionDBOptions opt = new TransactionDBOptions()) { final long longValue = rand.nextLong(); opt.setMaxNumLocks(longValue); assertThat(opt.getMaxNumLocks()).isEqualTo(longValue); } } @Test public void maxNumStripes() { try (final TransactionDBOptions opt = new TransactionDBOptions()) { final long longValue = rand.nextLong(); opt.setNumStripes(longValue); assertThat(opt.getNumStripes()).isEqualTo(longValue); } } @Test public void transactionLockTimeout() { try (final TransactionDBOptions opt = new TransactionDBOptions()) { final long longValue = rand.nextLong(); opt.setTransactionLockTimeout(longValue); assertThat(opt.getTransactionLockTimeout()).isEqualTo(longValue); } } @Test public void defaultLockTimeout() { try (final TransactionDBOptions opt = new TransactionDBOptions()) { final long longValue = rand.nextLong(); opt.setDefaultLockTimeout(longValue); assertThat(opt.getDefaultLockTimeout()).isEqualTo(longValue); } } @Test public void writePolicy() { try (final TransactionDBOptions opt = new TransactionDBOptions()) { final TxnDBWritePolicy writePolicy = TxnDBWritePolicy.WRITE_UNPREPARED; // non-default opt.setWritePolicy(writePolicy); assertThat(opt.getWritePolicy()).isEqualTo(writePolicy); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TransactionDBTest.java000066400000000000000000000157721370372246700253050ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.*; import static org.assertj.core.api.Assertions.assertThat; import static java.nio.charset.StandardCharsets.UTF_8; public class TransactionDBTest { @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void open() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { assertThat(tdb).isNotNull(); } } @Test public void open_columnFamilies() throws RocksDBException { try(final DBOptions dbOptions = new DBOptions().setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final ColumnFamilyOptions myCfOpts = new ColumnFamilyOptions()) { final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("myCf".getBytes(), myCfOpts)); final List columnFamilyHandles = new ArrayList<>(); try (final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(dbOptions, txnDbOptions, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles)) { try { assertThat(tdb).isNotNull(); } finally { for (final ColumnFamilyHandle handle : columnFamilyHandles) { handle.close(); } } } } } @Test public void beginTransaction() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions()) { try(final Transaction txn = tdb.beginTransaction(writeOptions)) { assertThat(txn).isNotNull(); } } } @Test public void 
beginTransaction_transactionOptions() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions(); final TransactionOptions txnOptions = new TransactionOptions()) { try(final Transaction txn = tdb.beginTransaction(writeOptions, txnOptions)) { assertThat(txn).isNotNull(); } } } @Test public void beginTransaction_withOld() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions()) { try(final Transaction txn = tdb.beginTransaction(writeOptions)) { final Transaction txnReused = tdb.beginTransaction(writeOptions, txn); assertThat(txnReused).isSameAs(txn); } } } @Test public void beginTransaction_withOld_transactionOptions() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions(); final TransactionOptions txnOptions = new TransactionOptions()) { try(final Transaction txn = tdb.beginTransaction(writeOptions)) { final Transaction txnReused = tdb.beginTransaction(writeOptions, txnOptions, txn); assertThat(txnReused).isSameAs(txn); } } } @Test public void lockStatusData() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, 
txnDbOptions, dbFolder.getRoot().getAbsolutePath()); final WriteOptions writeOptions = new WriteOptions(); final ReadOptions readOptions = new ReadOptions()) { try (final Transaction txn = tdb.beginTransaction(writeOptions)) { final byte key[] = "key".getBytes(UTF_8); final byte value[] = "value".getBytes(UTF_8); txn.put(key, value); assertThat(txn.getForUpdate(readOptions, key, true)).isEqualTo(value); final Map lockStatus = tdb.getLockStatusData(); assertThat(lockStatus.size()).isEqualTo(1); final Set> entrySet = lockStatus.entrySet(); final Map.Entry entry = entrySet.iterator().next(); final long columnFamilyId = entry.getKey(); assertThat(columnFamilyId).isEqualTo(0); final TransactionDB.KeyLockInfo keyLockInfo = entry.getValue(); assertThat(keyLockInfo.getKey()).isEqualTo(new String(key, UTF_8)); assertThat(keyLockInfo.getTransactionIDs().length).isEqualTo(1); assertThat(keyLockInfo.getTransactionIDs()[0]).isEqualTo(txn.getId()); assertThat(keyLockInfo.isExclusive()).isTrue(); } } } @Test public void deadlockInfoBuffer() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { // TODO(AR) can we cause a deadlock so that we can test the output here? assertThat(tdb.getDeadlockInfoBuffer()).isEmpty(); } } @Test public void setDeadlockInfoBufferSize() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final TransactionDB tdb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath())) { tdb.setDeadlockInfoBufferSize(123); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java000066400000000000000000000112551370372246700272430ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import static org.assertj.core.api.Assertions.assertThat; public class TransactionLogIteratorTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void transactionLogIterator() throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); final TransactionLogIterator transactionLogIterator = db.getUpdatesSince(0)) { //no-op } } @Test public void getBatch() throws RocksDBException { final int numberOfPuts = 5; try (final Options options = new Options() .setCreateIfMissing(true) .setWalTtlSeconds(1000) .setWalSizeLimitMB(10); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { for (int i = 0; i < numberOfPuts; i++) { db.put(String.valueOf(i).getBytes(), String.valueOf(i).getBytes()); } db.flush(new FlushOptions().setWaitForFlush(true)); // the latest sequence number is 5 because 5 puts // were written beforehand assertThat(db.getLatestSequenceNumber()). isEqualTo(numberOfPuts); // insert 5 writes into a cf try (final ColumnFamilyHandle cfHandle = db.createColumnFamily( new ColumnFamilyDescriptor("new_cf".getBytes()))) { for (int i = 0; i < numberOfPuts; i++) { db.put(cfHandle, String.valueOf(i).getBytes(), String.valueOf(i).getBytes()); } // the latest sequence number is 10 because // (5 + 5) puts were written beforehand assertThat(db.getLatestSequenceNumber()). 
isEqualTo(numberOfPuts + numberOfPuts); // Get updates since the beginning try (final TransactionLogIterator transactionLogIterator = db.getUpdatesSince(0)) { assertThat(transactionLogIterator.isValid()).isTrue(); transactionLogIterator.status(); // The first sequence number is 1 final TransactionLogIterator.BatchResult batchResult = transactionLogIterator.getBatch(); assertThat(batchResult.sequenceNumber()).isEqualTo(1); } } } } @Test public void transactionLogIteratorStallAtLastRecord() throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true) .setWalTtlSeconds(1000) .setWalSizeLimitMB(10); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { db.put("key1".getBytes(), "value1".getBytes()); // Get updates since the beginning try (final TransactionLogIterator transactionLogIterator = db.getUpdatesSince(0)) { transactionLogIterator.status(); assertThat(transactionLogIterator.isValid()).isTrue(); transactionLogIterator.next(); assertThat(transactionLogIterator.isValid()).isFalse(); transactionLogIterator.status(); db.put("key2".getBytes(), "value2".getBytes()); transactionLogIterator.next(); transactionLogIterator.status(); assertThat(transactionLogIterator.isValid()).isTrue(); } } } @Test public void transactionLogIteratorCheckAfterRestart() throws RocksDBException { final int numberOfKeys = 2; try (final Options options = new Options() .setCreateIfMissing(true) .setWalTtlSeconds(1000) .setWalSizeLimitMB(10)) { try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { db.put("key1".getBytes(), "value1".getBytes()); db.put("key2".getBytes(), "value2".getBytes()); db.flush(new FlushOptions().setWaitForFlush(true)); } // reopen try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { assertThat(db.getLatestSequenceNumber()).isEqualTo(numberOfKeys); try (final TransactionLogIterator transactionLogIterator = db.getUpdatesSince(0)) { for (int i = 0; 
i < numberOfKeys; i++) { transactionLogIterator.status(); assertThat(transactionLogIterator.isValid()).isTrue(); transactionLogIterator.next(); } } } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TransactionOptionsTest.java000066400000000000000000000041541370372246700264430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import java.util.Random; import static org.assertj.core.api.Assertions.assertThat; public class TransactionOptionsTest { private static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void snapshot() { try (final TransactionOptions opt = new TransactionOptions()) { final boolean boolValue = rand.nextBoolean(); opt.setSetSnapshot(boolValue); assertThat(opt.isSetSnapshot()).isEqualTo(boolValue); } } @Test public void deadlockDetect() { try (final TransactionOptions opt = new TransactionOptions()) { final boolean boolValue = rand.nextBoolean(); opt.setDeadlockDetect(boolValue); assertThat(opt.isDeadlockDetect()).isEqualTo(boolValue); } } @Test public void lockTimeout() { try (final TransactionOptions opt = new TransactionOptions()) { final long longValue = rand.nextLong(); opt.setLockTimeout(longValue); assertThat(opt.getLockTimeout()).isEqualTo(longValue); } } @Test public void expiration() { try (final TransactionOptions opt = new TransactionOptions()) { final long longValue = rand.nextLong(); opt.setExpiration(longValue); assertThat(opt.getExpiration()).isEqualTo(longValue); } } @Test public void deadlockDetectDepth() { try (final TransactionOptions opt = new TransactionOptions()) { final long longValue = rand.nextLong(); opt.setDeadlockDetectDepth(longValue); assertThat(opt.getDeadlockDetectDepth()).isEqualTo(longValue); } 
} @Test public void maxWriteBatchSize() { try (final TransactionOptions opt = new TransactionOptions()) { final long longValue = rand.nextLong(); opt.setMaxWriteBatchSize(longValue); assertThat(opt.getMaxWriteBatchSize()).isEqualTo(longValue); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TransactionTest.java000066400000000000000000000247541370372246700250770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.Test; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; public class TransactionTest extends AbstractTransactionTest { @Test public void getForUpdate_cf_conflict() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); final byte v12[] = "value12".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(testCf, k1, v1); assertThat(txn.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); txn.commit(); } try(final Transaction txn2 = dbContainer.beginTransaction()) { try(final Transaction txn3 = dbContainer.beginTransaction()) { assertThat(txn3.getForUpdate(readOptions, testCf, k1, true)).isEqualTo(v1); // NOTE: txn2 updates k1, during txn3 try { txn2.put(testCf, k1, v12); // should cause an exception! 
} catch(final RocksDBException e) { assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); return; } } } fail("Expected an exception for put after getForUpdate from conflicting" + "transactions"); } } @Test public void getForUpdate_conflict() throws RocksDBException { final byte k1[] = "key1".getBytes(UTF_8); final byte v1[] = "value1".getBytes(UTF_8); final byte v12[] = "value12".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions()) { try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(k1, v1); assertThat(txn.getForUpdate(readOptions, k1, true)).isEqualTo(v1); txn.commit(); } try(final Transaction txn2 = dbContainer.beginTransaction()) { try(final Transaction txn3 = dbContainer.beginTransaction()) { assertThat(txn3.getForUpdate(readOptions, k1, true)).isEqualTo(v1); // NOTE: txn2 updates k1, during txn3 try { txn2.put(k1, v12); // should cause an exception! } catch(final RocksDBException e) { assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); return; } } } fail("Expected an exception for put after getForUpdate from conflicting" + "transactions"); } } @Test public void multiGetForUpdate_cf_conflict() throws RocksDBException { final byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; final byte[] otherValue = "otherValue".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); final List cfList = Arrays.asList(testCf, testCf); try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(testCf, keys[0], values[0]); txn.put(testCf, keys[1], values[1]); assertThat(txn.multiGet(readOptions, cfList, keys)).isEqualTo(values); txn.commit(); } try(final Transaction txn2 = dbContainer.beginTransaction()) { 
try(final Transaction txn3 = dbContainer.beginTransaction()) { assertThat(txn3.multiGetForUpdate(readOptions, cfList, keys)) .isEqualTo(values); // NOTE: txn2 updates k1, during txn3 try { txn2.put(testCf, keys[0], otherValue); // should cause an exception! } catch(final RocksDBException e) { assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); return; } } } fail("Expected an exception for put after getForUpdate from conflicting" + "transactions"); } } @Test public void multiGetForUpdate_conflict() throws RocksDBException { final byte keys[][] = new byte[][] { "key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; final byte values[][] = new byte[][] { "value1".getBytes(UTF_8), "value2".getBytes(UTF_8)}; final byte[] otherValue = "otherValue".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final ReadOptions readOptions = new ReadOptions()) { try(final Transaction txn = dbContainer.beginTransaction()) { txn.put(keys[0], values[0]); txn.put(keys[1], values[1]); assertThat(txn.multiGet(readOptions, keys)).isEqualTo(values); txn.commit(); } try(final Transaction txn2 = dbContainer.beginTransaction()) { try(final Transaction txn3 = dbContainer.beginTransaction()) { assertThat(txn3.multiGetForUpdate(readOptions, keys)) .isEqualTo(values); // NOTE: txn2 updates k1, during txn3 try { txn2.put(keys[0], otherValue); // should cause an exception! 
} catch(final RocksDBException e) { assertThat(e.getStatus().getCode()).isSameAs(Status.Code.TimedOut); return; } } } fail("Expected an exception for put after getForUpdate from conflicting" + "transactions"); } } @Test public void name() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getName()).isEmpty(); final String name = "my-transaction-" + rand.nextLong(); txn.setName(name); assertThat(txn.getName()).isEqualTo(name); } } @Test public void ID() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getID()).isGreaterThan(0); } } @Test public void deadlockDetect() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.isDeadlockDetect()).isFalse(); } } @Test public void waitingTxns() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getWaitingTxns().getTransactionIds().length).isEqualTo(0); } } @Test public void state() throws RocksDBException { try(final DBContainer dbContainer = startDb()) { try(final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getState()) .isSameAs(Transaction.TransactionState.STARTED); txn.commit(); assertThat(txn.getState()) .isSameAs(Transaction.TransactionState.COMMITTED); } try(final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getState()) .isSameAs(Transaction.TransactionState.STARTED); txn.rollback(); assertThat(txn.getState()) .isSameAs(Transaction.TransactionState.STARTED); } } } @Test public void Id() throws RocksDBException { try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { assertThat(txn.getId()).isNotNull(); } } @Override public TransactionDBContainer 
startDb() throws RocksDBException { final DBOptions options = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); final List columnFamilyDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, columnFamilyOptions)); final List columnFamilyHandles = new ArrayList<>(); final TransactionDB txnDb; try { txnDb = TransactionDB.open(options, txnDbOptions, dbFolder.getRoot().getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles); } catch(final RocksDBException e) { columnFamilyOptions.close(); txnDbOptions.close(); options.close(); throw e; } final WriteOptions writeOptions = new WriteOptions(); final TransactionOptions txnOptions = new TransactionOptions(); return new TransactionDBContainer(txnOptions, writeOptions, columnFamilyHandles, txnDb, txnDbOptions, columnFamilyOptions, options); } private static class TransactionDBContainer extends DBContainer { private final TransactionOptions txnOptions; private final TransactionDB txnDb; private final TransactionDBOptions txnDbOptions; public TransactionDBContainer( final TransactionOptions txnOptions, final WriteOptions writeOptions, final List columnFamilyHandles, final TransactionDB txnDb, final TransactionDBOptions txnDbOptions, final ColumnFamilyOptions columnFamilyOptions, final DBOptions options) { super(writeOptions, columnFamilyHandles, columnFamilyOptions, options); this.txnOptions = txnOptions; this.txnDb = txnDb; this.txnDbOptions = txnDbOptions; } @Override public Transaction beginTransaction() { return txnDb.beginTransaction(writeOptions, txnOptions); } @Override public Transaction beginTransaction(final WriteOptions writeOptions) { return txnDb.beginTransaction(writeOptions, txnOptions); } @Override public void close() { txnOptions.close(); 
writeOptions.close(); for(final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandles) { columnFamilyHandle.close(); } txnDb.close(); txnDbOptions.close(); options.close(); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/TtlDBTest.java000066400000000000000000000102241370372246700235460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; import static org.assertj.core.api.Assertions.assertThat; public class TtlDBTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void ttlDBOpen() throws RocksDBException, InterruptedException { try (final Options options = new Options().setCreateIfMissing(true).setMaxCompactionBytes(0); final TtlDB ttlDB = TtlDB.open(options, dbFolder.getRoot().getAbsolutePath())) { ttlDB.put("key".getBytes(), "value".getBytes()); assertThat(ttlDB.get("key".getBytes())). isEqualTo("value".getBytes()); assertThat(ttlDB.get("key".getBytes())).isNotNull(); } } @Test public void ttlDBOpenWithTtl() throws RocksDBException, InterruptedException { try (final Options options = new Options().setCreateIfMissing(true).setMaxCompactionBytes(0); final TtlDB ttlDB = TtlDB.open(options, dbFolder.getRoot().getAbsolutePath(), 1, false);) { ttlDB.put("key".getBytes(), "value".getBytes()); assertThat(ttlDB.get("key".getBytes())). 
isEqualTo("value".getBytes()); TimeUnit.SECONDS.sleep(2); ttlDB.compactRange(); assertThat(ttlDB.get("key".getBytes())).isNull(); } } @Test public void ttlDbOpenWithColumnFamilies() throws RocksDBException, InterruptedException { final List cfNames = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes()) ); final List ttlValues = Arrays.asList(0, 1); final List columnFamilyHandleList = new ArrayList<>(); try (final DBOptions dbOptions = new DBOptions() .setCreateMissingColumnFamilies(true) .setCreateIfMissing(true); final TtlDB ttlDB = TtlDB.open(dbOptions, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList, ttlValues, false)) { try { ttlDB.put("key".getBytes(), "value".getBytes()); assertThat(ttlDB.get("key".getBytes())). isEqualTo("value".getBytes()); ttlDB.put(columnFamilyHandleList.get(1), "key".getBytes(), "value".getBytes()); assertThat(ttlDB.get(columnFamilyHandleList.get(1), "key".getBytes())).isEqualTo("value".getBytes()); TimeUnit.SECONDS.sleep(2); ttlDB.compactRange(); ttlDB.compactRange(columnFamilyHandleList.get(1)); assertThat(ttlDB.get("key".getBytes())).isNotNull(); assertThat(ttlDB.get(columnFamilyHandleList.get(1), "key".getBytes())).isNull(); } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { columnFamilyHandle.close(); } } } } @Test public void createTtlColumnFamily() throws RocksDBException, InterruptedException { try (final Options options = new Options().setCreateIfMissing(true); final TtlDB ttlDB = TtlDB.open(options, dbFolder.getRoot().getAbsolutePath()); final ColumnFamilyHandle columnFamilyHandle = ttlDB.createColumnFamilyWithTtl( new ColumnFamilyDescriptor("new_cf".getBytes()), 1)) { ttlDB.put(columnFamilyHandle, "key".getBytes(), "value".getBytes()); assertThat(ttlDB.get(columnFamilyHandle, "key".getBytes())). 
isEqualTo("value".getBytes()); TimeUnit.SECONDS.sleep(2); ttlDB.compactRange(columnFamilyHandle); assertThat(ttlDB.get(columnFamilyHandle, "key".getBytes())).isNull(); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/Types.java000066400000000000000000000020061370372246700230400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; /** * Simple type conversion methods * for use in tests */ public class Types { /** * Convert first 4 bytes of a byte array to an int * * @param data The byte array * * @return An integer */ public static int byteToInt(final byte data[]) { return (data[0] & 0xff) | ((data[1] & 0xff) << 8) | ((data[2] & 0xff) << 16) | ((data[3] & 0xff) << 24); } /** * Convert an int to 4 bytes * * @param v The int * * @return A byte array containing 4 bytes */ public static byte[] intToByte(final int v) { return new byte[] { (byte)((v >>> 0) & 0xff), (byte)((v >>> 8) & 0xff), (byte)((v >>> 16) & 0xff), (byte)((v >>> 24) & 0xff) }; } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java000066400000000000000000000012221370372246700255420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb;

import org.junit.Test;

import static org.assertj.core.api.Assertions.assertThat;


public class WALRecoveryModeTest {

  /**
   * Every enum constant must be recoverable from its own
   * {@code getValue()} through
   * {@code WALRecoveryMode.getWALRecoveryMode}.
   */
  @Test
  public void getWALRecoveryMode() {
    for (final WALRecoveryMode walRecoveryMode : WALRecoveryMode.values()) {
      assertThat(WALRecoveryMode.getWALRecoveryMode(walRecoveryMode.getValue()))
          .isEqualTo(walRecoveryMode);
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/WalFilterTest.java000066400000000000000000000123531370372246700244730ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

package org.rocksdb;

import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;
import static org.rocksdb.util.ByteUtil.bytes;
import static org.rocksdb.util.TestUtil.*;

public class WalFilterTest {

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /**
   * For every {@link WalProcessingOption}: writes three batches into a
   * database, closes it, then reopens it with a {@link TestableWalFilter}
   * installed and checks that the filter was handed log numbers and log file
   * names. A {@link RocksDBException} on reopen is tolerated only for
   * {@code CORRUPTED_RECORD}.
   */
  @Test
  public void walFilter() throws RocksDBException {
    // Create 3 batches with two keys each
    final byte[][][] batchKeys = {
        new byte[][] {
            bytes("key1"),
            bytes("key2")
        },
        new byte[][] {
            bytes("key3"),
            bytes("key4")
        },
        new byte[][] {
            bytes("key5"),
            bytes("key6")
        }
    };

    final List<ColumnFamilyDescriptor> cfDescriptors = Arrays.asList(
        new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
        new ColumnFamilyDescriptor(bytes("pikachu"))
    );
    final List<ColumnFamilyHandle> cfHandles = new ArrayList<>();

    // Test with all WAL processing options
    for (final WalProcessingOption option : WalProcessingOption.values()) {
      try (final Options options = optionsForLogIterTest();
           final DBOptions dbOptions = new DBOptions(options)
               .setCreateMissingColumnFamilies(true);
           final RocksDB db = RocksDB.open(dbOptions,
               dbFolder.getRoot().getAbsolutePath(),
               cfDescriptors, cfHandles)) {
        try (final WriteOptions writeOptions = new WriteOptions()) {
          // Write given keys in given batches
          for (final byte[][] keysOfBatch : batchKeys) {
            // try-with-resources releases each batch; the original created
            // them in the loop and never closed them.
            try (final WriteBatch batch = new WriteBatch()) {
              for (final byte[] key : keysOfBatch) {
                batch.put(cfHandles.get(0), key, dummyString(1024));
              }
              db.write(writeOptions, batch);
            }
          }
        } finally {
          closeAndClear(cfHandles);
        }
      }

      // Create a test filter that applies the option under test to the
      // record at index 1, i.e. the second record the filter sees (the
      // filter counts records from 0).
      final int applyOptionForRecordIndex = 1;
      try (final TestableWalFilter walFilter =
               new TestableWalFilter(option, applyOptionForRecordIndex)) {

        try (final Options options = optionsForLogIterTest();
             final DBOptions dbOptions = new DBOptions(options)
                 .setWalFilter(walFilter)) {

          try (final RocksDB db = RocksDB.open(dbOptions,
                   dbFolder.getRoot().getAbsolutePath(),
                   cfDescriptors, cfHandles)) {

            try {
              assertThat(walFilter.logNumbers).isNotEmpty();
              assertThat(walFilter.logFileNames).isNotEmpty();
            } finally {
              closeAndClear(cfHandles);
            }
          } catch (final RocksDBException e) {
            if (option != WalProcessingOption.CORRUPTED_RECORD) {
              // exception is expected when CORRUPTED_RECORD!
              throw e;
            }
          }
        }
      }
    }
  }

  /** Closes every handle in the list, then empties it for the next open. */
  private static void closeAndClear(final List<ColumnFamilyHandle> handles) {
    for (final ColumnFamilyHandle handle : handles) {
      handle.close();
    }
    handles.clear();
  }

  /**
   * WAL filter that records every log number and log file name it is shown
   * and returns the configured {@link WalProcessingOption} for exactly one
   * record index; every other record gets {@code CONTINUE_PROCESSING}.
   */
  private static class TestableWalFilter extends AbstractWalFilter {
    private final WalProcessingOption walProcessingOption;
    private final int applyOptionForRecordIndex;
    Map<Integer, Long> cfLognumber;
    Map<String, Integer> cfNameId;
    final List<Long> logNumbers = new ArrayList<>();
    final List<String> logFileNames = new ArrayList<>();
    // Zero-based count of records seen so far.
    private int currentRecordIndex = 0;

    public TestableWalFilter(final WalProcessingOption walProcessingOption,
        final int applyOptionForRecordIndex) {
      super();
      this.walProcessingOption = walProcessingOption;
      this.applyOptionForRecordIndex = applyOptionForRecordIndex;
    }

    /** Stores both maps so that a test can inspect them. */
    @Override
    public void columnFamilyLogNumberMap(final Map<Integer, Long> cfLognumber,
        final Map<String, Integer> cfNameId) {
      this.cfLognumber = cfLognumber;
      this.cfNameId = cfNameId;
    }

    /**
     * Records the log number and file name, then chooses the processing
     * option for this record. The batch is never reported as changed.
     */
    @Override
    public LogRecordFoundResult logRecordFound(
        final long logNumber, final String logFileName, final WriteBatch batch,
        final WriteBatch newBatch) {

      logNumbers.add(logNumber);
      logFileNames.add(logFileName);

      final WalProcessingOption optionToReturn;
      if (currentRecordIndex == applyOptionForRecordIndex) {
        optionToReturn = walProcessingOption;
      } else {
        optionToReturn = WalProcessingOption.CONTINUE_PROCESSING;
      }

      currentRecordIndex++;

      return new LogRecordFoundResult(optionToReturn, false);
    }

    @Override
    public String name() {
      return "testable-wal-filter";
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java000066400000000000000000000045321370372246700261340ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;

import java.util.Arrays;
import java.util.List;

import org.junit.ClassRule;
import org.junit.Test;
import org.rocksdb.util.CapturingWriteBatchHandler;
import org.rocksdb.util.CapturingWriteBatchHandler.Event;

import static org.assertj.core.api.Assertions.assertThat;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.*;


public class WriteBatchHandlerTest {
  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  /**
   * Loads a fixed list of events into a {@link WriteBatch}, iterates the
   * batch with a {@link CapturingWriteBatchHandler}, and checks that the
   * captured events equal the ones that were loaded, in the same order.
   */
  @Test
  public void writeBatchHandler() throws RocksDBException {
    // setup test data
    final List<Event> testEvents = Arrays.asList(
        new Event(DELETE, "k0".getBytes(), null),
        new Event(PUT, "k1".getBytes(), "v1".getBytes()),
        new Event(PUT, "k2".getBytes(), "v2".getBytes()),
        new Event(PUT, "k3".getBytes(), "v3".getBytes()),
        new Event(LOG, null, "log1".getBytes()),
        new Event(MERGE, "k2".getBytes(), "v22".getBytes()),
        new Event(DELETE, "k3".getBytes(), null)
    );

    // load test data to the write batch
    try (final WriteBatch batch = new WriteBatch()) {
      for (final Event testEvent : testEvents) {
        switch (testEvent.action) {

          case PUT:
            batch.put(testEvent.key, testEvent.value);
            break;

          case MERGE:
            batch.merge(testEvent.key, testEvent.value);
            break;

          case DELETE:
            batch.delete(testEvent.key);
            break;

          case LOG:
            batch.putLogData(testEvent.value);
            break;
        }
      }

      // attempt to read test data back from the WriteBatch by iterating
      // with a handler
      try (final CapturingWriteBatchHandler handler =
               new CapturingWriteBatchHandler()) {
        batch.iterate(handler);

        // compare the results to the test data
        final List<Event> actualEvents =
            handler.getEvents();
        // Compare sizes by value. isSameAs compared two boxed Integers by
        // identity and only passed because small values are cached.
        assertThat(actualEvents.size()).isEqualTo(testEvents.size());

        assertThat(actualEvents).isEqualTo(testEvents);
      }
    }
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/WriteBatchTest.java000066400000000000000000000424311370372246700246360ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
package org.rocksdb;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.DELETE;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.DELETE_RANGE;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.LOG;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.MERGE;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.PUT;
import static org.rocksdb.util.CapturingWriteBatchHandler.Action.SINGLE_DELETE;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.rocksdb.util.CapturingWriteBatchHandler;
import org.rocksdb.util.CapturingWriteBatchHandler.Event;
import org.rocksdb.util.WriteBatchGetter;

/**
 * This class mimics the db/write_batch_test.cc
 * in the c++ rocksdb library.
 */
public class WriteBatchTest {
  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  /** A freshly created batch reports a count of zero. */
  @Test
  public void emptyWriteBatch() {
    try (final WriteBatch batch = new WriteBatch()) {
      assertThat(batch.count()).isEqualTo(0);
    }
  }

  /**
   * Adds one operation of each kind and checks that iterating the batch
   * reports them as events in insertion order.
   */
  @Test
  public void multipleBatchOperations()
      throws RocksDBException {

    final byte[] foo = "foo".getBytes(UTF_8);
    final byte[] bar = "bar".getBytes(UTF_8);
    final byte[] box = "box".getBytes(UTF_8);
    final byte[] baz = "baz".getBytes(UTF_8);
    final byte[] boo = "boo".getBytes(UTF_8);
    final byte[] hoo = "hoo".getBytes(UTF_8);
    final byte[] hello = "hello".getBytes(UTF_8);

    try (final WriteBatch batch = new WriteBatch()) {
      batch.put(foo, bar);
      batch.delete(box);
      batch.put(baz, boo);
      batch.merge(baz, hoo);
      batch.singleDelete(foo);
      batch.deleteRange(baz, foo);
      batch.putLogData(hello);

      try(final CapturingWriteBatchHandler handler =
              new CapturingWriteBatchHandler()) {
        batch.iterate(handler);

        assertThat(handler.getEvents().size()).isEqualTo(7);

        assertThat(handler.getEvents().get(0)).isEqualTo(new Event(PUT, foo, bar));
        assertThat(handler.getEvents().get(1)).isEqualTo(new Event(DELETE, box, null));
        assertThat(handler.getEvents().get(2)).isEqualTo(new Event(PUT, baz, boo));
        assertThat(handler.getEvents().get(3)).isEqualTo(new Event(MERGE, baz, hoo));
        assertThat(handler.getEvents().get(4)).isEqualTo(new Event(SINGLE_DELETE, foo, null));
        assertThat(handler.getEvents().get(5)).isEqualTo(new Event(DELETE_RANGE, baz, foo));
        assertThat(handler.getEvents().get(6)).isEqualTo(new Event(LOG, null, hello));
      }
    }
  }

  /**
   * Exercises the direct {@link ByteBuffer} put/remove overloads, checking
   * the buffer position and limit after each call and then the printed
   * contents of the batch.
   */
  @Test
  public void multipleBatchOperationsDirect()
      throws UnsupportedEncodingException, RocksDBException {
    try (WriteBatch batch = new WriteBatch()) {
      ByteBuffer key = ByteBuffer.allocateDirect(16);
      ByteBuffer value = ByteBuffer.allocateDirect(16);
      key.put("foo".getBytes("US-ASCII")).flip();
      value.put("bar".getBytes("US-ASCII")).flip();
      batch.put(key, value);
      assertThat(key.position()).isEqualTo(3);
      assertThat(key.limit()).isEqualTo(3);
      assertThat(value.position()).isEqualTo(3);
      assertThat(value.limit()).isEqualTo(3);

      key.clear();
      key.put("box".getBytes("US-ASCII")).flip();
      batch.remove(key);
      assertThat(key.position()).isEqualTo(3);
      assertThat(key.limit()).isEqualTo(3);

      batch.put("baz".getBytes("US-ASCII"), "boo".getBytes("US-ASCII"));

      WriteBatchTestInternalHelper.setSequence(batch, 100);
      assertThat(WriteBatchTestInternalHelper.sequence(batch)).isNotNull().isEqualTo(100);
      assertThat(batch.count()).isEqualTo(3);
      assertThat(new String(getContents(batch), "US-ASCII"))
          .isEqualTo("Put(baz, boo)@102"
              + "Delete(box)@101"
              + "Put(foo, bar)@100");
    }
  }

  /**
   * Appends batch b2 to b1 repeatedly and checks the count and printed
   * contents of b1 after each append.
   */
  @Test
  public void testAppendOperation()
      throws RocksDBException {
    try (final WriteBatch b1 = new WriteBatch();
         final WriteBatch b2 = new WriteBatch()) {
      WriteBatchTestInternalHelper.setSequence(b1, 200);
      WriteBatchTestInternalHelper.setSequence(b2, 300);
      WriteBatchTestInternalHelper.append(b1, b2);
      assertThat(getContents(b1).length).isEqualTo(0);
      assertThat(b1.count()).isEqualTo(0);

      // The content checks below were previously written as
      // assertThat(expected.equals(actual)) with no terminal assertion,
      // which verifies nothing; isEqualTo makes them effective.
      b2.put("a".getBytes(UTF_8), "va".getBytes(UTF_8));
      WriteBatchTestInternalHelper.append(b1, b2);
      assertThat(new String(getContents(b1), UTF_8))
          .isEqualTo("Put(a, va)@200");
      assertThat(b1.count()).isEqualTo(1);

      b2.clear();
      b2.put("b".getBytes(UTF_8), "vb".getBytes(UTF_8));
      WriteBatchTestInternalHelper.append(b1, b2);
      assertThat(new String(getContents(b1), UTF_8))
          .isEqualTo("Put(a, va)@200"
              + "Put(b, vb)@201");
      assertThat(b1.count()).isEqualTo(2);

      // b2 still holds Put(b, vb), so this append adds it a second time.
      b2.delete("foo".getBytes(UTF_8));
      WriteBatchTestInternalHelper.append(b1, b2);
      assertThat(new String(getContents(b1), UTF_8))
          .isEqualTo("Put(a, va)@200"
              + "Put(b, vb)@202"
              + "Put(b, vb)@201"
              + "Delete(foo)@203");
      assertThat(b1.count()).isEqualTo(4);
    }
  }

  /**
   * Mixes log-data blobs with regular operations; the expected count of 5
   * excludes the two putLogData calls.
   */
  @Test
  public void blobOperation()
      throws RocksDBException {
    try (final WriteBatch batch = new WriteBatch()) {
      batch.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8));
      batch.put("k2".getBytes(UTF_8), "v2".getBytes(UTF_8));
      batch.put("k3".getBytes(UTF_8), "v3".getBytes(UTF_8));
      batch.putLogData("blob1".getBytes(UTF_8));
      batch.delete("k2".getBytes(UTF_8));
      batch.putLogData("blob2".getBytes(UTF_8));
      batch.merge("foo".getBytes(UTF_8), "bar".getBytes(UTF_8));
      assertThat(batch.count()).isEqualTo(5);
      // Previously a no-op assertThat(boolean) with no terminal assertion.
      assertThat(new String(getContents(batch), UTF_8))
          .isEqualTo("Merge(foo, bar)@4"
              + "Put(k1, v1)@0"
              + "Delete(k2)@3"
              + "Put(k2, v2)@1"
              + "Put(k3, v3)@2");
    }
  }

  /**
   * Sets two nested save points and rolls back to each in turn, checking the
   * values visible in the batch after every step.
   */
  @Test
  public void savePoints()
      throws RocksDBException {
    try (final WriteBatch batch = new WriteBatch()) {
      batch.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8));
      batch.put("k2".getBytes(UTF_8), "v2".getBytes(UTF_8));
      batch.put("k3".getBytes(UTF_8), "v3".getBytes(UTF_8));

      assertThat(getFromWriteBatch(batch, "k1")).isEqualTo("v1");
      assertThat(getFromWriteBatch(batch, "k2")).isEqualTo("v2");
      assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3");

      batch.setSavePoint();

      batch.delete("k2".getBytes(UTF_8));
      batch.put("k3".getBytes(UTF_8), "v3-2".getBytes(UTF_8));

      assertThat(getFromWriteBatch(batch, "k2")).isNull();
      assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3-2");

      batch.setSavePoint();

      batch.put("k3".getBytes(UTF_8), "v3-3".getBytes(UTF_8));
      batch.put("k4".getBytes(UTF_8), "v4".getBytes(UTF_8));

      assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3-3");
      assertThat(getFromWriteBatch(batch, "k4")).isEqualTo("v4");

      // Back to the second save point: the k3/k4 writes are undone.
      batch.rollbackToSavePoint();

      assertThat(getFromWriteBatch(batch, "k2")).isNull();
      assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3-2");
      assertThat(getFromWriteBatch(batch, "k4")).isNull();

      // Back to the first save point: only the initial three puts remain.
      batch.rollbackToSavePoint();

      assertThat(getFromWriteBatch(batch, "k1")).isEqualTo("v1");
      assertThat(getFromWriteBatch(batch, "k2")).isEqualTo("v2");
      assertThat(getFromWriteBatch(batch, "k3")).isEqualTo("v3");
      assertThat(getFromWriteBatch(batch, "k4")).isNull();
    }
  }

  @Test
  public void deleteRange() throws RocksDBException {
    try (final RocksDB db =
RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final WriteBatch batch = new WriteBatch(); final WriteOptions wOpt = new WriteOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); db.put("key3".getBytes(), "abcdefg".getBytes()); db.put("key4".getBytes(), "xyz".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); batch.deleteRange("key2".getBytes(), "key4".getBytes()); db.write(wOpt, batch); assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); assertThat(db.get("key2".getBytes())).isNull(); assertThat(db.get("key3".getBytes())).isNull(); assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); } } @Test public void restorePoints() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.put("k1".getBytes(), "v1".getBytes()); batch.put("k2".getBytes(), "v2".getBytes()); batch.setSavePoint(); batch.put("k1".getBytes(), "123456789".getBytes()); batch.delete("k2".getBytes()); batch.rollbackToSavePoint(); try(final CapturingWriteBatchHandler handler = new CapturingWriteBatchHandler()) { batch.iterate(handler); assertThat(handler.getEvents().size()).isEqualTo(2); assertThat(handler.getEvents().get(0)).isEqualTo(new Event(PUT, "k1".getBytes(), "v1".getBytes())); assertThat(handler.getEvents().get(1)).isEqualTo(new Event(PUT, "k2".getBytes(), "v2".getBytes())); } } } @Test(expected = RocksDBException.class) public void restorePoints_withoutSavePoints() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.rollbackToSavePoint(); } } @Test(expected = RocksDBException.class) public void restorePoints_withoutSavePoints_nested() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { 
batch.setSavePoint(); batch.rollbackToSavePoint(); // without previous corresponding setSavePoint batch.rollbackToSavePoint(); } } @Test public void popSavePoint() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.put("k1".getBytes(), "v1".getBytes()); batch.put("k2".getBytes(), "v2".getBytes()); batch.setSavePoint(); batch.put("k1".getBytes(), "123456789".getBytes()); batch.delete("k2".getBytes()); batch.setSavePoint(); batch.popSavePoint(); batch.rollbackToSavePoint(); try(final CapturingWriteBatchHandler handler = new CapturingWriteBatchHandler()) { batch.iterate(handler); assertThat(handler.getEvents().size()).isEqualTo(2); assertThat(handler.getEvents().get(0)).isEqualTo(new Event(PUT, "k1".getBytes(), "v1".getBytes())); assertThat(handler.getEvents().get(1)).isEqualTo(new Event(PUT, "k2".getBytes(), "v2".getBytes())); } } } @Test(expected = RocksDBException.class) public void popSavePoint_withoutSavePoints() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.popSavePoint(); } } @Test(expected = RocksDBException.class) public void popSavePoint_withoutSavePoints_nested() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.setSavePoint(); batch.popSavePoint(); // without previous corresponding setSavePoint batch.popSavePoint(); } } @Test public void maxBytes() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.setMaxBytes(19); batch.put("k1".getBytes(), "v1".getBytes()); } } @Test(expected = RocksDBException.class) public void maxBytes_over() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.setMaxBytes(1); batch.put("k1".getBytes(), "v1".getBytes()); } } @Test public void data() throws RocksDBException { try (final WriteBatch batch1 = new WriteBatch()) { batch1.delete("k0".getBytes()); batch1.put("k1".getBytes(), "v1".getBytes()); batch1.put("k2".getBytes(), "v2".getBytes()); batch1.put("k3".getBytes(), 
"v3".getBytes()); batch1.putLogData("log1".getBytes()); batch1.merge("k2".getBytes(), "v22".getBytes()); batch1.delete("k3".getBytes()); final byte[] serialized = batch1.data(); try(final WriteBatch batch2 = new WriteBatch(serialized)) { assertThat(batch2.count()).isEqualTo(batch1.count()); try(final CapturingWriteBatchHandler handler1 = new CapturingWriteBatchHandler()) { batch1.iterate(handler1); try (final CapturingWriteBatchHandler handler2 = new CapturingWriteBatchHandler()) { batch2.iterate(handler2); assertThat(handler1.getEvents().equals(handler2.getEvents())).isTrue(); } } } } } @Test public void dataSize() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { batch.put("k1".getBytes(), "v1".getBytes()); assertThat(batch.getDataSize()).isEqualTo(19); } } @Test public void hasPut() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasPut()).isFalse(); batch.put("k1".getBytes(), "v1".getBytes()); assertThat(batch.hasPut()).isTrue(); } } @Test public void hasDelete() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasDelete()).isFalse(); batch.delete("k1".getBytes()); assertThat(batch.hasDelete()).isTrue(); } } @Test public void hasSingleDelete() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasSingleDelete()).isFalse(); batch.singleDelete("k1".getBytes()); assertThat(batch.hasSingleDelete()).isTrue(); } } @Test public void hasDeleteRange() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasDeleteRange()).isFalse(); batch.deleteRange("k1".getBytes(), "k2".getBytes()); assertThat(batch.hasDeleteRange()).isTrue(); } } @Test public void hasBeginPrepareRange() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasBeginPrepare()).isFalse(); } } @Test public void hasEndPrepareRange() throws RocksDBException { try (final 
WriteBatch batch = new WriteBatch()) { assertThat(batch.hasEndPrepare()).isFalse(); } } @Test public void hasCommit() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasCommit()).isFalse(); } } @Test public void hasRollback() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.hasRollback()).isFalse(); } } @Test public void walTerminationPoint() throws RocksDBException { try (final WriteBatch batch = new WriteBatch()) { WriteBatch.SavePoint walTerminationPoint = batch.getWalTerminationPoint(); assertThat(walTerminationPoint.isCleared()).isTrue(); batch.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8)); batch.markWalTerminationPoint(); walTerminationPoint = batch.getWalTerminationPoint(); assertThat(walTerminationPoint.getSize()).isEqualTo(19); assertThat(walTerminationPoint.getCount()).isEqualTo(1); assertThat(walTerminationPoint.getContentFlags()).isEqualTo(2); } } @Test public void getWriteBatch() { try (final WriteBatch batch = new WriteBatch()) { assertThat(batch.getWriteBatch()).isEqualTo(batch); } } static byte[] getContents(final WriteBatch wb) { return getContents(wb.nativeHandle_); } static String getFromWriteBatch(final WriteBatch wb, final String key) throws RocksDBException { final WriteBatchGetter getter = new WriteBatchGetter(key.getBytes(UTF_8)); wb.iterate(getter); if(getter.getValue() != null) { return new String(getter.getValue(), UTF_8); } else { return null; } } private static native byte[] getContents(final long writeBatchHandle); } /** * Package-private class which provides java api to access * c++ WriteBatchInternal. 
*/ class WriteBatchTestInternalHelper { static void setSequence(final WriteBatch wb, final long sn) { setSequence(wb.nativeHandle_, sn); } static long sequence(final WriteBatch wb) { return sequence(wb.nativeHandle_); } static void append(final WriteBatch wb1, final WriteBatch wb2) { append(wb1.nativeHandle_, wb2.nativeHandle_); } private static native void setSequence(final long writeBatchHandle, final long sn); private static native long sequence(final long writeBatchHandle); private static native void append(final long writeBatchHandle1, final long writeBatchHandle2); } rocksdb-6.11.4/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java000066400000000000000000000057511370372246700263030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import java.nio.ByteBuffer; import java.util.*; import java.util.concurrent.*; @RunWith(Parameterized.class) public class WriteBatchThreadedTest { @Parameters(name = "WriteBatchThreadedTest(threadCount={0})") public static Iterable data() { return Arrays.asList(new Integer[]{1, 10, 50, 100}); } @Parameter public int threadCount; @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); RocksDB db; @Before public void setUp() throws Exception { RocksDB.loadLibrary(); final Options options = new Options() .setCreateIfMissing(true) .setIncreaseParallelism(32); db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath()); assert (db != null); } @After public void 
tearDown() throws Exception { if (db != null) { db.close(); } } @Test public void threadedWrites() throws InterruptedException, ExecutionException { final List> callables = new ArrayList<>(); for (int i = 0; i < 100; i++) { final int offset = i * 100; callables.add(new Callable() { @Override public Void call() throws RocksDBException { try (final WriteBatch wb = new WriteBatch(); final WriteOptions w_opt = new WriteOptions()) { for (int i = offset; i < offset + 100; i++) { wb.put(ByteBuffer.allocate(4).putInt(i).array(), "parallel rocks test".getBytes()); } db.write(w_opt, wb); } return null; } }); } //submit the callables final ExecutorService executorService = Executors.newFixedThreadPool(threadCount); try { final ExecutorCompletionService completionService = new ExecutorCompletionService<>(executorService); final Set> futures = new HashSet<>(); for (final Callable callable : callables) { futures.add(completionService.submit(callable)); } while (futures.size() > 0) { final Future future = completionService.take(); futures.remove(future); try { future.get(); } catch (final ExecutionException e) { for (final Future f : futures) { f.cancel(true); } throw e; } } } finally { executorService.shutdown(); executorService.awaitTermination(10, TimeUnit.SECONDS); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java000066400000000000000000000553161370372246700264700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
package org.rocksdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; public class WriteBatchWithIndexTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void readYourOwnWrites() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { final byte[] k1 = "key1".getBytes(); final byte[] v1 = "value1".getBytes(); final byte[] k2 = "key2".getBytes(); final byte[] v2 = "value2".getBytes(); db.put(k1, v1); db.put(k2, v2); try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); final RocksIterator base = db.newIterator(); final RocksIterator it = wbwi.newIteratorWithBase(base)) { it.seek(k1); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k1); assertThat(it.value()).isEqualTo(v1); it.seek(k2); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k2); assertThat(it.value()).isEqualTo(v2); //put data to the write batch and make sure we can read it. 
final byte[] k3 = "key3".getBytes(); final byte[] v3 = "value3".getBytes(); wbwi.put(k3, v3); it.seek(k3); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k3); assertThat(it.value()).isEqualTo(v3); //update k2 in the write batch and check the value final byte[] v2Other = "otherValue2".getBytes(); wbwi.put(k2, v2Other); it.seek(k2); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k2); assertThat(it.value()).isEqualTo(v2Other); //delete k1 and make sure we can read back the write wbwi.delete(k1); it.seek(k1); assertThat(it.key()).isNotEqualTo(k1); //reinsert k1 and make sure we see the new value final byte[] v1Other = "otherValue1".getBytes(); wbwi.put(k1, v1Other); it.seek(k1); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k1); assertThat(it.value()).isEqualTo(v1Other); //single remove k3 and make sure we can read back the write wbwi.singleDelete(k3); it.seek(k3); assertThat(it.isValid()).isEqualTo(false); //reinsert k3 and make sure we see the new value final byte[] v3Other = "otherValue3".getBytes(); wbwi.put(k3, v3Other); it.seek(k3); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k3); assertThat(it.value()).isEqualTo(v3Other); } } } @Test public void readYourOwnWritesCf() throws RocksDBException { final List cfNames = Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes())); final List columnFamilyHandleList = new ArrayList<>(); // Test open database with column family names try (final DBOptions options = new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); final RocksDB db = RocksDB.open( options, dbFolder.getRoot().getAbsolutePath(), cfNames, columnFamilyHandleList)) { final ColumnFamilyHandle newCf = columnFamilyHandleList.get(1); try { final byte[] k1 = "key1".getBytes(); final byte[] v1 = "value1".getBytes(); final byte[] k2 = "key2".getBytes(); final byte[] v2 = "value2".getBytes(); 
db.put(newCf, k1, v1); db.put(newCf, k2, v2); try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); final ReadOptions readOptions = new ReadOptions(); final RocksIterator base = db.newIterator(newCf, readOptions); final RocksIterator it = wbwi.newIteratorWithBase(newCf, base, readOptions)) { it.seek(k1); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k1); assertThat(it.value()).isEqualTo(v1); it.seek(k2); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k2); assertThat(it.value()).isEqualTo(v2); // put data to the write batch and make sure we can read it. final byte[] k3 = "key3".getBytes(); final byte[] v3 = "value3".getBytes(); wbwi.put(newCf, k3, v3); it.seek(k3); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k3); assertThat(it.value()).isEqualTo(v3); // update k2 in the write batch and check the value final byte[] v2Other = "otherValue2".getBytes(); wbwi.put(newCf, k2, v2Other); it.seek(k2); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k2); assertThat(it.value()).isEqualTo(v2Other); // delete k1 and make sure we can read back the write wbwi.delete(newCf, k1); it.seek(k1); assertThat(it.key()).isNotEqualTo(k1); // reinsert k1 and make sure we see the new value final byte[] v1Other = "otherValue1".getBytes(); wbwi.put(newCf, k1, v1Other); it.seek(k1); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k1); assertThat(it.value()).isEqualTo(v1Other); // single remove k3 and make sure we can read back the write wbwi.singleDelete(newCf, k3); it.seek(k3); assertThat(it.isValid()).isEqualTo(false); // reinsert k3 and make sure we see the new value final byte[] v3Other = "otherValue3".getBytes(); wbwi.put(newCf, k3, v3Other); it.seek(k3); assertThat(it.isValid()).isTrue(); assertThat(it.key()).isEqualTo(k3); assertThat(it.value()).isEqualTo(v3Other); } } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { 
columnFamilyHandle.close(); } } } } @Test public void writeBatchWithIndex() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { final byte[] k1 = "key1".getBytes(); final byte[] v1 = "value1".getBytes(); final byte[] k2 = "key2".getBytes(); final byte[] v2 = "value2".getBytes(); try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(); final WriteOptions wOpt = new WriteOptions()) { wbwi.put(k1, v1); wbwi.put(k2, v2); db.write(wOpt, wbwi); } assertThat(db.get(k1)).isEqualTo(v1); assertThat(db.get(k2)).isEqualTo(v2); } } @Test public void write_writeBatchWithIndexDirect() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { ByteBuffer k1 = ByteBuffer.allocateDirect(16); ByteBuffer v1 = ByteBuffer.allocateDirect(16); ByteBuffer k2 = ByteBuffer.allocateDirect(16); ByteBuffer v2 = ByteBuffer.allocateDirect(16); k1.put("key1".getBytes()).flip(); v1.put("value1".getBytes()).flip(); k2.put("key2".getBytes()).flip(); v2.put("value2".getBytes()).flip(); try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.put(k1, v1); assertThat(k1.position()).isEqualTo(4); assertThat(k1.limit()).isEqualTo(4); assertThat(v1.position()).isEqualTo(6); assertThat(v1.limit()).isEqualTo(6); wbwi.put(k2, v2); db.write(new WriteOptions(), wbwi); } assertThat(db.get("key1".getBytes())).isEqualTo("value1".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo("value2".getBytes()); } } @Test public void iterator() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true)) { final String k1 = "key1"; final String v1 = "value1"; final String k2 = "key2"; final String v2 = "value2"; final String k3 = "key3"; final String v3 = "value3"; final String k4 = "key4"; final String k5 = "key5"; final 
String k6 = "key6"; final String k7 = "key7"; final String v8 = "value8"; final byte[] k1b = k1.getBytes(UTF_8); final byte[] v1b = v1.getBytes(UTF_8); final byte[] k2b = k2.getBytes(UTF_8); final byte[] v2b = v2.getBytes(UTF_8); final byte[] k3b = k3.getBytes(UTF_8); final byte[] v3b = v3.getBytes(UTF_8); final byte[] k4b = k4.getBytes(UTF_8); final byte[] k5b = k5.getBytes(UTF_8); final byte[] k6b = k6.getBytes(UTF_8); final byte[] k7b = k7.getBytes(UTF_8); final byte[] v8b = v8.getBytes(UTF_8); // add put records wbwi.put(k1b, v1b); wbwi.put(k2b, v2b); wbwi.put(k3b, v3b); // add a deletion record wbwi.delete(k4b); // add a single deletion record wbwi.singleDelete(k5b); // add a log record wbwi.putLogData(v8b); final WBWIRocksIterator.WriteEntry[] expected = { new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, new DirectSlice(k1), new DirectSlice(v1)), new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, new DirectSlice(k2), new DirectSlice(v2)), new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, new DirectSlice(k3), new DirectSlice(v3)), new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.DELETE, new DirectSlice(k4), DirectSlice.NONE), new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.SINGLE_DELETE, new DirectSlice(k5), DirectSlice.NONE), }; try (final WBWIRocksIterator it = wbwi.newIterator()) { //direct access - seek to key offsets final int[] testOffsets = {2, 0, 3, 4, 1}; for (int i = 0; i < testOffsets.length; i++) { final int testOffset = testOffsets[i]; final byte[] key = toArray(expected[testOffset].getKey().data()); it.seek(key); assertThat(it.isValid()).isTrue(); final WBWIRocksIterator.WriteEntry entry = it.entry(); assertThat(entry).isEqualTo(expected[testOffset]); // Direct buffer seek expected[testOffset].getKey().data().mark(); ByteBuffer db = expected[testOffset].getKey().data(); it.seek(db); assertThat(db.position()).isEqualTo(key.length); assertThat(it.isValid()).isTrue(); } 
//forward iterative access int i = 0; for (it.seekToFirst(); it.isValid(); it.next()) { assertThat(it.entry()).isEqualTo(expected[i++]); } //reverse iterative access i = expected.length - 1; for (it.seekToLast(); it.isValid(); it.prev()) { assertThat(it.entry()).isEqualTo(expected[i--]); } } } } @Test public void zeroByteTests() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true)) { final byte[] zeroByteValue = new byte[]{0, 0}; //add zero byte value wbwi.put(zeroByteValue, zeroByteValue); final ByteBuffer buffer = ByteBuffer.allocateDirect(zeroByteValue.length); buffer.put(zeroByteValue); final WBWIRocksIterator.WriteEntry expected = new WBWIRocksIterator.WriteEntry(WBWIRocksIterator.WriteType.PUT, new DirectSlice(buffer, zeroByteValue.length), new DirectSlice(buffer, zeroByteValue.length)); try (final WBWIRocksIterator it = wbwi.newIterator()) { it.seekToFirst(); final WBWIRocksIterator.WriteEntry actual = it.entry(); assertThat(actual.equals(expected)).isTrue(); assertThat(it.entry().hashCode() == expected.hashCode()).isTrue(); } } } @Test public void savePoints() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); final ReadOptions readOptions = new ReadOptions()) { wbwi.put("k1".getBytes(), "v1".getBytes()); wbwi.put("k2".getBytes(), "v2".getBytes()); wbwi.put("k3".getBytes(), "v3".getBytes()); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k1")) .isEqualTo("v1"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) .isEqualTo("v2"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) .isEqualTo("v3"); wbwi.setSavePoint(); wbwi.delete("k2".getBytes()); wbwi.put("k3".getBytes(), "v3-2".getBytes()); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) .isNull(); 
assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) .isEqualTo("v3-2"); wbwi.setSavePoint(); wbwi.put("k3".getBytes(), "v3-3".getBytes()); wbwi.put("k4".getBytes(), "v4".getBytes()); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) .isEqualTo("v3-3"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k4")) .isEqualTo("v4"); wbwi.rollbackToSavePoint(); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) .isNull(); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) .isEqualTo("v3-2"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k4")) .isNull(); wbwi.rollbackToSavePoint(); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k1")) .isEqualTo("v1"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k2")) .isEqualTo("v2"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k3")) .isEqualTo("v3"); assertThat(getFromWriteBatchWithIndex(db, readOptions, wbwi, "k4")) .isNull(); } } } @Test public void restorePoints() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.put("k1".getBytes(UTF_8), "v1".getBytes(UTF_8)); wbwi.put("k2".getBytes(UTF_8), "v2".getBytes(UTF_8)); wbwi.setSavePoint(); wbwi.put("k1".getBytes(UTF_8), "123456789".getBytes(UTF_8)); wbwi.delete("k2".getBytes(UTF_8)); wbwi.rollbackToSavePoint(); try(final DBOptions options = new DBOptions()) { assertThat(wbwi.getFromBatch(options,"k1".getBytes(UTF_8))).isEqualTo("v1".getBytes()); assertThat(wbwi.getFromBatch(options,"k2".getBytes(UTF_8))).isEqualTo("v2".getBytes()); } } } @Test(expected = RocksDBException.class) public void restorePoints_withoutSavePoints() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.rollbackToSavePoint(); } } @Test(expected = RocksDBException.class) public void restorePoints_withoutSavePoints_nested() throws RocksDBException { try (final WriteBatchWithIndex wbwi = 
new WriteBatchWithIndex()) { wbwi.setSavePoint(); wbwi.rollbackToSavePoint(); // without previous corresponding setSavePoint wbwi.rollbackToSavePoint(); } } @Test public void popSavePoint() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.put("k1".getBytes(), "v1".getBytes()); wbwi.put("k2".getBytes(), "v2".getBytes()); wbwi.setSavePoint(); wbwi.put("k1".getBytes(), "123456789".getBytes()); wbwi.delete("k2".getBytes()); wbwi.setSavePoint(); wbwi.popSavePoint(); wbwi.rollbackToSavePoint(); try(final DBOptions options = new DBOptions()) { assertThat(wbwi.getFromBatch(options,"k1".getBytes(UTF_8))).isEqualTo("v1".getBytes()); assertThat(wbwi.getFromBatch(options,"k2".getBytes(UTF_8))).isEqualTo("v2".getBytes()); } } } @Test(expected = RocksDBException.class) public void popSavePoint_withoutSavePoints() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.popSavePoint(); } } @Test(expected = RocksDBException.class) public void popSavePoint_withoutSavePoints_nested() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.setSavePoint(); wbwi.popSavePoint(); // without previous corresponding setSavePoint wbwi.popSavePoint(); } } @Test public void maxBytes() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.setMaxBytes(19); wbwi.put("k1".getBytes(), "v1".getBytes()); } } @Test(expected = RocksDBException.class) public void maxBytes_over() throws RocksDBException { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { wbwi.setMaxBytes(1); wbwi.put("k1".getBytes(), "v1".getBytes()); } } @Test public void getWriteBatch() { try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex()) { final WriteBatch wb = wbwi.getWriteBatch(); assertThat(wb).isNotNull(); assertThat(wb.isOwningHandle()).isFalse(); } } private static String getFromWriteBatchWithIndex(final RocksDB db, final 
ReadOptions readOptions, final WriteBatchWithIndex wbwi, final String skey) { final byte[] key = skey.getBytes(); try (final RocksIterator baseIterator = db.newIterator(readOptions); final RocksIterator iterator = wbwi.newIteratorWithBase(baseIterator)) { iterator.seek(key); // Arrays.equals(key, iterator.key()) ensures an exact match in Rocks, // instead of a nearest match return iterator.isValid() && Arrays.equals(key, iterator.key()) ? new String(iterator.value()) : null; } } @Test public void getFromBatch() throws RocksDBException { final byte[] k1 = "k1".getBytes(); final byte[] k2 = "k2".getBytes(); final byte[] k3 = "k3".getBytes(); final byte[] k4 = "k4".getBytes(); final byte[] v1 = "v1".getBytes(); final byte[] v2 = "v2".getBytes(); final byte[] v3 = "v3".getBytes(); try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); final DBOptions dbOptions = new DBOptions()) { wbwi.put(k1, v1); wbwi.put(k2, v2); wbwi.put(k3, v3); assertThat(wbwi.getFromBatch(dbOptions, k1)).isEqualTo(v1); assertThat(wbwi.getFromBatch(dbOptions, k2)).isEqualTo(v2); assertThat(wbwi.getFromBatch(dbOptions, k3)).isEqualTo(v3); assertThat(wbwi.getFromBatch(dbOptions, k4)).isNull(); wbwi.delete(k2); assertThat(wbwi.getFromBatch(dbOptions, k2)).isNull(); } } @Test public void getFromBatchAndDB() throws RocksDBException { final byte[] k1 = "k1".getBytes(); final byte[] k2 = "k2".getBytes(); final byte[] k3 = "k3".getBytes(); final byte[] k4 = "k4".getBytes(); final byte[] v1 = "v1".getBytes(); final byte[] v2 = "v2".getBytes(); final byte[] v3 = "v3".getBytes(); final byte[] v4 = "v4".getBytes(); try (final Options options = new Options().setCreateIfMissing(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { db.put(k1, v1); db.put(k2, v2); db.put(k4, v4); try (final WriteBatchWithIndex wbwi = new WriteBatchWithIndex(true); final DBOptions dbOptions = new DBOptions(); final ReadOptions readOptions = new ReadOptions()) { 
assertThat(wbwi.getFromBatch(dbOptions, k1)).isNull(); assertThat(wbwi.getFromBatch(dbOptions, k2)).isNull(); assertThat(wbwi.getFromBatch(dbOptions, k4)).isNull(); wbwi.put(k3, v3); assertThat(wbwi.getFromBatch(dbOptions, k3)).isEqualTo(v3); assertThat(wbwi.getFromBatchAndDB(db, readOptions, k1)).isEqualTo(v1); assertThat(wbwi.getFromBatchAndDB(db, readOptions, k2)).isEqualTo(v2); assertThat(wbwi.getFromBatchAndDB(db, readOptions, k3)).isEqualTo(v3); assertThat(wbwi.getFromBatchAndDB(db, readOptions, k4)).isEqualTo(v4); wbwi.delete(k4); assertThat(wbwi.getFromBatchAndDB(db, readOptions, k4)).isNull(); } } } private byte[] toArray(final ByteBuffer buf) { final byte[] ary = new byte[buf.remaining()]; buf.get(ary); return ary; } @Test public void deleteRange() throws RocksDBException { try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); final WriteBatch batch = new WriteBatch(); final WriteOptions wOpt = new WriteOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put("key2".getBytes(), "12345678".getBytes()); db.put("key3".getBytes(), "abcdefg".getBytes()); db.put("key4".getBytes(), "xyz".getBytes()); assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); batch.deleteRange("key2".getBytes(), "key4".getBytes()); db.write(wOpt, batch); assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); assertThat(db.get("key2".getBytes())).isNull(); assertThat(db.get("key3".getBytes())).isNull(); assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/WriteOptionsTest.java000066400000000000000000000044601370372246700252500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import org.junit.ClassRule; import org.junit.Test; import java.util.Random; import static org.assertj.core.api.Assertions.assertThat; public class WriteOptionsTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); public static final Random rand = PlatformRandomHelper. getPlatformSpecificRandomFactory(); @Test public void writeOptions() { try (final WriteOptions writeOptions = new WriteOptions()) { writeOptions.setSync(true); assertThat(writeOptions.sync()).isTrue(); writeOptions.setSync(false); assertThat(writeOptions.sync()).isFalse(); writeOptions.setDisableWAL(true); assertThat(writeOptions.disableWAL()).isTrue(); writeOptions.setDisableWAL(false); assertThat(writeOptions.disableWAL()).isFalse(); writeOptions.setIgnoreMissingColumnFamilies(true); assertThat(writeOptions.ignoreMissingColumnFamilies()).isTrue(); writeOptions.setIgnoreMissingColumnFamilies(false); assertThat(writeOptions.ignoreMissingColumnFamilies()).isFalse(); writeOptions.setNoSlowdown(true); assertThat(writeOptions.noSlowdown()).isTrue(); writeOptions.setNoSlowdown(false); assertThat(writeOptions.noSlowdown()).isFalse(); writeOptions.setLowPri(true); assertThat(writeOptions.lowPri()).isTrue(); writeOptions.setLowPri(false); assertThat(writeOptions.lowPri()).isFalse(); } } @Test public void copyConstructor() { WriteOptions origOpts = new WriteOptions(); origOpts.setDisableWAL(rand.nextBoolean()); origOpts.setIgnoreMissingColumnFamilies(rand.nextBoolean()); origOpts.setSync(rand.nextBoolean()); WriteOptions copyOpts = new WriteOptions(origOpts); assertThat(origOpts.disableWAL()).isEqualTo(copyOpts.disableWAL()); assertThat(origOpts.ignoreMissingColumnFamilies()).isEqualTo( copyOpts.ignoreMissingColumnFamilies()); 
assertThat(origOpts.sync()).isEqualTo(copyOpts.sync()); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/test/000077500000000000000000000000001370372246700220525ustar00rootroot00000000000000rocksdb-6.11.4/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java000066400000000000000000000014241370372246700327020ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. package org.rocksdb.test; import org.rocksdb.AbstractCompactionFilter; import org.rocksdb.AbstractCompactionFilterFactory; import org.rocksdb.RemoveEmptyValueCompactionFilter; /** * Simple CompactionFilterFactory class used in tests. Generates RemoveEmptyValueCompactionFilters. */ public class RemoveEmptyValueCompactionFilterFactory extends AbstractCompactionFilterFactory { @Override public RemoveEmptyValueCompactionFilter createCompactionFilter(final AbstractCompactionFilter.Context context) { return new RemoveEmptyValueCompactionFilter(); } @Override public String name() { return "RemoveEmptyValueCompactionFilterFactory"; } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java000066400000000000000000000117731370372246700262130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
package org.rocksdb.test; import org.junit.internal.JUnitSystem; import org.junit.internal.RealSystem; import org.junit.internal.TextListener; import org.junit.runner.Description; import org.junit.runner.JUnitCore; import org.junit.runner.Result; import org.junit.runner.notification.Failure; import org.rocksdb.RocksDB; import java.io.PrintStream; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.List; import static org.rocksdb.test.RocksJunitRunner.RocksJunitListener.Status.*; /** * Custom Junit Runner to print also Test classes * and executed methods to command prompt. */ public class RocksJunitRunner { /** * Listener which overrides default functionality * to print class and method to system out. */ static class RocksJunitListener extends TextListener { private final static NumberFormat secsFormat = new DecimalFormat("###,###.###"); private final PrintStream writer; private String currentClassName = null; private String currentMethodName = null; private Status currentStatus = null; private long currentTestsStartTime; private int currentTestsCount = 0; private int currentTestsIgnoredCount = 0; private int currentTestsFailureCount = 0; private int currentTestsErrorCount = 0; enum Status { IGNORED, FAILURE, ERROR, OK } /** * RocksJunitListener constructor * * @param system JUnitSystem */ public RocksJunitListener(final JUnitSystem system) { this(system.out()); } public RocksJunitListener(final PrintStream writer) { super(writer); this.writer = writer; } @Override public void testRunStarted(final Description description) { writer.format("Starting RocksJava Tests...%n"); } @Override public void testStarted(final Description description) { if(currentClassName == null || !currentClassName.equals(description.getClassName())) { if(currentClassName != null) { printTestsSummary(); } else { currentTestsStartTime = System.currentTimeMillis(); } writer.format("%nRunning: %s%n", description.getClassName()); 
currentClassName = description.getClassName(); } currentMethodName = description.getMethodName(); currentStatus = OK; currentTestsCount++; } private void printTestsSummary() { // print summary of last test set writer.format("Tests run: %d, Failures: %d, Errors: %d, Ignored: %d, Time elapsed: %s sec%n", currentTestsCount, currentTestsFailureCount, currentTestsErrorCount, currentTestsIgnoredCount, formatSecs(System.currentTimeMillis() - currentTestsStartTime)); // reset counters currentTestsCount = 0; currentTestsFailureCount = 0; currentTestsErrorCount = 0; currentTestsIgnoredCount = 0; currentTestsStartTime = System.currentTimeMillis(); } private static String formatSecs(final double milliseconds) { final double seconds = milliseconds / 1000; return secsFormat.format(seconds); } @Override public void testFailure(final Failure failure) { if (failure.getException() != null && failure.getException() instanceof AssertionError) { currentStatus = FAILURE; currentTestsFailureCount++; } else { currentStatus = ERROR; currentTestsErrorCount++; } } @Override public void testIgnored(final Description description) { currentStatus = IGNORED; currentTestsIgnoredCount++; } @Override public void testFinished(final Description description) { if(currentStatus == OK) { writer.format("\t%s OK%n",currentMethodName); } else { writer.format(" [%s] %s%n", currentStatus.name(), currentMethodName); } } @Override public void testRunFinished(final Result result) { printTestsSummary(); super.testRunFinished(result); } } /** * Main method to execute tests * * @param args Test classes as String names */ public static void main(final String[] args){ final JUnitCore runner = new JUnitCore(); final JUnitSystem system = new RealSystem(); runner.addListener(new RocksJunitListener(system)); try { final List> classes = new ArrayList<>(); for (final String arg : args) { classes.add(Class.forName(arg)); } final Class[] clazzes = classes.toArray(new Class[classes.size()]); final Result result = 
runner.run(clazzes); if(!result.wasSuccessful()) { System.exit(-1); } } catch (final ClassNotFoundException e) { e.printStackTrace(); System.exit(-2); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/000077500000000000000000000000001370372246700220505ustar00rootroot00000000000000rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java000066400000000000000000000222571370372246700300610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import org.rocksdb.*; import java.nio.ByteBuffer; import java.nio.file.FileSystems; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; /** * Similar to {@link IntComparatorTest}, but uses {@link BytewiseComparator} * which ensures the correct ordering of positive integers. 
*/ @RunWith(Parameterized.class) public class BytewiseComparatorIntTest { // test with 500 random positive integer keys private static final int TOTAL_KEYS = 500; private static final byte[][] keys = new byte[TOTAL_KEYS][4]; @BeforeClass public static void prepareKeys() { final ByteBuffer buf = ByteBuffer.allocate(4); final Random random = new Random(); for (int i = 0; i < TOTAL_KEYS; i++) { final int ri = random.nextInt() & Integer.MAX_VALUE; // the & ensures positive integer buf.putInt(ri); buf.flip(); final byte[] key = buf.array(); // does key already exist (avoid duplicates) if (keyExists(key, i)) { i--; // loop round and generate a different key } else { System.arraycopy(key, 0, keys[i], 0, 4); } } } private static boolean keyExists(final byte[] key, final int limit) { for (int j = 0; j < limit; j++) { if (Arrays.equals(key, keys[j])) { return true; } } return false; } @Parameters(name = "{0}") public static Iterable parameters() { return Arrays.asList(new Object[][] { { "non-direct_reused64_mutex", false, 64, ReusedSynchronisationType.MUTEX }, { "direct_reused64_mutex", true, 64, ReusedSynchronisationType.MUTEX }, { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, { "non-direct_reused64_thread-local", false, 64, ReusedSynchronisationType.THREAD_LOCAL }, { "direct_reused64_thread-local", true, 64, ReusedSynchronisationType.THREAD_LOCAL }, { "non-direct_noreuse", false, -1, null }, { "direct_noreuse", true, -1, null } }); } @Parameter(0) public String name; @Parameter(1) public boolean useDirectBuffer; @Parameter(2) public int maxReusedBufferSize; @Parameter(3) public ReusedSynchronisationType reusedSynchronisationType; @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void 
javaComparatorDefaultCf() throws RocksDBException { try (final ComparatorOptions options = new ComparatorOptions() .setUseDirectBuffer(useDirectBuffer) .setMaxReusedBufferSize(maxReusedBufferSize) // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); final BytewiseComparator comparator = new BytewiseComparator(options)) { // test the round-tripability of keys written and read with the Comparator testRoundtrip(FileSystems.getDefault().getPath( dbFolder.getRoot().getAbsolutePath()), comparator); } } @Test public void javaComparatorNamedCf() throws RocksDBException { try (final ComparatorOptions options = new ComparatorOptions() .setUseDirectBuffer(useDirectBuffer) .setMaxReusedBufferSize(maxReusedBufferSize) // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); final BytewiseComparator comparator = new BytewiseComparator(options)) { // test the round-tripability of keys written and read with the Comparator testRoundtripCf(FileSystems.getDefault().getPath( dbFolder.getRoot().getAbsolutePath()), comparator); } } /** * Test which stores random keys into the database * using an {@link IntComparator} * it then checks that these keys are read back in * ascending order * * @param db_path A path where we can store database * files temporarily * * @param comparator the comparator * * @throws RocksDBException if a database error happens. 
*/ private void testRoundtrip(final Path db_path, final AbstractComparator comparator) throws RocksDBException { try (final Options opt = new Options() .setCreateIfMissing(true) .setComparator(comparator)) { // store TOTAL_KEYS into the db try (final RocksDB db = RocksDB.open(opt, db_path.toString())) { for (int i = 0; i < TOTAL_KEYS; i++) { db.put(keys[i], "value".getBytes(UTF_8)); } } // re-open db and read from start to end // integer keys should be in ascending // order as defined by IntComparator final ByteBuffer key = ByteBuffer.allocate(4); try (final RocksDB db = RocksDB.open(opt, db_path.toString()); final RocksIterator it = db.newIterator()) { it.seekToFirst(); int lastKey = Integer.MIN_VALUE; int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { key.put(it.key()); key.flip(); final int thisKey = key.getInt(); key.clear(); assertThat(thisKey).isGreaterThan(lastKey); lastKey = thisKey; count++; } assertThat(count).isEqualTo(TOTAL_KEYS); } } } /** * Test which stores random keys into a column family * in the database * using an {@link IntComparator} * it then checks that these keys are read back in * ascending order * * @param db_path A path where we can store database * files temporarily * * @param comparator the comparator * * @throws RocksDBException if a database error happens. 
*/ private void testRoundtripCf(final Path db_path, final AbstractComparator comparator) throws RocksDBException { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new ColumnFamilyOptions() .setComparator(comparator)) ); final List cfHandles = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true)) { try (final RocksDB db = RocksDB.open(opt, db_path.toString(), cfDescriptors, cfHandles)) { try { assertThat(cfDescriptors.size()).isEqualTo(2); assertThat(cfHandles.size()).isEqualTo(2); for (int i = 0; i < TOTAL_KEYS; i++) { db.put(cfHandles.get(1), keys[i], "value".getBytes(UTF_8)); } } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } cfHandles.clear(); } } // re-open db and read from start to end // integer keys should be in ascending // order as defined by SimpleIntComparator final ByteBuffer key = ByteBuffer.allocate(4); try (final RocksDB db = RocksDB.open(opt, db_path.toString(), cfDescriptors, cfHandles); final RocksIterator it = db.newIterator(cfHandles.get(1))) { try { assertThat(cfDescriptors.size()).isEqualTo(2); assertThat(cfHandles.size()).isEqualTo(2); it.seekToFirst(); int lastKey = Integer.MIN_VALUE; int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { key.put(it.key()); key.flip(); final int thisKey = key.getInt(); key.clear(); assertThat(thisKey).isGreaterThan(lastKey); lastKey = thisKey; count++; } assertThat(count).isEqualTo(TOTAL_KEYS); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } cfHandles.clear(); for (final ColumnFamilyDescriptor cfDescriptor : cfDescriptors) { cfDescriptor.getOptions().close(); } } } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java000066400000000000000000000404371370372246700274060ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.rocksdb.*; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.*; import java.util.*; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.*; import static org.rocksdb.util.ByteUtil.bytes; /** * This is a direct port of various C++ * tests from db/comparator_db_test.cc * and some code to adapt it to RocksJava */ public class BytewiseComparatorTest { @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); private List source_strings = Arrays.asList("b", "d", "f", "h", "j", "l"); private List interleaving_strings = Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"); /** * Open the database using the C++ BytewiseComparatorImpl * and test the results against our Java BytewiseComparator */ @Test public void java_vs_cpp_bytewiseComparator() throws IOException, RocksDBException { for(int rand_seed = 301; rand_seed < 306; rand_seed++) { final Path dbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); try(final RocksDB db = openDatabase(dbDir, BuiltinComparator.BYTEWISE_COMPARATOR)) { final Random rnd = new Random(rand_seed); try(final ComparatorOptions copt2 = new ComparatorOptions() .setUseDirectBuffer(false); final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { final java.util.Comparator jComparator = toJavaComparator(comparator2); doRandomIterationTest( db, jComparator, rnd, 8, 100, 3 ); } } } } /** * Open the database using 
the Java BytewiseComparator * and test the results against another Java BytewiseComparator */ @Test public void java_vs_java_bytewiseComparator() throws IOException, RocksDBException { for(int rand_seed = 301; rand_seed < 306; rand_seed++) { final Path dbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); try(final ComparatorOptions copt = new ComparatorOptions() .setUseDirectBuffer(false); final AbstractComparator comparator = new BytewiseComparator(copt); final RocksDB db = openDatabase(dbDir, comparator)) { final Random rnd = new Random(rand_seed); try(final ComparatorOptions copt2 = new ComparatorOptions() .setUseDirectBuffer(false); final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { final java.util.Comparator jComparator = toJavaComparator(comparator2); doRandomIterationTest( db, jComparator, rnd, 8, 100, 3 ); } } } } /** * Open the database using the C++ BytewiseComparatorImpl * and test the results against our Java DirectBytewiseComparator */ @Test public void java_vs_cpp_directBytewiseComparator() throws IOException, RocksDBException { for(int rand_seed = 301; rand_seed < 306; rand_seed++) { final Path dbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); try(final RocksDB db = openDatabase(dbDir, BuiltinComparator.BYTEWISE_COMPARATOR)) { final Random rnd = new Random(rand_seed); try(final ComparatorOptions copt2 = new ComparatorOptions() .setUseDirectBuffer(true); final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { final java.util.Comparator jComparator = toJavaComparator(comparator2); doRandomIterationTest( db, jComparator, rnd, 8, 100, 3 ); } } } } /** * Open the database using the Java DirectBytewiseComparator * and test the results against another Java DirectBytewiseComparator */ @Test public void java_vs_java_directBytewiseComparator() throws IOException, RocksDBException { for(int rand_seed = 301; rand_seed < 306; rand_seed++) { final Path dbDir = 
FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); try (final ComparatorOptions copt = new ComparatorOptions() .setUseDirectBuffer(true); final AbstractComparator comparator = new BytewiseComparator(copt); final RocksDB db = openDatabase(dbDir, comparator)) { final Random rnd = new Random(rand_seed); try(final ComparatorOptions copt2 = new ComparatorOptions() .setUseDirectBuffer(true); final AbstractComparator comparator2 = new BytewiseComparator(copt2)) { final java.util.Comparator jComparator = toJavaComparator(comparator2); doRandomIterationTest( db, jComparator, rnd, 8, 100, 3 ); } } } } /** * Open the database using the C++ ReverseBytewiseComparatorImpl * and test the results against our Java ReverseBytewiseComparator */ @Test public void java_vs_cpp_reverseBytewiseComparator() throws IOException, RocksDBException { for(int rand_seed = 301; rand_seed < 306; rand_seed++) { final Path dbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); try(final RocksDB db = openDatabase(dbDir, BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR)) { final Random rnd = new Random(rand_seed); try(final ComparatorOptions copt2 = new ComparatorOptions() .setUseDirectBuffer(false); final AbstractComparator comparator2 = new ReverseBytewiseComparator(copt2)) { final java.util.Comparator jComparator = toJavaComparator(comparator2); doRandomIterationTest( db, jComparator, rnd, 8, 100, 3 ); } } } } /** * Open the database using the Java ReverseBytewiseComparator * and test the results against another Java ReverseBytewiseComparator */ @Test public void java_vs_java_reverseBytewiseComparator() throws IOException, RocksDBException { for(int rand_seed = 301; rand_seed < 306; rand_seed++) { final Path dbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); try (final ComparatorOptions copt = new ComparatorOptions() .setUseDirectBuffer(false); final AbstractComparator comparator = new ReverseBytewiseComparator(copt); 
final RocksDB db = openDatabase(dbDir, comparator)) { final Random rnd = new Random(rand_seed); try(final ComparatorOptions copt2 = new ComparatorOptions() .setUseDirectBuffer(false); final AbstractComparator comparator2 = new ReverseBytewiseComparator(copt2)) { final java.util.Comparator jComparator = toJavaComparator(comparator2); doRandomIterationTest( db, jComparator, rnd, 8, 100, 3 ); } } } } private void doRandomIterationTest( final RocksDB db, final java.util.Comparator javaComparator, final Random rnd, final int num_writes, final int num_iter_ops, final int num_trigger_flush) throws RocksDBException { final TreeMap map = new TreeMap<>(javaComparator); try (final FlushOptions flushOptions = new FlushOptions(); final WriteOptions writeOptions = new WriteOptions()) { for (int i = 0; i < num_writes; i++) { if (num_trigger_flush > 0 && i != 0 && i % num_trigger_flush == 0) { db.flush(flushOptions); } final int type = rnd.nextInt(2); final int index = rnd.nextInt(source_strings.size()); final String key = source_strings.get(index); switch (type) { case 0: // put map.put(key, key); db.put(writeOptions, bytes(key), bytes(key)); break; case 1: // delete if (map.containsKey(key)) { map.remove(key); } db.delete(writeOptions, bytes(key)); break; default: fail("Should not be able to generate random outside range 1..2"); } } } try (final ReadOptions readOptions = new ReadOptions(); final RocksIterator iter = db.newIterator(readOptions)) { final KVIter result_iter = new KVIter<>(map); boolean is_valid = false; for (int i = 0; i < num_iter_ops; i++) { // Random walk and make sure iter and result_iter returns the // same key and value final int type = rnd.nextInt(8); iter.status(); switch (type) { case 0: // Seek to First iter.seekToFirst(); result_iter.seekToFirst(); break; case 1: // Seek to last iter.seekToLast(); result_iter.seekToLast(); break; case 2: { // Seek to random (existing or non-existing) key final int key_idx = rnd.nextInt(interleaving_strings.size()); final 
String key = interleaving_strings.get(key_idx); iter.seek(bytes(key)); result_iter.seek(bytes(key)); break; } case 3: { // SeekForPrev to random (existing or non-existing) key final int key_idx = rnd.nextInt(interleaving_strings.size()); final String key = interleaving_strings.get(key_idx); iter.seekForPrev(bytes(key)); result_iter.seekForPrev(bytes(key)); break; } case 4: // Next if (is_valid) { iter.next(); result_iter.next(); } else { continue; } break; case 5: // Prev if (is_valid) { iter.prev(); result_iter.prev(); } else { continue; } break; case 6: // Refresh iter.refresh(); result_iter.refresh(); iter.seekToFirst(); result_iter.seekToFirst(); break; default: { assert (type == 7); final int key_idx = rnd.nextInt(source_strings.size()); final String key = source_strings.get(key_idx); final byte[] result = db.get(readOptions, bytes(key)); if (!map.containsKey(key)) { assertNull(result); } else { assertArrayEquals(bytes(map.get(key)), result); } break; } } assertEquals(result_iter.isValid(), iter.isValid()); is_valid = iter.isValid(); if (is_valid) { assertArrayEquals(bytes(result_iter.key()), iter.key()); //note that calling value on a non-valid iterator from the Java API //results in a SIGSEGV assertArrayEquals(bytes(result_iter.value()), iter.value()); } } } } /** * Open the database using a C++ Comparator */ private RocksDB openDatabase( final Path dbDir, final BuiltinComparator cppComparator) throws IOException, RocksDBException { final Options options = new Options() .setCreateIfMissing(true) .setComparator(cppComparator); return RocksDB.open(options, dbDir.toAbsolutePath().toString()); } /** * Open the database using a Java Comparator */ private RocksDB openDatabase( final Path dbDir, final AbstractComparator javaComparator) throws IOException, RocksDBException { final Options options = new Options() .setCreateIfMissing(true) .setComparator(javaComparator); return RocksDB.open(options, dbDir.toAbsolutePath().toString()); } private java.util.Comparator 
toJavaComparator( final AbstractComparator rocksComparator) { return new java.util.Comparator() { @Override public int compare(final String s1, final String s2) { final ByteBuffer bufS1; final ByteBuffer bufS2; if (rocksComparator.usingDirectBuffers()) { bufS1 = ByteBuffer.allocateDirect(s1.length()); bufS2 = ByteBuffer.allocateDirect(s2.length()); } else { bufS1 = ByteBuffer.allocate(s1.length()); bufS2 = ByteBuffer.allocate(s2.length()); } bufS1.put(bytes(s1)); bufS1.flip(); bufS2.put(bytes(s2)); bufS2.flip(); return rocksComparator.compare(bufS1, bufS2); } }; } private static class KVIter implements RocksIteratorInterface { private final List> entries; private final java.util.Comparator comparator; private int offset = -1; private int lastPrefixMatchIdx = -1; private int lastPrefixMatch = 0; public KVIter(final TreeMap map) { this.entries = new ArrayList<>(); entries.addAll(map.entrySet()); this.comparator = map.comparator(); } @Override public boolean isValid() { return offset > -1 && offset < entries.size(); } @Override public void seekToFirst() { offset = 0; } @Override public void seekToLast() { offset = entries.size() - 1; } @SuppressWarnings("unchecked") @Override public void seek(final byte[] target) { for(offset = 0; offset < entries.size(); offset++) { if(comparator.compare(entries.get(offset).getKey(), (K)new String(target, UTF_8)) >= 0) { return; } } } @SuppressWarnings("unchecked") @Override public void seekForPrev(final byte[] target) { for(offset = entries.size()-1; offset >= 0; offset--) { if(comparator.compare(entries.get(offset).getKey(), (K)new String(target, UTF_8)) <= 0) { return; } } } /** * Is `a` a prefix of `b` * * @return The length of the matching prefix, or 0 if it is not a prefix */ private int isPrefix(final byte[] a, final byte[] b) { if(b.length >= a.length) { for(int i = 0; i < a.length; i++) { if(a[i] != b[i]) { return i; } } return a.length; } else { return 0; } } @Override public void next() { if(offset < entries.size()) { 
offset++; } } @Override public void prev() { if(offset >= 0) { offset--; } } @Override public void refresh() throws RocksDBException { offset = -1; } @Override public void status() throws RocksDBException { if(offset < 0 || offset >= entries.size()) { throw new RocksDBException("Index out of bounds. Size is: " + entries.size() + ", offset is: " + offset); } } @SuppressWarnings("unchecked") public K key() { if(!isValid()) { if(entries.isEmpty()) { return (K)""; } else if(offset == -1){ return entries.get(0).getKey(); } else if(offset == entries.size()) { return entries.get(offset - 1).getKey(); } else { return (K)""; } } else { return entries.get(offset).getKey(); } } @SuppressWarnings("unchecked") public V value() { if(!isValid()) { return (V)""; } else { return entries.get(offset).getValue(); } } @Override public void seek(ByteBuffer target) { throw new IllegalAccessError("Not implemented"); } @Override public void seekForPrev(ByteBuffer target) { throw new IllegalAccessError("Not implemented"); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java000066400000000000000000000114241370372246700301240ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
package org.rocksdb.util; import org.rocksdb.RocksDBException; import org.rocksdb.WriteBatch; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Objects; /** * A simple WriteBatch Handler which adds a record * of each event that it receives to a list */ public class CapturingWriteBatchHandler extends WriteBatch.Handler { private final List events = new ArrayList<>(); /** * Returns a copy of the current events list * * @return a list of the events which have happened upto now */ public List getEvents() { return new ArrayList<>(events); } @Override public void put(final int columnFamilyId, final byte[] key, final byte[] value) { events.add(new Event(Action.PUT, columnFamilyId, key, value)); } @Override public void put(final byte[] key, final byte[] value) { events.add(new Event(Action.PUT, key, value)); } @Override public void merge(final int columnFamilyId, final byte[] key, final byte[] value) { events.add(new Event(Action.MERGE, columnFamilyId, key, value)); } @Override public void merge(final byte[] key, final byte[] value) { events.add(new Event(Action.MERGE, key, value)); } @Override public void delete(final int columnFamilyId, final byte[] key) { events.add(new Event(Action.DELETE, columnFamilyId, key, (byte[])null)); } @Override public void delete(final byte[] key) { events.add(new Event(Action.DELETE, key, (byte[])null)); } @Override public void singleDelete(final int columnFamilyId, final byte[] key) { events.add(new Event(Action.SINGLE_DELETE, columnFamilyId, key, (byte[])null)); } @Override public void singleDelete(final byte[] key) { events.add(new Event(Action.SINGLE_DELETE, key, (byte[])null)); } @Override public void deleteRange(final int columnFamilyId, final byte[] beginKey, final byte[] endKey) { events.add(new Event(Action.DELETE_RANGE, columnFamilyId, beginKey, endKey)); } @Override public void deleteRange(final byte[] beginKey, final byte[] endKey) { events.add(new Event(Action.DELETE_RANGE, beginKey, 
endKey)); } @Override public void logData(final byte[] blob) { events.add(new Event(Action.LOG, (byte[])null, blob)); } @Override public void putBlobIndex(final int columnFamilyId, final byte[] key, final byte[] value) { events.add(new Event(Action.PUT_BLOB_INDEX, key, value)); } @Override public void markBeginPrepare() throws RocksDBException { events.add(new Event(Action.MARK_BEGIN_PREPARE, (byte[])null, (byte[])null)); } @Override public void markEndPrepare(final byte[] xid) throws RocksDBException { events.add(new Event(Action.MARK_END_PREPARE, (byte[])null, (byte[])null)); } @Override public void markNoop(final boolean emptyBatch) throws RocksDBException { events.add(new Event(Action.MARK_NOOP, (byte[])null, (byte[])null)); } @Override public void markRollback(final byte[] xid) throws RocksDBException { events.add(new Event(Action.MARK_ROLLBACK, (byte[])null, (byte[])null)); } @Override public void markCommit(final byte[] xid) throws RocksDBException { events.add(new Event(Action.MARK_COMMIT, (byte[])null, (byte[])null)); } public static class Event { public final Action action; public final int columnFamilyId; public final byte[] key; public final byte[] value; public Event(final Action action, final byte[] key, final byte[] value) { this(action, 0, key, value); } public Event(final Action action, final int columnFamilyId, final byte[] key, final byte[] value) { this.action = action; this.columnFamilyId = columnFamilyId; this.key = key; this.value = value; } @Override public boolean equals(final Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } final Event event = (Event) o; return columnFamilyId == event.columnFamilyId && action == event.action && ((key == null && event.key == null) || Arrays.equals(key, event.key)) && ((value == null && event.value == null) || Arrays.equals(value, event.value)); } @Override public int hashCode() { return Objects.hash(action, columnFamilyId, key, value); } } /** * 
Enumeration of Write Batch * event actions */ public enum Action { PUT, MERGE, DELETE, SINGLE_DELETE, DELETE_RANGE, LOG, PUT_BLOB_INDEX, MARK_BEGIN_PREPARE, MARK_END_PREPARE, MARK_NOOP, MARK_COMMIT, MARK_ROLLBACK } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/EnvironmentTest.java000066400000000000000000000251411370372246700260620ustar00rootroot00000000000000// Copyright (c) 2014, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import static org.assertj.core.api.Assertions.assertThat; public class EnvironmentTest { private final static String ARCH_FIELD_NAME = "ARCH"; private final static String OS_FIELD_NAME = "OS"; private final static String MUSL_LIBC_FIELD_NAME = "MUSL_LIBC"; private static String INITIAL_OS; private static String INITIAL_ARCH; private static boolean INITIAL_MUSL_LIBC; @BeforeClass public static void saveState() { INITIAL_ARCH = getEnvironmentClassField(ARCH_FIELD_NAME); INITIAL_OS = getEnvironmentClassField(OS_FIELD_NAME); INITIAL_MUSL_LIBC = getEnvironmentClassField(MUSL_LIBC_FIELD_NAME); } @Test public void mac32() { setEnvironmentClassFields("mac", "32"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".jnilib"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-osx.jnilib"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.dylib"); } @Test public void mac64() { setEnvironmentClassFields("mac", "64"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). 
isEqualTo(".jnilib"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-osx.jnilib"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.dylib"); } @Test public void nix32() { // Linux setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); setEnvironmentClassFields("Linux", "32"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-linux32.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); // Linux musl-libc (Alpine) setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-linux32-musl.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); // UNIX setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); setEnvironmentClassFields("Unix", "32"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-linux32.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); } @Test(expected = UnsupportedOperationException.class) public void aix32() { // AIX setEnvironmentClassFields("aix", "32"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); Environment.getJniLibraryFileName("rocksdb"); } @Test public void nix64() { setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); setEnvironmentClassFields("Linux", "x64"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). 
isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-linux64.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); // Linux musl-libc (Alpine) setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-linux64-musl.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); // UNIX setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); setEnvironmentClassFields("Unix", "x64"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-linux64.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); // AIX setEnvironmentClassFields("aix", "x64"); assertThat(Environment.isWindows()).isFalse(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".so"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-aix64.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). isEqualTo("librocksdbjni.so"); } @Test public void detectWindows(){ setEnvironmentClassFields("win", "x64"); assertThat(Environment.isWindows()).isTrue(); } @Test public void win64() { setEnvironmentClassFields("win", "x64"); assertThat(Environment.isWindows()).isTrue(); assertThat(Environment.getJniLibraryExtension()). isEqualTo(".dll"); assertThat(Environment.getJniLibraryFileName("rocksdb")). isEqualTo("librocksdbjni-win64.dll"); assertThat(Environment.getSharedLibraryFileName("rocksdb")). 
isEqualTo("librocksdbjni.dll"); } @Test public void ppc64le() { setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); setEnvironmentClassFields("Linux", "ppc64le"); assertThat(Environment.isUnix()).isTrue(); assertThat(Environment.isPowerPC()).isTrue(); assertThat(Environment.is64Bit()).isTrue(); assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-ppc64le"); assertThat(Environment.getJniLibraryFileName("rocksdb")) .isEqualTo("librocksdbjni-linux-ppc64le.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); // Linux musl-libc (Alpine) setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); setEnvironmentClassFields("Linux", "ppc64le"); assertThat(Environment.isUnix()).isTrue(); assertThat(Environment.isPowerPC()).isTrue(); assertThat(Environment.is64Bit()).isTrue(); assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-ppc64le-musl"); assertThat(Environment.getJniLibraryFileName("rocksdb")) .isEqualTo("librocksdbjni-linux-ppc64le-musl.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); } @Test public void aarch64() { setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); setEnvironmentClassFields("Linux", "aarch64"); assertThat(Environment.isUnix()).isTrue(); assertThat(Environment.isAarch64()).isTrue(); assertThat(Environment.is64Bit()).isTrue(); assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); 
assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-aarch64"); assertThat(Environment.getJniLibraryFileName("rocksdb")) .isEqualTo("librocksdbjni-linux-aarch64.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); // Linux musl-libc (Alpine) setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, true); setEnvironmentClassFields("Linux", "aarch64"); assertThat(Environment.isUnix()).isTrue(); assertThat(Environment.isAarch64()).isTrue(); assertThat(Environment.is64Bit()).isTrue(); assertThat(Environment.getJniLibraryExtension()).isEqualTo(".so"); assertThat(Environment.getSharedLibraryName("rocksdb")).isEqualTo("rocksdbjni"); assertThat(Environment.getJniLibraryName("rocksdb")).isEqualTo("rocksdbjni-linux-aarch64-musl"); assertThat(Environment.getJniLibraryFileName("rocksdb")) .isEqualTo("librocksdbjni-linux-aarch64-musl.so"); assertThat(Environment.getSharedLibraryFileName("rocksdb")).isEqualTo("librocksdbjni.so"); setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, false); } private void setEnvironmentClassFields(String osName, String osArch) { setEnvironmentClassField(OS_FIELD_NAME, osName); setEnvironmentClassField(ARCH_FIELD_NAME, osArch); } @AfterClass public static void restoreState() { setEnvironmentClassField(OS_FIELD_NAME, INITIAL_OS); setEnvironmentClassField(ARCH_FIELD_NAME, INITIAL_ARCH); setEnvironmentClassField(MUSL_LIBC_FIELD_NAME, INITIAL_MUSL_LIBC); } @SuppressWarnings("unchecked") private static T getEnvironmentClassField(String fieldName) { final Field field; try { field = Environment.class.getDeclaredField(fieldName); field.setAccessible(true); /* Fails in JDK 13; and not needed unless fields are final final Field modifiersField = Field.class.getDeclaredField("modifiers"); modifiersField.setAccessible(true); modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); */ return (T)field.get(null); } catch (final NoSuchFieldException | IllegalAccessException e) { throw new 
RuntimeException(e); } } private static void setEnvironmentClassField(String fieldName, Object value) { final Field field; try { field = Environment.class.getDeclaredField(fieldName); field.setAccessible(true); /* Fails in JDK 13; and not needed unless fields are final final Field modifiersField = Field.class.getDeclaredField("modifiers"); modifiersField.setAccessible(true); modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL); */ field.set(null, value); } catch (final NoSuchFieldException | IllegalAccessException e) { throw new RuntimeException(e); } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/IntComparatorTest.java000066400000000000000000000220171370372246700263370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import org.rocksdb.*; import java.nio.ByteBuffer; import java.nio.file.*; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; /** * Tests for IntComparator, but more generally * also for rocksdb::ComparatorJniCallback implementation. 
*/ @RunWith(Parameterized.class) public class IntComparatorTest { // test with 500 random integer keys private static final int TOTAL_KEYS = 500; private static final byte[][] keys = new byte[TOTAL_KEYS][4]; @BeforeClass public static void prepareKeys() { final ByteBuffer buf = ByteBuffer.allocate(4); final Random random = new Random(); for (int i = 0; i < TOTAL_KEYS; i++) { final int ri = random.nextInt(); buf.putInt(ri); buf.flip(); final byte[] key = buf.array(); // does key already exist (avoid duplicates) if (keyExists(key, i)) { i--; // loop round and generate a different key } else { System.arraycopy(key, 0, keys[i], 0, 4); } } } private static boolean keyExists(final byte[] key, final int limit) { for (int j = 0; j < limit; j++) { if (Arrays.equals(key, keys[j])) { return true; } } return false; } @Parameters(name = "{0}") public static Iterable parameters() { return Arrays.asList(new Object[][] { { "non-direct_reused64_mutex", false, 64, ReusedSynchronisationType.MUTEX }, { "direct_reused64_mutex", true, 64, ReusedSynchronisationType.MUTEX }, { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX }, { "non-direct_reused64_thread-local", false, 64, ReusedSynchronisationType.THREAD_LOCAL }, { "direct_reused64_thread-local", true, 64, ReusedSynchronisationType.THREAD_LOCAL }, { "non-direct_noreuse", false, -1, null }, { "direct_noreuse", true, -1, null } }); } @Parameter(0) public String name; @Parameter(1) public boolean useDirectBuffer; @Parameter(2) public int maxReusedBufferSize; @Parameter(3) public ReusedSynchronisationType reusedSynchronisationType; @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); @Test public void javaComparatorDefaultCf() throws RocksDBException { try (final 
ComparatorOptions options = new ComparatorOptions() .setUseDirectBuffer(useDirectBuffer) .setMaxReusedBufferSize(maxReusedBufferSize) // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); final IntComparator comparator = new IntComparator(options)) { // test the round-tripability of keys written and read with the Comparator testRoundtrip(FileSystems.getDefault().getPath( dbFolder.getRoot().getAbsolutePath()), comparator); } } @Test public void javaComparatorNamedCf() throws RocksDBException { try (final ComparatorOptions options = new ComparatorOptions() .setUseDirectBuffer(useDirectBuffer) .setMaxReusedBufferSize(maxReusedBufferSize) // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); final IntComparator comparator = new IntComparator(options)) { // test the round-tripability of keys written and read with the Comparator testRoundtripCf(FileSystems.getDefault().getPath( dbFolder.getRoot().getAbsolutePath()), comparator); } } /** * Test which stores random keys into the database * using an {@link IntComparator} * it then checks that these keys are read back in * ascending order * * @param db_path A path where we can store database * files temporarily * * @param comparator the comparator * * @throws RocksDBException if a database error happens. 
*/ private void testRoundtrip(final Path db_path, final AbstractComparator comparator) throws RocksDBException { try (final Options opt = new Options() .setCreateIfMissing(true) .setComparator(comparator)) { // store TOTAL_KEYS into the db try (final RocksDB db = RocksDB.open(opt, db_path.toString())) { for (int i = 0; i < TOTAL_KEYS; i++) { db.put(keys[i], "value".getBytes(UTF_8)); } } // re-open db and read from start to end // integer keys should be in ascending // order as defined by IntComparator final ByteBuffer key = ByteBuffer.allocate(4); try (final RocksDB db = RocksDB.open(opt, db_path.toString()); final RocksIterator it = db.newIterator()) { it.seekToFirst(); int lastKey = Integer.MIN_VALUE; int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { key.put(it.key()); key.flip(); final int thisKey = key.getInt(); key.clear(); assertThat(thisKey).isGreaterThan(lastKey); lastKey = thisKey; count++; } assertThat(count).isEqualTo(TOTAL_KEYS); } } } /** * Test which stores random keys into a column family * in the database * using an {@link IntComparator} * it then checks that these keys are read back in * ascending order * * @param db_path A path where we can store database * files temporarily * * @param comparator the comparator * * @throws RocksDBException if a database error happens. 
*/ private void testRoundtripCf(final Path db_path, final AbstractComparator comparator) throws RocksDBException { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new ColumnFamilyOptions() .setComparator(comparator)) ); final List cfHandles = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true)) { try (final RocksDB db = RocksDB.open(opt, db_path.toString(), cfDescriptors, cfHandles)) { try { assertThat(cfDescriptors.size()).isEqualTo(2); assertThat(cfHandles.size()).isEqualTo(2); for (int i = 0; i < TOTAL_KEYS; i++) { db.put(cfHandles.get(1), keys[i], "value".getBytes(UTF_8)); } } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } cfHandles.clear(); } } // re-open db and read from start to end // integer keys should be in ascending // order as defined by SimpleIntComparator final ByteBuffer key = ByteBuffer.allocate(4); try (final RocksDB db = RocksDB.open(opt, db_path.toString(), cfDescriptors, cfHandles); final RocksIterator it = db.newIterator(cfHandles.get(1))) { try { assertThat(cfDescriptors.size()).isEqualTo(2); assertThat(cfHandles.size()).isEqualTo(2); it.seekToFirst(); int lastKey = Integer.MIN_VALUE; int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { key.put(it.key()); key.flip(); final int thisKey = key.getInt(); key.clear(); assertThat(thisKey).isGreaterThan(lastKey); lastKey = thisKey; count++; } assertThat(count).isEqualTo(TOTAL_KEYS); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } cfHandles.clear(); for (final ColumnFamilyDescriptor cfDescriptor : cfDescriptors) { cfDescriptor.getOptions().close(); } } } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java000066400000000000000000000122101370372246700262170ustar00rootroot00000000000000package org.rocksdb.util; import 
org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import org.rocksdb.*; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.*; import java.util.Arrays; import static org.assertj.core.api.Assertions.assertThat; @RunWith(Parameterized.class) public class JNIComparatorTest { @Parameters(name = "{0}") public static Iterable parameters() { return Arrays.asList(new Object[][] { { "bytewise_non-direct", BuiltinComparator.BYTEWISE_COMPARATOR, false }, { "bytewise_direct", BuiltinComparator.BYTEWISE_COMPARATOR, true }, { "reverse-bytewise_non-direct", BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR, false }, { "reverse-bytewise_direct", BuiltinComparator.REVERSE_BYTEWISE_COMPARATOR, true }, }); } @Parameter(0) public String name; @Parameter(1) public BuiltinComparator builtinComparator; @Parameter(2) public boolean useDirectBuffer; @ClassRule public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = new RocksNativeLibraryResource(); @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); private static final int MIN = Short.MIN_VALUE - 1; private static final int MAX = Short.MAX_VALUE + 1; @Test public void java_comparator_equals_cpp_comparator() throws RocksDBException, IOException { final int[] javaKeys; try (final ComparatorOptions comparatorOptions = new ComparatorOptions(); final AbstractComparator comparator = builtinComparator == BuiltinComparator.BYTEWISE_COMPARATOR ? 
new BytewiseComparator(comparatorOptions) : new ReverseBytewiseComparator(comparatorOptions)) { final Path javaDbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); storeWithJavaComparator(javaDbDir, comparator); javaKeys = readAllWithJavaComparator(javaDbDir, comparator); } final Path cppDbDir = FileSystems.getDefault().getPath(dbFolder.newFolder().getAbsolutePath()); storeWithCppComparator(cppDbDir, builtinComparator); final int[] cppKeys = readAllWithCppComparator(cppDbDir, builtinComparator); assertThat(javaKeys).isEqualTo(cppKeys); } private void storeWithJavaComparator(final Path dir, final AbstractComparator comparator) throws RocksDBException { final ByteBuffer buf = ByteBuffer.allocate(4); try (final Options options = new Options() .setCreateIfMissing(true) .setComparator(comparator); final RocksDB db = RocksDB.open(options, dir.toAbsolutePath().toString())) { for (int i = MIN; i < MAX; i++) { buf.putInt(i); buf.flip(); db.put(buf.array(), buf.array()); buf.clear(); } } } private void storeWithCppComparator(final Path dir, final BuiltinComparator builtinComparator) throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true) .setComparator(builtinComparator); final RocksDB db = RocksDB.open(options, dir.toAbsolutePath().toString())) { final ByteBuffer buf = ByteBuffer.allocate(4); for (int i = MIN; i < MAX; i++) { buf.putInt(i); buf.flip(); db.put(buf.array(), buf.array()); buf.clear(); } } } private int[] readAllWithJavaComparator(final Path dir, final AbstractComparator comparator) throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true) .setComparator(comparator); final RocksDB db = RocksDB.open(options, dir.toAbsolutePath().toString())) { try (final RocksIterator it = db.newIterator()) { it.seekToFirst(); final ByteBuffer buf = ByteBuffer.allocate(4); final int[] keys = new int[MAX - MIN]; int idx = 0; while (it.isValid()) { buf.put(it.key()); 
buf.flip(); final int thisKey = buf.getInt(); keys[idx++] = thisKey; buf.clear(); it.next(); } return keys; } } } private int[] readAllWithCppComparator(final Path dir, final BuiltinComparator comparator) throws RocksDBException { try (final Options options = new Options() .setCreateIfMissing(true) .setComparator(comparator); final RocksDB db = RocksDB.open(options, dir.toAbsolutePath().toString())) { try (final RocksIterator it = db.newIterator()) { it.seekToFirst(); final ByteBuffer buf = ByteBuffer.allocate(4); final int[] keys = new int[MAX - MIN]; int idx = 0; while (it.isValid()) { buf.put(it.key()); buf.flip(); final int thisKey = buf.getInt(); keys[idx++] = thisKey; buf.clear(); it.next(); } return keys; } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java000066400000000000000000000223051370372246700314070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import org.rocksdb.*; import java.nio.ByteBuffer; import java.nio.file.FileSystems; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; /** * Similar to {@link IntComparatorTest}, but uses * {@link ReverseBytewiseComparator} which ensures the correct reverse * ordering of positive integers. 
*/
@RunWith(Parameterized.class)
public class ReverseBytewiseComparatorIntTest {

  // test with 500 random positive integer keys
  private static final int TOTAL_KEYS = 500;
  private static final byte[][] keys = new byte[TOTAL_KEYS][4];

  /**
   * Fills {@link #keys} with {@link #TOTAL_KEYS} distinct random
   * non-negative 4-byte integer keys.
   */
  @BeforeClass
  public static void prepareKeys() {
    final ByteBuffer buf = ByteBuffer.allocate(4);
    final Random random = new Random();
    int filled = 0;
    while (filled < TOTAL_KEYS) {
      // the & clears the sign bit, so the integer is never negative
      final int ri = random.nextInt() & Integer.MAX_VALUE;
      buf.clear();
      buf.putInt(ri);
      // backing array of buf; it is overwritten on every pass, hence the copy below
      final byte[] candidate = buf.array();

      // does key already exist (avoid duplicates)? if so, draw again
      if (!keyExists(candidate, filled)) {
        System.arraycopy(candidate, 0, keys[filled], 0, 4);
        filled++;
      }
    }
  }

  /**
   * Checks whether {@code key} is already present in the first
   * {@code limit} entries of {@link #keys}.
   *
   * @param key the candidate key
   * @param limit the number of leading entries of {@link #keys} to inspect
   *
   * @return true if an equal key is found, false otherwise
   */
  private static boolean keyExists(final byte[] key, final int limit) {
    for (int j = 0; j < limit; j++) {
      if (Arrays.equals(key, keys[j])) {
        return true;
      }
    }
    return false;
  }

  /**
   * Parameter sets: display name, direct buffer flag, max reused buffer
   * size, and synchronisation type for the reused buffer (null when no
   * buffer is reused). Each display name matches its configuration so
   * that every run is uniquely identified in test reports.
   *
   * @return the parameter sets
   */
  @Parameters(name = "{0}")
  public static Iterable<Object[]> parameters() {
    return Arrays.asList(new Object[][] {
        { "non-direct_reused64_mutex", false, 64, ReusedSynchronisationType.MUTEX },
        { "direct_reused64_mutex", true, 64, ReusedSynchronisationType.MUTEX },
        { "non-direct_reused64_adaptive-mutex", false, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX },
        { "direct_reused64_adaptive-mutex", true, 64, ReusedSynchronisationType.ADAPTIVE_MUTEX },
        { "non-direct_reused64_thread-local", false, 64, ReusedSynchronisationType.THREAD_LOCAL },
        { "direct_reused64_thread-local", true, 64, ReusedSynchronisationType.THREAD_LOCAL },
        { "non-direct_noreuse", false, -1, null },
        { "direct_noreuse", true, -1, null }
    });
  }

  @Parameter(0)
  public String name;

  @Parameter(1)
  public boolean useDirectBuffer;

  @Parameter(2)
  public int maxReusedBufferSize;

  @Parameter(3)
  public ReusedSynchronisationType reusedSynchronisationType;

  @ClassRule
  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
      new RocksNativeLibraryResource();

  @Rule
  public TemporaryFolder dbFolder = new TemporaryFolder();

  @Test
public void javaComparatorDefaultCf() throws RocksDBException { try (final ComparatorOptions options = new ComparatorOptions() .setUseDirectBuffer(useDirectBuffer) .setMaxReusedBufferSize(maxReusedBufferSize) // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); final ReverseBytewiseComparator comparator = new ReverseBytewiseComparator(options)) { // test the round-tripability of keys written and read with the Comparator testRoundtrip(FileSystems.getDefault().getPath( dbFolder.getRoot().getAbsolutePath()), comparator); } } @Test public void javaComparatorNamedCf() throws RocksDBException { try (final ComparatorOptions options = new ComparatorOptions() .setUseDirectBuffer(useDirectBuffer) .setMaxReusedBufferSize(maxReusedBufferSize) // if reusedSynchronisationType == null we assume that maxReusedBufferSize <= 0 and so we just set ADAPTIVE_MUTEX, even though it won't be used .setReusedSynchronisationType(reusedSynchronisationType == null ? ReusedSynchronisationType.ADAPTIVE_MUTEX : reusedSynchronisationType); final ReverseBytewiseComparator comparator = new ReverseBytewiseComparator(options)) { // test the round-tripability of keys written and read with the Comparator testRoundtripCf(FileSystems.getDefault().getPath( dbFolder.getRoot().getAbsolutePath()), comparator); } } /** * Test which stores random keys into the database * using an {@link IntComparator} * it then checks that these keys are read back in * ascending order * * @param db_path A path where we can store database * files temporarily * * @param comparator the comparator * * @throws RocksDBException if a database error happens. 
*/ private void testRoundtrip(final Path db_path, final AbstractComparator comparator) throws RocksDBException { try (final Options opt = new Options() .setCreateIfMissing(true) .setComparator(comparator)) { // store TOTAL_KEYS into the db try (final RocksDB db = RocksDB.open(opt, db_path.toString())) { for (int i = 0; i < TOTAL_KEYS; i++) { db.put(keys[i], "value".getBytes(UTF_8)); } } // re-open db and read from start to end // integer keys should be in descending // order final ByteBuffer key = ByteBuffer.allocate(4); try (final RocksDB db = RocksDB.open(opt, db_path.toString()); final RocksIterator it = db.newIterator()) { it.seekToFirst(); int lastKey = Integer.MAX_VALUE; int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { key.put(it.key()); key.flip(); final int thisKey = key.getInt(); key.clear(); assertThat(thisKey).isLessThan(lastKey); lastKey = thisKey; count++; } assertThat(count).isEqualTo(TOTAL_KEYS); } } } /** * Test which stores random keys into a column family * in the database * using an {@link IntComparator} * it then checks that these keys are read back in * ascending order * * @param db_path A path where we can store database * files temporarily * * @param comparator the comparator * * @throws RocksDBException if a database error happens. 
*/ private void testRoundtripCf(final Path db_path, final AbstractComparator comparator) throws RocksDBException { final List cfDescriptors = Arrays.asList( new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), new ColumnFamilyDescriptor("new_cf".getBytes(), new ColumnFamilyOptions() .setComparator(comparator)) ); final List cfHandles = new ArrayList<>(); try (final DBOptions opt = new DBOptions() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true)) { try (final RocksDB db = RocksDB.open(opt, db_path.toString(), cfDescriptors, cfHandles)) { try { assertThat(cfDescriptors.size()).isEqualTo(2); assertThat(cfHandles.size()).isEqualTo(2); for (int i = 0; i < TOTAL_KEYS; i++) { db.put(cfHandles.get(1), keys[i], "value".getBytes(UTF_8)); } } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } cfHandles.clear(); } } // re-open db and read from start to end // integer keys should be in descending // order final ByteBuffer key = ByteBuffer.allocate(4); try (final RocksDB db = RocksDB.open(opt, db_path.toString(), cfDescriptors, cfHandles); final RocksIterator it = db.newIterator(cfHandles.get(1))) { try { assertThat(cfDescriptors.size()).isEqualTo(2); assertThat(cfHandles.size()).isEqualTo(2); it.seekToFirst(); int lastKey = Integer.MAX_VALUE; int count = 0; for (it.seekToFirst(); it.isValid(); it.next()) { key.put(it.key()); key.flip(); final int thisKey = key.getInt(); key.clear(); assertThat(thisKey).isLessThan(lastKey); lastKey = thisKey; count++; } assertThat(count).isEqualTo(TOTAL_KEYS); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); } cfHandles.clear(); for (final ColumnFamilyDescriptor cfDescriptor : cfDescriptors) { cfDescriptor.getOptions().close(); } } } } } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/SizeUnitTest.java000066400000000000000000000015501370372246700253260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; public class SizeUnitTest { public static final long COMPUTATION_UNIT = 1024L; @Test public void sizeUnit() { assertThat(SizeUnit.KB).isEqualTo(COMPUTATION_UNIT); assertThat(SizeUnit.MB).isEqualTo( SizeUnit.KB * COMPUTATION_UNIT); assertThat(SizeUnit.GB).isEqualTo( SizeUnit.MB * COMPUTATION_UNIT); assertThat(SizeUnit.TB).isEqualTo( SizeUnit.GB * COMPUTATION_UNIT); assertThat(SizeUnit.PB).isEqualTo( SizeUnit.TB * COMPUTATION_UNIT); } } rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/TestUtil.java000066400000000000000000000030741370372246700244740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). package org.rocksdb.util; import org.rocksdb.CompactionPriority; import org.rocksdb.Options; import org.rocksdb.WALRecoveryMode; import java.util.Random; import static java.nio.charset.StandardCharsets.UTF_8; /** * General test utilities. */ public class TestUtil { /** * Get the options for log iteration tests. * * @return the options */ public static Options optionsForLogIterTest() { return defaultOptions() .setCreateIfMissing(true) .setWalTtlSeconds(1000); } /** * Get the default options. 
*
   * @return the options
   */
  public static Options defaultOptions() {
    final Options options = new Options();
    options.setWriteBufferSize(4090 * 4096);
    options.setTargetFileSizeBase(2 * 1024 * 1024);
    options.setMaxBytesForLevelBase(10 * 1024 * 1024);
    options.setMaxOpenFiles(5000);
    options.setWalRecoveryMode(WALRecoveryMode.TolerateCorruptedTailRecords);
    options.setCompactionPriority(CompactionPriority.ByCompensatedSize);
    return options;
  }

  private static final Random random = new Random();

  /**
   * Produce a byte array of the requested length filled with random bytes.
   *
   * @param len the number of bytes to generate.
   *
   * @return the randomly filled byte array
   */
  public static byte[] dummyString(final int len) {
    final byte[] bytes = new byte[len];
    random.nextBytes(bytes);
    return bytes;
  }
}
rocksdb-6.11.4/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java000066400000000000000000000062571370372246700261340ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
package org.rocksdb.util;

import org.rocksdb.RocksDBException;
import org.rocksdb.WriteBatch;

import java.util.Arrays;

/**
 * A {@link WriteBatch.Handler} that watches a write batch for a single key
 * and remembers the last value written for it (null after a delete).
 * Range deletes, log data and the transaction markers are not supported
 * and throw {@link UnsupportedOperationException}.
 */
public class WriteBatchGetter extends WriteBatch.Handler {

  private int columnFamilyId = -1;
  private final byte[] key;
  private byte[] value;

  public WriteBatchGetter(final byte[] key) {
    this.key = key;
  }

  /**
   * @return the last value recorded for the watched key, or null if none
   *     was recorded or the key was last deleted
   */
  public byte[] getValue() {
    return value;
  }

  @Override
  public void put(final int columnFamilyId, final byte[] key,
      final byte[] value) {
    record(columnFamilyId, key, value);
  }

  @Override
  public void put(final byte[] key, final byte[] value) {
    record(key, value);
  }

  @Override
  public void merge(final int columnFamilyId, final byte[] key,
      final byte[] value) {
    record(columnFamilyId, key, value);
  }

  @Override
  public void merge(final byte[] key, final byte[] value) {
    record(key, value);
  }

  @Override
  public void delete(final int columnFamilyId, final byte[] key) {
    record(columnFamilyId, key, null);
  }

  @Override
  public void delete(final byte[] key) {
    record(key, null);
  }

  @Override
  public void singleDelete(final int columnFamilyId, final byte[] key) {
    record(columnFamilyId, key, null);
  }

  @Override
  public void singleDelete(final byte[] key) {
    record(key, null);
  }

  @Override
  public void deleteRange(final int columnFamilyId, final byte[] beginKey,
      final byte[] endKey) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void deleteRange(final byte[] beginKey, final byte[] endKey) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void logData(final byte[] blob) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void putBlobIndex(final int columnFamilyId, final byte[] key,
      final byte[] value) {
    record(columnFamilyId, key, value);
  }

  @Override
  public void markBeginPrepare() throws RocksDBException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void markEndPrepare(final byte[] xid) throws RocksDBException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void markNoop(final boolean emptyBatch) throws RocksDBException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void markRollback(final byte[] xid) throws RocksDBException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void markCommit(final byte[] xid) throws RocksDBException {
    throw new UnsupportedOperationException();
  }

  // Remembers newValue when candidateKey is the watched key; the column
  // family id is left untouched.
  private void record(final byte[] candidateKey, final byte[] newValue) {
    if (!Arrays.equals(this.key, candidateKey)) {
      return;
    }
    this.value = newValue;
  }

  // Remembers newValue and the column family id when candidateKey is the
  // watched key.
  private void record(final int cfId, final byte[] candidateKey,
      final byte[] newValue) {
    if (!Arrays.equals(this.key, candidateKey)) {
      return;
    }
    this.columnFamilyId = cfId;
    this.value = newValue;
  }
}
rocksdb-6.11.4/logging/000077500000000000000000000000001370372246700146735ustar00rootroot00000000000000rocksdb-6.11.4/logging/auto_roll_logger.cc000066400000000000000000000217761370372246700205560ustar00rootroot00000000000000//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "logging/auto_roll_logger.h" #include #include "file/filename.h" #include "logging/logging.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE // -- AutoRollLogger AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname, const std::string& db_log_dir, size_t log_max_size, size_t log_file_time_to_roll, size_t keep_log_file_num, const InfoLogLevel log_level) : Logger(log_level), dbname_(dbname), db_log_dir_(db_log_dir), env_(env), status_(Status::OK()), kMaxLogFileSize(log_max_size), kLogFileTimeToRoll(log_file_time_to_roll), kKeepLogFileNum(keep_log_file_num), cached_now(static_cast(env_->NowMicros() * 1e-6)), ctime_(cached_now), cached_now_access_count(0), call_NowMicros_every_N_records_(100), mutex_() { Status s = env->GetAbsolutePath(dbname, &db_absolute_path_); if (s.IsNotSupported()) { db_absolute_path_ = dbname; } else { status_ = s; } log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_); if (env_->FileExists(log_fname_).ok()) { RollLogFile(); } GetExistingFiles(); ResetLogger(); if (status_.ok()) { status_ = TrimOldLogFiles(); } } Status AutoRollLogger::ResetLogger() { TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger"); status_ = env_->NewLogger(log_fname_, &logger_); TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger"); if (!status_.ok()) { return status_; } assert(logger_); logger_->SetInfoLogLevel(Logger::GetInfoLogLevel()); if (logger_->GetLogFileSize() == Logger::kDoNotSupportGetLogFileSize) { status_ = Status::NotSupported( "The underlying logger doesn't support GetLogFileSize()"); } if (status_.ok()) { cached_now = static_cast(env_->NowMicros() * 1e-6); ctime_ = cached_now; cached_now_access_count = 0; } return status_; } void AutoRollLogger::RollLogFile() { // This function 
is called when log is rotating. Two rotations // can happen quickly (NowMicro returns same value). To not overwrite // previous log file we increment by one micro second and try again. uint64_t now = env_->NowMicros(); std::string old_fname; do { old_fname = OldInfoLogFileName( dbname_, now, db_absolute_path_, db_log_dir_); now++; } while (env_->FileExists(old_fname).ok()); env_->RenameFile(log_fname_, old_fname); old_log_files_.push(old_fname); } void AutoRollLogger::GetExistingFiles() { { // Empty the queue to avoid duplicated entries in the queue. std::queue empty; std::swap(old_log_files_, empty); } std::string parent_dir; std::vector info_log_files; Status s = GetInfoLogFiles(env_, db_log_dir_, dbname_, &parent_dir, &info_log_files); if (status_.ok()) { status_ = s; } // We need to sort the file before enqueing it so that when we // delete file from the front, it is the oldest file. std::sort(info_log_files.begin(), info_log_files.end()); for (const std::string& f : info_log_files) { old_log_files_.push(parent_dir + "/" + f); } } Status AutoRollLogger::TrimOldLogFiles() { // Here we directly list info files and delete them through Env. // The deletion isn't going through DB, so there are shortcomes: // 1. the deletion is not rate limited by SstFileManager // 2. there is a chance that an I/O will be issued here // Since it's going to be complicated to pass DB object down to // here, we take a simple approach to keep the code easier to // maintain. // old_log_files_.empty() is helpful for the corner case that // kKeepLogFileNum == 0. We can instead check kKeepLogFileNum != 0 but // it's essentially the same thing, and checking empty before accessing // the queue feels safer. while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) { Status s = env_->DeleteFile(old_log_files_.front()); // Remove the file from the tracking anyway. It's possible that // DB cleaned up the old log file, or people cleaned it up manually. 
old_log_files_.pop(); // To make the file really go away, we should sync parent directory. // Since there isn't any consistency issue involved here, skipping // this part to avoid one I/O here. if (!s.ok()) { return s; } } return Status::OK(); } std::string AutoRollLogger::ValistToString(const char* format, va_list args) const { // Any log messages longer than 1024 will get truncated. // The user is responsible for chopping longer messages into multi line log static const int MAXBUFFERSIZE = 1024; char buffer[MAXBUFFERSIZE]; int count = vsnprintf(buffer, MAXBUFFERSIZE, format, args); (void) count; assert(count >= 0); return buffer; } void AutoRollLogger::LogInternal(const char* format, ...) { mutex_.AssertHeld(); if (!logger_) { return; } va_list args; va_start(args, format); logger_->Logv(format, args); va_end(args); } void AutoRollLogger::Logv(const char* format, va_list ap) { assert(GetStatus().ok()); if (!logger_) { return; } std::shared_ptr logger; { MutexLock l(&mutex_); if ((kLogFileTimeToRoll > 0 && LogExpired()) || (kMaxLogFileSize > 0 && logger_->GetLogFileSize() >= kMaxLogFileSize)) { RollLogFile(); Status s = ResetLogger(); Status s2 = TrimOldLogFiles(); if (!s.ok()) { // can't really log the error if creating a new LOG file failed return; } WriteHeaderInfo(); if (!s2.ok()) { ROCKS_LOG_WARN(logger.get(), "Fail to trim old info log file: %s", s2.ToString().c_str()); } } // pin down the current logger_ instance before releasing the mutex. logger = logger_; } // Another thread could have put a new Logger instance into logger_ by now. // However, since logger is still hanging on to the previous instance // (reference count is not zero), we don't have to worry about it being // deleted while we are accessing it. // Note that logv itself is not mutex protected to allow maximum concurrency, // as thread safety should have been handled by the underlying logger. 
logger->Logv(format, ap); } void AutoRollLogger::WriteHeaderInfo() { mutex_.AssertHeld(); for (auto& header : headers_) { LogInternal("%s", header.c_str()); } } void AutoRollLogger::LogHeader(const char* format, va_list args) { if (!logger_) { return; } // header message are to be retained in memory. Since we cannot make any // assumptions about the data contained in va_list, we will retain them as // strings va_list tmp; va_copy(tmp, args); std::string data = ValistToString(format, tmp); va_end(tmp); MutexLock l(&mutex_); headers_.push_back(data); // Log the original message to the current log logger_->Logv(format, args); } bool AutoRollLogger::LogExpired() { if (cached_now_access_count >= call_NowMicros_every_N_records_) { cached_now = static_cast(env_->NowMicros() * 1e-6); cached_now_access_count = 0; } ++cached_now_access_count; return cached_now >= ctime_ + kLogFileTimeToRoll; } #endif // !ROCKSDB_LITE Status CreateLoggerFromOptions(const std::string& dbname, const DBOptions& options, std::shared_ptr* logger) { if (options.info_log) { *logger = options.info_log; return Status::OK(); } Env* env = options.env; std::string db_absolute_path; env->GetAbsolutePath(dbname, &db_absolute_path); std::string fname = InfoLogFileName(dbname, db_absolute_path, options.db_log_dir); env->CreateDirIfMissing(dbname); // In case it does not exist // Currently we only support roll by time-to-roll and log size #ifndef ROCKSDB_LITE if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) { AutoRollLogger* result = new AutoRollLogger( env, dbname, options.db_log_dir, options.max_log_file_size, options.log_file_time_to_roll, options.keep_log_file_num, options.info_log_level); Status s = result->GetStatus(); if (!s.ok()) { delete result; } else { logger->reset(result); } return s; } #endif // !ROCKSDB_LITE // Open a log file in the same directory as the db env->RenameFile(fname, OldInfoLogFileName(dbname, env->NowMicros(), db_absolute_path, options.db_log_dir)); auto s 
= env->NewLogger(fname, logger); if (logger->get() != nullptr) { (*logger)->SetInfoLogLevel(options.info_log_level); } return s; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/auto_roll_logger.h000066400000000000000000000114161370372246700204060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Logger implementation that can be shared by all environments // where enough posix functionality is available. #pragma once #include #include #include #include "file/filename.h" #include "port/port.h" #include "port/util_logger.h" #include "test_util/sync_point.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE // Rolls the log file by size and/or time class AutoRollLogger : public Logger { public: AutoRollLogger(Env* env, const std::string& dbname, const std::string& db_log_dir, size_t log_max_size, size_t log_file_time_to_roll, size_t keep_log_file_num, const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL); using Logger::Logv; void Logv(const char* format, va_list ap) override; // Write a header entry to the log. All header information will be written // again every time the log rolls over. virtual void LogHeader(const char* format, va_list ap) override; // check if the logger has encountered any problem. Status GetStatus() { return status_; } size_t GetLogFileSize() const override { if (!logger_) { return 0; } std::shared_ptr logger; { MutexLock l(&mutex_); // pin down the current logger_ instance before releasing the mutex. logger = logger_; } return logger->GetLogFileSize(); } void Flush() override { std::shared_ptr logger; { MutexLock l(&mutex_); // pin down the current logger_ instance before releasing the mutex. 
logger = logger_; } TEST_SYNC_POINT("AutoRollLogger::Flush:PinnedLogger"); if (logger) { logger->Flush(); } } virtual ~AutoRollLogger() { if (logger_ && !closed_) { logger_->Close(); } } using Logger::GetInfoLogLevel; InfoLogLevel GetInfoLogLevel() const override { MutexLock l(&mutex_); if (!logger_) { return Logger::GetInfoLogLevel(); } return logger_->GetInfoLogLevel(); } using Logger::SetInfoLogLevel; void SetInfoLogLevel(const InfoLogLevel log_level) override { MutexLock lock(&mutex_); Logger::SetInfoLogLevel(log_level); if (logger_) { logger_->SetInfoLogLevel(log_level); } } void SetCallNowMicrosEveryNRecords(uint64_t call_NowMicros_every_N_records) { call_NowMicros_every_N_records_ = call_NowMicros_every_N_records; } // Expose the log file path for testing purpose std::string TEST_log_fname() const { return log_fname_; } uint64_t TEST_ctime() const { return ctime_; } Logger* TEST_inner_logger() const { return logger_.get(); } protected: // Implementation of Close() virtual Status CloseImpl() override { if (logger_) { return logger_->Close(); } else { return Status::OK(); } } private: bool LogExpired(); Status ResetLogger(); void RollLogFile(); // Read all names of old log files into old_log_files_ // If there is any error, put the error code in status_ void GetExistingFiles(); // Delete old log files if it excceeds the limit. Status TrimOldLogFiles(); // Log message to logger without rolling void LogInternal(const char* format, ...); // Serialize the va_list to a string std::string ValistToString(const char* format, va_list args) const; // Write the logs marked as headers to the new log file void WriteHeaderInfo(); std::string log_fname_; // Current active info log's file name. 
std::string dbname_; std::string db_log_dir_; std::string db_absolute_path_; Env* env_; std::shared_ptr logger_; // current status of the logger Status status_; const size_t kMaxLogFileSize; const size_t kLogFileTimeToRoll; const size_t kKeepLogFileNum; // header information std::list headers_; // List of all existing info log files. Used for enforcing number of // info log files. // Full path is stored here. It consumes signifianctly more memory // than only storing file name. Can optimize if it causes a problem. std::queue old_log_files_; // to avoid frequent env->NowMicros() calls, we cached the current time uint64_t cached_now; uint64_t ctime_; uint64_t cached_now_access_count; uint64_t call_NowMicros_every_N_records_; mutable port::Mutex mutex_; }; #endif // !ROCKSDB_LITE // Facade to craete logger automatically Status CreateLoggerFromOptions(const std::string& dbname, const DBOptions& options, std::shared_ptr* logger); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/auto_roll_logger_test.cc000066400000000000000000000602751370372246700216120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #ifndef ROCKSDB_LITE #include "logging/auto_roll_logger.h" #include #include #include #include #include #include #include #include #include #include #include "logging/logging.h" #include "port/port.h" #include "rocksdb/db.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { namespace { class NoSleepEnv : public EnvWrapper { public: NoSleepEnv(Env* base) : EnvWrapper(base) {} void SleepForMicroseconds(int micros) override { fake_time_ += static_cast(micros); } uint64_t NowNanos() override { return fake_time_ * 1000; } uint64_t NowMicros() override { return fake_time_; } private: uint64_t fake_time_ = 6666666666; }; } // namespace // In this test we only want to Log some simple log message with // no format. LogMessage() provides such a simple interface and // avoids the [format-security] warning which occurs when you // call ROCKS_LOG_INFO(logger, log_message) directly. namespace { void LogMessage(Logger* logger, const char* message) { ROCKS_LOG_INFO(logger, "%s", message); } void LogMessage(const InfoLogLevel log_level, Logger* logger, const char* message) { Log(log_level, logger, "%s", message); } } // namespace class AutoRollLoggerTest : public testing::Test { public: static void InitTestDb() { #ifdef OS_WIN // Replace all slashes in the path so windows CompSpec does not // become confused std::string testDir(kTestDir); std::replace_if(testDir.begin(), testDir.end(), [](char ch) { return ch == '/'; }, '\\'); std::string deleteCmd = "if exist " + testDir + " rd /s /q " + testDir; #else std::string deleteCmd = "rm -rf " + kTestDir; #endif ASSERT_TRUE(system(deleteCmd.c_str()) == 0); Env::Default()->CreateDir(kTestDir); } void RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size, const std::string& log_message); void RollLogFileByTimeTest(Env*, AutoRollLogger* logger, size_t time, const std::string& log_message); // return list of files under kTestDir that contains "LOG" 
std::vector GetLogFiles() { std::vector ret; std::vector files; Status s = default_env->GetChildren(kTestDir, &files); // Should call ASSERT_OK() here but it doesn't compile. It's not // worth the time figuring out why. EXPECT_TRUE(s.ok()); for (const auto& f : files) { if (f.find("LOG") != std::string::npos) { ret.push_back(f); } } return ret; } // Delete all log files under kTestDir void CleanupLogFiles() { for (const std::string& f : GetLogFiles()) { ASSERT_OK(default_env->DeleteFile(kTestDir + "/" + f)); } } void RollNTimesBySize(Logger* auto_roll_logger, size_t file_num, size_t max_log_file_size) { // Roll the log 4 times, and it will trim to 3 files. std::string dummy_large_string; dummy_large_string.assign(max_log_file_size, '='); auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); for (size_t i = 0; i < file_num + 1; i++) { // Log enough bytes to trigger at least one roll. LogMessage(auto_roll_logger, dummy_large_string.c_str()); LogMessage(auto_roll_logger, ""); } } static const std::string kSampleMessage; static const std::string kTestDir; static const std::string kLogFile; static Env* default_env; }; const std::string AutoRollLoggerTest::kSampleMessage( "this is the message to be written to the log file!!"); const std::string AutoRollLoggerTest::kTestDir( test::PerThreadDBPath("db_log_test")); const std::string AutoRollLoggerTest::kLogFile( test::PerThreadDBPath("db_log_test") + "/LOG"); Env* AutoRollLoggerTest::default_env = Env::Default(); void AutoRollLoggerTest::RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size, const std::string& log_message) { logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); ASSERT_EQ(InfoLogLevel::INFO_LEVEL, logger->GetInfoLogLevel()); ASSERT_EQ(InfoLogLevel::INFO_LEVEL, logger->TEST_inner_logger()->GetInfoLogLevel()); // measure the size of each message, which is supposed // to be equal or greater than log_message.size() LogMessage(logger, log_message.c_str()); size_t message_size = 
logger->GetLogFileSize(); size_t current_log_size = message_size; // Test the cases when the log file will not be rolled. while (current_log_size + message_size < log_max_size) { LogMessage(logger, log_message.c_str()); current_log_size += message_size; ASSERT_EQ(current_log_size, logger->GetLogFileSize()); } // Now the log file will be rolled LogMessage(logger, log_message.c_str()); // Since rotation is checked before actual logging, we need to // trigger the rotation by logging another message. LogMessage(logger, log_message.c_str()); ASSERT_TRUE(message_size == logger->GetLogFileSize()); } void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger, size_t time, const std::string& log_message) { uint64_t expected_ctime; uint64_t actual_ctime; uint64_t total_log_size; EXPECT_OK(env->GetFileSize(kLogFile, &total_log_size)); expected_ctime = logger->TEST_ctime(); logger->SetCallNowMicrosEveryNRecords(0); // -- Write to the log for several times, which is supposed // to be finished before time. for (int i = 0; i < 10; ++i) { env->SleepForMicroseconds(50000); LogMessage(logger, log_message.c_str()); EXPECT_OK(logger->GetStatus()); // Make sure we always write to the same log file (by // checking the create time); actual_ctime = logger->TEST_ctime(); // Also make sure the log size is increasing. EXPECT_EQ(expected_ctime, actual_ctime); EXPECT_GT(logger->GetLogFileSize(), total_log_size); total_log_size = logger->GetLogFileSize(); } // -- Make the log file expire env->SleepForMicroseconds(static_cast(time * 1000000)); LogMessage(logger, log_message.c_str()); // At this time, the new log file should be created. 
actual_ctime = logger->TEST_ctime(); EXPECT_LT(expected_ctime, actual_ctime); EXPECT_LT(logger->GetLogFileSize(), total_log_size); } TEST_F(AutoRollLoggerTest, RollLogFileBySize) { InitTestDb(); size_t log_max_size = 1024 * 5; size_t keep_log_file_num = 10; AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0, keep_log_file_num); RollLogFileBySizeTest(&logger, log_max_size, kSampleMessage + ":RollLogFileBySize"); } TEST_F(AutoRollLoggerTest, RollLogFileByTime) { NoSleepEnv nse(Env::Default()); size_t time = 2; size_t log_size = 1024 * 5; size_t keep_log_file_num = 10; InitTestDb(); // -- Test the existence of file during the server restart. ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile)); AutoRollLogger logger(&nse, kTestDir, "", log_size, time, keep_log_file_num); ASSERT_OK(default_env->FileExists(kLogFile)); RollLogFileByTimeTest(&nse, &logger, time, kSampleMessage + ":RollLogFileByTime"); } TEST_F(AutoRollLoggerTest, SetInfoLogLevel) { InitTestDb(); Options options; options.info_log_level = InfoLogLevel::FATAL_LEVEL; options.max_log_file_size = 1024; std::shared_ptr logger; ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); auto* auto_roll_logger = dynamic_cast(logger.get()); ASSERT_NE(nullptr, auto_roll_logger); ASSERT_EQ(InfoLogLevel::FATAL_LEVEL, auto_roll_logger->GetInfoLogLevel()); ASSERT_EQ(InfoLogLevel::FATAL_LEVEL, auto_roll_logger->TEST_inner_logger()->GetInfoLogLevel()); auto_roll_logger->SetInfoLogLevel(InfoLogLevel::DEBUG_LEVEL); ASSERT_EQ(InfoLogLevel::DEBUG_LEVEL, auto_roll_logger->GetInfoLogLevel()); ASSERT_EQ(InfoLogLevel::DEBUG_LEVEL, logger->GetInfoLogLevel()); ASSERT_EQ(InfoLogLevel::DEBUG_LEVEL, auto_roll_logger->TEST_inner_logger()->GetInfoLogLevel()); } TEST_F(AutoRollLoggerTest, OpenLogFilesMultipleTimesWithOptionLog_max_size) { // If only 'log_max_size' options is specified, then every time // when rocksdb is restarted, a new empty log file will be created. 
InitTestDb(); // WORKAROUND: // avoid complier's complaint of "comparison between signed // and unsigned integer expressions" because literal 0 is // treated as "singed". size_t kZero = 0; size_t log_size = 1024; size_t keep_log_file_num = 10; AutoRollLogger* logger = new AutoRollLogger(Env::Default(), kTestDir, "", log_size, 0, keep_log_file_num); LogMessage(logger, kSampleMessage.c_str()); ASSERT_GT(logger->GetLogFileSize(), kZero); delete logger; // reopens the log file and an empty log file will be created. logger = new AutoRollLogger(Env::Default(), kTestDir, "", log_size, 0, 10); ASSERT_EQ(logger->GetLogFileSize(), kZero); delete logger; } TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) { size_t time = 2, log_max_size = 1024 * 5; size_t keep_log_file_num = 10; InitTestDb(); NoSleepEnv nse(Env::Default()); AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time, keep_log_file_num); // Test the ability to roll by size RollLogFileBySizeTest(&logger, log_max_size, kSampleMessage + ":CompositeRollByTimeAndSizeLogger"); // Test the ability to roll by Time RollLogFileByTimeTest(&nse, &logger, time, kSampleMessage + ":CompositeRollByTimeAndSizeLogger"); } #ifndef OS_WIN // TODO: does not build for Windows because of PosixLogger use below. 
Need to // port TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { DBOptions options; NoSleepEnv nse(Env::Default()); std::shared_ptr logger; // Normal logger ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); ASSERT_TRUE(dynamic_cast(logger.get())); // Only roll by size InitTestDb(); options.max_log_file_size = 1024; ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); AutoRollLogger* auto_roll_logger = dynamic_cast(logger.get()); ASSERT_TRUE(auto_roll_logger); RollLogFileBySizeTest( auto_roll_logger, options.max_log_file_size, kSampleMessage + ":CreateLoggerFromOptions - size"); // Only roll by Time options.env = &nse; InitTestDb(); options.max_log_file_size = 0; options.log_file_time_to_roll = 2; ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); auto_roll_logger = dynamic_cast(logger.get()); RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll, kSampleMessage + ":CreateLoggerFromOptions - time"); // roll by both Time and size InitTestDb(); options.max_log_file_size = 1024 * 5; options.log_file_time_to_roll = 2; ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); auto_roll_logger = dynamic_cast(logger.get()); RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, kSampleMessage + ":CreateLoggerFromOptions - both"); RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll, kSampleMessage + ":CreateLoggerFromOptions - both"); // Set keep_log_file_num { const size_t kFileNum = 3; InitTestDb(); options.max_log_file_size = 512; options.log_file_time_to_roll = 2; options.keep_log_file_num = kFileNum; ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); auto_roll_logger = dynamic_cast(logger.get()); // Roll the log 4 times, and it will trim to 3 files. 
std::string dummy_large_string; dummy_large_string.assign(options.max_log_file_size, '='); auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); for (size_t i = 0; i < kFileNum + 1; i++) { // Log enough bytes to trigger at least one roll. LogMessage(auto_roll_logger, dummy_large_string.c_str()); LogMessage(auto_roll_logger, ""); } std::vector files = GetLogFiles(); ASSERT_EQ(kFileNum, files.size()); CleanupLogFiles(); } // Set keep_log_file_num and dbname is different from // db_log_dir. { const size_t kFileNum = 3; InitTestDb(); options.max_log_file_size = 512; options.log_file_time_to_roll = 2; options.keep_log_file_num = kFileNum; options.db_log_dir = kTestDir; ASSERT_OK(CreateLoggerFromOptions("/dummy/db/name", options, &logger)); auto_roll_logger = dynamic_cast(logger.get()); // Roll the log 4 times, and it will trim to 3 files. std::string dummy_large_string; dummy_large_string.assign(options.max_log_file_size, '='); auto_roll_logger->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); for (size_t i = 0; i < kFileNum + 1; i++) { // Log enough bytes to trigger at least one roll. LogMessage(auto_roll_logger, dummy_large_string.c_str()); LogMessage(auto_roll_logger, ""); } std::vector files = GetLogFiles(); ASSERT_EQ(kFileNum, files.size()); for (const auto& f : files) { ASSERT_TRUE(f.find("dummy") != std::string::npos); } // Cleaning up those files. CleanupLogFiles(); } } TEST_F(AutoRollLoggerTest, AutoDeleting) { for (int attempt = 0; attempt < 2; attempt++) { // In the first attemp, db_log_dir is not set, while in the // second it is set. std::string dbname = (attempt == 0) ? kTestDir : "/test/dummy/dir"; std::string db_log_dir = (attempt == 0) ? 
"" : kTestDir; InitTestDb(); const size_t kMaxFileSize = 512; { size_t log_num = 8; AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, log_num); RollNTimesBySize(&logger, log_num, kMaxFileSize); ASSERT_EQ(log_num, GetLogFiles().size()); } // Shrink number of files { size_t log_num = 5; AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, log_num); ASSERT_EQ(log_num, GetLogFiles().size()); RollNTimesBySize(&logger, 3, kMaxFileSize); ASSERT_EQ(log_num, GetLogFiles().size()); } // Increase number of files again. { size_t log_num = 7; AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, log_num); ASSERT_EQ(6, GetLogFiles().size()); RollNTimesBySize(&logger, 3, kMaxFileSize); ASSERT_EQ(log_num, GetLogFiles().size()); } CleanupLogFiles(); } } TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { DBOptions options; std::shared_ptr logger; InitTestDb(); options.max_log_file_size = 1024 * 5; ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); AutoRollLogger* auto_roll_logger = dynamic_cast(logger.get()); ASSERT_TRUE(auto_roll_logger); ROCKSDB_NAMESPACE::port::Thread flush_thread; // Notes: // (1) Need to pin the old logger before beginning the roll, as rolling grabs // the mutex, which would prevent us from accessing the old logger. This // also marks flush_thread with AutoRollLogger::Flush:PinnedLogger. // (2) Need to reset logger during PosixLogger::Flush() to exercise a race // condition case, which is executing the flush with the pinned (old) // logger after auto-roll logger has cut over to a new logger. // (3) PosixLogger::Flush() happens in both threads but its SyncPoints only // are enabled in flush_thread (the one pinning the old logger). 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependencyAndMarkers( {{"AutoRollLogger::Flush:PinnedLogger", "AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"}, {"PosixLogger::Flush:Begin1", "AutoRollLogger::ResetLogger:BeforeNewLogger"}, {"AutoRollLogger::ResetLogger:AfterNewLogger", "PosixLogger::Flush:Begin2"}}, {{"AutoRollLogger::Flush:PinnedLogger", "PosixLogger::Flush:Begin1"}, {"AutoRollLogger::Flush:PinnedLogger", "PosixLogger::Flush:Begin2"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); flush_thread = port::Thread([&]() { auto_roll_logger->Flush(); }); TEST_SYNC_POINT( "AutoRollLoggerTest::LogFlushWhileRolling:PreRollAndPostThreadInit"); RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, kSampleMessage + ":LogFlushWhileRolling"); flush_thread.join(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } #endif // OS_WIN TEST_F(AutoRollLoggerTest, InfoLogLevel) { InitTestDb(); size_t log_size = 8192; size_t log_lines = 0; // an extra-scope to force the AutoRollLogger to flush the log file when it // becomes out of scope. { AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10); for (int log_level = InfoLogLevel::HEADER_LEVEL; log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { logger.SetInfoLogLevel((InfoLogLevel)log_level); for (int log_type = InfoLogLevel::DEBUG_LEVEL; log_type <= InfoLogLevel::HEADER_LEVEL; log_type++) { // log messages with log level smaller than log_level will not be // logged. LogMessage((InfoLogLevel)log_type, &logger, kSampleMessage.c_str()); } log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; } for (int log_level = InfoLogLevel::HEADER_LEVEL; log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { logger.SetInfoLogLevel((InfoLogLevel)log_level); // again, messages with level smaller than log_level will not be logged. 
ROCKS_LOG_HEADER(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_DEBUG(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_INFO(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_WARN(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_ERROR(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_FATAL(&logger, "%s", kSampleMessage.c_str()); log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; } } std::ifstream inFile(AutoRollLoggerTest::kLogFile.c_str()); size_t lines = std::count(std::istreambuf_iterator(inFile), std::istreambuf_iterator(), '\n'); ASSERT_EQ(log_lines, lines); inFile.close(); } TEST_F(AutoRollLoggerTest, Close) { InitTestDb(); size_t log_size = 8192; size_t log_lines = 0; AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10); for (int log_level = InfoLogLevel::HEADER_LEVEL; log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { logger.SetInfoLogLevel((InfoLogLevel)log_level); for (int log_type = InfoLogLevel::DEBUG_LEVEL; log_type <= InfoLogLevel::HEADER_LEVEL; log_type++) { // log messages with log level smaller than log_level will not be // logged. LogMessage((InfoLogLevel)log_type, &logger, kSampleMessage.c_str()); } log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; } for (int log_level = InfoLogLevel::HEADER_LEVEL; log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { logger.SetInfoLogLevel((InfoLogLevel)log_level); // again, messages with level smaller than log_level will not be logged. 
ROCKS_LOG_HEADER(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_DEBUG(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_INFO(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_WARN(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_ERROR(&logger, "%s", kSampleMessage.c_str()); ROCKS_LOG_FATAL(&logger, "%s", kSampleMessage.c_str()); log_lines += InfoLogLevel::HEADER_LEVEL - log_level + 1; } ASSERT_EQ(logger.Close(), Status::OK()); std::ifstream inFile(AutoRollLoggerTest::kLogFile.c_str()); size_t lines = std::count(std::istreambuf_iterator(inFile), std::istreambuf_iterator(), '\n'); ASSERT_EQ(log_lines, lines); inFile.close(); } // Test the logger Header function for roll over logs // We expect the new logs creates as roll over to carry the headers specified static std::vector GetOldFileNames(const std::string& path) { std::vector ret; const std::string dirname = path.substr(/*start=*/0, path.find_last_of("/")); const std::string fname = path.substr(path.find_last_of("/") + 1); std::vector children; Env::Default()->GetChildren(dirname, &children); // We know that the old log files are named [path] // Return all entities that match the pattern for (auto& child : children) { if (fname != child && child.find(fname) == 0) { ret.push_back(dirname + "/" + child); } } return ret; } TEST_F(AutoRollLoggerTest, LogHeaderTest) { static const size_t MAX_HEADERS = 10; static const size_t LOG_MAX_SIZE = 1024 * 5; static const std::string HEADER_STR = "Log header line"; // test_num == 0 -> standard call to Header() // test_num == 1 -> call to Log() with InfoLogLevel::HEADER_LEVEL for (int test_num = 0; test_num < 2; test_num++) { InitTestDb(); AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/"", LOG_MAX_SIZE, /*log_file_time_to_roll=*/0, /*keep_log_file_num=*/10); if (test_num == 0) { // Log some headers explicitly using Header() for (size_t i = 0; i < MAX_HEADERS; i++) { Header(&logger, "%s %" ROCKSDB_PRIszt, HEADER_STR.c_str(), i); } } else if (test_num == 1) { // 
HEADER_LEVEL should make this behave like calling Header() for (size_t i = 0; i < MAX_HEADERS; i++) { ROCKS_LOG_HEADER(&logger, "%s %" ROCKSDB_PRIszt, HEADER_STR.c_str(), i); } } const std::string newfname = logger.TEST_log_fname(); // Log enough data to cause a roll over int i = 0; for (size_t iter = 0; iter < 2; iter++) { while (logger.GetLogFileSize() < LOG_MAX_SIZE) { Info(&logger, (kSampleMessage + ":LogHeaderTest line %d").c_str(), i); ++i; } Info(&logger, "Rollover"); } // Flush the log for the latest file LogFlush(&logger); const auto oldfiles = GetOldFileNames(newfname); ASSERT_EQ(oldfiles.size(), (size_t) 2); for (auto& oldfname : oldfiles) { // verify that the files rolled over ASSERT_NE(oldfname, newfname); // verify that the old log contains all the header logs ASSERT_EQ(test::GetLinesCount(oldfname, HEADER_STR), MAX_HEADERS); } } } TEST_F(AutoRollLoggerTest, LogFileExistence) { ROCKSDB_NAMESPACE::DB* db; ROCKSDB_NAMESPACE::Options options; #ifdef OS_WIN // Replace all slashes in the path so windows CompSpec does not // become confused std::string testDir(kTestDir); std::replace_if(testDir.begin(), testDir.end(), [](char ch) { return ch == '/'; }, '\\'); std::string deleteCmd = "if exist " + testDir + " rd /s /q " + testDir; #else std::string deleteCmd = "rm -rf " + kTestDir; #endif ASSERT_EQ(system(deleteCmd.c_str()), 0); options.max_log_file_size = 100 * 1024 * 1024; options.create_if_missing = true; ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kTestDir, &db)); ASSERT_OK(default_env->FileExists(kLogFile)); delete db; } TEST_F(AutoRollLoggerTest, FileCreateFailure) { Options options; options.max_log_file_size = 100 * 1024 * 1024; options.db_log_dir = "/a/dir/does/not/exist/at/all"; std::shared_ptr logger; ASSERT_NOK(CreateLoggerFromOptions("", options, &logger)); ASSERT_TRUE(!logger); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int 
/*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as AutoRollLogger is not supported in ROCKSDB_LITE\n"); return 0; } #endif // !ROCKSDB_LITE rocksdb-6.11.4/logging/env_logger.h000066400000000000000000000111341370372246700171730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Logger implementation that uses custom Env object for logging. #pragma once #include #include #include #include "port/sys_time.h" #include "file/writable_file_writer.h" #include "monitoring/iostats_context_imp.h" #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "test_util/sync_point.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { class EnvLogger : public Logger { public: EnvLogger(std::unique_ptr&& writable_file, const std::string& fname, const EnvOptions& options, Env* env, InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL) : Logger(log_level), file_(std::move(writable_file), fname, options, env), last_flush_micros_(0), env_(env), flush_pending_(false) {} ~EnvLogger() { if (!closed_) { closed_ = true; CloseHelper(); } } private: void FlushLocked() { mutex_.AssertHeld(); if (flush_pending_) { flush_pending_ = false; file_.Flush(); } last_flush_micros_ = env_->NowMicros(); } void Flush() override { TEST_SYNC_POINT("EnvLogger::Flush:Begin1"); TEST_SYNC_POINT("EnvLogger::Flush:Begin2"); MutexLock l(&mutex_); FlushLocked(); } Status CloseImpl() override { return CloseHelper(); } Status CloseHelper() { mutex_.Lock(); const auto close_status = file_.Close(); mutex_.Unlock(); if (close_status.ok()) { return 
close_status; } return Status::IOError("Close of log file failed with error:" + (close_status.getState() ? std::string(close_status.getState()) : std::string())); } using Logger::Logv; void Logv(const char* format, va_list ap) override { IOSTATS_TIMER_GUARD(logger_nanos); const uint64_t thread_id = env_->GetThreadID(); // We try twice: the first time with a fixed-size stack allocated buffer, // and the second time with a much larger dynamically allocated buffer. char buffer[500]; for (int iter = 0; iter < 2; iter++) { char* base; int bufsize; if (iter == 0) { bufsize = sizeof(buffer); base = buffer; } else { bufsize = 65536; base = new char[bufsize]; } char* p = base; char* limit = base + bufsize; struct timeval now_tv; gettimeofday(&now_tv, nullptr); const time_t seconds = now_tv.tv_sec; struct tm t; localtime_r(&seconds, &t); p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), static_cast(thread_id)); // Print the message if (p < limit) { va_list backup_ap; va_copy(backup_ap, ap); p += vsnprintf(p, limit - p, format, backup_ap); va_end(backup_ap); } // Truncate to available space if necessary if (p >= limit) { if (iter == 0) { continue; // Try again with larger buffer } else { p = limit - 1; } } // Add newline if necessary if (p == base || p[-1] != '\n') { *p++ = '\n'; } assert(p <= limit); mutex_.Lock(); // We will ignore any error returned by Append(). file_.Append(Slice(base, p - base)); flush_pending_ = true; const uint64_t now_micros = env_->NowMicros(); if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { FlushLocked(); } mutex_.Unlock(); if (base != buffer) { delete[] base; } break; } } size_t GetLogFileSize() const override { MutexLock l(&mutex_); return file_.GetFileSize(); } private: WritableFileWriter file_; mutable port::Mutex mutex_; // Mutex to protect the shared variables below. 
const static uint64_t flush_every_seconds_ = 5; std::atomic_uint_fast64_t last_flush_micros_; Env* env_; std::atomic flush_pending_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/env_logger_test.cc000066400000000000000000000103251370372246700203710ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "logging/env_logger.h" #include "env/mock_env.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { namespace { // In this test we only want to Log some simple log message with // no format. void LogMessage(std::shared_ptr logger, const std::string& message) { Log(logger, "%s", message.c_str()); } // Helper method to write the message num_times in the given logger. void WriteLogs(std::shared_ptr logger, const std::string& message, int num_times) { for (int ii = 0; ii < num_times; ++ii) { LogMessage(logger, message); } } } // namespace class EnvLoggerTest : public testing::Test { public: Env* env_; EnvLoggerTest() : env_(Env::Default()) {} ~EnvLoggerTest() = default; std::shared_ptr CreateLogger() { std::shared_ptr result; assert(NewEnvLogger(kLogFile, env_, &result).ok()); assert(result); result->SetInfoLogLevel(InfoLogLevel::INFO_LEVEL); return result; } void DeleteLogFile() { ASSERT_OK(env_->DeleteFile(kLogFile)); } static const std::string kSampleMessage; static const std::string kTestDir; static const std::string kLogFile; }; const std::string EnvLoggerTest::kSampleMessage = "this is the message to be written to the log file!!"; const std::string EnvLoggerTest::kLogFile = test::PerThreadDBPath("log_file"); TEST_F(EnvLoggerTest, EmptyLogFile) { auto logger = CreateLogger(); ASSERT_EQ(logger->Close(), Status::OK()); // Check the size of the log file. 
uint64_t file_size; ASSERT_EQ(env_->GetFileSize(kLogFile, &file_size), Status::OK()); ASSERT_EQ(file_size, 0); DeleteLogFile(); } TEST_F(EnvLoggerTest, LogMultipleLines) { auto logger = CreateLogger(); // Write multiple lines. const int kNumIter = 10; WriteLogs(logger, kSampleMessage, kNumIter); // Flush the logs. logger->Flush(); ASSERT_EQ(logger->Close(), Status::OK()); // Validate whether the log file has 'kNumIter' number of lines. ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter); DeleteLogFile(); } TEST_F(EnvLoggerTest, Overwrite) { { auto logger = CreateLogger(); // Write multiple lines. const int kNumIter = 10; WriteLogs(logger, kSampleMessage, kNumIter); ASSERT_EQ(logger->Close(), Status::OK()); // Validate whether the log file has 'kNumIter' number of lines. ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter); } // Now reopen the file again. { auto logger = CreateLogger(); // File should be empty. uint64_t file_size; ASSERT_EQ(env_->GetFileSize(kLogFile, &file_size), Status::OK()); ASSERT_EQ(file_size, 0); ASSERT_EQ(logger->GetLogFileSize(), 0); ASSERT_EQ(logger->Close(), Status::OK()); } DeleteLogFile(); } TEST_F(EnvLoggerTest, Close) { auto logger = CreateLogger(); // Write multiple lines. const int kNumIter = 10; WriteLogs(logger, kSampleMessage, kNumIter); ASSERT_EQ(logger->Close(), Status::OK()); // Validate whether the log file has 'kNumIter' number of lines. ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter); DeleteLogFile(); } TEST_F(EnvLoggerTest, ConcurrentLogging) { auto logger = CreateLogger(); const int kNumIter = 20; std::function cb = [&]() { WriteLogs(logger, kSampleMessage, kNumIter); logger->Flush(); }; // Write to the logs from multiple threads. std::vector threads; const int kNumThreads = 5; // Create threads. for (int ii = 0; ii < kNumThreads; ++ii) { threads.push_back(port::Thread(cb)); } // Wait for them to complete. 
for (auto& th : threads) { th.join(); } ASSERT_EQ(logger->Close(), Status::OK()); // Verfiy the log file. ASSERT_EQ(test::GetLinesCount(kLogFile, kSampleMessage), kNumIter * kNumThreads); DeleteLogFile(); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/logging/event_logger.cc000066400000000000000000000040011370372246700176550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "logging/event_logger.h" #include #include #include #include #include "logging/logging.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { EventLoggerStream::EventLoggerStream(Logger* logger) : logger_(logger), log_buffer_(nullptr), max_log_size_(0), json_writer_(nullptr) {} EventLoggerStream::EventLoggerStream(LogBuffer* log_buffer, const size_t max_log_size) : logger_(nullptr), log_buffer_(log_buffer), max_log_size_(max_log_size), json_writer_(nullptr) {} EventLoggerStream::~EventLoggerStream() { if (json_writer_) { json_writer_->EndObject(); #ifdef ROCKSDB_PRINT_EVENTS_TO_STDOUT printf("%s\n", json_writer_->Get().c_str()); #else if (logger_) { EventLogger::Log(logger_, *json_writer_); } else if (log_buffer_) { assert(max_log_size_); EventLogger::LogToBuffer(log_buffer_, *json_writer_, max_log_size_); } #endif delete json_writer_; } } void EventLogger::Log(const JSONWriter& jwriter) { Log(logger_, jwriter); } void EventLogger::Log(Logger* logger, const JSONWriter& jwriter) { #ifdef ROCKSDB_PRINT_EVENTS_TO_STDOUT printf("%s\n", jwriter.Get().c_str()); #else ROCKSDB_NAMESPACE::Log(logger, "%s %s", Prefix(), jwriter.Get().c_str()); #endif } void EventLogger::LogToBuffer(LogBuffer* log_buffer, const JSONWriter& jwriter, const size_t max_log_size) 
{ #ifdef ROCKSDB_PRINT_EVENTS_TO_STDOUT printf("%s\n", jwriter.Get().c_str()); #else assert(log_buffer); ROCKSDB_NAMESPACE::LogToBuffer(log_buffer, max_log_size, "%s %s", Prefix(), jwriter.Get().c_str()); #endif } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/event_logger.h000066400000000000000000000116321370372246700175270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include "logging/log_buffer.h" #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { class JSONWriter { public: JSONWriter() : state_(kExpectKey), first_element_(true), in_array_(false) { stream_ << "{"; } void AddKey(const std::string& key) { assert(state_ == kExpectKey); if (!first_element_) { stream_ << ", "; } stream_ << "\"" << key << "\": "; state_ = kExpectValue; first_element_ = false; } void AddValue(const char* value) { assert(state_ == kExpectValue || state_ == kInArray); if (state_ == kInArray && !first_element_) { stream_ << ", "; } stream_ << "\"" << value << "\""; if (state_ != kInArray) { state_ = kExpectKey; } first_element_ = false; } template void AddValue(const T& value) { assert(state_ == kExpectValue || state_ == kInArray); if (state_ == kInArray && !first_element_) { stream_ << ", "; } stream_ << value; if (state_ != kInArray) { state_ = kExpectKey; } first_element_ = false; } void StartArray() { assert(state_ == kExpectValue); state_ = kInArray; in_array_ = true; stream_ << "["; first_element_ = true; } void EndArray() { assert(state_ == kInArray); state_ = kExpectKey; in_array_ = false; stream_ << "]"; first_element_ = false; } void StartObject() { assert(state_ == kExpectValue); state_ = kExpectKey; stream_ << "{"; first_element_ = true; } void EndObject() { assert(state_ == 
kExpectKey); stream_ << "}"; first_element_ = false; } void StartArrayedObject() { assert(state_ == kInArray && in_array_); state_ = kExpectValue; if (!first_element_) { stream_ << ", "; } StartObject(); } void EndArrayedObject() { assert(in_array_); EndObject(); state_ = kInArray; } std::string Get() const { return stream_.str(); } JSONWriter& operator<<(const char* val) { if (state_ == kExpectKey) { AddKey(val); } else { AddValue(val); } return *this; } JSONWriter& operator<<(const std::string& val) { return *this << val.c_str(); } template JSONWriter& operator<<(const T& val) { assert(state_ != kExpectKey); AddValue(val); return *this; } private: enum JSONWriterState { kExpectKey, kExpectValue, kInArray, kInArrayedObject, }; JSONWriterState state_; bool first_element_; bool in_array_; std::ostringstream stream_; }; class EventLoggerStream { public: template EventLoggerStream& operator<<(const T& val) { MakeStream(); *json_writer_ << val; return *this; } void StartArray() { json_writer_->StartArray(); } void EndArray() { json_writer_->EndArray(); } void StartObject() { json_writer_->StartObject(); } void EndObject() { json_writer_->EndObject(); } ~EventLoggerStream(); private: void MakeStream() { if (!json_writer_) { json_writer_ = new JSONWriter(); *this << "time_micros" << std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()).count(); } } friend class EventLogger; explicit EventLoggerStream(Logger* logger); explicit EventLoggerStream(LogBuffer* log_buffer, const size_t max_log_size); // exactly one is non-nullptr Logger* const logger_; LogBuffer* const log_buffer_; const size_t max_log_size_; // used only for log_buffer_ // ownership JSONWriter* json_writer_; }; // here is an example of the output that will show up in the LOG: // 2015/01/15-14:13:25.788019 1105ef000 EVENT_LOG_v1 {"time_micros": // 1421360005788015, "event": "table_file_creation", "file_number": 12, // "file_size": 1909699} class EventLogger { public: static const 
char* Prefix() { return "EVENT_LOG_v1"; } explicit EventLogger(Logger* logger) : logger_(logger) {} EventLoggerStream Log() { return EventLoggerStream(logger_); } EventLoggerStream LogToBuffer(LogBuffer* log_buffer) { return EventLoggerStream(log_buffer, LogBuffer::kDefaultMaxLogSize); } EventLoggerStream LogToBuffer(LogBuffer* log_buffer, const size_t max_log_size) { return EventLoggerStream(log_buffer, max_log_size); } void Log(const JSONWriter& jwriter); static void Log(Logger* logger, const JSONWriter& jwriter); static void LogToBuffer( LogBuffer* log_buffer, const JSONWriter& jwriter, const size_t max_log_size = LogBuffer::kDefaultMaxLogSize); private: Logger* logger_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/event_logger_test.cc000066400000000000000000000023551370372246700207260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "logging/event_logger.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { class EventLoggerTest : public testing::Test {}; class StringLogger : public Logger { public: using Logger::Logv; void Logv(const char* format, va_list ap) override { vsnprintf(buffer_, sizeof(buffer_), format, ap); } char* buffer() { return buffer_; } private: char buffer_[1000]; }; TEST_F(EventLoggerTest, SimpleTest) { StringLogger logger; EventLogger event_logger(&logger); event_logger.Log() << "id" << 5 << "event" << "just_testing"; std::string output(logger.buffer()); ASSERT_TRUE(output.find("\"event\": \"just_testing\"") != std::string::npos); ASSERT_TRUE(output.find("\"id\": 5") != std::string::npos); ASSERT_TRUE(output.find("\"time_micros\"") != std::string::npos); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/logging/log_buffer.cc000066400000000000000000000046631370372246700173250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "logging/log_buffer.h" #include "port/sys_time.h" #include "port/port.h" namespace ROCKSDB_NAMESPACE { LogBuffer::LogBuffer(const InfoLogLevel log_level, Logger*info_log) : log_level_(log_level), info_log_(info_log) {} void LogBuffer::AddLogToBuffer(size_t max_log_size, const char* format, va_list ap) { if (!info_log_ || log_level_ < info_log_->GetInfoLogLevel()) { // Skip the level because of its level. 
return; } char* alloc_mem = arena_.AllocateAligned(max_log_size); BufferedLog* buffered_log = new (alloc_mem) BufferedLog(); char* p = buffered_log->message; char* limit = alloc_mem + max_log_size - 1; // store the time gettimeofday(&(buffered_log->now_tv), nullptr); // Print the message if (p < limit) { va_list backup_ap; va_copy(backup_ap, ap); auto n = vsnprintf(p, limit - p, format, backup_ap); #ifndef OS_WIN // MS reports -1 when the buffer is too short assert(n >= 0); #endif if (n > 0) { p += n; } else { p = limit; } va_end(backup_ap); } if (p > limit) { p = limit; } // Add '\0' to the end *p = '\0'; logs_.push_back(buffered_log); } void LogBuffer::FlushBufferToLog() { for (BufferedLog* log : logs_) { const time_t seconds = log->now_tv.tv_sec; struct tm t; if (localtime_r(&seconds, &t) != nullptr) { Log(log_level_, info_log_, "(Original Log Time %04d/%02d/%02d-%02d:%02d:%02d.%06d) %s", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(log->now_tv.tv_usec), log->message); } } logs_.clear(); } void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size, const char* format, ...) { if (log_buffer != nullptr) { va_list ap; va_start(ap, format); log_buffer->AddLogToBuffer(max_log_size, format, ap); va_end(ap); } } void LogToBuffer(LogBuffer* log_buffer, const char* format, ...) { if (log_buffer != nullptr) { va_list ap; va_start(ap, format); log_buffer->AddLogToBuffer(LogBuffer::kDefaultMaxLogSize, format, ap); va_end(ap); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/log_buffer.h000066400000000000000000000034651370372246700171660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include "memory/arena.h" #include "port/sys_time.h" #include "rocksdb/env.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class Logger; // A class to buffer info log entries and flush them in the end. class LogBuffer { public: // log_level: the log level for all the logs // info_log: logger to write the logs to LogBuffer(const InfoLogLevel log_level, Logger* info_log); // Add a log entry to the buffer. Use default max_log_size. // max_log_size indicates maximize log size, including some metadata. void AddLogToBuffer(size_t max_log_size, const char* format, va_list ap); size_t IsEmpty() const { return logs_.empty(); } // Flush all buffered log to the info log. void FlushBufferToLog(); static const size_t kDefaultMaxLogSize = 512; private: // One log entry with its timestamp struct BufferedLog { struct timeval now_tv; // Timestamp of the log char message[1]; // Beginning of log message }; const InfoLogLevel log_level_; Logger* info_log_; Arena arena_; autovector logs_; }; // Add log to the LogBuffer for a delayed info logging. It can be used when // we want to add some logs inside a mutex. // max_log_size indicates maximize log size, including some metadata. extern void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size, const char* format, ...); // Same as previous function, but with default max log size. extern void LogToBuffer(LogBuffer* log_buffer, const char* format, ...); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/logging/logging.h000066400000000000000000000062431370372246700164770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Must not be included from any .h files to avoid polluting the namespace // with macros. #pragma once // Helper macros that include information about file name and line number #define ROCKS_LOG_STRINGIFY(x) #x #define ROCKS_LOG_TOSTRING(x) ROCKS_LOG_STRINGIFY(x) #define ROCKS_LOG_PREPEND_FILE_LINE(FMT) ("[%s:" ROCKS_LOG_TOSTRING(__LINE__) "] " FMT) inline const char* RocksLogShorterFileName(const char* file) { // 18 is the length of "logging/logging.h". // If the name of this file changed, please change this number, too. return file + (sizeof(__FILE__) > 18 ? sizeof(__FILE__) - 18 : 0); } // Don't inclide file/line info in HEADER level #define ROCKS_LOG_HEADER(LGR, FMT, ...) \ ROCKSDB_NAMESPACE::Log(InfoLogLevel::HEADER_LEVEL, LGR, FMT, ##__VA_ARGS__) #define ROCKS_LOG_DEBUG(LGR, FMT, ...) \ ROCKSDB_NAMESPACE::Log(InfoLogLevel::DEBUG_LEVEL, LGR, \ ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) #define ROCKS_LOG_INFO(LGR, FMT, ...) \ ROCKSDB_NAMESPACE::Log(InfoLogLevel::INFO_LEVEL, LGR, \ ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) #define ROCKS_LOG_WARN(LGR, FMT, ...) \ ROCKSDB_NAMESPACE::Log(InfoLogLevel::WARN_LEVEL, LGR, \ ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) #define ROCKS_LOG_ERROR(LGR, FMT, ...) \ ROCKSDB_NAMESPACE::Log(InfoLogLevel::ERROR_LEVEL, LGR, \ ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) #define ROCKS_LOG_FATAL(LGR, FMT, ...) \ ROCKSDB_NAMESPACE::Log(InfoLogLevel::FATAL_LEVEL, LGR, \ ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) #define ROCKS_LOG_BUFFER(LOG_BUF, FMT, ...) 
\ ROCKSDB_NAMESPACE::LogToBuffer(LOG_BUF, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), \ ##__VA_ARGS__) #define ROCKS_LOG_BUFFER_MAX_SZ(LOG_BUF, MAX_LOG_SIZE, FMT, ...) \ ROCKSDB_NAMESPACE::LogToBuffer( \ LOG_BUF, MAX_LOG_SIZE, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) #define ROCKS_LOG_DETAILS(LGR, FMT, ...) \ ; // due to overhead by default skip such lines // ROCKS_LOG_DEBUG(LGR, FMT, ##__VA_ARGS__) rocksdb-6.11.4/logging/posix_logger.h000066400000000000000000000124321370372246700175470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Logger implementation that can be shared by all environments // where enough posix functionality is available. 
#pragma once #include #include #include "port/sys_time.h" #include #include #ifdef OS_LINUX #ifndef FALLOC_FL_KEEP_SIZE #include #endif #endif #include #include "env/io_posix.h" #include "monitoring/iostats_context_imp.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { class PosixLogger : public Logger { private: Status PosixCloseHelper() { int ret; ret = fclose(file_); if (ret) { return IOError("Unable to close log file", "", ret); } return Status::OK(); } FILE* file_; uint64_t (*gettid_)(); // Return the thread id for the current thread std::atomic_size_t log_size_; int fd_; const static uint64_t flush_every_seconds_ = 5; std::atomic_uint_fast64_t last_flush_micros_; Env* env_; std::atomic flush_pending_; protected: virtual Status CloseImpl() override { return PosixCloseHelper(); } public: PosixLogger(FILE* f, uint64_t (*gettid)(), Env* env, const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL) : Logger(log_level), file_(f), gettid_(gettid), log_size_(0), fd_(fileno(f)), last_flush_micros_(0), env_(env), flush_pending_(false) {} virtual ~PosixLogger() { if (!closed_) { closed_ = true; PosixCloseHelper(); } } virtual void Flush() override { TEST_SYNC_POINT("PosixLogger::Flush:Begin1"); TEST_SYNC_POINT("PosixLogger::Flush:Begin2"); if (flush_pending_) { flush_pending_ = false; fflush(file_); } last_flush_micros_ = env_->NowMicros(); } using Logger::Logv; virtual void Logv(const char* format, va_list ap) override { IOSTATS_TIMER_GUARD(logger_nanos); const uint64_t thread_id = (*gettid_)(); // We try twice: the first time with a fixed-size stack allocated buffer, // and the second time with a much larger dynamically allocated buffer. 
char buffer[500]; for (int iter = 0; iter < 2; iter++) { char* base; int bufsize; if (iter == 0) { bufsize = sizeof(buffer); base = buffer; } else { bufsize = 65536; base = new char[bufsize]; } char* p = base; char* limit = base + bufsize; struct timeval now_tv; gettimeofday(&now_tv, nullptr); const time_t seconds = now_tv.tv_sec; struct tm t; localtime_r(&seconds, &t); p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), static_cast(thread_id)); // Print the message if (p < limit) { va_list backup_ap; va_copy(backup_ap, ap); p += vsnprintf(p, limit - p, format, backup_ap); va_end(backup_ap); } // Truncate to available space if necessary if (p >= limit) { if (iter == 0) { continue; // Try again with larger buffer } else { p = limit - 1; } } // Add newline if necessary if (p == base || p[-1] != '\n') { *p++ = '\n'; } assert(p <= limit); const size_t write_size = p - base; #ifdef ROCKSDB_FALLOCATE_PRESENT const int kDebugLogChunkSize = 128 * 1024; // If this write would cross a boundary of kDebugLogChunkSize // space, pre-allocate more space to avoid overly large // allocations from filesystem allocsize options. 
const size_t log_size = log_size_; const size_t last_allocation_chunk = ((kDebugLogChunkSize - 1 + log_size) / kDebugLogChunkSize); const size_t desired_allocation_chunk = ((kDebugLogChunkSize - 1 + log_size + write_size) / kDebugLogChunkSize); if (last_allocation_chunk != desired_allocation_chunk) { fallocate( fd_, FALLOC_FL_KEEP_SIZE, 0, static_cast(desired_allocation_chunk * kDebugLogChunkSize)); } #endif size_t sz = fwrite(base, 1, write_size, file_); flush_pending_ = true; if (sz > 0) { log_size_ += write_size; } uint64_t now_micros = static_cast(now_tv.tv_sec) * 1000000 + now_tv.tv_usec; if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { Flush(); } if (base != buffer) { delete[] base; } break; } } size_t GetLogFileSize() const override { return log_size_; } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memory/000077500000000000000000000000001370372246700145555ustar00rootroot00000000000000rocksdb-6.11.4/memory/allocator.h000066400000000000000000000032731370372246700167130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Abstract interface for allocating memory in blocks. This memory is freed // when the allocator object is destroyed. See the Arena class for more info. 
#pragma once #include #include #include "rocksdb/write_buffer_manager.h" namespace ROCKSDB_NAMESPACE { class Logger; class Allocator { public: virtual ~Allocator() {} virtual char* Allocate(size_t bytes) = 0; virtual char* AllocateAligned(size_t bytes, size_t huge_page_size = 0, Logger* logger = nullptr) = 0; virtual size_t BlockSize() const = 0; }; class AllocTracker { public: explicit AllocTracker(WriteBufferManager* write_buffer_manager); // No copying allowed AllocTracker(const AllocTracker&) = delete; void operator=(const AllocTracker&) = delete; ~AllocTracker(); void Allocate(size_t bytes); // Call when we're finished allocating memory so we can free it from // the write buffer's limit. void DoneAllocating(); void FreeMem(); bool is_freed() const { return write_buffer_manager_ == nullptr || freed_; } private: WriteBufferManager* write_buffer_manager_; std::atomic bytes_allocated_; bool done_allocating_; bool freed_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memory/arena.cc000066400000000000000000000160341370372246700161560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "memory/arena.h" #ifndef OS_WIN #include #endif #include #include "logging/logging.h" #include "port/malloc.h" #include "port/port.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { // MSVC complains that it is already defined since it is static in the header. 
#ifndef _MSC_VER
const size_t Arena::kInlineSize;
#endif

// Lower and upper bounds applied to the block size by OptimizeBlockSize().
// 2u << 30 is 2^31 bytes (2 GiB).
const size_t Arena::kMinBlockSize = 4096;
const size_t Arena::kMaxBlockSize = 2u << 30;
// Alignment unit used for aligned allocations and for rounding block sizes.
static const int kAlignUnit = alignof(max_align_t);

// Clamps block_size into [kMinBlockSize, kMaxBlockSize] and rounds it up to
// the next multiple of kAlignUnit.
size_t OptimizeBlockSize(size_t block_size) {
  // Make sure block_size is in optimal range
  block_size = std::max(Arena::kMinBlockSize, block_size);
  block_size = std::min(Arena::kMaxBlockSize, block_size);

  // make sure block_size is the multiple of kAlignUnit
  if (block_size % kAlignUnit != 0) {
    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
  }

  return block_size;
}

// Sets up the arena so that the first allocations are served from the
// embedded inline_block_: aligned allocations grow from its start, unaligned
// ones from its end. The inline block is counted in blocks_memory_ and, when
// a tracker is supplied, reported to it as kInlineSize bytes.
Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
    : kBlockSize(OptimizeBlockSize(block_size)), tracker_(tracker) {
  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
         kBlockSize % kAlignUnit == 0);
  // NOTE(review): const_cast (and the other *_cast / template uses in this
  // file) has no <type> argument in this copy -- presumably stripped in
  // extraction; confirm upstream.
  TEST_SYNC_POINT_CALLBACK("Arena::Arena:0", const_cast(&kBlockSize));
  alloc_bytes_remaining_ = sizeof(inline_block_);
  blocks_memory_ += alloc_bytes_remaining_;
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
#ifdef MAP_HUGETLB
  hugetlb_size_ = huge_page_size;
  // When a block is larger than one huge page, round the huge-page request
  // up to the smallest multiple of the page size that holds a whole block.
  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
  }
#else
  (void)huge_page_size;
#endif
  if (tracker_ != nullptr) {
    tracker_->Allocate(kInlineSize);
  }
}

// Releases every new[]-allocated block and unmaps every huge-page mapping.
// If a tracker is attached it is expected to be freed already (asserted),
// and FreeMem() is called on it.
Arena::~Arena() {
  if (tracker_ != nullptr) {
    assert(tracker_->is_freed());
    tracker_->FreeMem();
  }
  for (const auto& block : blocks_) {
    delete[] block;
  }

#ifdef MAP_HUGETLB
  for (const auto& mmap_info : huge_blocks_) {
    // Entries left at nullptr are placeholders from a failed mmap in
    // AllocateFromHugePage(); there is nothing to unmap for them.
    if (mmap_info.addr_ == nullptr) {
      continue;
    }
    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
    if (ret != 0) {
      // TODO(sdong): Better handling
    }
  }
#endif
}

// Slow path used when the current block cannot satisfy a request.
// - Requests above a quarter of kBlockSize get a dedicated block and leave
//   the current block untouched.
// - Otherwise a fresh block (a huge page when configured and available, else
//   kBlockSize bytes) becomes the current block and the request is carved
//   from its front (aligned) or its back (unaligned).
char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  if (bytes > kBlockSize / 4) {
    ++irregular_block_num;
    // Object is more than a quarter of our block size. Allocate it separately
    // to avoid wasting too much space in leftover bytes.
    return AllocateNewBlock(bytes);
  }

  // We waste the remaining space in the current block.
  size_t size = 0;
  char* block_head = nullptr;
#ifdef MAP_HUGETLB
  if (hugetlb_size_) {
    size = hugetlb_size_;
    block_head = AllocateFromHugePage(size);
  }
#endif
  // No huge page configured, or the huge-page allocation failed.
  if (!block_head) {
    size = kBlockSize;
    block_head = AllocateNewBlock(size);
  }
  alloc_bytes_remaining_ = size - bytes;

  if (aligned) {
    aligned_alloc_ptr_ = block_head + bytes;
    unaligned_alloc_ptr_ = block_head + size;
    return block_head;
  } else {
    aligned_alloc_ptr_ = block_head;
    unaligned_alloc_ptr_ = block_head + size - bytes;
    return unaligned_alloc_ptr_;
  }
}

// Maps `bytes` bytes of anonymous huge-page memory. Returns nullptr when
// huge pages are disabled for this arena (hugetlb_size_ == 0), when mmap
// fails, or when MAP_HUGETLB is not defined at build time. On success the
// mapping is recorded in huge_blocks_ and accounted in blocks_memory_ and
// the tracker.
char* Arena::AllocateFromHugePage(size_t bytes) {
#ifdef MAP_HUGETLB
  if (hugetlb_size_ == 0) {
    return nullptr;
  }
  // Reserve space in `huge_blocks_` before calling `mmap`.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called
  //   `mmap` yet.
  // - If `mmap` throws, no memory leaks because the vector will be cleaned up
  //   via RAII.
  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);

  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);

  if (addr == MAP_FAILED) {
    // The placeholder entry stays in huge_blocks_ with a null address; the
    // destructor skips such entries.
    return nullptr;
  }
  huge_blocks_.back() = MmapInfo(addr, bytes);
  blocks_memory_ += bytes;
  if (tracker_ != nullptr) {
    tracker_->Allocate(bytes);
  }
  return reinterpret_cast(addr);
#else
  (void)bytes;
  return nullptr;
#endif
}

// Returns memory aligned to kAlignUnit. When huge_page_size > 0 (and
// MAP_HUGETLB is available) the request is first rounded up to a multiple of
// huge_page_size and attempted as a dedicated huge-page mapping; on failure
// a warning is logged and the normal path is used. The normal path takes the
// bytes from the front of the current block, padding to the alignment, and
// falls back to AllocateFallback() when the block is too small.
char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
                             Logger* logger) {
  assert((kAlignUnit & (kAlignUnit - 1)) ==
         0);  // Pointer size should be a power of 2

#ifdef MAP_HUGETLB
  if (huge_page_size > 0 && bytes > 0) {
    // Allocate from a huge page TLB table.
    assert(logger != nullptr);  // logger need to be passed in.
    size_t reserved_size =
        ((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
    assert(reserved_size >= bytes);

    char* addr = AllocateFromHugePage(reserved_size);
    if (addr == nullptr) {
      // NOTE(review): AllocateFromHugePage() also returns nullptr without
      // calling mmap when hugetlb_size_ == 0, in which case errno is not
      // set by this path -- confirm whether the logged text can be stale.
      ROCKS_LOG_WARN(logger,
                     "AllocateAligned fail to allocate huge TLB pages: %s",
                     strerror(errno));
      // fail back to malloc
    } else {
      return addr;
    }
  }
#else
  (void)huge_page_size;
  (void)logger;
#endif

  // slop = padding needed to bring aligned_alloc_ptr_ up to the next
  // kAlignUnit boundary (0 when it is already aligned).
  size_t current_mod =
      reinterpret_cast(aligned_alloc_ptr_) & (kAlignUnit - 1);
  size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
  size_t needed = bytes + slop;
  char* result;
  if (needed <= alloc_bytes_remaining_) {
    result = aligned_alloc_ptr_ + slop;
    aligned_alloc_ptr_ += needed;
    alloc_bytes_remaining_ -= needed;
  } else {
    // AllocateFallback always returns aligned memory
    result = AllocateFallback(bytes, true /* aligned */);
  }
  assert((reinterpret_cast(result) & (kAlignUnit - 1)) == 0);
  return result;
}

// Allocates a block of block_bytes with new[], records it in blocks_, and
// adds its size to blocks_memory_ and the tracker. The accounted size is
// malloc_usable_size() of the block when ROCKSDB_MALLOC_USABLE_SIZE is
// defined, otherwise block_bytes.
char* Arena::AllocateNewBlock(size_t block_bytes) {
  // Reserve space in `blocks_` before allocating memory via new.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called `new`
  //   yet.
  // - If `new` throws, no memory leaks because the vector will be cleaned up
  //   via RAII.
  blocks_.emplace_back(nullptr);

  char* block = new char[block_bytes];
  size_t allocated_size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  allocated_size = malloc_usable_size(block);
#ifndef NDEBUG
  // It's hard to predict what malloc_usable_size() returns.
  // A callback can allow users to change the costed size.
  std::pair pair(&allocated_size, &block_bytes);
  TEST_SYNC_POINT_CALLBACK("Arena::AllocateNewBlock:0", &pair);
#endif  // NDEBUG
#else
  allocated_size = block_bytes;
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  blocks_memory_ += allocated_size;
  if (tracker_ != nullptr) {
    tracker_->Allocate(allocated_size);
  }
  blocks_.back() = block;
  return block;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/memory/arena.h000066400000000000000000000122161370372246700160160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// Arena is an implementation of Allocator class. For a request of small size,
// it allocates a block with pre-defined block size. For a request of big
// size, it uses malloc to directly get the requested size.

#pragma once
// NOTE(review): the bare #include directives below carry no header name in
// this copy -- presumably lost in extraction; confirm upstream.
#ifndef OS_WIN
#include
#endif
#include
#include
#include
#include
#include
#include "memory/allocator.h"
#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

class Arena : public Allocator {
 public:
  // No copying allowed
  Arena(const Arena&) = delete;
  void operator=(const Arena&) = delete;

  // Size in bytes of the block embedded in the Arena object itself.
  static const size_t kInlineSize = 2048;
  static const size_t kMinBlockSize;
  static const size_t kMaxBlockSize;

  // huge_page_size: if 0, don't use huge page TLB. If > 0 (should set to the
  // supported hugepage size of the system), block allocation will try huge
  // page TLB first. If allocation fails, will fall back to normal case.
  explicit Arena(size_t block_size = kMinBlockSize,
                 AllocTracker* tracker = nullptr, size_t huge_page_size = 0);
  ~Arena();

  char* Allocate(size_t bytes) override;

  // huge_page_size: if >0, will try to allocate from huge page TLB.
  // The argument will be the size of the page size for huge page TLB. Bytes
  // will be rounded up to multiple of the page size to allocate through mmap
  // anonymous option with huge page on. The extra space allocated will be
  // wasted. If allocation fails, will fall back to normal case. To enable it,
  // need to reserve huge pages for it to be allocated, like:
  //     sysctl -w vm.nr_hugepages=20
  // See linux doc Documentation/vm/hugetlbpage.txt for details.
  // huge page allocation can fail. In this case it will fall back to
  // normal cases. The messages will be logged to logger. So when calling with
  // huge_page_tlb_size > 0, we highly recommend a logger is passed in.
  // Otherwise, the error message will be printed out to stderr directly.
  // NOTE(review): the implementation asserts logger != nullptr on the
  // huge-page path, which appears stricter than the "recommend" wording
  // above -- confirm which is intended.
  char* AllocateAligned(size_t bytes, size_t huge_page_size = 0,
                        Logger* logger = nullptr) override;

  // Returns an estimate of the total memory usage of data allocated
  // by the arena (exclude the space allocated but not yet used for future
  // allocations).
  size_t ApproximateMemoryUsage() const {
    return blocks_memory_ + blocks_.capacity() * sizeof(char*) -
           alloc_bytes_remaining_;
  }

  // Total bytes accounted for all blocks, including the inline block.
  size_t MemoryAllocatedBytes() const { return blocks_memory_; }

  // Bytes still free in the currently active block.
  size_t AllocatedAndUnused() const { return alloc_bytes_remaining_; }

  // If an allocation is too big, we'll allocate an irregular block with the
  // same size of that allocation.
  size_t IrregularBlockNum() const { return irregular_block_num; }

  size_t BlockSize() const override { return kBlockSize; }

  // True while no block has been added to blocks_.
  bool IsInInlineBlock() const {
    return blocks_.empty();
  }

 private:
  char inline_block_[kInlineSize] __attribute__((__aligned__(alignof(max_align_t))));
  // Number of bytes allocated in one block
  const size_t kBlockSize;
  // Array of new[] allocated memory blocks
  // NOTE(review): the std::vector uses below have no template argument in
  // this copy -- presumably stripped in extraction; confirm upstream.
  typedef std::vector Blocks;
  Blocks blocks_;

  // One mmap'ed region: start address and length as passed to mmap/munmap.
  struct MmapInfo {
    void* addr_;
    size_t length_;

    MmapInfo(void* addr, size_t length) : addr_(addr), length_(length) {}
  };
  std::vector huge_blocks_;
  size_t irregular_block_num = 0;

  // Stats for current active block.
  // For each block, we allocate aligned memory chunks from one end and
  // allocate unaligned memory chunks from the other end. Otherwise the
  // memory waste for alignment will be higher if we allocate both types of
  // memory from one direction.
  char* unaligned_alloc_ptr_ = nullptr;
  char* aligned_alloc_ptr_ = nullptr;
  // How many bytes left in currently active block?
  size_t alloc_bytes_remaining_ = 0;

#ifdef MAP_HUGETLB
  size_t hugetlb_size_ = 0;
#endif  // MAP_HUGETLB
  char* AllocateFromHugePage(size_t bytes);
  char* AllocateFallback(size_t bytes, bool aligned);
  char* AllocateNewBlock(size_t block_bytes);

  // Bytes of memory in blocks allocated so far
  size_t blocks_memory_ = 0;
  AllocTracker* tracker_;
};

// Fast path for unaligned allocation: take the bytes from the back of the
// current block when they fit, otherwise defer to AllocateFallback().
inline char* Arena::Allocate(size_t bytes) {
  // The semantics of what to return are a bit messy if we allow
  // 0-byte allocations, so we disallow them here (we don't need
  // them for our internal use).
  assert(bytes > 0);
  if (bytes <= alloc_bytes_remaining_) {
    unaligned_alloc_ptr_ -= bytes;
    alloc_bytes_remaining_ -= bytes;
    return unaligned_alloc_ptr_;
  }
  return AllocateFallback(bytes, false /* unaligned */);
}

// check and adjust the block_size so that the return value is
//  1. in the range of [kMinBlockSize, kMaxBlockSize].
//  2. the multiple of align unit.
extern size_t OptimizeBlockSize(size_t block_size);

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/memory/arena_test.cc000066400000000000000000000152351370372246700172170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "memory/arena.h"
#include "test_util/testharness.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {

namespace {
// Huge page size (2 MiB) passed to the huge-page variant of each test.
const size_t kHugePageSize = 2 * 1024 * 1024;
}  // namespace
class ArenaTest : public testing::Test {};

// Constructing and destroying an arena without allocating must work.
TEST_F(ArenaTest, Empty) { Arena arena0; }

namespace {
// Predicate: `allocated` lies in [expected, expected + expected / 10].
bool CheckMemoryAllocated(size_t allocated, size_t expected) {
  // The value returned by Arena::MemoryAllocatedBytes() may be greater than
  // the requested memory. We choose a somewhat arbitrary upper bound of
  // max_expected = expected * 1.1 to detect critical overallocation.
  size_t max_expected = expected + expected / 10;
  return allocated >= expected && allocated <= max_expected;
}

// Checks Arena::MemoryAllocatedBytes() across three request sizes: above a
// quarter block, below a quarter block, and above a whole block.
void MemoryAllocatedBytesTest(size_t huge_page_size) {
  const int N = 17;
  size_t req_sz;  // requested size
  size_t bsz = 32 * 1024;  // block size
  size_t expected_memory_allocated;

  Arena arena(bsz, nullptr, huge_page_size);

  // requested size > quarter of a block:
  //   allocate requested size separately
  req_sz = 12 * 1024;
  for (int i = 0; i < N; i++) {
    arena.Allocate(req_sz);
  }
  expected_memory_allocated = req_sz * N + Arena::kInlineSize;
  ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(),
               expected_memory_allocated);

  arena.Allocate(Arena::kInlineSize - 1);

  // requested size < quarter of a block:
  //   allocate a block with the default size, then try to use unused part
  //   of the block. So one new block will be allocated for the first
  //   Allocate(99) call. All the remaining calls won't lead to new allocation.
  req_sz = 99;
  for (int i = 0; i < N; i++) {
    arena.Allocate(req_sz);
  }
  if (huge_page_size) {
    // Either a regular block or a huge page may have been used, depending on
    // whether the huge-page allocation succeeded.
    ASSERT_TRUE(
        CheckMemoryAllocated(arena.MemoryAllocatedBytes(),
                             expected_memory_allocated + bsz) ||
        CheckMemoryAllocated(arena.MemoryAllocatedBytes(),
                             expected_memory_allocated + huge_page_size));
  } else {
    expected_memory_allocated += bsz;
    ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(),
                 expected_memory_allocated);
  }

  // requested size > size of a block:
  //   allocate requested size separately
  expected_memory_allocated = arena.MemoryAllocatedBytes();
  req_sz = 8 * 1024 * 1024;
  for (int i = 0; i < N; i++) {
    arena.Allocate(req_sz);
  }
  expected_memory_allocated += req_sz * N;
  ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(),
               expected_memory_allocated);
}

// Make sure we didn't count the allocated but not used memory space in
// Arena::ApproximateMemoryUsage()
static void ApproximateMemoryUsageTest(size_t huge_page_size) {
  const size_t kBlockSize = 4096;
  const size_t kEntrySize = kBlockSize / 8;
  const size_t kZero = 0;
  Arena arena(kBlockSize, nullptr, huge_page_size);
  ASSERT_EQ(kZero, arena.ApproximateMemoryUsage());

  // allocate inline bytes
  const size_t kAlignUnit = alignof(max_align_t);
  EXPECT_TRUE(arena.IsInInlineBlock());
  arena.AllocateAligned(kAlignUnit);
  EXPECT_TRUE(arena.IsInInlineBlock());
  arena.AllocateAligned(Arena::kInlineSize / 2 - (2 * kAlignUnit));
  EXPECT_TRUE(arena.IsInInlineBlock());
  arena.AllocateAligned(Arena::kInlineSize / 2);
  EXPECT_TRUE(arena.IsInInlineBlock());
  // The three requests above sum to kInlineSize - kAlignUnit bytes.
  ASSERT_EQ(arena.ApproximateMemoryUsage(), Arena::kInlineSize - kAlignUnit);
  ASSERT_PRED2(CheckMemoryAllocated, arena.MemoryAllocatedBytes(),
               Arena::kInlineSize);

  auto num_blocks = kBlockSize / kEntrySize;

  // first allocation
  arena.AllocateAligned(kEntrySize);
  EXPECT_FALSE(arena.IsInInlineBlock());
  auto mem_usage = arena.MemoryAllocatedBytes();
  if (huge_page_size) {
    ASSERT_TRUE(
        CheckMemoryAllocated(mem_usage, kBlockSize + Arena::kInlineSize) ||
        CheckMemoryAllocated(mem_usage, huge_page_size + Arena::kInlineSize));
  } else {
    ASSERT_PRED2(CheckMemoryAllocated, mem_usage,
                 kBlockSize + Arena::kInlineSize);
  }
  auto usage = arena.ApproximateMemoryUsage();
  ASSERT_LT(usage, mem_usage);
  // Further entries must come out of the same block: allocated bytes stay
  // constant while the usage estimate grows by exactly one entry each time.
  for (size_t i = 1; i < num_blocks; ++i) {
    arena.AllocateAligned(kEntrySize);
    ASSERT_EQ(mem_usage, arena.MemoryAllocatedBytes());
    ASSERT_EQ(arena.ApproximateMemoryUsage(), usage + kEntrySize);
    EXPECT_FALSE(arena.IsInInlineBlock());
    usage = arena.ApproximateMemoryUsage();
  }
  if (huge_page_size) {
    ASSERT_TRUE(usage > mem_usage ||
                usage + huge_page_size - kBlockSize == mem_usage);
  } else {
    ASSERT_GT(usage, mem_usage);
  }
}

// Performs N randomly sized aligned/unaligned allocations, fills each with a
// pattern derived from its index, and finally verifies that no allocation
// was overwritten. Also bounds ApproximateMemoryUsage() against the number
// of bytes requested.
static void SimpleTest(size_t huge_page_size) {
  // NOTE(review): the template arguments of this vector are missing in this
  // copy -- presumably stripped in extraction; the elements are built with
  // std::make_pair(s, r) below.
  std::vector> allocated;
  Arena arena(Arena::kMinBlockSize, nullptr, huge_page_size);
  const int N = 100000;
  size_t bytes = 0;
  Random rnd(301);
  for (int i = 0; i < N; i++) {
    size_t s;
    if (i % (N / 10) == 0) {
      // Every (N / 10)-th iteration requests i bytes.
      s = i;
    } else {
      s = rnd.OneIn(4000)
              ? rnd.Uniform(6000)
              : (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20));
    }
    if (s == 0) {
      // Our arena disallows size 0 allocations.
      s = 1;
    }
    char* r;
    if (rnd.OneIn(10)) {
      r = arena.AllocateAligned(s);
    } else {
      r = arena.Allocate(s);
    }

    for (unsigned int b = 0; b < s; b++) {
      // Fill the "i"th allocation with a known bit pattern
      r[b] = i % 256;
    }
    bytes += s;
    allocated.push_back(std::make_pair(s, r));
    ASSERT_GE(arena.ApproximateMemoryUsage(), bytes);
    if (i > N / 10) {
      ASSERT_LE(arena.ApproximateMemoryUsage(), bytes * 1.10);
    }
  }
  for (unsigned int i = 0; i < allocated.size(); i++) {
    size_t num_bytes = allocated[i].first;
    const char* p = allocated[i].second;
    for (unsigned int b = 0; b < num_bytes; b++) {
      // Check the "i"th allocation for the known bit pattern
      ASSERT_EQ(int(p[b]) & 0xff, (int)(i % 256));
    }
  }
}
}  // namespace

// Each test runs once without huge pages and once with kHugePageSize.
TEST_F(ArenaTest, MemoryAllocatedBytes) {
  MemoryAllocatedBytesTest(0);
  MemoryAllocatedBytesTest(kHugePageSize);
}

TEST_F(ArenaTest, ApproximateMemoryUsage) {
  ApproximateMemoryUsageTest(0);
  ApproximateMemoryUsageTest(kHugePageSize);
}

TEST_F(ArenaTest, Simple) {
  SimpleTest(0);
  SimpleTest(kHugePageSize);
}
}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/memory/concurrent_arena.cc000066400000000000000000000032271370372246700204200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "memory/concurrent_arena.h"
// NOTE(review): the bare #include below carries no header name in this copy
// -- presumably lost in extraction; confirm upstream.
#include
#include "port/port.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {

#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
// Per-thread shard hint; 0 means this thread has not called Repick() yet.
__thread size_t ConcurrentArena::tls_cpuid = 0;
#endif

namespace {
// If the shard block size is too large, in the worst case, every core
// allocates a block without populating it. If the shard block size is
// 1MB, 64 cores will quickly allocate 64MB, and may quickly trigger a
// flush. Cap the size instead.
const size_t kMaxShardBlockSize = size_t{128 * 1024};
}  // namespace

// The shard block size is one eighth of block_size, capped at
// kMaxShardBlockSize. The remaining arguments are forwarded unchanged to the
// wrapped Arena; Fixup() then publishes the arena's initial counters.
ConcurrentArena::ConcurrentArena(size_t block_size, AllocTracker* tracker,
                                 size_t huge_page_size)
    : shard_block_size_(std::min(kMaxShardBlockSize, block_size / 8)),
      shards_(),
      arena_(block_size, tracker, huge_page_size) {
  Fixup();
}

// Obtains a shard and its index from shards_, and (when thread-local storage
// is supported) remembers the index in tls_cpuid for later calls from this
// thread. Returns the shard.
ConcurrentArena::Shard* ConcurrentArena::Repick() {
  auto shard_and_index = shards_.AccessElementAndIndex();
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  // even if we are cpu 0, use a non-zero tls_cpuid so we can tell we
  // have repicked
  tls_cpuid = shard_and_index.second | shards_.Size();
#endif
  return shard_and_index.first;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/memory/concurrent_arena.h000066400000000000000000000172051370372246700202630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
// NOTE(review): the bare #include directives below carry no header name in
// this copy -- presumably lost in extraction; confirm upstream.
#include
#include
#include
#include "memory/allocator.h"
#include "memory/arena.h"
#include "port/lang.h"
#include "port/likely.h"
#include "util/core_local.h"
#include "util/mutexlock.h"
#include "util/thread_local.h"

// Only generate field unused warning for padding array, or build under
// GCC 4.8.1 will fail.
#ifdef __clang__
#define ROCKSDB_FIELD_UNUSED __attribute__((__unused__))
#else
#define ROCKSDB_FIELD_UNUSED
#endif  // __clang__

namespace ROCKSDB_NAMESPACE {

class Logger;

// ConcurrentArena wraps an Arena.  It makes it thread safe using a fast
// inlined spinlock, and adds small per-core allocation caches to avoid
// contention for small allocations.  To avoid any memory waste from the
// per-core shards, they are kept small, they are lazily instantiated
// only if ConcurrentArena actually notices concurrent use, and they
// adjust their size so that there is no fragmentation waste when the
// shard blocks are allocated from the underlying main arena.
class ConcurrentArena : public Allocator {
 public:
  // block_size and huge_page_size are the same as for Arena (and are
  // in fact just passed to the constructor of arena_).  The core-local
  // shards compute their shard_block_size as a fraction of block_size
  // that varies according to the hardware concurrency level.
  explicit ConcurrentArena(size_t block_size = Arena::kMinBlockSize,
                           AllocTracker* tracker = nullptr,
                           size_t huge_page_size = 0);

  // Unaligned allocation; served by AllocateImpl(), which uses
  // arena_.Allocate() whenever it goes to the main arena.
  char* Allocate(size_t bytes) override {
    return AllocateImpl(bytes, false /*force_arena*/,
                        [this, bytes]() { return arena_.Allocate(bytes); });
  }

  // Aligned allocation. The size is rounded up to a multiple of
  // sizeof(void*); a non-zero huge_page_size forces the request to the main
  // arena.
  char* AllocateAligned(size_t bytes, size_t huge_page_size = 0,
                        Logger* logger = nullptr) override {
    size_t rounded_up = ((bytes - 1) | (sizeof(void*) - 1)) + 1;
    assert(rounded_up >= bytes && rounded_up < bytes + sizeof(void*) &&
           (rounded_up % sizeof(void*)) == 0);

    return AllocateImpl(rounded_up, huge_page_size != 0 /*force_arena*/,
                        [this, rounded_up, huge_page_size, logger]() {
                          return arena_.AllocateAligned(rounded_up,
                                                        huge_page_size, logger);
                        });
  }

  // Arena usage estimate minus the bytes parked unused in the shards.
  // Takes arena_mutex_ while reading the arena.
  // NOTE(review): std::unique_lock / std::lock_guard / std::atomic /
  // CoreLocalArray appear without template arguments throughout this class,
  // and `template` without a parameter list -- presumably stripped in
  // extraction; confirm upstream.
  size_t ApproximateMemoryUsage() const {
    std::unique_lock lock(arena_mutex_, std::defer_lock);
    lock.lock();
    return arena_.ApproximateMemoryUsage() - ShardAllocatedAndUnused();
  }

  // The three getters below read the copies published by Fixup() with
  // relaxed atomics instead of locking the arena.
  size_t MemoryAllocatedBytes() const {
    return memory_allocated_bytes_.load(std::memory_order_relaxed);
  }

  size_t AllocatedAndUnused() const {
    return arena_allocated_and_unused_.load(std::memory_order_relaxed) +
           ShardAllocatedAndUnused();
  }

  size_t IrregularBlockNum() const {
    return irregular_block_num_.load(std::memory_order_relaxed);
  }

  size_t BlockSize() const override { return arena_.BlockSize(); }

 private:
  // One per-core cache: a spin mutex, the start of its free region, and the
  // number of bytes still unused in that region.
  struct Shard {
    char padding[40] ROCKSDB_FIELD_UNUSED;
    mutable SpinMutex mutex;
    char* free_begin_;
    std::atomic allocated_and_unused_;

    Shard() : free_begin_(nullptr), allocated_and_unused_(0) {}
  };

#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  static __thread size_t tls_cpuid;
#else
  // Without thread-local support tls_cpuid is the constant 0.
  enum ZeroFirstEnum : size_t { tls_cpuid = 0 };
#endif

  char padding0[56] ROCKSDB_FIELD_UNUSED;

  size_t shard_block_size_;

  CoreLocalArray shards_;

  Arena arena_;
  mutable SpinMutex arena_mutex_;
  // Copies of arena_ counters, refreshed by Fixup().
  std::atomic arena_allocated_and_unused_;
  std::atomic memory_allocated_bytes_;
  std::atomic irregular_block_num_;

  char padding1[56] ROCKSDB_FIELD_UNUSED;

  Shard* Repick();

  // Sum of allocated_and_unused_ over all shards (relaxed loads).
  size_t ShardAllocatedAndUnused() const {
    size_t total = 0;
    for (size_t i = 0; i < shards_.Size(); ++i) {
      total += shards_.AccessAtCore(i)->allocated_and_unused_.load(
          std::memory_order_relaxed);
    }
    return total;
  }

  // Common allocation path. `func` performs the allocation on arena_ and is
  // only invoked with arena_mutex_ held. `force_arena` bypasses the shards.
  template
  char* AllocateImpl(size_t bytes, bool force_arena, const Func& func) {
    size_t cpu;

    // Go directly to the arena if the allocation is too large, or if
    // we've never needed to Repick() and the arena mutex is available
    // with no waiting.  This keeps the fragmentation penalty of
    // concurrency zero unless it might actually confer an advantage.
    std::unique_lock arena_lock(arena_mutex_, std::defer_lock);
    if (bytes > shard_block_size_ / 4 || force_arena ||
        ((cpu = tls_cpuid) == 0 &&
         !shards_.AccessAtCore(0)->allocated_and_unused_.load(
             std::memory_order_relaxed) &&
         arena_lock.try_lock())) {
      // try_lock() is only attempted in the last alternative above, so the
      // lock may still need to be taken here.
      if (!arena_lock.owns_lock()) {
        arena_lock.lock();
      }
      auto rv = func();
      Fixup();
      return rv;
    }

    // pick a shard from which to allocate
    Shard* s = shards_.AccessAtCore(cpu & (shards_.Size() - 1));
    if (!s->mutex.try_lock()) {
      s = Repick();
      s->mutex.lock();
    }
    // The shard mutex is already held; adopt it so it is released on exit.
    std::unique_lock lock(s->mutex, std::adopt_lock);

    size_t avail = s->allocated_and_unused_.load(std::memory_order_relaxed);
    if (avail < bytes) {
      // reload
      std::lock_guard reload_lock(arena_mutex_);

      // If the arena's current block is within a factor of 2 of the right
      // size, we adjust our request to avoid arena waste.
      auto exact = arena_allocated_and_unused_.load(std::memory_order_relaxed);
      assert(exact == arena_.AllocatedAndUnused());

      if (exact >= bytes && arena_.IsInInlineBlock()) {
        // If we haven't exhausted arena's inline block yet, allocate from arena
        // directly. This ensures that we'll do the first few small allocations
        // without allocating any blocks.
        // In particular this prevents empty memtables from using
        // disproportionately large amount of memory: a memtable allocates on
        // the order of 1 KB of memory when created; we wouldn't want to
        // allocate a full arena block (typically a few megabytes) for that,
        // especially if there are thousands of empty memtables.
        auto rv = func();
        Fixup();
        return rv;
      }

      avail = exact >= shard_block_size_ / 2 && exact < shard_block_size_ * 2
                  ? exact
                  : shard_block_size_;
      s->free_begin_ = arena_.AllocateAligned(avail);
      Fixup();
    }
    s->allocated_and_unused_.store(avail - bytes, std::memory_order_relaxed);

    char* rv;
    if ((bytes % sizeof(void*)) == 0) {
      // aligned allocation from the beginning
      rv = s->free_begin_;
      s->free_begin_ += bytes;
    } else {
      // unaligned from the end
      rv = s->free_begin_ + avail - bytes;
    }
    return rv;
  }

  // Publishes arena_'s current counters into the atomics read by the
  // lock-free getters above.
  void Fixup() {
    arena_allocated_and_unused_.store(arena_.AllocatedAndUnused(),
                                      std::memory_order_relaxed);
    memory_allocated_bytes_.store(arena_.MemoryAllocatedBytes(),
                                  std::memory_order_relaxed);
    irregular_block_num_.store(arena_.IrregularBlockNum(),
                               std::memory_order_relaxed);
  }

  ConcurrentArena(const ConcurrentArena&) = delete;
  ConcurrentArena& operator=(const ConcurrentArena&) = delete;
};

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/memory/jemalloc_nodump_allocator.cc000066400000000000000000000163671370372246700223110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "memory/jemalloc_nodump_allocator.h"
// NOTE(review): the bare #include directives below carry no header name in
// this copy -- presumably lost in extraction; confirm upstream.
#include
#include
#include "port/likely.h"
#include "port/port.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR

// NOTE(review): std::atomic / std::unique_ptr / autovector and the *_cast
// expressions in this file appear without template arguments -- presumably
// stripped in extraction; confirm upstream.
std::atomic JemallocNodumpAllocator::original_alloc_{nullptr};

// Takes ownership of the hooks object and registers
// DestroyThreadSpecificCache as the cleanup function of tcache_.
JemallocNodumpAllocator::JemallocNodumpAllocator(
    JemallocAllocatorOptions& options,
    std::unique_ptr&& arena_hooks, unsigned arena_index)
    : options_(options),
      arena_hooks_(std::move(arena_hooks)),
      arena_index_(arena_index),
      tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) {}

// Returns the tcache flag to pass to mallocx()/dallocx() for an allocation
// of `size` bytes: MALLOCX_TCACHE_NONE when tcache is limited and `size`
// falls outside (tcache_size_lower_bound, tcache_size_upper_bound], or when
// creating a tcache fails; otherwise MALLOCX_TCACHE(index) for this thread's
// lazily created tcache.
int JemallocNodumpAllocator::GetThreadSpecificCache(size_t size) {
  // We always enable tcache. The only corner case is when there are a ton of
  // threads accessing with low frequency, then it could consume a lot of
  // memory (may reach # threads * ~1MB) without bringing too much benefit.
  if (options_.limit_tcache_size && (size <= options_.tcache_size_lower_bound ||
                                     size > options_.tcache_size_upper_bound)) {
    return MALLOCX_TCACHE_NONE;
  }
  unsigned* tcache_index = reinterpret_cast(tcache_.Get());
  if (UNLIKELY(tcache_index == nullptr)) {
    // Instantiate tcache.
    tcache_index = new unsigned(0);
    size_t tcache_index_size = sizeof(unsigned);
    int ret =
        mallctl("tcache.create", tcache_index, &tcache_index_size, nullptr, 0);
    if (ret != 0) {
      // No good way to expose the error. Silently disable tcache.
      delete tcache_index;
      return MALLOCX_TCACHE_NONE;
    }
    tcache_.Reset(static_cast(tcache_index));
  }
  return MALLOCX_TCACHE(*tcache_index);
}

// Allocates `size` bytes from this allocator's dedicated arena.
void* JemallocNodumpAllocator::Allocate(size_t size) {
  int tcache_flag = GetThreadSpecificCache(size);
  return mallocx(size, MALLOCX_ARENA(arena_index_) | tcache_flag);
}

// Frees `p`. The usable size is only looked up when tcache is limited,
// because only then does the tcache decision depend on the size.
void JemallocNodumpAllocator::Deallocate(void* p) {
  // Obtain tcache.
  size_t size = 0;
  if (options_.limit_tcache_size) {
    size = malloc_usable_size(p);
  }
  int tcache_flag = GetThreadSpecificCache(size);
  // No need to pass arena index to dallocx(). Jemalloc will find arena index
  // from its own metadata.
  dallocx(p, tcache_flag);
}

// Extent-alloc hook installed on the arena: delegates to the saved original
// hook and then marks the returned range MADV_DONTDUMP.
void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr,
                                     size_t size, size_t alignment, bool* zero,
                                     bool* commit, unsigned arena_ind) {
  extent_alloc_t* original_alloc =
      original_alloc_.load(std::memory_order_relaxed);
  assert(original_alloc != nullptr);
  void* result = original_alloc(extent, new_addr, size, alignment, zero, commit,
                                arena_ind);
  if (result != nullptr) {
    int ret = madvise(result, size, MADV_DONTDUMP);
    if (ret != 0) {
      // NOTE(review): this prints madvise()'s return value rather than
      // errno, and the message has no trailing newline -- confirm whether
      // errno was intended.
      fprintf(
          stderr,
          "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error code: %d",
          ret);
      assert(false);
    }
  }
  return result;
}

// Destroys the jemalloc arena with the given (non-zero) index. Returns
// Status::Incomplete carrying the mallctl error code on failure.
Status JemallocNodumpAllocator::DestroyArena(unsigned arena_index) {
  assert(arena_index != 0);
  std::string key = "arena." + ToString(arena_index) + ".destroy";
  int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0);
  if (ret != 0) {
    return Status::Incomplete("Failed to destroy jemalloc arena, error code: " +
                              ToString(ret));
  }
  return Status::OK();
}

// Destroys the jemalloc tcache whose index is stored at `ptr` and frees the
// heap-allocated index itself.
void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) {
  assert(ptr != nullptr);
  unsigned* tcache_index = static_cast(ptr);
  size_t tcache_index_size = sizeof(unsigned);
  int ret __attribute__((__unused__)) =
      mallctl("tcache.destroy", nullptr, 0, tcache_index, tcache_index_size);
  // Silently ignore error.
  assert(ret == 0);
  delete tcache_index;
}

JemallocNodumpAllocator::~JemallocNodumpAllocator() {
  // Destroy tcache before destroying arena.
  autovector tcache_list;
  tcache_.Scrape(&tcache_list, nullptr);
  for (void* tcache_index : tcache_list) {
    DestroyThreadSpecificCache(tcache_index);
  }
  // Destroy arena. Silently ignore error.
Status s __attribute__((__unused__)) = DestroyArena(arena_index_); assert(s.ok()); } size_t JemallocNodumpAllocator::UsableSize(void* p, size_t /*allocation_size*/) const { return malloc_usable_size(static_cast(p)); } #endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR Status NewJemallocNodumpAllocator( JemallocAllocatorOptions& options, std::shared_ptr* memory_allocator) { *memory_allocator = nullptr; Status unsupported = Status::NotSupported( "JemallocNodumpAllocator only available with jemalloc version >= 5 " "and MADV_DONTDUMP is available."); #ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR (void)options; return unsupported; #else if (!HasJemalloc()) { return unsupported; } if (memory_allocator == nullptr) { return Status::InvalidArgument("memory_allocator must be non-null."); } if (options.limit_tcache_size && options.tcache_size_lower_bound >= options.tcache_size_upper_bound) { return Status::InvalidArgument( "tcache_size_lower_bound larger or equal to tcache_size_upper_bound."); } // Create arena. unsigned arena_index = 0; size_t arena_index_size = sizeof(arena_index); int ret = mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0); if (ret != 0) { return Status::Incomplete("Failed to create jemalloc arena, error code: " + ToString(ret)); } assert(arena_index != 0); // Read existing hooks. std::string key = "arena." + ToString(arena_index) + ".extent_hooks"; extent_hooks_t* hooks; size_t hooks_size = sizeof(hooks); ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0); if (ret != 0) { JemallocNodumpAllocator::DestroyArena(arena_index); return Status::Incomplete("Failed to read existing hooks, error code: " + ToString(ret)); } // Store existing alloc. 
extent_alloc_t* original_alloc = hooks->alloc; extent_alloc_t* expected = nullptr; bool success = JemallocNodumpAllocator::original_alloc_.compare_exchange_strong( expected, original_alloc); if (!success && original_alloc != expected) { JemallocNodumpAllocator::DestroyArena(arena_index); return Status::Incomplete("Original alloc conflict."); } // Set the custom hook. std::unique_ptr new_hooks(new extent_hooks_t(*hooks)); new_hooks->alloc = &JemallocNodumpAllocator::Alloc; extent_hooks_t* hooks_ptr = new_hooks.get(); ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr)); if (ret != 0) { JemallocNodumpAllocator::DestroyArena(arena_index); return Status::Incomplete("Failed to set custom hook, error code: " + ToString(ret)); } // Create cache allocator. memory_allocator->reset( new JemallocNodumpAllocator(options, std::move(new_hooks), arena_index)); return Status::OK(); #endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memory/jemalloc_nodump_allocator.h000066400000000000000000000052341370372246700221420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include "port/jemalloc_helper.h" #include "port/port.h" #include "rocksdb/memory_allocator.h" #include "util/thread_local.h" #if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX) #include #if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP) #define ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR namespace ROCKSDB_NAMESPACE { class JemallocNodumpAllocator : public MemoryAllocator { public: JemallocNodumpAllocator(JemallocAllocatorOptions& options, std::unique_ptr&& arena_hooks, unsigned arena_index); ~JemallocNodumpAllocator(); const char* Name() const override { return "JemallocNodumpAllocator"; } void* Allocate(size_t size) override; void Deallocate(void* p) override; size_t UsableSize(void* p, size_t allocation_size) const override; private: friend Status NewJemallocNodumpAllocator( JemallocAllocatorOptions& options, std::shared_ptr* memory_allocator); // Custom alloc hook to replace jemalloc default alloc. static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size, size_t alignment, bool* zero, bool* commit, unsigned arena_ind); // Destroy arena on destruction of the allocator, or on failure. static Status DestroyArena(unsigned arena_index); // Destroy tcache on destruction of the allocator, or thread exit. static void DestroyThreadSpecificCache(void* ptr); // Get or create tcache. Return flag suitable to use with `mallocx`: // either MALLOCX_TCACHE_NONE or MALLOCX_TCACHE(tc). int GetThreadSpecificCache(size_t size); // A function pointer to jemalloc default alloc. Use atomic to make sure // NewJemallocNodumpAllocator is thread-safe. // // Hack: original_alloc_ needs to be static for Alloc() to access it. // alloc needs to be static to pass to jemalloc as function pointer. static std::atomic original_alloc_; const JemallocAllocatorOptions options_; // Custom hooks has to outlive corresponding arena. const std::unique_ptr arena_hooks_; // Arena index. const unsigned arena_index_; // Hold thread-local tcache index. 
ThreadLocalPtr tcache_; }; } // namespace ROCKSDB_NAMESPACE #endif // (JEMALLOC_VERSION_MAJOR >= 5) && MADV_DONTDUMP #endif // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX rocksdb-6.11.4/memory/memkind_kmem_allocator.cc000066400000000000000000000016521370372246700215650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // Copyright (c) 2019 Intel Corporation // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifdef MEMKIND #include "memkind_kmem_allocator.h" namespace rocksdb { void* MemkindKmemAllocator::Allocate(size_t size) { void* p = memkind_malloc(MEMKIND_DAX_KMEM, size); if (p == NULL) { throw std::bad_alloc(); } return p; } void MemkindKmemAllocator::Deallocate(void* p) { memkind_free(MEMKIND_DAX_KMEM, p); } #ifdef ROCKSDB_MALLOC_USABLE_SIZE size_t MemkindKmemAllocator::UsableSize(void* p, size_t /*allocation_size*/) const { return memkind_malloc_usable_size(MEMKIND_DAX_KMEM, p); } #endif // ROCKSDB_MALLOC_USABLE_SIZE } // namespace rocksdb #endif // MEMKIND rocksdb-6.11.4/memory/memkind_kmem_allocator.h000066400000000000000000000014241370372246700214240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // Copyright (c) 2019 Intel Corporation // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifdef MEMKIND #include #include "rocksdb/memory_allocator.h" namespace rocksdb { class MemkindKmemAllocator : public MemoryAllocator { public: const char* Name() const override { return "MemkindKmemAllocator"; }; void* Allocate(size_t size) override; void Deallocate(void* p) override; #ifdef ROCKSDB_MALLOC_USABLE_SIZE size_t UsableSize(void* p, size_t /*allocation_size*/) const override; #endif }; } // namespace rocksdb #endif // MEMKIND rocksdb-6.11.4/memory/memkind_kmem_allocator_test.cc000066400000000000000000000055471370372246700226330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // Copyright (c) 2019 Intel Corporation // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #ifdef MEMKIND #include "memkind_kmem_allocator.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/options.h" #include "table/block_based/block_based_table_factory.h" #include "test_util/testharness.h" namespace rocksdb { TEST(MemkindKmemAllocatorTest, Allocate) { MemkindKmemAllocator allocator; void* p; try { p = allocator.Allocate(1024); } catch (const std::bad_alloc& e) { return; } ASSERT_NE(p, nullptr); size_t size = allocator.UsableSize(p, 1024); ASSERT_GE(size, 1024); allocator.Deallocate(p); } TEST(MemkindKmemAllocatorTest, DatabaseBlockCache) { // Check if a memory node is available for allocation try { MemkindKmemAllocator allocator; allocator.Allocate(1024); } catch (const std::bad_alloc& e) { return; // if no node available, skip the test } // Create database with block cache using MemkindKmemAllocator Options options; std::string dbname = test::PerThreadDBPath("memkind_kmem_allocator_test"); ASSERT_OK(DestroyDB(dbname, options)); options.create_if_missing = true; std::shared_ptr cache = NewLRUCache( 1024 * 1024, 6, false, false, std::make_shared()); 
BlockBasedTableOptions table_options; table_options.block_cache = cache; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DB* db = nullptr; Status s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_NE(db, nullptr); ASSERT_EQ(cache->GetUsage(), 0); // Write 2kB (200 values, each 10 bytes) int num_keys = 200; WriteOptions wo; std::string val = "0123456789"; for (int i = 0; i < num_keys; i++) { std::string key = std::to_string(i); s = db->Put(wo, Slice(key), Slice(val)); ASSERT_OK(s); } ASSERT_OK(db->Flush(FlushOptions())); // Flush all data from memtable so that // reads are from block cache // Read and check block cache usage ReadOptions ro; std::string result; for (int i = 0; i < num_keys; i++) { std::string key = std::to_string(i); s = db->Get(ro, key, &result); ASSERT_OK(s); ASSERT_EQ(result, val); } ASSERT_GT(cache->GetUsage(), 2000); // Close database s = db->Close(); ASSERT_OK(s); ASSERT_OK(DestroyDB(dbname, options)); } } // namespace rocksdb int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else int main(int /*argc*/, char** /*argv*/) { printf( "Skip memkind_kmem_allocator_test as the required library memkind is " "missing."); } #endif // MEMKIND rocksdb-6.11.4/memory/memory_allocator.h000066400000000000000000000020271370372246700202770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #include "rocksdb/memory_allocator.h" namespace ROCKSDB_NAMESPACE { struct CustomDeleter { CustomDeleter(MemoryAllocator* a = nullptr) : allocator(a) {} void operator()(char* ptr) const { if (allocator) { allocator->Deallocate(reinterpret_cast(ptr)); } else { delete[] ptr; } } MemoryAllocator* allocator; }; using CacheAllocationPtr = std::unique_ptr; inline CacheAllocationPtr AllocateBlock(size_t size, MemoryAllocator* allocator) { if (allocator) { auto block = reinterpret_cast(allocator->Allocate(size)); return CacheAllocationPtr(block, allocator); } return CacheAllocationPtr(new char[size]); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memory/memory_usage.h000066400000000000000000000015401370372246700174220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include namespace ROCKSDB_NAMESPACE { // Helper methods to estimate memroy usage by std containers. template size_t ApproximateMemoryUsage( const std::unordered_map& umap) { typedef std::unordered_map Map; return sizeof(umap) + // Size of all items plus a next pointer for each item. (sizeof(typename Map::value_type) + sizeof(void*)) * umap.size() + // Size of hash buckets. umap.bucket_count() * sizeof(void*); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/000077500000000000000000000000001370372246700150335ustar00rootroot00000000000000rocksdb-6.11.4/memtable/alloc_tracker.cc000066400000000000000000000040401370372246700201450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include "memory/allocator.h" #include "memory/arena.h" #include "rocksdb/write_buffer_manager.h" namespace ROCKSDB_NAMESPACE { AllocTracker::AllocTracker(WriteBufferManager* write_buffer_manager) : write_buffer_manager_(write_buffer_manager), bytes_allocated_(0), done_allocating_(false), freed_(false) {} AllocTracker::~AllocTracker() { FreeMem(); } void AllocTracker::Allocate(size_t bytes) { assert(write_buffer_manager_ != nullptr); if (write_buffer_manager_->enabled() || write_buffer_manager_->cost_to_cache()) { bytes_allocated_.fetch_add(bytes, std::memory_order_relaxed); write_buffer_manager_->ReserveMem(bytes); } } void AllocTracker::DoneAllocating() { if (write_buffer_manager_ != nullptr && !done_allocating_) { if (write_buffer_manager_->enabled() || write_buffer_manager_->cost_to_cache()) { write_buffer_manager_->ScheduleFreeMem( bytes_allocated_.load(std::memory_order_relaxed)); } else { assert(bytes_allocated_.load(std::memory_order_relaxed) == 0); } done_allocating_ = true; } } void AllocTracker::FreeMem() { if (!done_allocating_) { DoneAllocating(); } if (write_buffer_manager_ != nullptr && !freed_) { if (write_buffer_manager_->enabled() || write_buffer_manager_->cost_to_cache()) { write_buffer_manager_->FreeMem( bytes_allocated_.load(std::memory_order_relaxed)); } else { assert(bytes_allocated_.load(std::memory_order_relaxed) == 0); } freed_ = true; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/hash_linklist_rep.cc000066400000000000000000000714721370372246700210570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #ifndef ROCKSDB_LITE #include "memtable/hash_linklist_rep.h" #include #include #include "db/memtable.h" #include "memory/arena.h" #include "memtable/skiplist.h" #include "monitoring/histogram.h" #include "port/port.h" #include "rocksdb/memtablerep.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { namespace { typedef const char* Key; typedef SkipList MemtableSkipList; typedef std::atomic Pointer; // A data structure used as the header of a link list of a hash bucket. struct BucketHeader { Pointer next; std::atomic num_entries; explicit BucketHeader(void* n, uint32_t count) : next(n), num_entries(count) {} bool IsSkipListBucket() { return next.load(std::memory_order_relaxed) == this; } uint32_t GetNumEntries() const { return num_entries.load(std::memory_order_relaxed); } // REQUIRES: called from single-threaded Insert() void IncNumEntries() { // Only one thread can do write at one time. No need to do atomic // incremental. Update it with relaxed load and store. num_entries.store(GetNumEntries() + 1, std::memory_order_relaxed); } }; // A data structure used as the header of a skip list of a hash bucket. struct SkipListBucketHeader { BucketHeader Counting_header; MemtableSkipList skip_list; explicit SkipListBucketHeader(const MemTableRep::KeyComparator& cmp, Allocator* allocator, uint32_t count) : Counting_header(this, // Pointing to itself to indicate header type. count), skip_list(cmp, allocator) {} }; struct Node { // Accessors/mutators for links. Wrapped in methods so we can // add the appropriate barriers as necessary. Node* Next() { // Use an 'acquire load' so that we observe a fully initialized // version of the returned Node. 
return next_.load(std::memory_order_acquire); } void SetNext(Node* x) { // Use a 'release store' so that anybody who reads through this // pointer observes a fully initialized version of the inserted node. next_.store(x, std::memory_order_release); } // No-barrier variants that can be safely used in a few locations. Node* NoBarrier_Next() { return next_.load(std::memory_order_relaxed); } void NoBarrier_SetNext(Node* x) { next_.store(x, std::memory_order_relaxed); } // Needed for placement new below which is fine Node() {} private: std::atomic next_; // Prohibit copying due to the below Node(const Node&) = delete; Node& operator=(const Node&) = delete; public: char key[1]; }; // Memory structure of the mem table: // It is a hash table, each bucket points to one entry, a linked list or a // skip list. In order to track total number of records in a bucket to determine // whether should switch to skip list, a header is added just to indicate // number of entries in the bucket. // // // +-----> NULL Case 1. Empty bucket // | // | // | +---> +-------+ // | | | Next +--> NULL // | | +-------+ // +-----+ | | | | Case 2. One Entry in bucket. // | +-+ | | Data | next pointer points to // +-----+ | | | NULL. All other cases // | | | | | next pointer is not NULL. // +-----+ | +-------+ // | +---+ // +-----+ +-> +-------+ +> +-------+ +-> +-------+ // | | | | Next +--+ | Next +--+ | Next +-->NULL // +-----+ | +-------+ +-------+ +-------+ // | +-----+ | Count | | | | | // +-----+ +-------+ | Data | | Data | // | | | | | | // +-----+ Case 3. | | | | // | | A header +-------+ +-------+ // +-----+ points to // | | a linked list. Count indicates total number // +-----+ of rows in this bucket. // | | // +-----+ +-> +-------+ <--+ // | | | | Next +----+ // +-----+ | +-------+ Case 4. A header points to a skip // | +----+ | Count | list and next pointer points to // +-----+ +-------+ itself, to distinguish case 3 or 4. 
// | | | | Count still is kept to indicates total // +-----+ | Skip +--> of entries in the bucket for debugging // | | | List | Data purpose. // | | | +--> // +-----+ | | // | | +-------+ // +-----+ // // We don't have data race when changing cases because: // (1) When changing from case 2->3, we create a new bucket header, put the // single node there first without changing the original node, and do a // release store when changing the bucket pointer. In that case, a reader // who sees a stale value of the bucket pointer will read this node, while // a reader sees the correct value because of the release store. // (2) When changing case 3->4, a new header is created with skip list points // to the data, before doing an acquire store to change the bucket pointer. // The old header and nodes are never changed, so any reader sees any // of those existing pointers will guarantee to be able to iterate to the // end of the linked list. // (3) Header's next pointer in case 3 might change, but they are never equal // to itself, so no matter a reader sees any stale or newer value, it will // be able to correctly distinguish case 3 and 4. // // The reason that we use case 2 is we want to make the format to be efficient // when the utilization of buckets is relatively low. If we use case 3 for // single entry bucket, we will need to waste 12 bytes for every entry, // which can be significant decrease of memory utilization. 
class HashLinkListRep : public MemTableRep { public: HashLinkListRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, size_t bucket_size, uint32_t threshold_use_skiplist, size_t huge_page_tlb_size, Logger* logger, int bucket_entries_logging_threshold, bool if_log_bucket_dist_when_flash); KeyHandle Allocate(const size_t len, char** buf) override; void Insert(KeyHandle handle) override; bool Contains(const char* key) const override; size_t ApproximateMemoryUsage() override; void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override; ~HashLinkListRep() override; MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override; MemTableRep::Iterator* GetDynamicPrefixIterator( Arena* arena = nullptr) override; private: friend class DynamicIterator; size_t bucket_size_; // Maps slices (which are transformed user keys) to buckets of keys sharing // the same transform. Pointer* buckets_; const uint32_t threshold_use_skiplist_; // The user-supplied transform whose domain is the user keys. 
const SliceTransform* transform_; const MemTableRep::KeyComparator& compare_; Logger* logger_; int bucket_entries_logging_threshold_; bool if_log_bucket_dist_when_flash_; bool LinkListContains(Node* head, const Slice& key) const; SkipListBucketHeader* GetSkipListBucketHeader(Pointer* first_next_pointer) const; Node* GetLinkListFirstNode(Pointer* first_next_pointer) const; Slice GetPrefix(const Slice& internal_key) const { return transform_->Transform(ExtractUserKey(internal_key)); } size_t GetHash(const Slice& slice) const { return fastrange64(GetSliceNPHash64(slice), bucket_size_); } Pointer* GetBucket(size_t i) const { return static_cast(buckets_[i].load(std::memory_order_acquire)); } Pointer* GetBucket(const Slice& slice) const { return GetBucket(GetHash(slice)); } bool Equal(const Slice& a, const Key& b) const { return (compare_(b, a) == 0); } bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } bool KeyIsAfterNode(const Slice& internal_key, const Node* n) const { // nullptr n is considered infinite return (n != nullptr) && (compare_(n->key, internal_key) < 0); } bool KeyIsAfterNode(const Key& key, const Node* n) const { // nullptr n is considered infinite return (n != nullptr) && (compare_(n->key, key) < 0); } bool KeyIsAfterOrAtNode(const Slice& internal_key, const Node* n) const { // nullptr n is considered infinite return (n != nullptr) && (compare_(n->key, internal_key) <= 0); } bool KeyIsAfterOrAtNode(const Key& key, const Node* n) const { // nullptr n is considered infinite return (n != nullptr) && (compare_(n->key, key) <= 0); } Node* FindGreaterOrEqualInBucket(Node* head, const Slice& key) const; Node* FindLessOrEqualInBucket(Node* head, const Slice& key) const; class FullListIterator : public MemTableRep::Iterator { public: explicit FullListIterator(MemtableSkipList* list, Allocator* allocator) : iter_(list), full_list_(list), allocator_(allocator) {} ~FullListIterator() override {} // Returns true iff the iterator is 
positioned at a valid node. bool Valid() const override { return iter_.Valid(); } // Returns the key at the current position. // REQUIRES: Valid() const char* key() const override { assert(Valid()); return iter_.key(); } // Advances to the next position. // REQUIRES: Valid() void Next() override { assert(Valid()); iter_.Next(); } // Advances to the previous position. // REQUIRES: Valid() void Prev() override { assert(Valid()); iter_.Prev(); } // Advance to the first entry with a key >= target void Seek(const Slice& internal_key, const char* memtable_key) override { const char* encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, internal_key); iter_.Seek(encoded_key); } // Retreat to the last entry with a key <= target void SeekForPrev(const Slice& internal_key, const char* memtable_key) override { const char* encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, internal_key); iter_.SeekForPrev(encoded_key); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override { iter_.SeekToFirst(); } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override { iter_.SeekToLast(); } private: MemtableSkipList::Iterator iter_; // To destruct with the iterator. std::unique_ptr full_list_; std::unique_ptr allocator_; std::string tmp_; // For passing to EncodeKey }; class LinkListIterator : public MemTableRep::Iterator { public: explicit LinkListIterator(const HashLinkListRep* const hash_link_list_rep, Node* head) : hash_link_list_rep_(hash_link_list_rep), head_(head), node_(nullptr) {} ~LinkListIterator() override {} // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return node_ != nullptr; } // Returns the key at the current position. 
// REQUIRES: Valid() const char* key() const override { assert(Valid()); return node_->key; } // Advances to the next position. // REQUIRES: Valid() void Next() override { assert(Valid()); node_ = node_->Next(); } // Advances to the previous position. // REQUIRES: Valid() void Prev() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } // Advance to the first entry with a key >= target void Seek(const Slice& internal_key, const char* /*memtable_key*/) override { node_ = hash_link_list_rep_->FindGreaterOrEqualInBucket(head_, internal_key); } // Retreat to the last entry with a key <= target void SeekForPrev(const Slice& /*internal_key*/, const char* /*memtable_key*/) override { // Since we do not support Prev() // We simply do not support SeekForPrev Reset(nullptr); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override { // Prefix iterator does not support total order. 
// We simply set the iterator to invalid state Reset(nullptr); } protected: void Reset(Node* head) { head_ = head; node_ = nullptr; } private: friend class HashLinkListRep; const HashLinkListRep* const hash_link_list_rep_; Node* head_; Node* node_; virtual void SeekToHead() { node_ = head_; } }; class DynamicIterator : public HashLinkListRep::LinkListIterator { public: explicit DynamicIterator(HashLinkListRep& memtable_rep) : HashLinkListRep::LinkListIterator(&memtable_rep, nullptr), memtable_rep_(memtable_rep) {} // Advance to the first entry with a key >= target void Seek(const Slice& k, const char* memtable_key) override { auto transformed = memtable_rep_.GetPrefix(k); auto* bucket = memtable_rep_.GetBucket(transformed); SkipListBucketHeader* skip_list_header = memtable_rep_.GetSkipListBucketHeader(bucket); if (skip_list_header != nullptr) { // The bucket is organized as a skip list if (!skip_list_iter_) { skip_list_iter_.reset( new MemtableSkipList::Iterator(&skip_list_header->skip_list)); } else { skip_list_iter_->SetList(&skip_list_header->skip_list); } if (memtable_key != nullptr) { skip_list_iter_->Seek(memtable_key); } else { IterKey encoded_key; encoded_key.EncodeLengthPrefixedKey(k); skip_list_iter_->Seek(encoded_key.GetUserKey().data()); } } else { // The bucket is organized as a linked list skip_list_iter_.reset(); Reset(memtable_rep_.GetLinkListFirstNode(bucket)); HashLinkListRep::LinkListIterator::Seek(k, memtable_key); } } bool Valid() const override { if (skip_list_iter_) { return skip_list_iter_->Valid(); } return HashLinkListRep::LinkListIterator::Valid(); } const char* key() const override { if (skip_list_iter_) { return skip_list_iter_->key(); } return HashLinkListRep::LinkListIterator::key(); } void Next() override { if (skip_list_iter_) { skip_list_iter_->Next(); } else { HashLinkListRep::LinkListIterator::Next(); } } private: // the underlying memtable const HashLinkListRep& memtable_rep_; std::unique_ptr skip_list_iter_; }; class 
EmptyIterator : public MemTableRep::Iterator { // This is used when there wasn't a bucket. It is cheaper than // instantiating an empty bucket over which to iterate. public: EmptyIterator() { } bool Valid() const override { return false; } const char* key() const override { assert(false); return nullptr; } void Next() override {} void Prev() override {} void Seek(const Slice& /*user_key*/, const char* /*memtable_key*/) override {} void SeekForPrev(const Slice& /*user_key*/, const char* /*memtable_key*/) override {} void SeekToFirst() override {} void SeekToLast() override {} private: }; }; HashLinkListRep::HashLinkListRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, size_t bucket_size, uint32_t threshold_use_skiplist, size_t huge_page_tlb_size, Logger* logger, int bucket_entries_logging_threshold, bool if_log_bucket_dist_when_flash) : MemTableRep(allocator), bucket_size_(bucket_size), // Threshold to use skip list doesn't make sense if less than 3, so we // force it to be minimum of 3 to simplify implementation. 
threshold_use_skiplist_(std::max(threshold_use_skiplist, 3U)), transform_(transform), compare_(compare), logger_(logger), bucket_entries_logging_threshold_(bucket_entries_logging_threshold), if_log_bucket_dist_when_flash_(if_log_bucket_dist_when_flash) { char* mem = allocator_->AllocateAligned(sizeof(Pointer) * bucket_size, huge_page_tlb_size, logger); buckets_ = new (mem) Pointer[bucket_size]; for (size_t i = 0; i < bucket_size_; ++i) { buckets_[i].store(nullptr, std::memory_order_relaxed); } } HashLinkListRep::~HashLinkListRep() { } KeyHandle HashLinkListRep::Allocate(const size_t len, char** buf) { char* mem = allocator_->AllocateAligned(sizeof(Node) + len); Node* x = new (mem) Node(); *buf = x->key; return static_cast(x); } SkipListBucketHeader* HashLinkListRep::GetSkipListBucketHeader( Pointer* first_next_pointer) const { if (first_next_pointer == nullptr) { return nullptr; } if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) { // Single entry bucket return nullptr; } // Counting header BucketHeader* header = reinterpret_cast(first_next_pointer); if (header->IsSkipListBucket()) { assert(header->GetNumEntries() > threshold_use_skiplist_); auto* skip_list_bucket_header = reinterpret_cast(header); assert(skip_list_bucket_header->Counting_header.next.load( std::memory_order_relaxed) == header); return skip_list_bucket_header; } assert(header->GetNumEntries() <= threshold_use_skiplist_); return nullptr; } Node* HashLinkListRep::GetLinkListFirstNode(Pointer* first_next_pointer) const { if (first_next_pointer == nullptr) { return nullptr; } if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) { // Single entry bucket return reinterpret_cast(first_next_pointer); } // Counting header BucketHeader* header = reinterpret_cast(first_next_pointer); if (!header->IsSkipListBucket()) { assert(header->GetNumEntries() <= threshold_use_skiplist_); return reinterpret_cast( header->next.load(std::memory_order_acquire)); } 
assert(header->GetNumEntries() > threshold_use_skiplist_); return nullptr; } void HashLinkListRep::Insert(KeyHandle handle) { Node* x = static_cast(handle); assert(!Contains(x->key)); Slice internal_key = GetLengthPrefixedSlice(x->key); auto transformed = GetPrefix(internal_key); auto& bucket = buckets_[GetHash(transformed)]; Pointer* first_next_pointer = static_cast(bucket.load(std::memory_order_relaxed)); if (first_next_pointer == nullptr) { // Case 1. empty bucket // NoBarrier_SetNext() suffices since we will add a barrier when // we publish a pointer to "x" in prev[i]. x->NoBarrier_SetNext(nullptr); bucket.store(x, std::memory_order_release); return; } BucketHeader* header = nullptr; if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) { // Case 2. only one entry in the bucket // Need to convert to a Counting bucket and turn to case 4. Node* first = reinterpret_cast(first_next_pointer); // Need to add a bucket header. // We have to first convert it to a bucket with header before inserting // the new node. Otherwise, we might need to change next pointer of first. // In that case, a reader might sees the next pointer is NULL and wrongly // think the node is a bucket header. auto* mem = allocator_->AllocateAligned(sizeof(BucketHeader)); header = new (mem) BucketHeader(first, 1); bucket.store(header, std::memory_order_release); } else { header = reinterpret_cast(first_next_pointer); if (header->IsSkipListBucket()) { // Case 4. Bucket is already a skip list assert(header->GetNumEntries() > threshold_use_skiplist_); auto* skip_list_bucket_header = reinterpret_cast(header); // Only one thread can execute Insert() at one time. No need to do atomic // incremental. 
skip_list_bucket_header->Counting_header.IncNumEntries(); skip_list_bucket_header->skip_list.Insert(x->key); return; } } if (bucket_entries_logging_threshold_ > 0 && header->GetNumEntries() == static_cast(bucket_entries_logging_threshold_)) { Info(logger_, "HashLinkedList bucket %" ROCKSDB_PRIszt " has more than %d " "entries. Key to insert: %s", GetHash(transformed), header->GetNumEntries(), GetLengthPrefixedSlice(x->key).ToString(true).c_str()); } if (header->GetNumEntries() == threshold_use_skiplist_) { // Case 3. number of entries reaches the threshold so need to convert to // skip list. LinkListIterator bucket_iter( this, reinterpret_cast( first_next_pointer->load(std::memory_order_relaxed))); auto mem = allocator_->AllocateAligned(sizeof(SkipListBucketHeader)); SkipListBucketHeader* new_skip_list_header = new (mem) SkipListBucketHeader(compare_, allocator_, header->GetNumEntries() + 1); auto& skip_list = new_skip_list_header->skip_list; // Add all current entries to the skip list for (bucket_iter.SeekToHead(); bucket_iter.Valid(); bucket_iter.Next()) { skip_list.Insert(bucket_iter.key()); } // insert the new entry skip_list.Insert(x->key); // Set the bucket bucket.store(new_skip_list_header, std::memory_order_release); } else { // Case 5. Need to insert to the sorted linked list without changing the // header. Node* first = reinterpret_cast(header->next.load(std::memory_order_relaxed)); assert(first != nullptr); // Advance counter unless the bucket needs to be advanced to skip list. // In that case, we need to make sure the previous count never exceeds // threshold_use_skiplist_ to avoid readers to cast to wrong format. header->IncNumEntries(); Node* cur = first; Node* prev = nullptr; while (true) { if (cur == nullptr) { break; } Node* next = cur->Next(); // Make sure the lists are sorted. // If x points to head_ or next points nullptr, it is trivially satisfied. 
assert((cur == first) || (next == nullptr) || KeyIsAfterNode(next->key, cur)); if (KeyIsAfterNode(internal_key, cur)) { // Keep searching in this list prev = cur; cur = next; } else { break; } } // Our data structure does not allow duplicate insertion assert(cur == nullptr || !Equal(x->key, cur->key)); // NoBarrier_SetNext() suffices since we will add a barrier when // we publish a pointer to "x" in prev[i]. x->NoBarrier_SetNext(cur); if (prev) { prev->SetNext(x); } else { header->next.store(static_cast(x), std::memory_order_release); } } } bool HashLinkListRep::Contains(const char* key) const { Slice internal_key = GetLengthPrefixedSlice(key); auto transformed = GetPrefix(internal_key); auto bucket = GetBucket(transformed); if (bucket == nullptr) { return false; } SkipListBucketHeader* skip_list_header = GetSkipListBucketHeader(bucket); if (skip_list_header != nullptr) { return skip_list_header->skip_list.Contains(key); } else { return LinkListContains(GetLinkListFirstNode(bucket), internal_key); } } size_t HashLinkListRep::ApproximateMemoryUsage() { // Memory is always allocated from the allocator. 
return 0; } void HashLinkListRep::Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) { auto transformed = transform_->Transform(k.user_key()); auto bucket = GetBucket(transformed); auto* skip_list_header = GetSkipListBucketHeader(bucket); if (skip_list_header != nullptr) { // Is a skip list MemtableSkipList::Iterator iter(&skip_list_header->skip_list); for (iter.Seek(k.memtable_key().data()); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } else { auto* link_list_head = GetLinkListFirstNode(bucket); if (link_list_head != nullptr) { LinkListIterator iter(this, link_list_head); for (iter.Seek(k.internal_key(), nullptr); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } } } MemTableRep::Iterator* HashLinkListRep::GetIterator(Arena* alloc_arena) { // allocate a new arena of similar size to the one currently in use Arena* new_arena = new Arena(allocator_->BlockSize()); auto list = new MemtableSkipList(compare_, new_arena); HistogramImpl keys_per_bucket_hist; for (size_t i = 0; i < bucket_size_; ++i) { int count = 0; auto* bucket = GetBucket(i); if (bucket != nullptr) { auto* skip_list_header = GetSkipListBucketHeader(bucket); if (skip_list_header != nullptr) { // Is a skip list MemtableSkipList::Iterator itr(&skip_list_header->skip_list); for (itr.SeekToFirst(); itr.Valid(); itr.Next()) { list->Insert(itr.key()); count++; } } else { auto* link_list_head = GetLinkListFirstNode(bucket); if (link_list_head != nullptr) { LinkListIterator itr(this, link_list_head); for (itr.SeekToHead(); itr.Valid(); itr.Next()) { list->Insert(itr.key()); count++; } } } } if (if_log_bucket_dist_when_flash_) { keys_per_bucket_hist.Add(count); } } if (if_log_bucket_dist_when_flash_ && logger_ != nullptr) { Info(logger_, "hashLinkedList Entry distribution among buckets: %s", keys_per_bucket_hist.ToString().c_str()); } if (alloc_arena == nullptr) { return new FullListIterator(list, new_arena); } 
else { auto mem = alloc_arena->AllocateAligned(sizeof(FullListIterator)); return new (mem) FullListIterator(list, new_arena); } } MemTableRep::Iterator* HashLinkListRep::GetDynamicPrefixIterator( Arena* alloc_arena) { if (alloc_arena == nullptr) { return new DynamicIterator(*this); } else { auto mem = alloc_arena->AllocateAligned(sizeof(DynamicIterator)); return new (mem) DynamicIterator(*this); } } bool HashLinkListRep::LinkListContains(Node* head, const Slice& user_key) const { Node* x = FindGreaterOrEqualInBucket(head, user_key); return (x != nullptr && Equal(user_key, x->key)); } Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head, const Slice& key) const { Node* x = head; while (true) { if (x == nullptr) { return x; } Node* next = x->Next(); // Make sure the lists are sorted. // If x points to head_ or next points nullptr, it is trivially satisfied. assert((x == head) || (next == nullptr) || KeyIsAfterNode(next->key, x)); if (KeyIsAfterNode(key, x)) { // Keep searching in this list x = next; } else { break; } } return x; } } // anon namespace MemTableRep* HashLinkListRepFactory::CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* logger) { return new HashLinkListRep(compare, allocator, transform, bucket_count_, threshold_use_skiplist_, huge_page_tlb_size_, logger, bucket_entries_logging_threshold_, if_log_bucket_dist_when_flash_); } MemTableRepFactory* NewHashLinkListRepFactory( size_t bucket_count, size_t huge_page_tlb_size, int bucket_entries_logging_threshold, bool if_log_bucket_dist_when_flash, uint32_t threshold_use_skiplist) { return new HashLinkListRepFactory( bucket_count, threshold_use_skiplist, huge_page_tlb_size, bucket_entries_logging_threshold, if_log_bucket_dist_when_flash); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/memtable/hash_linklist_rep.h000066400000000000000000000035731370372246700207160ustar00rootroot00000000000000// Copyright 
(c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include "rocksdb/slice_transform.h" #include "rocksdb/memtablerep.h" namespace ROCKSDB_NAMESPACE { class HashLinkListRepFactory : public MemTableRepFactory { public: explicit HashLinkListRepFactory(size_t bucket_count, uint32_t threshold_use_skiplist, size_t huge_page_tlb_size, int bucket_entries_logging_threshold, bool if_log_bucket_dist_when_flash) : bucket_count_(bucket_count), threshold_use_skiplist_(threshold_use_skiplist), huge_page_tlb_size_(huge_page_tlb_size), bucket_entries_logging_threshold_(bucket_entries_logging_threshold), if_log_bucket_dist_when_flash_(if_log_bucket_dist_when_flash) {} virtual ~HashLinkListRepFactory() {} using MemTableRepFactory::CreateMemTableRep; virtual MemTableRep* CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* logger) override; virtual const char* Name() const override { return "HashLinkListRepFactory"; } private: const size_t bucket_count_; const uint32_t threshold_use_skiplist_; const size_t huge_page_tlb_size_; int bucket_entries_logging_threshold_; bool if_log_bucket_dist_when_flash_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/memtable/hash_skiplist_rep.cc000066400000000000000000000255711370372246700210670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #ifndef ROCKSDB_LITE #include "memtable/hash_skiplist_rep.h" #include #include "db/memtable.h" #include "memory/arena.h" #include "memtable/skiplist.h" #include "port/port.h" #include "rocksdb/memtablerep.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "util/murmurhash.h" namespace ROCKSDB_NAMESPACE { namespace { class HashSkipListRep : public MemTableRep { public: HashSkipListRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, size_t bucket_size, int32_t skiplist_height, int32_t skiplist_branching_factor); void Insert(KeyHandle handle) override; bool Contains(const char* key) const override; size_t ApproximateMemoryUsage() override; void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override; ~HashSkipListRep() override; MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override; MemTableRep::Iterator* GetDynamicPrefixIterator( Arena* arena = nullptr) override; private: friend class DynamicIterator; typedef SkipList Bucket; size_t bucket_size_; const int32_t skiplist_height_; const int32_t skiplist_branching_factor_; // Maps slices (which are transformed user keys) to buckets of keys sharing // the same transform. std::atomic* buckets_; // The user-supplied transform whose domain is the user keys. 
const SliceTransform* transform_; const MemTableRep::KeyComparator& compare_; // immutable after construction Allocator* const allocator_; inline size_t GetHash(const Slice& slice) const { return MurmurHash(slice.data(), static_cast(slice.size()), 0) % bucket_size_; } inline Bucket* GetBucket(size_t i) const { return buckets_[i].load(std::memory_order_acquire); } inline Bucket* GetBucket(const Slice& slice) const { return GetBucket(GetHash(slice)); } // Get a bucket from buckets_. If the bucket hasn't been initialized yet, // initialize it before returning. Bucket* GetInitializedBucket(const Slice& transformed); class Iterator : public MemTableRep::Iterator { public: explicit Iterator(Bucket* list, bool own_list = true, Arena* arena = nullptr) : list_(list), iter_(list), own_list_(own_list), arena_(arena) {} ~Iterator() override { // if we own the list, we should also delete it if (own_list_) { assert(list_ != nullptr); delete list_; } } // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return list_ != nullptr && iter_.Valid(); } // Returns the key at the current position. // REQUIRES: Valid() const char* key() const override { assert(Valid()); return iter_.key(); } // Advances to the next position. // REQUIRES: Valid() void Next() override { assert(Valid()); iter_.Next(); } // Advances to the previous position. // REQUIRES: Valid() void Prev() override { assert(Valid()); iter_.Prev(); } // Advance to the first entry with a key >= target void Seek(const Slice& internal_key, const char* memtable_key) override { if (list_ != nullptr) { const char* encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, internal_key); iter_.Seek(encoded_key); } } // Retreat to the last entry with a key <= target void SeekForPrev(const Slice& /*internal_key*/, const char* /*memtable_key*/) override { // not supported assert(false); } // Position at the first entry in collection. 
// Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override { if (list_ != nullptr) { iter_.SeekToFirst(); } } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override { if (list_ != nullptr) { iter_.SeekToLast(); } } protected: void Reset(Bucket* list) { if (own_list_) { assert(list_ != nullptr); delete list_; } list_ = list; iter_.SetList(list); own_list_ = false; } private: // if list_ is nullptr, we should NEVER call any methods on iter_ // if list_ is nullptr, this Iterator is not Valid() Bucket* list_; Bucket::Iterator iter_; // here we track if we own list_. If we own it, we are also // responsible for it's cleaning. This is a poor man's std::shared_ptr bool own_list_; std::unique_ptr arena_; std::string tmp_; // For passing to EncodeKey }; class DynamicIterator : public HashSkipListRep::Iterator { public: explicit DynamicIterator(const HashSkipListRep& memtable_rep) : HashSkipListRep::Iterator(nullptr, false), memtable_rep_(memtable_rep) {} // Advance to the first entry with a key >= target void Seek(const Slice& k, const char* memtable_key) override { auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k)); Reset(memtable_rep_.GetBucket(transformed)); HashSkipListRep::Iterator::Seek(k, memtable_key); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override { // Prefix iterator does not support total order. 
// We simply set the iterator to invalid state Reset(nullptr); } private: // the underlying memtable const HashSkipListRep& memtable_rep_; }; class EmptyIterator : public MemTableRep::Iterator { // This is used when there wasn't a bucket. It is cheaper than // instantiating an empty bucket over which to iterate. public: EmptyIterator() { } bool Valid() const override { return false; } const char* key() const override { assert(false); return nullptr; } void Next() override {} void Prev() override {} void Seek(const Slice& /*internal_key*/, const char* /*memtable_key*/) override {} void SeekForPrev(const Slice& /*internal_key*/, const char* /*memtable_key*/) override {} void SeekToFirst() override {} void SeekToLast() override {} private: }; }; HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, size_t bucket_size, int32_t skiplist_height, int32_t skiplist_branching_factor) : MemTableRep(allocator), bucket_size_(bucket_size), skiplist_height_(skiplist_height), skiplist_branching_factor_(skiplist_branching_factor), transform_(transform), compare_(compare), allocator_(allocator) { auto mem = allocator->AllocateAligned( sizeof(std::atomic) * bucket_size); buckets_ = new (mem) std::atomic[bucket_size]; for (size_t i = 0; i < bucket_size_; ++i) { buckets_[i].store(nullptr, std::memory_order_relaxed); } } HashSkipListRep::~HashSkipListRep() { } HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket( const Slice& transformed) { size_t hash = GetHash(transformed); auto bucket = GetBucket(hash); if (bucket == nullptr) { auto addr = allocator_->AllocateAligned(sizeof(Bucket)); bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_, skiplist_branching_factor_); buckets_[hash].store(bucket, std::memory_order_release); } return bucket; } void HashSkipListRep::Insert(KeyHandle handle) { auto* key = static_cast(handle); assert(!Contains(key)); auto transformed = 
transform_->Transform(UserKey(key)); auto bucket = GetInitializedBucket(transformed); bucket->Insert(key); } bool HashSkipListRep::Contains(const char* key) const { auto transformed = transform_->Transform(UserKey(key)); auto bucket = GetBucket(transformed); if (bucket == nullptr) { return false; } return bucket->Contains(key); } size_t HashSkipListRep::ApproximateMemoryUsage() { return 0; } void HashSkipListRep::Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) { auto transformed = transform_->Transform(k.user_key()); auto bucket = GetBucket(transformed); if (bucket != nullptr) { Bucket::Iterator iter(bucket); for (iter.Seek(k.memtable_key().data()); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } } MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) { // allocate a new arena of similar size to the one currently in use Arena* new_arena = new Arena(allocator_->BlockSize()); auto list = new Bucket(compare_, new_arena); for (size_t i = 0; i < bucket_size_; ++i) { auto bucket = GetBucket(i); if (bucket != nullptr) { Bucket::Iterator itr(bucket); for (itr.SeekToFirst(); itr.Valid(); itr.Next()) { list->Insert(itr.key()); } } } if (arena == nullptr) { return new Iterator(list, true, new_arena); } else { auto mem = arena->AllocateAligned(sizeof(Iterator)); return new (mem) Iterator(list, true, new_arena); } } MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) { if (arena == nullptr) { return new DynamicIterator(*this); } else { auto mem = arena->AllocateAligned(sizeof(DynamicIterator)); return new (mem) DynamicIterator(*this); } } } // anon namespace MemTableRep* HashSkipListRepFactory::CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* /*logger*/) { return new HashSkipListRep(compare, allocator, transform, bucket_count_, skiplist_height_, skiplist_branching_factor_); } 
MemTableRepFactory* NewHashSkipListRepFactory( size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) { return new HashSkipListRepFactory(bucket_count, skiplist_height, skiplist_branching_factor); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/memtable/hash_skiplist_rep.h000066400000000000000000000027231370372246700207230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include "rocksdb/slice_transform.h" #include "rocksdb/memtablerep.h" namespace ROCKSDB_NAMESPACE { class HashSkipListRepFactory : public MemTableRepFactory { public: explicit HashSkipListRepFactory( size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) : bucket_count_(bucket_count), skiplist_height_(skiplist_height), skiplist_branching_factor_(skiplist_branching_factor) { } virtual ~HashSkipListRepFactory() {} using MemTableRepFactory::CreateMemTableRep; virtual MemTableRep* CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* logger) override; virtual const char* Name() const override { return "HashSkipListRepFactory"; } private: const size_t bucket_count_; const int32_t skiplist_height_; const int32_t skiplist_branching_factor_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/memtable/inlineskiplist.h000066400000000000000000001063371370372246700202570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. Use of
// this source code is governed by a BSD-style license that can be found
// in the LICENSE file. See the AUTHORS file for names of contributors.
//
// InlineSkipList is derived from SkipList (skiplist.h), but it optimizes
// the memory layout by requiring that the key storage be allocated through
// the skip list instance. For the common case of SkipList<const char*,
// Cmp> this saves 1 pointer per skip list node and gives better cache
// locality, at the expense of wasted padding from using AllocateAligned
// instead of Allocate for the keys. The unused padding will be from
// 0 to sizeof(void*)-1 bytes, and the space savings are sizeof(void*)
// bytes, so despite the padding the space used is always less than
// SkipList<const char*, ..>.
//
// Thread safety
// -------------
//
// Writes via Insert require external synchronization, most likely a mutex.
// InsertConcurrently can be safely called concurrently with reads and
// with other concurrent inserts. Reads require a guarantee that the
// InlineSkipList will not be destroyed while the read is in progress.
// Apart from that, reads progress without any internal locking or
// synchronization.
//
// Invariants:
//
// (1) Allocated nodes are never deleted until the InlineSkipList is
// destroyed. This is trivially guaranteed by the code since we never
// delete any skip list nodes.
//
// (2) The contents of a Node except for the next/prev pointers are
// immutable after the Node has been linked into the InlineSkipList.
// Only Insert() modifies the list, and it is careful to initialize a
// node and use release-stores to publish the nodes in one or more lists.
//
// ... prev vs. next pointer ordering ...
// #pragma once #include #include #include #include #include #include "memory/allocator.h" #include "port/likely.h" #include "port/port.h" #include "rocksdb/slice.h" #include "util/coding.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { template class InlineSkipList { private: struct Node; struct Splice; public: using DecodedKey = \ typename std::remove_reference::type::DecodedType; static const uint16_t kMaxPossibleHeight = 32; // Create a new InlineSkipList object that will use "cmp" for comparing // keys, and will allocate memory using "*allocator". Objects allocated // in the allocator must remain allocated for the lifetime of the // skiplist object. explicit InlineSkipList(Comparator cmp, Allocator* allocator, int32_t max_height = 12, int32_t branching_factor = 4); // No copying allowed InlineSkipList(const InlineSkipList&) = delete; InlineSkipList& operator=(const InlineSkipList&) = delete; // Allocates a key and a skip-list node, returning a pointer to the key // portion of the node. This method is thread-safe if the allocator // is thread-safe. char* AllocateKey(size_t key_size); // Allocate a splice using allocator. Splice* AllocateSplice(); // Allocate a splice on heap. Splice* AllocateSpliceOnHeap(); // Inserts a key allocated by AllocateKey, after the actual key value // has been filled in. // // REQUIRES: nothing that compares equal to key is currently in the list. // REQUIRES: no concurrent calls to any of inserts. bool Insert(const char* key); // Inserts a key allocated by AllocateKey with a hint of last insert // position in the skip-list. If hint points to nullptr, a new hint will be // populated, which can be used in subsequent calls. // // It can be used to optimize the workload where there are multiple groups // of keys, and each key is likely to insert to a location close to the last // inserted key in the same group. One example is sequential inserts. // // REQUIRES: nothing that compares equal to key is currently in the list. 
// REQUIRES: no concurrent calls to any of inserts. bool InsertWithHint(const char* key, void** hint); // Like InsertConcurrently, but with a hint // // REQUIRES: nothing that compares equal to key is currently in the list. // REQUIRES: no concurrent calls that use same hint bool InsertWithHintConcurrently(const char* key, void** hint); // Like Insert, but external synchronization is not required. bool InsertConcurrently(const char* key); // Inserts a node into the skip list. key must have been allocated by // AllocateKey and then filled in by the caller. If UseCAS is true, // then external synchronization is not required, otherwise this method // may not be called concurrently with any other insertions. // // Regardless of whether UseCAS is true, the splice must be owned // exclusively by the current thread. If allow_partial_splice_fix is // true, then the cost of insertion is amortized O(log D), where D is // the distance from the splice to the inserted key (measured as the // number of intervening nodes). Note that this bound is very good for // sequential insertions! If allow_partial_splice_fix is false then // the existing splice will be ignored unless the current key is being // inserted immediately after the splice. allow_partial_splice_fix == // false has worse running time for the non-sequential case O(log N), // but a better constant factor. template bool Insert(const char* key, Splice* splice, bool allow_partial_splice_fix); // Returns true iff an entry that compares equal to key is in the list. bool Contains(const char* key) const; // Return estimated number of entries smaller than `key`. uint64_t EstimateCount(const char* key) const; // Validate correctness of the skip-list. void TEST_Validate() const; // Iteration over the contents of a skip list class Iterator { public: // Initialize an iterator over the specified list. // The returned iterator is not valid. 
explicit Iterator(const InlineSkipList* list); // Change the underlying skiplist used for this iterator // This enables us not changing the iterator without deallocating // an old one and then allocating a new one void SetList(const InlineSkipList* list); // Returns true iff the iterator is positioned at a valid node. bool Valid() const; // Returns the key at the current position. // REQUIRES: Valid() const char* key() const; // Advances to the next position. // REQUIRES: Valid() void Next(); // Advances to the previous position. // REQUIRES: Valid() void Prev(); // Advance to the first entry with a key >= target void Seek(const char* target); // Retreat to the last entry with a key <= target void SeekForPrev(const char* target); // Position at the first entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToFirst(); // Position at the last entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToLast(); private: const InlineSkipList* list_; Node* node_; // Intentionally copyable }; private: const uint16_t kMaxHeight_; const uint16_t kBranching_; const uint32_t kScaledInverseBranching_; Allocator* const allocator_; // Allocator used for allocations of nodes // Immutable after construction Comparator const compare_; Node* const head_; // Modified only by Insert(). Read racily by readers, but stale // values are ok. std::atomic max_height_; // Height of the entire list // seq_splice_ is a Splice used for insertions in the non-concurrent // case. It caches the prev and next found during the most recent // non-concurrent insertion. 
Splice* seq_splice_; inline int GetMaxHeight() const { return max_height_.load(std::memory_order_relaxed); } int RandomHeight(); Node* AllocateNode(size_t key_size, int height); bool Equal(const char* a, const char* b) const { return (compare_(a, b) == 0); } bool LessThan(const char* a, const char* b) const { return (compare_(a, b) < 0); } // Return true if key is greater than the data stored in "n". Null n // is considered infinite. n should not be head_. bool KeyIsAfterNode(const char* key, Node* n) const; bool KeyIsAfterNode(const DecodedKey& key, Node* n) const; // Returns the earliest node with a key >= key. // Return nullptr if there is no such node. Node* FindGreaterOrEqual(const char* key) const; // Return the latest node with a key < key. // Return head_ if there is no such node. // Fills prev[level] with pointer to previous node at "level" for every // level in [0..max_height_-1], if prev is non-null. Node* FindLessThan(const char* key, Node** prev = nullptr) const; // Return the latest node with a key < key on bottom_level. Start searching // from root node on the level below top_level. // Fills prev[level] with pointer to previous node at "level" for every // level in [bottom_level..top_level-1], if prev is non-null. Node* FindLessThan(const char* key, Node** prev, Node* root, int top_level, int bottom_level) const; // Return the last node in the list. // Return head_ if list is empty. Node* FindLast() const; // Traverses a single level of the list, setting *out_prev to the last // node before the key and *out_next to the first node after. Assumes // that the key is not present in the skip list. On entry, before should // point to a node that is before the key, and after should point to // a node that is after the key. after should be nullptr if a good after // node isn't conveniently available. 
template void FindSpliceForLevel(const DecodedKey& key, Node* before, Node* after, int level, Node** out_prev, Node** out_next); // Recomputes Splice levels from highest_level (inclusive) down to // lowest_level (inclusive). void RecomputeSpliceLevels(const DecodedKey& key, Splice* splice, int recompute_level); }; // Implementation details follow template struct InlineSkipList::Splice { // The invariant of a Splice is that prev_[i+1].key <= prev_[i].key < // next_[i].key <= next_[i+1].key for all i. That means that if a // key is bracketed by prev_[i] and next_[i] then it is bracketed by // all higher levels. It is _not_ required that prev_[i]->Next(i) == // next_[i] (it probably did at some point in the past, but intervening // or concurrent operations might have inserted nodes in between). int height_ = 0; Node** prev_; Node** next_; }; // The Node data type is more of a pointer into custom-managed memory than // a traditional C++ struct. The key is stored in the bytes immediately // after the struct, and the next_ pointers for nodes with height > 1 are // stored immediately _before_ the struct. This avoids the need to include // any pointer or sizing data, which reduces per-node memory overheads. template struct InlineSkipList::Node { // Stores the height of the node in the memory location normally used for // next_[0]. This is used for passing data from AllocateKey to Insert. void StashHeight(const int height) { assert(sizeof(int) <= sizeof(next_[0])); memcpy(static_cast(&next_[0]), &height, sizeof(int)); } // Retrieves the value passed to StashHeight. Undefined after a call // to SetNext or NoBarrier_SetNext. int UnstashHeight() const { int rv; memcpy(&rv, &next_[0], sizeof(int)); return rv; } const char* Key() const { return reinterpret_cast(&next_[1]); } // Accessors/mutators for links. Wrapped in methods so we can add // the appropriate barriers as necessary, and perform the necessary // addressing trickery for storing links below the Node in memory. 
Node* Next(int n) { assert(n >= 0); // Use an 'acquire load' so that we observe a fully initialized // version of the returned Node. return ((&next_[0] - n)->load(std::memory_order_acquire)); } void SetNext(int n, Node* x) { assert(n >= 0); // Use a 'release store' so that anybody who reads through this // pointer observes a fully initialized version of the inserted node. (&next_[0] - n)->store(x, std::memory_order_release); } bool CASNext(int n, Node* expected, Node* x) { assert(n >= 0); return (&next_[0] - n)->compare_exchange_strong(expected, x); } // No-barrier variants that can be safely used in a few locations. Node* NoBarrier_Next(int n) { assert(n >= 0); return (&next_[0] - n)->load(std::memory_order_relaxed); } void NoBarrier_SetNext(int n, Node* x) { assert(n >= 0); (&next_[0] - n)->store(x, std::memory_order_relaxed); } // Insert node after prev on specific level. void InsertAfter(Node* prev, int level) { // NoBarrier_SetNext() suffices since we will add a barrier when // we publish a pointer to "this" in prev. NoBarrier_SetNext(level, prev->NoBarrier_Next(level)); prev->SetNext(level, this); } private: // next_[0] is the lowest level link (level 0). Higher levels are // stored _earlier_, so level 1 is at next_[-1]. std::atomic next_[1]; }; template inline InlineSkipList::Iterator::Iterator( const InlineSkipList* list) { SetList(list); } template inline void InlineSkipList::Iterator::SetList( const InlineSkipList* list) { list_ = list; node_ = nullptr; } template inline bool InlineSkipList::Iterator::Valid() const { return node_ != nullptr; } template inline const char* InlineSkipList::Iterator::key() const { assert(Valid()); return node_->Key(); } template inline void InlineSkipList::Iterator::Next() { assert(Valid()); node_ = node_->Next(0); } template inline void InlineSkipList::Iterator::Prev() { // Instead of using explicit "prev" links, we just search for the // last node that falls before key. 
assert(Valid()); node_ = list_->FindLessThan(node_->Key()); if (node_ == list_->head_) { node_ = nullptr; } } template inline void InlineSkipList::Iterator::Seek(const char* target) { node_ = list_->FindGreaterOrEqual(target); } template inline void InlineSkipList::Iterator::SeekForPrev( const char* target) { Seek(target); if (!Valid()) { SeekToLast(); } while (Valid() && list_->LessThan(target, key())) { Prev(); } } template inline void InlineSkipList::Iterator::SeekToFirst() { node_ = list_->head_->Next(0); } template inline void InlineSkipList::Iterator::SeekToLast() { node_ = list_->FindLast(); if (node_ == list_->head_) { node_ = nullptr; } } template int InlineSkipList::RandomHeight() { auto rnd = Random::GetTLSInstance(); // Increase height with probability 1 in kBranching int height = 1; while (height < kMaxHeight_ && height < kMaxPossibleHeight && rnd->Next() < kScaledInverseBranching_) { height++; } assert(height > 0); assert(height <= kMaxHeight_); assert(height <= kMaxPossibleHeight); return height; } template bool InlineSkipList::KeyIsAfterNode(const char* key, Node* n) const { // nullptr n is considered infinite assert(n != head_); return (n != nullptr) && (compare_(n->Key(), key) < 0); } template bool InlineSkipList::KeyIsAfterNode(const DecodedKey& key, Node* n) const { // nullptr n is considered infinite assert(n != head_); return (n != nullptr) && (compare_(n->Key(), key) < 0); } template typename InlineSkipList::Node* InlineSkipList::FindGreaterOrEqual(const char* key) const { // Note: It looks like we could reduce duplication by implementing // this function as FindLessThan(key)->Next(0), but we wouldn't be able // to exit early on equality and the result wouldn't even be correct. // A concurrent insert might occur after FindLessThan(key) but before // we get a chance to call Next(0). 
Node* x = head_; int level = GetMaxHeight() - 1; Node* last_bigger = nullptr; const DecodedKey key_decoded = compare_.decode_key(key); while (true) { Node* next = x->Next(level); if (next != nullptr) { PREFETCH(next->Next(level), 0, 1); } // Make sure the lists are sorted assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x)); // Make sure we haven't overshot during our search assert(x == head_ || KeyIsAfterNode(key_decoded, x)); int cmp = (next == nullptr || next == last_bigger) ? 1 : compare_(next->Key(), key_decoded); if (cmp == 0 || (cmp > 0 && level == 0)) { return next; } else if (cmp < 0) { // Keep searching in this list x = next; } else { // Switch to next list, reuse compare_() result last_bigger = next; level--; } } } template typename InlineSkipList::Node* InlineSkipList::FindLessThan(const char* key, Node** prev) const { return FindLessThan(key, prev, head_, GetMaxHeight(), 0); } template typename InlineSkipList::Node* InlineSkipList::FindLessThan(const char* key, Node** prev, Node* root, int top_level, int bottom_level) const { assert(top_level > bottom_level); int level = top_level - 1; Node* x = root; // KeyIsAfter(key, last_not_after) is definitely false Node* last_not_after = nullptr; const DecodedKey key_decoded = compare_.decode_key(key); while (true) { assert(x != nullptr); Node* next = x->Next(level); if (next != nullptr) { PREFETCH(next->Next(level), 0, 1); } assert(x == head_ || next == nullptr || KeyIsAfterNode(next->Key(), x)); assert(x == head_ || KeyIsAfterNode(key_decoded, x)); if (next != last_not_after && KeyIsAfterNode(key_decoded, next)) { // Keep searching in this list assert(next != nullptr); x = next; } else { if (prev != nullptr) { prev[level] = x; } if (level == bottom_level) { return x; } else { // Switch to next list, reuse KeyIsAfterNode() result last_not_after = next; level--; } } } } template typename InlineSkipList::Node* InlineSkipList::FindLast() const { Node* x = head_; int level = GetMaxHeight() - 1; 
while (true) { Node* next = x->Next(level); if (next == nullptr) { if (level == 0) { return x; } else { // Switch to next list level--; } } else { x = next; } } } template uint64_t InlineSkipList::EstimateCount(const char* key) const { uint64_t count = 0; Node* x = head_; int level = GetMaxHeight() - 1; const DecodedKey key_decoded = compare_.decode_key(key); while (true) { assert(x == head_ || compare_(x->Key(), key_decoded) < 0); Node* next = x->Next(level); if (next != nullptr) { PREFETCH(next->Next(level), 0, 1); } if (next == nullptr || compare_(next->Key(), key_decoded) >= 0) { if (level == 0) { return count; } else { // Switch to next list count *= kBranching_; level--; } } else { x = next; count++; } } } template InlineSkipList::InlineSkipList(const Comparator cmp, Allocator* allocator, int32_t max_height, int32_t branching_factor) : kMaxHeight_(static_cast(max_height)), kBranching_(static_cast(branching_factor)), kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_), allocator_(allocator), compare_(cmp), head_(AllocateNode(0, max_height)), max_height_(1), seq_splice_(AllocateSplice()) { assert(max_height > 0 && kMaxHeight_ == static_cast(max_height)); assert(branching_factor > 1 && kBranching_ == static_cast(branching_factor)); assert(kScaledInverseBranching_ > 0); for (int i = 0; i < kMaxHeight_; ++i) { head_->SetNext(i, nullptr); } } template char* InlineSkipList::AllocateKey(size_t key_size) { return const_cast(AllocateNode(key_size, RandomHeight())->Key()); } template typename InlineSkipList::Node* InlineSkipList::AllocateNode(size_t key_size, int height) { auto prefix = sizeof(std::atomic) * (height - 1); // prefix is space for the height - 1 pointers that we store before // the Node instance (next_[-(height - 1) .. -1]). Node starts at // raw + prefix, and holds the bottom-mode (level 0) skip list pointer // next_[0]. key_size is the bytes for the key, which comes just after // the Node. 
char* raw = allocator_->AllocateAligned(prefix + sizeof(Node) + key_size); Node* x = reinterpret_cast(raw + prefix); // Once we've linked the node into the skip list we don't actually need // to know its height, because we can implicitly use the fact that we // traversed into a node at level h to known that h is a valid level // for that node. We need to convey the height to the Insert step, // however, so that it can perform the proper links. Since we're not // using the pointers at the moment, StashHeight temporarily borrow // storage from next_[0] for that purpose. x->StashHeight(height); return x; } template typename InlineSkipList::Splice* InlineSkipList::AllocateSplice() { // size of prev_ and next_ size_t array_size = sizeof(Node*) * (kMaxHeight_ + 1); char* raw = allocator_->AllocateAligned(sizeof(Splice) + array_size * 2); Splice* splice = reinterpret_cast(raw); splice->height_ = 0; splice->prev_ = reinterpret_cast(raw + sizeof(Splice)); splice->next_ = reinterpret_cast(raw + sizeof(Splice) + array_size); return splice; } template typename InlineSkipList::Splice* InlineSkipList::AllocateSpliceOnHeap() { size_t array_size = sizeof(Node*) * (kMaxHeight_ + 1); char* raw = new char[sizeof(Splice) + array_size * 2]; Splice* splice = reinterpret_cast(raw); splice->height_ = 0; splice->prev_ = reinterpret_cast(raw + sizeof(Splice)); splice->next_ = reinterpret_cast(raw + sizeof(Splice) + array_size); return splice; } template bool InlineSkipList::Insert(const char* key) { return Insert(key, seq_splice_, false); } template bool InlineSkipList::InsertConcurrently(const char* key) { Node* prev[kMaxPossibleHeight]; Node* next[kMaxPossibleHeight]; Splice splice; splice.prev_ = prev; splice.next_ = next; return Insert(key, &splice, false); } template bool InlineSkipList::InsertWithHint(const char* key, void** hint) { assert(hint != nullptr); Splice* splice = reinterpret_cast(*hint); if (splice == nullptr) { splice = AllocateSplice(); *hint = reinterpret_cast(splice); } 
return Insert(key, splice, true); } template bool InlineSkipList::InsertWithHintConcurrently(const char* key, void** hint) { assert(hint != nullptr); Splice* splice = reinterpret_cast(*hint); if (splice == nullptr) { splice = AllocateSpliceOnHeap(); *hint = reinterpret_cast(splice); } return Insert(key, splice, true); } template template void InlineSkipList::FindSpliceForLevel(const DecodedKey& key, Node* before, Node* after, int level, Node** out_prev, Node** out_next) { while (true) { Node* next = before->Next(level); if (next != nullptr) { PREFETCH(next->Next(level), 0, 1); } if (prefetch_before == true) { if (next != nullptr && level>0) { PREFETCH(next->Next(level-1), 0, 1); } } assert(before == head_ || next == nullptr || KeyIsAfterNode(next->Key(), before)); assert(before == head_ || KeyIsAfterNode(key, before)); if (next == after || !KeyIsAfterNode(key, next)) { // found it *out_prev = before; *out_next = next; return; } before = next; } } template void InlineSkipList::RecomputeSpliceLevels(const DecodedKey& key, Splice* splice, int recompute_level) { assert(recompute_level > 0); assert(recompute_level <= splice->height_); for (int i = recompute_level - 1; i >= 0; --i) { FindSpliceForLevel(key, splice->prev_[i + 1], splice->next_[i + 1], i, &splice->prev_[i], &splice->next_[i]); } } template template bool InlineSkipList::Insert(const char* key, Splice* splice, bool allow_partial_splice_fix) { Node* x = reinterpret_cast(const_cast(key)) - 1; const DecodedKey key_decoded = compare_.decode_key(key); int height = x->UnstashHeight(); assert(height >= 1 && height <= kMaxHeight_); int max_height = max_height_.load(std::memory_order_relaxed); while (height > max_height) { if (max_height_.compare_exchange_weak(max_height, height)) { // successfully updated it max_height = height; break; } // else retry, possibly exiting the loop because somebody else // increased it } assert(max_height <= kMaxPossibleHeight); int recompute_height = 0; if (splice->height_ < 
max_height) { // Either splice has never been used or max_height has grown since // last use. We could potentially fix it in the latter case, but // that is tricky. splice->prev_[max_height] = head_; splice->next_[max_height] = nullptr; splice->height_ = max_height; recompute_height = max_height; } else { // Splice is a valid proper-height splice that brackets some // key, but does it bracket this one? We need to validate it and // recompute a portion of the splice (levels 0..recompute_height-1) // that is a superset of all levels that don't bracket the new key. // Several choices are reasonable, because we have to balance the work // saved against the extra comparisons required to validate the Splice. // // One strategy is just to recompute all of orig_splice_height if the // bottom level isn't bracketing. This pessimistically assumes that // we will either get a perfect Splice hit (increasing sequential // inserts) or have no locality. // // Another strategy is to walk up the Splice's levels until we find // a level that brackets the key. This strategy lets the Splice // hint help for other cases: it turns insertion from O(log N) into // O(log D), where D is the number of nodes in between the key that // produced the Splice and the current insert (insertion is aided // whether the new key is before or after the splice). If you have // a way of using a prefix of the key to map directly to the closest // Splice out of O(sqrt(N)) Splices and we make it so that splices // can also be used as hints during read, then we end up with Oshman's // and Shavit's SkipTrie, which has O(log log N) lookup and insertion // (compare to O(log N) for skip list). // // We control the pessimistic strategy with allow_partial_splice_fix. // A good strategy is probably to be pessimistic for seq_splice_, // optimistic if the caller actually went to the work of providing // a Splice. 
while (recompute_height < max_height) { if (splice->prev_[recompute_height]->Next(recompute_height) != splice->next_[recompute_height]) { // splice isn't tight at this level, there must have been some inserts // to this // location that didn't update the splice. We might only be a little // stale, but if // the splice is very stale it would be O(N) to fix it. We haven't used // up any of // our budget of comparisons, so always move up even if we are // pessimistic about // our chances of success. ++recompute_height; } else if (splice->prev_[recompute_height] != head_ && !KeyIsAfterNode(key_decoded, splice->prev_[recompute_height])) { // key is from before splice if (allow_partial_splice_fix) { // skip all levels with the same node without more comparisons Node* bad = splice->prev_[recompute_height]; while (splice->prev_[recompute_height] == bad) { ++recompute_height; } } else { // we're pessimistic, recompute everything recompute_height = max_height; } } else if (KeyIsAfterNode(key_decoded, splice->next_[recompute_height])) { // key is from after splice if (allow_partial_splice_fix) { Node* bad = splice->next_[recompute_height]; while (splice->next_[recompute_height] == bad) { ++recompute_height; } } else { recompute_height = max_height; } } else { // this level brackets the key, we won! 
break; } } } assert(recompute_height <= max_height); if (recompute_height > 0) { RecomputeSpliceLevels(key_decoded, splice, recompute_height); } bool splice_is_valid = true; if (UseCAS) { for (int i = 0; i < height; ++i) { while (true) { // Checking for duplicate keys on the level 0 is sufficient if (UNLIKELY(i == 0 && splice->next_[i] != nullptr && compare_(x->Key(), splice->next_[i]->Key()) >= 0)) { // duplicate key return false; } if (UNLIKELY(i == 0 && splice->prev_[i] != head_ && compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) { // duplicate key return false; } assert(splice->next_[i] == nullptr || compare_(x->Key(), splice->next_[i]->Key()) < 0); assert(splice->prev_[i] == head_ || compare_(splice->prev_[i]->Key(), x->Key()) < 0); x->NoBarrier_SetNext(i, splice->next_[i]); if (splice->prev_[i]->CASNext(i, splice->next_[i], x)) { // success break; } // CAS failed, we need to recompute prev and next. It is unlikely // to be helpful to try to use a different level as we redo the // search, because it should be unlikely that lots of nodes have // been inserted between prev[i] and next[i]. No point in using // next[i] as the after hint, because we know it is stale. FindSpliceForLevel(key_decoded, splice->prev_[i], nullptr, i, &splice->prev_[i], &splice->next_[i]); // Since we've narrowed the bracket for level i, we might have // violated the Splice constraint between i and i-1. Make sure // we recompute the whole thing next time. 
if (i > 0) { splice_is_valid = false; } } } } else { for (int i = 0; i < height; ++i) { if (i >= recompute_height && splice->prev_[i]->Next(i) != splice->next_[i]) { FindSpliceForLevel(key_decoded, splice->prev_[i], nullptr, i, &splice->prev_[i], &splice->next_[i]); } // Checking for duplicate keys on the level 0 is sufficient if (UNLIKELY(i == 0 && splice->next_[i] != nullptr && compare_(x->Key(), splice->next_[i]->Key()) >= 0)) { // duplicate key return false; } if (UNLIKELY(i == 0 && splice->prev_[i] != head_ && compare_(splice->prev_[i]->Key(), x->Key()) >= 0)) { // duplicate key return false; } assert(splice->next_[i] == nullptr || compare_(x->Key(), splice->next_[i]->Key()) < 0); assert(splice->prev_[i] == head_ || compare_(splice->prev_[i]->Key(), x->Key()) < 0); assert(splice->prev_[i]->Next(i) == splice->next_[i]); x->NoBarrier_SetNext(i, splice->next_[i]); splice->prev_[i]->SetNext(i, x); } } if (splice_is_valid) { for (int i = 0; i < height; ++i) { splice->prev_[i] = x; } assert(splice->prev_[splice->height_] == head_); assert(splice->next_[splice->height_] == nullptr); for (int i = 0; i < splice->height_; ++i) { assert(splice->next_[i] == nullptr || compare_(key, splice->next_[i]->Key()) < 0); assert(splice->prev_[i] == head_ || compare_(splice->prev_[i]->Key(), key) <= 0); assert(splice->prev_[i + 1] == splice->prev_[i] || splice->prev_[i + 1] == head_ || compare_(splice->prev_[i + 1]->Key(), splice->prev_[i]->Key()) < 0); assert(splice->next_[i + 1] == splice->next_[i] || splice->next_[i + 1] == nullptr || compare_(splice->next_[i]->Key(), splice->next_[i + 1]->Key()) < 0); } } else { splice->height_ = 0; } return true; } template bool InlineSkipList::Contains(const char* key) const { Node* x = FindGreaterOrEqual(key); if (x != nullptr && Equal(key, x->Key())) { return true; } else { return false; } } template void InlineSkipList::TEST_Validate() const { // Interate over all levels at the same time, and verify nodes appear in // the right order, and 
nodes appear in upper level also appear in lower // levels. Node* nodes[kMaxPossibleHeight]; int max_height = GetMaxHeight(); assert(max_height > 0); for (int i = 0; i < max_height; i++) { nodes[i] = head_; } while (nodes[0] != nullptr) { Node* l0_next = nodes[0]->Next(0); if (l0_next == nullptr) { break; } assert(nodes[0] == head_ || compare_(nodes[0]->Key(), l0_next->Key()) < 0); nodes[0] = l0_next; int i = 1; while (i < max_height) { Node* next = nodes[i]->Next(i); if (next == nullptr) { break; } auto cmp = compare_(nodes[0]->Key(), next->Key()); assert(cmp <= 0); if (cmp == 0) { assert(next == nodes[0]); nodes[i] = next; } else { break; } i++; } } for (int i = 1; i < max_height; i++) { assert(nodes[i] != nullptr && nodes[i]->Next(i) == nullptr); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/inlineskiplist_test.cc000066400000000000000000000434171370372246700214530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "memtable/inlineskiplist.h"

#include <set>
#include <unordered_set>

#include "memory/concurrent_arena.h"
#include "rocksdb/env.h"
#include "test_util/testharness.h"
#include "util/hash.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {

// Our test skip list stores 8-byte unsigned integers
typedef uint64_t Key;

// Views the in-memory bytes of *key as an encoded skip list key.
static const char* Encode(const uint64_t* key) {
  return reinterpret_cast<const char*>(key);
}

// Reads a Key back out of its encoded bytes (memcpy, so no alignment
// requirement on the source pointer).
static Key Decode(const char* key) {
  Key rv;
  memcpy(&rv, key, sizeof(Key));
  return rv;
}

// Orders encoded keys by their decoded 64-bit value.
struct TestComparator {
  typedef Key DecodedType;

  static DecodedType decode_key(const char* b) { return Decode(b); }

  int operator()(const char* a, const char* b) const {
    if (Decode(a) < Decode(b)) {
      return -1;
    } else if (Decode(a) > Decode(b)) {
      return +1;
    } else {
      return 0;
    }
  }

  int operator()(const char* a, const DecodedType b) const {
    if (Decode(a) < b) {
      return -1;
    } else if (Decode(a) > b) {
      return +1;
    } else {
      return 0;
    }
  }
};

typedef InlineSkipList<TestComparator> TestInlineSkipList;

class InlineSkipTest : public testing::Test {
 public:
  // Inserts key into list and records it in the keys_ model.
  void Insert(TestInlineSkipList* list, Key key) {
    char* buf = list->AllocateKey(sizeof(Key));
    memcpy(buf, &key, sizeof(Key));
    list->Insert(buf);
    keys_.insert(key);
  }

  // Same as Insert() but goes through InsertWithHint(); returns its result.
  bool InsertWithHint(TestInlineSkipList* list, Key key, void** hint) {
    char* buf = list->AllocateKey(sizeof(Key));
    memcpy(buf, &key, sizeof(Key));
    bool res = list->InsertWithHint(buf, hint);
    keys_.insert(key);
    return res;
  }

  void Validate(TestInlineSkipList* list) {
    // Check keys exist.
    for (Key key : keys_) {
      ASSERT_TRUE(list->Contains(Encode(&key)));
    }
    // Iterate over the list, make sure keys appears in order and no extra
    // keys exist.
    TestInlineSkipList::Iterator iter(list);
    ASSERT_FALSE(iter.Valid());
    Key zero = 0;
    iter.Seek(Encode(&zero));
    for (Key key : keys_) {
      ASSERT_TRUE(iter.Valid());
      ASSERT_EQ(key, Decode(iter.key()));
      iter.Next();
    }
    ASSERT_FALSE(iter.Valid());
    // Validate the list is well-formed.
    list->TEST_Validate();
  }

 private:
  // Model of the expected list contents, kept in sorted order.
  std::set<Key> keys_;
};

TEST_F(InlineSkipTest, Empty) {
  Arena arena;
  TestComparator cmp;
  InlineSkipList<TestComparator> list(cmp, &arena);
  Key key = 10;
  ASSERT_TRUE(!list.Contains(Encode(&key)));

  InlineSkipList<TestComparator>::Iterator iter(&list);
  ASSERT_TRUE(!iter.Valid());
  iter.SeekToFirst();
  ASSERT_TRUE(!iter.Valid());
  key = 100;
  iter.Seek(Encode(&key));
  ASSERT_TRUE(!iter.Valid());
  iter.SeekForPrev(Encode(&key));
  ASSERT_TRUE(!iter.Valid());
  iter.SeekToLast();
  ASSERT_TRUE(!iter.Valid());
}

TEST_F(InlineSkipTest, InsertAndLookup) {
  const int N = 2000;
  const int R = 5000;
  Random rnd(1000);
  std::set<Key> keys;
  ConcurrentArena arena;
  TestComparator cmp;
  InlineSkipList<TestComparator> list(cmp, &arena);
  for (int i = 0; i < N; i++) {
    Key key = rnd.Next() % R;
    if (keys.insert(key).second) {
      char* buf = list.AllocateKey(sizeof(Key));
      memcpy(buf, &key, sizeof(Key));
      list.Insert(buf);
    }
  }

  for (Key i = 0; i < R; i++) {
    if (list.Contains(Encode(&i))) {
      ASSERT_EQ(keys.count(i), 1U);
    } else {
      ASSERT_EQ(keys.count(i), 0U);
    }
  }

  // Simple iterator tests
  {
    InlineSkipList<TestComparator>::Iterator iter(&list);
    ASSERT_TRUE(!iter.Valid());

    uint64_t zero = 0;
    iter.Seek(Encode(&zero));
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.begin()), Decode(iter.key()));

    uint64_t max_key = R - 1;
    iter.SeekForPrev(Encode(&max_key));
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.rbegin()), Decode(iter.key()));

    iter.SeekToFirst();
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.begin()), Decode(iter.key()));

    iter.SeekToLast();
    ASSERT_TRUE(iter.Valid());
    ASSERT_EQ(*(keys.rbegin()), Decode(iter.key()));
  }

  // Forward iteration test
  for (Key i = 0; i < R; i++) {
    InlineSkipList<TestComparator>::Iterator iter(&list);
    iter.Seek(Encode(&i));

    // Compare against model iterator
    std::set<Key>::iterator model_iter = keys.lower_bound(i);
    for (int j = 0; j < 3; j++) {
      if (model_iter == keys.end()) {
        ASSERT_TRUE(!iter.Valid());
        break;
      } else {
        ASSERT_TRUE(iter.Valid());
        ASSERT_EQ(*model_iter, Decode(iter.key()));
        ++model_iter;
        iter.Next();
      }
    }
  }

  // Backward iteration test
  for (Key i = 0; i < R; i++) {
    InlineSkipList<TestComparator>::Iterator iter(&list);
    iter.SeekForPrev(Encode(&i));

    // Compare against model iterator
    std::set<Key>::iterator model_iter = keys.upper_bound(i);
    for (int j = 0; j < 3; j++) {
      if (model_iter == keys.begin()) {
        ASSERT_TRUE(!iter.Valid());
        break;
      } else {
        ASSERT_TRUE(iter.Valid());
        ASSERT_EQ(*--model_iter, Decode(iter.key()));
        iter.Prev();
      }
    }
  }
}

TEST_F(InlineSkipTest, InsertWithHint_Sequential) {
  const int N = 100000;
  Arena arena;
  TestComparator cmp;
  TestInlineSkipList list(cmp, &arena);
  void* hint = nullptr;
  for (int i = 0; i < N; i++) {
    Key key = i;
    InsertWithHint(&list, key, &hint);
  }
  Validate(&list);
}

TEST_F(InlineSkipTest, InsertWithHint_MultipleHints) {
  const int N = 100000;
  const int S = 100;
  Random rnd(534);
  Arena arena;
  TestComparator cmp;
  TestInlineSkipList list(cmp, &arena);
  void* hints[S];
  Key last_key[S];
  for (int i = 0; i < S; i++) {
    hints[i] = nullptr;
    last_key[i] = 0;
  }
  for (int i = 0; i < N; i++) {
    // Each stream s owns the key range with s in the upper 32 bits and
    // inserts increasing keys using its own hint.
    Key s = rnd.Uniform(S);
    Key key = (s << 32) + (++last_key[s]);
    InsertWithHint(&list, key, &hints[s]);
  }
  Validate(&list);
}

TEST_F(InlineSkipTest, InsertWithHint_MultipleHintsRandom) {
  const int N = 100000;
  const int S = 100;
  Random rnd(534);
  Arena arena;
  TestComparator cmp;
  TestInlineSkipList list(cmp, &arena);
  void* hints[S];
  for (int i = 0; i < S; i++) {
    hints[i] = nullptr;
  }
  for (int i = 0; i < N; i++) {
    Key s = rnd.Uniform(S);
    Key key = (s << 32) + rnd.Next();
    InsertWithHint(&list, key, &hints[s]);
  }
  Validate(&list);
}

TEST_F(InlineSkipTest, InsertWithHint_CompatibleWithInsertWithoutHint) {
  const int N = 100000;
  const int S1 = 100;
  const int S2 = 100;
  Random rnd(534);
  Arena arena;
  TestComparator cmp;
  TestInlineSkipList list(cmp, &arena);
  std::unordered_set<Key> used;
  Key with_hint[S1];
  Key without_hint[S2];
  void* hints[S1];
  // Pick S1 + S2 distinct prefixes: the first S1 are inserted with hints,
  // the remaining S2 without.
  for (int i = 0; i < S1; i++) {
    hints[i] = nullptr;
    while (true) {
      Key s = rnd.Next();
      if (used.insert(s).second) {
        with_hint[i] = s;
        break;
      }
    }
  }
  for (int i = 0; i < S2; i++) {
    while (true) {
      Key s = rnd.Next();
      if (used.insert(s).second) {
        without_hint[i] = s;
        break;
      }
    }
  }
  for (int i = 0; i < N; i++) {
    Key s = rnd.Uniform(S1 + S2);
    if (s < S1) {
      Key key = (with_hint[s] << 32) + rnd.Next();
      InsertWithHint(&list, key, &hints[s]);
    } else {
      Key key = (without_hint[s - S1] << 32) + rnd.Next();
      Insert(&list, key);
    }
  }
  Validate(&list);
}

#ifndef ROCKSDB_VALGRIND_RUN
// We want to make sure that with a single writer and multiple
// concurrent readers (with no synchronization other than when a
// reader's iterator is created), the reader always observes all the
// data that was present in the skip list when the iterator was
// constructed.  Because insertions are happening concurrently, we may
// also observe new values that were inserted since the iterator was
// constructed, but we should never miss any values that were present
// at iterator construction time.
//
// We generate multi-part keys:
//     <key,gen,hash>
// where:
//     key is in range [0..K-1]
//     gen is a generation number for key
//     hash is hash(key,gen)
//
// The insertion code picks a random key, sets gen to be 1 + the last
// generation number inserted for that key, and sets hash to Hash(key,gen).
//
// At the beginning of a read, we snapshot the last inserted
// generation number for each key.  We then iterate, including random
// calls to Next() and Seek().  For every key we encounter, we
// check that it is either expected given the initial snapshot or has
// been concurrently added since the iterator started.
// Scaffolding for the concurrent skip-list tests.
//
// Owns an InlineSkipList together with a table (`current_`) that records, for
// each of the K logical keys, the last generation number that has been
// inserted. Writers insert <key, gen+1, hash> and then publish gen+1 in
// `current_`; readers snapshot `current_`, iterate the list, and assert that
// no generation recorded in the snapshot is missing from the iteration.
//
// NOTE(review): template/cast type arguments appear to be missing in this copy
// of the file (e.g. `std::atomic generation[K]`, `reinterpret_cast(data)`,
// `InlineSkipList list_`); confirm against the upstream source before building.
class ConcurrentTest {
 public:
  // Number of distinct logical keys; also used as the "past the end" key value.
  static const uint32_t K = 8;

 private:
  // Accessors for the three fields packed into a Key:
  //   bits 40 and up : logical key
  //   bits 8..39     : generation (masked to 32 bits)
  //   bits 0..7      : low byte of the hash of (key, gen)
  static uint64_t key(Key key) { return (key >> 40); }
  static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; }
  static uint64_t hash(Key key) { return key & 0xff; }

  // Hashes the pair (k, g) by feeding the two 64-bit values to Hash() with
  // seed 0.
  static uint64_t HashNumbers(uint64_t k, uint64_t g) {
    uint64_t data[2] = {k, g};
    return Hash(reinterpret_cast(data), sizeof(data), 0);
  }

  // Packs (k, g) and the low byte of their hash into a Key using the layout
  // described above.
  static Key MakeKey(uint64_t k, uint64_t g) {
    assert(sizeof(Key) == sizeof(uint64_t));
    assert(k <= K);  // We sometimes pass K to seek to the end of the skiplist
    assert(g <= 0xffffffffu);
    return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff));
  }

  // Returns true iff the hash byte stored in `k` matches the hash recomputed
  // from its key and generation fields.
  static bool IsValidKey(Key k) {
    return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff);
  }

  // Picks a seek target: 1 in 10 the very first possible key, 1 in 10 the
  // past-the-end key, otherwise generation 0 of a random logical key.
  static Key RandomTarget(Random* rnd) {
    switch (rnd->Next() % 10) {
      case 0:
        // Seek to beginning
        return MakeKey(0, 0);
      case 1:
        // Seek to end
        return MakeKey(K, 0);
      default:
        // Seek to middle
        return MakeKey(rnd->Next() % K, 0);
    }
  }

  // Per-key generation
  // Each slot holds the last generation published for that logical key.
  // Set() stores with release ordering and Get() loads with acquire ordering.
  struct State {
    std::atomic generation[K];
    void Set(int k, int v) {
      generation[k].store(v, std::memory_order_release);
    }
    int Get(int k) { return generation[k].load(std::memory_order_acquire); }

    // Starts every key at generation 0 (generation 0 itself is never
    // inserted; the write steps always insert Get(k) + 1).
    State() {
      for (unsigned int k = 0; k < K; k++) {
        Set(k, 0);
      }
    }
  };

  // Current state of the test
  State current_;

  // Arena handed to the skip list at construction.
  ConcurrentArena arena_;

  // InlineSkipList is not protected by mu_.  We just use a single writer
  // thread to modify it.
  InlineSkipList list_;

 public:
  ConcurrentTest() : list_(TestComparator(), &arena_) {}

  // Inserts the next generation of a randomly chosen key using the
  // non-concurrent Insert(), then publishes that generation in current_.
  // The publish happens after the insert, so a reader that observes the new
  // generation in current_ is entitled to find the key in the list.
  // REQUIRES: No concurrent calls to WriteStep or ConcurrentWriteStep
  void WriteStep(Random* rnd) {
    const uint32_t k = rnd->Next() % K;
    const int g = current_.Get(k) + 1;
    const Key new_key = MakeKey(k, g);
    char* buf = list_.AllocateKey(sizeof(Key));
    memcpy(buf, &new_key, sizeof(Key));
    list_.Insert(buf);
    current_.Set(k, g);
  }

  // Inserts the next generation of key `k` through one of the concurrent
  // insert entry points (with or without a hint), then publishes it.
  // REQUIRES: No concurrent calls for the same k
  void ConcurrentWriteStep(uint32_t k, bool use_hint = false) {
    const int g = current_.Get(k) + 1;
    const Key new_key = MakeKey(k, g);
    char* buf = list_.AllocateKey(sizeof(Key));
    memcpy(buf, &new_key, sizeof(Key));
    if (use_hint) {
      // A fresh (null) hint is used for every insert and released right after.
      // NOTE(review): presumably the list allocates the hint with new[];
      // confirm against InsertWithHintConcurrently.
      void* hint = nullptr;
      list_.InsertWithHintConcurrently(buf, &hint);
      delete[] reinterpret_cast(hint);
    } else {
      list_.InsertConcurrently(buf);
    }
    // Nobody else may have advanced the generation of k while we inserted.
    ASSERT_EQ(g, current_.Get(k) + 1);
    current_.Set(k, g);
  }

  // Performs one randomized read pass and checks the "no missed keys"
  // property: every key position skipped over by the iterator must either be
  // generation 0 or have a generation newer than the snapshot taken at the
  // start of the pass.
  void ReadStep(Random* rnd) {
    // Remember the initial committed state of the skiplist.
    State initial_state;
    for (unsigned int k = 0; k < K; k++) {
      initial_state.Set(k, current_.Get(k));
    }

    // `pos` is the smallest key that has not yet been accounted for.
    Key pos = RandomTarget(rnd);
    InlineSkipList::Iterator iter(&list_);
    iter.Seek(Encode(&pos));
    while (true) {
      // `current` is the key the iterator is on, or the past-the-end key when
      // the iterator is exhausted.
      Key current;
      if (!iter.Valid()) {
        current = MakeKey(K, 0);
      } else {
        current = Decode(iter.key());
        ASSERT_TRUE(IsValidKey(current)) << current;
      }
      ASSERT_LE(pos, current) << "should not go backwards";

      // Verify that everything in [pos,current) was not present in
      // initial_state.
      while (pos < current) {
        ASSERT_LT(key(pos), K) << pos;

        // Note that generation 0 is never inserted, so it is ok if
        // <*,0,*> is missing.
        ASSERT_TRUE((gen(pos) == 0U) ||
                    (gen(pos) > static_cast(initial_state.Get(
                                    static_cast(key(pos))))))
            << "key: " << key(pos) << "; gen: " << gen(pos)
            << "; initgen: " << initial_state.Get(static_cast(key(pos)));

        // Advance to next key in the valid key space
        if (key(pos) < key(current)) {
          pos = MakeKey(key(pos) + 1, 0);
        } else {
          pos = MakeKey(key(pos), gen(pos) + 1);
        }
      }

      if (!iter.Valid()) {
        break;
      }

      // Randomly either step forward or re-seek to a new (later) target.
      if (rnd->Next() % 2) {
        iter.Next();
        // The loop above leaves pos == current, so this moves pos just past
        // the entry the iterator was on.
        pos = MakeKey(key(pos), gen(pos) + 1);
      } else {
        Key new_target = RandomTarget(rnd);
        // Only seek forward; targets at or before pos are ignored.
        if (new_target > pos) {
          pos = new_target;
          iter.Seek(Encode(&new_target));
        }
      }
    }
  }
};
// Out-of-class definition for the in-class initialized static constant.
const uint32_t ConcurrentTest::K;

// Simple test that does single-threaded testing of the ConcurrentTest
// scaffolding.
TEST_F(InlineSkipTest, ConcurrentReadWithoutThreads) {
  ConcurrentTest test;
  Random rnd(test::RandomSeed());
  for (int i = 0; i < 10000; i++) {
    test.ReadStep(&rnd);
    test.WriteStep(&rnd);
  }
}

// Same as above, but exercises the concurrent insert entry point from a
// single thread: each round inserts into four consecutive keys (mod K).
TEST_F(InlineSkipTest, ConcurrentInsertWithoutThreads) {
  ConcurrentTest test;
  Random rnd(test::RandomSeed());
  for (int i = 0; i < 10000; i++) {
    test.ReadStep(&rnd);
    uint32_t base = rnd.Next();
    for (int j = 0; j < 4; ++j) {
      test.ConcurrentWriteStep((base + j) % ConcurrentTest::K);
    }
  }
}

// Shared state between the test driver and the background reader/writer
// callbacks: the ConcurrentTest under test, the reader's lifecycle state, and
// a count of writers that have been scheduled but have not finished.
class TestState {
 public:
  ConcurrentTest t_;
  // Whether writers use the hinted insert path.
  // Not initialized by the constructor; RunConcurrentInsert assigns it.
  bool use_hint_;
  // Seed for the reader's Random.
  int seed_;
  // Set by the driver to ask the reader to stop.
  std::atomic quit_flag_;
  // Counter from which each writer derives its key.
  // Not initialized by the constructor; RunConcurrentInsert assigns it before
  // scheduling writers.
  std::atomic next_writer_;

  enum ReaderState { STARTING, RUNNING, DONE };

  explicit TestState(int s)
      : seed_(s),
        quit_flag_(false),
        state_(STARTING),
        pending_writers_(0),
        state_cv_(&mu_) {}

  // Blocks until the reader state equals `s`.
  void Wait(ReaderState s) {
    mu_.Lock();
    while (state_ != s) {
      state_cv_.Wait();
    }
    mu_.Unlock();
  }

  // Sets the reader state to `s` and signals the condition variable.
  void Change(ReaderState s) {
    mu_.Lock();
    state_ = s;
    state_cv_.Signal();
    mu_.Unlock();
  }

  // Adds `delta` to the pending-writer count; signals the condition variable
  // when the count reaches zero.
  void AdjustPendingWriters(int delta) {
    mu_.Lock();
    pending_writers_ += delta;
    if (pending_writers_ == 0) {
      state_cv_.Signal();
    }
    mu_.Unlock();
  }

  // Blocks until the pending-writer count is zero.
  void WaitForPendingWriters() {
    mu_.Lock();
    while (pending_writers_ != 0) {
      state_cv_.Wait();
    }
    mu_.Unlock();
  }

 private:
  // Guards state_ and pending_writers_; both share the one condition variable.
  port::Mutex mu_;
  ReaderState state_;
  int pending_writers_;
  port::CondVar state_cv_;
};

// Background reader: announces RUNNING, performs ReadStep() in a loop until
// the driver sets quit_flag_, then announces DONE. `arg` is the TestState.
static void ConcurrentReader(void* arg) {
  TestState* state = reinterpret_cast(arg);
  Random rnd(state->seed_);
  // Counts completed read passes; the value is not used afterwards here.
  int64_t reads = 0;
  state->Change(TestState::RUNNING);
  while (!state->quit_flag_.load(std::memory_order_acquire)) {
    state->t_.ReadStep(&rnd);
    ++reads;
  }
  state->Change(TestState::DONE);
}

// Background writer: claims the next value of next_writer_, inserts one new
// generation for key (value % K), and decrements the pending-writer count.
// `arg` is the TestState.
static void ConcurrentWriter(void* arg) {
  TestState* state = reinterpret_cast(arg);
  uint32_t k = state->next_writer_++ % ConcurrentTest::K;
  state->t_.ConcurrentWriteStep(k, state->use_hint_);
  state->AdjustPendingWriters(-1);
}

// Runs N rounds of "one background reader, foreground single writer".
// Each round uses a fresh TestState, performs kSize WriteStep() calls while
// the reader is running, then stops the reader and waits for DONE so the
// stack-allocated TestState is no longer in use when it goes out of scope.
// `run` offsets the random seed by run * 100.
static void RunConcurrentRead(int run) {
  const int seed = test::RandomSeed() + (run * 100);
  Random rnd(seed);
  const int N = 1000;
  const int kSize = 1000;
  for (int i = 0; i < N; i++) {
    if ((i % 100) == 0) {
      fprintf(stderr, "Run %d of %d\n", i, N);
    }
    TestState state(seed + 1);
    Env::Default()->SetBackgroundThreads(1);
    Env::Default()->Schedule(ConcurrentReader, &state);
    state.Wait(TestState::RUNNING);
    for (int k = 0; k < kSize; ++k) {
      state.t_.WriteStep(&rnd);
    }
    state.quit_flag_.store(true, std::memory_order_release);
    state.Wait(TestState::DONE);
  }
}

// Runs N rounds of "one background reader, `write_parallelism` background
// writers". Writers are scheduled in batches of `write_parallelism`; the
// driver waits for a batch to drain before scheduling the next one. Within a
// batch the writers take consecutive next_writer_ values, so they target
// distinct keys as long as write_parallelism <= K.
// `run` offsets the random seed by run * 100; `use_hint` selects the hinted
// concurrent insert path.
static void RunConcurrentInsert(int run, bool use_hint = false,
                                int write_parallelism = 4) {
  // One thread for the reader plus one per concurrent writer.
  Env::Default()->SetBackgroundThreads(1 + write_parallelism,
                                       Env::Priority::LOW);
  const int seed = test::RandomSeed() + (run * 100);
  Random rnd(seed);
  const int N = 1000;
  const int kSize = 1000;
  for (int i = 0; i < N; i++) {
    if ((i % 100) == 0) {
      fprintf(stderr, "Run %d of %d\n", i, N);
    }
    TestState state(seed + 1);
    state.use_hint_ = use_hint;
    Env::Default()->Schedule(ConcurrentReader, &state);
    state.Wait(TestState::RUNNING);
    for (int k = 0; k < kSize; k += write_parallelism) {
      state.next_writer_ = rnd.Next();
      // Register the whole batch before scheduling so the count cannot hit
      // zero while writers are still being scheduled.
      state.AdjustPendingWriters(write_parallelism);
      for (int p = 0; p < write_parallelism; ++p) {
        Env::Default()->Schedule(ConcurrentWriter, &state);
      }
      state.WaitForPendingWriters();
    }
    state.quit_flag_.store(true, std::memory_order_release);
    state.Wait(TestState::DONE);
  }
}
// Threaded reader/single-writer tests. The argument is the `run` number that
// RunConcurrentRead() uses to offset the random seed, so each test explores a
// different sequence.
TEST_F(InlineSkipTest, ConcurrentRead1) { RunConcurrentRead(1); }
TEST_F(InlineSkipTest, ConcurrentRead2) { RunConcurrentRead(2); }
TEST_F(InlineSkipTest, ConcurrentRead3) { RunConcurrentRead(3); }
TEST_F(InlineSkipTest, ConcurrentRead4) { RunConcurrentRead(4); }
TEST_F(InlineSkipTest, ConcurrentRead5) { RunConcurrentRead(5); }
// Threaded reader/multi-writer tests using the un-hinted concurrent insert
// (use_hint defaults to false, write_parallelism to 4).
TEST_F(InlineSkipTest, ConcurrentInsert1) { RunConcurrentInsert(1); }
TEST_F(InlineSkipTest, ConcurrentInsert2) { RunConcurrentInsert(2); }
TEST_F(InlineSkipTest, ConcurrentInsert3) { RunConcurrentInsert(3); }
// Same as above, but writers go through the hinted concurrent insert.
TEST_F(InlineSkipTest, ConcurrentInsertWithHint1) {
  RunConcurrentInsert(1, true);
}
TEST_F(InlineSkipTest, ConcurrentInsertWithHint2) {
  RunConcurrentInsert(2, true);
}
TEST_F(InlineSkipTest, ConcurrentInsertWithHint3) {
  RunConcurrentInsert(3, true);
}

#endif  // ROCKSDB_VALGRIND_RUN
}  // namespace ROCKSDB_NAMESPACE

// Test entry point: lets GoogleTest consume its command-line flags, then runs
// every registered test and returns the aggregate result as the exit code.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

// NOTE(review): the next line is not C++; it looks like an archive member
// header marking the start of memtable/memtablerep_bench.cc. Kept verbatim.
rocksdb-6.11.4/memtable/memtablerep_bench.cc000066400000000000000000000573071370372246700210120ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef GFLAGS #include int main() { fprintf(stderr, "Please install gflags to run rocksdb tools\n"); return 1; } #else #include #include #include #include #include #include #include "db/dbformat.h" #include "db/memtable.h" #include "memory/arena.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/comparator.h" #include "rocksdb/memtablerep.h" #include "rocksdb/options.h" #include "rocksdb/slice_transform.h" #include "rocksdb/write_buffer_manager.h" #include "test_util/testutil.h" #include "util/gflags_compat.h" #include "util/mutexlock.h" #include "util/stop_watch.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::RegisterFlagValidator; using GFLAGS_NAMESPACE::SetUsageMessage; DEFINE_string(benchmarks, "fillrandom", "Comma-separated list of benchmarks to run. Options:\n" "\tfillrandom -- write N random values\n" "\tfillseq -- write N values in sequential order\n" "\treadrandom -- read N values in random order\n" "\treadseq -- scan the DB\n" "\treadwrite -- 1 thread writes while N - 1 threads " "do random\n" "\t reads\n" "\tseqreadwrite -- 1 thread writes while N - 1 threads " "do scans\n"); DEFINE_string(memtablerep, "skiplist", "Which implementation of memtablerep to use. See " "include/memtablerep.h for\n" " more details. 
Options:\n" "\tskiplist -- backed by a skiplist\n" "\tvector -- backed by an std::vector\n" "\thashskiplist -- backed by a hash skip list\n" "\thashlinklist -- backed by a hash linked list\n" "\tcuckoo -- backed by a cuckoo hash table"); DEFINE_int64(bucket_count, 1000000, "bucket_count parameter to pass into NewHashSkiplistRepFactory or " "NewHashLinkListRepFactory"); DEFINE_int32( hashskiplist_height, 4, "skiplist_height parameter to pass into NewHashSkiplistRepFactory"); DEFINE_int32( hashskiplist_branching_factor, 4, "branching_factor parameter to pass into NewHashSkiplistRepFactory"); DEFINE_int32( huge_page_tlb_size, 0, "huge_page_tlb_size parameter to pass into NewHashLinkListRepFactory"); DEFINE_int32(bucket_entries_logging_threshold, 4096, "bucket_entries_logging_threshold parameter to pass into " "NewHashLinkListRepFactory"); DEFINE_bool(if_log_bucket_dist_when_flash, true, "if_log_bucket_dist_when_flash parameter to pass into " "NewHashLinkListRepFactory"); DEFINE_int32( threshold_use_skiplist, 256, "threshold_use_skiplist parameter to pass into NewHashLinkListRepFactory"); DEFINE_int64(write_buffer_size, 256, "write_buffer_size parameter to pass into WriteBufferManager"); DEFINE_int32( num_threads, 1, "Number of concurrent threads to run. If the benchmark includes writes,\n" "then at most one thread will be a writer"); DEFINE_int32(num_operations, 1000000, "Number of operations to do for write and random read benchmarks"); DEFINE_int32(num_scans, 10, "Number of times for each thread to scan the memtablerep for " "sequential read " "benchmarks"); DEFINE_int32(item_size, 100, "Number of bytes each item should be"); DEFINE_int32(prefix_length, 8, "Prefix length to pass into NewFixedPrefixTransform"); /* VectorRep settings */ DEFINE_int64(vectorrep_count, 0, "Number of entries to reserve on VectorRep initialization"); DEFINE_int64(seed, 0, "Seed base for random number generators. 
" "When 0 it is deterministic."); namespace ROCKSDB_NAMESPACE { namespace { struct CallbackVerifyArgs { bool found; LookupKey* key; MemTableRep* table; InternalKeyComparator* comparator; }; } // namespace // Helper for quickly generating random data. class RandomGenerator { private: std::string data_; unsigned int pos_; public: RandomGenerator() { Random rnd(301); auto size = (unsigned)std::max(1048576, FLAGS_item_size); test::RandomString(&rnd, size, &data_); pos_ = 0; } Slice Generate(unsigned int len) { assert(len <= data_.size()); if (pos_ + len > data_.size()) { pos_ = 0; } pos_ += len; return Slice(data_.data() + pos_ - len, len); } }; enum WriteMode { SEQUENTIAL, RANDOM, UNIQUE_RANDOM }; class KeyGenerator { public: KeyGenerator(Random64* rand, WriteMode mode, uint64_t num) : rand_(rand), mode_(mode), num_(num), next_(0) { if (mode_ == UNIQUE_RANDOM) { // NOTE: if memory consumption of this approach becomes a concern, // we can either break it into pieces and only random shuffle a section // each time. 
Alternatively, use a bit map implementation // (https://reviews.facebook.net/differential/diff/54627/) values_.resize(num_); for (uint64_t i = 0; i < num_; ++i) { values_[i] = i; } RandomShuffle(values_.begin(), values_.end(), static_cast(FLAGS_seed)); } } uint64_t Next() { switch (mode_) { case SEQUENTIAL: return next_++; case RANDOM: return rand_->Next() % num_; case UNIQUE_RANDOM: return values_[next_++]; } assert(false); return std::numeric_limits::max(); } private: Random64* rand_; WriteMode mode_; const uint64_t num_; uint64_t next_; std::vector values_; }; class BenchmarkThread { public: explicit BenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits) : table_(table), key_gen_(key_gen), bytes_written_(bytes_written), bytes_read_(bytes_read), sequence_(sequence), num_ops_(num_ops), read_hits_(read_hits) {} virtual void operator()() = 0; virtual ~BenchmarkThread() {} protected: MemTableRep* table_; KeyGenerator* key_gen_; uint64_t* bytes_written_; uint64_t* bytes_read_; uint64_t* sequence_; uint64_t num_ops_; uint64_t* read_hits_; RandomGenerator generator_; }; class FillBenchmarkThread : public BenchmarkThread { public: FillBenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits) : BenchmarkThread(table, key_gen, bytes_written, bytes_read, sequence, num_ops, read_hits) {} void FillOne() { char* buf = nullptr; auto internal_key_size = 16; auto encoded_len = FLAGS_item_size + VarintLength(internal_key_size) + internal_key_size; KeyHandle handle = table_->Allocate(encoded_len, &buf); assert(buf != nullptr); char* p = EncodeVarint32(buf, internal_key_size); auto key = key_gen_->Next(); EncodeFixed64(p, key); p += 8; EncodeFixed64(p, ++(*sequence_)); p += 8; Slice bytes = generator_.Generate(FLAGS_item_size); memcpy(p, bytes.data(), FLAGS_item_size); 
p += FLAGS_item_size; assert(p == buf + encoded_len); table_->Insert(handle); *bytes_written_ += encoded_len; } void operator()() override { for (unsigned int i = 0; i < num_ops_; ++i) { FillOne(); } } }; class ConcurrentFillBenchmarkThread : public FillBenchmarkThread { public: ConcurrentFillBenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits, std::atomic_int* threads_done) : FillBenchmarkThread(table, key_gen, bytes_written, bytes_read, sequence, num_ops, read_hits) { threads_done_ = threads_done; } void operator()() override { // # of read threads will be total threads - write threads (always 1). Loop // while all reads complete. while ((*threads_done_).load() < (FLAGS_num_threads - 1)) { FillOne(); } } private: std::atomic_int* threads_done_; }; class ReadBenchmarkThread : public BenchmarkThread { public: ReadBenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits) : BenchmarkThread(table, key_gen, bytes_written, bytes_read, sequence, num_ops, read_hits) {} static bool callback(void* arg, const char* entry) { CallbackVerifyArgs* callback_args = static_cast(arg); assert(callback_args != nullptr); uint32_t key_length; const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); if ((callback_args->comparator) ->user_comparator() ->Equal(Slice(key_ptr, key_length - 8), callback_args->key->user_key())) { callback_args->found = true; } return false; } void ReadOne() { std::string user_key; auto key = key_gen_->Next(); PutFixed64(&user_key, key); LookupKey lookup_key(user_key, *sequence_); InternalKeyComparator internal_key_comp(BytewiseComparator()); CallbackVerifyArgs verify_args; verify_args.found = false; verify_args.key = &lookup_key; verify_args.table = table_; verify_args.comparator = &internal_key_comp; table_->Get(lookup_key, 
&verify_args, callback); if (verify_args.found) { *bytes_read_ += VarintLength(16) + 16 + FLAGS_item_size; ++*read_hits_; } } void operator()() override { for (unsigned int i = 0; i < num_ops_; ++i) { ReadOne(); } } }; class SeqReadBenchmarkThread : public BenchmarkThread { public: SeqReadBenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits) : BenchmarkThread(table, key_gen, bytes_written, bytes_read, sequence, num_ops, read_hits) {} void ReadOneSeq() { std::unique_ptr iter(table_->GetIterator()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { // pretend to read the value *bytes_read_ += VarintLength(16) + 16 + FLAGS_item_size; } ++*read_hits_; } void operator()() override { for (unsigned int i = 0; i < num_ops_; ++i) { { ReadOneSeq(); } } } }; class ConcurrentReadBenchmarkThread : public ReadBenchmarkThread { public: ConcurrentReadBenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits, std::atomic_int* threads_done) : ReadBenchmarkThread(table, key_gen, bytes_written, bytes_read, sequence, num_ops, read_hits) { threads_done_ = threads_done; } void operator()() override { for (unsigned int i = 0; i < num_ops_; ++i) { ReadOne(); } ++*threads_done_; } private: std::atomic_int* threads_done_; }; class SeqConcurrentReadBenchmarkThread : public SeqReadBenchmarkThread { public: SeqConcurrentReadBenchmarkThread(MemTableRep* table, KeyGenerator* key_gen, uint64_t* bytes_written, uint64_t* bytes_read, uint64_t* sequence, uint64_t num_ops, uint64_t* read_hits, std::atomic_int* threads_done) : SeqReadBenchmarkThread(table, key_gen, bytes_written, bytes_read, sequence, num_ops, read_hits) { threads_done_ = threads_done; } void operator()() override { for (unsigned int i = 0; i < num_ops_; ++i) { ReadOneSeq(); } ++*threads_done_; } private: 
std::atomic_int* threads_done_; }; class Benchmark { public: explicit Benchmark(MemTableRep* table, KeyGenerator* key_gen, uint64_t* sequence, uint32_t num_threads) : table_(table), key_gen_(key_gen), sequence_(sequence), num_threads_(num_threads) {} virtual ~Benchmark() {} virtual void Run() { std::cout << "Number of threads: " << num_threads_ << std::endl; std::vector threads; uint64_t bytes_written = 0; uint64_t bytes_read = 0; uint64_t read_hits = 0; StopWatchNano timer(Env::Default(), true); RunThreads(&threads, &bytes_written, &bytes_read, true, &read_hits); auto elapsed_time = static_cast(timer.ElapsedNanos() / 1000); std::cout << "Elapsed time: " << static_cast(elapsed_time) << " us" << std::endl; if (bytes_written > 0) { auto MiB_written = static_cast(bytes_written) / (1 << 20); auto write_throughput = MiB_written / (elapsed_time / 1000000); std::cout << "Total bytes written: " << MiB_written << " MiB" << std::endl; std::cout << "Write throughput: " << write_throughput << " MiB/s" << std::endl; auto us_per_op = elapsed_time / num_write_ops_per_thread_; std::cout << "write us/op: " << us_per_op << std::endl; } if (bytes_read > 0) { auto MiB_read = static_cast(bytes_read) / (1 << 20); auto read_throughput = MiB_read / (elapsed_time / 1000000); std::cout << "Total bytes read: " << MiB_read << " MiB" << std::endl; std::cout << "Read throughput: " << read_throughput << " MiB/s" << std::endl; auto us_per_op = elapsed_time / num_read_ops_per_thread_; std::cout << "read us/op: " << us_per_op << std::endl; } } virtual void RunThreads(std::vector* threads, uint64_t* bytes_written, uint64_t* bytes_read, bool write, uint64_t* read_hits) = 0; protected: MemTableRep* table_; KeyGenerator* key_gen_; uint64_t* sequence_; uint64_t num_write_ops_per_thread_ = 0; uint64_t num_read_ops_per_thread_ = 0; const uint32_t num_threads_; }; class FillBenchmark : public Benchmark { public: explicit FillBenchmark(MemTableRep* table, KeyGenerator* key_gen, uint64_t* sequence) : 
Benchmark(table, key_gen, sequence, 1) { num_write_ops_per_thread_ = FLAGS_num_operations; } void RunThreads(std::vector* /*threads*/, uint64_t* bytes_written, uint64_t* bytes_read, bool /*write*/, uint64_t* read_hits) override { FillBenchmarkThread(table_, key_gen_, bytes_written, bytes_read, sequence_, num_write_ops_per_thread_, read_hits)(); } }; class ReadBenchmark : public Benchmark { public: explicit ReadBenchmark(MemTableRep* table, KeyGenerator* key_gen, uint64_t* sequence) : Benchmark(table, key_gen, sequence, FLAGS_num_threads) { num_read_ops_per_thread_ = FLAGS_num_operations / FLAGS_num_threads; } void RunThreads(std::vector* threads, uint64_t* bytes_written, uint64_t* bytes_read, bool /*write*/, uint64_t* read_hits) override { for (int i = 0; i < FLAGS_num_threads; ++i) { threads->emplace_back( ReadBenchmarkThread(table_, key_gen_, bytes_written, bytes_read, sequence_, num_read_ops_per_thread_, read_hits)); } for (auto& thread : *threads) { thread.join(); } std::cout << "read hit%: " << (static_cast(*read_hits) / FLAGS_num_operations) * 100 << std::endl; } }; class SeqReadBenchmark : public Benchmark { public: explicit SeqReadBenchmark(MemTableRep* table, uint64_t* sequence) : Benchmark(table, nullptr, sequence, FLAGS_num_threads) { num_read_ops_per_thread_ = FLAGS_num_scans; } void RunThreads(std::vector* threads, uint64_t* bytes_written, uint64_t* bytes_read, bool /*write*/, uint64_t* read_hits) override { for (int i = 0; i < FLAGS_num_threads; ++i) { threads->emplace_back(SeqReadBenchmarkThread( table_, key_gen_, bytes_written, bytes_read, sequence_, num_read_ops_per_thread_, read_hits)); } for (auto& thread : *threads) { thread.join(); } } }; template class ReadWriteBenchmark : public Benchmark { public: explicit ReadWriteBenchmark(MemTableRep* table, KeyGenerator* key_gen, uint64_t* sequence) : Benchmark(table, key_gen, sequence, FLAGS_num_threads) { num_read_ops_per_thread_ = FLAGS_num_threads <= 1 ? 
0 : (FLAGS_num_operations / (FLAGS_num_threads - 1)); num_write_ops_per_thread_ = FLAGS_num_operations; } void RunThreads(std::vector* threads, uint64_t* bytes_written, uint64_t* bytes_read, bool /*write*/, uint64_t* read_hits) override { std::atomic_int threads_done; threads_done.store(0); threads->emplace_back(ConcurrentFillBenchmarkThread( table_, key_gen_, bytes_written, bytes_read, sequence_, num_write_ops_per_thread_, read_hits, &threads_done)); for (int i = 1; i < FLAGS_num_threads; ++i) { threads->emplace_back( ReadThreadType(table_, key_gen_, bytes_written, bytes_read, sequence_, num_read_ops_per_thread_, read_hits, &threads_done)); } for (auto& thread : *threads) { thread.join(); } } }; } // namespace ROCKSDB_NAMESPACE void PrintWarnings() { #if defined(__GNUC__) && !defined(__OPTIMIZE__) fprintf(stdout, "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); #endif #ifndef NDEBUG fprintf(stdout, "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); #endif } int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + " [OPTIONS]..."); ParseCommandLineFlags(&argc, &argv, true); PrintWarnings(); ROCKSDB_NAMESPACE::Options options; std::unique_ptr factory; if (FLAGS_memtablerep == "skiplist") { factory.reset(new ROCKSDB_NAMESPACE::SkipListFactory); #ifndef ROCKSDB_LITE } else if (FLAGS_memtablerep == "vector") { factory.reset(new ROCKSDB_NAMESPACE::VectorRepFactory); } else if (FLAGS_memtablerep == "hashskiplist") { factory.reset(ROCKSDB_NAMESPACE::NewHashSkipListRepFactory( FLAGS_bucket_count, FLAGS_hashskiplist_height, FLAGS_hashskiplist_branching_factor)); options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(FLAGS_prefix_length)); } else if (FLAGS_memtablerep == "hashlinklist") { factory.reset(ROCKSDB_NAMESPACE::NewHashLinkListRepFactory( FLAGS_bucket_count, FLAGS_huge_page_tlb_size, 
FLAGS_bucket_entries_logging_threshold, FLAGS_if_log_bucket_dist_when_flash, FLAGS_threshold_use_skiplist)); options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(FLAGS_prefix_length)); #endif // ROCKSDB_LITE } else { fprintf(stdout, "Unknown memtablerep: %s\n", FLAGS_memtablerep.c_str()); exit(1); } ROCKSDB_NAMESPACE::InternalKeyComparator internal_key_comp( ROCKSDB_NAMESPACE::BytewiseComparator()); ROCKSDB_NAMESPACE::MemTable::KeyComparator key_comp(internal_key_comp); ROCKSDB_NAMESPACE::Arena arena; ROCKSDB_NAMESPACE::WriteBufferManager wb(FLAGS_write_buffer_size); uint64_t sequence; auto createMemtableRep = [&] { sequence = 0; return factory->CreateMemTableRep(key_comp, &arena, options.prefix_extractor.get(), options.info_log.get()); }; std::unique_ptr memtablerep; ROCKSDB_NAMESPACE::Random64 rng(FLAGS_seed); const char* benchmarks = FLAGS_benchmarks.c_str(); while (benchmarks != nullptr) { std::unique_ptr key_gen; const char* sep = strchr(benchmarks, ','); ROCKSDB_NAMESPACE::Slice name; if (sep == nullptr) { name = benchmarks; benchmarks = nullptr; } else { name = ROCKSDB_NAMESPACE::Slice(benchmarks, sep - benchmarks); benchmarks = sep + 1; } std::unique_ptr benchmark; if (name == ROCKSDB_NAMESPACE::Slice("fillseq")) { memtablerep.reset(createMemtableRep()); key_gen.reset(new ROCKSDB_NAMESPACE::KeyGenerator( &rng, ROCKSDB_NAMESPACE::SEQUENTIAL, FLAGS_num_operations)); benchmark.reset(new ROCKSDB_NAMESPACE::FillBenchmark( memtablerep.get(), key_gen.get(), &sequence)); } else if (name == ROCKSDB_NAMESPACE::Slice("fillrandom")) { memtablerep.reset(createMemtableRep()); key_gen.reset(new ROCKSDB_NAMESPACE::KeyGenerator( &rng, ROCKSDB_NAMESPACE::UNIQUE_RANDOM, FLAGS_num_operations)); benchmark.reset(new ROCKSDB_NAMESPACE::FillBenchmark( memtablerep.get(), key_gen.get(), &sequence)); } else if (name == ROCKSDB_NAMESPACE::Slice("readrandom")) { key_gen.reset(new ROCKSDB_NAMESPACE::KeyGenerator( &rng, ROCKSDB_NAMESPACE::RANDOM, 
FLAGS_num_operations)); benchmark.reset(new ROCKSDB_NAMESPACE::ReadBenchmark( memtablerep.get(), key_gen.get(), &sequence)); } else if (name == ROCKSDB_NAMESPACE::Slice("readseq")) { key_gen.reset(new ROCKSDB_NAMESPACE::KeyGenerator( &rng, ROCKSDB_NAMESPACE::SEQUENTIAL, FLAGS_num_operations)); benchmark.reset(new ROCKSDB_NAMESPACE::SeqReadBenchmark(memtablerep.get(), &sequence)); } else if (name == ROCKSDB_NAMESPACE::Slice("readwrite")) { memtablerep.reset(createMemtableRep()); key_gen.reset(new ROCKSDB_NAMESPACE::KeyGenerator( &rng, ROCKSDB_NAMESPACE::RANDOM, FLAGS_num_operations)); benchmark.reset(new ROCKSDB_NAMESPACE::ReadWriteBenchmark< ROCKSDB_NAMESPACE::ConcurrentReadBenchmarkThread>( memtablerep.get(), key_gen.get(), &sequence)); } else if (name == ROCKSDB_NAMESPACE::Slice("seqreadwrite")) { memtablerep.reset(createMemtableRep()); key_gen.reset(new ROCKSDB_NAMESPACE::KeyGenerator( &rng, ROCKSDB_NAMESPACE::RANDOM, FLAGS_num_operations)); benchmark.reset(new ROCKSDB_NAMESPACE::ReadWriteBenchmark< ROCKSDB_NAMESPACE::SeqConcurrentReadBenchmarkThread>( memtablerep.get(), key_gen.get(), &sequence)); } else { std::cout << "WARNING: skipping unknown benchmark '" << name.ToString() << std::endl; continue; } std::cout << "Running " << name.ToString() << std::endl; benchmark->Run(); } return 0; } #endif // GFLAGS rocksdb-6.11.4/memtable/skiplist.h000066400000000000000000000372351370372246700170600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
// // Thread safety // ------------- // // Writes require external synchronization, most likely a mutex. // Reads require a guarantee that the SkipList will not be destroyed // while the read is in progress. Apart from that, reads progress // without any internal locking or synchronization. // // Invariants: // // (1) Allocated nodes are never deleted until the SkipList is // destroyed. This is trivially guaranteed by the code since we // never delete any skip list nodes. // // (2) The contents of a Node except for the next/prev pointers are // immutable after the Node has been linked into the SkipList. // Only Insert() modifies the list, and it is careful to initialize // a node and use release-stores to publish the nodes in one or // more lists. // // ... prev vs. next pointer ordering ... // #pragma once #include #include #include #include "memory/allocator.h" #include "port/port.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { template class SkipList { private: struct Node; public: // Create a new SkipList object that will use "cmp" for comparing keys, // and will allocate memory using "*allocator". Objects allocated in the // allocator must remain allocated for the lifetime of the skiplist object. explicit SkipList(Comparator cmp, Allocator* allocator, int32_t max_height = 12, int32_t branching_factor = 4); // No copying allowed SkipList(const SkipList&) = delete; void operator=(const SkipList&) = delete; // Insert key into the list. // REQUIRES: nothing that compares equal to key is currently in the list. void Insert(const Key& key); // Returns true iff an entry that compares equal to key is in the list. bool Contains(const Key& key) const; // Return estimated number of entries smaller than `key`. uint64_t EstimateCount(const Key& key) const; // Iteration over the contents of a skip list class Iterator { public: // Initialize an iterator over the specified list. // The returned iterator is not valid. 
explicit Iterator(const SkipList* list); // Change the underlying skiplist used for this iterator // This enables us not changing the iterator without deallocating // an old one and then allocating a new one void SetList(const SkipList* list); // Returns true iff the iterator is positioned at a valid node. bool Valid() const; // Returns the key at the current position. // REQUIRES: Valid() const Key& key() const; // Advances to the next position. // REQUIRES: Valid() void Next(); // Advances to the previous position. // REQUIRES: Valid() void Prev(); // Advance to the first entry with a key >= target void Seek(const Key& target); // Retreat to the last entry with a key <= target void SeekForPrev(const Key& target); // Position at the first entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToFirst(); // Position at the last entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToLast(); private: const SkipList* list_; Node* node_; // Intentionally copyable }; private: const uint16_t kMaxHeight_; const uint16_t kBranching_; const uint32_t kScaledInverseBranching_; // Immutable after construction Comparator const compare_; Allocator* const allocator_; // Allocator used for allocations of nodes Node* const head_; // Modified only by Insert(). Read racily by readers, but stale // values are ok. std::atomic max_height_; // Height of the entire list // Used for optimizing sequential insert patterns. Tricky. prev_[i] for // i up to max_height_ is the predecessor of prev_[0] and prev_height_ // is the height of prev_[0]. prev_[0] can only be equal to head before // insertion, in which case max_height_ and prev_height_ are 1. 
Node** prev_; int32_t prev_height_; inline int GetMaxHeight() const { return max_height_.load(std::memory_order_relaxed); } Node* NewNode(const Key& key, int height); int RandomHeight(); bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } bool LessThan(const Key& a, const Key& b) const { return (compare_(a, b) < 0); } // Return true if key is greater than the data stored in "n" bool KeyIsAfterNode(const Key& key, Node* n) const; // Returns the earliest node with a key >= key. // Return nullptr if there is no such node. Node* FindGreaterOrEqual(const Key& key) const; // Return the latest node with a key < key. // Return head_ if there is no such node. // Fills prev[level] with pointer to previous node at "level" for every // level in [0..max_height_-1], if prev is non-null. Node* FindLessThan(const Key& key, Node** prev = nullptr) const; // Return the last node in the list. // Return head_ if list is empty. Node* FindLast() const; }; // Implementation details follow template struct SkipList::Node { explicit Node(const Key& k) : key(k) { } Key const key; // Accessors/mutators for links. Wrapped in methods so we can // add the appropriate barriers as necessary. Node* Next(int n) { assert(n >= 0); // Use an 'acquire load' so that we observe a fully initialized // version of the returned Node. return (next_[n].load(std::memory_order_acquire)); } void SetNext(int n, Node* x) { assert(n >= 0); // Use a 'release store' so that anybody who reads through this // pointer observes a fully initialized version of the inserted node. next_[n].store(x, std::memory_order_release); } // No-barrier variants that can be safely used in a few locations. Node* NoBarrier_Next(int n) { assert(n >= 0); return next_[n].load(std::memory_order_relaxed); } void NoBarrier_SetNext(int n, Node* x) { assert(n >= 0); next_[n].store(x, std::memory_order_relaxed); } private: // Array of length equal to the node height. next_[0] is lowest level link. 
std::atomic next_[1]; }; template typename SkipList::Node* SkipList::NewNode(const Key& key, int height) { char* mem = allocator_->AllocateAligned( sizeof(Node) + sizeof(std::atomic) * (height - 1)); return new (mem) Node(key); } template inline SkipList::Iterator::Iterator(const SkipList* list) { SetList(list); } template inline void SkipList::Iterator::SetList(const SkipList* list) { list_ = list; node_ = nullptr; } template inline bool SkipList::Iterator::Valid() const { return node_ != nullptr; } template inline const Key& SkipList::Iterator::key() const { assert(Valid()); return node_->key; } template inline void SkipList::Iterator::Next() { assert(Valid()); node_ = node_->Next(0); } template inline void SkipList::Iterator::Prev() { // Instead of using explicit "prev" links, we just search for the // last node that falls before key. assert(Valid()); node_ = list_->FindLessThan(node_->key); if (node_ == list_->head_) { node_ = nullptr; } } template inline void SkipList::Iterator::Seek(const Key& target) { node_ = list_->FindGreaterOrEqual(target); } template inline void SkipList::Iterator::SeekForPrev( const Key& target) { Seek(target); if (!Valid()) { SeekToLast(); } while (Valid() && list_->LessThan(target, key())) { Prev(); } } template inline void SkipList::Iterator::SeekToFirst() { node_ = list_->head_->Next(0); } template inline void SkipList::Iterator::SeekToLast() { node_ = list_->FindLast(); if (node_ == list_->head_) { node_ = nullptr; } } template int SkipList::RandomHeight() { auto rnd = Random::GetTLSInstance(); // Increase height with probability 1 in kBranching int height = 1; while (height < kMaxHeight_ && rnd->Next() < kScaledInverseBranching_) { height++; } assert(height > 0); assert(height <= kMaxHeight_); return height; } template bool SkipList::KeyIsAfterNode(const Key& key, Node* n) const { // nullptr n is considered infinite return (n != nullptr) && (compare_(n->key, key) < 0); } template typename SkipList::Node* SkipList:: 
FindGreaterOrEqual(const Key& key) const { // Note: It looks like we could reduce duplication by implementing // this function as FindLessThan(key)->Next(0), but we wouldn't be able // to exit early on equality and the result wouldn't even be correct. // A concurrent insert might occur after FindLessThan(key) but before // we get a chance to call Next(0). Node* x = head_; int level = GetMaxHeight() - 1; Node* last_bigger = nullptr; while (true) { assert(x != nullptr); Node* next = x->Next(level); // Make sure the lists are sorted assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x)); // Make sure we haven't overshot during our search assert(x == head_ || KeyIsAfterNode(key, x)); int cmp = (next == nullptr || next == last_bigger) ? 1 : compare_(next->key, key); if (cmp == 0 || (cmp > 0 && level == 0)) { return next; } else if (cmp < 0) { // Keep searching in this list x = next; } else { // Switch to next list, reuse compare_() result last_bigger = next; level--; } } } template typename SkipList::Node* SkipList::FindLessThan(const Key& key, Node** prev) const { Node* x = head_; int level = GetMaxHeight() - 1; // KeyIsAfter(key, last_not_after) is definitely false Node* last_not_after = nullptr; while (true) { assert(x != nullptr); Node* next = x->Next(level); assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x)); assert(x == head_ || KeyIsAfterNode(key, x)); if (next != last_not_after && KeyIsAfterNode(key, next)) { // Keep searching in this list x = next; } else { if (prev != nullptr) { prev[level] = x; } if (level == 0) { return x; } else { // Switch to next list, reuse KeyIUsAfterNode() result last_not_after = next; level--; } } } } template typename SkipList::Node* SkipList::FindLast() const { Node* x = head_; int level = GetMaxHeight() - 1; while (true) { Node* next = x->Next(level); if (next == nullptr) { if (level == 0) { return x; } else { // Switch to next list level--; } } else { x = next; } } } template uint64_t 
SkipList::EstimateCount(const Key& key) const { uint64_t count = 0; Node* x = head_; int level = GetMaxHeight() - 1; while (true) { assert(x == head_ || compare_(x->key, key) < 0); Node* next = x->Next(level); if (next == nullptr || compare_(next->key, key) >= 0) { if (level == 0) { return count; } else { // Switch to next list count *= kBranching_; level--; } } else { x = next; count++; } } } template SkipList::SkipList(const Comparator cmp, Allocator* allocator, int32_t max_height, int32_t branching_factor) : kMaxHeight_(static_cast(max_height)), kBranching_(static_cast(branching_factor)), kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_), compare_(cmp), allocator_(allocator), head_(NewNode(0 /* any key will do */, max_height)), max_height_(1), prev_height_(1) { assert(max_height > 0 && kMaxHeight_ == static_cast(max_height)); assert(branching_factor > 0 && kBranching_ == static_cast(branching_factor)); assert(kScaledInverseBranching_ > 0); // Allocate the prev_ Node* array, directly from the passed-in allocator. // prev_ does not need to be freed, as its life cycle is tied up with // the allocator as a whole. prev_ = reinterpret_cast( allocator_->AllocateAligned(sizeof(Node*) * kMaxHeight_)); for (int i = 0; i < kMaxHeight_; i++) { head_->SetNext(i, nullptr); prev_[i] = head_; } } template void SkipList::Insert(const Key& key) { // fast path for sequential insertion if (!KeyIsAfterNode(key, prev_[0]->NoBarrier_Next(0)) && (prev_[0] == head_ || KeyIsAfterNode(key, prev_[0]))) { assert(prev_[0] != head_ || (prev_height_ == 1 && GetMaxHeight() == 1)); // Outside of this method prev_[1..max_height_] is the predecessor // of prev_[0], and prev_height_ refers to prev_[0]. Inside Insert // prev_[0..max_height - 1] is the predecessor of key. 
Switch from // the external state to the internal for (int i = 1; i < prev_height_; i++) { prev_[i] = prev_[0]; } } else { // TODO(opt): we could use a NoBarrier predecessor search as an // optimization for architectures where memory_order_acquire needs // a synchronization instruction. Doesn't matter on x86 FindLessThan(key, prev_); } // Our data structure does not allow duplicate insertion assert(prev_[0]->Next(0) == nullptr || !Equal(key, prev_[0]->Next(0)->key)); int height = RandomHeight(); if (height > GetMaxHeight()) { for (int i = GetMaxHeight(); i < height; i++) { prev_[i] = head_; } //fprintf(stderr, "Change height from %d to %d\n", max_height_, height); // It is ok to mutate max_height_ without any synchronization // with concurrent readers. A concurrent reader that observes // the new value of max_height_ will see either the old value of // new level pointers from head_ (nullptr), or a new value set in // the loop below. In the former case the reader will // immediately drop to the next level since nullptr sorts after all // keys. In the latter case the reader will use the new node. max_height_.store(height, std::memory_order_relaxed); } Node* x = NewNode(key, height); for (int i = 0; i < height; i++) { // NoBarrier_SetNext() suffices since we will add a barrier when // we publish a pointer to "x" in prev[i]. x->NoBarrier_SetNext(i, prev_[i]->NoBarrier_Next(i)); prev_[i]->SetNext(i, x); } prev_[0] = x; prev_height_ = height; } template bool SkipList::Contains(const Key& key) const { Node* x = FindGreaterOrEqual(key); if (x != nullptr && Equal(key, x->key)) { return true; } else { return false; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/skiplist_test.cc000066400000000000000000000243711370372246700202520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "memtable/skiplist.h" #include #include "memory/arena.h" #include "rocksdb/env.h" #include "test_util/testharness.h" #include "util/hash.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { typedef uint64_t Key; struct TestComparator { int operator()(const Key& a, const Key& b) const { if (a < b) { return -1; } else if (a > b) { return +1; } else { return 0; } } }; class SkipTest : public testing::Test {}; TEST_F(SkipTest, Empty) { Arena arena; TestComparator cmp; SkipList list(cmp, &arena); ASSERT_TRUE(!list.Contains(10)); SkipList::Iterator iter(&list); ASSERT_TRUE(!iter.Valid()); iter.SeekToFirst(); ASSERT_TRUE(!iter.Valid()); iter.Seek(100); ASSERT_TRUE(!iter.Valid()); iter.SeekForPrev(100); ASSERT_TRUE(!iter.Valid()); iter.SeekToLast(); ASSERT_TRUE(!iter.Valid()); } TEST_F(SkipTest, InsertAndLookup) { const int N = 2000; const int R = 5000; Random rnd(1000); std::set keys; Arena arena; TestComparator cmp; SkipList list(cmp, &arena); for (int i = 0; i < N; i++) { Key key = rnd.Next() % R; if (keys.insert(key).second) { list.Insert(key); } } for (int i = 0; i < R; i++) { if (list.Contains(i)) { ASSERT_EQ(keys.count(i), 1U); } else { ASSERT_EQ(keys.count(i), 0U); } } // Simple iterator tests { SkipList::Iterator iter(&list); ASSERT_TRUE(!iter.Valid()); iter.Seek(0); ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*(keys.begin()), iter.key()); iter.SeekForPrev(R - 1); ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*(keys.rbegin()), iter.key()); iter.SeekToFirst(); ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*(keys.begin()), iter.key()); iter.SeekToLast(); 
ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*(keys.rbegin()), iter.key()); } // Forward iteration test for (int i = 0; i < R; i++) { SkipList::Iterator iter(&list); iter.Seek(i); // Compare against model iterator std::set::iterator model_iter = keys.lower_bound(i); for (int j = 0; j < 3; j++) { if (model_iter == keys.end()) { ASSERT_TRUE(!iter.Valid()); break; } else { ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*model_iter, iter.key()); ++model_iter; iter.Next(); } } } // Backward iteration test for (int i = 0; i < R; i++) { SkipList::Iterator iter(&list); iter.SeekForPrev(i); // Compare against model iterator std::set::iterator model_iter = keys.upper_bound(i); for (int j = 0; j < 3; j++) { if (model_iter == keys.begin()) { ASSERT_TRUE(!iter.Valid()); break; } else { ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*--model_iter, iter.key()); iter.Prev(); } } } } // We want to make sure that with a single writer and multiple // concurrent readers (with no synchronization other than when a // reader's iterator is created), the reader always observes all the // data that was present in the skip list when the iterator was // constructor. Because insertions are happening concurrently, we may // also observe new values that were inserted since the iterator was // constructed, but we should never miss any values that were present // at iterator construction time. // // We generate multi-part keys: // // where: // key is in range [0..K-1] // gen is a generation number for key // hash is hash(key,gen) // // The insertion code picks a random key, sets gen to be 1 + the last // generation number inserted for that key, and sets hash to Hash(key,gen). // // At the beginning of a read, we snapshot the last inserted // generation number for each key. We then iterate, including random // calls to Next() and Seek(). For every key we encounter, we // check that it is either expected given the initial snapshot or has // been concurrently added since the iterator started. 
class ConcurrentTest { private: static const uint32_t K = 4; static uint64_t key(Key key) { return (key >> 40); } static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; } static uint64_t hash(Key key) { return key & 0xff; } static uint64_t HashNumbers(uint64_t k, uint64_t g) { uint64_t data[2] = { k, g }; return Hash(reinterpret_cast(data), sizeof(data), 0); } static Key MakeKey(uint64_t k, uint64_t g) { assert(sizeof(Key) == sizeof(uint64_t)); assert(k <= K); // We sometimes pass K to seek to the end of the skiplist assert(g <= 0xffffffffu); return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff)); } static bool IsValidKey(Key k) { return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff); } static Key RandomTarget(Random* rnd) { switch (rnd->Next() % 10) { case 0: // Seek to beginning return MakeKey(0, 0); case 1: // Seek to end return MakeKey(K, 0); default: // Seek to middle return MakeKey(rnd->Next() % K, 0); } } // Per-key generation struct State { std::atomic generation[K]; void Set(int k, int v) { generation[k].store(v, std::memory_order_release); } int Get(int k) { return generation[k].load(std::memory_order_acquire); } State() { for (unsigned int k = 0; k < K; k++) { Set(k, 0); } } }; // Current state of the test State current_; Arena arena_; // SkipList is not protected by mu_. We just use a single writer // thread to modify it. SkipList list_; public: ConcurrentTest() : list_(TestComparator(), &arena_) {} // REQUIRES: External synchronization void WriteStep(Random* rnd) { const uint32_t k = rnd->Next() % K; const int g = current_.Get(k) + 1; const Key new_key = MakeKey(k, g); list_.Insert(new_key); current_.Set(k, g); } void ReadStep(Random* rnd) { // Remember the initial committed state of the skiplist. 
State initial_state; for (unsigned int k = 0; k < K; k++) { initial_state.Set(k, current_.Get(k)); } Key pos = RandomTarget(rnd); SkipList::Iterator iter(&list_); iter.Seek(pos); while (true) { Key current; if (!iter.Valid()) { current = MakeKey(K, 0); } else { current = iter.key(); ASSERT_TRUE(IsValidKey(current)) << current; } ASSERT_LE(pos, current) << "should not go backwards"; // Verify that everything in [pos,current) was not present in // initial_state. while (pos < current) { ASSERT_LT(key(pos), K) << pos; // Note that generation 0 is never inserted, so it is ok if // <*,0,*> is missing. ASSERT_TRUE((gen(pos) == 0U) || (gen(pos) > static_cast(initial_state.Get( static_cast(key(pos)))))) << "key: " << key(pos) << "; gen: " << gen(pos) << "; initgen: " << initial_state.Get(static_cast(key(pos))); // Advance to next key in the valid key space if (key(pos) < key(current)) { pos = MakeKey(key(pos) + 1, 0); } else { pos = MakeKey(key(pos), gen(pos) + 1); } } if (!iter.Valid()) { break; } if (rnd->Next() % 2) { iter.Next(); pos = MakeKey(key(pos), gen(pos) + 1); } else { Key new_target = RandomTarget(rnd); if (new_target > pos) { pos = new_target; iter.Seek(new_target); } } } } }; const uint32_t ConcurrentTest::K; // Simple test that does single-threaded testing of the ConcurrentTest // scaffolding. 
TEST_F(SkipTest, ConcurrentWithoutThreads) { ConcurrentTest test; Random rnd(test::RandomSeed()); for (int i = 0; i < 10000; i++) { test.ReadStep(&rnd); test.WriteStep(&rnd); } } class TestState { public: ConcurrentTest t_; int seed_; std::atomic quit_flag_; enum ReaderState { STARTING, RUNNING, DONE }; explicit TestState(int s) : seed_(s), quit_flag_(false), state_(STARTING), state_cv_(&mu_) {} void Wait(ReaderState s) { mu_.Lock(); while (state_ != s) { state_cv_.Wait(); } mu_.Unlock(); } void Change(ReaderState s) { mu_.Lock(); state_ = s; state_cv_.Signal(); mu_.Unlock(); } private: port::Mutex mu_; ReaderState state_; port::CondVar state_cv_; }; static void ConcurrentReader(void* arg) { TestState* state = reinterpret_cast(arg); Random rnd(state->seed_); int64_t reads = 0; state->Change(TestState::RUNNING); while (!state->quit_flag_.load(std::memory_order_acquire)) { state->t_.ReadStep(&rnd); ++reads; } state->Change(TestState::DONE); } static void RunConcurrent(int run) { const int seed = test::RandomSeed() + (run * 100); Random rnd(seed); const int N = 1000; const int kSize = 1000; for (int i = 0; i < N; i++) { if ((i % 100) == 0) { fprintf(stderr, "Run %d of %d\n", i, N); } TestState state(seed + 1); Env::Default()->SetBackgroundThreads(1); Env::Default()->Schedule(ConcurrentReader, &state); state.Wait(TestState::RUNNING); for (int k = 0; k < kSize; k++) { state.t_.WriteStep(&rnd); } state.quit_flag_.store(true, std::memory_order_release); state.Wait(TestState::DONE); } } TEST_F(SkipTest, Concurrent1) { RunConcurrent(1); } TEST_F(SkipTest, Concurrent2) { RunConcurrent(2); } TEST_F(SkipTest, Concurrent3) { RunConcurrent(3); } TEST_F(SkipTest, Concurrent4) { RunConcurrent(4); } TEST_F(SkipTest, Concurrent5) { RunConcurrent(5); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } 
rocksdb-6.11.4/memtable/skiplistrep.cc000066400000000000000000000215701370372246700177200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "db/memtable.h" #include "memory/arena.h" #include "memtable/inlineskiplist.h" #include "rocksdb/memtablerep.h" namespace ROCKSDB_NAMESPACE { namespace { class SkipListRep : public MemTableRep { InlineSkipList skip_list_; const MemTableRep::KeyComparator& cmp_; const SliceTransform* transform_; const size_t lookahead_; friend class LookaheadIterator; public: explicit SkipListRep(const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, const size_t lookahead) : MemTableRep(allocator), skip_list_(compare, allocator), cmp_(compare), transform_(transform), lookahead_(lookahead) {} KeyHandle Allocate(const size_t len, char** buf) override { *buf = skip_list_.AllocateKey(len); return static_cast(*buf); } // Insert key into the list. // REQUIRES: nothing that compares equal to key is currently in the list. 
void Insert(KeyHandle handle) override { skip_list_.Insert(static_cast(handle)); } bool InsertKey(KeyHandle handle) override { return skip_list_.Insert(static_cast(handle)); } void InsertWithHint(KeyHandle handle, void** hint) override { skip_list_.InsertWithHint(static_cast(handle), hint); } bool InsertKeyWithHint(KeyHandle handle, void** hint) override { return skip_list_.InsertWithHint(static_cast(handle), hint); } void InsertWithHintConcurrently(KeyHandle handle, void** hint) override { skip_list_.InsertWithHintConcurrently(static_cast(handle), hint); } bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override { return skip_list_.InsertWithHintConcurrently(static_cast(handle), hint); } void InsertConcurrently(KeyHandle handle) override { skip_list_.InsertConcurrently(static_cast(handle)); } bool InsertKeyConcurrently(KeyHandle handle) override { return skip_list_.InsertConcurrently(static_cast(handle)); } // Returns true iff an entry that compares equal to key is in the list. bool Contains(const char* key) const override { return skip_list_.Contains(key); } size_t ApproximateMemoryUsage() override { // All memory is allocated through allocator; nothing to report here return 0; } void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override { SkipListRep::Iterator iter(&skip_list_); Slice dummy_slice; for (iter.Seek(dummy_slice, k.memtable_key().data()); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } uint64_t ApproximateNumEntries(const Slice& start_ikey, const Slice& end_ikey) override { std::string tmp; uint64_t start_count = skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey)); uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey)); return (end_count >= start_count) ? 
(end_count - start_count) : 0; } ~SkipListRep() override {} // Iteration over the contents of a skip list class Iterator : public MemTableRep::Iterator { InlineSkipList::Iterator iter_; public: // Initialize an iterator over the specified list. // The returned iterator is not valid. explicit Iterator( const InlineSkipList* list) : iter_(list) {} ~Iterator() override {} // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return iter_.Valid(); } // Returns the key at the current position. // REQUIRES: Valid() const char* key() const override { return iter_.key(); } // Advances to the next position. // REQUIRES: Valid() void Next() override { iter_.Next(); } // Advances to the previous position. // REQUIRES: Valid() void Prev() override { iter_.Prev(); } // Advance to the first entry with a key >= target void Seek(const Slice& user_key, const char* memtable_key) override { if (memtable_key != nullptr) { iter_.Seek(memtable_key); } else { iter_.Seek(EncodeKey(&tmp_, user_key)); } } // Retreat to the last entry with a key <= target void SeekForPrev(const Slice& user_key, const char* memtable_key) override { if (memtable_key != nullptr) { iter_.SeekForPrev(memtable_key); } else { iter_.SeekForPrev(EncodeKey(&tmp_, user_key)); } } // Position at the first entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToFirst() override { iter_.SeekToFirst(); } // Position at the last entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToLast() override { iter_.SeekToLast(); } protected: std::string tmp_; // For passing to EncodeKey }; // Iterator over the contents of a skip list which also keeps track of the // previously visited node. In Seek(), it examines a few nodes after it // first, falling back to O(log n) search from the head of the list only if // the target key hasn't been found. 
class LookaheadIterator : public MemTableRep::Iterator { public: explicit LookaheadIterator(const SkipListRep& rep) : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {} ~LookaheadIterator() override {} bool Valid() const override { return iter_.Valid(); } const char* key() const override { assert(Valid()); return iter_.key(); } void Next() override { assert(Valid()); bool advance_prev = true; if (prev_.Valid()) { auto k1 = rep_.UserKey(prev_.key()); auto k2 = rep_.UserKey(iter_.key()); if (k1.compare(k2) == 0) { // same user key, don't move prev_ advance_prev = false; } else if (rep_.transform_) { // only advance prev_ if it has the same prefix as iter_ auto t1 = rep_.transform_->Transform(k1); auto t2 = rep_.transform_->Transform(k2); advance_prev = t1.compare(t2) == 0; } } if (advance_prev) { prev_ = iter_; } iter_.Next(); } void Prev() override { assert(Valid()); iter_.Prev(); prev_ = iter_; } void Seek(const Slice& internal_key, const char* memtable_key) override { const char *encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, internal_key); if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) { // prev_.key() is smaller or equal to our target key; do a quick // linear search (at most lookahead_ steps) starting from prev_ iter_ = prev_; size_t cur = 0; while (cur++ <= rep_.lookahead_ && iter_.Valid()) { if (rep_.cmp_(encoded_key, iter_.key()) <= 0) { return; } Next(); } } iter_.Seek(encoded_key); prev_ = iter_; } void SeekForPrev(const Slice& internal_key, const char* memtable_key) override { const char* encoded_key = (memtable_key != nullptr) ? 
memtable_key : EncodeKey(&tmp_, internal_key); iter_.SeekForPrev(encoded_key); prev_ = iter_; } void SeekToFirst() override { iter_.SeekToFirst(); prev_ = iter_; } void SeekToLast() override { iter_.SeekToLast(); prev_ = iter_; } protected: std::string tmp_; // For passing to EncodeKey private: const SkipListRep& rep_; InlineSkipList::Iterator iter_; InlineSkipList::Iterator prev_; }; MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override { if (lookahead_ > 0) { void *mem = arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator)) : operator new(sizeof(SkipListRep::LookaheadIterator)); return new (mem) SkipListRep::LookaheadIterator(*this); } else { void *mem = arena ? arena->AllocateAligned(sizeof(SkipListRep::Iterator)) : operator new(sizeof(SkipListRep::Iterator)); return new (mem) SkipListRep::Iterator(&skip_list_); } } }; } MemTableRep* SkipListFactory::CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform* transform, Logger* /*logger*/) { return new SkipListRep(compare, allocator, transform, lookahead_); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/stl_wrappers.h000066400000000000000000000016021370372246700177300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include "rocksdb/comparator.h" #include "rocksdb/memtablerep.h" #include "rocksdb/slice.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { namespace stl_wrappers { class Base { protected: const MemTableRep::KeyComparator& compare_; explicit Base(const MemTableRep::KeyComparator& compare) : compare_(compare) {} }; struct Compare : private Base { explicit Compare(const MemTableRep::KeyComparator& compare) : Base(compare) {} inline bool operator()(const char* a, const char* b) const { return compare_(a, b) < 0; } }; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/vectorrep.cc000066400000000000000000000214311370372246700173540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #ifndef ROCKSDB_LITE #include "rocksdb/memtablerep.h" #include #include #include #include #include #include "db/memtable.h" #include "memory/arena.h" #include "memtable/stl_wrappers.h" #include "port/port.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { namespace { using namespace stl_wrappers; class VectorRep : public MemTableRep { public: VectorRep(const KeyComparator& compare, Allocator* allocator, size_t count); // Insert key into the collection. (The caller will pack key and value into a // single buffer and pass that in as the parameter to Insert) // REQUIRES: nothing that compares equal to key is currently in the // collection. void Insert(KeyHandle handle) override; // Returns true iff an entry that compares equal to key is in the collection. 
bool Contains(const char* key) const override; void MarkReadOnly() override; size_t ApproximateMemoryUsage() override; void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override; ~VectorRep() override {} class Iterator : public MemTableRep::Iterator { class VectorRep* vrep_; std::shared_ptr> bucket_; std::vector::const_iterator mutable cit_; const KeyComparator& compare_; std::string tmp_; // For passing to EncodeKey bool mutable sorted_; void DoSort() const; public: explicit Iterator(class VectorRep* vrep, std::shared_ptr> bucket, const KeyComparator& compare); // Initialize an iterator over the specified collection. // The returned iterator is not valid. // explicit Iterator(const MemTableRep* collection); ~Iterator() override{}; // Returns true iff the iterator is positioned at a valid node. bool Valid() const override; // Returns the key at the current position. // REQUIRES: Valid() const char* key() const override; // Advances to the next position. // REQUIRES: Valid() void Next() override; // Advances to the previous position. // REQUIRES: Valid() void Prev() override; // Advance to the first entry with a key >= target void Seek(const Slice& user_key, const char* memtable_key) override; // Advance to the first entry with a key <= target void SeekForPrev(const Slice& user_key, const char* memtable_key) override; // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToFirst() override; // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. void SeekToLast() override; }; // Return an iterator over the keys in this representation. 
MemTableRep::Iterator* GetIterator(Arena* arena) override; private: friend class Iterator; typedef std::vector Bucket; std::shared_ptr bucket_; mutable port::RWMutex rwlock_; bool immutable_; bool sorted_; const KeyComparator& compare_; }; void VectorRep::Insert(KeyHandle handle) { auto* key = static_cast(handle); WriteLock l(&rwlock_); assert(!immutable_); bucket_->push_back(key); } // Returns true iff an entry that compares equal to key is in the collection. bool VectorRep::Contains(const char* key) const { ReadLock l(&rwlock_); return std::find(bucket_->begin(), bucket_->end(), key) != bucket_->end(); } void VectorRep::MarkReadOnly() { WriteLock l(&rwlock_); immutable_ = true; } size_t VectorRep::ApproximateMemoryUsage() { return sizeof(bucket_) + sizeof(*bucket_) + bucket_->size() * sizeof( std::remove_reference::type::value_type ); } VectorRep::VectorRep(const KeyComparator& compare, Allocator* allocator, size_t count) : MemTableRep(allocator), bucket_(new Bucket()), immutable_(false), sorted_(false), compare_(compare) { bucket_.get()->reserve(count); } VectorRep::Iterator::Iterator(class VectorRep* vrep, std::shared_ptr> bucket, const KeyComparator& compare) : vrep_(vrep), bucket_(bucket), cit_(bucket_->end()), compare_(compare), sorted_(false) { } void VectorRep::Iterator::DoSort() const { // vrep is non-null means that we are working on an immutable memtable if (!sorted_ && vrep_ != nullptr) { WriteLock l(&vrep_->rwlock_); if (!vrep_->sorted_) { std::sort(bucket_->begin(), bucket_->end(), Compare(compare_)); cit_ = bucket_->begin(); vrep_->sorted_ = true; } sorted_ = true; } if (!sorted_) { std::sort(bucket_->begin(), bucket_->end(), Compare(compare_)); cit_ = bucket_->begin(); sorted_ = true; } assert(sorted_); assert(vrep_ == nullptr || vrep_->sorted_); } // Returns true iff the iterator is positioned at a valid node. bool VectorRep::Iterator::Valid() const { DoSort(); return cit_ != bucket_->end(); } // Returns the key at the current position. 
// REQUIRES: Valid() const char* VectorRep::Iterator::key() const { assert(sorted_); return *cit_; } // Advances to the next position. // REQUIRES: Valid() void VectorRep::Iterator::Next() { assert(sorted_); if (cit_ == bucket_->end()) { return; } ++cit_; } // Advances to the previous position. // REQUIRES: Valid() void VectorRep::Iterator::Prev() { assert(sorted_); if (cit_ == bucket_->begin()) { // If you try to go back from the first element, the iterator should be // invalidated. So we set it to past-the-end. This means that you can // treat the container circularly. cit_ = bucket_->end(); } else { --cit_; } } // Advance to the first entry with a key >= target void VectorRep::Iterator::Seek(const Slice& user_key, const char* memtable_key) { DoSort(); // Do binary search to find first value not less than the target const char* encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key); cit_ = std::equal_range(bucket_->begin(), bucket_->end(), encoded_key, [this] (const char* a, const char* b) { return compare_(a, b) < 0; }).first; } // Advance to the first entry with a key <= target void VectorRep::Iterator::SeekForPrev(const Slice& /*user_key*/, const char* /*memtable_key*/) { assert(false); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. void VectorRep::Iterator::SeekToFirst() { DoSort(); cit_ = bucket_->begin(); } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. 
void VectorRep::Iterator::SeekToLast() { DoSort(); cit_ = bucket_->end(); if (bucket_->size() != 0) { --cit_; } } void VectorRep::Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) { rwlock_.ReadLock(); VectorRep* vector_rep; std::shared_ptr bucket; if (immutable_) { vector_rep = this; } else { vector_rep = nullptr; bucket.reset(new Bucket(*bucket_)); // make a copy } VectorRep::Iterator iter(vector_rep, immutable_ ? bucket_ : bucket, compare_); rwlock_.ReadUnlock(); for (iter.Seek(k.user_key(), k.memtable_key().data()); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } MemTableRep::Iterator* VectorRep::GetIterator(Arena* arena) { char* mem = nullptr; if (arena != nullptr) { mem = arena->AllocateAligned(sizeof(Iterator)); } ReadLock l(&rwlock_); // Do not sort here. The sorting would be done the first time // a Seek is performed on the iterator. if (immutable_) { if (arena == nullptr) { return new Iterator(this, bucket_, compare_); } else { return new (mem) Iterator(this, bucket_, compare_); } } else { std::shared_ptr tmp; tmp.reset(new Bucket(*bucket_)); // make a copy if (arena == nullptr) { return new Iterator(nullptr, tmp, compare_); } else { return new (mem) Iterator(nullptr, tmp, compare_); } } } } // anon namespace MemTableRep* VectorRepFactory::CreateMemTableRep( const MemTableRep::KeyComparator& compare, Allocator* allocator, const SliceTransform*, Logger* /*logger*/) { return new VectorRep(compare, allocator, count_); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/memtable/write_buffer_manager.cc000066400000000000000000000125741370372246700215300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "rocksdb/write_buffer_manager.h" #include #include "util/coding.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE namespace { const size_t kSizeDummyEntry = 256 * 1024; // The key will be longer than keys for blocks in SST files so they won't // conflict. const size_t kCacheKeyPrefix = kMaxVarint64Length * 4 + 1; } // namespace struct WriteBufferManager::CacheRep { std::shared_ptr cache_; std::mutex cache_mutex_; std::atomic cache_allocated_size_; // The non-prefix part will be updated according to the ID to use. char cache_key_[kCacheKeyPrefix + kMaxVarint64Length]; uint64_t next_cache_key_id_ = 0; std::vector dummy_handles_; explicit CacheRep(std::shared_ptr cache) : cache_(cache), cache_allocated_size_(0) { memset(cache_key_, 0, kCacheKeyPrefix); size_t pointer_size = sizeof(const void*); assert(pointer_size <= kCacheKeyPrefix); memcpy(cache_key_, static_cast(this), pointer_size); } Slice GetNextCacheKey() { memset(cache_key_ + kCacheKeyPrefix, 0, kMaxVarint64Length); char* end = EncodeVarint64(cache_key_ + kCacheKeyPrefix, next_cache_key_id_++); return Slice(cache_key_, static_cast(end - cache_key_)); } }; #else struct WriteBufferManager::CacheRep {}; #endif // ROCKSDB_LITE WriteBufferManager::WriteBufferManager(size_t _buffer_size, std::shared_ptr cache) : buffer_size_(_buffer_size), mutable_limit_(buffer_size_ * 7 / 8), memory_used_(0), memory_active_(0), cache_rep_(nullptr) { #ifndef ROCKSDB_LITE if (cache) { // Construct the cache key using the pointer to this. 
cache_rep_.reset(new CacheRep(cache)); } #else (void)cache; #endif // ROCKSDB_LITE } WriteBufferManager::~WriteBufferManager() { #ifndef ROCKSDB_LITE if (cache_rep_) { for (auto* handle : cache_rep_->dummy_handles_) { if (handle != nullptr) { cache_rep_->cache_->Release(handle, true); } } } #endif // ROCKSDB_LITE } // Should only be called from write thread void WriteBufferManager::ReserveMemWithCache(size_t mem) { #ifndef ROCKSDB_LITE assert(cache_rep_ != nullptr); // Use a mutex to protect various data structures. Can be optimized to a // lock-free solution if it ends up with a performance bottleneck. std::lock_guard lock(cache_rep_->cache_mutex_); size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) + mem; memory_used_.store(new_mem_used, std::memory_order_relaxed); while (new_mem_used > cache_rep_->cache_allocated_size_) { // Expand size by at least 256KB. // Add a dummy record to the cache Cache::Handle* handle = nullptr; cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr, kSizeDummyEntry, nullptr, &handle); // We keep the handle even if insertion fails and a null handle is // returned, so that when memory shrinks, we don't release extra // entries from cache. // Ideallly we should prevent this allocation from happening if // this insertion fails. However, the callers to this code path // are not able to handle failures properly. We'll need to improve // it in the future. cache_rep_->dummy_handles_.push_back(handle); cache_rep_->cache_allocated_size_ += kSizeDummyEntry; } #else (void)mem; #endif // ROCKSDB_LITE } void WriteBufferManager::FreeMemWithCache(size_t mem) { #ifndef ROCKSDB_LITE assert(cache_rep_ != nullptr); // Use a mutex to protect various data structures. Can be optimized to a // lock-free solution if it ends up with a performance bottleneck. 
std::lock_guard lock(cache_rep_->cache_mutex_); size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) - mem; memory_used_.store(new_mem_used, std::memory_order_relaxed); // Gradually shrink memory costed in the block cache if the actual // usage is less than 3/4 of what we reserve from the block cache. // We do this because: // 1. we don't pay the cost of the block cache immediately a memtable is // freed, as block cache insert is expensive; // 2. eventually, if we walk away from a temporary memtable size increase, // we make sure shrink the memory costed in block cache over time. // In this way, we only shrink costed memory showly even there is enough // margin. if (new_mem_used < cache_rep_->cache_allocated_size_ / 4 * 3 && cache_rep_->cache_allocated_size_ - kSizeDummyEntry > new_mem_used) { assert(!cache_rep_->dummy_handles_.empty()); auto* handle = cache_rep_->dummy_handles_.back(); // If insert failed, handle is null so we should not release. if (handle != nullptr) { cache_rep_->cache_->Release(handle, true); } cache_rep_->dummy_handles_.pop_back(); cache_rep_->cache_allocated_size_ -= kSizeDummyEntry; } #else (void)mem; #endif // ROCKSDB_LITE } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/memtable/write_buffer_manager_test.cc000066400000000000000000000145761370372246700225730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "rocksdb/write_buffer_manager.h"
#include "test_util/testharness.h"

namespace ROCKSDB_NAMESPACE {

class WriteBufferManagerTest : public testing::Test {};

#ifndef ROCKSDB_LITE
// Walks a 10MB manager through reserve / schedule-free / free steps and checks
// ShouldFlush() after each one. The "total"/"mutable" figures in the comments
// are running sums of the calls made so far.
TEST_F(WriteBufferManagerTest, ShouldFlush) {
  // A write buffer manager of size 10MB
  std::unique_ptr<WriteBufferManager> wbf(
      new WriteBufferManager(10 * 1024 * 1024));

  wbf->ReserveMem(8 * 1024 * 1024);
  ASSERT_FALSE(wbf->ShouldFlush());
  // 9MB total, 9MB mutable: 90% of the hard limit will hit the condition
  wbf->ReserveMem(1 * 1024 * 1024);
  ASSERT_TRUE(wbf->ShouldFlush());
  // Scheduling for freeing will release the condition
  wbf->ScheduleFreeMem(1 * 1024 * 1024);
  ASSERT_FALSE(wbf->ShouldFlush());

  wbf->ReserveMem(2 * 1024 * 1024);
  ASSERT_TRUE(wbf->ShouldFlush());

  wbf->ScheduleFreeMem(4 * 1024 * 1024);
  // 11MB total, 6MB mutable. Hard limit is still hit.
  ASSERT_TRUE(wbf->ShouldFlush());

  wbf->ScheduleFreeMem(2 * 1024 * 1024);
  // 11MB total, 4MB mutable. Hard limit is still hit, but we won't flush
  // because more than half of the data is already being flushed.
  ASSERT_FALSE(wbf->ShouldFlush());

  wbf->ReserveMem(4 * 1024 * 1024);
  // 15MB total, 8MB mutable.
  ASSERT_TRUE(wbf->ShouldFlush());

  wbf->FreeMem(7 * 1024 * 1024);
  // 8MB total (15MB - 7MB freed), 8MB mutable.
  ASSERT_FALSE(wbf->ShouldFlush());
}

// Checks that reservations are charged to the cache in 256KB steps and are
// released gradually as memory is freed.
TEST_F(WriteBufferManagerTest, CacheCost) {
  LRUCacheOptions co;
  // 1GB cache
  co.capacity = 1024 * 1024 * 1024;
  co.num_shard_bits = 4;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  // A write buffer manager of size 50MB
  std::unique_ptr<WriteBufferManager> wbf(
      new WriteBufferManager(50 * 1024 * 1024, cache));

  // Allocate 333KB will allocate 512KB
  wbf->ReserveMem(333 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + 10000);

  // Allocate another 512KB
  wbf->ReserveMem(512 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + 10000);

  // Allocate another 10MB
  wbf->ReserveMem(10 * 1024 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000);

  // Free 1MB will not cause any change in cache cost
  wbf->FreeMem(1024 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000);

  ASSERT_FALSE(wbf->ShouldFlush());

  // Allocate another 41MB
  wbf->ReserveMem(41 * 1024 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);
  ASSERT_TRUE(wbf->ShouldFlush());

  ASSERT_TRUE(wbf->ShouldFlush());

  // Scheduling a free does not change the cache charge.
  wbf->ScheduleFreeMem(20 * 1024 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);

  // Still need flush as the hard limit hits
  ASSERT_TRUE(wbf->ShouldFlush());

  // Free 20MB will release 256KB from cache
  wbf->FreeMem(20 * 1024 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024 + 10000);

  ASSERT_FALSE(wbf->ShouldFlush());

  // Every free will release 256KB if still not hit 3/4
  wbf->FreeMem(16 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024 + 10000);

  wbf->FreeMem(16 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000);

  // Reserve 512KB will not cause any change in cache cost
  wbf->ReserveMem(512 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000);

  wbf->FreeMem(16 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024 + 10000);

  // Destroying the write buffer manager should free everything
  wbf.reset();
  ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024);
}

// Same cache accounting, with a manager constructed with buffer size 0.
TEST_F(WriteBufferManagerTest, NoCapCacheCost) {
  // 1GB cache
  std::shared_ptr<Cache> cache = NewLRUCache(1024 * 1024 * 1024, 4);
  // A write buffer manager with buffer size 0
  // NOTE(review): the test name and the ASSERT_FALSE(ShouldFlush()) below
  // suggest that 0 means "no limit" -- confirm against the header.
  std::unique_ptr<WriteBufferManager> wbf(new WriteBufferManager(0, cache));
  // Reserve 10MB; the cache is charged about 10MB
  wbf->ReserveMem(10 * 1024 * 1024);
  ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 10 * 1024 * 1024 + 10000);
  ASSERT_FALSE(wbf->ShouldFlush());

  // Free 9MB, then issue many small frees so the cache charge shrinks
  // step by step.
  wbf->FreeMem(9 * 1024 * 1024);
  for (int i = 0; i < 40; i++) {
    wbf->FreeMem(4 * 1024);
  }
  ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000);
}

// Reservations must keep working when the cache rejects dummy entries.
TEST_F(WriteBufferManagerTest, CacheFull) {
  // 12MB cache size with strict capacity
  LRUCacheOptions lo;
  lo.capacity = 12 * 1024 * 1024;
  lo.num_shard_bits = 0;
  lo.strict_capacity_limit = true;
  std::shared_ptr<Cache> cache = NewLRUCache(lo);
  std::unique_ptr<WriteBufferManager> wbf(new WriteBufferManager(0, cache));
  wbf->ReserveMem(10 * 1024 * 1024);
  size_t prev_pinned = cache->GetPinnedUsage();
  ASSERT_GE(prev_pinned, 10 * 1024 * 1024);
  // Some insert will fail
  wbf->ReserveMem(10 * 1024 * 1024);
  ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024);

  // Increase capacity so next insert will succeed
  cache->SetCapacity(30 * 1024 * 1024);
  wbf->ReserveMem(10 * 1024 * 1024);
  ASSERT_GT(cache->GetPinnedUsage(), 20 * 1024 * 1024);

  // Gradually release 20 MB
  for (int i = 0; i < 40; i++) {
    wbf->FreeMem(512 * 1024);
  }
  ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024);
  ASSERT_LT(cache->GetPinnedUsage(), 20 * 1024 * 1024);
}

#endif  // ROCKSDB_LITE
}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/monitoring/000077500000000000000000000000001370372246700154325ustar00rootroot00000000000000
rocksdb-6.11.4/monitoring/file_read_sample.h000066400000000000000000000015131370372246700210560ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#pragma once
#include "db/version_edit.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {
// A read is sampled when the thread-local random value modulo this rate hits
// one fixed residue; each sampled read is then counted as this many reads.
static const uint32_t kFileReadSampleRate = 1024;
extern bool should_sample_file_read();
extern void sample_file_read_inc(FileMetaData*);

// Returns true when the next thread-local random value modulo
// kFileReadSampleRate equals 307.
inline bool should_sample_file_read() {
  return (Random::GetTLSInstance()->Next() % kFileReadSampleRate == 307);
}

// Adds kFileReadSampleRate to the file's sampled-read counter.
inline void sample_file_read_inc(FileMetaData* meta) {
  meta->stats.num_reads_sampled.fetch_add(kFileReadSampleRate,
                                          std::memory_order_relaxed);
}
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/monitoring/histogram.cc000066400000000000000000000223351370372246700177430ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "monitoring/histogram.h" #include #include #include #include #include "port/port.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { HistogramBucketMapper::HistogramBucketMapper() { // If you change this, you also need to change // size of array buckets_ in HistogramImpl bucketValues_ = {1, 2}; valueIndexMap_ = {{1, 0}, {2, 1}}; double bucket_val = static_cast(bucketValues_.back()); while ((bucket_val = 1.5 * bucket_val) <= static_cast(port::kMaxUint64)) { bucketValues_.push_back(static_cast(bucket_val)); // Extracts two most significant digits to make histogram buckets more // human-readable. E.g., 172 becomes 170. uint64_t pow_of_ten = 1; while (bucketValues_.back() / 10 > 10) { bucketValues_.back() /= 10; pow_of_ten *= 10; } bucketValues_.back() *= pow_of_ten; valueIndexMap_[bucketValues_.back()] = bucketValues_.size() - 1; } maxBucketValue_ = bucketValues_.back(); minBucketValue_ = bucketValues_.front(); } size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { if (value >= maxBucketValue_) { return bucketValues_.size() - 1; } else if ( value >= minBucketValue_ ) { std::map::const_iterator lowerBound = valueIndexMap_.lower_bound(value); if (lowerBound != valueIndexMap_.end()) { return static_cast(lowerBound->second); } else { return 0; } } else { return 0; } } namespace { const HistogramBucketMapper bucketMapper; } HistogramStat::HistogramStat() : num_buckets_(bucketMapper.BucketCount()) { assert(num_buckets_ == sizeof(buckets_) / sizeof(*buckets_)); Clear(); } void HistogramStat::Clear() { min_.store(bucketMapper.LastValue(), std::memory_order_relaxed); max_.store(0, std::memory_order_relaxed); num_.store(0, std::memory_order_relaxed); sum_.store(0, std::memory_order_relaxed); sum_squares_.store(0, std::memory_order_relaxed); for (unsigned int b = 0; b < num_buckets_; b++) { 
buckets_[b].store(0, std::memory_order_relaxed); } }; bool HistogramStat::Empty() const { return num() == 0; } void HistogramStat::Add(uint64_t value) { // This function is designed to be lock free, as it's in the critical path // of any operation. Each individual value is atomic and the order of updates // by concurrent threads is tolerable. const size_t index = bucketMapper.IndexForValue(value); assert(index < num_buckets_); buckets_[index].store(buckets_[index].load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); uint64_t old_min = min(); if (value < old_min) { min_.store(value, std::memory_order_relaxed); } uint64_t old_max = max(); if (value > old_max) { max_.store(value, std::memory_order_relaxed); } num_.store(num_.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); sum_.store(sum_.load(std::memory_order_relaxed) + value, std::memory_order_relaxed); sum_squares_.store( sum_squares_.load(std::memory_order_relaxed) + value * value, std::memory_order_relaxed); } void HistogramStat::Merge(const HistogramStat& other) { // This function needs to be performned with the outer lock acquired // However, atomic operation on every member is still need, since Add() // requires no lock and value update can still happen concurrently uint64_t old_min = min(); uint64_t other_min = other.min(); while (other_min < old_min && !min_.compare_exchange_weak(old_min, other_min)) {} uint64_t old_max = max(); uint64_t other_max = other.max(); while (other_max > old_max && !max_.compare_exchange_weak(old_max, other_max)) {} num_.fetch_add(other.num(), std::memory_order_relaxed); sum_.fetch_add(other.sum(), std::memory_order_relaxed); sum_squares_.fetch_add(other.sum_squares(), std::memory_order_relaxed); for (unsigned int b = 0; b < num_buckets_; b++) { buckets_[b].fetch_add(other.bucket_at(b), std::memory_order_relaxed); } } double HistogramStat::Median() const { return Percentile(50.0); } double HistogramStat::Percentile(double p) const { double threshold = 
num() * (p / 100.0); uint64_t cumulative_sum = 0; for (unsigned int b = 0; b < num_buckets_; b++) { uint64_t bucket_value = bucket_at(b); cumulative_sum += bucket_value; if (cumulative_sum >= threshold) { // Scale linearly within this bucket uint64_t left_point = (b == 0) ? 0 : bucketMapper.BucketLimit(b-1); uint64_t right_point = bucketMapper.BucketLimit(b); uint64_t left_sum = cumulative_sum - bucket_value; uint64_t right_sum = cumulative_sum; double pos = 0; uint64_t right_left_diff = right_sum - left_sum; if (right_left_diff != 0) { pos = (threshold - left_sum) / right_left_diff; } double r = left_point + (right_point - left_point) * pos; uint64_t cur_min = min(); uint64_t cur_max = max(); if (r < cur_min) r = static_cast(cur_min); if (r > cur_max) r = static_cast(cur_max); return r; } } return static_cast(max()); } double HistogramStat::Average() const { uint64_t cur_num = num(); uint64_t cur_sum = sum(); if (cur_num == 0) return 0; return static_cast(cur_sum) / static_cast(cur_num); } double HistogramStat::StandardDeviation() const { uint64_t cur_num = num(); uint64_t cur_sum = sum(); uint64_t cur_sum_squares = sum_squares(); if (cur_num == 0) return 0; double variance = static_cast(cur_sum_squares * cur_num - cur_sum * cur_sum) / static_cast(cur_num * cur_num); return std::sqrt(variance); } std::string HistogramStat::ToString() const { uint64_t cur_num = num(); std::string r; char buf[1650]; snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", cur_num, Average(), StandardDeviation()); r.append(buf); snprintf(buf, sizeof(buf), "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n", (cur_num == 0 ? 0 : min()), Median(), (cur_num == 0 ? 
0 : max())); r.append(buf); snprintf(buf, sizeof(buf), "Percentiles: " "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n", Percentile(50), Percentile(75), Percentile(99), Percentile(99.9), Percentile(99.99)); r.append(buf); r.append("------------------------------------------------------\n"); if (cur_num == 0) return r; // all buckets are empty const double mult = 100.0 / cur_num; uint64_t cumulative_sum = 0; for (unsigned int b = 0; b < num_buckets_; b++) { uint64_t bucket_value = bucket_at(b); if (bucket_value <= 0.0) continue; cumulative_sum += bucket_value; snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", (b == 0) ? '[' : '(', (b == 0) ? 0 : bucketMapper.BucketLimit(b-1), // left bucketMapper.BucketLimit(b), // right bucket_value, // count (mult * bucket_value), // percentage (mult * cumulative_sum)); // cumulative percentage r.append(buf); // Add hash marks based on percentage; 20 marks for 100%. size_t marks = static_cast(mult * bucket_value / 5 + 0.5); r.append(marks, '#'); r.push_back('\n'); } return r; } void HistogramStat::Data(HistogramData * const data) const { assert(data); data->median = Median(); data->percentile95 = Percentile(95); data->percentile99 = Percentile(99); data->max = static_cast(max()); data->average = Average(); data->standard_deviation = StandardDeviation(); data->count = num(); data->sum = sum(); data->min = static_cast(min()); } void HistogramImpl::Clear() { std::lock_guard lock(mutex_); stats_.Clear(); } bool HistogramImpl::Empty() const { return stats_.Empty(); } void HistogramImpl::Add(uint64_t value) { stats_.Add(value); } void HistogramImpl::Merge(const Histogram& other) { if (strcmp(Name(), other.Name()) == 0) { Merge(*static_cast_with_check(&other)); } } void HistogramImpl::Merge(const HistogramImpl& other) { std::lock_guard lock(mutex_); stats_.Merge(other.stats_); } double HistogramImpl::Median() const { return stats_.Median(); } double HistogramImpl::Percentile(double p) 
const { return stats_.Percentile(p); } double HistogramImpl::Average() const { return stats_.Average(); } double HistogramImpl::StandardDeviation() const { return stats_.StandardDeviation(); } std::string HistogramImpl::ToString() const { return stats_.ToString(); } void HistogramImpl::Data(HistogramData * const data) const { stats_.Data(data); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/histogram.h000066400000000000000000000107121370372246700176010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "rocksdb/statistics.h" #include #include #include #include #include namespace ROCKSDB_NAMESPACE { class HistogramBucketMapper { public: HistogramBucketMapper(); // converts a value to the bucket index. size_t IndexForValue(uint64_t value) const; // number of buckets required. 
size_t BucketCount() const { return bucketValues_.size(); } uint64_t LastValue() const { return maxBucketValue_; } uint64_t FirstValue() const { return minBucketValue_; } uint64_t BucketLimit(const size_t bucketNumber) const { assert(bucketNumber < BucketCount()); return bucketValues_[bucketNumber]; } private: std::vector bucketValues_; uint64_t maxBucketValue_; uint64_t minBucketValue_; std::map valueIndexMap_; }; struct HistogramStat { HistogramStat(); ~HistogramStat() {} HistogramStat(const HistogramStat&) = delete; HistogramStat& operator=(const HistogramStat&) = delete; void Clear(); bool Empty() const; void Add(uint64_t value); void Merge(const HistogramStat& other); inline uint64_t min() const { return min_.load(std::memory_order_relaxed); } inline uint64_t max() const { return max_.load(std::memory_order_relaxed); } inline uint64_t num() const { return num_.load(std::memory_order_relaxed); } inline uint64_t sum() const { return sum_.load(std::memory_order_relaxed); } inline uint64_t sum_squares() const { return sum_squares_.load(std::memory_order_relaxed); } inline uint64_t bucket_at(size_t b) const { return buckets_[b].load(std::memory_order_relaxed); } double Median() const; double Percentile(double p) const; double Average() const; double StandardDeviation() const; void Data(HistogramData* const data) const; std::string ToString() const; // To be able to use HistogramStat as thread local variable, it // cannot have dynamic allocated member. 
That's why we're // using manually values from BucketMapper std::atomic_uint_fast64_t min_; std::atomic_uint_fast64_t max_; std::atomic_uint_fast64_t num_; std::atomic_uint_fast64_t sum_; std::atomic_uint_fast64_t sum_squares_; std::atomic_uint_fast64_t buckets_[109]; // 109==BucketMapper::BucketCount() const uint64_t num_buckets_; }; class Histogram { public: Histogram() {} virtual ~Histogram() {}; virtual void Clear() = 0; virtual bool Empty() const = 0; virtual void Add(uint64_t value) = 0; virtual void Merge(const Histogram&) = 0; virtual std::string ToString() const = 0; virtual const char* Name() const = 0; virtual uint64_t min() const = 0; virtual uint64_t max() const = 0; virtual uint64_t num() const = 0; virtual double Median() const = 0; virtual double Percentile(double p) const = 0; virtual double Average() const = 0; virtual double StandardDeviation() const = 0; virtual void Data(HistogramData* const data) const = 0; }; class HistogramImpl : public Histogram { public: HistogramImpl() { Clear(); } HistogramImpl(const HistogramImpl&) = delete; HistogramImpl& operator=(const HistogramImpl&) = delete; virtual void Clear() override; virtual bool Empty() const override; virtual void Add(uint64_t value) override; virtual void Merge(const Histogram& other) override; void Merge(const HistogramImpl& other); virtual std::string ToString() const override; virtual const char* Name() const override { return "HistogramImpl"; } virtual uint64_t min() const override { return stats_.min(); } virtual uint64_t max() const override { return stats_.max(); } virtual uint64_t num() const override { return stats_.num(); } virtual double Median() const override; virtual double Percentile(double p) const override; virtual double Average() const override; virtual double StandardDeviation() const override; virtual void Data(HistogramData* const data) const override; virtual ~HistogramImpl() {} private: HistogramStat stats_; std::mutex mutex_; }; } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/monitoring/histogram_test.cc000066400000000000000000000161441370372246700210030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include #include "monitoring/histogram.h" #include "monitoring/histogram_windowing.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { class HistogramTest : public testing::Test {}; namespace { const double kIota = 0.1; const HistogramBucketMapper bucketMapper; Env* env = Env::Default(); } void PopulateHistogram(Histogram& histogram, uint64_t low, uint64_t high, uint64_t loop = 1) { for (; loop > 0; loop--) { for (uint64_t i = low; i <= high; i++) { histogram.Add(i); } } } void BasicOperation(Histogram& histogram) { PopulateHistogram(histogram, 1, 110, 10); // fill up to bucket [70, 110) HistogramData data; histogram.Data(&data); ASSERT_LE(fabs(histogram.Percentile(100.0) - 110.0), kIota); ASSERT_LE(fabs(data.percentile99 - 108.9), kIota); // 99 * 110 / 100 ASSERT_LE(fabs(data.percentile95 - 104.5), kIota); // 95 * 110 / 100 ASSERT_LE(fabs(data.median - 55.0), kIota); // 50 * 110 / 100 ASSERT_EQ(data.average, 55.5); // (1 + 110) / 2 } void MergeHistogram(Histogram& histogram, Histogram& other) { PopulateHistogram(histogram, 1, 100); PopulateHistogram(other, 101, 250); histogram.Merge(other); HistogramData data; histogram.Data(&data); ASSERT_LE(fabs(histogram.Percentile(100.0) - 250.0), kIota); ASSERT_LE(fabs(data.percentile99 - 247.5), kIota); // 99 * 250 / 100 ASSERT_LE(fabs(data.percentile95 - 237.5), kIota); // 95 * 250 / 100 ASSERT_LE(fabs(data.median - 125.0), kIota); // 50 * 250 / 100 ASSERT_EQ(data.average, 125.5); // (1 + 250) / 2 } void EmptyHistogram(Histogram& histogram) { ASSERT_EQ(histogram.min(), bucketMapper.LastValue()); ASSERT_EQ(histogram.max(), 0); 
ASSERT_EQ(histogram.num(), 0); ASSERT_EQ(histogram.Median(), 0.0); ASSERT_EQ(histogram.Percentile(85.0), 0.0); ASSERT_EQ(histogram.Average(), 0.0); ASSERT_EQ(histogram.StandardDeviation(), 0.0); } void ClearHistogram(Histogram& histogram) { for (uint64_t i = 1; i <= 100; i++) { histogram.Add(i); } histogram.Clear(); ASSERT_TRUE(histogram.Empty()); ASSERT_EQ(histogram.Median(), 0); ASSERT_EQ(histogram.Percentile(85.0), 0); ASSERT_EQ(histogram.Average(), 0); } TEST_F(HistogramTest, BasicOperation) { HistogramImpl histogram; BasicOperation(histogram); HistogramWindowingImpl histogramWindowing; BasicOperation(histogramWindowing); } TEST_F(HistogramTest, BoundaryValue) { HistogramImpl histogram; // - both should be in [0, 1] bucket because we place values on bucket // boundaries in the lower bucket. // - all points are in [0, 1] bucket, so p50 will be 0.5 // - the test cannot be written with a single point since histogram won't // report percentiles lower than the min or greater than the max. 
histogram.Add(0); histogram.Add(1); ASSERT_LE(fabs(histogram.Percentile(50.0) - 0.5), kIota); } TEST_F(HistogramTest, MergeHistogram) { HistogramImpl histogram; HistogramImpl other; MergeHistogram(histogram, other); HistogramWindowingImpl histogramWindowing; HistogramWindowingImpl otherWindowing; MergeHistogram(histogramWindowing, otherWindowing); } TEST_F(HistogramTest, EmptyHistogram) { HistogramImpl histogram; EmptyHistogram(histogram); HistogramWindowingImpl histogramWindowing; EmptyHistogram(histogramWindowing); } TEST_F(HistogramTest, ClearHistogram) { HistogramImpl histogram; ClearHistogram(histogram); HistogramWindowingImpl histogramWindowing; ClearHistogram(histogramWindowing); } TEST_F(HistogramTest, HistogramWindowingExpire) { uint64_t num_windows = 3; int micros_per_window = 1000000; uint64_t min_num_per_window = 0; HistogramWindowingImpl histogramWindowing(num_windows, micros_per_window, min_num_per_window); PopulateHistogram(histogramWindowing, 1, 1, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 100); ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.max(), 1); ASSERT_EQ(histogramWindowing.Average(), 1); PopulateHistogram(histogramWindowing, 2, 2, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 200); ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.max(), 2); ASSERT_EQ(histogramWindowing.Average(), 1.5); PopulateHistogram(histogramWindowing, 3, 3, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 300); ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.max(), 3); ASSERT_EQ(histogramWindowing.Average(), 2.0); // dropping oldest window with value 1, remaining 2 ~ 4 PopulateHistogram(histogramWindowing, 4, 4, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 300); ASSERT_EQ(histogramWindowing.min(), 2); ASSERT_EQ(histogramWindowing.max(), 4); 
ASSERT_EQ(histogramWindowing.Average(), 3.0); // dropping oldest window with value 2, remaining 3 ~ 5 PopulateHistogram(histogramWindowing, 5, 5, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 300); ASSERT_EQ(histogramWindowing.min(), 3); ASSERT_EQ(histogramWindowing.max(), 5); ASSERT_EQ(histogramWindowing.Average(), 4.0); } TEST_F(HistogramTest, HistogramWindowingMerge) { uint64_t num_windows = 3; int micros_per_window = 1000000; uint64_t min_num_per_window = 0; HistogramWindowingImpl histogramWindowing(num_windows, micros_per_window, min_num_per_window); HistogramWindowingImpl otherWindowing(num_windows, micros_per_window, min_num_per_window); PopulateHistogram(histogramWindowing, 1, 1, 100); PopulateHistogram(otherWindowing, 1, 1, 100); env->SleepForMicroseconds(micros_per_window); PopulateHistogram(histogramWindowing, 2, 2, 100); PopulateHistogram(otherWindowing, 2, 2, 100); env->SleepForMicroseconds(micros_per_window); PopulateHistogram(histogramWindowing, 3, 3, 100); PopulateHistogram(otherWindowing, 3, 3, 100); env->SleepForMicroseconds(micros_per_window); histogramWindowing.Merge(otherWindowing); ASSERT_EQ(histogramWindowing.num(), 600); ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.max(), 3); ASSERT_EQ(histogramWindowing.Average(), 2.0); // dropping oldest window with value 1, remaining 2 ~ 4 PopulateHistogram(histogramWindowing, 4, 4, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 500); ASSERT_EQ(histogramWindowing.min(), 2); ASSERT_EQ(histogramWindowing.max(), 4); // dropping oldest window with value 2, remaining 3 ~ 5 PopulateHistogram(histogramWindowing, 5, 5, 100); env->SleepForMicroseconds(micros_per_window); ASSERT_EQ(histogramWindowing.num(), 400); ASSERT_EQ(histogramWindowing.min(), 3); ASSERT_EQ(histogramWindowing.max(), 5); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return 
RUN_ALL_TESTS(); } rocksdb-6.11.4/monitoring/histogram_windowing.cc000066400000000000000000000144521370372246700220310ustar00rootroot00000000000000// Copyright (c) 2013, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "monitoring/histogram_windowing.h" #include "monitoring/histogram.h" #include "util/cast_util.h" #include namespace ROCKSDB_NAMESPACE { HistogramWindowingImpl::HistogramWindowingImpl() { env_ = Env::Default(); window_stats_.reset(new HistogramStat[static_cast(num_windows_)]); Clear(); } HistogramWindowingImpl::HistogramWindowingImpl( uint64_t num_windows, uint64_t micros_per_window, uint64_t min_num_per_window) : num_windows_(num_windows), micros_per_window_(micros_per_window), min_num_per_window_(min_num_per_window) { env_ = Env::Default(); window_stats_.reset(new HistogramStat[static_cast(num_windows_)]); Clear(); } HistogramWindowingImpl::~HistogramWindowingImpl() { } void HistogramWindowingImpl::Clear() { std::lock_guard lock(mutex_); stats_.Clear(); for (size_t i = 0; i < num_windows_; i++) { window_stats_[i].Clear(); } current_window_.store(0, std::memory_order_relaxed); last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed); } bool HistogramWindowingImpl::Empty() const { return stats_.Empty(); } // This function is designed to be lock free, as it's in the critical path // of any operation. // Each individual value is atomic, it is just that some samples can go // in the older bucket which is tolerable. 
void HistogramWindowingImpl::Add(uint64_t value){ TimerTick(); // Parent (global) member update stats_.Add(value); // Current window update window_stats_[static_cast(current_window())].Add(value); } void HistogramWindowingImpl::Merge(const Histogram& other) { if (strcmp(Name(), other.Name()) == 0) { Merge(*static_cast_with_check(&other)); } } void HistogramWindowingImpl::Merge(const HistogramWindowingImpl& other) { std::lock_guard lock(mutex_); stats_.Merge(other.stats_); if (stats_.num_buckets_ != other.stats_.num_buckets_ || micros_per_window_ != other.micros_per_window_) { return; } uint64_t cur_window = current_window(); uint64_t other_cur_window = other.current_window(); // going backwards for alignment for (unsigned int i = 0; i < std::min(num_windows_, other.num_windows_); i++) { uint64_t window_index = (cur_window + num_windows_ - i) % num_windows_; uint64_t other_window_index = (other_cur_window + other.num_windows_ - i) % other.num_windows_; size_t windex = static_cast(window_index); size_t other_windex = static_cast(other_window_index); window_stats_[windex].Merge( other.window_stats_[other_windex]); } } std::string HistogramWindowingImpl::ToString() const { return stats_.ToString(); } double HistogramWindowingImpl::Median() const { return Percentile(50.0); } double HistogramWindowingImpl::Percentile(double p) const { // Retry 3 times in total for (int retry = 0; retry < 3; retry++) { uint64_t start_num = stats_.num(); double result = stats_.Percentile(p); // Detect if swap buckets or Clear() was called during calculation if (stats_.num() >= start_num) { return result; } } return 0.0; } double HistogramWindowingImpl::Average() const { return stats_.Average(); } double HistogramWindowingImpl::StandardDeviation() const { return stats_.StandardDeviation(); } void HistogramWindowingImpl::Data(HistogramData * const data) const { stats_.Data(data); } void HistogramWindowingImpl::TimerTick() { uint64_t curr_time = env_->NowMicros(); size_t curr_window_ = 
static_cast(current_window()); if (curr_time - last_swap_time() > micros_per_window_ && window_stats_[curr_window_].num() >= min_num_per_window_) { SwapHistoryBucket(); } } void HistogramWindowingImpl::SwapHistoryBucket() { // Threads executing Add() would be competing for this mutex, the first one // who got the metex would take care of the bucket swap, other threads // can skip this. // If mutex is held by Merge() or Clear(), next Add() will take care of the // swap, if needed. if (mutex_.try_lock()) { last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed); uint64_t curr_window = current_window(); uint64_t next_window = (curr_window == num_windows_ - 1) ? 0 : curr_window + 1; // subtract next buckets from totals and swap to next buckets HistogramStat& stats_to_drop = window_stats_[static_cast(next_window)]; if (!stats_to_drop.Empty()) { for (size_t b = 0; b < stats_.num_buckets_; b++){ stats_.buckets_[b].fetch_sub( stats_to_drop.bucket_at(b), std::memory_order_relaxed); } if (stats_.min() == stats_to_drop.min()) { uint64_t new_min = std::numeric_limits::max(); for (unsigned int i = 0; i < num_windows_; i++) { if (i != next_window) { uint64_t m = window_stats_[i].min(); if (m < new_min) new_min = m; } } stats_.min_.store(new_min, std::memory_order_relaxed); } if (stats_.max() == stats_to_drop.max()) { uint64_t new_max = 0; for (unsigned int i = 0; i < num_windows_; i++) { if (i != next_window) { uint64_t m = window_stats_[i].max(); if (m > new_max) new_max = m; } } stats_.max_.store(new_max, std::memory_order_relaxed); } stats_.num_.fetch_sub(stats_to_drop.num(), std::memory_order_relaxed); stats_.sum_.fetch_sub(stats_to_drop.sum(), std::memory_order_relaxed); stats_.sum_squares_.fetch_sub( stats_to_drop.sum_squares(), std::memory_order_relaxed); stats_to_drop.Clear(); } // advance to next window bucket current_window_.store(next_window, std::memory_order_relaxed); mutex_.unlock(); } } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/monitoring/histogram_windowing.h000066400000000000000000000054261370372246700216740ustar00rootroot00000000000000// Copyright (c) 2013, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "monitoring/histogram.h" #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { class HistogramWindowingImpl : public Histogram { public: HistogramWindowingImpl(); HistogramWindowingImpl(uint64_t num_windows, uint64_t micros_per_window, uint64_t min_num_per_window); HistogramWindowingImpl(const HistogramWindowingImpl&) = delete; HistogramWindowingImpl& operator=(const HistogramWindowingImpl&) = delete; ~HistogramWindowingImpl(); virtual void Clear() override; virtual bool Empty() const override; virtual void Add(uint64_t value) override; virtual void Merge(const Histogram& other) override; void Merge(const HistogramWindowingImpl& other); virtual std::string ToString() const override; virtual const char* Name() const override { return "HistogramWindowingImpl"; } virtual uint64_t min() const override { return stats_.min(); } virtual uint64_t max() const override { return stats_.max(); } virtual uint64_t num() const override { return stats_.num(); } virtual double Median() const override; virtual double Percentile(double p) const override; virtual double Average() const override; virtual double StandardDeviation() const override; virtual void Data(HistogramData* const data) const override; private: void TimerTick(); void SwapHistoryBucket(); inline uint64_t current_window() const { return current_window_.load(std::memory_order_relaxed); } inline uint64_t 
last_swap_time() const{ return last_swap_time_.load(std::memory_order_relaxed); } Env* env_; std::mutex mutex_; // Aggregated stats over windows_stats_, all the computation is done // upon aggregated values HistogramStat stats_; // This is a circular array representing the latest N time-windows. // Each entry stores a time-window of data. Expiration is done // on window-based. std::unique_ptr window_stats_; std::atomic_uint_fast64_t current_window_; std::atomic_uint_fast64_t last_swap_time_; // Following parameters are configuable uint64_t num_windows_ = 5; uint64_t micros_per_window_ = 60000000; // By default, don't care about the number of values in current window // when decide whether to swap windows or not. uint64_t min_num_per_window_ = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/in_memory_stats_history.cc000066400000000000000000000036221370372246700227410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "monitoring/in_memory_stats_history.h" #include "db/db_impl/db_impl.h" namespace ROCKSDB_NAMESPACE { InMemoryStatsHistoryIterator::~InMemoryStatsHistoryIterator() {} bool InMemoryStatsHistoryIterator::Valid() const { return valid_; } Status InMemoryStatsHistoryIterator::status() const { return status_; } // Because of garbage collection, the next stats snapshot may or may not be // right after the current one. When reading from DBImpl::stats_history_, this // call will be protected by DB Mutex so it will not return partial or // corrupted results. 
void InMemoryStatsHistoryIterator::Next() { // increment start_time by 1 to avoid infinite loop AdvanceIteratorByTime(GetStatsTime() + 1, end_time_); } uint64_t InMemoryStatsHistoryIterator::GetStatsTime() const { return time_; } const std::map& InMemoryStatsHistoryIterator::GetStatsMap() const { return stats_map_; } // advance the iterator to the next time between [start_time, end_time) // if success, update time_ and stats_map_ with new_time and stats_map void InMemoryStatsHistoryIterator::AdvanceIteratorByTime(uint64_t start_time, uint64_t end_time) { // try to find next entry in stats_history_ map if (db_impl_ != nullptr) { valid_ = db_impl_->FindStatsByTime(start_time, end_time, &time_, &stats_map_); } else { valid_ = false; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/in_memory_stats_history.h000066400000000000000000000055731370372246700226120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "rocksdb/stats_history.h" namespace ROCKSDB_NAMESPACE { // InMemoryStatsHistoryIterator can be used to access stats history that was // stored by an in-memory two level std::map(DBImpl::stats_history_). It keeps // a copy of the stats snapshot (in stats_map_) that is currently being pointed // to, which allows the iterator to access the stats snapshot even when // the background garbage collecting thread purges it from the source of truth // (`DBImpl::stats_history_`). In that case, the iterator will continue to be // valid until a call to `Next()` returns no result and invalidates it. 
In // some extreme cases, the iterator may also return fragmented segments of // stats snapshots due to long gaps between `Next()` calls and interleaved // garbage collection. class InMemoryStatsHistoryIterator final : public StatsHistoryIterator { public: // Setup InMemoryStatsHistoryIterator to return stats snapshots between // seconds timestamps [start_time, end_time) InMemoryStatsHistoryIterator(uint64_t start_time, uint64_t end_time, DBImpl* db_impl) : start_time_(start_time), end_time_(end_time), valid_(true), db_impl_(db_impl) { AdvanceIteratorByTime(start_time_, end_time_); } // no copying allowed InMemoryStatsHistoryIterator(const InMemoryStatsHistoryIterator&) = delete; void operator=(const InMemoryStatsHistoryIterator&) = delete; InMemoryStatsHistoryIterator(InMemoryStatsHistoryIterator&&) = delete; InMemoryStatsHistoryIterator& operator=(InMemoryStatsHistoryIterator&&) = delete; ~InMemoryStatsHistoryIterator() override; bool Valid() const override; Status status() const override; // Move to the next stats snapshot currently available // This function may invalidate the iterator // REQUIRES: Valid() void Next() override; // REQUIRES: Valid() uint64_t GetStatsTime() const override; // This function is idempotent // REQUIRES: Valid() const std::map& GetStatsMap() const override; private: // advance the iterator to the next stats history record with timestamp // between [start_time, end_time) void AdvanceIteratorByTime(uint64_t start_time, uint64_t end_time); uint64_t time_; uint64_t start_time_; uint64_t end_time_; std::map stats_map_; Status status_; bool valid_; DBImpl* db_impl_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/instrumented_mutex.cc000066400000000000000000000040571370372246700217120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "monitoring/instrumented_mutex.h" #include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_util.h" #include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { namespace { #ifndef NPERF_CONTEXT Statistics* stats_for_report(Env* env, Statistics* stats) { if (env != nullptr && stats != nullptr && stats->get_stats_level() > kExceptTimeForMutex) { return stats; } else { return nullptr; } } #endif // NPERF_CONTEXT } // namespace void InstrumentedMutex::Lock() { PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD( db_mutex_lock_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS, stats_for_report(env_, stats_), stats_code_); LockInternal(); } void InstrumentedMutex::LockInternal() { #ifndef NDEBUG ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT); #endif mutex_.Lock(); } void InstrumentedCondVar::Wait() { PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD( db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS, stats_for_report(env_, stats_), stats_code_); WaitInternal(); } void InstrumentedCondVar::WaitInternal() { #ifndef NDEBUG ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT); #endif cond_.Wait(); } bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) { PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD( db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS, stats_for_report(env_, stats_), stats_code_); return TimedWaitInternal(abs_time_us); } bool InstrumentedCondVar::TimedWaitInternal(uint64_t abs_time_us) { #ifndef NDEBUG ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT); #endif TEST_SYNC_POINT_CALLBACK("InstrumentedCondVar::TimedWaitInternal", &abs_time_us); return cond_.TimedWait(abs_time_us); } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/monitoring/instrumented_mutex.h000066400000000000000000000044751370372246700215600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "monitoring/statistics.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/statistics.h" #include "rocksdb/thread_status.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { class InstrumentedCondVar; // A wrapper class for port::Mutex that provides additional layer // for collecting stats and instrumentation. class InstrumentedMutex { public: explicit InstrumentedMutex(bool adaptive = false) : mutex_(adaptive), stats_(nullptr), env_(nullptr), stats_code_(0) {} InstrumentedMutex( Statistics* stats, Env* env, int stats_code, bool adaptive = false) : mutex_(adaptive), stats_(stats), env_(env), stats_code_(stats_code) {} void Lock(); void Unlock() { mutex_.Unlock(); } void AssertHeld() { mutex_.AssertHeld(); } private: void LockInternal(); friend class InstrumentedCondVar; port::Mutex mutex_; Statistics* stats_; Env* env_; int stats_code_; }; // A wrapper class for port::Mutex that provides additional layer // for collecting stats and instrumentation. 
class InstrumentedMutexLock { public: explicit InstrumentedMutexLock(InstrumentedMutex* mutex) : mutex_(mutex) { mutex_->Lock(); } ~InstrumentedMutexLock() { mutex_->Unlock(); } private: InstrumentedMutex* const mutex_; InstrumentedMutexLock(const InstrumentedMutexLock&) = delete; void operator=(const InstrumentedMutexLock&) = delete; }; class InstrumentedCondVar { public: explicit InstrumentedCondVar(InstrumentedMutex* instrumented_mutex) : cond_(&(instrumented_mutex->mutex_)), stats_(instrumented_mutex->stats_), env_(instrumented_mutex->env_), stats_code_(instrumented_mutex->stats_code_) {} void Wait(); bool TimedWait(uint64_t abs_time_us); void Signal() { cond_.Signal(); } void SignalAll() { cond_.SignalAll(); } private: void WaitInternal(); bool TimedWaitInternal(uint64_t abs_time_us); port::CondVar cond_; Statistics* stats_; Env* env_; int stats_code_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/iostats_context.cc000066400000000000000000000032611370372246700211750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "monitoring/iostats_context_imp.h" #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL __thread IOStatsContext iostats_context; #endif IOStatsContext* get_iostats_context() { #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL return &iostats_context; #else return nullptr; #endif } void IOStatsContext::Reset() { thread_pool_id = Env::Priority::TOTAL; bytes_read = 0; bytes_written = 0; open_nanos = 0; allocate_nanos = 0; write_nanos = 0; read_nanos = 0; range_sync_nanos = 0; prepare_write_nanos = 0; fsync_nanos = 0; logger_nanos = 0; } #define IOSTATS_CONTEXT_OUTPUT(counter) \ if (!exclude_zero_counters || counter > 0) { \ ss << #counter << " = " << counter << ", "; \ } std::string IOStatsContext::ToString(bool exclude_zero_counters) const { std::ostringstream ss; IOSTATS_CONTEXT_OUTPUT(thread_pool_id); IOSTATS_CONTEXT_OUTPUT(bytes_read); IOSTATS_CONTEXT_OUTPUT(bytes_written); IOSTATS_CONTEXT_OUTPUT(open_nanos); IOSTATS_CONTEXT_OUTPUT(allocate_nanos); IOSTATS_CONTEXT_OUTPUT(write_nanos); IOSTATS_CONTEXT_OUTPUT(read_nanos); IOSTATS_CONTEXT_OUTPUT(range_sync_nanos); IOSTATS_CONTEXT_OUTPUT(fsync_nanos); IOSTATS_CONTEXT_OUTPUT(prepare_write_nanos); IOSTATS_CONTEXT_OUTPUT(logger_nanos); std::string str = ss.str(); str.erase(str.find_last_not_of(", ") + 1); return str; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/iostats_context_imp.h000066400000000000000000000041321370372246700217020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #include "monitoring/perf_step_timer.h" #include "rocksdb/iostats_context.h" #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL namespace ROCKSDB_NAMESPACE { extern __thread IOStatsContext iostats_context; } // namespace ROCKSDB_NAMESPACE // increment a specific counter by the specified value #define IOSTATS_ADD(metric, value) (iostats_context.metric += value) // Increase metric value only when it is positive #define IOSTATS_ADD_IF_POSITIVE(metric, value) \ if (value > 0) { IOSTATS_ADD(metric, value); } // reset a specific counter to zero #define IOSTATS_RESET(metric) (iostats_context.metric = 0) // reset all counters to zero #define IOSTATS_RESET_ALL() (iostats_context.Reset()) #define IOSTATS_SET_THREAD_POOL_ID(value) \ (iostats_context.thread_pool_id = value) #define IOSTATS_THREAD_POOL_ID() (iostats_context.thread_pool_id) #define IOSTATS(metric) (iostats_context.metric) // Declare and set start time of the timer #define IOSTATS_TIMER_GUARD(metric) \ PerfStepTimer iostats_step_timer_##metric(&(iostats_context.metric)); \ iostats_step_timer_##metric.Start(); // Declare and set start time of the timer #define IOSTATS_CPU_TIMER_GUARD(metric, env) \ PerfStepTimer iostats_step_timer_##metric( \ &(iostats_context.metric), env, true, \ PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \ iostats_step_timer_##metric.Start(); #else // ROCKSDB_SUPPORT_THREAD_LOCAL #define IOSTATS_ADD(metric, value) #define IOSTATS_ADD_IF_POSITIVE(metric, value) #define IOSTATS_RESET(metric) #define IOSTATS_RESET_ALL() #define IOSTATS_SET_THREAD_POOL_ID(value) #define IOSTATS_THREAD_POOL_ID() #define IOSTATS(metric) 0 #define IOSTATS_TIMER_GUARD(metric) #define IOSTATS_CPU_TIMER_GUARD(metric, env) static_cast(env) #endif // ROCKSDB_SUPPORT_THREAD_LOCAL rocksdb-6.11.4/monitoring/iostats_context_test.cc000066400000000000000000000017541370372246700222410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "rocksdb/iostats_context.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { TEST(IOStatsContextTest, ToString) { get_iostats_context()->Reset(); get_iostats_context()->bytes_read = 12345; std::string zero_included = get_iostats_context()->ToString(); ASSERT_NE(std::string::npos, zero_included.find("= 0")); ASSERT_NE(std::string::npos, zero_included.find("= 12345")); std::string zero_excluded = get_iostats_context()->ToString(true); ASSERT_EQ(std::string::npos, zero_excluded.find("= 0")); ASSERT_NE(std::string::npos, zero_excluded.find("= 12345")); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/monitoring/perf_context.cc000066400000000000000000000577661370372246700204660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include #include "monitoring/perf_context_imp.h" namespace ROCKSDB_NAMESPACE { #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) PerfContext perf_context; #else #if defined(OS_SOLARIS) __thread PerfContext perf_context_; #else thread_local PerfContext perf_context; #endif #endif PerfContext* get_perf_context() { #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) return &perf_context; #else #if defined(OS_SOLARIS) return &perf_context_; #else return &perf_context; #endif #endif } PerfContext::~PerfContext() { #if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) && !defined(OS_SOLARIS) ClearPerLevelPerfContext(); #endif } PerfContext::PerfContext(const PerfContext& other) { #ifdef NPERF_CONTEXT (void)other; #else user_key_comparison_count = other.user_key_comparison_count; block_cache_hit_count = other.block_cache_hit_count; block_read_count = other.block_read_count; block_read_byte = other.block_read_byte; block_read_time = other.block_read_time; block_cache_index_hit_count = other.block_cache_index_hit_count; index_block_read_count = other.index_block_read_count; block_cache_filter_hit_count = other.block_cache_filter_hit_count; filter_block_read_count = other.filter_block_read_count; compression_dict_block_read_count = other.compression_dict_block_read_count; block_checksum_time = other.block_checksum_time; block_decompress_time = other.block_decompress_time; get_read_bytes = other.get_read_bytes; multiget_read_bytes = other.multiget_read_bytes; iter_read_bytes = other.iter_read_bytes; internal_key_skipped_count = other.internal_key_skipped_count; internal_delete_skipped_count = other.internal_delete_skipped_count; internal_recent_skipped_count = other.internal_recent_skipped_count; internal_merge_count = other.internal_merge_count; write_wal_time = other.write_wal_time; get_snapshot_time = other.get_snapshot_time; get_from_memtable_time = other.get_from_memtable_time; get_from_memtable_count = 
other.get_from_memtable_count; get_post_process_time = other.get_post_process_time; get_from_output_files_time = other.get_from_output_files_time; seek_on_memtable_time = other.seek_on_memtable_time; seek_on_memtable_count = other.seek_on_memtable_count; next_on_memtable_count = other.next_on_memtable_count; prev_on_memtable_count = other.prev_on_memtable_count; seek_child_seek_time = other.seek_child_seek_time; seek_child_seek_count = other.seek_child_seek_count; seek_min_heap_time = other.seek_min_heap_time; seek_internal_seek_time = other.seek_internal_seek_time; find_next_user_entry_time = other.find_next_user_entry_time; write_pre_and_post_process_time = other.write_pre_and_post_process_time; write_memtable_time = other.write_memtable_time; write_delay_time = other.write_delay_time; write_thread_wait_nanos = other.write_thread_wait_nanos; write_scheduling_flushes_compactions_time = other.write_scheduling_flushes_compactions_time; db_mutex_lock_nanos = other.db_mutex_lock_nanos; db_condition_wait_nanos = other.db_condition_wait_nanos; merge_operator_time_nanos = other.merge_operator_time_nanos; read_index_block_nanos = other.read_index_block_nanos; read_filter_block_nanos = other.read_filter_block_nanos; new_table_block_iter_nanos = other.new_table_block_iter_nanos; new_table_iterator_nanos = other.new_table_iterator_nanos; block_seek_nanos = other.block_seek_nanos; find_table_nanos = other.find_table_nanos; bloom_memtable_hit_count = other.bloom_memtable_hit_count; bloom_memtable_miss_count = other.bloom_memtable_miss_count; bloom_sst_hit_count = other.bloom_sst_hit_count; bloom_sst_miss_count = other.bloom_sst_miss_count; key_lock_wait_time = other.key_lock_wait_time; key_lock_wait_count = other.key_lock_wait_count; env_new_sequential_file_nanos = other.env_new_sequential_file_nanos; env_new_random_access_file_nanos = other.env_new_random_access_file_nanos; env_new_writable_file_nanos = other.env_new_writable_file_nanos; env_reuse_writable_file_nanos = 
other.env_reuse_writable_file_nanos; env_new_random_rw_file_nanos = other.env_new_random_rw_file_nanos; env_new_directory_nanos = other.env_new_directory_nanos; env_file_exists_nanos = other.env_file_exists_nanos; env_get_children_nanos = other.env_get_children_nanos; env_get_children_file_attributes_nanos = other.env_get_children_file_attributes_nanos; env_delete_file_nanos = other.env_delete_file_nanos; env_create_dir_nanos = other.env_create_dir_nanos; env_create_dir_if_missing_nanos = other.env_create_dir_if_missing_nanos; env_delete_dir_nanos = other.env_delete_dir_nanos; env_get_file_size_nanos = other.env_get_file_size_nanos; env_get_file_modification_time_nanos = other.env_get_file_modification_time_nanos; env_rename_file_nanos = other.env_rename_file_nanos; env_link_file_nanos = other.env_link_file_nanos; env_lock_file_nanos = other.env_lock_file_nanos; env_unlock_file_nanos = other.env_unlock_file_nanos; env_new_logger_nanos = other.env_new_logger_nanos; get_cpu_nanos = other.get_cpu_nanos; iter_next_cpu_nanos = other.iter_next_cpu_nanos; iter_prev_cpu_nanos = other.iter_prev_cpu_nanos; iter_seek_cpu_nanos = other.iter_seek_cpu_nanos; if (per_level_perf_context_enabled && level_to_perf_context != nullptr) { ClearPerLevelPerfContext(); } if (other.level_to_perf_context != nullptr) { level_to_perf_context = new std::map(); *level_to_perf_context = *other.level_to_perf_context; } per_level_perf_context_enabled = other.per_level_perf_context_enabled; #endif } PerfContext::PerfContext(PerfContext&& other) noexcept { #ifdef NPERF_CONTEXT (void)other; #else user_key_comparison_count = other.user_key_comparison_count; block_cache_hit_count = other.block_cache_hit_count; block_read_count = other.block_read_count; block_read_byte = other.block_read_byte; block_read_time = other.block_read_time; block_cache_index_hit_count = other.block_cache_index_hit_count; index_block_read_count = other.index_block_read_count; block_cache_filter_hit_count = 
other.block_cache_filter_hit_count; filter_block_read_count = other.filter_block_read_count; compression_dict_block_read_count = other.compression_dict_block_read_count; block_checksum_time = other.block_checksum_time; block_decompress_time = other.block_decompress_time; get_read_bytes = other.get_read_bytes; multiget_read_bytes = other.multiget_read_bytes; iter_read_bytes = other.iter_read_bytes; internal_key_skipped_count = other.internal_key_skipped_count; internal_delete_skipped_count = other.internal_delete_skipped_count; internal_recent_skipped_count = other.internal_recent_skipped_count; internal_merge_count = other.internal_merge_count; write_wal_time = other.write_wal_time; get_snapshot_time = other.get_snapshot_time; get_from_memtable_time = other.get_from_memtable_time; get_from_memtable_count = other.get_from_memtable_count; get_post_process_time = other.get_post_process_time; get_from_output_files_time = other.get_from_output_files_time; seek_on_memtable_time = other.seek_on_memtable_time; seek_on_memtable_count = other.seek_on_memtable_count; next_on_memtable_count = other.next_on_memtable_count; prev_on_memtable_count = other.prev_on_memtable_count; seek_child_seek_time = other.seek_child_seek_time; seek_child_seek_count = other.seek_child_seek_count; seek_min_heap_time = other.seek_min_heap_time; seek_internal_seek_time = other.seek_internal_seek_time; find_next_user_entry_time = other.find_next_user_entry_time; write_pre_and_post_process_time = other.write_pre_and_post_process_time; write_memtable_time = other.write_memtable_time; write_delay_time = other.write_delay_time; write_thread_wait_nanos = other.write_thread_wait_nanos; write_scheduling_flushes_compactions_time = other.write_scheduling_flushes_compactions_time; db_mutex_lock_nanos = other.db_mutex_lock_nanos; db_condition_wait_nanos = other.db_condition_wait_nanos; merge_operator_time_nanos = other.merge_operator_time_nanos; read_index_block_nanos = other.read_index_block_nanos; 
read_filter_block_nanos = other.read_filter_block_nanos; new_table_block_iter_nanos = other.new_table_block_iter_nanos; new_table_iterator_nanos = other.new_table_iterator_nanos; block_seek_nanos = other.block_seek_nanos; find_table_nanos = other.find_table_nanos; bloom_memtable_hit_count = other.bloom_memtable_hit_count; bloom_memtable_miss_count = other.bloom_memtable_miss_count; bloom_sst_hit_count = other.bloom_sst_hit_count; bloom_sst_miss_count = other.bloom_sst_miss_count; key_lock_wait_time = other.key_lock_wait_time; key_lock_wait_count = other.key_lock_wait_count; env_new_sequential_file_nanos = other.env_new_sequential_file_nanos; env_new_random_access_file_nanos = other.env_new_random_access_file_nanos; env_new_writable_file_nanos = other.env_new_writable_file_nanos; env_reuse_writable_file_nanos = other.env_reuse_writable_file_nanos; env_new_random_rw_file_nanos = other.env_new_random_rw_file_nanos; env_new_directory_nanos = other.env_new_directory_nanos; env_file_exists_nanos = other.env_file_exists_nanos; env_get_children_nanos = other.env_get_children_nanos; env_get_children_file_attributes_nanos = other.env_get_children_file_attributes_nanos; env_delete_file_nanos = other.env_delete_file_nanos; env_create_dir_nanos = other.env_create_dir_nanos; env_create_dir_if_missing_nanos = other.env_create_dir_if_missing_nanos; env_delete_dir_nanos = other.env_delete_dir_nanos; env_get_file_size_nanos = other.env_get_file_size_nanos; env_get_file_modification_time_nanos = other.env_get_file_modification_time_nanos; env_rename_file_nanos = other.env_rename_file_nanos; env_link_file_nanos = other.env_link_file_nanos; env_lock_file_nanos = other.env_lock_file_nanos; env_unlock_file_nanos = other.env_unlock_file_nanos; env_new_logger_nanos = other.env_new_logger_nanos; get_cpu_nanos = other.get_cpu_nanos; iter_next_cpu_nanos = other.iter_next_cpu_nanos; iter_prev_cpu_nanos = other.iter_prev_cpu_nanos; iter_seek_cpu_nanos = other.iter_seek_cpu_nanos; if 
(per_level_perf_context_enabled && level_to_perf_context != nullptr) { ClearPerLevelPerfContext(); } if (other.level_to_perf_context != nullptr) { level_to_perf_context = other.level_to_perf_context; other.level_to_perf_context = nullptr; } per_level_perf_context_enabled = other.per_level_perf_context_enabled; #endif } // TODO(Zhongyi): reduce code duplication between copy constructor and // assignment operator PerfContext& PerfContext::operator=(const PerfContext& other) { #ifdef NPERF_CONTEXT (void)other; #else user_key_comparison_count = other.user_key_comparison_count; block_cache_hit_count = other.block_cache_hit_count; block_read_count = other.block_read_count; block_read_byte = other.block_read_byte; block_read_time = other.block_read_time; block_cache_index_hit_count = other.block_cache_index_hit_count; index_block_read_count = other.index_block_read_count; block_cache_filter_hit_count = other.block_cache_filter_hit_count; filter_block_read_count = other.filter_block_read_count; compression_dict_block_read_count = other.compression_dict_block_read_count; block_checksum_time = other.block_checksum_time; block_decompress_time = other.block_decompress_time; get_read_bytes = other.get_read_bytes; multiget_read_bytes = other.multiget_read_bytes; iter_read_bytes = other.iter_read_bytes; internal_key_skipped_count = other.internal_key_skipped_count; internal_delete_skipped_count = other.internal_delete_skipped_count; internal_recent_skipped_count = other.internal_recent_skipped_count; internal_merge_count = other.internal_merge_count; write_wal_time = other.write_wal_time; get_snapshot_time = other.get_snapshot_time; get_from_memtable_time = other.get_from_memtable_time; get_from_memtable_count = other.get_from_memtable_count; get_post_process_time = other.get_post_process_time; get_from_output_files_time = other.get_from_output_files_time; seek_on_memtable_time = other.seek_on_memtable_time; seek_on_memtable_count = other.seek_on_memtable_count; 
next_on_memtable_count = other.next_on_memtable_count; prev_on_memtable_count = other.prev_on_memtable_count; seek_child_seek_time = other.seek_child_seek_time; seek_child_seek_count = other.seek_child_seek_count; seek_min_heap_time = other.seek_min_heap_time; seek_internal_seek_time = other.seek_internal_seek_time; find_next_user_entry_time = other.find_next_user_entry_time; write_pre_and_post_process_time = other.write_pre_and_post_process_time; write_memtable_time = other.write_memtable_time; write_delay_time = other.write_delay_time; write_thread_wait_nanos = other.write_thread_wait_nanos; write_scheduling_flushes_compactions_time = other.write_scheduling_flushes_compactions_time; db_mutex_lock_nanos = other.db_mutex_lock_nanos; db_condition_wait_nanos = other.db_condition_wait_nanos; merge_operator_time_nanos = other.merge_operator_time_nanos; read_index_block_nanos = other.read_index_block_nanos; read_filter_block_nanos = other.read_filter_block_nanos; new_table_block_iter_nanos = other.new_table_block_iter_nanos; new_table_iterator_nanos = other.new_table_iterator_nanos; block_seek_nanos = other.block_seek_nanos; find_table_nanos = other.find_table_nanos; bloom_memtable_hit_count = other.bloom_memtable_hit_count; bloom_memtable_miss_count = other.bloom_memtable_miss_count; bloom_sst_hit_count = other.bloom_sst_hit_count; bloom_sst_miss_count = other.bloom_sst_miss_count; key_lock_wait_time = other.key_lock_wait_time; key_lock_wait_count = other.key_lock_wait_count; env_new_sequential_file_nanos = other.env_new_sequential_file_nanos; env_new_random_access_file_nanos = other.env_new_random_access_file_nanos; env_new_writable_file_nanos = other.env_new_writable_file_nanos; env_reuse_writable_file_nanos = other.env_reuse_writable_file_nanos; env_new_random_rw_file_nanos = other.env_new_random_rw_file_nanos; env_new_directory_nanos = other.env_new_directory_nanos; env_file_exists_nanos = other.env_file_exists_nanos; env_get_children_nanos = 
other.env_get_children_nanos; env_get_children_file_attributes_nanos = other.env_get_children_file_attributes_nanos; env_delete_file_nanos = other.env_delete_file_nanos; env_create_dir_nanos = other.env_create_dir_nanos; env_create_dir_if_missing_nanos = other.env_create_dir_if_missing_nanos; env_delete_dir_nanos = other.env_delete_dir_nanos; env_get_file_size_nanos = other.env_get_file_size_nanos; env_get_file_modification_time_nanos = other.env_get_file_modification_time_nanos; env_rename_file_nanos = other.env_rename_file_nanos; env_link_file_nanos = other.env_link_file_nanos; env_lock_file_nanos = other.env_lock_file_nanos; env_unlock_file_nanos = other.env_unlock_file_nanos; env_new_logger_nanos = other.env_new_logger_nanos; get_cpu_nanos = other.get_cpu_nanos; iter_next_cpu_nanos = other.iter_next_cpu_nanos; iter_prev_cpu_nanos = other.iter_prev_cpu_nanos; iter_seek_cpu_nanos = other.iter_seek_cpu_nanos; if (per_level_perf_context_enabled && level_to_perf_context != nullptr) { ClearPerLevelPerfContext(); } if (other.level_to_perf_context != nullptr) { level_to_perf_context = new std::map(); *level_to_perf_context = *other.level_to_perf_context; } per_level_perf_context_enabled = other.per_level_perf_context_enabled; #endif return *this; } void PerfContext::Reset() { #ifndef NPERF_CONTEXT user_key_comparison_count = 0; block_cache_hit_count = 0; block_read_count = 0; block_read_byte = 0; block_read_time = 0; block_cache_index_hit_count = 0; index_block_read_count = 0; block_cache_filter_hit_count = 0; filter_block_read_count = 0; compression_dict_block_read_count = 0; block_checksum_time = 0; block_decompress_time = 0; get_read_bytes = 0; multiget_read_bytes = 0; iter_read_bytes = 0; internal_key_skipped_count = 0; internal_delete_skipped_count = 0; internal_recent_skipped_count = 0; internal_merge_count = 0; write_wal_time = 0; get_snapshot_time = 0; get_from_memtable_time = 0; get_from_memtable_count = 0; get_post_process_time = 0; get_from_output_files_time 
= 0; seek_on_memtable_time = 0; seek_on_memtable_count = 0; next_on_memtable_count = 0; prev_on_memtable_count = 0; seek_child_seek_time = 0; seek_child_seek_count = 0; seek_min_heap_time = 0; seek_internal_seek_time = 0; find_next_user_entry_time = 0; write_pre_and_post_process_time = 0; write_memtable_time = 0; write_delay_time = 0; write_thread_wait_nanos = 0; write_scheduling_flushes_compactions_time = 0; db_mutex_lock_nanos = 0; db_condition_wait_nanos = 0; merge_operator_time_nanos = 0; read_index_block_nanos = 0; read_filter_block_nanos = 0; new_table_block_iter_nanos = 0; new_table_iterator_nanos = 0; block_seek_nanos = 0; find_table_nanos = 0; bloom_memtable_hit_count = 0; bloom_memtable_miss_count = 0; bloom_sst_hit_count = 0; bloom_sst_miss_count = 0; key_lock_wait_time = 0; key_lock_wait_count = 0; env_new_sequential_file_nanos = 0; env_new_random_access_file_nanos = 0; env_new_writable_file_nanos = 0; env_reuse_writable_file_nanos = 0; env_new_random_rw_file_nanos = 0; env_new_directory_nanos = 0; env_file_exists_nanos = 0; env_get_children_nanos = 0; env_get_children_file_attributes_nanos = 0; env_delete_file_nanos = 0; env_create_dir_nanos = 0; env_create_dir_if_missing_nanos = 0; env_delete_dir_nanos = 0; env_get_file_size_nanos = 0; env_get_file_modification_time_nanos = 0; env_rename_file_nanos = 0; env_link_file_nanos = 0; env_lock_file_nanos = 0; env_unlock_file_nanos = 0; env_new_logger_nanos = 0; get_cpu_nanos = 0; iter_next_cpu_nanos = 0; iter_prev_cpu_nanos = 0; iter_seek_cpu_nanos = 0; if (per_level_perf_context_enabled && level_to_perf_context) { for (auto& kv : *level_to_perf_context) { kv.second.Reset(); } } #endif } #define PERF_CONTEXT_OUTPUT(counter) \ if (!exclude_zero_counters || (counter > 0)) { \ ss << #counter << " = " << counter << ", "; \ } #define PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(counter) \ if (per_level_perf_context_enabled && \ level_to_perf_context) { \ ss << #counter << " = "; \ for (auto& kv : 
*level_to_perf_context) { \ if (!exclude_zero_counters || (kv.second.counter > 0)) { \ ss << kv.second.counter << "@level" << kv.first << ", "; \ } \ } \ } void PerfContextByLevel::Reset() { #ifndef NPERF_CONTEXT bloom_filter_useful = 0; bloom_filter_full_positive = 0; bloom_filter_full_true_positive = 0; block_cache_hit_count = 0; block_cache_miss_count = 0; #endif } std::string PerfContext::ToString(bool exclude_zero_counters) const { #ifdef NPERF_CONTEXT (void)exclude_zero_counters; return ""; #else std::ostringstream ss; PERF_CONTEXT_OUTPUT(user_key_comparison_count); PERF_CONTEXT_OUTPUT(block_cache_hit_count); PERF_CONTEXT_OUTPUT(block_read_count); PERF_CONTEXT_OUTPUT(block_read_byte); PERF_CONTEXT_OUTPUT(block_read_time); PERF_CONTEXT_OUTPUT(block_cache_index_hit_count); PERF_CONTEXT_OUTPUT(index_block_read_count); PERF_CONTEXT_OUTPUT(block_cache_filter_hit_count); PERF_CONTEXT_OUTPUT(filter_block_read_count); PERF_CONTEXT_OUTPUT(compression_dict_block_read_count); PERF_CONTEXT_OUTPUT(block_checksum_time); PERF_CONTEXT_OUTPUT(block_decompress_time); PERF_CONTEXT_OUTPUT(get_read_bytes); PERF_CONTEXT_OUTPUT(multiget_read_bytes); PERF_CONTEXT_OUTPUT(iter_read_bytes); PERF_CONTEXT_OUTPUT(internal_key_skipped_count); PERF_CONTEXT_OUTPUT(internal_delete_skipped_count); PERF_CONTEXT_OUTPUT(internal_recent_skipped_count); PERF_CONTEXT_OUTPUT(internal_merge_count); PERF_CONTEXT_OUTPUT(write_wal_time); PERF_CONTEXT_OUTPUT(get_snapshot_time); PERF_CONTEXT_OUTPUT(get_from_memtable_time); PERF_CONTEXT_OUTPUT(get_from_memtable_count); PERF_CONTEXT_OUTPUT(get_post_process_time); PERF_CONTEXT_OUTPUT(get_from_output_files_time); PERF_CONTEXT_OUTPUT(seek_on_memtable_time); PERF_CONTEXT_OUTPUT(seek_on_memtable_count); PERF_CONTEXT_OUTPUT(next_on_memtable_count); PERF_CONTEXT_OUTPUT(prev_on_memtable_count); PERF_CONTEXT_OUTPUT(seek_child_seek_time); PERF_CONTEXT_OUTPUT(seek_child_seek_count); PERF_CONTEXT_OUTPUT(seek_min_heap_time); PERF_CONTEXT_OUTPUT(seek_internal_seek_time); 
PERF_CONTEXT_OUTPUT(find_next_user_entry_time); PERF_CONTEXT_OUTPUT(write_pre_and_post_process_time); PERF_CONTEXT_OUTPUT(write_memtable_time); PERF_CONTEXT_OUTPUT(write_thread_wait_nanos); PERF_CONTEXT_OUTPUT(write_scheduling_flushes_compactions_time); PERF_CONTEXT_OUTPUT(db_mutex_lock_nanos); PERF_CONTEXT_OUTPUT(db_condition_wait_nanos); PERF_CONTEXT_OUTPUT(merge_operator_time_nanos); PERF_CONTEXT_OUTPUT(write_delay_time); PERF_CONTEXT_OUTPUT(read_index_block_nanos); PERF_CONTEXT_OUTPUT(read_filter_block_nanos); PERF_CONTEXT_OUTPUT(new_table_block_iter_nanos); PERF_CONTEXT_OUTPUT(new_table_iterator_nanos); PERF_CONTEXT_OUTPUT(block_seek_nanos); PERF_CONTEXT_OUTPUT(find_table_nanos); PERF_CONTEXT_OUTPUT(bloom_memtable_hit_count); PERF_CONTEXT_OUTPUT(bloom_memtable_miss_count); PERF_CONTEXT_OUTPUT(bloom_sst_hit_count); PERF_CONTEXT_OUTPUT(bloom_sst_miss_count); PERF_CONTEXT_OUTPUT(key_lock_wait_time); PERF_CONTEXT_OUTPUT(key_lock_wait_count); PERF_CONTEXT_OUTPUT(env_new_sequential_file_nanos); PERF_CONTEXT_OUTPUT(env_new_random_access_file_nanos); PERF_CONTEXT_OUTPUT(env_new_writable_file_nanos); PERF_CONTEXT_OUTPUT(env_reuse_writable_file_nanos); PERF_CONTEXT_OUTPUT(env_new_random_rw_file_nanos); PERF_CONTEXT_OUTPUT(env_new_directory_nanos); PERF_CONTEXT_OUTPUT(env_file_exists_nanos); PERF_CONTEXT_OUTPUT(env_get_children_nanos); PERF_CONTEXT_OUTPUT(env_get_children_file_attributes_nanos); PERF_CONTEXT_OUTPUT(env_delete_file_nanos); PERF_CONTEXT_OUTPUT(env_create_dir_nanos); PERF_CONTEXT_OUTPUT(env_create_dir_if_missing_nanos); PERF_CONTEXT_OUTPUT(env_delete_dir_nanos); PERF_CONTEXT_OUTPUT(env_get_file_size_nanos); PERF_CONTEXT_OUTPUT(env_get_file_modification_time_nanos); PERF_CONTEXT_OUTPUT(env_rename_file_nanos); PERF_CONTEXT_OUTPUT(env_link_file_nanos); PERF_CONTEXT_OUTPUT(env_lock_file_nanos); PERF_CONTEXT_OUTPUT(env_unlock_file_nanos); PERF_CONTEXT_OUTPUT(env_new_logger_nanos); PERF_CONTEXT_OUTPUT(get_cpu_nanos); PERF_CONTEXT_OUTPUT(iter_next_cpu_nanos); 
PERF_CONTEXT_OUTPUT(iter_prev_cpu_nanos); PERF_CONTEXT_OUTPUT(iter_seek_cpu_nanos); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(block_cache_hit_count); PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(block_cache_miss_count); std::string str = ss.str(); str.erase(str.find_last_not_of(", ") + 1); return str; #endif } void PerfContext::EnablePerLevelPerfContext() { if (level_to_perf_context == nullptr) { level_to_perf_context = new std::map(); } per_level_perf_context_enabled = true; } void PerfContext::DisablePerLevelPerfContext(){ per_level_perf_context_enabled = false; } void PerfContext::ClearPerLevelPerfContext(){ if (level_to_perf_context != nullptr) { level_to_perf_context->clear(); delete level_to_perf_context; level_to_perf_context = nullptr; } per_level_perf_context_enabled = false; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/perf_context_imp.h000066400000000000000000000101521370372246700211470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #include "monitoring/perf_step_timer.h" #include "rocksdb/perf_context.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) extern PerfContext perf_context; #else #if defined(OS_SOLARIS) extern __thread PerfContext perf_context_; #define perf_context (*get_perf_context()) #else extern thread_local PerfContext perf_context; #endif #endif #if defined(NPERF_CONTEXT) #define PERF_TIMER_STOP(metric) #define PERF_TIMER_START(metric) #define PERF_TIMER_GUARD(metric) #define PERF_TIMER_GUARD_WITH_ENV(metric, env) #define PERF_CPU_TIMER_GUARD(metric, env) #define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \ ticker_type) #define PERF_TIMER_MEASURE(metric) #define PERF_COUNTER_ADD(metric, value) #define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) #else // Stop the timer and update the metric #define PERF_TIMER_STOP(metric) perf_step_timer_##metric.Stop(); #define PERF_TIMER_START(metric) perf_step_timer_##metric.Start(); // Declare and set start time of the timer #define PERF_TIMER_GUARD(metric) \ PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \ perf_step_timer_##metric.Start(); // Declare and set start time of the timer #define PERF_TIMER_GUARD_WITH_ENV(metric, env) \ PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), env); \ perf_step_timer_##metric.Start(); // Declare and set start time of the timer #define PERF_CPU_TIMER_GUARD(metric, env) \ PerfStepTimer perf_step_timer_##metric( \ &(perf_context.metric), env, true, \ PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \ perf_step_timer_##metric.Start(); #define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \ ticker_type) \ PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), nullptr, \ false, PerfLevel::kEnableTime, stats, \ ticker_type); \ if (condition) { \ perf_step_timer_##metric.Start(); \ } // Update metric with time elapsed since last START. 
start time is reset // to current timestamp. #define PERF_TIMER_MEASURE(metric) perf_step_timer_##metric.Measure(); // Increase metric value #define PERF_COUNTER_ADD(metric, value) \ if (perf_level >= PerfLevel::kEnableCount) { \ perf_context.metric += value; \ } // Increase metric value #define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \ if (perf_level >= PerfLevel::kEnableCount && \ perf_context.per_level_perf_context_enabled && \ perf_context.level_to_perf_context) { \ if ((*(perf_context.level_to_perf_context)).find(level) != \ (*(perf_context.level_to_perf_context)).end()) { \ (*(perf_context.level_to_perf_context))[level].metric += value; \ } \ else { \ PerfContextByLevel empty_context; \ (*(perf_context.level_to_perf_context))[level] = empty_context; \ (*(perf_context.level_to_perf_context))[level].metric += value; \ } \ } \ #endif } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/perf_level.cc000066400000000000000000000012771370372246700200730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include #include "monitoring/perf_level_imp.h" namespace ROCKSDB_NAMESPACE { #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL __thread PerfLevel perf_level = kEnableCount; #else PerfLevel perf_level = kEnableCount; #endif void SetPerfLevel(PerfLevel level) { assert(level > kUninitialized); assert(level < kOutOfBounds); perf_level = level; } PerfLevel GetPerfLevel() { return perf_level; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/perf_level_imp.h000066400000000000000000000010041370372246700205660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include "rocksdb/perf_level.h" #include "port/port.h" namespace ROCKSDB_NAMESPACE { #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL extern __thread PerfLevel perf_level; #else extern PerfLevel perf_level; #endif } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/perf_step_timer.h000066400000000000000000000036511370372246700207770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include "monitoring/perf_level_imp.h" #include "rocksdb/env.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { class PerfStepTimer { public: explicit PerfStepTimer( uint64_t* metric, Env* env = nullptr, bool use_cpu_time = false, PerfLevel enable_level = PerfLevel::kEnableTimeExceptForMutex, Statistics* statistics = nullptr, uint32_t ticker_type = 0) : perf_counter_enabled_(perf_level >= enable_level), use_cpu_time_(use_cpu_time), env_((perf_counter_enabled_ || statistics != nullptr) ? ((env != nullptr) ? 
env : Env::Default()) : nullptr), start_(0), metric_(metric), statistics_(statistics), ticker_type_(ticker_type) {} ~PerfStepTimer() { Stop(); } void Start() { if (perf_counter_enabled_ || statistics_ != nullptr) { start_ = time_now(); } } uint64_t time_now() { if (!use_cpu_time_) { return env_->NowNanos(); } else { return env_->NowCPUNanos(); } } void Measure() { if (start_) { uint64_t now = time_now(); *metric_ += now - start_; start_ = now; } } void Stop() { if (start_) { uint64_t duration = time_now() - start_; if (perf_counter_enabled_) { *metric_ += duration; } if (statistics_ != nullptr) { RecordTick(statistics_, ticker_type_, duration); } start_ = 0; } } private: const bool perf_counter_enabled_; const bool use_cpu_time_; Env* const env_; uint64_t start_; uint64_t* metric_; Statistics* statistics_; uint32_t ticker_type_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/persistent_stats_history.cc000066400000000000000000000142431370372246700231440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "monitoring/persistent_stats_history.h" #include #include #include #include "db/db_impl/db_impl.h" #include "port/likely.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // 10 digit seconds timestamp => [Sep 9, 2001 ~ Nov 20, 2286] const int kNowSecondsStringLength = 10; const std::string kFormatVersionKeyString = "__persistent_stats_format_version__"; const std::string kCompatibleVersionKeyString = "__persistent_stats_compatible_version__"; // Every release maintains two versions numbers for persistents stats: Current // format version and compatible format version. Current format version // designates what type of encoding will be used when writing to stats CF; // compatible format version designates the minimum format version that // can decode the stats CF encoded using the current format version. const uint64_t kStatsCFCurrentFormatVersion = 1; const uint64_t kStatsCFCompatibleFormatVersion = 1; Status DecodePersistentStatsVersionNumber(DBImpl* db, StatsVersionKeyType type, uint64_t* version_number) { if (type >= StatsVersionKeyType::kKeyTypeMax) { return Status::InvalidArgument("Invalid stats version key type provided"); } std::string key; if (type == StatsVersionKeyType::kFormatVersion) { key = kFormatVersionKeyString; } else if (type == StatsVersionKeyType::kCompatibleVersion) { key = kCompatibleVersionKeyString; } ReadOptions options; options.verify_checksums = true; std::string result; Status s = db->Get(options, db->PersistentStatsColumnFamily(), key, &result); if (!s.ok() || result.empty()) { return Status::NotFound("Persistent stats version key " + key + " not found."); } // read version_number but do nothing in current version *version_number = ParseUint64(result); return Status::OK(); } int EncodePersistentStatsKey(uint64_t now_seconds, const std::string& key, int size, char* buf) { char timestamp[kNowSecondsStringLength + 1]; // make time stamp string equal in length to allow sorting by time snprintf(timestamp, 
sizeof(timestamp), "%010d", static_cast(now_seconds)); timestamp[kNowSecondsStringLength] = '\0'; return snprintf(buf, size, "%s#%s", timestamp, key.c_str()); } void OptimizeForPersistentStats(ColumnFamilyOptions* cfo) { cfo->write_buffer_size = 2 << 20; cfo->target_file_size_base = 2 * 1048576; cfo->max_bytes_for_level_base = 10 * 1048576; cfo->soft_pending_compaction_bytes_limit = 256 * 1048576; cfo->hard_pending_compaction_bytes_limit = 1073741824ul; cfo->compression = kNoCompression; } PersistentStatsHistoryIterator::~PersistentStatsHistoryIterator() {} bool PersistentStatsHistoryIterator::Valid() const { return valid_; } Status PersistentStatsHistoryIterator::status() const { return status_; } void PersistentStatsHistoryIterator::Next() { // increment start_time by 1 to avoid infinite loop AdvanceIteratorByTime(GetStatsTime() + 1, end_time_); } uint64_t PersistentStatsHistoryIterator::GetStatsTime() const { return time_; } const std::map& PersistentStatsHistoryIterator::GetStatsMap() const { return stats_map_; } std::pair parseKey(const Slice& key, uint64_t start_time) { std::pair result; std::string key_str = key.ToString(); std::string::size_type pos = key_str.find("#"); // TODO(Zhongyi): add counters to track parse failures? 
if (pos == std::string::npos) { result.first = port::kMaxUint64; result.second.clear(); } else { uint64_t parsed_time = ParseUint64(key_str.substr(0, pos)); // skip entries with timestamp smaller than start_time if (parsed_time < start_time) { result.first = port::kMaxUint64; result.second = ""; } else { result.first = parsed_time; std::string key_resize = key_str.substr(pos + 1); result.second = key_resize; } } return result; } // advance the iterator to the next time between [start_time, end_time) // if success, update time_ and stats_map_ with new_time and stats_map void PersistentStatsHistoryIterator::AdvanceIteratorByTime(uint64_t start_time, uint64_t end_time) { // try to find next entry in stats_history_ map if (db_impl_ != nullptr) { ReadOptions ro; Iterator* iter = db_impl_->NewIterator(ro, db_impl_->PersistentStatsColumnFamily()); char timestamp[kNowSecondsStringLength + 1]; snprintf(timestamp, sizeof(timestamp), "%010d", static_cast(std::max(time_, start_time))); timestamp[kNowSecondsStringLength] = '\0'; iter->Seek(timestamp); // no more entries with timestamp >= start_time is found or version key // is found to be incompatible if (!iter->Valid()) { valid_ = false; delete iter; return; } time_ = parseKey(iter->key(), start_time).first; valid_ = true; // check parsed time and invalid if it exceeds end_time if (time_ > end_time) { valid_ = false; delete iter; return; } // find all entries with timestamp equal to time_ std::map new_stats_map; std::pair kv; for (; iter->Valid(); iter->Next()) { kv = parseKey(iter->key(), start_time); if (kv.first != time_) { break; } if (kv.second.compare(kFormatVersionKeyString) == 0) { continue; } new_stats_map[kv.second] = ParseUint64(iter->value().ToString()); } stats_map_.swap(new_stats_map); delete iter; } else { valid_ = false; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/persistent_stats_history.h000066400000000000000000000056341370372246700230120ustar00rootroot00000000000000// Copyright (c) 
2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "db/db_impl/db_impl.h" #include "rocksdb/stats_history.h" namespace ROCKSDB_NAMESPACE { extern const std::string kFormatVersionKeyString; extern const std::string kCompatibleVersionKeyString; extern const uint64_t kStatsCFCurrentFormatVersion; extern const uint64_t kStatsCFCompatibleFormatVersion; enum StatsVersionKeyType : uint32_t { kFormatVersion = 1, kCompatibleVersion = 2, kKeyTypeMax = 3 }; // Read the version number from persitent stats cf depending on type provided // stores the version number in `*version_number` // returns Status::OK() on success, or other status code on failure Status DecodePersistentStatsVersionNumber(DBImpl* db, StatsVersionKeyType type, uint64_t* version_number); // Encode timestamp and stats key into buf // Format: timestamp(10 digit) + '#' + key // Total length of encoded key will be capped at 100 bytes int EncodePersistentStatsKey(uint64_t timestamp, const std::string& key, int size, char* buf); void OptimizeForPersistentStats(ColumnFamilyOptions* cfo); class PersistentStatsHistoryIterator final : public StatsHistoryIterator { public: PersistentStatsHistoryIterator(uint64_t start_time, uint64_t end_time, DBImpl* db_impl) : time_(0), start_time_(start_time), end_time_(end_time), valid_(true), db_impl_(db_impl) { AdvanceIteratorByTime(start_time_, end_time_); } ~PersistentStatsHistoryIterator() override; bool Valid() const override; Status status() const override; void Next() override; uint64_t GetStatsTime() const override; const std::map& GetStatsMap() const 
override; private: // advance the iterator to the next stats history record with timestamp // between [start_time, end_time) void AdvanceIteratorByTime(uint64_t start_time, uint64_t end_time); // No copying allowed PersistentStatsHistoryIterator(const PersistentStatsHistoryIterator&) = delete; void operator=(const PersistentStatsHistoryIterator&) = delete; PersistentStatsHistoryIterator(PersistentStatsHistoryIterator&&) = delete; PersistentStatsHistoryIterator& operator=(PersistentStatsHistoryIterator&&) = delete; uint64_t time_; uint64_t start_time_; uint64_t end_time_; std::map stats_map_; Status status_; bool valid_; DBImpl* db_impl_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/statistics.cc000066400000000000000000000455641370372246700201510ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include "monitoring/statistics.h" #include #include #include #include "port/likely.h" #include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { // The order of items listed in Tickers should be the same as // the order listed in TickersNameMap const std::vector> TickersNameMap = { {BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"}, {BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"}, {BLOCK_CACHE_ADD, "rocksdb.block.cache.add"}, {BLOCK_CACHE_ADD_FAILURES, "rocksdb.block.cache.add.failures"}, {BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"}, {BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"}, {BLOCK_CACHE_INDEX_ADD, "rocksdb.block.cache.index.add"}, {BLOCK_CACHE_INDEX_BYTES_INSERT, "rocksdb.block.cache.index.bytes.insert"}, {BLOCK_CACHE_INDEX_BYTES_EVICT, "rocksdb.block.cache.index.bytes.evict"}, {BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"}, {BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit"}, {BLOCK_CACHE_FILTER_ADD, "rocksdb.block.cache.filter.add"}, {BLOCK_CACHE_FILTER_BYTES_INSERT, "rocksdb.block.cache.filter.bytes.insert"}, {BLOCK_CACHE_FILTER_BYTES_EVICT, "rocksdb.block.cache.filter.bytes.evict"}, {BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss"}, {BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit"}, {BLOCK_CACHE_DATA_ADD, "rocksdb.block.cache.data.add"}, {BLOCK_CACHE_DATA_BYTES_INSERT, "rocksdb.block.cache.data.bytes.insert"}, {BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"}, {BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"}, {BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"}, {BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"}, {BLOOM_FILTER_FULL_TRUE_POSITIVE, "rocksdb.bloom.filter.full.true.positive"}, {BLOOM_FILTER_MICROS, "rocksdb.bloom.filter.micros"}, {PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"}, {PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"}, {SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"}, {SIM_BLOCK_CACHE_MISS, "rocksdb.sim.block.cache.miss"}, 
{MEMTABLE_HIT, "rocksdb.memtable.hit"}, {MEMTABLE_MISS, "rocksdb.memtable.miss"}, {GET_HIT_L0, "rocksdb.l0.hit"}, {GET_HIT_L1, "rocksdb.l1.hit"}, {GET_HIT_L2_AND_UP, "rocksdb.l2andup.hit"}, {COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new"}, {COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete"}, {COMPACTION_KEY_DROP_RANGE_DEL, "rocksdb.compaction.key.drop.range_del"}, {COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"}, {COMPACTION_RANGE_DEL_DROP_OBSOLETE, "rocksdb.compaction.range_del.drop.obsolete"}, {COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, "rocksdb.compaction.optimized.del.drop.obsolete"}, {COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"}, {NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"}, {NUMBER_KEYS_READ, "rocksdb.number.keys.read"}, {NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"}, {BYTES_WRITTEN, "rocksdb.bytes.written"}, {BYTES_READ, "rocksdb.bytes.read"}, {NUMBER_DB_SEEK, "rocksdb.number.db.seek"}, {NUMBER_DB_NEXT, "rocksdb.number.db.next"}, {NUMBER_DB_PREV, "rocksdb.number.db.prev"}, {NUMBER_DB_SEEK_FOUND, "rocksdb.number.db.seek.found"}, {NUMBER_DB_NEXT_FOUND, "rocksdb.number.db.next.found"}, {NUMBER_DB_PREV_FOUND, "rocksdb.number.db.prev.found"}, {ITER_BYTES_READ, "rocksdb.db.iter.bytes.read"}, {NO_FILE_CLOSES, "rocksdb.no.file.closes"}, {NO_FILE_OPENS, "rocksdb.no.file.opens"}, {NO_FILE_ERRORS, "rocksdb.no.file.errors"}, {STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros"}, {STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros"}, {STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros"}, {STALL_MICROS, "rocksdb.stall.micros"}, {DB_MUTEX_WAIT_MICROS, "rocksdb.db.mutex.wait.micros"}, {RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis"}, {NO_ITERATORS, "rocksdb.num.iterators"}, {NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"}, {NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read"}, {NUMBER_MULTIGET_BYTES_READ, 
"rocksdb.number.multiget.bytes.read"}, {NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered"}, {NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"}, {BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"}, {BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"}, {NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"}, {GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"}, {BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss"}, {BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit"}, {BLOCK_CACHE_COMPRESSED_ADD, "rocksdb.block.cachecompressed.add"}, {BLOCK_CACHE_COMPRESSED_ADD_FAILURES, "rocksdb.block.cachecompressed.add.failures"}, {WAL_FILE_SYNCED, "rocksdb.wal.synced"}, {WAL_FILE_BYTES, "rocksdb.wal.bytes"}, {WRITE_DONE_BY_SELF, "rocksdb.write.self"}, {WRITE_DONE_BY_OTHER, "rocksdb.write.other"}, {WRITE_TIMEDOUT, "rocksdb.write.timeout"}, {WRITE_WITH_WAL, "rocksdb.write.wal"}, {COMPACT_READ_BYTES, "rocksdb.compact.read.bytes"}, {COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes"}, {FLUSH_WRITE_BYTES, "rocksdb.flush.write.bytes"}, {NUMBER_DIRECT_LOAD_TABLE_PROPERTIES, "rocksdb.number.direct.load.table.properties"}, {NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"}, {NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"}, {NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"}, {NUMBER_BLOCK_COMPRESSED, "rocksdb.number.block.compressed"}, {NUMBER_BLOCK_DECOMPRESSED, "rocksdb.number.block.decompressed"}, {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, {MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"}, {FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"}, {ROW_CACHE_HIT, "rocksdb.row.cache.hit"}, {ROW_CACHE_MISS, "rocksdb.row.cache.miss"}, {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, 
{NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, {NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"}, {BLOB_DB_NUM_PUT, "rocksdb.blobdb.num.put"}, {BLOB_DB_NUM_WRITE, "rocksdb.blobdb.num.write"}, {BLOB_DB_NUM_GET, "rocksdb.blobdb.num.get"}, {BLOB_DB_NUM_MULTIGET, "rocksdb.blobdb.num.multiget"}, {BLOB_DB_NUM_SEEK, "rocksdb.blobdb.num.seek"}, {BLOB_DB_NUM_NEXT, "rocksdb.blobdb.num.next"}, {BLOB_DB_NUM_PREV, "rocksdb.blobdb.num.prev"}, {BLOB_DB_NUM_KEYS_WRITTEN, "rocksdb.blobdb.num.keys.written"}, {BLOB_DB_NUM_KEYS_READ, "rocksdb.blobdb.num.keys.read"}, {BLOB_DB_BYTES_WRITTEN, "rocksdb.blobdb.bytes.written"}, {BLOB_DB_BYTES_READ, "rocksdb.blobdb.bytes.read"}, {BLOB_DB_WRITE_INLINED, "rocksdb.blobdb.write.inlined"}, {BLOB_DB_WRITE_INLINED_TTL, "rocksdb.blobdb.write.inlined.ttl"}, {BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"}, {BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"}, {BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"}, {BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"}, {BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"}, {BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, "rocksdb.blobdb.blob.index.expired.count"}, {BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, "rocksdb.blobdb.blob.index.expired.size"}, {BLOB_DB_BLOB_INDEX_EVICTED_COUNT, "rocksdb.blobdb.blob.index.evicted.count"}, {BLOB_DB_BLOB_INDEX_EVICTED_SIZE, "rocksdb.blobdb.blob.index.evicted.size"}, {BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"}, {BLOB_DB_GC_NUM_NEW_FILES, "rocksdb.blobdb.gc.num.new.files"}, {BLOB_DB_GC_FAILURES, "rocksdb.blobdb.gc.failures"}, {BLOB_DB_GC_NUM_KEYS_OVERWRITTEN, "rocksdb.blobdb.gc.num.keys.overwritten"}, {BLOB_DB_GC_NUM_KEYS_EXPIRED, "rocksdb.blobdb.gc.num.keys.expired"}, {BLOB_DB_GC_NUM_KEYS_RELOCATED, "rocksdb.blobdb.gc.num.keys.relocated"}, {BLOB_DB_GC_BYTES_OVERWRITTEN, "rocksdb.blobdb.gc.bytes.overwritten"}, {BLOB_DB_GC_BYTES_EXPIRED, "rocksdb.blobdb.gc.bytes.expired"}, {BLOB_DB_GC_BYTES_RELOCATED, 
"rocksdb.blobdb.gc.bytes.relocated"}, {BLOB_DB_FIFO_NUM_FILES_EVICTED, "rocksdb.blobdb.fifo.num.files.evicted"}, {BLOB_DB_FIFO_NUM_KEYS_EVICTED, "rocksdb.blobdb.fifo.num.keys.evicted"}, {BLOB_DB_FIFO_BYTES_EVICTED, "rocksdb.blobdb.fifo.bytes.evicted"}, {TXN_PREPARE_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.prepare"}, {TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.old.commit.map"}, {TXN_DUPLICATE_KEY_OVERHEAD, "rocksdb.txn.overhead.duplicate.key"}, {TXN_SNAPSHOT_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.snapshot"}, {TXN_GET_TRY_AGAIN, "rocksdb.txn.get.tryagain"}, {NUMBER_MULTIGET_KEYS_FOUND, "rocksdb.number.multiget.keys.found"}, {NO_ITERATOR_CREATED, "rocksdb.num.iterator.created"}, {NO_ITERATOR_DELETED, "rocksdb.num.iterator.deleted"}, {BLOCK_CACHE_COMPRESSION_DICT_MISS, "rocksdb.block.cache.compression.dict.miss"}, {BLOCK_CACHE_COMPRESSION_DICT_HIT, "rocksdb.block.cache.compression.dict.hit"}, {BLOCK_CACHE_COMPRESSION_DICT_ADD, "rocksdb.block.cache.compression.dict.add"}, {BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, "rocksdb.block.cache.compression.dict.bytes.insert"}, {BLOCK_CACHE_COMPRESSION_DICT_BYTES_EVICT, "rocksdb.block.cache.compression.dict.bytes.evict"}, {BLOCK_CACHE_ADD_REDUNDANT, "rocksdb.block.cache.add.redundant"}, {BLOCK_CACHE_INDEX_ADD_REDUNDANT, "rocksdb.block.cache.index.add.redundant"}, {BLOCK_CACHE_FILTER_ADD_REDUNDANT, "rocksdb.block.cache.filter.add.redundant"}, {BLOCK_CACHE_DATA_ADD_REDUNDANT, "rocksdb.block.cache.data.add.redundant"}, {BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT, "rocksdb.block.cache.compression.dict.add.redundant"}, {FILES_MARKED_TRASH, "rocksdb.files.marked.trash"}, {FILES_DELETED_IMMEDIATELY, "rocksdb.files.deleted.immediately"}, }; const std::vector> HistogramsNameMap = { {DB_GET, "rocksdb.db.get.micros"}, {DB_WRITE, "rocksdb.db.write.micros"}, {COMPACTION_TIME, "rocksdb.compaction.times.micros"}, {COMPACTION_CPU_TIME, "rocksdb.compaction.times.cpu_micros"}, {SUBCOMPACTION_SETUP_TIME, 
"rocksdb.subcompaction.setup.times.micros"}, {TABLE_SYNC_MICROS, "rocksdb.table.sync.micros"}, {COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros"}, {WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros"}, {MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros"}, {TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros"}, {DB_MULTIGET, "rocksdb.db.multiget.micros"}, {READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros"}, {READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros"}, {WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros"}, {STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"}, {STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"}, {STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"}, {HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"}, {SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"}, {NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction"}, {DB_SEEK, "rocksdb.db.seek.micros"}, {WRITE_STALL, "rocksdb.db.write.stall"}, {SST_READ_MICROS, "rocksdb.sst.read.micros"}, {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, {BYTES_PER_READ, "rocksdb.bytes.per.read"}, {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"}, {BYTES_COMPRESSED, "rocksdb.bytes.compressed"}, {BYTES_DECOMPRESSED, "rocksdb.bytes.decompressed"}, {COMPRESSION_TIMES_NANOS, "rocksdb.compression.times.nanos"}, {DECOMPRESSION_TIMES_NANOS, "rocksdb.decompression.times.nanos"}, {READ_NUM_MERGE_OPERANDS, "rocksdb.read.num.merge_operands"}, {BLOB_DB_KEY_SIZE, "rocksdb.blobdb.key.size"}, {BLOB_DB_VALUE_SIZE, "rocksdb.blobdb.value.size"}, {BLOB_DB_WRITE_MICROS, "rocksdb.blobdb.write.micros"}, {BLOB_DB_GET_MICROS, "rocksdb.blobdb.get.micros"}, {BLOB_DB_MULTIGET_MICROS, "rocksdb.blobdb.multiget.micros"}, {BLOB_DB_SEEK_MICROS, "rocksdb.blobdb.seek.micros"}, {BLOB_DB_NEXT_MICROS, "rocksdb.blobdb.next.micros"}, 
{BLOB_DB_PREV_MICROS, "rocksdb.blobdb.prev.micros"}, {BLOB_DB_BLOB_FILE_WRITE_MICROS, "rocksdb.blobdb.blob.file.write.micros"}, {BLOB_DB_BLOB_FILE_READ_MICROS, "rocksdb.blobdb.blob.file.read.micros"}, {BLOB_DB_BLOB_FILE_SYNC_MICROS, "rocksdb.blobdb.blob.file.sync.micros"}, {BLOB_DB_GC_MICROS, "rocksdb.blobdb.gc.micros"}, {BLOB_DB_COMPRESSION_MICROS, "rocksdb.blobdb.compression.micros"}, {BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"}, {FLUSH_TIME, "rocksdb.db.flush.micros"}, {SST_BATCH_SIZE, "rocksdb.sst.batch.size"}, }; std::shared_ptr CreateDBStatistics() { return std::make_shared(nullptr); } StatisticsImpl::StatisticsImpl(std::shared_ptr stats) : stats_(std::move(stats)) {} StatisticsImpl::~StatisticsImpl() {} uint64_t StatisticsImpl::getTickerCount(uint32_t tickerType) const { MutexLock lock(&aggregate_lock_); return getTickerCountLocked(tickerType); } uint64_t StatisticsImpl::getTickerCountLocked(uint32_t tickerType) const { assert(tickerType < TICKER_ENUM_MAX); uint64_t res = 0; for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { res += per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType]; } return res; } void StatisticsImpl::histogramData(uint32_t histogramType, HistogramData* const data) const { MutexLock lock(&aggregate_lock_); getHistogramImplLocked(histogramType)->Data(data); } std::unique_ptr StatisticsImpl::getHistogramImplLocked( uint32_t histogramType) const { assert(histogramType < HISTOGRAM_ENUM_MAX); std::unique_ptr res_hist(new HistogramImpl()); for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { res_hist->Merge( per_core_stats_.AccessAtCore(core_idx)->histograms_[histogramType]); } return res_hist; } std::string StatisticsImpl::getHistogramString(uint32_t histogramType) const { MutexLock lock(&aggregate_lock_); return getHistogramImplLocked(histogramType)->ToString(); } void StatisticsImpl::setTickerCount(uint32_t tickerType, uint64_t count) { { MutexLock 
lock(&aggregate_lock_); setTickerCountLocked(tickerType, count); } if (stats_ && tickerType < TICKER_ENUM_MAX) { stats_->setTickerCount(tickerType, count); } } void StatisticsImpl::setTickerCountLocked(uint32_t tickerType, uint64_t count) { assert(tickerType < TICKER_ENUM_MAX); for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { if (core_idx == 0) { per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType] = count; } else { per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType] = 0; } } } uint64_t StatisticsImpl::getAndResetTickerCount(uint32_t tickerType) { uint64_t sum = 0; { MutexLock lock(&aggregate_lock_); assert(tickerType < TICKER_ENUM_MAX); for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { sum += per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType].exchange( 0, std::memory_order_relaxed); } } if (stats_ && tickerType < TICKER_ENUM_MAX) { stats_->setTickerCount(tickerType, 0); } return sum; } void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) { assert(tickerType < TICKER_ENUM_MAX); per_core_stats_.Access()->tickers_[tickerType].fetch_add( count, std::memory_order_relaxed); if (stats_ && tickerType < TICKER_ENUM_MAX) { stats_->recordTick(tickerType, count); } } void StatisticsImpl::recordInHistogram(uint32_t histogramType, uint64_t value) { assert(histogramType < HISTOGRAM_ENUM_MAX); if (get_stats_level() <= StatsLevel::kExceptHistogramOrTimers) { return; } per_core_stats_.Access()->histograms_[histogramType].Add(value); if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) { stats_->recordInHistogram(histogramType, value); } } Status StatisticsImpl::Reset() { MutexLock lock(&aggregate_lock_); for (uint32_t i = 0; i < TICKER_ENUM_MAX; ++i) { setTickerCountLocked(i, 0); } for (uint32_t i = 0; i < HISTOGRAM_ENUM_MAX; ++i) { for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { per_core_stats_.AccessAtCore(core_idx)->histograms_[i].Clear(); } } return 
Status::OK(); } namespace { // a buffer size used for temp string buffers const int kTmpStrBufferSize = 200; } // namespace std::string StatisticsImpl::ToString() const { MutexLock lock(&aggregate_lock_); std::string res; res.reserve(20000); for (const auto& t : TickersNameMap) { assert(t.first < TICKER_ENUM_MAX); char buffer[kTmpStrBufferSize]; snprintf(buffer, kTmpStrBufferSize, "%s COUNT : %" PRIu64 "\n", t.second.c_str(), getTickerCountLocked(t.first)); res.append(buffer); } for (const auto& h : HistogramsNameMap) { assert(h.first < HISTOGRAM_ENUM_MAX); char buffer[kTmpStrBufferSize]; HistogramData hData; getHistogramImplLocked(h.first)->Data(&hData); // don't handle failures - buffer should always be big enough and arguments // should be provided correctly int ret = snprintf(buffer, kTmpStrBufferSize, "%s P50 : %f P95 : %f P99 : %f P100 : %f COUNT : %" PRIu64 " SUM : %" PRIu64 "\n", h.second.c_str(), hData.median, hData.percentile95, hData.percentile99, hData.max, hData.count, hData.sum); if (ret < 0 || ret >= kTmpStrBufferSize) { assert(false); continue; } res.append(buffer); } res.shrink_to_fit(); return res; } bool StatisticsImpl::getTickerMap( std::map* stats_map) const { assert(stats_map); if (!stats_map) return false; stats_map->clear(); MutexLock lock(&aggregate_lock_); for (const auto& t : TickersNameMap) { assert(t.first < TICKER_ENUM_MAX); (*stats_map)[t.second.c_str()] = getTickerCountLocked(t.first); } return true; } bool StatisticsImpl::HistEnabledForType(uint32_t type) const { return type < HISTOGRAM_ENUM_MAX; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/statistics.h000066400000000000000000000116001370372246700177730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #include "rocksdb/statistics.h" #include #include #include #include #include "monitoring/histogram.h" #include "port/likely.h" #include "port/port.h" #include "util/core_local.h" #include "util/mutexlock.h" #ifdef __clang__ #define ROCKSDB_FIELD_UNUSED __attribute__((__unused__)) #else #define ROCKSDB_FIELD_UNUSED #endif // __clang__ #ifndef STRINGIFY #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) #endif namespace ROCKSDB_NAMESPACE { enum TickersInternal : uint32_t { INTERNAL_TICKER_ENUM_START = TICKER_ENUM_MAX, INTERNAL_TICKER_ENUM_MAX }; enum HistogramsInternal : uint32_t { INTERNAL_HISTOGRAM_START = HISTOGRAM_ENUM_MAX, INTERNAL_HISTOGRAM_ENUM_MAX }; class StatisticsImpl : public Statistics { public: StatisticsImpl(std::shared_ptr stats); virtual ~StatisticsImpl(); virtual uint64_t getTickerCount(uint32_t ticker_type) const override; virtual void histogramData(uint32_t histogram_type, HistogramData* const data) const override; std::string getHistogramString(uint32_t histogram_type) const override; virtual void setTickerCount(uint32_t ticker_type, uint64_t count) override; virtual uint64_t getAndResetTickerCount(uint32_t ticker_type) override; virtual void recordTick(uint32_t ticker_type, uint64_t count) override; // The function is implemented for now for backward compatibility reason. // In case a user explictly calls it, for example, they may have a wrapped // Statistics object, passing the call to recordTick() into here, nothing // will break. void measureTime(uint32_t histogramType, uint64_t time) override { recordInHistogram(histogramType, time); } virtual void recordInHistogram(uint32_t histogram_type, uint64_t value) override; virtual Status Reset() override; virtual std::string ToString() const override; virtual bool getTickerMap(std::map*) const override; virtual bool HistEnabledForType(uint32_t type) const override; private: // If non-nullptr, forwards updates to the object pointed to by `stats_`. 
std::shared_ptr stats_; // Synchronizes anything that operates across other cores' local data, // such that operations like Reset() can be performed atomically. mutable port::Mutex aggregate_lock_; // The ticker/histogram data are stored in this structure, which we will store // per-core. It is cache-aligned, so tickers/histograms belonging to different // cores can never share the same cache line. // // Alignment attributes expand to nothing depending on the platform struct ALIGN_AS(CACHE_LINE_SIZE) StatisticsData { std::atomic_uint_fast64_t tickers_[INTERNAL_TICKER_ENUM_MAX] = {{0}}; HistogramImpl histograms_[INTERNAL_HISTOGRAM_ENUM_MAX]; #ifndef HAVE_ALIGNED_NEW char padding[(CACHE_LINE_SIZE - (INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) + INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) % CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED; #endif void *operator new(size_t s) { return port::cacheline_aligned_alloc(s); } void *operator new[](size_t s) { return port::cacheline_aligned_alloc(s); } void operator delete(void *p) { port::cacheline_aligned_free(p); } void operator delete[](void *p) { port::cacheline_aligned_free(p); } }; static_assert(sizeof(StatisticsData) % CACHE_LINE_SIZE == 0, "Expected " TOSTRING(CACHE_LINE_SIZE) "-byte aligned"); CoreLocalArray per_core_stats_; uint64_t getTickerCountLocked(uint32_t ticker_type) const; std::unique_ptr getHistogramImplLocked( uint32_t histogram_type) const; void setTickerCountLocked(uint32_t ticker_type, uint64_t count); }; // Utility functions inline void RecordInHistogram(Statistics* statistics, uint32_t histogram_type, uint64_t value) { if (statistics) { statistics->recordInHistogram(histogram_type, value); } } inline void RecordTimeToHistogram(Statistics* statistics, uint32_t histogram_type, uint64_t value) { if (statistics) { statistics->reportTimeToHistogram(histogram_type, value); } } inline void RecordTick(Statistics* statistics, uint32_t ticker_type, uint64_t count = 1) { if (statistics) { 
statistics->recordTick(ticker_type, count); } } inline void SetTickerCount(Statistics* statistics, uint32_t ticker_type, uint64_t count) { if (statistics) { statistics->setTickerCount(ticker_type, count); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/statistics_test.cc000066400000000000000000000027711370372246700212010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "port/stack_trace.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { class StatisticsTest : public testing::Test {}; // Sanity check to make sure that contents and order of TickersNameMap // match Tickers enum TEST_F(StatisticsTest, SanityTickers) { EXPECT_EQ(static_cast(Tickers::TICKER_ENUM_MAX), TickersNameMap.size()); for (uint32_t t = 0; t < Tickers::TICKER_ENUM_MAX; t++) { auto pair = TickersNameMap[static_cast(t)]; ASSERT_EQ(pair.first, t) << "Miss match at " << pair.second; } } // Sanity check to make sure that contents and order of HistogramsNameMap // match Tickers enum TEST_F(StatisticsTest, SanityHistograms) { EXPECT_EQ(static_cast(Histograms::HISTOGRAM_ENUM_MAX), HistogramsNameMap.size()); for (uint32_t h = 0; h < Histograms::HISTOGRAM_ENUM_MAX; h++) { auto pair = HistogramsNameMap[static_cast(h)]; ASSERT_EQ(pair.first, h) << "Miss match at " << pair.second; } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/monitoring/stats_history_test.cc000066400000000000000000000617011370372246700217240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "monitoring/persistent_stats_history.h" #include "options/options_helper.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/stats_history.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { class StatsHistoryTest : public DBTestBase { public: StatsHistoryTest() : DBTestBase("/stats_history_test") {} }; #ifndef ROCKSDB_LITE TEST_F(StatsHistoryTest, RunStatsDumpPeriodSec) { Options options; options.create_if_missing = true; options.stats_dump_period_sec = 5; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); int counter = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); #if defined(OS_MACOSX) && !defined(NDEBUG) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { uint64_t time_us = *reinterpret_cast(arg); if (time_us < mock_env->RealNowMicros()) { *reinterpret_cast(arg) = mock_env->RealNowMicros() + 1000; } }); #endif // OS_MACOSX && !NDEBUG ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::DumpStats:1", [&](void* /*arg*/) { counter++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); 
Reopen(options); ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_dump_period_sec); dbfull()->TEST_WaitForDumpStatsRun([&] { mock_env->set_current_time(5); }); ASSERT_GE(counter, 1); // Test cacel job through SetOptions ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "0"}})); int old_val = counter; for (int i = 6; i < 20; ++i) { dbfull()->TEST_WaitForDumpStatsRun([&] { mock_env->set_current_time(i); }); } ASSERT_EQ(counter, old_val); Close(); } // Test persistent stats background thread scheduling and cancelling TEST_F(StatsHistoryTest, StatsPersistScheduling) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 5; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); #if defined(OS_MACOSX) && !defined(NDEBUG) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { uint64_t time_us = *reinterpret_cast(arg); if (time_us < mock_env->RealNowMicros()) { *reinterpret_cast(arg) = mock_env->RealNowMicros() + 1000; } }); #endif // OS_MACOSX && !NDEBUG int counter = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::PersistStats:Entry", [&](void* /*arg*/) { counter++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_persist_period_sec); dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); ASSERT_GE(counter, 1); // Test cacel job through SetOptions ASSERT_TRUE(dbfull()->TEST_IsPersistentStatsEnabled()); ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}})); ASSERT_FALSE(dbfull()->TEST_IsPersistentStatsEnabled()); Close(); } // Test enabling persistent stats for the first time 
TEST_F(StatsHistoryTest, PersistentStatsFreshInstall) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 0; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); #if defined(OS_MACOSX) && !defined(NDEBUG) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { uint64_t time_us = *reinterpret_cast(arg); if (time_us < mock_env->RealNowMicros()) { *reinterpret_cast(arg) = mock_env->RealNowMicros() + 1000; } }); #endif // OS_MACOSX && !NDEBUG int counter = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::PersistStats:Entry", [&](void* /*arg*/) { counter++; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "5"}})); ASSERT_EQ(5u, dbfull()->GetDBOptions().stats_persist_period_sec); dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); ASSERT_GE(counter, 1); Close(); } // TODO(Zhongyi): Move persistent stats related tests to a separate file TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 5; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); #if defined(OS_MACOSX) && !defined(NDEBUG) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { uint64_t time_us = 
*reinterpret_cast(arg); if (time_us < mock_env->RealNowMicros()) { *reinterpret_cast(arg) = mock_env->RealNowMicros() + 1000; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); #endif // OS_MACOSX && !NDEBUG CreateColumnFamilies({"pikachu"}, options); ASSERT_OK(Put("foo", "bar")); ReopenWithColumnFamilies({"default", "pikachu"}, options); int mock_time = 1; // Wait for stats persist to finish dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); std::unique_ptr stats_iter; db_->GetStatsHistory(0 /*start_time*/, 6 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); // disabled stats snapshots ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}})); size_t stats_count = 0; for (; stats_iter->Valid(); stats_iter->Next()) { auto stats_map = stats_iter->GetStatsMap(); ASSERT_EQ(stats_iter->GetStatsTime(), 5); stats_count += stats_map.size(); } ASSERT_GT(stats_count, 0); // Wait a bit and verify no more stats are found for (mock_time = 6; mock_time < 20; ++mock_time) { dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(mock_time); }); } db_->GetStatsHistory(0 /*start_time*/, 20 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); size_t stats_count_new = 0; for (; stats_iter->Valid(); stats_iter->Next()) { stats_count_new += stats_iter->GetStatsMap().size(); } ASSERT_EQ(stats_count_new, stats_count); Close(); } TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { Options options; options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.stats_persist_period_sec = 1; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); #if defined(OS_MACOSX) && !defined(NDEBUG) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); 
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { uint64_t time_us = *reinterpret_cast(arg); if (time_us < mock_env->RealNowMicros()) { *reinterpret_cast(arg) = mock_env->RealNowMicros() + 1000; } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); #endif // OS_MACOSX && !NDEBUG CreateColumnFamilies({"pikachu"}, options); ASSERT_OK(Put("foo", "bar")); ReopenWithColumnFamilies({"default", "pikachu"}, options); // some random operation to populate statistics ASSERT_OK(Delete("foo")); ASSERT_OK(Put("sol", "sol")); ASSERT_OK(Put("epic", "epic")); ASSERT_OK(Put("ltd", "ltd")); ASSERT_EQ("sol", Get("sol")); ASSERT_EQ("epic", Get("epic")); ASSERT_EQ("ltd", Get("ltd")); Iterator* iterator = db_->NewIterator(ReadOptions()); for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { ASSERT_TRUE(iterator->key() == iterator->value()); } delete iterator; ASSERT_OK(Flush()); ASSERT_OK(Delete("sol")); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); int mock_time = 1; // Wait for stats persist to finish for (; mock_time < 5; ++mock_time) { dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(mock_time); }); } // second round of ops ASSERT_OK(Put("saigon", "saigon")); ASSERT_OK(Put("noodle talk", "noodle talk")); ASSERT_OK(Put("ping bistro", "ping bistro")); iterator = db_->NewIterator(ReadOptions()); for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { ASSERT_TRUE(iterator->key() == iterator->value()); } delete iterator; ASSERT_OK(Flush()); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); for (; mock_time < 10; ++mock_time) { dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(mock_time); }); } std::unique_ptr stats_iter; db_->GetStatsHistory(0 /*start_time*/, 10 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); size_t stats_count = 0; int slice_count = 0; for (; stats_iter->Valid(); 
stats_iter->Next()) { slice_count++; auto stats_map = stats_iter->GetStatsMap(); stats_count += stats_map.size(); } size_t stats_history_size = dbfull()->TEST_EstimateInMemoryStatsHistorySize(); ASSERT_GE(slice_count, 9); ASSERT_GE(stats_history_size, 13000); // capping memory cost at 13000 bytes since one slice is around 10000~13000 ASSERT_OK(dbfull()->SetDBOptions({{"stats_history_buffer_size", "13000"}})); ASSERT_EQ(13000, dbfull()->GetDBOptions().stats_history_buffer_size); // Wait for stats persist to finish for (; mock_time < 20; ++mock_time) { dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(mock_time); }); } db_->GetStatsHistory(0 /*start_time*/, 20 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); size_t stats_count_reopen = 0; slice_count = 0; for (; stats_iter->Valid(); stats_iter->Next()) { slice_count++; auto stats_map = stats_iter->GetStatsMap(); stats_count_reopen += stats_map.size(); } size_t stats_history_size_reopen = dbfull()->TEST_EstimateInMemoryStatsHistorySize(); // only one slice can fit under the new stats_history_buffer_size ASSERT_LT(slice_count, 2); ASSERT_TRUE(stats_history_size_reopen < 13000 && stats_history_size_reopen > 0); ASSERT_TRUE(stats_count_reopen < stats_count && stats_count_reopen > 0); Close(); // TODO: may also want to verify stats timestamp to make sure we are purging // the correct stats snapshot } int countkeys(Iterator* iter) { int count = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { count++; } return count; } TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 5; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.persist_stats_to_disk = true; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); CreateColumnFamilies({"pikachu"}, options); ASSERT_OK(Put("foo", 
"bar")); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ(Get("foo"), "bar"); // Wait for stats persist to finish dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); auto iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); int key_count1 = countkeys(iter); delete iter; dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(10); }); iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); int key_count2 = countkeys(iter); delete iter; dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(15); }); iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); int key_count3 = countkeys(iter); delete iter; ASSERT_GE(key_count2, key_count1); ASSERT_GE(key_count3, key_count2); ASSERT_EQ(key_count3 - key_count2, key_count2 - key_count1); std::unique_ptr stats_iter; db_->GetStatsHistory(0 /*start_time*/, 16 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); size_t stats_count = 0; int slice_count = 0; int non_zero_count = 0; for (int i = 1; stats_iter->Valid(); stats_iter->Next(), i++) { slice_count++; auto stats_map = stats_iter->GetStatsMap(); ASSERT_EQ(stats_iter->GetStatsTime(), 5 * i); for (auto& stat : stats_map) { if (stat.second != 0) { non_zero_count++; } } stats_count += stats_map.size(); } ASSERT_EQ(slice_count, 3); // 2 extra keys for format version ASSERT_EQ(stats_count, key_count3 - 2); // verify reopen will not cause data loss ReopenWithColumnFamilies({"default", "pikachu"}, options); db_->GetStatsHistory(0 /*start_time*/, 16 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); size_t stats_count_reopen = 0; int slice_count_reopen = 0; int non_zero_count_recover = 0; for (; stats_iter->Valid(); stats_iter->Next()) { slice_count_reopen++; auto stats_map = stats_iter->GetStatsMap(); for (auto& stat : stats_map) { if (stat.second != 0) { non_zero_count_recover++; } } stats_count_reopen 
+= stats_map.size(); } ASSERT_EQ(non_zero_count, non_zero_count_recover); ASSERT_EQ(slice_count, slice_count_reopen); ASSERT_EQ(stats_count, stats_count_reopen); Close(); } // Test persisted stats matches the value found in options.statistics and // the stats value retains after DB reopen TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 5; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.persist_stats_to_disk = true; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); std::map stats_map_before; ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_before)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); CreateColumnFamilies({"pikachu"}, options); ASSERT_OK(Put("foo", "bar")); ReopenWithColumnFamilies({"default", "pikachu"}, options); ASSERT_EQ(Get("foo"), "bar"); // Wait for stats persist to finish dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); auto iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); countkeys(iter); delete iter; dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(10); }); iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); countkeys(iter); delete iter; dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(15); }); iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); countkeys(iter); delete iter; dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(20); }); std::map stats_map_after; ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_after)); std::unique_ptr stats_iter; db_->GetStatsHistory(0 /*start_time*/, 21 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); std::string sample = "rocksdb.num.iterator.deleted"; uint64_t recovered_value = 0; for (int i = 1; stats_iter->Valid(); 
stats_iter->Next(), ++i) { auto stats_map = stats_iter->GetStatsMap(); ASSERT_EQ(stats_iter->GetStatsTime(), 5 * i); for (const auto& stat : stats_map) { if (sample.compare(stat.first) == 0) { recovered_value += stat.second; } } } ASSERT_EQ(recovered_value, stats_map_after[sample]); // test stats value retains after recovery ReopenWithColumnFamilies({"default", "pikachu"}, options); db_->GetStatsHistory(0 /*start_time*/, 21 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); uint64_t new_recovered_value = 0; for (int i = 1; stats_iter->Valid(); stats_iter->Next(), i++) { auto stats_map = stats_iter->GetStatsMap(); ASSERT_EQ(stats_iter->GetStatsTime(), 5 * i); for (const auto& stat : stats_map) { if (sample.compare(stat.first) == 0) { new_recovered_value += stat.second; } } } ASSERT_EQ(recovered_value, new_recovered_value); // TODO(Zhongyi): also add test to read raw values from disk and verify // correctness Close(); } // TODO(Zhongyi): add test for different format versions TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) { Options options; options.create_if_missing = true; options.stats_persist_period_sec = 5; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.persist_stats_to_disk = true; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); ASSERT_OK(TryReopen(options)); CreateColumnFamilies({"one", "two", "three"}, options); ASSERT_OK(Put(1, "foo", "bar")); ReopenWithColumnFamilies({"default", "one", "two", "three"}, options); ASSERT_EQ(Get(2, "foo"), "bar"); CreateColumnFamilies({"four"}, options); ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options); ASSERT_EQ(Get(2, "foo"), "bar"); dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); auto iter = db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily()); int key_count = countkeys(iter); delete 
iter; ASSERT_GE(key_count, 0); uint64_t num_write_wal = 0; std::string sample = "rocksdb.write.wal"; std::unique_ptr stats_iter; db_->GetStatsHistory(0 /*start_time*/, 5 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); for (; stats_iter->Valid(); stats_iter->Next()) { auto stats_map = stats_iter->GetStatsMap(); for (const auto& stat : stats_map) { if (sample.compare(stat.first) == 0) { num_write_wal += stat.second; } } } stats_iter.reset(); ASSERT_EQ(num_write_wal, 2); options.persist_stats_to_disk = false; ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options); int cf_count = 0; for (auto cfd : *dbfull()->versions_->GetColumnFamilySet()) { (void)cfd; cf_count++; } // persistent stats cf will be implicitly opened even if // persist_stats_to_disk is false ASSERT_EQ(cf_count, 6); ASSERT_EQ(Get(2, "foo"), "bar"); // attempt to create column family using same name, should fail ColumnFamilyOptions cf_opts(options); ColumnFamilyHandle* handle; ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName, &handle)); options.persist_stats_to_disk = true; ReopenWithColumnFamilies({"default", "one", "two", "three", "four"}, options); ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName, &handle)); // verify stats is not affected by prior failed CF creation db_->GetStatsHistory(0 /*start_time*/, 5 /*end_time*/, &stats_iter); ASSERT_TRUE(stats_iter != nullptr); num_write_wal = 0; for (; stats_iter->Valid(); stats_iter->Next()) { auto stats_map = stats_iter->GetStatsMap(); for (const auto& stat : stats_map) { if (sample.compare(stat.first) == 0) { num_write_wal += stat.second; } } } ASSERT_EQ(num_write_wal, 2); Close(); Destroy(options); } TEST_F(StatsHistoryTest, PersistentStatsReadOnly) { ASSERT_OK(Put("bar", "v2")); Close(); auto options = CurrentOptions(); options.stats_persist_period_sec = 5; options.persist_stats_to_disk = true; assert(options.env == env_); ASSERT_OK(ReadOnlyReopen(options)); 
ASSERT_EQ("v2", Get("bar")); Close(); // Reopen and flush memtable. ASSERT_OK(TryReopen(options)); Flush(); Close(); // Now check keys in read only mode. ASSERT_OK(ReadOnlyReopen(options)); } TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) { Options options; options.create_if_missing = true; options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb options.stats_persist_period_sec = 5; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.persist_stats_to_disk = true; std::unique_ptr mock_env; mock_env.reset(new ROCKSDB_NAMESPACE::MockTimeEnv(env_)); mock_env->set_current_time(0); // in seconds options.env = mock_env.get(); CreateColumnFamilies({"pikachu"}, options); ReopenWithColumnFamilies({"default", "pikachu"}, options); ColumnFamilyData* cfd_default = static_cast(dbfull()->DefaultColumnFamily()) ->cfd(); ColumnFamilyData* cfd_stats = static_cast( dbfull()->PersistentStatsColumnFamily()) ->cfd(); ColumnFamilyData* cfd_test = static_cast(handles_[1])->cfd(); ASSERT_OK(Put("foo", "v0")); ASSERT_OK(Put("bar", "v0")); ASSERT_EQ("v0", Get("bar")); ASSERT_EQ("v0", Get("foo")); ASSERT_OK(Put(1, "Eevee", "v0")); ASSERT_EQ("v0", Get(1, "Eevee")); dbfull()->TEST_WaitForPersistStatsRun([&] { mock_env->set_current_time(5); }); // writing to all three cf, flush default cf // LogNumbers: default: 14, stats: 4, pikachu: 4 ASSERT_OK(Flush()); ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); ASSERT_LT(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); ASSERT_OK(Put("foo1", "v1")); ASSERT_OK(Put("bar1", "v1")); ASSERT_EQ("v1", Get("bar1")); ASSERT_EQ("v1", Get("foo1")); ASSERT_OK(Put(1, "Vaporeon", "v1")); ASSERT_EQ("v1", Get(1, "Vaporeon")); // writing to default and test cf, flush test cf // LogNumbers: default: 14, stats: 16, pikachu: 16 ASSERT_OK(Flush(1)); ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); ASSERT_GT(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); ASSERT_OK(Put("foo2", "v2")); ASSERT_OK(Put("bar2", 
"v2")); ASSERT_EQ("v2", Get("bar2")); ASSERT_EQ("v2", Get("foo2")); dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(10); }); // writing to default and stats cf, flushing default cf // LogNumbers: default: 19, stats: 19, pikachu: 19 ASSERT_OK(Flush()); ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); ASSERT_OK(Put("foo3", "v3")); ASSERT_OK(Put("bar3", "v3")); ASSERT_EQ("v3", Get("bar3")); ASSERT_EQ("v3", Get("foo3")); ASSERT_OK(Put(1, "Jolteon", "v3")); ASSERT_EQ("v3", Get(1, "Jolteon")); dbfull()->TEST_WaitForPersistStatsRun( [&] { mock_env->set_current_time(15); }); // writing to all three cf, flushing test cf // LogNumbers: default: 19, stats: 19, pikachu: 22 ASSERT_OK(Flush(1)); ASSERT_LT(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber()); ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber()); Close(); } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/monitoring/thread_status_impl.cc000066400000000000000000000115001370372246700216310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include #include "rocksdb/env.h" #include "rocksdb/thread_status.h" #include "util/string_util.h" #include "util/thread_operation.h" namespace ROCKSDB_NAMESPACE { #ifdef ROCKSDB_USING_THREAD_STATUS std::string ThreadStatus::GetThreadTypeName( ThreadStatus::ThreadType thread_type) { switch (thread_type) { case ThreadStatus::ThreadType::HIGH_PRIORITY: return "High Pri"; case ThreadStatus::ThreadType::LOW_PRIORITY: return "Low Pri"; case ThreadStatus::ThreadType::USER: return "User"; case ThreadStatus::ThreadType::BOTTOM_PRIORITY: return "Bottom Pri"; case ThreadStatus::ThreadType::NUM_THREAD_TYPES: assert(false); } return "Unknown"; } const std::string& ThreadStatus::GetOperationName( ThreadStatus::OperationType op_type) { if (op_type < 0 || op_type >= NUM_OP_TYPES) { return global_operation_table[OP_UNKNOWN].name; } return global_operation_table[op_type].name; } const std::string& ThreadStatus::GetOperationStageName( ThreadStatus::OperationStage stage) { if (stage < 0 || stage >= NUM_OP_STAGES) { return global_op_stage_table[STAGE_UNKNOWN].name; } return global_op_stage_table[stage].name; } const std::string& ThreadStatus::GetStateName( ThreadStatus::StateType state_type) { if (state_type < 0 || state_type >= NUM_STATE_TYPES) { return global_state_table[STATE_UNKNOWN].name; } return global_state_table[state_type].name; } const std::string ThreadStatus::MicrosToString(uint64_t micros) { if (micros == 0) { return ""; } const int kBufferLen = 100; char buffer[kBufferLen]; AppendHumanMicros(micros, buffer, kBufferLen, false); return std::string(buffer); } const std::string& ThreadStatus::GetOperationPropertyName( ThreadStatus::OperationType op_type, int i) { static const std::string empty_str = ""; switch (op_type) { case ThreadStatus::OP_COMPACTION: if (i >= NUM_COMPACTION_PROPERTIES) { return empty_str; } return compaction_operation_properties[i].name; case ThreadStatus::OP_FLUSH: if (i >= NUM_FLUSH_PROPERTIES) { return empty_str; } return 
flush_operation_properties[i].name; default: return empty_str; } } std::map ThreadStatus::InterpretOperationProperties( ThreadStatus::OperationType op_type, const uint64_t* op_properties) { int num_properties; switch (op_type) { case OP_COMPACTION: num_properties = NUM_COMPACTION_PROPERTIES; break; case OP_FLUSH: num_properties = NUM_FLUSH_PROPERTIES; break; default: num_properties = 0; } std::map property_map; for (int i = 0; i < num_properties; ++i) { if (op_type == OP_COMPACTION && i == COMPACTION_INPUT_OUTPUT_LEVEL) { property_map.insert({"BaseInputLevel", op_properties[i] >> 32}); property_map.insert( {"OutputLevel", op_properties[i] % (uint64_t(1) << 32U)}); } else if (op_type == OP_COMPACTION && i == COMPACTION_PROP_FLAGS) { property_map.insert({"IsManual", ((op_properties[i] & 2) >> 1)}); property_map.insert({"IsDeletion", ((op_properties[i] & 4) >> 2)}); property_map.insert({"IsTrivialMove", ((op_properties[i] & 8) >> 3)}); } else { property_map.insert( {GetOperationPropertyName(op_type, i), op_properties[i]}); } } return property_map; } #else std::string ThreadStatus::GetThreadTypeName( ThreadStatus::ThreadType /*thread_type*/) { static std::string dummy_str = ""; return dummy_str; } const std::string& ThreadStatus::GetOperationName( ThreadStatus::OperationType /*op_type*/) { static std::string dummy_str = ""; return dummy_str; } const std::string& ThreadStatus::GetOperationStageName( ThreadStatus::OperationStage /*stage*/) { static std::string dummy_str = ""; return dummy_str; } const std::string& ThreadStatus::GetStateName( ThreadStatus::StateType /*state_type*/) { static std::string dummy_str = ""; return dummy_str; } const std::string ThreadStatus::MicrosToString(uint64_t /*op_elapsed_time*/) { static std::string dummy_str = ""; return dummy_str; } const std::string& ThreadStatus::GetOperationPropertyName( ThreadStatus::OperationType /*op_type*/, int /*i*/) { static std::string dummy_str = ""; return dummy_str; } std::map 
ThreadStatus::InterpretOperationProperties( ThreadStatus::OperationType /*op_type*/, const uint64_t* /*op_properties*/) { return std::map(); } #endif // ROCKSDB_USING_THREAD_STATUS } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/thread_status_updater.cc000066400000000000000000000260371370372246700223470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "monitoring/thread_status_updater.h" #include #include "port/likely.h" #include "rocksdb/env.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { #ifdef ROCKSDB_USING_THREAD_STATUS __thread ThreadStatusData* ThreadStatusUpdater::thread_status_data_ = nullptr; void ThreadStatusUpdater::RegisterThread(ThreadStatus::ThreadType ttype, uint64_t thread_id) { if (UNLIKELY(thread_status_data_ == nullptr)) { thread_status_data_ = new ThreadStatusData(); thread_status_data_->thread_type = ttype; thread_status_data_->thread_id = thread_id; std::lock_guard lck(thread_list_mutex_); thread_data_set_.insert(thread_status_data_); } ClearThreadOperationProperties(); } void ThreadStatusUpdater::UnregisterThread() { if (thread_status_data_ != nullptr) { std::lock_guard lck(thread_list_mutex_); thread_data_set_.erase(thread_status_data_); delete thread_status_data_; thread_status_data_ = nullptr; } } void ThreadStatusUpdater::ResetThreadStatus() { ClearThreadState(); ClearThreadOperation(); SetColumnFamilyInfoKey(nullptr); } void ThreadStatusUpdater::SetColumnFamilyInfoKey(const void* cf_key) { auto* data = Get(); if (data == nullptr) { return; } // set the tracking flag based on whether cf_key is non-null or not. // If enable_thread_tracking is set to false, the input cf_key // would be nullptr. 
data->enable_tracking = (cf_key != nullptr); data->cf_key.store(const_cast(cf_key), std::memory_order_relaxed); } const void* ThreadStatusUpdater::GetColumnFamilyInfoKey() { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return nullptr; } return data->cf_key.load(std::memory_order_relaxed); } void ThreadStatusUpdater::SetThreadOperation( const ThreadStatus::OperationType type) { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } // NOTE: Our practice here is to set all the thread operation properties // and stage before we set thread operation, and thread operation // will be set in std::memory_order_release. This is to ensure // whenever a thread operation is not OP_UNKNOWN, we will always // have a consistent information on its properties. data->operation_type.store(type, std::memory_order_release); if (type == ThreadStatus::OP_UNKNOWN) { data->operation_stage.store(ThreadStatus::STAGE_UNKNOWN, std::memory_order_relaxed); ClearThreadOperationProperties(); } } void ThreadStatusUpdater::SetThreadOperationProperty(int i, uint64_t value) { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } data->op_properties[i].store(value, std::memory_order_relaxed); } void ThreadStatusUpdater::IncreaseThreadOperationProperty(int i, uint64_t delta) { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } data->op_properties[i].fetch_add(delta, std::memory_order_relaxed); } void ThreadStatusUpdater::SetOperationStartTime(const uint64_t start_time) { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } data->op_start_time.store(start_time, std::memory_order_relaxed); } void ThreadStatusUpdater::ClearThreadOperation() { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } data->operation_stage.store(ThreadStatus::STAGE_UNKNOWN, std::memory_order_relaxed); data->operation_type.store(ThreadStatus::OP_UNKNOWN, std::memory_order_relaxed); ClearThreadOperationProperties(); } void 
ThreadStatusUpdater::ClearThreadOperationProperties() { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } for (int i = 0; i < ThreadStatus::kNumOperationProperties; ++i) { data->op_properties[i].store(0, std::memory_order_relaxed); } } ThreadStatus::OperationStage ThreadStatusUpdater::SetThreadOperationStage( ThreadStatus::OperationStage stage) { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return ThreadStatus::STAGE_UNKNOWN; } return data->operation_stage.exchange(stage, std::memory_order_relaxed); } void ThreadStatusUpdater::SetThreadState(const ThreadStatus::StateType type) { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } data->state_type.store(type, std::memory_order_relaxed); } void ThreadStatusUpdater::ClearThreadState() { auto* data = GetLocalThreadStatus(); if (data == nullptr) { return; } data->state_type.store(ThreadStatus::STATE_UNKNOWN, std::memory_order_relaxed); } Status ThreadStatusUpdater::GetThreadList( std::vector* thread_list) { thread_list->clear(); std::vector> valid_list; uint64_t now_micros = Env::Default()->NowMicros(); std::lock_guard lck(thread_list_mutex_); for (auto* thread_data : thread_data_set_) { assert(thread_data); auto thread_id = thread_data->thread_id.load(std::memory_order_relaxed); auto thread_type = thread_data->thread_type.load(std::memory_order_relaxed); // Since any change to cf_info_map requires thread_list_mutex, // which is currently held by GetThreadList(), here we can safely // use "memory_order_relaxed" to load the cf_key. 
auto cf_key = thread_data->cf_key.load(std::memory_order_relaxed); ThreadStatus::OperationType op_type = ThreadStatus::OP_UNKNOWN; ThreadStatus::OperationStage op_stage = ThreadStatus::STAGE_UNKNOWN; ThreadStatus::StateType state_type = ThreadStatus::STATE_UNKNOWN; uint64_t op_elapsed_micros = 0; uint64_t op_props[ThreadStatus::kNumOperationProperties] = {0}; auto iter = cf_info_map_.find(cf_key); if (iter != cf_info_map_.end()) { op_type = thread_data->operation_type.load(std::memory_order_acquire); // display lower-level info only when higher-level info is available. if (op_type != ThreadStatus::OP_UNKNOWN) { op_elapsed_micros = now_micros - thread_data->op_start_time.load( std::memory_order_relaxed); op_stage = thread_data->operation_stage.load(std::memory_order_relaxed); state_type = thread_data->state_type.load(std::memory_order_relaxed); for (int i = 0; i < ThreadStatus::kNumOperationProperties; ++i) { op_props[i] = thread_data->op_properties[i].load(std::memory_order_relaxed); } } } thread_list->emplace_back( thread_id, thread_type, iter != cf_info_map_.end() ? iter->second.db_name : "", iter != cf_info_map_.end() ? iter->second.cf_name : "", op_type, op_elapsed_micros, op_stage, op_props, state_type); } return Status::OK(); } ThreadStatusData* ThreadStatusUpdater::GetLocalThreadStatus() { if (thread_status_data_ == nullptr) { return nullptr; } if (!thread_status_data_->enable_tracking) { assert(thread_status_data_->cf_key.load(std::memory_order_relaxed) == nullptr); return nullptr; } return thread_status_data_; } void ThreadStatusUpdater::NewColumnFamilyInfo(const void* db_key, const std::string& db_name, const void* cf_key, const std::string& cf_name) { // Acquiring same lock as GetThreadList() to guarantee // a consistent view of global column family table (cf_info_map). 
std::lock_guard lck(thread_list_mutex_); cf_info_map_.emplace(std::piecewise_construct, std::make_tuple(cf_key), std::make_tuple(db_key, db_name, cf_name)); db_key_map_[db_key].insert(cf_key); } void ThreadStatusUpdater::EraseColumnFamilyInfo(const void* cf_key) { // Acquiring same lock as GetThreadList() to guarantee // a consistent view of global column family table (cf_info_map). std::lock_guard lck(thread_list_mutex_); auto cf_pair = cf_info_map_.find(cf_key); if (cf_pair != cf_info_map_.end()) { // Remove its entry from db_key_map_ by the following steps: // 1. Obtain the entry in db_key_map_ whose set contains cf_key // 2. Remove it from the set. ConstantColumnFamilyInfo& cf_info = cf_pair->second; auto db_pair = db_key_map_.find(cf_info.db_key); assert(db_pair != db_key_map_.end()); size_t result __attribute__((__unused__)); result = db_pair->second.erase(cf_key); assert(result); cf_info_map_.erase(cf_pair); } } void ThreadStatusUpdater::EraseDatabaseInfo(const void* db_key) { // Acquiring same lock as GetThreadList() to guarantee // a consistent view of global column family table (cf_info_map). std::lock_guard lck(thread_list_mutex_); auto db_pair = db_key_map_.find(db_key); if (UNLIKELY(db_pair == db_key_map_.end())) { // In some occasional cases such as DB::Open fails, we won't // register ColumnFamilyInfo for a db. 
return; } for (auto cf_key : db_pair->second) { auto cf_pair = cf_info_map_.find(cf_key); if (cf_pair != cf_info_map_.end()) { cf_info_map_.erase(cf_pair); } } db_key_map_.erase(db_key); } #else void ThreadStatusUpdater::RegisterThread(ThreadStatus::ThreadType /*ttype*/, uint64_t /*thread_id*/) {} void ThreadStatusUpdater::UnregisterThread() {} void ThreadStatusUpdater::ResetThreadStatus() {} void ThreadStatusUpdater::SetColumnFamilyInfoKey(const void* /*cf_key*/) {} void ThreadStatusUpdater::SetThreadOperation( const ThreadStatus::OperationType /*type*/) {} void ThreadStatusUpdater::ClearThreadOperation() {} void ThreadStatusUpdater::SetThreadState( const ThreadStatus::StateType /*type*/) {} void ThreadStatusUpdater::ClearThreadState() {} Status ThreadStatusUpdater::GetThreadList( std::vector* /*thread_list*/) { return Status::NotSupported( "GetThreadList is not supported in the current running environment."); } void ThreadStatusUpdater::NewColumnFamilyInfo(const void* /*db_key*/, const std::string& /*db_name*/, const void* /*cf_key*/, const std::string& /*cf_name*/) {} void ThreadStatusUpdater::EraseColumnFamilyInfo(const void* /*cf_key*/) {} void ThreadStatusUpdater::EraseDatabaseInfo(const void* /*db_key*/) {} void ThreadStatusUpdater::SetThreadOperationProperty(int /*i*/, uint64_t /*value*/) {} void ThreadStatusUpdater::IncreaseThreadOperationProperty(int /*i*/, uint64_t /*delta*/) {} #endif // ROCKSDB_USING_THREAD_STATUS } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/thread_status_updater.h000066400000000000000000000207651370372246700222130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // The implementation of ThreadStatus. // // Note that we make get and set access to ThreadStatusData lockless. 
// As a result, ThreadStatusData as a whole is not atomic. However, // we guarantee consistent ThreadStatusData all the time whenever // user call GetThreadList(). This consistency guarantee is done // by having the following constraint in the internal implementation // of set and get order: // // 1. When reset any information in ThreadStatusData, always start from // clearing up the lower-level information first. // 2. When setting any information in ThreadStatusData, always start from // setting the higher-level information. // 3. When returning ThreadStatusData to the user, fields are fetched from // higher-level to lower-level. In addition, where there's a nullptr // in one field, then all fields that has lower-level than that field // should be ignored. // // The high to low level information would be: // thread_id > thread_type > db > cf > operation > state // // This means user might not always get full information, but whenever // returned by the GetThreadList() is guaranteed to be consistent. #pragma once #include #include #include #include #include #include #include #include #include "rocksdb/status.h" #include "rocksdb/thread_status.h" #include "port/port.h" #include "util/thread_operation.h" namespace ROCKSDB_NAMESPACE { class ColumnFamilyHandle; // The structure that keeps constant information about a column family. struct ConstantColumnFamilyInfo { #ifdef ROCKSDB_USING_THREAD_STATUS public: ConstantColumnFamilyInfo( const void* _db_key, const std::string& _db_name, const std::string& _cf_name) : db_key(_db_key), db_name(_db_name), cf_name(_cf_name) {} const void* db_key; const std::string db_name; const std::string cf_name; #endif // ROCKSDB_USING_THREAD_STATUS }; // the internal data-structure that is used to reflect the current // status of a thread using a set of atomic pointers. 
struct ThreadStatusData { #ifdef ROCKSDB_USING_THREAD_STATUS explicit ThreadStatusData() : enable_tracking(false) { thread_id.store(0); thread_type.store(ThreadStatus::USER); cf_key.store(nullptr); operation_type.store(ThreadStatus::OP_UNKNOWN); op_start_time.store(0); state_type.store(ThreadStatus::STATE_UNKNOWN); } // A flag to indicate whether the thread tracking is enabled // in the current thread. This value will be updated based on whether // the associated Options::enable_thread_tracking is set to true // in ThreadStatusUtil::SetColumnFamily(). // // If set to false, then SetThreadOperation and SetThreadState // will be no-op. bool enable_tracking; std::atomic thread_id; std::atomic thread_type; std::atomic cf_key; std::atomic operation_type; std::atomic op_start_time; std::atomic operation_stage; std::atomic op_properties[ThreadStatus::kNumOperationProperties]; std::atomic state_type; #endif // ROCKSDB_USING_THREAD_STATUS }; // The class that stores and updates the status of the current thread // using a thread-local ThreadStatusData. // // In most of the case, you should use ThreadStatusUtil to update // the status of the current thread instead of using ThreadSatusUpdater // directly. // // @see ThreadStatusUtil class ThreadStatusUpdater { public: ThreadStatusUpdater() {} // Releases all ThreadStatusData of all active threads. virtual ~ThreadStatusUpdater() {} // Unregister the current thread. void UnregisterThread(); // Reset the status of the current thread. This includes resetting // ColumnFamilyInfoKey, ThreadOperation, and ThreadState. void ResetThreadStatus(); // Set the id of the current thread. void SetThreadID(uint64_t thread_id); // Register the current thread for tracking. void RegisterThread(ThreadStatus::ThreadType ttype, uint64_t thread_id); // Update the column-family info of the current thread by setting // its thread-local pointer of ThreadStateInfo to the correct entry. 
void SetColumnFamilyInfoKey(const void* cf_key); // returns the column family info key. const void* GetColumnFamilyInfoKey(); // Update the thread operation of the current thread. void SetThreadOperation(const ThreadStatus::OperationType type); // The start time of the current thread operation. It is in the format // of micro-seconds since some fixed point in time. void SetOperationStartTime(const uint64_t start_time); // Set the "i"th property of the current operation. // // NOTE: Our practice here is to set all the thread operation properties // and stage before we set thread operation, and thread operation // will be set in std::memory_order_release. This is to ensure // whenever a thread operation is not OP_UNKNOWN, we will always // have a consistent information on its properties. void SetThreadOperationProperty( int i, uint64_t value); // Increase the "i"th property of the current operation with // the specified delta. void IncreaseThreadOperationProperty( int i, uint64_t delta); // Update the thread operation stage of the current thread. ThreadStatus::OperationStage SetThreadOperationStage( const ThreadStatus::OperationStage stage); // Clear thread operation of the current thread. void ClearThreadOperation(); // Reset all thread-operation-properties to 0. void ClearThreadOperationProperties(); // Update the thread state of the current thread. void SetThreadState(const ThreadStatus::StateType type); // Clear the thread state of the current thread. void ClearThreadState(); // Obtain the status of all active registered threads. Status GetThreadList( std::vector* thread_list); // Create an entry in the global ColumnFamilyInfo table for the // specified column family. This function should be called only // when the current thread does not hold db_mutex. 
void NewColumnFamilyInfo( const void* db_key, const std::string& db_name, const void* cf_key, const std::string& cf_name); // Erase all ConstantColumnFamilyInfo that is associated with the // specified db instance. This function should be called only when // the current thread does not hold db_mutex. void EraseDatabaseInfo(const void* db_key); // Erase the ConstantColumnFamilyInfo that is associated with the // specified ColumnFamilyData. This function should be called only // when the current thread does not hold db_mutex. void EraseColumnFamilyInfo(const void* cf_key); // Verifies whether the input ColumnFamilyHandles matches // the information stored in the current cf_info_map. void TEST_VerifyColumnFamilyInfoMap( const std::vector& handles, bool check_exist); protected: #ifdef ROCKSDB_USING_THREAD_STATUS // The thread-local variable for storing thread status. static __thread ThreadStatusData* thread_status_data_; // Returns the pointer to the thread status data only when the // thread status data is non-null and has enable_tracking == true. ThreadStatusData* GetLocalThreadStatus(); // Directly returns the pointer to thread_status_data_ without // checking whether enabling_tracking is true of not. ThreadStatusData* Get() { return thread_status_data_; } // The mutex that protects cf_info_map and db_key_map. std::mutex thread_list_mutex_; // The current status data of all active threads. std::unordered_set thread_data_set_; // A global map that keeps the column family information. It is stored // globally instead of inside DB is to avoid the situation where DB is // closing while GetThreadList function already get the pointer to its // CopnstantColumnFamilyInfo. std::unordered_map cf_info_map_; // A db_key to cf_key map that allows erasing elements in cf_info_map // associated to the same db_key faster. 
std::unordered_map< const void*, std::unordered_set> db_key_map_; #else static ThreadStatusData* thread_status_data_; #endif // ROCKSDB_USING_THREAD_STATUS }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/thread_status_updater_debug.cc000066400000000000000000000024321370372246700235060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include "db/column_family.h" #include "monitoring/thread_status_updater.h" namespace ROCKSDB_NAMESPACE { #ifndef NDEBUG #ifdef ROCKSDB_USING_THREAD_STATUS void ThreadStatusUpdater::TEST_VerifyColumnFamilyInfoMap( const std::vector& handles, bool check_exist) { std::unique_lock lock(thread_list_mutex_); if (check_exist) { assert(cf_info_map_.size() == handles.size()); } for (auto* handle : handles) { auto* cfd = reinterpret_cast(handle)->cfd(); auto iter __attribute__((__unused__)) = cf_info_map_.find(cfd); if (check_exist) { assert(iter != cf_info_map_.end()); assert(iter->second.cf_name == cfd->GetName()); } else { assert(iter == cf_info_map_.end()); } } } #else void ThreadStatusUpdater::TEST_VerifyColumnFamilyInfoMap( const std::vector& /*handles*/, bool /*check_exist*/) { } #endif // ROCKSDB_USING_THREAD_STATUS #endif // !NDEBUG } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/thread_status_util.cc000066400000000000000000000161561370372246700216610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "monitoring/thread_status_util.h"

#include "monitoring/thread_status_updater.h"
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {

#ifdef ROCKSDB_USING_THREAD_STATUS
__thread ThreadStatusUpdater* ThreadStatusUtil::thread_updater_local_cache_ =
    nullptr;
__thread bool ThreadStatusUtil::thread_updater_initialized_ = false;

// Registers the current thread with the updater obtained from "env".
// No-op when no updater could be cached for this thread.
void ThreadStatusUtil::RegisterThread(const Env* env,
                                      ThreadStatus::ThreadType thread_type) {
  if (!MaybeInitThreadLocalUpdater(env)) {
    return;
  }
  assert(thread_updater_local_cache_);
  thread_updater_local_cache_->RegisterThread(thread_type, env->GetThreadID());
}

// Unregisters the current thread from the cached updater (if any) and
// clears both thread-local cache variables so that a later call can
// re-initialize them.
void ThreadStatusUtil::UnregisterThread() {
  thread_updater_initialized_ = false;
  if (thread_updater_local_cache_ != nullptr) {
    thread_updater_local_cache_->UnregisterThread();
    thread_updater_local_cache_ = nullptr;
  }
}

// Points the current thread's status at "cfd" when tracking is enabled,
// and at nullptr otherwise.
void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* cfd,
                                       const Env* env,
                                       bool enable_thread_tracking) {
  if (!MaybeInitThreadLocalUpdater(env)) {
    return;
  }
  assert(thread_updater_local_cache_);
  if (cfd != nullptr && enable_thread_tracking) {
    thread_updater_local_cache_->SetColumnFamilyInfoKey(cfd);
  } else {
    // When cfd == nullptr or enable_thread_tracking == false, we set
    // ColumnFamilyInfoKey to nullptr, which makes SetThreadOperation
    // and SetThreadState become no-op.
    thread_updater_local_cache_->SetColumnFamilyInfoKey(nullptr);
  }
}

// Records the operation start time (the current time for a real operation,
// 0 for OP_UNKNOWN) and then the operation type itself.
void ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType op) {
  if (thread_updater_local_cache_ == nullptr) {
    // thread_updater_local_cache_ must be set in SetColumnFamily
    // or other ThreadStatusUtil functions.
    return;
  }

  if (op != ThreadStatus::OP_UNKNOWN) {
    uint64_t current_time = Env::Default()->NowMicros();
    thread_updater_local_cache_->SetOperationStartTime(current_time);
  } else {
    // TODO(yhchiang): we could report the time when we set operation to
    // OP_UNKNOWN once the whole instrumentation has been done.
    thread_updater_local_cache_->SetOperationStartTime(0);
  }
  thread_updater_local_cache_->SetThreadOperation(op);
}

// Forwards to the cached updater and returns what it returns; returns
// STAGE_UNKNOWN when no updater is cached for this thread.
ThreadStatus::OperationStage ThreadStatusUtil::SetThreadOperationStage(
    ThreadStatus::OperationStage stage) {
  if (thread_updater_local_cache_ == nullptr) {
    // thread_updater_local_cache_ must be set in SetColumnFamily
    // or other ThreadStatusUtil functions.
    return ThreadStatus::STAGE_UNKNOWN;
  }

  return thread_updater_local_cache_->SetThreadOperationStage(stage);
}

// Forwards to the cached updater; no-op when none is cached.
void ThreadStatusUtil::SetThreadOperationProperty(int code, uint64_t value) {
  if (thread_updater_local_cache_ == nullptr) {
    // thread_updater_local_cache_ must be set in SetColumnFamily
    // or other ThreadStatusUtil functions.
    return;
  }

  thread_updater_local_cache_->SetThreadOperationProperty(code, value);
}

// Forwards to the cached updater; no-op when none is cached.
void ThreadStatusUtil::IncreaseThreadOperationProperty(int code,
                                                       uint64_t delta) {
  if (thread_updater_local_cache_ == nullptr) {
    // thread_updater_local_cache_ must be set in SetColumnFamily
    // or other ThreadStatusUtil functions.
    return;
  }

  thread_updater_local_cache_->IncreaseThreadOperationProperty(code, delta);
}

// Forwards to the cached updater; no-op when none is cached.
void ThreadStatusUtil::SetThreadState(ThreadStatus::StateType state) {
  if (thread_updater_local_cache_ == nullptr) {
    // thread_updater_local_cache_ must be set in SetColumnFamily
    // or other ThreadStatusUtil functions.
    return;
  }

  thread_updater_local_cache_->SetThreadState(state);
}

// Forwards to the cached updater; no-op when none is cached.
void ThreadStatusUtil::ResetThreadStatus() {
  if (thread_updater_local_cache_ == nullptr) {
    return;
  }
  thread_updater_local_cache_->ResetThreadStatus();
}

// Registers "cfd" under "db" (keyed by their pointers) with the updater
// obtained from "env", using db->GetName() as the db name.
void ThreadStatusUtil::NewColumnFamilyInfo(const DB* db,
                                           const ColumnFamilyData* cfd,
                                           const std::string& cf_name,
                                           const Env* env) {
  if (!MaybeInitThreadLocalUpdater(env)) {
    return;
  }
  assert(thread_updater_local_cache_);
  if (thread_updater_local_cache_) {
    thread_updater_local_cache_->NewColumnFamilyInfo(db, db->GetName(), cfd,
                                                     cf_name);
  }
}

// Removes the entry for "cfd" through the cached updater; no-op when none
// is cached for this thread.
void ThreadStatusUtil::EraseColumnFamilyInfo(const ColumnFamilyData* cfd) {
  if (thread_updater_local_cache_ == nullptr) {
    return;
  }
  thread_updater_local_cache_->EraseColumnFamilyInfo(cfd);
}

// Removes all entries of "db".  Unlike the other functions this one asks
// db->GetEnv() for the updater instead of using the thread-local cache.
void ThreadStatusUtil::EraseDatabaseInfo(const DB* db) {
  ThreadStatusUpdater* thread_updater = db->GetEnv()->GetThreadStatusUpdater();
  if (thread_updater == nullptr) {
    return;
  }
  thread_updater->EraseDatabaseInfo(db);
}

// Caches env->GetThreadStatusUpdater() for this thread the first time it is
// called with a non-null "env" after (re)initialization.  Returns true when
// a non-null updater is cached.
bool ThreadStatusUtil::MaybeInitThreadLocalUpdater(const Env* env) {
  if (!thread_updater_initialized_ && env != nullptr) {
    thread_updater_initialized_ = true;
    thread_updater_local_cache_ = env->GetThreadStatusUpdater();
  }
  return (thread_updater_local_cache_ != nullptr);
}

// Sets the new stage and remembers the one returned by
// SetThreadOperationStage() so the destructor can restore it.
AutoThreadOperationStageUpdater::AutoThreadOperationStageUpdater(
    ThreadStatus::OperationStage stage) {
  prev_stage_ = ThreadStatusUtil::SetThreadOperationStage(stage);
}

// Restores the stage remembered by the constructor.
AutoThreadOperationStageUpdater::~AutoThreadOperationStageUpdater() {
  ThreadStatusUtil::SetThreadOperationStage(prev_stage_);
}

#else

// Thread status tracking is compiled out: everything below is a no-op.

ThreadStatusUpdater* ThreadStatusUtil::thread_updater_local_cache_ = nullptr;
bool ThreadStatusUtil::thread_updater_initialized_ = false;

bool ThreadStatusUtil::MaybeInitThreadLocalUpdater(const Env* /*env*/) {
  return false;
}

void ThreadStatusUtil::SetColumnFamily(const ColumnFamilyData* /*cfd*/,
                                       const Env* /*env*/,
                                       bool /*enable_thread_tracking*/) {}

void ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType /*op*/) {}

void ThreadStatusUtil::SetThreadOperationProperty(int /*code*/,
                                                  uint64_t /*value*/) {}

void ThreadStatusUtil::IncreaseThreadOperationProperty(int /*code*/,
                                                       uint64_t /*delta*/) {}

void ThreadStatusUtil::SetThreadState(ThreadStatus::StateType /*state*/) {}

void ThreadStatusUtil::NewColumnFamilyInfo(const DB* /*db*/,
                                           const ColumnFamilyData* /*cfd*/,
                                           const std::string& /*cf_name*/,
                                           const Env* /*env*/) {}

void ThreadStatusUtil::EraseColumnFamilyInfo(const ColumnFamilyData* /*cfd*/) {}

void ThreadStatusUtil::EraseDatabaseInfo(const DB* /*db*/) {}

void ThreadStatusUtil::ResetThreadStatus() {}

AutoThreadOperationStageUpdater::AutoThreadOperationStageUpdater(
    ThreadStatus::OperationStage /*stage*/) {}

AutoThreadOperationStageUpdater::~AutoThreadOperationStageUpdater() {}

#endif  // ROCKSDB_USING_THREAD_STATUS

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/monitoring/thread_status_util.h000066400000000000000000000121421370372246700215120ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <string>

#include "monitoring/thread_status_updater.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/thread_status.h"

namespace ROCKSDB_NAMESPACE {

class ColumnFamilyData;

// The static utility class for updating thread-local status.
//
// The thread-local status is updated via the thread-local cached
// pointer thread_updater_local_cache_.  During each function call,
// when ThreadStatusUtil finds thread_updater_local_cache_ is
// left uninitialized (determined by thread_updater_initialized_),
// it will try to initialize it using the return value of
// Env::GetThreadStatusUpdater().  When thread_updater_local_cache_
// is initialized by a non-null pointer, each function call will
// then update the status of the current thread.
// Otherwise, all function calls to ThreadStatusUtil will be no-op.
class ThreadStatusUtil {
 public:
  // Register the current thread for tracking.
  static void RegisterThread(
      const Env* env, ThreadStatus::ThreadType thread_type);

  // Unregister the current thread.
  static void UnregisterThread();

  // Create an entry in the global ColumnFamilyInfo table for the
  // specified column family.  This function should be called only
  // when the current thread does not hold db_mutex.
  static void NewColumnFamilyInfo(const DB* db, const ColumnFamilyData* cfd,
                                  const std::string& cf_name, const Env* env);

  // Erase the ConstantColumnFamilyInfo that is associated with the
  // specified ColumnFamilyData.  This function should be called only
  // when the current thread does not hold db_mutex.
  static void EraseColumnFamilyInfo(const ColumnFamilyData* cfd);

  // Erase all ConstantColumnFamilyInfo that is associated with the
  // specified db instance.  This function should be called only when
  // the current thread does not hold db_mutex.
  static void EraseDatabaseInfo(const DB* db);

  // Update the thread status to indicate the current thread is doing
  // something related to the specified column family.
  static void SetColumnFamily(const ColumnFamilyData* cfd, const Env* env,
                              bool enable_thread_tracking);

  // Set the operation type of the current thread.
  static void SetThreadOperation(ThreadStatus::OperationType type);

  // Set the operation stage of the current thread and return the value
  // reported by the underlying updater (STAGE_UNKNOWN when there is none).
  static ThreadStatus::OperationStage SetThreadOperationStage(
      ThreadStatus::OperationStage stage);

  // Set the property identified by "code" of the current operation.
  static void SetThreadOperationProperty(
      int code, uint64_t value);

  // Increase the property identified by "code" of the current operation
  // by "delta".
  static void IncreaseThreadOperationProperty(
      int code, uint64_t delta);

  // Set the state of the current thread.
  static void SetThreadState(ThreadStatus::StateType type);

  // Reset the status of the current thread.
  static void ResetThreadStatus();

#ifndef NDEBUG
  // Debug-only: set the delay, in microseconds, that TEST_StateDelay()
  // sleeps for the given state.
  static void TEST_SetStateDelay(
      const ThreadStatus::StateType state, int micro);
  // Debug-only: sleep for the delay configured for the given state, if any.
  static void TEST_StateDelay(const ThreadStatus::StateType state);
#endif

 protected:
  // Initialize the thread-local ThreadStatusUpdater when it finds
  // the cached value is nullptr.  Returns true if it has cached
  // a non-null pointer.
  static bool MaybeInitThreadLocalUpdater(const Env* env);

#ifdef ROCKSDB_USING_THREAD_STATUS
  // A boolean flag indicating whether thread_updater_local_cache_
  // is initialized.  It is set to true when an Env uses any
  // ThreadStatusUtil functions using the current thread other
  // than UnregisterThread().  It will be set to false when
  // UnregisterThread() is called.
  //
  // When this variable is set to true, thread_updater_local_cache_
  // will not be updated until this variable is again set to false
  // in UnregisterThread().
  static  __thread bool thread_updater_initialized_;

  // The thread-local cached ThreadStatusUpdater that caches the
  // thread_status_updater_ of the first Env that uses any ThreadStatusUtil
  // function other than UnregisterThread().  This variable will
  // be cleared when UnregisterThread() is called.
  //
  // When this variable is set to a non-null pointer, then the status
  // of the current thread will be updated when a function of
  // ThreadStatusUtil is called.  Otherwise, all functions of
  // ThreadStatusUtil will be no-op.
  //
  // When thread_updater_initialized_ is set to true, this variable
  // will not be updated until this thread_updater_initialized_ is
  // again set to false in UnregisterThread().
  static __thread ThreadStatusUpdater* thread_updater_local_cache_;
#else
  static bool thread_updater_initialized_;
  static ThreadStatusUpdater* thread_updater_local_cache_;
#endif
};

// A helper class for updating the thread operation stage.  It will set the
// operation stage according to the input parameter in its constructor
// and set the operation stage back to the previous stage in its destructor.
class AutoThreadOperationStageUpdater { public: explicit AutoThreadOperationStageUpdater( ThreadStatus::OperationStage stage); ~AutoThreadOperationStageUpdater(); #ifdef ROCKSDB_USING_THREAD_STATUS private: ThreadStatus::OperationStage prev_stage_; #endif }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/monitoring/thread_status_util_debug.cc000066400000000000000000000017321370372246700230210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_util.h" #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { #ifndef NDEBUG // the delay for debugging purpose. static std::atomic states_delay[ThreadStatus::NUM_STATE_TYPES]; void ThreadStatusUtil::TEST_SetStateDelay( const ThreadStatus::StateType state, int micro) { states_delay[state].store(micro, std::memory_order_relaxed); } void ThreadStatusUtil::TEST_StateDelay(const ThreadStatus::StateType state) { auto delay = states_delay[state].load(std::memory_order_relaxed); if (delay > 0) { Env::Default()->SleepForMicroseconds(delay); } } #endif // !NDEBUG } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/000077500000000000000000000000001370372246700147405ustar00rootroot00000000000000rocksdb-6.11.4/options/cf_options.cc000066400000000000000000001217471370372246700174260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "options/cf_options.h" #include #include #include #include #include "options/db_options.h" #include "options/options_helper.h" #include "port/port.h" #include "rocksdb/concurrent_task_limiter.h" #include "rocksdb/convenience.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/utilities/object_registry.h" #include "table/block_based/block_based_table_factory.h" #include "table/plain/plain_table_factory.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { // offset_of is used to get the offset of a class data member // ex: offset_of(&ColumnFamilyOptions::num_levels) // This call will return the offset of num_levels in ColumnFamilyOptions class // // This is the same as offsetof() but allow us to work with non standard-layout // classes and structures // refs: // http://en.cppreference.com/w/cpp/concept/StandardLayoutType // https://gist.github.com/graphitemaster/494f21190bb2c63c5516 #ifndef ROCKSDB_LITE ColumnFamilyOptions OptionsHelper::dummy_cf_options; template int offset_of(T1 ColumnFamilyOptions::*member) { return int(size_t(&(OptionsHelper::dummy_cf_options.*member)) - size_t(&OptionsHelper::dummy_cf_options)); } template int offset_of(T1 AdvancedColumnFamilyOptions::*member) { return int(size_t(&(OptionsHelper::dummy_cf_options.*member)) - size_t(&OptionsHelper::dummy_cf_options)); } static Status ParseCompressionOptions(const std::string& value, const std::string& name, CompressionOptions& compression_opts) { size_t start = 0; size_t end = value.find(':'); if (end == std::string::npos) { return Status::InvalidArgument("unable to parse the specified CF option " + name); } compression_opts.window_bits = ParseInt(value.substr(start, end - start)); start = end + 1; end = value.find(':', start); if (end == std::string::npos) { return Status::InvalidArgument("unable to parse the specified CF option " + name); } compression_opts.level = 
ParseInt(value.substr(start, end - start)); start = end + 1; if (start >= value.size()) { return Status::InvalidArgument("unable to parse the specified CF option " + name); } end = value.find(':', start); compression_opts.strategy = ParseInt(value.substr(start, value.size() - start)); // max_dict_bytes is optional for backwards compatibility if (end != std::string::npos) { start = end + 1; if (start >= value.size()) { return Status::InvalidArgument( "unable to parse the specified CF option " + name); } compression_opts.max_dict_bytes = ParseInt(value.substr(start, value.size() - start)); end = value.find(':', start); } // zstd_max_train_bytes is optional for backwards compatibility if (end != std::string::npos) { start = end + 1; if (start >= value.size()) { return Status::InvalidArgument( "unable to parse the specified CF option " + name); } compression_opts.zstd_max_train_bytes = ParseInt(value.substr(start, value.size() - start)); end = value.find(':', start); } // parallel_threads is not serialized with this format. // We plan to upgrade the format to a JSON-like format. 
compression_opts.parallel_threads = CompressionOptions().parallel_threads; // enabled is optional for backwards compatibility if (end != std::string::npos) { start = end + 1; if (start >= value.size()) { return Status::InvalidArgument( "unable to parse the specified CF option " + name); } compression_opts.enabled = ParseBoolean("", value.substr(start, value.size() - start)); } return Status::OK(); } const std::string kOptNameBMCompOpts = "bottommost_compression_opts"; const std::string kOptNameCompOpts = "compression_opts"; static std::unordered_map fifo_compaction_options_type_info = { {"max_table_files_size", {offsetof(struct CompactionOptionsFIFO, max_table_files_size), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct CompactionOptionsFIFO, max_table_files_size)}}, {"ttl", {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"allow_compaction", {offsetof(struct CompactionOptionsFIFO, allow_compaction), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct CompactionOptionsFIFO, allow_compaction)}}}; static std::unordered_map universal_compaction_options_type_info = { {"size_ratio", {offsetof(class CompactionOptionsUniversal, size_ratio), OptionType::kUInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, size_ratio)}}, {"min_merge_width", {offsetof(class CompactionOptionsUniversal, min_merge_width), OptionType::kUInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, min_merge_width)}}, {"max_merge_width", {offsetof(class CompactionOptionsUniversal, max_merge_width), OptionType::kUInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, max_merge_width)}}, {"max_size_amplification_percent", {offsetof(class CompactionOptionsUniversal, max_size_amplification_percent), 
OptionType::kUInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, max_size_amplification_percent)}}, {"compression_size_percent", {offsetof(class CompactionOptionsUniversal, compression_size_percent), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, compression_size_percent)}}, {"stop_style", {offsetof(class CompactionOptionsUniversal, stop_style), OptionType::kCompactionStopStyle, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, stop_style)}}, {"allow_trivial_move", {offsetof(class CompactionOptionsUniversal, allow_trivial_move), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(class CompactionOptionsUniversal, allow_trivial_move)}}}; std::unordered_map OptionsHelper::cf_options_type_info = { /* not yet supported CompressionOptions compression_opts; TablePropertiesCollectorFactories table_properties_collector_factories; typedef std::vector> TablePropertiesCollectorFactories; UpdateStatus (*inplace_callback)(char* existing_value, uint34_t* existing_value_size, Slice delta_value, std::string* merged_value); std::vector cf_paths; */ {"report_bg_io_stats", {offset_of(&ColumnFamilyOptions::report_bg_io_stats), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, report_bg_io_stats)}}, {"compaction_measure_io_stats", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"disable_auto_compactions", {offset_of(&ColumnFamilyOptions::disable_auto_compactions), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, disable_auto_compactions)}}, {"filter_deletes", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"inplace_update_support", 
{offset_of(&ColumnFamilyOptions::inplace_update_support), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"level_compaction_dynamic_level_bytes", {offset_of(&ColumnFamilyOptions::level_compaction_dynamic_level_bytes), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"optimize_filters_for_hits", {offset_of(&ColumnFamilyOptions::optimize_filters_for_hits), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"paranoid_file_checks", {offset_of(&ColumnFamilyOptions::paranoid_file_checks), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, paranoid_file_checks)}}, {"force_consistency_checks", {offset_of(&ColumnFamilyOptions::force_consistency_checks), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"purge_redundant_kvs_while_flush", {offset_of(&ColumnFamilyOptions::purge_redundant_kvs_while_flush), OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"verify_checksums_in_compaction", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"soft_pending_compaction_bytes_limit", {offset_of(&ColumnFamilyOptions::soft_pending_compaction_bytes_limit), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, soft_pending_compaction_bytes_limit)}}, {"hard_pending_compaction_bytes_limit", {offset_of(&ColumnFamilyOptions::hard_pending_compaction_bytes_limit), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, hard_pending_compaction_bytes_limit)}}, {"hard_rate_limit", {0, OptionType::kDouble, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"soft_rate_limit", {0, OptionType::kDouble, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, 
{"max_compaction_bytes", {offset_of(&ColumnFamilyOptions::max_compaction_bytes), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_compaction_bytes)}}, {"expanded_compaction_factor", {0, OptionType::kInt, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"level0_file_num_compaction_trigger", {offset_of(&ColumnFamilyOptions::level0_file_num_compaction_trigger), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, level0_file_num_compaction_trigger)}}, {"level0_slowdown_writes_trigger", {offset_of(&ColumnFamilyOptions::level0_slowdown_writes_trigger), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, level0_slowdown_writes_trigger)}}, {"level0_stop_writes_trigger", {offset_of(&ColumnFamilyOptions::level0_stop_writes_trigger), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, level0_stop_writes_trigger)}}, {"max_grandparent_overlap_factor", {0, OptionType::kInt, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"max_mem_compaction_level", {0, OptionType::kInt, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"max_write_buffer_number", {offset_of(&ColumnFamilyOptions::max_write_buffer_number), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_write_buffer_number)}}, {"max_write_buffer_number_to_maintain", {offset_of(&ColumnFamilyOptions::max_write_buffer_number_to_maintain), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"max_write_buffer_size_to_maintain", {offset_of(&ColumnFamilyOptions::max_write_buffer_size_to_maintain), OptionType::kInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"min_write_buffer_number_to_merge", 
{offset_of(&ColumnFamilyOptions::min_write_buffer_number_to_merge), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"num_levels", {offset_of(&ColumnFamilyOptions::num_levels), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"source_compaction_factor", {0, OptionType::kInt, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"target_file_size_multiplier", {offset_of(&ColumnFamilyOptions::target_file_size_multiplier), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, target_file_size_multiplier)}}, {"arena_block_size", {offset_of(&ColumnFamilyOptions::arena_block_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, arena_block_size)}}, {"inplace_update_num_locks", {offset_of(&ColumnFamilyOptions::inplace_update_num_locks), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, inplace_update_num_locks)}}, {"max_successive_merges", {offset_of(&ColumnFamilyOptions::max_successive_merges), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_successive_merges)}}, {"memtable_huge_page_size", {offset_of(&ColumnFamilyOptions::memtable_huge_page_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, memtable_huge_page_size)}}, {"memtable_prefix_bloom_huge_page_tlb_size", {0, OptionType::kSizeT, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"write_buffer_size", {offset_of(&ColumnFamilyOptions::write_buffer_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, write_buffer_size)}}, {"bloom_locality", {offset_of(&ColumnFamilyOptions::bloom_locality), OptionType::kUInt32T, 
OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"memtable_prefix_bloom_bits", {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"memtable_prefix_bloom_size_ratio", {offset_of(&ColumnFamilyOptions::memtable_prefix_bloom_size_ratio), OptionType::kDouble, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, memtable_prefix_bloom_size_ratio)}}, {"memtable_prefix_bloom_probes", {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"memtable_whole_key_filtering", {offset_of(&ColumnFamilyOptions::memtable_whole_key_filtering), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, memtable_whole_key_filtering)}}, {"min_partial_merge_operands", {0, OptionType::kUInt32T, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"max_bytes_for_level_base", {offset_of(&ColumnFamilyOptions::max_bytes_for_level_base), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_bytes_for_level_base)}}, {"snap_refresh_nanos", {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"max_bytes_for_level_multiplier", {offset_of(&ColumnFamilyOptions::max_bytes_for_level_multiplier), OptionType::kDouble, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier)}}, {"max_bytes_for_level_multiplier_additional", OptionTypeInfo::Vector( offset_of(&ColumnFamilyOptions:: max_bytes_for_level_multiplier_additional), OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier_additional), {0, OptionType::kInt, 0})}, {"max_sequential_skip_in_iterations", {offset_of(&ColumnFamilyOptions::max_sequential_skip_in_iterations), OptionType::kUInt64T, 
OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_sequential_skip_in_iterations)}}, {"target_file_size_base", {offset_of(&ColumnFamilyOptions::target_file_size_base), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, target_file_size_base)}}, {"rate_limit_delay_max_milliseconds", {0, OptionType::kUInt, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"compression", {offset_of(&ColumnFamilyOptions::compression), OptionType::kCompressionType, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, compression)}}, {"compression_per_level", OptionTypeInfo::Vector( offset_of(&ColumnFamilyOptions::compression_per_level), OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, {0, OptionType::kCompressionType})}, {"bottommost_compression", {offset_of(&ColumnFamilyOptions::bottommost_compression), OptionType::kCompressionType, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, bottommost_compression)}}, {"comparator", {offset_of(&ColumnFamilyOptions::comparator), OptionType::kComparator, OptionVerificationType::kByName, OptionTypeFlags::kCompareLoose, 0, // Parses the string and sets the corresponding comparator [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { auto old_comparator = reinterpret_cast(addr); const Comparator* new_comparator = *old_comparator; Status status = ObjectRegistry::NewInstance()->NewStaticObject( value, &new_comparator); if (status.ok()) { *old_comparator = new_comparator; return status; } return Status::OK(); }}}, {"prefix_extractor", {offset_of(&ColumnFamilyOptions::prefix_extractor), OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, prefix_extractor)}}, {"memtable_insert_with_hint_prefix_extractor", 
{offset_of( &ColumnFamilyOptions::memtable_insert_with_hint_prefix_extractor), OptionType::kSliceTransform, OptionVerificationType::kByNameAllowNull, OptionTypeFlags::kNone, 0}}, {"memtable_factory", {offset_of(&ColumnFamilyOptions::memtable_factory), OptionType::kMemTableRepFactory, OptionVerificationType::kByName, OptionTypeFlags::kNone, 0}}, {"memtable", {offset_of(&ColumnFamilyOptions::memtable_factory), OptionType::kMemTableRepFactory, OptionVerificationType::kAlias, OptionTypeFlags::kNone, 0, // Parses the value string and updates the memtable_factory [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { std::unique_ptr new_mem_factory; Status s = GetMemTableRepFactoryFromString(value, &new_mem_factory); if (s.ok()) { auto memtable_factory = reinterpret_cast*>(addr); memtable_factory->reset(new_mem_factory.release()); } return s; }}}, {"table_factory", {offset_of(&ColumnFamilyOptions::table_factory), OptionType::kTableFactory, OptionVerificationType::kByName, OptionTypeFlags::kCompareLoose, 0}}, {"block_based_table_factory", {offset_of(&ColumnFamilyOptions::table_factory), OptionType::kTableFactory, OptionVerificationType::kAlias, OptionTypeFlags::kCompareLoose, 0, // Parses the input value and creates a BlockBasedTableFactory [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { // Nested options auto old_table_factory = reinterpret_cast*>(addr); BlockBasedTableOptions table_opts, base_opts; BlockBasedTableFactory* block_based_table_factory = static_cast_with_check( old_table_factory->get()); if (block_based_table_factory != nullptr) { base_opts = block_based_table_factory->table_options(); } Status s = GetBlockBasedTableOptionsFromString(base_opts, value, &table_opts); if (s.ok()) { old_table_factory->reset(NewBlockBasedTableFactory(table_opts)); } return s; }}}, {"plain_table_factory", {offset_of(&ColumnFamilyOptions::table_factory), OptionType::kTableFactory, 
OptionVerificationType::kAlias, OptionTypeFlags::kCompareLoose, 0, // Parses the input value and creates a PlainTableFactory [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { // Nested options auto old_table_factory = reinterpret_cast*>(addr); PlainTableOptions table_opts, base_opts; PlainTableFactory* plain_table_factory = static_cast_with_check( old_table_factory->get()); if (plain_table_factory != nullptr) { base_opts = plain_table_factory->table_options(); } Status s = GetPlainTableOptionsFromString(base_opts, value, &table_opts); if (s.ok()) { old_table_factory->reset(NewPlainTableFactory(table_opts)); } return s; }}}, {"compaction_filter", {offset_of(&ColumnFamilyOptions::compaction_filter), OptionType::kCompactionFilter, OptionVerificationType::kByName, OptionTypeFlags::kNone, 0}}, {"compaction_filter_factory", {offset_of(&ColumnFamilyOptions::compaction_filter_factory), OptionType::kCompactionFilterFactory, OptionVerificationType::kByName, OptionTypeFlags::kNone, 0}}, {"merge_operator", {offset_of(&ColumnFamilyOptions::merge_operator), OptionType::kMergeOperator, OptionVerificationType::kByNameAllowFromNull, OptionTypeFlags::kCompareLoose, 0, // Parses the input value as a MergeOperator, updating the value [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { auto mop = reinterpret_cast*>(addr); ObjectRegistry::NewInstance() ->NewSharedObject(value, mop) .PermitUncheckedError(); return Status::OK(); }}}, {"compaction_style", {offset_of(&ColumnFamilyOptions::compaction_style), OptionType::kCompactionStyle, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"compaction_pri", {offset_of(&ColumnFamilyOptions::compaction_pri), OptionType::kCompactionPri, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"compaction_options_fifo", OptionTypeInfo::Struct( "compaction_options_fifo", &fifo_compaction_options_type_info, 
offset_of(&ColumnFamilyOptions::compaction_options_fifo), OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, compaction_options_fifo), [](const ConfigOptions& opts, const std::string& name, const std::string& value, char* addr) { // This is to handle backward compatibility, where // compaction_options_fifo could be assigned a single scalar // value, say, like "23", which would be assigned to // max_table_files_size. if (name == "compaction_options_fifo" && value.find("=") == std::string::npos) { // Old format. Parse just a single uint64_t value. auto options = reinterpret_cast(addr); options->max_table_files_size = ParseUint64(value); return Status::OK(); } else { return OptionTypeInfo::ParseStruct( opts, "compaction_options_fifo", &fifo_compaction_options_type_info, name, value, addr); } })}, {"compaction_options_universal", OptionTypeInfo::Struct( "compaction_options_universal", &universal_compaction_options_type_info, offset_of(&ColumnFamilyOptions::compaction_options_universal), OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, compaction_options_universal))}, {"ttl", {offset_of(&ColumnFamilyOptions::ttl), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, ttl)}}, {"periodic_compaction_seconds", {offset_of(&ColumnFamilyOptions::periodic_compaction_seconds), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, periodic_compaction_seconds)}}, {"sample_for_compression", {offset_of(&ColumnFamilyOptions::sample_for_compression), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, sample_for_compression)}}, // The following properties were handled as special cases in ParseOption // This means that the properties could be read from the options file // but never written to the file or compared 
to each other. {kOptNameCompOpts, {offset_of(&ColumnFamilyOptions::compression_opts), OptionType::kUnknown, OptionVerificationType::kNormal, (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever | OptionTypeFlags::kMutable), offsetof(struct MutableCFOptions, compression_opts), // Parses the value as a CompressionOptions [](const ConfigOptions& /*opts*/, const std::string& name, const std::string& value, char* addr) { auto* compression = reinterpret_cast(addr); return ParseCompressionOptions(value, name, *compression); }}}, {kOptNameBMCompOpts, {offset_of(&ColumnFamilyOptions::bottommost_compression_opts), OptionType::kUnknown, OptionVerificationType::kNormal, (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever | OptionTypeFlags::kMutable), offsetof(struct MutableCFOptions, bottommost_compression_opts), // Parses the value as a CompressionOptions [](const ConfigOptions& /*opts*/, const std::string& name, const std::string& value, char* addr) { auto* compression = reinterpret_cast(addr); return ParseCompressionOptions(value, name, *compression); }}}, // End special case properties }; Status ParseColumnFamilyOption(const ConfigOptions& config_options, const std::string& name, const std::string& org_value, ColumnFamilyOptions* new_options) { const std::string& value = config_options.input_strings_escaped ? 
UnescapeOptionString(org_value) : org_value; try { std::string elem; const auto opt_info = OptionTypeInfo::Find(name, cf_options_type_info, &elem); if (opt_info != nullptr) { return opt_info->Parse( config_options, elem, value, reinterpret_cast(new_options) + opt_info->offset_); } else { return Status::InvalidArgument( "Unable to parse the specified CF option " + name); } } catch (const std::exception&) { return Status::InvalidArgument("unable to parse the specified option " + name); } } #endif // ROCKSDB_LITE ImmutableCFOptions::ImmutableCFOptions(const Options& options) : ImmutableCFOptions(ImmutableDBOptions(options), options) {} ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& cf_options) : compaction_style(cf_options.compaction_style), compaction_pri(cf_options.compaction_pri), user_comparator(cf_options.comparator), internal_comparator(InternalKeyComparator(cf_options.comparator)), merge_operator(cf_options.merge_operator.get()), compaction_filter(cf_options.compaction_filter), compaction_filter_factory(cf_options.compaction_filter_factory.get()), min_write_buffer_number_to_merge( cf_options.min_write_buffer_number_to_merge), max_write_buffer_number_to_maintain( cf_options.max_write_buffer_number_to_maintain), max_write_buffer_size_to_maintain( cf_options.max_write_buffer_size_to_maintain), inplace_update_support(cf_options.inplace_update_support), inplace_callback(cf_options.inplace_callback), info_log(db_options.info_log.get()), statistics(db_options.statistics.get()), rate_limiter(db_options.rate_limiter.get()), info_log_level(db_options.info_log_level), env(db_options.env), fs(db_options.fs.get()), allow_mmap_reads(db_options.allow_mmap_reads), allow_mmap_writes(db_options.allow_mmap_writes), db_paths(db_options.db_paths), memtable_factory(cf_options.memtable_factory.get()), table_factory(cf_options.table_factory.get()), table_properties_collector_factories( 
cf_options.table_properties_collector_factories), advise_random_on_open(db_options.advise_random_on_open), bloom_locality(cf_options.bloom_locality), purge_redundant_kvs_while_flush( cf_options.purge_redundant_kvs_while_flush), use_fsync(db_options.use_fsync), compression_per_level(cf_options.compression_per_level), level_compaction_dynamic_level_bytes( cf_options.level_compaction_dynamic_level_bytes), access_hint_on_compaction_start( db_options.access_hint_on_compaction_start), new_table_reader_for_compaction_inputs( db_options.new_table_reader_for_compaction_inputs), num_levels(cf_options.num_levels), optimize_filters_for_hits(cf_options.optimize_filters_for_hits), force_consistency_checks(cf_options.force_consistency_checks), allow_ingest_behind(db_options.allow_ingest_behind), preserve_deletes(db_options.preserve_deletes), listeners(db_options.listeners), row_cache(db_options.row_cache), max_subcompactions(db_options.max_subcompactions), memtable_insert_with_hint_prefix_extractor( cf_options.memtable_insert_with_hint_prefix_extractor.get()), cf_paths(cf_options.cf_paths), compaction_thread_limiter(cf_options.compaction_thread_limiter), file_checksum_gen_factory(db_options.file_checksum_gen_factory.get()) {} // Multiple two operands. If they overflow, return op1. uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) { if (op1 == 0 || op2 <= 0) { return 0; } if (port::kMaxUint64 / op1 < op2) { return op1; } return static_cast(op1 * op2); } // when level_compaction_dynamic_level_bytes is true and leveled compaction // is used, the base level is not always L1, so precomupted max_file_size can // no longer be used. Recompute file_size_for_level from base level. 
uint64_t MaxFileSizeForLevel(const MutableCFOptions& cf_options, int level, CompactionStyle compaction_style, int base_level, bool level_compaction_dynamic_level_bytes) { if (!level_compaction_dynamic_level_bytes || level < base_level || compaction_style != kCompactionStyleLevel) { assert(level >= 0); assert(level < (int)cf_options.max_file_size.size()); return cf_options.max_file_size[level]; } else { assert(level >= 0 && base_level >= 0); assert(level - base_level < (int)cf_options.max_file_size.size()); return cf_options.max_file_size[level - base_level]; } } size_t MaxFileSizeForL0MetaPin(const MutableCFOptions& cf_options) { // We do not want to pin meta-blocks that almost certainly came from intra-L0 // or a former larger `write_buffer_size` value to avoid surprising users with // pinned memory usage. We use a factor of 1.5 to account for overhead // introduced during flush in most cases. if (port::kMaxSizet / 3 < cf_options.write_buffer_size / 2) { return port::kMaxSizet; } return cf_options.write_buffer_size / 2 * 3; } void MutableCFOptions::RefreshDerivedOptions(int num_levels, CompactionStyle compaction_style) { max_file_size.resize(num_levels); for (int i = 0; i < num_levels; ++i) { if (i == 0 && compaction_style == kCompactionStyleUniversal) { max_file_size[i] = ULLONG_MAX; } else if (i > 1) { max_file_size[i] = MultiplyCheckOverflow(max_file_size[i - 1], target_file_size_multiplier); } else { max_file_size[i] = target_file_size_base; } } } void MutableCFOptions::Dump(Logger* log) const { // Memtable related options ROCKS_LOG_INFO(log, " write_buffer_size: %" ROCKSDB_PRIszt, write_buffer_size); ROCKS_LOG_INFO(log, " max_write_buffer_number: %d", max_write_buffer_number); ROCKS_LOG_INFO(log, " arena_block_size: %" ROCKSDB_PRIszt, arena_block_size); ROCKS_LOG_INFO(log, " memtable_prefix_bloom_ratio: %f", memtable_prefix_bloom_size_ratio); ROCKS_LOG_INFO(log, " memtable_whole_key_filtering: %d", memtable_whole_key_filtering); ROCKS_LOG_INFO(log, " 
memtable_huge_page_size: %" ROCKSDB_PRIszt, memtable_huge_page_size); ROCKS_LOG_INFO(log, " max_successive_merges: %" ROCKSDB_PRIszt, max_successive_merges); ROCKS_LOG_INFO(log, " inplace_update_num_locks: %" ROCKSDB_PRIszt, inplace_update_num_locks); ROCKS_LOG_INFO( log, " prefix_extractor: %s", prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name()); ROCKS_LOG_INFO(log, " disable_auto_compactions: %d", disable_auto_compactions); ROCKS_LOG_INFO(log, " soft_pending_compaction_bytes_limit: %" PRIu64, soft_pending_compaction_bytes_limit); ROCKS_LOG_INFO(log, " hard_pending_compaction_bytes_limit: %" PRIu64, hard_pending_compaction_bytes_limit); ROCKS_LOG_INFO(log, " level0_file_num_compaction_trigger: %d", level0_file_num_compaction_trigger); ROCKS_LOG_INFO(log, " level0_slowdown_writes_trigger: %d", level0_slowdown_writes_trigger); ROCKS_LOG_INFO(log, " level0_stop_writes_trigger: %d", level0_stop_writes_trigger); ROCKS_LOG_INFO(log, " max_compaction_bytes: %" PRIu64, max_compaction_bytes); ROCKS_LOG_INFO(log, " target_file_size_base: %" PRIu64, target_file_size_base); ROCKS_LOG_INFO(log, " target_file_size_multiplier: %d", target_file_size_multiplier); ROCKS_LOG_INFO(log, " max_bytes_for_level_base: %" PRIu64, max_bytes_for_level_base); ROCKS_LOG_INFO(log, " max_bytes_for_level_multiplier: %f", max_bytes_for_level_multiplier); ROCKS_LOG_INFO(log, " ttl: %" PRIu64, ttl); ROCKS_LOG_INFO(log, " periodic_compaction_seconds: %" PRIu64, periodic_compaction_seconds); std::string result; char buf[10]; for (const auto m : max_bytes_for_level_multiplier_additional) { snprintf(buf, sizeof(buf), "%d, ", m); result += buf; } if (result.size() >= 2) { result.resize(result.size() - 2); } else { result = ""; } ROCKS_LOG_INFO(log, "max_bytes_for_level_multiplier_additional: %s", result.c_str()); ROCKS_LOG_INFO(log, " max_sequential_skip_in_iterations: %" PRIu64, max_sequential_skip_in_iterations); ROCKS_LOG_INFO(log, " paranoid_file_checks: %d", paranoid_file_checks); 
ROCKS_LOG_INFO(log, " report_bg_io_stats: %d", report_bg_io_stats); ROCKS_LOG_INFO(log, " compression: %d", static_cast(compression)); // Universal Compaction Options ROCKS_LOG_INFO(log, "compaction_options_universal.size_ratio : %d", compaction_options_universal.size_ratio); ROCKS_LOG_INFO(log, "compaction_options_universal.min_merge_width : %d", compaction_options_universal.min_merge_width); ROCKS_LOG_INFO(log, "compaction_options_universal.max_merge_width : %d", compaction_options_universal.max_merge_width); ROCKS_LOG_INFO( log, "compaction_options_universal.max_size_amplification_percent : %d", compaction_options_universal.max_size_amplification_percent); ROCKS_LOG_INFO(log, "compaction_options_universal.compression_size_percent : %d", compaction_options_universal.compression_size_percent); ROCKS_LOG_INFO(log, "compaction_options_universal.stop_style : %d", compaction_options_universal.stop_style); ROCKS_LOG_INFO( log, "compaction_options_universal.allow_trivial_move : %d", static_cast(compaction_options_universal.allow_trivial_move)); // FIFO Compaction Options ROCKS_LOG_INFO(log, "compaction_options_fifo.max_table_files_size : %" PRIu64, compaction_options_fifo.max_table_files_size); ROCKS_LOG_INFO(log, "compaction_options_fifo.allow_compaction : %d", compaction_options_fifo.allow_compaction); } MutableCFOptions::MutableCFOptions(const Options& options) : MutableCFOptions(ColumnFamilyOptions(options)) {} } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/cf_options.h000066400000000000000000000224531370372246700172620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include "db/dbformat.h" #include "options/db_options.h" #include "rocksdb/options.h" #include "util/compression.h" namespace ROCKSDB_NAMESPACE { // ImmutableCFOptions is a data struct used by RocksDB internal. It contains a // subset of Options that should not be changed during the entire lifetime // of DB. Raw pointers defined in this struct do not have ownership to the data // they point to. Options contains std::shared_ptr to these data. struct ImmutableCFOptions { explicit ImmutableCFOptions(const Options& options); ImmutableCFOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& cf_options); CompactionStyle compaction_style; CompactionPri compaction_pri; const Comparator* user_comparator; InternalKeyComparator internal_comparator; MergeOperator* merge_operator; const CompactionFilter* compaction_filter; CompactionFilterFactory* compaction_filter_factory; int min_write_buffer_number_to_merge; int max_write_buffer_number_to_maintain; int64_t max_write_buffer_size_to_maintain; bool inplace_update_support; UpdateStatus (*inplace_callback)(char* existing_value, uint32_t* existing_value_size, Slice delta_value, std::string* merged_value); Logger* info_log; Statistics* statistics; RateLimiter* rate_limiter; InfoLogLevel info_log_level; Env* env; FileSystem* fs; // Allow the OS to mmap file for reading sst tables. Default: false bool allow_mmap_reads; // Allow the OS to mmap file for writing. Default: false bool allow_mmap_writes; std::vector db_paths; MemTableRepFactory* memtable_factory; TableFactory* table_factory; Options::TablePropertiesCollectorFactories table_properties_collector_factories; bool advise_random_on_open; // This options is required by PlainTableReader. 
May need to move it // to PlainTableOptions just like bloom_bits_per_key uint32_t bloom_locality; bool purge_redundant_kvs_while_flush; bool use_fsync; std::vector compression_per_level; bool level_compaction_dynamic_level_bytes; Options::AccessHint access_hint_on_compaction_start; bool new_table_reader_for_compaction_inputs; int num_levels; bool optimize_filters_for_hits; bool force_consistency_checks; bool allow_ingest_behind; bool preserve_deletes; // A vector of EventListeners which callback functions will be called // when specific RocksDB event happens. std::vector> listeners; std::shared_ptr row_cache; uint32_t max_subcompactions; const SliceTransform* memtable_insert_with_hint_prefix_extractor; std::vector cf_paths; std::shared_ptr compaction_thread_limiter; FileChecksumGenFactory* file_checksum_gen_factory; }; struct MutableCFOptions { explicit MutableCFOptions(const ColumnFamilyOptions& options) : write_buffer_size(options.write_buffer_size), max_write_buffer_number(options.max_write_buffer_number), arena_block_size(options.arena_block_size), memtable_prefix_bloom_size_ratio( options.memtable_prefix_bloom_size_ratio), memtable_whole_key_filtering(options.memtable_whole_key_filtering), memtable_huge_page_size(options.memtable_huge_page_size), max_successive_merges(options.max_successive_merges), inplace_update_num_locks(options.inplace_update_num_locks), prefix_extractor(options.prefix_extractor), disable_auto_compactions(options.disable_auto_compactions), soft_pending_compaction_bytes_limit( options.soft_pending_compaction_bytes_limit), hard_pending_compaction_bytes_limit( options.hard_pending_compaction_bytes_limit), level0_file_num_compaction_trigger( options.level0_file_num_compaction_trigger), level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger), level0_stop_writes_trigger(options.level0_stop_writes_trigger), max_compaction_bytes(options.max_compaction_bytes), target_file_size_base(options.target_file_size_base), 
target_file_size_multiplier(options.target_file_size_multiplier), max_bytes_for_level_base(options.max_bytes_for_level_base), max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier), ttl(options.ttl), periodic_compaction_seconds(options.periodic_compaction_seconds), max_bytes_for_level_multiplier_additional( options.max_bytes_for_level_multiplier_additional), compaction_options_fifo(options.compaction_options_fifo), compaction_options_universal(options.compaction_options_universal), max_sequential_skip_in_iterations( options.max_sequential_skip_in_iterations), paranoid_file_checks(options.paranoid_file_checks), report_bg_io_stats(options.report_bg_io_stats), compression(options.compression), bottommost_compression(options.bottommost_compression), compression_opts(options.compression_opts), bottommost_compression_opts(options.bottommost_compression_opts), sample_for_compression(options.sample_for_compression) { RefreshDerivedOptions(options.num_levels, options.compaction_style); } MutableCFOptions() : write_buffer_size(0), max_write_buffer_number(0), arena_block_size(0), memtable_prefix_bloom_size_ratio(0), memtable_whole_key_filtering(false), memtable_huge_page_size(0), max_successive_merges(0), inplace_update_num_locks(0), prefix_extractor(nullptr), disable_auto_compactions(false), soft_pending_compaction_bytes_limit(0), hard_pending_compaction_bytes_limit(0), level0_file_num_compaction_trigger(0), level0_slowdown_writes_trigger(0), level0_stop_writes_trigger(0), max_compaction_bytes(0), target_file_size_base(0), target_file_size_multiplier(0), max_bytes_for_level_base(0), max_bytes_for_level_multiplier(0), ttl(0), periodic_compaction_seconds(0), compaction_options_fifo(), max_sequential_skip_in_iterations(0), paranoid_file_checks(false), report_bg_io_stats(false), compression(Snappy_Supported() ? 
kSnappyCompression : kNoCompression), bottommost_compression(kDisableCompressionOption), sample_for_compression(0) {} explicit MutableCFOptions(const Options& options); // Must be called after any change to MutableCFOptions void RefreshDerivedOptions(int num_levels, CompactionStyle compaction_style); void RefreshDerivedOptions(const ImmutableCFOptions& ioptions) { RefreshDerivedOptions(ioptions.num_levels, ioptions.compaction_style); } int MaxBytesMultiplerAdditional(int level) const { if (level >= static_cast(max_bytes_for_level_multiplier_additional.size())) { return 1; } return max_bytes_for_level_multiplier_additional[level]; } void Dump(Logger* log) const; // Memtable related options size_t write_buffer_size; int max_write_buffer_number; size_t arena_block_size; double memtable_prefix_bloom_size_ratio; bool memtable_whole_key_filtering; size_t memtable_huge_page_size; size_t max_successive_merges; size_t inplace_update_num_locks; std::shared_ptr prefix_extractor; // Compaction related options bool disable_auto_compactions; uint64_t soft_pending_compaction_bytes_limit; uint64_t hard_pending_compaction_bytes_limit; int level0_file_num_compaction_trigger; int level0_slowdown_writes_trigger; int level0_stop_writes_trigger; uint64_t max_compaction_bytes; uint64_t target_file_size_base; int target_file_size_multiplier; uint64_t max_bytes_for_level_base; double max_bytes_for_level_multiplier; uint64_t ttl; uint64_t periodic_compaction_seconds; std::vector max_bytes_for_level_multiplier_additional; CompactionOptionsFIFO compaction_options_fifo; CompactionOptionsUniversal compaction_options_universal; // Misc options uint64_t max_sequential_skip_in_iterations; bool paranoid_file_checks; bool report_bg_io_stats; CompressionType compression; CompressionType bottommost_compression; CompressionOptions compression_opts; CompressionOptions bottommost_compression_opts; uint64_t sample_for_compression; // Derived options // Per-level target file size. 
std::vector max_file_size; }; uint64_t MultiplyCheckOverflow(uint64_t op1, double op2); // Get the max file size in a given level. uint64_t MaxFileSizeForLevel(const MutableCFOptions& cf_options, int level, CompactionStyle compaction_style, int base_level = 1, bool level_compaction_dynamic_level_bytes = false); // Get the max size of an L0 file for which we will pin its meta-blocks when // `pin_l0_filter_and_index_blocks_in_cache` is set. size_t MaxFileSizeForL0MetaPin(const MutableCFOptions& cf_options); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/db_options.cc000066400000000000000000001105131370372246700174100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "options/db_options.h" #include #include "logging/logging.h" #include "options/options_helper.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/sst_file_manager.h" #include "rocksdb/wal_filter.h" namespace ROCKSDB_NAMESPACE { #ifndef ROCKSDB_LITE static std::unordered_map wal_recovery_mode_string_map = { {"kTolerateCorruptedTailRecords", WALRecoveryMode::kTolerateCorruptedTailRecords}, {"kAbsoluteConsistency", WALRecoveryMode::kAbsoluteConsistency}, {"kPointInTimeRecovery", WALRecoveryMode::kPointInTimeRecovery}, {"kSkipAnyCorruptedRecords", WALRecoveryMode::kSkipAnyCorruptedRecords}}; static std::unordered_map access_hint_string_map = {{"NONE", DBOptions::AccessHint::NONE}, {"NORMAL", DBOptions::AccessHint::NORMAL}, {"SEQUENTIAL", DBOptions::AccessHint::SEQUENTIAL}, {"WILLNEED", DBOptions::AccessHint::WILLNEED}}; static std::unordered_map info_log_level_string_map = {{"DEBUG_LEVEL", InfoLogLevel::DEBUG_LEVEL}, {"INFO_LEVEL", 
InfoLogLevel::INFO_LEVEL}, {"WARN_LEVEL", InfoLogLevel::WARN_LEVEL}, {"ERROR_LEVEL", InfoLogLevel::ERROR_LEVEL}, {"FATAL_LEVEL", InfoLogLevel::FATAL_LEVEL}, {"HEADER_LEVEL", InfoLogLevel::HEADER_LEVEL}}; std::unordered_map OptionsHelper::db_options_type_info = { /* // not yet supported std::shared_ptr row_cache; std::shared_ptr delete_scheduler; std::shared_ptr info_log; std::shared_ptr rate_limiter; std::shared_ptr statistics; std::vector db_paths; std::vector> listeners; */ {"advise_random_on_open", {offsetof(struct DBOptions, advise_random_on_open), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"allow_mmap_reads", {offsetof(struct DBOptions, allow_mmap_reads), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"allow_fallocate", {offsetof(struct DBOptions, allow_fallocate), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"allow_mmap_writes", {offsetof(struct DBOptions, allow_mmap_writes), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"use_direct_reads", {offsetof(struct DBOptions, use_direct_reads), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"use_direct_writes", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"use_direct_io_for_flush_and_compaction", {offsetof(struct DBOptions, use_direct_io_for_flush_and_compaction), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"allow_2pc", {offsetof(struct DBOptions, allow_2pc), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"allow_os_buffer", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kMutable, 0}}, {"create_if_missing", {offsetof(struct DBOptions, create_if_missing), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, 
{"create_missing_column_families", {offsetof(struct DBOptions, create_missing_column_families), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"disableDataSync", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"disable_data_sync", // for compatibility {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"enable_thread_tracking", {offsetof(struct DBOptions, enable_thread_tracking), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"error_if_exists", {offsetof(struct DBOptions, error_if_exists), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"is_fd_close_on_exec", {offsetof(struct DBOptions, is_fd_close_on_exec), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"paranoid_checks", {offsetof(struct DBOptions, paranoid_checks), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"skip_log_error_on_recovery", {offsetof(struct DBOptions, skip_log_error_on_recovery), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"skip_stats_update_on_db_open", {offsetof(struct DBOptions, skip_stats_update_on_db_open), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"skip_checking_sst_file_sizes_on_db_open", {offsetof(struct DBOptions, skip_checking_sst_file_sizes_on_db_open), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"new_table_reader_for_compaction_inputs", {offsetof(struct DBOptions, new_table_reader_for_compaction_inputs), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"compaction_readahead_size", {offsetof(struct DBOptions, compaction_readahead_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct MutableDBOptions, 
compaction_readahead_size)}},
        // In the entries below, options flagged kMutable end with the offset
        // of the same-named field in MutableDBOptions; kNone options end
        // with 0.
        {"random_access_max_buffer_size",
         {offsetof(struct DBOptions, random_access_max_buffer_size),
          OptionType::kSizeT, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"use_adaptive_mutex",
         {offsetof(struct DBOptions, use_adaptive_mutex), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"use_fsync",
         {offsetof(struct DBOptions, use_fsync), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"max_background_jobs",
         {offsetof(struct DBOptions, max_background_jobs), OptionType::kInt,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, max_background_jobs)}},
        {"max_background_compactions",
         {offsetof(struct DBOptions, max_background_compactions),
          OptionType::kInt, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, max_background_compactions)}},
        {"base_background_compactions",
         {offsetof(struct DBOptions, base_background_compactions),
          OptionType::kInt, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, base_background_compactions)}},
        {"max_background_flushes",
         {offsetof(struct DBOptions, max_background_flushes), OptionType::kInt,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, max_background_flushes)}},
        {"max_file_opening_threads",
         {offsetof(struct DBOptions, max_file_opening_threads),
          OptionType::kInt, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"max_open_files",
         {offsetof(struct DBOptions, max_open_files), OptionType::kInt,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, max_open_files)}},
        {"table_cache_numshardbits",
         {offsetof(struct DBOptions, table_cache_numshardbits),
          OptionType::kInt, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"db_write_buffer_size",
         {offsetof(struct DBOptions, db_write_buffer_size),
          OptionType::kSizeT, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"keep_log_file_num",
         {offsetof(struct DBOptions, keep_log_file_num), OptionType::kSizeT,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"recycle_log_file_num",
         {offsetof(struct DBOptions, recycle_log_file_num), OptionType::kSizeT,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"log_file_time_to_roll",
         {offsetof(struct DBOptions, log_file_time_to_roll), OptionType::kSizeT,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"manifest_preallocation_size",
         {offsetof(struct DBOptions, manifest_preallocation_size),
          OptionType::kSizeT, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"max_log_file_size",
         {offsetof(struct DBOptions, max_log_file_size), OptionType::kSizeT,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"db_log_dir",
         {offsetof(struct DBOptions, db_log_dir), OptionType::kString,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"wal_dir",
         {offsetof(struct DBOptions, wal_dir), OptionType::kString,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"max_subcompactions",
         {offsetof(struct DBOptions, max_subcompactions), OptionType::kUInt32T,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"WAL_size_limit_MB",
         {offsetof(struct DBOptions, WAL_size_limit_MB), OptionType::kUInt64T,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"WAL_ttl_seconds",
         {offsetof(struct DBOptions, WAL_ttl_seconds), OptionType::kUInt64T,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"bytes_per_sync",
         {offsetof(struct DBOptions, bytes_per_sync), OptionType::kUInt64T,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, bytes_per_sync)}},
        {"delayed_write_rate",
         {offsetof(struct DBOptions, delayed_write_rate), OptionType::kUInt64T,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions,
delayed_write_rate)}},
        // From here on some kNone options end with an offset into
        // ImmutableDBOptions instead of 0 (e.g. persist_stats_to_disk), and
        // the three enum-valued options are built with OptionTypeInfo::Enum
        // from their string maps.
        {"delete_obsolete_files_period_micros",
         {offsetof(struct DBOptions, delete_obsolete_files_period_micros),
          OptionType::kUInt64T, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions,
                   delete_obsolete_files_period_micros)}},
        {"max_manifest_file_size",
         {offsetof(struct DBOptions, max_manifest_file_size),
          OptionType::kUInt64T, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"max_total_wal_size",
         {offsetof(struct DBOptions, max_total_wal_size), OptionType::kUInt64T,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, max_total_wal_size)}},
        {"wal_bytes_per_sync",
         {offsetof(struct DBOptions, wal_bytes_per_sync), OptionType::kUInt64T,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, wal_bytes_per_sync)}},
        {"strict_bytes_per_sync",
         {offsetof(struct DBOptions, strict_bytes_per_sync),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, strict_bytes_per_sync)}},
        {"stats_dump_period_sec",
         {offsetof(struct DBOptions, stats_dump_period_sec), OptionType::kUInt,
          OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, stats_dump_period_sec)}},
        {"stats_persist_period_sec",
         {offsetof(struct DBOptions, stats_persist_period_sec),
          OptionType::kUInt, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, stats_persist_period_sec)}},
        {"persist_stats_to_disk",
         {offsetof(struct DBOptions, persist_stats_to_disk),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone,
          offsetof(struct ImmutableDBOptions, persist_stats_to_disk)}},
        {"stats_history_buffer_size",
         {offsetof(struct DBOptions, stats_history_buffer_size),
          OptionType::kSizeT, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, stats_history_buffer_size)}},
        {"fail_if_options_file_error",
         {offsetof(struct DBOptions, fail_if_options_file_error),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"enable_pipelined_write",
         {offsetof(struct DBOptions, enable_pipelined_write),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"unordered_write",
         {offsetof(struct DBOptions, unordered_write), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"allow_concurrent_memtable_write",
         {offsetof(struct DBOptions, allow_concurrent_memtable_write),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"wal_recovery_mode",
         OptionTypeInfo::Enum(
             offsetof(struct DBOptions, wal_recovery_mode),
             &wal_recovery_mode_string_map)},
        {"enable_write_thread_adaptive_yield",
         {offsetof(struct DBOptions, enable_write_thread_adaptive_yield),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"write_thread_slow_yield_usec",
         {offsetof(struct DBOptions, write_thread_slow_yield_usec),
          OptionType::kUInt64T, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"max_write_batch_group_size_bytes",
         {offsetof(struct DBOptions, max_write_batch_group_size_bytes),
          OptionType::kUInt64T, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"write_thread_max_yield_usec",
         {offsetof(struct DBOptions, write_thread_max_yield_usec),
          OptionType::kUInt64T, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"access_hint_on_compaction_start",
         OptionTypeInfo::Enum(
             offsetof(struct DBOptions, access_hint_on_compaction_start),
             &access_hint_string_map)},
        {"info_log_level",
         OptionTypeInfo::Enum(
             offsetof(struct DBOptions, info_log_level),
             &info_log_level_string_map)},
        {"dump_malloc_stats",
         {offsetof(struct DBOptions, dump_malloc_stats), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}},
        {"avoid_flush_during_recovery",
         {offsetof(struct DBOptions,
                   avoid_flush_during_recovery),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kNone, 0}},
        {"avoid_flush_during_shutdown",
         {offsetof(struct DBOptions, avoid_flush_during_shutdown),
          OptionType::kBoolean, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, avoid_flush_during_shutdown)}},
        {"writable_file_max_buffer_size",
         {offsetof(struct DBOptions, writable_file_max_buffer_size),
          OptionType::kSizeT, OptionVerificationType::kNormal,
          OptionTypeFlags::kMutable,
          offsetof(struct MutableDBOptions, writable_file_max_buffer_size)}},
        {"allow_ingest_behind",
         {offsetof(struct DBOptions, allow_ingest_behind), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone,
          offsetof(struct ImmutableDBOptions, allow_ingest_behind)}},
        {"preserve_deletes",
         {offsetof(struct DBOptions, preserve_deletes), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone,
          offsetof(struct ImmutableDBOptions, preserve_deletes)}},
        {"concurrent_prepare",  // Deprecated by two_write_queues
         {0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
          OptionTypeFlags::kNone, 0}},
        {"two_write_queues",
         {offsetof(struct DBOptions, two_write_queues), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone,
          offsetof(struct ImmutableDBOptions, two_write_queues)}},
        {"manual_wal_flush",
         {offsetof(struct DBOptions, manual_wal_flush), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone,
          offsetof(struct ImmutableDBOptions, manual_wal_flush)}},
        {"seq_per_batch",
         {0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
          OptionTypeFlags::kNone, 0}},
        {"atomic_flush",
         {offsetof(struct DBOptions, atomic_flush), OptionType::kBoolean,
          OptionVerificationType::kNormal, OptionTypeFlags::kNone,
          offsetof(struct ImmutableDBOptions, atomic_flush)}},
        {"avoid_unnecessary_blocking_io",
         {offsetof(struct DBOptions, avoid_unnecessary_blocking_io),
          OptionType::kBoolean,
OptionVerificationType::kNormal, OptionTypeFlags::kNone, offsetof(struct ImmutableDBOptions, avoid_unnecessary_blocking_io)}}, {"write_dbid_to_manifest", {offsetof(struct DBOptions, write_dbid_to_manifest), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"log_readahead_size", {offsetof(struct DBOptions, log_readahead_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"best_efforts_recovery", {offsetof(struct DBOptions, best_efforts_recovery), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, // The following properties were handled as special cases in ParseOption // This means that the properties could be read from the options file // but never written to the file or compared to each other. {"rate_limiter_bytes_per_sec", {offsetof(struct DBOptions, rate_limiter), OptionType::kUnknown, OptionVerificationType::kNormal, (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever), 0, // Parse the input value as a RateLimiter [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { auto limiter = reinterpret_cast*>(addr); limiter->reset(NewGenericRateLimiter( static_cast(ParseUint64(value)))); return Status::OK(); }}}, {"env", {offsetof(struct DBOptions, env), OptionType::kUnknown, OptionVerificationType::kNormal, (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever), 0, // Parse the input value as an Env [](const ConfigOptions& /*opts*/, const std::string& /*name*/, const std::string& value, char* addr) { auto old_env = reinterpret_cast(addr); // Get the old value Env* new_env = *old_env; // Set new to old Status s = Env::LoadEnv(value, &new_env); // Update new value if (s.ok()) { // It worked *old_env = new_env; // Update the old one } return s; }}}, }; #endif // ROCKSDB_LITE ImmutableDBOptions::ImmutableDBOptions() : ImmutableDBOptions(Options()) {} ImmutableDBOptions::ImmutableDBOptions(const 
DBOptions& options)
    : create_if_missing(options.create_if_missing),
      create_missing_column_families(options.create_missing_column_families),
      error_if_exists(options.error_if_exists),
      paranoid_checks(options.paranoid_checks),
      env(options.env),
      // options.env is dereferenced here, so it must not be null.
      fs(options.env->GetFileSystem()),
      rate_limiter(options.rate_limiter),
      sst_file_manager(options.sst_file_manager),
      info_log(options.info_log),
      info_log_level(options.info_log_level),
      max_file_opening_threads(options.max_file_opening_threads),
      statistics(options.statistics),
      use_fsync(options.use_fsync),
      db_paths(options.db_paths),
      db_log_dir(options.db_log_dir),
      wal_dir(options.wal_dir),
      max_subcompactions(options.max_subcompactions),
      max_log_file_size(options.max_log_file_size),
      log_file_time_to_roll(options.log_file_time_to_roll),
      keep_log_file_num(options.keep_log_file_num),
      recycle_log_file_num(options.recycle_log_file_num),
      max_manifest_file_size(options.max_manifest_file_size),
      table_cache_numshardbits(options.table_cache_numshardbits),
      // The two WAL limits are spelled WAL_* in DBOptions and wal_* here.
      wal_ttl_seconds(options.WAL_ttl_seconds),
      wal_size_limit_mb(options.WAL_size_limit_MB),
      max_write_batch_group_size_bytes(
          options.max_write_batch_group_size_bytes),
      manifest_preallocation_size(options.manifest_preallocation_size),
      allow_mmap_reads(options.allow_mmap_reads),
      allow_mmap_writes(options.allow_mmap_writes),
      use_direct_reads(options.use_direct_reads),
      use_direct_io_for_flush_and_compaction(
          options.use_direct_io_for_flush_and_compaction),
      allow_fallocate(options.allow_fallocate),
      is_fd_close_on_exec(options.is_fd_close_on_exec),
      advise_random_on_open(options.advise_random_on_open),
      db_write_buffer_size(options.db_write_buffer_size),
      write_buffer_manager(options.write_buffer_manager),
      access_hint_on_compaction_start(options.access_hint_on_compaction_start),
      new_table_reader_for_compaction_inputs(
          options.new_table_reader_for_compaction_inputs),
      random_access_max_buffer_size(options.random_access_max_buffer_size),
      use_adaptive_mutex(options.use_adaptive_mutex),
      listeners(options.listeners),
      enable_thread_tracking(options.enable_thread_tracking),
      enable_pipelined_write(options.enable_pipelined_write),
      unordered_write(options.unordered_write),
      allow_concurrent_memtable_write(options.allow_concurrent_memtable_write),
      enable_write_thread_adaptive_yield(
          options.enable_write_thread_adaptive_yield),
      write_thread_max_yield_usec(options.write_thread_max_yield_usec),
      write_thread_slow_yield_usec(options.write_thread_slow_yield_usec),
      skip_stats_update_on_db_open(options.skip_stats_update_on_db_open),
      skip_checking_sst_file_sizes_on_db_open(
          options.skip_checking_sst_file_sizes_on_db_open),
      wal_recovery_mode(options.wal_recovery_mode),
      allow_2pc(options.allow_2pc),
      row_cache(options.row_cache),
      // wal_filter is only initialized in non-LITE builds.
#ifndef ROCKSDB_LITE
      wal_filter(options.wal_filter),
#endif  // ROCKSDB_LITE
      fail_if_options_file_error(options.fail_if_options_file_error),
      dump_malloc_stats(options.dump_malloc_stats),
      avoid_flush_during_recovery(options.avoid_flush_during_recovery),
      allow_ingest_behind(options.allow_ingest_behind),
      preserve_deletes(options.preserve_deletes),
      two_write_queues(options.two_write_queues),
      manual_wal_flush(options.manual_wal_flush),
      atomic_flush(options.atomic_flush),
      avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io),
      persist_stats_to_disk(options.persist_stats_to_disk),
      write_dbid_to_manifest(options.write_dbid_to_manifest),
      log_readahead_size(options.log_readahead_size),
      file_checksum_gen_factory(options.file_checksum_gen_factory),
      best_efforts_recovery(options.best_efforts_recovery) {
}

// Writes the immutable DB options to `log`, one header-level log line per
// option. Pointer-like members (env, info_log, statistics, ...) are logged as
// addresses with %p; `fs` is logged by its Name().
void ImmutableDBOptions::Dump(Logger* log) const {
  ROCKS_LOG_HEADER(log, " Options.error_if_exists: %d",
                   error_if_exists);
  ROCKS_LOG_HEADER(log, " Options.create_if_missing: %d",
                   create_if_missing);
  ROCKS_LOG_HEADER(log, " Options.paranoid_checks: %d",
                   paranoid_checks);
  ROCKS_LOG_HEADER(log, " Options.env: %p",
                   env);
  ROCKS_LOG_HEADER(log, " Options.fs: %s",
                   fs->Name());
  ROCKS_LOG_HEADER(log, " Options.info_log: %p",
                   info_log.get());
ROCKS_LOG_HEADER(log, " Options.max_file_opening_threads: %d", max_file_opening_threads); ROCKS_LOG_HEADER(log, " Options.statistics: %p", statistics.get()); ROCKS_LOG_HEADER(log, " Options.use_fsync: %d", use_fsync); ROCKS_LOG_HEADER( log, " Options.max_log_file_size: %" ROCKSDB_PRIszt, max_log_file_size); ROCKS_LOG_HEADER(log, " Options.max_manifest_file_size: %" PRIu64, max_manifest_file_size); ROCKS_LOG_HEADER( log, " Options.log_file_time_to_roll: %" ROCKSDB_PRIszt, log_file_time_to_roll); ROCKS_LOG_HEADER( log, " Options.keep_log_file_num: %" ROCKSDB_PRIszt, keep_log_file_num); ROCKS_LOG_HEADER( log, " Options.recycle_log_file_num: %" ROCKSDB_PRIszt, recycle_log_file_num); ROCKS_LOG_HEADER(log, " Options.allow_fallocate: %d", allow_fallocate); ROCKS_LOG_HEADER(log, " Options.allow_mmap_reads: %d", allow_mmap_reads); ROCKS_LOG_HEADER(log, " Options.allow_mmap_writes: %d", allow_mmap_writes); ROCKS_LOG_HEADER(log, " Options.use_direct_reads: %d", use_direct_reads); ROCKS_LOG_HEADER(log, " " "Options.use_direct_io_for_flush_and_compaction: %d", use_direct_io_for_flush_and_compaction); ROCKS_LOG_HEADER(log, " Options.create_missing_column_families: %d", create_missing_column_families); ROCKS_LOG_HEADER(log, " Options.db_log_dir: %s", db_log_dir.c_str()); ROCKS_LOG_HEADER(log, " Options.wal_dir: %s", wal_dir.c_str()); ROCKS_LOG_HEADER(log, " Options.table_cache_numshardbits: %d", table_cache_numshardbits); ROCKS_LOG_HEADER(log, " Options.max_subcompactions: %" PRIu32, max_subcompactions); ROCKS_LOG_HEADER(log, " Options.WAL_ttl_seconds: %" PRIu64, wal_ttl_seconds); ROCKS_LOG_HEADER(log, " Options.WAL_size_limit_MB: %" PRIu64, wal_size_limit_mb); ROCKS_LOG_HEADER(log, " " "Options.max_write_batch_group_size_bytes: %" PRIu64, max_write_batch_group_size_bytes); ROCKS_LOG_HEADER( log, " Options.manifest_preallocation_size: %" ROCKSDB_PRIszt, manifest_preallocation_size); ROCKS_LOG_HEADER(log, " Options.is_fd_close_on_exec: %d", is_fd_close_on_exec); 
ROCKS_LOG_HEADER(log, " Options.advise_random_on_open: %d", advise_random_on_open); ROCKS_LOG_HEADER( log, " Options.db_write_buffer_size: %" ROCKSDB_PRIszt, db_write_buffer_size); ROCKS_LOG_HEADER(log, " Options.write_buffer_manager: %p", write_buffer_manager.get()); ROCKS_LOG_HEADER(log, " Options.access_hint_on_compaction_start: %d", static_cast(access_hint_on_compaction_start)); ROCKS_LOG_HEADER(log, " Options.new_table_reader_for_compaction_inputs: %d", new_table_reader_for_compaction_inputs); ROCKS_LOG_HEADER( log, " Options.random_access_max_buffer_size: %" ROCKSDB_PRIszt, random_access_max_buffer_size); ROCKS_LOG_HEADER(log, " Options.use_adaptive_mutex: %d", use_adaptive_mutex); ROCKS_LOG_HEADER(log, " Options.rate_limiter: %p", rate_limiter.get()); Header( log, " Options.sst_file_manager.rate_bytes_per_sec: %" PRIi64, sst_file_manager ? sst_file_manager->GetDeleteRateBytesPerSecond() : 0); ROCKS_LOG_HEADER(log, " Options.wal_recovery_mode: %d", static_cast(wal_recovery_mode)); ROCKS_LOG_HEADER(log, " Options.enable_thread_tracking: %d", enable_thread_tracking); ROCKS_LOG_HEADER(log, " Options.enable_pipelined_write: %d", enable_pipelined_write); ROCKS_LOG_HEADER(log, " Options.unordered_write: %d", unordered_write); ROCKS_LOG_HEADER(log, " Options.allow_concurrent_memtable_write: %d", allow_concurrent_memtable_write); ROCKS_LOG_HEADER(log, " Options.enable_write_thread_adaptive_yield: %d", enable_write_thread_adaptive_yield); ROCKS_LOG_HEADER(log, " Options.write_thread_max_yield_usec: %" PRIu64, write_thread_max_yield_usec); ROCKS_LOG_HEADER(log, " Options.write_thread_slow_yield_usec: %" PRIu64, write_thread_slow_yield_usec); if (row_cache) { ROCKS_LOG_HEADER( log, " Options.row_cache: %" ROCKSDB_PRIszt, row_cache->GetCapacity()); } else { ROCKS_LOG_HEADER(log, " Options.row_cache: None"); } #ifndef ROCKSDB_LITE ROCKS_LOG_HEADER(log, " Options.wal_filter: %s", wal_filter ? 
wal_filter->Name() : "None"); #endif // ROCKDB_LITE ROCKS_LOG_HEADER(log, " Options.avoid_flush_during_recovery: %d", avoid_flush_during_recovery); ROCKS_LOG_HEADER(log, " Options.allow_ingest_behind: %d", allow_ingest_behind); ROCKS_LOG_HEADER(log, " Options.preserve_deletes: %d", preserve_deletes); ROCKS_LOG_HEADER(log, " Options.two_write_queues: %d", two_write_queues); ROCKS_LOG_HEADER(log, " Options.manual_wal_flush: %d", manual_wal_flush); ROCKS_LOG_HEADER(log, " Options.atomic_flush: %d", atomic_flush); ROCKS_LOG_HEADER(log, " Options.avoid_unnecessary_blocking_io: %d", avoid_unnecessary_blocking_io); ROCKS_LOG_HEADER(log, " Options.persist_stats_to_disk: %u", persist_stats_to_disk); ROCKS_LOG_HEADER(log, " Options.write_dbid_to_manifest: %d", write_dbid_to_manifest); ROCKS_LOG_HEADER( log, " Options.log_readahead_size: %" ROCKSDB_PRIszt, log_readahead_size); ROCKS_LOG_HEADER(log, " Options.file_checksum_gen_factory: %s", file_checksum_gen_factory ? file_checksum_gen_factory->Name() : kUnknownFileChecksumFuncName); ROCKS_LOG_HEADER(log, " Options.best_efforts_recovery: %d", static_cast(best_efforts_recovery)); } MutableDBOptions::MutableDBOptions() : max_background_jobs(2), base_background_compactions(-1), max_background_compactions(-1), avoid_flush_during_shutdown(false), writable_file_max_buffer_size(1024 * 1024), delayed_write_rate(2 * 1024U * 1024U), max_total_wal_size(0), delete_obsolete_files_period_micros(6ULL * 60 * 60 * 1000000), stats_dump_period_sec(600), stats_persist_period_sec(600), stats_history_buffer_size(1024 * 1024), max_open_files(-1), bytes_per_sync(0), wal_bytes_per_sync(0), strict_bytes_per_sync(false), compaction_readahead_size(0), max_background_flushes(-1) {} MutableDBOptions::MutableDBOptions(const DBOptions& options) : max_background_jobs(options.max_background_jobs), base_background_compactions(options.base_background_compactions), max_background_compactions(options.max_background_compactions), 
avoid_flush_during_shutdown(options.avoid_flush_during_shutdown), writable_file_max_buffer_size(options.writable_file_max_buffer_size), delayed_write_rate(options.delayed_write_rate), max_total_wal_size(options.max_total_wal_size), delete_obsolete_files_period_micros( options.delete_obsolete_files_period_micros), stats_dump_period_sec(options.stats_dump_period_sec), stats_persist_period_sec(options.stats_persist_period_sec), stats_history_buffer_size(options.stats_history_buffer_size), max_open_files(options.max_open_files), bytes_per_sync(options.bytes_per_sync), wal_bytes_per_sync(options.wal_bytes_per_sync), strict_bytes_per_sync(options.strict_bytes_per_sync), compaction_readahead_size(options.compaction_readahead_size), max_background_flushes(options.max_background_flushes) {} void MutableDBOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER(log, " Options.max_background_jobs: %d", max_background_jobs); ROCKS_LOG_HEADER(log, " Options.max_background_compactions: %d", max_background_compactions); ROCKS_LOG_HEADER(log, " Options.avoid_flush_during_shutdown: %d", avoid_flush_during_shutdown); ROCKS_LOG_HEADER( log, " Options.writable_file_max_buffer_size: %" ROCKSDB_PRIszt, writable_file_max_buffer_size); ROCKS_LOG_HEADER(log, " Options.delayed_write_rate : %" PRIu64, delayed_write_rate); ROCKS_LOG_HEADER(log, " Options.max_total_wal_size: %" PRIu64, max_total_wal_size); ROCKS_LOG_HEADER( log, " Options.delete_obsolete_files_period_micros: %" PRIu64, delete_obsolete_files_period_micros); ROCKS_LOG_HEADER(log, " Options.stats_dump_period_sec: %u", stats_dump_period_sec); ROCKS_LOG_HEADER(log, " Options.stats_persist_period_sec: %d", stats_persist_period_sec); ROCKS_LOG_HEADER( log, " Options.stats_history_buffer_size: %" ROCKSDB_PRIszt, stats_history_buffer_size); ROCKS_LOG_HEADER(log, " Options.max_open_files: %d", max_open_files); ROCKS_LOG_HEADER(log, " Options.bytes_per_sync: %" PRIu64, bytes_per_sync); ROCKS_LOG_HEADER(log, " Options.wal_bytes_per_sync: %" 
PRIu64, wal_bytes_per_sync); ROCKS_LOG_HEADER(log, " Options.strict_bytes_per_sync: %d", strict_bytes_per_sync); ROCKS_LOG_HEADER(log, " Options.compaction_readahead_size: %" ROCKSDB_PRIszt, compaction_readahead_size); ROCKS_LOG_HEADER(log, " Options.max_background_flushes: %d", max_background_flushes); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/db_options.h000066400000000000000000000071171370372246700172570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/options.h" namespace ROCKSDB_NAMESPACE { struct ImmutableDBOptions { ImmutableDBOptions(); explicit ImmutableDBOptions(const DBOptions& options); void Dump(Logger* log) const; bool create_if_missing; bool create_missing_column_families; bool error_if_exists; bool paranoid_checks; Env* env; std::shared_ptr fs; std::shared_ptr rate_limiter; std::shared_ptr sst_file_manager; std::shared_ptr info_log; InfoLogLevel info_log_level; int max_file_opening_threads; std::shared_ptr statistics; bool use_fsync; std::vector db_paths; std::string db_log_dir; std::string wal_dir; uint32_t max_subcompactions; size_t max_log_file_size; size_t log_file_time_to_roll; size_t keep_log_file_num; size_t recycle_log_file_num; uint64_t max_manifest_file_size; int table_cache_numshardbits; uint64_t wal_ttl_seconds; uint64_t wal_size_limit_mb; uint64_t max_write_batch_group_size_bytes; size_t manifest_preallocation_size; bool allow_mmap_reads; bool allow_mmap_writes; bool use_direct_reads; bool use_direct_io_for_flush_and_compaction; bool allow_fallocate; bool is_fd_close_on_exec; bool advise_random_on_open; size_t db_write_buffer_size; std::shared_ptr write_buffer_manager; DBOptions::AccessHint access_hint_on_compaction_start; bool 
new_table_reader_for_compaction_inputs; size_t random_access_max_buffer_size; bool use_adaptive_mutex; std::vector> listeners; bool enable_thread_tracking; bool enable_pipelined_write; bool unordered_write; bool allow_concurrent_memtable_write; bool enable_write_thread_adaptive_yield; uint64_t write_thread_max_yield_usec; uint64_t write_thread_slow_yield_usec; bool skip_stats_update_on_db_open; bool skip_checking_sst_file_sizes_on_db_open; WALRecoveryMode wal_recovery_mode; bool allow_2pc; std::shared_ptr row_cache; #ifndef ROCKSDB_LITE WalFilter* wal_filter; #endif // ROCKSDB_LITE bool fail_if_options_file_error; bool dump_malloc_stats; bool avoid_flush_during_recovery; bool allow_ingest_behind; bool preserve_deletes; bool two_write_queues; bool manual_wal_flush; bool atomic_flush; bool avoid_unnecessary_blocking_io; bool persist_stats_to_disk; bool write_dbid_to_manifest; size_t log_readahead_size; std::shared_ptr file_checksum_gen_factory; bool best_efforts_recovery; }; struct MutableDBOptions { MutableDBOptions(); explicit MutableDBOptions(const MutableDBOptions& options) = default; explicit MutableDBOptions(const DBOptions& options); void Dump(Logger* log) const; int max_background_jobs; int base_background_compactions; int max_background_compactions; bool avoid_flush_during_shutdown; size_t writable_file_max_buffer_size; uint64_t delayed_write_rate; uint64_t max_total_wal_size; uint64_t delete_obsolete_files_period_micros; unsigned int stats_dump_period_sec; unsigned int stats_persist_period_sec; size_t stats_history_buffer_size; int max_open_files; uint64_t bytes_per_sync; uint64_t wal_bytes_per_sync; bool strict_bytes_per_sync; size_t compaction_readahead_size; int max_background_flushes; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/options.cc000066400000000000000000000645531370372246700167570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "rocksdb/options.h" #include #include #include "monitoring/statistics.h" #include "options/db_options.h" #include "options/options_helper.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/sst_file_manager.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "rocksdb/wal_filter.h" #include "table/block_based/block_based_table_factory.h" #include "util/compression.h" namespace ROCKSDB_NAMESPACE { AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions() { assert(memtable_factory.get() != nullptr); } AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options) : max_write_buffer_number(options.max_write_buffer_number), min_write_buffer_number_to_merge( options.min_write_buffer_number_to_merge), max_write_buffer_number_to_maintain( options.max_write_buffer_number_to_maintain), max_write_buffer_size_to_maintain( options.max_write_buffer_size_to_maintain), inplace_update_support(options.inplace_update_support), inplace_update_num_locks(options.inplace_update_num_locks), inplace_callback(options.inplace_callback), memtable_prefix_bloom_size_ratio( options.memtable_prefix_bloom_size_ratio), memtable_whole_key_filtering(options.memtable_whole_key_filtering), memtable_huge_page_size(options.memtable_huge_page_size), memtable_insert_with_hint_prefix_extractor( 
options.memtable_insert_with_hint_prefix_extractor), bloom_locality(options.bloom_locality), arena_block_size(options.arena_block_size), compression_per_level(options.compression_per_level), num_levels(options.num_levels), level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger), level0_stop_writes_trigger(options.level0_stop_writes_trigger), target_file_size_base(options.target_file_size_base), target_file_size_multiplier(options.target_file_size_multiplier), level_compaction_dynamic_level_bytes( options.level_compaction_dynamic_level_bytes), max_bytes_for_level_multiplier(options.max_bytes_for_level_multiplier), max_bytes_for_level_multiplier_additional( options.max_bytes_for_level_multiplier_additional), max_compaction_bytes(options.max_compaction_bytes), soft_pending_compaction_bytes_limit( options.soft_pending_compaction_bytes_limit), hard_pending_compaction_bytes_limit( options.hard_pending_compaction_bytes_limit), compaction_style(options.compaction_style), compaction_pri(options.compaction_pri), compaction_options_universal(options.compaction_options_universal), compaction_options_fifo(options.compaction_options_fifo), max_sequential_skip_in_iterations( options.max_sequential_skip_in_iterations), memtable_factory(options.memtable_factory), table_properties_collector_factories( options.table_properties_collector_factories), max_successive_merges(options.max_successive_merges), optimize_filters_for_hits(options.optimize_filters_for_hits), paranoid_file_checks(options.paranoid_file_checks), force_consistency_checks(options.force_consistency_checks), report_bg_io_stats(options.report_bg_io_stats), ttl(options.ttl), periodic_compaction_seconds(options.periodic_compaction_seconds), sample_for_compression(options.sample_for_compression) { assert(memtable_factory.get() != nullptr); if (max_bytes_for_level_multiplier_additional.size() < static_cast(num_levels)) { max_bytes_for_level_multiplier_additional.resize(num_levels, 1); } } 
ColumnFamilyOptions::ColumnFamilyOptions() : compression(Snappy_Supported() ? kSnappyCompression : kNoCompression), table_factory( std::shared_ptr(new BlockBasedTableFactory())) {} ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) : ColumnFamilyOptions(*static_cast(&options)) {} DBOptions::DBOptions() {} DBOptions::DBOptions(const Options& options) : DBOptions(*static_cast(&options)) {} void DBOptions::Dump(Logger* log) const { ImmutableDBOptions(*this).Dump(log); MutableDBOptions(*this).Dump(log); } // DBOptions::Dump void ColumnFamilyOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER(log, " Options.comparator: %s", comparator->Name()); ROCKS_LOG_HEADER(log, " Options.merge_operator: %s", merge_operator ? merge_operator->Name() : "None"); ROCKS_LOG_HEADER(log, " Options.compaction_filter: %s", compaction_filter ? compaction_filter->Name() : "None"); ROCKS_LOG_HEADER( log, " Options.compaction_filter_factory: %s", compaction_filter_factory ? compaction_filter_factory->Name() : "None"); ROCKS_LOG_HEADER(log, " Options.memtable_factory: %s", memtable_factory->Name()); ROCKS_LOG_HEADER(log, " Options.table_factory: %s", table_factory->Name()); ROCKS_LOG_HEADER(log, " table_factory options: %s", table_factory->GetPrintableTableOptions().c_str()); ROCKS_LOG_HEADER(log, " Options.write_buffer_size: %" ROCKSDB_PRIszt, write_buffer_size); ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number: %d", max_write_buffer_number); if (!compression_per_level.empty()) { for (unsigned int i = 0; i < compression_per_level.size(); i++) { ROCKS_LOG_HEADER( log, " Options.compression[%d]: %s", i, CompressionTypeToString(compression_per_level[i]).c_str()); } } else { ROCKS_LOG_HEADER(log, " Options.compression: %s", CompressionTypeToString(compression).c_str()); } ROCKS_LOG_HEADER( log, " Options.bottommost_compression: %s", bottommost_compression == kDisableCompressionOption ? 
"Disabled" : CompressionTypeToString(bottommost_compression).c_str()); ROCKS_LOG_HEADER( log, " Options.prefix_extractor: %s", prefix_extractor == nullptr ? "nullptr" : prefix_extractor->Name()); ROCKS_LOG_HEADER(log, " Options.memtable_insert_with_hint_prefix_extractor: %s", memtable_insert_with_hint_prefix_extractor == nullptr ? "nullptr" : memtable_insert_with_hint_prefix_extractor->Name()); ROCKS_LOG_HEADER(log, " Options.num_levels: %d", num_levels); ROCKS_LOG_HEADER(log, " Options.min_write_buffer_number_to_merge: %d", min_write_buffer_number_to_merge); ROCKS_LOG_HEADER(log, " Options.max_write_buffer_number_to_maintain: %d", max_write_buffer_number_to_maintain); ROCKS_LOG_HEADER(log, " Options.max_write_buffer_size_to_maintain: %" PRIu64, max_write_buffer_size_to_maintain); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.window_bits: %d", bottommost_compression_opts.window_bits); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.level: %d", bottommost_compression_opts.level); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.strategy: %d", bottommost_compression_opts.strategy); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.max_dict_bytes: " "%" PRIu32, bottommost_compression_opts.max_dict_bytes); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.zstd_max_train_bytes: " "%" PRIu32, bottommost_compression_opts.zstd_max_train_bytes); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.parallel_threads: " "%" PRIu32, bottommost_compression_opts.parallel_threads); ROCKS_LOG_HEADER( log, " Options.bottommost_compression_opts.enabled: %s", bottommost_compression_opts.enabled ? 
"true" : "false"); ROCKS_LOG_HEADER(log, " Options.compression_opts.window_bits: %d", compression_opts.window_bits); ROCKS_LOG_HEADER(log, " Options.compression_opts.level: %d", compression_opts.level); ROCKS_LOG_HEADER(log, " Options.compression_opts.strategy: %d", compression_opts.strategy); ROCKS_LOG_HEADER( log, " Options.compression_opts.max_dict_bytes: %" PRIu32, compression_opts.max_dict_bytes); ROCKS_LOG_HEADER(log, " Options.compression_opts.zstd_max_train_bytes: " "%" PRIu32, compression_opts.zstd_max_train_bytes); ROCKS_LOG_HEADER(log, " Options.compression_opts.parallel_threads: " "%" PRIu32, compression_opts.parallel_threads); ROCKS_LOG_HEADER(log, " Options.compression_opts.enabled: %s", compression_opts.enabled ? "true" : "false"); ROCKS_LOG_HEADER(log, " Options.level0_file_num_compaction_trigger: %d", level0_file_num_compaction_trigger); ROCKS_LOG_HEADER(log, " Options.level0_slowdown_writes_trigger: %d", level0_slowdown_writes_trigger); ROCKS_LOG_HEADER(log, " Options.level0_stop_writes_trigger: %d", level0_stop_writes_trigger); ROCKS_LOG_HEADER( log, " Options.target_file_size_base: %" PRIu64, target_file_size_base); ROCKS_LOG_HEADER(log, " Options.target_file_size_multiplier: %d", target_file_size_multiplier); ROCKS_LOG_HEADER( log, " Options.max_bytes_for_level_base: %" PRIu64, max_bytes_for_level_base); ROCKS_LOG_HEADER(log, "Options.level_compaction_dynamic_level_bytes: %d", level_compaction_dynamic_level_bytes); ROCKS_LOG_HEADER(log, " Options.max_bytes_for_level_multiplier: %f", max_bytes_for_level_multiplier); for (size_t i = 0; i < max_bytes_for_level_multiplier_additional.size(); i++) { ROCKS_LOG_HEADER( log, "Options.max_bytes_for_level_multiplier_addtl[%" ROCKSDB_PRIszt "]: %d", i, max_bytes_for_level_multiplier_additional[i]); } ROCKS_LOG_HEADER( log, " Options.max_sequential_skip_in_iterations: %" PRIu64, max_sequential_skip_in_iterations); ROCKS_LOG_HEADER( log, " Options.max_compaction_bytes: %" PRIu64, max_compaction_bytes); 
ROCKS_LOG_HEADER( log, " Options.arena_block_size: %" ROCKSDB_PRIszt, arena_block_size); ROCKS_LOG_HEADER(log, " Options.soft_pending_compaction_bytes_limit: %" PRIu64, soft_pending_compaction_bytes_limit); ROCKS_LOG_HEADER(log, " Options.hard_pending_compaction_bytes_limit: %" PRIu64, hard_pending_compaction_bytes_limit); ROCKS_LOG_HEADER(log, " Options.rate_limit_delay_max_milliseconds: %u", rate_limit_delay_max_milliseconds); ROCKS_LOG_HEADER(log, " Options.disable_auto_compactions: %d", disable_auto_compactions); const auto& it_compaction_style = compaction_style_to_string.find(compaction_style); std::string str_compaction_style; if (it_compaction_style == compaction_style_to_string.end()) { assert(false); str_compaction_style = "unknown_" + std::to_string(compaction_style); } else { str_compaction_style = it_compaction_style->second; } ROCKS_LOG_HEADER(log, " Options.compaction_style: %s", str_compaction_style.c_str()); const auto& it_compaction_pri = compaction_pri_to_string.find(compaction_pri); std::string str_compaction_pri; if (it_compaction_pri == compaction_pri_to_string.end()) { assert(false); str_compaction_pri = "unknown_" + std::to_string(compaction_pri); } else { str_compaction_pri = it_compaction_pri->second; } ROCKS_LOG_HEADER(log, " Options.compaction_pri: %s", str_compaction_pri.c_str()); ROCKS_LOG_HEADER(log, "Options.compaction_options_universal.size_ratio: %u", compaction_options_universal.size_ratio); ROCKS_LOG_HEADER(log, "Options.compaction_options_universal.min_merge_width: %u", compaction_options_universal.min_merge_width); ROCKS_LOG_HEADER(log, "Options.compaction_options_universal.max_merge_width: %u", compaction_options_universal.max_merge_width); ROCKS_LOG_HEADER( log, "Options.compaction_options_universal." 
"max_size_amplification_percent: %u", compaction_options_universal.max_size_amplification_percent); ROCKS_LOG_HEADER( log, "Options.compaction_options_universal.compression_size_percent: %d", compaction_options_universal.compression_size_percent); const auto& it_compaction_stop_style = compaction_stop_style_to_string.find( compaction_options_universal.stop_style); std::string str_compaction_stop_style; if (it_compaction_stop_style == compaction_stop_style_to_string.end()) { assert(false); str_compaction_stop_style = "unknown_" + std::to_string(compaction_options_universal.stop_style); } else { str_compaction_stop_style = it_compaction_stop_style->second; } ROCKS_LOG_HEADER(log, "Options.compaction_options_universal.stop_style: %s", str_compaction_stop_style.c_str()); ROCKS_LOG_HEADER( log, "Options.compaction_options_fifo.max_table_files_size: %" PRIu64, compaction_options_fifo.max_table_files_size); ROCKS_LOG_HEADER(log, "Options.compaction_options_fifo.allow_compaction: %d", compaction_options_fifo.allow_compaction); std::ostringstream collector_info; for (const auto& collector_factory : table_properties_collector_factories) { collector_info << collector_factory->ToString() << ';'; } ROCKS_LOG_HEADER( log, " Options.table_properties_collectors: %s", collector_info.str().c_str()); ROCKS_LOG_HEADER(log, " Options.inplace_update_support: %d", inplace_update_support); ROCKS_LOG_HEADER( log, " Options.inplace_update_num_locks: %" ROCKSDB_PRIszt, inplace_update_num_locks); // TODO: easier config for bloom (maybe based on avg key/value size) ROCKS_LOG_HEADER( log, " Options.memtable_prefix_bloom_size_ratio: %f", memtable_prefix_bloom_size_ratio); ROCKS_LOG_HEADER(log, " Options.memtable_whole_key_filtering: %d", memtable_whole_key_filtering); ROCKS_LOG_HEADER(log, " Options.memtable_huge_page_size: %" ROCKSDB_PRIszt, memtable_huge_page_size); ROCKS_LOG_HEADER(log, " Options.bloom_locality: %d", bloom_locality); ROCKS_LOG_HEADER( log, " Options.max_successive_merges: %" 
ROCKSDB_PRIszt, max_successive_merges); ROCKS_LOG_HEADER(log, " Options.optimize_filters_for_hits: %d", optimize_filters_for_hits); ROCKS_LOG_HEADER(log, " Options.paranoid_file_checks: %d", paranoid_file_checks); ROCKS_LOG_HEADER(log, " Options.force_consistency_checks: %d", force_consistency_checks); ROCKS_LOG_HEADER(log, " Options.report_bg_io_stats: %d", report_bg_io_stats); ROCKS_LOG_HEADER(log, " Options.ttl: %" PRIu64, ttl); ROCKS_LOG_HEADER(log, " Options.periodic_compaction_seconds: %" PRIu64, periodic_compaction_seconds); } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { DBOptions::Dump(log); ColumnFamilyOptions::Dump(log); } // Options::Dump void Options::DumpCFOptions(Logger* log) const { ColumnFamilyOptions::Dump(log); } // Options::DumpCFOptions // // The goal of this method is to create a configuration that // allows an application to write all files into L0 and // then do a single compaction to output all files into L1. Options* Options::PrepareForBulkLoad() { // never slowdown ingest. level0_file_num_compaction_trigger = (1<<30); level0_slowdown_writes_trigger = (1<<30); level0_stop_writes_trigger = (1<<30); soft_pending_compaction_bytes_limit = 0; hard_pending_compaction_bytes_limit = 0; // no auto compactions please. The application should issue a // manual compaction after all data is loaded into L0. disable_auto_compactions = true; // A manual compaction run should pick all files in L0 in // a single compaction run. max_compaction_bytes = (static_cast(1) << 60); // It is better to have only 2 levels, otherwise a manual // compaction would compact at every possible level, thereby // increasing the total time needed for compactions. num_levels = 2; // Need to allow more write buffers to allow more parallism // of flushes. max_write_buffer_number = 6; min_write_buffer_number_to_merge = 1; // When compaction is disabled, more parallel flush threads can // help with write throughput. 
max_background_flushes = 4; // Prevent a memtable flush to automatically promote files // to L1. This is helpful so that all files that are // input to the manual compaction are all at L0. max_background_compactions = 2; // The compaction would create large files in L1. target_file_size_base = 256 * 1024 * 1024; return this; } Options* Options::OptimizeForSmallDb() { // 16MB block cache std::shared_ptr cache = NewLRUCache(16 << 20); ColumnFamilyOptions::OptimizeForSmallDb(&cache); DBOptions::OptimizeForSmallDb(&cache); return this; } Options* Options::OldDefaults(int rocksdb_major_version, int rocksdb_minor_version) { ColumnFamilyOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version); DBOptions::OldDefaults(rocksdb_major_version, rocksdb_minor_version); return this; } DBOptions* DBOptions::OldDefaults(int rocksdb_major_version, int rocksdb_minor_version) { if (rocksdb_major_version < 4 || (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) { max_file_opening_threads = 1; table_cache_numshardbits = 4; } if (rocksdb_major_version < 5 || (rocksdb_major_version == 5 && rocksdb_minor_version < 2)) { delayed_write_rate = 2 * 1024U * 1024U; } else if (rocksdb_major_version < 5 || (rocksdb_major_version == 5 && rocksdb_minor_version < 6)) { delayed_write_rate = 16 * 1024U * 1024U; } max_open_files = 5000; wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; return this; } ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults( int rocksdb_major_version, int rocksdb_minor_version) { if (rocksdb_major_version < 5 || (rocksdb_major_version == 5 && rocksdb_minor_version <= 18)) { compaction_pri = CompactionPri::kByCompensatedSize; } if (rocksdb_major_version < 4 || (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) { write_buffer_size = 4 << 20; target_file_size_base = 2 * 1048576; max_bytes_for_level_base = 10 * 1048576; soft_pending_compaction_bytes_limit = 0; hard_pending_compaction_bytes_limit = 0; } if (rocksdb_major_version < 5) { 
level0_stop_writes_trigger = 24; } else if (rocksdb_major_version == 5 && rocksdb_minor_version < 2) { level0_stop_writes_trigger = 30; } return this; } // Optimization functions DBOptions* DBOptions::OptimizeForSmallDb(std::shared_ptr* cache) { max_file_opening_threads = 1; max_open_files = 5000; // Cost memtable to block cache too. std::shared_ptr wbm = std::make_shared( 0, (cache != nullptr) ? *cache : std::shared_ptr()); write_buffer_manager = wbm; return this; } ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb( std::shared_ptr* cache) { write_buffer_size = 2 << 20; target_file_size_base = 2 * 1048576; max_bytes_for_level_base = 10 * 1048576; soft_pending_compaction_bytes_limit = 256 * 1048576; hard_pending_compaction_bytes_limit = 1073741824ul; BlockBasedTableOptions table_options; table_options.block_cache = (cache != nullptr) ? *cache : std::shared_ptr(); table_options.cache_index_and_filter_blocks = true; // Two level iterator to avoid LRU cache imbalance table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; table_factory.reset(new BlockBasedTableFactory(table_options)); return this; } #ifndef ROCKSDB_LITE ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForPointLookup( uint64_t block_cache_size_mb) { BlockBasedTableOptions block_based_options; block_based_options.data_block_index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash; block_based_options.data_block_hash_table_util_ratio = 0.75; block_based_options.filter_policy.reset(NewBloomFilterPolicy(10)); block_based_options.block_cache = NewLRUCache(static_cast(block_cache_size_mb * 1024 * 1024)); table_factory.reset(new BlockBasedTableFactory(block_based_options)); memtable_prefix_bloom_size_ratio = 0.02; memtable_whole_key_filtering = true; return this; } ColumnFamilyOptions* ColumnFamilyOptions::OptimizeLevelStyleCompaction( uint64_t memtable_memory_budget) { write_buffer_size = static_cast(memtable_memory_budget / 4); // merge two memtables when flushing 
to L0 min_write_buffer_number_to_merge = 2; // this means we'll use 50% extra memory in the worst case, but will reduce // write stalls. max_write_buffer_number = 6; // start flushing L0->L1 as soon as possible. each file on level0 is // (memtable_memory_budget / 2). This will flush level 0 when it's bigger than // memtable_memory_budget. level0_file_num_compaction_trigger = 2; // doesn't really matter much, but we don't want to create too many files target_file_size_base = memtable_memory_budget / 8; // make Level1 size equal to Level0 size, so that L0->L1 compactions are fast max_bytes_for_level_base = memtable_memory_budget; // level style compaction compaction_style = kCompactionStyleLevel; // only compress levels >= 2 compression_per_level.resize(num_levels); for (int i = 0; i < num_levels; ++i) { if (i < 2) { compression_per_level[i] = kNoCompression; } else { compression_per_level[i] = LZ4_Supported() ? kLZ4Compression : (Snappy_Supported() ? kSnappyCompression : kNoCompression); } } return this; } ColumnFamilyOptions* ColumnFamilyOptions::OptimizeUniversalStyleCompaction( uint64_t memtable_memory_budget) { write_buffer_size = static_cast(memtable_memory_budget / 4); // merge two memtables when flushing to L0 min_write_buffer_number_to_merge = 2; // this means we'll use 50% extra memory in the worst case, but will reduce // write stalls. 
max_write_buffer_number = 6; // universal style compaction compaction_style = kCompactionStyleUniversal; compaction_options_universal.compression_size_percent = 80; return this; } DBOptions* DBOptions::IncreaseParallelism(int total_threads) { max_background_jobs = total_threads; env->SetBackgroundThreads(total_threads, Env::LOW); env->SetBackgroundThreads(1, Env::HIGH); return this; } #endif // !ROCKSDB_LITE ReadOptions::ReadOptions() : snapshot(nullptr), iterate_lower_bound(nullptr), iterate_upper_bound(nullptr), readahead_size(0), max_skippable_internal_keys(0), read_tier(kReadAllTier), verify_checksums(true), fill_cache(true), tailing(false), managed(false), total_order_seek(false), auto_prefix_mode(false), prefix_same_as_start(false), pin_data(false), background_purge_on_iterator_cleanup(false), ignore_range_deletions(false), iter_start_seqnum(0), timestamp(nullptr), iter_start_ts(nullptr), deadline(std::chrono::microseconds::zero()), value_size_soft_limit(std::numeric_limits::max()) {} ReadOptions::ReadOptions(bool cksum, bool cache) : snapshot(nullptr), iterate_lower_bound(nullptr), iterate_upper_bound(nullptr), readahead_size(0), max_skippable_internal_keys(0), read_tier(kReadAllTier), verify_checksums(cksum), fill_cache(cache), tailing(false), managed(false), total_order_seek(false), auto_prefix_mode(false), prefix_same_as_start(false), pin_data(false), background_purge_on_iterator_cleanup(false), ignore_range_deletions(false), iter_start_seqnum(0), timestamp(nullptr), iter_start_ts(nullptr), deadline(std::chrono::microseconds::zero()), value_size_soft_limit(std::numeric_limits::max()) {} } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/options_helper.cc000066400000000000000000001544021370372246700203070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "options/options_helper.h" #include #include #include #include #include #include "options/options_type.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/convenience.h" #include "rocksdb/filter_policy.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "rocksdb/utilities/object_registry.h" #include "table/block_based/block_based_table_factory.h" #include "table/plain/plain_table_factory.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, const MutableDBOptions& mutable_db_options) { DBOptions options; options.create_if_missing = immutable_db_options.create_if_missing; options.create_missing_column_families = immutable_db_options.create_missing_column_families; options.error_if_exists = immutable_db_options.error_if_exists; options.paranoid_checks = immutable_db_options.paranoid_checks; options.env = immutable_db_options.env; options.rate_limiter = immutable_db_options.rate_limiter; options.sst_file_manager = immutable_db_options.sst_file_manager; options.info_log = immutable_db_options.info_log; options.info_log_level = immutable_db_options.info_log_level; options.max_open_files = mutable_db_options.max_open_files; options.max_file_opening_threads = immutable_db_options.max_file_opening_threads; options.max_total_wal_size = mutable_db_options.max_total_wal_size; options.statistics = immutable_db_options.statistics; options.use_fsync = immutable_db_options.use_fsync; options.db_paths = immutable_db_options.db_paths; options.db_log_dir = immutable_db_options.db_log_dir; options.wal_dir = 
immutable_db_options.wal_dir; options.delete_obsolete_files_period_micros = mutable_db_options.delete_obsolete_files_period_micros; options.max_background_jobs = mutable_db_options.max_background_jobs; options.base_background_compactions = mutable_db_options.base_background_compactions; options.max_background_compactions = mutable_db_options.max_background_compactions; options.bytes_per_sync = mutable_db_options.bytes_per_sync; options.wal_bytes_per_sync = mutable_db_options.wal_bytes_per_sync; options.strict_bytes_per_sync = mutable_db_options.strict_bytes_per_sync; options.max_subcompactions = immutable_db_options.max_subcompactions; options.max_background_flushes = mutable_db_options.max_background_flushes; options.max_log_file_size = immutable_db_options.max_log_file_size; options.log_file_time_to_roll = immutable_db_options.log_file_time_to_roll; options.keep_log_file_num = immutable_db_options.keep_log_file_num; options.recycle_log_file_num = immutable_db_options.recycle_log_file_num; options.max_manifest_file_size = immutable_db_options.max_manifest_file_size; options.table_cache_numshardbits = immutable_db_options.table_cache_numshardbits; options.WAL_ttl_seconds = immutable_db_options.wal_ttl_seconds; options.WAL_size_limit_MB = immutable_db_options.wal_size_limit_mb; options.manifest_preallocation_size = immutable_db_options.manifest_preallocation_size; options.allow_mmap_reads = immutable_db_options.allow_mmap_reads; options.allow_mmap_writes = immutable_db_options.allow_mmap_writes; options.use_direct_reads = immutable_db_options.use_direct_reads; options.use_direct_io_for_flush_and_compaction = immutable_db_options.use_direct_io_for_flush_and_compaction; options.allow_fallocate = immutable_db_options.allow_fallocate; options.is_fd_close_on_exec = immutable_db_options.is_fd_close_on_exec; options.stats_dump_period_sec = mutable_db_options.stats_dump_period_sec; options.stats_persist_period_sec = mutable_db_options.stats_persist_period_sec; 
options.persist_stats_to_disk = immutable_db_options.persist_stats_to_disk; options.stats_history_buffer_size = mutable_db_options.stats_history_buffer_size; options.advise_random_on_open = immutable_db_options.advise_random_on_open; options.db_write_buffer_size = immutable_db_options.db_write_buffer_size; options.write_buffer_manager = immutable_db_options.write_buffer_manager; options.access_hint_on_compaction_start = immutable_db_options.access_hint_on_compaction_start; options.new_table_reader_for_compaction_inputs = immutable_db_options.new_table_reader_for_compaction_inputs; options.compaction_readahead_size = mutable_db_options.compaction_readahead_size; options.random_access_max_buffer_size = immutable_db_options.random_access_max_buffer_size; options.writable_file_max_buffer_size = mutable_db_options.writable_file_max_buffer_size; options.use_adaptive_mutex = immutable_db_options.use_adaptive_mutex; options.listeners = immutable_db_options.listeners; options.enable_thread_tracking = immutable_db_options.enable_thread_tracking; options.delayed_write_rate = mutable_db_options.delayed_write_rate; options.enable_pipelined_write = immutable_db_options.enable_pipelined_write; options.unordered_write = immutable_db_options.unordered_write; options.allow_concurrent_memtable_write = immutable_db_options.allow_concurrent_memtable_write; options.enable_write_thread_adaptive_yield = immutable_db_options.enable_write_thread_adaptive_yield; options.max_write_batch_group_size_bytes = immutable_db_options.max_write_batch_group_size_bytes; options.write_thread_max_yield_usec = immutable_db_options.write_thread_max_yield_usec; options.write_thread_slow_yield_usec = immutable_db_options.write_thread_slow_yield_usec; options.skip_stats_update_on_db_open = immutable_db_options.skip_stats_update_on_db_open; options.skip_checking_sst_file_sizes_on_db_open = immutable_db_options.skip_checking_sst_file_sizes_on_db_open; options.wal_recovery_mode = 
immutable_db_options.wal_recovery_mode; options.allow_2pc = immutable_db_options.allow_2pc; options.row_cache = immutable_db_options.row_cache; #ifndef ROCKSDB_LITE options.wal_filter = immutable_db_options.wal_filter; #endif // ROCKSDB_LITE options.fail_if_options_file_error = immutable_db_options.fail_if_options_file_error; options.dump_malloc_stats = immutable_db_options.dump_malloc_stats; options.avoid_flush_during_recovery = immutable_db_options.avoid_flush_during_recovery; options.avoid_flush_during_shutdown = mutable_db_options.avoid_flush_during_shutdown; options.allow_ingest_behind = immutable_db_options.allow_ingest_behind; options.preserve_deletes = immutable_db_options.preserve_deletes; options.two_write_queues = immutable_db_options.two_write_queues; options.manual_wal_flush = immutable_db_options.manual_wal_flush; options.atomic_flush = immutable_db_options.atomic_flush; options.avoid_unnecessary_blocking_io = immutable_db_options.avoid_unnecessary_blocking_io; options.log_readahead_size = immutable_db_options.log_readahead_size; options.file_checksum_gen_factory = immutable_db_options.file_checksum_gen_factory; options.best_efforts_recovery = immutable_db_options.best_efforts_recovery; return options; } ColumnFamilyOptions BuildColumnFamilyOptions( const ColumnFamilyOptions& options, const MutableCFOptions& mutable_cf_options) { ColumnFamilyOptions cf_opts(options); // Memtable related options cf_opts.write_buffer_size = mutable_cf_options.write_buffer_size; cf_opts.max_write_buffer_number = mutable_cf_options.max_write_buffer_number; cf_opts.arena_block_size = mutable_cf_options.arena_block_size; cf_opts.memtable_prefix_bloom_size_ratio = mutable_cf_options.memtable_prefix_bloom_size_ratio; cf_opts.memtable_whole_key_filtering = mutable_cf_options.memtable_whole_key_filtering; cf_opts.memtable_huge_page_size = mutable_cf_options.memtable_huge_page_size; cf_opts.max_successive_merges = mutable_cf_options.max_successive_merges; 
cf_opts.inplace_update_num_locks = mutable_cf_options.inplace_update_num_locks; cf_opts.prefix_extractor = mutable_cf_options.prefix_extractor; // Compaction related options cf_opts.disable_auto_compactions = mutable_cf_options.disable_auto_compactions; cf_opts.soft_pending_compaction_bytes_limit = mutable_cf_options.soft_pending_compaction_bytes_limit; cf_opts.hard_pending_compaction_bytes_limit = mutable_cf_options.hard_pending_compaction_bytes_limit; cf_opts.level0_file_num_compaction_trigger = mutable_cf_options.level0_file_num_compaction_trigger; cf_opts.level0_slowdown_writes_trigger = mutable_cf_options.level0_slowdown_writes_trigger; cf_opts.level0_stop_writes_trigger = mutable_cf_options.level0_stop_writes_trigger; cf_opts.max_compaction_bytes = mutable_cf_options.max_compaction_bytes; cf_opts.target_file_size_base = mutable_cf_options.target_file_size_base; cf_opts.target_file_size_multiplier = mutable_cf_options.target_file_size_multiplier; cf_opts.max_bytes_for_level_base = mutable_cf_options.max_bytes_for_level_base; cf_opts.max_bytes_for_level_multiplier = mutable_cf_options.max_bytes_for_level_multiplier; cf_opts.ttl = mutable_cf_options.ttl; cf_opts.periodic_compaction_seconds = mutable_cf_options.periodic_compaction_seconds; cf_opts.max_bytes_for_level_multiplier_additional.clear(); for (auto value : mutable_cf_options.max_bytes_for_level_multiplier_additional) { cf_opts.max_bytes_for_level_multiplier_additional.emplace_back(value); } cf_opts.compaction_options_fifo = mutable_cf_options.compaction_options_fifo; cf_opts.compaction_options_universal = mutable_cf_options.compaction_options_universal; // Misc options cf_opts.max_sequential_skip_in_iterations = mutable_cf_options.max_sequential_skip_in_iterations; cf_opts.paranoid_file_checks = mutable_cf_options.paranoid_file_checks; cf_opts.report_bg_io_stats = mutable_cf_options.report_bg_io_stats; cf_opts.compression = mutable_cf_options.compression; cf_opts.compression_opts = 
mutable_cf_options.compression_opts; cf_opts.bottommost_compression = mutable_cf_options.bottommost_compression; cf_opts.bottommost_compression_opts = mutable_cf_options.bottommost_compression_opts; cf_opts.sample_for_compression = mutable_cf_options.sample_for_compression; cf_opts.table_factory = options.table_factory; // TODO(yhchiang): find some way to handle the following derived options // * max_file_size return cf_opts; } std::map OptionsHelper::compaction_style_to_string = { {kCompactionStyleLevel, "kCompactionStyleLevel"}, {kCompactionStyleUniversal, "kCompactionStyleUniversal"}, {kCompactionStyleFIFO, "kCompactionStyleFIFO"}, {kCompactionStyleNone, "kCompactionStyleNone"}}; std::map OptionsHelper::compaction_pri_to_string = { {kByCompensatedSize, "kByCompensatedSize"}, {kOldestLargestSeqFirst, "kOldestLargestSeqFirst"}, {kOldestSmallestSeqFirst, "kOldestSmallestSeqFirst"}, {kMinOverlappingRatio, "kMinOverlappingRatio"}}; std::map OptionsHelper::compaction_stop_style_to_string = { {kCompactionStopStyleSimilarSize, "kCompactionStopStyleSimilarSize"}, {kCompactionStopStyleTotalSize, "kCompactionStopStyleTotalSize"}}; std::unordered_map OptionsHelper::checksum_type_string_map = {{"kNoChecksum", kNoChecksum}, {"kCRC32c", kCRC32c}, {"kxxHash", kxxHash}, {"kxxHash64", kxxHash64}}; std::unordered_map OptionsHelper::compression_type_string_map = { {"kNoCompression", kNoCompression}, {"kSnappyCompression", kSnappyCompression}, {"kZlibCompression", kZlibCompression}, {"kBZip2Compression", kBZip2Compression}, {"kLZ4Compression", kLZ4Compression}, {"kLZ4HCCompression", kLZ4HCCompression}, {"kXpressCompression", kXpressCompression}, {"kZSTD", kZSTD}, {"kZSTDNotFinalCompression", kZSTDNotFinalCompression}, {"kDisableCompressionOption", kDisableCompressionOption}}; std::vector GetSupportedCompressions() { std::vector supported_compressions; for (const auto& comp_to_name : OptionsHelper::compression_type_string_map) { CompressionType t = comp_to_name.second; if (t != 
kDisableCompressionOption && CompressionTypeSupported(t)) { supported_compressions.push_back(t); } } return supported_compressions; } #ifndef ROCKSDB_LITE bool ParseSliceTransformHelper( const std::string& kFixedPrefixName, const std::string& kCappedPrefixName, const std::string& value, std::shared_ptr* slice_transform) { const char* no_op_name = "rocksdb.Noop"; size_t no_op_length = strlen(no_op_name); auto& pe_value = value; if (pe_value.size() > kFixedPrefixName.size() && pe_value.compare(0, kFixedPrefixName.size(), kFixedPrefixName) == 0) { int prefix_length = ParseInt(trim(value.substr(kFixedPrefixName.size()))); slice_transform->reset(NewFixedPrefixTransform(prefix_length)); } else if (pe_value.size() > kCappedPrefixName.size() && pe_value.compare(0, kCappedPrefixName.size(), kCappedPrefixName) == 0) { int prefix_length = ParseInt(trim(pe_value.substr(kCappedPrefixName.size()))); slice_transform->reset(NewCappedPrefixTransform(prefix_length)); } else if (pe_value.size() == no_op_length && pe_value.compare(0, no_op_length, no_op_name) == 0) { const SliceTransform* no_op_transform = NewNoopTransform(); slice_transform->reset(no_op_transform); } else if (value == kNullptrString) { slice_transform->reset(); } else { return false; } return true; } bool ParseSliceTransform( const std::string& value, std::shared_ptr* slice_transform) { // While we normally don't convert the string representation of a // pointer-typed option into its instance, here we do so for backward // compatibility as we allow this action in SetOption(). // TODO(yhchiang): A possible better place for these serialization / // deserialization is inside the class definition of pointer-typed // option itself, but this requires a bigger change of public API. 
bool result = ParseSliceTransformHelper("fixed:", "capped:", value, slice_transform); if (result) { return result; } result = ParseSliceTransformHelper( "rocksdb.FixedPrefix.", "rocksdb.CappedPrefix.", value, slice_transform); if (result) { return result; } // TODO(yhchiang): we can further support other default // SliceTransforms here. return false; } bool ParseOptionHelper(char* opt_address, const OptionType& opt_type, const std::string& value) { switch (opt_type) { case OptionType::kBoolean: *reinterpret_cast(opt_address) = ParseBoolean("", value); break; case OptionType::kInt: *reinterpret_cast(opt_address) = ParseInt(value); break; case OptionType::kInt32T: *reinterpret_cast(opt_address) = ParseInt32(value); break; case OptionType::kInt64T: PutUnaligned(reinterpret_cast(opt_address), ParseInt64(value)); break; case OptionType::kUInt: *reinterpret_cast(opt_address) = ParseUint32(value); break; case OptionType::kUInt32T: *reinterpret_cast(opt_address) = ParseUint32(value); break; case OptionType::kUInt64T: PutUnaligned(reinterpret_cast(opt_address), ParseUint64(value)); break; case OptionType::kSizeT: PutUnaligned(reinterpret_cast(opt_address), ParseSizeT(value)); break; case OptionType::kString: *reinterpret_cast(opt_address) = value; break; case OptionType::kDouble: *reinterpret_cast(opt_address) = ParseDouble(value); break; case OptionType::kCompactionStyle: return ParseEnum( compaction_style_string_map, value, reinterpret_cast(opt_address)); case OptionType::kCompactionPri: return ParseEnum( compaction_pri_string_map, value, reinterpret_cast(opt_address)); case OptionType::kCompressionType: return ParseEnum( compression_type_string_map, value, reinterpret_cast(opt_address)); case OptionType::kSliceTransform: return ParseSliceTransform( value, reinterpret_cast*>( opt_address)); case OptionType::kChecksumType: return ParseEnum( checksum_type_string_map, value, reinterpret_cast(opt_address)); case OptionType::kEncodingType: return ParseEnum( 
encoding_type_string_map, value, reinterpret_cast(opt_address)); case OptionType::kCompactionStopStyle: return ParseEnum( compaction_stop_style_string_map, value, reinterpret_cast(opt_address)); default: return false; } return true; } bool SerializeSingleOptionHelper(const char* opt_address, const OptionType opt_type, std::string* value) { assert(value); switch (opt_type) { case OptionType::kBoolean: *value = *(reinterpret_cast(opt_address)) ? "true" : "false"; break; case OptionType::kInt: *value = ToString(*(reinterpret_cast(opt_address))); break; case OptionType::kInt32T: *value = ToString(*(reinterpret_cast(opt_address))); break; case OptionType::kInt64T: { int64_t v; GetUnaligned(reinterpret_cast(opt_address), &v); *value = ToString(v); } break; case OptionType::kUInt: *value = ToString(*(reinterpret_cast(opt_address))); break; case OptionType::kUInt32T: *value = ToString(*(reinterpret_cast(opt_address))); break; case OptionType::kUInt64T: { uint64_t v; GetUnaligned(reinterpret_cast(opt_address), &v); *value = ToString(v); } break; case OptionType::kSizeT: { size_t v; GetUnaligned(reinterpret_cast(opt_address), &v); *value = ToString(v); } break; case OptionType::kDouble: *value = ToString(*(reinterpret_cast(opt_address))); break; case OptionType::kString: *value = EscapeOptionString( *(reinterpret_cast(opt_address))); break; case OptionType::kCompactionStyle: return SerializeEnum( compaction_style_string_map, *(reinterpret_cast(opt_address)), value); case OptionType::kCompactionPri: return SerializeEnum( compaction_pri_string_map, *(reinterpret_cast(opt_address)), value); case OptionType::kCompressionType: return SerializeEnum( compression_type_string_map, *(reinterpret_cast(opt_address)), value); case OptionType::kSliceTransform: { const auto* slice_transform_ptr = reinterpret_cast*>( opt_address); *value = slice_transform_ptr->get() ? 
slice_transform_ptr->get()->Name() : kNullptrString; break; } case OptionType::kTableFactory: { const auto* table_factory_ptr = reinterpret_cast*>( opt_address); *value = table_factory_ptr->get() ? table_factory_ptr->get()->Name() : kNullptrString; break; } case OptionType::kComparator: { // it's a const pointer of const Comparator* const auto* ptr = reinterpret_cast(opt_address); // Since the user-specified comparator will be wrapped by // InternalKeyComparator, we should persist the user-specified one // instead of InternalKeyComparator. if (*ptr == nullptr) { *value = kNullptrString; } else { const Comparator* root_comp = (*ptr)->GetRootComparator(); if (root_comp == nullptr) { root_comp = (*ptr); } *value = root_comp->Name(); } break; } case OptionType::kCompactionFilter: { // it's a const pointer of const CompactionFilter* const auto* ptr = reinterpret_cast(opt_address); *value = *ptr ? (*ptr)->Name() : kNullptrString; break; } case OptionType::kCompactionFilterFactory: { const auto* ptr = reinterpret_cast*>( opt_address); *value = ptr->get() ? ptr->get()->Name() : kNullptrString; break; } case OptionType::kMemTableRepFactory: { const auto* ptr = reinterpret_cast*>( opt_address); *value = ptr->get() ? ptr->get()->Name() : kNullptrString; break; } case OptionType::kMergeOperator: { const auto* ptr = reinterpret_cast*>(opt_address); *value = ptr->get() ? ptr->get()->Name() : kNullptrString; break; } case OptionType::kFilterPolicy: { const auto* ptr = reinterpret_cast*>(opt_address); *value = ptr->get() ? ptr->get()->Name() : kNullptrString; break; } case OptionType::kChecksumType: return SerializeEnum( checksum_type_string_map, *reinterpret_cast(opt_address), value); case OptionType::kFlushBlockPolicyFactory: { const auto* ptr = reinterpret_cast*>( opt_address); *value = ptr->get() ? 
ptr->get()->Name() : kNullptrString; break; } case OptionType::kEncodingType: return SerializeEnum( encoding_type_string_map, *reinterpret_cast(opt_address), value); case OptionType::kCompactionStopStyle: return SerializeEnum( compaction_stop_style_string_map, *reinterpret_cast(opt_address), value); default: return false; } return true; } Status GetMutableOptionsFromStrings( const MutableCFOptions& base_options, const std::unordered_map& options_map, Logger* info_log, MutableCFOptions* new_options) { assert(new_options); *new_options = base_options; ConfigOptions config_options; for (const auto& o : options_map) { std::string elem; const auto opt_info = OptionTypeInfo::Find(o.first, cf_options_type_info, &elem); if (opt_info == nullptr) { return Status::InvalidArgument("Unrecognized option: " + o.first); } else if (!opt_info->IsMutable()) { return Status::InvalidArgument("Option not changeable: " + o.first); } else if (opt_info->IsDeprecated()) { // log warning when user tries to set a deprecated option but don't fail // the call for compatibility. 
ROCKS_LOG_WARN(info_log, "%s is a deprecated option and cannot be set", o.first.c_str()); } else { Status s = opt_info->Parse( config_options, elem, o.second, reinterpret_cast(new_options) + opt_info->mutable_offset_); if (!s.ok()) { return s; } } } return Status::OK(); } Status GetMutableDBOptionsFromStrings( const MutableDBOptions& base_options, const std::unordered_map& options_map, MutableDBOptions* new_options) { assert(new_options); *new_options = base_options; ConfigOptions config_options; for (const auto& o : options_map) { try { std::string elem; const auto opt_info = OptionTypeInfo::Find(o.first, db_options_type_info, &elem); if (opt_info == nullptr) { return Status::InvalidArgument("Unrecognized option: " + o.first); } else if (!opt_info->IsMutable()) { return Status::InvalidArgument("Option not changeable: " + o.first); } else { Status s = opt_info->Parse( config_options, elem, o.second, reinterpret_cast(new_options) + opt_info->mutable_offset_); if (!s.ok()) { return s; } } } catch (std::exception& e) { return Status::InvalidArgument("Error parsing " + o.first + ":" + std::string(e.what())); } } return Status::OK(); } Status StringToMap(const std::string& opts_str, std::unordered_map* opts_map) { assert(opts_map); // Example: // opts_str = "write_buffer_size=1024;max_write_buffer_number=2;" // "nested_opt={opt1=1;opt2=2};max_bytes_for_level_base=100" size_t pos = 0; std::string opts = trim(opts_str); // If the input string starts and ends with "{...}", strip off the brackets while (opts.size() > 2 && opts[0] == '{' && opts[opts.size() - 1] == '}') { opts = trim(opts.substr(1, opts.size() - 2)); } while (pos < opts.size()) { size_t eq_pos = opts.find('=', pos); if (eq_pos == std::string::npos) { return Status::InvalidArgument("Mismatched key value pair, '=' expected"); } std::string key = trim(opts.substr(pos, eq_pos - pos)); if (key.empty()) { return Status::InvalidArgument("Empty key found"); } std::string value; Status s = 
OptionTypeInfo::NextToken(opts, ';', eq_pos + 1, &pos, &value); if (!s.ok()) { return s; } else { (*opts_map)[key] = value; if (pos == std::string::npos) { break; } else { pos++; } } } return Status::OK(); } Status GetStringFromStruct( const ConfigOptions& config_options, const void* const opt_ptr, const std::unordered_map& type_info, std::string* opt_string) { assert(opt_string); opt_string->clear(); for (const auto iter : type_info) { const auto& opt_info = iter.second; // If the option is no longer used in rocksdb and marked as deprecated, // we skip it in the serialization. if (opt_info.ShouldSerialize()) { const char* opt_addr = reinterpret_cast(opt_ptr) + opt_info.offset_; std::string value; Status s = opt_info.Serialize(config_options, iter.first, opt_addr, &value); if (s.ok()) { opt_string->append(iter.first + "=" + value + config_options.delimiter); } else { return s; } } } return Status::OK(); } Status GetStringFromDBOptions(std::string* opt_string, const DBOptions& db_options, const std::string& delimiter) { ConfigOptions config_options; config_options.delimiter = delimiter; return GetStringFromDBOptions(config_options, db_options, opt_string); } Status GetStringFromDBOptions(const ConfigOptions& cfg_options, const DBOptions& db_options, std::string* opt_string) { return GetStringFromStruct(cfg_options, &db_options, db_options_type_info, opt_string); } Status GetStringFromColumnFamilyOptions(std::string* opt_string, const ColumnFamilyOptions& cf_options, const std::string& delimiter) { ConfigOptions config_options; config_options.delimiter = delimiter; return GetStringFromColumnFamilyOptions(config_options, cf_options, opt_string); } Status GetStringFromColumnFamilyOptions(const ConfigOptions& config_options, const ColumnFamilyOptions& cf_options, std::string* opt_string) { return GetStringFromStruct(config_options, &cf_options, cf_options_type_info, opt_string); } Status GetStringFromCompressionType(std::string* compression_str, CompressionType 
compression_type) { bool ok = SerializeEnum(compression_type_string_map, compression_type, compression_str); if (ok) { return Status::OK(); } else { return Status::InvalidArgument("Invalid compression types"); } } static Status ParseDBOption(const ConfigOptions& config_options, const std::string& name, const std::string& org_value, DBOptions* new_options) { const std::string& value = config_options.input_strings_escaped ? UnescapeOptionString(org_value) : org_value; std::string elem; const auto opt_info = OptionTypeInfo::Find(name, db_options_type_info, &elem); if (opt_info == nullptr) { return Status::InvalidArgument("Unrecognized option DBOptions:", name); } else { return opt_info->Parse( config_options, elem, value, reinterpret_cast(new_options) + opt_info->offset_); } } Status GetColumnFamilyOptionsFromMap( const ColumnFamilyOptions& base_options, const std::unordered_map& opts_map, ColumnFamilyOptions* new_options, bool input_strings_escaped, bool ignore_unknown_options) { ConfigOptions config_options; config_options.ignore_unknown_options = ignore_unknown_options; config_options.input_strings_escaped = input_strings_escaped; return GetColumnFamilyOptionsFromMap(config_options, base_options, opts_map, new_options); } Status GetColumnFamilyOptionsFromMap( const ConfigOptions& config_options, const ColumnFamilyOptions& base_options, const std::unordered_map& opts_map, ColumnFamilyOptions* new_options) { assert(new_options); *new_options = base_options; for (const auto& o : opts_map) { auto s = ParseColumnFamilyOption(config_options, o.first, o.second, new_options); if (!s.ok()) { if (s.IsNotSupported()) { continue; } else if (s.IsInvalidArgument() && config_options.ignore_unknown_options) { continue; } else { // Restore "new_options" to the default "base_options". 
*new_options = base_options; return s; } } } return Status::OK(); } Status GetColumnFamilyOptionsFromString( const ColumnFamilyOptions& base_options, const std::string& opts_str, ColumnFamilyOptions* new_options) { ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; return GetColumnFamilyOptionsFromString(config_options, base_options, opts_str, new_options); } Status GetColumnFamilyOptionsFromString(const ConfigOptions& config_options, const ColumnFamilyOptions& base_options, const std::string& opts_str, ColumnFamilyOptions* new_options) { std::unordered_map opts_map; Status s = StringToMap(opts_str, &opts_map); if (!s.ok()) { *new_options = base_options; return s; } return GetColumnFamilyOptionsFromMap(config_options, base_options, opts_map, new_options); } Status GetDBOptionsFromMap( const DBOptions& base_options, const std::unordered_map& opts_map, DBOptions* new_options, bool input_strings_escaped, bool ignore_unknown_options) { ConfigOptions config_options; config_options.input_strings_escaped = input_strings_escaped; config_options.ignore_unknown_options = ignore_unknown_options; return GetDBOptionsFromMap(config_options, base_options, opts_map, new_options); } Status GetDBOptionsFromMap( const ConfigOptions& config_options, const DBOptions& base_options, const std::unordered_map& opts_map, DBOptions* new_options) { return GetDBOptionsFromMapInternal(config_options, base_options, opts_map, new_options, nullptr); } Status GetDBOptionsFromMapInternal( const ConfigOptions& config_options, const DBOptions& base_options, const std::unordered_map& opts_map, DBOptions* new_options, std::vector* unsupported_options_names) { assert(new_options); *new_options = base_options; if (unsupported_options_names) { unsupported_options_names->clear(); } for (const auto& o : opts_map) { auto s = ParseDBOption(config_options, o.first, o.second, new_options); if (!s.ok()) { if (s.IsNotSupported()) { // If the 
deserialization of the specified option is not supported // and an output vector of unsupported_options is provided, then // we log the name of the unsupported option and proceed. if (unsupported_options_names != nullptr) { unsupported_options_names->push_back(o.first); } // Note that we still return Status::OK in such case to maintain // the backward compatibility in the old public API defined in // rocksdb/convenience.h } else if (s.IsInvalidArgument() && config_options.ignore_unknown_options) { continue; } else { // Restore "new_options" to the default "base_options". *new_options = base_options; return s; } } } return Status::OK(); } Status GetDBOptionsFromString(const DBOptions& base_options, const std::string& opts_str, DBOptions* new_options) { ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; return GetDBOptionsFromString(config_options, base_options, opts_str, new_options); } Status GetDBOptionsFromString(const ConfigOptions& config_options, const DBOptions& base_options, const std::string& opts_str, DBOptions* new_options) { std::unordered_map opts_map; Status s = StringToMap(opts_str, &opts_map); if (!s.ok()) { *new_options = base_options; return s; } return GetDBOptionsFromMap(config_options, base_options, opts_map, new_options); } Status GetOptionsFromString(const Options& base_options, const std::string& opts_str, Options* new_options) { ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; return GetOptionsFromString(config_options, base_options, opts_str, new_options); } Status GetOptionsFromString(const ConfigOptions& config_options, const Options& base_options, const std::string& opts_str, Options* new_options) { std::unordered_map opts_map; Status s = StringToMap(opts_str, &opts_map); if (!s.ok()) { return s; } DBOptions new_db_options(base_options); ColumnFamilyOptions new_cf_options(base_options); for 
(const auto& o : opts_map) { if (ParseDBOption(config_options, o.first, o.second, &new_db_options) .ok()) { } else if (ParseColumnFamilyOption(config_options, o.first, o.second, &new_cf_options) .ok()) { } else { return Status::InvalidArgument("Can't parse option " + o.first); } } *new_options = Options(new_db_options, new_cf_options); return Status::OK(); } Status GetTableFactoryFromMap( const std::string& factory_name, const std::unordered_map& opt_map, std::shared_ptr* table_factory, bool ignore_unknown_options) { ConfigOptions config_options; // Use default for escaped(true) and check (exact) config_options.ignore_unknown_options = ignore_unknown_options; return GetTableFactoryFromMap(config_options, factory_name, opt_map, table_factory); } Status GetTableFactoryFromMap( const ConfigOptions& config_options, const std::string& factory_name, const std::unordered_map& opt_map, std::shared_ptr* table_factory) { Status s; if (factory_name == BlockBasedTableFactory().Name()) { BlockBasedTableOptions bbt_opt; s = GetBlockBasedTableOptionsFromMap( config_options, BlockBasedTableOptions(), opt_map, &bbt_opt); if (!s.ok()) { return s; } table_factory->reset(new BlockBasedTableFactory(bbt_opt)); return s; } else if (factory_name == PlainTableFactory().Name()) { PlainTableOptions pt_opt; s = GetPlainTableOptionsFromMap(config_options, PlainTableOptions(), opt_map, &pt_opt); if (!s.ok()) { return s; } table_factory->reset(new PlainTableFactory(pt_opt)); return s; } // Return OK for not supported table factories as TableFactory // Deserialization is optional. 
table_factory->reset(); return s; } std::unordered_map OptionsHelper::encoding_type_string_map = {{"kPlain", kPlain}, {"kPrefix", kPrefix}}; std::unordered_map OptionsHelper::compaction_style_string_map = { {"kCompactionStyleLevel", kCompactionStyleLevel}, {"kCompactionStyleUniversal", kCompactionStyleUniversal}, {"kCompactionStyleFIFO", kCompactionStyleFIFO}, {"kCompactionStyleNone", kCompactionStyleNone}}; std::unordered_map OptionsHelper::compaction_pri_string_map = { {"kByCompensatedSize", kByCompensatedSize}, {"kOldestLargestSeqFirst", kOldestLargestSeqFirst}, {"kOldestSmallestSeqFirst", kOldestSmallestSeqFirst}, {"kMinOverlappingRatio", kMinOverlappingRatio}}; std::unordered_map OptionsHelper::compaction_stop_style_string_map = { {"kCompactionStopStyleSimilarSize", kCompactionStopStyleSimilarSize}, {"kCompactionStopStyleTotalSize", kCompactionStopStyleTotalSize}}; Status OptionTypeInfo::NextToken(const std::string& opts, char delimiter, size_t pos, size_t* end, std::string* token) { while (pos < opts.size() && isspace(opts[pos])) { ++pos; } // Empty value at the end if (pos >= opts.size()) { *token = ""; *end = std::string::npos; return Status::OK(); } else if (opts[pos] == '{') { int count = 1; size_t brace_pos = pos + 1; while (brace_pos < opts.size()) { if (opts[brace_pos] == '{') { ++count; } else if (opts[brace_pos] == '}') { --count; if (count == 0) { break; } } ++brace_pos; } // found the matching closing brace if (count == 0) { *token = trim(opts.substr(pos + 1, brace_pos - pos - 1)); // skip all whitespace and move to the next delimiter // brace_pos points to the next position after the matching '}' pos = brace_pos + 1; while (pos < opts.size() && isspace(opts[pos])) { ++pos; } if (pos < opts.size() && opts[pos] != delimiter) { return Status::InvalidArgument("Unexpected chars after nested options"); } *end = pos; } else { return Status::InvalidArgument( "Mismatched curly braces for nested options"); } } else { *end = opts.find(delimiter, pos); if 
(*end == std::string::npos) { // It either ends with a trailing semi-colon or the last key-value pair *token = trim(opts.substr(pos)); } else { *token = trim(opts.substr(pos, *end - pos)); } } return Status::OK(); } Status OptionTypeInfo::Parse(const ConfigOptions& config_options, const std::string& opt_name, const std::string& opt_value, char* opt_addr) const { if (IsDeprecated()) { return Status::OK(); } try { if (opt_addr == nullptr) { return Status::NotFound("Could not find option: ", opt_name); } else if (parse_func_ != nullptr) { return parse_func_(config_options, opt_name, opt_value, opt_addr); } else if (ParseOptionHelper(opt_addr, type_, opt_value)) { return Status::OK(); } else if (IsByName()) { return Status::NotSupported("Deserializing the option " + opt_name + " is not supported"); } else { return Status::InvalidArgument("Error parsing:", opt_name); } } catch (std::exception& e) { return Status::InvalidArgument("Error parsing " + opt_name + ":" + std::string(e.what())); } } Status OptionTypeInfo::ParseStruct( const ConfigOptions& config_options, const std::string& struct_name, const std::unordered_map* struct_map, const std::string& opt_name, const std::string& opt_value, char* opt_addr) { assert(struct_map); Status status; if (opt_name == struct_name || EndsWith(opt_name, "." + struct_name)) { // This option represents the entire struct std::unordered_map opt_map; status = StringToMap(opt_value, &opt_map); for (const auto& map_iter : opt_map) { if (!status.ok()) { break; } const auto iter = struct_map->find(map_iter.first); if (iter != struct_map->end()) { status = iter->second.Parse(config_options, map_iter.first, map_iter.second, opt_addr + iter->second.offset_); } else { status = Status::InvalidArgument("Unrecognized option: ", struct_name + "." 
+ map_iter.first); } } } else if (StartsWith(opt_name, struct_name + ".")) { // This option represents a nested field in the struct (e.g, struct.field) std::string elem_name; const auto opt_info = Find(opt_name.substr(struct_name.size() + 1), *struct_map, &elem_name); if (opt_info != nullptr) { status = opt_info->Parse(config_options, elem_name, opt_value, opt_addr + opt_info->offset_); } else { status = Status::InvalidArgument("Unrecognized option: ", opt_name); } } else { // This option represents a field in the struct (e.g. field) std::string elem_name; const auto opt_info = Find(opt_name, *struct_map, &elem_name); if (opt_info != nullptr) { status = opt_info->Parse(config_options, elem_name, opt_value, opt_addr + opt_info->offset_); } else { status = Status::InvalidArgument("Unrecognized option: ", struct_name + "." + opt_name); } } return status; } Status OptionTypeInfo::Serialize(const ConfigOptions& config_options, const std::string& opt_name, const char* opt_addr, std::string* opt_value) const { // If the option is no longer used in rocksdb and marked as deprecated, // we skip it in the serialization. if (opt_addr != nullptr && ShouldSerialize()) { if (serialize_func_ != nullptr) { return serialize_func_(config_options, opt_name, opt_addr, opt_value); } else if (!SerializeSingleOptionHelper(opt_addr, type_, opt_value)) { return Status::InvalidArgument("Cannot serialize option: ", opt_name); } } return Status::OK(); } Status OptionTypeInfo::SerializeStruct( const ConfigOptions& config_options, const std::string& struct_name, const std::unordered_map* struct_map, const std::string& opt_name, const char* opt_addr, std::string* value) { assert(struct_map); Status status; if (EndsWith(opt_name, struct_name)) { // We are going to write the struct as "{ prop1=value1; prop2=value2;}. // Set the delimiter to ";" so that the everything will be on one line. 
ConfigOptions embedded = config_options; embedded.delimiter = ";"; // This option represents the entire struct std::string result; for (const auto& iter : *struct_map) { std::string single; const auto& opt_info = iter.second; if (opt_info.ShouldSerialize()) { status = opt_info.Serialize(embedded, iter.first, opt_addr + opt_info.offset_, &single); if (!status.ok()) { return status; } else { result.append(iter.first + "=" + single + embedded.delimiter); } } } *value = "{" + result + "}"; } else if (StartsWith(opt_name, struct_name + ".")) { // This option represents a nested field in the struct (e.g, struct.field) std::string elem_name; const auto opt_info = Find(opt_name.substr(struct_name.size() + 1), *struct_map, &elem_name); if (opt_info != nullptr) { status = opt_info->Serialize(config_options, elem_name, opt_addr + opt_info->offset_, value); } else { status = Status::InvalidArgument("Unrecognized option: ", opt_name); } } else { // This option represents a field in the struct (e.g. field) std::string elem_name; const auto opt_info = Find(opt_name, *struct_map, &elem_name); if (opt_info == nullptr) { status = Status::InvalidArgument("Unrecognized option: ", opt_name); } else if (opt_info->ShouldSerialize()) { status = opt_info->Serialize(config_options, opt_name + "." 
+ elem_name, opt_addr + opt_info->offset_, value); } } return status; } template bool IsOptionEqual(const char* offset1, const char* offset2) { return (*reinterpret_cast(offset1) == *reinterpret_cast(offset2)); } static bool AreEqualDoubles(const double a, const double b) { return (fabs(a - b) < 0.00001); } static bool AreOptionsEqual(OptionType type, const char* this_offset, const char* that_offset) { switch (type) { case OptionType::kBoolean: return IsOptionEqual(this_offset, that_offset); case OptionType::kInt: return IsOptionEqual(this_offset, that_offset); case OptionType::kUInt: return IsOptionEqual(this_offset, that_offset); case OptionType::kInt32T: return IsOptionEqual(this_offset, that_offset); case OptionType::kInt64T: { int64_t v1, v2; GetUnaligned(reinterpret_cast(this_offset), &v1); GetUnaligned(reinterpret_cast(that_offset), &v2); return (v1 == v2); } case OptionType::kUInt32T: return IsOptionEqual(this_offset, that_offset); case OptionType::kUInt64T: { uint64_t v1, v2; GetUnaligned(reinterpret_cast(this_offset), &v1); GetUnaligned(reinterpret_cast(that_offset), &v2); return (v1 == v2); } case OptionType::kSizeT: { size_t v1, v2; GetUnaligned(reinterpret_cast(this_offset), &v1); GetUnaligned(reinterpret_cast(that_offset), &v2); return (v1 == v2); } case OptionType::kString: return IsOptionEqual(this_offset, that_offset); case OptionType::kDouble: return AreEqualDoubles(*reinterpret_cast(this_offset), *reinterpret_cast(that_offset)); case OptionType::kCompactionStyle: return IsOptionEqual(this_offset, that_offset); case OptionType::kCompactionStopStyle: return IsOptionEqual(this_offset, that_offset); case OptionType::kCompactionPri: return IsOptionEqual(this_offset, that_offset); case OptionType::kCompressionType: return IsOptionEqual(this_offset, that_offset); case OptionType::kChecksumType: return IsOptionEqual(this_offset, that_offset); case OptionType::kEncodingType: return IsOptionEqual(this_offset, that_offset); default: return false; } // End 
switch } bool OptionTypeInfo::AreEqual(const ConfigOptions& config_options, const std::string& opt_name, const char* this_addr, const char* that_addr, std::string* mismatch) const { if (!config_options.IsCheckEnabled(GetSanityLevel())) { return true; // If the sanity level is not being checked, skip it } if (this_addr == nullptr || that_addr == nullptr) { if (this_addr == that_addr) { return true; } } else if (equals_func_ != nullptr) { if (equals_func_(config_options, opt_name, this_addr, that_addr, mismatch)) { return true; } } else if (AreOptionsEqual(type_, this_addr, that_addr)) { return true; } if (mismatch->empty()) { *mismatch = opt_name; } return false; } bool OptionTypeInfo::StructsAreEqual( const ConfigOptions& config_options, const std::string& struct_name, const std::unordered_map* struct_map, const std::string& opt_name, const char* this_addr, const char* that_addr, std::string* mismatch) { assert(struct_map); bool matches = true; std::string result; if (EndsWith(opt_name, struct_name)) { // This option represents the entire struct for (const auto& iter : *struct_map) { const auto& opt_info = iter.second; matches = opt_info.AreEqual(config_options, iter.first, this_addr + opt_info.offset_, that_addr + opt_info.offset_, &result); if (!matches) { *mismatch = struct_name + "." + result; return false; } } } else if (StartsWith(opt_name, struct_name + ".")) { // This option represents a nested field in the struct (e.g, struct.field) std::string elem_name; const auto opt_info = Find(opt_name.substr(struct_name.size() + 1), *struct_map, &elem_name); assert(opt_info); if (opt_info == nullptr) { *mismatch = opt_name; matches = false; } else if (!opt_info->AreEqual(config_options, elem_name, this_addr + opt_info->offset_, that_addr + opt_info->offset_, &result)) { matches = false; *mismatch = struct_name + "." + result; } } else { // This option represents a field in the struct (e.g. 
field) std::string elem_name; const auto opt_info = Find(opt_name, *struct_map, &elem_name); assert(opt_info); if (opt_info == nullptr) { *mismatch = struct_name + "." + opt_name; matches = false; } else if (!opt_info->AreEqual(config_options, elem_name, this_addr + opt_info->offset_, that_addr + opt_info->offset_, &result)) { matches = false; *mismatch = struct_name + "." + result; } } return matches; } bool OptionTypeInfo::AreEqualByName(const ConfigOptions& config_options, const std::string& opt_name, const char* this_addr, const char* that_addr) const { if (IsByName()) { std::string that_value; if (Serialize(config_options, opt_name, that_addr, &that_value).ok()) { return AreEqualByName(config_options, opt_name, this_addr, that_value); } } return false; } bool OptionTypeInfo::AreEqualByName(const ConfigOptions& config_options, const std::string& opt_name, const char* opt_addr, const std::string& that_value) const { std::string this_value; if (!IsByName()) { return false; } else if (!Serialize(config_options, opt_name, opt_addr, &this_value).ok()) { return false; } else if (IsEnabled(OptionVerificationType::kByNameAllowFromNull)) { if (that_value == kNullptrString) { return true; } } else if (IsEnabled(OptionVerificationType::kByNameAllowNull)) { if (that_value == kNullptrString) { return true; } } return (this_value == that_value); } const OptionTypeInfo* OptionTypeInfo::Find( const std::string& opt_name, const std::unordered_map& opt_map, std::string* elem_name) { const auto iter = opt_map.find(opt_name); // Look up the value in the map if (iter != opt_map.end()) { // Found the option in the map *elem_name = opt_name; // Return the name return &(iter->second); // Return the contents of the iterator } else { auto idx = opt_name.find("."); // Look for a separator if (idx > 0 && idx != std::string::npos) { // We found a separator auto siter = opt_map.find(opt_name.substr(0, idx)); // Look for the short name if (siter != opt_map.end()) { // We found the short name 
if (siter->second.IsStruct()) { // If the object is a struct *elem_name = opt_name.substr(idx + 1); // Return the rest return &(siter->second); // Return the contents of the iterator } } } } return nullptr; } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/options_helper.h000066400000000000000000000126171370372246700201520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include "options/cf_options.h" #include "options/db_options.h" #include "options/options_type.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/universal_compaction.h" namespace ROCKSDB_NAMESPACE { struct ConfigOptions; std::vector GetSupportedCompressions(); DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, const MutableDBOptions& mutable_db_options); ColumnFamilyOptions BuildColumnFamilyOptions( const ColumnFamilyOptions& ioptions, const MutableCFOptions& mutable_cf_options); #ifndef ROCKSDB_LITE Status GetStringFromStruct( const ConfigOptions& config_options, const void* const opt_ptr, const std::unordered_map& type_info, std::string* opt_string); Status ParseColumnFamilyOption(const ConfigOptions& config_options, const std::string& name, const std::string& org_value, ColumnFamilyOptions* new_options); Status GetMutableOptionsFromStrings( const MutableCFOptions& base_options, const std::unordered_map& options_map, Logger* info_log, MutableCFOptions* new_options); Status GetMutableDBOptionsFromStrings( const MutableDBOptions& base_options, const std::unordered_map& options_map, MutableDBOptions* new_options); Status GetTableFactoryFromMap( const std::string& factory_name, const std::unordered_map& opt_map, 
std::shared_ptr* table_factory, bool ignore_unknown_options = false); Status GetTableFactoryFromMap( const ConfigOptions& config_options, const std::string& factory_name, const std::unordered_map& opt_map, std::shared_ptr* table_factory); // A helper function that converts "opt_address" to a std::string // based on the specified OptionType. bool SerializeSingleOptionHelper(const char* opt_address, const OptionType opt_type, std::string* value); // In addition to its public version defined in rocksdb/convenience.h, // this further takes an optional output vector "unsupported_options_names", // which stores the name of all the unsupported options specified in "opts_map". Status GetDBOptionsFromMapInternal( const ConfigOptions& config_options, const DBOptions& base_options, const std::unordered_map& opts_map, DBOptions* new_options, std::vector* unsupported_options_names = nullptr); bool ParseSliceTransform( const std::string& value, std::shared_ptr* slice_transform); extern Status StringToMap( const std::string& opts_str, std::unordered_map* opts_map); extern bool ParseOptionHelper(char* opt_address, const OptionType& opt_type, const std::string& value); #endif // !ROCKSDB_LITE struct OptionsHelper { static std::map compaction_style_to_string; static std::map compaction_pri_to_string; static std::map compaction_stop_style_to_string; static std::unordered_map checksum_type_string_map; static std::unordered_map compression_type_string_map; #ifndef ROCKSDB_LITE static std::unordered_map cf_options_type_info; static std::unordered_map compaction_stop_style_string_map; static std::unordered_map db_options_type_info; static std::unordered_map encoding_type_string_map; static std::unordered_map compaction_style_string_map; static std::unordered_map compaction_pri_string_map; static ColumnFamilyOptions dummy_cf_options; #endif // !ROCKSDB_LITE }; // Some aliasing static auto& compaction_style_to_string = OptionsHelper::compaction_style_to_string; static auto& 
compaction_pri_to_string = OptionsHelper::compaction_pri_to_string; static auto& compaction_stop_style_to_string = OptionsHelper::compaction_stop_style_to_string; static auto& checksum_type_string_map = OptionsHelper::checksum_type_string_map; #ifndef ROCKSDB_LITE static auto& cf_options_type_info = OptionsHelper::cf_options_type_info; static auto& compaction_stop_style_string_map = OptionsHelper::compaction_stop_style_string_map; static auto& db_options_type_info = OptionsHelper::db_options_type_info; static auto& compression_type_string_map = OptionsHelper::compression_type_string_map; static auto& encoding_type_string_map = OptionsHelper::encoding_type_string_map; static auto& compaction_style_string_map = OptionsHelper::compaction_style_string_map; static auto& compaction_pri_string_map = OptionsHelper::compaction_pri_string_map; #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/options/options_parser.cc000066400000000000000000000651321370372246700203250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "options/options_parser.h" #include #include #include #include #include #include "file/read_write_util.h" #include "file/writable_file_writer.h" #include "options/options_helper.h" #include "port/port.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "table/block_based/block_based_table_factory.h" #include "test_util/sync_point.h" #include "util/cast_util.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { static const std::string option_file_header = "# This is a RocksDB option file.\n" "#\n" "# For detailed file format spec, please refer to the example file\n" "# in examples/rocksdb_option_file_example.ini\n" "#\n" "\n"; Status PersistRocksDBOptions(const DBOptions& db_opt, const std::vector& cf_names, const std::vector& cf_opts, const std::string& file_name, FileSystem* fs) { ConfigOptions config_options; // Use default for escaped(true) and check (exact) config_options.delimiter = "\n "; // If a readahead size was set in the input options, use it if (db_opt.log_readahead_size > 0) { config_options.file_readahead_size = db_opt.log_readahead_size; } return PersistRocksDBOptions(config_options, db_opt, cf_names, cf_opts, file_name, fs); } Status PersistRocksDBOptions(const ConfigOptions& config_options_in, const DBOptions& db_opt, const std::vector& cf_names, const std::vector& cf_opts, const std::string& file_name, FileSystem* fs) { ConfigOptions config_options = config_options_in; config_options.delimiter = "\n "; // Override the default to nl TEST_SYNC_POINT("PersistRocksDBOptions:start"); if (cf_names.size() != cf_opts.size()) { return Status::InvalidArgument( "cf_names.size() and cf_opts.size() must be the same"); } std::unique_ptr wf; Status s = fs->NewWritableFile(file_name, FileOptions(), &wf, nullptr); if (!s.ok()) { return s; } std::unique_ptr writable; writable.reset(new WritableFileWriter(std::move(wf), file_name, EnvOptions(), nullptr /* statistics */)); std::string options_file_content; s = 
writable->Append(option_file_header + "[" + opt_section_titles[kOptionSectionVersion] + "]\n" " rocksdb_version=" + ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR) + "." + ToString(ROCKSDB_PATCH) + "\n"); if (s.ok()) { s = writable->Append( " options_file_version=" + ToString(ROCKSDB_OPTION_FILE_MAJOR) + "." + ToString(ROCKSDB_OPTION_FILE_MINOR) + "\n"); } if (s.ok()) { s = writable->Append("\n[" + opt_section_titles[kOptionSectionDBOptions] + "]\n "); } if (s.ok()) { s = GetStringFromDBOptions(config_options, db_opt, &options_file_content); } if (s.ok()) { s = writable->Append(options_file_content + "\n"); } for (size_t i = 0; s.ok() && i < cf_opts.size(); ++i) { // CFOptions section s = writable->Append("\n[" + opt_section_titles[kOptionSectionCFOptions] + " \"" + EscapeOptionString(cf_names[i]) + "\"]\n "); if (s.ok()) { s = GetStringFromColumnFamilyOptions(config_options, cf_opts[i], &options_file_content); } if (s.ok()) { s = writable->Append(options_file_content + "\n"); } // TableOptions section auto* tf = cf_opts[i].table_factory.get(); if (tf != nullptr) { if (s.ok()) { s = writable->Append( "[" + opt_section_titles[kOptionSectionTableOptions] + tf->Name() + " \"" + EscapeOptionString(cf_names[i]) + "\"]\n "); } if (s.ok()) { options_file_content.clear(); s = tf->GetOptionString(config_options, &options_file_content); } if (s.ok()) { s = writable->Append(options_file_content + "\n"); } } } if (s.ok()) { s = writable->Sync(true /* use_fsync */); } if (s.ok()) { s = writable->Close(); } if (s.ok()) { return RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( config_options, db_opt, cf_names, cf_opts, file_name, fs); } return s; } RocksDBOptionsParser::RocksDBOptionsParser() { Reset(); } void RocksDBOptionsParser::Reset() { db_opt_ = DBOptions(); db_opt_map_.clear(); cf_names_.clear(); cf_opts_.clear(); cf_opt_maps_.clear(); has_version_section_ = false; has_db_options_ = false; has_default_cf_options_ = false; for (int i = 0; i < 3; ++i) { 
db_version[i] = 0; opt_file_version[i] = 0; } } bool RocksDBOptionsParser::IsSection(const std::string& line) { if (line.size() < 2) { return false; } if (line[0] != '[' || line[line.size() - 1] != ']') { return false; } return true; } Status RocksDBOptionsParser::ParseSection(OptionSection* section, std::string* title, std::string* argument, const std::string& line, const int line_num) { *section = kOptionSectionUnknown; // A section is of the form [ ""], where // "" is optional. size_t arg_start_pos = line.find("\""); size_t arg_end_pos = line.rfind("\""); // The following if-then check tries to identify whether the input // section has the optional section argument. if (arg_start_pos != std::string::npos && arg_start_pos != arg_end_pos) { *title = TrimAndRemoveComment(line.substr(1, arg_start_pos - 1), true); *argument = UnescapeOptionString( line.substr(arg_start_pos + 1, arg_end_pos - arg_start_pos - 1)); } else { *title = TrimAndRemoveComment(line.substr(1, line.size() - 2), true); *argument = ""; } for (int i = 0; i < kOptionSectionUnknown; ++i) { if (title->find(opt_section_titles[i]) == 0) { if (i == kOptionSectionVersion || i == kOptionSectionDBOptions || i == kOptionSectionCFOptions) { if (title->size() == opt_section_titles[i].size()) { // if true, then it indicats equal *section = static_cast(i); return CheckSection(*section, *argument, line_num); } } else if (i == kOptionSectionTableOptions) { // This type of sections has a sufffix at the end of the // section title if (title->size() > opt_section_titles[i].size()) { *section = static_cast(i); return CheckSection(*section, *argument, line_num); } } } } return Status::InvalidArgument(std::string("Unknown section ") + line); } Status RocksDBOptionsParser::InvalidArgument(const int line_num, const std::string& message) { return Status::InvalidArgument( "[RocksDBOptionsParser Error] ", message + " (at line " + ToString(line_num) + ")"); } Status RocksDBOptionsParser::ParseStatement(std::string* name, 
std::string* value, const std::string& line, const int line_num) { size_t eq_pos = line.find("="); if (eq_pos == std::string::npos) { return InvalidArgument(line_num, "A valid statement must have a '='."); } *name = TrimAndRemoveComment(line.substr(0, eq_pos), true); *value = TrimAndRemoveComment(line.substr(eq_pos + 1, line.size() - eq_pos - 1)); if (name->empty()) { return InvalidArgument(line_num, "A valid statement must have a variable name."); } return Status::OK(); } Status RocksDBOptionsParser::Parse(const std::string& file_name, FileSystem* fs, bool ignore_unknown_options, size_t file_readahead_size) { ConfigOptions config_options; // Use default for escaped(true) and check (exact) config_options.ignore_unknown_options = ignore_unknown_options; if (file_readahead_size > 0) { config_options.file_readahead_size = file_readahead_size; } return Parse(config_options, file_name, fs); } Status RocksDBOptionsParser::Parse(const ConfigOptions& config_options_in, const std::string& file_name, FileSystem* fs) { Reset(); ConfigOptions config_options = config_options_in; std::unique_ptr seq_file; Status s = fs->NewSequentialFile(file_name, FileOptions(), &seq_file, nullptr); if (!s.ok()) { return s; } SequentialFileReader sf_reader(std::move(seq_file), file_name, config_options.file_readahead_size); OptionSection section = kOptionSectionUnknown; std::string title; std::string argument; std::unordered_map opt_map; std::istringstream iss; std::string line; bool has_data = true; // we only support single-lined statement. for (int line_num = 1; ReadOneLine(&iss, &sf_reader, &line, &has_data, &s); ++line_num) { if (!s.ok()) { return s; } line = TrimAndRemoveComment(line); if (line.empty()) { continue; } if (IsSection(line)) { s = EndSection(config_options, section, title, argument, opt_map); opt_map.clear(); if (!s.ok()) { return s; } // If the option file is not generated by a higher minor version, // there shouldn't be any unknown option. 
if (config_options.ignore_unknown_options && section == kOptionSectionVersion) { if (db_version[0] < ROCKSDB_MAJOR || (db_version[0] == ROCKSDB_MAJOR && db_version[1] <= ROCKSDB_MINOR)) { config_options.ignore_unknown_options = false; } } s = ParseSection(§ion, &title, &argument, line, line_num); if (!s.ok()) { return s; } } else { std::string name; std::string value; s = ParseStatement(&name, &value, line, line_num); if (!s.ok()) { return s; } opt_map.insert({name, value}); } } s = EndSection(config_options, section, title, argument, opt_map); opt_map.clear(); if (!s.ok()) { return s; } return ValidityCheck(); } Status RocksDBOptionsParser::CheckSection(const OptionSection section, const std::string& section_arg, const int line_num) { if (section == kOptionSectionDBOptions) { if (has_db_options_) { return InvalidArgument( line_num, "More than one DBOption section found in the option config file"); } has_db_options_ = true; } else if (section == kOptionSectionCFOptions) { bool is_default_cf = (section_arg == kDefaultColumnFamilyName); if (cf_opts_.size() == 0 && !is_default_cf) { return InvalidArgument( line_num, "Default column family must be the first CFOptions section " "in the option config file"); } else if (cf_opts_.size() != 0 && is_default_cf) { return InvalidArgument( line_num, "Default column family must be the first CFOptions section " "in the optio/n config file"); } else if (GetCFOptions(section_arg) != nullptr) { return InvalidArgument( line_num, "Two identical column families found in option config file"); } has_default_cf_options_ |= is_default_cf; } else if (section == kOptionSectionTableOptions) { if (GetCFOptions(section_arg) == nullptr) { return InvalidArgument( line_num, std::string( "Does not find a matched column family name in " "TableOptions section. 
Column Family Name:") + section_arg); } } else if (section == kOptionSectionVersion) { if (has_version_section_) { return InvalidArgument( line_num, "More than one Version section found in the option config file."); } has_version_section_ = true; } return Status::OK(); } Status RocksDBOptionsParser::ParseVersionNumber(const std::string& ver_name, const std::string& ver_string, const int max_count, int* version) { int version_index = 0; int current_number = 0; int current_digit_count = 0; bool has_dot = false; for (int i = 0; i < max_count; ++i) { version[i] = 0; } constexpr int kBufferSize = 200; char buffer[kBufferSize]; for (size_t i = 0; i < ver_string.size(); ++i) { if (ver_string[i] == '.') { if (version_index >= max_count - 1) { snprintf(buffer, sizeof(buffer) - 1, "A valid %s can only contains at most %d dots.", ver_name.c_str(), max_count - 1); return Status::InvalidArgument(buffer); } if (current_digit_count == 0) { snprintf(buffer, sizeof(buffer) - 1, "A valid %s must have at least one digit before each dot.", ver_name.c_str()); return Status::InvalidArgument(buffer); } version[version_index++] = current_number; current_number = 0; current_digit_count = 0; has_dot = true; } else if (isdigit(ver_string[i])) { current_number = current_number * 10 + (ver_string[i] - '0'); current_digit_count++; } else { snprintf(buffer, sizeof(buffer) - 1, "A valid %s can only contains dots and numbers.", ver_name.c_str()); return Status::InvalidArgument(buffer); } } version[version_index] = current_number; if (has_dot && current_digit_count == 0) { snprintf(buffer, sizeof(buffer) - 1, "A valid %s must have at least one digit after each dot.", ver_name.c_str()); return Status::InvalidArgument(buffer); } return Status::OK(); } Status RocksDBOptionsParser::EndSection( const ConfigOptions& config_options, const OptionSection section, const std::string& section_title, const std::string& section_arg, const std::unordered_map& opt_map) { Status s; if (section == 
kOptionSectionDBOptions) { s = GetDBOptionsFromMap(config_options, DBOptions(), opt_map, &db_opt_); if (!s.ok()) { return s; } db_opt_map_ = opt_map; } else if (section == kOptionSectionCFOptions) { // This condition should be ensured earlier in ParseSection // so we make an assertion here. assert(GetCFOptions(section_arg) == nullptr); cf_names_.emplace_back(section_arg); cf_opts_.emplace_back(); s = GetColumnFamilyOptionsFromMap(config_options, ColumnFamilyOptions(), opt_map, &cf_opts_.back()); if (!s.ok()) { return s; } // keep the parsed string. cf_opt_maps_.emplace_back(opt_map); } else if (section == kOptionSectionTableOptions) { assert(GetCFOptions(section_arg) != nullptr); auto* cf_opt = GetCFOptionsImpl(section_arg); if (cf_opt == nullptr) { return Status::InvalidArgument( "The specified column family must be defined before the " "TableOptions section:", section_arg); } // Ignore error as table factory deserialization is optional s = GetTableFactoryFromMap( config_options, section_title.substr( opt_section_titles[kOptionSectionTableOptions].size()), opt_map, &(cf_opt->table_factory)); if (!s.ok()) { return s; } } else if (section == kOptionSectionVersion) { for (const auto& pair : opt_map) { if (pair.first == "rocksdb_version") { s = ParseVersionNumber(pair.first, pair.second, 3, db_version); if (!s.ok()) { return s; } } else if (pair.first == "options_file_version") { s = ParseVersionNumber(pair.first, pair.second, 2, opt_file_version); if (!s.ok()) { return s; } if (opt_file_version[0] < 1) { return Status::InvalidArgument( "A valid options_file_version must be at least 1."); } } } } return s; } Status RocksDBOptionsParser::ValidityCheck() { if (!has_db_options_) { return Status::Corruption( "A RocksDB Option file must have a single DBOptions section"); } if (!has_default_cf_options_) { return Status::Corruption( "A RocksDB Option file must have a single CFOptions:default section"); } return Status::OK(); } std::string 
RocksDBOptionsParser::TrimAndRemoveComment(const std::string& line, bool trim_only) { size_t start = 0; size_t end = line.size(); // we only support "#" style comment if (!trim_only) { size_t search_pos = 0; while (search_pos < line.size()) { size_t comment_pos = line.find('#', search_pos); if (comment_pos == std::string::npos) { break; } if (comment_pos == 0 || line[comment_pos - 1] != '\\') { end = comment_pos; break; } search_pos = comment_pos + 1; } } while (start < end && isspace(line[start]) != 0) { ++start; } // start < end implies end > 0. while (start < end && isspace(line[end - 1]) != 0) { --end; } if (start < end) { return line.substr(start, end - start); } return ""; } Status RocksDBOptionsParser::VerifyRocksDBOptionsFromFile( const ConfigOptions& config_options, const DBOptions& db_opt, const std::vector& cf_names, const std::vector& cf_opts, const std::string& file_name, FileSystem* fs) { RocksDBOptionsParser parser; Status s = parser.Parse(config_options, file_name, fs); if (!s.ok()) { return s; } // Verify DBOptions s = VerifyDBOptions(config_options, db_opt, *parser.db_opt(), parser.db_opt_map()); if (!s.ok()) { return s; } // Verify ColumnFamily Name if (cf_names.size() != parser.cf_names()->size()) { if (config_options.sanity_level >= ConfigOptions::kSanityLevelLooselyCompatible) { return Status::InvalidArgument( "[RocksDBOptionParser Error] The persisted options does not have " "the same number of column family names as the db instance."); } else if (cf_opts.size() > parser.cf_opts()->size()) { return Status::InvalidArgument( "[RocksDBOptionsParser Error]", "The persisted options file has less number of column family " "names than that of the specified one."); } } for (size_t i = 0; i < cf_names.size(); ++i) { if (cf_names[i] != parser.cf_names()->at(i)) { return Status::InvalidArgument( "[RocksDBOptionParser Error] The persisted options and the db" "instance does not have the same name for column family ", ToString(i)); } } // Verify Column 
Family Options if (cf_opts.size() != parser.cf_opts()->size()) { if (config_options.sanity_level >= ConfigOptions::kSanityLevelLooselyCompatible) { return Status::InvalidArgument( "[RocksDBOptionsParser Error]", "The persisted options does not have the same number of " "column families as the db instance."); } else if (cf_opts.size() > parser.cf_opts()->size()) { return Status::InvalidArgument( "[RocksDBOptionsParser Error]", "The persisted options file has less number of column families " "than that of the specified number."); } } for (size_t i = 0; i < cf_opts.size(); ++i) { s = VerifyCFOptions(config_options, cf_opts[i], parser.cf_opts()->at(i), &(parser.cf_opt_maps()->at(i))); if (!s.ok()) { return s; } s = VerifyTableFactory(config_options, cf_opts[i].table_factory.get(), parser.cf_opts()->at(i).table_factory.get()); if (!s.ok()) { return s; } } return Status::OK(); } Status RocksDBOptionsParser::VerifyDBOptions( const ConfigOptions& config_options, const DBOptions& base_opt, const DBOptions& file_opt, const std::unordered_map* /*opt_map*/) { for (const auto& pair : db_options_type_info) { const auto& opt_info = pair.second; if (config_options.IsCheckEnabled(opt_info.GetSanityLevel())) { const char* base_addr = reinterpret_cast(&base_opt) + opt_info.offset_; const char* file_addr = reinterpret_cast(&file_opt) + opt_info.offset_; std::string mismatch; if (!opt_info.AreEqual(config_options, pair.first, base_addr, file_addr, &mismatch) && !opt_info.AreEqualByName(config_options, pair.first, base_addr, file_addr)) { const size_t kBufferSize = 2048; char buffer[kBufferSize]; std::string base_value; std::string file_value; int offset = snprintf(buffer, sizeof(buffer), "[RocksDBOptionsParser]: " "failed the verification on ColumnFamilyOptions::%s", pair.first.c_str()); Status s = opt_info.Serialize(config_options, pair.first, base_addr, &base_value); if (s.ok()) { s = opt_info.Serialize(config_options, pair.first, file_addr, &file_value); } snprintf(buffer, 
sizeof(buffer), "[RocksDBOptionsParser]: " "failed the verification on DBOptions::%s --- " "The specified one is %s while the persisted one is %s.\n", pair.first.c_str(), base_value.c_str(), file_value.c_str()); assert(offset >= 0); assert(static_cast(offset) < sizeof(buffer)); if (s.ok()) { snprintf( buffer + offset, sizeof(buffer) - static_cast(offset), "--- The specified one is %s while the persisted one is %s.\n", base_value.c_str(), file_value.c_str()); } else { snprintf(buffer + offset, sizeof(buffer) - static_cast(offset), "--- Unable to re-serialize an option: %s.\n", s.ToString().c_str()); } return Status::InvalidArgument(Slice(buffer, strlen(buffer))); } } } return Status::OK(); } Status RocksDBOptionsParser::VerifyCFOptions( const ConfigOptions& config_options, const ColumnFamilyOptions& base_opt, const ColumnFamilyOptions& file_opt, const std::unordered_map* opt_map) { for (const auto& pair : cf_options_type_info) { const auto& opt_info = pair.second; if (config_options.IsCheckEnabled(opt_info.GetSanityLevel())) { std::string mismatch; const char* base_addr = reinterpret_cast(&base_opt) + opt_info.offset_; const char* file_addr = reinterpret_cast(&file_opt) + opt_info.offset_; bool matches = opt_info.AreEqual(config_options, pair.first, base_addr, file_addr, &mismatch); if (!matches && opt_info.IsByName()) { if (opt_map == nullptr) { matches = true; } else { auto iter = opt_map->find(pair.first); if (iter == opt_map->end()) { matches = true; } else { matches = opt_info.AreEqualByName(config_options, pair.first, base_addr, iter->second); } } } if (!matches) { // The options do not match const size_t kBufferSize = 2048; char buffer[kBufferSize]; std::string base_value; std::string file_value; Status s = opt_info.Serialize(config_options, pair.first, base_addr, &base_value); if (s.ok()) { s = opt_info.Serialize(config_options, pair.first, file_addr, &file_value); } int offset = snprintf(buffer, sizeof(buffer), "[RocksDBOptionsParser]: " "failed the 
verification on ColumnFamilyOptions::%s", pair.first.c_str()); assert(offset >= 0); assert(static_cast(offset) < sizeof(buffer)); if (s.ok()) { snprintf( buffer + offset, sizeof(buffer) - static_cast(offset), "--- The specified one is %s while the persisted one is %s.\n", base_value.c_str(), file_value.c_str()); } else { snprintf(buffer + offset, sizeof(buffer) - static_cast(offset), "--- Unable to re-serialize an option: %s.\n", s.ToString().c_str()); } return Status::InvalidArgument(Slice(buffer, sizeof(buffer))); } // if (! matches) } // CheckSanityLevel } // For each option return Status::OK(); } Status RocksDBOptionsParser::VerifyTableFactory( const ConfigOptions& config_options, const TableFactory* base_tf, const TableFactory* file_tf) { if (base_tf && file_tf) { if (config_options.sanity_level > ConfigOptions::kSanityLevelNone && std::string(base_tf->Name()) != std::string(file_tf->Name())) { return Status::Corruption( "[RocksDBOptionsParser]: " "failed the verification on TableFactory->Name()"); } if (base_tf->Name() == BlockBasedTableFactory::kName) { return VerifyBlockBasedTableFactory( config_options, static_cast_with_check(base_tf), static_cast_with_check(file_tf)); } // TODO(yhchiang): add checks for other table factory types } else { // TODO(yhchiang): further support sanity check here } return Status::OK(); } } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/options/options_parser.h000066400000000000000000000125331370372246700201640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once

#include
#include
#include

#include "rocksdb/env.h"
#include "rocksdb/options.h"

namespace ROCKSDB_NAMESPACE {

#ifndef ROCKSDB_LITE
struct ConfigOptions;
class OptionTypeInfo;
class TableFactory;

// Version of the options file format.
#define ROCKSDB_OPTION_FILE_MAJOR 1
#define ROCKSDB_OPTION_FILE_MINOR 1

// Kinds of sections that can appear in an options file.
enum OptionSection : char {
  kOptionSectionVersion = 0,
  kOptionSectionDBOptions,
  kOptionSectionCFOptions,
  kOptionSectionTableOptions,
  kOptionSectionUnknown
};

// Section titles, listed in the same order as the OptionSection enumerators.
static const std::string opt_section_titles[] = {
    "Version", "DBOptions", "CFOptions", "TableOptions/", "Unknown"};

// Two overloads for persisting DB and column family options to `file_name`;
// the second additionally takes a ConfigOptions.
Status PersistRocksDBOptions(const DBOptions& db_opt,
                             const std::vector& cf_names,
                             const std::vector& cf_opts,
                             const std::string& file_name, FileSystem* fs);
Status PersistRocksDBOptions(const ConfigOptions& config_options,
                             const DBOptions& db_opt,
                             const std::vector& cf_names,
                             const std::vector& cf_opts,
                             const std::string& file_name, FileSystem* fs);

// Parser for options files.  After a parse, the accessors below expose the
// DBOptions, the column family names/options and the raw option maps held
// by the parser.
class RocksDBOptionsParser {
 public:
  explicit RocksDBOptionsParser();
  ~RocksDBOptionsParser() {}
  void Reset();

  // `file_readahead_size` is used for readahead for the option file.
  // If 0 is given, a default value will be used.
  Status Parse(const std::string& file_name, FileSystem* fs,
               bool ignore_unknown_options, size_t file_readahead_size);

  Status Parse(const ConfigOptions& config_options,
               const std::string& file_name, FileSystem* fs);

  // Returns `line` with leading and trailing whitespace removed.  Unless
  // `trim_only` is true, everything from the first '#' that is not preceded
  // by a backslash is dropped as a comment first.
  static std::string TrimAndRemoveComment(const std::string& line,
                                          const bool trim_only = false);

  // Accessors returning pointers to the parser's own members; the pointers
  // refer to storage inside this object.
  const DBOptions* db_opt() const { return &db_opt_; }
  const std::unordered_map* db_opt_map() const {
    return &db_opt_map_;
  }
  const std::vector* cf_opts() const { return &cf_opts_; }
  const std::vector* cf_names() const { return &cf_names_; }
  const std::vector>* cf_opt_maps() const {
    return &cf_opt_maps_;
  }

  // Returns the options of the column family called `name`, or nullptr if
  // no such column family is held by the parser.
  const ColumnFamilyOptions* GetCFOptions(const std::string& name) {
    return GetCFOptionsImpl(name);
  }
  size_t NumColumnFamilies() { return cf_opts_.size(); }

  // Parses `file_name` and verifies its contents against the supplied
  // DBOptions, column family names and column family options.
  static Status VerifyRocksDBOptionsFromFile(
      const ConfigOptions& config_options, const DBOptions& db_opt,
      const std::vector& cf_names,
      const std::vector& cf_opts,
      const std::string& file_name, FileSystem* fs);
  static Status VerifyDBOptions(
      const ConfigOptions& config_options, const DBOptions& base_opt,
      const DBOptions& new_opt,
      const std::unordered_map* new_opt_map = nullptr);

  static Status VerifyCFOptions(
      const ConfigOptions& config_options, const ColumnFamilyOptions& base_opt,
      const ColumnFamilyOptions& new_opt,
      const std::unordered_map* new_opt_map = nullptr);

  static Status VerifyTableFactory(const ConfigOptions& config_options,
                                   const TableFactory* base_tf,
                                   const TableFactory* file_tf);

  static Status ExtraParserCheck(const RocksDBOptionsParser& input_parser);

 protected:
  bool IsSection(const std::string& line);
  Status ParseSection(OptionSection* section, std::string* title,
                      std::string* argument, const std::string& line,
                      const int line_num);

  Status CheckSection(const OptionSection section,
                      const std::string& section_arg, const int line_num);

  Status ParseStatement(std::string* name, std::string* value,
                        const std::string& line, const int line_num);

  Status EndSection(
      const ConfigOptions& config_options, const OptionSection section,
      const std::string& title, const std::string& section_arg,
      const std::unordered_map& opt_map);

  Status ValidityCheck();

  Status InvalidArgument(const int line_num, const std::string& message);

  Status ParseVersionNumber(const std::string& ver_name,
                            const std::string& ver_string, const int max_count,
                            int* version);

  // Linear search of cf_names_ for `name`; returns the options stored at
  // the same index in cf_opts_, or nullptr when the name is not present.
  ColumnFamilyOptions* GetCFOptionsImpl(const std::string& name) {
    // The two vectors are parallel: entry i of each describes the same
    // column family.
    assert(cf_names_.size() == cf_opts_.size());
    for (size_t i = 0; i < cf_names_.size(); ++i) {
      if (cf_names_[i] == name) {
        return &cf_opts_[i];
      }
    }
    return nullptr;
  }

 private:
  DBOptions db_opt_;
  std::unordered_map db_opt_map_;
  std::vector cf_names_;
  std::vector cf_opts_;
  std::vector> cf_opt_maps_;
  bool has_version_section_;
  bool has_db_options_;
  bool has_default_cf_options_;
  int db_version[3];
  int opt_file_version[3];
};

#endif  // !ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/options/options_settable_test.cc000066400000000000000000000610711370372246700216710ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include

#include "options/options_helper.h"
#include "rocksdb/convenience.h"
#include "test_util/testharness.h"

#ifndef GFLAGS
bool FLAGS_enable_print = false;
#else
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
DEFINE_bool(enable_print, false, "Print options generated to console.");
#endif  // GFLAGS

namespace ROCKSDB_NAMESPACE {

// Verify options are settable from options strings.
// We take the approach that depends on compiler behavior that copy constructor // won't touch implicit padding bytes, so that the test is fragile. // As a result, we only run the tests to verify new fields in options are // settable through string on limited platforms as it depends on behavior of // compilers. #ifndef ROCKSDB_LITE #if defined OS_LINUX || defined OS_WIN #ifndef __clang__ class OptionsSettableTest : public testing::Test { public: OptionsSettableTest() {} }; const char kSpecialChar = 'z'; typedef std::vector> OffsetGap; void FillWithSpecialChar(char* start_ptr, size_t total_size, const OffsetGap& blacklist, char special_char = kSpecialChar) { size_t offset = 0; for (auto& pair : blacklist) { std::memset(start_ptr + offset, special_char, pair.first - offset); offset = pair.first + pair.second; } std::memset(start_ptr + offset, special_char, total_size - offset); } int NumUnsetBytes(char* start_ptr, size_t total_size, const OffsetGap& blacklist) { int total_unset_bytes_base = 0; size_t offset = 0; for (auto& pair : blacklist) { for (char* ptr = start_ptr + offset; ptr < start_ptr + pair.first; ptr++) { if (*ptr == kSpecialChar) { total_unset_bytes_base++; } } offset = pair.first + pair.second; } for (char* ptr = start_ptr + offset; ptr < start_ptr + total_size; ptr++) { if (*ptr == kSpecialChar) { total_unset_bytes_base++; } } return total_unset_bytes_base; } // Return true iff two structs are the same except blacklist fields. 
bool CompareBytes(char* start_ptr1, char* start_ptr2, size_t total_size,
                  const OffsetGap& blacklist) {
  size_t offset = 0;
  for (auto& pair : blacklist) {
    // Compare the bytes that precede this blacklisted range.
    for (; offset < pair.first; offset++) {
      if (*(start_ptr1 + offset) != *(start_ptr2 + offset)) {
        return false;
      }
    }
    // Skip over the blacklisted range itself.
    offset = pair.first + pair.second;
  }
  // Compare the bytes after the last blacklisted range.
  for (; offset < total_size; offset++) {
    if (*(start_ptr1 + offset) != *(start_ptr2 + offset)) {
      return false;
    }
  }
  return true;
}

// If the test fails, likely a new option is added to BlockBasedTableOptions
// but it cannot be set through GetBlockBasedTableOptionsFromString(), or the
// test is not updated accordingly.
// After adding an option, we need to make sure it is settable by
// GetBlockBasedTableOptionsFromString() and add the option to the input string
// passed to the GetBlockBasedTableOptionsFromString() in this test.
// If it is a complicated type, you also need to add the field to
// kBbtoBlacklist, and maybe add customized verification for it.
TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
  // Items in the form of <offset, size>. Need to be in ascending order
  // and not overlapping. Need to be updated if a new pointer-option is added.
  const OffsetGap kBbtoBlacklist = {
      {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory),
       sizeof(std::shared_ptr)},
      {offsetof(struct BlockBasedTableOptions, block_cache),
       sizeof(std::shared_ptr)},
      {offsetof(struct BlockBasedTableOptions, persistent_cache),
       sizeof(std::shared_ptr)},
      {offsetof(struct BlockBasedTableOptions, block_cache_compressed),
       sizeof(std::shared_ptr)},
      {offsetof(struct BlockBasedTableOptions, filter_policy),
       sizeof(std::shared_ptr)},
  };

  // In this test, we catch a new option of BlockBasedTableOptions that is not
  // settable through GetBlockBasedTableOptionsFromString().
  // We count padding bytes of the option struct, and assert it to be the same
  // as unset bytes of an option struct initialized by
  // GetBlockBasedTableOptionsFromString().

  char* bbto_ptr = new char[sizeof(BlockBasedTableOptions)];

  // Count padding bytes by setting all bytes in the memory to a special char,
  // copy a well constructed struct to this memory and see how many special
  // bytes are left.
  BlockBasedTableOptions* bbto = new (bbto_ptr) BlockBasedTableOptions();
  FillWithSpecialChar(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoBlacklist);
  // It is based on the behavior of the compiler that padding bytes are not
  // changed when copying the struct. It's prone to failure when compiler
  // behavior changes. We verify there are unset bytes to detect the case.
  *bbto = BlockBasedTableOptions();
  int unset_bytes_base =
      NumUnsetBytes(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoBlacklist);
  ASSERT_GT(unset_bytes_base, 0);
  bbto->~BlockBasedTableOptions();

  // Construct the base option passed into
  // GetBlockBasedTableOptionsFromString().
  bbto = new (bbto_ptr) BlockBasedTableOptions();
  FillWithSpecialChar(bbto_ptr, sizeof(BlockBasedTableOptions), kBbtoBlacklist);
  // This option is not settable:
  bbto->use_delta_encoding = true;

  char* new_bbto_ptr = new char[sizeof(BlockBasedTableOptions)];
  BlockBasedTableOptions* new_bbto =
      new (new_bbto_ptr) BlockBasedTableOptions();
  FillWithSpecialChar(new_bbto_ptr, sizeof(BlockBasedTableOptions),
                      kBbtoBlacklist);

  // Need to update the option string if a new option is added.
  ASSERT_OK(GetBlockBasedTableOptionsFromString(
      *bbto,
      "cache_index_and_filter_blocks=1;"
      "cache_index_and_filter_blocks_with_high_priority=true;"
      "pin_l0_filter_and_index_blocks_in_cache=1;"
      "pin_top_level_index_and_filter=1;"
      "index_type=kHashSearch;"
      "data_block_index_type=kDataBlockBinaryAndHash;"
      "index_shortening=kNoShortening;"
      "data_block_hash_table_util_ratio=0.75;"
      "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
      "block_cache=1M;block_cache_compressed=1k;block_size=1024;"
      "block_size_deviation=8;block_restart_interval=4; "
      "metadata_block_size=1024;"
      "partition_filters=false;"
      "index_block_restart_interval=4;"
      "filter_policy=bloomfilter:4:true;whole_key_filtering=1;"
      "format_version=1;"
      "hash_index_allow_collision=false;"
      "verify_compression=true;read_amp_bytes_per_bit=0;"
      "enable_index_compression=false;"
      "block_align=true",
      new_bbto));

  // Every non-padding, non-blacklisted byte must have been overwritten by
  // the parse, leaving the same number of special bytes as the baseline.
  ASSERT_EQ(unset_bytes_base,
            NumUnsetBytes(new_bbto_ptr, sizeof(BlockBasedTableOptions),
                          kBbtoBlacklist));

  // The blacklisted pointer fields set via the string are checked directly.
  ASSERT_TRUE(new_bbto->block_cache.get() != nullptr);
  ASSERT_TRUE(new_bbto->block_cache_compressed.get() != nullptr);
  ASSERT_TRUE(new_bbto->filter_policy.get() != nullptr);

  bbto->~BlockBasedTableOptions();
  new_bbto->~BlockBasedTableOptions();

  delete[] bbto_ptr;
  delete[] new_bbto_ptr;
}

// If the test fails, likely a new option is added to DBOptions
// but it cannot be set through GetDBOptionsFromString(), or the test is not
// updated accordingly.
// After adding an option, we need to make sure it is settable by
// GetDBOptionsFromString() and add the option to the input string passed to
// GetDBOptionsFromString() in this test.
// If it is a complicated type, you also need to add the field to
// kDBOptionsBlacklist, and maybe add customized verification for it.
TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
  // (offset, size) pairs of DBOptions members that are excluded from the
  // byte-level checks below.
  const OffsetGap kDBOptionsBlacklist = {
      {offsetof(struct DBOptions, env), sizeof(Env*)},
      {offsetof(struct DBOptions, rate_limiter),
       sizeof(std::shared_ptr)},
      {offsetof(struct DBOptions, sst_file_manager),
       sizeof(std::shared_ptr)},
      {offsetof(struct DBOptions, info_log), sizeof(std::shared_ptr)},
      {offsetof(struct DBOptions, statistics),
       sizeof(std::shared_ptr)},
      {offsetof(struct DBOptions, db_paths), sizeof(std::vector)},
      {offsetof(struct DBOptions, db_log_dir), sizeof(std::string)},
      {offsetof(struct DBOptions, wal_dir), sizeof(std::string)},
      {offsetof(struct DBOptions, write_buffer_manager),
       sizeof(std::shared_ptr)},
      {offsetof(struct DBOptions, listeners),
       sizeof(std::vector>)},
      {offsetof(struct DBOptions, row_cache), sizeof(std::shared_ptr)},
      {offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)},
      {offsetof(struct DBOptions, file_checksum_gen_factory),
       sizeof(std::shared_ptr)},
  };

  char* options_ptr = new char[sizeof(DBOptions)];

  // Count padding bytes by setting all bytes in the memory to a special char,
  // copy a well constructed struct to this memory and see how many special
  // bytes are left.
  DBOptions* options = new (options_ptr) DBOptions();
  FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist);
  // It is based on the behavior of the compiler that padding bytes are not
  // changed when copying the struct. It's prone to failure when compiler
  // behavior changes. We verify there are unset bytes to detect the case.
  *options = DBOptions();
  int unset_bytes_base =
      NumUnsetBytes(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist);
  ASSERT_GT(unset_bytes_base, 0);
  options->~DBOptions();

  // Base options passed into GetDBOptionsFromString().
  options = new (options_ptr) DBOptions();
  FillWithSpecialChar(options_ptr, sizeof(DBOptions), kDBOptionsBlacklist);

  // Destination for the parsed options, pre-filled with the special char.
  char* new_options_ptr = new char[sizeof(DBOptions)];
  DBOptions* new_options = new (new_options_ptr) DBOptions();
  FillWithSpecialChar(new_options_ptr, sizeof(DBOptions), kDBOptionsBlacklist);

  // Need to update the option string if a new option is added.
  ASSERT_OK(
      GetDBOptionsFromString(*options,
                             "wal_bytes_per_sync=4295048118;"
                             "delete_obsolete_files_period_micros=4294967758;"
                             "WAL_ttl_seconds=4295008036;"
                             "WAL_size_limit_MB=4295036161;"
                             "max_write_batch_group_size_bytes=1048576;"
                             "wal_dir=path/to/wal_dir;"
                             "db_write_buffer_size=2587;"
                             "max_subcompactions=64330;"
                             "table_cache_numshardbits=28;"
                             "max_open_files=72;"
                             "max_file_opening_threads=35;"
                             "max_background_jobs=8;"
                             "base_background_compactions=3;"
                             "max_background_compactions=33;"
                             "use_fsync=true;"
                             "use_adaptive_mutex=false;"
                             "max_total_wal_size=4295005604;"
                             "compaction_readahead_size=0;"
                             "new_table_reader_for_compaction_inputs=false;"
                             "keep_log_file_num=4890;"
                             "skip_stats_update_on_db_open=false;"
                             "skip_checking_sst_file_sizes_on_db_open=false;"
                             "max_manifest_file_size=4295009941;"
                             "db_log_dir=path/to/db_log_dir;"
                             "skip_log_error_on_recovery=true;"
                             "writable_file_max_buffer_size=1048576;"
                             "paranoid_checks=true;"
                             "is_fd_close_on_exec=false;"
                             "bytes_per_sync=4295013613;"
                             "strict_bytes_per_sync=true;"
                             "enable_thread_tracking=false;"
                             "recycle_log_file_num=0;"
                             "create_missing_column_families=true;"
                             "log_file_time_to_roll=3097;"
                             "max_background_flushes=35;"
                             "create_if_missing=false;"
                             "error_if_exists=true;"
                             "delayed_write_rate=4294976214;"
                             "manifest_preallocation_size=1222;"
                             "allow_mmap_writes=false;"
                             "stats_dump_period_sec=70127;"
                             "stats_persist_period_sec=54321;"
                             "persist_stats_to_disk=true;"
                             "stats_history_buffer_size=14159;"
                             "allow_fallocate=true;"
                             "allow_mmap_reads=false;"
                             "use_direct_reads=false;"
                             "use_direct_io_for_flush_and_compaction=false;"
                             "max_log_file_size=4607;"
                             "random_access_max_buffer_size=1048576;"
                             "advise_random_on_open=true;"
                             "fail_if_options_file_error=false;"
                             "enable_pipelined_write=false;"
                             "unordered_write=false;"
                             "allow_concurrent_memtable_write=true;"
                             "wal_recovery_mode=kPointInTimeRecovery;"
                             "enable_write_thread_adaptive_yield=true;"
                             "write_thread_slow_yield_usec=5;"
                             "write_thread_max_yield_usec=1000;"
                             "access_hint_on_compaction_start=NONE;"
                             "info_log_level=DEBUG_LEVEL;"
                             "dump_malloc_stats=false;"
                             "allow_2pc=false;"
                             "avoid_flush_during_recovery=false;"
                             "avoid_flush_during_shutdown=false;"
                             "allow_ingest_behind=false;"
                             "preserve_deletes=false;"
                             "concurrent_prepare=false;"
                             "two_write_queues=false;"
                             "manual_wal_flush=false;"
                             "seq_per_batch=false;"
                             "atomic_flush=false;"
                             "avoid_unnecessary_blocking_io=false;"
                             "log_readahead_size=0;"
                             "write_dbid_to_manifest=false;"
                             "best_efforts_recovery=false",
                             new_options));

  // The parsed struct must have exactly as many untouched bytes as the
  // padding-only baseline measured above.
  ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),
                                            kDBOptionsBlacklist));

  options->~DBOptions();
  new_options->~DBOptions();

  delete[] options_ptr;
  delete[] new_options_ptr;
}

// Returns the byte offset of `member` within T2, computed as the address
// difference between the member and a function-local static T2 instance.
template
inline int offset_of(T1 T2::*member) {
  static T2 obj;
  return int(size_t(&(obj.*member)) - size_t(&obj));
}

// If the test fails, likely a new option is added to ColumnFamilyOptions
// but it cannot be set through GetColumnFamilyOptionsFromString(), or the
// test is not updated accordingly.
// After adding an option, we need to make sure it is settable by
// GetColumnFamilyOptionsFromString() and add the option to the input
// string passed to GetColumnFamilyOptionsFromString() in this test.
// If it is a complicated type, you also need to add the field to
// kColumnFamilyOptionsBlacklist, and maybe add customized verification
// for it.
TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
  // options in the blacklist need to appear in the same order as in
  // ColumnFamilyOptions.
  const OffsetGap kColumnFamilyOptionsBlacklist = {
      {offset_of(&ColumnFamilyOptions::inplace_callback),
       sizeof(UpdateStatus(*)(char*, uint32_t*, Slice, std::string*))},
      {offset_of(
           &ColumnFamilyOptions::memtable_insert_with_hint_prefix_extractor),
       sizeof(std::shared_ptr)},
      {offset_of(&ColumnFamilyOptions::compression_per_level),
       sizeof(std::vector)},
      {offset_of(
           &ColumnFamilyOptions::max_bytes_for_level_multiplier_additional),
       sizeof(std::vector)},
      {offset_of(&ColumnFamilyOptions::memtable_factory),
       sizeof(std::shared_ptr)},
      {offset_of(&ColumnFamilyOptions::table_properties_collector_factories),
       sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
      {offset_of(&ColumnFamilyOptions::comparator), sizeof(Comparator*)},
      {offset_of(&ColumnFamilyOptions::merge_operator),
       sizeof(std::shared_ptr)},
      {offset_of(&ColumnFamilyOptions::compaction_filter),
       sizeof(const CompactionFilter*)},
      {offset_of(&ColumnFamilyOptions::compaction_filter_factory),
       sizeof(std::shared_ptr)},
      {offset_of(&ColumnFamilyOptions::prefix_extractor),
       sizeof(std::shared_ptr)},
      {offset_of(&ColumnFamilyOptions::snap_refresh_nanos), sizeof(uint64_t)},
      {offset_of(&ColumnFamilyOptions::table_factory),
       sizeof(std::shared_ptr)},
      {offset_of(&ColumnFamilyOptions::cf_paths),
       sizeof(std::vector)},
      {offset_of(&ColumnFamilyOptions::compaction_thread_limiter),
       sizeof(std::shared_ptr)},
  };

  char* options_ptr = new char[sizeof(ColumnFamilyOptions)];

  // Count padding bytes by setting all bytes in the memory to a special char,
  // copy a well constructed struct to this memory and see how many special
  // bytes are left.
  ColumnFamilyOptions* options = new (options_ptr) ColumnFamilyOptions();
  FillWithSpecialChar(options_ptr, sizeof(ColumnFamilyOptions),
                      kColumnFamilyOptionsBlacklist);
  // It is based on the behavior of the compiler that padding bytes are not
  // changed when copying the struct. It's prone to failure when compiler
  // behavior changes. We verify there are unset bytes to detect the case.
  *options = ColumnFamilyOptions();

  // Deprecated option which is not initialized. Need to set it to avoid
  // Valgrind error
  options->max_mem_compaction_level = 0;

  int unset_bytes_base = NumUnsetBytes(options_ptr, sizeof(ColumnFamilyOptions),
                                       kColumnFamilyOptionsBlacklist);
  ASSERT_GT(unset_bytes_base, 0);
  options->~ColumnFamilyOptions();

  options = new (options_ptr) ColumnFamilyOptions();
  FillWithSpecialChar(options_ptr, sizeof(ColumnFamilyOptions),
                      kColumnFamilyOptionsBlacklist);

  // Following options are not settable through
  // GetColumnFamilyOptionsFromString():
  options->rate_limit_delay_max_milliseconds = 33;
  options->compaction_options_universal = CompactionOptionsUniversal();
  options->hard_rate_limit = 0;
  options->soft_rate_limit = 0;
  options->purge_redundant_kvs_while_flush = false;
  options->max_mem_compaction_level = 0;
  options->compaction_filter = nullptr;

  char* new_options_ptr = new char[sizeof(ColumnFamilyOptions)];
  ColumnFamilyOptions* new_options =
      new (new_options_ptr) ColumnFamilyOptions();
  FillWithSpecialChar(new_options_ptr, sizeof(ColumnFamilyOptions),
                      kColumnFamilyOptionsBlacklist);

  // Need to update the option string if a new option is added.
  ASSERT_OK(GetColumnFamilyOptionsFromString(
      *options,
      "compaction_filter_factory=mpudlojcujCompactionFilterFactory;"
      "table_factory=PlainTable;"
      "prefix_extractor=rocksdb.CappedPrefix.13;"
      "comparator=leveldb.BytewiseComparator;"
      "compression_per_level=kBZip2Compression:kBZip2Compression:"
      "kBZip2Compression:kNoCompression:kZlibCompression:kBZip2Compression:"
      "kSnappyCompression;"
      "max_bytes_for_level_base=986;"
      "bloom_locality=8016;"
      "target_file_size_base=4294976376;"
      "memtable_huge_page_size=2557;"
      "max_successive_merges=5497;"
      "max_sequential_skip_in_iterations=4294971408;"
      "arena_block_size=1893;"
      "target_file_size_multiplier=35;"
      "min_write_buffer_number_to_merge=9;"
      "max_write_buffer_number=84;"
      "write_buffer_size=1653;"
      "max_compaction_bytes=64;"
      "max_bytes_for_level_multiplier=60;"
      "memtable_factory=SkipListFactory;"
      "compression=kNoCompression;"
      "compression_opts=5:6:7:8:9:true;"
      "bottommost_compression_opts=4:5:6:7:8:true;"
      "bottommost_compression=kDisableCompressionOption;"
      "level0_stop_writes_trigger=33;"
      "num_levels=99;"
      "level0_slowdown_writes_trigger=22;"
      "level0_file_num_compaction_trigger=14;"
      "compaction_filter=urxcqstuwnCompactionFilter;"
      "soft_rate_limit=530.615385;"
      "soft_pending_compaction_bytes_limit=0;"
      "max_write_buffer_number_to_maintain=84;"
      "max_write_buffer_size_to_maintain=2147483648;"
      "merge_operator=aabcxehazrMergeOperator;"
      "memtable_prefix_bloom_size_ratio=0.4642;"
      "memtable_whole_key_filtering=true;"
      "memtable_insert_with_hint_prefix_extractor=rocksdb.CappedPrefix.13;"
      "paranoid_file_checks=true;"
      "force_consistency_checks=true;"
      "inplace_update_num_locks=7429;"
      "optimize_filters_for_hits=false;"
      "level_compaction_dynamic_level_bytes=false;"
      "inplace_update_support=false;"
      "compaction_style=kCompactionStyleFIFO;"
      "compaction_pri=kMinOverlappingRatio;"
      "hard_pending_compaction_bytes_limit=0;"
      "disable_auto_compactions=false;"
      "report_bg_io_stats=true;"
      "ttl=60;"
      "periodic_compaction_seconds=3600;"
      "sample_for_compression=0;"
      "compaction_options_fifo={max_table_files_size=3;allow_"
      "compaction=false;};",
      new_options));

  // The parsed struct must have exactly as many untouched bytes as the
  // padding-only baseline measured above.
  ASSERT_EQ(unset_bytes_base,
            NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions),
                          kColumnFamilyOptionsBlacklist));

  // Keep a regular copy of the parsed options before the raw buffers are
  // destroyed; it is used by the round-trip check below.
  ColumnFamilyOptions rnd_filled_options = *new_options;

  options->~ColumnFamilyOptions();
  new_options->~ColumnFamilyOptions();

  delete[] options_ptr;
  delete[] new_options_ptr;

  // Test copying to mutable and immutable options and copying back the
  // mutable part.
  const OffsetGap kMutableCFOptionsBlacklist = {
      {offset_of(&MutableCFOptions::prefix_extractor),
       sizeof(std::shared_ptr)},
      {offset_of(&MutableCFOptions::max_bytes_for_level_multiplier_additional),
       sizeof(std::vector)},
      {offset_of(&MutableCFOptions::max_file_size),
       sizeof(std::vector)},
  };

  // For all memory used for options, pre-fill every char. Otherwise, the
  // padding bytes might be different so that byte-wise comparison doesn't
  // generate equal results even if objects are equal.
  const char kMySpecialChar = 'x';
  char* mcfo1_ptr = new char[sizeof(MutableCFOptions)];
  FillWithSpecialChar(mcfo1_ptr, sizeof(MutableCFOptions),
                      kMutableCFOptionsBlacklist, kMySpecialChar);
  char* mcfo2_ptr = new char[sizeof(MutableCFOptions)];
  FillWithSpecialChar(mcfo2_ptr, sizeof(MutableCFOptions),
                      kMutableCFOptionsBlacklist, kMySpecialChar);

  // A clean column family options is constructed after filling the same special
  // char as the initial one. So that the padding bytes are the same.
  char* cfo_clean_ptr = new char[sizeof(ColumnFamilyOptions)];
  FillWithSpecialChar(cfo_clean_ptr, sizeof(ColumnFamilyOptions),
                      kColumnFamilyOptionsBlacklist);
  rnd_filled_options.num_levels = 66;
  ColumnFamilyOptions* cfo_clean = new (cfo_clean_ptr) ColumnFamilyOptions();

  // Round trip: options -> MutableCFOptions -> ColumnFamilyOptions ->
  // MutableCFOptions; the two MutableCFOptions must be byte-identical
  // outside the blacklisted members.
  MutableCFOptions* mcfo1 =
      new (mcfo1_ptr) MutableCFOptions(rnd_filled_options);
  ColumnFamilyOptions cfo_back = BuildColumnFamilyOptions(*cfo_clean, *mcfo1);
  MutableCFOptions* mcfo2 = new (mcfo2_ptr) MutableCFOptions(cfo_back);

  ASSERT_TRUE(CompareBytes(mcfo1_ptr, mcfo2_ptr, sizeof(MutableCFOptions),
                           kMutableCFOptionsBlacklist));

  cfo_clean->~ColumnFamilyOptions();
  mcfo1->~MutableCFOptions();
  mcfo2->~MutableCFOptions();
  delete[] mcfo1_ptr;
  delete[] mcfo2_ptr;
  delete[] cfo_clean_ptr;
}
#endif  // !__clang__
#endif  // OS_LINUX || OS_WIN
#endif  // !ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE

// Test entry point: initializes GoogleTest, parses gflags when GFLAGS is
// defined, and runs all tests.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
#ifdef GFLAGS
  ParseCommandLineFlags(&argc, &argv, true);
#endif  // GFLAGS
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/options/options_test.cc000066400000000000000000004364541370372246700200210ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include #include #include #include #include "cache/lru_cache.h" #include "cache/sharded_cache.h" #include "options/options_helper.h" #include "options/options_parser.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/memtablerep.h" #include "rocksdb/utilities/leveldb_options.h" #include "rocksdb/utilities/object_registry.h" #include "table/block_based/filter_policy_internal.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/random.h" #include "util/stderr_logger.h" #include "util/string_util.h" #include "utilities/merge_operators/bytesxor.h" #ifndef GFLAGS bool FLAGS_enable_print = false; #else #include "util/gflags_compat.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; DEFINE_bool(enable_print, false, "Print options generated to console."); #endif // GFLAGS namespace ROCKSDB_NAMESPACE { class OptionsTest : public testing::Test {}; #ifndef ROCKSDB_LITE // GetOptionsFromMap is not supported in ROCKSDB_LITE TEST_F(OptionsTest, GetOptionsFromMapTest) { std::unordered_map cf_options_map = { {"write_buffer_size", "1"}, {"max_write_buffer_number", "2"}, {"min_write_buffer_number_to_merge", "3"}, {"max_write_buffer_number_to_maintain", "99"}, {"max_write_buffer_size_to_maintain", "-99999"}, {"compression", "kSnappyCompression"}, {"compression_per_level", "kNoCompression:" "kSnappyCompression:" "kZlibCompression:" "kBZip2Compression:" "kLZ4Compression:" "kLZ4HCCompression:" "kXpressCompression:" "kZSTD:" "kZSTDNotFinalCompression"}, {"bottommost_compression", "kLZ4Compression"}, {"bottommost_compression_opts", "5:6:7:8:10:true"}, {"compression_opts", "4:5:6:7:8:true"}, {"num_levels", "8"}, {"level0_file_num_compaction_trigger", "8"}, {"level0_slowdown_writes_trigger", "9"}, {"level0_stop_writes_trigger", "10"}, {"target_file_size_base", "12"}, {"target_file_size_multiplier", "13"}, {"max_bytes_for_level_base", "14"}, {"level_compaction_dynamic_level_bytes", "true"}, 
{"max_bytes_for_level_multiplier", "15.0"}, {"max_bytes_for_level_multiplier_additional", "16:17:18"}, {"max_compaction_bytes", "21"}, {"soft_rate_limit", "1.1"}, {"hard_rate_limit", "2.1"}, {"hard_pending_compaction_bytes_limit", "211"}, {"arena_block_size", "22"}, {"disable_auto_compactions", "true"}, {"compaction_style", "kCompactionStyleLevel"}, {"compaction_pri", "kOldestSmallestSeqFirst"}, {"verify_checksums_in_compaction", "false"}, {"compaction_options_fifo", "23"}, {"max_sequential_skip_in_iterations", "24"}, {"inplace_update_support", "true"}, {"report_bg_io_stats", "true"}, {"compaction_measure_io_stats", "false"}, {"inplace_update_num_locks", "25"}, {"memtable_prefix_bloom_size_ratio", "0.26"}, {"memtable_whole_key_filtering", "true"}, {"memtable_huge_page_size", "28"}, {"bloom_locality", "29"}, {"max_successive_merges", "30"}, {"min_partial_merge_operands", "31"}, {"prefix_extractor", "fixed:31"}, {"optimize_filters_for_hits", "true"}, }; std::unordered_map db_options_map = { {"create_if_missing", "false"}, {"create_missing_column_families", "true"}, {"error_if_exists", "false"}, {"paranoid_checks", "true"}, {"max_open_files", "32"}, {"max_total_wal_size", "33"}, {"use_fsync", "true"}, {"db_log_dir", "/db_log_dir"}, {"wal_dir", "/wal_dir"}, {"delete_obsolete_files_period_micros", "34"}, {"max_background_compactions", "35"}, {"max_background_flushes", "36"}, {"max_log_file_size", "37"}, {"log_file_time_to_roll", "38"}, {"keep_log_file_num", "39"}, {"recycle_log_file_num", "5"}, {"max_manifest_file_size", "40"}, {"table_cache_numshardbits", "41"}, {"WAL_ttl_seconds", "43"}, {"WAL_size_limit_MB", "44"}, {"manifest_preallocation_size", "45"}, {"allow_mmap_reads", "true"}, {"allow_mmap_writes", "false"}, {"use_direct_reads", "false"}, {"use_direct_io_for_flush_and_compaction", "false"}, {"is_fd_close_on_exec", "true"}, {"skip_log_error_on_recovery", "false"}, {"stats_dump_period_sec", "46"}, {"stats_persist_period_sec", "57"}, {"persist_stats_to_disk", 
"false"}, {"stats_history_buffer_size", "69"}, {"advise_random_on_open", "true"}, {"use_adaptive_mutex", "false"}, {"new_table_reader_for_compaction_inputs", "true"}, {"compaction_readahead_size", "100"}, {"random_access_max_buffer_size", "3145728"}, {"writable_file_max_buffer_size", "314159"}, {"bytes_per_sync", "47"}, {"wal_bytes_per_sync", "48"}, {"strict_bytes_per_sync", "true"}, }; ColumnFamilyOptions base_cf_opt; ColumnFamilyOptions new_cf_opt; ConfigOptions exact, loose; exact.input_strings_escaped = false; exact.ignore_unknown_options = false; exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; loose.sanity_level = ConfigOptions::kSanityLevelLooselyCompatible; loose.input_strings_escaped = false; loose.ignore_unknown_options = true; ASSERT_OK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 1U); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 2); ASSERT_EQ(new_cf_opt.min_write_buffer_number_to_merge, 3); ASSERT_EQ(new_cf_opt.max_write_buffer_number_to_maintain, 99); ASSERT_EQ(new_cf_opt.max_write_buffer_size_to_maintain, -99999); ASSERT_EQ(new_cf_opt.compression, kSnappyCompression); ASSERT_EQ(new_cf_opt.compression_per_level.size(), 9U); ASSERT_EQ(new_cf_opt.compression_per_level[0], kNoCompression); ASSERT_EQ(new_cf_opt.compression_per_level[1], kSnappyCompression); ASSERT_EQ(new_cf_opt.compression_per_level[2], kZlibCompression); ASSERT_EQ(new_cf_opt.compression_per_level[3], kBZip2Compression); ASSERT_EQ(new_cf_opt.compression_per_level[4], kLZ4Compression); ASSERT_EQ(new_cf_opt.compression_per_level[5], kLZ4HCCompression); ASSERT_EQ(new_cf_opt.compression_per_level[6], kXpressCompression); ASSERT_EQ(new_cf_opt.compression_per_level[7], kZSTD); ASSERT_EQ(new_cf_opt.compression_per_level[8], kZSTDNotFinalCompression); ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); ASSERT_EQ(new_cf_opt.compression_opts.level, 5); ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); 
ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, CompressionOptions().parallel_threads); ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, 8u); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 10u); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, CompressionOptions().parallel_threads); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); ASSERT_EQ(new_cf_opt.num_levels, 8); ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8); ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9); ASSERT_EQ(new_cf_opt.level0_stop_writes_trigger, 10); ASSERT_EQ(new_cf_opt.target_file_size_base, static_cast(12)); ASSERT_EQ(new_cf_opt.target_file_size_multiplier, 13); ASSERT_EQ(new_cf_opt.max_bytes_for_level_base, 14U); ASSERT_EQ(new_cf_opt.level_compaction_dynamic_level_bytes, true); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier, 15.0); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional.size(), 3U); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[0], 16); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[1], 17); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[2], 18); ASSERT_EQ(new_cf_opt.max_compaction_bytes, 21); ASSERT_EQ(new_cf_opt.hard_pending_compaction_bytes_limit, 211); ASSERT_EQ(new_cf_opt.arena_block_size, 22U); ASSERT_EQ(new_cf_opt.disable_auto_compactions, true); ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel); ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst); 
ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size, static_cast(23)); ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations, static_cast(24)); ASSERT_EQ(new_cf_opt.inplace_update_support, true); ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U); ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26); ASSERT_EQ(new_cf_opt.memtable_whole_key_filtering, true); ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U); ASSERT_EQ(new_cf_opt.bloom_locality, 29U); ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr); ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()), "rocksdb.FixedPrefix.31"); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); cf_options_map["write_buffer_size"] = "1"; ASSERT_OK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, &new_cf_opt)); cf_options_map["unknown_option"] = "1"; ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // ignore_unknown_options=true;input_strings_escaped=false ASSERT_OK(GetColumnFamilyOptionsFromMap(loose, base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(loose, base_cf_opt, new_cf_opt)); ASSERT_NOK( RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); DBOptions base_db_opt; DBOptions new_db_opt; ASSERT_OK( GetDBOptionsFromMap(exact, base_db_opt, db_options_map, &new_db_opt)); ASSERT_EQ(new_db_opt.create_if_missing, false); ASSERT_EQ(new_db_opt.create_missing_column_families, true); ASSERT_EQ(new_db_opt.error_if_exists, false); ASSERT_EQ(new_db_opt.paranoid_checks, true); ASSERT_EQ(new_db_opt.max_open_files, 32); 
ASSERT_EQ(new_db_opt.max_total_wal_size, static_cast(33)); ASSERT_EQ(new_db_opt.use_fsync, true); ASSERT_EQ(new_db_opt.db_log_dir, "/db_log_dir"); ASSERT_EQ(new_db_opt.wal_dir, "/wal_dir"); ASSERT_EQ(new_db_opt.delete_obsolete_files_period_micros, static_cast(34)); ASSERT_EQ(new_db_opt.max_background_compactions, 35); ASSERT_EQ(new_db_opt.max_background_flushes, 36); ASSERT_EQ(new_db_opt.max_log_file_size, 37U); ASSERT_EQ(new_db_opt.log_file_time_to_roll, 38U); ASSERT_EQ(new_db_opt.keep_log_file_num, 39U); ASSERT_EQ(new_db_opt.recycle_log_file_num, 5U); ASSERT_EQ(new_db_opt.max_manifest_file_size, static_cast(40)); ASSERT_EQ(new_db_opt.table_cache_numshardbits, 41); ASSERT_EQ(new_db_opt.WAL_ttl_seconds, static_cast(43)); ASSERT_EQ(new_db_opt.WAL_size_limit_MB, static_cast(44)); ASSERT_EQ(new_db_opt.manifest_preallocation_size, 45U); ASSERT_EQ(new_db_opt.allow_mmap_reads, true); ASSERT_EQ(new_db_opt.allow_mmap_writes, false); ASSERT_EQ(new_db_opt.use_direct_reads, false); ASSERT_EQ(new_db_opt.use_direct_io_for_flush_and_compaction, false); ASSERT_EQ(new_db_opt.is_fd_close_on_exec, true); ASSERT_EQ(new_db_opt.skip_log_error_on_recovery, false); ASSERT_EQ(new_db_opt.stats_dump_period_sec, 46U); ASSERT_EQ(new_db_opt.stats_persist_period_sec, 57U); ASSERT_EQ(new_db_opt.persist_stats_to_disk, false); ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U); ASSERT_EQ(new_db_opt.advise_random_on_open, true); ASSERT_EQ(new_db_opt.use_adaptive_mutex, false); ASSERT_EQ(new_db_opt.new_table_reader_for_compaction_inputs, true); ASSERT_EQ(new_db_opt.compaction_readahead_size, 100); ASSERT_EQ(new_db_opt.random_access_max_buffer_size, 3145728); ASSERT_EQ(new_db_opt.writable_file_max_buffer_size, 314159); ASSERT_EQ(new_db_opt.bytes_per_sync, static_cast(47)); ASSERT_EQ(new_db_opt.wal_bytes_per_sync, static_cast(48)); ASSERT_EQ(new_db_opt.strict_bytes_per_sync, true); db_options_map["max_open_files"] = "hello"; ASSERT_NOK( GetDBOptionsFromMap(exact, base_db_opt, db_options_map, 
&new_db_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); // unknow options should fail parsing without ignore_unknown_options = true db_options_map["unknown_db_option"] = "1"; ASSERT_NOK( GetDBOptionsFromMap(exact, base_db_opt, db_options_map, &new_db_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); ASSERT_OK( GetDBOptionsFromMap(loose, base_db_opt, db_options_map, &new_db_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); ASSERT_NOK( RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // GetColumnFamilyOptionsFromString is not supported in // ROCKSDB_LITE TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) { ColumnFamilyOptions base_cf_opt; ColumnFamilyOptions new_cf_opt; ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; base_cf_opt.table_factory.reset(); ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "", &new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=5", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 5U); ASSERT_TRUE(new_cf_opt.table_factory == nullptr); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=6;", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 6U); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, " write_buffer_size = 7 ", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 7U); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, " write_buffer_size = 8 ; ", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 8U); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, 
"write_buffer_size=9;max_write_buffer_number=10", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 9U); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 10); ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=11; max_write_buffer_number = 12 ;", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 11U); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 12); // Wrong name "max_write_buffer_number_" ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=13;max_write_buffer_number_=14;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); // Comparator from object registry std::string kCompName = "reverse_comp"; ObjectLibrary::Default()->Register( kCompName, [](const std::string& /*name*/, std::unique_ptr* /*guard*/, std::string* /* errmsg */) { return ReverseBytewiseComparator(); }); ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "comparator=" + kCompName + ";", &new_cf_opt)); ASSERT_EQ(new_cf_opt.comparator, ReverseBytewiseComparator()); // MergeOperator from object registry std::unique_ptr bxo(new BytesXOROperator()); std::string kMoName = bxo->Name(); ObjectLibrary::Default()->Register( kMoName, [](const std::string& /*name*/, std::unique_ptr* guard, std::string* /* errmsg */) { guard->reset(new BytesXOROperator()); return guard->get(); }); ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "merge_operator=" + kMoName + ";", &new_cf_opt)); ASSERT_EQ(kMoName, std::string(new_cf_opt.merge_operator->Name())); // Wrong key/value pair ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=13;max_write_buffer_number;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); // Error Paring value ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, 
"write_buffer_size=13;max_write_buffer_number=;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); // Missing option name ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=13; =100;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); const uint64_t kilo = 1024UL; const uint64_t mega = 1024 * kilo; const uint64_t giga = 1024 * mega; const uint64_t tera = 1024 * giga; // Units (k) ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "max_write_buffer_number=15K", &new_cf_opt)); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 15 * kilo); // Units (m) ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "max_write_buffer_number=16m;inplace_update_num_locks=17M", &new_cf_opt)); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 16 * mega); ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 17u * mega); // Units (g) ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=18g;prefix_extractor=capped:8;" "arena_block_size=19G", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 18 * giga); ASSERT_EQ(new_cf_opt.arena_block_size, 19 * giga); ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr); std::string prefix_name(new_cf_opt.prefix_extractor->Name()); ASSERT_EQ(prefix_name, "rocksdb.CappedPrefix.8"); // Units (t) ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=20t;arena_block_size=21T", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 20 * tera); ASSERT_EQ(new_cf_opt.arena_block_size, 21 * tera); // Nested block based table options // Empty ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={};arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); // Non-empty 
ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_cache=1M;block_size=4;};" "arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); // Last one ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_cache=1M;block_size=4;}", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); // Mismatch curly braces ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={{{block_size=4;};" "arena_block_size=1024", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); // Unexpected chars after closing curly brace ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_size=4;}};" "arena_block_size=1024", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_size=4;}xdfa;" "arena_block_size=1024", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_size=4;}xdfa", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); // Invalid block based table option ASSERT_NOK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" 
"block_based_table_factory={xx_block_size=4;}", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "optimize_filters_for_hits=true", &new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "optimize_filters_for_hits=false", &new_cf_opt)); ASSERT_NOK(GetColumnFamilyOptionsFromString(config_options, base_cf_opt, "optimize_filters_for_hits=junk", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opt, new_cf_opt)); // Nested plain table options // Empty ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "plain_table_factory={};arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); // Non-empty ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "plain_table_factory={user_key_len=66;bloom_bits_per_key=20;};" "arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); // memtable factory ASSERT_OK(GetColumnFamilyOptionsFromString( config_options, base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "memtable=skip_list:10;arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory"); } TEST_F(OptionsTest, OldInterfaceTest) { ColumnFamilyOptions base_cf_opt; ColumnFamilyOptions new_cf_opt; ConfigOptions exact; ASSERT_OK(GetColumnFamilyOptionsFromString( base_cf_opt, "write_buffer_size=18;prefix_extractor=capped:8;" "arena_block_size=19", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 18); 
ASSERT_EQ(new_cf_opt.arena_block_size, 19); ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr); // And with a bad option ASSERT_NOK(GetColumnFamilyOptionsFromString( base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={xx_block_size=4;}", &new_cf_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); std::unordered_map cf_options_map = { {"write_buffer_size", "1"}, {"max_write_buffer_number", "2"}, {"min_write_buffer_number_to_merge", "3"}, }; ASSERT_OK( GetColumnFamilyOptionsFromMap(base_cf_opt, cf_options_map, &new_cf_opt)); cf_options_map["unknown_option"] = "1"; ASSERT_NOK( GetColumnFamilyOptionsFromMap(base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromMap(base_cf_opt, cf_options_map, &new_cf_opt, true, true)); DBOptions base_db_opt; DBOptions new_db_opt; std::unordered_map db_options_map = { {"create_if_missing", "false"}, {"create_missing_column_families", "true"}, {"error_if_exists", "false"}, {"paranoid_checks", "true"}, {"max_open_files", "32"}, }; ASSERT_OK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt)); ASSERT_EQ(new_db_opt.create_if_missing, false); ASSERT_EQ(new_db_opt.create_missing_column_families, true); ASSERT_EQ(new_db_opt.error_if_exists, false); ASSERT_EQ(new_db_opt.paranoid_checks, true); ASSERT_EQ(new_db_opt.max_open_files, 32); db_options_map["unknown_option"] = "1"; ASSERT_NOK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); ASSERT_OK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt, true, true)); ASSERT_OK(GetDBOptionsFromString( base_db_opt, "create_if_missing=false;error_if_exists=false;max_open_files=42;", &new_db_opt)); ASSERT_EQ(new_db_opt.create_if_missing, false); ASSERT_EQ(new_db_opt.error_if_exists, false); 
ASSERT_EQ(new_db_opt.max_open_files, 42); ASSERT_NOK(GetDBOptionsFromString( base_db_opt, "create_if_missing=false;error_if_exists=false;max_open_files=42;" "unknown_option=1;", &new_db_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // GetBlockBasedTableOptionsFromString is not supported TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { BlockBasedTableOptions table_opt; BlockBasedTableOptions new_opt; ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; // make sure default values are overwritten by something else ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "cache_index_and_filter_blocks=1;index_type=kHashSearch;" "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;" "block_cache=1M;block_cache_compressed=1k;block_size=1024;" "block_size_deviation=8;block_restart_interval=4;" "format_version=5;whole_key_filtering=1;" "filter_policy=bloomfilter:4.567:false;", &new_opt)); ASSERT_TRUE(new_opt.cache_index_and_filter_blocks); ASSERT_EQ(new_opt.index_type, BlockBasedTableOptions::kHashSearch); ASSERT_EQ(new_opt.checksum, ChecksumType::kxxHash); ASSERT_TRUE(new_opt.hash_index_allow_collision); ASSERT_TRUE(new_opt.no_block_cache); ASSERT_TRUE(new_opt.block_cache != nullptr); ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); ASSERT_TRUE(new_opt.block_cache_compressed != nullptr); ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 1024UL); ASSERT_EQ(new_opt.block_size, 1024UL); ASSERT_EQ(new_opt.block_size_deviation, 8); ASSERT_EQ(new_opt.block_restart_interval, 4); ASSERT_EQ(new_opt.format_version, 5U); ASSERT_EQ(new_opt.whole_key_filtering, true); ASSERT_TRUE(new_opt.filter_policy != nullptr); const BloomFilterPolicy& bfp = dynamic_cast(*new_opt.filter_policy); EXPECT_EQ(bfp.GetMillibitsPerKey(), 4567); EXPECT_EQ(bfp.GetWholeBitsPerKey(), 5); // 
unknown option ASSERT_NOK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "cache_index_and_filter_blocks=1;index_type=kBinarySearch;" "bad_option=1", &new_opt)); ASSERT_EQ(static_cast(table_opt.cache_index_and_filter_blocks), new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.index_type, new_opt.index_type); // unrecognized index type ASSERT_NOK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "cache_index_and_filter_blocks=1;index_type=kBinarySearchXX", &new_opt)); ASSERT_EQ(table_opt.cache_index_and_filter_blocks, new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.index_type, new_opt.index_type); // unrecognized checksum type ASSERT_NOK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "cache_index_and_filter_blocks=1;checksum=kxxHashXX", &new_opt)); ASSERT_EQ(table_opt.cache_index_and_filter_blocks, new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.index_type, new_opt.index_type); // unrecognized filter policy name ASSERT_NOK( GetBlockBasedTableOptionsFromString(config_options, table_opt, "cache_index_and_filter_blocks=1;" "filter_policy=bloomfilterxx:4:true", &new_opt)); ASSERT_EQ(table_opt.cache_index_and_filter_blocks, new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); // unrecognized filter policy config ASSERT_NOK( GetBlockBasedTableOptionsFromString(config_options, table_opt, "cache_index_and_filter_blocks=1;" "filter_policy=bloomfilter:4", &new_opt)); ASSERT_EQ(table_opt.cache_index_and_filter_blocks, new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); // Check block cache options are overwritten when specified // in new format as a struct. 
ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "block_cache={capacity=1M;num_shard_bits=4;" "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};" "block_cache_compressed={capacity=1M;num_shard_bits=4;" "strict_capacity_limit=true;high_pri_pool_ratio=0.5;}", &new_opt)); ASSERT_TRUE(new_opt.block_cache != nullptr); ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache)->GetNumShardBits(), 4); ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache)->GetHighPriPoolRatio(), 0.5); ASSERT_TRUE(new_opt.block_cache_compressed != nullptr); ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 1024UL*1024UL); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache_compressed)->GetNumShardBits(), 4); ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), true); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache_compressed)->GetHighPriPoolRatio(), 0.5); // Set only block cache capacity. Check other values are // reset to default values. 
ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "block_cache={capacity=2M};" "block_cache_compressed={capacity=2M}", &new_opt)); ASSERT_TRUE(new_opt.block_cache != nullptr); ASSERT_EQ(new_opt.block_cache->GetCapacity(), 2*1024UL*1024UL); // Default values ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache)->GetNumShardBits(), GetDefaultCacheShardBits(new_opt.block_cache->GetCapacity())); ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false); ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) ->GetHighPriPoolRatio(), 0.5); ASSERT_TRUE(new_opt.block_cache_compressed != nullptr); ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 2*1024UL*1024UL); // Default values ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache_compressed)->GetNumShardBits(), GetDefaultCacheShardBits( new_opt.block_cache_compressed->GetCapacity())); ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), false); ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache_compressed) ->GetHighPriPoolRatio(), 0.5); // Set couple of block cache options. 
ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "block_cache={num_shard_bits=5;high_pri_pool_ratio=0.5;};" "block_cache_compressed={num_shard_bits=5;" "high_pri_pool_ratio=0.0;}", &new_opt)); ASSERT_EQ(new_opt.block_cache->GetCapacity(), 0); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache)->GetNumShardBits(), 5); ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache)->GetHighPriPoolRatio(), 0.5); ASSERT_TRUE(new_opt.block_cache_compressed != nullptr); ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 0); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache_compressed)->GetNumShardBits(), 5); ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), false); ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache_compressed) ->GetHighPriPoolRatio(), 0.0); // Set couple of block cache options. ASSERT_OK(GetBlockBasedTableOptionsFromString( config_options, table_opt, "block_cache={capacity=1M;num_shard_bits=4;" "strict_capacity_limit=true;};" "block_cache_compressed={capacity=1M;num_shard_bits=4;" "strict_capacity_limit=true;}", &new_opt)); ASSERT_TRUE(new_opt.block_cache != nullptr); ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache)->GetNumShardBits(), 4); ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true); ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache) ->GetHighPriPoolRatio(), 0.5); ASSERT_TRUE(new_opt.block_cache_compressed != nullptr); ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 1024UL*1024UL); ASSERT_EQ(std::dynamic_pointer_cast( new_opt.block_cache_compressed)->GetNumShardBits(), 4); ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), true); ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache_compressed) ->GetHighPriPoolRatio(), 0.5); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // GetPlainTableOptionsFromString is 
not supported TEST_F(OptionsTest, GetPlainTableOptionsFromString) { PlainTableOptions table_opt; PlainTableOptions new_opt; ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; // make sure default values are overwritten by something else ASSERT_OK(GetPlainTableOptionsFromString( config_options, table_opt, "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" "index_sparseness=8;huge_page_tlb_size=4;encoding_type=kPrefix;" "full_scan_mode=true;store_index_in_file=true", &new_opt)); ASSERT_EQ(new_opt.user_key_len, 66u); ASSERT_EQ(new_opt.bloom_bits_per_key, 20); ASSERT_EQ(new_opt.hash_table_ratio, 0.5); ASSERT_EQ(new_opt.index_sparseness, 8); ASSERT_EQ(new_opt.huge_page_tlb_size, 4); ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix); ASSERT_TRUE(new_opt.full_scan_mode); ASSERT_TRUE(new_opt.store_index_in_file); // unknown option ASSERT_NOK(GetPlainTableOptionsFromString( config_options, table_opt, "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" "bad_option=1", &new_opt)); // unrecognized EncodingType ASSERT_NOK(GetPlainTableOptionsFromString( config_options, table_opt, "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;" "encoding_type=kPrefixXX", &new_opt)); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // GetMemTableRepFactoryFromString is not supported TEST_F(OptionsTest, GetMemTableRepFactoryFromString) { std::unique_ptr new_mem_factory = nullptr; ASSERT_OK(GetMemTableRepFactoryFromString("skip_list", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("skip_list:16", &new_mem_factory)); ASSERT_EQ(std::string(new_mem_factory->Name()), "SkipListFactory"); ASSERT_NOK(GetMemTableRepFactoryFromString("skip_list:16:invalid_opt", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("prefix_hash", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("prefix_hash:1000", &new_mem_factory)); ASSERT_EQ(std::string(new_mem_factory->Name()), 
"HashSkipListRepFactory"); ASSERT_NOK(GetMemTableRepFactoryFromString("prefix_hash:1000:invalid_opt", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("hash_linkedlist", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("hash_linkedlist:1000", &new_mem_factory)); ASSERT_EQ(std::string(new_mem_factory->Name()), "HashLinkListRepFactory"); ASSERT_NOK(GetMemTableRepFactoryFromString("hash_linkedlist:1000:invalid_opt", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("vector", &new_mem_factory)); ASSERT_OK(GetMemTableRepFactoryFromString("vector:1024", &new_mem_factory)); ASSERT_EQ(std::string(new_mem_factory->Name()), "VectorRepFactory"); ASSERT_NOK(GetMemTableRepFactoryFromString("vector:1024:invalid_opt", &new_mem_factory)); ASSERT_NOK(GetMemTableRepFactoryFromString("cuckoo", &new_mem_factory)); // CuckooHash memtable is already removed. ASSERT_NOK(GetMemTableRepFactoryFromString("cuckoo:1024", &new_mem_factory)); ASSERT_NOK(GetMemTableRepFactoryFromString("bad_factory", &new_mem_factory)); } #endif // !ROCKSDB_LITE #ifndef ROCKSDB_LITE // GetOptionsFromString is not supported in RocksDB Lite TEST_F(OptionsTest, GetOptionsFromStringTest) { Options base_options, new_options; ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; base_options.write_buffer_size = 20; base_options.min_write_buffer_number_to_merge = 15; BlockBasedTableOptions block_based_table_options; block_based_table_options.cache_index_and_filter_blocks = true; base_options.table_factory.reset( NewBlockBasedTableFactory(block_based_table_options)); // Register an Env with object registry. 
const static char* kCustomEnvName = "CustomEnv"; class CustomEnv : public EnvWrapper { public: explicit CustomEnv(Env* _target) : EnvWrapper(_target) {} }; ObjectLibrary::Default()->Register( kCustomEnvName, [](const std::string& /*name*/, std::unique_ptr* /*env_guard*/, std::string* /* errmsg */) { static CustomEnv env(Env::Default()); return &env; }); ASSERT_OK(GetOptionsFromString( config_options, base_options, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_cache=1M;block_size=4;};" "compression_opts=4:5:6;create_if_missing=true;max_open_files=1;" "bottommost_compression_opts=5:6:7;create_if_missing=true;max_open_files=" "1;" "rate_limiter_bytes_per_sec=1024;env=CustomEnv", &new_options)); ASSERT_EQ(new_options.compression_opts.window_bits, 4); ASSERT_EQ(new_options.compression_opts.level, 5); ASSERT_EQ(new_options.compression_opts.strategy, 6); ASSERT_EQ(new_options.compression_opts.max_dict_bytes, 0u); ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u); ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u); ASSERT_EQ(new_options.compression_opts.enabled, false); ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption); ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_options.bottommost_compression_opts.level, 6); ASSERT_EQ(new_options.bottommost_compression_opts.strategy, 7); ASSERT_EQ(new_options.bottommost_compression_opts.max_dict_bytes, 0u); ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u); ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u); ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false); ASSERT_EQ(new_options.write_buffer_size, 10U); ASSERT_EQ(new_options.max_write_buffer_number, 16); BlockBasedTableOptions new_block_based_table_options = dynamic_cast(new_options.table_factory.get()) ->table_options(); ASSERT_EQ(new_block_based_table_options.block_cache->GetCapacity(), 1U << 20); 
ASSERT_EQ(new_block_based_table_options.block_size, 4U); // don't overwrite block based table options ASSERT_TRUE(new_block_based_table_options.cache_index_and_filter_blocks); ASSERT_EQ(new_options.create_if_missing, true); ASSERT_EQ(new_options.max_open_files, 1); ASSERT_TRUE(new_options.rate_limiter.get() != nullptr); Env* newEnv = new_options.env; ASSERT_OK(Env::LoadEnv(kCustomEnvName, &newEnv)); ASSERT_EQ(newEnv, new_options.env); // Test the old interfaxe ASSERT_OK(GetOptionsFromString( base_options, "write_buffer_size=22;max_write_buffer_number=33;max_open_files=44;", &new_options)); ASSERT_EQ(new_options.write_buffer_size, 22U); ASSERT_EQ(new_options.max_write_buffer_number, 33); ASSERT_EQ(new_options.max_open_files, 44); } TEST_F(OptionsTest, DBOptionsSerialization) { Options base_options, new_options; Random rnd(301); ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; // Phase 1: Make big change in base_options test::RandomInitDBOptions(&base_options, &rnd); // Phase 2: obtain a string from base_option std::string base_options_file_content; ASSERT_OK(GetStringFromDBOptions(config_options, base_options, &base_options_file_content)); // Phase 3: Set new_options from the derived string and expect // new_options == base_options ASSERT_OK(GetDBOptionsFromString(config_options, DBOptions(), base_options_file_content, &new_options)); ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_options, base_options, new_options)); } TEST_F(OptionsTest, OptionsComposeDecompose) { // build an Options from DBOptions + CFOptions, then decompose it to verify // we get same constituent options. 
DBOptions base_db_opts; ColumnFamilyOptions base_cf_opts; ConfigOptions config_options; // Use default for ignore(false) and check (exact) config_options.input_strings_escaped = false; Random rnd(301); test::RandomInitDBOptions(&base_db_opts, &rnd); test::RandomInitCFOptions(&base_cf_opts, base_db_opts, &rnd); Options base_opts(base_db_opts, base_cf_opts); DBOptions new_db_opts(base_opts); ColumnFamilyOptions new_cf_opts(base_opts); ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_options, base_db_opts, new_db_opts)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_cf_opts, new_cf_opts)); delete new_cf_opts.compaction_filter; } TEST_F(OptionsTest, ColumnFamilyOptionsSerialization) { Options options; ColumnFamilyOptions base_opt, new_opt; Random rnd(302); ConfigOptions config_options; config_options.input_strings_escaped = false; // Phase 1: randomly assign base_opt // custom type options test::RandomInitCFOptions(&base_opt, options, &rnd); // Phase 2: obtain a string from base_opt std::string base_options_file_content; ASSERT_OK(GetStringFromColumnFamilyOptions(config_options, base_opt, &base_options_file_content)); // Phase 3: Set new_opt from the derived string and expect // new_opt == base_opt ASSERT_OK( GetColumnFamilyOptionsFromString(config_options, ColumnFamilyOptions(), base_options_file_content, &new_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(config_options, base_opt, new_opt)); if (base_opt.compaction_filter) { delete base_opt.compaction_filter; } } #endif // !ROCKSDB_LITE Status StringToMap( const std::string& opts_str, std::unordered_map* opts_map); #ifndef ROCKSDB_LITE // StringToMap is not supported in ROCKSDB_LITE TEST_F(OptionsTest, StringToMapTest) { std::unordered_map opts_map; // Regular options ASSERT_OK(StringToMap("k1=v1;k2=v2;k3=v3", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "v2"); ASSERT_EQ(opts_map["k3"], "v3"); // Value with '=' opts_map.clear(); 
ASSERT_OK(StringToMap("k1==v1;k2=v2=;", &opts_map)); ASSERT_EQ(opts_map["k1"], "=v1"); ASSERT_EQ(opts_map["k2"], "v2="); // Overwrriten option opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k1=v2;k3=v3", &opts_map)); ASSERT_EQ(opts_map["k1"], "v2"); ASSERT_EQ(opts_map["k3"], "v3"); // Empty value opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2=;k3=v3;k4=", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); ASSERT_EQ(opts_map["k2"], ""); ASSERT_EQ(opts_map["k3"], "v3"); ASSERT_TRUE(opts_map.find("k4") != opts_map.end()); ASSERT_EQ(opts_map["k4"], ""); opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2=;k3=v3;k4= ", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); ASSERT_EQ(opts_map["k2"], ""); ASSERT_EQ(opts_map["k3"], "v3"); ASSERT_TRUE(opts_map.find("k4") != opts_map.end()); ASSERT_EQ(opts_map["k4"], ""); opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2=;k3=", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); ASSERT_EQ(opts_map["k2"], ""); ASSERT_TRUE(opts_map.find("k3") != opts_map.end()); ASSERT_EQ(opts_map["k3"], ""); opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2=;k3=;", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_TRUE(opts_map.find("k2") != opts_map.end()); ASSERT_EQ(opts_map["k2"], ""); ASSERT_TRUE(opts_map.find("k3") != opts_map.end()); ASSERT_EQ(opts_map["k3"], ""); // Regular nested options opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2={nk1=nv1;nk2=nv2};k3=v3", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "nk1=nv1;nk2=nv2"); ASSERT_EQ(opts_map["k3"], "v3"); // Multi-level nested options opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2={nk1=nv1;nk2={nnk1=nnk2}};" "k3={nk1={nnk1={nnnk1=nnnv1;nnnk2;nnnv2}}};k4=v4", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "nk1=nv1;nk2={nnk1=nnk2}"); ASSERT_EQ(opts_map["k3"], 
"nk1={nnk1={nnnk1=nnnv1;nnnk2;nnnv2}}"); ASSERT_EQ(opts_map["k4"], "v4"); // Garbage inside curly braces opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2={dfad=};k3={=};k4=v4", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "dfad="); ASSERT_EQ(opts_map["k3"], "="); ASSERT_EQ(opts_map["k4"], "v4"); // Empty nested options opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2={};", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], ""); opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2={{{{}}}{}{}};", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "{{{}}}{}{}"); // With random spaces opts_map.clear(); ASSERT_OK(StringToMap(" k1 = v1 ; k2= {nk1=nv1; nk2={nnk1=nnk2}} ; " "k3={ { } }; k4= v4 ", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "nk1=nv1; nk2={nnk1=nnk2}"); ASSERT_EQ(opts_map["k3"], "{ }"); ASSERT_EQ(opts_map["k4"], "v4"); // Empty key ASSERT_NOK(StringToMap("k1=v1;k2=v2;=", &opts_map)); ASSERT_NOK(StringToMap("=v1;k2=v2", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2v2;", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2=v2;fadfa", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2=v2;;", &opts_map)); // Mismatch curly braces ASSERT_NOK(StringToMap("k1=v1;k2={;k3=v3", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{};k3=v3", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={}};k3=v3", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{}{}}};k3=v3", &opts_map)); // However this is valid! 
opts_map.clear(); ASSERT_OK(StringToMap("k1=v1;k2=};k3=v3", &opts_map)); ASSERT_EQ(opts_map["k1"], "v1"); ASSERT_EQ(opts_map["k2"], "}"); ASSERT_EQ(opts_map["k3"], "v3"); // Invalid chars after closing curly brace ASSERT_NOK(StringToMap("k1=v1;k2={{}}{};k3=v3", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{}}cfda;k3=v3", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{}} cfda;k3=v3", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{}} cfda", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{}}{}", &opts_map)); ASSERT_NOK(StringToMap("k1=v1;k2={{dfdl}adfa}{}", &opts_map)); } #endif // ROCKSDB_LITE #ifndef ROCKSDB_LITE // StringToMap is not supported in ROCKSDB_LITE TEST_F(OptionsTest, StringToMapRandomTest) { std::unordered_map opts_map; // Make sure segfault is not hit by semi-random strings std::vector bases = { "a={aa={};tt={xxx={}}};c=defff", "a={aa={};tt={xxx={}}};c=defff;d={{}yxx{}3{xx}}", "abc={{}{}{}{{{}}}{{}{}{}{}{}{}{}"}; for (std::string base : bases) { for (int rand_seed = 301; rand_seed < 401; rand_seed++) { Random rnd(rand_seed); for (int attempt = 0; attempt < 10; attempt++) { std::string str = base; // Replace random position to space size_t pos = static_cast( rnd.Uniform(static_cast(base.size()))); str[pos] = ' '; Status s = StringToMap(str, &opts_map); ASSERT_TRUE(s.ok() || s.IsInvalidArgument()); opts_map.clear(); } } } // Random Construct a string std::vector chars = {'{', '}', ' ', '=', ';', 'c'}; for (int rand_seed = 301; rand_seed < 1301; rand_seed++) { Random rnd(rand_seed); int len = rnd.Uniform(30); std::string str = ""; for (int attempt = 0; attempt < len; attempt++) { // Add a random character size_t pos = static_cast( rnd.Uniform(static_cast(chars.size()))); str.append(1, chars[pos]); } Status s = StringToMap(str, &opts_map); ASSERT_TRUE(s.ok() || s.IsInvalidArgument()); s = StringToMap("name=" + str, &opts_map); ASSERT_TRUE(s.ok() || s.IsInvalidArgument()); opts_map.clear(); } } TEST_F(OptionsTest, GetStringFromCompressionType) { 
std::string res; ASSERT_OK(GetStringFromCompressionType(&res, kNoCompression)); ASSERT_EQ(res, "kNoCompression"); ASSERT_OK(GetStringFromCompressionType(&res, kSnappyCompression)); ASSERT_EQ(res, "kSnappyCompression"); ASSERT_OK(GetStringFromCompressionType(&res, kDisableCompressionOption)); ASSERT_EQ(res, "kDisableCompressionOption"); ASSERT_OK(GetStringFromCompressionType(&res, kLZ4Compression)); ASSERT_EQ(res, "kLZ4Compression"); ASSERT_OK(GetStringFromCompressionType(&res, kZlibCompression)); ASSERT_EQ(res, "kZlibCompression"); ASSERT_NOK( GetStringFromCompressionType(&res, static_cast(-10))); } #endif // !ROCKSDB_LITE TEST_F(OptionsTest, ConvertOptionsTest) { LevelDBOptions leveldb_opt; Options converted_opt = ConvertOptions(leveldb_opt); ASSERT_EQ(converted_opt.create_if_missing, leveldb_opt.create_if_missing); ASSERT_EQ(converted_opt.error_if_exists, leveldb_opt.error_if_exists); ASSERT_EQ(converted_opt.paranoid_checks, leveldb_opt.paranoid_checks); ASSERT_EQ(converted_opt.env, leveldb_opt.env); ASSERT_EQ(converted_opt.info_log.get(), leveldb_opt.info_log); ASSERT_EQ(converted_opt.write_buffer_size, leveldb_opt.write_buffer_size); ASSERT_EQ(converted_opt.max_open_files, leveldb_opt.max_open_files); ASSERT_EQ(converted_opt.compression, leveldb_opt.compression); std::shared_ptr tb_guard = converted_opt.table_factory; BlockBasedTableFactory* table_factory = dynamic_cast(converted_opt.table_factory.get()); ASSERT_TRUE(table_factory != nullptr); const BlockBasedTableOptions table_opt = table_factory->table_options(); ASSERT_EQ(table_opt.block_cache->GetCapacity(), 8UL << 20); ASSERT_EQ(table_opt.block_size, leveldb_opt.block_size); ASSERT_EQ(table_opt.block_restart_interval, leveldb_opt.block_restart_interval); ASSERT_EQ(table_opt.filter_policy.get(), leveldb_opt.filter_policy); } #ifndef ROCKSDB_LITE // This test suite tests the old APIs into the Configure options methods. // Once those APIs are officially deprecated, this test suite can be deleted. 
class OptionsOldApiTest : public testing::Test {}; TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { std::unordered_map cf_options_map = { {"write_buffer_size", "1"}, {"max_write_buffer_number", "2"}, {"min_write_buffer_number_to_merge", "3"}, {"max_write_buffer_number_to_maintain", "99"}, {"max_write_buffer_size_to_maintain", "-99999"}, {"compression", "kSnappyCompression"}, {"compression_per_level", "kNoCompression:" "kSnappyCompression:" "kZlibCompression:" "kBZip2Compression:" "kLZ4Compression:" "kLZ4HCCompression:" "kXpressCompression:" "kZSTD:" "kZSTDNotFinalCompression"}, {"bottommost_compression", "kLZ4Compression"}, {"bottommost_compression_opts", "5:6:7:8:9:true"}, {"compression_opts", "4:5:6:7:8:true"}, {"num_levels", "8"}, {"level0_file_num_compaction_trigger", "8"}, {"level0_slowdown_writes_trigger", "9"}, {"level0_stop_writes_trigger", "10"}, {"target_file_size_base", "12"}, {"target_file_size_multiplier", "13"}, {"max_bytes_for_level_base", "14"}, {"level_compaction_dynamic_level_bytes", "true"}, {"max_bytes_for_level_multiplier", "15.0"}, {"max_bytes_for_level_multiplier_additional", "16:17:18"}, {"max_compaction_bytes", "21"}, {"soft_rate_limit", "1.1"}, {"hard_rate_limit", "2.1"}, {"hard_pending_compaction_bytes_limit", "211"}, {"arena_block_size", "22"}, {"disable_auto_compactions", "true"}, {"compaction_style", "kCompactionStyleLevel"}, {"compaction_pri", "kOldestSmallestSeqFirst"}, {"verify_checksums_in_compaction", "false"}, {"compaction_options_fifo", "23"}, {"max_sequential_skip_in_iterations", "24"}, {"inplace_update_support", "true"}, {"report_bg_io_stats", "true"}, {"compaction_measure_io_stats", "false"}, {"inplace_update_num_locks", "25"}, {"memtable_prefix_bloom_size_ratio", "0.26"}, {"memtable_whole_key_filtering", "true"}, {"memtable_huge_page_size", "28"}, {"bloom_locality", "29"}, {"max_successive_merges", "30"}, {"min_partial_merge_operands", "31"}, {"prefix_extractor", "fixed:31"}, {"optimize_filters_for_hits", "true"}, }; 
std::unordered_map db_options_map = { {"create_if_missing", "false"}, {"create_missing_column_families", "true"}, {"error_if_exists", "false"}, {"paranoid_checks", "true"}, {"max_open_files", "32"}, {"max_total_wal_size", "33"}, {"use_fsync", "true"}, {"db_log_dir", "/db_log_dir"}, {"wal_dir", "/wal_dir"}, {"delete_obsolete_files_period_micros", "34"}, {"max_background_compactions", "35"}, {"max_background_flushes", "36"}, {"max_log_file_size", "37"}, {"log_file_time_to_roll", "38"}, {"keep_log_file_num", "39"}, {"recycle_log_file_num", "5"}, {"max_manifest_file_size", "40"}, {"table_cache_numshardbits", "41"}, {"WAL_ttl_seconds", "43"}, {"WAL_size_limit_MB", "44"}, {"manifest_preallocation_size", "45"}, {"allow_mmap_reads", "true"}, {"allow_mmap_writes", "false"}, {"use_direct_reads", "false"}, {"use_direct_io_for_flush_and_compaction", "false"}, {"is_fd_close_on_exec", "true"}, {"skip_log_error_on_recovery", "false"}, {"stats_dump_period_sec", "46"}, {"stats_persist_period_sec", "57"}, {"persist_stats_to_disk", "false"}, {"stats_history_buffer_size", "69"}, {"advise_random_on_open", "true"}, {"use_adaptive_mutex", "false"}, {"new_table_reader_for_compaction_inputs", "true"}, {"compaction_readahead_size", "100"}, {"random_access_max_buffer_size", "3145728"}, {"writable_file_max_buffer_size", "314159"}, {"bytes_per_sync", "47"}, {"wal_bytes_per_sync", "48"}, {"strict_bytes_per_sync", "true"}, }; ColumnFamilyOptions base_cf_opt; ColumnFamilyOptions new_cf_opt; ASSERT_OK(GetColumnFamilyOptionsFromMap( base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 1U); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 2); ASSERT_EQ(new_cf_opt.min_write_buffer_number_to_merge, 3); ASSERT_EQ(new_cf_opt.max_write_buffer_number_to_maintain, 99); ASSERT_EQ(new_cf_opt.max_write_buffer_size_to_maintain, -99999); ASSERT_EQ(new_cf_opt.compression, kSnappyCompression); ASSERT_EQ(new_cf_opt.compression_per_level.size(), 9U); 
ASSERT_EQ(new_cf_opt.compression_per_level[0], kNoCompression); ASSERT_EQ(new_cf_opt.compression_per_level[1], kSnappyCompression); ASSERT_EQ(new_cf_opt.compression_per_level[2], kZlibCompression); ASSERT_EQ(new_cf_opt.compression_per_level[3], kBZip2Compression); ASSERT_EQ(new_cf_opt.compression_per_level[4], kLZ4Compression); ASSERT_EQ(new_cf_opt.compression_per_level[5], kLZ4HCCompression); ASSERT_EQ(new_cf_opt.compression_per_level[6], kXpressCompression); ASSERT_EQ(new_cf_opt.compression_per_level[7], kZSTD); ASSERT_EQ(new_cf_opt.compression_per_level[8], kZSTDNotFinalCompression); ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); ASSERT_EQ(new_cf_opt.compression_opts.level, 5); ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u); ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u); ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, CompressionOptions().parallel_threads); ASSERT_EQ(new_cf_opt.compression_opts.enabled, true); ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_bytes, 8u); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 9u); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, CompressionOptions().parallel_threads); ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true); ASSERT_EQ(new_cf_opt.num_levels, 8); ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8); ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9); ASSERT_EQ(new_cf_opt.level0_stop_writes_trigger, 10); ASSERT_EQ(new_cf_opt.target_file_size_base, static_cast(12)); ASSERT_EQ(new_cf_opt.target_file_size_multiplier, 13); ASSERT_EQ(new_cf_opt.max_bytes_for_level_base, 14U); 
ASSERT_EQ(new_cf_opt.level_compaction_dynamic_level_bytes, true); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier, 15.0); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional.size(), 3U); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[0], 16); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[1], 17); ASSERT_EQ(new_cf_opt.max_bytes_for_level_multiplier_additional[2], 18); ASSERT_EQ(new_cf_opt.max_compaction_bytes, 21); ASSERT_EQ(new_cf_opt.hard_pending_compaction_bytes_limit, 211); ASSERT_EQ(new_cf_opt.arena_block_size, 22U); ASSERT_EQ(new_cf_opt.disable_auto_compactions, true); ASSERT_EQ(new_cf_opt.compaction_style, kCompactionStyleLevel); ASSERT_EQ(new_cf_opt.compaction_pri, kOldestSmallestSeqFirst); ASSERT_EQ(new_cf_opt.compaction_options_fifo.max_table_files_size, static_cast(23)); ASSERT_EQ(new_cf_opt.max_sequential_skip_in_iterations, static_cast(24)); ASSERT_EQ(new_cf_opt.inplace_update_support, true); ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 25U); ASSERT_EQ(new_cf_opt.memtable_prefix_bloom_size_ratio, 0.26); ASSERT_EQ(new_cf_opt.memtable_whole_key_filtering, true); ASSERT_EQ(new_cf_opt.memtable_huge_page_size, 28U); ASSERT_EQ(new_cf_opt.bloom_locality, 29U); ASSERT_EQ(new_cf_opt.max_successive_merges, 30U); ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr); ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true); ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()), "rocksdb.FixedPrefix.31"); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap( base_cf_opt, cf_options_map, &new_cf_opt)); ConfigOptions exact, loose; exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; loose.sanity_level = ConfigOptions::kSanityLevelLooselyCompatible; ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); cf_options_map["write_buffer_size"] = "1"; ASSERT_OK(GetColumnFamilyOptionsFromMap( base_cf_opt, cf_options_map, &new_cf_opt)); 
cf_options_map["unknown_option"] = "1"; ASSERT_NOK(GetColumnFamilyOptionsFromMap( base_cf_opt, cf_options_map, &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromMap(base_cf_opt, cf_options_map, &new_cf_opt, false, /* input_strings_escaped */ true /* ignore_unknown_options */)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( loose, base_cf_opt, new_cf_opt, nullptr /* new_opt_map */)); ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( exact /* default for VerifyCFOptions */, base_cf_opt, new_cf_opt, nullptr)); DBOptions base_db_opt; DBOptions new_db_opt; ASSERT_OK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt)); ASSERT_EQ(new_db_opt.create_if_missing, false); ASSERT_EQ(new_db_opt.create_missing_column_families, true); ASSERT_EQ(new_db_opt.error_if_exists, false); ASSERT_EQ(new_db_opt.paranoid_checks, true); ASSERT_EQ(new_db_opt.max_open_files, 32); ASSERT_EQ(new_db_opt.max_total_wal_size, static_cast(33)); ASSERT_EQ(new_db_opt.use_fsync, true); ASSERT_EQ(new_db_opt.db_log_dir, "/db_log_dir"); ASSERT_EQ(new_db_opt.wal_dir, "/wal_dir"); ASSERT_EQ(new_db_opt.delete_obsolete_files_period_micros, static_cast(34)); ASSERT_EQ(new_db_opt.max_background_compactions, 35); ASSERT_EQ(new_db_opt.max_background_flushes, 36); ASSERT_EQ(new_db_opt.max_log_file_size, 37U); ASSERT_EQ(new_db_opt.log_file_time_to_roll, 38U); ASSERT_EQ(new_db_opt.keep_log_file_num, 39U); ASSERT_EQ(new_db_opt.recycle_log_file_num, 5U); ASSERT_EQ(new_db_opt.max_manifest_file_size, static_cast(40)); ASSERT_EQ(new_db_opt.table_cache_numshardbits, 41); ASSERT_EQ(new_db_opt.WAL_ttl_seconds, static_cast(43)); ASSERT_EQ(new_db_opt.WAL_size_limit_MB, static_cast(44)); ASSERT_EQ(new_db_opt.manifest_preallocation_size, 45U); ASSERT_EQ(new_db_opt.allow_mmap_reads, true); ASSERT_EQ(new_db_opt.allow_mmap_writes, false); ASSERT_EQ(new_db_opt.use_direct_reads, false); 
ASSERT_EQ(new_db_opt.use_direct_io_for_flush_and_compaction, false); ASSERT_EQ(new_db_opt.is_fd_close_on_exec, true); ASSERT_EQ(new_db_opt.skip_log_error_on_recovery, false); ASSERT_EQ(new_db_opt.stats_dump_period_sec, 46U); ASSERT_EQ(new_db_opt.stats_persist_period_sec, 57U); ASSERT_EQ(new_db_opt.persist_stats_to_disk, false); ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U); ASSERT_EQ(new_db_opt.advise_random_on_open, true); ASSERT_EQ(new_db_opt.use_adaptive_mutex, false); ASSERT_EQ(new_db_opt.new_table_reader_for_compaction_inputs, true); ASSERT_EQ(new_db_opt.compaction_readahead_size, 100); ASSERT_EQ(new_db_opt.random_access_max_buffer_size, 3145728); ASSERT_EQ(new_db_opt.writable_file_max_buffer_size, 314159); ASSERT_EQ(new_db_opt.bytes_per_sync, static_cast(47)); ASSERT_EQ(new_db_opt.wal_bytes_per_sync, static_cast(48)); ASSERT_EQ(new_db_opt.strict_bytes_per_sync, true); db_options_map["max_open_files"] = "hello"; ASSERT_NOK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); // unknow options should fail parsing without ignore_unknown_options = true db_options_map["unknown_db_option"] = "1"; ASSERT_NOK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); ASSERT_OK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt, false, /* input_strings_escaped */ true /* ignore_unknown_options */)); ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(loose, base_db_opt, new_db_opt)); ASSERT_NOK(RocksDBOptionsParser::VerifyDBOptions(exact, base_db_opt, new_db_opt)); } TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) { ColumnFamilyOptions base_cf_opt; ColumnFamilyOptions new_cf_opt; base_cf_opt.table_factory.reset(); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "", 
&new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=5", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 5U); ASSERT_TRUE(new_cf_opt.table_factory == nullptr); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=6;", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 6U); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, " write_buffer_size = 7 ", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 7U); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, " write_buffer_size = 8 ; ", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 8U); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=9;max_write_buffer_number=10", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 9U); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 10); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=11; max_write_buffer_number = 12 ;", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 11U); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 12); // Wrong name "max_write_buffer_number_" ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=13;max_write_buffer_number_=14;", &new_cf_opt)); ConfigOptions exact; exact.sanity_level = ConfigOptions::kSanityLevelExactMatch; ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // Comparator from object registry std::string kCompName = "reverse_comp"; ObjectLibrary::Default()->Register( kCompName, [](const std::string& /*name*/, std::unique_ptr* /*guard*/, std::string* /* errmsg */) { return ReverseBytewiseComparator(); }); ASSERT_OK(GetColumnFamilyOptionsFromString( base_cf_opt, "comparator=" + kCompName + ";", &new_cf_opt)); ASSERT_EQ(new_cf_opt.comparator, ReverseBytewiseComparator()); // MergeOperator from object registry std::unique_ptr bxo(new BytesXOROperator()); std::string kMoName = bxo->Name(); ObjectLibrary::Default()->Register( kMoName, [](const 
std::string& /*name*/, std::unique_ptr* guard, std::string* /* errmsg */) { guard->reset(new BytesXOROperator()); return guard->get(); }); ASSERT_OK(GetColumnFamilyOptionsFromString( base_cf_opt, "merge_operator=" + kMoName + ";", &new_cf_opt)); ASSERT_EQ(kMoName, std::string(new_cf_opt.merge_operator->Name())); // Wrong key/value pair ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=13;max_write_buffer_number;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // Error Paring value ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=13;max_write_buffer_number=;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // Missing option name ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=13; =100;", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); const uint64_t kilo = 1024UL; const uint64_t mega = 1024 * kilo; const uint64_t giga = 1024 * mega; const uint64_t tera = 1024 * giga; // Units (k) ASSERT_OK(GetColumnFamilyOptionsFromString( base_cf_opt, "max_write_buffer_number=15K", &new_cf_opt)); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 15 * kilo); // Units (m) ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "max_write_buffer_number=16m;inplace_update_num_locks=17M", &new_cf_opt)); ASSERT_EQ(new_cf_opt.max_write_buffer_number, 16 * mega); ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 17u * mega); // Units (g) ASSERT_OK(GetColumnFamilyOptionsFromString( base_cf_opt, "write_buffer_size=18g;prefix_extractor=capped:8;" "arena_block_size=19G", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 18 * giga); ASSERT_EQ(new_cf_opt.arena_block_size, 19 * giga); ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr); std::string prefix_name(new_cf_opt.prefix_extractor->Name()); ASSERT_EQ(prefix_name, "rocksdb.CappedPrefix.8"); // Units (t) 
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=20t;arena_block_size=21T", &new_cf_opt)); ASSERT_EQ(new_cf_opt.write_buffer_size, 20 * tera); ASSERT_EQ(new_cf_opt.arena_block_size, 21 * tera); // Nested block based table options // Empty ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={};arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); // Non-empty ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_cache=1M;block_size=4;};" "arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); // Last one ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_cache=1M;block_size=4;}", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); // Mismatch curly braces ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={{{block_size=4;};" "arena_block_size=1024", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // Unexpected chars after closing curly brace ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_size=4;}};" "arena_block_size=1024", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_size=4;}xdfa;" "arena_block_size=1024", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, 
"write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={block_size=4;}xdfa", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // Invalid block based table option ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "block_based_table_factory={xx_block_size=4;}", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "optimize_filters_for_hits=true", &new_cf_opt)); ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "optimize_filters_for_hits=false", &new_cf_opt)); ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt, "optimize_filters_for_hits=junk", &new_cf_opt)); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(exact, base_cf_opt, new_cf_opt)); // Nested plain table options // Empty ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "plain_table_factory={};arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); // Non-empty ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "plain_table_factory={user_key_len=66;bloom_bits_per_key=20;};" "arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.table_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.table_factory->Name()), "PlainTable"); // memtable factory ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt, "write_buffer_size=10;max_write_buffer_number=16;" "memtable=skip_list:10;arena_block_size=1024", &new_cf_opt)); ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory"); } TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { BlockBasedTableOptions table_opt; 
BlockBasedTableOptions new_opt;
  // (Continuation of TEST_F(OptionsOldApiTest,
  // GetBlockBasedTableOptionsFromString); `table_opt` is declared just above.)
  // NOTE(review): several casts below appear without template arguments
  // (e.g. `dynamic_cast(...)`, `std::dynamic_pointer_cast(...)`); they look
  // stripped from this copy of the text -- confirm against upstream.

  // make sure default values are overwritten by something else
  ASSERT_OK(GetBlockBasedTableOptionsFromString(
      table_opt,
      "cache_index_and_filter_blocks=1;index_type=kHashSearch;"
      "checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
      "block_cache=1M;block_cache_compressed=1k;block_size=1024;"
      "block_size_deviation=8;block_restart_interval=4;"
      "format_version=5;whole_key_filtering=1;"
      "filter_policy=bloomfilter:4.567:false;",
      &new_opt));
  ASSERT_TRUE(new_opt.cache_index_and_filter_blocks);
  ASSERT_EQ(new_opt.index_type, BlockBasedTableOptions::kHashSearch);
  ASSERT_EQ(new_opt.checksum, ChecksumType::kxxHash);
  ASSERT_TRUE(new_opt.hash_index_allow_collision);
  ASSERT_TRUE(new_opt.no_block_cache);
  ASSERT_TRUE(new_opt.block_cache != nullptr);
  ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL);
  ASSERT_TRUE(new_opt.block_cache_compressed != nullptr);
  ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 1024UL);
  ASSERT_EQ(new_opt.block_size, 1024UL);
  ASSERT_EQ(new_opt.block_size_deviation, 8);
  ASSERT_EQ(new_opt.block_restart_interval, 4);
  ASSERT_EQ(new_opt.format_version, 5U);
  ASSERT_EQ(new_opt.whole_key_filtering, true);
  ASSERT_TRUE(new_opt.filter_policy != nullptr);
  const BloomFilterPolicy& bfp =
      dynamic_cast(*new_opt.filter_policy);
  // "4.567" bits per key is expected back as 4567 millibits / 5 whole bits.
  EXPECT_EQ(bfp.GetMillibitsPerKey(), 4567);
  EXPECT_EQ(bfp.GetWholeBitsPerKey(), 5);

  // Each failing parse below is followed by checks that new_opt still
  // matches table_opt for the compared fields.

  // unknown option
  ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt,
             "cache_index_and_filter_blocks=1;index_type=kBinarySearch;"
             "bad_option=1",
             &new_opt));
  ASSERT_EQ(static_cast(table_opt.cache_index_and_filter_blocks),
            new_opt.cache_index_and_filter_blocks);
  ASSERT_EQ(table_opt.index_type, new_opt.index_type);

  // unrecognized index type
  ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt,
             "cache_index_and_filter_blocks=1;index_type=kBinarySearchXX",
             &new_opt));
  ASSERT_EQ(table_opt.cache_index_and_filter_blocks,
            new_opt.cache_index_and_filter_blocks);
  ASSERT_EQ(table_opt.index_type, new_opt.index_type);

  // unrecognized checksum type
  ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt,
             "cache_index_and_filter_blocks=1;checksum=kxxHashXX",
             &new_opt));
  ASSERT_EQ(table_opt.cache_index_and_filter_blocks,
            new_opt.cache_index_and_filter_blocks);
  ASSERT_EQ(table_opt.index_type, new_opt.index_type);

  // unrecognized filter policy name
  ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt,
             "cache_index_and_filter_blocks=1;"
             "filter_policy=bloomfilterxx:4:true",
             &new_opt));
  ASSERT_EQ(table_opt.cache_index_and_filter_blocks,
            new_opt.cache_index_and_filter_blocks);
  ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy);

  // unrecognized filter policy config
  ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt,
             "cache_index_and_filter_blocks=1;"
             "filter_policy=bloomfilter:4",
             &new_opt));
  ASSERT_EQ(table_opt.cache_index_and_filter_blocks,
            new_opt.cache_index_and_filter_blocks);
  ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy);

  // Check block cache options are overwritten when specified
  // in new format as a struct.
  ASSERT_OK(GetBlockBasedTableOptionsFromString(table_opt,
             "block_cache={capacity=1M;num_shard_bits=4;"
             "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};"
             "block_cache_compressed={capacity=1M;num_shard_bits=4;"
             "strict_capacity_limit=true;high_pri_pool_ratio=0.5;}",
             &new_opt));
  ASSERT_TRUE(new_opt.block_cache != nullptr);
  ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache)->GetNumShardBits(), 4);
  ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache)->GetHighPriPoolRatio(), 0.5);
  ASSERT_TRUE(new_opt.block_cache_compressed != nullptr);
  ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 1024UL*1024UL);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache_compressed)->GetNumShardBits(), 4);
  ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), true);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache_compressed)->GetHighPriPoolRatio(),
                0.5);

  // Set only block cache capacity. Check other values are
  // reset to default values.
  ASSERT_OK(GetBlockBasedTableOptionsFromString(table_opt,
             "block_cache={capacity=2M};"
             "block_cache_compressed={capacity=2M}",
             &new_opt));
  ASSERT_TRUE(new_opt.block_cache != nullptr);
  ASSERT_EQ(new_opt.block_cache->GetCapacity(), 2*1024UL*1024UL);
  // Default values
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache)->GetNumShardBits(),
                GetDefaultCacheShardBits(new_opt.block_cache->GetCapacity()));
  ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false);
  ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache)
                ->GetHighPriPoolRatio(),
            0.5);
  ASSERT_TRUE(new_opt.block_cache_compressed != nullptr);
  ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 2*1024UL*1024UL);
  // Default values
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache_compressed)->GetNumShardBits(),
                GetDefaultCacheShardBits(
                    new_opt.block_cache_compressed->GetCapacity()));
  ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), false);
  ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache_compressed)
                ->GetHighPriPoolRatio(),
            0.5);

  // Set couple of block cache options.
  // (No capacity is given here; the test expects GetCapacity() == 0.)
  ASSERT_OK(GetBlockBasedTableOptionsFromString(
      table_opt,
      "block_cache={num_shard_bits=5;high_pri_pool_ratio=0.5;};"
      "block_cache_compressed={num_shard_bits=5;"
      "high_pri_pool_ratio=0.0;}",
      &new_opt));
  ASSERT_EQ(new_opt.block_cache->GetCapacity(), 0);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache)->GetNumShardBits(), 5);
  ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), false);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache)->GetHighPriPoolRatio(), 0.5);
  ASSERT_TRUE(new_opt.block_cache_compressed != nullptr);
  ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 0);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache_compressed)->GetNumShardBits(), 5);
  ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), false);
  ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache_compressed)
                ->GetHighPriPoolRatio(),
            0.0);

  // Set couple of block cache options.
  // (high_pri_pool_ratio is omitted; the test expects it to read back as 0.5.)
  ASSERT_OK(GetBlockBasedTableOptionsFromString(table_opt,
             "block_cache={capacity=1M;num_shard_bits=4;"
             "strict_capacity_limit=true;};"
             "block_cache_compressed={capacity=1M;num_shard_bits=4;"
             "strict_capacity_limit=true;}",
             &new_opt));
  ASSERT_TRUE(new_opt.block_cache != nullptr);
  ASSERT_EQ(new_opt.block_cache->GetCapacity(), 1024UL*1024UL);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache)->GetNumShardBits(), 4);
  ASSERT_EQ(new_opt.block_cache->HasStrictCapacityLimit(), true);
  ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache)
                ->GetHighPriPoolRatio(),
            0.5);
  ASSERT_TRUE(new_opt.block_cache_compressed != nullptr);
  ASSERT_EQ(new_opt.block_cache_compressed->GetCapacity(), 1024UL*1024UL);
  ASSERT_EQ(std::dynamic_pointer_cast(
                new_opt.block_cache_compressed)->GetNumShardBits(), 4);
  ASSERT_EQ(new_opt.block_cache_compressed->HasStrictCapacityLimit(), true);
  ASSERT_EQ(std::dynamic_pointer_cast(new_opt.block_cache_compressed)
                ->GetHighPriPoolRatio(),
            0.5);
}

// Parses a full PlainTableOptions string and checks every field, then checks
// that an unknown option name and an unknown EncodingType are rejected.
TEST_F(OptionsOldApiTest, GetPlainTableOptionsFromString) {
  PlainTableOptions table_opt;
  PlainTableOptions new_opt;
  // make sure default values are overwritten by something else
  ASSERT_OK(GetPlainTableOptionsFromString(table_opt,
            "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;"
            "index_sparseness=8;huge_page_tlb_size=4;encoding_type=kPrefix;"
            "full_scan_mode=true;store_index_in_file=true",
            &new_opt));
  ASSERT_EQ(new_opt.user_key_len, 66u);
  ASSERT_EQ(new_opt.bloom_bits_per_key, 20);
  ASSERT_EQ(new_opt.hash_table_ratio, 0.5);
  ASSERT_EQ(new_opt.index_sparseness, 8);
  ASSERT_EQ(new_opt.huge_page_tlb_size, 4);
  ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix);
  ASSERT_TRUE(new_opt.full_scan_mode);
  ASSERT_TRUE(new_opt.store_index_in_file);

  // unknown option
  ASSERT_NOK(GetPlainTableOptionsFromString(table_opt,
             "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;"
             "bad_option=1",
             &new_opt));

  // unrecognized EncodingType
  ASSERT_NOK(GetPlainTableOptionsFromString(table_opt,
             "user_key_len=66;bloom_bits_per_key=20;hash_table_ratio=0.5;"
             "encoding_type=kPrefixXX",
             &new_opt));
}

// Parses a combined DB/CF option string on top of non-default base options
// and checks the parsed values, including a custom Env looked up by name.
TEST_F(OptionsOldApiTest, GetOptionsFromStringTest) {
  Options base_options, new_options;
  base_options.write_buffer_size = 20;
  base_options.min_write_buffer_number_to_merge = 15;
  BlockBasedTableOptions block_based_table_options;
  block_based_table_options.cache_index_and_filter_blocks = true;
  base_options.table_factory.reset(
      NewBlockBasedTableFactory(block_based_table_options));

  // Register an Env with object registry.
  const static char* kCustomEnvName = "CustomEnv";
  class CustomEnv : public EnvWrapper {
   public:
    explicit CustomEnv(Env* _target) : EnvWrapper(_target) {}
  };

  // The factory hands out one function-local static CustomEnv instance.
  ObjectLibrary::Default()->Register(
      kCustomEnvName,
      [](const std::string& /*name*/, std::unique_ptr* /*env_guard*/,
         std::string* /* errmsg */) {
        static CustomEnv env(Env::Default());
        return &env;
      });

  ASSERT_OK(GetOptionsFromString(
      base_options,
      "write_buffer_size=10;max_write_buffer_number=16;"
      "block_based_table_factory={block_cache=1M;block_size=4;};"
      "compression_opts=4:5:6;create_if_missing=true;max_open_files=1;"
      "bottommost_compression_opts=5:6:7;create_if_missing=true;max_open_files="
      "1;"
      "rate_limiter_bytes_per_sec=1024;env=CustomEnv",
      &new_options));

  ASSERT_EQ(new_options.compression_opts.window_bits, 4);
  ASSERT_EQ(new_options.compression_opts.level, 5);
  ASSERT_EQ(new_options.compression_opts.strategy, 6);
  ASSERT_EQ(new_options.compression_opts.max_dict_bytes, 0u);
  ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
  ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
  ASSERT_EQ(new_options.compression_opts.enabled, false);
  ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
  ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
  ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
  ASSERT_EQ(new_options.bottommost_compression_opts.strategy, 7);
  ASSERT_EQ(new_options.bottommost_compression_opts.max_dict_bytes, 0u);
ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
  // (Continuation of TEST_F(OptionsOldApiTest, GetOptionsFromStringTest).)
  ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
  ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);

  ASSERT_EQ(new_options.write_buffer_size, 10U);
  ASSERT_EQ(new_options.max_write_buffer_number, 16);
  // NOTE(review): the cast below has no template argument in this copy of
  // the text; it looks stripped -- confirm against upstream.
  BlockBasedTableOptions new_block_based_table_options =
      dynamic_cast(new_options.table_factory.get())
          ->table_options();
  ASSERT_EQ(new_block_based_table_options.block_cache->GetCapacity(), 1U << 20);
  ASSERT_EQ(new_block_based_table_options.block_size, 4U);
  // don't overwrite block based table options
  ASSERT_TRUE(new_block_based_table_options.cache_index_and_filter_blocks);

  ASSERT_EQ(new_options.create_if_missing, true);
  ASSERT_EQ(new_options.max_open_files, 1);
  ASSERT_TRUE(new_options.rate_limiter.get() != nullptr);
  // Loading the Env by its registered name must yield the same pointer that
  // the option string installed.
  Env* newEnv = new_options.env;
  ASSERT_OK(Env::LoadEnv(kCustomEnvName, &newEnv));
  ASSERT_EQ(newEnv, new_options.env);
}

// Round-trips randomly initialised DBOptions through their string form.
TEST_F(OptionsOldApiTest, DBOptionsSerialization) {
  Options base_options, new_options;
  Random rnd(301);

  // Phase 1: Make big change in base_options
  test::RandomInitDBOptions(&base_options, &rnd);

  // Phase 2: obtain a string from base_option
  std::string base_options_file_content;
  ASSERT_OK(GetStringFromDBOptions(&base_options_file_content, base_options));

  // Phase 3: Set new_options from the derived string and expect
  //          new_options == base_options
  ASSERT_OK(GetDBOptionsFromString(DBOptions(), base_options_file_content,
                                   &new_options));
  ConfigOptions config_options;
  ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(config_options, base_options,
                                                  new_options));
}

// Round-trips randomly initialised ColumnFamilyOptions through their string
// form.
TEST_F(OptionsOldApiTest, ColumnFamilyOptionsSerialization) {
  Options options;
  ColumnFamilyOptions base_opt, new_opt;
  Random rnd(302);
  // Phase 1: randomly assign base_opt
  // custom type options
  test::RandomInitCFOptions(&base_opt, options, &rnd);

  // Phase 2: obtain a string from base_opt
  std::string base_options_file_content;
  ASSERT_OK(
      GetStringFromColumnFamilyOptions(&base_options_file_content, base_opt));

  // Phase 3: Set new_opt from the derived string and expect
  //          new_opt == base_opt
  ASSERT_OK(GetColumnFamilyOptionsFromString(
      ColumnFamilyOptions(), base_options_file_content, &new_opt));
  ConfigOptions config_options;
  ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, base_opt,
                                                  new_opt));
  // compaction_filter is a raw pointer; the test frees it explicitly here.
  if (base_opt.compaction_filter) {
    delete base_opt.compaction_filter;
  }
}
#endif  // !ROCKSDB_LITE

#ifndef ROCKSDB_LITE
// Fixture for the options-file parser tests. It builds a test::StringEnv on
// top of Env::Default() and a LegacyFileSystemWrapper around that env.
// NOTE(review): the unique_ptr members have no template arguments in this
// copy of the text; they look stripped -- confirm against upstream.
class OptionsParserTest : public testing::Test {
 public:
  OptionsParserTest() {
    env_.reset(new test::StringEnv(Env::Default()));
    fs_.reset(new LegacyFileSystemWrapper(env_.get()));
  }

 protected:
  std::unique_ptr env_;
  std::unique_ptr fs_;
};

// NOTE(review): runs of spaces inside the option-file string literals in the
// tests below may have been collapsed in this copy of the text -- confirm
// against upstream before relying on exact literal contents.

// '#' comments (whole-line and trailing) must be ignored by the parser.
TEST_F(OptionsParserTest, Comment) {
  DBOptions db_opt;
  db_opt.max_open_files = 12345;
  db_opt.max_background_flushes = 301;
  db_opt.max_total_wal_size = 1024;
  ColumnFamilyOptions cf_opt;

  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[Version]\n"
      " rocksdb_version=3.14.0\n"
      " options_file_version=1\n"
      "[ DBOptions ]\n"
      " # note that we don't support space around \"=\"\n"
      " max_open_files=12345;\n"
      " max_background_flushes=301 # comment after a statement is fine\n"
      " # max_background_flushes=1000 # this line would be ignored\n"
      " # max_background_compactions=2000 # so does this one\n"
      " max_total_wal_size=1024 # keep_log_file_num=1000\n"
      "[CFOptions \"default\"] # column family must be specified\n"
      " # in the correct order\n"
      " # if a section is blank, we will use the default\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_OK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));

  ConfigOptions exact;
  exact.input_strings_escaped = false;
  exact.sanity_level = ConfigOptions::kSanityLevelExactMatch;
  ASSERT_OK(
      RocksDBOptionsParser::VerifyDBOptions(exact, *parser.db_opt(), db_opt));
  ASSERT_EQ(parser.NumColumnFamilies(), 1U);
  ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(
      exact, *parser.GetCFOptions("default"), cf_opt));
}

// Extra whitespace around section names, keys and values must still parse.
TEST_F(OptionsParserTest, ExtraSpace) {
  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[ Version ]\n"
      " rocksdb_version = 3.14.0 \n"
      " options_file_version=1 # some comment\n"
      "[DBOptions ] # some comment\n"
      "max_open_files=12345 \n"
      " max_background_flushes = 301 \n"
      " max_total_wal_size = 1024 # keep_log_file_num=1000\n"
      " [CFOptions \"default\" ]\n"
      " # if a section is blank, we will use the default\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_OK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));
}

// A file without a [DBOptions] section must be rejected.
TEST_F(OptionsParserTest, MissingDBOptions) {
  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[Version]\n"
      " rocksdb_version=3.14.0\n"
      " options_file_version=1\n"
      "[CFOptions \"default\"]\n"
      " # if a section is blank, we will use the default\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_NOK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));
  ;  // stray empty statement kept from the original
}

// A file with two [DBOptions] sections must be rejected.
TEST_F(OptionsParserTest, DoubleDBOptions) {
  DBOptions db_opt;
  db_opt.max_open_files = 12345;
  db_opt.max_background_flushes = 301;
  db_opt.max_total_wal_size = 1024;
  ColumnFamilyOptions cf_opt;

  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[Version]\n"
      " rocksdb_version=3.14.0\n"
      " options_file_version=1\n"
      "[DBOptions]\n"
      " max_open_files=12345\n"
      " max_background_flushes=301\n"
      " max_total_wal_size=1024 # keep_log_file_num=1000\n"
      "[DBOptions]\n"
      "[CFOptions \"default\"]\n"
      " # if a section is blank, we will use the default\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_NOK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));
}

// A file whose only CF section is not "default" must be rejected.
TEST_F(OptionsParserTest, NoDefaultCFOptions) {
  DBOptions db_opt;
  db_opt.max_open_files = 12345;
  db_opt.max_background_flushes = 301;
  db_opt.max_total_wal_size = 1024;
  ColumnFamilyOptions cf_opt;

  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[Version]\n"
      " rocksdb_version=3.14.0\n"
      " options_file_version=1\n"
      "[DBOptions]\n"
      " max_open_files=12345\n"
      " max_background_flushes=301\n"
      " max_total_wal_size=1024 # keep_log_file_num=1000\n"
      "[CFOptions \"something_else\"]\n"
      " # if a section is blank, we will use the default\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_NOK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));
}

// The "default" CF section must come before any other CF section.
TEST_F(OptionsParserTest, DefaultCFOptionsMustBeTheFirst) {
  DBOptions db_opt;
  db_opt.max_open_files = 12345;
  db_opt.max_background_flushes = 301;
  db_opt.max_total_wal_size = 1024;
  ColumnFamilyOptions cf_opt;

  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[Version]\n"
      " rocksdb_version=3.14.0\n"
      " options_file_version=1\n"
      "[DBOptions]\n"
      " max_open_files=12345\n"
      " max_background_flushes=301\n"
      " max_total_wal_size=1024 # keep_log_file_num=1000\n"
      "[CFOptions \"something_else\"]\n"
      " # if a section is blank, we will use the default\n"
      "[CFOptions \"default\"]\n"
      " # if a section is blank, we will use the default\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_NOK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));
}

// Two CF sections with the same name must be rejected.
TEST_F(OptionsParserTest, DuplicateCFOptions) {
  DBOptions db_opt;
  db_opt.max_open_files = 12345;
  db_opt.max_background_flushes = 301;
  db_opt.max_total_wal_size = 1024;
  ColumnFamilyOptions cf_opt;

  std::string options_file_content =
      "# This is a testing option string.\n"
      "# Currently we only support \"#\" styled comment.\n"
      "\n"
      "[Version]\n"
      " rocksdb_version=3.14.0\n"
      " options_file_version=1\n"
      "[DBOptions]\n"
      " max_open_files=12345\n"
      " max_background_flushes=301\n"
      " max_total_wal_size=1024 # keep_log_file_num=1000\n"
      "[CFOptions \"default\"]\n"
      "[CFOptions \"something_else\"]\n"
      "[CFOptions \"something_else\"]\n";

  const std::string kTestFileName = "test-rocksdb-options.ini";
  ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content));
  RocksDBOptionsParser parser;
  ASSERT_NOK(
      parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */));
}

// For five file-version cases relative to the running version, checks
// whether ignore_unknown_options makes a file with unknown options parse.
// The test expects unknown options to be ignorable only when the file
// declares a higher version than the running one.
TEST_F(OptionsParserTest, IgnoreUnknownOptions) {
  for (int case_id = 0; case_id < 5; case_id++) {
    DBOptions db_opt;
    db_opt.max_open_files = 12345;
    db_opt.max_background_flushes = 301;
    db_opt.max_total_wal_size = 1024;
    ColumnFamilyOptions cf_opt;

    std::string version_string;
    bool should_ignore = true;
    if (case_id == 0) {
      // same version
      should_ignore = false;
      version_string =
          ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR) + ".0";
    } else if (case_id == 1) {
      // higher minor version
      should_ignore = true;
      version_string =
          ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR + 1) + ".0";
    } else if (case_id == 2) {
      // higher major version.
should_ignore = true; version_string = ToString(ROCKSDB_MAJOR + 1) + ".0.0"; } else if (case_id == 3) { // lower minor version #if ROCKSDB_MINOR == 0 continue; #else version_string = ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR - 1) + ".0"; should_ignore = false; #endif } else { // lower major version should_ignore = false; version_string = ToString(ROCKSDB_MAJOR - 1) + "." + ToString(ROCKSDB_MINOR) + ".0"; } std::string options_file_content = "# This is a testing option string.\n" "# Currently we only support \"#\" styled comment.\n" "\n" "[Version]\n" " rocksdb_version=" + version_string + "\n" " options_file_version=1\n" "[DBOptions]\n" " max_open_files=12345\n" " max_background_flushes=301\n" " max_total_wal_size=1024 # keep_log_file_num=1000\n" " unknown_db_option1=321\n" " unknown_db_option2=false\n" "[CFOptions \"default\"]\n" " unknown_cf_option1=hello\n" "[CFOptions \"something_else\"]\n" " unknown_cf_option2=world\n" " # if a section is blank, we will use the default\n"; const std::string kTestFileName = "test-rocksdb-options.ini"; auto s = env_->FileExists(kTestFileName); ASSERT_TRUE(s.ok() || s.IsNotFound()); if (s.ok()) { ASSERT_OK(env_->DeleteFile(kTestFileName)); } ASSERT_OK(env_->WriteToNewFile(kTestFileName, options_file_content)); RocksDBOptionsParser parser; ASSERT_NOK(parser.Parse(kTestFileName, fs_.get(), false, 4096 /* readahead_size */)); if (should_ignore) { ASSERT_OK(parser.Parse(kTestFileName, fs_.get(), true /* ignore_unknown_options */, 4096 /* readahead_size */)); } else { ASSERT_NOK(parser.Parse(kTestFileName, fs_.get(), true /* ignore_unknown_options */, 4096 /* readahead_size */)); } } } TEST_F(OptionsParserTest, ParseVersion) { DBOptions db_opt; db_opt.max_open_files = 12345; db_opt.max_background_flushes = 301; db_opt.max_total_wal_size = 1024; ColumnFamilyOptions cf_opt; std::string file_template = "# This is a testing option string.\n" "# Currently we only support \"#\" styled comment.\n" "\n" "[Version]\n" " 
rocksdb_version=3.13.1\n" " options_file_version=%s\n" "[DBOptions]\n" "[CFOptions \"default\"]\n"; const int kLength = 1000; char buffer[kLength]; RocksDBOptionsParser parser; const std::vector invalid_versions = { "a.b.c", "3.2.2b", "3.-12", "3. 1", // only digits and dots are allowed "1.2.3.4", "1.2.3" // can only contains at most one dot. "0", // options_file_version must be at least one "3..2", ".", ".1.2", // must have at least one digit before each dot "1.2.", "1.", "2.34."}; // must have at least one digit after each dot for (auto iv : invalid_versions) { snprintf(buffer, kLength - 1, file_template.c_str(), iv.c_str()); parser.Reset(); ASSERT_OK(env_->WriteToNewFile(iv, buffer)); ASSERT_NOK(parser.Parse(iv, fs_.get(), false, 0 /* readahead_size */)); } const std::vector valid_versions = { "1.232", "100", "3.12", "1", "12.3 ", " 1.25 "}; for (auto vv : valid_versions) { snprintf(buffer, kLength - 1, file_template.c_str(), vv.c_str()); parser.Reset(); ASSERT_OK(env_->WriteToNewFile(vv, buffer)); ASSERT_OK(parser.Parse(vv, fs_.get(), false, 0 /* readahead_size */)); } } void VerifyCFPointerTypedOptions( ColumnFamilyOptions* base_cf_opt, const ColumnFamilyOptions* new_cf_opt, const std::unordered_map* new_cf_opt_map) { std::string name_buffer; ConfigOptions config_options; config_options.input_strings_escaped = false; ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); // change the name of merge operator back-and-forth { auto* merge_operator = dynamic_cast( base_cf_opt->merge_operator.get()); if (merge_operator != nullptr) { name_buffer = merge_operator->Name(); // change the name and expect non-ok status merge_operator->SetName("some-other-name"); ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); // change the name back and expect ok status merge_operator->SetName(name_buffer); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( config_options, 
*base_cf_opt, *new_cf_opt, new_cf_opt_map)); } } // change the name of the compaction filter factory back-and-forth { auto* compaction_filter_factory = dynamic_cast( base_cf_opt->compaction_filter_factory.get()); if (compaction_filter_factory != nullptr) { name_buffer = compaction_filter_factory->Name(); // change the name and expect non-ok status compaction_filter_factory->SetName("some-other-name"); ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); // change the name back and expect ok status compaction_filter_factory->SetName(name_buffer); ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); } } // test by setting compaction_filter to nullptr { auto* tmp_compaction_filter = base_cf_opt->compaction_filter; if (tmp_compaction_filter != nullptr) { base_cf_opt->compaction_filter = nullptr; // set compaction_filter to nullptr and expect non-ok status ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); // set the value back and expect ok status base_cf_opt->compaction_filter = tmp_compaction_filter; ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); } } // test by setting table_factory to nullptr { auto tmp_table_factory = base_cf_opt->table_factory; if (tmp_table_factory != nullptr) { base_cf_opt->table_factory.reset(); // set table_factory to nullptr and expect non-ok status ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); // set the value back and expect ok status base_cf_opt->table_factory = tmp_table_factory; ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); } } // test by setting memtable_factory to nullptr { auto tmp_memtable_factory = base_cf_opt->memtable_factory; if (tmp_memtable_factory != nullptr) { 
base_cf_opt->memtable_factory.reset(); // set memtable_factory to nullptr and expect non-ok status ASSERT_NOK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); // set the value back and expect ok status base_cf_opt->memtable_factory = tmp_memtable_factory; ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( config_options, *base_cf_opt, *new_cf_opt, new_cf_opt_map)); } } } TEST_F(OptionsParserTest, Readahead) { DBOptions base_db_opt; std::vector base_cf_opts; base_cf_opts.emplace_back(); base_cf_opts.emplace_back(); std::string one_mb_string = std::string(1024 * 1024, 'x'); std::vector cf_names = {"default", one_mb_string}; const std::string kOptionsFileName = "test-persisted-options.ini"; ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts, kOptionsFileName, fs_.get())); uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(kOptionsFileName, &file_size)); assert(file_size > 0); RocksDBOptionsParser parser; env_->num_seq_file_read_ = 0; size_t readahead_size = 128 * 1024; ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false, readahead_size)); ASSERT_EQ(env_->num_seq_file_read_.load(), (file_size - 1) / readahead_size + 1); env_->num_seq_file_read_.store(0); readahead_size = 1024 * 1024; ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false, readahead_size)); ASSERT_EQ(env_->num_seq_file_read_.load(), (file_size - 1) / readahead_size + 1); // Tiny readahead. 8 KB is read each time. env_->num_seq_file_read_.store(0); ASSERT_OK( parser.Parse(kOptionsFileName, fs_.get(), false, 1 /* readahead_size */)); ASSERT_GE(env_->num_seq_file_read_.load(), file_size / (8 * 1024)); ASSERT_LT(env_->num_seq_file_read_.load(), file_size / (8 * 1024) * 2); // Disable readahead means 512KB readahead. 
env_->num_seq_file_read_.store(0); ASSERT_OK( parser.Parse(kOptionsFileName, fs_.get(), false, 0 /* readahead_size */)); ASSERT_GE(env_->num_seq_file_read_.load(), (file_size - 1) / (512 * 1024) + 1); } TEST_F(OptionsParserTest, DumpAndParse) { DBOptions base_db_opt; std::vector base_cf_opts; std::vector cf_names = {"default", "cf1", "cf2", "cf3", "c:f:4:4:4" "p\\i\\k\\a\\chu\\\\\\", "###rocksdb#1-testcf#2###"}; const int num_cf = static_cast(cf_names.size()); Random rnd(302); test::RandomInitDBOptions(&base_db_opt, &rnd); base_db_opt.db_log_dir += "/#odd #but #could #happen #path #/\\\\#OMG"; BlockBasedTableOptions special_bbto; special_bbto.cache_index_and_filter_blocks = true; special_bbto.block_size = 999999; for (int c = 0; c < num_cf; ++c) { ColumnFamilyOptions cf_opt; Random cf_rnd(0xFB + c); test::RandomInitCFOptions(&cf_opt, base_db_opt, &cf_rnd); if (c < 4) { cf_opt.prefix_extractor.reset(test::RandomSliceTransform(&rnd, c)); } if (c < 3) { cf_opt.table_factory.reset(test::RandomTableFactory(&rnd, c)); } else if (c == 4) { cf_opt.table_factory.reset(NewBlockBasedTableFactory(special_bbto)); } base_cf_opts.emplace_back(cf_opt); } const std::string kOptionsFileName = "test-persisted-options.ini"; // Use default for escaped(true), unknown(false) and check (exact) ConfigOptions config_options; ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts, kOptionsFileName, fs_.get())); RocksDBOptionsParser parser; ASSERT_OK(parser.Parse(config_options, kOptionsFileName, fs_.get())); // Make sure block-based table factory options was deserialized correctly std::shared_ptr ttf = (*parser.cf_opts())[4].table_factory; ASSERT_EQ(BlockBasedTableFactory::kName, std::string(ttf->Name())); const BlockBasedTableOptions& parsed_bbto = static_cast(ttf.get())->table_options(); ASSERT_EQ(special_bbto.block_size, parsed_bbto.block_size); ASSERT_EQ(special_bbto.cache_index_and_filter_blocks, parsed_bbto.cache_index_and_filter_blocks); 
ASSERT_OK(RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
      config_options, base_db_opt, cf_names, base_cf_opts, kOptionsFileName,
      fs_.get()));
  // (Continuation of TEST_F(OptionsParserTest, DumpAndParse).)

  ASSERT_OK(RocksDBOptionsParser::VerifyDBOptions(
      config_options, *parser.db_opt(), base_db_opt));
  for (int c = 0; c < num_cf; ++c) {
    const auto* cf_opt = parser.GetCFOptions(cf_names[c]);
    ASSERT_NE(cf_opt, nullptr);
    ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions(
        config_options, base_cf_opts[c], *cf_opt,
        &(parser.cf_opt_maps()->at(c))));
  }

  // Further verify pointer-typed options
  for (int c = 0; c < num_cf; ++c) {
    const auto* cf_opt = parser.GetCFOptions(cf_names[c]);
    ASSERT_NE(cf_opt, nullptr);
    VerifyCFPointerTypedOptions(&base_cf_opts[c], cf_opt,
                                &(parser.cf_opt_maps()->at(c)));
  }

  ASSERT_EQ(parser.GetCFOptions("does not exist"), nullptr);

  // After changing one DB option, verification against the file must fail.
  base_db_opt.max_open_files++;
  ASSERT_NOK(RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
      config_options, base_db_opt, cf_names, base_cf_opts, kOptionsFileName,
      fs_.get()));

  // compaction_filter is a raw pointer; the test frees each one explicitly.
  for (int c = 0; c < num_cf; ++c) {
    if (base_cf_opts[c].compaction_filter) {
      delete base_cf_opts[c].compaction_filter;
    }
  }
}

// Checks the values produced by the OldDefaults() helpers for several
// (major, minor) version arguments and by OptimizeForSmallDb(), after a
// persist/parse round trip of level-style and universal-style CF options.
TEST_F(OptionsParserTest, DifferentDefault) {
  const std::string kOptionsFileName = "test-persisted-options.ini";

  ColumnFamilyOptions cf_level_opts;
  ASSERT_EQ(CompactionPri::kMinOverlappingRatio, cf_level_opts.compaction_pri);
  cf_level_opts.OptimizeLevelStyleCompaction();

  ColumnFamilyOptions cf_univ_opts;
  cf_univ_opts.OptimizeUniversalStyleCompaction();

  ASSERT_OK(PersistRocksDBOptions(DBOptions(), {"default", "universal"},
                                  {cf_level_opts, cf_univ_opts},
                                  kOptionsFileName, fs_.get()));

  RocksDBOptionsParser parser;
  ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false,
                         4096 /* readahead_size */));

  {
    Options old_default_opts;
    old_default_opts.OldDefaults();
    ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base);
    ASSERT_EQ(5000, old_default_opts.max_open_files);
    ASSERT_EQ(2 * 1024U * 1024U, old_default_opts.delayed_write_rate);
    ASSERT_EQ(WALRecoveryMode::kTolerateCorruptedTailRecords,
              old_default_opts.wal_recovery_mode);
  }
  {
    Options old_default_opts;
    old_default_opts.OldDefaults(4, 6);
    ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base);
    ASSERT_EQ(5000, old_default_opts.max_open_files);
  }
  {
    Options old_default_opts;
    old_default_opts.OldDefaults(4, 7);
    ASSERT_NE(10 * 1048576, old_default_opts.max_bytes_for_level_base);
    ASSERT_NE(4, old_default_opts.table_cache_numshardbits);
    ASSERT_EQ(5000, old_default_opts.max_open_files);
    ASSERT_EQ(2 * 1024U * 1024U, old_default_opts.delayed_write_rate);
  }
  {
    ColumnFamilyOptions old_default_cf_opts;
    old_default_cf_opts.OldDefaults();
    ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base);
    ASSERT_EQ(4 << 20, old_default_cf_opts.write_buffer_size);
    // NOTE(review): duplicate of the target_file_size_base check two lines
    // above -- possibly meant to check a different field; confirm.
    ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base);
    ASSERT_EQ(0, old_default_cf_opts.soft_pending_compaction_bytes_limit);
    ASSERT_EQ(0, old_default_cf_opts.hard_pending_compaction_bytes_limit);
    ASSERT_EQ(CompactionPri::kByCompensatedSize,
              old_default_cf_opts.compaction_pri);
  }
  {
    ColumnFamilyOptions old_default_cf_opts;
    old_default_cf_opts.OldDefaults(4, 6);
    ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base);
    ASSERT_EQ(CompactionPri::kByCompensatedSize,
              old_default_cf_opts.compaction_pri);
  }
  {
    ColumnFamilyOptions old_default_cf_opts;
    old_default_cf_opts.OldDefaults(4, 7);
    ASSERT_NE(2 * 1048576, old_default_cf_opts.target_file_size_base);
    ASSERT_EQ(CompactionPri::kByCompensatedSize,
              old_default_cf_opts.compaction_pri);
  }
  {
    Options old_default_opts;
    old_default_opts.OldDefaults(5, 1);
    ASSERT_EQ(2 * 1024U * 1024U, old_default_opts.delayed_write_rate);
  }
  {
    Options old_default_opts;
    old_default_opts.OldDefaults(5, 2);
    ASSERT_EQ(16 * 1024U * 1024U, old_default_opts.delayed_write_rate);
    ASSERT_TRUE(old_default_opts.compaction_pri ==
                CompactionPri::kByCompensatedSize);
  }
  {
    Options old_default_opts;
    old_default_opts.OldDefaults(5, 18);
    ASSERT_TRUE(old_default_opts.compaction_pri ==
                CompactionPri::kByCompensatedSize);
  }

  Options small_opts;
  small_opts.OptimizeForSmallDb();
  ASSERT_EQ(2 << 20, small_opts.write_buffer_size);
  ASSERT_EQ(5000, small_opts.max_open_files);
}

// Fixture that persists a single "default" column family to the file
// "OPTIONS" and verifies ColumnFamilyOptions against that file at a chosen
// sanity level.
class OptionsSanityCheckTest : public OptionsParserTest {
 public:
  OptionsSanityCheckTest() {}

 protected:
  // Verifies cf_opts against the persisted options file at the given sanity
  // level; unknown options are not ignored. Returns the verification status.
  Status SanityCheckCFOptions(const ColumnFamilyOptions& cf_opts,
                              ConfigOptions::SanityLevel level,
                              bool input_strings_escaped = true) {
    ConfigOptions config_options;
    config_options.sanity_level = level;
    config_options.ignore_unknown_options = false;
    config_options.input_strings_escaped = input_strings_escaped;

    return RocksDBOptionsParser::VerifyRocksDBOptionsFromFile(
        config_options, DBOptions(), {"default"}, {cf_opts}, kOptionsFileName,
        fs_.get());
  }

  // Deletes the options file, then persists default DBOptions plus cf_opts as
  // the "default" column family. Returns early with the DeleteFile status if
  // the delete fails.
  Status PersistCFOptions(const ColumnFamilyOptions& cf_opts) {
    Status s = env_->DeleteFile(kOptionsFileName);
    if (!s.ok()) {
      return s;
    }
    return PersistRocksDBOptions(DBOptions(), {"default"}, {cf_opts},
                                 kOptionsFileName, fs_.get());
  }

  const std::string kOptionsFileName = "OPTIONS";
};

TEST_F(OptionsSanityCheckTest, SanityCheck) {
  ColumnFamilyOptions opts;
  Random rnd(301);

  // default ColumnFamilyOptions
  {
    ASSERT_OK(PersistCFOptions(opts));
    ASSERT_OK(
        SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch));
  }

  // prefix_extractor
  {
    // Okay to change prefix_extractor from nullptr to non-nullptr
    ASSERT_EQ(opts.prefix_extractor.get(), nullptr);
    opts.prefix_extractor.reset(NewCappedPrefixTransform(10));
    ASSERT_OK(SanityCheckCFOptions(
        opts, ConfigOptions::kSanityLevelLooselyCompatible));
    ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone));

    // persist the change
    ASSERT_OK(PersistCFOptions(opts));
    ASSERT_OK(
        SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch));

    // use same prefix extractor but with different parameter
    opts.prefix_extractor.reset(NewCappedPrefixTransform(15));
    // expect pass only in
    // ConfigOptions::kSanityLevelLooselyCompatible
ASSERT_NOK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // repeat the test with FixedPrefixTransform opts.prefix_extractor.reset(NewFixedPrefixTransform(10)); ASSERT_NOK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // persist the change of prefix_extractor ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); // use same prefix extractor but with different parameter opts.prefix_extractor.reset(NewFixedPrefixTransform(15)); // expect pass only in // ConfigOptions::kSanityLevelLooselyCompatible ASSERT_NOK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // Change prefix extractor from non-nullptr to nullptr opts.prefix_extractor.reset(); // expect pass as it's safe to change prefix_extractor // from non-null to null ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); } // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); // table_factory { for (int tb = 0; tb <= 2; ++tb) { // change the table factory opts.table_factory.reset(test::RandomTableFactory(&rnd, tb)); ASSERT_NOK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( 
SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); } } // merge_operator { // Test when going from nullptr -> merge operator opts.merge_operator.reset(test::RandomMergeOperator(&rnd)); ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); for (int test = 0; test < 5; ++test) { // change the merge operator opts.merge_operator.reset(test::RandomMergeOperator(&rnd)); ASSERT_NOK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); } // Test when going from merge operator -> nullptr opts.merge_operator = nullptr; ASSERT_NOK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); ASSERT_OK(SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelNone)); // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); } // compaction_filter { for (int test = 0; test < 5; ++test) { // change the compaction filter opts.compaction_filter = test::RandomCompactionFilter(&rnd); ASSERT_NOK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); delete opts.compaction_filter; opts.compaction_filter = nullptr; } } // compaction_filter_factory { for (int test = 0; test < 5; ++test) { // change the compaction filter factory opts.compaction_filter_factory.reset( 
test::RandomCompactionFilterFactory(&rnd)); ASSERT_NOK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); ASSERT_OK(SanityCheckCFOptions( opts, ConfigOptions::kSanityLevelLooselyCompatible)); // persist the change ASSERT_OK(PersistCFOptions(opts)); ASSERT_OK( SanityCheckCFOptions(opts, ConfigOptions::kSanityLevelExactMatch)); } } } namespace { bool IsEscapedString(const std::string& str) { for (size_t i = 0; i < str.size(); ++i) { if (str[i] == '\\') { // since we already handle those two consecutive '\'s in // the next if-then branch, any '\' appear at the end // of an escaped string in such case is not valid. if (i == str.size() - 1) { return false; } if (str[i + 1] == '\\') { // if there're two consecutive '\'s, skip the second one. i++; continue; } switch (str[i + 1]) { case ':': case '\\': case '#': continue; default: // if true, '\' together with str[i + 1] is not a valid escape. if (UnescapeChar(str[i + 1]) == str[i + 1]) { return false; } } } else if (isSpecialChar(str[i]) && (i == 0 || str[i - 1] != '\\')) { return false; } } return true; } } // namespace TEST_F(OptionsParserTest, IntegerParsing) { ASSERT_EQ(ParseUint64("18446744073709551615"), 18446744073709551615U); ASSERT_EQ(ParseUint32("4294967295"), 4294967295U); ASSERT_EQ(ParseSizeT("18446744073709551615"), 18446744073709551615U); ASSERT_EQ(ParseInt64("9223372036854775807"), 9223372036854775807); ASSERT_EQ(ParseInt64("-9223372036854775808"), port::kMinInt64); ASSERT_EQ(ParseInt32("2147483647"), 2147483647); ASSERT_EQ(ParseInt32("-2147483648"), port::kMinInt32); ASSERT_EQ(ParseInt("-32767"), -32767); ASSERT_EQ(ParseDouble("-1.234567"), -1.234567); } TEST_F(OptionsParserTest, EscapeOptionString) { ASSERT_EQ(UnescapeOptionString( "This is a test string with \\# \\: and \\\\ escape chars."), "This is a test string with # : and \\ escape chars."); ASSERT_EQ( EscapeOptionString("This is a test string with # : and \\ escape chars."), "This is a test string with \\# \\: and \\\\ escape 
chars."); std::string readible_chars = "A String like this \"1234567890-=_)(*&^%$#@!ertyuiop[]{POIU" "YTREWQasdfghjkl;':LKJHGFDSAzxcvbnm,.?>" "(base_ptr) + opt_info.offset_; char* comp_addr = reinterpret_cast(comp_ptr) + opt_info.offset_; ASSERT_OK(opt_info.Serialize(config_options, opt_name, base_addr, &result)); ASSERT_OK(opt_info.Parse(config_options, opt_name, result, comp_addr)); ASSERT_TRUE(opt_info.AreEqual(config_options, opt_name, base_addr, comp_addr, &mismatch)); } static void TestAndCompareOption(const ConfigOptions& config_options, const OptionTypeInfo& opt_info, const std::string& opt_name, const std::string& opt_value, void* base_ptr, void* comp_ptr) { char* base_addr = reinterpret_cast(base_ptr) + opt_info.offset_; ASSERT_OK(opt_info.Parse(config_options, opt_name, opt_value, base_addr)); TestAndCompareOption(config_options, opt_info, opt_name, base_ptr, comp_ptr); } template void TestOptInfo(const ConfigOptions& config_options, OptionType opt_type, T* base, T* comp) { std::string result; OptionTypeInfo opt_info(0, opt_type); char* base_addr = reinterpret_cast(base); char* comp_addr = reinterpret_cast(comp); ASSERT_FALSE( opt_info.AreEqual(config_options, "base", base_addr, comp_addr, &result)); ASSERT_EQ(result, "base"); ASSERT_NE(*base, *comp); TestAndCompareOption(config_options, opt_info, "base", base_addr, comp_addr); ASSERT_EQ(*base, *comp); } class OptionTypeInfoTest : public testing::Test {}; TEST_F(OptionTypeInfoTest, BasicTypes) { ConfigOptions config_options; { bool a = true, b = false; TestOptInfo(config_options, OptionType::kBoolean, &a, &b); } { int a = 100, b = 200; TestOptInfo(config_options, OptionType::kInt, &a, &b); } { int32_t a = 100, b = 200; TestOptInfo(config_options, OptionType::kInt32T, &a, &b); } { int64_t a = 100, b = 200; TestOptInfo(config_options, OptionType::kInt64T, &a, &b); } { unsigned int a = 100, b = 200; TestOptInfo(config_options, OptionType::kUInt, &a, &b); } { uint32_t a = 100, b = 200; 
TestOptInfo(config_options, OptionType::kUInt32T, &a, &b); } { uint64_t a = 100, b = 200; TestOptInfo(config_options, OptionType::kUInt64T, &a, &b); } { size_t a = 100, b = 200; TestOptInfo(config_options, OptionType::kSizeT, &a, &b); } { std::string a = "100", b = "200"; TestOptInfo(config_options, OptionType::kString, &a, &b); } { double a = 1.0, b = 2.0; TestOptInfo(config_options, OptionType::kDouble, &a, &b); } } TEST_F(OptionTypeInfoTest, TestInvalidArgs) { ConfigOptions config_options; bool b; int i; int32_t i32; int64_t i64; unsigned int u; int32_t u32; int64_t u64; size_t sz; double d; ASSERT_NOK(OptionTypeInfo(0, OptionType::kBoolean) .Parse(config_options, "b", "x", reinterpret_cast(&b))); ASSERT_NOK(OptionTypeInfo(0, OptionType::kInt) .Parse(config_options, "b", "x", reinterpret_cast(&i))); ASSERT_NOK( OptionTypeInfo(0, OptionType::kInt32T) .Parse(config_options, "b", "x", reinterpret_cast(&i32))); ASSERT_NOK( OptionTypeInfo(0, OptionType::kInt64T) .Parse(config_options, "b", "x", reinterpret_cast(&i64))); ASSERT_NOK(OptionTypeInfo(0, OptionType::kUInt) .Parse(config_options, "b", "x", reinterpret_cast(&u))); ASSERT_NOK( OptionTypeInfo(0, OptionType::kUInt32T) .Parse(config_options, "b", "x", reinterpret_cast(&u32))); ASSERT_NOK( OptionTypeInfo(0, OptionType::kUInt64T) .Parse(config_options, "b", "x", reinterpret_cast(&u64))); ASSERT_NOK( OptionTypeInfo(0, OptionType::kSizeT) .Parse(config_options, "b", "x", reinterpret_cast(&sz))); ASSERT_NOK(OptionTypeInfo(0, OptionType::kDouble) .Parse(config_options, "b", "x", reinterpret_cast(&d))); // Don't know how to convert Unknowns to anything else ASSERT_NOK(OptionTypeInfo(0, OptionType::kUnknown) .Parse(config_options, "b", "x", reinterpret_cast(&d))); // Verify that if the parse function throws an exception, it is also trapped OptionTypeInfo func_info(0, OptionType::kUnknown, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, [](const ConfigOptions&, const std::string&, const std::string& value, 
char* addr) { auto ptr = reinterpret_cast(addr); *ptr = ParseInt(value); return Status::OK(); }); ASSERT_OK( func_info.Parse(config_options, "b", "1", reinterpret_cast(&i))); ASSERT_NOK( func_info.Parse(config_options, "b", "x", reinterpret_cast(&i))); } TEST_F(OptionTypeInfoTest, TestParseFunc) { OptionTypeInfo opt_info( 0, OptionType::kUnknown, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, [](const ConfigOptions& /*opts*/, const std::string& name, const std::string& value, char* addr) { auto ptr = reinterpret_cast(addr); if (name == "Oops") { return Status::InvalidArgument(value); } else { *ptr = value + " " + name; return Status::OK(); } }); ConfigOptions config_options; std::string base; ASSERT_OK(opt_info.Parse(config_options, "World", "Hello", reinterpret_cast(&base))); ASSERT_EQ(base, "Hello World"); ASSERT_NOK(opt_info.Parse(config_options, "Oops", "Hello", reinterpret_cast(&base))); } TEST_F(OptionTypeInfoTest, TestSerializeFunc) { OptionTypeInfo opt_info( 0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, nullptr, [](const ConfigOptions& /*opts*/, const std::string& name, const char* /*addr*/, std::string* value) { if (name == "Oops") { return Status::InvalidArgument(name); } else { *value = name; return Status::OK(); } }, nullptr); ConfigOptions config_options; std::string base; std::string value; ASSERT_OK(opt_info.Serialize(config_options, "Hello", reinterpret_cast(&base), &value)); ASSERT_EQ(value, "Hello"); ASSERT_NOK(opt_info.Serialize(config_options, "Oops", reinterpret_cast(&base), &value)); } TEST_F(OptionTypeInfoTest, TestEqualsFunc) { OptionTypeInfo opt_info( 0, OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, nullptr, nullptr, [](const ConfigOptions& /*opts*/, const std::string& name, const char* addr1, const char* addr2, std::string* mismatch) { auto i1 = *(reinterpret_cast(addr1)); auto i2 = *(reinterpret_cast(addr2)); if (name == "LT") { return i1 < i2; } else if 
(name == "GT") { return i1 > i2; } else if (name == "EQ") { return i1 == i2; } else { *mismatch = name + "???"; return false; } }); ConfigOptions config_options; int int1 = 100; int int2 = 200; std::string mismatch; ASSERT_TRUE(opt_info.AreEqual( config_options, "LT", reinterpret_cast(&int1), reinterpret_cast(&int2), &mismatch)); ASSERT_EQ(mismatch, ""); ASSERT_FALSE(opt_info.AreEqual(config_options, "GT", reinterpret_cast(&int1), reinterpret_cast(&int2), &mismatch)); ASSERT_EQ(mismatch, "GT"); ASSERT_FALSE(opt_info.AreEqual(config_options, "NO", reinterpret_cast(&int1), reinterpret_cast(&int2), &mismatch)); ASSERT_EQ(mismatch, "NO???"); } TEST_F(OptionTypeInfoTest, TestOptionFlags) { OptionTypeInfo opt_none(0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kDontSerialize, 0); OptionTypeInfo opt_never(0, OptionType::kString, OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever, 0); OptionTypeInfo opt_alias(0, OptionType::kString, OptionVerificationType::kAlias, OptionTypeFlags::kNone, 0); OptionTypeInfo opt_deprecated(0, OptionType::kString, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0); ConfigOptions config_options; std::string base = "base"; std::string comp = "comp"; // If marked string none, the serialization returns okay but does nothing ASSERT_OK(opt_none.Serialize(config_options, "None", reinterpret_cast(&base), &base)); // If marked never compare, they match even when they do not ASSERT_TRUE(opt_never.AreEqual(config_options, "Never", reinterpret_cast(&base), reinterpret_cast(&comp), &base)); ASSERT_FALSE(opt_none.AreEqual(config_options, "Never", reinterpret_cast(&base), reinterpret_cast(&comp), &base)); // An alias can change the value via parse, but does nothing on serialize on // match std::string result; ASSERT_OK(opt_alias.Parse(config_options, "Alias", "Alias", reinterpret_cast(&base))); ASSERT_OK(opt_alias.Serialize(config_options, "Alias", reinterpret_cast(&base), &result)); 
ASSERT_TRUE(opt_alias.AreEqual(config_options, "Alias", reinterpret_cast(&base), reinterpret_cast(&comp), &result)); ASSERT_EQ(base, "Alias"); ASSERT_NE(base, comp); // Deprecated options do nothing on any of the commands ASSERT_OK(opt_deprecated.Parse(config_options, "Alias", "Deprecated", reinterpret_cast(&base))); ASSERT_OK(opt_deprecated.Serialize(config_options, "Alias", reinterpret_cast(&base), &result)); ASSERT_TRUE(opt_deprecated.AreEqual(config_options, "Alias", reinterpret_cast(&base), reinterpret_cast(&comp), &result)); ASSERT_EQ(base, "Alias"); ASSERT_NE(base, comp); } TEST_F(OptionTypeInfoTest, TestCustomEnum) { enum TestEnum { kA, kB, kC }; std::unordered_map enum_map = { {"A", TestEnum::kA}, {"B", TestEnum::kB}, {"C", TestEnum::kC}, }; OptionTypeInfo opt_info = OptionTypeInfo::Enum(0, &enum_map); TestEnum e1, e2; ConfigOptions config_options; std::string result, mismatch; e2 = TestEnum::kA; ASSERT_OK( opt_info.Parse(config_options, "", "B", reinterpret_cast(&e1))); ASSERT_OK(opt_info.Serialize(config_options, "", reinterpret_cast(&e1), &result)); ASSERT_EQ(e1, TestEnum::kB); ASSERT_EQ(result, "B"); ASSERT_FALSE(opt_info.AreEqual(config_options, "Enum", reinterpret_cast(&e1), reinterpret_cast(&e2), &mismatch)); ASSERT_EQ(mismatch, "Enum"); TestAndCompareOption(config_options, opt_info, "", "C", reinterpret_cast(&e1), reinterpret_cast(&e2)); ASSERT_EQ(e2, TestEnum::kC); ASSERT_NOK( opt_info.Parse(config_options, "", "D", reinterpret_cast(&e1))); ASSERT_EQ(e1, TestEnum::kC); } TEST_F(OptionTypeInfoTest, TestBuiltinEnum) { ConfigOptions config_options; for (auto iter : OptionsHelper::compaction_style_string_map) { CompactionStyle e1, e2; TestAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kCompactionStyle), "CompactionStyle", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } for (auto iter : OptionsHelper::compaction_pri_string_map) { CompactionPri e1, e2; TestAndCompareOption(config_options, OptionTypeInfo(0, 
OptionType::kCompactionPri), "CompactionPri", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } for (auto iter : OptionsHelper::compression_type_string_map) { CompressionType e1, e2; TestAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kCompressionType), "CompressionType", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } for (auto iter : OptionsHelper::compaction_stop_style_string_map) { CompactionStopStyle e1, e2; TestAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kCompactionStopStyle), "CompactionStopStyle", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } for (auto iter : OptionsHelper::checksum_type_string_map) { ChecksumType e1, e2; TestAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kChecksumType), "CheckSumType", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } for (auto iter : OptionsHelper::encoding_type_string_map) { EncodingType e1, e2; TestAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kEncodingType), "EncodingType", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } } TEST_F(OptionTypeInfoTest, TestStruct) { struct Basic { int i = 42; std::string s = "Hello"; }; struct Extended { int j = 11; Basic b; }; std::unordered_map basic_type_map = { {"i", {offsetof(struct Basic, i), OptionType::kInt}}, {"s", {offsetof(struct Basic, s), OptionType::kString}}, }; OptionTypeInfo basic_info = OptionTypeInfo::Struct( "b", &basic_type_map, 0, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, 0); std::unordered_map extended_type_map = { {"j", {offsetof(struct Extended, j), OptionType::kInt}}, {"b", OptionTypeInfo::Struct( "b", &basic_type_map, offsetof(struct Extended, b), OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0)}, {"m", OptionTypeInfo::Struct( "m", &basic_type_map, offsetof(struct Extended, b), OptionVerificationType::kNormal, OptionTypeFlags::kMutable, offsetof(struct Extended, b))}, }; OptionTypeInfo extended_info = OptionTypeInfo::Struct( "e", 
&extended_type_map, 0, OptionVerificationType::kNormal, OptionTypeFlags::kMutable, 0); Extended e1, e2; ConfigOptions config_options; std::string mismatch; TestAndCompareOption(config_options, basic_info, "b", "{i=33;s=33}", &e1.b, &e2.b); ASSERT_EQ(e1.b.i, 33); ASSERT_EQ(e1.b.s, "33"); TestAndCompareOption(config_options, basic_info, "b.i", "44", &e1.b, &e2.b); ASSERT_EQ(e1.b.i, 44); TestAndCompareOption(config_options, basic_info, "i", "55", &e1.b, &e2.b); ASSERT_EQ(e1.b.i, 55); e1.b.i = 0; auto e1bc = reinterpret_cast(&e1.b); auto e2bc = reinterpret_cast(&e2.b); ASSERT_FALSE(basic_info.AreEqual(config_options, "b", e1bc, e2bc, &mismatch)); ASSERT_EQ(mismatch, "b.i"); mismatch.clear(); ASSERT_FALSE( basic_info.AreEqual(config_options, "b.i", e1bc, e2bc, &mismatch)); ASSERT_EQ(mismatch, "b.i"); mismatch.clear(); ASSERT_FALSE(basic_info.AreEqual(config_options, "i", e1bc, e2bc, &mismatch)); ASSERT_EQ(mismatch, "b.i"); mismatch.clear(); e1 = e2; ASSERT_NOK(basic_info.Parse(config_options, "b", "{i=33;s=33;j=44}", e1bc)); ASSERT_TRUE( basic_info.AreEqual(config_options, "b.i", e1bc, e2bc, &mismatch)); ASSERT_NOK(basic_info.Parse(config_options, "b.j", "44", e1bc)); ASSERT_TRUE( basic_info.AreEqual(config_options, "b.i", e1bc, e2bc, &mismatch)); ASSERT_NOK(basic_info.Parse(config_options, "j", "44", e1bc)); ASSERT_TRUE( basic_info.AreEqual(config_options, "b.i", e1bc, e2bc, &mismatch)); TestAndCompareOption(config_options, extended_info, "e", "b={i=55;s=55}; j=22;", &e1, &e2); ASSERT_EQ(e1.b.i, 55); ASSERT_EQ(e1.j, 22); ASSERT_EQ(e1.b.s, "55"); TestAndCompareOption(config_options, extended_info, "e.b", "{i=66;s=66;}", &e1, &e2); ASSERT_EQ(e1.b.i, 66); ASSERT_EQ(e1.j, 22); ASSERT_EQ(e1.b.s, "66"); TestAndCompareOption(config_options, extended_info, "e.b.i", "77", &e1, &e2); ASSERT_EQ(e1.b.i, 77); ASSERT_EQ(e1.j, 22); ASSERT_EQ(e1.b.s, "66"); } TEST_F(OptionTypeInfoTest, TestVectorType) { OptionTypeInfo vec_info = OptionTypeInfo::Vector( 0, 
OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, {0, OptionType::kString}); std::vector vec1, vec2; std::string mismatch; ConfigOptions config_options; TestAndCompareOption(config_options, vec_info, "v", "a:b:c:d", &vec1, &vec2); ASSERT_EQ(vec1.size(), 4); ASSERT_EQ(vec1[0], "a"); ASSERT_EQ(vec1[1], "b"); ASSERT_EQ(vec1[2], "c"); ASSERT_EQ(vec1[3], "d"); vec1[3] = "e"; ASSERT_FALSE(vec_info.AreEqual(config_options, "v", reinterpret_cast(&vec1), reinterpret_cast(&vec2), &mismatch)); ASSERT_EQ(mismatch, "v"); // Test vectors with inner brackets TestAndCompareOption(config_options, vec_info, "v", "a:{b}:c:d", &vec1, &vec2); ASSERT_EQ(vec1.size(), 4); ASSERT_EQ(vec1[0], "a"); ASSERT_EQ(vec1[1], "b"); ASSERT_EQ(vec1[2], "c"); ASSERT_EQ(vec1[3], "d"); OptionTypeInfo bar_info = OptionTypeInfo::Vector( 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, {0, OptionType::kString}, '|'); TestAndCompareOption(config_options, vec_info, "v", "x|y|z", &vec1, &vec2); // Test vectors with inner vector TestAndCompareOption(config_options, bar_info, "v", "a|{b1|b2}|{c1|c2|{d1|d2}}", &vec1, &vec2); ASSERT_EQ(vec1.size(), 3); ASSERT_EQ(vec1[0], "a"); ASSERT_EQ(vec1[1], "b1|b2"); ASSERT_EQ(vec1[2], "c1|c2|{d1|d2}"); } #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); #ifdef GFLAGS ParseCommandLineFlags(&argc, &argv, true); #endif // GFLAGS return RUN_ALL_TESTS(); } rocksdb-6.11.4/options/options_type.h000066400000000000000000000727041370372246700176570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include #include "rocksdb/convenience.h" #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class OptionTypeInfo; enum class OptionType { kBoolean, kInt, kInt32T, kInt64T, kUInt, kUInt32T, kUInt64T, kSizeT, kString, kDouble, kCompactionStyle, kCompactionPri, kSliceTransform, kCompressionType, kTableFactory, kComparator, kCompactionFilter, kCompactionFilterFactory, kCompactionStopStyle, kMergeOperator, kMemTableRepFactory, kFilterPolicy, kFlushBlockPolicyFactory, kChecksumType, kEncodingType, kEnv, kEnum, kStruct, kVector, kUnknown, }; enum class OptionVerificationType { kNormal, kByName, // The option is pointer typed so we can only verify // based on it's name. kByNameAllowNull, // Same as kByName, but it also allows the case // where one of them is a nullptr. kByNameAllowFromNull, // Same as kByName, but it also allows the case // where the old option is nullptr. kDeprecated, // The option is no longer used in rocksdb. The RocksDB // OptionsParser will still accept this option if it // happen to exists in some Options file. However, // the parser will not include it in serialization // and verification processes. 
kAlias, // This option represents is a name/shortcut for // another option and should not be written or verified // independently }; enum class OptionTypeFlags : uint32_t { kNone = 0x00, // No flags kCompareDefault = 0x0, kCompareNever = ConfigOptions::kSanityLevelNone, kCompareLoose = ConfigOptions::kSanityLevelLooselyCompatible, kCompareExact = ConfigOptions::kSanityLevelExactMatch, kMutable = 0x0100, // Option is mutable kDontSerialize = 0x2000, // Don't serialize the option }; inline OptionTypeFlags operator|(const OptionTypeFlags &a, const OptionTypeFlags &b) { return static_cast(static_cast(a) | static_cast(b)); } inline OptionTypeFlags operator&(const OptionTypeFlags &a, const OptionTypeFlags &b) { return static_cast(static_cast(a) & static_cast(b)); } // Converts an string into its enumerated value. // @param type_map Mapping between strings and enum values // @param type The string representation of the enum // @param value Returns the enum value represented by the string // @return true if the string was found in the enum map, false otherwise. template bool ParseEnum(const std::unordered_map& type_map, const std::string& type, T* value) { auto iter = type_map.find(type); if (iter != type_map.end()) { *value = iter->second; return true; } return false; } // Converts an enum into its string representation. // @param type_map Mapping between strings and enum values // @param type The enum // @param value Returned as the string representation of the enum // @return true if the enum was found in the enum map, false otherwise. 
template bool SerializeEnum(const std::unordered_map& type_map, const T& type, std::string* value) { for (const auto& pair : type_map) { if (pair.second == type) { *value = pair.first; return true; } } return false; } template Status ParseVector(const ConfigOptions& config_options, const OptionTypeInfo& elem_info, char separator, const std::string& name, const std::string& value, std::vector* result); template Status SerializeVector(const ConfigOptions& config_options, const OptionTypeInfo& elem_info, char separator, const std::string& name, const std::vector& vec, std::string* value); template bool VectorsAreEqual(const ConfigOptions& config_options, const OptionTypeInfo& elem_info, const std::string& name, const std::vector& vec1, const std::vector& vec2, std::string* mismatch); // Function for converting a option string value into its underlying // representation in "addr" // On success, Status::OK is returned and addr is set to the parsed form // On failure, a non-OK status is returned // @param opts The ConfigOptions controlling how the value is parsed // @param name The name of the options being parsed // @param value The string representation of the option // @param addr Pointer to the object using ParseFunc = std::function; // Function for converting an option "addr" into its string representation. // On success, Status::OK is returned and value is the serialized form. // On failure, a non-OK status is returned // @param opts The ConfigOptions controlling how the values are serialized // @param name The name of the options being serialized // @param addr Pointer to the value being serialized // @param value The result of the serialization. 
using SerializeFunc = std::function; // Function for comparing two option values // If they are not equal, updates "mismatch" with the name of the bad option // @param opts The ConfigOptions controlling how the values are compared // @param name The name of the options being compared // @param addr1 The first address to compare // @param addr2 The address to compare to // @param mismatch If the values are not equal, the name of the option that // first differs using EqualsFunc = std::function; // A struct for storing constant option information such as option name, // option type, and offset. class OptionTypeInfo { public: int offset_; int mutable_offset_; // A simple "normal", non-mutable Type "type" at offset OptionTypeInfo(int offset, OptionType type) : offset_(offset), mutable_offset_(0), parse_func_(nullptr), serialize_func_(nullptr), equals_func_(nullptr), type_(type), verification_(OptionVerificationType::kNormal), flags_(OptionTypeFlags::kNone) {} // A simple "normal", mutable Type "type" at offset OptionTypeInfo(int offset, OptionType type, int mutable_offset) : offset_(offset), mutable_offset_(mutable_offset), parse_func_(nullptr), serialize_func_(nullptr), equals_func_(nullptr), type_(type), verification_(OptionVerificationType::kNormal), flags_(OptionTypeFlags::kMutable) {} OptionTypeInfo(int offset, OptionType type, OptionVerificationType verification, OptionTypeFlags flags, int mutable_offset) : offset_(offset), mutable_offset_(mutable_offset), parse_func_(nullptr), serialize_func_(nullptr), equals_func_(nullptr), type_(type), verification_(verification), flags_(flags) {} OptionTypeInfo(int offset, OptionType type, OptionVerificationType verification, OptionTypeFlags flags, int mutable_offset, const ParseFunc& parse_func) : offset_(offset), mutable_offset_(mutable_offset), parse_func_(parse_func), serialize_func_(nullptr), equals_func_(nullptr), type_(type), verification_(verification), flags_(flags) {} OptionTypeInfo(int offset, OptionType type, 
OptionVerificationType verification, OptionTypeFlags flags, int mutable_offset, const ParseFunc& parse_func, const SerializeFunc& serialize_func, const EqualsFunc& equals_func) : offset_(offset), mutable_offset_(mutable_offset), parse_func_(parse_func), serialize_func_(serialize_func), equals_func_(equals_func), type_(type), verification_(verification), flags_(flags) {} // Creates an OptionTypeInfo for an enum type. Enums use an additional // map to convert the enums to/from their string representation. // To create an OptionTypeInfo that is an Enum, one should: // - Create a static map of string values to the corresponding enum value // - Call this method passing the static map in as a parameter. // Note that it is not necessary to add a new OptionType or make any // other changes -- the returned object handles parsing, serialiation, and // comparisons. // // @param offset The offset in the option object for this enum // @param map The string to enum mapping for this enum template static OptionTypeInfo Enum( int offset, const std::unordered_map* const map) { return OptionTypeInfo( offset, OptionType::kEnum, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, // Uses the map argument to convert the input string into // its corresponding enum value. If value is found in the map, // addr is updated to the corresponding map entry. // @return OK if the value is found in the map // @return InvalidArgument if the value is not found in the map [map](const ConfigOptions&, const std::string& name, const std::string& value, char* addr) { if (map == nullptr) { return Status::NotSupported("No enum mapping ", name); } else if (ParseEnum(*map, value, reinterpret_cast(addr))) { return Status::OK(); } else { return Status::InvalidArgument("No mapping for enum ", name); } }, // Uses the map argument to convert the input enum into // its corresponding string value. If enum value is found in the map, // value is updated to the corresponding string value in the map. 
// @return OK if the enum is found in the map // @return InvalidArgument if the enum is not found in the map [map](const ConfigOptions&, const std::string& name, const char* addr, std::string* value) { if (map == nullptr) { return Status::NotSupported("No enum mapping ", name); } else if (SerializeEnum(*map, (*reinterpret_cast(addr)), value)) { return Status::OK(); } else { return Status::InvalidArgument("No mapping for enum ", name); } }, // Casts addr1 and addr2 to the enum type and returns true if // they are equal, false otherwise. [](const ConfigOptions&, const std::string&, const char* addr1, const char* addr2, std::string*) { return (*reinterpret_cast(addr1) == *reinterpret_cast(addr2)); }); } // End OptionTypeInfo::Enum // Creates an OptionTypeInfo for a Struct type. Structs have a // map of string-OptionTypeInfo associated with them that describes how // to process the object for parsing, serializing, and matching. // Structs also have a struct_name, which is the name of the object // as registered in the parent map. // When processing a struct, the option name can be specified as: // - Meaning to process the entire struct. // - Meaning to process the single field // - Process the single fields // The CompactionOptionsFIFO, CompactionOptionsUniversal, and LRUCacheOptions // are all examples of Struct options. // // To create an OptionTypeInfo that is a Struct, one should: // - Create a static map of string-OptionTypeInfo corresponding to the // properties of the object that can be set via the options. // - Call this method passing the name and map in as parameters. // Note that it is not necessary to add a new OptionType or make any // other changes -- the returned object handles parsing, serialization, and // comparisons. 
// // @param offset The offset in the option object for this enum // @param map The string to enum mapping for this enum static OptionTypeInfo Struct( const std::string& struct_name, const std::unordered_map* struct_map, int offset, OptionVerificationType verification, OptionTypeFlags flags, int mutable_offset) { return OptionTypeInfo( offset, OptionType::kStruct, verification, flags, mutable_offset, // Parses the struct and updates the fields at addr [struct_name, struct_map](const ConfigOptions& opts, const std::string& name, const std::string& value, char* addr) { return ParseStruct(opts, struct_name, struct_map, name, value, addr); }, // Serializes the struct options into value [struct_name, struct_map](const ConfigOptions& opts, const std::string& name, const char* addr, std::string* value) { return SerializeStruct(opts, struct_name, struct_map, name, addr, value); }, // Compares the struct fields of addr1 and addr2 for equality [struct_name, struct_map](const ConfigOptions& opts, const std::string& name, const char* addr1, const char* addr2, std::string* mismatch) { return StructsAreEqual(opts, struct_name, struct_map, name, addr1, addr2, mismatch); }); } static OptionTypeInfo Struct( const std::string& struct_name, const std::unordered_map* struct_map, int offset, OptionVerificationType verification, OptionTypeFlags flags, int mutable_offset, const ParseFunc& parse_func) { return OptionTypeInfo( offset, OptionType::kStruct, verification, flags, mutable_offset, parse_func, [struct_name, struct_map](const ConfigOptions& opts, const std::string& name, const char* addr, std::string* value) { return SerializeStruct(opts, struct_name, struct_map, name, addr, value); }, [struct_name, struct_map](const ConfigOptions& opts, const std::string& name, const char* addr1, const char* addr2, std::string* mismatch) { return StructsAreEqual(opts, struct_name, struct_map, name, addr1, addr2, mismatch); }); } template static OptionTypeInfo Vector(int _offset, 
OptionVerificationType _verification, OptionTypeFlags _flags, int _mutable_offset, const OptionTypeInfo& elem_info, char separator = ':') { return OptionTypeInfo( _offset, OptionType::kVector, _verification, _flags, _mutable_offset, [elem_info, separator](const ConfigOptions& opts, const std::string& name, const std::string& value, char* addr) { auto result = reinterpret_cast*>(addr); return ParseVector(opts, elem_info, separator, name, value, result); }, [elem_info, separator](const ConfigOptions& opts, const std::string& name, const char* addr, std::string* value) { const auto& vec = *(reinterpret_cast*>(addr)); return SerializeVector(opts, elem_info, separator, name, vec, value); }, [elem_info](const ConfigOptions& opts, const std::string& name, const char* addr1, const char* addr2, std::string* mismatch) { const auto& vec1 = *(reinterpret_cast*>(addr1)); const auto& vec2 = *(reinterpret_cast*>(addr2)); return VectorsAreEqual(opts, elem_info, name, vec1, vec2, mismatch); }); } bool IsEnabled(OptionTypeFlags otf) const { return (flags_ & otf) == otf; } bool IsMutable() const { return IsEnabled(OptionTypeFlags::kMutable); } bool IsDeprecated() const { return IsEnabled(OptionVerificationType::kDeprecated); } // Returns true if the option is marked as an Alias. // Aliases are valid options that are parsed but are not converted to strings // or compared. bool IsAlias() const { return IsEnabled(OptionVerificationType::kAlias); } bool IsEnabled(OptionVerificationType ovf) const { return verification_ == ovf; } // Returns the sanity level for comparing the option. // If the options should not be compared, returns None // If the option has a compare flag, returns it. 
// Otherwise, returns "exact" ConfigOptions::SanityLevel GetSanityLevel() const { if (IsDeprecated() || IsAlias()) { return ConfigOptions::SanityLevel::kSanityLevelNone; } else { auto match = (flags_ & OptionTypeFlags::kCompareExact); if (match == OptionTypeFlags::kCompareDefault) { return ConfigOptions::SanityLevel::kSanityLevelExactMatch; } else { return (ConfigOptions::SanityLevel)match; } } } // Returns true if the option should be serialized. // Options should be serialized if the are not deprecated, aliases, // or marked as "Don't Serialize". bool ShouldSerialize() const { if (IsDeprecated() || IsAlias()) { return false; } else if (IsEnabled(OptionTypeFlags::kDontSerialize)) { return false; } else { return true; } } bool IsByName() const { return (verification_ == OptionVerificationType::kByName || verification_ == OptionVerificationType::kByNameAllowNull || verification_ == OptionVerificationType::kByNameAllowFromNull); } bool IsStruct() const { return (type_ == OptionType::kStruct); } // Parses the option in "opt_value" according to the rules of this class // and updates the value at "opt_addr". // On success, Status::OK() is returned. On failure: // NotFound means the opt_name is not valid for this option // NotSupported means we do not know how to parse the value for this option // InvalidArgument means the opt_value is not valid for this option. Status Parse(const ConfigOptions& config_options, const std::string& opt_name, const std::string& opt_value, char* opt_addr) const; // Serializes the option in "opt_addr" according to the rules of this class // into the value at "opt_value". Status Serialize(const ConfigOptions& config_options, const std::string& opt_name, const char* opt_addr, std::string* opt_value) const; // Compares the "addr1" and "addr2" values according to the rules of this // class and returns true if they match. On a failed match, mismatch is the // name of the option that failed to match. 
bool AreEqual(const ConfigOptions& config_options, const std::string& opt_name, const char* addr1, const char* addr2, std::string* mismatch) const; // Used to override the match rules for "ByName" options. bool AreEqualByName(const ConfigOptions& config_options, const std::string& opt_name, const char* this_offset, const char* that_offset) const; bool AreEqualByName(const ConfigOptions& config_options, const std::string& opt_name, const char* this_ptr, const std::string& that_value) const; // Parses the input value according to the map for the struct at opt_addr // struct_name is the name of the struct option as registered // opt_name is the name of the option being evaluated. This may // be the whole struct or a sub-element of it, based on struct_name and // opt_name. static Status ParseStruct( const ConfigOptions& config_options, const std::string& struct_name, const std::unordered_map* map, const std::string& opt_name, const std::string& value, char* opt_addr); // Serializes the input addr according to the map for the struct to value. // struct_name is the name of the struct option as registered // opt_name is the name of the option being evaluated. This may // be the whole struct or a sub-element of it static Status SerializeStruct( const ConfigOptions& config_options, const std::string& struct_name, const std::unordered_map* map, const std::string& opt_name, const char* opt_addr, std::string* value); // Compares the input offsets according to the map for the struct and returns // true if they are equivalent, false otherwise. // struct_name is the name of the struct option as registered // opt_name is the name of the option being evaluated. 
This may // be the whole struct or a sub-element of it static bool StructsAreEqual( const ConfigOptions& config_options, const std::string& struct_name, const std::unordered_map* map, const std::string& opt_name, const char* this_offset, const char* that_offset, std::string* mismatch); // Finds the entry for the opt_name in the opt_map, returning // nullptr if not found. // If found, elem_name will be the name of option to find. // This may be opt_name, or a substring of opt_name. // For "simple" options, opt_name will be equal to elem_name. Given the // opt_name "opt", elem_name will equal "opt". // For "embedded" options (like structs), elem_name may be opt_name // or a field within the opt_name. For example, given the struct "struct", // and opt_name of "struct.field", elem_name will be "field" static const OptionTypeInfo* Find( const std::string& opt_name, const std::unordered_map& opt_map, std::string* elem_name); // Returns the next token marked by the delimiter from "opts" after start in // token and updates end to point to where that token stops. Delimiters inside // of braces are ignored. Returns OK if a token is found and an error if the // input opts string is mis-formatted. // Given "a=AA;b=BB;" start=2 and delimiter=";", token is "AA" and end points // to "b" Given "{a=A;b=B}", the token would be "a=A;b=B" // // @param opts The string in which to find the next token // @param delimiter The delimiter between tokens // @param start The position in opts to start looking for the token // @parem ed Returns the end position in opts of the token // @param token Returns the token // @returns OK if a token was found // @return InvalidArgument if the braces mismatch // (e.g. "{a={b=c;}" ) -- missing closing brace // @return InvalidArgument if an expected delimiter is not found // e.g. 
"{a=b}c=d;" -- missing delimiter before "c" static Status NextToken(const std::string& opts, char delimiter, size_t start, size_t* end, std::string* token); private: // The optional function to convert a string to its representation ParseFunc parse_func_; // The optional function to convert a value to its string representation SerializeFunc serialize_func_; // The optional function to match two option values EqualsFunc equals_func_; OptionType type_; OptionVerificationType verification_; OptionTypeFlags flags_; }; // Parses the input value into elements of the result vector. This method // will break the input value into the individual tokens (based on the // separator), where each of those tokens will be parsed based on the rules of // elem_info. The result vector will be populated with elements based on the // input tokens. For example, if the value=1:2:3:4:5 and elem_info parses // integers, the result vector will contain the integers 1,2,3,4,5 // @param config_options Controls how the option value is parsed. // @param elem_info Controls how individual tokens in value are parsed // @param separator Character separating tokens in values (':' in the above // example) // @param name The name associated with this vector option // @param value The input string to parse into tokens // @param result Returns the results of parsing value into its elements. 
// @return OK if the value was successfully parse // @return InvalidArgument if the value is improperly formed or if the token // could not be parsed // @return NotFound If the tokenized value contains unknown options for // its type template Status ParseVector(const ConfigOptions& config_options, const OptionTypeInfo& elem_info, char separator, const std::string& name, const std::string& value, std::vector* result) { result->clear(); Status status; for (size_t start = 0, end = 0; status.ok() && start < value.size() && end != std::string::npos; start = end + 1) { std::string token; status = OptionTypeInfo::NextToken(value, separator, start, &end, &token); if (status.ok()) { T elem; status = elem_info.Parse(config_options, name, token, reinterpret_cast(&elem)); if (status.ok()) { result->emplace_back(elem); } } } return status; } // Serializes the input vector into its output value. Elements are // separated by the separator character. This element will convert all of the // elements in vec into their serialized form, using elem_info to perform the // serialization. // For example, if the vec contains the integers 1,2,3,4,5 and elem_info // serializes the output would be 1:2:3:4:5 for separator ":". // @param config_options Controls how the option value is serialized. 
// @param elem_info Controls how individual tokens in value are serialized // @param separator Character separating tokens in value (':' in the above // example) // @param name The name associated with this vector option // @param vec The input vector to serialize // @param value The output string of serialized options // @return OK if the value was successfully parse // @return InvalidArgument if the value is improperly formed or if the token // could not be parsed // @return NotFound If the tokenized value contains unknown options for // its type template Status SerializeVector(const ConfigOptions& config_options, const OptionTypeInfo& elem_info, char separator, const std::string& name, const std::vector& vec, std::string* value) { std::string result; ConfigOptions embedded = config_options; embedded.delimiter = ";"; for (size_t i = 0; i < vec.size(); ++i) { std::string elem_str; Status s = elem_info.Serialize( embedded, name, reinterpret_cast(&vec[i]), &elem_str); if (!s.ok()) { return s; } else { if (i > 0) { result += separator; } // If the element contains embedded separators, put it inside of brackets if (result.find(separator) != std::string::npos) { result += "{" + elem_str + "}"; } else { result += elem_str; } } } if (result.find("=") != std::string::npos) { *value = "{" + result + "}"; } else { *value = result; } return Status::OK(); } // Compares the input vectors vec1 and vec2 for equality // If the vectors are the same size, elements of the vectors are compared one by // one using elem_info to perform the comparison. // // @param config_options Controls how the vectors are compared. // @param elem_info Controls how individual elements in the vectors are compared // @param name The name associated with this vector option // @param vec1,vec2 The vectors to compare. // @param mismatch If the vectors are not equivalent, mismatch will point to // the first // element of the comparison tht did not match. 
// @return true If vec1 and vec2 are "equal", false otherwise template bool VectorsAreEqual(const ConfigOptions& config_options, const OptionTypeInfo& elem_info, const std::string& name, const std::vector& vec1, const std::vector& vec2, std::string* mismatch) { if (vec1.size() != vec2.size()) { *mismatch = name; return false; } else { for (size_t i = 0; i < vec1.size(); ++i) { if (!elem_info.AreEqual( config_options, name, reinterpret_cast(&vec1[i]), reinterpret_cast(&vec2[i]), mismatch)) { return false; } } return true; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/000077500000000000000000000000001370372246700142315ustar00rootroot00000000000000rocksdb-6.11.4/port/README000066400000000000000000000006251370372246700151140ustar00rootroot00000000000000This directory contains interfaces and implementations that isolate the rest of the package from platform details. Code in the rest of the package includes "port.h" from this directory. "port.h" in turn includes a platform specific "port_.h" file that provides the platform specific implementation. See port_posix.h for an example of what must be provided in a platform specific header file. rocksdb-6.11.4/port/jemalloc_helper.h000066400000000000000000000067671370372246700175470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #if defined(__clang__) // glibc's `posix_memalign()` declaration specifies `throw()` while clang's // declaration does not. There is a hack in clang to make its re-declaration // compatible with glibc's if they are declared consecutively. That hack breaks // if yet another `posix_memalign()` declaration comes between glibc's and // clang's declarations. 
Include "mm_malloc.h" here ensures glibc's and clang's // declarations both come before "jemalloc.h"'s `posix_memalign()` declaration. // // This problem could also be avoided if "jemalloc.h"'s `posix_memalign()` // declaration did not specify `throw()` when built with clang. #include #endif #ifdef ROCKSDB_JEMALLOC #ifdef __FreeBSD__ #include #else #define JEMALLOC_MANGLE #include #endif #ifndef JEMALLOC_CXX_THROW #define JEMALLOC_CXX_THROW #endif #if defined(OS_WIN) && defined(_MSC_VER) // MSVC does not have weak symbol support. As long as ROCKSDB_JEMALLOC is // defined, Jemalloc memory allocator is used. static inline bool HasJemalloc() { return true; } #else // Declare non-standard jemalloc APIs as weak symbols. We can null-check these // symbols to detect whether jemalloc is linked with the binary. extern "C" void* mallocx(size_t, int) __attribute__((__nothrow__, __weak__)); extern "C" void* rallocx(void*, size_t, int) __attribute__((__nothrow__, __weak__)); extern "C" size_t xallocx(void*, size_t, size_t, int) __attribute__((__nothrow__, __weak__)); extern "C" size_t sallocx(const void*, int) __attribute__((__nothrow__, __weak__)); extern "C" void dallocx(void*, int) __attribute__((__nothrow__, __weak__)); extern "C" void sdallocx(void*, size_t, int) __attribute__((__nothrow__, __weak__)); extern "C" size_t nallocx(size_t, int) __attribute__((__nothrow__, __weak__)); extern "C" int mallctl(const char*, void*, size_t*, void*, size_t) __attribute__((__nothrow__, __weak__)); extern "C" int mallctlnametomib(const char*, size_t*, size_t*) __attribute__((__nothrow__, __weak__)); extern "C" int mallctlbymib(const size_t*, size_t, void*, size_t*, void*, size_t) __attribute__((__nothrow__, __weak__)); extern "C" void malloc_stats_print(void (*)(void*, const char*), void*, const char*) __attribute__((__nothrow__, __weak__)); extern "C" size_t malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void*) JEMALLOC_CXX_THROW __attribute__((__weak__)); // Check if Jemalloc is linked 
// with the binary. Note the main program might be
// using a different memory allocator even if this method returns true.
// It is loosely based on folly::usingJEMalloc(), minus the check that actually
// allocates memory and sees if it is through jemalloc, to handle the dlopen()
// case:
// https://github.com/facebook/folly/blob/76cf8b5841fb33137cfbf8b224f0226437c855bc/folly/memory/Malloc.h#L147
//
// Returns true only when every one of the jemalloc entry points named below
// compares non-null.
static inline bool HasJemalloc() {
  return mallocx != nullptr && rallocx != nullptr && xallocx != nullptr &&
         sallocx != nullptr && dallocx != nullptr && sdallocx != nullptr &&
         nallocx != nullptr && mallctl != nullptr &&
         mallctlnametomib != nullptr && mallctlbymib != nullptr &&
         malloc_stats_print != nullptr && malloc_usable_size != nullptr;
}

#endif

#endif  // ROCKSDB_JEMALLOC
rocksdb-6.11.4/port/lang.h000066400000000000000000000010301370372246700153150ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

// FALLTHROUGH_INTENDED marks a deliberate switch-case fall-through.  It
// expands to the clang or GCC (>= 7) fallthrough attribute when available and
// to an empty statement otherwise.  A definition supplied before this header
// is included is left alone.
#ifndef FALLTHROUGH_INTENDED
#if defined(__clang__)
#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
#elif defined(__GNUC__) && __GNUC__ >= 7
#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
#else
#define FALLTHROUGH_INTENDED do {} while (0)
#endif
#endif
rocksdb-6.11.4/port/likely.h000066400000000000000000000012621370372246700156740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #if defined(__GNUC__) && __GNUC__ >= 4 #define LIKELY(x) (__builtin_expect((x), 1)) #define UNLIKELY(x) (__builtin_expect((x), 0)) #else #define LIKELY(x) (x) #define UNLIKELY(x) (x) #endif rocksdb-6.11.4/port/malloc.h000066400000000000000000000012201370372246700156440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifdef ROCKSDB_MALLOC_USABLE_SIZE #ifdef OS_FREEBSD #include #else #include #endif // OS_FREEBSD #endif // ROCKSDB_MALLOC_USABLE_SIZE rocksdb-6.11.4/port/port.h000066400000000000000000000015021370372246700153640ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include // Include the appropriate platform specific file below. If you are // porting to a new platform, see "port_example.h" for documentation // of what the new port_.h file must provide. 
// Select the platform port: POSIX builds use port_posix.h, Windows builds use
// port_win.h.  No header is included when neither macro is defined.
#if defined(ROCKSDB_PLATFORM_POSIX)
#include "port/port_posix.h"
#elif defined(OS_WIN)
#include "port/win/port_win.h"
#endif
rocksdb-6.11.4/port/port_dirent.h000066400000000000000000000020411370372246700167300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// See port_example.h for documentation for the following types/functions.

#pragma once

#ifdef ROCKSDB_PLATFORM_POSIX
// NOTE(review): the two header names below were lost in extraction --
// presumably the system headers that declare DIR, dirent, opendir(),
// readdir() and closedir(); confirm before building.
#include
#include
#elif defined(OS_WIN)

namespace ROCKSDB_NAMESPACE {
namespace port {

// Windows declarations of the directory-iteration types and functions; only
// the declarations are visible here.
struct dirent {
  char d_name[_MAX_PATH]; /* filename */
};

struct DIR;

DIR* opendir(const char* name);

dirent* readdir(DIR* dirp);

int closedir(DIR* dirp);

}  // namespace port

// Re-export the port:: names so code inside ROCKSDB_NAMESPACE can use them
// unqualified.
using port::dirent;
using port::DIR;
using port::opendir;
using port::readdir;
using port::closedir;

}  // namespace ROCKSDB_NAMESPACE

#endif  // OS_WIN
rocksdb-6.11.4/port/port_example.h000066400000000000000000000070511370372246700171040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// This file contains the specification, but not the implementations,
// of the types/operations/etc.
that should be defined by a platform // specific port_.h file. Use this file as a reference for // how to port this package to a new platform. #pragma once namespace ROCKSDB_NAMESPACE { namespace port { // TODO(jorlow): Many of these belong more in the environment class rather than // here. We should try moving them and see if it affects perf. // The following boolean constant must be true on a little-endian machine // and false otherwise. static const bool kLittleEndian = true /* or some other expression */; // ------------------ Threading ------------------- // A Mutex represents an exclusive lock. class Mutex { public: Mutex(); ~Mutex(); // Lock the mutex. Waits until other lockers have exited. // Will deadlock if the mutex is already locked by this thread. void Lock(); // Unlock the mutex. // REQUIRES: This mutex was locked by this thread. void Unlock(); // Optionally crash if this thread does not hold this mutex. // The implementation must be fast, especially if NDEBUG is // defined. The implementation is allowed to skip all checks. void AssertHeld(); }; class CondVar { public: explicit CondVar(Mutex* mu); ~CondVar(); // Atomically release *mu and block on this condition variable until // either a call to SignalAll(), or a call to Signal() that picks // this thread to wakeup. // REQUIRES: this thread holds *mu void Wait(); // If there are some threads waiting, wake up at least one of them. void Signal(); // Wake up all waiting threads. void SignallAll(); }; // Thread-safe initialization. // Used as follows: // static port::OnceType init_control = LEVELDB_ONCE_INIT; // static void Initializer() { ... do something ...; } // ... // port::InitOnce(&init_control, &Initializer); typedef intptr_t OnceType; #define LEVELDB_ONCE_INIT 0 extern void InitOnce(port::OnceType*, void (*initializer)()); // ------------------ Compression ------------------- // Store the snappy compression of "input[0,input_length-1]" in *output. 
// Returns false if snappy is not supported by this port. extern bool Snappy_Compress(const char* input, size_t input_length, std::string* output); // If input[0,input_length-1] looks like a valid snappy compressed // buffer, store the size of the uncompressed data in *result and // return true. Else return false. extern bool Snappy_GetUncompressedLength(const char* input, size_t length, size_t* result); // Attempt to snappy uncompress input[0,input_length-1] into *output. // Returns true if successful, false if the input is invalid lightweight // compressed data. // // REQUIRES: at least the first "n" bytes of output[] must be writable // where "n" is the result of a successful call to // Snappy_GetUncompressedLength. extern bool Snappy_Uncompress(const char* input_data, size_t input_length, char* output); } // namespace port } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/port_posix.cc000066400000000000000000000161451370372246700167550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "port/port_posix.h" #include #if defined(__i386__) || defined(__x86_64__) #include #endif #include #include #include #include #include #include #include #include #include #include #include "logging/logging.h" namespace ROCKSDB_NAMESPACE { // We want to give users opportunity to default all the mutexes to adaptive if // not specified otherwise. This enables a quick way to conduct various // performance related experiements. // // NB! 
Support for adaptive mutexes is turned on by definining // ROCKSDB_PTHREAD_ADAPTIVE_MUTEX during the compilation. If you use RocksDB // build environment then this happens automatically; otherwise it's up to the // consumer to define the identifier. #ifdef ROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX extern const bool kDefaultToAdaptiveMutex = true; #else extern const bool kDefaultToAdaptiveMutex = false; #endif namespace port { static int PthreadCall(const char* label, int result) { if (result != 0 && result != ETIMEDOUT) { fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); abort(); } return result; } Mutex::Mutex(bool adaptive) { (void) adaptive; #ifdef ROCKSDB_PTHREAD_ADAPTIVE_MUTEX if (!adaptive) { PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr)); } else { pthread_mutexattr_t mutex_attr; PthreadCall("init mutex attr", pthread_mutexattr_init(&mutex_attr)); PthreadCall("set mutex attr", pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP)); PthreadCall("init mutex", pthread_mutex_init(&mu_, &mutex_attr)); PthreadCall("destroy mutex attr", pthread_mutexattr_destroy(&mutex_attr)); } #else PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr)); #endif // ROCKSDB_PTHREAD_ADAPTIVE_MUTEX } Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); #ifndef NDEBUG locked_ = true; #endif } void Mutex::Unlock() { #ifndef NDEBUG locked_ = false; #endif PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } void Mutex::AssertHeld() { #ifndef NDEBUG assert(locked_); #endif } CondVar::CondVar(Mutex* mu) : mu_(mu) { PthreadCall("init cv", pthread_cond_init(&cv_, nullptr)); } CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); } void CondVar::Wait() { #ifndef NDEBUG mu_->locked_ = false; #endif PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_)); #ifndef NDEBUG mu_->locked_ = true; #endif } bool CondVar::TimedWait(uint64_t abs_time_us) 
{ struct timespec ts; ts.tv_sec = static_cast(abs_time_us / 1000000); ts.tv_nsec = static_cast((abs_time_us % 1000000) * 1000); #ifndef NDEBUG mu_->locked_ = false; #endif int err = pthread_cond_timedwait(&cv_, &mu_->mu_, &ts); #ifndef NDEBUG mu_->locked_ = true; #endif if (err == ETIMEDOUT) { return true; } if (err != 0) { PthreadCall("timedwait", err); } return false; } void CondVar::Signal() { PthreadCall("signal", pthread_cond_signal(&cv_)); } void CondVar::SignalAll() { PthreadCall("broadcast", pthread_cond_broadcast(&cv_)); } RWMutex::RWMutex() { PthreadCall("init mutex", pthread_rwlock_init(&mu_, nullptr)); } RWMutex::~RWMutex() { PthreadCall("destroy mutex", pthread_rwlock_destroy(&mu_)); } void RWMutex::ReadLock() { PthreadCall("read lock", pthread_rwlock_rdlock(&mu_)); } void RWMutex::WriteLock() { PthreadCall("write lock", pthread_rwlock_wrlock(&mu_)); } void RWMutex::ReadUnlock() { PthreadCall("read unlock", pthread_rwlock_unlock(&mu_)); } void RWMutex::WriteUnlock() { PthreadCall("write unlock", pthread_rwlock_unlock(&mu_)); } int PhysicalCoreID() { #if defined(ROCKSDB_SCHED_GETCPU_PRESENT) && defined(__x86_64__) && \ (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 22)) // sched_getcpu uses VDSO getcpu() syscall since 2.22. I believe Linux offers VDSO // support only on x86_64. This is the fastest/preferred method if available. int cpuno = sched_getcpu(); if (cpuno < 0) { return -1; } return cpuno; #elif defined(__x86_64__) || defined(__i386__) // clang/gcc both provide cpuid.h, which defines __get_cpuid(), for x86_64 and i386. unsigned eax, ebx = 0, ecx, edx; if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { return -1; } return ebx >> 24; #else // give up, the caller can generate a random number or something. 
return -1; #endif } void InitOnce(OnceType* once, void (*initializer)()) { PthreadCall("once", pthread_once(once, initializer)); } void Crash(const std::string& srcfile, int srcline) { fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline); fflush(stdout); kill(getpid(), SIGTERM); } int GetMaxOpenFiles() { #if defined(RLIMIT_NOFILE) struct rlimit no_files_limit; if (getrlimit(RLIMIT_NOFILE, &no_files_limit) != 0) { return -1; } // protect against overflow if (static_cast(no_files_limit.rlim_cur) >= static_cast(std::numeric_limits::max())) { return std::numeric_limits::max(); } return static_cast(no_files_limit.rlim_cur); #endif return -1; } void *cacheline_aligned_alloc(size_t size) { #if __GNUC__ < 5 && defined(__SANITIZE_ADDRESS__) return malloc(size); #elif ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || defined(__APPLE__)) void *m; errno = posix_memalign(&m, CACHE_LINE_SIZE, size); return errno ? nullptr : m; #else return malloc(size); #endif } void cacheline_aligned_free(void *memblock) { free(memblock); } static size_t GetPageSize() { #if defined(OS_LINUX) || defined(_SC_PAGESIZE) long v = sysconf(_SC_PAGESIZE); if (v >= 1024) { return static_cast(v); } #endif // Default assume 4KB return 4U * 1024U; } const size_t kPageSize = GetPageSize(); void SetCpuPriority(ThreadId id, CpuPriority priority) { #ifdef OS_LINUX sched_param param; param.sched_priority = 0; switch (priority) { case CpuPriority::kHigh: sched_setscheduler(id, SCHED_OTHER, ¶m); setpriority(PRIO_PROCESS, id, -20); break; case CpuPriority::kNormal: sched_setscheduler(id, SCHED_OTHER, ¶m); setpriority(PRIO_PROCESS, id, 0); break; case CpuPriority::kLow: sched_setscheduler(id, SCHED_OTHER, ¶m); setpriority(PRIO_PROCESS, id, 19); break; case CpuPriority::kIdle: sched_setscheduler(id, SCHED_IDLE, ¶m); break; default: assert(false); } #else (void)id; (void)priority; #endif } } // namespace port } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/port/port_posix.h000066400000000000000000000140241370372246700166110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // See port_example.h for documentation for the following types/functions. #pragma once #include #include "rocksdb/options.h" #include "rocksdb/rocksdb_namespace.h" // size_t printf formatting named in the manner of C99 standard formatting // strings such as PRIu64 // in fact, we could use that one #define ROCKSDB_PRIszt "zu" #define __declspec(S) #define ROCKSDB_NOEXCEPT noexcept #undef PLATFORM_IS_LITTLE_ENDIAN #if defined(OS_MACOSX) #include #if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) #define PLATFORM_IS_LITTLE_ENDIAN \ (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) #endif #elif defined(OS_SOLARIS) #include #ifdef _LITTLE_ENDIAN #define PLATFORM_IS_LITTLE_ENDIAN true #else #define PLATFORM_IS_LITTLE_ENDIAN false #endif #include #elif defined(OS_AIX) #include #include #define PLATFORM_IS_LITTLE_ENDIAN (BYTE_ORDER == LITTLE_ENDIAN) #include #elif defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || \ defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID) #include #include #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) #else #include #endif #include #include #include #include #include #ifndef PLATFORM_IS_LITTLE_ENDIAN #define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) #endif #if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) 
||\ defined(OS_ANDROID) || defined(CYGWIN) || defined(OS_AIX) // Use fread/fwrite/fflush on platforms without _unlocked variants #define fread_unlocked fread #define fwrite_unlocked fwrite #define fflush_unlocked fflush #endif #if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\ defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) // Use fsync() on platforms without fdatasync() #define fdatasync fsync #endif #if defined(OS_ANDROID) && __ANDROID_API__ < 9 // fdatasync() was only introduced in API level 9 on Android. Use fsync() // when targeting older platforms. #define fdatasync fsync #endif namespace ROCKSDB_NAMESPACE { extern const bool kDefaultToAdaptiveMutex; namespace port { // For use at db/file_indexer.h kLevelMaxIndex const uint32_t kMaxUint32 = std::numeric_limits::max(); const int kMaxInt32 = std::numeric_limits::max(); const int kMinInt32 = std::numeric_limits::min(); const uint64_t kMaxUint64 = std::numeric_limits::max(); const int64_t kMaxInt64 = std::numeric_limits::max(); const int64_t kMinInt64 = std::numeric_limits::min(); const size_t kMaxSizet = std::numeric_limits::max(); constexpr bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN; #undef PLATFORM_IS_LITTLE_ENDIAN class CondVar; class Mutex { public: explicit Mutex(bool adaptive = kDefaultToAdaptiveMutex); // No copying Mutex(const Mutex&) = delete; void operator=(const Mutex&) = delete; ~Mutex(); void Lock(); void Unlock(); // this will assert if the mutex is not locked // it does NOT verify that mutex is held by a calling thread void AssertHeld(); private: friend class CondVar; pthread_mutex_t mu_; #ifndef NDEBUG bool locked_ = false; #endif }; class RWMutex { public: RWMutex(); // No copying allowed RWMutex(const RWMutex&) = delete; void operator=(const RWMutex&) = delete; ~RWMutex(); void ReadLock(); void WriteLock(); void ReadUnlock(); void WriteUnlock(); void AssertHeld() { } private: pthread_rwlock_t mu_; // the underlying platform mutex }; class CondVar { public: explicit CondVar(Mutex* mu); 
~CondVar(); void Wait(); // Timed condition wait. Returns true if timeout occurred. bool TimedWait(uint64_t abs_time_us); void Signal(); void SignalAll(); private: pthread_cond_t cv_; Mutex* mu_; }; using Thread = std::thread; static inline void AsmVolatilePause() { #if defined(__i386__) || defined(__x86_64__) asm volatile("pause"); #elif defined(__aarch64__) asm volatile("wfe"); #elif defined(__powerpc64__) asm volatile("or 27,27,27"); #endif // it's okay for other platforms to be no-ops } // Returns -1 if not available on this platform extern int PhysicalCoreID(); typedef pthread_once_t OnceType; #define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT extern void InitOnce(OnceType* once, void (*initializer)()); #ifndef CACHE_LINE_SIZE // To test behavior with non-native cache line size, e.g. for // Bloom filters, set TEST_CACHE_LINE_SIZE to the desired test size. // This disables ALIGN_AS to keep it from failing compilation. #ifdef TEST_CACHE_LINE_SIZE #define CACHE_LINE_SIZE TEST_CACHE_LINE_SIZE #define ALIGN_AS(n) /*empty*/ #else #if defined(__s390__) #define CACHE_LINE_SIZE 256U #elif defined(__powerpc__) || defined(__aarch64__) #define CACHE_LINE_SIZE 128U #else #define CACHE_LINE_SIZE 64U #endif #define ALIGN_AS(n) alignas(n) #endif #endif static_assert((CACHE_LINE_SIZE & (CACHE_LINE_SIZE - 1)) == 0, "Cache line size must be a power of 2 number of bytes"); extern void *cacheline_aligned_alloc(size_t size); extern void cacheline_aligned_free(void *memblock); #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) extern void Crash(const std::string& srcfile, int srcline); extern int GetMaxOpenFiles(); extern const size_t kPageSize; using ThreadId = pid_t; extern void SetCpuPriority(ThreadId id, CpuPriority priority); } // namespace port } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/stack_trace.cc000066400000000000000000000104211370372246700170210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "port/stack_trace.h" #if defined(ROCKSDB_LITE) || !(defined(ROCKSDB_BACKTRACE) || defined(OS_MACOSX)) || \ defined(CYGWIN) || defined(OS_FREEBSD) || defined(OS_SOLARIS) // noop namespace ROCKSDB_NAMESPACE { namespace port { void InstallStackTraceHandler() {} void PrintStack(int /*first_frames_to_skip*/) {} void PrintAndFreeStack(void* /*callstack*/, int /*num_frames*/) {} void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) { return nullptr; } } // namespace port } // namespace ROCKSDB_NAMESPACE #else #include #include #include #include #include #include #include namespace ROCKSDB_NAMESPACE { namespace port { namespace { #if defined(OS_LINUX) || defined(OS_FREEBSD) const char* GetExecutableName() { static char name[1024]; char link[1024]; snprintf(link, sizeof(link), "/proc/%d/exe", getpid()); auto read = readlink(link, name, sizeof(name) - 1); if (-1 == read) { return nullptr; } else { name[read] = 0; return name; } } void PrintStackTraceLine(const char* symbol, void* frame) { static const char* executable = GetExecutableName(); if (symbol) { fprintf(stderr, "%s ", symbol); } if (executable) { // out source to addr2line, for the address translation const int kLineMax = 256; char cmd[kLineMax]; snprintf(cmd, kLineMax, "addr2line %p -e %s -f -C 2>&1", frame, executable); auto f = popen(cmd, "r"); if (f) { char line[kLineMax]; while (fgets(line, sizeof(line), f)) { line[strlen(line) - 1] = 0; // remove newline fprintf(stderr, "%s\t", line); } pclose(f); } } else { fprintf(stderr, " %p", frame); } fprintf(stderr, "\n"); } #elif defined(OS_MACOSX) void PrintStackTraceLine(const char* symbol, void* frame) { static int pid = getpid(); // out source to atos, for the address translation const int kLineMax = 256; char cmd[kLineMax]; snprintf(cmd, kLineMax, 
"xcrun atos %p -p %d 2>&1", frame, pid); auto f = popen(cmd, "r"); if (f) { char line[kLineMax]; while (fgets(line, sizeof(line), f)) { line[strlen(line) - 1] = 0; // remove newline fprintf(stderr, "%s\t", line); } pclose(f); } else if (symbol) { fprintf(stderr, "%s ", symbol); } fprintf(stderr, "\n"); } #endif } // namespace void PrintStack(void* frames[], int num_frames) { auto symbols = backtrace_symbols(frames, num_frames); for (int i = 0; i < num_frames; ++i) { fprintf(stderr, "#%-2d ", i); PrintStackTraceLine((symbols != nullptr) ? symbols[i] : nullptr, frames[i]); } free(symbols); } void PrintStack(int first_frames_to_skip) { const int kMaxFrames = 100; void* frames[kMaxFrames]; auto num_frames = backtrace(frames, kMaxFrames); PrintStack(&frames[first_frames_to_skip], num_frames - first_frames_to_skip); } void PrintAndFreeStack(void* callstack, int num_frames) { PrintStack(static_cast(callstack), num_frames); free(callstack); } void* SaveStack(int* num_frames, int first_frames_to_skip) { const int kMaxFrames = 100; void* frames[kMaxFrames]; auto count = backtrace(frames, kMaxFrames); *num_frames = count - first_frames_to_skip; void* callstack = malloc(sizeof(void*) * *num_frames); memcpy(callstack, &frames[first_frames_to_skip], sizeof(void*) * *num_frames); return callstack; } static void StackTraceHandler(int sig) { // reset to default handler signal(sig, SIG_DFL); fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig)); // skip the top three signal handler related frames PrintStack(3); // re-signal to default handler (so we still get core dump if needed...) 
raise(sig); } void InstallStackTraceHandler() { // just use the plain old signal as it's simple and sufficient // for this use case signal(SIGILL, StackTraceHandler); signal(SIGSEGV, StackTraceHandler); signal(SIGBUS, StackTraceHandler); signal(SIGABRT, StackTraceHandler); } } // namespace port } // namespace ROCKSDB_NAMESPACE #endif rocksdb-6.11.4/port/stack_trace.h000066400000000000000000000015731370372246700166730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { namespace port { // Install a signal handler to print callstack on the following signals: // SIGILL SIGSEGV SIGBUS SIGABRT // Currently supports linux only. No-op otherwise. void InstallStackTraceHandler(); // Prints stack, skips skip_first_frames frames void PrintStack(int first_frames_to_skip = 0); // Prints the given callstack void PrintAndFreeStack(void* callstack, int num_frames); // Save the current callstack void* SaveStack(int* num_frame, int first_frames_to_skip = 0); } // namespace port } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/sys_time.h000066400000000000000000000023201370372246700162330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
// This file is a portable substitute for sys/time.h which does not exist on // Windows #pragma once #if defined(OS_WIN) && defined(_MSC_VER) #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { namespace port { // Avoid including winsock2.h for this definition typedef struct timeval { long tv_sec; long tv_usec; } timeval; void gettimeofday(struct timeval* tv, struct timezone* tz); inline struct tm* localtime_r(const time_t* timep, struct tm* result) { errno_t ret = localtime_s(result, timep); return (ret == 0) ? result : NULL; } } using port::timeval; using port::gettimeofday; using port::localtime_r; } // namespace ROCKSDB_NAMESPACE #else #include #include #endif rocksdb-6.11.4/port/util_logger.h000066400000000000000000000014671370372246700167260ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once // Include the appropriate platform specific file below. If you are // porting to a new platform, see "port_example.h" for documentation // of what the new port_.h file must provide. #if defined(ROCKSDB_PLATFORM_POSIX) #include "logging/posix_logger.h" #elif defined(OS_WIN) #include "port/win/win_logger.h" #endif rocksdb-6.11.4/port/win/000077500000000000000000000000001370372246700150265ustar00rootroot00000000000000rocksdb-6.11.4/port/win/env_default.cc000066400000000000000000000024371370372246700176370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include "port/win/env_win.h" #include "test_util/sync_point.h" #include "util/compression_context_cache.h" #include "util/thread_local.h" namespace ROCKSDB_NAMESPACE { namespace port { // We choose not to destroy the env because joining the threads from the // system loader // which destroys the statics (same as from DLLMain) creates a system loader // dead-lock. // in this manner any remaining threads are terminated OK. namespace { std::once_flag winenv_once_flag; Env* envptr; }; } Env* Env::Default() { using namespace port; ThreadLocalPtr::InitSingletons(); CompressionContextCache::InitSingleton(); INIT_SYNC_POINT_SINGLETONS(); std::call_once(winenv_once_flag, []() { envptr = new WinEnv(); }); return envptr; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/env_win.cc000066400000000000000000001330501370372246700170040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "port/win/env_win.h" #include "port/win/win_thread.h" #include #include #include #include #include // _access #include // _rmdir, _mkdir, _getcwd #include #include #include "rocksdb/env.h" #include "rocksdb/slice.h" #include "port/port.h" #include "port/port_dirent.h" #include "port/win/win_logger.h" #include "port/win/io_win.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_util.h" #include // for uuid generation #include #include #include "strsafe.h" #include namespace ROCKSDB_NAMESPACE { ThreadStatusUpdater* CreateThreadStatusUpdater() { return new ThreadStatusUpdater(); } namespace { // Sector size used when physical sector size cannot be obtained from device. static const size_t kSectorSize = 512; // RAII helpers for HANDLEs const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); }; typedef std::unique_ptr UniqueCloseHandlePtr; const auto FindCloseFunc = [](HANDLE h) { ::FindClose(h); }; typedef std::unique_ptr UniqueFindClosePtr; void WinthreadCall(const char* label, std::error_code result) { if (0 != result.value()) { fprintf(stderr, "pthread %s: %s\n", label, strerror(result.value())); abort(); } } } namespace port { WinEnvIO::WinEnvIO(Env* hosted_env) : hosted_env_(hosted_env), page_size_(4 * 1024), allocation_granularity_(page_size_), perf_counter_frequency_(0), nano_seconds_per_period_(0), GetSystemTimePreciseAsFileTime_(NULL) { SYSTEM_INFO sinfo; GetSystemInfo(&sinfo); page_size_ = sinfo.dwPageSize; allocation_granularity_ = sinfo.dwAllocationGranularity; { LARGE_INTEGER qpf; BOOL ret __attribute__((__unused__)); ret = QueryPerformanceFrequency(&qpf); assert(ret == TRUE); perf_counter_frequency_ = qpf.QuadPart; if (std::nano::den % perf_counter_frequency_ == 0) { nano_seconds_per_period_ = std::nano::den / perf_counter_frequency_; } } HMODULE module = GetModuleHandle("kernel32.dll"); if (module != NULL) { GetSystemTimePreciseAsFileTime_ = 
(FnGetSystemTimePreciseAsFileTime)GetProcAddress( module, "GetSystemTimePreciseAsFileTime"); } } WinEnvIO::~WinEnvIO() { } Status WinEnvIO::DeleteFile(const std::string& fname) { Status result; BOOL ret = RX_DeleteFile(RX_FN(fname).c_str()); if(!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to delete: " + fname, lastError); } return result; } Status WinEnvIO::Truncate(const std::string& fname, size_t size) { Status s; int result = ROCKSDB_NAMESPACE::port::Truncate(fname, size); if (result != 0) { s = IOError("Failed to truncate: " + fname, errno); } return s; } Status WinEnvIO::GetCurrentTime(int64_t* unix_time) { time_t time = std::time(nullptr); if (time == (time_t)(-1)) { return Status::NotSupported("Failed to get time"); } *unix_time = time; return Status::OK(); } Status WinEnvIO::NewSequentialFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) { Status s; result->reset(); // Corruption test needs to rename and delete files of these kind // while they are still open with another handle. For that reason we // allow share_write and delete(allows rename). 
HANDLE hFile = INVALID_HANDLE_VALUE; DWORD fileFlags = FILE_ATTRIBUTE_READONLY; if (options.use_direct_reads && !options.use_mmap_reads) { fileFlags |= FILE_FLAG_NO_BUFFERING; } { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, // Original fopen mode is "rb" fileFlags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Failed to open NewSequentialFile" + fname, lastError); } else { result->reset(new WinSequentialFile(fname, hFile, options)); } return s; } Status WinEnvIO::NewRandomAccessFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options) { result->reset(); Status s; // Open the file for read-only random access // Random access is to disable read-ahead as the system reads too much data DWORD fileFlags = FILE_ATTRIBUTE_READONLY; if (options.use_direct_reads && !options.use_mmap_reads) { fileFlags |= FILE_FLAG_NO_BUFFERING; } else { fileFlags |= FILE_FLAG_RANDOM_ACCESS; } /// Shared access is necessary for corruption test to pass // almost all tests would work with a possible exception of fault_injection HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, fileFlags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "NewRandomAccessFile failed to Create/Open: " + fname, lastError); } UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); // CAUTION! This will map the entire file into the process address space if (options.use_mmap_reads && sizeof(void*) >= 8) { // Use mmap when virtual address-space is plentiful. 
uint64_t fileSize; s = GetFileSize(fname, &fileSize); if (s.ok()) { // Will not map empty files if (fileSize == 0) { return IOError( "NewRandomAccessFile failed to map empty file: " + fname, EINVAL); } HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, // At its present length 0, NULL); // Mapping name if (!hMap) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create file mapping for NewRandomAccessFile: " + fname, lastError); } UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc); const void* mapped_region = MapViewOfFileEx(hMap, FILE_MAP_READ, 0, // High DWORD of access start 0, // Low DWORD static_cast(fileSize), NULL); // Let the OS choose the mapping if (!mapped_region) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to MapViewOfFile for NewRandomAccessFile: " + fname, lastError); } result->reset(new WinMmapReadableFile(fname, hFile, hMap, mapped_region, static_cast(fileSize))); mapGuard.release(); fileGuard.release(); } } else { result->reset(new WinRandomAccessFile(fname, hFile, std::max(GetSectorSize(fname), page_size_), options)); fileGuard.release(); } return s; } Status WinEnvIO::OpenWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& options, bool reopen) { const size_t c_BufferCapacity = 64 * 1024; EnvOptions local_options(options); result->reset(); Status s; DWORD fileFlags = FILE_ATTRIBUTE_NORMAL; if (local_options.use_direct_writes && !local_options.use_mmap_writes) { fileFlags = FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; } // Desired access. 
We are want to write only here but if we want to memory // map // the file then there is no write only mode so we have to create it // Read/Write // However, MapViewOfFile specifies only Write only DWORD desired_access = GENERIC_WRITE; DWORD shared_mode = FILE_SHARE_READ; if (local_options.use_mmap_writes) { desired_access |= GENERIC_READ; } else { // Adding this solely for tests to pass (fault_injection_test, // wal_manager_test). shared_mode |= (FILE_SHARE_WRITE | FILE_SHARE_DELETE); } // This will always truncate the file DWORD creation_disposition = CREATE_ALWAYS; if (reopen) { creation_disposition = OPEN_ALWAYS; } HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), desired_access, // Access desired shared_mode, NULL, // Security attributes // Posix env says (reopen) ? (O_CREATE | O_APPEND) : O_CREAT | O_TRUNC creation_disposition, fileFlags, // Flags NULL); // Template File } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create a NewWriteableFile: " + fname, lastError); } // We will start writing at the end, appending if (reopen) { LARGE_INTEGER zero_move; zero_move.QuadPart = 0; BOOL ret = SetFilePointerEx(hFile, zero_move, NULL, FILE_END); if (!ret) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create a ReopenWritableFile move to the end: " + fname, lastError); } } if (options.use_mmap_writes) { // We usually do not use mmmapping on SSD and thus we pass memory // page_size result->reset(new WinMmapFile(fname, hFile, page_size_, allocation_granularity_, local_options)); } else { // Here we want the buffer allocation to be aligned by the SSD page size // and to be a multiple of it result->reset(new WinWritableFile(fname, hFile, std::max(GetSectorSize(fname), GetPageSize()), c_BufferCapacity, local_options)); } return s; } Status WinEnvIO::NewRandomRWFile(const std::string & fname, std::unique_ptr* result, const 
EnvOptions & options) { Status s; // Open the file for read-only random access // Random access is to disable read-ahead as the system reads too much data DWORD desired_access = GENERIC_READ | GENERIC_WRITE; DWORD shared_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE; DWORD creation_disposition = OPEN_EXISTING; // Fail if file does not exist DWORD file_flags = FILE_FLAG_RANDOM_ACCESS; if (options.use_direct_reads && options.use_direct_writes) { file_flags |= FILE_FLAG_NO_BUFFERING; } /// Shared access is necessary for corruption test to pass // almost all tests would work with a possible exception of fault_injection HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile(RX_FN(fname).c_str(), desired_access, shared_mode, NULL, // Security attributes creation_disposition, file_flags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "NewRandomRWFile failed to Create/Open: " + fname, lastError); } UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); result->reset(new WinRandomRWFile(fname, hFile, std::max(GetSectorSize(fname), GetPageSize()), options)); fileGuard.release(); return s; } Status WinEnvIO::NewMemoryMappedFileBuffer( const std::string & fname, std::unique_ptr* result) { Status s; result->reset(); DWORD fileFlags = FILE_ATTRIBUTE_READONLY; HANDLE hFile = INVALID_HANDLE_VALUE; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, // Open only if it exists fileFlags, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Failed to open NewMemoryMappedFileBuffer: " + fname, lastError); return s; } UniqueCloseHandlePtr fileGuard(hFile, CloseHandleFunc); uint64_t fileSize = 0; s = GetFileSize(fname, &fileSize); if (!s.ok()) { return s; } // Will not map empty files if (fileSize == 
0) { return Status::NotSupported( "NewMemoryMappedFileBuffer can not map zero length files: " + fname); } // size_t is 32-bit with 32-bit builds if (fileSize > std::numeric_limits::max()) { return Status::NotSupported( "The specified file size does not fit into 32-bit memory addressing: " + fname); } HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, // Whole file at its present length 0, NULL); // Mapping name if (!hMap) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to create file mapping for: " + fname, lastError); } UniqueCloseHandlePtr mapGuard(hMap, CloseHandleFunc); void* base = MapViewOfFileEx(hMap, FILE_MAP_WRITE, 0, // High DWORD of access start 0, // Low DWORD static_cast(fileSize), NULL); // Let the OS choose the mapping if (!base) { auto lastError = GetLastError(); return IOErrorFromWindowsError( "Failed to MapViewOfFile for NewMemoryMappedFileBuffer: " + fname, lastError); } result->reset(new WinMemoryMappedBuffer(hFile, hMap, base, static_cast(fileSize))); mapGuard.release(); fileGuard.release(); return s; } Status WinEnvIO::NewDirectory(const std::string& name, std::unique_ptr* result) { Status s; // Must be nullptr on failure result->reset(); if (!DirExists(name)) { s = IOErrorFromWindowsError( "open folder: " + name, ERROR_DIRECTORY); return s; } HANDLE handle = INVALID_HANDLE_VALUE; // 0 - for access means read metadata { IOSTATS_TIMER_GUARD(open_nanos); handle = RX_CreateFile( RX_FN(name).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible NULL); } if (INVALID_HANDLE_VALUE == handle) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("open folder: " + name, lastError); return s; } result->reset(new WinDirectory(handle)); return s; } Status WinEnvIO::FileExists(const std::string& fname) { Status s; // TODO: This does not follow symbolic links at this point // which is consistent with _access() 
impl on windows // but can be added WIN32_FILE_ATTRIBUTE_DATA attrs; if (FALSE == RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { auto lastError = GetLastError(); switch (lastError) { case ERROR_ACCESS_DENIED: case ERROR_NOT_FOUND: case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: s = Status::NotFound(); break; default: s = IOErrorFromWindowsError("Unexpected error for: " + fname, lastError); break; } } return s; } Status WinEnvIO::GetChildren(const std::string& dir, std::vector* result) { Status status; result->clear(); std::vector output; RX_WIN32_FIND_DATA data; memset(&data, 0, sizeof(data)); std::string pattern(dir); pattern.append("\\").append("*"); HANDLE handle = RX_FindFirstFileEx(RX_FN(pattern).c_str(), // Do not want alternative name FindExInfoBasic, &data, FindExSearchNameMatch, NULL, // lpSearchFilter 0); if (handle == INVALID_HANDLE_VALUE) { auto lastError = GetLastError(); switch (lastError) { case ERROR_NOT_FOUND: case ERROR_ACCESS_DENIED: case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: status = Status::NotFound(); break; default: status = IOErrorFromWindowsError( "Failed to GetChhildren for: " + dir, lastError); } return status; } UniqueFindClosePtr fc(handle, FindCloseFunc); if (result->capacity() > 0) { output.reserve(result->capacity()); } // For safety data.cFileName[MAX_PATH - 1] = 0; while (true) { auto x = RX_FILESTRING(data.cFileName, RX_FNLEN(data.cFileName)); output.emplace_back(FN_TO_RX(x)); BOOL ret =- RX_FindNextFile(handle, &data); // If the function fails the return value is zero // and non-zero otherwise. Not TRUE or FALSE. 
if (ret == FALSE) { // Posix does not care why we stopped break; } data.cFileName[MAX_PATH - 1] = 0; } output.swap(*result); return status; } Status WinEnvIO::CreateDir(const std::string& name) { Status result; BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError( "Failed to create a directory: " + name, lastError); } return result; } Status WinEnvIO::CreateDirIfMissing(const std::string& name) { Status result; if (DirExists(name)) { return result; } BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL); if (!ret) { auto lastError = GetLastError(); if (lastError != ERROR_ALREADY_EXISTS) { result = IOErrorFromWindowsError( "Failed to create a directory: " + name, lastError); } else { result = Status::IOError(name + ": exists but is not a directory"); } } return result; } Status WinEnvIO::DeleteDir(const std::string& name) { Status result; BOOL ret = RX_RemoveDirectory(RX_FN(name).c_str()); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to remove dir: " + name, lastError); } return result; } Status WinEnvIO::GetFileSize(const std::string& fname, uint64_t* size) { Status s; WIN32_FILE_ATTRIBUTE_DATA attrs; if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { ULARGE_INTEGER file_size; file_size.HighPart = attrs.nFileSizeHigh; file_size.LowPart = attrs.nFileSizeLow; *size = file_size.QuadPart; } else { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Can not get size for: " + fname, lastError); } return s; } uint64_t WinEnvIO::FileTimeToUnixTime(const FILETIME& ftTime) { const uint64_t c_FileTimePerSecond = 10000000U; // UNIX epoch starts on 1970-01-01T00:00:00Z // Windows FILETIME starts on 1601-01-01T00:00:00Z // Therefore, we need to subtract the below number of seconds from // the seconds that we obtain from FILETIME with an obvious loss of // precision const uint64_t c_SecondBeforeUnixEpoch = 
11644473600U; ULARGE_INTEGER li; li.HighPart = ftTime.dwHighDateTime; li.LowPart = ftTime.dwLowDateTime; uint64_t result = (li.QuadPart / c_FileTimePerSecond) - c_SecondBeforeUnixEpoch; return result; } Status WinEnvIO::GetFileModificationTime(const std::string& fname, uint64_t* file_mtime) { Status s; WIN32_FILE_ATTRIBUTE_DATA attrs; if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { *file_mtime = FileTimeToUnixTime(attrs.ftLastWriteTime); } else { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Can not get file modification time for: " + fname, lastError); *file_mtime = 0; } return s; } Status WinEnvIO::RenameFile(const std::string& src, const std::string& target) { Status result; // rename() is not capable of replacing the existing file as on Linux // so use OS API directly if (!RX_MoveFileEx(RX_FN(src).c_str(), RX_FN(target).c_str(), MOVEFILE_REPLACE_EXISTING)) { DWORD lastError = GetLastError(); std::string text("Failed to rename: "); text.append(src).append(" to: ").append(target); result = IOErrorFromWindowsError(text, lastError); } return result; } Status WinEnvIO::LinkFile(const std::string& src, const std::string& target) { Status result; if (!RX_CreateHardLink(RX_FN(target).c_str(), RX_FN(src).c_str(), NULL)) { DWORD lastError = GetLastError(); if (lastError == ERROR_NOT_SAME_DEVICE) { return Status::NotSupported("No cross FS links allowed"); } std::string text("Failed to link: "); text.append(src).append(" to: ").append(target); result = IOErrorFromWindowsError(text, lastError); } return result; } Status WinEnvIO::NumFileLinks(const std::string& fname, uint64_t* count) { Status s; HANDLE handle = RX_CreateFile( RX_FN(fname).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); if (INVALID_HANDLE_VALUE == handle) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("NumFileLinks: " + fname, lastError); return s; } 
UniqueCloseHandlePtr handle_guard(handle, CloseHandleFunc); FILE_STANDARD_INFO standard_info; if (0 != GetFileInformationByHandleEx(handle, FileStandardInfo, &standard_info, sizeof(standard_info))) { *count = standard_info.NumberOfLinks; } else { auto lastError = GetLastError(); s = IOErrorFromWindowsError("GetFileInformationByHandleEx: " + fname, lastError); } return s; } Status WinEnvIO::AreFilesSame(const std::string& first, const std::string& second, bool* res) { // For MinGW builds #if (_WIN32_WINNT == _WIN32_WINNT_VISTA) Status s = Status::NotSupported(); #else assert(res != nullptr); Status s; if (res == nullptr) { s = Status::InvalidArgument("res"); return s; } // 0 - for access means read metadata HANDLE file_1 = RX_CreateFile( RX_FN(first).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible NULL); if (INVALID_HANDLE_VALUE == file_1) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("open file: " + first, lastError); return s; } UniqueCloseHandlePtr g_1(file_1, CloseHandleFunc); HANDLE file_2 = RX_CreateFile( RX_FN(second).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible NULL); if (INVALID_HANDLE_VALUE == file_2) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("open file: " + second, lastError); return s; } UniqueCloseHandlePtr g_2(file_2, CloseHandleFunc); FILE_ID_INFO FileInfo_1; BOOL result = GetFileInformationByHandleEx(file_1, FileIdInfo, &FileInfo_1, sizeof(FileInfo_1)); if (!result) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("stat file: " + first, lastError); return s; } FILE_ID_INFO FileInfo_2; result = GetFileInformationByHandleEx(file_2, FileIdInfo, &FileInfo_2, sizeof(FileInfo_2)); if (!result) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("stat file: " + second, lastError); return s; } 
if (FileInfo_1.VolumeSerialNumber == FileInfo_2.VolumeSerialNumber) { *res = (0 == memcmp(FileInfo_1.FileId.Identifier, FileInfo_2.FileId.Identifier, sizeof(FileInfo_1.FileId.Identifier))); } else { *res = false; } #endif return s; } Status WinEnvIO::LockFile(const std::string& lockFname, FileLock** lock) { assert(lock != nullptr); *lock = NULL; Status result; // No-sharing, this is a LOCK file const DWORD ExclusiveAccessON = 0; // Obtain exclusive access to the LOCK file // Previously, instead of NORMAL attr we set DELETE on close and that worked // well except with fault_injection test that insists on deleting it. HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile(RX_FN(lockFname).c_str(), (GENERIC_READ | GENERIC_WRITE), ExclusiveAccessON, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); result = IOErrorFromWindowsError( "Failed to create lock file: " + lockFname, lastError); } else { *lock = new WinFileLock(hFile); } return result; } Status WinEnvIO::UnlockFile(FileLock* lock) { Status result; assert(lock != nullptr); delete lock; return result; } Status WinEnvIO::GetTestDirectory(std::string* result) { std::string output; const char* env = getenv("TEST_TMPDIR"); if (env && env[0] != '\0') { output = env; } else { env = getenv("TMP"); if (env && env[0] != '\0') { output = env; } else { output = "c:\\tmp"; } } CreateDir(output); output.append("\\testrocksdb-"); output.append(std::to_string(GetCurrentProcessId())); CreateDir(output); output.swap(*result); return Status::OK(); } Status WinEnvIO::NewLogger(const std::string& fname, std::shared_ptr* result) { Status s; result->reset(); HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); hFile = RX_CreateFile( RX_FN(fname).c_str(), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_DELETE, // In RocksDb log files are // renamed and deleted before // they are closed. This enables // doing so. 
NULL, CREATE_ALWAYS, // Original fopen mode is "w" FILE_ATTRIBUTE_NORMAL, NULL); } if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Failed to open LogFile" + fname, lastError); } else { { // With log files we want to set the true creation time as of now // because the system // for some reason caches the attributes of the previous file that just // been renamed from // this name so auto_roll_logger_test fails FILETIME ft; GetSystemTimeAsFileTime(&ft); // Set creation, last access and last write time to the same value SetFileTime(hFile, &ft, &ft, &ft); } result->reset(new WinLogger(&WinEnvThreads::gettid, hosted_env_, hFile)); } return s; } Status WinEnvIO::IsDirectory(const std::string& path, bool* is_dir) { BOOL ret = RX_PathIsDirectory(RX_FN(path).c_str()); if (is_dir) { *is_dir = ret ? true : false; } return Status::OK(); } uint64_t WinEnvIO::NowMicros() { if (GetSystemTimePreciseAsFileTime_ != NULL) { // all std::chrono clocks on windows proved to return // values that may repeat that is not good enough for some uses. 
const int64_t c_UnixEpochStartTicks = 116444736000000000LL; const int64_t c_FtToMicroSec = 10; // This interface needs to return system time and not // just any microseconds because it is often used as an argument // to TimedWait() on condition variable FILETIME ftSystemTime; GetSystemTimePreciseAsFileTime_(&ftSystemTime); LARGE_INTEGER li; li.LowPart = ftSystemTime.dwLowDateTime; li.HighPart = ftSystemTime.dwHighDateTime; // Subtract unix epoch start li.QuadPart -= c_UnixEpochStartTicks; // Convert to microsecs li.QuadPart /= c_FtToMicroSec; return li.QuadPart; } using namespace std::chrono; return duration_cast(system_clock::now().time_since_epoch()) .count(); } uint64_t WinEnvIO::NowNanos() { if (nano_seconds_per_period_ != 0) { // all std::chrono clocks on windows have the same resolution that is only // good enough for microseconds but not nanoseconds // On Windows 8 and Windows 2012 Server // GetSystemTimePreciseAsFileTime(¤t_time) can be used LARGE_INTEGER li; QueryPerformanceCounter(&li); // Convert performance counter to nanoseconds by precomputed ratio. // Directly multiply nano::den with li.QuadPart causes overflow. // Only do this when nano::den is divisible by perf_counter_frequency_, // which most likely is the case in reality. If it's not, fall back to // high_resolution_clock, which may be less precise under old compilers. 
li.QuadPart *= nano_seconds_per_period_; return li.QuadPart; } using namespace std::chrono; return duration_cast( high_resolution_clock::now().time_since_epoch()).count(); } Status WinEnvIO::GetHostName(char* name, uint64_t len) { Status s; DWORD nSize = static_cast( std::min(len, std::numeric_limits::max())); if (!::GetComputerNameA(name, &nSize)) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("GetHostName", lastError); } else { name[nSize] = 0; } return s; } Status WinEnvIO::GetAbsolutePath(const std::string& db_path, std::string* output_path) { // Check if we already have an absolute path // For test compatibility we will consider starting slash as an // absolute path if ((!db_path.empty() && (db_path[0] == '\\' || db_path[0] == '/')) || !RX_PathIsRelative(RX_FN(db_path).c_str())) { *output_path = db_path; return Status::OK(); } RX_FILESTRING result; result.resize(MAX_PATH); // Hopefully no changes the current directory while we do this // however _getcwd also suffers from the same limitation DWORD len = RX_GetCurrentDirectory(MAX_PATH, &result[0]); if (len == 0) { auto lastError = GetLastError(); return IOErrorFromWindowsError("Failed to get current working directory", lastError); } result.resize(len); std::string res = FN_TO_RX(result); res.swap(*output_path); return Status::OK(); } std::string WinEnvIO::TimeToString(uint64_t secondsSince1970) { std::string result; const time_t seconds = secondsSince1970; const int maxsize = 64; struct tm t; errno_t ret = localtime_s(&t, &seconds); if (ret) { result = std::to_string(seconds); } else { result.resize(maxsize); char* p = &result[0]; int len = snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); assert(len > 0); result.resize(len); } return result; } Status WinEnvIO::GetFreeSpace(const std::string& path, uint64_t* diskfree) { assert(diskfree != nullptr); ULARGE_INTEGER freeBytes; BOOL f = 
RX_GetDiskFreeSpaceEx(RX_FN(path).c_str(), &freeBytes, NULL, NULL); if (f) { *diskfree = freeBytes.QuadPart; return Status::OK(); } else { DWORD lastError = GetLastError(); return IOErrorFromWindowsError("Failed to get free space: " + path, lastError); } } EnvOptions WinEnvIO::OptimizeForLogWrite(const EnvOptions& env_options, const DBOptions& db_options) const { EnvOptions optimized(env_options); // These two the same as default optimizations optimized.bytes_per_sync = db_options.wal_bytes_per_sync; optimized.writable_file_max_buffer_size = db_options.writable_file_max_buffer_size; // This adversely affects %999 on windows optimized.use_mmap_writes = false; // Direct writes will produce a huge perf impact on // Windows. Pre-allocate space for WAL. optimized.use_direct_writes = false; return optimized; } EnvOptions WinEnvIO::OptimizeForManifestWrite( const EnvOptions& env_options) const { EnvOptions optimized(env_options); optimized.use_mmap_writes = false; optimized.use_direct_reads = false; return optimized; } EnvOptions WinEnvIO::OptimizeForManifestRead( const EnvOptions& env_options) const { EnvOptions optimized(env_options); optimized.use_mmap_writes = false; optimized.use_direct_reads = false; return optimized; } // Returns true iff the named directory exists and is a directory. 
// Queries the attributes of `dname` and reports whether the directory
// attribute bit is set. Returns false when the attribute query fails.
bool WinEnvIO::DirExists(const std::string& dname) {
  WIN32_FILE_ATTRIBUTE_DATA attrs;
  if (RX_GetFileAttributesEx(RX_FN(dname).c_str(),
                             GetFileExInfoStandard, &attrs)) {
    return 0 != (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
  }
  return false;
}

// Returns the logical sector size of the device that `fname` lives on.
// Falls back to kSectorSize when `fname` is relative, when the device cannot
// be opened, or when both device queries below fail.
size_t WinEnvIO::GetSectorSize(const std::string& fname) {
  size_t sector_size = kSectorSize;

  if (RX_PathIsRelative(RX_FN(fname).c_str())) {
    return sector_size;
  }

  // obtain device handle
  // The buffer holds the 4-character device prefix, the first two characters
  // of fname, and the terminating null.
  char devicename[7] = "\\\\.\\";
  int erresult = strncat_s(devicename, sizeof(devicename), fname.c_str(), 2);

  if (erresult) {
    assert(false);
    return sector_size;
  }

  HANDLE hDevice = CreateFile(devicename, 0, 0, nullptr, OPEN_EXISTING,
                              FILE_ATTRIBUTE_NORMAL, nullptr);

  if (hDevice == INVALID_HANDLE_VALUE) {
    return sector_size;
  }

  STORAGE_PROPERTY_QUERY spropertyquery;
  spropertyquery.PropertyId = StorageAccessAlignmentProperty;
  spropertyquery.QueryType = PropertyStandardQuery;

  BYTE output_buffer[sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR)];
  DWORD output_bytes = 0;

  BOOL ret = DeviceIoControl(hDevice, IOCTL_STORAGE_QUERY_PROPERTY,
                             &spropertyquery, sizeof(spropertyquery),
                             output_buffer,
                             sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR),
                             &output_bytes, nullptr);

  if (ret) {
    sector_size = ((STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR *)output_buffer)->BytesPerLogicalSector;
  } else {
    // many devices do not support StorageProcessAlignmentProperty. Any failure here and we
    // fall back to logical alignment

    // NOTE(review): a DISK_GEOMETRY_EX buffer is passed to the non-EX
    // IOCTL_DISK_GET_DRIVE_GEOMETRY request and only its leading Geometry
    // member is read - confirm this pairing is intentional.
    DISK_GEOMETRY_EX geometry = { 0 };
    ret = DeviceIoControl(hDevice, IOCTL_DISK_GET_DRIVE_GEOMETRY,
                          nullptr, 0, &geometry, sizeof(geometry),
                          &output_bytes, nullptr);
    if (ret) {
      sector_size = geometry.Geometry.BytesPerSector;
    }
  }

  if (hDevice != INVALID_HANDLE_VALUE) {
    CloseHandle(hDevice);
  }

  return sector_size;
}

////////////////////////////////////////////////////////////////////////
// WinEnvThreads

// Creates Env::Priority::TOTAL thread pools; each pool is given the priority
// matching its index and the hosting Env.
WinEnvThreads::WinEnvThreads(Env* hosted_env)
    : hosted_env_(hosted_env), thread_pools_(Env::Priority::TOTAL) {

  for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) {
    thread_pools_[pool_id].SetThreadPriority(
      static_cast(pool_id));
    // This allows later initializing the thread-local-env of each thread.
    thread_pools_[pool_id].SetHostEnv(hosted_env);
  }
}

// Joins the threads started through StartThread(), then joins every pool.
WinEnvThreads::~WinEnvThreads() {

  WaitForJoin();

  for (auto& thpool : thread_pools_) {
    thpool.JoinAllThreads();
  }
}

// Queues `function(arg)` on the pool for priority `pri`; `tag` and
// `unschedFunction` are forwarded to the pool unchanged.
void WinEnvThreads::Schedule(void(*function)(void*), void* arg,
                             Env::Priority pri, void* tag,
                             void(*unschedFunction)(void* arg)) {
  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
  thread_pools_[pri].Schedule(function, arg, tag, unschedFunction);
}

// Forwards to the UnSchedule() of the pool for priority `pri` and returns its
// result. Unlike Schedule(), `pri` is not range-checked here.
int WinEnvThreads::UnSchedule(void* arg, Env::Priority pri) {
  return thread_pools_[pri].UnSchedule(arg);
}

namespace {

  // Function pointer and argument handed to a thread started by StartThread().
  struct StartThreadState {
    void(*user_function)(void*);
    void* arg;
  };

  // Thread entry point: takes ownership of the StartThreadState passed in
  // `arg`, runs the user function, and frees the state on return.
  void* StartThreadWrapper(void* arg) {
    std::unique_ptr state(
      reinterpret_cast(arg));
    state->user_function(state->arg);
    return nullptr;
  }

}

// Starts a new thread running `function(arg)` and records it so that
// WaitForJoin() can join it later. A std::system_error raised while starting
// the thread is passed to WinthreadCall().
void WinEnvThreads::StartThread(void(*function)(void* arg), void* arg) {
  std::unique_ptr state(new StartThreadState);
  state->user_function = function;
  state->arg = arg;
  try {
    ROCKSDB_NAMESPACE::port::WindowsThread th(&StartThreadWrapper, state.get());
    // The new thread owns the state from here on (see StartThreadWrapper).
    state.release();

    std::lock_guard lg(mu_);
    threads_to_join_.push_back(std::move(th));

  } catch (const std::system_error& ex) {
    WinthreadCall("start thread", ex.code());
  }
}

// Joins every thread recorded by StartThread() and clears the list.
// NOTE(review): the list is read here without taking mu_, while StartThread()
// appends under mu_ - confirm callers never run the two concurrently.
void WinEnvThreads::WaitForJoin() {
  for (auto& th : threads_to_join_) {
    th.join();
  }
  threads_to_join_.clear();
}

// Returns the queue length reported by the pool for priority `pri`.
unsigned int WinEnvThreads::GetThreadPoolQueueLen(Env::Priority pri) const {
  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
  return thread_pools_[pri].GetQueueLen();
}

// Returns the id of the calling thread as reported by GetCurrentThreadId().
uint64_t WinEnvThreads::gettid() {
  uint64_t thread_id = GetCurrentThreadId();
  return thread_id;
}

// Instance-level wrapper around gettid().
uint64_t WinEnvThreads::GetThreadID() const { return gettid(); }

// Blocks the calling thread for `micros` microseconds.
void WinEnvThreads::SleepForMicroseconds(int micros) {
  std::this_thread::sleep_for(std::chrono::microseconds(micros));
}

// Forwards `num` to SetBackgroundThreads() of the pool for priority `pri`.
void WinEnvThreads::SetBackgroundThreads(int num, Env::Priority pri) {
  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
  thread_pools_[pri].SetBackgroundThreads(num);
}

// Returns GetBackgroundThreads() of the pool for priority `pri`.
int WinEnvThreads::GetBackgroundThreads(Env::Priority pri) {
  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
  return thread_pools_[pri].GetBackgroundThreads();
}

// Forwards `num` to IncBackgroundThreadsIfNeeded() of the pool for `pri`.
void WinEnvThreads::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) {
  assert(pri >= Env::Priority::BOTTOM && pri <= Env::Priority::HIGH);
  thread_pools_[pri].IncBackgroundThreadsIfNeeded(num);
}

/////////////////////////////////////////////////////////////////////////
// WinEnv

// Constructs the I/O and threading helpers with this Env as their host and
// creates the thread status updater.
WinEnv::WinEnv() : winenv_io_(this), winenv_threads_(this) {
  // Protected member of the base class
  thread_status_updater_ = CreateThreadStatusUpdater();
}

// NOTE(review): winenv_threads_ is a data member, so its destructor (which
// joins the threads) runs after this body has deleted thread_status_updater_.
// Confirm that ordering satisfies the requirement stated below.
WinEnv::~WinEnv() {
  // All threads must be joined before the deletion of
  // thread_status_updater_.
  delete thread_status_updater_;
}

// Forwards to the thread status updater.
Status WinEnv::GetThreadList(std::vector* thread_list) {
  assert(thread_status_updater_);
  return thread_status_updater_->GetThreadList(thread_list);
}

// The functions below forward their arguments unchanged to the winenv_io_
// member function named in each body and return its result.

Status WinEnv::DeleteFile(const std::string& fname) {
  return winenv_io_.DeleteFile(fname);
}

Status WinEnv::Truncate(const std::string& fname, size_t size) {
  return winenv_io_.Truncate(fname, size);
}

Status WinEnv::GetCurrentTime(int64_t* unix_time) {
  return winenv_io_.GetCurrentTime(unix_time);
}

Status WinEnv::NewSequentialFile(const std::string& fname,
                                 std::unique_ptr* result,
                                 const EnvOptions& options) {
  return winenv_io_.NewSequentialFile(fname, result, options);
}

Status WinEnv::NewRandomAccessFile(const std::string& fname,
                                   std::unique_ptr* result,
                                   const EnvOptions& options) {
  return winenv_io_.NewRandomAccessFile(fname, result, options);
}

// Calls OpenWritableFile() with reopen == false.
Status WinEnv::NewWritableFile(const std::string& fname,
                               std::unique_ptr* result,
                               const EnvOptions& options) {
  return winenv_io_.OpenWritableFile(fname, result, options, false);
}

// Calls OpenWritableFile() with reopen == true.
Status WinEnv::ReopenWritableFile(const std::string& fname,
                                  std::unique_ptr* result,
                                  const EnvOptions& options) {
  return winenv_io_.OpenWritableFile(fname, result, options, true);
}

Status WinEnv::NewRandomRWFile(const std::string & fname,
                               std::unique_ptr* result,
                               const EnvOptions & options) {
  return winenv_io_.NewRandomRWFile(fname, result, options);
}

Status WinEnv::NewMemoryMappedFileBuffer(
  const std::string& fname,
  std::unique_ptr* result) {
  return winenv_io_.NewMemoryMappedFileBuffer(fname, result);
}

Status WinEnv::NewDirectory(const std::string& name,
                            std::unique_ptr* result) {
  return winenv_io_.NewDirectory(name, result);
}

Status WinEnv::FileExists(const std::string& fname) {
  return winenv_io_.FileExists(fname);
}

Status WinEnv::GetChildren(const std::string& dir,
                           std::vector* result) {
  return winenv_io_.GetChildren(dir, result);
}

Status WinEnv::CreateDir(const std::string& name) {
  return winenv_io_.CreateDir(name);
}

Status WinEnv::CreateDirIfMissing(const std::string& name) {
  return winenv_io_.CreateDirIfMissing(name);
}

Status WinEnv::DeleteDir(const std::string& name) {
  return winenv_io_.DeleteDir(name);
}

Status WinEnv::GetFileSize(const std::string& fname,
                           uint64_t* size) {
  return winenv_io_.GetFileSize(fname, size);
}

Status WinEnv::GetFileModificationTime(const std::string& fname,
                                       uint64_t* file_mtime) {
  return winenv_io_.GetFileModificationTime(fname, file_mtime);
}

Status WinEnv::RenameFile(const std::string& src,
                          const std::string& target) {
  return winenv_io_.RenameFile(src, target);
}

Status WinEnv::LinkFile(const std::string& src,
                        const std::string& target) {
  return winenv_io_.LinkFile(src, target);
}

Status WinEnv::NumFileLinks(const std::string& fname, uint64_t* count) {
  return winenv_io_.NumFileLinks(fname, count);
}

Status WinEnv::AreFilesSame(const std::string& first,
                            const std::string& second, bool* res) {
  return winenv_io_.AreFilesSame(first, second, res);
}

Status WinEnv::LockFile(const std::string& lockFname,
                        FileLock** lock) {
  return winenv_io_.LockFile(lockFname, lock);
}

Status WinEnv::UnlockFile(FileLock* lock) {
  return winenv_io_.UnlockFile(lock);
}

Status WinEnv::GetTestDirectory(std::string* result) {
  return winenv_io_.GetTestDirectory(result);
}

Status WinEnv::NewLogger(const std::string& fname,
                         std::shared_ptr* result) {
  return winenv_io_.NewLogger(fname, result);
}

Status WinEnv::IsDirectory(const std::string& path, bool* is_dir) {
  return winenv_io_.IsDirectory(path, is_dir);
}

uint64_t WinEnv::NowMicros() {
  return winenv_io_.NowMicros();
}

uint64_t WinEnv::NowNanos() {
  return winenv_io_.NowNanos();
}

Status WinEnv::GetHostName(char* name, uint64_t len) {
  return winenv_io_.GetHostName(name, len);
}

Status WinEnv::GetAbsolutePath(const std::string& db_path,
                               std::string* output_path) {
  return winenv_io_.GetAbsolutePath(db_path, output_path);
}

std::string WinEnv::TimeToString(uint64_t secondsSince1970) {
  return winenv_io_.TimeToString(secondsSince1970);
}
// The WinEnv functions below forward their arguments unchanged to the
// winenv_threads_ or winenv_io_ member function named in each body.

void WinEnv::Schedule(void(*function)(void*), void* arg, Env::Priority pri,
                      void* tag,
                      void(*unschedFunction)(void* arg)) {
  return winenv_threads_.Schedule(function, arg, pri, tag, unschedFunction);
}

int WinEnv::UnSchedule(void* arg, Env::Priority pri) {
  return winenv_threads_.UnSchedule(arg, pri);
}

void WinEnv::StartThread(void(*function)(void* arg), void* arg) {
  return winenv_threads_.StartThread(function, arg);
}

void WinEnv::WaitForJoin() {
  return winenv_threads_.WaitForJoin();
}

unsigned int WinEnv::GetThreadPoolQueueLen(Env::Priority pri) const {
  return winenv_threads_.GetThreadPoolQueueLen(pri);
}

uint64_t WinEnv::GetThreadID() const {
  return winenv_threads_.GetThreadID();
}

Status WinEnv::GetFreeSpace(const std::string& path, uint64_t* diskfree) {
  return winenv_io_.GetFreeSpace(path, diskfree);
}

void WinEnv::SleepForMicroseconds(int micros) {
  return winenv_threads_.SleepForMicroseconds(micros);
}

// Allow increasing the number of worker threads.
void WinEnv::SetBackgroundThreads(int num, Env::Priority pri) {
  return winenv_threads_.SetBackgroundThreads(num, pri);
}

int WinEnv::GetBackgroundThreads(Env::Priority pri) {
  return winenv_threads_.GetBackgroundThreads(pri);
}

void WinEnv::IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) {
  return winenv_threads_.IncBackgroundThreadsIfNeeded(num, pri);
}

EnvOptions WinEnv::OptimizeForManifestRead(
  const EnvOptions& env_options) const {
  return winenv_io_.OptimizeForManifestRead(env_options);
}

EnvOptions WinEnv::OptimizeForLogWrite(const EnvOptions& env_options,
  const DBOptions& db_options) const {
  return winenv_io_.OptimizeForLogWrite(env_options, db_options);
}

EnvOptions WinEnv::OptimizeForManifestWrite(
  const EnvOptions& env_options) const {
  return winenv_io_.OptimizeForManifestWrite(env_options);
}

}  // namespace port

// Returns the textual form of a UUID obtained from UuidCreateSequential().
// The RPC string helpers are only checked through assert(), so their status
// codes are not examined when assertions are compiled out.
std::string Env::GenerateUniqueId() {
  std::string result;

  UUID uuid;
  UuidCreateSequential(&uuid);

  RPC_CSTR rpc_str;
  auto status = UuidToStringA(&uuid, &rpc_str);
  // Keeps `status` referenced when assert() compiles to nothing.
  (void)status;
  assert(status == RPC_S_OK);

  result = reinterpret_cast(rpc_str);

  // The string was allocated by UuidToStringA and is released here.
  status = RpcStringFreeA(&rpc_str);
  assert(status == RPC_S_OK);

  return result;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/port/win/env_win.h000066400000000000000000000266221370372246700166540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// An Env is an interface used by the rocksdb implementation to access
// operating system functionality like the filesystem etc.  Callers
// may wish to provide a custom Env object when opening a database to
// get fine-grained control; e.g., to rate limit file system operations.
//
// All Env implementations are safe for concurrent access from
// multiple threads without any external synchronization.
#pragma once

#include "port/win/win_thread.h"
#include
#include "util/threadpool_imp.h"

#include
#include

#include
#include
#include


#undef GetCurrentTime
#undef DeleteFile
#undef GetTickCount

namespace ROCKSDB_NAMESPACE {
namespace port {

// Currently not designed for inheritance but rather a replacement
// Owns one thread pool per Env::Priority plus the list of individually
// started threads; copying is disabled.
class WinEnvThreads {
public:

  explicit WinEnvThreads(Env* hosted_env);

  ~WinEnvThreads();

  WinEnvThreads(const WinEnvThreads&) = delete;
  WinEnvThreads& operator=(const WinEnvThreads&) = delete;

  // Queues function(arg) on the pool selected by pri.
  void Schedule(void(*function)(void*), void* arg, Env::Priority pri,
                void* tag, void(*unschedFunction)(void* arg));

  int UnSchedule(void* arg, Env::Priority pri);

  // Starts a dedicated thread running function(arg).
  void StartThread(void(*function)(void* arg), void* arg);

  // Joins the threads started through StartThread().
  void WaitForJoin();

  unsigned int GetThreadPoolQueueLen(Env::Priority pri) const;

  static uint64_t gettid();

  uint64_t GetThreadID() const;

  void SleepForMicroseconds(int micros);

  // Allow increasing the number of worker threads.
  void SetBackgroundThreads(int num, Env::Priority pri);
  int GetBackgroundThreads(Env::Priority pri);

  void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri);

private:

  Env* hosted_env_;
  // Taken by StartThread() while appending to threads_to_join_.
  mutable std::mutex mu_;
  // Indexed by Env::Priority.
  std::vector thread_pools_;
  std::vector threads_to_join_;

};

// Designed for inheritance so can be re-used
// but certain parts replaced
class WinEnvIO {
public:
  explicit WinEnvIO(Env* hosted_env);

  virtual ~WinEnvIO();

  virtual Status DeleteFile(const std::string& fname);

  Status Truncate(const std::string& fname, size_t size);

  virtual Status GetCurrentTime(int64_t* unix_time);

  virtual Status NewSequentialFile(const std::string& fname,
                                   std::unique_ptr* result,
                                   const EnvOptions& options);

  // Helper for NewWritable and ReopenWritableFile
  virtual Status OpenWritableFile(const std::string& fname,
                                  std::unique_ptr* result,
                                  const EnvOptions& options,
                                  bool reopen);

  virtual Status NewRandomAccessFile(const std::string& fname,
                                     std::unique_ptr* result,
                                     const EnvOptions& options);

  // The returned file will only be accessed by one thread at a time.
  virtual Status NewRandomRWFile(const std::string& fname,
                                 std::unique_ptr* result,
                                 const EnvOptions& options);

  virtual Status NewMemoryMappedFileBuffer(
    const std::string& fname,
    std::unique_ptr* result);

  virtual Status NewDirectory(const std::string& name,
                              std::unique_ptr* result);

  virtual Status FileExists(const std::string& fname);

  virtual Status GetChildren(const std::string& dir,
                             std::vector* result);

  virtual Status CreateDir(const std::string& name);

  virtual Status CreateDirIfMissing(const std::string& name);

  virtual Status DeleteDir(const std::string& name);

  virtual Status GetFileSize(const std::string& fname, uint64_t* size);

  // Converts a FILETIME value to seconds since the UNIX epoch.
  static uint64_t FileTimeToUnixTime(const FILETIME& ftTime);

  virtual Status GetFileModificationTime(const std::string& fname,
                                         uint64_t* file_mtime);

  virtual Status RenameFile(const std::string& src, const std::string& target);

  virtual Status LinkFile(const std::string& src, const std::string& target);

  virtual Status NumFileLinks(const std::string& /*fname*/,
                              uint64_t* /*count*/);

  virtual Status AreFilesSame(const std::string& first,
                              const std::string& second, bool* res);

  virtual Status LockFile(const std::string& lockFname, FileLock** lock);

  virtual Status UnlockFile(FileLock* lock);

  virtual Status GetTestDirectory(std::string* result);

  virtual Status NewLogger(const std::string& fname,
                           std::shared_ptr* result);

  virtual Status IsDirectory(const std::string& path, bool* is_dir);

  virtual uint64_t NowMicros();

  virtual uint64_t NowNanos();

  virtual Status GetHostName(char* name, uint64_t len);

  virtual Status GetAbsolutePath(const std::string& db_path,
                                 std::string* output_path);

  // This seems to clash with a macro on Windows, so #undef it here
#undef GetFreeSpace

  // Get the amount of free disk space
  virtual Status GetFreeSpace(const std::string& path, uint64_t* diskfree);

  virtual std::string TimeToString(uint64_t secondsSince1970);

  virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
                                         const DBOptions& db_options) const;

  virtual EnvOptions OptimizeForManifestWrite(
    const EnvOptions& env_options) const;

  virtual EnvOptions OptimizeForManifestRead(
    const EnvOptions& env_options) const;

  // Accessors for values cached in the private members below.
  size_t GetPageSize() const { return page_size_; }

  size_t GetAllocationGranularity() const { return allocation_granularity_; }

  uint64_t GetPerfCounterFrequency() const { return perf_counter_frequency_; }

  static size_t GetSectorSize(const std::string& fname);

private:
  // Returns true iff the named directory exists and is a directory.
  virtual bool DirExists(const std::string& dname);

  // Signature of the function stored in GetSystemTimePreciseAsFileTime_.
  typedef VOID(WINAPI * FnGetSystemTimePreciseAsFileTime)(LPFILETIME);

  Env* hosted_env_;
  size_t page_size_;
  size_t allocation_granularity_;
  uint64_t perf_counter_frequency_;
  // Multiplier applied to the performance counter in NowNanos(); 0 selects
  // the std::chrono fallback there.
  uint64_t nano_seconds_per_period_;
  // May be NULL; NowMicros() then falls back to std::chrono.
  FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
};

// Env implementation that forwards file-system and time calls to a WinEnvIO
// member and threading calls to a WinEnvThreads member.
class WinEnv : public Env {
public:
  WinEnv();

  ~WinEnv();

  Status DeleteFile(const std::string& fname) override;

  Status Truncate(const std::string& fname, size_t size) override;

  Status GetCurrentTime(int64_t* unix_time) override;

  Status NewSequentialFile(const std::string& fname,
                           std::unique_ptr* result,
                           const EnvOptions& options) override;

  Status NewRandomAccessFile(const std::string& fname,
                             std::unique_ptr* result,
                             const EnvOptions& options) override;

  Status NewWritableFile(const std::string& fname,
                         std::unique_ptr* result,
                         const EnvOptions& options) override;

  // Create an object that writes to a new file with the specified
  // name.  Deletes any existing file with the same name and creates a
  // new file.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure stores nullptr in *result and
  // returns non-OK.
  //
  // The returned file will only be accessed by one thread at a time.
  //
  // NOTE(review): the description above reads like NewWritableFile; the
  // implementation of ReopenWritableFile passes reopen == true to
  // WinEnvIO::OpenWritableFile - confirm which function it is meant for.
  Status ReopenWritableFile(const std::string& fname,
                            std::unique_ptr* result,
                            const EnvOptions& options) override;

  // The returned file will only be accessed by one thread at a time.
  Status NewRandomRWFile(const std::string& fname,
                         std::unique_ptr* result,
                         const EnvOptions& options) override;

  Status NewMemoryMappedFileBuffer(
    const std::string& fname,
    std::unique_ptr* result) override;

  Status NewDirectory(const std::string& name,
                      std::unique_ptr* result) override;

  Status FileExists(const std::string& fname) override;

  Status GetChildren(const std::string& dir,
                     std::vector* result) override;

  Status CreateDir(const std::string& name) override;

  Status CreateDirIfMissing(const std::string& name) override;

  Status DeleteDir(const std::string& name) override;

  Status GetFileSize(const std::string& fname,
                     uint64_t* size) override;

  Status GetFileModificationTime(const std::string& fname,
                                 uint64_t* file_mtime) override;

  Status RenameFile(const std::string& src,
                    const std::string& target) override;

  Status LinkFile(const std::string& src,
                  const std::string& target) override;

  Status NumFileLinks(const std::string& fname, uint64_t* count) override;

  Status AreFilesSame(const std::string& first,
                      const std::string& second, bool* res) override;

  Status LockFile(const std::string& lockFname, FileLock** lock) override;

  Status UnlockFile(FileLock* lock) override;

  Status GetTestDirectory(std::string* result) override;

  Status NewLogger(const std::string& fname,
                   std::shared_ptr* result) override;

  Status IsDirectory(const std::string& path, bool* is_dir) override;

  uint64_t NowMicros() override;

  uint64_t NowNanos() override;

  Status GetHostName(char* name, uint64_t len) override;

  Status GetAbsolutePath(const std::string& db_path,
                         std::string* output_path) override;

  std::string TimeToString(uint64_t secondsSince1970) override;

  Status GetThreadList(std::vector* thread_list) override;

  void Schedule(void(*function)(void*), void* arg, Env::Priority pri,
                void* tag, void(*unschedFunction)(void* arg)) override;

  int UnSchedule(void* arg, Env::Priority pri) override;

  void StartThread(void(*function)(void* arg), void* arg) override;

  // Declared without `override`, unlike the neighbouring members.
  void WaitForJoin();

  unsigned int GetThreadPoolQueueLen(Env::Priority pri) const override;

  uint64_t GetThreadID() const override;

  // This seems to clash with a macro on Windows, so #undef it here
#undef GetFreeSpace

  // Get the amount of free disk space
  Status GetFreeSpace(const std::string& path, uint64_t* diskfree) override;

  void SleepForMicroseconds(int micros) override;

  // Allow increasing the number of worker threads.
  void SetBackgroundThreads(int num, Env::Priority pri) override;
  int GetBackgroundThreads(Env::Priority pri) override;

  void IncBackgroundThreadsIfNeeded(int num, Env::Priority pri) override;

  EnvOptions OptimizeForManifestRead(
    const EnvOptions& env_options) const override;

  EnvOptions OptimizeForLogWrite(const EnvOptions& env_options,
    const DBOptions& db_options) const override;

  EnvOptions OptimizeForManifestWrite(
    const EnvOptions& env_options) const override;


private:

  // Declared (and therefore constructed) before winenv_threads_.
  WinEnvIO      winenv_io_;
  WinEnvThreads winenv_threads_;
};

} // namespace port
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/port/win/io_win.cc000066400000000000000000000735151370372246700166340ustar00rootroot00000000000000//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "port/win/io_win.h"

#include "monitoring/iostats_context_imp.h"
#include "test_util/sync_point.h"
#include "util/aligned_buffer.h"
#include "util/coding.h"

namespace ROCKSDB_NAMESPACE {
namespace port {

/*
* DirectIOHelper
*/
namespace {

// Sector size assumed by the alignment checks below.
const size_t kSectorSize = 512;

// Returns true iff `alignment` is a non-zero power of two.
// Zero is rejected explicitly: the bit trick alone would accept it.
inline bool IsPowerOfTwo(const size_t alignment) {
  return alignment != 0 && ((alignment) & (alignment - 1)) == 0;
}

// Returns true iff `off` is a multiple of kSectorSize.
inline bool IsSectorAligned(const size_t off) {
  return (off & (kSectorSize - 1)) == 0;
}

// Returns true iff the address `ptr` is a multiple of `alignment`.
// The mask test is only meaningful when `alignment` is a power of two.
inline bool IsAligned(size_t alignment, const void* ptr) {
  return ((uintptr_t(ptr)) & (alignment - 1)) == 0;
}
}  // namespace

// Returns the system-provided message text for the Windows error code `err`.
// Returns an empty string when FormatMessageA() does not produce a message.
std::string GetWindowsErrSz(DWORD err) {
  std::string Err;
  // Must start out null: FormatMessageA() leaves the pointer untouched on
  // failure, and reading an uninitialized pointer would be undefined.
  LPSTR lpMsgBuf = nullptr;
  FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                     FORMAT_MESSAGE_IGNORE_INSERTS,
                 NULL, err,
                 0,  // Default language
                 reinterpret_cast<LPSTR>(&lpMsgBuf), 0, NULL);

  if (lpMsgBuf != nullptr) {
    Err = lpMsgBuf;
    // The buffer was allocated by FormatMessageA() on our behalf
    // (FORMAT_MESSAGE_ALLOCATE_BUFFER) and is released with LocalFree().
    LocalFree(lpMsgBuf);
  }
  return Err;
}

// We preserve the original names of these interfaces (pwrite/pread) to denote
// the original idea behind them.
// All reads happen at a specified offset, and the POSIX pwrite interface does
// not change the position of the file pointer. Judging from the man page and
// errno, it executes lseek atomically to return the position of the file back
// where it was. WriteFile() does not have this capability. Therefore, for
// both pread and pwrite the pointer is advanced to the next position, which
// is fine for writes because they are (should be) sequential.
// Because all the reads/writes happen at the specified offset, the caller in
// theory should not rely on the current file offset.
// Writes `data` to the file behind `file_data` at absolute position `offset`.
//
// The position is handed to WriteFile() through an OVERLAPPED structure, so
// the current file pointer is not used to choose where the write lands.
//
// bytes_written: set to 0 on entry and to the count reported by WriteFile()
//                on success. It may be smaller than data.size(); callers
//                must check.
// Returns InvalidArgument when data.size() does not fit in a DWORD, an
// IOError-family status built from GetLastError() when WriteFile() fails,
// and OK otherwise.
Status pwrite(const WinFileData* file_data, const Slice& data,
              uint64_t offset, size_t& bytes_written) {
  Status s;
  bytes_written = 0;

  size_t num_bytes = data.size();
  if (num_bytes > std::numeric_limits<DWORD>::max()) {
    // May happen in 64-bit builds where size_t is 64-bits but
    // long is still 32-bit, but that's the API here at the moment
    return Status::InvalidArgument(
        "num_bytes is too large for a single write: " + file_data->GetName());
  }

  // Split the 64-bit offset into the two 32-bit halves OVERLAPPED expects.
  OVERLAPPED overlapped = { 0 };
  ULARGE_INTEGER offsetUnion;
  offsetUnion.QuadPart = offset;

  overlapped.Offset = offsetUnion.LowPart;
  overlapped.OffsetHigh = offsetUnion.HighPart;

  DWORD bytesWritten = 0;

  if (FALSE == WriteFile(file_data->GetFileHandle(), data.data(),
                         static_cast<DWORD>(num_bytes), &bytesWritten,
                         &overlapped)) {
    auto lastError = GetLastError();
    s = IOErrorFromWindowsError("WriteFile failed: " + file_data->GetName(),
                                lastError);
  } else {
    bytes_written = bytesWritten;
  }

  return s;
}

// See comments for pwrite above.
//
// Reads up to `num_bytes` bytes from absolute position `offset` into the
// buffer `src` (despite its name, `src` is the destination of the read).
//
// bytes_read: set to 0 on entry and to the count reported by ReadFile() on
//             success.
// Hitting end-of-file (ERROR_HANDLE_EOF) is not an error: the function
// returns OK with bytes_read == 0. Any other ReadFile() failure yields an
// IOError-family status; a `num_bytes` that does not fit in a DWORD yields
// InvalidArgument.
Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
             uint64_t offset, size_t& bytes_read) {
  Status s;
  bytes_read = 0;

  if (num_bytes > std::numeric_limits<DWORD>::max()) {
    return Status::InvalidArgument(
        "num_bytes is too large for a single read: " + file_data->GetName());
  }

  // Split the 64-bit offset into the two 32-bit halves OVERLAPPED expects.
  OVERLAPPED overlapped = { 0 };
  ULARGE_INTEGER offsetUnion;
  offsetUnion.QuadPart = offset;

  overlapped.Offset = offsetUnion.LowPart;
  overlapped.OffsetHigh = offsetUnion.HighPart;

  DWORD bytesRead = 0;

  if (FALSE == ReadFile(file_data->GetFileHandle(), src,
                        static_cast<DWORD>(num_bytes), &bytesRead,
                        &overlapped)) {
    auto lastError = GetLastError();
    // EOF is OK with zero bytes read
    if (lastError != ERROR_HANDLE_EOF) {
      s = IOErrorFromWindowsError("ReadFile failed: " + file_data->GetName(),
                                  lastError);
    }
  } else {
    bytes_read = bytesRead;
  }

  return s;
}

// SetFileInformationByHandle() is capable of fast pre-allocates.
// However, this does not change the file end position unless the file is // truncated and the pre-allocated space is not considered filled with zeros. Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size) { Status status; FILE_ALLOCATION_INFO alloc_info; alloc_info.AllocationSize.QuadPart = to_size; if (!SetFileInformationByHandle(hFile, FileAllocationInfo, &alloc_info, sizeof(FILE_ALLOCATION_INFO))) { auto lastError = GetLastError(); status = IOErrorFromWindowsError( "Failed to pre-allocate space: " + filename, lastError); } return status; } Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize) { Status status; FILE_END_OF_FILE_INFO end_of_file; end_of_file.EndOfFile.QuadPart = toSize; if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file, sizeof(FILE_END_OF_FILE_INFO))) { auto lastError = GetLastError(); status = IOErrorFromWindowsError("Failed to Set end of file: " + filename, lastError); } return status; } size_t GetUniqueIdFromFile(HANDLE /*hFile*/, char* /*id*/, size_t /*max_size*/) { // Returning 0 is safe as it causes the table reader to generate a unique ID. // This is suboptimal for performance as it prevents multiple table readers // for the same file from sharing cached blocks. For example, if users have // a low value for `max_open_files`, there can be many table readers opened // for the same file. // // TODO: this is a temporarily solution as it is safe but not optimal for // performance. For more details see discussion in // https://github.com/facebook/rocksdb/pull/5844. 
return 0; } //////////////////////////////////////////////////////////////////////////////////////////////////// // WinMmapReadableFile WinMmapReadableFile::WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap, const void* mapped_region, size_t length) : WinFileData(fileName, hFile, false /* use_direct_io */), hMap_(hMap), mapped_region_(mapped_region), length_(length) {} WinMmapReadableFile::~WinMmapReadableFile() { BOOL ret __attribute__((__unused__)); ret = ::UnmapViewOfFile(mapped_region_); assert(ret); ret = ::CloseHandle(hMap_); assert(ret); } Status WinMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; if (offset > length_) { *result = Slice(); return IOError(filename_, EINVAL); } else if (offset + n > length_) { n = length_ - static_cast(offset); } *result = Slice(reinterpret_cast(mapped_region_)+offset, n); return s; } Status WinMmapReadableFile::InvalidateCache(size_t offset, size_t length) { return Status::OK(); } size_t WinMmapReadableFile::GetUniqueId(char* id, size_t max_size) const { return GetUniqueIdFromFile(hFile_, id, max_size); } /////////////////////////////////////////////////////////////////////////////// /// WinMmapFile // Can only truncate or reserve to a sector size aligned if // used on files that are opened with Unbuffered I/O Status WinMmapFile::TruncateFile(uint64_t toSize) { return ftruncate(filename_, hFile_, toSize); } Status WinMmapFile::UnmapCurrentRegion() { Status status; if (mapped_begin_ != nullptr) { if (!::UnmapViewOfFile(mapped_begin_)) { status = IOErrorFromWindowsError( "Failed to unmap file view: " + filename_, GetLastError()); } // Move on to the next portion of the file file_offset_ += view_size_; // UnmapView automatically sends data to disk but not the metadata // which is good and provides some equivalent of fdatasync() on Linux // therefore, we donot need separate flag for metadata mapped_begin_ = nullptr; mapped_end_ = nullptr; dst_ = nullptr; 
last_sync_ = nullptr; pending_sync_ = false; } return status; } Status WinMmapFile::MapNewRegion() { Status status; assert(mapped_begin_ == nullptr); size_t minDiskSize = static_cast(file_offset_) + view_size_; if (minDiskSize > reserved_size_) { status = Allocate(file_offset_, view_size_); if (!status.ok()) { return status; } } // Need to remap if (hMap_ == NULL || reserved_size_ > mapping_size_) { if (hMap_ != NULL) { // Unmap the previous one BOOL ret __attribute__((__unused__)); ret = ::CloseHandle(hMap_); assert(ret); hMap_ = NULL; } ULARGE_INTEGER mappingSize; mappingSize.QuadPart = reserved_size_; hMap_ = CreateFileMappingA( hFile_, NULL, // Security attributes PAGE_READWRITE, // There is not a write only mode for mapping mappingSize.HighPart, // Enable mapping the whole file but the actual // amount mapped is determined by MapViewOfFile mappingSize.LowPart, NULL); // Mapping name if (NULL == hMap_) { return IOErrorFromWindowsError( "WindowsMmapFile failed to create file mapping for: " + filename_, GetLastError()); } mapping_size_ = reserved_size_; } ULARGE_INTEGER offset; offset.QuadPart = file_offset_; // View must begin at the granularity aligned offset mapped_begin_ = reinterpret_cast( MapViewOfFileEx(hMap_, FILE_MAP_WRITE, offset.HighPart, offset.LowPart, view_size_, NULL)); if (!mapped_begin_) { status = IOErrorFromWindowsError( "WindowsMmapFile failed to map file view: " + filename_, GetLastError()); } else { mapped_end_ = mapped_begin_ + view_size_; dst_ = mapped_begin_; last_sync_ = mapped_begin_; pending_sync_ = false; } return status; } Status WinMmapFile::PreallocateInternal(uint64_t spaceToReserve) { return fallocate(filename_, hFile_, spaceToReserve); } WinMmapFile::WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size, size_t allocation_granularity, const EnvOptions& options) : WinFileData(fname, hFile, false), WritableFile(options), hMap_(NULL), page_size_(page_size), allocation_granularity_(allocation_granularity), 
reserved_size_(0), mapping_size_(0), view_size_(0), mapped_begin_(nullptr), mapped_end_(nullptr), dst_(nullptr), last_sync_(nullptr), file_offset_(0), pending_sync_(false) { // Allocation granularity must be obtained from GetSystemInfo() and must be // a power of two. assert(allocation_granularity > 0); assert((allocation_granularity & (allocation_granularity - 1)) == 0); assert(page_size > 0); assert((page_size & (page_size - 1)) == 0); // Only for memory mapped writes assert(options.use_mmap_writes); // View size must be both the multiple of allocation_granularity AND the // page size and the granularity is usually a multiple of a page size. const size_t viewSize = 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode view_size_ = Roundup(viewSize, allocation_granularity_); } WinMmapFile::~WinMmapFile() { if (hFile_) { this->Close(); } } Status WinMmapFile::Append(const Slice& data) { const char* src = data.data(); size_t left = data.size(); while (left > 0) { assert(mapped_begin_ <= dst_); size_t avail = mapped_end_ - dst_; if (avail == 0) { Status s = UnmapCurrentRegion(); if (s.ok()) { s = MapNewRegion(); } if (!s.ok()) { return s; } } else { size_t n = std::min(left, avail); memcpy(dst_, src, n); dst_ += n; src += n; left -= n; pending_sync_ = true; } } // Now make sure that the last partial page is padded with zeros if needed size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_); if (bytesToPad > 0) { memset(dst_, 0, bytesToPad); } return Status::OK(); } // Means Close() will properly take care of truncate // and it does not need any additional information Status WinMmapFile::Truncate(uint64_t size) { return Status::OK(); } Status WinMmapFile::Close() { Status s; assert(NULL != hFile_); // We truncate to the precise size so no // uninitialized data at the end. SetEndOfFile // which we use does not write zeros and it is good. 
uint64_t targetSize = GetFileSize(); if (mapped_begin_ != nullptr) { // Sync before unmapping to make sure everything // is on disk and there is not a lazy writing // so we are deterministic with the tests Sync(); s = UnmapCurrentRegion(); } if (NULL != hMap_) { BOOL ret = ::CloseHandle(hMap_); if (!ret && s.ok()) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Failed to Close mapping for file: " + filename_, lastError); } hMap_ = NULL; } if (hFile_ != NULL) { TruncateFile(targetSize); BOOL ret = ::CloseHandle(hFile_); hFile_ = NULL; if (!ret && s.ok()) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Failed to close file map handle: " + filename_, lastError); } } return s; } Status WinMmapFile::Flush() { return Status::OK(); } // Flush only data Status WinMmapFile::Sync() { Status s; // Some writes occurred since last sync if (dst_ > last_sync_) { assert(mapped_begin_); assert(dst_); assert(dst_ > mapped_begin_); assert(dst_ < mapped_end_); size_t page_begin = TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_); size_t page_end = TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1); // Flush only the amount of that is a multiple of pages if (!::FlushViewOfFile(mapped_begin_ + page_begin, (page_end - page_begin) + page_size_)) { s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_, GetLastError()); } else { last_sync_ = dst_; } } return s; } /** * Flush data as well as metadata to stable storage. */ Status WinMmapFile::Fsync() { Status s = Sync(); // Flush metadata if (s.ok() && pending_sync_) { if (!::FlushFileBuffers(hFile_)) { s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_, GetLastError()); } pending_sync_ = false; } return s; } /** * Get the size of valid data in the file. This will not match the * size that is returned from the filesystem because we use mmap * to extend file by map_size every time. 
*/ uint64_t WinMmapFile::GetFileSize() { size_t used = dst_ - mapped_begin_; return file_offset_ + used; } Status WinMmapFile::InvalidateCache(size_t offset, size_t length) { return Status::OK(); } Status WinMmapFile::Allocate(uint64_t offset, uint64_t len) { Status status; TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds); // Make sure that we reserve an aligned amount of space // since the reservation block size is driven outside so we want // to check if we are ok with reservation here size_t spaceToReserve = Roundup(static_cast(offset + len), view_size_); // Nothing to do if (spaceToReserve <= reserved_size_) { return status; } IOSTATS_TIMER_GUARD(allocate_nanos); status = PreallocateInternal(spaceToReserve); if (status.ok()) { reserved_size_ = spaceToReserve; } return status; } size_t WinMmapFile::GetUniqueId(char* id, size_t max_size) const { return GetUniqueIdFromFile(hFile_, id, max_size); } ////////////////////////////////////////////////////////////////////////////////// // WinSequentialFile WinSequentialFile::WinSequentialFile(const std::string& fname, HANDLE f, const EnvOptions& options) : WinFileData(fname, f, options.use_direct_reads) {} WinSequentialFile::~WinSequentialFile() { assert(hFile_ != INVALID_HANDLE_VALUE); } Status WinSequentialFile::Read(size_t n, Slice* result, char* scratch) { Status s; size_t r = 0; assert(result != nullptr); if (WinFileData::use_direct_io()) { return Status::NotSupported("Read() does not support direct_io"); } // Windows ReadFile API accepts a DWORD. // While it is possible to read in a loop if n is too big // it is an unlikely case. 
if (n > std::numeric_limits::max()) { return Status::InvalidArgument("n is too big for a single ReadFile: " + filename_); } DWORD bytesToRead = static_cast(n); //cast is safe due to the check above DWORD bytesRead = 0; BOOL ret = ReadFile(hFile_, scratch, bytesToRead, &bytesRead, NULL); if (ret != FALSE) { r = bytesRead; } else { auto lastError = GetLastError(); if (lastError != ERROR_HANDLE_EOF) { s = IOErrorFromWindowsError("ReadFile failed: " + filename_, lastError); } } *result = Slice(scratch, r); return s; } Status WinSequentialFile::PositionedReadInternal(char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const { return pread(this, src, numBytes, offset, bytes_read); } Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) { Status s; if (!WinFileData::use_direct_io()) { return Status::NotSupported("This function is only used for direct_io"); } if (!IsSectorAligned(static_cast(offset)) || !IsSectorAligned(n)) { return Status::InvalidArgument( "WinSequentialFile::PositionedRead: offset is not properly aligned"); } size_t bytes_read = 0; // out param s = PositionedReadInternal(scratch, static_cast(n), offset, bytes_read); *result = Slice(scratch, bytes_read); return s; } Status WinSequentialFile::Skip(uint64_t n) { // Can't handle more than signed max as SetFilePointerEx accepts a signed 64-bit // integer. As such it is a highly unlikley case to have n so large. 
if (n > static_cast(std::numeric_limits::max())) { return Status::InvalidArgument("n is too large for a single SetFilePointerEx() call" + filename_); } LARGE_INTEGER li; li.QuadPart = static_cast(n); //cast is safe due to the check above BOOL ret = SetFilePointerEx(hFile_, li, NULL, FILE_CURRENT); if (ret == FALSE) { auto lastError = GetLastError(); return IOErrorFromWindowsError("Skip SetFilePointerEx():" + filename_, lastError); } return Status::OK(); } Status WinSequentialFile::InvalidateCache(size_t offset, size_t length) { return Status::OK(); } ////////////////////////////////////////////////////////////////////////////////////////////////// /// WinRandomAccessBase inline Status WinRandomAccessImpl::PositionedReadInternal(char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const { return pread(file_base_, src, numBytes, offset, bytes_read); } inline WinRandomAccessImpl::WinRandomAccessImpl(WinFileData* file_base, size_t alignment, const EnvOptions& options) : file_base_(file_base), alignment_(alignment) { assert(!options.use_mmap_reads); } inline Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; // Check buffer alignment if (file_base_->use_direct_io()) { if (!IsSectorAligned(static_cast(offset)) || !IsAligned(alignment_, scratch)) { return Status::InvalidArgument( "WinRandomAccessImpl::ReadImpl: offset or scratch is not properly aligned"); } } if (n == 0) { *result = Slice(scratch, 0); return s; } size_t bytes_read = 0; s = PositionedReadInternal(scratch, n, offset, bytes_read); *result = Slice(scratch, bytes_read); return s; } /////////////////////////////////////////////////////////////////////////////////////////////////// /// WinRandomAccessFile WinRandomAccessFile::WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, const EnvOptions& options) : WinFileData(fname, hFile, options.use_direct_reads), WinRandomAccessImpl(this, alignment, options) {} 
WinRandomAccessFile::~WinRandomAccessFile() { } Status WinRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { return ReadImpl(offset, n, result, scratch); } Status WinRandomAccessFile::InvalidateCache(size_t offset, size_t length) { return Status::OK(); } size_t WinRandomAccessFile::GetUniqueId(char* id, size_t max_size) const { return GetUniqueIdFromFile(GetFileHandle(), id, max_size); } size_t WinRandomAccessFile::GetRequiredBufferAlignment() const { return GetAlignment(); } ///////////////////////////////////////////////////////////////////////////// // WinWritableImpl // inline Status WinWritableImpl::PreallocateInternal(uint64_t spaceToReserve) { return fallocate(file_data_->GetName(), file_data_->GetFileHandle(), spaceToReserve); } inline WinWritableImpl::WinWritableImpl(WinFileData* file_data, size_t alignment) : file_data_(file_data), alignment_(alignment), next_write_offset_(0), reservedsize_(0) { // Query current position in case ReopenWritableFile is called // This position is only important for buffered writes // for unbuffered writes we explicitely specify the position. 
LARGE_INTEGER zero_move; zero_move.QuadPart = 0; // Do not move LARGE_INTEGER pos; pos.QuadPart = 0; BOOL ret = SetFilePointerEx(file_data_->GetFileHandle(), zero_move, &pos, FILE_CURRENT); // Querying no supped to fail if (ret != 0) { next_write_offset_ = pos.QuadPart; } else { assert(false); } } inline Status WinWritableImpl::AppendImpl(const Slice& data) { Status s; if (data.size() > std::numeric_limits::max()) { return Status::InvalidArgument("data is too long for a single write" + file_data_->GetName()); } size_t bytes_written = 0; // out param if (file_data_->use_direct_io()) { // With no offset specified we are appending // to the end of the file assert(IsSectorAligned(next_write_offset_)); if (!IsSectorAligned(data.size()) || !IsAligned(static_cast(GetAlignement()), data.data())) { s = Status::InvalidArgument( "WriteData must be page aligned, size must be sector aligned"); } else { s = pwrite(file_data_, data, next_write_offset_, bytes_written); } } else { DWORD bytesWritten = 0; if (!WriteFile(file_data_->GetFileHandle(), data.data(), static_cast(data.size()), &bytesWritten, NULL)) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "Failed to WriteFile: " + file_data_->GetName(), lastError); } else { bytes_written = bytesWritten; } } if(s.ok()) { if (bytes_written == data.size()) { // This matters for direct_io cases where // we rely on the fact that next_write_offset_ // is sector aligned next_write_offset_ += bytes_written; } else { s = Status::IOError("Failed to write all bytes: " + file_data_->GetName()); } } return s; } inline Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) { if(file_data_->use_direct_io()) { if (!IsSectorAligned(static_cast(offset)) || !IsSectorAligned(data.size()) || !IsAligned(static_cast(GetAlignement()), data.data())) { return Status::InvalidArgument( "Data and offset must be page aligned, size must be sector aligned"); } } size_t bytes_written = 0; Status s = pwrite(file_data_, data, 
offset, bytes_written); if(s.ok()) { if (bytes_written == data.size()) { // For sequential write this would be simple // size extension by data.size() uint64_t write_end = offset + bytes_written; if (write_end >= next_write_offset_) { next_write_offset_ = write_end; } } else { s = Status::IOError("Failed to write all of the requested data: " + file_data_->GetName()); } } return s; } inline Status WinWritableImpl::TruncateImpl(uint64_t size) { // It is tempting to check for the size for sector alignment // but truncation may come at the end and there is not a requirement // for this to be sector aligned so long as we do not attempt to write // after that. The interface docs state that the behavior is undefined // in that case. Status s = ftruncate(file_data_->GetName(), file_data_->GetFileHandle(), size); if (s.ok()) { next_write_offset_ = size; } return s; } inline Status WinWritableImpl::CloseImpl() { Status s; auto hFile = file_data_->GetFileHandle(); assert(INVALID_HANDLE_VALUE != hFile); if (!::FlushFileBuffers(hFile)) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("FlushFileBuffers failed at Close() for: " + file_data_->GetName(), lastError); } if(!file_data_->CloseFile() && s.ok()) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("CloseHandle failed for: " + file_data_->GetName(), lastError); } return s; } inline Status WinWritableImpl::SyncImpl() { Status s; if (!::FlushFileBuffers (file_data_->GetFileHandle())) { auto lastError = GetLastError(); s = IOErrorFromWindowsError( "FlushFileBuffers failed at Sync() for: " + file_data_->GetName(), lastError); } return s; } inline Status WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) { Status status; TEST_KILL_RANDOM("WinWritableFile::Allocate", rocksdb_kill_odds); // Make sure that we reserve an aligned amount of space // since the reservation block size is driven outside so we want // to check if we are ok with reservation here size_t spaceToReserve = 
Roundup(static_cast(offset + len), static_cast(alignment_)); // Nothing to do if (spaceToReserve <= reservedsize_) { return status; } IOSTATS_TIMER_GUARD(allocate_nanos); status = PreallocateInternal(spaceToReserve); if (status.ok()) { reservedsize_ = spaceToReserve; } return status; } //////////////////////////////////////////////////////////////////////////////// /// WinWritableFile WinWritableFile::WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment, size_t /* capacity */, const EnvOptions& options) : WinFileData(fname, hFile, options.use_direct_writes), WinWritableImpl(this, alignment), WritableFile(options) { assert(!options.use_mmap_writes); } WinWritableFile::~WinWritableFile() { } // Indicates if the class makes use of direct I/O bool WinWritableFile::use_direct_io() const { return WinFileData::use_direct_io(); } size_t WinWritableFile::GetRequiredBufferAlignment() const { return static_cast(GetAlignement()); } Status WinWritableFile::Append(const Slice& data) { return AppendImpl(data); } Status WinWritableFile::PositionedAppend(const Slice& data, uint64_t offset) { return PositionedAppendImpl(data, offset); } // Need to implement this so the file is truncated correctly // when buffered and unbuffered mode Status WinWritableFile::Truncate(uint64_t size) { return TruncateImpl(size); } Status WinWritableFile::Close() { return CloseImpl(); } // write out the cached data to the OS cache // This is now taken care of the WritableFileWriter Status WinWritableFile::Flush() { return Status::OK(); } Status WinWritableFile::Sync() { return SyncImpl(); } Status WinWritableFile::Fsync() { return SyncImpl(); } bool WinWritableFile::IsSyncThreadSafe() const { return true; } uint64_t WinWritableFile::GetFileSize() { return GetFileNextWriteOffset(); } Status WinWritableFile::Allocate(uint64_t offset, uint64_t len) { return AllocateImpl(offset, len); } size_t WinWritableFile::GetUniqueId(char* id, size_t max_size) const { return 
GetUniqueIdFromFile(GetFileHandle(), id, max_size); } ///////////////////////////////////////////////////////////////////////// /// WinRandomRWFile WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment, const EnvOptions& options) : WinFileData(fname, hFile, options.use_direct_reads && options.use_direct_writes), WinRandomAccessImpl(this, alignment, options), WinWritableImpl(this, alignment) {} bool WinRandomRWFile::use_direct_io() const { return WinFileData::use_direct_io(); } size_t WinRandomRWFile::GetRequiredBufferAlignment() const { return static_cast(GetAlignement()); } Status WinRandomRWFile::Write(uint64_t offset, const Slice & data) { return PositionedAppendImpl(data, offset); } Status WinRandomRWFile::Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { return ReadImpl(offset, n, result, scratch); } Status WinRandomRWFile::Flush() { return Status::OK(); } Status WinRandomRWFile::Sync() { return SyncImpl(); } Status WinRandomRWFile::Close() { return CloseImpl(); } ////////////////////////////////////////////////////////////////////////// /// WinMemoryMappedBufer WinMemoryMappedBuffer::~WinMemoryMappedBuffer() { BOOL ret #if defined(_MSC_VER) = FALSE; #else __attribute__((__unused__)); #endif if (base_ != nullptr) { ret = ::UnmapViewOfFile(base_); assert(ret); base_ = nullptr; } if (map_handle_ != NULL && map_handle_ != INVALID_HANDLE_VALUE) { ret = ::CloseHandle(map_handle_); assert(ret); map_handle_ = NULL; } if (file_handle_ != NULL && file_handle_ != INVALID_HANDLE_VALUE) { ret = ::CloseHandle(file_handle_); assert(ret); file_handle_ = NULL; } } ////////////////////////////////////////////////////////////////////////// /// WinDirectory Status WinDirectory::Fsync() { return Status::OK(); } size_t WinDirectory::GetUniqueId(char* id, size_t max_size) const { return GetUniqueIdFromFile(handle_, id, max_size); } ////////////////////////////////////////////////////////////////////////// /// WinFileLock 
WinFileLock::~WinFileLock() { BOOL ret __attribute__((__unused__)); ret = ::CloseHandle(hFile_); assert(ret); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/io_win.h000066400000000000000000000346641370372246700165000ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include "rocksdb/status.h" #include "rocksdb/env.h" #include "util/aligned_buffer.h" #include namespace ROCKSDB_NAMESPACE { namespace port { std::string GetWindowsErrSz(DWORD err); inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) { return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL)) ? Status::NoSpace(context, GetWindowsErrSz(err)) : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND)) ? Status::PathNotFound(context, GetWindowsErrSz(err)) : Status::IOError(context, GetWindowsErrSz(err)); } inline Status IOErrorFromLastWindowsError(const std::string& context) { return IOErrorFromWindowsError(context, GetLastError()); } inline Status IOError(const std::string& context, int err_number) { return (err_number == ENOSPC) ? Status::NoSpace(context, strerror(err_number)) : (err_number == ENOENT) ? 
Status::PathNotFound(context, strerror(err_number)) : Status::IOError(context, strerror(err_number)); } class WinFileData; Status pwrite(const WinFileData* file_data, const Slice& data, uint64_t offset, size_t& bytes_written); Status pread(const WinFileData* file_data, char* src, size_t num_bytes, uint64_t offset, size_t& bytes_read); Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size); Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize); size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size); class WinFileData { protected: const std::string filename_; HANDLE hFile_; // If true, the I/O issued would be direct I/O which the buffer // will need to be aligned (not sure there is a guarantee that the buffer // passed in is aligned). const bool use_direct_io_; public: // We want this class be usable both for inheritance (prive // or protected) and for containment so __ctor and __dtor public WinFileData(const std::string& filename, HANDLE hFile, bool direct_io) : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {} virtual ~WinFileData() { this->CloseFile(); } bool CloseFile() { bool result = true; if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) { result = ::CloseHandle(hFile_); assert(result); hFile_ = NULL; } return result; } const std::string& GetName() const { return filename_; } HANDLE GetFileHandle() const { return hFile_; } bool use_direct_io() const { return use_direct_io_; } WinFileData(const WinFileData&) = delete; WinFileData& operator=(const WinFileData&) = delete; }; class WinSequentialFile : protected WinFileData, public SequentialFile { // Override for behavior change when creating a custom env virtual Status PositionedReadInternal(char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const; public: WinSequentialFile(const std::string& fname, HANDLE f, const EnvOptions& options); ~WinSequentialFile(); WinSequentialFile(const WinSequentialFile&) = delete; 
WinSequentialFile& operator=(const WinSequentialFile&) = delete; virtual Status Read(size_t n, Slice* result, char* scratch) override; virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) override; virtual Status Skip(uint64_t n) override; virtual Status InvalidateCache(size_t offset, size_t length) override; virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } }; // mmap() based random-access class WinMmapReadableFile : private WinFileData, public RandomAccessFile { HANDLE hMap_; const void* mapped_region_; const size_t length_; public: // mapped_region_[0,length-1] contains the mmapped contents of the file. WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap, const void* mapped_region, size_t length); ~WinMmapReadableFile(); WinMmapReadableFile(const WinMmapReadableFile&) = delete; WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete; virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override; virtual Status InvalidateCache(size_t offset, size_t length) override; virtual size_t GetUniqueId(char* id, size_t max_size) const override; }; // We preallocate and use memcpy to append new // data to the file. This is safe since we either properly close the // file before reading from it, or for log files, the reading code // knows enough to skip zero suffixes. class WinMmapFile : private WinFileData, public WritableFile { private: HANDLE hMap_; const size_t page_size_; // We flush the mapping view in page_size // increments. 
We may decide if this is a memory // page size or SSD page size const size_t allocation_granularity_; // View must start at such a granularity size_t reserved_size_; // Preallocated size size_t mapping_size_; // The max size of the mapping object // we want to guess the final file size to minimize the remapping size_t view_size_; // How much memory to map into a view at a time char* mapped_begin_; // Must begin at the file offset that is aligned with // allocation_granularity_ char* mapped_end_; char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_]) char* last_sync_; // Where have we synced up to uint64_t file_offset_; // Offset of mapped_begin_ in file // Do we have unsynced writes? bool pending_sync_; // Can only truncate or reserve to a sector size aligned if // used on files that are opened with Unbuffered I/O Status TruncateFile(uint64_t toSize); Status UnmapCurrentRegion(); Status MapNewRegion(); virtual Status PreallocateInternal(uint64_t spaceToReserve); public: WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size, size_t allocation_granularity, const EnvOptions& options); ~WinMmapFile(); WinMmapFile(const WinMmapFile&) = delete; WinMmapFile& operator=(const WinMmapFile&) = delete; virtual Status Append(const Slice& data) override; // Means Close() will properly take care of truncate // and it does not need any additional information virtual Status Truncate(uint64_t size) override; virtual Status Close() override; virtual Status Flush() override; // Flush only data virtual Status Sync() override; /** * Flush data as well as metadata to stable storage. */ virtual Status Fsync() override; /** * Get the size of valid data in the file. This will not match the * size that is returned from the filesystem because we use mmap * to extend file by map_size every time. 
*/ virtual uint64_t GetFileSize() override; virtual Status InvalidateCache(size_t offset, size_t length) override; virtual Status Allocate(uint64_t offset, uint64_t len) override; virtual size_t GetUniqueId(char* id, size_t max_size) const override; }; class WinRandomAccessImpl { protected: WinFileData* file_base_; size_t alignment_; // Override for behavior change when creating a custom env virtual Status PositionedReadInternal(char* src, size_t numBytes, uint64_t offset, size_t& bytes_read) const; WinRandomAccessImpl(WinFileData* file_base, size_t alignment, const EnvOptions& options); virtual ~WinRandomAccessImpl() {} Status ReadImpl(uint64_t offset, size_t n, Slice* result, char* scratch) const; size_t GetAlignment() const { return alignment_; } public: WinRandomAccessImpl(const WinRandomAccessImpl&) = delete; WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete; }; // pread() based random-access class WinRandomAccessFile : private WinFileData, protected WinRandomAccessImpl, // Want to be able to override // PositionedReadInternal public RandomAccessFile { public: WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment, const EnvOptions& options); ~WinRandomAccessFile(); virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override; virtual size_t GetUniqueId(char* id, size_t max_size) const override; virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); } virtual Status InvalidateCache(size_t offset, size_t length) override; virtual size_t GetRequiredBufferAlignment() const override; }; // This is a sequential write class. It has been mimicked (as others) after // the original Posix class. We add support for unbuffered I/O on windows as // well // we utilize the original buffer as an alignment buffer to write directly to // file with no buffering. 
// No buffering requires that the provided buffer is aligned to the physical // sector size (SSD page size) and // that all SetFilePointer() operations to occur with such an alignment. // We thus always write in sector/page size increments to the drive and leave // the tail for the next write OR for Close() at which point we pad with zeros. // No padding is required for // buffered access. class WinWritableImpl { protected: WinFileData* file_data_; const uint64_t alignment_; uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND uint64_t reservedsize_; // how far we have reserved space virtual Status PreallocateInternal(uint64_t spaceToReserve); WinWritableImpl(WinFileData* file_data, size_t alignment); ~WinWritableImpl() {} uint64_t GetAlignement() const { return alignment_; } Status AppendImpl(const Slice& data); // Requires that the data is aligned as specified by // GetRequiredBufferAlignment() Status PositionedAppendImpl(const Slice& data, uint64_t offset); Status TruncateImpl(uint64_t size); Status CloseImpl(); Status SyncImpl(); uint64_t GetFileNextWriteOffset() { // Double accounting now here with WritableFileWriter // and this size will be wrong when unbuffered access is used // but tests implement their own writable files and do not use // WritableFileWrapper // so we need to squeeze a square peg through // a round hole here. 
return next_write_offset_; } Status AllocateImpl(uint64_t offset, uint64_t len); public: WinWritableImpl(const WinWritableImpl&) = delete; WinWritableImpl& operator=(const WinWritableImpl&) = delete; }; class WinWritableFile : private WinFileData, protected WinWritableImpl, public WritableFile { public: WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment, size_t capacity, const EnvOptions& options); ~WinWritableFile(); virtual Status Append(const Slice& data) override; // Requires that the data is aligned as specified by // GetRequiredBufferAlignment() virtual Status PositionedAppend(const Slice& data, uint64_t offset) override; // Need to implement this so the file is truncated correctly // when buffered and unbuffered mode virtual Status Truncate(uint64_t size) override; virtual Status Close() override; // write out the cached data to the OS cache // This is now taken care of the WritableFileWriter virtual Status Flush() override; virtual Status Sync() override; virtual Status Fsync() override; virtual bool IsSyncThreadSafe() const override; // Indicates if the class makes use of direct I/O // Use PositionedAppend virtual bool use_direct_io() const override; virtual size_t GetRequiredBufferAlignment() const override; virtual uint64_t GetFileSize() override; virtual Status Allocate(uint64_t offset, uint64_t len) override; virtual size_t GetUniqueId(char* id, size_t max_size) const override; }; class WinRandomRWFile : private WinFileData, protected WinRandomAccessImpl, protected WinWritableImpl, public RandomRWFile { public: WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment, const EnvOptions& options); ~WinRandomRWFile() {} // Indicates if the class makes use of direct I/O // If false you must pass aligned buffer to Write() virtual bool use_direct_io() const override; // Use the returned alignment value to allocate aligned // buffer for Write() when use_direct_io() returns true virtual size_t GetRequiredBufferAlignment() 
const override; // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. // Pass aligned buffer when use_direct_io() returns true. virtual Status Write(uint64_t offset, const Slice& data) override; // Read up to `n` bytes starting from offset `offset` and store them in // result, provided `scratch` size should be at least `n`. // Returns Status::OK() on success. virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override; virtual Status Flush() override; virtual Status Sync() override; virtual Status Fsync() { return Sync(); } virtual Status Close() override; }; class WinMemoryMappedBuffer : public MemoryMappedFileBuffer { private: HANDLE file_handle_; HANDLE map_handle_; public: WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) : MemoryMappedFileBuffer(base, size), file_handle_(file_handle), map_handle_(map_handle) {} ~WinMemoryMappedBuffer() override; }; class WinDirectory : public Directory { HANDLE handle_; public: explicit WinDirectory(HANDLE h) noexcept : handle_(h) { assert(handle_ != INVALID_HANDLE_VALUE); } ~WinDirectory() { ::CloseHandle(handle_); } virtual Status Fsync() override; size_t GetUniqueId(char* id, size_t max_size) const override; }; class WinFileLock : public FileLock { public: explicit WinFileLock(HANDLE hFile) : hFile_(hFile) { assert(hFile != NULL); assert(hFile != INVALID_HANDLE_VALUE); } ~WinFileLock(); private: HANDLE hFile_; }; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/port_win.cc000066400000000000000000000153661370372246700172110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #if !defined(OS_WIN) && !defined(WIN32) && !defined(_WIN32) #error Windows Specific Code #endif #include "port/win/port_win.h" #include #include "port/port_dirent.h" #include "port/sys_time.h" #include #include #include #include #include #include #include #ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES // utf8 <-> utf16 #include #include #include #endif #include "logging/logging.h" namespace ROCKSDB_NAMESPACE { extern const bool kDefaultToAdaptiveMutex = false; namespace port { #ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES std::string utf16_to_utf8(const std::wstring& utf16) { std::wstring_convert,wchar_t> convert; return convert.to_bytes(utf16); } std::wstring utf8_to_utf16(const std::string& utf8) { std::wstring_convert> converter; return converter.from_bytes(utf8); } #endif void gettimeofday(struct timeval* tv, struct timezone* /* tz */) { using namespace std::chrono; microseconds usNow( duration_cast(system_clock::now().time_since_epoch())); seconds secNow(duration_cast(usNow)); tv->tv_sec = static_cast(secNow.count()); tv->tv_usec = static_cast(usNow.count() - duration_cast(secNow).count()); } Mutex::~Mutex() {} CondVar::~CondVar() {} void CondVar::Wait() { // Caller must ensure that mutex is held prior to calling this method std::unique_lock lk(mu_->getLock(), std::adopt_lock); #ifndef NDEBUG mu_->locked_ = false; #endif cv_.wait(lk); #ifndef NDEBUG mu_->locked_ = true; #endif // Release ownership of the lock as we don't want it to be unlocked when // it goes out of scope (as we adopted the lock and didn't lock it ourselves) lk.release(); } bool CondVar::TimedWait(uint64_t abs_time_us) { using namespace std::chrono; // MSVC++ library implements wait_until in terms of wait_for so // we need to convert absolute wait into relative wait. 
microseconds usAbsTime(abs_time_us); microseconds usNow( duration_cast(system_clock::now().time_since_epoch())); microseconds relTimeUs = (usAbsTime > usNow) ? (usAbsTime - usNow) : microseconds::zero(); // Caller must ensure that mutex is held prior to calling this method std::unique_lock lk(mu_->getLock(), std::adopt_lock); #ifndef NDEBUG mu_->locked_ = false; #endif std::cv_status cvStatus = cv_.wait_for(lk, relTimeUs); #ifndef NDEBUG mu_->locked_ = true; #endif // Release ownership of the lock as we don't want it to be unlocked when // it goes out of scope (as we adopted the lock and didn't lock it ourselves) lk.release(); if (cvStatus == std::cv_status::timeout) { return true; } return false; } void CondVar::Signal() { cv_.notify_one(); } void CondVar::SignalAll() { cv_.notify_all(); } int PhysicalCoreID() { return GetCurrentProcessorNumber(); } void InitOnce(OnceType* once, void (*initializer)()) { std::call_once(once->flag_, initializer); } // Private structure, exposed only by pointer struct DIR { HANDLE handle_; bool firstread_; RX_WIN32_FIND_DATA data_; dirent entry_; DIR() : handle_(INVALID_HANDLE_VALUE), firstread_(true) {} DIR(const DIR&) = delete; DIR& operator=(const DIR&) = delete; ~DIR() { if (INVALID_HANDLE_VALUE != handle_) { ::FindClose(handle_); } } }; DIR* opendir(const char* name) { if (!name || *name == 0) { errno = ENOENT; return nullptr; } std::string pattern(name); pattern.append("\\").append("*"); std::unique_ptr dir(new DIR); dir->handle_ = RX_FindFirstFileEx(RX_FN(pattern).c_str(), FindExInfoBasic, // Do not want alternative name &dir->data_, FindExSearchNameMatch, NULL, // lpSearchFilter 0); if (dir->handle_ == INVALID_HANDLE_VALUE) { return nullptr; } RX_FILESTRING x(dir->data_.cFileName, RX_FNLEN(dir->data_.cFileName)); strcpy_s(dir->entry_.d_name, sizeof(dir->entry_.d_name), FN_TO_RX(x).c_str()); return dir.release(); } struct dirent* readdir(DIR* dirp) { if (!dirp || dirp->handle_ == INVALID_HANDLE_VALUE) { errno = EBADF; return 
nullptr; } if (dirp->firstread_) { dirp->firstread_ = false; return &dirp->entry_; } auto ret = RX_FindNextFile(dirp->handle_, &dirp->data_); if (ret == 0) { return nullptr; } RX_FILESTRING x(dirp->data_.cFileName, RX_FNLEN(dirp->data_.cFileName)); strcpy_s(dirp->entry_.d_name, sizeof(dirp->entry_.d_name), FN_TO_RX(x).c_str()); return &dirp->entry_; } int closedir(DIR* dirp) { delete dirp; return 0; } int truncate(const char* path, int64_t length) { if (path == nullptr) { errno = EFAULT; return -1; } return ROCKSDB_NAMESPACE::port::Truncate(path, length); } int Truncate(std::string path, int64_t len) { if (len < 0) { errno = EINVAL; return -1; } HANDLE hFile = RX_CreateFile(RX_FN(path).c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, // Security attrs OPEN_EXISTING, // Truncate existing file only FILE_ATTRIBUTE_NORMAL, NULL); if (INVALID_HANDLE_VALUE == hFile) { auto lastError = GetLastError(); if (lastError == ERROR_FILE_NOT_FOUND) { errno = ENOENT; } else if (lastError == ERROR_ACCESS_DENIED) { errno = EACCES; } else { errno = EIO; } return -1; } int result = 0; FILE_END_OF_FILE_INFO end_of_file; end_of_file.EndOfFile.QuadPart = len; if (!SetFileInformationByHandle(hFile, FileEndOfFileInfo, &end_of_file, sizeof(FILE_END_OF_FILE_INFO))) { errno = EIO; result = -1; } CloseHandle(hFile); return result; } void Crash(const std::string& srcfile, int srcline) { fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline); fflush(stdout); abort(); } int GetMaxOpenFiles() { return -1; } // Assume 4KB page size const size_t kPageSize = 4U * 1024U; void SetCpuPriority(ThreadId id, CpuPriority priority) { // Not supported (void)id; (void)priority; } } // namespace port } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/port_win.h000066400000000000000000000231501370372246700170410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // See port_example.h for documentation for the following types/functions. #pragma once // Always want minimum headers #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #include #include #include #include #include #include #include #include #include #include #include "port/win/win_thread.h" #include "rocksdb/options.h" #undef min #undef max #undef DeleteFile #undef GetCurrentTime #ifndef strcasecmp #define strcasecmp _stricmp #endif #undef GetCurrentTime #undef DeleteFile #ifndef _SSIZE_T_DEFINED typedef SSIZE_T ssize_t; #endif // size_t printf formatting named in the manner of C99 standard formatting // strings such as PRIu64 // in fact, we could use that one #ifndef ROCKSDB_PRIszt #define ROCKSDB_PRIszt "Iu" #endif #ifdef _MSC_VER #define __attribute__(A) // Thread local storage on Linux // There is thread_local in C++11 #ifndef __thread #define __thread __declspec(thread) #endif #endif namespace ROCKSDB_NAMESPACE { #define PREFETCH(addr, rw, locality) extern const bool kDefaultToAdaptiveMutex; namespace port { // VS < 2015 #if defined(_MSC_VER) && (_MSC_VER < 1900) // VS 15 has snprintf #define snprintf _snprintf #define ROCKSDB_NOEXCEPT // std::numeric_limits::max() is not constexpr just yet // therefore, use the same limits // For use at db/file_indexer.h kLevelMaxIndex const uint32_t kMaxUint32 = UINT32_MAX; const int kMaxInt32 = INT32_MAX; const int kMinInt32 = INT32_MIN; const int64_t kMaxInt64 = INT64_MAX; const int64_t kMinInt64 = INT64_MIN; const uint64_t kMaxUint64 = UINT64_MAX; #ifdef _WIN64 const size_t kMaxSizet = 
UINT64_MAX; #else const size_t kMaxSizet = UINT_MAX; #endif #else // VS >= 2015 or MinGW #define ROCKSDB_NOEXCEPT noexcept // For use at db/file_indexer.h kLevelMaxIndex const uint32_t kMaxUint32 = std::numeric_limits::max(); const int kMaxInt32 = std::numeric_limits::max(); const int kMinInt32 = std::numeric_limits::min(); const uint64_t kMaxUint64 = std::numeric_limits::max(); const int64_t kMaxInt64 = std::numeric_limits::max(); const int64_t kMinInt64 = std::numeric_limits::min(); const size_t kMaxSizet = std::numeric_limits::max(); #endif //_MSC_VER // "Windows is designed to run on little-endian computer architectures." // https://docs.microsoft.com/en-us/windows/win32/sysinfo/registry-value-types constexpr bool kLittleEndian = true; #undef PLATFORM_IS_LITTLE_ENDIAN class CondVar; class Mutex { public: /* implicit */ Mutex(bool adaptive = kDefaultToAdaptiveMutex) #ifndef NDEBUG : locked_(false) #endif { } ~Mutex(); void Lock() { mutex_.lock(); #ifndef NDEBUG locked_ = true; #endif } void Unlock() { #ifndef NDEBUG locked_ = false; #endif mutex_.unlock(); } // this will assert if the mutex is not locked // it does NOT verify that mutex is held by a calling thread void AssertHeld() { #ifndef NDEBUG assert(locked_); #endif } // Mutex is move only with lock ownership transfer Mutex(const Mutex&) = delete; void operator=(const Mutex&) = delete; private: friend class CondVar; std::mutex& getLock() { return mutex_; } std::mutex mutex_; #ifndef NDEBUG bool locked_; #endif }; class RWMutex { public: RWMutex() { InitializeSRWLock(&srwLock_); } // No copying allowed RWMutex(const RWMutex&) = delete; void operator=(const RWMutex&) = delete; void ReadLock() { AcquireSRWLockShared(&srwLock_); } void WriteLock() { AcquireSRWLockExclusive(&srwLock_); } void ReadUnlock() { ReleaseSRWLockShared(&srwLock_); } void WriteUnlock() { ReleaseSRWLockExclusive(&srwLock_); } // Empty as in POSIX void AssertHeld() {} private: SRWLOCK srwLock_; }; class CondVar { public: explicit 
CondVar(Mutex* mu) : mu_(mu) { } ~CondVar(); void Wait(); bool TimedWait(uint64_t expiration_time); void Signal(); void SignalAll(); // Condition var is not copy/move constructible CondVar(const CondVar&) = delete; CondVar& operator=(const CondVar&) = delete; CondVar(CondVar&&) = delete; CondVar& operator=(CondVar&&) = delete; private: std::condition_variable cv_; Mutex* mu_; }; #ifdef _POSIX_THREADS using Thread = std::thread; #else // Wrapper around the platform efficient // or otherwise preferrable implementation using Thread = WindowsThread; #endif // OnceInit type helps emulate // Posix semantics with initialization // adopted in the project struct OnceType { struct Init {}; OnceType() {} OnceType(const Init&) {} OnceType(const OnceType&) = delete; OnceType& operator=(const OnceType&) = delete; std::once_flag flag_; }; #define LEVELDB_ONCE_INIT port::OnceType::Init() extern void InitOnce(OnceType* once, void (*initializer)()); #ifndef CACHE_LINE_SIZE #define CACHE_LINE_SIZE 64U #endif #ifdef ROCKSDB_JEMALLOC // Separate inlines so they can be replaced if needed void* jemalloc_aligned_alloc(size_t size, size_t alignment) ROCKSDB_NOEXCEPT; void jemalloc_aligned_free(void* p) ROCKSDB_NOEXCEPT; #endif inline void *cacheline_aligned_alloc(size_t size) { #ifdef ROCKSDB_JEMALLOC return jemalloc_aligned_alloc(size, CACHE_LINE_SIZE); #else return _aligned_malloc(size, CACHE_LINE_SIZE); #endif } inline void cacheline_aligned_free(void *memblock) { #ifdef ROCKSDB_JEMALLOC jemalloc_aligned_free(memblock); #else _aligned_free(memblock); #endif } extern const size_t kPageSize; // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52991 for MINGW32 // could not be worked around with by -mno-ms-bitfields #ifndef __MINGW32__ #define ALIGN_AS(n) __declspec(align(n)) #else #define ALIGN_AS(n) #endif static inline void AsmVolatilePause() { #if defined(_M_IX86) || defined(_M_X64) YieldProcessor(); #endif // it would be nice to get "wfe" on ARM here } extern int PhysicalCoreID(); // For 
Thread Local Storage abstraction typedef DWORD pthread_key_t; inline int pthread_key_create(pthread_key_t* key, void (*destructor)(void*)) { // Not used (void)destructor; pthread_key_t k = TlsAlloc(); if (TLS_OUT_OF_INDEXES == k) { return ENOMEM; } *key = k; return 0; } inline int pthread_key_delete(pthread_key_t key) { if (!TlsFree(key)) { return EINVAL; } return 0; } inline int pthread_setspecific(pthread_key_t key, const void* value) { if (!TlsSetValue(key, const_cast(value))) { return ENOMEM; } return 0; } inline void* pthread_getspecific(pthread_key_t key) { void* result = TlsGetValue(key); if (!result) { if (GetLastError() != ERROR_SUCCESS) { errno = EINVAL; } else { errno = NOERROR; } } return result; } // UNIX equiv although errno numbers will be off // using C-runtime to implement. Note, this does not // feel space with zeros in case the file is extended. int truncate(const char* path, int64_t length); int Truncate(std::string path, int64_t length); void Crash(const std::string& srcfile, int srcline); extern int GetMaxOpenFiles(); std::string utf16_to_utf8(const std::wstring& utf16); std::wstring utf8_to_utf16(const std::string& utf8); using ThreadId = int; extern void SetCpuPriority(ThreadId id, CpuPriority priority); } // namespace port #ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES #define RX_FILESTRING std::wstring #define RX_FN(a) ROCKSDB_NAMESPACE::port::utf8_to_utf16(a) #define FN_TO_RX(a) ROCKSDB_NAMESPACE::port::utf16_to_utf8(a) #define RX_FNLEN(a) ::wcslen(a) #define RX_DeleteFile DeleteFileW #define RX_CreateFile CreateFileW #define RX_CreateFileMapping CreateFileMappingW #define RX_GetFileAttributesEx GetFileAttributesExW #define RX_FindFirstFileEx FindFirstFileExW #define RX_FindNextFile FindNextFileW #define RX_WIN32_FIND_DATA WIN32_FIND_DATAW #define RX_CreateDirectory CreateDirectoryW #define RX_RemoveDirectory RemoveDirectoryW #define RX_GetFileAttributesEx GetFileAttributesExW #define RX_MoveFileEx MoveFileExW #define RX_CreateHardLink 
CreateHardLinkW #define RX_PathIsRelative PathIsRelativeW #define RX_GetCurrentDirectory GetCurrentDirectoryW #define RX_GetDiskFreeSpaceEx GetDiskFreeSpaceExW #define RX_PathIsDirectory PathIsDirectoryW #else #define RX_FILESTRING std::string #define RX_FN(a) a #define FN_TO_RX(a) a #define RX_FNLEN(a) strlen(a) #define RX_DeleteFile DeleteFileA #define RX_CreateFile CreateFileA #define RX_CreateFileMapping CreateFileMappingA #define RX_GetFileAttributesEx GetFileAttributesExA #define RX_FindFirstFileEx FindFirstFileExA #define RX_CreateDirectory CreateDirectoryA #define RX_FindNextFile FindNextFileA #define RX_WIN32_FIND_DATA WIN32_FIND_DATA #define RX_CreateDirectory CreateDirectoryA #define RX_RemoveDirectory RemoveDirectoryA #define RX_GetFileAttributesEx GetFileAttributesExA #define RX_MoveFileEx MoveFileExA #define RX_CreateHardLink CreateHardLinkA #define RX_PathIsRelative PathIsRelativeA #define RX_GetCurrentDirectory GetCurrentDirectoryA #define RX_GetDiskFreeSpaceEx GetDiskFreeSpaceExA #define RX_PathIsDirectory PathIsDirectoryA #endif using port::pthread_key_t; using port::pthread_key_create; using port::pthread_key_delete; using port::pthread_setspecific; using port::pthread_getspecific; using port::truncate; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/win_jemalloc.cc000066400000000000000000000037611370372246700200070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifndef ROCKSDB_JEMALLOC # error This file can only be part of jemalloc aware build #endif #include #include "jemalloc/jemalloc.h" #include "port/win/port_win.h" #if defined(ZSTD) && defined(ZSTD_STATIC_LINKING_ONLY) #include #if (ZSTD_VERSION_NUMBER >= 500) namespace ROCKSDB_NAMESPACE { namespace port { void* JemallocAllocateForZSTD(void* /* opaque */, size_t size) { return je_malloc(size); } void JemallocDeallocateForZSTD(void* /* opaque */, void* address) { je_free(address); } ZSTD_customMem GetJeZstdAllocationOverrides() { return {JemallocAllocateForZSTD, JemallocDeallocateForZSTD, nullptr}; } } // namespace port } // namespace ROCKSDB_NAMESPACE #endif // (ZSTD_VERSION_NUMBER >= 500) #endif // defined(ZSTD) defined(ZSTD_STATIC_LINKING_ONLY) // Global operators to be replaced by a linker when this file is // a part of the build namespace ROCKSDB_NAMESPACE { namespace port { void* jemalloc_aligned_alloc(size_t size, size_t alignment) ROCKSDB_NOEXCEPT { return je_aligned_alloc(alignment, size); } void jemalloc_aligned_free(void* p) ROCKSDB_NOEXCEPT { je_free(p); } } // namespace port } // namespace ROCKSDB_NAMESPACE void* operator new(size_t size) { void* p = je_malloc(size); if (!p) { throw std::bad_alloc(); } return p; } void* operator new[](size_t size) { void* p = je_malloc(size); if (!p) { throw std::bad_alloc(); } return p; } void operator delete(void* p) { if (p) { je_free(p); } } void operator delete[](void* p) { if (p) { je_free(p); } } rocksdb-6.11.4/port/win/win_logger.cc000066400000000000000000000123341370372246700174740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Logger implementation that can be shared by all environments // where enough posix functionality is available. #include "port/win/win_logger.h" #include "port/win/io_win.h" #include #include #include #include #include #include "rocksdb/env.h" #include "monitoring/iostats_context_imp.h" #include "port/sys_time.h" namespace ROCKSDB_NAMESPACE { namespace port { WinLogger::WinLogger(uint64_t (*gettid)(), Env* env, HANDLE file, const InfoLogLevel log_level) : Logger(log_level), file_(file), gettid_(gettid), log_size_(0), last_flush_micros_(0), env_(env), flush_pending_(false) { assert(file_ != NULL); assert(file_ != INVALID_HANDLE_VALUE); } void WinLogger::DebugWriter(const char* str, int len) { assert(file_ != INVALID_HANDLE_VALUE); DWORD bytesWritten = 0; BOOL ret = WriteFile(file_, str, len, &bytesWritten, NULL); if (ret == FALSE) { std::string errSz = GetWindowsErrSz(GetLastError()); fprintf(stderr, errSz.c_str()); } } WinLogger::~WinLogger() { CloseInternal(); } Status WinLogger::CloseImpl() { return CloseInternal(); } Status WinLogger::CloseInternal() { Status s; if (INVALID_HANDLE_VALUE != file_) { BOOL ret = FlushFileBuffers(file_); if (ret == 0) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Failed to flush LOG on Close() ", lastError); } ret = CloseHandle(file_); // On error the return value is zero if (ret == 0 && s.ok()) { auto lastError = GetLastError(); s = IOErrorFromWindowsError("Failed to flush LOG on Close() ", lastError); } file_ = INVALID_HANDLE_VALUE; closed_ = true; } return s; } void WinLogger::Flush() { assert(file_ != INVALID_HANDLE_VALUE); if (flush_pending_) { flush_pending_ = false; // With Windows API writes go to OS buffers directly so no fflush needed // unlike with C runtime API. We don't flush all the way to disk // for perf reasons. 
} last_flush_micros_ = env_->NowMicros(); } void WinLogger::Logv(const char* format, va_list ap) { IOSTATS_TIMER_GUARD(logger_nanos); assert(file_ != INVALID_HANDLE_VALUE); const uint64_t thread_id = (*gettid_)(); // We try twice: the first time with a fixed-size stack allocated buffer, // and the second time with a much larger dynamically allocated buffer. char buffer[500]; std::unique_ptr largeBuffer; for (int iter = 0; iter < 2; ++iter) { char* base; int bufsize; if (iter == 0) { bufsize = sizeof(buffer); base = buffer; } else { bufsize = 30000; largeBuffer.reset(new char[bufsize]); base = largeBuffer.get(); } char* p = base; char* limit = base + bufsize; struct timeval now_tv; gettimeofday(&now_tv, nullptr); const time_t seconds = now_tv.tv_sec; struct tm t; localtime_s(&t, &seconds); p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), static_cast(thread_id)); // Print the message if (p < limit) { va_list backup_ap; va_copy(backup_ap, ap); int done = vsnprintf(p, limit - p, format, backup_ap); if (done > 0) { p += done; } else { continue; } va_end(backup_ap); } // Truncate to available space if necessary if (p >= limit) { if (iter == 0) { continue; // Try again with larger buffer } else { p = limit - 1; } } // Add newline if necessary if (p == base || p[-1] != '\n') { *p++ = '\n'; } assert(p <= limit); const size_t write_size = p - base; DWORD bytesWritten = 0; BOOL ret = WriteFile(file_, base, static_cast(write_size), &bytesWritten, NULL); if (ret == FALSE) { std::string errSz = GetWindowsErrSz(GetLastError()); fprintf(stderr, errSz.c_str()); } flush_pending_ = true; assert((bytesWritten == write_size) || (ret == FALSE)); if (bytesWritten > 0) { log_size_ += write_size; } uint64_t now_micros = static_cast(now_tv.tv_sec) * 1000000 + now_tv.tv_usec; if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { flush_pending_ = 
false; // With Windows API writes go to OS buffers directly so no fflush needed // unlike with C runtime API. We don't flush all the way to disk // for perf reasons. last_flush_micros_ = now_micros; } break; } } size_t WinLogger::GetLogFileSize() const { return log_size_; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/win_logger.h000066400000000000000000000031751370372246700173410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Logger implementation that can be shared by all environments // where enough posix functionality is available. 
#pragma once #include #include "rocksdb/env.h" #include #include namespace ROCKSDB_NAMESPACE { class Env; namespace port { class WinLogger : public ROCKSDB_NAMESPACE::Logger { public: WinLogger(uint64_t (*gettid)(), Env* env, HANDLE file, const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL); virtual ~WinLogger(); WinLogger(const WinLogger&) = delete; WinLogger& operator=(const WinLogger&) = delete; void Flush() override; using ROCKSDB_NAMESPACE::Logger::Logv; void Logv(const char* format, va_list ap) override; size_t GetLogFileSize() const override; void DebugWriter(const char* str, int len); protected: Status CloseImpl() override; private: HANDLE file_; uint64_t (*gettid_)(); // Return the thread id for the current thread std::atomic_size_t log_size_; std::atomic_uint_fast64_t last_flush_micros_; Env* env_; bool flush_pending_; Status CloseInternal(); const static uint64_t flush_every_seconds_ = 5; }; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/win/win_thread.cc000066400000000000000000000105511370372246700174630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "port/win/win_thread.h"

// NOTE(review): the angle-bracket header names were lost in this copy of the
// file; the list is reconstructed from what the code below uses. Confirm
// against upstream.
#include <assert.h>
#include <process.h>  // __beginthreadex
#include <windows.h>

#include <stdexcept>
#include <system_error>
#include <thread>

namespace ROCKSDB_NAMESPACE {
namespace port {

// State shared between the WindowsThread object and the running thread.
struct WindowsThread::Data {
  std::function<void()> func_;  // the callable the thread executes
  uintptr_t handle_;            // value returned by _beginthreadex; 0 = none

  Data(std::function<void()>&& func) : func_(std::move(func)), handle_(0) {}

  Data(const Data&) = delete;
  Data& operator=(const Data&) = delete;

  static unsigned int __stdcall ThreadProc(void* arg);
};

// Starts a thread running `func`.
// Throws std::system_error(resource_unavailable_try_again) if the thread
// cannot be created.
void WindowsThread::Init(std::function<void()>&& func) {
  data_ = std::make_shared<Data>(std::move(func));
  // We create another instance of std::shared_ptr to get an additional ref
  // since we may detach and destroy this instance before the threadproc
  // may start to run. We choose to allocate this additional ref on the heap
  // so we do not need to synchronize and allow this thread to proceed
  std::unique_ptr<std::shared_ptr<Data>> th_data(
      new std::shared_ptr<Data>(data_));

  data_->handle_ = _beginthreadex(NULL,
                                  0,  // stack size
                                  &Data::ThreadProc, th_data.get(),
                                  0,  // init flag
                                  &th_id_);

  if (data_->handle_ == 0) {
    throw std::system_error(
        std::make_error_code(std::errc::resource_unavailable_try_again),
        "Unable to create a thread");
  }
  // The thread exists: ThreadProc now owns the heap-allocated extra ref.
  th_data.release();
}

WindowsThread::WindowsThread() : data_(nullptr), th_id_(0) {}

WindowsThread::~WindowsThread() {
  // Must be joined or detached
  // before destruction.
  // This is the same as std::thread
  if (data_) {
    if (joinable()) {
      assert(false);
      std::terminate();
    }
    data_.reset();
  }
}

WindowsThread::WindowsThread(WindowsThread&& o) noexcept : WindowsThread() {
  *this = std::move(o);
}

// Move assignment. Terminates the process if *this still owns a joinable
// thread.
WindowsThread& WindowsThread::operator=(WindowsThread&& o) noexcept {
  if (joinable()) {
    assert(false);
    std::terminate();
  }

  data_ = std::move(o.data_);

  // Per spec both instances will have the same id
  th_id_ = o.th_id_;

  return *this;
}

bool WindowsThread::joinable() const {
  return (data_ && data_->handle_ != 0);
}

// Returns the stored thread handle, or nullptr when this object has no
// Data (default-constructed or moved-from) instead of dereferencing a null
// data_ pointer.
WindowsThread::native_handle_type WindowsThread::native_handle() const {
  if (!data_) {
    return nullptr;
  }
  return reinterpret_cast<native_handle_type>(data_->handle_);
}

unsigned WindowsThread::hardware_concurrency() {
  return std::thread::hardware_concurrency();
}

// Waits for the thread to finish and closes its handle.
// Throws std::system_error if the object is not joinable, if called from
// the thread itself, or if the wait fails.
void WindowsThread::join() {
  if (!joinable()) {
    assert(false);
    throw std::system_error(std::make_error_code(std::errc::invalid_argument),
                            "Thread is no longer joinable");
  }

  // Compare against the calling thread's id directly.
  if (GetCurrentThreadId() == th_id_) {
    assert(false);
    throw std::system_error(
        std::make_error_code(std::errc::resource_deadlock_would_occur),
        "Can not join itself");
  }

  auto ret =
      WaitForSingleObject(reinterpret_cast<HANDLE>(data_->handle_), INFINITE);
  if (ret != WAIT_OBJECT_0) {
    auto lastError = GetLastError();
    assert(false);
    throw std::system_error(static_cast<int>(lastError),
                            std::system_category(),
                            "WaitForSingleObjectFailed: thread join");
  }

  const BOOL rc = CloseHandle(reinterpret_cast<HANDLE>(data_->handle_));
  assert(rc != 0);
  (void)rc;  // only inspected by the assert; silence unused warnings
  data_->handle_ = 0;
}

// Closes the handle without waiting for the thread.
// Returns true if CloseHandle() succeeded. Throws std::system_error if the
// object is not joinable.
bool WindowsThread::detach() {
  if (!joinable()) {
    assert(false);
    throw std::system_error(std::make_error_code(std::errc::invalid_argument),
                            "Thread is no longer available");
  }

  BOOL ret = CloseHandle(reinterpret_cast<HANDLE>(data_->handle_));
  data_->handle_ = 0;

  return (ret != 0);
}

void WindowsThread::swap(WindowsThread& o) {
  data_.swap(o.data_);
  std::swap(th_id_, o.th_id_);
}

// Thread entry point. `arg` is the heap-allocated shared_ptr<Data> created
// in Init(); it is adopted here so it is freed when the callable returns.
unsigned int __stdcall WindowsThread::Data::ThreadProc(void* arg) {
  auto ptr = reinterpret_cast<std::shared_ptr<Data>*>(arg);
  std::unique_ptr<std::shared_ptr<Data>> data(ptr);
  (*data)->func_();
  return 0;
}
}  // namespace port
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/port/win/win_thread.h000066400000000000000000000072501370372246700173270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

// NOTE(review): header names reconstructed from usage (std::shared_ptr,
// std::function/std::bind, std::enable_if/std::decay). Confirm against
// upstream.
#include <functional>
#include <memory>
#include <type_traits>

#include "rocksdb/rocksdb_namespace.h"

namespace ROCKSDB_NAMESPACE {
namespace port {

// This class is a replacement for std::thread.
// Two reasons we do not like std::thread:
//  - It dynamically allocates its internals, which are automatically
//    freed when the thread terminates and not on the destruction of the
//    object. This makes it difficult to control the source of memory
//    allocation.
//  - This class implements Pimpl so we can easily replace the guts of the
//    object in our private version if necessary.
class WindowsThread {
  struct Data;

  std::shared_ptr<Data> data_;
  unsigned int th_id_;

  void Init(std::function<void()>&&);

 public:
  typedef void* native_handle_type;

  // Construct with no thread
  WindowsThread();

  // Template constructor
  //
  // This templated constructor accomplishes several things
  //
  // - Allows the class as whole to be not a template
  //
  // - take "universal" references to support both _lvalues and _rvalues
  //
  // - because this constructor is a catchall case in many respects it
  //   may prevent us from using both the default __ctor, the move __ctor.
  //   Also it may circumvent copy __ctor deletion. To work around this
  //   we make sure this one has at least one argument and eliminate
  //   it from the overload selection when WindowsThread is the first
  //   argument.
  //
  // - construct with Fx(Ax...) with a variable number of types/arguments.
  //
  // - Gathers together the callable object with its arguments and constructs
  //   a single callable entity
  //
  // - Makes use of std::function to convert it to a specification-template
  //   dependent type that both checks the signature conformance to ensure
  //   that all of the necessary arguments are provided and allows pimpl
  //   implementation.
  template <class Fn, class... Args,
            class = typename std::enable_if<!std::is_same<
                typename std::decay<Fn>::type, WindowsThread>::value>::type>
  explicit WindowsThread(Fn&& fx, Args&&... ax) : WindowsThread() {
    // Use binder to create a single callable entity
    auto binder = std::bind(std::forward<Fn>(fx), std::forward<Args>(ax)...);
    // Use std::function to take advantage of the type erasure
    // so we can still hide implementation within pimpl
    // This also makes sure that the binder signature is compliant
    std::function<void()> target = binder;

    Init(std::move(target));
  }

  ~WindowsThread();

  WindowsThread(const WindowsThread&) = delete;

  WindowsThread& operator=(const WindowsThread&) = delete;

  WindowsThread(WindowsThread&&) noexcept;

  WindowsThread& operator=(WindowsThread&&) noexcept;

  bool joinable() const;

  unsigned int get_id() const { return th_id_; }

  native_handle_type native_handle() const;

  static unsigned hardware_concurrency();

  void join();

  bool detach();

  void swap(WindowsThread&);
};
}  // namespace port
}  // namespace ROCKSDB_NAMESPACE

namespace std {
inline void swap(ROCKSDB_NAMESPACE::port::WindowsThread& th1,
                 ROCKSDB_NAMESPACE::port::WindowsThread& th2) {
  th1.swap(th2);
}
}  // namespace std
rocksdb-6.11.4/port/win/xpress_win.cc000066400000000000000000000133011370372246700175340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "port/win/xpress_win.h" #include #include #include #include #include #ifdef XPRESS // Put this under ifdef so windows systems w/o this // can still build #include namespace ROCKSDB_NAMESPACE { namespace port { namespace xpress { // Helpers namespace { auto CloseCompressorFun = [](void* h) { if (NULL != h) { ::CloseCompressor(reinterpret_cast(h)); } }; auto CloseDecompressorFun = [](void* h) { if (NULL != h) { ::CloseDecompressor(reinterpret_cast(h)); } }; } bool Compress(const char* input, size_t length, std::string* output) { assert(input != nullptr); assert(output != nullptr); if (length == 0) { output->clear(); return true; } COMPRESS_ALLOCATION_ROUTINES* allocRoutinesPtr = nullptr; COMPRESSOR_HANDLE compressor = NULL; BOOL success = CreateCompressor( COMPRESS_ALGORITHM_XPRESS, // Compression Algorithm allocRoutinesPtr, // Optional allocation routine &compressor); // Handle if (!success) { #ifdef _DEBUG std::cerr << "XPRESS: Failed to create Compressor LastError: " << GetLastError() << std::endl; #endif return false; } std::unique_ptr compressorGuard(compressor, CloseCompressorFun); SIZE_T compressedBufferSize = 0; // Query compressed buffer size. 
success = ::Compress( compressor, // Compressor Handle const_cast(input), // Input buffer length, // Uncompressed data size NULL, // Compressed Buffer 0, // Compressed Buffer size &compressedBufferSize); // Compressed Data size if (!success) { auto lastError = GetLastError(); if (lastError != ERROR_INSUFFICIENT_BUFFER) { #ifdef _DEBUG std::cerr << "XPRESS: Failed to estimate compressed buffer size LastError " << lastError << std::endl; #endif return false; } } assert(compressedBufferSize > 0); std::string result; result.resize(compressedBufferSize); SIZE_T compressedDataSize = 0; // Compress success = ::Compress( compressor, // Compressor Handle const_cast(input), // Input buffer length, // Uncompressed data size &result[0], // Compressed Buffer compressedBufferSize, // Compressed Buffer size &compressedDataSize); // Compressed Data size if (!success) { #ifdef _DEBUG std::cerr << "XPRESS: Failed to compress LastError " << GetLastError() << std::endl; #endif return false; } result.resize(compressedDataSize); output->swap(result); return true; } char* Decompress(const char* input_data, size_t input_length, int* decompress_size) { assert(input_data != nullptr); assert(decompress_size != nullptr); if (input_length == 0) { return nullptr; } COMPRESS_ALLOCATION_ROUTINES* allocRoutinesPtr = nullptr; DECOMPRESSOR_HANDLE decompressor = NULL; BOOL success = CreateDecompressor( COMPRESS_ALGORITHM_XPRESS, // Compression Algorithm allocRoutinesPtr, // Optional allocation routine &decompressor); // Handle if (!success) { #ifdef _DEBUG std::cerr << "XPRESS: Failed to create Decompressor LastError " << GetLastError() << std::endl; #endif return nullptr; } std::unique_ptr compressorGuard(decompressor, CloseDecompressorFun); SIZE_T decompressedBufferSize = 0; success = ::Decompress( decompressor, // Compressor Handle const_cast(input_data), // Compressed data input_length, // Compressed data size NULL, // Buffer set to NULL 0, // Buffer size set to 0 &decompressedBufferSize); // 
Decompressed Data size if (!success) { auto lastError = GetLastError(); if (lastError != ERROR_INSUFFICIENT_BUFFER) { #ifdef _DEBUG std::cerr << "XPRESS: Failed to estimate decompressed buffer size LastError " << lastError << std::endl; #endif return nullptr; } } assert(decompressedBufferSize > 0); // On Windows we are limited to a 32-bit int for the // output data size argument // so we hopefully never get here if (decompressedBufferSize > std::numeric_limits::max()) { assert(false); return nullptr; } // The callers are deallocating using delete[] // thus we must allocate with new[] std::unique_ptr outputBuffer(new char[decompressedBufferSize]); SIZE_T decompressedDataSize = 0; success = ::Decompress( decompressor, const_cast(input_data), input_length, outputBuffer.get(), decompressedBufferSize, &decompressedDataSize); if (!success) { #ifdef _DEBUG std::cerr << "XPRESS: Failed to decompress LastError " << GetLastError() << std::endl; #endif return nullptr; } *decompress_size = static_cast(decompressedDataSize); // Return the raw buffer to the caller supporting the tradition return outputBuffer.release(); } } } } // namespace ROCKSDB_NAMESPACE #endif rocksdb-6.11.4/port/win/xpress_win.h000066400000000000000000000015021370372246700173760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { namespace port { namespace xpress { bool Compress(const char* input, size_t length, std::string* output); char* Decompress(const char* input_data, size_t input_length, int* decompress_size); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/port/xpress.h000066400000000000000000000012571370372246700157330ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once // Xpress on Windows is implemeted using Win API #if defined(ROCKSDB_PLATFORM_POSIX) #error "Xpress compression not implemented" #elif defined(OS_WIN) #include "port/win/xpress_win.h" #endif rocksdb-6.11.4/src.mk000066400000000000000000001071141370372246700143710ustar00rootroot00000000000000# These are the sources from which librocksdb.a is built: LIB_SOURCES = \ cache/cache.cc \ cache/clock_cache.cc \ cache/lru_cache.cc \ cache/sharded_cache.cc \ db/arena_wrapped_db_iter.cc \ db/blob/blob_file_addition.cc \ db/blob/blob_file_garbage.cc \ db/blob/blob_file_meta.cc \ db/blob/blob_log_format.cc \ db/blob/blob_log_reader.cc \ db/blob/blob_log_writer.cc \ db/builder.cc \ db/c.cc \ db/column_family.cc \ db/compacted_db_impl.cc \ db/compaction/compaction.cc \ db/compaction/compaction_iterator.cc \ db/compaction/compaction_job.cc \ db/compaction/compaction_picker.cc \ db/compaction/compaction_picker_fifo.cc \ db/compaction/compaction_picker_level.cc \ db/compaction/compaction_picker_universal.cc \ db/convenience.cc \ db/db_filesnapshot.cc \ db/db_impl/db_impl.cc \ 
db/db_impl/db_impl_compaction_flush.cc \ db/db_impl/db_impl_debug.cc \ db/db_impl/db_impl_experimental.cc \ db/db_impl/db_impl_files.cc \ db/db_impl/db_impl_open.cc \ db/db_impl/db_impl_readonly.cc \ db/db_impl/db_impl_secondary.cc \ db/db_impl/db_impl_write.cc \ db/db_info_dumper.cc \ db/db_iter.cc \ db/dbformat.cc \ db/error_handler.cc \ db/event_helpers.cc \ db/experimental.cc \ db/external_sst_file_ingestion_job.cc \ db/file_indexer.cc \ db/flush_job.cc \ db/flush_scheduler.cc \ db/forward_iterator.cc \ db/import_column_family_job.cc \ db/internal_stats.cc \ db/logs_with_prep_tracker.cc \ db/log_reader.cc \ db/log_writer.cc \ db/malloc_stats.cc \ db/memtable.cc \ db/memtable_list.cc \ db/merge_helper.cc \ db/merge_operator.cc \ db/range_del_aggregator.cc \ db/range_tombstone_fragmenter.cc \ db/repair.cc \ db/snapshot_impl.cc \ db/table_cache.cc \ db/table_properties_collector.cc \ db/transaction_log_impl.cc \ db/trim_history_scheduler.cc \ db/version_builder.cc \ db/version_edit.cc \ db/version_edit_handler.cc \ db/version_set.cc \ db/wal_manager.cc \ db/write_batch.cc \ db/write_batch_base.cc \ db/write_controller.cc \ db/write_thread.cc \ env/env.cc \ env/env_chroot.cc \ env/env_encryption.cc \ env/env_hdfs.cc \ env/env_posix.cc \ env/file_system.cc \ env/fs_posix.cc \ env/io_posix.cc \ env/mock_env.cc \ file/delete_scheduler.cc \ file/file_prefetch_buffer.cc \ file/file_util.cc \ file/filename.cc \ file/random_access_file_reader.cc \ file/read_write_util.cc \ file/readahead_raf.cc \ file/sequence_file_reader.cc \ file/sst_file_manager_impl.cc \ file/writable_file_writer.cc \ logging/auto_roll_logger.cc \ logging/event_logger.cc \ logging/log_buffer.cc \ memory/arena.cc \ memory/concurrent_arena.cc \ memory/jemalloc_nodump_allocator.cc \ memory/memkind_kmem_allocator.cc \ memtable/alloc_tracker.cc \ memtable/hash_linklist_rep.cc \ memtable/hash_skiplist_rep.cc \ memtable/skiplistrep.cc \ memtable/vectorrep.cc \ memtable/write_buffer_manager.cc \ 
monitoring/histogram.cc \ monitoring/histogram_windowing.cc \ monitoring/in_memory_stats_history.cc \ monitoring/instrumented_mutex.cc \ monitoring/iostats_context.cc \ monitoring/perf_context.cc \ monitoring/perf_level.cc \ monitoring/persistent_stats_history.cc \ monitoring/statistics.cc \ monitoring/thread_status_impl.cc \ monitoring/thread_status_updater.cc \ monitoring/thread_status_updater_debug.cc \ monitoring/thread_status_util.cc \ monitoring/thread_status_util_debug.cc \ options/cf_options.cc \ options/db_options.cc \ options/options.cc \ options/options_helper.cc \ options/options_parser.cc \ port/port_posix.cc \ port/stack_trace.cc \ table/adaptive/adaptive_table_factory.cc \ table/block_based/binary_search_index_reader.cc \ table/block_based/block.cc \ table/block_based/block_based_filter_block.cc \ table/block_based/block_based_table_builder.cc \ table/block_based/block_based_table_factory.cc \ table/block_based/block_based_table_iterator.cc \ table/block_based/block_based_table_reader.cc \ table/block_based/block_builder.cc \ table/block_based/block_prefetcher.cc \ table/block_based/block_prefix_index.cc \ table/block_based/data_block_hash_index.cc \ table/block_based/data_block_footer.cc \ table/block_based/filter_block_reader_common.cc \ table/block_based/filter_policy.cc \ table/block_based/flush_block_policy.cc \ table/block_based/full_filter_block.cc \ table/block_based/hash_index_reader.cc \ table/block_based/index_builder.cc \ table/block_based/index_reader_common.cc \ table/block_based/parsed_full_filter_block.cc \ table/block_based/partitioned_filter_block.cc \ table/block_based/partitioned_index_iterator.cc \ table/block_based/partitioned_index_reader.cc \ table/block_based/reader_common.cc \ table/block_based/uncompression_dict_reader.cc \ table/block_fetcher.cc \ table/cuckoo/cuckoo_table_builder.cc \ table/cuckoo/cuckoo_table_factory.cc \ table/cuckoo/cuckoo_table_reader.cc \ table/format.cc \ table/get_context.cc \ table/iterator.cc \ 
table/merging_iterator.cc \ table/meta_blocks.cc \ table/persistent_cache_helper.cc \ table/plain/plain_table_bloom.cc \ table/plain/plain_table_builder.cc \ table/plain/plain_table_factory.cc \ table/plain/plain_table_index.cc \ table/plain/plain_table_key_coding.cc \ table/plain/plain_table_reader.cc \ table/sst_file_reader.cc \ table/sst_file_writer.cc \ table/table_properties.cc \ table/two_level_iterator.cc \ test_util/sync_point.cc \ test_util/sync_point_impl.cc \ test_util/transaction_test_util.cc \ tools/dump/db_dump_tool.cc \ trace_replay/trace_replay.cc \ trace_replay/block_cache_tracer.cc \ util/build_version.cc \ util/coding.cc \ util/compaction_job_stats_impl.cc \ util/comparator.cc \ util/compression_context_cache.cc \ util/concurrent_task_limiter_impl.cc \ util/crc32c.cc \ util/dynamic_bloom.cc \ util/hash.cc \ util/murmurhash.cc \ util/random.cc \ util/rate_limiter.cc \ util/slice.cc \ util/file_checksum_helper.cc \ util/status.cc \ util/string_util.cc \ util/thread_local.cc \ util/threadpool_imp.cc \ util/xxhash.cc \ utilities/backupable/backupable_db.cc \ utilities/blob_db/blob_compaction_filter.cc \ utilities/blob_db/blob_db.cc \ utilities/blob_db/blob_db_impl.cc \ utilities/blob_db/blob_db_impl_filesnapshot.cc \ utilities/blob_db/blob_file.cc \ utilities/cassandra/cassandra_compaction_filter.cc \ utilities/cassandra/format.cc \ utilities/cassandra/merge_operator.cc \ utilities/checkpoint/checkpoint_impl.cc \ utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc \ utilities/convenience/info_log_finder.cc \ utilities/debug.cc \ utilities/env_mirror.cc \ utilities/env_timed.cc \ utilities/leveldb_options/leveldb_options.cc \ utilities/memory/memory_util.cc \ utilities/merge_operators/max.cc \ utilities/merge_operators/put.cc \ utilities/merge_operators/sortlist.cc \ utilities/merge_operators/string_append/stringappend.cc \ utilities/merge_operators/string_append/stringappend2.cc \ utilities/merge_operators/uint64add.cc \ 
utilities/merge_operators/bytesxor.cc \ utilities/object_registry.cc \ utilities/option_change_migration/option_change_migration.cc \ utilities/options/options_util.cc \ utilities/persistent_cache/block_cache_tier.cc \ utilities/persistent_cache/block_cache_tier_file.cc \ utilities/persistent_cache/block_cache_tier_metadata.cc \ utilities/persistent_cache/persistent_cache_tier.cc \ utilities/persistent_cache/volatile_tier_impl.cc \ utilities/simulator_cache/cache_simulator.cc \ utilities/simulator_cache/sim_cache.cc \ utilities/table_properties_collectors/compact_on_deletion_collector.cc \ utilities/trace/file_trace_reader_writer.cc \ utilities/transactions/optimistic_transaction.cc \ utilities/transactions/optimistic_transaction_db_impl.cc \ utilities/transactions/pessimistic_transaction.cc \ utilities/transactions/pessimistic_transaction_db.cc \ utilities/transactions/snapshot_checker.cc \ utilities/transactions/transaction_base.cc \ utilities/transactions/transaction_db_mutex_impl.cc \ utilities/transactions/transaction_lock_mgr.cc \ utilities/transactions/transaction_util.cc \ utilities/transactions/write_prepared_txn.cc \ utilities/transactions/write_prepared_txn_db.cc \ utilities/transactions/write_unprepared_txn.cc \ utilities/transactions/write_unprepared_txn_db.cc \ utilities/ttl/db_ttl_impl.cc \ utilities/write_batch_with_index/write_batch_with_index.cc \ utilities/write_batch_with_index/write_batch_with_index_internal.cc \ ifeq ($(ARMCRC_SOURCE),1) LIB_SOURCES +=\ util/crc32c_arm64.cc endif ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1)) LIB_SOURCES_ASM =\ util/crc32c_ppc_asm.S LIB_SOURCES_C = \ util/crc32c_ppc.c else LIB_SOURCES_ASM = LIB_SOURCES_C = endif TOOL_LIB_SOURCES = \ tools/ldb_cmd.cc \ tools/ldb_tool.cc \ tools/sst_dump_tool.cc \ utilities/blob_db/blob_dump_tool.cc \ ANALYZER_LIB_SOURCES = \ tools/block_cache_analyzer/block_cache_trace_analyzer.cc \ tools/trace_analyzer_tool.cc \ MOCK_LIB_SOURCES = \ table/mock_table.cc \ 
test_util/fault_injection_test_fs.cc \ test_util/fault_injection_test_env.cc BENCH_LIB_SOURCES = \ tools/db_bench_tool.cc \ STRESS_LIB_SOURCES = \ db_stress_tool/batched_ops_stress.cc \ db_stress_tool/cf_consistency_stress.cc \ db_stress_tool/db_stress_common.cc \ db_stress_tool/db_stress_driver.cc \ db_stress_tool/db_stress_test_base.cc \ db_stress_tool/db_stress_gflags.cc \ db_stress_tool/db_stress_shared_state.cc \ db_stress_tool/db_stress_tool.cc \ db_stress_tool/no_batched_ops_stress.cc \ TEST_LIB_SOURCES = \ db/db_test_util.cc \ test_util/testharness.cc \ test_util/testutil.cc \ utilities/cassandra/test_utils.cc \ FOLLY_SOURCES = \ third-party/folly/folly/detail/Futex.cpp \ third-party/folly/folly/synchronization/AtomicNotification.cpp \ third-party/folly/folly/synchronization/DistributedMutex.cpp \ third-party/folly/folly/synchronization/ParkingLot.cpp \ third-party/folly/folly/synchronization/WaitOptions.cpp \ MAIN_SOURCES = \ cache/cache_bench.cc \ cache/cache_test.cc \ db_stress_tool/db_stress.cc \ db/blob/blob_file_addition_test.cc \ db/blob/blob_file_garbage_test.cc \ db/blob/db_blob_index_test.cc \ db/column_family_test.cc \ db/compact_files_test.cc \ db/compaction/compaction_iterator_test.cc \ db/compaction/compaction_job_test.cc \ db/compaction/compaction_job_stats_test.cc \ db/compaction/compaction_picker_test.cc \ db/comparator_db_test.cc \ db/corruption_test.cc \ db/cuckoo_table_db_test.cc \ db/db_basic_test.cc \ db/db_with_timestamp_basic_test.cc \ db/db_block_cache_test.cc \ db/db_bloom_filter_test.cc \ db/db_compaction_filter_test.cc \ db/db_compaction_test.cc \ db/db_dynamic_level_test.cc \ db/db_encryption_test.cc \ db/db_flush_test.cc \ db/db_inplace_update_test.cc \ db/db_io_failure_test.cc \ db/db_iter_test.cc \ db/db_iter_stress_test.cc \ db/db_iterator_test.cc \ db/db_log_iter_test.cc \ db/db_memtable_test.cc \ db/db_merge_operator_test.cc \ db/db_merge_operand_test.cc \ db/db_options_test.cc \ db/db_properties_test.cc \ 
db/db_range_del_test.cc \ db/db_impl/db_secondary_test.cc \ db/db_sst_test.cc \ db/db_statistics_test.cc \ db/db_table_properties_test.cc \ db/db_tailing_iter_test.cc \ db/db_test.cc \ db/db_test2.cc \ db/db_logical_block_size_cache_test.cc \ db/db_universal_compaction_test.cc \ db/db_wal_test.cc \ db/db_with_timestamp_compaction_test.cc \ db/db_write_test.cc \ db/dbformat_test.cc \ db/deletefile_test.cc \ db/env_timed_test.cc \ db/error_handler_fs_test.cc \ db/external_sst_file_basic_test.cc \ db/external_sst_file_test.cc \ db/fault_injection_test.cc \ db/file_indexer_test.cc \ db/file_reader_writer_test.cc \ db/filename_test.cc \ db/flush_job_test.cc \ db/hash_table_test.cc \ db/hash_test.cc \ db/heap_test.cc \ db/listener_test.cc \ db/log_test.cc \ db/lru_cache_test.cc \ db/manual_compaction_test.cc \ db/memtable_list_test.cc \ db/merge_helper_test.cc \ db/merge_test.cc \ db/obsolete_files_test.cc \ db/options_settable_test.cc \ db/options_file_test.cc \ db/perf_context_test.cc \ db/persistent_cache_test.cc \ db/plain_table_db_test.cc \ db/prefix_test.cc \ db/repair_test.cc \ db/range_del_aggregator_test.cc \ db/range_del_aggregator_bench.cc \ db/range_tombstone_fragmenter_test.cc \ db/table_properties_collector_test.cc \ db/util_merge_operators_test.cc \ db/version_builder_test.cc \ db/version_edit_test.cc \ db/version_set_test.cc \ db/wal_manager_test.cc \ db/write_batch_test.cc \ db/write_callback_test.cc \ db/write_controller_test.cc \ env/env_basic_test.cc \ env/env_test.cc \ env/io_posix_test.cc \ env/mock_env_test.cc \ file/random_access_file_reader_test.cc \ logging/auto_roll_logger_test.cc \ logging/env_logger_test.cc \ logging/event_logger_test.cc \ memory/arena_test.cc \ memory/memkind_kmem_allocator_test.cc \ memtable/inlineskiplist_test.cc \ memtable/memtablerep_bench.cc \ memtable/skiplist_test.cc \ memtable/write_buffer_manager_test.cc \ monitoring/histogram_test.cc \ monitoring/iostats_context_test.cc \ monitoring/statistics_test.cc \ 
monitoring/stats_history_test.cc \ options/options_test.cc \ table/block_based/block_based_filter_block_test.cc \ table/block_based/block_based_table_reader_test.cc \ table/block_based/block_test.cc \ table/block_based/data_block_hash_index_test.cc \ table/block_based/full_filter_block_test.cc \ table/block_based/partitioned_filter_block_test.cc \ table/cleanable_test.cc \ table/cuckoo/cuckoo_table_builder_test.cc \ table/cuckoo/cuckoo_table_reader_test.cc \ table/merger_test.cc \ table/sst_file_reader_test.cc \ table/table_reader_bench.cc \ table/table_test.cc \ table/block_fetcher_test.cc \ third-party/gtest-1.8.1/fused-src/gtest/gtest-all.cc \ tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc \ tools/block_cache_analyzer/block_cache_trace_analyzer_tool.cc \ tools/db_bench.cc \ tools/db_bench_tool_test.cc \ tools/db_sanity_test.cc \ tools/ldb_cmd_test.cc \ tools/reduce_levels_test.cc \ tools/sst_dump_test.cc \ tools/trace_analyzer_test.cc \ trace_replay/block_cache_tracer_test.cc \ util/autovector_test.cc \ util/bloom_test.cc \ util/coding_test.cc \ util/crc32c_test.cc \ util/defer_test.cc \ util/dynamic_bloom_test.cc \ util/filelock_test.cc \ util/log_write_bench.cc \ util/rate_limiter_test.cc \ util/random_test.cc \ util/repeatable_thread_test.cc \ util/slice_test.cc \ util/slice_transform_test.cc \ util/timer_queue_test.cc \ util/timer_test.cc \ util/thread_list_test.cc \ util/thread_local_test.cc \ util/work_queue_test.cc \ utilities/backupable/backupable_db_test.cc \ utilities/blob_db/blob_db_test.cc \ utilities/cassandra/cassandra_format_test.cc \ utilities/cassandra/cassandra_functional_test.cc \ utilities/cassandra/cassandra_row_merge_test.cc \ utilities/cassandra/cassandra_serialize_test.cc \ utilities/checkpoint/checkpoint_test.cc \ utilities/memory/memory_test.cc \ utilities/merge_operators/string_append/stringappend_test.cc \ utilities/object_registry_test.cc \ utilities/option_change_migration/option_change_migration_test.cc \ 
utilities/options/options_util_test.cc \ utilities/simulator_cache/cache_simulator_test.cc \ utilities/simulator_cache/sim_cache_test.cc \ utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \ utilities/transactions/optimistic_transaction_test.cc \ utilities/transactions/transaction_test.cc \ utilities/transactions/transaction_lock_mgr_test.cc \ utilities/transactions/write_prepared_transaction_test.cc \ utilities/transactions/write_unprepared_transaction_test.cc \ utilities/ttl/ttl_test.cc \ utilities/write_batch_with_index/write_batch_with_index_test.cc \ JNI_NATIVE_SOURCES = \ java/rocksjni/backupenginejni.cc \ java/rocksjni/backupablejni.cc \ java/rocksjni/checkpoint.cc \ java/rocksjni/clock_cache.cc \ java/rocksjni/columnfamilyhandle.cc \ java/rocksjni/compact_range_options.cc \ java/rocksjni/compaction_filter.cc \ java/rocksjni/compaction_filter_factory.cc \ java/rocksjni/compaction_filter_factory_jnicallback.cc \ java/rocksjni/compaction_job_info.cc \ java/rocksjni/compaction_job_stats.cc \ java/rocksjni/compaction_options.cc \ java/rocksjni/compaction_options_fifo.cc \ java/rocksjni/compaction_options_universal.cc \ java/rocksjni/comparator.cc \ java/rocksjni/comparatorjnicallback.cc \ java/rocksjni/compression_options.cc \ java/rocksjni/config_options.cc \ java/rocksjni/env.cc \ java/rocksjni/env_options.cc \ java/rocksjni/ingest_external_file_options.cc \ java/rocksjni/filter.cc \ java/rocksjni/iterator.cc \ java/rocksjni/jnicallback.cc \ java/rocksjni/loggerjnicallback.cc \ java/rocksjni/lru_cache.cc \ java/rocksjni/memtablejni.cc \ java/rocksjni/memory_util.cc \ java/rocksjni/merge_operator.cc \ java/rocksjni/native_comparator_wrapper_test.cc \ java/rocksjni/optimistic_transaction_db.cc \ java/rocksjni/optimistic_transaction_options.cc \ java/rocksjni/options.cc \ java/rocksjni/options_util.cc \ java/rocksjni/persistent_cache.cc \ java/rocksjni/ratelimiterjni.cc \ java/rocksjni/remove_emptyvalue_compactionfilterjni.cc \ 
java/rocksjni/cassandra_compactionfilterjni.cc \ java/rocksjni/cassandra_value_operator.cc \ java/rocksjni/restorejni.cc \ java/rocksjni/rocks_callback_object.cc \ java/rocksjni/rocksjni.cc \ java/rocksjni/rocksdb_exception_test.cc \ java/rocksjni/slice.cc \ java/rocksjni/snapshot.cc \ java/rocksjni/sst_file_manager.cc \ java/rocksjni/sst_file_writerjni.cc \ java/rocksjni/sst_file_readerjni.cc \ java/rocksjni/sst_file_reader_iterator.cc \ java/rocksjni/statistics.cc \ java/rocksjni/statisticsjni.cc \ java/rocksjni/table.cc \ java/rocksjni/table_filter.cc \ java/rocksjni/table_filter_jnicallback.cc \ java/rocksjni/thread_status.cc \ java/rocksjni/trace_writer.cc \ java/rocksjni/trace_writer_jnicallback.cc \ java/rocksjni/transaction.cc \ java/rocksjni/transaction_db.cc \ java/rocksjni/transaction_options.cc \ java/rocksjni/transaction_db_options.cc \ java/rocksjni/transaction_log.cc \ java/rocksjni/transaction_notifier.cc \ java/rocksjni/transaction_notifier_jnicallback.cc \ java/rocksjni/ttl.cc \ java/rocksjni/wal_filter.cc \ java/rocksjni/wal_filter_jnicallback.cc \ java/rocksjni/write_batch.cc \ java/rocksjni/writebatchhandlerjnicallback.cc \ java/rocksjni/write_batch_test.cc \ java/rocksjni/write_batch_with_index.cc \ java/rocksjni/write_buffer_manager.cc rocksdb-6.11.4/table/000077500000000000000000000000001370372246700143345ustar00rootroot00000000000000rocksdb-6.11.4/table/adaptive/000077500000000000000000000000001370372246700161315ustar00rootroot00000000000000rocksdb-6.11.4/table/adaptive/adaptive_table_factory.cc000066400000000000000000000116561370372246700231440ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifndef ROCKSDB_LITE #include "table/adaptive/adaptive_table_factory.h" #include "table/table_builder.h" #include "table/format.h" #include "port/port.h" namespace ROCKSDB_NAMESPACE { AdaptiveTableFactory::AdaptiveTableFactory( std::shared_ptr table_factory_to_write, std::shared_ptr block_based_table_factory, std::shared_ptr plain_table_factory, std::shared_ptr cuckoo_table_factory) : table_factory_to_write_(table_factory_to_write), block_based_table_factory_(block_based_table_factory), plain_table_factory_(plain_table_factory), cuckoo_table_factory_(cuckoo_table_factory) { if (!plain_table_factory_) { plain_table_factory_.reset(NewPlainTableFactory()); } if (!block_based_table_factory_) { block_based_table_factory_.reset(NewBlockBasedTableFactory()); } if (!cuckoo_table_factory_) { cuckoo_table_factory_.reset(NewCuckooTableFactory()); } if (!table_factory_to_write_) { table_factory_to_write_ = block_based_table_factory_; } } extern const uint64_t kPlainTableMagicNumber; extern const uint64_t kLegacyPlainTableMagicNumber; extern const uint64_t kBlockBasedTableMagicNumber; extern const uint64_t kLegacyBlockBasedTableMagicNumber; extern const uint64_t kCuckooTableMagicNumber; Status AdaptiveTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool /*prefetch_index_and_filter_in_cache*/) const { Footer footer; auto s = ReadFooterFromFile(file.get(), nullptr /* prefetch_buffer */, file_size, &footer); if (!s.ok()) { return s; } if (footer.table_magic_number() == kPlainTableMagicNumber || footer.table_magic_number() == kLegacyPlainTableMagicNumber) { return plain_table_factory_->NewTableReader( table_reader_options, std::move(file), file_size, table); } else if (footer.table_magic_number() == kBlockBasedTableMagicNumber || footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) { return block_based_table_factory_->NewTableReader( table_reader_options, std::move(file), 
file_size, table); } else if (footer.table_magic_number() == kCuckooTableMagicNumber) { return cuckoo_table_factory_->NewTableReader( table_reader_options, std::move(file), file_size, table); } else { return Status::NotSupported("Unidentified table format"); } } TableBuilder* AdaptiveTableFactory::NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const { return table_factory_to_write_->NewTableBuilder(table_builder_options, column_family_id, file); } std::string AdaptiveTableFactory::GetPrintableTableOptions() const { std::string ret; ret.reserve(20000); const int kBufferSize = 200; char buffer[kBufferSize]; if (table_factory_to_write_) { snprintf(buffer, kBufferSize, " write factory (%s) options:\n%s\n", (table_factory_to_write_->Name() ? table_factory_to_write_->Name() : ""), table_factory_to_write_->GetPrintableTableOptions().c_str()); ret.append(buffer); } if (plain_table_factory_) { snprintf(buffer, kBufferSize, " %s options:\n%s\n", plain_table_factory_->Name() ? plain_table_factory_->Name() : "", plain_table_factory_->GetPrintableTableOptions().c_str()); ret.append(buffer); } if (block_based_table_factory_) { snprintf( buffer, kBufferSize, " %s options:\n%s\n", (block_based_table_factory_->Name() ? block_based_table_factory_->Name() : ""), block_based_table_factory_->GetPrintableTableOptions().c_str()); ret.append(buffer); } if (cuckoo_table_factory_) { snprintf(buffer, kBufferSize, " %s options:\n%s\n", cuckoo_table_factory_->Name() ? 
cuckoo_table_factory_->Name() : "", cuckoo_table_factory_->GetPrintableTableOptions().c_str()); ret.append(buffer); } return ret; } extern TableFactory* NewAdaptiveTableFactory( std::shared_ptr table_factory_to_write, std::shared_ptr block_based_table_factory, std::shared_ptr plain_table_factory, std::shared_ptr cuckoo_table_factory) { return new AdaptiveTableFactory(table_factory_to_write, block_based_table_factory, plain_table_factory, cuckoo_table_factory); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/adaptive/adaptive_table_factory.h000066400000000000000000000036531370372246700230040ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include #include "rocksdb/options.h" #include "rocksdb/table.h" namespace ROCKSDB_NAMESPACE { struct EnvOptions; class Status; class RandomAccessFile; class WritableFile; class Table; class TableBuilder; class AdaptiveTableFactory : public TableFactory { public: ~AdaptiveTableFactory() {} explicit AdaptiveTableFactory( std::shared_ptr table_factory_to_write, std::shared_ptr block_based_table_factory, std::shared_ptr plain_table_factory, std::shared_ptr cuckoo_table_factory); const char* Name() const override { return "AdaptiveTableFactory"; } Status NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const override; // Sanitizes the specified DB Options. 
Status SanitizeOptions( const DBOptions& /*db_opts*/, const ColumnFamilyOptions& /*cf_opts*/) const override { return Status::OK(); } std::string GetPrintableTableOptions() const override; private: std::shared_ptr table_factory_to_write_; std::shared_ptr block_based_table_factory_; std::shared_ptr plain_table_factory_; std::shared_ptr cuckoo_table_factory_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/block_based/000077500000000000000000000000001370372246700165645ustar00rootroot00000000000000rocksdb-6.11.4/table/block_based/binary_search_index_reader.cc000066400000000000000000000050151370372246700244160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/binary_search_index_reader.h" namespace ROCKSDB_NAMESPACE { Status BinarySearchIndexReader::Create( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader) { assert(table != nullptr); assert(table->get_rep()); assert(!pin || prefetch); assert(index_reader != nullptr); CachableEntry index_block; if (prefetch || !use_cache) { const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache, /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; } if (use_cache && !pin) { index_block.Reset(); } } index_reader->reset( new BinarySearchIndexReader(table, std::move(index_block))); return Status::OK(); } InternalIteratorBase* BinarySearchIndexReader::NewIterator( const ReadOptions& read_options, bool /* disable_prefix_seek */, IndexBlockIter* iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) { const BlockBasedTable::Rep* rep = table()->get_rep(); const bool no_io = (read_options.read_tier == kBlockCacheTier); CachableEntry index_block; const Status s = GetOrReadIndexBlock(no_io, get_context, lookup_context, &index_block); if (!s.ok()) { if (iter != nullptr) { iter->Invalidate(s); return iter; } return NewErrorInternalIterator(s); } Statistics* kNullStats = nullptr; // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. 
auto it = index_block.GetValue()->NewIndexIterator( internal_comparator(), internal_comparator()->user_comparator(), rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, true, index_has_first_key(), index_key_includes_seq(), index_value_is_full()); assert(it != nullptr); index_block.TransferTo(it); return it; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/binary_search_index_reader.h000066400000000000000000000040521370372246700242600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "table/block_based/index_reader_common.h" namespace ROCKSDB_NAMESPACE { // Index that allows binary search lookup for the first key of each block. // This class can be viewed as a thin wrapper for `Block` class which already // supports binary search. class BinarySearchIndexReader : public BlockBasedTable::IndexReaderCommon { public: // Read index from the file and create an intance for // `BinarySearchIndexReader`. // On success, index_reader will be populated; otherwise it will remain // unmodified. 
static Status Create(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader); InternalIteratorBase* NewIterator( const ReadOptions& read_options, bool /* disable_prefix_seek */, IndexBlockIter* iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override { size_t usage = ApproximateIndexBlockMemoryUsage(); #ifdef ROCKSDB_MALLOC_USABLE_SIZE usage += malloc_usable_size(const_cast(this)); #else usage += sizeof(*this); #endif // ROCKSDB_MALLOC_USABLE_SIZE return usage; } private: BinarySearchIndexReader(const BlockBasedTable* t, CachableEntry&& index_block) : IndexReaderCommon(t, std::move(index_block)) {} }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block.cc000066400000000000000000001134671370372246700202010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Decodes the blocks generated by block_builder.cc. 
#include "table/block_based/block.h" #include #include #include #include #include "logging/logging.h" #include "monitoring/perf_context_imp.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/comparator.h" #include "table/block_based/block_prefix_index.h" #include "table/block_based/data_block_footer.h" #include "table/format.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { // Helper routine: decode the next block entry starting at "p", // storing the number of shared key bytes, non_shared key bytes, // and the length of the value in "*shared", "*non_shared", and // "*value_length", respectively. Will not derefence past "limit". // // If any errors are detected, returns nullptr. Otherwise, returns a // pointer to the key delta (just past the three decoded values). struct DecodeEntry { inline const char* operator()(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared, uint32_t* value_length) { // We need 2 bytes for shared and non_shared size. We also need one more // byte either for value size or the actual value in case of value delta // encoding. assert(limit - p >= 3); *shared = reinterpret_cast(p)[0]; *non_shared = reinterpret_cast(p)[1]; *value_length = reinterpret_cast(p)[2]; if ((*shared | *non_shared | *value_length) < 128) { // Fast path: all three values are encoded in one byte each p += 3; } else { if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) { return nullptr; } } // Using an assert in place of "return null" since we should not pay the // cost of checking for corruption on every single key decoding assert(!(static_cast(limit - p) < (*non_shared + *value_length))); return p; } }; // Helper routine: similar to DecodeEntry but does not have assertions. // Instead, returns nullptr so that caller can detect and report failure. 
struct CheckAndDecodeEntry { inline const char* operator()(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared, uint32_t* value_length) { // We need 2 bytes for shared and non_shared size. We also need one more // byte either for value size or the actual value in case of value delta // encoding. if (limit - p < 3) { return nullptr; } *shared = reinterpret_cast(p)[0]; *non_shared = reinterpret_cast(p)[1]; *value_length = reinterpret_cast(p)[2]; if ((*shared | *non_shared | *value_length) < 128) { // Fast path: all three values are encoded in one byte each p += 3; } else { if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) { return nullptr; } } if (static_cast(limit - p) < (*non_shared + *value_length)) { return nullptr; } return p; } }; struct DecodeKey { inline const char* operator()(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared) { uint32_t value_length; return DecodeEntry()(p, limit, shared, non_shared, &value_length); } }; // In format_version 4, which is used by index blocks, the value size is not // encoded before the entry, as the value is known to be the handle with the // known size. struct DecodeKeyV4 { inline const char* operator()(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared) { // We need 2 bytes for shared and non_shared size. We also need one more // byte either for value size or the actual value in case of value delta // encoding. 
if (limit - p < 3) return nullptr; *shared = reinterpret_cast(p)[0]; *non_shared = reinterpret_cast(p)[1]; if ((*shared | *non_shared) < 128) { // Fast path: all three values are encoded in one byte each p += 2; } else { if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; } return p; } }; void DataBlockIter::Next() { ParseNextDataKey(); } void DataBlockIter::NextOrReport() { ParseNextDataKey(); } void IndexBlockIter::Next() { ParseNextIndexKey(); } void IndexBlockIter::Prev() { assert(Valid()); // Scan backwards to a restart point before current_ const uint32_t original = current_; while (GetRestartPoint(restart_index_) >= original) { if (restart_index_ == 0) { // No more entries current_ = restarts_; restart_index_ = num_restarts_; return; } restart_index_--; } SeekToRestartPoint(restart_index_); // Loop until end of current entry hits the start of original entry while (ParseNextIndexKey() && NextEntryOffset() < original) { } } // Similar to IndexBlockIter::Prev but also caches the prev entries void DataBlockIter::Prev() { assert(Valid()); assert(prev_entries_idx_ == -1 || static_cast(prev_entries_idx_) < prev_entries_.size()); // Check if we can use cached prev_entries_ if (prev_entries_idx_ > 0 && prev_entries_[prev_entries_idx_].offset == current_) { // Read cached CachedPrevEntry prev_entries_idx_--; const CachedPrevEntry& current_prev_entry = prev_entries_[prev_entries_idx_]; const char* key_ptr = nullptr; if (current_prev_entry.key_ptr != nullptr) { // The key is not delta encoded and stored in the data block key_ptr = current_prev_entry.key_ptr; key_pinned_ = true; } else { // The key is delta encoded and stored in prev_entries_keys_buff_ key_ptr = prev_entries_keys_buff_.data() + current_prev_entry.key_offset; key_pinned_ = false; } const Slice current_key(key_ptr, current_prev_entry.key_size); current_ = current_prev_entry.offset; raw_key_.SetKey(current_key, 
false /* copy */); value_ = current_prev_entry.value; key_ = applied_key_.UpdateAndGetKey(); // This is kind of odd in that applied_key_ may say the key is pinned while // key_pinned_ ends up being false. That'll only happen when the key resides // in a transient caching buffer. key_pinned_ = key_pinned_ && applied_key_.IsKeyPinned(); return; } // Clear prev entries cache prev_entries_idx_ = -1; prev_entries_.clear(); prev_entries_keys_buff_.clear(); // Scan backwards to a restart point before current_ const uint32_t original = current_; while (GetRestartPoint(restart_index_) >= original) { if (restart_index_ == 0) { // No more entries current_ = restarts_; restart_index_ = num_restarts_; return; } restart_index_--; } SeekToRestartPoint(restart_index_); do { if (!ParseNextDataKey()) { break; } Slice current_key = raw_key_.GetKey(); if (raw_key_.IsKeyPinned()) { // The key is not delta encoded prev_entries_.emplace_back(current_, current_key.data(), 0, current_key.size(), value()); } else { // The key is delta encoded, cache decoded key in buffer size_t new_key_offset = prev_entries_keys_buff_.size(); prev_entries_keys_buff_.append(current_key.data(), current_key.size()); prev_entries_.emplace_back(current_, nullptr, new_key_offset, current_key.size(), value()); } // Loop until end of current entry hits the start of original entry } while (NextEntryOffset() < original); prev_entries_idx_ = static_cast(prev_entries_.size()) - 1; } void DataBlockIter::Seek(const Slice& target) { Slice seek_key = target; PERF_TIMER_GUARD(block_seek_nanos); if (data_ == nullptr) { // Not init yet return; } uint32_t index = 0; bool skip_linear_scan = false; bool ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, &skip_linear_scan, comparator_); if (!ok) { return; } FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); } // Optimized Seek for point lookup for an internal key `target` // target = "seek_user_key @ type | seqno". 
// // For any type other than kTypeValue, kTypeDeletion, kTypeSingleDeletion, // or kTypeBlobIndex, this function behaves identically as Seek(). // // For any type in kTypeValue, kTypeDeletion, kTypeSingleDeletion, // or kTypeBlobIndex: // // If the return value is FALSE, iter location is undefined, and it means: // 1) there is no key in this block falling into the range: // ["seek_user_key @ type | seqno", "seek_user_key @ kTypeDeletion | 0"], // inclusive; AND // 2) the last key of this block has a greater user_key from seek_user_key // // If the return value is TRUE, iter location has two possibilies: // 1) If iter is valid, it is set to a location as if set by BinarySeek. In // this case, it points to the first key with a larger user_key or a matching // user_key with a seqno no greater than the seeking seqno. // 2) If the iter is invalid, it means that either all the user_key is less // than the seek_user_key, or the block ends with a matching user_key but // with a smaller [ type | seqno ] (i.e. a larger seqno, or the same seqno // but larger type). bool DataBlockIter::SeekForGetImpl(const Slice& target) { Slice target_user_key = ExtractUserKey(target); uint32_t map_offset = restarts_ + num_restarts_ * sizeof(uint32_t); uint8_t entry = data_block_hash_index_->Lookup(data_, map_offset, target_user_key); if (entry == kCollision) { // HashSeek not effective, falling back Seek(target); return true; } if (entry == kNoEntry) { // Even if we cannot find the user_key in this block, the result may // exist in the next block. Consider this example: // // Block N: [aab@100, ... , app@120] // bounary key: axy@50 (we make minimal assumption about a boundary key) // Block N+1: [axy@10, ... ] // // If seek_key = axy@60, the search will starts from Block N. // Even if the user_key is not found in the hash map, the caller still // have to continue searching the next block. // // In this case, we pretend the key is the the last restart interval. 
// The while-loop below will search the last restart interval for the // key. It will stop at the first key that is larger than the seek_key, // or to the end of the block if no one is larger. entry = static_cast(num_restarts_ - 1); } uint32_t restart_index = entry; // check if the key is in the restart_interval assert(restart_index < num_restarts_); SeekToRestartPoint(restart_index); const char* limit = nullptr; if (restart_index_ + 1 < num_restarts_) { limit = data_ + GetRestartPoint(restart_index_ + 1); } else { limit = data_ + restarts_; } while (true) { // Here we only linear seek the target key inside the restart interval. // If a key does not exist inside a restart interval, we avoid // further searching the block content accross restart interval boundary. // // TODO(fwu): check the left and write boundary of the restart interval // to avoid linear seek a target key that is out of range. if (!ParseNextDataKey(limit) || comparator_->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) { // we stop at the first potential matching user key. break; } } if (current_ == restarts_) { // Search reaches to the end of the block. There are three possibilites: // 1) there is only one user_key match in the block (otherwise collsion). // the matching user_key resides in the last restart interval, and it // is the last key of the restart interval and of the block as well. // ParseNextDataKey() skiped it as its [ type | seqno ] is smaller. // // 2) The seek_key is not found in the HashIndex Lookup(), i.e. kNoEntry, // AND all existing user_keys in the restart interval are smaller than // seek_user_key. // // 3) The seek_key is a false positive and happens to be hashed to the // last restart interval, AND all existing user_keys in the restart // interval are smaller than seek_user_key. // // The result may exist in the next block each case, so we return true. 
return true; } if (user_comparator_->Compare(raw_key_.GetUserKey(), target_user_key) != 0) { // the key is not in this block and cannot be at the next block either. return false; } // Here we are conservative and only support a limited set of cases ValueType value_type = ExtractValueType(applied_key_.UpdateAndGetKey()); if (value_type != ValueType::kTypeValue && value_type != ValueType::kTypeDeletion && value_type != ValueType::kTypeSingleDeletion && value_type != ValueType::kTypeBlobIndex) { Seek(target); return true; } // Result found, and the iter is correctly set. return true; } void IndexBlockIter::Seek(const Slice& target) { TEST_SYNC_POINT("IndexBlockIter::Seek:0"); PERF_TIMER_GUARD(block_seek_nanos); if (data_ == nullptr) { // Not init yet return; } Slice seek_key = target; if (!key_includes_seq_) { seek_key = ExtractUserKey(target); } status_ = Status::OK(); uint32_t index = 0; bool skip_linear_scan = false; bool ok = false; if (prefix_index_) { bool prefix_may_exist = true; ok = PrefixSeek(target, &index, &prefix_may_exist); if (!prefix_may_exist) { // This is to let the caller to distinguish between non-existing prefix, // and when key is larger than the last key, which both set Valid() to // false. current_ = restarts_; status_ = Status::NotFound(); } // restart interval must be one when hash search is enabled so the binary // search simply lands at the right place. 
skip_linear_scan = true; } else if (value_delta_encoded_) { ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, &skip_linear_scan, comparator_); } else { ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, &skip_linear_scan, comparator_); } if (!ok) { return; } FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); } void DataBlockIter::SeekForPrev(const Slice& target) { PERF_TIMER_GUARD(block_seek_nanos); Slice seek_key = target; if (data_ == nullptr) { // Not init yet return; } uint32_t index = 0; bool skip_linear_scan = false; bool ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, &skip_linear_scan, comparator_); if (!ok) { return; } FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); if (!Valid()) { SeekToLast(); } else { while (Valid() && comparator_->Compare(applied_key_.UpdateAndGetKey(), seek_key) > 0) { Prev(); } } } void DataBlockIter::SeekToFirst() { if (data_ == nullptr) { // Not init yet return; } SeekToRestartPoint(0); ParseNextDataKey(); } void DataBlockIter::SeekToFirstOrReport() { if (data_ == nullptr) { // Not init yet return; } SeekToRestartPoint(0); ParseNextDataKey(); } void IndexBlockIter::SeekToFirst() { if (data_ == nullptr) { // Not init yet return; } status_ = Status::OK(); SeekToRestartPoint(0); ParseNextIndexKey(); } void DataBlockIter::SeekToLast() { if (data_ == nullptr) { // Not init yet return; } SeekToRestartPoint(num_restarts_ - 1); while (ParseNextDataKey() && NextEntryOffset() < restarts_) { // Keep skipping } } void IndexBlockIter::SeekToLast() { if (data_ == nullptr) { // Not init yet return; } status_ = Status::OK(); SeekToRestartPoint(num_restarts_ - 1); while (ParseNextIndexKey() && NextEntryOffset() < restarts_) { // Keep skipping } } template void BlockIter::CorruptionError() { current_ = restarts_; restart_index_ = num_restarts_; status_ = Status::Corruption("bad entry in block"); raw_key_.Clear(); value_.clear(); } template bool DataBlockIter::ParseNextDataKey(const 
char* limit) { current_ = NextEntryOffset(); const char* p = data_ + current_; if (!limit) { limit = data_ + restarts_; // Restarts come right after data } if (p >= limit) { // No more entries to return. Mark as invalid. current_ = restarts_; restart_index_ = num_restarts_; return false; } // Decode next entry uint32_t shared, non_shared, value_length; p = DecodeEntryFunc()(p, limit, &shared, &non_shared, &value_length); if (p == nullptr || raw_key_.Size() < shared) { CorruptionError(); return false; } else { if (shared == 0) { // If this key doesn't share any bytes with prev key then we don't need // to decode it and can use its address in the block directly. raw_key_.SetKey(Slice(p, non_shared), false /* copy */); } else { // This key share `shared` bytes with prev key, we need to decode it raw_key_.TrimAppend(shared, p, non_shared); } key_ = applied_key_.UpdateAndGetKey(); key_pinned_ = applied_key_.IsKeyPinned(); #ifndef NDEBUG if (global_seqno_ != kDisableGlobalSequenceNumber) { // If we are reading a file with a global sequence number we should // expect that all encoded sequence numbers are zeros and any value // type is kTypeValue, kTypeMerge, kTypeDeletion, or kTypeRangeDeletion. 
uint64_t packed = ExtractInternalKeyFooter(raw_key_.GetKey()); SequenceNumber seqno; ValueType value_type; UnPackSequenceAndType(packed, &seqno, &value_type); assert(value_type == ValueType::kTypeValue || value_type == ValueType::kTypeMerge || value_type == ValueType::kTypeDeletion || value_type == ValueType::kTypeRangeDeletion); assert(seqno == 0); } #endif // NDEBUG value_ = Slice(p + non_shared, value_length); if (shared == 0) { while (restart_index_ + 1 < num_restarts_ && GetRestartPoint(restart_index_ + 1) < current_) { ++restart_index_; } } // else we are in the middle of a restart interval and the restart_index_ // thus has not changed return true; } } bool IndexBlockIter::ParseNextIndexKey() { current_ = NextEntryOffset(); const char* p = data_ + current_; const char* limit = data_ + restarts_; // Restarts come right after data if (p >= limit) { // No more entries to return. Mark as invalid. current_ = restarts_; restart_index_ = num_restarts_; return false; } // Decode next entry uint32_t shared, non_shared, value_length; if (value_delta_encoded_) { p = DecodeKeyV4()(p, limit, &shared, &non_shared); value_length = 0; } else { p = DecodeEntry()(p, limit, &shared, &non_shared, &value_length); } if (p == nullptr || raw_key_.Size() < shared) { CorruptionError(); return false; } if (shared == 0) { // If this key doesn't share any bytes with prev key then we don't need // to decode it and can use its address in the block directly. 
raw_key_.SetKey(Slice(p, non_shared), false /* copy */); } else { // This key share `shared` bytes with prev key, we need to decode it raw_key_.TrimAppend(shared, p, non_shared); } key_ = applied_key_.UpdateAndGetKey(); key_pinned_ = applied_key_.IsKeyPinned(); value_ = Slice(p + non_shared, value_length); if (shared == 0) { while (restart_index_ + 1 < num_restarts_ && GetRestartPoint(restart_index_ + 1) < current_) { ++restart_index_; } } // else we are in the middle of a restart interval and the restart_index_ // thus has not changed if (value_delta_encoded_ || global_seqno_state_ != nullptr) { DecodeCurrentValue(shared); } return true; } // The format: // restart_point 0: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) // restart_point 1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) // ... // restart_point n-1: k, v (off, sz), k, v (delta-sz), ..., k, v (delta-sz) // where, k is key, v is value, and its encoding is in parenthesis. // The format of each key is (shared_size, non_shared_size, shared, non_shared) // The format of each value, i.e., block handle, is (offset, size) whenever the // shared_size is 0, which included the first entry in each restart point. // Otherwise the format is delta-size = block handle size - size of last block // handle. void IndexBlockIter::DecodeCurrentValue(uint32_t shared) { Slice v(value_.data(), data_ + restarts_ - value_.data()); // Delta encoding is used if `shared` != 0. Status decode_s __attribute__((__unused__)) = decoded_value_.DecodeFrom( &v, have_first_key_, (value_delta_encoded_ && shared) ? &decoded_value_.handle : nullptr); assert(decode_s.ok()); value_ = Slice(value_.data(), v.data() - value_.data()); if (global_seqno_state_ != nullptr) { // Overwrite sequence number the same way as in DataBlockIter. 
IterKey& first_internal_key = global_seqno_state_->first_internal_key; first_internal_key.SetInternalKey(decoded_value_.first_internal_key, /* copy */ true); assert(GetInternalKeySeqno(first_internal_key.GetInternalKey()) == 0); ValueType value_type = ExtractValueType(first_internal_key.GetKey()); assert(value_type == ValueType::kTypeValue || value_type == ValueType::kTypeMerge || value_type == ValueType::kTypeDeletion || value_type == ValueType::kTypeRangeDeletion); first_internal_key.UpdateInternalKey(global_seqno_state_->global_seqno, value_type); decoded_value_.first_internal_key = first_internal_key.GetKey(); } } template void BlockIter::FindKeyAfterBinarySeek(const Slice& target, uint32_t index, bool skip_linear_scan, const Comparator* comp) { // SeekToRestartPoint() only does the lookup in the restart block. We need // to follow it up with Next() to position the iterator at the restart key. SeekToRestartPoint(index); Next(); if (!skip_linear_scan) { // Linear search (within restart block) for first key >= target uint32_t max_offset; if (index + 1 < num_restarts_) { // We are in a non-last restart interval. Since `BinarySeek()` guarantees // the next restart key is strictly greater than `target`, we can // terminate upon reaching it without any additional key comparison. max_offset = GetRestartPoint(index + 1); } else { // We are in the last restart interval. The while-loop will terminate by // `Valid()` returning false upon advancing past the block's last key. max_offset = port::kMaxUint32; } while (true) { Next(); if (!Valid()) { break; } if (current_ == max_offset) { assert(comp->Compare(applied_key_.UpdateAndGetKey(), target) > 0); break; } else if (comp->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) { break; } } } } // Binary searches in restart array to find the starting restart point for the // linear scan, and stores it in `*index`. Assumes restart array does not // contain duplicate keys. 
It is guaranteed that the restart key at `*index + 1` // is strictly greater than `target` or does not exist (this can be used to // elide a comparison when linear scan reaches all the way to the next restart // key). Furthermore, `*skip_linear_scan` is set to indicate whether the // `*index`th restart key is the final result so that key does not need to be // compared again later. template template bool BlockIter::BinarySeek(const Slice& target, uint32_t left, uint32_t right, uint32_t* index, bool* skip_linear_scan, const Comparator* comp) { assert(left <= right); if (restarts_ == 0) { // SST files dedicated to range tombstones are written with index blocks // that have no keys while also having `num_restarts_ == 1`. This would // cause a problem for `BinarySeek()` as it'd try to access the first key // which does not exist. We identify such blocks by the offset at which // their restarts are stored, and return false to prevent any attempted // key accesses. return false; } *skip_linear_scan = false; while (left < right) { uint32_t mid = (left + right + 1) / 2; uint32_t region_offset = GetRestartPoint(mid); uint32_t shared, non_shared; const char* key_ptr = DecodeKeyFunc()( data_ + region_offset, data_ + restarts_, &shared, &non_shared); if (key_ptr == nullptr || (shared != 0)) { CorruptionError(); return false; } Slice mid_key(key_ptr, non_shared); raw_key_.SetKey(mid_key, false /* copy */); int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target); if (cmp < 0) { // Key at "mid" is smaller than "target". Therefore all // blocks before "mid" are uninteresting. left = mid; } else if (cmp > 0) { // Key at "mid" is >= "target". Therefore all blocks at or // after "mid" are uninteresting. right = mid - 1; } else { *skip_linear_scan = true; left = right = mid; } } assert(left == right); *index = left; if (*index == 0) { // Special case as we land at zero as long as restart key at index 1 is > // "target". 
We need to compare the restart key at index 0 so we can set // `*skip_linear_scan` when the 0th restart key is >= "target". // // GetRestartPoint() is always zero for restart key zero; skip the restart // block access. uint32_t shared, non_shared; const char* key_ptr = DecodeKeyFunc()(data_, data_ + restarts_, &shared, &non_shared); if (key_ptr == nullptr || (shared != 0)) { CorruptionError(); return false; } Slice first_key(key_ptr, non_shared); raw_key_.SetKey(first_key, false /* copy */); int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target); *skip_linear_scan = cmp >= 0; } return true; } // Compare target key and the block key of the block of `block_index`. // Return -1 if error. int IndexBlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) { uint32_t region_offset = GetRestartPoint(block_index); uint32_t shared, non_shared; const char* key_ptr = value_delta_encoded_ ? DecodeKeyV4()(data_ + region_offset, data_ + restarts_, &shared, &non_shared) : DecodeKey()(data_ + region_offset, data_ + restarts_, &shared, &non_shared); if (key_ptr == nullptr || (shared != 0)) { CorruptionError(); return 1; // Return target is smaller } Slice block_key(key_ptr, non_shared); raw_key_.SetKey(block_key, false /* copy */); return comparator_->Compare(applied_key_.UpdateAndGetKey(), target); } // Binary search in block_ids to find the first block // with a key >= target bool IndexBlockIter::BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, uint32_t left, uint32_t right, uint32_t* index, bool* prefix_may_exist) { assert(left <= right); assert(index); assert(prefix_may_exist); *prefix_may_exist = true; uint32_t left_bound = left; while (left <= right) { uint32_t mid = (right + left) / 2; int cmp = CompareBlockKey(block_ids[mid], target); if (!status_.ok()) { return false; } if (cmp < 0) { // Key at "target" is larger than "mid". Therefore all // blocks before or at "mid" are uninteresting. 
left = mid + 1; } else { // Key at "target" is <= "mid". Therefore all blocks // after "mid" are uninteresting. // If there is only one block left, we found it. if (left == right) break; right = mid; } } if (left == right) { // In one of the two following cases: // (1) left is the first one of block_ids // (2) there is a gap of blocks between block of `left` and `left-1`. // we can further distinguish the case of key in the block or key not // existing, by comparing the target key and the key of the previous // block to the left of the block found. if (block_ids[left] > 0 && (left == left_bound || block_ids[left - 1] != block_ids[left] - 1) && CompareBlockKey(block_ids[left] - 1, target) > 0) { current_ = restarts_; *prefix_may_exist = false; return false; } *index = block_ids[left]; return true; } else { assert(left > right); // If the next block key is larger than seek key, it is possible that // no key shares the prefix with `target`, or all keys with the same // prefix as `target` are smaller than prefix. In the latter case, // we are mandated to set the position the same as the total order. // In the latter case, either: // (1) `target` falls into the range of the next block. In this case, // we can place the iterator to the next block, or // (2) `target` is larger than all block keys. In this case we can // keep the iterator invalidate without setting `prefix_may_exist` // to false. // We might sometimes end up with setting the total order position // while there is no key sharing the prefix as `target`, but it // still follows the contract. uint32_t right_index = block_ids[right]; assert(right_index + 1 <= num_restarts_); if (right_index + 1 < num_restarts_) { if (CompareBlockKey(right_index + 1, target) >= 0) { *index = right_index + 1; return true; } else { // We have to set the flag here because we are not positioning // the iterator to the total order position. 
*prefix_may_exist = false; } } // Mark iterator invalid current_ = restarts_; return false; } } bool IndexBlockIter::PrefixSeek(const Slice& target, uint32_t* index, bool* prefix_may_exist) { assert(index); assert(prefix_may_exist); assert(prefix_index_); *prefix_may_exist = true; Slice seek_key = target; if (!key_includes_seq_) { seek_key = ExtractUserKey(target); } uint32_t* block_ids = nullptr; uint32_t num_blocks = prefix_index_->GetBlocks(target, &block_ids); if (num_blocks == 0) { current_ = restarts_; *prefix_may_exist = false; return false; } else { assert(block_ids); return BinaryBlockIndexSeek(seek_key, block_ids, 0, num_blocks - 1, index, prefix_may_exist); } } uint32_t Block::NumRestarts() const { assert(size_ >= 2 * sizeof(uint32_t)); uint32_t block_footer = DecodeFixed32(data_ + size_ - sizeof(uint32_t)); uint32_t num_restarts = block_footer; if (size_ > kMaxBlockSizeSupportedByHashIndex) { // In BlockBuilder, we have ensured a block with HashIndex is less than // kMaxBlockSizeSupportedByHashIndex (64KiB). // // Therefore, if we encounter a block with a size > 64KiB, the block // cannot have HashIndex. So the footer will directly interpreted as // num_restarts. // // Such check is for backward compatibility. We can ensure legacy block // with a vary large num_restarts i.e. >= 0x80000000 can be interpreted // correctly as no HashIndex even if the MSB of num_restarts is set. 
return num_restarts; } BlockBasedTableOptions::DataBlockIndexType index_type; UnPackIndexTypeAndNumRestarts(block_footer, &index_type, &num_restarts); return num_restarts; } BlockBasedTableOptions::DataBlockIndexType Block::IndexType() const { assert(size_ >= 2 * sizeof(uint32_t)); if (size_ > kMaxBlockSizeSupportedByHashIndex) { // The check is for the same reason as that in NumRestarts() return BlockBasedTableOptions::kDataBlockBinarySearch; } uint32_t block_footer = DecodeFixed32(data_ + size_ - sizeof(uint32_t)); uint32_t num_restarts = block_footer; BlockBasedTableOptions::DataBlockIndexType index_type; UnPackIndexTypeAndNumRestarts(block_footer, &index_type, &num_restarts); return index_type; } Block::~Block() { // This sync point can be re-enabled if RocksDB can control the // initialization order of any/all static options created by the user. // TEST_SYNC_POINT("Block::~Block"); } Block::Block(BlockContents&& contents, size_t read_amp_bytes_per_bit, Statistics* statistics) : contents_(std::move(contents)), data_(contents_.data.data()), size_(contents_.data.size()), restart_offset_(0), num_restarts_(0) { TEST_SYNC_POINT("Block::Block:0"); if (size_ < sizeof(uint32_t)) { size_ = 0; // Error marker } else { // Should only decode restart points for uncompressed blocks num_restarts_ = NumRestarts(); switch (IndexType()) { case BlockBasedTableOptions::kDataBlockBinarySearch: restart_offset_ = static_cast(size_) - (1 + num_restarts_) * sizeof(uint32_t); if (restart_offset_ > size_ - sizeof(uint32_t)) { // The size is too small for NumRestarts() and therefore // restart_offset_ wrapped around. 
size_ = 0; } break; case BlockBasedTableOptions::kDataBlockBinaryAndHash: if (size_ < sizeof(uint32_t) /* block footer */ + sizeof(uint16_t) /* NUM_BUCK */) { size_ = 0; break; } uint16_t map_offset; data_block_hash_index_.Initialize( contents.data.data(), static_cast(contents.data.size() - sizeof(uint32_t)), /*chop off NUM_RESTARTS*/ &map_offset); restart_offset_ = map_offset - num_restarts_ * sizeof(uint32_t); if (restart_offset_ > map_offset) { // map_offset is too small for NumRestarts() and // therefore restart_offset_ wrapped around. size_ = 0; break; } break; default: size_ = 0; // Error marker } } if (read_amp_bytes_per_bit != 0 && statistics && size_ != 0) { read_amp_bitmap_.reset(new BlockReadAmpBitmap( restart_offset_, read_amp_bytes_per_bit, statistics)); } } DataBlockIter* Block::NewDataIterator(const Comparator* cmp, const Comparator* ucmp, SequenceNumber global_seqno, DataBlockIter* iter, Statistics* stats, bool block_contents_pinned) { DataBlockIter* ret_iter; if (iter != nullptr) { ret_iter = iter; } else { ret_iter = new DataBlockIter; } if (size_ < 2 * sizeof(uint32_t)) { ret_iter->Invalidate(Status::Corruption("bad block contents")); return ret_iter; } if (num_restarts_ == 0) { // Empty block. ret_iter->Invalidate(Status::OK()); return ret_iter; } else { ret_iter->Initialize( cmp, ucmp, data_, restart_offset_, num_restarts_, global_seqno, read_amp_bitmap_.get(), block_contents_pinned, data_block_hash_index_.Valid() ? 
&data_block_hash_index_ : nullptr); if (read_amp_bitmap_) { if (read_amp_bitmap_->GetStatistics() != stats) { // DB changed the Statistics pointer, we need to notify read_amp_bitmap_ read_amp_bitmap_->SetStatistics(stats); } } } return ret_iter; } IndexBlockIter* Block::NewIndexIterator( const Comparator* cmp, const Comparator* ucmp, SequenceNumber global_seqno, IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek, bool have_first_key, bool key_includes_seq, bool value_is_full, bool block_contents_pinned, BlockPrefixIndex* prefix_index) { IndexBlockIter* ret_iter; if (iter != nullptr) { ret_iter = iter; } else { ret_iter = new IndexBlockIter; } if (size_ < 2 * sizeof(uint32_t)) { ret_iter->Invalidate(Status::Corruption("bad block contents")); return ret_iter; } if (num_restarts_ == 0) { // Empty block. ret_iter->Invalidate(Status::OK()); return ret_iter; } else { BlockPrefixIndex* prefix_index_ptr = total_order_seek ? nullptr : prefix_index; ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_, global_seqno, prefix_index_ptr, have_first_key, key_includes_seq, value_is_full, block_contents_pinned); } return ret_iter; } size_t Block::ApproximateMemoryUsage() const { size_t usage = usable_size(); #ifdef ROCKSDB_MALLOC_USABLE_SIZE usage += malloc_usable_size((void*)this); #else usage += sizeof(*this); #endif // ROCKSDB_MALLOC_USABLE_SIZE if (read_amp_bitmap_) { usage += read_amp_bitmap_->ApproximateMemoryUsage(); } return usage; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block.h000066400000000000000000000603511370372246700200340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include "db/dbformat.h" #include "db/pinned_iterators_manager.h" #include "port/malloc.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "table/block_based/block_prefix_index.h" #include "table/block_based/data_block_hash_index.h" #include "table/format.h" #include "table/internal_iterator.h" #include "test_util/sync_point.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { struct BlockContents; class Comparator; template class BlockIter; class DataBlockIter; class IndexBlockIter; class BlockPrefixIndex; // BlockReadAmpBitmap is a bitmap that map the ROCKSDB_NAMESPACE::Block data // bytes to a bitmap with ratio bytes_per_bit. Whenever we access a range of // bytes in the Block we update the bitmap and increment // READ_AMP_ESTIMATE_USEFUL_BYTES. 
class BlockReadAmpBitmap { public: explicit BlockReadAmpBitmap(size_t block_size, size_t bytes_per_bit, Statistics* statistics) : bitmap_(nullptr), bytes_per_bit_pow_(0), statistics_(statistics), rnd_(Random::GetTLSInstance()->Uniform( static_cast(bytes_per_bit))) { TEST_SYNC_POINT_CALLBACK("BlockReadAmpBitmap:rnd", &rnd_); assert(block_size > 0 && bytes_per_bit > 0); // convert bytes_per_bit to be a power of 2 while (bytes_per_bit >>= 1) { bytes_per_bit_pow_++; } // num_bits_needed = ceil(block_size / bytes_per_bit) size_t num_bits_needed = ((block_size - 1) >> bytes_per_bit_pow_) + 1; assert(num_bits_needed > 0); // bitmap_size = ceil(num_bits_needed / kBitsPerEntry) size_t bitmap_size = (num_bits_needed - 1) / kBitsPerEntry + 1; // Create bitmap and set all the bits to 0 bitmap_ = new std::atomic[bitmap_size](); RecordTick(GetStatistics(), READ_AMP_TOTAL_READ_BYTES, block_size); } ~BlockReadAmpBitmap() { delete[] bitmap_; } void Mark(uint32_t start_offset, uint32_t end_offset) { assert(end_offset >= start_offset); // Index of first bit in mask uint32_t start_bit = (start_offset + (1 << bytes_per_bit_pow_) - rnd_ - 1) >> bytes_per_bit_pow_; // Index of last bit in mask + 1 uint32_t exclusive_end_bit = (end_offset + (1 << bytes_per_bit_pow_) - rnd_) >> bytes_per_bit_pow_; if (start_bit >= exclusive_end_bit) { return; } assert(exclusive_end_bit > 0); if (GetAndSet(start_bit) == 0) { uint32_t new_useful_bytes = (exclusive_end_bit - start_bit) << bytes_per_bit_pow_; RecordTick(GetStatistics(), READ_AMP_ESTIMATE_USEFUL_BYTES, new_useful_bytes); } } Statistics* GetStatistics() { return statistics_.load(std::memory_order_relaxed); } void SetStatistics(Statistics* stats) { statistics_.store(stats); } uint32_t GetBytesPerBit() { return 1 << bytes_per_bit_pow_; } size_t ApproximateMemoryUsage() const { #ifdef ROCKSDB_MALLOC_USABLE_SIZE return malloc_usable_size((void*)this); #endif // ROCKSDB_MALLOC_USABLE_SIZE return sizeof(*this); } private: // Get the current value of 
bit at `bit_idx` and set it to 1 inline bool GetAndSet(uint32_t bit_idx) { const uint32_t byte_idx = bit_idx / kBitsPerEntry; const uint32_t bit_mask = 1 << (bit_idx % kBitsPerEntry); return bitmap_[byte_idx].fetch_or(bit_mask, std::memory_order_relaxed) & bit_mask; } const uint32_t kBytesPersEntry = sizeof(uint32_t); // 4 bytes const uint32_t kBitsPerEntry = kBytesPersEntry * 8; // 32 bits // Bitmap used to record the bytes that we read, use atomic to protect // against multiple threads updating the same bit std::atomic* bitmap_; // (1 << bytes_per_bit_pow_) is bytes_per_bit. Use power of 2 to optimize // muliplication and division uint8_t bytes_per_bit_pow_; // Pointer to DB Statistics object, Since this bitmap may outlive the DB // this pointer maybe invalid, but the DB will update it to a valid pointer // by using SetStatistics() before calling Mark() std::atomic statistics_; uint32_t rnd_; }; // This Block class is not for any old block: it is designed to hold only // uncompressed blocks containing sorted key-value pairs. It is thus // suitable for storing uncompressed data blocks, index blocks (including // partitions), range deletion blocks, properties blocks, metaindex blocks, // as well as the top level of the partitioned filter structure (which is // actually an index of the filter partitions). It is NOT suitable for // compressed blocks in general, filter blocks/partitions, or compression // dictionaries (since the latter do not contain sorted key-value pairs). // Use BlockContents directly for those. // // See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format // for details of the format and the various block types. class Block { public: // Initialize the block with the specified contents. 
explicit Block(BlockContents&& contents, size_t read_amp_bytes_per_bit = 0, Statistics* statistics = nullptr); // No copying allowed Block(const Block&) = delete; void operator=(const Block&) = delete; ~Block(); size_t size() const { return size_; } const char* data() const { return data_; } // The additional memory space taken by the block data. size_t usable_size() const { return contents_.usable_size(); } uint32_t NumRestarts() const; bool own_bytes() const { return contents_.own_bytes(); } BlockBasedTableOptions::DataBlockIndexType IndexType() const; // If comparator is InternalKeyComparator, user_comparator is its user // comparator; they are equal otherwise. // // If iter is null, return new Iterator // If iter is not null, update this one and return it as Iterator* // // Updates read_amp_bitmap_ if it is not nullptr. // // If `block_contents_pinned` is true, the caller will guarantee that when // the cleanup functions are transferred from the iterator to other // classes, e.g. PinnableSlice, the pointer to the bytes will still be // valid. Either the iterator holds cache handle or ownership of some resource // and release them in a release function, or caller is sure that the data // will not go away (for example, it's from mmapped file which will not be // closed). // // NOTE: for the hash based lookup, if a key prefix doesn't match any key, // the iterator will simply be set as "invalid", rather than returning // the key that is just pass the target key. DataBlockIter* NewDataIterator(const Comparator* comparator, const Comparator* user_comparator, SequenceNumber global_seqno, DataBlockIter* iter = nullptr, Statistics* stats = nullptr, bool block_contents_pinned = false); // key_includes_seq, default true, means that the keys are in internal key // format. // value_is_full, default true, means that no delta encoding is // applied to values. // // If `prefix_index` is not nullptr this block will do hash lookup for the key // prefix. 
If total_order_seek is true, prefix_index_ is ignored. // // `have_first_key` controls whether IndexValue will contain // first_internal_key. It affects data serialization format, so the same value // have_first_key must be used when writing and reading index. // It is determined by IndexType property of the table. IndexBlockIter* NewIndexIterator(const Comparator* comparator, const Comparator* user_comparator, SequenceNumber global_seqno, IndexBlockIter* iter, Statistics* stats, bool total_order_seek, bool have_first_key, bool key_includes_seq, bool value_is_full, bool block_contents_pinned = false, BlockPrefixIndex* prefix_index = nullptr); // Report an approximation of how much memory has been used. size_t ApproximateMemoryUsage() const; private: BlockContents contents_; const char* data_; // contents_.data.data() size_t size_; // contents_.data.size() uint32_t restart_offset_; // Offset in data_ of restart array uint32_t num_restarts_; std::unique_ptr read_amp_bitmap_; DataBlockHashIndex data_block_hash_index_; }; // A GlobalSeqnoAppliedKey exposes a key with global sequence number applied // if configured with `global_seqno != kDisableGlobalSequenceNumber`. It may // hold a user key or an internal key since `format_version>=3` index blocks // contain user keys. In case it holds user keys, it must be configured with // `global_seqno == kDisableGlobalSequenceNumber`. 
class GlobalSeqnoAppliedKey { public: void Initialize(IterKey* key, SequenceNumber global_seqno) { key_ = key; global_seqno_ = global_seqno; #ifndef NDEBUG init_ = true; #endif // NDEBUG } Slice UpdateAndGetKey() { assert(init_); if (global_seqno_ == kDisableGlobalSequenceNumber) { return key_->GetKey(); } ParsedInternalKey parsed(Slice(), 0, kTypeValue); if (!ParseInternalKey(key_->GetInternalKey(), &parsed)) { assert(false); // error not handled in optimized builds return Slice(); } parsed.sequence = global_seqno_; scratch_.SetInternalKey(parsed); return scratch_.GetInternalKey(); } bool IsKeyPinned() const { return global_seqno_ == kDisableGlobalSequenceNumber && key_->IsKeyPinned(); } private: const IterKey* key_; SequenceNumber global_seqno_; IterKey scratch_; #ifndef NDEBUG bool init_ = false; #endif // NDEBUG }; template class BlockIter : public InternalIteratorBase { public: void InitializeBase(const Comparator* comparator, const char* data, uint32_t restarts, uint32_t num_restarts, SequenceNumber global_seqno, bool block_contents_pinned) { assert(data_ == nullptr); // Ensure it is called only once assert(num_restarts > 0); // Ensure the param is valid applied_key_.Initialize(&raw_key_, global_seqno); comparator_ = comparator; data_ = data; restarts_ = restarts; num_restarts_ = num_restarts; current_ = restarts_; restart_index_ = num_restarts_; global_seqno_ = global_seqno; block_contents_pinned_ = block_contents_pinned; cache_handle_ = nullptr; } // Makes Valid() return false, status() return `s`, and Seek()/Prev()/etc do // nothing. Calls cleanup functions. void InvalidateBase(Status s) { // Assert that the BlockIter is never deleted while Pinning is Enabled. assert(!pinned_iters_mgr_ || (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); data_ = nullptr; current_ = restarts_; status_ = s; // Call cleanup callbacks. 
Cleanable::Reset(); } bool Valid() const override { return current_ < restarts_; } Status status() const override { return status_; } Slice key() const override { assert(Valid()); return key_; } #ifndef NDEBUG ~BlockIter() override { // Assert that the BlockIter is never deleted while Pinning is Enabled. assert(!pinned_iters_mgr_ || (pinned_iters_mgr_ && !pinned_iters_mgr_->PinningEnabled())); } void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { pinned_iters_mgr_ = pinned_iters_mgr; } PinnedIteratorsManager* pinned_iters_mgr_ = nullptr; #endif bool IsKeyPinned() const override { return block_contents_pinned_ && key_pinned_; } bool IsValuePinned() const override { return block_contents_pinned_; } size_t TEST_CurrentEntrySize() { return NextEntryOffset() - current_; } uint32_t ValueOffset() const { return static_cast(value_.data() - data_); } void SetCacheHandle(Cache::Handle* handle) { cache_handle_ = handle; } Cache::Handle* cache_handle() { return cache_handle_; } virtual void Next() override = 0; protected: // Note: The type could be changed to InternalKeyComparator but we see a weird // performance drop by that. const Comparator* comparator_; const char* data_; // underlying block contents uint32_t num_restarts_; // Number of uint32_t entries in restart array // Index of restart block in which current_ or current_-1 falls uint32_t restart_index_; uint32_t restarts_; // Offset of restart array (list of fixed32) // current_ is offset in data_ of current entry. >= restarts_ if !Valid uint32_t current_; // Raw key from block. IterKey raw_key_; // raw_key_ with global seqno applied if necessary. Use this one for // comparisons. GlobalSeqnoAppliedKey applied_key_; // Key to be exposed to users. Slice key_; Slice value_; Status status_; bool key_pinned_; // Whether the block data is guaranteed to outlive this iterator, and // as long as the cleanup functions are transferred to another class, // e.g. 
PinnableSlice, the pointer to the bytes will still be valid. bool block_contents_pinned_; SequenceNumber global_seqno_; private: // Store the cache handle, if the block is cached. We need this since the // only other place the handle is stored is as an argument to the Cleanable // function callback, which is hard to retrieve. When multiple value // PinnableSlices reference the block, they need the cache handle in order // to bump up the ref count Cache::Handle* cache_handle_; public: // Return the offset in data_ just past the end of the current entry. inline uint32_t NextEntryOffset() const { // NOTE: We don't support blocks bigger than 2GB return static_cast((value_.data() + value_.size()) - data_); } uint32_t GetRestartPoint(uint32_t index) { assert(index < num_restarts_); return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); } void SeekToRestartPoint(uint32_t index) { raw_key_.Clear(); restart_index_ = index; // current_ will be fixed by ParseNextKey(); // ParseNextKey() starts at the end of value_, so set value_ accordingly uint32_t offset = GetRestartPoint(index); value_ = Slice(data_ + offset, 0); } void CorruptionError(); protected: template inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, uint32_t* index, bool* is_index_key_result, const Comparator* comp); void FindKeyAfterBinarySeek(const Slice& target, uint32_t index, bool is_index_key_result, const Comparator* comp); }; class DataBlockIter final : public BlockIter { public: DataBlockIter() : BlockIter(), read_amp_bitmap_(nullptr), last_bitmap_offset_(0) {} DataBlockIter(const Comparator* comparator, const Comparator* user_comparator, const char* data, uint32_t restarts, uint32_t num_restarts, SequenceNumber global_seqno, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, DataBlockHashIndex* data_block_hash_index) : DataBlockIter() { Initialize(comparator, user_comparator, data, restarts, num_restarts, global_seqno, read_amp_bitmap, 
block_contents_pinned, data_block_hash_index); } void Initialize(const Comparator* comparator, const Comparator* user_comparator, const char* data, uint32_t restarts, uint32_t num_restarts, SequenceNumber global_seqno, BlockReadAmpBitmap* read_amp_bitmap, bool block_contents_pinned, DataBlockHashIndex* data_block_hash_index) { InitializeBase(comparator, data, restarts, num_restarts, global_seqno, block_contents_pinned); user_comparator_ = user_comparator; raw_key_.SetIsUserKey(false); read_amp_bitmap_ = read_amp_bitmap; last_bitmap_offset_ = current_ + 1; data_block_hash_index_ = data_block_hash_index; } Slice value() const override { assert(Valid()); if (read_amp_bitmap_ && current_ < restarts_ && current_ != last_bitmap_offset_) { read_amp_bitmap_->Mark(current_ /* current entry offset */, NextEntryOffset() - 1); last_bitmap_offset_ = current_; } return value_; } void Seek(const Slice& target) override; inline bool SeekForGet(const Slice& target) { if (!data_block_hash_index_) { Seek(target); return true; } return SeekForGetImpl(target); } void SeekForPrev(const Slice& target) override; void Prev() override; void Next() final override; // Try to advance to the next entry in the block. If there is data corruption // or error, report it to the caller instead of aborting the process. May // incur higher CPU overhead because we need to perform check on every entry. void NextOrReport(); void SeekToFirst() override; // Try to seek to the first entry in the block. If there is data corruption // or error, report it to caller instead of aborting the process. May incur // higher CPU overhead because we need to perform check on every entry. void SeekToFirstOrReport(); void SeekToLast() override; void Invalidate(Status s) { InvalidateBase(s); // Clear prev entries cache. 
prev_entries_keys_buff_.clear(); prev_entries_.clear(); prev_entries_idx_ = -1; } private: // read-amp bitmap BlockReadAmpBitmap* read_amp_bitmap_; // last `current_` value we report to read-amp bitmp mutable uint32_t last_bitmap_offset_; struct CachedPrevEntry { explicit CachedPrevEntry(uint32_t _offset, const char* _key_ptr, size_t _key_offset, size_t _key_size, Slice _value) : offset(_offset), key_ptr(_key_ptr), key_offset(_key_offset), key_size(_key_size), value(_value) {} // offset of entry in block uint32_t offset; // Pointer to key data in block (nullptr if key is delta-encoded) const char* key_ptr; // offset of key in prev_entries_keys_buff_ (0 if key_ptr is not nullptr) size_t key_offset; // size of key size_t key_size; // value slice pointing to data in block Slice value; }; std::string prev_entries_keys_buff_; std::vector prev_entries_; int32_t prev_entries_idx_ = -1; DataBlockHashIndex* data_block_hash_index_; const Comparator* user_comparator_; template inline bool ParseNextDataKey(const char* limit = nullptr); bool SeekForGetImpl(const Slice& target); }; class IndexBlockIter final : public BlockIter { public: IndexBlockIter() : BlockIter(), prefix_index_(nullptr) {} // key_includes_seq, default true, means that the keys are in internal key // format. // value_is_full, default true, means that no delta encoding is // applied to values. void Initialize(const Comparator* comparator, const Comparator* user_comparator, const char* data, uint32_t restarts, uint32_t num_restarts, SequenceNumber global_seqno, BlockPrefixIndex* prefix_index, bool have_first_key, bool key_includes_seq, bool value_is_full, bool block_contents_pinned) { if (!key_includes_seq) { user_comparator_wrapper_ = std::unique_ptr( new UserComparatorWrapper(user_comparator)); } InitializeBase( key_includes_seq ? 
comparator : user_comparator_wrapper_.get(), data, restarts, num_restarts, kDisableGlobalSequenceNumber, block_contents_pinned); key_includes_seq_ = key_includes_seq; raw_key_.SetIsUserKey(!key_includes_seq_); prefix_index_ = prefix_index; value_delta_encoded_ = !value_is_full; have_first_key_ = have_first_key; if (have_first_key_ && global_seqno != kDisableGlobalSequenceNumber) { global_seqno_state_.reset(new GlobalSeqnoState(global_seqno)); } else { global_seqno_state_.reset(); } } Slice user_key() const override { if (key_includes_seq_) { return ExtractUserKey(key()); } return key(); } IndexValue value() const override { assert(Valid()); if (value_delta_encoded_ || global_seqno_state_ != nullptr) { return decoded_value_; } else { IndexValue entry; Slice v = value_; Status decode_s __attribute__((__unused__)) = entry.DecodeFrom(&v, have_first_key_, nullptr); assert(decode_s.ok()); return entry; } } // IndexBlockIter follows a different contract for prefix iterator // from data iterators. // If prefix of the seek key `target` exists in the file, it must // return the same result as total order seek. // If the prefix of `target` doesn't exist in the file, it can either // return the result of total order seek, or set both of Valid() = false // and status() = NotFound(). void Seek(const Slice& target) override; void SeekForPrev(const Slice&) override { assert(false); current_ = restarts_; restart_index_ = num_restarts_; status_ = Status::InvalidArgument( "RocksDB internal error: should never call SeekForPrev() on index " "blocks"); raw_key_.Clear(); value_.clear(); } void Prev() override; void Next() override; void SeekToFirst() override; void SeekToLast() override; void Invalidate(Status s) { InvalidateBase(s); } bool IsValuePinned() const override { return global_seqno_state_ != nullptr ? 
false : BlockIter::IsValuePinned(); } private: std::unique_ptr user_comparator_wrapper_; // Key is in InternalKey format bool key_includes_seq_; bool value_delta_encoded_; bool have_first_key_; // value includes first_internal_key BlockPrefixIndex* prefix_index_; // Whether the value is delta encoded. In that case the value is assumed to be // BlockHandle. The first value in each restart interval is the full encoded // BlockHandle; the restart of encoded size part of the BlockHandle. The // offset of delta encoded BlockHandles is computed by adding the size of // previous delta encoded values in the same restart interval to the offset of // the first value in that restart interval. IndexValue decoded_value_; // When sequence number overwriting is enabled, this struct contains the seqno // to overwrite with, and current first_internal_key with overwritten seqno. // This is rarely used, so we put it behind a pointer and only allocate when // needed. struct GlobalSeqnoState { // First internal key according to current index entry, but with sequence // number overwritten to global_seqno. IterKey first_internal_key; SequenceNumber global_seqno; explicit GlobalSeqnoState(SequenceNumber seqno) : global_seqno(seqno) {} }; std::unique_ptr global_seqno_state_; // Set *prefix_may_exist to false if no key possibly share the same prefix // as `target`. If not set, the result position should be the same as total // order Seek. bool PrefixSeek(const Slice& target, uint32_t* index, bool* prefix_may_exist); // Set *prefix_may_exist to false if no key can possibly share the same // prefix as `target`. If not set, the result position should be the same // as total order seek. 
bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids, uint32_t left, uint32_t right, uint32_t* index, bool* prefix_may_exist); inline int CompareBlockKey(uint32_t block_index, const Slice& target); inline bool ParseNextIndexKey(); // When value_delta_encoded_ is enabled it decodes the value which is assumed // to be BlockHandle and put it to decoded_value_ inline void DecodeCurrentValue(uint32_t shared); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_filter_block.cc000066400000000000000000000256361370372246700240560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/block_based_filter_block.h"

#include <algorithm>

#include "db/dbformat.h"
#include "monitoring/perf_context_imp.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "util/coding.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// Appends one "<key>: <value>\n" line to *props for the human readable dump.
// The key is right-aligned in a column of (offLength - tabLength) characters
// and the value is wrapped every dataLength characters, with continuation
// lines indented by offLength spaces.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  char cspace = ' ';
  std::string value_str("");
  size_t i = 0;
  const size_t dataLength = 64;
  const size_t tabLength = 2;
  const size_t offLength = 16;

  value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
  i += dataLength;
  while (i < value.size()) {
    value_str.append("\n");
    value_str.append(offLength, cspace);
    value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
    i += dataLength;
  }

  std::string result("");
  if (key.size() < (offLength - tabLength))
    result.append(size_t((offLength - tabLength)) - key.size(), cspace);
  result.append(key);

  props->append(result + ": " + value_str + "\n");
}

// Overload for non-string keys: the key is stringified with ToString().
template <class TKey>
void AppendItem(std::string* props, const TKey& key, const std::string& value) {
  std::string key_str = ROCKSDB_NAMESPACE::ToString(key);
  AppendItem(props, key_str, value);
}
}  // namespace

// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;

BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),
      prev_prefix_size_(0),
      num_added_(0) {
  assert(policy_);
}

// Emits filters until there is one per kFilterBase-sized range preceding
// `block_offset`.
void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
  uint64_t filter_index = (block_offset / kFilterBase);
  assert(filter_index >= filter_offsets_.size());
  while (filter_index > filter_offsets_.size()) {
    GenerateFilter();
  }
}

void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
  if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
    AddPrefix(key);
  }

  if (whole_key_filtering_) {
    AddKey(key);
  }
}

// Add key to filter if needed
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
  num_added_++;
  start_.push_back(entries_.size());
  entries_.append(key.data(), key.size());
}

// Add prefix to filter if needed
inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
  // get slice for most recently added entry
  Slice prev;
  if (prev_prefix_size_ > 0) {
    prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
  }

  Slice prefix = prefix_extractor_->Transform(key);
  // insert prefix only when it's different from the previous prefix.
  if (prev.size() == 0 || prefix != prev) {
    prev_prefix_start_ = entries_.size();
    prev_prefix_size_ = prefix.size();
    AddKey(prefix);
  }
}

// Flushes pending keys into a final filter and appends the trailer: the
// per-filter offset array, the offset of that array, and the base-lg byte.
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
                                           Status* status) {
  // In this impl we ignore BlockHandle
  *status = Status::OK();
  if (!start_.empty()) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = static_cast<uint32_t>(result_.size());
  for (size_t i = 0; i < filter_offsets_.size(); i++) {
    PutFixed32(&result_, filter_offsets_[i]);
  }

  PutFixed32(&result_, array_offset);
  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
  return Slice(result_);
}

void BlockBasedFilterBlockBuilder::GenerateFilter() {
  const size_t num_entries = start_.size();
  if (num_entries == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
    return;
  }

  // Make list of keys from flattened key structure
  start_.push_back(entries_.size());  // Simplify length computation
  tmp_entries_.resize(num_entries);
  for (size_t i = 0; i < num_entries; i++) {
    const char* base = entries_.data() + start_[i];
    size_t length = start_[i + 1] - start_[i];
    tmp_entries_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
  policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
                        &result_);

  tmp_entries_.clear();
  entries_.clear();
  start_.clear();
  prev_prefix_start_ = 0;
  prev_prefix_size_ = 0;
}

BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const BlockBasedTable* t, CachableEntry<BlockContents>&& filter_block)
    : FilterBlockReaderCommon(t, std::move(filter_block)) {
  assert(table());
  assert(table()->get_rep());
  assert(table()->get_rep()->filter_policy);
}

// Builds a reader, optionally reading the filter block up front. Returns a
// null pointer if that read fails. When the block went through the cache and
// is not pinned, the local reference is dropped again.
std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create(
    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
    bool use_cache, bool prefetch, bool pin,
    BlockCacheLookupContext* lookup_context) {
  assert(table);
  assert(table->get_rep());
  assert(!pin || prefetch);

  CachableEntry<BlockContents> filter_block;
  if (prefetch || !use_cache) {
    const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
                                     use_cache, nullptr /* get_context */,
                                     lookup_context, &filter_block);
    if (!s.ok()) {
      IGNORE_STATUS_IF_ERROR(s);
      return std::unique_ptr<FilterBlockReader>();
    }

    if (use_cache && !pin) {
      filter_block.Reset();
    }
  }

  return std::unique_ptr<FilterBlockReader>(
      new BlockBasedFilterBlockReader(table, std::move(filter_block)));
}

bool BlockBasedFilterBlockReader::KeyMayMatch(
    const Slice& key, const SliceTransform* /* prefix_extractor */,
    uint64_t block_offset, const bool no_io,
    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) {
  assert(block_offset != kNotValid);
  if (!whole_key_filtering()) {
    return true;
  }
  return MayMatch(key, block_offset, no_io, get_context, lookup_context);
}

bool BlockBasedFilterBlockReader::PrefixMayMatch(
    const Slice& prefix, const SliceTransform* /* prefix_extractor */,
    uint64_t block_offset, const bool no_io,
    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) {
  assert(block_offset != kNotValid);
  return MayMatch(prefix, block_offset, no_io, get_context, lookup_context);
}

// Parses the filter block trailer. On success sets:
//   *data    - start of the block contents
//   *offset  - start of the per-filter offset array
//   *num     - number of filters
//   *base_lg - log2 of the data range covered by one filter
// Returns false if the block is too short, the array offset is out of range,
// or the base-lg byte cannot be used as a shift count for a 64-bit offset.
bool BlockBasedFilterBlockReader::ParseFieldsFromBlock(
    const BlockContents& contents, const char** data, const char** offset,
    size_t* num, size_t* base_lg) {
  assert(data);
  assert(offset);
  assert(num);
  assert(base_lg);

  const size_t n = contents.data.size();
  if (n < 5) {  // 1 byte for base_lg and 4 for start of offset array
    return false;
  }

  const uint32_t last_word = DecodeFixed32(contents.data.data() + n - 5);
  if (last_word > n - 5) {
    return false;
  }

  // Read the encoding parameter as an unsigned byte: plain `char` may be
  // signed, and sign-extending it into size_t would produce a huge value.
  const size_t lg = static_cast<unsigned char>(contents.data[n - 1]);
  if (lg >= sizeof(uint64_t) * 8) {
    // Callers compute `block_offset >> base_lg` on a uint64_t; a shift count
    // of 64 or more is undefined behavior, so treat the block as corrupt.
    return false;
  }

  *data = contents.data.data();
  *offset = (*data) + last_word;
  *num = (n - 5 - last_word) / 4;
  *base_lg = lg;

  return true;
}

// Looks up the filter covering `block_offset` and asks the filter policy
// whether `entry` may be present. Any read or parse error is reported as a
// potential match.
bool BlockBasedFilterBlockReader::MayMatch(
    const Slice& entry, uint64_t block_offset, bool no_io,
    GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
  CachableEntry<BlockContents> filter_block;

  const Status s =
      GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
  if (!s.ok()) {
    return true;
  }

  assert(filter_block.GetValue());

  const char* data = nullptr;
  const char* offset = nullptr;
  size_t num = 0;
  size_t base_lg = 0;
  if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
                            &base_lg)) {
    return true;  // Errors are treated as potential matches
  }

  const uint64_t index = block_offset >> base_lg;
  if (index < num) {
    const uint32_t start = DecodeFixed32(offset + index * 4);
    const uint32_t limit = DecodeFixed32(offset + index * 4 + 4);
    if (start <= limit && limit <= static_cast<uint32_t>(offset - data)) {
      const Slice filter = Slice(data + start, limit - start);

      assert(table());
      assert(table()->get_rep());
      const FilterPolicy* const policy = table()->get_rep()->filter_policy;

      const bool may_match = policy->KeyMayMatch(entry, filter);
      if (may_match) {
        PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
        return true;
      } else {
        PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
        return false;
      }
    } else if (start == limit) {
      // Empty filters do not match any entries
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}

size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
  size_t usage = ApproximateFilterBlockMemoryUsage();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  usage += malloc_usable_size(const_cast<BlockBasedFilterBlockReader*>(this));
#else
  usage += sizeof(*this);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  return usage;
}

// Produces a human readable dump: the number of filters followed by a hex
// dump of every non-empty filter, keyed by its start offset.
std::string BlockBasedFilterBlockReader::ToString() const {
  CachableEntry<BlockContents> filter_block;

  const Status s =
      GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */,
                           nullptr /* lookup_context */, &filter_block);
  if (!s.ok()) {
    return std::string("Unable to retrieve filter block");
  }

  assert(filter_block.GetValue());

  const char* data = nullptr;
  const char* offset = nullptr;
  size_t num = 0;
  size_t base_lg = 0;
  if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
                            &base_lg)) {
    return std::string("Error parsing filter block");
  }

  // Filter payloads live in [data, offset); anything past that is trailer.
  const size_t filters_size = static_cast<size_t>(offset - data);

  std::string result;
  result.reserve(1024);

  std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
  AppendItem(&result, s_fb, ROCKSDB_NAMESPACE::ToString(num));
  AppendItem(&result, s_bo, s_hd);

  for (size_t index = 0; index < num; index++) {
    uint32_t start = DecodeFixed32(offset + index * 4);
    uint32_t limit = DecodeFixed32(offset + index * 4 + 4);

    // Skip empty filters, and also skip entries whose offsets are inverted or
    // point outside the filter area, so a corrupt block cannot cause an
    // out-of-bounds read while dumping.
    if (start < limit && limit <= filters_size) {
      result.append(" filter block # " +
                    ROCKSDB_NAMESPACE::ToString(index + 1) + "\n");
      Slice filter = Slice(data + start, limit - start);
      AppendItem(&result, start, filter.ToString(true));
    }
  }
  return result;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/block_based_filter_block.h000066400000000000000000000116501370372246700237070ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// A BlockBasedFilterBlockBuilder is used to construct all of the filters for a
// particular Table.  It generates a single string which is stored as
// a special block in the Table.
//
// The sequence of calls to BlockBasedFilterBlockBuilder must match the regexp:
//      (StartBlock Add*)* Finish
//
// Instances cannot be copied or assigned.
class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
 public:
  // Both arguments are consumed by the out-of-line constructor.
  // NOTE(review): prefix_extractor may apparently be null (the unit tests
  // pass nullptr) -- confirm against the definition.
  BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor,
                               const BlockBasedTableOptions& table_opt);
  // No copying allowed
  BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&) = delete;
  void operator=(const BlockBasedFilterBlockBuilder&) = delete;

  // Always true: this is the block-based flavour of FilterBlockBuilder.
  virtual bool IsBlockBased() override { return true; }
  // Announces the data block starting at `block_offset`; see the call
  // sequence documented on the class.
  virtual void StartBlock(uint64_t block_offset) override;
  // Adds `key` to the filter being built.
  virtual void Add(const Slice& key) override;
  // Returns the running count kept in num_added_.
  virtual size_t NumAdded() const override { return num_added_; }
  // Completes the filter block and returns its contents.
  // NOTE(review): `tmp` looks unused by this builder and `status` presumably
  // receives the outcome -- confirm against the definition.
  virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
  // Declaring the override above would hide the other Finish overloads of
  // the base class; this re-exposes them (callers use the no-argument form).
  using FilterBlockBuilder::Finish;

 private:
  // Internal steps of the build; defined out of line.
  void AddKey(const Slice& key);
  void AddPrefix(const Slice& key);
  void GenerateFilter();

  // important: all of these might point to invalid addresses
  // at the time of destruction of this filter block. destructor
  // should NOT dereference them.
  const FilterPolicy* policy_;
  const SliceTransform* prefix_extractor_;
  bool whole_key_filtering_;

  size_t prev_prefix_start_;  // the position of the last appended prefix
                              // to "entries_".
  size_t prev_prefix_size_;   // the length of the last appended prefix to
                              // "entries_".
  std::string entries_;       // Flattened entry contents
  std::vector start_;         // Starting index in entries_ of each entry
  std::string result_;        // Filter data computed so far
  std::vector tmp_entries_;   // policy_->CreateFilter() argument
  std::vector filter_offsets_;
  size_t num_added_;          // Number of keys added
};
MayMatch(const Slice& entry, uint64_t block_offset, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context) const; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_filter_block_test.cc000066400000000000000000000434631370372246700251130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "table/block_based/block_based_filter_block.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/mock_block_based_table.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "util/hash.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { // For testing: emit an array with one hash value per key class TestHashFilter : public FilterPolicy { public: const char* Name() const override { return "TestHashFilter"; } void CreateFilter(const Slice* keys, int n, std::string* dst) const override { for (int i = 0; i < n; i++) { uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); PutFixed32(dst, h); } } bool KeyMayMatch(const Slice& key, const Slice& filter) const override { uint32_t h = Hash(key.data(), key.size(), 1); for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { if (h == DecodeFixed32(filter.data() + i)) { return true; } } return false; } }; class MockBlockBasedTable : public BlockBasedTable { public: explicit MockBlockBasedTable(Rep* rep) : BlockBasedTable(rep, nullptr /* block_cache_tracer */) {} }; class FilterBlockTest : public 
// A builder that never saw a key must still emit a well-formed block, and a
// reader over that block must answer "may match" for any block offset.
TEST_F(FilterBlockTest, EmptyBuilder) {
  BlockBasedFilterBlockBuilder empty_builder(nullptr, table_options_);
  Slice encoded(empty_builder.Finish());
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(encoded));

  CachableEntry<BlockContents> contents(
      new BlockContents(encoded), nullptr /* cache */,
      nullptr /* cache_handle */, true /* own_value */);

  BlockBasedFilterBlockReader filter_reader(table_.get(), std::move(contents));

  // Probe the same key at the start of the table and far beyond it.
  for (const uint64_t block_offset : {uint64_t{0}, uint64_t{100000}}) {
    ASSERT_TRUE(filter_reader.KeyMayMatch(
        "foo", /*prefix_extractor=*/nullptr, block_offset,
        /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
        /*lookup_context=*/nullptr));
  }
}
// Builds a filter block whose data blocks fall into several filters (one of
// them empty) and checks that each key is only reported for the filter it was
// added to.
TEST_F(FilterBlockTest, MultiChunk) {
  BlockBasedFilterBlockBuilder chunked_builder(nullptr, table_options_);

  // First filter
  chunked_builder.StartBlock(0);
  chunked_builder.Add("foo");
  chunked_builder.StartBlock(2000);
  chunked_builder.Add("bar");

  // Second filter
  chunked_builder.StartBlock(3100);
  chunked_builder.Add("box");

  // Third filter is empty

  // Last filter
  chunked_builder.StartBlock(9000);
  chunked_builder.Add("box");
  chunked_builder.Add("hello");

  Slice encoded(chunked_builder.Finish());

  CachableEntry<BlockContents> contents(
      new BlockContents(encoded), nullptr /* cache */,
      nullptr /* cache_handle */, true /* own_value */);

  BlockBasedFilterBlockReader filter_reader(table_.get(), std::move(contents));

  // Every probe below differs only in key and block offset.
  const auto may_match = [&filter_reader](const char* key,
                                          uint64_t block_offset) {
    return filter_reader.KeyMayMatch(
        key, /*prefix_extractor=*/nullptr, block_offset,
        /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
        /*lookup_context=*/nullptr);
  };

  // Check first filter
  ASSERT_TRUE(may_match("foo", 0));
  ASSERT_TRUE(may_match("bar", 2000));
  ASSERT_TRUE(!may_match("box", 0));
  ASSERT_TRUE(!may_match("hello", 0));

  // Check second filter
  ASSERT_TRUE(may_match("box", 3100));
  ASSERT_TRUE(!may_match("foo", 3100));
  ASSERT_TRUE(!may_match("bar", 3100));
  ASSERT_TRUE(!may_match("hello", 3100));

  // Check third filter (empty)
  ASSERT_TRUE(!may_match("foo", 4100));
  ASSERT_TRUE(!may_match("bar", 4100));
  ASSERT_TRUE(!may_match("box", 4100));
  ASSERT_TRUE(!may_match("hello", 4100));

  // Check last filter
  ASSERT_TRUE(may_match("box", 9000));
  ASSERT_TRUE(may_match("hello", 9000));
  ASSERT_TRUE(!may_match("foo", 9000));
  ASSERT_TRUE(!may_match("bar", 9000));
}
table_options_); builder->StartBlock(100); builder->Add("foo"); builder->Add("bar"); builder->Add("box"); builder->StartBlock(200); builder->Add("box"); builder->StartBlock(300); builder->Add("hello"); Slice slice(builder->Finish()); CachableEntry block( new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FilterBlockReader* reader = new BlockBasedFilterBlockReader(table_.get(), std::move(block)); ASSERT_TRUE(reader->KeyMayMatch( "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader->KeyMayMatch( "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader->KeyMayMatch( "box", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader->KeyMayMatch( "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader->KeyMayMatch( "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "other", /*prefix_extractor=*/nullptr, /*block_offset=*/100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); delete builder; delete reader; } TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { FilterBlockBuilder* builder = new BlockBasedFilterBlockBuilder(nullptr, table_options_); // First filter 
builder->StartBlock(0); builder->Add("foo"); builder->StartBlock(2000); builder->Add("bar"); // Second filter builder->StartBlock(3100); builder->Add("box"); // Third filter is empty // Last filter builder->StartBlock(9000); builder->Add("box"); builder->Add("hello"); Slice slice(builder->Finish()); CachableEntry block( new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FilterBlockReader* reader = new BlockBasedFilterBlockReader(table_.get(), std::move(block)); // Check first filter ASSERT_TRUE(reader->KeyMayMatch( "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader->KeyMayMatch( "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/2000, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); // Check second filter ASSERT_TRUE(reader->KeyMayMatch( "box", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "hello", 
/*prefix_extractor=*/nullptr, /*block_offset=*/3100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); // Check third filter (empty) ASSERT_TRUE(!reader->KeyMayMatch( "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); // Check last filter ASSERT_TRUE(reader->KeyMayMatch( "box", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader->KeyMayMatch( "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader->KeyMayMatch( "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); delete builder; delete reader; } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } 
rocksdb-6.11.4/table/block_based/block_based_table_builder.cc000066400000000000000000002067111370372246700242070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "table/block_based/block_based_table_builder.h" #include #include #include #include #include #include #include #include #include #include "db/dbformat.h" #include "index_builder.h" #include "port/lang.h" #include "rocksdb/cache.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/flush_block_policy.h" #include "rocksdb/merge_operator.h" #include "rocksdb/table.h" #include "table/block_based/block.h" #include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "table/block_based/filter_block.h" #include "table/block_based/filter_policy_internal.h" #include "table/block_based/full_filter_block.h" #include "table/block_based/partitioned_filter_block.h" #include "table/format.h" #include "table/table_builder.h" #include "memory/memory_allocator.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/work_queue.h" #include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { extern const std::string kHashIndexPrefixesBlock; extern const std::string kHashIndexPrefixesMetadataBlock; typedef BlockBasedTableOptions::IndexType IndexType; // Without anonymous namespace here, we fail the 
// Decides whether a compressed block is worth keeping: returns true only when
// compressed_size is strictly below raw_size minus one eighth of raw_size
// (integer division), i.e. compression saved more than roughly 12.5%.
bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) {
  const size_t one_eighth = raw_size / 8u;
  const size_t size_ceiling = raw_size - one_eighth;
  if (compressed_size >= size_ceiling) {
    return false;
  }
  return true;
}
// Compresses `raw` with the algorithm described by `info` and reports the
// outcome through `*type`.
//
// format_version is the block format as defined in include/rocksdb/table.h
//
// Returns a Slice built from `*compressed_output` when compression succeeded
// with a good enough ratio (then `*type` is info.type()); otherwise returns
// `raw` unchanged with `*type` set to kNoCompression.
//
// When `do_sample` is set, sampling is enabled in `info`, and both
// `sampled_output_fast` and `sampled_output_slow` are non-null, roughly one
// call in info.SampleForCompression() additionally compresses `raw` with a
// fast and a slow algorithm into those two strings. The results of these
// sample compressions do not influence the return value.
Slice CompressBlock(const Slice& raw, const CompressionInfo& info,
                    CompressionType* type, uint32_t format_version,
                    bool do_sample, std::string* compressed_output,
                    std::string* sampled_output_fast,
                    std::string* sampled_output_slow) {
  *type = info.type();

  // Nothing to compress and nothing to sample: hand the input back.
  if (info.type() == kNoCompression && !info.SampleForCompression()) {
    return raw;
  }

  // If requested, we sample one in every N block with a
  // fast and slow compression algorithm and report the stats.
  // The users can use these stats to decide if it is worthwhile
  // enabling compression and they also get a hint about which
  // compression algorithm will be beneficial.
  //
  // The conditions are evaluated left to right with short-circuiting, so the
  // thread-local random generator is consulted only when sampling was
  // requested and is enabled.
  if (do_sample && info.SampleForCompression() &&
      Random::GetTLSInstance()->OneIn((int)info.SampleForCompression()) &&
      sampled_output_fast && sampled_output_slow) {
    // Sampling with a fast compression algorithm
    if (LZ4_Supported() || Snappy_Supported()) {
      // LZ4 is preferred over Snappy when both are available.
      CompressionType c =
          LZ4_Supported() ? kLZ4Compression : kSnappyCompression;
      CompressionContext context(c);
      CompressionOptions options;
      CompressionInfo info_tmp(options, context,
                               CompressionDict::GetEmptyDict(), c,
                               info.SampleForCompression());

      // The success flag is deliberately ignored for samples.
      CompressBlockInternal(raw, info_tmp, format_version, sampled_output_fast);
    }

    // Sampling with a slow but high-compression algorithm
    if (ZSTD_Supported() || Zlib_Supported()) {
      // ZSTD is preferred over zlib when both are available.
      CompressionType c = ZSTD_Supported() ? kZSTD : kZlibCompression;
      CompressionContext context(c);
      CompressionOptions options;
      CompressionInfo info_tmp(options, context,
                               CompressionDict::GetEmptyDict(), c,
                               info.SampleForCompression());
      // The success flag is deliberately ignored for samples.
      CompressBlockInternal(raw, info_tmp, format_version, sampled_output_slow);
    }
  }

  // Actually compress the data
  if (*type != kNoCompression) {
    if (CompressBlockInternal(raw, info, format_version, compressed_output) &&
        GoodCompressionRatio(compressed_output->size(), raw.size())) {
      return *compressed_output;
    }
  }

  // Compression method is not supported, or not good
  // compression ratio, so just fall back to uncompressed form.
  *type = kNoCompression;
  return raw;
}
const uint64_t kBlockBasedTableMagicNumber = 0x88e241b785f4cff7ull; // We also support reading and writing legacy block based table format (for // backwards compatibility) const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull; // A collector that collects properties of interest to block-based table. // For now this class looks heavy-weight since we only write one additional // property. // But in the foreseeable future, we will add more and more properties that are // specific to block-based table. class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector : public IntTblPropCollector { public: explicit BlockBasedTablePropertiesCollector( BlockBasedTableOptions::IndexType index_type, bool whole_key_filtering, bool prefix_filtering) : index_type_(index_type), whole_key_filtering_(whole_key_filtering), prefix_filtering_(prefix_filtering) {} Status InternalAdd(const Slice& /*key*/, const Slice& /*value*/, uint64_t /*file_size*/) override { // Intentionally left blank. Have no interest in collecting stats for // individual key/value pairs. return Status::OK(); } virtual void BlockAdd(uint64_t /* blockRawBytes */, uint64_t /* blockCompressedBytesFast */, uint64_t /* blockCompressedBytesSlow */) override { // Intentionally left blank. No interest in collecting stats for // blocks. return; } Status Finish(UserCollectedProperties* properties) override { std::string val; PutFixed32(&val, static_cast(index_type_)); properties->insert({BlockBasedTablePropertyNames::kIndexType, val}); properties->insert({BlockBasedTablePropertyNames::kWholeKeyFiltering, whole_key_filtering_ ? kPropTrue : kPropFalse}); properties->insert({BlockBasedTablePropertyNames::kPrefixFiltering, prefix_filtering_ ? kPropTrue : kPropFalse}); return Status::OK(); } // The name of the properties collector can be used for debugging purpose. 
const char* Name() const override { return "BlockBasedTablePropertiesCollector"; } UserCollectedProperties GetReadableProperties() const override { // Intentionally left blank. return UserCollectedProperties(); } private: BlockBasedTableOptions::IndexType index_type_; bool whole_key_filtering_; bool prefix_filtering_; }; struct BlockBasedTableBuilder::Rep { const ImmutableCFOptions ioptions; const MutableCFOptions moptions; const BlockBasedTableOptions table_options; const InternalKeyComparator& internal_comparator; WritableFileWriter* file; std::atomic offset; // Synchronize status & io_status accesses across threads from main thread, // compression thread and write thread in parallel compression. std::mutex status_mutex; size_t alignment; BlockBuilder data_block; // Buffers uncompressed data blocks and keys to replay later. Needed when // compression dictionary is enabled so we can finalize the dictionary before // compressing any data blocks. // TODO(ajkr): ideally we don't buffer all keys and all uncompressed data // blocks as it's redundant, but it's easier to implement for now. std::vector>> data_block_and_keys_buffers; BlockBuilder range_del_block; InternalKeySliceTransform internal_prefix_transform; std::unique_ptr index_builder; PartitionedIndexBuilder* p_index_builder_ = nullptr; std::string last_key; const Slice* first_key_in_next_block = nullptr; CompressionType compression_type; uint64_t sample_for_compression; CompressionOptions compression_opts; std::unique_ptr compression_dict; std::vector> compression_ctxs; std::vector> verify_ctxs; std::unique_ptr verify_dict; size_t data_begin_offset = 0; TableProperties props; // States of the builder. // // - `kBuffered`: This is the initial state where zero or more data blocks are // accumulated uncompressed in-memory. From this state, call // `EnterUnbuffered()` to finalize the compression dictionary if enabled, // compress/write out any buffered blocks, and proceed to the `kUnbuffered` // state. 
// // - `kUnbuffered`: This is the state when compression dictionary is finalized // either because it wasn't enabled in the first place or it's been created // from sampling previously buffered data. In this state, blocks are simply // compressed/written out as they fill up. From this state, call `Finish()` // to complete the file (write meta-blocks, etc.), or `Abandon()` to delete // the partially created file. // // - `kClosed`: This indicates either `Finish()` or `Abandon()` has been // called, so the table builder is no longer usable. We must be in this // state by the time the destructor runs. enum class State { kBuffered, kUnbuffered, kClosed, }; State state; const bool use_delta_encoding_for_index_values; std::unique_ptr filter_builder; char compressed_cache_key_prefix[BlockBasedTable::kMaxCacheKeyPrefixSize]; size_t compressed_cache_key_prefix_size; BlockHandle pending_handle; // Handle to add to index block std::string compressed_output; std::unique_ptr flush_block_policy; int level_at_creation; uint32_t column_family_id; const std::string& column_family_name; uint64_t creation_time = 0; uint64_t oldest_key_time = 0; const uint64_t target_file_size; uint64_t file_creation_time = 0; std::vector> table_properties_collectors; std::unique_ptr pc_rep; uint64_t get_offset() { return offset.load(std::memory_order_relaxed); } void set_offset(uint64_t o) { offset.store(o, std::memory_order_relaxed); } const IOStatus& GetIOStatus() { if (compression_opts.parallel_threads > 1) { std::lock_guard lock(status_mutex); return io_status; } else { return io_status; } } const Status& GetStatus() { if (compression_opts.parallel_threads > 1) { std::lock_guard lock(status_mutex); return status; } else { return status; } } void SyncStatusFromIOStatus() { if (compression_opts.parallel_threads > 1) { std::lock_guard lock(status_mutex); if (status.ok()) { status = io_status; } } else if (status.ok()) { status = io_status; } } // Never erase an existing status that is not OK. 
void SetStatus(Status s) { if (!s.ok()) { // Locking is an overkill for non compression_opts.parallel_threads // case but since it's unlikely that s is not OK, we take this cost // to be simplicity. std::lock_guard lock(status_mutex); if (status.ok()) { status = s; } } } // Never erase an existing I/O status that is not OK. void SetIOStatus(IOStatus ios) { if (!ios.ok()) { // Locking is an overkill for non compression_opts.parallel_threads // case but since it's unlikely that s is not OK, we take this cost // to be simplicity. std::lock_guard lock(status_mutex); if (io_status.ok()) { io_status = ios; } } } Rep(const ImmutableCFOptions& _ioptions, const MutableCFOptions& _moptions, const BlockBasedTableOptions& table_opt, const InternalKeyComparator& icomparator, const std::vector>* int_tbl_prop_collector_factories, uint32_t _column_family_id, WritableFileWriter* f, const CompressionType _compression_type, const uint64_t _sample_for_compression, const CompressionOptions& _compression_opts, const bool skip_filters, const int _level_at_creation, const std::string& _column_family_name, const uint64_t _creation_time, const uint64_t _oldest_key_time, const uint64_t _target_file_size, const uint64_t _file_creation_time) : ioptions(_ioptions), moptions(_moptions), table_options(table_opt), internal_comparator(icomparator), file(f), offset(0), alignment(table_options.block_align ? std::min(table_options.block_size, kDefaultPageSize) : 0), data_block(table_options.block_restart_interval, table_options.use_delta_encoding, false /* use_value_delta_encoding */, icomparator.user_comparator() ->CanKeysWithDifferentByteContentsBeEqual() ? 
BlockBasedTableOptions::kDataBlockBinarySearch : table_options.data_block_index_type, table_options.data_block_hash_table_util_ratio), range_del_block(1 /* block_restart_interval */), internal_prefix_transform(_moptions.prefix_extractor.get()), compression_type(_compression_type), sample_for_compression(_sample_for_compression), compression_opts(_compression_opts), compression_dict(), compression_ctxs(_compression_opts.parallel_threads), verify_ctxs(_compression_opts.parallel_threads), verify_dict(), state((_compression_opts.max_dict_bytes > 0) ? State::kBuffered : State::kUnbuffered), use_delta_encoding_for_index_values(table_opt.format_version >= 4 && !table_opt.block_align), compressed_cache_key_prefix_size(0), flush_block_policy( table_options.flush_block_policy_factory->NewFlushBlockPolicy( table_options, data_block)), level_at_creation(_level_at_creation), column_family_id(_column_family_id), column_family_name(_column_family_name), creation_time(_creation_time), oldest_key_time(_oldest_key_time), target_file_size(_target_file_size), file_creation_time(_file_creation_time) { for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) { compression_ctxs[i].reset(new CompressionContext(compression_type)); } if (table_options.index_type == BlockBasedTableOptions::kTwoLevelIndexSearch) { p_index_builder_ = PartitionedIndexBuilder::CreateIndexBuilder( &internal_comparator, use_delta_encoding_for_index_values, table_options); index_builder.reset(p_index_builder_); } else { index_builder.reset(IndexBuilder::CreateIndexBuilder( table_options.index_type, &internal_comparator, &this->internal_prefix_transform, use_delta_encoding_for_index_values, table_options)); } if (skip_filters) { filter_builder = nullptr; } else { FilterBuildingContext context(table_options); context.column_family_name = column_family_name; context.compaction_style = ioptions.compaction_style; context.level_at_creation = level_at_creation; context.info_log = ioptions.info_log; 
filter_builder.reset(CreateFilterBlockBuilder( ioptions, moptions, context, use_delta_encoding_for_index_values, p_index_builder_)); } for (auto& collector_factories : *int_tbl_prop_collector_factories) { table_properties_collectors.emplace_back( collector_factories->CreateIntTblPropCollector(column_family_id)); } table_properties_collectors.emplace_back( new BlockBasedTablePropertiesCollector( table_options.index_type, table_options.whole_key_filtering, _moptions.prefix_extractor != nullptr)); if (table_options.verify_compression) { for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) { verify_ctxs[i].reset(new UncompressionContext(compression_type)); } } } Rep(const Rep&) = delete; Rep& operator=(const Rep&) = delete; ~Rep() {} private: Status status; IOStatus io_status; }; struct BlockBasedTableBuilder::ParallelCompressionRep { // Keys is a wrapper of vector of strings avoiding // releasing string memories during vector clear() // in order to save memory allocation overhead class Keys { public: Keys() : keys_(kKeysInitSize), size_(0) {} void PushBack(const Slice& key) { if (size_ == keys_.size()) { keys_.emplace_back(key.data(), key.size()); } else { keys_[size_].assign(key.data(), key.size()); } size_++; } void SwapAssign(std::vector& keys) { size_ = keys.size(); std::swap(keys_, keys); } void Clear() { size_ = 0; } size_t Size() { return size_; } std::string& Back() { return keys_[size_ - 1]; } std::string& operator[](size_t idx) { assert(idx < size_); return keys_[idx]; } private: const size_t kKeysInitSize = 32; std::vector keys_; size_t size_; }; std::unique_ptr curr_block_keys; class BlockRepSlot; // BlockRep instances are fetched from and recycled to // block_rep_pool during parallel compression. 
struct BlockRep { Slice contents; Slice compressed_contents; std::unique_ptr data; std::unique_ptr compressed_data; CompressionType compression_type; std::unique_ptr first_key_in_next_block; std::unique_ptr keys; std::unique_ptr slot; Status status; }; // Use a vector of BlockRep as a buffer for a determined number // of BlockRep structures. All data referenced by pointers in // BlockRep will be freed when this vector is destructed. typedef std::vector BlockRepBuffer; BlockRepBuffer block_rep_buf; // Use a thread-safe queue for concurrent access from block // building thread and writer thread. typedef WorkQueue BlockRepPool; BlockRepPool block_rep_pool; // Use BlockRepSlot to keep block order in write thread. // slot_ will pass references to BlockRep class BlockRepSlot { public: BlockRepSlot() : slot_(1) {} template void Fill(T&& rep) { slot_.push(std::forward(rep)); }; void Take(BlockRep*& rep) { slot_.pop(rep); } private: // slot_ will pass references to BlockRep in block_rep_buf, // and those references are always valid before the destruction of // block_rep_buf. WorkQueue slot_; }; // Compression queue will pass references to BlockRep in block_rep_buf, // and those references are always valid before the destruction of // block_rep_buf. typedef WorkQueue CompressQueue; CompressQueue compress_queue; std::vector compress_thread_pool; // Write queue will pass references to BlockRep::slot in block_rep_buf, // and those references are always valid before the corresponding // BlockRep::slot is destructed, which is before the destruction of // block_rep_buf. typedef WorkQueue WriteQueue; WriteQueue write_queue; std::unique_ptr write_thread; // Raw bytes compressed so far. uint64_t raw_bytes_compressed; // Size of current block being appended. uint64_t raw_bytes_curr_block; // Raw bytes under compression and not appended yet. std::atomic raw_bytes_inflight; // Number of blocks under compression and not appended yet. 
std::atomic blocks_inflight; // Current compression ratio, maintained by BGWorkWriteRawBlock. std::atomic curr_compression_ratio; // Estimated SST file size. std::atomic estimated_file_size; // Wait for the completion of first block compression to get a // non-zero compression ratio. bool first_block; std::condition_variable first_block_cond; std::mutex first_block_mutex; bool finished; ParallelCompressionRep(uint32_t parallel_threads) : curr_block_keys(new Keys()), block_rep_buf(parallel_threads), block_rep_pool(parallel_threads), compress_queue(parallel_threads), write_queue(parallel_threads), raw_bytes_compressed(0), raw_bytes_curr_block(0), raw_bytes_inflight(0), blocks_inflight(0), curr_compression_ratio(0), estimated_file_size(0), first_block(true), finished(false) { for (uint32_t i = 0; i < parallel_threads; i++) { block_rep_buf[i].contents = Slice(); block_rep_buf[i].compressed_contents = Slice(); block_rep_buf[i].data.reset(new std::string()); block_rep_buf[i].compressed_data.reset(new std::string()); block_rep_buf[i].compression_type = CompressionType(); block_rep_buf[i].first_key_in_next_block.reset(new std::string()); block_rep_buf[i].keys.reset(new Keys()); block_rep_buf[i].slot.reset(new BlockRepSlot()); block_rep_buf[i].status = Status::OK(); block_rep_pool.push(&block_rep_buf[i]); } } ~ParallelCompressionRep() { block_rep_pool.finish(); } }; BlockBasedTableBuilder::BlockBasedTableBuilder( const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, const std::vector>* int_tbl_prop_collector_factories, uint32_t column_family_id, WritableFileWriter* file, const CompressionType compression_type, const uint64_t sample_for_compression, const CompressionOptions& compression_opts, const bool skip_filters, const std::string& column_family_name, const int level_at_creation, const uint64_t creation_time, const uint64_t oldest_key_time, const uint64_t 
target_file_size, const uint64_t file_creation_time) { BlockBasedTableOptions sanitized_table_options(table_options); if (sanitized_table_options.format_version == 0 && sanitized_table_options.checksum != kCRC32c) { ROCKS_LOG_WARN( ioptions.info_log, "Silently converting format_version to 1 because checksum is " "non-default"); // silently convert format_version to 1 to keep consistent with current // behavior sanitized_table_options.format_version = 1; } rep_ = new Rep(ioptions, moptions, sanitized_table_options, internal_comparator, int_tbl_prop_collector_factories, column_family_id, file, compression_type, sample_for_compression, compression_opts, skip_filters, level_at_creation, column_family_name, creation_time, oldest_key_time, target_file_size, file_creation_time); if (rep_->filter_builder != nullptr) { rep_->filter_builder->StartBlock(0); } if (table_options.block_cache_compressed.get() != nullptr) { BlockBasedTable::GenerateCachePrefix( table_options.block_cache_compressed.get(), file->writable_file(), &rep_->compressed_cache_key_prefix[0], &rep_->compressed_cache_key_prefix_size); } if (rep_->compression_opts.parallel_threads > 1) { rep_->pc_rep.reset( new ParallelCompressionRep(rep_->compression_opts.parallel_threads)); rep_->pc_rep->compress_thread_pool.reserve( rep_->compression_opts.parallel_threads); for (uint32_t i = 0; i < rep_->compression_opts.parallel_threads; i++) { rep_->pc_rep->compress_thread_pool.emplace_back([this, i] { BGWorkCompression(*(rep_->compression_ctxs[i]), rep_->verify_ctxs[i].get()); }); } rep_->pc_rep->write_thread.reset( new port::Thread([this] { BGWorkWriteRawBlock(); })); } } BlockBasedTableBuilder::~BlockBasedTableBuilder() { // Catch errors where caller forgot to call Finish() assert(rep_->state == Rep::State::kClosed); delete rep_; } void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { Rep* r = rep_; assert(rep_->state != Rep::State::kClosed); if (!ok()) return; ValueType value_type = 
ExtractValueType(key); if (IsValueType(value_type)) { #ifndef NDEBUG if (r->props.num_entries > r->props.num_range_deletions) { assert(r->internal_comparator.Compare(key, Slice(r->last_key)) > 0); } #endif // NDEBUG auto should_flush = r->flush_block_policy->Update(key, value); if (should_flush) { assert(!r->data_block.empty()); r->first_key_in_next_block = &key; Flush(); if (r->state == Rep::State::kBuffered && r->data_begin_offset > r->target_file_size) { EnterUnbuffered(); } // Add item to index block. // We do not emit the index entry for a block until we have seen the // first key for the next data block. This allows us to use shorter // keys in the index block. For example, consider a block boundary // between the keys "the quick brown fox" and "the who". We can use // "the r" as the key for the index block entry since it is >= all // entries in the first block and < all entries in subsequent // blocks. if (ok() && r->state == Rep::State::kUnbuffered) { if (r->compression_opts.parallel_threads > 1) { r->pc_rep->curr_block_keys->Clear(); } else { r->index_builder->AddIndexEntry(&r->last_key, &key, r->pending_handle); } } } // Note: PartitionedFilterBlockBuilder requires key being added to filter // builder after being added to index builder. if (r->state == Rep::State::kUnbuffered) { if (r->compression_opts.parallel_threads > 1) { r->pc_rep->curr_block_keys->PushBack(key); } else { if (r->filter_builder != nullptr) { size_t ts_sz = r->internal_comparator.user_comparator()->timestamp_size(); r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); } } } r->last_key.assign(key.data(), key.size()); r->data_block.Add(key, value); if (r->state == Rep::State::kBuffered) { // Buffer keys to be replayed during `Finish()` once compression // dictionary has been finalized. 
if (r->data_block_and_keys_buffers.empty() || should_flush) { r->data_block_and_keys_buffers.emplace_back(); } r->data_block_and_keys_buffers.back().second.emplace_back(key.ToString()); } else { if (r->compression_opts.parallel_threads == 1) { r->index_builder->OnKeyAdded(key); } } // TODO offset passed in is not accurate for parallel compression case NotifyCollectTableCollectorsOnAdd(key, value, r->get_offset(), r->table_properties_collectors, r->ioptions.info_log); } else if (value_type == kTypeRangeDeletion) { r->range_del_block.Add(key, value); // TODO offset passed in is not accurate for parallel compression case NotifyCollectTableCollectorsOnAdd(key, value, r->get_offset(), r->table_properties_collectors, r->ioptions.info_log); } else { assert(false); } r->props.num_entries++; r->props.raw_key_size += key.size(); r->props.raw_value_size += value.size(); if (value_type == kTypeDeletion || value_type == kTypeSingleDeletion) { r->props.num_deletions++; } else if (value_type == kTypeRangeDeletion) { r->props.num_deletions++; r->props.num_range_deletions++; } else if (value_type == kTypeMerge) { r->props.num_merge_operands++; } } void BlockBasedTableBuilder::Flush() { Rep* r = rep_; assert(rep_->state != Rep::State::kClosed); if (!ok()) return; if (r->data_block.empty()) return; if (r->compression_opts.parallel_threads > 1 && r->state == Rep::State::kUnbuffered) { ParallelCompressionRep::BlockRep* block_rep = nullptr; r->pc_rep->block_rep_pool.pop(block_rep); assert(block_rep != nullptr); r->data_block.Finish(); assert(block_rep->data); r->data_block.SwapAndReset(*(block_rep->data)); block_rep->contents = *(block_rep->data); block_rep->compression_type = r->compression_type; std::swap(block_rep->keys, r->pc_rep->curr_block_keys); r->pc_rep->curr_block_keys->Clear(); if (r->first_key_in_next_block == nullptr) { block_rep->first_key_in_next_block.reset(nullptr); } else { block_rep->first_key_in_next_block->assign( r->first_key_in_next_block->data(), 
r->first_key_in_next_block->size()); } uint64_t new_raw_bytes_inflight = r->pc_rep->raw_bytes_inflight.fetch_add(block_rep->data->size(), std::memory_order_relaxed) + block_rep->data->size(); uint64_t new_blocks_inflight = r->pc_rep->blocks_inflight.fetch_add(1, std::memory_order_relaxed) + 1; r->pc_rep->estimated_file_size.store( r->get_offset() + static_cast(static_cast(new_raw_bytes_inflight) * r->pc_rep->curr_compression_ratio.load( std::memory_order_relaxed)) + new_blocks_inflight * kBlockTrailerSize, std::memory_order_relaxed); // Read out first_block here to avoid data race with BGWorkWriteRawBlock bool first_block = r->pc_rep->first_block; assert(block_rep->status.ok()); if (!r->pc_rep->write_queue.push(block_rep->slot.get())) { return; } if (!r->pc_rep->compress_queue.push(block_rep)) { return; } if (first_block) { std::unique_lock lock(r->pc_rep->first_block_mutex); r->pc_rep->first_block_cond.wait(lock, [r] { return !r->pc_rep->first_block; }); } } else { WriteBlock(&r->data_block, &r->pending_handle, true /* is_data_block */); } } void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle, bool is_data_block) { WriteBlock(block->Finish(), handle, is_data_block); block->Reset(); } void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, BlockHandle* handle, bool is_data_block) { Rep* r = rep_; Slice block_contents; CompressionType type; if (r->state == Rep::State::kBuffered) { assert(is_data_block); assert(!r->data_block_and_keys_buffers.empty()); r->data_block_and_keys_buffers.back().first = raw_block_contents.ToString(); r->data_begin_offset += r->data_block_and_keys_buffers.back().first.size(); return; } Status compress_status; CompressAndVerifyBlock(raw_block_contents, is_data_block, *(r->compression_ctxs[0]), r->verify_ctxs[0].get(), &(r->compressed_output), &(block_contents), &type, &compress_status); r->SetStatus(compress_status); if (!ok()) { return; } WriteRawBlock(block_contents, type, handle, 
is_data_block); r->compressed_output.clear(); if (is_data_block) { if (r->filter_builder != nullptr) { r->filter_builder->StartBlock(r->get_offset()); } r->props.data_size = r->get_offset(); ++r->props.num_data_blocks; } } void BlockBasedTableBuilder::BGWorkCompression( CompressionContext& compression_ctx, UncompressionContext* verify_ctx) { ParallelCompressionRep::BlockRep* block_rep; while (rep_->pc_rep->compress_queue.pop(block_rep)) { CompressAndVerifyBlock(block_rep->contents, true, /* is_data_block*/ compression_ctx, verify_ctx, block_rep->compressed_data.get(), &block_rep->compressed_contents, &(block_rep->compression_type), &block_rep->status); block_rep->slot->Fill(block_rep); } } void BlockBasedTableBuilder::CompressAndVerifyBlock( const Slice& raw_block_contents, bool is_data_block, CompressionContext& compression_ctx, UncompressionContext* verify_ctx_ptr, std::string* compressed_output, Slice* block_contents, CompressionType* type, Status* out_status) { // File format contains a sequence of blocks where each block has: // block_data: uint8[n] // type: uint8 // crc: uint32 assert(ok()); Rep* r = rep_; *type = r->compression_type; uint64_t sample_for_compression = r->sample_for_compression; bool abort_compression = false; StopWatchNano timer( r->ioptions.env, ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)); if (raw_block_contents.size() < kCompressionSizeLimit) { const CompressionDict* compression_dict; if (!is_data_block || r->compression_dict == nullptr) { compression_dict = &CompressionDict::GetEmptyDict(); } else { compression_dict = r->compression_dict.get(); } assert(compression_dict != nullptr); CompressionInfo compression_info(r->compression_opts, compression_ctx, *compression_dict, *type, sample_for_compression); std::string sampled_output_fast; std::string sampled_output_slow; *block_contents = CompressBlock( raw_block_contents, compression_info, type, r->table_options.format_version, is_data_block /* do_sample */, 
compressed_output, &sampled_output_fast, &sampled_output_slow); // notify collectors on block add NotifyCollectTableCollectorsOnBlockAdd( r->table_properties_collectors, raw_block_contents.size(), sampled_output_fast.size(), sampled_output_slow.size()); // Some of the compression algorithms are known to be unreliable. If // the verify_compression flag is set then try to de-compress the // compressed data and compare to the input. if (*type != kNoCompression && r->table_options.verify_compression) { // Retrieve the uncompressed contents into a new buffer const UncompressionDict* verify_dict; if (!is_data_block || r->verify_dict == nullptr) { verify_dict = &UncompressionDict::GetEmptyDict(); } else { verify_dict = r->verify_dict.get(); } assert(verify_dict != nullptr); BlockContents contents; UncompressionInfo uncompression_info(*verify_ctx_ptr, *verify_dict, r->compression_type); Status stat = UncompressBlockContentsForCompressionType( uncompression_info, block_contents->data(), block_contents->size(), &contents, r->table_options.format_version, r->ioptions); if (stat.ok()) { bool compressed_ok = contents.data.compare(raw_block_contents) == 0; if (!compressed_ok) { // The result of the compression was invalid. abort. abort_compression = true; ROCKS_LOG_ERROR(r->ioptions.info_log, "Decompressed block did not match raw block"); *out_status = Status::Corruption("Decompressed block did not match raw block"); } } else { // Decompression reported an error. abort. *out_status = Status::Corruption(std::string("Could not decompress: ") + stat.getState()); abort_compression = true; } } } else { // Block is too big to be compressed. abort_compression = true; } // Abort compression if the block is too big, or did not pass // verification. 
if (abort_compression) { RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED); *type = kNoCompression; *block_contents = raw_block_contents; } else if (*type != kNoCompression) { if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)) { RecordTimeToHistogram(r->ioptions.statistics, COMPRESSION_TIMES_NANOS, timer.ElapsedNanos()); } RecordInHistogram(r->ioptions.statistics, BYTES_COMPRESSED, raw_block_contents.size()); RecordTick(r->ioptions.statistics, NUMBER_BLOCK_COMPRESSED); } else if (*type != r->compression_type) { RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED); } } void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents, CompressionType type, BlockHandle* handle, bool is_data_block) { Rep* r = rep_; Status s = Status::OK(); IOStatus io_s = IOStatus::OK(); StopWatch sw(r->ioptions.env, r->ioptions.statistics, WRITE_RAW_BLOCK_MICROS); handle->set_offset(r->get_offset()); handle->set_size(block_contents.size()); assert(status().ok()); assert(io_status().ok()); io_s = r->file->Append(block_contents); if (io_s.ok()) { char trailer[kBlockTrailerSize]; trailer[0] = type; char* trailer_without_type = trailer + 1; switch (r->table_options.checksum) { case kNoChecksum: EncodeFixed32(trailer_without_type, 0); break; case kCRC32c: { auto crc = crc32c::Value(block_contents.data(), block_contents.size()); crc = crc32c::Extend(crc, trailer, 1); // Extend to cover block type EncodeFixed32(trailer_without_type, crc32c::Mask(crc)); break; } case kxxHash: { XXH32_state_t* const state = XXH32_createState(); XXH32_reset(state, 0); XXH32_update(state, block_contents.data(), static_cast(block_contents.size())); XXH32_update(state, trailer, 1); // Extend to cover block type EncodeFixed32(trailer_without_type, XXH32_digest(state)); XXH32_freeState(state); break; } case kxxHash64: { XXH64_state_t* const state = XXH64_createState(); XXH64_reset(state, 0); XXH64_update(state, block_contents.data(), 
static_cast(block_contents.size())); XXH64_update(state, trailer, 1); // Extend to cover block type EncodeFixed32( trailer_without_type, static_cast(XXH64_digest(state) & // lower 32 bits uint64_t{0xffffffff})); XXH64_freeState(state); break; } } assert(io_s.ok()); TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", static_cast(trailer)); io_s = r->file->Append(Slice(trailer, kBlockTrailerSize)); if (io_s.ok()) { s = InsertBlockInCache(block_contents, type, handle); if (!s.ok()) { r->SetStatus(s); } } else { r->SetIOStatus(io_s); } if (s.ok() && io_s.ok()) { r->set_offset(r->get_offset() + block_contents.size() + kBlockTrailerSize); if (r->table_options.block_align && is_data_block) { size_t pad_bytes = (r->alignment - ((block_contents.size() + kBlockTrailerSize) & (r->alignment - 1))) & (r->alignment - 1); io_s = r->file->Pad(pad_bytes); if (io_s.ok()) { r->set_offset(r->get_offset() + pad_bytes); } else { r->SetIOStatus(io_s); } } if (r->compression_opts.parallel_threads > 1) { if (!r->pc_rep->finished) { assert(r->pc_rep->raw_bytes_compressed + r->pc_rep->raw_bytes_curr_block > 0); r->pc_rep->curr_compression_ratio.store( (r->pc_rep->curr_compression_ratio.load( std::memory_order_relaxed) * r->pc_rep->raw_bytes_compressed + block_contents.size()) / static_cast(r->pc_rep->raw_bytes_compressed + r->pc_rep->raw_bytes_curr_block), std::memory_order_relaxed); r->pc_rep->raw_bytes_compressed += r->pc_rep->raw_bytes_curr_block; uint64_t new_raw_bytes_inflight = r->pc_rep->raw_bytes_inflight.fetch_sub( r->pc_rep->raw_bytes_curr_block, std::memory_order_relaxed) - r->pc_rep->raw_bytes_curr_block; uint64_t new_blocks_inflight = r->pc_rep->blocks_inflight.fetch_sub( 1, std::memory_order_relaxed) - 1; assert(new_blocks_inflight < r->compression_opts.parallel_threads); r->pc_rep->estimated_file_size.store( r->get_offset() + static_cast( static_cast(new_raw_bytes_inflight) * r->pc_rep->curr_compression_ratio.load( std::memory_order_relaxed)) 
+ new_blocks_inflight * kBlockTrailerSize, std::memory_order_relaxed); } else { r->pc_rep->estimated_file_size.store(r->get_offset(), std::memory_order_relaxed); } } } } else { r->SetIOStatus(io_s); } if (!io_s.ok() && s.ok()) { r->SetStatus(io_s); } } void BlockBasedTableBuilder::BGWorkWriteRawBlock() { Rep* r = rep_; ParallelCompressionRep::BlockRepSlot* slot; ParallelCompressionRep::BlockRep* block_rep; while (r->pc_rep->write_queue.pop(slot)) { slot->Take(block_rep); if (!block_rep->status.ok()) { r->SetStatus(block_rep->status); // Return block_rep to the pool so that blocked Flush() can finish // if there is one, and Flush() will notice !ok() next time. block_rep->status = Status::OK(); block_rep->compressed_data->clear(); r->pc_rep->block_rep_pool.push(block_rep); // Unlock first block if necessary. if (r->pc_rep->first_block) { std::lock_guard lock(r->pc_rep->first_block_mutex); r->pc_rep->first_block = false; r->pc_rep->first_block_cond.notify_one(); } break; } for (size_t i = 0; i < block_rep->keys->Size(); i++) { auto& key = (*block_rep->keys)[i]; if (r->filter_builder != nullptr) { size_t ts_sz = r->internal_comparator.user_comparator()->timestamp_size(); r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); } r->index_builder->OnKeyAdded(key); } r->pc_rep->raw_bytes_curr_block = block_rep->data->size(); WriteRawBlock(block_rep->compressed_contents, block_rep->compression_type, &r->pending_handle, true /* is_data_block*/); if (!ok()) { break; } if (r->pc_rep->first_block) { std::lock_guard lock(r->pc_rep->first_block_mutex); r->pc_rep->first_block = false; r->pc_rep->first_block_cond.notify_one(); } if (r->filter_builder != nullptr) { r->filter_builder->StartBlock(r->get_offset()); } r->props.data_size = r->get_offset(); ++r->props.num_data_blocks; if (block_rep->first_key_in_next_block == nullptr) { r->index_builder->AddIndexEntry(&(block_rep->keys->Back()), nullptr, r->pending_handle); } else { Slice first_key_in_next_block = 
Slice(*block_rep->first_key_in_next_block); r->index_builder->AddIndexEntry(&(block_rep->keys->Back()), &first_key_in_next_block, r->pending_handle); } block_rep->compressed_data->clear(); r->pc_rep->block_rep_pool.push(block_rep); } } Status BlockBasedTableBuilder::status() const { return rep_->GetStatus(); } IOStatus BlockBasedTableBuilder::io_status() const { return rep_->GetIOStatus(); } static void DeleteCachedBlockContents(const Slice& /*key*/, void* value) { BlockContents* bc = reinterpret_cast(value); delete bc; } // // Make a copy of the block contents and insert into compressed block cache // Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents, const CompressionType type, const BlockHandle* handle) { Rep* r = rep_; Cache* block_cache_compressed = r->table_options.block_cache_compressed.get(); if (type != kNoCompression && block_cache_compressed != nullptr) { size_t size = block_contents.size(); auto ubuf = AllocateBlock(size + 1, block_cache_compressed->memory_allocator()); memcpy(ubuf.get(), block_contents.data(), size); ubuf[size] = type; BlockContents* block_contents_to_cache = new BlockContents(std::move(ubuf), size); #ifndef NDEBUG block_contents_to_cache->is_raw_block = true; #endif // NDEBUG // make cache key by appending the file offset to the cache prefix id char* end = EncodeVarint64( r->compressed_cache_key_prefix + r->compressed_cache_key_prefix_size, handle->offset()); Slice key(r->compressed_cache_key_prefix, static_cast(end - r->compressed_cache_key_prefix)); // Insert into compressed block cache. block_cache_compressed->Insert( key, block_contents_to_cache, block_contents_to_cache->ApproximateMemoryUsage(), &DeleteCachedBlockContents); // Invalidate OS cache. 
r->file->InvalidateCache(static_cast(r->get_offset()), size); } return Status::OK(); } void BlockBasedTableBuilder::WriteFilterBlock( MetaIndexBuilder* meta_index_builder) { BlockHandle filter_block_handle; bool empty_filter_block = (rep_->filter_builder == nullptr || rep_->filter_builder->NumAdded() == 0); if (ok() && !empty_filter_block) { Status s = Status::Incomplete(); while (ok() && s.IsIncomplete()) { Slice filter_content = rep_->filter_builder->Finish(filter_block_handle, &s); assert(s.ok() || s.IsIncomplete()); rep_->props.filter_size += filter_content.size(); WriteRawBlock(filter_content, kNoCompression, &filter_block_handle); } } if (ok() && !empty_filter_block) { // Add mapping from ".Name" to location // of filter data. std::string key; if (rep_->filter_builder->IsBlockBased()) { key = BlockBasedTable::kFilterBlockPrefix; } else { key = rep_->table_options.partition_filters ? BlockBasedTable::kPartitionedFilterBlockPrefix : BlockBasedTable::kFullFilterBlockPrefix; } key.append(rep_->table_options.filter_policy->Name()); meta_index_builder->Add(key, filter_block_handle); } } void BlockBasedTableBuilder::WriteIndexBlock( MetaIndexBuilder* meta_index_builder, BlockHandle* index_block_handle) { IndexBuilder::IndexBlocks index_blocks; auto index_builder_status = rep_->index_builder->Finish(&index_blocks); if (index_builder_status.IsIncomplete()) { // We we have more than one index partition then meta_blocks are not // supported for the index. Currently meta_blocks are used only by // HashIndexBuilder which is not multi-partition. 
assert(index_blocks.meta_blocks.empty()); } else if (ok() && !index_builder_status.ok()) { rep_->SetStatus(index_builder_status); } if (ok()) { for (const auto& item : index_blocks.meta_blocks) { BlockHandle block_handle; WriteBlock(item.second, &block_handle, false /* is_data_block */); if (!ok()) { break; } meta_index_builder->Add(item.first, block_handle); } } if (ok()) { if (rep_->table_options.enable_index_compression) { WriteBlock(index_blocks.index_block_contents, index_block_handle, false); } else { WriteRawBlock(index_blocks.index_block_contents, kNoCompression, index_block_handle); } } // If there are more index partitions, finish them and write them out Status s = index_builder_status; while (ok() && s.IsIncomplete()) { s = rep_->index_builder->Finish(&index_blocks, *index_block_handle); if (!s.ok() && !s.IsIncomplete()) { rep_->SetStatus(s); return; } if (rep_->table_options.enable_index_compression) { WriteBlock(index_blocks.index_block_contents, index_block_handle, false); } else { WriteRawBlock(index_blocks.index_block_contents, kNoCompression, index_block_handle); } // The last index_block_handle will be for the partition index block } } void BlockBasedTableBuilder::WritePropertiesBlock( MetaIndexBuilder* meta_index_builder) { BlockHandle properties_block_handle; if (ok()) { PropertyBlockBuilder property_block_builder; rep_->props.column_family_id = rep_->column_family_id; rep_->props.column_family_name = rep_->column_family_name; rep_->props.filter_policy_name = rep_->table_options.filter_policy != nullptr ? rep_->table_options.filter_policy->Name() : ""; rep_->props.index_size = rep_->index_builder->IndexSize() + kBlockTrailerSize; rep_->props.comparator_name = rep_->ioptions.user_comparator != nullptr ? rep_->ioptions.user_comparator->Name() : "nullptr"; rep_->props.merge_operator_name = rep_->ioptions.merge_operator != nullptr ? 
rep_->ioptions.merge_operator->Name() : "nullptr"; rep_->props.compression_name = CompressionTypeToString(rep_->compression_type); rep_->props.compression_options = CompressionOptionsToString(rep_->compression_opts); rep_->props.prefix_extractor_name = rep_->moptions.prefix_extractor != nullptr ? rep_->moptions.prefix_extractor->Name() : "nullptr"; std::string property_collectors_names = "["; for (size_t i = 0; i < rep_->ioptions.table_properties_collector_factories.size(); ++i) { if (i != 0) { property_collectors_names += ","; } property_collectors_names += rep_->ioptions.table_properties_collector_factories[i]->Name(); } property_collectors_names += "]"; rep_->props.property_collectors_names = property_collectors_names; if (rep_->table_options.index_type == BlockBasedTableOptions::kTwoLevelIndexSearch) { assert(rep_->p_index_builder_ != nullptr); rep_->props.index_partitions = rep_->p_index_builder_->NumPartitions(); rep_->props.top_level_index_size = rep_->p_index_builder_->TopLevelIndexSize(rep_->offset); } rep_->props.index_key_is_user_key = !rep_->index_builder->seperator_is_key_plus_seq(); rep_->props.index_value_is_delta_encoded = rep_->use_delta_encoding_for_index_values; rep_->props.creation_time = rep_->creation_time; rep_->props.oldest_key_time = rep_->oldest_key_time; rep_->props.file_creation_time = rep_->file_creation_time; // Add basic properties property_block_builder.AddTableProperty(rep_->props); // Add use collected properties NotifyCollectTableCollectorsOnFinish(rep_->table_properties_collectors, rep_->ioptions.info_log, &property_block_builder); WriteRawBlock(property_block_builder.Finish(), kNoCompression, &properties_block_handle); } if (ok()) { #ifndef NDEBUG { uint64_t props_block_offset = properties_block_handle.offset(); uint64_t props_block_size = properties_block_handle.size(); TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockOffset", &props_block_offset); TEST_SYNC_POINT_CALLBACK( 
"BlockBasedTableBuilder::WritePropertiesBlock:GetPropsBlockSize", &props_block_size); } #endif // !NDEBUG meta_index_builder->Add(kPropertiesBlock, properties_block_handle); } } void BlockBasedTableBuilder::WriteCompressionDictBlock( MetaIndexBuilder* meta_index_builder) { if (rep_->compression_dict != nullptr && rep_->compression_dict->GetRawDict().size()) { BlockHandle compression_dict_block_handle; if (ok()) { WriteRawBlock(rep_->compression_dict->GetRawDict(), kNoCompression, &compression_dict_block_handle); #ifndef NDEBUG Slice compression_dict = rep_->compression_dict->GetRawDict(); TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::WriteCompressionDictBlock:RawDict", &compression_dict); #endif // NDEBUG } if (ok()) { meta_index_builder->Add(kCompressionDictBlock, compression_dict_block_handle); } } } void BlockBasedTableBuilder::WriteRangeDelBlock( MetaIndexBuilder* meta_index_builder) { if (ok() && !rep_->range_del_block.empty()) { BlockHandle range_del_block_handle; WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression, &range_del_block_handle); meta_index_builder->Add(kRangeDelBlock, range_del_block_handle); } } void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, BlockHandle& index_block_handle) { Rep* r = rep_; // No need to write out new footer if we're using default checksum. // We're writing legacy magic number because we want old versions of RocksDB // be able to read files generated with new release (just in case if // somebody wants to roll back after an upgrade) // TODO(icanadi) at some point in the future, when we're absolutely sure // nobody will roll back to RocksDB 2.x versions, retire the legacy magic // number and always write new table files with new magic number bool legacy = (r->table_options.format_version == 0); // this is guaranteed by BlockBasedTableBuilder's constructor assert(r->table_options.checksum == kCRC32c || r->table_options.format_version != 0); Footer footer( legacy ? 
kLegacyBlockBasedTableMagicNumber : kBlockBasedTableMagicNumber, r->table_options.format_version); footer.set_metaindex_handle(metaindex_block_handle); footer.set_index_handle(index_block_handle); footer.set_checksum(r->table_options.checksum); std::string footer_encoding; footer.EncodeTo(&footer_encoding); assert(ok()); IOStatus ios = r->file->Append(footer_encoding); r->SetIOStatus(ios); if (ios.ok()) { r->set_offset(r->get_offset() + footer_encoding.size()); } r->SyncStatusFromIOStatus(); } void BlockBasedTableBuilder::EnterUnbuffered() { Rep* r = rep_; assert(r->state == Rep::State::kBuffered); r->state = Rep::State::kUnbuffered; const size_t kSampleBytes = r->compression_opts.zstd_max_train_bytes > 0 ? r->compression_opts.zstd_max_train_bytes : r->compression_opts.max_dict_bytes; Random64 generator{r->creation_time}; std::string compression_dict_samples; std::vector compression_dict_sample_lens; if (!r->data_block_and_keys_buffers.empty()) { while (compression_dict_samples.size() < kSampleBytes) { size_t rand_idx = static_cast( generator.Uniform(r->data_block_and_keys_buffers.size())); size_t copy_len = std::min(kSampleBytes - compression_dict_samples.size(), r->data_block_and_keys_buffers[rand_idx].first.size()); compression_dict_samples.append( r->data_block_and_keys_buffers[rand_idx].first, 0, copy_len); compression_dict_sample_lens.emplace_back(copy_len); } } // final data block flushed, now we can generate dictionary from the samples. // OK if compression_dict_samples is empty, we'll just get empty dictionary. 
std::string dict; if (r->compression_opts.zstd_max_train_bytes > 0) { dict = ZSTD_TrainDictionary(compression_dict_samples, compression_dict_sample_lens, r->compression_opts.max_dict_bytes); } else { dict = std::move(compression_dict_samples); } r->compression_dict.reset(new CompressionDict(dict, r->compression_type, r->compression_opts.level)); r->verify_dict.reset(new UncompressionDict( dict, r->compression_type == kZSTD || r->compression_type == kZSTDNotFinalCompression)); for (size_t i = 0; ok() && i < r->data_block_and_keys_buffers.size(); ++i) { auto& data_block = r->data_block_and_keys_buffers[i].first; auto& keys = r->data_block_and_keys_buffers[i].second; assert(!data_block.empty()); assert(!keys.empty()); if (r->compression_opts.parallel_threads > 1) { ParallelCompressionRep::BlockRep* block_rep; r->pc_rep->block_rep_pool.pop(block_rep); std::swap(*(block_rep->data), data_block); block_rep->contents = *(block_rep->data); block_rep->compression_type = r->compression_type; block_rep->keys->SwapAssign(keys); if (i + 1 < r->data_block_and_keys_buffers.size()) { block_rep->first_key_in_next_block->assign( r->data_block_and_keys_buffers[i + 1].second.front()); } else { if (r->first_key_in_next_block == nullptr) { block_rep->first_key_in_next_block.reset(nullptr); } else { block_rep->first_key_in_next_block->assign( r->first_key_in_next_block->data(), r->first_key_in_next_block->size()); } } uint64_t new_raw_bytes_inflight = r->pc_rep->raw_bytes_inflight.fetch_add(block_rep->data->size(), std::memory_order_relaxed) + block_rep->data->size(); uint64_t new_blocks_inflight = r->pc_rep->blocks_inflight.fetch_add(1, std::memory_order_relaxed) + 1; r->pc_rep->estimated_file_size.store( r->get_offset() + static_cast( static_cast(new_raw_bytes_inflight) * r->pc_rep->curr_compression_ratio.load( std::memory_order_relaxed)) + new_blocks_inflight * kBlockTrailerSize, std::memory_order_relaxed); // Read out first_block here to avoid data race with BGWorkWriteRawBlock bool 
first_block = r->pc_rep->first_block; assert(block_rep->status.ok()); if (!r->pc_rep->write_queue.push(block_rep->slot.get())) { return; } if (!r->pc_rep->compress_queue.push(block_rep)) { return; } if (first_block) { std::unique_lock lock(r->pc_rep->first_block_mutex); r->pc_rep->first_block_cond.wait( lock, [r] { return !r->pc_rep->first_block; }); } } else { for (const auto& key : keys) { if (r->filter_builder != nullptr) { size_t ts_sz = r->internal_comparator.user_comparator()->timestamp_size(); r->filter_builder->Add(ExtractUserKeyAndStripTimestamp(key, ts_sz)); } r->index_builder->OnKeyAdded(key); } WriteBlock(Slice(data_block), &r->pending_handle, true /* is_data_block */); if (ok() && i + 1 < r->data_block_and_keys_buffers.size()) { Slice first_key_in_next_block = r->data_block_and_keys_buffers[i + 1].second.front(); Slice* first_key_in_next_block_ptr = &first_key_in_next_block; r->index_builder->AddIndexEntry( &keys.back(), first_key_in_next_block_ptr, r->pending_handle); } } } r->data_block_and_keys_buffers.clear(); } Status BlockBasedTableBuilder::Finish() { Rep* r = rep_; assert(r->state != Rep::State::kClosed); bool empty_data_block = r->data_block.empty(); r->first_key_in_next_block = nullptr; Flush(); if (r->state == Rep::State::kBuffered) { EnterUnbuffered(); } if (r->compression_opts.parallel_threads > 1) { r->pc_rep->compress_queue.finish(); for (auto& thread : r->pc_rep->compress_thread_pool) { thread.join(); } r->pc_rep->write_queue.finish(); r->pc_rep->write_thread->join(); r->pc_rep->finished = true; } else { // To make sure properties block is able to keep the accurate size of index // block, we will finish writing all index entries first. if (ok() && !empty_data_block) { r->index_builder->AddIndexEntry( &r->last_key, nullptr /* no next data block */, r->pending_handle); } } // Write meta blocks, metaindex block and footer in the following order. // 1. [meta block: filter] // 2. [meta block: index] // 3. 
[meta block: compression dictionary] // 4. [meta block: range deletion tombstone] // 5. [meta block: properties] // 6. [metaindex block] // 7. Footer BlockHandle metaindex_block_handle, index_block_handle; MetaIndexBuilder meta_index_builder; WriteFilterBlock(&meta_index_builder); WriteIndexBlock(&meta_index_builder, &index_block_handle); WriteCompressionDictBlock(&meta_index_builder); WriteRangeDelBlock(&meta_index_builder); WritePropertiesBlock(&meta_index_builder); if (ok()) { // flush the meta index block WriteRawBlock(meta_index_builder.Finish(), kNoCompression, &metaindex_block_handle); } if (ok()) { WriteFooter(metaindex_block_handle, index_block_handle); } r->state = Rep::State::kClosed; return r->GetStatus(); } void BlockBasedTableBuilder::Abandon() { assert(rep_->state != Rep::State::kClosed); if (rep_->compression_opts.parallel_threads > 1) { rep_->pc_rep->compress_queue.finish(); for (auto& thread : rep_->pc_rep->compress_thread_pool) { thread.join(); } rep_->pc_rep->write_queue.finish(); rep_->pc_rep->write_thread->join(); rep_->pc_rep->finished = true; } rep_->state = Rep::State::kClosed; } uint64_t BlockBasedTableBuilder::NumEntries() const { return rep_->props.num_entries; } bool BlockBasedTableBuilder::IsEmpty() const { return rep_->props.num_entries == 0 && rep_->props.num_range_deletions == 0; } uint64_t BlockBasedTableBuilder::FileSize() const { return rep_->offset; } uint64_t BlockBasedTableBuilder::EstimatedFileSize() const { if (rep_->compression_opts.parallel_threads > 1) { // Use compression ratio so far and inflight raw bytes to estimate // final SST size. 
return rep_->pc_rep->estimated_file_size.load(std::memory_order_relaxed); } else { return FileSize(); } } bool BlockBasedTableBuilder::NeedCompact() const { for (const auto& collector : rep_->table_properties_collectors) { if (collector->NeedCompact()) { return true; } } return false; } TableProperties BlockBasedTableBuilder::GetTableProperties() const { TableProperties ret = rep_->props; for (const auto& collector : rep_->table_properties_collectors) { for (const auto& prop : collector->GetReadableProperties()) { ret.readable_properties.insert(prop); } collector->Finish(&ret.user_collected_properties); } return ret; } std::string BlockBasedTableBuilder::GetFileChecksum() const { if (rep_->file != nullptr) { return rep_->file->GetFileChecksum(); } else { return kUnknownFileChecksum; } } const char* BlockBasedTableBuilder::GetFileChecksumFuncName() const { if (rep_->file != nullptr) { return rep_->file->GetFileChecksumFuncName(); } else { return kUnknownFileChecksumFuncName; } } const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; const std::string BlockBasedTable::kFullFilterBlockPrefix = "fullfilter."; const std::string BlockBasedTable::kPartitionedFilterBlockPrefix = "partitionedfilter."; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_table_builder.h000066400000000000000000000162211370372246700240440ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include #include "db/version_edit.h" #include "rocksdb/flush_block_policy.h" #include "rocksdb/listener.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "table/meta_blocks.h" #include "table/table_builder.h" #include "util/compression.h" namespace ROCKSDB_NAMESPACE { class BlockBuilder; class BlockHandle; class WritableFile; struct BlockBasedTableOptions; extern const uint64_t kBlockBasedTableMagicNumber; extern const uint64_t kLegacyBlockBasedTableMagicNumber; class BlockBasedTableBuilder : public TableBuilder { public: // Create a builder that will store the contents of the table it is // building in *file. Does not close the file. It is up to the // caller to close the file after calling Finish(). BlockBasedTableBuilder( const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, const std::vector>* int_tbl_prop_collector_factories, uint32_t column_family_id, WritableFileWriter* file, const CompressionType compression_type, const uint64_t sample_for_compression, const CompressionOptions& compression_opts, const bool skip_filters, const std::string& column_family_name, const int level_at_creation, const uint64_t creation_time = 0, const uint64_t oldest_key_time = 0, const uint64_t target_file_size = 0, const uint64_t file_creation_time = 0); // No copying allowed BlockBasedTableBuilder(const BlockBasedTableBuilder&) = delete; BlockBasedTableBuilder& operator=(const BlockBasedTableBuilder&) = delete; // REQUIRES: Either Finish() or Abandon() has been called. ~BlockBasedTableBuilder(); // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. // REQUIRES: Finish(), Abandon() have not been called void Add(const Slice& key, const Slice& value) override; // Return non-ok iff some error has been detected. 
Status status() const override; // Return non-ok iff some error happens during IO. IOStatus io_status() const override; // Finish building the table. Stops using the file passed to the // constructor after this function returns. // REQUIRES: Finish(), Abandon() have not been called Status Finish() override; // Indicate that the contents of this builder should be abandoned. Stops // using the file passed to the constructor after this function returns. // If the caller is not going to call Finish(), it must call Abandon() // before destroying this builder. // REQUIRES: Finish(), Abandon() have not been called void Abandon() override; // Number of calls to Add() so far. uint64_t NumEntries() const override; bool IsEmpty() const override; // Size of the file generated so far. If invoked after a successful // Finish() call, returns the size of the final generated file. uint64_t FileSize() const override; // Estimated size of the file generated so far. This is used when // FileSize() cannot estimate final SST size, e.g. parallel compression // is enabled. uint64_t EstimatedFileSize() const override; bool NeedCompact() const override; // Get table properties TableProperties GetTableProperties() const override; // Get file checksum std::string GetFileChecksum() const override; // Get file checksum function name const char* GetFileChecksumFuncName() const override; private: bool ok() const { return status().ok(); } // Transition state from buffered to unbuffered. See `Rep::State` API comment // for details of the states. // REQUIRES: `rep_->state == kBuffered` void EnterUnbuffered(); // Call block's Finish() method // and then write the compressed block contents to file. void WriteBlock(BlockBuilder* block, BlockHandle* handle, bool is_data_block); // Compress and write block content to the file. void WriteBlock(const Slice& block_contents, BlockHandle* handle, bool is_data_block); // Directly write data to the file. 
void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle, bool is_data_block = false); Status InsertBlockInCache(const Slice& block_contents, const CompressionType type, const BlockHandle* handle); void WriteFilterBlock(MetaIndexBuilder* meta_index_builder); void WriteIndexBlock(MetaIndexBuilder* meta_index_builder, BlockHandle* index_block_handle); void WritePropertiesBlock(MetaIndexBuilder* meta_index_builder); void WriteCompressionDictBlock(MetaIndexBuilder* meta_index_builder); void WriteRangeDelBlock(MetaIndexBuilder* meta_index_builder); void WriteFooter(BlockHandle& metaindex_block_handle, BlockHandle& index_block_handle); struct Rep; class BlockBasedTablePropertiesCollectorFactory; class BlockBasedTablePropertiesCollector; Rep* rep_; struct ParallelCompressionRep; // Advanced operation: flush any buffered key/value pairs to file. // Can be used to ensure that two adjacent entries never live in // the same data block. Most clients should not need to use this method. // REQUIRES: Finish(), Abandon() have not been called void Flush(); // Some compression libraries fail when the raw size is bigger than int. If // uncompressed size is bigger than kCompressionSizeLimit, don't compress it const uint64_t kCompressionSizeLimit = std::numeric_limits::max(); // Get blocks from mem-table walking thread, compress them and // pass them to the write thread. 
Used in parallel compression mode only void BGWorkCompression(CompressionContext& compression_ctx, UncompressionContext* verify_ctx); // Given raw block content, try to compress it and return result and // compression type void CompressAndVerifyBlock( const Slice& raw_block_contents, bool is_data_block, CompressionContext& compression_ctx, UncompressionContext* verify_ctx, std::string* compressed_output, Slice* result_block_contents, CompressionType* result_compression_type, Status* out_status); // Get compressed blocks from BGWorkCompression and write them into SST void BGWorkWriteRawBlock(); }; Slice CompressBlock(const Slice& raw, const CompressionInfo& info, CompressionType* type, uint32_t format_version, bool do_sample, std::string* compressed_output, std::string* sampled_output_fast, std::string* sampled_output_slow); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_table_factory.cc000066400000000000000000001035711370372246700242300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/block_based_table_factory.h" #include #include #include #include #include "options/options_helper.h" #include "options/options_parser.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/flush_block_policy.h" #include "table/block_based/block_based_table_builder.h" #include "table/block_based/block_based_table_reader.h" #include "table/format.h" #include "util/mutexlock.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { void TailPrefetchStats::RecordEffectiveSize(size_t len) { MutexLock l(&mutex_); if (num_records_ < kNumTracked) { num_records_++; } records_[next_++] = len; if (next_ == kNumTracked) { next_ = 0; } } size_t TailPrefetchStats::GetSuggestedPrefetchSize() { std::vector sorted; { MutexLock l(&mutex_); if (num_records_ == 0) { return 0; } sorted.assign(records_, records_ + num_records_); } // Of the historic size, we find the maximum one that satisifis the condtiion // that if prefetching all, less than 1/8 will be wasted. std::sort(sorted.begin(), sorted.end()); // Assuming we have 5 data points, and after sorting it looks like this: // // +---+ // +---+ | | // | | | | // | | | | // | | | | // | | | | // +---+ | | | | // | | | | | | // +---+ | | | | | | // | | | | | | | | // +---+ | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // +---+ +---+ +---+ +---+ +---+ // // and we use every of the value as a candidate, and estimate how much we // wasted, compared to read. For example, when we use the 3rd record // as candiate. 
This area is what we read: // +---+ // +---+ | | // | | | | // | | | | // | | | | // | | | | // *** *** *** ***+ *** *** *** *** ** // * | | | | | | // +---+ | | | | | * // * | | | | | | | | // +---+ | | | | | | | * // * | | | | X | | | | | // | | | | | | | | | * // * | | | | | | | | | // | | | | | | | | | * // * | | | | | | | | | // *** *** ***-*** ***--*** ***--*** +**** // which is (size of the record) X (number of records). // // While wasted is this area: // +---+ // +---+ | | // | | | | // | | | | // | | | | // | | | | // *** *** *** ****---+ | | | | // * * | | | | | // * *-*** *** | | | | | // * * | | | | | | | // *--** *** | | | | | | | // | | | | | X | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // +---+ +---+ +---+ +---+ +---+ // // Which can be calculated iteratively. // The difference between wasted using 4st and 3rd record, will // be following area: // +---+ // +--+ +-+ ++ +-+ +-+ +---+ | | // + xxxxxxxxxxxxxxxxxxxxxxxx | | | | // xxxxxxxxxxxxxxxxxxxxxxxx | | | | // + xxxxxxxxxxxxxxxxxxxxxxxx | | | | // | xxxxxxxxxxxxxxxxxxxxxxxx | | | | // +-+ +-+ +-+ ++ +---+ +--+ | | | // | | | | | | | // +---+ ++ | | | | | | // | | | | | | X | | | // +---+ ++ | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // | | | | | | | | | | // +---+ +---+ +---+ +---+ +---+ // // which will be the size difference between 4st and 3rd record, // times 3, which is number of records before the 4st. // Here we assume that all data within the prefetch range will be useful. In // reality, it may not be the case when a partial block is inside the range, // or there are data in the middle that is not read. We ignore those cases // for simplicity. 
assert(!sorted.empty()); size_t prev_size = sorted[0]; size_t max_qualified_size = sorted[0]; size_t wasted = 0; for (size_t i = 1; i < sorted.size(); i++) { size_t read = sorted[i] * sorted.size(); wasted += (sorted[i] - prev_size) * i; if (wasted <= read / 8) { max_qualified_size = sorted[i]; } prev_size = sorted[i]; } const size_t kMaxPrefetchSize = 512 * 1024; // Never exceed 512KB return std::min(kMaxPrefetchSize, max_qualified_size); } #ifndef ROCKSDB_LITE static std::unordered_map block_base_table_index_type_string_map = { {"kBinarySearch", BlockBasedTableOptions::IndexType::kBinarySearch}, {"kHashSearch", BlockBasedTableOptions::IndexType::kHashSearch}, {"kTwoLevelIndexSearch", BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}, {"kBinarySearchWithFirstKey", BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey}}; static std::unordered_map block_base_table_data_block_index_type_string_map = { {"kDataBlockBinarySearch", BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinarySearch}, {"kDataBlockBinaryAndHash", BlockBasedTableOptions::DataBlockIndexType::kDataBlockBinaryAndHash}}; static std::unordered_map block_base_table_index_shortening_mode_string_map = { {"kNoShortening", BlockBasedTableOptions::IndexShorteningMode::kNoShortening}, {"kShortenSeparators", BlockBasedTableOptions::IndexShorteningMode::kShortenSeparators}, {"kShortenSeparatorsAndSuccessor", BlockBasedTableOptions::IndexShorteningMode:: kShortenSeparatorsAndSuccessor}}; static std::unordered_map block_based_table_type_info = { /* currently not supported std::shared_ptr block_cache = nullptr; std::shared_ptr block_cache_compressed = nullptr; */ {"flush_block_policy_factory", {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory), OptionType::kFlushBlockPolicyFactory, OptionVerificationType::kByName, OptionTypeFlags::kCompareNever, 0}}, {"cache_index_and_filter_blocks", {offsetof(struct BlockBasedTableOptions, cache_index_and_filter_blocks), OptionType::kBoolean, 
OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"cache_index_and_filter_blocks_with_high_priority", {offsetof(struct BlockBasedTableOptions, cache_index_and_filter_blocks_with_high_priority), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"pin_l0_filter_and_index_blocks_in_cache", {offsetof(struct BlockBasedTableOptions, pin_l0_filter_and_index_blocks_in_cache), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"index_type", OptionTypeInfo::Enum( offsetof(struct BlockBasedTableOptions, index_type), &block_base_table_index_type_string_map)}, {"hash_index_allow_collision", {offsetof(struct BlockBasedTableOptions, hash_index_allow_collision), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"data_block_index_type", OptionTypeInfo::Enum( offsetof(struct BlockBasedTableOptions, data_block_index_type), &block_base_table_data_block_index_type_string_map)}, {"index_shortening", OptionTypeInfo::Enum( offsetof(struct BlockBasedTableOptions, index_shortening), &block_base_table_index_shortening_mode_string_map)}, {"data_block_hash_table_util_ratio", {offsetof(struct BlockBasedTableOptions, data_block_hash_table_util_ratio), OptionType::kDouble, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"checksum", {offsetof(struct BlockBasedTableOptions, checksum), OptionType::kChecksumType, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"no_block_cache", {offsetof(struct BlockBasedTableOptions, no_block_cache), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"block_size", {offsetof(struct BlockBasedTableOptions, block_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"block_size_deviation", {offsetof(struct BlockBasedTableOptions, block_size_deviation), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"block_restart_interval", 
{offsetof(struct BlockBasedTableOptions, block_restart_interval), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"index_block_restart_interval", {offsetof(struct BlockBasedTableOptions, index_block_restart_interval), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"index_per_partition", {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"metadata_block_size", {offsetof(struct BlockBasedTableOptions, metadata_block_size), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"partition_filters", {offsetof(struct BlockBasedTableOptions, partition_filters), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"filter_policy", {offsetof(struct BlockBasedTableOptions, filter_policy), OptionType::kUnknown, OptionVerificationType::kByNameAllowFromNull, OptionTypeFlags::kNone, 0, // Parses the Filter policy [](const ConfigOptions& opts, const std::string&, const std::string& value, char* addr) { auto* policy = reinterpret_cast*>(addr); return FilterPolicy::CreateFromString(opts, value, policy); }, // Converts the FilterPolicy to its string representation [](const ConfigOptions&, const std::string&, const char* addr, std::string* value) { const auto* policy = reinterpret_cast*>( addr); if (policy->get()) { *value = (*policy)->Name(); } else { *value = kNullptrString; } return Status::OK(); }, // Compares two FilterPolicy objects for equality [](const ConfigOptions&, const std::string&, const char* addr1, const char* addr2, std::string*) { const auto* policy1 = reinterpret_cast*>( addr1) ->get(); const auto* policy2 = reinterpret_cast*>(addr2) ->get(); if (policy1 == policy2) { return true; } else if (policy1 != nullptr && policy2 != nullptr) { return (strcmp(policy1->Name(), policy2->Name()) == 0); } else { return false; } }}}, {"whole_key_filtering", {offsetof(struct BlockBasedTableOptions, 
whole_key_filtering), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"skip_table_builder_flush", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, OptionTypeFlags::kNone, 0}}, {"format_version", {offsetof(struct BlockBasedTableOptions, format_version), OptionType::kUInt32T, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"verify_compression", {offsetof(struct BlockBasedTableOptions, verify_compression), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"read_amp_bytes_per_bit", {offsetof(struct BlockBasedTableOptions, read_amp_bytes_per_bit), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"enable_index_compression", {offsetof(struct BlockBasedTableOptions, enable_index_compression), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"block_align", {offsetof(struct BlockBasedTableOptions, block_align), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"pin_top_level_index_and_filter", {offsetof(struct BlockBasedTableOptions, pin_top_level_index_and_filter), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"block_cache", {offsetof(struct BlockBasedTableOptions, block_cache), OptionType::kUnknown, OptionVerificationType::kNormal, (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize), 0, // Parses the input vsalue as a Cache [](const ConfigOptions& opts, const std::string&, const std::string& value, char* addr) { auto* cache = reinterpret_cast*>(addr); return Cache::CreateFromString(opts, value, cache); }}}, {"block_cache_compressed", {offsetof(struct BlockBasedTableOptions, block_cache_compressed), OptionType::kUnknown, OptionVerificationType::kNormal, (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize), 0, // Parses the input vsalue as a Cache [](const ConfigOptions& opts, const std::string&, const 
std::string& value, char* addr) { auto* cache = reinterpret_cast*>(addr); return Cache::CreateFromString(opts, value, cache); }}}, }; #endif // ROCKSDB_LITE // TODO(myabandeh): We should return an error instead of silently changing the // options BlockBasedTableFactory::BlockBasedTableFactory( const BlockBasedTableOptions& _table_options) : table_options_(_table_options) { if (table_options_.flush_block_policy_factory == nullptr) { table_options_.flush_block_policy_factory.reset( new FlushBlockBySizePolicyFactory()); } if (table_options_.no_block_cache) { table_options_.block_cache.reset(); } else if (table_options_.block_cache == nullptr) { LRUCacheOptions co; co.capacity = 8 << 20; // It makes little sense to pay overhead for mid-point insertion while the // block size is only 8MB. co.high_pri_pool_ratio = 0.0; table_options_.block_cache = NewLRUCache(co); } if (table_options_.block_size_deviation < 0 || table_options_.block_size_deviation > 100) { table_options_.block_size_deviation = 0; } if (table_options_.block_restart_interval < 1) { table_options_.block_restart_interval = 1; } if (table_options_.index_block_restart_interval < 1) { table_options_.index_block_restart_interval = 1; } if (table_options_.index_type == BlockBasedTableOptions::kHashSearch && table_options_.index_block_restart_interval != 1) { // Currently kHashSearch is incompatible with index_block_restart_interval > 1 table_options_.index_block_restart_interval = 1; } if (table_options_.partition_filters && table_options_.index_type != BlockBasedTableOptions::kTwoLevelIndexSearch) { // We do not support partitioned filters without partitioning indexes table_options_.partition_filters = false; } } Status BlockBasedTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache) const { return BlockBasedTable::Open( table_reader_options.ioptions, 
table_reader_options.env_options, table_options_, table_reader_options.internal_comparator, std::move(file), file_size, table_reader, table_reader_options.prefix_extractor, prefetch_index_and_filter_in_cache, table_reader_options.skip_filters, table_reader_options.level, table_reader_options.immortal, table_reader_options.largest_seqno, table_reader_options.force_direct_prefetch, &tail_prefetch_stats_, table_reader_options.block_cache_tracer, table_reader_options.max_file_size_for_l0_meta_pin); } TableBuilder* BlockBasedTableFactory::NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const { auto table_builder = new BlockBasedTableBuilder( table_builder_options.ioptions, table_builder_options.moptions, table_options_, table_builder_options.internal_comparator, table_builder_options.int_tbl_prop_collector_factories, column_family_id, file, table_builder_options.compression_type, table_builder_options.sample_for_compression, table_builder_options.compression_opts, table_builder_options.skip_filters, table_builder_options.column_family_name, table_builder_options.level, table_builder_options.creation_time, table_builder_options.oldest_key_time, table_builder_options.target_file_size, table_builder_options.file_creation_time); return table_builder; } Status BlockBasedTableFactory::SanitizeOptions( const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const { if (table_options_.index_type == BlockBasedTableOptions::kHashSearch && cf_opts.prefix_extractor == nullptr) { return Status::InvalidArgument( "Hash index is specified for block-based " "table, but prefix_extractor is not given"); } if (table_options_.cache_index_and_filter_blocks && table_options_.no_block_cache) { return Status::InvalidArgument( "Enable cache_index_and_filter_blocks, " ", but block cache is disabled"); } if (table_options_.pin_l0_filter_and_index_blocks_in_cache && table_options_.no_block_cache) { return 
Status::InvalidArgument( "Enable pin_l0_filter_and_index_blocks_in_cache, " ", but block cache is disabled"); } if (!BlockBasedTableSupportedVersion(table_options_.format_version)) { return Status::InvalidArgument( "Unsupported BlockBasedTable format_version. Please check " "include/rocksdb/table.h for more info"); } if (table_options_.block_align && (cf_opts.compression != kNoCompression)) { return Status::InvalidArgument( "Enable block_align, but compression " "enabled"); } if (table_options_.block_align && (table_options_.block_size & (table_options_.block_size - 1))) { return Status::InvalidArgument( "Block alignment requested but block size is not a power of 2"); } if (table_options_.block_size > port::kMaxUint32) { return Status::InvalidArgument( "block size exceeds maximum number (4GiB) allowed"); } if (table_options_.data_block_index_type == BlockBasedTableOptions::kDataBlockBinaryAndHash && table_options_.data_block_hash_table_util_ratio <= 0) { return Status::InvalidArgument( "data_block_hash_table_util_ratio should be greater than 0 when " "data_block_index_type is set to kDataBlockBinaryAndHash"); } if (db_opts.unordered_write && cf_opts.max_successive_merges > 0) { // TODO(myabandeh): support it return Status::InvalidArgument( "max_successive_merges larger than 0 is currently inconsistent with " "unordered_write"); } return Status::OK(); } std::string BlockBasedTableFactory::GetPrintableTableOptions() const { std::string ret; ret.reserve(20000); const int kBufferSize = 200; char buffer[kBufferSize]; snprintf(buffer, kBufferSize, " flush_block_policy_factory: %s (%p)\n", table_options_.flush_block_policy_factory->Name(), static_cast(table_options_.flush_block_policy_factory.get())); ret.append(buffer); snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks: %d\n", table_options_.cache_index_and_filter_blocks); ret.append(buffer); snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks_with_high_priority: %d\n", 
table_options_.cache_index_and_filter_blocks_with_high_priority); ret.append(buffer); snprintf(buffer, kBufferSize, " pin_l0_filter_and_index_blocks_in_cache: %d\n", table_options_.pin_l0_filter_and_index_blocks_in_cache); ret.append(buffer); snprintf(buffer, kBufferSize, " pin_top_level_index_and_filter: %d\n", table_options_.pin_top_level_index_and_filter); ret.append(buffer); snprintf(buffer, kBufferSize, " index_type: %d\n", table_options_.index_type); ret.append(buffer); snprintf(buffer, kBufferSize, " data_block_index_type: %d\n", table_options_.data_block_index_type); ret.append(buffer); snprintf(buffer, kBufferSize, " index_shortening: %d\n", static_cast(table_options_.index_shortening)); ret.append(buffer); snprintf(buffer, kBufferSize, " data_block_hash_table_util_ratio: %lf\n", table_options_.data_block_hash_table_util_ratio); ret.append(buffer); snprintf(buffer, kBufferSize, " hash_index_allow_collision: %d\n", table_options_.hash_index_allow_collision); ret.append(buffer); snprintf(buffer, kBufferSize, " checksum: %d\n", table_options_.checksum); ret.append(buffer); snprintf(buffer, kBufferSize, " no_block_cache: %d\n", table_options_.no_block_cache); ret.append(buffer); snprintf(buffer, kBufferSize, " block_cache: %p\n", static_cast(table_options_.block_cache.get())); ret.append(buffer); if (table_options_.block_cache) { const char* block_cache_name = table_options_.block_cache->Name(); if (block_cache_name != nullptr) { snprintf(buffer, kBufferSize, " block_cache_name: %s\n", block_cache_name); ret.append(buffer); } ret.append(" block_cache_options:\n"); ret.append(table_options_.block_cache->GetPrintableOptions()); } snprintf(buffer, kBufferSize, " block_cache_compressed: %p\n", static_cast(table_options_.block_cache_compressed.get())); ret.append(buffer); if (table_options_.block_cache_compressed) { const char* block_cache_compressed_name = table_options_.block_cache_compressed->Name(); if (block_cache_compressed_name != nullptr) { snprintf(buffer, 
kBufferSize, " block_cache_name: %s\n", block_cache_compressed_name); ret.append(buffer); } ret.append(" block_cache_compressed_options:\n"); ret.append(table_options_.block_cache_compressed->GetPrintableOptions()); } snprintf(buffer, kBufferSize, " persistent_cache: %p\n", static_cast(table_options_.persistent_cache.get())); ret.append(buffer); if (table_options_.persistent_cache) { snprintf(buffer, kBufferSize, " persistent_cache_options:\n"); ret.append(buffer); ret.append(table_options_.persistent_cache->GetPrintableOptions()); } snprintf(buffer, kBufferSize, " block_size: %" ROCKSDB_PRIszt "\n", table_options_.block_size); ret.append(buffer); snprintf(buffer, kBufferSize, " block_size_deviation: %d\n", table_options_.block_size_deviation); ret.append(buffer); snprintf(buffer, kBufferSize, " block_restart_interval: %d\n", table_options_.block_restart_interval); ret.append(buffer); snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n", table_options_.index_block_restart_interval); ret.append(buffer); snprintf(buffer, kBufferSize, " metadata_block_size: %" PRIu64 "\n", table_options_.metadata_block_size); ret.append(buffer); snprintf(buffer, kBufferSize, " partition_filters: %d\n", table_options_.partition_filters); ret.append(buffer); snprintf(buffer, kBufferSize, " use_delta_encoding: %d\n", table_options_.use_delta_encoding); ret.append(buffer); snprintf(buffer, kBufferSize, " filter_policy: %s\n", table_options_.filter_policy == nullptr ? 
"nullptr" : table_options_.filter_policy->Name()); ret.append(buffer); snprintf(buffer, kBufferSize, " whole_key_filtering: %d\n", table_options_.whole_key_filtering); ret.append(buffer); snprintf(buffer, kBufferSize, " verify_compression: %d\n", table_options_.verify_compression); ret.append(buffer); snprintf(buffer, kBufferSize, " read_amp_bytes_per_bit: %d\n", table_options_.read_amp_bytes_per_bit); ret.append(buffer); snprintf(buffer, kBufferSize, " format_version: %d\n", table_options_.format_version); ret.append(buffer); snprintf(buffer, kBufferSize, " enable_index_compression: %d\n", table_options_.enable_index_compression); ret.append(buffer); snprintf(buffer, kBufferSize, " block_align: %d\n", table_options_.block_align); ret.append(buffer); return ret; } #ifndef ROCKSDB_LITE Status BlockBasedTableFactory::GetOptionString( const ConfigOptions& config_options, std::string* opt_string) const { assert(opt_string); opt_string->clear(); return GetStringFromStruct(config_options, &table_options_, block_based_table_type_info, opt_string); } #else Status BlockBasedTableFactory::GetOptionString( const ConfigOptions& /*opts*/, std::string* /*opt_string*/) const { return Status::OK(); } #endif // !ROCKSDB_LITE const BlockBasedTableOptions& BlockBasedTableFactory::table_options() const { return table_options_; } #ifndef ROCKSDB_LITE namespace { std::string ParseBlockBasedTableOption(const ConfigOptions& config_options, const std::string& name, const std::string& org_value, BlockBasedTableOptions* new_options) { const std::string& value = config_options.input_strings_escaped ? 
UnescapeOptionString(org_value) : org_value; const auto iter = block_based_table_type_info.find(name); if (iter == block_based_table_type_info.end()) { if (config_options.ignore_unknown_options) { return ""; } else { return "Unrecognized option"; } } const auto& opt_info = iter->second; Status s = opt_info.Parse(config_options, iter->first, value, reinterpret_cast(new_options) + opt_info.offset_); if (s.ok()) { return ""; } else { return s.ToString(); } } } // namespace Status GetBlockBasedTableOptionsFromString( const BlockBasedTableOptions& table_options, const std::string& opts_str, BlockBasedTableOptions* new_table_options) { ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; return GetBlockBasedTableOptionsFromString(config_options, table_options, opts_str, new_table_options); } Status GetBlockBasedTableOptionsFromString( const ConfigOptions& config_options, const BlockBasedTableOptions& table_options, const std::string& opts_str, BlockBasedTableOptions* new_table_options) { std::unordered_map opts_map; Status s = StringToMap(opts_str, &opts_map); if (!s.ok()) { return s; } return GetBlockBasedTableOptionsFromMap(config_options, table_options, opts_map, new_table_options); } Status GetBlockBasedTableOptionsFromMap( const BlockBasedTableOptions& table_options, const std::unordered_map& opts_map, BlockBasedTableOptions* new_table_options, bool input_strings_escaped, bool ignore_unknown_options) { ConfigOptions config_options; config_options.input_strings_escaped = input_strings_escaped; config_options.ignore_unknown_options = ignore_unknown_options; return GetBlockBasedTableOptionsFromMap(config_options, table_options, opts_map, new_table_options); } Status GetBlockBasedTableOptionsFromMap( const ConfigOptions& config_options, const BlockBasedTableOptions& table_options, const std::unordered_map& opts_map, BlockBasedTableOptions* new_table_options) { assert(new_table_options); 
*new_table_options = table_options; for (const auto& o : opts_map) { auto error_message = ParseBlockBasedTableOption( config_options, o.first, o.second, new_table_options); if (error_message != "") { const auto iter = block_based_table_type_info.find(o.first); if (iter == block_based_table_type_info.end() || !config_options .input_strings_escaped || // !input_strings_escaped indicates // the old API, where everything is // parsable. (!iter->second.IsByName() && !iter->second.IsDeprecated())) { // Restore "new_options" to the default "base_options". *new_table_options = table_options; return Status::InvalidArgument("Can't parse BlockBasedTableOptions:", o.first + " " + error_message); } } } return Status::OK(); } Status VerifyBlockBasedTableFactory(const ConfigOptions& config_options, const BlockBasedTableFactory* base_tf, const BlockBasedTableFactory* file_tf) { if ((base_tf != nullptr) != (file_tf != nullptr) && config_options.sanity_level > ConfigOptions::kSanityLevelNone) { return Status::Corruption( "[RocksDBOptionsParser]: Inconsistent TableFactory class type"); } if (base_tf == nullptr) { return Status::OK(); } assert(file_tf != nullptr); const auto& base_opt = base_tf->table_options(); const auto& file_opt = file_tf->table_options(); std::string mismatch; for (auto& pair : block_based_table_type_info) { // We skip checking deprecated variables as they might // contain random values since they might not be initialized if (config_options.IsCheckEnabled(pair.second.GetSanityLevel())) { const char* base_addr = reinterpret_cast(&base_opt) + pair.second.offset_; const char* file_addr = reinterpret_cast(&file_opt) + pair.second.offset_; if (!pair.second.AreEqual(config_options, pair.first, base_addr, file_addr, &mismatch) && !pair.second.AreEqualByName(config_options, pair.first, base_addr, file_addr)) { return Status::Corruption( "[RocksDBOptionsParser]: " "failed the verification on BlockBasedTableOptions::", pair.first); } } } return Status::OK(); } #endif // 
!ROCKSDB_LITE

// Heap-allocates a BlockBasedTableFactory configured with `_table_options`
// and returns it as a raw pointer.
TableFactory* NewBlockBasedTableFactory(
    const BlockBasedTableOptions& _table_options) {
  return new BlockBasedTableFactory(_table_options);
}

// Name returned by BlockBasedTableFactory::Name().
const std::string BlockBasedTableFactory::kName = "BlockBasedTable";
// Property-name string constants.
const std::string BlockBasedTablePropertyNames::kIndexType =
    "rocksdb.block.based.table.index.type";
const std::string BlockBasedTablePropertyNames::kWholeKeyFiltering =
    "rocksdb.block.based.table.whole.key.filtering";
const std::string BlockBasedTablePropertyNames::kPrefixFiltering =
    "rocksdb.block.based.table.prefix.filtering";
const std::string kHashIndexPrefixesBlock = "rocksdb.hashindex.prefixes";
const std::string kHashIndexPrefixesMetadataBlock =
    "rocksdb.hashindex.metadata";
const std::string kPropTrue = "1";
const std::string kPropFalse = "0";

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/block_based_table_factory.h000066400000000000000000000060311370372246700240630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once
#include
#include
#include
#include "db/dbformat.h"
#include "rocksdb/flush_block_policy.h"
#include "rocksdb/table.h"

namespace ROCKSDB_NAMESPACE {

struct ConfigOptions;
struct EnvOptions;

class BlockBasedTableBuilder;

// A class used to track actual bytes written from the tail in the recent SST
// file opens, and provide a suggestion for following open.
class TailPrefetchStats {
 public:
  // Records one observed size. (Implementation is not in this header.)
  void RecordEffectiveSize(size_t len);
  // 0 indicates no information to determine.
  size_t GetSuggestedPrefetchSize();

 private:
  // Capacity of the fixed-size `records_` array.
  const static size_t kNumTracked = 32;
  size_t records_[kNumTracked];
  port::Mutex mutex_;
  size_t next_ = 0;
  size_t num_records_ = 0;
};

// TableFactory implementation for the block-based table format. Holds its own
// copy of the table options plus tail-prefetch statistics that are passed to
// the readers it opens.
class BlockBasedTableFactory : public TableFactory {
 public:
  explicit BlockBasedTableFactory(
      const BlockBasedTableOptions& table_options = BlockBasedTableOptions());

  ~BlockBasedTableFactory() {}

  const char* Name() const override { return kName.c_str(); }

  Status NewTableReader(
      const TableReaderOptions& table_reader_options,
      std::unique_ptr&& file, uint64_t file_size,
      std::unique_ptr* table_reader,
      bool prefetch_index_and_filter_in_cache = true) const override;

  TableBuilder* NewTableBuilder(
      const TableBuilderOptions& table_builder_options,
      uint32_t column_family_id, WritableFileWriter* file) const override;

  // Sanitizes the specified DB Options.
  Status SanitizeOptions(const DBOptions& db_opts,
                         const ColumnFamilyOptions& cf_opts) const override;

  std::string GetPrintableTableOptions() const override;

  Status GetOptionString(const ConfigOptions& config_options,
                         std::string* opt_string) const override;

  const BlockBasedTableOptions& table_options() const;

  // Returns a mutable, type-erased pointer to the stored table options.
  void* GetOptions() override { return &table_options_; }

  bool IsDeleteRangeSupported() const override { return true; }

  TailPrefetchStats* tail_prefetch_stats() { return &tail_prefetch_stats_; }

  static const std::string kName;

 private:
  BlockBasedTableOptions table_options_;
  // mutable: its address is handed out from the const NewTableReader().
  mutable TailPrefetchStats tail_prefetch_stats_;
};

extern const std::string kHashIndexPrefixesBlock;
extern const std::string kHashIndexPrefixesMetadataBlock;
extern const std::string kPropTrue;
extern const std::string kPropFalse;

#ifndef ROCKSDB_LITE
extern Status VerifyBlockBasedTableFactory(
    const ConfigOptions& config_options, const BlockBasedTableFactory* base_tf,
    const BlockBasedTableFactory* file_tf);
#endif  // !ROCKSDB_LITE
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/block_based_table_iterator.cc000066400000000000000000000300211370372246700243770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based/block_based_table_iterator.h"

namespace ROCKSDB_NAMESPACE {
// Seek to `target`; thin wrapper over SeekImpl().
void BlockBasedTableIterator::Seek(const Slice& target) { SeekImpl(&target); }

// Seek to the first entry; SeekImpl() treats a null target as "first".
void BlockBasedTableIterator::SeekToFirst() { SeekImpl(nullptr); }

// Shared implementation of Seek() and SeekToFirst().
// If `target` is null, seeks to the first entry. May leave the iterator
// positioned on a key taken from the index (is_at_first_key_from_index_)
// without having loaded the data block.
void BlockBasedTableIterator::SeekImpl(const Slice* target) {
  is_out_of_bound_ = false;
  is_at_first_key_from_index_ = false;
  if (target && !CheckPrefixMayMatch(*target, IterDirection::kForward)) {
    ResetDataIter();
    return;
  }

  bool need_seek_index = true;
  if (block_iter_points_to_real_block_ && block_iter_.Valid()) {
    // Reseek.
    prev_block_offset_ = index_iter_->value().handle.offset();

    if (target) {
      // We can avoid an index seek if:
      // 1. The new seek key is larger than the current key
      // 2. The new seek key is within the upper bound of the block
      // Since we don't necessarily know the internal key for either
      // the current key or the upper bound, we check user keys and
      // exclude the equality case. Considering internal keys can
      // improve for the boundary cases, but it would complicate the
      // code.
      if (user_comparator_.Compare(ExtractUserKey(*target),
                                   block_iter_.user_key()) > 0 &&
          user_comparator_.Compare(ExtractUserKey(*target),
                                   index_iter_->user_key()) < 0) {
        need_seek_index = false;
      }
    }
  }

  if (need_seek_index) {
    if (target) {
      index_iter_->Seek(*target);
    } else {
      index_iter_->SeekToFirst();
    }

    if (!index_iter_->Valid()) {
      ResetDataIter();
      return;
    }
  }

  IndexValue v = index_iter_->value();
  const bool same_block = block_iter_points_to_real_block_ &&
                          v.handle.offset() == prev_block_offset_;

  if (!v.first_internal_key.empty() && !same_block &&
      (!target || icomp_.Compare(*target, v.first_internal_key) <= 0) &&
      allow_unprepared_value_) {
    // Index contains the first key of the block, and it's >= target.
    // We can defer reading the block.
    is_at_first_key_from_index_ = true;
    // ResetDataIter() will invalidate block_iter_. Thus, there is no need to
    // call CheckDataBlockWithinUpperBound() to check for iterate_upper_bound
    // as that will be done later when the data block is actually read.
    ResetDataIter();
  } else {
    // Need to use the data block.
    if (!same_block) {
      InitDataBlock();
    } else {
      // When the user does a reseek, the iterate_upper_bound might have
      // changed. CheckDataBlockWithinUpperBound() needs to be called
      // explicitly if the reseek ends up in the same data block.
      // If the reseek ends up in a different block, InitDataBlock() will do
      // the iterator upper bound check.
      CheckDataBlockWithinUpperBound();
    }

    if (target) {
      block_iter_.Seek(*target);
    } else {
      block_iter_.SeekToFirst();
    }
    FindKeyForward();
  }

  CheckOutOfBound();

  if (target) {
    assert(!Valid() || icomp_.Compare(*target, key()) <= 0);
  }
}

// Positions the iterator at the last entry <= `target`. Always loads a data
// block (never leaves the iterator in the deferred "first key from index"
// state).
void BlockBasedTableIterator::SeekForPrev(const Slice& target) {
  is_out_of_bound_ = false;
  is_at_first_key_from_index_ = false;
  // For now totally disable prefix seek in auto prefix mode because we don't
  // have logic
  if (!CheckPrefixMayMatch(target, IterDirection::kBackward)) {
    ResetDataIter();
    return;
  }

  SavePrevIndexValue();

  // Call Seek() rather than SeekForPrev() in the index block, because the
  // target data block will likely contain the position for `target`, the
  // same as Seek(), rather than before.
  // For example, if we have three data blocks, each containing two keys:
  //   [2, 4]  [6, 8] [10, 12]
  //  (the keys in the index block would be [4, 8, 12])
  // and the user calls SeekForPrev(7), we need to go to the second block,
  // just like if they call Seek(7).
  // The only case where the block is different is when they seek to a position
  // in the boundary. For example, if they SeekForPrev(5), we should go to the
  // first block, rather than the second. However, we don't have the information
  // to distinguish the two unless we read the second block. In this case, we'll
  // end up with reading two blocks.
  index_iter_->Seek(target);

  if (!index_iter_->Valid()) {
    auto seek_status = index_iter_->status();
    // Check for IO error
    if (!seek_status.IsNotFound() && !seek_status.ok()) {
      ResetDataIter();
      return;
    }

    // With prefix index, Seek() returns NotFound if the prefix doesn't exist
    if (seek_status.IsNotFound()) {
      // Any key less than the target is fine for prefix seek
      ResetDataIter();
      return;
    } else {
      index_iter_->SeekToLast();
    }
    // Check for IO error
    if (!index_iter_->Valid()) {
      ResetDataIter();
      return;
    }
  }

  InitDataBlock();

  block_iter_.SeekForPrev(target);

  FindKeyBackward();
  CheckDataBlockWithinUpperBound();
  assert(!block_iter_.Valid() ||
         icomp_.Compare(target, block_iter_.key()) >= 0);
}

// Positions the iterator at the last entry reachable through the index.
void BlockBasedTableIterator::SeekToLast() {
  is_out_of_bound_ = false;
  is_at_first_key_from_index_ = false;
  SavePrevIndexValue();
  index_iter_->SeekToLast();
  if (!index_iter_->Valid()) {
    ResetDataIter();
    return;
  }
  InitDataBlock();
  block_iter_.SeekToLast();
  FindKeyBackward();
  CheckDataBlockWithinUpperBound();
}

// Advances by one entry. If the current position is a deferred first key
// from the index, the block is loaded first; on failure the call returns
// without advancing.
void BlockBasedTableIterator::Next() {
  if (is_at_first_key_from_index_ && !MaterializeCurrentBlock()) {
    return;
  }
  assert(block_iter_points_to_real_block_);
  block_iter_.Next();
  FindKeyForward();
  CheckOutOfBound();
}

// Next() followed by filling `*result` when the new position is valid.
// Returns the validity of the new position; `*result` is left untouched when
// it is false.
bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) {
  Next();
  bool is_valid = Valid();
  if (is_valid) {
    result->key = key();
    result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
    result->value_prepared = !is_at_first_key_from_index_;
  }
  return is_valid;
}

// Moves back by one entry. From a deferred first-key position this steps the
// index back and loads the previous block instead of the current one.
void BlockBasedTableIterator::Prev() {
  if (is_at_first_key_from_index_) {
    is_at_first_key_from_index_ = false;

    index_iter_->Prev();
    if (!index_iter_->Valid()) {
      return;
    }

    InitDataBlock();
    block_iter_.SeekToLast();
  } else {
    assert(block_iter_points_to_real_block_);
    block_iter_.Prev();
  }

  FindKeyBackward();
}

// Makes block_iter_ point at the data block referenced by the current index
// entry. Does nothing if block_iter_ already points at that block (same
// offset as prev_block_offset_) and its status is not Incomplete.
void BlockBasedTableIterator::InitDataBlock() {
  BlockHandle data_block_handle = index_iter_->value().handle;
  if (!block_iter_points_to_real_block_ ||
      data_block_handle.offset() != prev_block_offset_ ||
      // if previous attempt of reading the block missed cache, try again
      block_iter_.status().IsIncomplete()) {
    if (block_iter_points_to_real_block_) {
      ResetDataIter();
    }
    auto* rep = table_->get_rep();

    bool is_for_compaction =
        lookup_context_.caller == TableReaderCaller::kCompaction;
    // Prefetch additional data for range scans (iterators).
    // Implicit auto readahead:
    //   Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0.
    // Explicit user requested readahead:
    //   Enabled from the very first IO when ReadOptions.readahead_size is set.
    block_prefetcher_.PrefetchIfNeeded(rep, data_block_handle,
                                       read_options_.readahead_size,
                                       is_for_compaction);

    Status s;
    table_->NewDataBlockIterator(
        read_options_, data_block_handle, &block_iter_, BlockType::kData,
        /*get_context=*/nullptr, &lookup_context_, s,
        block_prefetcher_.prefetch_buffer(),
        /*for_compaction=*/is_for_compaction);
    block_iter_points_to_real_block_ = true;
    CheckDataBlockWithinUpperBound();
  }
}

// Loads the block for a deferred first-key position and verifies that the
// block's first key equals the key stored in the index. Returns false (with
// block_iter_ carrying the error) on read failure or key mismatch.
bool BlockBasedTableIterator::MaterializeCurrentBlock() {
  assert(is_at_first_key_from_index_);
  assert(!block_iter_points_to_real_block_);
  assert(index_iter_->Valid());

  is_at_first_key_from_index_ = false;
  InitDataBlock();
  assert(block_iter_points_to_real_block_);

  if (!block_iter_.status().ok()) {
    return false;
  }

  block_iter_.SeekToFirst();

  if (!block_iter_.Valid() ||
      icomp_.Compare(block_iter_.key(),
                     index_iter_->value().first_internal_key) != 0) {
    block_iter_.Invalidate(Status::Corruption(
        "first key in index doesn't match first key in block"));
    return false;
  }

  return true;
}

// If block_iter_ has run off the end of its block, moves on to the next
// block via FindBlockForward(); otherwise does nothing.
void BlockBasedTableIterator::FindKeyForward() {
  // This method's code is kept short to make it likely to be inlined.

  assert(!is_out_of_bound_);
  assert(block_iter_points_to_real_block_);

  if (!block_iter_.Valid()) {
    // This is the only call site of FindBlockForward(), but it's extracted into
    // a separate method to keep FindKeyForward() short and likely to be
    // inlined. When transitioning to a different block, we call
    // FindBlockForward(), which is much longer and is probably not inlined.
    FindBlockForward();
  } else {
    // This is the fast path that avoids a function call.
  }
}

// Advances the index until a block with a valid first entry is found, the
// index is exhausted, an error occurs, or the next block is known to be
// beyond iterate_upper_bound.
void BlockBasedTableIterator::FindBlockForward() {
  // TODO the while loop inherits from two-level-iterator. We don't know
  // whether a block can be empty so it can be replaced by an "if".
  do {
    if (!block_iter_.status().ok()) {
      return;
    }
    // Whether next data block is out of upper bound, if there is one.
    const bool next_block_is_out_of_bound =
        read_options_.iterate_upper_bound != nullptr &&
        block_iter_points_to_real_block_ && !data_block_within_upper_bound_;
    assert(!next_block_is_out_of_bound ||
           user_comparator_.CompareWithoutTimestamp(
               *read_options_.iterate_upper_bound, /*a_has_ts=*/false,
               index_iter_->user_key(), /*b_has_ts=*/true) <= 0);
    ResetDataIter();
    index_iter_->Next();
    if (next_block_is_out_of_bound) {
      // The next block is out of bound. No need to read it.
      TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
      // We need to make sure this is not the last data block before setting
      // is_out_of_bound_, since the index key for the last data block can be
      // larger than smallest key of the next file on the same level.
      if (index_iter_->Valid()) {
        is_out_of_bound_ = true;
      }
      return;
    }

    if (!index_iter_->Valid()) {
      return;
    }

    IndexValue v = index_iter_->value();

    if (!v.first_internal_key.empty() && allow_unprepared_value_) {
      // Index contains the first key of the block. Defer reading the block.
      is_at_first_key_from_index_ = true;
      return;
    }

    InitDataBlock();
    block_iter_.SeekToFirst();
  } while (!block_iter_.Valid());
}

// Backward counterpart of FindBlockForward(): while block_iter_ is invalid
// and error-free, steps the index back and positions on the last entry of the
// previous block.
void BlockBasedTableIterator::FindKeyBackward() {
  while (!block_iter_.Valid()) {
    if (!block_iter_.status().ok()) {
      return;
    }

    ResetDataIter();
    index_iter_->Prev();

    if (index_iter_->Valid()) {
      InitDataBlock();
      block_iter_.SeekToLast();
    } else {
      return;
    }
  }

  // We could have checked the lower bound here too, but we opt not to do it
  // for code simplicity.
}

// Sets is_out_of_bound_ when the current user key is >= iterate_upper_bound.
// No-op if no upper bound is set or the iterator is not valid.
void BlockBasedTableIterator::CheckOutOfBound() {
  if (read_options_.iterate_upper_bound != nullptr && Valid()) {
    is_out_of_bound_ =
        user_comparator_.CompareWithoutTimestamp(
            *read_options_.iterate_upper_bound, /*a_has_ts=*/false, user_key(),
            /*b_has_ts=*/true) <= 0;
  }
}

// Records whether the current index key is strictly below
// iterate_upper_bound. No-op if no upper bound is set or no data block is
// loaded.
void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() {
  if (read_options_.iterate_upper_bound != nullptr &&
      block_iter_points_to_real_block_) {
    data_block_within_upper_bound_ =
        (user_comparator_.CompareWithoutTimestamp(
             *read_options_.iterate_upper_bound, /*a_has_ts=*/false,
             index_iter_->user_key(),
             /*b_has_ts=*/true) > 0);
  }
}
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/block_based_table_iterator.h000066400000000000000000000171371370372246700242560ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader_impl.h" #include "table/block_based/block_prefetcher.h" #include "table/block_based/reader_common.h" namespace ROCKSDB_NAMESPACE { // Iterates over the contents of BlockBasedTable. class BlockBasedTableIterator : public InternalIteratorBase { // compaction_readahead_size: its value will only be used if for_compaction = // true public: BlockBasedTableIterator( const BlockBasedTable* table, const ReadOptions& read_options, const InternalKeyComparator& icomp, std::unique_ptr>&& index_iter, bool check_filter, bool need_upper_bound_check, const SliceTransform* prefix_extractor, TableReaderCaller caller, size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) : table_(table), read_options_(read_options), icomp_(icomp), user_comparator_(icomp.user_comparator()), allow_unprepared_value_(allow_unprepared_value), index_iter_(std::move(index_iter)), pinned_iters_mgr_(nullptr), block_iter_points_to_real_block_(false), check_filter_(check_filter), need_upper_bound_check_(need_upper_bound_check), prefix_extractor_(prefix_extractor), lookup_context_(caller), block_prefetcher_(compaction_readahead_size) {} ~BlockBasedTableIterator() {} void Seek(const Slice& target) override; void SeekForPrev(const Slice& target) override; void SeekToFirst() override; void SeekToLast() override; void Next() final override; bool NextAndGetResult(IterateResult* result) override; void Prev() override; bool Valid() const override { return !is_out_of_bound_ && (is_at_first_key_from_index_ || (block_iter_points_to_real_block_ && block_iter_.Valid())); } Slice key() const override { assert(Valid()); if (is_at_first_key_from_index_) { return index_iter_->value().first_internal_key; } else { return block_iter_.key(); } } Slice user_key() const override { assert(Valid()); if (is_at_first_key_from_index_) { return ExtractUserKey(index_iter_->value().first_internal_key); 
} else { return block_iter_.user_key(); } } bool PrepareValue() override { assert(Valid()); if (!is_at_first_key_from_index_) { return true; } return const_cast(this) ->MaterializeCurrentBlock(); } Slice value() const override { // PrepareValue() must have been called. assert(!is_at_first_key_from_index_); assert(Valid()); return block_iter_.value(); } Status status() const override { // Prefix index set status to NotFound when the prefix does not exist if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) { return index_iter_->status(); } else if (block_iter_points_to_real_block_) { return block_iter_.status(); } else { return Status::OK(); } } // Whether iterator invalidated for being out of bound. bool IsOutOfBound() override { return is_out_of_bound_; } inline bool MayBeOutOfUpperBound() override { assert(Valid()); return !data_block_within_upper_bound_; } void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { pinned_iters_mgr_ = pinned_iters_mgr; } bool IsKeyPinned() const override { // Our key comes either from block_iter_'s current key // or index_iter_'s current *value*. return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && ((is_at_first_key_from_index_ && index_iter_->IsValuePinned()) || (block_iter_points_to_real_block_ && block_iter_.IsKeyPinned())); } bool IsValuePinned() const override { assert(!is_at_first_key_from_index_); assert(Valid()); // BlockIter::IsValuePinned() is always true. No need to check return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && block_iter_points_to_real_block_; } void ResetDataIter() { if (block_iter_points_to_real_block_) { if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) { block_iter_.DelegateCleanupsTo(pinned_iters_mgr_); } block_iter_.Invalidate(Status::OK()); block_iter_points_to_real_block_ = false; } } void SavePrevIndexValue() { if (block_iter_points_to_real_block_) { // Reseek. 
If they end up with the same data block, we shouldn't re-fetch // the same data block. prev_block_offset_ = index_iter_->value().handle.offset(); } } private: enum class IterDirection { kForward, kBackward, }; const BlockBasedTable* table_; const ReadOptions read_options_; const InternalKeyComparator& icomp_; UserComparatorWrapper user_comparator_; const bool allow_unprepared_value_; std::unique_ptr> index_iter_; PinnedIteratorsManager* pinned_iters_mgr_; DataBlockIter block_iter_; // True if block_iter_ is initialized and points to the same block // as index iterator. bool block_iter_points_to_real_block_; // See InternalIteratorBase::IsOutOfBound(). bool is_out_of_bound_ = false; // Whether current data block being fully within iterate upper bound. bool data_block_within_upper_bound_ = false; // True if we're standing at the first key of a block, and we haven't loaded // that block yet. A call to PrepareValue() will trigger loading the block. bool is_at_first_key_from_index_ = false; bool check_filter_; // TODO(Zhongyi): pick a better name bool need_upper_bound_check_; const SliceTransform* prefix_extractor_; uint64_t prev_block_offset_ = std::numeric_limits::max(); BlockCacheLookupContext lookup_context_; BlockPrefetcher block_prefetcher_; // If `target` is null, seek to first. void SeekImpl(const Slice* target); void InitDataBlock(); bool MaterializeCurrentBlock(); void FindKeyForward(); void FindBlockForward(); void FindKeyBackward(); void CheckOutOfBound(); // Check if data block is fully within iterate_upper_bound. // // Note MyRocks may update iterate bounds between seek. To workaround it, // we need to check and update data_block_within_upper_bound_ accordingly. 
void CheckDataBlockWithinUpperBound(); bool CheckPrefixMayMatch(const Slice& ikey, IterDirection direction) { if (need_upper_bound_check_ && direction == IterDirection::kBackward) { // Upper bound check isn't sufficnet for backward direction to // guarantee the same result as total order, so disable prefix // check. return true; } if (check_filter_ && !table_->PrefixMayMatch(ikey, read_options_, prefix_extractor_, need_upper_bound_check_, &lookup_context_)) { // TODO remember the iterator is invalidated because of prefix // match. This can avoid the upper level file iterator to falsely // believe the position is the end of the SST file and move to // the first key of the next file. ResetDataIter(); return false; } return true; } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_table_reader.cc000066400000000000000000004105401370372246700240200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/block_based_table_reader.h" #include #include #include #include #include #include #include "cache/sharded_cache.h" #include "db/dbformat.h" #include "db/pinned_iterators_manager.h" #include "file/file_prefetch_buffer.h" #include "file/file_util.h" #include "file/random_access_file_reader.h" #include "monitoring/perf_context_imp.h" #include "options/options_helper.h" #include "rocksdb/cache.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/filter_policy.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "table/block_based/binary_search_index_reader.h" #include "table/block_based/block.h" #include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_iterator.h" #include "table/block_based/block_prefix_index.h" #include "table/block_based/filter_block.h" #include "table/block_based/full_filter_block.h" #include "table/block_based/hash_index_reader.h" #include "table/block_based/partitioned_filter_block.h" #include "table/block_based/partitioned_index_reader.h" #include "table/block_fetcher.h" #include "table/format.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "table/multiget_context.h" #include "table/persistent_cache_helper.h" #include "table/sst_file_writer_collectors.h" #include "table/two_level_iterator.h" #include "monitoring/perf_context_imp.h" #include "port/lang.h" #include "test_util/sync_point.h" #include "util/coding.h" #include "util/crc32c.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { extern const uint64_t kBlockBasedTableMagicNumber; extern const std::string kHashIndexPrefixesBlock; extern const std::string 
kHashIndexPrefixesMetadataBlock; typedef BlockBasedTable::IndexReader IndexReader; // Found that 256 KB readahead size provides the best performance, based on // experiments, for auto readahead. Experiment data is in PR #3282. const size_t BlockBasedTable::kMaxAutoReadaheadSize = 256 * 1024; BlockBasedTable::~BlockBasedTable() { delete rep_; } std::atomic BlockBasedTable::next_cache_key_id_(0); template class BlocklikeTraits; template <> class BlocklikeTraits { public: static BlockContents* Create(BlockContents&& contents, size_t /* read_amp_bytes_per_bit */, Statistics* /* statistics */, bool /* using_zstd */, const FilterPolicy* /* filter_policy */) { return new BlockContents(std::move(contents)); } static uint32_t GetNumRestarts(const BlockContents& /* contents */) { return 0; } }; template <> class BlocklikeTraits { public: static ParsedFullFilterBlock* Create(BlockContents&& contents, size_t /* read_amp_bytes_per_bit */, Statistics* /* statistics */, bool /* using_zstd */, const FilterPolicy* filter_policy) { return new ParsedFullFilterBlock(filter_policy, std::move(contents)); } static uint32_t GetNumRestarts(const ParsedFullFilterBlock& /* block */) { return 0; } }; template <> class BlocklikeTraits { public: static Block* Create(BlockContents&& contents, size_t read_amp_bytes_per_bit, Statistics* statistics, bool /* using_zstd */, const FilterPolicy* /* filter_policy */) { return new Block(std::move(contents), read_amp_bytes_per_bit, statistics); } static uint32_t GetNumRestarts(const Block& block) { return block.NumRestarts(); } }; template <> class BlocklikeTraits { public: static UncompressionDict* Create(BlockContents&& contents, size_t /* read_amp_bytes_per_bit */, Statistics* /* statistics */, bool using_zstd, const FilterPolicy* /* filter_policy */) { return new UncompressionDict(contents.data, std::move(contents.allocation), using_zstd); } static uint32_t GetNumRestarts(const UncompressionDict& /* dict */) { return 0; } }; namespace { // Read the 
block identified by "handle" from "file". // The only relevant option is options.verify_checksums for now. // On failure return non-OK. // On success fill *result and return OK - caller owns *result // @param uncompression_dict Data for presetting the compression library's // dictionary. template Status ReadBlockFromFile( RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& options, const BlockHandle& handle, std::unique_ptr* result, const ImmutableCFOptions& ioptions, bool do_uncompress, bool maybe_compressed, BlockType block_type, const UncompressionDict& uncompression_dict, const PersistentCacheOptions& cache_options, size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator, bool for_compaction, bool using_zstd, const FilterPolicy* filter_policy) { assert(result); BlockContents contents; BlockFetcher block_fetcher( file, prefetch_buffer, footer, options, handle, &contents, ioptions, do_uncompress, maybe_compressed, block_type, uncompression_dict, cache_options, memory_allocator, nullptr, for_compaction); Status s = block_fetcher.ReadBlockContents(); if (s.ok()) { result->reset(BlocklikeTraits::Create( std::move(contents), read_amp_bytes_per_bit, ioptions.statistics, using_zstd, filter_policy)); } return s; } // Delete the entry resided in the cache. template void DeleteCachedEntry(const Slice& /*key*/, void* value) { auto entry = reinterpret_cast(value); delete entry; } // Release the cached entry and decrement its ref count. // Do not force erase void ReleaseCachedEntry(void* arg, void* h) { Cache* cache = reinterpret_cast(arg); Cache::Handle* handle = reinterpret_cast(h); cache->Release(handle, false /* force_erase */); } // For hash based index, return true if prefix_extractor and // prefix_extractor_block mismatch, false otherwise. 
This flag will be used // as total_order_seek via NewIndexIterator bool PrefixExtractorChanged(const TableProperties* table_properties, const SliceTransform* prefix_extractor) { // BlockBasedTableOptions::kHashSearch requires prefix_extractor to be set. // Turn off hash index in prefix_extractor is not set; if prefix_extractor // is set but prefix_extractor_block is not set, also disable hash index if (prefix_extractor == nullptr || table_properties == nullptr || table_properties->prefix_extractor_name.empty()) { return true; } // prefix_extractor and prefix_extractor_block are both non-empty if (table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) != 0) { return true; } else { return false; } } CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) { CacheAllocationPtr heap_buf; heap_buf = AllocateBlock(buf.size(), allocator); memcpy(heap_buf.get(), buf.data(), buf.size()); return heap_buf; } } // namespace void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type, GetContext* get_context, size_t usage) const { Statistics* const statistics = rep_->ioptions.statistics; PERF_COUNTER_ADD(block_cache_hit_count, 1); PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, static_cast(rep_->level)); if (get_context) { ++get_context->get_context_stats_.num_cache_hit; get_context->get_context_stats_.num_cache_bytes_read += usage; } else { RecordTick(statistics, BLOCK_CACHE_HIT); RecordTick(statistics, BLOCK_CACHE_BYTES_READ, usage); } switch (block_type) { case BlockType::kFilter: PERF_COUNTER_ADD(block_cache_filter_hit_count, 1); if (get_context) { ++get_context->get_context_stats_.num_cache_filter_hit; } else { RecordTick(statistics, BLOCK_CACHE_FILTER_HIT); } break; case BlockType::kCompressionDictionary: // TODO: introduce perf counter for compression dictionary hit count if (get_context) { ++get_context->get_context_stats_.num_cache_compression_dict_hit; } else { RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_HIT); } 
break; case BlockType::kIndex: PERF_COUNTER_ADD(block_cache_index_hit_count, 1); if (get_context) { ++get_context->get_context_stats_.num_cache_index_hit; } else { RecordTick(statistics, BLOCK_CACHE_INDEX_HIT); } break; default: // TODO: introduce dedicated tickers/statistics/counters // for range tombstones if (get_context) { ++get_context->get_context_stats_.num_cache_data_hit; } else { RecordTick(statistics, BLOCK_CACHE_DATA_HIT); } break; } } void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type, GetContext* get_context) const { Statistics* const statistics = rep_->ioptions.statistics; // TODO: introduce aggregate (not per-level) block cache miss count PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1, static_cast(rep_->level)); if (get_context) { ++get_context->get_context_stats_.num_cache_miss; } else { RecordTick(statistics, BLOCK_CACHE_MISS); } // TODO: introduce perf counters for misses per block type switch (block_type) { case BlockType::kFilter: if (get_context) { ++get_context->get_context_stats_.num_cache_filter_miss; } else { RecordTick(statistics, BLOCK_CACHE_FILTER_MISS); } break; case BlockType::kCompressionDictionary: if (get_context) { ++get_context->get_context_stats_.num_cache_compression_dict_miss; } else { RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_MISS); } break; case BlockType::kIndex: if (get_context) { ++get_context->get_context_stats_.num_cache_index_miss; } else { RecordTick(statistics, BLOCK_CACHE_INDEX_MISS); } break; default: // TODO: introduce dedicated tickers/statistics/counters // for range tombstones if (get_context) { ++get_context->get_context_stats_.num_cache_data_miss; } else { RecordTick(statistics, BLOCK_CACHE_DATA_MISS); } break; } } void BlockBasedTable::UpdateCacheInsertionMetrics(BlockType block_type, GetContext* get_context, size_t usage, bool redundant) const { Statistics* const statistics = rep_->ioptions.statistics; // TODO: introduce perf counters for block cache insertions if (get_context) 
{ ++get_context->get_context_stats_.num_cache_add; if (redundant) { ++get_context->get_context_stats_.num_cache_add_redundant; } get_context->get_context_stats_.num_cache_bytes_write += usage; } else { RecordTick(statistics, BLOCK_CACHE_ADD); if (redundant) { RecordTick(statistics, BLOCK_CACHE_ADD_REDUNDANT); } RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usage); } switch (block_type) { case BlockType::kFilter: if (get_context) { ++get_context->get_context_stats_.num_cache_filter_add; if (redundant) { ++get_context->get_context_stats_.num_cache_filter_add_redundant; } get_context->get_context_stats_.num_cache_filter_bytes_insert += usage; } else { RecordTick(statistics, BLOCK_CACHE_FILTER_ADD); if (redundant) { RecordTick(statistics, BLOCK_CACHE_FILTER_ADD_REDUNDANT); } RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, usage); } break; case BlockType::kCompressionDictionary: if (get_context) { ++get_context->get_context_stats_.num_cache_compression_dict_add; if (redundant) { ++get_context->get_context_stats_ .num_cache_compression_dict_add_redundant; } get_context->get_context_stats_ .num_cache_compression_dict_bytes_insert += usage; } else { RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD); if (redundant) { RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT); } RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, usage); } break; case BlockType::kIndex: if (get_context) { ++get_context->get_context_stats_.num_cache_index_add; if (redundant) { ++get_context->get_context_stats_.num_cache_index_add_redundant; } get_context->get_context_stats_.num_cache_index_bytes_insert += usage; } else { RecordTick(statistics, BLOCK_CACHE_INDEX_ADD); if (redundant) { RecordTick(statistics, BLOCK_CACHE_INDEX_ADD_REDUNDANT); } RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, usage); } break; default: // TODO: introduce dedicated tickers/statistics/counters // for range tombstones if (get_context) { 
++get_context->get_context_stats_.num_cache_data_add; if (redundant) { ++get_context->get_context_stats_.num_cache_data_add_redundant; } get_context->get_context_stats_.num_cache_data_bytes_insert += usage; } else { RecordTick(statistics, BLOCK_CACHE_DATA_ADD); if (redundant) { RecordTick(statistics, BLOCK_CACHE_DATA_ADD_REDUNDANT); } RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, usage); } break; } } Cache::Handle* BlockBasedTable::GetEntryFromCache( Cache* block_cache, const Slice& key, BlockType block_type, GetContext* get_context) const { auto cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics); if (cache_handle != nullptr) { UpdateCacheHitMetrics(block_type, get_context, block_cache->GetUsage(cache_handle)); } else { UpdateCacheMissMetrics(block_type, get_context); } return cache_handle; } // Helper function to setup the cache key's prefix for the Table. void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) { assert(kMaxCacheKeyPrefixSize >= 10); rep->cache_key_prefix_size = 0; rep->compressed_cache_key_prefix_size = 0; if (rep->table_options.block_cache != nullptr) { GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(), &rep->cache_key_prefix[0], &rep->cache_key_prefix_size); } if (rep->table_options.persistent_cache != nullptr) { GenerateCachePrefix(/*cache=*/nullptr, rep->file->file(), &rep->persistent_cache_key_prefix[0], &rep->persistent_cache_key_prefix_size); } if (rep->table_options.block_cache_compressed != nullptr) { GenerateCachePrefix(rep->table_options.block_cache_compressed.get(), rep->file->file(), &rep->compressed_cache_key_prefix[0], &rep->compressed_cache_key_prefix_size); } } void BlockBasedTable::GenerateCachePrefix(Cache* cc, FSRandomAccessFile* file, char* buffer, size_t* size) { // generate an id from the file *size = file->GetUniqueId(buffer, kMaxCacheKeyPrefixSize); // If the prefix wasn't generated or was too long, // create one from the cache. 
if (cc != nullptr && *size == 0) { char* end = EncodeVarint64(buffer, cc->NewId()); *size = static_cast(end - buffer); } } void BlockBasedTable::GenerateCachePrefix(Cache* cc, FSWritableFile* file, char* buffer, size_t* size) { // generate an id from the file *size = file->GetUniqueId(buffer, kMaxCacheKeyPrefixSize); // If the prefix wasn't generated or was too long, // create one from the cache. if (cc != nullptr && *size == 0) { char* end = EncodeVarint64(buffer, cc->NewId()); *size = static_cast(end - buffer); } } namespace { // Return True if table_properties has `user_prop_name` has a `true` value // or it doesn't contain this property (for backward compatible). bool IsFeatureSupported(const TableProperties& table_properties, const std::string& user_prop_name, Logger* info_log) { auto& props = table_properties.user_collected_properties; auto pos = props.find(user_prop_name); // Older version doesn't have this value set. Skip this check. if (pos != props.end()) { if (pos->second == kPropFalse) { return false; } else if (pos->second != kPropTrue) { ROCKS_LOG_WARN(info_log, "Property %s has invalidate value %s", user_prop_name.c_str(), pos->second.c_str()); } } return true; } // Caller has to ensure seqno is not nullptr. Status GetGlobalSequenceNumber(const TableProperties& table_properties, SequenceNumber largest_seqno, SequenceNumber* seqno) { const auto& props = table_properties.user_collected_properties; const auto version_pos = props.find(ExternalSstFilePropertyNames::kVersion); const auto seqno_pos = props.find(ExternalSstFilePropertyNames::kGlobalSeqno); *seqno = kDisableGlobalSequenceNumber; if (version_pos == props.end()) { if (seqno_pos != props.end()) { std::array msg_buf; // This is not an external sst file, global_seqno is not supported. 
snprintf( msg_buf.data(), msg_buf.max_size(), "A non-external sst file have global seqno property with value %s", seqno_pos->second.c_str()); return Status::Corruption(msg_buf.data()); } return Status::OK(); } uint32_t version = DecodeFixed32(version_pos->second.c_str()); if (version < 2) { if (seqno_pos != props.end() || version != 1) { std::array msg_buf; // This is a v1 external sst file, global_seqno is not supported. snprintf(msg_buf.data(), msg_buf.max_size(), "An external sst file with version %u have global seqno " "property with value %s", version, seqno_pos->second.c_str()); return Status::Corruption(msg_buf.data()); } return Status::OK(); } // Since we have a plan to deprecate global_seqno, we do not return failure // if seqno_pos == props.end(). We rely on version_pos to detect whether the // SST is external. SequenceNumber global_seqno(0); if (seqno_pos != props.end()) { global_seqno = DecodeFixed64(seqno_pos->second.c_str()); } // SstTableReader open table reader with kMaxSequenceNumber as largest_seqno // to denote it is unknown. 
if (largest_seqno < kMaxSequenceNumber) { if (global_seqno == 0) { global_seqno = largest_seqno; } if (global_seqno != largest_seqno) { std::array msg_buf; snprintf( msg_buf.data(), msg_buf.max_size(), "An external sst file with version %u have global seqno property " "with value %s, while largest seqno in the file is %llu", version, seqno_pos->second.c_str(), static_cast(largest_seqno)); return Status::Corruption(msg_buf.data()); } } *seqno = global_seqno; if (global_seqno > kMaxSequenceNumber) { std::array msg_buf; snprintf(msg_buf.data(), msg_buf.max_size(), "An external sst file with version %u have global seqno property " "with value %llu, which is greater than kMaxSequenceNumber", version, static_cast(global_seqno)); return Status::Corruption(msg_buf.data()); } return Status::OK(); } } // namespace Slice BlockBasedTable::GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, const BlockHandle& handle, char* cache_key) { assert(cache_key != nullptr); assert(cache_key_prefix_size != 0); assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize); memcpy(cache_key, cache_key_prefix, cache_key_prefix_size); char* end = EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset()); return Slice(cache_key, static_cast(end - cache_key)); } Status BlockBasedTable::Open( const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, const SliceTransform* prefix_extractor, const bool prefetch_index_and_filter_in_cache, const bool skip_filters, const int level, const bool immortal_table, const SequenceNumber largest_seqno, const bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats, BlockCacheTracer* const block_cache_tracer, size_t max_file_size_for_l0_meta_pin) { table_reader->reset(); Status s; Footer footer; std::unique_ptr prefetch_buffer; // prefetch both index 
and filters, down to all partitions const bool prefetch_all = prefetch_index_and_filter_in_cache || level == 0; const bool preload_all = !table_options.cache_index_and_filter_blocks; if (!ioptions.allow_mmap_reads) { s = PrefetchTail(file.get(), file_size, force_direct_prefetch, tail_prefetch_stats, prefetch_all, preload_all, &prefetch_buffer); } else { // Should not prefetch for mmap mode. prefetch_buffer.reset(new FilePrefetchBuffer( nullptr, 0, 0, false /* enable */, true /* track_min_offset */)); } // Read in the following order: // 1. Footer // 2. [metaindex block] // 3. [meta block: properties] // 4. [meta block: range deletion tombstone] // 5. [meta block: compression dictionary] // 6. [meta block: index] // 7. [meta block: filter] s = ReadFooterFromFile(file.get(), prefetch_buffer.get(), file_size, &footer, kBlockBasedTableMagicNumber); if (!s.ok()) { return s; } if (!BlockBasedTableSupportedVersion(footer.version())) { return Status::Corruption( "Unknown Footer version. Maybe this file was created with newer " "version of RocksDB?"); } // We've successfully read the footer. We are ready to serve requests. // Better not mutate rep_ after the creation. eg. internal_prefix_transform // raw pointer will be used to create HashIndexReader, whose reset may // access a dangling pointer. BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options, internal_comparator, skip_filters, file_size, level, immortal_table); rep->file = std::move(file); rep->footer = footer; rep->hash_index_allow_collision = table_options.hash_index_allow_collision; // We need to wrap data with internal_prefix_transform to make sure it can // handle prefix correctly. 
if (prefix_extractor != nullptr) { rep->internal_prefix_transform.reset( new InternalKeySliceTransform(prefix_extractor)); } SetupCacheKeyPrefix(rep); std::unique_ptr new_table( new BlockBasedTable(rep, block_cache_tracer)); // page cache options rep->persistent_cache_options = PersistentCacheOptions(rep->table_options.persistent_cache, std::string(rep->persistent_cache_key_prefix, rep->persistent_cache_key_prefix_size), rep->ioptions.statistics); // Meta-blocks are not dictionary compressed. Explicitly set the dictionary // handle to null, otherwise it may be seen as uninitialized during the below // meta-block reads. rep->compression_dict_handle = BlockHandle::NullBlockHandle(); // Read metaindex std::unique_ptr metaindex; std::unique_ptr metaindex_iter; s = new_table->ReadMetaIndexBlock(prefetch_buffer.get(), &metaindex, &metaindex_iter); if (!s.ok()) { return s; } // Populates table_properties and some fields that depend on it, // such as index_type. s = new_table->ReadPropertiesBlock(prefetch_buffer.get(), metaindex_iter.get(), largest_seqno); if (!s.ok()) { return s; } s = new_table->ReadRangeDelBlock(prefetch_buffer.get(), metaindex_iter.get(), internal_comparator, &lookup_context); if (!s.ok()) { return s; } s = new_table->PrefetchIndexAndFilterBlocks( prefetch_buffer.get(), metaindex_iter.get(), new_table.get(), prefetch_all, table_options, level, file_size, max_file_size_for_l0_meta_pin, &lookup_context); if (s.ok()) { // Update tail prefetch stats assert(prefetch_buffer.get() != nullptr); if (tail_prefetch_stats != nullptr) { assert(prefetch_buffer->min_offset_read() < file_size); tail_prefetch_stats->RecordEffectiveSize( static_cast(file_size) - prefetch_buffer->min_offset_read()); } *table_reader = std::move(new_table); } return s; } Status BlockBasedTable::PrefetchTail( RandomAccessFileReader* file, uint64_t file_size, bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats, const bool prefetch_all, const bool preload_all, std::unique_ptr* 
prefetch_buffer) { size_t tail_prefetch_size = 0; if (tail_prefetch_stats != nullptr) { // Multiple threads may get a 0 (no history) when running in parallel, // but it will get cleared after the first of them finishes. tail_prefetch_size = tail_prefetch_stats->GetSuggestedPrefetchSize(); } if (tail_prefetch_size == 0) { // Before read footer, readahead backwards to prefetch data. Do more // readahead if we're going to read index/filter. // TODO: This may incorrectly select small readahead in case partitioned // index/filter is enabled and top-level partition pinning is enabled. // That's because we need to issue readahead before we read the properties, // at which point we don't yet know the index type. tail_prefetch_size = prefetch_all || preload_all ? 512 * 1024 : 4 * 1024; } size_t prefetch_off; size_t prefetch_len; if (file_size < tail_prefetch_size) { prefetch_off = 0; prefetch_len = static_cast(file_size); } else { prefetch_off = static_cast(file_size - tail_prefetch_size); prefetch_len = tail_prefetch_size; } TEST_SYNC_POINT_CALLBACK("BlockBasedTable::Open::TailPrefetchLen", &tail_prefetch_size); Status s; // TODO should not have this special logic in the future. 
if (!file->use_direct_io() && !force_direct_prefetch) { prefetch_buffer->reset(new FilePrefetchBuffer( nullptr, 0, 0, false /* enable */, true /* track_min_offset */)); s = file->Prefetch(prefetch_off, prefetch_len); } else { prefetch_buffer->reset(new FilePrefetchBuffer( nullptr, 0, 0, true /* enable */, true /* track_min_offset */)); s = (*prefetch_buffer)->Prefetch(file, prefetch_off, prefetch_len); } return s; } Status BlockBasedTable::TryReadPropertiesWithGlobalSeqno( FilePrefetchBuffer* prefetch_buffer, const Slice& handle_value, TableProperties** table_properties) { assert(table_properties != nullptr); // If this is an external SST file ingested with write_global_seqno set to // true, then we expect the checksum mismatch because checksum was written // by SstFileWriter, but its global seqno in the properties block may have // been changed during ingestion. In this case, we read the properties // block, copy it to a memory buffer, change the global seqno to its // original value, i.e. 0, and verify the checksum again. 
BlockHandle props_block_handle; CacheAllocationPtr tmp_buf; Status s = ReadProperties(handle_value, rep_->file.get(), prefetch_buffer, rep_->footer, rep_->ioptions, table_properties, false /* verify_checksum */, &props_block_handle, &tmp_buf, false /* compression_type_missing */, nullptr /* memory_allocator */); if (s.ok() && tmp_buf) { const auto seqno_pos_iter = (*table_properties) ->properties_offsets.find( ExternalSstFilePropertyNames::kGlobalSeqno); size_t block_size = static_cast(props_block_handle.size()); if (seqno_pos_iter != (*table_properties)->properties_offsets.end()) { uint64_t global_seqno_offset = seqno_pos_iter->second; EncodeFixed64( tmp_buf.get() + global_seqno_offset - props_block_handle.offset(), 0); } uint32_t value = DecodeFixed32(tmp_buf.get() + block_size + 1); s = ROCKSDB_NAMESPACE::VerifyChecksum(rep_->footer.checksum(), tmp_buf.get(), block_size + 1, value); } return s; } Status BlockBasedTable::ReadPropertiesBlock( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, const SequenceNumber largest_seqno) { bool found_properties_block = true; Status s; s = SeekToPropertiesBlock(meta_iter, &found_properties_block); if (!s.ok()) { ROCKS_LOG_WARN(rep_->ioptions.info_log, "Error when seeking to properties block from file: %s", s.ToString().c_str()); } else if (found_properties_block) { s = meta_iter->status(); TableProperties* table_properties = nullptr; if (s.ok()) { s = ReadProperties( meta_iter->value(), rep_->file.get(), prefetch_buffer, rep_->footer, rep_->ioptions, &table_properties, true /* verify_checksum */, nullptr /* ret_block_handle */, nullptr /* ret_block_contents */, false /* compression_type_missing */, nullptr /* memory_allocator */); } IGNORE_STATUS_IF_ERROR(s); if (s.IsCorruption()) { s = TryReadPropertiesWithGlobalSeqno(prefetch_buffer, meta_iter->value(), &table_properties); IGNORE_STATUS_IF_ERROR(s); } std::unique_ptr props_guard; if (table_properties != nullptr) { props_guard.reset(table_properties); } if 
(!s.ok()) { ROCKS_LOG_WARN(rep_->ioptions.info_log, "Encountered error while reading data from properties " "block %s", s.ToString().c_str()); } else { assert(table_properties != nullptr); rep_->table_properties.reset(props_guard.release()); rep_->blocks_maybe_compressed = rep_->table_properties->compression_name != CompressionTypeToString(kNoCompression); rep_->blocks_definitely_zstd_compressed = (rep_->table_properties->compression_name == CompressionTypeToString(kZSTD) || rep_->table_properties->compression_name == CompressionTypeToString(kZSTDNotFinalCompression)); } } else { ROCKS_LOG_ERROR(rep_->ioptions.info_log, "Cannot find Properties block from file."); } #ifndef ROCKSDB_LITE if (rep_->table_properties) { ParseSliceTransform(rep_->table_properties->prefix_extractor_name, &(rep_->table_prefix_extractor)); } #endif // ROCKSDB_LITE // Read the table properties, if provided. if (rep_->table_properties) { rep_->whole_key_filtering &= IsFeatureSupported(*(rep_->table_properties), BlockBasedTablePropertyNames::kWholeKeyFiltering, rep_->ioptions.info_log); rep_->prefix_filtering &= IsFeatureSupported(*(rep_->table_properties), BlockBasedTablePropertyNames::kPrefixFiltering, rep_->ioptions.info_log); rep_->index_key_includes_seq = rep_->table_properties->index_key_is_user_key == 0; rep_->index_value_is_full = rep_->table_properties->index_value_is_delta_encoded == 0; // Update index_type with the true type. // If table properties don't contain index type, we assume that the table // is in very old format and has kBinarySearch index type. 
auto& props = rep_->table_properties->user_collected_properties; auto pos = props.find(BlockBasedTablePropertyNames::kIndexType); if (pos != props.end()) { rep_->index_type = static_cast( DecodeFixed32(pos->second.c_str())); } rep_->index_has_first_key = rep_->index_type == BlockBasedTableOptions::kBinarySearchWithFirstKey; s = GetGlobalSequenceNumber(*(rep_->table_properties), largest_seqno, &(rep_->global_seqno)); if (!s.ok()) { ROCKS_LOG_ERROR(rep_->ioptions.info_log, "%s", s.ToString().c_str()); } } return s; } Status BlockBasedTable::ReadRangeDelBlock( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, const InternalKeyComparator& internal_comparator, BlockCacheLookupContext* lookup_context) { Status s; bool found_range_del_block; BlockHandle range_del_handle; s = SeekToRangeDelBlock(meta_iter, &found_range_del_block, &range_del_handle); if (!s.ok()) { ROCKS_LOG_WARN( rep_->ioptions.info_log, "Error when seeking to range delete tombstones block from file: %s", s.ToString().c_str()); } else if (found_range_del_block && !range_del_handle.IsNull()) { ReadOptions read_options; std::unique_ptr iter(NewDataBlockIterator( read_options, range_del_handle, /*input_iter=*/nullptr, BlockType::kRangeDeletion, /*get_context=*/nullptr, lookup_context, Status(), prefetch_buffer)); assert(iter != nullptr); s = iter->status(); if (!s.ok()) { ROCKS_LOG_WARN( rep_->ioptions.info_log, "Encountered error while reading data from range del block %s", s.ToString().c_str()); IGNORE_STATUS_IF_ERROR(s); } else { rep_->fragmented_range_dels = std::make_shared(std::move(iter), internal_comparator); } } return s; } Status BlockBasedTable::PrefetchIndexAndFilterBlocks( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all, const BlockBasedTableOptions& table_options, const int level, size_t file_size, size_t max_file_size_for_l0_meta_pin, BlockCacheLookupContext* lookup_context) { Status s; // Find filter handle and 
filter type if (rep_->filter_policy) { for (auto filter_type : {Rep::FilterType::kFullFilter, Rep::FilterType::kPartitionedFilter, Rep::FilterType::kBlockFilter}) { std::string prefix; switch (filter_type) { case Rep::FilterType::kFullFilter: prefix = kFullFilterBlockPrefix; break; case Rep::FilterType::kPartitionedFilter: prefix = kPartitionedFilterBlockPrefix; break; case Rep::FilterType::kBlockFilter: prefix = kFilterBlockPrefix; break; default: assert(0); } std::string filter_block_key = prefix; filter_block_key.append(rep_->filter_policy->Name()); if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle) .ok()) { rep_->filter_type = filter_type; break; } } } // Find compression dictionary handle bool found_compression_dict = false; s = SeekToCompressionDictBlock(meta_iter, &found_compression_dict, &rep_->compression_dict_handle); if (!s.ok()) { return s; } BlockBasedTableOptions::IndexType index_type = rep_->index_type; const bool use_cache = table_options.cache_index_and_filter_blocks; // pin both index and filters, down to all partitions. const bool pin_all = rep_->table_options.pin_l0_filter_and_index_blocks_in_cache && level == 0 && file_size <= max_file_size_for_l0_meta_pin; // prefetch the first level of index const bool prefetch_index = prefetch_all || (table_options.pin_top_level_index_and_filter && index_type == BlockBasedTableOptions::kTwoLevelIndexSearch); // pin the first level of index const bool pin_index = pin_all || (table_options.pin_top_level_index_and_filter && index_type == BlockBasedTableOptions::kTwoLevelIndexSearch); std::unique_ptr index_reader; s = new_table->CreateIndexReader(prefetch_buffer, meta_iter, use_cache, prefetch_index, pin_index, lookup_context, &index_reader); if (!s.ok()) { return s; } rep_->index_reader = std::move(index_reader); // The partitions of partitioned index are always stored in cache. 
They // are hence follow the configuration for pin and prefetch regardless of // the value of cache_index_and_filter_blocks if (prefetch_all) { rep_->index_reader->CacheDependencies(pin_all); } // prefetch the first level of filter const bool prefetch_filter = prefetch_all || (table_options.pin_top_level_index_and_filter && rep_->filter_type == Rep::FilterType::kPartitionedFilter); // Partition fitlers cannot be enabled without partition indexes assert(!prefetch_filter || prefetch_index); // pin the first level of filter const bool pin_filter = pin_all || (table_options.pin_top_level_index_and_filter && rep_->filter_type == Rep::FilterType::kPartitionedFilter); if (rep_->filter_policy) { auto filter = new_table->CreateFilterBlockReader( prefetch_buffer, use_cache, prefetch_filter, pin_filter, lookup_context); if (filter) { // Refer to the comment above about paritioned indexes always being cached if (prefetch_all) { filter->CacheDependencies(pin_all); } rep_->filter = std::move(filter); } } if (!rep_->compression_dict_handle.IsNull()) { std::unique_ptr uncompression_dict_reader; s = UncompressionDictReader::Create(this, prefetch_buffer, use_cache, prefetch_all, pin_all, lookup_context, &uncompression_dict_reader); if (!s.ok()) { return s; } rep_->uncompression_dict_reader = std::move(uncompression_dict_reader); } assert(s.ok()); return s; } void BlockBasedTable::SetupForCompaction() { switch (rep_->ioptions.access_hint_on_compaction_start) { case Options::NONE: break; case Options::NORMAL: rep_->file->file()->Hint(FSRandomAccessFile::kNormal); break; case Options::SEQUENTIAL: rep_->file->file()->Hint(FSRandomAccessFile::kSequential); break; case Options::WILLNEED: rep_->file->file()->Hint(FSRandomAccessFile::kWillNeed); break; default: assert(false); } } std::shared_ptr BlockBasedTable::GetTableProperties() const { return rep_->table_properties; } size_t BlockBasedTable::ApproximateMemoryUsage() const { size_t usage = 0; if (rep_->filter) { usage += 
rep_->filter->ApproximateMemoryUsage(); } if (rep_->index_reader) { usage += rep_->index_reader->ApproximateMemoryUsage(); } if (rep_->uncompression_dict_reader) { usage += rep_->uncompression_dict_reader->ApproximateMemoryUsage(); } return usage; } // Load the meta-index-block from the file. On success, return the loaded // metaindex // block and its iterator. Status BlockBasedTable::ReadMetaIndexBlock( FilePrefetchBuffer* prefetch_buffer, std::unique_ptr* metaindex_block, std::unique_ptr* iter) { // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates // it is an empty block. std::unique_ptr metaindex; Status s = ReadBlockFromFile( rep_->file.get(), prefetch_buffer, rep_->footer, ReadOptions(), rep_->footer.metaindex_handle(), &metaindex, rep_->ioptions, true /* decompress */, true /*maybe_compressed*/, BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options, 0 /* read_amp_bytes_per_bit */, GetMemoryAllocator(rep_->table_options), false /* for_compaction */, rep_->blocks_definitely_zstd_compressed, nullptr /* filter_policy */); if (!s.ok()) { ROCKS_LOG_ERROR(rep_->ioptions.info_log, "Encountered error while reading data from properties" " block %s", s.ToString().c_str()); return s; } *metaindex_block = std::move(metaindex); // meta block uses bytewise comparator. iter->reset(metaindex_block->get()->NewDataIterator( BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber)); return Status::OK(); } template Status BlockBasedTable::GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, const ReadOptions& read_options, CachableEntry* block, const UncompressionDict& uncompression_dict, BlockType block_type, GetContext* get_context) const { const size_t read_amp_bytes_per_bit = block_type == BlockType::kData ? 
rep_->table_options.read_amp_bytes_per_bit : 0; assert(block); assert(block->IsEmpty()); Status s; BlockContents* compressed_block = nullptr; Cache::Handle* block_cache_compressed_handle = nullptr; // Lookup uncompressed cache first if (block_cache != nullptr) { auto cache_handle = GetEntryFromCache(block_cache, block_cache_key, block_type, get_context); if (cache_handle != nullptr) { block->SetCachedValue( reinterpret_cast(block_cache->Value(cache_handle)), block_cache, cache_handle); return s; } } // If not found, search from the compressed block cache. assert(block->IsEmpty()); if (block_cache_compressed == nullptr) { return s; } assert(!compressed_block_cache_key.empty()); block_cache_compressed_handle = block_cache_compressed->Lookup(compressed_block_cache_key); Statistics* statistics = rep_->ioptions.statistics; // if we found in the compressed cache, then uncompress and insert into // uncompressed cache if (block_cache_compressed_handle == nullptr) { RecordTick(statistics, BLOCK_CACHE_COMPRESSED_MISS); return s; } // found compressed block RecordTick(statistics, BLOCK_CACHE_COMPRESSED_HIT); compressed_block = reinterpret_cast( block_cache_compressed->Value(block_cache_compressed_handle)); CompressionType compression_type = compressed_block->get_compression_type(); assert(compression_type != kNoCompression); // Retrieve the uncompressed contents into a new buffer BlockContents contents; UncompressionContext context(compression_type); UncompressionInfo info(context, uncompression_dict, compression_type); s = UncompressBlockContents( info, compressed_block->data.data(), compressed_block->data.size(), &contents, rep_->table_options.format_version, rep_->ioptions, GetMemoryAllocator(rep_->table_options)); // Insert uncompressed block into block cache if (s.ok()) { std::unique_ptr block_holder( BlocklikeTraits::Create( std::move(contents), read_amp_bytes_per_bit, statistics, rep_->blocks_definitely_zstd_compressed, rep_->table_options.filter_policy.get())); // 
uncompressed block if (block_cache != nullptr && block_holder->own_bytes() && read_options.fill_cache) { size_t charge = block_holder->ApproximateMemoryUsage(); Cache::Handle* cache_handle = nullptr; s = block_cache->Insert(block_cache_key, block_holder.get(), charge, &DeleteCachedEntry, &cache_handle); if (s.ok()) { assert(cache_handle != nullptr); block->SetCachedValue(block_holder.release(), block_cache, cache_handle); UpdateCacheInsertionMetrics(block_type, get_context, charge, s.IsOkOverwritten()); } else { RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); } } else { block->SetOwnedValue(block_holder.release()); } } // Release hold on compressed cache entry block_cache_compressed->Release(block_cache_compressed_handle); return s; } template Status BlockBasedTable::PutDataBlockToCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, CachableEntry* cached_block, BlockContents* raw_block_contents, CompressionType raw_block_comp_type, const UncompressionDict& uncompression_dict, MemoryAllocator* memory_allocator, BlockType block_type, GetContext* get_context) const { const ImmutableCFOptions& ioptions = rep_->ioptions; const uint32_t format_version = rep_->table_options.format_version; const size_t read_amp_bytes_per_bit = block_type == BlockType::kData ? rep_->table_options.read_amp_bytes_per_bit : 0; const Cache::Priority priority = rep_->table_options.cache_index_and_filter_blocks_with_high_priority && (block_type == BlockType::kFilter || block_type == BlockType::kCompressionDictionary || block_type == BlockType::kIndex) ? 
Cache::Priority::HIGH : Cache::Priority::LOW; assert(cached_block); assert(cached_block->IsEmpty()); Status s; Statistics* statistics = ioptions.statistics; std::unique_ptr block_holder; if (raw_block_comp_type != kNoCompression) { // Retrieve the uncompressed contents into a new buffer BlockContents uncompressed_block_contents; UncompressionContext context(raw_block_comp_type); UncompressionInfo info(context, uncompression_dict, raw_block_comp_type); s = UncompressBlockContents(info, raw_block_contents->data.data(), raw_block_contents->data.size(), &uncompressed_block_contents, format_version, ioptions, memory_allocator); if (!s.ok()) { return s; } block_holder.reset(BlocklikeTraits::Create( std::move(uncompressed_block_contents), read_amp_bytes_per_bit, statistics, rep_->blocks_definitely_zstd_compressed, rep_->table_options.filter_policy.get())); } else { block_holder.reset(BlocklikeTraits::Create( std::move(*raw_block_contents), read_amp_bytes_per_bit, statistics, rep_->blocks_definitely_zstd_compressed, rep_->table_options.filter_policy.get())); } // Insert compressed block into compressed block cache. // Release the hold on the compressed cache entry immediately. if (block_cache_compressed != nullptr && raw_block_comp_type != kNoCompression && raw_block_contents != nullptr && raw_block_contents->own_bytes()) { #ifndef NDEBUG assert(raw_block_contents->is_raw_block); #endif // NDEBUG // We cannot directly put raw_block_contents because this could point to // an object in the stack. BlockContents* block_cont_for_comp_cache = new BlockContents(std::move(*raw_block_contents)); s = block_cache_compressed->Insert( compressed_block_cache_key, block_cont_for_comp_cache, block_cont_for_comp_cache->ApproximateMemoryUsage(), &DeleteCachedEntry); if (s.ok()) { // Avoid the following code to delete this cached block. 
RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD); } else { RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); delete block_cont_for_comp_cache; } } // insert into uncompressed block cache if (block_cache != nullptr && block_holder->own_bytes()) { size_t charge = block_holder->ApproximateMemoryUsage(); Cache::Handle* cache_handle = nullptr; s = block_cache->Insert(block_cache_key, block_holder.get(), charge, &DeleteCachedEntry, &cache_handle, priority); if (s.ok()) { assert(cache_handle != nullptr); cached_block->SetCachedValue(block_holder.release(), block_cache, cache_handle); UpdateCacheInsertionMetrics(block_type, get_context, charge, s.IsOkOverwritten()); } else { RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); } } else { cached_block->SetOwnedValue(block_holder.release()); } return s; } std::unique_ptr BlockBasedTable::CreateFilterBlockReader( FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context) { auto& rep = rep_; auto filter_type = rep->filter_type; if (filter_type == Rep::FilterType::kNoFilter) { return std::unique_ptr(); } assert(rep->filter_policy); switch (filter_type) { case Rep::FilterType::kPartitionedFilter: return PartitionedFilterBlockReader::Create( this, prefetch_buffer, use_cache, prefetch, pin, lookup_context); case Rep::FilterType::kBlockFilter: return BlockBasedFilterBlockReader::Create( this, prefetch_buffer, use_cache, prefetch, pin, lookup_context); case Rep::FilterType::kFullFilter: return FullFilterBlockReader::Create(this, prefetch_buffer, use_cache, prefetch, pin, lookup_context); default: // filter_type is either kNoFilter (exited the function at the first if), // or it must be covered in this switch block assert(false); return std::unique_ptr(); } } // disable_prefix_seek should be set to true when prefix_extractor found in SST // differs from the one in mutable_cf_options and index type is HashBasedIndex InternalIteratorBase* 
BlockBasedTable::NewIndexIterator( const ReadOptions& read_options, bool disable_prefix_seek, IndexBlockIter* input_iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) const { assert(rep_ != nullptr); assert(rep_->index_reader != nullptr); // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. return rep_->index_reader->NewIterator(read_options, disable_prefix_seek, input_iter, get_context, lookup_context); } template <> DataBlockIter* BlockBasedTable::InitBlockIterator( const Rep* rep, Block* block, BlockType block_type, DataBlockIter* input_iter, bool block_contents_pinned) { return block->NewDataIterator( &rep->internal_comparator, rep->internal_comparator.user_comparator(), rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics, block_contents_pinned); } template <> IndexBlockIter* BlockBasedTable::InitBlockIterator( const Rep* rep, Block* block, BlockType block_type, IndexBlockIter* input_iter, bool block_contents_pinned) { return block->NewIndexIterator( &rep->internal_comparator, rep->internal_comparator.user_comparator(), rep->get_global_seqno(block_type), input_iter, rep->ioptions.statistics, /* total_order_seek */ true, rep->index_has_first_key, rep->index_key_includes_seq, rep->index_value_is_full, block_contents_pinned); } // If contents is nullptr, this function looks up the block caches for the // data block referenced by handle, and read the block from disk if necessary. // If contents is non-null, it skips the cache lookup and disk read, since // the caller has already read it. In both cases, if ro.fill_cache is true, // it inserts the block into the block cache. 
template Status BlockBasedTable::MaybeReadBlockAndLoadToCache( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, BlockContents* contents) const { assert(block_entry != nullptr); const bool no_io = (ro.read_tier == kBlockCacheTier); Cache* block_cache = rep_->table_options.block_cache.get(); Cache* block_cache_compressed = rep_->table_options.block_cache_compressed.get(); // First, try to get the block from the cache // // If either block cache is enabled, we'll try to read from it. Status s; char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; Slice key /* key to the block cache */; Slice ckey /* key to the compressed block cache */; bool is_cache_hit = false; if (block_cache != nullptr || block_cache_compressed != nullptr) { // create key for block cache if (block_cache != nullptr) { key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, handle, cache_key); } if (block_cache_compressed != nullptr) { ckey = GetCacheKey(rep_->compressed_cache_key_prefix, rep_->compressed_cache_key_prefix_size, handle, compressed_cache_key); } if (!contents) { s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, ro, block_entry, uncompression_dict, block_type, get_context); if (block_entry->GetValue()) { // TODO(haoyu): Differentiate cache hit on uncompressed block cache and // compressed block cache. is_cache_hit = true; } } // Can't find the block from the cache. If I/O is allowed, read from the // file. 
if (block_entry->GetValue() == nullptr && !no_io && ro.fill_cache) { Statistics* statistics = rep_->ioptions.statistics; const bool maybe_compressed = block_type != BlockType::kFilter && block_type != BlockType::kCompressionDictionary && rep_->blocks_maybe_compressed; const bool do_uncompress = maybe_compressed && !block_cache_compressed; CompressionType raw_block_comp_type; BlockContents raw_block_contents; if (!contents) { StopWatch sw(rep_->ioptions.env, statistics, READ_BLOCK_GET_MICROS); BlockFetcher block_fetcher( rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &raw_block_contents, rep_->ioptions, do_uncompress, maybe_compressed, block_type, uncompression_dict, rep_->persistent_cache_options, GetMemoryAllocator(rep_->table_options), GetMemoryAllocatorForCompressedBlock(rep_->table_options)); s = block_fetcher.ReadBlockContents(); raw_block_comp_type = block_fetcher.get_compression_type(); contents = &raw_block_contents; } else { raw_block_comp_type = contents->get_compression_type(); } if (s.ok()) { // If filling cache is allowed and a cache is configured, try to put the // block to the cache. s = PutDataBlockToCache( key, ckey, block_cache, block_cache_compressed, block_entry, contents, raw_block_comp_type, uncompression_dict, GetMemoryAllocator(rep_->table_options), block_type, get_context); } } } // Fill lookup_context. if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled() && lookup_context) { size_t usage = 0; uint64_t nkeys = 0; if (block_entry->GetValue()) { // Approximate the number of keys in the block using restarts. 
nkeys = rep_->table_options.block_restart_interval * BlocklikeTraits::GetNumRestarts(*block_entry->GetValue()); usage = block_entry->GetValue()->ApproximateMemoryUsage(); } TraceType trace_block_type = TraceType::kTraceMax; switch (block_type) { case BlockType::kData: trace_block_type = TraceType::kBlockTraceDataBlock; break; case BlockType::kFilter: trace_block_type = TraceType::kBlockTraceFilterBlock; break; case BlockType::kCompressionDictionary: trace_block_type = TraceType::kBlockTraceUncompressionDictBlock; break; case BlockType::kRangeDeletion: trace_block_type = TraceType::kBlockTraceRangeDeletionBlock; break; case BlockType::kIndex: trace_block_type = TraceType::kBlockTraceIndexBlock; break; default: // This cannot happen. assert(false); break; } bool no_insert = no_io || !ro.fill_cache; if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock( trace_block_type, lookup_context->caller)) { // Defer logging the access to Get() and MultiGet() to trace additional // information, e.g., referenced_key_exist_in_block. // Make a copy of the block key here since it will be logged later. lookup_context->FillLookupContext( is_cache_hit, no_insert, trace_block_type, /*block_size=*/usage, /*block_key=*/key.ToString(), nkeys); } else { // Avoid making copy of block_key and cf_name when constructing the access // record. 
BlockCacheTraceRecord access_record( rep_->ioptions.env->NowMicros(), /*block_key=*/"", trace_block_type, /*block_size=*/usage, rep_->cf_id_for_tracing(), /*cf_name=*/"", rep_->level_for_tracing(), rep_->sst_number_for_tracing(), lookup_context->caller, is_cache_hit, no_insert, lookup_context->get_id, lookup_context->get_from_user_specified_snapshot, /*referenced_key=*/""); block_cache_tracer_->WriteBlockAccess(access_record, key, rep_->cf_name_for_tracing(), lookup_context->referenced_key); } } assert(s.ok() || block_entry->GetValue() == nullptr); return s; } // This function reads multiple data blocks from disk using Env::MultiRead() // and optionally inserts them into the block cache. It uses the scratch // buffer provided by the caller, which is contiguous. If scratch is a nullptr // it allocates a separate buffer for each block. Typically, if the blocks // need to be uncompressed and there is no compressed block cache, callers // can allocate a temporary scratch buffer in order to minimize memory // allocations. // If options.fill_cache is true, it inserts the blocks into cache. If its // false and scratch is non-null and the blocks are uncompressed, it copies // the buffers to heap. In any case, the CachableEntry returned will // own the data bytes. // If compression is enabled and also there is no compressed block cache, // the adjacent blocks are read out in one IO (combined read) // batch - A MultiGetRange with only those keys with unique data blocks not // found in cache // handles - A vector of block handles. 
Some of them me be NULL handles // scratch - An optional contiguous buffer to read compressed blocks into void BlockBasedTable::RetrieveMultipleBlocks( const ReadOptions& options, const MultiGetRange* batch, const autovector* handles, autovector* statuses, autovector, MultiGetContext::MAX_BATCH_SIZE>* results, char* scratch, const UncompressionDict& uncompression_dict) const { RandomAccessFileReader* file = rep_->file.get(); const Footer& footer = rep_->footer; const ImmutableCFOptions& ioptions = rep_->ioptions; size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit; MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options); if (ioptions.allow_mmap_reads) { size_t idx_in_batch = 0; for (auto mget_iter = batch->begin(); mget_iter != batch->end(); ++mget_iter, ++idx_in_batch) { BlockCacheLookupContext lookup_data_block_context( TableReaderCaller::kUserMultiGet); const BlockHandle& handle = (*handles)[idx_in_batch]; if (handle.IsNull()) { continue; } (*statuses)[idx_in_batch] = RetrieveBlock(nullptr, options, handle, uncompression_dict, &(*results)[idx_in_batch], BlockType::kData, mget_iter->get_context, &lookup_data_block_context, /* for_compaction */ false, /* use_cache */ true); } return; } // In direct IO mode, blocks share the direct io buffer. // Otherwise, blocks share the scratch buffer. 
const bool use_shared_buffer = file->use_direct_io() || scratch != nullptr; autovector read_reqs; size_t buf_offset = 0; size_t idx_in_batch = 0; uint64_t prev_offset = 0; size_t prev_len = 0; autovector req_idx_for_block; autovector req_offset_for_block; for (auto mget_iter = batch->begin(); mget_iter != batch->end(); ++mget_iter, ++idx_in_batch) { const BlockHandle& handle = (*handles)[idx_in_batch]; if (handle.IsNull()) { continue; } size_t prev_end = static_cast(prev_offset) + prev_len; // If current block is adjacent to the previous one, at the same time, // compression is enabled and there is no compressed cache, we combine // the two block read as one. // We don't combine block reads here in direct IO mode, because when doing // direct IO read, the block requests will be realigned and merged when // necessary. if (use_shared_buffer && !file->use_direct_io() && prev_end == handle.offset()) { req_offset_for_block.emplace_back(prev_len); prev_len += block_size(handle); } else { // No compression or current block and previous one is not adjacent: // Step 1, create a new request for previous blocks if (prev_len != 0) { FSReadRequest req; req.offset = prev_offset; req.len = prev_len; if (file->use_direct_io()) { req.scratch = nullptr; } else if (use_shared_buffer) { req.scratch = scratch + buf_offset; buf_offset += req.len; } else { req.scratch = new char[req.len]; } read_reqs.emplace_back(req); } // Step 2, remeber the previous block info prev_offset = handle.offset(); prev_len = block_size(handle); req_offset_for_block.emplace_back(0); } req_idx_for_block.emplace_back(read_reqs.size()); } // Handle the last block and process the pending last request if (prev_len != 0) { FSReadRequest req; req.offset = prev_offset; req.len = prev_len; if (file->use_direct_io()) { req.scratch = nullptr; } else if (use_shared_buffer) { req.scratch = scratch + buf_offset; } else { req.scratch = new char[req.len]; } read_reqs.emplace_back(req); } AlignedBuf direct_io_buf; { IOOptions 
opts; IOStatus s = PrepareIOFromReadOptions(options, file->env(), opts); if (s.IsTimedOut()) { for (FSReadRequest& req : read_reqs) { req.status = s; } } else { file->MultiRead(opts, &read_reqs[0], read_reqs.size(), &direct_io_buf); } } idx_in_batch = 0; size_t valid_batch_idx = 0; for (auto mget_iter = batch->begin(); mget_iter != batch->end(); ++mget_iter, ++idx_in_batch) { const BlockHandle& handle = (*handles)[idx_in_batch]; if (handle.IsNull()) { continue; } assert(valid_batch_idx < req_idx_for_block.size()); assert(valid_batch_idx < req_offset_for_block.size()); assert(req_idx_for_block[valid_batch_idx] < read_reqs.size()); size_t& req_idx = req_idx_for_block[valid_batch_idx]; size_t& req_offset = req_offset_for_block[valid_batch_idx]; valid_batch_idx++; FSReadRequest& req = read_reqs[req_idx]; Status s = req.status; if (s.ok()) { if ((req.result.size() != req.len) || (req_offset + block_size(handle) > req.result.size())) { s = Status::Corruption( "truncated block read from " + rep_->file->file_name() + " offset " + ToString(handle.offset()) + ", expected " + ToString(req.len) + " bytes, got " + ToString(req.result.size())); } } BlockContents raw_block_contents; if (s.ok()) { if (!use_shared_buffer) { // We allocated a buffer for this block. Give ownership of it to // BlockContents so it can free the memory assert(req.result.data() == req.scratch); assert(req.result.size() == block_size(handle)); assert(req_offset == 0); std::unique_ptr raw_block(req.scratch); raw_block_contents = BlockContents(std::move(raw_block), handle.size()); } else { // We used the scratch buffer or direct io buffer // which are shared by the blocks. // raw_block_contents does not have the ownership. 
raw_block_contents = BlockContents(Slice(req.result.data() + req_offset, handle.size())); } #ifndef NDEBUG raw_block_contents.is_raw_block = true; #endif if (options.verify_checksums) { PERF_TIMER_GUARD(block_checksum_time); const char* data = req.result.data(); uint32_t expected = DecodeFixed32(data + req_offset + handle.size() + 1); // Since the scratch might be shared. the offset of the data block in // the buffer might not be 0. req.result.data() only point to the // begin address of each read request, we need to add the offset // in each read request. Checksum is stored in the block trailer, // which is handle.size() + 1. s = ROCKSDB_NAMESPACE::VerifyChecksum( footer.checksum(), data + req_offset, handle.size() + 1, expected); TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s); } } else if (!use_shared_buffer) { // Free the allocated scratch buffer. delete[] req.scratch; } if (s.ok()) { // When the blocks share the same underlying buffer (scratch or direct io // buffer), if the block is compressed, the shared buffer will be // uncompressed into heap during uncompressing; otherwise, we need to // manually copy the block into heap before inserting the block to block // cache. CompressionType compression_type = raw_block_contents.get_compression_type(); if (use_shared_buffer && compression_type == kNoCompression) { Slice raw = Slice(req.result.data() + req_offset, block_size(handle)); raw_block_contents = BlockContents( CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), raw), handle.size()); #ifndef NDEBUG raw_block_contents.is_raw_block = true; #endif } } if (s.ok()) { if (options.fill_cache) { BlockCacheLookupContext lookup_data_block_context( TableReaderCaller::kUserMultiGet); CachableEntry* block_entry = &(*results)[idx_in_batch]; // MaybeReadBlockAndLoadToCache will insert into the block caches if // necessary. 
Since we're passing the raw block contents, it will // avoid looking up the block cache s = MaybeReadBlockAndLoadToCache( nullptr, options, handle, uncompression_dict, block_entry, BlockType::kData, mget_iter->get_context, &lookup_data_block_context, &raw_block_contents); // block_entry value could be null if no block cache is present, i.e // BlockBasedTableOptions::no_block_cache is true and no compressed // block cache is configured. In that case, fall // through and set up the block explicitly if (block_entry->GetValue() != nullptr) { continue; } } CompressionType compression_type = raw_block_contents.get_compression_type(); BlockContents contents; if (compression_type != kNoCompression) { UncompressionContext context(compression_type); UncompressionInfo info(context, uncompression_dict, compression_type); s = UncompressBlockContents(info, req.result.data() + req_offset, handle.size(), &contents, footer.version(), rep_->ioptions, memory_allocator); } else { // There are two cases here: // 1) caller uses the shared buffer (scratch or direct io buffer); // 2) we use the requst buffer. // If scratch buffer or direct io buffer is used, we ensure that // all raw blocks are copyed to the heap as single blocks. If scratch // buffer is not used, we also have no combined read, so the raw // block can be used directly. 
contents = std::move(raw_block_contents); } if (s.ok()) { (*results)[idx_in_batch].SetOwnedValue(new Block( std::move(contents), read_amp_bytes_per_bit, ioptions.statistics)); } } (*statuses)[idx_in_batch] = s; } } template Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, bool use_cache) const { assert(block_entry); assert(block_entry->IsEmpty()); Status s; if (use_cache) { s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle, uncompression_dict, block_entry, block_type, get_context, lookup_context, /*contents=*/nullptr); if (!s.ok()) { return s; } if (block_entry->GetValue() != nullptr) { assert(s.ok()); return s; } } assert(block_entry->IsEmpty()); const bool no_io = ro.read_tier == kBlockCacheTier; if (no_io) { return Status::Incomplete("no blocking io"); } const bool maybe_compressed = block_type != BlockType::kFilter && block_type != BlockType::kCompressionDictionary && rep_->blocks_maybe_compressed; const bool do_uncompress = maybe_compressed; std::unique_ptr block; { StopWatch sw(rep_->ioptions.env, rep_->ioptions.statistics, READ_BLOCK_GET_MICROS); s = ReadBlockFromFile( rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block, rep_->ioptions, do_uncompress, maybe_compressed, block_type, uncompression_dict, rep_->persistent_cache_options, block_type == BlockType::kData ? rep_->table_options.read_amp_bytes_per_bit : 0, GetMemoryAllocator(rep_->table_options), for_compaction, rep_->blocks_definitely_zstd_compressed, rep_->table_options.filter_policy.get()); } if (!s.ok()) { return s; } block_entry->SetOwnedValue(block.release()); assert(s.ok()); return s; } // Explicitly instantiate templates for both "blocklike" types we use. 
// This makes it possible to keep the template definitions in the .cc file. template Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, bool use_cache) const; template Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, bool use_cache) const; template Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, bool use_cache) const; template Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, bool use_cache) const; BlockBasedTable::PartitionedIndexIteratorState::PartitionedIndexIteratorState( const BlockBasedTable* table, std::unordered_map>* block_map) : table_(table), block_map_(block_map) {} InternalIteratorBase* BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( const BlockHandle& handle) { // Return a block iterator on the index partition auto block = block_map_->find(handle.offset()); // This is a possible scenario since block cache might not have had space // for the partition if (block != block_map_->end()) { const Rep* rep = 
table_->get_rep(); assert(rep); Statistics* kNullStats = nullptr; // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. return block->second.GetValue()->NewIndexIterator( &rep->internal_comparator, rep->internal_comparator.user_comparator(), rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, rep->index_has_first_key, rep->index_key_includes_seq, rep->index_value_is_full); } // Create an empty iterator return new IndexBlockIter(); } // This will be broken if the user specifies an unusual implementation // of Options.comparator, or if the user specifies an unusual // definition of prefixes in BlockBasedTableOptions.filter_policy. // In particular, we require the following three properties: // // 1) key.starts_with(prefix(key)) // 2) Compare(prefix(key), key) <= 0. // 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0 // // If read_options.read_tier == kBlockCacheTier, this method will do no I/O and // will return true if the filter block is not in memory and not found in block // cache. // // REQUIRES: this method shouldn't be called while the DB lock is held. 
bool BlockBasedTable::PrefixMayMatch( const Slice& internal_key, const ReadOptions& read_options, const SliceTransform* options_prefix_extractor, const bool need_upper_bound_check, BlockCacheLookupContext* lookup_context) const { if (!rep_->filter_policy) { return true; } const SliceTransform* prefix_extractor; if (rep_->table_prefix_extractor == nullptr) { if (need_upper_bound_check) { return true; } prefix_extractor = options_prefix_extractor; } else { prefix_extractor = rep_->table_prefix_extractor.get(); } auto user_key = ExtractUserKey(internal_key); if (!prefix_extractor->InDomain(user_key)) { return true; } bool may_match = true; Status s; // First, try check with full filter FilterBlockReader* const filter = rep_->filter.get(); bool filter_checked = true; if (filter != nullptr) { const bool no_io = read_options.read_tier == kBlockCacheTier; if (!filter->IsBlockBased()) { const Slice* const const_ikey_ptr = &internal_key; may_match = filter->RangeMayExist( read_options.iterate_upper_bound, user_key, prefix_extractor, rep_->internal_comparator.user_comparator(), const_ikey_ptr, &filter_checked, need_upper_bound_check, no_io, lookup_context); } else { // if prefix_extractor changed for block based filter, skip filter if (need_upper_bound_check) { return true; } auto prefix = prefix_extractor->Transform(user_key); InternalKey internal_key_prefix(prefix, kMaxSequenceNumber, kTypeValue); auto internal_prefix = internal_key_prefix.Encode(); // To prevent any io operation in this method, we set `read_tier` to make // sure we always read index or filter only when they have already been // loaded to memory. 
ReadOptions no_io_read_options; no_io_read_options.read_tier = kBlockCacheTier; // Then, try find it within each block // we already know prefix_extractor and prefix_extractor_name must match // because `CheckPrefixMayMatch` first checks `check_filter_ == true` std::unique_ptr> iiter(NewIndexIterator( no_io_read_options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, /*get_context=*/nullptr, lookup_context)); iiter->Seek(internal_prefix); if (!iiter->Valid()) { // we're past end of file // if it's incomplete, it means that we avoided I/O // and we're not really sure that we're past the end // of the file may_match = iiter->status().IsIncomplete(); } else if ((rep_->index_key_includes_seq ? ExtractUserKey(iiter->key()) : iiter->key()) .starts_with(ExtractUserKey(internal_prefix))) { // we need to check for this subtle case because our only // guarantee is that "the key is a string >= last key in that data // block" according to the doc/table_format.txt spec. // // Suppose iiter->key() starts with the desired prefix; it is not // necessarily the case that the corresponding data block will // contain the prefix, since iiter->key() need not be in the // block. However, the next data block may contain the prefix, so // we return true to play it safe. may_match = true; } else if (filter->IsBlockBased()) { // iiter->key() does NOT start with the desired prefix. Because // Seek() finds the first key that is >= the seek target, this // means that iiter->key() > prefix. Thus, any data blocks coming // after the data block corresponding to iiter->key() cannot // possibly contain the key. Thus, the corresponding data block // is the only on could potentially contain the prefix. 
BlockHandle handle = iiter->value().handle; may_match = filter->PrefixMayMatch( prefix, prefix_extractor, handle.offset(), no_io, /*const_key_ptr=*/nullptr, /*get_context=*/nullptr, lookup_context); } } } if (filter_checked) { Statistics* statistics = rep_->ioptions.statistics; RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED); if (!may_match) { RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL); } } return may_match; } InternalIterator* BlockBasedTable::NewIterator( const ReadOptions& read_options, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, size_t compaction_readahead_size, bool allow_unprepared_value) { BlockCacheLookupContext lookup_context{caller}; bool need_upper_bound_check = read_options.auto_prefix_mode || PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor); std::unique_ptr> index_iter(NewIndexIterator( read_options, need_upper_bound_check && rep_->index_type == BlockBasedTableOptions::kHashSearch, /*input_iter=*/nullptr, /*get_context=*/nullptr, &lookup_context)); if (arena == nullptr) { return new BlockBasedTableIterator( this, read_options, rep_->internal_comparator, std::move(index_iter), !skip_filters && !read_options.total_order_seek && prefix_extractor != nullptr, need_upper_bound_check, prefix_extractor, caller, compaction_readahead_size, allow_unprepared_value); } else { auto* mem = arena->AllocateAligned(sizeof(BlockBasedTableIterator)); return new (mem) BlockBasedTableIterator( this, read_options, rep_->internal_comparator, std::move(index_iter), !skip_filters && !read_options.total_order_seek && prefix_extractor != nullptr, need_upper_bound_check, prefix_extractor, caller, compaction_readahead_size, allow_unprepared_value); } } FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( const ReadOptions& read_options) { if (rep_->fragmented_range_dels == nullptr) { return nullptr; } SequenceNumber snapshot = kMaxSequenceNumber; if 
(read_options.snapshot != nullptr) { snapshot = read_options.snapshot->GetSequenceNumber(); } return new FragmentedRangeTombstoneIterator( rep_->fragmented_range_dels, rep_->internal_comparator, snapshot); } bool BlockBasedTable::FullFilterKeyMayMatch( const ReadOptions& read_options, FilterBlockReader* filter, const Slice& internal_key, const bool no_io, const SliceTransform* prefix_extractor, GetContext* get_context, BlockCacheLookupContext* lookup_context) const { if (filter == nullptr || filter->IsBlockBased()) { return true; } Slice user_key = ExtractUserKey(internal_key); const Slice* const const_ikey_ptr = &internal_key; bool may_match = true; if (rep_->whole_key_filtering) { size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz); may_match = filter->KeyMayMatch(user_key_without_ts, prefix_extractor, kNotValid, no_io, const_ikey_ptr, get_context, lookup_context); } else if (!read_options.total_order_seek && prefix_extractor && rep_->table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) == 0 && prefix_extractor->InDomain(user_key) && !filter->PrefixMayMatch(prefix_extractor->Transform(user_key), prefix_extractor, kNotValid, no_io, const_ikey_ptr, get_context, lookup_context)) { may_match = false; } if (may_match) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level); } return may_match; } void BlockBasedTable::FullFilterKeysMayMatch( const ReadOptions& read_options, FilterBlockReader* filter, MultiGetRange* range, const bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const { if (filter == nullptr || filter->IsBlockBased()) { return; } uint64_t before_keys = range->KeysLeft(); assert(before_keys > 0); // Caller should ensure if (rep_->whole_key_filtering) { filter->KeysMayMatch(range, prefix_extractor, kNotValid, no_io, 
lookup_context); uint64_t after_keys = range->KeysLeft(); if (after_keys) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE, after_keys); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, after_keys, rep_->level); } uint64_t filtered_keys = before_keys - after_keys; if (filtered_keys) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL, filtered_keys); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, filtered_keys, rep_->level); } } else if (!read_options.total_order_seek && prefix_extractor && rep_->table_properties->prefix_extractor_name.compare( prefix_extractor->Name()) == 0) { filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false, lookup_context); RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_PREFIX_CHECKED, before_keys); uint64_t after_keys = range->KeysLeft(); uint64_t filtered_keys = before_keys - after_keys; if (filtered_keys) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_PREFIX_USEFUL, filtered_keys); } } } Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters) { assert(key.size() >= 8); // key must be internal key assert(get_context != nullptr); Status s; const bool no_io = read_options.read_tier == kBlockCacheTier; FilterBlockReader* const filter = !skip_filters ? rep_->filter.get() : nullptr; // First check the full filter // If full filter not useful, Then go into each block uint64_t tracing_get_id = get_context->get_tracing_get_id(); BlockCacheLookupContext lookup_context{ TableReaderCaller::kUserGet, tracing_get_id, /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr}; if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) { // Trace the key since it contains both user key and sequence number. 
lookup_context.referenced_key = key.ToString(); lookup_context.get_from_user_specified_snapshot = read_options.snapshot != nullptr; } const bool may_match = FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor, get_context, &lookup_context); if (!may_match) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); } else { IndexBlockIter iiter_on_stack; // if prefix_extractor found in block differs from options, disable // BlockPrefixIndex. Only do this check when index_type is kHashSearch. bool need_upper_bound_check = false; if (rep_->index_type == BlockBasedTableOptions::kHashSearch) { need_upper_bound_check = PrefixExtractorChanged( rep_->table_properties.get(), prefix_extractor); } auto iiter = NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack, get_context, &lookup_context); std::unique_ptr> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr.reset(iiter); } size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size(); bool matched = false; // if such user key matched a key in SST bool done = false; for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { IndexValue v = iiter->value(); bool not_exist_in_filter = filter != nullptr && filter->IsBlockBased() == true && !filter->KeyMayMatch(ExtractUserKeyAndStripTimestamp(key, ts_sz), prefix_extractor, v.handle.offset(), no_io, /*const_ikey_ptr=*/nullptr, get_context, &lookup_context); if (not_exist_in_filter) { // Not found // TODO: think about interaction with Merge. If a user key cannot // cross one data block, we should be fine. 
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); break; } if (!v.first_internal_key.empty() && !skip_filters && UserComparatorWrapper(rep_->internal_comparator.user_comparator()) .Compare(ExtractUserKey(key), ExtractUserKey(v.first_internal_key)) < 0) { // The requested key falls between highest key in previous block and // lowest key in current block. break; } BlockCacheLookupContext lookup_data_block_context{ TableReaderCaller::kUserGet, tracing_get_id, /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr}; bool does_referenced_key_exist = false; DataBlockIter biter; uint64_t referenced_data_size = 0; NewDataBlockIterator( read_options, v.handle, &biter, BlockType::kData, get_context, &lookup_data_block_context, /*s=*/Status(), /*prefetch_buffer*/ nullptr); if (no_io && biter.status().IsIncomplete()) { // couldn't get block from block_cache // Update Saver.state to Found because we are only looking for // whether we can guarantee the key is not there when "no_io" is set get_context->MarkKeyMayExist(); break; } if (!biter.status().ok()) { s = biter.status(); break; } bool may_exist = biter.SeekForGet(key); // If user-specified timestamp is supported, we cannot end the search // just because hash index lookup indicates the key+ts does not exist. if (!may_exist && ts_sz == 0) { // HashSeek cannot find the key this block and the the iter is not // the end of the block, i.e. cannot be in the following blocks // either. In this case, the seek_key cannot be found, so we break // from the top level for-loop. done = true; } else { // Call the *saver function on each entry/block until it returns false for (; biter.Valid(); biter.Next()) { ParsedInternalKey parsed_key; if (!ParseInternalKey(biter.key(), &parsed_key)) { s = Status::Corruption(Slice()); } if (!get_context->SaveValue( parsed_key, biter.value(), &matched, biter.IsValuePinned() ? 
&biter : nullptr)) { if (get_context->State() == GetContext::GetState::kFound) { does_referenced_key_exist = true; referenced_data_size = biter.key().size() + biter.value().size(); } done = true; break; } } s = biter.status(); } // Write the block cache access record. if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) { // Avoid making copy of block_key, cf_name, and referenced_key when // constructing the access record. Slice referenced_key; if (does_referenced_key_exist) { referenced_key = biter.key(); } else { referenced_key = key; } BlockCacheTraceRecord access_record( rep_->ioptions.env->NowMicros(), /*block_key=*/"", lookup_data_block_context.block_type, lookup_data_block_context.block_size, rep_->cf_id_for_tracing(), /*cf_name=*/"", rep_->level_for_tracing(), rep_->sst_number_for_tracing(), lookup_data_block_context.caller, lookup_data_block_context.is_cache_hit, lookup_data_block_context.no_insert, lookup_data_block_context.get_id, lookup_data_block_context.get_from_user_specified_snapshot, /*referenced_key=*/"", referenced_data_size, lookup_data_block_context.num_keys_in_block, does_referenced_key_exist); block_cache_tracer_->WriteBlockAccess( access_record, lookup_data_block_context.block_key, rep_->cf_name_for_tracing(), referenced_key); } if (done) { // Avoid the extra Next which is expensive in two-level indexes break; } } if (matched && filter != nullptr && !filter->IsBlockBased()) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, rep_->level); } if (s.ok() && !iiter->status().IsNotFound()) { s = iiter->status(); } } return s; } using MultiGetRange = MultiGetContext::Range; void BlockBasedTable::MultiGet(const ReadOptions& read_options, const MultiGetRange* mget_range, const SliceTransform* prefix_extractor, bool skip_filters) { if (mget_range->empty()) { // Caller should ensure non-empty (performance bug) assert(false); return; // Nothing to do } 
FilterBlockReader* const filter = !skip_filters ? rep_->filter.get() : nullptr; MultiGetRange sst_file_range(*mget_range, mget_range->begin(), mget_range->end()); // First check the full filter // If full filter not useful, Then go into each block const bool no_io = read_options.read_tier == kBlockCacheTier; uint64_t tracing_mget_id = BlockCacheTraceHelper::kReservedGetId; if (sst_file_range.begin()->get_context) { tracing_mget_id = sst_file_range.begin()->get_context->get_tracing_get_id(); } BlockCacheLookupContext lookup_context{ TableReaderCaller::kUserMultiGet, tracing_mget_id, /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr}; FullFilterKeysMayMatch(read_options, filter, &sst_file_range, no_io, prefix_extractor, &lookup_context); if (!sst_file_range.empty()) { IndexBlockIter iiter_on_stack; // if prefix_extractor found in block differs from options, disable // BlockPrefixIndex. Only do this check when index_type is kHashSearch. bool need_upper_bound_check = false; if (rep_->index_type == BlockBasedTableOptions::kHashSearch) { need_upper_bound_check = PrefixExtractorChanged( rep_->table_properties.get(), prefix_extractor); } auto iiter = NewIndexIterator(read_options, need_upper_bound_check, &iiter_on_stack, sst_file_range.begin()->get_context, &lookup_context); std::unique_ptr> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr.reset(iiter); } uint64_t offset = std::numeric_limits::max(); autovector block_handles; autovector, MultiGetContext::MAX_BATCH_SIZE> results; autovector statuses; char stack_buf[kMultiGetReadStackBufSize]; std::unique_ptr block_buf; { MultiGetRange data_block_range(sst_file_range, sst_file_range.begin(), sst_file_range.end()); CachableEntry uncompression_dict; Status uncompression_dict_status; bool uncompression_dict_inited = false; size_t total_len = 0; ReadOptions ro = read_options; ro.read_tier = kBlockCacheTier; for (auto miter = data_block_range.begin(); miter != data_block_range.end(); ++miter) 
{ const Slice& key = miter->ikey; iiter->Seek(miter->ikey); IndexValue v; if (iiter->Valid()) { v = iiter->value(); } if (!iiter->Valid() || (!v.first_internal_key.empty() && !skip_filters && UserComparatorWrapper(rep_->internal_comparator.user_comparator()) .Compare(ExtractUserKey(key), ExtractUserKey(v.first_internal_key)) < 0)) { // The requested key falls between highest key in previous block and // lowest key in current block. if (!iiter->status().IsNotFound()) { *(miter->s) = iiter->status(); } data_block_range.SkipKey(miter); sst_file_range.SkipKey(miter); continue; } if (!uncompression_dict_inited && rep_->uncompression_dict_reader) { uncompression_dict_status = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( nullptr /* prefetch_buffer */, no_io, sst_file_range.begin()->get_context, &lookup_context, &uncompression_dict); uncompression_dict_inited = true; } if (!uncompression_dict_status.ok()) { assert(!uncompression_dict_status.IsNotFound()); *(miter->s) = uncompression_dict_status; data_block_range.SkipKey(miter); sst_file_range.SkipKey(miter); continue; } statuses.emplace_back(); results.emplace_back(); if (v.handle.offset() == offset) { // We're going to reuse the block for this key later on. No need to // look it up now. Place a null handle block_handles.emplace_back(BlockHandle::NullBlockHandle()); continue; } // Lookup the cache for the given data block referenced by an index // iterator value (i.e BlockHandle). If it exists in the cache, // initialize block to the contents of the data block. offset = v.handle.offset(); BlockHandle handle = v.handle; BlockCacheLookupContext lookup_data_block_context( TableReaderCaller::kUserMultiGet); const UncompressionDict& dict = uncompression_dict.GetValue() ? 
*uncompression_dict.GetValue() : UncompressionDict::GetEmptyDict(); Status s = RetrieveBlock( nullptr, ro, handle, dict, &(results.back()), BlockType::kData, miter->get_context, &lookup_data_block_context, /* for_compaction */ false, /* use_cache */ true); if (s.IsIncomplete()) { s = Status::OK(); } if (s.ok() && !results.back().IsEmpty()) { // Found it in the cache. Add NULL handle to indicate there is // nothing to read from disk block_handles.emplace_back(BlockHandle::NullBlockHandle()); } else { block_handles.emplace_back(handle); total_len += block_size(handle); } } if (total_len) { char* scratch = nullptr; const UncompressionDict& dict = uncompression_dict.GetValue() ? *uncompression_dict.GetValue() : UncompressionDict::GetEmptyDict(); assert(uncompression_dict_inited || !rep_->uncompression_dict_reader); assert(uncompression_dict_status.ok()); // If using direct IO, then scratch is not used, so keep it nullptr. // If the blocks need to be uncompressed and we don't need the // compressed blocks, then we can use a contiguous block of // memory to read in all the blocks as it will be temporary // storage // 1. If blocks are compressed and compressed block cache is there, // alloc heap bufs // 2. If blocks are uncompressed, alloc heap bufs // 3. 
If blocks are compressed and no compressed block cache, use // stack buf if (!rep_->file->use_direct_io() && rep_->table_options.block_cache_compressed == nullptr && rep_->blocks_maybe_compressed) { if (total_len <= kMultiGetReadStackBufSize) { scratch = stack_buf; } else { scratch = new char[total_len]; block_buf.reset(scratch); } } RetrieveMultipleBlocks(read_options, &data_block_range, &block_handles, &statuses, &results, scratch, dict); } } DataBlockIter first_biter; DataBlockIter next_biter; size_t idx_in_batch = 0; for (auto miter = sst_file_range.begin(); miter != sst_file_range.end(); ++miter) { Status s; GetContext* get_context = miter->get_context; const Slice& key = miter->ikey; bool matched = false; // if such user key matched a key in SST bool done = false; bool first_block = true; do { DataBlockIter* biter = nullptr; bool reusing_block = true; uint64_t referenced_data_size = 0; bool does_referenced_key_exist = false; BlockCacheLookupContext lookup_data_block_context( TableReaderCaller::kUserMultiGet, tracing_mget_id, /*get_from_user_specified_snapshot=*/read_options.snapshot != nullptr); if (first_block) { if (!block_handles[idx_in_batch].IsNull() || !results[idx_in_batch].IsEmpty()) { first_biter.Invalidate(Status::OK()); NewDataBlockIterator( read_options, results[idx_in_batch], &first_biter, statuses[idx_in_batch]); reusing_block = false; } biter = &first_biter; idx_in_batch++; } else { IndexValue v = iiter->value(); if (!v.first_internal_key.empty() && !skip_filters && UserComparatorWrapper(rep_->internal_comparator.user_comparator()) .Compare(ExtractUserKey(key), ExtractUserKey(v.first_internal_key)) < 0) { // The requested key falls between highest key in previous block and // lowest key in current block. 
break; } next_biter.Invalidate(Status::OK()); NewDataBlockIterator( read_options, iiter->value().handle, &next_biter, BlockType::kData, get_context, &lookup_data_block_context, Status(), nullptr); biter = &next_biter; reusing_block = false; } if (read_options.read_tier == kBlockCacheTier && biter->status().IsIncomplete()) { // couldn't get block from block_cache // Update Saver.state to Found because we are only looking for // whether we can guarantee the key is not there when "no_io" is set get_context->MarkKeyMayExist(); break; } if (!biter->status().ok()) { s = biter->status(); break; } bool may_exist = biter->SeekForGet(key); if (!may_exist) { // HashSeek cannot find the key this block and the the iter is not // the end of the block, i.e. cannot be in the following blocks // either. In this case, the seek_key cannot be found, so we break // from the top level for-loop. break; } // Call the *saver function on each entry/block until it returns false for (; biter->Valid(); biter->Next()) { ParsedInternalKey parsed_key; Cleanable dummy; Cleanable* value_pinner = nullptr; if (!ParseInternalKey(biter->key(), &parsed_key)) { s = Status::Corruption(Slice()); } if (biter->IsValuePinned()) { if (reusing_block) { Cache* block_cache = rep_->table_options.block_cache.get(); assert(biter->cache_handle() != nullptr); block_cache->Ref(biter->cache_handle()); dummy.RegisterCleanup(&ReleaseCachedEntry, block_cache, biter->cache_handle()); value_pinner = &dummy; } else { value_pinner = biter; } } if (!get_context->SaveValue(parsed_key, biter->value(), &matched, value_pinner)) { if (get_context->State() == GetContext::GetState::kFound) { does_referenced_key_exist = true; referenced_data_size = biter->key().size() + biter->value().size(); } done = true; break; } s = biter->status(); } // Write the block cache access. 
if (block_cache_tracer_ && block_cache_tracer_->is_tracing_enabled()) { // Avoid making copy of block_key, cf_name, and referenced_key when // constructing the access record. Slice referenced_key; if (does_referenced_key_exist) { referenced_key = biter->key(); } else { referenced_key = key; } BlockCacheTraceRecord access_record( rep_->ioptions.env->NowMicros(), /*block_key=*/"", lookup_data_block_context.block_type, lookup_data_block_context.block_size, rep_->cf_id_for_tracing(), /*cf_name=*/"", rep_->level_for_tracing(), rep_->sst_number_for_tracing(), lookup_data_block_context.caller, lookup_data_block_context.is_cache_hit, lookup_data_block_context.no_insert, lookup_data_block_context.get_id, lookup_data_block_context.get_from_user_specified_snapshot, /*referenced_key=*/"", referenced_data_size, lookup_data_block_context.num_keys_in_block, does_referenced_key_exist); block_cache_tracer_->WriteBlockAccess( access_record, lookup_data_block_context.block_key, rep_->cf_name_for_tracing(), referenced_key); } s = biter->status(); if (done) { // Avoid the extra Next which is expensive in two-level indexes break; } if (first_block) { iiter->Seek(key); } first_block = false; iiter->Next(); } while (iiter->Valid()); if (matched && filter != nullptr && !filter->IsBlockBased()) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE); PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, rep_->level); } if (s.ok() && !iiter->status().IsNotFound()) { s = iiter->status(); } *(miter->s) = s; } } } Status BlockBasedTable::Prefetch(const Slice* const begin, const Slice* const end) { auto& comparator = rep_->internal_comparator; UserComparatorWrapper user_comparator(comparator.user_comparator()); // pre-condition if (begin && end && comparator.Compare(*begin, *end) > 0) { return Status::InvalidArgument(*begin, *end); } BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; IndexBlockIter iiter_on_stack; auto iiter = 
NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, &iiter_on_stack, /*get_context=*/nullptr, &lookup_context); std::unique_ptr> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr = std::unique_ptr>(iiter); } if (!iiter->status().ok()) { // error opening index iterator return iiter->status(); } // indicates if we are on the last page that need to be pre-fetched bool prefetching_boundary_page = false; for (begin ? iiter->Seek(*begin) : iiter->SeekToFirst(); iiter->Valid(); iiter->Next()) { BlockHandle block_handle = iiter->value().handle; const bool is_user_key = !rep_->index_key_includes_seq; if (end && ((!is_user_key && comparator.Compare(iiter->key(), *end) >= 0) || (is_user_key && user_comparator.Compare(iiter->key(), ExtractUserKey(*end)) >= 0))) { if (prefetching_boundary_page) { break; } // The index entry represents the last key in the data block. // We should load this page into memory as well, but no more prefetching_boundary_page = true; } // Load the block specified by the block_handle into the block cache DataBlockIter biter; NewDataBlockIterator( ReadOptions(), block_handle, &biter, /*type=*/BlockType::kData, /*get_context=*/nullptr, &lookup_context, Status(), /*prefetch_buffer=*/nullptr); if (!biter.status().ok()) { // there was an unexpected error while pre-fetching return biter.status(); } } return Status::OK(); } Status BlockBasedTable::VerifyChecksum(const ReadOptions& read_options, TableReaderCaller caller) { Status s; // Check Meta blocks std::unique_ptr metaindex; std::unique_ptr metaindex_iter; s = ReadMetaIndexBlock(nullptr /* prefetch buffer */, &metaindex, &metaindex_iter); if (s.ok()) { s = VerifyChecksumInMetaBlocks(metaindex_iter.get()); if (!s.ok()) { return s; } } else { return s; } // Check Data blocks IndexBlockIter iiter_on_stack; BlockCacheLookupContext context{caller}; InternalIteratorBase* iiter = NewIndexIterator( read_options, /*disable_prefix_seek=*/false, &iiter_on_stack, /*get_context=*/nullptr, 
&context); std::unique_ptr> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr = std::unique_ptr>(iiter); } if (!iiter->status().ok()) { // error opening index iterator return iiter->status(); } s = VerifyChecksumInBlocks(read_options, iiter); return s; } Status BlockBasedTable::VerifyChecksumInBlocks( const ReadOptions& read_options, InternalIteratorBase* index_iter) { Status s; // We are scanning the whole file, so no need to do exponential // increasing of the buffer size. size_t readahead_size = (read_options.readahead_size != 0) ? read_options.readahead_size : kMaxAutoReadaheadSize; // FilePrefetchBuffer doesn't work in mmap mode and readahead is not // needed there. FilePrefetchBuffer prefetch_buffer( rep_->file.get(), readahead_size /* readadhead_size */, readahead_size /* max_readahead_size */, !rep_->ioptions.allow_mmap_reads /* enable */); for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) { s = index_iter->status(); if (!s.ok()) { break; } BlockHandle handle = index_iter->value().handle; BlockContents contents; BlockFetcher block_fetcher( rep_->file.get(), &prefetch_buffer, rep_->footer, ReadOptions(), handle, &contents, rep_->ioptions, false /* decompress */, false /*maybe_compressed*/, BlockType::kData, UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { break; } } if (s.ok()) { // In the case of two level indexes, we would have exited the above loop // by checking index_iter->Valid(), but Valid() might have returned false // due to an IO error. 
So check the index_iter status s = index_iter->status(); } return s; } BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( const Slice& meta_block_name) { if (meta_block_name.starts_with(kFilterBlockPrefix) || meta_block_name.starts_with(kFullFilterBlockPrefix) || meta_block_name.starts_with(kPartitionedFilterBlockPrefix)) { return BlockType::kFilter; } if (meta_block_name == kPropertiesBlock) { return BlockType::kProperties; } if (meta_block_name == kCompressionDictBlock) { return BlockType::kCompressionDictionary; } if (meta_block_name == kRangeDelBlock) { return BlockType::kRangeDeletion; } if (meta_block_name == kHashIndexPrefixesBlock) { return BlockType::kHashIndexPrefixes; } if (meta_block_name == kHashIndexPrefixesMetadataBlock) { return BlockType::kHashIndexMetadata; } assert(false); return BlockType::kInvalid; } Status BlockBasedTable::VerifyChecksumInMetaBlocks( InternalIteratorBase* index_iter) { Status s; for (index_iter->SeekToFirst(); index_iter->Valid(); index_iter->Next()) { s = index_iter->status(); if (!s.ok()) { break; } BlockHandle handle; Slice input = index_iter->value(); s = handle.DecodeFrom(&input); BlockContents contents; const Slice meta_block_name = index_iter->key(); BlockFetcher block_fetcher( rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, ReadOptions(), handle, &contents, rep_->ioptions, false /* decompress */, false /*maybe_compressed*/, GetBlockTypeForMetaBlockByName(meta_block_name), UncompressionDict::GetEmptyDict(), rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (s.IsCorruption() && meta_block_name == kPropertiesBlock) { TableProperties* table_properties; s = TryReadPropertiesWithGlobalSeqno(nullptr /* prefetch_buffer */, index_iter->value(), &table_properties); delete table_properties; } if (!s.ok()) { break; } } return s; } bool BlockBasedTable::TEST_BlockInCache(const BlockHandle& handle) const { assert(rep_ != nullptr); Cache* const cache = 
rep_->table_options.block_cache.get(); if (cache == nullptr) { return false; } char cache_key_storage[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; Slice cache_key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, handle, cache_key_storage); Cache::Handle* const cache_handle = cache->Lookup(cache_key); if (cache_handle == nullptr) { return false; } cache->Release(cache_handle); return true; } bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, const Slice& key) { std::unique_ptr> iiter(NewIndexIterator( options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); iiter->Seek(key); assert(iiter->Valid()); return TEST_BlockInCache(iiter->value().handle); } // REQUIRES: The following fields of rep_ should have already been populated: // 1. file // 2. index_handle, // 3. options // 4. internal_comparator // 5. index_type Status BlockBasedTable::CreateIndexReader( FilePrefetchBuffer* prefetch_buffer, InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader) { // kHashSearch requires non-empty prefix_extractor but bypass checking // prefix_extractor here since we have no access to MutableCFOptions. // Add need_upper_bound_check flag in BlockBasedTable::NewIndexIterator. 
// If prefix_extractor does not match prefix_extractor_name from table // properties, turn off Hash Index by setting total_order_seek to true switch (rep_->index_type) { case BlockBasedTableOptions::kTwoLevelIndexSearch: { return PartitionIndexReader::Create(this, prefetch_buffer, use_cache, prefetch, pin, lookup_context, index_reader); } case BlockBasedTableOptions::kBinarySearch: FALLTHROUGH_INTENDED; case BlockBasedTableOptions::kBinarySearchWithFirstKey: { return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache, prefetch, pin, lookup_context, index_reader); } case BlockBasedTableOptions::kHashSearch: { std::unique_ptr metaindex_guard; std::unique_ptr metaindex_iter_guard; auto meta_index_iter = preloaded_meta_index_iter; bool should_fallback = false; if (rep_->internal_prefix_transform.get() == nullptr) { ROCKS_LOG_WARN(rep_->ioptions.info_log, "No prefix extractor passed in. Fall back to binary" " search index."); should_fallback = true; } else if (meta_index_iter == nullptr) { auto s = ReadMetaIndexBlock(prefetch_buffer, &metaindex_guard, &metaindex_iter_guard); if (!s.ok()) { // we simply fall back to binary search in case there is any // problem with prefix hash index loading. ROCKS_LOG_WARN(rep_->ioptions.info_log, "Unable to read the metaindex block." 
" Fall back to binary search index."); should_fallback = true; } meta_index_iter = metaindex_iter_guard.get(); } if (should_fallback) { return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache, prefetch, pin, lookup_context, index_reader); } else { return HashIndexReader::Create(this, prefetch_buffer, meta_index_iter, use_cache, prefetch, pin, lookup_context, index_reader); } } default: { std::string error_message = "Unrecognized index type: " + ToString(rep_->index_type); return Status::InvalidArgument(error_message.c_str()); } } } uint64_t BlockBasedTable::ApproximateDataOffsetOf( const InternalIteratorBase& index_iter, uint64_t data_size) const { if (index_iter.Valid()) { BlockHandle handle = index_iter.value().handle; return handle.offset(); } else { // The iterator is past the last key in the file. return data_size; } } uint64_t BlockBasedTable::GetApproximateDataSize() { // Should be in table properties unless super old version if (rep_->table_properties) { return rep_->table_properties->data_size; } // Fall back to rough estimate from footer return rep_->footer.metaindex_handle().offset(); } uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key, TableReaderCaller caller) { uint64_t data_size = GetApproximateDataSize(); if (UNLIKELY(data_size == 0)) { // Hmm. Let's just split in half to avoid skewing one way or another, // since we don't know whether we're operating on lower bound or // upper bound. 
return rep_->file_size / 2; } BlockCacheLookupContext context(caller); IndexBlockIter iiter_on_stack; ReadOptions ro; ro.total_order_seek = true; auto index_iter = NewIndexIterator(ro, /*disable_prefix_seek=*/true, /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr, /*lookup_context=*/&context); std::unique_ptr> iiter_unique_ptr; if (index_iter != &iiter_on_stack) { iiter_unique_ptr.reset(index_iter); } index_iter->Seek(key); uint64_t offset = ApproximateDataOffsetOf(*index_iter, data_size); // Pro-rate file metadata (incl filters) size-proportionally across data // blocks. double size_ratio = static_cast(offset) / static_cast(data_size); return static_cast(size_ratio * static_cast(rep_->file_size)); } uint64_t BlockBasedTable::ApproximateSize(const Slice& start, const Slice& end, TableReaderCaller caller) { assert(rep_->internal_comparator.Compare(start, end) <= 0); uint64_t data_size = GetApproximateDataSize(); if (UNLIKELY(data_size == 0)) { // Hmm. Assume whole file is involved, since we have lower and upper // bound. return rep_->file_size; } BlockCacheLookupContext context(caller); IndexBlockIter iiter_on_stack; ReadOptions ro; ro.total_order_seek = true; auto index_iter = NewIndexIterator(ro, /*disable_prefix_seek=*/true, /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr, /*lookup_context=*/&context); std::unique_ptr> iiter_unique_ptr; if (index_iter != &iiter_on_stack) { iiter_unique_ptr.reset(index_iter); } index_iter->Seek(start); uint64_t start_offset = ApproximateDataOffsetOf(*index_iter, data_size); index_iter->Seek(end); uint64_t end_offset = ApproximateDataOffsetOf(*index_iter, data_size); assert(end_offset >= start_offset); // Pro-rate file metadata (incl filters) size-proportionally across data // blocks. 
double size_ratio = static_cast(end_offset - start_offset) / static_cast(data_size); return static_cast(size_ratio * static_cast(rep_->file_size)); } bool BlockBasedTable::TEST_FilterBlockInCache() const { assert(rep_ != nullptr); return TEST_BlockInCache(rep_->filter_handle); } bool BlockBasedTable::TEST_IndexBlockInCache() const { assert(rep_ != nullptr); return TEST_BlockInCache(rep_->footer.index_handle()); } Status BlockBasedTable::GetKVPairsFromDataBlocks( std::vector* kv_pair_blocks) { std::unique_ptr> blockhandles_iter( NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, /*get_context=*/nullptr, /*lookup_contex=*/nullptr)); Status s = blockhandles_iter->status(); if (!s.ok()) { // Cannot read Index Block return s; } for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid(); blockhandles_iter->Next()) { s = blockhandles_iter->status(); if (!s.ok()) { break; } std::unique_ptr datablock_iter; datablock_iter.reset(NewDataBlockIterator( ReadOptions(), blockhandles_iter->value().handle, /*input_iter=*/nullptr, /*type=*/BlockType::kData, /*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(), /*prefetch_buffer=*/nullptr)); s = datablock_iter->status(); if (!s.ok()) { // Error reading the block - Skipped continue; } KVPairBlock kv_pair_block; for (datablock_iter->SeekToFirst(); datablock_iter->Valid(); datablock_iter->Next()) { s = datablock_iter->status(); if (!s.ok()) { // Error reading the block - Skipped break; } const Slice& key = datablock_iter->key(); const Slice& value = datablock_iter->value(); std::string key_copy = std::string(key.data(), key.size()); std::string value_copy = std::string(value.data(), value.size()); kv_pair_block.push_back( std::make_pair(std::move(key_copy), std::move(value_copy))); } kv_pair_blocks->push_back(std::move(kv_pair_block)); } return Status::OK(); } Status BlockBasedTable::DumpTable(WritableFile* out_file) { // Output Footer out_file->Append( "Footer Details:\n" 
"--------------------------------------\n" " "); out_file->Append(rep_->footer.ToString().c_str()); out_file->Append("\n"); // Output MetaIndex out_file->Append( "Metaindex Details:\n" "--------------------------------------\n"); std::unique_ptr metaindex; std::unique_ptr metaindex_iter; Status s = ReadMetaIndexBlock(nullptr /* prefetch_buffer */, &metaindex, &metaindex_iter); if (s.ok()) { for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); metaindex_iter->Next()) { s = metaindex_iter->status(); if (!s.ok()) { return s; } if (metaindex_iter->key() == ROCKSDB_NAMESPACE::kPropertiesBlock) { out_file->Append(" Properties block handle: "); out_file->Append(metaindex_iter->value().ToString(true).c_str()); out_file->Append("\n"); } else if (metaindex_iter->key() == ROCKSDB_NAMESPACE::kCompressionDictBlock) { out_file->Append(" Compression dictionary block handle: "); out_file->Append(metaindex_iter->value().ToString(true).c_str()); out_file->Append("\n"); } else if (strstr(metaindex_iter->key().ToString().c_str(), "filter.rocksdb.") != nullptr) { out_file->Append(" Filter block handle: "); out_file->Append(metaindex_iter->value().ToString(true).c_str()); out_file->Append("\n"); } else if (metaindex_iter->key() == ROCKSDB_NAMESPACE::kRangeDelBlock) { out_file->Append(" Range deletion block handle: "); out_file->Append(metaindex_iter->value().ToString(true).c_str()); out_file->Append("\n"); } } out_file->Append("\n"); } else { return s; } // Output TableProperties const ROCKSDB_NAMESPACE::TableProperties* table_properties; table_properties = rep_->table_properties.get(); if (table_properties != nullptr) { out_file->Append( "Table Properties:\n" "--------------------------------------\n" " "); out_file->Append(table_properties->ToString("\n ", ": ").c_str()); out_file->Append("\n"); } if (rep_->filter) { out_file->Append( "Filter Details:\n" "--------------------------------------\n" " "); out_file->Append(rep_->filter->ToString().c_str()); out_file->Append("\n"); 
} // Output Index block s = DumpIndexBlock(out_file); if (!s.ok()) { return s; } // Output compression dictionary if (rep_->uncompression_dict_reader) { CachableEntry uncompression_dict; s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( nullptr /* prefetch_buffer */, false /* no_io */, nullptr /* get_context */, nullptr /* lookup_context */, &uncompression_dict); if (!s.ok()) { return s; } assert(uncompression_dict.GetValue()); const Slice& raw_dict = uncompression_dict.GetValue()->GetRawDict(); out_file->Append( "Compression Dictionary:\n" "--------------------------------------\n"); out_file->Append(" size (bytes): "); out_file->Append(ROCKSDB_NAMESPACE::ToString(raw_dict.size())); out_file->Append("\n\n"); out_file->Append(" HEX "); out_file->Append(raw_dict.ToString(true).c_str()); out_file->Append("\n\n"); } // Output range deletions block auto* range_del_iter = NewRangeTombstoneIterator(ReadOptions()); if (range_del_iter != nullptr) { range_del_iter->SeekToFirst(); if (range_del_iter->Valid()) { out_file->Append( "Range deletions:\n" "--------------------------------------\n" " "); for (; range_del_iter->Valid(); range_del_iter->Next()) { DumpKeyValue(range_del_iter->key(), range_del_iter->value(), out_file); } out_file->Append("\n"); } delete range_del_iter; } // Output Data blocks s = DumpDataBlocks(out_file); return s; } Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) { out_file->Append( "Index Details:\n" "--------------------------------------\n"); std::unique_ptr> blockhandles_iter( NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, /*get_context=*/nullptr, /*lookup_contex=*/nullptr)); Status s = blockhandles_iter->status(); if (!s.ok()) { out_file->Append("Can not read Index Block \n\n"); return s; } out_file->Append(" Block key hex dump: Data block handle\n"); out_file->Append(" Block key ascii\n\n"); for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid(); 
blockhandles_iter->Next()) { s = blockhandles_iter->status(); if (!s.ok()) { break; } Slice key = blockhandles_iter->key(); Slice user_key; InternalKey ikey; if (!rep_->index_key_includes_seq) { user_key = key; } else { ikey.DecodeFrom(key); user_key = ikey.user_key(); } out_file->Append(" HEX "); out_file->Append(user_key.ToString(true).c_str()); out_file->Append(": "); out_file->Append(blockhandles_iter->value() .ToString(true, rep_->index_has_first_key) .c_str()); out_file->Append("\n"); std::string str_key = user_key.ToString(); std::string res_key(""); char cspace = ' '; for (size_t i = 0; i < str_key.size(); i++) { res_key.append(&str_key[i], 1); res_key.append(1, cspace); } out_file->Append(" ASCII "); out_file->Append(res_key.c_str()); out_file->Append("\n ------\n"); } out_file->Append("\n"); return Status::OK(); } Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) { std::unique_ptr> blockhandles_iter( NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, /*get_context=*/nullptr, /*lookup_contex=*/nullptr)); Status s = blockhandles_iter->status(); if (!s.ok()) { out_file->Append("Can not read Index Block \n\n"); return s; } uint64_t datablock_size_min = std::numeric_limits::max(); uint64_t datablock_size_max = 0; uint64_t datablock_size_sum = 0; size_t block_id = 1; for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid(); block_id++, blockhandles_iter->Next()) { s = blockhandles_iter->status(); if (!s.ok()) { break; } BlockHandle bh = blockhandles_iter->value().handle; uint64_t datablock_size = bh.size(); datablock_size_min = std::min(datablock_size_min, datablock_size); datablock_size_max = std::max(datablock_size_max, datablock_size); datablock_size_sum += datablock_size; out_file->Append("Data Block # "); out_file->Append(ROCKSDB_NAMESPACE::ToString(block_id)); out_file->Append(" @ "); out_file->Append(blockhandles_iter->value().handle.ToString(true).c_str()); out_file->Append("\n"); 
out_file->Append("--------------------------------------\n"); std::unique_ptr datablock_iter; datablock_iter.reset(NewDataBlockIterator( ReadOptions(), blockhandles_iter->value().handle, /*input_iter=*/nullptr, /*type=*/BlockType::kData, /*get_context=*/nullptr, /*lookup_context=*/nullptr, Status(), /*prefetch_buffer=*/nullptr)); s = datablock_iter->status(); if (!s.ok()) { out_file->Append("Error reading the block - Skipped \n\n"); continue; } for (datablock_iter->SeekToFirst(); datablock_iter->Valid(); datablock_iter->Next()) { s = datablock_iter->status(); if (!s.ok()) { out_file->Append("Error reading the block - Skipped \n"); break; } DumpKeyValue(datablock_iter->key(), datablock_iter->value(), out_file); } out_file->Append("\n"); } uint64_t num_datablocks = block_id - 1; if (num_datablocks) { double datablock_size_avg = static_cast(datablock_size_sum) / num_datablocks; out_file->Append("Data Block Summary:\n"); out_file->Append("--------------------------------------"); out_file->Append("\n # data blocks: "); out_file->Append(ROCKSDB_NAMESPACE::ToString(num_datablocks)); out_file->Append("\n min data block size: "); out_file->Append(ROCKSDB_NAMESPACE::ToString(datablock_size_min)); out_file->Append("\n max data block size: "); out_file->Append(ROCKSDB_NAMESPACE::ToString(datablock_size_max)); out_file->Append("\n avg data block size: "); out_file->Append(ROCKSDB_NAMESPACE::ToString(datablock_size_avg)); out_file->Append("\n"); } return Status::OK(); } void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value, WritableFile* out_file) { InternalKey ikey; ikey.DecodeFrom(key); out_file->Append(" HEX "); out_file->Append(ikey.user_key().ToString(true).c_str()); out_file->Append(": "); out_file->Append(value.ToString(true).c_str()); out_file->Append("\n"); std::string str_key = ikey.user_key().ToString(); std::string str_value = value.ToString(); std::string res_key(""), res_value(""); char cspace = ' '; for (size_t i = 0; i < str_key.size(); i++) { 
if (str_key[i] == '\0') { res_key.append("\\0", 2); } else { res_key.append(&str_key[i], 1); } res_key.append(1, cspace); } for (size_t i = 0; i < str_value.size(); i++) { if (str_value[i] == '\0') { res_value.append("\\0", 2); } else { res_value.append(&str_value[i], 1); } res_value.append(1, cspace); } out_file->Append(" ASCII "); out_file->Append(res_key.c_str()); out_file->Append(": "); out_file->Append(res_value.c_str()); out_file->Append("\n ------\n"); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_table_reader.h000066400000000000000000000674171370372246700236750ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include "db/range_tombstone_fragmenter.h" #include "file/filename.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_type.h" #include "table/block_based/cachable_entry.h" #include "table/block_based/filter_block.h" #include "table/block_based/uncompression_dict_reader.h" #include "table/table_properties_internal.h" #include "table/table_reader.h" #include "table/two_level_iterator.h" #include "trace_replay/block_cache_tracer.h" namespace ROCKSDB_NAMESPACE { class Cache; class FilterBlockReader; class BlockBasedFilterBlockReader; class FullFilterBlockReader; class Footer; class InternalKeyComparator; class Iterator; class FSRandomAccessFile; class TableCache; class TableReader; class WritableFile; struct BlockBasedTableOptions; struct EnvOptions; struct ReadOptions; class GetContext; typedef std::vector> KVPairBlock; // Reader class for BlockBasedTable format. // For the format of BlockBasedTable refer to // https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format. // This is the default table type. Data is chucked into fixed size blocks and // each block in-turn stores entries. When storing data, we can compress and/or // encode data efficiently within a block, which often results in a much smaller // data size compared with the raw data size. As for the record retrieval, we'll // first locate the block where target record may reside, then read the block to // memory, and finally search that record within the block. Of course, to avoid // frequent reads of the same block, we introduced the block cache to keep the // loaded blocks in the memory. class BlockBasedTable : public TableReader { public: static const std::string kFilterBlockPrefix; static const std::string kFullFilterBlockPrefix; static const std::string kPartitionedFilterBlockPrefix; // The longest prefix of the cache key used to identify blocks. // For Posix files the unique ID is three varints. 
static const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length * 3 + 1; // All the below fields control iterator readahead static const size_t kInitAutoReadaheadSize = 8 * 1024; // Found that 256 KB readahead size provides the best performance, based on // experiments, for auto readahead. Experiment data is in PR #3282. static const size_t kMaxAutoReadaheadSize; static const int kMinNumFileReadsToStartAutoReadahead = 2; // Attempt to open the table that is stored in bytes [0..file_size) // of "file", and read the metadata entries necessary to allow // retrieving data from the table. // // If successful, returns ok and sets "*table_reader" to the newly opened // table. The client should delete "*table_reader" when no longer needed. // If there was an error while initializing the table, sets "*table_reader" // to nullptr and returns a non-ok status. // // @param file must remain live while this Table is in use. // @param prefetch_index_and_filter_in_cache can be used to disable // prefetching of // index and filter blocks into block cache at startup // @param skip_filters Disables loading/accessing the filter block. Overrides // prefetch_index_and_filter_in_cache, so filter will be skipped if both // are set. // @param force_direct_prefetch if true, always prefetching to RocksDB // buffer, rather than calling RandomAccessFile::Prefetch(). 
static Status Open(const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_key_comparator, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, const SliceTransform* prefix_extractor = nullptr, bool prefetch_index_and_filter_in_cache = true, bool skip_filters = false, int level = -1, const bool immortal_table = false, const SequenceNumber largest_seqno = 0, bool force_direct_prefetch = false, TailPrefetchStats* tail_prefetch_stats = nullptr, BlockCacheTracer* const block_cache_tracer = nullptr, size_t max_file_size_for_l0_meta_pin = 0); bool PrefixMayMatch(const Slice& internal_key, const ReadOptions& read_options, const SliceTransform* options_prefix_extractor, const bool need_upper_bound_check, BlockCacheLookupContext* lookup_context) const; // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). // @param skip_filters Disables loading/accessing the filter block // compaction_readahead_size: its value will only be used if caller = // kCompaction. 
InternalIterator* NewIterator(const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) override; FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& read_options) override; // @param skip_filters Disables loading/accessing the filter block Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) override; void MultiGet(const ReadOptions& readOptions, const MultiGetContext::Range* mget_range, const SliceTransform* prefix_extractor, bool skip_filters = false) override; // Pre-fetch the disk blocks that correspond to the key range specified by // (kbegin, kend). The call will return error status in the event of // IO or iteration error. Status Prefetch(const Slice* begin, const Slice* end) override; // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were // present in the file). The returned value is in terms of file // bytes, and so includes effects like compression of the underlying data. // E.g., the approximate offset of the last key in the table will // be close to the file length. uint64_t ApproximateOffsetOf(const Slice& key, TableReaderCaller caller) override; // Given start and end keys, return the approximate data size in the file // between the keys. The returned value is in terms of file bytes, and so // includes effects like compression of the underlying data. // The start key must not be greater than the end key. uint64_t ApproximateSize(const Slice& start, const Slice& end, TableReaderCaller caller) override; bool TEST_BlockInCache(const BlockHandle& handle) const; // Returns true if the block for the specified key is in cache. 
// REQUIRES: key is in this table && block cache enabled bool TEST_KeyInCache(const ReadOptions& options, const Slice& key); // Set up the table for Compaction. Might change some parameters with // posix_fadvise void SetupForCompaction() override; std::shared_ptr GetTableProperties() const override; size_t ApproximateMemoryUsage() const override; // convert SST file to a human readable form Status DumpTable(WritableFile* out_file) override; Status VerifyChecksum(const ReadOptions& readOptions, TableReaderCaller caller) override; ~BlockBasedTable(); bool TEST_FilterBlockInCache() const; bool TEST_IndexBlockInCache() const; // IndexReader is the interface that provides the functionality for index // access. class IndexReader { public: virtual ~IndexReader() = default; // Create an iterator for index access. If iter is null, then a new object // is created on the heap, and the callee will have the ownership. // If a non-null iter is passed in, it will be used, and the returned value // is either the same as iter or a new on-heap object that // wraps the passed iter. In the latter case the return value points // to a different object then iter, and the callee has the ownership of the // returned object. virtual InternalIteratorBase* NewIterator( const ReadOptions& read_options, bool disable_prefix_seek, IndexBlockIter* iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; // Report an approximation of how much memory has been used other than // memory that was allocated in block cache. virtual size_t ApproximateMemoryUsage() const = 0; // Cache the dependencies of the index reader (e.g. the partitions // of a partitioned index). virtual void CacheDependencies(bool /* pin */) {} }; class IndexReaderCommon; static Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, const BlockHandle& handle, char* cache_key); // Retrieve all key value pairs from data blocks in the table. // The key retrieved are internal keys. 
Status GetKVPairsFromDataBlocks(std::vector* kv_pair_blocks); struct Rep; Rep* get_rep() { return rep_; } const Rep* get_rep() const { return rep_; } // input_iter: if it is not null, update this one and return it as Iterator template TBlockIter* NewDataBlockIterator( const ReadOptions& ro, const BlockHandle& block_handle, TBlockIter* input_iter, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, Status s, FilePrefetchBuffer* prefetch_buffer, bool for_compaction = false) const; // input_iter: if it is not null, update this one and return it as Iterator template TBlockIter* NewDataBlockIterator(const ReadOptions& ro, CachableEntry& block, TBlockIter* input_iter, Status s) const; class PartitionedIndexIteratorState; template friend class FilterBlockReaderCommon; friend class PartitionIndexReader; friend class UncompressionDictReader; protected: Rep* rep_; explicit BlockBasedTable(Rep* rep, BlockCacheTracer* const block_cache_tracer) : rep_(rep), block_cache_tracer_(block_cache_tracer) {} // No copying allowed explicit BlockBasedTable(const TableReader&) = delete; void operator=(const TableReader&) = delete; private: friend class MockedBlockBasedTable; friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test; static std::atomic next_cache_key_id_; BlockCacheTracer* const block_cache_tracer_; void UpdateCacheHitMetrics(BlockType block_type, GetContext* get_context, size_t usage) const; void UpdateCacheMissMetrics(BlockType block_type, GetContext* get_context) const; void UpdateCacheInsertionMetrics(BlockType block_type, GetContext* get_context, size_t usage, bool redundant) const; Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key, BlockType block_type, GetContext* get_context) const; // Either Block::NewDataIterator() or Block::NewIndexIterator(). 
template static TBlockIter* InitBlockIterator(const Rep* rep, Block* block, BlockType block_type, TBlockIter* input_iter, bool block_contents_pinned); // If block cache enabled (compressed or uncompressed), looks for the block // identified by handle in (1) uncompressed cache, (2) compressed cache, and // then (3) file. If found, inserts into the cache(s) that were searched // unsuccessfully (e.g., if found in file, will add to both uncompressed and // compressed caches if they're enabled). // // @param block_entry value is set to the uncompressed block if found. If // in uncompressed block cache, also sets cache_handle to reference that // block. template Status MaybeReadBlockAndLoadToCache( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, BlockContents* contents) const; // Similar to the above, with one crucial difference: it will retrieve the // block from the file even if there are no caches configured (assuming the // read options allow I/O). template Status RetrieveBlock(FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, bool use_cache) const; void RetrieveMultipleBlocks( const ReadOptions& options, const MultiGetRange* batch, const autovector* handles, autovector* statuses, autovector, MultiGetContext::MAX_BATCH_SIZE>* results, char* scratch, const UncompressionDict& uncompression_dict) const; // Get the iterator from the index reader. // // If input_iter is not set, return a new Iterator. // If input_iter is set, try to update it and return it as Iterator. // However note that in some cases the returned iterator may be different // from input_iter. 
In such case the returned iterator should be freed. // // Note: ErrorIterator with Status::Incomplete shall be returned if all the // following conditions are met: // 1. We enabled table_options.cache_index_and_filter_blocks. // 2. index is not present in block cache. // 3. We disallowed any io to be performed, that is, read_options == // kBlockCacheTier InternalIteratorBase* NewIndexIterator( const ReadOptions& read_options, bool need_upper_bound_check, IndexBlockIter* input_iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) const; // Read block cache from block caches (if set): block_cache and // block_cache_compressed. // On success, Status::OK with be returned and @block will be populated with // pointer to the block as well as its block handle. // @param uncompression_dict Data for presetting the compression library's // dictionary. template Status GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, const ReadOptions& read_options, CachableEntry* block, const UncompressionDict& uncompression_dict, BlockType block_type, GetContext* get_context) const; // Put a raw block (maybe compressed) to the corresponding block caches. // This method will perform decompression against raw_block if needed and then // populate the block caches. // On success, Status::OK will be returned; also @block will be populated with // uncompressed block and its cache handle. // // Allocated memory managed by raw_block_contents will be transferred to // PutDataBlockToCache(). After the call, the object will be invalid. // @param uncompression_dict Data for presetting the compression library's // dictionary. 
template Status PutDataBlockToCache(const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, CachableEntry* cached_block, BlockContents* raw_block_contents, CompressionType raw_block_comp_type, const UncompressionDict& uncompression_dict, MemoryAllocator* memory_allocator, BlockType block_type, GetContext* get_context) const; // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found // after a call to Seek(key), until handle_result returns false. // May not make such a call if filter policy says that key is not present. friend class TableCache; friend class BlockBasedTableBuilder; // Create a index reader based on the index type stored in the table. // Optionally, user can pass a preloaded meta_index_iter for the index that // need to access extra meta blocks for index construction. This parameter // helps avoid re-reading meta index block if caller already created one. Status CreateIndexReader(FilePrefetchBuffer* prefetch_buffer, InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader); bool FullFilterKeyMayMatch(const ReadOptions& read_options, FilterBlockReader* filter, const Slice& user_key, const bool no_io, const SliceTransform* prefix_extractor, GetContext* get_context, BlockCacheLookupContext* lookup_context) const; void FullFilterKeysMayMatch(const ReadOptions& read_options, FilterBlockReader* filter, MultiGetRange* range, const bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const; // If force_direct_prefetch is true, always prefetching to RocksDB // buffer, rather than calling RandomAccessFile::Prefetch(). 
static Status PrefetchTail( RandomAccessFileReader* file, uint64_t file_size, bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats, const bool prefetch_all, const bool preload_all, std::unique_ptr* prefetch_buffer); Status ReadMetaIndexBlock(FilePrefetchBuffer* prefetch_buffer, std::unique_ptr* metaindex_block, std::unique_ptr* iter); Status TryReadPropertiesWithGlobalSeqno(FilePrefetchBuffer* prefetch_buffer, const Slice& handle_value, TableProperties** table_properties); Status ReadPropertiesBlock(FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, const SequenceNumber largest_seqno); Status ReadRangeDelBlock(FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, const InternalKeyComparator& internal_comparator, BlockCacheLookupContext* lookup_context); Status PrefetchIndexAndFilterBlocks( FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_iter, BlockBasedTable* new_table, bool prefetch_all, const BlockBasedTableOptions& table_options, const int level, size_t file_size, size_t max_file_size_for_l0_meta_pin, BlockCacheLookupContext* lookup_context); static BlockType GetBlockTypeForMetaBlockByName(const Slice& meta_block_name); Status VerifyChecksumInMetaBlocks(InternalIteratorBase* index_iter); Status VerifyChecksumInBlocks(const ReadOptions& read_options, InternalIteratorBase* index_iter); // Create the filter from the filter block. std::unique_ptr CreateFilterBlockReader( FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); static void SetupCacheKeyPrefix(Rep* rep); // Generate a cache key prefix from the file static void GenerateCachePrefix(Cache* cc, FSRandomAccessFile* file, char* buffer, size_t* size); static void GenerateCachePrefix(Cache* cc, FSWritableFile* file, char* buffer, size_t* size); // Size of all data blocks, maybe approximate uint64_t GetApproximateDataSize(); // Given an iterator return its offset in data block section of file. 
uint64_t ApproximateDataOffsetOf( const InternalIteratorBase& index_iter, uint64_t data_size) const; // Helper functions for DumpTable() Status DumpIndexBlock(WritableFile* out_file); Status DumpDataBlocks(WritableFile* out_file); void DumpKeyValue(const Slice& key, const Slice& value, WritableFile* out_file); // A cumulative data block file read in MultiGet lower than this size will // use a stack buffer static constexpr size_t kMultiGetReadStackBufSize = 8192; friend class PartitionedFilterBlockReader; friend class PartitionedFilterBlockTest; friend class DBBasicTest_MultiGetIOBufferOverrun_Test; }; // Maintaining state of a two-level iteration on a partitioned index structure. class BlockBasedTable::PartitionedIndexIteratorState : public TwoLevelIteratorState { public: PartitionedIndexIteratorState( const BlockBasedTable* table, std::unordered_map>* block_map); InternalIteratorBase* NewSecondaryIterator( const BlockHandle& index_value) override; private: // Don't own table_ const BlockBasedTable* table_; std::unordered_map>* block_map_; }; // Stores all the properties associated with a BlockBasedTable. // These are immutable. struct BlockBasedTable::Rep { Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, const BlockBasedTableOptions& _table_opt, const InternalKeyComparator& _internal_comparator, bool skip_filters, uint64_t _file_size, int _level, const bool _immortal_table) : ioptions(_ioptions), env_options(_env_options), table_options(_table_opt), filter_policy(skip_filters ? 
nullptr : _table_opt.filter_policy.get()), internal_comparator(_internal_comparator), filter_type(FilterType::kNoFilter), index_type(BlockBasedTableOptions::IndexType::kBinarySearch), hash_index_allow_collision(false), whole_key_filtering(_table_opt.whole_key_filtering), prefix_filtering(true), global_seqno(kDisableGlobalSequenceNumber), file_size(_file_size), level(_level), immortal_table(_immortal_table) {} const ImmutableCFOptions& ioptions; const EnvOptions& env_options; const BlockBasedTableOptions table_options; const FilterPolicy* const filter_policy; const InternalKeyComparator& internal_comparator; Status status; std::unique_ptr file; char cache_key_prefix[kMaxCacheKeyPrefixSize]; size_t cache_key_prefix_size = 0; char persistent_cache_key_prefix[kMaxCacheKeyPrefixSize]; size_t persistent_cache_key_prefix_size = 0; char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize]; size_t compressed_cache_key_prefix_size = 0; PersistentCacheOptions persistent_cache_options; // Footer contains the fixed table information Footer footer; std::unique_ptr index_reader; std::unique_ptr filter; std::unique_ptr uncompression_dict_reader; enum class FilterType { kNoFilter, kFullFilter, kBlockFilter, kPartitionedFilter, }; FilterType filter_type; BlockHandle filter_handle; BlockHandle compression_dict_handle; std::shared_ptr table_properties; BlockBasedTableOptions::IndexType index_type; bool hash_index_allow_collision; bool whole_key_filtering; bool prefix_filtering; // TODO(kailiu) It is very ugly to use internal key in table, since table // module should not be relying on db module. However to make things easier // and compatible with existing code, we introduce a wrapper that allows // block to extract prefix without knowing if a key is internal or not. // null if no prefix_extractor is passed in when opening the table reader. 
std::unique_ptr internal_prefix_transform; std::shared_ptr table_prefix_extractor; std::shared_ptr fragmented_range_dels; // If global_seqno is used, all Keys in this file will have the same // seqno with value `global_seqno`. // // A value of kDisableGlobalSequenceNumber means that this feature is disabled // and every key have it's own seqno. SequenceNumber global_seqno; // Size of the table file on disk uint64_t file_size; // the level when the table is opened, could potentially change when trivial // move is involved int level; // If false, blocks in this file are definitely all uncompressed. Knowing this // before reading individual blocks enables certain optimizations. bool blocks_maybe_compressed = true; // If true, data blocks in this file are definitely ZSTD compressed. If false // they might not be. When false we skip creating a ZSTD digested // uncompression dictionary. Even if we get a false negative, things should // still work, just not as quickly. bool blocks_definitely_zstd_compressed = false; // These describe how index is encoded. bool index_has_first_key = false; bool index_key_includes_seq = true; bool index_value_is_full = true; const bool immortal_table; SequenceNumber get_global_seqno(BlockType block_type) const { return (block_type == BlockType::kFilter || block_type == BlockType::kCompressionDictionary) ? kDisableGlobalSequenceNumber : global_seqno; } uint64_t cf_id_for_tracing() const { return table_properties ? table_properties->column_family_id : ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory::Context:: kUnknownColumnFamily; } Slice cf_name_for_tracing() const { return table_properties ? table_properties->column_family_name : BlockCacheTraceHelper::kUnknownColumnFamilyName; } uint32_t level_for_tracing() const { return level >= 0 ? level : UINT32_MAX; } uint64_t sst_number_for_tracing() const { return file ? 
TableFileNameToNumber(file->file_name()) : UINT64_MAX; } void CreateFilePrefetchBuffer( size_t readahead_size, size_t max_readahead_size, std::unique_ptr* fpb) const { fpb->reset(new FilePrefetchBuffer(file.get(), readahead_size, max_readahead_size, !ioptions.allow_mmap_reads /* enable */)); } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_based_table_reader_impl.h000066400000000000000000000205431370372246700247030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "table/block_based/block_based_table_reader.h" #include "table/block_based/reader_common.h" // The file contains some member functions of BlockBasedTable that // cannot be implemented in block_based_table_reader.cc because // it's called by other files (e.g. block_based_iterator.h) and // are templates. namespace ROCKSDB_NAMESPACE { // Convert an index iterator value (i.e., an encoded BlockHandle) // into an iterator over the contents of the corresponding block. // If input_iter is null, new a iterator // If input_iter is not null, update this iter and return it template TBlockIter* BlockBasedTable::NewDataBlockIterator( const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter, BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, Status s, FilePrefetchBuffer* prefetch_buffer, bool for_compaction) const { PERF_TIMER_GUARD(new_table_block_iter_nanos); TBlockIter* iter = input_iter != nullptr ? 
input_iter : new TBlockIter; if (!s.ok()) { iter->Invalidate(s); return iter; } CachableEntry uncompression_dict; if (rep_->uncompression_dict_reader) { const bool no_io = (ro.read_tier == kBlockCacheTier); s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( prefetch_buffer, no_io, get_context, lookup_context, &uncompression_dict); if (!s.ok()) { iter->Invalidate(s); return iter; } } const UncompressionDict& dict = uncompression_dict.GetValue() ? *uncompression_dict.GetValue() : UncompressionDict::GetEmptyDict(); CachableEntry block; s = RetrieveBlock(prefetch_buffer, ro, handle, dict, &block, block_type, get_context, lookup_context, for_compaction, /* use_cache */ true); if (!s.ok()) { assert(block.IsEmpty()); iter->Invalidate(s); return iter; } assert(block.GetValue() != nullptr); // Block contents are pinned and it is still pinned after the iterator // is destroyed as long as cleanup functions are moved to another object, // when: // 1. block cache handle is set to be released in cleanup function, or // 2. it's pointing to immortal source. If own_bytes is true then we are // not reading data from the original source, whether immortal or not. // Otherwise, the block is pinned iff the source is immortal. const bool block_contents_pinned = block.IsCached() || (!block.GetValue()->own_bytes() && rep_->immortal_table); iter = InitBlockIterator(rep_, block.GetValue(), block_type, iter, block_contents_pinned); if (!block.IsCached()) { if (!ro.fill_cache && rep_->cache_key_prefix_size != 0) { // insert a dummy record to block cache to track the memory usage Cache* const block_cache = rep_->table_options.block_cache.get(); Cache::Handle* cache_handle = nullptr; // There are two other types of cache keys: 1) SST cache key added in // `MaybeReadBlockAndLoadToCache` 2) dummy cache key added in // `write_buffer_manager`. 
Use longer prefix (41 bytes) to differentiate // from SST cache key(31 bytes), and use non-zero prefix to // differentiate from `write_buffer_manager` const size_t kExtraCacheKeyPrefix = kMaxVarint64Length * 4 + 1; char cache_key[kExtraCacheKeyPrefix + kMaxVarint64Length]; // Prefix: use rep_->cache_key_prefix padded by 0s memset(cache_key, 0, kExtraCacheKeyPrefix + kMaxVarint64Length); assert(rep_->cache_key_prefix_size != 0); assert(rep_->cache_key_prefix_size <= kExtraCacheKeyPrefix); memcpy(cache_key, rep_->cache_key_prefix, rep_->cache_key_prefix_size); char* end = EncodeVarint64(cache_key + kExtraCacheKeyPrefix, next_cache_key_id_++); assert(end - cache_key <= static_cast(kExtraCacheKeyPrefix + kMaxVarint64Length)); const Slice unique_key(cache_key, static_cast(end - cache_key)); s = block_cache->Insert(unique_key, nullptr, block.GetValue()->ApproximateMemoryUsage(), nullptr, &cache_handle); if (s.ok()) { assert(cache_handle != nullptr); iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache, cache_handle); } } } else { iter->SetCacheHandle(block.GetCacheHandle()); } block.TransferTo(iter); return iter; } // Convert an uncompressed data block (i.e CachableEntry) // into an iterator over the contents of the corresponding block. // If input_iter is null, new a iterator // If input_iter is not null, update this iter and return it template TBlockIter* BlockBasedTable::NewDataBlockIterator(const ReadOptions& ro, CachableEntry& block, TBlockIter* input_iter, Status s) const { PERF_TIMER_GUARD(new_table_block_iter_nanos); TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter; if (!s.ok()) { iter->Invalidate(s); return iter; } assert(block.GetValue() != nullptr); // Block contents are pinned and it is still pinned after the iterator // is destroyed as long as cleanup functions are moved to another object, // when: // 1. block cache handle is set to be released in cleanup function, or // 2. it's pointing to immortal source. 
If own_bytes is true then we are // not reading data from the original source, whether immortal or not. // Otherwise, the block is pinned iff the source is immortal. const bool block_contents_pinned = block.IsCached() || (!block.GetValue()->own_bytes() && rep_->immortal_table); iter = InitBlockIterator(rep_, block.GetValue(), BlockType::kData, iter, block_contents_pinned); if (!block.IsCached()) { if (!ro.fill_cache && rep_->cache_key_prefix_size != 0) { // insert a dummy record to block cache to track the memory usage Cache* const block_cache = rep_->table_options.block_cache.get(); Cache::Handle* cache_handle = nullptr; // There are two other types of cache keys: 1) SST cache key added in // `MaybeReadBlockAndLoadToCache` 2) dummy cache key added in // `write_buffer_manager`. Use longer prefix (41 bytes) to differentiate // from SST cache key(31 bytes), and use non-zero prefix to // differentiate from `write_buffer_manager` const size_t kExtraCacheKeyPrefix = kMaxVarint64Length * 4 + 1; char cache_key[kExtraCacheKeyPrefix + kMaxVarint64Length]; // Prefix: use rep_->cache_key_prefix padded by 0s memset(cache_key, 0, kExtraCacheKeyPrefix + kMaxVarint64Length); assert(rep_->cache_key_prefix_size != 0); assert(rep_->cache_key_prefix_size <= kExtraCacheKeyPrefix); memcpy(cache_key, rep_->cache_key_prefix, rep_->cache_key_prefix_size); char* end = EncodeVarint64(cache_key + kExtraCacheKeyPrefix, next_cache_key_id_++); assert(end - cache_key <= static_cast(kExtraCacheKeyPrefix + kMaxVarint64Length)); const Slice unique_key(cache_key, static_cast(end - cache_key)); s = block_cache->Insert(unique_key, nullptr, block.GetValue()->ApproximateMemoryUsage(), nullptr, &cache_handle); if (s.ok()) { assert(cache_handle != nullptr); iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache, cache_handle); } } } else { iter->SetCacheHandle(block.GetCacheHandle()); } block.TransferTo(iter); return iter; } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/table/block_based/block_based_table_reader_test.cc000066400000000000000000000315621370372246700250620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "table/block_based/block_based_table_reader.h" #include "rocksdb/file_system.h" #include "table/block_based/partitioned_index_iterator.h" #include "db/table_properties_collector.h" #include "options/options_helper.h" #include "port/port.h" #include "port/stack_trace.h" #include "table/block_based/block_based_table_builder.h" #include "table/block_based/block_based_table_factory.h" #include "table/format.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { class BlockBasedTableReaderTest : public testing::Test, public testing::WithParamInterface> { protected: CompressionType compression_type_; bool use_direct_reads_; void SetUp() override { BlockBasedTableOptions::IndexType index_type; bool no_block_cache; std::tie(compression_type_, use_direct_reads_, index_type, no_block_cache) = GetParam(); test::SetupSyncPointsToMockDirectIO(); test_dir_ = test::PerThreadDBPath("block_based_table_reader_test"); env_ = Env::Default(); fs_ = FileSystem::Default(); ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr)); BlockBasedTableOptions opts; opts.index_type = index_type; opts.no_block_cache = no_block_cache; table_factory_.reset( static_cast(NewBlockBasedTableFactory(opts))); } void TearDown() override { EXPECT_OK(test::DestroyDir(env_, test_dir_)); } // Creates a table with the specificied key value pairs (kv). void CreateTable(const std::string& table_name, const CompressionType& compression_type, const std::map& kv) { std::unique_ptr writer; NewFileWriter(table_name, &writer); // Create table builder. 
Options options; ImmutableCFOptions ioptions(options); InternalKeyComparator comparator(options.comparator); ColumnFamilyOptions cf_options; MutableCFOptions moptions(cf_options); std::vector> factories; std::unique_ptr table_builder(table_factory_->NewTableBuilder( TableBuilderOptions(ioptions, moptions, comparator, &factories, compression_type, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, kDefaultColumnFamilyName, -1 /* level */), 0 /* column_family_id */, writer.get())); // Build table. for (auto it = kv.begin(); it != kv.end(); it++) { std::string k = ToInternalKey(it->first); std::string v = it->second; table_builder->Add(k, v); } ASSERT_OK(table_builder->Finish()); } void NewBlockBasedTableReader(const FileOptions& foptions, const ImmutableCFOptions& ioptions, const InternalKeyComparator& comparator, const std::string& table_name, std::unique_ptr* table) { std::unique_ptr file; NewFileReader(table_name, foptions, &file); uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size)); std::unique_ptr table_reader; ASSERT_OK(BlockBasedTable::Open(ioptions, EnvOptions(), table_factory_->table_options(), comparator, std::move(file), file_size, &table_reader)); table->reset(reinterpret_cast(table_reader.release())); } std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } const std::shared_ptr& fs() const { return fs_; } private: std::string test_dir_; Env* env_; std::shared_ptr fs_; std::unique_ptr table_factory_; void WriteToFile(const std::string& content, const std::string& filename) { std::unique_ptr f; ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr)); ASSERT_OK(f->Append(content, IOOptions(), nullptr)); ASSERT_OK(f->Close(IOOptions(), nullptr)); } void NewFileWriter(const std::string& filename, std::unique_ptr* writer) { std::string path = Path(filename); EnvOptions env_options; FileOptions foptions; std::unique_ptr file; 
ASSERT_OK(fs_->NewWritableFile(path, foptions, &file, nullptr)); writer->reset(new WritableFileWriter(std::move(file), path, env_options)); } void NewFileReader(const std::string& filename, const FileOptions& opt, std::unique_ptr* reader) { std::string path = Path(filename); std::unique_ptr f; ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr)); reader->reset(new RandomAccessFileReader(std::move(f), path, env_)); } std::string ToInternalKey(const std::string& key) { InternalKey internal_key(key, 0, ValueType::kTypeValue); return internal_key.Encode().ToString(); } }; // Tests MultiGet in both direct IO and non-direct IO mode. // The keys should be in cache after MultiGet. TEST_P(BlockBasedTableReaderTest, MultiGet) { // Prepare key-value pairs to occupy multiple blocks. // Each value is 256B, every 16 pairs constitute 1 block. // Adjacent blocks contain values with different compression complexity: // human readable strings are easier to compress than random strings. std::map kv; { Random rnd(101); uint32_t key = 0; for (int block = 0; block < 100; block++) { for (int i = 0; i < 16; i++) { char k[9] = {0}; // Internal key is constructed directly from this key, // and internal key size is required to be >= 8 bytes, // so use %08u as the format string. sprintf(k, "%08u", key); std::string v; if (block % 2) { v = test::RandomHumanReadableString(&rnd, 256); } else { test::RandomString(&rnd, 256, &v); } kv[std::string(k)] = v; key++; } } } // Prepare keys, values, and statuses for MultiGet. 
autovector keys; autovector values; autovector statuses; { const int step = static_cast(kv.size()) / MultiGetContext::MAX_BATCH_SIZE; auto it = kv.begin(); for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE; i++) { keys.emplace_back(it->first); values.emplace_back(); statuses.emplace_back(); std::advance(it, step); } } std::string table_name = "BlockBasedTableReaderTest" + CompressionTypeToString(compression_type_); CreateTable(table_name, compression_type_, kv); std::unique_ptr table; Options options; ImmutableCFOptions ioptions(options); FileOptions foptions; foptions.use_direct_reads = use_direct_reads_; InternalKeyComparator comparator(options.comparator); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); // Ensure that keys are not in cache before MultiGet. for (auto& key : keys) { ASSERT_FALSE(table->TEST_KeyInCache(ReadOptions(), key)); } // Prepare MultiGetContext. autovector get_context; autovector key_context; autovector sorted_keys; for (size_t i = 0; i < keys.size(); ++i) { get_context.emplace_back( BytewiseComparator(), nullptr, nullptr, nullptr, GetContext::kNotFound, keys[i], &values[i], nullptr, nullptr, nullptr, true /* do_merge */, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); key_context.emplace_back(nullptr, keys[i], &values[i], nullptr, &statuses.back()); key_context.back().get_context = &get_context.back(); } for (auto& key_ctx : key_context) { sorted_keys.emplace_back(&key_ctx); } MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, ReadOptions()); // Execute MultiGet. MultiGetContext::Range range = ctx.GetMultiGetRange(); table->MultiGet(ReadOptions(), &range, nullptr); for (const Status& status : statuses) { ASSERT_OK(status); } // Check that keys are in cache after MultiGet. 
for (size_t i = 0; i < keys.size(); i++) { ASSERT_TRUE(table->TEST_KeyInCache(ReadOptions(), keys[i])); ASSERT_EQ(values[i].ToString(), kv[keys[i].ToString()]); } } class BlockBasedTableReaderTestVerifyChecksum : public BlockBasedTableReaderTest { public: BlockBasedTableReaderTestVerifyChecksum() : BlockBasedTableReaderTest() {} }; TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) { // Prepare key-value pairs to occupy multiple blocks. // Each value is 256B, every 16 pairs constitute 1 block. // Adjacent blocks contain values with different compression complexity: // human readable strings are easier to compress than random strings. Random rnd(101); std::map kv; { uint32_t key = 0; for (int block = 0; block < 800; block++) { for (int i = 0; i < 16; i++) { char k[9] = {0}; // Internal key is constructed directly from this key, // and internal key size is required to be >= 8 bytes, // so use %08u as the format string. sprintf(k, "%08u", key); std::string v; test::RandomString(&rnd, 256, &v); kv[std::string(k)] = v; key++; } } } std::string table_name = "BlockBasedTableReaderTest" + CompressionTypeToString(compression_type_); CreateTable(table_name, compression_type_, kv); std::unique_ptr table; Options options; ImmutableCFOptions ioptions(options); FileOptions foptions; foptions.use_direct_reads = use_direct_reads_; InternalKeyComparator comparator(options.comparator); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); // Use the top level iterator to find the offset/size of the first // 2nd level index block and corrupt the block IndexBlockIter iiter_on_stack; BlockCacheLookupContext context{TableReaderCaller::kUserVerifyChecksum}; InternalIteratorBase* iiter = table->NewIndexIterator( ReadOptions(), /*disable_prefix_seek=*/false, &iiter_on_stack, /*get_context=*/nullptr, &context); std::unique_ptr> iiter_unique_ptr; if (iiter != &iiter_on_stack) { iiter_unique_ptr = std::unique_ptr>(iiter); } ASSERT_OK(iiter->status()); 
iiter->SeekToFirst(); BlockHandle handle = static_cast(iiter) ->index_iter_->value() .handle; table.reset(); // Corrupt the block pointed to by handle test::CorruptFile(Path(table_name), static_cast(handle.offset()), 128); NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); Status s = table->VerifyChecksum(ReadOptions(), TableReaderCaller::kUserVerifyChecksum); ASSERT_EQ(s.code(), Status::kCorruption); } // Param 1: compression type // Param 2: whether to use direct reads // Param 3: Block Based Table Index type // Param 4: BBTO no_block_cache option #ifdef ROCKSDB_LITE // Skip direct I/O tests in lite mode since direct I/O is unsupported. INSTANTIATE_TEST_CASE_P( MultiGet, BlockBasedTableReaderTest, ::testing::Combine( ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Values(false), ::testing::Values(BlockBasedTableOptions::IndexType::kBinarySearch), ::testing::Values(false))); #else // ROCKSDB_LITE INSTANTIATE_TEST_CASE_P( MultiGet, BlockBasedTableReaderTest, ::testing::Combine( ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Bool(), ::testing::Values(BlockBasedTableOptions::IndexType::kBinarySearch), ::testing::Values(false))); #endif // ROCKSDB_LITE INSTANTIATE_TEST_CASE_P( VerifyChecksum, BlockBasedTableReaderTestVerifyChecksum, ::testing::Combine( ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Values(false), ::testing::Values( BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch), ::testing::Values(true))); } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/block_based/block_builder.cc000066400000000000000000000163241370372246700217010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // BlockBuilder generates blocks where keys are prefix-compressed: // // When we store a key, we drop the prefix shared with the previous // string. This helps reduce the space requirement significantly. // Furthermore, once every K keys, we do not apply the prefix // compression and store the entire key. We call this a "restart // point". The tail end of the block stores the offsets of all of the // restart points, and can be used to do a binary search when looking // for a particular key. Values are stored as-is (without compression) // immediately following the corresponding key. // // An entry for a particular key-value pair has the form: // shared_bytes: varint32 // unshared_bytes: varint32 // value_length: varint32 // key_delta: char[unshared_bytes] // value: char[value_length] // shared_bytes == 0 for restart points. // // The trailer of the block has the form: // restarts: uint32[num_restarts] // num_restarts: uint32 // restarts[i] contains the offset within the block of the ith restart point. 
#include "table/block_based/block_builder.h"

#include <assert.h>
#include <algorithm>
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "table/block_based/data_block_footer.h"
#include "util/coding.h"

namespace ROCKSDB_NAMESPACE {

// Sets up an empty block. The hash index builder is initialized only for
// kDataBlockBinaryAndHash; any other unknown index type trips an assert.
BlockBuilder::BlockBuilder(
    int block_restart_interval, bool use_delta_encoding,
    bool use_value_delta_encoding,
    BlockBasedTableOptions::DataBlockIndexType index_type,
    double data_block_hash_table_util_ratio)
    : block_restart_interval_(block_restart_interval),
      use_delta_encoding_(use_delta_encoding),
      use_value_delta_encoding_(use_value_delta_encoding),
      restarts_(),
      counter_(0),
      finished_(false) {
  switch (index_type) {
    case BlockBasedTableOptions::kDataBlockBinarySearch:
      break;
    case BlockBasedTableOptions::kDataBlockBinaryAndHash:
      data_block_hash_index_builder_.Initialize(
          data_block_hash_table_util_ratio);
      break;
    default:
      assert(0);
  }
  assert(block_restart_interval_ >= 1);
  restarts_.push_back(0);  // First restart point is at offset 0
  // One uint32 for the first restart offset plus one for the footer.
  estimate_ = sizeof(uint32_t) + sizeof(uint32_t);
}

// Returns the builder to its freshly constructed state.
void BlockBuilder::Reset() {
  buffer_.clear();
  restarts_.clear();
  restarts_.push_back(0);  // First restart point is at offset 0
  estimate_ = sizeof(uint32_t) + sizeof(uint32_t);
  counter_ = 0;
  finished_ = false;
  last_key_.clear();
  if (data_block_hash_index_builder_.Valid()) {
    data_block_hash_index_builder_.Reset();
  }
}

// Exchanges the internal buffer with `buffer`, then resets the builder.
void BlockBuilder::SwapAndReset(std::string& buffer) {
  std::swap(buffer_, buffer);
  Reset();
}

// Estimates the block size after appending (key, value) without modifying the
// builder.
size_t BlockBuilder::EstimateSizeAfterKV(const Slice& key,
                                         const Slice& value) const {
  size_t estimate = CurrentSizeEstimate();
  // Note: this is an imprecise estimate as it accounts for the whole key size
  // instead of non-shared key size.
  estimate += key.size();
  // In value delta encoding we estimate the value delta size as half the full
  // value size since only the size field of block handle is encoded.
  estimate +=
      (!use_value_delta_encoding_ || (counter_ >= block_restart_interval_))
          ? value.size()
          : value.size() / 2;

  if (counter_ >= block_restart_interval_) {
    estimate += sizeof(uint32_t);  // a new restart entry.
  }

  estimate += sizeof(int32_t);  // varint for shared prefix length.
  // Note: this is an imprecise estimate as we will have to encode two sizes,
  // one for shared key and one for non-shared key.
  estimate += VarintLength(key.size());  // varint for key length.
  if (!use_value_delta_encoding_ || (counter_ >= block_restart_interval_)) {
    estimate += VarintLength(value.size());  // varint for value length.
  }

  return estimate;
}

// Appends the restart array, the optional hash index and the packed footer to
// the buffer, marks the block finished and returns a slice over the buffer.
Slice BlockBuilder::Finish() {
  // Append restart array
  for (size_t i = 0; i < restarts_.size(); i++) {
    PutFixed32(&buffer_, restarts_[i]);
  }

  uint32_t num_restarts = static_cast<uint32_t>(restarts_.size());
  BlockBasedTableOptions::DataBlockIndexType index_type =
      BlockBasedTableOptions::kDataBlockBinarySearch;
  // The hash index is emitted only when the builder is valid and the block is
  // small enough for it; otherwise the block is written as binary-search only.
  if (data_block_hash_index_builder_.Valid() &&
      CurrentSizeEstimate() <= kMaxBlockSizeSupportedByHashIndex) {
    data_block_hash_index_builder_.Finish(buffer_);
    index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash;
  }

  // footer is a packed format of data_block_index_type and num_restarts
  uint32_t block_footer = PackIndexTypeAndNumRestarts(index_type, num_restarts);

  PutFixed32(&buffer_, block_footer);
  finished_ = true;
  return Slice(buffer_);
}

// Appends one entry. When value delta encoding is enabled, `delta_value` must
// be non-null (asserted) and is written instead of `value` for entries that
// share a key prefix with the previous entry.
void BlockBuilder::Add(const Slice& key, const Slice& value,
                       const Slice* const delta_value) {
  assert(!finished_);
  assert(counter_ <= block_restart_interval_);
  assert(!use_value_delta_encoding_ || delta_value);
  size_t shared = 0;  // number of bytes shared with prev key
  if (counter_ >= block_restart_interval_) {
    // Restart compression
    restarts_.push_back(static_cast<uint32_t>(buffer_.size()));
    estimate_ += sizeof(uint32_t);
    counter_ = 0;

    if (use_delta_encoding_) {
      // Update state
      last_key_.assign(key.data(), key.size());
    }
  } else if (use_delta_encoding_) {
    Slice last_key_piece(last_key_);
    // See how much sharing to do with previous string
    shared = key.difference_offset(last_key_piece);

    // Update state
    // We used to just copy the changed data here, but it appears to be
    // faster to just copy the whole thing.
    last_key_.assign(key.data(), key.size());
  }

  const size_t non_shared = key.size() - shared;
  const size_t curr_size = buffer_.size();

  if (use_value_delta_encoding_) {
    // Add "<shared><non_shared>" to buffer_
    PutVarint32Varint32(&buffer_, static_cast<uint32_t>(shared),
                        static_cast<uint32_t>(non_shared));
  } else {
    // Add "<shared><non_shared><value_size>" to buffer_
    PutVarint32Varint32Varint32(&buffer_, static_cast<uint32_t>(shared),
                                static_cast<uint32_t>(non_shared),
                                static_cast<uint32_t>(value.size()));
  }

  // Add string delta to buffer_ followed by value
  buffer_.append(key.data() + shared, non_shared);
  // Use value delta encoding only when the key has shared bytes. This would
  // simplify the decoding, where it can figure which decoding to use simply by
  // looking at the shared bytes size.
  if (shared != 0 && use_value_delta_encoding_) {
    buffer_.append(delta_value->data(), delta_value->size());
  } else {
    buffer_.append(value.data(), value.size());
  }

  if (data_block_hash_index_builder_.Valid()) {
    // Map the user key to the index of the restart interval it belongs to.
    data_block_hash_index_builder_.Add(ExtractUserKey(key),
                                       restarts_.size() - 1);
  }

  counter_++;
  estimate_ += buffer_.size() - curr_size;
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/block_builder.h000066400000000000000000000060011370372246700215320ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #include #include #include "rocksdb/slice.h" #include "rocksdb/table.h" #include "table/block_based/data_block_hash_index.h" namespace ROCKSDB_NAMESPACE { class BlockBuilder { public: BlockBuilder(const BlockBuilder&) = delete; void operator=(const BlockBuilder&) = delete; explicit BlockBuilder(int block_restart_interval, bool use_delta_encoding = true, bool use_value_delta_encoding = false, BlockBasedTableOptions::DataBlockIndexType index_type = BlockBasedTableOptions::kDataBlockBinarySearch, double data_block_hash_table_util_ratio = 0.75); // Reset the contents as if the BlockBuilder was just constructed. void Reset(); // Swap the contents in BlockBuilder with buffer, then reset the BlockBuilder. void SwapAndReset(std::string& buffer); // REQUIRES: Finish() has not been called since the last call to Reset(). // REQUIRES: key is larger than any previously added key void Add(const Slice& key, const Slice& value, const Slice* const delta_value = nullptr); // Finish building the block and return a slice that refers to the // block contents. The returned slice will remain valid for the // lifetime of this builder or until Reset() is called. Slice Finish(); // Returns an estimate of the current (uncompressed) size of the block // we are building. inline size_t CurrentSizeEstimate() const { return estimate_ + (data_block_hash_index_builder_.Valid() ? data_block_hash_index_builder_.EstimateSize() : 0); } // Returns an estimated block size after appending key and value. 
size_t EstimateSizeAfterKV(const Slice& key, const Slice& value) const; // Return true iff no entries have been added since the last Reset() bool empty() const { return buffer_.empty(); } private: const int block_restart_interval_; // TODO(myabandeh): put it into a separate IndexBlockBuilder const bool use_delta_encoding_; // Refer to BlockIter::DecodeCurrentValue for format of delta encoded values const bool use_value_delta_encoding_; std::string buffer_; // Destination buffer std::vector restarts_; // Restart points size_t estimate_; int counter_; // Number of entries emitted since restart bool finished_; // Has Finish() been called? std::string last_key_; DataBlockHashIndexBuilder data_block_hash_index_builder_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_prefetcher.cc000066400000000000000000000052231370372246700223760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/block_prefetcher.h" namespace ROCKSDB_NAMESPACE { void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, const BlockHandle& handle, size_t readahead_size, bool is_for_compaction) { if (!is_for_compaction) { if (readahead_size == 0) { // Implicit auto readahead num_file_reads_++; if (num_file_reads_ > BlockBasedTable::kMinNumFileReadsToStartAutoReadahead) { if (!rep->file->use_direct_io() && (handle.offset() + static_cast(block_size(handle)) > readahead_limit_)) { // Buffered I/O // Discarding the return status of Prefetch calls intentionally, as // we can fallback to reading from disk if Prefetch fails. rep->file->Prefetch(handle.offset(), readahead_size_); readahead_limit_ = static_cast(handle.offset() + readahead_size_); // Keep exponentially increasing readahead size until // kMaxAutoReadaheadSize. readahead_size_ = std::min(BlockBasedTable::kMaxAutoReadaheadSize, readahead_size_ * 2); } else if (rep->file->use_direct_io() && !prefetch_buffer_) { // Direct I/O // Let FilePrefetchBuffer take care of the readahead. rep->CreateFilePrefetchBuffer(BlockBasedTable::kInitAutoReadaheadSize, BlockBasedTable::kMaxAutoReadaheadSize, &prefetch_buffer_); } } } else if (!prefetch_buffer_) { // Explicit user requested readahead // The actual condition is: // if (readahead_size != 0 && !prefetch_buffer_) rep->CreateFilePrefetchBuffer(readahead_size, readahead_size, &prefetch_buffer_); } } else if (!prefetch_buffer_) { rep->CreateFilePrefetchBuffer(compaction_readahead_size_, compaction_readahead_size_, &prefetch_buffer_); } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_prefetcher.h000066400000000000000000000025401370372246700222370ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "table/block_based/block_based_table_reader.h" namespace ROCKSDB_NAMESPACE { class BlockPrefetcher { public: explicit BlockPrefetcher(size_t compaction_readahead_size) : compaction_readahead_size_(compaction_readahead_size) {} void PrefetchIfNeeded(const BlockBasedTable::Rep* rep, const BlockHandle& handle, size_t readahead_size, bool is_for_compaction); FilePrefetchBuffer* prefetch_buffer() { return prefetch_buffer_.get(); } private: // Readahead size used in compaction, its value is used only if // lookup_context_.caller = kCompaction. size_t compaction_readahead_size_; size_t readahead_size_ = BlockBasedTable::kInitAutoReadaheadSize; size_t readahead_limit_ = 0; int64_t num_file_reads_ = 0; std::unique_ptr prefetch_buffer_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_prefix_index.cc000066400000000000000000000174161370372246700227420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "table/block_based/block_prefix_index.h" #include #include "memory/arena.h" #include "rocksdb/comparator.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "util/coding.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { inline uint32_t Hash(const Slice& s) { return ROCKSDB_NAMESPACE::Hash(s.data(), s.size(), 0); } inline uint32_t PrefixToBucket(const Slice& prefix, uint32_t num_buckets) { return Hash(prefix) % num_buckets; } // The prefix block index is simply a bucket array, with each entry pointing to // the blocks that span the prefixes hashed to this bucket. // // To reduce memory footprint, if there is only one block per bucket, the entry // stores the block id directly. If there are more than one blocks per bucket, // because of hash collision or a single prefix spanning multiple blocks, // the entry points to an array of block ids. The block array is an array of // uint32_t's. The first uint32_t indicates the total number of blocks, followed // by the block ids. // // To differentiate the two cases, the high order bit of the entry indicates // whether it is a 'pointer' into a separate block array. // 0x7FFFFFFF is reserved for empty bucket. 
const uint32_t kNoneBlock = 0x7FFFFFFF; const uint32_t kBlockArrayMask = 0x80000000; inline bool IsNone(uint32_t block_id) { return block_id == kNoneBlock; } inline bool IsBlockId(uint32_t block_id) { return (block_id & kBlockArrayMask) == 0; } inline uint32_t DecodeIndex(uint32_t block_id) { uint32_t index = block_id ^ kBlockArrayMask; assert(index < kBlockArrayMask); return index; } inline uint32_t EncodeIndex(uint32_t index) { assert(index < kBlockArrayMask); return index | kBlockArrayMask; } // temporary storage for prefix information during index building struct PrefixRecord { Slice prefix; uint32_t start_block; uint32_t end_block; uint32_t num_blocks; PrefixRecord* next; }; class BlockPrefixIndex::Builder { public: explicit Builder(const SliceTransform* internal_prefix_extractor) : internal_prefix_extractor_(internal_prefix_extractor) {} void Add(const Slice& key_prefix, uint32_t start_block, uint32_t num_blocks) { PrefixRecord* record = reinterpret_cast( arena_.AllocateAligned(sizeof(PrefixRecord))); record->prefix = key_prefix; record->start_block = start_block; record->end_block = start_block + num_blocks - 1; record->num_blocks = num_blocks; prefixes_.push_back(record); } BlockPrefixIndex* Finish() { // For now, use roughly 1:1 prefix to bucket ratio. uint32_t num_buckets = static_cast(prefixes_.size()) + 1; // Collect prefix records that hash to the same bucket, into a single // linklist. std::vector prefixes_per_bucket(num_buckets, nullptr); std::vector num_blocks_per_bucket(num_buckets, 0); for (PrefixRecord* current : prefixes_) { uint32_t bucket = PrefixToBucket(current->prefix, num_buckets); // merge the prefix block span if the first block of this prefix is // connected to the last block of the previous prefix. 
PrefixRecord* prev = prefixes_per_bucket[bucket]; if (prev) { assert(current->start_block >= prev->end_block); auto distance = current->start_block - prev->end_block; if (distance <= 1) { prev->end_block = current->end_block; prev->num_blocks = prev->end_block - prev->start_block + 1; num_blocks_per_bucket[bucket] += (current->num_blocks + distance - 1); continue; } } current->next = prev; prefixes_per_bucket[bucket] = current; num_blocks_per_bucket[bucket] += current->num_blocks; } // Calculate the block array buffer size uint32_t total_block_array_entries = 0; for (uint32_t i = 0; i < num_buckets; i++) { uint32_t num_blocks = num_blocks_per_bucket[i]; if (num_blocks > 1) { total_block_array_entries += (num_blocks + 1); } } // Populate the final prefix block index uint32_t* block_array_buffer = new uint32_t[total_block_array_entries]; uint32_t* buckets = new uint32_t[num_buckets]; uint32_t offset = 0; for (uint32_t i = 0; i < num_buckets; i++) { uint32_t num_blocks = num_blocks_per_bucket[i]; if (num_blocks == 0) { assert(prefixes_per_bucket[i] == nullptr); buckets[i] = kNoneBlock; } else if (num_blocks == 1) { assert(prefixes_per_bucket[i] != nullptr); assert(prefixes_per_bucket[i]->next == nullptr); buckets[i] = prefixes_per_bucket[i]->start_block; } else { assert(total_block_array_entries > 0); assert(prefixes_per_bucket[i] != nullptr); buckets[i] = EncodeIndex(offset); block_array_buffer[offset] = num_blocks; uint32_t* last_block = &block_array_buffer[offset + num_blocks]; auto current = prefixes_per_bucket[i]; // populate block ids from largest to smallest while (current != nullptr) { for (uint32_t iter = 0; iter < current->num_blocks; iter++) { *last_block = current->end_block - iter; last_block--; } current = current->next; } assert(last_block == &block_array_buffer[offset]); offset += (num_blocks + 1); } } assert(offset == total_block_array_entries); return new BlockPrefixIndex(internal_prefix_extractor_, num_buckets, buckets, total_block_array_entries, 
block_array_buffer); } private: const SliceTransform* internal_prefix_extractor_; std::vector prefixes_; Arena arena_; }; Status BlockPrefixIndex::Create(const SliceTransform* internal_prefix_extractor, const Slice& prefixes, const Slice& prefix_meta, BlockPrefixIndex** prefix_index) { uint64_t pos = 0; auto meta_pos = prefix_meta; Status s; Builder builder(internal_prefix_extractor); while (!meta_pos.empty()) { uint32_t prefix_size = 0; uint32_t entry_index = 0; uint32_t num_blocks = 0; if (!GetVarint32(&meta_pos, &prefix_size) || !GetVarint32(&meta_pos, &entry_index) || !GetVarint32(&meta_pos, &num_blocks)) { s = Status::Corruption( "Corrupted prefix meta block: unable to read from it."); break; } if (pos + prefix_size > prefixes.size()) { s = Status::Corruption( "Corrupted prefix meta block: size inconsistency."); break; } Slice prefix(prefixes.data() + pos, prefix_size); builder.Add(prefix, entry_index, num_blocks); pos += prefix_size; } if (s.ok() && pos != prefixes.size()) { s = Status::Corruption("Corrupted prefix meta block"); } if (s.ok()) { *prefix_index = builder.Finish(); } return s; } uint32_t BlockPrefixIndex::GetBlocks(const Slice& key, uint32_t** blocks) { Slice prefix = internal_prefix_extractor_->Transform(key); uint32_t bucket = PrefixToBucket(prefix, num_buckets_); uint32_t block_id = buckets_[bucket]; if (IsNone(block_id)) { return 0; } else if (IsBlockId(block_id)) { *blocks = &buckets_[bucket]; return 1; } else { uint32_t index = DecodeIndex(block_id); assert(index < num_block_array_buffer_entries_); *blocks = &block_array_buffer_[index + 1]; uint32_t num_blocks = block_array_buffer_[index]; assert(num_blocks > 1); assert(index + num_blocks < num_block_array_buffer_entries_); return num_blocks; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_prefix_index.h000066400000000000000000000043151370372246700225760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "rocksdb/status.h" namespace ROCKSDB_NAMESPACE { class Comparator; class Iterator; class Slice; class SliceTransform; // Build a hash-based index to speed up the lookup for "index block". // BlockHashIndex accepts a key and, if found, returns its restart index within // that index block. class BlockPrefixIndex { public: // Maps a key to a list of data blocks that could potentially contain // the key, based on the prefix. // Returns the total number of relevant blocks, 0 means the key does // not exist. uint32_t GetBlocks(const Slice& key, uint32_t** blocks); size_t ApproximateMemoryUsage() const { return sizeof(BlockPrefixIndex) + (num_block_array_buffer_entries_ + num_buckets_) * sizeof(uint32_t); } // Create hash index by reading from the metadata blocks. // @params prefixes: a sequence of prefixes. // @params prefix_meta: contains the "metadata" to of the prefixes. 
static Status Create(const SliceTransform* hash_key_extractor, const Slice& prefixes, const Slice& prefix_meta, BlockPrefixIndex** prefix_index); ~BlockPrefixIndex() { delete[] buckets_; delete[] block_array_buffer_; } private: class Builder; friend Builder; BlockPrefixIndex(const SliceTransform* internal_prefix_extractor, uint32_t num_buckets, uint32_t* buckets, uint32_t num_block_array_buffer_entries, uint32_t* block_array_buffer) : internal_prefix_extractor_(internal_prefix_extractor), num_buckets_(num_buckets), num_block_array_buffer_entries_(num_block_array_buffer_entries), buckets_(buckets), block_array_buffer_(block_array_buffer) {} const SliceTransform* internal_prefix_extractor_; uint32_t num_buckets_; uint32_t num_block_array_buffer_entries_; uint32_t* buckets_; uint32_t* block_array_buffer_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/block_test.cc000066400000000000000000000525211370372246700212310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include #include #include #include #include #include #include #include "db/dbformat.h" #include "db/memtable.h" #include "db/write_batch_internal.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "table/block_based/block.h" #include "table/block_based/block_builder.h" #include "table/format.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { static std::string RandomString(Random *rnd, int len) { std::string r; test::RandomString(rnd, len, &r); return r; } std::string GenerateKey(int primary_key, int secondary_key, int padding_size, Random *rnd) { char buf[50]; char *p = &buf[0]; snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); std::string k(p); if (padding_size) { k += RandomString(rnd, padding_size); } return k; } // Generate random key value pairs. // The generated key will be sorted. You can tune the parameters to generated // different kinds of test key/value pairs for different scenario. 
void GenerateRandomKVs(std::vector *keys, std::vector *values, const int from, const int len, const int step = 1, const int padding_size = 0, const int keys_share_prefix = 1) { Random rnd(302); // generate different prefix for (int i = from; i < from + len; i += step) { // generating keys that shares the prefix for (int j = 0; j < keys_share_prefix; ++j) { keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); // 100 bytes values values->emplace_back(RandomString(&rnd, 100)); } } } class BlockTest : public testing::Test {}; // block test TEST_F(BlockTest, SimpleTest) { Random rnd(301); Options options = Options(); std::vector keys; std::vector values; BlockBuilder builder(16); int num_records = 100000; GenerateRandomKVs(&keys, &values, 0, num_records); // add a bunch of records to a block for (int i = 0; i < num_records; i++) { builder.Add(keys[i], values[i]); } // read serialized contents of the block Slice rawblock = builder.Finish(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); // read contents of block sequentially int count = 0; InternalIterator *iter = reader.NewDataIterator( options.comparator, options.comparator, kDisableGlobalSequenceNumber); for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) { // read kv from block Slice k = iter->key(); Slice v = iter->value(); // compare with lookaside array ASSERT_EQ(k.ToString().compare(keys[count]), 0); ASSERT_EQ(v.ToString().compare(values[count]), 0); } delete iter; // read block contents randomly iter = reader.NewDataIterator(options.comparator, options.comparator, kDisableGlobalSequenceNumber); for (int i = 0; i < num_records; i++) { // find a random key in the lookaside array int index = rnd.Uniform(num_records); Slice k(keys[index]); // search in block for this key iter->Seek(k); ASSERT_TRUE(iter->Valid()); Slice v = iter->value(); ASSERT_EQ(v.ToString().compare(values[index]), 0); } delete iter; } // return the block contents 
BlockContents GetBlockContents(std::unique_ptr *builder, const std::vector &keys, const std::vector &values, const int /*prefix_group_size*/ = 1) { builder->reset(new BlockBuilder(1 /* restart interval */)); // Add only half of the keys for (size_t i = 0; i < keys.size(); ++i) { (*builder)->Add(keys[i], values[i]); } Slice rawblock = (*builder)->Finish(); BlockContents contents; contents.data = rawblock; return contents; } void CheckBlockContents(BlockContents contents, const int max_key, const std::vector &keys, const std::vector &values) { const size_t prefix_size = 6; // create block reader BlockContents contents_ref(contents.data); Block reader1(std::move(contents)); Block reader2(std::move(contents_ref)); std::unique_ptr prefix_extractor( NewFixedPrefixTransform(prefix_size)); std::unique_ptr regular_iter( reader2.NewDataIterator(BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber)); // Seek existent keys for (size_t i = 0; i < keys.size(); i++) { regular_iter->Seek(keys[i]); ASSERT_OK(regular_iter->status()); ASSERT_TRUE(regular_iter->Valid()); Slice v = regular_iter->value(); ASSERT_EQ(v.ToString().compare(values[i]), 0); } // Seek non-existent keys. // For hash index, if no key with a given prefix is not found, iterator will // simply be set as invalid; whereas the binary search based iterator will // return the one that is closest. for (int i = 1; i < max_key - 1; i += 2) { auto key = GenerateKey(i, 0, 0, nullptr); regular_iter->Seek(key); ASSERT_TRUE(regular_iter->Valid()); } } // In this test case, no two key share same prefix. 
TEST_F(BlockTest, SimpleIndexHash) { const int kMaxKey = 100000; std::vector keys; std::vector values; GenerateRandomKVs(&keys, &values, 0 /* first key id */, kMaxKey /* last key id */, 2 /* step */, 8 /* padding size (8 bytes randomly generated suffix) */); std::unique_ptr builder; auto contents = GetBlockContents(&builder, keys, values); CheckBlockContents(std::move(contents), kMaxKey, keys, values); } TEST_F(BlockTest, IndexHashWithSharedPrefix) { const int kMaxKey = 100000; // for each prefix, there will be 5 keys starts with it. const int kPrefixGroup = 5; std::vector keys; std::vector values; // Generate keys with same prefix. GenerateRandomKVs(&keys, &values, 0, // first key id kMaxKey, // last key id 2, // step 10, // padding size, kPrefixGroup); std::unique_ptr builder; auto contents = GetBlockContents(&builder, keys, values, kPrefixGroup); CheckBlockContents(std::move(contents), kMaxKey, keys, values); } // A slow and accurate version of BlockReadAmpBitmap that simply store // all the marked ranges in a set. class BlockReadAmpBitmapSlowAndAccurate { public: void Mark(size_t start_offset, size_t end_offset) { assert(end_offset >= start_offset); marked_ranges_.emplace(end_offset, start_offset); } void ResetCheckSequence() { iter_valid_ = false; } // Return true if any byte in this range was Marked // This does linear search from the previous position. When calling // multiple times, `offset` needs to be incremental to get correct results. // Call ResetCheckSequence() to reset it. bool IsPinMarked(size_t offset) { if (iter_valid_) { // Has existing iterator, try linear search from // the iterator. for (int i = 0; i < 64; i++) { if (offset < iter_->second) { return false; } if (offset <= iter_->first) { return true; } iter_++; if (iter_ == marked_ranges_.end()) { iter_valid_ = false; return false; } } } // Initial call or have linear searched too many times. // Do binary search. 
iter_ = marked_ranges_.lower_bound( std::make_pair(offset, static_cast(0))); if (iter_ == marked_ranges_.end()) { iter_valid_ = false; return false; } iter_valid_ = true; return offset <= iter_->first && offset >= iter_->second; } private: std::set> marked_ranges_; std::set>::iterator iter_; bool iter_valid_ = false; }; TEST_F(BlockTest, BlockReadAmpBitmap) { uint32_t pin_offset = 0; SyncPoint::GetInstance()->SetCallBack( "BlockReadAmpBitmap:rnd", [&pin_offset](void *arg) { pin_offset = *(static_cast(arg)); }); SyncPoint::GetInstance()->EnableProcessing(); std::vector block_sizes = { 1, // 1 byte 32, // 32 bytes 61, // 61 bytes 64, // 64 bytes 512, // 0.5 KB 1024, // 1 KB 1024 * 4, // 4 KB 1024 * 10, // 10 KB 1024 * 50, // 50 KB 1024 * 1024 * 4, // 5 MB 777, 124653, }; const size_t kBytesPerBit = 64; Random rnd(301); for (size_t block_size : block_sizes) { std::shared_ptr stats = ROCKSDB_NAMESPACE::CreateDBStatistics(); BlockReadAmpBitmap read_amp_bitmap(block_size, kBytesPerBit, stats.get()); BlockReadAmpBitmapSlowAndAccurate read_amp_slow_and_accurate; size_t needed_bits = (block_size / kBytesPerBit); if (block_size % kBytesPerBit != 0) { needed_bits++; } ASSERT_EQ(stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES), block_size); // Generate some random entries std::vector random_entry_offsets; for (int i = 0; i < 1000; i++) { random_entry_offsets.push_back(rnd.Next() % block_size); } std::sort(random_entry_offsets.begin(), random_entry_offsets.end()); auto it = std::unique(random_entry_offsets.begin(), random_entry_offsets.end()); random_entry_offsets.resize( std::distance(random_entry_offsets.begin(), it)); std::vector> random_entries; for (size_t i = 0; i < random_entry_offsets.size(); i++) { size_t entry_start = random_entry_offsets[i]; size_t entry_end; if (i + 1 < random_entry_offsets.size()) { entry_end = random_entry_offsets[i + 1] - 1; } else { entry_end = block_size - 1; } random_entries.emplace_back(entry_start, entry_end); } for (size_t i = 0; i < 
random_entries.size(); i++) { read_amp_slow_and_accurate.ResetCheckSequence(); auto ¤t_entry = random_entries[rnd.Next() % random_entries.size()]; read_amp_bitmap.Mark(static_cast(current_entry.first), static_cast(current_entry.second)); read_amp_slow_and_accurate.Mark(current_entry.first, current_entry.second); size_t total_bits = 0; for (size_t bit_idx = 0; bit_idx < needed_bits; bit_idx++) { total_bits += read_amp_slow_and_accurate.IsPinMarked( bit_idx * kBytesPerBit + pin_offset); } size_t expected_estimate_useful = total_bits * kBytesPerBit; size_t got_estimate_useful = stats->getTickerCount(READ_AMP_ESTIMATE_USEFUL_BYTES); ASSERT_EQ(expected_estimate_useful, got_estimate_useful); } } SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } TEST_F(BlockTest, BlockWithReadAmpBitmap) { Random rnd(301); Options options = Options(); std::vector keys; std::vector values; BlockBuilder builder(16); int num_records = 10000; GenerateRandomKVs(&keys, &values, 0, num_records, 1); // add a bunch of records to a block for (int i = 0; i < num_records; i++) { builder.Add(keys[i], values[i]); } Slice rawblock = builder.Finish(); const size_t kBytesPerBit = 8; // Read the block sequentially using Next() { std::shared_ptr stats = ROCKSDB_NAMESPACE::CreateDBStatistics(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents), kBytesPerBit, stats.get()); // read contents of block sequentially size_t read_bytes = 0; DataBlockIter *iter = reader.NewDataIterator( options.comparator, options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { iter->value(); read_bytes += iter->TEST_CurrentEntrySize(); double semi_acc_read_amp = static_cast(read_bytes) / rawblock.size(); double read_amp = static_cast(stats->getTickerCount( READ_AMP_ESTIMATE_USEFUL_BYTES)) / stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES); // Error in read 
amplification will be less than 1% if we are reading // sequentially double error_pct = fabs(semi_acc_read_amp - read_amp) * 100; EXPECT_LT(error_pct, 1); } delete iter; } // Read the block sequentially using Seek() { std::shared_ptr stats = ROCKSDB_NAMESPACE::CreateDBStatistics(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents), kBytesPerBit, stats.get()); size_t read_bytes = 0; DataBlockIter *iter = reader.NewDataIterator( options.comparator, options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get()); for (int i = 0; i < num_records; i++) { Slice k(keys[i]); // search in block for this key iter->Seek(k); iter->value(); read_bytes += iter->TEST_CurrentEntrySize(); double semi_acc_read_amp = static_cast(read_bytes) / rawblock.size(); double read_amp = static_cast(stats->getTickerCount( READ_AMP_ESTIMATE_USEFUL_BYTES)) / stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES); // Error in read amplification will be less than 1% if we are reading // sequentially double error_pct = fabs(semi_acc_read_amp - read_amp) * 100; EXPECT_LT(error_pct, 1); } delete iter; } // Read the block randomly { std::shared_ptr stats = ROCKSDB_NAMESPACE::CreateDBStatistics(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents), kBytesPerBit, stats.get()); size_t read_bytes = 0; DataBlockIter *iter = reader.NewDataIterator( options.comparator, options.comparator, kDisableGlobalSequenceNumber, nullptr, stats.get()); std::unordered_set read_keys; for (int i = 0; i < num_records; i++) { int index = rnd.Uniform(num_records); Slice k(keys[index]); iter->Seek(k); iter->value(); if (read_keys.find(index) == read_keys.end()) { read_keys.insert(index); read_bytes += iter->TEST_CurrentEntrySize(); } double semi_acc_read_amp = static_cast(read_bytes) / rawblock.size(); double read_amp = static_cast(stats->getTickerCount( READ_AMP_ESTIMATE_USEFUL_BYTES)) / 
stats->getTickerCount(READ_AMP_TOTAL_READ_BYTES); double error_pct = fabs(semi_acc_read_amp - read_amp) * 100; // Error in read amplification will be less than 2% if we are reading // randomly EXPECT_LT(error_pct, 2); } delete iter; } } TEST_F(BlockTest, ReadAmpBitmapPow2) { std::shared_ptr stats = ROCKSDB_NAMESPACE::CreateDBStatistics(); ASSERT_EQ(BlockReadAmpBitmap(100, 1, stats.get()).GetBytesPerBit(), 1u); ASSERT_EQ(BlockReadAmpBitmap(100, 2, stats.get()).GetBytesPerBit(), 2u); ASSERT_EQ(BlockReadAmpBitmap(100, 4, stats.get()).GetBytesPerBit(), 4u); ASSERT_EQ(BlockReadAmpBitmap(100, 8, stats.get()).GetBytesPerBit(), 8u); ASSERT_EQ(BlockReadAmpBitmap(100, 16, stats.get()).GetBytesPerBit(), 16u); ASSERT_EQ(BlockReadAmpBitmap(100, 32, stats.get()).GetBytesPerBit(), 32u); ASSERT_EQ(BlockReadAmpBitmap(100, 3, stats.get()).GetBytesPerBit(), 2u); ASSERT_EQ(BlockReadAmpBitmap(100, 7, stats.get()).GetBytesPerBit(), 4u); ASSERT_EQ(BlockReadAmpBitmap(100, 11, stats.get()).GetBytesPerBit(), 8u); ASSERT_EQ(BlockReadAmpBitmap(100, 17, stats.get()).GetBytesPerBit(), 16u); ASSERT_EQ(BlockReadAmpBitmap(100, 33, stats.get()).GetBytesPerBit(), 32u); ASSERT_EQ(BlockReadAmpBitmap(100, 35, stats.get()).GetBytesPerBit(), 32u); } class IndexBlockTest : public testing::Test, public testing::WithParamInterface> { public: IndexBlockTest() = default; bool useValueDeltaEncoding() const { return std::get<0>(GetParam()); } bool includeFirstKey() const { return std::get<1>(GetParam()); } }; // Similar to GenerateRandomKVs but for index block contents. void GenerateRandomIndexEntries(std::vector *separators, std::vector *block_handles, std::vector *first_keys, const int len) { Random rnd(42); // For each of `len` blocks, we need to generate a first and last key. // Let's generate n*2 random keys, sort them, group into consecutive pairs. std::set keys; while ((int)keys.size() < len * 2) { // Keys need to be at least 8 bytes long to look like internal keys. 
keys.insert(test::RandomKey(&rnd, 12)); } uint64_t offset = 0; for (auto it = keys.begin(); it != keys.end();) { first_keys->emplace_back(*it++); separators->emplace_back(*it++); uint64_t size = rnd.Uniform(1024 * 16); BlockHandle handle(offset, size); offset += size + kBlockTrailerSize; block_handles->emplace_back(handle); } } TEST_P(IndexBlockTest, IndexValueEncodingTest) { Random rnd(301); Options options = Options(); std::vector separators; std::vector block_handles; std::vector first_keys; const bool kUseDeltaEncoding = true; BlockBuilder builder(16, kUseDeltaEncoding, useValueDeltaEncoding()); int num_records = 100; GenerateRandomIndexEntries(&separators, &block_handles, &first_keys, num_records); BlockHandle last_encoded_handle; for (int i = 0; i < num_records; i++) { IndexValue entry(block_handles[i], first_keys[i]); std::string encoded_entry; std::string delta_encoded_entry; entry.EncodeTo(&encoded_entry, includeFirstKey(), nullptr); if (useValueDeltaEncoding() && i > 0) { entry.EncodeTo(&delta_encoded_entry, includeFirstKey(), &last_encoded_handle); } last_encoded_handle = entry.handle; const Slice delta_encoded_entry_slice(delta_encoded_entry); builder.Add(separators[i], encoded_entry, &delta_encoded_entry_slice); } // read serialized contents of the block Slice rawblock = builder.Finish(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); const bool kTotalOrderSeek = true; const bool kIncludesSeq = true; const bool kValueIsFull = !useValueDeltaEncoding(); IndexBlockIter *kNullIter = nullptr; Statistics *kNullStats = nullptr; // read contents of block sequentially InternalIteratorBase *iter = reader.NewIndexIterator( options.comparator, options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats, kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kValueIsFull); iter->SeekToFirst(); for (int index = 0; index < num_records; ++index) { ASSERT_TRUE(iter->Valid()); Slice k = iter->key(); 
IndexValue v = iter->value(); EXPECT_EQ(separators[index], k.ToString()); EXPECT_EQ(block_handles[index].offset(), v.handle.offset()); EXPECT_EQ(block_handles[index].size(), v.handle.size()); EXPECT_EQ(includeFirstKey() ? first_keys[index] : "", v.first_internal_key.ToString()); iter->Next(); } delete iter; // read block contents randomly iter = reader.NewIndexIterator(options.comparator, options.comparator, kDisableGlobalSequenceNumber, kNullIter, kNullStats, kTotalOrderSeek, includeFirstKey(), kIncludesSeq, kValueIsFull); for (int i = 0; i < num_records * 2; i++) { // find a random key in the lookaside array int index = rnd.Uniform(num_records); Slice k(separators[index]); // search in block for this key iter->Seek(k); ASSERT_TRUE(iter->Valid()); IndexValue v = iter->value(); EXPECT_EQ(separators[index], iter->key().ToString()); EXPECT_EQ(block_handles[index].offset(), v.handle.offset()); EXPECT_EQ(block_handles[index].size(), v.handle.size()); EXPECT_EQ(includeFirstKey() ? first_keys[index] : "", v.first_internal_key.ToString()); } delete iter; } INSTANTIATE_TEST_CASE_P(P, IndexBlockTest, ::testing::Values(std::make_tuple(false, false), std::make_tuple(false, true), std::make_tuple(true, false), std::make_tuple(true, true))); } // namespace ROCKSDB_NAMESPACE int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/block_based/block_type.h000066400000000000000000000014321370372246700210700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include namespace ROCKSDB_NAMESPACE { // Represents the types of blocks used in the block based table format. // See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format // for details. 
enum class BlockType : uint8_t { kData, kFilter, kProperties, kCompressionDictionary, kRangeDeletion, kHashIndexPrefixes, kHashIndexMetadata, kMetaIndex, kIndex, // Note: keep kInvalid the last value when adding new enum values. kInvalid }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/cachable_entry.h000066400000000000000000000140101370372246700216740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "port/likely.h" #include "rocksdb/cache.h" #include "rocksdb/cleanable.h" namespace ROCKSDB_NAMESPACE { // CachableEntry is a handle to an object that may or may not be in the block // cache. It is used in a variety of ways: // // 1) It may refer to an object in the block cache. In this case, cache_ and // cache_handle_ are not nullptr, and the cache handle has to be released when // the CachableEntry is destroyed (the lifecycle of the cached object, on the // other hand, is managed by the cache itself). // 2) It may uniquely own the (non-cached) object it refers to (examples include // a block read directly from file, or uncompressed blocks when there is a // compressed block cache but no uncompressed block cache). In such cases, the // object has to be destroyed when the CachableEntry is destroyed. // 3) It may point to an object (cached or not) without owning it. In this case, // no action is needed when the CachableEntry is destroyed. // 4) Sometimes, management of a cached or owned object (see #1 and #2 above) // is transferred to some other object. 
This is used for instance with iterators // (where cleanup is performed using a chain of cleanup functions, // see Cleanable). // // Because of #1 and #2 above, copying a CachableEntry is not safe (and thus not // allowed); hence, this is a move-only type, where a move transfers the // management responsibilities, and leaves the source object in an empty state. template class CachableEntry { public: CachableEntry() = default; CachableEntry(T* value, Cache* cache, Cache::Handle* cache_handle, bool own_value) : value_(value) , cache_(cache) , cache_handle_(cache_handle) , own_value_(own_value) { assert(value_ != nullptr || (cache_ == nullptr && cache_handle_ == nullptr && !own_value_)); assert(!!cache_ == !!cache_handle_); assert(!cache_handle_ || !own_value_); } CachableEntry(const CachableEntry&) = delete; CachableEntry& operator=(const CachableEntry&) = delete; CachableEntry(CachableEntry&& rhs) : value_(rhs.value_) , cache_(rhs.cache_) , cache_handle_(rhs.cache_handle_) , own_value_(rhs.own_value_) { assert(value_ != nullptr || (cache_ == nullptr && cache_handle_ == nullptr && !own_value_)); assert(!!cache_ == !!cache_handle_); assert(!cache_handle_ || !own_value_); rhs.ResetFields(); } CachableEntry& operator=(CachableEntry&& rhs) { if (UNLIKELY(this == &rhs)) { return *this; } ReleaseResource(); value_ = rhs.value_; cache_ = rhs.cache_; cache_handle_ = rhs.cache_handle_; own_value_ = rhs.own_value_; assert(value_ != nullptr || (cache_ == nullptr && cache_handle_ == nullptr && !own_value_)); assert(!!cache_ == !!cache_handle_); assert(!cache_handle_ || !own_value_); rhs.ResetFields(); return *this; } ~CachableEntry() { ReleaseResource(); } bool IsEmpty() const { return value_ == nullptr && cache_ == nullptr && cache_handle_ == nullptr && !own_value_; } bool IsCached() const { assert(!!cache_ == !!cache_handle_); return cache_handle_ != nullptr; } T* GetValue() const { return value_; } Cache* GetCache() const { return cache_; } Cache::Handle* GetCacheHandle() 
const { return cache_handle_; } bool GetOwnValue() const { return own_value_; } void Reset() { ReleaseResource(); ResetFields(); } void TransferTo(Cleanable* cleanable) { if (cleanable) { if (cache_handle_ != nullptr) { assert(cache_ != nullptr); cleanable->RegisterCleanup(&ReleaseCacheHandle, cache_, cache_handle_); } else if (own_value_) { cleanable->RegisterCleanup(&DeleteValue, value_, nullptr); } } ResetFields(); } void SetOwnedValue(T* value) { assert(value != nullptr); if (UNLIKELY(value_ == value && own_value_)) { assert(cache_ == nullptr && cache_handle_ == nullptr); return; } Reset(); value_ = value; own_value_ = true; } void SetUnownedValue(T* value) { assert(value != nullptr); if (UNLIKELY(value_ == value && cache_ == nullptr && cache_handle_ == nullptr && !own_value_)) { return; } Reset(); value_ = value; assert(!own_value_); } void SetCachedValue(T* value, Cache* cache, Cache::Handle* cache_handle) { assert(value != nullptr); assert(cache != nullptr); assert(cache_handle != nullptr); if (UNLIKELY(value_ == value && cache_ == cache && cache_handle_ == cache_handle && !own_value_)) { return; } Reset(); value_ = value; cache_ = cache; cache_handle_ = cache_handle; assert(!own_value_); } private: void ReleaseResource() { if (LIKELY(cache_handle_ != nullptr)) { assert(cache_ != nullptr); cache_->Release(cache_handle_); } else if (own_value_) { delete value_; } } void ResetFields() { value_ = nullptr; cache_ = nullptr; cache_handle_ = nullptr; own_value_ = false; } static void ReleaseCacheHandle(void* arg1, void* arg2) { Cache* const cache = static_cast(arg1); assert(cache); Cache::Handle* const cache_handle = static_cast(arg2); assert(cache_handle); cache->Release(cache_handle); } static void DeleteValue(void* arg1, void* /* arg2 */) { delete static_cast(arg1); } private: T* value_ = nullptr; Cache* cache_ = nullptr; Cache::Handle* cache_handle_ = nullptr; bool own_value_ = false; }; } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/table/block_based/data_block_footer.cc000066400000000000000000000035311370372246700225360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "table/block_based/data_block_footer.h" #include "rocksdb/table.h" namespace ROCKSDB_NAMESPACE { const int kDataBlockIndexTypeBitShift = 31; // 0x7FFFFFFF const uint32_t kMaxNumRestarts = (1u << kDataBlockIndexTypeBitShift) - 1u; // 0x7FFFFFFF const uint32_t kNumRestartsMask = (1u << kDataBlockIndexTypeBitShift) - 1u; uint32_t PackIndexTypeAndNumRestarts( BlockBasedTableOptions::DataBlockIndexType index_type, uint32_t num_restarts) { if (num_restarts > kMaxNumRestarts) { assert(0); // mute travis "unused" warning } uint32_t block_footer = num_restarts; if (index_type == BlockBasedTableOptions::kDataBlockBinaryAndHash) { block_footer |= 1u << kDataBlockIndexTypeBitShift; } else if (index_type != BlockBasedTableOptions::kDataBlockBinarySearch) { assert(0); } return block_footer; } void UnPackIndexTypeAndNumRestarts( uint32_t block_footer, BlockBasedTableOptions::DataBlockIndexType* index_type, uint32_t* num_restarts) { if (index_type) { if (block_footer & 1u << kDataBlockIndexTypeBitShift) { *index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash; } else { *index_type = BlockBasedTableOptions::kDataBlockBinarySearch; } } if (num_restarts) { *num_restarts = block_footer & kNumRestartsMask; assert(*num_restarts <= kMaxNumRestarts); } } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/table/block_based/data_block_footer.h000066400000000000000000000015441370372246700224020ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

#include "rocksdb/table.h"

namespace ROCKSDB_NAMESPACE {

// Packs the data-block index type and the restart count into a single 32-bit
// footer word and returns it. The restart count occupies the low 31 bits; the
// most significant bit is set when `index_type` is kDataBlockBinaryAndHash.
// `num_restarts` must not exceed 0x7FFFFFFF (asserted in debug builds).
uint32_t PackIndexTypeAndNumRestarts(
    BlockBasedTableOptions::DataBlockIndexType index_type,
    uint32_t num_restarts);

// Decodes a footer word produced by PackIndexTypeAndNumRestarts().
// `index_type` and `num_restarts` are output parameters; either may be null,
// in which case that field is not decoded.
void UnPackIndexTypeAndNumRestarts(
    uint32_t block_footer,
    BlockBasedTableOptions::DataBlockIndexType* index_type,
    uint32_t* num_restarts);

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/data_block_hash_index.cc000066400000000000000000000062121370372246700233510ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
#include #include #include "rocksdb/slice.h" #include "table/block_based/data_block_hash_index.h" #include "util/coding.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { void DataBlockHashIndexBuilder::Add(const Slice& key, const size_t restart_index) { assert(Valid()); if (restart_index > kMaxRestartSupportedByHashIndex) { valid_ = false; return; } uint32_t hash_value = GetSliceHash(key); hash_and_restart_pairs_.emplace_back(hash_value, static_cast(restart_index)); estimated_num_buckets_ += bucket_per_key_; } void DataBlockHashIndexBuilder::Finish(std::string& buffer) { assert(Valid()); uint16_t num_buckets = static_cast(estimated_num_buckets_); if (num_buckets == 0) { num_buckets = 1; // sanity check } // The build-in hash cannot well distribute strings when into different // buckets when num_buckets is power of two, resulting in high hash // collision. // We made the num_buckets to be odd to avoid this issue. num_buckets |= 1; std::vector buckets(num_buckets, kNoEntry); // write the restart_index array for (auto& entry : hash_and_restart_pairs_) { uint32_t hash_value = entry.first; uint8_t restart_index = entry.second; uint16_t buck_idx = static_cast(hash_value % num_buckets); if (buckets[buck_idx] == kNoEntry) { buckets[buck_idx] = restart_index; } else if (buckets[buck_idx] != restart_index) { // same bucket cannot store two different restart_index, mark collision buckets[buck_idx] = kCollision; } } for (uint8_t restart_index : buckets) { buffer.append( const_cast(reinterpret_cast(&restart_index)), sizeof(restart_index)); } // write NUM_BUCK PutFixed16(&buffer, num_buckets); assert(buffer.size() <= kMaxBlockSizeSupportedByHashIndex); } void DataBlockHashIndexBuilder::Reset() { estimated_num_buckets_ = 0; valid_ = true; hash_and_restart_pairs_.clear(); } void DataBlockHashIndex::Initialize(const char* data, uint16_t size, uint16_t* map_offset) { assert(size >= sizeof(uint16_t)); // NUM_BUCKETS num_buckets_ = DecodeFixed16(data + size - sizeof(uint16_t)); 
assert(num_buckets_ > 0); assert(size > num_buckets_ * sizeof(uint8_t)); *map_offset = static_cast(size - sizeof(uint16_t) - num_buckets_ * sizeof(uint8_t)); } uint8_t DataBlockHashIndex::Lookup(const char* data, uint32_t map_offset, const Slice& key) const { uint32_t hash_value = GetSliceHash(key); uint16_t idx = static_cast(hash_value % num_buckets_); const char* bucket_table = data + map_offset; return static_cast(*(bucket_table + idx * sizeof(uint8_t))); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/data_block_hash_index.h000066400000000000000000000117261370372246700232210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { // This is an experimental feature aiming to reduce the CPU utilization of // point-lookup within a data-block. It is only used in data blocks, and not // in meta-data blocks or per-table index blocks. // // It only used to support BlockBasedTable::Get(). // // A serialized hash index is appended to the data-block. The new block data // format is as follows: // // DATA_BLOCK: [RI RI RI ... RI RI_IDX HASH_IDX FOOTER] // // RI: Restart Interval (the same as the default data-block format) // RI_IDX: Restart Interval index (the same as the default data-block format) // HASH_IDX: The new data-block hash index feature. // FOOTER: A 32bit block footer, which is the NUM_RESTARTS with the MSB as // the flag indicating if this hash index is in use. Note that // given a data block < 32KB, the MSB is never used. So we can // borrow the MSB as the hash index flag. Therefore, this format is // compatible with the legacy data-blocks with num_restarts < 32768, // as the MSB is 0. 
//
// The format of the data-block hash index is as follows:
//
// HASH_IDX: [B B B ... B NUM_BUCK]
//
// B:        bucket, an array of restart indexes. Each bucket is a uint8_t.
// NUM_BUCK: Number of buckets, which is the length of the bucket array.
//
// We reserve two special flags:
//    kNoEntry=255,
//    kCollision=254.
//
// Therefore, the max number of restarts this hash index can support is 253.
//
// Buckets are initialized to be kNoEntry.
//
// When storing a key in the hash index, the key is first hashed to a bucket.
// If the bucket is empty (kNoEntry), the restart index is stored in
// the bucket. If there is already a different restart index there, we will
// update the existing restart index to a collision marker (kCollision). If
// the bucket is already marked as collision, we do not store the restart
// index either.
//
// During query process, a key is first hashed to a bucket. Then we examine if
// the bucket stores nothing (kNoEntry) or the bucket had a collision
// (kCollision). If neither of those happens, the bucket holds the restart
// index of the key and we go directly to that restart interval to search for
// the key.
//
// Note that we only support blocks with #restart_interval < 254. If a block
// has more restart intervals than that, the hash index will not be created
// for it.
const uint8_t kNoEntry = 255; const uint8_t kCollision = 254; const uint8_t kMaxRestartSupportedByHashIndex = 253; // Because we use uint16_t address, we only support block no more than 64KB const size_t kMaxBlockSizeSupportedByHashIndex = 1u << 16; const double kDefaultUtilRatio = 0.75; class DataBlockHashIndexBuilder { public: DataBlockHashIndexBuilder() : bucket_per_key_(-1 /*uninitialized marker*/), estimated_num_buckets_(0), valid_(false) {} void Initialize(double util_ratio) { if (util_ratio <= 0) { util_ratio = kDefaultUtilRatio; // sanity check } bucket_per_key_ = 1 / util_ratio; valid_ = true; } inline bool Valid() const { return valid_ && bucket_per_key_ > 0; } void Add(const Slice& key, const size_t restart_index); void Finish(std::string& buffer); void Reset(); inline size_t EstimateSize() const { uint16_t estimated_num_buckets = static_cast(estimated_num_buckets_); // Maching the num_buckets number in DataBlockHashIndexBuilder::Finish. estimated_num_buckets |= 1; return sizeof(uint16_t) + static_cast(estimated_num_buckets * sizeof(uint8_t)); } private: double bucket_per_key_; // is the multiplicative inverse of util_ratio_ double estimated_num_buckets_; // Now the only usage for `valid_` is to mark false when the inserted // restart_index is larger than supported. In this case HashIndex is not // appended to the block content. bool valid_; std::vector> hash_and_restart_pairs_; friend class DataBlockHashIndex_DataBlockHashTestSmall_Test; }; class DataBlockHashIndex { public: DataBlockHashIndex() : num_buckets_(0) {} void Initialize(const char* data, uint16_t size, uint16_t* map_offset); uint8_t Lookup(const char* data, uint32_t map_offset, const Slice& key) const; inline bool Valid() { return num_buckets_ != 0; } private: // To make the serialized hash index compact and to save the space overhead, // here all the data fields persisted in the block are in uint16 format. // We find that a uint16 is large enough to index every offset of a 64KiB // block. 
// So in other words, DataBlockHashIndex does not support block size equal // or greater then 64KiB. uint16_t num_buckets_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/data_block_hash_index_test.cc000066400000000000000000000567361370372246700244300ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include "db/table_properties_collector.h" #include "rocksdb/slice.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "table/block_based/data_block_hash_index.h" #include "table/get_context.h" #include "table/table_builder.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { bool SearchForOffset(DataBlockHashIndex& index, const char* data, uint16_t map_offset, const Slice& key, uint8_t& restart_point) { uint8_t entry = index.Lookup(data, map_offset, key); if (entry == kCollision) { return true; } if (entry == kNoEntry) { return false; } return entry == restart_point; } // Random KV generator similer to block_test static std::string RandomString(Random* rnd, int len) { std::string r; test::RandomString(rnd, len, &r); return r; } std::string GenerateKey(int primary_key, int secondary_key, int padding_size, Random* rnd) { char buf[50]; char* p = &buf[0]; snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); std::string k(p); if (padding_size) { k += RandomString(rnd, padding_size); } return k; } // Generate random key value pairs. // The generated key will be sorted. You can tune the parameters to generated // different kinds of test key/value pairs for different scenario. 
void GenerateRandomKVs(std::vector* keys, std::vector* values, const int from, const int len, const int step = 1, const int padding_size = 0, const int keys_share_prefix = 1) { Random rnd(302); // generate different prefix for (int i = from; i < from + len; i += step) { // generating keys that shares the prefix for (int j = 0; j < keys_share_prefix; ++j) { keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); // 100 bytes values values->emplace_back(RandomString(&rnd, 100)); } } } TEST(DataBlockHashIndex, DataBlockHashTestSmall) { DataBlockHashIndexBuilder builder; builder.Initialize(0.75 /*util_ratio*/); for (int j = 0; j < 5; j++) { for (uint8_t i = 0; i < 2 + j; i++) { std::string key("key" + std::to_string(i)); uint8_t restart_point = i; builder.Add(key, restart_point); } size_t estimated_size = builder.EstimateSize(); std::string buffer("fake"), buffer2; size_t original_size = buffer.size(); estimated_size += original_size; builder.Finish(buffer); ASSERT_EQ(buffer.size(), estimated_size); buffer2 = buffer; // test for the correctness of relative offset Slice s(buffer2); DataBlockHashIndex index; uint16_t map_offset; index.Initialize(s.data(), static_cast(s.size()), &map_offset); // the additional hash map should start at the end of the buffer ASSERT_EQ(original_size, map_offset); for (uint8_t i = 0; i < 2; i++) { std::string key("key" + std::to_string(i)); uint8_t restart_point = i; ASSERT_TRUE( SearchForOffset(index, s.data(), map_offset, key, restart_point)); } builder.Reset(); } } TEST(DataBlockHashIndex, DataBlockHashTest) { // bucket_num = 200, #keys = 100. 
50% utilization DataBlockHashIndexBuilder builder; builder.Initialize(0.75 /*util_ratio*/); for (uint8_t i = 0; i < 100; i++) { std::string key("key" + std::to_string(i)); uint8_t restart_point = i; builder.Add(key, restart_point); } size_t estimated_size = builder.EstimateSize(); std::string buffer("fake content"), buffer2; size_t original_size = buffer.size(); estimated_size += original_size; builder.Finish(buffer); ASSERT_EQ(buffer.size(), estimated_size); buffer2 = buffer; // test for the correctness of relative offset Slice s(buffer2); DataBlockHashIndex index; uint16_t map_offset; index.Initialize(s.data(), static_cast(s.size()), &map_offset); // the additional hash map should start at the end of the buffer ASSERT_EQ(original_size, map_offset); for (uint8_t i = 0; i < 100; i++) { std::string key("key" + std::to_string(i)); uint8_t restart_point = i; ASSERT_TRUE( SearchForOffset(index, s.data(), map_offset, key, restart_point)); } } TEST(DataBlockHashIndex, DataBlockHashTestCollision) { // bucket_num = 2. 
There will be intense hash collisions DataBlockHashIndexBuilder builder; builder.Initialize(0.75 /*util_ratio*/); for (uint8_t i = 0; i < 100; i++) { std::string key("key" + std::to_string(i)); uint8_t restart_point = i; builder.Add(key, restart_point); } size_t estimated_size = builder.EstimateSize(); std::string buffer("some other fake content to take up space"), buffer2; size_t original_size = buffer.size(); estimated_size += original_size; builder.Finish(buffer); ASSERT_EQ(buffer.size(), estimated_size); buffer2 = buffer; // test for the correctness of relative offset Slice s(buffer2); DataBlockHashIndex index; uint16_t map_offset; index.Initialize(s.data(), static_cast(s.size()), &map_offset); // the additional hash map should start at the end of the buffer ASSERT_EQ(original_size, map_offset); for (uint8_t i = 0; i < 100; i++) { std::string key("key" + std::to_string(i)); uint8_t restart_point = i; ASSERT_TRUE( SearchForOffset(index, s.data(), map_offset, key, restart_point)); } } TEST(DataBlockHashIndex, DataBlockHashTestLarge) { DataBlockHashIndexBuilder builder; builder.Initialize(0.75 /*util_ratio*/); std::unordered_map m; for (uint8_t i = 0; i < 100; i++) { if (i % 2) { continue; // leave half of the keys out } std::string key = "key" + std::to_string(i); uint8_t restart_point = i; builder.Add(key, restart_point); m[key] = restart_point; } size_t estimated_size = builder.EstimateSize(); std::string buffer("filling stuff"), buffer2; size_t original_size = buffer.size(); estimated_size += original_size; builder.Finish(buffer); ASSERT_EQ(buffer.size(), estimated_size); buffer2 = buffer; // test for the correctness of relative offset Slice s(buffer2); DataBlockHashIndex index; uint16_t map_offset; index.Initialize(s.data(), static_cast(s.size()), &map_offset); // the additional hash map should start at the end of the buffer ASSERT_EQ(original_size, map_offset); for (uint8_t i = 0; i < 100; i++) { std::string key = "key" + std::to_string(i); uint8_t 
restart_point = i; if (m.count(key)) { ASSERT_TRUE(m[key] == restart_point); ASSERT_TRUE( SearchForOffset(index, s.data(), map_offset, key, restart_point)); } else { // we allow false positve, so don't test the nonexisting keys. // when false positive happens, the search will continue to the // restart intervals to see if the key really exist. } } } TEST(DataBlockHashIndex, RestartIndexExceedMax) { DataBlockHashIndexBuilder builder; builder.Initialize(0.75 /*util_ratio*/); std::unordered_map m; for (uint8_t i = 0; i <= 253; i++) { std::string key = "key" + std::to_string(i); uint8_t restart_point = i; builder.Add(key, restart_point); } ASSERT_TRUE(builder.Valid()); builder.Reset(); for (uint8_t i = 0; i <= 254; i++) { std::string key = "key" + std::to_string(i); uint8_t restart_point = i; builder.Add(key, restart_point); } ASSERT_FALSE(builder.Valid()); builder.Reset(); ASSERT_TRUE(builder.Valid()); } TEST(DataBlockHashIndex, BlockRestartIndexExceedMax) { Options options = Options(); BlockBuilder builder(1 /* block_restart_interval */, true /* use_delta_encoding */, false /* use_value_delta_encoding */, BlockBasedTableOptions::kDataBlockBinaryAndHash); // #restarts <= 253. HashIndex is valid for (int i = 0; i <= 253; i++) { std::string ukey = "key" + std::to_string(i); InternalKey ikey(ukey, 0, kTypeValue); builder.Add(ikey.Encode().ToString(), "value"); } { // read serialized contents of the block Slice rawblock = builder.Finish(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); ASSERT_EQ(reader.IndexType(), BlockBasedTableOptions::kDataBlockBinaryAndHash); } builder.Reset(); // #restarts > 253. 
HashIndex is not used for (int i = 0; i <= 254; i++) { std::string ukey = "key" + std::to_string(i); InternalKey ikey(ukey, 0, kTypeValue); builder.Add(ikey.Encode().ToString(), "value"); } { // read serialized contents of the block Slice rawblock = builder.Finish(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); ASSERT_EQ(reader.IndexType(), BlockBasedTableOptions::kDataBlockBinarySearch); } } TEST(DataBlockHashIndex, BlockSizeExceedMax) { Options options = Options(); std::string ukey(10, 'k'); InternalKey ikey(ukey, 0, kTypeValue); BlockBuilder builder(1 /* block_restart_interval */, false /* use_delta_encoding */, false /* use_value_delta_encoding */, BlockBasedTableOptions::kDataBlockBinaryAndHash); { // insert a large value. The block size plus HashIndex is 65536. std::string value(65502, 'v'); builder.Add(ikey.Encode().ToString(), value); // read serialized contents of the block Slice rawblock = builder.Finish(); ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex); std::cerr << "block size: " << rawblock.size() << std::endl; // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); ASSERT_EQ(reader.IndexType(), BlockBasedTableOptions::kDataBlockBinaryAndHash); } builder.Reset(); { // insert a large value. The block size plus HashIndex would be 65537. // This excceed the max block size supported by HashIndex (65536). // So when build finishes HashIndex will not be created for the block. std::string value(65503, 'v'); builder.Add(ikey.Encode().ToString(), value); // read serialized contents of the block Slice rawblock = builder.Finish(); ASSERT_LE(rawblock.size(), kMaxBlockSizeSupportedByHashIndex); std::cerr << "block size: " << rawblock.size() << std::endl; // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); // the index type have fallen back to binary when build finish. 
ASSERT_EQ(reader.IndexType(), BlockBasedTableOptions::kDataBlockBinarySearch); } } TEST(DataBlockHashIndex, BlockTestSingleKey) { Options options = Options(); BlockBuilder builder(16 /* block_restart_interval */, true /* use_delta_encoding */, false /* use_value_delta_encoding */, BlockBasedTableOptions::kDataBlockBinaryAndHash); std::string ukey("gopher"); std::string value("gold"); InternalKey ikey(ukey, 10, kTypeValue); builder.Add(ikey.Encode().ToString(), value /*value*/); // read serialized contents of the block Slice rawblock = builder.Finish(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); const InternalKeyComparator icmp(BytewiseComparator()); auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(), kDisableGlobalSequenceNumber); bool may_exist; // search in block for the key just inserted { InternalKey seek_ikey(ukey, 10, kValueTypeForSeek); may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); ASSERT_TRUE(may_exist); ASSERT_TRUE(iter->Valid()); ASSERT_EQ( options.comparator->Compare(iter->key(), ikey.Encode().ToString()), 0); ASSERT_EQ(iter->value(), value); } // search in block for the existing ukey, but with higher seqno { InternalKey seek_ikey(ukey, 20, kValueTypeForSeek); // HashIndex should be able to set the iter correctly may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); ASSERT_TRUE(may_exist); ASSERT_TRUE(iter->Valid()); // user key should match ASSERT_EQ(options.comparator->Compare(ExtractUserKey(iter->key()), ukey), 0); // seek_key seqno number should be greater than that of iter result ASSERT_GT(GetInternalKeySeqno(seek_ikey.Encode()), GetInternalKeySeqno(iter->key())); ASSERT_EQ(iter->value(), value); } // Search in block for the existing ukey, but with lower seqno // in this case, hash can find the only occurrence of the user_key, but // ParseNextDataKey() will skip it as it does not have a older seqno. 
// In this case, GetForSeek() is effective to locate the user_key, and // iter->Valid() == false indicates that we've reached to the end of // the block and the caller should continue searching the next block. { InternalKey seek_ikey(ukey, 5, kValueTypeForSeek); may_exist = iter->SeekForGet(seek_ikey.Encode().ToString()); ASSERT_TRUE(may_exist); ASSERT_FALSE(iter->Valid()); // should have reached to the end of block } delete iter; } TEST(DataBlockHashIndex, BlockTestLarge) { Random rnd(1019); Options options = Options(); std::vector keys; std::vector values; BlockBuilder builder(16 /* block_restart_interval */, true /* use_delta_encoding */, false /* use_value_delta_encoding */, BlockBasedTableOptions::kDataBlockBinaryAndHash); int num_records = 500; GenerateRandomKVs(&keys, &values, 0, num_records); // Generate keys. Adding a trailing "1" to indicate existent keys. // Later will Seeking for keys with a trailing "0" to test seeking // non-existent keys. for (int i = 0; i < num_records; i++) { std::string ukey(keys[i] + "1" /* existing key marker */); InternalKey ikey(ukey, 0, kTypeValue); builder.Add(ikey.Encode().ToString(), values[i]); } // read serialized contents of the block Slice rawblock = builder.Finish(); // create block reader BlockContents contents; contents.data = rawblock; Block reader(std::move(contents)); const InternalKeyComparator icmp(BytewiseComparator()); // random seek existent keys for (int i = 0; i < num_records; i++) { auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(), kDisableGlobalSequenceNumber); // find a random key in the lookaside array int index = rnd.Uniform(num_records); std::string ukey(keys[index] + "1" /* existing key marker */); InternalKey ikey(ukey, 0, kTypeValue); // search in block for this key bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); ASSERT_TRUE(may_exist); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(values[index], iter->value()); delete iter; } // random seek non-existent user keys // In this 
case A), the user_key cannot be found in HashIndex. The key may // exist in the next block. So the iter is set invalidated to tell the // caller to search the next block. This test case belongs to this case A). // // Note that for non-existent keys, there is possibility of false positive, // i.e. the key is still hashed into some restart interval. // Two additional possible outcome: // B) linear seek the restart interval and not found, the iter stops at the // starting of the next restart interval. The key does not exist // anywhere. // C) linear seek the restart interval and not found, the iter stops at the // the end of the block, i.e. restarts_. The key may exist in the next // block. // So these combinations are possible when searching non-existent user_key: // // case# may_exist iter->Valid() // A true false // B false true // C true false for (int i = 0; i < num_records; i++) { auto iter = reader.NewDataIterator(&icmp, icmp.user_comparator(), kDisableGlobalSequenceNumber); // find a random key in the lookaside array int index = rnd.Uniform(num_records); std::string ukey(keys[index] + "0" /* non-existing key marker */); InternalKey ikey(ukey, 0, kTypeValue); // search in block for this key bool may_exist = iter->SeekForGet(ikey.Encode().ToString()); if (!may_exist) { ASSERT_TRUE(iter->Valid()); } if (!iter->Valid()) { ASSERT_TRUE(may_exist); } delete iter; } } // helper routine for DataBlockHashIndex.BlockBoundary /* Builds a table in memory from the two entries ik1/v1 and ik2/v2 with options.table_factory, reopens it from the bytes that were written, and runs Get() for seek_ikey; the lookup outcome is reported through get_context. Problems are reported through gtest EXPECT/ASSERT macros; a fatal ASSERT returns from this helper only, so callers still need to check get_context. */ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, std::string& v2, InternalKey& seek_ikey, GetContext& get_context, Options& options) { std::unique_ptr file_writer; std::unique_ptr file_reader; std::unique_ptr table_reader; int level_ = -1; std::vector keys; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); const InternalKeyComparator internal_comparator(options.comparator); EnvOptions soptions; soptions.use_mmap_reads = ioptions.allow_mmap_reads; file_writer.reset( 
test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */)); std::unique_ptr builder; std::vector> int_tbl_prop_collector_factories; std::string column_family_name; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, internal_comparator, &int_tbl_prop_collector_factories, options.compression, options.sample_for_compression, CompressionOptions(), false /* skip_filters */, column_family_name, level_), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, file_writer.get())); builder->Add(ik1.Encode().ToString(), v1); builder->Add(ik2.Encode().ToString(), v2); EXPECT_TRUE(builder->status().ok()); Status s = builder->Finish(); /* a failed flush must fail the test rather than be dropped silently */ ASSERT_OK(file_writer->Flush()); EXPECT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ( test::GetStringSinkFromLegacyWriter(file_writer.get())->contents().size(), builder->FileSize()); // Open the table file_reader.reset(test::GetRandomAccessFileReader(new test::StringSource( test::GetStringSinkFromLegacyWriter(file_writer.get())->contents(), 0 /*uniq_id*/, ioptions.allow_mmap_reads))); const bool kSkipFilters = true; const bool kImmortal = true; /* table_reader is dereferenced right below, so stop here if the table could not be opened */ ASSERT_OK(ioptions.table_factory->NewTableReader( TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions, internal_comparator, !kSkipFilters, !kImmortal, level_), std::move(file_reader), test::GetStringSinkFromLegacyWriter(file_writer.get())->contents().size(), &table_reader)); // Search using Get() ReadOptions ro; ASSERT_OK(table_reader->Get(ro, seek_ikey.Encode().ToString(), &get_context, moptions.prefix_extractor.get())); } TEST(DataBlockHashIndex, BlockBoundary) { BlockBasedTableOptions table_options; table_options.data_block_index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash; table_options.block_restart_interval = 1; table_options.block_size = 4096; Options options; options.comparator = BytewiseComparator(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); // insert two large k/v pair. 
Given that the block_size is 4096, one k/v // pair will take up one block. // [ k1/v1 ][ k2/v2 ] // [ Block N ][ Block N+1 ] { // [ "aab"@100 ][ "axy"@10 ] // | Block N ][ Block N+1 ] // seek for "axy"@60 std::string uk1("aab"); InternalKey ik1(uk1, 100, kTypeValue); std::string v1(4100, '1'); // large value std::string uk2("axy"); InternalKey ik2(uk2, 10, kTypeValue); std::string v2(4100, '2'); // large value PinnableSlice value; std::string seek_ukey("axy"); InternalKey seek_ikey(seek_ukey, 60, kTypeValue); GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, seek_ukey, &value, nullptr, nullptr, true, nullptr, nullptr); TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); ASSERT_EQ(get_context.State(), GetContext::kFound); ASSERT_EQ(value, v2); value.Reset(); } { // [ "axy"@100 ][ "axy"@10 ] // | Block N ][ Block N+1 ] // seek for "axy"@60 std::string uk1("axy"); InternalKey ik1(uk1, 100, kTypeValue); std::string v1(4100, '1'); // large value std::string uk2("axy"); InternalKey ik2(uk2, 10, kTypeValue); std::string v2(4100, '2'); // large value PinnableSlice value; std::string seek_ukey("axy"); InternalKey seek_ikey(seek_ukey, 60, kTypeValue); GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, seek_ukey, &value, nullptr, nullptr, true, nullptr, nullptr); TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); ASSERT_EQ(get_context.State(), GetContext::kFound); ASSERT_EQ(value, v2); value.Reset(); } { // [ "axy"@100 ][ "axy"@10 ] // | Block N ][ Block N+1 ] // seek for "axy"@120 std::string uk1("axy"); InternalKey ik1(uk1, 100, kTypeValue); std::string v1(4100, '1'); // large value std::string uk2("axy"); InternalKey ik2(uk2, 10, kTypeValue); std::string v2(4100, '2'); // large value PinnableSlice value; std::string seek_ukey("axy"); InternalKey seek_ikey(seek_ukey, 120, kTypeValue); GetContext get_context(options.comparator, nullptr, nullptr, nullptr, 
GetContext::kNotFound, seek_ukey, &value, nullptr, nullptr, true, nullptr, nullptr); TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); ASSERT_EQ(get_context.State(), GetContext::kFound); ASSERT_EQ(value, v1); value.Reset(); } { // [ "axy"@100 ][ "axy"@10 ] // | Block N ][ Block N+1 ] // seek for "axy"@5 std::string uk1("axy"); InternalKey ik1(uk1, 100, kTypeValue); std::string v1(4100, '1'); // large value std::string uk2("axy"); InternalKey ik2(uk2, 10, kTypeValue); std::string v2(4100, '2'); // large value PinnableSlice value; std::string seek_ukey("axy"); InternalKey seek_ikey(seek_ukey, 5, kTypeValue); GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, seek_ukey, &value, nullptr, nullptr, true, nullptr, nullptr); TestBoundary(ik1, v1, ik2, v2, seek_ikey, get_context, options); ASSERT_EQ(get_context.State(), GetContext::kNotFound); value.Reset(); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/block_based/filter_block.h000066400000000000000000000155541370372246700214060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // A filter block is stored near the end of a Table file. It contains // filters (e.g., bloom filters) for all data blocks in the table combined // into a single filter block. // // It is a base class for BlockBasedFilter and FullFilter. // These two are both used in BlockBasedTable. 
The first one contain filter // For a part of keys in sst file, the second contain filter for all keys // in sst file. #pragma once #include #include #include #include #include "db/dbformat.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "table/format.h" #include "table/multiget_context.h" #include "trace_replay/block_cache_tracer.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { /* Passed as block_offset by FilterBlockReader::RangeMayExist. */ const uint64_t kNotValid = ULLONG_MAX; class FilterPolicy; class GetContext; using MultiGetRange = MultiGetContext::Range; // A FilterBlockBuilder is used to construct all of the filters for a // particular Table. It generates a single string which is stored as // a special block in the Table. // // The sequence of calls to FilterBlockBuilder must match the regexp: // (StartBlock Add*)* Finish // // Block-based and full filter block builders are driven in the same way. class FilterBlockBuilder { public: explicit FilterBlockBuilder() {} // No copying allowed FilterBlockBuilder(const FilterBlockBuilder&) = delete; void operator=(const FilterBlockBuilder&) = delete; virtual ~FilterBlockBuilder() {} virtual bool IsBlockBased() = 0; // True if this is a block-based filter virtual void StartBlock(uint64_t block_offset) = 0; // Start the filter for a new block virtual void Add(const Slice& key) = 0; // Add a key to the current filter virtual size_t NumAdded() const = 0; // Number of keys added Slice Finish() { // Generate the filter. Convenience overload: calls Finish(handle, status) with a default-constructed BlockHandle and asserts that the resulting status is ok. const BlockHandle empty_handle; Status dont_care_status; auto ret = Finish(empty_handle, &dont_care_status); assert(dont_care_status.ok()); return ret; } /* Pure-virtual form of Finish; the overload above forwards to it with an empty handle and a local Status. */ virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0; }; // A FilterBlockReader is used to parse a filter from an SST table. // KeyMayMatch and PrefixMayMatch trigger filter checking. // // Block-based and full filter block readers are called in the same way. 
class FilterBlockReader { public: FilterBlockReader() = default; virtual ~FilterBlockReader() = default; FilterBlockReader(const FilterBlockReader&) = delete; FilterBlockReader& operator=(const FilterBlockReader&) = delete; virtual bool IsBlockBased() = 0; // If is blockbased filter /** * If no_io is set, then it returns true if it cannot answer the query without * reading data from disk. This is used in PartitionedFilterBlockReader to * avoid reading partitions that are not in block cache already * * Normally filters are built on only the user keys and the InternalKey is not * needed for a query. The index in PartitionedFilterBlockReader however is * built upon InternalKey and must be provided via const_ikey_ptr when running * queries. */ virtual bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; virtual void KeysMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { for (auto iter = range->begin(); iter != range->end(); ++iter) { const Slice ukey = iter->ukey; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; if (!KeyMayMatch(ukey, prefix_extractor, block_offset, no_io, &ikey, get_context, lookup_context)) { range->SkipKey(iter); } } } /** * no_io and const_ikey_ptr here means the same as in KeyMayMatch */ virtual bool PrefixMayMatch(const Slice& prefix, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) = 0; virtual void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { for (auto iter = range->begin(); iter != range->end(); 
++iter) { const Slice ukey = iter->ukey; const Slice ikey = iter->ikey; GetContext* const get_context = iter->get_context; if (prefix_extractor->InDomain(ukey) && !PrefixMayMatch(prefix_extractor->Transform(ukey), prefix_extractor, block_offset, no_io, &ikey, get_context, lookup_context)) { range->SkipKey(iter); } } } virtual size_t ApproximateMemoryUsage() const = 0; // convert this object to a human readable form virtual std::string ToString() const { std::string error_msg("Unsupported filter \n"); return error_msg; } virtual void CacheDependencies(bool /*pin*/) {} virtual bool RangeMayExist(const Slice* /*iterate_upper_bound*/, const Slice& user_key, const SliceTransform* prefix_extractor, const Comparator* /*comparator*/, const Slice* const const_ikey_ptr, bool* filter_checked, bool need_upper_bound_check, bool no_io, BlockCacheLookupContext* lookup_context) { if (need_upper_bound_check) { return true; } *filter_checked = true; Slice prefix = prefix_extractor->Transform(user_key); return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, const_ikey_ptr, /* get_context */ nullptr, lookup_context); } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/filter_block_reader_common.cc000066400000000000000000000065761370372246700244420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #include "table/block_based/filter_block_reader_common.h" #include "monitoring/perf_context_imp.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/parsed_full_filter_block.h" namespace ROCKSDB_NAMESPACE { /* NOTE(review): the template parameter lists and arguments (after `template`, `FilterBlockReaderCommon` and `CachableEntry`) are absent in this copy, presumably lost in extraction; confirm against upstream before building. */ /* Reads the filter block at rep->filter_handle into *filter_block through BlockBasedTable::RetrieveBlock (block type kFilter, empty uncompression dictionary, for_compaction = false) and returns that call's Status. table and filter_block must be non-null and *filter_block must be empty on entry (asserted). */ template Status FilterBlockReaderCommon::ReadFilterBlock( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& read_options, bool use_cache, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* filter_block) { PERF_TIMER_GUARD(read_filter_block_nanos); assert(table); assert(filter_block); assert(filter_block->IsEmpty()); const BlockBasedTable::Rep* const rep = table->get_rep(); assert(rep); const Status s = table->RetrieveBlock(prefetch_buffer, read_options, rep->filter_handle, UncompressionDict::GetEmptyDict(), filter_block, BlockType::kFilter, get_context, lookup_context, /* for_compaction */ false, use_cache); return s; } /* Returns the table's prefix extractor when rep->prefix_filtering is set, nullptr otherwise. */ template const SliceTransform* FilterBlockReaderCommon::table_prefix_extractor() const { assert(table_); const BlockBasedTable::Rep* const rep = table_->get_rep(); assert(rep); return rep->prefix_filtering ? 
rep->table_prefix_extractor.get() : nullptr; } /* Returns the whole_key_filtering flag of the table rep. */ template bool FilterBlockReaderCommon::whole_key_filtering() const { assert(table_); assert(table_->get_rep()); return table_->get_rep()->whole_key_filtering; } /* Returns table_options.cache_index_and_filter_blocks of the table rep. */ template bool FilterBlockReaderCommon::cache_filter_blocks() const { assert(table_); assert(table_->get_rep()); return table_->get_rep()->table_options.cache_index_and_filter_blocks; } /* Makes the filter block available through *filter_block. If this reader already holds one (filter_block_ is not empty) it is handed out as an unowned value and OK is returned; otherwise the block is read with ReadFilterBlock, restricted to read_tier = kBlockCacheTier when no_io is set. */ template Status FilterBlockReaderCommon::GetOrReadFilterBlock( bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* filter_block) const { assert(filter_block); if (!filter_block_.IsEmpty()) { filter_block->SetUnownedValue(filter_block_.GetValue()); return Status::OK(); } ReadOptions read_options; if (no_io) { read_options.read_tier = kBlockCacheTier; } return ReadFilterBlock(table_, nullptr /* prefetch_buffer */, read_options, cache_filter_blocks(), get_context, lookup_context, filter_block); } /* Returns the filter block's ApproximateMemoryUsage() when filter_block_ owns its value (GetOwnValue()), 0 otherwise. */ template size_t FilterBlockReaderCommon::ApproximateFilterBlockMemoryUsage() const { assert(!filter_block_.GetOwnValue() || filter_block_.GetValue() != nullptr); return filter_block_.GetOwnValue() ? filter_block_.GetValue()->ApproximateMemoryUsage() : 0; } // Explicitly instantiate templates for each of the "blocklike" types we use. // This makes it possible to keep the template definitions in the .cc file. template class FilterBlockReaderCommon; template class FilterBlockReaderCommon; template class FilterBlockReaderCommon; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/filter_block_reader_common.h000066400000000000000000000037651370372246700243010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// #pragma once #include #include "table/block_based/cachable_entry.h" #include "table/block_based/filter_block.h" namespace ROCKSDB_NAMESPACE { class BlockBasedTable; class FilePrefetchBuffer; // Encapsulates common functionality for the various filter block reader // implementations. Provides access to the filter block regardless of whether // it is owned by the reader or stored in the cache, or whether it is pinned // in the cache or not. /* NOTE(review): the template parameter list and the CachableEntry arguments are absent in this copy, presumably lost in extraction; confirm against upstream. */ template class FilterBlockReaderCommon : public FilterBlockReader { public: /* Stores t and move-constructs filter_block_ from filter_block; t must be non-null (asserted). */ FilterBlockReaderCommon(const BlockBasedTable* t, CachableEntry&& filter_block) : table_(t), filter_block_(std::move(filter_block)) { assert(table_); } protected: /* Reads the filter block of `table` into *filter_block and returns the resulting Status. */ static Status ReadFilterBlock(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& read_options, bool use_cache, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* filter_block); /* The table this reader was constructed with. */ const BlockBasedTable* table() const { return table_; } /* Accessors for settings held by the table. */ const SliceTransform* table_prefix_extractor() const; bool whole_key_filtering() const; bool cache_filter_blocks() const; /* Hands out the filter block held by this reader when there is one, otherwise reads it; no_io limits the read to the block cache tier. */ Status GetOrReadFilterBlock(bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* filter_block) const; /* Memory usage of the filter block when this reader owns it, 0 otherwise. */ size_t ApproximateFilterBlockMemoryUsage() const; private: const BlockBasedTable* table_; /* set at construction, asserted non-null */ CachableEntry filter_block_; /* may be empty; see GetOrReadFilterBlock */ }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/filter_policy.cc000066400000000000000000000675121370372246700217520ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #include #include #include "rocksdb/filter_policy.h" #include "rocksdb/slice.h" #include "table/block_based/block_based_filter_block.h" #include "table/block_based/full_filter_block.h" #include "table/block_based/filter_policy_internal.h" #include "third-party/folly/folly/ConstexprMath.h" #include "util/bloom_impl.h" #include "util/coding.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { namespace { // See description in FastLocalBloomImpl class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder { public: explicit FastLocalBloomBitsBuilder(const int millibits_per_key) : millibits_per_key_(millibits_per_key), num_probes_(FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_)) { assert(millibits_per_key >= 1000); } // No Copy allowed FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete; void operator=(const FastLocalBloomBitsBuilder&) = delete; ~FastLocalBloomBitsBuilder() override {} virtual void AddKey(const Slice& key) override { uint64_t hash = GetSliceHash64(key); if (hash_entries_.empty() || hash != hash_entries_.back()) { hash_entries_.push_back(hash); } } virtual Slice Finish(std::unique_ptr* buf) override { uint32_t len_with_metadata = CalculateSpace(static_cast(hash_entries_.size())); char* data = new char[len_with_metadata]; memset(data, 0, len_with_metadata); assert(data); assert(len_with_metadata >= 5); uint32_t len = len_with_metadata - 5; if (len > 0) { AddAllEntries(data, len); } // See BloomFilterPolicy::GetBloomBitsReader re: metadata // -1 = Marker for newer Bloom implementations data[len] = static_cast(-1); // 0 = Marker for this sub-implementation data[len + 1] = static_cast(0); // num_probes (and 0 in upper bits for 64-byte block size) data[len + 2] = static_cast(num_probes_); // rest of metadata stays zero const char* const_data = data; buf->reset(const_data); assert(hash_entries_.empty()); return Slice(data, len_with_metadata); } int CalculateNumEntry(const 
uint32_t bytes) override { uint32_t bytes_no_meta = bytes >= 5u ? bytes - 5u : 0; return static_cast(uint64_t{8000} * bytes_no_meta / millibits_per_key_); } uint32_t CalculateSpace(const int num_entry) override { uint32_t num_cache_lines = 0; if (millibits_per_key_ > 0 && num_entry > 0) { num_cache_lines = static_cast( (int64_t{num_entry} * millibits_per_key_ + 511999) / 512000); } return num_cache_lines * 64 + /*metadata*/ 5; } double EstimatedFpRate(size_t keys, size_t bytes) override { return FastLocalBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5, num_probes_, /*hash bits*/ 64); } private: void AddAllEntries(char* data, uint32_t len) { // Simple version without prefetching: // // for (auto h : hash_entries_) { // FastLocalBloomImpl::AddHash(Lower32of64(h), Upper32of64(h), len, // num_probes_, data); // } const size_t num_entries = hash_entries_.size(); constexpr size_t kBufferMask = 7; static_assert(((kBufferMask + 1) & kBufferMask) == 0, "Must be power of 2 minus 1"); std::array hashes; std::array byte_offsets; // Prime the buffer size_t i = 0; for (; i <= kBufferMask && i < num_entries; ++i) { uint64_t h = hash_entries_.front(); hash_entries_.pop_front(); FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data, /*out*/ &byte_offsets[i]); hashes[i] = Upper32of64(h); } // Process and buffer for (; i < num_entries; ++i) { uint32_t& hash_ref = hashes[i & kBufferMask]; uint32_t& byte_offset_ref = byte_offsets[i & kBufferMask]; // Process (add) FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes_, data + byte_offset_ref); // And buffer uint64_t h = hash_entries_.front(); hash_entries_.pop_front(); FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data, /*out*/ &byte_offset_ref); hash_ref = Upper32of64(h); } // Finish processing for (i = 0; i <= kBufferMask && i < num_entries; ++i) { FastLocalBloomImpl::AddHashPrepared(hashes[i], num_probes_, data + byte_offsets[i]); } } int millibits_per_key_; int num_probes_; // A deque avoids unnecessary 
copying of already-saved values // and has near-minimal peak memory use. std::deque hash_entries_; }; // See description in FastLocalBloomImpl class FastLocalBloomBitsReader : public FilterBitsReader { public: FastLocalBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes) : data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {} // No Copy allowed FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete; void operator=(const FastLocalBloomBitsReader&) = delete; ~FastLocalBloomBitsReader() override {} bool MayMatch(const Slice& key) override { uint64_t h = GetSliceHash64(key); uint32_t byte_offset; FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_, /*out*/ &byte_offset); return FastLocalBloomImpl::HashMayMatchPrepared(Upper32of64(h), num_probes_, data_ + byte_offset); } virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { std::array hashes; std::array byte_offsets; for (int i = 0; i < num_keys; ++i) { uint64_t h = GetSliceHash64(*keys[i]); FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_, /*out*/ &byte_offsets[i]); hashes[i] = Upper32of64(h); } for (int i = 0; i < num_keys; ++i) { may_match[i] = FastLocalBloomImpl::HashMayMatchPrepared( hashes[i], num_probes_, data_ + byte_offsets[i]); } } private: const char* data_; const int num_probes_; const uint32_t len_bytes_; }; using LegacyBloomImpl = LegacyLocalityBloomImpl; class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder { public: explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log); // No Copy allowed LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete; void operator=(const LegacyBloomBitsBuilder&) = delete; ~LegacyBloomBitsBuilder() override; void AddKey(const Slice& key) override; Slice Finish(std::unique_ptr* buf) override; int CalculateNumEntry(const uint32_t bytes) override; uint32_t CalculateSpace(const int num_entry) override { uint32_t dont_care1; uint32_t dont_care2; return 
CalculateSpace(num_entry, &dont_care1, &dont_care2); } double EstimatedFpRate(size_t keys, size_t bytes) override { return LegacyBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5, num_probes_); } private: int bits_per_key_; int num_probes_; std::vector hash_entries_; Logger* info_log_; // Get totalbits that optimized for cpu cache line uint32_t GetTotalBitsForLocality(uint32_t total_bits); // Reserve space for new filter char* ReserveSpace(const int num_entry, uint32_t* total_bits, uint32_t* num_lines); // Implementation-specific variant of public CalculateSpace uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits, uint32_t* num_lines); // Assuming single threaded access to this function. void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits); }; LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log) : bits_per_key_(bits_per_key), num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)), info_log_(info_log) { assert(bits_per_key_); } LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {} void LegacyBloomBitsBuilder::AddKey(const Slice& key) { uint32_t hash = BloomHash(key); if (hash_entries_.size() == 0 || hash != hash_entries_.back()) { hash_entries_.push_back(hash); } } Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr* buf) { uint32_t total_bits, num_lines; size_t num_entries = hash_entries_.size(); char* data = ReserveSpace(static_cast(num_entries), &total_bits, &num_lines); assert(data); if (total_bits != 0 && num_lines != 0) { for (auto h : hash_entries_) { AddHash(h, data, num_lines, total_bits); } // Check for excessive entries for 32-bit hash function if (num_entries >= /* minimum of 3 million */ 3000000U) { // More specifically, we can detect that the 32-bit hash function // is causing significant increase in FP rate by comparing current // estimated FP rate to what we would get with a normal number of // keys at same memory ratio. 
double est_fp_rate = LegacyBloomImpl::EstimatedFpRate( num_entries, total_bits / 8, num_probes_); double vs_fp_rate = LegacyBloomImpl::EstimatedFpRate( 1U << 16, (1U << 16) * bits_per_key_ / 8, num_probes_); if (est_fp_rate >= 1.50 * vs_fp_rate) { // For more details, see // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter ROCKS_LOG_WARN( info_log_, "Using legacy SST/BBT Bloom filter with excessive key count " "(%.1fM @ %dbpk), causing estimated %.1fx higher filter FP rate. " "Consider using new Bloom with format_version>=5, smaller SST " "file size, or partitioned filters.", num_entries / 1000000.0, bits_per_key_, est_fp_rate / vs_fp_rate); } } } // See BloomFilterPolicy::GetFilterBitsReader for metadata data[total_bits / 8] = static_cast(num_probes_); EncodeFixed32(data + total_bits / 8 + 1, static_cast(num_lines)); const char* const_data = data; buf->reset(const_data); hash_entries_.clear(); return Slice(data, total_bits / 8 + 5); } uint32_t LegacyBloomBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) { uint32_t num_lines = (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8); // Make num_lines an odd number to make sure more bits are involved // when determining which block. 
if (num_lines % 2 == 0) { num_lines++; } return num_lines * (CACHE_LINE_SIZE * 8); } uint32_t LegacyBloomBitsBuilder::CalculateSpace(const int num_entry, uint32_t* total_bits, uint32_t* num_lines) { assert(bits_per_key_); if (num_entry != 0) { uint32_t total_bits_tmp = static_cast(num_entry * bits_per_key_); *total_bits = GetTotalBitsForLocality(total_bits_tmp); *num_lines = *total_bits / (CACHE_LINE_SIZE * 8); assert(*total_bits > 0 && *total_bits % 8 == 0); } else { // filter is empty, just leave space for metadata *total_bits = 0; *num_lines = 0; } // Reserve space for Filter uint32_t sz = *total_bits / 8; sz += 5; // 4 bytes for num_lines, 1 byte for num_probes return sz; } char* LegacyBloomBitsBuilder::ReserveSpace(const int num_entry, uint32_t* total_bits, uint32_t* num_lines) { uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines); char* data = new char[sz]; memset(data, 0, sz); return data; } int LegacyBloomBitsBuilder::CalculateNumEntry(const uint32_t bytes) { assert(bits_per_key_); assert(bytes > 0); int high = static_cast(bytes * 8 / bits_per_key_ + 1); int low = 1; int n = high; for (; n >= low; n--) { if (CalculateSpace(n) <= bytes) { break; } } assert(n < high); // High should be an overestimation return n; } inline void LegacyBloomBitsBuilder::AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits) { #ifdef NDEBUG static_cast(total_bits); #endif assert(num_lines > 0 && total_bits > 0); LegacyBloomImpl::AddHash(h, num_lines, num_probes_, data, folly::constexpr_log2(CACHE_LINE_SIZE)); } class LegacyBloomBitsReader : public FilterBitsReader { public: LegacyBloomBitsReader(const char* data, int num_probes, uint32_t num_lines, uint32_t log2_cache_line_size) : data_(data), num_probes_(num_probes), num_lines_(num_lines), log2_cache_line_size_(log2_cache_line_size) {} // No Copy allowed LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete; void operator=(const LegacyBloomBitsReader&) = delete; ~LegacyBloomBitsReader() 
override {} // "contents" contains the data built by a preceding call to // FilterBitsBuilder::Finish. MayMatch must return true if the key was // passed to FilterBitsBuilder::AddKey. This method may return true or false // if the key was not on the list, but it should aim to return false with a // high probability. bool MayMatch(const Slice& key) override { uint32_t hash = BloomHash(key); uint32_t byte_offset; LegacyBloomImpl::PrepareHashMayMatch( hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_); return LegacyBloomImpl::HashMayMatchPrepared( hash, num_probes_, data_ + byte_offset, log2_cache_line_size_); } virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override { std::array hashes; std::array byte_offsets; for (int i = 0; i < num_keys; ++i) { hashes[i] = BloomHash(*keys[i]); LegacyBloomImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_, /*out*/ &byte_offsets[i], log2_cache_line_size_); } for (int i = 0; i < num_keys; ++i) { may_match[i] = LegacyBloomImpl::HashMayMatchPrepared( hashes[i], num_probes_, data_ + byte_offsets[i], log2_cache_line_size_); } } private: const char* data_; const int num_probes_; const uint32_t num_lines_; const uint32_t log2_cache_line_size_; }; class AlwaysTrueFilter : public FilterBitsReader { public: bool MayMatch(const Slice&) override { return true; } using FilterBitsReader::MayMatch; // inherit overload }; class AlwaysFalseFilter : public FilterBitsReader { public: bool MayMatch(const Slice&) override { return false; } using FilterBitsReader::MayMatch; // inherit overload }; } // namespace const std::vector BloomFilterPolicy::kAllFixedImpls = { kLegacyBloom, kDeprecatedBlock, kFastLocalBloom, }; const std::vector BloomFilterPolicy::kAllUserModes = { kDeprecatedBlock, kAuto, }; BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode) : mode_(mode), warned_(false) { // Sanitize bits_per_key if (bits_per_key < 1.0) { bits_per_key = 1.0; } else if (!(bits_per_key < 100.0)) { // 
including NaN bits_per_key = 100.0; } // Includes a nudge toward rounding up, to ensure on all platforms // that doubles specified with three decimal digits after the decimal // point are interpreted accurately. millibits_per_key_ = static_cast(bits_per_key * 1000.0 + 0.500001); // For better or worse, this is a rounding up of a nudged rounding up, // e.g. 7.4999999999999 will round up to 8, but that provides more // predictability against small arithmetic errors in floating point. whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000; } BloomFilterPolicy::~BloomFilterPolicy() {} const char* BloomFilterPolicy::Name() const { return "rocksdb.BuiltinBloomFilter"; } void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, std::string* dst) const { // We should ideally only be using this deprecated interface for // appropriately constructed BloomFilterPolicy assert(mode_ == kDeprecatedBlock); // Compute bloom filter size (in both bits and bytes) uint32_t bits = static_cast(n * whole_bits_per_key_); // For small n, we can see a very high false positive rate. Fix it // by enforcing a minimum bloom filter length. if (bits < 64) bits = 64; uint32_t bytes = (bits + 7) / 8; bits = bytes * 8; int num_probes = LegacyNoLocalityBloomImpl::ChooseNumProbes(whole_bits_per_key_); const size_t init_size = dst->size(); dst->resize(init_size + bytes, 0); dst->push_back(static_cast(num_probes)); // Remember # of probes char* array = &(*dst)[init_size]; for (int i = 0; i < n; i++) { LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes, array); } } bool BloomFilterPolicy::KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { const size_t len = bloom_filter.size(); if (len < 2 || len > 0xffffffffU) { return false; } const char* array = bloom_filter.data(); const uint32_t bits = static_cast(len - 1) * 8; // Use the encoded k so that we can read filters generated by // bloom filters created using different parameters. 
const int k = static_cast(array[len - 1]); if (k > 30) { // Reserved for potentially new encodings for short bloom filters. // Consider it a match. return true; } // NB: using stored k not num_probes for whole_bits_per_key_ return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k, array); } FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilder() const { // This code path should no longer be used, for the built-in // BloomFilterPolicy. Internal to RocksDB and outside // BloomFilterPolicy, only get a FilterBitsBuilder with // BloomFilterPolicy::GetBuilderFromContext(), which will call // BloomFilterPolicy::GetBuilderWithContext(). RocksDB users have // been warned (HISTORY.md) that they can no longer call this on // the built-in BloomFilterPolicy (unlikely). assert(false); return GetBuilderWithContext(FilterBuildingContext(BlockBasedTableOptions())); } FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( const FilterBuildingContext& context) const { Mode cur = mode_; // Unusual code construction so that we can have just // one exhaustive switch without (risky) recursion for (int i = 0; i < 2; ++i) { switch (cur) { case kAuto: if (context.table_options.format_version < 5) { cur = kLegacyBloom; } else { cur = kFastLocalBloom; } break; case kDeprecatedBlock: return nullptr; case kFastLocalBloom: return new FastLocalBloomBitsBuilder(millibits_per_key_); case kLegacyBloom: if (whole_bits_per_key_ >= 14 && context.info_log && !warned_.load(std::memory_order_relaxed)) { warned_ = true; const char* adjective; if (whole_bits_per_key_ >= 20) { adjective = "Dramatic"; } else { adjective = "Significant"; } // For more details, see // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter ROCKS_LOG_WARN( context.info_log, "Using legacy Bloom filter with high (%d) bits/key. 
" "%s filter space and/or accuracy improvement is available " "with format_version>=5.", whole_bits_per_key_, adjective); } return new LegacyBloomBitsBuilder(whole_bits_per_key_, context.info_log); } } assert(false); return nullptr; // something legal } FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext( const FilterBuildingContext& context) { if (context.table_options.filter_policy) { return context.table_options.filter_policy->GetBuilderWithContext(context); } else { return nullptr; } } // Read metadata to determine what kind of FilterBitsReader is needed // and return a new one. FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader( const Slice& contents) const { uint32_t len_with_meta = static_cast(contents.size()); if (len_with_meta <= 5) { // filter is empty or broken. Treat like zero keys added. return new AlwaysFalseFilter(); } // Legacy Bloom filter data: // 0 +-----------------------------------+ // | Raw Bloom filter data | // | ... | // len +-----------------------------------+ // | byte for num_probes or | // | marker for new implementations | // len+1 +-----------------------------------+ // | four bytes for number of cache | // | lines | // len_with_meta +-----------------------------------+ int8_t raw_num_probes = static_cast(contents.data()[len_with_meta - 5]); // NB: *num_probes > 30 and < 128 probably have not been used, because of // BloomFilterPolicy::initialize, unless directly calling // LegacyBloomBitsBuilder as an API, but we are leaving those cases in // limbo with LegacyBloomBitsReader for now. if (raw_num_probes < 1) { // Note: < 0 (or unsigned > 127) indicate special new implementations // (or reserved for future use) if (raw_num_probes == -1) { // Marker for newer Bloom implementations return GetBloomBitsReader(contents); } // otherwise // Treat as zero probes (always FP) for now. 
return new AlwaysTrueFilter(); } // else attempt decode for LegacyBloomBitsReader int num_probes = raw_num_probes; assert(num_probes >= 1); assert(num_probes <= 127); uint32_t len = len_with_meta - 5; assert(len > 0); uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4); uint32_t log2_cache_line_size; if (num_lines * CACHE_LINE_SIZE == len) { // Common case log2_cache_line_size = folly::constexpr_log2(CACHE_LINE_SIZE); } else if (num_lines == 0 || len % num_lines != 0) { // Invalid (no solution to num_lines * x == len) // Treat as zero probes (always FP) for now. return new AlwaysTrueFilter(); } else { // Determine the non-native cache line size (from another system) log2_cache_line_size = 0; while ((num_lines << log2_cache_line_size) < len) { ++log2_cache_line_size; } if ((num_lines << log2_cache_line_size) != len) { // Invalid (block size not a power of two) // Treat as zero probes (always FP) for now. return new AlwaysTrueFilter(); } } // if not early return return new LegacyBloomBitsReader(contents.data(), num_probes, num_lines, log2_cache_line_size); } // For newer Bloom filter implementations FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader( const Slice& contents) const { uint32_t len_with_meta = static_cast(contents.size()); uint32_t len = len_with_meta - 5; assert(len > 0); // precondition // New Bloom filter data: // 0 +-----------------------------------+ // | Raw Bloom filter data | // | ... | // len +-----------------------------------+ // | char{-1} byte -> new Bloom filter | // len+1 +-----------------------------------+ // | byte for subimplementation | // | 0: FastLocalBloom | // | other: reserved | // len+2 +-----------------------------------+ // | byte for block_and_probes | // | 0 in top 3 bits -> 6 -> 64-byte | // | reserved: | // | 1 in top 3 bits -> 7 -> 128-byte| // | 2 in top 3 bits -> 8 -> 256-byte| // | ... 
| // | num_probes in bottom 5 bits, | // | except 0 and 31 reserved | // len+3 +-----------------------------------+ // | two bytes reserved | // | possibly for hash seed | // len_with_meta +-----------------------------------+ // Read more metadata (see above) char sub_impl_val = contents.data()[len_with_meta - 4]; char block_and_probes = contents.data()[len_with_meta - 3]; int log2_block_bytes = ((block_and_probes >> 5) & 7) + 6; int num_probes = (block_and_probes & 31); if (num_probes < 1 || num_probes > 30) { // Reserved / future safe return new AlwaysTrueFilter(); } uint16_t rest = DecodeFixed16(contents.data() + len_with_meta - 2); if (rest != 0) { // Reserved, possibly for hash seed // Future safe return new AlwaysTrueFilter(); } if (sub_impl_val == 0) { // FastLocalBloom if (log2_block_bytes == 6) { // Only block size supported for now return new FastLocalBloomBitsReader(contents.data(), num_probes, len); } } // otherwise // Reserved / future safe return new AlwaysTrueFilter(); } const FilterPolicy* NewBloomFilterPolicy(double bits_per_key, bool use_block_based_builder) { BloomFilterPolicy::Mode m; if (use_block_based_builder) { m = BloomFilterPolicy::kDeprecatedBlock; } else { m = BloomFilterPolicy::kAuto; } assert(std::find(BloomFilterPolicy::kAllUserModes.begin(), BloomFilterPolicy::kAllUserModes.end(), m) != BloomFilterPolicy::kAllUserModes.end()); return new BloomFilterPolicy(bits_per_key, m); } FilterBuildingContext::FilterBuildingContext( const BlockBasedTableOptions& _table_options) : table_options(_table_options) {} FilterPolicy::~FilterPolicy() { } Status FilterPolicy::CreateFromString( const ConfigOptions& /*options*/, const std::string& value, std::shared_ptr* policy) { const std::string kBloomName = "bloomfilter:"; if (value == kNullptrString || value == "rocksdb.BuiltinBloomFilter") { policy->reset(); #ifndef ROCKSDB_LITE } else if (value.compare(0, kBloomName.size(), kBloomName) == 0) { size_t pos = value.find(':', kBloomName.size()); if (pos 
== std::string::npos) { return Status::InvalidArgument( "Invalid filter policy config, missing bits_per_key"); } else { double bits_per_key = ParseDouble( trim(value.substr(kBloomName.size(), pos - kBloomName.size()))); bool use_block_based_builder = ParseBoolean("use_block_based_builder", trim(value.substr(pos + 1))); policy->reset( NewBloomFilterPolicy(bits_per_key, use_block_based_builder)); } } else { return Status::InvalidArgument("Invalid filter policy name ", value); #else } else { return Status::NotSupported("Cannot load filter policy in LITE mode ", value); #endif // ROCKSDB_LITE } return Status::OK(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/filter_policy_internal.h000066400000000000000000000137411370372246700235030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2012 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include "rocksdb/filter_policy.h" #include "rocksdb/table.h" namespace ROCKSDB_NAMESPACE { class Slice; // Exposes any extra information needed for testing built-in // FilterBitsBuilders class BuiltinFilterBitsBuilder : public FilterBitsBuilder { public: // Calculate number of bytes needed for a new filter, including // metadata. Passing the result to CalculateNumEntry should // return >= the num_entry passed in. virtual uint32_t CalculateSpace(const int num_entry) = 0; // Returns an estimate of the FP rate of the returned filter if // `keys` keys are added and the filter returned by Finish is `bytes` // bytes. 
virtual double EstimatedFpRate(size_t keys, size_t bytes) = 0; }; // RocksDB built-in filter policy for Bloom or Bloom-like filters. // This class is considered internal API and subject to change. // See NewBloomFilterPolicy. class BloomFilterPolicy : public FilterPolicy { public: // An internal marker for operating modes of BloomFilterPolicy, in terms // of selecting an implementation. This makes it easier for tests to track // or to walk over the built-in set of Bloom filter implementations. The // only variance in BloomFilterPolicy by mode/implementation is in // GetFilterBitsBuilder(), so an enum is practical here vs. subclasses. // // This enum is essentially the union of all the different kinds of return // value from GetFilterBitsBuilder, or "underlying implementation", and // higher-level modes that choose an underlying implementation based on // context information. enum Mode { // Legacy implementation of Bloom filter for full and partitioned filters. // Set to 0 in case of value confusion with bool use_block_based_builder // NOTE: TESTING ONLY as this mode does not use best compatible // implementation kLegacyBloom = 0, // Deprecated block-based Bloom filter implementation. // Set to 1 in case of value confusion with bool use_block_based_builder // NOTE: DEPRECATED but user exposed kDeprecatedBlock = 1, // A fast, cache-local Bloom filter implementation. See description in // FastLocalBloomImpl. // NOTE: TESTING ONLY as this mode does not check format_version kFastLocalBloom = 2, // Automatically choose from the above (except kDeprecatedBlock) based on // context at build time, including compatibility with format_version. // NOTE: This is currently the only recommended mode that is user exposed. kAuto = 100, }; // All the different underlying implementations that a BloomFilterPolicy // might use, as a mode that says "always use this implementation." // Only appropriate for unit tests. 
static const std::vector kAllFixedImpls; // All the different modes of BloomFilterPolicy that are exposed from // user APIs. Only appropriate for higher-level unit tests. Integration // tests should prefer using NewBloomFilterPolicy (user-exposed). static const std::vector kAllUserModes; explicit BloomFilterPolicy(double bits_per_key, Mode mode); ~BloomFilterPolicy() override; const char* Name() const override; // Deprecated block-based filter only void CreateFilter(const Slice* keys, int n, std::string* dst) const override; // Deprecated block-based filter only bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override; FilterBitsBuilder* GetFilterBitsBuilder() const override; // To use this function, call GetBuilderFromContext(). // // Neither the context nor any objects therein should be saved beyond // the call to this function, unless it's shared_ptr. FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const override; // Returns a new FilterBitsBuilder from the filter_policy in // table_options of a context, or nullptr if not applicable. // (An internal convenience function to save boilerplate.) static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); // Read metadata to determine what kind of FilterBitsReader is needed // and return a new one. This must successfully process any filter data // generated by a built-in FilterBitsBuilder, regardless of the impl // chosen for this BloomFilterPolicy. Not compatible with CreateFilter. FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; // Essentially for testing only: configured millibits/key int GetMillibitsPerKey() const { return millibits_per_key_; } // Essentially for testing only: legacy whole bits/key int GetWholeBitsPerKey() const { return whole_bits_per_key_; } private: // Newer filters support fractional bits per key. 
For predictable behavior // of 0.001-precision values across floating point implementations, we // round to thousandths of a bit (on average) per key. int millibits_per_key_; // Older filters round to whole number bits per key. (There *should* be no // compatibility issue with fractional bits per key, but preserving old // behavior with format_version < 5 just in case.) int whole_bits_per_key_; // Selected mode (a specific implementation or way of selecting an // implementation) for building new SST filters. Mode mode_; // Whether relevant warnings have been logged already. (Remember so we // only report once per BloomFilterPolicy instance, to keep the noise down.) mutable std::atomic warned_; // For newer Bloom filter implementation(s) FilterBitsReader* GetBloomBitsReader(const Slice& contents) const; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/flush_block_policy.cc000066400000000000000000000061741370372246700227550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "rocksdb/flush_block_policy.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "table/block_based/block_builder.h" #include "table/format.h" #include namespace ROCKSDB_NAMESPACE { // Flush block by size class FlushBlockBySizePolicy : public FlushBlockPolicy { public: // @params block_size: Approximate size of user data packed per // block. 
// @params block_size_deviation: This is used to close a block before it // reaches the configured FlushBlockBySizePolicy(const uint64_t block_size, const uint64_t block_size_deviation, const bool align, const BlockBuilder& data_block_builder) : block_size_(block_size), block_size_deviation_limit_( ((block_size * (100 - block_size_deviation)) + 99) / 100), align_(align), data_block_builder_(data_block_builder) {} bool Update(const Slice& key, const Slice& value) override { // it makes no sense to flush when the data block is empty if (data_block_builder_.empty()) { return false; } auto curr_size = data_block_builder_.CurrentSizeEstimate(); // Do flush if one of the below two conditions is true: // 1) if the current estimated size already exceeds the block size, // 2) block_size_deviation is set and the estimated size after appending // the kv will exceed the block size and the current size is under the // the deviation. return curr_size >= block_size_ || BlockAlmostFull(key, value); } private: bool BlockAlmostFull(const Slice& key, const Slice& value) const { if (block_size_deviation_limit_ == 0) { return false; } const auto curr_size = data_block_builder_.CurrentSizeEstimate(); auto estimated_size_after = data_block_builder_.EstimateSizeAfterKV(key, value); if (align_) { estimated_size_after += kBlockTrailerSize; return estimated_size_after > block_size_; } return estimated_size_after > block_size_ && curr_size > block_size_deviation_limit_; } const uint64_t block_size_; const uint64_t block_size_deviation_limit_; const bool align_; const BlockBuilder& data_block_builder_; }; FlushBlockPolicy* FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( const BlockBasedTableOptions& table_options, const BlockBuilder& data_block_builder) const { return new FlushBlockBySizePolicy( table_options.block_size, table_options.block_size_deviation, table_options.block_align, data_block_builder); } FlushBlockPolicy* FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( const uint64_t 
size, const int deviation, const BlockBuilder& data_block_builder) { return new FlushBlockBySizePolicy(size, deviation, false, data_block_builder); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/flush_block_policy.h000066400000000000000000000021441370372246700226100ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "rocksdb/flush_block_policy.h" namespace ROCKSDB_NAMESPACE { // FlushBlockEveryKeyPolicy currently used only in tests. class FlushBlockEveryKeyPolicy : public FlushBlockPolicy { public: bool Update(const Slice& /*key*/, const Slice& /*value*/) override { if (!start_) { start_ = true; return false; } return true; } private: bool start_ = false; }; class FlushBlockEveryKeyPolicyFactory : public FlushBlockPolicyFactory { public: explicit FlushBlockEveryKeyPolicyFactory() {} const char* Name() const override { return "FlushBlockEveryKeyPolicyFactory"; } FlushBlockPolicy* NewFlushBlockPolicy( const BlockBasedTableOptions& /*table_options*/, const BlockBuilder& /*data_block_builder*/) const override { return new FlushBlockEveryKeyPolicy; } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/full_filter_block.cc000066400000000000000000000267371370372246700225730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "table/block_based/full_filter_block.h" #include #include "monitoring/perf_context_imp.h" #include "port/malloc.h" #include "port/port.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { FullFilterBlockBuilder::FullFilterBlockBuilder( const SliceTransform* _prefix_extractor, bool whole_key_filtering, FilterBitsBuilder* filter_bits_builder) : prefix_extractor_(_prefix_extractor), whole_key_filtering_(whole_key_filtering), last_whole_key_recorded_(false), last_prefix_recorded_(false), num_added_(0) { assert(filter_bits_builder != nullptr); filter_bits_builder_.reset(filter_bits_builder); } void FullFilterBlockBuilder::Add(const Slice& key) { const bool add_prefix = prefix_extractor_ && prefix_extractor_->InDomain(key); if (whole_key_filtering_) { if (!add_prefix) { AddKey(key); } else { // if both whole_key and prefix are added to bloom then we will have whole // key and prefix addition being interleaved and thus cannot rely on the // bits builder to properly detect the duplicates by comparing with the // last item. Slice last_whole_key = Slice(last_whole_key_str_); if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) { AddKey(key); last_whole_key_recorded_ = true; last_whole_key_str_.assign(key.data(), key.size()); } } } if (add_prefix) { AddPrefix(key); } } // Add key to filter if needed inline void FullFilterBlockBuilder::AddKey(const Slice& key) { filter_bits_builder_->AddKey(key); num_added_++; } // Add prefix to filter if needed void FullFilterBlockBuilder::AddPrefix(const Slice& key) { Slice prefix = prefix_extractor_->Transform(key); if (whole_key_filtering_) { // if both whole_key and prefix are added to bloom then we will have whole // key and prefix addition being interleaved and thus cannot rely on the // bits builder to properly detect the duplicates by comparing with the last // item. 
Slice last_prefix = Slice(last_prefix_str_); if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) { AddKey(prefix); last_prefix_recorded_ = true; last_prefix_str_.assign(prefix.data(), prefix.size()); } } else { AddKey(prefix); } } void FullFilterBlockBuilder::Reset() { last_whole_key_recorded_ = false; last_prefix_recorded_ = false; } Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, Status* status) { Reset(); // In this impl we ignore BlockHandle *status = Status::OK(); if (num_added_ != 0) { num_added_ = 0; return filter_bits_builder_->Finish(&filter_data_); } return Slice(); } FullFilterBlockReader::FullFilterBlockReader( const BlockBasedTable* t, CachableEntry&& filter_block) : FilterBlockReaderCommon(t, std::move(filter_block)) { const SliceTransform* const prefix_extractor = table_prefix_extractor(); if (prefix_extractor) { full_length_enabled_ = prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_); } } bool FullFilterBlockReader::KeyMayMatch( const Slice& key, const SliceTransform* /*prefix_extractor*/, uint64_t block_offset, const bool no_io, const Slice* const /*const_ikey_ptr*/, GetContext* get_context, BlockCacheLookupContext* lookup_context) { #ifdef NDEBUG (void)block_offset; #endif assert(block_offset == kNotValid); if (!whole_key_filtering()) { return true; } return MayMatch(key, no_io, get_context, lookup_context); } std::unique_ptr FullFilterBlockReader::Create( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context) { assert(table); assert(table->get_rep()); assert(!pin || prefetch); CachableEntry filter_block; if (prefetch || !use_cache) { const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(), use_cache, nullptr /* get_context */, lookup_context, &filter_block); if (!s.ok()) { IGNORE_STATUS_IF_ERROR(s); return std::unique_ptr(); } if (use_cache && !pin) { filter_block.Reset(); } } return 
std::unique_ptr( new FullFilterBlockReader(table, std::move(filter_block))); } bool FullFilterBlockReader::PrefixMayMatch( const Slice& prefix, const SliceTransform* /* prefix_extractor */, uint64_t block_offset, const bool no_io, const Slice* const /*const_ikey_ptr*/, GetContext* get_context, BlockCacheLookupContext* lookup_context) { #ifdef NDEBUG (void)block_offset; #endif assert(block_offset == kNotValid); return MayMatch(prefix, no_io, get_context, lookup_context); } bool FullFilterBlockReader::MayMatch( const Slice& entry, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context) const { CachableEntry filter_block; const Status s = GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block); if (!s.ok()) { IGNORE_STATUS_IF_ERROR(s); return true; } assert(filter_block.GetValue()); FilterBitsReader* const filter_bits_reader = filter_block.GetValue()->filter_bits_reader(); if (filter_bits_reader) { if (filter_bits_reader->MayMatch(entry)) { PERF_COUNTER_ADD(bloom_sst_hit_count, 1); return true; } else { PERF_COUNTER_ADD(bloom_sst_miss_count, 1); return false; } } return true; // remain the same with block_based filter } void FullFilterBlockReader::KeysMayMatch( MultiGetRange* range, const SliceTransform* /*prefix_extractor*/, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { #ifdef NDEBUG (void)block_offset; #endif assert(block_offset == kNotValid); if (!whole_key_filtering()) { // Simply return. 
Don't skip any key - consider all keys as likely to be // present return; } MayMatch(range, no_io, nullptr, lookup_context); } void FullFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { #ifdef NDEBUG (void)block_offset; #endif assert(block_offset == kNotValid); MayMatch(range, no_io, prefix_extractor, lookup_context); } void FullFilterBlockReader::MayMatch( MultiGetRange* range, bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const { CachableEntry filter_block; const Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context, lookup_context, &filter_block); if (!s.ok()) { IGNORE_STATUS_IF_ERROR(s); return; } assert(filter_block.GetValue()); FilterBitsReader* const filter_bits_reader = filter_block.GetValue()->filter_bits_reader(); if (!filter_bits_reader) { return; } // We need to use an array instead of autovector for may_match since // &may_match[0] doesn't work for autovector (compiler error). So // declare both keys and may_match as arrays, which is also slightly less // expensive compared to autovector std::array keys; std::array may_match = {{true}}; autovector prefixes; int num_keys = 0; MultiGetRange filter_range(*range, range->begin(), range->end()); for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) { if (!prefix_extractor) { keys[num_keys++] = &iter->ukey; } else if (prefix_extractor->InDomain(iter->ukey)) { prefixes.emplace_back(prefix_extractor->Transform(iter->ukey)); keys[num_keys++] = &prefixes.back(); } else { filter_range.SkipKey(iter); } } filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]); int i = 0; for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) { if (!may_match[i]) { // Update original MultiGet range to skip this key. 
The filter_range // was temporarily used just to skip keys not in prefix_extractor domain range->SkipKey(iter); PERF_COUNTER_ADD(bloom_sst_miss_count, 1); } else { // PERF_COUNTER_ADD(bloom_sst_hit_count, 1); PerfContext* perf_ctx = get_perf_context(); perf_ctx->bloom_sst_hit_count++; } ++i; } } size_t FullFilterBlockReader::ApproximateMemoryUsage() const { size_t usage = ApproximateFilterBlockMemoryUsage(); #ifdef ROCKSDB_MALLOC_USABLE_SIZE usage += malloc_usable_size(const_cast(this)); #else usage += sizeof(*this); #endif // ROCKSDB_MALLOC_USABLE_SIZE return usage; } bool FullFilterBlockReader::RangeMayExist( const Slice* iterate_upper_bound, const Slice& user_key, const SliceTransform* prefix_extractor, const Comparator* comparator, const Slice* const const_ikey_ptr, bool* filter_checked, bool need_upper_bound_check, bool no_io, BlockCacheLookupContext* lookup_context) { if (!prefix_extractor || !prefix_extractor->InDomain(user_key)) { *filter_checked = false; return true; } Slice prefix = prefix_extractor->Transform(user_key); if (need_upper_bound_check && !IsFilterCompatible(iterate_upper_bound, prefix, comparator)) { *filter_checked = false; return true; } else { *filter_checked = true; return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io, const_ikey_ptr, /* get_context */ nullptr, lookup_context); } } bool FullFilterBlockReader::IsFilterCompatible( const Slice* iterate_upper_bound, const Slice& prefix, const Comparator* comparator) const { // Try to reuse the bloom filter in the SST table if prefix_extractor in // mutable_cf_options has changed. If range [user_key, upper_bound) all // share the same prefix then we may still be able to use the bloom filter. 
const SliceTransform* const prefix_extractor = table_prefix_extractor(); if (iterate_upper_bound != nullptr && prefix_extractor) { if (!prefix_extractor->InDomain(*iterate_upper_bound)) { return false; } Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound); // first check if user_key and upper_bound all share the same prefix if (!comparator->Equal(prefix, upper_bound_xform)) { // second check if user_key's prefix is the immediate predecessor of // upper_bound and have the same length. If so, we know for sure all // keys in the range [user_key, upper_bound) share the same prefix. // Also need to make sure upper_bound are full length to ensure // correctness if (!full_length_enabled_ || iterate_upper_bound->size() != prefix_extractor_full_length_ || !comparator->IsSameLengthImmediateSuccessor(prefix, *iterate_upper_bound)) { return false; } } return true; } else { return false; } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/full_filter_block.h000066400000000000000000000130371370372246700224220ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include "db/dbformat.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "table/block_based/filter_block_reader_common.h" #include "table/block_based/parsed_full_filter_block.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { class FilterPolicy; class FilterBitsBuilder; class FilterBitsReader; // A FullFilterBlockBuilder is used to construct a full filter for a // particular Table. It generates a single string which is stored as // a special block in the Table. 
// The format of full filter block is: // +----------------------------------------------------------------+ // | full filter for all keys in sst file | // +----------------------------------------------------------------+ // The full filter can be very large. At the end of it, we put // num_probes: how many hash functions are used in bloom filter // class FullFilterBlockBuilder : public FilterBlockBuilder { public: explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor, bool whole_key_filtering, FilterBitsBuilder* filter_bits_builder); // No copying allowed FullFilterBlockBuilder(const FullFilterBlockBuilder&) = delete; void operator=(const FullFilterBlockBuilder&) = delete; // bits_builder is created in filter_policy, it should be passed in here // directly. and be deleted here ~FullFilterBlockBuilder() {} virtual bool IsBlockBased() override { return false; } virtual void StartBlock(uint64_t /*block_offset*/) override {} virtual void Add(const Slice& key) override; virtual size_t NumAdded() const override { return num_added_; } virtual Slice Finish(const BlockHandle& tmp, Status* status) override; using FilterBlockBuilder::Finish; protected: virtual void AddKey(const Slice& key); std::unique_ptr filter_bits_builder_; virtual void Reset(); void AddPrefix(const Slice& key); const SliceTransform* prefix_extractor() { return prefix_extractor_; } private: // important: all of these might point to invalid addresses // at the time of destruction of this filter block. destructor // should NOT dereference them. const SliceTransform* prefix_extractor_; bool whole_key_filtering_; bool last_whole_key_recorded_; std::string last_whole_key_str_; bool last_prefix_recorded_; std::string last_prefix_str_; uint32_t num_added_; std::unique_ptr filter_data_; }; // A FilterBlockReader is used to parse filter from SST table. 
// KeyMayMatch and PrefixMayMatch would trigger filter checking class FullFilterBlockReader : public FilterBlockReaderCommon { public: FullFilterBlockReader(const BlockBasedTable* t, CachableEntry&& filter_block); static std::unique_ptr Create( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); bool IsBlockBased() override { return false; } bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; bool PrefixMayMatch(const Slice& prefix, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; void KeysMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) override; void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key, const SliceTransform* prefix_extractor, const Comparator* comparator, const Slice* const const_ikey_ptr, bool* filter_checked, bool need_upper_bound_check, bool no_io, BlockCacheLookupContext* lookup_context) override; private: bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context) const; void MayMatch(MultiGetRange* range, bool no_io, const SliceTransform* prefix_extractor, BlockCacheLookupContext* lookup_context) const; bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix, const Comparator* comparator) const; private: bool 
full_length_enabled_; size_t prefix_extractor_full_length_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/full_filter_block_test.cc000066400000000000000000000316461370372246700236250ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include "table/block_based/full_filter_block.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/mock_block_based_table.h" #include "table/block_based/filter_policy_internal.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "util/hash.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { class TestFilterBitsBuilder : public FilterBitsBuilder { public: explicit TestFilterBitsBuilder() {} // Add Key to filter void AddKey(const Slice& key) override { hash_entries_.push_back(Hash(key.data(), key.size(), 1)); } // Generate the filter using the keys that are added Slice Finish(std::unique_ptr* buf) override { uint32_t len = static_cast(hash_entries_.size()) * 4; char* data = new char[len]; for (size_t i = 0; i < hash_entries_.size(); i++) { EncodeFixed32(data + i * 4, hash_entries_[i]); } const char* const_data = data; buf->reset(const_data); return Slice(data, len); } private: std::vector hash_entries_; }; class MockBlockBasedTable : public BlockBasedTable { public: explicit MockBlockBasedTable(Rep* rep) : BlockBasedTable(rep, nullptr /* block_cache_tracer */) {} }; class TestFilterBitsReader : public FilterBitsReader { public: explicit TestFilterBitsReader(const Slice& contents) : data_(contents.data()), len_(static_cast(contents.size())) {} // Silence compiler warning about overloaded virtual using FilterBitsReader::MayMatch; bool MayMatch(const 
Slice& entry) override { uint32_t h = Hash(entry.data(), entry.size(), 1); for (size_t i = 0; i + 4 <= len_; i += 4) { if (h == DecodeFixed32(data_ + i)) { return true; } } return false; } private: const char* data_; uint32_t len_; }; class TestHashFilter : public FilterPolicy { public: const char* Name() const override { return "TestHashFilter"; } void CreateFilter(const Slice* keys, int n, std::string* dst) const override { for (int i = 0; i < n; i++) { uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); PutFixed32(dst, h); } } bool KeyMayMatch(const Slice& key, const Slice& filter) const override { uint32_t h = Hash(key.data(), key.size(), 1); for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { if (h == DecodeFixed32(filter.data() + i)) { return true; } } return false; } FilterBitsBuilder* GetFilterBitsBuilder() const override { return new TestFilterBitsBuilder(); } FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override { return new TestFilterBitsReader(contents); } }; class PluginFullFilterBlockTest : public mock::MockBlockBasedTableTester, public testing::Test { public: PluginFullFilterBlockTest() : mock::MockBlockBasedTableTester(new TestHashFilter) {} }; TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) { FullFilterBlockBuilder builder(nullptr, true, GetBuilder()); Slice slice = builder.Finish(); ASSERT_EQ("", EscapeString(slice)); CachableEntry block( new ParsedFullFilterBlock(table_options_.filter_policy.get(), BlockContents(slice)), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); // Remain same symantic with blockbased filter ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) { FullFilterBlockBuilder builder(nullptr, true, GetBuilder()); 
builder.Add("foo"); builder.Add("bar"); builder.Add("box"); builder.Add("box"); builder.Add("hello"); Slice slice = builder.Finish(); CachableEntry block( new ParsedFullFilterBlock(table_options_.filter_policy.get(), BlockContents(slice)), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader.KeyMayMatch( "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader.KeyMayMatch( "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } class FullFilterBlockTest : public mock::MockBlockBasedTableTester, public testing::Test { public: FullFilterBlockTest() : mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, false)) {} }; TEST_F(FullFilterBlockTest, EmptyBuilder) { 
FullFilterBlockBuilder builder(nullptr, true, GetBuilder()); Slice slice = builder.Finish(); ASSERT_EQ("", EscapeString(slice)); CachableEntry block( new ParsedFullFilterBlock(table_options_.filter_policy.get(), BlockContents(slice)), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); // Remain same symantic with blockbased filter ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder { std::unique_ptr b_; std::set uniq_; public: explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {} ~CountUniqueFilterBitsBuilderWrapper() override {} void AddKey(const Slice& key) override { b_->AddKey(key); uniq_.insert(key.ToString()); } Slice Finish(std::unique_ptr* buf) override { Slice rv = b_->Finish(buf); uniq_.clear(); return rv; } int CalculateNumEntry(const uint32_t bytes) override { return b_->CalculateNumEntry(bytes); } size_t CountUnique() { return uniq_.size(); } }; TEST_F(FullFilterBlockTest, DuplicateEntries) { { // empty prefixes std::unique_ptr prefix_extractor( NewFixedPrefixTransform(0)); auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(GetBuilder()); const bool WHOLE_KEY = true; FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, bits_builder); ASSERT_EQ(0, builder.NumAdded()); ASSERT_EQ(0, bits_builder->CountUnique()); // adds key and empty prefix; both abstractions count them builder.Add("key1"); ASSERT_EQ(2, builder.NumAdded()); ASSERT_EQ(2, bits_builder->CountUnique()); // Add different key (unique) and also empty prefix (not unique). // From here in this test, it's immaterial whether the block builder // can count unique keys. 
builder.Add("key2"); ASSERT_EQ(3, bits_builder->CountUnique()); // Empty key -> nothing unique builder.Add(""); ASSERT_EQ(3, bits_builder->CountUnique()); } // mix of empty and non-empty std::unique_ptr prefix_extractor( NewFixedPrefixTransform(7)); auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(GetBuilder()); const bool WHOLE_KEY = true; FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, bits_builder); ASSERT_EQ(0, builder.NumAdded()); builder.Add(""); // test with empty key too builder.Add("prefix1key1"); builder.Add("prefix1key1"); builder.Add("prefix1key2"); builder.Add("prefix1key3"); builder.Add("prefix2key4"); // 1 empty, 2 non-empty prefixes, and 4 non-empty keys ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique()); } TEST_F(FullFilterBlockTest, SingleChunk) { FullFilterBlockBuilder builder(nullptr, true, GetBuilder()); ASSERT_EQ(0, builder.NumAdded()); builder.Add("foo"); builder.Add("bar"); builder.Add("box"); builder.Add("box"); builder.Add("hello"); ASSERT_EQ(5, builder.NumAdded()); Slice slice = builder.Finish(); CachableEntry block( new ParsedFullFilterBlock(table_options_.filter_policy.get(), BlockContents(slice)), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); FullFilterBlockReader reader(table_.get(), std::move(block)); ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, 
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader.KeyMayMatch( "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); ASSERT_TRUE(!reader.KeyMayMatch( "other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid, /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/block_based/hash_index_reader.cc000066400000000000000000000124751370372246700225400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/hash_index_reader.h" #include "table/block_fetcher.h" #include "table/meta_blocks.h" namespace ROCKSDB_NAMESPACE { Status HashIndexReader::Create(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_index_iter, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader) { assert(table != nullptr); assert(index_reader != nullptr); assert(!pin || prefetch); const BlockBasedTable::Rep* rep = table->get_rep(); assert(rep != nullptr); CachableEntry index_block; if (prefetch || !use_cache) { const Status s = ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache, /*get_context=*/nullptr, lookup_context, &index_block); if (!s.ok()) { return s; } if (use_cache && !pin) { index_block.Reset(); } } // Note, failure to create prefix hash index does not need to be a // hard error. We can still fall back to the original binary search index. // So, Create will succeed regardless, from this point on. 
index_reader->reset(new HashIndexReader(table, std::move(index_block))); // Get prefixes block BlockHandle prefixes_handle; Status s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesBlock, &prefixes_handle); if (!s.ok()) { // TODO: log error return Status::OK(); } // Get index metadata block BlockHandle prefixes_meta_handle; s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesMetadataBlock, &prefixes_meta_handle); if (!s.ok()) { // TODO: log error return Status::OK(); } RandomAccessFileReader* const file = rep->file.get(); const Footer& footer = rep->footer; const ImmutableCFOptions& ioptions = rep->ioptions; const PersistentCacheOptions& cache_options = rep->persistent_cache_options; MemoryAllocator* const memory_allocator = GetMemoryAllocator(rep->table_options); // Read contents for the blocks BlockContents prefixes_contents; BlockFetcher prefixes_block_fetcher( file, prefetch_buffer, footer, ReadOptions(), prefixes_handle, &prefixes_contents, ioptions, true /*decompress*/, true /*maybe_compressed*/, BlockType::kHashIndexPrefixes, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = prefixes_block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; } BlockContents prefixes_meta_contents; BlockFetcher prefixes_meta_block_fetcher( file, prefetch_buffer, footer, ReadOptions(), prefixes_meta_handle, &prefixes_meta_contents, ioptions, true /*decompress*/, true /*maybe_compressed*/, BlockType::kHashIndexMetadata, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = prefixes_meta_block_fetcher.ReadBlockContents(); if (!s.ok()) { // TODO: log error return Status::OK(); } BlockPrefixIndex* prefix_index = nullptr; assert(rep->internal_prefix_transform.get() != nullptr); s = BlockPrefixIndex::Create(rep->internal_prefix_transform.get(), prefixes_contents.data, prefixes_meta_contents.data, &prefix_index); // TODO: log error if (s.ok()) { HashIndexReader* const hash_index_reader = static_cast(index_reader->get()); 
hash_index_reader->prefix_index_.reset(prefix_index); } return Status::OK(); } InternalIteratorBase* HashIndexReader::NewIterator( const ReadOptions& read_options, bool disable_prefix_seek, IndexBlockIter* iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) { const BlockBasedTable::Rep* rep = table()->get_rep(); const bool no_io = (read_options.read_tier == kBlockCacheTier); CachableEntry index_block; const Status s = GetOrReadIndexBlock(no_io, get_context, lookup_context, &index_block); if (!s.ok()) { if (iter != nullptr) { iter->Invalidate(s); return iter; } return NewErrorInternalIterator(s); } Statistics* kNullStats = nullptr; const bool total_order_seek = read_options.total_order_seek || disable_prefix_seek; // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. auto it = index_block.GetValue()->NewIndexIterator( internal_comparator(), internal_comparator()->user_comparator(), rep->get_global_seqno(BlockType::kIndex), iter, kNullStats, total_order_seek, index_has_first_key(), index_key_includes_seq(), index_value_is_full(), false /* block_contents_pinned */, prefix_index_.get()); assert(it != nullptr); index_block.TransferTo(it); return it; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/hash_index_reader.h000066400000000000000000000036521370372246700223770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include "table/block_based/index_reader_common.h" namespace ROCKSDB_NAMESPACE { // Index that leverages an internal hash table to quicken the lookup for a given // key. class HashIndexReader : public BlockBasedTable::IndexReaderCommon { public: static Status Create(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, InternalIterator* meta_index_iter, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader); InternalIteratorBase* NewIterator( const ReadOptions& read_options, bool disable_prefix_seek, IndexBlockIter* iter, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override { size_t usage = ApproximateIndexBlockMemoryUsage(); #ifdef ROCKSDB_MALLOC_USABLE_SIZE usage += malloc_usable_size(const_cast(this)); #else if (prefix_index_) { usage += prefix_index_->ApproximateMemoryUsage(); } usage += sizeof(*this); #endif // ROCKSDB_MALLOC_USABLE_SIZE return usage; } private: HashIndexReader(const BlockBasedTable* t, CachableEntry&& index_block) : IndexReaderCommon(t, std::move(index_block)) {} std::unique_ptr prefix_index_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/index_builder.cc000066400000000000000000000244431370372246700217170ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/index_builder.h" #include #include #include #include #include "rocksdb/comparator.h" #include "rocksdb/flush_block_policy.h" #include "table/block_based/partitioned_filter_block.h" #include "table/format.h" // Without anonymous namespace here, we fail the warning -Wmissing-prototypes namespace ROCKSDB_NAMESPACE { // using namespace rocksdb; // Create a index builder based on its type. IndexBuilder* IndexBuilder::CreateIndexBuilder( BlockBasedTableOptions::IndexType index_type, const InternalKeyComparator* comparator, const InternalKeySliceTransform* int_key_slice_transform, const bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt) { IndexBuilder* result = nullptr; switch (index_type) { case BlockBasedTableOptions::kBinarySearch: { result = new ShortenedIndexBuilder( comparator, table_opt.index_block_restart_interval, table_opt.format_version, use_value_delta_encoding, table_opt.index_shortening, /* include_first_key */ false); } break; case BlockBasedTableOptions::kHashSearch: { // Currently kHashSearch is incompatible with index_block_restart_interval // > 1 assert(table_opt.index_block_restart_interval == 1); result = new HashIndexBuilder( comparator, int_key_slice_transform, table_opt.index_block_restart_interval, table_opt.format_version, use_value_delta_encoding, table_opt.index_shortening); } break; case BlockBasedTableOptions::kTwoLevelIndexSearch: { result = PartitionedIndexBuilder::CreateIndexBuilder( comparator, use_value_delta_encoding, table_opt); } break; case BlockBasedTableOptions::kBinarySearchWithFirstKey: { result = new ShortenedIndexBuilder( comparator, table_opt.index_block_restart_interval, table_opt.format_version, use_value_delta_encoding, table_opt.index_shortening, /* include_first_key */ true); } break; default: { assert(!"Do not recognize the index type "); } break; } return result; } PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder( const InternalKeyComparator* 
comparator, const bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt) { return new PartitionedIndexBuilder(comparator, table_opt, use_value_delta_encoding); } PartitionedIndexBuilder::PartitionedIndexBuilder( const InternalKeyComparator* comparator, const BlockBasedTableOptions& table_opt, const bool use_value_delta_encoding) : IndexBuilder(comparator), index_block_builder_(table_opt.index_block_restart_interval, true /*use_delta_encoding*/, use_value_delta_encoding), index_block_builder_without_seq_(table_opt.index_block_restart_interval, true /*use_delta_encoding*/, use_value_delta_encoding), sub_index_builder_(nullptr), table_opt_(table_opt), // We start by false. After each partition we revise the value based on // what the sub_index_builder has decided. If the feature is disabled // entirely, this will be set to true after switching the first // sub_index_builder. Otherwise, it could be set to true even one of the // sub_index_builders could not safely exclude seq from the keys, then it // wil be enforced on all sub_index_builders on ::Finish. 
seperator_is_key_plus_seq_(false), use_value_delta_encoding_(use_value_delta_encoding) {} PartitionedIndexBuilder::~PartitionedIndexBuilder() { delete sub_index_builder_; } void PartitionedIndexBuilder::MakeNewSubIndexBuilder() { assert(sub_index_builder_ == nullptr); sub_index_builder_ = new ShortenedIndexBuilder( comparator_, table_opt_.index_block_restart_interval, table_opt_.format_version, use_value_delta_encoding_, table_opt_.index_shortening, /* include_first_key */ false); // Set sub_index_builder_->seperator_is_key_plus_seq_ to true if // seperator_is_key_plus_seq_ is true (internal-key mode) (set to false by // default on Creation) so that flush policy can point to // sub_index_builder_->index_block_builder_ if (seperator_is_key_plus_seq_) { sub_index_builder_->seperator_is_key_plus_seq_ = true; } flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( table_opt_.metadata_block_size, table_opt_.block_size_deviation, // Note: this is sub-optimal since sub_index_builder_ could later reset // seperator_is_key_plus_seq_ but the probability of that is low. sub_index_builder_->seperator_is_key_plus_seq_ ? 
sub_index_builder_->index_block_builder_ : sub_index_builder_->index_block_builder_without_seq_)); partition_cut_requested_ = false; } void PartitionedIndexBuilder::RequestPartitionCut() { partition_cut_requested_ = true; } void PartitionedIndexBuilder::AddIndexEntry( std::string* last_key_in_current_block, const Slice* first_key_in_next_block, const BlockHandle& block_handle) { // Note: to avoid two consecuitive flush in the same method call, we do not // check flush policy when adding the last key if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys if (sub_index_builder_ == nullptr) { MakeNewSubIndexBuilder(); } sub_index_builder_->AddIndexEntry(last_key_in_current_block, first_key_in_next_block, block_handle); if (!seperator_is_key_plus_seq_ && sub_index_builder_->seperator_is_key_plus_seq_) { // then we need to apply it to all sub-index builders and reset // flush_policy to point to Block Builder of sub_index_builder_ that store // internal keys. seperator_is_key_plus_seq_ = true; flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( table_opt_.metadata_block_size, table_opt_.block_size_deviation, sub_index_builder_->index_block_builder_)); } sub_index_last_key_ = std::string(*last_key_in_current_block); entries_.push_back( {sub_index_last_key_, std::unique_ptr(sub_index_builder_)}); sub_index_builder_ = nullptr; cut_filter_block = true; } else { // apply flush policy only to non-empty sub_index_builder_ if (sub_index_builder_ != nullptr) { std::string handle_encoding; block_handle.EncodeTo(&handle_encoding); bool do_flush = partition_cut_requested_ || flush_policy_->Update(*last_key_in_current_block, handle_encoding); if (do_flush) { entries_.push_back( {sub_index_last_key_, std::unique_ptr(sub_index_builder_)}); cut_filter_block = true; sub_index_builder_ = nullptr; } } if (sub_index_builder_ == nullptr) { MakeNewSubIndexBuilder(); } sub_index_builder_->AddIndexEntry(last_key_in_current_block, first_key_in_next_block, 
block_handle); sub_index_last_key_ = std::string(*last_key_in_current_block); if (!seperator_is_key_plus_seq_ && sub_index_builder_->seperator_is_key_plus_seq_) { // then we need to apply it to all sub-index builders and reset // flush_policy to point to Block Builder of sub_index_builder_ that store // internal keys. seperator_is_key_plus_seq_ = true; flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( table_opt_.metadata_block_size, table_opt_.block_size_deviation, sub_index_builder_->index_block_builder_)); } } } Status PartitionedIndexBuilder::Finish( IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) { if (partition_cnt_ == 0) { partition_cnt_ = entries_.size(); } // It must be set to null after last key is added assert(sub_index_builder_ == nullptr); if (finishing_indexes == true) { Entry& last_entry = entries_.front(); std::string handle_encoding; last_partition_block_handle.EncodeTo(&handle_encoding); std::string handle_delta_encoding; PutVarsignedint64( &handle_delta_encoding, last_partition_block_handle.size() - last_encoded_handle_.size()); last_encoded_handle_ = last_partition_block_handle; const Slice handle_delta_encoding_slice(handle_delta_encoding); index_block_builder_.Add(last_entry.key, handle_encoding, &handle_delta_encoding_slice); if (!seperator_is_key_plus_seq_) { index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key), handle_encoding, &handle_delta_encoding_slice); } entries_.pop_front(); } // If there is no sub_index left, then return the 2nd level index. 
if (UNLIKELY(entries_.empty())) { if (seperator_is_key_plus_seq_) { index_blocks->index_block_contents = index_block_builder_.Finish(); } else { index_blocks->index_block_contents = index_block_builder_without_seq_.Finish(); } top_level_index_size_ = index_blocks->index_block_contents.size(); index_size_ += top_level_index_size_; return Status::OK(); } else { // Finish the next partition index in line and Incomplete() to indicate we // expect more calls to Finish Entry& entry = entries_.front(); // Apply the policy to all sub-indexes entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_; auto s = entry.value->Finish(index_blocks); index_size_ += index_blocks->index_block_contents.size(); finishing_indexes = true; return s.ok() ? Status::Incomplete() : s; } } size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/index_builder.h000066400000000000000000000440731370372246700215620ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include #include "rocksdb/comparator.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_builder.h" #include "table/format.h" namespace ROCKSDB_NAMESPACE { // The interface for building index. // Instruction for adding a new concrete IndexBuilder: // 1. Create a subclass instantiated from IndexBuilder. // 2. Add a new entry associated with that subclass in TableOptions::IndexType. // 3. 
Add a create function for the new subclass in CreateIndexBuilder. // Note: we can devise more advanced design to simplify the process for adding // new subclass, which will, on the other hand, increase the code complexity and // catch unwanted attention from readers. Given that we won't add/change // indexes frequently, it makes sense to just embrace a more straightforward // design that just works. class IndexBuilder { public: static IndexBuilder* CreateIndexBuilder( BlockBasedTableOptions::IndexType index_type, const ROCKSDB_NAMESPACE::InternalKeyComparator* comparator, const InternalKeySliceTransform* int_key_slice_transform, const bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt); // Index builder will construct a set of blocks which contain: // 1. One primary index block. // 2. (Optional) a set of metablocks that contains the metadata of the // primary index. struct IndexBlocks { Slice index_block_contents; std::unordered_map meta_blocks; }; explicit IndexBuilder(const InternalKeyComparator* comparator) : comparator_(comparator) {} virtual ~IndexBuilder() {} // Add a new index entry to index block. // To allow further optimization, we provide `last_key_in_current_block` and // `first_key_in_next_block`, based on which the specific implementation can // determine the best index key to be used for the index block. // Called before the OnKeyAdded() call for first_key_in_next_block. // @last_key_in_current_block: this parameter maybe overridden with the value // "substitute key". // @first_key_in_next_block: it will be nullptr if the entry being added is // the last one in the table // // REQUIRES: Finish() has not yet been called. virtual void AddIndexEntry(std::string* last_key_in_current_block, const Slice* first_key_in_next_block, const BlockHandle& block_handle) = 0; // This method will be called whenever a key is added. The subclasses may // override OnKeyAdded() if they need to collect additional information. 
virtual void OnKeyAdded(const Slice& /*key*/) {} // Inform the index builder that all entries has been written. Block builder // may therefore perform any operation required for block finalization. // // REQUIRES: Finish() has not yet been called. inline Status Finish(IndexBlocks* index_blocks) { // Throw away the changes to last_partition_block_handle. It has no effect // on the first call to Finish anyway. BlockHandle last_partition_block_handle; return Finish(index_blocks, last_partition_block_handle); } // This override of Finish can be utilized to build the 2nd level index in // PartitionIndexBuilder. // // index_blocks will be filled with the resulting index data. If the return // value is Status::InComplete() then it means that the index is partitioned // and the callee should keep calling Finish until Status::OK() is returned. // In that case, last_partition_block_handle is pointer to the block written // with the result of the last call to Finish. This can be utilized to build // the second level index pointing to each block of partitioned indexes. The // last call to Finish() that returns Status::OK() populates index_blocks with // the 2nd level index content. virtual Status Finish(IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) = 0; // Get the size for index block. Must be called after ::Finish. virtual size_t IndexSize() const = 0; virtual bool seperator_is_key_plus_seq() { return true; } protected: const InternalKeyComparator* comparator_; // Set after ::Finish is called size_t index_size_ = 0; }; // This index builder builds space-efficient index block. // // Optimizations: // 1. Made block's `block_restart_interval` to be 1, which will avoid linear // search when doing index lookup (can be disabled by setting // index_block_restart_interval). // 2. Shorten the key length for index block. 
Other than honestly using the // last key in the data block as the index key, we instead find a shortest // substitute key that serves the same function. class ShortenedIndexBuilder : public IndexBuilder { public: explicit ShortenedIndexBuilder( const InternalKeyComparator* comparator, const int index_block_restart_interval, const uint32_t format_version, const bool use_value_delta_encoding, BlockBasedTableOptions::IndexShorteningMode shortening_mode, bool include_first_key) : IndexBuilder(comparator), index_block_builder_(index_block_restart_interval, true /*use_delta_encoding*/, use_value_delta_encoding), index_block_builder_without_seq_(index_block_restart_interval, true /*use_delta_encoding*/, use_value_delta_encoding), use_value_delta_encoding_(use_value_delta_encoding), include_first_key_(include_first_key), shortening_mode_(shortening_mode) { // Making the default true will disable the feature for old versions seperator_is_key_plus_seq_ = (format_version <= 2); } virtual void OnKeyAdded(const Slice& key) override { if (include_first_key_ && current_block_first_internal_key_.empty()) { current_block_first_internal_key_.assign(key.data(), key.size()); } } virtual void AddIndexEntry(std::string* last_key_in_current_block, const Slice* first_key_in_next_block, const BlockHandle& block_handle) override { if (first_key_in_next_block != nullptr) { if (shortening_mode_ != BlockBasedTableOptions::IndexShorteningMode::kNoShortening) { comparator_->FindShortestSeparator(last_key_in_current_block, *first_key_in_next_block); } if (!seperator_is_key_plus_seq_ && comparator_->user_comparator()->Compare( ExtractUserKey(*last_key_in_current_block), ExtractUserKey(*first_key_in_next_block)) == 0) { seperator_is_key_plus_seq_ = true; } } else { if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode:: kShortenSeparatorsAndSuccessor) { comparator_->FindShortSuccessor(last_key_in_current_block); } } auto sep = Slice(*last_key_in_current_block); 
assert(!include_first_key_ || !current_block_first_internal_key_.empty()); IndexValue entry(block_handle, current_block_first_internal_key_); std::string encoded_entry; std::string delta_encoded_entry; entry.EncodeTo(&encoded_entry, include_first_key_, nullptr); if (use_value_delta_encoding_ && !last_encoded_handle_.IsNull()) { entry.EncodeTo(&delta_encoded_entry, include_first_key_, &last_encoded_handle_); } else { // If it's the first block, or delta encoding is disabled, // BlockBuilder::Add() below won't use delta-encoded slice. } last_encoded_handle_ = block_handle; const Slice delta_encoded_entry_slice(delta_encoded_entry); index_block_builder_.Add(sep, encoded_entry, &delta_encoded_entry_slice); if (!seperator_is_key_plus_seq_) { index_block_builder_without_seq_.Add(ExtractUserKey(sep), encoded_entry, &delta_encoded_entry_slice); } current_block_first_internal_key_.clear(); } using IndexBuilder::Finish; virtual Status Finish( IndexBlocks* index_blocks, const BlockHandle& /*last_partition_block_handle*/) override { if (seperator_is_key_plus_seq_) { index_blocks->index_block_contents = index_block_builder_.Finish(); } else { index_blocks->index_block_contents = index_block_builder_without_seq_.Finish(); } index_size_ = index_blocks->index_block_contents.size(); return Status::OK(); } virtual size_t IndexSize() const override { return index_size_; } virtual bool seperator_is_key_plus_seq() override { return seperator_is_key_plus_seq_; } friend class PartitionedIndexBuilder; private: BlockBuilder index_block_builder_; BlockBuilder index_block_builder_without_seq_; const bool use_value_delta_encoding_; bool seperator_is_key_plus_seq_; const bool include_first_key_; BlockBasedTableOptions::IndexShorteningMode shortening_mode_; BlockHandle last_encoded_handle_ = BlockHandle::NullBlockHandle(); std::string current_block_first_internal_key_; }; // HashIndexBuilder contains a binary-searchable primary index and the // metadata for secondary hash index construction. 
// The metadata for the hash index consists of two parts:
//  - a metablock that compactly contains a sequence of prefixes. All prefixes
//    are stored consecutively without any metadata (like, prefix sizes) being
//    stored, which is kept in the other metablock.
//  - a metablock that contains the metadata of the prefixes, including prefix
//    size, restart index and number of blocks it spans. The format looks like:
//
// +-----------------+---------------------------+---------------------+
// <=prefix 1
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
// +-----------------+---------------------------+---------------------+
// <=prefix 2
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
// +-----------------+---------------------------+---------------------+
// |                                                                   |
// | ....                                                              |
// |                                                                   |
// +-----------------+---------------------------+---------------------+
// <=prefix n
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
// +-----------------+---------------------------+---------------------+
//
// NOTE: FlushPendingPrefix() writes the three fields of each row with
// PutVarint32Varint32Varint32(), i.e. varint32-encoded, so the "4 bytes"
// widths in the diagram are nominal rather than exact.
//
// The reason for separating these two metablocks is to enable efficient
// reuse of the first metablock during hash index construction without
// unnecessary data copy or small heap allocations for prefixes.
class HashIndexBuilder : public IndexBuilder { public: explicit HashIndexBuilder( const InternalKeyComparator* comparator, const SliceTransform* hash_key_extractor, int index_block_restart_interval, int format_version, bool use_value_delta_encoding, BlockBasedTableOptions::IndexShorteningMode shortening_mode) : IndexBuilder(comparator), primary_index_builder_(comparator, index_block_restart_interval, format_version, use_value_delta_encoding, shortening_mode, /* include_first_key */ false), hash_key_extractor_(hash_key_extractor) {} virtual void AddIndexEntry(std::string* last_key_in_current_block, const Slice* first_key_in_next_block, const BlockHandle& block_handle) override { ++current_restart_index_; primary_index_builder_.AddIndexEntry(last_key_in_current_block, first_key_in_next_block, block_handle); } virtual void OnKeyAdded(const Slice& key) override { auto key_prefix = hash_key_extractor_->Transform(key); bool is_first_entry = pending_block_num_ == 0; // Keys may share the prefix if (is_first_entry || pending_entry_prefix_ != key_prefix) { if (!is_first_entry) { FlushPendingPrefix(); } // need a hard copy otherwise the underlying data changes all the time. // TODO(kailiu) ToString() is expensive. We may speed up can avoid data // copy. pending_entry_prefix_ = key_prefix.ToString(); pending_block_num_ = 1; pending_entry_index_ = static_cast(current_restart_index_); } else { // entry number increments when keys share the prefix reside in // different data blocks. 
auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1; assert(last_restart_index <= current_restart_index_); if (last_restart_index != current_restart_index_) { ++pending_block_num_; } } } virtual Status Finish( IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) override { if (pending_block_num_ != 0) { FlushPendingPrefix(); } primary_index_builder_.Finish(index_blocks, last_partition_block_handle); index_blocks->meta_blocks.insert( {kHashIndexPrefixesBlock.c_str(), prefix_block_}); index_blocks->meta_blocks.insert( {kHashIndexPrefixesMetadataBlock.c_str(), prefix_meta_block_}); return Status::OK(); } virtual size_t IndexSize() const override { return primary_index_builder_.IndexSize() + prefix_block_.size() + prefix_meta_block_.size(); } virtual bool seperator_is_key_plus_seq() override { return primary_index_builder_.seperator_is_key_plus_seq(); } private: void FlushPendingPrefix() { prefix_block_.append(pending_entry_prefix_.data(), pending_entry_prefix_.size()); PutVarint32Varint32Varint32( &prefix_meta_block_, static_cast(pending_entry_prefix_.size()), pending_entry_index_, pending_block_num_); } ShortenedIndexBuilder primary_index_builder_; const SliceTransform* hash_key_extractor_; // stores a sequence of prefixes std::string prefix_block_; // stores the metadata of prefixes std::string prefix_meta_block_; // The following 3 variables keeps unflushed prefix and its metadata. // The details of block_num and entry_index can be found in // "block_hash_index.{h,cc}" uint32_t pending_block_num_ = 0; uint32_t pending_entry_index_ = 0; std::string pending_entry_prefix_; uint64_t current_restart_index_ = 0; }; /** * IndexBuilder for two-level indexing. Internally it creates a new index for * each partition and Finish then in order when Finish is called on it * continiously until Status::OK() is returned. 
* * The format on the disk would be I I I I I I IP where I is block containing a * partition of indexes built using ShortenedIndexBuilder and IP is a block * containing a secondary index on the partitions, built using * ShortenedIndexBuilder. */ class PartitionedIndexBuilder : public IndexBuilder { public: static PartitionedIndexBuilder* CreateIndexBuilder( const ROCKSDB_NAMESPACE::InternalKeyComparator* comparator, const bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt); explicit PartitionedIndexBuilder(const InternalKeyComparator* comparator, const BlockBasedTableOptions& table_opt, const bool use_value_delta_encoding); virtual ~PartitionedIndexBuilder(); virtual void AddIndexEntry(std::string* last_key_in_current_block, const Slice* first_key_in_next_block, const BlockHandle& block_handle) override; virtual Status Finish( IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) override; virtual size_t IndexSize() const override { return index_size_; } size_t TopLevelIndexSize(uint64_t) const { return top_level_index_size_; } size_t NumPartitions() const; inline bool ShouldCutFilterBlock() { // Current policy is to align the partitions of index and filters if (cut_filter_block) { cut_filter_block = false; return true; } return false; } std::string& GetPartitionKey() { return sub_index_last_key_; } // Called when an external entity (such as filter partition builder) request // cutting the next partition void RequestPartitionCut(); virtual bool seperator_is_key_plus_seq() override { return seperator_is_key_plus_seq_; } bool get_use_value_delta_encoding() { return use_value_delta_encoding_; } private: // Set after ::Finish is called size_t top_level_index_size_ = 0; // Set after ::Finish is called size_t partition_cnt_ = 0; void MakeNewSubIndexBuilder(); struct Entry { std::string key; std::unique_ptr value; }; std::list entries_; // list of partitioned indexes and their keys BlockBuilder index_block_builder_; // top-level 
index builder BlockBuilder index_block_builder_without_seq_; // same for user keys // the active partition index builder ShortenedIndexBuilder* sub_index_builder_; // the last key in the active partition index builder std::string sub_index_last_key_; std::unique_ptr flush_policy_; // true if Finish is called once but not complete yet. bool finishing_indexes = false; const BlockBasedTableOptions& table_opt_; bool seperator_is_key_plus_seq_; bool use_value_delta_encoding_; // true if an external entity (such as filter partition builder) request // cutting the next partition bool partition_cut_requested_ = true; // true if it should cut the next filter partition block bool cut_filter_block = false; BlockHandle last_encoded_handle_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/index_reader_common.cc000066400000000000000000000037331370372246700231020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/block_based/index_reader_common.h"

namespace ROCKSDB_NAMESPACE {

// Reads the table's index block (located via the footer's index handle) into
// `*index_block`, which must be empty on entry. The result of
// BlockBasedTable::RetrieveBlock() is returned unchanged.
//
// NOTE(review): `CachableEntry` had lost its template argument in this copy of
// the file; <Block> is restored because the entry is retrieved with
// BlockType::kIndex - confirm against the pristine source.
Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
    const ReadOptions& read_options, bool use_cache, GetContext* get_context,
    BlockCacheLookupContext* lookup_context,
    CachableEntry<Block>* index_block) {
  PERF_TIMER_GUARD(read_index_block_nanos);

  assert(table != nullptr);
  assert(index_block != nullptr);
  assert(index_block->IsEmpty());

  const Rep* const rep = table->get_rep();
  assert(rep != nullptr);

  const Status s = table->RetrieveBlock(
      prefetch_buffer, read_options, rep->footer.index_handle(),
      UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex,
      get_context, lookup_context, /* for_compaction */ false, use_cache);

  return s;
}

// Hands out the index block held by this reader when there is one (as an
// unowned reference); otherwise reads it through ReadIndexBlock(). With
// `no_io` set the read is restricted to kBlockCacheTier.
Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock(
    bool no_io, GetContext* get_context,
    BlockCacheLookupContext* lookup_context,
    CachableEntry<Block>* index_block) const {
  assert(index_block != nullptr);

  if (!index_block_.IsEmpty()) {
    index_block->SetUnownedValue(index_block_.GetValue());
    return Status::OK();
  }

  ReadOptions read_options;
  if (no_io) {
    read_options.read_tier = kBlockCacheTier;
  }

  return ReadIndexBlock(table_, /*prefetch_buffer=*/nullptr, read_options,
                        cache_index_blocks(), get_context, lookup_context,
                        index_block);
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/block_based/index_reader_common.h000066400000000000000000000060421370372246700227400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once #include "table/block_based/block_based_table_reader.h" #include "table/block_based/reader_common.h" namespace ROCKSDB_NAMESPACE { // Encapsulates common functionality for the various index reader // implementations. Provides access to the index block regardless of whether // it is owned by the reader or stored in the cache, or whether it is pinned // in the cache or not. class BlockBasedTable::IndexReaderCommon : public BlockBasedTable::IndexReader { public: IndexReaderCommon(const BlockBasedTable* t, CachableEntry&& index_block) : table_(t), index_block_(std::move(index_block)) { assert(table_ != nullptr); } protected: static Status ReadIndexBlock(const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& read_options, bool use_cache, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* index_block); const BlockBasedTable* table() const { return table_; } const InternalKeyComparator* internal_comparator() const { assert(table_ != nullptr); assert(table_->get_rep() != nullptr); return &table_->get_rep()->internal_comparator; } bool index_has_first_key() const { assert(table_ != nullptr); assert(table_->get_rep() != nullptr); return table_->get_rep()->index_has_first_key; } bool index_key_includes_seq() const { assert(table_ != nullptr); assert(table_->get_rep() != nullptr); return table_->get_rep()->index_key_includes_seq; } bool index_value_is_full() const { assert(table_ != nullptr); assert(table_->get_rep() != nullptr); return table_->get_rep()->index_value_is_full; } bool cache_index_blocks() const { assert(table_ != nullptr); assert(table_->get_rep() != nullptr); return table_->get_rep()->table_options.cache_index_and_filter_blocks; } Status GetOrReadIndexBlock(bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* index_block) const; size_t ApproximateIndexBlockMemoryUsage() const { assert(!index_block_.GetOwnValue() || index_block_.GetValue() != 
nullptr); return index_block_.GetOwnValue() ? index_block_.GetValue()->ApproximateMemoryUsage() : 0; } private: const BlockBasedTable* table_; CachableEntry index_block_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/mock_block_based_table.h000066400000000000000000000035431370372246700233520ustar00rootroot00000000000000// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_filter_block.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/filter_policy_internal.h" namespace ROCKSDB_NAMESPACE { namespace mock { class MockBlockBasedTable : public BlockBasedTable { public: explicit MockBlockBasedTable(Rep* rep) : BlockBasedTable(rep, nullptr /* block_cache_tracer */) {} }; class MockBlockBasedTableTester { static constexpr int kMockLevel = 0; public: Options options_; ImmutableCFOptions ioptions_; EnvOptions env_options_; BlockBasedTableOptions table_options_; InternalKeyComparator icomp_; std::unique_ptr table_; MockBlockBasedTableTester(const FilterPolicy *filter_policy) : ioptions_(options_), env_options_(options_), icomp_(options_.comparator) { table_options_.filter_policy.reset(filter_policy); constexpr bool skip_filters = false; constexpr bool immortal_table = false; table_.reset(new MockBlockBasedTable(new BlockBasedTable::Rep( ioptions_, env_options_, table_options_, icomp_, skip_filters, 12345 /*file_size*/, kMockLevel, immortal_table))); } FilterBitsBuilder* GetBuilder() const { FilterBuildingContext context(table_options_); context.column_family_name = "mock_cf"; context.compaction_style = ioptions_.compaction_style; context.level_at_creation = kMockLevel; context.info_log = ioptions_.info_log; return 
BloomFilterPolicy::GetBuilderFromContext(context); } }; } // namespace mock } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/parsed_full_filter_block.cc000066400000000000000000000015171370372246700241160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #include "table/block_based/parsed_full_filter_block.h" #include "rocksdb/filter_policy.h" namespace ROCKSDB_NAMESPACE { ParsedFullFilterBlock::ParsedFullFilterBlock(const FilterPolicy* filter_policy, BlockContents&& contents) : block_contents_(std::move(contents)), filter_bits_reader_( !block_contents_.data.empty() ? filter_policy->GetFilterBitsReader(block_contents_.data) : nullptr) {} ParsedFullFilterBlock::~ParsedFullFilterBlock() = default; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/parsed_full_filter_block.h000066400000000000000000000020701370372246700237530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "table/format.h" namespace ROCKSDB_NAMESPACE { class FilterBitsReader; class FilterPolicy; // The sharable/cachable part of the full filter. 
class ParsedFullFilterBlock { public: ParsedFullFilterBlock(const FilterPolicy* filter_policy, BlockContents&& contents); ~ParsedFullFilterBlock(); FilterBitsReader* filter_bits_reader() const { return filter_bits_reader_.get(); } // TODO: consider memory usage of the FilterBitsReader size_t ApproximateMemoryUsage() const { return block_contents_.ApproximateMemoryUsage(); } bool own_bytes() const { return block_contents_.own_bytes(); } private: BlockContents block_contents_; std::unique_ptr filter_bits_reader_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/partitioned_filter_block.cc000066400000000000000000000454361370372246700241500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "table/block_based/partitioned_filter_block.h" #include #include "monitoring/perf_context_imp.h" #include "port/malloc.h" #include "port/port.h" #include "rocksdb/filter_policy.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder( const SliceTransform* _prefix_extractor, bool whole_key_filtering, FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval, const bool use_value_delta_encoding, PartitionedIndexBuilder* const p_index_builder, const uint32_t partition_size) : FullFilterBlockBuilder(_prefix_extractor, whole_key_filtering, filter_bits_builder), index_on_filter_block_builder_(index_block_restart_interval, true /*use_delta_encoding*/, use_value_delta_encoding), index_on_filter_block_builder_without_seq_(index_block_restart_interval, true /*use_delta_encoding*/, use_value_delta_encoding), p_index_builder_(p_index_builder), 
keys_added_to_partition_(0) { keys_per_partition_ = filter_bits_builder_->CalculateNumEntry(partition_size); if (keys_per_partition_ < 1) { // partition_size (minus buffer, ~10%) might be smaller than minimum // filter size, sometimes based on cache line size. Try to find that // minimum size without CalculateSpace (not necessarily available). uint32_t larger = std::max(partition_size + 4, uint32_t{16}); for (;;) { keys_per_partition_ = filter_bits_builder_->CalculateNumEntry(larger); if (keys_per_partition_ >= 1) { break; } larger += larger / 4; if (larger > 100000) { // might be a broken implementation. substitute something reasonable: // 1 key / byte. keys_per_partition_ = partition_size; break; } } } } PartitionedFilterBlockBuilder::~PartitionedFilterBlockBuilder() {} void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock( const Slice* next_key) { // Use == to send the request only once if (keys_added_to_partition_ == keys_per_partition_) { // Currently only index builder is in charge of cutting a partition. We keep // requesting until it is granted. p_index_builder_->RequestPartitionCut(); } if (!p_index_builder_->ShouldCutFilterBlock()) { return; } filter_gc.push_back(std::unique_ptr(nullptr)); // Add the prefix of the next key before finishing the partition. This hack, // fixes a bug with format_verison=3 where seeking for the prefix would lead // us to the previous partition. 
const bool add_prefix = next_key && prefix_extractor() && prefix_extractor()->InDomain(*next_key); if (add_prefix) { FullFilterBlockBuilder::AddPrefix(*next_key); } Slice filter = filter_bits_builder_->Finish(&filter_gc.back()); std::string& index_key = p_index_builder_->GetPartitionKey(); filters.push_back({index_key, filter}); keys_added_to_partition_ = 0; Reset(); } void PartitionedFilterBlockBuilder::Add(const Slice& key) { MaybeCutAFilterBlock(&key); FullFilterBlockBuilder::Add(key); } void PartitionedFilterBlockBuilder::AddKey(const Slice& key) { FullFilterBlockBuilder::AddKey(key); keys_added_to_partition_++; } Slice PartitionedFilterBlockBuilder::Finish( const BlockHandle& last_partition_block_handle, Status* status) { if (finishing_filters == true) { // Record the handle of the last written filter block in the index FilterEntry& last_entry = filters.front(); std::string handle_encoding; last_partition_block_handle.EncodeTo(&handle_encoding); std::string handle_delta_encoding; PutVarsignedint64( &handle_delta_encoding, last_partition_block_handle.size() - last_encoded_handle_.size()); last_encoded_handle_ = last_partition_block_handle; const Slice handle_delta_encoding_slice(handle_delta_encoding); index_on_filter_block_builder_.Add(last_entry.key, handle_encoding, &handle_delta_encoding_slice); if (!p_index_builder_->seperator_is_key_plus_seq()) { index_on_filter_block_builder_without_seq_.Add( ExtractUserKey(last_entry.key), handle_encoding, &handle_delta_encoding_slice); } filters.pop_front(); } else { MaybeCutAFilterBlock(nullptr); } // If there is no filter partition left, then return the index on filter // partitions if (UNLIKELY(filters.empty())) { *status = Status::OK(); if (finishing_filters) { if (p_index_builder_->seperator_is_key_plus_seq()) { return index_on_filter_block_builder_.Finish(); } else { return index_on_filter_block_builder_without_seq_.Finish(); } } else { // This is the rare case where no key was added to the filter return Slice(); 
} } else { // Return the next filter partition in line and set Incomplete() status to // indicate we expect more calls to Finish *status = Status::Incomplete(); finishing_filters = true; return filters.front().filter; } } PartitionedFilterBlockReader::PartitionedFilterBlockReader( const BlockBasedTable* t, CachableEntry&& filter_block) : FilterBlockReaderCommon(t, std::move(filter_block)) {} std::unique_ptr PartitionedFilterBlockReader::Create( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context) { assert(table); assert(table->get_rep()); assert(!pin || prefetch); CachableEntry filter_block; if (prefetch || !use_cache) { const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(), use_cache, nullptr /* get_context */, lookup_context, &filter_block); if (!s.ok()) { IGNORE_STATUS_IF_ERROR(s); return std::unique_ptr(); } if (use_cache && !pin) { filter_block.Reset(); } } return std::unique_ptr( new PartitionedFilterBlockReader(table, std::move(filter_block))); } bool PartitionedFilterBlockReader::KeyMayMatch( const Slice& key, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) { assert(const_ikey_ptr != nullptr); assert(block_offset == kNotValid); if (!whole_key_filtering()) { return true; } return MayMatch(key, prefix_extractor, block_offset, no_io, const_ikey_ptr, get_context, lookup_context, &FullFilterBlockReader::KeyMayMatch); } void PartitionedFilterBlockReader::KeysMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { assert(block_offset == kNotValid); if (!whole_key_filtering()) { return; // Any/all may match } MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context, &FullFilterBlockReader::KeysMayMatch); } bool 
PartitionedFilterBlockReader::PrefixMayMatch( const Slice& prefix, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) { assert(const_ikey_ptr != nullptr); assert(block_offset == kNotValid); if (!table_prefix_extractor() && !prefix_extractor) { return true; } return MayMatch(prefix, prefix_extractor, block_offset, no_io, const_ikey_ptr, get_context, lookup_context, &FullFilterBlockReader::PrefixMayMatch); } void PartitionedFilterBlockReader::PrefixesMayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) { assert(block_offset == kNotValid); if (!table_prefix_extractor() && !prefix_extractor) { return; // Any/all may match } MayMatch(range, prefix_extractor, block_offset, no_io, lookup_context, &FullFilterBlockReader::PrefixesMayMatch); } BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle( const CachableEntry& filter_block, const Slice& entry) const { IndexBlockIter iter; const InternalKeyComparator* const comparator = internal_comparator(); Statistics* kNullStats = nullptr; filter_block.GetValue()->NewIndexIterator( comparator, comparator->user_comparator(), table()->get_rep()->get_global_seqno(BlockType::kFilter), &iter, kNullStats, true /* total_order_seek */, false /* have_first_key */, index_key_includes_seq(), index_value_is_full()); iter.Seek(entry); if (UNLIKELY(!iter.Valid())) { // entry is larger than all the keys. However its prefix might still be // present in the last partition. If this is called by PrefixMayMatch this // is necessary for correct behavior. Otherwise it is unnecessary but safe. // Assuming this is an unlikely case for full key search, the performance // overhead should be negligible. 
iter.SeekToLast(); } assert(iter.Valid()); BlockHandle fltr_blk_handle = iter.value().handle; return fltr_blk_handle; } Status PartitionedFilterBlockReader::GetFilterPartitionBlock( FilePrefetchBuffer* prefetch_buffer, const BlockHandle& fltr_blk_handle, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* filter_block) const { assert(table()); assert(filter_block); assert(filter_block->IsEmpty()); if (!filter_map_.empty()) { auto iter = filter_map_.find(fltr_blk_handle.offset()); // This is a possible scenario since block cache might not have had space // for the partition if (iter != filter_map_.end()) { filter_block->SetUnownedValue(iter->second.GetValue()); return Status::OK(); } } ReadOptions read_options; if (no_io) { read_options.read_tier = kBlockCacheTier; } const Status s = table()->RetrieveBlock(prefetch_buffer, read_options, fltr_blk_handle, UncompressionDict::GetEmptyDict(), filter_block, BlockType::kFilter, get_context, lookup_context, /* for_compaction */ false, /* use_cache */ true); return s; } bool PartitionedFilterBlockReader::MayMatch( const Slice& slice, const SliceTransform* prefix_extractor, uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilterFunction filter_function) const { CachableEntry filter_block; Status s = GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block); if (UNLIKELY(!s.ok())) { IGNORE_STATUS_IF_ERROR(s); return true; } if (UNLIKELY(filter_block.GetValue()->size() == 0)) { return true; } auto filter_handle = GetFilterPartitionHandle(filter_block, *const_ikey_ptr); if (UNLIKELY(filter_handle.size() == 0)) { // key is out of range return false; } CachableEntry filter_partition_block; s = GetFilterPartitionBlock(nullptr /* prefetch_buffer */, filter_handle, no_io, get_context, lookup_context, &filter_partition_block); if (UNLIKELY(!s.ok())) { IGNORE_STATUS_IF_ERROR(s); return true; } 
FullFilterBlockReader filter_partition(table(), std::move(filter_partition_block)); return (filter_partition.*filter_function)( slice, prefix_extractor, block_offset, no_io, const_ikey_ptr, get_context, lookup_context); } void PartitionedFilterBlockReader::MayMatch( MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const { CachableEntry filter_block; Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context, lookup_context, &filter_block); if (UNLIKELY(!s.ok())) { IGNORE_STATUS_IF_ERROR(s); return; // Any/all may match } if (UNLIKELY(filter_block.GetValue()->size() == 0)) { return; // Any/all may match } auto start_iter_same_handle = range->begin(); BlockHandle prev_filter_handle = BlockHandle::NullBlockHandle(); // For all keys mapping to same partition (must be adjacent in sorted order) // share block cache lookup and use full filter multiget on the partition // filter. 
for (auto iter = start_iter_same_handle; iter != range->end(); ++iter) { // TODO: re-use one top-level index iterator BlockHandle this_filter_handle = GetFilterPartitionHandle(filter_block, iter->ikey); if (!prev_filter_handle.IsNull() && this_filter_handle != prev_filter_handle) { MultiGetRange subrange(*range, start_iter_same_handle, iter); MayMatchPartition(&subrange, prefix_extractor, block_offset, prev_filter_handle, no_io, lookup_context, filter_function); range->AddSkipsFrom(subrange); start_iter_same_handle = iter; } if (UNLIKELY(this_filter_handle.size() == 0)) { // key is out of range // Not reachable with current behavior of GetFilterPartitionHandle assert(false); range->SkipKey(iter); prev_filter_handle = BlockHandle::NullBlockHandle(); } else { prev_filter_handle = this_filter_handle; } } if (!prev_filter_handle.IsNull()) { MultiGetRange subrange(*range, start_iter_same_handle, range->end()); MayMatchPartition(&subrange, prefix_extractor, block_offset, prev_filter_handle, no_io, lookup_context, filter_function); range->AddSkipsFrom(subrange); } } void PartitionedFilterBlockReader::MayMatchPartition( MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, BlockHandle filter_handle, bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const { CachableEntry filter_partition_block; Status s = GetFilterPartitionBlock( nullptr /* prefetch_buffer */, filter_handle, no_io, range->begin()->get_context, lookup_context, &filter_partition_block); if (UNLIKELY(!s.ok())) { IGNORE_STATUS_IF_ERROR(s); return; // Any/all may match } FullFilterBlockReader filter_partition(table(), std::move(filter_partition_block)); (filter_partition.*filter_function)(range, prefix_extractor, block_offset, no_io, lookup_context); } size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const { size_t usage = ApproximateFilterBlockMemoryUsage(); #ifdef ROCKSDB_MALLOC_USABLE_SIZE usage += 
malloc_usable_size(const_cast(this)); #else usage += sizeof(*this); #endif // ROCKSDB_MALLOC_USABLE_SIZE return usage; // TODO(myabandeh): better estimation for filter_map_ size } // TODO(myabandeh): merge this with the same function in IndexReader void PartitionedFilterBlockReader::CacheDependencies(bool pin) { assert(table()); const BlockBasedTable::Rep* const rep = table()->get_rep(); assert(rep); BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; CachableEntry filter_block; Status s = GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */, &lookup_context, &filter_block); if (!s.ok()) { ROCKS_LOG_WARN(rep->ioptions.info_log, "Error retrieving top-level filter block while trying to " "cache filter partitions: %s", s.ToString().c_str()); IGNORE_STATUS_IF_ERROR(s); return; } // Before read partitions, prefetch them to avoid lots of IOs assert(filter_block.GetValue()); IndexBlockIter biter; const InternalKeyComparator* const comparator = internal_comparator(); Statistics* kNullStats = nullptr; filter_block.GetValue()->NewIndexIterator( comparator, comparator->user_comparator(), rep->get_global_seqno(BlockType::kFilter), &biter, kNullStats, true /* total_order_seek */, false /* have_first_key */, index_key_includes_seq(), index_value_is_full()); // Index partitions are assumed to be consecuitive. Prefetch them all. 
// Read the first block offset biter.SeekToFirst(); BlockHandle handle = biter.value().handle; uint64_t prefetch_off = handle.offset(); // Read the last block's offset biter.SeekToLast(); handle = biter.value().handle; uint64_t last_off = handle.offset() + handle.size() + kBlockTrailerSize; uint64_t prefetch_len = last_off - prefetch_off; std::unique_ptr prefetch_buffer; prefetch_buffer.reset(new FilePrefetchBuffer()); s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off, static_cast(prefetch_len)); // After prefetch, read the partitions one by one ReadOptions read_options; for (biter.SeekToFirst(); biter.Valid(); biter.Next()) { handle = biter.value().handle; CachableEntry block; // TODO: Support counter batch update for partitioned index and // filter blocks s = table()->MaybeReadBlockAndLoadToCache( prefetch_buffer.get(), read_options, handle, UncompressionDict::GetEmptyDict(), &block, BlockType::kFilter, nullptr /* get_context */, &lookup_context, nullptr /* contents */); assert(s.ok() || block.GetValue() == nullptr); if (s.ok() && block.GetValue() != nullptr) { if (block.IsCached()) { if (pin) { filter_map_[handle.offset()] = std::move(block); } } } IGNORE_STATUS_IF_ERROR(s); } } const InternalKeyComparator* PartitionedFilterBlockReader::internal_comparator() const { assert(table()); assert(table()->get_rep()); return &table()->get_rep()->internal_comparator; } bool PartitionedFilterBlockReader::index_key_includes_seq() const { assert(table()); assert(table()->get_rep()); return table()->get_rep()->index_key_includes_seq; } bool PartitionedFilterBlockReader::index_value_is_full() const { assert(table()); assert(table()->get_rep()); return table()->get_rep()->index_value_is_full; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/partitioned_filter_block.h000066400000000000000000000142311370372246700237770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include "db/dbformat.h" #include "index_builder.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "table/block_based/block.h" #include "table/block_based/filter_block_reader_common.h" #include "table/block_based/full_filter_block.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { public: explicit PartitionedFilterBlockBuilder( const SliceTransform* prefix_extractor, bool whole_key_filtering, FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval, const bool use_value_delta_encoding, PartitionedIndexBuilder* const p_index_builder, const uint32_t partition_size); virtual ~PartitionedFilterBlockBuilder(); void AddKey(const Slice& key) override; void Add(const Slice& key) override; virtual Slice Finish(const BlockHandle& last_partition_block_handle, Status* status) override; private: // Filter data BlockBuilder index_on_filter_block_builder_; // top-level index builder BlockBuilder index_on_filter_block_builder_without_seq_; // same for user keys struct FilterEntry { std::string key; Slice filter; }; std::list filters; // list of partitioned indexes and their keys std::unique_ptr value; std::vector> filter_gc; bool finishing_filters = false; // true if Finish is called once but not complete yet. // The policy of when cut a filter block and Finish it void MaybeCutAFilterBlock(const Slice* next_key); // Currently we keep the same number of partitions for filters and indexes. // This would allow for some potentioal optimizations in future. 
If such // optimizations did not realize we can use different number of partitions and // eliminate p_index_builder_ PartitionedIndexBuilder* const p_index_builder_; // The desired number of keys per partition uint32_t keys_per_partition_; // The number of keys added to the last partition so far uint32_t keys_added_to_partition_; BlockHandle last_encoded_handle_; }; class PartitionedFilterBlockReader : public FilterBlockReaderCommon { public: PartitionedFilterBlockReader(const BlockBasedTable* t, CachableEntry&& filter_block); static std::unique_ptr Create( const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context); bool IsBlockBased() override { return false; } bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; void KeysMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) override; bool PrefixMayMatch(const Slice& prefix, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context) override; void PrefixesMayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context) override; size_t ApproximateMemoryUsage() const override; private: BlockHandle GetFilterPartitionHandle(const CachableEntry& filter_block, const Slice& entry) const; Status GetFilterPartitionBlock( FilePrefetchBuffer* prefetch_buffer, const BlockHandle& handle, bool no_io, GetContext* get_context, BlockCacheLookupContext* lookup_context, CachableEntry* filter_block) const; using FilterFunction = bool (FullFilterBlockReader::*)( const 
Slice& slice, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, const Slice* const const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context); bool MayMatch(const Slice& slice, const SliceTransform* prefix_extractor, uint64_t block_offset, bool no_io, const Slice* const_ikey_ptr, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilterFunction filter_function) const; using FilterManyFunction = void (FullFilterBlockReader::*)( MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, const bool no_io, BlockCacheLookupContext* lookup_context); void MayMatch(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const; void MayMatchPartition(MultiGetRange* range, const SliceTransform* prefix_extractor, uint64_t block_offset, BlockHandle filter_handle, bool no_io, BlockCacheLookupContext* lookup_context, FilterManyFunction filter_function) const; void CacheDependencies(bool pin) override; const InternalKeyComparator* internal_comparator() const; bool index_key_includes_seq() const; bool index_value_is_full() const; protected: std::unordered_map> filter_map_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/partitioned_filter_block_test.cc000066400000000000000000000366271370372246700252110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "rocksdb/filter_policy.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/partitioned_filter_block.h" #include "table/block_based/filter_policy_internal.h" #include "index_builder.h" #include "logging/logging.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "util/hash.h" namespace ROCKSDB_NAMESPACE { std::map blooms; class MockedBlockBasedTable : public BlockBasedTable { public: MockedBlockBasedTable(Rep* rep, PartitionedIndexBuilder* pib) : BlockBasedTable(rep, /*block_cache_tracer=*/nullptr) { // Initialize what Open normally does as much as necessary for the test rep->index_key_includes_seq = pib->seperator_is_key_plus_seq(); rep->index_value_is_full = !pib->get_use_value_delta_encoding(); } }; class MyPartitionedFilterBlockReader : public PartitionedFilterBlockReader { public: MyPartitionedFilterBlockReader(BlockBasedTable* t, CachableEntry&& filter_block) : PartitionedFilterBlockReader(t, std::move(filter_block)) { for (const auto& pair : blooms) { const uint64_t offset = pair.first; const std::string& bloom = pair.second; assert(t); assert(t->get_rep()); CachableEntry block( new ParsedFullFilterBlock( t->get_rep()->table_options.filter_policy.get(), BlockContents(Slice(bloom))), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); filter_map_[offset] = std::move(block); } } }; class PartitionedFilterBlockTest : public testing::Test, virtual public ::testing::WithParamInterface { public: Options options_; ImmutableCFOptions ioptions_; EnvOptions env_options_; BlockBasedTableOptions table_options_; InternalKeyComparator icomp_; std::unique_ptr table_; std::shared_ptr cache_; int bits_per_key_; PartitionedFilterBlockTest() : ioptions_(options_), env_options_(options_), icomp_(options_.comparator), bits_per_key_(10) { table_options_.filter_policy.reset( NewBloomFilterPolicy(bits_per_key_, false)); table_options_.format_version = 
GetParam(); table_options_.index_block_restart_interval = 3; } ~PartitionedFilterBlockTest() override {} const std::string keys[4] = {"afoo", "bar", "box", "hello"}; const std::string missing_keys[2] = {"missing", "other"}; uint64_t MaxIndexSize() { int num_keys = sizeof(keys) / sizeof(*keys); uint64_t max_key_size = 0; for (int i = 1; i < num_keys; i++) { max_key_size = std::max(max_key_size, static_cast(keys[i].size())); } uint64_t max_index_size = num_keys * (max_key_size + 8 /*handle*/); return max_index_size; } uint64_t MaxFilterSize() { int num_keys = sizeof(keys) / sizeof(*keys); // General, rough over-approximation return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5); } uint64_t last_offset = 10; BlockHandle Write(const Slice& slice) { BlockHandle bh(last_offset + 1, slice.size()); blooms[bh.offset()] = slice.ToString(); last_offset += bh.size(); return bh; } PartitionedIndexBuilder* NewIndexBuilder() { const bool kValueDeltaEncoded = true; return PartitionedIndexBuilder::CreateIndexBuilder( &icomp_, !kValueDeltaEncoded, table_options_); } PartitionedFilterBlockBuilder* NewBuilder( PartitionedIndexBuilder* const p_index_builder, const SliceTransform* prefix_extractor = nullptr) { assert(table_options_.block_size_deviation <= 100); auto partition_size = static_cast( ((table_options_.metadata_block_size * (100 - table_options_.block_size_deviation)) + 99) / 100); partition_size = std::max(partition_size, static_cast(1)); const bool kValueDeltaEncoded = true; return new PartitionedFilterBlockBuilder( prefix_extractor, table_options_.whole_key_filtering, BloomFilterPolicy::GetBuilderFromContext( FilterBuildingContext(table_options_)), table_options_.index_block_restart_interval, !kValueDeltaEncoded, p_index_builder, partition_size); } PartitionedFilterBlockReader* NewReader( PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) { BlockHandle bh; Status status; Slice slice; do { slice = builder->Finish(bh, &status); bh = 
Write(slice); } while (status.IsIncomplete()); constexpr bool skip_filters = false; constexpr uint64_t file_size = 12345; constexpr int level = 0; constexpr bool immortal_table = false; table_.reset(new MockedBlockBasedTable( new BlockBasedTable::Rep(ioptions_, env_options_, table_options_, icomp_, skip_filters, file_size, level, immortal_table), pib)); BlockContents contents(slice); CachableEntry block( new Block(std::move(contents), 0 /* read_amp_bytes_per_bit */, nullptr), nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */); auto reader = new MyPartitionedFilterBlockReader(table_.get(), std::move(block)); return reader; } void VerifyReader(PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib, bool empty = false, const SliceTransform* prefix_extractor = nullptr) { std::unique_ptr reader( NewReader(builder, pib)); // Querying added keys const bool no_io = true; for (auto key : keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io, &ikey_slice, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } { // querying a key twice auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->KeyMayMatch( keys[0], prefix_extractor, kNotValid, !no_io, &ikey_slice, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } // querying missing keys for (auto key : missing_keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); if (empty) { ASSERT_TRUE(reader->KeyMayMatch( key, prefix_extractor, kNotValid, !no_io, &ikey_slice, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } else { // assuming a good hash function ASSERT_FALSE(reader->KeyMayMatch( key, prefix_extractor, kNotValid, !no_io, &ikey_slice, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } } } int TestBlockPerKey() { 
std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder( NewBuilder(pib.get())); int i = 0; builder->Add(keys[i]); CutABlock(pib.get(), keys[i], keys[i + 1]); i++; builder->Add(keys[i]); CutABlock(pib.get(), keys[i], keys[i + 1]); i++; builder->Add(keys[i]); builder->Add(keys[i]); CutABlock(pib.get(), keys[i], keys[i + 1]); i++; builder->Add(keys[i]); CutABlock(pib.get(), keys[i]); VerifyReader(builder.get(), pib.get()); return CountNumOfIndexPartitions(pib.get()); } void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) { std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder( NewBuilder(pib.get(), prefix_extractor)); int i = 0; builder->Add(keys[i]); i++; builder->Add(keys[i]); CutABlock(pib.get(), keys[i], keys[i + 1]); i++; builder->Add(keys[i]); builder->Add(keys[i]); i++; builder->Add(keys[i]); CutABlock(pib.get(), keys[i]); VerifyReader(builder.get(), pib.get(), prefix_extractor); } void TestBlockPerAllKeys() { std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder( NewBuilder(pib.get())); int i = 0; builder->Add(keys[i]); i++; builder->Add(keys[i]); i++; builder->Add(keys[i]); builder->Add(keys[i]); i++; builder->Add(keys[i]); CutABlock(pib.get(), keys[i]); VerifyReader(builder.get(), pib.get()); } void CutABlock(PartitionedIndexBuilder* builder, const std::string& user_key) { // Assuming a block is cut, add an entry to the index std::string key = std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep()); BlockHandle dont_care_block_handle(1, 1); builder->AddIndexEntry(&key, nullptr, dont_care_block_handle); } void CutABlock(PartitionedIndexBuilder* builder, const std::string& user_key, const std::string& next_user_key) { // Assuming a block is cut, add an entry to the index std::string key = std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep()); std::string next_key = std::string( *InternalKey(next_user_key, 0, ValueType::kTypeValue).rep()); BlockHandle dont_care_block_handle(1, 1); 
Slice slice = Slice(next_key.data(), next_key.size()); builder->AddIndexEntry(&key, &slice, dont_care_block_handle); } int CountNumOfIndexPartitions(PartitionedIndexBuilder* builder) { IndexBuilder::IndexBlocks dont_care_ib; BlockHandle dont_care_bh(10, 10); Status s; int cnt = 0; do { s = builder->Finish(&dont_care_ib, dont_care_bh); cnt++; } while (s.IsIncomplete()); return cnt - 1; // 1 is 2nd level index } }; INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest, testing::Values(test::kLatestFormatVersion)); TEST_P(PartitionedFilterBlockTest, EmptyBuilder) { std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder(NewBuilder(pib.get())); const bool empty = true; VerifyReader(builder.get(), pib.get(), empty); } TEST_P(PartitionedFilterBlockTest, OneBlock) { uint64_t max_index_size = MaxIndexSize(); for (uint64_t i = 1; i < max_index_size + 1; i++) { table_options_.metadata_block_size = i; TestBlockPerAllKeys(); } } TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) { uint64_t max_index_size = MaxIndexSize(); for (uint64_t i = 1; i < max_index_size + 1; i++) { table_options_.metadata_block_size = i; TestBlockPerTwoKeys(); } } // This reproduces the bug that a prefix is the same among multiple consecutive // blocks but the bug would add it only to the first block. 
TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
  // some small number to cause partition cuts
  table_options_.metadata_block_size = 1;
  std::unique_ptr<const SliceTransform> prefix_extractor(
      ROCKSDB_NAMESPACE::NewFixedPrefixTransform(1));
  std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
  std::unique_ptr<PartitionedFilterBlockBuilder> builder(
      NewBuilder(pib.get(), prefix_extractor.get()));

  // One key per block; all three share the one-character prefix "p".
  const std::string pkeys[3] = {"p-key10", "p-key20", "p-key30"};
  builder->Add(pkeys[0]);
  CutABlock(pib.get(), pkeys[0], pkeys[1]);
  builder->Add(pkeys[1]);
  CutABlock(pib.get(), pkeys[1], pkeys[2]);
  builder->Add(pkeys[2]);
  CutABlock(pib.get(), pkeys[2]);

  std::unique_ptr<PartitionedFilterBlockReader> reader(
      NewReader(builder.get(), pib.get()));

  // The prefix must be found whichever partition the key maps to.
  for (const auto& key : pkeys) {
    auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
    const Slice ikey_slice = Slice(*ikey.rep());
    ASSERT_TRUE(reader->PrefixMayMatch(
        prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
        /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
        /*lookup_context=*/nullptr));
  }

  // Non-existent keys but with the same prefix
  const std::string pnonkeys[4] = {"p-key9", "p-key11", "p-key21", "p-key31"};
  for (const auto& key : pnonkeys) {
    auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
    const Slice ikey_slice = Slice(*ikey.rep());
    ASSERT_TRUE(reader->PrefixMayMatch(
        prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
        /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
        /*lookup_context=*/nullptr));
  }
}

// This reproduces the bug in format_version=3 that the seeking the prefix will
// lead us to the partition before the one that has filter for the prefix.
TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) { // some small number to cause partition cuts table_options_.metadata_block_size = 1; std::unique_ptr prefix_extractor( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(2)); std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder( NewBuilder(pib.get(), prefix_extractor.get())); // In the bug, searching for prefix "p3" on an index with format version 3, // will give the key "p3" and the partition of the keys that are <= p3, i.e., // p2-keys, where the filter for prefix "p3" does not exist. const std::string pkeys[] = {"p1-key1", "p2-key2", "p3-key3", "p4-key3", "p5-key3"}; builder->Add(pkeys[0]); CutABlock(pib.get(), pkeys[0], pkeys[1]); builder->Add(pkeys[1]); CutABlock(pib.get(), pkeys[1], pkeys[2]); builder->Add(pkeys[2]); CutABlock(pib.get(), pkeys[2], pkeys[3]); builder->Add(pkeys[3]); CutABlock(pib.get(), pkeys[3], pkeys[4]); builder->Add(pkeys[4]); CutABlock(pib.get(), pkeys[4]); std::unique_ptr reader( NewReader(builder.get(), pib.get())); for (auto key : pkeys) { auto prefix = prefix_extractor->Transform(key); auto ikey = InternalKey(prefix, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->PrefixMayMatch( prefix, prefix_extractor.get(), kNotValid, /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } } TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) { uint64_t max_index_size = MaxIndexSize(); for (uint64_t i = 1; i < max_index_size + 1; i++) { table_options_.metadata_block_size = i; TestBlockPerKey(); } } TEST_P(PartitionedFilterBlockTest, PartitionCount) { int num_keys = sizeof(keys) / sizeof(*keys); table_options_.metadata_block_size = std::max(MaxIndexSize(), MaxFilterSize()); int partitions = TestBlockPerKey(); ASSERT_EQ(partitions, 1); // A low number ensures cutting a block after each key table_options_.metadata_block_size = 1; partitions = TestBlockPerKey(); ASSERT_EQ(partitions, num_keys - 1 /* last two keys 
make one flush */); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/block_based/partitioned_index_iterator.cc000066400000000000000000000122171370372246700245200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "table/block_based/partitioned_index_iterator.h" namespace ROCKSDB_NAMESPACE { void ParititionedIndexIterator::Seek(const Slice& target) { SeekImpl(&target); } void ParititionedIndexIterator::SeekToFirst() { SeekImpl(nullptr); } void ParititionedIndexIterator::SeekImpl(const Slice* target) { SavePrevIndexValue(); if (target) { index_iter_->Seek(*target); } else { index_iter_->SeekToFirst(); } if (!index_iter_->Valid()) { ResetPartitionedIndexIter(); return; } InitPartitionedIndexBlock(); if (target) { block_iter_.Seek(*target); } else { block_iter_.SeekToFirst(); } FindKeyForward(); // We could check upper bound here, but that would be too complicated // and checking index upper bound is less useful than for data blocks. if (target) { assert(!Valid() || (table_->get_rep()->index_key_includes_seq ? 
(icomp_.Compare(*target, key()) <= 0) : (user_comparator_.Compare(ExtractUserKey(*target), key()) <= 0))); } } void ParititionedIndexIterator::SeekToLast() { SavePrevIndexValue(); index_iter_->SeekToLast(); if (!index_iter_->Valid()) { ResetPartitionedIndexIter(); return; } InitPartitionedIndexBlock(); block_iter_.SeekToLast(); FindKeyBackward(); } void ParititionedIndexIterator::Next() { assert(block_iter_points_to_real_block_); block_iter_.Next(); FindKeyForward(); } void ParititionedIndexIterator::Prev() { assert(block_iter_points_to_real_block_); block_iter_.Prev(); FindKeyBackward(); } void ParititionedIndexIterator::InitPartitionedIndexBlock() { BlockHandle partitioned_index_handle = index_iter_->value().handle; if (!block_iter_points_to_real_block_ || partitioned_index_handle.offset() != prev_block_offset_ || // if previous attempt of reading the block missed cache, try again block_iter_.status().IsIncomplete()) { if (block_iter_points_to_real_block_) { ResetPartitionedIndexIter(); } auto* rep = table_->get_rep(); bool is_for_compaction = lookup_context_.caller == TableReaderCaller::kCompaction; // Prefetch additional data for range scans (iterators). // Implicit auto readahead: // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0. // Explicit user requested readahead: // Enabled from the very first IO when ReadOptions.readahead_size is set. block_prefetcher_.PrefetchIfNeeded(rep, partitioned_index_handle, read_options_.readahead_size, is_for_compaction); Status s; table_->NewDataBlockIterator( read_options_, partitioned_index_handle, &block_iter_, BlockType::kIndex, /*get_context=*/nullptr, &lookup_context_, s, block_prefetcher_.prefetch_buffer(), /*for_compaction=*/is_for_compaction); block_iter_points_to_real_block_ = true; // We could check upper bound here but it is complicated to reason about // upper bound in index iterator. On the other than, in large scans, index // iterators are moved much less frequently compared to data blocks. 
So // the upper bound check is skipped for simplicity. } } void ParititionedIndexIterator::FindKeyForward() { // This method's code is kept short to make it likely to be inlined. assert(block_iter_points_to_real_block_); if (!block_iter_.Valid()) { // This is the only call site of FindBlockForward(), but it's extracted into // a separate method to keep FindKeyForward() short and likely to be // inlined. When transitioning to a different block, we call // FindBlockForward(), which is much longer and is probably not inlined. FindBlockForward(); } else { // This is the fast path that avoids a function call. } } void ParititionedIndexIterator::FindBlockForward() { // TODO the while loop inherits from two-level-iterator. We don't know // whether a block can be empty so it can be replaced by an "if". do { if (!block_iter_.status().ok()) { return; } ResetPartitionedIndexIter(); index_iter_->Next(); if (!index_iter_->Valid()) { return; } InitPartitionedIndexBlock(); block_iter_.SeekToFirst(); } while (!block_iter_.Valid()); } void ParititionedIndexIterator::FindKeyBackward() { while (!block_iter_.Valid()) { if (!block_iter_.status().ok()) { return; } ResetPartitionedIndexIter(); index_iter_->Prev(); if (index_iter_->Valid()) { InitPartitionedIndexBlock(); block_iter_.SeekToLast(); } else { return; } } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/partitioned_index_iterator.h000066400000000000000000000113421370372246700243600ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader_impl.h" #include "table/block_based/block_prefetcher.h" #include "table/block_based/reader_common.h" namespace ROCKSDB_NAMESPACE { // Iterator that iterates over partitioned index. // Some upper and lower bound tricks played in block based table iterators // could be played here, but it's too complicated to reason about index // keys with upper or lower bound, so we skip it for simplicity. class ParititionedIndexIterator : public InternalIteratorBase { // compaction_readahead_size: its value will only be used if for_compaction = // true public: ParititionedIndexIterator( const BlockBasedTable* table, const ReadOptions& read_options, const InternalKeyComparator& icomp, std::unique_ptr>&& index_iter, TableReaderCaller caller, size_t compaction_readahead_size = 0) : table_(table), read_options_(read_options), #ifndef NDEBUG icomp_(icomp), #endif user_comparator_(icomp.user_comparator()), index_iter_(std::move(index_iter)), block_iter_points_to_real_block_(false), lookup_context_(caller), block_prefetcher_(compaction_readahead_size) {} ~ParititionedIndexIterator() {} void Seek(const Slice& target) override; void SeekForPrev(const Slice&) override { // Shouldn't be called. 
assert(false); } void SeekToFirst() override; void SeekToLast() override; void Next() final override; bool NextAndGetResult(IterateResult*) override { assert(false); return false; } void Prev() override; bool Valid() const override { return block_iter_points_to_real_block_ && block_iter_.Valid(); } Slice key() const override { assert(Valid()); return block_iter_.key(); } Slice user_key() const override { assert(Valid()); return block_iter_.user_key(); } IndexValue value() const override { assert(Valid()); return block_iter_.value(); } Status status() const override { // Prefix index set status to NotFound when the prefix does not exist if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) { return index_iter_->status(); } else if (block_iter_points_to_real_block_) { return block_iter_.status(); } else { return Status::OK(); } } // Whether iterator invalidated for being out of bound. bool IsOutOfBound() override { // Shoulldn't be called assert(false); return false; } inline bool MayBeOutOfUpperBound() override { // Shouldn't be called. assert(false); return true; } void SetPinnedItersMgr(PinnedIteratorsManager*) override { // Shouldn't be called. assert(false); } bool IsKeyPinned() const override { // Shouldn't be called. assert(false); return false; } bool IsValuePinned() const override { // Shouldn't be called. assert(false); return false; } void ResetPartitionedIndexIter() { if (block_iter_points_to_real_block_) { block_iter_.Invalidate(Status::OK()); block_iter_points_to_real_block_ = false; } } void SavePrevIndexValue() { if (block_iter_points_to_real_block_) { // Reseek. If they end up with the same data block, we shouldn't re-fetch // the same data block. 
prev_block_offset_ = index_iter_->value().handle.offset(); } } private: friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test; const BlockBasedTable* table_; const ReadOptions read_options_; #ifndef NDEBUG const InternalKeyComparator& icomp_; #endif UserComparatorWrapper user_comparator_; std::unique_ptr> index_iter_; IndexBlockIter block_iter_; // True if block_iter_ is initialized and points to the same block // as index iterator. bool block_iter_points_to_real_block_; uint64_t prev_block_offset_ = std::numeric_limits::max(); BlockCacheLookupContext lookup_context_; BlockPrefetcher block_prefetcher_; // If `target` is null, seek to first. void SeekImpl(const Slice* target); void InitPartitionedIndexBlock(); void FindKeyForward(); void FindBlockForward(); void FindKeyBackward(); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/partitioned_index_reader.cc000066400000000000000000000150611370372246700241310ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
// Builds a PartitionIndexReader for `table` and stores it in `*index_reader`.
//
// The top-level index block is read eagerly when `prefetch` is set or when
// the block cache is not used (`!use_cache`); otherwise the reader is created
// with an empty entry.
//
// Preconditions (asserted): `table` and its rep are non-null, `index_reader`
// is non-null, and `pin` implies `prefetch`.
//
// Returns the error from ReadIndexBlock() if the eager read fails, in which
// case `*index_reader` is left untouched; Status::OK() otherwise.
Status PartitionIndexReader::Create(
    const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
    bool use_cache, bool prefetch, bool pin,
    BlockCacheLookupContext* lookup_context,
    std::unique_ptr* index_reader) {
  assert(table != nullptr);
  assert(table->get_rep());
  assert(!pin || prefetch);
  assert(index_reader != nullptr);

  CachableEntry index_block;
  if (prefetch || !use_cache) {
    const Status s =
        ReadIndexBlock(table, prefetch_buffer, ReadOptions(), use_cache,
                       /*get_context=*/nullptr, lookup_context, &index_block);
    if (!s.ok()) {
      return s;
    }

    // The block was read through the cache but must not stay pinned by this
    // reader: give up our entry so the reader is constructed with an empty
    // one. (Presumably the block then lives only in the block cache -- confirm
    // against CachableEntry::Reset.)
    if (use_cache && !pin) {
      index_block.Reset();
    }
  }

  index_reader->reset(new PartitionIndexReader(table, std::move(index_block)));

  return Status::OK();
}
it = NewTwoLevelIterator( new BlockBasedTable::PartitionedIndexIteratorState(table(), &partition_map_), index_block.GetValue()->NewIndexIterator( internal_comparator(), internal_comparator()->user_comparator(), rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, index_has_first_key(), index_key_includes_seq(), index_value_is_full())); } else { ReadOptions ro; ro.fill_cache = read_options.fill_cache; // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. std::unique_ptr> index_iter( index_block.GetValue()->NewIndexIterator( internal_comparator(), internal_comparator()->user_comparator(), rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, index_has_first_key(), index_key_includes_seq(), index_value_is_full())); it = new ParititionedIndexIterator( table(), ro, *internal_comparator(), std::move(index_iter), lookup_context ? lookup_context->caller : TableReaderCaller::kUncategorized); } assert(it != nullptr); index_block.TransferTo(it); return it; // TODO(myabandeh): Update TwoLevelIterator to be able to make use of // on-stack BlockIter while the state is on heap. Currentlly it assumes // the first level iter is always on heap and will attempt to delete it // in its destructor. 
} void PartitionIndexReader::CacheDependencies(bool pin) { // Before read partitions, prefetch them to avoid lots of IOs BlockCacheLookupContext lookup_context{TableReaderCaller::kPrefetch}; const BlockBasedTable::Rep* rep = table()->rep_; IndexBlockIter biter; BlockHandle handle; Statistics* kNullStats = nullptr; CachableEntry index_block; Status s = GetOrReadIndexBlock(false /* no_io */, nullptr /* get_context */, &lookup_context, &index_block); if (!s.ok()) { ROCKS_LOG_WARN(rep->ioptions.info_log, "Error retrieving top-level index block while trying to " "cache index partitions: %s", s.ToString().c_str()); IGNORE_STATUS_IF_ERROR(s); return; } // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. index_block.GetValue()->NewIndexIterator( internal_comparator(), internal_comparator()->user_comparator(), rep->get_global_seqno(BlockType::kIndex), &biter, kNullStats, true, index_has_first_key(), index_key_includes_seq(), index_value_is_full()); // Index partitions are assumed to be consecuitive. Prefetch them all. // Read the first block offset biter.SeekToFirst(); if (!biter.Valid()) { // Empty index. return; } handle = biter.value().handle; uint64_t prefetch_off = handle.offset(); // Read the last block's offset biter.SeekToLast(); if (!biter.Valid()) { // Empty index. 
return; } handle = biter.value().handle; uint64_t last_off = handle.offset() + block_size(handle); uint64_t prefetch_len = last_off - prefetch_off; std::unique_ptr prefetch_buffer; rep->CreateFilePrefetchBuffer(0, 0, &prefetch_buffer); s = prefetch_buffer->Prefetch(rep->file.get(), prefetch_off, static_cast(prefetch_len)); // After prefetch, read the partitions one by one biter.SeekToFirst(); auto ro = ReadOptions(); for (; biter.Valid(); biter.Next()) { handle = biter.value().handle; CachableEntry block; // TODO: Support counter batch update for partitioned index and // filter blocks s = table()->MaybeReadBlockAndLoadToCache( prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(), &block, BlockType::kIndex, /*get_context=*/nullptr, &lookup_context, /*contents=*/nullptr); IGNORE_STATUS_IF_ERROR(s); assert(s.ok() || block.GetValue() == nullptr); if (s.ok() && block.GetValue() != nullptr) { if (block.IsCached()) { if (pin) { partition_map_[handle.offset()] = std::move(block); } } } } } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/partitioned_index_reader.h000066400000000000000000000043061370372246700237730ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "table/block_based/index_reader_common.h" namespace ROCKSDB_NAMESPACE { // Index that allows binary search lookup in a two-level index structure. 
// Index reader for the partitioned (two-level) index format. The common
// index-block handling comes from BlockBasedTable::IndexReaderCommon; this
// class adds the optional set of pinned partitions.
class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon {
 public:
  // Read the partition index from the file and create an instance for
  // `PartitionIndexReader`.
  // On success, index_reader will be populated; otherwise it will remain
  // unmodified.
  static Status Create(const BlockBasedTable* table,
                       FilePrefetchBuffer* prefetch_buffer, bool use_cache,
                       bool prefetch, bool pin,
                       BlockCacheLookupContext* lookup_context,
                       std::unique_ptr* index_reader);

  // return a two-level iterator: first level is on the partition index
  InternalIteratorBase* NewIterator(
      const ReadOptions& read_options, bool /* disable_prefix_seek */,
      IndexBlockIter* iter, GetContext* get_context,
      BlockCacheLookupContext* lookup_context) override;

  // Loads all index partitions into the block cache; with `pin` set the
  // cached partitions are additionally retained in partition_map_.
  void CacheDependencies(bool pin) override;

  // Estimate of the memory held by this reader: the top-level index block
  // plus the reader object itself (its usable allocation size when
  // ROCKSDB_MALLOC_USABLE_SIZE is defined, sizeof otherwise). Entries of
  // partition_map_ are not included yet, see the TODO below.
  size_t ApproximateMemoryUsage() const override {
    size_t usage = ApproximateIndexBlockMemoryUsage();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
    usage += malloc_usable_size(const_cast(this));
#else
    usage += sizeof(*this);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
    // TODO(myabandeh): more accurate estimate of partition_map_ mem usage
    return usage;
  }

 private:
  // Instances are created through Create() only.
  PartitionIndexReader(const BlockBasedTable* t,
                       CachableEntry&& index_block)
      : IndexReaderCommon(t, std::move(index_block)) {}

  // Pinned index partitions, keyed by the partition block's file offset.
  // Filled by CacheDependencies(true); when non-empty, NewIterator() serves
  // partitions from this map.
  std::unordered_map> partition_map_;
};
// Recomputes the checksum of `buf[0, len)` with the algorithm selected by
// `type` and compares it against `expected`.
//
//  - kNoChecksum: nothing is computed; the comparison is 0 against
//    `expected`, so a non-zero `expected` is reported as a mismatch.
//  - kCRC32c: `expected` is unmasked before the comparison.
//  - kxxHash / kxxHash64: seed 0; for the 64-bit hash only the low 32 bits
//    are compared.
//
// Returns OK on a match, Corruption for an unknown `type` or a mismatch.
// Note that the mismatch message always says "properties block", whatever
// data the caller passed in.
Status VerifyChecksum(const ChecksumType type, const char* buf, size_t len,
                      uint32_t expected) {
  Status s;
  uint32_t actual = 0;
  switch (type) {
    case kNoChecksum:
      break;
    case kCRC32c:
      expected = crc32c::Unmask(expected);
      actual = crc32c::Value(buf, len);
      break;
    case kxxHash:
      actual = XXH32(buf, static_cast(len), 0);
      break;
    case kxxHash64:
      actual = static_cast(XXH64(buf, static_cast(len), 0) &
                           uint64_t{0xffffffff});
      break;
    default:
      s = Status::Corruption("unknown checksum type");
  }
  // Skip the comparison if the type itself was already rejected.
  if (s.ok() && actual != expected) {
    s = Status::Corruption("properties block checksum mismatched");
  }
  return s;
}
// Returns the allocator attached to the compressed block cache configured in
// `table_options`, or nullptr when no compressed block cache is set.
inline MemoryAllocator* GetMemoryAllocatorForCompressedBlock(
    const BlockBasedTableOptions& table_options) {
  const auto& compressed_cache = table_options.block_cache_compressed;
  if (!compressed_cache.get()) {
    return nullptr;
  }
  return compressed_cache->memory_allocator();
}
// Makes the uncompression dictionary available through `*uncompression_dict`.
//
// If this reader already holds the dictionary, the output entry is pointed at
// it without taking ownership and no read happens. Otherwise the dictionary
// block is read via ReadUncompressionDictionary(); with `no_io` set the read
// is restricted to the block cache tier.
//
// Returns OK when the reader's own copy is used, otherwise the status of
// ReadUncompressionDictionary().
Status UncompressionDictReader::GetOrReadUncompressionDictionary(
    FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
    BlockCacheLookupContext* lookup_context,
    CachableEntry* uncompression_dict) const {
  assert(uncompression_dict);

  // Fast path: hand out the dictionary held by this reader.
  if (!uncompression_dict_.IsEmpty()) {
    uncompression_dict->SetUnownedValue(uncompression_dict_.GetValue());
    return Status::OK();
  }

  // Only read_tier is taken from the caller's intent; every other read
  // option is left at its default.
  ReadOptions read_options;
  if (no_io) {
    read_options.read_tier = kBlockCacheTier;
  }

  return ReadUncompressionDictionary(table_, prefetch_buffer, read_options,
                                     cache_dictionary_blocks(), get_context,
                                     lookup_context, uncompression_dict);
}
uncompression_dict_.GetValue()->ApproximateMemoryUsage() : 0; #ifdef ROCKSDB_MALLOC_USABLE_SIZE usage += malloc_usable_size(const_cast(this)); #else usage += sizeof(*this); #endif // ROCKSDB_MALLOC_USABLE_SIZE return usage; } bool UncompressionDictReader::cache_dictionary_blocks() const { assert(table_); assert(table_->get_rep()); return table_->get_rep()->table_options.cache_index_and_filter_blocks; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_based/uncompression_dict_reader.h000066400000000000000000000037531370372246700241760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include "table/block_based/cachable_entry.h" #include "table/format.h" namespace ROCKSDB_NAMESPACE { class BlockBasedTable; struct BlockCacheLookupContext; class FilePrefetchBuffer; class GetContext; struct ReadOptions; struct UncompressionDict; // Provides access to the uncompression dictionary regardless of whether // it is owned by the reader or stored in the cache, or whether it is pinned // in the cache or not. 
class UncompressionDictReader {
 public:
  // Creates a reader for `table` and stores it in
  // `*uncompression_dict_reader`. The dictionary is read up front when
  // `prefetch` is set or the block cache is not used; `pin` requires
  // `prefetch`. On a failed read the error is returned and the output
  // pointer is not modified.
  static Status Create(
      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
      bool use_cache, bool prefetch, bool pin,
      BlockCacheLookupContext* lookup_context,
      std::unique_ptr* uncompression_dict_reader);

  // Points `*uncompression_dict` at the dictionary: the reader's own copy if
  // it holds one (not owned by the output entry), otherwise a freshly
  // retrieved block. With `no_io` the retrieval is limited to the block
  // cache tier.
  Status GetOrReadUncompressionDictionary(
      FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context,
      BlockCacheLookupContext* lookup_context,
      CachableEntry* uncompression_dict) const;

  // Size of this object plus, if the reader owns the dictionary value, the
  // dictionary's own reported memory usage.
  size_t ApproximateMemoryUsage() const;

 private:
  // Use Create(). `t` must not be null (asserted).
  UncompressionDictReader(const BlockBasedTable* t,
                          CachableEntry&& uncompression_dict)
      : table_(t), uncompression_dict_(std::move(uncompression_dict)) {
    assert(table_);
  }

  // Whether dictionary blocks go through the block cache; follows the
  // table's cache_index_and_filter_blocks option.
  bool cache_dictionary_blocks() const;

  // Retrieves the dictionary block identified by the table's
  // compression_dict_handle into `*uncompression_dict`, which must be empty
  // on entry. Failures are logged as warnings and returned.
  static Status ReadUncompressionDictionary(
      const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
      const ReadOptions& read_options, bool use_cache, GetContext* get_context,
      BlockCacheLookupContext* lookup_context,
      CachableEntry* uncompression_dict);

  // Table this reader belongs to; not owned.
  const BlockBasedTable* table_;
  // Dictionary held by the reader; may be empty, in which case every access
  // goes through ReadUncompressionDictionary().
  CachableEntry uncompression_dict_;
};
#include "table/block_fetcher.h" #include #include #include "file/file_util.h" #include "logging/logging.h" #include "memory/memory_allocator.h" #include "monitoring/perf_context_imp.h" #include "rocksdb/env.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" #include "table/format.h" #include "table/persistent_cache_helper.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/xxhash.h" namespace ROCKSDB_NAMESPACE { inline void BlockFetcher::CheckBlockChecksum() { // Check the crc of the type and the block contents if (read_options_.verify_checksums) { const char* data = slice_.data(); // Pointer to where Read put the data PERF_TIMER_GUARD(block_checksum_time); uint32_t value = DecodeFixed32(data + block_size_ + 1); uint32_t actual = 0; switch (footer_.checksum()) { case kNoChecksum: break; case kCRC32c: value = crc32c::Unmask(value); actual = crc32c::Value(data, block_size_ + 1); break; case kxxHash: actual = XXH32(data, static_cast(block_size_) + 1, 0); break; case kxxHash64: actual = static_cast( XXH64(data, static_cast(block_size_) + 1, 0) & uint64_t{0xffffffff}); break; default: status_ = Status::Corruption( "unknown checksum type " + ToString(footer_.checksum()) + " in " + file_->file_name() + " offset " + ToString(handle_.offset()) + " size " + ToString(block_size_)); } if (status_.ok() && actual != value) { status_ = Status::Corruption( "block checksum mismatch: expected " + ToString(actual) + ", got " + ToString(value) + " in " + file_->file_name() + " offset " + ToString(handle_.offset()) + " size " + ToString(block_size_)); } } } inline bool BlockFetcher::TryGetUncompressBlockFromPersistentCache() { if (cache_options_.persistent_cache && !cache_options_.persistent_cache->IsCompressed()) { Status status = PersistentCacheHelper::LookupUncompressedPage( cache_options_, handle_, contents_); if (status.ok()) { // 
// Tries to satisfy the read from a persistent cache that stores raw
// (compressed-mode) pages.
//
// Returns true on a hit; heap_buf_ then owns the page, and used_buf_ and
// slice_ refer to it (slice_ covers block_size_ bytes, i.e. without the
// trailer). Returns false if no such cache is configured or the lookup did
// not succeed. A lookup error other than NotFound is logged at info level;
// status_ keeps the lookup result in either case.
inline bool BlockFetcher::TryGetCompressedBlockFromPersistentCache() {
  if (cache_options_.persistent_cache &&
      cache_options_.persistent_cache->IsCompressed()) {
    // lookup the raw page in the compressed-mode persistent cache
    std::unique_ptr raw_data;
    status_ = PersistentCacheHelper::LookupRawPage(
        cache_options_, handle_, &raw_data, block_size_with_trailer_);
    if (status_.ok()) {
      // Ownership of the page moves from raw_data into heap_buf_.
      heap_buf_ = CacheAllocationPtr(raw_data.release());
      used_buf_ = heap_buf_.get();
      slice_ = Slice(heap_buf_.get(), block_size_);
      return true;
    } else if (!status_.IsNotFound() && ioptions_.info_log) {
      assert(!status_.ok());
      ROCKS_LOG_INFO(ioptions_.info_log,
                     "Error reading from persistent cache. %s",
                     status_.ToString().c_str());
    }
  }
  return false;
}
// Fetches the block described by handle_ into *contents_ and returns the
// resulting status.
//
// Sources are tried in this order:
//   1. the uncompressed persistent cache (a hit returns immediately),
//   2. the prefetch buffer (checksum verified inside the helper),
//   3. the compressed persistent cache (a hit skips the checksum step below),
//   4. the file itself, followed by a size check and a checksum check.
// After that the block is either uncompressed (do_uncompress_ and the block
// is compressed) or handed over as-is via GetBlockContents(), and finally
// offered to the uncompressed persistent cache.
//
// On return compression_type_ describes *contents_: kNoCompression whenever
// the payload was uncompressed here or came from the uncompressed cache.
Status BlockFetcher::ReadBlockContents() {
  if (TryGetUncompressBlockFromPersistentCache()) {
    compression_type_ = kNoCompression;
#ifndef NDEBUG
    contents_->is_raw_block = true;
#endif  // NDEBUG
    return Status::OK();
  }
  if (TryGetFromPrefetchBuffer()) {
    // The helper also returns true when the data was found but failed the
    // checksum; status_ tells the two cases apart.
    if (!status_.ok()) {
      return status_;
    }
  } else if (!TryGetCompressedBlockFromPersistentCache()) {
    IOOptions opts;
    // Overwrites whatever status the persistent cache lookup left behind.
    status_ = PrepareIOFromReadOptions(read_options_, file_->env(), opts);
    // Actual file read
    if (status_.ok()) {
      if (file_->use_direct_io()) {
        // Direct I/O: the file reader supplies the buffer (direct_io_buf_),
        // so used_buf_ is derived from where the data ended up.
        PERF_TIMER_GUARD(block_read_time);
        status_ =
            file_->Read(opts, handle_.offset(), block_size_with_trailer_,
                        &slice_, nullptr, &direct_io_buf_, for_compaction_);
        PERF_COUNTER_ADD(block_read_count, 1);
        used_buf_ = const_cast(slice_.data());
      } else {
        // Buffered I/O: pick stack/heap/compressed buffer first, then read
        // into it.
        PrepareBufferForBlockFromFile();
        PERF_TIMER_GUARD(block_read_time);
        status_ = file_->Read(opts, handle_.offset(), block_size_with_trailer_,
                              &slice_, used_buf_, nullptr, for_compaction_);
        PERF_COUNTER_ADD(block_read_count, 1);
#ifndef NDEBUG
        // Test-only accounting of which buffer the read landed in.
        if (used_buf_ == &stack_buf_[0]) {
          num_stack_buf_memcpy_++;
        } else if (used_buf_ == heap_buf_.get()) {
          num_heap_buf_memcpy_++;
        } else if (used_buf_ == compressed_buf_.get()) {
          num_compressed_buf_memcpy_++;
        }
#endif
      }
    }

    // Note: the per-type and byte counters below are bumped before status_
    // is inspected, i.e. also when the read (or its preparation) failed.
    // TODO: introduce dedicated perf counter for range tombstones
    switch (block_type_) {
      case BlockType::kFilter:
        PERF_COUNTER_ADD(filter_block_read_count, 1);
        break;

      case BlockType::kCompressionDictionary:
        PERF_COUNTER_ADD(compression_dict_block_read_count, 1);
        break;

      case BlockType::kIndex:
        PERF_COUNTER_ADD(index_block_read_count, 1);
        break;

      // Nothing to do here as we don't have counters for the other types.
      default:
        break;
    }

    PERF_COUNTER_ADD(block_read_byte, block_size_with_trailer_);
    if (!status_.ok()) {
      return status_;
    }

    // A short read is reported directly; status_ itself is not updated on
    // this path.
    if (slice_.size() != block_size_with_trailer_) {
      return Status::Corruption("truncated block read from " +
                                file_->file_name() + " offset " +
                                ToString(handle_.offset()) + ", expected " +
                                ToString(block_size_with_trailer_) +
                                " bytes, got " + ToString(slice_.size()));
    }

    CheckBlockChecksum();
    if (status_.ok()) {
      InsertCompressedBlockToPersistentCacheIfNeeded();
    } else {
      return status_;
    }
  }

  // The compression type is read from the block itself.
  compression_type_ = get_block_compression_type(slice_.data(), block_size_);

  if (do_uncompress_ && compression_type_ != kNoCompression) {
    PERF_TIMER_GUARD(block_decompress_time);
    // compressed page, uncompress, update cache
    UncompressionContext context(compression_type_);
    UncompressionInfo info(context, uncompression_dict_, compression_type_);
    status_ = UncompressBlockContents(info, slice_.data(), block_size_,
                                      contents_, footer_.version(), ioptions_,
                                      memory_allocator_);
#ifndef NDEBUG
    num_heap_buf_memcpy_++;
#endif
    // From here on the caller sees uncompressed contents.
    compression_type_ = kNoCompression;
  } else {
    GetBlockContents();
  }

  InsertUncompressedBlockToPersistentCacheIfNeeded();

  return status_;
}
#pragma once #include "memory/memory_allocator.h" #include "table/block_based/block.h" #include "table/block_based/block_type.h" #include "table/format.h" namespace ROCKSDB_NAMESPACE { // Retrieves a single block of a given file. Utilizes the prefetch buffer and/or // persistent cache provided (if any) to try to avoid reading from the file // directly. Note that both the prefetch buffer and the persistent cache are // optional; also, note that the persistent cache may be configured to store either // compressed or uncompressed blocks. // // If the retrieved block is compressed and the do_uncompress flag is set, // BlockFetcher uncompresses the block (using the uncompression dictionary, // if provided, to prime the compression algorithm), and returns the resulting // uncompressed block data. Otherwise, it returns the original block. // // Two read options affect the behavior of BlockFetcher: if verify_checksums is // true, the checksum of the (original) block is checked; if fill_cache is true, // the block is added to the persistent cache if needed. // // Memory for uncompressed and compressed blocks is allocated as needed // using memory_allocator and memory_allocator_compressed, respectively // (if provided; otherwise, the default allocator is used). 
class BlockFetcher { public: BlockFetcher(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& read_options, const BlockHandle& handle, BlockContents* contents, const ImmutableCFOptions& ioptions, bool do_uncompress, bool maybe_compressed, BlockType block_type, const UncompressionDict& uncompression_dict, const PersistentCacheOptions& cache_options, MemoryAllocator* memory_allocator = nullptr, MemoryAllocator* memory_allocator_compressed = nullptr, bool for_compaction = false) : file_(file), prefetch_buffer_(prefetch_buffer), footer_(footer), read_options_(read_options), handle_(handle), contents_(contents), ioptions_(ioptions), do_uncompress_(do_uncompress), maybe_compressed_(maybe_compressed), block_type_(block_type), block_size_(static_cast(handle_.size())), block_size_with_trailer_(block_size(handle_)), uncompression_dict_(uncompression_dict), cache_options_(cache_options), memory_allocator_(memory_allocator), memory_allocator_compressed_(memory_allocator_compressed), for_compaction_(for_compaction) {} Status ReadBlockContents(); CompressionType get_compression_type() const { return compression_type_; } #ifndef NDEBUG int TEST_GetNumStackBufMemcpy() const { return num_stack_buf_memcpy_; } int TEST_GetNumHeapBufMemcpy() const { return num_heap_buf_memcpy_; } int TEST_GetNumCompressedBufMemcpy() const { return num_compressed_buf_memcpy_; } #endif private: #ifndef NDEBUG int num_stack_buf_memcpy_ = 0; int num_heap_buf_memcpy_ = 0; int num_compressed_buf_memcpy_ = 0; #endif static const uint32_t kDefaultStackBufferSize = 5000; RandomAccessFileReader* file_; FilePrefetchBuffer* prefetch_buffer_; const Footer& footer_; const ReadOptions read_options_; const BlockHandle& handle_; BlockContents* contents_; const ImmutableCFOptions& ioptions_; const bool do_uncompress_; const bool maybe_compressed_; const BlockType block_type_; const size_t block_size_; const size_t block_size_with_trailer_; const UncompressionDict& 
uncompression_dict_; const PersistentCacheOptions& cache_options_; MemoryAllocator* memory_allocator_; MemoryAllocator* memory_allocator_compressed_; Status status_; Slice slice_; char* used_buf_ = nullptr; AlignedBuf direct_io_buf_; CacheAllocationPtr heap_buf_; CacheAllocationPtr compressed_buf_; char stack_buf_[kDefaultStackBufferSize]; bool got_from_prefetch_buffer_ = false; CompressionType compression_type_; bool for_compaction_ = false; // return true if found bool TryGetUncompressBlockFromPersistentCache(); // return true if found bool TryGetFromPrefetchBuffer(); bool TryGetCompressedBlockFromPersistentCache(); void PrepareBufferForBlockFromFile(); // Copy content from used_buf_ to new heap_buf_. void CopyBufferToHeapBuf(); // Copy content from used_buf_ to new compressed_buf_. void CopyBufferToCompressedBuf(); void GetBlockContents(); void InsertCompressedBlockToPersistentCacheIfNeeded(); void InsertUncompressedBlockToPersistentCacheIfNeeded(); void CheckBlockChecksum(); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/block_fetcher_test.cc000066400000000000000000000431311370372246700204760ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// Creates a table named `table_name` holding 9 kv pairs (i, i) where i ranges
// from 0 to 8, inclusive. Keys are the internal-key form of the decimal
// string of i, values the decimal string itself. Blocks are written with
// `compression_type`.
void CreateTable(const std::string& table_name,
                 const CompressionType& compression_type) {
  std::unique_ptr writer;
  NewFileWriter(table_name, &writer);

  // Create table builder.
  Options options;
  ImmutableCFOptions ioptions(options);
  InternalKeyComparator comparator(options.comparator);
  ColumnFamilyOptions cf_options;
  MutableCFOptions moptions(cf_options);
  std::vector> factories;
  std::unique_ptr table_builder(table_factory_.NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, comparator, &factories,
                          compression_type, 0 /* sample_for_compression */,
                          CompressionOptions(), false /* skip_filters */,
                          kDefaultColumnFamilyName, -1 /* level */),
      0 /* column_family_id */, writer.get()));

  // Build table.
  for (int i = 0; i < 9; i++) {
    std::string key = ToInternalKey(std::to_string(i));
    std::string value = std::to_string(i);
    table_builder->Add(key, value);
  }
  ASSERT_OK(table_builder->Finish());
}
// Bufferr allocation and memory copy statistics are expected. void TestFetchDataBlock(const std::string& table_name_prefix, bool compressed, bool do_uncompress, const TestStats& expected_non_direct_io_stats, const TestStats& expected_direct_io_stats) { for (CompressionType compression_type : GetSupportedCompressions()) { bool do_compress = compression_type != kNoCompression; if (compressed != do_compress) continue; std::string compression_type_str = CompressionTypeToString(compression_type); std::string table_name = table_name_prefix + compression_type_str; CreateTable(table_name, compression_type); CompressionType expected_compression_type_after_fetch = (compressed && !do_uncompress) ? compression_type : kNoCompression; BlockContents blocks[2]; MemcpyStats memcpy_stats[2]; CountedMemoryAllocator heap_buf_allocators[2]; CountedMemoryAllocator compressed_buf_allocators[2]; for (bool use_direct_io : {false, true}) { FetchFirstDataBlock( table_name, use_direct_io, compressed, do_uncompress, expected_compression_type_after_fetch, &heap_buf_allocators[use_direct_io], &compressed_buf_allocators[use_direct_io], &blocks[use_direct_io], &memcpy_stats[use_direct_io]); } AssertSameBlock(blocks[0], blocks[1]); // Check memcpy and buffer allocation statistics. for (bool use_direct_io : {false, true}) { const TestStats& expected_stats = use_direct_io ? 
expected_direct_io_stats : expected_non_direct_io_stats; ASSERT_EQ(memcpy_stats[use_direct_io].num_stack_buf_memcpy, expected_stats.memcpy_stats.num_stack_buf_memcpy); ASSERT_EQ(memcpy_stats[use_direct_io].num_heap_buf_memcpy, expected_stats.memcpy_stats.num_heap_buf_memcpy); ASSERT_EQ(memcpy_stats[use_direct_io].num_compressed_buf_memcpy, expected_stats.memcpy_stats.num_compressed_buf_memcpy); ASSERT_EQ(heap_buf_allocators[use_direct_io].GetNumAllocations(), expected_stats.buf_allocation_stats.num_heap_buf_allocations); ASSERT_EQ( compressed_buf_allocators[use_direct_io].GetNumAllocations(), expected_stats.buf_allocation_stats.num_compressed_buf_allocations); // The allocated buffers are not deallocated until // the block content is deleted. ASSERT_EQ(heap_buf_allocators[use_direct_io].GetNumDeallocations(), 0); ASSERT_EQ( compressed_buf_allocators[use_direct_io].GetNumDeallocations(), 0); blocks[use_direct_io].allocation.reset(); ASSERT_EQ(heap_buf_allocators[use_direct_io].GetNumDeallocations(), expected_stats.buf_allocation_stats.num_heap_buf_allocations); ASSERT_EQ( compressed_buf_allocators[use_direct_io].GetNumDeallocations(), expected_stats.buf_allocation_stats.num_compressed_buf_allocations); } } } private: std::string test_dir_; Env* env_; std::shared_ptr fs_; BlockBasedTableFactory table_factory_; std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } void WriteToFile(const std::string& content, const std::string& filename) { std::unique_ptr f; ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr)); ASSERT_OK(f->Append(content, IOOptions(), nullptr)); ASSERT_OK(f->Close(IOOptions(), nullptr)); } void NewFileWriter(const std::string& filename, std::unique_ptr* writer) { std::string path = Path(filename); EnvOptions env_options; std::unique_ptr file; ASSERT_OK(env_->NewWritableFile(path, &file, env_options)); writer->reset(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), path, 
env_options)); } void NewFileReader(const std::string& filename, const FileOptions& opt, std::unique_ptr* reader) { std::string path = Path(filename); std::unique_ptr f; ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr)); reader->reset(new RandomAccessFileReader(std::move(f), path, env_)); } void NewTableReader(const ImmutableCFOptions& ioptions, const FileOptions& foptions, const InternalKeyComparator& comparator, const std::string& table_name, std::unique_ptr* table) { std::unique_ptr file; NewFileReader(table_name, foptions, &file); uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size)); std::unique_ptr table_reader; ASSERT_OK(BlockBasedTable::Open(ioptions, EnvOptions(), table_factory_.table_options(), comparator, std::move(file), file_size, &table_reader)); table->reset(reinterpret_cast(table_reader.release())); } std::string ToInternalKey(const std::string& key) { InternalKey internal_key(key, 0, ValueType::kTypeValue); return internal_key.Encode().ToString(); } void ReadFooter(RandomAccessFileReader* file, Footer* footer) { uint64_t file_size = 0; ASSERT_OK(env_->GetFileSize(file->file_name(), &file_size)); ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, footer, kBlockBasedTableMagicNumber); } // NOTE: compression_type returns the compression type of the fetched block // contents, so if the block is fetched and uncompressed, then it's // kNoCompression. 
void FetchBlock(RandomAccessFileReader* file, const BlockHandle& block, BlockType block_type, bool compressed, bool do_uncompress, MemoryAllocator* heap_buf_allocator, MemoryAllocator* compressed_buf_allocator, BlockContents* contents, MemcpyStats* stats, CompressionType* compresstion_type) { Options options; ImmutableCFOptions ioptions(options); ReadOptions roptions; PersistentCacheOptions persistent_cache_options; Footer footer; ReadFooter(file, &footer); std::unique_ptr fetcher(new BlockFetcher( file, nullptr /* prefetch_buffer */, footer, roptions, block, contents, ioptions, do_uncompress, compressed, block_type, UncompressionDict::GetEmptyDict(), persistent_cache_options, heap_buf_allocator, compressed_buf_allocator)); ASSERT_OK(fetcher->ReadBlockContents()); stats->num_stack_buf_memcpy = fetcher->TEST_GetNumStackBufMemcpy(); stats->num_heap_buf_memcpy = fetcher->TEST_GetNumHeapBufMemcpy(); stats->num_compressed_buf_memcpy = fetcher->TEST_GetNumCompressedBufMemcpy(); *compresstion_type = fetcher->get_compression_type(); } // NOTE: expected_compression_type is the expected compression // type of the fetched block content, if the block is uncompressed, // then the expected compression type is kNoCompression. void FetchFirstDataBlock(const std::string& table_name, bool use_direct_io, bool compressed, bool do_uncompress, CompressionType expected_compression_type, MemoryAllocator* heap_buf_allocator, MemoryAllocator* compressed_buf_allocator, BlockContents* block, MemcpyStats* memcpy_stats) { Options options; ImmutableCFOptions ioptions(options); InternalKeyComparator comparator(options.comparator); FileOptions foptions; foptions.use_direct_reads = use_direct_io; // Get block handle for the first data block. 
std::unique_ptr table; NewTableReader(ioptions, foptions, comparator, table_name, &table); std::unique_ptr index_reader; ASSERT_OK(BinarySearchIndexReader::Create( table.get(), nullptr /* prefetch_buffer */, false /* use_cache */, false /* prefetch */, false /* pin */, nullptr /* lookup_context */, &index_reader)); std::unique_ptr> iter( index_reader->NewIterator( ReadOptions(), false /* disable_prefix_seek */, nullptr /* iter */, nullptr /* get_context */, nullptr /* lookup_context */)); ASSERT_OK(iter->status()); iter->SeekToFirst(); BlockHandle first_block_handle = iter->value().handle; // Fetch first data block. std::unique_ptr file; NewFileReader(table_name, foptions, &file); CompressionType compression_type; FetchBlock(file.get(), first_block_handle, BlockType::kData, compressed, do_uncompress, heap_buf_allocator, compressed_buf_allocator, block, memcpy_stats, &compression_type); ASSERT_EQ(compression_type, expected_compression_type); } }; // Skip the following tests in lite mode since direct I/O is unsupported. #ifndef ROCKSDB_LITE // Fetch index block under both direct IO and non-direct IO. // Expects: // the index block contents are the same for both read modes. TEST_F(BlockFetcherTest, FetchIndexBlock) { for (CompressionType compression : GetSupportedCompressions()) { std::string table_name = "FetchIndexBlock" + CompressionTypeToString(compression); CreateTable(table_name, compression); CountedMemoryAllocator allocator; MemcpyStats memcpy_stats; BlockContents indexes[2]; for (bool use_direct_io : {false, true}) { FetchIndexBlock(table_name, use_direct_io, &allocator, &allocator, &memcpy_stats, &indexes[use_direct_io]); } AssertSameBlock(indexes[0], indexes[1]); } } // Data blocks are not compressed, // fetch data block under both direct IO and non-direct IO. // Expects: // 1. in non-direct IO mode, allocate a heap buffer and memcpy the block // into the buffer; // 2. 
in direct IO mode, allocate a heap buffer and memcpy from the // direct IO buffer to the heap buffer. TEST_F(BlockFetcherTest, FetchUncompressedDataBlock) { MemcpyStats memcpy_stats; memcpy_stats.num_heap_buf_memcpy = 1; BufAllocationStats buf_allocation_stats; buf_allocation_stats.num_heap_buf_allocations = 1; TestStats expected_stats{memcpy_stats, buf_allocation_stats}; TestFetchDataBlock("FetchUncompressedDataBlock", false, false, expected_stats, expected_stats); } // Data blocks are compressed, // fetch data block under both direct IO and non-direct IO, // but do not uncompress. // Expects: // 1. in non-direct IO mode, allocate a compressed buffer and memcpy the block // into the buffer; // 2. in direct IO mode, allocate a compressed buffer and memcpy from the // direct IO buffer to the compressed buffer. TEST_F(BlockFetcherTest, FetchCompressedDataBlock) { MemcpyStats memcpy_stats; memcpy_stats.num_compressed_buf_memcpy = 1; BufAllocationStats buf_allocation_stats; buf_allocation_stats.num_compressed_buf_allocations = 1; TestStats expected_stats{memcpy_stats, buf_allocation_stats}; TestFetchDataBlock("FetchCompressedDataBlock", true, false, expected_stats, expected_stats); } // Data blocks are compressed, // fetch and uncompress data block under both direct IO and non-direct IO. // Expects: // 1. in non-direct IO mode, since the block is small, so it's first memcpyed // to the stack buffer, then a heap buffer is allocated and the block is // uncompressed into the heap. // 2. in direct IO mode mode, allocate a heap buffer, then directly uncompress // and memcpy from the direct IO buffer to the heap buffer. 
TEST_F(BlockFetcherTest, FetchAndUncompressCompressedDataBlock) { TestStats expected_non_direct_io_stats; { MemcpyStats memcpy_stats; memcpy_stats.num_stack_buf_memcpy = 1; memcpy_stats.num_heap_buf_memcpy = 1; BufAllocationStats buf_allocation_stats; buf_allocation_stats.num_heap_buf_allocations = 1; buf_allocation_stats.num_compressed_buf_allocations = 0; expected_non_direct_io_stats = {memcpy_stats, buf_allocation_stats}; } TestStats expected_direct_io_stats; { MemcpyStats memcpy_stats; memcpy_stats.num_heap_buf_memcpy = 1; BufAllocationStats buf_allocation_stats; buf_allocation_stats.num_heap_buf_allocations = 1; expected_direct_io_stats = {memcpy_stats, buf_allocation_stats}; } TestFetchDataBlock("FetchAndUncompressCompressedDataBlock", true, true, expected_non_direct_io_stats, expected_direct_io_stats); } #endif // ROCKSDB_LITE } // namespace } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/cleanable_test.cc000066400000000000000000000146051370372246700176160ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/iostats_context.h" #include "rocksdb/perf_context.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { class CleanableTest : public testing::Test {}; // Use this to keep track of the cleanups that were actually performed void Multiplier(void* arg1, void* arg2) { int* res = reinterpret_cast(arg1); int* num = reinterpret_cast(arg2); *res *= *num; } // the first Cleanup is on stack and the rest on heap, so test with both cases TEST_F(CleanableTest, Register) { int n2 = 2, n3 = 3; int res = 1; { Cleanable c1; } // ~Cleanable ASSERT_EQ(1, res); res = 1; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; } // ~Cleanable ASSERT_EQ(2, res); res = 1; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; } // ~Cleanable ASSERT_EQ(6, res); // Test the Reset does cleanup res = 1; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; c1.Reset(); ASSERT_EQ(6, res); } // ~Cleanable ASSERT_EQ(6, res); // Test Clenable is usable after Reset res = 1; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.Reset(); ASSERT_EQ(2, res); c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; } // ~Cleanable ASSERT_EQ(6, res); } // the first Cleanup is on stack and the rest on heap, // so test all the combinations of them TEST_F(CleanableTest, Delegation) { int n2 = 2, n3 = 3, n5 = 5, n7 = 7; int res = 1; { Cleanable c2; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.DelegateCleanupsTo(&c2); } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(2, res); res = 1; { Cleanable c2; { Cleanable c1; c1.DelegateCleanupsTo(&c2); } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(1, res); res = 1; { Cleanable c2; { Cleanable c1; 
c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; c1.DelegateCleanupsTo(&c2); } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(6, res); res = 1; { Cleanable c2; c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; c1.DelegateCleanupsTo(&c2); // res = 2 * 3 * 5; } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(30, res); res = 1; { Cleanable c2; c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.RegisterCleanup(Multiplier, &res, &n3); // res = 2 * 3; c1.DelegateCleanupsTo(&c2); // res = 2 * 3 * 5 * 7; } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(210, res); res = 1; { Cleanable c2; c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; c1.DelegateCleanupsTo(&c2); // res = 2 * 5 * 7; } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(70, res); res = 1; { Cleanable c2; c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; c2.RegisterCleanup(Multiplier, &res, &n7); // res = 5 * 7; { Cleanable c1; c1.DelegateCleanupsTo(&c2); // res = 5 * 7; } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(35, res); res = 1; { Cleanable c2; c2.RegisterCleanup(Multiplier, &res, &n5); // res = 5; { Cleanable c1; c1.DelegateCleanupsTo(&c2); // res = 5; } // ~Cleanable ASSERT_EQ(1, res); } // ~Cleanable ASSERT_EQ(5, res); } static void ReleaseStringHeap(void* s, void*) { delete reinterpret_cast(s); } class PinnableSlice4Test : public PinnableSlice { public: void TestStringIsRegistered(std::string* s) { ASSERT_TRUE(cleanup_.function == ReleaseStringHeap); 
ASSERT_EQ(cleanup_.arg1, s); ASSERT_EQ(cleanup_.arg2, nullptr); ASSERT_EQ(cleanup_.next, nullptr); } }; // Putting the PinnableSlice tests here due to similarity to Cleanable tests TEST_F(CleanableTest, PinnableSlice) { int n2 = 2; int res = 1; const std::string const_str = "123"; { res = 1; PinnableSlice4Test value; Slice slice(const_str); value.PinSlice(slice, Multiplier, &res, &n2); std::string str; str.assign(value.data(), value.size()); ASSERT_EQ(const_str, str); } // ~Cleanable ASSERT_EQ(2, res); { res = 1; PinnableSlice4Test value; Slice slice(const_str); { Cleanable c1; c1.RegisterCleanup(Multiplier, &res, &n2); // res = 2; value.PinSlice(slice, &c1); } // ~Cleanable ASSERT_EQ(1, res); // cleanups must have be delegated to value std::string str; str.assign(value.data(), value.size()); ASSERT_EQ(const_str, str); } // ~Cleanable ASSERT_EQ(2, res); { PinnableSlice4Test value; Slice slice(const_str); value.PinSelf(slice); std::string str; str.assign(value.data(), value.size()); ASSERT_EQ(const_str, str); } { PinnableSlice4Test value; std::string* self_str_ptr = value.GetSelf(); self_str_ptr->assign(const_str); value.PinSelf(); std::string str; str.assign(value.data(), value.size()); ASSERT_EQ(const_str, str); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/cuckoo/000077500000000000000000000000001370372246700156175ustar00rootroot00000000000000rocksdb-6.11.4/table/cuckoo/cuckoo_table_builder.cc000066400000000000000000000504521370372246700222740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "table/cuckoo/cuckoo_table_builder.h" #include #include #include #include #include #include "db/dbformat.h" #include "file/writable_file_writer.h" #include "rocksdb/env.h" #include "rocksdb/table.h" #include "table/block_based/block_builder.h" #include "table/cuckoo/cuckoo_table_factory.h" #include "table/format.h" #include "table/meta_blocks.h" #include "util/autovector.h" #include "util/random.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { const std::string CuckooTablePropertyNames::kEmptyKey = "rocksdb.cuckoo.bucket.empty.key"; const std::string CuckooTablePropertyNames::kNumHashFunc = "rocksdb.cuckoo.hash.num"; const std::string CuckooTablePropertyNames::kHashTableSize = "rocksdb.cuckoo.hash.size"; const std::string CuckooTablePropertyNames::kValueLength = "rocksdb.cuckoo.value.length"; const std::string CuckooTablePropertyNames::kIsLastLevel = "rocksdb.cuckoo.file.islastlevel"; const std::string CuckooTablePropertyNames::kCuckooBlockSize = "rocksdb.cuckoo.hash.cuckooblocksize"; const std::string CuckooTablePropertyNames::kIdentityAsFirstHash = "rocksdb.cuckoo.hash.identityfirst"; const std::string CuckooTablePropertyNames::kUseModuleHash = "rocksdb.cuckoo.hash.usemodule"; const std::string CuckooTablePropertyNames::kUserKeyLength = "rocksdb.cuckoo.hash.userkeylength"; // Obtained by running echo rocksdb.table.cuckoo | sha1sum extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; CuckooTableBuilder::CuckooTableBuilder( WritableFileWriter* file, double max_hash_table_ratio, uint32_t max_num_hash_table, uint32_t max_search_depth, const Comparator* user_comparator, uint32_t cuckoo_block_size, bool use_module_hash, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t), uint32_t column_family_id, const std::string& column_family_name) : num_hash_func_(2), file_(file), max_hash_table_ratio_(max_hash_table_ratio), max_num_hash_func_(max_num_hash_table), 
max_search_depth_(max_search_depth), cuckoo_block_size_(std::max(1U, cuckoo_block_size)), hash_table_size_(use_module_hash ? 0 : 2), is_last_level_file_(false), has_seen_first_key_(false), has_seen_first_value_(false), key_size_(0), value_size_(0), num_entries_(0), num_values_(0), ucomp_(user_comparator), use_module_hash_(use_module_hash), identity_as_first_hash_(identity_as_first_hash), get_slice_hash_(get_slice_hash), closed_(false) { // Data is in a huge block. properties_.num_data_blocks = 1; properties_.index_size = 0; properties_.filter_size = 0; properties_.column_family_id = column_family_id; properties_.column_family_name = column_family_name; } void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { if (num_entries_ >= kMaxVectorIdx - 1) { status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1"); return; } ParsedInternalKey ikey; if (!ParseInternalKey(key, &ikey)) { status_ = Status::Corruption("Unable to parse key into inernal key."); return; } if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) { status_ = Status::NotSupported("Unsupported key type " + ToString(ikey.type)); return; } // Determine if we can ignore the sequence number and value type from // internal keys by looking at sequence number from first key. We assume // that if first key has a zero sequence number, then all the remaining // keys will have zero seq. no. if (!has_seen_first_key_) { is_last_level_file_ = ikey.sequence == 0; has_seen_first_key_ = true; smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size(); } if (key_size_ != (is_last_level_file_ ? 
ikey.user_key.size() : key.size())) { status_ = Status::NotSupported("all keys have to be the same size"); return; } if (ikey.type == kTypeValue) { if (!has_seen_first_value_) { has_seen_first_value_ = true; value_size_ = value.size(); } if (value_size_ != value.size()) { status_ = Status::NotSupported("all values have to be the same size"); return; } if (is_last_level_file_) { kvs_.append(ikey.user_key.data(), ikey.user_key.size()); } else { kvs_.append(key.data(), key.size()); } kvs_.append(value.data(), value.size()); ++num_values_; } else { if (is_last_level_file_) { deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size()); } else { deleted_keys_.append(key.data(), key.size()); } } ++num_entries_; // In order to fill the empty buckets in the hash table, we identify a // key which is not used so far (unused_user_key). We determine this by // maintaining smallest and largest keys inserted so far in bytewise order // and use them to find a key outside this range in Finish() operation. // Note that this strategy is independent of user comparator used here. if (ikey.user_key.compare(smallest_user_key_) < 0) { smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); } else if (ikey.user_key.compare(largest_user_key_) > 0) { largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); } if (!use_module_hash_) { if (hash_table_size_ < num_entries_ / max_hash_table_ratio_) { hash_table_size_ *= 2; } } } bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const { assert(closed_); return idx >= num_values_; } Slice CuckooTableBuilder::GetKey(uint64_t idx) const { assert(closed_); if (IsDeletedKey(idx)) { return Slice(&deleted_keys_[static_cast((idx - num_values_) * key_size_)], static_cast(key_size_)); } return Slice(&kvs_[static_cast(idx * (key_size_ + value_size_))], static_cast(key_size_)); } Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const { assert(closed_); return is_last_level_file_ ? 
GetKey(idx) : ExtractUserKey(GetKey(idx)); } Slice CuckooTableBuilder::GetValue(uint64_t idx) const { assert(closed_); if (IsDeletedKey(idx)) { static std::string empty_value(static_cast(value_size_), 'a'); return Slice(empty_value); } return Slice(&kvs_[static_cast(idx * (key_size_ + value_size_) + key_size_)], static_cast(value_size_)); } Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { buckets->resize(static_cast(hash_table_size_ + cuckoo_block_size_ - 1)); uint32_t make_space_for_key_call_id = 0; for (uint32_t vector_idx = 0; vector_idx < num_entries_; vector_idx++) { uint64_t bucket_id = 0; bool bucket_found = false; autovector hash_vals; Slice user_key = GetUserKey(vector_idx); for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found; ++hash_cnt) { uint64_t hash_val = CuckooHash(user_key, hash_cnt, use_module_hash_, hash_table_size_, identity_as_first_hash_, get_slice_hash_); // If there is a collision, check next cuckoo_block_size_ locations for // empty locations. While checking, if we reach end of the hash table, // stop searching and proceed for next hash function. for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++hash_val) { if ((*buckets)[static_cast(hash_val)].vector_idx == kMaxVectorIdx) { bucket_id = hash_val; bucket_found = true; break; } else { if (ucomp_->Compare(user_key, GetUserKey((*buckets)[static_cast(hash_val)].vector_idx)) == 0) { return Status::NotSupported("Same key is being inserted again."); } hash_vals.push_back(hash_val); } } } while (!bucket_found && !MakeSpaceForKey(hash_vals, ++make_space_for_key_call_id, buckets, &bucket_id)) { // Rehash by increashing number of hash tables. if (num_hash_func_ >= max_num_hash_func_) { return Status::NotSupported("Too many collisions. Unable to hash."); } // We don't really need to rehash the entire table because old hashes are // still valid and we only increased the number of hash functions. 
uint64_t hash_val = CuckooHash(user_key, num_hash_func_, use_module_hash_, hash_table_size_, identity_as_first_hash_, get_slice_hash_); ++num_hash_func_; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++hash_val) { if ((*buckets)[static_cast(hash_val)].vector_idx == kMaxVectorIdx) { bucket_found = true; bucket_id = hash_val; break; } else { hash_vals.push_back(hash_val); } } } (*buckets)[static_cast(bucket_id)].vector_idx = vector_idx; } return Status::OK(); } Status CuckooTableBuilder::Finish() { assert(!closed_); closed_ = true; std::vector buckets; Status s; std::string unused_bucket; if (num_entries_ > 0) { // Calculate the real hash size if module hash is enabled. if (use_module_hash_) { hash_table_size_ = static_cast(num_entries_ / max_hash_table_ratio_); } status_ = MakeHashTable(&buckets); if (!status_.ok()) { return status_; } // Determine unused_user_key to fill empty buckets. std::string unused_user_key = smallest_user_key_; int curr_pos = static_cast(unused_user_key.size()) - 1; while (curr_pos >= 0) { --unused_user_key[curr_pos]; if (Slice(unused_user_key).compare(smallest_user_key_) < 0) { break; } --curr_pos; } if (curr_pos < 0) { // Try using the largest key to identify an unused key. 
unused_user_key = largest_user_key_; curr_pos = static_cast(unused_user_key.size()) - 1; while (curr_pos >= 0) { ++unused_user_key[curr_pos]; if (Slice(unused_user_key).compare(largest_user_key_) > 0) { break; } --curr_pos; } } if (curr_pos < 0) { return Status::Corruption("Unable to find unused key"); } if (is_last_level_file_) { unused_bucket = unused_user_key; } else { ParsedInternalKey ikey(unused_user_key, 0, kTypeValue); AppendInternalKey(&unused_bucket, ikey); } } properties_.num_entries = num_entries_; properties_.num_deletions = num_entries_ - num_values_; properties_.fixed_key_len = key_size_; properties_.user_collected_properties[ CuckooTablePropertyNames::kValueLength].assign( reinterpret_cast(&value_size_), sizeof(value_size_)); uint64_t bucket_size = key_size_ + value_size_; unused_bucket.resize(static_cast(bucket_size), 'a'); // Write the table. uint32_t num_added = 0; for (auto& bucket : buckets) { if (bucket.vector_idx == kMaxVectorIdx) { io_status_ = file_->Append(Slice(unused_bucket)); } else { ++num_added; io_status_ = file_->Append(GetKey(bucket.vector_idx)); if (io_status_.ok()) { if (value_size_ > 0) { io_status_ = file_->Append(GetValue(bucket.vector_idx)); } } } if (!io_status_.ok()) { status_ = io_status_; return status_; } } assert(num_added == NumEntries()); properties_.raw_key_size = num_added * properties_.fixed_key_len; properties_.raw_value_size = num_added * value_size_; uint64_t offset = buckets.size() * bucket_size; properties_.data_size = offset; unused_bucket.resize(static_cast(properties_.fixed_key_len)); properties_.user_collected_properties[ CuckooTablePropertyNames::kEmptyKey] = unused_bucket; properties_.user_collected_properties[ CuckooTablePropertyNames::kNumHashFunc].assign( reinterpret_cast(&num_hash_func_), sizeof(num_hash_func_)); properties_.user_collected_properties[ CuckooTablePropertyNames::kHashTableSize].assign( reinterpret_cast(&hash_table_size_), sizeof(hash_table_size_)); 
properties_.user_collected_properties[ CuckooTablePropertyNames::kIsLastLevel].assign( reinterpret_cast(&is_last_level_file_), sizeof(is_last_level_file_)); properties_.user_collected_properties[ CuckooTablePropertyNames::kCuckooBlockSize].assign( reinterpret_cast(&cuckoo_block_size_), sizeof(cuckoo_block_size_)); properties_.user_collected_properties[ CuckooTablePropertyNames::kIdentityAsFirstHash].assign( reinterpret_cast(&identity_as_first_hash_), sizeof(identity_as_first_hash_)); properties_.user_collected_properties[ CuckooTablePropertyNames::kUseModuleHash].assign( reinterpret_cast(&use_module_hash_), sizeof(use_module_hash_)); uint32_t user_key_len = static_cast(smallest_user_key_.size()); properties_.user_collected_properties[ CuckooTablePropertyNames::kUserKeyLength].assign( reinterpret_cast(&user_key_len), sizeof(user_key_len)); // Write meta blocks. MetaIndexBuilder meta_index_builder; PropertyBlockBuilder property_block_builder; property_block_builder.AddTableProperty(properties_); property_block_builder.Add(properties_.user_collected_properties); Slice property_block = property_block_builder.Finish(); BlockHandle property_block_handle; property_block_handle.set_offset(offset); property_block_handle.set_size(property_block.size()); io_status_ = file_->Append(property_block); offset += property_block.size(); if (!io_status_.ok()) { status_ = io_status_; return status_; } meta_index_builder.Add(kPropertiesBlock, property_block_handle); Slice meta_index_block = meta_index_builder.Finish(); BlockHandle meta_index_block_handle; meta_index_block_handle.set_offset(offset); meta_index_block_handle.set_size(meta_index_block.size()); io_status_ = file_->Append(meta_index_block); if (!io_status_.ok()) { status_ = io_status_; return status_; } Footer footer(kCuckooTableMagicNumber, 1); footer.set_metaindex_handle(meta_index_block_handle); footer.set_index_handle(BlockHandle::NullBlockHandle()); std::string footer_encoding; footer.EncodeTo(&footer_encoding); 
io_status_ = file_->Append(footer_encoding); status_ = io_status_; return status_; } void CuckooTableBuilder::Abandon() { assert(!closed_); closed_ = true; } uint64_t CuckooTableBuilder::NumEntries() const { return num_entries_; } uint64_t CuckooTableBuilder::FileSize() const { if (closed_) { return file_->GetFileSize(); } else if (num_entries_ == 0) { return 0; } if (use_module_hash_) { return static_cast((key_size_ + value_size_) * num_entries_ / max_hash_table_ratio_); } else { // Account for buckets being a power of two. // As elements are added, file size remains constant for a while and // doubles its size. Since compaction algorithm stops adding elements // only after it exceeds the file limit, we account for the extra element // being added here. uint64_t expected_hash_table_size = hash_table_size_; if (expected_hash_table_size < (num_entries_ + 1) / max_hash_table_ratio_) { expected_hash_table_size *= 2; } return (key_size_ + value_size_) * expected_hash_table_size - 1; } } // This method is invoked when there is no place to insert the target key. // It searches for a set of elements that can be moved to accommodate target // key. The search is a BFS graph traversal with first level (hash_vals) // being all the buckets target key could go to. // Then, from each node (curr_node), we find all the buckets that curr_node // could go to. They form the children of curr_node in the tree. // We continue the traversal until we find an empty bucket, in which case, we // move all elements along the path from first level to this empty bucket, to // make space for target key which is inserted at first level (*bucket_id). // If tree depth exceedes max depth, we return false indicating failure. 
bool CuckooTableBuilder::MakeSpaceForKey( const autovector& hash_vals, const uint32_t make_space_for_key_call_id, std::vector* buckets, uint64_t* bucket_id) { struct CuckooNode { uint64_t bucket_id; uint32_t depth; uint32_t parent_pos; CuckooNode(uint64_t _bucket_id, uint32_t _depth, int _parent_pos) : bucket_id(_bucket_id), depth(_depth), parent_pos(_parent_pos) {} }; // This is BFS search tree that is stored simply as a vector. // Each node stores the index of parent node in the vector. std::vector tree; // We want to identify already visited buckets in the current method call so // that we don't add same buckets again for exploration in the tree. // We do this by maintaining a count of current method call in // make_space_for_key_call_id, which acts as a unique id for this invocation // of the method. We store this number into the nodes that we explore in // current method call. // It is unlikely for the increment operation to overflow because the maximum // no. of times this will be called is <= max_num_hash_func_ + num_entries_. for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { uint64_t bid = hash_vals[hash_cnt]; (*buckets)[static_cast(bid)].make_space_for_key_call_id = make_space_for_key_call_id; tree.push_back(CuckooNode(bid, 0, 0)); } bool null_found = false; uint32_t curr_pos = 0; while (!null_found && curr_pos < tree.size()) { CuckooNode& curr_node = tree[curr_pos]; uint32_t curr_depth = curr_node.depth; if (curr_depth >= max_search_depth_) { break; } CuckooBucket& curr_bucket = (*buckets)[static_cast(curr_node.bucket_id)]; for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) { uint64_t child_bucket_id = CuckooHash(GetUserKey(curr_bucket.vector_idx), hash_cnt, use_module_hash_, hash_table_size_, identity_as_first_hash_, get_slice_hash_); // Iterate inside Cuckoo Block. 
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++child_bucket_id) { if ((*buckets)[static_cast(child_bucket_id)].make_space_for_key_call_id == make_space_for_key_call_id) { continue; } (*buckets)[static_cast(child_bucket_id)].make_space_for_key_call_id = make_space_for_key_call_id; tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, curr_pos)); if ((*buckets)[static_cast(child_bucket_id)].vector_idx == kMaxVectorIdx) { null_found = true; break; } } } ++curr_pos; } if (null_found) { // There is an empty node in tree.back(). Now, traverse the path from this // empty node to top of the tree and at every node in the path, replace // child with the parent. Stop when first level is reached in the tree // (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return // this location in first level for target key to be inserted. uint32_t bucket_to_replace_pos = static_cast(tree.size()) - 1; while (bucket_to_replace_pos >= num_hash_func_) { CuckooNode& curr_node = tree[bucket_to_replace_pos]; (*buckets)[static_cast(curr_node.bucket_id)] = (*buckets)[static_cast(tree[curr_node.parent_pos].bucket_id)]; bucket_to_replace_pos = curr_node.parent_pos; } *bucket_id = tree[bucket_to_replace_pos].bucket_id; } return null_found; } std::string CuckooTableBuilder::GetFileChecksum() const { if (file_ != nullptr) { return file_->GetFileChecksum(); } else { return kUnknownFileChecksum; } } const char* CuckooTableBuilder::GetFileChecksumFuncName() const { if (file_ != nullptr) { return file_->GetFileChecksumFuncName(); } else { return kUnknownFileChecksumFuncName; } } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/cuckoo/cuckoo_table_builder.h000066400000000000000000000116251370372246700221350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include #include "db/version_edit.h" #include "port/port.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "table/table_builder.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { class CuckooTableBuilder: public TableBuilder { public: CuckooTableBuilder(WritableFileWriter* file, double max_hash_table_ratio, uint32_t max_num_hash_func, uint32_t max_search_depth, const Comparator* user_comparator, uint32_t cuckoo_block_size, bool use_module_hash, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t), uint32_t column_family_id, const std::string& column_family_name); // No copying allowed CuckooTableBuilder(const CuckooTableBuilder&) = delete; void operator=(const CuckooTableBuilder&) = delete; // REQUIRES: Either Finish() or Abandon() has been called. ~CuckooTableBuilder() {} // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. // REQUIRES: Finish(), Abandon() have not been called void Add(const Slice& key, const Slice& value) override; // Return non-ok iff some error has been detected. Status status() const override { return status_; } // Return non-ok iff some error happens during IO. IOStatus io_status() const override { return io_status_; } // Finish building the table. Stops using the file passed to the // constructor after this function returns. // REQUIRES: Finish(), Abandon() have not been called Status Finish() override; // Indicate that the contents of this builder should be abandoned. Stops // using the file passed to the constructor after this function returns. 
// If the caller is not going to call Finish(), it must call Abandon() // before destroying this builder. // REQUIRES: Finish(), Abandon() have not been called void Abandon() override; // Number of calls to Add() so far. uint64_t NumEntries() const override; // Size of the file generated so far. If invoked after a successful // Finish() call, returns the size of the final generated file. uint64_t FileSize() const override; TableProperties GetTableProperties() const override { return properties_; } // Get file checksum std::string GetFileChecksum() const override; // Get file checksum function name const char* GetFileChecksumFuncName() const override; private: struct CuckooBucket { CuckooBucket() : vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {} uint32_t vector_idx; // This number will not exceed kvs_.size() + max_num_hash_func_. // We assume number of items is <= 2^32. uint32_t make_space_for_key_call_id; }; static const uint32_t kMaxVectorIdx = port::kMaxInt32; bool MakeSpaceForKey(const autovector& hash_vals, const uint32_t call_id, std::vector* buckets, uint64_t* bucket_id); Status MakeHashTable(std::vector* buckets); inline bool IsDeletedKey(uint64_t idx) const; inline Slice GetKey(uint64_t idx) const; inline Slice GetUserKey(uint64_t idx) const; inline Slice GetValue(uint64_t idx) const; uint32_t num_hash_func_; WritableFileWriter* file_; const double max_hash_table_ratio_; const uint32_t max_num_hash_func_; const uint32_t max_search_depth_; const uint32_t cuckoo_block_size_; uint64_t hash_table_size_; bool is_last_level_file_; bool has_seen_first_key_; bool has_seen_first_value_; uint64_t key_size_; uint64_t value_size_; // A list of fixed-size key-value pairs concatenating into a string. 
// Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific // key / value given an index std::string kvs_; std::string deleted_keys_; // Number of key-value pairs stored in kvs_ + number of deleted keys uint64_t num_entries_; // Number of keys that contain value (non-deletion op) uint64_t num_values_; Status status_; IOStatus io_status_; TableProperties properties_; const Comparator* ucomp_; bool use_module_hash_; bool identity_as_first_hash_; uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t max_num_buckets); std::string largest_user_key_ = ""; std::string smallest_user_key_ = ""; bool closed_; // Either Finish() or Abandon() has been called. }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/cuckoo/cuckoo_table_builder_test.cc000066400000000000000000000674641370372246700233460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include #include #include #include #include "file/random_access_file_reader.h" #include "file/writable_file_writer.h" #include "table/cuckoo/cuckoo_table_builder.h" #include "table/meta_blocks.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { extern const uint64_t kCuckooTableMagicNumber; namespace { std::unordered_map> hash_map; uint64_t GetSliceHash(const Slice& s, uint32_t index, uint64_t /*max_num_buckets*/) { return hash_map[s.ToString()][index]; } } // namespace class CuckooBuilderTest : public testing::Test { public: CuckooBuilderTest() { env_ = Env::Default(); Options options; options.allow_mmap_reads = true; env_options_ = EnvOptions(options); } void CheckFileContents(const std::vector& keys, const std::vector& values, const std::vector& expected_locations, std::string expected_unused_bucket, uint64_t expected_table_size, uint32_t expected_num_hash_func, bool expected_is_last_level, uint32_t expected_cuckoo_block_size = 1) { uint64_t num_deletions = 0; for (const auto& key : keys) { ParsedInternalKey parsed; if (ParseInternalKey(key, &parsed) && parsed.type == kTypeDeletion) { num_deletions++; } } // Read file std::unique_ptr read_file; ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); uint64_t read_file_size; ASSERT_OK(env_->GetFileSize(fname, &read_file_size)); Options options; options.allow_mmap_reads = true; ImmutableCFOptions ioptions(options); // Assert Table Properties. TableProperties* props = nullptr; std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(read_file), fname)); ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size, kCuckooTableMagicNumber, ioptions, &props, true /* compression_type_missing */)); // Check unused bucket. 
std::string unused_key = props->user_collected_properties[ CuckooTablePropertyNames::kEmptyKey]; ASSERT_EQ(expected_unused_bucket.substr(0, props->fixed_key_len), unused_key); uint64_t value_len_found = *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kValueLength].data()); ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found); ASSERT_EQ(props->raw_value_size, values.size()*value_len_found); const uint64_t table_size = *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kHashTableSize].data()); ASSERT_EQ(expected_table_size, table_size); const uint32_t num_hash_func_found = *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kNumHashFunc].data()); ASSERT_EQ(expected_num_hash_func, num_hash_func_found); const uint32_t cuckoo_block_size = *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kCuckooBlockSize].data()); ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size); const bool is_last_level_found = *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kIsLastLevel].data()); ASSERT_EQ(expected_is_last_level, is_last_level_found); ASSERT_EQ(props->num_entries, keys.size()); ASSERT_EQ(props->num_deletions, num_deletions); ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); ASSERT_EQ(props->data_size, expected_unused_bucket.size() * (expected_table_size + expected_cuckoo_block_size - 1)); ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len); ASSERT_EQ(props->column_family_id, 0); ASSERT_EQ(props->column_family_name, kDefaultColumnFamilyName); delete props; // Check contents of the bucket. 
std::vector keys_found(keys.size(), false); size_t bucket_size = expected_unused_bucket.size(); for (uint32_t i = 0; i + 1 < table_size + cuckoo_block_size; ++i) { Slice read_slice; ASSERT_OK(file_reader->Read(IOOptions(), i * bucket_size, bucket_size, &read_slice, nullptr, nullptr)); size_t key_idx = std::find(expected_locations.begin(), expected_locations.end(), i) - expected_locations.begin(); if (key_idx == keys.size()) { // i is not one of the expected locations. Empty bucket. if (read_slice.data() == nullptr) { ASSERT_EQ(0, expected_unused_bucket.size()); } else { ASSERT_EQ(read_slice.compare(expected_unused_bucket), 0); } } else { keys_found[key_idx] = true; ASSERT_EQ(read_slice.compare(keys[key_idx] + values[key_idx]), 0); } } for (auto key_found : keys_found) { // Check that all keys wereReader found. ASSERT_TRUE(key_found); } } std::string GetInternalKey(Slice user_key, bool zero_seqno, ValueType type = kTypeValue) { IterKey ikey; ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, type); return ikey.GetInternalKey().ToString(); } uint64_t NextPowOf2(uint64_t num) { uint64_t n = 2; while (n <= num) { n *= 2; } return n; } uint64_t GetExpectedTableSize(uint64_t num) { return NextPowOf2(static_cast(num / kHashTableRatio)); } Env* env_; EnvOptions env_options_; std::string fname; const double kHashTableRatio = 0.9; }; TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { std::unique_ptr writable_file; fname = test::PerThreadDBPath("EmptyFile"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); ASSERT_EQ(0UL, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); 
CheckFileContents({}, {}, {}, "", 2, 2, false); } TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { for (auto type : {kTypeValue, kTypeDeletion}) { uint32_t num_hash_fun = 4; std::vector user_keys = {"key01", "key02", "key03", "key04"}; std::vector values; if (type == kTypeValue) { values = {"v01", "v02", "v03", "v04"}; } else { values = {"", "", "", ""}; } // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1, 2, 3}}, {user_keys[1], {1, 2, 3, 4}}, {user_keys[2], {2, 3, 4, 5}}, {user_keys[3], {3, 4, 5, 6}}}; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 2, 3}; std::vector keys; for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false, type)); } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("NoCollisionFullKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, 
expected_locations, expected_unused_bucket, expected_table_size, 2, false); } } TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { uint32_t num_hash_fun = 4; std::vector user_keys = {"key01", "key02", "key03", "key04"}; std::vector values = {"v01", "v02", "v03", "v04"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1, 2, 3}}, {user_keys[1], {0, 1, 2, 3}}, {user_keys[2], {0, 1, 2, 3}}, {user_keys[3], {0, 1, 2, 3}}, }; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 2, 3}; std::vector keys; for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionFullKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, expected_unused_bucket, expected_table_size, 4, false); } 
TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { uint32_t num_hash_fun = 4; std::vector user_keys = {"key01", "key02", "key03", "key04"}; std::vector values = {"v01", "v02", "v03", "v04"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1, 2, 3}}, {user_keys[1], {0, 1, 2, 3}}, {user_keys[2], {0, 1, 2, 3}}, {user_keys[3], {0, 1, 2, 3}}, }; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 2, 3}; std::vector keys; for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); std::unique_ptr writable_file; uint32_t cuckoo_block_size = 2; fname = test::PerThreadDBPath("WithCollisionFullKey2"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder( file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, expected_unused_bucket, expected_table_size, 3, false, cuckoo_block_size); } TEST_F(CuckooBuilderTest, 
WithCollisionPathFullKey) { // Have two hash functions. Insert elements with overlapping hashes. // Finally insert an element with hash value somewhere in the middle // so that it displaces all the elements after that. uint32_t num_hash_fun = 2; std::vector user_keys = {"key01", "key02", "key03", "key04", "key05"}; std::vector values = {"v01", "v02", "v03", "v04", "v05"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}}, {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}}, }; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 3, 4, 2}; std::vector keys; for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathFullKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, 
expected_locations, expected_unused_bucket, expected_table_size, 2, false); } TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { uint32_t num_hash_fun = 2; std::vector user_keys = {"key01", "key02", "key03", "key04", "key05"}; std::vector values = {"v01", "v02", "v03", "v04", "v05"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {3, 4}}, {user_keys[3], {4, 5}}, {user_keys[4], {0, 3}}, }; hash_map = std::move(hm); std::vector expected_locations = {2, 1, 3, 4, 0}; std::vector keys; for (auto& user_key : user_keys) { keys.push_back(GetInternalKey(user_key, false)); } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathFullKeyAndCuckooBlock"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 2, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, expected_unused_bucket, 
expected_table_size, 2, false, 2); } TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { uint32_t num_hash_fun = 4; std::vector user_keys = {"key01", "key02", "key03", "key04"}; std::vector values = {"v01", "v02", "v03", "v04"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1, 2, 3}}, {user_keys[1], {1, 2, 3, 4}}, {user_keys[2], {2, 3, 4, 5}}, {user_keys[3], {3, 4, 5, 6}}}; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 2, 3}; uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("NoCollisionUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = user_keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, expected_unused_bucket, expected_table_size, 2, true); } TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { uint32_t num_hash_fun = 4; std::vector user_keys = {"key01", "key02", "key03", "key04"}; 
std::vector values = {"v01", "v02", "v03", "v04"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1, 2, 3}}, {user_keys[1], {0, 1, 2, 3}}, {user_keys[2], {0, 1, 2, 3}}, {user_keys[3], {0, 1, 2, 3}}, }; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 2, 3}; uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = user_keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, expected_unused_bucket, expected_table_size, 4, true); } TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) { uint32_t num_hash_fun = 2; std::vector user_keys = {"key01", "key02", "key03", "key04", "key05"}; std::vector values = {"v01", "v02", "v03", "v04", "v05"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a 
parameter std::unordered_map> hm = { {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}}, {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}}, }; hash_map = std::move(hm); std::vector expected_locations = {0, 1, 3, 4, 2}; uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } size_t bucket_size = user_keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(file_writer->Close()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, expected_unused_bucket, expected_table_size, 2, true); } TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) { // Have two hash functions. Insert elements with overlapping hashes. // Finally try inserting an element with hash value somewhere in the middle // and it should fail because the no. of elements to displace is too high. 
uint32_t num_hash_fun = 2; std::vector user_keys = {"key01", "key02", "key03", "key04", "key05"}; // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}}, {user_keys[3], {3, 4}}, {user_keys[4], {0, 1}}, }; hash_map = std::move(hm); std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); ASSERT_EQ(builder.NumEntries(), i + 1); ASSERT_OK(builder.status()); } ASSERT_TRUE(builder.Finish().IsNotSupported()); ASSERT_OK(file_writer->Close()); } TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) { // Need to have a temporary variable here as VS compiler does not currently // support operator= with initializer_list as a parameter std::unordered_map> hm = { {"repeatedkey", {0, 1, 2, 3}}}; hash_map = std::move(hm); uint32_t num_hash_fun = 4; std::string user_key = "repeatedkey"; std::unique_ptr writable_file; fname = test::PerThreadDBPath("FailWhenSameKeyInserted"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, 
kDefaultColumnFamilyName); ASSERT_OK(builder.status()); builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); ASSERT_EQ(builder.NumEntries(), 1u); ASSERT_OK(builder.status()); builder.Add(Slice(GetInternalKey(user_key, true)), Slice("value2")); ASSERT_EQ(builder.NumEntries(), 2u); ASSERT_OK(builder.status()); ASSERT_TRUE(builder.Finish().IsNotSupported()); ASSERT_OK(file_writer->Close()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/table/cuckoo/cuckoo_table_factory.cc000066400000000000000000000051451370372246700223140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "table/cuckoo/cuckoo_table_factory.h" #include "db/dbformat.h" #include "table/cuckoo/cuckoo_table_builder.h" #include "table/cuckoo/cuckoo_table_reader.h" namespace ROCKSDB_NAMESPACE { Status CuckooTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool /*prefetch_index_and_filter_in_cache*/) const { std::unique_ptr new_reader(new CuckooTableReader( table_reader_options.ioptions, std::move(file), file_size, table_reader_options.internal_comparator.user_comparator(), nullptr)); Status s = new_reader->status(); if (s.ok()) { *table = std::move(new_reader); } return s; } TableBuilder* CuckooTableFactory::NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const { // Ignore the skipFIlters flag. 
Does not apply to this file format // // TODO: change builder to take the option struct return new CuckooTableBuilder( file, table_options_.hash_table_ratio, 64, table_options_.max_search_depth, table_builder_options.internal_comparator.user_comparator(), table_options_.cuckoo_block_size, table_options_.use_module_hash, table_options_.identity_as_first_hash, nullptr /* get_slice_hash */, column_family_id, table_builder_options.column_family_name); } std::string CuckooTableFactory::GetPrintableTableOptions() const { std::string ret; ret.reserve(2000); const int kBufferSize = 200; char buffer[kBufferSize]; snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", table_options_.hash_table_ratio); ret.append(buffer); snprintf(buffer, kBufferSize, " max_search_depth: %u\n", table_options_.max_search_depth); ret.append(buffer); snprintf(buffer, kBufferSize, " cuckoo_block_size: %u\n", table_options_.cuckoo_block_size); ret.append(buffer); snprintf(buffer, kBufferSize, " identity_as_first_hash: %d\n", table_options_.identity_as_first_hash); ret.append(buffer); return ret; } TableFactory* NewCuckooTableFactory(const CuckooTableOptions& table_options) { return new CuckooTableFactory(table_options); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/cuckoo/cuckoo_table_factory.h000066400000000000000000000057041370372246700221570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include "rocksdb/table.h" #include "util/murmurhash.h" #include "rocksdb/options.h" namespace ROCKSDB_NAMESPACE { const uint32_t kCuckooMurmurSeedMultiplier = 816922183; static inline uint64_t CuckooHash( const Slice& user_key, uint32_t hash_cnt, bool use_module_hash, uint64_t table_size_, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) { #if !defined NDEBUG || defined OS_WIN // This part is used only in unit tests but we have to keep it for Windows // build as we run test in both debug and release modes under Windows. if (get_slice_hash != nullptr) { return get_slice_hash(user_key, hash_cnt, table_size_); } #else (void)get_slice_hash; #endif uint64_t value = 0; if (hash_cnt == 0 && identity_as_first_hash) { value = (*reinterpret_cast(user_key.data())); } else { value = MurmurHash(user_key.data(), static_cast(user_key.size()), kCuckooMurmurSeedMultiplier * hash_cnt); } if (use_module_hash) { return value % table_size_; } else { return value & (table_size_ - 1); } } // Cuckoo Table is designed for applications that require fast point lookups // but not fast range scans. // // Some assumptions: // - Key length and Value length are fixed. // - Does not support Snapshot. // - Does not support Merge operations. // - Does not support prefix bloom filters. class CuckooTableFactory : public TableFactory { public: explicit CuckooTableFactory(const CuckooTableOptions& table_options) : table_options_(table_options) {} ~CuckooTableFactory() {} const char* Name() const override { return "CuckooTable"; } Status NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const override; // Sanitizes the specified DB Options. 
Status SanitizeOptions( const DBOptions& /*db_opts*/, const ColumnFamilyOptions& /*cf_opts*/) const override { return Status::OK(); } std::string GetPrintableTableOptions() const override; void* GetOptions() override { return &table_options_; } Status GetOptionString(const ConfigOptions& /*config_options*/, std::string* /*opt_string*/) const override { return Status::OK(); } private: CuckooTableOptions table_options_; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/cuckoo/cuckoo_table_reader.cc000066400000000000000000000335121370372246700221060ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifndef ROCKSDB_LITE #include "table/cuckoo/cuckoo_table_reader.h" #include #include #include #include #include #include "memory/arena.h" #include "rocksdb/iterator.h" #include "rocksdb/table.h" #include "table/cuckoo/cuckoo_table_factory.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { namespace { const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1); const uint32_t kInvalidIndex = std::numeric_limits::max(); } extern const uint64_t kCuckooTableMagicNumber; CuckooTableReader::CuckooTableReader( const ImmutableCFOptions& ioptions, std::unique_ptr&& file, uint64_t file_size, const Comparator* comparator, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) : file_(std::move(file)), is_last_level_(false), identity_as_first_hash_(false), use_module_hash_(false), num_hash_func_(0), unused_key_(""), key_length_(0), user_key_length_(0), value_length_(0), bucket_length_(0), cuckoo_block_size_(0), cuckoo_block_bytes_minus_one_(0), table_size_(0), ucomp_(comparator), get_slice_hash_(get_slice_hash) { if (!ioptions.allow_mmap_reads) { status_ = Status::InvalidArgument("File is not mmaped"); return; } TableProperties* props = nullptr; status_ = ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber, ioptions, &props, true /* compression_type_missing */); if (!status_.ok()) { return; } table_props_.reset(props); auto& user_props = props->user_collected_properties; auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc); if (hash_funs == user_props.end()) { status_ = Status::Corruption("Number of hash functions not found"); return; } num_hash_func_ = *reinterpret_cast(hash_funs->second.data()); auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey); if (unused_key == user_props.end()) { status_ = Status::Corruption("Empty bucket value not found"); return; } unused_key_ = unused_key->second; key_length_ = 
static_cast(props->fixed_key_len); auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength); if (user_key_len == user_props.end()) { status_ = Status::Corruption("User key length not found"); return; } user_key_length_ = *reinterpret_cast( user_key_len->second.data()); auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength); if (value_length == user_props.end()) { status_ = Status::Corruption("Value length not found"); return; } value_length_ = *reinterpret_cast( value_length->second.data()); bucket_length_ = key_length_ + value_length_; auto hash_table_size = user_props.find( CuckooTablePropertyNames::kHashTableSize); if (hash_table_size == user_props.end()) { status_ = Status::Corruption("Hash table size not found"); return; } table_size_ = *reinterpret_cast( hash_table_size->second.data()); auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel); if (is_last_level == user_props.end()) { status_ = Status::Corruption("Is last level not found"); return; } is_last_level_ = *reinterpret_cast(is_last_level->second.data()); auto identity_as_first_hash = user_props.find( CuckooTablePropertyNames::kIdentityAsFirstHash); if (identity_as_first_hash == user_props.end()) { status_ = Status::Corruption("identity as first hash not found"); return; } identity_as_first_hash_ = *reinterpret_cast( identity_as_first_hash->second.data()); auto use_module_hash = user_props.find( CuckooTablePropertyNames::kUseModuleHash); if (use_module_hash == user_props.end()) { status_ = Status::Corruption("hash type is not found"); return; } use_module_hash_ = *reinterpret_cast( use_module_hash->second.data()); auto cuckoo_block_size = user_props.find( CuckooTablePropertyNames::kCuckooBlockSize); if (cuckoo_block_size == user_props.end()) { status_ = Status::Corruption("Cuckoo block size not found"); return; } cuckoo_block_size_ = *reinterpret_cast( cuckoo_block_size->second.data()); cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * 
bucket_length_ - 1; status_ = file_->Read(IOOptions(), 0, static_cast(file_size), &file_data_, nullptr, nullptr); } Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/, const Slice& key, GetContext* get_context, const SliceTransform* /* prefix_extractor */, bool /*skip_filters*/) { assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0)); Slice user_key = ExtractUserKey(key); for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { uint64_t offset = bucket_length_ * CuckooHash( user_key, hash_cnt, use_module_hash_, table_size_, identity_as_first_hash_, get_slice_hash_); const char* bucket = &file_data_.data()[offset]; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, bucket += bucket_length_) { if (ucomp_->Equal(Slice(unused_key_.data(), user_key.size()), Slice(bucket, user_key.size()))) { return Status::OK(); } // Here, we compare only the user key part as we support only one entry // per user key and we don't support snapshot. if (ucomp_->Equal(user_key, Slice(bucket, user_key.size()))) { Slice value(bucket + key_length_, value_length_); if (is_last_level_) { // Sequence number is not stored at the last level, so we will use // kMaxSequenceNumber since it is unknown. This could cause some // transactions to fail to lock a key due to known sequence number. // However, it is expected for anyone to use a CuckooTable in a // TransactionDB. get_context->SaveValue(value, kMaxSequenceNumber); } else { Slice full_key(bucket, key_length_); ParsedInternalKey found_ikey; ParseInternalKey(full_key, &found_ikey); bool dont_care __attribute__((__unused__)); get_context->SaveValue(found_ikey, value, &dont_care); } // We don't support merge operations. So, we return here. return Status::OK(); } } } return Status::OK(); } void CuckooTableReader::Prepare(const Slice& key) { // Prefetch the first Cuckoo Block. 
Slice user_key = ExtractUserKey(key); uint64_t addr = reinterpret_cast(file_data_.data()) + bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_, identity_as_first_hash_, nullptr); uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_; for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) { PREFETCH(reinterpret_cast(addr), 0, 3); } } class CuckooTableIterator : public InternalIterator { public: explicit CuckooTableIterator(CuckooTableReader* reader); // No copying allowed CuckooTableIterator(const CuckooTableIterator&) = delete; void operator=(const Iterator&) = delete; ~CuckooTableIterator() override {} bool Valid() const override; void SeekToFirst() override; void SeekToLast() override; void Seek(const Slice& target) override; void SeekForPrev(const Slice& target) override; void Next() override; void Prev() override; Slice key() const override; Slice value() const override; Status status() const override { return Status::OK(); } void InitIfNeeded(); private: struct BucketComparator { BucketComparator(const Slice& file_data, const Comparator* ucomp, uint32_t bucket_len, uint32_t user_key_len, const Slice& target = Slice()) : file_data_(file_data), ucomp_(ucomp), bucket_len_(bucket_len), user_key_len_(user_key_len), target_(target) {} bool operator()(const uint32_t first, const uint32_t second) const { const char* first_bucket = (first == kInvalidIndex) ? target_.data() : &file_data_.data()[first * bucket_len_]; const char* second_bucket = (second == kInvalidIndex) ? 
target_.data() : &file_data_.data()[second * bucket_len_]; return ucomp_->Compare(Slice(first_bucket, user_key_len_), Slice(second_bucket, user_key_len_)) < 0; } private: const Slice file_data_; const Comparator* ucomp_; const uint32_t bucket_len_; const uint32_t user_key_len_; const Slice target_; }; const BucketComparator bucket_comparator_; void PrepareKVAtCurrIdx(); CuckooTableReader* reader_; bool initialized_; // Contains a map of keys to bucket_id sorted in key order. std::vector sorted_bucket_ids_; // We assume that the number of items can be stored in uint32 (4 Billion). uint32_t curr_key_idx_; Slice curr_value_; IterKey curr_key_; }; CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader) : bucket_comparator_(reader->file_data_, reader->ucomp_, reader->bucket_length_, reader->user_key_length_), reader_(reader), initialized_(false), curr_key_idx_(kInvalidIndex) { sorted_bucket_ids_.clear(); curr_value_.clear(); curr_key_.Clear(); } void CuckooTableIterator::InitIfNeeded() { if (initialized_) { return; } sorted_bucket_ids_.reserve(static_cast(reader_->GetTableProperties()->num_entries)); uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1; assert(num_buckets < kInvalidIndex); const char* bucket = reader_->file_data_.data(); for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) { if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) { sorted_bucket_ids_.push_back(bucket_id); } bucket += reader_->bucket_length_; } assert(sorted_bucket_ids_.size() == reader_->GetTableProperties()->num_entries); std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(), bucket_comparator_); curr_key_idx_ = kInvalidIndex; initialized_ = true; } void CuckooTableIterator::SeekToFirst() { InitIfNeeded(); curr_key_idx_ = 0; PrepareKVAtCurrIdx(); } void CuckooTableIterator::SeekToLast() { InitIfNeeded(); curr_key_idx_ = static_cast(sorted_bucket_ids_.size()) - 1; PrepareKVAtCurrIdx(); } void 
CuckooTableIterator::Seek(const Slice& target) { InitIfNeeded(); const BucketComparator seek_comparator( reader_->file_data_, reader_->ucomp_, reader_->bucket_length_, reader_->user_key_length_, ExtractUserKey(target)); auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(), kInvalidIndex, seek_comparator); curr_key_idx_ = static_cast(std::distance(sorted_bucket_ids_.begin(), seek_it)); PrepareKVAtCurrIdx(); } void CuckooTableIterator::SeekForPrev(const Slice& /*target*/) { // Not supported assert(false); } bool CuckooTableIterator::Valid() const { return curr_key_idx_ < sorted_bucket_ids_.size(); } void CuckooTableIterator::PrepareKVAtCurrIdx() { if (!Valid()) { curr_value_.clear(); curr_key_.Clear(); return; } uint32_t id = sorted_bucket_ids_[curr_key_idx_]; const char* offset = reader_->file_data_.data() + id * reader_->bucket_length_; if (reader_->is_last_level_) { // Always return internal key. curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_), 0, kTypeValue); } else { curr_key_.SetInternalKey(Slice(offset, reader_->key_length_)); } curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_); } void CuckooTableIterator::Next() { if (!Valid()) { curr_value_.clear(); curr_key_.Clear(); return; } ++curr_key_idx_; PrepareKVAtCurrIdx(); } void CuckooTableIterator::Prev() { if (curr_key_idx_ == 0) { curr_key_idx_ = static_cast(sorted_bucket_ids_.size()); } if (!Valid()) { curr_value_.clear(); curr_key_.Clear(); return; } --curr_key_idx_; PrepareKVAtCurrIdx(); } Slice CuckooTableIterator::key() const { assert(Valid()); return curr_key_.GetInternalKey(); } Slice CuckooTableIterator::value() const { assert(Valid()); return curr_value_; } InternalIterator* CuckooTableReader::NewIterator( const ReadOptions& /*read_options*/, const SliceTransform* /* prefix_extractor */, Arena* arena, bool /*skip_filters*/, TableReaderCaller /*caller*/, size_t /*compaction_readahead_size*/, bool /* allow_unprepared_value */) { 
if (!status().ok()) { return NewErrorInternalIterator( Status::Corruption("CuckooTableReader status is not okay."), arena); } CuckooTableIterator* iter; if (arena == nullptr) { iter = new CuckooTableIterator(this); } else { auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator)); iter = new (iter_mem) CuckooTableIterator(this); } return iter; } size_t CuckooTableReader::ApproximateMemoryUsage() const { return 0; } } // namespace ROCKSDB_NAMESPACE #endif rocksdb-6.11.4/table/cuckoo/cuckoo_table_reader.h000066400000000000000000000067141370372246700217540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include "db/dbformat.h" #include "file/random_access_file_reader.h" #include "options/cf_options.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "table/table_reader.h" namespace ROCKSDB_NAMESPACE { class Arena; class TableReader; class CuckooTableReader: public TableReader { public: CuckooTableReader(const ImmutableCFOptions& ioptions, std::unique_ptr&& file, uint64_t file_size, const Comparator* user_comparator, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)); ~CuckooTableReader() {} std::shared_ptr GetTableProperties() const override { return table_props_; } Status status() const { return status_; } Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) override; // Returns a new iterator over table contents // compaction_readahead_size: its value will only be used if for_compaction = // true InternalIterator* NewIterator(const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) override; void Prepare(const Slice& target) override; // Report an approximation of how much memory has been used. size_t ApproximateMemoryUsage() const override; // Following methods are not implemented for Cuckoo Table Reader uint64_t ApproximateOffsetOf(const Slice& /*key*/, TableReaderCaller /*caller*/) override { return 0; } uint64_t ApproximateSize(const Slice& /*start*/, const Slice& /*end*/, TableReaderCaller /*caller*/) override { return 0; } void SetupForCompaction() override {} // End of methods not implemented. 
private: friend class CuckooTableIterator; void LoadAllKeys(std::vector>* key_to_bucket_id); std::unique_ptr file_; Slice file_data_; bool is_last_level_; bool identity_as_first_hash_; bool use_module_hash_; std::shared_ptr table_props_; Status status_; uint32_t num_hash_func_; std::string unused_key_; uint32_t key_length_; uint32_t user_key_length_; uint32_t value_length_; uint32_t bucket_length_; uint32_t cuckoo_block_size_; uint32_t cuckoo_block_bytes_minus_one_; uint64_t table_size_; const Comparator* ucomp_; uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t max_num_buckets); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/cuckoo/cuckoo_table_reader_test.cc000066400000000000000000000512331370372246700231450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #ifndef GFLAGS #include int main() { fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); return 0; } #else #include #include #include #include #include "memory/arena.h" #include "table/cuckoo/cuckoo_table_builder.h" #include "table/cuckoo/cuckoo_table_factory.h" #include "table/cuckoo/cuckoo_table_reader.h" #include "table/get_context.h" #include "table/meta_blocks.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/gflags_compat.h" #include "util/random.h" #include "util/string_util.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::SetUsageMessage; DEFINE_string(file_dir, "", "Directory where the files will be created" " for benchmark. 
Added for using tmpfs."); DEFINE_bool(enable_perf, false, "Run Benchmark Tests too."); DEFINE_bool(write, false, "Should write new values to file in performance tests?"); DEFINE_bool(identity_as_first_hash, true, "use identity as first hash"); namespace ROCKSDB_NAMESPACE { namespace { const uint32_t kNumHashFunc = 10; // Methods, variables related to Hash functions. std::unordered_map> hash_map; void AddHashLookups(const std::string& s, uint64_t bucket_id, uint32_t num_hash_fun) { std::vector v; for (uint32_t i = 0; i < num_hash_fun; i++) { v.push_back(bucket_id + i); } hash_map[s] = v; } uint64_t GetSliceHash(const Slice& s, uint32_t index, uint64_t /*max_num_buckets*/) { return hash_map[s.ToString()][index]; } } // namespace class CuckooReaderTest : public testing::Test { public: using testing::Test::SetUp; CuckooReaderTest() { options.allow_mmap_reads = true; env = options.env; env_options = EnvOptions(options); } void SetUp(int num) { num_items = num; hash_map.clear(); keys.clear(); keys.resize(num_items); user_keys.clear(); user_keys.resize(num_items); values.clear(); values.resize(num_items); } std::string NumToStr(int64_t i) { return std::string(reinterpret_cast(&i), sizeof(i)); } void CreateCuckooFileAndCheckReader( const Comparator* ucomp = BytewiseComparator()) { std::unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, env_options)); CuckooTableBuilder builder( file_writer.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false, false, GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) { builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); ASSERT_OK(builder.status()); ASSERT_EQ(builder.NumEntries(), key_idx + 1); } ASSERT_OK(builder.Finish()); ASSERT_EQ(num_items, builder.NumEntries()); file_size = 
builder.FileSize(); ASSERT_OK(file_writer->Close()); // Check reader now. std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp, GetSliceHash); ASSERT_OK(reader.status()); // Assume no merge/deletion for (uint32_t i = 0; i < num_items; ++i) { PinnableSlice value; GetContext get_context(ucomp, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(user_keys[i]), &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK( reader.Get(ReadOptions(), Slice(keys[i]), &get_context, nullptr)); ASSERT_STREQ(values[i].c_str(), value.data()); } } void UpdateKeys(bool with_zero_seqno) { for (uint32_t i = 0; i < num_items; i++) { ParsedInternalKey ikey(user_keys[i], with_zero_seqno ? 0 : i + 1000, kTypeValue); keys[i].clear(); AppendInternalKey(&keys[i], ikey); } } void CheckIterator(const Comparator* ucomp = BytewiseComparator()) { std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp, GetSliceHash); ASSERT_OK(reader.status()); InternalIterator* it = reader.NewIterator( ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized); ASSERT_OK(it->status()); ASSERT_TRUE(!it->Valid()); it->SeekToFirst(); int cnt = 0; while (it->Valid()) { ASSERT_OK(it->status()); ASSERT_TRUE(Slice(keys[cnt]) == it->key()); ASSERT_TRUE(Slice(values[cnt]) == it->value()); ++cnt; it->Next(); } ASSERT_EQ(static_cast(cnt), num_items); it->SeekToLast(); cnt = static_cast(num_items) - 1; 
ASSERT_TRUE(it->Valid()); while (it->Valid()) { ASSERT_OK(it->status()); ASSERT_TRUE(Slice(keys[cnt]) == it->key()); ASSERT_TRUE(Slice(values[cnt]) == it->value()); --cnt; it->Prev(); } ASSERT_EQ(cnt, -1); cnt = static_cast(num_items) / 2; it->Seek(keys[cnt]); while (it->Valid()) { ASSERT_OK(it->status()); ASSERT_TRUE(Slice(keys[cnt]) == it->key()); ASSERT_TRUE(Slice(values[cnt]) == it->value()); ++cnt; it->Next(); } ASSERT_EQ(static_cast(cnt), num_items); delete it; Arena arena; it = reader.NewIterator(ReadOptions(), /*prefix_extractor=*/nullptr, &arena, /*skip_filters=*/false, TableReaderCaller::kUncategorized); ASSERT_OK(it->status()); ASSERT_TRUE(!it->Valid()); it->Seek(keys[num_items/2]); ASSERT_TRUE(it->Valid()); ASSERT_OK(it->status()); ASSERT_TRUE(keys[num_items/2] == it->key()); ASSERT_TRUE(values[num_items/2] == it->value()); ASSERT_OK(it->status()); it->~InternalIterator(); } std::vector keys; std::vector user_keys; std::vector values; uint64_t num_items; std::string fname; uint64_t file_size; Options options; Env* env; EnvOptions env_options; }; TEST_F(CuckooReaderTest, FileNotMmaped) { options.allow_mmap_reads = false; ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, nullptr, 0, nullptr, nullptr); ASSERT_TRUE(reader.status().IsInvalidArgument()); ASSERT_STREQ("File is not mmaped", reader.status().getState()); } TEST_F(CuckooReaderTest, WhenKeyExists) { SetUp(kNumHashFunc); fname = test::PerThreadDBPath("CuckooReader_WhenKeyExists"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i] = "key" + NumToStr(i); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); // Give disjoint hash values. AddHashLookups(user_keys[i], i, kNumHashFunc); } CreateCuckooFileAndCheckReader(); // Last level file. UpdateKeys(true); CreateCuckooFileAndCheckReader(); // Test with collision. Make all hash values collide. 
hash_map.clear(); for (uint32_t i = 0; i < num_items; i++) { AddHashLookups(user_keys[i], 0, kNumHashFunc); } UpdateKeys(false); CreateCuckooFileAndCheckReader(); // Last level file. UpdateKeys(true); CreateCuckooFileAndCheckReader(); } TEST_F(CuckooReaderTest, WhenKeyExistsWithUint64Comparator) { SetUp(kNumHashFunc); fname = test::PerThreadDBPath("CuckooReaderUint64_WhenKeyExists"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i].resize(8); memcpy(&user_keys[i][0], static_cast(&i), 8); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); // Give disjoint hash values. AddHashLookups(user_keys[i], i, kNumHashFunc); } CreateCuckooFileAndCheckReader(test::Uint64Comparator()); // Last level file. UpdateKeys(true); CreateCuckooFileAndCheckReader(test::Uint64Comparator()); // Test with collision. Make all hash values collide. hash_map.clear(); for (uint32_t i = 0; i < num_items; i++) { AddHashLookups(user_keys[i], 0, kNumHashFunc); } UpdateKeys(false); CreateCuckooFileAndCheckReader(test::Uint64Comparator()); // Last level file. UpdateKeys(true); CreateCuckooFileAndCheckReader(test::Uint64Comparator()); } TEST_F(CuckooReaderTest, CheckIterator) { SetUp(2*kNumHashFunc); fname = test::PerThreadDBPath("CuckooReader_CheckIterator"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i] = "key" + NumToStr(i); ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); // Give disjoint hash values, in reverse order. AddHashLookups(user_keys[i], num_items-i-1, kNumHashFunc); } CreateCuckooFileAndCheckReader(); CheckIterator(); // Last level file. 
UpdateKeys(true); CreateCuckooFileAndCheckReader(); CheckIterator(); } TEST_F(CuckooReaderTest, CheckIteratorUint64) { SetUp(2*kNumHashFunc); fname = test::PerThreadDBPath("CuckooReader_CheckIterator"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i].resize(8); memcpy(&user_keys[i][0], static_cast(&i), 8); ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); // Give disjoint hash values, in reverse order. AddHashLookups(user_keys[i], num_items-i-1, kNumHashFunc); } CreateCuckooFileAndCheckReader(test::Uint64Comparator()); CheckIterator(test::Uint64Comparator()); // Last level file. UpdateKeys(true); CreateCuckooFileAndCheckReader(test::Uint64Comparator()); CheckIterator(test::Uint64Comparator()); } TEST_F(CuckooReaderTest, WhenKeyNotFound) { // Add keys with colliding hash values. SetUp(kNumHashFunc); fname = test::PerThreadDBPath("CuckooReader_WhenKeyNotFound"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i] = "key" + NumToStr(i); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); // Make all hash values collide. AddHashLookups(user_keys[i], 0, kNumHashFunc); } auto* ucmp = BytewiseComparator(); CreateCuckooFileAndCheckReader(); std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucmp, GetSliceHash); ASSERT_OK(reader.status()); // Search for a key with colliding hash values. 
std::string not_found_user_key = "key" + NumToStr(num_items); std::string not_found_key; AddHashLookups(not_found_user_key, 0, kNumHashFunc); ParsedInternalKey ikey(not_found_user_key, 1000, kTypeValue); AppendInternalKey(¬_found_key, ikey); PinnableSlice value; GetContext get_context(ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(not_found_key), &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK( reader.Get(ReadOptions(), Slice(not_found_key), &get_context, nullptr)); ASSERT_TRUE(value.empty()); ASSERT_OK(reader.status()); // Search for a key with an independent hash value. std::string not_found_user_key2 = "key" + NumToStr(num_items + 1); AddHashLookups(not_found_user_key2, kNumHashFunc, kNumHashFunc); ParsedInternalKey ikey2(not_found_user_key2, 1000, kTypeValue); std::string not_found_key2; AppendInternalKey(¬_found_key2, ikey2); value.Reset(); GetContext get_context2(ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(not_found_key2), &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK( reader.Get(ReadOptions(), Slice(not_found_key2), &get_context2, nullptr)); ASSERT_TRUE(value.empty()); ASSERT_OK(reader.status()); // Test read when key is unused key. std::string unused_key = reader.GetTableProperties()->user_collected_properties.at( CuckooTablePropertyNames::kEmptyKey); // Add hash values that map to empty buckets. 
AddHashLookups(ExtractUserKey(unused_key).ToString(), kNumHashFunc, kNumHashFunc); value.Reset(); GetContext get_context3(ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(unused_key), &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK( reader.Get(ReadOptions(), Slice(unused_key), &get_context3, nullptr)); ASSERT_TRUE(value.empty()); ASSERT_OK(reader.status()); } // Performance tests namespace { void GetKeys(uint64_t num, std::vector* keys) { keys->clear(); IterKey k; k.SetInternalKey("", 0, kTypeValue); std::string internal_key_suffix = k.GetInternalKey().ToString(); ASSERT_EQ(static_cast(8), internal_key_suffix.size()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { uint64_t value = 2 * key_idx; std::string new_key(reinterpret_cast(&value), sizeof(value)); new_key += internal_key_suffix; keys->push_back(new_key); } } std::string GetFileName(uint64_t num) { if (FLAGS_file_dir.empty()) { FLAGS_file_dir = test::TmpDir(); } return test::PerThreadDBPath(FLAGS_file_dir, "cuckoo_read_benchmark") + ToString(num / 1000000) + "Mkeys"; } // Create last level file as we are interested in measuring performance of // last level file only. void WriteFile(const std::vector& keys, const uint64_t num, double hash_ratio) { Options options; options.allow_mmap_reads = true; Env* env = options.env; EnvOptions env_options = EnvOptions(options); std::string fname = GetFileName(num); std::unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); std::unique_ptr file_writer(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(writable_file)), fname, env_options)); CuckooTableBuilder builder( file_writer.get(), hash_ratio, 64, 1000, test::Uint64Comparator(), 5, false, FLAGS_identity_as_first_hash, nullptr, 0 /* column_family_id */, kDefaultColumnFamilyName); ASSERT_OK(builder.status()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { // Value is just a part of key. 
builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], 4)); ASSERT_EQ(builder.NumEntries(), key_idx + 1); ASSERT_OK(builder.status()); } ASSERT_OK(builder.Finish()); ASSERT_EQ(num, builder.NumEntries()); ASSERT_OK(file_writer->Close()); uint64_t file_size; env->GetFileSize(fname, &file_size); std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, test::Uint64Comparator(), nullptr); ASSERT_OK(reader.status()); ReadOptions r_options; PinnableSlice value; // Assume only the fast path is triggered GetContext get_context(nullptr, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(), &value, nullptr, nullptr, true, nullptr, nullptr); for (uint64_t i = 0; i < num; ++i) { value.Reset(); value.clear(); ASSERT_OK(reader.Get(r_options, Slice(keys[i]), &get_context, nullptr)); ASSERT_TRUE(Slice(keys[i]) == Slice(&keys[i][0], 4)); } } void ReadKeys(uint64_t num, uint32_t batch_size) { Options options; options.allow_mmap_reads = true; Env* env = options.env; EnvOptions env_options = EnvOptions(options); std::string fname = GetFileName(num); uint64_t file_size; env->GetFileSize(fname, &file_size); std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, test::Uint64Comparator(), nullptr); ASSERT_OK(reader.status()); const UserCollectedProperties user_props = reader.GetTableProperties()->user_collected_properties; const uint32_t num_hash_fun = *reinterpret_cast( user_props.at(CuckooTablePropertyNames::kNumHashFunc).data()); const uint64_t table_size = 
*reinterpret_cast( user_props.at(CuckooTablePropertyNames::kHashTableSize).data()); fprintf(stderr, "With %" PRIu64 " items, utilization is %.2f%%, number of" " hash functions: %u.\n", num, num * 100.0 / (table_size), num_hash_fun); ReadOptions r_options; std::vector keys; keys.reserve(num); for (uint64_t i = 0; i < num; ++i) { keys.push_back(2 * i); } std::random_shuffle(keys.begin(), keys.end()); PinnableSlice value; // Assume only the fast path is triggered GetContext get_context(nullptr, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(), &value, nullptr, nullptr, true, nullptr, nullptr); uint64_t start_time = env->NowMicros(); if (batch_size > 0) { for (uint64_t i = 0; i < num; i += batch_size) { for (uint64_t j = i; j < i+batch_size && j < num; ++j) { reader.Prepare(Slice(reinterpret_cast(&keys[j]), 16)); } for (uint64_t j = i; j < i+batch_size && j < num; ++j) { reader.Get(r_options, Slice(reinterpret_cast(&keys[j]), 16), &get_context, nullptr); } } } else { for (uint64_t i = 0; i < num; i++) { reader.Get(r_options, Slice(reinterpret_cast(&keys[i]), 16), &get_context, nullptr); } } float time_per_op = (env->NowMicros() - start_time) * 1.0f / num; fprintf(stderr, "Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n", time_per_op, 1.0 / time_per_op, batch_size); } } // namespace. TEST_F(CuckooReaderTest, TestReadPerformance) { if (!FLAGS_enable_perf) { return; } double hash_ratio = 0.95; // These numbers are chosen to have a hash utilization % close to // 0.9, 0.75, 0.6 and 0.5 respectively. // They all create 128 M buckets. std::vector nums = {120*1024*1024, 100*1024*1024, 80*1024*1024, 70*1024*1024}; #ifndef NDEBUG fprintf(stdout, "WARNING: Not compiled with DNDEBUG. 
Performance tests may be slow.\n"); #endif for (uint64_t num : nums) { if (FLAGS_write || Env::Default()->FileExists(GetFileName(num)).IsNotFound()) { std::vector all_keys; GetKeys(num, &all_keys); WriteFile(all_keys, num, hash_ratio); } ReadKeys(num, 0); ReadKeys(num, 10); ReadKeys(num, 25); ReadKeys(num, 50); ReadKeys(num, 100); fprintf(stderr, "\n"); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { if (ROCKSDB_NAMESPACE::port::kLittleEndian) { ::testing::InitGoogleTest(&argc, argv); ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } else { fprintf(stderr, "SKIPPED as Cuckoo table doesn't support Big Endian\n"); return 0; } } #endif // GFLAGS. #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/table/format.cc000066400000000000000000000424421370372246700161410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/format.h"

#include <cinttypes>
#include <string>

#include "block_fetcher.h"
#include "file/random_access_file_reader.h"
#include "logging/logging.h"
#include "memory/memory_allocator.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "rocksdb/env.h"
#include "table/block_based/block.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/persistent_cache_helper.h"
#include "util/coding.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

extern const uint64_t kLegacyBlockBasedTableMagicNumber;
extern const uint64_t kBlockBasedTableMagicNumber;

#ifndef ROCKSDB_LITE
extern const uint64_t kLegacyPlainTableMagicNumber;
extern const uint64_t kPlainTableMagicNumber;
#else
// ROCKSDB_LITE doesn't have plain table
const uint64_t kLegacyPlainTableMagicNumber = 0;
const uint64_t kPlainTableMagicNumber = 0;
#endif

// Returns true only when both `env` and `stats` are non-null and the stats
// level is above kExceptDetailedTimers.
bool ShouldReportDetailedTime(Env* env, Statistics* stats) {
  return env != nullptr && stats != nullptr &&
         stats->get_stats_level() > kExceptDetailedTimers;
}

// Appends offset_ and size_ to *dst as two varint64 values.
void BlockHandle::EncodeTo(std::string* dst) const {
  // Sanity check that all fields have been set
  assert(offset_ != ~static_cast<uint64_t>(0));
  assert(size_ != ~static_cast<uint64_t>(0));
  PutVarint64Varint64(dst, offset_, size_);
}

// Reads offset_ and size_ as two varint64 values from *input. On failure both
// fields are zeroed and Status::Corruption is returned.
Status BlockHandle::DecodeFrom(Slice* input) {
  if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) {
    return Status::OK();
  } else {
    // reset in case failure after partially decoding
    offset_ = 0;
    size_ = 0;
    return Status::Corruption("bad block handle");
  }
}

// Reads only size_ from *input and takes the offset from `_offset`. On
// failure both fields are zeroed and Status::Corruption is returned.
Status BlockHandle::DecodeSizeFrom(uint64_t _offset, Slice* input) {
  if (GetVarint64(input, &size_)) {
    offset_ = _offset;
    return Status::OK();
  } else {
    // reset in case failure after partially decoding
    offset_ = 0;
    size_ = 0;
    return Status::Corruption("bad block handle");
  }
}

// Return a string that contains the copy of handle.
std::string BlockHandle::ToString(bool hex) const { std::string handle_str; EncodeTo(&handle_str); if (hex) { return Slice(handle_str).ToString(true); } else { return handle_str; } } const BlockHandle BlockHandle::kNullBlockHandle(0, 0); void IndexValue::EncodeTo(std::string* dst, bool have_first_key, const BlockHandle* previous_handle) const { if (previous_handle) { assert(handle.offset() == previous_handle->offset() + previous_handle->size() + kBlockTrailerSize); PutVarsignedint64(dst, handle.size() - previous_handle->size()); } else { handle.EncodeTo(dst); } assert(dst->size() != 0); if (have_first_key) { PutLengthPrefixedSlice(dst, first_internal_key); } } Status IndexValue::DecodeFrom(Slice* input, bool have_first_key, const BlockHandle* previous_handle) { if (previous_handle) { int64_t delta; if (!GetVarsignedint64(input, &delta)) { return Status::Corruption("bad delta-encoded index value"); } handle = BlockHandle( previous_handle->offset() + previous_handle->size() + kBlockTrailerSize, previous_handle->size() + delta); } else { Status s = handle.DecodeFrom(input); if (!s.ok()) { return s; } } if (!have_first_key) { first_internal_key = Slice(); } else if (!GetLengthPrefixedSlice(input, &first_internal_key)) { return Status::Corruption("bad first key in block info"); } return Status::OK(); } std::string IndexValue::ToString(bool hex, bool have_first_key) const { std::string s; EncodeTo(&s, have_first_key, nullptr); if (hex) { return Slice(s).ToString(true); } else { return s; } } namespace { inline bool IsLegacyFooterFormat(uint64_t magic_number) { return magic_number == kLegacyBlockBasedTableMagicNumber || magic_number == kLegacyPlainTableMagicNumber; } inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) { if (magic_number == kLegacyBlockBasedTableMagicNumber) { return kBlockBasedTableMagicNumber; } if (magic_number == kLegacyPlainTableMagicNumber) { return kPlainTableMagicNumber; } assert(false); return 0; } } // namespace // legacy footer 
format: // metaindex handle (varint64 offset, varint64 size) // index handle (varint64 offset, varint64 size) // to make the total size 2 * BlockHandle::kMaxEncodedLength // table_magic_number (8 bytes) // new footer format: // checksum type (char, 1 byte) // metaindex handle (varint64 offset, varint64 size) // index handle (varint64 offset, varint64 size) // to make the total size 2 * BlockHandle::kMaxEncodedLength + 1 // footer version (4 bytes) // table_magic_number (8 bytes) void Footer::EncodeTo(std::string* dst) const { assert(HasInitializedTableMagicNumber()); if (IsLegacyFooterFormat(table_magic_number())) { // has to be default checksum with legacy footer assert(checksum_ == kCRC32c); const size_t original_size = dst->size(); metaindex_handle_.EncodeTo(dst); index_handle_.EncodeTo(dst); dst->resize(original_size + 2 * BlockHandle::kMaxEncodedLength); // Padding PutFixed32(dst, static_cast(table_magic_number() & 0xffffffffu)); PutFixed32(dst, static_cast(table_magic_number() >> 32)); assert(dst->size() == original_size + kVersion0EncodedLength); } else { const size_t original_size = dst->size(); dst->push_back(static_cast(checksum_)); metaindex_handle_.EncodeTo(dst); index_handle_.EncodeTo(dst); dst->resize(original_size + kNewVersionsEncodedLength - 12); // Padding PutFixed32(dst, version()); PutFixed32(dst, static_cast(table_magic_number() & 0xffffffffu)); PutFixed32(dst, static_cast(table_magic_number() >> 32)); assert(dst->size() == original_size + kNewVersionsEncodedLength); } } Footer::Footer(uint64_t _table_magic_number, uint32_t _version) : version_(_version), checksum_(kCRC32c), table_magic_number_(_table_magic_number) { // This should be guaranteed by constructor callers assert(!IsLegacyFooterFormat(_table_magic_number) || version_ == 0); } Status Footer::DecodeFrom(Slice* input) { assert(!HasInitializedTableMagicNumber()); assert(input != nullptr); assert(input->size() >= kMinEncodedLength); const char* magic_ptr = input->data() + input->size() - 
kMagicNumberLengthByte; const uint32_t magic_lo = DecodeFixed32(magic_ptr); const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); uint64_t magic = ((static_cast(magic_hi) << 32) | (static_cast(magic_lo))); // We check for legacy formats here and silently upconvert them bool legacy = IsLegacyFooterFormat(magic); if (legacy) { magic = UpconvertLegacyFooterFormat(magic); } set_table_magic_number(magic); if (legacy) { // The size is already asserted to be at least kMinEncodedLength // at the beginning of the function input->remove_prefix(input->size() - kVersion0EncodedLength); version_ = 0 /* legacy */; checksum_ = kCRC32c; } else { version_ = DecodeFixed32(magic_ptr - 4); // Footer version 1 and higher will always occupy exactly this many bytes. // It consists of the checksum type, two block handles, padding, // a version number, and a magic number if (input->size() < kNewVersionsEncodedLength) { return Status::Corruption("input is too short to be an sstable"); } else { input->remove_prefix(input->size() - kNewVersionsEncodedLength); } uint32_t chksum; if (!GetVarint32(input, &chksum)) { return Status::Corruption("bad checksum type"); } checksum_ = static_cast(chksum); } Status result = metaindex_handle_.DecodeFrom(input); if (result.ok()) { result = index_handle_.DecodeFrom(input); } if (result.ok()) { // We skip over any leftover data (just padding for now) in "input" const char* end = magic_ptr + kMagicNumberLengthByte; *input = Slice(end, input->data() + input->size() - end); } return result; } std::string Footer::ToString() const { std::string result; result.reserve(1024); bool legacy = IsLegacyFooterFormat(table_magic_number_); if (legacy) { result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); result.append("index handle: " + index_handle_.ToString() + "\n "); result.append("table_magic_number: " + ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n "); } else { result.append("checksum: " + ROCKSDB_NAMESPACE::ToString(checksum_) + 
"\n "); result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); result.append("index handle: " + index_handle_.ToString() + "\n "); result.append("footer version: " + ROCKSDB_NAMESPACE::ToString(version_) + "\n "); result.append("table_magic_number: " + ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n "); } return result; } Status ReadFooterFromFile(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, Footer* footer, uint64_t enforce_table_magic_number) { if (file_size < Footer::kMinEncodedLength) { return Status::Corruption("file is too short (" + ToString(file_size) + " bytes) to be an " "sstable: " + file->file_name()); } std::string footer_buf; AlignedBuf internal_buf; Slice footer_input; size_t read_offset = (file_size > Footer::kMaxEncodedLength) ? static_cast(file_size - Footer::kMaxEncodedLength) : 0; Status s; if (prefetch_buffer == nullptr || !prefetch_buffer->TryReadFromCache(read_offset, Footer::kMaxEncodedLength, &footer_input)) { if (file->use_direct_io()) { s = file->Read(IOOptions(), read_offset, Footer::kMaxEncodedLength, &footer_input, nullptr, &internal_buf); } else { footer_buf.reserve(Footer::kMaxEncodedLength); s = file->Read(IOOptions(), read_offset, Footer::kMaxEncodedLength, &footer_input, &footer_buf[0], nullptr); } if (!s.ok()) return s; } // Check that we actually read the whole footer from the file. It may be // that size isn't correct. 
if (footer_input.size() < Footer::kMinEncodedLength) { return Status::Corruption("file is too short (" + ToString(file_size) + " bytes) to be an " "sstable" + file->file_name()); } s = footer->DecodeFrom(&footer_input); if (!s.ok()) { return s; } if (enforce_table_magic_number != 0 && enforce_table_magic_number != footer->table_magic_number()) { return Status::Corruption( "Bad table magic number: expected " + ToString(enforce_table_magic_number) + ", found " + ToString(footer->table_magic_number()) + " in " + file->file_name()); } return Status::OK(); } Status UncompressBlockContentsForCompressionType( const UncompressionInfo& uncompression_info, const char* data, size_t n, BlockContents* contents, uint32_t format_version, const ImmutableCFOptions& ioptions, MemoryAllocator* allocator) { Status ret = Status::OK(); CacheAllocationPtr ubuf; assert(uncompression_info.type() != kNoCompression && "Invalid compression type"); StopWatchNano timer(ioptions.env, ShouldReportDetailedTime( ioptions.env, ioptions.statistics)); int decompress_size = 0; switch (uncompression_info.type()) { case kSnappyCompression: { size_t ulength = 0; static char snappy_corrupt_msg[] = "Snappy not supported or corrupted Snappy compressed block contents"; if (!Snappy_GetUncompressedLength(data, n, &ulength)) { return Status::Corruption(snappy_corrupt_msg); } ubuf = AllocateBlock(ulength, allocator); if (!Snappy_Uncompress(data, n, ubuf.get())) { return Status::Corruption(snappy_corrupt_msg); } *contents = BlockContents(std::move(ubuf), ulength); break; } case kZlibCompression: ubuf = Zlib_Uncompress( uncompression_info, data, n, &decompress_size, GetCompressFormatForVersion(kZlibCompression, format_version), allocator); if (!ubuf) { static char zlib_corrupt_msg[] = "Zlib not supported or corrupted Zlib compressed block contents"; return Status::Corruption(zlib_corrupt_msg); } *contents = BlockContents(std::move(ubuf), decompress_size); break; case kBZip2Compression: ubuf = BZip2_Uncompress( 
data, n, &decompress_size, GetCompressFormatForVersion(kBZip2Compression, format_version), allocator); if (!ubuf) { static char bzip2_corrupt_msg[] = "Bzip2 not supported or corrupted Bzip2 compressed block contents"; return Status::Corruption(bzip2_corrupt_msg); } *contents = BlockContents(std::move(ubuf), decompress_size); break; case kLZ4Compression: ubuf = LZ4_Uncompress( uncompression_info, data, n, &decompress_size, GetCompressFormatForVersion(kLZ4Compression, format_version), allocator); if (!ubuf) { static char lz4_corrupt_msg[] = "LZ4 not supported or corrupted LZ4 compressed block contents"; return Status::Corruption(lz4_corrupt_msg); } *contents = BlockContents(std::move(ubuf), decompress_size); break; case kLZ4HCCompression: ubuf = LZ4_Uncompress( uncompression_info, data, n, &decompress_size, GetCompressFormatForVersion(kLZ4HCCompression, format_version), allocator); if (!ubuf) { static char lz4hc_corrupt_msg[] = "LZ4HC not supported or corrupted LZ4HC compressed block contents"; return Status::Corruption(lz4hc_corrupt_msg); } *contents = BlockContents(std::move(ubuf), decompress_size); break; case kXpressCompression: // XPRESS allocates memory internally, thus no support for custom // allocator. 
ubuf.reset(XPRESS_Uncompress(data, n, &decompress_size)); if (!ubuf) { static char xpress_corrupt_msg[] = "XPRESS not supported or corrupted XPRESS compressed block " "contents"; return Status::Corruption(xpress_corrupt_msg); } *contents = BlockContents(std::move(ubuf), decompress_size); break; case kZSTD: case kZSTDNotFinalCompression: ubuf = ZSTD_Uncompress(uncompression_info, data, n, &decompress_size, allocator); if (!ubuf) { static char zstd_corrupt_msg[] = "ZSTD not supported or corrupted ZSTD compressed block contents"; return Status::Corruption(zstd_corrupt_msg); } *contents = BlockContents(std::move(ubuf), decompress_size); break; default: return Status::Corruption("bad block type"); } if (ShouldReportDetailedTime(ioptions.env, ioptions.statistics)) { RecordTimeToHistogram(ioptions.statistics, DECOMPRESSION_TIMES_NANOS, timer.ElapsedNanos()); } RecordTimeToHistogram(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size()); RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED); TEST_SYNC_POINT_CALLBACK( "UncompressBlockContentsForCompressionType:TamperWithReturnValue", static_cast(&ret)); TEST_SYNC_POINT_CALLBACK( "UncompressBlockContentsForCompressionType:" "TamperWithDecompressionOutput", static_cast(contents)); return ret; } // // The 'data' points to the raw block contents that was read in from file. // This method allocates a new heap buffer and the raw block // contents are uncompresed into this buffer. This // buffer is returned via 'result' and it is upto the caller to // free this buffer. 
// format_version is the block format as defined in include/rocksdb/table.h Status UncompressBlockContents(const UncompressionInfo& uncompression_info, const char* data, size_t n, BlockContents* contents, uint32_t format_version, const ImmutableCFOptions& ioptions, MemoryAllocator* allocator) { assert(data[n] != kNoCompression); assert(data[n] == static_cast(uncompression_info.type())); return UncompressBlockContentsForCompressionType(uncompression_info, data, n, contents, format_version, ioptions, allocator); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/format.h000066400000000000000000000316671370372246700160120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include "file/file_prefetch_buffer.h" #include "file/random_access_file_reader.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "memory/memory_allocator.h" #include "options/cf_options.h" #include "port/malloc.h" #include "port/port.h" // noexcept #include "table/persistent_cache_options.h" namespace ROCKSDB_NAMESPACE { class RandomAccessFile; struct ReadOptions; extern bool ShouldReportDetailedTime(Env* env, Statistics* stats); // the length of the magic number in bytes. const int kMagicNumberLengthByte = 8; // BlockHandle is a pointer to the extent of a file that stores a data // block or a meta block. class BlockHandle { public: // Creates a block handle with special values indicating "uninitialized," // distinct from the "null" block handle. 
BlockHandle(); BlockHandle(uint64_t offset, uint64_t size); // The offset of the block in the file. uint64_t offset() const { return offset_; } void set_offset(uint64_t _offset) { offset_ = _offset; } // The size of the stored block uint64_t size() const { return size_; } void set_size(uint64_t _size) { size_ = _size; } void EncodeTo(std::string* dst) const; Status DecodeFrom(Slice* input); Status DecodeSizeFrom(uint64_t offset, Slice* input); // Return a string that contains the copy of handle. std::string ToString(bool hex = true) const; // if the block handle's offset and size are both "0", we will view it // as a null block handle that points to no where. bool IsNull() const { return offset_ == 0 && size_ == 0; } static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; } // Maximum encoding length of a BlockHandle enum { kMaxEncodedLength = 10 + 10 }; inline bool operator==(const BlockHandle& rhs) const { return offset_ == rhs.offset_ && size_ == rhs.size_; } inline bool operator!=(const BlockHandle& rhs) const { return !(*this == rhs); } private: uint64_t offset_; uint64_t size_; static const BlockHandle kNullBlockHandle; }; // Value in block-based table file index. // // The index entry for block n is: y -> h, [x], // where: y is some key between the last key of block n (inclusive) and the // first key of block n+1 (exclusive); h is BlockHandle pointing to block n; // x, if present, is the first key of block n (unshortened). // This struct represents the "h, [x]" part. struct IndexValue { BlockHandle handle; // Empty means unknown. Slice first_internal_key; IndexValue() = default; IndexValue(BlockHandle _handle, Slice _first_internal_key) : handle(_handle), first_internal_key(_first_internal_key) {} // have_first_key indicates whether the `first_internal_key` is used. 
// If previous_handle is not null, delta encoding is used; // in this case, the two handles must point to consecutive blocks: // handle.offset() == // previous_handle->offset() + previous_handle->size() + kBlockTrailerSize void EncodeTo(std::string* dst, bool have_first_key, const BlockHandle* previous_handle) const; Status DecodeFrom(Slice* input, bool have_first_key, const BlockHandle* previous_handle); std::string ToString(bool hex, bool have_first_key) const; }; inline uint32_t GetCompressFormatForVersion(CompressionType compression_type, uint32_t version) { #ifdef NDEBUG (void)compression_type; #endif // snappy is not versioned assert(compression_type != kSnappyCompression && compression_type != kXpressCompression && compression_type != kNoCompression); // As of version 2, we encode compressed block with // compress_format_version == 2. Before that, the version is 1. // DO NOT CHANGE THIS FUNCTION, it affects disk format return version >= 2 ? 2 : 1; } inline bool BlockBasedTableSupportedVersion(uint32_t version) { return version <= 5; } // Footer encapsulates the fixed information stored at the tail // end of every table file. class Footer { public: // Constructs a footer without specifying its table magic number. // In such case, the table magic number of such footer should be // initialized via @ReadFooterFromFile(). // Use this when you plan to load Footer with DecodeFrom(). Never use this // when you plan to EncodeTo. Footer() : Footer(kInvalidTableMagicNumber, 0) {} // Use this constructor when you plan to write out the footer using // EncodeTo(). Never use this constructor with DecodeFrom(). 
Footer(uint64_t table_magic_number, uint32_t version); // The version of the footer in this file uint32_t version() const { return version_; } // The checksum type used in this file ChecksumType checksum() const { return checksum_; } void set_checksum(const ChecksumType c) { checksum_ = c; } // The block handle for the metaindex block of the table const BlockHandle& metaindex_handle() const { return metaindex_handle_; } void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } // The block handle for the index block of the table const BlockHandle& index_handle() const { return index_handle_; } void set_index_handle(const BlockHandle& h) { index_handle_ = h; } uint64_t table_magic_number() const { return table_magic_number_; } void EncodeTo(std::string* dst) const; // Set the current footer based on the input slice. // // REQUIRES: table_magic_number_ is not set (i.e., // HasInitializedTableMagicNumber() is true). The function will initialize the // magic number Status DecodeFrom(Slice* input); // Encoded length of a Footer. Note that the serialization of a Footer will // always occupy at least kMinEncodedLength bytes. If fields are changed // the version number should be incremented and kMaxEncodedLength should be // increased accordingly. enum { // Footer version 0 (legacy) will always occupy exactly this many bytes. // It consists of two block handles, padding, and a magic number. kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8, // Footer of versions 1 and higher will always occupy exactly this many // bytes. 
It consists of the checksum type, two block handles, padding, // a version number (bigger than 1), and a magic number kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8, kMinEncodedLength = kVersion0EncodedLength, kMaxEncodedLength = kNewVersionsEncodedLength, }; static const uint64_t kInvalidTableMagicNumber = 0; // convert this object to a human readable form std::string ToString() const; private: // REQUIRES: magic number wasn't initialized. void set_table_magic_number(uint64_t magic_number) { assert(!HasInitializedTableMagicNumber()); table_magic_number_ = magic_number; } // return true if @table_magic_number_ is set to a value different // from @kInvalidTableMagicNumber. bool HasInitializedTableMagicNumber() const { return (table_magic_number_ != kInvalidTableMagicNumber); } uint32_t version_; ChecksumType checksum_; BlockHandle metaindex_handle_; BlockHandle index_handle_; uint64_t table_magic_number_ = 0; }; // Read the footer from file // If enforce_table_magic_number != 0, ReadFooterFromFile() will return // corruption if table_magic number is not equal to enforce_table_magic_number Status ReadFooterFromFile(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, Footer* footer, uint64_t enforce_table_magic_number = 0); // 1-byte type + 32-bit crc static const size_t kBlockTrailerSize = 5; // Make block size calculation for IO less error prone inline uint64_t block_size(const BlockHandle& handle) { return handle.size() + kBlockTrailerSize; } inline CompressionType get_block_compression_type(const char* block_data, size_t block_size) { return static_cast(block_data[block_size]); } // Represents the contents of a block read from an SST file. Depending on how // it's created, it may or may not own the actual block bytes. As an example, // BlockContents objects representing data read from mmapped files only point // into the mmapped region. 
struct BlockContents { Slice data; // Actual contents of data CacheAllocationPtr allocation; #ifndef NDEBUG // Whether the block is a raw block, which contains compression type // byte. It is only used for assertion. bool is_raw_block = false; #endif // NDEBUG BlockContents() {} // Does not take ownership of the underlying data bytes. BlockContents(const Slice& _data) : data(_data) {} // Takes ownership of the underlying data bytes. BlockContents(CacheAllocationPtr&& _data, size_t _size) : data(_data.get(), _size), allocation(std::move(_data)) {} // Takes ownership of the underlying data bytes. BlockContents(std::unique_ptr&& _data, size_t _size) : data(_data.get(), _size) { allocation.reset(_data.release()); } // Returns whether the object has ownership of the underlying data bytes. bool own_bytes() const { return allocation.get() != nullptr; } // It's the caller's responsibility to make sure that this is // for raw block contents, which contains the compression // byte in the end. CompressionType get_compression_type() const { assert(is_raw_block); return get_block_compression_type(data.data(), data.size()); } // The additional memory space taken by the block data. 
size_t usable_size() const { if (allocation.get() != nullptr) { auto allocator = allocation.get_deleter().allocator; if (allocator) { return allocator->UsableSize(allocation.get(), data.size()); } #ifdef ROCKSDB_MALLOC_USABLE_SIZE return malloc_usable_size(allocation.get()); #else return data.size(); #endif // ROCKSDB_MALLOC_USABLE_SIZE } else { return 0; // no extra memory is occupied by the data } } size_t ApproximateMemoryUsage() const { return usable_size() + sizeof(*this); } BlockContents(BlockContents&& other) ROCKSDB_NOEXCEPT { *this = std::move(other); } BlockContents& operator=(BlockContents&& other) { data = std::move(other.data); allocation = std::move(other.allocation); #ifndef NDEBUG is_raw_block = other.is_raw_block; #endif // NDEBUG return *this; } }; // Read the block identified by "handle" from "file". On failure // return non-OK. On success fill *result and return OK. extern Status ReadBlockContents( RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& options, const BlockHandle& handle, BlockContents* contents, const ImmutableCFOptions& ioptions, bool do_uncompress = true, const Slice& compression_dict = Slice(), const PersistentCacheOptions& cache_options = PersistentCacheOptions()); // The 'data' points to the raw block contents read in from file. // This method allocates a new heap buffer and the raw block // contents are uncompresed into this buffer. This buffer is // returned via 'result' and it is upto the caller to // free this buffer. // For description of compress_format_version and possible values, see // util/compression.h extern Status UncompressBlockContents(const UncompressionInfo& info, const char* data, size_t n, BlockContents* contents, uint32_t compress_format_version, const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr); // This is an extension to UncompressBlockContents that accepts // a specific compression type. 
This is used by un-wrapped blocks // with no compression header. extern Status UncompressBlockContentsForCompressionType( const UncompressionInfo& info, const char* data, size_t n, BlockContents* contents, uint32_t compress_format_version, const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr); // Implementation details follow. Clients should ignore, // TODO(andrewkr): we should prefer one way of representing a null/uninitialized // BlockHandle. Currently we use zeros for null and use negation-of-zeros for // uninitialized. inline BlockHandle::BlockHandle() : BlockHandle(~static_cast(0), ~static_cast(0)) {} inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size) : offset_(_offset), size_(_size) {} } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/get_context.cc000066400000000000000000000362761370372246700172040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "table/get_context.h" #include "db/merge_helper.h" #include "db/pinned_iterators_manager.h" #include "db/read_callback.h" #include "monitoring/file_read_sample.h" #include "monitoring/perf_context_imp.h" #include "monitoring/statistics.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" #include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { namespace { void appendToReplayLog(std::string* replay_log, ValueType type, Slice value) { #ifndef ROCKSDB_LITE if (replay_log) { if (replay_log->empty()) { // Optimization: in the common case of only one operation in the // log, we allocate the exact amount of space needed. 
replay_log->reserve(1 + VarintLength(value.size()) + value.size()); } replay_log->push_back(type); PutLengthPrefixedSlice(replay_log, value); } #else (void)replay_log; (void)type; (void)value; #endif // ROCKSDB_LITE } } // namespace GetContext::GetContext( const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, Statistics* statistics, GetState init_state, const Slice& user_key, PinnableSlice* pinnable_val, std::string* timestamp, bool* value_found, MergeContext* merge_context, bool do_merge, SequenceNumber* _max_covering_tombstone_seq, Env* env, SequenceNumber* seq, PinnedIteratorsManager* _pinned_iters_mgr, ReadCallback* callback, bool* is_blob_index, uint64_t tracing_get_id) : ucmp_(ucmp), merge_operator_(merge_operator), logger_(logger), statistics_(statistics), state_(init_state), user_key_(user_key), pinnable_val_(pinnable_val), timestamp_(timestamp), value_found_(value_found), merge_context_(merge_context), max_covering_tombstone_seq_(_max_covering_tombstone_seq), env_(env), seq_(seq), replay_log_(nullptr), pinned_iters_mgr_(_pinned_iters_mgr), callback_(callback), do_merge_(do_merge), is_blob_index_(is_blob_index), tracing_get_id_(tracing_get_id) { if (seq_) { *seq_ = kMaxSequenceNumber; } sample_ = should_sample_file_read(); } GetContext::GetContext( const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, Statistics* statistics, GetState init_state, const Slice& user_key, PinnableSlice* pinnable_val, bool* value_found, MergeContext* merge_context, bool do_merge, SequenceNumber* _max_covering_tombstone_seq, Env* env, SequenceNumber* seq, PinnedIteratorsManager* _pinned_iters_mgr, ReadCallback* callback, bool* is_blob_index, uint64_t tracing_get_id) : GetContext(ucmp, merge_operator, logger, statistics, init_state, user_key, pinnable_val, nullptr, value_found, merge_context, do_merge, _max_covering_tombstone_seq, env, seq, _pinned_iters_mgr, callback, is_blob_index, tracing_get_id) {} // Called from TableCache::Get 
and Table::Get when file/block in which // key may exist are not there in TableCache/BlockCache respectively. In this // case we can't guarantee that key does not exist and are not permitted to do // IO to be certain.Set the status=kFound and value_found=false to let the // caller know that key may exist but is not there in memory void GetContext::MarkKeyMayExist() { state_ = kFound; if (value_found_ != nullptr) { *value_found_ = false; } } void GetContext::SaveValue(const Slice& value, SequenceNumber /*seq*/) { assert(state_ == kNotFound); appendToReplayLog(replay_log_, kTypeValue, value); state_ = kFound; if (LIKELY(pinnable_val_ != nullptr)) { pinnable_val_->PinSelf(value); } } void GetContext::ReportCounters() { if (get_context_stats_.num_cache_hit > 0) { RecordTick(statistics_, BLOCK_CACHE_HIT, get_context_stats_.num_cache_hit); } if (get_context_stats_.num_cache_index_hit > 0) { RecordTick(statistics_, BLOCK_CACHE_INDEX_HIT, get_context_stats_.num_cache_index_hit); } if (get_context_stats_.num_cache_data_hit > 0) { RecordTick(statistics_, BLOCK_CACHE_DATA_HIT, get_context_stats_.num_cache_data_hit); } if (get_context_stats_.num_cache_filter_hit > 0) { RecordTick(statistics_, BLOCK_CACHE_FILTER_HIT, get_context_stats_.num_cache_filter_hit); } if (get_context_stats_.num_cache_compression_dict_hit > 0) { RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_HIT, get_context_stats_.num_cache_compression_dict_hit); } if (get_context_stats_.num_cache_index_miss > 0) { RecordTick(statistics_, BLOCK_CACHE_INDEX_MISS, get_context_stats_.num_cache_index_miss); } if (get_context_stats_.num_cache_filter_miss > 0) { RecordTick(statistics_, BLOCK_CACHE_FILTER_MISS, get_context_stats_.num_cache_filter_miss); } if (get_context_stats_.num_cache_data_miss > 0) { RecordTick(statistics_, BLOCK_CACHE_DATA_MISS, get_context_stats_.num_cache_data_miss); } if (get_context_stats_.num_cache_compression_dict_miss > 0) { RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_MISS, 
get_context_stats_.num_cache_compression_dict_miss); } if (get_context_stats_.num_cache_bytes_read > 0) { RecordTick(statistics_, BLOCK_CACHE_BYTES_READ, get_context_stats_.num_cache_bytes_read); } if (get_context_stats_.num_cache_miss > 0) { RecordTick(statistics_, BLOCK_CACHE_MISS, get_context_stats_.num_cache_miss); } if (get_context_stats_.num_cache_add > 0) { RecordTick(statistics_, BLOCK_CACHE_ADD, get_context_stats_.num_cache_add); } if (get_context_stats_.num_cache_add_redundant > 0) { RecordTick(statistics_, BLOCK_CACHE_ADD_REDUNDANT, get_context_stats_.num_cache_add_redundant); } if (get_context_stats_.num_cache_bytes_write > 0) { RecordTick(statistics_, BLOCK_CACHE_BYTES_WRITE, get_context_stats_.num_cache_bytes_write); } if (get_context_stats_.num_cache_index_add > 0) { RecordTick(statistics_, BLOCK_CACHE_INDEX_ADD, get_context_stats_.num_cache_index_add); } if (get_context_stats_.num_cache_index_add_redundant > 0) { RecordTick(statistics_, BLOCK_CACHE_INDEX_ADD_REDUNDANT, get_context_stats_.num_cache_index_add_redundant); } if (get_context_stats_.num_cache_index_bytes_insert > 0) { RecordTick(statistics_, BLOCK_CACHE_INDEX_BYTES_INSERT, get_context_stats_.num_cache_index_bytes_insert); } if (get_context_stats_.num_cache_data_add > 0) { RecordTick(statistics_, BLOCK_CACHE_DATA_ADD, get_context_stats_.num_cache_data_add); } if (get_context_stats_.num_cache_data_add_redundant > 0) { RecordTick(statistics_, BLOCK_CACHE_DATA_ADD_REDUNDANT, get_context_stats_.num_cache_data_add_redundant); } if (get_context_stats_.num_cache_data_bytes_insert > 0) { RecordTick(statistics_, BLOCK_CACHE_DATA_BYTES_INSERT, get_context_stats_.num_cache_data_bytes_insert); } if (get_context_stats_.num_cache_filter_add > 0) { RecordTick(statistics_, BLOCK_CACHE_FILTER_ADD, get_context_stats_.num_cache_filter_add); } if (get_context_stats_.num_cache_filter_add_redundant > 0) { RecordTick(statistics_, BLOCK_CACHE_FILTER_ADD_REDUNDANT, 
get_context_stats_.num_cache_filter_add_redundant); } if (get_context_stats_.num_cache_filter_bytes_insert > 0) { RecordTick(statistics_, BLOCK_CACHE_FILTER_BYTES_INSERT, get_context_stats_.num_cache_filter_bytes_insert); } if (get_context_stats_.num_cache_compression_dict_add > 0) { RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_ADD, get_context_stats_.num_cache_compression_dict_add); } if (get_context_stats_.num_cache_compression_dict_add_redundant > 0) { RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_ADD_REDUNDANT, get_context_stats_.num_cache_compression_dict_add_redundant); } if (get_context_stats_.num_cache_compression_dict_bytes_insert > 0) { RecordTick(statistics_, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, get_context_stats_.num_cache_compression_dict_bytes_insert); } } bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, const Slice& value, bool* matched, Cleanable* value_pinner) { assert(matched); assert((state_ != kMerge && parsed_key.type != kTypeMerge) || merge_context_ != nullptr); if (ucmp_->CompareWithoutTimestamp(parsed_key.user_key, user_key_) == 0) { *matched = true; // If the value is not in the snapshot, skip it if (!CheckCallback(parsed_key.sequence)) { return true; // to continue to the next seq } appendToReplayLog(replay_log_, parsed_key.type, value); if (seq_ != nullptr) { // Set the sequence number if it is uninitialized if (*seq_ == kMaxSequenceNumber) { *seq_ = parsed_key.sequence; } } auto type = parsed_key.type; // Key matches. Process it if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex) && max_covering_tombstone_seq_ != nullptr && *max_covering_tombstone_seq_ > parsed_key.sequence) { type = kTypeRangeDeletion; } switch (type) { case kTypeValue: case kTypeBlobIndex: assert(state_ == kNotFound || state_ == kMerge); if (type == kTypeBlobIndex && is_blob_index_ == nullptr) { // Blob value not supported. Stop. 
state_ = kBlobIndex; return false; } if (kNotFound == state_) { state_ = kFound; if (do_merge_) { if (LIKELY(pinnable_val_ != nullptr)) { if (LIKELY(value_pinner != nullptr)) { // If the backing resources for the value are provided, pin them pinnable_val_->PinSlice(value, value_pinner); } else { TEST_SYNC_POINT_CALLBACK("GetContext::SaveValue::PinSelf", this); // Otherwise copy the value pinnable_val_->PinSelf(value); } } } else { // It means this function is called as part of DB GetMergeOperands // API and the current value should be part of // merge_context_->operand_list push_operand(value, value_pinner); } } else if (kMerge == state_) { assert(merge_operator_ != nullptr); state_ = kFound; if (do_merge_) { if (LIKELY(pinnable_val_ != nullptr)) { Status merge_status = MergeHelper::TimedFullMerge( merge_operator_, user_key_, &value, merge_context_->GetOperands(), pinnable_val_->GetSelf(), logger_, statistics_, env_); pinnable_val_->PinSelf(); if (!merge_status.ok()) { state_ = kCorrupt; } } } else { // It means this function is called as part of DB GetMergeOperands // API and the current value should be part of // merge_context_->operand_list push_operand(value, value_pinner); } } if (state_ == kFound) { size_t ts_sz = ucmp_->timestamp_size(); if (ts_sz > 0 && timestamp_ != nullptr) { Slice ts = ExtractTimestampFromUserKey(parsed_key.user_key, ts_sz); timestamp_->assign(ts.data(), ts.size()); } } if (is_blob_index_ != nullptr) { *is_blob_index_ = (type == kTypeBlobIndex); } return false; case kTypeDeletion: case kTypeDeletionWithTimestamp: case kTypeSingleDeletion: case kTypeRangeDeletion: // TODO(noetzli): Verify correctness once merge of single-deletes // is supported assert(state_ == kNotFound || state_ == kMerge); if (kNotFound == state_) { state_ = kDeleted; } else if (kMerge == state_) { state_ = kFound; if (LIKELY(pinnable_val_ != nullptr)) { if (do_merge_) { Status merge_status = MergeHelper::TimedFullMerge( merge_operator_, user_key_, nullptr, 
merge_context_->GetOperands(), pinnable_val_->GetSelf(), logger_, statistics_, env_); pinnable_val_->PinSelf(); if (!merge_status.ok()) { state_ = kCorrupt; } } // If do_merge_ = false then the current value shouldn't be part of // merge_context_->operand_list } } return false; case kTypeMerge: assert(state_ == kNotFound || state_ == kMerge); state_ = kMerge; // value_pinner is not set from plain_table_reader.cc for example. push_operand(value, value_pinner); if (do_merge_ && merge_operator_ != nullptr && merge_operator_->ShouldMerge( merge_context_->GetOperandsDirectionBackward())) { state_ = kFound; if (LIKELY(pinnable_val_ != nullptr)) { // do_merge_ = true this is the case where this function is called // as part of DB Get API hence merge operators should be merged. if (do_merge_) { Status merge_status = MergeHelper::TimedFullMerge( merge_operator_, user_key_, nullptr, merge_context_->GetOperands(), pinnable_val_->GetSelf(), logger_, statistics_, env_); pinnable_val_->PinSelf(); if (!merge_status.ok()) { state_ = kCorrupt; } } } return false; } return true; default: assert(false); break; } } // state_ could be Corrupt, merge or notfound return false; } void GetContext::push_operand(const Slice& value, Cleanable* value_pinner) { if (pinned_iters_mgr() && pinned_iters_mgr()->PinningEnabled() && value_pinner != nullptr) { value_pinner->DelegateCleanupsTo(pinned_iters_mgr()); merge_context_->PushOperand(value, true /*value_pinned*/); } else { merge_context_->PushOperand(value, false); } } void replayGetContextLog(const Slice& replay_log, const Slice& user_key, GetContext* get_context, Cleanable* value_pinner) { #ifndef ROCKSDB_LITE Slice s = replay_log; while (s.size()) { auto type = static_cast(*s.data()); s.remove_prefix(1); Slice value; bool ret = GetLengthPrefixedSlice(&s, &value); assert(ret); (void)ret; bool dont_care __attribute__((__unused__)); // Since SequenceNumber is not stored and unknown, we will use // kMaxSequenceNumber. 
get_context->SaveValue( ParsedInternalKey(user_key, kMaxSequenceNumber, type), value, &dont_care, value_pinner); } #else // ROCKSDB_LITE (void)replay_log; (void)user_key; (void)get_context; (void)value_pinner; assert(false); #endif // ROCKSDB_LITE } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/get_context.h000066400000000000000000000205671370372246700170420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include "db/dbformat.h" #include "db/merge_context.h" #include "db/read_callback.h" #include "rocksdb/env.h" #include "rocksdb/statistics.h" #include "rocksdb/types.h" #include "table/block_based/block.h" namespace ROCKSDB_NAMESPACE { class MergeContext; class PinnedIteratorsManager; // Data structure for accumulating statistics during a point lookup. At the // end of the point lookup, the corresponding ticker stats are updated. 
This // avoids the overhead of frequent ticker stats updates struct GetContextStats { uint64_t num_cache_hit = 0; uint64_t num_cache_index_hit = 0; uint64_t num_cache_data_hit = 0; uint64_t num_cache_filter_hit = 0; uint64_t num_cache_compression_dict_hit = 0; uint64_t num_cache_index_miss = 0; uint64_t num_cache_filter_miss = 0; uint64_t num_cache_data_miss = 0; uint64_t num_cache_compression_dict_miss = 0; uint64_t num_cache_bytes_read = 0; uint64_t num_cache_miss = 0; uint64_t num_cache_add = 0; uint64_t num_cache_add_redundant = 0; uint64_t num_cache_bytes_write = 0; uint64_t num_cache_index_add = 0; uint64_t num_cache_index_add_redundant = 0; uint64_t num_cache_index_bytes_insert = 0; uint64_t num_cache_data_add = 0; uint64_t num_cache_data_add_redundant = 0; uint64_t num_cache_data_bytes_insert = 0; uint64_t num_cache_filter_add = 0; uint64_t num_cache_filter_add_redundant = 0; uint64_t num_cache_filter_bytes_insert = 0; uint64_t num_cache_compression_dict_add = 0; uint64_t num_cache_compression_dict_add_redundant = 0; uint64_t num_cache_compression_dict_bytes_insert = 0; }; // A class to hold context about a point lookup, such as pointer to value // slice, key, merge context etc, as well as the current state of the // lookup. Any user using GetContext to track the lookup result must call // SaveValue() whenever the internal key is found. This can happen // repeatedly in case of merge operands. In case the key may exist with // high probability, but IO is required to confirm and the user doesn't allow // it, MarkKeyMayExist() must be called instead of SaveValue(). class GetContext { public: // Current state of the point lookup. All except kNotFound and kMerge are // terminal states enum GetState { kNotFound, kFound, kDeleted, kCorrupt, kMerge, // saver contains the current merge result (the operands) kBlobIndex, }; GetContextStats get_context_stats_; // Constructor // @param value Holds the value corresponding to user_key. 
If its nullptr // then return all merge operands corresponding to user_key // via merge_context // @param value_found If non-nullptr, set to false if key may be present // but we can't be certain because we cannot do IO // @param max_covering_tombstone_seq Pointer to highest sequence number of // range deletion covering the key. When an internal key // is found with smaller sequence number, the lookup // terminates // @param seq If non-nullptr, the sequence number of the found key will be // saved here // @param callback Pointer to ReadCallback to perform additional checks // for visibility of a key // @param is_blob_index If non-nullptr, will be used to indicate if a found // key is of type blob index // @param do_merge True if value associated with user_key has to be returned // and false if all the merge operands associated with user_key has to be // returned. Id do_merge=false then all the merge operands are stored in // merge_context and they are never merged. The value pointer is untouched. 
GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, Statistics* statistics, GetState init_state, const Slice& user_key, PinnableSlice* value, bool* value_found, MergeContext* merge_context, bool do_merge, SequenceNumber* max_covering_tombstone_seq, Env* env, SequenceNumber* seq = nullptr, PinnedIteratorsManager* _pinned_iters_mgr = nullptr, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, uint64_t tracing_get_id = 0); GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, Statistics* statistics, GetState init_state, const Slice& user_key, PinnableSlice* value, std::string* timestamp, bool* value_found, MergeContext* merge_context, bool do_merge, SequenceNumber* max_covering_tombstone_seq, Env* env, SequenceNumber* seq = nullptr, PinnedIteratorsManager* _pinned_iters_mgr = nullptr, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr, uint64_t tracing_get_id = 0); GetContext() = delete; // This can be called to indicate that a key may be present, but cannot be // confirmed due to IO not allowed void MarkKeyMayExist(); // Records this key, value, and any meta-data (such as sequence number and // state) into this GetContext. // // If the parsed_key matches the user key that we are looking for, sets // matched to true. // // Returns True if more keys need to be read (due to merges) or // False if the complete value has been found. bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value, bool* matched, Cleanable* value_pinner = nullptr); // Simplified version of the previous function. Should only be used when we // know that the operation is a Put. 
void SaveValue(const Slice& value, SequenceNumber seq); GetState State() const { return state_; } SequenceNumber* max_covering_tombstone_seq() { return max_covering_tombstone_seq_; } PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; } // If a non-null string is passed, all the SaveValue calls will be // logged into the string. The operations can then be replayed on // another GetContext with replayGetContextLog. void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; } // Do we need to fetch the SequenceNumber for this key? bool NeedToReadSequence() const { return (seq_ != nullptr); } bool sample() const { return sample_; } bool CheckCallback(SequenceNumber seq) { if (callback_) { return callback_->IsVisible(seq); } return true; } void ReportCounters(); bool has_callback() const { return callback_ != nullptr; } uint64_t get_tracing_get_id() const { return tracing_get_id_; } void push_operand(const Slice& value, Cleanable* value_pinner); private: const Comparator* ucmp_; const MergeOperator* merge_operator_; // the merge operations encountered; Logger* logger_; Statistics* statistics_; GetState state_; Slice user_key_; PinnableSlice* pinnable_val_; std::string* timestamp_; bool* value_found_; // Is value set correctly? Used by KeyMayExist MergeContext* merge_context_; SequenceNumber* max_covering_tombstone_seq_; Env* env_; // If a key is found, seq_ will be set to the SequenceNumber of most recent // write to the key or kMaxSequenceNumber if unknown SequenceNumber* seq_; std::string* replay_log_; // Used to temporarily pin blocks when state_ == GetContext::kMerge PinnedIteratorsManager* pinned_iters_mgr_; ReadCallback* callback_; bool sample_; // Value is true if it's called as part of DB Get API and false if it's // called as part of DB GetMergeOperands API. When it's false merge operators // are never merged. bool do_merge_; bool* is_blob_index_; // Used for block cache tracing only. 
A tracing get id uniquely identifies a // Get or a MultiGet. const uint64_t tracing_get_id_; }; // Call this to replay a log and bring the get_context up to date. The replay // log must have been created by another GetContext object, whose replay log // must have been set by calling GetContext::SetReplayLog(). void replayGetContextLog(const Slice& replay_log, const Slice& user_key, GetContext* get_context, Cleanable* value_pinner = nullptr); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/internal_iterator.h000066400000000000000000000171231370372246700202360ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include #include "db/dbformat.h" #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/status.h" #include "table/format.h" namespace ROCKSDB_NAMESPACE { class PinnedIteratorsManager; struct IterateResult { Slice key; bool may_be_out_of_upper_bound; // If false, PrepareValue() needs to be called before value(). bool value_prepared = true; }; template class InternalIteratorBase : public Cleanable { public: InternalIteratorBase() {} // No copying allowed InternalIteratorBase(const InternalIteratorBase&) = delete; InternalIteratorBase& operator=(const InternalIteratorBase&) = delete; virtual ~InternalIteratorBase() {} // An iterator is either positioned at a key/value pair, or // not valid. This method returns true iff the iterator is valid. // Always returns false if !status().ok(). virtual bool Valid() const = 0; // Position at the first key in the source. The iterator is Valid() // after this call iff the source is not empty. virtual void SeekToFirst() = 0; // Position at the last key in the source. The iterator is // Valid() after this call iff the source is not empty. 
virtual void SeekToLast() = 0; // Position at the first key in the source that at or past target // The iterator is Valid() after this call iff the source contains // an entry that comes at or past target. // All Seek*() methods clear any error status() that the iterator had prior to // the call; after the seek, status() indicates only the error (if any) that // happened during the seek, not any past errors. // 'target' contains user timestamp if timestamp is enabled. virtual void Seek(const Slice& target) = 0; // Position at the first key in the source that at or before target // The iterator is Valid() after this call iff the source contains // an entry that comes at or before target. virtual void SeekForPrev(const Slice& target) = 0; // Moves to the next entry in the source. After this call, Valid() is // true iff the iterator was not positioned at the last entry in the source. // REQUIRES: Valid() virtual void Next() = 0; // Moves to the next entry in the source, and return result. Iterator // implementation should override this method to help methods inline better, // or when MayBeOutOfUpperBound() is non-trivial. // REQUIRES: Valid() virtual bool NextAndGetResult(IterateResult* result) { Next(); bool is_valid = Valid(); if (is_valid) { result->key = key(); // Default may_be_out_of_upper_bound to true to avoid unnecessary virtual // call. If an implementation has non-trivial MayBeOutOfUpperBound(), // it should also override NextAndGetResult(). result->may_be_out_of_upper_bound = true; result->value_prepared = false; assert(MayBeOutOfUpperBound()); } return is_valid; } // Moves to the previous entry in the source. After this call, Valid() is // true iff the iterator was not positioned at the first entry in source. // REQUIRES: Valid() virtual void Prev() = 0; // Return the key for the current entry. The underlying storage for // the returned slice is valid only until the next modification of // the iterator. 
// REQUIRES: Valid() virtual Slice key() const = 0; // Return user key for the current entry. // REQUIRES: Valid() virtual Slice user_key() const { return ExtractUserKey(key()); } // Return the value for the current entry. The underlying storage for // the returned slice is valid only until the next modification of // the iterator. // REQUIRES: Valid() // REQUIRES: PrepareValue() has been called if needed (see PrepareValue()). virtual TValue value() const = 0; // If an error has occurred, return it. Else return an ok status. // If non-blocking IO is requested and this operation cannot be // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; // For some types of iterators, sometimes Seek()/Next()/SeekForPrev()/etc may // load key but not value (to avoid the IO cost of reading the value from disk // if it won't be not needed). This method loads the value in such situation. // // Needs to be called before value() at least once after each iterator // movement (except if IterateResult::value_prepared = true), for iterators // created with allow_unprepared_value = true. // // Returns false if an error occurred; in this case Valid() is also changed // to false, and status() is changed to non-ok. // REQUIRES: Valid() virtual bool PrepareValue() { return true; } // True if the iterator is invalidated because it reached a key that is above // the iterator upper bound. Used by LevelIterator to decide whether it should // stop or move on to the next file. // Important: if iterator reached the end of the file without encountering any // keys above the upper bound, IsOutOfBound() must return false. virtual bool IsOutOfBound() { return false; } // Keys return from this iterator can be smaller than iterate_lower_bound. virtual bool MayBeOutOfLowerBound() { return true; } // Keys return from this iterator can be larger or equal to // iterate_upper_bound. 
virtual bool MayBeOutOfUpperBound() { return true; } // Pass the PinnedIteratorsManager to the Iterator, most Iterators don't // communicate with PinnedIteratorsManager so default implementation is no-op // but for Iterators that need to communicate with PinnedIteratorsManager // they will implement this function and use the passed pointer to communicate // with PinnedIteratorsManager. virtual void SetPinnedItersMgr(PinnedIteratorsManager* /*pinned_iters_mgr*/) { } // If true, this means that the Slice returned by key() is valid as long as // PinnedIteratorsManager::ReleasePinnedData is not called and the // Iterator is not deleted. // // IsKeyPinned() is guaranteed to always return true if // - Iterator is created with ReadOptions::pin_data = true // - DB tables were created with BlockBasedTableOptions::use_delta_encoding // set to false. virtual bool IsKeyPinned() const { return false; } // If true, this means that the Slice returned by value() is valid as long as // PinnedIteratorsManager::ReleasePinnedData is not called and the // Iterator is not deleted. // REQUIRES: Same as for value(). virtual bool IsValuePinned() const { return false; } virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) { return Status::NotSupported(""); } protected: void SeekForPrevImpl(const Slice& target, const Comparator* cmp) { Seek(target); if (!Valid()) { SeekToLast(); } while (Valid() && cmp->Compare(target, key()) < 0) { Prev(); } } bool is_mutable_; }; using InternalIterator = InternalIteratorBase; // Return an empty iterator (yields nothing). template extern InternalIteratorBase* NewEmptyInternalIterator(); // Return an empty iterator with the specified status. template extern InternalIteratorBase* NewErrorInternalIterator( const Status& status); // Return an empty iterator with the specified status, allocated arena. 
template extern InternalIteratorBase* NewErrorInternalIterator( const Status& status, Arena* arena); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/iter_heap.h000066400000000000000000000024141370372246700164460ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // #pragma once #include "db/dbformat.h" #include "table/iterator_wrapper.h" namespace ROCKSDB_NAMESPACE { // When used with std::priority_queue, this comparison functor puts the // iterator with the max/largest key on top. class MaxIteratorComparator { public: MaxIteratorComparator(const InternalKeyComparator* comparator) : comparator_(comparator) {} bool operator()(IteratorWrapper* a, IteratorWrapper* b) const { return comparator_->Compare(a->key(), b->key()) < 0; } private: const InternalKeyComparator* comparator_; }; // When used with std::priority_queue, this comparison functor puts the // iterator with the min/smallest key on top. class MinIteratorComparator { public: MinIteratorComparator(const InternalKeyComparator* comparator) : comparator_(comparator) {} bool operator()(IteratorWrapper* a, IteratorWrapper* b) const { return comparator_->Compare(a->key(), b->key()) > 0; } private: const InternalKeyComparator* comparator_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/iterator.cc000066400000000000000000000153041370372246700164770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "rocksdb/iterator.h" #include "memory/arena.h" #include "table/internal_iterator.h" #include "table/iterator_wrapper.h" namespace ROCKSDB_NAMESPACE { Cleanable::Cleanable() { cleanup_.function = nullptr; cleanup_.next = nullptr; } Cleanable::~Cleanable() { DoCleanup(); } Cleanable::Cleanable(Cleanable&& other) { *this = std::move(other); } Cleanable& Cleanable::operator=(Cleanable&& other) { if (this != &other) { cleanup_ = other.cleanup_; other.cleanup_.function = nullptr; other.cleanup_.next = nullptr; } return *this; } // If the entire linked list was on heap we could have simply add attach one // link list to another. However the head is an embeded object to avoid the cost // of creating objects for most of the use cases when the Cleanable has only one // Cleanup to do. We could put evernything on heap if benchmarks show no // negative impact on performance. // Also we need to iterate on the linked list since there is no pointer to the // tail. We can add the tail pointer but maintainin it might negatively impact // the perforamnce for the common case of one cleanup where tail pointer is not // needed. Again benchmarks could clarify that. // Even without a tail pointer we could iterate on the list, find the tail, and // have only that node updated without the need to insert the Cleanups one by // one. This however would be redundant when the source Cleanable has one or a // few Cleanups which is the case most of the time. // TODO(myabandeh): if the list is too long we should maintain a tail pointer // and have the entire list (minus the head that has to be inserted separately) // merged with the target linked list at once. 
// Transfers every cleanup registered on this object to `other`, leaving
// this object with nothing registered. Does nothing if no cleanup is
// registered here.
//
// The head record is embedded in the object, so it is handed over by value
// (function + two arguments). The chained records are handed over by
// pointer through RegisterCleanup(Cleanup*).
void Cleanable::DelegateCleanupsTo(Cleanable* other) {
  assert(other != nullptr);
  if (cleanup_.function != nullptr) {
    other->RegisterCleanup(cleanup_.function, cleanup_.arg1, cleanup_.arg2);
    for (Cleanup* node = cleanup_.next; node != nullptr;) {
      // Read the successor first: RegisterCleanup(Cleanup*) either relinks
      // the node into `other`'s list or deletes it.
      Cleanup* const following = node->next;
      other->RegisterCleanup(node);
      node = following;
    }
    cleanup_.function = nullptr;
    cleanup_.next = nullptr;
  }
}
// Returns a newly heap-allocated EmptyIterator carrying an OK status: it is
// never Valid() and its status() is Status::OK().
Iterator* NewEmptyIterator() { return new EmptyIterator(Status::OK()); }

// Returns a newly heap-allocated EmptyIterator carrying `status`: it is
// never Valid() and its status() reports the given status.
Iterator* NewErrorIterator(const Status& status) {
  return new EmptyIterator(status);
}
NewEmptyInternalIterator(Arena* arena); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/iterator_wrapper.h000066400000000000000000000077631370372246700201130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "table/internal_iterator.h" #include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { // A internal wrapper class with an interface similar to Iterator that caches // the valid() and key() results for an underlying iterator. // This can help avoid virtual function calls and also gives better // cache locality. template class IteratorWrapperBase { public: IteratorWrapperBase() : iter_(nullptr), valid_(false) {} explicit IteratorWrapperBase(InternalIteratorBase* _iter) : iter_(nullptr) { Set(_iter); } ~IteratorWrapperBase() {} InternalIteratorBase* iter() const { return iter_; } // Set the underlying Iterator to _iter and return // previous underlying Iterator. 
InternalIteratorBase* Set(InternalIteratorBase* _iter) { InternalIteratorBase* old_iter = iter_; iter_ = _iter; if (iter_ == nullptr) { valid_ = false; } else { Update(); } return old_iter; } void DeleteIter(bool is_arena_mode) { if (iter_) { if (!is_arena_mode) { delete iter_; } else { iter_->~InternalIteratorBase(); } } } // Iterator interface methods bool Valid() const { return valid_; } Slice key() const { assert(Valid()); return result_.key; } TValue value() const { assert(Valid()); return iter_->value(); } // Methods below require iter() != nullptr Status status() const { assert(iter_); return iter_->status(); } bool PrepareValue() { assert(Valid()); if (result_.value_prepared) { return true; } if (iter_->PrepareValue()) { result_.value_prepared = true; return true; } assert(!iter_->Valid()); valid_ = false; return false; } void Next() { assert(iter_); valid_ = iter_->NextAndGetResult(&result_); assert(!valid_ || iter_->status().ok()); } void Prev() { assert(iter_); iter_->Prev(); Update(); } void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } void SeekForPrev(const Slice& k) { assert(iter_); iter_->SeekForPrev(k); Update(); } void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } bool MayBeOutOfLowerBound() { assert(Valid()); return iter_->MayBeOutOfLowerBound(); } bool MayBeOutOfUpperBound() { assert(Valid()); return result_.may_be_out_of_upper_bound; } void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) { assert(iter_); iter_->SetPinnedItersMgr(pinned_iters_mgr); } bool IsKeyPinned() const { assert(Valid()); return iter_->IsKeyPinned(); } bool IsValuePinned() const { assert(Valid()); return iter_->IsValuePinned(); } bool IsValuePrepared() const { return result_.value_prepared; } private: void Update() { valid_ = iter_->Valid(); if (valid_) { assert(iter_->status().ok()); result_.key = iter_->key(); result_.may_be_out_of_upper_bound = true; 
result_.value_prepared = false; } } InternalIteratorBase* iter_; IterateResult result_; bool valid_; }; using IteratorWrapper = IteratorWrapperBase; class Arena; // Return an empty iterator (yields nothing) allocated from arena. template extern InternalIteratorBase* NewEmptyInternalIterator(Arena* arena); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/merger_test.cc000066400000000000000000000110651370372246700171660ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include "table/merging_iterator.h" #include "test_util/testharness.h" #include "test_util/testutil.h" namespace ROCKSDB_NAMESPACE { class MergerTest : public testing::Test { public: MergerTest() : icomp_(BytewiseComparator()), rnd_(3), merging_iterator_(nullptr), single_iterator_(nullptr) {} ~MergerTest() override = default; std::vector GenerateStrings(size_t len, int string_len) { std::vector ret; for (size_t i = 0; i < len; ++i) { InternalKey ik(test::RandomHumanReadableString(&rnd_, string_len), 0, ValueType::kTypeValue); ret.push_back(ik.Encode().ToString(false)); } return ret; } void AssertEquivalence() { auto a = merging_iterator_.get(); auto b = single_iterator_.get(); if (!a->Valid()) { ASSERT_TRUE(!b->Valid()); } else { ASSERT_TRUE(b->Valid()); ASSERT_EQ(b->key().ToString(), a->key().ToString()); ASSERT_EQ(b->value().ToString(), a->value().ToString()); } } void SeekToRandom() { InternalKey ik(test::RandomHumanReadableString(&rnd_, 5), 0, ValueType::kTypeValue); Seek(ik.Encode().ToString(false)); } void Seek(std::string target) { merging_iterator_->Seek(target); single_iterator_->Seek(target); } void SeekToFirst() { merging_iterator_->SeekToFirst(); single_iterator_->SeekToFirst(); } void SeekToLast() { merging_iterator_->SeekToLast(); 
single_iterator_->SeekToLast(); } void Next(int times) { for (int i = 0; i < times && merging_iterator_->Valid(); ++i) { AssertEquivalence(); merging_iterator_->Next(); single_iterator_->Next(); } AssertEquivalence(); } void Prev(int times) { for (int i = 0; i < times && merging_iterator_->Valid(); ++i) { AssertEquivalence(); merging_iterator_->Prev(); single_iterator_->Prev(); } AssertEquivalence(); } void NextAndPrev(int times) { for (int i = 0; i < times && merging_iterator_->Valid(); ++i) { AssertEquivalence(); if (rnd_.OneIn(2)) { merging_iterator_->Prev(); single_iterator_->Prev(); } else { merging_iterator_->Next(); single_iterator_->Next(); } } AssertEquivalence(); } void Generate(size_t num_iterators, size_t strings_per_iterator, int letters_per_string) { std::vector small_iterators; for (size_t i = 0; i < num_iterators; ++i) { auto strings = GenerateStrings(strings_per_iterator, letters_per_string); small_iterators.push_back(new test::VectorIterator(strings)); all_keys_.insert(all_keys_.end(), strings.begin(), strings.end()); } merging_iterator_.reset( NewMergingIterator(&icomp_, &small_iterators[0], static_cast(small_iterators.size()))); single_iterator_.reset(new test::VectorIterator(all_keys_)); } InternalKeyComparator icomp_; Random rnd_; std::unique_ptr merging_iterator_; std::unique_ptr single_iterator_; std::vector all_keys_; }; TEST_F(MergerTest, SeekToRandomNextTest) { Generate(1000, 50, 50); for (int i = 0; i < 10; ++i) { SeekToRandom(); AssertEquivalence(); Next(50000); } } TEST_F(MergerTest, SeekToRandomNextSmallStringsTest) { Generate(1000, 50, 2); for (int i = 0; i < 10; ++i) { SeekToRandom(); AssertEquivalence(); Next(50000); } } TEST_F(MergerTest, SeekToRandomPrevTest) { Generate(1000, 50, 50); for (int i = 0; i < 10; ++i) { SeekToRandom(); AssertEquivalence(); Prev(50000); } } TEST_F(MergerTest, SeekToRandomRandomTest) { Generate(200, 50, 50); for (int i = 0; i < 3; ++i) { SeekToRandom(); AssertEquivalence(); NextAndPrev(5000); } } 
TEST_F(MergerTest, SeekToFirstTest) { Generate(1000, 50, 50); for (int i = 0; i < 10; ++i) { SeekToFirst(); AssertEquivalence(); Next(50000); } } TEST_F(MergerTest, SeekToLastTest) { Generate(1000, 50, 50); for (int i = 0; i < 10; ++i) { SeekToLast(); AssertEquivalence(); Prev(50000); } } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/table/merging_iterator.cc000066400000000000000000000334761370372246700202210ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "table/merging_iterator.h" #include #include #include "db/dbformat.h" #include "db/pinned_iterators_manager.h" #include "memory/arena.h" #include "monitoring/perf_context_imp.h" #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/options.h" #include "table/internal_iterator.h" #include "table/iter_heap.h" #include "table/iterator_wrapper.h" #include "test_util/sync_point.h" #include "util/autovector.h" #include "util/heap.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { // Without anonymous namespace here, we fail the warning -Wmissing-prototypes namespace { typedef BinaryHeap MergerMaxIterHeap; typedef BinaryHeap MergerMinIterHeap; } // namespace const size_t kNumIterReserve = 4; class MergingIterator : public InternalIterator { public: MergingIterator(const InternalKeyComparator* comparator, InternalIterator** children, int n, bool is_arena_mode, bool prefix_seek_mode) : is_arena_mode_(is_arena_mode), comparator_(comparator), current_(nullptr), direction_(kForward), minHeap_(comparator_), prefix_seek_mode_(prefix_seek_mode), pinned_iters_mgr_(nullptr) { children_.resize(n); for (int i = 0; i < n; i++) { children_[i].Set(children[i]); } for (auto& child : children_) { AddToMinHeapOrCheckStatus(&child); } current_ = CurrentForward(); } void considerStatus(Status s) { if (!s.ok() && status_.ok()) { status_ = s; } } virtual void AddIterator(InternalIterator* iter) { assert(direction_ == kForward); children_.emplace_back(iter); if (pinned_iters_mgr_) { iter->SetPinnedItersMgr(pinned_iters_mgr_); } auto new_wrapper = children_.back(); AddToMinHeapOrCheckStatus(&new_wrapper); if (new_wrapper.Valid()) { current_ = CurrentForward(); } } ~MergingIterator() override { for (auto& child : children_) { child.DeleteIter(is_arena_mode_); } } bool Valid() const override { return current_ != nullptr && status_.ok(); } Status status() const override { return status_; } void SeekToFirst() override { ClearHeaps(); status_ = 
// Repositions the merging iterator for forward iteration from `target`.
//
// Clears both heaps, resets status_ to OK, calls Seek(target) on every
// child and then either adds the child to the min heap (if valid) or folds
// its status into status_. Finally sets the direction to forward and caches
// the top of the min heap in current_.
//
// The inner braces exist only to bound the lifetime of the perf timers so
// that child seek time and heap maintenance time are accounted separately.
void Seek(const Slice& target) override {
  ClearHeaps();
  status_ = Status::OK();
  for (auto& child : children_) {
    {
      // Time spent seeking in the child iterator itself.
      PERF_TIMER_GUARD(seek_child_seek_time);
      child.Seek(target);
    }
    PERF_COUNTER_ADD(seek_child_seek_count, 1);
    {
      // Strictly, we timed slightly more than min heap operation,
      // but these operations are very cheap.
      PERF_TIMER_GUARD(seek_min_heap_time);
      AddToMinHeapOrCheckStatus(&child);
    }
  }
  // Must be set before CurrentForward(), which asserts on the direction.
  direction_ = kForward;
  {
    PERF_TIMER_GUARD(seek_min_heap_time);
    current_ = CurrentForward();
  }
}
current_->Next(); if (current_->Valid()) { // current is still valid after the Next() call above. Call // replace_top() to restore the heap property. When the same child // iterator yields a sequence of keys, this is cheap. assert(current_->status().ok()); minHeap_.replace_top(current_); } else { // current stopped being valid, remove it from the heap. considerStatus(current_->status()); minHeap_.pop(); } current_ = CurrentForward(); } bool NextAndGetResult(IterateResult* result) override { Next(); bool is_valid = Valid(); if (is_valid) { result->key = key(); result->may_be_out_of_upper_bound = MayBeOutOfUpperBound(); result->value_prepared = current_->IsValuePrepared(); } return is_valid; } void Prev() override { assert(Valid()); // Ensure that all children are positioned before key(). // If we are moving in the reverse direction, it is already // true for all of the non-current children since current_ is // the largest child and key() == current_->key(). if (direction_ != kReverse) { // Otherwise, retreat the non-current children. We retreat current_ // just after the if-block. SwitchToBackward(); } // For the heap modifications below to be correct, current_ must be the // current top of the heap. assert(current_ == CurrentReverse()); current_->Prev(); if (current_->Valid()) { // current is still valid after the Prev() call above. Call // replace_top() to restore the heap property. When the same child // iterator yields a sequence of keys, this is cheap. assert(current_->status().ok()); maxHeap_->replace_top(current_); } else { // current stopped being valid, remove it from the heap. 
considerStatus(current_->status()); maxHeap_->pop(); } current_ = CurrentReverse(); } Slice key() const override { assert(Valid()); return current_->key(); } Slice value() const override { assert(Valid()); return current_->value(); } bool PrepareValue() override { assert(Valid()); if (current_->PrepareValue()) { return true; } considerStatus(current_->status()); assert(!status_.ok()); return false; } // Here we simply relay MayBeOutOfLowerBound/MayBeOutOfUpperBound result // from current child iterator. Potentially as long as one of child iterator // report out of bound is not possible, we know current key is within bound. bool MayBeOutOfLowerBound() override { assert(Valid()); return current_->MayBeOutOfLowerBound(); } bool MayBeOutOfUpperBound() override { assert(Valid()); return current_->MayBeOutOfUpperBound(); } void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { pinned_iters_mgr_ = pinned_iters_mgr; for (auto& child : children_) { child.SetPinnedItersMgr(pinned_iters_mgr); } } bool IsKeyPinned() const override { assert(Valid()); return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && current_->IsKeyPinned(); } bool IsValuePinned() const override { assert(Valid()); return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && current_->IsValuePinned(); } private: // Clears heaps for both directions, used when changing direction or seeking void ClearHeaps(); // Ensures that maxHeap_ is initialized when starting to go in the reverse // direction void InitMaxHeap(); bool is_arena_mode_; const InternalKeyComparator* comparator_; autovector children_; // Cached pointer to child iterator with the current key, or nullptr if no // child iterators are valid. This is the top of minHeap_ or maxHeap_ // depending on the direction. IteratorWrapper* current_; // If any of the children have non-ok status, this is one of them. Status status_; // Which direction is the iterator moving? 
enum Direction { kForward, kReverse }; Direction direction_; MergerMinIterHeap minHeap_; bool prefix_seek_mode_; // Max heap is used for reverse iteration, which is way less common than // forward. Lazily initialize it to save memory. std::unique_ptr maxHeap_; PinnedIteratorsManager* pinned_iters_mgr_; // In forward direction, process a child that is not in the min heap. // If valid, add to the min heap. Otherwise, check status. void AddToMinHeapOrCheckStatus(IteratorWrapper*); // In backward direction, process a child that is not in the max heap. // If valid, add to the min heap. Otherwise, check status. void AddToMaxHeapOrCheckStatus(IteratorWrapper*); void SwitchToForward(); // Switch the direction from forward to backward without changing the // position. Iterator should still be valid. void SwitchToBackward(); IteratorWrapper* CurrentForward() const { assert(direction_ == kForward); return !minHeap_.empty() ? minHeap_.top() : nullptr; } IteratorWrapper* CurrentReverse() const { assert(direction_ == kReverse); assert(maxHeap_); return !maxHeap_->empty() ? maxHeap_->top() : nullptr; } }; void MergingIterator::AddToMinHeapOrCheckStatus(IteratorWrapper* child) { if (child->Valid()) { assert(child->status().ok()); minHeap_.push(child); } else { considerStatus(child->status()); } } void MergingIterator::AddToMaxHeapOrCheckStatus(IteratorWrapper* child) { if (child->Valid()) { assert(child->status().ok()); maxHeap_->push(child); } else { considerStatus(child->status()); } } void MergingIterator::SwitchToForward() { // Otherwise, advance the non-current children. We advance current_ // just after the if-block. 
// Switches the iteration direction to reverse while keeping the current
// position.
//
// Clears both heaps, makes sure the max heap exists, and takes the current
// key as the pivot. Every child other than current_ is repositioned with
// SeekForPrev(pivot); if it lands on a key equal to the pivot it is stepped
// back once more with Prev() (presumably so that non-current children end
// up strictly before the current key). All children, including current_,
// are then offered to the max heap.
void MergingIterator::SwitchToBackward() {
  ClearHeaps();
  InitMaxHeap();
  // key() reads through current_, which is still set even though the heaps
  // were just cleared.
  Slice target = key();
  for (auto& child : children_) {
    if (&child != current_) {
      child.SeekForPrev(target);
      // Test hook fired after the child has been repositioned and before
      // the optional extra Prev() below.
      TEST_SYNC_POINT_CALLBACK("MergeIterator::Prev:BeforePrev", &child);
      if (child.Valid() && comparator_->Equal(target, child.key())) {
        assert(child.status().ok());
        child.Prev();
      }
    }
    AddToMaxHeapOrCheckStatus(&child);
  }
  // Must be set before CurrentReverse(), which asserts on the direction.
  direction_ = kReverse;
  if (!prefix_seek_mode_) {
    // Note that we don't do assert(current_ == CurrentReverse()) here
    // because it is possible to have some keys larger than the seek-key
    // inserted between Seek() and SeekToLast(), which makes current_ not
    // equal to CurrentReverse().
    current_ = CurrentReverse();
  }
  // In prefix seek mode current_ is not refreshed above, so this asserts
  // that the old current_ is still the top of the max heap.
  assert(current_ == CurrentReverse());
}
MergingIterator(comparator, nullptr, 0, true, prefix_seek_mode); } MergeIteratorBuilder::~MergeIteratorBuilder() { if (first_iter != nullptr) { first_iter->~InternalIterator(); } if (merge_iter != nullptr) { merge_iter->~MergingIterator(); } } void MergeIteratorBuilder::AddIterator(InternalIterator* iter) { if (!use_merging_iter && first_iter != nullptr) { merge_iter->AddIterator(first_iter); use_merging_iter = true; first_iter = nullptr; } if (use_merging_iter) { merge_iter->AddIterator(iter); } else { first_iter = iter; } } InternalIterator* MergeIteratorBuilder::Finish() { InternalIterator* ret = nullptr; if (!use_merging_iter) { ret = first_iter; first_iter = nullptr; } else { ret = merge_iter; merge_iter = nullptr; } return ret; } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/merging_iterator.h000066400000000000000000000041661370372246700200550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "db/dbformat.h" #include "rocksdb/types.h" namespace ROCKSDB_NAMESPACE { class Comparator; class Env; class Arena; template class InternalIteratorBase; using InternalIterator = InternalIteratorBase; // Return an iterator that provided the union of the data in // children[0,n-1]. Takes ownership of the child iterators and // will delete them when the result iterator is deleted. // // The result does no duplicate suppression. I.e., if a particular // key is present in K child iterators, it will be yielded K times. 
// // REQUIRES: n >= 0 extern InternalIterator* NewMergingIterator( const InternalKeyComparator* comparator, InternalIterator** children, int n, Arena* arena = nullptr, bool prefix_seek_mode = false); class MergingIterator; // A builder class to build a merging iterator by adding iterators one by one. class MergeIteratorBuilder { public: // comparator: the comparator used in merging comparator // arena: where the merging iterator needs to be allocated from. explicit MergeIteratorBuilder(const InternalKeyComparator* comparator, Arena* arena, bool prefix_seek_mode = false); ~MergeIteratorBuilder(); // Add iter to the merging iterator. void AddIterator(InternalIterator* iter); // Get arena used to build the merging iterator. It is called one a child // iterator needs to be allocated. Arena* GetArena() { return arena; } // Return the result merging iterator. InternalIterator* Finish(); private: MergingIterator* merge_iter; InternalIterator* first_iter; bool use_merging_iter; Arena* arena; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/meta_blocks.cc000066400000000000000000000473471370372246700171450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "table/meta_blocks.h"

// The names of the two standard headers were missing from the include
// directives; restored.
#include <map>
#include <string>

#include "block_fetcher.h"
#include "db/table_properties_collector.h"
#include "file/random_access_file_reader.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block_based/block.h"
#include "table/format.h"
#include "table/internal_iterator.h"
#include "table/persistent_cache_helper.h"
#include "table/table_properties_internal.h"
#include "test_util/sync_point.h"
#include "util/coding.h"

namespace ROCKSDB_NAMESPACE {

// Creates the underlying block builder with a restart interval of 1.
MetaIndexBuilder::MetaIndexBuilder()
    : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}

// Remembers `handle` (in its encoded form) under `key`. The entries are only
// written to the block when Finish() is called. An entry whose key was
// already added is left untouched (map insert semantics).
void MetaIndexBuilder::Add(const std::string& key, const BlockHandle& handle) {
  std::string handle_encoding;
  handle.EncodeTo(&handle_encoding);
  meta_block_handles_.insert({key, handle_encoding});
}

// Writes every remembered key/handle pair, in the iteration order of the
// map, into the block and returns the finished block contents.
Slice MetaIndexBuilder::Finish() {
  for (const auto& metablock : meta_block_handles_) {
    meta_index_block_->Add(metablock.first, metablock.second);
  }
  return meta_index_block_->Finish();
}

// Property block will be read sequentially and cached in a heap located
// object, so there's no need for restart points. Thus we set the restart
// interval to infinity to save space.
PropertyBlockBuilder::PropertyBlockBuilder() : properties_block_( new BlockBuilder(port::kMaxInt32 /* restart interval */)) {} void PropertyBlockBuilder::Add(const std::string& name, const std::string& val) { props_.insert({name, val}); } void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) { assert(props_.find(name) == props_.end()); std::string dst; PutVarint64(&dst, val); Add(name, dst); } void PropertyBlockBuilder::Add( const UserCollectedProperties& user_collected_properties) { for (const auto& prop : user_collected_properties) { Add(prop.first, prop.second); } } void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start", const_cast(&props)); Add(TablePropertiesNames::kRawKeySize, props.raw_key_size); Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); Add(TablePropertiesNames::kDataSize, props.data_size); Add(TablePropertiesNames::kIndexSize, props.index_size); if (props.index_partitions != 0) { Add(TablePropertiesNames::kIndexPartitions, props.index_partitions); Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size); } Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key); Add(TablePropertiesNames::kIndexValueIsDeltaEncoded, props.index_value_is_delta_encoded); Add(TablePropertiesNames::kNumEntries, props.num_entries); Add(TablePropertiesNames::kDeletedKeys, props.num_deletions); Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands); Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions); Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); Add(TablePropertiesNames::kFilterSize, props.filter_size); Add(TablePropertiesNames::kFormatVersion, props.format_version); Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len); Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id); Add(TablePropertiesNames::kCreationTime, props.creation_time); 
Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time); if (props.file_creation_time > 0) { Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time); } if (!props.filter_policy_name.empty()) { Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name); } if (!props.comparator_name.empty()) { Add(TablePropertiesNames::kComparator, props.comparator_name); } if (!props.merge_operator_name.empty()) { Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name); } if (!props.prefix_extractor_name.empty()) { Add(TablePropertiesNames::kPrefixExtractorName, props.prefix_extractor_name); } if (!props.property_collectors_names.empty()) { Add(TablePropertiesNames::kPropertyCollectors, props.property_collectors_names); } if (!props.column_family_name.empty()) { Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name); } if (!props.compression_name.empty()) { Add(TablePropertiesNames::kCompression, props.compression_name); } if (!props.compression_options.empty()) { Add(TablePropertiesNames::kCompressionOptions, props.compression_options); } } Slice PropertyBlockBuilder::Finish() { for (const auto& prop : props_) { properties_block_->Add(prop.first, prop.second); } return properties_block_->Finish(); } void LogPropertiesCollectionError( Logger* info_log, const std::string& method, const std::string& name) { assert(method == "Add" || method == "Finish"); std::string msg = "Encountered error when calling TablePropertiesCollector::" + method + "() with collector name: " + name; ROCKS_LOG_ERROR(info_log, "%s", msg.c_str()); } bool NotifyCollectTableCollectorsOnAdd( const Slice& key, const Slice& value, uint64_t file_size, const std::vector>& collectors, Logger* info_log) { bool all_succeeded = true; for (auto& collector : collectors) { Status s = collector->InternalAdd(key, value, file_size); all_succeeded = all_succeeded && s.ok(); if (!s.ok()) { LogPropertiesCollectionError(info_log, "Add" /* method */, collector->Name()); } } 
return all_succeeded; } void NotifyCollectTableCollectorsOnBlockAdd( const std::vector>& collectors, const uint64_t blockRawBytes, const uint64_t blockCompressedBytesFast, const uint64_t blockCompressedBytesSlow) { for (auto& collector : collectors) { collector->BlockAdd(blockRawBytes, blockCompressedBytesFast, blockCompressedBytesSlow); } } bool NotifyCollectTableCollectorsOnFinish( const std::vector>& collectors, Logger* info_log, PropertyBlockBuilder* builder) { bool all_succeeded = true; for (auto& collector : collectors) { UserCollectedProperties user_collected_properties; Status s = collector->Finish(&user_collected_properties); all_succeeded = all_succeeded && s.ok(); if (!s.ok()) { LogPropertiesCollectionError(info_log, "Finish" /* method */, collector->Name()); } else { builder->Add(user_collected_properties); } } return all_succeeded; } Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ImmutableCFOptions& ioptions, TableProperties** table_properties, bool verify_checksum, BlockHandle* ret_block_handle, CacheAllocationPtr* verification_buf, bool /*compression_type_missing*/, MemoryAllocator* memory_allocator) { assert(table_properties); Slice v = handle_value; BlockHandle handle; if (!handle.DecodeFrom(&v).ok()) { return Status::InvalidArgument("Failed to decode properties block handle"); } BlockContents block_contents; ReadOptions read_options; read_options.verify_checksums = verify_checksum; Status s; PersistentCacheOptions cache_options; BlockFetcher block_fetcher( file, prefetch_buffer, footer, read_options, handle, &block_contents, ioptions, false /* decompress */, false /*maybe_compressed*/, BlockType::kProperties, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); // property block is never compressed. Need to add uncompress logic if we are // to compress it.. 
if (!s.ok()) { return s; } Block properties_block(std::move(block_contents)); DataBlockIter iter; properties_block.NewDataIterator(BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber, &iter); auto new_table_properties = new TableProperties(); // All pre-defined properties of type uint64_t std::unordered_map predefined_uint64_properties = { {TablePropertiesNames::kDataSize, &new_table_properties->data_size}, {TablePropertiesNames::kIndexSize, &new_table_properties->index_size}, {TablePropertiesNames::kIndexPartitions, &new_table_properties->index_partitions}, {TablePropertiesNames::kTopLevelIndexSize, &new_table_properties->top_level_index_size}, {TablePropertiesNames::kIndexKeyIsUserKey, &new_table_properties->index_key_is_user_key}, {TablePropertiesNames::kIndexValueIsDeltaEncoded, &new_table_properties->index_value_is_delta_encoded}, {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size}, {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size}, {TablePropertiesNames::kRawValueSize, &new_table_properties->raw_value_size}, {TablePropertiesNames::kNumDataBlocks, &new_table_properties->num_data_blocks}, {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, {TablePropertiesNames::kDeletedKeys, &new_table_properties->num_deletions}, {TablePropertiesNames::kMergeOperands, &new_table_properties->num_merge_operands}, {TablePropertiesNames::kNumRangeDeletions, &new_table_properties->num_range_deletions}, {TablePropertiesNames::kFormatVersion, &new_table_properties->format_version}, {TablePropertiesNames::kFixedKeyLen, &new_table_properties->fixed_key_len}, {TablePropertiesNames::kColumnFamilyId, &new_table_properties->column_family_id}, {TablePropertiesNames::kCreationTime, &new_table_properties->creation_time}, {TablePropertiesNames::kOldestKeyTime, &new_table_properties->oldest_key_time}, {TablePropertiesNames::kFileCreationTime, &new_table_properties->file_creation_time}, }; std::string last_key; 
for (iter.SeekToFirstOrReport(); iter.Valid(); iter.NextOrReport()) { s = iter.status(); if (!s.ok()) { break; } auto key = iter.key().ToString(); // properties block should be strictly sorted with no duplicate key. if (!last_key.empty() && BytewiseComparator()->Compare(key, last_key) <= 0) { s = Status::Corruption("properties unsorted"); break; } last_key = key; auto raw_val = iter.value(); auto pos = predefined_uint64_properties.find(key); new_table_properties->properties_offsets.insert( {key, handle.offset() + iter.ValueOffset()}); if (pos != predefined_uint64_properties.end()) { if (key == TablePropertiesNames::kDeletedKeys || key == TablePropertiesNames::kMergeOperands) { // Insert in user-collected properties for API backwards compatibility new_table_properties->user_collected_properties.insert( {key, raw_val.ToString()}); } // handle predefined rocksdb properties uint64_t val; if (!GetVarint64(&raw_val, &val)) { // skip malformed value auto error_msg = "Detect malformed value in properties meta-block:" "\tkey: " + key + "\tval: " + raw_val.ToString(); ROCKS_LOG_ERROR(ioptions.info_log, "%s", error_msg.c_str()); continue; } *(pos->second) = val; } else if (key == TablePropertiesNames::kFilterPolicy) { new_table_properties->filter_policy_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kColumnFamilyName) { new_table_properties->column_family_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kComparator) { new_table_properties->comparator_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kMergeOperator) { new_table_properties->merge_operator_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kPrefixExtractorName) { new_table_properties->prefix_extractor_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kPropertyCollectors) { new_table_properties->property_collectors_names = raw_val.ToString(); } else if (key == TablePropertiesNames::kCompression) { 
new_table_properties->compression_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kCompressionOptions) { new_table_properties->compression_options = raw_val.ToString(); } else { // handle user-collected properties new_table_properties->user_collected_properties.insert( {key, raw_val.ToString()}); } } if (s.ok()) { *table_properties = new_table_properties; if (ret_block_handle != nullptr) { *ret_block_handle = handle; } if (verification_buf != nullptr) { size_t len = static_cast(handle.size() + kBlockTrailerSize); *verification_buf = ROCKSDB_NAMESPACE::AllocateBlock(len, memory_allocator); if (verification_buf->get() != nullptr) { memcpy(verification_buf->get(), block_contents.data.data(), len); } } } else { delete new_table_properties; } return s; } Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, TableProperties** properties, bool compression_type_missing, MemoryAllocator* memory_allocator, FilePrefetchBuffer* prefetch_buffer) { // -- Read metaindex block Footer footer; auto s = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer, table_magic_number); if (!s.ok()) { return s; } auto metaindex_handle = footer.metaindex_handle(); BlockContents metaindex_contents; ReadOptions read_options; read_options.verify_checksums = false; PersistentCacheOptions cache_options; BlockFetcher block_fetcher( file, prefetch_buffer, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, false /*maybe_compressed*/, BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; } // property blocks are never compressed. Need to add uncompress logic if we // are to compress it. 
Block metaindex_block(std::move(metaindex_contents)); std::unique_ptr meta_iter(metaindex_block.NewDataIterator( BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber)); // -- Read property block bool found_properties_block = true; s = SeekToPropertiesBlock(meta_iter.get(), &found_properties_block); if (!s.ok()) { return s; } TableProperties table_properties; if (found_properties_block == true) { s = ReadProperties(meta_iter->value(), file, prefetch_buffer, footer, ioptions, properties, false /* verify_checksum */, nullptr /* ret_block_hanel */, nullptr /* ret_block_contents */, compression_type_missing, memory_allocator); } else { s = Status::NotFound(); } return s; } Status FindMetaBlock(InternalIterator* meta_index_iter, const std::string& meta_block_name, BlockHandle* block_handle) { meta_index_iter->Seek(meta_block_name); if (meta_index_iter->status().ok() && meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) { Slice v = meta_index_iter->value(); return block_handle->DecodeFrom(&v); } else { return Status::Corruption("Cannot find the meta block", meta_block_name); } } Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, const std::string& meta_block_name, BlockHandle* block_handle, bool /*compression_type_missing*/, MemoryAllocator* memory_allocator) { Footer footer; auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, &footer, table_magic_number); if (!s.ok()) { return s; } auto metaindex_handle = footer.metaindex_handle(); BlockContents metaindex_contents; ReadOptions read_options; read_options.verify_checksums = false; PersistentCacheOptions cache_options; BlockFetcher block_fetcher( file, nullptr /* prefetch_buffer */, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, false /* do decompression */, false /*maybe_compressed*/, BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), 
cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; } // meta blocks are never compressed. Need to add uncompress logic if we are to // compress it. Block metaindex_block(std::move(metaindex_contents)); std::unique_ptr meta_iter; meta_iter.reset(metaindex_block.NewDataIterator( BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber)); return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); } Status ReadMetaBlock(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, const std::string& meta_block_name, BlockType block_type, BlockContents* contents, bool /*compression_type_missing*/, MemoryAllocator* memory_allocator) { Status status; Footer footer; status = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer, table_magic_number); if (!status.ok()) { return status; } // Reading metaindex block auto metaindex_handle = footer.metaindex_handle(); BlockContents metaindex_contents; ReadOptions read_options; read_options.verify_checksums = false; PersistentCacheOptions cache_options; BlockFetcher block_fetcher( file, prefetch_buffer, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, false /*maybe_compressed*/, BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); status = block_fetcher.ReadBlockContents(); if (!status.ok()) { return status; } // meta block is never compressed. Need to add uncompress logic if we are to // compress it. 
// Finding metablock Block metaindex_block(std::move(metaindex_contents)); std::unique_ptr meta_iter; meta_iter.reset(metaindex_block.NewDataIterator( BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber)); BlockHandle block_handle; status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle); if (!status.ok()) { return status; } // Reading metablock BlockFetcher block_fetcher2( file, prefetch_buffer, footer, read_options, block_handle, contents, ioptions, false /* decompress */, false /*maybe_compressed*/, block_type, UncompressionDict::GetEmptyDict(), cache_options, memory_allocator); return block_fetcher2.ReadBlockContents(); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/meta_blocks.h000066400000000000000000000145071370372246700167770ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include "db/builder.h" #include "db/table_properties_collector.h" #include "rocksdb/comparator.h" #include "rocksdb/memory_allocator.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "table/block_based/block_builder.h" #include "table/block_based/block_type.h" #include "table/format.h" #include "util/kv_map.h" namespace ROCKSDB_NAMESPACE { class BlockBuilder; class BlockHandle; class Env; class Footer; class Logger; class RandomAccessFile; struct TableProperties; class MetaIndexBuilder { public: MetaIndexBuilder(const MetaIndexBuilder&) = delete; MetaIndexBuilder& operator=(const MetaIndexBuilder&) = delete; MetaIndexBuilder(); void Add(const std::string& key, const BlockHandle& handle); // Write all the added key/value pairs to the block and return the contents // of the block. 
Slice Finish(); private: // store the sorted key/handle of the metablocks. stl_wrappers::KVMap meta_block_handles_; std::unique_ptr meta_index_block_; }; class PropertyBlockBuilder { public: PropertyBlockBuilder(const PropertyBlockBuilder&) = delete; PropertyBlockBuilder& operator=(const PropertyBlockBuilder&) = delete; PropertyBlockBuilder(); void AddTableProperty(const TableProperties& props); void Add(const std::string& key, uint64_t value); void Add(const std::string& key, const std::string& value); void Add(const UserCollectedProperties& user_collected_properties); // Write all the added entries to the block and return the block contents Slice Finish(); private: std::unique_ptr properties_block_; stl_wrappers::KVMap props_; }; // Were we encounter any error occurs during user-defined statistics collection, // we'll write the warning message to info log. void LogPropertiesCollectionError( Logger* info_log, const std::string& method, const std::string& name); // Utility functions help table builder to trigger batch events for user // defined property collectors. // Return value indicates if there is any error occurred; if error occurred, // the warning message will be logged. // NotifyCollectTableCollectorsOnAdd() triggers the `Add` event for all // property collectors. bool NotifyCollectTableCollectorsOnAdd( const Slice& key, const Slice& value, uint64_t file_size, const std::vector>& collectors, Logger* info_log); void NotifyCollectTableCollectorsOnBlockAdd( const std::vector>& collectors, uint64_t blockRawBytes, uint64_t blockCompressedBytesFast, uint64_t blockCompressedBytesSlow); // NotifyCollectTableCollectorsOnFinish() triggers the `Finish` event for all // property collectors. The collected properties will be added to `builder`. bool NotifyCollectTableCollectorsOnFinish( const std::vector>& collectors, Logger* info_log, PropertyBlockBuilder* builder); // Read the properties from the table. // @returns a status to indicate if the operation succeeded. 
// On success,
//          *table_properties will point to a heap-allocated TableProperties
//          object, otherwise value of `table_properties` will not be modified.
// `handle_value` is the encoded handle of the properties block.
// `verify_checksum` is copied into ReadOptions::verify_checksums for the
// block read. `block_handle` and `verification_buf` are optional outputs
// (may be nullptr) that are only written on success.
// NOTE: the definition in meta_blocks.cc leaves `compression_type_missing`
// unnamed, i.e. the flag is currently ignored there.
Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
                      FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
                      const ImmutableCFOptions& ioptions,
                      TableProperties** table_properties, bool verify_checksum,
                      BlockHandle* block_handle,
                      CacheAllocationPtr* verification_buf,
                      bool compression_type_missing = false,
                      MemoryAllocator* memory_allocator = nullptr);

// Directly read the properties from the properties block of a plain table.
// @returns a status to indicate if the operation succeeded. On success,
//          *table_properties will point to a heap-allocated TableProperties
//          object, otherwise value of `table_properties` will not be modified.
// certain tables do not have compression_type byte setup properly for
// uncompressed blocks, caller can request to reset compression type by
// passing compression_type_missing = true, the same applies to
// `ReadProperties`, `FindMetaBlock`, and `ReadMetaBlock`
// Status::NotFound is returned when the table has no properties block.
Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
                           uint64_t table_magic_number,
                           const ImmutableCFOptions& ioptions,
                           TableProperties** properties,
                           bool compression_type_missing = false,
                           MemoryAllocator* memory_allocator = nullptr,
                           FilePrefetchBuffer* prefetch_buffer = nullptr);

// Find the meta block from the meta index block.
// Seeks `meta_index_iter` to `meta_block_name`; on an exact key match the
// stored handle is decoded into `*block_handle`, otherwise
// Status::Corruption is returned.
Status FindMetaBlock(InternalIterator* meta_index_iter,
                     const std::string& meta_block_name,
                     BlockHandle* block_handle);

// Find the meta block
// Reads the footer and the metaindex block from `file` and then performs the
// same lookup as the iterator-based overload.
Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
                     uint64_t table_magic_number,
                     const ImmutableCFOptions& ioptions,
                     const std::string& meta_block_name,
                     BlockHandle* block_handle,
                     bool compression_type_missing = false,
                     MemoryAllocator* memory_allocator = nullptr);

// Read the specified meta block with name meta_block_name
// from `file` and initialize `contents` with contents of this block.
// Return Status::OK in case of success. Status ReadMetaBlock(RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, uint64_t file_size, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, const std::string& meta_block_name, BlockType block_type, BlockContents* contents, bool compression_type_missing = false, MemoryAllocator* memory_allocator = nullptr); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/mock_table.cc000066400000000000000000000112331370372246700167430ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "table/mock_table.h" #include "db/dbformat.h" #include "env/composite_env_wrapper.h" #include "file/random_access_file_reader.h" #include "port/port.h" #include "rocksdb/table_properties.h" #include "table/get_context.h" #include "util/coding.h" namespace ROCKSDB_NAMESPACE { namespace mock { namespace { const InternalKeyComparator icmp_(BytewiseComparator()); } // namespace stl_wrappers::KVMap MakeMockFile( std::initializer_list> l) { return stl_wrappers::KVMap(l, stl_wrappers::LessOfComparator(&icmp_)); } InternalIterator* MockTableReader::NewIterator( const ReadOptions&, const SliceTransform* /* prefix_extractor */, Arena* /*arena*/, bool /*skip_filters*/, TableReaderCaller /*caller*/, size_t /*compaction_readahead_size*/, bool /* allow_unprepared_value */) { return new MockTableIterator(table_); } Status MockTableReader::Get(const ReadOptions&, const Slice& key, GetContext* get_context, const SliceTransform* /*prefix_extractor*/, bool /*skip_filters*/) { std::unique_ptr iter(new MockTableIterator(table_)); for (iter->Seek(key); iter->Valid(); iter->Next()) { ParsedInternalKey parsed_key; if (!ParseInternalKey(iter->key(), &parsed_key)) { return Status::Corruption(Slice()); } bool 
dont_care __attribute__((__unused__)); if (!get_context->SaveValue(parsed_key, iter->value(), &dont_care)) { break; } } return Status::OK(); } std::shared_ptr MockTableReader::GetTableProperties() const { return std::shared_ptr(new TableProperties()); } MockTableFactory::MockTableFactory() : next_id_(1) {} Status MockTableFactory::NewTableReader( const TableReaderOptions& /*table_reader_options*/, std::unique_ptr&& file, uint64_t /*file_size*/, std::unique_ptr* table_reader, bool /*prefetch_index_and_filter_in_cache*/) const { uint32_t id = GetIDFromFile(file.get()); MutexLock lock_guard(&file_system_.mutex); auto it = file_system_.files.find(id); if (it == file_system_.files.end()) { return Status::IOError("Mock file not found"); } table_reader->reset(new MockTableReader(it->second)); return Status::OK(); } TableBuilder* MockTableFactory::NewTableBuilder( const TableBuilderOptions& /*table_builder_options*/, uint32_t /*column_family_id*/, WritableFileWriter* file) const { uint32_t id = GetAndWriteNextID(file); return new MockTableBuilder(id, &file_system_); } Status MockTableFactory::CreateMockTable(Env* env, const std::string& fname, stl_wrappers::KVMap file_contents) { std::unique_ptr file; auto s = env->NewWritableFile(fname, &file, EnvOptions()); if (!s.ok()) { return s; } WritableFileWriter file_writer(NewLegacyWritableFileWrapper(std::move(file)), fname, EnvOptions()); uint32_t id = GetAndWriteNextID(&file_writer); file_system_.files.insert({id, std::move(file_contents)}); return Status::OK(); } uint32_t MockTableFactory::GetAndWriteNextID(WritableFileWriter* file) const { uint32_t next_id = next_id_.fetch_add(1); char buf[4]; EncodeFixed32(buf, next_id); file->Append(Slice(buf, 4)); return next_id; } uint32_t MockTableFactory::GetIDFromFile(RandomAccessFileReader* file) const { char buf[4]; Slice result; file->Read(IOOptions(), 0, 4, &result, buf, nullptr); assert(result.size() == 4); return DecodeFixed32(buf); } void MockTableFactory::AssertSingleFile( 
const stl_wrappers::KVMap& file_contents) { ASSERT_EQ(file_system_.files.size(), 1U); ASSERT_EQ(file_contents, file_system_.files.begin()->second); } void MockTableFactory::AssertLatestFile( const stl_wrappers::KVMap& file_contents) { ASSERT_GE(file_system_.files.size(), 1U); auto latest = file_system_.files.end(); --latest; if (file_contents != latest->second) { std::cout << "Wrong content! Content of latest file:" << std::endl; for (const auto& kv : latest->second) { ParsedInternalKey ikey; std::string key, value; std::tie(key, value) = kv; ParseInternalKey(Slice(key), &ikey); std::cout << ikey.DebugString(false) << " -> " << value << std::endl; } FAIL(); } } } // namespace mock } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/mock_table.h000066400000000000000000000151171370372246700166120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include #include #include #include #include #include "db/version_edit.h" #include "port/port.h" #include "rocksdb/comparator.h" #include "rocksdb/io_status.h" #include "rocksdb/table.h" #include "table/internal_iterator.h" #include "table/table_builder.h" #include "table/table_reader.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/kv_map.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { namespace mock { stl_wrappers::KVMap MakeMockFile( std::initializer_list> l = {}); struct MockTableFileSystem { port::Mutex mutex; std::map files; }; class MockTableReader : public TableReader { public: explicit MockTableReader(const stl_wrappers::KVMap& table) : table_(table) {} InternalIterator* NewIterator(const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) override; Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) override; uint64_t ApproximateOffsetOf(const Slice& /*key*/, TableReaderCaller /*caller*/) override { return 0; } uint64_t ApproximateSize(const Slice& /*start*/, const Slice& /*end*/, TableReaderCaller /*caller*/) override { return 0; } size_t ApproximateMemoryUsage() const override { return 0; } void SetupForCompaction() override {} std::shared_ptr GetTableProperties() const override; ~MockTableReader() {} private: const stl_wrappers::KVMap& table_; }; class MockTableIterator : public InternalIterator { public: explicit MockTableIterator(const stl_wrappers::KVMap& table) : table_(table) { itr_ = table_.end(); } bool Valid() const override { return itr_ != table_.end(); } void SeekToFirst() override { itr_ = table_.begin(); } void SeekToLast() override { itr_ = table_.end(); --itr_; } void Seek(const Slice& target) override { std::string 
str_target(target.data(), target.size()); itr_ = table_.lower_bound(str_target); } void SeekForPrev(const Slice& target) override { std::string str_target(target.data(), target.size()); itr_ = table_.upper_bound(str_target); Prev(); } void Next() override { ++itr_; } void Prev() override { if (itr_ == table_.begin()) { itr_ = table_.end(); } else { --itr_; } } Slice key() const override { return Slice(itr_->first); } Slice value() const override { return Slice(itr_->second); } Status status() const override { return Status::OK(); } private: const stl_wrappers::KVMap& table_; stl_wrappers::KVMap::const_iterator itr_; }; class MockTableBuilder : public TableBuilder { public: MockTableBuilder(uint32_t id, MockTableFileSystem* file_system) : id_(id), file_system_(file_system) { table_ = MakeMockFile({}); } // REQUIRES: Either Finish() or Abandon() has been called. ~MockTableBuilder() {} // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. // REQUIRES: Finish(), Abandon() have not been called void Add(const Slice& key, const Slice& value) override { table_.insert({key.ToString(), value.ToString()}); } // Return non-ok iff some error has been detected. Status status() const override { return Status::OK(); } // Return non-ok iff some error happens during IO. 
IOStatus io_status() const override { return IOStatus::OK(); } Status Finish() override { MutexLock lock_guard(&file_system_->mutex); file_system_->files.insert({id_, table_}); return Status::OK(); } void Abandon() override {} uint64_t NumEntries() const override { return table_.size(); } uint64_t FileSize() const override { return table_.size(); } TableProperties GetTableProperties() const override { return TableProperties(); } // Get file checksum std::string GetFileChecksum() const override { return kUnknownFileChecksum; } // Get file checksum function name const char* GetFileChecksumFuncName() const override { return kUnknownFileChecksumFuncName; } private: uint32_t id_; MockTableFileSystem* file_system_; stl_wrappers::KVMap table_; }; class MockTableFactory : public TableFactory { public: MockTableFactory(); const char* Name() const override { return "MockTable"; } Status NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_familly_id, WritableFileWriter* file) const override; // This function will directly create mock table instead of going through // MockTableBuilder. file_contents has to have a format of . Those key-value pairs will then be inserted into the mock table. 
Status CreateMockTable(Env* env, const std::string& fname, stl_wrappers::KVMap file_contents); virtual Status SanitizeOptions( const DBOptions& /*db_opts*/, const ColumnFamilyOptions& /*cf_opts*/) const override { return Status::OK(); } virtual std::string GetPrintableTableOptions() const override { return std::string(); } // This function will assert that only a single file exists and that the // contents are equal to file_contents void AssertSingleFile(const stl_wrappers::KVMap& file_contents); void AssertLatestFile(const stl_wrappers::KVMap& file_contents); private: uint32_t GetAndWriteNextID(WritableFileWriter* file) const; uint32_t GetIDFromFile(RandomAccessFileReader* file) const; mutable MockTableFileSystem file_system_; mutable std::atomic next_id_; }; } // namespace mock } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/multiget_context.h000066400000000000000000000215021370372246700201030ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include #include "db/lookup_key.h" #include "db/merge_context.h" #include "rocksdb/env.h" #include "rocksdb/statistics.h" #include "rocksdb/types.h" #include "util/autovector.h" #include "util/math.h" namespace ROCKSDB_NAMESPACE { class GetContext; struct KeyContext { const Slice* key; LookupKey* lkey; Slice ukey; Slice ikey; ColumnFamilyHandle* column_family; Status* s; MergeContext merge_context; SequenceNumber max_covering_tombstone_seq; bool key_exists; void* cb_arg; PinnableSlice* value; std::string* timestamp; GetContext* get_context; KeyContext(ColumnFamilyHandle* col_family, const Slice& user_key, PinnableSlice* val, std::string* ts, Status* stat) : key(&user_key), lkey(nullptr), column_family(col_family), s(stat), max_covering_tombstone_seq(0), key_exists(false), cb_arg(nullptr), value(val), timestamp(ts), get_context(nullptr) {} KeyContext() = default; }; // The MultiGetContext class is a container for the sorted list of keys that // we need to lookup in a batch. Its main purpose is to make batch execution // easier by allowing various stages of the MultiGet lookups to operate on // subsets of keys, potentially non-contiguous. 
In order to accomplish this, // it defines the following classes - // // MultiGetContext::Range // MultiGetContext::Range::Iterator // MultiGetContext::Range::IteratorWrapper // // Here is an example of how this can be used - // // { // MultiGetContext ctx(...); // MultiGetContext::Range range = ctx.GetMultiGetRange(); // // // Iterate to determine some subset of the keys // MultiGetContext::Range::Iterator start = range.begin(); // MultiGetContext::Range::Iterator end = ...; // // // Make a new range with a subset of keys // MultiGetContext::Range subrange(range, start, end); // // // Define an auxillary vector, if needed, to hold additional data for // // each key // std::array aux; // // // Iterate over the subrange and the auxillary vector simultaneously // MultiGetContext::Range::Iterator iter = subrange.begin(); // for (; iter != subrange.end(); ++iter) { // KeyContext& key = *iter; // Foo& aux_key = aux_iter[iter.index()]; // ... // } // } class MultiGetContext { public: // Limit the number of keys in a batch to this number. Benchmarks show that // there is negligible benefit for batches exceeding this. 
Keeping this < 64 // simplifies iteration, as well as reduces the amount of stack allocations // htat need to be performed static const int MAX_BATCH_SIZE = 32; MultiGetContext(autovector* sorted_keys, size_t begin, size_t num_keys, SequenceNumber snapshot, const ReadOptions& read_opts) : num_keys_(num_keys), value_mask_(0), value_size_(0), lookup_key_ptr_(reinterpret_cast(lookup_key_stack_buf)) { if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) { lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]); lookup_key_ptr_ = reinterpret_cast( lookup_key_heap_buf.get()); } for (size_t iter = 0; iter != num_keys_; ++iter) { // autovector may not be contiguous storage, so make a copy sorted_keys_[iter] = (*sorted_keys)[begin + iter]; sorted_keys_[iter]->lkey = new (&lookup_key_ptr_[iter]) LookupKey(*sorted_keys_[iter]->key, snapshot, read_opts.timestamp); sorted_keys_[iter]->ukey = sorted_keys_[iter]->lkey->user_key(); sorted_keys_[iter]->ikey = sorted_keys_[iter]->lkey->internal_key(); } } ~MultiGetContext() { for (size_t i = 0; i < num_keys_; ++i) { lookup_key_ptr_[i].~LookupKey(); } } private: static const int MAX_LOOKUP_KEYS_ON_STACK = 16; alignas(alignof(LookupKey)) char lookup_key_stack_buf[sizeof(LookupKey) * MAX_LOOKUP_KEYS_ON_STACK]; std::array sorted_keys_; size_t num_keys_; uint64_t value_mask_; uint64_t value_size_; std::unique_ptr lookup_key_heap_buf; LookupKey* lookup_key_ptr_; public: // MultiGetContext::Range - Specifies a range of keys, by start and end index, // from the parent MultiGetContext. Each range contains a bit vector that // indicates whether the corresponding keys need to be processed or skipped. // A Range object can be copy constructed, and the new object inherits the // original Range's bit vector. This is useful for progressively skipping // keys as the lookup goes through various stages. For example, when looking // up keys in the same SST file, a Range is created excluding keys not // belonging to that file. 
A new Range is then copy constructed and individual // keys are skipped based on bloom filter lookup. class Range { public: // MultiGetContext::Range::Iterator - A forward iterator that iterates over // non-skippable keys in a Range, as well as keys whose final value has been // found. The latter is tracked by MultiGetContext::value_mask_ class Iterator { public: // -- iterator traits typedef Iterator self_type; typedef KeyContext value_type; typedef KeyContext& reference; typedef KeyContext* pointer; typedef int difference_type; typedef std::forward_iterator_tag iterator_category; Iterator(const Range* range, size_t idx) : range_(range), ctx_(range->ctx_), index_(idx) { while (index_ < range_->end_ && (uint64_t{1} << index_) & (range_->ctx_->value_mask_ | range_->skip_mask_)) index_++; } Iterator(const Iterator&) = default; Iterator& operator=(const Iterator&) = default; Iterator& operator++() { while (++index_ < range_->end_ && (uint64_t{1} << index_) & (range_->ctx_->value_mask_ | range_->skip_mask_)) ; return *this; } bool operator==(Iterator other) const { assert(range_->ctx_ == other.range_->ctx_); return index_ == other.index_; } bool operator!=(Iterator other) const { assert(range_->ctx_ == other.range_->ctx_); return index_ != other.index_; } KeyContext& operator*() { assert(index_ < range_->end_ && index_ >= range_->start_); return *(ctx_->sorted_keys_[index_]); } KeyContext* operator->() { assert(index_ < range_->end_ && index_ >= range_->start_); return ctx_->sorted_keys_[index_]; } size_t index() { return index_; } private: friend Range; const Range* range_; const MultiGetContext* ctx_; size_t index_; }; Range(const Range& mget_range, const Iterator& first, const Iterator& last) { ctx_ = mget_range.ctx_; start_ = first.index_; end_ = last.index_; skip_mask_ = mget_range.skip_mask_; assert(start_ < 64); assert(end_ < 64); } Range() = default; Iterator begin() const { return Iterator(this, start_); } Iterator end() const { return Iterator(this, end_); } 
bool empty() const { return RemainingMask() == 0; } void SkipKey(const Iterator& iter) { skip_mask_ |= uint64_t{1} << iter.index_; } // Update the value_mask_ in MultiGetContext so its // immediately reflected in all the Range Iterators void MarkKeyDone(Iterator& iter) { ctx_->value_mask_ |= (uint64_t{1} << iter.index_); } bool CheckKeyDone(Iterator& iter) const { return ctx_->value_mask_ & (uint64_t{1} << iter.index_); } uint64_t KeysLeft() const { return BitsSetToOne(RemainingMask()); } void AddSkipsFrom(const Range& other) { assert(ctx_ == other.ctx_); skip_mask_ |= other.skip_mask_; } uint64_t GetValueSize() { return ctx_->value_size_; } void AddValueSize(uint64_t value_size) { ctx_->value_size_ += value_size; } private: friend MultiGetContext; MultiGetContext* ctx_; size_t start_; size_t end_; uint64_t skip_mask_; Range(MultiGetContext* ctx, size_t num_keys) : ctx_(ctx), start_(0), end_(num_keys), skip_mask_(0) { assert(num_keys < 64); } uint64_t RemainingMask() const { return (((uint64_t{1} << end_) - 1) & ~((uint64_t{1} << start_) - 1) & ~(ctx_->value_mask_ | skip_mask_)); } }; // Return the initial range that encompasses all the keys in the batch Range GetMultiGetRange() { return Range(this, num_keys_); } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/persistent_cache_helper.cc000066400000000000000000000103771370372246700215350ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "table/persistent_cache_helper.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/format.h"

namespace ROCKSDB_NAMESPACE {

// Inserts `size` bytes at `data` into the persistent cache under the cache
// key derived from cache_options.key_prefix and `handle`.
// REQUIRES: a persistent cache is configured and it is a compressed cache.
void PersistentCacheHelper::InsertRawPage(
    const PersistentCacheOptions& cache_options, const BlockHandle& handle,
    const char* data, const size_t size) {
  assert(cache_options.persistent_cache);
  assert(cache_options.persistent_cache->IsCompressed());

  // construct the page key
  char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
  auto key = BlockBasedTable::GetCacheKey(cache_options.key_prefix.c_str(),
                                          cache_options.key_prefix.size(),
                                          handle, cache_key);
  // insert content to cache
  cache_options.persistent_cache->Insert(key, data, size);
}

// Inserts the bytes of `contents` into the persistent cache under the cache
// key derived from cache_options.key_prefix and `handle`.
// REQUIRES: a persistent cache is configured and it is NOT a compressed
// cache.
void PersistentCacheHelper::InsertUncompressedPage(
    const PersistentCacheOptions& cache_options, const BlockHandle& handle,
    const BlockContents& contents) {
  assert(cache_options.persistent_cache);
  assert(!cache_options.persistent_cache->IsCompressed());
  // Precondition:
  // (1) content is cacheable
  // (2) content is not compressed

  // construct the page key
  char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
  auto key = BlockBasedTable::GetCacheKey(cache_options.key_prefix.c_str(),
                                          cache_options.key_prefix.size(),
                                          handle, cache_key);
  // insert block contents to page cache
  cache_options.persistent_cache->Insert(key, contents.data.data(),
                                         contents.data.size());
}

// Looks up the block identified by `handle` in a compressed persistent cache.
// On a hit, *raw_data receives the cached bytes, PERSISTENT_CACHE_HIT is
// ticked and OK is returned; on a miss PERSISTENT_CACHE_MISS is ticked and
// the cache's non-OK status is returned. raw_data_size is only used for
// debug-build consistency checks.
Status PersistentCacheHelper::LookupRawPage(
    const PersistentCacheOptions& cache_options, const BlockHandle& handle,
    std::unique_ptr<char[]>* raw_data, const size_t raw_data_size) {
#ifdef NDEBUG
  (void)raw_data_size;
#endif
  assert(cache_options.persistent_cache);
  assert(cache_options.persistent_cache->IsCompressed());

  // construct the page key
  char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
  auto key = BlockBasedTable::GetCacheKey(cache_options.key_prefix.c_str(),
                                          cache_options.key_prefix.size(),
                                          handle, cache_key);
  // Lookup page
  // Zero-initialized so the assert below never reads an indeterminate value.
  size_t size = 0;
  Status s = cache_options.persistent_cache->Lookup(key, raw_data, &size);
  if (!s.ok()) {
    // cache miss
    RecordTick(cache_options.statistics, PERSISTENT_CACHE_MISS);
    return s;
  }

  // cache hit
  assert(raw_data_size == handle.size() + kBlockTrailerSize);
  assert(size == raw_data_size);
  RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT);
  return Status::OK();
}

// Looks up the block identified by `handle` in an uncompressed persistent
// cache. On a hit, *contents is replaced with the cached data,
// PERSISTENT_CACHE_HIT is ticked and OK is returned. Returns NotFound when
// `contents` is null, and the cache's non-OK status (after ticking
// PERSISTENT_CACHE_MISS) on a miss.
Status PersistentCacheHelper::LookupUncompressedPage(
    const PersistentCacheOptions& cache_options, const BlockHandle& handle,
    BlockContents* contents) {
  assert(cache_options.persistent_cache);
  assert(!cache_options.persistent_cache->IsCompressed());
  if (!contents) {
    // We shouldn't lookup in the cache: there is nowhere to store the result.
    return Status::NotFound();
  }

  // construct the page key
  char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
  auto key = BlockBasedTable::GetCacheKey(cache_options.key_prefix.c_str(),
                                          cache_options.key_prefix.size(),
                                          handle, cache_key);
  // Lookup page
  std::unique_ptr<char[]> data;
  // Zero-initialized so the assert below never reads an indeterminate value.
  size_t size = 0;
  Status s = cache_options.persistent_cache->Lookup(key, &data, &size);
  if (!s.ok()) {
    // cache miss
    RecordTick(cache_options.statistics, PERSISTENT_CACHE_MISS);
    return s;
  }

  // please note we are potentially comparing compressed data size with
  // uncompressed data size
  assert(handle.size() <= size);

  // update stats
  RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT);
  // construct result and return
  *contents = BlockContents(std::move(data), size);
  return Status::OK();
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/persistent_cache_helper.h000066400000000000000000000030231370372246700213650ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once #include #include "monitoring/statistics.h" #include "table/format.h" #include "table/persistent_cache_options.h" namespace ROCKSDB_NAMESPACE { struct BlockContents; // PersistentCacheHelper // // Encapsulates some of the helper logic for read and writing from the cache class PersistentCacheHelper { public: // insert block into raw page cache static void InsertRawPage(const PersistentCacheOptions& cache_options, const BlockHandle& handle, const char* data, const size_t size); // insert block into uncompressed cache static void InsertUncompressedPage( const PersistentCacheOptions& cache_options, const BlockHandle& handle, const BlockContents& contents); // lookup block from raw page cacge static Status LookupRawPage(const PersistentCacheOptions& cache_options, const BlockHandle& handle, std::unique_ptr* raw_data, const size_t raw_data_size); // lookup block from uncompressed cache static Status LookupUncompressedPage( const PersistentCacheOptions& cache_options, const BlockHandle& handle, BlockContents* contents); }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/persistent_cache_options.h000066400000000000000000000021001370372246700215740ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include "monitoring/statistics.h" #include "rocksdb/persistent_cache.h" namespace ROCKSDB_NAMESPACE { // PersistentCacheOptions // // This describe the caching behavior for page cache // This is used to pass the context for caching and the cache handle struct PersistentCacheOptions { PersistentCacheOptions() {} explicit PersistentCacheOptions( const std::shared_ptr& _persistent_cache, const std::string _key_prefix, Statistics* const _statistics) : persistent_cache(_persistent_cache), key_prefix(_key_prefix), statistics(_statistics) {} virtual ~PersistentCacheOptions() {} std::shared_ptr persistent_cache; std::string key_prefix; Statistics* statistics = nullptr; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/plain/000077500000000000000000000000001370372246700154375ustar00rootroot00000000000000rocksdb-6.11.4/table/plain/plain_table_bloom.cc000066400000000000000000000045721370372246700214200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "table/plain/plain_table_bloom.h" #include #include #include "util/dynamic_bloom.h" #include "memory/allocator.h" namespace ROCKSDB_NAMESPACE { namespace { uint32_t GetTotalBitsForLocality(uint32_t total_bits) { uint32_t num_blocks = (total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8); // Make num_blocks an odd number to make sure more bits are involved // when determining which block. 
if (num_blocks % 2 == 0) { num_blocks++; } return num_blocks * (CACHE_LINE_SIZE * 8); } } // namespace PlainTableBloomV1::PlainTableBloomV1(uint32_t num_probes) : kTotalBits(0), kNumBlocks(0), kNumProbes(num_probes), data_(nullptr) {} void PlainTableBloomV1::SetRawData(char* raw_data, uint32_t total_bits, uint32_t num_blocks) { data_ = raw_data; kTotalBits = total_bits; kNumBlocks = num_blocks; } void PlainTableBloomV1::SetTotalBits(Allocator* allocator, uint32_t total_bits, uint32_t locality, size_t huge_page_tlb_size, Logger* logger) { kTotalBits = (locality > 0) ? GetTotalBitsForLocality(total_bits) : (total_bits + 7) / 8 * 8; kNumBlocks = (locality > 0) ? (kTotalBits / (CACHE_LINE_SIZE * 8)) : 0; assert(kNumBlocks > 0 || kTotalBits > 0); assert(kNumProbes > 0); uint32_t sz = kTotalBits / 8; if (kNumBlocks > 0) { sz += CACHE_LINE_SIZE - 1; } assert(allocator); char* raw = allocator->AllocateAligned(sz, huge_page_tlb_size, logger); memset(raw, 0, sz); auto cache_line_offset = reinterpret_cast(raw) % CACHE_LINE_SIZE; if (kNumBlocks > 0 && cache_line_offset > 0) { raw += CACHE_LINE_SIZE - cache_line_offset; } data_ = raw; } void BloomBlockBuilder::AddKeysHashes( const std::vector& keys_hashes) { for (auto hash : keys_hashes) { bloom_.AddHash(hash); } } Slice BloomBlockBuilder::Finish() { return bloom_.GetRawData(); } const std::string BloomBlockBuilder::kBloomBlock = "kBloomBlock"; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/plain/plain_table_bloom.h000066400000000000000000000101761370372246700212570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #include #include "rocksdb/slice.h" #include "port/port.h" #include "util/bloom_impl.h" #include "util/hash.h" #include "third-party/folly/folly/ConstexprMath.h" #include namespace ROCKSDB_NAMESPACE { class Slice; class Allocator; class Logger; // A legacy Bloom filter implementation used by Plain Table db format, for // schema backward compatibility. Not for use in new filter applications. class PlainTableBloomV1 { public: // allocator: pass allocator to bloom filter, hence trace the usage of memory // total_bits: fixed total bits for the bloom // num_probes: number of hash probes for a single key // locality: If positive, optimize for cache line locality, 0 otherwise. // hash_func: customized hash function // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB // within this page size. Need to reserve huge pages for // it to be allocated, like: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt explicit PlainTableBloomV1(uint32_t num_probes = 6); void SetTotalBits(Allocator* allocator, uint32_t total_bits, uint32_t locality, size_t huge_page_tlb_size, Logger* logger); ~PlainTableBloomV1() {} // Assuming single threaded access to this function. 
void AddHash(uint32_t hash); // Multithreaded access to this function is OK bool MayContainHash(uint32_t hash) const; void Prefetch(uint32_t hash); uint32_t GetNumBlocks() const { return kNumBlocks; } Slice GetRawData() const { return Slice(data_, GetTotalBits() / 8); } void SetRawData(char* raw_data, uint32_t total_bits, uint32_t num_blocks = 0); uint32_t GetTotalBits() const { return kTotalBits; } bool IsInitialized() const { return kNumBlocks > 0 || kTotalBits > 0; } private: uint32_t kTotalBits; uint32_t kNumBlocks; const uint32_t kNumProbes; char* data_; static constexpr int LOG2_CACHE_LINE_SIZE = folly::constexpr_log2(CACHE_LINE_SIZE); }; #if defined(_MSC_VER) #pragma warning(push) // local variable is initialized but not referenced #pragma warning(disable : 4189) #endif inline void PlainTableBloomV1::Prefetch(uint32_t h) { if (kNumBlocks != 0) { uint32_t ignored; LegacyLocalityBloomImpl::PrepareHashMayMatch( h, kNumBlocks, data_, &ignored, LOG2_CACHE_LINE_SIZE); } } #if defined(_MSC_VER) #pragma warning(pop) #endif inline bool PlainTableBloomV1::MayContainHash(uint32_t h) const { assert(IsInitialized()); if (kNumBlocks != 0) { return LegacyLocalityBloomImpl::HashMayMatch( h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE); } else { return LegacyNoLocalityBloomImpl::HashMayMatch(h, kTotalBits, kNumProbes, data_); } } inline void PlainTableBloomV1::AddHash(uint32_t h) { assert(IsInitialized()); if (kNumBlocks != 0) { LegacyLocalityBloomImpl::AddHash(h, kNumBlocks, kNumProbes, data_, LOG2_CACHE_LINE_SIZE); } else { LegacyNoLocalityBloomImpl::AddHash(h, kTotalBits, kNumProbes, data_); } } class BloomBlockBuilder { public: static const std::string kBloomBlock; explicit BloomBlockBuilder(uint32_t num_probes = 6) : bloom_(num_probes) {} void SetTotalBits(Allocator* allocator, uint32_t total_bits, uint32_t locality, size_t huge_page_tlb_size, Logger* logger) { bloom_.SetTotalBits(allocator, total_bits, locality, huge_page_tlb_size, logger); } uint32_t 
GetNumBlocks() const { return bloom_.GetNumBlocks(); } void AddKeysHashes(const std::vector& keys_hashes); Slice Finish(); private: PlainTableBloomV1 bloom_; }; }; // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/plain/plain_table_builder.cc000066400000000000000000000247121370372246700217340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "table/plain/plain_table_builder.h" #include #include #include #include #include "db/dbformat.h" #include "file/writable_file_writer.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/table.h" #include "table/block_based/block_builder.h" #include "table/format.h" #include "table/meta_blocks.h" #include "table/plain/plain_table_bloom.h" #include "table/plain/plain_table_factory.h" #include "table/plain/plain_table_index.h" #include "util/coding.h" #include "util/crc32c.h" #include "util/stop_watch.h" namespace ROCKSDB_NAMESPACE { namespace { // a utility that helps writing block content to the file // @offset will advance if @block_contents was successfully written. // @block_handle the block handle this particular block. IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file, uint64_t* offset, BlockHandle* block_handle) { block_handle->set_offset(*offset); block_handle->set_size(block_contents.size()); IOStatus io_s = file->Append(block_contents); if (io_s.ok()) { *offset += block_contents.size(); } return io_s; } } // namespace // kPlainTableMagicNumber was picked by running // echo rocksdb.table.plain | sha1sum // and taking the leading 64 bits. 
extern const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ull; extern const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull; PlainTableBuilder::PlainTableBuilder( const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const std::vector>* int_tbl_prop_collector_factories, uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len, EncodingType encoding_type, size_t index_sparseness, uint32_t bloom_bits_per_key, const std::string& column_family_name, uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio, bool store_index_in_file) : ioptions_(ioptions), moptions_(moptions), bloom_block_(num_probes), file_(file), bloom_bits_per_key_(bloom_bits_per_key), huge_page_tlb_size_(huge_page_tlb_size), encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(), index_sparseness), store_index_in_file_(store_index_in_file), prefix_extractor_(moptions.prefix_extractor.get()) { // Build index block and save it in the file if hash_table_ratio > 0 if (store_index_in_file_) { assert(hash_table_ratio > 0 || IsTotalOrderMode()); index_builder_.reset(new PlainTableIndexBuilder( &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness, hash_table_ratio, huge_page_tlb_size_)); properties_.user_collected_properties [PlainTablePropertyNames::kBloomVersion] = "1"; // For future use } properties_.fixed_key_len = user_key_len; // for plain table, we put all the data in a big chuck. properties_.num_data_blocks = 1; // Fill it later if store_index_in_file_ == true properties_.index_size = 0; properties_.filter_size = 0; // To support roll-back to previous version, now still use version 0 for // plain encoding. properties_.format_version = (encoding_type == kPlain) ? 0 : 1; properties_.column_family_id = column_family_id; properties_.column_family_name = column_family_name; properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr ? 
moptions_.prefix_extractor->Name() : "nullptr"; std::string val; PutFixed32(&val, static_cast(encoder_.GetEncodingType())); properties_.user_collected_properties [PlainTablePropertyNames::kEncodingType] = val; for (auto& collector_factories : *int_tbl_prop_collector_factories) { table_properties_collectors_.emplace_back( collector_factories->CreateIntTblPropCollector(column_family_id)); } } PlainTableBuilder::~PlainTableBuilder() { } void PlainTableBuilder::Add(const Slice& key, const Slice& value) { // temp buffer for metadata bytes between key and value. char meta_bytes_buf[6]; size_t meta_bytes_buf_size = 0; ParsedInternalKey internal_key; if (!ParseInternalKey(key, &internal_key)) { assert(false); return; } if (internal_key.type == kTypeRangeDeletion) { status_ = Status::NotSupported("Range deletion unsupported"); return; } // Store key hash if (store_index_in_file_) { if (moptions_.prefix_extractor == nullptr) { keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key)); } else { Slice prefix = moptions_.prefix_extractor->Transform(internal_key.user_key); keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix)); } } // Write value assert(offset_ <= std::numeric_limits::max()); auto prev_offset = static_cast(offset_); // Write out the key io_status_ = encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, &meta_bytes_buf_size); if (SaveIndexInFile()) { index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset); } // Write value length uint32_t value_size = static_cast(value.size()); if (io_status_.ok()) { char* end_ptr = EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size); assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf)); meta_bytes_buf_size = end_ptr - meta_bytes_buf; io_status_ = file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size)); } // Write value if (io_status_.ok()) { io_status_ = file_->Append(value); offset_ += value_size + meta_bytes_buf_size; } if (io_status_.ok()) { properties_.num_entries++; 
properties_.raw_key_size += key.size(); properties_.raw_value_size += value.size(); if (internal_key.type == kTypeDeletion || internal_key.type == kTypeSingleDeletion) { properties_.num_deletions++; } else if (internal_key.type == kTypeMerge) { properties_.num_merge_operands++; } } // notify property collectors NotifyCollectTableCollectorsOnAdd( key, value, offset_, table_properties_collectors_, ioptions_.info_log); status_ = io_status_; } Status PlainTableBuilder::Finish() { assert(!closed_); closed_ = true; properties_.data_size = offset_; // Write the following blocks // 1. [meta block: bloom] - optional // 2. [meta block: index] - optional // 3. [meta block: properties] // 4. [metaindex block] // 5. [footer] MetaIndexBuilder meta_index_builer; if (store_index_in_file_ && (properties_.num_entries > 0)) { assert(properties_.num_entries <= std::numeric_limits::max()); Status s; BlockHandle bloom_block_handle; if (bloom_bits_per_key_ > 0) { bloom_block_.SetTotalBits( &arena_, static_cast(properties_.num_entries) * bloom_bits_per_key_, ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log); PutVarint32(&properties_.user_collected_properties [PlainTablePropertyNames::kNumBloomBlocks], bloom_block_.GetNumBlocks()); bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_); Slice bloom_finish_result = bloom_block_.Finish(); properties_.filter_size = bloom_finish_result.size(); io_status_ = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle); if (!io_status_.ok()) { status_ = io_status_; return status_; } meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle); } BlockHandle index_block_handle; Slice index_finish_result = index_builder_->Finish(); properties_.index_size = index_finish_result.size(); io_status_ = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle); if (!io_status_.ok()) { status_ = io_status_; return status_; } meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock, 
index_block_handle); } // Calculate bloom block size and index block size PropertyBlockBuilder property_block_builder; // -- Add basic properties property_block_builder.AddTableProperty(properties_); property_block_builder.Add(properties_.user_collected_properties); // -- Add user collected properties NotifyCollectTableCollectorsOnFinish(table_properties_collectors_, ioptions_.info_log, &property_block_builder); // -- Write property block BlockHandle property_block_handle; IOStatus s = WriteBlock(property_block_builder.Finish(), file_, &offset_, &property_block_handle); if (!s.ok()) { return std::move(s); } meta_index_builer.Add(kPropertiesBlock, property_block_handle); // -- write metaindex block BlockHandle metaindex_block_handle; io_status_ = WriteBlock(meta_index_builer.Finish(), file_, &offset_, &metaindex_block_handle); if (!io_status_.ok()) { status_ = io_status_; return status_; } // Write Footer // no need to write out new footer if we're using default checksum Footer footer(kLegacyPlainTableMagicNumber, 0); footer.set_metaindex_handle(metaindex_block_handle); footer.set_index_handle(BlockHandle::NullBlockHandle()); std::string footer_encoding; footer.EncodeTo(&footer_encoding); io_status_ = file_->Append(footer_encoding); if (io_status_.ok()) { offset_ += footer_encoding.size(); } status_ = io_status_; return status_; } void PlainTableBuilder::Abandon() { closed_ = true; } uint64_t PlainTableBuilder::NumEntries() const { return properties_.num_entries; } uint64_t PlainTableBuilder::FileSize() const { return offset_; } std::string PlainTableBuilder::GetFileChecksum() const { if (file_ != nullptr) { return file_->GetFileChecksum(); } else { return kUnknownFileChecksum; } } const char* PlainTableBuilder::GetFileChecksumFuncName() const { if (file_ != nullptr) { return file_->GetFileChecksumFuncName(); } else { return kUnknownFileChecksumFuncName; } } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE 
rocksdb-6.11.4/table/plain/plain_table_builder.h000066400000000000000000000122771370372246700216010ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include #include "db/version_edit.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "table/plain/plain_table_bloom.h" #include "table/plain/plain_table_index.h" #include "table/plain/plain_table_key_coding.h" #include "table/table_builder.h" namespace ROCKSDB_NAMESPACE { class BlockBuilder; class BlockHandle; class WritableFile; class TableBuilder; // The builder class of PlainTable. For description of PlainTable format // See comments of class PlainTableFactory, where instances of // PlainTableReader are created. class PlainTableBuilder: public TableBuilder { public: // Create a builder that will store the contents of the table it is // building in *file. Does not close the file. It is up to the // caller to close the file after calling Finish(). The output file // will be part of level specified by 'level'. A value of -1 means // that the caller does not know which level the output file will reside. 
PlainTableBuilder( const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const std::vector>* int_tbl_prop_collector_factories, uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_size, EncodingType encoding_type, size_t index_sparseness, uint32_t bloom_bits_per_key, const std::string& column_family_name, uint32_t num_probes = 6, size_t huge_page_tlb_size = 0, double hash_table_ratio = 0, bool store_index_in_file = false); // No copying allowed PlainTableBuilder(const PlainTableBuilder&) = delete; void operator=(const PlainTableBuilder&) = delete; // REQUIRES: Either Finish() or Abandon() has been called. ~PlainTableBuilder(); // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. // REQUIRES: Finish(), Abandon() have not been called void Add(const Slice& key, const Slice& value) override; // Return non-ok iff some error has been detected. Status status() const override { return status_; } // Return non-ok iff some error happens during IO. IOStatus io_status() const override { return io_status_; } // Finish building the table. Stops using the file passed to the // constructor after this function returns. // REQUIRES: Finish(), Abandon() have not been called Status Finish() override; // Indicate that the contents of this builder should be abandoned. Stops // using the file passed to the constructor after this function returns. // If the caller is not going to call Finish(), it must call Abandon() // before destroying this builder. // REQUIRES: Finish(), Abandon() have not been called void Abandon() override; // Number of calls to Add() so far. uint64_t NumEntries() const override; // Size of the file generated so far. If invoked after a successful // Finish() call, returns the size of the final generated file. 
uint64_t FileSize() const override; TableProperties GetTableProperties() const override { return properties_; } bool SaveIndexInFile() const { return store_index_in_file_; } // Get file checksum std::string GetFileChecksum() const override; // Get file checksum function name const char* GetFileChecksumFuncName() const override; private: Arena arena_; const ImmutableCFOptions& ioptions_; const MutableCFOptions& moptions_; std::vector> table_properties_collectors_; BloomBlockBuilder bloom_block_; std::unique_ptr index_builder_; WritableFileWriter* file_; uint64_t offset_ = 0; uint32_t bloom_bits_per_key_; size_t huge_page_tlb_size_; Status status_; IOStatus io_status_; TableProperties properties_; PlainTableKeyEncoder encoder_; bool store_index_in_file_; std::vector keys_or_prefixes_hashes_; bool closed_ = false; // Either Finish() or Abandon() has been called. const SliceTransform* prefix_extractor_; Slice GetPrefix(const Slice& target) const { assert(target.size() >= 8); // target is internal key return GetPrefixFromUserKey(GetUserKey(target)); } Slice GetPrefix(const ParsedInternalKey& target) const { return GetPrefixFromUserKey(target.user_key); } Slice GetUserKey(const Slice& key) const { return Slice(key.data(), key.size() - 8); } Slice GetPrefixFromUserKey(const Slice& user_key) const { if (!IsTotalOrderMode()) { return prefix_extractor_->Transform(user_key); } else { // Use empty slice as prefix if prefix_extractor is not set. // In that case, // it falls back to pure binary search and // total iterator seek is supported. return Slice(); } } bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); } }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/plain/plain_table_factory.cc000066400000000000000000000261561370372246700217610ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef ROCKSDB_LITE #include "table/plain/plain_table_factory.h" #include #include #include "db/dbformat.h" #include "options/options_helper.h" #include "port/port.h" #include "rocksdb/convenience.h" #include "table/plain/plain_table_builder.h" #include "table/plain/plain_table_reader.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { static std::unordered_map plain_table_type_info = { {"user_key_len", {offsetof(struct PlainTableOptions, user_key_len), OptionType::kUInt32T, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"bloom_bits_per_key", {offsetof(struct PlainTableOptions, bloom_bits_per_key), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"hash_table_ratio", {offsetof(struct PlainTableOptions, hash_table_ratio), OptionType::kDouble, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"index_sparseness", {offsetof(struct PlainTableOptions, index_sparseness), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"huge_page_tlb_size", {offsetof(struct PlainTableOptions, huge_page_tlb_size), OptionType::kSizeT, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"encoding_type", {offsetof(struct PlainTableOptions, encoding_type), OptionType::kEncodingType, OptionVerificationType::kByName, OptionTypeFlags::kNone, 0}}, {"full_scan_mode", {offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}, {"store_index_in_file", {offsetof(struct PlainTableOptions, store_index_in_file), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0}}}; Status PlainTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool 
/*prefetch_index_and_filter_in_cache*/) const { return PlainTableReader::Open( table_reader_options.ioptions, table_reader_options.env_options, table_reader_options.internal_comparator, std::move(file), file_size, table, table_options_.bloom_bits_per_key, table_options_.hash_table_ratio, table_options_.index_sparseness, table_options_.huge_page_tlb_size, table_options_.full_scan_mode, table_reader_options.immortal, table_reader_options.prefix_extractor); } TableBuilder* PlainTableFactory::NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const { // Ignore the skip_filters flag. PlainTable format is optimized for small // in-memory dbs. The skip_filters optimization is not useful for plain // tables // return new PlainTableBuilder( table_builder_options.ioptions, table_builder_options.moptions, table_builder_options.int_tbl_prop_collector_factories, column_family_id, file, table_options_.user_key_len, table_options_.encoding_type, table_options_.index_sparseness, table_options_.bloom_bits_per_key, table_builder_options.column_family_name, 6, table_options_.huge_page_tlb_size, table_options_.hash_table_ratio, table_options_.store_index_in_file); } std::string PlainTableFactory::GetPrintableTableOptions() const { std::string ret; ret.reserve(20000); const int kBufferSize = 200; char buffer[kBufferSize]; snprintf(buffer, kBufferSize, " user_key_len: %u\n", table_options_.user_key_len); ret.append(buffer); snprintf(buffer, kBufferSize, " bloom_bits_per_key: %d\n", table_options_.bloom_bits_per_key); ret.append(buffer); snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", table_options_.hash_table_ratio); ret.append(buffer); snprintf(buffer, kBufferSize, " index_sparseness: %" ROCKSDB_PRIszt "\n", table_options_.index_sparseness); ret.append(buffer); snprintf(buffer, kBufferSize, " huge_page_tlb_size: %" ROCKSDB_PRIszt "\n", table_options_.huge_page_tlb_size); ret.append(buffer); snprintf(buffer, 
kBufferSize, " encoding_type: %d\n", table_options_.encoding_type); ret.append(buffer); snprintf(buffer, kBufferSize, " full_scan_mode: %d\n", table_options_.full_scan_mode); ret.append(buffer); snprintf(buffer, kBufferSize, " store_index_in_file: %d\n", table_options_.store_index_in_file); ret.append(buffer); return ret; } const PlainTableOptions& PlainTableFactory::table_options() const { return table_options_; } Status GetPlainTableOptionsFromString(const PlainTableOptions& table_options, const std::string& opts_str, PlainTableOptions* new_table_options) { ConfigOptions config_options; config_options.input_strings_escaped = false; config_options.ignore_unknown_options = false; return GetPlainTableOptionsFromString(config_options, table_options, opts_str, new_table_options); } Status GetPlainTableOptionsFromString(const ConfigOptions& config_options, const PlainTableOptions& table_options, const std::string& opts_str, PlainTableOptions* new_table_options) { std::unordered_map opts_map; Status s = StringToMap(opts_str, &opts_map); if (!s.ok()) { return s; } return GetPlainTableOptionsFromMap(config_options, table_options, opts_map, new_table_options); } Status GetMemTableRepFactoryFromString( const std::string& opts_str, std::unique_ptr* new_mem_factory) { std::vector opts_list = StringSplit(opts_str, ':'); size_t len = opts_list.size(); if (opts_list.empty() || opts_list.size() > 2) { return Status::InvalidArgument("Can't parse memtable_factory option ", opts_str); } MemTableRepFactory* mem_factory = nullptr; if (opts_list[0] == "skip_list") { // Expecting format // skip_list: if (2 == len) { size_t lookahead = ParseSizeT(opts_list[1]); mem_factory = new SkipListFactory(lookahead); } else if (1 == len) { mem_factory = new SkipListFactory(); } } else if (opts_list[0] == "prefix_hash") { // Expecting format // prfix_hash: if (2 == len) { size_t hash_bucket_count = ParseSizeT(opts_list[1]); mem_factory = NewHashSkipListRepFactory(hash_bucket_count); } else if (1 == 
len) { mem_factory = NewHashSkipListRepFactory(); } } else if (opts_list[0] == "hash_linkedlist") { // Expecting format // hash_linkedlist: if (2 == len) { size_t hash_bucket_count = ParseSizeT(opts_list[1]); mem_factory = NewHashLinkListRepFactory(hash_bucket_count); } else if (1 == len) { mem_factory = NewHashLinkListRepFactory(); } } else if (opts_list[0] == "vector") { // Expecting format // vector: if (2 == len) { size_t count = ParseSizeT(opts_list[1]); mem_factory = new VectorRepFactory(count); } else if (1 == len) { mem_factory = new VectorRepFactory(); } } else if (opts_list[0] == "cuckoo") { return Status::NotSupported( "cuckoo hash memtable is not supported anymore."); } else { return Status::InvalidArgument("Unrecognized memtable_factory option ", opts_str); } if (mem_factory != nullptr) { new_mem_factory->reset(mem_factory); } return Status::OK(); } std::string ParsePlainTableOptions(const ConfigOptions& config_options, const std::string& name, const std::string& org_value, PlainTableOptions* new_options) { const std::string& value = config_options.input_strings_escaped ? 
// Applies every name/value pair in `opts_map` on top of `table_options`
// and writes the result to `*new_table_options`.
//
// A pair that fails to parse aborts the whole operation with
// InvalidArgument, after resetting `*new_table_options` back to
// `table_options`. The only failures that are tolerated are those for a
// known option that is verified by name or deprecated, and only when
// config_options.input_strings_escaped is set (an unescaped input indicates
// the old API, where everything is expected to be parsable).
Status GetPlainTableOptionsFromMap(
    const ConfigOptions& config_options, const PlainTableOptions& table_options,
    const std::unordered_map<std::string, std::string>& opts_map,
    PlainTableOptions* new_table_options) {
  assert(new_table_options);
  *new_table_options = table_options;

  for (const auto& name_and_value : opts_map) {
    const std::string& name = name_and_value.first;
    const std::string failure = ParsePlainTableOptions(
        config_options, name, name_and_value.second, new_table_options);
    if (failure.empty()) {
      continue;
    }

    const auto info = plain_table_type_info.find(name);
    const bool tolerated =
        info != plain_table_type_info.end() &&
        config_options.input_strings_escaped &&
        (info->second.IsByName() || info->second.IsDeprecated());
    if (tolerated) {
      continue;
    }

    // Undo any partially applied options before reporting the error.
    *new_table_options = table_options;
    return Status::InvalidArgument("Can't parse PlainTableOptions:",
                                   name + " " + failure);
  }
  return Status::OK();
}
// // PlainTable requires fixed length key, configured as a constructor // parameter of the factory class. Output file format: // +-------------+-----------------+ // | version | user_key_length | // +------------++------------+-----------------+ <= key1 offset // | encoded key1 | value_size | | // +------------+-------------+-------------+ | // | value1 | // | | // +--------------------------+-------------+---+ <= key2 offset // | encoded key2 | value_size | | // +------------+-------------+-------------+ | // | value2 | // | | // | ...... | // +-----------------+--------------------------+ // // When the key encoding type is kPlain, the key part is encoded as: // +------------+--------------------+ // | [key_size] | internal key | // +------------+--------------------+ // for the user_key_len = kPlainTableVariableLength case, // and simply: // +----------------------+ // | internal key | // +----------------------+ // for the user_key_len != kPlainTableVariableLength case. // // If the key encoding type is kPrefix, keys are encoded in this format. // There are three ways to encode a key: // (1) Full Key // +---------------+---------------+-------------------+ // | Full Key Flag | Full Key Size | Full Internal Key | // +---------------+---------------+-------------------+ // which simply encodes a full key // // (2) A key sharing the same prefix as the previous key, which is encoded as // format of (1). // +-------------+-------------+-------------+-------------+------------+ // | Prefix Flag | Prefix Size | Suffix Flag | Suffix Size | Key Suffix | // +-------------+-------------+-------------+-------------+------------+ // where key is the suffix part of the key, including the internal bytes. // The actual key will be constructed by concatenating prefix part of the // previous key, with the suffix part of the key here, with sizes given here. // // (3) A key sharing the same prefix as the previous key, which is encoded as // the format of (2).
// +-----------------+-----------------+------------------------+ // | Key Suffix Flag | Key Suffix Size | Suffix of Internal Key | // +-----------------+-----------------+------------------------+ // The key will be constructed by concatenating previous key's prefix (which is // also a prefix of the last key encoded in the format of (1)) and the // key given here. // // For example, the following keys (prefix and suffix are separated by // spaces): // 0000 0001 // 0000 00021 // 0000 0002 // 00011 00 // 0002 0001 // Will be encoded like this: // FK 8 00000001 // PF 4 SF 5 00021 // SF 4 0002 // FK 7 0001100 // FK 8 00020001 // (where FK means full key flag, PF means prefix flag and SF means suffix flag) // // All those "key flag + key size" shown above are in this format: // The 8 bits of the first byte: // +----+----+----+----+----+----+----+----+ // | Type | Size | // +----+----+----+----+----+----+----+----+ // Type indicates: full key, prefix, or suffix. // The last 6 bits are for size. If the size bits are not all 1, it means the // size of the key. Otherwise, varint32 is read after this byte. This varint // value + 0x3F (the value of all 1) will be the key size. // // For example, full key with length 16 will be encoded as (binary): // 00 010000 // (00 means full key) // and a prefix with 100 bytes will be encoded as: // 01 111111 00100101 // (63) (37) // (01 means prefix) // // All the internal keys above (including kPlain and kPrefix) are encoded in // this format: // There are two types: // (1) normal internal key format // +----------- ...... -------------+----+---+---+---+---+---+---+---+ // | user key |type| sequence ID | // +----------- ..... --------------+----+---+---+---+---+---+---+---+ // (2) Special case for keys whose sequence ID is 0 and is value type // +----------- ...... -------------+----+ // | user key |0x80| // +----------- ..... --------------+----+ // To save 7 bytes for the special case where sequence ID = 0.
// // class PlainTableFactory : public TableFactory { public: ~PlainTableFactory() {} // user_key_len is the length of the user key. If it is set to be // kPlainTableVariableLength, then it means variable length. Otherwise, all // the keys need to have the fix length of this value. bloom_bits_per_key is // number of bits used for bloom filer per key. hash_table_ratio is // the desired utilization of the hash table used for prefix hashing. // hash_table_ratio = number of prefixes / #buckets in the hash table // hash_table_ratio = 0 means skip hash table but only replying on binary // search. // index_sparseness determines index interval for keys // inside the same prefix. It will be the maximum number of linear search // required after hash and binary search. // index_sparseness = 0 means index for every key. // huge_page_tlb_size determines whether to allocate hash indexes from huge // page TLB and the page size if allocating from there. See comments of // Arena::AllocateAligned() for details. explicit PlainTableFactory( const PlainTableOptions& _table_options = PlainTableOptions()) : table_options_(_table_options) {} const char* Name() const override { return "PlainTable"; } Status NewTableReader(const TableReaderOptions& table_reader_options, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool prefetch_index_and_filter_in_cache) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, uint32_t column_family_id, WritableFileWriter* file) const override; std::string GetPrintableTableOptions() const override; const PlainTableOptions& table_options() const; static const char kValueTypeSeqId0 = char(~0); // Sanitizes the specified DB Options. 
// Binds this index to the serialized index block in `data`.
//
// Layout read here: varint32 index_size, varint32 num_prefixes, then
// index_size fixed 32-bit buckets, then the sub-index bytes (everything that
// remains). No bytes are copied: index_ and sub_index_ point into `data`, so
// the memory behind `data` must outlive this object.
//
// Returns Status::Corruption if either header field cannot be decoded, if the
// bucket count is zero, or if `data` is too short to hold the bucket array.
// Members already decoded before the failure keep their new values.
Status PlainTableIndex::InitFromRawData(Slice data) {
  if (!GetVarint32(&data, &index_size_)) {
    return Status::Corruption("Couldn't read the index size!");
  }
  // GetOffset() reduces the hash modulo index_size_; a zero bucket count read
  // from a damaged file must be rejected here instead of only being asserted.
  if (index_size_ == 0) {
    return Status::Corruption("Index size must be greater than zero!");
  }
  if (!GetVarint32(&data, &num_prefixes_)) {
    return Status::Corruption("Couldn't read the number of prefixes!");
  }

  // Widen before multiplying so the size check itself cannot wrap around.
  const uint64_t bucket_bytes =
      static_cast<uint64_t>(index_size_) * kOffsetLen;
  if (data.size() < bucket_bytes) {
    return Status::Corruption("Index block too small for its bucket array!");
  }
  sub_index_size_ = static_cast<uint32_t>(data.size() - bucket_bytes);

  char* index_data_begin = const_cast<char*>(data.data());
  index_ = reinterpret_cast<uint32_t*>(index_data_begin);
  sub_index_ = reinterpret_cast<char*>(index_ + index_size_);
  return Status::OK();
}
return kNoPrefixForBucket; } else { // point directly to the file return kDirectToFile; } } void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash, uint32_t offset) { if (num_records_in_current_group_ == kNumRecordsPerGroup) { current_group_ = AllocateNewGroup(); num_records_in_current_group_ = 0; } auto& new_record = current_group_[num_records_in_current_group_++]; new_record.hash = hash; new_record.offset = offset; new_record.next = nullptr; } void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice, uint32_t key_offset) { if (is_first_record_ || prev_key_prefix_ != key_prefix_slice.ToString()) { ++num_prefixes_; if (!is_first_record_) { keys_per_prefix_hist_.Add(num_keys_per_prefix_); } num_keys_per_prefix_ = 0; prev_key_prefix_ = key_prefix_slice.ToString(); prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice); due_index_ = true; } if (due_index_) { // Add an index key for every kIndexIntervalForSamePrefixKeys keys record_list_.AddRecord(prev_key_prefix_hash_, key_offset); due_index_ = false; } num_keys_per_prefix_++; if (index_sparseness_ == 0 || num_keys_per_prefix_ % index_sparseness_ == 0) { due_index_ = true; } is_first_record_ = false; } Slice PlainTableIndexBuilder::Finish() { AllocateIndex(); std::vector hash_to_offsets(index_size_, nullptr); std::vector entries_per_bucket(index_size_, 0); BucketizeIndexes(&hash_to_offsets, &entries_per_bucket); keys_per_prefix_hist_.Add(num_keys_per_prefix_); ROCKS_LOG_INFO(ioptions_.info_log, "Number of Keys per prefix Histogram: %s", keys_per_prefix_hist_.ToString().c_str()); // From the temp data structure, populate indexes. return FillIndexes(hash_to_offsets, entries_per_bucket); } void PlainTableIndexBuilder::AllocateIndex() { if (prefix_extractor_ == nullptr || hash_table_ratio_ <= 0) { // Fall back to pure binary search if the user fails to specify a prefix // extractor. 
index_size_ = 1; } else { double hash_table_size_multipier = 1.0 / hash_table_ratio_; index_size_ = static_cast(num_prefixes_ * hash_table_size_multipier) + 1; assert(index_size_ > 0); } } void PlainTableIndexBuilder::BucketizeIndexes( std::vector* hash_to_offsets, std::vector* entries_per_bucket) { bool first = true; uint32_t prev_hash = 0; size_t num_records = record_list_.GetNumRecords(); for (size_t i = 0; i < num_records; i++) { IndexRecord* index_record = record_list_.At(i); uint32_t cur_hash = index_record->hash; if (first || prev_hash != cur_hash) { prev_hash = cur_hash; first = false; } uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_); IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket]; index_record->next = prev_bucket_head; (*hash_to_offsets)[bucket] = index_record; (*entries_per_bucket)[bucket]++; } sub_index_size_ = 0; for (auto entry_count : *entries_per_bucket) { if (entry_count <= 1) { continue; } // Only buckets with more than 1 entry will have subindex. sub_index_size_ += VarintLength(entry_count); // total bytes needed to store these entries' in-file offsets. 
sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen; } } Slice PlainTableIndexBuilder::FillIndexes( const std::vector& hash_to_offsets, const std::vector& entries_per_bucket) { ROCKS_LOG_DEBUG(ioptions_.info_log, "Reserving %" PRIu32 " bytes for plain table's sub_index", sub_index_size_); auto total_allocate_size = GetTotalSize(); char* allocated = arena_->AllocateAligned( total_allocate_size, huge_page_tlb_size_, ioptions_.info_log); auto temp_ptr = EncodeVarint32(allocated, index_size_); uint32_t* index = reinterpret_cast(EncodeVarint32(temp_ptr, num_prefixes_)); char* sub_index = reinterpret_cast(index + index_size_); uint32_t sub_index_offset = 0; for (uint32_t i = 0; i < index_size_; i++) { uint32_t num_keys_for_bucket = entries_per_bucket[i]; switch (num_keys_for_bucket) { case 0: // No key for bucket PutUnaligned(index + i, (uint32_t)PlainTableIndex::kMaxFileSize); break; case 1: // point directly to the file offset PutUnaligned(index + i, hash_to_offsets[i]->offset); break; default: // point to second level indexes. 
PutUnaligned(index + i, sub_index_offset | PlainTableIndex::kSubIndexMask); char* prev_ptr = &sub_index[sub_index_offset]; char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket); sub_index_offset += static_cast(cur_ptr - prev_ptr); char* sub_index_pos = &sub_index[sub_index_offset]; IndexRecord* record = hash_to_offsets[i]; int j; for (j = num_keys_for_bucket - 1; j >= 0 && record; j--, record = record->next) { EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset); } assert(j == -1 && record == nullptr); sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket; assert(sub_index_offset <= sub_index_size_); break; } } assert(sub_index_offset == sub_index_size_); ROCKS_LOG_DEBUG(ioptions_.info_log, "hash table size: %" PRIu32 ", suffix_map length %" PRIu32, index_size_, sub_index_size_); return Slice(allocated, GetTotalSize()); } const std::string PlainTableIndexBuilder::kPlainTableIndexBlock = "PlainTableIndexBlock"; }; // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/plain/plain_table_index.h000066400000000000000000000203271370372246700212550ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include #include #include "db/dbformat.h" #include "memory/arena.h" #include "monitoring/histogram.h" #include "options/cf_options.h" #include "rocksdb/options.h" namespace ROCKSDB_NAMESPACE { // The file contains two classes PlainTableIndex and PlainTableIndexBuilder // The two classes implement the index format of PlainTable. // For descripton of PlainTable format, see comments of class // PlainTableFactory // // // PlainTableIndex contains buckets size of index_size_, each is a // 32-bit integer. 
The lower 31 bits contain an offset value (explained below) // and the first bit of the integer indicates type of the offset. // // +--------------+------------------------------------------------------+ // | Flag (1 bit) | Offset to binary search buffer or file (31 bits) + // +--------------+------------------------------------------------------+ // // Explanation for the "flag bit": // // 0 indicates that the bucket contains only one prefix (no conflict when // hashing this prefix), whose first row starts from this offset of the // file. // 1 indicates that the bucket contains more than one prefixes, or there // are too many rows for one prefix so we need a binary search for it. In // this case, the offset indicates the offset of sub_index_ holding the // binary search indexes of keys for those rows. Those binary search indexes // are organized in this way: // // The first 4 bytes, indicate how many indexes (N) are stored after it. After // it, there are N 32-bit integers, each points of an offset of the file, // which // points to starting of a row. Those offsets need to be guaranteed to be in // ascending order so the keys they are pointing to are also in ascending // order // to make sure we can use them to do binary searches. Below is visual // presentation of a bucket. // // // number_of_records: varint32 // record 1 file offset: fixedint32 // record 2 file offset: fixedint32 // .... // record N file offset: fixedint32 // // The class loads the index block from a PlainTable SST file, and executes // the index lookup. // The class is used by PlainTableReader class. class PlainTableIndex { public: enum IndexSearchResult { kNoPrefixForBucket = 0, kDirectToFile = 1, kSubindex = 2 }; explicit PlainTableIndex(Slice data) { InitFromRawData(data); } PlainTableIndex() : index_size_(0), sub_index_size_(0), num_prefixes_(0), index_(nullptr), sub_index_(nullptr) {} // The function that executes the lookup the hash table. // The hash key is `prefix_hash`. 
The function fills the hash bucket // content in `bucket_value`, which is up to the caller to interpret. IndexSearchResult GetOffset(uint32_t prefix_hash, uint32_t* bucket_value) const; // Initialize data from `index_data`, which points to raw data for // index stored in the SST file. Status InitFromRawData(Slice index_data); // Decode the sub index for specific hash bucket. // The `offset` is the value returned as `bucket_value` by GetOffset() // and is only valid when the return value is `kSubindex`. // The return value is the pointer to the starting address of the // sub-index. `upper_bound` is filled with the value indicating how many // entries the sub-index has. const char* GetSubIndexBasePtrAndUpperBound(uint32_t offset, uint32_t* upper_bound) const { const char* index_ptr = &sub_index_[offset]; return GetVarint32Ptr(index_ptr, index_ptr + 4, upper_bound); } uint32_t GetIndexSize() const { return index_size_; } uint32_t GetSubIndexSize() const { return sub_index_size_; } uint32_t GetNumPrefixes() const { return num_prefixes_; } static const uint64_t kMaxFileSize = (1u << 31) - 1; static const uint32_t kSubIndexMask = 0x80000000; static const size_t kOffsetLen = sizeof(uint32_t); private: uint32_t index_size_; uint32_t sub_index_size_; uint32_t num_prefixes_; uint32_t* index_; char* sub_index_; }; // PlainTableIndexBuilder is used to create plain table index. // After calling Finish(), it returns Slice, which is usually // used either to initialize PlainTableIndex or // to save index to sst file. // For more details about the index, please refer to: // https://github.com/facebook/rocksdb/wiki/PlainTable-Format // #wiki-in-memory-index-format // The class is used by PlainTableBuilder class. 
class PlainTableIndexBuilder { public: PlainTableIndexBuilder(Arena* arena, const ImmutableCFOptions& ioptions, const SliceTransform* prefix_extractor, size_t index_sparseness, double hash_table_ratio, size_t huge_page_tlb_size) : arena_(arena), ioptions_(ioptions), record_list_(kRecordsPerGroup), is_first_record_(true), due_index_(false), num_prefixes_(0), num_keys_per_prefix_(0), prev_key_prefix_hash_(0), index_sparseness_(index_sparseness), index_size_(0), sub_index_size_(0), prefix_extractor_(prefix_extractor), hash_table_ratio_(hash_table_ratio), huge_page_tlb_size_(huge_page_tlb_size) {} void AddKeyPrefix(Slice key_prefix_slice, uint32_t key_offset); Slice Finish(); uint32_t GetTotalSize() const { return VarintLength(index_size_) + VarintLength(num_prefixes_) + PlainTableIndex::kOffsetLen * index_size_ + sub_index_size_; } static const std::string kPlainTableIndexBlock; private: struct IndexRecord { uint32_t hash; // hash of the prefix uint32_t offset; // offset of a row IndexRecord* next; }; // Helper class to track all the index records class IndexRecordList { public: explicit IndexRecordList(size_t num_records_per_group) : kNumRecordsPerGroup(num_records_per_group), current_group_(nullptr), num_records_in_current_group_(num_records_per_group) {} ~IndexRecordList() { for (size_t i = 0; i < groups_.size(); i++) { delete[] groups_[i]; } } void AddRecord(uint32_t hash, uint32_t offset); size_t GetNumRecords() const { return (groups_.size() - 1) * kNumRecordsPerGroup + num_records_in_current_group_; } IndexRecord* At(size_t index) { return &(groups_[index / kNumRecordsPerGroup] [index % kNumRecordsPerGroup]); } private: IndexRecord* AllocateNewGroup() { IndexRecord* result = new IndexRecord[kNumRecordsPerGroup]; groups_.push_back(result); return result; } // Each group in `groups_` contains fix-sized records (determined by // kNumRecordsPerGroup). Which can help us minimize the cost if resizing // occurs. 
const size_t kNumRecordsPerGroup; IndexRecord* current_group_; // List of arrays allocated std::vector groups_; size_t num_records_in_current_group_; }; void AllocateIndex(); // Internal helper function to bucket index record list to hash buckets. void BucketizeIndexes(std::vector* hash_to_offsets, std::vector* entries_per_bucket); // Internal helper class to fill the indexes and bloom filters to internal // data structures. Slice FillIndexes(const std::vector& hash_to_offsets, const std::vector& entries_per_bucket); Arena* arena_; const ImmutableCFOptions ioptions_; HistogramImpl keys_per_prefix_hist_; IndexRecordList record_list_; bool is_first_record_; bool due_index_; uint32_t num_prefixes_; uint32_t num_keys_per_prefix_; uint32_t prev_key_prefix_hash_; size_t index_sparseness_; uint32_t index_size_; uint32_t sub_index_size_; const SliceTransform* prefix_extractor_; double hash_table_ratio_; size_t huge_page_tlb_size_; std::string prev_key_prefix_; static const size_t kRecordsPerGroup = 256; }; }; // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/plain/plain_table_key_coding.cc000066400000000000000000000430531370372246700224200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef ROCKSDB_LITE #include "table/plain/plain_table_key_coding.h" #include #include #include "db/dbformat.h" #include "file/writable_file_writer.h" #include "table/plain/plain_table_factory.h" #include "table/plain/plain_table_reader.h" namespace ROCKSDB_NAMESPACE { enum PlainTableEntryType : unsigned char { kFullKey = 0, kPrefixFromPreviousKey = 1, kKeySuffix = 2, }; namespace { // Control byte: // First two bits indicate type of entry // Other bytes are inlined sizes. If all bits are 1 (0x03F), overflow bytes // are used. 
key_size-0x3F will be encoded as a variint32 after this bytes. const unsigned char kSizeInlineLimit = 0x3F; // Return 0 for error size_t EncodeSize(PlainTableEntryType type, uint32_t key_size, char* out_buffer) { out_buffer[0] = type << 6; if (key_size < static_cast(kSizeInlineLimit)) { // size inlined out_buffer[0] |= static_cast(key_size); return 1; } else { out_buffer[0] |= kSizeInlineLimit; char* ptr = EncodeVarint32(out_buffer + 1, key_size - kSizeInlineLimit); return ptr - out_buffer; } } } // namespace // Fill bytes_read with number of bytes read. inline Status PlainTableKeyDecoder::DecodeSize(uint32_t start_offset, PlainTableEntryType* entry_type, uint32_t* key_size, uint32_t* bytes_read) { Slice next_byte_slice; bool success = file_reader_.Read(start_offset, 1, &next_byte_slice); if (!success) { return file_reader_.status(); } *entry_type = static_cast( (static_cast(next_byte_slice[0]) & ~kSizeInlineLimit) >> 6); char inline_key_size = next_byte_slice[0] & kSizeInlineLimit; if (inline_key_size < kSizeInlineLimit) { *key_size = inline_key_size; *bytes_read = 1; return Status::OK(); } else { uint32_t extra_size; uint32_t tmp_bytes_read; success = file_reader_.ReadVarint32(start_offset + 1, &extra_size, &tmp_bytes_read); if (!success) { return file_reader_.status(); } assert(tmp_bytes_read > 0); *key_size = kSizeInlineLimit + extra_size; *bytes_read = tmp_bytes_read + 1; return Status::OK(); } } IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, WritableFileWriter* file, uint64_t* offset, char* meta_bytes_buf, size_t* meta_bytes_buf_size) { ParsedInternalKey parsed_key; if (!ParseInternalKey(key, &parsed_key)) { return IOStatus::Corruption(Slice()); } Slice key_to_write = key; // Portion of internal key to write out. 
uint32_t user_key_size = static_cast(key.size() - 8); if (encoding_type_ == kPlain) { if (fixed_user_key_len_ == kPlainTableVariableLength) { // Write key length char key_size_buf[5]; // tmp buffer for key size as varint32 char* ptr = EncodeVarint32(key_size_buf, user_key_size); assert(ptr <= key_size_buf + sizeof(key_size_buf)); auto len = ptr - key_size_buf; IOStatus io_s = file->Append(Slice(key_size_buf, len)); if (!io_s.ok()) { return io_s; } *offset += len; } } else { assert(encoding_type_ == kPrefix); char size_bytes[12]; size_t size_bytes_pos = 0; Slice prefix = prefix_extractor_->Transform(Slice(key.data(), user_key_size)); if (key_count_for_prefix_ == 0 || prefix != pre_prefix_.GetUserKey() || key_count_for_prefix_ % index_sparseness_ == 0) { key_count_for_prefix_ = 1; pre_prefix_.SetUserKey(prefix); size_bytes_pos += EncodeSize(kFullKey, user_key_size, size_bytes); IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos)); if (!io_s.ok()) { return io_s; } *offset += size_bytes_pos; } else { key_count_for_prefix_++; if (key_count_for_prefix_ == 2) { // For second key within a prefix, need to encode prefix length size_bytes_pos += EncodeSize(kPrefixFromPreviousKey, static_cast(pre_prefix_.GetUserKey().size()), size_bytes + size_bytes_pos); } uint32_t prefix_len = static_cast(pre_prefix_.GetUserKey().size()); size_bytes_pos += EncodeSize(kKeySuffix, user_key_size - prefix_len, size_bytes + size_bytes_pos); IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos)); if (!io_s.ok()) { return io_s; } *offset += size_bytes_pos; key_to_write = Slice(key.data() + prefix_len, key.size() - prefix_len); } } // Encode full key // For value size as varint32 (up to 5 bytes). // If the row is of value type with seqId 0, flush the special flag together // in this buffer to safe one file append call, which takes 1 byte. 
if (parsed_key.sequence == 0 && parsed_key.type == kTypeValue) { IOStatus io_s = file->Append(Slice(key_to_write.data(), key_to_write.size() - 8)); if (!io_s.ok()) { return io_s; } *offset += key_to_write.size() - 8; meta_bytes_buf[*meta_bytes_buf_size] = PlainTableFactory::kValueTypeSeqId0; *meta_bytes_buf_size += 1; } else { IOStatus io_s = file->Append(key_to_write); if (!io_s.ok()) { return io_s; } *offset += key_to_write.size(); } return IOStatus::OK(); } Slice PlainTableFileReader::GetFromBuffer(Buffer* buffer, uint32_t file_offset, uint32_t len) { assert(file_offset + len <= file_info_->data_end_offset); return Slice(buffer->buf.get() + (file_offset - buffer->buf_start_offset), len); } bool PlainTableFileReader::ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* out) { const uint32_t kPrefetchSize = 256u; // Try to read from buffers. for (uint32_t i = 0; i < num_buf_; i++) { Buffer* buffer = buffers_[num_buf_ - 1 - i].get(); if (file_offset >= buffer->buf_start_offset && file_offset + len <= buffer->buf_start_offset + buffer->buf_len) { *out = GetFromBuffer(buffer, file_offset, len); return true; } } Buffer* new_buffer; // Data needed is not in any of the buffer. Allocate a new buffer. if (num_buf_ < buffers_.size()) { // Add a new buffer new_buffer = new Buffer(); buffers_[num_buf_++].reset(new_buffer); } else { // Now simply replace the last buffer. Can improve the placement policy // if needed. 
new_buffer = buffers_[num_buf_ - 1].get(); } assert(file_offset + len <= file_info_->data_end_offset); uint32_t size_to_read = std::min(file_info_->data_end_offset - file_offset, std::max(kPrefetchSize, len)); if (size_to_read > new_buffer->buf_capacity) { new_buffer->buf.reset(new char[size_to_read]); new_buffer->buf_capacity = size_to_read; new_buffer->buf_len = 0; } Slice read_result; Status s = file_info_->file->Read(IOOptions(), file_offset, size_to_read, &read_result, new_buffer->buf.get(), nullptr); if (!s.ok()) { status_ = s; return false; } new_buffer->buf_start_offset = file_offset; new_buffer->buf_len = size_to_read; *out = GetFromBuffer(new_buffer, file_offset, len); return true; } inline bool PlainTableFileReader::ReadVarint32(uint32_t offset, uint32_t* out, uint32_t* bytes_read) { if (file_info_->is_mmap_mode) { const char* start = file_info_->file_data.data() + offset; const char* limit = file_info_->file_data.data() + file_info_->data_end_offset; const char* key_ptr = GetVarint32Ptr(start, limit, out); assert(key_ptr != nullptr); *bytes_read = static_cast(key_ptr - start); return true; } else { return ReadVarint32NonMmap(offset, out, bytes_read); } } bool PlainTableFileReader::ReadVarint32NonMmap(uint32_t offset, uint32_t* out, uint32_t* bytes_read) { const char* start; const char* limit; const uint32_t kMaxVarInt32Size = 6u; uint32_t bytes_to_read = std::min(file_info_->data_end_offset - offset, kMaxVarInt32Size); Slice bytes; if (!Read(offset, bytes_to_read, &bytes)) { return false; } start = bytes.data(); limit = bytes.data() + bytes.size(); const char* key_ptr = GetVarint32Ptr(start, limit, out); *bytes_read = (key_ptr != nullptr) ? 
static_cast(key_ptr - start) : 0; return true; } Status PlainTableKeyDecoder::ReadInternalKey( uint32_t file_offset, uint32_t user_key_size, ParsedInternalKey* parsed_key, uint32_t* bytes_read, bool* internal_key_valid, Slice* internal_key) { Slice tmp_slice; bool success = file_reader_.Read(file_offset, user_key_size + 1, &tmp_slice); if (!success) { return file_reader_.status(); } if (tmp_slice[user_key_size] == PlainTableFactory::kValueTypeSeqId0) { // Special encoding for the row with seqID=0 parsed_key->user_key = Slice(tmp_slice.data(), user_key_size); parsed_key->sequence = 0; parsed_key->type = kTypeValue; *bytes_read += user_key_size + 1; *internal_key_valid = false; } else { success = file_reader_.Read(file_offset, user_key_size + 8, internal_key); if (!success) { return file_reader_.status(); } *internal_key_valid = true; if (!ParseInternalKey(*internal_key, parsed_key)) { return Status::Corruption( Slice("Incorrect value type found when reading the next key")); } *bytes_read += user_key_size + 8; } return Status::OK(); } Status PlainTableKeyDecoder::NextPlainEncodingKey(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, uint32_t* bytes_read, bool* /*seekable*/) { uint32_t user_key_size = 0; Status s; if (fixed_user_key_len_ != kPlainTableVariableLength) { user_key_size = fixed_user_key_len_; } else { uint32_t tmp_size = 0; uint32_t tmp_read; bool success = file_reader_.ReadVarint32(start_offset, &tmp_size, &tmp_read); if (!success) { return file_reader_.status(); } assert(tmp_read > 0); user_key_size = tmp_size; *bytes_read = tmp_read; } // dummy initial value to avoid compiler complain bool decoded_internal_key_valid = true; Slice decoded_internal_key; s = ReadInternalKey(start_offset + *bytes_read, user_key_size, parsed_key, bytes_read, &decoded_internal_key_valid, &decoded_internal_key); if (!s.ok()) { return s; } if (!file_reader_.file_info()->is_mmap_mode) { cur_key_.SetInternalKey(*parsed_key); parsed_key->user_key = 
Slice(cur_key_.GetInternalKey().data(), user_key_size); if (internal_key != nullptr) { *internal_key = cur_key_.GetInternalKey(); } } else if (internal_key != nullptr) { if (decoded_internal_key_valid) { *internal_key = decoded_internal_key; } else { // Need to copy out the internal key cur_key_.SetInternalKey(*parsed_key); *internal_key = cur_key_.GetInternalKey(); } } return Status::OK(); } Status PlainTableKeyDecoder::NextPrefixEncodingKey( uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, uint32_t* bytes_read, bool* seekable) { PlainTableEntryType entry_type; bool expect_suffix = false; Status s; do { uint32_t size = 0; // dummy initial value to avoid compiler complain bool decoded_internal_key_valid = true; uint32_t my_bytes_read = 0; s = DecodeSize(start_offset + *bytes_read, &entry_type, &size, &my_bytes_read); if (!s.ok()) { return s; } if (my_bytes_read == 0) { return Status::Corruption("Unexpected EOF when reading size of the key"); } *bytes_read += my_bytes_read; switch (entry_type) { case kFullKey: { expect_suffix = false; Slice decoded_internal_key; s = ReadInternalKey(start_offset + *bytes_read, size, parsed_key, bytes_read, &decoded_internal_key_valid, &decoded_internal_key); if (!s.ok()) { return s; } if (!file_reader_.file_info()->is_mmap_mode || (internal_key != nullptr && !decoded_internal_key_valid)) { // In non-mmap mode, always need to make a copy of keys returned to // users, because after reading value for the key, the key might // be invalid. 
cur_key_.SetInternalKey(*parsed_key); saved_user_key_ = cur_key_.GetUserKey(); if (!file_reader_.file_info()->is_mmap_mode) { parsed_key->user_key = Slice(cur_key_.GetInternalKey().data(), size); } if (internal_key != nullptr) { *internal_key = cur_key_.GetInternalKey(); } } else { if (internal_key != nullptr) { *internal_key = decoded_internal_key; } saved_user_key_ = parsed_key->user_key; } break; } case kPrefixFromPreviousKey: { if (seekable != nullptr) { *seekable = false; } prefix_len_ = size; assert(prefix_extractor_ == nullptr || prefix_extractor_->Transform(saved_user_key_).size() == prefix_len_); // Need read another size flag for suffix expect_suffix = true; break; } case kKeySuffix: { expect_suffix = false; if (seekable != nullptr) { *seekable = false; } Slice tmp_slice; s = ReadInternalKey(start_offset + *bytes_read, size, parsed_key, bytes_read, &decoded_internal_key_valid, &tmp_slice); if (!s.ok()) { return s; } if (!file_reader_.file_info()->is_mmap_mode) { // In non-mmap mode, we need to make a copy of keys returned to // users, because after reading value for the key, the key might // be invalid. // saved_user_key_ points to cur_key_. We are making a copy of // the prefix part to another string, and construct the current // key from the prefix part and the suffix part back to cur_key_. std::string tmp = Slice(saved_user_key_.data(), prefix_len_).ToString(); cur_key_.Reserve(prefix_len_ + size); cur_key_.SetInternalKey(tmp, *parsed_key); parsed_key->user_key = Slice(cur_key_.GetInternalKey().data(), prefix_len_ + size); saved_user_key_ = cur_key_.GetUserKey(); } else { cur_key_.Reserve(prefix_len_ + size); cur_key_.SetInternalKey(Slice(saved_user_key_.data(), prefix_len_), *parsed_key); } parsed_key->user_key = cur_key_.GetUserKey(); if (internal_key != nullptr) { *internal_key = cur_key_.GetInternalKey(); } break; } default: return Status::Corruption("Un-identified size flag."); } } while (expect_suffix); // Another round if suffix is expected. 
return Status::OK(); } Status PlainTableKeyDecoder::NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, Slice* value, uint32_t* bytes_read, bool* seekable) { assert(value != nullptr); Status s = NextKeyNoValue(start_offset, parsed_key, internal_key, bytes_read, seekable); if (s.ok()) { assert(bytes_read != nullptr); uint32_t value_size; uint32_t value_size_bytes; bool success = file_reader_.ReadVarint32(start_offset + *bytes_read, &value_size, &value_size_bytes); if (!success) { return file_reader_.status(); } if (value_size_bytes == 0) { return Status::Corruption( "Unexpected EOF when reading the next value's size."); } *bytes_read += value_size_bytes; success = file_reader_.Read(start_offset + *bytes_read, value_size, value); if (!success) { return file_reader_.status(); } *bytes_read += value_size; } return s; } Status PlainTableKeyDecoder::NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, uint32_t* bytes_read, bool* seekable) { *bytes_read = 0; if (seekable != nullptr) { *seekable = true; } Status s; if (encoding_type_ == kPlain) { return NextPlainEncodingKey(start_offset, parsed_key, internal_key, bytes_read, seekable); } else { assert(encoding_type_ == kPrefix); return NextPrefixEncodingKey(start_offset, parsed_key, internal_key, bytes_read, seekable); } } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LIT rocksdb-6.11.4/table/plain/plain_table_key_coding.h000066400000000000000000000176431370372246700222700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #ifndef ROCKSDB_LITE #include #include "db/dbformat.h" #include "rocksdb/slice.h" #include "table/plain/plain_table_reader.h" // The file contains three helper classes of PlainTable format, // PlainTableKeyEncoder, PlainTableKeyDecoder and PlainTableFileReader. // These classes issue the lowest level of operations of PlainTable. // Actual data format of the key is documented in comments of class // PlainTableFactory. namespace ROCKSDB_NAMESPACE { class WritableFile; struct ParsedInternalKey; struct PlainTableReaderFileInfo; enum PlainTableEntryType : unsigned char; // Helper class for PlainTable format to write out a key to an output file // The class is used in PlainTableBuilder. class PlainTableKeyEncoder { public: explicit PlainTableKeyEncoder(EncodingType encoding_type, uint32_t user_key_len, const SliceTransform* prefix_extractor, size_t index_sparseness) : encoding_type_((prefix_extractor != nullptr) ? encoding_type : kPlain), fixed_user_key_len_(user_key_len), prefix_extractor_(prefix_extractor), index_sparseness_((index_sparseness > 1) ? index_sparseness : 1), key_count_for_prefix_(0) {} // key: the key to write out, in the format of internal key. // file: the output file to write out // offset: offset in the file. Needs to be updated after appending bytes // for the key // meta_bytes_buf: buffer for extra meta bytes // meta_bytes_buf_size: offset to append extra meta bytes. Will be updated // if meta_bytes_buf is updated. IOStatus AppendKey(const Slice& key, WritableFileWriter* file, uint64_t* offset, char* meta_bytes_buf, size_t* meta_bytes_buf_size); // Return actual encoding type to be picked EncodingType GetEncodingType() { return encoding_type_; } private: EncodingType encoding_type_; uint32_t fixed_user_key_len_; const SliceTransform* prefix_extractor_; const size_t index_sparseness_; size_t key_count_for_prefix_; IterKey pre_prefix_; }; // The class does raw file reads for PlainTableReader. 
// It hides whether it is a mmap-read, or a non-mmap read. // The class is implemented in a way to favor the performance of mmap case. // The class is used by PlainTableReader. class PlainTableFileReader { public: explicit PlainTableFileReader(const PlainTableReaderFileInfo* _file_info) : file_info_(_file_info), num_buf_(0) {} // In mmaped mode, the results point to mmaped area of the file, which // means it is always valid before closing the file. // In non-mmap mode, the results point to an internal buffer. If the caller // makes another read call, the results may not be valid. So callers should // make a copy when needed. // In order to save read calls to files, we keep two internal buffers: // the first read and the most recent read. This is efficient because it // columns these two common use cases: // (1) hash index only identify one location, we read the key to verify // the location, and read key and value if it is the right location. // (2) after hash index checking, we identify two locations (because of // hash bucket conflicts), we binary search the two location to see // which one is what we need and start to read from the location. // These two most common use cases will be covered by the two buffers // so that we don't need to re-read the same location. // Currently we keep a fixed size buffer. If a read doesn't exactly fit // the buffer, we replace the second buffer with the location user reads. // // If return false, status code is stored in status_. bool Read(uint32_t file_offset, uint32_t len, Slice* out) { if (file_info_->is_mmap_mode) { assert(file_offset + len <= file_info_->data_end_offset); *out = Slice(file_info_->file_data.data() + file_offset, len); return true; } else { return ReadNonMmap(file_offset, len, out); } } // If return false, status code is stored in status_. bool ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* output); // *bytes_read = 0 means eof. false means failure and status is saved // in status_. 
Not directly returning Status to save copying status // object to map previous performance of mmap mode. inline bool ReadVarint32(uint32_t offset, uint32_t* output, uint32_t* bytes_read); bool ReadVarint32NonMmap(uint32_t offset, uint32_t* output, uint32_t* bytes_read); Status status() const { return status_; } const PlainTableReaderFileInfo* file_info() { return file_info_; } private: const PlainTableReaderFileInfo* file_info_; struct Buffer { Buffer() : buf_start_offset(0), buf_len(0), buf_capacity(0) {} std::unique_ptr buf; uint32_t buf_start_offset; uint32_t buf_len; uint32_t buf_capacity; }; // Keep buffers for two recent reads. std::array, 2> buffers_; uint32_t num_buf_; Status status_; Slice GetFromBuffer(Buffer* buf, uint32_t file_offset, uint32_t len); }; // A helper class to decode keys from input buffer // The class is used by PlainTableBuilder. class PlainTableKeyDecoder { public: explicit PlainTableKeyDecoder(const PlainTableReaderFileInfo* file_info, EncodingType encoding_type, uint32_t user_key_len, const SliceTransform* prefix_extractor) : file_reader_(file_info), encoding_type_(encoding_type), prefix_len_(0), fixed_user_key_len_(user_key_len), prefix_extractor_(prefix_extractor), in_prefix_(false) {} // Find the next key. // start: char array where the key starts. // limit: boundary of the char array // parsed_key: the output of the result key // internal_key: if not null, fill with the output of the result key in // un-parsed format // bytes_read: how many bytes read from start. Output // seekable: whether key can be read from this place. Used when building // indexes. Output. 
Status NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, Slice* value, uint32_t* bytes_read, bool* seekable = nullptr); Status NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, uint32_t* bytes_read, bool* seekable = nullptr); PlainTableFileReader file_reader_; EncodingType encoding_type_; uint32_t prefix_len_; uint32_t fixed_user_key_len_; Slice saved_user_key_; IterKey cur_key_; const SliceTransform* prefix_extractor_; bool in_prefix_; private: Status NextPlainEncodingKey(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, uint32_t* bytes_read, bool* seekable = nullptr); Status NextPrefixEncodingKey(uint32_t start_offset, ParsedInternalKey* parsed_key, Slice* internal_key, uint32_t* bytes_read, bool* seekable = nullptr); Status ReadInternalKey(uint32_t file_offset, uint32_t user_key_size, ParsedInternalKey* parsed_key, uint32_t* bytes_read, bool* internal_key_valid, Slice* internal_key); inline Status DecodeSize(uint32_t start_offset, PlainTableEntryType* entry_type, uint32_t* key_size, uint32_t* bytes_read); }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/plain/plain_table_reader.cc000066400000000000000000000623101370372246700215440ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#ifndef ROCKSDB_LITE #include "table/plain/plain_table_reader.h" #include #include #include "db/dbformat.h" #include "rocksdb/cache.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" #include "rocksdb/statistics.h" #include "table/block_based/block.h" #include "table/block_based/filter_block.h" #include "table/format.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/meta_blocks.h" #include "table/plain/plain_table_bloom.h" #include "table/plain/plain_table_factory.h" #include "table/plain/plain_table_key_coding.h" #include "table/two_level_iterator.h" #include "memory/arena.h" #include "monitoring/histogram.h" #include "monitoring/perf_context_imp.h" #include "util/coding.h" #include "util/dynamic_bloom.h" #include "util/hash.h" #include "util/stop_watch.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { namespace { // Safely getting a uint32_t element from a char array, where, starting from // `base`, every 4 bytes are considered as an fixed 32 bit integer. 
inline uint32_t GetFixed32Element(const char* base, size_t offset) { return DecodeFixed32(base + offset * sizeof(uint32_t)); } } // namespace // Iterator to iterate IndexedTable class PlainTableIterator : public InternalIterator { public: explicit PlainTableIterator(PlainTableReader* table, bool use_prefix_seek); // No copying allowed PlainTableIterator(const PlainTableIterator&) = delete; void operator=(const Iterator&) = delete; ~PlainTableIterator() override; bool Valid() const override; void SeekToFirst() override; void SeekToLast() override; void Seek(const Slice& target) override; void SeekForPrev(const Slice& target) override; void Next() override; void Prev() override; Slice key() const override; Slice value() const override; Status status() const override; private: PlainTableReader* table_; PlainTableKeyDecoder decoder_; bool use_prefix_seek_; uint32_t offset_; uint32_t next_offset_; Slice key_; Slice value_; Status status_; }; extern const uint64_t kPlainTableMagicNumber; PlainTableReader::PlainTableReader( const ImmutableCFOptions& ioptions, std::unique_ptr&& file, const EnvOptions& storage_options, const InternalKeyComparator& icomparator, EncodingType encoding_type, uint64_t file_size, const TableProperties* table_properties, const SliceTransform* prefix_extractor) : internal_comparator_(icomparator), encoding_type_(encoding_type), full_scan_mode_(false), user_key_len_(static_cast(table_properties->fixed_key_len)), prefix_extractor_(prefix_extractor), enable_bloom_(false), bloom_(6), file_info_(std::move(file), storage_options, static_cast(table_properties->data_size)), ioptions_(ioptions), file_size_(file_size), table_properties_(nullptr) {} PlainTableReader::~PlainTableReader() { } Status PlainTableReader::Open( const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table_reader, const int bloom_bits_per_key, double 
hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size, bool full_scan_mode, const bool immortal_table, const SliceTransform* prefix_extractor) { if (file_size > PlainTableIndex::kMaxFileSize) { return Status::NotSupported("File is too large for PlainTableReader!"); } TableProperties* props_ptr = nullptr; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, ioptions, &props_ptr, true /* compression_type_missing */); std::shared_ptr props(props_ptr); if (!s.ok()) { return s; } assert(hash_table_ratio >= 0.0); auto& user_props = props->user_collected_properties; auto prefix_extractor_in_file = props->prefix_extractor_name; if (!full_scan_mode && !prefix_extractor_in_file.empty() /* old version sst file*/ && prefix_extractor_in_file != "nullptr") { if (!prefix_extractor) { return Status::InvalidArgument( "Prefix extractor is missing when opening a PlainTable built " "using a prefix extractor"); } else if (prefix_extractor_in_file.compare(prefix_extractor->Name()) != 0) { return Status::InvalidArgument( "Prefix extractor given doesn't match the one used to build " "PlainTable"); } } EncodingType encoding_type = kPlain; auto encoding_type_prop = user_props.find(PlainTablePropertyNames::kEncodingType); if (encoding_type_prop != user_props.end()) { encoding_type = static_cast( DecodeFixed32(encoding_type_prop->second.c_str())); } std::unique_ptr new_reader(new PlainTableReader( ioptions, std::move(file), env_options, internal_comparator, encoding_type, file_size, props.get(), prefix_extractor)); s = new_reader->MmapDataIfNeeded(); if (!s.ok()) { return s; } if (!full_scan_mode) { s = new_reader->PopulateIndex(props.get(), bloom_bits_per_key, hash_table_ratio, index_sparseness, huge_page_tlb_size); if (!s.ok()) { return s; } } else { // Flag to indicate it is a full scan mode so that none of the indexes // can be used. 
new_reader->full_scan_mode_ = true; } // PopulateIndex can add to the props, so don't store them until now new_reader->table_properties_ = props; if (immortal_table && new_reader->file_info_.is_mmap_mode) { new_reader->dummy_cleanable_.reset(new Cleanable()); } *table_reader = std::move(new_reader); return s; } void PlainTableReader::SetupForCompaction() { } InternalIterator* PlainTableReader::NewIterator( const ReadOptions& options, const SliceTransform* /* prefix_extractor */, Arena* arena, bool /*skip_filters*/, TableReaderCaller /*caller*/, size_t /*compaction_readahead_size*/, bool /* allow_unprepared_value */) { // Not necessarily used here, but make sure this has been initialized assert(table_properties_); // Auto prefix mode is not implemented in PlainTable. bool use_prefix_seek = !IsTotalOrderMode() && !options.total_order_seek && !options.auto_prefix_mode; if (arena == nullptr) { return new PlainTableIterator(this, use_prefix_seek); } else { auto mem = arena->AllocateAligned(sizeof(PlainTableIterator)); return new (mem) PlainTableIterator(this, use_prefix_seek); } } Status PlainTableReader::PopulateIndexRecordList( PlainTableIndexBuilder* index_builder, std::vector* prefix_hashes) { Slice prev_key_prefix_slice; std::string prev_key_prefix_buf; uint32_t pos = data_start_offset_; bool is_first_record = true; Slice key_prefix_slice; PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_, prefix_extractor_); while (pos < file_info_.data_end_offset) { uint32_t key_offset = pos; ParsedInternalKey key; Slice value_slice; bool seekable = false; Status s = Next(&decoder, &pos, &key, nullptr, &value_slice, &seekable); if (!s.ok()) { return s; } key_prefix_slice = GetPrefix(key); if (enable_bloom_) { bloom_.AddHash(GetSliceHash(key.user_key)); } else { if (is_first_record || prev_key_prefix_slice != key_prefix_slice) { if (!is_first_record) { prefix_hashes->push_back(GetSliceHash(prev_key_prefix_slice)); } if (file_info_.is_mmap_mode) { 
prev_key_prefix_slice = key_prefix_slice; } else { prev_key_prefix_buf = key_prefix_slice.ToString(); prev_key_prefix_slice = prev_key_prefix_buf; } } } index_builder->AddKeyPrefix(GetPrefix(key), key_offset); if (!seekable && is_first_record) { return Status::Corruption("Key for a prefix is not seekable"); } is_first_record = false; } prefix_hashes->push_back(GetSliceHash(key_prefix_slice)); auto s = index_.InitFromRawData(index_builder->Finish()); return s; } void PlainTableReader::AllocateBloom(int bloom_bits_per_key, int num_keys, size_t huge_page_tlb_size) { uint32_t bloom_total_bits = num_keys * bloom_bits_per_key; if (bloom_total_bits > 0) { enable_bloom_ = true; bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality, huge_page_tlb_size, ioptions_.info_log); } } void PlainTableReader::FillBloom(const std::vector& prefix_hashes) { assert(bloom_.IsInitialized()); for (const auto prefix_hash : prefix_hashes) { bloom_.AddHash(prefix_hash); } } Status PlainTableReader::MmapDataIfNeeded() { if (file_info_.is_mmap_mode) { // Get mmapped memory. return file_info_.file->Read(IOOptions(), 0, static_cast(file_size_), &file_info_.file_data, nullptr, nullptr); } return Status::OK(); } Status PlainTableReader::PopulateIndex(TableProperties* props, int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size) { assert(props != nullptr); BlockContents index_block_contents; Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, file_size_, kPlainTableMagicNumber, ioptions_, PlainTableIndexBuilder::kPlainTableIndexBlock, BlockType::kIndex, &index_block_contents, true /* compression_type_missing */); bool index_in_file = s.ok(); BlockContents bloom_block_contents; bool bloom_in_file = false; // We only need to read the bloom block if index block is in file. 
if (index_in_file) { s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, file_size_, kPlainTableMagicNumber, ioptions_, BloomBlockBuilder::kBloomBlock, BlockType::kFilter, &bloom_block_contents, true /* compression_type_missing */); bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0; } Slice* bloom_block; if (bloom_in_file) { // If bloom_block_contents.allocation is not empty (which will be the case // for non-mmap mode), it holds the alloated memory for the bloom block. // It needs to be kept alive to keep `bloom_block` valid. bloom_block_alloc_ = std::move(bloom_block_contents.allocation); bloom_block = &bloom_block_contents.data; } else { bloom_block = nullptr; } Slice* index_block; if (index_in_file) { // If index_block_contents.allocation is not empty (which will be the case // for non-mmap mode), it holds the alloated memory for the index block. // It needs to be kept alive to keep `index_block` valid. index_block_alloc_ = std::move(index_block_contents.allocation); index_block = &index_block_contents.data; } else { index_block = nullptr; } if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) { // moptions.prefix_extractor is requried for a hash-based look-up. return Status::NotSupported( "PlainTable requires a prefix extractor enable prefix hash mode."); } // First, read the whole file, for every kIndexIntervalForSamePrefixKeys rows // for a prefix (starting from the first one), generate a record of (hash, // offset) and append it to IndexRecordList, which is a data structure created // to store them. if (!index_in_file) { // Allocate bloom filter here for total order mode. 
if (IsTotalOrderMode()) { AllocateBloom(bloom_bits_per_key, static_cast(props->num_entries), huge_page_tlb_size); } } else if (bloom_in_file) { enable_bloom_ = true; auto num_blocks_property = props->user_collected_properties.find( PlainTablePropertyNames::kNumBloomBlocks); uint32_t num_blocks = 0; if (num_blocks_property != props->user_collected_properties.end()) { Slice temp_slice(num_blocks_property->second); if (!GetVarint32(&temp_slice, &num_blocks)) { num_blocks = 0; } } // cast away const qualifier, because bloom_ won't be changed bloom_.SetRawData(const_cast(bloom_block->data()), static_cast(bloom_block->size()) * 8, num_blocks); } else { // Index in file but no bloom in file. Disable bloom filter in this case. enable_bloom_ = false; bloom_bits_per_key = 0; } PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_, index_sparseness, hash_table_ratio, huge_page_tlb_size); std::vector prefix_hashes; if (!index_in_file) { // Populates _bloom if enabled (total order mode) s = PopulateIndexRecordList(&index_builder, &prefix_hashes); if (!s.ok()) { return s; } } else { s = index_.InitFromRawData(*index_block); if (!s.ok()) { return s; } } if (!index_in_file) { if (!IsTotalOrderMode()) { // Calculated bloom filter size and allocate memory for // bloom filter based on the number of prefixes, then fill it. AllocateBloom(bloom_bits_per_key, index_.GetNumPrefixes(), huge_page_tlb_size); if (enable_bloom_) { FillBloom(prefix_hashes); } } } // Fill two table properties. 
if (!index_in_file) { props->user_collected_properties["plain_table_hash_table_size"] = ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen); props->user_collected_properties["plain_table_sub_index_size"] = ToString(index_.GetSubIndexSize()); } else { props->user_collected_properties["plain_table_hash_table_size"] = ToString(0); props->user_collected_properties["plain_table_sub_index_size"] = ToString(0); } return Status::OK(); } Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder, const Slice& target, const Slice& prefix, uint32_t prefix_hash, bool& prefix_matched, uint32_t* offset) const { prefix_matched = false; uint32_t prefix_index_offset; auto res = index_.GetOffset(prefix_hash, &prefix_index_offset); if (res == PlainTableIndex::kNoPrefixForBucket) { *offset = file_info_.data_end_offset; return Status::OK(); } else if (res == PlainTableIndex::kDirectToFile) { *offset = prefix_index_offset; return Status::OK(); } // point to sub-index, need to do a binary search uint32_t upper_bound; const char* base_ptr = index_.GetSubIndexBasePtrAndUpperBound(prefix_index_offset, &upper_bound); uint32_t low = 0; uint32_t high = upper_bound; ParsedInternalKey mid_key; ParsedInternalKey parsed_target; if (!ParseInternalKey(target, &parsed_target)) { return Status::Corruption(Slice()); } // The key is between [low, high). Do a binary search between it. while (high - low > 1) { uint32_t mid = (high + low) / 2; uint32_t file_offset = GetFixed32Element(base_ptr, mid); uint32_t tmp; Status s = decoder->NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp); if (!s.ok()) { return s; } int cmp_result = internal_comparator_.Compare(mid_key, parsed_target); if (cmp_result < 0) { low = mid; } else { if (cmp_result == 0) { // Happen to have found the exact key or target is smaller than the // first key after base_offset. 
prefix_matched = true; *offset = file_offset; return Status::OK(); } else { high = mid; } } } // Both of the key at the position low or low+1 could share the same // prefix as target. We need to rule out one of them to avoid to go // to the wrong prefix. ParsedInternalKey low_key; uint32_t tmp; uint32_t low_key_offset = GetFixed32Element(base_ptr, low); Status s = decoder->NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp); if (!s.ok()) { return s; } if (GetPrefix(low_key) == prefix) { prefix_matched = true; *offset = low_key_offset; } else if (low + 1 < upper_bound) { // There is possible a next prefix, return it prefix_matched = false; *offset = GetFixed32Element(base_ptr, low + 1); } else { // target is larger than a key of the last prefix in this bucket // but with a different prefix. Key does not exist. *offset = file_info_.data_end_offset; } return Status::OK(); } bool PlainTableReader::MatchBloom(uint32_t hash) const { if (!enable_bloom_) { return true; } if (bloom_.MayContainHash(hash)) { PERF_COUNTER_ADD(bloom_sst_hit_count, 1); return true; } else { PERF_COUNTER_ADD(bloom_sst_miss_count, 1); return false; } } Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset, ParsedInternalKey* parsed_key, Slice* internal_key, Slice* value, bool* seekable) const { if (*offset == file_info_.data_end_offset) { *offset = file_info_.data_end_offset; return Status::OK(); } if (*offset > file_info_.data_end_offset) { return Status::Corruption("Offset is out of file size"); } uint32_t bytes_read; Status s = decoder->NextKey(*offset, parsed_key, internal_key, value, &bytes_read, seekable); if (!s.ok()) { return s; } *offset = *offset + bytes_read; return Status::OK(); } void PlainTableReader::Prepare(const Slice& target) { if (enable_bloom_) { uint32_t prefix_hash = GetSliceHash(GetPrefix(target)); bloom_.Prefetch(prefix_hash); } } Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target, GetContext* get_context, const 
SliceTransform* /* prefix_extractor */, bool /*skip_filters*/) { // Check bloom filter first. Slice prefix_slice; uint32_t prefix_hash; if (IsTotalOrderMode()) { if (full_scan_mode_) { status_ = Status::InvalidArgument("Get() is not allowed in full scan mode."); } // Match whole user key for bloom filter check. if (!MatchBloom(GetSliceHash(GetUserKey(target)))) { return Status::OK(); } // in total order mode, there is only one bucket 0, and we always use empty // prefix. prefix_slice = Slice(); prefix_hash = 0; } else { prefix_slice = GetPrefix(target); prefix_hash = GetSliceHash(prefix_slice); if (!MatchBloom(prefix_hash)) { return Status::OK(); } } uint32_t offset; bool prefix_match; PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_, prefix_extractor_); Status s = GetOffset(&decoder, target, prefix_slice, prefix_hash, prefix_match, &offset); if (!s.ok()) { return s; } ParsedInternalKey found_key; ParsedInternalKey parsed_target; if (!ParseInternalKey(target, &parsed_target)) { return Status::Corruption(Slice()); } Slice found_value; while (offset < file_info_.data_end_offset) { s = Next(&decoder, &offset, &found_key, nullptr, &found_value); if (!s.ok()) { return s; } if (!prefix_match) { // Need to verify prefix for the first key found if it is not yet // checked. if (GetPrefix(found_key) != prefix_slice) { return Status::OK(); } prefix_match = true; } // TODO(ljin): since we know the key comparison result here, // can we enable the fast path? 
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) { bool dont_care __attribute__((__unused__)); if (!get_context->SaveValue(found_key, found_value, &dont_care, dummy_cleanable_.get())) { break; } } } return Status::OK(); } uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/, TableReaderCaller /*caller*/) { return 0; } uint64_t PlainTableReader::ApproximateSize(const Slice& /*start*/, const Slice& /*end*/, TableReaderCaller /*caller*/) { return 0; } PlainTableIterator::PlainTableIterator(PlainTableReader* table, bool use_prefix_seek) : table_(table), decoder_(&table_->file_info_, table_->encoding_type_, table_->user_key_len_, table_->prefix_extractor_), use_prefix_seek_(use_prefix_seek) { next_offset_ = offset_ = table_->file_info_.data_end_offset; } PlainTableIterator::~PlainTableIterator() { } bool PlainTableIterator::Valid() const { return offset_ < table_->file_info_.data_end_offset && offset_ >= table_->data_start_offset_; } void PlainTableIterator::SeekToFirst() { status_ = Status::OK(); next_offset_ = table_->data_start_offset_; if (next_offset_ >= table_->file_info_.data_end_offset) { next_offset_ = offset_ = table_->file_info_.data_end_offset; } else { Next(); } } void PlainTableIterator::SeekToLast() { assert(false); status_ = Status::NotSupported("SeekToLast() is not supported in PlainTable"); next_offset_ = offset_ = table_->file_info_.data_end_offset; } void PlainTableIterator::Seek(const Slice& target) { if (use_prefix_seek_ != !table_->IsTotalOrderMode()) { // This check is done here instead of NewIterator() to permit creating an // iterator with total_order_seek = true even if we won't be able to Seek() // it. This is needed for compaction: it creates iterator with // total_order_seek = true but usually never does Seek() on it, // only SeekToFirst(). 
status_ = Status::InvalidArgument( "total_order_seek not implemented for PlainTable."); offset_ = next_offset_ = table_->file_info_.data_end_offset; return; } // If the user doesn't set prefix seek option and we are not able to do a // total Seek(). assert failure. if (table_->IsTotalOrderMode()) { if (table_->full_scan_mode_) { status_ = Status::InvalidArgument("Seek() is not allowed in full scan mode."); offset_ = next_offset_ = table_->file_info_.data_end_offset; return; } else if (table_->GetIndexSize() > 1) { assert(false); status_ = Status::NotSupported( "PlainTable cannot issue non-prefix seek unless in total order " "mode."); offset_ = next_offset_ = table_->file_info_.data_end_offset; return; } } Slice prefix_slice = table_->GetPrefix(target); uint32_t prefix_hash = 0; // Bloom filter is ignored in total-order mode. if (!table_->IsTotalOrderMode()) { prefix_hash = GetSliceHash(prefix_slice); if (!table_->MatchBloom(prefix_hash)) { status_ = Status::OK(); offset_ = next_offset_ = table_->file_info_.data_end_offset; return; } } bool prefix_match; status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash, prefix_match, &next_offset_); if (!status_.ok()) { offset_ = next_offset_ = table_->file_info_.data_end_offset; return; } if (next_offset_ < table_->file_info_.data_end_offset) { for (Next(); status_.ok() && Valid(); Next()) { if (!prefix_match) { // Need to verify the first key's prefix if (table_->GetPrefix(key()) != prefix_slice) { offset_ = next_offset_ = table_->file_info_.data_end_offset; break; } prefix_match = true; } if (table_->internal_comparator_.Compare(key(), target) >= 0) { break; } } } else { offset_ = table_->file_info_.data_end_offset; } } void PlainTableIterator::SeekForPrev(const Slice& /*target*/) { assert(false); status_ = Status::NotSupported("SeekForPrev() is not supported in PlainTable"); offset_ = next_offset_ = table_->file_info_.data_end_offset; } void PlainTableIterator::Next() { offset_ = next_offset_; if (offset_ 
< table_->file_info_.data_end_offset) { Slice tmp_slice; ParsedInternalKey parsed_key; status_ = table_->Next(&decoder_, &next_offset_, &parsed_key, &key_, &value_); if (!status_.ok()) { offset_ = next_offset_ = table_->file_info_.data_end_offset; } } } void PlainTableIterator::Prev() { assert(false); } Slice PlainTableIterator::key() const { assert(Valid()); return key_; } Slice PlainTableIterator::value() const { assert(Valid()); return value_; } Status PlainTableIterator::status() const { return status_; } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/plain/plain_table_reader.h000066400000000000000000000217101370372246700214050ustar00rootroot00000000000000// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #ifndef ROCKSDB_LITE #include #include #include #include #include #include "db/dbformat.h" #include "file/random_access_file_reader.h" #include "memory/arena.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" #include "table/plain/plain_table_bloom.h" #include "table/plain/plain_table_factory.h" #include "table/plain/plain_table_index.h" #include "table/table_reader.h" namespace ROCKSDB_NAMESPACE { class Block; struct BlockContents; class BlockHandle; class Footer; struct Options; class RandomAccessFile; struct ReadOptions; class TableCache; class TableReader; class InternalKeyComparator; class PlainTableKeyDecoder; class GetContext; extern const uint32_t kPlainTableVariableLength; struct PlainTableReaderFileInfo { bool is_mmap_mode; Slice file_data; uint32_t data_end_offset; std::unique_ptr file; PlainTableReaderFileInfo(std::unique_ptr&& _file, const EnvOptions& 
storage_options, uint32_t _data_size_offset) : is_mmap_mode(storage_options.use_mmap_reads), data_end_offset(_data_size_offset), file(std::move(_file)) {} }; // The reader class of PlainTable. For description of PlainTable format // See comments of class PlainTableFactory, where instances of // PlainTableReader are created. class PlainTableReader: public TableReader { public: // Based on following output file format shown in plain_table_factory.h // When opening the output file, PlainTableReader creates a hash table // from key prefixes to offset of the output file. PlainTable will decide // whether it points to the data offset of the first key with the key prefix // or the offset of it. If there are too many keys share this prefix, it will // create a binary search-able index from the suffix to offset on disk. static Status Open(const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, const int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size, bool full_scan_mode, const bool immortal_table = false, const SliceTransform* prefix_extractor = nullptr); // Returns new iterator over table contents // compaction_readahead_size: its value will only be used if for_compaction = // true InternalIterator* NewIterator(const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) override; void Prepare(const Slice& target) override; Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) override; uint64_t ApproximateOffsetOf(const Slice& key, TableReaderCaller caller) override; uint64_t ApproximateSize(const Slice& start, const Slice& end, TableReaderCaller caller) override; 
uint32_t GetIndexSize() const { return index_.GetIndexSize(); } void SetupForCompaction() override; std::shared_ptr GetTableProperties() const override { return table_properties_; } virtual size_t ApproximateMemoryUsage() const override { return arena_.MemoryAllocatedBytes(); } PlainTableReader(const ImmutableCFOptions& ioptions, std::unique_ptr&& file, const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, EncodingType encoding_type, uint64_t file_size, const TableProperties* table_properties, const SliceTransform* prefix_extractor); virtual ~PlainTableReader(); protected: // Check bloom filter to see whether it might contain this prefix. // The hash of the prefix is given, since it can be reused for index lookup // too. virtual bool MatchBloom(uint32_t hash) const; // PopulateIndex() builds index of keys. It must be called before any query // to the table. // // props: the table properties object that need to be stored. Ownership of // the object will be passed. // Status PopulateIndex(TableProperties* props, int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size); Status MmapDataIfNeeded(); private: const InternalKeyComparator internal_comparator_; EncodingType encoding_type_; // represents plain table's current status. Status status_; PlainTableIndex index_; bool full_scan_mode_; // data_start_offset_ and data_end_offset_ defines the range of the // sst file that stores data. 
const uint32_t data_start_offset_ = 0; const uint32_t user_key_len_; const SliceTransform* prefix_extractor_; static const size_t kNumInternalBytes = 8; // Bloom filter is used to rule out non-existent key bool enable_bloom_; PlainTableBloomV1 bloom_; PlainTableReaderFileInfo file_info_; Arena arena_; CacheAllocationPtr index_block_alloc_; CacheAllocationPtr bloom_block_alloc_; const ImmutableCFOptions& ioptions_; std::unique_ptr dummy_cleanable_; uint64_t file_size_; protected: // for testing std::shared_ptr table_properties_; private: bool IsFixedLength() const { return user_key_len_ != kPlainTableVariableLength; } size_t GetFixedInternalKeyLength() const { return user_key_len_ + kNumInternalBytes; } Slice GetPrefix(const Slice& target) const { assert(target.size() >= 8); // target is internal key return GetPrefixFromUserKey(GetUserKey(target)); } Slice GetPrefix(const ParsedInternalKey& target) const { return GetPrefixFromUserKey(target.user_key); } Slice GetUserKey(const Slice& key) const { return Slice(key.data(), key.size() - 8); } Slice GetPrefixFromUserKey(const Slice& user_key) const { if (!IsTotalOrderMode()) { return prefix_extractor_->Transform(user_key); } else { // Use empty slice as prefix if prefix_extractor is not set. // In that case, // it falls back to pure binary search and // total iterator seek is supported. return Slice(); } } friend class TableCache; friend class PlainTableIterator; // Internal helper function to generate an IndexRecordList object from all // the rows, which contains index records as a list. // If bloom_ is not null, all the keys' full-key hash will be added to the // bloom filter. 
Status PopulateIndexRecordList(PlainTableIndexBuilder* index_builder, std::vector* prefix_hashes); // Internal helper function to allocate memory for bloom filter void AllocateBloom(int bloom_bits_per_key, int num_prefixes, size_t huge_page_tlb_size); void FillBloom(const std::vector& prefix_hashes); // Read the key and value at `offset` to parameters for keys, the and // `seekable`. // On success, `offset` will be updated as the offset for the next key. // `parsed_key` will be key in parsed format. // if `internal_key` is not empty, it will be filled with key with slice // format. // if `seekable` is not null, it will return whether we can directly read // data using this offset. Status Next(PlainTableKeyDecoder* decoder, uint32_t* offset, ParsedInternalKey* parsed_key, Slice* internal_key, Slice* value, bool* seekable = nullptr) const; // Get file offset for key target. // return value prefix_matched is set to true if the offset is confirmed // for a key with the same prefix as target. Status GetOffset(PlainTableKeyDecoder* decoder, const Slice& target, const Slice& prefix, uint32_t prefix_hash, bool& prefix_matched, uint32_t* offset) const; bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); } // No copying allowed explicit PlainTableReader(const TableReader&) = delete; void operator=(const TableReader&) = delete; }; } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/table/scoped_arena_iterator.h000066400000000000000000000031611370372246700210420ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
See the AUTHORS file for names of contributors. #pragma once #include "table/internal_iterator.h" #include "port/port.h" namespace ROCKSDB_NAMESPACE { class ScopedArenaIterator { void reset(InternalIterator* iter) ROCKSDB_NOEXCEPT { if (iter_ != nullptr) { iter_->~InternalIterator(); } iter_ = iter; } public: explicit ScopedArenaIterator(InternalIterator* iter = nullptr) : iter_(iter) {} ScopedArenaIterator(const ScopedArenaIterator&) = delete; ScopedArenaIterator& operator=(const ScopedArenaIterator&) = delete; ScopedArenaIterator(ScopedArenaIterator&& o) ROCKSDB_NOEXCEPT { iter_ = o.iter_; o.iter_ = nullptr; } ScopedArenaIterator& operator=(ScopedArenaIterator&& o) ROCKSDB_NOEXCEPT { reset(o.iter_); o.iter_ = nullptr; return *this; } InternalIterator* operator->() { return iter_; } InternalIterator* get() { return iter_; } void set(InternalIterator* iter) { reset(iter); } InternalIterator* release() { assert(iter_ != nullptr); auto* res = iter_; iter_ = nullptr; return res; } ~ScopedArenaIterator() { reset(nullptr); } private: InternalIterator* iter_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/sst_file_reader.cc000066400000000000000000000061351370372246700200020ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "rocksdb/sst_file_reader.h" #include "db/db_iter.h" #include "db/dbformat.h" #include "env/composite_env_wrapper.h" #include "file/random_access_file_reader.h" #include "options/cf_options.h" #include "table/get_context.h" #include "table/table_builder.h" #include "table/table_reader.h" namespace ROCKSDB_NAMESPACE { struct SstFileReader::Rep { Options options; EnvOptions soptions; ImmutableCFOptions ioptions; MutableCFOptions moptions; std::unique_ptr table_reader; Rep(const Options& opts) : options(opts), soptions(options), ioptions(options), moptions(ColumnFamilyOptions(options)) {} }; SstFileReader::SstFileReader(const Options& options) : rep_(new Rep(options)) {} SstFileReader::~SstFileReader() {} Status SstFileReader::Open(const std::string& file_path) { auto r = rep_.get(); Status s; uint64_t file_size = 0; std::unique_ptr file; std::unique_ptr file_reader; s = r->options.env->GetFileSize(file_path, &file_size); if (s.ok()) { s = r->options.env->NewRandomAccessFile(file_path, &file, r->soptions); } if (s.ok()) { file_reader.reset(new RandomAccessFileReader( NewLegacyRandomAccessFileWrapper(file), file_path)); } if (s.ok()) { TableReaderOptions t_opt(r->ioptions, r->moptions.prefix_extractor.get(), r->soptions, r->ioptions.internal_comparator); // Allow open file with global sequence number for backward compatibility. t_opt.largest_seqno = kMaxSequenceNumber; s = r->options.table_factory->NewTableReader(t_opt, std::move(file_reader), file_size, &r->table_reader); } return s; } Iterator* SstFileReader::NewIterator(const ReadOptions& options) { auto r = rep_.get(); auto sequence = options.snapshot != nullptr ? 
options.snapshot->GetSequenceNumber() : kMaxSequenceNumber; auto internal_iter = r->table_reader->NewIterator( options, r->moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kSSTFileReader); return NewDBIterator(r->options.env, options, r->ioptions, r->moptions, r->ioptions.user_comparator, internal_iter, sequence, r->moptions.max_sequential_skip_in_iterations, nullptr /* read_callback */); } std::shared_ptr SstFileReader::GetTableProperties() const { return rep_->table_reader->GetTableProperties(); } Status SstFileReader::VerifyChecksum(const ReadOptions& read_options) { return rep_->table_reader->VerifyChecksum(read_options, TableReaderCaller::kSSTFileReader); } } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE rocksdb-6.11.4/table/sst_file_reader_test.cc000066400000000000000000000135241370372246700210410ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include #include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/sst_file_reader.h" #include "rocksdb/sst_file_writer.h" #include "table/sst_file_writer_collectors.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { std::string EncodeAsString(uint64_t v) { char buf[16]; snprintf(buf, sizeof(buf), "%08" PRIu64, v); return std::string(buf); } std::string EncodeAsUint64(uint64_t v) { std::string dst; PutFixed64(&dst, v); return dst; } class SstFileReaderTest : public testing::Test { public: SstFileReaderTest() { options_.merge_operator = MergeOperators::CreateUInt64AddOperator(); sst_name_ = test::PerThreadDBPath("sst_file"); Env* base_env = Env::Default(); const char* test_env_uri = getenv("TEST_ENV_URI"); if(test_env_uri) { Env* test_env = nullptr; Status s = Env::LoadEnv(test_env_uri, &test_env, &env_guard_); base_env = test_env; EXPECT_OK(s); EXPECT_NE(Env::Default(), base_env); } EXPECT_NE(nullptr, base_env); env_ = base_env; options_.env = env_; } ~SstFileReaderTest() { Status s = env_->DeleteFile(sst_name_); EXPECT_OK(s); } void CreateFile(const std::string& file_name, const std::vector& keys) { SstFileWriter writer(soptions_, options_); ASSERT_OK(writer.Open(file_name)); for (size_t i = 0; i + 2 < keys.size(); i += 3) { ASSERT_OK(writer.Put(keys[i], keys[i])); ASSERT_OK(writer.Merge(keys[i + 1], EncodeAsUint64(i + 1))); ASSERT_OK(writer.Delete(keys[i + 2])); } ASSERT_OK(writer.Finish()); } void CheckFile(const std::string& file_name, const std::vector& keys, bool check_global_seqno = false) { ReadOptions ropts; SstFileReader reader(options_); ASSERT_OK(reader.Open(file_name)); ASSERT_OK(reader.VerifyChecksum()); std::unique_ptr iter(reader.NewIterator(ropts)); iter->SeekToFirst(); for (size_t i = 0; i + 2 < keys.size(); i += 3) { ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(keys[i]), 0); 
ASSERT_EQ(iter->value().compare(keys[i]), 0); iter->Next(); ASSERT_TRUE(iter->Valid()); ASSERT_EQ(iter->key().compare(keys[i + 1]), 0); ASSERT_EQ(iter->value().compare(EncodeAsUint64(i + 1)), 0); iter->Next(); } ASSERT_FALSE(iter->Valid()); if (check_global_seqno) { auto properties = reader.GetTableProperties(); ASSERT_TRUE(properties); auto& user_properties = properties->user_collected_properties; ASSERT_TRUE( user_properties.count(ExternalSstFilePropertyNames::kGlobalSeqno)); } } void CreateFileAndCheck(const std::vector& keys) { CreateFile(sst_name_, keys); CheckFile(sst_name_, keys); } protected: Options options_; EnvOptions soptions_; std::string sst_name_; std::shared_ptr env_guard_; Env* env_; }; const uint64_t kNumKeys = 100; TEST_F(SstFileReaderTest, Basic) { std::vector keys; for (uint64_t i = 0; i < kNumKeys; i++) { keys.emplace_back(EncodeAsString(i)); } CreateFileAndCheck(keys); } TEST_F(SstFileReaderTest, Uint64Comparator) { options_.comparator = test::Uint64Comparator(); std::vector keys; for (uint64_t i = 0; i < kNumKeys; i++) { keys.emplace_back(EncodeAsUint64(i)); } CreateFileAndCheck(keys); } TEST_F(SstFileReaderTest, ReadFileWithGlobalSeqno) { std::vector keys; for (uint64_t i = 0; i < kNumKeys; i++) { keys.emplace_back(EncodeAsString(i)); } // Generate a SST file. CreateFile(sst_name_, keys); // Ingest the file into a db, to assign it a global sequence number. Options options; options.create_if_missing = true; std::string db_name = test::PerThreadDBPath("test_db"); DB* db; ASSERT_OK(DB::Open(options, db_name, &db)); // Bump sequence number. ASSERT_OK(db->Put(WriteOptions(), keys[0], "foo")); ASSERT_OK(db->Flush(FlushOptions())); // Ingest the file. IngestExternalFileOptions ingest_options; ingest_options.write_global_seqno = true; ASSERT_OK(db->IngestExternalFile({sst_name_}, ingest_options)); std::vector live_files; uint64_t manifest_file_size = 0; ASSERT_OK(db->GetLiveFiles(live_files, &manifest_file_size)); // Get the ingested file. 
std::string ingested_file; for (auto& live_file : live_files) { if (live_file.substr(live_file.size() - 4, std::string::npos) == ".sst") { if (ingested_file.empty() || ingested_file < live_file) { ingested_file = live_file; } } } ASSERT_FALSE(ingested_file.empty()); delete db; // Verify the file can be open and read by SstFileReader. CheckFile(db_name + ingested_file, keys, true /* check_global_seqno */); // Cleanup. ASSERT_OK(DestroyDB(db_name, options)); } } // namespace ROCKSDB_NAMESPACE #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS extern "C" { void RegisterCustomObjects(int argc, char** argv); } #else void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} #endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } #else #include int main(int /*argc*/, char** /*argv*/) { fprintf(stderr, "SKIPPED as SstFileReader is not supported in ROCKSDB_LITE\n"); return 0; } #endif // ROCKSDB_LITE rocksdb-6.11.4/table/sst_file_writer.cc000066400000000000000000000264771370372246700200670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "rocksdb/sst_file_writer.h"

#include <vector>

#include "db/dbformat.h"
#include "env/composite_env_wrapper.h"
#include "file/writable_file_writer.h"
#include "rocksdb/table.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/sst_file_writer_collectors.h"
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {

const std::string ExternalSstFilePropertyNames::kVersion =
    "rocksdb.external_sst_file.version";
const std::string ExternalSstFilePropertyNames::kGlobalSeqno =
    "rocksdb.external_sst_file.global_seqno";

#ifndef ROCKSDB_LITE

// Number of newly written bytes after which Rep::InvalidatePageCache() asks
// the file writer to drop cached pages.
const size_t kFadviseTrigger = 1024 * 1024;  // 1MB

// Private state of SstFileWriter: the options it was created with, the file
// writer / table builder of the file currently being written (both null
// until Open() succeeds) and the ExternalSstFileInfo being accumulated.
struct SstFileWriter::Rep {
  Rep(const EnvOptions& _env_options, const Options& options,
      Env::IOPriority _io_priority, const Comparator* _user_comparator,
      ColumnFamilyHandle* _cfh, bool _invalidate_page_cache, bool _skip_filters)
      : env_options(_env_options),
        ioptions(options),
        mutable_cf_options(options),
        io_priority(_io_priority),
        internal_comparator(_user_comparator),
        cfh(_cfh),
        invalidate_page_cache(_invalidate_page_cache),
        last_fadvise_size(0),
        skip_filters(_skip_filters) {}

  std::unique_ptr<WritableFileWriter> file_writer;
  std::unique_ptr<TableBuilder> builder;
  EnvOptions env_options;
  ImmutableCFOptions ioptions;
  MutableCFOptions mutable_cf_options;
  Env::IOPriority io_priority;
  InternalKeyComparator internal_comparator;
  ExternalSstFileInfo file_info;
  InternalKey ikey;
  std::string column_family_name;
  ColumnFamilyHandle* cfh;
  // If true, we give the OS a hint that the pages of this file are not
  // needed every time we write 1MB to the file.
  bool invalidate_page_cache;
  // The size of the file during the last time we called Fadvise to remove
  // cached pages from page cache.
  uint64_t last_fadvise_size;
  bool skip_filters;

  // Appends one point entry (put, merge or delete) to the file.
  //
  // Returns InvalidArgument when no file is open, when user_key is not
  // strictly greater than the previously added key according to the user
  // comparator, or when value_type is not one of kTypeValue, kTypeMerge,
  // kTypeDeletion. On success the entry counters, key bounds and file size
  // in file_info are updated.
  Status Add(const Slice& user_key, const Slice& value,
             const ValueType value_type) {
    if (!builder) {
      return Status::InvalidArgument("File is not opened");
    }

    if (file_info.num_entries == 0) {
      file_info.smallest_key.assign(user_key.data(), user_key.size());
    } else if (internal_comparator.user_comparator()->Compare(
                   user_key, file_info.largest_key) <= 0) {
      // Make sure that keys are added in order
      return Status::InvalidArgument(
          "Keys must be added in strict ascending order.");
    }

    // TODO(tec) : For external SST files we could omit the seqno and type.
    switch (value_type) {
      case ValueType::kTypeValue:     // Put
      case ValueType::kTypeMerge:     // Merge
      case ValueType::kTypeDeletion:  // Delete
        // All supported types are stored with sequence number 0.
        ikey.Set(user_key, 0 /* Sequence Number */, value_type);
        break;
      default:
        return Status::InvalidArgument("Value type is not supported");
    }
    builder->Add(ikey.Encode(), value);

    // update file info
    file_info.num_entries++;
    file_info.largest_key.assign(user_key.data(), user_key.size());
    file_info.file_size = builder->FileSize();

    InvalidatePageCache(false /* closing */);

    return Status::OK();
  }

  // Appends a range tombstone covering [begin_key, end_key) semantics as
  // defined by RangeTombstone, with sequence number 0, and widens the
  // smallest/largest range-deletion keys recorded in file_info.
  //
  // Returns InvalidArgument when no file is open.
  Status DeleteRange(const Slice& begin_key, const Slice& end_key) {
    if (!builder) {
      return Status::InvalidArgument("File is not opened");
    }

    RangeTombstone tombstone(begin_key, end_key, 0 /* Sequence Number */);
    if (file_info.num_range_del_entries == 0) {
      file_info.smallest_range_del_key.assign(tombstone.start_key_.data(),
                                              tombstone.start_key_.size());
      file_info.largest_range_del_key.assign(tombstone.end_key_.data(),
                                             tombstone.end_key_.size());
    } else {
      if (internal_comparator.user_comparator()->Compare(
              tombstone.start_key_, file_info.smallest_range_del_key) < 0) {
        file_info.smallest_range_del_key.assign(tombstone.start_key_.data(),
                                                tombstone.start_key_.size());
      }
      if (internal_comparator.user_comparator()->Compare(
              tombstone.end_key_, file_info.largest_range_del_key) > 0) {
        file_info.largest_range_del_key.assign(tombstone.end_key_.data(),
                                               tombstone.end_key_.size());
      }
    }

    auto ikey_and_end_key = tombstone.Serialize();
    builder->Add(ikey_and_end_key.first.Encode(), ikey_and_end_key.second);

    // update file info
    file_info.num_range_del_entries++;
    file_info.file_size = builder->FileSize();

    InvalidatePageCache(false /* closing */);

    return Status::OK();
  }

  // When invalidate_page_cache is set, asks the file writer to invalidate
  // its cache once more than kFadviseTrigger bytes were written since the
  // previous call, or unconditionally when `closing` is true.
  // Requires builder and file_writer to be non-null.
  void InvalidatePageCache(bool closing) {
    if (!invalidate_page_cache) {
      // Fadvise disabled
      return;
    }
    uint64_t bytes_since_last_fadvise =
        builder->FileSize() - last_fadvise_size;
    if (bytes_since_last_fadvise > kFadviseTrigger || closing) {
      TEST_SYNC_POINT_CALLBACK("SstFileWriter::Rep::InvalidatePageCache",
                               &(bytes_since_last_fadvise));
      // Tell the OS that we don't need this file in page cache.
      // The result of InvalidateCache() is not checked.
      file_writer->InvalidateCache(0, 0);
      last_fadvise_size = builder->FileSize();
    }
  }
};

SstFileWriter::SstFileWriter(const EnvOptions& env_options,
                             const Options& options,
                             const Comparator* user_comparator,
                             ColumnFamilyHandle* column_family,
                             bool invalidate_page_cache,
                             Env::IOPriority io_priority, bool skip_filters)
    : rep_(new Rep(env_options, options, io_priority, user_comparator,
                   column_family, invalidate_page_cache, skip_filters)) {
  rep_->file_info.file_size = 0;
}

SstFileWriter::~SstFileWriter() {
  if (rep_->builder) {
    // User did not call Finish() or Finish() failed, we need to
    // abandon the builder.
    rep_->builder->Abandon();
  }
}

// Creates `file_path` through the configured Env and prepares a table
// builder for it. Returns the Env error if the file cannot be created.
// On success file_info is reset and stamped with the path and version 2.
Status SstFileWriter::Open(const std::string& file_path) {
  Rep* r = rep_.get();
  Status s;
  std::unique_ptr<WritableFile> sst_file;
  s = r->ioptions.env->NewWritableFile(file_path, &sst_file, r->env_options);
  if (!s.ok()) {
    return s;
  }

  sst_file->SetIOPriority(r->io_priority);

  // Compression selection, in priority order: bottommost_compression (when
  // not disabled), the last entry of compression_per_level, then the plain
  // `compression` option.
  CompressionType compression_type;
  CompressionOptions compression_opts;
  if (r->mutable_cf_options.bottommost_compression !=
      kDisableCompressionOption) {
    compression_type = r->mutable_cf_options.bottommost_compression;
    if (r->mutable_cf_options.bottommost_compression_opts.enabled) {
      compression_opts = r->mutable_cf_options.bottommost_compression_opts;
    } else {
      compression_opts = r->mutable_cf_options.compression_opts;
    }
  } else if (!r->ioptions.compression_per_level.empty()) {
    // Use the compression of the last level if we have per level compression
    compression_type = *(r->ioptions.compression_per_level.rbegin());
    compression_opts = r->mutable_cf_options.compression_opts;
  } else {
    compression_type = r->mutable_cf_options.compression;
    compression_opts = r->mutable_cf_options.compression_opts;
  }
  uint64_t sample_for_compression =
      r->mutable_cf_options.sample_for_compression;

  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;

  // SstFileWriter properties collector to add SstFileWriter version.
  int_tbl_prop_collector_factories.emplace_back(
      new SstFileWriterPropertiesCollectorFactory(2 /* version */,
                                                  0 /* global_seqno*/));

  // User collector factories
  for (const auto& user_collector_factory :
       r->ioptions.table_properties_collector_factories) {
    int_tbl_prop_collector_factories.emplace_back(
        new UserKeyTablePropertiesCollectorFactory(user_collector_factory));
  }
  int unknown_level = -1;
  uint32_t cf_id;

  if (r->cfh != nullptr) {
    // user explicitly specified that this file will be ingested into cfh,
    // we can persist this information in the file.
    cf_id = r->cfh->GetID();
    r->column_family_name = r->cfh->GetName();
  } else {
    r->column_family_name = "";
    cf_id = TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
  }

  // NOTE(review): TableBuilderOptions keeps references/pointers to the
  // locals above (compression_opts, int_tbl_prop_collector_factories);
  // presumably NewTableBuilder() consumes them before Open() returns --
  // confirm against the table factory in use.
  TableBuilderOptions table_builder_options(
      r->ioptions, r->mutable_cf_options, r->internal_comparator,
      &int_tbl_prop_collector_factories, compression_type,
      sample_for_compression, compression_opts, r->skip_filters,
      r->column_family_name, unknown_level);
  r->file_writer.reset(new WritableFileWriter(
      NewLegacyWritableFileWrapper(std::move(sst_file)), file_path,
      r->env_options, r->ioptions.env, nullptr /* stats */,
      r->ioptions.listeners, r->ioptions.file_checksum_gen_factory));

  // TODO(tec) : If table_factory is using compressed block cache, we will
  // be adding the external sst file blocks into it, which is wasteful.
  r->builder.reset(r->ioptions.table_factory->NewTableBuilder(
      table_builder_options, cf_id, r->file_writer.get()));

  r->file_info = ExternalSstFileInfo();
  r->file_info.file_path = file_path;
  r->file_info.version = 2;
  return s;
}

// Add() and Put() are equivalent: both store a kTypeValue entry.
Status SstFileWriter::Add(const Slice& user_key, const Slice& value) {
  return rep_->Add(user_key, value, ValueType::kTypeValue);
}

Status SstFileWriter::Put(const Slice& user_key, const Slice& value) {
  return rep_->Add(user_key, value, ValueType::kTypeValue);
}

Status SstFileWriter::Merge(const Slice& user_key, const Slice& value) {
  return rep_->Add(user_key, value, ValueType::kTypeMerge);
}

Status SstFileWriter::Delete(const Slice& user_key) {
  return rep_->Add(user_key, Slice(), ValueType::kTypeDeletion);
}

Status SstFileWriter::DeleteRange(const Slice& begin_key,
                                  const Slice& end_key) {
  return rep_->DeleteRange(begin_key, end_key);
}

// Finalizes the table, syncs and closes the file, and copies the collected
// ExternalSstFileInfo to *file_info when it is non-null.
//
// Returns InvalidArgument when no file is open or when nothing was added.
// On any later failure the partially written file is deleted. In every case
// past the two argument checks the builder is released, so the writer must
// be re-Open()ed before further use.
Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
  Rep* r = rep_.get();
  if (!r->builder) {
    return Status::InvalidArgument("File is not opened");
  }
  if (r->file_info.num_entries == 0 &&
      r->file_info.num_range_del_entries == 0) {
    return Status::InvalidArgument("Cannot create sst file with no entries");
  }

  Status s = r->builder->Finish();
  r->file_info.file_size = r->builder->FileSize();

  if (s.ok()) {
    s = r->file_writer->Sync(r->ioptions.use_fsync);
    r->InvalidatePageCache(true /* closing */);
    if (s.ok()) {
      s = r->file_writer->Close();
    }
  }
  if (s.ok()) {
    r->file_info.file_checksum = r->file_writer->GetFileChecksum();
    r->file_info.file_checksum_func_name =
        r->file_writer->GetFileChecksumFuncName();
  }
  if (!s.ok()) {
    // The status of DeleteFile() is not checked; the error already held in
    // `s` is what gets returned to the caller.
    r->ioptions.env->DeleteFile(r->file_info.file_path);
  }

  if (file_info != nullptr) {
    *file_info = r->file_info;
  }

  r->builder.reset();
  return s;
}

// Returns the file size recorded after the most recent successful Add(),
// DeleteRange() or Finish() call.
uint64_t SstFileWriter::FileSize() {
  return rep_->file_info.file_size;
}
#endif  // !ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/sst_file_writer_collectors.h000066400000000000000000000061771370372246700221550ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once
#include <string>
#include "db/dbformat.h"
#include "db/table_properties_collector.h"
#include "rocksdb/types.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// Table Properties that are specific to tables created by SstFileWriter.
struct ExternalSstFilePropertyNames {
  // value of this property is a fixed uint32 number.
  static const std::string kVersion;
  // value of this property is a fixed uint64 number.
  static const std::string kGlobalSeqno;
};

// PropertiesCollector used to add properties specific to tables
// generated by SstFileWriter
class SstFileWriterPropertiesCollector : public IntTblPropCollector {
 public:
  explicit SstFileWriterPropertiesCollector(int32_t version,
                                            SequenceNumber global_seqno)
      : version_(version), global_seqno_(global_seqno) {}

  virtual Status InternalAdd(const Slice& /*key*/, const Slice& /*value*/,
                             uint64_t /*file_size*/) override {
    // Intentionally left blank. Have no interest in collecting stats for
    // individual key/value pairs.
    return Status::OK();
  }

  virtual void BlockAdd(uint64_t /* blockRawBytes */,
                        uint64_t /* blockCompressedBytesFast */,
                        uint64_t /* blockCompressedBytesSlow */) override {
    // Intentionally left blank. No interest in collecting stats for
    // blocks.
    return;
  }

  // Stores the version as a fixed 32-bit value and the global sequence
  // number as a fixed 64-bit value under the ExternalSstFilePropertyNames
  // keys.
  virtual Status Finish(UserCollectedProperties* properties) override {
    // File version
    std::string version_val;
    PutFixed32(&version_val, static_cast<uint32_t>(version_));
    properties->insert({ExternalSstFilePropertyNames::kVersion, version_val});

    // Global Sequence number
    std::string seqno_val;
    PutFixed64(&seqno_val, static_cast<uint64_t>(global_seqno_));
    properties->insert({ExternalSstFilePropertyNames::kGlobalSeqno, seqno_val});

    return Status::OK();
  }

  virtual const char* Name() const override {
    return "SstFileWriterPropertiesCollector";
  }

  // Only the version is reported in human-readable form.
  virtual UserCollectedProperties GetReadableProperties() const override {
    return {{ExternalSstFilePropertyNames::kVersion, ToString(version_)}};
  }

 private:
  int32_t version_;
  SequenceNumber global_seqno_;
};

// Factory producing SstFileWriterPropertiesCollector instances that all
// carry the same version and global sequence number.
class SstFileWriterPropertiesCollectorFactory
    : public IntTblPropCollectorFactory {
 public:
  explicit SstFileWriterPropertiesCollectorFactory(int32_t version,
                                                   SequenceNumber global_seqno)
      : version_(version), global_seqno_(global_seqno) {}

  virtual IntTblPropCollector* CreateIntTblPropCollector(
      uint32_t /*column_family_id*/) override {
    return new SstFileWriterPropertiesCollector(version_, global_seqno_);
  }

  virtual const char* Name() const override {
    return "SstFileWriterPropertiesCollector";
  }

 private:
  int32_t version_;
  SequenceNumber global_seqno_;
};

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/table_builder.h000066400000000000000000000200041370372246700172760ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include #include #include #include "db/dbformat.h" #include "db/table_properties_collector.h" #include "file/writable_file_writer.h" #include "options/cf_options.h" #include "rocksdb/options.h" #include "rocksdb/table_properties.h" #include "trace_replay/block_cache_tracer.h" namespace ROCKSDB_NAMESPACE { class Slice; class Status; struct TableReaderOptions { // @param skip_filters Disables loading/accessing the filter block TableReaderOptions(const ImmutableCFOptions& _ioptions, const SliceTransform* _prefix_extractor, const EnvOptions& _env_options, const InternalKeyComparator& _internal_comparator, bool _skip_filters = false, bool _immortal = false, bool _force_direct_prefetch = false, int _level = -1, BlockCacheTracer* const _block_cache_tracer = nullptr, size_t _max_file_size_for_l0_meta_pin = 0) : TableReaderOptions(_ioptions, _prefix_extractor, _env_options, _internal_comparator, _skip_filters, _immortal, _force_direct_prefetch, _level, 0 /* _largest_seqno */, _block_cache_tracer, _max_file_size_for_l0_meta_pin) {} // @param skip_filters Disables loading/accessing the filter block TableReaderOptions(const ImmutableCFOptions& _ioptions, const SliceTransform* _prefix_extractor, const EnvOptions& _env_options, const InternalKeyComparator& _internal_comparator, bool _skip_filters, bool _immortal, bool _force_direct_prefetch, int _level, SequenceNumber _largest_seqno, BlockCacheTracer* const _block_cache_tracer, size_t _max_file_size_for_l0_meta_pin) : ioptions(_ioptions), prefix_extractor(_prefix_extractor), env_options(_env_options), 
internal_comparator(_internal_comparator), skip_filters(_skip_filters), immortal(_immortal), force_direct_prefetch(_force_direct_prefetch), level(_level), largest_seqno(_largest_seqno), block_cache_tracer(_block_cache_tracer), max_file_size_for_l0_meta_pin(_max_file_size_for_l0_meta_pin) {} const ImmutableCFOptions& ioptions; const SliceTransform* prefix_extractor; const EnvOptions& env_options; const InternalKeyComparator& internal_comparator; // This is only used for BlockBasedTable (reader) bool skip_filters; // Whether the table will be valid as long as the DB is open bool immortal; // When data prefetching is needed, even if direct I/O is off, read data to // fetch into RocksDB's buffer, rather than relying // RandomAccessFile::Prefetch(). bool force_direct_prefetch; // what level this table/file is on, -1 for "not set, don't know" int level; // largest seqno in the table SequenceNumber largest_seqno; BlockCacheTracer* const block_cache_tracer; // Largest L0 file size whose meta-blocks may be pinned (can be zero when // unknown). 
const size_t max_file_size_for_l0_meta_pin; }; struct TableBuilderOptions { TableBuilderOptions( const ImmutableCFOptions& _ioptions, const MutableCFOptions& _moptions, const InternalKeyComparator& _internal_comparator, const std::vector>* _int_tbl_prop_collector_factories, CompressionType _compression_type, uint64_t _sample_for_compression, const CompressionOptions& _compression_opts, bool _skip_filters, const std::string& _column_family_name, int _level, const uint64_t _creation_time = 0, const int64_t _oldest_key_time = 0, const uint64_t _target_file_size = 0, const uint64_t _file_creation_time = 0) : ioptions(_ioptions), moptions(_moptions), internal_comparator(_internal_comparator), int_tbl_prop_collector_factories(_int_tbl_prop_collector_factories), compression_type(_compression_type), sample_for_compression(_sample_for_compression), compression_opts(_compression_opts), skip_filters(_skip_filters), column_family_name(_column_family_name), level(_level), creation_time(_creation_time), oldest_key_time(_oldest_key_time), target_file_size(_target_file_size), file_creation_time(_file_creation_time) {} const ImmutableCFOptions& ioptions; const MutableCFOptions& moptions; const InternalKeyComparator& internal_comparator; const std::vector>* int_tbl_prop_collector_factories; CompressionType compression_type; uint64_t sample_for_compression; const CompressionOptions& compression_opts; bool skip_filters; // only used by BlockBasedTableBuilder const std::string& column_family_name; int level; // what level this table/file is on, -1 for "not set, don't know" const uint64_t creation_time; const int64_t oldest_key_time; const uint64_t target_file_size; const uint64_t file_creation_time; }; // TableBuilder provides the interface used to build a Table // (an immutable and sorted map from keys to values). 
// // Multiple threads can invoke const methods on a TableBuilder without // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same TableBuilder must use // external synchronization. class TableBuilder { public: // REQUIRES: Either Finish() or Abandon() has been called. virtual ~TableBuilder() {} // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. // REQUIRES: Finish(), Abandon() have not been called virtual void Add(const Slice& key, const Slice& value) = 0; // Return non-ok iff some error has been detected. virtual Status status() const = 0; // Return non-ok iff some error happens during IO. virtual IOStatus io_status() const = 0; // Finish building the table. // REQUIRES: Finish(), Abandon() have not been called virtual Status Finish() = 0; // Indicate that the contents of this builder should be abandoned. // If the caller is not going to call Finish(), it must call Abandon() // before destroying this builder. // REQUIRES: Finish(), Abandon() have not been called virtual void Abandon() = 0; // Number of calls to Add() so far. virtual uint64_t NumEntries() const = 0; // Whether the output file is completely empty. It has neither entries // or tombstones. virtual bool IsEmpty() const { return NumEntries() == 0 && GetTableProperties().num_range_deletions == 0; } // Size of the file generated so far. If invoked after a successful // Finish() call, returns the size of the final generated file. virtual uint64_t FileSize() const = 0; // Estimated size of the file generated so far. This is used when // FileSize() cannot estimate final SST size, e.g. parallel compression // is enabled. virtual uint64_t EstimatedFileSize() const { return FileSize(); } // If the user defined table properties collector suggest the file to // be further compacted. 
virtual bool NeedCompact() const { return false; } // Returns table properties virtual TableProperties GetTableProperties() const = 0; // Return file checksum virtual std::string GetFileChecksum() const = 0; // Return file checksum function name virtual const char* GetFileChecksumFuncName() const = 0; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/table_properties.cc000066400000000000000000000247651370372246700202240ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "rocksdb/table_properties.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "table/block_based/block.h" #include "table/internal_iterator.h" #include "table/table_properties_internal.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { const uint32_t TablePropertiesCollectorFactory::Context::kUnknownColumnFamily = port::kMaxInt32; namespace { void AppendProperty( std::string& props, const std::string& key, const std::string& value, const std::string& prop_delim, const std::string& kv_delim) { props.append(key); props.append(kv_delim); props.append(value); props.append(prop_delim); } template void AppendProperty( std::string& props, const std::string& key, const TValue& value, const std::string& prop_delim, const std::string& kv_delim) { AppendProperty( props, key, ToString(value), prop_delim, kv_delim ); } // Seek to the specified meta block. // Return true if it successfully seeks to that block. 
Status SeekToMetaBlock(InternalIterator* meta_iter, const std::string& block_name, bool* is_found, BlockHandle* block_handle = nullptr) { if (block_handle != nullptr) { *block_handle = BlockHandle::NullBlockHandle(); } *is_found = true; meta_iter->Seek(block_name); if (meta_iter->status().ok()) { if (meta_iter->Valid() && meta_iter->key() == block_name) { *is_found = true; if (block_handle) { Slice v = meta_iter->value(); return block_handle->DecodeFrom(&v); } } else { *is_found = false; return Status::OK(); } } return meta_iter->status(); } } std::string TableProperties::ToString( const std::string& prop_delim, const std::string& kv_delim) const { std::string result; result.reserve(1024); // Basic Info AppendProperty(result, "# data blocks", num_data_blocks, prop_delim, kv_delim); AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); AppendProperty(result, "# deletions", num_deletions, prop_delim, kv_delim); AppendProperty(result, "# merge operands", num_merge_operands, prop_delim, kv_delim); AppendProperty(result, "# range deletions", num_range_deletions, prop_delim, kv_delim); AppendProperty(result, "raw key size", raw_key_size, prop_delim, kv_delim); AppendProperty(result, "raw average key size", num_entries != 0 ? 1.0 * raw_key_size / num_entries : 0.0, prop_delim, kv_delim); AppendProperty(result, "raw value size", raw_value_size, prop_delim, kv_delim); AppendProperty(result, "raw average value size", num_entries != 0 ? 1.0 * raw_value_size / num_entries : 0.0, prop_delim, kv_delim); AppendProperty(result, "data block size", data_size, prop_delim, kv_delim); char index_block_size_str[80]; snprintf(index_block_size_str, sizeof(index_block_size_str), "index block size (user-key? %d, delta-value? 
%d)", static_cast(index_key_is_user_key), static_cast(index_value_is_delta_encoded)); AppendProperty(result, index_block_size_str, index_size, prop_delim, kv_delim); if (index_partitions != 0) { AppendProperty(result, "# index partitions", index_partitions, prop_delim, kv_delim); AppendProperty(result, "top-level index size", top_level_index_size, prop_delim, kv_delim); } AppendProperty(result, "filter block size", filter_size, prop_delim, kv_delim); AppendProperty(result, "(estimated) table size", data_size + index_size + filter_size, prop_delim, kv_delim); AppendProperty( result, "filter policy name", filter_policy_name.empty() ? std::string("N/A") : filter_policy_name, prop_delim, kv_delim); AppendProperty(result, "prefix extractor name", prefix_extractor_name.empty() ? std::string("N/A") : prefix_extractor_name, prop_delim, kv_delim); AppendProperty(result, "column family ID", column_family_id == ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory:: Context::kUnknownColumnFamily ? std::string("N/A") : ROCKSDB_NAMESPACE::ToString(column_family_id), prop_delim, kv_delim); AppendProperty( result, "column family name", column_family_name.empty() ? std::string("N/A") : column_family_name, prop_delim, kv_delim); AppendProperty(result, "comparator name", comparator_name.empty() ? std::string("N/A") : comparator_name, prop_delim, kv_delim); AppendProperty( result, "merge operator name", merge_operator_name.empty() ? std::string("N/A") : merge_operator_name, prop_delim, kv_delim); AppendProperty(result, "property collectors names", property_collectors_names.empty() ? std::string("N/A") : property_collectors_names, prop_delim, kv_delim); AppendProperty( result, "SST file compression algo", compression_name.empty() ? std::string("N/A") : compression_name, prop_delim, kv_delim); AppendProperty( result, "SST file compression options", compression_options.empty() ? 
std::string("N/A") : compression_options, prop_delim, kv_delim); AppendProperty(result, "creation time", creation_time, prop_delim, kv_delim); AppendProperty(result, "time stamp of earliest key", oldest_key_time, prop_delim, kv_delim); AppendProperty(result, "file creation time", file_creation_time, prop_delim, kv_delim); return result; } void TableProperties::Add(const TableProperties& tp) { data_size += tp.data_size; index_size += tp.index_size; index_partitions += tp.index_partitions; top_level_index_size += tp.top_level_index_size; index_key_is_user_key += tp.index_key_is_user_key; index_value_is_delta_encoded += tp.index_value_is_delta_encoded; filter_size += tp.filter_size; raw_key_size += tp.raw_key_size; raw_value_size += tp.raw_value_size; num_data_blocks += tp.num_data_blocks; num_entries += tp.num_entries; num_deletions += tp.num_deletions; num_merge_operands += tp.num_merge_operands; num_range_deletions += tp.num_range_deletions; } const std::string TablePropertiesNames::kDataSize = "rocksdb.data.size"; const std::string TablePropertiesNames::kIndexSize = "rocksdb.index.size"; const std::string TablePropertiesNames::kIndexPartitions = "rocksdb.index.partitions"; const std::string TablePropertiesNames::kTopLevelIndexSize = "rocksdb.top-level.index.size"; const std::string TablePropertiesNames::kIndexKeyIsUserKey = "rocksdb.index.key.is.user.key"; const std::string TablePropertiesNames::kIndexValueIsDeltaEncoded = "rocksdb.index.value.is.delta.encoded"; const std::string TablePropertiesNames::kFilterSize = "rocksdb.filter.size"; const std::string TablePropertiesNames::kRawKeySize = "rocksdb.raw.key.size"; const std::string TablePropertiesNames::kRawValueSize = "rocksdb.raw.value.size"; const std::string TablePropertiesNames::kNumDataBlocks = "rocksdb.num.data.blocks"; const std::string TablePropertiesNames::kNumEntries = "rocksdb.num.entries"; const std::string TablePropertiesNames::kDeletedKeys = "rocksdb.deleted.keys"; const std::string 
TablePropertiesNames::kMergeOperands = "rocksdb.merge.operands"; const std::string TablePropertiesNames::kNumRangeDeletions = "rocksdb.num.range-deletions"; const std::string TablePropertiesNames::kFilterPolicy = "rocksdb.filter.policy"; const std::string TablePropertiesNames::kFormatVersion = "rocksdb.format.version"; const std::string TablePropertiesNames::kFixedKeyLen = "rocksdb.fixed.key.length"; const std::string TablePropertiesNames::kColumnFamilyId = "rocksdb.column.family.id"; const std::string TablePropertiesNames::kColumnFamilyName = "rocksdb.column.family.name"; const std::string TablePropertiesNames::kComparator = "rocksdb.comparator"; const std::string TablePropertiesNames::kMergeOperator = "rocksdb.merge.operator"; const std::string TablePropertiesNames::kPrefixExtractorName = "rocksdb.prefix.extractor.name"; const std::string TablePropertiesNames::kPropertyCollectors = "rocksdb.property.collectors"; const std::string TablePropertiesNames::kCompression = "rocksdb.compression"; const std::string TablePropertiesNames::kCompressionOptions = "rocksdb.compression_options"; const std::string TablePropertiesNames::kCreationTime = "rocksdb.creation.time"; const std::string TablePropertiesNames::kOldestKeyTime = "rocksdb.oldest.key.time"; const std::string TablePropertiesNames::kFileCreationTime = "rocksdb.file.creation.time"; extern const std::string kPropertiesBlock = "rocksdb.properties"; // Old property block name for backward compatibility extern const std::string kPropertiesBlockOldName = "rocksdb.stats"; extern const std::string kCompressionDictBlock = "rocksdb.compression_dict"; extern const std::string kRangeDelBlock = "rocksdb.range_del"; // Seek to the properties block. // Return true if it successfully seeks to the properties block. 
Status SeekToPropertiesBlock(InternalIterator* meta_iter, bool* is_found) { Status status = SeekToMetaBlock(meta_iter, kPropertiesBlock, is_found); if (!*is_found && status.ok()) { status = SeekToMetaBlock(meta_iter, kPropertiesBlockOldName, is_found); } return status; } // Seek to the compression dictionary block. // Return true if it successfully seeks to that block. Status SeekToCompressionDictBlock(InternalIterator* meta_iter, bool* is_found, BlockHandle* block_handle) { return SeekToMetaBlock(meta_iter, kCompressionDictBlock, is_found, block_handle); } Status SeekToRangeDelBlock(InternalIterator* meta_iter, bool* is_found, BlockHandle* block_handle = nullptr) { return SeekToMetaBlock(meta_iter, kRangeDelBlock, is_found, block_handle); } } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/table_properties_internal.h000066400000000000000000000020761370372246700217510ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include "rocksdb/status.h" #include "rocksdb/iterator.h" namespace ROCKSDB_NAMESPACE { class BlockHandle; // Seek to the properties block. // If it successfully seeks to the properties block, "is_found" will be // set to true. Status SeekToPropertiesBlock(InternalIterator* meta_iter, bool* is_found); // Seek to the compression dictionary block. // If it successfully seeks to the properties block, "is_found" will be // set to true. 
Status SeekToCompressionDictBlock(InternalIterator* meta_iter, bool* is_found, BlockHandle* block_handle); // TODO(andrewkr) should not put all meta block in table_properties.h/cc Status SeekToRangeDelBlock(InternalIterator* meta_iter, bool* is_found, BlockHandle* block_handle); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/table_reader.h000066400000000000000000000145201370372246700171200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include #include "db/range_tombstone_fragmenter.h" #include "rocksdb/slice_transform.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/multiget_context.h" #include "table/table_reader_caller.h" namespace ROCKSDB_NAMESPACE { class Iterator; struct ParsedInternalKey; class Slice; class Arena; struct ReadOptions; struct TableProperties; class GetContext; class MultiGetContext; // A Table (also referred to as SST) is a sorted map from strings to strings. // Tables are immutable and persistent. A Table may be safely accessed from // multiple threads without external synchronization. Table readers are used // for reading various types of table formats supported by rocksdb including // BlockBasedTable, PlainTable and CuckooTable format. class TableReader { public: virtual ~TableReader() {} // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). 
// arena: If not null, the arena needs to be used to allocate the Iterator. // When destroying the iterator, the caller will not call "delete" // but Iterator::~Iterator() directly. The destructor needs to destroy // all the states but those allocated in arena. // skip_filters: disables checking the bloom filters even if they exist. This // option is effective only for block-based table format. // compaction_readahead_size: its value will only be used if caller = // kCompaction virtual InternalIterator* NewIterator( const ReadOptions&, const SliceTransform* prefix_extractor, Arena* arena, bool skip_filters, TableReaderCaller caller, size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) = 0; virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& /*read_options*/) { return nullptr; } // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were // present in the file). The returned value is in terms of file // bytes, and so includes effects like compression of the underlying data. // E.g., the approximate offset of the last key in the table will // be close to the file length. // TODO(peterd): Since this function is only used for approximate size // from beginning of file, reduce code duplication by removing this // function and letting ApproximateSize take optional start and end, so // that absolute start and end can be specified and optimized without // key / index work. virtual uint64_t ApproximateOffsetOf(const Slice& key, TableReaderCaller caller) = 0; // Given start and end keys, return the approximate data size in the file // between the keys. The returned value is in terms of file bytes, and so // includes effects like compression of the underlying data and applicable // portions of metadata including filters and indexes. Nullptr for start or // end (or both) indicates absolute start or end of the table. 
virtual uint64_t ApproximateSize(const Slice& start, const Slice& end, TableReaderCaller caller) = 0; // Set up the table for Compaction. Might change some parameters with // posix_fadvise virtual void SetupForCompaction() = 0; virtual std::shared_ptr GetTableProperties() const = 0; // Prepare work that can be done before the real Get() virtual void Prepare(const Slice& /*target*/) {} // Report an approximation of how much memory has been used. virtual size_t ApproximateMemoryUsage() const = 0; // Calls get_context->SaveValue() repeatedly, starting with // the entry found after a call to Seek(key), until it returns false. // May not make such a call if filter policy says that key is not present. // // get_context->MarkKeyMayExist needs to be called when it is configured to be // memory only and the key is not found in the block cache. // // readOptions is the options for the read // key is the key to search for // skip_filters: disables checking the bloom filters even if they exist. This // option is effective only for block-based table format. virtual Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, bool skip_filters = false) = 0; virtual void MultiGet(const ReadOptions& readOptions, const MultiGetContext::Range* mget_range, const SliceTransform* prefix_extractor, bool skip_filters = false) { for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) { *iter->s = Get(readOptions, iter->ikey, iter->get_context, prefix_extractor, skip_filters); } } // Prefetch data corresponding to a give range of keys // Typically this functionality is required for table implementations that // persists the data on a non volatile storage medium like disk/SSD virtual Status Prefetch(const Slice* begin = nullptr, const Slice* end = nullptr) { (void) begin; (void) end; // Default implementation is NOOP. 
// The child class should implement functionality when applicable return Status::OK(); } // convert db file to a human readable form virtual Status DumpTable(WritableFile* /*out_file*/) { return Status::NotSupported("DumpTable() not supported"); } // check whether there is corruption in this db file virtual Status VerifyChecksum(const ReadOptions& /*read_options*/, TableReaderCaller /*caller*/) { return Status::NotSupported("VerifyChecksum() not supported"); } }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/table_reader_bench.cc000066400000000000000000000314771370372246700204270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #ifndef GFLAGS #include int main() { fprintf(stderr, "Please install gflags to run rocksdb tools\n"); return 1; } #else #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "env/composite_env_wrapper.h" #include "file/random_access_file_reader.h" #include "monitoring/histogram.h" #include "rocksdb/db.h" #include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "table/block_based/block_based_table_factory.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/plain/plain_table_factory.h" #include "table/table_builder.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/gflags_compat.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::SetUsageMessage; namespace ROCKSDB_NAMESPACE { namespace { // Make a key that i determines the first 4 characters and j determines the // last 4 characters. 
static std::string MakeKey(int i, int j, bool through_db) { char buf[100]; snprintf(buf, sizeof(buf), "%04d__key___%04d", i, j); if (through_db) { return std::string(buf); } // If we directly query table, which operates on internal keys // instead of user keys, we need to add 8 bytes of internal // information (row type etc) to user key to make an internal // key. InternalKey key(std::string(buf), 0, ValueType::kTypeValue); return key.Encode().ToString(); } uint64_t Now(Env* env, bool measured_by_nanosecond) { return measured_by_nanosecond ? env->NowNanos() : env->NowMicros(); } } // namespace // A very simple benchmark that. // Create a table with roughly numKey1 * numKey2 keys, // where there are numKey1 prefixes of the key, each has numKey2 number of // distinguished key, differing in the suffix part. // If if_query_empty_keys = false, query the existing keys numKey1 * numKey2 // times randomly. // If if_query_empty_keys = true, query numKey1 * numKey2 random empty keys. // Print out the total time. // If through_db=true, a full DB will be created and queries will be against // it. Otherwise, operations will be directly through table level. // // If for_terator=true, instead of just query one key each time, it queries // a range sharing the same prefix. 
namespace { void TableReaderBenchmark(Options& opts, EnvOptions& env_options, ReadOptions& read_options, int num_keys1, int num_keys2, int num_iter, int /*prefix_len*/, bool if_query_empty_keys, bool for_iterator, bool through_db, bool measured_by_nanosecond) { ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator); std::string file_name = test::PerThreadDBPath("rocksdb_table_reader_benchmark"); std::string dbname = test::PerThreadDBPath("rocksdb_table_reader_bench_db"); WriteOptions wo; Env* env = Env::Default(); TableBuilder* tb = nullptr; DB* db = nullptr; Status s; const ImmutableCFOptions ioptions(opts); const ColumnFamilyOptions cfo(opts); const MutableCFOptions moptions(cfo); std::unique_ptr file_writer; if (!through_db) { std::unique_ptr file; env->NewWritableFile(file_name, &file, env_options); std::vector > int_tbl_prop_collector_factories; file_writer.reset(new WritableFileWriter( NewLegacyWritableFileWrapper(std::move(file)), file_name, env_options)); int unknown_level = -1; tb = opts.table_factory->NewTableBuilder( TableBuilderOptions( ioptions, moptions, ikc, &int_tbl_prop_collector_factories, CompressionType::kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, kDefaultColumnFamilyName, unknown_level), 0 /* column_family_id */, file_writer.get()); } else { s = DB::Open(opts, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); } // Populate slightly more than 1M keys for (int i = 0; i < num_keys1; i++) { for (int j = 0; j < num_keys2; j++) { std::string key = MakeKey(i * 2, j, through_db); if (!through_db) { tb->Add(key, key); } else { db->Put(wo, key, key); } } } if (!through_db) { tb->Finish(); file_writer->Close(); } else { db->Flush(FlushOptions()); } std::unique_ptr table_reader; if (!through_db) { std::unique_ptr raf; s = env->NewRandomAccessFile(file_name, &raf, env_options); if (!s.ok()) { fprintf(stderr, "Create File Error: %s\n", s.ToString().c_str()); exit(1); } uint64_t file_size; 
env->GetFileSize(file_name, &file_size); std::unique_ptr file_reader( new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(raf), file_name)); s = opts.table_factory->NewTableReader( TableReaderOptions(ioptions, moptions.prefix_extractor.get(), env_options, ikc), std::move(file_reader), file_size, &table_reader); if (!s.ok()) { fprintf(stderr, "Open Table Error: %s\n", s.ToString().c_str()); exit(1); } } Random rnd(301); std::string result; HistogramImpl hist; for (int it = 0; it < num_iter; it++) { for (int i = 0; i < num_keys1; i++) { for (int j = 0; j < num_keys2; j++) { int r1 = rnd.Uniform(num_keys1) * 2; int r2 = rnd.Uniform(num_keys2); if (if_query_empty_keys) { r1++; r2 = num_keys2 * 2 - r2; } if (!for_iterator) { // Query one existing key; std::string key = MakeKey(r1, r2, through_db); uint64_t start_time = Now(env, measured_by_nanosecond); if (!through_db) { PinnableSlice value; MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; GetContext get_context(ioptions.user_comparator, ioptions.merge_operator, ioptions.info_log, ioptions.statistics, GetContext::kNotFound, Slice(key), &value, nullptr, &merge_context, true, &max_covering_tombstone_seq, env); s = table_reader->Get(read_options, key, &get_context, nullptr); } else { s = db->Get(read_options, key, &result); } hist.Add(Now(env, measured_by_nanosecond) - start_time); } else { int r2_len; if (if_query_empty_keys) { r2_len = 0; } else { r2_len = rnd.Uniform(num_keys2) + 1; if (r2_len + r2 > num_keys2) { r2_len = num_keys2 - r2; } } std::string start_key = MakeKey(r1, r2, through_db); std::string end_key = MakeKey(r1, r2 + r2_len, through_db); uint64_t total_time = 0; uint64_t start_time = Now(env, measured_by_nanosecond); Iterator* iter = nullptr; InternalIterator* iiter = nullptr; if (!through_db) { iiter = table_reader->NewIterator( read_options, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized); } else { iter = 
db->NewIterator(read_options); } int count = 0; for (through_db ? iter->Seek(start_key) : iiter->Seek(start_key); through_db ? iter->Valid() : iiter->Valid(); through_db ? iter->Next() : iiter->Next()) { if (if_query_empty_keys) { break; } // verify key; total_time += Now(env, measured_by_nanosecond) - start_time; assert(Slice(MakeKey(r1, r2 + count, through_db)) == (through_db ? iter->key() : iiter->key())); start_time = Now(env, measured_by_nanosecond); if (++count >= r2_len) { break; } } if (count != r2_len) { fprintf( stderr, "Iterator cannot iterate expected number of entries. " "Expected %d but got %d\n", r2_len, count); assert(false); } delete iter; total_time += Now(env, measured_by_nanosecond) - start_time; hist.Add(total_time); } } } } fprintf( stderr, "===================================================" "====================================================\n" "InMemoryTableSimpleBenchmark: %20s num_key1: %5d " "num_key2: %5d %10s\n" "===================================================" "====================================================" "\nHistogram (unit: %s): \n%s", opts.table_factory->Name(), num_keys1, num_keys2, for_iterator ? "iterator" : (if_query_empty_keys ? "empty" : "non_empty"), measured_by_nanosecond ? 
"nanosecond" : "microsecond", hist.ToString().c_str()); if (!through_db) { env->DeleteFile(file_name); } else { delete db; db = nullptr; DestroyDB(dbname, opts); } } } // namespace } // namespace ROCKSDB_NAMESPACE DEFINE_bool(query_empty, false, "query non-existing keys instead of existing " "ones."); DEFINE_int32(num_keys1, 4096, "number of distinguish prefix of keys"); DEFINE_int32(num_keys2, 512, "number of distinguish keys for each prefix"); DEFINE_int32(iter, 3, "query non-existing keys instead of existing ones"); DEFINE_int32(prefix_len, 16, "Prefix length used for iterators and indexes"); DEFINE_bool(iterator, false, "For test iterator"); DEFINE_bool(through_db, false, "If enable, a DB instance will be created and " "the query will be against DB. Otherwise, will be directly against " "a table reader."); DEFINE_bool(mmap_read, true, "Whether use mmap read"); DEFINE_string(table_factory, "block_based", "Table factory to use: `block_based` (default), `plain_table` or " "`cuckoo_hash`."); DEFINE_string(time_unit, "microsecond", "The time unit used for measuring performance. 
User can specify " "`microsecond` (default) or `nanosecond`"); int main(int argc, char** argv) { SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + " [OPTIONS]..."); ParseCommandLineFlags(&argc, &argv, true); std::shared_ptr tf; ROCKSDB_NAMESPACE::Options options; if (FLAGS_prefix_len < 16) { options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(FLAGS_prefix_len)); } ROCKSDB_NAMESPACE::ReadOptions ro; ROCKSDB_NAMESPACE::EnvOptions env_options; options.create_if_missing = true; options.compression = ROCKSDB_NAMESPACE::CompressionType::kNoCompression; if (FLAGS_table_factory == "cuckoo_hash") { #ifndef ROCKSDB_LITE options.allow_mmap_reads = FLAGS_mmap_read; env_options.use_mmap_reads = FLAGS_mmap_read; ROCKSDB_NAMESPACE::CuckooTableOptions table_options; table_options.hash_table_ratio = 0.75; tf.reset(ROCKSDB_NAMESPACE::NewCuckooTableFactory(table_options)); #else fprintf(stderr, "Plain table is not supported in lite mode\n"); exit(1); #endif // ROCKSDB_LITE } else if (FLAGS_table_factory == "plain_table") { #ifndef ROCKSDB_LITE options.allow_mmap_reads = FLAGS_mmap_read; env_options.use_mmap_reads = FLAGS_mmap_read; ROCKSDB_NAMESPACE::PlainTableOptions plain_table_options; plain_table_options.user_key_len = 16; plain_table_options.bloom_bits_per_key = (FLAGS_prefix_len == 16) ? 0 : 8; plain_table_options.hash_table_ratio = 0.75; tf.reset(new ROCKSDB_NAMESPACE::PlainTableFactory(plain_table_options)); options.prefix_extractor.reset( ROCKSDB_NAMESPACE::NewFixedPrefixTransform(FLAGS_prefix_len)); #else fprintf(stderr, "Cuckoo table is not supported in lite mode\n"); exit(1); #endif // ROCKSDB_LITE } else if (FLAGS_table_factory == "block_based") { tf.reset(new ROCKSDB_NAMESPACE::BlockBasedTableFactory()); } else { fprintf(stderr, "Invalid table type %s\n", FLAGS_table_factory.c_str()); } if (tf) { // if user provides invalid options, just fall back to microsecond. 
bool measured_by_nanosecond = FLAGS_time_unit == "nanosecond"; options.table_factory = tf; ROCKSDB_NAMESPACE::TableReaderBenchmark( options, env_options, ro, FLAGS_num_keys1, FLAGS_num_keys2, FLAGS_iter, FLAGS_prefix_len, FLAGS_query_empty, FLAGS_iterator, FLAGS_through_db, measured_by_nanosecond); } else { return 1; } return 0; } #endif // GFLAGS rocksdb-6.11.4/table/table_reader_caller.h000066400000000000000000000027601370372246700204450ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once namespace ROCKSDB_NAMESPACE { // A list of callers for a table reader. It is used to trace the caller that // accesses on a block. This is only used for block cache tracing and analysis. // A user may use kUncategorized if the caller is not interesting for analysis // or the table reader is called in the test environment, e.g., unit test, table // reader benchmark, etc. enum TableReaderCaller : char { kUserGet = 1, kUserMultiGet = 2, kUserIterator = 3, kUserApproximateSize = 4, kUserVerifyChecksum = 5, kSSTDumpTool = 6, kExternalSSTIngestion = 7, kRepair = 8, kPrefetch = 9, kCompaction = 10, // A compaction job may refill the block cache with blocks in the new SST // files if paranoid_file_checks is true. kCompactionRefill = 11, // After building a table, it may load all its blocks into the block cache if // paranoid_file_checks is true. kFlush = 12, // sst_file_reader. kSSTFileReader = 13, // A list of callers that are either not interesting for analysis or are // calling from a test environment, e.g., unit test, benchmark, etc. kUncategorized = 14, // All callers should be added before kMaxBlockCacheLookupCaller. 
kMaxBlockCacheLookupCaller }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/table/table_test.cc000066400000000000000000005347361370372246700170130ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include #include #include #include #include #include #include #include "block_fetcher.h" #include "cache/lru_cache.h" #include "db/dbformat.h" #include "db/memtable.h" #include "db/write_batch_internal.h" #include "memtable/stl_wrappers.h" #include "meta_blocks.h" #include "monitoring/statistics.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/file_checksum.h" #include "rocksdb/file_system.h" #include "rocksdb/iterator.h" #include "rocksdb/memtablerep.h" #include "rocksdb/perf_context.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/write_buffer_manager.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_builder.h" #include "table/block_based/block_based_table_factory.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "table/block_based/flush_block_policy.h" #include "table/format.h" #include "table/get_context.h" #include "table/internal_iterator.h" #include "table/plain/plain_table_factory.h" #include "table/scoped_arena_iterator.h" #include "table/sst_file_writer_collectors.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/compression.h" #include 
"util/file_checksum_helper.h" #include "util/random.h" #include "util/string_util.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { extern const uint64_t kLegacyBlockBasedTableMagicNumber; extern const uint64_t kLegacyPlainTableMagicNumber; extern const uint64_t kBlockBasedTableMagicNumber; extern const uint64_t kPlainTableMagicNumber; namespace { const std::string kDummyValue(10000, 'o'); // DummyPropertiesCollector used to test BlockBasedTableProperties class DummyPropertiesCollector : public TablePropertiesCollector { public: const char* Name() const override { return ""; } Status Finish(UserCollectedProperties* /*properties*/) override { return Status::OK(); } Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override { return Status::OK(); } UserCollectedProperties GetReadableProperties() const override { return UserCollectedProperties{}; } }; class DummyPropertiesCollectorFactory1 : public TablePropertiesCollectorFactory { public: TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context /*context*/) override { return new DummyPropertiesCollector(); } const char* Name() const override { return "DummyPropertiesCollector1"; } }; class DummyPropertiesCollectorFactory2 : public TablePropertiesCollectorFactory { public: TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context /*context*/) override { return new DummyPropertiesCollector(); } const char* Name() const override { return "DummyPropertiesCollector2"; } }; // Return reverse of "key". // Used to test non-lexicographic comparators. 
std::string Reverse(const Slice& key) { auto rev = key.ToString(); std::reverse(rev.begin(), rev.end()); return rev; } class ReverseKeyComparator : public Comparator { public: const char* Name() const override { return "rocksdb.ReverseBytewiseComparator"; } int Compare(const Slice& a, const Slice& b) const override { return BytewiseComparator()->Compare(Reverse(a), Reverse(b)); } void FindShortestSeparator(std::string* start, const Slice& limit) const override { std::string s = Reverse(*start); std::string l = Reverse(limit); BytewiseComparator()->FindShortestSeparator(&s, l); *start = Reverse(s); } void FindShortSuccessor(std::string* key) const override { std::string s = Reverse(*key); BytewiseComparator()->FindShortSuccessor(&s); *key = Reverse(s); } }; ReverseKeyComparator reverse_key_comparator; void Increment(const Comparator* cmp, std::string* key) { if (cmp == BytewiseComparator()) { key->push_back('\0'); } else { assert(cmp == &reverse_key_comparator); std::string rev = Reverse(*key); rev.push_back('\0'); *key = Reverse(rev); } } } // namespace // Helper class for tests to unify the interface between // BlockBuilder/TableBuilder and Block/Table. class Constructor { public: explicit Constructor(const Comparator* cmp) : data_(stl_wrappers::LessOfComparator(cmp)) {} virtual ~Constructor() { } void Add(const std::string& key, const Slice& value) { data_[key] = value.ToString(); } // Finish constructing the data structure with all the keys that have // been added so far. 
Returns the keys in sorted order in "*keys" // and stores the key/value pairs in "*kvmap" void Finish(const Options& options, const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, std::vector* keys, stl_wrappers::KVMap* kvmap) { last_internal_key_ = &internal_comparator; *kvmap = data_; keys->clear(); for (const auto& kv : data_) { keys->push_back(kv.first); } data_.clear(); Status s = FinishImpl(options, ioptions, moptions, table_options, internal_comparator, *kvmap); ASSERT_TRUE(s.ok()) << s.ToString(); } // Construct the data structure from the data in "data" virtual Status FinishImpl(const Options& options, const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, const stl_wrappers::KVMap& data) = 0; virtual InternalIterator* NewIterator( const SliceTransform* prefix_extractor = nullptr) const = 0; virtual const stl_wrappers::KVMap& data() { return data_; } virtual bool IsArenaMode() const { return false; } virtual DB* db() const { return nullptr; } // Overridden in DBConstructor virtual bool AnywayDeleteIterator() const { return false; } protected: const InternalKeyComparator* last_internal_key_; private: stl_wrappers::KVMap data_; }; class BlockConstructor: public Constructor { public: explicit BlockConstructor(const Comparator* cmp) : Constructor(cmp), comparator_(cmp), block_(nullptr) { } ~BlockConstructor() override { delete block_; } Status FinishImpl(const Options& /*options*/, const ImmutableCFOptions& /*ioptions*/, const MutableCFOptions& /*moptions*/, const BlockBasedTableOptions& table_options, const InternalKeyComparator& /*internal_comparator*/, const stl_wrappers::KVMap& kv_map) override { delete block_; block_ = nullptr; BlockBuilder builder(table_options.block_restart_interval); for (const auto kv : kv_map) { builder.Add(kv.first, 
kv.second); } // Open the block data_ = builder.Finish().ToString(); BlockContents contents; contents.data = data_; block_ = new Block(std::move(contents)); return Status::OK(); } InternalIterator* NewIterator( const SliceTransform* /*prefix_extractor*/) const override { return block_->NewDataIterator(comparator_, comparator_, kDisableGlobalSequenceNumber); } private: const Comparator* comparator_; std::string data_; Block* block_; BlockConstructor(); }; // A helper class that converts internal format keys into user keys class KeyConvertingIterator : public InternalIterator { public: explicit KeyConvertingIterator(InternalIterator* iter, bool arena_mode = false) : iter_(iter), arena_mode_(arena_mode) {} ~KeyConvertingIterator() override { if (arena_mode_) { iter_->~InternalIterator(); } else { delete iter_; } } bool Valid() const override { return iter_->Valid() && status_.ok(); } void Seek(const Slice& target) override { ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); std::string encoded; AppendInternalKey(&encoded, ikey); iter_->Seek(encoded); } void SeekForPrev(const Slice& target) override { ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); std::string encoded; AppendInternalKey(&encoded, ikey); iter_->SeekForPrev(encoded); } void SeekToFirst() override { iter_->SeekToFirst(); } void SeekToLast() override { iter_->SeekToLast(); } void Next() override { iter_->Next(); } void Prev() override { iter_->Prev(); } bool IsOutOfBound() override { return iter_->IsOutOfBound(); } Slice key() const override { assert(Valid()); ParsedInternalKey parsed_key; if (!ParseInternalKey(iter_->key(), &parsed_key)) { status_ = Status::Corruption("malformed internal key"); return Slice("corrupted key"); } return parsed_key.user_key; } Slice value() const override { return iter_->value(); } Status status() const override { return status_.ok() ? 
iter_->status() : status_; } private: mutable Status status_; InternalIterator* iter_; bool arena_mode_; // No copying allowed KeyConvertingIterator(const KeyConvertingIterator&); void operator=(const KeyConvertingIterator&); }; class TableConstructor: public Constructor { public: explicit TableConstructor(const Comparator* cmp, bool convert_to_internal_key = false, int level = -1, SequenceNumber largest_seqno = 0) : Constructor(cmp), largest_seqno_(largest_seqno), convert_to_internal_key_(convert_to_internal_key), level_(level) { env_ = ROCKSDB_NAMESPACE::Env::Default(); } ~TableConstructor() override { Reset(); } Status FinishImpl(const Options& options, const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, const BlockBasedTableOptions& /*table_options*/, const InternalKeyComparator& internal_comparator, const stl_wrappers::KVMap& kv_map) override { Reset(); soptions.use_mmap_reads = ioptions.allow_mmap_reads; file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */)); std::unique_ptr builder; std::vector> int_tbl_prop_collector_factories; if (largest_seqno_ != 0) { // Pretend that it's an external file written by SstFileWriter. 
int_tbl_prop_collector_factories.emplace_back( new SstFileWriterPropertiesCollectorFactory(2 /* version */, 0 /* global_seqno*/)); } std::string column_family_name; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, internal_comparator, &int_tbl_prop_collector_factories, options.compression, options.sample_for_compression, options.compression_opts, false /* skip_filters */, column_family_name, level_), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, file_writer_.get())); for (const auto kv : kv_map) { if (convert_to_internal_key_) { ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); std::string encoded; AppendInternalKey(&encoded, ikey); builder->Add(encoded, kv.second); } else { builder->Add(kv.first, kv.second); } EXPECT_TRUE(builder->status().ok()); } Status s = builder->Finish(); file_writer_->Flush(); EXPECT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize()); // Open the table uniq_id_ = cur_uniq_id_++; file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource( TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads))); const bool kSkipFilters = true; const bool kImmortal = true; return ioptions.table_factory->NewTableReader( TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions, internal_comparator, !kSkipFilters, !kImmortal, false, level_, largest_seqno_, &block_cache_tracer_, moptions.write_buffer_size), std::move(file_reader_), TEST_GetSink()->contents().size(), &table_reader_); } InternalIterator* NewIterator( const SliceTransform* prefix_extractor) const override { ReadOptions ro; InternalIterator* iter = table_reader_->NewIterator( ro, prefix_extractor, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized); if (convert_to_internal_key_) { return new KeyConvertingIterator(iter); } else { return iter; } } uint64_t ApproximateOffsetOf(const Slice& key) const { if 
(convert_to_internal_key_) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); const Slice skey = ikey.Encode(); return table_reader_->ApproximateOffsetOf( skey, TableReaderCaller::kUncategorized); } return table_reader_->ApproximateOffsetOf( key, TableReaderCaller::kUncategorized); } virtual Status Reopen(const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions) { file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource( TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads))); return ioptions.table_factory->NewTableReader( TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions, *last_internal_key_), std::move(file_reader_), TEST_GetSink()->contents().size(), &table_reader_); } virtual TableReader* GetTableReader() { return table_reader_.get(); } bool AnywayDeleteIterator() const override { return convert_to_internal_key_; } void ResetTableReader() { table_reader_.reset(); } bool ConvertToInternalKey() { return convert_to_internal_key_; } test::StringSink* TEST_GetSink() { return ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter( file_writer_.get()); } BlockCacheTracer block_cache_tracer_; private: void Reset() { uniq_id_ = 0; table_reader_.reset(); file_writer_.reset(); file_reader_.reset(); } uint64_t uniq_id_; std::unique_ptr file_writer_; std::unique_ptr file_reader_; std::unique_ptr table_reader_; SequenceNumber largest_seqno_; bool convert_to_internal_key_; int level_; TableConstructor(); static uint64_t cur_uniq_id_; EnvOptions soptions; Env* env_; }; uint64_t TableConstructor::cur_uniq_id_ = 1; class MemTableConstructor: public Constructor { public: explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb) : Constructor(cmp), internal_comparator_(cmp), write_buffer_manager_(wb), table_factory_(new SkipListFactory) { options_.memtable_factory = table_factory_; ImmutableCFOptions ioptions(options_); memtable_ = new MemTable(internal_comparator_, ioptions, 
MutableCFOptions(options_), wb, kMaxSequenceNumber, 0 /* column_family_id */); memtable_->Ref(); } ~MemTableConstructor() override { delete memtable_->Unref(); } Status FinishImpl(const Options&, const ImmutableCFOptions& ioptions, const MutableCFOptions& /*moptions*/, const BlockBasedTableOptions& /*table_options*/, const InternalKeyComparator& /*internal_comparator*/, const stl_wrappers::KVMap& kv_map) override { delete memtable_->Unref(); ImmutableCFOptions mem_ioptions(ioptions); memtable_ = new MemTable(internal_comparator_, mem_ioptions, MutableCFOptions(options_), write_buffer_manager_, kMaxSequenceNumber, 0 /* column_family_id */); memtable_->Ref(); int seq = 1; for (const auto kv : kv_map) { memtable_->Add(seq, kTypeValue, kv.first, kv.second); seq++; } return Status::OK(); } InternalIterator* NewIterator( const SliceTransform* /*prefix_extractor*/) const override { return new KeyConvertingIterator( memtable_->NewIterator(ReadOptions(), &arena_), true); } bool AnywayDeleteIterator() const override { return true; } bool IsArenaMode() const override { return true; } private: mutable Arena arena_; InternalKeyComparator internal_comparator_; Options options_; WriteBufferManager* write_buffer_manager_; MemTable* memtable_; std::shared_ptr table_factory_; }; class InternalIteratorFromIterator : public InternalIterator { public: explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {} bool Valid() const override { return it_->Valid(); } void Seek(const Slice& target) override { it_->Seek(target); } void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); } void SeekToFirst() override { it_->SeekToFirst(); } void SeekToLast() override { it_->SeekToLast(); } void Next() override { it_->Next(); } void Prev() override { it_->Prev(); } Slice key() const override { return it_->key(); } Slice value() const override { return it_->value(); } Status status() const override { return it_->status(); } private: std::unique_ptr it_; }; class 
DBConstructor: public Constructor { public: explicit DBConstructor(const Comparator* cmp) : Constructor(cmp), comparator_(cmp) { db_ = nullptr; NewDB(); } ~DBConstructor() override { delete db_; } Status FinishImpl(const Options& /*options*/, const ImmutableCFOptions& /*ioptions*/, const MutableCFOptions& /*moptions*/, const BlockBasedTableOptions& /*table_options*/, const InternalKeyComparator& /*internal_comparator*/, const stl_wrappers::KVMap& kv_map) override { delete db_; db_ = nullptr; NewDB(); for (const auto kv : kv_map) { WriteBatch batch; batch.Put(kv.first, kv.second); EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok()); } return Status::OK(); } InternalIterator* NewIterator( const SliceTransform* /*prefix_extractor*/) const override { return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions())); } DB* db() const override { return db_; } private: void NewDB() { std::string name = test::PerThreadDBPath("table_testdb"); Options options; options.comparator = comparator_; Status status = DestroyDB(name, options); ASSERT_TRUE(status.ok()) << status.ToString(); options.create_if_missing = true; options.error_if_exists = true; options.write_buffer_size = 10000; // Something small to force merging status = DB::Open(options, name, &db_); ASSERT_TRUE(status.ok()) << status.ToString(); } const Comparator* comparator_; DB* db_; }; enum TestType { BLOCK_BASED_TABLE_TEST, #ifndef ROCKSDB_LITE PLAIN_TABLE_SEMI_FIXED_PREFIX, PLAIN_TABLE_FULL_STR_PREFIX, PLAIN_TABLE_TOTAL_ORDER, #endif // !ROCKSDB_LITE BLOCK_TEST, MEMTABLE_TEST, DB_TEST }; struct TestArgs { TestType type; bool reverse_compare; int restart_interval; CompressionType compression; uint32_t compression_parallel_threads; uint32_t format_version; bool use_mmap; }; static std::vector GenerateArgList() { std::vector test_args; std::vector test_types = { BLOCK_BASED_TABLE_TEST, #ifndef ROCKSDB_LITE PLAIN_TABLE_SEMI_FIXED_PREFIX, PLAIN_TABLE_FULL_STR_PREFIX, PLAIN_TABLE_TOTAL_ORDER, #endif // 
!ROCKSDB_LITE BLOCK_TEST, MEMTABLE_TEST, DB_TEST}; std::vector reverse_compare_types = {false, true}; std::vector restart_intervals = {16, 1, 1024}; std::vector compression_parallel_threads = {1, 4}; // Only add compression if it is supported std::vector> compression_types; compression_types.emplace_back(kNoCompression, false); if (Snappy_Supported()) { compression_types.emplace_back(kSnappyCompression, false); } if (Zlib_Supported()) { compression_types.emplace_back(kZlibCompression, false); compression_types.emplace_back(kZlibCompression, true); } if (BZip2_Supported()) { compression_types.emplace_back(kBZip2Compression, false); compression_types.emplace_back(kBZip2Compression, true); } if (LZ4_Supported()) { compression_types.emplace_back(kLZ4Compression, false); compression_types.emplace_back(kLZ4Compression, true); compression_types.emplace_back(kLZ4HCCompression, false); compression_types.emplace_back(kLZ4HCCompression, true); } if (XPRESS_Supported()) { compression_types.emplace_back(kXpressCompression, false); compression_types.emplace_back(kXpressCompression, true); } if (ZSTD_Supported()) { compression_types.emplace_back(kZSTD, false); compression_types.emplace_back(kZSTD, true); } for (auto test_type : test_types) { for (auto reverse_compare : reverse_compare_types) { #ifndef ROCKSDB_LITE if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX || test_type == PLAIN_TABLE_FULL_STR_PREFIX || test_type == PLAIN_TABLE_TOTAL_ORDER) { // Plain table doesn't use restart index or compression. 
TestArgs one_arg; one_arg.type = test_type; one_arg.reverse_compare = reverse_compare; one_arg.restart_interval = restart_intervals[0]; one_arg.compression = compression_types[0].first; one_arg.compression_parallel_threads = 1; one_arg.use_mmap = true; test_args.push_back(one_arg); one_arg.use_mmap = false; test_args.push_back(one_arg); continue; } #endif // !ROCKSDB_LITE for (auto restart_interval : restart_intervals) { for (auto compression_type : compression_types) { for (auto num_threads : compression_parallel_threads) { TestArgs one_arg; one_arg.type = test_type; one_arg.reverse_compare = reverse_compare; one_arg.restart_interval = restart_interval; one_arg.compression = compression_type.first; one_arg.format_version = compression_type.second ? 2 : 1; one_arg.compression_parallel_threads = num_threads; one_arg.use_mmap = false; test_args.push_back(one_arg); } } } } } return test_args; } // In order to make all tests run for plain table format, including // those operating on empty keys, create a new prefix transformer which // return fixed prefix if the slice is not shorter than the prefix length, // and the full slice if it is shorter. 
class FixedOrLessPrefixTransform : public SliceTransform { private: const size_t prefix_len_; public: explicit FixedOrLessPrefixTransform(size_t prefix_len) : prefix_len_(prefix_len) { } const char* Name() const override { return "rocksdb.FixedPrefix"; } Slice Transform(const Slice& src) const override { assert(InDomain(src)); if (src.size() < prefix_len_) { return src; } return Slice(src.data(), prefix_len_); } bool InDomain(const Slice& /*src*/) const override { return true; } bool InRange(const Slice& dst) const override { return (dst.size() <= prefix_len_); } bool FullLengthEnabled(size_t* /*len*/) const override { return false; } }; class HarnessTest : public testing::Test { public: HarnessTest() : ioptions_(options_), moptions_(options_), constructor_(nullptr), write_buffer_(options_.db_write_buffer_size) {} void Init(const TestArgs& args) { delete constructor_; constructor_ = nullptr; options_ = Options(); options_.compression = args.compression; options_.compression_opts.parallel_threads = args.compression_parallel_threads; // Use shorter block size for tests to exercise block boundary // conditions more. 
if (args.reverse_compare) { options_.comparator = &reverse_key_comparator; } internal_comparator_.reset( new test::PlainInternalKeyComparator(options_.comparator)); support_prev_ = true; only_support_prefix_seek_ = false; options_.allow_mmap_reads = args.use_mmap; switch (args.type) { case BLOCK_BASED_TABLE_TEST: table_options_.flush_block_policy_factory.reset( new FlushBlockBySizePolicyFactory()); table_options_.block_size = 256; table_options_.block_restart_interval = args.restart_interval; table_options_.index_block_restart_interval = args.restart_interval; table_options_.format_version = args.format_version; options_.table_factory.reset( new BlockBasedTableFactory(table_options_)); constructor_ = new TableConstructor( options_.comparator, true /* convert_to_internal_key_ */); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; // Plain table is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE case PLAIN_TABLE_SEMI_FIXED_PREFIX: support_prev_ = false; only_support_prefix_seek_ = true; options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2)); options_.table_factory.reset(NewPlainTableFactory()); constructor_ = new TableConstructor( options_.comparator, true /* convert_to_internal_key_ */); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; case PLAIN_TABLE_FULL_STR_PREFIX: support_prev_ = false; only_support_prefix_seek_ = true; options_.prefix_extractor.reset(NewNoopTransform()); options_.table_factory.reset(NewPlainTableFactory()); constructor_ = new TableConstructor( options_.comparator, true /* convert_to_internal_key_ */); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; case PLAIN_TABLE_TOTAL_ORDER: support_prev_ = false; only_support_prefix_seek_ = false; options_.prefix_extractor = nullptr; { PlainTableOptions plain_table_options; plain_table_options.user_key_len = kPlainTableVariableLength; plain_table_options.bloom_bits_per_key = 0; 
plain_table_options.hash_table_ratio = 0; options_.table_factory.reset( NewPlainTableFactory(plain_table_options)); } constructor_ = new TableConstructor( options_.comparator, true /* convert_to_internal_key_ */); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); break; #endif // !ROCKSDB_LITE case BLOCK_TEST: table_options_.block_size = 256; options_.table_factory.reset( new BlockBasedTableFactory(table_options_)); constructor_ = new BlockConstructor(options_.comparator); break; case MEMTABLE_TEST: table_options_.block_size = 256; options_.table_factory.reset( new BlockBasedTableFactory(table_options_)); constructor_ = new MemTableConstructor(options_.comparator, &write_buffer_); break; case DB_TEST: table_options_.block_size = 256; options_.table_factory.reset( new BlockBasedTableFactory(table_options_)); constructor_ = new DBConstructor(options_.comparator); break; } ioptions_ = ImmutableCFOptions(options_); moptions_ = MutableCFOptions(options_); } ~HarnessTest() override { delete constructor_; } void Add(const std::string& key, const std::string& value) { constructor_->Add(key, value); } void Test(Random* rnd) { std::vector keys; stl_wrappers::KVMap data; constructor_->Finish(options_, ioptions_, moptions_, table_options_, *internal_comparator_, &keys, &data); TestForwardScan(keys, data); if (support_prev_) { TestBackwardScan(keys, data); } TestRandomAccess(rnd, keys, data); } void TestForwardScan(const std::vector& /*keys*/, const stl_wrappers::KVMap& data) { InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToFirst(); for (stl_wrappers::KVMap::const_iterator model_iter = data.begin(); model_iter != data.end(); ++model_iter) { ASSERT_EQ(ToString(data, model_iter), ToString(iter)); iter->Next(); } ASSERT_TRUE(!iter->Valid()); if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { iter->~InternalIterator(); } else { delete iter; } } void TestBackwardScan(const 
std::vector& /*keys*/, const stl_wrappers::KVMap& data) { InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToLast(); for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin(); model_iter != data.rend(); ++model_iter) { ASSERT_EQ(ToString(data, model_iter), ToString(iter)); iter->Prev(); } ASSERT_TRUE(!iter->Valid()); if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { iter->~InternalIterator(); } else { delete iter; } } void TestRandomAccess(Random* rnd, const std::vector& keys, const stl_wrappers::KVMap& data) { static const bool kVerbose = false; InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); stl_wrappers::KVMap::const_iterator model_iter = data.begin(); if (kVerbose) fprintf(stderr, "---\n"); for (int i = 0; i < 200; i++) { const int toss = rnd->Uniform(support_prev_ ? 5 : 3); switch (toss) { case 0: { if (iter->Valid()) { if (kVerbose) fprintf(stderr, "Next\n"); iter->Next(); ++model_iter; ASSERT_EQ(ToString(data, model_iter), ToString(iter)); } break; } case 1: { if (kVerbose) fprintf(stderr, "SeekToFirst\n"); iter->SeekToFirst(); model_iter = data.begin(); ASSERT_EQ(ToString(data, model_iter), ToString(iter)); break; } case 2: { std::string key = PickRandomKey(rnd, keys); model_iter = data.lower_bound(key); if (kVerbose) fprintf(stderr, "Seek '%s'\n", EscapeString(key).c_str()); iter->Seek(Slice(key)); ASSERT_EQ(ToString(data, model_iter), ToString(iter)); break; } case 3: { if (iter->Valid()) { if (kVerbose) fprintf(stderr, "Prev\n"); iter->Prev(); if (model_iter == data.begin()) { model_iter = data.end(); // Wrap around to invalid value } else { --model_iter; } ASSERT_EQ(ToString(data, model_iter), ToString(iter)); } break; } case 4: { if (kVerbose) fprintf(stderr, "SeekToLast\n"); iter->SeekToLast(); if (keys.empty()) { model_iter = data.end(); } else { std::string last = data.rbegin()->first; model_iter = data.lower_bound(last); } 
ASSERT_EQ(ToString(data, model_iter), ToString(iter)); break; } } } if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) { iter->~InternalIterator(); } else { delete iter; } } std::string ToString(const stl_wrappers::KVMap& data, const stl_wrappers::KVMap::const_iterator& it) { if (it == data.end()) { return "END"; } else { return "'" + it->first + "->" + it->second + "'"; } } std::string ToString(const stl_wrappers::KVMap& data, const stl_wrappers::KVMap::const_reverse_iterator& it) { if (it == data.rend()) { return "END"; } else { return "'" + it->first + "->" + it->second + "'"; } } std::string ToString(const InternalIterator* it) { if (!it->Valid()) { return "END"; } else { return "'" + it->key().ToString() + "->" + it->value().ToString() + "'"; } } std::string PickRandomKey(Random* rnd, const std::vector& keys) { if (keys.empty()) { return "foo"; } else { const int index = rnd->Uniform(static_cast(keys.size())); std::string result = keys[index]; switch (rnd->Uniform(support_prev_ ? 3 : 1)) { case 0: // Return an existing key break; case 1: { // Attempt to return something smaller than an existing key if (result.size() > 0 && result[result.size() - 1] > '\0' && (!only_support_prefix_seek_ || options_.prefix_extractor->Transform(result).size() < result.size())) { result[result.size() - 1]--; } break; } case 2: { // Return something larger than an existing key Increment(options_.comparator, &result); break; } } return result; } } // Returns nullptr if not running against a DB DB* db() const { return constructor_->db(); } void RandomizedHarnessTest(size_t part, size_t total) { std::vector args = GenerateArgList(); assert(part); assert(part <= total); for (size_t i = 0; i < args.size(); i++) { if ((i % total) + 1 != part) { continue; } Init(args[i]); Random rnd(test::RandomSeed() + 5); for (int num_entries = 0; num_entries < 2000; num_entries += (num_entries < 50 ? 
1 : 200)) { for (int e = 0; e < num_entries; e++) { std::string v; Add(test::RandomKey(&rnd, rnd.Skewed(4)), test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); } Test(&rnd); } } } private: Options options_ = Options(); ImmutableCFOptions ioptions_; MutableCFOptions moptions_; BlockBasedTableOptions table_options_ = BlockBasedTableOptions(); Constructor* constructor_; WriteBufferManager write_buffer_; bool support_prev_; bool only_support_prefix_seek_; std::shared_ptr internal_comparator_; }; static bool Between(uint64_t val, uint64_t low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", (unsigned long long)(val), (unsigned long long)(low), (unsigned long long)(high)); } return result; } // Tests against all kinds of tables class TableTest : public testing::Test { public: const InternalKeyComparator& GetPlainInternalComparator( const Comparator* comp) { if (!plain_internal_comparator) { plain_internal_comparator.reset( new test::PlainInternalKeyComparator(comp)); } return *plain_internal_comparator; } void IndexTest(BlockBasedTableOptions table_options); private: std::unique_ptr plain_internal_comparator; }; class GeneralTableTest : public TableTest {}; class BlockBasedTableTest : public TableTest, virtual public ::testing::WithParamInterface { public: BlockBasedTableTest() : format_(GetParam()) { env_ = ROCKSDB_NAMESPACE::Env::Default(); } BlockBasedTableOptions GetBlockBasedTableOptions() { BlockBasedTableOptions options; options.format_version = format_; return options; } void SetupTracingTest(TableConstructor* c) { test_path_ = test::PerThreadDBPath("block_based_table_tracing_test"); EXPECT_OK(env_->CreateDir(test_path_)); trace_file_path_ = test_path_ + "/block_cache_trace_file"; TraceOptions trace_opt; std::unique_ptr trace_writer; EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_, &trace_writer)); c->block_cache_tracer_.StartTrace(env_, trace_opt, 
std::move(trace_writer)); { std::string user_key = "k01"; InternalKey internal_key(user_key, 0, kTypeValue); std::string encoded_key = internal_key.Encode().ToString(); c->Add(encoded_key, kDummyValue); } { std::string user_key = "k02"; InternalKey internal_key(user_key, 0, kTypeValue); std::string encoded_key = internal_key.Encode().ToString(); c->Add(encoded_key, kDummyValue); } } void VerifyBlockAccessTrace( TableConstructor* c, const std::vector& expected_records) { c->block_cache_tracer_.EndTrace(); { std::unique_ptr trace_reader; Status s = NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader); EXPECT_OK(s); BlockCacheTraceReader reader(std::move(trace_reader)); BlockCacheTraceHeader header; EXPECT_OK(reader.ReadHeader(&header)); uint32_t index = 0; while (s.ok()) { BlockCacheTraceRecord access; s = reader.ReadAccess(&access); if (!s.ok()) { break; } ASSERT_LT(index, expected_records.size()); EXPECT_NE("", access.block_key); EXPECT_EQ(access.block_type, expected_records[index].block_type); EXPECT_GT(access.block_size, 0); EXPECT_EQ(access.caller, expected_records[index].caller); EXPECT_EQ(access.no_insert, expected_records[index].no_insert); EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit); // Get if (access.caller == TableReaderCaller::kUserGet) { EXPECT_EQ(access.referenced_key, expected_records[index].referenced_key); EXPECT_EQ(access.get_id, expected_records[index].get_id); EXPECT_EQ(access.get_from_user_specified_snapshot, expected_records[index].get_from_user_specified_snapshot); if (access.block_type == TraceType::kBlockTraceDataBlock) { EXPECT_GT(access.referenced_data_size, 0); EXPECT_GT(access.num_keys_in_block, 0); EXPECT_EQ(access.referenced_key_exist_in_block, expected_records[index].referenced_key_exist_in_block); } } else { EXPECT_EQ(access.referenced_key, ""); EXPECT_EQ(access.get_id, 0); EXPECT_TRUE(access.get_from_user_specified_snapshot == Boolean::kFalse); EXPECT_EQ(access.referenced_data_size, 0); 
EXPECT_EQ(access.num_keys_in_block, 0); EXPECT_TRUE(access.referenced_key_exist_in_block == Boolean::kFalse); } index++; } EXPECT_EQ(index, expected_records.size()); } EXPECT_OK(env_->DeleteFile(trace_file_path_)); EXPECT_OK(env_->DeleteDir(test_path_)); } protected: uint64_t IndexUncompressedHelper(bool indexCompress); private: uint32_t format_; Env* env_; std::string trace_file_path_; std::string test_path_; }; class PlainTableTest : public TableTest {}; class TablePropertyTest : public testing::Test {}; class BBTTailPrefetchTest : public TableTest {}; // The helper class to test the file checksum class FileChecksumTestHelper { public: FileChecksumTestHelper(bool convert_to_internal_key = false) : convert_to_internal_key_(convert_to_internal_key) { } ~FileChecksumTestHelper() {} void CreateWriteableFile() { sink_ = new test::StringSink(); file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */)); } void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) { if (file_writer_ != nullptr) { file_writer_->TEST_SetFileChecksumGenerator(checksum_generator); } else { delete checksum_generator; } } WritableFileWriter* GetFileWriter() { return file_writer_.get(); } Status ResetTableBuilder(std::unique_ptr&& builder) { assert(builder != nullptr); table_builder_ = std::move(builder); return Status::OK(); } void AddKVtoKVMap(int num_entries) { Random rnd(test::RandomSeed()); for (int i = 0; i < num_entries; i++) { std::string v; test::RandomString(&rnd, 100, &v); kv_map_[test::RandomKey(&rnd, 20)] = v; } } Status WriteKVAndFlushTable() { for (const auto kv : kv_map_) { if (convert_to_internal_key_) { ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue); std::string encoded; AppendInternalKey(&encoded, ikey); table_builder_->Add(encoded, kv.second); } else { table_builder_->Add(kv.first, kv.second); } EXPECT_TRUE(table_builder_->status().ok()); } Status s = table_builder_->Finish(); file_writer_->Flush(); EXPECT_TRUE(s.ok()); 
EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize()); return s; } std::string GetFileChecksum() { file_writer_->Close(); return table_builder_->GetFileChecksum(); } const char* GetFileChecksumFuncName() { return table_builder_->GetFileChecksumFuncName(); } Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator, std::string* checksum) { assert(file_checksum_generator != nullptr); cur_uniq_id_ = checksum_uniq_id_++; test::StringSink* ss_rw = ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter( file_writer_.get()); file_reader_.reset(test::GetRandomAccessFileReader( new test::StringSource(ss_rw->contents()))); std::unique_ptr scratch(new char[2048]); Slice result; uint64_t offset = 0; Status s; s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), nullptr, false); if (!s.ok()) { return s; } while (result.size() != 0) { file_checksum_generator->Update(scratch.get(), result.size()); offset += static_cast(result.size()); s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), nullptr, false); if (!s.ok()) { return s; } } EXPECT_EQ(offset, static_cast(table_builder_->FileSize())); file_checksum_generator->Finalize(); *checksum = file_checksum_generator->GetChecksum(); return Status::OK(); } private: bool convert_to_internal_key_; uint64_t cur_uniq_id_; std::unique_ptr file_writer_; std::unique_ptr file_reader_; std::unique_ptr table_builder_; stl_wrappers::KVMap kv_map_; test::StringSink* sink_ = nullptr; static uint64_t checksum_uniq_id_; }; uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1; INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest, testing::Values(test::kLatestFormatVersion)); // This test serves as the living tutorial for the prefix scan of user collected // properties. 
TEST_F(TablePropertyTest, PrefixScanTest) {
  UserCollectedProperties props{
      {"num.111.1", "1"}, {"num.111.2", "2"}, {"num.111.3", "3"},
      {"num.333.1", "1"}, {"num.333.2", "2"}, {"num.333.3", "3"},
      {"num.555.1", "1"}, {"num.555.2", "2"}, {"num.555.3", "3"},
  };

  // True when `str` begins with `prefix`.
  const auto starts_with = [](const std::string& str,
                              const std::string& prefix) {
    return str.compare(0, prefix.size(), prefix) == 0;
  };

  // Prefixes that exist: walking forward from lower_bound(prefix) must visit
  // exactly "<prefix>.1", "<prefix>.2", "<prefix>.3" with matching values.
  for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
    int matched = 0;
    auto cursor = props.lower_bound(prefix);
    while (cursor != props.end() && starts_with(cursor->first, prefix)) {
      ++matched;
      const auto ordinal = ToString(matched);
      ASSERT_EQ(prefix + "." + ordinal, cursor->first);
      ASSERT_EQ(ordinal, cursor->second);
      ++cursor;
    }
    ASSERT_EQ(3, matched);
  }

  // Prefixes that don't exist: lower_bound(prefix) is either the end of the
  // map or an entry that does not start with the prefix.
  for (const std::string& prefix :
       {"num.000", "num.222", "num.444", "num.666"}) {
    const auto cursor = props.lower_bound(prefix);
    ASSERT_TRUE(cursor == props.end() || !starts_with(cursor->first, prefix));
  }
}

// This test includes all the basic checks except those for index size and
// block size, which are covered by separate unit tests.
TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) { TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("a1", "val1"); c.Add("b2", "val2"); c.Add("c3", "val3"); c.Add("d4", "val4"); c.Add("e5", "val5"); c.Add("f6", "val6"); c.Add("g7", "val7"); c.Add("h8", "val8"); c.Add("j9", "val9"); uint64_t diff_internal_user_bytes = 9 * 8; // 8 is seq size, 9 k-v totally std::vector keys; stl_wrappers::KVMap kvmap; Options options; options.compression = kNoCompression; options.statistics = CreateDBStatistics(); options.statistics->set_stats_level(StatsLevel::kAll); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_restart_interval = 1; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); ioptions.statistics = options.statistics.get(); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0); auto& props = *c.GetTableReader()->GetTableProperties(); ASSERT_EQ(kvmap.size(), props.num_entries); auto raw_key_size = kvmap.size() * 2ul; auto raw_value_size = kvmap.size() * 4ul; ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size); ASSERT_EQ(raw_value_size, props.raw_value_size); ASSERT_EQ(1ul, props.num_data_blocks); ASSERT_EQ("", props.filter_policy_name); // no filter policy is used // Verify data size. 
BlockBuilder block_builder(1); for (const auto& item : kvmap) { block_builder.Add(item.first, item.second); } Slice content = block_builder.Finish(); ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes, props.data_size); c.ResetTableReader(); } #ifdef SNAPPY uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) { TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); constexpr size_t kNumKeys = 10000; for (size_t k = 0; k < kNumKeys; ++k) { c.Add("key" + ToString(k), "val" + ToString(k)); } std::vector keys; stl_wrappers::KVMap kvmap; Options options; options.compression = kSnappyCompression; options.statistics = CreateDBStatistics(); options.statistics->set_stats_level(StatsLevel::kAll); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_restart_interval = 1; table_options.enable_index_compression = compressed; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); ioptions.statistics = options.statistics.get(); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); c.ResetTableReader(); return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); } TEST_P(BlockBasedTableTest, IndexUncompressed) { uint64_t tbl1_compressed_cnt = IndexUncompressedHelper(true); uint64_t tbl2_compressed_cnt = IndexUncompressedHelper(false); // tbl1_compressed_cnt should include 1 index block EXPECT_EQ(tbl2_compressed_cnt + 1, tbl1_compressed_cnt); } #endif // SNAPPY TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) { TableConstructor c(&reverse_key_comparator); std::vector keys; stl_wrappers::KVMap kvmap; { Options options; options.compression = CompressionType::kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); const 
ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); auto& props = *c.GetTableReader()->GetTableProperties(); // Default comparator ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name); // No merge operator ASSERT_EQ("nullptr", props.merge_operator_name); // No prefix extractor ASSERT_EQ("nullptr", props.prefix_extractor_name); // No property collectors ASSERT_EQ("[]", props.property_collectors_names); // No filter policy is used ASSERT_EQ("", props.filter_policy_name); // Compression type == that set: ASSERT_EQ("NoCompression", props.compression_name); c.ResetTableReader(); } { Options options; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.comparator = &reverse_key_comparator; options.merge_operator = MergeOperators::CreateUInt64AddOperator(); options.prefix_extractor.reset(NewNoopTransform()); options.table_properties_collector_factories.emplace_back( new DummyPropertiesCollectorFactory1()); options.table_properties_collector_factories.emplace_back( new DummyPropertiesCollectorFactory2()); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); auto& props = *c.GetTableReader()->GetTableProperties(); ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name); ASSERT_EQ("UInt64AddOperator", props.merge_operator_name); ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name); ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]", props.property_collectors_names); ASSERT_EQ("", props.filter_policy_name); // no filter policy is used c.ResetTableReader(); } } TEST_P(BlockBasedTableTest, RangeDelBlock) { TableConstructor c(BytewiseComparator()); 
std::vector keys = {"1pika", "2chu"}; std::vector vals = {"p", "c"}; std::vector expected_tombstones = { {"1pika", "2chu", 0}, {"2chu", "c", 1}, {"2chu", "c", 0}, {"c", "p", 0}, }; for (int i = 0; i < 2; i++) { RangeTombstone t(keys[i], vals[i], i); std::pair p = t.Serialize(); c.Add(p.first.Encode().ToString(), p.second); } std::vector sorted_keys; stl_wrappers::KVMap kvmap; Options options; options.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_restart_interval = 1; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); std::unique_ptr internal_cmp( new InternalKeyComparator(options.comparator)); c.Finish(options, ioptions, moptions, table_options, *internal_cmp, &sorted_keys, &kvmap); for (int j = 0; j < 2; ++j) { std::unique_ptr iter( c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions())); if (j > 0) { // For second iteration, delete the table reader object and verify the // iterator can still access its metablock's range tombstones. 
c.ResetTableReader(); } ASSERT_FALSE(iter->Valid()); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); for (size_t i = 0; i < expected_tombstones.size(); i++) { ASSERT_TRUE(iter->Valid()); ParsedInternalKey parsed_key; ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key)); RangeTombstone t(parsed_key, iter->value()); const auto& expected_t = expected_tombstones[i]; ASSERT_EQ(t.start_key_, expected_t.start_key_); ASSERT_EQ(t.end_key_, expected_t.end_key_); ASSERT_EQ(t.seq_, expected_t.seq_); iter->Next(); } ASSERT_TRUE(!iter->Valid()); } } TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) { TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("a1", "val1"); std::vector keys; stl_wrappers::KVMap kvmap; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.filter_policy.reset(NewBloomFilterPolicy(10)); Options options; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); auto& props = *c.GetTableReader()->GetTableProperties(); ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name); c.ResetTableReader(); } // // BlockBasedTableTest::PrefetchTest // void AssertKeysInCache(BlockBasedTable* table_reader, const std::vector& keys_in_cache, const std::vector& keys_not_in_cache, bool convert = false) { if (convert) { for (auto key : keys_in_cache) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); } for (auto key : keys_not_in_cache) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); } } else { for (auto key : keys_in_cache) { ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key)); } for (auto key : 
keys_not_in_cache) { ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key)); } } } void PrefetchRange(TableConstructor* c, Options* opt, BlockBasedTableOptions* table_options, const char* key_begin, const char* key_end, const std::vector& keys_in_cache, const std::vector& keys_not_in_cache, const Status expected_status = Status::OK()) { // reset the cache and reopen the table table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4); opt->table_factory.reset(NewBlockBasedTableFactory(*table_options)); const ImmutableCFOptions ioptions2(*opt); const MutableCFOptions moptions(*opt); ASSERT_OK(c->Reopen(ioptions2, moptions)); // prefetch auto* table_reader = dynamic_cast(c->GetTableReader()); Status s; std::unique_ptr begin, end; std::unique_ptr i_begin, i_end; if (key_begin != nullptr) { if (c->ConvertToInternalKey()) { i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue)); begin.reset(new Slice(i_begin->Encode())); } else { begin.reset(new Slice(key_begin)); } } if (key_end != nullptr) { if (c->ConvertToInternalKey()) { i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue)); end.reset(new Slice(i_end->Encode())); } else { end.reset(new Slice(key_end)); } } s = table_reader->Prefetch(begin.get(), end.get()); ASSERT_TRUE(s.code() == expected_status.code()); // assert our expectation in cache warmup AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache, c->ConvertToInternalKey()); c->ResetTableReader(); } TEST_P(BlockBasedTableTest, PrefetchTest) { // The purpose of this test is to test the prefetching operation built into // BlockBasedTable. Options opt; std::unique_ptr ikc; ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); opt.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_size = 1024; // big enough so we don't ever lose cached values. 
table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("k01", "hello"); c.Add("k02", "hello2"); c.Add("k03", std::string(10000, 'x')); c.Add("k04", std::string(200000, 'x')); c.Add("k05", std::string(300000, 'x')); c.Add("k06", "hello3"); c.Add("k07", std::string(100000, 'x')); std::vector keys; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(opt); const MutableCFOptions moptions(opt); c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); c.ResetTableReader(); // We get the following data spread : // // Data block Index // ======================== // [ k01 k02 k03 ] k03 // [ k04 ] k04 // [ k05 ] k05 // [ k06 k07 ] k07 // Simple PrefetchRange(&c, &opt, &table_options, /*key_range=*/"k01", "k05", /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"}, /*keys_not_in_cache=*/{"k06", "k07"}); PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"}, {"k04", "k05", "k06", "k07"}); // odd PrefetchRange(&c, &opt, &table_options, "a", "z", {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"}, {"k04", "k05", "k06", "k07"}); // Edge cases PrefetchRange(&c, &opt, &table_options, "k00", "k06", {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); PrefetchRange(&c, &opt, &table_options, "k00", "zzz", {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); // null keys PrefetchRange(&c, &opt, &table_options, nullptr, nullptr, {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {}); PrefetchRange(&c, &opt, &table_options, "k04", nullptr, {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"}); PrefetchRange(&c, &opt, &table_options, nullptr, "k05", {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"}); // invalid PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {}, 
Status::InvalidArgument(Slice("k06 "), Slice("k07")));
  c.ResetTableReader();
}

// For each index configuration below, builds a table whose keys were added
// out of order and checks that, with ReadOptions::total_order_seek enabled,
// Seek() followed by Next() walks the keys in sorted order.
TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  for (int i = 0; i <= 5; ++i) {
    Options options;
    // Make each key/value an individual block
    table_options.block_size = 64;
    switch (i) {
      case 0:
        // Binary search index
        table_options.index_type = BlockBasedTableOptions::kBinarySearch;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        break;
      case 1:
        // Hash search index
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 2:
        // Hash search index with hash_index_allow_collision
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        table_options.hash_index_allow_collision = true;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 3:
        // Hash search index with filter policy
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        table_options.filter_policy.reset(NewBloomFilterPolicy(10));
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(4));
        break;
      case 4:
        // Two-level index
        table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        break;
      case 5:
        // Binary search with first key
        table_options.index_type =
            BlockBasedTableOptions::kBinarySearchWithFirstKey;
        options.table_factory.reset(new BlockBasedTableFactory(table_options));
        break;
    }

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    // std::string's fill constructor is (count, char). The previous
    // std::string('a', 56) built 97 copies of '8' instead of the intended
    // 56-byte value of 'a'. Each entry still exceeds block_size (64), so
    // every key/value continues to occupy its own data block.
    c.Add("aaaa1", std::string(56, 'a'));
    c.Add("bbaa1", std::string(56, 'a'));
    c.Add("cccc1", std::string(56, 'a'));
    c.Add("bbbb1", std::string(56, 'a'));
    c.Add("baaa1", std::string(56, 'a'));
    c.Add("abbb1", std::string(56, 'a'));
    c.Add("cccc2", std::string(56, 'a'));
    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &keys, &kvmap);
    auto props = c.GetTableReader()->GetTableProperties();
    ASSERT_EQ(7u, props->num_data_blocks);
    auto* reader = c.GetTableReader();
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());

    iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
    iter->Next();
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
  }
}

TEST_P(BlockBasedTableTest, NoopTransformSeek) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewNoopTransform());

  TableConstructor c(options.comparator);
  // To tickle the
PrefixMayMatch bug it is important that the // user-key is a single byte so that the index key exactly matches // the user-key. InternalKey key("a", 1, kTypeValue); c.Add(key.Encode().ToString(), "b"); std::vector keys; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); const InternalKeyComparator internal_comparator(options.comparator); c.Finish(options, ioptions, moptions, table_options, internal_comparator, &keys, &kvmap); auto* reader = c.GetTableReader(); for (int i = 0; i < 2; ++i) { ReadOptions ro; ro.total_order_seek = (i == 0); std::unique_ptr iter(reader->NewIterator( ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); iter->Seek(key.Encode()); ASSERT_OK(iter->status()); ASSERT_TRUE(iter->Valid()); ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString()); } } TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) { // if DB is opened with a prefix extractor of a different name, // prefix bloom is skipped when read the file BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.filter_policy.reset(NewBloomFilterPolicy(2)); table_options.whole_key_filtering = false; Options options; options.comparator = BytewiseComparator(); options.table_factory.reset(new BlockBasedTableFactory(table_options)); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); TableConstructor c(options.comparator); InternalKey key("abcdefghijk", 1, kTypeValue); c.Add(key.Encode().ToString(), "test"); std::vector keys; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); const InternalKeyComparator internal_comparator(options.comparator); c.Finish(options, ioptions, moptions, table_options, internal_comparator, &keys, &kvmap); // TODO(Zhongyi): update test to use MutableCFOptions options.prefix_extractor.reset(NewFixedPrefixTransform(9)); const ImmutableCFOptions 
new_ioptions(options); const MutableCFOptions new_moptions(options); c.Reopen(new_ioptions, new_moptions); auto reader = c.GetTableReader(); std::unique_ptr db_iter(reader->NewIterator( ReadOptions(), new_moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); // Test point lookup // only one kv for (auto& kv : kvmap) { db_iter->Seek(kv.first); ASSERT_TRUE(db_iter->Valid()); ASSERT_OK(db_iter->status()); ASSERT_EQ(db_iter->key(), kv.first); ASSERT_EQ(db_iter->value(), kv.second); } } static std::string RandomString(Random* rnd, int len) { std::string r; test::RandomString(rnd, len, &r); return r; } void AddInternalKey(TableConstructor* c, const std::string& prefix, std::string value = "v", int /*suffix_len*/ = 800) { static Random rnd(1023); InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue); c->Add(k.Encode().ToString(), value); } void TableTest::IndexTest(BlockBasedTableOptions table_options) { TableConstructor c(BytewiseComparator()); // keys with prefix length 3, make sure the key/value is big enough to fill // one block AddInternalKey(&c, "0015"); AddInternalKey(&c, "0035"); AddInternalKey(&c, "0054"); AddInternalKey(&c, "0055"); AddInternalKey(&c, "0056"); AddInternalKey(&c, "0057"); AddInternalKey(&c, "0058"); AddInternalKey(&c, "0075"); AddInternalKey(&c, "0076"); AddInternalKey(&c, "0095"); std::vector keys; stl_wrappers::KVMap kvmap; Options options; options.prefix_extractor.reset(NewFixedPrefixTransform(3)); table_options.block_size = 1700; table_options.block_cache = NewLRUCache(1024, 4); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, &kvmap); auto reader = c.GetTableReader(); auto props = reader->GetTableProperties(); 
ASSERT_EQ(5u, props->num_data_blocks); // TODO(Zhongyi): update test to use MutableCFOptions std::unique_ptr index_iter(reader->NewIterator( ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); // -- Find keys do not exist, but have common prefix. std::vector prefixes = {"001", "003", "005", "007", "009"}; std::vector lower_bound = { keys[0], keys[1], keys[2], keys[7], keys[9], }; // find the lower bound of the prefix for (size_t i = 0; i < prefixes.size(); ++i) { index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode()); ASSERT_OK(index_iter->status()); ASSERT_TRUE(index_iter->Valid()); // seek the first element in the block ASSERT_EQ(lower_bound[i], index_iter->key().ToString()); ASSERT_EQ("v", index_iter->value().ToString()); } // find the upper bound of prefixes std::vector upper_bound = {keys[1], keys[2], keys[7], keys[9], }; // find existing keys for (const auto& item : kvmap) { auto ukey = ExtractUserKey(item.first).ToString(); index_iter->Seek(ukey); // ASSERT_OK(regular_iter->status()); ASSERT_OK(index_iter->status()); // ASSERT_TRUE(regular_iter->Valid()); ASSERT_TRUE(index_iter->Valid()); ASSERT_EQ(item.first, index_iter->key().ToString()); ASSERT_EQ(item.second, index_iter->value().ToString()); } for (size_t i = 0; i < prefixes.size(); ++i) { // the key is greater than any existing keys. auto key = prefixes[i] + "9"; index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode()); ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound()); ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid()); if (i == prefixes.size() - 1) { // last key ASSERT_TRUE(!index_iter->Valid()); } else { ASSERT_TRUE(index_iter->Valid()); // seek the first element in the block ASSERT_EQ(upper_bound[i], index_iter->key().ToString()); ASSERT_EQ("v", index_iter->value().ToString()); } } // find keys with prefix that don't match any of the existing prefixes. 
std::vector non_exist_prefixes = {"002", "004", "006", "008"}; for (const auto& prefix : non_exist_prefixes) { index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode()); // regular_iter->Seek(prefix); ASSERT_OK(index_iter->status()); // Seek to non-existing prefixes should yield either invalid, or a // key with prefix greater than the target. if (index_iter->Valid()) { Slice ukey = ExtractUserKey(index_iter->key()); Slice ukey_prefix = options.prefix_extractor->Transform(ukey); ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0); } } for (const auto& prefix : non_exist_prefixes) { index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode()); // regular_iter->Seek(prefix); ASSERT_OK(index_iter->status()); // Seek to non-existing prefixes should yield either invalid, or a // key with prefix greater than the target. if (index_iter->Valid()) { Slice ukey = ExtractUserKey(index_iter->key()); Slice ukey_prefix = options.prefix_extractor->Transform(ukey); ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0); } } { // Test reseek case. It should impact partitioned index more. ReadOptions ro; ro.total_order_seek = true; std::unique_ptr index_iter2(reader->NewIterator( ro, moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); // Things to cover in partitioned index: // 1. Both of Seek() and SeekToLast() has optimization to prevent // rereek leaf index block if it remains to the same one, and // they reuse the same variable. // 2. When Next() or Prev() is called, the block moves, so the // optimization should kick in only with the current one. 
index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode()); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4)); index_iter2->SeekToLast(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode()); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4)); index_iter2->SeekToLast(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); index_iter2->Prev(); ASSERT_TRUE(index_iter2->Valid()); index_iter2->Prev(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode()); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); index_iter2->Prev(); ASSERT_TRUE(index_iter2->Valid()); index_iter2->Prev(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); index_iter2->SeekToLast(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode()); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); index_iter2->Prev(); ASSERT_TRUE(index_iter2->Valid()); index_iter2->Prev(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode()); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4)); index_iter2->Next(); ASSERT_TRUE(index_iter2->Valid()); index_iter2->Next(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4)); index_iter2->SeekToLast(); ASSERT_TRUE(index_iter2->Valid()); ASSERT_EQ("0095", 
index_iter2->key().ToString().substr(0, 4)); } c.ResetTableReader(); } TEST_P(BlockBasedTableTest, BinaryIndexTest) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.index_type = BlockBasedTableOptions::kBinarySearch; IndexTest(table_options); } TEST_P(BlockBasedTableTest, HashIndexTest) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.index_type = BlockBasedTableOptions::kHashSearch; IndexTest(table_options); } TEST_P(BlockBasedTableTest, PartitionIndexTest) { const int max_index_keys = 5; const int est_max_index_key_value_size = 32; const int est_max_index_size = max_index_keys * est_max_index_key_value_size; for (int i = 1; i <= est_max_index_size + 1; i++) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; table_options.metadata_block_size = i; IndexTest(table_options); } } TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) { std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); Options options; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); TableConstructor c(BytewiseComparator()); AddInternalKey(&c, "pika"); std::vector keys; stl_wrappers::KVMap kvmap; c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, &kvmap); ASSERT_EQ(1, keys.size()); auto reader = c.GetTableReader(); ReadOptions ropt; ropt.read_tier = ReadTier::kBlockCacheTier; std::unique_ptr iter(reader->NewIterator( ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); auto ikey = [](Slice user_key) { return InternalKey(user_key, 0, kTypeValue).Encode().ToString(); }; iter->Seek(ikey("pika")); ASSERT_FALSE(iter->Valid()); 
ASSERT_TRUE(iter->status().IsIncomplete()); // This used to crash at some point. iter->Seek(ikey("pika")); ASSERT_FALSE(iter->Valid()); ASSERT_TRUE(iter->status().IsIncomplete()); } TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey; IndexTest(table_options); } class CustomFlushBlockPolicy : public FlushBlockPolicyFactory, public FlushBlockPolicy { public: explicit CustomFlushBlockPolicy(std::vector keys_per_block) : keys_per_block_(keys_per_block) {} const char* Name() const override { return "table_test"; } FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&, const BlockBuilder&) const override { return new CustomFlushBlockPolicy(keys_per_block_); } bool Update(const Slice&, const Slice&) override { if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) { ++current_block_idx_; keys_in_current_block_ = 1; return true; } ++keys_in_current_block_; return false; } std::vector keys_per_block_; int current_block_idx_ = 0; int keys_in_current_block_ = 0; }; TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) { for (int use_first_key = 0; use_first_key < 2; ++use_first_key) { SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key)); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.index_type = use_first_key ? 
BlockBasedTableOptions::kBinarySearchWithFirstKey : BlockBasedTableOptions::kBinarySearch; table_options.block_cache = NewLRUCache(10000); // fits all blocks table_options.index_shortening = BlockBasedTableOptions::IndexShorteningMode::kNoShortening; table_options.flush_block_policy_factory = std::make_shared(std::vector{2, 1, 3, 2}); Options options; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.statistics = CreateDBStatistics(); Statistics* stats = options.statistics.get(); std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); TableConstructor c(BytewiseComparator()); // Block 0. AddInternalKey(&c, "aaaa", "v0"); AddInternalKey(&c, "aaac", "v1"); // Block 1. AddInternalKey(&c, "aaca", "v2"); // Block 2. AddInternalKey(&c, "caaa", "v3"); AddInternalKey(&c, "caac", "v4"); AddInternalKey(&c, "caae", "v5"); // Block 3. AddInternalKey(&c, "ccaa", "v6"); AddInternalKey(&c, "ccac", "v7"); // Write the file. std::vector keys; stl_wrappers::KVMap kvmap; c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, &kvmap); ASSERT_EQ(8, keys.size()); auto reader = c.GetTableReader(); auto props = reader->GetTableProperties(); ASSERT_EQ(4u, props->num_data_blocks); std::unique_ptr iter(reader->NewIterator( ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized, /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true)); // Shouldn't have read data blocks before iterator is seeked. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); auto ikey = [](Slice user_key) { return InternalKey(user_key, 0, kTypeValue).Encode().ToString(); }; // Seek to a key between blocks. If index contains first key, we shouldn't // read any data blocks until value is requested. 
iter->Seek(ikey("aaba")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[2], iter->key().ToString()); EXPECT_EQ(use_first_key ? 0 : 1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v2", iter->value().ToString()); EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Seek to the middle of a block. The block should be read right away. iter->Seek(ikey("caab")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[4], iter->key().ToString()); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v4", iter->value().ToString()); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Seek to just before the same block and don't access value. // The iterator should keep pinning the block contents. iter->Seek(ikey("baaa")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[3], iter->key().ToString()); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Seek to the same block again to check that the block is still pinned. iter->Seek(ikey("caae")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[5], iter->key().ToString()); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v5", iter->value().ToString()); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Step forward and fall through to the next block. Don't access value. iter->Next(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[6], iter->key().ToString()); EXPECT_EQ(use_first_key ? 2 : 3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Step forward again. Block should be read. 
iter->Next(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[7], iter->key().ToString()); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v7", iter->value().ToString()); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Step forward and reach the end. iter->Next(); EXPECT_FALSE(iter->Valid()); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Seek to a single-key block and step forward without accessing value. iter->Seek(ikey("aaca")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[2], iter->key().ToString()); EXPECT_EQ(use_first_key ? 0 : 1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); iter->Next(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[3], iter->key().ToString()); EXPECT_EQ(use_first_key ? 1 : 2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v3", iter->value().ToString()); EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); // Seek between blocks and step back without accessing value. iter->Seek(ikey("aaca")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[2], iter->key().ToString()); EXPECT_EQ(use_first_key ? 2 : 3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); iter->Prev(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[1], iter->key().ToString()); EXPECT_EQ(use_first_key ? 2 : 3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // All blocks are in cache now, there'll be no more misses ever. EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v1", iter->value().ToString()); // Next into the next block again. iter->Next(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[2], iter->key().ToString()); EXPECT_EQ(use_first_key ? 
2 : 4, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Seek to first and step back without accessing value. iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[0], iter->key().ToString()); EXPECT_EQ(use_first_key ? 2 : 5, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); iter->Prev(); EXPECT_FALSE(iter->Valid()); EXPECT_EQ(use_first_key ? 2 : 5, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); // Do some SeekForPrev() and SeekToLast() just to cover all methods. iter->SeekForPrev(ikey("caad")); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[4], iter->key().ToString()); EXPECT_EQ(use_first_key ? 3 : 6, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v4", iter->value().ToString()); EXPECT_EQ(use_first_key ? 3 : 6, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); iter->SeekToLast(); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(keys[7], iter->key().ToString()); EXPECT_EQ(use_first_key ? 4 : 7, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("v7", iter->value().ToString()); EXPECT_EQ(use_first_key ? 
4 : 7, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); c.ResetTableReader(); } } TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) { BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey; table_options.block_cache = NewLRUCache(10000); Options options; options.statistics = CreateDBStatistics(); Statistics* stats = options.statistics.get(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false, /* level */ -1, /* largest_seqno */ 42); c.Add(InternalKey("b", 0, kTypeValue).Encode().ToString(), "x"); c.Add(InternalKey("c", 0, kTypeValue).Encode().ToString(), "y"); std::vector keys; stl_wrappers::KVMap kvmap; c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, &kvmap); ASSERT_EQ(2, keys.size()); auto reader = c.GetTableReader(); auto props = reader->GetTableProperties(); ASSERT_EQ(1u, props->num_data_blocks); std::unique_ptr iter(reader->NewIterator( ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized, /*compaction_readahead_size=*/0, /*allow_unprepared_value=*/true)); iter->Seek(InternalKey("a", 0, kTypeValue).Encode().ToString()); ASSERT_TRUE(iter->Valid()); EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(), iter->key().ToString()); EXPECT_NE(keys[0], iter->key().ToString()); // Key should have been served from index, without reading data blocks. 
EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); ASSERT_TRUE(iter->PrepareValue()); EXPECT_EQ("x", iter->value().ToString()); EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(InternalKey("b", 42, kTypeValue).Encode().ToString(), iter->key().ToString()); c.ResetTableReader(); } // It's very hard to figure out the index block size of a block accurately. // To make sure we get the index size, we just make sure as key number // grows, the filter block size also grows. TEST_P(BlockBasedTableTest, IndexSizeStat) { uint64_t last_index_size = 0; // we need to use random keys since the pure human readable texts // may be well compressed, resulting insignifcant change of index // block size. Random rnd(test::RandomSeed()); std::vector keys; for (int i = 0; i < 100; ++i) { keys.push_back(RandomString(&rnd, 10000)); } // Each time we load one more key to the table. the table index block // size is expected to be larger than last time's. 
for (size_t i = 1; i < keys.size(); ++i) { TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); for (size_t j = 0; j < i; ++j) { c.Add(keys[j], "val"); } std::vector ks; stl_wrappers::KVMap kvmap; Options options; options.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_restart_interval = 1; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &ks, &kvmap); auto index_size = c.GetTableReader()->GetTableProperties()->index_size; ASSERT_GT(index_size, last_index_size); last_index_size = index_size; c.ResetTableReader(); } } TEST_P(BlockBasedTableTest, NumBlockStat) { Random rnd(test::RandomSeed()); TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); Options options; options.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_restart_interval = 1; table_options.block_size = 1000; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); for (int i = 0; i < 10; ++i) { // the key/val are slightly smaller than block size, so that each block // holds roughly one key/value pair. 
c.Add(RandomString(&rnd, 900), "val"); } std::vector ks; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &ks, &kvmap); ASSERT_EQ(kvmap.size(), c.GetTableReader()->GetTableProperties()->num_data_blocks); c.ResetTableReader(); } TEST_P(BlockBasedTableTest, TracingGetTest) { TableConstructor c(BytewiseComparator()); Options options; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); options.create_if_missing = true; table_options.block_cache = NewLRUCache(1024 * 1024, 0); table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); SetupTracingTest(&c); std::vector keys; stl_wrappers::KVMap kvmap; ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); std::string user_key = "k01"; InternalKey internal_key(user_key, 0, kTypeValue); std::string encoded_key = internal_key.Encode().ToString(); for (uint32_t i = 1; i <= 2; i++) { PinnableSlice value; GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, user_key, &value, nullptr, nullptr, true, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, /*tracing_get_id=*/i); get_perf_context()->Reset(); ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context, moptions.prefix_extractor.get())); ASSERT_EQ(get_context.State(), GetContext::kFound); ASSERT_EQ(value.ToString(), kDummyValue); } // Verify traces. std::vector expected_records; // The first two records should be prefetching index and filter blocks. 
BlockCacheTraceRecord record; record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kPrefetch; record.is_cache_hit = Boolean::kFalse; record.no_insert = Boolean::kFalse; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceFilterBlock; expected_records.push_back(record); // Then we should have three records for one index, one filter, and one data // block access. record.get_id = 1; record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kUserGet; record.get_from_user_specified_snapshot = Boolean::kFalse; record.referenced_key = encoded_key; record.referenced_key_exist_in_block = Boolean::kTrue; record.is_cache_hit = Boolean::kTrue; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceFilterBlock; expected_records.push_back(record); record.is_cache_hit = Boolean::kFalse; record.block_type = TraceType::kBlockTraceDataBlock; expected_records.push_back(record); // The second get should all observe cache hits. 
record.is_cache_hit = Boolean::kTrue; record.get_id = 2; record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kUserGet; record.get_from_user_specified_snapshot = Boolean::kFalse; record.referenced_key = encoded_key; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceFilterBlock; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceDataBlock; expected_records.push_back(record); VerifyBlockAccessTrace(&c, expected_records); c.ResetTableReader(); } TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) { TableConstructor c(BytewiseComparator()); Options options; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); options.create_if_missing = true; table_options.block_cache = NewLRUCache(1024 * 1024, 0); table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); SetupTracingTest(&c); std::vector keys; stl_wrappers::KVMap kvmap; ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); for (uint32_t i = 1; i <= 2; i++) { std::string user_key = "k01"; InternalKey internal_key(user_key, 0, kTypeValue); std::string encoded_key = internal_key.Encode().ToString(); c.GetTableReader()->ApproximateOffsetOf( encoded_key, TableReaderCaller::kUserApproximateSize); } // Verify traces. std::vector expected_records; // The first two records should be prefetching index and filter blocks. 
BlockCacheTraceRecord record; record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kPrefetch; record.is_cache_hit = Boolean::kFalse; record.no_insert = Boolean::kFalse; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceFilterBlock; expected_records.push_back(record); // Then we should have two records for only index blocks. record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kUserApproximateSize; record.is_cache_hit = Boolean::kTrue; expected_records.push_back(record); expected_records.push_back(record); VerifyBlockAccessTrace(&c, expected_records); c.ResetTableReader(); } TEST_P(BlockBasedTableTest, TracingIterator) { TableConstructor c(BytewiseComparator()); Options options; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); options.create_if_missing = true; table_options.block_cache = NewLRUCache(1024 * 1024, 0); table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); SetupTracingTest(&c); std::vector keys; stl_wrappers::KVMap kvmap; ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); for (uint32_t i = 1; i <= 2; i++) { std::unique_ptr iter(c.GetTableReader()->NewIterator( ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUserIterator)); iter->SeekToFirst(); while (iter->Valid()) { iter->key(); iter->value(); iter->Next(); } ASSERT_OK(iter->status()); iter.reset(); } // Verify traces. std::vector expected_records; // The first two records should be prefetching index and filter blocks. 
BlockCacheTraceRecord record; record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kPrefetch; record.is_cache_hit = Boolean::kFalse; record.no_insert = Boolean::kFalse; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceFilterBlock; expected_records.push_back(record); // Then we should have three records for index and two data block access. record.block_type = TraceType::kBlockTraceIndexBlock; record.caller = TableReaderCaller::kUserIterator; record.is_cache_hit = Boolean::kTrue; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceDataBlock; record.is_cache_hit = Boolean::kFalse; expected_records.push_back(record); expected_records.push_back(record); // When we iterate this file for the second time, we should observe all cache // hits. record.block_type = TraceType::kBlockTraceIndexBlock; record.is_cache_hit = Boolean::kTrue; expected_records.push_back(record); record.block_type = TraceType::kBlockTraceDataBlock; expected_records.push_back(record); expected_records.push_back(record); VerifyBlockAccessTrace(&c, expected_records); c.ResetTableReader(); } // A simple tool that takes the snapshot of block cache statistics. 
class BlockCachePropertiesSnapshot {
 public:
  // Reads the block cache tickers (overall, index, data, filter hit/miss and
  // bytes read/written) from `statistics` once, at construction time. Later
  // changes to `statistics` are not reflected in this object.
  explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
    block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
    block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
    index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
    index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
    data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
    data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
    filter_block_cache_miss =
        statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
    filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
    block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
    block_cache_bytes_write =
        statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
  }

  // Asserts the captured index block miss/hit counters.
  void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
                            int64_t expected_index_block_cache_hit) const {
    ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
  }

  // Asserts the captured filter block miss/hit counters.
  void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
                             int64_t expected_filter_block_cache_hit) const {
    ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
    ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
  }

  // Check if the fetched props matches the expected ones.
  // Besides the per-type counters, this also requires the overall miss/hit
  // counters to equal index + data, which leaves no room for filter block
  // accesses.
  // TODO(kailiu) Use this only when you disabled filter policy!
  void AssertEqual(int64_t expected_index_block_cache_miss,
                   int64_t expected_index_block_cache_hit,
                   int64_t expected_data_block_cache_miss,
                   int64_t expected_data_block_cache_hit) const {
    ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
    ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
    ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
    ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
              block_cache_miss);
    ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
              block_cache_hit);
  }

  // Captured BLOCK_CACHE_BYTES_READ ticker value.
  int64_t GetCacheBytesRead() const { return block_cache_bytes_read; }

  // Captured BLOCK_CACHE_BYTES_WRITE ticker value.
  int64_t GetCacheBytesWrite() const { return block_cache_bytes_write; }

 private:
  int64_t block_cache_miss = 0;
  int64_t block_cache_hit = 0;
  int64_t index_block_cache_miss = 0;
  int64_t index_block_cache_hit = 0;
  int64_t data_block_cache_miss = 0;
  int64_t data_block_cache_hit = 0;
  int64_t filter_block_cache_miss = 0;
  int64_t filter_block_cache_hit = 0;
  int64_t block_cache_bytes_read = 0;
  int64_t block_cache_bytes_write = 0;
};

// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't
// use block cache to store them).
TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) { Options options; options.create_if_missing = true; options.statistics = CreateDBStatistics(); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_cache = NewLRUCache(1024, 4); table_options.filter_policy.reset(NewBloomFilterPolicy(10)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); std::vector keys; stl_wrappers::KVMap kvmap; TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("key", "value"); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); // preloading filter/index blocks is enabled. auto reader = dynamic_cast(c.GetTableReader()); ASSERT_FALSE(reader->TEST_FilterBlockInCache()); ASSERT_FALSE(reader->TEST_IndexBlockInCache()); { // nothing happens in the beginning BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertIndexBlockStat(0, 0); props.AssertFilterBlockStat(0, 0); } { GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(), nullptr, nullptr, nullptr, true, nullptr, nullptr); // a hack that just to trigger BlockBasedTable::GetFilter. 
reader->Get(ReadOptions(), "non-exist-key", &get_context, moptions.prefix_extractor.get()); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertIndexBlockStat(0, 0); props.AssertFilterBlockStat(0, 0); } } // Due to the difficulities of the intersaction between statistics, this test // only tests the case when "index block is put to block cache" TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) { // -- Table construction Options options; options.create_if_missing = true; options.statistics = CreateDBStatistics(); // Enable the cache for index/filter blocks BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); LRUCacheOptions co; co.capacity = 2048; co.num_shard_bits = 2; co.metadata_charge_policy = kDontChargeCacheMetadata; table_options.block_cache = NewLRUCache(co); table_options.cache_index_and_filter_blocks = true; options.table_factory.reset(new BlockBasedTableFactory(table_options)); std::vector keys; stl_wrappers::KVMap kvmap; TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("key", "value"); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); // preloading filter/index blocks is prohibited. auto* reader = dynamic_cast(c.GetTableReader()); ASSERT_FALSE(reader->TEST_FilterBlockInCache()); ASSERT_TRUE(reader->TEST_IndexBlockInCache()); // -- PART 1: Open with regular block cache. // Since block_cache is disabled, no cache activities will be involved. std::unique_ptr iter; int64_t last_cache_bytes_read = 0; // At first, no block will be accessed. { BlockCachePropertiesSnapshot props(options.statistics.get()); // index will be added to block cache. 
props.AssertEqual(1, // index block miss 0, 0, 0); ASSERT_EQ(props.GetCacheBytesRead(), 0); ASSERT_EQ(props.GetCacheBytesWrite(), static_cast(table_options.block_cache->GetUsage())); last_cache_bytes_read = props.GetCacheBytesRead(); } // Only index block will be accessed { iter.reset(c.NewIterator(moptions.prefix_extractor.get())); BlockCachePropertiesSnapshot props(options.statistics.get()); // NOTE: to help better highlight the "detla" of each ticker, I use // + to indicate the increment of changed // value; other numbers remain the same. props.AssertEqual(1, 0 + 1, // index block hit 0, 0); // Cache hit, bytes read from cache should increase ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read); ASSERT_EQ(props.GetCacheBytesWrite(), static_cast(table_options.block_cache->GetUsage())); last_cache_bytes_read = props.GetCacheBytesRead(); } // Only data block will be accessed { iter->SeekToFirst(); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, 1, 0 + 1, // data block miss 0); // Cache miss, Bytes read from cache should not change ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read); ASSERT_EQ(props.GetCacheBytesWrite(), static_cast(table_options.block_cache->GetUsage())); last_cache_bytes_read = props.GetCacheBytesRead(); } // Data block will be in cache { iter.reset(c.NewIterator(moptions.prefix_extractor.get())); iter->SeekToFirst(); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, 1 + 1, /* index block hit */ 1, 0 + 1 /* data block hit */); // Cache hit, bytes read from cache should increase ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read); ASSERT_EQ(props.GetCacheBytesWrite(), static_cast(table_options.block_cache->GetUsage())); } // release the iterator so that the block cache can reset correctly. 
iter.reset(); c.ResetTableReader(); // -- PART 2: Open with very small block cache // In this test, no block will ever get hit since the block cache is // too small to fit even one entry. table_options.block_cache = NewLRUCache(1, 4); options.statistics = CreateDBStatistics(); options.table_factory.reset(new BlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions2(options); const MutableCFOptions moptions2(options); c.Reopen(ioptions2, moptions2); { BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, // index block miss 0, 0, 0); // Cache miss, Bytes read from cache should not change ASSERT_EQ(props.GetCacheBytesRead(), 0); } { // Both index and data block get accessed. // It first cache index block then data block. But since the cache size // is only 1, index block will be purged after data block is inserted. iter.reset(c.NewIterator(moptions2.prefix_extractor.get())); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1 + 1, // index block miss 0, 0, // data block miss 0); // Cache hit, bytes read from cache should increase ASSERT_EQ(props.GetCacheBytesRead(), 0); } { // SeekToFirst() accesses data block. With similar reason, we expect data // block's cache miss. 
iter->SeekToFirst(); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(2, 0, 0 + 1, // data block miss 0); // Cache miss, Bytes read from cache should not change ASSERT_EQ(props.GetCacheBytesRead(), 0); } iter.reset(); c.ResetTableReader(); // -- PART 3: Open table with bloom filter enabled but not in SST file table_options.block_cache = NewLRUCache(4096, 4); table_options.cache_index_and_filter_blocks = false; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); TableConstructor c3(BytewiseComparator()); std::string user_key = "k01"; InternalKey internal_key(user_key, 0, kTypeValue); c3.Add(internal_key.Encode().ToString(), "hello"); ImmutableCFOptions ioptions3(options); MutableCFOptions moptions3(options); // Generate table without filter policy c3.Finish(options, ioptions3, moptions3, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); c3.ResetTableReader(); // Open table with filter policy table_options.filter_policy.reset(NewBloomFilterPolicy(1)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); options.statistics = CreateDBStatistics(); ImmutableCFOptions ioptions4(options); MutableCFOptions moptions4(options); ASSERT_OK(c3.Reopen(ioptions4, moptions4)); reader = dynamic_cast(c3.GetTableReader()); ASSERT_FALSE(reader->TEST_FilterBlockInCache()); PinnableSlice value; GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, user_key, &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context, moptions4.prefix_extractor.get())); ASSERT_STREQ(value.data(), "hello"); BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertFilterBlockStat(0, 0); c3.ResetTableReader(); } void ValidateBlockSizeDeviation(int value, int expected) { BlockBasedTableOptions table_options; table_options.block_size_deviation = value; BlockBasedTableFactory* factory = new 
BlockBasedTableFactory(table_options); const BlockBasedTableOptions* normalized_table_options = (const BlockBasedTableOptions*)factory->GetOptions(); ASSERT_EQ(normalized_table_options->block_size_deviation, expected); delete factory; } void ValidateBlockRestartInterval(int value, int expected) { BlockBasedTableOptions table_options; table_options.block_restart_interval = value; BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options); const BlockBasedTableOptions* normalized_table_options = (const BlockBasedTableOptions*)factory->GetOptions(); ASSERT_EQ(normalized_table_options->block_restart_interval, expected); delete factory; } TEST_P(BlockBasedTableTest, InvalidOptions) { // invalid values for block_size_deviation (<0 or >100) are silently set to 0 ValidateBlockSizeDeviation(-10, 0); ValidateBlockSizeDeviation(-1, 0); ValidateBlockSizeDeviation(0, 0); ValidateBlockSizeDeviation(1, 1); ValidateBlockSizeDeviation(99, 99); ValidateBlockSizeDeviation(100, 100); ValidateBlockSizeDeviation(101, 0); ValidateBlockSizeDeviation(1000, 0); // invalid values for block_restart_interval (<1) are silently set to 1 ValidateBlockRestartInterval(-10, 1); ValidateBlockRestartInterval(-1, 1); ValidateBlockRestartInterval(0, 1); ValidateBlockRestartInterval(1, 1); ValidateBlockRestartInterval(2, 2); ValidateBlockRestartInterval(1000, 1000); } TEST_P(BlockBasedTableTest, BlockReadCountTest) { // bloom_filter_type = 0 -- block-based filter // bloom_filter_type = 0 -- full filter for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) { for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2; ++index_and_filter_in_cache) { Options options; options.create_if_missing = true; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_cache = NewLRUCache(1, 0); table_options.cache_index_and_filter_blocks = index_and_filter_in_cache; table_options.filter_policy.reset( NewBloomFilterPolicy(10, 
bloom_filter_type == 0)); options.table_factory.reset(new BlockBasedTableFactory(table_options)); std::vector keys; stl_wrappers::KVMap kvmap; TableConstructor c(BytewiseComparator()); std::string user_key = "k04"; InternalKey internal_key(user_key, 0, kTypeValue); std::string encoded_key = internal_key.Encode().ToString(); c.Add(encoded_key, "hello"); ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); // Generate table with filter policy c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); auto reader = c.GetTableReader(); PinnableSlice value; { GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, user_key, &value, nullptr, nullptr, true, nullptr, nullptr); get_perf_context()->Reset(); ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, moptions.prefix_extractor.get())); if (index_and_filter_in_cache) { // data, index and filter block ASSERT_EQ(get_perf_context()->block_read_count, 3); ASSERT_EQ(get_perf_context()->index_block_read_count, 1); ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); } else { // just the data block ASSERT_EQ(get_perf_context()->block_read_count, 1); } ASSERT_EQ(get_context.State(), GetContext::kFound); ASSERT_STREQ(value.data(), "hello"); } // Get non-existing key user_key = "does-not-exist"; internal_key = InternalKey(user_key, 0, kTypeValue); encoded_key = internal_key.Encode().ToString(); value.Reset(); { GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, user_key, &value, nullptr, nullptr, true, nullptr, nullptr); get_perf_context()->Reset(); ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context, moptions.prefix_extractor.get())); ASSERT_EQ(get_context.State(), GetContext::kNotFound); } if (index_and_filter_in_cache) { if (bloom_filter_type == 0) { // with block-based, we read index and then the filter 
ASSERT_EQ(get_perf_context()->block_read_count, 2); ASSERT_EQ(get_perf_context()->index_block_read_count, 1); ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); } else { // with full-filter, we read filter first and then we stop ASSERT_EQ(get_perf_context()->block_read_count, 1); ASSERT_EQ(get_perf_context()->filter_block_read_count, 1); } } else { // filter is already in memory and it figures out that the key doesn't // exist ASSERT_EQ(get_perf_context()->block_read_count, 0); } } } } TEST_P(BlockBasedTableTest, BlockCacheLeak) { // Check that when we reopen a table we don't lose access to blocks already // in the cache. This test checks whether the Table actually makes use of the // unique ID from the file. Options opt; std::unique_ptr ikc; ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); opt.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.block_size = 1024; // big enough so we don't ever lose cached values. 
table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("k01", "hello"); c.Add("k02", "hello2"); c.Add("k03", std::string(10000, 'x')); c.Add("k04", std::string(200000, 'x')); c.Add("k05", std::string(300000, 'x')); c.Add("k06", "hello3"); c.Add("k07", std::string(100000, 'x')); std::vector keys; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(opt); const MutableCFOptions moptions(opt); c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); std::unique_ptr iter( c.NewIterator(moptions.prefix_extractor.get())); iter->SeekToFirst(); while (iter->Valid()) { iter->key(); iter->value(); iter->Next(); } ASSERT_OK(iter->status()); iter.reset(); const ImmutableCFOptions ioptions1(opt); const MutableCFOptions moptions1(opt); ASSERT_OK(c.Reopen(ioptions1, moptions1)); auto table_reader = dynamic_cast(c.GetTableReader()); for (const std::string& key : keys) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); } c.ResetTableReader(); // rerun with different block cache table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4); opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); const ImmutableCFOptions ioptions2(opt); const MutableCFOptions moptions2(opt); ASSERT_OK(c.Reopen(ioptions2, moptions2)); table_reader = dynamic_cast(c.GetTableReader()); for (const std::string& key : keys) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); } c.ResetTableReader(); } namespace { class CustomMemoryAllocator : public MemoryAllocator { public: const char* Name() const override { return "CustomMemoryAllocator"; } void* Allocate(size_t size) override { ++numAllocations; auto ptr = new char[size + 16]; memcpy(ptr, "memory_allocator_", 
16); // mangle first 16 bytes return reinterpret_cast(ptr + 16); } void Deallocate(void* p) override { ++numDeallocations; char* ptr = reinterpret_cast(p) - 16; delete[] ptr; } std::atomic numAllocations; std::atomic numDeallocations; }; } // namespace TEST_P(BlockBasedTableTest, MemoryAllocator) { auto custom_memory_allocator = std::make_shared(); { Options opt; std::unique_ptr ikc; ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); opt.compression = kNoCompression; BlockBasedTableOptions table_options; table_options.block_size = 1024; LRUCacheOptions lruOptions; lruOptions.memory_allocator = custom_memory_allocator; lruOptions.capacity = 16 * 1024 * 1024; lruOptions.num_shard_bits = 4; table_options.block_cache = NewLRUCache(std::move(lruOptions)); opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("k01", "hello"); c.Add("k02", "hello2"); c.Add("k03", std::string(10000, 'x')); c.Add("k04", std::string(200000, 'x')); c.Add("k05", std::string(300000, 'x')); c.Add("k06", "hello3"); c.Add("k07", std::string(100000, 'x')); std::vector keys; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(opt); const MutableCFOptions moptions(opt); c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); std::unique_ptr iter( c.NewIterator(moptions.prefix_extractor.get())); iter->SeekToFirst(); while (iter->Valid()) { iter->key(); iter->value(); iter->Next(); } ASSERT_OK(iter->status()); } // out of scope, block cache should have been deleted, all allocations // deallocated EXPECT_EQ(custom_memory_allocator->numAllocations.load(), custom_memory_allocator->numDeallocations.load()); // make sure that allocations actually happened through the cache allocator EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0); } // Test the file checksum of block based table TEST_P(BlockBasedTableTest, NoFileChecksum) { Options options; ImmutableCFOptions 
ioptions(options); MutableCFOptions moptions(options); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); int level = 0; std::vector> int_tbl_prop_collector_factories; std::string column_family_name; FileChecksumTestHelper f(true); f.CreateWriteableFile(); std::unique_ptr builder; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, *comparator, &int_tbl_prop_collector_factories, options.compression, options.sample_for_compression, options.compression_opts, false /* skip_filters */, column_family_name, level), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, f.GetFileWriter())); f.ResetTableBuilder(std::move(builder)); f.AddKVtoKVMap(1000); f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName); ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum); } TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { FileChecksumGenCrc32cFactory* file_checksum_gen_factory = new FileChecksumGenCrc32cFactory(); Options options; options.file_checksum_gen_factory.reset(file_checksum_gen_factory); ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); int level = 0; std::vector> int_tbl_prop_collector_factories; std::string column_family_name; FileChecksumGenContext gen_context; gen_context.file_name = "db/tmp"; std::unique_ptr checksum_crc32c_gen1 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); FileChecksumTestHelper f(true); f.CreateWriteableFile(); f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); std::unique_ptr builder; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, *comparator, &int_tbl_prop_collector_factories, 
options.compression, options.sample_for_compression, options.compression_opts, false /* skip_filters */, column_family_name, level), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, f.GetFileWriter())); f.ResetTableBuilder(std::move(builder)); f.AddKVtoKVMap(1000); f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); std::unique_ptr checksum_crc32c_gen2 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); std::string checksum; ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); ASSERT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); // Unit test the generator itself for schema stability std::unique_ptr checksum_crc32c_gen3 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); const char data[] = "here is some data"; checksum_crc32c_gen3->Update(data, sizeof(data)); checksum_crc32c_gen3->Finalize(); checksum = checksum_crc32c_gen3->GetChecksum(); ASSERT_STREQ(checksum.c_str(), "\345\245\277\110"); } // Plain table is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(PlainTableTest, BasicPlainTableProperties) { PlainTableOptions plain_table_options; plain_table_options.user_key_len = 8; plain_table_options.bloom_bits_per_key = 8; plain_table_options.hash_table_ratio = 0; PlainTableFactory factory(plain_table_options); test::StringSink sink; std::unique_ptr file_writer( test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */)); Options options; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); std::vector> int_tbl_prop_collector_factories; std::string column_family_name; int unknown_level = -1; std::unique_ptr builder(factory.NewTableBuilder( TableBuilderOptions( ioptions, moptions, ikc, &int_tbl_prop_collector_factories, kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, 
column_family_name, unknown_level), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, file_writer.get())); for (char c = 'a'; c <= 'z'; ++c) { std::string key(8, c); key.append("\1 "); // PlainTable expects internal key structure std::string value(28, c + 42); builder->Add(key, value); } ASSERT_OK(builder->Finish()); file_writer->Flush(); test::StringSink* ss = ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(file_writer.get()); std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss->contents(), 72242, true))); TableProperties* props = nullptr; auto s = ReadTableProperties(file_reader.get(), ss->contents().size(), kPlainTableMagicNumber, ioptions, &props, true /* compression_type_missing */); std::unique_ptr props_guard(props); ASSERT_OK(s); ASSERT_EQ(0ul, props->index_size); ASSERT_EQ(0ul, props->filter_size); ASSERT_EQ(16ul * 26, props->raw_key_size); ASSERT_EQ(28ul * 26, props->raw_value_size); ASSERT_EQ(26ul, props->num_entries); ASSERT_EQ(1ul, props->num_data_blocks); } TEST_F(PlainTableTest, NoFileChecksum) { PlainTableOptions plain_table_options; plain_table_options.user_key_len = 20; plain_table_options.bloom_bits_per_key = 8; plain_table_options.hash_table_ratio = 0; PlainTableFactory factory(plain_table_options); Options options; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); std::vector> int_tbl_prop_collector_factories; std::string column_family_name; int unknown_level = -1; FileChecksumTestHelper f(true); f.CreateWriteableFile(); std::unique_ptr builder(factory.NewTableBuilder( TableBuilderOptions( ioptions, moptions, ikc, &int_tbl_prop_collector_factories, kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, column_family_name, unknown_level), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, f.GetFileWriter())); f.ResetTableBuilder(std::move(builder)); 
f.AddKVtoKVMap(1000); f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), kUnknownFileChecksumFuncName); EXPECT_EQ(f.GetFileChecksum(), kUnknownFileChecksum); } TEST_F(PlainTableTest, Crc32cFileChecksum) { PlainTableOptions plain_table_options; plain_table_options.user_key_len = 20; plain_table_options.bloom_bits_per_key = 8; plain_table_options.hash_table_ratio = 0; PlainTableFactory factory(plain_table_options); FileChecksumGenCrc32cFactory* file_checksum_gen_factory = new FileChecksumGenCrc32cFactory(); Options options; options.file_checksum_gen_factory.reset(file_checksum_gen_factory); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); std::vector> int_tbl_prop_collector_factories; std::string column_family_name; int unknown_level = -1; FileChecksumGenContext gen_context; gen_context.file_name = "db/tmp"; std::unique_ptr checksum_crc32c_gen1 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); FileChecksumTestHelper f(true); f.CreateWriteableFile(); f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); std::unique_ptr builder(factory.NewTableBuilder( TableBuilderOptions( ioptions, moptions, ikc, &int_tbl_prop_collector_factories, kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, column_family_name, unknown_level), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, f.GetFileWriter())); f.ResetTableBuilder(std::move(builder)); f.AddKVtoKVMap(1000); f.WriteKVAndFlushTable(); ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c"); std::unique_ptr checksum_crc32c_gen2 = options.file_checksum_gen_factory->CreateFileChecksumGenerator( gen_context); std::string checksum; ASSERT_OK(f.CalculateFileChecksum(checksum_crc32c_gen2.get(), &checksum)); EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); } #endif // !ROCKSDB_LITE TEST_F(GeneralTableTest, 
ApproximateOffsetOfPlain) { TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("k01", "hello"); c.Add("k02", "hello2"); c.Add("k03", std::string(10000, 'x')); c.Add("k04", std::string(200000, 'x')); c.Add("k05", std::string(300000, 'x')); c.Add("k06", "hello3"); c.Add("k07", std::string(100000, 'x')); std::vector keys; stl_wrappers::KVMap kvmap; Options options; test::PlainInternalKeyComparator internal_comparator(options.comparator); options.compression = kNoCompression; BlockBasedTableOptions table_options; table_options.block_size = 1024; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, internal_comparator, &keys, &kvmap); ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); // k04 and k05 will be in two consecutive blocks, the index is // an arbitrary slice between k04 and k05, either before or after k04a ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); c.ResetTableReader(); } static void DoCompressionTest(CompressionType comp) { Random rnd(301); TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); std::string tmp; c.Add("k01", "hello"); c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); c.Add("k03", "hello3"); c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); std::vector keys; stl_wrappers::KVMap 
kvmap; Options options; test::PlainInternalKeyComparator ikc(options.comparator); options.compression = comp; BlockBasedTableOptions table_options; table_options.block_size = 1024; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap); ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3500)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3500)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 7000)); c.ResetTableReader(); } TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) { std::vector compression_state; if (!Snappy_Supported()) { fprintf(stderr, "skipping snappy compression tests\n"); } else { compression_state.push_back(kSnappyCompression); } if (!Zlib_Supported()) { fprintf(stderr, "skipping zlib compression tests\n"); } else { compression_state.push_back(kZlibCompression); } // TODO(kailiu) DoCompressionTest() doesn't work with BZip2. /* if (!BZip2_Supported()) { fprintf(stderr, "skipping bzip2 compression tests\n"); } else { compression_state.push_back(kBZip2Compression); } */ if (!LZ4_Supported()) { fprintf(stderr, "skipping lz4 and lz4hc compression tests\n"); } else { compression_state.push_back(kLZ4Compression); compression_state.push_back(kLZ4HCCompression); } if (!XPRESS_Supported()) { fprintf(stderr, "skipping xpress and xpress compression tests\n"); } else { compression_state.push_back(kXpressCompression); } for (auto state : compression_state) { DoCompressionTest(state); } } #ifndef ROCKSDB_VALGRIND_RUN // RandomizedHarnessTest is very slow for certain combination of arguments // Split into 8 pieces to reduce the time individual tests take. 
TEST_F(HarnessTest, Randomized1) { // part 1 out of 8 const size_t part = 1; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized2) { // part 2 out of 8 const size_t part = 2; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized3) { // part 3 out of 8 const size_t part = 3; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized4) { // part 4 out of 8 const size_t part = 4; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized5) { // part 5 out of 8 const size_t part = 5; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized6) { // part 6 out of 8 const size_t part = 6; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized7) { // part 7 out of 8 const size_t part = 7; const size_t total = 8; RandomizedHarnessTest(part, total); } TEST_F(HarnessTest, Randomized8) { // part 8 out of 8 const size_t part = 8; const size_t total = 8; RandomizedHarnessTest(part, total); } #ifndef ROCKSDB_LITE TEST_F(HarnessTest, RandomizedLongDB) { Random rnd(test::RandomSeed()); TestArgs args = {DB_TEST, false, 16, kNoCompression, 0, false}; Init(args); int num_entries = 100000; for (int e = 0; e < num_entries; e++) { std::string v; Add(test::RandomKey(&rnd, rnd.Skewed(4)), test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); } Test(&rnd); // We must have created enough data to force merging int files = 0; for (int level = 0; level < db()->NumberLevels(); level++) { std::string value; char name[100]; snprintf(name, sizeof(name), "rocksdb.num-files-at-level%d", level); ASSERT_TRUE(db()->GetProperty(name, &value)); files += atoi(value.c_str()); } ASSERT_GT(files, 0); } #endif // ROCKSDB_LITE #endif // ROCKSDB_VALGRIND_RUN class MemTableTest : public testing::Test {}; TEST_F(MemTableTest, Simple) { InternalKeyComparator cmp(BytewiseComparator()); auto 
table_factory = std::make_shared(); Options options; options.memtable_factory = table_factory; ImmutableCFOptions ioptions(options); WriteBufferManager wb(options.db_write_buffer_size); MemTable* memtable = new MemTable(cmp, ioptions, MutableCFOptions(options), &wb, kMaxSequenceNumber, 0 /* column_family_id */); memtable->Ref(); WriteBatch batch; WriteBatchInternal::SetSequence(&batch, 100); batch.Put(std::string("k1"), std::string("v1")); batch.Put(std::string("k2"), std::string("v2")); batch.Put(std::string("k3"), std::string("v3")); batch.Put(std::string("largekey"), std::string("vlarge")); batch.DeleteRange(std::string("chi"), std::string("xigua")); batch.DeleteRange(std::string("begin"), std::string("end")); ColumnFamilyMemTablesDefault cf_mems_default(memtable); ASSERT_TRUE( WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr) .ok()); for (int i = 0; i < 2; ++i) { Arena arena; ScopedArenaIterator arena_iter_guard; std::unique_ptr iter_guard; InternalIterator* iter; if (i == 0) { iter = memtable->NewIterator(ReadOptions(), &arena); arena_iter_guard.set(iter); } else { iter = memtable->NewRangeTombstoneIterator( ReadOptions(), kMaxSequenceNumber /* read_seq */); iter_guard.reset(iter); } if (iter == nullptr) { continue; } iter->SeekToFirst(); while (iter->Valid()) { fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(), iter->value().ToString().c_str()); iter->Next(); } } delete memtable->Unref(); } // Test the empty key TEST_F(HarnessTest, SimpleEmptyKey) { auto args = GenerateArgList(); for (const auto& arg : args) { Init(arg); Random rnd(test::RandomSeed() + 1); Add("", "v"); Test(&rnd); } } TEST_F(HarnessTest, SimpleSingle) { auto args = GenerateArgList(); for (const auto& arg : args) { Init(arg); Random rnd(test::RandomSeed() + 2); Add("abc", "v"); Test(&rnd); } } TEST_F(HarnessTest, SimpleMulti) { auto args = GenerateArgList(); for (const auto& arg : args) { Init(arg); Random rnd(test::RandomSeed() + 3); Add("abc", 
"v"); Add("abcd", "v"); Add("ac", "v2"); Test(&rnd); } } TEST_F(HarnessTest, SimpleSpecialKey) { auto args = GenerateArgList(); for (const auto& arg : args) { Init(arg); Random rnd(test::RandomSeed() + 4); Add("\xff\xff", "v3"); Test(&rnd); } } TEST_F(HarnessTest, FooterTests) { { // upconvert legacy block based std::string encoded; Footer footer(kLegacyBlockBasedTableMagicNumber, 0); BlockHandle meta_index(10, 5), index(20, 15); footer.set_metaindex_handle(meta_index); footer.set_index_handle(index); footer.EncodeTo(&encoded); Footer decoded_footer; Slice encoded_slice(encoded); decoded_footer.DecodeFrom(&encoded_slice); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.checksum(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 0U); } { // xxhash block based std::string encoded; Footer footer(kBlockBasedTableMagicNumber, 1); BlockHandle meta_index(10, 5), index(20, 15); footer.set_metaindex_handle(meta_index); footer.set_index_handle(index); footer.set_checksum(kxxHash); footer.EncodeTo(&encoded); Footer decoded_footer; Slice encoded_slice(encoded); decoded_footer.DecodeFrom(&encoded_slice); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.checksum(), kxxHash); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 1U); } { // xxhash64 block based std::string encoded; Footer footer(kBlockBasedTableMagicNumber, 1); BlockHandle 
meta_index(10, 5), index(20, 15); footer.set_metaindex_handle(meta_index); footer.set_index_handle(index); footer.set_checksum(kxxHash64); footer.EncodeTo(&encoded); Footer decoded_footer; Slice encoded_slice(encoded); decoded_footer.DecodeFrom(&encoded_slice); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.checksum(), kxxHash64); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 1U); } // Plain table is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE { // upconvert legacy plain table std::string encoded; Footer footer(kLegacyPlainTableMagicNumber, 0); BlockHandle meta_index(10, 5), index(20, 15); footer.set_metaindex_handle(meta_index); footer.set_index_handle(index); footer.EncodeTo(&encoded); Footer decoded_footer; Slice encoded_slice(encoded); decoded_footer.DecodeFrom(&encoded_slice); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); ASSERT_EQ(decoded_footer.checksum(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 0U); } { // xxhash block based std::string encoded; Footer footer(kPlainTableMagicNumber, 1); BlockHandle meta_index(10, 5), index(20, 15); footer.set_metaindex_handle(meta_index); footer.set_index_handle(index); footer.set_checksum(kxxHash); footer.EncodeTo(&encoded); Footer decoded_footer; Slice encoded_slice(encoded); decoded_footer.DecodeFrom(&encoded_slice); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); 
ASSERT_EQ(decoded_footer.checksum(), kxxHash); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 1U); } #endif // !ROCKSDB_LITE { // version == 2 std::string encoded; Footer footer(kBlockBasedTableMagicNumber, 2); BlockHandle meta_index(10, 5), index(20, 15); footer.set_metaindex_handle(meta_index); footer.set_index_handle(index); footer.EncodeTo(&encoded); Footer decoded_footer; Slice encoded_slice(encoded); decoded_footer.DecodeFrom(&encoded_slice); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); ASSERT_EQ(decoded_footer.checksum(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 2U); } } class IndexBlockRestartIntervalTest : public TableTest, public ::testing::WithParamInterface> { public: static std::vector> GetRestartValues() { return {{-1, false}, {0, false}, {1, false}, {8, false}, {16, false}, {32, false}, {-1, true}, {0, true}, {1, true}, {8, true}, {16, true}, {32, true}}; } }; INSTANTIATE_TEST_CASE_P( IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest, ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues())); TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { const int kKeysInTable = 10000; const int kKeySize = 100; const int kValSize = 500; const int index_block_restart_interval = std::get<0>(GetParam()); const bool value_delta_encoding = std::get<1>(GetParam()); Options options; BlockBasedTableOptions table_options; table_options.block_size = 64; // 
small block size to get big index block table_options.index_block_restart_interval = index_block_restart_interval; if (value_delta_encoding) { table_options.format_version = 4; } options.table_factory.reset(new BlockBasedTableFactory(table_options)); TableConstructor c(BytewiseComparator()); static Random rnd(301); for (int i = 0; i < kKeysInTable; i++) { InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue); c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize)); } std::vector keys; stl_wrappers::KVMap kvmap; std::unique_ptr comparator( new InternalKeyComparator(BytewiseComparator())); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); c.Finish(options, ioptions, moptions, table_options, *comparator, &keys, &kvmap); auto reader = c.GetTableReader(); std::unique_ptr db_iter(reader->NewIterator( ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); // Test point lookup for (auto& kv : kvmap) { db_iter->Seek(kv.first); ASSERT_TRUE(db_iter->Valid()); ASSERT_OK(db_iter->status()); ASSERT_EQ(db_iter->key(), kv.first); ASSERT_EQ(db_iter->value(), kv.second); } // Test iterating auto kv_iter = kvmap.begin(); for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { ASSERT_EQ(db_iter->key(), kv_iter->first); ASSERT_EQ(db_iter->value(), kv_iter->second); kv_iter++; } ASSERT_EQ(kv_iter, kvmap.end()); c.ResetTableReader(); } class PrefixTest : public testing::Test { public: PrefixTest() : testing::Test() {} ~PrefixTest() override {} }; namespace { // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform { public: ~TestPrefixExtractor() override{}; const char* Name() const override { return "TestPrefixExtractor"; } ROCKSDB_NAMESPACE::Slice Transform( const ROCKSDB_NAMESPACE::Slice& src) const override { assert(IsValid(src)); return ROCKSDB_NAMESPACE::Slice(src.data(), 
3); } bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override { assert(IsValid(src)); return true; } bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override { return true; } bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const { if (src.size() != 4) { return false; } if (src[0] != '[') { return false; } if (src[1] < '0' || src[1] > '9') { return false; } if (src[2] != ']') { return false; } if (src[3] < '0' || src[3] > '9') { return false; } return true; } }; } // namespace TEST_F(PrefixTest, PrefixAndWholeKeyTest) { ROCKSDB_NAMESPACE::Options options; options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal; options.num_levels = 20; options.create_if_missing = true; options.optimize_filters_for_hits = false; options.target_file_size_base = 268435456; options.prefix_extractor = std::make_shared(); ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto; bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10)); bbto.block_size = 262144; bbto.whole_key_filtering = true; const std::string kDBPath = test::PerThreadDBPath("table_prefix_test"); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyDB(kDBPath, options); ROCKSDB_NAMESPACE::DB* db; ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); // Create a bunch of keys with 10 filters. for (int i = 0; i < 10; i++) { std::string prefix = "[" + std::to_string(i) + "]"; for (int j = 0; j < 10; j++) { std::string key = prefix + std::to_string(j); db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, "1"); } } // Trigger compaction. db->CompactRange(CompactRangeOptions(), nullptr, nullptr); delete db; // In the second round, turn whole_key_filtering off and expect // rocksdb still works. } /* * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in * the SST file any more. Instead, RocksDB deduces global_seqno from the * MANIFEST while reading from an SST. 
Therefore, it's not possible to test the * functionality of global_seqno in a single, isolated unit test without the * involvement of Version, VersionSet, etc. */ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); test::StringSink* sink = new test::StringSink(); std::unique_ptr file_writer( test::GetWritableFileWriter(sink, "" /* don't care */)); Options options; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); std::vector> int_tbl_prop_collector_factories; int_tbl_prop_collector_factories.emplace_back( new SstFileWriterPropertiesCollectorFactory(2 /* version */, 0 /* global_seqno*/)); std::string column_family_name; std::unique_ptr builder(options.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, ikc, &int_tbl_prop_collector_factories, kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, column_family_name, -1), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, file_writer.get())); for (char c = 'a'; c <= 'z'; ++c) { std::string key(8, c); std::string value = key; InternalKey ik(key, 0, kTypeValue); builder->Add(ik.Encode(), value); } ASSERT_OK(builder->Finish()); file_writer->Flush(); test::RandomRWStringSink ss_rw(sink); uint32_t version; uint64_t global_seqno; uint64_t global_seqno_offset; // Helper function to get version, global_seqno, global_seqno_offset std::function GetVersionAndGlobalSeqno = [&]() { std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); TableProperties* props = nullptr; ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(), kBlockBasedTableMagicNumber, ioptions, &props, true /* compression_type_missing */)); UserCollectedProperties user_props = 
props->user_collected_properties; version = DecodeFixed32( user_props[ExternalSstFilePropertyNames::kVersion].c_str()); global_seqno = DecodeFixed64( user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str()); global_seqno_offset = props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno]; delete props; }; // Helper function to update the value of the global seqno in the file std::function SetGlobalSeqno = [&](uint64_t val) { std::string new_global_seqno; PutFixed64(&new_global_seqno, val); ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno)); }; // Helper function to get the contents of the table InternalIterator std::unique_ptr table_reader; std::function GetTableInternalIter = [&]() { std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); options.table_factory->NewTableReader( TableReaderOptions(ioptions, moptions.prefix_extractor.get(), EnvOptions(), ikc), std::move(file_reader), ss_rw.contents().size(), &table_reader); return table_reader->NewIterator( ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized); }; GetVersionAndGlobalSeqno(); ASSERT_EQ(2u, version); ASSERT_EQ(0u, global_seqno); InternalIterator* iter = GetTableInternalIter(); char current_c = 'a'; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ParsedInternalKey pik; ASSERT_TRUE(ParseInternalKey(iter->key(), &pik)); ASSERT_EQ(pik.type, ValueType::kTypeValue); ASSERT_EQ(pik.sequence, 0); ASSERT_EQ(pik.user_key, iter->value()); ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); current_c++; } ASSERT_EQ(current_c, 'z' + 1); delete iter; // Update global sequence number to 10 SetGlobalSeqno(10); GetVersionAndGlobalSeqno(); ASSERT_EQ(2u, version); ASSERT_EQ(10u, global_seqno); iter = GetTableInternalIter(); current_c = 'a'; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ParsedInternalKey pik; 
ASSERT_TRUE(ParseInternalKey(iter->key(), &pik)); ASSERT_EQ(pik.type, ValueType::kTypeValue); ASSERT_EQ(pik.sequence, 10); ASSERT_EQ(pik.user_key, iter->value()); ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); current_c++; } ASSERT_EQ(current_c, 'z' + 1); // Verify Seek for (char c = 'a'; c <= 'z'; c++) { std::string k = std::string(8, c); InternalKey ik(k, 10, kValueTypeForSeek); iter->Seek(ik.Encode()); ASSERT_TRUE(iter->Valid()); ParsedInternalKey pik; ASSERT_TRUE(ParseInternalKey(iter->key(), &pik)); ASSERT_EQ(pik.type, ValueType::kTypeValue); ASSERT_EQ(pik.sequence, 10); ASSERT_EQ(pik.user_key.ToString(), k); ASSERT_EQ(iter->value().ToString(), k); } delete iter; // Update global sequence number to 3 SetGlobalSeqno(3); GetVersionAndGlobalSeqno(); ASSERT_EQ(2u, version); ASSERT_EQ(3u, global_seqno); iter = GetTableInternalIter(); current_c = 'a'; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ParsedInternalKey pik; ASSERT_TRUE(ParseInternalKey(iter->key(), &pik)); ASSERT_EQ(pik.type, ValueType::kTypeValue); ASSERT_EQ(pik.sequence, 3); ASSERT_EQ(pik.user_key, iter->value()); ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c)); current_c++; } ASSERT_EQ(current_c, 'z' + 1); // Verify Seek for (char c = 'a'; c <= 'z'; c++) { std::string k = std::string(8, c); // seqno=4 is less than 3 so we still should get our key InternalKey ik(k, 4, kValueTypeForSeek); iter->Seek(ik.Encode()); ASSERT_TRUE(iter->Valid()); ParsedInternalKey pik; ASSERT_TRUE(ParseInternalKey(iter->key(), &pik)); ASSERT_EQ(pik.type, ValueType::kTypeValue); ASSERT_EQ(pik.sequence, 3); ASSERT_EQ(pik.user_key.ToString(), k); ASSERT_EQ(iter->value().ToString(), k); } delete iter; } TEST_P(BlockBasedTableTest, BlockAlignTest) { BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); bbto.block_align = true; test::StringSink* sink = new test::StringSink(); std::unique_ptr file_writer( test::GetWritableFileWriter(sink, "" /* don't care */)); Options options; 
options.compression = kNoCompression; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); std::vector> int_tbl_prop_collector_factories; std::string column_family_name; std::unique_ptr builder(options.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, ikc, &int_tbl_prop_collector_factories, kNoCompression, 0 /* sample_for_compression */, CompressionOptions(), false /* skip_filters */, column_family_name, -1), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, file_writer.get())); for (int i = 1; i <= 10000; ++i) { std::ostringstream ostr; ostr << std::setfill('0') << std::setw(5) << i; std::string key = ostr.str(); std::string value = "val"; InternalKey ik(key, 0, kTypeValue); builder->Add(ik.Encode(), value); } ASSERT_OK(builder->Finish()); file_writer->Flush(); test::RandomRWStringSink ss_rw(sink); std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); // Helper function to get version, global_seqno, global_seqno_offset std::function VerifyBlockAlignment = [&]() { TableProperties* props = nullptr; ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(), kBlockBasedTableMagicNumber, ioptions, &props, true /* compression_type_missing */)); uint64_t data_block_size = props->data_size / props->num_data_blocks; ASSERT_EQ(data_block_size, 4096); ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks); delete props; }; VerifyBlockAlignment(); // The below block of code verifies that we can read back the keys. 
Set // block_align to false when creating the reader to ensure we can flip between // the two modes without any issues std::unique_ptr table_reader; bbto.block_align = false; Options options2; options2.table_factory.reset(NewBlockBasedTableFactory(bbto)); ImmutableCFOptions ioptions2(options2); const MutableCFOptions moptions2(options2); ASSERT_OK(ioptions.table_factory->NewTableReader( TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(), EnvOptions(), GetPlainInternalComparator(options2.comparator)), std::move(file_reader), ss_rw.contents().size(), &table_reader)); std::unique_ptr db_iter(table_reader->NewIterator( ReadOptions(), moptions2.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); int expected_key = 1; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { std::ostringstream ostr; ostr << std::setfill('0') << std::setw(5) << expected_key++; std::string key = ostr.str(); std::string value = "val"; ASSERT_OK(db_iter->status()); ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key); ASSERT_EQ(db_iter->value().ToString(), value); } expected_key--; ASSERT_EQ(expected_key, 10000); table_reader.reset(); } TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); bbto.block_align = true; test::StringSink* sink = new test::StringSink(); std::unique_ptr file_writer( test::GetWritableFileWriter(sink, "" /* don't care */)); Options options; options.compression = kNoCompression; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); InternalKeyComparator ikc(options.comparator); std::vector> int_tbl_prop_collector_factories; std::string column_family_name; std::unique_ptr builder(options.table_factory->NewTableBuilder( TableBuilderOptions(ioptions, moptions, ikc, &int_tbl_prop_collector_factories, kNoCompression, 0 /* 
sample_for_compression */, CompressionOptions(), false /* skip_filters */, column_family_name, -1), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, file_writer.get())); for (int i = 1; i <= 10000; ++i) { std::ostringstream ostr; ostr << std::setfill('0') << std::setw(5) << i; std::string key = ostr.str(); std::string value = "val"; InternalKey ik(key, 0, kTypeValue); builder->Add(ik.Encode(), value); } ASSERT_OK(builder->Finish()); file_writer->Flush(); test::RandomRWStringSink ss_rw(sink); std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); { RandomAccessFileReader* file = file_reader.get(); uint64_t file_size = ss_rw.contents().size(); Footer footer; ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, &footer, kBlockBasedTableMagicNumber)); auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type, BlockContents* contents) { ReadOptions read_options; read_options.verify_checksums = false; PersistentCacheOptions cache_options; BlockFetcher block_fetcher( file, nullptr /* prefetch_buffer */, footer, read_options, handle, contents, ioptions, false /* decompress */, false /*maybe_compressed*/, block_type, UncompressionDict::GetEmptyDict(), cache_options); ASSERT_OK(block_fetcher.ReadBlockContents()); }; // -- Read metaindex block auto metaindex_handle = footer.metaindex_handle(); BlockContents metaindex_contents; BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex, &metaindex_contents); Block metaindex_block(std::move(metaindex_contents)); std::unique_ptr meta_iter(metaindex_block.NewDataIterator( BytewiseComparator(), BytewiseComparator(), kDisableGlobalSequenceNumber)); bool found_properties_block = true; ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block)); ASSERT_TRUE(found_properties_block); // -- Read properties block Slice v = meta_iter->value(); BlockHandle properties_handle; 
ASSERT_OK(properties_handle.DecodeFrom(&v)); BlockContents properties_contents; BlockFetchHelper(properties_handle, BlockType::kProperties, &properties_contents); Block properties_block(std::move(properties_contents)); ASSERT_EQ(properties_block.NumRestarts(), 1u); } } TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { // The properties meta-block should come at the end since we always need to // read it when opening a file, unlike index/filter/other meta-blocks, which // are sometimes read depending on the user's configuration. This ordering // allows us to do a small readahead on the end of the file to read properties // and meta-index blocks with one I/O. TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("a1", "val1"); c.Add("b2", "val2"); c.Add("c3", "val3"); c.Add("d4", "val4"); c.Add("e5", "val5"); c.Add("f6", "val6"); c.Add("g7", "val7"); c.Add("h8", "val8"); c.Add("j9", "val9"); // write an SST file Options options; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.filter_policy.reset(NewBloomFilterPolicy( 8 /* bits_per_key */, false /* use_block_based_filter */)); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); ImmutableCFOptions ioptions(options); MutableCFOptions moptions(options); std::vector keys; stl_wrappers::KVMap kvmap; c.Finish(options, ioptions, moptions, table_options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); // get file reader test::StringSink* table_sink = c.TEST_GetSink(); std::unique_ptr table_reader{ test::GetRandomAccessFileReader( new test::StringSource(table_sink->contents(), 0 /* unique_id */, false /* allow_mmap_reads */))}; size_t table_size = table_sink->contents().size(); // read footer Footer footer; ASSERT_OK(ReadFooterFromFile(table_reader.get(), nullptr /* prefetch_buffer */, table_size, &footer, kBlockBasedTableMagicNumber)); // read metaindex auto metaindex_handle = footer.metaindex_handle(); BlockContents 
metaindex_contents; PersistentCacheOptions pcache_opts; BlockFetcher block_fetcher( table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, false /*maybe_compressed*/, BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), pcache_opts, nullptr /*memory_allocator*/); ASSERT_OK(block_fetcher.ReadBlockContents()); Block metaindex_block(std::move(metaindex_contents)); // verify properties block comes last std::unique_ptr metaindex_iter{ metaindex_block.NewDataIterator(options.comparator, options.comparator, kDisableGlobalSequenceNumber)}; uint64_t max_offset = 0; std::string key_at_max_offset; for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid(); metaindex_iter->Next()) { BlockHandle handle; Slice value = metaindex_iter->value(); ASSERT_OK(handle.DecodeFrom(&value)); if (handle.offset() > max_offset) { max_offset = handle.offset(); key_at_max_offset = metaindex_iter->key().ToString(); } } ASSERT_EQ(kPropertiesBlock, key_at_max_offset); // index handle is stored in footer rather than metaindex block, so need // separate logic to verify it comes before properties block. 
ASSERT_GT(max_offset, footer.index_handle().offset()); c.ResetTableReader(); } TEST_P(BlockBasedTableTest, BadOptions) { ROCKSDB_NAMESPACE::Options options; options.compression = kNoCompression; BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); bbto.block_size = 4000; bbto.block_align = true; const std::string kDBPath = test::PerThreadDBPath("block_based_table_bad_options_test"); options.table_factory.reset(NewBlockBasedTableFactory(bbto)); DestroyDB(kDBPath, options); ROCKSDB_NAMESPACE::DB* db; ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); bbto.block_size = 4096; options.compression = kSnappyCompression; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db)); } TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) { TailPrefetchStats tpstats; ASSERT_EQ(0, tpstats.GetSuggestedPrefetchSize()); tpstats.RecordEffectiveSize(size_t{1000}); tpstats.RecordEffectiveSize(size_t{1005}); tpstats.RecordEffectiveSize(size_t{1002}); ASSERT_EQ(1005, tpstats.GetSuggestedPrefetchSize()); // One single super large value shouldn't influence much tpstats.RecordEffectiveSize(size_t{1002000}); tpstats.RecordEffectiveSize(size_t{999}); ASSERT_LE(1005, tpstats.GetSuggestedPrefetchSize()); ASSERT_GT(1200, tpstats.GetSuggestedPrefetchSize()); // Only history of 32 is kept for (int i = 0; i < 32; i++) { tpstats.RecordEffectiveSize(size_t{100}); } ASSERT_EQ(100, tpstats.GetSuggestedPrefetchSize()); // 16 large values and 16 small values. The result should be closer // to the small value as the algorithm. 
for (int i = 0; i < 16; i++) { tpstats.RecordEffectiveSize(size_t{1000}); } tpstats.RecordEffectiveSize(size_t{10}); tpstats.RecordEffectiveSize(size_t{20}); for (int i = 0; i < 6; i++) { tpstats.RecordEffectiveSize(size_t{100}); } ASSERT_LE(80, tpstats.GetSuggestedPrefetchSize()); ASSERT_GT(200, tpstats.GetSuggestedPrefetchSize()); } TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) { TailPrefetchStats tpstats; FilePrefetchBuffer buffer(nullptr, 0, 0, false, true); buffer.TryReadFromCache(500, 10, nullptr); buffer.TryReadFromCache(480, 10, nullptr); buffer.TryReadFromCache(490, 10, nullptr); ASSERT_EQ(480, buffer.min_offset_read()); } TEST_P(BlockBasedTableTest, DataBlockHashIndex) { const int kNumKeys = 500; const int kKeySize = 8; const int kValSize = 40; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); table_options.data_block_index_type = BlockBasedTableOptions::kDataBlockBinaryAndHash; Options options; options.comparator = BytewiseComparator(); options.table_factory.reset(new BlockBasedTableFactory(table_options)); TableConstructor c(options.comparator); static Random rnd(1048); for (int i = 0; i < kNumKeys; i++) { // padding one "0" to mark existent keys. 
std::string random_key(RandomString(&rnd, kKeySize - 1) + "1"); InternalKey k(random_key, 0, kTypeValue); c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize)); } std::vector keys; stl_wrappers::KVMap kvmap; const ImmutableCFOptions ioptions(options); const MutableCFOptions moptions(options); const InternalKeyComparator internal_comparator(options.comparator); c.Finish(options, ioptions, moptions, table_options, internal_comparator, &keys, &kvmap); auto reader = c.GetTableReader(); std::unique_ptr seek_iter; seek_iter.reset(reader->NewIterator( ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); for (int i = 0; i < 2; ++i) { ReadOptions ro; // for every kv, we seek using two method: Get() and Seek() // Get() will use the SuffixIndexHash in Block. For non-existent key it // will invalidate the iterator // Seek() will use the default BinarySeek() in Block. So for non-existent // key it will land at the closest key that is large than target. 
// Search for existent keys for (auto& kv : kvmap) { if (i == 0) { // Search using Seek() seek_iter->Seek(kv.first); ASSERT_OK(seek_iter->status()); ASSERT_TRUE(seek_iter->Valid()); ASSERT_EQ(seek_iter->key(), kv.first); ASSERT_EQ(seek_iter->value(), kv.second); } else { // Search using Get() PinnableSlice value; std::string user_key = ExtractUserKey(kv.first).ToString(); GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, user_key, &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK(reader->Get(ro, kv.first, &get_context, moptions.prefix_extractor.get())); ASSERT_EQ(get_context.State(), GetContext::kFound); ASSERT_EQ(value, Slice(kv.second)); value.Reset(); } } // Search for non-existent keys for (auto& kv : kvmap) { std::string user_key = ExtractUserKey(kv.first).ToString(); user_key.back() = '0'; // make it non-existent key InternalKey internal_key(user_key, 0, kTypeValue); std::string encoded_key = internal_key.Encode().ToString(); if (i == 0) { // Search using Seek() seek_iter->Seek(encoded_key); ASSERT_OK(seek_iter->status()); if (seek_iter->Valid()) { ASSERT_TRUE(BytewiseComparator()->Compare( user_key, ExtractUserKey(seek_iter->key())) < 0); } } else { // Search using Get() PinnableSlice value; GetContext get_context(options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, user_key, &value, nullptr, nullptr, true, nullptr, nullptr); ASSERT_OK(reader->Get(ro, encoded_key, &get_context, moptions.prefix_extractor.get())); ASSERT_EQ(get_context.State(), GetContext::kNotFound); value.Reset(); } } } } // BlockBasedTableIterator should invalidate itself and return // OutOfBound()=true immediately after Seek(), to allow LevelIterator // filter out corresponding level. 
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
  // NOTE(review): template arguments (e.g. on std::vector / std::unique_ptr)
  // appear to have been lost from this copy of the file; left as found.

  // Table options and the derived CF options used to build the table.
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  // A table holding the single key "foo".
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("foo", "v1");
  std::vector keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();

  // Upper bound "bar" sorts before the only key in the table.
  std::string upper_bound = "bar";
  Slice upper_bound_slice(upper_bound);
  ReadOptions read_opt;
  read_opt.iterate_upper_bound = &upper_bound_slice;

  // Builds a fresh bounded iterator over the table; each scenario below gets
  // its own instance.
  auto new_bounded_iter = [&]() {
    return new KeyConvertingIterator(reader->NewIterator(
        read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  };

  std::unique_ptr iter;

  // Scenario 1: SeekToFirst() lands beyond the upper bound.
  iter.reset(new_bounded_iter());
  iter->SeekToFirst();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());

  // Scenario 2: Seek() to the existing key, which is beyond the upper bound.
  iter.reset(new_bounded_iter());
  iter->Seek("foo");
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());
}

// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true after Next(), if it finds current index key is no smaller
// than upper bound, unless it is pointing to the last data block.
TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
  // Builds a two-key table ("bar", "foo") and checks what the iterator
  // reports after stepping past the last key below the upper bound.
  // NOTE(review): template arguments (std::vector, std::make_shared,
  // std::unique_ptr) appear to have been lost from this copy; left as found.
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("bar", "v");
  c.Add("foo", "v");
  std::vector keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  // Presumably a flush policy that places "bar" and "foo" in separate data
  // blocks -- the type argument is missing here; TODO confirm.
  table_opt.flush_block_policy_factory = std::make_shared();
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();
  ReadOptions read_opt;

  // Case 1: upper bound "bar_after" lies between the two keys. Next() from
  // "bar" must invalidate the iterator and report out-of-bound.
  std::string ub1 = "bar_after";
  Slice ub_slice1(ub1);
  read_opt.iterate_upper_bound = &ub_slice1;
  std::unique_ptr iter;
  iter.reset(new KeyConvertingIterator(reader->NewIterator(
      read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());

  // Case 2: upper bound "foo_after" is past the last key. Next() from "foo"
  // runs off the end of the table, so the iterator is invalid but is NOT
  // reported as out-of-bound.
  std::string ub2 = "foo_after";
  Slice ub_slice2(ub2);
  read_opt.iterate_upper_bound = &ub_slice2;
  iter.reset(new KeyConvertingIterator(reader->NewIterator(
      read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_FALSE(iter->IsOutOfBound());
}

}  // namespace ROCKSDB_NAMESPACE

// Test entry point: initializes GoogleTest with the command-line arguments
// and runs every registered test.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
rocksdb-6.11.4/table/two_level_iterator.cc000066400000000000000000000145141370372246700205610ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "table/two_level_iterator.h"
#include "db/pinned_iterators_manager.h"
#include "memory/arena.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "table/block_based/block.h"
#include "table/format.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// Iterator that walks a first-level (index) iterator and, for each index
// entry, a second-level iterator created on demand from that entry's block
// handle via TwoLevelIteratorState::NewSecondaryIterator(). Validity, key()
// and value() are those of the current second-level iterator.
// NOTE(review): template arguments on InternalIteratorBase /
// IteratorWrapperBase appear to have been lost from this copy; left as found.
class TwoLevelIndexIterator : public InternalIteratorBase {
 public:
  explicit TwoLevelIndexIterator(
      TwoLevelIteratorState* state,
      InternalIteratorBase* first_level_iter);

  // Owns and releases both iterators and the state object.
  ~TwoLevelIndexIterator() override {
    first_level_iter_.DeleteIter(false /* is_arena_mode */);
    second_level_iter_.DeleteIter(false /* is_arena_mode */);
    delete state_;
  }

  void Seek(const Slice& target) override;
  void SeekForPrev(const Slice& target) override;
  void SeekToFirst() override;
  void SeekToLast() override;
  void Next() override;
  void Prev() override;

  // Valid exactly when the current second-level iterator is valid.
  bool Valid() const override { return second_level_iter_.Valid(); }
  Slice key() const override {
    assert(Valid());
    return second_level_iter_.key();
  }
  IndexValue value() const override {
    assert(Valid());
    return second_level_iter_.value();
  }
  // Reports the first error found, in priority order: first-level iterator,
  // then the second-level iterator (if one exists), then the saved status_.
  Status status() const override {
    if (!first_level_iter_.status().ok()) {
      // A failed first level is expected to have no second-level iterator.
      assert(second_level_iter_.iter() == nullptr);
      return first_level_iter_.status();
    } else if (second_level_iter_.iter() != nullptr &&
               !second_level_iter_.status().ok()) {
      return second_level_iter_.status();
    } else {
      return status_;
    }
  }
  // Pinning is not supported by this iterator: the manager is ignored and
  // keys/values are always reported as not pinned.
  void SetPinnedItersMgr(
      PinnedIteratorsManager* /*pinned_iters_mgr*/) override {}
  bool IsKeyPinned() const override { return false; }
  bool IsValuePinned() const override { return false; }

 private:
  // Remembers `s` in status_ only if no error has been saved yet.
  void SaveError(const Status& s) {
    if (status_.ok() && !s.ok()) status_ = s;
  }
  void SkipEmptyDataBlocksForward();
  void SkipEmptyDataBlocksBackward();
  void SetSecondLevelIterator(InternalIteratorBase* iter);
  void InitDataBlock();

  TwoLevelIteratorState* state_;
  IteratorWrapperBase first_level_iter_;
  IteratorWrapperBase second_level_iter_;  // May be nullptr
  Status status_;
  // If second_level_iter is non-nullptr, then "data_block_handle_" holds the
  // block handle that was passed to state_->NewSecondaryIterator() to create
  // the second_level_iter.
  BlockHandle data_block_handle_;
};

TwoLevelIndexIterator::TwoLevelIndexIterator(
    TwoLevelIteratorState* state,
    InternalIteratorBase* first_level_iter)
    : state_(state), first_level_iter_(first_level_iter) {}

// Positions the index at `target`, opens the matching block, seeks inside it,
// then moves forward past any blocks that yield no entry.
void TwoLevelIndexIterator::Seek(const Slice& target) {
  first_level_iter_.Seek(target);

  InitDataBlock();
  if (second_level_iter_.iter() != nullptr) {
    second_level_iter_.Seek(target);
  }
  SkipEmptyDataBlocksForward();
}

// Positions at the last entry at or before `target`.
void TwoLevelIndexIterator::SeekForPrev(const Slice& target) {
  first_level_iter_.Seek(target);
  InitDataBlock();
  if (second_level_iter_.iter() != nullptr) {
    second_level_iter_.SeekForPrev(target);
  }
  if (!Valid()) {
    // The index Seek() ran off the end without an error: retry from the last
    // index entry before walking backward.
    if (!first_level_iter_.Valid() && first_level_iter_.status().ok()) {
      first_level_iter_.SeekToLast();
      InitDataBlock();
      if (second_level_iter_.iter() != nullptr) {
        second_level_iter_.SeekForPrev(target);
      }
    }
    SkipEmptyDataBlocksBackward();
  }
}

void TwoLevelIndexIterator::SeekToFirst() {
  first_level_iter_.SeekToFirst();
  InitDataBlock();
  if (second_level_iter_.iter() != nullptr) {
    second_level_iter_.SeekToFirst();
  }
  SkipEmptyDataBlocksForward();
}

void TwoLevelIndexIterator::SeekToLast() {
  first_level_iter_.SeekToLast();
  InitDataBlock();
  if (second_level_iter_.iter() != nullptr) {
    second_level_iter_.SeekToLast();
  }
  SkipEmptyDataBlocksBackward();
}

void TwoLevelIndexIterator::Next() {
  assert(Valid());
  second_level_iter_.Next();
  SkipEmptyDataBlocksForward();
}

void TwoLevelIndexIterator::Prev() {
  assert(Valid());
  second_level_iter_.Prev();
  SkipEmptyDataBlocksBackward();
}

// Advances through index entries until the second-level iterator is valid,
// reports an error, or the index is exhausted (in which case the second-level
// iterator is cleared).
void TwoLevelIndexIterator::SkipEmptyDataBlocksForward() {
  while (second_level_iter_.iter() == nullptr ||
         (!second_level_iter_.Valid() && second_level_iter_.status().ok())) {
    // Move to next block
    if (!first_level_iter_.Valid()) {
      SetSecondLevelIterator(nullptr);
      return;
    }
    first_level_iter_.Next();
    InitDataBlock();
    if (second_level_iter_.iter() != nullptr) {
      second_level_iter_.SeekToFirst();
    }
  }
}

// Mirror image of SkipEmptyDataBlocksForward(): steps the index backward and
// positions each newly opened block at its last entry.
void TwoLevelIndexIterator::SkipEmptyDataBlocksBackward() {
  while (second_level_iter_.iter() == nullptr ||
         (!second_level_iter_.Valid() && second_level_iter_.status().ok())) {
    // Move to previous block
    if (!first_level_iter_.Valid()) {
      SetSecondLevelIterator(nullptr);
      return;
    }
    first_level_iter_.Prev();
    InitDataBlock();
    if (second_level_iter_.iter() != nullptr) {
      second_level_iter_.SeekToLast();
    }
  }
}

// Installs `iter` (may be nullptr) as the second-level iterator and deletes
// the one it replaces.
void TwoLevelIndexIterator::SetSecondLevelIterator(
    InternalIteratorBase* iter) {
  InternalIteratorBase* old_iter = second_level_iter_.Set(iter);
  delete old_iter;
}

// Makes the second-level iterator correspond to the current index entry:
// cleared when the index is invalid, reused when it already covers the same
// block offset (and is not in an Incomplete state), otherwise re-created.
void TwoLevelIndexIterator::InitDataBlock() {
  if (!first_level_iter_.Valid()) {
    SetSecondLevelIterator(nullptr);
  } else {
    BlockHandle handle = first_level_iter_.value().handle;
    if (second_level_iter_.iter() != nullptr &&
        !second_level_iter_.status().IsIncomplete() &&
        handle.offset() == data_block_handle_.offset()) {
      // second_level_iter is already constructed with this iterator, so
      // no need to change anything
    } else {
      InternalIteratorBase* iter =
          state_->NewSecondaryIterator(handle);
      data_block_handle_ = handle;
      SetSecondLevelIterator(iter);
    }
  }
}

}  // namespace

// Factory: wraps `state` and `first_level_iter` in a TwoLevelIndexIterator,
// which deletes both when it is destroyed.
InternalIteratorBase* NewTwoLevelIterator(
    TwoLevelIteratorState* state,
    InternalIteratorBase* first_level_iter) {
  return new TwoLevelIndexIterator(state, first_level_iter);
}
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/table/two_level_iterator.h000066400000000000000000000033021370372246700204140ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include "rocksdb/iterator.h" #include "rocksdb/env.h" #include "table/iterator_wrapper.h" namespace ROCKSDB_NAMESPACE { struct ReadOptions; class InternalKeyComparator; // TwoLevelIteratorState expects iterators are not created using the arena struct TwoLevelIteratorState { TwoLevelIteratorState() {} virtual ~TwoLevelIteratorState() {} virtual InternalIteratorBase* NewSecondaryIterator( const BlockHandle& handle) = 0; }; // Return a new two level iterator. A two-level iterator contains an // index iterator whose values point to a sequence of blocks where // each block is itself a sequence of key,value pairs. The returned // two-level iterator yields the concatenation of all key/value pairs // in the sequence of blocks. Takes ownership of "index_iter" and // will delete it when no longer needed. // // Uses a supplied function to convert an index_iter value into // an iterator over the contents of the corresponding block. // Note: this function expects first_level_iter was not created using the arena extern InternalIteratorBase* NewTwoLevelIterator( TwoLevelIteratorState* state, InternalIteratorBase* first_level_iter); } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/test_util/000077500000000000000000000000001370372246700152615ustar00rootroot00000000000000rocksdb-6.11.4/test_util/fault_injection_test_env.cc000066400000000000000000000313511370372246700226570ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright 2014 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // This test uses a custom Env to keep track of the state of a filesystem as of // the last "sync". It then checks for data loss errors by purposely dropping // file data (or entire files) not protected by a "sync". #include "test_util/fault_injection_test_env.h" #include #include namespace ROCKSDB_NAMESPACE { // Assume a filename, and not a directory name like "/foo/bar/" std::string GetDirName(const std::string filename) { size_t found = filename.find_last_of("/\\"); if (found == std::string::npos) { return ""; } else { return filename.substr(0, found); } } // A basic file truncation function suitable for this test. 
Status Truncate(Env* env, const std::string& filename, uint64_t length) { std::unique_ptr orig_file; const EnvOptions options; Status s = env->NewSequentialFile(filename, &orig_file, options); if (!s.ok()) { fprintf(stderr, "Cannot open file %s for truncation: %s\n", filename.c_str(), s.ToString().c_str()); return s; } std::unique_ptr scratch(new char[length]); ROCKSDB_NAMESPACE::Slice result; s = orig_file->Read(length, &result, scratch.get()); #ifdef OS_WIN orig_file.reset(); #endif if (s.ok()) { std::string tmp_name = GetDirName(filename) + "/truncate.tmp"; std::unique_ptr tmp_file; s = env->NewWritableFile(tmp_name, &tmp_file, options); if (s.ok()) { s = tmp_file->Append(result); if (s.ok()) { s = env->RenameFile(tmp_name, filename); } else { fprintf(stderr, "Cannot rename file %s to %s: %s\n", tmp_name.c_str(), filename.c_str(), s.ToString().c_str()); env->DeleteFile(tmp_name); } } } if (!s.ok()) { fprintf(stderr, "Cannot truncate file %s: %s\n", filename.c_str(), s.ToString().c_str()); } return s; } // Trim the tailing "/" in the end of `str` std::string TrimDirname(const std::string& str) { size_t found = str.find_last_not_of("/"); if (found == std::string::npos) { return str; } return str.substr(0, found + 1); } // Return pair of a full path. std::pair GetDirAndName(const std::string& name) { std::string dirname = GetDirName(name); std::string fname = name.substr(dirname.size() + 1); return std::make_pair(dirname, fname); } Status FileState::DropUnsyncedData(Env* env) const { ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_; return Truncate(env, filename_, sync_pos); } Status FileState::DropRandomUnsyncedData(Env* env, Random* rand) const { ssize_t sync_pos = pos_at_last_sync_ == -1 ? 
0 : pos_at_last_sync_; assert(pos_ >= sync_pos); int range = static_cast(pos_ - sync_pos); uint64_t truncated_size = static_cast(sync_pos) + rand->Uniform(range); return Truncate(env, filename_, truncated_size); } Status TestDirectory::Fsync() { if (!env_->IsFilesystemActive()) { return env_->GetError(); } env_->SyncDir(dirname_); return dir_->Fsync(); } TestWritableFile::TestWritableFile(const std::string& fname, std::unique_ptr&& f, FaultInjectionTestEnv* env) : state_(fname), target_(std::move(f)), writable_file_opened_(true), env_(env) { assert(target_ != nullptr); state_.pos_ = 0; } TestWritableFile::~TestWritableFile() { if (writable_file_opened_) { Close(); } } Status TestWritableFile::Append(const Slice& data) { if (!env_->IsFilesystemActive()) { return env_->GetError(); } Status s = target_->Append(data); if (s.ok()) { state_.pos_ += data.size(); env_->WritableFileAppended(state_); } return s; } Status TestWritableFile::Close() { writable_file_opened_ = false; Status s = target_->Close(); if (s.ok()) { env_->WritableFileClosed(state_); } return s; } Status TestWritableFile::Flush() { Status s = target_->Flush(); if (s.ok() && env_->IsFilesystemActive()) { state_.pos_at_last_flush_ = state_.pos_; } return s; } Status TestWritableFile::Sync() { if (!env_->IsFilesystemActive()) { return Status::IOError("FaultInjectionTestEnv: not active"); } // No need to actual sync. 
state_.pos_at_last_sync_ = state_.pos_; env_->WritableFileSynced(state_); return Status::OK(); } TestRandomRWFile::TestRandomRWFile(const std::string& /*fname*/, std::unique_ptr&& f, FaultInjectionTestEnv* env) : target_(std::move(f)), file_opened_(true), env_(env) { assert(target_ != nullptr); } TestRandomRWFile::~TestRandomRWFile() { if (file_opened_) { Close(); } } Status TestRandomRWFile::Write(uint64_t offset, const Slice& data) { if (!env_->IsFilesystemActive()) { return env_->GetError(); } return target_->Write(offset, data); } Status TestRandomRWFile::Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { if (!env_->IsFilesystemActive()) { return env_->GetError(); } return target_->Read(offset, n, result, scratch); } Status TestRandomRWFile::Close() { file_opened_ = false; return target_->Close(); } Status TestRandomRWFile::Flush() { if (!env_->IsFilesystemActive()) { return env_->GetError(); } return target_->Flush(); } Status TestRandomRWFile::Sync() { if (!env_->IsFilesystemActive()) { return env_->GetError(); } return target_->Sync(); } Status FaultInjectionTestEnv::NewDirectory(const std::string& name, std::unique_ptr* result) { std::unique_ptr r; Status s = target()->NewDirectory(name, &r); assert(s.ok()); if (!s.ok()) { return s; } result->reset(new TestDirectory(this, TrimDirname(name), r.release())); return Status::OK(); } Status FaultInjectionTestEnv::NewWritableFile( const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) { if (!IsFilesystemActive()) { return GetError(); } // Not allow overwriting files Status s = target()->FileExists(fname); if (s.ok()) { return Status::Corruption("File already exists."); } else if (!s.IsNotFound()) { assert(s.IsIOError()); return s; } s = target()->NewWritableFile(fname, result, soptions); if (s.ok()) { result->reset(new TestWritableFile(fname, std::move(*result), this)); // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. 
UntrackFile(fname); MutexLock l(&mutex_); open_files_.insert(fname); auto dir_and_name = GetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; list.insert(dir_and_name.second); } return s; } Status FaultInjectionTestEnv::ReopenWritableFile( const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) { if (!IsFilesystemActive()) { return GetError(); } Status s = target()->ReopenWritableFile(fname, result, soptions); if (s.ok()) { result->reset(new TestWritableFile(fname, std::move(*result), this)); // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. UntrackFile(fname); MutexLock l(&mutex_); open_files_.insert(fname); auto dir_and_name = GetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; list.insert(dir_and_name.second); } return s; } Status FaultInjectionTestEnv::NewRandomRWFile( const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) { if (!IsFilesystemActive()) { return GetError(); } Status s = target()->NewRandomRWFile(fname, result, soptions); if (s.ok()) { result->reset(new TestRandomRWFile(fname, std::move(*result), this)); // WritableFileWriter* file is opened // again then it will be truncated - so forget our saved state. 
UntrackFile(fname); MutexLock l(&mutex_); open_files_.insert(fname); auto dir_and_name = GetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; list.insert(dir_and_name.second); } return s; } Status FaultInjectionTestEnv::NewRandomAccessFile( const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) { if (!IsFilesystemActive()) { return GetError(); } return target()->NewRandomAccessFile(fname, result, soptions); } Status FaultInjectionTestEnv::DeleteFile(const std::string& f) { if (!IsFilesystemActive()) { return GetError(); } Status s = EnvWrapper::DeleteFile(f); if (s.ok()) { UntrackFile(f); } return s; } Status FaultInjectionTestEnv::RenameFile(const std::string& s, const std::string& t) { if (!IsFilesystemActive()) { return GetError(); } Status ret = EnvWrapper::RenameFile(s, t); if (ret.ok()) { MutexLock l(&mutex_); if (db_file_state_.find(s) != db_file_state_.end()) { db_file_state_[t] = db_file_state_[s]; db_file_state_.erase(s); } auto sdn = GetDirAndName(s); auto tdn = GetDirAndName(t); if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) { auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; assert(tlist.find(tdn.second) == tlist.end()); tlist.insert(tdn.second); } } return ret; } void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) { MutexLock l(&mutex_); if (open_files_.find(state.filename_) != open_files_.end()) { db_file_state_[state.filename_] = state; open_files_.erase(state.filename_); } } void FaultInjectionTestEnv::WritableFileSynced(const FileState& state) { MutexLock l(&mutex_); if (open_files_.find(state.filename_) != open_files_.end()) { if (db_file_state_.find(state.filename_) == db_file_state_.end()) { db_file_state_.insert(std::make_pair(state.filename_, state)); } else { db_file_state_[state.filename_] = state; } } } void FaultInjectionTestEnv::WritableFileAppended(const FileState& state) { MutexLock l(&mutex_); if 
(open_files_.find(state.filename_) != open_files_.end()) { if (db_file_state_.find(state.filename_) == db_file_state_.end()) { db_file_state_.insert(std::make_pair(state.filename_, state)); } else { db_file_state_[state.filename_] = state; } } } // For every file that is not fully synced, make a call to `func` with // FileState of the file as the parameter. Status FaultInjectionTestEnv::DropFileData( std::function func) { Status s; MutexLock l(&mutex_); for (std::map::const_iterator it = db_file_state_.begin(); s.ok() && it != db_file_state_.end(); ++it) { const FileState& state = it->second; if (!state.IsFullySynced()) { s = func(target(), state); } } return s; } Status FaultInjectionTestEnv::DropUnsyncedFileData() { return DropFileData([&](Env* env, const FileState& state) { return state.DropUnsyncedData(env); }); } Status FaultInjectionTestEnv::DropRandomUnsyncedFileData(Random* rnd) { return DropFileData([&](Env* env, const FileState& state) { return state.DropRandomUnsyncedData(env, rnd); }); } Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() { // Because DeleteFile access this container make a copy to avoid deadlock std::map> map_copy; { MutexLock l(&mutex_); map_copy.insert(dir_to_new_files_since_last_sync_.begin(), dir_to_new_files_since_last_sync_.end()); } for (auto& pair : map_copy) { for (std::string name : pair.second) { Status s = DeleteFile(pair.first + "/" + name); if (!s.ok()) { return s; } } } return Status::OK(); } void FaultInjectionTestEnv::ResetState() { MutexLock l(&mutex_); db_file_state_.clear(); dir_to_new_files_since_last_sync_.clear(); SetFilesystemActiveNoLock(true); } void FaultInjectionTestEnv::UntrackFile(const std::string& f) { MutexLock l(&mutex_); auto dir_and_name = GetDirAndName(f); dir_to_new_files_since_last_sync_[dir_and_name.first].erase( dir_and_name.second); db_file_state_.erase(f); open_files_.erase(f); } } // namespace ROCKSDB_NAMESPACE 
rocksdb-6.11.4/test_util/fault_injection_test_env.h000066400000000000000000000161751370372246700225280ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// This test uses a custom Env to keep track of the state of a filesystem as of
// the last "sync". It then checks for data loss errors by purposely dropping
// file data (or entire files) not protected by a "sync".

#pragma once

// NOTE(review): the names of the three system headers below appear to have
// been lost from this copy of the file; left as found.
#include #include #include

#include "db/version_set.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/mutexlock.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {

class TestWritableFile;
class FaultInjectionTestEnv;

// Bookkeeping for one tracked file: its name, the current write position and
// the positions recorded at the last sync and the last flush. A position of
// -1 means "not set yet".
struct FileState {
  std::string filename_;
  ssize_t pos_;                // current write position
  ssize_t pos_at_last_sync_;   // position when Sync() was last recorded
  ssize_t pos_at_last_flush_;  // position when Flush() was last recorded

  explicit FileState(const std::string& filename)
      : filename_(filename),
        pos_(-1),
        pos_at_last_sync_(-1),
        pos_at_last_flush_(-1) {}

  FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}

  // True when nothing has been written (pos_ <= 0) or everything written so
  // far is covered by the last recorded sync.
  bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; }

  // Truncates the file back to the last synced position.
  Status DropUnsyncedData(Env* env) const;

  // Truncates the file to a random position at or after the last synced
  // position.
  Status DropRandomUnsyncedData(Env* env, Random* rand) const;
};

// A wrapper around a writable file that keeps track of how much of the file
// has been written to or sync'ed.
class TestWritableFile : public WritableFile {
  // WritableFile wrapper that forwards to `target_` and keeps a FileState
  // recording the write position and the last synced/flushed positions.
  // NOTE(review): template arguments on the std::unique_ptr declarations in
  // this header appear to have been lost from this copy; left as found.
 public:
  explicit TestWritableFile(const std::string& fname,
                            std::unique_ptr&& f,
                            FaultInjectionTestEnv* env);
  virtual ~TestWritableFile();
  virtual Status Append(const Slice& data) override;
  // Forwarded unchanged; the tracked FileState is not updated here.
  virtual Status Truncate(uint64_t size) override {
    return target_->Truncate(size);
  }
  virtual Status Close() override;
  virtual Status Flush() override;
  virtual Status Sync() override;
  virtual bool IsSyncThreadSafe() const override { return true; }
  // Forwarded unchanged; the tracked FileState is not updated here.
  virtual Status PositionedAppend(const Slice& data,
                                  uint64_t offset) override {
    return target_->PositionedAppend(data, offset);
  }
  virtual bool use_direct_io() const override {
    return target_->use_direct_io();
  };

 private:
  FileState state_;            // tracked positions for this file
  std::unique_ptr target_;     // the wrapped file
  bool writable_file_opened_;  // cleared by Close()
  FaultInjectionTestEnv* env_;
};

// A wrapper around a random read/write file that fails its operations with
// the env's injected error while the filesystem is inactive.
class TestRandomRWFile : public RandomRWFile {
 public:
  explicit TestRandomRWFile(const std::string& fname,
                            std::unique_ptr&& f,
                            FaultInjectionTestEnv* env);
  virtual ~TestRandomRWFile();
  Status Write(uint64_t offset, const Slice& data) override;
  Status Read(uint64_t offset, size_t n, Slice* result,
              char* scratch) const override;
  Status Close() override;
  Status Flush() override;
  Status Sync() override;
  size_t GetRequiredBufferAlignment() const override {
    return target_->GetRequiredBufferAlignment();
  }
  bool use_direct_io() const override { return target_->use_direct_io(); };

 private:
  std::unique_ptr target_;  // the wrapped file
  bool file_opened_;        // cleared by Close()
  FaultInjectionTestEnv* env_;
};

// Directory wrapper whose Fsync() also tells the env that the directory has
// been synced.
class TestDirectory : public Directory {
 public:
  // Stores `dir` in a std::unique_ptr member.
  explicit TestDirectory(FaultInjectionTestEnv* env, std::string dirname,
                         Directory* dir)
      : env_(env), dirname_(dirname), dir_(dir) {}
  ~TestDirectory() {}

  virtual Status Fsync() override;

 private:
  FaultInjectionTestEnv* env_;
  std::string dirname_;
  std::unique_ptr dir_;
};

// Env wrapper that records, per file, how much data has been written and
// synced, and which files were created since their directory was last synced,
// so that tests can drop unsynced data or files and can make file operations
// fail with a chosen error.
class FaultInjectionTestEnv : public EnvWrapper {
 public:
  explicit FaultInjectionTestEnv(Env* base)
      : EnvWrapper(base), filesystem_active_(true) {}
  virtual ~FaultInjectionTestEnv() {}

  Status NewDirectory(const std::string& name,
                      std::unique_ptr* result) override;

  Status NewWritableFile(const std::string& fname,
                         std::unique_ptr* result,
                         const EnvOptions& soptions) override;

  Status ReopenWritableFile(const std::string& fname,
                            std::unique_ptr* result,
                            const EnvOptions& soptions) override;

  Status NewRandomRWFile(const std::string& fname,
                         std::unique_ptr* result,
                         const EnvOptions& soptions) override;

  Status NewRandomAccessFile(const std::string& fname,
                             std::unique_ptr* result,
                             const EnvOptions& soptions) override;

  virtual Status DeleteFile(const std::string& f) override;

  virtual Status RenameFile(const std::string& s,
                            const std::string& t) override;

// Undef to eliminate clash on Windows
#undef GetFreeSpace
  // Reports zero free space while the filesystem is inactive with a NoSpace
  // error injected; otherwise asks the wrapped env.
  virtual Status GetFreeSpace(const std::string& path,
                              uint64_t* disk_free) override {
    if (!IsFilesystemActive() && error_ == Status::NoSpace()) {
      *disk_free = 0;
      return Status::OK();
    } else {
      return target()->GetFreeSpace(path, disk_free);
    }
  }

  // Callbacks used by TestWritableFile to report its latest FileState.
  void WritableFileClosed(const FileState& state);

  void WritableFileSynced(const FileState& state);

  void WritableFileAppended(const FileState& state);

  // For every file that is not fully synced, make a call to `func` with
  // FileState of the file as the parameter.
  Status DropFileData(std::function func);

  Status DropUnsyncedFileData();

  Status DropRandomUnsyncedFileData(Random* rnd);

  Status DeleteFilesCreatedAfterLastDirSync();

  void ResetState();

  void UntrackFile(const std::string& f);

  // Forgets the files recorded as new in `dirname` since its last sync.
  void SyncDir(const std::string& dirname) {
    MutexLock l(&mutex_);
    dir_to_new_files_since_last_sync_.erase(dirname);
  }

  // Setting the filesystem to inactive is the test equivalent to simulating a
  // system reset. Setting to inactive will freeze our saved filesystem state so
  // that it will stop being recorded. It can then be reset back to the state at
  // the time of the reset.
  bool IsFilesystemActive() {
    MutexLock l(&mutex_);
    return filesystem_active_;
  }
  // Same as SetFilesystemActive() but does not take mutex_ itself. `error` is
  // stored only when deactivating.
  void SetFilesystemActiveNoLock(bool active,
      Status error = Status::Corruption("Not active")) {
    filesystem_active_ = active;
    if (!active) {
      error_ = error;
    }
  }
  void SetFilesystemActive(bool active,
      Status error = Status::Corruption("Not active")) {
    MutexLock l(&mutex_);
    SetFilesystemActiveNoLock(active, error);
  }
  // NOTE(review): reads open_files_ without taking mutex_ -- confirm callers
  // only use this when no file operations are in flight.
  void AssertNoOpenFile() { assert(open_files_.empty()); }
  // Returns the error stored by the last deactivation. NOTE(review): read
  // without taking mutex_.
  Status GetError() { return error_; }

 private:
  port::Mutex mutex_;
  // Latest known state per file name.
  std::map db_file_state_;
  // Names of tracked files that are currently open.
  std::set open_files_;
  // Directory name -> names of files created in it since its last sync.
  std::unordered_map>
      dir_to_new_files_since_last_sync_;
  bool filesystem_active_;  // Record flushes, syncs, writes
  Status error_;            // returned by operations while inactive
};

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/test_util/fault_injection_test_fs.cc000066400000000000000000000431501370372246700224770ustar00rootroot00000000000000//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// This test uses a custom FileSystem to keep track of the state of a file
// system as of the last "Sync". The data being written is cached in a
// "buffer". Only when "Sync" is called will the data become persistent. It can
// simulate the loss of file data (or entire files) not protected by a "Sync".
// For any of the FileSystem related operations, by specifying the "IOStatus
// Error", a specific error can be returned when the file system is not
// activated.
#include "test_util/fault_injection_test_fs.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <utility>

#include "port/lang.h"
#include "port/stack_trace.h"

namespace ROCKSDB_NAMESPACE {

// Returns everything before the last path separator ('/' or '\\') in
// `filename`, or "" when there is no separator.
// Assume a filename, and not a directory name like "/foo/bar/"
std::string TestFSGetDirName(const std::string filename) {
  size_t found = filename.find_last_of("/\\");
  if (found == std::string::npos) {
    return "";
  } else {
    return filename.substr(0, found);
  }
}

// Trim the trailing "/" characters at the end of `str`. A string consisting
// only of "/" characters (or an empty string) is returned unchanged.
std::string TestFSTrimDirname(const std::string& str) {
  size_t found = str.find_last_not_of("/");
  if (found == std::string::npos) {
    return str;
  }
  return str.substr(0, found + 1);
}

// Splits a full path into <directory, file name> at the last path separator.
// A name without any separator yields an empty directory and the whole name
// as the file name. (The previous implementation always skipped one
// character after the directory part, which dropped the first character of
// such names and threw std::out_of_range for an empty name.)
std::pair<std::string, std::string> TestFSGetDirAndName(
    const std::string& name) {
  const size_t sep = name.find_last_of("/\\");
  if (sep == std::string::npos) {
    return std::make_pair(std::string(), name);
  }
  return std::make_pair(name.substr(0, sep), name.substr(sep + 1));
}

// Discards all buffered (not yet synced) data.
IOStatus FSFileState::DropUnsyncedData() {
  buffer_.resize(0);
  return IOStatus::OK();
}

// Truncates the buffered (not yet synced) data to a random shorter length.
IOStatus FSFileState::DropRandomUnsyncedData(Random* rand) {
  if (buffer_.empty()) {
    // Nothing to drop. This also avoids Uniform(0), which is a modulo by
    // zero; the state of a file that was closed without an explicit sync
    // reaches this point with an empty buffer.
    return IOStatus::OK();
  }
  int range = static_cast<int>(buffer_.size());
  size_t truncated_size = static_cast<size_t>(rand->Uniform(range));
  buffer_.resize(truncated_size);
  return IOStatus::OK();
}

IOStatus TestFSDirectory::Fsync(const IOOptions& options, IODebugContext* dbg) {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  // Files created in this directory are now considered durable.
  fs_->SyncDir(dirname_);
  return dir_->Fsync(options, dbg);
}

TestFSWritableFile::TestFSWritableFile(const std::string& fname,
                                       std::unique_ptr<FSWritableFile>&& f,
                                       FaultInjectionTestFS* fs)
    : state_(fname),
      target_(std::move(f)),
      writable_file_opened_(true),
      fs_(fs) {
  assert(target_ != nullptr);
  state_.pos_ = 0;
}

TestFSWritableFile::~TestFSWritableFile() {
  if (writable_file_opened_) {
    Close(IOOptions(), nullptr);
  }
}

// Buffers `data` in memory; nothing reaches the wrapped file until Sync() or
// Close().
IOStatus TestFSWritableFile::Append(const Slice& data, const IOOptions&,
                                    IODebugContext*) {
  MutexLock l(&mutex_);
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  state_.buffer_.append(data.data(), data.size());
  state_.pos_ += data.size();
  fs_->WritableFileAppended(state_);
  return IOStatus::OK();
}

// Writes the buffered data to the wrapped file, syncs and closes it.
IOStatus TestFSWritableFile::Close(const IOOptions& options,
                                   IODebugContext* dbg) {
  // state_ is shared with Append(), which may run on another thread.
  MutexLock l(&mutex_);
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  writable_file_opened_ = false;
  IOStatus io_s;
  io_s = target_->Append(state_.buffer_, options, dbg);
  if (io_s.ok()) {
    state_.buffer_.resize(0);
    // Best effort: the status of this sync is intentionally not propagated.
    target_->Sync(options, dbg);
    io_s = target_->Close(options, dbg);
  }
  if (io_s.ok()) {
    fs_->WritableFileClosed(state_);
  }
  return io_s;
}

// Only records the flush position; the buffered data is not written out.
IOStatus TestFSWritableFile::Flush(const IOOptions&, IODebugContext*) {
  // state_ is shared with Append(), which may run on another thread.
  MutexLock l(&mutex_);
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  state_.pos_at_last_flush_ = state_.pos_;
  return IOStatus::OK();
}

// Writes the buffered data to the wrapped file and syncs it.
IOStatus TestFSWritableFile::Sync(const IOOptions& options,
                                  IODebugContext* dbg) {
  // IsSyncThreadSafe() returns true, so Sync() may race with Append(); both
  // touch state_.buffer_ and must hold the same mutex.
  MutexLock l(&mutex_);
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  IOStatus io_s = target_->Append(state_.buffer_, options, dbg);
  state_.buffer_.resize(0);
  // Best effort: the status of this sync is intentionally not propagated.
  target_->Sync(options, dbg);
  state_.pos_at_last_sync_ = state_.pos_;
  fs_->WritableFileSynced(state_);
  return io_s;
}

TestFSRandomRWFile::TestFSRandomRWFile(const std::string& /*fname*/,
                                       std::unique_ptr<FSRandomRWFile>&& f,
                                       FaultInjectionTestFS* fs)
    : target_(std::move(f)), file_opened_(true), fs_(fs) {
  assert(target_ != nullptr);
}

TestFSRandomRWFile::~TestFSRandomRWFile() {
  if (file_opened_) {
    Close(IOOptions(), nullptr);
  }
}

IOStatus TestFSRandomRWFile::Write(uint64_t offset, const Slice& data,
                                   const IOOptions& options,
                                   IODebugContext* dbg) {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  return target_->Write(offset, data, options, dbg);
}

IOStatus TestFSRandomRWFile::Read(uint64_t offset, size_t n,
                                  const IOOptions& options, Slice* result,
                                  char* scratch, IODebugContext* dbg) const {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  return target_->Read(offset, n, options, result, scratch, dbg);
}

IOStatus TestFSRandomRWFile::Close(const IOOptions& options,
                                   IODebugContext* dbg) {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  file_opened_ = false;
  return target_->Close(options, dbg);
}

IOStatus TestFSRandomRWFile::Flush(const IOOptions& options,
                                   IODebugContext* dbg) {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  return target_->Flush(options, dbg);
}

IOStatus TestFSRandomRWFile::Sync(const IOOptions& options,
                                  IODebugContext* dbg) {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  return target_->Sync(options, dbg);
}

TestFSRandomAccessFile::TestFSRandomAccessFile(
    const std::string& /*fname*/, std::unique_ptr<FSRandomAccessFile>&& f,
    FaultInjectionTestFS* fs)
    : target_(std::move(f)), fs_(fs) {
  assert(target_ != nullptr);
}

// Reads from the wrapped file and, on success, gives the fault injector a
// chance to turn the result into an error, a corruption or a short read.
IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n,
                                      const IOOptions& options, Slice* result,
                                      char* scratch,
                                      IODebugContext* dbg) const {
  if (!fs_->IsFilesystemActive()) {
    return fs_->GetError();
  }
  IOStatus s = target_->Read(offset, n, options, result, scratch, dbg);
  if (s.ok()) {
    s = fs_->InjectError(FaultInjectionTestFS::ErrorOperation::kRead, result,
                         use_direct_io(), scratch);
  }
  return s;
}

IOStatus FaultInjectionTestFS::NewDirectory(
    const std::string& name, const IOOptions& options,
    std::unique_ptr<FSDirectory>* result, IODebugContext* dbg) {
  std::unique_ptr<FSDirectory> r;
  IOStatus io_s = target()->NewDirectory(name, options, &r, dbg);
  assert(io_s.ok());
  if (!io_s.ok()) {
    return io_s;
  }
  result->reset(
      new TestFSDirectory(this, TestFSTrimDirname(name), r.release()));
  return IOStatus::OK();
}

IOStatus FaultInjectionTestFS::NewWritableFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  if (IsFilesystemDirectWritable()) {
    return target()->NewWritableFile(fname, file_opts, result, dbg);
  }

  IOStatus io_s = target()->NewWritableFile(fname, file_opts, result, dbg);
  if (io_s.ok()) {
    result->reset(new TestFSWritableFile(fname, std::move(*result), this));
    // Forget any state saved for an earlier incarnation of this file before
    // tracking it as open and as new in its directory.
    UntrackFile(fname);
    MutexLock l(&mutex_);
    open_files_.insert(fname);
    auto dir_and_name = TestFSGetDirAndName(fname);
    auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first];
    list.insert(dir_and_name.second);
  }
  return io_s;
}

IOStatus FaultInjectionTestFS::ReopenWritableFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  if (IsFilesystemDirectWritable()) {
    return target()->ReopenWritableFile(fname, file_opts, result, dbg);
  }
  IOStatus io_s = target()->ReopenWritableFile(fname, file_opts, result, dbg);
  if (io_s.ok()) {
    result->reset(new TestFSWritableFile(fname, std::move(*result), this));
    // Forget any state saved for an earlier incarnation of this file before
    // tracking it as open and as new in its directory.
    UntrackFile(fname);
    MutexLock l(&mutex_);
    open_files_.insert(fname);
    auto dir_and_name = TestFSGetDirAndName(fname);
    auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first];
    list.insert(dir_and_name.second);
  }
  return io_s;
}

IOStatus FaultInjectionTestFS::NewRandomRWFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSRandomRWFile>* result, IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  if (IsFilesystemDirectWritable()) {
    return target()->NewRandomRWFile(fname, file_opts, result, dbg);
  }
  IOStatus io_s = target()->NewRandomRWFile(fname, file_opts, result, dbg);
  if (io_s.ok()) {
    result->reset(new TestFSRandomRWFile(fname, std::move(*result), this));
    // Forget any state saved for an earlier incarnation of this file before
    // tracking it as open and as new in its directory.
    UntrackFile(fname);
    MutexLock l(&mutex_);
    open_files_.insert(fname);
    auto dir_and_name = TestFSGetDirAndName(fname);
    auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first];
    list.insert(dir_and_name.second);
  }
  return io_s;
}

IOStatus FaultInjectionTestFS::NewRandomAccessFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  // An "open" fault, when injected, prevents the file from being opened.
  IOStatus io_s = InjectError(ErrorOperation::kOpen, nullptr, false, nullptr);
  if (io_s.ok()) {
    io_s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
  }
  if (io_s.ok()) {
    result->reset(new TestFSRandomAccessFile(fname, std::move(*result), this));
  }
  return io_s;
}

IOStatus FaultInjectionTestFS::DeleteFile(const std::string& f,
                                          const IOOptions& options,
                                          IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  IOStatus io_s = FileSystemWrapper::DeleteFile(f, options, dbg);
  if (io_s.ok()) {
    UntrackFile(f);
  }
  return io_s;
}

IOStatus FaultInjectionTestFS::RenameFile(const std::string& s,
                                          const std::string& t,
                                          const IOOptions& options,
                                          IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  IOStatus io_s = FileSystemWrapper::RenameFile(s, t, options, dbg);

  if (io_s.ok()) {
    MutexLock l(&mutex_);
    // Move the tracked state, if any, from the old name to the new one.
    auto src_state = db_file_state_.find(s);
    if (src_state != db_file_state_.end()) {
      db_file_state_[t] = src_state->second;
      db_file_state_.erase(s);
    }

    // A file that was new in its source directory is now new in the target
    // directory.
    auto sdn = TestFSGetDirAndName(s);
    auto tdn = TestFSGetDirAndName(t);
    if (dir_to_new_files_since_last_sync_[sdn.first].erase(sdn.second) != 0) {
      auto& tlist = dir_to_new_files_since_last_sync_[tdn.first];
      assert(tlist.find(tdn.second) == tlist.end());
      tlist.insert(tdn.second);
    }
  }

  return io_s;
}

// Records the final state of a file that is tracked as open and stops
// tracking it as open.
void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) {
  MutexLock l(&mutex_);
  if (open_files_.find(state.filename_) != open_files_.end()) {
    db_file_state_[state.filename_] = state;
    open_files_.erase(state.filename_);
  }
}

// Records the latest state of a file that is tracked as open.
void FaultInjectionTestFS::WritableFileSynced(const FSFileState& state) {
  MutexLock l(&mutex_);
  if (open_files_.find(state.filename_) != open_files_.end()) {
    db_file_state_[state.filename_] = state;
  }
}

// Records the latest state of a file that is tracked as open.
void FaultInjectionTestFS::WritableFileAppended(const FSFileState& state) {
  MutexLock l(&mutex_);
  if (open_files_.find(state.filename_) != open_files_.end()) {
    db_file_state_[state.filename_] = state;
  }
}

// Drops the buffered data of every tracked file that is not fully synced.
// Stops at the first failure and returns it.
IOStatus FaultInjectionTestFS::DropUnsyncedFileData() {
  IOStatus io_s;
  MutexLock l(&mutex_);
  for (auto& name_and_state : db_file_state_) {
    FSFileState& fs_state = name_and_state.second;
    if (!fs_state.IsFullySynced()) {
      io_s = fs_state.DropUnsyncedData();
      if (!io_s.ok()) {
        break;
      }
    }
  }
  return io_s;
}

// Truncates the buffered data of every tracked file that is not fully synced
// to a random shorter length. Stops at the first failure and returns it.
IOStatus FaultInjectionTestFS::DropRandomUnsyncedFileData(Random* rnd) {
  IOStatus io_s;
  MutexLock l(&mutex_);
  for (auto& name_and_state : db_file_state_) {
    FSFileState& fs_state = name_and_state.second;
    if (!fs_state.IsFullySynced()) {
      io_s = fs_state.DropRandomUnsyncedData(rnd);
      if (!io_s.ok()) {
        break;
      }
    }
  }
  return io_s;
}

IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(
    const IOOptions& options, IODebugContext* dbg) {
  // Because DeleteFile access this container make a copy to avoid deadlock
  std::map<std::string, std::set<std::string>> map_copy;
  {
    MutexLock l(&mutex_);
    map_copy.insert(dir_to_new_files_since_last_sync_.begin(),
                    dir_to_new_files_since_last_sync_.end());
  }

  for (const auto& pair : map_copy) {
    for (const std::string& name : pair.second) {
      IOStatus io_s = DeleteFile(pair.first + "/" + name, options, dbg);
      if (!io_s.ok()) {
        return io_s;
      }
    }
  }
  return IOStatus::OK();
}

// Clears the saved file state and the new-files-per-directory record, and
// re-activates the filesystem. open_files_ is left untouched.
void FaultInjectionTestFS::ResetState() {
  MutexLock l(&mutex_);
  db_file_state_.clear();
  dir_to_new_files_since_last_sync_.clear();
  SetFilesystemActiveNoLock(true);
}

// Removes every trace of `f` from the tracking structures.
void FaultInjectionTestFS::UntrackFile(const std::string& f) {
  MutexLock l(&mutex_);
  auto dir_and_name = TestFSGetDirAndName(f);
  dir_to_new_files_since_last_sync_[dir_and_name.first].erase(
      dir_and_name.second);
  db_file_state_.erase(f);
  open_files_.erase(f);
}

// Possibly injects a fault into the calling thread's current operation.
//
// Does nothing (returns OK) unless the thread has an error context with
// injection enabled and a non-zero `one_in`. Otherwise, with probability
// 1/one_in:
//   - kOpen: returns IOError.
//   - kRead: returns IOError, corrupts up to 64 bytes of `scratch`, or
//     shortens `*result`. With direct IO, or when `*result` is empty, only
//     IOError is injected.
// Each injection bumps the context's counter and records a stack trace.
IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op, Slice* result,
                                           bool direct_io, char* scratch) {
  ErrorContext* ctx = static_cast<ErrorContext*>(thread_local_error_->Get());
  if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in) {
    return IOStatus::OK();
  }

  if (ctx->rand.OneIn(ctx->one_in)) {
    ctx->count++;
    if (ctx->callstack) {
      free(ctx->callstack);
    }
    ctx->callstack = port::SaveStack(&ctx->frames);
    switch (op) {
      case kRead: {
        if (!direct_io) {
          ctx->type = static_cast<ErrorType>(
              ctx->rand.Uniform(ErrorType::kErrorTypeMax));
        } else {
          // In Direct IO mode, the actual read will read extra data due to
          // alignment restrictions. So don't inject corruption or
          // truncated reads as we don't know if it will actually cause a
          // detectable error
          ctx->type = ErrorType::kErrorTypeStatus;
        }
        if (ctx->type != ErrorType::kErrorTypeStatus &&
            (result == nullptr || result->size() == 0)) {
          // An empty result can be neither corrupted nor truncated: picking
          // an offset would call Uniform(0) (a modulo by zero) and the
          // corruption retry loop below could never terminate. Inject a
          // plain IO error instead so the counted fault is still observable.
          ctx->type = ErrorType::kErrorTypeStatus;
        }
        switch (ctx->type) {
          // Inject IO error
          case ErrorType::kErrorTypeStatus:
            return IOStatus::IOError();
          // Inject random corruption
          case ErrorType::kErrorTypeCorruption: {
            if (result->data() == scratch) {
              uint64_t offset = ctx->rand.Uniform((uint32_t)result->size());
              uint64_t len =
                  std::min<uint64_t>(result->size() - offset, 64UL);
              assert(offset < result->size());
              assert(offset + len <= result->size());
              std::string str;
              // The randomly generated string could be identical to the
              // original one, so retry
              do {
                str = DBTestBase::RandomString(&ctx->rand,
                                               static_cast<int>(len));
              } while (str == std::string(scratch + offset, len));
              memcpy(scratch + offset, str.data(), len);
              break;
            } else {
              // The data does not live in `scratch`, so it cannot be
              // modified in place; truncate instead.
              FALLTHROUGH_INTENDED;
            }
          }
          // Truncate the result
          case ErrorType::kErrorTypeTruncated: {
            assert(result->size() > 0);
            uint64_t offset = ctx->rand.Uniform((uint32_t)result->size());
            assert(offset < result->size());
            *result = Slice(result->data(), offset);
            break;
          }
          default:
            assert(false);
        }
        break;
      }
      case kOpen:
        return IOStatus::IOError();
      default:
        assert(false);
    }
  }
  return IOStatus::OK();
}

// Prints the type and stack trace of the calling thread's most recently
// injected fault to stderr and releases the saved stack. Linux only; a no-op
// elsewhere.
void FaultInjectionTestFS::PrintFaultBacktrace() {
#if defined(OS_LINUX)
  ErrorContext* ctx = static_cast<ErrorContext*>(thread_local_error_->Get());
  if (ctx == nullptr) {
    return;
  }
  fprintf(stderr, "Injected error type = %d\n", ctx->type);
  port::PrintAndFreeStack(ctx->callstack, ctx->frames);
  ctx->callstack = nullptr;
#endif
}

}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/test_util/fault_injection_test_fs.h000066400000000000000000000320471370372246700223440ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// This test uses a custom FileSystem to keep track of the state of a file
// system since the last "Sync". The data being written is cached in a
// "buffer". Only when "Sync" is called, the data will be persistent. It can
// simulate file data loss (or entire files) not protected by a "Sync". For any
// of the FileSystem related operations, by specifying the "IOStatus Error", a
// specific error can be returned when file system is not activated.
#pragma once #include #include #include #include #include "db/db_test_util.h" #include "db/version_set.h" #include "env/mock_env.h" #include "file/filename.h" #include "include/rocksdb/file_system.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "util/mutexlock.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { class TestFSWritableFile; class FaultInjectionTestFS; struct FSFileState { std::string filename_; ssize_t pos_; ssize_t pos_at_last_sync_; ssize_t pos_at_last_flush_; std::string buffer_; explicit FSFileState(const std::string& filename) : filename_(filename), pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {} FSFileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {} bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; } IOStatus DropUnsyncedData(); IOStatus DropRandomUnsyncedData(Random* rand); }; // A wrapper around WritableFileWriter* file // is written to or sync'ed. class TestFSWritableFile : public FSWritableFile { public: explicit TestFSWritableFile(const std::string& fname, std::unique_ptr&& f, FaultInjectionTestFS* fs); virtual ~TestFSWritableFile(); virtual IOStatus Append(const Slice& data, const IOOptions&, IODebugContext*) override; virtual IOStatus Truncate(uint64_t size, const IOOptions& options, IODebugContext* dbg) override { return target_->Truncate(size, options, dbg); } virtual IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; virtual IOStatus Flush(const IOOptions&, IODebugContext*) override; virtual IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override; virtual bool IsSyncThreadSafe() const override { return true; } virtual IOStatus PositionedAppend(const Slice& data, uint64_t offset, const IOOptions& options, IODebugContext* dbg) override { return target_->PositionedAppend(data, offset, options, dbg); } virtual size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } virtual bool 
use_direct_io() const override { return target_->use_direct_io(); }; private: FSFileState state_; std::unique_ptr target_; bool writable_file_opened_; FaultInjectionTestFS* fs_; port::Mutex mutex_; }; // A wrapper around WritableFileWriter* file // is written to or sync'ed. class TestFSRandomRWFile : public FSRandomRWFile { public: explicit TestFSRandomRWFile(const std::string& fname, std::unique_ptr&& f, FaultInjectionTestFS* fs); virtual ~TestFSRandomRWFile(); IOStatus Write(uint64_t offset, const Slice& data, const IOOptions& options, IODebugContext* dbg) override; IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const override; IOStatus Close(const IOOptions& options, IODebugContext* dbg) override; IOStatus Flush(const IOOptions& options, IODebugContext* dbg) override; IOStatus Sync(const IOOptions& options, IODebugContext* dbg) override; size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } bool use_direct_io() const override { return target_->use_direct_io(); }; private: std::unique_ptr target_; bool file_opened_; FaultInjectionTestFS* fs_; }; class TestFSRandomAccessFile : public FSRandomAccessFile { public: explicit TestFSRandomAccessFile(const std::string& fname, std::unique_ptr&& f, FaultInjectionTestFS* fs); ~TestFSRandomAccessFile() override {} IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, Slice* result, char* scratch, IODebugContext* dbg) const override; size_t GetRequiredBufferAlignment() const override { return target_->GetRequiredBufferAlignment(); } bool use_direct_io() const override { return target_->use_direct_io(); } private: std::unique_ptr target_; FaultInjectionTestFS* fs_; }; class TestFSDirectory : public FSDirectory { public: explicit TestFSDirectory(FaultInjectionTestFS* fs, std::string dirname, FSDirectory* dir) : fs_(fs), dirname_(dirname), dir_(dir) {} ~TestFSDirectory() {} virtual IOStatus 
Fsync(const IOOptions& options, IODebugContext* dbg) override; private: FaultInjectionTestFS* fs_; std::string dirname_; std::unique_ptr dir_; }; class FaultInjectionTestFS : public FileSystemWrapper { public: explicit FaultInjectionTestFS(std::shared_ptr base) : FileSystemWrapper(base), filesystem_active_(true), filesystem_writable_(false), thread_local_error_( new ThreadLocalPtr(DeleteThreadLocalErrorContext)) {} virtual ~FaultInjectionTestFS() {} const char* Name() const override { return "FaultInjectionTestFS"; } IOStatus NewDirectory(const std::string& name, const IOOptions& options, std::unique_ptr* result, IODebugContext* dbg) override; IOStatus NewWritableFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) override; IOStatus ReopenWritableFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) override; IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) override; IOStatus NewRandomAccessFile(const std::string& fname, const FileOptions& file_opts, std::unique_ptr* result, IODebugContext* dbg) override; virtual IOStatus DeleteFile(const std::string& f, const IOOptions& options, IODebugContext* dbg) override; virtual IOStatus RenameFile(const std::string& s, const std::string& t, const IOOptions& options, IODebugContext* dbg) override; // Undef to eliminate clash on Windows #undef GetFreeSpace virtual IOStatus GetFreeSpace(const std::string& path, const IOOptions& options, uint64_t* disk_free, IODebugContext* dbg) override { if (!IsFilesystemActive() && error_ == IOStatus::NoSpace()) { *disk_free = 0; return IOStatus::OK(); } else { return target()->GetFreeSpace(path, options, disk_free, dbg); } } void WritableFileClosed(const FSFileState& state); void WritableFileSynced(const FSFileState& state); void WritableFileAppended(const FSFileState& state); IOStatus DropUnsyncedFileData(); 
IOStatus DropRandomUnsyncedFileData(Random* rnd); IOStatus DeleteFilesCreatedAfterLastDirSync(const IOOptions& options, IODebugContext* dbg); void ResetState(); void UntrackFile(const std::string& f); void SyncDir(const std::string& dirname) { MutexLock l(&mutex_); dir_to_new_files_since_last_sync_.erase(dirname); } // Setting the filesystem to inactive is the test equivalent to simulating a // system reset. Setting to inactive will freeze our saved filesystem state so // that it will stop being recorded. It can then be reset back to the state at // the time of the reset. bool IsFilesystemActive() { MutexLock l(&mutex_); return filesystem_active_; } // Setting filesystem_writable_ makes NewWritableFile. ReopenWritableFile, // and NewRandomRWFile bypass FaultInjectionTestFS and go directly to the // target FS bool IsFilesystemDirectWritable() { MutexLock l(&mutex_); return filesystem_writable_; } void SetFilesystemActiveNoLock( bool active, IOStatus error = IOStatus::Corruption("Not active")) { filesystem_active_ = active; if (!active) { error_ = error; } } void SetFilesystemActive( bool active, IOStatus error = IOStatus::Corruption("Not active")) { MutexLock l(&mutex_); SetFilesystemActiveNoLock(active, error); } void SetFilesystemDirectWritable( bool writable) { MutexLock l(&mutex_); filesystem_writable_ = writable; } void AssertNoOpenFile() { assert(open_files_.empty()); } IOStatus GetError() { return error_; } void SetFileSystemIOError(IOStatus io_error) { MutexLock l(&mutex_); error_ = io_error; } // Specify what the operation, so we can inject the right type of error enum ErrorOperation : char { kRead = 0, kOpen, }; // Set thread-local parameters for error injection. 
The first argument, // seed is the seed for the random number generator, and one_in determines // the probability of injecting error (i.e an error is injected with // 1/one_in probability) void SetThreadLocalReadErrorContext(uint32_t seed, int one_in) { struct ErrorContext* ctx = static_cast(thread_local_error_->Get()); if (ctx == nullptr) { ctx = new ErrorContext(seed); thread_local_error_->Reset(ctx); } ctx->one_in = one_in; ctx->count = 0; } static void DeleteThreadLocalErrorContext(void *p) { ErrorContext* ctx = static_cast(p); delete ctx; } // Inject an error. For a READ operation, a status of IOError(), a // corruption in the contents of scratch, or truncation of slice // are the types of error with equal probability. For OPEN, // its always an IOError. IOStatus InjectError(ErrorOperation op, Slice* slice, bool direct_io, char* scratch); // Get the count of how many times we injected since the previous call int GetAndResetErrorCount() { ErrorContext* ctx = static_cast(thread_local_error_->Get()); int count = 0; if (ctx != nullptr) { count = ctx->count; ctx->count = 0; } return count; } void EnableErrorInjection() { ErrorContext* ctx = static_cast(thread_local_error_->Get()); if (ctx) { ctx->enable_error_injection = true; } } void DisableErrorInjection() { ErrorContext* ctx = static_cast(thread_local_error_->Get()); if (ctx) { ctx->enable_error_injection = false; } } // We capture a backtrace every time a fault is injected, for debugging // purposes. 
This call prints the backtrace to stderr and frees the // saved callstack void PrintFaultBacktrace(); private: port::Mutex mutex_; std::map db_file_state_; std::set open_files_; std::unordered_map> dir_to_new_files_since_last_sync_; bool filesystem_active_; // Record flushes, syncs, writes bool filesystem_writable_; // Bypass FaultInjectionTestFS and go directly // to underlying FS for writable files IOStatus error_; enum ErrorType : int { kErrorTypeStatus = 0, kErrorTypeCorruption, kErrorTypeTruncated, kErrorTypeMax }; struct ErrorContext { Random rand; int one_in; int count; bool enable_error_injection; void* callstack; int frames; ErrorType type; explicit ErrorContext(uint32_t seed) : rand(seed), enable_error_injection(false), callstack(nullptr), frames(0) {} ~ErrorContext() { if (callstack) { free(callstack); } } }; std::unique_ptr thread_local_error_; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/test_util/mock_time_env.h000066400000000000000000000024111370372246700202470ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include "rocksdb/env.h" namespace ROCKSDB_NAMESPACE { class MockTimeEnv : public EnvWrapper { public: explicit MockTimeEnv(Env* base) : EnvWrapper(base) {} virtual Status GetCurrentTime(int64_t* time) override { assert(time != nullptr); assert(current_time_ <= static_cast(std::numeric_limits::max())); *time = static_cast(current_time_); return Status::OK(); } virtual uint64_t NowMicros() override { assert(current_time_ <= std::numeric_limits::max() / 1000000); return current_time_ * 1000000; } virtual uint64_t NowNanos() override { assert(current_time_ <= std::numeric_limits::max() / 1000000000); return current_time_ * 1000000000; } uint64_t RealNowMicros() { return target()->NowMicros(); } void set_current_time(uint64_t time) { assert(time >= current_time_); current_time_ = time; } private: std::atomic current_time_{0}; }; } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/test_util/sync_point.cc000066400000000000000000000031411370372246700177540ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#include "test_util/sync_point.h"
#include "test_util/sync_point_impl.h"

int rocksdb_kill_odds = 0;
std::vector<std::string> rocksdb_kill_prefix_blacklist;

#ifndef NDEBUG
namespace ROCKSDB_NAMESPACE {

// Returns the process-wide SyncPoint, created on first use.
SyncPoint* SyncPoint::GetInstance() {
  static SyncPoint instance;
  return &instance;
}

SyncPoint::SyncPoint() : impl_(new Data) {}

SyncPoint::~SyncPoint() { delete impl_; }

// Every public operation below simply forwards to the implementation object.

void SyncPoint::LoadDependency(
    const std::vector<SyncPointPair>& dependencies) {
  impl_->LoadDependency(dependencies);
}

void SyncPoint::LoadDependencyAndMarkers(
    const std::vector<SyncPointPair>& dependencies,
    const std::vector<SyncPointPair>& markers) {
  impl_->LoadDependencyAndMarkers(dependencies, markers);
}

void SyncPoint::SetCallBack(const std::string& point,
                            const std::function<void(void*)>& callback) {
  impl_->SetCallBack(point, callback);
}

void SyncPoint::ClearCallBack(const std::string& point) {
  impl_->ClearCallBack(point);
}

void SyncPoint::ClearAllCallBacks() { impl_->ClearAllCallBacks(); }

void SyncPoint::EnableProcessing() { impl_->EnableProcessing(); }

void SyncPoint::DisableProcessing() { impl_->DisableProcessing(); }

void SyncPoint::ClearTrace() { impl_->ClearTrace(); }

void SyncPoint::Process(const std::string& point, void* cb_arg) {
  impl_->Process(point, cb_arg);
}

}  // namespace ROCKSDB_NAMESPACE
#endif  // NDEBUG
rocksdb-6.11.4/test_util/sync_point.h000066400000000000000000000131521370372246700176210ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
#pragma once

#include <assert.h>
#include <functional>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

#include "rocksdb/rocksdb_namespace.h"

// This is only set from db_stress.cc and for testing only.
// If non-zero, kill at various points in source code with probability 1/this
extern int rocksdb_kill_odds;
// If kill point has a prefix on this list, will skip killing.
extern std::vector<std::string> rocksdb_kill_prefix_blacklist;

#ifdef NDEBUG
// empty in release build
#define TEST_KILL_RANDOM(kill_point, rocksdb_kill_odds)
#else

namespace ROCKSDB_NAMESPACE {
// Kill the process with probability 1/odds for testing.
extern void TestKillRandom(std::string kill_point, int odds,
                           const std::string& srcfile, int srcline);

// To avoid crashing always at some frequently executed codepaths (during
// kill random test), use this factor to reduce odds
#define REDUCE_ODDS 2
#define REDUCE_ODDS2 4

#define TEST_KILL_RANDOM(kill_point, rocksdb_kill_odds)                  \
  {                                                                      \
    if (rocksdb_kill_odds > 0) {                                         \
      TestKillRandom(kill_point, rocksdb_kill_odds, __FILE__, __LINE__); \
    }                                                                    \
  }
}  // namespace ROCKSDB_NAMESPACE
#endif

#ifdef NDEBUG
#define TEST_SYNC_POINT(x)
#define TEST_IDX_SYNC_POINT(x, index)
#define TEST_SYNC_POINT_CALLBACK(x, y)
#define INIT_SYNC_POINT_SINGLETONS()
#else

namespace ROCKSDB_NAMESPACE {

// This class provides facility to reproduce race conditions deterministically
// in unit tests.
// Developer could specify sync points in the codebase via TEST_SYNC_POINT.
// Each sync point represents a position in the execution stream of a thread.
// In the unit test, 'Happens After' relationship among sync points could be
// setup via SyncPoint::LoadDependency, to reproduce a desired interleave of
// threads execution.
// Refer to (DBTest,TransactionLogIteratorRace), for an example use case.

class SyncPoint {
 public:
  static SyncPoint* GetInstance();

  // Singleton: neither copyable nor assignable.
  SyncPoint(const SyncPoint&) = delete;
  SyncPoint& operator=(const SyncPoint&) = delete;
  ~SyncPoint();

  // One "successor happens after predecessor" edge.
  struct SyncPointPair {
    std::string predecessor;
    std::string successor;
  };

  // call once at the beginning of a test to setup the dependency between
  // sync points
  void LoadDependency(const std::vector<SyncPointPair>& dependencies);

  // call once at the beginning of a test to setup the dependency between
  // sync points and setup markers indicating the successor is only enabled
  // when it is processed on the same thread as the predecessor.
  // When adding a marker, it implicitly adds a dependency for the marker pair.
  void LoadDependencyAndMarkers(const std::vector<SyncPointPair>& dependencies,
                                const std::vector<SyncPointPair>& markers);

  // The argument to the callback is passed through from
  // TEST_SYNC_POINT_CALLBACK(); nullptr if TEST_SYNC_POINT or
  // TEST_IDX_SYNC_POINT was used.
  void SetCallBack(const std::string& point,
                   const std::function<void(void*)>& callback);

  // Clear callback function by point
  void ClearCallBack(const std::string& point);

  // Clear all call back functions.
  void ClearAllCallBacks();

  // enable sync point processing (disabled on startup)
  void EnableProcessing();

  // disable sync point processing
  void DisableProcessing();

  // remove the execution trace of all sync points
  void ClearTrace();

  // triggered by TEST_SYNC_POINT, blocking execution until all predecessors
  // are executed.
  // And/or call registered callback function, with argument `cb_arg`
  void Process(const std::string& point, void* cb_arg = nullptr);

  // TODO: it might be useful to provide a function that blocks until all
  // sync points are cleared.

  // We want this to be public so we can
  // subclass the implementation
  struct Data;

 private:
  // Singleton
  SyncPoint();
  Data* impl_;
};

}  // namespace ROCKSDB_NAMESPACE

// Use TEST_SYNC_POINT to specify sync points inside code base.
// Sync points can have happens-after dependency on other sync points,
// configured at runtime via SyncPoint::LoadDependency. This could be
// utilized to re-produce race conditions between threads.
// See TransactionLogIteratorRace in db_test.cc for an example use case.
// TEST_SYNC_POINT is no op in release build.
#define TEST_SYNC_POINT(x) \ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->Process(x) #define TEST_IDX_SYNC_POINT(x, index) \ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->Process(x + \ std::to_string(index)) #define TEST_SYNC_POINT_CALLBACK(x, y) \ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->Process(x, y) #define INIT_SYNC_POINT_SINGLETONS() \ (void)ROCKSDB_NAMESPACE::SyncPoint::GetInstance(); #endif // NDEBUG // Callback sync point for any read IO errors that should be ignored by // the fault injection framework // Disable in release mode #ifdef NDEBUG #define IGNORE_STATUS_IF_ERROR(_status_) #else #define IGNORE_STATUS_IF_ERROR(_status_) \ { \ if (!_status_.ok()) { \ TEST_SYNC_POINT("FaultInjectionIgnoreError"); \ } \ } #endif // NDEBUG rocksdb-6.11.4/test_util/sync_point_impl.cc000066400000000000000000000070571370372246700210070ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "test_util/sync_point_impl.h" #ifndef NDEBUG namespace ROCKSDB_NAMESPACE { void TestKillRandom(std::string kill_point, int odds, const std::string& srcfile, int srcline) { for (auto& p : rocksdb_kill_prefix_blacklist) { if (kill_point.substr(0, p.length()) == p) { return; } } assert(odds > 0); if (odds % 7 == 0) { // class Random uses multiplier 16807, which is 7^5. If odds are // multiplier of 7, there might be limited values generated. 
odds++; } auto* r = Random::GetTLSInstance(); bool crash = r->OneIn(odds); if (crash) { port::Crash(srcfile, srcline); } } void SyncPoint::Data::LoadDependency(const std::vector& dependencies) { std::lock_guard lock(mutex_); successors_.clear(); predecessors_.clear(); cleared_points_.clear(); for (const auto& dependency : dependencies) { successors_[dependency.predecessor].push_back(dependency.successor); predecessors_[dependency.successor].push_back(dependency.predecessor); } cv_.notify_all(); } void SyncPoint::Data::LoadDependencyAndMarkers( const std::vector& dependencies, const std::vector& markers) { std::lock_guard lock(mutex_); successors_.clear(); predecessors_.clear(); cleared_points_.clear(); markers_.clear(); marked_thread_id_.clear(); for (const auto& dependency : dependencies) { successors_[dependency.predecessor].push_back(dependency.successor); predecessors_[dependency.successor].push_back(dependency.predecessor); } for (const auto& marker : markers) { successors_[marker.predecessor].push_back(marker.successor); predecessors_[marker.successor].push_back(marker.predecessor); markers_[marker.predecessor].push_back(marker.successor); } cv_.notify_all(); } bool SyncPoint::Data::PredecessorsAllCleared(const std::string& point) { for (const auto& pred : predecessors_[point]) { if (cleared_points_.count(pred) == 0) { return false; } } return true; } void SyncPoint::Data::ClearCallBack(const std::string& point) { std::unique_lock lock(mutex_); while (num_callbacks_running_ > 0) { cv_.wait(lock); } callbacks_.erase(point); } void SyncPoint::Data::ClearAllCallBacks() { std::unique_lock lock(mutex_); while (num_callbacks_running_ > 0) { cv_.wait(lock); } callbacks_.clear(); } void SyncPoint::Data::Process(const std::string& point, void* cb_arg) { if (!enabled_) { return; } std::unique_lock lock(mutex_); auto thread_id = std::this_thread::get_id(); auto marker_iter = markers_.find(point); if (marker_iter != markers_.end()) { for (auto& marked_point : 
marker_iter->second) { marked_thread_id_.emplace(marked_point, thread_id); } } if (DisabledByMarker(point, thread_id)) { return; } while (!PredecessorsAllCleared(point)) { cv_.wait(lock); if (DisabledByMarker(point, thread_id)) { return; } } auto callback_pair = callbacks_.find(point); if (callback_pair != callbacks_.end()) { num_callbacks_running_++; mutex_.unlock(); callback_pair->second(cb_arg); mutex_.lock(); num_callbacks_running_--; } cleared_points_.insert(point); cv_.notify_all(); } } // namespace ROCKSDB_NAMESPACE #endif rocksdb-6.11.4/test_util/sync_point_impl.h000066400000000000000000000046471370372246700206530ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "test_util/sync_point.h" #include #include #include #include #include #include #include #include #include #include "port/port.h" #include "util/random.h" #pragma once #ifndef NDEBUG namespace ROCKSDB_NAMESPACE { struct SyncPoint::Data { Data() : enabled_(false) {} // Enable proper deletion by subclasses virtual ~Data() {} // successor/predecessor map loaded from LoadDependency std::unordered_map> successors_; std::unordered_map> predecessors_; std::unordered_map > callbacks_; std::unordered_map > markers_; std::unordered_map marked_thread_id_; std::mutex mutex_; std::condition_variable cv_; // sync points that have been passed through std::unordered_set cleared_points_; std::atomic enabled_; int num_callbacks_running_ = 0; void LoadDependency(const std::vector& dependencies); void LoadDependencyAndMarkers(const std::vector& dependencies, const std::vector& markers); bool PredecessorsAllCleared(const std::string& point); void SetCallBack(const std::string& point, const std::function& callback) { std::lock_guard lock(mutex_); callbacks_[point] = callback; } 
void ClearCallBack(const std::string& point); void ClearAllCallBacks(); void EnableProcessing() { enabled_ = true; } void DisableProcessing() { enabled_ = false; } void ClearTrace() { std::lock_guard lock(mutex_); cleared_points_.clear(); } bool DisabledByMarker(const std::string& point, std::thread::id thread_id) { auto marked_point_iter = marked_thread_id_.find(point); return marked_point_iter != marked_thread_id_.end() && thread_id != marked_point_iter->second; } void Process(const std::string& point, void* cb_arg); }; } // namespace ROCKSDB_NAMESPACE #endif // NDEBUG rocksdb-6.11.4/test_util/testharness.cc000066400000000000000000000032061370372246700201340ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 

#include "test_util/testharness.h"
// NOTE(review): the targets of the next two includes were lost when the file
// was extracted; restored to the headers needed for std::string and
// std::this_thread -- confirm against the upstream file.
#include <string>
#include <thread>

namespace ROCKSDB_NAMESPACE {
namespace test {

// Predicate-formatter used by ASSERT_OK / EXPECT_OK: succeeds when `s` is OK,
// otherwise fails with the checked expression text followed by the status
// string.
::testing::AssertionResult AssertStatus(const char* s_expr, const Status& s) {
  if (s.ok()) {
    return ::testing::AssertionSuccess();
  } else {
    return ::testing::AssertionFailure() << s_expr << std::endl
                                         << s.ToString();
  }
}

// Returns the directory `env` reports as its test directory; a failure to
// obtain it is recorded as a non-fatal gtest failure.
std::string TmpDir(Env* env) {
  std::string dir;
  Status s = env->GetTestDirectory(&dir);
  EXPECT_TRUE(s.ok()) << s.ToString();
  return dir;
}

// Returns "<dir>/<name>_<hash of the calling thread's id>".
std::string PerThreadDBPath(std::string dir, std::string name) {
  size_t tid = std::hash<std::thread::id>()(std::this_thread::get_id());
  return dir + "/" + name + "_" + std::to_string(tid);
}

// Same, rooted at the default test directory.
std::string PerThreadDBPath(std::string name) {
  return PerThreadDBPath(test::TmpDir(), name);
}

// Same, rooted at the test directory of `env`.
std::string PerThreadDBPath(Env* env, std::string name) {
  return PerThreadDBPath(test::TmpDir(env), name);
}

// Returns the value of the TEST_RANDOM_SEED environment variable parsed with
// atoi, or 301 when the variable is unset or the parsed value is <= 0.
int RandomSeed() {
  const char* env = getenv("TEST_RANDOM_SEED");
  int result = (env != nullptr ? atoi(env) : 301);
  if (result <= 0) {
    result = 301;
  }
  return result;
}

}  // namespace test
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/test_util/testharness.h000066400000000000000000000030341370372246700177750ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

// NOTE(review): the targets of the two bare includes below (the non-AIX gtest
// header and the one after #endif) were lost in extraction; restored as the
// angle-bracket gtest header and <string> -- confirm against upstream.
#ifdef OS_AIX
#include "gtest/gtest.h"
#else
#include <gtest/gtest.h>
#endif

#include <string>
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {
namespace test {

// Return the directory to use for temporary storage.
std::string TmpDir(Env* env = Env::Default());

// A path unique within the thread
std::string PerThreadDBPath(std::string name);
std::string PerThreadDBPath(Env* env, std::string name);
std::string PerThreadDBPath(std::string dir, std::string name);

// Return a randomization seed for this run.  Typically returns the
// same number on repeated invocations of this binary, but automated
// runs may be able to vary the seed.
int RandomSeed();

::testing::AssertionResult AssertStatus(const char* s_expr, const Status& s);

#define ASSERT_OK(s) \
  ASSERT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s)
#define ASSERT_NOK(s) ASSERT_FALSE((s).ok())
#define EXPECT_OK(s) \
  EXPECT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s)
#define EXPECT_NOK(s) EXPECT_FALSE((s).ok())
}  // namespace test
}  // namespace ROCKSDB_NAMESPACE
rocksdb-6.11.4/test_util/testutil.cc000066400000000000000000000465521370372246700174610ustar00rootroot00000000000000
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "test_util/testutil.h"

// NOTE(review): the targets of these six system includes were lost when the
// file was extracted; restored to headers this file's code relies on
// (O_DIRECT, stat, std::array, std::ifstream, std::stringstream) -- confirm
// against the upstream file.
#include <fcntl.h>
#include <sys/stat.h>
#include <array>
#include <cctype>
#include <fstream>
#include <sstream>

#include "db/memtable_list.h"
#include "env/composite_env_wrapper.h"
#include "file/random_access_file_reader.h"
#include "file/sequence_file_reader.h"
#include "file/writable_file_writer.h"
#include "port/port.h"
#include "test_util/sync_point.h"

namespace ROCKSDB_NAMESPACE {
namespace test {

const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version;
const uint32_t kLatestFormatVersion = 5u;

// Fills *dst with `len` random printable ASCII characters and returns a Slice
// that refers to *dst.
Slice RandomString(Random* rnd, int len, std::string* dst) {
  dst->resize(len);
  for (int i = 0; i < len; i++) {
    (*dst)[i] = static_cast<char>(' ' + rnd->Uniform(95));  // ' ' .. '~'
  }
  return Slice(*dst);
}

// Returns a string of `len` random lowercase letters.
extern std::string RandomHumanReadableString(Random* rnd, int len) {
  std::string ret;
  ret.resize(len);
  for (int i = 0; i < len; ++i) {
    ret[i] = static_cast<char>('a' + rnd->Uniform(26));
  }
  return ret;
}

// Returns a key of `len` characters drawn from kTestChars. `type` selects
// whether each character is random, or always the last, middle, or first
// entry of the table.
std::string RandomKey(Random* rnd, int len, RandomKeyType type) {
  // Make sure to generate a wide variety of characters so we
  // test the boundary conditions for short-key optimizations.
  static const char kTestChars[] = {'\0', '\1', 'a',    'b',    'c',
                                    'd',  'e',  '\xfd', '\xfe', '\xff'};
  std::string result;
  for (int i = 0; i < len; i++) {
    std::size_t indx = 0;
    switch (type) {
      case RandomKeyType::RANDOM:
        indx = rnd->Uniform(sizeof(kTestChars));
        break;
      case RandomKeyType::LARGEST:
        indx = sizeof(kTestChars) - 1;
        break;
      case RandomKeyType::MIDDLE:
        indx = sizeof(kTestChars) / 2;
        break;
      case RandomKeyType::SMALLEST:
        indx = 0;
        break;
    }
    result += kTestChars[indx];
  }
  return result;
}

// Builds a string of `len` bytes by repeating a random chunk of
// max(1, len * compressed_fraction) bytes, stores it in *dst and returns a
// Slice that refers to *dst.
extern Slice CompressibleString(Random* rnd, double compressed_fraction,
                                int len, std::string* dst) {
  int raw = static_cast<int>(len * compressed_fraction);
  if (raw < 1) raw = 1;
  std::string raw_data;
  RandomString(rnd, raw, &raw_data);

  // Duplicate the random data until we have filled "len" bytes
  dst->clear();
  while (dst->size() < static_cast<unsigned int>(len)) {
    dst->append(raw_data);
  }
  dst->resize(len);
  return Slice(*dst);
}

namespace {
// Orders 8-byte keys by the uint64_t value read from them with GetUnaligned
// instead of byte-wise.
class Uint64ComparatorImpl : public Comparator {
 public:
  Uint64ComparatorImpl() {}

  const char* Name() const override { return "rocksdb.Uint64Comparator"; }

  int Compare(const Slice& a, const Slice& b) const override {
    assert(a.size() == sizeof(uint64_t) && b.size() == sizeof(uint64_t));
    const uint64_t* left = reinterpret_cast<const uint64_t*>(a.data());
    const uint64_t* right = reinterpret_cast<const uint64_t*>(b.data());
    uint64_t leftValue;
    uint64_t rightValue;
    GetUnaligned(left, &leftValue);
    GetUnaligned(right, &rightValue);
    if (leftValue == rightValue) {
      return 0;
    } else if (leftValue < rightValue) {
      return -1;
    } else {
      return 1;
    }
  }

  // Keys are left unchanged: no shortening is attempted.
  void FindShortestSeparator(std::string* /*start*/,
                             const Slice& /*limit*/) const override {
    return;
  }

  void FindShortSuccessor(std::string* /*key*/) const override { return; }
};

// A test implementation of comparator with 64-bit integer timestamp.
class ComparatorWithU64TsImpl : public Comparator { public: ComparatorWithU64TsImpl() : Comparator(/*ts_sz=*/sizeof(uint64_t)), cmp_without_ts_(BytewiseComparator()) { assert(cmp_without_ts_); assert(cmp_without_ts_->timestamp_size() == 0); } const char* Name() const override { return "ComparatorWithU64Ts"; } void FindShortSuccessor(std::string*) const override {} void FindShortestSeparator(std::string*, const Slice&) const override {} int Compare(const Slice& a, const Slice& b) const override { int ret = CompareWithoutTimestamp(a, b); size_t ts_sz = timestamp_size(); if (ret != 0) { return ret; } // Compare timestamp. // For the same user key with different timestamps, larger (newer) timestamp // comes first. return -CompareTimestamp(ExtractTimestampFromUserKey(a, ts_sz), ExtractTimestampFromUserKey(b, ts_sz)); } using Comparator::CompareWithoutTimestamp; int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b, bool b_has_ts) const override { const size_t ts_sz = timestamp_size(); assert(!a_has_ts || a.size() >= ts_sz); assert(!b_has_ts || b.size() >= ts_sz); Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, ts_sz) : a; Slice rhs = b_has_ts ? 
StripTimestampFromUserKey(b, ts_sz) : b; return cmp_without_ts_->Compare(lhs, rhs); } int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override { assert(ts1.size() == sizeof(uint64_t)); assert(ts2.size() == sizeof(uint64_t)); uint64_t lhs = DecodeFixed64(ts1.data()); uint64_t rhs = DecodeFixed64(ts2.data()); if (lhs < rhs) { return -1; } else if (lhs > rhs) { return 1; } else { return 0; } } private: const Comparator* cmp_without_ts_{nullptr}; }; } // namespace const Comparator* Uint64Comparator() { static Uint64ComparatorImpl uint64comp; return &uint64comp; } const Comparator* ComparatorWithU64Ts() { static ComparatorWithU64TsImpl comp_with_u64_ts; return &comp_with_u64_ts; } WritableFileWriter* GetWritableFileWriter(WritableFile* wf, const std::string& fname) { std::unique_ptr file(wf); return new WritableFileWriter(NewLegacyWritableFileWrapper(std::move(file)), fname, EnvOptions()); } RandomAccessFileReader* GetRandomAccessFileReader(RandomAccessFile* raf) { std::unique_ptr file(raf); return new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file), "[test RandomAccessFileReader]"); } SequentialFileReader* GetSequentialFileReader(SequentialFile* se, const std::string& fname) { std::unique_ptr file(se); return new SequentialFileReader(NewLegacySequentialFileWrapper(file), fname); } void CorruptKeyType(InternalKey* ikey) { std::string keystr = ikey->Encode().ToString(); keystr[keystr.size() - 8] = kTypeLogData; ikey->DecodeFrom(Slice(keystr.data(), keystr.size())); } std::string KeyStr(const std::string& user_key, const SequenceNumber& seq, const ValueType& t, bool corrupt) { InternalKey k(user_key, seq, t); if (corrupt) { CorruptKeyType(&k); } return k.Encode().ToString(); } std::string RandomName(Random* rnd, const size_t len) { std::stringstream ss; for (size_t i = 0; i < len; ++i) { ss << static_cast(rnd->Uniform(26) + 'a'); } return ss.str(); } CompressionType RandomCompressionType(Random* rnd) { auto ret = static_cast(rnd->Uniform(6)); 
while (!CompressionTypeSupported(ret)) { ret = static_cast((static_cast(ret) + 1) % 6); } return ret; } void RandomCompressionTypeVector(const size_t count, std::vector* types, Random* rnd) { types->clear(); for (size_t i = 0; i < count; ++i) { types->emplace_back(RandomCompressionType(rnd)); } } const SliceTransform* RandomSliceTransform(Random* rnd, int pre_defined) { int random_num = pre_defined >= 0 ? pre_defined : rnd->Uniform(4); switch (random_num) { case 0: return NewFixedPrefixTransform(rnd->Uniform(20) + 1); case 1: return NewCappedPrefixTransform(rnd->Uniform(20) + 1); case 2: return NewNoopTransform(); default: return nullptr; } } BlockBasedTableOptions RandomBlockBasedTableOptions(Random* rnd) { BlockBasedTableOptions opt; opt.cache_index_and_filter_blocks = rnd->Uniform(2); opt.pin_l0_filter_and_index_blocks_in_cache = rnd->Uniform(2); opt.pin_top_level_index_and_filter = rnd->Uniform(2); using IndexType = BlockBasedTableOptions::IndexType; const std::array index_types = { {IndexType::kBinarySearch, IndexType::kHashSearch, IndexType::kTwoLevelIndexSearch, IndexType::kBinarySearchWithFirstKey}}; opt.index_type = index_types[rnd->Uniform(static_cast(index_types.size()))]; opt.hash_index_allow_collision = rnd->Uniform(2); opt.checksum = static_cast(rnd->Uniform(3)); opt.block_size = rnd->Uniform(10000000); opt.block_size_deviation = rnd->Uniform(100); opt.block_restart_interval = rnd->Uniform(100); opt.index_block_restart_interval = rnd->Uniform(100); opt.whole_key_filtering = rnd->Uniform(2); return opt; } TableFactory* RandomTableFactory(Random* rnd, int pre_defined) { #ifndef ROCKSDB_LITE int random_num = pre_defined >= 0 ? 
pre_defined : rnd->Uniform(4); switch (random_num) { case 0: return NewPlainTableFactory(); case 1: return NewCuckooTableFactory(); default: return NewBlockBasedTableFactory(); } #else (void)rnd; (void)pre_defined; return NewBlockBasedTableFactory(); #endif // !ROCKSDB_LITE } MergeOperator* RandomMergeOperator(Random* rnd) { return new ChanglingMergeOperator(RandomName(rnd, 10)); } CompactionFilter* RandomCompactionFilter(Random* rnd) { return new ChanglingCompactionFilter(RandomName(rnd, 10)); } CompactionFilterFactory* RandomCompactionFilterFactory(Random* rnd) { return new ChanglingCompactionFilterFactory(RandomName(rnd, 10)); } void RandomInitDBOptions(DBOptions* db_opt, Random* rnd) { // boolean options db_opt->advise_random_on_open = rnd->Uniform(2); db_opt->allow_mmap_reads = rnd->Uniform(2); db_opt->allow_mmap_writes = rnd->Uniform(2); db_opt->use_direct_reads = rnd->Uniform(2); db_opt->use_direct_io_for_flush_and_compaction = rnd->Uniform(2); db_opt->create_if_missing = rnd->Uniform(2); db_opt->create_missing_column_families = rnd->Uniform(2); db_opt->enable_thread_tracking = rnd->Uniform(2); db_opt->error_if_exists = rnd->Uniform(2); db_opt->is_fd_close_on_exec = rnd->Uniform(2); db_opt->paranoid_checks = rnd->Uniform(2); db_opt->skip_log_error_on_recovery = rnd->Uniform(2); db_opt->skip_stats_update_on_db_open = rnd->Uniform(2); db_opt->skip_checking_sst_file_sizes_on_db_open = rnd->Uniform(2); db_opt->use_adaptive_mutex = rnd->Uniform(2); db_opt->use_fsync = rnd->Uniform(2); db_opt->recycle_log_file_num = rnd->Uniform(2); db_opt->avoid_flush_during_recovery = rnd->Uniform(2); db_opt->avoid_flush_during_shutdown = rnd->Uniform(2); // int options db_opt->max_background_compactions = rnd->Uniform(100); db_opt->max_background_flushes = rnd->Uniform(100); db_opt->max_file_opening_threads = rnd->Uniform(100); db_opt->max_open_files = rnd->Uniform(100); db_opt->table_cache_numshardbits = rnd->Uniform(100); // size_t options db_opt->db_write_buffer_size = 
rnd->Uniform(10000); db_opt->keep_log_file_num = rnd->Uniform(10000); db_opt->log_file_time_to_roll = rnd->Uniform(10000); db_opt->manifest_preallocation_size = rnd->Uniform(10000); db_opt->max_log_file_size = rnd->Uniform(10000); // std::string options db_opt->db_log_dir = "path/to/db_log_dir"; db_opt->wal_dir = "path/to/wal_dir"; // uint32_t options db_opt->max_subcompactions = rnd->Uniform(100000); // uint64_t options static const uint64_t uint_max = static_cast(UINT_MAX); db_opt->WAL_size_limit_MB = uint_max + rnd->Uniform(100000); db_opt->WAL_ttl_seconds = uint_max + rnd->Uniform(100000); db_opt->bytes_per_sync = uint_max + rnd->Uniform(100000); db_opt->delayed_write_rate = uint_max + rnd->Uniform(100000); db_opt->delete_obsolete_files_period_micros = uint_max + rnd->Uniform(100000); db_opt->max_manifest_file_size = uint_max + rnd->Uniform(100000); db_opt->max_total_wal_size = uint_max + rnd->Uniform(100000); db_opt->wal_bytes_per_sync = uint_max + rnd->Uniform(100000); // unsigned int options db_opt->stats_dump_period_sec = rnd->Uniform(100000); } void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions& db_options, Random* rnd) { cf_opt->compaction_style = (CompactionStyle)(rnd->Uniform(4)); // boolean options cf_opt->report_bg_io_stats = rnd->Uniform(2); cf_opt->disable_auto_compactions = rnd->Uniform(2); cf_opt->inplace_update_support = rnd->Uniform(2); cf_opt->level_compaction_dynamic_level_bytes = rnd->Uniform(2); cf_opt->optimize_filters_for_hits = rnd->Uniform(2); cf_opt->paranoid_file_checks = rnd->Uniform(2); cf_opt->purge_redundant_kvs_while_flush = rnd->Uniform(2); cf_opt->force_consistency_checks = rnd->Uniform(2); cf_opt->compaction_options_fifo.allow_compaction = rnd->Uniform(2); cf_opt->memtable_whole_key_filtering = rnd->Uniform(2); // double options cf_opt->hard_rate_limit = static_cast(rnd->Uniform(10000)) / 13; cf_opt->soft_rate_limit = static_cast(rnd->Uniform(10000)) / 13; cf_opt->memtable_prefix_bloom_size_ratio = 
static_cast(rnd->Uniform(10000)) / 20000.0; // int options cf_opt->level0_file_num_compaction_trigger = rnd->Uniform(100); cf_opt->level0_slowdown_writes_trigger = rnd->Uniform(100); cf_opt->level0_stop_writes_trigger = rnd->Uniform(100); cf_opt->max_bytes_for_level_multiplier = rnd->Uniform(100); cf_opt->max_mem_compaction_level = rnd->Uniform(100); cf_opt->max_write_buffer_number = rnd->Uniform(100); cf_opt->max_write_buffer_number_to_maintain = rnd->Uniform(100); cf_opt->max_write_buffer_size_to_maintain = rnd->Uniform(10000); cf_opt->min_write_buffer_number_to_merge = rnd->Uniform(100); cf_opt->num_levels = rnd->Uniform(100); cf_opt->target_file_size_multiplier = rnd->Uniform(100); // vector int options cf_opt->max_bytes_for_level_multiplier_additional.resize(cf_opt->num_levels); for (int i = 0; i < cf_opt->num_levels; i++) { cf_opt->max_bytes_for_level_multiplier_additional[i] = rnd->Uniform(100); } // size_t options cf_opt->arena_block_size = rnd->Uniform(10000); cf_opt->inplace_update_num_locks = rnd->Uniform(10000); cf_opt->max_successive_merges = rnd->Uniform(10000); cf_opt->memtable_huge_page_size = rnd->Uniform(10000); cf_opt->write_buffer_size = rnd->Uniform(10000); // uint32_t options cf_opt->bloom_locality = rnd->Uniform(10000); cf_opt->max_bytes_for_level_base = rnd->Uniform(10000); // uint64_t options static const uint64_t uint_max = static_cast(UINT_MAX); cf_opt->ttl = db_options.max_open_files == -1 ? uint_max + rnd->Uniform(10000) : 0; cf_opt->periodic_compaction_seconds = db_options.max_open_files == -1 ? 
uint_max + rnd->Uniform(10000) : 0; cf_opt->max_sequential_skip_in_iterations = uint_max + rnd->Uniform(10000); cf_opt->target_file_size_base = uint_max + rnd->Uniform(10000); cf_opt->max_compaction_bytes = cf_opt->target_file_size_base * rnd->Uniform(100); cf_opt->compaction_options_fifo.max_table_files_size = uint_max + rnd->Uniform(10000); // unsigned int options cf_opt->rate_limit_delay_max_milliseconds = rnd->Uniform(10000); // pointer typed options cf_opt->prefix_extractor.reset(RandomSliceTransform(rnd)); cf_opt->table_factory.reset(RandomTableFactory(rnd)); cf_opt->merge_operator.reset(RandomMergeOperator(rnd)); if (cf_opt->compaction_filter) { delete cf_opt->compaction_filter; } cf_opt->compaction_filter = RandomCompactionFilter(rnd); cf_opt->compaction_filter_factory.reset(RandomCompactionFilterFactory(rnd)); // custom typed options cf_opt->compression = RandomCompressionType(rnd); RandomCompressionTypeVector(cf_opt->num_levels, &cf_opt->compression_per_level, rnd); } Status DestroyDir(Env* env, const std::string& dir) { Status s; if (env->FileExists(dir).IsNotFound()) { return s; } std::vector files_in_dir; s = env->GetChildren(dir, &files_in_dir); if (s.ok()) { for (auto& file_in_dir : files_in_dir) { if (file_in_dir == "." 
|| file_in_dir == "..") { continue; } std::string path = dir + "/" + file_in_dir; bool is_dir = false; s = env->IsDirectory(path, &is_dir); if (s.ok()) { if (is_dir) { s = DestroyDir(env, path); } else { s = env->DeleteFile(path); } } if (!s.ok()) { break; } } } if (s.ok()) { s = env->DeleteDir(dir); } return s; } bool IsDirectIOSupported(Env* env, const std::string& dir) { EnvOptions env_options; env_options.use_mmap_writes = false; env_options.use_direct_writes = true; std::string tmp = TempFileName(dir, 999); Status s; { std::unique_ptr file; s = env->NewWritableFile(tmp, &file, env_options); } if (s.ok()) { s = env->DeleteFile(tmp); } return s.ok(); } size_t GetLinesCount(const std::string& fname, const std::string& pattern) { std::stringstream ssbuf; std::string line; size_t count = 0; std::ifstream inFile(fname.c_str()); ssbuf << inFile.rdbuf(); while (getline(ssbuf, line)) { if (line.find(pattern) != std::string::npos) { count++; } } return count; } void SetupSyncPointsToMockDirectIO() { #if !defined(NDEBUG) && !defined(OS_MACOSX) && !defined(OS_WIN) && \ !defined(OS_SOLARIS) && !defined(OS_AIX) && !defined(OS_OPENBSD) ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "NewWritableFile:O_DIRECT", [&](void* arg) { int* val = static_cast(arg); *val &= ~O_DIRECT; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "NewRandomAccessFile:O_DIRECT", [&](void* arg) { int* val = static_cast(arg); *val &= ~O_DIRECT; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); #endif } void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt) { struct stat sbuf; if (stat(fname.c_str(), &sbuf) != 0) { // strerror is not thread-safe so should not be used in the "passing" path // of unit tests (sometimes parallelized) but is OK here where test fails const char* msg = strerror(errno); fprintf(stderr, "%s:%s\n", fname.c_str(), msg); assert(false); } if (offset < 0) { // Relative to end of file; make it absolute if (-offset > 
sbuf.st_size) { offset = 0; } else { offset = static_cast(sbuf.st_size + offset); } } if (offset > sbuf.st_size) { offset = static_cast(sbuf.st_size); } if (offset + bytes_to_corrupt > sbuf.st_size) { bytes_to_corrupt = static_cast(sbuf.st_size - offset); } // Do it std::string contents; Status s = ReadFileToString(Env::Default(), fname, &contents); assert(s.ok()); for (int i = 0; i < bytes_to_corrupt; i++) { contents[i + offset] ^= 0x80; } s = WriteStringToFile(Env::Default(), contents, fname); assert(s.ok()); Options options; EnvOptions env_options; #ifndef ROCKSDB_LITE assert(!VerifySstFileChecksum(options, env_options, fname).ok()); #endif } } // namespace test } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/test_util/testutil.h000066400000000000000000000630731370372246700173200ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#pragma once #include #include #include #include #include "env/composite_env_wrapper.h" #include "file/writable_file_writer.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/table.h" #include "table/block_based/block_based_table_factory.h" #include "table/internal_iterator.h" #include "table/plain/plain_table_factory.h" #include "util/mutexlock.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { class SequentialFile; class SequentialFileReader; namespace test { extern const uint32_t kDefaultFormatVersion; extern const uint32_t kLatestFormatVersion; // Store in *dst a random string of length "len" and return a Slice that // references the generated data. extern Slice RandomString(Random* rnd, int len, std::string* dst); extern std::string RandomHumanReadableString(Random* rnd, int len); // Return a random key with the specified length that may contain interesting // characters (e.g. \x00, \xff, etc.). enum RandomKeyType : char { RANDOM, LARGEST, SMALLEST, MIDDLE }; extern std::string RandomKey(Random* rnd, int len, RandomKeyType type = RandomKeyType::RANDOM); // Store in *dst a string of length "len" that will compress to // "N*compressed_fraction" bytes and return a Slice that references // the generated data. extern Slice CompressibleString(Random* rnd, double compressed_fraction, int len, std::string* dst); // A wrapper that allows injection of errors. 
class ErrorEnv : public EnvWrapper { public: bool writable_file_error_; int num_writable_file_errors_; ErrorEnv() : EnvWrapper(Env::Default()), writable_file_error_(false), num_writable_file_errors_(0) { } virtual Status NewWritableFile(const std::string& fname, std::unique_ptr* result, const EnvOptions& soptions) override { result->reset(); if (writable_file_error_) { ++num_writable_file_errors_; return Status::IOError(fname, "fake error"); } return target()->NewWritableFile(fname, result, soptions); } }; #ifndef NDEBUG // An internal comparator that just forward comparing results from the // user comparator in it. Can be used to test entities that have no dependency // on internal key structure but consumes InternalKeyComparator, like // BlockBasedTable. class PlainInternalKeyComparator : public InternalKeyComparator { public: explicit PlainInternalKeyComparator(const Comparator* c) : InternalKeyComparator(c) {} virtual ~PlainInternalKeyComparator() {} virtual int Compare(const Slice& a, const Slice& b) const override { return user_comparator()->Compare(a, b); } }; #endif // A test comparator which compare two strings in this way: // (1) first compare prefix of 8 bytes in alphabet order, // (2) if two strings share the same prefix, sort the other part of the string // in the reverse alphabet order. // This helps simulate the case of compounded key of [entity][timestamp] and // latest timestamp first. 
class SimpleSuffixReverseComparator : public Comparator { public: SimpleSuffixReverseComparator() {} virtual const char* Name() const override { return "SimpleSuffixReverseComparator"; } virtual int Compare(const Slice& a, const Slice& b) const override { Slice prefix_a = Slice(a.data(), 8); Slice prefix_b = Slice(b.data(), 8); int prefix_comp = prefix_a.compare(prefix_b); if (prefix_comp != 0) { return prefix_comp; } else { Slice suffix_a = Slice(a.data() + 8, a.size() - 8); Slice suffix_b = Slice(b.data() + 8, b.size() - 8); return -(suffix_a.compare(suffix_b)); } } virtual void FindShortestSeparator(std::string* /*start*/, const Slice& /*limit*/) const override {} virtual void FindShortSuccessor(std::string* /*key*/) const override {} }; // Returns a user key comparator that can be used for comparing two uint64_t // slices. Instead of comparing slices byte-wise, it compares all the 8 bytes // at once. Assumes same endian-ness is used though the database's lifetime. // Symantics of comparison would differ from Bytewise comparator in little // endian machines. 
extern const Comparator* Uint64Comparator(); // Iterator over a vector of keys/values class VectorIterator : public InternalIterator { public: explicit VectorIterator(const std::vector& keys) : keys_(keys), current_(keys.size()) { std::sort(keys_.begin(), keys_.end()); values_.resize(keys.size()); } VectorIterator(const std::vector& keys, const std::vector& values) : keys_(keys), values_(values), current_(keys.size()) { assert(keys_.size() == values_.size()); } virtual bool Valid() const override { return current_ < keys_.size(); } virtual void SeekToFirst() override { current_ = 0; } virtual void SeekToLast() override { current_ = keys_.size() - 1; } virtual void Seek(const Slice& target) override { current_ = std::lower_bound(keys_.begin(), keys_.end(), target.ToString()) - keys_.begin(); } virtual void SeekForPrev(const Slice& target) override { current_ = std::upper_bound(keys_.begin(), keys_.end(), target.ToString()) - keys_.begin(); if (!Valid()) { SeekToLast(); } else { Prev(); } } virtual void Next() override { current_++; } virtual void Prev() override { current_--; } virtual Slice key() const override { return Slice(keys_[current_]); } virtual Slice value() const override { return Slice(values_[current_]); } virtual Status status() const override { return Status::OK(); } virtual bool IsKeyPinned() const override { return true; } virtual bool IsValuePinned() const override { return true; } private: std::vector keys_; std::vector values_; size_t current_; }; extern WritableFileWriter* GetWritableFileWriter(WritableFile* wf, const std::string& fname); extern RandomAccessFileReader* GetRandomAccessFileReader(RandomAccessFile* raf); extern SequentialFileReader* GetSequentialFileReader(SequentialFile* se, const std::string& fname); class StringSink: public WritableFile { public: std::string contents_; explicit StringSink(Slice* reader_contents = nullptr) : WritableFile(), contents_(""), reader_contents_(reader_contents), last_flush_(0) { if (reader_contents_ != 
nullptr) { *reader_contents_ = Slice(contents_.data(), 0); } } const std::string& contents() const { return contents_; } virtual Status Truncate(uint64_t size) override { contents_.resize(static_cast(size)); return Status::OK(); } virtual Status Close() override { return Status::OK(); } virtual Status Flush() override { if (reader_contents_ != nullptr) { assert(reader_contents_->size() <= last_flush_); size_t offset = last_flush_ - reader_contents_->size(); *reader_contents_ = Slice( contents_.data() + offset, contents_.size() - offset); last_flush_ = contents_.size(); } return Status::OK(); } virtual Status Sync() override { return Status::OK(); } virtual Status Append(const Slice& slice) override { contents_.append(slice.data(), slice.size()); return Status::OK(); } void Drop(size_t bytes) { if (reader_contents_ != nullptr) { contents_.resize(contents_.size() - bytes); *reader_contents_ = Slice( reader_contents_->data(), reader_contents_->size() - bytes); last_flush_ = contents_.size(); } } private: Slice* reader_contents_; size_t last_flush_; }; // A wrapper around a StringSink to give it a RandomRWFile interface class RandomRWStringSink : public RandomRWFile { public: explicit RandomRWStringSink(StringSink* ss) : ss_(ss) {} Status Write(uint64_t offset, const Slice& data) override { if (offset + data.size() > ss_->contents_.size()) { ss_->contents_.resize(static_cast(offset) + data.size(), '\0'); } char* pos = const_cast(ss_->contents_.data() + offset); memcpy(pos, data.data(), data.size()); return Status::OK(); } Status Read(uint64_t offset, size_t n, Slice* result, char* /*scratch*/) const override { *result = Slice(nullptr, 0); if (offset < ss_->contents_.size()) { size_t str_res_sz = std::min(static_cast(ss_->contents_.size() - offset), n); *result = Slice(ss_->contents_.data() + offset, str_res_sz); } return Status::OK(); } Status Flush() override { return Status::OK(); } Status Sync() override { return Status::OK(); } Status Close() override { return 
Status::OK(); } const std::string& contents() const { return ss_->contents(); } private: StringSink* ss_; }; // Like StringSink, this writes into a string. Unlink StringSink, it // has some initial content and overwrites it, just like a recycled // log file. class OverwritingStringSink : public WritableFile { public: explicit OverwritingStringSink(Slice* reader_contents) : WritableFile(), contents_(""), reader_contents_(reader_contents), last_flush_(0) {} const std::string& contents() const { return contents_; } virtual Status Truncate(uint64_t size) override { contents_.resize(static_cast(size)); return Status::OK(); } virtual Status Close() override { return Status::OK(); } virtual Status Flush() override { if (last_flush_ < contents_.size()) { assert(reader_contents_->size() >= contents_.size()); memcpy((char*)reader_contents_->data() + last_flush_, contents_.data() + last_flush_, contents_.size() - last_flush_); last_flush_ = contents_.size(); } return Status::OK(); } virtual Status Sync() override { return Status::OK(); } virtual Status Append(const Slice& slice) override { contents_.append(slice.data(), slice.size()); return Status::OK(); } void Drop(size_t bytes) { contents_.resize(contents_.size() - bytes); if (last_flush_ > contents_.size()) last_flush_ = contents_.size(); } private: std::string contents_; Slice* reader_contents_; size_t last_flush_; }; class StringSource: public RandomAccessFile { public: explicit StringSource(const Slice& contents, uint64_t uniq_id = 0, bool mmap = false) : contents_(contents.data(), contents.size()), uniq_id_(uniq_id), mmap_(mmap), total_reads_(0) {} virtual ~StringSource() { } uint64_t Size() const { return contents_.size(); } virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { total_reads_++; if (offset > contents_.size()) { return Status::InvalidArgument("invalid Read offset"); } if (offset + n > contents_.size()) { n = contents_.size() - static_cast(offset); } if (!mmap_) { 
memcpy(scratch, &contents_[static_cast(offset)], n); *result = Slice(scratch, n); } else { *result = Slice(&contents_[static_cast(offset)], n); } return Status::OK(); } virtual size_t GetUniqueId(char* id, size_t max_size) const override { if (max_size < 20) { return 0; } char* rid = id; rid = EncodeVarint64(rid, uniq_id_); rid = EncodeVarint64(rid, 0); return static_cast(rid-id); } int total_reads() const { return total_reads_; } void set_total_reads(int tr) { total_reads_ = tr; } private: std::string contents_; uint64_t uniq_id_; bool mmap_; mutable int total_reads_; }; inline StringSink* GetStringSinkFromLegacyWriter( const WritableFileWriter* writer) { LegacyWritableFileWrapper* file = static_cast(writer->writable_file()); return static_cast(file->target()); } class NullLogger : public Logger { public: using Logger::Logv; virtual void Logv(const char* /*format*/, va_list /*ap*/) override {} virtual size_t GetLogFileSize() const override { return 0; } }; // Corrupts key by changing the type extern void CorruptKeyType(InternalKey* ikey); extern std::string KeyStr(const std::string& user_key, const SequenceNumber& seq, const ValueType& t, bool corrupt = false); class SleepingBackgroundTask { public: SleepingBackgroundTask() : bg_cv_(&mutex_), should_sleep_(true), done_with_sleep_(false), sleeping_(false) {} bool IsSleeping() { MutexLock l(&mutex_); return sleeping_; } void DoSleep() { MutexLock l(&mutex_); sleeping_ = true; bg_cv_.SignalAll(); while (should_sleep_) { bg_cv_.Wait(); } sleeping_ = false; done_with_sleep_ = true; bg_cv_.SignalAll(); } void WaitUntilSleeping() { MutexLock l(&mutex_); while (!sleeping_ || !should_sleep_) { bg_cv_.Wait(); } } // Waits for the status to change to sleeping, // otherwise times out. // wait_time is in microseconds. // Returns true when times out, false otherwise. 
bool TimedWaitUntilSleeping(uint64_t wait_time) { auto abs_time = Env::Default()->NowMicros() + wait_time; MutexLock l(&mutex_); while (!sleeping_ || !should_sleep_) { if (bg_cv_.TimedWait(abs_time)) { return true; } } return false; } void WakeUp() { MutexLock l(&mutex_); should_sleep_ = false; bg_cv_.SignalAll(); } void WaitUntilDone() { MutexLock l(&mutex_); while (!done_with_sleep_) { bg_cv_.Wait(); } } // Similar to TimedWaitUntilSleeping. // Waits until the task is done. bool TimedWaitUntilDone(uint64_t wait_time) { auto abs_time = Env::Default()->NowMicros() + wait_time; MutexLock l(&mutex_); while (!done_with_sleep_) { if (bg_cv_.TimedWait(abs_time)) { return true; } } return false; } bool WokenUp() { MutexLock l(&mutex_); return should_sleep_ == false; } void Reset() { MutexLock l(&mutex_); should_sleep_ = true; done_with_sleep_ = false; } static void DoSleepTask(void* arg) { reinterpret_cast(arg)->DoSleep(); } private: port::Mutex mutex_; port::CondVar bg_cv_; // Signalled when background work finishes bool should_sleep_; bool done_with_sleep_; bool sleeping_; }; // Filters merge operands and values that are equal to `num`. 
class FilterNumber : public CompactionFilter { public: explicit FilterNumber(uint64_t num) : num_(num) {} std::string last_merge_operand_key() { return last_merge_operand_key_; } bool Filter(int /*level*/, const ROCKSDB_NAMESPACE::Slice& /*key*/, const ROCKSDB_NAMESPACE::Slice& value, std::string* /*new_value*/, bool* /*value_changed*/) const override { if (value.size() == sizeof(uint64_t)) { return num_ == DecodeFixed64(value.data()); } return true; } bool FilterMergeOperand( int /*level*/, const ROCKSDB_NAMESPACE::Slice& key, const ROCKSDB_NAMESPACE::Slice& value) const override { last_merge_operand_key_ = key.ToString(); if (value.size() == sizeof(uint64_t)) { return num_ == DecodeFixed64(value.data()); } return true; } const char* Name() const override { return "FilterBadMergeOperand"; } private: mutable std::string last_merge_operand_key_; uint64_t num_; }; inline std::string EncodeInt(uint64_t x) { std::string result; PutFixed64(&result, x); return result; } class SeqStringSource : public SequentialFile { public: SeqStringSource(const std::string& data, std::atomic* read_count) : data_(data), offset_(0), read_count_(read_count) {} ~SeqStringSource() override {} Status Read(size_t n, Slice* result, char* scratch) override { std::string output; if (offset_ < data_.size()) { n = std::min(data_.size() - offset_, n); memcpy(scratch, data_.data() + offset_, n); offset_ += n; *result = Slice(scratch, n); } else { return Status::InvalidArgument( "Attemp to read when it already reached eof."); } (*read_count_)++; return Status::OK(); } Status Skip(uint64_t n) override { if (offset_ >= data_.size()) { return Status::InvalidArgument( "Attemp to read when it already reached eof."); } // TODO(yhchiang): Currently doesn't handle the overflow case. 
offset_ += static_cast(n); return Status::OK(); } private: std::string data_; size_t offset_; std::atomic* read_count_; }; class StringEnv : public EnvWrapper { public: class StringSink : public WritableFile { public: explicit StringSink(std::string* contents) : WritableFile(), contents_(contents) {} virtual Status Truncate(uint64_t size) override { contents_->resize(static_cast(size)); return Status::OK(); } virtual Status Close() override { return Status::OK(); } virtual Status Flush() override { return Status::OK(); } virtual Status Sync() override { return Status::OK(); } virtual Status Append(const Slice& slice) override { contents_->append(slice.data(), slice.size()); return Status::OK(); } private: std::string* contents_; }; explicit StringEnv(Env* t) : EnvWrapper(t) {} ~StringEnv() override {} const std::string& GetContent(const std::string& f) { return files_[f]; } const Status WriteToNewFile(const std::string& file_name, const std::string& content) { std::unique_ptr r; auto s = NewWritableFile(file_name, &r, EnvOptions()); if (s.ok()) { s = r->Append(content); } if (s.ok()) { s = r->Flush(); } if (s.ok()) { s = r->Close(); } assert(!s.ok() || files_[file_name] == content); return s; } // The following text is boilerplate that forwards all methods to target() Status NewSequentialFile(const std::string& f, std::unique_ptr* r, const EnvOptions& /*options*/) override { auto iter = files_.find(f); if (iter == files_.end()) { return Status::NotFound("The specified file does not exist", f); } r->reset(new SeqStringSource(iter->second, &num_seq_file_read_)); return Status::OK(); } Status NewRandomAccessFile(const std::string& /*f*/, std::unique_ptr* /*r*/, const EnvOptions& /*options*/) override { return Status::NotSupported(); } Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& /*options*/) override { auto iter = files_.find(f); if (iter != files_.end()) { return Status::IOError("The specified file already exists", f); } 
r->reset(new StringSink(&files_[f])); return Status::OK(); } virtual Status NewDirectory( const std::string& /*name*/, std::unique_ptr* /*result*/) override { return Status::NotSupported(); } Status FileExists(const std::string& f) override { if (files_.find(f) == files_.end()) { return Status::NotFound(); } return Status::OK(); } Status GetChildren(const std::string& /*dir*/, std::vector* /*r*/) override { return Status::NotSupported(); } Status DeleteFile(const std::string& f) override { files_.erase(f); return Status::OK(); } Status CreateDir(const std::string& /*d*/) override { return Status::NotSupported(); } Status CreateDirIfMissing(const std::string& /*d*/) override { return Status::NotSupported(); } Status DeleteDir(const std::string& /*d*/) override { return Status::NotSupported(); } Status GetFileSize(const std::string& f, uint64_t* s) override { auto iter = files_.find(f); if (iter == files_.end()) { return Status::NotFound("The specified file does not exist:", f); } *s = iter->second.size(); return Status::OK(); } Status GetFileModificationTime(const std::string& /*fname*/, uint64_t* /*file_mtime*/) override { return Status::NotSupported(); } Status RenameFile(const std::string& /*s*/, const std::string& /*t*/) override { return Status::NotSupported(); } Status LinkFile(const std::string& /*s*/, const std::string& /*t*/) override { return Status::NotSupported(); } Status LockFile(const std::string& /*f*/, FileLock** /*l*/) override { return Status::NotSupported(); } Status UnlockFile(FileLock* /*l*/) override { return Status::NotSupported(); } std::atomic num_seq_file_read_; protected: std::unordered_map files_; }; // Randomly initialize the given DBOptions void RandomInitDBOptions(DBOptions* db_opt, Random* rnd); // Randomly initialize the given ColumnFamilyOptions // Note that the caller is responsible for releasing non-null // cf_opt->compaction_filter. 
void RandomInitCFOptions(ColumnFamilyOptions* cf_opt, DBOptions&, Random* rnd); // A dummy merge operator which can change its name class ChanglingMergeOperator : public MergeOperator { public: explicit ChanglingMergeOperator(const std::string& name) : name_(name + "MergeOperator") {} ~ChanglingMergeOperator() {} void SetName(const std::string& name) { name_ = name; } virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/, MergeOperationOutput* /*merge_out*/) const override { return false; } virtual bool PartialMergeMulti(const Slice& /*key*/, const std::deque& /*operand_list*/, std::string* /*new_value*/, Logger* /*logger*/) const override { return false; } virtual const char* Name() const override { return name_.c_str(); } protected: std::string name_; }; // Returns a dummy merge operator with random name. MergeOperator* RandomMergeOperator(Random* rnd); // A dummy compaction filter which can change its name class ChanglingCompactionFilter : public CompactionFilter { public: explicit ChanglingCompactionFilter(const std::string& name) : name_(name + "CompactionFilter") {} ~ChanglingCompactionFilter() {} void SetName(const std::string& name) { name_ = name; } bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { return false; } const char* Name() const override { return name_.c_str(); } private: std::string name_; }; // Returns a dummy compaction filter with a random name. 
CompactionFilter* RandomCompactionFilter(Random* rnd); // A dummy compaction filter factory which can change its name class ChanglingCompactionFilterFactory : public CompactionFilterFactory { public: explicit ChanglingCompactionFilterFactory(const std::string& name) : name_(name + "CompactionFilterFactory") {} ~ChanglingCompactionFilterFactory() {} void SetName(const std::string& name) { name_ = name; } std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& /*context*/) override { return std::unique_ptr(); } // Returns a name that identifies this compaction filter factory. const char* Name() const override { return name_.c_str(); } protected: std::string name_; }; extern const Comparator* ComparatorWithU64Ts(); CompressionType RandomCompressionType(Random* rnd); void RandomCompressionTypeVector(const size_t count, std::vector* types, Random* rnd); CompactionFilterFactory* RandomCompactionFilterFactory(Random* rnd); const SliceTransform* RandomSliceTransform(Random* rnd, int pre_defined = -1); TableFactory* RandomTableFactory(Random* rnd, int pre_defined = -1); std::string RandomName(Random* rnd, const size_t len); Status DestroyDir(Env* env, const std::string& dir); bool IsDirectIOSupported(Env* env, const std::string& dir); // Return the number of lines where a given pattern was found in a file. size_t GetLinesCount(const std::string& fname, const std::string& pattern); // TEST_TMPDIR may be set to /dev/shm in Makefile, // but /dev/shm does not support direct IO. // Tries to set TEST_TMPDIR to a directory supporting direct IO. void ResetTmpDirForDirectIO(); // Sets up sync points to mock direct IO instead of actually issuing direct IO // to the file system. 
void SetupSyncPointsToMockDirectIO(); void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt); } // namespace test } // namespace ROCKSDB_NAMESPACE rocksdb-6.11.4/test_util/testutil_test.cc000066400000000000000000000023471370372246700205120ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include "test_util/testutil.h" #include "port/port.h" #include "port/stack_trace.h" #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { void CreateFile(Env* env, const std::string& path) { std::unique_ptr f; ASSERT_OK(env->NewWritableFile(path, &f, EnvOptions())); f->Close(); } TEST(TestUtil, DestroyDirRecursively) { auto env = Env::Default(); // test_util/file // /dir // /dir/file std::string test_dir = test::PerThreadDBPath("test_util"); ASSERT_OK(env->CreateDir(test_dir)); CreateFile(env, test_dir + "/file"); ASSERT_OK(env->CreateDir(test_dir + "/dir")); CreateFile(env, test_dir + "/dir/file"); ASSERT_OK(test::DestroyDir(env, test_dir)); auto s = env->FileExists(test_dir); ASSERT_TRUE(s.IsNotFound()); } } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } rocksdb-6.11.4/test_util/transaction_test_util.cc000066400000000000000000000321401370372246700222110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#ifndef ROCKSDB_LITE #include "test_util/transaction_test_util.h" #include #include #include #include #include #include #include "rocksdb/db.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" #include "db/dbformat.h" #include "db/snapshot_impl.h" #include "logging/logging.h" #include "util/random.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { RandomTransactionInserter::RandomTransactionInserter( Random64* rand, const WriteOptions& write_options, const ReadOptions& read_options, uint64_t num_keys, uint16_t num_sets, const uint64_t cmt_delay_ms, const uint64_t first_id) : rand_(rand), write_options_(write_options), read_options_(read_options), num_keys_(num_keys), num_sets_(num_sets), txn_id_(first_id), cmt_delay_ms_(cmt_delay_ms) {} RandomTransactionInserter::~RandomTransactionInserter() { if (txn_ != nullptr) { delete txn_; } if (optimistic_txn_ != nullptr) { delete optimistic_txn_; } } bool RandomTransactionInserter::TransactionDBInsert( TransactionDB* db, const TransactionOptions& txn_options) { txn_ = db->BeginTransaction(write_options_, txn_options, txn_); std::hash hasher; char name[64]; snprintf(name, 64, "txn%" ROCKSDB_PRIszt "-%" PRIu64, hasher(std::this_thread::get_id()), txn_id_++); assert(strlen(name) < 64 - 1); assert(txn_->SetName(name).ok()); // Take a snapshot if set_snapshot was not set or with 50% change otherwise bool take_snapshot = txn_->GetSnapshot() == nullptr || rand_->OneIn(2); if (take_snapshot) { txn_->SetSnapshot(); read_options_.snapshot = txn_->GetSnapshot(); } auto res = DoInsert(db, txn_, false); if (take_snapshot) { read_options_.snapshot = nullptr; } return res; } bool RandomTransactionInserter::OptimisticTransactionDBInsert( OptimisticTransactionDB* db, const OptimisticTransactionOptions& txn_options) { optimistic_txn_ = db->BeginTransaction(write_options_, txn_options, optimistic_txn_); return DoInsert(db, 
optimistic_txn_, true); } bool RandomTransactionInserter::DBInsert(DB* db) { return DoInsert(db, nullptr, false); } Status RandomTransactionInserter::DBGet( DB* db, Transaction* txn, ReadOptions& read_options, uint16_t set_i, uint64_t ikey, bool get_for_update, uint64_t* int_value, std::string* full_key, bool* unexpected_error) { Status s; // Five digits (since the largest uint16_t is 65535) plus the NUL // end char. char prefix_buf[6]; // Pad prefix appropriately so we can iterate over each set assert(set_i + 1 <= 9999); snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", set_i + 1); // key format: [SET#][random#] std::string skey = ToString(ikey); Slice base_key(skey); *full_key = std::string(prefix_buf) + base_key.ToString(); Slice key(*full_key); std::string value; if (txn != nullptr) { if (get_for_update) { s = txn->GetForUpdate(read_options, key, &value); } else { s = txn->Get(read_options, key, &value); } } else { s = db->Get(read_options, key, &value); } if (s.ok()) { // Found key, parse its value *int_value = std::stoull(value); if (*int_value == 0 || *int_value == ULONG_MAX) { *unexpected_error = true; fprintf(stderr, "Get returned unexpected value: %s\n", value.c_str()); s = Status::Corruption(); } } else if (s.IsNotFound()) { // Have not yet written to this key, so assume its value is 0 *int_value = 0; s = Status::OK(); } return s; } bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn, bool is_optimistic) { Status s; WriteBatch batch; // pick a random number to use to increment a key in each set uint64_t incr = (rand_->Next() % 100) + 1; bool unexpected_error = false; std::vector set_vec(num_sets_); std::iota(set_vec.begin(), set_vec.end(), static_cast(0)); RandomShuffle(set_vec.begin(), set_vec.end()); // For each set, pick a key at random and increment it for (uint16_t set_i : set_vec) { uint64_t int_value = 0; std::string full_key; uint64_t rand_key = rand_->Next() % num_keys_; const bool get_for_update = txn ? 
rand_->OneIn(2) : false; s = DBGet(db, txn, read_options_, set_i, rand_key, get_for_update, &int_value, &full_key, &unexpected_error); Slice key(full_key); if (!s.ok()) { // Optimistic transactions should never return non-ok status here. // Non-optimistic transactions may return write-coflict/timeout errors. if (is_optimistic || !(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { fprintf(stderr, "Get returned an unexpected error: %s\n", s.ToString().c_str()); unexpected_error = true; } break; } if (s.ok()) { // Increment key std::string sum = ToString(int_value + incr); if (txn != nullptr) { s = txn->Put(key, sum); if (!get_for_update && (s.IsBusy() || s.IsTimedOut())) { // If the initial get was not for update, then the key is not locked // before put and put could fail due to concurrent writes. break; } else if (!s.ok()) { // Since we did a GetForUpdate, Put should not fail. fprintf(stderr, "Put returned an unexpected error: %s\n", s.ToString().c_str()); unexpected_error = true; } } else { batch.Put(key, sum); } bytes_inserted_ += key.size() + sum.size(); } if (txn != nullptr) { ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, "Insert (%s) %s snap: %" PRIu64 " key:%s value: %" PRIu64 "+%" PRIu64 "=%" PRIu64, txn->GetName().c_str(), s.ToString().c_str(), txn->GetSnapshot()->GetSequenceNumber(), full_key.c_str(), int_value, incr, int_value + incr); } } if (s.ok()) { if (txn != nullptr) { bool with_prepare = !is_optimistic && !rand_->OneIn(10); if (with_prepare) { // Also try commit without prepare s = txn->Prepare(); assert(s.ok()); ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, "Prepare of %" PRIu64 " %s (%s)", txn->GetId(), s.ToString().c_str(), txn->GetName().c_str()); if (rand_->OneIn(20)) { // This currently only tests the mechanics of writing commit time // write batch so the exact values would not matter. 
s = txn_->GetCommitTimeWriteBatch()->Put("cat", "dog"); assert(s.ok()); } db->GetDBOptions().env->SleepForMicroseconds( static_cast(cmt_delay_ms_ * 1000)); } if (!rand_->OneIn(20)) { s = txn->Commit(); assert(!with_prepare || s.ok()); ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, "Commit of %" PRIu64 " %s (%s)", txn->GetId(), s.ToString().c_str(), txn->GetName().c_str()); } else { // Also try 5% rollback s = txn->Rollback(); ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, "Rollback %" PRIu64 " %s %s", txn->GetId(), txn->GetName().c_str(), s.ToString().c_str()); assert(s.ok()); } assert(is_optimistic || s.ok()); if (!s.ok()) { if (is_optimistic) { // Optimistic transactions can have write-conflict errors on commit. // Any other error is unexpected. if (!(s.IsBusy() || s.IsTimedOut() || s.IsTryAgain())) { unexpected_error = true; } } else { // Non-optimistic transactions should only fail due to expiration // or write failures. For testing purproses, we do not expect any // write failures. if (!s.IsExpired()) { unexpected_error = true; } } if (unexpected_error) { fprintf(stderr, "Commit returned an unexpected error: %s\n", s.ToString().c_str()); } } } else { s = db->Write(write_options_, &batch); if (!s.ok()) { unexpected_error = true; fprintf(stderr, "Write returned an unexpected error: %s\n", s.ToString().c_str()); } } } else { if (txn != nullptr) { assert(txn->Rollback().ok()); ROCKS_LOG_DEBUG(db->GetDBOptions().info_log, "Error %s for txn %s", s.ToString().c_str(), txn->GetName().c_str()); } } if (s.ok()) { success_count_++; } else { failure_count_++; } last_status_ = s; // return success if we didn't get any unexpected errors return !unexpected_error; } // Verify that the sum of the keys in each set are equal Status RandomTransactionInserter::Verify(DB* db, uint16_t num_sets, uint64_t num_keys_per_set, bool take_snapshot, Random64* rand, uint64_t delay_ms) { // delay_ms is the delay between taking a snapshot and doing the reads. 
It // emulates reads from a long-running backup job. assert(delay_ms == 0 || take_snapshot); uint64_t prev_total = 0; uint32_t prev_i = 0; bool prev_assigned = false; ReadOptions roptions; if (take_snapshot) { roptions.snapshot = db->GetSnapshot(); db->GetDBOptions().env->SleepForMicroseconds( static_cast(delay_ms * 1000)); } std::vector set_vec(num_sets); std::iota(set_vec.begin(), set_vec.end(), static_cast(0)); RandomShuffle(set_vec.begin(), set_vec.end()); // For each set of keys with the same prefix, sum all the values for (uint16_t set_i : set_vec) { // Five digits (since the largest uint16_t is 65535) plus the NUL // end char. char prefix_buf[6]; assert(set_i + 1 <= 9999); snprintf(prefix_buf, sizeof(prefix_buf), "%.4u", set_i + 1); uint64_t total = 0; // Use either point lookup or iterator. Point lookups are slower so we use // it less often. const bool use_point_lookup = num_keys_per_set != 0 && rand && rand->OneIn(10); if (use_point_lookup) { ReadOptions read_options; for (uint64_t k = 0; k < num_keys_per_set; k++) { std::string dont_care; uint64_t int_value = 0; bool unexpected_error = false; const bool FOR_UPDATE = false; Status s = DBGet(db, nullptr, roptions, set_i, k, FOR_UPDATE, &int_value, &dont_care, &unexpected_error); assert(s.ok()); assert(!unexpected_error); total += int_value; } } else { // user iterators Iterator* iter = db->NewIterator(roptions); for (iter->Seek(Slice(prefix_buf, 4)); iter->Valid(); iter->Next()) { Slice key = iter->key(); // stop when we reach a different prefix if (key.ToString().compare(0, 4, prefix_buf) != 0) { break; } Slice value = iter->value(); uint64_t int_value = std::stoull(value.ToString()); if (int_value == 0 || int_value == ULONG_MAX) { fprintf(stderr, "Iter returned unexpected value: %s\n", value.ToString().c_str()); return Status::Corruption(); } ROCKS_LOG_DEBUG( db->GetDBOptions().info_log, "VerifyRead at %" PRIu64 " (%" PRIu64 "): %.*s value: %" PRIu64, roptions.snapshot ? 
roptions.snapshot->GetSequenceNumber() : 0ul, roptions.snapshot ? ((SnapshotImpl*)roptions.snapshot)->min_uncommitted_ : 0ul, static_cast(key.size()), key.data(), int_value); total += int_value; } delete iter; } if (prev_assigned && total != prev_total) { db->GetDBOptions().info_log->Flush(); fprintf(stdout, "RandomTransactionVerify found inconsistent totals using " "pointlookup? %d " "Set[%" PRIu32 "]: %" PRIu64 ", Set[%" PRIu32 "]: %" PRIu64 " at snapshot %" PRIu64 "\n", use_point_lookup, prev_i, prev_total, set_i, total, roptions.snapshot ? roptions.snapshot->GetSequenceNumber() : 0ul); fflush(stdout); return Status::Corruption(); } else { ROCKS_LOG_DEBUG( db->GetDBOptions().info_log, "RandomTransactionVerify pass pointlookup? %d total: %" PRIu64 " snap: %" PRIu64, use_point_lookup, total, roptions.snapshot ? roptions.snapshot->GetSequenceNumber() : 0ul); } prev_total = total; prev_i = set_i; prev_assigned = true; } if (take_snapshot) { db->ReleaseSnapshot(roptions.snapshot); } return Status::OK(); } } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE rocksdb-6.11.4/test_util/transaction_test_util.h000066400000000000000000000114751370372246700220630ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #ifndef ROCKSDB_LITE #include "rocksdb/options.h" #include "port/port.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction_db.h" namespace ROCKSDB_NAMESPACE { class DB; class Random64; // Utility class for stress testing transactions. Can be used to write many // transactions in parallel and then validate that the data written is logically // consistent. This class assumes the input DB is initially empty. 
//
// Each call to TransactionDBInsert()/OptimisticTransactionDBInsert() will
// increment the value of a key in #num_sets sets of keys. Regardless of
// whether the transaction succeeds, the total sum of values of keys in each
// set is an invariant that should remain equal.
//
// After calling TransactionDBInsert()/OptimisticTransactionDBInsert() many
// times, Verify() can be called to validate that the invariant holds.
//
// To test writing Transaction in parallel, multiple threads can create a
// RandomTransactionInserter with similar arguments using the same DB.
class RandomTransactionInserter {
 public:
  // num_keys is the number of keys in each set.
  // num_sets is the number of sets of keys.
  // cmt_delay_ms is the delay between prepare (if there is any) and commit
  // first_id is the id of the first transaction
  explicit RandomTransactionInserter(
      Random64* rand, const WriteOptions& write_options = WriteOptions(),
      const ReadOptions& read_options = ReadOptions(), uint64_t num_keys = 1000,
      uint16_t num_sets = 3, const uint64_t cmt_delay_ms = 0,
      const uint64_t first_id = 0);

  ~RandomTransactionInserter();

  // Increment a key in each set using a Transaction on a TransactionDB.
  //
  // Returns true if the transaction succeeded OR if any error encountered was
  // expected (eg a write-conflict). Error status may be obtained by calling
  // GetLastStatus();
  bool TransactionDBInsert(
      TransactionDB* db,
      const TransactionOptions& txn_options = TransactionOptions());

  // Increment a key in each set using a Transaction on an
  // OptimisticTransactionDB
  //
  // Returns true if the transaction succeeded OR if any error encountered was
  // expected (eg a write-conflict). Error status may be obtained by calling
  // GetLastStatus();
  bool OptimisticTransactionDBInsert(
      OptimisticTransactionDB* db,
      const OptimisticTransactionOptions& txn_options =
          OptimisticTransactionOptions());

  // Increment a key in each set without using a transaction. If this function
  // is called in parallel, then Verify() may fail.
  //
  // Returns true if the write succeeds.
  // Error status may be obtained by calling GetLastStatus().
  bool DBInsert(DB* db);

  // Get the ikey'th key from set set_i
  //
  // The numeric value read is written to *int_value, and *unexpected_error is
  // an out-flag that Verify() asserts is false after a successful call.
  // Verify() passes txn == nullptr; presumably the read then goes straight to
  // `db` instead of through a transaction -- confirm against the definition.
  static Status DBGet(DB* db, Transaction* txn, ReadOptions& read_options,
                      uint16_t set_i, uint64_t ikey, bool get_for_update,
                      uint64_t* int_value, std::string* full_key,
                      bool* unexpected_error);

  // Returns OK if Invariant is true.
  //
  // Notes drawn from the part of the definition that follows the snapshot
  // setup:
  //  - delay_ms may be non-zero only together with take_snapshot (asserted).
  //    With take_snapshot the reads use a snapshot taken up front, the call
  //    sleeps delay_ms milliseconds before reading, and the snapshot is
  //    released before returning OK.
  //  - The sets are visited in shuffled order and the values of each set are
  //    summed. When rand is non-null and num_keys_per_set != 0, roughly one
  //    set in ten is summed with point lookups of keys
  //    [0, num_keys_per_set); otherwise an iterator over the set's prefix is
  //    used.
  //  - Status::Corruption() is returned when two sets produce different
  //    totals, or when the iterator yields a value that parses to 0 or
  //    ULONG_MAX.
  static Status Verify(DB* db, uint16_t num_sets, uint64_t num_keys_per_set = 0,
                       bool take_snapshot = false, Random64* rand = nullptr,
                       uint64_t delay_ms = 0);

  // Returns the status of the previous Insert operation
  Status GetLastStatus() { return last_status_; }

  // Returns the number of successfully written calls to
  // TransactionDBInsert/OptimisticTransactionDBInsert/DBInsert
  uint64_t GetSuccessCount() { return success_count_; }

  // Returns the number of calls to
  // TransactionDBInsert/OptimisticTransactionDBInsert/DBInsert that did not
  // write any data.
  uint64_t GetFailureCount() { return failure_count_; }

  // Returns the sum of user keys/values Put() to the DB.
  size_t GetBytesInserted() { return bytes_inserted_; }

 private:
  // Input options
  Random64* rand_;
  const WriteOptions write_options_;
  ReadOptions read_options_;
  const uint64_t num_keys_;
  const uint16_t num_sets_;

  // Number of successful insert batches performed
  uint64_t success_count_ = 0;

  // Number of failed insert batches attempted
  uint64_t failure_count_ = 0;

  size_t bytes_inserted_ = 0;

  // Status returned by most recent insert operation
  Status last_status_;

  // optimization: re-use allocated transaction objects.
  Transaction* txn_ = nullptr;
  Transaction* optimistic_txn_ = nullptr;

  // No in-class initializer here; presumably seeded from the constructor's
  // first_id argument -- confirm against the constructor definition.
  uint64_t txn_id_;
  // The delay between ::Prepare and ::Commit
  const uint64_t cmt_delay_ms_;

  bool DoInsert(DB* db, Transaction* txn, bool is_optimistic);
};

}  // namespace ROCKSDB_NAMESPACE

#endif  // ROCKSDB_LITE
rocksdb-6.11.4/third-party/000077500000000000000000000000001370372246700155145ustar00rootroot00000000000000rocksdb-6.11.4/third-party/folly/000077500000000000000000000000001370372246700166415ustar00rootroot00000000000000rocksdb-6.11.4/third-party/folly/folly/000077500000000000000000000000001370372246700177665ustar00rootroot00000000000000rocksdb-6.11.4/third-party/folly/folly/CPortability.h000066400000000000000000000013411370372246700225430ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

/**
 * Macro for marking functions as having public visibility.
 */
#if defined(__GNUC__)
#define FOLLY_EXPORT __attribute__((__visibility__("default")))
#else
#define FOLLY_EXPORT
#endif

// FOLLY_HAS_FEATURE(x) forwards to __has_feature(x) when the compiler
// provides it and evaluates to 0 otherwise.
#if defined(__has_feature)
#define FOLLY_HAS_FEATURE(...) __has_feature(__VA_ARGS__)
#else
#define FOLLY_HAS_FEATURE(...) 0
#endif

// Define FOLLY_SANITIZE_THREAD (unless the build already set it) when either
// the thread_sanitizer feature test or __SANITIZE_THREAD__ is active.
#if FOLLY_HAS_FEATURE(thread_sanitizer) || __SANITIZE_THREAD__
#ifndef FOLLY_SANITIZE_THREAD
#define FOLLY_SANITIZE_THREAD 1
#endif
#endif
rocksdb-6.11.4/third-party/folly/folly/ConstexprMath.h000066400000000000000000000021501370372246700227340ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once namespace folly { template constexpr T constexpr_max(T a) { return a; } template constexpr T constexpr_max(T a, T b, Ts... ts) { return b < a ? constexpr_max(a, ts...) : constexpr_max(b, ts...); } namespace detail { template constexpr T constexpr_log2_(T a, T e) { return e == T(1) ? a : constexpr_log2_(a + T(1), e / T(2)); } template constexpr T constexpr_log2_ceil_(T l2, T t) { return l2 + T(T(1) << l2 < t ? 1 : 0); } template constexpr T constexpr_square_(T t) { return t * t; } } // namespace detail template constexpr T constexpr_log2(T t) { return detail::constexpr_log2_(T(0), t); } template constexpr T constexpr_log2_ceil(T t) { return detail::constexpr_log2_ceil_(constexpr_log2(t), t); } } // namespace folly rocksdb-6.11.4/third-party/folly/folly/Indestructible.h000066400000000000000000000122361370372246700231230ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include namespace folly { /*** * Indestructible * * When you need a Meyers singleton that will not get destructed, even at * shutdown, and you also want the object stored inline. * * Use like: * * void doSomethingWithExpensiveData(); * * void doSomethingWithExpensiveData() { * static const Indestructible> data{ * map{{"key1", 17}, {"key2", 19}, {"key3", 23}}, * }; * callSomethingTakingAMapByRef(*data); * } * * This should be used only for Meyers singletons, and, even then, only when * the instance does not need to be destructed ever. * * This should not be used more generally, e.g., as member fields, etc. 
* * This is designed as an alternative, but with one fewer allocation at * construction time and one fewer pointer dereference at access time, to the * Meyers singleton pattern of: * * void doSomethingWithExpensiveData() { * static const auto data = // never `delete`d * new map{{"key1", 17}, {"key2", 19}, {"key3", 23}}; * callSomethingTakingAMapByRef(*data); * } */ template class Indestructible final { public: template constexpr Indestructible() noexcept(noexcept(T())) {} /** * Constructor accepting a single argument by forwarding reference, this * allows using list initialzation without the overhead of things like * in_place, etc and also works with std::initializer_list constructors * which can't be deduced, the default parameter helps there. * * auto i = folly::Indestructible>{{{1, 2}}}; * * This provides convenience * * There are two versions of this constructor - one for when the element is * implicitly constructible from the given argument and one for when the * type is explicitly but not implicitly constructible from the given * argument. */ template < typename U = T, _t::value>>* = nullptr, _t, remove_cvref_t>::value>>* = nullptr, _t::value>>* = nullptr> explicit constexpr Indestructible(U&& u) noexcept( noexcept(T(std::declval()))) : storage_(std::forward(u)) {} template < typename U = T, _t::value>>* = nullptr, _t, remove_cvref_t>::value>>* = nullptr, _t::value>>* = nullptr> /* implicit */ constexpr Indestructible(U&& u) noexcept( noexcept(T(std::declval()))) : storage_(std::forward(u)) {} template ()...))> explicit constexpr Indestructible(Args&&... args) noexcept( noexcept(T(std::declval()...))) : storage_(std::forward(args)...) {} template < typename U, typename... Args, typename = decltype( T(std::declval&>(), std::declval()...))> explicit constexpr Indestructible(std::initializer_list il, Args... args) noexcept( noexcept( T(std::declval&>(), std::declval()...))) : storage_(il, std::forward(args)...) 
{} ~Indestructible() = default; Indestructible(Indestructible const&) = delete; Indestructible& operator=(Indestructible const&) = delete; Indestructible(Indestructible&& other) noexcept( noexcept(T(std::declval()))) : storage_(std::move(other.storage_.value)) { other.erased_ = true; } Indestructible& operator=(Indestructible&& other) noexcept( noexcept(T(std::declval()))) { storage_.value = std::move(other.storage_.value); other.erased_ = true; } T* get() noexcept { check(); return &storage_.value; } T const* get() const noexcept { check(); return &storage_.value; } T& operator*() noexcept { return *get(); } T const& operator*() const noexcept { return *get(); } T* operator->() noexcept { return get(); } T const* operator->() const noexcept { return get(); } private: void check() const noexcept { assert(!erased_); } union Storage { T value; template constexpr Storage() noexcept(noexcept(T())) : value() {} template ()...))> explicit constexpr Storage(Args&&... args) noexcept( noexcept(T(std::declval()...))) : value(std::forward(args)...) {} ~Storage() {} }; Storage storage_{}; bool erased_{false}; }; } // namespace folly rocksdb-6.11.4/third-party/folly/folly/Optional.h000066400000000000000000000340061370372246700217270ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once /* * Optional - For conditional initialization of values, like boost::optional, * but with support for move semantics and emplacement. Reference type support * has not been included due to limited use cases and potential confusion with * semantics of assignment: Assigning to an optional reference could quite * reasonably copy its value or redirect the reference. 
* * Optional can be useful when a variable might or might not be needed: * * Optional maybeLogger = ...; * if (maybeLogger) { * maybeLogger->log("hello"); * } * * Optional enables a 'null' value for types which do not otherwise have * nullability, especially useful for parameter passing: * * void testIterator(const unique_ptr& it, * initializer_list idsExpected, * Optional> ranksExpected = none) { * for (int i = 0; it->next(); ++i) { * EXPECT_EQ(it->doc().id(), idsExpected[i]); * if (ranksExpected) { * EXPECT_EQ(it->doc().rank(), (*ranksExpected)[i]); * } * } * } * * Optional models OptionalPointee, so calling 'get_pointer(opt)' will return a * pointer to nullptr if the 'opt' is empty, and a pointer to the value if it is * not: * * Optional maybeInt = ...; * if (int* v = get_pointer(maybeInt)) { * cout << *v << endl; * } */ #include #include #include #include #include #include #include #include #include namespace folly { template class Optional; namespace detail { template struct OptionalPromiseReturn; } // namespace detail struct None { enum class _secret { _token }; /** * No default constructor to support both `op = {}` and `op = none` * as syntax for clearing an Optional, just like std::nullopt_t. 
*/ constexpr explicit None(_secret) {} }; constexpr None none{None::_secret::_token}; class FOLLY_EXPORT OptionalEmptyException : public std::runtime_error { public: OptionalEmptyException() : std::runtime_error("Empty Optional cannot be unwrapped") {} }; template class Optional { public: typedef Value value_type; static_assert( !std::is_reference::value, "Optional may not be used with reference types"); static_assert( !std::is_abstract::value, "Optional may not be used with abstract types"); Optional() noexcept {} Optional(const Optional& src) noexcept( std::is_nothrow_copy_constructible::value) { if (src.hasValue()) { construct(src.value()); } } Optional(Optional&& src) noexcept( std::is_nothrow_move_constructible::value) { if (src.hasValue()) { construct(std::move(src.value())); src.clear(); } } /* implicit */ Optional(const None&) noexcept {} /* implicit */ Optional(Value&& newValue) noexcept( std::is_nothrow_move_constructible::value) { construct(std::move(newValue)); } /* implicit */ Optional(const Value& newValue) noexcept( std::is_nothrow_copy_constructible::value) { construct(newValue); } template explicit Optional(in_place_t, Args&&... args) noexcept( std::is_nothrow_constructible::value) : Optional{PrivateConstructor{}, std::forward(args)...} {} template explicit Optional( in_place_t, std::initializer_list il, Args&&... 
args) noexcept(std:: is_nothrow_constructible< Value, std::initializer_list, Args...>::value) : Optional{PrivateConstructor{}, il, std::forward(args)...} {} // Used only when an Optional is used with coroutines on MSVC /* implicit */ Optional(const detail::OptionalPromiseReturn& p) : Optional{} { p.promise_->value_ = this; } void assign(const None&) { clear(); } void assign(Optional&& src) { if (this != &src) { if (src.hasValue()) { assign(std::move(src.value())); src.clear(); } else { clear(); } } } void assign(const Optional& src) { if (src.hasValue()) { assign(src.value()); } else { clear(); } } void assign(Value&& newValue) { if (hasValue()) { storage_.value = std::move(newValue); } else { construct(std::move(newValue)); } } void assign(const Value& newValue) { if (hasValue()) { storage_.value = newValue; } else { construct(newValue); } } Optional& operator=(None) noexcept { reset(); return *this; } template Optional& operator=(Arg&& arg) { assign(std::forward(arg)); return *this; } Optional& operator=(Optional&& other) noexcept( std::is_nothrow_move_assignable::value) { assign(std::move(other)); return *this; } Optional& operator=(const Optional& other) noexcept( std::is_nothrow_copy_assignable::value) { assign(other); return *this; } template Value& emplace(Args&&... args) { clear(); construct(std::forward(args)...); return value(); } template typename std::enable_if< std::is_constructible&, Args&&...>::value, Value&>::type emplace(std::initializer_list ilist, Args&&... 
args) { clear(); construct(ilist, std::forward(args)...); return value(); } void reset() noexcept { storage_.clear(); } void clear() noexcept { reset(); } void swap(Optional& that) noexcept(IsNothrowSwappable::value) { if (hasValue() && that.hasValue()) { using std::swap; swap(value(), that.value()); } else if (hasValue()) { that.emplace(std::move(value())); reset(); } else if (that.hasValue()) { emplace(std::move(that.value())); that.reset(); } } const Value& value() const& { require_value(); return storage_.value; } Value& value() & { require_value(); return storage_.value; } Value&& value() && { require_value(); return std::move(storage_.value); } const Value&& value() const&& { require_value(); return std::move(storage_.value); } const Value* get_pointer() const& { return storage_.hasValue ? &storage_.value : nullptr; } Value* get_pointer() & { return storage_.hasValue ? &storage_.value : nullptr; } Value* get_pointer() && = delete; bool has_value() const noexcept { return storage_.hasValue; } bool hasValue() const noexcept { return has_value(); } explicit operator bool() const noexcept { return has_value(); } const Value& operator*() const& { return value(); } Value& operator*() & { return value(); } const Value&& operator*() const&& { return std::move(value()); } Value&& operator*() && { return std::move(value()); } const Value* operator->() const { return &value(); } Value* operator->() { return &value(); } // Return a copy of the value if set, or a given default if not. template Value value_or(U&& dflt) const& { if (storage_.hasValue) { return storage_.value; } return std::forward(dflt); } template Value value_or(U&& dflt) && { if (storage_.hasValue) { return std::move(storage_.value); } return std::forward(dflt); } private: template friend Optional<_t>> make_optional(T&&); template friend Optional make_optional(Args&&... 
args); template friend Optional make_optional(std::initializer_list, As&&...); /** * Construct the optional in place, this is duplicated as a non-explicit * constructor to allow returning values that are non-movable from * make_optional using list initialization. * * Until C++17, at which point this will become unnecessary because of * specified prvalue elision. */ struct PrivateConstructor { explicit PrivateConstructor() = default; }; template Optional(PrivateConstructor, Args&&... args) noexcept( std::is_constructible::value) { construct(std::forward(args)...); } void require_value() const { if (!storage_.hasValue) { throw OptionalEmptyException{}; } } template void construct(Args&&... args) { const void* ptr = &storage_.value; // For supporting const types. new (const_cast(ptr)) Value(std::forward(args)...); storage_.hasValue = true; } struct StorageTriviallyDestructible { union { char emptyState; Value value; }; bool hasValue; StorageTriviallyDestructible() : emptyState('\0'), hasValue{false} {} void clear() { hasValue = false; } }; struct StorageNonTriviallyDestructible { union { char emptyState; Value value; }; bool hasValue; StorageNonTriviallyDestructible() : hasValue{false} {} ~StorageNonTriviallyDestructible() { clear(); } void clear() { if (hasValue) { hasValue = false; value.~Value(); } } }; using Storage = typename std::conditional< std::is_trivially_destructible::value, StorageTriviallyDestructible, StorageNonTriviallyDestructible>::type; Storage storage_; }; template const T* get_pointer(const Optional& opt) { return opt.get_pointer(); } template T* get_pointer(Optional& opt) { return opt.get_pointer(); } template void swap(Optional& a, Optional& b) noexcept(noexcept(a.swap(b))) { a.swap(b); } template Optional<_t>> make_optional(T&& v) { using PrivateConstructor = typename folly::Optional<_t>>::PrivateConstructor; return {PrivateConstructor{}, std::forward(v)}; } template folly::Optional make_optional(Args&&... 
args) { using PrivateConstructor = typename folly::Optional::PrivateConstructor; return {PrivateConstructor{}, std::forward(args)...}; } template folly::Optional make_optional( std::initializer_list il, Args&&... args) { using PrivateConstructor = typename folly::Optional::PrivateConstructor; return {PrivateConstructor{}, il, std::forward(args)...}; } /////////////////////////////////////////////////////////////////////////////// // Comparisons. template bool operator==(const Optional& a, const V& b) { return a.hasValue() && a.value() == b; } template bool operator!=(const Optional& a, const V& b) { return !(a == b); } template bool operator==(const U& a, const Optional& b) { return b.hasValue() && b.value() == a; } template bool operator!=(const U& a, const Optional& b) { return !(a == b); } template bool operator==(const Optional& a, const Optional& b) { if (a.hasValue() != b.hasValue()) { return false; } if (a.hasValue()) { return a.value() == b.value(); } return true; } template bool operator!=(const Optional& a, const Optional& b) { return !(a == b); } template bool operator<(const Optional& a, const Optional& b) { if (a.hasValue() != b.hasValue()) { return a.hasValue() < b.hasValue(); } if (a.hasValue()) { return a.value() < b.value(); } return false; } template bool operator>(const Optional& a, const Optional& b) { return b < a; } template bool operator<=(const Optional& a, const Optional& b) { return !(b < a); } template bool operator>=(const Optional& a, const Optional& b) { return !(a < b); } // Suppress comparability of Optional with T, despite implicit conversion. 
template bool operator<(const Optional&, const V& other) = delete; template bool operator<=(const Optional&, const V& other) = delete; template bool operator>=(const Optional&, const V& other) = delete; template bool operator>(const Optional&, const V& other) = delete; template bool operator<(const V& other, const Optional&) = delete; template bool operator<=(const V& other, const Optional&) = delete; template bool operator>=(const V& other, const Optional&) = delete; template bool operator>(const V& other, const Optional&) = delete; // Comparisons with none template bool operator==(const Optional& a, None) noexcept { return !a.hasValue(); } template bool operator==(None, const Optional& a) noexcept { return !a.hasValue(); } template bool operator<(const Optional&, None) noexcept { return false; } template bool operator<(None, const Optional& a) noexcept { return a.hasValue(); } template bool operator>(const Optional& a, None) noexcept { return a.hasValue(); } template bool operator>(None, const Optional&) noexcept { return false; } template bool operator<=(None, const Optional&) noexcept { return true; } template bool operator<=(const Optional& a, None) noexcept { return !a.hasValue(); } template bool operator>=(const Optional&, None) noexcept { return true; } template bool operator>=(None, const Optional& a) noexcept { return !a.hasValue(); } /////////////////////////////////////////////////////////////////////////////// } // namespace folly rocksdb-6.11.4/third-party/folly/folly/Portability.h000066400000000000000000000036621370372246700224500ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once #include #if defined(__arm__) #define FOLLY_ARM 1 #else #define FOLLY_ARM 0 #endif #if defined(__x86_64__) || defined(_M_X64) #define FOLLY_X64 1 #else #define FOLLY_X64 0 #endif #if defined(__aarch64__) #define FOLLY_AARCH64 1 #else #define FOLLY_AARCH64 0 #endif #if defined(__powerpc64__) #define FOLLY_PPC64 1 #else #define FOLLY_PPC64 0 #endif #if defined(__has_builtin) #define FOLLY_HAS_BUILTIN(...) __has_builtin(__VA_ARGS__) #else #define FOLLY_HAS_BUILTIN(...) 0 #endif #if defined(__has_cpp_attribute) #if __has_cpp_attribute(nodiscard) #define FOLLY_NODISCARD [[nodiscard]] #endif #endif #if !defined FOLLY_NODISCARD #if defined(_MSC_VER) && (_MSC_VER >= 1700) #define FOLLY_NODISCARD _Check_return_ #elif defined(__GNUC__) #define FOLLY_NODISCARD __attribute__((__warn_unused_result__)) #else #define FOLLY_NODISCARD #endif #endif namespace folly { constexpr bool kIsArchArm = FOLLY_ARM == 1; constexpr bool kIsArchAmd64 = FOLLY_X64 == 1; constexpr bool kIsArchAArch64 = FOLLY_AARCH64 == 1; constexpr bool kIsArchPPC64 = FOLLY_PPC64 == 1; } // namespace folly namespace folly { #ifdef NDEBUG constexpr auto kIsDebug = false; #else constexpr auto kIsDebug = true; #endif } // namespace folly namespace folly { #if defined(_MSC_VER) constexpr bool kIsMsvc = true; #else constexpr bool kIsMsvc = false; #endif } // namespace folly namespace folly { #if FOLLY_SANITIZE_THREAD constexpr bool kIsSanitizeThread = true; #else constexpr bool kIsSanitizeThread = false; #endif } // namespace folly namespace folly { #if defined(__linux__) && !FOLLY_MOBILE constexpr auto kIsLinux = true; #else constexpr auto kIsLinux = false; #endif } // namespace folly rocksdb-6.11.4/third-party/folly/folly/ScopeGuard.h000066400000000000000000000030301370372246700221670ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
// This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include namespace folly { namespace scope_guard_detail { template class ScopeGuardImpl { public: explicit ScopeGuardImpl(F&& f) : f_{std::forward(f)} {} ~ScopeGuardImpl() { f_(); } private: F f_; }; enum class ScopeGuardEnum {}; template >> ScopeGuardImpl operator+(ScopeGuardEnum, Func&& func) { return ScopeGuardImpl{std::forward(func)}; } } // namespace scope_guard_detail } // namespace folly /** * FB_ANONYMOUS_VARIABLE(str) introduces an identifier starting with * str and ending with a number that varies with the line. */ #ifndef FB_ANONYMOUS_VARIABLE #define FB_CONCATENATE_IMPL(s1, s2) s1##s2 #define FB_CONCATENATE(s1, s2) FB_CONCATENATE_IMPL(s1, s2) #ifdef __COUNTER__ #define FB_ANONYMOUS_VARIABLE(str) \ FB_CONCATENATE(FB_CONCATENATE(FB_CONCATENATE(str, __COUNTER__), _), __LINE__) #else #define FB_ANONYMOUS_VARIABLE(str) FB_CONCATENATE(str, __LINE__) #endif #endif #ifndef SCOPE_EXIT #define SCOPE_EXIT \ auto FB_ANONYMOUS_VARIABLE(SCOPE_EXIT_STATE) = \ ::folly::scope_guard_detail::ScopeGuardEnum{} + [&]() noexcept #endif rocksdb-6.11.4/third-party/folly/folly/Traits.h000066400000000000000000000105441370372246700214110ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
#pragma once

#include
#include

namespace folly {

// On MSVC is_trivially_copyable is an alias of the std trait; elsewhere it is
// a struct derived from std::integral_constant.
#if !defined(_MSC_VER)
template struct is_trivially_copyable
    : std::integral_constant {};
#else
template using is_trivially_copyable = std::is_trivially_copyable;
#endif

/***
 *  _t
 *
 *  Instead of:
 *
 *    using decayed = typename std::decay::type;
 *
 *  With the C++14 standard trait aliases, we could use:
 *
 *    using decayed = std::decay_t;
 *
 *  Without them, we could use:
 *
 *    using decayed = _t>;
 *
 *  Also useful for any other library with template types having dependent
 *  member types named `type`, like the standard trait types.
 */
template using _t = typename T::type;

/**
 *  type_t
 *
 *  A type alias for the first template type argument. `type_t` is useful for
 *  controlling class-template and function-template partial specialization.
 *
 *  Example:
 *
 *    template
 *    class Container {
 *     public:
 *      template
 *      Container(
 *          type_t()...))>,
 *          Args&&...);
 *    };
 *
 *  void_t
 *
 *  A type alias for `void`. `void_t` is useful for controlling class-template
 *  and function-template partial specialization.
 *
 *  Example:
 *
 *    // has_value_type::value is true if T has a nested type `value_type`
 *    template
 *    struct has_value_type
 *        : std::false_type {};
 *
 *    template
 *    struct has_value_type>
 *        : std::true_type {};
 */

/**
 * There is a bug in libstdc++, libc++, and MSVC's STL that causes it to
 * ignore unused template parameter arguments in template aliases and does not
 * cause substitution failures. This defect has been recorded here:
 * http://open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#1558.
 *
 * This causes the implementation of std::void_t to be buggy, as it is likely
 * defined as something like the following:
 *
 *  template
 *  using void_t = void;
 *
 * This causes the compiler to ignore all the template arguments and does not
 * help when one wants to cause substitution failures. Rather declarations
 * which have void_t in orthogonal specializations are treated as the same.
 * For example, assuming the possible `T` types are only allowed to have
 * either the alias `one` or `two` and never both or none:
 *
 *  template ::one>* = nullptr>
 *  void foo(T&&) {}
 *  template ::two>* = nullptr>
 *  void foo(T&&) {}
 *
 * The second foo() will be a redefinition because it conflicts with the first
 * one; void_t does not cause substitution failures - the template types are
 * just ignored.
 */

namespace traits_detail {
// Routing the aliases through a class template (rather than aliasing
// directly) is the workaround for the defect described above.
template struct type_t_ {
  using type = T;
};
} // namespace traits_detail

template using type_t = typename traits_detail::type_t_::type;
template using void_t = type_t;

/**
 * A type trait to remove all const volatile and reference qualifiers on a
 * type T
 */
template struct remove_cvref {
  using type =
      typename std::remove_cv::type>::type;
};
template using remove_cvref_t = typename remove_cvref::type;

// True when the type is nothrow-move-constructible and the std::swap
// expression below is noexcept.
template struct IsNothrowSwappable
    : std::integral_constant<
          bool,
          std::is_nothrow_move_constructible::value&& noexcept(
              std::swap(std::declval(), std::declval()))> {};

template struct Conjunction : std::true_type {};
template struct Conjunction : T {};
template struct Conjunction
    : std::conditional, T>::type {};

template struct Negation : std::integral_constant {};

template using index_constant = std::integral_constant;

} // namespace folly
rocksdb-6.11.4/third-party/folly/folly/Unit.h000066400000000000000000000033131370372246700210560ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include

namespace folly {

/// In functional programming, the degenerate case is often called "unit". In
/// C++, "void" is often the best analogue. However, because of the syntactic
/// special-casing required for void, it is frequently a liability for template
/// metaprogramming. So, instead of writing specializations to handle cases like
/// SomeContainer, a library author may instead rule that out and simply
/// have library users use SomeContainer. Contained values may be ignored.
/// Much easier.
///
/// "void" is the type that admits of no values at all. It is not possible to
/// construct a value of this type.
/// "unit" is the type that admits of precisely one unique value. It is
/// possible to construct a value of this type, but it is always the same value
/// every time, so it is uninteresting.
struct Unit {
  // All Unit values compare equal.
  constexpr bool operator==(const Unit& /*other*/) const {
    return true;
  }
  constexpr bool operator!=(const Unit& /*other*/) const {
    return false;
  }
};

constexpr Unit unit{};

// lift_unit: identity in the primary template, with one explicit
// specialization that yields Unit.
template struct lift_unit {
  using type = T;
};
template <>
struct lift_unit {
  using type = Unit;
};
template using lift_unit_t = typename lift_unit::type;

// drop_unit: identity in the primary template, with one explicit
// specialization that yields void.
template struct drop_unit {
  using type = T;
};
template <>
struct drop_unit {
  using type = void;
};
template using drop_unit_t = typename drop_unit::type;

} // namespace folly
rocksdb-6.11.4/third-party/folly/folly/Utility.h000066400000000000000000000071441370372246700216080ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once

#include
#include

namespace folly {

/**
 *  Backports from C++17 of:
 *    std::in_place_t
 *    std::in_place_type_t
 *    std::in_place_index_t
 *    std::in_place
 *    std::in_place_type
 *    std::in_place_index
 */

struct in_place_tag {};
template struct in_place_type_tag {};
template struct in_place_index_tag {};

// The *_t aliases are references to functions, so the in_place* functions
// below can themselves be passed as the tag arguments.
using in_place_t = in_place_tag (&)(in_place_tag);
template using in_place_type_t = in_place_type_tag (&)(in_place_type_tag);
template using in_place_index_t = in_place_index_tag (&)(in_place_index_tag);

inline in_place_tag in_place(in_place_tag = {}) {
  return {};
}
template inline in_place_type_tag in_place_type(in_place_type_tag = {}) {
  return {};
}
template inline in_place_index_tag in_place_index(in_place_index_tag = {}) {
  return {};
}

// Replaces obj with new_value and returns the value obj held before.
template T exchange(T& obj, U&& new_value) {
  T old_value = std::move(obj);
  obj = std::forward(new_value);
  return old_value;
}

namespace utility_detail {
template struct make_seq_cat;
template <
    template class S,
    typename T,
    T... Ta,
    T... Tb,
    T... Tc>
struct make_seq_cat, S, S> {
  using type =
      S;
};

// Not parameterizing by `template class, typename` because
// clang precisely v4.0 fails to compile that. Note that clang v3.9 and v5.0
// handle that code correctly.
//
// For this to work, `S0` is required to be `Sequence` and `S1` is required
// to be `Sequence`.

template struct make_seq {
  template using apply = typename make_seq_cat<
      typename make_seq::template apply,
      typename make_seq::template apply,
      typename make_seq::template apply>::type;
};
// Recursion base cases: a sequence of length 1 is S1, of length 0 is S0.
template <>
struct make_seq<1> {
  template using apply = S1;
};
template <>
struct make_seq<0> {
  template using apply = S0;
};
} // namespace utility_detail

// TODO: Remove after upgrading to C++14 baseline

template struct integer_sequence {
  using value_type = T;

  static constexpr std::size_t size() noexcept {
    return sizeof...(Ints);
  }
};

template using index_sequence = integer_sequence;

template using make_integer_sequence = typename utility_detail::make_seq<
    Size>::template apply, integer_sequence>;

template using make_index_sequence = make_integer_sequence;
template using index_sequence_for = make_index_sequence;

/**
 * A simple helper for getting a constant reference to an object.
 *
 * Example:
 *
 *   std::vector v{1,2,3};
 *   // The following two lines are equivalent:
 *   auto a = const_cast&>(v).begin();
 *   auto b = folly::as_const(v).begin();
 *
 * Like C++17's std::as_const. See http://wg21.link/p0007
 */
template T const& as_const(T& t) noexcept {
  return t;
}

// Deleted for rvalues so a reference to a temporary cannot be returned.
template void as_const(T const&&) = delete;

} // namespace folly
rocksdb-6.11.4/third-party/folly/folly/chrono/000077500000000000000000000000001370372246700212565ustar00rootroot00000000000000rocksdb-6.11.4/third-party/folly/folly/chrono/Hardware.h000066400000000000000000000014511370372246700231650ustar00rootroot00000000000000
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once #include #include #include #if _MSC_VER extern "C" std::uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif namespace folly { inline std::uint64_t hardware_timestamp() { #if _MSC_VER return __rdtsc(); #elif __GNUC__ && (__i386__ || FOLLY_X64) return __builtin_ia32_rdtsc(); #else // use steady_clock::now() as an approximation for the timestamp counter on // non-x86 systems return std::chrono::steady_clock::now().time_since_epoch().count(); #endif } } // namespace folly rocksdb-6.11.4/third-party/folly/folly/container/000077500000000000000000000000001370372246700217505ustar00rootroot00000000000000rocksdb-6.11.4/third-party/folly/folly/container/Array.h000066400000000000000000000043711370372246700232040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include namespace folly { namespace array_detail { template struct is_ref_wrapper : std::false_type {}; template struct is_ref_wrapper> : std::true_type {}; template using not_ref_wrapper = folly::Negation::type>>; template struct return_type_helper { using type = D; }; template struct return_type_helper { static_assert( folly::Conjunction...>::value, "TList cannot contain reference_wrappers when D is void"); using type = typename std::common_type::type; }; template using return_type = std:: array::type, sizeof...(TList)>; } // namespace array_detail template constexpr array_detail::return_type make_array(TList&&... 
t) { using value_type = typename array_detail::return_type_helper::type; return {{static_cast(std::forward(t))...}}; } namespace array_detail { template inline constexpr auto make_array_with( MakeItem const& make, folly::index_sequence) -> std::array { return std::array{{make(Index)...}}; } } // namespace array_detail // make_array_with // // Constructs a std::array<..., Size> with elements m(i) for i in [0, Size). template constexpr auto make_array_with(MakeItem const& make) -> decltype(array_detail::make_array_with( make, folly::make_index_sequence{})) { return array_detail::make_array_with( make, folly::make_index_sequence{}); } } // namespace folly rocksdb-6.11.4/third-party/folly/folly/detail/000077500000000000000000000000001370372246700212305ustar00rootroot00000000000000rocksdb-6.11.4/third-party/folly/folly/detail/Futex-inl.h000066400000000000000000000077351370372246700232700ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include namespace folly { namespace detail { /** Optimal when TargetClock is the same type as Clock. * * Otherwise, both Clock::now() and TargetClock::now() must be invoked. 
*/ template typename TargetClock::time_point time_point_conv( std::chrono::time_point const& time) { using std::chrono::duration_cast; using TimePoint = std::chrono::time_point; using TargetDuration = typename TargetClock::duration; using TargetTimePoint = typename TargetClock::time_point; if (time == TimePoint::max()) { return TargetTimePoint::max(); } else if (std::is_same::value) { // in place of time_point_cast, which cannot compile without if-constexpr auto const delta = time.time_since_epoch(); return TargetTimePoint(duration_cast(delta)); } else { // different clocks with different epochs, so non-optimal case auto const delta = time - Clock::now(); return TargetClock::now() + duration_cast(delta); } } /** * Available overloads, with definitions elsewhere * * These functions are treated as ADL-extension points, the templates above * call these functions without them having being pre-declared. This works * because ADL lookup finds the definitions of these functions when you pass * the relevant arguments */ int futexWakeImpl( const Futex* futex, int count, uint32_t wakeMask); FutexResult futexWaitImpl( const Futex* futex, uint32_t expected, std::chrono::system_clock::time_point const* absSystemTime, std::chrono::steady_clock::time_point const* absSteadyTime, uint32_t waitMask); int futexWakeImpl( const Futex* futex, int count, uint32_t wakeMask); FutexResult futexWaitImpl( const Futex* futex, uint32_t expected, std::chrono::system_clock::time_point const* absSystemTime, std::chrono::steady_clock::time_point const* absSteadyTime, uint32_t waitMask); template typename std::enable_if::type futexWaitImpl( Futex* futex, uint32_t expected, Deadline const& deadline, uint32_t waitMask) { return futexWaitImpl(futex, expected, nullptr, &deadline, waitMask); } template typename std::enable_if::type futexWaitImpl( Futex* futex, uint32_t expected, Deadline const& deadline, uint32_t waitMask) { return futexWaitImpl(futex, expected, &deadline, nullptr, waitMask); } template 
FutexResult futexWait(const Futex* futex, uint32_t expected, uint32_t waitMask) { auto rv = futexWaitImpl(futex, expected, nullptr, nullptr, waitMask); assert(rv != FutexResult::TIMEDOUT); return rv; } template int futexWake(const Futex* futex, int count, uint32_t wakeMask) { return futexWakeImpl(futex, count, wakeMask); } template FutexResult futexWaitUntil( const Futex* futex, uint32_t expected, std::chrono::time_point const& deadline, uint32_t waitMask) { using Target = typename std::conditional< Clock::is_steady, std::chrono::steady_clock, std::chrono::system_clock>::type; auto const converted = time_point_conv(deadline); return converted == Target::time_point::max() ? futexWaitImpl(futex, expected, nullptr, nullptr, waitMask) : futexWaitImpl(futex, expected, converted, waitMask); } } // namespace detail } // namespace folly rocksdb-6.11.4/third-party/folly/folly/detail/Futex.cpp000066400000000000000000000164261370372246700230400ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #include #include #include #include #include #include #include #ifdef __linux__ #include #endif #ifndef _WIN32 #include #endif using namespace std::chrono; namespace folly { namespace detail { namespace { //////////////////////////////////////////////////// // native implementation using the futex() syscall #ifdef __linux__ /// Certain toolchains (like Android's) don't include the full futex API in /// their headers even though they support it. Make sure we have our constants /// even if the headers don't have them. 
#ifndef FUTEX_WAIT_BITSET #define FUTEX_WAIT_BITSET 9 #endif #ifndef FUTEX_WAKE_BITSET #define FUTEX_WAKE_BITSET 10 #endif #ifndef FUTEX_PRIVATE_FLAG #define FUTEX_PRIVATE_FLAG 128 #endif #ifndef FUTEX_CLOCK_REALTIME #define FUTEX_CLOCK_REALTIME 256 #endif int nativeFutexWake(const void* addr, int count, uint32_t wakeMask) { long rv = syscall( __NR_futex, addr, /* addr1 */ FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */ count, /* val */ nullptr, /* timeout */ nullptr, /* addr2 */ wakeMask); /* val3 */ /* NOTE: we ignore errors on wake for the case of a futex guarding its own destruction, similar to this glibc bug with sem_post/sem_wait: https://sourceware.org/bugzilla/show_bug.cgi?id=12674 */ if (rv < 0) { return 0; } return static_cast(rv); } template struct timespec timeSpecFromTimePoint(time_point absTime) { auto epoch = absTime.time_since_epoch(); if (epoch.count() < 0) { // kernel timespec_valid requires non-negative seconds and nanos in [0,1G) epoch = Clock::duration::zero(); } // timespec-safe seconds and nanoseconds; // chrono::{nano,}seconds are `long long int` // whereas timespec uses smaller types using time_t_seconds = duration; using long_nanos = duration; auto secs = duration_cast(epoch); auto nanos = duration_cast(epoch - secs); struct timespec result = {secs.count(), nanos.count()}; return result; } FutexResult nativeFutexWaitImpl( const void* addr, uint32_t expected, system_clock::time_point const* absSystemTime, steady_clock::time_point const* absSteadyTime, uint32_t waitMask) { assert(absSystemTime == nullptr || absSteadyTime == nullptr); int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG; struct timespec ts; struct timespec* timeout = nullptr; if (absSystemTime != nullptr) { op |= FUTEX_CLOCK_REALTIME; ts = timeSpecFromTimePoint(*absSystemTime); timeout = &ts; } else if (absSteadyTime != nullptr) { ts = timeSpecFromTimePoint(*absSteadyTime); timeout = &ts; } // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout // value - 
http://locklessinc.com/articles/futex_cheat_sheet/ long rv = syscall( __NR_futex, addr, /* addr1 */ op, /* op */ expected, /* val */ timeout, /* timeout */ nullptr, /* addr2 */ waitMask); /* val3 */ if (rv == 0) { return FutexResult::AWOKEN; } else { switch (errno) { case ETIMEDOUT: assert(timeout != nullptr); return FutexResult::TIMEDOUT; case EINTR: return FutexResult::INTERRUPTED; case EWOULDBLOCK: return FutexResult::VALUE_CHANGED; default: assert(false); // EINVAL, EACCESS, or EFAULT. EINVAL means there was an invalid // op (should be impossible) or an invalid timeout (should have // been sanitized by timeSpecFromTimePoint). EACCESS or EFAULT // means *addr points to invalid memory, which is unlikely because // the caller should have segfaulted already. We can either // crash, or return a value that lets the process continue for // a bit. We choose the latter. VALUE_CHANGED probably turns the // caller into a spin lock. return FutexResult::VALUE_CHANGED; } } } #endif // __linux__ /////////////////////////////////////////////////////// // compatibility implementation using standard C++ API using Lot = ParkingLot; Lot parkingLot; int emulatedFutexWake(const void* addr, int count, uint32_t waitMask) { int woken = 0; parkingLot.unpark(addr, [&](const uint32_t& mask) { if ((mask & waitMask) == 0) { return UnparkControl::RetainContinue; } assert(count > 0); count--; woken++; return count > 0 ? 
UnparkControl::RemoveContinue : UnparkControl::RemoveBreak; }); return woken; } template FutexResult emulatedFutexWaitImpl( F* futex, uint32_t expected, system_clock::time_point const* absSystemTime, steady_clock::time_point const* absSteadyTime, uint32_t waitMask) { static_assert( std::is_same>::value || std::is_same>::value, "Type F must be either Futex or Futex"); ParkResult res; if (absSystemTime) { res = parkingLot.park_until( futex, waitMask, [&] { return *futex == expected; }, [] {}, *absSystemTime); } else if (absSteadyTime) { res = parkingLot.park_until( futex, waitMask, [&] { return *futex == expected; }, [] {}, *absSteadyTime); } else { res = parkingLot.park( futex, waitMask, [&] { return *futex == expected; }, [] {}); } switch (res) { case ParkResult::Skip: return FutexResult::VALUE_CHANGED; case ParkResult::Unpark: return FutexResult::AWOKEN; case ParkResult::Timeout: return FutexResult::TIMEDOUT; } return FutexResult::INTERRUPTED; } } // namespace ///////////////////////////////// // Futex<> overloads int futexWakeImpl( const Futex* futex, int count, uint32_t wakeMask) { #ifdef __linux__ return nativeFutexWake(futex, count, wakeMask); #else return emulatedFutexWake(futex, count, wakeMask); #endif } int futexWakeImpl( const Futex* futex, int count, uint32_t wakeMask) { return emulatedFutexWake(futex, count, wakeMask); } FutexResult futexWaitImpl( const Futex* futex, uint32_t expected, system_clock::time_point const* absSystemTime, steady_clock::time_point const* absSteadyTime, uint32_t waitMask) { #ifdef __linux__ return nativeFutexWaitImpl( futex, expected, absSystemTime, absSteadyTime, waitMask); #else return emulatedFutexWaitImpl( futex, expected, absSystemTime, absSteadyTime, waitMask); #endif } FutexResult futexWaitImpl( const Futex* futex, uint32_t expected, system_clock::time_point const* absSystemTime, steady_clock::time_point const* absSteadyTime, uint32_t waitMask) { return emulatedFutexWaitImpl( futex, expected, absSystemTime, absSteadyTime, 
waitMask); } } // namespace detail } // namespace folly rocksdb-6.11.4/third-party/folly/folly/detail/Futex.h000066400000000000000000000066421370372246700225040ustar00rootroot00000000000000// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include #include #include #include namespace folly { namespace detail { enum class FutexResult { VALUE_CHANGED, /* futex value didn't match expected */ AWOKEN, /* wakeup by matching futex wake, or spurious wakeup */ INTERRUPTED, /* wakeup by interrupting signal */ TIMEDOUT, /* wakeup by expiring deadline */ }; /** * Futex is an atomic 32 bit unsigned integer that provides access to the * futex() syscall on that value. It is templated in such a way that it * can interact properly with DeterministicSchedule testing. * * If you don't know how to use futex(), you probably shouldn't be using * this class. Even if you do know how, you should have a good reason * (and benchmarks to back you up). * * Because of the semantics of the futex syscall, the futex family of * functions are available as free functions rather than member functions */ template